From d5caf299e834a95bedc92cc47a4555aac6e74c09 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 2 Oct 2013 20:04:17 +0300 Subject: [PATCH 0001/1110] forgotten credit.. --- platform/base_readme.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/platform/base_readme.txt b/platform/base_readme.txt index 60e91f5e..31d3b71b 100644 --- a/platform/base_readme.txt +++ b/platform/base_readme.txt @@ -574,6 +574,7 @@ Changelog regressions this time. * pandora: fixed tv-out (again), added automatic layer switching * libretro: fixed crackling sound for some games, added some core options + * sdl: multiple joystick support has been fixed (Victor Luchits) 1.85 (2013-08-31) * Lots of 32X compatibility and accuracy improvements. All commercial games From 51c089500cdaed23ca2532d0910a2ace62ea34ba Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 3 Oct 2013 02:12:37 +0300 Subject: [PATCH 0002/1110] libretro: get rid of borders --- platform/libretro.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/platform/libretro.c b/platform/libretro.c index f3252ab5..f59cf672 100644 --- a/platform/libretro.c +++ b/platform/libretro.c @@ -39,7 +39,7 @@ static FILE *emu_log; #define VOUT_MAX_WIDTH 320 #define VOUT_MAX_HEIGHT 240 static void *vout_buf; -static int vout_width, vout_height; +static int vout_width, vout_height, vout_offset; static short __attribute__((aligned(4))) sndBuffer[2*44100/50]; @@ -251,6 +251,9 @@ void emu_video_mode_change(int start_line, int line_count, int is_32cols) memset(vout_buf, 0, 320 * 240 * 2); vout_width = is_32cols ? 256 : 320; PicoDrawSetOutBuf(vout_buf, vout_width * 2); + + vout_height = line_count; + vout_offset = vout_width * start_line; } void emu_32x_startup(void) @@ -335,10 +338,10 @@ void retro_get_system_av_info(struct retro_system_av_info *info) info->timing.fps = Pico.m.pal ? 
50 : 60; info->timing.sample_rate = 44100; info->geometry.base_width = 320; - info->geometry.base_height = 240; + info->geometry.base_height = vout_height; info->geometry.max_width = VOUT_MAX_WIDTH; info->geometry.max_height = VOUT_MAX_HEIGHT; - info->geometry.aspect_ratio = 4.0 / 3.0; + info->geometry.aspect_ratio = 0.0f; } /* savestates */ @@ -849,7 +852,8 @@ void retro_run(void) PicoFrame(); - video_cb(vout_buf, vout_width, vout_height, vout_width * 2); + video_cb((short *)vout_buf + vout_offset, + vout_width, vout_height, vout_width * 2); } void retro_init(void) From fcdf2aff9bcdf6523ac5870ba31cf06bfb1bded6 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 3 Oct 2013 02:16:09 +0300 Subject: [PATCH 0003/1110] let linux/plat know it's for compiled pandora --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index cef431cc..67ba86f1 100644 --- a/Makefile +++ b/Makefile @@ -65,6 +65,7 @@ USE_FRONTEND = 1 endif ifeq "$(PLATFORM)" "pandora" platform/common/menu_pico.o: CFLAGS += -DPANDORA +platform/libpicofe/linux/plat.o: CFLAGS += -DPANDORA OBJS += platform/pandora/plat.o OBJS += platform/pandora/asm_utils.o OBJS += platform/common/arm_utils.o From 31f944ea8ceee82ec8b1451aa86b8d4a9d07367e Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 4 Oct 2013 03:18:59 +0300 Subject: [PATCH 0004/1110] gp2x: various fixes, prepare for release --- platform/gp2x/Makefile | 18 +++++++- platform/gp2x/PicoDrive.ini | 6 +++ platform/gp2x/PicoDrive_s.png | Bin 0 -> 3384 bytes platform/gp2x/PicoDrive_t.png | Bin 0 -> 27669 bytes platform/gp2x/emu.c | 39 +++-------------- platform/gp2x/plat.c | 9 ++-- platform/gp2x/plat.h | 4 +- platform/gp2x/vid_mmsp2.c | 5 +-- platform/gp2x/vid_pollux.c | 76 +++++++++++----------------------- platform/libpicofe | 2 +- 10 files changed, 61 insertions(+), 98 deletions(-) create mode 100644 platform/gp2x/PicoDrive.ini create mode 100644 platform/gp2x/PicoDrive_s.png create mode 100644 platform/gp2x/PicoDrive_t.png diff --git 
a/platform/gp2x/Makefile b/platform/gp2x/Makefile index a3786c76..9d90ecc4 100644 --- a/platform/gp2x/Makefile +++ b/platform/gp2x/Makefile @@ -1,10 +1,24 @@ +# release packaging makefile + +VER := $(shell head -n 1 ../common/version.h | \ + sed 's/.*"\(.*\)\.\(.*\)".*/\1\2/g') +BUILD := $(shell git describe HEAD | grep -- - | \ + sed -e 's/.*\-\(.*\)\-.*/\1/') +ifneq "$(BUILD)" "" +VER := $(VER)_$(BUILD) +endif + +all: rel + +../../tools/textfilter: + make -C ../../tools/ + readme.txt: ../../tools/textfilter ../base_readme.txt ../../tools/textfilter ../base_readme.txt $@ GP2X -VER ?= $(shell head -n 1 version.h | sed 's/.*"\(.*\)\.\(.*\)".*/\1\2/g') CODE940 = code940/pico940_v3.bin -rel: PicoDrive PicoDrive.gpe $(CODE940) readme.txt ../game_def.cfg \ +rel: ../../PicoDrive PicoDrive.gpe $(CODE940) readme.txt ../game_def.cfg \ PicoDrive.png PicoDrive_s.png PicoDrive_t.png \ warm_2.4.25.o warm_2.4.26-open2x.o warm_2.6.24.ko \ ../../pico/carthw.cfg diff --git a/platform/gp2x/PicoDrive.ini b/platform/gp2x/PicoDrive.ini new file mode 100644 index 00000000..06b816fa --- /dev/null +++ b/platform/gp2x/PicoDrive.ini @@ -0,0 +1,6 @@ +[info] +name="PicoDrive" +path="/PicoDrive/PicoDrive.gpe" +icon="/PicoDrive/PicoDrive_s.png" +title="/PicoDrive/PicoDrive_t.png" +group="GAMES" diff --git a/platform/gp2x/PicoDrive_s.png b/platform/gp2x/PicoDrive_s.png new file mode 100644 index 0000000000000000000000000000000000000000..f596616582e0aa54590d695ab23691dbfb13a4cc GIT binary patch literal 3384 zcmV-84af3{P)Oz@Z0f2-7z;ux~O9+4z06=<WDR*FRcSTFz- zW=q650N5=6FiBTtNC2?60Km==3$g$R3;-}uh=nNt1bYBr$Ri_o0EC$U6h`t_Jn<{8 z5a%iY0C<_QJh>z}MS)ugEpZ1|S1ukX&Pf+56gFW3VVXcL!g-k)GJ!M?;PcD?0HBc- z5#WRK{dmp}uFlRjj{U%*%WZ25jX z{P*?XzTzZ-GF^d31o+^>%=Ap99M6&ogks$0k4OBs3;+Bb(;~!4V!2o<6ys46agIcq zjPo+3B8fthDa9qy|77CdEc*jK-!%ZRYCZvbku9iQV*~a}ClFY4z~c7+0P?$U!PF=S z1Au6Q;m>#f??3%Vpd|o+W=WE9003S@Bra6Svp>fO002awfhw>;8}z{#EWidF!3EsG z3;bXU&9EIRU@z1_9W=mEXoiz;4lcq~xDGvV5BgyU 
zp1~-*fe8db$Osc*A=-!mVv1NJjtCc-h4>-CNCXm#Bp}I%6j35eku^v$Qi@a{RY)E3 zJ#qp$hg?Rwkvqr$GJ^buyhkyVfwECO)C{#lxu`c9ghrwZ&}4KmnvWKso6vH!8a<3Q zq36)6Xb;+tK10Vaz~~qUGsJ8#F2=(`u{bOVlVi)VBCHIn#u~6ztOL7=^<&SmcLWlF zMZgI*1b0FpVIDz9SWH+>*hr`#93(Um+6gxa1B6k+CnA%mOSC4s5&6UzVlpv@SV$}* z))J2sFA#f(L&P^E5{W}HC%KRUNwK6<(h|}}(r!{C=`5+6G)NjFlgZj-YqAG9lq?`C z$c5yc>d>VnA`E_*3F2Qp##d8RZb=H01_mm@+|Cqnc9PsG(F5HIG_C zt)aG3uTh7n6Et<2In9F>NlT@zqLtGcXcuVrX|L#Xx)I%#9!{6gSJKPrN9dR61N3(c z4Tcqi$B1Vr8Jidf7-t!G7_XR2rWwr)$3XQ?}=hpK0&Z&W{| zep&sA23f;Q!%st`QJ}G3cbou<7-yIK2z4nfCCCtN2-XOGSWo##{8Q{ATurxr~;I`ytDs%xbip}RzP zziy}Qn4Z2~fSycmr`~zJ=lUFdFa1>gZThG6M+{g7vkW8#+YHVaJjFF}Z#*3@$J_By zLtVo_L#1JrVVB{Ak-5=4qt!-@Mh}c>#$4kh<88)m#-k<%CLtzEP3leVno>={htGUuD;o7bD)w_sX$S}eAxwzy?UvgBH(S?;#HZiQMoS*2K2 zT3xe7t(~nU*1N5{rxB;QPLocnp4Ml>u<^FZwyC!nu;thW+pe~4wtZn|Vi#w(#jeBd zlf9FDx_yoPJqHbk*$%56S{;6Kv~mM9!g3B(KJ}#RZ#@)!hR|78Dq|Iq-afF%KE1Brn_fm;Im z_u$xr8UFki1L{Ox>G0o)(&RAZ;=|I=wN2l97;cLaHH6leTB-XXa*h%dBOEvi`+x zi?=Txl?TadvyiL>SuF~-LZ;|cS}4~l2eM~nS7yJ>iOM;atDY;(?aZ^v+mJV$@1Ote z62cPUlD4IWOIIx&SmwQ~YB{nzae3Pc;}r!fhE@iwJh+OsDs9zItL;~pu715HdQEGA zUct(O!LkCy1<%NCg+}G`0PgpNm-?d@-hMgNe6^V+j6x$b<6@S<$+<4_1hi}Ti zncS4LsjI}fWY1>OX6feMEuLErma3QLmkw?X+1j)X-&VBk_4Y;EFPF_I+q;9dL%E~B zJh;4Nr^(LEJ3myURP{Rblsw%57T)g973R8o)DE9*xN#~;4_o$q%o z4K@u`jhx2fBXC4{U8Qn{*%*B$Ge=nny$HAYq{=vy|sI0 z_vss+H_qMky?OB#|JK!>IX&II^LlUh#rO5!7TtbwC;iULyV-Xq?ybB}ykGP{?LpZ? 
z-G|jbTmIbG@7#ZCz;~eY(cDM(28Dyq{*m>M4?_iynUBkc4TkHUI6gT!;y-fz>HMcd z&t%Ugo)`Y2{>!cx7B7DI)$7;J(U{Spm-3gBzioV_{p!H$8L!*M!p0uH$#^p{Ui4P` z?ZJ24cOCDe-w#jZd?0@)|7iKK^;6KN`;!@ylm7$*nDhK&GcDTy000JJOGiWi{{a60 z|De66lK=n!32;bRa{vGi!~g&e!~vBn4jTXf00(qQO+^RV2@)0w60)IvPXGV{YDq*v zR7l6|lutwIEdliXdF)dtF9;!=>4U*V~hv+is7|9$vqYyIL;~>= z9R#l;3=9&99U`K(D57*DSfZerZ0})WrnL-@@B6*|{_OMo{yfkAfM05Pc{!0tKoE3s zaREjY!!SJ0(=(b~>FHhVAX`O-xK+7&bFA^FtqlF1y|C^?ElqH%XFQSy_Q$*k-c<09vhF_6y-bI>+9?N{e9O|Rkd2J1_FT(*4*4&rBcy#UDLFemzU}3 z>3lw~X_`rmjYgwbEIvIwx!vy7)zxyjys)s)*(^=dE|&`caC&;WzP`S@yBiD!S(Y`K zay%X<2qKY45CpNbw1gnY$5V;LV#b-z&(E1m2FLNEqodo~+rgrm&F1#@_Qu8r$8jV{ zX0zFD=-}X>gNsI^U9YBT48w?`_+^i^T5W!Q9snRoQr|T8_xC$EpU}lu=Q(F%KWZqx#URB1003{5mE^Pm00eIMxiA_c{Ck!zZWjQ+(6pD8{rJ(& z)yvh>&ee@tSyqQdXjmCayjuLBSXDBVcpG0whekHHIi$ic=-mLgXwB9%p3ig$fV*HZ zu@AI;s67A#-#JV)K%aVU9~XJ_E6$-zwh2OL1VW!zijXQ=C?P<~KT)OvAf`!cT|0S4oYT-GSLS@0ZT;794U?gzibkoFg~1(%P9b$DCHe| zwU^Jn5bQb#L?E#h+noKd7v+`q^XILXotYXpDF9&8D|q&WjkB3FR1_uD`K5&66v@sE zv*^ohoNW`16c|u+u&jUS_TS#f7bmwbE$wV?FRBd57+X&02fY9X%=`48o$iCho^G!< zdj8Uf@tB4wBHe8aOkJs!lh350M|@u0O;UVn!+d?Bn4=w1{@iZBO1Pzk@17`^6MxB5 zP8%ze$}-IG`_=j{jP3D_1dJch;2?nzki!1M)auQRDif}OSKr@T0N|q2wf_$@+M7_j zubVSIuV<3aibV{7P+R3xcL2alj-EqzqFHhj4FHfU3S(`MCcEk)Ql8j!)2!LSY`_po584VPh#nr@(Awcz|CMZwpnWfzs^hRreX=_j_h8_t6zT0uW^ zR~R~y^cWg}CCzdS!sl-Wbctw0*^zfN{YpsqarD&VF@##Qsww8+w}6WFu`YR?P@$gI?{M&e&_KnMNNZ{4n`G zbO9tE8EQ*PX(azuTzJA}U18?m+rAFp2{oj)Ziu;-SZ?1f> z-bTIqIvN9HCdnm6B?glC5Z7m^RglcZk&Nnc334B0?PcR+yM8iYZDkF7mokCzUFAFD zcQRH^-Ky$}>h)^OPnVx2b(5;hbb{&>KJn^oRiXdMsQFZ#|M5tNOb1vc`Xjm;_`_9K zvMku}ITpomP;F3CwoI$2(R{0Ggc{R|q}i8^#_?@U)R!>+A;!k(r9f}i&a7vO&N%Vv zs$yYTUKL*bX6?&#TS?TdEcd%SFAelP=E}^=;BV`MXDokN$SU~M+PQ5^9IG=jGYo2! 
zYP4z;kGciwPAc;&y5&0!AH%SvM^%_im=u9Y5aj$loh6;MN{q_)>f$2TbsW`>5Mr@mes@AiS>)YnC75Ix zvd`Daol^u70oy&lUdF`5s)kQ;WJ+-4l8vm5b#JS9UAvoZ`v1Pp&uz{fIQg+se=2Zl ze;U17IK@}UQeepI`D<`Yc0+4tRuZ%DJC`G@Hds?uGYkmcqvN#VOwk7|G_;lbRNq>j z5lP%gcoc3)Y>5f3^Q;@1PYL$5jkQf`4!RbYN5W)7=w7W~{az&EjB(qYDk zk9YooG`)$EA}cmGO-m=z@kQ;@@1&E$LE$_7AR4=|<75!c?Oqln1!9jK`7wAO`U}-^ z*D`J}7>g+~I#NGMGSMx@fOM8Sliz1{Rn`+G_aK)cx1I~+))cO1SK()ssOE4GTNdo# z>yRAbur-{TH^?_oc2Xr5=3uQ8aB=E^J)K-e|IPo4xfMW^#W`l`=2r|ExQeWNU&VpX zCXykg7Na4QelCs6`r~~(!v%p4Ln4OX0L1{1mUC8MHfB-Qgq2d`LWxz~KlM1Ji{Foa zn75p5r#*tHD!EJTt=d|yNWuFYZ)#^AaglEBuc;}??lRRh>S?XOL*TBh_hhY2WYf#A zOI#Z#<#*I^$L8c(WJ7k0R+)-@T_ffNR@5e-jlON~bnJw1Moqzo>65AEiIrr*ytBgB z6Z`@E&D7TNC{=xS3WMG5S_k1KF?YWejIDRbAENY7YQ=tjYh`tw*Q`0w&#VjXX7pEg z3^VJW2=50YI38hs!q|xnI$B*gG1WBSG=zaIAENTfB;Lh-t0PXKo|R7spVa4V z=WQO9ADeF_>td@9?lK;oO+gyY?kA&fe!qD}rNeGrX#X=$@mpM1I|=&XND+IKs^=yD z%!mE0z}rs=c_V-6+~oYW4{p(6x0G*1mc8QxYBy2msJtnF6r2t+ITtw!xjH$I#-EKl zG^ZqHp@-}KPl_v4IC*?xhJGrSy5siC2jO#Rz>Asq{O|ce!5sIi7AqyK0ee;um;;XM zSqHAy=5P7exINJ>QHaUKPp^xnl^p{8pZYoVBIeGAUWZ~YX>*#Roq3%(b?qjO?Wg|T z-~ij(1F`}^$l6u6-@o^-(D?#k@wM(tf7jdL+iTeSwO|jXXUJsdk`L1IMr|5EaRq+-j^6Sh?%5@TsEOJtI(qz=zsG|ZQ31Q*Ck{-_o zPnEJW)-#L+L@y;TlXXPzIfL)!j}{=aS+m8N7ny0j&Vl96hkZvR^(!L-Ba}VkzP#LK zS9*^p{XVNc6lbL{9L?=O>Ja8vyZgp#onMuYX(VZBuSjpQE}yJ;*;n8Q6Xmn2q8#A$ zzfVzjWg7enx|@=rCjfwB`QL%?L$cByei7A6SzR7=86E#UIs*pzA{_lt1C-^Yb$yqQ zyZu_o^}ld--Ae4~e!^*cOeA%7q-0xZauQQ2ux{Y~hlJo1k2(;q92yA2Ko?U&I|Iox zUdYH0zoDV8wTMqZr5>Scb>eDeQ*B}}ZnB=9`+oWfRyvcAXq`f6#e&1LCguJFaFR(qCl zJQKtISnV%>1W?EcnZ%k}G&ERRmPZPyZ$=J_fwZS_>=)k3NclXuXg*9h35lZG zwnDu*C3+JiqggUMOYk(7?qd}^eox;;=;U9l3+va+<7%la1*vOvT(XQ_lM!e+#6vr@ z>SuEJk4oY=M4qqaW_mZmwO#!hLa246ZEEoajrq9TtUm{iH`(w4XUOYyJKh##N8*kU zpT&1{15)A9WFZB_!y_k1ebcahO@oZq60}m&>3v!t5d^LNMAUt>cDCGTHJZckcyk7O z{Q%6kQ>A1Y|+LH?N9Mc!z^Z&@Io+dnbfIp>b-1e zCq66oRJif_SM4dobYasVFJ0Tn@w;}+q67GabIbbn%ZDq%6w9S18(`j--FNyX+yPK0 zWv_n>nNMr5kRS+4Wrj*Qy)yiUIU^QZY$jE61;oL&>fL5%4~?NpGBIp)=Ofw30aQ%g 
z$UM9NT2?94z8I-7`eZJoA`{()j`ca?p1&6hkV452U*Ee}rUj>!o@ae#jTh~e?Ta1Y zn%C9}c_-Mh(74k@oV3C6+k5o^fF7AuD1%f9!Og!JZet|x7`8}jJL5N zcs~N0il#graGWPgE9C)sc#D<<+F~o0lSA0gB3WjH|Bm{Lgc!HAUmPuM??aH%U(yW_5=)tOxN8&K=uVcD;x#~YkR0rk1l{S(cm6?N;@ zLsK=T<4y)3d+$-F4@C6(&vdQ;m5{I7u;JuaW-%{cU$0t^{f&Nyn)?&ir>&bz^Fv7+ z?;0M%8hv&|I-WO;1af8Ra$3xU--bWu(O>(EClUaK<8>Wcl-~j{5T<_h1Td!n`fVh0 za1k2Y2(#%m-V$s|nV)6DlmE26zG{Ifgvl9<2(8@qc*`^HY7{4Pn-(Fp)xWvd4}iUVly?>8+xt?Rnnf}Y}Ywj?UTlS&^8=Fn(q z_Wx!~BPBJ}l?OlUEkfq+GkdV}D8Z>h$=6@!=Sfr{ zVj8ImVAcpb0aB=;w^YoCp)^__0C&gZa&OwW`{80Zj;PZ9Vrm(6oc%hS)Hn0ml$Q*6 zSPD3M4F>5wg4GM2__=uK7?1i_w&biClu*qdgbP&DF+bSQ8SbhGuAXq5rwN|0Wvcx#@m7dNKdb8 z-mgT;INj!bd@37Lzh&yF7HdqMDp?{#o9NBd_SdcIEXYgsE8yGKkdE)*-99}RMUK3< zE=#5*Qh+Yo*1Ti>KDh&i#^6R2>sHU&Q9mpyVa50Dc=-KGE#E5)d6~-Q^*LR>Kjxir zS?l1RMW-=v-h<>nw&uG0d^`>K@Li>QUFq|*MW-naporRSl&gW`Jfp}nNDOD_xWt6! zBaaIhuvBJy!_w^fb5s3!=cQ2Wc}|habnYZ7*g5SZGofBV$jg@nQ)AEqW$^yyHK$X$ zT;jM<+!xhK&^#vlQeaswlANZ(opgs51cr*d(dWqWG)OML;v1R*{2{xvu_lNcbQOO!I z&^B$2`BeEtvNLtfZ$tUgz_mkXN#mTK-`DEP6dH9T^E-ch$9R4f6?j$mxH;A_yX~0V zg%nmM1Z+koI4;zQx4cT_4M9B`P2R@{%nr+Jnw|yU5;#(ZxX;vfX3uEq*o5heHHwtQXRb9>fjF{)E7!feDQV&|cOIY9RaK7m z?O3J)VoJ(Z=)L?w@hcm;(^i@sGcJWcA)j&oQ@sZk6J|l+XKLrYMbjVqvGPWlv6RLh zqc#hsiU+NNA$T2*!z};WI}J`GbZ7^#J$P*P)|Bmu7*F_nHll#Pm7#yB76joS98pF_p%tqS8y8uwJqBdjb{aX&d1msZ zANlO(Jv5H9V;@!WuD|E`F_>iPqIZO`9M{$F(zR!YT~O>gt@5;eZ#{N=T!!VWR*8TP zfcSTjc#`tXbp#=Try>L)g1P)k8XXS-Ar=V_F||lCNNa(6nPz*15e5b7njtow!S)x%6M0ONGyLt zPfUz}Hfys}hdbu{hCwWF zA5TORHI{_eb}EP83eq)atCl+p6LdQhhvgF2qev6W3;&p}&|P?mwk}8ZUN0WCMF@N1 zmq)U`+N;6U21HYk3?(U+CErKQp@glcp(tO#oaA5nbiajC1L{ z-bMD@l{i1Ao2fOAhA;&|jR2z-VMy*P2LamQVdyE=)Zg(f@|!v0X&vJ&YDo)70taiy8dQ|MijSvJr|8x~JUrSYHA+>0q9rE)RsE<# znkLp%#Gk0nm4YbZzM~ERfH;LXw2vkf1+j{dR6$yeu==BnL~)erR99qkUDw51y_L?h z4xpFURj-zKyT^QF+nV>}&53C5ZTFeFnp%_9Ss6uhGZZLCQdm%2YL=EAM{9fD$gQqJ z6<7;4+>a}!qfE4o4mhQ`rhUETiU5qLPx@H8= zOdZp?RdkInKC#r4O(m11KSFExsx<;9U=RILx@3Qi;pTi{Jx=dKvsQk7y^5YobUWU(gy{j2w z6gDbYWd2!*v|H4Fl?#Y)rYR?)w#`L5l 
z$Bfi$<=Ad;>^JCY(tdo!OX+*w4=f1Y?UfhzdnT(7n9HpTx!W9t_Y%v!MWf06#a6#( zEW-no=MrX-C=Q-vYHKI35s58tq`;4bBe^SRThAxf>tA1S{8hH6n)StQuQcx?@)|IM zw>9d%k}z;Zrq3vKo#vbq+o7u|F=cqJ>kR&T3fP;Wm??0^+1SEEL@f5i{$a|2uSGt3 z7CaMB*Lk|kZro~cR7PlDo$m=Tbaci=vd^K#r&Z9>t(kv{wUJXQo%OwlW%52*2c;iI z8Q}GezHVU{_x4oEF!!Nh)99%AVlOJv0tjVje2N)hkUIWw z%SuRGlt>`8e|A*Z>-Lfj%%6oV^a5uJ0#4h3PEJ$8dq|x_<@T)QSmAvE>mlmauy)r` zLLZ5;!YQ^+Nl$1K3=71Mra~P?{cd#n@hrsR=)=nT1ypcG{0rpvv|GROfODNqo={#n z^{J9dVa(XY?@>Op_Ti?j_j0cANlv9d5t#+pZIV}MaVn8CvEy+c*z=~Q!WyR_~jEe$Zy_e%v&nOZ1b|q?akn$ZV(jS4K4KCfQ>+AH4BO4 zB-vSci&uXugfvq5s`P5Mwg`@(;(Ijv%<&2eiUp>f0z^BzGr7Hvw{_*J877`<{I3_B zICp@fejx2dFvfUh&z}@TI;s%M?i#?ASfl235F$39C_;`#JHMdOY3b25K@_kk$7)SsJY z&!zTbyEWF5yJv*MXr>fHH(F+u4~@~uOY}q3s=!$!=&?vZ-LWb zqgIIuXl^dwFv!uBW3Z-Ycost=7T%C}Gb<_Y)bfvDy?wjpzLyVs8WD6zo0q)5lq3@l zTolf%urJq^E|IDVdkOp8D8wy{7hzdymZ|LmC(fAWPk*U`F4Cp~TeR=zB=6HW0}tYk z3XE@eI71B4QYxxuoEl48kpVPl2q5qYT5#Y@Z%MD=hUJ;?R+1bdneSidaL zuDXJQfJAcPCjaY0=ap7CJfEG_st5 zF%<31^Hc ze1mGT$%TA3h~6{3&N*M0o&^Gb{DD`K{MnRJOTr3~*$QXcU&CB?V{{S3#N@|EN4z6< z)EV@#r&oUv!ud}~wOMxXlnM{;J||@-fF;6}HO;l6^8mOM4Xc=Q=vuhf?d|Q2U%$W$Ce6FC{LLEKrl%)sHxsQow$lZoJ}%v}#5KD96FbpVfp$xO z%e5pXh%hd9Av!7?W?es5jU{|H`sk7*Xv?2APN<$X;pit)eab1}%dwd6(Q;$HQZoI8 zidM+Kq<2JSPT%@YNE1@fkd#u*cm;tFd#!X=ft)#@TIbz`Dn$5kRiXQ3AO2sBw$qz~ z(Z(RX?j<|Eoa4p8>JbCSB_qrcBNqYz$t2WM6;aJ~^PJ~mW0QZLdY3o>seIlGVKQ-0lBrZ4{a97L5R;kbUZXKw~c7A16{B#{c?;9++>XDA&Uc zlf_%F2eRI{xUOlm;i7Zn;>Qw`)XLf-5aWgMBIppa>M`RdKrucmON}5jbLAQEI_}2z zJ{+3|Uw<%P@A{%zRXH@3V9Pr%^3h>4961`BobCE1G!FAOgN7p*S?8H0r${lEubJ9%a=O>bZfYzua6Fga$=3pzFYc79(73fNsL zq#%``Pv;wHXx3J?K-wcWwzfE0G^B>FT?z1eGPVLxVd`x2QvL9X8cB3%wD zVUIGXogUe>P+}v2I&w-hO$4D?T z4-_4L+CKVCqUvhlaG+B#|S)=e!4Xp4Q{lz*o5P?I2ouSa$k0AX?*F zYLd=a#XBNM>DPt!2^aYuAfJqxPpJEs3+-Y!99O1@`MXL`P*B?ZI4p3|*lhXY0?s`Q zS#P#4+a2rLZP%af=PJ}!+O0Gbq*FmL@jXW5>|p8q#L(i9T=P7k z49iR@G3Jz6hDH7t`l2-p)Wk5+pE-(cY`i>i*f@kpVU^zT-1l4<=_(1*%JdP9(0Wun z<}hY@UOK$-8&S4*ah8OBDhKDAgJy90nXE0ulr5~DwrBuo&=ajeFl1i(#qK!0J=Dq> 
zbpL_s{=`RxBxvt6Amq~*G1rh4^~wT|L;cs&8l^J+P-i^qHi820!?9x{kZn!{oQatA z1m5pX=PtF|*4Tsk1^y()?Ehqsk(JrYFz@EkMdT|pDuxgo*M9` zMn&cc0l^xI4VVkuceB&LtaQgq&9}y{SDI9>LrlRJ+i9U_5v7WXTS9dUN6O?$Nt%4Q z(=G3E>7DsV-BZg%)JfH6KB3eNo~fjUsm-N_0mjlerkq-|wAJI)^_Uz))X`b^NmFo> zDU7{0{>T?=*cn$xyDevh?3OcqjZgaT~xUA}&!HfN2s_OGTls~aQ^~@P< zP&OLp^+dntxmWVhaP)5{tn*QShH-B?-)Fa}9ClY3(%@8!QcjDM>n#=TVpyRu_wIw< zs>dXIz|AoU35l@(6*C~2@Ruc)IN!oe;<6WS#1byi=rMvClAN3}J|6nGC0Tp2jKAW> zHJ?*b4M&6X^XTleb9Jb6RgqSWx!2}|0_bL;@Nspa=jA%PGT&&WV;&|P^4XBg(nmS_ z-`yM8ulnXe*Yo=3;sOFUV{}R6B3?f}x$=nSAKKP@KV#YUY;P=mM`M#d!=TK=EwOHX z$RLNS%lsa(_#q&`b2v~Uvry2-#m4D4mfx|}Wlcn{YAQ$AbNSb=t)$mQAxbK;qYs&d z=Dq*PSbFBmP4X&L>4ygg?hyDmJ@`SV(1AY(PWV2jxvbv}Y)J--#h1uPv{}#oeUZ^r z{qYew=<6GJjFp(+A_YYj4E6q{SN?0es*>h431ycIda#%C>Vp~p^H3Dkm>IZuIu%Mc zqaW_#tm5sNkd=;2s^Y17*9xV2HHN!^66PR1@qFl8h#&I@9cj_MT5h!D zI2GkoB^pJAD9PKlkY|Xo3mFAT&{GwEuZn=M)kyqp?^$<}#Ld?1?6=vBG5`?Y2WuI7 z@RyDzqXdIJ>VxUj9{{ z-S^{UA?R4;@1$=DLG~`;g#EAWDWj~{-cC`Pl0@s^ovLgqW!j_hQ+x?w?2A zx`!c(bn(ZJ+Ahm22TNebM}L*x+t;UFIj_5Oym!nNi6#5tm>i3De=+-#84ot;qJfaS z`@|KOM>u{q@m=@3+?`qj182IBFMH3+IY}K|J2pq)TUVuvpZ}m%-`3`{P-hD3KB{|J z?`d1}(kWevNi!(=Wt5Z{FLWf}vM9tCWK^ZQ-0rwgytz3$JNxnyTyEX?@%)cHyoI%! 
z6mVOObopJ@b-r{gfXrX7>qCNkeGRq|Ro{nEG+Seraw+4(Rjen6WF^o1Gv!-f;tOqFs4_%xLQ> zUvko8WI{nfVg3Hi)8*k;3pGZs&+RRW|0C?7Lh$+0r`L15JagBh{VOVt3JYfBz|F}D zmEZm*PT=`Xec*vADmI14Ufw&nhU8aS82r#B>?!ejKO@;=JAKjyqeSX=gowJXE${7e zX7#$wmaKJplUO-`m*K+wBxkP}d>r)h@-ofoTp%HoKDpy?{cx_%5>g?z!;IOK) z=P_S&bYv7M)g3zsd3GGH$zvJk?&^fj5|_&}r{b9A2(rorlia}BWb*RUu?HJT1T!js z?udE@C&@4`w<{aFVq;NM<=@5|Ozm(6pBzd)Pxdlb)aFlLUpIOBth$d@N?cd5ae3na z$HFT|FA=ygWLxXjYw4(&>7|wF+P6#M>O1~U)CZoNMd47rll=R4&O?t~M_4<}SgF{XQCJ{$`n)S&tm=z7}A3@>MwpEh&O+v_BbX(8; zG_3dRg%=`3S2#KE04`Ph@BtzbutYN1b2ncv_Sg)B2;Z(2cvR`t7rO6~jAWrw`JR`v zbDH`Mf9LGIy0{yXkUrY z_woj{;_ch%0`bRwRI1gUpvQyC!k6rsK-(t0?j7ig*!z+)q}`MhI)W_%_}u$7ZYH?( zU7H@rsQvJ7oag=OOLxGG&{1uaMoD0utDV~c$)v;d$a%4BB-e6>^GeiPGMCQ#jJ*PL z!N9u)>d&^Cr#zvWhR>##7PdvY5)$`2|cBkFRU$wXD^i8j$+A(*7 z4?dV%Z;bmpnL4zfnpCusUgKcFJBFW#y7}VIe>)&!Ja=DTUpPR1o~u}Mzg-(Pf!O7j z*$ad>2K?Q3m-UHBmS$Qu*b151*eGgy>%2k}M-i=+rHMI`rO;jXMT9R%K_c@KADE3qRjqinnk^ zQ%z0mWt%kpbCX`agtHMYlGN4RZSU*VoF>fU2t;J&Cfz)X<~chyHukFzU?3>K(-HJ| zGhtA!%X)9cQr@BgpF|XPd0ii_z;0iH0&e21bOZQ%y*lmHYlJ5gPwNCz9hZl zMG!`)0{(ZucYD0dP@0Uu$i(!zaT`*YPCuzVp-1jB$^Yb>A%pvydzw(DE{ju6c+IK2 z3PKb&XA2QBs`^v?$qI{_r%746)#UK#ZK)gdD*JQmVlz+^WCq7Th}`DPXyx#y9GQZF zIC)GIR61+7SBDs*%tcdm(EM1cl=vwpEiG+mdC5EYh+vIgxohbTD z3%ad^lMm-N-_)P3e>uF$7VLhmmXB`*&{=M~&mgWkxIAeDW7M71Ozj9i{pN6xfCiFgh5fO$^1pH&C zrcRSZvc4evur0ky{Fw_YujRA130r2GktsS}hKh12_qs!)rwW?X)s{HAmOrE!MC+x5 zB?svc((tgXb9E~+1Qo~YHZJgwdY1QOB=!c3=C}3iOvEh)w37}3Lqrr@+K^R|WF(2sCu-_Mj;?GZU;ZWCQc{4m7ZoN&9J``0wTxHP z{<^SOWdK>Lp2%4X>k*}vRThP0B0i2Y|4|n4zjG@Ar{lwiXJ%9@PJCW)tFuI@2?xRK z&|((RPgNH5qSZON^xx2jM7gudd@C08Fyh4`TgMVECV4JC-5+{273^rvny5F2lAtfR zNTYu|WbWr76oJrs5>TNKmkv%wVUw>q8wG9esU)dmye4 z=c(>CP~j?dc7=Vn*)8ct;D4Ead$P{!Y`5GQ{P^a@0OT(#A_T{t>}~7bJ4Y~TBnlz7 zjp~}Z-&HQdpntK|nlEr;&aah~$G$f>d49Xu|EY3tXfN;RJ6aQShf4Lj2!{v@Psf$_ zGW|)(5`1D$e{?k0e76#$ann`G8yg$B&?=@2?%|9FV?-Xrkf0!5IHM@7%aQpB7ZB>5 z7`j;T>wQcqn33SGy)%0n1|b&`^8gppP=U$1hK4}?^~3V=ko9k;SSjgZROJJ{M(icx 
za$UlR2&G>W)@_E(V8_Ac9*Utxs}MwW5A0vUNPT+b3%Lo6joe6RrDJ~_);x9u5}ZKZ z5dQ{6G+@3i0*myrLo2q)2Nz+^3PIc5mSP1Bjl@r$JR#fr@LLqSScliyorK)mmuPI} zyQIeP1+$msMirasEl0qt!`6zWiGFX=7wmX$=v1!cS@j3=|EgkT@C5-@`W)12#LTbG zRV5(tO#gf?xja>Z51l5JK1F-?(ht!NN{0TE3oEW&3>Tq^dKSkDYM>Fe2ft&{#2%y#4j%5fv3RIyzeXAFqVO>Ey%7N-GtJd<||8V`&B^ zB_ur>(nNl4?CpgaiTZoBkciXhnIdS!YCd}8j=Zjw3_W5D*lSp-HF&&>#iny;{Z z$z&G$rDBP@UDC5Ik?C-Pg>kW1->Q(DY-s_?Czf1>NqWt{)31d)V*Nb#0&iws1Gl0Q zP<{>C>x}HF&vMxk7{@n4V15W#Y)V`u7~#xlMj$<@LWBj`kFT1H&Gvb3A^wi3s;uF-!-q8Ni^c%sS?KhjC5{wpXdRkJBtlN|h2sc>V|S!r~jBRB`T zj7e6)?7&9~C1$Y!nNbOgVJXQVf0dP;|JOJ^o+Drnr92sI!Y%p8e78Ffw@={Cdd`UJ zSLwzK)fzU#MHU}U^PVR|7>z6Uhbft9lFY#gjLtjHAVnm0`WjsMIWkcTUF9 zk!ol~`6%$?PoMng3pAEOVlc)m8fJj3Hy{ z5-ye;?^vc`%5<{lwNA_Qoo zZPmU(LVAZpKa33bYOS}|`(2DPwYkCyD$u>p?Ql#h>~k6;ohYbLRPK#)C-(-ae~s{& zd_^nAi0214RudwUB!FemX^;V!wCAL4@y+iCcs1Zw6F7_3?z~;NWgy;r@b^T@!Ypl` zF>(Qk2Hv^B-;=xk?&AcbDR*-sxjWxXDJZC*`n<>!~7TWVee#wfStfh8%?UokL9(w5qL^yUcvC`FJTYo))6lT_PjPg`Rso!=xagj2`zZ_bB^M&cv=z(s&PcqJX?m|{Nc6OL2 z^c}AQPP_L8;p*Tr*f9utXoi9iFtSQ`V=-!xPXIS$Irn{vC26+p zeSuF(4t6p_t~Qcbzhj`!I`OB>Ce_}og*x*L`m8*$F;XHD{xCK!P`0*Aj{oUT3I zo+(<;IwhrfZ4jsOVTX6=rfB3&Ij{2Pp1>$K=^|V~wzl{Ko?MSNCy%Rp5_{cANrj^r zwlpTdCQ$^bXu->xRbbS&hyuIEPHL=?4^|_@(zLVo?;Ckl6!1y*o?NGX2GCfSBxvN^ zo8WwHhB7F}(MR!ENR_A8({wwis4T%?g_AoKnuYK(B~$-MBqOJF1+EVYdVaWmsk^T( zh@p=aQV;FFR5m-9U4T(3qyPYYf3n~nF$EA~O!64c@pZo4wf{6RU8Jx3Wd&%ZjU%ru z28S}0zWVwB?tsZ}{gHSibZe*?m7{S$$}~ughfAWYWC=PsIEWo^YthCFg@+fVRByD^ z*T7Dc^$L+M^W)5!vQjW{0+cZK5hI=Yg(Px{d|_FLdBZuJkA81dq25tlG4q4F27CWS z2?p*#L_=7dF2I?+E?Xl+4&a>hA&4X&YSV6q(_KQsoTq-C=_$M9%MgKmbx=Z#95ykz zb+VTz$R}RTtVGQ5hOKnvueJioBY5Rh&`Gj4GA#cKRK&SuOTaH-v{Y_M=$snuNh{{Z zl9O#i=gb!ZtC$}=A|jrF#8eSR&IYeXc36 zmQ;Ql?Rf~xIFxha2_K1k?2(fqqz6Q5$6KTl!o@8wj?5WDD`XjoOhM>~^3)XE26&`| z#z5%B==fklhlL1i*?Z_zXblIv*MbO1%vhIN*75cZYYqZBpf5S8xMDV3Vt8*NSbx641)fE! 
zpr`RsD#^$A=FEIQg+_y}{{-)L_<*rLEH+jo6z^{qi#JYA*7C>1F}Q7s`twFXK>-)W zoL)H8P;O27!&B++qwIeA4$pnK(*?n`A*)vGhP-bkW!5;#hv;8mX4;jsX|RPK3T z4nG~4`|O;G2T%q)je_*z6T?1Q4{^4woG(9*TBMATE=clmS=kwRHfe-M5DF-lP?H~1 z>4vGK{;~TS8nF$gj{HnRq;PFJr{9#S#B3AqPI2{Ry~{7bzZ-72`k3*hi9n0SeE;a= zTGN(R**>*Gxx@D$RTdR{*qyDcN$(Lclfu6z;>UEjb(g0^A?N$Jsl3UZZ;>)Z;X>+V zxR&X>Bc!E`1ON&*=1-N%h@rw*w5bCmWJoGUx=bQy;WCZ;aUPDKQJQCh+C}#O1BFMP zwuCS_MT@Z$AkjVFZzCA-n=pDiP4`Q)By{fNrnkwYR-&3Dho>vv5WkywA_ORDZ;{Q- zM--{(<5ztpVJJ-|)D1A8VY$daWwPePNAFGL23)SVqRU>7 z4dT2E7Y5yk-ma!GHfo1~vC*H>=;Beo__6kR*NzcxzfBF)G?&nfN!v=>5aMRx+V*HK zD4;ykSw?`ii5TqFB1LAPaUrlFUoR(aQN44If#jRLp3XcLWl)_b)F0H(YPey>eh9*eNREF#d2$ePBGD9 z&kqNSEkaiXujtK7!L`U-z2M zN|g~Q));1GopRl}kIC8&rQfwl za8CR1FiyRMBP_FG@7h6vSn(S9p5+C!GzNQ!3`^YjEahUpyX-KW4)rwM++%m;O z0%J|1+4MiwcOdu?VZOVl&2b7sz(6YD<+y}Os8nkeDkhBbIZ7=d=krmjQ=^SGMk<-M z;+|r$l_W-kBp5z&c-6%h9~rKUjg9_0ZSvoqyUt%QFAQ_*);(iwyH^|8x(=*=Iv})z-f$3}4Jx_(88OQZn zJu3DDN(iOWICjnnq82*<79bGOGV4~%#(=e8IVE7{*b`-r!+aP++_aBE3RnjWXrZ~N{B!OX_5k105C>NC8xBpXN7CM zRtv*`kfI={PD~QEXHPok!YD#xj85CAtTTiZM1eM%txK&&i-c&cF`E7AC&XMn&qT(L zTwu1&rAYz+0*S4YkqlH&w{@p2qYIRB%!tnSb278%0F+WK=NtnA)SIyo6oz4vrfhAy zd{wGgET$btLxe;qFdLuY{19GWXb340z_LRCYb^=cQ!dY#J{&8Bgj#tl!UX0D}; zq%)giV?@9bxBS#x#PRCH&z|^cpn`I_&lv3*EyuQQtZ5S%;K$~*^A+mo20BS>o0~yE zAwUQ~O0~AlR*;XZ6gqB4#a63z9Q!SVH*+BckTlv@=bTGP2!Rwg;s^zWs@ZJXAhbg0 zBsI;J&c|UGI_CsZk{Fh)=4k8Xm!Dt#+6!OZvE|=84B+27LjW&1f93iO8^_0MVbo6a z0Xv=johr^38_Ul=I|SOfV~1mwN|wuIq1xj!gny8Zbp*%^mRTSrIz8vSbN=Y}|3|)5 zI(Xoq3WCXcl`SW6Q>WT>$`~(XLq@^v#xAXOqt#^F9t8|Umf5j0#t0#!Fo?ov>5?UV zeSO}omP#cLOP+GdDKlrzY}6Y@8{ZY_2SdK{>h?32pII&xl4i?VPb?z<7-v2Ck);e7 z2th=?4YyP(_4f7otj{m>APDlg+%eQ%2tmSH+h{bTkUag&VTBL~=!|9K zeL)*c^3`b&hW-gftya@NR8tD+0ke!V@Ld`{kixNNtDqNoJ8Ok(PaE-)TXQZSeL}zR+7dh(!DFL9>XkbTn7ss*3Bb;-AY$F@bKC^CcsBhuIMgKK6 zIW#c1c=3|0TVE*@bFEfNL<|I=Uk)FsMqTrqV%}PMOu}z7T~WN#TG~on$PIM?2eH-OS_xD!=FjwUWe}qW2Nmz-5m-XB{>^i}O=dv@y=H z>o5cWVAhfnKgZy(OODbtJOwFN0TJUW^iUcC_8^2^n7=Y zH>w~A3WY)#2B}V)jb@sr;86lVe}9oc1u6=IAPj^QK1TzKkp8*9{&CvG 
z7zIHP1=&-5x?j0MArW7PZ+^qW z!@f(?w<$a4n$0Ew9vwY;@x>QE@x&9Oqod#$s12ofR5KgsCWI&!i=KNx0B7C!_;?tG zLWm#?;w1J@&ddM_FMRRUU^YC#T8qy4_ChZMu+|D96$Fq%M#}j)=sxs$MVK*GDk+7u z#^^K+!q9hDWNoEBR@;tv&__S7o|i&|N(DifB#8s=R z5kgE%Rx4v;hmRa-Hk-|6GfU>Zv?!ZZd2G&VhZD-@^Snt!5PzD*tijq zGL+T#4EC0L>y0{FXV?Njvl&Yvz=6?*5rAE64InvV1(CpGPuES;G)a;|p`f*nRL90{z3tYku6g6(1BX8UH=iTtMn^_Xk}_Cg7Ay^0 zZ%)S^{3?VBgIttz+6W;km5QH{#m=FB0uZ7$MoRhr_4cJPcAe*W-+tD+&CY!rYNI5} zlC>ma(HM~VgCM}copaAQ@Aq!c(&=>8 zH#e-L3`kW~ILtsg7`7R;fCaw3D0Dd(X1CmYGokdYH{XDXfg~=V@$ulSm2;lwxiKb= zqAVL8Jb0+tZmnEd9u9{dJfo~;6=F87FQEQ)D2Cy|qb8LDKiXuDW7t}YagMz!Ch9!T zowMLa+cqgBsTCN!$F~r%O?BF=Lyl4^q?A%pO02V3MULLuaSvRxwCga#@_7IL{de7U z7orrmx3-4EAs%Wq*cGn_V-HV((WIKuAPAx;O4C$Ixx2etlc%^4qgp@+q2$ryi?`o? zV}wMY001BWNklV0U?kk$(?uH@xTKQ?Ck8IzK&8V2m)}9 z(2;8x<4g!SH#@g?-@a?>>!z&gZ6tn?K+0ybS(T;nBVUXpjsvN*(at$aC~^Q9XSf!c z)V9PtUzVjc+E~juWsLNCy%Q&H_0E0Q4(;FNjOWd-GmTDte;McCJ*M<}yNEjm z)Udp^y86P4FCI8}FiFyEn0fCRP|k&?)LF*`M=_?le7ymiw8jrytJUgtdz4eed=VI7 zfRs{k9M{yaS``XK8Yv;hT3llYV{|Q-YK*luiXt5FAbbL2 z8)FrJFS3?LxzUZhTR0m25Q67UElxcW^(5rr!GjM!{P6Pf^5*77Q53!1-Qh4Z&T(95 z5yHlTLdx&;9<-Wn46!UTbLgXvbW!ObEn(QhePXJ zr_<>ggO4)i{G~Abp^tpzXP$WC<(FUn=5x>Gd0rHSB|x1cj0r9zNOA})N9^O22 z_MUt0e&K~v-_zRlrsK!XEuG&f2LuFwV@z-^t<{v$Bu%oxum-8L+HECeSrkACE|s_t zWl<3CnPLt+m7F_I84;XHPh0!;PKiKYUjFJI{9%$tz}z%rS99YT^PCc(U`I>ds;Yt@ zXf>OmRFu&{kw;Nzh@+Bopp+2qfqL&aC4@4df-;~qlmYW}H_KEQTCMXuN5eM87?;9X zJ2G!$ga8$cw_2^ZnJ$0(@;wjSf8glhrMKT9TyRR2khw0cx18`G2%Is_+Bl9mW8hq+ zb+g^lT7Tg7+jn+*LUPoobl751fzl*pRcT|CQr4I-2vD4sawhmljz~%Az0<~6W4Pc1 zfD7Tr)J+UjYb`>B2mmECkV;!a#tvMJvAQaD&f(GqtvI+48jA%&nBbIBqm6ZzQAP=M z)`d}k6jnSzgb>!+G;J8I7Z(>l{;`i|!z>$S8ynk{Q0F)W6%!90m=K8Op_~~4&YJq% ziQ`x)1s-}ky}ZbAPlH)EjKY8lD!KE%(QF(zd}wuL<;s;S03-_HIEkg;j0?^Mc&CE4 z0-qQ8GfzFWbn)UNAOH9-+$f`Wf+FFrGGAGcz+%N=%`~J1Mv% zluOYY4#2sF3Y}nCHe?K3ITX$_!aNX%&Fokrq!x?8Spc4cjCe*p0|AaO!QMUp&JX|S zM`vf}wzf9EXSFN+$PYbq=FFMvo0}FKqYMF$-Xj};)mu9Pkff=#Hp?!<(ZfB(5-KrE?rMF|Kcq!1z$k~7Xl9LI=IX*3#G 
zuS%toW;*B(2s#?#`zQr4hFn)?{V*FyrL1##o;RD#x!GA21Yr>1u3zA)REQ{w+O2ll zXegyTcx_D@Mt9$PPb*1Jzxt|-LP`)h;)FB<@YFmTQpt#Q1WBniGlaU2W5aV$hBHO3++LNH#7l0Yr7A%s#AQLl*F z$JwM5qULa7L;)erS~ji=CeQpEI{!e>m;kdD6WrU84<{GgStq6Bl-+vUZTH@H?_e-k zUA?-pyxi;cjWGeElvC$yWi3$8Ib|H>br66o(=_dLIyh0=+39Mn$;4m`|5pZTW_A{P z)OX)~cWZMK@%K`SBuN-!$Tcd;(pgI=N9f$f*48^0&L6$`_@keE?5#6zWJLi0Trf(g zk_tf%x1G3kes1pKg>U0nJIgX~PFLD`KgEH8nRoH8qo_NgTzEW^-Y7 zHc2AM1@6c3Z>+wU5J3>6akAU%_4<7PuvG=lGr=6iO}9h*ozVuja13{e-Zz^~EU^UV zVGtmN0x1_#2oF98g0zvMP}Th0{HfQz6*7V476I@8K^S^to%cj=C8ZFOG8)Hmkr#Uw z7OZo+sx~(_i=rH9f?y%!JsI21Vtr_>tt&>(xD*n@uZBuv%{i%ig299Ghn#c9c~w

1~s|)E(F488Dro*cyFxXoO@%nF_aK&EOFa;_XqEO=))gA zclO=Qt*w={)z!5%!ice&5HQYq3f37#X@^OY-C*4M6KQwIQ3Q&UPQA*51jXJ?0V9)^K;CQVYHv=yg= z?#{Q*oxl70?mKey=((jOU1VihF+mY1)M_^Fz5Cvi51icE-nw%6N*G6jejimx!I&a1 zN~6(B3aJa=iD%5>Tzwd9q`NH?8pFF;J zw6`kvj8EZ%0?! zI1S!oyE!b1BG3E%zSY_o!O$dMOMy)@Dd60*{ju~u+yjKP{2=b0llFk>jCdII`a zM<5bVuwKHtbY#R(IE1+cVHn48Srpp)AP7d9h~7~@N=IZ2zQ@mu;}{qE*g~Vh7V%z4 zNi227NU4y>Mi~`c1Yz_;AAabTTW}Ygbo`VXnf^6}bZvBuN+q&1R~N z(WPn6%*1Ka71{Oc*9U_^4R}IVY5dj3m^4jC+#n$B>1m}@Q51u0fOBRXzu}26Mi-78 zpGsX3LJhc?_SCSq`&<9%KmXbn|NRF}-1;}qu6yud7)qw{vh42eo;!cxu}?hq#1l`v z_UfzJ+Ir#XIOCLY4-AO)4t1lb2k#vvgi}U1M?^$b7PUm%q|`kCC@FHS0T>1F4mc%O zuU?I#WNvnDV{7AkQM*3!@JCkH*DkLtGr?d&%e3}#tw&omCDK|uvDxD{fWICB84yO$ zw@|6z__4)n>+84Na`VMYmv(wR;=T30D2id06=i9S-QM2L^BiSI`~7~u-_P^B(wYDe z&Y~b(SXgMZn#6;OBA_rB4waOhRy!{X^fq_aW<~Dp2qR{k14hAtHO4wu%j98>nx39c z~B8g)lWO{l^TNg+vD9ww4OEJGNzkmPU>8YtV&z!+A zFXOB%OFC9gK>aRURMc#vdZ&kTT_h9Li7|>IrPPSg##o@#$iIj&`~hPOnRwl6(I6b{ z;(ih{wx4h@sb`7x+*Jr6h2)$`r5I&l6#nEt{E0^%dF1NVYu)W`Z@2g8W1qbL{`)`t zgAWad<#3n^~d+)R6VW@>txOTM|jxxThqX^p4Pq(v8A0}@cMt4yPKYG!(6 zWo2z`t?q|0T=BBAEbDYS=#O9tt(*yiz=D@bN+Gs3w~DI#fs+rOednz6K5e8?98*e- zHCtQPZ@=S2tJUmh*|%PKg#ZZ738Mlicm(}=)N#%8Jc%Ny z6drFQ9JX_mGZskz5R4JV8qIX?p1o_Ut8??S=gytai$YhGHO2!F55`$-4IQt)YiNkz zTuLc|fDw|$aeHcN`O>8{iN|wJDDvEc55jOb95Tj;2Q#W>2`WcYAOPMu=K)5EckF%8 zYPF_1oxw13#_XmSaa(VdB z;eGq|1uA^?)mN*cblzF-*myfeDNT};P=dJ&ryZCIa4LhiztM(ZOiHZ9elTc9up!)+ zP#UNJM*dJ>uwjD{;&0@=Gwz5OV~7{6qqmmT939n+VH1VA*dKZL;fEf2XnEx-qj2if zsYau5*IggDbm`kwS>1B$@gF$(V3C(+&o8Bo=KlQ$_8mN6jagY<*<4#4_Iq}0uQV$8 zC}rb+kgi0KkExj7g;sXIzVpbF^^uzCfN^6kT0eA&eRCyK5WW?e6ZN&!i}HWwZ&Dvf5Z< zI3|b8KeCM4XD6{_AH_x;Z^-I6>&leYu z-*(&Wt#8kMlDoL$qCpn~gAv(DoNADGb-LERigJ z^ytyLMe1~>=jP|V_pZ{!6B!1ju6B2KUw`ejW~(W<81{2hRh-ePD2G|TdUfsTr=J## z-*ofMM;DI)AX^hAXs6pfa^%RdV~ZEReQ{%B0}Y4+ z6?D=@H`}dkotwc0{NnKoE^XJP?bGz6VN)2k>x%42cWCD2l48qKrA~`h$K$ML`f!N>MKu zytl?Ql9V%cWqJ9V&p!JXfA!R-KmF-n{^fsh=-|O8pZwEKXR6!X4y3F~Tj#^kI|}cy zf<}-U0F+gQk%>bcr4%Ny8uW)WsIn|MR$5}z!%s>LP0k@E-Goq-)>#LB;=G?Yf-pj` 
zYXt(jsy_PBM}FlCUwGr~x3fWib94QbSH3knzaS`o^IK;aW6gGFeQW#Ng_W}xuI}4^ z;KIf8S67yub%F`P2_@hO#fBD{KmMfVrFhe=cN#$(P(5@HsWUs(dl$l zpu!+@&Y|wf;lqdDef#b0>({5FMy`S)%TP;_5VE?udT`(2BuVVPJ$c>HlK|fnOdi3o1bMJ3V?%%iP+Qvqp1S6a=LCFnecrxCbqfrK$h6Z8S@Ac9s z`s7c2_8&nQ8uPjJk60t6HXA;4PVJZD65N|Y2z5btfJWfC(=X(0TtNYj*PCxj@$0Ao-U z#DgFsj&F^7#H_;@{|*65Aw;{~UfW#9AzM{x1PRD6h#23ye}9t1Ti4bJr60WKg9mS3 z{E;8~(Y3YJ&pq+e)gZg?L-!eLy4|h_q%U&8Ip+e+R1pMQ>8h5Lp_Jl$SP4;96-FSU z(rQ(8DJ2k)Qs#NiIKxRP4rVE(DhTk+w$9ecwr(g!9V}zUIv_y&NH-iWX3}Q2#@`W2 zr{?DV)vx~RXP^G-QrkFaJok+kXyp$bI9v{Pr}uPTeD2iJ>ZQeF$2WI(&!4~4TU)7g z1w06u@eaTfDnLjEqXj&0R$J{5Vu3S)93Tf?Yi%kM$#`{ZMFj!pyuZ7fM3Dwh8Jn4z z@y1&3Ip;wX=2h9-+3^HgQ*E%e*Uf;^qAZkAAjJXfBX4=`PBb<5(EeBy!V_lYs}h(OT}*H%0hcHFy&fzOMBZd zvtd;V%0UT2sh|vL1e`J;lnTKdfl3qWiKoPa^Nu=FRk}1fNmJrscXxL%9D?KqoQgxo zh~&~$h8V{QZZSM#loDdSbH?C)sb(`9WkWC0l zS6Y`IUof4F3OmYipv{#@yS%It-*9M3Q0nM z5QBqplouR3>U!^ii{JRoFE5>6x^VHr(Z!=h*-sN5rK%)(FYgZqdRUb|@{u1-gXr{2 zFI{~1oxyNmy`x~f_1alafp|(h1y8*r)7vZ3)x^hf>=?Pe)xEN^+}rNv{mfKa1p!l%2{9_M38B^t?E=cVC%!VAvM5P#(l+Vz zT#w?w0}#iUBg7Hv7f1yuY=pl(NxC`?~|i#JKFH?@_|7y*tM$ z9>$nYed<%aUhivP{mS3Dd#xX!5Lb&+Z|D9NA3dP zt)-NBLj2f7xt_LRc^rk|&dv^HtSXBv%fxtW=&UV_-Q3tn8YvL+XHS0p8_zy-b#*lo zQJf^kTPfm&eG9EP+`BN>&jyq+9HiAUFhU42QG{dcjhv*$vz)piN3GWy{GB8TBGJ*e zoU@Us2DU?pm+;ip-(r$3`2YS>*Kb}1i29$aRo_G(O05G7~!ULt=Iz|lR0vI8L z2u7WDghP}>Q?qSr_q_AYJA{xfYR}^s+1A$?l_p{6GW;0L7MQ@E@P?iiDUa(->7<)R*%7IFN6B< zLV>~>-}@ad35>Gst!{iz{Qkxy^7^WL$RwZKG)}58Opc_G8TiF7e(}*qAN};tKJnRq z@n&xKF{A=K$gN)^UY9L1)rhzCbpp)1CiHkOYQCS_D8 zr8V_s#R$VL)>-E(qXLzi+*((ffp=bOEv1a(csLw7=MdFqiSt0f1EWj|=_xqy#1l$j za?I0ewbC@*+3r$GtEyVxSO?>AC_x-h&bGF$XZf(vXf)$=r?N z9XmrQDT6TZ-n+tmM|=PO8I$-f?%#jl=YQ_!e&=_7_ul*O`O4S7{@h=G!|9<^l5zU@ zW1rZ&e-VJZ{`#xWJoB^=GEQP*pfnaJ1FH^$H3}U%>x!x@b(Ka*oF-uy?e6a8Ma~#Yle8$yN^2p6 zCjf-#qV(Rkn=L{}S(V_S(M&6=tGv9v*^Q#efb-f^m1dHA>l|eRSGlp2X?-%*eJs`s zh>yn}2g{8bpNw$j$>VHdr3_h7D5>7xm`tLiEUU_DO1ZTT7$ai>TfKLxU%Og8_4Mh} zFTC)=)Kurtp+hQ7FDzZ4N+65|3Hua)N=7IFfYLb?ut3Nl3|&#Rr)QeYW>FN^wl=iZ 
z)PbWQx!{aZ50Y`m7!@K)qOvHN^S%CVv)L4ZG~gnsa#teux+sbfqk=KwftXR#h}>mL zKym4u)7s$kKtj--J$n`w7V_bc3`m+JtyZhIc|8rIN^3IeMJc6p(C-hkOlzGaiKozP zH5;ua5U>`WdG;Hj64h`hgTPr^E7l1qIb(4gc|U^bXIWM!#*w%VV>lG8Nxzc^AEi`D zDYyU$+3ELo%fY}H(@0ZK zAgf9xgddw0qL;n%E{KDOCOFaHK+)}o07xMT8A)BW+iiRvS(cU7Y;A2FJ$z*G*s)h$ zIOVMY?=1oAoHj-TswxT~lrh#!8rz%O>(|!fD5^?*=N%_bojNt>?dqzmoDY;5QG2X) z!y%psc+=3L6xka!um-7ZcpDk#LWrU$u(-|h9Cy}bRZ1anhmH!Ax~gifW4*$yYv#H( zn*^iO?-@b`oO2I6@WAoo$DjH?f3b9V<=xj_l~HVswUs__XwNVH;y*ik_WVnm12y|&&~hMv(G;H z^(WVNuFuZy8T9*=tyZtBc%$3XGikHY?e45zxcIrB{K>VI)%Epj+uPekSx&V(obh3Q zaOKjaG)bd0=@0r>uU_SxYoier?7bfrIRO%cA>}Mdl6;uw*)U3?veZ>pHKtmPM&w;p zSuessSCzJ!DGAOA#~gjnsI0DCM`EQDwr;g#^9Yg7)(ho+|HQv(#p{b^a6x>ZJq-aR3<4l%QY_MSz}AD&joP%eT+J8*qtA zFh~@}TL$3EqQJs}5TdoIs)`V3B#qA0bXgRR5Nk}g+ogx~k50XK>c9TZ?_OD1 zUS7V!0FED9+}zq~w>!sfIzBf$^Z4UG{(}!c{HI_0<3E4l`Qs;Ub!9m{Grjc2TY0~~ zuy^0a+WLj7m-Di?|Gs-SH#XK*SG~2XYimiGawRE{;c&P+=;NJHO2uK6m4zeVz!Lz* zx%G}Kxo_Xz^KV_CKqzqMJy1H>*^zP~;Gyns0cXG&=b|br#+X+M4TkubSvH!tRn?@4 z+oUue;yTdjbP|&OcNOHEu~%1D!zg?ov3186k5j>xmd@iSALuBS>H~&SibY*5vQSUh z<%pwAJ?eDe-~k9@lrzc*5W%Dhh|z^E!#EL?dt)nIl||X__scvV4u`s`taG@`2)HBw z*`S~Gc8juz;}~(%3P%5J!=%v`|7QVKY5DKmYh+kAkstd-tQ( z)hn;RcIm=}ngcMg>&%8sNX|K<)EbKoJZ-B-0003aNkl}XGna^x(ZX8=Y_CNo} z?^#O-F=0d>d-Soz!^fWd`k%dX_B<8Ldrui?UEO~BiLZY3D}sps@SFef*=L@SO72;h z-&ngA24P7(i-M)|=g*x#_wviHy!P6wT=2(#;!}4WIdbZ$r|!7#-b06uTv)nry}Nzz z*ilpF#UMAve)e;pt4wumWBuCtI(P^JCDo`bvL1<`D$BCDxw)f9k0`E8SvAr|AeAnw zn~p8E+O5~myk2Q-tpV>j5b(4t%1W0&38#z#m{L1u2>@tY^U>;(e$w?|QhdFx@bN*6 yh;ri}xN)?hBozoo!b3;)IqQ~}ul&7mJpUK_ANP8df#=cy0000>1] = (((bpp+1)/8)<<9)|0xAB; /*8/15/16/24bpp...*/ memregs[0x290C>>1] = 320*((bpp+1)/8); /*line width in bytes*/ } @@ -163,7 +162,7 @@ void vid_mmsp2_init(void) void vid_mmsp2_finish(void) { gp2x_video_RGB_setscaling_(0, 320, 240); - gp2x_video_changemode_ll_(16); + gp2x_video_changemode_ll_(16, 0); memregs[0x290E>>1] = gp2x_screenaddr_old[0]; memregs[0x2910>>1] = gp2x_screenaddr_old[1]; diff --git a/platform/gp2x/vid_pollux.c 
b/platform/gp2x/vid_pollux.c index 308eb001..b0b28da4 100644 --- a/platform/gp2x/vid_pollux.c +++ b/platform/gp2x/vid_pollux.c @@ -34,31 +34,21 @@ #include "../common/arm_utils.h" #include "plat.h" -#define fb_buf_count 4 -static unsigned int fb_paddr[fb_buf_count]; +#define FB_BUF_COUNT 4 +#define FB_MEM_SIZE (320*240*2 * FB_BUF_COUNT) + +static unsigned int fb_paddr[FB_BUF_COUNT]; static int fb_work_buf; static int fbdev = -1; -static unsigned short memtimex_old[2]; -static int last_pal_setting = 0; - - -/* misc */ -static void pollux_set_fromenv(const char *env_var) -{ - const char *set_string; - set_string = getenv(env_var); - if (set_string) - pollux_set(memregs, set_string); - else - printf("env var %s not defined.\n", env_var); -} /* video stuff */ static void pollux_video_flip(int buf_count) { - memregl[0x406C>>2] = fb_paddr[fb_work_buf]; + memregl[0x406C>>2] = memregl[0x446C>>2] = fb_paddr[fb_work_buf]; memregl[0x4058>>2] |= 0x10; + memregl[0x4458>>2] |= 0x10; + fb_work_buf++; if (fb_work_buf >= buf_count) fb_work_buf = 0; @@ -67,7 +57,7 @@ static void pollux_video_flip(int buf_count) static void gp2x_video_flip_(void) { - pollux_video_flip(fb_buf_count); + pollux_video_flip(FB_BUF_COUNT); } /* doulblebuffered flip */ @@ -76,7 +66,7 @@ static void gp2x_video_flip2_(void) pollux_video_flip(2); } -static void gp2x_video_changemode_ll_(int bpp) +static void gp2x_video_changemode_ll_(int bpp, int is_pal) { static int prev_bpp = 0; int code = 0, bytes = 2; @@ -100,8 +90,9 @@ static void gp2x_video_changemode_ll_(int bpp) memregl[0x4000>>2] |= 1 << 3; /* the above ioctl resets LCD timings, so set them here */ - snprintf(buff, sizeof(buff), "POLLUX_LCD_TIMINGS_%s", last_pal_setting ? "PAL" : "NTSC"); - pollux_set_fromenv(buff); + snprintf(buff, sizeof(buff), "POLLUX_LCD_TIMINGS_%s", + is_pal ? 
"PAL" : "NTSC"); + pollux_set_fromenv(memregs, buff); switch (abs(bpp)) { @@ -121,12 +112,18 @@ static void gp2x_video_changemode_ll_(int bpp) return; } - memregl[0x405c>>2] = bytes; - memregl[0x4060>>2] = bytes * (bpp < 0 ? 240 : 320); + // program both MLCs so that TV-out works + memregl[0x405c>>2] = memregl[0x445c>>2] = bytes; + memregl[0x4060>>2] = memregl[0x4460>>2] = + bytes * (bpp < 0 ? 240 : 320); r = memregl[0x4058>>2]; r = (r & 0xffff) | (code << 16) | 0x10; memregl[0x4058>>2] = r; + + r = memregl[0x4458>>2]; + r = (r & 0xffff) | (code << 16) | 0x10; + memregl[0x4458>>2] = r; } static void gp2x_video_setpalette_(int *pal, int len) @@ -153,26 +150,6 @@ static void gp2x_video_wait_vsync_(void) memregl[0x308c>>2] |= 1 << 10; } -/* RAM timings */ -static void set_ram_timings_(void) -{ - pollux_set_fromenv("POLLUX_RAM_TIMINGS"); -} - -static void unset_ram_timings_(void) -{ - int i; - - memregs[0x14802>>1] = memtimex_old[0]; - memregs[0x14804>>1] = memtimex_old[1] | 0x8000; - - for (i = 0; i < 0x100000; i++) - if (!(memregs[0x14804>>1] & 0x8000)) - break; - - printf("RAM timings reset to startup values.\n"); -} - void vid_pollux_init(void) { struct fb_fix_screeninfo fbfix; @@ -193,17 +170,17 @@ void vid_pollux_init(void) printf("framebuffer: \"%s\" @ %08lx\n", fbfix.id, fbfix.smem_start); fb_paddr[0] = fbfix.smem_start; - gp2x_screens[0] = mmap(0, 320*240*2*fb_buf_count, PROT_READ|PROT_WRITE, + gp2x_screens[0] = mmap(0, FB_MEM_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, memdev, fb_paddr[0]); if (gp2x_screens[0] == MAP_FAILED) { perror("mmap(gp2x_screens) failed"); exit(1); } - memset(gp2x_screens[0], 0, 320*240*2*fb_buf_count); + memset(gp2x_screens[0], 0, FB_MEM_SIZE); printf(" %p -> %08x\n", gp2x_screens[0], fb_paddr[0]); - for (i = 1; i < fb_buf_count; i++) + for (i = 1; i < FB_BUF_COUNT; i++) { fb_paddr[i] = fb_paddr[i-1] + 320*240*2; gp2x_screens[i] = (char *)gp2x_screens[i-1] + 320*240*2; @@ -212,8 +189,6 @@ void vid_pollux_init(void) fb_work_buf = 0; 
g_screen_ptr = gp2x_screens[0]; - set_ram_timings_(); - gp2x_video_flip = gp2x_video_flip_; gp2x_video_flip2 = gp2x_video_flip2_; gp2x_video_changemode_ll = gp2x_video_changemode_ll_; @@ -224,9 +199,8 @@ void vid_pollux_init(void) void vid_pollux_finish(void) { - munmap(gp2x_screens[0], 320*240*2 * fb_buf_count); + memset(gp2x_screens[0], 0, FB_MEM_SIZE); + munmap(gp2x_screens[0], FB_MEM_SIZE); close(fbdev); fbdev = -1; - - unset_ram_timings_(); } diff --git a/platform/libpicofe b/platform/libpicofe index 1bc471eb..39014486 160000 --- a/platform/libpicofe +++ b/platform/libpicofe @@ -1 +1 @@ -Subproject commit 1bc471ebf1c85cf78f1862f5596a76f051e7112d +Subproject commit 39014486f9e50110d23dece007ce4c0ed90d15b1 From 5ad7000693ff868383277f7ba4da3567ccbeed04 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 4 Oct 2013 03:50:29 +0300 Subject: [PATCH 0005/1110] sound: remove cd rate limitations this was only there for mp3 and is now causing problems on caanoo due to it's sample rate limitations --- pico/sound/sound.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/pico/sound/sound.c b/pico/sound/sound.c index ec0e2059..b12afc3c 100644 --- a/pico/sound/sound.c +++ b/pico/sound/sound.c @@ -121,15 +121,6 @@ void PsndRerate(int preserve_state) void *state = NULL; int target_fps = Pico.m.pal ? 
50 : 60; - // not all rates are supported in MCD mode due to mp3 decoder limitations - if (PicoAHW & PAHW_MCD) { - if (!(11025-100 <= PsndRate && PsndRate <= 11025+100) && - !(22050-100 <= PsndRate && PsndRate <= 22050+100) && - !(44100-100 <= PsndRate && PsndRate <= 44100+100)) - PsndRate = 22050; - PicoOpt |= POPT_EN_STEREO; // force stereo - } - if (preserve_state) { state = malloc(0x204); if (state == NULL) return; From a6523294e28d7e6c119a578eb65b91af8da77f8d Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 4 Oct 2013 23:24:36 +0300 Subject: [PATCH 0006/1110] cd: fix cycle overflow issue --- pico/32x/32x.c | 3 +++ pico/cd/mcd.c | 32 +++++++++++++++++++++++--------- pico/pico.c | 5 ++++- pico/pico_int.h | 1 + 4 files changed, 31 insertions(+), 10 deletions(-) diff --git a/pico/32x/32x.c b/pico/32x/32x.c index b8a84d93..26162e49 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -547,6 +547,9 @@ void PicoFrame32x(void) p32x_sh2_poll_event(&msh2, SH2_STATE_VPOLL, 0); p32x_sh2_poll_event(&ssh2, SH2_STATE_VPOLL, 0); + if (PicoAHW & PAHW_MCD) + pcd_prepare_frame(); + PicoFrameStart(); PicoFrameHints(); sh2_drc_frame(); diff --git a/pico/cd/mcd.c b/pico/cd/mcd.c index 8451a1c2..24e99e44 100644 --- a/pico/cd/mcd.c +++ b/pico/cd/mcd.c @@ -11,7 +11,9 @@ extern unsigned char formatted_bram[4*0x10]; -static unsigned int m68k_cycle_mult; +static unsigned int mcd_m68k_cycle_mult; +static unsigned int mcd_m68k_cycle_base; +static unsigned int mcd_s68k_cycle_base; void (*PicoMCDopenTray)(void) = NULL; void (*PicoMCDcloseTray)(void) = NULL; @@ -116,14 +118,14 @@ static void pcd_set_cycle_mult(void) { // ~1.63 for NTSC, ~1.645 for PAL if (Pico.m.pal) - m68k_cycle_mult = ((12500000ull << 16) / (50*312*488)); + mcd_m68k_cycle_mult = ((12500000ull << 16) / (50*312*488)); else - m68k_cycle_mult = ((12500000ull << 16) / (60*262*488)) + 1; + mcd_m68k_cycle_mult = ((12500000ull << 16) / (60*262*488)) + 1; } unsigned int pcd_cycles_m68k_to_s68k(unsigned int c) { - return (long long)c * 
m68k_cycle_mult >> 16; + return (long long)c * mcd_m68k_cycle_mult >> 16; } /* events */ @@ -234,10 +236,13 @@ static void pcd_run_events(unsigned int until) int pcd_sync_s68k(unsigned int m68k_target, int m68k_poll_sync) { #define now SekCycleCntS68k - unsigned int s68k_target = - (unsigned long long)m68k_target * m68k_cycle_mult >> 16; + unsigned int s68k_target; unsigned int target; + target = m68k_target - mcd_m68k_cycle_base; + s68k_target = mcd_s68k_cycle_base + + ((unsigned long long)target * mcd_m68k_cycle_mult >> 16); + elprintf(EL_CD, "s68k sync to %u, %u->%u", m68k_target, now, s68k_target); @@ -307,12 +312,21 @@ void pcd_run_cpus_lockstep(int m68k_cycles) #include "../pico_cmn.c" +void pcd_prepare_frame(void) +{ + pcd_set_cycle_mult(); + + // need this because we can't have direct mapping between + // master<->slave cycle counters because of overflows + mcd_m68k_cycle_base = SekCycleAim; + mcd_s68k_cycle_base = SekCycleAimS68k; +} + PICO_INTERNAL void PicoFrameMCD(void) { - if (!(PicoOpt&POPT_ALT_RENDERER)) - PicoFrameStart(); + PicoFrameStart(); - pcd_set_cycle_mult(); + pcd_prepare_frame(); PicoFrameHints(); } diff --git a/pico/pico.c b/pico/pico.c index 6888080b..8535d486 100644 --- a/pico/pico.c +++ b/pico/pico.c @@ -285,8 +285,11 @@ int z80_scanline_cycles; /* cycles done until z80_scanline */ /* sync z80 to 68k */ PICO_INTERNAL void PicoSyncZ80(unsigned int m68k_cycles_done) { + int m68k_cnt; int cnt; - z80_cycle_aim += cycles_68k_to_z80(m68k_cycles_done - last_z80_sync); + + m68k_cnt = m68k_cycles_done - last_z80_sync; + z80_cycle_aim += cycles_68k_to_z80(m68k_cnt); cnt = z80_cycle_aim - z80_cycle_cnt; last_z80_sync = m68k_cycles_done; diff --git a/pico/pico_int.h b/pico/pico_int.h index 41dc59dc..a8bf7ee4 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -664,6 +664,7 @@ enum pcd_event { extern unsigned int pcd_event_times[PCD_EVENT_COUNT]; void pcd_event_schedule(unsigned int now, enum pcd_event event, int after); void 
pcd_event_schedule_s68k(enum pcd_event event, int after); +void pcd_prepare_frame(void); unsigned int pcd_cycles_m68k_to_s68k(unsigned int c); int pcd_sync_s68k(unsigned int m68k_target, int m68k_poll_sync); void pcd_run_cpus(int m68k_cycles); From 3f23709ef37c5b3511c1445cbed7b447b56a37e0 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 5 Oct 2013 04:14:45 +0300 Subject: [PATCH 0007/1110] cd: switch to CD controller code from genplus same license, much cleaner code using own dma code though.. --- pico/cd/LC89510.c | 637 --------------------------- pico/cd/LC89510.h | 96 +---- pico/cd/cd_file.c | 2 + pico/cd/cd_sys.c | 188 +++++++- pico/cd/cdc.c | 851 +++++++++++++++++++++++++++++++++++++ pico/cd/genplus_macros.h | 16 + pico/cd/mcd.c | 11 +- pico/cd/memory.c | 8 +- pico/cd/memory_arm.s | 10 +- pico/cd/sek.c | 14 + pico/pico_int.h | 83 ++-- pico/state.c | 94 +++- platform/base_readme.txt | 7 +- platform/common/common.mak | 2 +- 14 files changed, 1212 insertions(+), 807 deletions(-) delete mode 100644 pico/cd/LC89510.c create mode 100644 pico/cd/cdc.c diff --git a/pico/cd/LC89510.c b/pico/cd/LC89510.c deleted file mode 100644 index 74894760..00000000 --- a/pico/cd/LC89510.c +++ /dev/null @@ -1,637 +0,0 @@ -/*********************************************************** - * * - * This source file was taken from the Gens project * - * Written by Stéphane Dallongeville * - * Copyright (c) 2002 by Stéphane Dallongeville * - * Modified/adapted for PicoDrive by notaz, 2007 * - * * - ***********************************************************/ - -#include "../pico_int.h" - -#define CDC_DMA_SPEED 256 - - -static void CDD_Reset(void) -{ - // Reseting CDD - - memset(Pico_mcd->s68k_regs+0x34, 0, 2*2); // CDD.Fader, CDD.Control - Pico_mcd->cdd.Status = 0; - Pico_mcd->cdd.Minute = 0; - Pico_mcd->cdd.Seconde = 0; - Pico_mcd->cdd.Frame = 0; - Pico_mcd->cdd.Ext = 0; - - // clear receive status and transfer command - memset(Pico_mcd->s68k_regs+0x38, 0, 20); - Pico_mcd->s68k_regs[0x38+9] = 
0xF; // Default checksum -} - - -static void CDC_Reset(void) -{ - // Reseting CDC - - memset(Pico_mcd->cdc.Buffer, 0, sizeof(Pico_mcd->cdc.Buffer)); - - Pico_mcd->cdc.COMIN = 0; - Pico_mcd->cdc.IFSTAT = 0xFF; - Pico_mcd->cdc.DAC.N = 0; - Pico_mcd->cdc.DBC.N = 0; - Pico_mcd->cdc.HEAD.N = 0x01000000; - Pico_mcd->cdc.PT.N = 0; - Pico_mcd->cdc.WA.N = 2352 * 2; - Pico_mcd->cdc.STAT.N = 0x00000080; - Pico_mcd->cdc.SBOUT = 0; - Pico_mcd->cdc.IFCTRL = 0; - Pico_mcd->cdc.CTRL.N = 0; - - Pico_mcd->cdc.Decode_Reg_Read = 0; - Pico_mcd->scd.Status_CDC &= ~0x08; -} - - -PICO_INTERNAL void LC89510_Reset(void) -{ - CDD_Reset(); - CDC_Reset(); - - // clear DMA_Adr & Stop_Watch - memset(Pico_mcd->s68k_regs + 0xA, 0, 4); -} - - -PICO_INTERNAL void Update_CDC_TRansfer(int which) -{ - unsigned int DMA_Adr, dep, length; - unsigned short *dest; - unsigned char *src; - - if (1) //Pico_mcd->cdc.DBC.N <= (CDC_DMA_SPEED * 2)) - { - length = (Pico_mcd->cdc.DBC.N + 1) >> 1; - Pico_mcd->scd.Status_CDC &= ~0x08; // Last transfer - Pico_mcd->s68k_regs[4] |= 0x80; // End data transfer - Pico_mcd->s68k_regs[4] &= ~0x40; // no more data ready - Pico_mcd->cdc.IFSTAT |= 0x08; // No more data transfer in progress - - if (Pico_mcd->cdc.IFCTRL & 0x40) // DTEIEN = Data Trasnfer End Interrupt Enable ? - { - Pico_mcd->cdc.IFSTAT &= ~0x40; - - if (Pico_mcd->s68k_regs[0x33] & PCDS_IEN5) - { - elprintf(EL_INTS, "cdc DTE irq 5"); - SekInterruptS68k(5); - } - } - } - else length = CDC_DMA_SPEED; - - - // TODO: dst bounds checking? 
- src = Pico_mcd->cdc.Buffer + Pico_mcd->cdc.DAC.N; - DMA_Adr = (Pico_mcd->s68k_regs[0xA]<<8) | Pico_mcd->s68k_regs[0xB]; - - if (which == 7) // WORD RAM - { - if (Pico_mcd->s68k_regs[3] & 4) - { - // test: Final Fight - int bank = !(Pico_mcd->s68k_regs[3]&1); - dep = ((DMA_Adr & 0x3FFF) << 3); - cdprintf("CD DMA # %04x -> word_ram1M # %06x, len=%i", - Pico_mcd->cdc.DAC.N, dep, length); - - dest = (unsigned short *) (Pico_mcd->word_ram1M[bank] + dep); - - memcpy16bswap(dest, src, length); - - /*{ // debug - unsigned char *b1 = Pico_mcd->word_ram1M[bank] + dep; - unsigned char *b2 = (unsigned char *)(dest+length) - 8; - dprintf("%02x %02x %02x %02x .. %02x %02x %02x %02x", - b1[0], b1[1], b1[4], b1[5], b2[0], b2[1], b2[4], b2[5]); - }*/ - } - else - { - dep = ((DMA_Adr & 0x7FFF) << 3); - cdprintf("CD DMA # %04x -> word_ram2M # %06x, len=%i", - Pico_mcd->cdc.DAC.N, dep, length); - dest = (unsigned short *) (Pico_mcd->word_ram2M + dep); - - memcpy16bswap(dest, src, length); - - /*{ // debug - unsigned char *b1 = Pico_mcd->word_ram2M + dep; - unsigned char *b2 = (unsigned char *)(dest+length) - 4; - dprintf("%02x %02x %02x %02x .. %02x %02x %02x %02x", - b1[0], b1[1], b1[2], b1[3], b2[0], b2[1], b2[2], b2[3]); - }*/ - } - } - else if (which == 4) // PCM RAM (check: popful Mail) - { - dep = (DMA_Adr & 0x03FF) << 2; - cdprintf("CD DMA # %04x -> PCM[%i] # %04x, len=%i", - Pico_mcd->cdc.DAC.N, Pico_mcd->pcm.bank, dep, length); - dest = (unsigned short *) (Pico_mcd->pcm_ram_b[Pico_mcd->pcm.bank] + dep); - - if (Pico_mcd->cdc.DAC.N & 1) /* unaligned src? 
*/ - memcpy(dest, src, length*2); - else memcpy16(dest, (unsigned short *) src, length); - } - else if (which == 5) // PRG RAM - { - dep = DMA_Adr << 3; - dest = (unsigned short *) (Pico_mcd->prg_ram + dep); - cdprintf("CD DMA # %04x -> prg_ram # %06x, len=%i", - Pico_mcd->cdc.DAC.N, dep, length); - - memcpy16bswap(dest, src, length); - - /*{ // debug - unsigned char *b1 = Pico_mcd->prg_ram + dep; - unsigned char *b2 = (unsigned char *)(dest+length) - 4; - dprintf("%02x %02x %02x %02x .. %02x %02x %02x %02x", - b1[0], b1[1], b1[2], b1[3], b2[0], b2[1], b2[2], b2[3]); - }*/ - } - - length <<= 1; - Pico_mcd->cdc.DAC.N = (Pico_mcd->cdc.DAC.N + length) & 0xFFFF; - if (Pico_mcd->scd.Status_CDC & 0x08) Pico_mcd->cdc.DBC.N -= length; - else Pico_mcd->cdc.DBC.N = 0; - - // update DMA_Adr - length >>= 2; - if (which != 4) length >>= 1; - DMA_Adr += length; - Pico_mcd->s68k_regs[0xA] = DMA_Adr >> 8; - Pico_mcd->s68k_regs[0xB] = DMA_Adr; -} - - -PICO_INTERNAL_ASM unsigned short Read_CDC_Host(int is_sub) -{ - int addr; - - if (!(Pico_mcd->scd.Status_CDC & 0x08)) - { - // Transfer data disabled - cdprintf("Read_CDC_Host FIXME: Transfer data disabled"); - return 0; - } - - if ((is_sub && (Pico_mcd->s68k_regs[4] & 7) != 3) || - (!is_sub && (Pico_mcd->s68k_regs[4] & 7) != 2)) - { - // Wrong setting - cdprintf("Read_CDC_Host FIXME: Wrong setting"); - return 0; - } - - Pico_mcd->cdc.DBC.N -= 2; - - if (Pico_mcd->cdc.DBC.N <= 0) - { - Pico_mcd->cdc.DBC.N = 0; - Pico_mcd->scd.Status_CDC &= ~0x08; // Last transfer - Pico_mcd->s68k_regs[4] |= 0x80; // End data transfer - Pico_mcd->s68k_regs[4] &= ~0x40; // no more data ready - Pico_mcd->cdc.IFSTAT |= 0x08; // No more data transfer in progress - - if (Pico_mcd->cdc.IFCTRL & 0x40) // DTEIEN = Data Transfer End Interrupt Enable ? 
- { - Pico_mcd->cdc.IFSTAT &= ~0x40; - - if (Pico_mcd->s68k_regs[0x33]&(1<<5)) { - elprintf(EL_INTS, "m68k: s68k irq 5"); - SekInterruptS68k(5); - } - - cdprintf("CDC - DTE interrupt"); - } - } - - addr = Pico_mcd->cdc.DAC.N; - Pico_mcd->cdc.DAC.N += 2; - - cdprintf("Read_CDC_Host sub=%i d=%04x dac=%04x dbc=%04x", is_sub, - (Pico_mcd->cdc.Buffer[addr]<<8) | Pico_mcd->cdc.Buffer[addr+1], Pico_mcd->cdc.DAC.N, Pico_mcd->cdc.DBC.N); - - return (Pico_mcd->cdc.Buffer[addr]<<8) | Pico_mcd->cdc.Buffer[addr+1]; -} - - -PICO_INTERNAL void CDC_Update_Header(void) -{ - if (Pico_mcd->cdc.CTRL.B.B1 & 0x01) // Sub-Header wanted ? - { - Pico_mcd->cdc.HEAD.B.B0 = 0; - Pico_mcd->cdc.HEAD.B.B1 = 0; - Pico_mcd->cdc.HEAD.B.B2 = 0; - Pico_mcd->cdc.HEAD.B.B3 = 0; - } - else - { - _msf MSF; - - LBA_to_MSF(Pico_mcd->scd.Cur_LBA, &MSF); - - Pico_mcd->cdc.HEAD.B.B0 = INT_TO_BCDB(MSF.M); - Pico_mcd->cdc.HEAD.B.B1 = INT_TO_BCDB(MSF.S); - Pico_mcd->cdc.HEAD.B.B2 = INT_TO_BCDB(MSF.F); - Pico_mcd->cdc.HEAD.B.B3 = 0x01; - } -} - - -PICO_INTERNAL unsigned char CDC_Read_Reg(void) -{ - unsigned char ret; - - switch(Pico_mcd->s68k_regs[5] & 0xF) - { - case 0x0: // COMIN - cdprintf("CDC read reg 00 = %.2X", Pico_mcd->cdc.COMIN); - - Pico_mcd->s68k_regs[5] = 0x1; - return Pico_mcd->cdc.COMIN; - - case 0x1: // IFSTAT - cdprintf("CDC read reg 01 = %.2X", Pico_mcd->cdc.IFSTAT); - - Pico_mcd->cdc.Decode_Reg_Read |= (1 << 1); // Reg 1 (decoding) - Pico_mcd->s68k_regs[5] = 0x2; - return Pico_mcd->cdc.IFSTAT; - - case 0x2: // DBCL - cdprintf("CDC read reg 02 = %.2X", Pico_mcd->cdc.DBC.B.L); - - Pico_mcd->s68k_regs[5] = 0x3; - return Pico_mcd->cdc.DBC.B.L; - - case 0x3: // DBCH - cdprintf("CDC read reg 03 = %.2X", Pico_mcd->cdc.DBC.B.H); - - Pico_mcd->s68k_regs[5] = 0x4; - return Pico_mcd->cdc.DBC.B.H; - - case 0x4: // HEAD0 - cdprintf("CDC read reg 04 = %.2X", Pico_mcd->cdc.HEAD.B.B0); - - Pico_mcd->cdc.Decode_Reg_Read |= (1 << 4); // Reg 4 (decoding) - Pico_mcd->s68k_regs[5] = 0x5; - return 
Pico_mcd->cdc.HEAD.B.B0; - - case 0x5: // HEAD1 - cdprintf("CDC read reg 05 = %.2X", Pico_mcd->cdc.HEAD.B.B1); - - Pico_mcd->cdc.Decode_Reg_Read |= (1 << 5); // Reg 5 (decoding) - Pico_mcd->s68k_regs[5] = 0x6; - return Pico_mcd->cdc.HEAD.B.B1; - - case 0x6: // HEAD2 - cdprintf("CDC read reg 06 = %.2X", Pico_mcd->cdc.HEAD.B.B2); - - Pico_mcd->cdc.Decode_Reg_Read |= (1 << 6); // Reg 6 (decoding) - Pico_mcd->s68k_regs[5] = 0x7; - return Pico_mcd->cdc.HEAD.B.B2; - - case 0x7: // HEAD3 - cdprintf("CDC read reg 07 = %.2X", Pico_mcd->cdc.HEAD.B.B3); - - Pico_mcd->cdc.Decode_Reg_Read |= (1 << 7); // Reg 7 (decoding) - Pico_mcd->s68k_regs[5] = 0x8; - return Pico_mcd->cdc.HEAD.B.B3; - - case 0x8: // PTL - cdprintf("CDC read reg 08 = %.2X", Pico_mcd->cdc.PT.B.L); - - Pico_mcd->cdc.Decode_Reg_Read |= (1 << 8); // Reg 8 (decoding) - Pico_mcd->s68k_regs[5] = 0x9; - return Pico_mcd->cdc.PT.B.L; - - case 0x9: // PTH - cdprintf("CDC read reg 09 = %.2X", Pico_mcd->cdc.PT.B.H); - - Pico_mcd->cdc.Decode_Reg_Read |= (1 << 9); // Reg 9 (decoding) - Pico_mcd->s68k_regs[5] = 0xA; - return Pico_mcd->cdc.PT.B.H; - - case 0xA: // WAL - cdprintf("CDC read reg 10 = %.2X", Pico_mcd->cdc.WA.B.L); - - Pico_mcd->s68k_regs[5] = 0xB; - return Pico_mcd->cdc.WA.B.L; - - case 0xB: // WAH - cdprintf("CDC read reg 11 = %.2X", Pico_mcd->cdc.WA.B.H); - - Pico_mcd->s68k_regs[5] = 0xC; - return Pico_mcd->cdc.WA.B.H; - - case 0xC: // STAT0 - cdprintf("CDC read reg 12 = %.2X", Pico_mcd->cdc.STAT.B.B0); - - Pico_mcd->cdc.Decode_Reg_Read |= (1 << 12); // Reg 12 (decoding) - Pico_mcd->s68k_regs[5] = 0xD; - return Pico_mcd->cdc.STAT.B.B0; - - case 0xD: // STAT1 - cdprintf("CDC read reg 13 = %.2X", Pico_mcd->cdc.STAT.B.B1); - - Pico_mcd->cdc.Decode_Reg_Read |= (1 << 13); // Reg 13 (decoding) - Pico_mcd->s68k_regs[5] = 0xE; - return Pico_mcd->cdc.STAT.B.B1; - - case 0xE: // STAT2 - cdprintf("CDC read reg 14 = %.2X", Pico_mcd->cdc.STAT.B.B2); - - Pico_mcd->cdc.Decode_Reg_Read |= (1 << 14); // Reg 14 (decoding) - 
Pico_mcd->s68k_regs[5] = 0xF; - return Pico_mcd->cdc.STAT.B.B2; - - case 0xF: // STAT3 - cdprintf("CDC read reg 15 = %.2X", Pico_mcd->cdc.STAT.B.B3); - - ret = Pico_mcd->cdc.STAT.B.B3; - Pico_mcd->cdc.IFSTAT |= 0x20; // decoding interrupt flag cleared - if ((Pico_mcd->cdc.CTRL.B.B0 & 0x80) && (Pico_mcd->cdc.IFCTRL & 0x20)) - { - if ((Pico_mcd->cdc.Decode_Reg_Read & 0x73F2) == 0x73F2) - Pico_mcd->cdc.STAT.B.B3 = 0x80; - } - return ret; - } - - return 0; -} - - -PICO_INTERNAL void CDC_Write_Reg(unsigned char Data) -{ - cdprintf("CDC write reg%02d = %.2X", Pico_mcd->s68k_regs[5] & 0xF, Data); - - switch (Pico_mcd->s68k_regs[5] & 0xF) - { - case 0x0: // SBOUT - Pico_mcd->s68k_regs[5] = 0x1; - Pico_mcd->cdc.SBOUT = Data; - - break; - - case 0x1: // IFCTRL - Pico_mcd->s68k_regs[5] = 0x2; - Pico_mcd->cdc.IFCTRL = Data; - - if ((Pico_mcd->cdc.IFCTRL & 0x02) == 0) // Stop data transfer - { - Pico_mcd->cdc.DBC.N = 0; - Pico_mcd->scd.Status_CDC &= ~0x08; - Pico_mcd->cdc.IFSTAT |= 0x08; // No more data transfer in progress - } - break; - - case 0x2: // DBCL - Pico_mcd->s68k_regs[5] = 0x3; - Pico_mcd->cdc.DBC.B.L = Data; - - break; - - case 0x3: // DBCH - Pico_mcd->s68k_regs[5] = 0x4; - Pico_mcd->cdc.DBC.B.H = Data; - - break; - - case 0x4: // DACL - Pico_mcd->s68k_regs[5] = 0x5; - Pico_mcd->cdc.DAC.B.L = Data; - - break; - - case 0x5: // DACH - Pico_mcd->s68k_regs[5] = 0x6; - Pico_mcd->cdc.DAC.B.H = Data; - - break; - - case 0x6: // DTTRG - if (Pico_mcd->cdc.IFCTRL & 0x02) // Data transfer enable ? 
- { - Pico_mcd->cdc.IFSTAT &= ~0x08; // Data transfer in progress - Pico_mcd->scd.Status_CDC |= 0x08; // Data transfer in progress - Pico_mcd->s68k_regs[4] &= 0x7F; // A data transfer start - - cdprintf("************** Starting Data Transfer ***********"); - cdprintf("RS0 = %.4X DAC = %.4X DBC = %.4X DMA adr = %.4X\n\n", Pico_mcd->s68k_regs[4]<<8, - Pico_mcd->cdc.DAC.N, Pico_mcd->cdc.DBC.N, (Pico_mcd->s68k_regs[0xA]<<8) | Pico_mcd->s68k_regs[0xB]); - - // tmp - { - int ddx = Pico_mcd->s68k_regs[4] & 7; - if (ddx < 2) break; // invalid - if (ddx < 4) { - Pico_mcd->s68k_regs[4] |= 0x40; // Data set ready in host port - break; - } - if (ddx == 6) break; // invalid - - pcd_event_schedule_s68k(PCD_EVENT_DMA, Pico_mcd->cdc.DBC.N / 2); - } - } - break; - - case 0x7: // DTACK - Pico_mcd->cdc.IFSTAT |= 0x40; // end data transfer interrupt flag cleared - break; - - case 0x8: // WAL - Pico_mcd->s68k_regs[5] = 0x9; - Pico_mcd->cdc.WA.B.L = Data; - - break; - - case 0x9: // WAH - Pico_mcd->s68k_regs[5] = 0xA; - Pico_mcd->cdc.WA.B.H = Data; - - break; - - case 0xA: // CTRL0 - Pico_mcd->s68k_regs[5] = 0xB; - Pico_mcd->cdc.CTRL.B.B0 = Data; - - break; - - case 0xB: // CTRL1 - Pico_mcd->s68k_regs[5] = 0xC; - Pico_mcd->cdc.CTRL.B.B1 = Data; - - break; - - case 0xC: // PTL - Pico_mcd->s68k_regs[5] = 0xD; - Pico_mcd->cdc.PT.B.L = Data; - - break; - - case 0xD: // PTH - Pico_mcd->s68k_regs[5] = 0xE; - Pico_mcd->cdc.PT.B.H = Data; - - break; - - case 0xE: // CTRL2 - Pico_mcd->cdc.CTRL.B.B2 = Data; - break; - - case 0xF: // RESET - CDC_Reset(); - break; - } -} - - -static int bswapwrite(int a, unsigned short d) -{ - *(unsigned short *)(Pico_mcd->s68k_regs + a) = (d>>8)|(d<<8); - return d + (d >> 8); -} - -PICO_INTERNAL void CDD_Export_Status(void) -{ - unsigned int csum; - - csum = bswapwrite( 0x38+0, Pico_mcd->cdd.Status); - csum += bswapwrite( 0x38+2, Pico_mcd->cdd.Minute); - csum += bswapwrite( 0x38+4, Pico_mcd->cdd.Seconde); - csum += bswapwrite( 0x38+6, Pico_mcd->cdd.Frame); - 
Pico_mcd->s68k_regs[0x38+8] = Pico_mcd->cdd.Ext; - csum += Pico_mcd->cdd.Ext; - Pico_mcd->s68k_regs[0x38+9] = ~csum & 0xf; - - Pico_mcd->s68k_regs[0x37] &= 3; // CDD.Control - - if (Pico_mcd->s68k_regs[0x33] & PCDS_IEN4) - { - elprintf(EL_INTS, "cdd export irq 4"); - SekInterruptS68k(4); - } - -// cdprintf("CDD exported status\n"); - cdprintf("out: Status=%.4X, Minute=%.4X, Second=%.4X, Frame=%.4X Checksum=%.4X", - (Pico_mcd->s68k_regs[0x38+0] << 8) | Pico_mcd->s68k_regs[0x38+1], - (Pico_mcd->s68k_regs[0x38+2] << 8) | Pico_mcd->s68k_regs[0x38+3], - (Pico_mcd->s68k_regs[0x38+4] << 8) | Pico_mcd->s68k_regs[0x38+5], - (Pico_mcd->s68k_regs[0x38+6] << 8) | Pico_mcd->s68k_regs[0x38+7], - (Pico_mcd->s68k_regs[0x38+8] << 8) | Pico_mcd->s68k_regs[0x38+9]); -} - - -PICO_INTERNAL void CDD_Import_Command(void) -{ -// cdprintf("CDD importing command\n"); - cdprintf("in: Command=%.4X, Minute=%.4X, Second=%.4X, Frame=%.4X Checksum=%.4X", - (Pico_mcd->s68k_regs[0x38+10+0] << 8) | Pico_mcd->s68k_regs[0x38+10+1], - (Pico_mcd->s68k_regs[0x38+10+2] << 8) | Pico_mcd->s68k_regs[0x38+10+3], - (Pico_mcd->s68k_regs[0x38+10+4] << 8) | Pico_mcd->s68k_regs[0x38+10+5], - (Pico_mcd->s68k_regs[0x38+10+6] << 8) | Pico_mcd->s68k_regs[0x38+10+7], - (Pico_mcd->s68k_regs[0x38+10+8] << 8) | Pico_mcd->s68k_regs[0x38+10+9]); - - switch (Pico_mcd->s68k_regs[0x38+10+0]) - { - case 0x0: // STATUS (?) - Get_Status_CDD_c0(); - break; - - case 0x1: // STOP ALL (?) 
- Stop_CDD_c1(); - break; - - case 0x2: // GET TOC INFORMATIONS - switch(Pico_mcd->s68k_regs[0x38+10+3]) - { - case 0x0: // get current position (MSF format) - Pico_mcd->cdd.Status = (Pico_mcd->cdd.Status & 0xFF00); - Get_Pos_CDD_c20(); - break; - - case 0x1: // get elapsed time of current track played/scanned (relative MSF format) - Pico_mcd->cdd.Status = (Pico_mcd->cdd.Status & 0xFF00) | 1; - Get_Track_Pos_CDD_c21(); - break; - - case 0x2: // get current track in RS2-RS3 - Pico_mcd->cdd.Status = (Pico_mcd->cdd.Status & 0xFF00) | 2; - Get_Current_Track_CDD_c22(); - break; - - case 0x3: // get total length (MSF format) - Pico_mcd->cdd.Status = (Pico_mcd->cdd.Status & 0xFF00) | 3; - Get_Total_Lenght_CDD_c23(); - break; - - case 0x4: // first & last track number - Pico_mcd->cdd.Status = (Pico_mcd->cdd.Status & 0xFF00) | 4; - Get_First_Last_Track_CDD_c24(); - break; - - case 0x5: // get track addresse (MSF format) - Pico_mcd->cdd.Status = (Pico_mcd->cdd.Status & 0xFF00) | 5; - Get_Track_Adr_CDD_c25(); - break; - - default : // invalid, then we return status - Pico_mcd->cdd.Status = (Pico_mcd->cdd.Status & 0xFF00) | 0xF; - Get_Status_CDD_c0(); - break; - } - break; - - case 0x3: // READ - Play_CDD_c3(); - break; - - case 0x4: // SEEK - Seek_CDD_c4(); - break; - - case 0x6: // PAUSE/STOP - Pause_CDD_c6(); - break; - - case 0x7: // RESUME - Resume_CDD_c7(); - break; - - case 0x8: // FAST FOWARD - Fast_Foward_CDD_c8(); - break; - - case 0x9: // FAST REWIND - Fast_Rewind_CDD_c9(); - break; - - case 0xA: // RECOVER INITIAL STATE (?) 
- CDD_cA(); - break; - - case 0xC: // CLOSE TRAY - Close_Tray_CDD_cC(); - break; - - case 0xD: // OPEN TRAY - Open_Tray_CDD_cD(); - break; - - default: // UNKNOWN - CDD_Def(); - break; - } -} - diff --git a/pico/cd/LC89510.h b/pico/cd/LC89510.h index 2b0d3826..d641ebb3 100644 --- a/pico/cd/LC89510.h +++ b/pico/cd/LC89510.h @@ -14,92 +14,6 @@ extern "C" { #endif -typedef struct -{ - unsigned char Buffer[(32 * 1024 * 2) + 2352]; -// unsigned int Host_Data; // unused -// unsigned int DMA_Adr; // 0A -// unsigned int Stop_Watch; // 0C - unsigned int COMIN; - unsigned int IFSTAT; - union - { - struct - { - unsigned char L; - unsigned char H; - unsigned short unused; - } B; - int N; - } DBC; - union - { - struct - { - unsigned char L; - unsigned char H; - unsigned short unused; - } B; - int N; - } DAC; - union - { - struct - { - unsigned char B0; - unsigned char B1; - unsigned char B2; - unsigned char B3; - } B; - unsigned int N; - } HEAD; - union - { - struct - { - unsigned char L; - unsigned char H; - unsigned short unused; - } B; - int N; - } PT; - union - { - struct - { - unsigned char L; - unsigned char H; - unsigned short unused; - } B; - int N; - } WA; - union - { - struct - { - unsigned char B0; - unsigned char B1; - unsigned char B2; - unsigned char B3; - } B; - unsigned int N; - } STAT; - unsigned int SBOUT; - unsigned int IFCTRL; - union - { - struct - { - unsigned char B0; - unsigned char B1; - unsigned char B2; - unsigned char B3; - } B; - unsigned int N; - } CTRL; - unsigned int Decode_Reg_Read; -} CDC; - typedef struct { // unsigned short Fader; // 34 @@ -116,17 +30,11 @@ typedef struct } CDD; -PICO_INTERNAL_ASM unsigned short Read_CDC_Host(int is_sub); -PICO_INTERNAL void LC89510_Reset(void); -PICO_INTERNAL void Update_CDC_TRansfer(int which); -PICO_INTERNAL void CDC_Update_Header(void); - -PICO_INTERNAL unsigned char CDC_Read_Reg(void); -PICO_INTERNAL void CDC_Write_Reg(unsigned char Data); - PICO_INTERNAL void CDD_Export_Status(void); PICO_INTERNAL void 
CDD_Import_Command(void); +void CDD_Reset(void); + #ifdef __cplusplus }; #endif diff --git a/pico/cd/cd_file.c b/pico/cd/cd_file.c index 0f19b71d..43bbb5d8 100644 --- a/pico/cd/cd_file.c +++ b/pico/cd/cd_file.c @@ -282,6 +282,7 @@ PICO_INTERNAL void Unload_ISO(void) memset(Pico_mcd->TOC.Tracks, 0, sizeof(Pico_mcd->TOC.Tracks)); } +#if 1*0 PICO_INTERNAL int FILE_Read_One_LBA_CDC(void) { @@ -398,3 +399,4 @@ PICO_INTERNAL int FILE_Read_One_LBA_CDC(void) return 0; } +#endif diff --git a/pico/cd/cd_sys.c b/pico/cd/cd_sys.c index 1c19057f..f7cd7b5e 100644 --- a/pico/cd/cd_sys.c +++ b/pico/cd/cd_sys.c @@ -23,6 +23,10 @@ #define FAST_REV 0x10300 // FAST REVERSE track CDD status #define PLAYING 0x0100 // PLAYING audio track CDD status +//#undef cdprintf +//#define cdprintf(x, ...) elprintf(EL_STATUS, x, ##__VA_ARGS__) + +#define CDC_Update_Header() static int CD_Present = 0; @@ -139,15 +143,32 @@ PICO_INTERNAL void Check_CD_Command(void) cdprintf("Got a read command"); // DATA ? - if (Pico_mcd->scd.Cur_Track == 1) + if (Pico_mcd->scd.Cur_Track == 1) { Pico_mcd->s68k_regs[0x36] |= 0x01; - else Pico_mcd->s68k_regs[0x36] &= ~0x01; // AUDIO - if (Pico_mcd->scd.File_Add_Delay == 0) - { - FILE_Read_One_LBA_CDC(); + if (Pico_mcd->scd.File_Add_Delay == 0) + { + unsigned char header[4]; + _msf MSF; + + LBA_to_MSF(Pico_mcd->scd.Cur_LBA, &MSF); + + header[0] = INT_TO_BCDB(MSF.M); + header[1] = INT_TO_BCDB(MSF.S); + header[2] = INT_TO_BCDB(MSF.F); + header[3] = 0x01; + + //FILE_Read_One_LBA_CDC(); + Pico_mcd->scd.Cur_LBA += + cdc_decoder_update(header); + } + else Pico_mcd->scd.File_Add_Delay--; + } + else { + Pico_mcd->s68k_regs[0x36] &= ~0x01; // AUDIO + unsigned char header[4] = { 0, }; + cdc_decoder_update(header); } - else Pico_mcd->scd.File_Add_Delay--; } // Check CDD @@ -755,3 +776,158 @@ PICO_INTERNAL int CDD_Def(void) } +static int bswapwrite(int a, unsigned short d) +{ + *(unsigned short *)(Pico_mcd->s68k_regs + a) = (d>>8)|(d<<8); + return d + (d >> 8); +} + +PICO_INTERNAL 
void CDD_Export_Status(void) +{ + unsigned int csum; + + csum = bswapwrite( 0x38+0, Pico_mcd->cdd.Status); + csum += bswapwrite( 0x38+2, Pico_mcd->cdd.Minute); + csum += bswapwrite( 0x38+4, Pico_mcd->cdd.Seconde); + csum += bswapwrite( 0x38+6, Pico_mcd->cdd.Frame); + Pico_mcd->s68k_regs[0x38+8] = Pico_mcd->cdd.Ext; + csum += Pico_mcd->cdd.Ext; + Pico_mcd->s68k_regs[0x38+9] = ~csum & 0xf; + + Pico_mcd->s68k_regs[0x37] &= 3; // CDD.Control + + if (Pico_mcd->s68k_regs[0x33] & PCDS_IEN4) + { + elprintf(EL_INTS, "cdd export irq 4"); + SekInterruptS68k(4); + } + +// cdprintf("CDD exported status\n"); + cdprintf("out: Status=%.4X, Minute=%.4X, Second=%.4X, Frame=%.4X Checksum=%.4X", + (Pico_mcd->s68k_regs[0x38+0] << 8) | Pico_mcd->s68k_regs[0x38+1], + (Pico_mcd->s68k_regs[0x38+2] << 8) | Pico_mcd->s68k_regs[0x38+3], + (Pico_mcd->s68k_regs[0x38+4] << 8) | Pico_mcd->s68k_regs[0x38+5], + (Pico_mcd->s68k_regs[0x38+6] << 8) | Pico_mcd->s68k_regs[0x38+7], + (Pico_mcd->s68k_regs[0x38+8] << 8) | Pico_mcd->s68k_regs[0x38+9]); +} + + +PICO_INTERNAL void CDD_Import_Command(void) +{ +// cdprintf("CDD importing command\n"); + cdprintf("in: Command=%.4X, Minute=%.4X, Second=%.4X, Frame=%.4X Checksum=%.4X", + (Pico_mcd->s68k_regs[0x38+10+0] << 8) | Pico_mcd->s68k_regs[0x38+10+1], + (Pico_mcd->s68k_regs[0x38+10+2] << 8) | Pico_mcd->s68k_regs[0x38+10+3], + (Pico_mcd->s68k_regs[0x38+10+4] << 8) | Pico_mcd->s68k_regs[0x38+10+5], + (Pico_mcd->s68k_regs[0x38+10+6] << 8) | Pico_mcd->s68k_regs[0x38+10+7], + (Pico_mcd->s68k_regs[0x38+10+8] << 8) | Pico_mcd->s68k_regs[0x38+10+9]); + + switch (Pico_mcd->s68k_regs[0x38+10+0]) + { + case 0x0: // STATUS (?) + Get_Status_CDD_c0(); + break; + + case 0x1: // STOP ALL (?) 
+ Stop_CDD_c1(); + break; + + case 0x2: // GET TOC INFORMATIONS + switch(Pico_mcd->s68k_regs[0x38+10+3]) + { + case 0x0: // get current position (MSF format) + Pico_mcd->cdd.Status = (Pico_mcd->cdd.Status & 0xFF00); + Get_Pos_CDD_c20(); + break; + + case 0x1: // get elapsed time of current track played/scanned (relative MSF format) + Pico_mcd->cdd.Status = (Pico_mcd->cdd.Status & 0xFF00) | 1; + Get_Track_Pos_CDD_c21(); + break; + + case 0x2: // get current track in RS2-RS3 + Pico_mcd->cdd.Status = (Pico_mcd->cdd.Status & 0xFF00) | 2; + Get_Current_Track_CDD_c22(); + break; + + case 0x3: // get total length (MSF format) + Pico_mcd->cdd.Status = (Pico_mcd->cdd.Status & 0xFF00) | 3; + Get_Total_Lenght_CDD_c23(); + break; + + case 0x4: // first & last track number + Pico_mcd->cdd.Status = (Pico_mcd->cdd.Status & 0xFF00) | 4; + Get_First_Last_Track_CDD_c24(); + break; + + case 0x5: // get track addresse (MSF format) + Pico_mcd->cdd.Status = (Pico_mcd->cdd.Status & 0xFF00) | 5; + Get_Track_Adr_CDD_c25(); + break; + + default : // invalid, then we return status + Pico_mcd->cdd.Status = (Pico_mcd->cdd.Status & 0xFF00) | 0xF; + Get_Status_CDD_c0(); + break; + } + break; + + case 0x3: // READ + Play_CDD_c3(); + break; + + case 0x4: // SEEK + Seek_CDD_c4(); + break; + + case 0x6: // PAUSE/STOP + Pause_CDD_c6(); + break; + + case 0x7: // RESUME + Resume_CDD_c7(); + break; + + case 0x8: // FAST FOWARD + Fast_Foward_CDD_c8(); + break; + + case 0x9: // FAST REWIND + Fast_Rewind_CDD_c9(); + break; + + case 0xA: // RECOVER INITIAL STATE (?) 
+ CDD_cA(); + break; + + case 0xC: // CLOSE TRAY + Close_Tray_CDD_cC(); + break; + + case 0xD: // OPEN TRAY + Open_Tray_CDD_cD(); + break; + + default: // UNKNOWN + CDD_Def(); + break; + } +} + +void CDD_Reset(void) +{ + // Reseting CDD + + memset(Pico_mcd->s68k_regs+0x34, 0, 2*2); // CDD.Fader, CDD.Control + Pico_mcd->cdd.Status = 0; + Pico_mcd->cdd.Minute = 0; + Pico_mcd->cdd.Seconde = 0; + Pico_mcd->cdd.Frame = 0; + Pico_mcd->cdd.Ext = 0; + + // clear receive status and transfer command + memset(Pico_mcd->s68k_regs+0x38, 0, 20); + Pico_mcd->s68k_regs[0x38+9] = 0xF; // Default checksum +} + + diff --git a/pico/cd/cdc.c b/pico/cd/cdc.c new file mode 100644 index 00000000..aa1ded97 --- /dev/null +++ b/pico/cd/cdc.c @@ -0,0 +1,851 @@ +/*************************************************************************************** + * Genesis Plus + * CD data controller (LC89510 compatible) + * + * Copyright (C) 2012 Eke-Eke (Genesis Plus GX) + * + * Redistribution and use of this code or any derivative works are permitted + * provided that the following conditions are met: + * + * - Redistributions may not be sold, nor may they be used in a commercial + * product or activity. + * + * - Redistributions that are modified from the original source must include the + * complete source code, including the source code for all components used by a + * binary built from the modified sources. However, as a special exception, the + * source code distributed need not include anything that is normally distributed + * (in either source or binary form) with the major components (compiler, kernel, + * and so on) of the operating system on which the executable runs, unless that + * component itself accompanies the executable. + * + * - Redistributions must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other + * materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + ****************************************************************************************/ + +#include "../pico_int.h" +#include "genplus_macros.h" + +/* IFSTAT register bitmasks */ +#define BIT_DTEI 0x40 +#define BIT_DECI 0x20 +#define BIT_DTBSY 0x08 +#define BIT_DTEN 0x02 + +/* IFCTRL register bitmasks */ +#define BIT_DTEIEN 0x40 +#define BIT_DECIEN 0x20 +#define BIT_DOUTEN 0x02 + +/* CTRL0 register bitmasks */ +#define BIT_DECEN 0x80 +#define BIT_E01RQ 0x20 +#define BIT_AUTORQ 0x10 +#define BIT_WRRQ 0x04 + +/* CTRL1 register bitmasks */ +#define BIT_MODRQ 0x08 +#define BIT_FORMRQ 0x04 +#define BIT_SHDREN 0x01 + +/* CTRL2 register bitmask */ +#define BIT_VALST 0x80 + +/* PicoDrive: doing DMA at once, not using callbacks */ +//#define DMA_BYTES_PER_LINE 512 + +enum dma_type { + word_ram_0_dma_w = 1, + word_ram_1_dma_w = 2, + word_ram_2M_dma_w = 3, + pcm_ram_dma_w = 4, + prg_ram_dma_w = 5, +}; + +/* CDC hardware */ +typedef struct +{ + uint8 ifstat; + uint8 ifctrl; + reg16_t dbc; + reg16_t dac; + reg16_t pt; + reg16_t wa; + uint8 ctrl[2]; + uint8 head[2][4]; + uint8 stat[4]; + int cycles; + //void (*dma_w)(unsigned int words); + int dma_w; + uint8 ram[0x4000 + 2352]; /* 
16K external RAM (with one block overhead to handle buffer overrun) */ +} cdc_t; + +static cdc_t cdc; + +void cdc_init(void) +{ + memset(&cdc, 0, sizeof(cdc_t)); +} + +void cdc_reset(void) +{ + /* reset CDC register index */ + Pico_mcd->regs[0x04>>1].byte.l = 0x00; + + /* reset CDC registers */ + cdc.ifstat = 0xff; + cdc.ifctrl = 0x00; + cdc.ctrl[0] = 0x00; + cdc.ctrl[1] = 0x00; + cdc.stat[0] = 0x00; + cdc.stat[1] = 0x00; + cdc.stat[2] = 0x00; + cdc.stat[3] = 0x80; + cdc.head[0][0] = 0x00; + cdc.head[0][1] = 0x00; + cdc.head[0][2] = 0x00; + cdc.head[0][3] = 0x01; + cdc.head[1][0] = 0x00; + cdc.head[1][1] = 0x00; + cdc.head[1][2] = 0x00; + cdc.head[1][3] = 0x00; + + /* reset CDC cycle counter */ + cdc.cycles = 0; + + /* DMA transfer disabled */ + cdc.dma_w = 0; +} + +int cdc_context_save(uint8 *state) +{ + uint8 tmp8; + int bufferptr = 0; + + if (cdc.dma_w == pcm_ram_dma_w) + { + tmp8 = 1; + } + else if (cdc.dma_w == prg_ram_dma_w) + { + tmp8 = 2; + } + else if (cdc.dma_w == word_ram_0_dma_w) + { + tmp8 = 3; + } + else if (cdc.dma_w == word_ram_1_dma_w) + { + tmp8 = 4; + } + else if (cdc.dma_w == word_ram_2M_dma_w) + { + tmp8 = 5; + } + else + { + tmp8 = 0; + } + + save_param(&cdc, sizeof(cdc)); + save_param(&tmp8, 1); + + return bufferptr; +} + +int cdc_context_load(uint8 *state) +{ + uint8 tmp8; + int bufferptr = 0; + + load_param(&cdc, sizeof(cdc)); + load_param(&tmp8, 1); + + switch (tmp8) + { + case 1: + cdc.dma_w = pcm_ram_dma_w; + break; + case 2: + cdc.dma_w = prg_ram_dma_w; + break; + case 3: + cdc.dma_w = word_ram_0_dma_w; + break; + case 4: + cdc.dma_w = word_ram_1_dma_w; + break; + case 5: + cdc.dma_w = word_ram_2M_dma_w; + break; + default: + cdc.dma_w = 0; + break; + } + + return bufferptr; +} + +int cdc_context_load_old(uint8 *state) +{ +#define old_load(v, ofs) \ + memcpy(&cdc.v, state + ofs, sizeof(cdc.v)) + + memcpy(cdc.ram, state, 0x4000); + old_load(ifstat, 67892); + old_load(ifctrl, 67924); + old_load(dbc, 67896); + old_load(dac, 67900); + 
old_load(pt, 67908); + old_load(wa, 67912); + old_load(ctrl, 67928); + old_load(head[0], 67904); + old_load(stat, 67916); + + cdc.dma_w = 0; + switch (Pico_mcd->regs[0x04>>1].byte.h & 0x07) + { + case 4: /* PCM RAM DMA */ + cdc.dma_w = pcm_ram_dma_w; + break; + case 5: /* PRG-RAM DMA */ + cdc.dma_w = prg_ram_dma_w; + break; + case 7: /* WORD-RAM DMA */ + if (Pico_mcd->regs[0x02 >> 1].byte.l & 0x04) + { + if (Pico_mcd->regs[0x02 >> 1].byte.l & 0x01) + cdc.dma_w = word_ram_0_dma_w; + else + cdc.dma_w = word_ram_1_dma_w; + } + else + { + if (Pico_mcd->regs[0x02 >> 1].byte.l & 0x02) + cdc.dma_w = word_ram_2M_dma_w; + } + break; + } + + return 0x10960; // sizeof(old_cdc) +#undef old_load +} + +static void do_dma(enum dma_type type, int words_in) +{ + int dma_addr = (Pico_mcd->s68k_regs[0x0a] << 8) | Pico_mcd->s68k_regs[0x0b]; + int src_addr = cdc.dac.w & 0x3ffe; + int dst_addr = dma_addr; + int words = words_in; + int dst_limit = 0; + uint8 *dst; + int len; + + elprintf(EL_CD, "dma %d %04x->%04x %x", + type, cdc.dac.w, dst_addr, words_in); + + switch (type) + { + case pcm_ram_dma_w: + dst_addr = (dst_addr << 2) & 0xffc; + if (dst_addr + words * 2 > 0x1000) { + elprintf(EL_ANOMALY, "pcm dma oflow: %x %x", dst_addr, words); + words = (0x1000 - dst_addr) / 2; + } + dst = Pico_mcd->pcm_ram_b[Pico_mcd->pcm.bank]; + dst = dst + dst_addr; + while (words > 0) + { + if (src_addr + words * 2 > 0x4000) { + len = 0x4000 - src_addr; + memcpy(dst, cdc.ram + src_addr, len); + dst += len; + src_addr = 0; + words -= len / 2; + continue; + } + memcpy(dst, cdc.ram + src_addr, words * 2); + break; + } + goto update_dma; + + case prg_ram_dma_w: + dst_addr <<= 3; + dst = Pico_mcd->prg_ram + dst_addr; + dst_limit = 0x80000; + break; + + case word_ram_0_dma_w: + dst_addr = (dst_addr << 3) & 0x1fffe; + dst = Pico_mcd->word_ram1M[0] + dst_addr; + dst_limit = 0x20000; + break; + + case word_ram_1_dma_w: + dst_addr = (dst_addr << 3) & 0x1fffe; + dst = Pico_mcd->word_ram1M[1] + dst_addr; + 
dst_limit = 0x20000; + break; + + case word_ram_2M_dma_w: + dst_addr = (dst_addr << 3) & 0x3fffe; + dst = Pico_mcd->word_ram2M + dst_addr; + dst_limit = 0x40000; + break; + + default: + elprintf(EL_ANOMALY, "invalid dma: %d", type); + goto update_dma; + } + + if (dst_addr + words * 2 > dst_limit) { + elprintf(EL_ANOMALY, "cd dma %d oflow: %x %x", type, dst_addr, words); + words = (dst_limit - dst_addr) / 2; + } + while (words > 0) + { + if (src_addr + words * 2 > 0x4000) { + len = 0x4000 - src_addr; + memcpy16bswap((void *)dst, cdc.ram + src_addr, len / 2); + dst += len; + src_addr = 0; + words -= len / 2; + continue; + } + memcpy16bswap((void *)dst, cdc.ram + src_addr, words); + break; + } + +update_dma: + /* update DMA addresses */ + cdc.dac.w += words_in * 2; + if (type == pcm_ram_dma_w) + dma_addr += words_in >> 1; + else + dma_addr += words_in >> 2; + + Pico_mcd->s68k_regs[0x0a] = dma_addr >> 8; + Pico_mcd->s68k_regs[0x0b] = dma_addr; +} + +// tmp +static void cdd_read_data(uint8 *dst) +{ + int lba = Pico_mcd->scd.Cur_LBA; + + /* only read DATA track sectors */ + if (0 <= lba && lba < Pico_mcd->TOC.Tracks[0].Length) + { + /* read sector data (Mode 1 = 2048 bytes) */ + PicoCDBufferRead(dst, lba); + } +} + +void cdc_dma_update(void) +{ + /* end of DMA transfer ? */ + //if (cdc.dbc.w < DMA_BYTES_PER_LINE) + { + /* transfer remaining words using 16-bit DMA */ + //cdc.dma_w((cdc.dbc.w + 1) >> 1); + do_dma(cdc.dma_w, (cdc.dbc.w + 1) >> 1); + + /* reset data byte counter (DBCH bits 4-7 should be set to 1) */ + cdc.dbc.w = 0xf000; + + /* clear !DTEN and !DTBSY */ + cdc.ifstat |= (BIT_DTBSY | BIT_DTEN); + + /* pending Data Transfer End interrupt */ + cdc.ifstat &= ~BIT_DTEI; + + /* Data Transfer End interrupt enabled ? */ + if (cdc.ifctrl & BIT_DTEIEN) + { + /* level 5 interrupt enabled ? 
*/ + if (Pico_mcd->regs[0x32>>1].byte.l & PCDS_IEN5) + { + /* update IRQ level */ + elprintf(EL_INTS, "cdc DTE irq 5"); + SekInterruptS68k(5); + } + } + + /* clear DSR bit & set EDT bit (SCD register $04) */ + Pico_mcd->regs[0x04>>1].byte.h = (Pico_mcd->regs[0x04>>1].byte.h & 0x07) | 0x80; + + /* disable DMA transfer */ + cdc.dma_w = 0; + } +#if 0 + else + { + /* transfer all words using 16-bit DMA */ + cdc.dma_w(DMA_BYTES_PER_LINE >> 1); + + /* decrement data byte counter */ + cdc.dbc.w -= length; + } +#endif +} + +int cdc_decoder_update(uint8 header[4]) +{ + /* data decoding enabled ? */ + if (cdc.ctrl[0] & BIT_DECEN) + { + /* update HEAD registers */ + memcpy(cdc.head[0], header, sizeof(cdc.head[0])); + + /* set !VALST */ + cdc.stat[3] = 0x00; + + /* pending decoder interrupt */ + cdc.ifstat &= ~BIT_DECI; + + /* decoder interrupt enabled ? */ + if (cdc.ifctrl & BIT_DECIEN) + { + /* level 5 interrupt enabled ? */ + if (Pico_mcd->regs[0x32>>1].byte.l & PCDS_IEN5) + { + /* update IRQ level */ + elprintf(EL_INTS, "cdc DEC irq 5"); + SekInterruptS68k(5); + } + } + + /* buffer RAM write enabled ? 
*/ + if (cdc.ctrl[0] & BIT_WRRQ) + { + uint16 offset; + + /* increment block pointer */ + cdc.pt.w += 2352; + + /* increment write address */ + cdc.wa.w += 2352; + + /* CDC buffer address */ + offset = cdc.pt.w & 0x3fff; + + /* write CDD block header (4 bytes) */ + memcpy(cdc.ram + offset, header, 4); + + /* write CDD block data (2048 bytes) */ + cdd_read_data(cdc.ram + 4 + offset); + + /* take care of buffer overrun */ + if (offset > (0x4000 - 2048 - 4)) + { + /* data should be written at the start of buffer */ + memcpy(cdc.ram, cdc.ram + 0x4000, offset + 2048 + 4 - 0x4000); + } + + /* read next data block */ + return 1; + } + } + + /* keep decoding same data block if Buffer Write is disabled */ + return 0; +} + +void cdc_reg_w(unsigned char data) +{ +#ifdef LOG_CDC + elprintf(EL_STATUS, "CDC register %X write 0x%04x", Pico_mcd->regs[0x04>>1].byte.l & 0x0F, data); +#endif + switch (Pico_mcd->regs[0x04>>1].byte.l & 0x0F) + { + case 0x01: /* IFCTRL */ + { + /* pending interrupts ? */ + if (((data & BIT_DTEIEN) && !(cdc.ifstat & BIT_DTEI)) || + ((data & BIT_DECIEN) && !(cdc.ifstat & BIT_DECI))) + { + /* level 5 interrupt enabled ? 
*/ + if (Pico_mcd->regs[0x32>>1].byte.l & PCDS_IEN5) + { + /* update IRQ level */ + elprintf(EL_INTS, "cdc pending irq 5"); + SekInterruptS68k(5); + } + } + else // if (scd.pending & (1 << 5)) + { + /* clear pending level 5 interrupts */ + SekInterruptClearS68k(5); + } + + /* abort any data transfer if data output is disabled */ + if (!(data & BIT_DOUTEN)) + { + /* clear !DTBSY and !DTEN */ + cdc.ifstat |= (BIT_DTBSY | BIT_DTEN); + } + + cdc.ifctrl = data; + Pico_mcd->regs[0x04>>1].byte.l = 0x02; + break; + } + + case 0x02: /* DBCL */ + cdc.dbc.byte.l = data; + Pico_mcd->regs[0x04>>1].byte.l = 0x03; + break; + + case 0x03: /* DBCH */ + cdc.dbc.byte.h = data; + Pico_mcd->regs[0x04>>1].byte.l = 0x04; + break; + + case 0x04: /* DACL */ + cdc.dac.byte.l = data; + Pico_mcd->regs[0x04>>1].byte.l = 0x05; + break; + + case 0x05: /* DACH */ + cdc.dac.byte.h = data; + Pico_mcd->regs[0x04>>1].byte.l = 0x06; + break; + + case 0x06: /* DTRG */ + { + /* start data transfer if data output is enabled */ + if (cdc.ifctrl & BIT_DOUTEN) + { + /* set !DTBSY */ + cdc.ifstat &= ~BIT_DTBSY; + + /* clear DBCH bits 4-7 */ + cdc.dbc.byte.h &= 0x0f; + + /* clear EDT & DSR bits (SCD register $04) */ + Pico_mcd->regs[0x04>>1].byte.h &= 0x07; + + cdc.dma_w = 0; + + /* setup data transfer destination */ + switch (Pico_mcd->regs[0x04>>1].byte.h & 0x07) + { + case 2: /* MAIN-CPU host read */ + case 3: /* SUB-CPU host read */ + { + /* set !DTEN */ + cdc.ifstat &= ~BIT_DTEN; + + /* set DSR bit (register $04) */ + Pico_mcd->regs[0x04>>1].byte.h |= 0x40; + break; + } + + case 4: /* PCM RAM DMA */ + { + cdc.dma_w = pcm_ram_dma_w; + break; + } + + case 5: /* PRG-RAM DMA */ + { + cdc.dma_w = prg_ram_dma_w; + break; + } + + case 7: /* WORD-RAM DMA */ + { + /* check memory mode */ + if (Pico_mcd->regs[0x02 >> 1].byte.l & 0x04) + { + /* 1M mode */ + if (Pico_mcd->regs[0x02 >> 1].byte.l & 0x01) + { + /* Word-RAM bank 0 is assigned to SUB-CPU */ + cdc.dma_w = word_ram_0_dma_w; + } + else + { + /* Word-RAM 
bank 1 is assigned to SUB-CPU */ + cdc.dma_w = word_ram_1_dma_w; + } + } + else + { + /* 2M mode */ + if (Pico_mcd->regs[0x02 >> 1].byte.l & 0x02) + { + /* only process DMA if Word-RAM is assigned to SUB-CPU */ + cdc.dma_w = word_ram_2M_dma_w; + } + } + break; + } + + default: /* invalid */ + { + elprintf(EL_ANOMALY, "invalid CDC tranfer destination (%d)", + Pico_mcd->regs[0x04>>1].byte.h & 0x07); + break; + } + } + + if (cdc.dma_w) + pcd_event_schedule_s68k(PCD_EVENT_DMA, cdc.dbc.w / 2); + } + + Pico_mcd->regs[0x04>>1].byte.l = 0x07; + break; + } + + case 0x07: /* DTACK */ + { + /* clear pending data transfer end interrupt */ + cdc.ifstat |= BIT_DTEI; + + /* clear DBCH bits 4-7 */ + cdc.dbc.byte.h &= 0x0f; + +#if 0 + /* no pending decoder interrupt ? */ + if ((cdc.ifstat | BIT_DECI) || !(cdc.ifctrl & BIT_DECIEN)) + { + /* clear pending level 5 interrupt */ + SekInterruptClearS68k(5); + } +#endif + Pico_mcd->regs[0x04>>1].byte.l = 0x08; + break; + } + + case 0x08: /* WAL */ + cdc.wa.byte.l = data; + Pico_mcd->regs[0x04>>1].byte.l = 0x09; + break; + + case 0x09: /* WAH */ + cdc.wa.byte.h = data; + Pico_mcd->regs[0x04>>1].byte.l = 0x0a; + break; + + case 0x0a: /* CTRL0 */ + { + /* set CRCOK bit only if decoding is enabled */ + cdc.stat[0] = data & BIT_DECEN; + + /* update decoding mode */ + if (data & BIT_AUTORQ) + { + /* set MODE bit according to CTRL1 register & clear FORM bit */ + cdc.stat[2] = cdc.ctrl[1] & BIT_MODRQ; + } + else + { + /* set MODE & FORM bits according to CTRL1 register */ + cdc.stat[2] = cdc.ctrl[1] & (BIT_MODRQ | BIT_FORMRQ); + } + + cdc.ctrl[0] = data; + Pico_mcd->regs[0x04>>1].byte.l = 0x0b; + break; + } + + case 0x0b: /* CTRL1 */ + { + /* update decoding mode */ + if (cdc.ctrl[0] & BIT_AUTORQ) + { + /* set MODE bit according to CTRL1 register & clear FORM bit */ + cdc.stat[2] = data & BIT_MODRQ; + } + else + { + /* set MODE & FORM bits according to CTRL1 register */ + cdc.stat[2] = data & (BIT_MODRQ | BIT_FORMRQ); + } + + cdc.ctrl[1] = data; 
+ Pico_mcd->regs[0x04>>1].byte.l = 0x0c; + break; + } + + case 0x0c: /* PTL */ + cdc.pt.byte.l = data; + Pico_mcd->regs[0x04>>1].byte.l = 0x0d; + break; + + case 0x0d: /* PTH */ + cdc.pt.byte.h = data; + Pico_mcd->regs[0x04>>1].byte.l = 0x0e; + break; + + case 0x0e: /* CTRL2 (unused) */ + Pico_mcd->regs[0x04>>1].byte.l = 0x0f; + break; + + case 0x0f: /* RESET */ + cdc_reset(); + break; + + default: /* by default, SBOUT is not used */ + break; + } +} + +unsigned char cdc_reg_r(void) +{ + switch (Pico_mcd->regs[0x04>>1].byte.l & 0x0F) + { + case 0x01: /* IFSTAT */ + Pico_mcd->regs[0x04>>1].byte.l = 0x02; + return cdc.ifstat; + + case 0x02: /* DBCL */ + Pico_mcd->regs[0x04>>1].byte.l = 0x03; + return cdc.dbc.byte.l; + + case 0x03: /* DBCH */ + Pico_mcd->regs[0x04>>1].byte.l = 0x04; + return cdc.dbc.byte.h; + + case 0x04: /* HEAD0 */ + Pico_mcd->regs[0x04>>1].byte.l = 0x05; + return cdc.head[cdc.ctrl[1] & BIT_SHDREN][0]; + + case 0x05: /* HEAD1 */ + Pico_mcd->regs[0x04>>1].byte.l = 0x06; + return cdc.head[cdc.ctrl[1] & BIT_SHDREN][1]; + + case 0x06: /* HEAD2 */ + Pico_mcd->regs[0x04>>1].byte.l = 0x07; + return cdc.head[cdc.ctrl[1] & BIT_SHDREN][2]; + + case 0x07: /* HEAD3 */ + Pico_mcd->regs[0x04>>1].byte.l = 0x08; + return cdc.head[cdc.ctrl[1] & BIT_SHDREN][3]; + + case 0x08: /* PTL */ + Pico_mcd->regs[0x04>>1].byte.l = 0x09; + return cdc.pt.byte.l; + + case 0x09: /* PTH */ + Pico_mcd->regs[0x04>>1].byte.l = 0x0a; + return cdc.pt.byte.h; + + case 0x0a: /* WAL */ + Pico_mcd->regs[0x04>>1].byte.l = 0x0b; + return cdc.wa.byte.l; + + case 0x0b: /* WAH */ + Pico_mcd->regs[0x04>>1].byte.l = 0x0c; + return cdc.wa.byte.h; + + case 0x0c: /* STAT0 */ + Pico_mcd->regs[0x04>>1].byte.l = 0x0d; + return cdc.stat[0]; + + case 0x0d: /* STAT1 (always return 0) */ + Pico_mcd->regs[0x04>>1].byte.l = 0x0e; + return 0x00; + + case 0x0e: /* STAT2 */ + Pico_mcd->regs[0x04>>1].byte.l = 0x0f; + return cdc.stat[2]; + + case 0x0f: /* STAT3 */ + { + uint8 data = cdc.stat[3]; + + /* clear !VALST 
(note: this is not 100% correct but BIOS do not seem to care) */ + cdc.stat[3] = BIT_VALST; + + /* clear pending decoder interrupt */ + cdc.ifstat |= BIT_DECI; + +#if 0 + /* no pending data transfer end interrupt */ + if ((cdc.ifstat | BIT_DTEI) || !(cdc.ifctrl & BIT_DTEIEN)) + { + /* clear pending level 5 interrupt */ + SekInterruptClearS68k(5); + } +#endif + + Pico_mcd->regs[0x04>>1].byte.l = 0x00; + return data; + } + + default: /* by default, COMIN is always empty */ + return 0xff; + } +} + +unsigned short cdc_host_r(void) +{ + /* check if data is available */ + if (!(cdc.ifstat & BIT_DTEN)) + { + /* read data word from CDC RAM buffer */ + uint8 *datap = cdc.ram + (cdc.dac.w & 0x3ffe); + uint16 data = (datap[0] << 8) | datap[1]; + +#ifdef LOG_CDC + error("CDC host read 0x%04x -> 0x%04x (dbc=0x%x) (%X)\n", cdc.dac.w, data, cdc.dbc.w, s68k.pc); +#endif + + /* increment data address counter */ + cdc.dac.w += 2; + + /* decrement data byte counter */ + cdc.dbc.w -= 2; + + /* end of transfer ? */ + if ((int16)cdc.dbc.w <= 0) + { + /* reset data byte counter (DBCH bits 4-7 should be set to 1) */ + cdc.dbc.w = 0xf000; + + /* clear !DTEN and !DTBSY */ + cdc.ifstat |= (BIT_DTBSY | BIT_DTEN); + + /* pending Data Transfer End interrupt */ + cdc.ifstat &= ~BIT_DTEI; + + /* Data Transfer End interrupt enabled ? */ + if (cdc.ifctrl & BIT_DTEIEN) + { + /* level 5 interrupt enabled ? 
*/ + if (Pico_mcd->regs[0x32>>1].byte.l & PCDS_IEN5) + { + /* update IRQ level */ + elprintf(EL_INTS, "cdc DTE irq 5"); + SekInterruptS68k(5); + } + } + + /* clear DSR bit & set EDT bit (SCD register $04) */ + Pico_mcd->regs[0x04>>1].byte.h = (Pico_mcd->regs[0x04>>1].byte.h & 0x07) | 0x80; + } + + return data; + } + +#ifdef LOG_CDC + error("error reading CDC host (data transfer disabled)\n"); +#endif + return 0xffff; +} + +// vim:shiftwidth=2:ts=2:expandtab diff --git a/pico/cd/genplus_macros.h b/pico/cd/genplus_macros.h index 8ac5d35b..04c381a7 100644 --- a/pico/cd/genplus_macros.h +++ b/pico/cd/genplus_macros.h @@ -12,6 +12,22 @@ #define int16 signed short #define int32 signed int +typedef union +{ + uint16 w; + struct + { +#if 1 + uint8 l; + uint8 h; +#else + uint8 h; + uint8 l; +#endif + } byte; + +} reg16_t; + #define READ_BYTE(BASE, ADDR) (BASE)[(ADDR)^1] #define WRITE_BYTE(BASE, ADDR, VAL) (BASE)[(ADDR)^1] = (VAL) diff --git a/pico/cd/mcd.c b/pico/cd/mcd.c index 24e99e44..33553613 100644 --- a/pico/cd/mcd.c +++ b/pico/cd/mcd.c @@ -46,6 +46,7 @@ PICO_INTERNAL void PicoPowerMCD(void) memset(&Pico_mcd->pcm, 0, sizeof(Pico_mcd->pcm)); memset(&Pico_mcd->m, 0, sizeof(Pico_mcd->m)); + cdc_init(); Reset_CD(); // cold reset state (tested) @@ -59,7 +60,9 @@ void pcd_soft_reset(void) { // Reset_CD(); // breaks Fahrenheit CD swap - LC89510_Reset(); + Pico_mcd->m.s68k_pend_ints = 0; + cdc_reset(); + CDD_Reset(); #ifdef _ASM_CD_MEMORY_C //PicoMemResetCDdecode(1); // don't have to call this in 2M mode #endif @@ -150,8 +153,7 @@ static void pcd_int3_timer_event(unsigned int now) static void pcd_dma_event(unsigned int now) { - int ddx = Pico_mcd->s68k_regs[4] & 7; - Update_CDC_TRansfer(ddx); + cdc_dma_update(); } typedef void (event_cb)(unsigned int now); @@ -355,9 +357,6 @@ void pcd_state_loaded(void) if (Pico_mcd->s68k_regs[0x31]) pcd_event_schedule(SekCycleAimS68k, PCD_EVENT_TIMER3, Pico_mcd->s68k_regs[0x31] * 384); - - if (Pico_mcd->scd.Status_CDC & 0x08) - 
Update_CDC_TRansfer(Pico_mcd->s68k_regs[4] & 7); } diff = cycles - Pico_mcd->pcm.update_cycles; diff --git a/pico/cd/memory.c b/pico/cd/memory.c index b0f5e4a8..d3a2927e 100644 --- a/pico/cd/memory.c +++ b/pico/cd/memory.c @@ -100,7 +100,7 @@ static u32 m68k_reg_read16(u32 a) d = *(u16 *)(Pico_mcd->bios + 0x72); goto end; case 8: - d = Read_CDC_Host(0); + d = cdc_host_r(); goto end; case 0xA: elprintf(EL_UIO, "m68k FIXME: reserved read"); @@ -286,9 +286,9 @@ u32 s68k_reg_read16(u32 a) elprintf(EL_CDREG3, "s68k_regs r3: %02x @%06x", (u8)d, SekPcS68k); return s68k_poll_detect(a, d); case 6: - return CDC_Read_Reg(); + return cdc_reg_r(); case 8: - return Read_CDC_Host(1); // Gens returns 0 here on byte reads + return cdc_host_r(); case 0xC: d = SekCyclesDoneS68k() - Pico_mcd->m.stopwatch_base_c; d /= 384; @@ -379,7 +379,7 @@ void s68k_reg_write8(u32 a, u32 d) //dprintf("s68k CDC reg addr: %x", d&0xf); break; case 7: - CDC_Write_Reg(d); + cdc_reg_w(d); return; case 0xa: elprintf(EL_CDREGS, "s68k set CDC dma addr"); diff --git a/pico/cd/memory_arm.s b/pico/cd/memory_arm.s index e19c5613..f3a1372a 100644 --- a/pico/cd/memory_arm.s +++ b/pico/cd/memory_arm.s @@ -49,7 +49,7 @@ @ externs, just for reference .extern Pico -.extern Read_CDC_Host +.extern cdc_host_r .extern m68k_reg_write8 .extern s68k_reg_read16 .extern s68k_reg_write8 @@ -195,12 +195,12 @@ m_m68k_read8_r07: bx lr m_m68k_read8_r08: mov r0, #0 - bl Read_CDC_Host @ TODO: make it local + bl cdc_host_r mov r0, r0, lsr #8 bx lr m_m68k_read8_r09: mov r0, #0 - b Read_CDC_Host + b cdc_host_r m_m68k_read8_r0c: add r1, r1, #0x110000 add r1, r1, #0x002200 @@ -292,7 +292,7 @@ m_m68k_read16_r06: bx lr m_m68k_read16_r08: mov r0, #0 - b Read_CDC_Host + b cdc_host_r m_m68k_read16_r0c: add r1, r1, #0x110000 add r1, r1, #0x002200 @@ -501,7 +501,7 @@ m_s68k_read16_regs: cmp r0, #8 bne s68k_reg_read16 mov r0, #1 - b Read_CDC_Host + b cdc_host_r @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ diff 
--git a/pico/cd/sek.c b/pico/cd/sek.c index 8d838628..d4914901 100644 --- a/pico/cd/sek.c +++ b/pico/cd/sek.c @@ -189,3 +189,17 @@ PICO_INTERNAL int SekInterruptS68k(int irq) return 0; } +void SekInterruptClearS68k(int irq) +{ + int level_new = new_irq_level(irq); + +#ifdef EMU_C68K + PicoCpuCS68k.irq = level_new; +#endif +#ifdef EMU_M68K + CPU_INT_LEVEL = level_new << 8; +#endif +#ifdef EMU_F68K + PicoCpuFS68k.interrupts[0] = level_new; +#endif +} diff --git a/pico/pico_int.h b/pico/pico_int.h index a8bf7ee4..d4d6d7de 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -417,38 +417,44 @@ struct mcd_misc typedef struct { - unsigned char bios[0x20000]; // 000000: 128K - union { // 020000: 512K - unsigned char prg_ram[0x80000]; - unsigned char prg_ram_b[4][0x20000]; - }; - union { // 0a0000: 256K - struct { - unsigned char word_ram2M[0x40000]; - unsigned char unused0[0x20000]; - }; - struct { - unsigned char unused1[0x20000]; - unsigned char word_ram1M[2][0x20000]; - }; - }; - union { // 100000: 64K - unsigned char pcm_ram[0x10000]; - unsigned char pcm_ram_b[0x10][0x1000]; - }; - // FIXME: should be short - unsigned char s68k_regs[0x200]; // 110000: GA, not CPU regs - unsigned char bram[0x2000]; // 110200: 8K - struct mcd_misc m; // 112200: misc - struct mcd_pcm pcm; // 112240: - _scd_toc TOC; // not to be saved - CDD cdd; - CDC cdc; - _scd scd; - int pcm_mixbuf[PCM_MIXBUF_LEN * 2]; - int pcm_mixpos; - char pcm_mixbuf_dirty; - char pcm_regs_dirty; + unsigned char bios[0x20000]; // 000000: 128K + union { // 020000: 512K + unsigned char prg_ram[0x80000]; + unsigned char prg_ram_b[4][0x20000]; + }; + union { // 0a0000: 256K + struct { + unsigned char word_ram2M[0x40000]; + unsigned char unused0[0x20000]; + }; + struct { + unsigned char unused1[0x20000]; + unsigned char word_ram1M[2][0x20000]; + }; + }; + union { // 100000: 64K + unsigned char pcm_ram[0x10000]; + unsigned char pcm_ram_b[0x10][0x1000]; + }; + union { + unsigned char s68k_regs[0x200]; // 110000: GA, not 
CPU regs + union { + struct { + unsigned char h; + unsigned char l; + } byte; + } regs[0x200/2]; + }; + unsigned char bram[0x2000]; // 110200: 8K + struct mcd_misc m; // 112200: misc + struct mcd_pcm pcm; // 112240: + _scd_toc TOC; // not to be saved + CDD cdd; + _scd scd; + int pcm_mixbuf[PCM_MIXBUF_LEN * 2]; + int pcm_mixpos; + char pcm_mixbuf_dirty; + char pcm_regs_dirty; } mcd_state; // XXX: this will need to be reworked for cart+cd support. @@ -610,6 +616,18 @@ void PicoWrite16_io(unsigned int a, unsigned int d); // pico/memory.c PICO_INTERNAL void PicoMemSetupPico(void); +// cd/cdc.c +void cdc_init(void); +void cdc_reset(void); +int cdc_context_save(unsigned char *state); +int cdc_context_load(unsigned char *state); +int cdc_context_load_old(unsigned char *state); +void cdc_dma_update(void); +int cdc_decoder_update(unsigned char header[4]); +void cdc_reg_w(unsigned char data); +unsigned char cdc_reg_r(void); +unsigned short cdc_host_r(void); + // cd/gfx.c void gfx_init(void); void gfx_start(unsigned int base); @@ -706,6 +724,7 @@ void SekTrace(int is_s68k); PICO_INTERNAL void SekInitS68k(void); PICO_INTERNAL int SekResetS68k(void); PICO_INTERNAL int SekInterruptS68k(int irq); +void SekInterruptClearS68k(int irq); // sound/sound.c PICO_INTERNAL void cdda_start_play(); diff --git a/pico/state.c b/pico/state.c index a1ceac2c..94cefade 100644 --- a/pico/state.c +++ b/pico/state.c @@ -148,10 +148,10 @@ typedef enum { CHUNK_BRAM, CHUNK_GA_REGS, CHUNK_PCM, - CHUNK_CDC, + CHUNK_CDC, // old CHUNK_CDD, // 20 CHUNK_SCD, - CHUNK_RC, + CHUNK_RC, // old CHUNK_MISC_CD, // CHUNK_IOPORTS, // versions < 1.70 did not save that.. 
@@ -176,6 +176,7 @@ typedef enum { // add new stuff here CHUNK_CD_EVT = 50, CHUNK_CD_GFX, + CHUNK_CD_CDC, // CHUNK_DEFAULT_COUNT, CHUNK_CARTHW_ = CHUNK_CARTHW, // 64 (defined in PicoInt) @@ -237,12 +238,17 @@ static int write_chunk(chunk_name_e name, int len, void *data, void *file) return (bwritten == len + 4 + 1); } +#define CHUNK_LIMIT_W 18772 // sizeof(cdc) + #define CHECKED_WRITE(name,len,data) { \ if (PicoStateProgressCB && name < CHUNK_DEFAULT_COUNT && chunk_names[name]) { \ strncpy(sbuff + 9, chunk_names[name], sizeof(sbuff) - 9); \ PicoStateProgressCB(sbuff); \ } \ - if (!write_chunk(name, len, data, file)) return 1; \ + if (data == buf2 && len > CHUNK_LIMIT_W) \ + goto out; \ + if (!write_chunk(name, len, data, file)) \ + goto out; \ } #define CHECKED_WRITE_BUFF(name,buff) { \ @@ -250,7 +256,8 @@ static int write_chunk(chunk_name_e name, int len, void *data, void *file) strncpy(sbuff + 9, chunk_names[name], sizeof(sbuff) - 9); \ PicoStateProgressCB(sbuff); \ } \ - if (!write_chunk(name, sizeof(buff), &buff, file)) return 1; \ + if (!write_chunk(name, sizeof(buff), &buff, file)) \ + goto out; \ } static int state_save(void *file) @@ -258,7 +265,9 @@ static int state_save(void *file) char sbuff[32] = "Saving.. "; unsigned char buff[0x60], buff_z80[Z80_STATE_SIZE]; void *ym2612_regs = YM2612GetRegs(); - int ver = 0x0170; // not really used.. + void *buf2 = NULL; + int ver = 0x0191; // not really used.. + int retval = -1; int len; areaWrite("PicoSEXT", 1, 8, file); @@ -290,6 +299,10 @@ static int state_save(void *file) if (PicoAHW & PAHW_MCD) { + buf2 = malloc(CHUNK_LIMIT_W); + if (buf2 == NULL) + return -1; + memset(buff, 0, sizeof(buff)); SekPackCpu(buff, 1); if (Pico_mcd->s68k_regs[3] & 4) // 1M mode? 
@@ -305,14 +318,16 @@ static int state_save(void *file) CHECKED_WRITE_BUFF(CHUNK_GA_REGS, Pico_mcd->s68k_regs); // GA regs, not CPU regs CHECKED_WRITE_BUFF(CHUNK_PCM, Pico_mcd->pcm); CHECKED_WRITE_BUFF(CHUNK_CDD, Pico_mcd->cdd); - CHECKED_WRITE_BUFF(CHUNK_CDC, Pico_mcd->cdc); CHECKED_WRITE_BUFF(CHUNK_SCD, Pico_mcd->scd); CHECKED_WRITE_BUFF(CHUNK_MISC_CD, Pico_mcd->m); memset(buff, 0, 0x40); memcpy(buff, pcd_event_times, sizeof(pcd_event_times)); CHECKED_WRITE(CHUNK_CD_EVT, 0x40, buff); - len = gfx_context_save(buff); - CHECKED_WRITE(CHUNK_CD_GFX, len, buff); + + len = gfx_context_save(buf2); + CHECKED_WRITE(CHUNK_CD_GFX, len, buf2); + len = cdc_context_save(buf2); + CHECKED_WRITE(CHUNK_CD_CDC, len, buf2); if (Pico_mcd->s68k_regs[3] & 4) // convert back wram_2M_to_1M(Pico_mcd->word_ram2M); @@ -358,7 +373,12 @@ static int state_save(void *file) CHECKED_WRITE(chwc->chunk, chwc->size, chwc->ptr); } - return 0; + retval = 0; + +out: + if (buf2 != NULL) + free(buf2); + return retval; } static int g_read_offs = 0; @@ -366,7 +386,7 @@ static int g_read_offs = 0; #define R_ERROR_RETURN(error) \ { \ elprintf(EL_STATUS, "load_state @ %x: " error, g_read_offs); \ - return 1; \ + goto out; \ } // when is eof really set? 
@@ -374,7 +394,6 @@ static int g_read_offs = 0; if (areaRead(data, 1, len, file) != len) { \ if (len == 1 && areaEof(file)) goto readend; \ R_ERROR_RETURN("areaRead: premature EOF\n"); \ - return 1; \ } \ g_read_offs += len; \ } @@ -390,14 +409,24 @@ static int g_read_offs = 0; #define CHECKED_READ_BUFF(buff) CHECKED_READ2(sizeof(buff), &buff); +#define CHUNK_LIMIT_R 0x10960 // sizeof(old_cdc) + +#define CHECKED_READ_LIM(data) { \ + if (len > CHUNK_LIMIT_R) \ + R_ERROR_RETURN("chunk size over limit."); \ + CHECKED_READ(len, data); \ +} + static int state_load(void *file) { unsigned char buff_m68k[0x60], buff_s68k[0x60]; unsigned char buff_z80[Z80_STATE_SIZE]; unsigned char buff_sh2[SH2_STATE_SIZE]; - unsigned char buff[0x40]; + unsigned char *buf = NULL; unsigned char chunk; void *ym2612_regs; + int len_check; + int retval = -1; char header[8]; int ver, len; @@ -405,6 +434,10 @@ static int state_load(void *file) memset(buff_s68k, 0, sizeof(buff_s68k)); memset(buff_z80, 0, sizeof(buff_z80)); + buf = malloc(CHUNK_LIMIT_R); + if (buf == NULL) + return -1; + g_read_offs = 0; CHECKED_READ(8, header); if (strncmp(header, "PicoSMCD", 8) && strncmp(header, "PicoSEXT", 8)) @@ -416,6 +449,7 @@ static int state_load(void *file) while (!areaEof(file)) { + len_check = 0; CHECKED_READ(1, &chunk); CHECKED_READ(4, &len); if (len < 0 || len > 1024*512) R_ERROR_RETURN("bad length"); @@ -465,18 +499,28 @@ static int state_load(void *file) case CHUNK_GA_REGS: CHECKED_READ_BUFF(Pico_mcd->s68k_regs); break; case CHUNK_PCM: CHECKED_READ_BUFF(Pico_mcd->pcm); break; case CHUNK_CDD: CHECKED_READ_BUFF(Pico_mcd->cdd); break; - case CHUNK_CDC: CHECKED_READ_BUFF(Pico_mcd->cdc); break; case CHUNK_SCD: CHECKED_READ_BUFF(Pico_mcd->scd); break; case CHUNK_MISC_CD: CHECKED_READ_BUFF(Pico_mcd->m); break; case CHUNK_CD_EVT: - CHECKED_READ_BUFF(buff); - memcpy(pcd_event_times, buff, sizeof(pcd_event_times)); + CHECKED_READ2(0x40, buf); + memcpy(pcd_event_times, buf, sizeof(pcd_event_times)); break; 
case CHUNK_CD_GFX: - CHECKED_READ2(0x18, buff); - gfx_context_load(buff); + CHECKED_READ_LIM(buf); + len_check = gfx_context_load(buf); + break; + + case CHUNK_CD_CDC: + CHECKED_READ_LIM(buf); + len_check = cdc_context_load(buf); + break; + + // old, to be removed: + case CHUNK_CDC: + CHECKED_READ_LIM(buf); + cdc_context_load_old(buf); break; // 32x stuff @@ -504,8 +548,8 @@ static int state_load(void *file) case CHUNK_32XPAL: CHECKED_READ_BUFF(Pico32xMem->pal); break; case CHUNK_32X_EVT: - CHECKED_READ_BUFF(buff); - memcpy(p32x_event_times, buff, sizeof(p32x_event_times)); + CHECKED_READ2(0x40, buf); + memcpy(p32x_event_times, buf, sizeof(p32x_event_times)); break; #endif default: @@ -523,7 +567,10 @@ static int state_load(void *file) areaSeek(file, len, SEEK_CUR); break; } -breakswitch:; +breakswitch: + if (len_check != 0 && len_check != len) + elprintf(EL_STATUS, "load_state: chunk %d has bad len %d/%d", + len, len_check); } readend: @@ -554,7 +601,11 @@ readend: cdda_start_play(); } - return 0; + retval = 0; + +out: + free(buf); + return retval; } static int state_load_gfx(void *file) @@ -608,6 +659,7 @@ static int state_load_gfx(void *file) } } +out: readend: return 0; } diff --git a/platform/base_readme.txt b/platform/base_readme.txt index 31d3b71b..175bcc1b 100644 --- a/platform/base_readme.txt +++ b/platform/base_readme.txt @@ -519,7 +519,7 @@ Texas Instruments SN76489 / SN76496 programmable tone/noise generator Homepage: http://www.mame.net/ Eke -CD graphics processor implementation (from Genesis Plus GX) +CD graphics processor and CD controller implementation (from Genesis Plus GX) Stephane Dallongeville Gens, MD/Mega CD/32X emulator. Some Sega CD code is based on this emu. @@ -565,6 +565,11 @@ Additional thanks Changelog --------- +1.91 (2013-10-) + + Switched to CD controller code from Eke's Genesis Plus GX. 
+ * Fixed overflow issue where cd emulation would break after + ~10 minutes of gameplay + 1.90 (2013-09-24) + 32X+CD emulation has been implemented. + CD graphics processor code has been replaced with much cleaner Eke's diff --git a/platform/common/common.mak b/platform/common/common.mak index 3a836e15..c651bcad 100644 --- a/platform/common/common.mak +++ b/platform/common/common.mak @@ -93,7 +93,7 @@ DEFINES += NO_SMS endif # CD SRCS_COMMON += $(R)pico/cd/mcd.c $(R)pico/cd/memory.c $(R)pico/cd/sek.c \ - $(R)pico/cd/LC89510.c $(R)pico/cd/cd_sys.c $(R)pico/cd/cd_file.c \ + $(R)pico/cd/cdc.c $(R)pico/cd/cd_sys.c $(R)pico/cd/cd_file.c \ $(R)pico/cd/cue.c $(R)pico/cd/gfx.c $(R)pico/cd/gfx_dma.c \ $(R)pico/cd/misc.c $(R)pico/cd/pcm.c $(R)pico/cd/buffering.c # 32X From 274fcc35aa20e9777a8e09630a94088757384329 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 6 Oct 2013 21:08:07 +0300 Subject: [PATCH 0008/1110] cd: switch to CD drive emu code from genplus same license, much cleaner code --- pico/cart.c | 4 +- pico/cd/LC89510.h | 43 -- pico/cd/buffering.c | 152 ---- pico/cd/cd_file.c | 402 ---------- pico/cd/cd_file.h | 27 - pico/cd/cd_image.c | 266 +++++++ pico/cd/cd_sys.c | 933 ----------------------- pico/cd/cd_sys.h | 109 --- pico/cd/cdc.c | 13 - pico/cd/cdd.c | 1328 +++++++++++++++++++++++++++++++++ pico/cd/cdd.h | 98 +++ pico/cd/cue.c | 2 + pico/cd/mcd.c | 24 +- pico/cd/memory.c | 19 +- pico/media.c | 6 +- pico/pico.h | 21 +- pico/pico_int.h | 27 +- pico/sound/sound.c | 60 +- pico/state.c | 24 +- platform/base_readme.txt | 4 +- platform/common/common.mak | 4 +- platform/common/config_file.c | 5 - platform/common/emu.c | 18 +- platform/common/menu_pico.c | 28 +- platform/libretro.c | 5 +- 25 files changed, 1798 insertions(+), 1824 deletions(-) delete mode 100644 pico/cd/LC89510.h delete mode 100644 pico/cd/buffering.c delete mode 100644 pico/cd/cd_file.c delete mode 100644 pico/cd/cd_file.h create mode 100644 pico/cd/cd_image.c delete mode 100644 pico/cd/cd_sys.c delete 
mode 100644 pico/cd/cd_sys.h create mode 100644 pico/cd/cdd.c create mode 100644 pico/cd/cdd.h diff --git a/pico/cart.c b/pico/cart.c index 6a835b63..a5c563d6 100644 --- a/pico/cart.c +++ b/pico/cart.c @@ -157,7 +157,7 @@ zip_failed: if (f == NULL) goto cso_failed; -#ifndef __EPOC32__ +#ifdef __GP2X__ /* we use our own buffering */ setvbuf(f, NULL, _IONBF, 0); #endif @@ -227,7 +227,7 @@ cso_failed: strncpy(file->ext, ext, sizeof(file->ext) - 1); fseek(f, 0, SEEK_SET); -#ifndef __EPOC32__ // makes things worse on Symbian +#ifdef __GP2X__ if (file->size > 0x400000) /* we use our own buffering */ setvbuf(f, NULL, _IONBF, 0); diff --git a/pico/cd/LC89510.h b/pico/cd/LC89510.h deleted file mode 100644 index d641ebb3..00000000 --- a/pico/cd/LC89510.h +++ /dev/null @@ -1,43 +0,0 @@ -/*********************************************************** - * * - * This source was taken from the Gens project * - * Written by Stéphane Dallongeville * - * Copyright (c) 2002 by Stéphane Dallongeville * - * Modified/adapted for PicoDrive by notaz, 2007 * - * * - ***********************************************************/ - -#ifndef _LC89510_H -#define _LC89510_H - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct -{ -// unsigned short Fader; // 34 -// unsigned short Control; // 36 -// unsigned short Cur_Comm;// unused - - // "Receive status" - unsigned short Status; - unsigned short Minute; - unsigned short Seconde; - unsigned short Frame; - unsigned char Ext; - unsigned char pad[3]; -} CDD; - - -PICO_INTERNAL void CDD_Export_Status(void); -PICO_INTERNAL void CDD_Import_Command(void); - -void CDD_Reset(void); - -#ifdef __cplusplus -}; -#endif - -#endif - diff --git a/pico/cd/buffering.c b/pico/cd/buffering.c deleted file mode 100644 index 84203369..00000000 --- a/pico/cd/buffering.c +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Buffering handling - * (C) notaz, 2007,2008 - * - * This work is licensed under the terms of MAME license. - * See COPYING file in the top-level directory. 
- */ - -#include "../pico_int.h" -#include "../cd/cue.h" - -int PicoCDBuffers = 0; -static unsigned char *cd_buffer = NULL; -static int prev_lba = 0x80000000; - -static int hits, reads; - -#undef dprintf -#define dprintf(...) - -void PicoCDBufferInit(void) -{ - void *tmp = NULL; - - prev_lba = 0x80000000; - hits = reads = 0; - - if (PicoCDBuffers <= 1) { - PicoCDBuffers = 0; - return; /* buffering off */ - } - - /* try alloc'ing until we succeed */ - while (PicoCDBuffers > 0) - { - tmp = realloc(cd_buffer, PicoCDBuffers * 2048 + 304); - if (tmp != NULL) break; - PicoCDBuffers >>= 1; - } - - if (PicoCDBuffers <= 0) return; /* buffering became off */ - - cd_buffer = tmp; -} - - -void PicoCDBufferFree(void) -{ - if (cd_buffer) { - free(cd_buffer); - cd_buffer = NULL; - } - if (reads) - elprintf(EL_STATUS, "CD buffer hits: %i/%i (%i%%)\n", hits, reads, hits * 100 / reads); -} - - -void PicoCDBufferFlush(void) -{ - prev_lba = 0x80000000; -} - - -/* this is was a try to fight slow SD access of GP2X */ -PICO_INTERNAL void PicoCDBufferRead(void *dest, int lba) -{ - int is_bin, offs, read_len, moved = 0; - reads++; - - is_bin = Pico_mcd->TOC.Tracks[0].ftype == CT_BIN; - - if (PicoCDBuffers <= 0) - { - /* no buffering */ - int where_seek = is_bin ? (lba * 2352 + 16) : (lba << 11); - pm_seek(Pico_mcd->TOC.Tracks[0].F, where_seek, SEEK_SET); - pm_read(dest, 2048, Pico_mcd->TOC.Tracks[0].F); - return; - } - - /* hit? */ - offs = lba - prev_lba; - if (offs >= 0 && offs < PicoCDBuffers) - { - hits++; - if (offs == 0) dprintf("CD buffer seek to old %i -> %i\n", prev_lba, lba); - memcpy32(dest, (int *)(cd_buffer + offs*2048), 2048/4); - return; - } - - if (prev_lba + PicoCDBuffers != lba) - { - int where_seek = is_bin ? 
(lba * 2352 + 16) : (lba << 11); - dprintf("CD buffer seek %i -> %i\n", prev_lba, lba); - pm_seek(Pico_mcd->TOC.Tracks[0].F, where_seek, SEEK_SET); - } - - dprintf("CD buffer miss %i -> %i\n", prev_lba, lba); - - if (lba < prev_lba && prev_lba - lba < PicoCDBuffers) - { - read_len = prev_lba - lba; - dprintf("CD buffer move=%i, read_len=%i", PicoCDBuffers - read_len, read_len); - memmove(cd_buffer + read_len*2048, cd_buffer, (PicoCDBuffers - read_len)*2048); - moved = 1; - } - else - { - read_len = PicoCDBuffers; - } - - if (PicoMessage != NULL && read_len >= 512) - { - PicoMessage("Buffering data..."); - } - - if (is_bin) - { - int i = 0; -#ifdef _PSP_FW_VERSION - int bufs = (read_len*2048) / (2048+304); - pm_read(cd_buffer, bufs*(2048+304), Pico_mcd->TOC.Tracks[0].F); - for (i = 1; i < bufs; i++) - // should really use memmove here, but my memcpy32 implementation is also suitable here - memcpy32((int *)(cd_buffer + i*2048), (int *)(cd_buffer + i*(2048+304)), 2048/4); -#endif - for (; i < read_len - 1; i++) - { - pm_read(cd_buffer + i*2048, 2048 + 304, Pico_mcd->TOC.Tracks[0].F); - // pm_seek(Pico_mcd->TOC.Tracks[0].F, 304, SEEK_CUR); // seeking is slower, in PSP case even more - } - // further data might be moved, do not overwrite - pm_read(cd_buffer + i*2048, 2048, Pico_mcd->TOC.Tracks[0].F); - pm_seek(Pico_mcd->TOC.Tracks[0].F, 304, SEEK_CUR); - } - else - { - pm_read(cd_buffer, read_len*2048, Pico_mcd->TOC.Tracks[0].F); - } - memcpy32(dest, (int *) cd_buffer, 2048/4); - prev_lba = lba; - - if (moved) - { - /* file pointer must point to the same data in file, as would-be data after our buffer */ - int where_seek; - lba += PicoCDBuffers; - where_seek = is_bin ? 
(lba * 2352 + 16) : (lba << 11); - pm_seek(Pico_mcd->TOC.Tracks[0].F, where_seek, SEEK_SET); - } -} - diff --git a/pico/cd/cd_file.c b/pico/cd/cd_file.c deleted file mode 100644 index 43bbb5d8..00000000 --- a/pico/cd/cd_file.c +++ /dev/null @@ -1,402 +0,0 @@ -/*********************************************************** - * * - * This source was taken from the Gens project * - * Written by Stéphane Dallongeville * - * Copyright (c) 2002 by Stéphane Dallongeville * - * Modified/adapted for PicoDrive by notaz, 2007 * - * * - ***********************************************************/ - -#include "../pico_int.h" -#include "cd_file.h" -#include "cue.h" - -//#define cdprintf(f,...) printf(f "\n",##__VA_ARGS__) // tmp - -static void to_upper(char *d, const char *s) -{ - for (; *s != 0; d++, s++) { - if ('a' <= *s && *s <= 'z') - *d = *s - 'a' + 'A'; - else - *d = *s; - } -} - -static int audio_track_mp3(const char *fname, int index) -{ - _scd_track *Tracks = Pico_mcd->TOC.Tracks; - FILE *tmp_file; - int fs, ret; - - tmp_file = fopen(fname, "rb"); - if (tmp_file == NULL) - return -1; - - ret = fseek(tmp_file, 0, SEEK_END); - fs = ftell(tmp_file); // used to calculate length - fseek(tmp_file, 0, SEEK_SET); - -#ifdef _PSP_FW_VERSION - // some systems (like PSP) can't have many open files at a time, - // so we work with their names instead. 
- fclose(tmp_file); - tmp_file = (void *) strdup(fname); -#endif - Tracks[index].KBtps = (short) mp3_get_bitrate(tmp_file, fs); - Tracks[index].KBtps >>= 3; - if (ret != 0 || Tracks[index].KBtps <= 0) - { - elprintf(EL_STATUS, "track %2i: mp3 bitrate %i", index+1, Tracks[index].KBtps); -#ifdef _PSP_FW_VERSION - free(tmp_file); -#else - fclose(tmp_file); -#endif - return -1; - } - - Tracks[index].F = tmp_file; - - // MP3 File - Tracks[index].ftype = CT_MP3; - fs *= 75; - fs /= Tracks[index].KBtps * 1000; - Tracks[index].Length = fs; - Tracks[index].Offset = 0; - - return 0; -} - -PICO_INTERNAL int Load_CD_Image(const char *cd_img_name, cd_img_type type) -{ - int i, j, num_track, Cur_LBA, index, ret; - int iso_name_len, missed, cd_img_sectors; - _scd_track *Tracks = Pico_mcd->TOC.Tracks; - char tmp_name[256], tmp_ext[10], tmp_ext_u[10]; - cue_data_t *cue_data = NULL; - pm_file *pmf; - static const char *exts[] = { - "%02d.mp3", " %02d.mp3", "-%02d.mp3", "_%02d.mp3", " - %02d.mp3", - "%d.mp3", " %d.mp3", "-%d.mp3", "_%d.mp3", " - %d.mp3", - }; - - if (PicoCDLoadProgressCB != NULL) - PicoCDLoadProgressCB(cd_img_name, 1); - - Unload_ISO(); - - /* is this a .cue? */ - cue_data = cue_parse(cd_img_name); - if (cue_data != NULL) { - cd_img_name = cue_data->tracks[1].fname; - Tracks[0].ftype = cue_data->tracks[1].type; - } - else - Tracks[0].ftype = type == CIT_BIN ? 
CT_BIN : CT_ISO; - - Tracks[0].F = pmf = pm_open(cd_img_name); - if (Tracks[0].F == NULL) - { - Tracks[0].ftype = 0; - Tracks[0].Length = 0; - if (cue_data != NULL) - cue_destroy(cue_data); - return -1; - } - - if (Tracks[0].ftype == CT_ISO) - cd_img_sectors = pmf->size >>= 11; // size in sectors - else cd_img_sectors = pmf->size /= 2352; - Tracks[0].Offset = 0; - - Tracks[0].MSF.M = 0; // minutes - Tracks[0].MSF.S = 2; // seconds - Tracks[0].MSF.F = 0; // frames - - elprintf(EL_STATUS, "Track 1: %02d:%02d:%02d %9i DATA %s", - Tracks[0].MSF.M, Tracks[0].MSF.S, Tracks[0].MSF.F, - Tracks[0].Length, cd_img_name); - - Cur_LBA = Tracks[0].Length = cd_img_sectors; - - if (cue_data != NULL) - { - if (cue_data->tracks[2].fname == NULL) { // NULL means track2 is in same file as track1 - Cur_LBA = Tracks[0].Length = cue_data->tracks[2].sector_offset; - } - i = 100 / cue_data->track_count+1; - for (num_track = 2; num_track <= cue_data->track_count; num_track++) - { - if (PicoCDLoadProgressCB != NULL) - PicoCDLoadProgressCB(cd_img_name, i * num_track); - index = num_track - 1; - Cur_LBA += cue_data->tracks[num_track].pregap; - if (cue_data->tracks[num_track].type == CT_MP3) { - ret = audio_track_mp3(cue_data->tracks[num_track].fname, index); - if (ret != 0) break; - } - else - { - Tracks[index].ftype = cue_data->tracks[num_track].type; - if (cue_data->tracks[num_track].fname != NULL) - { - pm_file *pmfn = pm_open(cue_data->tracks[num_track].fname); - if (pmfn != NULL) - { - // addume raw, ignore header for wav.. 
- Tracks[index].F = pmfn; - Tracks[index].Length = pmfn->size / 2352; - Tracks[index].Offset = cue_data->tracks[num_track].sector_offset; - } - else - { - elprintf(EL_STATUS, "track %2i (%s): can't determine length", - num_track, cue_data->tracks[num_track].fname); - Tracks[index].Length = 2*75; - Tracks[index].Offset = 0; - } - } - else - { - if (num_track < cue_data->track_count) - Tracks[index].Length = cue_data->tracks[num_track+1].sector_offset - - cue_data->tracks[num_track].sector_offset; - else - Tracks[index].Length = cd_img_sectors - cue_data->tracks[num_track].sector_offset; - Tracks[index].Offset = cue_data->tracks[num_track].sector_offset; - } - } - - if (cue_data->tracks[num_track].sector_xlength != 0) - // overriden by custom cue command - Tracks[index].Length = cue_data->tracks[num_track].sector_xlength; - - LBA_to_MSF(Cur_LBA, &Tracks[index].MSF); - Cur_LBA += Tracks[index].Length; - - elprintf(EL_STATUS, "Track %2i: %02d:%02d:%02d %9i AUDIO %s", num_track, Tracks[index].MSF.M, - Tracks[index].MSF.S, Tracks[index].MSF.F, Tracks[index].Length, - cue_data->tracks[num_track].fname); - } - cue_destroy(cue_data); - goto finish; - } - - /* mp3 track autosearch, Gens-like */ - iso_name_len = strlen(cd_img_name); - if (iso_name_len >= sizeof(tmp_name)) - iso_name_len = sizeof(tmp_name) - 1; - - for (num_track = 2, i = 0, missed = 0; i < 100 && missed < 4; i++) - { - if (PicoCDLoadProgressCB != NULL && i > 1) - PicoCDLoadProgressCB(cd_img_name, i + (100-i)*missed/4); - - for (j = 0; j < sizeof(exts)/sizeof(char *); j++) - { - int ext_len; - char *p; - - index = num_track - 1; - - sprintf(tmp_ext, exts[j], i); - ext_len = strlen(tmp_ext); - to_upper(tmp_ext_u, tmp_ext); - - memcpy(tmp_name, cd_img_name, iso_name_len + 1); - p = tmp_name + iso_name_len - 4; - - strcpy(p, tmp_ext); - ret = audio_track_mp3(tmp_name, index); - if (ret != 0) { - strcpy(p, tmp_ext_u); - ret = audio_track_mp3(tmp_name, index); - } - - if (ret != 0 && i > 1 && iso_name_len > 
ext_len) { - p = tmp_name + iso_name_len - ext_len; - strcpy(p, tmp_ext); - ret = audio_track_mp3(tmp_name, index); - if (ret != 0) { - strcpy(p, tmp_ext_u); - ret = audio_track_mp3(tmp_name, index); - } - } - - if (ret == 0) - { - LBA_to_MSF(Cur_LBA, &Tracks[index].MSF); - Cur_LBA += Tracks[index].Length; - - elprintf(EL_STATUS, "Track %2i: %02d:%02d:%02d %9i AUDIO - %s", num_track, Tracks[index].MSF.M, - Tracks[index].MSF.S, Tracks[index].MSF.F, Tracks[index].Length, tmp_name); - - num_track++; - missed = 0; - break; - } - } - if (ret != 0 && i > 1) missed++; - } - -finish: - Pico_mcd->TOC.Last_Track = num_track - 1; - - index = num_track - 1; - - LBA_to_MSF(Cur_LBA, &Tracks[index].MSF); - - elprintf(EL_STATUS, "End CD - %02d:%02d:%02d\n", Tracks[index].MSF.M, - Tracks[index].MSF.S, Tracks[index].MSF.F); - - if (PicoCDLoadProgressCB != NULL) - PicoCDLoadProgressCB(cd_img_name, 100); - - return 0; -} - - -PICO_INTERNAL void Unload_ISO(void) -{ - int i; - - if (Pico_mcd == NULL) return; - - if (Pico_mcd->TOC.Tracks[0].F) pm_close(Pico_mcd->TOC.Tracks[0].F); - - for(i = 1; i < 100; i++) - { - if (Pico_mcd->TOC.Tracks[i].F != NULL) - { - if (Pico_mcd->TOC.Tracks[i].ftype == CT_MP3) -#ifdef _PSP_FW_VERSION - free(Pico_mcd->TOC.Tracks[i].F); -#else - fclose(Pico_mcd->TOC.Tracks[i].F); -#endif - else - pm_close(Pico_mcd->TOC.Tracks[i].F); - } - } - memset(Pico_mcd->TOC.Tracks, 0, sizeof(Pico_mcd->TOC.Tracks)); -} - -#if 1*0 - -PICO_INTERNAL int FILE_Read_One_LBA_CDC(void) -{ - if (Pico_mcd->s68k_regs[0x36] & 1) // DATA - { - if (Pico_mcd->TOC.Tracks[0].F == NULL) return -1; - - // moved below.. 
- //fseek(Pico_mcd->TOC.Tracks[0].F, where_read, SEEK_SET); - //fread(cp_buf, 1, 2048, Pico_mcd->TOC.Tracks[0].F); - - cdprintf("Read file CDC 1 data sector :\n"); - } - else // AUDIO - { - cdprintf("Read file CDC 1 audio sector :\n"); - } - - // Update CDC stuff - - CDC_Update_Header(); - - if (Pico_mcd->s68k_regs[0x36] & 1) // DATA track - { - if (Pico_mcd->cdc.CTRL.B.B0 & 0x80) // DECEN = decoding enable - { - if (Pico_mcd->cdc.CTRL.B.B0 & 0x04) // WRRQ : this bit enable write to buffer - { - int where_read = 0; - - // CAUTION : lookahead bit not implemented - - if (Pico_mcd->scd.Cur_LBA < 0) - where_read = 0; - else if (Pico_mcd->scd.Cur_LBA >= Pico_mcd->TOC.Tracks[0].Length) - where_read = Pico_mcd->TOC.Tracks[0].Length - 1; - else where_read = Pico_mcd->scd.Cur_LBA; - - Pico_mcd->scd.Cur_LBA++; - - Pico_mcd->cdc.WA.N = (Pico_mcd->cdc.WA.N + 2352) & 0x7FFF; // add one sector to WA - Pico_mcd->cdc.PT.N = (Pico_mcd->cdc.PT.N + 2352) & 0x7FFF; - - *(unsigned int *)(Pico_mcd->cdc.Buffer + Pico_mcd->cdc.PT.N) = Pico_mcd->cdc.HEAD.N; - //memcpy(&Pico_mcd->cdc.Buffer[Pico_mcd->cdc.PT.N + 4], cp_buf, 2048); - - //pm_seek(Pico_mcd->TOC.Tracks[0].F, where_read, SEEK_SET); - //pm_read(Pico_mcd->cdc.Buffer + Pico_mcd->cdc.PT.N + 4, 2048, Pico_mcd->TOC.Tracks[0].F); - PicoCDBufferRead(Pico_mcd->cdc.Buffer + Pico_mcd->cdc.PT.N + 4, where_read); - - cdprintf("Read -> WA = %d Buffer[%d] =", Pico_mcd->cdc.WA.N, Pico_mcd->cdc.PT.N & 0x3FFF); - cdprintf("Header 1 = %.2X %.2X %.2X %.2X", Pico_mcd->cdc.HEAD.B.B0, - Pico_mcd->cdc.HEAD.B.B1, Pico_mcd->cdc.HEAD.B.B2, Pico_mcd->cdc.HEAD.B.B3); - cdprintf("Header 2 = %.2X %.2X %.2X %.2X --- %.2X %.2X\n\n", - Pico_mcd->cdc.Buffer[(Pico_mcd->cdc.PT.N + 0) & 0x3FFF], - Pico_mcd->cdc.Buffer[(Pico_mcd->cdc.PT.N + 1) & 0x3FFF], - Pico_mcd->cdc.Buffer[(Pico_mcd->cdc.PT.N + 2) & 0x3FFF], - Pico_mcd->cdc.Buffer[(Pico_mcd->cdc.PT.N + 3) & 0x3FFF], - Pico_mcd->cdc.Buffer[(Pico_mcd->cdc.PT.N + 4) & 0x3FFF], - 
Pico_mcd->cdc.Buffer[(Pico_mcd->cdc.PT.N + 5) & 0x3FFF]); - } - - } - } - else // music track - { - Pico_mcd->scd.Cur_LBA++; - - Pico_mcd->cdc.WA.N = (Pico_mcd->cdc.WA.N + 2352) & 0x7FFF; // add one sector to WA - Pico_mcd->cdc.PT.N = (Pico_mcd->cdc.PT.N + 2352) & 0x7FFF; - - if (Pico_mcd->cdc.CTRL.B.B0 & 0x80) // DECEN = decoding enable - { - if (Pico_mcd->cdc.CTRL.B.B0 & 0x04) // WRRQ : this bit enable write to buffer - { - // CAUTION : lookahead bit not implemented - - // this is pretty rough, but oh well - not much depends on this anyway - memcpy(&Pico_mcd->cdc.Buffer[Pico_mcd->cdc.PT.N], cdda_out_buffer, 2352); - } - } - } - - if (Pico_mcd->cdc.CTRL.B.B0 & 0x80) // DECEN = decoding enable - { - Pico_mcd->cdc.STAT.B.B0 = 0x80; - - if (Pico_mcd->cdc.CTRL.B.B0 & 0x10) // determine form bit form sub header ? - { - Pico_mcd->cdc.STAT.B.B2 = Pico_mcd->cdc.CTRL.B.B1 & 0x08; - } - else - { - Pico_mcd->cdc.STAT.B.B2 = Pico_mcd->cdc.CTRL.B.B1 & 0x0C; - } - - if (Pico_mcd->cdc.CTRL.B.B0 & 0x02) Pico_mcd->cdc.STAT.B.B3 = 0x20; // ECC done - else Pico_mcd->cdc.STAT.B.B3 = 0x00; // ECC not done - - if (Pico_mcd->cdc.IFCTRL & 0x20) - { - if (Pico_mcd->s68k_regs[0x33] & (1<<5)) - { - elprintf(EL_INTS, "cdc dec irq 5"); - SekInterruptS68k(5); - } - - Pico_mcd->cdc.IFSTAT &= ~0x20; // DEC interrupt happen - Pico_mcd->cdc.Decode_Reg_Read = 0; // Reset read after DEC int - } - } - - - return 0; -} - -#endif diff --git a/pico/cd/cd_file.h b/pico/cd/cd_file.h deleted file mode 100644 index f9bb8eac..00000000 --- a/pico/cd/cd_file.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef _CD_FILE_H -#define _CD_FILE_H - -#ifdef __cplusplus -extern "C" { -#endif - -typedef enum -{ - CIT_NOT_CD = 0, - CIT_ISO, - CIT_BIN, - CIT_CUE -} -cd_img_type; - - -PICO_INTERNAL int Load_CD_Image(const char *iso_name, cd_img_type type); -PICO_INTERNAL void Unload_ISO(void); -PICO_INTERNAL int FILE_Read_One_LBA_CDC(void); - - -#ifdef __cplusplus -}; -#endif - -#endif diff --git a/pico/cd/cd_image.c 
b/pico/cd/cd_image.c new file mode 100644 index 00000000..97c8f3f0 --- /dev/null +++ b/pico/cd/cd_image.c @@ -0,0 +1,266 @@ +/* + * CD image handler + * (C) notaz, 2007,2013 + * + * This work is licensed under the terms of MAME license. + * See COPYING file in the top-level directory. + */ + +#include "../pico_int.h" +#include "genplus_macros.h" +#include "cdd.h" +#include "cue.h" + +static int handle_mp3(const char *fname, int index) +{ + track_t *track = &cdd.toc.tracks[index]; + FILE *tmp_file; + int kBps; + int fs, ret; + + tmp_file = fopen(fname, "rb"); + if (tmp_file == NULL) + return -1; + + ret = fseek(tmp_file, 0, SEEK_END); + fs = ftell(tmp_file); + fseek(tmp_file, 0, SEEK_SET); + +#ifdef _PSP_FW_VERSION + // some systems (like PSP) can't have many open files at a time, + // so we work with their names instead. + fclose(tmp_file); + tmp_file = (void *) strdup(fname); +#endif + + kBps = mp3_get_bitrate(tmp_file, fs) / 8; + if (ret != 0 || kBps <= 0) + { + elprintf(EL_STATUS, "track %2i: mp3 bitrate %i", index+1, kBps); +#ifdef _PSP_FW_VERSION + free(tmp_file); +#else + fclose(tmp_file); +#endif + return -1; + } + + track->fd = tmp_file; + track->offset = 0; + + fs *= 75; + fs /= kBps * 1000; + return fs; +} + +static void to_upper(char *d, const char *s) +{ + for (; *s != 0; d++, s++) { + if ('a' <= *s && *s <= 'z') + *d = *s - 'a' + 'A'; + else + *d = *s; + } +} + +// cdd.c uses lba - 150 +static void sprintf_lba(char *buf, size_t size, int lba) +{ + lba += 150; + snprintf(buf, size, "%02d:%02d:%02d", lba / 60 / 75, + (lba / 75) % 60, lba % 75); +} + +int load_cd_image(const char *cd_img_name, int *type) +{ + static const char *exts[] = { + "%02d.mp3", " %02d.mp3", "-%02d.mp3", "_%02d.mp3", " - %02d.mp3", + "%d.mp3", " %d.mp3", "-%d.mp3", "_%d.mp3", " - %d.mp3", + }; + int i, j, n, lba, index, length, ret; + int iso_name_len, missed, cd_img_sectors; + char tmp_name[256], tmp_ext[10], tmp_ext_u[10]; + track_t *tracks = cdd.toc.tracks; + cue_data_t 
*cue_data = NULL; + pm_file *pmf; + + if (PicoCDLoadProgressCB != NULL) + PicoCDLoadProgressCB(cd_img_name, 1); + + Pico_mcd->cdda_type = CT_UNKNOWN; + + /* is this a .cue? */ + cue_data = cue_parse(cd_img_name); + if (cue_data != NULL) { + cd_img_name = cue_data->tracks[1].fname; + *type = cue_data->tracks[1].type; + } + + pmf = pm_open(cd_img_name); + if (pmf == NULL) + { + if (cue_data != NULL) + cue_destroy(cue_data); + return -1; + } + tracks[0].fd = pmf; + + if (*type == CT_ISO) + cd_img_sectors = pmf->size >>= 11; // size in sectors + else cd_img_sectors = pmf->size /= 2352; + + // cdd.c operates with lba - 150 + tracks[0].start = 0; + tracks[0].end = cd_img_sectors; + tracks[0].offset = 0; + + sprintf_lba(tmp_ext, sizeof(tmp_ext), 0); + elprintf(EL_STATUS, "Track 1: %s %9i DATA %s", + tmp_ext, tracks[0].end, cd_img_name); + + lba = cd_img_sectors; + + if (cue_data != NULL) + { + if (cue_data->tracks[2].fname == NULL) { + // NULL fname means track2 is in same file as track1 + lba = tracks[0].end = cue_data->tracks[2].sector_offset; + } + i = 100 / cue_data->track_count + 1; // progress display + + for (n = 2; n <= cue_data->track_count; n++) + { + if (PicoCDLoadProgressCB != NULL) + PicoCDLoadProgressCB(cd_img_name, i * n); + + index = n - 1; + lba += cue_data->tracks[n].pregap; + if (cue_data->tracks[n].type == CT_MP3) { + ret = handle_mp3(cue_data->tracks[n].fname, index); + if (ret < 0) + break; + length = ret; + } + else if (cue_data->tracks[n].fname != NULL) + { + pm_file *f = pm_open(cue_data->tracks[n].fname); + if (f != NULL) + { + // assume raw, ignore header for wav.. 
+ tracks[index].fd = f; + tracks[index].offset = cue_data->tracks[n].sector_offset; + length = f->size / 2352; + } + else + { + elprintf(EL_STATUS, "track %2i (%s): can't determine length", + n, cue_data->tracks[n].fname); + tracks[index].offset = 0; + length = 2*75; + } + } + else + { + if (n < cue_data->track_count) + length = cue_data->tracks[n+1].sector_offset - + cue_data->tracks[n].sector_offset; + else + length = cd_img_sectors - cue_data->tracks[n].sector_offset; + tracks[index].offset = cue_data->tracks[n].sector_offset; + } + + if (cue_data->tracks[n].sector_xlength != 0) + // overriden by custom cue command + length = cue_data->tracks[n].sector_xlength; + + Pico_mcd->cdda_type = cue_data->tracks[n].type; + + tracks[index].start = lba; + lba += length; + tracks[index].end = lba; + + sprintf_lba(tmp_ext, sizeof(tmp_ext), tracks[index].start); + elprintf(EL_STATUS, "Track %2i: %s %9i AUDIO %s", + n, tmp_ext, length, cue_data->tracks[n].fname); + } + cue_destroy(cue_data); + goto finish; + } + + /* mp3 track autosearch, Gens-like */ + iso_name_len = strlen(cd_img_name); + if (iso_name_len >= sizeof(tmp_name)) + iso_name_len = sizeof(tmp_name) - 1; + + for (n = 2, i = 0, missed = 0; i < 100 && missed < 4; i++) + { + if (PicoCDLoadProgressCB != NULL && i > 1) + PicoCDLoadProgressCB(cd_img_name, i + (100-i)*missed/4); + + for (j = 0; j < sizeof(exts)/sizeof(char *); j++) + { + int ext_len; + char *p; + + index = n - 1; + + snprintf(tmp_ext, sizeof(tmp_ext), exts[j], i); + ext_len = strlen(tmp_ext); + to_upper(tmp_ext_u, tmp_ext); + + memcpy(tmp_name, cd_img_name, iso_name_len + 1); + p = tmp_name + iso_name_len - 4; + + strcpy(p, tmp_ext); + ret = handle_mp3(tmp_name, index); + if (ret <= 0) { + strcpy(p, tmp_ext_u); + ret = handle_mp3(tmp_name, index); + } + + if (ret <= 0 && i > 1 && iso_name_len > ext_len) { + p = tmp_name + iso_name_len - ext_len; + strcpy(p, tmp_ext); + ret = handle_mp3(tmp_name, index); + if (ret <= 0) { + strcpy(p, tmp_ext_u); + ret = 
handle_mp3(tmp_name, index); + } + } + + if (ret > 0) + { + length = ret; + tracks[index].start = lba; + lba += length; + tracks[index].end = lba; + + Pico_mcd->cdda_type = CT_MP3; + + sprintf_lba(tmp_ext, sizeof(tmp_ext), tracks[index].start); + elprintf(EL_STATUS, "Track %2i: %s %9i AUDIO - %s", + n, tmp_ext, length, tmp_name); + + n++; + missed = 0; + break; + } + } + if (ret <= 0 && i > 1) + missed++; + } + +finish: + cdd.toc.last = n - 1; + cdd.toc.end = lba; + + sprintf_lba(tmp_ext, sizeof(tmp_ext), cdd.toc.end); + elprintf(EL_STATUS, "End CD - %s\n", tmp_ext); + + if (PicoCDLoadProgressCB != NULL) + PicoCDLoadProgressCB(cd_img_name, 100); + + return 0; +} + +// vim:shiftwidth=2:ts=2:expandtab diff --git a/pico/cd/cd_sys.c b/pico/cd/cd_sys.c deleted file mode 100644 index f7cd7b5e..00000000 --- a/pico/cd/cd_sys.c +++ /dev/null @@ -1,933 +0,0 @@ -/*********************************************************** - * * - * This source file was taken from the Gens project * - * Written by Stéphane Dallongeville * - * Copyright (c) 2002 by Stéphane Dallongeville * - * Modified/adapted for PicoDrive by notaz, 2007 * - * * - ***********************************************************/ - -#include - -#include "../pico_int.h" -#include "cd_sys.h" -#include "cd_file.h" - -#define DEBUG_CD - -#define TRAY_OPEN 0x0500 // TRAY OPEN CDD status -#define NOCD 0x0000 // CD removed CDD status -#define STOPPED 0x0900 // STOPPED CDD status (happen after stop or close tray command) -#define READY 0x0400 // READY CDD status (also used for seeking) -#define FAST_FOW 0x0300 // FAST FORWARD track CDD status -#define FAST_REV 0x10300 // FAST REVERSE track CDD status -#define PLAYING 0x0100 // PLAYING audio track CDD status - -//#undef cdprintf -//#define cdprintf(x, ...) 
elprintf(EL_STATUS, x, ##__VA_ARGS__) - -#define CDC_Update_Header() - -static int CD_Present = 0; - - -#define CHECK_TRAY_OPEN \ -if (Pico_mcd->scd.Status_CDD == TRAY_OPEN) \ -{ \ - Pico_mcd->cdd.Status = Pico_mcd->scd.Status_CDD; \ - \ - Pico_mcd->cdd.Minute = 0; \ - Pico_mcd->cdd.Seconde = 0; \ - Pico_mcd->cdd.Frame = 0; \ - Pico_mcd->cdd.Ext = 0; \ - \ - Pico_mcd->scd.CDD_Complete = 1; \ - \ - return 2; \ -} - - -#define CHECK_CD_PRESENT \ -if (!CD_Present) \ -{ \ - Pico_mcd->scd.Status_CDD = NOCD; \ - Pico_mcd->cdd.Status = Pico_mcd->scd.Status_CDD; \ - \ - Pico_mcd->cdd.Minute = 0; \ - Pico_mcd->cdd.Seconde = 0; \ - Pico_mcd->cdd.Frame = 0; \ - Pico_mcd->cdd.Ext = 0; \ - \ - Pico_mcd->scd.CDD_Complete = 1; \ - \ - return 3; \ -} - - -static int MSF_to_LBA(_msf *MSF) -{ - return (MSF->M * 60 * 75) + (MSF->S * 75) + MSF->F - 150; -} - - -PICO_INTERNAL void LBA_to_MSF(int lba, _msf *MSF) -{ - if (lba < -150) lba = 0; - else lba += 150; - MSF->M = lba / (60 * 75); - MSF->S = (lba / 75) % 60; - MSF->F = lba % 75; -} - - -static unsigned int MSF_to_Track(_msf *MSF) -{ - int i, Start, Cur; - - Start = (MSF->M << 16) + (MSF->S << 8) + MSF->F; - - for(i = 1; i <= (Pico_mcd->TOC.Last_Track + 1); i++) - { - Cur = Pico_mcd->TOC.Tracks[i - 1].MSF.M << 16; - Cur += Pico_mcd->TOC.Tracks[i - 1].MSF.S << 8; - Cur += Pico_mcd->TOC.Tracks[i - 1].MSF.F; - - if (Cur > Start) break; - } - - --i; - - if (i > Pico_mcd->TOC.Last_Track) return 100; - else if (i < 1) i = 1; - - return (unsigned) i; -} - - -static unsigned int LBA_to_Track(int lba) -{ - _msf MSF; - - LBA_to_MSF(lba, &MSF); - return MSF_to_Track(&MSF); -} - - -static void Track_to_MSF(int track, _msf *MSF) -{ - if (track < 1) track = 1; - else if (track > Pico_mcd->TOC.Last_Track) track = Pico_mcd->TOC.Last_Track; - - MSF->M = Pico_mcd->TOC.Tracks[track - 1].MSF.M; - MSF->S = Pico_mcd->TOC.Tracks[track - 1].MSF.S; - MSF->F = Pico_mcd->TOC.Tracks[track - 1].MSF.F; -} - - -PICO_INTERNAL int Track_to_LBA(int track) -{ - 
_msf MSF; - - Track_to_MSF(track, &MSF); - return MSF_to_LBA(&MSF); -} - - -PICO_INTERNAL void Check_CD_Command(void) -{ - cdprintf("CHECK CD COMMAND"); - - // Check CDC - if (Pico_mcd->scd.Status_CDC & 1) // CDC is reading data ... - { - cdprintf("Got a read command"); - - // DATA ? - if (Pico_mcd->scd.Cur_Track == 1) { - Pico_mcd->s68k_regs[0x36] |= 0x01; - - if (Pico_mcd->scd.File_Add_Delay == 0) - { - unsigned char header[4]; - _msf MSF; - - LBA_to_MSF(Pico_mcd->scd.Cur_LBA, &MSF); - - header[0] = INT_TO_BCDB(MSF.M); - header[1] = INT_TO_BCDB(MSF.S); - header[2] = INT_TO_BCDB(MSF.F); - header[3] = 0x01; - - //FILE_Read_One_LBA_CDC(); - Pico_mcd->scd.Cur_LBA += - cdc_decoder_update(header); - } - else Pico_mcd->scd.File_Add_Delay--; - } - else { - Pico_mcd->s68k_regs[0x36] &= ~0x01; // AUDIO - unsigned char header[4] = { 0, }; - cdc_decoder_update(header); - } - } - - // Check CDD - if (Pico_mcd->scd.CDD_Complete) - { - Pico_mcd->scd.CDD_Complete = 0; - - CDD_Export_Status(); - } - - if (Pico_mcd->scd.Status_CDD == FAST_FOW) - { - Pico_mcd->scd.Cur_LBA += 10; - CDC_Update_Header(); - - } - else if (Pico_mcd->scd.Status_CDD == FAST_REV) - { - Pico_mcd->scd.Cur_LBA -= 10; - if (Pico_mcd->scd.Cur_LBA < -150) Pico_mcd->scd.Cur_LBA = -150; - CDC_Update_Header(); - } -} - - -PICO_INTERNAL int Init_CD_Driver(void) -{ - return 0; -} - - -PICO_INTERNAL void End_CD_Driver(void) -{ - Unload_ISO(); -} - - -PICO_INTERNAL void Reset_CD(void) -{ - Pico_mcd->scd.Cur_Track = 0; - Pico_mcd->scd.Cur_LBA = -150; - Pico_mcd->scd.Status_CDC &= ~1; - if (Pico_mcd->scd.Status_CDD != TRAY_OPEN) - Pico_mcd->scd.Status_CDD = CD_Present ? 
READY : NOCD; - Pico_mcd->scd.CDD_Complete = 0; - Pico_mcd->scd.File_Add_Delay = 0; -} - - -int Insert_CD(const char *cdimg_name, int type) -{ - int ret = 1; - - CD_Present = 0; - - if (cdimg_name != NULL && type != CIT_NOT_CD) - { - ret = Load_CD_Image(cdimg_name, type); - if (ret == 0) { - CD_Present = 1; - - if (Pico_mcd->scd.Status_CDD == TRAY_OPEN) - { - if (Pico_mcd->bios[0x122 ^ 1] == '2') - Close_Tray_CDD_cC(); - // else bios will issue it - } - else - { - Pico_mcd->scd.Status_CDD = READY; - } - } - } - - if (Pico_mcd->scd.Status_CDD != TRAY_OPEN && !CD_Present) - Pico_mcd->scd.Status_CDD = NOCD; - - return ret; -} - - -int Stop_CD(void) -{ - int ret = CD_Present; - - Unload_ISO(); - CD_Present = 0; - - return ret; -} - - -/* -PICO_INTERNAL void Change_CD(void) -{ - if (Pico_mcd->scd.Status_CDD == TRAY_OPEN) Close_Tray_CDD_cC(); - else Open_Tray_CDD_cD(); -} -*/ - -PICO_INTERNAL int Get_Status_CDD_c0(void) -{ - cdprintf("Status command : Cur LBA = %d", Pico_mcd->scd.Cur_LBA); - - // Clear immediat status - if ((Pico_mcd->cdd.Status & 0x0F00) == 0x0200) - Pico_mcd->cdd.Status = (Pico_mcd->scd.Status_CDD & 0xFF00) | (Pico_mcd->cdd.Status & 0x00FF); - else if ((Pico_mcd->cdd.Status & 0x0F00) == 0x0700) - Pico_mcd->cdd.Status = (Pico_mcd->scd.Status_CDD & 0xFF00) | (Pico_mcd->cdd.Status & 0x00FF); - else if ((Pico_mcd->cdd.Status & 0x0F00) == 0x0E00) - Pico_mcd->cdd.Status = (Pico_mcd->scd.Status_CDD & 0xFF00) | (Pico_mcd->cdd.Status & 0x00FF); - - Pico_mcd->scd.CDD_Complete = 1; - - return 0; -} - - -PICO_INTERNAL int Stop_CDD_c1(void) -{ - CHECK_TRAY_OPEN - - Pico_mcd->scd.Status_CDC &= ~1; // Stop CDC read - - if (CD_Present) Pico_mcd->scd.Status_CDD = STOPPED; - else Pico_mcd->scd.Status_CDD = NOCD; - Pico_mcd->cdd.Status = 0x0000; - - Pico_mcd->s68k_regs[0x36] |= 0x01; // Data bit set because stopped - - Pico_mcd->cdd.Minute = 0; - Pico_mcd->cdd.Seconde = 0; - Pico_mcd->cdd.Frame = 0; - Pico_mcd->cdd.Ext = 0; - - Pico_mcd->scd.CDD_Complete = 1; - - return 
0; -} - - -PICO_INTERNAL int Get_Pos_CDD_c20(void) -{ - _msf MSF; - - cdprintf("command 200 : Cur LBA = %d", Pico_mcd->scd.Cur_LBA); - - CHECK_TRAY_OPEN - - Pico_mcd->cdd.Status &= 0xFF; - if (!CD_Present) - { - Pico_mcd->scd.Status_CDD = NOCD; - Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - } -// else if (!(CDC.CTRL.B.B0 & 0x80)) Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - - cdprintf("Status CDD = %.4X Status = %.4X", Pico_mcd->scd.Status_CDD, Pico_mcd->cdd.Status); - - LBA_to_MSF(Pico_mcd->scd.Cur_LBA, &MSF); - - Pico_mcd->cdd.Minute = INT_TO_BCDW(MSF.M); - Pico_mcd->cdd.Seconde = INT_TO_BCDW(MSF.S); - Pico_mcd->cdd.Frame = INT_TO_BCDW(MSF.F); - Pico_mcd->cdd.Ext = 0; - - Pico_mcd->scd.CDD_Complete = 1; - - return 0; -} - - -PICO_INTERNAL int Get_Track_Pos_CDD_c21(void) -{ - int elapsed_time; - _msf MSF; - - cdprintf("command 201 : Cur LBA = %d", Pico_mcd->scd.Cur_LBA); - - CHECK_TRAY_OPEN - - Pico_mcd->cdd.Status &= 0xFF; - if (!CD_Present) - { - Pico_mcd->scd.Status_CDD = NOCD; - Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - } -// else if (!(CDC.CTRL.B.B0 & 0x80)) Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - - elapsed_time = Pico_mcd->scd.Cur_LBA - Track_to_LBA(LBA_to_Track(Pico_mcd->scd.Cur_LBA)); - LBA_to_MSF(elapsed_time - 150, &MSF); - - cdprintf(" elapsed = %d", elapsed_time); - - Pico_mcd->cdd.Minute = INT_TO_BCDW(MSF.M); - Pico_mcd->cdd.Seconde = INT_TO_BCDW(MSF.S); - Pico_mcd->cdd.Frame = INT_TO_BCDW(MSF.F); - Pico_mcd->cdd.Ext = 0; - - Pico_mcd->scd.CDD_Complete = 1; - - return 0; -} - - -PICO_INTERNAL int Get_Current_Track_CDD_c22(void) -{ - cdprintf("Status CDD = %.4X Status = %.4X", Pico_mcd->scd.Status_CDD, Pico_mcd->cdd.Status); - - CHECK_TRAY_OPEN - - Pico_mcd->cdd.Status &= 0xFF; - if (!CD_Present) - { - Pico_mcd->scd.Status_CDD = NOCD; - Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - } -// else if 
(!(CDC.CTRL.B.B0 & 0x80)) Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - - Pico_mcd->scd.Cur_Track = LBA_to_Track(Pico_mcd->scd.Cur_LBA); - - if (Pico_mcd->scd.Cur_Track == 100) Pico_mcd->cdd.Minute = 0x0A02; - else Pico_mcd->cdd.Minute = INT_TO_BCDW(Pico_mcd->scd.Cur_Track); - Pico_mcd->cdd.Seconde = 0; - Pico_mcd->cdd.Frame = 0; - Pico_mcd->cdd.Ext = 0; - - Pico_mcd->scd.CDD_Complete = 1; - - return 0; -} - - -PICO_INTERNAL int Get_Total_Lenght_CDD_c23(void) -{ - CHECK_TRAY_OPEN - - Pico_mcd->cdd.Status &= 0xFF; - if (!CD_Present) - { - Pico_mcd->scd.Status_CDD = NOCD; - Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - } -// else if (!(CDC.CTRL.B.B0 & 0x80)) Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - - Pico_mcd->cdd.Minute = INT_TO_BCDW(Pico_mcd->TOC.Tracks[Pico_mcd->TOC.Last_Track].MSF.M); - Pico_mcd->cdd.Seconde = INT_TO_BCDW(Pico_mcd->TOC.Tracks[Pico_mcd->TOC.Last_Track].MSF.S); - Pico_mcd->cdd.Frame = INT_TO_BCDW(Pico_mcd->TOC.Tracks[Pico_mcd->TOC.Last_Track].MSF.F); - Pico_mcd->cdd.Ext = 0; - - Pico_mcd->scd.CDD_Complete = 1; - - return 0; -} - - -PICO_INTERNAL int Get_First_Last_Track_CDD_c24(void) -{ - CHECK_TRAY_OPEN - - Pico_mcd->cdd.Status &= 0xFF; - if (!CD_Present) - { - Pico_mcd->scd.Status_CDD = NOCD; - } -// else if (!(CDC.CTRL.B.B0 & 0x80)) Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - - Pico_mcd->cdd.Minute = INT_TO_BCDW(1); - Pico_mcd->cdd.Seconde = INT_TO_BCDW(Pico_mcd->TOC.Last_Track); - Pico_mcd->cdd.Frame = 0; - Pico_mcd->cdd.Ext = 0; - - Pico_mcd->scd.CDD_Complete = 1; - - return 0; -} - - -PICO_INTERNAL int Get_Track_Adr_CDD_c25(void) -{ - int track_number; - - CHECK_TRAY_OPEN - - // track number in TC4 & TC5 - - track_number = (Pico_mcd->s68k_regs[0x38+10+4] & 0xF) * 10 + (Pico_mcd->s68k_regs[0x38+10+5] & 0xF); - - Pico_mcd->cdd.Status &= 0xFF; - if (!CD_Present) 
- { - Pico_mcd->scd.Status_CDD = NOCD; - Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - } -// else if (!(CDC.CTRL.B.B0 & 0x80)) Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - - if (track_number > Pico_mcd->TOC.Last_Track) track_number = Pico_mcd->TOC.Last_Track; - else if (track_number < 1) track_number = 1; - - Pico_mcd->cdd.Minute = INT_TO_BCDW(Pico_mcd->TOC.Tracks[track_number - 1].MSF.M); - Pico_mcd->cdd.Seconde = INT_TO_BCDW(Pico_mcd->TOC.Tracks[track_number - 1].MSF.S); - Pico_mcd->cdd.Frame = INT_TO_BCDW(Pico_mcd->TOC.Tracks[track_number - 1].MSF.F); - Pico_mcd->cdd.Ext = track_number % 10; - - if (track_number == 1) Pico_mcd->cdd.Frame |= 0x0800; // data track - - Pico_mcd->scd.CDD_Complete = 1; - return 0; -} - - -PICO_INTERNAL int Play_CDD_c3(void) -{ - _msf MSF; - int delay, new_lba; - - CHECK_TRAY_OPEN - CHECK_CD_PRESENT - - // MSF of the track to play in TC buffer - - MSF.M = (Pico_mcd->s68k_regs[0x38+10+2] & 0xF) * 10 + (Pico_mcd->s68k_regs[0x38+10+3] & 0xF); - MSF.S = (Pico_mcd->s68k_regs[0x38+10+4] & 0xF) * 10 + (Pico_mcd->s68k_regs[0x38+10+5] & 0xF); - MSF.F = (Pico_mcd->s68k_regs[0x38+10+6] & 0xF) * 10 + (Pico_mcd->s68k_regs[0x38+10+7] & 0xF); - - Pico_mcd->scd.Cur_Track = MSF_to_Track(&MSF); - - new_lba = MSF_to_LBA(&MSF); - delay = new_lba - Pico_mcd->scd.Cur_LBA; - if (delay < 0) delay = -delay; - delay >>= 12; - - if (Pico_mcd->scd.Cur_LBA > 0 && delay < 13) - // based on genplus GX - delay = 13; - - Pico_mcd->scd.Cur_LBA = new_lba; - CDC_Update_Header(); - - cdprintf("Read : Cur LBA = %d, M=%d, S=%d, F=%d", Pico_mcd->scd.Cur_LBA, MSF.M, MSF.S, MSF.F); - - if (Pico_mcd->scd.Status_CDD != PLAYING) delay += 20; - - Pico_mcd->scd.Status_CDD = PLAYING; - Pico_mcd->cdd.Status = 0x0102; -// Pico_mcd->cdd.Status = COMM_OK; - - if (Pico_mcd->scd.File_Add_Delay == 0) Pico_mcd->scd.File_Add_Delay = delay; - - if (Pico_mcd->scd.Cur_Track == 1) - { - Pico_mcd->s68k_regs[0x36] |= 0x01; // DATA - 
} - else - { - Pico_mcd->s68k_regs[0x36] &= ~0x01; // AUDIO - cdda_start_play(); - } - - if (Pico_mcd->scd.Cur_Track == 100) Pico_mcd->cdd.Minute = 0x0A02; - else Pico_mcd->cdd.Minute = INT_TO_BCDW(Pico_mcd->scd.Cur_Track); - Pico_mcd->cdd.Seconde = 0; - Pico_mcd->cdd.Frame = 0; - Pico_mcd->cdd.Ext = 0; - - Pico_mcd->scd.Status_CDC |= 1; // Read data with CDC - - Pico_mcd->scd.CDD_Complete = 1; - return 0; -} - - -PICO_INTERNAL int Seek_CDD_c4(void) -{ - _msf MSF; - - CHECK_TRAY_OPEN - CHECK_CD_PRESENT - - // MSF to seek in TC buffer - - MSF.M = (Pico_mcd->s68k_regs[0x38+10+2] & 0xF) * 10 + (Pico_mcd->s68k_regs[0x38+10+3] & 0xF); - MSF.S = (Pico_mcd->s68k_regs[0x38+10+4] & 0xF) * 10 + (Pico_mcd->s68k_regs[0x38+10+5] & 0xF); - MSF.F = (Pico_mcd->s68k_regs[0x38+10+6] & 0xF) * 10 + (Pico_mcd->s68k_regs[0x38+10+7] & 0xF); - - Pico_mcd->scd.Cur_Track = MSF_to_Track(&MSF); - Pico_mcd->scd.Cur_LBA = MSF_to_LBA(&MSF); - CDC_Update_Header(); - - Pico_mcd->scd.Status_CDC &= ~1; // Stop CDC read - - Pico_mcd->scd.Status_CDD = READY; - Pico_mcd->cdd.Status = 0x0200; - - // DATA ? 
- if (Pico_mcd->scd.Cur_Track == 1) - Pico_mcd->s68k_regs[0x36] |= 0x01; - else Pico_mcd->s68k_regs[0x36] &= ~0x01; // AUDIO - - Pico_mcd->cdd.Minute = 0; - Pico_mcd->cdd.Seconde = 0; - Pico_mcd->cdd.Frame = 0; - Pico_mcd->cdd.Ext = 0; - - Pico_mcd->scd.CDD_Complete = 1; - - return 0; -} - - -PICO_INTERNAL int Pause_CDD_c6(void) -{ - CHECK_TRAY_OPEN - CHECK_CD_PRESENT - - Pico_mcd->scd.Status_CDC &= ~1; // Stop CDC read to start a new one if raw data - - Pico_mcd->scd.Status_CDD = READY; - Pico_mcd->cdd.Status = Pico_mcd->scd.Status_CDD; - - Pico_mcd->s68k_regs[0x36] |= 0x01; // Data bit set because stopped - - Pico_mcd->cdd.Minute = 0; - Pico_mcd->cdd.Seconde = 0; - Pico_mcd->cdd.Frame = 0; - Pico_mcd->cdd.Ext = 0; - - Pico_mcd->scd.CDD_Complete = 1; - - return 0; -} - - -PICO_INTERNAL int Resume_CDD_c7(void) -{ - CHECK_TRAY_OPEN - CHECK_CD_PRESENT - - Pico_mcd->scd.Cur_Track = LBA_to_Track(Pico_mcd->scd.Cur_LBA); - -#ifdef DEBUG_CD - { - _msf MSF; - LBA_to_MSF(Pico_mcd->scd.Cur_LBA, &MSF); - cdprintf("Resume read : Cur LBA = %d, M=%d, S=%d, F=%d", Pico_mcd->scd.Cur_LBA, MSF.M, MSF.S, MSF.F); - } -#endif - - Pico_mcd->scd.Status_CDD = PLAYING; - Pico_mcd->cdd.Status = 0x0102; - - if (Pico_mcd->scd.Cur_Track == 1) - { - Pico_mcd->s68k_regs[0x36] |= 0x01; // DATA - } - else - { - Pico_mcd->s68k_regs[0x36] &= ~0x01; // AUDIO - cdda_start_play(); - } - - if (Pico_mcd->scd.Cur_Track == 100) Pico_mcd->cdd.Minute = 0x0A02; - else Pico_mcd->cdd.Minute = INT_TO_BCDW(Pico_mcd->scd.Cur_Track); - Pico_mcd->cdd.Seconde = 0; - Pico_mcd->cdd.Frame = 0; - Pico_mcd->cdd.Ext = 0; - - Pico_mcd->scd.Status_CDC |= 1; // Read data with CDC - - Pico_mcd->scd.CDD_Complete = 1; - return 0; -} - - -PICO_INTERNAL int Fast_Foward_CDD_c8(void) -{ - CHECK_TRAY_OPEN - CHECK_CD_PRESENT - - Pico_mcd->scd.Status_CDC &= ~1; // Stop CDC read - - Pico_mcd->scd.Status_CDD = FAST_FOW; - Pico_mcd->cdd.Status = Pico_mcd->scd.Status_CDD | 2; - - Pico_mcd->cdd.Minute = INT_TO_BCDW(Pico_mcd->scd.Cur_Track); 
- Pico_mcd->cdd.Seconde = 0; - Pico_mcd->cdd.Frame = 0; - Pico_mcd->cdd.Ext = 0; - - Pico_mcd->scd.CDD_Complete = 1; - - return 0; -} - - -PICO_INTERNAL int Fast_Rewind_CDD_c9(void) -{ - CHECK_TRAY_OPEN - CHECK_CD_PRESENT - - Pico_mcd->scd.Status_CDC &= ~1; // Stop CDC read - - Pico_mcd->scd.Status_CDD = FAST_REV; - Pico_mcd->cdd.Status = Pico_mcd->scd.Status_CDD | 2; - - Pico_mcd->cdd.Minute = INT_TO_BCDW(Pico_mcd->scd.Cur_Track); - Pico_mcd->cdd.Seconde = 0; - Pico_mcd->cdd.Frame = 0; - Pico_mcd->cdd.Ext = 0; - - Pico_mcd->scd.CDD_Complete = 1; - - return 0; -} - - -PICO_INTERNAL int Close_Tray_CDD_cC(void) -{ - Pico_mcd->scd.Status_CDC &= ~1; // Stop CDC read - - elprintf(EL_STATUS, "tray close\n"); - - if (PicoMCDcloseTray != NULL) - PicoMCDcloseTray(); - - Pico_mcd->scd.Status_CDD = CD_Present ? STOPPED : NOCD; - Pico_mcd->cdd.Status = 0x0000; - - Pico_mcd->cdd.Minute = 0; - Pico_mcd->cdd.Seconde = 0; - Pico_mcd->cdd.Frame = 0; - Pico_mcd->cdd.Ext = 0; - - Pico_mcd->scd.CDD_Complete = 1; - - return 0; -} - - -PICO_INTERNAL int Open_Tray_CDD_cD(void) -{ - CHECK_TRAY_OPEN - - Pico_mcd->scd.Status_CDC &= ~1; // Stop CDC read - - elprintf(EL_STATUS, "tray open\n"); - - Unload_ISO(); - CD_Present = 0; - - if (PicoMCDopenTray != NULL) - PicoMCDopenTray(); - - Pico_mcd->scd.Status_CDD = TRAY_OPEN; - Pico_mcd->cdd.Status = 0x0E00; - - Pico_mcd->cdd.Minute = 0; - Pico_mcd->cdd.Seconde = 0; - Pico_mcd->cdd.Frame = 0; - Pico_mcd->cdd.Ext = 0; - - Pico_mcd->scd.CDD_Complete = 1; - - return 0; -} - - -PICO_INTERNAL int CDD_cA(void) -{ - CHECK_TRAY_OPEN - CHECK_CD_PRESENT - - Pico_mcd->scd.Status_CDC &= ~1; - - Pico_mcd->scd.Status_CDD = READY; - Pico_mcd->cdd.Status = Pico_mcd->scd.Status_CDD; - - Pico_mcd->cdd.Minute = 0; - Pico_mcd->cdd.Seconde = INT_TO_BCDW(1); - Pico_mcd->cdd.Frame = INT_TO_BCDW(1); - Pico_mcd->cdd.Ext = 0; - - Pico_mcd->scd.CDD_Complete = 1; - - return 0; -} - - -PICO_INTERNAL int CDD_Def(void) -{ - Pico_mcd->cdd.Status = Pico_mcd->scd.Status_CDD; - - 
Pico_mcd->cdd.Minute = 0; - Pico_mcd->cdd.Seconde = 0; - Pico_mcd->cdd.Frame = 0; - Pico_mcd->cdd.Ext = 0; - - return 0; -} - - -static int bswapwrite(int a, unsigned short d) -{ - *(unsigned short *)(Pico_mcd->s68k_regs + a) = (d>>8)|(d<<8); - return d + (d >> 8); -} - -PICO_INTERNAL void CDD_Export_Status(void) -{ - unsigned int csum; - - csum = bswapwrite( 0x38+0, Pico_mcd->cdd.Status); - csum += bswapwrite( 0x38+2, Pico_mcd->cdd.Minute); - csum += bswapwrite( 0x38+4, Pico_mcd->cdd.Seconde); - csum += bswapwrite( 0x38+6, Pico_mcd->cdd.Frame); - Pico_mcd->s68k_regs[0x38+8] = Pico_mcd->cdd.Ext; - csum += Pico_mcd->cdd.Ext; - Pico_mcd->s68k_regs[0x38+9] = ~csum & 0xf; - - Pico_mcd->s68k_regs[0x37] &= 3; // CDD.Control - - if (Pico_mcd->s68k_regs[0x33] & PCDS_IEN4) - { - elprintf(EL_INTS, "cdd export irq 4"); - SekInterruptS68k(4); - } - -// cdprintf("CDD exported status\n"); - cdprintf("out: Status=%.4X, Minute=%.4X, Second=%.4X, Frame=%.4X Checksum=%.4X", - (Pico_mcd->s68k_regs[0x38+0] << 8) | Pico_mcd->s68k_regs[0x38+1], - (Pico_mcd->s68k_regs[0x38+2] << 8) | Pico_mcd->s68k_regs[0x38+3], - (Pico_mcd->s68k_regs[0x38+4] << 8) | Pico_mcd->s68k_regs[0x38+5], - (Pico_mcd->s68k_regs[0x38+6] << 8) | Pico_mcd->s68k_regs[0x38+7], - (Pico_mcd->s68k_regs[0x38+8] << 8) | Pico_mcd->s68k_regs[0x38+9]); -} - - -PICO_INTERNAL void CDD_Import_Command(void) -{ -// cdprintf("CDD importing command\n"); - cdprintf("in: Command=%.4X, Minute=%.4X, Second=%.4X, Frame=%.4X Checksum=%.4X", - (Pico_mcd->s68k_regs[0x38+10+0] << 8) | Pico_mcd->s68k_regs[0x38+10+1], - (Pico_mcd->s68k_regs[0x38+10+2] << 8) | Pico_mcd->s68k_regs[0x38+10+3], - (Pico_mcd->s68k_regs[0x38+10+4] << 8) | Pico_mcd->s68k_regs[0x38+10+5], - (Pico_mcd->s68k_regs[0x38+10+6] << 8) | Pico_mcd->s68k_regs[0x38+10+7], - (Pico_mcd->s68k_regs[0x38+10+8] << 8) | Pico_mcd->s68k_regs[0x38+10+9]); - - switch (Pico_mcd->s68k_regs[0x38+10+0]) - { - case 0x0: // STATUS (?) - Get_Status_CDD_c0(); - break; - - case 0x1: // STOP ALL (?) 
- Stop_CDD_c1(); - break; - - case 0x2: // GET TOC INFORMATIONS - switch(Pico_mcd->s68k_regs[0x38+10+3]) - { - case 0x0: // get current position (MSF format) - Pico_mcd->cdd.Status = (Pico_mcd->cdd.Status & 0xFF00); - Get_Pos_CDD_c20(); - break; - - case 0x1: // get elapsed time of current track played/scanned (relative MSF format) - Pico_mcd->cdd.Status = (Pico_mcd->cdd.Status & 0xFF00) | 1; - Get_Track_Pos_CDD_c21(); - break; - - case 0x2: // get current track in RS2-RS3 - Pico_mcd->cdd.Status = (Pico_mcd->cdd.Status & 0xFF00) | 2; - Get_Current_Track_CDD_c22(); - break; - - case 0x3: // get total length (MSF format) - Pico_mcd->cdd.Status = (Pico_mcd->cdd.Status & 0xFF00) | 3; - Get_Total_Lenght_CDD_c23(); - break; - - case 0x4: // first & last track number - Pico_mcd->cdd.Status = (Pico_mcd->cdd.Status & 0xFF00) | 4; - Get_First_Last_Track_CDD_c24(); - break; - - case 0x5: // get track addresse (MSF format) - Pico_mcd->cdd.Status = (Pico_mcd->cdd.Status & 0xFF00) | 5; - Get_Track_Adr_CDD_c25(); - break; - - default : // invalid, then we return status - Pico_mcd->cdd.Status = (Pico_mcd->cdd.Status & 0xFF00) | 0xF; - Get_Status_CDD_c0(); - break; - } - break; - - case 0x3: // READ - Play_CDD_c3(); - break; - - case 0x4: // SEEK - Seek_CDD_c4(); - break; - - case 0x6: // PAUSE/STOP - Pause_CDD_c6(); - break; - - case 0x7: // RESUME - Resume_CDD_c7(); - break; - - case 0x8: // FAST FOWARD - Fast_Foward_CDD_c8(); - break; - - case 0x9: // FAST REWIND - Fast_Rewind_CDD_c9(); - break; - - case 0xA: // RECOVER INITIAL STATE (?) 
- CDD_cA(); - break; - - case 0xC: // CLOSE TRAY - Close_Tray_CDD_cC(); - break; - - case 0xD: // OPEN TRAY - Open_Tray_CDD_cD(); - break; - - default: // UNKNOWN - CDD_Def(); - break; - } -} - -void CDD_Reset(void) -{ - // Reseting CDD - - memset(Pico_mcd->s68k_regs+0x34, 0, 2*2); // CDD.Fader, CDD.Control - Pico_mcd->cdd.Status = 0; - Pico_mcd->cdd.Minute = 0; - Pico_mcd->cdd.Seconde = 0; - Pico_mcd->cdd.Frame = 0; - Pico_mcd->cdd.Ext = 0; - - // clear receive status and transfer command - memset(Pico_mcd->s68k_regs+0x38, 0, 20); - Pico_mcd->s68k_regs[0x38+9] = 0xF; // Default checksum -} - - diff --git a/pico/cd/cd_sys.h b/pico/cd/cd_sys.h deleted file mode 100644 index 6291c2cd..00000000 --- a/pico/cd/cd_sys.h +++ /dev/null @@ -1,109 +0,0 @@ -/*********************************************************** - * * - * This source was taken from the Gens project * - * Written by Stéphane Dallongeville * - * Copyright (c) 2002 by Stéphane Dallongeville * - * Modified/adapted for PicoDrive by notaz, 2007 * - * * - ***********************************************************/ - -#ifndef _CD_SYS_H -#define _CD_SYS_H - -#include "cd_file.h" - -#ifdef __cplusplus -extern "C" { -#endif - - -#define INT_TO_BCDB(c) \ -((c) > 99)?(0x99):((((c) / 10) << 4) + ((c) % 10)); - -#define INT_TO_BCDW(c) \ -((c) > 99)?(0x0909):((((c) / 10) << 8) + ((c) % 10)); - -#define BCDB_TO_INT(c) \ -(((c) >> 4) * 10) + ((c) & 0xF); - -#define BCDW_TO_INT(c) \ -(((c) >> 8) * 10) + ((c) & 0xF); - - -typedef struct -{ - unsigned char M; - unsigned char S; - unsigned char F; -} _msf; - -typedef struct -{ - _msf MSF; - // - char ftype; // cue_track_type - void *F; - int Length; - int Offset; // sector offset, when single file is used for multiple virtual tracks - short KBtps; // kbytes per sec for mp3s (bitrate / 1000 / 8) - short pad; -} _scd_track; - -typedef struct -{ -// unsigned char First_Track; // always 1 - _scd_track Tracks[100]; - unsigned int Last_Track; -} _scd_toc; - -typedef struct { - 
unsigned int Status_CDD; - unsigned int Status_CDC; - int Cur_LBA; - unsigned int Cur_Track; - int File_Add_Delay; - char CDD_Complete; - int pad[6]; -} _scd; - - -PICO_INTERNAL void LBA_to_MSF(int lba, _msf *MSF); -PICO_INTERNAL int Track_to_LBA(int track); - -// moved to pico.h -// int Insert_CD(char *iso_name, int is_bin); -// void Stop_CD(void); - -PICO_INTERNAL void Check_CD_Command(void); - -PICO_INTERNAL int Init_CD_Driver(void); -PICO_INTERNAL void End_CD_Driver(void); -PICO_INTERNAL void Reset_CD(void); - -PICO_INTERNAL int Get_Status_CDD_c0(void); -PICO_INTERNAL int Stop_CDD_c1(void); -PICO_INTERNAL int Get_Pos_CDD_c20(void); -PICO_INTERNAL int Get_Track_Pos_CDD_c21(void); -PICO_INTERNAL int Get_Current_Track_CDD_c22(void); -PICO_INTERNAL int Get_Total_Lenght_CDD_c23(void); -PICO_INTERNAL int Get_First_Last_Track_CDD_c24(void); -PICO_INTERNAL int Get_Track_Adr_CDD_c25(void); -PICO_INTERNAL int Play_CDD_c3(void); -PICO_INTERNAL int Seek_CDD_c4(void); -PICO_INTERNAL int Pause_CDD_c6(void); -PICO_INTERNAL int Resume_CDD_c7(void); -PICO_INTERNAL int Fast_Foward_CDD_c8(void); -PICO_INTERNAL int Fast_Rewind_CDD_c9(void); -PICO_INTERNAL int CDD_cA(void); -PICO_INTERNAL int Close_Tray_CDD_cC(void); -PICO_INTERNAL int Open_Tray_CDD_cD(void); - -PICO_INTERNAL int CDD_Def(void); - - -#ifdef __cplusplus -}; -#endif - -#endif - diff --git a/pico/cd/cdc.c b/pico/cd/cdc.c index aa1ded97..8b47b3dd 100644 --- a/pico/cd/cdc.c +++ b/pico/cd/cdc.c @@ -341,19 +341,6 @@ update_dma: Pico_mcd->s68k_regs[0x0b] = dma_addr; } -// tmp -static void cdd_read_data(uint8 *dst) -{ - int lba = Pico_mcd->scd.Cur_LBA; - - /* only read DATA track sectors */ - if (0 <= lba && lba < Pico_mcd->TOC.Tracks[0].Length) - { - /* read sector data (Mode 1 = 2048 bytes) */ - PicoCDBufferRead(dst, lba); - } -} - void cdc_dma_update(void) { /* end of DMA transfer ? 
*/ diff --git a/pico/cd/cdd.c b/pico/cd/cdd.c new file mode 100644 index 00000000..58a60536 --- /dev/null +++ b/pico/cd/cdd.c @@ -0,0 +1,1328 @@ +/*************************************************************************************** + * Genesis Plus + * CD drive processor & CD-DA fader + * + * Copyright (C) 2012-2013 Eke-Eke (Genesis Plus GX) + * + * Redistribution and use of this code or any derivative works are permitted + * provided that the following conditions are met: + * + * - Redistributions may not be sold, nor may they be used in a commercial + * product or activity. + * + * - Redistributions that are modified from the original source must include the + * complete source code, including the source code for all components used by a + * binary built from the modified sources. However, as a special exception, the + * source code distributed need not include anything that is normally distributed + * (in either source or binary form) with the major components (compiler, kernel, + * and so on) of the operating system on which the executable runs, unless that + * component itself accompanies the executable. + * + * - Redistributions must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other + * materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + ****************************************************************************************/ + +#include "../pico_int.h" +#include "genplus_macros.h" +#include "cue.h" +#include "cdd.h" + +#ifdef USE_LIBTREMOR +#define SUPPORTED_EXT 20 +#else +#define SUPPORTED_EXT 10 +#endif + +cdd_t cdd; + +/* BCD conversion lookup tables */ +static const uint8 lut_BCD_8[100] = +{ + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, +}; + +static const uint16 lut_BCD_16[100] = +{ + 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, + 0x0100, 0x0101, 0x0102, 0x0103, 0x0104, 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, + 0x0200, 0x0201, 0x0202, 0x0203, 0x0204, 0x0205, 0x0206, 0x0207, 0x0208, 0x0209, + 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307, 0x0308, 0x0309, + 0x0400, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, 0x0408, 0x0409, + 
0x0500, 0x0501, 0x0502, 0x0503, 0x0504, 0x0505, 0x0506, 0x0507, 0x0508, 0x0509, + 0x0600, 0x0601, 0x0602, 0x0603, 0x0604, 0x0605, 0x0606, 0x0607, 0x0608, 0x0609, + 0x0700, 0x0701, 0x0702, 0x0703, 0x0704, 0x0705, 0x0706, 0x0707, 0x0708, 0x0709, + 0x0800, 0x0801, 0x0802, 0x0803, 0x0804, 0x0805, 0x0806, 0x0807, 0x0808, 0x0809, + 0x0900, 0x0901, 0x0902, 0x0903, 0x0904, 0x0905, 0x0906, 0x0907, 0x0908, 0x0909, +}; + +/* pre-build TOC */ +static const uint16 toc_snatcher[21] = +{ + 56014, 495, 10120, 20555, 1580, 5417, 12502, 16090, 6553, 9681, + 8148, 20228, 8622, 6142, 5858, 1287, 7424, 3535, 31697, 2485, + 31380 +}; + +static const uint16 toc_lunar[52] = +{ + 5422, 1057, 7932, 5401, 6380, 6592, 5862, 5937, 5478, 5870, + 6673, 6613, 6429, 4996, 4977, 5657, 3720, 5892, 3140, 3263, + 6351, 5187, 3249, 1464, 1596, 1750, 1751, 6599, 4578, 5205, + 1550, 1827, 2328, 1346, 1569, 1613, 7199, 4928, 1656, 2549, + 1875, 3901, 1850, 2399, 2028, 1724, 4889, 14551, 1184, 2132, + 685, 3167 +}; + +static const uint32 toc_shadow[15] = +{ + 10226, 70054, 11100, 12532, 12444, 11923, 10059, 10167, 10138, 13792, + 11637, 2547, 2521, 3856, 900 +}; + +static const uint32 toc_dungeon[13] = +{ + 2250, 22950, 16350, 24900, 13875, 19950, 13800, 15375, 17400, 17100, + 3325, 6825, 25275 +}; + +static const uint32 toc_ffight[26] = +{ + 11994, 9742, 10136, 9685, 9553, 14588, 9430, 8721, 9975, 9764, + 9704, 12796, 585, 754, 951, 624, 9047, 1068, 817, 9191, 1024, + 14562, 10320, 8627, 3795, 3047 +}; + +static const uint32 toc_ffightj[29] = +{ + 11994, 9752, 10119, 9690, 9567, 14575, 9431, 8731, 9965, 9763, + 9716, 12791, 579, 751, 958, 630, 9050, 1052, 825, 9193, 1026, + 14553, 9834, 10542, 1699, 1792, 1781, 3783, 3052 +}; + +/* supported WAVE file header (16-bit stereo samples @44.1kHz) */ +static const unsigned char waveHeader[32] = +{ + 0x57,0x41,0x56,0x45,0x66,0x6d,0x74,0x20,0x10,0x00,0x00,0x00,0x01,0x00,0x02,0x00, + 0x44,0xac,0x00,0x00,0x10,0xb1,0x02,0x00,0x04,0x00,0x10,0x00,0x64,0x61,0x74,0x61 
+}; + +#ifdef USE_LIBTREMOR +#ifdef DISABLE_MANY_OGG_OPEN_FILES +static void ogg_free(int i) +{ + /* clear OGG file descriptor to prevent file from being closed */ + cdd.toc.tracks[i].vf.datasource = NULL; + + /* close VORBIS file structure */ + ov_clear(&cdd.toc.tracks[i].vf); + + /* indicates that the track is a seekable VORBIS file */ + cdd.toc.tracks[i].vf.seekable = 1; + + /* reset file reading position */ + fseek(cdd.toc.tracks[i].fd, 0, SEEK_SET); +} +#endif +#endif + +void cdd_reset(void) +{ + /* reset cycle counter */ + cdd.cycles = 0; + + /* reset drive access latency */ + cdd.latency = 0; + + /* reset track index */ + cdd.index = 0; + + /* reset logical block address */ + cdd.lba = 0; + + /* reset status */ + cdd.status = cdd.loaded ? CD_STOP : NO_DISC; + + /* reset CD-DA fader (full volume) */ + cdd.volume = 0x400; + + /* clear CD-DA output */ + cdd.audio[0] = cdd.audio[1] = 0; +} + +/* FIXME: use cdd_read_audio() instead */ +static void cdd_change_track(int index, int lba) +{ + int i, base, lba_offset, lb_len; + + for (i = index; i > 0; i--) + if (cdd.toc.tracks[i].fd != NULL) + break; + + Pico_mcd->cdda_stream = cdd.toc.tracks[i].fd; + base = cdd.toc.tracks[index].offset; + lba_offset = lba - cdd.toc.tracks[index].start; + lb_len = cdd.toc.tracks[index].end - cdd.toc.tracks[index].start; + + elprintf(EL_CD, "play #%d lba %d base %d", index, lba, base); + + cdda_start_play(base, lba_offset, lb_len); +} + +int cdd_context_save(uint8 *state) +{ + int bufferptr = 0; + + save_param(&cdd.cycles, sizeof(cdd.cycles)); + save_param(&cdd.latency, sizeof(cdd.latency)); + save_param(&cdd.index, sizeof(cdd.index)); + save_param(&cdd.lba, sizeof(cdd.lba)); + save_param(&cdd.scanOffset, sizeof(cdd.scanOffset)); + save_param(&cdd.volume, sizeof(cdd.volume)); + save_param(&cdd.status, sizeof(cdd.status)); + + return bufferptr; +} + +int cdd_context_load(uint8 *state) +{ + int lba; + int bufferptr = 0; + +#ifdef USE_LIBTREMOR +#ifdef DISABLE_MANY_OGG_OPEN_FILES + /* 
close previous track VORBIS file structure to save memory */ + if (cdd.toc.tracks[cdd.index].vf.datasource) + { + ogg_free(cdd.index); + } +#endif +#endif + + load_param(&cdd.cycles, sizeof(cdd.cycles)); + load_param(&cdd.latency, sizeof(cdd.latency)); + load_param(&cdd.index, sizeof(cdd.index)); + load_param(&cdd.lba, sizeof(cdd.lba)); + load_param(&cdd.scanOffset, sizeof(cdd.scanOffset)); + load_param(&cdd.volume, sizeof(cdd.volume)); + load_param(&cdd.status, sizeof(cdd.status)); + + /* adjust current LBA within track limit */ + lba = cdd.lba; + if (lba < cdd.toc.tracks[cdd.index].start) + { + lba = cdd.toc.tracks[cdd.index].start; + } + + /* seek to current track position */ + if (!cdd.index) + { + /* DATA track */ + if (cdd.toc.tracks[0].fd) + { + pm_seek(cdd.toc.tracks[0].fd, lba * cdd.sectorSize, SEEK_SET); + } + } +#ifdef USE_LIBTREMOR + else if (cdd.toc.tracks[cdd.index].vf.seekable) + { +#ifdef DISABLE_MANY_OGG_OPEN_FILES + /* VORBIS file need to be opened first */ + ov_open(cdd.toc.tracks[cdd.index].fd,&cdd.toc.tracks[cdd.index].vf,0,0); +#endif + /* VORBIS AUDIO track */ + ov_pcm_seek(&cdd.toc.tracks[cdd.index].vf, (lba - cdd.toc.tracks[cdd.index].start) * 588 - cdd.toc.tracks[cdd.index].offset); + } +#endif +#if 0 + else if (cdd.toc.tracks[cdd.index].fd) + { + /* PCM AUDIO track */ + fseek(cdd.toc.tracks[cdd.index].fd, (lba * 2352) - cdd.toc.tracks[cdd.index].offset, SEEK_SET); + } +#else + else + { + cdd_change_track(cdd.index, lba); + } +#endif + + return bufferptr; +} + +int cdd_context_load_old(uint8 *state) +{ + memcpy(&cdd.lba, state + 8, sizeof(cdd.lba)); + return 12 * 4; +} + +int cdd_load(const char *filename, int type) +{ + char header[0x210]; + int ret; + + /* first unmount any loaded disc */ + cdd_unload(); + + /* genplus parses cue here, in PD we use our own parser */ + ret = load_cd_image(filename, &type); + if (ret != 0) + return ret; + + /* read first 16 bytes */ + pm_read(header, 0x10, cdd.toc.tracks[0].fd); + + /* look for valid CD 
image ID string */ + if (memcmp("SEGADISCSYSTEM", header, 14)) + { + /* if not found, read next 16 bytes */ + pm_read(header, 0x10, cdd.toc.tracks[0].fd); + + /* look again for valid CD image ID string */ + if (memcmp("SEGADISCSYSTEM", header, 14)) + { + elprintf(EL_STATUS|EL_ANOMALY, "cd: bad cd image?"); + /* assume bin without security code */ + } + + /* BIN format (2352 bytes data blocks) */ + cdd.sectorSize = 2352; + } + else + { + /* ISO format (2048 bytes data blocks) */ + cdd.sectorSize = 2048; + } + + ret = (type == CT_BIN) ? 2352 : 2048; + if (ret != cdd.sectorSize) + elprintf(EL_STATUS|EL_ANOMALY, "cd: type detection mismatch"); + + /* read CD image header + security code */ + pm_read(header + 0x10, 0x200, cdd.toc.tracks[0].fd); + + /* Simulate audio tracks if none found */ + if (cdd.toc.last == 1) + { + /* Some games require exact TOC infos */ + if (strstr(header + 0x180,"T-95035") != NULL) + { + /* Snatcher */ + cdd.toc.last = cdd.toc.end = 0; + do + { + cdd.toc.tracks[cdd.toc.last].start = cdd.toc.end; + cdd.toc.tracks[cdd.toc.last].end = cdd.toc.tracks[cdd.toc.last].start + toc_snatcher[cdd.toc.last]; + cdd.toc.end = cdd.toc.tracks[cdd.toc.last].end; + cdd.toc.last++; + } + while (cdd.toc.last < 21); + } + else if (strstr(header + 0x180,"T-127015") != NULL) + { + /* Lunar - The Silver Star */ + cdd.toc.last = cdd.toc.end = 0; + do + { + cdd.toc.tracks[cdd.toc.last].start = cdd.toc.end; + cdd.toc.tracks[cdd.toc.last].end = cdd.toc.tracks[cdd.toc.last].start + toc_lunar[cdd.toc.last]; + cdd.toc.end = cdd.toc.tracks[cdd.toc.last].end; + cdd.toc.last++; + } + while (cdd.toc.last < 52); + } + else if (strstr(header + 0x180,"T-113045") != NULL) + { + /* Shadow of the Beast II */ + cdd.toc.last = cdd.toc.end = 0; + do + { + cdd.toc.tracks[cdd.toc.last].start = cdd.toc.end; + cdd.toc.tracks[cdd.toc.last].end = cdd.toc.tracks[cdd.toc.last].start + toc_shadow[cdd.toc.last]; + cdd.toc.end = cdd.toc.tracks[cdd.toc.last].end; + cdd.toc.last++; + } + while 
(cdd.toc.last < 15); + } + else if (strstr(header + 0x180,"T-143025") != NULL) + { + /* Dungeon Explorer */ + cdd.toc.last = cdd.toc.end = 0; + do + { + cdd.toc.tracks[cdd.toc.last].start = cdd.toc.end; + cdd.toc.tracks[cdd.toc.last].end = cdd.toc.tracks[cdd.toc.last].start + toc_dungeon[cdd.toc.last]; + cdd.toc.end = cdd.toc.tracks[cdd.toc.last].end; + cdd.toc.last++; + } + while (cdd.toc.last < 13); + } + else if (strstr(header + 0x180,"MK-4410") != NULL) + { + /* Final Fight CD (USA, Europe) */ + cdd.toc.last = cdd.toc.end = 0; + do + { + cdd.toc.tracks[cdd.toc.last].start = cdd.toc.end; + cdd.toc.tracks[cdd.toc.last].end = cdd.toc.tracks[cdd.toc.last].start + toc_ffight[cdd.toc.last]; + cdd.toc.end = cdd.toc.tracks[cdd.toc.last].end; + cdd.toc.last++; + } + while (cdd.toc.last < 26); + } + else if (strstr(header + 0x180,"G-6013") != NULL) + { + /* Final Fight CD (Japan) */ + cdd.toc.last = cdd.toc.end = 0; + do + { + cdd.toc.tracks[cdd.toc.last].start = cdd.toc.end; + cdd.toc.tracks[cdd.toc.last].end = cdd.toc.tracks[cdd.toc.last].start + toc_ffightj[cdd.toc.last]; + cdd.toc.end = cdd.toc.tracks[cdd.toc.last].end; + cdd.toc.last++; + } + while (cdd.toc.last < 29); + } +#if 0 + else + { + /* default TOC (99 tracks & 2s per audio tracks) */ + do + { + cdd.toc.tracks[cdd.toc.last].start = cdd.toc.end + 2*75; + cdd.toc.tracks[cdd.toc.last].end = cdd.toc.tracks[cdd.toc.last].start + 2*75; + cdd.toc.end = cdd.toc.tracks[cdd.toc.last].end; + cdd.toc.last++; + } + while ((cdd.toc.last < 99) && (cdd.toc.end < 56*60*75)); + } +#endif + } + + /* Lead-out */ + cdd.toc.tracks[cdd.toc.last].start = cdd.toc.end; + + /* CD loaded */ + cdd.loaded = 1; + return 0; +} + +int cdd_unload(void) +{ + int was_loaded = cdd.loaded; + + if (cdd.loaded) + { + int i; + + /* close CD tracks */ + if (cdd.toc.tracks[0].fd) + { + pm_close(cdd.toc.tracks[0].fd); + cdd.toc.tracks[0].fd = NULL; + } + + for (i = 1; i < cdd.toc.last; i++) + { +#ifdef USE_LIBTREMOR + if 
(cdd.toc.tracks[i].vf.datasource) + { + /* close VORBIS file (if still opened) */ + ov_clear(&cdd.toc.tracks[i].vf); + } + else +#endif + if (cdd.toc.tracks[i].fd) + { + /* close file */ + if (Pico_mcd->cdda_type == CT_MP3) + fclose(cdd.toc.tracks[i].fd); + else + pm_close(cdd.toc.tracks[0].fd); + + /* detect single file images */ + if (cdd.toc.tracks[i+1].fd == cdd.toc.tracks[i].fd) + { + /* exit loop */ + i = cdd.toc.last; + } + } + } + + /* CD unloaded */ + cdd.loaded = 0; + } + + /* reset TOC */ + memset(&cdd.toc, 0x00, sizeof(cdd.toc)); + + /* unknown CD image file format */ + cdd.sectorSize = 0; + + return was_loaded; +} + +void cdd_read_data(uint8 *dst) +{ + /* only read DATA track sectors */ + if ((cdd.lba >= 0) && (cdd.lba < cdd.toc.tracks[0].end)) + { + /* BIN format ? */ + if (cdd.sectorSize == 2352) + { + /* skip 16-byte header */ + pm_seek(cdd.toc.tracks[0].fd, cdd.lba * 2352 + 16, SEEK_SET); + } + + /* read sector data (Mode 1 = 2048 bytes) */ + pm_read(dst, 2048, cdd.toc.tracks[0].fd); + } +} + +#if 0 +void cdd_read_audio(unsigned int samples) +{ + /* previous audio outputs */ + int16 l = cdd.audio[0]; + int16 r = cdd.audio[1]; + + /* get number of internal clocks (samples) needed */ + samples = blip_clocks_needed(blip[0], samples); + + /* audio track playing ? 
*/ + if (!Pico_mcd->regs[0x36>>1].byte.h && cdd.toc.tracks[cdd.index].fd) + { + int i, mul, delta; + + /* current CD-DA fader volume */ + int curVol = cdd.volume; + + /* CD-DA fader volume setup (0-1024) */ + int endVol = Pico_mcd->regs[0x34>>1].w >> 4; + + /* read samples from current block */ +#ifdef USE_LIBTREMOR + if (cdd.toc.tracks[cdd.index].vf.datasource) + { + int len, done = 0; + int16 *ptr = (int16 *) (cdc.ram); + samples = samples * 4; + while (done < samples) + { + len = ov_read(&cdd.toc.tracks[cdd.index].vf, (char *)(cdc.ram + done), samples - done, 0); + if (len <= 0) + { + done = samples; + break; + } + done += len; + } + samples = done / 4; + + /* process 16-bit (host-endian) stereo samples */ + for (i=0; i endVol) + { + /* fade-out */ + curVol--; + } + else if (!curVol) + { + /* audio will remain muted until next setup */ + break; + } + } + } + else +#endif + { +#ifdef LSB_FIRST + int16 *ptr = (int16 *) (cdc.ram); +#else + uint8 *ptr = cdc.ram; +#endif + fread(cdc.ram, 1, samples * 4, cdd.toc.tracks[cdd.index].fd); + + /* process 16-bit (little-endian) stereo samples */ + for (i=0; i endVol) + { + /* fade-out */ + curVol--; + } + else if (!curVol) + { + /* audio will remain muted until next setup */ + break; + } + } + } + + /* save current CD-DA fader volume */ + cdd.volume = curVol; + + /* save last audio output for next frame */ + cdd.audio[0] = l; + cdd.audio[1] = r; + } + else + { + /* no audio output */ + if (l) blip_add_delta_fast(blip[0], 0, -l); + if (r) blip_add_delta_fast(blip[1], 0, -r); + + /* save audio output for next frame */ + cdd.audio[0] = 0; + cdd.audio[1] = 0; + } + + /* end of Blip Buffer timeframe */ + blip_end_frame(blip[0], samples); + blip_end_frame(blip[1], samples); +} +#endif + + +void cdd_update(void) +{ +#ifdef LOG_CDD + error("LBA = %d (track n%d)(latency=%d)\n", cdd.lba, cdd.index, cdd.latency); +#endif + + /* seeking disc */ + if (cdd.status == CD_SEEK) + { + /* drive latency */ + if (cdd.latency > 0) + { + 
cdd.latency--; + return; + } + + /* drive is ready */ + cdd.status = CD_READY; + } + + /* reading disc */ + else if (cdd.status == CD_PLAY) + { + /* drive latency */ + if (cdd.latency > 0) + { + cdd.latency--; + return; + } + + /* track type */ + if (!cdd.index) + { + /* DATA sector header (CD-ROM Mode 1) */ + uint8 header[4]; + uint32 msf = cdd.lba + 150; + header[0] = lut_BCD_8[(msf / 75) / 60]; + header[1] = lut_BCD_8[(msf / 75) % 60]; + header[2] = lut_BCD_8[(msf % 75)]; + header[3] = 0x01; + + /* data track sector read is controlled by CDC */ + cdd.lba += cdc_decoder_update(header); + } + else if (cdd.index < cdd.toc.last) + { + uint8 header[4] = { 0, }; + + /* check against audio track start index */ + if (cdd.lba >= cdd.toc.tracks[cdd.index].start) + { + /* audio track playing */ + Pico_mcd->regs[0x36>>1].byte.h = 0x00; + } + + /* audio blocks are still sent to CDC as well as CD DAC/Fader */ + cdc_decoder_update(header); + + /* next audio block is automatically read */ + cdd.lba++; + } + else + { + /* end of disc */ + cdd.status = CD_END; + return; + } + + /* check end of current track */ + if (cdd.lba >= cdd.toc.tracks[cdd.index].end) + { +#ifdef USE_LIBTREMOR +#ifdef DISABLE_MANY_OGG_OPEN_FILES + /* close previous track VORBIS file structure to save memory */ + if (cdd.toc.tracks[cdd.index].vf.datasource) + { + ogg_free(cdd.index); + } +#endif +#endif + /* play next track */ + cdd.index++; + + /* PAUSE between tracks */ + Pico_mcd->regs[0x36>>1].byte.h = 0x01; + + /* seek to next audio track start */ +#ifdef USE_LIBTREMOR + if (cdd.toc.tracks[cdd.index].vf.seekable) + { +#ifdef DISABLE_MANY_OGG_OPEN_FILES + /* VORBIS file need to be opened first */ + ov_open(cdd.toc.tracks[cdd.index].fd,&cdd.toc.tracks[cdd.index].vf,0,0); +#endif + ov_pcm_seek(&cdd.toc.tracks[cdd.index].vf, -cdd.toc.tracks[cdd.index].offset); + } + else +#endif +#if 0 + if (cdd.toc.tracks[cdd.index].fd) + { + fseek(cdd.toc.tracks[cdd.index].fd, (cdd.toc.tracks[cdd.index].start * 2352) - 
cdd.toc.tracks[cdd.index].offset, SEEK_SET); + } +#else + { + cdd_change_track(cdd.index, cdd.lba); + } +#endif + } + } + + /* scanning disc */ + else if (cdd.status == CD_SCAN) + { + /* fast-forward or fast-rewind */ + cdd.lba += cdd.scanOffset; + + /* check current track limits */ + if (cdd.lba >= cdd.toc.tracks[cdd.index].end) + { +#ifdef USE_LIBTREMOR +#ifdef DISABLE_MANY_OGG_OPEN_FILES + /* close previous track VORBIS file structure to save memory */ + if (cdd.toc.tracks[cdd.index].vf.datasource) + { + ogg_free(cdd.index); + } +#endif +#endif + /* next track */ + cdd.index++; + + /* skip directly to track start position */ + cdd.lba = cdd.toc.tracks[cdd.index].start; + + /* AUDIO track playing ? */ + if (cdd.status == CD_PLAY) + { + Pico_mcd->regs[0x36>>1].byte.h = 0x00; + } + } + else if (cdd.lba < cdd.toc.tracks[cdd.index].start) + { +#ifdef USE_LIBTREMOR +#ifdef DISABLE_MANY_OGG_OPEN_FILES + /* close previous track VORBIS file structure to save memory */ + if (cdd.toc.tracks[cdd.index].vf.datasource) + { + ogg_free(cdd.index); + } +#endif +#endif + + /* previous track */ + cdd.index--; + + /* skip directly to track end position */ + cdd.lba = cdd.toc.tracks[cdd.index].end; + } + + /* check disc limits */ + if (cdd.index < 0) + { + cdd.index = 0; + cdd.lba = 0; + } + else if (cdd.index >= cdd.toc.last) + { + /* no AUDIO track playing */ + Pico_mcd->regs[0x36>>1].byte.h = 0x01; + + /* end of disc */ + cdd.index = cdd.toc.last; + cdd.lba = cdd.toc.end; + cdd.status = CD_END; + return; + } + + /* seek to current block */ + if (!cdd.index) + { + /* no AUDIO track playing */ + Pico_mcd->regs[0x36>>1].byte.h = 0x01; + + /* DATA track */ + pm_seek(cdd.toc.tracks[0].fd, cdd.lba * cdd.sectorSize, SEEK_SET); + } +#ifdef USE_LIBTREMOR + else if (cdd.toc.tracks[cdd.index].vf.seekable) + { +#ifdef DISABLE_MANY_OGG_OPEN_FILES + /* check if a new track is being played */ + if (!cdd.toc.tracks[cdd.index].vf.datasource) + { + /* VORBIS file need to be opened first */ + 
ov_open(cdd.toc.tracks[cdd.index].fd,&cdd.toc.tracks[cdd.index].vf,0,0); + } +#endif + /* VORBIS AUDIO track */ + ov_pcm_seek(&cdd.toc.tracks[cdd.index].vf, (cdd.lba - cdd.toc.tracks[cdd.index].start) * 588 - cdd.toc.tracks[cdd.index].offset); + } +#endif +#if 0 + else if (cdd.toc.tracks[cdd.index].fd) + { + /* PCM AUDIO track */ + fseek(cdd.toc.tracks[cdd.index].fd, (cdd.lba * 2352) - cdd.toc.tracks[cdd.index].offset, SEEK_SET); + } +#else + else + { + cdd_change_track(cdd.index, cdd.lba); + } +#endif + } +} + +#define set_reg16(r, v) { \ + uint16 _v = v; \ + Pico_mcd->s68k_regs[(r)] = _v >> 8; \ + Pico_mcd->s68k_regs[(r)+1] = _v; \ +} + +void cdd_process(void) +{ + /* Process CDD command */ + switch (Pico_mcd->regs[0x42>>1].byte.h & 0x0f) + { + case 0x00: /* Drive Status */ + { + /* RS1-RS8 normally unchanged */ + Pico_mcd->regs[0x38>>1].byte.h = cdd.status; + + /* unless RS1 indicated invalid track infos */ + if (Pico_mcd->regs[0x38>>1].byte.l == 0x0f) + { + /* and SEEK has ended */ + if (cdd.status != CD_SEEK) + { + /* then return valid track infos, e.g current track number in RS2-RS3 (fixes Lunar - The Silver Star) */ + Pico_mcd->regs[0x38>>1].byte.l = 0x02; + set_reg16(0x3a, (cdd.index < cdd.toc.last) ? lut_BCD_16[cdd.index + 1] : 0x0A0A); + } + } + break; + } + + case 0x01: /* Stop Drive */ + { + /* update status */ + cdd.status = cdd.loaded ? CD_STOP : NO_DISC; + + /* no audio track playing */ + Pico_mcd->regs[0x36>>1].byte.h = 0x01; + + /* RS1-RS8 ignored, expects 0x0 ("no disc" ?) 
in RS0 once */ + set_reg16(0x38, 0x0000); + set_reg16(0x3a, 0x0000); + set_reg16(0x3c, 0x0000); + set_reg16(0x3e, 0x0000); + set_reg16(0x40, 0x000f); + return; + } + + case 0x02: /* Read TOC */ + { + /* Infos automatically retrieved by CDD processor from Q-Channel */ + /* commands 0x00-0x02 (current block) and 0x03-0x05 (Lead-In) */ + switch (Pico_mcd->regs[0x44>>1].byte.l) + { + case 0x00: /* Current Absolute Time (MM:SS:FF) */ + { + int lba = cdd.lba + 150; + set_reg16(0x38, cdd.status << 8); + set_reg16(0x3a, lut_BCD_16[(lba/75)/60]); + set_reg16(0x3c, lut_BCD_16[(lba/75)%60]); + set_reg16(0x3e, lut_BCD_16[(lba%75)]); + Pico_mcd->regs[0x40>>1].byte.h = cdd.index ? 0x00 : 0x04; /* Current block flags in RS8 (bit0 = mute status, bit1: pre-emphasis status, bit2: track type) */ + break; + } + + case 0x01: /* Current Track Relative Time (MM:SS:FF) */ + { + int lba = cdd.lba - cdd.toc.tracks[cdd.index].start; + set_reg16(0x38, (cdd.status << 8) | 0x01); + set_reg16(0x3a, lut_BCD_16[(lba/75)/60]); + set_reg16(0x3c, lut_BCD_16[(lba/75)%60]); + set_reg16(0x3e, lut_BCD_16[(lba%75)]); + Pico_mcd->regs[0x40>>1].byte.h = cdd.index ? 0x00 : 0x04; /* Current block flags in RS8 (bit0 = mute status, bit1: pre-emphasis status, bit2: track type) */ + break; + } + + case 0x02: /* Current Track Number */ + { + set_reg16(0x38, (cdd.status << 8) | 0x02); + set_reg16(0x3a, (cdd.index < cdd.toc.last) ? lut_BCD_16[cdd.index + 1] : 0x0A0A); + set_reg16(0x3c, 0x0000); + set_reg16(0x3e, 0x0000); /* Disk Control Code (?) 
in RS6 */ + Pico_mcd->regs[0x40>>1].byte.h = 0x00; + break; + } + + case 0x03: /* Total length (MM:SS:FF) */ + { + int lba = cdd.toc.end + 150; + set_reg16(0x38, (cdd.status << 8) | 0x03); + set_reg16(0x3a, lut_BCD_16[(lba/75)/60]); + set_reg16(0x3c, lut_BCD_16[(lba/75)%60]); + set_reg16(0x3e, lut_BCD_16[(lba%75)]); + Pico_mcd->regs[0x40>>1].byte.h = 0x00; + break; + } + + case 0x04: /* First & Last Track Numbers */ + { + set_reg16(0x38, (cdd.status << 8) | 0x04); + set_reg16(0x3a, 0x0001); + set_reg16(0x3c, lut_BCD_16[cdd.toc.last]); + set_reg16(0x3e, 0x0000); /* Drive Version (?) in RS6-RS7 */ + Pico_mcd->regs[0x40>>1].byte.h = 0x00; /* Lead-In flags in RS8 (bit0 = mute status, bit1: pre-emphasis status, bit2: track type) */ + break; + } + + case 0x05: /* Track Start Time (MM:SS:FF) */ + { + int track = Pico_mcd->regs[0x46>>1].byte.h * 10 + Pico_mcd->regs[0x46>>1].byte.l; + int lba = cdd.toc.tracks[track-1].start + 150; + set_reg16(0x38, (cdd.status << 8) | 0x05); + set_reg16(0x3a, lut_BCD_16[(lba/75)/60]); + set_reg16(0x3c, lut_BCD_16[(lba/75)%60]); + set_reg16(0x3e, lut_BCD_16[(lba%75)]); + Pico_mcd->regs[0x40>>1].byte.h = track % 10; /* Track Number (low digit) */ + if (track == 1) + { + /* RS6 bit 3 is set for the first (DATA) track */ + Pico_mcd->regs[0x3e>>1].byte.h |= 0x08; + } + break; + } + + default: + { +#ifdef LOG_ERROR + error("Unknown CDD Command %02X (%X)\n", Pico_mcd->regs[0x44>>1].byte.l, s68k.pc); +#endif + return; + } + } + break; + } + + case 0x03: /* Play */ + { + /* reset track index */ + int index = 0; + + /* new LBA position */ + int lba = ((Pico_mcd->regs[0x44>>1].byte.h * 10 + Pico_mcd->regs[0x44>>1].byte.l) * 60 + + (Pico_mcd->regs[0x46>>1].byte.h * 10 + Pico_mcd->regs[0x46>>1].byte.l)) * 75 + + (Pico_mcd->regs[0x48>>1].byte.h * 10 + Pico_mcd->regs[0x48>>1].byte.l) - 150; + + /* CD drive latency */ + if (!cdd.latency) + { + /* Fixes a few games hanging during intro because they expect data to be read with some delay */ + /* Radical Rex 
needs at least one interrupt delay */ + /* Wolf Team games (Anet Futatabi, Cobra Command, Road Avenger & Time Gal) need at least 6 interrupts delay */ + /* Space Adventure Cobra (2nd morgue scene) needs at least 13 interrupts delay (incl. seek time, so 6 is OK) */ + /* Jeopardy & ESPN Sunday Night NFL are picky about this as well: 10 interrupts delay (+ seek time) seems OK */ + cdd.latency = 10; + } + + /* CD drive seek time */ + /* max. seek time = 1.5 s = 1.5 x 75 = 112.5 CDD interrupts (rounded to 120) for 270000 sectors max on disc. */ + /* Note: This is only a rough approximation since, on real hardware, seek time is much likely not linear and */ + /* latency much larger than above value, but this model works fine for Sonic CD (track 26 playback needs to */ + /* be enough delayed to start in sync with intro sequence, as compared with real hardware recording). */ + if (lba > cdd.lba) + { + cdd.latency += (((lba - cdd.lba) * 120) / 270000); + } + else + { + cdd.latency += (((cdd.lba - lba) * 120) / 270000); + } + + /* update current LBA */ + cdd.lba = lba; + + /* get track index */ + while ((cdd.toc.tracks[index].end <= lba) && (index < cdd.toc.last)) index++; + +#ifdef USE_LIBTREMOR +#ifdef DISABLE_MANY_OGG_OPEN_FILES + /* check if track index has changed */ + if (index != cdd.index) + { + /* close previous track VORBIS file structure to save memory */ + if (cdd.toc.tracks[cdd.index].vf.datasource) + { + ogg_free(cdd.index); + } + + /* open current track VORBIS file */ + if (cdd.toc.tracks[index].vf.seekable) + { + ov_open(cdd.toc.tracks[index].fd,&cdd.toc.tracks[index].vf,0,0); + } + } +#endif +#endif + + /* update current track index */ + cdd.index = index; + + /* stay within track limits when seeking files */ + if (lba < cdd.toc.tracks[index].start) + { + lba = cdd.toc.tracks[index].start; + } + + /* seek to current block */ + if (!index) + { + /* DATA track */ + pm_seek(cdd.toc.tracks[0].fd, lba * cdd.sectorSize, SEEK_SET); + } +#ifdef USE_LIBTREMOR + else 
if (cdd.toc.tracks[index].vf.seekable) + { + /* VORBIS AUDIO track */ + ov_pcm_seek(&cdd.toc.tracks[index].vf, (lba - cdd.toc.tracks[index].start) * 588 - cdd.toc.tracks[index].offset); + } +#endif +#if 0 + else if (cdd.toc.tracks[index].fd) + { + /* PCM AUDIO track */ + fseek(cdd.toc.tracks[index].fd, (lba * 2352) - cdd.toc.tracks[index].offset, SEEK_SET); + } +#else + else + { + cdd_change_track(index, lba); + } +#endif + + /* no audio track playing (yet) */ + Pico_mcd->regs[0x36>>1].byte.h = 0x01; + + /* update status */ + cdd.status = CD_PLAY; + + /* return track index in RS2-RS3 */ + set_reg16(0x38, (CD_PLAY << 8) | 0x02); + set_reg16(0x3a, (cdd.index < cdd.toc.last) ? lut_BCD_16[index + 1] : 0x0A0A); + set_reg16(0x3c, 0x0000); + set_reg16(0x3e, 0x0000); + Pico_mcd->regs[0x40>>1].byte.h = 0x00; + break; + } + + case 0x04: /* Seek */ + { + /* reset track index */ + int index = 0; + + /* new LBA position */ + int lba = ((Pico_mcd->regs[0x44>>1].byte.h * 10 + Pico_mcd->regs[0x44>>1].byte.l) * 60 + + (Pico_mcd->regs[0x46>>1].byte.h * 10 + Pico_mcd->regs[0x46>>1].byte.l)) * 75 + + (Pico_mcd->regs[0x48>>1].byte.h * 10 + Pico_mcd->regs[0x48>>1].byte.l) - 150; + + /* CD drive seek time */ + /* We are using similar linear model as above, although still not exactly accurate, */ + /* it works fine for Switch/Panic! intro (Switch needs at least 30 interrupts while */ + /* seeking from 00:05:63 to 24:03:19, Panic! 
when seeking from 00:05:60 to 24:06:07) */ + if (lba > cdd.lba) + { + cdd.latency = ((lba - cdd.lba) * 120) / 270000; + } + else + { + cdd.latency = ((cdd.lba - lba) * 120) / 270000; + } + + /* update current LBA */ + cdd.lba = lba; + + /* get current track index */ + while ((cdd.toc.tracks[index].end <= lba) && (index < cdd.toc.last)) index++; + +#ifdef USE_LIBTREMOR +#ifdef DISABLE_MANY_OGG_OPEN_FILES + /* check if track index has changed */ + if (index != cdd.index) + { + /* close previous track VORBIS file structure to save memory */ + if (cdd.toc.tracks[cdd.index].vf.datasource) + { + ogg_free(cdd.index); + } + + /* open current track VORBIS file */ + if (cdd.toc.tracks[index].vf.seekable) + { + ov_open(cdd.toc.tracks[index].fd,&cdd.toc.tracks[index].vf,0,0); + } + } +#endif +#endif + + /* update current track index */ + cdd.index = index; + + /* stay within track limits */ + if (lba < cdd.toc.tracks[index].start) + { + lba = cdd.toc.tracks[index].start; + } + + /* seek to current block */ + if (!index) + { + /* DATA track */ + pm_seek(cdd.toc.tracks[0].fd, lba * cdd.sectorSize, SEEK_SET); + } +#ifdef USE_LIBTREMOR + else if (cdd.toc.tracks[index].vf.seekable) + { + /* VORBIS AUDIO track */ + ov_pcm_seek(&cdd.toc.tracks[index].vf, (lba - cdd.toc.tracks[index].start) * 588 - cdd.toc.tracks[index].offset); + } +#endif +#if 0 + else if (cdd.toc.tracks[index].fd) + { + /* PCM AUDIO track */ + fseek(cdd.toc.tracks[index].fd, (lba * 2352) - cdd.toc.tracks[index].offset, SEEK_SET); + } +#endif + + /* no audio track playing */ + Pico_mcd->regs[0x36>>1].byte.h = 0x01; + + /* update status */ + cdd.status = CD_SEEK; + + /* unknown RS1-RS8 values (returning 0xF in RS1 invalidates track infos in RS2-RS8 and fixes Final Fight CD intro when seek time is emulated) */ + set_reg16(0x38, (CD_SEEK << 8) | 0x0f); + set_reg16(0x3a, 0x0000); + set_reg16(0x3c, 0x0000); + set_reg16(0x3e, 0x0000); + set_reg16(0x40, ~(CD_SEEK + 0xf) & 0x0f); + return; + } + + case 0x06: /* Pause */ + { 
+ /* no audio track playing */ + Pico_mcd->regs[0x36>>1].byte.h = 0x01; + + /* update status (RS1-RS8 unchanged) */ + cdd.status = Pico_mcd->regs[0x38>>1].byte.h = CD_READY; + break; + } + + case 0x07: /* Resume */ + { + /* update status (RS1-RS8 unchanged) */ + cdd.status = Pico_mcd->regs[0x38>>1].byte.h = CD_PLAY; + break; + } + + case 0x08: /* Forward Scan */ + { + /* reset scanning direction / speed */ + cdd.scanOffset = CD_SCAN_SPEED; + + /* update status (RS1-RS8 unchanged) */ + cdd.status = Pico_mcd->regs[0x38>>1].byte.h = CD_SCAN; + break; + } + + case 0x09: /* Rewind Scan */ + { + /* reset scanning direction / speed */ + cdd.scanOffset = -CD_SCAN_SPEED; + + /* update status (RS1-RS8 unchanged) */ + cdd.status = Pico_mcd->regs[0x38>>1].byte.h = CD_SCAN; + break; + } + + + case 0x0a: /* N-Track Jump Control ? (usually sent before CD_SEEK or CD_PLAY commands) */ + { + /* TC3 corresponds to seek direction (00=forward, FF=reverse) */ + /* TC4-TC7 are related to seek length (4x4 bits i.e parameter values are between -65535 and +65535) */ + /* Maybe related to number of auto-sequenced track jumps/moves for CD DSP (cf. CXD2500BQ datasheet) */ + /* also see US Patent nr. 5222054 for a detailled description of seeking operation using Track Jump */ + + /* no audio track playing */ + Pico_mcd->regs[0x36>>1].byte.h = 0x01; + + /* update status (RS1-RS8 unchanged) */ + cdd.status = Pico_mcd->regs[0x38>>1].byte.h = CD_READY; + break; + } + + case 0x0c: /* Close Tray */ + { + /* no audio track playing */ + Pico_mcd->regs[0x36>>1].byte.h = 0x01; + + /* update status */ + cdd.status = cdd.loaded ? CD_STOP : NO_DISC; + + /* RS1-RS8 ignored, expects 0x0 ("no disc" ?) 
in RS0 once */ + set_reg16(0x38, 0x0000); + set_reg16(0x3a, 0x0000); + set_reg16(0x3c, 0x0000); + set_reg16(0x3e, 0x0000); + set_reg16(0x40, 0x000f); + + if (PicoMCDcloseTray) + PicoMCDcloseTray(); + return; + } + + case 0x0d: /* Open Tray */ + { + /* no audio track playing */ + Pico_mcd->regs[0x36>>1].byte.h = 0x01; + + /* update status (RS1-RS8 ignored) */ + cdd.status = CD_OPEN; + set_reg16(0x38, CD_OPEN << 8); + set_reg16(0x3a, 0x0000); + set_reg16(0x3c, 0x0000); + set_reg16(0x3e, 0x0000); + set_reg16(0x40, ~CD_OPEN & 0x0f); + + if (PicoMCDopenTray) + PicoMCDopenTray(); + return; + } + + default: /* Unknown command */ +#ifdef LOG_CDD + error("Unknown CDD Command !!!\n"); +#endif + Pico_mcd->regs[0x38>>1].byte.h = cdd.status; + break; + } + + /* only compute checksum when necessary */ + Pico_mcd->regs[0x40>>1].byte.l = + ~(Pico_mcd->regs[0x38>>1].byte.h + Pico_mcd->regs[0x38>>1].byte.l + + Pico_mcd->regs[0x3a>>1].byte.h + Pico_mcd->regs[0x3a>>1].byte.l + + Pico_mcd->regs[0x3c>>1].byte.h + Pico_mcd->regs[0x3c>>1].byte.l + + Pico_mcd->regs[0x3e>>1].byte.h + Pico_mcd->regs[0x3e>>1].byte.l + + Pico_mcd->regs[0x40>>1].byte.h) & 0x0f; +} + +// vim:shiftwidth=2:ts=2:expandtab diff --git a/pico/cd/cdd.h b/pico/cd/cdd.h new file mode 100644 index 00000000..4789cdb4 --- /dev/null +++ b/pico/cd/cdd.h @@ -0,0 +1,98 @@ +/*************************************************************************************** + * Genesis Plus + * CD drive processor & CD-DA fader + * + * Copyright (C) 2012-2013 Eke-Eke (Genesis Plus GX) + * + * Redistribution and use of this code or any derivative works are permitted + * provided that the following conditions are met: + * + * - Redistributions may not be sold, nor may they be used in a commercial + * product or activity. + * + * - Redistributions that are modified from the original source must include the + * complete source code, including the source code for all components used by a + * binary built from the modified sources. 
However, as a special exception, the + * source code distributed need not include anything that is normally distributed + * (in either source or binary form) with the major components (compiler, kernel, + * and so on) of the operating system on which the executable runs, unless that + * component itself accompanies the executable. + * + * - Redistributions must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other + * materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + ****************************************************************************************/ +#ifndef _HW_CDD_ +#define _HW_CDD_ + +#ifdef USE_LIBTREMOR +#include "tremor/ivorbisfile.h" +#endif + +/* CDD status */ +#define NO_DISC 0x00 +#define CD_PLAY 0x01 +#define CD_SEEK 0x02 +#define CD_SCAN 0x03 +#define CD_READY 0x04 +#define CD_OPEN 0x05 /* similar to 0x0E ? 
*/ +#define CD_STOP 0x09 +#define CD_END 0x0C + +/* CD blocks scanning speed */ +#define CD_SCAN_SPEED 30 + +#define CD_MAX_TRACKS 100 + +/* CD track */ +typedef struct +{ + void *fd; +#ifdef USE_LIBTREMOR + OggVorbis_File vf; +#endif + int offset; + int start; + int end; +} track_t; + +/* CD TOC */ +typedef struct +{ + int end; + int last; + track_t tracks[CD_MAX_TRACKS]; +} toc_t; + +/* CDD hardware */ +typedef struct +{ + uint32 cycles; + uint32 latency; + int loaded; + int index; + int lba; + int scanOffset; + int volume; + uint8 status; + uint16 sectorSize; + toc_t toc; + int16 audio[2]; +} cdd_t; + +extern cdd_t cdd; + +#endif diff --git a/pico/cd/cue.c b/pico/cd/cue.c index a038ccf7..78c6b0f5 100644 --- a/pico/cd/cue.c +++ b/pico/cd/cue.c @@ -242,6 +242,8 @@ file_ok: data->tracks[count].type = CT_MP3; else if (strcasecmp(ext, "wav") == 0) data->tracks[count].type = CT_WAV; + else if (strcasecmp(ext, "bin") == 0) + data->tracks[count].type = CT_BIN; else { elprintf(EL_STATUS, "unhandled audio format: \"%s\"", data->tracks[count].fname); diff --git a/pico/cd/mcd.c b/pico/cd/mcd.c index 33553613..cad03e9f 100644 --- a/pico/cd/mcd.c +++ b/pico/cd/mcd.c @@ -22,13 +22,10 @@ void (*PicoMCDcloseTray)(void) = NULL; PICO_INTERNAL void PicoInitMCD(void) { SekInitS68k(); - Init_CD_Driver(); - gfx_init(); } PICO_INTERNAL void PicoExitMCD(void) { - End_CD_Driver(); } PICO_INTERNAL void PicoPowerMCD(void) @@ -45,9 +42,11 @@ PICO_INTERNAL void PicoPowerMCD(void) memset(Pico_mcd->s68k_regs, 0, sizeof(Pico_mcd->s68k_regs)); memset(&Pico_mcd->pcm, 0, sizeof(Pico_mcd->pcm)); memset(&Pico_mcd->m, 0, sizeof(Pico_mcd->m)); + Pico_mcd->s68k_regs[0x38+9] = 0x0f; // default checksum cdc_init(); - Reset_CD(); + cdd_reset(); + gfx_init(); // cold reset state (tested) Pico_mcd->m.state_flags = PCD_ST_S68K_RST; @@ -62,7 +61,7 @@ void pcd_soft_reset(void) Pico_mcd->m.s68k_pend_ints = 0; cdc_reset(); - CDD_Reset(); + cdd_reset(); #ifdef _ASM_CD_MEMORY_C //PicoMemResetCDdecode(1); // don't 
have to call this in 2M mode #endif @@ -135,7 +134,20 @@ unsigned int pcd_cycles_m68k_to_s68k(unsigned int c) static void pcd_cdc_event(unsigned int now) { // 75Hz CDC update - Check_CD_Command(); + cdd_update(); + + /* check if a new CDD command has been processed */ + if (!(Pico_mcd->s68k_regs[0x4b] & 0xf0)) + { + /* reset CDD command wait flag */ + Pico_mcd->s68k_regs[0x4b] = 0xf0; + + if (Pico_mcd->s68k_regs[0x33] & PCDS_IEN4) { + elprintf(EL_INTS|EL_CD, "s68k: cdd irq 4"); + SekInterruptS68k(4); + } + } + pcd_event_schedule(now, PCD_EVENT_CDC, 12500000/75); } diff --git a/pico/cd/memory.c b/pico/cd/memory.c index d3a2927e..7d838fc9 100644 --- a/pico/cd/memory.c +++ b/pico/cd/memory.c @@ -407,8 +407,11 @@ void s68k_reg_write8(u32 a, u32 d) elprintf(EL_CDREGS|EL_CD, "s68k irq mask: %02x", d); d &= 0x7e; if ((d ^ Pico_mcd->s68k_regs[0x33]) & d & PCDS_IEN4) { - if (Pico_mcd->s68k_regs[0x37] & 4) - CDD_Export_Status(); + // XXX: emulate pending irq instead? + if (Pico_mcd->s68k_regs[0x37] & 4) { + elprintf(EL_INTS, "cdd export irq 4 (unmask)"); + SekInterruptS68k(4); + } } break; case 0x34: // fader @@ -418,15 +421,21 @@ void s68k_reg_write8(u32 a, u32 d) return; // d/m bit is unsetable case 0x37: { u32 d_old = Pico_mcd->s68k_regs[0x37]; - Pico_mcd->s68k_regs[0x37] = d&7; + Pico_mcd->s68k_regs[0x37] = d & 7; if ((d&4) && !(d_old&4)) { - CDD_Export_Status(); + // ?? 
+ pcd_event_schedule_s68k(PCD_EVENT_CDC, 12500000/75); + + if (Pico_mcd->s68k_regs[0x33] & PCDS_IEN4) { + elprintf(EL_INTS, "cdd export irq 4"); + SekInterruptS68k(4); + } } return; } case 0x4b: Pico_mcd->s68k_regs[a] = (u8) d; - CDD_Import_Command(); + cdd_process(); return; case 0x58: return; diff --git a/pico/media.c b/pico/media.c index 904693f9..3ba45324 100644 --- a/pico/media.c +++ b/pico/media.c @@ -198,7 +198,7 @@ enum media_type_e PicoLoadMedia(const char *filename, { const char *rom_fname = filename; enum media_type_e media_type; - cd_img_type cd_img_type = CIT_NOT_CD; + enum cd_img_type cd_img_type = CIT_NOT_CD; unsigned char *rom_data = NULL; unsigned int rom_size = 0; pm_file *rom = NULL; @@ -210,7 +210,7 @@ enum media_type_e PicoLoadMedia(const char *filename, goto out; if ((PicoAHW & PAHW_MCD) && Pico_mcd != NULL) - Stop_CD(); + cdd_unload(); PicoCartUnload(); PicoAHW = 0; PicoQuirks = 0; @@ -291,7 +291,7 @@ enum media_type_e PicoLoadMedia(const char *filename, // insert CD if it was detected if (cd_img_type != CIT_NOT_CD) { - ret = Insert_CD(filename, cd_img_type); + ret = cdd_load(filename, cd_img_type); if (ret != 0) { PicoCartUnload(); media_type = PM_BAD_CD; diff --git a/pico/pico.h b/pico/pico.h index 41ba7fb0..d5416727 100644 --- a/pico/pico.h +++ b/pico/pico.h @@ -102,7 +102,6 @@ void PicoGetInternal(pint_t which, pint_ret_t *ret); // cd/mcd.c extern void (*PicoMCDopenTray)(void); extern void (*PicoMCDcloseTray)(void); -extern int PicoCDBuffers; // pico.c #define XPCM_BUFFER_SIZE (320+160) @@ -128,14 +127,9 @@ void *PicoTmpStateSave(void); void PicoTmpStateRestore(void *data); extern void (*PicoStateProgressCB)(const char *str); -// cd/buffering.c -void PicoCDBufferInit(void); -void PicoCDBufferFree(void); -void PicoCDBufferFlush(void); - -// cd/cd_sys.c -int Insert_CD(const char *cdimg_name, int type); -int Stop_CD(void); // unloads CD, returns 1 if there was cd loaded +// cd/cdd.c +int cdd_load(const char *filename, int type); +int 
cdd_unload(void); // Cart.c typedef enum @@ -244,6 +238,15 @@ enum media_type_e { PM_MARK3, PM_CD, }; + +enum cd_img_type +{ + CIT_NOT_CD = 0, + CIT_ISO, + CIT_BIN, + CIT_CUE +}; + enum media_type_e PicoLoadMedia(const char *filename, const char *carthw_cfg_fname, const char *(*get_bios_filename)(int *region, const char *cd_fname), diff --git a/pico/pico_int.h b/pico/pico_int.h index d4d6d7de..76c4812e 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -374,9 +374,6 @@ struct PicoSRAM }; // MCD -#include "cd/cd_sys.h" -#include "cd/LC89510.h" - #define PCM_MIXBUF_LEN ((12500000 / 384) / 50 + 1) struct mcd_pcm @@ -448,9 +445,8 @@ typedef struct unsigned char bram[0x2000]; // 110200: 8K struct mcd_misc m; // 112200: misc struct mcd_pcm pcm; // 112240: - _scd_toc TOC; // not to be saved - CDD cdd; - _scd scd; + void *cdda_stream; + int cdda_type; int pcm_mixbuf[PCM_MIXBUF_LEN * 2]; int pcm_mixpos; char pcm_mixbuf_dirty; @@ -628,6 +624,19 @@ void cdc_reg_w(unsigned char data); unsigned char cdc_reg_r(void); unsigned short cdc_host_r(void); +// cd/cdd.c +void cdd_reset(void); +int cdd_context_save(unsigned char *state); +int cdd_context_load(unsigned char *state); +int cdd_context_load_old(unsigned char *state); +void cdd_read_data(unsigned char *dst); +void cdd_read_audio(unsigned int samples); +void cdd_update(void); +void cdd_process(void); + +// cd/cd_image.c +int load_cd_image(const char *cd_img_name, int *type); + // cd/gfx.c void gfx_init(void); void gfx_start(unsigned int base); @@ -727,13 +736,14 @@ PICO_INTERNAL int SekInterruptS68k(int irq); void SekInterruptClearS68k(int irq); // sound/sound.c -PICO_INTERNAL void cdda_start_play(); extern short cdda_out_buffer[2*1152]; extern int PsndLen_exc_cnt; extern int PsndLen_exc_add; extern int timer_a_next_oflow, timer_a_step; // in z80 cycles extern int timer_b_next_oflow, timer_b_step; +void cdda_start_play(int lba_base, int lba_offset, int lb_len); + void ym2612_sync_timers(int z80_cycles, int mode_old, int 
mode_new); void ym2612_pack_state(void); void ym2612_unpack_state(void); @@ -786,9 +796,6 @@ PICO_INTERNAL void z80_exit(void); PICO_INTERNAL_ASM void wram_2M_to_1M(unsigned char *m); PICO_INTERNAL_ASM void wram_1M_to_2M(unsigned char *m); -// cd/buffering.c -PICO_INTERNAL void PicoCDBufferRead(void *dest, int lba); - // sound/sound.c PICO_INTERNAL void PsndReset(void); PICO_INTERNAL void PsndDoDAC(int line_to); diff --git a/pico/sound/sound.c b/pico/sound/sound.c index b12afc3c..69c1be03 100644 --- a/pico/sound/sound.c +++ b/pico/sound/sound.c @@ -132,8 +132,6 @@ void PsndRerate(int preserve_state) // feed it back it's own registers, just like after loading state memcpy(YM2612GetRegs(), state, 0x204); ym2612_unpack_state(); - if ((PicoAHW & PAHW_MCD) && !(Pico_mcd->s68k_regs[0x36] & 1) && (Pico_mcd->scd.Status_CDC & 1)) - cdda_start_play(); } if (preserve_state) memcpy(state, sn76496_regs, 28*4); // remember old state @@ -191,23 +189,19 @@ PICO_INTERNAL void PsndDoDAC(int line_to) } // cdda -static pm_file *cdda_stream = NULL; - static void cdda_raw_update(int *buffer, int length) { int ret, cdda_bytes, mult = 1; - if (cdda_stream == NULL) - return; cdda_bytes = length*4; if (PsndRate <= 22050 + 100) mult = 2; if (PsndRate < 22050 - 100) mult = 4; cdda_bytes *= mult; - ret = pm_read(cdda_out_buffer, cdda_bytes, cdda_stream); + ret = pm_read(cdda_out_buffer, cdda_bytes, Pico_mcd->cdda_stream); if (ret < cdda_bytes) { memset((char *)cdda_out_buffer + ret, 0, cdda_bytes - ret); - cdda_stream = NULL; + Pico_mcd->cdda_stream = NULL; return; } @@ -219,51 +213,24 @@ static void cdda_raw_update(int *buffer, int length) } } -PICO_INTERNAL void cdda_start_play(void) +void cdda_start_play(int lba_base, int lba_offset, int lb_len) { - int lba_offset, index, lba_length, i; - - elprintf(EL_STATUS, "cdda play track #%i", Pico_mcd->scd.Cur_Track); - - index = Pico_mcd->scd.Cur_Track - 1; - - lba_offset = Pico_mcd->scd.Cur_LBA - Track_to_LBA(index + 1); - if (lba_offset < 0) 
lba_offset = 0; - lba_offset += Pico_mcd->TOC.Tracks[index].Offset; - - // find the actual file for this track - for (i = index; i > 0; i--) - if (Pico_mcd->TOC.Tracks[i].F != NULL) break; - - if (Pico_mcd->TOC.Tracks[i].F == NULL) { - elprintf(EL_STATUS|EL_ANOMALY, "no track?!"); - return; - } - - if (Pico_mcd->TOC.Tracks[i].ftype == CT_MP3) + if (Pico_mcd->cdda_type == CT_MP3) { int pos1024 = 0; - lba_length = Pico_mcd->TOC.Tracks[i].Length; - for (i++; i < Pico_mcd->TOC.Last_Track; i++) { - if (Pico_mcd->TOC.Tracks[i].F != NULL) break; - lba_length += Pico_mcd->TOC.Tracks[i].Length; - } - if (lba_offset) - pos1024 = lba_offset * 1024 / lba_length; + pos1024 = lba_offset * 1024 / lb_len; - mp3_start_play(Pico_mcd->TOC.Tracks[index].F, pos1024); + mp3_start_play(Pico_mcd->cdda_stream, pos1024); return; } - cdda_stream = Pico_mcd->TOC.Tracks[i].F; - PicoCDBufferFlush(); // buffering relies on fp not being touched - pm_seek(cdda_stream, lba_offset * 2352, SEEK_SET); - if (Pico_mcd->TOC.Tracks[i].ftype == CT_WAV) + pm_seek(Pico_mcd->cdda_stream, (lba_base + lba_offset) * 2352, SEEK_SET); + if (Pico_mcd->cdda_type == CT_WAV) { // skip headers, assume it's 44kHz stereo uncompressed - pm_seek(cdda_stream, 44, SEEK_CUR); + pm_seek(Pico_mcd->cdda_stream, 44, SEEK_CUR); } } @@ -330,13 +297,12 @@ static int PsndRender(int offset, int length) // CD: CDDA audio // CD mode, cdda enabled, not data track, CDC is reading - if ((PicoAHW & PAHW_MCD) && (PicoOpt & POPT_EN_MCD_CDDA) && - !(Pico_mcd->s68k_regs[0x36] & 1) && (Pico_mcd->scd.Status_CDC & 1)) + if ((PicoAHW & PAHW_MCD) && (PicoOpt & POPT_EN_MCD_CDDA) + && Pico_mcd->cdda_stream != NULL + && !(Pico_mcd->s68k_regs[0x36] & 1)) { // note: only 44, 22 and 11 kHz supported, with forced stereo - int index = Pico_mcd->scd.Cur_Track - 1; - - if (Pico_mcd->TOC.Tracks[index].ftype == CT_MP3) + if (Pico_mcd->cdda_type == CT_MP3) mp3_update(buf32, length, stereo); else cdda_raw_update(buf32, length); diff --git a/pico/state.c 
b/pico/state.c index 94cefade..089957eb 100644 --- a/pico/state.c +++ b/pico/state.c @@ -149,8 +149,8 @@ typedef enum { CHUNK_GA_REGS, CHUNK_PCM, CHUNK_CDC, // old - CHUNK_CDD, // 20 - CHUNK_SCD, + CHUNK_CDD, // 20 old + CHUNK_SCD, // old CHUNK_RC, // old CHUNK_MISC_CD, // @@ -177,6 +177,7 @@ typedef enum { CHUNK_CD_EVT = 50, CHUNK_CD_GFX, CHUNK_CD_CDC, + CHUNK_CD_CDD, // CHUNK_DEFAULT_COUNT, CHUNK_CARTHW_ = CHUNK_CARTHW, // 64 (defined in PicoInt) @@ -317,8 +318,6 @@ static int state_save(void *file) CHECKED_WRITE_BUFF(CHUNK_BRAM, Pico_mcd->bram); CHECKED_WRITE_BUFF(CHUNK_GA_REGS, Pico_mcd->s68k_regs); // GA regs, not CPU regs CHECKED_WRITE_BUFF(CHUNK_PCM, Pico_mcd->pcm); - CHECKED_WRITE_BUFF(CHUNK_CDD, Pico_mcd->cdd); - CHECKED_WRITE_BUFF(CHUNK_SCD, Pico_mcd->scd); CHECKED_WRITE_BUFF(CHUNK_MISC_CD, Pico_mcd->m); memset(buff, 0, 0x40); memcpy(buff, pcd_event_times, sizeof(pcd_event_times)); @@ -328,6 +327,8 @@ static int state_save(void *file) CHECKED_WRITE(CHUNK_CD_GFX, len, buf2); len = cdc_context_save(buf2); CHECKED_WRITE(CHUNK_CD_CDC, len, buf2); + len = cdd_context_save(buf2); + CHECKED_WRITE(CHUNK_CD_CDD, len, buf2); if (Pico_mcd->s68k_regs[3] & 4) // convert back wram_2M_to_1M(Pico_mcd->word_ram2M); @@ -498,8 +499,6 @@ static int state_load(void *file) case CHUNK_BRAM: CHECKED_READ_BUFF(Pico_mcd->bram); break; case CHUNK_GA_REGS: CHECKED_READ_BUFF(Pico_mcd->s68k_regs); break; case CHUNK_PCM: CHECKED_READ_BUFF(Pico_mcd->pcm); break; - case CHUNK_CDD: CHECKED_READ_BUFF(Pico_mcd->cdd); break; - case CHUNK_SCD: CHECKED_READ_BUFF(Pico_mcd->scd); break; case CHUNK_MISC_CD: CHECKED_READ_BUFF(Pico_mcd->m); break; case CHUNK_CD_EVT: @@ -517,12 +516,22 @@ static int state_load(void *file) len_check = cdc_context_load(buf); break; + case CHUNK_CD_CDD: + CHECKED_READ_LIM(buf); + len_check = cdd_context_load(buf); + break; + // old, to be removed: case CHUNK_CDC: CHECKED_READ_LIM(buf); cdc_context_load_old(buf); break; + case CHUNK_SCD: + CHECKED_READ_LIM(buf); + 
cdd_context_load_old(buf); + break; + // 32x stuff #ifndef NO_32X case CHUNK_MSH2: @@ -596,9 +605,6 @@ readend: { SekCycleAimS68k = SekCycleCntS68k; pcd_state_loaded(); - - if (!(Pico_mcd->s68k_regs[0x36] & 1) && (Pico_mcd->scd.Status_CDC & 1)) - cdda_start_play(); } retval = 0; diff --git a/platform/base_readme.txt b/platform/base_readme.txt index 175bcc1b..d2f7e96c 100644 --- a/platform/base_readme.txt +++ b/platform/base_readme.txt @@ -520,9 +520,6 @@ Homepage: http://www.mame.net/ Eke CD graphics processor and CD controller implementation (from Genesis Plus GX) - -Stephane Dallongeville -Gens, MD/Mega CD/32X emulator. Some Sega CD code is based on this emu. #ifdef PSP people @ ps2dev.org forums / PSPSDK crew @@ -537,6 +534,7 @@ Additional thanks * Charles MacDonald (http://cgfm2.emuviews.com/) for old but still very useful info about genesis hardware. * Steve Snake for all that he has done for Genesis emulation scene. +* Stephane Dallongeville for writing Gens and making it open source. * Tasco Deluxe for his reverse engineering work on SVP and some mappers. * Bart Trzynadlowski for his SSFII and 68000 docs. * Haze for his research (http://haze.mameworld.info). 
diff --git a/platform/common/common.mak b/platform/common/common.mak index c651bcad..fb59ecf3 100644 --- a/platform/common/common.mak +++ b/platform/common/common.mak @@ -93,9 +93,9 @@ DEFINES += NO_SMS endif # CD SRCS_COMMON += $(R)pico/cd/mcd.c $(R)pico/cd/memory.c $(R)pico/cd/sek.c \ - $(R)pico/cd/cdc.c $(R)pico/cd/cd_sys.c $(R)pico/cd/cd_file.c \ + $(R)pico/cd/cdc.c $(R)pico/cd/cdd.c $(R)pico/cd/cd_image.c \ $(R)pico/cd/cue.c $(R)pico/cd/gfx.c $(R)pico/cd/gfx_dma.c \ - $(R)pico/cd/misc.c $(R)pico/cd/pcm.c $(R)pico/cd/buffering.c + $(R)pico/cd/misc.c $(R)pico/cd/pcm.c # 32X ifneq "$(no_32x)" "1" SRCS_COMMON += $(R)pico/32x/32x.c $(R)pico/32x/memory.c $(R)pico/32x/draw.c \ diff --git a/platform/common/config_file.c b/platform/common/config_file.c index 324c993e..97369cb4 100644 --- a/platform/common/config_file.c +++ b/platform/common/config_file.c @@ -308,11 +308,6 @@ static int custom_read(menu_entry *me, const char *var, const char *val) return 0; return 1; - case MA_CDOPT_READAHEAD: - if (strcasecmp(var, "ReadAhead buffer") != 0) return 0; - PicoCDBuffers = atoi(val) / 2; - return 1; - case MA_32XOPT_MSH2_CYCLES: currentConfig.msh2_khz = atoi(val); Pico32xSetClocks(currentConfig.msh2_khz * 1000, 0); diff --git a/platform/common/emu.c b/platform/common/emu.c index c66c637c..18f63f2c 100644 --- a/platform/common/emu.c +++ b/platform/common/emu.c @@ -530,19 +530,20 @@ out: int emu_swap_cd(const char *fname) { - cd_img_type cd_type; + enum cd_img_type cd_type; int ret = -1; cd_type = PicoCdCheck(fname, NULL); if (cd_type != CIT_NOT_CD) - ret = Insert_CD(fname, cd_type); + ret = cdd_load(fname, cd_type); if (ret != 0) { menu_update_msg("Load failed, invalid CD image?"); return 0; } strncpy(rom_fname_loaded, fname, sizeof(rom_fname_loaded)-1); - rom_fname_loaded[sizeof(rom_fname_loaded)-1] = 0; + rom_fname_loaded[sizeof(rom_fname_loaded) - 1] = 0; + return 1; } @@ -606,7 +607,6 @@ void emu_set_defconfig(void) PsndRate = currentConfig.s_PsndRate; PicoRegionOverride 
= currentConfig.s_PicoRegion; PicoAutoRgnOrder = currentConfig.s_PicoAutoRgnOrder; - PicoCDBuffers = currentConfig.s_PicoCDBuffers; } int emu_read_config(const char *rom_fname, int no_defaults) @@ -1336,10 +1336,6 @@ void emu_loop(void) PicoLoopPrepare(); - // prepare CD buffer - if (PicoAHW & PAHW_MCD) - PicoCDBufferInit(); - plat_video_loop_prepare(); emu_loop_prep(); pemu_sound_start(); @@ -1501,10 +1497,4 @@ void emu_loop(void) pemu_loop_end(); emu_sound_stop(); - - // pemu_loop_end() might want to do 1 frame for bg image, - // so free CD buffer here - if (PicoAHW & PAHW_MCD) - PicoCDBufferFree(); } - diff --git a/platform/common/menu_pico.c b/platform/common/menu_pico.c index 372fad91..0f04bc41 100644 --- a/platform/common/menu_pico.c +++ b/platform/common/menu_pico.c @@ -395,31 +395,6 @@ static int menu_loop_keyconfig(int id, int keys) // ------------ SCD options menu ------------ -static const char *mgn_cdopt_ra(int id, int *offs) -{ - *offs = -5; - if (PicoCDBuffers <= 0) - return " OFF"; - sprintf(static_buff, "%5iK", PicoCDBuffers * 2); - return static_buff; -} - -static int mh_cdopt_ra(int id, int keys) -{ - if (keys & PBTN_LEFT) { - PicoCDBuffers >>= 1; - if (PicoCDBuffers < 2) - PicoCDBuffers = 0; - } else { - if (PicoCDBuffers <= 0) - PicoCDBuffers = 1; - PicoCDBuffers <<= 1; - if (PicoCDBuffers > 8*1024) - PicoCDBuffers = 8*1024; // 16M - } - return 0; -} - static const char h_cdleds[] = "Show power/CD LEDs of emulated console"; static const char h_cdda[] = "Play audio tracks from mp3s/wavs/bins"; static const char h_cdpcm[] = "Emulate PCM audio chip for effects/voices/music"; @@ -435,7 +410,6 @@ static menu_entry e_menu_cd_options[] = mee_onoff_h("CD LEDs", MA_CDOPT_LEDS, currentConfig.EmuOpt, EOPT_EN_CD_LEDS, h_cdleds), mee_onoff_h("CDDA audio", MA_CDOPT_CDDA, PicoOpt, POPT_EN_MCD_CDDA, h_cdda), mee_onoff_h("PCM audio", MA_CDOPT_PCM, PicoOpt, POPT_EN_MCD_PCM, h_cdpcm), - mee_cust ("ReadAhead buffer", MA_CDOPT_READAHEAD, mh_cdopt_ra, mgn_cdopt_ra), 
mee_onoff_h("SaveRAM cart", MA_CDOPT_SAVERAM, PicoOpt, POPT_EN_MCD_RAMCART, h_srcart), mee_onoff_h("Scale/Rot. fx (slow)", MA_CDOPT_SCALEROT_CHIP, PicoOpt, POPT_EN_MCD_GFX, h_scfx), mee_end, @@ -1045,7 +1019,7 @@ static int main_menu_handler(int id, int keys) break; case MA_MAIN_CHANGE_CD: if (PicoAHW & PAHW_MCD) { - if (!Stop_CD()) + if (!cdd_unload()) menu_loop_tray(); return 1; } diff --git a/platform/libretro.c b/platform/libretro.c index f59cf672..ee15d9c8 100644 --- a/platform/libretro.c +++ b/platform/libretro.c @@ -500,7 +500,7 @@ static unsigned int disk_get_image_index(void) static bool disk_set_image_index(unsigned int index) { - cd_img_type cd_type; + enum cd_img_type cd_type; int ret; if (index >= sizeof(disks) / sizeof(disks[0])) @@ -521,7 +521,7 @@ static bool disk_set_image_index(unsigned int index) ret = -1; cd_type = PicoCdCheck(disks[index].fname, NULL); if (cd_type != CIT_NOT_CD) - ret = Insert_CD(disks[index].fname, cd_type); + ret = cdd_load(disks[index].fname, cd_type); if (ret != 0) { lprintf("Load failed, invalid CD image?\n"); return 0; @@ -882,7 +882,6 @@ void retro_init(void) #endif PsndRate = 44100; PicoAutoRgnOrder = 0x184; // US, EU, JP - PicoCDBuffers = 0; vout_width = 320; vout_height = 240; From 7b3ddc11dc21025f2a64116d664c745c07c54984 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 7 Oct 2013 01:39:53 +0300 Subject: [PATCH 0009/1110] cd: hacks.. --- pico/cd/cdd.c | 15 +++++++++++++-- pico/cd/mcd.c | 7 ++++--- pico/cd/memory.c | 17 ++++++++++++++++- platform/common/menu_pico.c | 2 ++ 4 files changed, 35 insertions(+), 6 deletions(-) diff --git a/pico/cd/cdd.c b/pico/cd/cdd.c index 58a60536..2e37b60e 100644 --- a/pico/cd/cdd.c +++ b/pico/cd/cdd.c @@ -163,8 +163,8 @@ void cdd_reset(void) cdd.lba = 0; /* reset status */ - cdd.status = cdd.loaded ? 
CD_STOP : NO_DISC; - + cdd.status = NO_DISC; + /* reset CD-DA fader (full volume) */ cdd.volume = 0x400; @@ -426,6 +426,10 @@ int cdd_load(const char *filename, int type) /* CD loaded */ cdd.loaded = 1; + + /* disc not scanned yet */ + cdd.status = NO_DISC; + return 0; } @@ -473,6 +477,9 @@ int cdd_unload(void) /* CD unloaded */ cdd.loaded = 0; + + if (cdd.status != CD_OPEN) + cdd.status = NO_DISC; } /* reset TOC */ @@ -927,6 +934,9 @@ void cdd_process(void) case 0x02: /* Read TOC */ { + if (cdd.status == NO_DISC) + cdd.status = cdd.loaded ? CD_STOP : NO_DISC; + /* Infos automatically retrieved by CDD processor from Q-Channel */ /* commands 0x00-0x02 (current block) and 0x03-0x05 (Lead-In) */ switch (Pico_mcd->regs[0x44>>1].byte.l) @@ -1287,6 +1297,7 @@ void cdd_process(void) if (PicoMCDcloseTray) PicoMCDcloseTray(); + return; } diff --git a/pico/cd/mcd.c b/pico/cd/mcd.c index cad03e9f..a20b01dc 100644 --- a/pico/cd/mcd.c +++ b/pico/cd/mcd.c @@ -42,10 +42,8 @@ PICO_INTERNAL void PicoPowerMCD(void) memset(Pico_mcd->s68k_regs, 0, sizeof(Pico_mcd->s68k_regs)); memset(&Pico_mcd->pcm, 0, sizeof(Pico_mcd->pcm)); memset(&Pico_mcd->m, 0, sizeof(Pico_mcd->m)); - Pico_mcd->s68k_regs[0x38+9] = 0x0f; // default checksum cdc_init(); - cdd_reset(); gfx_init(); // cold reset state (tested) @@ -57,7 +55,7 @@ PICO_INTERNAL void PicoPowerMCD(void) void pcd_soft_reset(void) { - // Reset_CD(); // breaks Fahrenheit CD swap + elprintf(EL_CD, "cd: soft reset"); Pico_mcd->m.s68k_pend_ints = 0; cdc_reset(); @@ -66,6 +64,9 @@ void pcd_soft_reset(void) //PicoMemResetCDdecode(1); // don't have to call this in 2M mode #endif + memset(&Pico_mcd->s68k_regs[0x38], 0, 9); + Pico_mcd->s68k_regs[0x38+9] = 0x0f; // default checksum + pcd_event_schedule_s68k(PCD_EVENT_CDC, 12500000/75); // TODO: test if register state/timers change diff --git a/pico/cd/memory.c b/pico/cd/memory.c index 7d838fc9..05510788 100644 --- a/pico/cd/memory.c +++ b/pico/cd/memory.c @@ -434,8 +434,23 @@ void s68k_reg_write8(u32 
a, u32 d) return; } case 0x4b: - Pico_mcd->s68k_regs[a] = (u8) d; + Pico_mcd->s68k_regs[a] = 0; // (u8) d; ? cdd_process(); + { + static const char *nm[] = + { "stat", "stop", "read_toc", "play", + "seek", "???", "pause", "resume", + "ff", "fr", "tjump", "???", + "close","open", "???", "???" }; + u8 *c = &Pico_mcd->s68k_regs[0x42]; + u8 *s = &Pico_mcd->s68k_regs[0x38]; + elprintf(EL_CD, + "CDD command: %02x %02x %02x %02x %02x %02x %02x %02x %12s", + c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], nm[c[0] & 0x0f]); + elprintf(EL_CD, + "CDD status: %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x", + s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[8], s[9]); + } return; case 0x58: return; diff --git a/platform/common/menu_pico.c b/platform/common/menu_pico.c index 0f04bc41..306326bf 100644 --- a/platform/common/menu_pico.c +++ b/platform/common/menu_pico.c @@ -1019,6 +1019,8 @@ static int main_menu_handler(int id, int keys) break; case MA_MAIN_CHANGE_CD: if (PicoAHW & PAHW_MCD) { + // if cd is loaded, cdd_unload() triggers eject and + // returns 1, else we'll select and load new CD here if (!cdd_unload()) menu_loop_tray(); return 1; From 6901d0e45dbf77671d86cb9bf9af98c486db97c8 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 9 Oct 2013 02:26:52 +0300 Subject: [PATCH 0010/1110] fix yet another sync issue.. 
--- pico/cd/mcd.c | 35 +++++++++++++++++++++++++++++++++-- pico/cd/memory.c | 6 ++++++ pico/pico_int.h | 29 +++++++++++++++-------------- 3 files changed, 54 insertions(+), 16 deletions(-) diff --git a/pico/cd/mcd.c b/pico/cd/mcd.c index a20b01dc..687dfc3c 100644 --- a/pico/cd/mcd.c +++ b/pico/cd/mcd.c @@ -90,7 +90,32 @@ PICO_INTERNAL int PicoResetMCD(void) return 0; } -static __inline void SekRunS68k(unsigned int to) +static void SekRunM68kOnce(void) +{ + int cyc_do; + pevt_log_m68k_o(EVT_RUN_START); + + if ((cyc_do = SekCycleAim - SekCycleCnt) > 0) { + SekCycleCnt += cyc_do; + +#if defined(EMU_C68K) + PicoCpuCM68k.cycles = cyc_do; + CycloneRun(&PicoCpuCM68k); + SekCycleCnt -= PicoCpuCM68k.cycles; +#elif defined(EMU_M68K) + SekCycleCnt += m68k_execute(cyc_do) - cyc_do; +#elif defined(EMU_F68K) + SekCycleCnt += fm68k_emulate(cyc_do, 0) - cyc_do; +#endif + } + + SekCyclesLeft = 0; + + SekTrace(0); + pevt_log_m68k_o(EVT_RUN_END); +} + +static void SekRunS68k(unsigned int to) { int cyc_do; @@ -305,7 +330,13 @@ void pcd_run_cpus_normal(int m68k_cycles) SekCycleCnt = SekCycleAim - (s68k_left * 40220 >> 16); } - SekSyncM68k(); + while (CYCLES_GT(SekCycleAim, SekCycleCnt)) { + SekRunM68kOnce(); + if (Pico_mcd->m.need_sync) { + Pico_mcd->m.need_sync = 0; + pcd_sync_s68k(SekCycleCnt, 0); + } + } } void pcd_run_cpus_lockstep(int m68k_cycles) diff --git a/pico/cd/memory.c b/pico/cd/memory.c index 05510788..acf29c64 100644 --- a/pico/cd/memory.c +++ b/pico/cd/memory.c @@ -67,6 +67,12 @@ static void remap_word_ram(u32 r3); void m68k_comm_check(u32 a) { pcd_sync_s68k(SekCyclesDone(), 0); + if (a >= 0x0e && !Pico_mcd->m.need_sync) { + // there are cases when slave updates comm and only switches RAM + // over after that (mcd1b), so there must be a resync.. 
+ SekEndRun(64); + Pico_mcd->m.need_sync = 1; + } if (SekNotPolling || a != Pico_mcd->m.m68k_poll_a) { Pico_mcd->m.m68k_poll_a = a; Pico_mcd->m.m68k_poll_cnt = 0; diff --git a/pico/pico_int.h b/pico/pico_int.h index 76c4812e..48d52372 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -396,20 +396,21 @@ struct mcd_pcm struct mcd_misc { - unsigned short hint_vector; - unsigned char busreq; // not s68k_regs[1] - unsigned char s68k_pend_ints; - unsigned int state_flags; // 04 - unsigned int stopwatch_base_c; - unsigned short m68k_poll_a; - unsigned short m68k_poll_cnt; - unsigned short s68k_poll_a; - unsigned short s68k_poll_cnt; - unsigned int s68k_poll_clk; - unsigned char bcram_reg; // 18: battery-backed RAM cart register - unsigned char dmna_ret_2m; - unsigned short pad3; - int pad4[9]; + unsigned short hint_vector; + unsigned char busreq; // not s68k_regs[1] + unsigned char s68k_pend_ints; + unsigned int state_flags; // 04 + unsigned int stopwatch_base_c; + unsigned short m68k_poll_a; + unsigned short m68k_poll_cnt; + unsigned short s68k_poll_a; + unsigned short s68k_poll_cnt; + unsigned int s68k_poll_clk; + unsigned char bcram_reg; // 18: battery-backed RAM cart register + unsigned char dmna_ret_2m; + unsigned char need_sync; + unsigned char pad3; + int pad4[9]; }; typedef struct From ae632fd100beb3bd004e077a272cdbdf81108c54 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Thu, 26 Sep 2013 10:09:38 -0300 Subject: [PATCH 0011/1110] configure: Deduce the path to sdl-config from the compiler --- configure | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/configure b/configure index 097a2764..453d5309 100755 --- a/configure +++ b/configure @@ -51,6 +51,7 @@ optimize_arm920="no" CC="${CC-${CROSS_COMPILE}gcc}" CXX="${CXX-${CROSS_COMPILE}g++}" AS="${AS-${CROSS_COMPILE}as}" +SDL_CONFIG="`$CC --print-sysroot`/usr/bin/sdl-config" MAIN_LDLIBS="$LDLIBS -lm" config_mak="config.mak" @@ -308,7 +309,7 @@ if [ "x$sound_drivers" = "x" ]; then 
sound_drivers="$sound_drivers alsa" MAIN_LDLIBS="-lasound $MAIN_LDLIBS" fi - if [ "$need_sdl" = "yes" ] || check_sdl `sdl-config --cflags --libs`; then + if [ "$need_sdl" = "yes" ] || check_sdl `$SDL_CONFIG --cflags --libs`; then sound_drivers="$sound_drivers sdl" need_sdl="yes" fi @@ -323,11 +324,11 @@ else fi if [ "$need_sdl" = "yes" ]; then - which sdl-config > /dev/null || \ + [ -x "$SDL_CONFIG" ] || \ fail "sdl-config is missing; please install libsdl (libsdl1.2-dev)" - CFLAGS="$CFLAGS `sdl-config --cflags`" - MAIN_LDLIBS="`sdl-config --libs` $MAIN_LDLIBS" - check_sdl `sdl-config --libs` || fail "please install libsdl (libsdl1.2-dev)" + CFLAGS="$CFLAGS `$SDL_CONFIG --cflags`" + MAIN_LDLIBS="`$SDL_CONFIG --libs` $MAIN_LDLIBS" + check_sdl `$SDL_CONFIG --libs` || fail "please install libsdl (libsdl1.2-dev)" fi cat > $TMPC < Date: Sun, 22 Sep 2013 07:59:38 -0300 Subject: [PATCH 0012/1110] Menu: Use function plat_get_skin_dir to locate bg image --- platform/common/menu_pico.c | 7 +++++-- platform/libpicofe | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/platform/common/menu_pico.c b/platform/common/menu_pico.c index 306326bf..32a3fdbf 100644 --- a/platform/common/menu_pico.c +++ b/platform/common/menu_pico.c @@ -119,11 +119,14 @@ static void menu_enter(int is_rom_loaded) } else { + int pos; char buff[256]; + pos = plat_get_skin_dir(buff, 256); + strcpy(buff + pos, "background.png"); // should really only happen once, on startup.. 
- emu_make_path(buff, "skin/background.png", sizeof(buff)); - if (readpng(g_menubg_ptr, buff, READPNG_BG, g_menuscreen_w, g_menuscreen_h) < 0) + if (readpng(g_menubg_ptr, buff, READPNG_BG, + g_menuscreen_w, g_menuscreen_h) < 0) memset(g_menubg_ptr, 0, g_menuscreen_w * g_menuscreen_h * 2); } diff --git a/platform/libpicofe b/platform/libpicofe index 39014486..c52e6628 160000 --- a/platform/libpicofe +++ b/platform/libpicofe @@ -1 +1 @@ -Subproject commit 39014486f9e50110d23dece007ce4c0ed90d15b1 +Subproject commit c52e6628cdf9c53e9143e903ab793bf59987a0ea From eb7ce29e8dcfe0835a0b0193e99ca76f4ed6dcd1 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Mon, 7 Oct 2013 17:14:09 +0200 Subject: [PATCH 0013/1110] Add a couple of fixes to allow double buffering to work --- platform/common/emu.c | 2 ++ platform/common/plat_sdl.c | 14 ++++++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/platform/common/emu.c b/platform/common/emu.c index 18f63f2c..12d8c095 100644 --- a/platform/common/emu.c +++ b/platform/common/emu.c @@ -1379,6 +1379,8 @@ void emu_loop(void) { notice_msg_time = 0; plat_status_msg_clear(); + plat_video_flip(); + plat_status_msg_clear(); /* Do it again in case of double buffering */ notice_msg = NULL; } else { diff --git a/platform/common/plat_sdl.c b/platform/common/plat_sdl.c index 600af45a..fc7b200e 100644 --- a/platform/common/plat_sdl.c +++ b/platform/common/plat_sdl.c @@ -125,9 +125,11 @@ void plat_video_flip(void) gl_flip(shadow_fb, g_screen_width, g_screen_height); } else { - // XXX: no locking, but should be fine with SDL_SWSURFACE? 
+ if (SDL_MUSTLOCK(plat_sdl_screen)) + SDL_UnlockSurface(plat_sdl_screen); SDL_Flip(plat_sdl_screen); g_screen_ptr = plat_sdl_screen->pixels; + PicoDrawSetOutBuf(g_screen_ptr, g_screen_width * 2); } } @@ -147,7 +149,8 @@ void plat_video_menu_begin(void) g_menuscreen_ptr = shadow_fb; } else { - SDL_LockSurface(plat_sdl_screen); + if (SDL_MUSTLOCK(plat_sdl_screen)) + SDL_LockSurface(plat_sdl_screen); g_menuscreen_ptr = plat_sdl_screen->pixels; } } @@ -169,7 +172,8 @@ void plat_video_menu_end(void) gl_flip(g_menuscreen_ptr, g_menuscreen_w, g_menuscreen_h); } else { - SDL_UnlockSurface(plat_sdl_screen); + if (SDL_MUSTLOCK(plat_sdl_screen)) + SDL_UnlockSurface(plat_sdl_screen); SDL_Flip(plat_sdl_screen); } g_menuscreen_ptr = NULL; @@ -188,9 +192,11 @@ void plat_video_loop_prepare(void) g_screen_ptr = shadow_fb; } else { - SDL_LockSurface(plat_sdl_screen); + if (SDL_MUSTLOCK(plat_sdl_screen)) + SDL_LockSurface(plat_sdl_screen); g_screen_ptr = plat_sdl_screen->pixels; } + PicoDrawSetOutBuf(g_screen_ptr, g_screen_width * 2); } void plat_early_init(void) From 4e3551a5f617a424143aca227a2ce89cfb7d8640 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Mon, 7 Oct 2013 19:35:46 +0200 Subject: [PATCH 0014/1110] Make the platform code provide the key mapping notaz: fix pandora build --- platform/common/plat_sdl.c | 39 ++++++++++++++++++++++++++++++++++++-- platform/gp2x/plat.c | 32 ++++++++++++++++++++++++++++++- platform/libpicofe | 2 +- platform/pandora/plat.c | 32 ++++++++++++++++++++++++++++++- 4 files changed, 100 insertions(+), 5 deletions(-) diff --git a/platform/common/plat_sdl.c b/platform/common/plat_sdl.c index fc7b200e..63039d44 100644 --- a/platform/common/plat_sdl.c +++ b/platform/common/plat_sdl.c @@ -19,7 +19,7 @@ static void *shadow_fb; -static const struct in_default_bind in_sdl_defbinds[] = { +const struct in_default_bind in_sdl_defbinds[] __attribute__((weak)) = { { SDLK_UP, IN_BINDTYPE_PLAYER12, GBTN_UP }, { SDLK_DOWN, IN_BINDTYPE_PLAYER12, GBTN_DOWN }, { 
SDLK_LEFT, IN_BINDTYPE_PLAYER12, GBTN_LEFT }, @@ -45,6 +45,41 @@ static const struct in_default_bind in_sdl_defbinds[] = { { 0, 0, 0 } }; +const struct menu_keymap in_sdl_key_map[] __attribute__((weak)) = +{ + { SDLK_UP, PBTN_UP }, + { SDLK_DOWN, PBTN_DOWN }, + { SDLK_LEFT, PBTN_LEFT }, + { SDLK_RIGHT, PBTN_RIGHT }, + { SDLK_RETURN, PBTN_MOK }, + { SDLK_ESCAPE, PBTN_MBACK }, + { SDLK_SEMICOLON, PBTN_MA2 }, + { SDLK_QUOTE, PBTN_MA3 }, + { SDLK_LEFTBRACKET, PBTN_L }, + { SDLK_RIGHTBRACKET, PBTN_R }, +}; + +const struct menu_keymap in_sdl_joy_map[] __attribute__((weak)) = +{ + { SDLK_UP, PBTN_UP }, + { SDLK_DOWN, PBTN_DOWN }, + { SDLK_LEFT, PBTN_LEFT }, + { SDLK_RIGHT, PBTN_RIGHT }, + /* joystick */ + { SDLK_WORLD_0, PBTN_MOK }, + { SDLK_WORLD_1, PBTN_MBACK }, + { SDLK_WORLD_2, PBTN_MA2 }, + { SDLK_WORLD_3, PBTN_MA3 }, +}; + +static const struct in_pdata in_sdl_platform_data = { + .defbinds = in_sdl_defbinds, + .key_map = in_sdl_key_map, + .kmap_size = sizeof(in_sdl_key_map) / sizeof(in_sdl_key_map[0]), + .joy_map = in_sdl_joy_map, + .jmap_size = sizeof(in_sdl_joy_map) / sizeof(in_sdl_joy_map[0]), +}; + /* YUV stuff */ static int yuv_ry[32], yuv_gy[32], yuv_by[32]; static unsigned char yuv_u[32 * 2], yuv_v[32 * 2]; @@ -241,7 +276,7 @@ void plat_init(void) g_screen_height = 240; g_screen_ptr = shadow_fb; - in_sdl_init(in_sdl_defbinds, plat_sdl_event_handler); + in_sdl_init(&in_sdl_platform_data, plat_sdl_event_handler); in_probe(); bgr_to_uyvy_init(); diff --git a/platform/gp2x/plat.c b/platform/gp2x/plat.c index e5bf18a3..e7f8f730 100644 --- a/platform/gp2x/plat.c +++ b/platform/gp2x/plat.c @@ -71,6 +71,36 @@ static struct in_default_bind in_gp2x_defbinds[] = { 0, 0, 0 } }; +static const struct menu_keymap key_pbtn_map[] = +{ + { KEY_UP, PBTN_UP }, + { KEY_DOWN, PBTN_DOWN }, + { KEY_LEFT, PBTN_LEFT }, + { KEY_RIGHT, PBTN_RIGHT }, + /* Caanoo */ + { BTN_THUMB2, PBTN_MOK }, + { BTN_THUMB, PBTN_MBACK }, + { BTN_TRIGGER, PBTN_MA2 }, + { BTN_TOP, PBTN_MA3 }, + { BTN_BASE, 
PBTN_MENU }, + { BTN_TOP2, PBTN_L }, + { BTN_PINKIE, PBTN_R }, + /* "normal" keyboards */ + { KEY_ENTER, PBTN_MOK }, + { KEY_ESC, PBTN_MBACK }, + { KEY_SEMICOLON, PBTN_MA2 }, + { KEY_APOSTROPHE, PBTN_MA3 }, + { KEY_BACKSLASH, PBTN_MENU }, + { KEY_LEFTBRACE, PBTN_L }, + { KEY_RIGHTBRACE, PBTN_R }, +}; + +static const struct in_evdev_pdata gp2x_evdev_pdata = { + .defbinds = in_gp2x_defbinds, + .key_map = key_pbtn_map, + .kmap_size = sizeof(key_pbtn_map) / sizeof(key_pbtn_map[0]), +}; + void gp2x_video_changemode(int bpp, int is_pal) { gp2x_video_changemode_ll(bpp, is_pal); @@ -183,7 +213,7 @@ void plat_init(void) flip_after_sync = 1; gp2x_menu_init(); - in_evdev_init(in_evdev_defbinds); + in_evdev_init(&gp2x_evdev_pdata); in_gp2x_init(in_gp2x_defbinds); in_probe(); plat_target_setup_input(); diff --git a/platform/libpicofe b/platform/libpicofe index c52e6628..c19e28f6 160000 --- a/platform/libpicofe +++ b/platform/libpicofe @@ -1 +1 @@ -Subproject commit c52e6628cdf9c53e9143e903ab793bf59987a0ea +Subproject commit c19e28f62660cdaed26698234cff9c084517b34c diff --git a/platform/pandora/plat.c b/platform/pandora/plat.c index f7d68792..f82d7027 100644 --- a/platform/pandora/plat.c +++ b/platform/pandora/plat.c @@ -93,6 +93,36 @@ static struct in_default_bind in_evdev_defbinds[] = { 0, 0, 0 } }; +static const struct menu_keymap key_pbtn_map[] = +{ + { KEY_UP, PBTN_UP }, + { KEY_DOWN, PBTN_DOWN }, + { KEY_LEFT, PBTN_LEFT }, + { KEY_RIGHT, PBTN_RIGHT }, + /* Pandora */ + { KEY_END, PBTN_MOK }, + { KEY_PAGEDOWN, PBTN_MBACK }, + { KEY_HOME, PBTN_MA2 }, + { KEY_PAGEUP, PBTN_MA3 }, + { KEY_LEFTCTRL, PBTN_MENU }, + { KEY_RIGHTSHIFT, PBTN_L }, + { KEY_RIGHTCTRL, PBTN_R }, + /* "normal" keyboards */ + { KEY_ENTER, PBTN_MOK }, + { KEY_ESC, PBTN_MBACK }, + { KEY_SEMICOLON, PBTN_MA2 }, + { KEY_APOSTROPHE, PBTN_MA3 }, + { KEY_BACKSLASH, PBTN_MENU }, + { KEY_LEFTBRACE, PBTN_L }, + { KEY_RIGHTBRACE, PBTN_R }, +}; + +static const struct in_pdata pandora_evdev_pdata = { + .defbinds = 
in_evdev_defbinds, + .key_map = key_pbtn_map, + .kmap_size = sizeof(key_pbtn_map) / sizeof(key_pbtn_map[0]), +}; + void pemu_prep_defconfig(void) { defaultConfig.EmuOpt |= EOPT_VSYNC|EOPT_16BPP; @@ -500,7 +530,7 @@ void plat_init(void) // default ROM path strcpy(rom_fname_loaded, "/media"); - in_evdev_init(in_evdev_defbinds); + in_evdev_init(&pandora_evdev_pdata); in_probe(); plat_target_setup_input(); From d4bea61c8f1e5fb1a68c401610bd424c052ef636 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Mon, 7 Oct 2013 21:06:54 +0200 Subject: [PATCH 0015/1110] Add support for the OpenDingux platform --- .gitignore | 3 + Makefile | 17 +++++- configure | 11 +++- platform/common/plat_sdl.c | 3 + platform/libpicofe | 2 +- platform/opendingux/data/default.gcw0.desktop | 9 +++ platform/opendingux/data/megadrive.png | Bin 0 -> 1524 bytes platform/opendingux/data/skin/background.png | Bin 0 -> 16170 bytes platform/opendingux/data/skin/font.png | Bin 0 -> 11264 bytes platform/opendingux/data/skin/readme.txt | 7 +++ platform/opendingux/data/skin/skin.txt | 3 + platform/opendingux/inputmap.c | 55 ++++++++++++++++++ 12 files changed, 105 insertions(+), 5 deletions(-) create mode 100644 platform/opendingux/data/default.gcw0.desktop create mode 100644 platform/opendingux/data/megadrive.png create mode 100644 platform/opendingux/data/skin/background.png create mode 100644 platform/opendingux/data/skin/font.png create mode 100644 platform/opendingux/data/skin/readme.txt create mode 100644 platform/opendingux/data/skin/skin.txt create mode 100644 platform/opendingux/inputmap.c diff --git a/.gitignore b/.gitignore index 9f71eb92..b7ef852d 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,6 @@ mds/ cfg/ libs/ obj/ +.opk_data +PicoDrive +PicoDrive.opk diff --git a/Makefile b/Makefile index 67ba86f1..0df793e8 100644 --- a/Makefile +++ b/Makefile @@ -55,7 +55,21 @@ asm_cdpico = 0 asm_cdmemory = 0 endif -# frontend +ifeq "$(PLATFORM)" "opendingux" +opk: $(TARGET).opk + +$(TARGET).opk: 
$(TARGET) + $(RM) -rf .opk_data + cp -r platform/opendingux/data .opk_data + cp $< .opk_data/PicoDrive + $(STRIP) .opk_data/PicoDrive + mksquashfs .opk_data $@ -all-root -noappend -no-exports -no-xattrs + +OBJS += platform/opendingux/inputmap.o + +# OpenDingux is a generic platform, really. +PLATFORM := generic +endif ifeq "$(PLATFORM)" "generic" OBJS += platform/linux/emu.o platform/linux/blit.o # FIXME OBJS += platform/common/plat_sdl.o @@ -156,6 +170,7 @@ target_: $(TARGET) clean: $(RM) $(TARGET) $(OBJS) + $(RM) -r .opk_data $(TARGET): $(OBJS) $(CC) -o $@ $(CFLAGS) $^ $(LDFLAGS) $(LDLIBS) diff --git a/configure b/configure index 453d5309..29b45e1f 100755 --- a/configure +++ b/configure @@ -31,7 +31,7 @@ check_define() # setting options to "yes" or "no" will make that choice default, # "" means "autodetect". -platform_list="generic pandora gp2x" +platform_list="generic pandora gp2x opendingux" platform="generic" sound_driver_list="oss alsa sdl" sound_drivers="" @@ -51,6 +51,7 @@ optimize_arm920="no" CC="${CC-${CROSS_COMPILE}gcc}" CXX="${CXX-${CROSS_COMPILE}g++}" AS="${AS-${CROSS_COMPILE}as}" +STRIP="${STRIP-${CROSS_COMPILE}strip}" SDL_CONFIG="`$CC --print-sysroot`/usr/bin/sdl-config" MAIN_LDLIBS="$LDLIBS -lm" config_mak="config.mak" @@ -68,6 +69,9 @@ set_platform() case "$platform" in generic) ;; + opendingux) + sound_drivers="sdl" + ;; pandora) sound_drivers="oss alsa" optimize_cortexa8="yes" @@ -111,7 +115,7 @@ if [ "$show_help" = "yes" ]; then echo " --sound-drivers=LIST sound output drivers [guessed]" echo " available: $sound_driver_list" echo "influential environment variables:" - echo " CROSS_COMPILE CC CXX AS CFLAGS ASFLAGS LDFLAGS LDLIBS" + echo " CROSS_COMPILE CC CXX AS STRIP CFLAGS ASFLAGS LDFLAGS LDLIBS" exit 1 fi @@ -225,7 +229,7 @@ arm*) esac case "$platform" in -generic) +generic | opendingux) need_sdl="yes" ;; esac @@ -361,6 +365,7 @@ echo >> $config_mak echo "CC = $CC" >> $config_mak echo "CXX = $CXX" >> $config_mak echo "AS = $AS" >> $config_mak 
+echo "STRIP = $STRIP" >> $config_mak echo "CFLAGS += $CFLAGS" >> $config_mak echo "ASFLAGS += $ASFLAGS" >> $config_mak echo "LDFLAGS += $LDFLAGS" >> $config_mak diff --git a/platform/common/plat_sdl.c b/platform/common/plat_sdl.c index 63039d44..4404691a 100644 --- a/platform/common/plat_sdl.c +++ b/platform/common/plat_sdl.c @@ -72,12 +72,15 @@ const struct menu_keymap in_sdl_joy_map[] __attribute__((weak)) = { SDLK_WORLD_3, PBTN_MA3 }, }; +extern const char * const in_sdl_key_names[] __attribute__((weak)); + static const struct in_pdata in_sdl_platform_data = { .defbinds = in_sdl_defbinds, .key_map = in_sdl_key_map, .kmap_size = sizeof(in_sdl_key_map) / sizeof(in_sdl_key_map[0]), .joy_map = in_sdl_joy_map, .jmap_size = sizeof(in_sdl_joy_map) / sizeof(in_sdl_joy_map[0]), + .key_names = in_sdl_key_names, }; /* YUV stuff */ diff --git a/platform/libpicofe b/platform/libpicofe index c19e28f6..d685ce46 160000 --- a/platform/libpicofe +++ b/platform/libpicofe @@ -1 +1 @@ -Subproject commit c19e28f62660cdaed26698234cff9c084517b34c +Subproject commit d685ce4625e9f3b25b0852d31960cb429da06a9d diff --git a/platform/opendingux/data/default.gcw0.desktop b/platform/opendingux/data/default.gcw0.desktop new file mode 100644 index 00000000..80458bd8 --- /dev/null +++ b/platform/opendingux/data/default.gcw0.desktop @@ -0,0 +1,9 @@ +[Desktop Entry] +Name=Picodrive +Comment=A megadrive/genesis emulator +Exec=PicoDrive +Terminal=false +Type=Application +StartupNotify=true +Icon=megadrive +Categories=emulators; diff --git a/platform/opendingux/data/megadrive.png b/platform/opendingux/data/megadrive.png new file mode 100644 index 0000000000000000000000000000000000000000..e1bc5f8d4edadb7c42935eb2d79ec5da1ce586f0 GIT binary patch literal 1524 zcmV3d)L)W7Pp4_halYjr6s z^upbT56t&)&iQ`$!uOmxL%`6tRzYG8uxwAel^tPN#!q zSv-02gqJU0^7{2_-oAay>FFtdZMTWXEk?>ge& zl1`_Yo}Nb6b^dba4qIDW?CtH@5-(o7;OOXxt4oQ*yZ+ zx~|)M9UL4C1Y}vJ*=*8mHrd?Vbi`*g8VzP=XOU$YS(X_eA14q95C{YqA0Nl>_dD?U 
zeB8KkgVE7Z_V@R(EQ@lvjIQf6n@zgi?m%FAdiorVM#F)kD9q2#qiGs`zaL4G5Cq}B zB1sY>BO`qH@PS+|N2OAs(P*%@w>J2>_?(RS! z6bhZgFbtbgse~ws$g)hKP(af(DwPVgS`A&-(RH2WcCjpr=g*(hYPFDMnNq35_V)HbAQ%juqgJcgoF$)|n?q3)R8^%^ zDp4+%9n~-l1Jg8VHk+)kuXE>9o9^!JVp$fAMgvKb$mMcuZEXz%5{U$wrlDyX%gf7- zSkp8_Q6v_N5sgNXB#BHWgQ}_&i$yd|vnyp72FuIKtgNiCzP`@t>M9!>8?@VPc6WEV zefzdu#dfL$+Zr!>?BoZMS zjgrsjX|-Adfti^Z3WdU@gu`K5A`*$Pu&{t6N%(v|e)+kFk`d|db@=7y9^JhTKA(?j zwaVh+BC;%#NFKXH8WE5|3l zqU-uVz~}RkN~MU!VqYYcN|DRuNG6k{QmOw5wOY;D1z$Q1&zPK?Boc{SI_Gm@v6%hm z`=vM#2p|Z;Kwx-ym}}Rr*@QwNve~R->}xnn8jHnnyWNiXY|ilT@P7op`McnE@iO{D a3;Y|Ox_VreIEYFB0000g$mJ literal 0 HcmV?d00001 diff --git a/platform/opendingux/data/skin/background.png b/platform/opendingux/data/skin/background.png new file mode 100644 index 0000000000000000000000000000000000000000..5dc2bb6653f81e839f0303fb194a86da1d444984 GIT binary patch literal 16170 zcmYkDbyQT}*T(7Y9BE|0K?WQek%l3Z8kCeS0ZHkQF6j~^B$XJt5u~L>z@bN486+fz z&i8(Qf4!^)Gi%Lo@44shv(J7$`?>FQwbdxeSjcd2a40p@RrGLha20^}2PAmF-&>uB zGQbO|ySlL_4h}ipf4{gtmx8ZxaJXXTP81{pif^lnuxu zMKBC)2y_3or7BRDH=6_A4|$ZCEtjZKhhi=c{l3EzHl)f-gZsqWYbG$BnRb7lIOg19 zC*&XeLINiu9c$oACr#>ILn5_P`qk)-P)&cmOkTMq#}G6Uvklcl$2{2jVbC!ViU@3_ zZ-$+5rX7L-j(AZJD(ZGQ(oT}JQTQw{-*@`VlP#?1asR&ootf8D)cDa>=f1j`v%L+w z^{kS1N3VYS=VSa43>L>8|485nceWwywS$Reu_p`XiYq>#avn`-eYMP)$4n5`r40AT$1Lna;?@HPd0NrccE;!2swK4C zpJZdubVm9!hkj5EI`j6L*XS0!Z%9$^f|2)hT zlnof*C^6&A-ln%AEj3$8GD;ganv2u~HJtsHoBkZ(A6=O^+sv|l>g#8h8Z*FY2&_PS z8fln|Ki%<3MbKwM9-!zjwoNHlDKHsHSBkv!8Q}W*?O+-0LfNr~{xurS zq~c+ZJgFqkmwbl$%}cHoY=e>tChq4rVRpv(baev+lQR^DXnQ>{n zmHx#a``ylFZ_LEE?bG2huaQeq1sSy!DTl7x@jr`s(s3w91&tV(I4#GZz>C*qrlCE3)qM(D^(FzW*0dB`4Dte&QF zV}2g0k@n4l4UPasn_Y2X`aSX*plT(wrz>mj!wtIuoy#L@O9Yu~(J_`)T0y9ND$Vhgv8YMFf{Z-lh=8Sp&3)OIKN8&inj3cX_?W-*ygoD#o9B zv$+O2ij`3!c_UQ#tP$E_QZ2XB{e%P>x9%*FZg=ldKg<-$)VcXfYjsYXVk4qOljQL2 zibr0D1WP3{oLYtiFS0ATpSzfwqQB=$wWCQIICq>cuJ~o`nKe#KC6Xn1NVk-t-x6eB zZxoc$&q?|n8}zXg!O+>&b-KI9Wt6F}Khprcw4qf>;i847x}Z6Kb^B7n7ChMIyw?y} 
zVA6J?ArD~c;@16_dEPp?f3RDaav+#do;yJz<%ZW*{u^g9r+oPmD}3tBm)!-lcpX$= zOr-qTXCc#$uKmFLtkS($%b2j+OoXO%355FGd0pw5WlZ(nMXKtj0vY-v#JY* zM9e;%L=v@?ro8r6R9(AB*f7%9Z)+|z#L7?`1m-gn^xDXDLJ;9Pj(|dsJyQXJ_mq{W18qcFBB8W+L#^ zhb-FaLn*%o-As0Tah(h6bZ4x)GKE3fr-=mFyGEX3e*5~lgPE%ao#6lKmvR9O-8ul?*D>us%tMV4I`16 zcJG|P=WE)ltQp%3QbN&fEIrS6(2szhc2 z$Ui7CM;6h(q866|sGom6Go03+R;=?K9-lzmuH&Fx8aV4I8i_HS5lAnWr1GCi0~=>D zMso+ffqh86RE8s}=5`=)(fvimkol*&WAzKp98mw)l0d?{KG~V0^ z6ePGp%5BTD?%i)9=egH@SdV5x(yl!Ty)(%Ca}o4UDrrNCnkUYe{@my=(AZFo)hKhI zVfW3KubZ5mR9t?J0jl|!qzxcy!V9vkK(L~G1Vv8-rM1GmIW}9hkM!jI&rGg)2RV#M z*agJfxx2Av0$2=|upZagBOC-nD|!##uVwgVv4^7nL+KZjY7Px{rNwbeSfL3pEjuTd z1ij>8HpWaMncLHZE-8DHsX`L-D#L+2*^8KSjug*m$$prZ*%{q#^4XnpZ~=EDBrc(}g`%IP&$O@f zAAw(ffhJ#j&GQCe9!;vX>-<~Ygs03IPm|J>luo*u_ZC{bL_z3*&_?t z8chK^ie#4~ze(HzR~)s>LDj}w@<;=&(}61g61F?7Fbuf;54fedVSkt~1f5W=iTlO%m-`?a%E zR&R0TZcVgp5qYK1B6-gXLQ=BG>S?c-)>wmsvZ|P%^tP}b1L3C7NgyLUEawwggJX9`$*=`R!K)#n^;};c zGzdA9q$shw&@#>>DN7!*`Cah0{YS&DedCuWbH}cDLmpp^pgFaXYPc0;#+mG10cHgL z`SOEM62Gaxerhr~z;@bcLnICMvUe^~ulHhm5LZlG;>r$3a1B->A37zIaLY`T_!eON zUD}InI-qgZ(jV{lMDJNPn=CQM_|msQ$0DBG>xaGAzCW=tS^!YpLPPSI(n$SOs3_8 zZaN#DD_aozRd45b2A&pMvb&ItbgW0Hims>}{38(CKJ=?XK0FnNBZ6-(n@y^NR^-o@ z?2TJYs)3@m1xF(d7_fDH8+n1jepJ^(7*~?SXsb)d9}+gDSg;0A=#{egGk;A+>;}li z6&pdB)US2*8NbW|X)*JUM1B-l!=RF}1!w!0dF|X1=3qJe7DhpBPLUq*HSprm*Cw+z z<^lT$lMv1okk*XDh`APO)g`+7;e;o;MRuYCwe^P?caL)QC43f4<&2j5AQw8Dd0@Y0 zv4vs3i}t0L(n3Xe<3YzvGNXosC$Kxyj2U(p{!fR^aJ~ed;Ci&Uv`*(y_J7diJB}!G zY&U{(1e;DeDzIo~+BIdDlsblU0R>Bh2!GPXpEvKilD4=E^=A-226i@%r4C~Fy{Xw+ zr8vX6xPGfIFQ9|f6YY%CMAxeQVC8aPV#M^IrqC`ymewUKRO3nm7G*X=eXwMq%*q@j&9clCbw*q@_+-*0Px>R)A7!7LvmI981R?bIwP zNv0Umhxb!VPzzPVNWIVJo_pKF*)H4k_d$@H^vROAjr2!7)ExF}gby=}B@qxgNrq9 zuht*NewB+u;^vh0s`ErL!0VS>Uy|){sP(qEByFrN0~0He+xhjY*sF8@{4xj`3*(f> zH+BS3Af5$Cw`uz^tIJpX1}KWnQHkUyxZ9^_g>mw;us30F-iYlRcWvO@y%a1;v5q;Q z0k3LaNvm4sOUqNzH{<2Zm(mp(OFF;QCYb#Q)}>tOuV@KyZ!8Xv%Gv&MJisBNx*WT| zx6n|vnV|tLKoK2h+I@VSzNa_86D8Ny%cYzcP*mH(?Y%%QNP;IVxc0}X*(u#OY}0vF 
z#$@Dl_r}Tc8^Z*)0jkWvu9>Y8sBqq6Yn-oLR$E&;XR>d;%U-SYw&*H!6_oTi>51Ph ze6s6qRV#E;GcpB$`ERp`Vx#k_rea(wla%xV(sXsYpY>(mn9?dqmdslY-y=%Uyj%LvWhtHK%4kXR2qa zW|a4W1~=5yq^V;1b05qfYgdhhT7gUsiRpK^7*pvYZA{-__pVv{yv@$JhOboO?dkbQ z@Vv@ZsjJuA{a~*SpVhZknB@+~aFO`gRwCPz0xKFXMHwC{ksJk5>7`nM7`D>SDq51I z!`e4Op96AZ`Vgq{G)Y%{jM@Jeuu(mwTCPNrZfiW0fM7em&63q1O;%j2GD=dvBf2v=OjW9c#t_AW>P zYm}+fA(1Bx!x8Juj}N5=w|Z*LERuPw&@D*h0qj356|5}9N1?oOb{LUU`qzK?@o-hksf{xU6%YI^)ZRZX6*{}xVAR#uM=)^u zn2UL4+1(@TMeP*fnc;M1Us=2m(e(c9y_M6@w`_Erm3`0`$~#~*)5*)W~RQpf3wph@Nd z57V0?3NCLU7z^W&eC6MqFfw3cZsZXTuIdvJ}weS#j zT_vLsHGpj~Fv9CySjDKqd&IPqVy!^q3vaAI*2BO>#ufj{?=jA7{!5jcyf&D2q@X6xRKBv?@((-xFL2p;p-7{J^?T|S1-5Ui$t~q)dx1nsC!x3^Be+7H_~PzQU|YfN`uZ-WYjun0f)F`Zp_~Kdb*Tw# zgtj;^6-PwY0|!1dNZlN=%lG$~|SCS}x*Fw|^tefW4Y-%OT1g`u;FGlSQ*l#b<)$L-}@_pHT8 zw@2hEceSL?AekxDo%v%&+g-;xh2u|ZChu&GRt)~9*&j-KTY5(wf`mdJ|Ag=qU#}qNsHvPd^%1-TIgDfcv|7n;oGx1R0N|v;DPw-}x!GoUXWkj7W5l`$C z&e#S4swnQr@U1dcOTnN!x`$yPwAI^9b~k2mlqyE~+xYwD_b#wRg4$>$U8S6lIEm%4WrNWc}vUo56ILRqt);7f1% z9iO}cZzA!wmDZ_3tmq=8VX}3~`=d3&IMd1-vnN({b6&#D6IDi~wu4Ga_6N_mXq`|T zCuj%1$mGqZHL$8$&BwqIUXHJ$wwyd-5?ir)amn_`heZ&}W<1$u<(X;wNvg zw9q1>IPK_fQ3kCWhp`{;eRewjX=|+7n+$AG+K#%RKs}%V?Z!u}G>BMh^F6 zl=s*C%wbvy4a*VS706rL1V(rfpE7!yhDGI(I}l^CwFjXdd6)~0HUSD{CsS#b#3PxZ zm*IS5lfjnDB4C07pM10-!SsugsaS$-hti0GoYKcEiRa(QCS<)P?&Hk`k?)I5z4m-y znxHgyOoJ_l=n~_Q+xUYF;jMNmP6~FKAF4|$W}%W3H(Ohu5m8K)@6CL`jhSkRiVoV*%0{RDQj!I{<0 zjGX4&g}4fd)I{a|hmw>JG#`29lmc1Xjg#$e%R@t}-}jPMFX9aCy|`wtS6KH67EY+N zE~ogVv5XNeoI4&@ymb;VkFIC6Fj+jTcT&2M$Cv*2<7I`7apus5heWPf+!M>F0jyH< z!RHAb6oUs;X7)+%t5D6HT)39KMP8P0{g#ahw8Uv*ZK?2dInk=a>q+=pMtzqEdRUT#}ttWF}aop z`wbk)LmP+c9lp;j342OX5tQ_P1?BmfwiV*amSYWr8y+|B)hpIOBgw2Qi;t(11HaM( zbj&@qRL)BC{+btF_JrFM*Fk9EP@mIF)J9UU*PN9hhBP6e<+@D4&%NX&QaPt|n^aS= zBK`iHB2l)YCkpDd1o_7UE%#G-CZ6a#XL18rVOGTfwo3RWbYY#f?R#bzEXMLLsS zr=$c`9R;4K9SjstCaxmc+Xf~gCJI}>%o`Y#?h8vF*KW)YmDTYjJ0z81yvl^c^OxK z-Au$tMhwj;d4#AW=~dtR&vZ3Fj^BKF!5J0oD^^^3;Sm2QuJTPBnY%EVnT4P?9-ynG 
z@0PfX-|(bdI0X~Cl3x3zBs(DAy=_4_2-l=d7shhle_}R=E=Na%eSNk@Rb^k>1&rCg zM>4T8E8FKSz1UBKQ)Gh7UDIM%L^AO$VR_c5Id4&j^FC!E*$u# zZ9t7+7$>zGCf`gcov+CX^g-7gKk+p$`ma|cW`)3^8o3(pcTjAwcGOFMzAOlT2_af} zq@ljb=L^J^t?B?ukR`gvzUewh&T!?=^+WxD|L*QNqpA9{(J3(n%e_V_$=6_labHC0 zVc11}kITdJ)KJ$eDW+%}KV;=Ua>0xDUo(}px<1j0Ov(`W)3ZdXd4%L@1b_~B?4wL< ziZ9JDj;7869i~D%wYy%@AH@1WV_4I9oK-&cw8(lQ2`iT6_yq&;i3m5%7$RTbN{HHP zp3!}Xo-)w)o)cnVMyV?qrZc-`*j0QN-}7QAhiIk|oTPzr^(Hh#zF2uQSFFb1viLS~ ziuS(#@#(_o?4P#>;_1cFkzNQdXOskmvfr&%-I6P-2H|r7O_>h?c&MOPRUT(Mqbo|h zO|MnIn`C~z0o9^#!o1D89}0egCVEM+#wv2xpp_FZLq z03ZnnnaX?0{_>Du$DAX4sYQij8-@Iypt{Rte2+&G&FP0BSgnlpuKBRd&Wo zG4e?^LmsO{tp>+7p}wIzlKR2YL!DRU`40{b{k|d#<;NNTPJMIg>r0i`G|)#z7-(dl z;m-ze@W1WBb8B9L8DS*QU(0eL=9uk{_MI|$3gjL8Kj`Zfk7qbI9LWE?08Mc^gZ;sD z0*hFg564<7!!x0TUNRi4yYH#fisj7(P>e)_0r|*Lps85}=f6r%_>p5~J*QNa^{g{R##;4v;;G zGL>m>!PkUU`-9pVBgTUNUme z=D$|S-RFg8KrwuF)4x_Y&s%iEuZ0S@T7XfEvOk36Uj_7!=QP|M$>^jJ5d8D$*g0vx zy@tf4_-}hj*X*bwrl-rn=Dk<7Ave#o@K@1FsHUteW>CF|NltFalf*tIy7>m^{R5wl z&a=)XDA~;rXUMPHe?N*USUQU#X<*WDoFfX9Sk!A%P;^9ZsPLv#l4FjROJ5M}`^(Hw zNkx7An0{OWgZ=)OnS@vOSoLp|oQ;{x5#laRLYF;#2!4`@$@-kBhlwh6=WdVU@Vc9K z+LiFs5CjwS><6q(#V>;hmRy;s)blYXZd_?pKzI2?*vF9i!n=zLV?W`6QOf?5$9Ee5 zBBuNqYCV=9Jm>A(VRmz&k@nl4DxKQk_?0?ea~oNZf}}=T0CD7lx}4v4z%C>5h7hkn zuU&uz;X$Kd$rh1$GW!C-5Yr#FZeBTq6coIEbMUs{n6c@nNTws7hAQ*ayc1{91mTD@DyyAUv_!Zd5HzsmQPXc7wVywys7o~Mp zp<|Jek$M)wVaBLxjt$_t83Oo*JSc!=RX#~=#n=iZ&#!Cfa5+>w@6tKM+yq;6pD(r~ z66opck2XYONS$-|j+Dh$c_!Lx$N^PBL+qqW5gQbZ@lO!$hp`ROmXfVYsd58q{wBDa z_rMU)$6%l@uX4T$nI#;bSlUk(HlU9JcHO@Cck1mmV|jNT026CtOjIXwl5;^b4Q8f; z-)(?+DQk8vyH?(uKMN&Fh7Cq`B?Il(OePkz8*zfn5hLV1j@}K?--t@2 zN$ni!Z^nO-wIQVx_}5Qn(1XH_EdQtBwCL@N(CWyIJs$xu_)CgLT6so?S%zNl-EHUX zsCJns8&%Pq4(hnz?i#bV|BrIC;hJKDss+90F7+*G+dm&>9i{~mkRF^ZT(?-DY;7a^ zs7*W~KWLN&R?=wPkYecSv#wqj0){g{(Z?H8z}+V|ySo`eY4g3M+iTsMEd9!=d&LHj zqK2w`_ux}&?x7h#c9}FE`PkCN0`&A0Uh2QsREm8wU}yX=Y0C74^~XICN* z2x407|Bryh`_j26zxc)b_G%%-+bnCl%+}I4Gbf2n=Je`q1VJ>E>4{VK%F3RPk_1Px 
z+GM`)=GAR*x0~dePVAaHNg&X5f>A`Bh$_-dY6FswQS#=?xbvruB)0(awv5V|rwfu@ zaO2F5C90Nm$DB@c9l(HMURzW>AhP1xS3ud~lD>Cus2-+9az&1Ae)<@7x6-o^GLBup z@>P_D**J@_5Qj~G#u}>Zds;2CGL}^CFfva0%UpG&JO&FpCoSmegUp{|X4MAiBLt7v z4A^ff3LO_6#EPAkA#srxgh0d(0^bQpKS*Perj_>2DYY{moexpoOWYBd#uw3;^X5*c z2v>k`y-HAIp`^BhfyBgL7Z1Dy^5+p7b&N77`UF#gVkfa+h&s3qEWF70K1XQ#{x=?F zOn?6xmseN=6usw@X~&&&a(sMzdAZ2)GZFt8%<0{c%!;!_Ukn~A*$}WSQ={5=4`Ncl zBf;HjE-*48yRU`+0nxuF0kBWJHyEZfe7f$Yn}0q}tYG(=)eeH~kZ1oQGkZ7*HLbmF zr~qr!{k5kQp5*IKz7og%c_OI2dKKJ?hO3;qOAS|*0nV(_C}F-0!4*=$F!?w!E$SHj zxI~(Mh99c064;Z6_I#djB(KQ>`1d_8RL~C;EzFW=DZ>n}Hx-KNRo6lR0Lbym1y-^^ zcTQwfJEurv`2;>^o$Y)J_|MpgwHdxvoCGXw3ortXguh;7{+uuViLA{7J_zqVE06@J z*M&3)Kf?}aj04A+V*P%01J;PF^wZa`oFQX>B`i|}AtU@4`I06@Qt3X;iwqAt+TEUd z&5s9WK;bJ9I=%284{To&QZekS{3-Tv+#dCpVZ-F|COD_G;FB(Ce~M1{wnP6BOc?9h z8b7$St2PyWV5m$EWFHH_Qy*pLS1$JpfvGYuWOw!WU-aU=~AUG)IS{cC-LjwS86lU~bE zD%^4VXj+8wdq}cH$c_Ho$wg2;cm#`W+`qB{9bJh^yc(RhZ{#0qK<_DW-<@^eeGLBO z$fC6uwxOWx&#<#CGr^ju`uh?Hf^2>=2qAC9tbL=S`>($tZWRl)A+SiREq@NZr|BcV z{bWX)ieh%iG1wm{c4y}`kIB@b9~%Mg$qnA5AY=2Je2j}kZVrU|74nz3KRFo@2M*)Lh)_Q?mnH^^DW!Wf`=yMB~DDF|GWbc~3wCj*UIj*Uz*A z)`k4-c?8nfulRP)EujGGL{-O<=1pcC+F!-r3C?qtg0+Z^k-y=Znf3^}wl(Xg^FKYwr2AkohC zm_NFxi{l&>zS-TgrBrOO*>@96jb=^UxVj0}*B60npAiubdYesP**X9#6ZRsHY-Qv= zyj%N{Cr|tnD=q?ug_l)fcf?oQ`{5!j{e_(>^ z;7xV&*wwk&M1u?lEQ#jfVx5k)PQb?CE5Ghb2XcX&i&qTrw$(fVQDlyNwzYgr(cb_q zLseLTrk8;UK_17XgD(KX=RM3X*H-jRaPqRSac1Y+lM+ZDuY$h5fBuHy4=kJfdUNsE zpEr#QhN3N5T-`Yh4P!AAxDK&hye62`x{NY~3S?v4#a_zKTp- ztVT=y%D$VCXLvcN`IRDZ!E5>7vILvYPvMB~7;O5ZXoWjbF)PDRnTbc8u^MUDe;$P> z>N<(B40u2vnes8Rva*Nxff($U-xk!`_b?FQ&ep)8}7UaLA2`>@RW0NHB7hYbSNLuXQg=2inV9qd2cHl~j_18ZcC z{TUl0=NHqR(n%3-3Vd&fsAJ7*B|mCk^s=iPjD!Hb!ikBAX?{Re zihrpAYXCM?g5Q&OmP^d$5Wc$GkUg^npW>$sf{OJxF_5X>KJveA@481#TAv925E%yl zis^h~AkIgS5}UKA^G*>$nJe8dhbiCQ*eIF5#hjdo-w9$0O#Ck9_s@X5xC-mbg}maw zy;-ebi{^H`)LoBZ-&C5`*21N)cHDrcJ~u`&bH82;wDRMmiZQSCjjq!McDR0;R^t34 zD^PIz?F1=Q_2-6^kJ;JtDaQQ!ySwzPEVzR~@tjyQ?c#R;yillQ1W7vEaH2I73mvg>A$F_34^FyGD_cyu#WFll=9obIx#+X%`f-vlM6 
zHc=0n-SOsM_s@@EeJ#ay|Cuu=y40~|2RQwHx@|Zr)^*ow46vt784;eQ&GP_O)sM8J zUB9cIei2J6hJ2RO%qis*&Qn!qBo4aCGrBJ(`T2)20c=q2vJ&!Z0$07uRET{jtSv4h zEup@FP4&X+KjDVs1FSXl}(i&&6P^VI{Yyn^R;IfU>A)MUDTGHMAH)QqdRJ8;@WRWFiOnXK)Nq zjw1xD_M1wcUSxrA|ePuD*4F87;|kYt_Uey33w1!9aAup5br z!N@#tUy$fIFl^pa8cnDtGbxZtqD2<{i`%Le|EMkOR-GCqYXMl(zNyKBza1Ek<;Qty z-{av^Z+@W(lQLMN6+2{72&N+sV!5#vf(D=_=1j0dM9{F?g#Q^qoOc z#joRU@OV!?e@IiHS@9onv(-CbWq zkkQ*BK{k1wKQ5M*mYRvnUsh)#kwd2}X&C_dwEW@?z@2#ZqLKZmEymrb$eoLe3z3B} zp`y<}0C%+8VBjyyY7#)+yp>|Ry})pSW|B5^#giR#sHv%uNMLc!K9>)^?va1<#ngH% z{G-9%&mq7fYxN2#Mv}Q&eTEALB^ux`>8v|xFBA+z*m z&o{UR86@V^W9-jQ>{l|)_+paxHJ#=YFp6T~-i9hJlERuVMtsT(wT?M8J0D_%ZW%u} zzJI>5)vbja#KOcG&CQT;KkykTj5Q2=aDnsp+*U;KmdQ3c-3E++J^8mkWJ@OxZUo+7eq^ z9R+gdcck!LJx(Ezwq`yW;|3lgrrpt4H>P$I+FYz2^ z`yT_kE3TM}M7R8xP1EqcPKC20xlr{*lK+ z_!&n%*o*6e35GuBz+2|O1!E{?o#2{b6<<2VSC z*jsY-Z7F0n(bHX+6sF-ct~LOS5lc(@KWOhes>#}dY!D3N7|*RdpkUkB;A#E>IP-Bc zoaUrO0UC@W(iHnlbdakVzPk-%M-n{esRUsyRPgnAJ&^A@%miDfft}Y{=>r~5!0@D~ zQD&6AB=DGk8_>PhNK3YTd=c8focj9*tC&_qYEV#68aU;n_3*}Dopje$@}r1OlF(im z)xXq{9=n1$;sq16QJuTdqzN(7ONDJ=t9o%0f$IEQu(X`DliHX0F6i><7&vh)Pz!yj z0bH3tqc{#{S>$MY=Y-`;{qvgmDsdvkoQ?o-jcKo_GxEp}_yxf2X8kR4aOPK=$GINxu4F$fI z6b6uzHCw_>H126r$Hb;Cl1_}1Wt^(4n;VF!%$kF}e_Ljrs862UB`ki;xC7(OK-MLoS)GtU? 
z%ZO8-`e8;)aN{wCOY8akvK8olBRt7V+_#-%vPG-PJAT8j*@S!L2Pk4e^Yj{UBn1bb zzP&5{={Vr=oIpG6w}8@}#S((x3681td2t?gXlMv%NzKnVOL&@Pv7{B*pldCoX~Bix&&%nu zm-l=EML**3hCQMJlG@x3a1b`Cs~fpIOH$-*guh-7g-AP>xXWu^Yh>YTtUd!Q zIY7-0YJx|FQ`}du|6Hu9(-G$73t2ULpx|>+pQ2K#cgvtz%eJVt5-$J zINwcn6SIGQEsziScNW-G4Zb+!C&$W6c(5IXO+Ee^p8ifpg;38ZB)XO6jpB;ZK=PzX zM7tM|)F3qv#WUAffQuJb*no>w6w=AOpgIeXT{%nDD1JPhqT}&{vcFJ)3a>HRs!#*8 zO|Q2wMNvz1FS`^=Hmf^4XMZ@L03W4HkyAYGc&I|qK|J2Qo$7R&9;!;Ui_JEy+ zQWEGxzsVsm#xkxUbMNWfKm{fO)2L<&IK$I5E!6Hwz_93m2Zh-u7c?XbllNSmNn6C8 zML~*?FJ0lErvkYu1@% z>)NXn+wD#$5Mr{o^KfLsUxsWfQIqVEy}yqt@G^^OBiy6e^8PeiINqkG?ojE)Y0+8S*>e7Vr1SCC>@XS%$!x0x{XHT4eRPra1Bh5>G~rk zc^a}{rt?;aU?dXi3FuA5ggDRqFE0F+LZ14HTqIuD{B-sQSrAcruho&w!W4AtJCVTYXfJQlU9Obq`UIDt((~*X z!ml3Z*twToFQ4Q*s8iVsb@}VhVN|peP&>y{O?awj`&PvVF`OmCBF*J_YqK2(F{(Sqm>x<=gX0?#)$PIXRhQupyI)q47t-(5z zcA%*)FkXbT~gX6q|fGIrVmei|(u zf?|Qw868p8Y;Tc4uBN_1-7gElahs$Rf`}iu0~mH5iswVcw|M^+7j@7iGz2Eq27qEx zg;wwnPcbIT^O-MH{CF(DO?_{h_S?K`;qYx<8wuXaO#QJ6F|Df$>?^@Cy0Ep6%>^wU zy2E5o9t@-u#uumP9||6Wn!GtED-Is|G`7{1+oiul6R^oOyL5;rit&zV;sDQ#%eV@Ic)u$LejsE$KU`r`#IUM zwC+ba4*+yWq~Q~xEA^8g0!_+N<7(al9|l3v4g8U8SAj-ATV8HR0=Rw1`7VZ$Rr2G4k8Wn}W& zyhEa@Oq7bvfveC$m7*N?${5Ss)YX4I1Y)D<1uytvi7?))#VDqrXI)8;k~U_XycgS- zyIuD%87bw2fzO(Or9OCB}D#CeTSZ_FOK>+M-Ii<}2H>iw~djoU?p0e|TSrjX0etIu8 zK*dlxsA=ri?=6?iK-b7VA#uot;w?bL!~_kLl-9rZxNH1)9DE|<_QlkmLD!gbzGV-r zQHtVcSpe$41=SZy#k7{?G@6zxXUy>f^%>_MxiWX!8yB2KvMpO0#EO6u;UXr^ZZH^( zFTUQsT9K90%S_(*MP6_YoGQ>*n*%r=;Nl9icl+`g5+~RBNvgi#I=mJ?yCQE~NOj$w z%Qf3E2N=zOb!s}rDE%9$q!P%egBV_nxE6qGqpcV$RDh!F_0j6*FeUOmpR0fWw6%BA z$_ev4BNKabQFFO}cD3^w-jnD|YoDc9V91Xa(N{y0b`2PXJ)LVA| literal 0 HcmV?d00001 diff --git a/platform/opendingux/data/skin/font.png b/platform/opendingux/data/skin/font.png new file mode 100644 index 0000000000000000000000000000000000000000..707a5b433f25537a1514c1c6b71bbd1e0a92daf3 GIT binary patch literal 11264 zcmV+bEdSGqP)U8P*7-ZbZ>KLZ*U+lnSp_Ufq@}0xwybFAi#%#fq@|}KQEO56)-X|e7nZL 
z$iTqBa9P*U#mSX{G{Bl%P*lRez;J+pfx##xwK$o9f#C}S14DXwNkIt%17i#W1A|CX zc0maP17iUL1A|C*NRTrF17iyV0~1e4YDEbH0|SF|enDkXW_m`6f}y3QrGjHhep0GJ zaAk2xYHqQDXI^rCQ9*uDVo7QW0|Nup4h9AW240u^5(W3f%sd4n162kpgNVo|1qcff zJ_s=cNG>fZg9jx8g8+j9g8_pBLjXe}Lp{R+hNBE`7{wV~7)u#fFy3PlV+vxLz;uCG zm^qSpA@ds+OO_6nTdaDlt*rOhEZL^9ePa)2-_4=K(Z%tFGm-NGmm}8}ZcXk5JW@PU zd4+f<@d@)yL(o<5icqT158+-B6_LH7;i6x}CW#w~Uy-Pgl#@Irl`kzV zeL|*8R$ca%T%Wv){2zs_iiJvgN^h0dsuZZ2sQy$tsNSU!s;Q*;LF<6_B%M@UD?LHI zSNcZ`78uqV#TeU~$eS{ozBIdFzSClfs*^S+dw;4dus<{M;#|MXC)T}S9v!D zcV!QCPhBq)ZyO(X-(bH4|NMaZz==UigLj2o41F2S6d@OB6%`R(5i>J(Puzn9wnW{e zu;hl6HK{k#IWjCVGqdJqU(99Cv(K+6*i`tgSi2;vbXD1#3jNBGs$DgVwO(~o>mN4i zHPtkqZIx>)Y(Ls5-Br|mx>vQYvH$Kwn@O`L|D75??eGkZnfg$5<;Xeg_o%+-I&+-3%01W^SH2RkDT>t<8AY({UO#lFTB>(_`g8%^e z{{R4h=>PzAFaQARU;qF*m;eA5Z<1fdMgRaPVo5|nRCwCVlihWLFbqbo*a%+x+7V!k zWC=1v8G=mV%ys*KF*c+H3=729jkA?IZEN77|Y&9lN)s(c45aIUQ zbsiRRto1FX2 z()Cm!V$4CQ$E~*l1Kt5<#{yNm(LIMT!}SwmW)6Ty%uUzNQ>e%NVgjbp@R$~+tksnZ zDn)S?Z|#%eg*)z<2Zm;*6+~;APv^5kY9ZI#bF7Fe>HNZ0=96Uun;$^%Ab>kBqRsj3 zUQ}BLEN^i6)t@%g3VtisN}MxmGJR85Hb)49Koox1gN3YD+X!V~6Ietnf-bPG=V2GZ zVxnFRGlT?|dHcgh_!~|tChvA~pHmz<+$zOr$~>pLOz}!)HtewW&uTk@ zm5_mdpxEY`mZAxi0h4rN53!>-1fYZQ0U#g(2)K13xDzbQ^W_0Ye|F~v4W6aLXzRX+ z)G9Z`-l9XVB?}vp2+GEyS!DABBvI7pK!g4cw1TxzH8l#wM5|m@1(2JEewK#^xYM1WCz$U(IkR(=tRXfyKfPU-4m* z`*H&xcHt8-xL$!^hp=3loC&6Fu17Jo03(YlgiMZ~Wk@A;v-r#dNMuVQkjZAjKtQn> zfR}VxNfk0;D?DpW0mvNJ&*~L7j&#L?E6+=7QA;vuBOV2D=We4{?E%sg1H!$AIxNtD z60?ljfwi$C9GG3Ff$SE&8;PuHcHFLH^D(~z#O)=v0|0rw6KPw-0G8N&Y$U59xkMoa zp53)q2pB6^7f?8lgLuz}G^0$`*c>V!|KX|ziNs!S9v=G14vORc zM+Sh#|Km7RwM|GeCK?L)%)!2S@reVH$e$63mkW+ZSkrkL=*Nr_4*#P%KgPHfm92=R zHDo6}4pO46w_5TFN~`lyjH7|KT^mQ1O8#fpT9PC;f#{>dcW`*^6HwckV=!m%in@gA z5aJ+JM&pqrzJTFKfL1?-rqE{0x1)**vAxP!tnt23k#ubdfx*{Uu#pG^gb9fm`2^X)mQr8@55f4Tcuu$n4uN(6 zkn~9%m_Fpmh*aniT}oS}IDp?ln{zyQUiGVLO{7j5NROy;#7e^ z8_*9r(2q}u3vS2Z4!}KdcXxbqr5@q1>-i|On?~cuZ2p|fH`)_OLUiuAYhDHX(lyq1 z!R6QnU?PX;FC(ho8cS-Z&(yTCpEt1?5*MHJ<3#DQl2N$-bb%0TPGpvlS2Nf^wS8go 
zq^SKTp9pxvx(8Kza08j3RBZ4Z2>Z}1aZ+VWNxb=T)jS43n*3}FvL4_Sbc7G&`Z|Mw z$*~sUWoCFrXpT(~%mgctFx;ujYK2u#?K?Zo6rAUDsJt^3AatY$EC)S9@7Wsy(z@WTO&spZ(io-u1wzP5hj zO-PZ6OY%}oy=E#4Dx36Vkp6;Ke{Zc6`W^TK0REYR(H(H^!0j>h-?Wm{rY~jsTUyaa zD6~Q-FrW!9U)ciA3KDaPs2&(i`&ARGJY9f+3E*cD5G{>+pw>=W-wr(8e!__l_cS;$ zlR#<$tAQlPiV+J1kt~k{8HrICU(l6hO#K8C>tzQJ)u0xQf~IA5AXDv*#__HRSG?x% z3Qn{Sq?#M9N!sqcF`_gJV?$la902RBQE%O<%<8(mO_k39MFfObe5BXk8j(i;@8=B= z?Mwh)iTo!JeiyuNwf{39sumjTYFCPKehU@`aV8#e=- zr$c;+!lwaXu>AVbYBFKj->uYtf=@hl0QVvg=kB%IZntIu`_cU4kW24cVOX_@Nb#P6 zJhZHPUTQ<|1t;3Jfee7=-+pKdO_F}yaIM1y_VJwVct?&g#yR8ZY{(o;Cvi zv2%q-&Z98&@pYt>a!1M_O^^=m4=KW~0_x&^AYp;QPIj|%xtaBhmv!RUax6=FPmg@t zmH%@d>eqn(y#c>Y_K4>$#pv;NF&EyvuBruH_rD7aEdShBgInU(*w7jL70-wCc`{yQ@w~|L!K|DnF!X|`G&#GZ_=n<}ZR@wqHwkX)s zGW-UdYYmX|%an&8SA^pAa>9rzSl~-3`z-*4dfc2n?`HvOoOyG;#%HT$!r>{k;m=1x zlYqw+K&h``SV4D5rPww=GMfKa@b0GA9))RSlMl-;-tjQlH~WzT5U@>h06SlQ-*=?r zjtS|w{?=DBtS^BqqKDZ8xSRt38az@*24G;orpvOn-9PLQ6>(rCFsDgNaYD~d=-4Tj zIzL9qAP5aK%O3m`ZY_xV+zt$m<+z_v5A;rP)2QnR3 zgN@vWgoYvtjeXie3B|~|3*wVN z_9#DZq=z`2NM?xI=O|d-c9I%4?Q6rqR;SfcxTygjQIuX1_LbrMrOWKi)c#`*j<}n6 z(&l%W&J}b!Q(}b*2W1SiNEmGENs#-6VgEqO45)QtWSy3=-+vN{zTx@+VAg`1QVqWAnQ5u3id&D|Dk&PJ-wYda&Y6N^-J@hb zbUIb);tz?uCv@t|^2tzJyb>76acoDKf_`8E>nk^$zei!U|E`(~q? 
zHRUlCFct&iOg(DjGXe>Uo<%OUmz;*U#T{*zeoEj0%rG66@wH+g=U;qn8FCsAE)Scf zEqZ{**2dAM>`RKoNLp#^b*@k>^47% zBf4-LSvekE%M0edn&Y5yXF$X9_Umoyw+!WG3A%>N?_M^V#5#GXbl1S6A`ZqOSRolT z0k|;oc%+dZt{2(Q7_*Le!d{pz8rmG&&NyQCVd6gBLZL(v3MHO)s({uj2L{A4@ltW>BLcyYEPkD&}5!lnA&gr_bbN(o=_OeZ&Yw+UfKM0hy(auujyl6qR%P z2p=Ya2xr{#Nb55XNRb&w8wJpmO2ikg=;eu)P@=3Z!^bmHoaLJU^P|k=yp{`~O1ZLU%O4fL$R}?=DTZ@CA8iqeMZycmf5HIa2WO+_lFbP?#+wbO z{NTZ;ID>_9$)^#UMTeso72|OHMpRq>#ydjdc6B(FPwCZ)^juF$JX$ldx9uHhJ0=~8 ze7Z)GlFL44fbfWwA|dp$HlU+u4#o&hx9Et@`7FB*&{Q!BcR)H4`E=AZUQ|5dY~1tw z7VfinLhb?IR$Nml^)9OB?Op7_Y**GVwO!>tX<_Awl%Oe?p07Qm@!XM7YpR4cQ6AAy|aa~OU zlAzq`jh$1iNX1*lx&E@brQL4qY&UV{^{)57bsV-h;T8#>QnSkH!QHJRU177vmJE-S zDeB%a@=FFZr1*2WKti`vx4xv3Kqaz?0kVBIR;a^~lt9jh`KWosxG@RuCn(N%L}3lN zD)^VZYe#b22BHsph^_J+oPgqhXD@*9tJ!Nco^_7;9fbQL>>qU&q7i#3yn)o4{riUykyf|els%oM6B%B^_C#v#fAZi)ps)lOEcN$jq$ z$CC(_lwq#tF#&yFXGE4OjD9U!egwC?62P^o!=7q5GMyi52B_dn2yRx`tZ4$;o{ik@ zb&K5LDP90~&r zSIMNbamh%VDHJJK$L$2fH8XU>u|J_v*?CRV&1yzRc33pn@rekQOAC10kZ3Kx`#9Uu z6)>xlkY4vAw|ERMf4(;v;XX`w?S$wuEGLAxFq`HQA4hgBKS62wYe&!XI|t_Ni_{Mw z3aClH)C7K|^Tx`Dnu^t|t|fg^obgL79(tA7%%^4IAk`Pk`XmF*5Q4U{W=x01tS>aE z*A1wub(B>neJGi*#qnJ>$s6h1)y@r=w3TCaU7>VTlSmAG;tcJ_b18GGHlKK%e6zyA zlg!^@fSf$LGVRx^f=vDBGdkV?;tyWCa{ zrg4}!V9l_3F7iFo+Cx2w3M^<;3Na!>f~ni{2(c6y%L!Ud1N=OOLPR+9GA;y4N0A1K z1(&4;b!mW4jZv8a2e*2~*k$JNf-B_Ir{{^a?ubB{fJfEffIJN_9Whal?1a5$l?Io$ ziv~=+W(Xme#5+XJn{KH+p0e4kJa$Wi&<#{@EKD1Z!0M z9O5CIsNYc0f7*a#tbJO?gj@ZFp=m%kSHok|0H1{Q8gW8C2e#q@e~SS+PBJ&2ghXBe zq-?BiwE+_>pSnz12GAtD-x0k$wIkbN*RlT-6aFna+d8guK*ZyL`GTI(0T+t*GZfrW!vcmL7%%9D{J0R}rmrBo?bw4&PNx-GB~PYf z3J&qwPMwW#8(i<;>!iF4^QKo_ohN}(&A^E^AflViD#Bha#cnk=Gn69Ms0}!_>vr}P zq_&Cl*fcPqp)#6hk4J@^DK)&e>lQyo+pnF!Tif?E;QqejS<`El)itXqioXgK)3Gu_ zJ6)d7OJLUYT3&&J8=qIM`%Qhvsnc$Xs;#CxRQ>+^A3MJD`u;1h7*$bJozO`2AC|lA zo}l$R-+ZL3bdbmM-h>d0H{ST=D}W%S^vI)XM)K7sx~<#reS6QbSnE}3ZCNP(zRl06 z`i_YO(?>wlkMV%$`&ER~iJsGg-BkeHFm$W0f9XbKdBvQiUficp6nGf7x<+`?zXv;6 zrL(f&5_cu#6)~Uv*5|W77(TE`Nr8)-a*|J`*tpeup5+z4r}lrH>YMBPKBk*2f<;+- 
zTUX>2zbE~HCD>5S$q7p`Q2`5df`G*_wB|z8KkP(Cnb9Q*ay zaG)4;9-cGB3RC)AhfIGURYcOFPoY@cH8mAExJBIhmNRrR-GH++-L(6JEdmsZTvSF|NXzCF1wKg}1Z|Hd8b^T!o|Mip}QAPXOjE{P+oEKt}dp z6UZYi8TM#n;n_xk(nY4!DQ^WVa!lA$W~~>If;}b~5%Rp*ZKl{CRg*!^V8N;k2*)}; z)!Y;{RI(#AE~j$MvfVb3%~TcpsT&MXYN7>^KZ%5I!^~|avmGylViP4akwxj!yJm^g zp{-%!rX5xYky(VG5c$Xoa26qW$u(ztG9?zb3^MOBAo)S36<|V8j-qQAUKMmWffUSs zr!u`#RPVa{^xOMlLI+nK`2AJP>fO2r?2tIABe~qqz+qur7eYCM7!L6W7e_@USj8OM zH`by#SdHfc0BBkb+v8VA0J#41lxV`OIcbb#O2bFkb+8sdcn<>~pUgh36H+Qp)= zW8Mu5GWw;phhjYft=7_??Z%r ze!U|)0VZU<0Dxk_1a=*W7NdzBLnJcWgwig(&&0(_a?Z#iN6}NbU=c$2fWP4IGjR3c z-)DlUq)j)3PXNY`;o}J4m*BMr0QXGkflJY*{S>~DHr==I1fc&CejW1H_x?+nW{Agw zjSWjR<9UZN+f*Cgi6NxRmTzT)lO-lRP#mw-zO#*Rl4EvzUeA2PtbY;V6zY7hz5>X; zmX$HcY|Hp5_46U^DZ@Yfcx#+ob0io`(S0-H@`%rd^A?{y0DO|4_VJ3hafahvIJrX<$;1~-W1#lb!M*$oKa1_AN2p%;+e9dNF6iW6@f-Mqk z1KUsV8adlSVTf(y_&|asDMDLd+OU29mSMiX0C<0f4R+)f^{;0*$Jl-&U&+pop8=SQ z%d5Z1@H!$TLh>&%FWP^R4x#z%}jw ztZ;EKji@!Ql>qp&xxy5G;L<+x8I}Mx_UYvKf(N`MD$NzV-P!1O8FK6#o$NL8>&Mqs z0Kg1?;4}WT3hpX7AH(MI!^dnAzy!C*@da}LIZo~OXMusB04&6yl6P zwvfs%g?^6rM}h^WSmL^;8*(p^+LRn0(Pd#qh8)|y2Ebkd(CtZT=RXY+T;md8ewLdw z^SiOR6Wn2lF;me3DH{Y}Mp8RUv%j$h#&NE(#*Uod*!laQ9r6!@&;V1!Mb}qjoVYUM zxxR%0BWFz0PYq%uR)m1cAOif`GhjH7ihZ`beFO((fs~1WU%x87Cv5V}yq6QF7vIwB ziF*rRW({sT5zwO#+K&7bD`BPUc50nttphtTq&SpaidbNdthE)FeJV^^FtmnABW#2S z)>y=FfDis|ShSX7h_xNFve=)#Rb?+r?_2kC=831c!WCczJVb^$$%6y_636HLZ}@}Z zlu-MJ{Qr_dVWSLmLM|clK zj&~0QU}y)x3{$rK!n(~G*MU(e@3CY8xW=6jz%3@^sq*F;Kzh9YBq!Jn`5j5oCIJ+f zVkeYNIk=9S6hx4ny9Xaa=6v=3t`WQvCY1v%y`>AKLdI8PN@dTVQr}csn@ka#VqUyt zCd}|VmR1x<646+(?#-%k*ba!vU-{MY<1a8LzRL*D@0AoMFG~w8*1~41#@h>7dD%|_ zjBhsqs0i{jtVJz=dyJjyH1`D)4y~-r5^Ti{XSiwPLgxHswrW{N6PQDOG7;X<|1G-aA|)*!DHUG-Rx_`$Q>!RPzCTN<@Pw z9o}nl^gAAKNAAbuBUF(~bLN(Gi@I+n$b49TBZw6q%(&aH*i>2@PH-i14*ZlU4YNYA zh(`cA17z2jz!0LWjQ5Do1%`v8cBE+8f-`piZ6$!v4l-5p#`b_NCD?2)eKAR6%WQG?+PL)b8D zoj-680X&v^wm)MVjwmn$46bvLoG99RvVJug`foKA_cv}a z!wWXuPPcF4TY3xuKhl%P>%s$Y;w&vEQMn(U;f4|A_e5#s_C8W%9}V#H4TGA88%BA( 
z8hNK4jwbGQ-aFqd@d9`Dh}xyq+h5}!X7qA^ov&!b5%+esz+?uefh7Q{Y~=BM@|@sn zAfYD23BIubcTzEeV5~3=mL<}2K~K`S4A1!+Vw&IE2CwDaj+`oZtz75mNh|R{ss5JW{6c z%j&ON`g~_2`24QTMC%tV&dn2~`)I%qCjb>8&%zOc1VZC38c^7ThxY4q3Z&7Md0l`@ zfs~<&*T?}v(aG6pc_mXDr}_s;97m8m!K~9dt+$;bv6ui7Eb+l+dzh8SF5SibxM8Gx zsKKW%??Q_#%e|Dn>ppHKUubQoV&>5A1nG>hNp&h4&L0Y$2N{}|n!3*)oxqUHK933g z9v{S<)W(^OrW4}3RsndJym=N4i0|)o?D=5jhhVThL`4RL|1>TkY>04BIaQeBaTz)3aI{nK}KIN#(31HZOWWxhgV zf;%Tv`EbBB98V0qe)ienK>Yw-#l>By_=;?yFa@Fu?&B@N4pY0ovLV&7GG0kV&u{lW z{u~)e@ZougUJXb@wFjQ1rErF?Z0c-L)p^;k6&N97dB3HAuM_;n>{A!a%6&$C29t;A z)qo{Fu>NYa{rICEVLuAv4?o_(ssd1H%e(b-T|9rYsgXLtd{Q z`0;IkFS~&1gqaJBWg&1^Nb?XMWnyH8rL%PUZ11@P9|!pssV%ECYy`!UQ>1x_kD>u> zla$&Eg>#U6F|2Y5yEH@L#yUbzLu#}$kv(1R}IY*9?j z0FAYiP+yD3kXNJ1E}uu1IB<5y2V37)1^QC4_>~f3a%Lb##VAXym2Hh+Y^ic+F*GfK zm;z2oMoxQ%ae~HlIa%(UlVC=NGWF_(2`SoE1_cntihH=hiEuhQ2Z@awwdD@#6H2Tb zXtggy2TuWful|394RY-4)4%mVESDivSOl{A~5e z_q8n)Hds36@axkUGu&g0??}8hqh(1=zuDVhct}<(?Wtkw}KQ$oMgfv(H%?o-72N;dg7qMJX*`yAzzC zw8$Ljj@a$%Q$JSrsOAGxz|q$+(~Gy_S|5n^jR>QuN+cL#E+#+X>$5Xwp`-is=mLtn zo*Gc#n&cyBK2WiuE|m9`>yl59VnJAEhY;Cf4TTk3u_Gfkp67LfX~2v7+AXUF-Nq>9 zLW5EOuk8(LK5&IgTvVWI##CQZpvEfcYdyt=oO9hB2Xwdi@%r?AD6$kAd}2m-g10QX zP$*eWNCuF)LL$;BJr=k-Y58;l(AFqr(d9>c!=3dq6P)8$Hao2+Y-=xMl64;m2bg7W zT?7zYfYa7q(5ko8aj)?mbH@#mvbs+JdYS|vC%>MV5<7gm2O7-g@|gsn$`-*(*S-Ws zl9fwH;jS7g$@+us`?CIEm%bIiO8ChxeOb0tX65kNX0sF`YZ*JX$uHUe2ukB;T0gLR ze}Xq`DJ!4I^4EhroW31E0K9%+|NaE$q(Ht-X8lnGV2z+GLeZtaV{}u2uNaf6H1BbN zOO`0<517gx{hs0Qj#d8c+BdnT23ou)B$>AFXuRCAXTO&hXs#c)&u5*^>b|E+2QHI9 zQPgJ|0L~$U%>Ul zkfO3iNW2nr|GqE4)kA7~5E~@x5yCS|ac6O;ci9}G<@Ne~8q-(USM?a9d3m7oxb+Bq zCeV4s#z-jXm>4_W`t{!;!L|a=^!LjHsx{36tV8sdjNKT<++eH{bN^3`40)E<1aA(g z0kR&UTScTU5;IU9n>@lGr-Fhjv8 zKb_sx)@OK7A&(rK!QY56uR`=75)4&h?)zyHRj3hMFv_a`5NKDWEaA9(1# zelc^+0@oG)wk#GfI@Yx>L#u9m)$XMTefu;^uYRiyUH|Qt?rz)fO^E3Ff!sy(>6=*q z$`KppPy?b-MVT1n&R2eP?H`B|Q;~{~{ri-`ai0cf`nvWHMu|OE8#*+m&L^xR&gY{< zfq|?a*uQ@!`lfz>F?t)z36_;Z<@E=<_Wy&FSW_kABPaN@0w{1@1rzm~rB-b2&qZoE 
zP6bk{=Xz%%+uucQ&Ei=&uj0NUSTw;dvf@Nuv2PZEA;C}33sqlL21;iDUkIc|lVuBJ z5v2AfZct!K66+*#^C!`nRO>NH%mlLgmjT~-g+=FqHUIw)+5Z&bA{})JgxXEsMB;s&N-@SMt6eq@)-S9} zg_-p%Zz?_7hB?7ehc`oa`=l_55+Uxp!1n+2T|a%Z_7qz?LD!<3-l0iR*CUij$ZK}{ z9)FF$aZZxt{O?2byn0){i10B5;Hwm_4md9Wr_c`1U43n54F;5}eY(bV!RH~e9{nM9 zp7Q*3Zla!*7|2S?Zj- +#include + +#include "../libpicofe/input.h" +#include "../libpicofe/in_sdl.h" +#include "../common/input_pico.h" + +const struct in_default_bind in_sdl_defbinds[] = { + { SDLK_UP, IN_BINDTYPE_PLAYER12, GBTN_UP }, + { SDLK_DOWN, IN_BINDTYPE_PLAYER12, GBTN_DOWN }, + { SDLK_LEFT, IN_BINDTYPE_PLAYER12, GBTN_LEFT }, + { SDLK_RIGHT, IN_BINDTYPE_PLAYER12, GBTN_RIGHT }, + { SDLK_LSHIFT, IN_BINDTYPE_PLAYER12, GBTN_A }, + { SDLK_LALT, IN_BINDTYPE_PLAYER12, GBTN_B }, + { SDLK_LCTRL, IN_BINDTYPE_PLAYER12, GBTN_C }, + { SDLK_RETURN, IN_BINDTYPE_PLAYER12, GBTN_START }, + { SDLK_ESCAPE, IN_BINDTYPE_EMU, PEVB_MENU }, + { SDLK_TAB, IN_BINDTYPE_EMU, PEVB_PICO_PPREV }, + { SDLK_BACKSPACE, IN_BINDTYPE_EMU, PEVB_PICO_PNEXT }, + { SDLK_BACKSPACE, IN_BINDTYPE_EMU, PEVB_STATE_SAVE }, + { SDLK_TAB, IN_BINDTYPE_EMU, PEVB_STATE_LOAD }, + { SDLK_SPACE, IN_BINDTYPE_EMU, PEVB_FF }, + { 0, 0, 0 } +}; + +const struct menu_keymap in_sdl_key_map[] = +{ + { SDLK_UP, PBTN_UP }, + { SDLK_DOWN, PBTN_DOWN }, + { SDLK_LEFT, PBTN_LEFT }, + { SDLK_RIGHT, PBTN_RIGHT }, + { SDLK_LCTRL, PBTN_MOK }, + { SDLK_LALT, PBTN_MBACK }, + { SDLK_SPACE, PBTN_MA2 }, + { SDLK_LSHIFT, PBTN_MA3 }, + { SDLK_TAB, PBTN_L }, + { SDLK_BACKSPACE, PBTN_R }, +}; + +const char * const in_sdl_key_names[SDLK_LAST] = { + [SDLK_UP] = "UP", + [SDLK_DOWN] = "DOWN", + [SDLK_LEFT] = "LEFT", + [SDLK_RIGHT] = "RIGHT", + [SDLK_LCTRL] = "A", + [SDLK_LALT] = "B", + [SDLK_LSHIFT] = "X", + [SDLK_SPACE] = "Y", + [SDLK_TAB] = "L", + [SDLK_BACKSPACE] = "R", + [SDLK_RETURN] = "START", + [SDLK_ESCAPE] = "SELECT", + [SDLK_POWER] = "POWER", + [SDLK_PAUSE] = "LOCK", +}; From 868cc0cc8f2fc6cfd278b15e8dbd042188de53ca Mon Sep 17 00:00:00 2001 From: Paul 
Cercueil Date: Tue, 8 Oct 2013 00:46:49 +0200 Subject: [PATCH 0016/1110] Use PBTN_MOK instead of PBTN_MA3 to confirm loading/saving state --- platform/common/emu.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/platform/common/emu.c b/platform/common/emu.c index 12d8c095..88fefd57 100644 --- a/platform/common/emu.c +++ b/platform/common/emu.c @@ -1073,9 +1073,9 @@ static void run_events_ui(unsigned int which) char tmp[64]; int keys, len; - strcpy(tmp, (which & PEV_STATE_LOAD) ? "LOAD STATE?" : "OVERWRITE SAVE?"); + strcpy(tmp, (which & PEV_STATE_LOAD) ? "LOAD STATE? " : "OVERWRITE SAVE? "); len = strlen(tmp); - nm = in_get_key_name(-1, -PBTN_MA3); + nm = in_get_key_name(-1, -PBTN_MOK); snprintf(tmp + len, sizeof(tmp) - len, "(%s=yes, ", nm); len = strlen(tmp); nm = in_get_key_name(-1, -PBTN_MBACK); @@ -1084,13 +1084,13 @@ static void run_events_ui(unsigned int which) plat_status_msg_busy_first(tmp); in_set_config_int(0, IN_CFG_BLOCKING, 1); - while (in_menu_wait_any(NULL, 50) & (PBTN_MA3|PBTN_MBACK)) + while (in_menu_wait_any(NULL, 50) & (PBTN_MOK | PBTN_MBACK)) ; - while ( !((keys = in_menu_wait_any(NULL, 50)) & (PBTN_MA3|PBTN_MBACK)) ) + while ( !((keys = in_menu_wait_any(NULL, 50)) & (PBTN_MOK | PBTN_MBACK))) ; if (keys & PBTN_MBACK) do_it = 0; - while (in_menu_wait_any(NULL, 50) & (PBTN_MA3|PBTN_MBACK)) + while (in_menu_wait_any(NULL, 50) & (PBTN_MOK | PBTN_MBACK)) ; in_set_config_int(0, IN_CFG_BLOCKING, 0); } From 948aa481fa136185771fc85b4b8abaffe3795aef Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Mon, 30 Sep 2013 09:29:24 -0300 Subject: [PATCH 0017/1110] Don't include dead code when linking program (saves 48kB) --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0df793e8..c199b853 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,8 @@ TARGET ?= PicoDrive CFLAGS += -Wall -ggdb -falign-functions=2 CFLAGS += -I. 
ifndef DEBUG -CFLAGS += -O2 -DNDEBUG +CFLAGS += -O2 -DNDEBUG -ffunction-sections +LDFLAGS += -Wl,--gc-sections endif #CFLAGS += -DEVT_LOG #CFLAGS += -DDRC_CMP From 99823d0f2394ff482c3b8bd5b928454c347549a3 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 10 Oct 2013 04:04:57 +0300 Subject: [PATCH 0018/1110] split base_readme to more traditional files --- AUTHORS | 56 ++++ ChangeLog | 485 +++++++++++++++++++++++++++++++ README | 15 + platform/base_readme.txt | 607 +-------------------------------------- tools/textfilter.c | 46 ++- 5 files changed, 602 insertions(+), 607 deletions(-) create mode 100644 AUTHORS create mode 100644 ChangeLog create mode 100644 README diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 00000000..d4791101 --- /dev/null +++ b/AUTHORS @@ -0,0 +1,56 @@ +notaz +core, 32X emulation, CD code, ARM asm renderers, dynamic recompilers, +Pandora, GPH device, PSP, Gizmondo ports, CPU core hacks +lots of additional coding (see ChangeLog). +Homepage: http://notaz.gp2x.de/ + +fDave +project starter +Cyclone 68000 core and PicoDrive core itself + +Chui +FAME/C 68k interpreter core +(based on C68K by Stephane Dallongeville) + +Stephane Dallongeville (written), NJ (optimized) +CZ80 Z80 interpreter core + +Reesy & FluBBa +DrZ80, the Z80 interpreter written in ARM assembly. +Homepage: http://reesy.gp32x.de/ (defunct) + +Tatsuyuki Satoh, Jarek Burczynski, MAME development +software implementation of Yamaha FM sound generator + +MAME development +Texas Instruments SN76489 / SN76496 programmable tone/noise generator +Homepage: http://www.mame.net/ + +Eke-Eke +CD graphics processor and CD controller implementation (from Genesis Plus GX) + + +Additional thanks +----------------- + +* Charles MacDonald (http://cgfm2.emuviews.com/) for old but still very useful + info about genesis hardware. +* Steve Snake for all that he has done for Genesis emulation scene. +* Stephane Dallongeville for writing Gens and making it open source.
+* Tasco Deluxe for his reverse engineering work on SVP and some mappers. +* Bart Trzynadlowski for his SSFII and 68000 docs. +* Haze for his research (http://mamedev.emulab.it/haze/). +* Lordus, Exophase and Rokas for various ideas. +* Nemesis for his YM2612, VDP research and docs. +* Eke-Eke for sharing the knowledge and his work on Genesis Plus GX. +* Many posters at spritesmind.net forums for valuable information. +* Mark and Jean-loup for zlib library. +* ketchupgun for the skin. +* GP2X specific help: rlyeh, Squidge, Dzz, A_SN, Alex and GP32X posters. +* Gizmondo code: Kingcdr, Reesy, jens.l (for the device itself) +* Hardware: craigix (GP2X), EvilDragon (Wiz, Caanoo, Pandora, ...) + and jens.l (Gizmondo) +* Paul Cercueil for OpenDingux port. +* Inder for some graphics. +* squarepusher for some libretro fixes +* Anyone else I forgot. Let me know if it's you. diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 00000000..ba840f4f --- /dev/null +++ b/ChangeLog @@ -0,0 +1,485 @@ +1.91 (2013-10-) + + Added OpenDingux support (Paul Cercueil). + + Revived GP2X/Caanoo/Wiz support. + + Switched to cleaner CD controller code from Eke-Eke's Genesis Plus GX. + * Fixed overflow issue where cd emulation would break after + ~10 minutes of gameplay. + * Fixed synchronization issue where model1 CD BIOS would randomly hang. + +1.90 (2013-09-24) + + 32X+CD emulation has been implemented. + + CD graphics processor code has been replaced with much cleaner Eke-Eke's + implementation from Genesis Plus GX. + + CD PCM code has been completely rewritten. + * Various CD compatibility issues have been solved. Hopefully no more + regressions this time. + * pandora: fixed tv-out (again), added automatic layer switching + * libretro: fixed crackling sound for some games, added some core options + * sdl: multiple joystick support has been fixed (Victor Luchits) + +1.85 (2013-08-31) + * Lots of 32X compatibility and accuracy improvements. 
All commercial games + are booting now, but some still have issues. + * Fixed some regressions in MegaCD code, like hang in jap BIOS. + * Implemented pause for SMS. + * Updated UI with improvements from PCSX ReARMed. + * Frontend timing has been rewritten, should no longer slowly desync from + LCD on pandora. + * Added libretro and SDL 32/64bit ports, fixed compatibility issues with + Android, iOS. + * Various other things I forgot (it has been a while since last release..) + +1.80 (2010-09-19) + + Added Caanoo support. Now the GP2X binary supports GP2X F100/F200, Wiz + and Caanoo. Lots of internal refactoring to support this. + + Enabled 32X and SMS code. It's still unfinished but better release something + now than wait even more (it has been in development for more than a year now + due to various other projects or simply lack of time). + + Pandora: added hardware scaler support, including ability to resize the + layer and control filtering. + + GP2X: Added basic line-doubling vertical scaling option. + * Changed the way keys are bound, no need to unbind old one any more. + * Handle MP3s with ID3 tags better (some MP3s with ID3 did not play). + * Improved shadow/hilight color levels. + * Fixed broken cheat support. + +1.80beta2 + * Pandora: updated documentation. + +1.80beta1 (2010-06-02) + + Added pandora port. + * Internal refactoring for 32x/SMS support. + * Move mapper database to external file. + + Added preliminary SMS emulation. + + Added emulation of 32x peripherals including VDP. More work is needed here. + + ARM: Added new SH2 recompiler for 32x. Some unification with SVP one. + - Disabled most of the above because I'm not yet happy with the results. + +1.56 (2009-09-19) + * Changed sync in Sega CD emulation again. Should fix games that + broke after changes in 1.51a. + * Fixed default keys rebinding when they shouldn't. + * Fixed sram being loaded from wrong game. + * Emu should no longer hang shortly after using fast-forward.
+ * Fixed save states sometimes no longer showing up in save state menu. + * ARM: some asm code refactoring for slight speed improvement. + +1.55 + + Added Wiz support. Now the same GP2X binary supports F100/F200 and Wiz. + * Changed shadow/hilight handling a bit, fixes some effects in Pirates! Gold. + * Complete input code rewrite. This fixes some limitations like not allowing + to control both players using single input device. It also allows to use + more devices (like keyboards) on Linux based devices. + * Options menu has been reordered, "restore defaults" option added. + +1.51b + * Fixed a crash when uncompressed savestate is loaded. + * Fixed an idle loop detection related hanging problem. + * PSP: fixed another palette related regression. + * UIQ3: updated frontend for the latest emu core. + +1.51a + * Fixed a sync problem between main and sub 68k. Should fix the hanging + problem for some games. + * ARM: fixed a crash when CD savestate is loaded just after loading ROM. + +1.51 + * Improved bin_to_cso_mp3 tool, it should no longer complain about + missing lame.exe even if it's in working dir. + * Fixed a regression from 1.50, which caused slowdowns in Final Fight. + * Fixed some regressions from 1.50 related to sprite limit and palette + handling (caused graphical glitches in some games). + + Added ABC turbo actions to key config. + * Some other minor adjustments. + +1.50 + + Added some basic support for Sega Pico, a MegaDrive-based toy. + + Added proper support for cue/bin images, including cdda playback. + .cue sheets with iso/cso/mp3/wav files listed in them are now + supported too (but 44kHz restriction still applies). + + Added bin_to_cso_mp3 tool, based on Exophase's bin_to_iso_ogg. + The tool can convert .cue/.bin Sega CD images to .cso/.mp3. + * Greatly improved Sega CD load times. + * Changed how scheduling between 68k and z80 is handled. Improves + performance for some games. Credits to Lordus for the idea. 
+ * YM2612 state was not 100% saved, this should be better now. + * Improved renderer performance for shadow/hilight mode. + * Added a hack for YM2612 frequency overflow issue (bleep noises + in Shaq Fu, Spider-Man - The Animated Series (intro music), etc.) + Credits to Nemesis @ spritesmind forum. Works only if sound rate + is set to 44kHz. + + Implemented some sprite rendering improvements, as suggested by + Exophase. Games with lots of sprites now perform better. + + Added better idle loop detection, based on Lordus' idea again. + - "accurate timing" option removed, as disabling it no longer + improves performance. + - "accurate sprites" was removed too, the new sprite code can + properly handle sprite priorities in all cases. + * Timers adjusted again. + * Improved .smd detection code. + * ARM: fixed a bug in DrZ80 core, which could cause problems in + some rare cases. + * ARM: fixed a problem of occasional clicks on MP3 music start. + * Minor general optimizations and menu improvements. + * Fixed a bug in Sega CD savestate loader, where the game would + sometimes crash after load. + * Fixed a crash of games using eeprom (introduced in 1.40b). + * PSP: fixed suspend/resume (hopefully for real). + +1.40c + * Fixed a problem with sound in Marble Madness. + * GP2X: Fixed minor problem with key config. + +1.40b + * Fixed sprite masking code. Thanks to Lordus for explaining how it works. + + Added "disable sprite limit" option. + + PSP: added black level adjustment to display options. + * Changed reset to act as 'soft' reset. + + Added detection for Puggsy (it doesn't really have sram). + * Some small timing adjustments. + +1.40a + * GP2X: Fixed a binding problem with up and down keys. + * Default game config no longer overrides global user config. + +1.40 + + Added support for SVP (Sega Virtua Processor) to emulate Virtua Racing, + wrote ARM recompiler and some HLE code for VR. Credits to Exophase and + Rokas for various ideas. 
+ * Changed config file format, files are now human-readable. Game specific + configs are now held in single file (but old game config files are still + read when new one is missing). + * Fixed a bug where some key combos didn't work as expected. + * Fixed a regression in renderer (ARM ports only, some graphic glitches in + rare cases). + * Adjusted fast renderer to work with more games, including VR. + * Fixed a problem where SegaCD RAM cart data was getting lost on reset. + * GP2X: Greatly reduced SegaCD FMV game slowdowns by disabling read-ahead + in the Linux kernel and C library (thanks to Rokas and Exophase for ideas + again). Be sure to keep "ReadAhead buffer" OFF to avoid slowdowns. + + PicoDrive now comes with a game config file for some games which need + special settings, so they should now work out-of-the-box. More games will + be added with later updates. + + GP2X: Files now can be deleted by pressing A+SELECT in the file browser. + +1.35b + * PSP: mp3 code should no longer fail on 1.5 firmware. + + PSP: added gamma adjustment option. + + Added .cso ISO format support. Useful for non-FMV games. + * It is now possible to force a region after the ROM is loaded. + * Fixed a sram bug in memhandlers (fixes Shining in the Darkness saves). + * PSP: fixed another bug in memhandlers, which crashed the emu for some games + (like NBA Jam and NHL 9x). + + PSP: added suspend/resume handling for Sega CD games. + + GP2X: added additional low volume levels for my late-night gaming sessions + (in stereo mode only). + + GP2X: added "fast forward" action in key config. Not recommended to use for + Sega CD, may cause problems there. + * Some other small tweaks I forgot about. + +1.35a + * PSP: fixed a bug which prevented to load any ROMs after testing the BIOS. + * PSP: fixed incorrect CZ80 memory map setup, which caused Z80 crashes and + graphics corruption in EU Mega CD model1 BIOS menus. + + PSP: added additional "set to 4:3 scaled" display option for convenience.
+ + PSP: Added an option to disable frame limitter (works only with non-auto frameskip). + +1.35 + + PSP port added. Lots of new code for it. Integrated modified FAME/C, CZ80 cores. + + Some minor generic optimizations. + * Patched some code which was crashing under PSP, but was working in GP2X/Giz + (although it should have crashed there too). + * Readme updated. + +1.34 + + Gizmondo port added. + + Some new optimizations in memory handlers, and for shadow/hilight mode. + + Added some hacks to make more games work without enabling "accurate timing". + * Adjusted timing for "accurate timing" mode and added preliminary VDP FIFO + emulation. Fixes Double Dragon 2, tearing in Chaos Engine and some other games. + * Fixed a few games not having sound at startup. + * Updated serial EEPROM code to support more games. Thanks to EkeEke for + providing info about additional EEPROM types and game mappers. + * The above change fixed hang of NBA Jam. + * Minor adjustments to control configurator. + +1.33 + * Updated Cyclone core to 0.0088. + + Added A r k's usbjoy fix. + + Added "perfect vsync" option, which adjusts GP2X LCD refresh rate and syncs + emulation to it to eliminate tearing and ensure smoothest scrolling possible. + + Added an option to use A_SN's gamma curve for gamma correction (improves dark + and bright color display for mk2s). + * Sometimes stray sounds were played after loading a savestate. Fixed. + * Fixed a problem where >6MB mp3s were corrupted in memory (sound glitches in + Snatcher). + * PD no longer overwrites video player code in memory, video player now can be + used after exiting PicoDrive. + * Fixed a bug which was causing Sonic 3 code to deadlock in some rare conditions + if "accurate timing" was not enabled. + * Fixed support for large hacked ROMs like "Ultimate Mortal Kombat Trilogy". + Upto 10MB hacked ROMs are supported now. + + Config profiles added (press left/right when saving config). 
+ * Changed key configuration behavior to the one from gpfce (should be more + intuitive). + + Added some skinning capabilities to the menu system with default skin by + ketchupgun. Delete skin directory if you want old behaviour. + * Some other little tweaks I forgot about. + +1.32 + + Added some new scaling options. + + Added ability to reload CD images while game is running (needed for games + with multiple CDs, like Night Trap). + + Added RAM cart emulation. + * Fixed DMA timing emulation (caused lock-ups for some genesis games). + * Idle loop detection was picking up wrong code and causing glitches, fixed. + * The ym2612 code on 940 now can handle multiple updates per frame + (fixes Thunger Force III "seiren" level drums for example). + * Memory handlers were ignoring some writes to PSG chip, fixed (missing sounds in + Popful Mail, Silpheed). + * Improved z80 timing, should fix some sound problems. + * Fixed a bug with sram register (fixes Phantasy Star 4). + * ROM loader was incorrectly identifying some ROMs as invalid. Fixed. + * Added code for PRG ram write protection register (Dungeon Explorer). + * The memory mode register change in 1.31 was unsafe and caused some glitches in + AH-3 Thunderstrike. Fixed. + * Fixed a file descriptor leak. + * Updated documentation, added Gmenu2x manual. + +1.31 + * Changed the way memory mode register is read (fixes Lunar 2, broken in 1.30). + * Fixed TAS opcode on sub-68k side (fixes Batman games). + * File browser now filters out mp3s, saves and some other files, which are not ROMS. + +1.30 + + ISO files now can be zipped. Note that this causes VERY long loading times. + + Added data pre-buffering support, this allows to reduce frequency of short pauses + in FMV games (caused by SD access), but makes those pauses longer. + * Fixed PCM DMA transfers (intro FMV in Popful Mail). + + Properly implemented "decode" data transformation (Jaguar XJ220). 
+ * Integrated "better sync" code into cyclone code, what made this mode much faster. + * Fixed a bug related to game specific config saving. + * Frameskipper was skipping sound processing, what caused some audio desyncs. Fixed. + * Fixed reset not working for some games. + + New assembly optimized memory handlers for CD (gives at least a few fps). + Also re-enabled all optimizations from 0.964 release. + + New idle-loop detection code for sub-68k. Speeds up at least a few games. + +1.201 + + Added basic cheat support (GameGenie and Genecyst patches). + +1.20 + * Fixed a long-standing problem in audio mixing code which caused slight distortions + at lower sample rates. + * Changed the way 920 and 940 communicates (again), should be more reliable and give + slight performance increase. + * Some optimizations in audio mixing code. + * Some menu changes (background added, smaller font in ROM browser, savestate loader + now can select slots). + + 1M mode DMA transfers implemented (used by FMV games like Night Trap and Sewer Shark). + + Games now can run code from WORD RAM in 1M mode (fixes Adventures of Willy Beamish). + + "Cell arrange" address mapping is now emulated (Heart of the alien). + + "Color numeric operation" is now emulated (text in Lunar 2, Silpheed intro graphics). + + "Better sync" option added (prevents some games from hanging). + +1.14 + + Region autodetection now can be customized. + * When CDDA music tracks changed, old buffer contents were incorrectly played. Fixed. + * BRAM is now automatically formatted (no need to enter BIOS menu and format any more). + * Games now can be reset, CDDA music no longer breaks after loading another ISO. + * Fixed a race condition between 920 and 940 which sometimes caused CDDA music not to play. + + Savestates implemented for Sega/Mega CD. + + PCM sound added. + * Some mixer code rewritten in asm. 22kHz and 11kHz sound rates are now supported in + Mega CD mode (but mp3s must still be 44kHz stereo). 
+ + Timer emulation added. + * CDC DMA transfers fixed. Snatcher and probably some more games now boot. + * 2M word RAM -> VDP transfers fixed, no more corruption in Ecco and some other games. + +1.10 + + GP2X: Added experimental Sega CD support. + + GP2X: Added partial gmv movie playback support. + +0.964 (2006-12-03) + * GP2X: Fixed a sound buffer underflow issue on lower sample rate modes, which was + happening for NTSC games and causing sound clicks. + * GP2X: Redone key config to better support USB joysticks (now multiple joysticks + should be useable and configurable). + + GP2X: Added save confirmation option. + + GP2X: Added 940 CPU crash detection. + + ALL: UIQ3 port added. + +0.963 + * GP2X: Gamma-reset-on-entering-menu bug fixed. + * GP2X: Recompiled PicoDrive with gcc profiling option set as described here: + http://www.gp32x.com/board/index.php?showtopic=28490 + +0.962 + * GP2X: Fixed an issue with incorrect sounds in some games when dualcore operation + was enabled (for example punch sound in SOR). + * GP2X: Limited max volume to 90, because higher values often cause distortions. + * GP2X: Fixed a bug with lower res scaling. + * GP2X: Gamma is now reset on exit. + +0.96 + * ALL: Severely optimized MAME's YM2612 core, part of it is now rewritten in asm. + + GP2X: The YM2612's code now can be run in GP2X's ARM940T CPU, what causes large + performance increase. + * ALL: Accurate renderers are slightly faster now. + + GP2X: Using quadruple buffering instead of doublebuffer now, also updated + framelimitter, this should eliminate some scrolling and tearing problems. + * GP2X: Fixed some flickering issues of 8bit accurate renderer. + + GP2X: craigix's RAM timings now can be enabled in the menu (see advanced options). + + GP2X: Added ability to save config for specific games only. + + GP2X: Gamma control added (using GP2X's hardware capabilities for this). + * GP2X: Volume keys are now configurable.
+ + GP2X: GnoStiC added USB joystick support, I made it possible to use it for + player 2 control (currently untested). + * GP2X: squidgehack is now applied through kernel module (cleaner way). + +0.95 + * ALL: Fixed a bug in sprite renderer which was causing slowdowns for some games. + + GP2X: Added command line support + + GP2X: Added optional hardware scaling for lower-res games like Shining Force. + * ALL: Sound chips are now sampled 2 times per frame. This fixed some games which + had missing sounds (Vectorman 2 1st level, Thunder Force 3 water level, + etc.). + + ALL: Added another accurate 8-bit renderer which is slightly faster and made it + default. + +0.945 + + GP2X: Added frame limiter for frameskipped modes. + * GP2X: Increased brightness a bit (unused pixel bits now also contain data). + * GP2X: Suidgehack was not applied correctly (was applied before allocating some + high memory and had no effect). + +0.94 + + Added GP2X port. + * Improved interrupt timing, Mazin Saga and Burning Force now works. + * Rewritten renderer code to better suit GP2X, should be faster on other + ports too. + + Added support for banking used by 12-in-1 and 4-in-1 ROMs (thanks Haze). + + Added some protection device faking, used by some unlicensed games like + Super Bubble Bobble, King of Fighters, Elf Wor, ... (thanks to Haze again) + + Added primitive Virtua Racing SVP faking, so menus can be seen now. + +0.93 + * Fixed a problem with P900/P910 key configuration in FC mode. + * Improved shadow/hilight mode emulation. Still not perfect, but should be + enough for most games. + + Save state slots added. + + Region selector added. + +0.92 + VDP changes: + * VDP emulation is now more accurate (fixes flickering in Chase HQ II, + Super Hang-On and some other problems in other games). + * HV counter emulation is now much more accurate. Fixes the Asterix games, + line in Road Rash 3, etc. + * Minor sprite and layer scroll masking bugs fixed. 
+ + Added partial interlace mode renderer (Sonic 2 vs mode) + * Fixed a crash in both renderers when certain size window layers were used. + + Added emulation of shadow/hilight operator sprites. Other shadow/hilight + effects are still unemulated. + + Sprite emulation is more accurate, sprite limit is emulated. + + Added "accurate sprites" option, which always draws sprites in correct + order and emulates sprite collision bit, but is significantly slower. + + Emulation changes: + * Improved interrupt handling, added deferred interrupt emulation + (Lemmings, etc). + + Added serial EEPROM SRAM support (Wonder Boy in Monster World, + Megaman - The Wily Wars and many EA sports games like NBA Jam). + + Implemented ROM banking for Super Street Fighter II - The New Challengers + * Updated to the latest version of DrZ80 core, integrated memory handlers + in it for better performance. A noticeable performance increase, but save + states may not work from the previous version (you can only use them with + sound disabled in that case). + + SRAM word read handler was using incorrect byte order, fixed. + + Changes in Cyclone 0.0086: + + Added missing CHK opcode handler (used by SeaQuest DSV). + + Added missing TAS opcode handler (Gargoyles,Bubba N Stix,...). As in real genesis, + memory write-back phase is ignored (but can be enabled in config.h if needed). + + Added missing NBCD and TRAPV opcode handlers. + + Added missing addressing mode for CMP/EOR. + + Added some minor optimizations. + - Removed 216 handlers for 2927 opcodes which were generated for invalid addressing modes. + + Fixed flags for ASL, NEG, NEGX, DIVU, ADDX, SUBX, ROXR. + + Bugs fixed in MOVEP, LINK, ADDQ, DIVS handlers. + * Undocumented flags for CHK, ABCD, SBCD and NBCD are now emulated the same way as in Musashi. + + Added Uninitialized Interrupt emulation. + + Altered timing for about half of opcodes to match Musashi's. + +0.80 + * Nearly all VDP code was rewritten in ARM asm. 
Gives ~10-25% performance + increase (depends on game). + * Optimized 32-column renderer not to render tiles offscreen, games which + use 32-column display (like Shining Force) run ~50% faster. + + Added new "Alternative renderer", which gives another ~30-45% performance + increase (in addition to mentioned above), but works only with some games, + because it is missing some features (it uses tile-based rendering + instead of default line-based and disables H-ints). + + Added "fit2" display mode for all FC gamers. It always uses 208x146 for + P800 and 208x208 for all other phones. + + Added volume control for Motorolas (experimental). + + VDP changes: + + Added support for vertical window (used by Vapor Trail, Mercs, GRIND + Stormer and others). + + Added sprite masking (hiding), adds some speed. + + Added preliminary H counter emulation. Comix Zone and Sonic 3D Blast + special stage are now playable. + + Added column based vertical scrolling (Gunstar Heroes battleship level, + Sonic and Knuckles lava boss, etc). + + Emulation changes: + + Re-added and improved Z80 faking when Z80 is disabled. Many games now can + be played without enabling Z80 (Lost Vikings, Syndicate, etc), but some + still need it (International Superstar Soccer Deluxe). + * Improved ym2612 timers, Outrun music plays at correct speed, voices in + Earthworm Jim play better, more games play sound. + * I/O registers now remember their values (needed for Pirates! Gold) + + Added support for 6 button pad. + + Changes in Cyclone 0.0083wip: + + Added missing CHK opcode (used by SeaQuest DSV). + + Added missing TAS opcode (Gargoyles). As in real genesis, write-back phase + is ignored (but is enabled for other systems). + + Backported stuff from Snes9x: + * Fixed Pxxx jog up/down which were not working in game. + + Added an option to gzip save states to save space. + + The emulator now pauses whenever it is losing focus, so it will now pause + when alarm/phonecall/battery low/... windows come up.
+ - Removed 'pause on phonecall' feature, as it is no longer needed. + + Video fix for asian A1000s. + +0.70 + * Started using tools from "Symbian GCC Improvement Project", which give + considerable speed increase (~4fps in "center 90" mode). + * Rewrote some drawing routines in ARM assembly (gives ~6 more fps in + "center 90" mode). + * Minor improvement to 0 and 180 "fit" modes. Now they look slightly better + and are faster. + * Minor stability improvements (emulator is less likely to crash). + + Added some background for OSD text for better readability. + + Added Pal/NTSC detection. This is needed for proper sound speed. + + Implemented Reesy's DrZ80 Z80 emu. Made some changes to it with hope to make + it faster. + + Implemented ym2612 emu from the MAME project. Runs well but sometimes sounds + a bit weird. Could be a little faster, so made some changes too. + + Implemented SN76489 emu from the MAME project. + + Added two separate sound output methods (mediaserver and cmaudiofb) with + autodetection (needs testing). + * Fixed VDP DMA fill emulation (as described in Charles MacDonald's docs), + fixes Contra and some other games. + +0.301 + Launcher: + * Launcher now starts emulation process from current directory, + not from hardcoded paths. + * Improved 'pause on call' feature, should hopefully work with Motorola phones. + +0.30 (2006-01-07) + Initial release based on fDave's code. diff --git a/README b/README new file mode 100644 index 00000000..fbcecc13 --- /dev/null +++ b/README @@ -0,0 +1,15 @@ + +This is yet another Megadrive / Genesis / Sega CD / Mega CD / 32X / SMS +emulator, which was written having ARM-based handheld devices in mind +(such as smartphones and handheld consoles like GP2X and Pandora), +but also runs on non-ARM little-endian hardware too. + +The emulator is heavily optimized for ARM, features assembly cores for +68k, Z80 and VDP chip emulation, also has dynamic recompilers for SH2 and +SSP16 (for 32X and SVP emulation). 
It was started by Dave (aka fdave, +finalburn author) as basic Genesis/Megadrive emulator for Pocket PC, +then taken over and expanded by notaz. + +PicoDrive was the first emulator ever to properly emulate Virtua Racing and +it's SVP chip. + diff --git a/platform/base_readme.txt b/platform/base_readme.txt index d2f7e96c..7a3bd897 100644 --- a/platform/base_readme.txt +++ b/platform/base_readme.txt @@ -3,20 +3,7 @@ PicoDrive 1.xx About ----- - -This is yet another Megadrive / Genesis / Sega CD / Mega CD / 32X / SMS -emulator, which was written having ARM-based handheld devices in mind -(such as smartphones and handheld consoles like GP2X and Pandora). - -The emulator is heavily optimized for ARM, features assembly cores for -68k, Z80 and VDP chip emulation, also has dynamic recompilers for SH2 and -SSP16 (for 32X and SVP emulation). It was started by Dave (aka fdave, -finalburn author) as basic Genesis/Megadrive emulator for Pocket PC, -then taken over and expanded by notaz. - -PicoDrive is the first emulator ever to properly emulate Virtua Racing and -it's SVP chip. - +#include "../README" How to make it run ------------------ @@ -486,601 +473,25 @@ Problems / limitations * The FM sound core doesn't support all features and has some accuracy issues. +Changelog +------- + +#include "../ChangeLog" + + Credits ------- This emulator is made of the code from following people/projects: -notaz -GP2X, UIQ, PSP, Gizmondo ports, CPU core hacks, dynamic recompilers, -lots of additional coding (see changelog). -Homepage: http://notaz.gp2x.de/ - -fDave -one who started it all: -Cyclone 68000 core and PicoDrive itself - -Chui -FAME/C 68k interpreter core -(based on C68K by Stephane Dallongeville) - -Stephane Dallongeville (written), NJ (optimized) -CZ80 Z80 interpreter core - -Reesy & FluBBa -DrZ80, the Z80 interpreter written in ARM assembly. 
-Homepage: http://reesy.gp32x.de/ (defunct) - -Tatsuyuki Satoh, Jarek Burczynski, MAME development -software implementation of Yamaha FM sound generator - -MAME development -Texas Instruments SN76489 / SN76496 programmable tone/noise generator -Homepage: http://www.mame.net/ - -Eke -CD graphics processor and CD controller implementation (from Genesis Plus GX) -#ifdef PSP - -people @ ps2dev.org forums / PSPSDK crew -libaudiocodec code (by cooleyes) -other sample code -#endif - - -Additional thanks ------------------ - -* Charles MacDonald (http://cgfm2.emuviews.com/) for old but still very useful - info about genesis hardware. -* Steve Snake for all that he has done for Genesis emulation scene. -* Stephane Dallongeville for writing Gens and making it open source. -* Tasco Deluxe for his reverse engineering work on SVP and some mappers. -* Bart Trzynadlowski for his SSFII and 68000 docs. -* Haze for his research (http://haze.mameworld.info). -* Lordus, Exophase and Rokas for various ideas. -* Nemesis for his YM2612 research. -* Eke -* Many posters at spritesmind.net forums for valuable information. -* Mark and Jean-loup for zlib library. -* ketchupgun for the skin. -#ifdef GP2X -* rlyeh and all the other people behind the minimal library. -* Squidge for his famous squidgehack(tm). -* Dzz for his ARM940 sample code. -* A_SN for his gamma code. -* craigix for supplying the GP2X hardware and making this port possible. -* Alex for the icon. -* All the people from gp32x boards for their support. -#endif -#ifdef GIZ -* Kingcdr's for the SDK and Reesy for the DLL and sound code. -* jens.l for supplying the Gizmondo hardware and making this port possible. -#endif -* Inder for some graphics. -* Anyone else I forgot. You know who you are. - - -Changelog ---------- -1.91 (2013-10-) - + Switched to CD controller code from Eke's Genesis Plus GX. 
- * Fixed overflow issue where cd emulation would break after - ~10 minutes of gameplay - -1.90 (2013-09-24) - + 32X+CD emulation has been implemented. - + CD graphics processor code has been replaced with much cleaner Eke's - implementation from Genesis Plus GX. - + CD PCM code has been completely rewritten. - * Various CD compatibility issues have been solved. Hopefully no more - regressions this time. - * pandora: fixed tv-out (again), added automatic layer switching - * libretro: fixed crackling sound for some games, added some core options - * sdl: multiple joystick support has been fixed (Victor Luchits) - -1.85 (2013-08-31) - * Lots of 32X compatibility and accuracy improvements. All commercial games - are booting now, but some still have issues. - * Fixed some regressions in MegaCD code, like hang in jap BIOS. - * Implemented pause for SMS. - * Updated UI with improvements from PCSX ReARMed. - * Frontend timing has been rewritten, should no longer slowly desync from - LCD on pandora. - * Added libretro and SDL 32/64bit ports, fixed compatibility issues with - Android, iOS. - * Various other things I forgot (it has been a while since last release..) - -1.80 (2010-09-19) - + Added Caanoo support. Now the GP2X binary supports GP2X F100/F200, Wiz - and Caanoo. Lots of internal refactoring to support this. - + Enabled 32X and SMS code. It's still unfinished but better release something - now than wait even more (it has been in development for more then a year now - due to various other projects or simply lack of time). - + Pandora: added hardware scaler support, including ability to resize the - layer and control filtering. - + GP2X: Added basic line-doubling vertical scaling option. - * Changed the way keys are bound, no need to unbind old one any more. - * Handle MP3s with ID3 tags better (some MP3s with ID3 did not play). - * Improved shadow/hilight color levels. - * Fixed broken cheat support. - -1.80beta2 - * Pandora: updated documentation. 
- -1.80beta1 (2010-06-02) - + Added pandora port. - * Internal refactoring for 32x/SMS support. - * Move mapper database to external file. - + Added preliminary SMS emulation. - + Added emulation of 32x peripherals including VDP. More work is needed here. - + ARM: Added new SH2 recompiler for 32x. Some unification with SVP one. - - Disabled most of the above bacause I'm not yet happy with the results. - -1.56 (2009-09-19) - * Changed sync in Sega CD emulation again. Should fix games that - broke after changes in 1.51a. - * Fixed default keys rebinding when they shouldn't. - * Fixed sram being loaded from wrong game. - * Emu should no longer hang shortly after using fast-forward. - * Fixed save states sometimes no longer showing up in save state menu. - * ARM: some asm code refactoring for slight speed improvement. - -1.55 - + Added Wiz support. Now the same GP2X binary supports F100/F200 and Wiz. - * Changed shadow/hilight handling a bit, fixes some effects in Pirates! Gold. - * Complete input code rewrite. This fixes some limitations like not allowing - to control both players using single input device. It also allows to use - more devices (like keyboards) on Linux based devices. - * Options menu has been reordered, "restore defaults" option added. - -1.51b - * Fixed a crash when uncompressed savestate is loaded. - * Fixed an idle loop detection related hanging problem. - * PSP: fixed another palette related regression. - * UIQ3: updated frontend for the latest emu core. - -1.51a - * Fixed a sync problem between main and sub 68k. Should fix the hanging - problem for some games. - * ARM: fixed a crash when CD savestate is loaded just after loading ROM. - -1.51 - * Improved bin_to_cso_mp3 tool, it should no longer complain about - missing lame.exe even if it's in working dir. - * Fixed a regression from 1.50, which caused slowdowns in Final Fight. 
- * Fixed some regressions from 1.50 related to sprite limit and palette - handling (caused graphical glitches in some games). - + Added ABC turbo actions to key config. - * Some other minor adjustments. - -1.50 - + Added some basic support for Sega Pico, a MegaDrive-based toy. - + Added proper support for cue/bin images, including cdda playback. - .cue sheets with iso/cso/mp3/wav files listed in them are now - supported too (but 44kHz restriction still applies). - + Added bin_to_cso_mp3 tool, based on Exophase's bin_to_iso_ogg. - The tool can convert .cue/.bin Sega CD images to .cso/.mp3. - * Greatly improved Sega CD load times. - * Changed how scheduling between 68k and z80 is handled. Improves - performance for some games. Credits to Lordus for the idea. - * YM2612 state was not 100% saved, this should be better now. - * Improved renderer performance for shadow/hilight mode. - * Added a hack for YM2612 frequency overflow issue (bleep noises - in Shaq Fu, Spider-Man - The Animated Series (intro music), etc.) - Credits to Nemesis @ spritesmind forum. Works only if sound rate - is set to 44kHz. - + Implemented some sprite rendering improvements, as suggested by - Exophase. Games with lots of sprites now perform better. - + Added better idle loop detection, based on Lordus' idea again. - - "accurate timing" option removed, as disabling it no longer - improves performance. - - "accurate sprites" was removed too, the new sprite code can - properly handle sprite priorities in all cases. - * Timers adjusted again. - * Improved .smd detection code. - * ARM: fixed a bug in DrZ80 core, which could cause problems in - some rare cases. - * ARM: fixed a problem of occasional clicks on MP3 music start. - * Minor general optimizations and menu improvements. - * Fixed a bug in Sega CD savestate loader, where the game would - sometimes crash after load. - * Fixed a crash of games using eeprom (introduced in 1.40b). - * PSP: fixed suspend/resume (hopefully for real). 
- -1.40c - * Fixed a problem with sound in Marble Madness. - * GP2X: Fixed minor problem with key config. - -1.40b - * Fixed sprite masking code. Thanks to Lordus for explaining how it works. - + Added "disable sprite limit" option. - + PSP: added black level adjustment to display options. - * Changed reset to act as 'soft' reset. - + Added detection for Puggsy (it doesn't really have sram). - * Some small timing adjustments. - -1.40a - * GP2X: Fixed a binding problem with up and down keys. - * Default game config no longer overrides global user config. - -1.40 - + Added support for SVP (Sega Virtua Processor) to emulate Virtua Racing, - wrote ARM recompiler and some HLE code for VR. Credits to Exophase and - Rokas for various ideas. - * Changed config file format, files are now human-readable. Game specific - configs are now held in single file (but old game config files are still - read when new one is missing). - * Fixed a bug where some key combos didn't work as expected. - * Fixed a regression in renderer (ARM ports only, some graphic glitches in - rare cases). - * Adjusted fast renderer to work with more games, including VR. - * Fixed a problem where SegaCD RAM cart data was getting lost on reset. - * GP2X: Greatly reduced SegaCD FMV game slowdowns by disabling read-ahead - in the Linux kernel and C library (thanks to Rokas and Exophase for ideas - again). Be sure to keep "ReadAhead buffer" OFF to avoid slowdowns. - + PicoDrive now comes with a game config file for some games which need - special settings, so they should now work out-of-the-box. More games will - be added with later updates. - + GP2X: Files now can be deleted by pressing A+SELECT in the file browser. - -1.35b - * PSP: mp3 code should no longer fail on 1.5 firmware. - + PSP: added gamma adjustment option. - + Added .cso ISO format support. Useful for non-FMV games. - * It is now possile to force a region after the ROM is loaded. 
- * Fixed a sram bug in memhandlers (fixes Shining in the Darkness saves). - * PSP: fixed another bug in memhanlers, which crashed the emu for some games - (like NBA Jam and NHL 9x). - + PSP: added suspend/resume handling for Sega CD games. - + GP2X: added additional low volume levels for my late-night gaming sessions - (in stereo mode only). - + GP2X: added "fast forward" action in key config. Not recommended to use for - Sega CD, may case problems there. - * Some other small tweaks I forgot about. - -1.35a - * PSP: fixed a bug which prevented to load any ROMs after testing the BIOS. - * PSP: fixed incorrect CZ80 memory map setup, which caused Z80 crashes and - graphics corruption in EU Mega CD model1 BIOS menus. - + PSP: added additional "set to 4:3 scaled" display option for convenience. - + PSP: Added an option to disable frame limitter (works only with non-auto frameskip). - -1.35 - + PSP port added. Lots of new code for it. Integrated modified FAME/C, CZ80 cores. - + Some minor generic optimizations. - * Patched some code which was crashing under PSP, but was working in GP2X/Giz - (although it should have crashed there too). - * Readme updated. - -1.34 - + Gizmondo port added. - + Some new optimizations in memory handlers, and for shadow/hilight mode. - + Added some hacks to make more games work without enabling "accurate timing". - * Adjusted timing for "accurate timing" mode and added preliminary VDP FIFO - emulation. Fixes Double Dragon 2, tearing in Chaos Engine and some other games. - * Fixed a few games not having sound at startup. - * Updated serial EEPROM code to support more games. Thanks to EkeEke for - providing info about additional EEPROM types and game mappers. - * The above change fixed hang of NBA Jam. - * Minor adjustments to control configurator. - -1.33 - * Updated Cyclone core to 0.0088. - + Added A r k's usbjoy fix. 
- + Added "perfect vsync" option, which adjusts GP2X LCD refresh rate and syncs - emulation to it to eliminate tearing and ensure smoothest scrolling possible. - + Added an option to use A_SN's gamma curve for gamma correction (improves dark - and bright color display for mk2s). - * Sometimes stray sounds were played after loading a savestate. Fixed. - * Fixed a problem where >6MB mp3s were corrupted in memory (sound glitches in - Snatcher). - * PD no longer overwrites video player code in memory, video player now can be - used after exiting PicoDrive. - * Fixed a bug which was causing Sonic 3 code to deadlock in some rare conditions - if "accurate timing" was not enabled. - * Fixed support for large hacked ROMs like "Ultimate Mortal Kombat Trilogy". - Upto 10MB hacked ROMs are supported now. - + Config profiles added (press left/right when saving config). - * Changed key configuration behavior to the one from gpfce (should be more - intuitive). - + Added some skinning capabilities to the menu system with default skin by - ketchupgun. Delete skin directory if you want old behaviour. - * Some other little tweaks I forgot about. - -1.32 - + Added some new scaling options. - + Added ability to reload CD images while game is running (needed for games - with multiple CDs, like Night Trap). - + Added RAM cart emulation. - * Fixed DMA timing emulation (caused lock-ups for some genesis games). - * Idle loop detection was picking up wrong code and causing glitches, fixed. - * The ym2612 code on 940 now can handle multiple updates per frame - (fixes Thunger Force III "seiren" level drums for example). - * Memory handlers were ignoring some writes to PSG chip, fixed (missing sounds in - Popful Mail, Silpheed). - * Improved z80 timing, should fix some sound problems. - * Fixed a bug with sram register (fixes Phantasy Star 4). - * ROM loader was incorrectly identifying some ROMs as invalid. Fixed. - * Added code for PRG ram write protection register (Dungeon Explorer). 
- * The memory mode register change in 1.31 was unsafe and caused some glitches in - AH-3 Thunderstrike. Fixed. - * Fixed a file descriptor leak. - * Updated documentation, added Gmenu2x manual. - -1.31 - * Changed the way memory mode register is read (fixes Lunar 2, broken in 1.30). - * Fixed TAS opcode on sub-68k side (fixes Batman games). - * File browser now filters out mp3s, saves and some other files, which are not ROMS. - -1.30 - + ISO files now can be zipped. Note that this causes VERY long loading times. - + Added data pre-buffering support, this allows to reduce frequency of short pauses - in FMV games (caused by SD access), but makes those pauses longer. - * Fixed PCM DMA transfers (intro FMV in Popful Mail). - + Properly implemented "decode" data transformation (Jaguar XJ220). - * Integrated "better sync" code into cyclone code, what made this mode much faster. - * Fixed a bug related to game specific config saving. - * Frameskipper was skipping sound processing, what caused some audio desyncs. Fixed. - * Fixed reset not working for some games. - + New assembly optimized memory handlers for CD (gives at least a few fps). - Also re-enabled all optimizations from 0.964 release. - + New idle-loop detection code for sub-68k. Speeds up at least a few games. - -1.201 - + Added basic cheat support (GameGenie and Genecyst patches). - -1.20 - * Fixed a long-standing problem in audio mixing code which caused slight distortions - at lower sample rates. - * Changed the way 920 and 940 communicates (again), should be more reliable and give - slight performance increase. - * Some optimizations in audio mixing code. - * Some menu changes (background added, smaller font in ROM browser, savestate loader - now can select slots). - + 1M mode DMA transfers implemented (used by FMV games like Night Trap and Sewer Shark). - + Games now can run code from WORD RAM in 1M mode (fixes Adventures of Willy Beamish). 
- + "Cell arrange" address mapping is now emulated (Heart of the alien). - + "Color numeric operation" is now emulated (text in Lunar 2, Silpheed intro graphics). - + "Better sync" option added (prevents some games from hanging). - -1.14 - + Region autodetection now can be customized. - * When CDDA music tracks changed, old buffer contents were incorrectly played. Fixed. - * BRAM is now automatically formatted (no need to enter BIOS menu and format any more). - * Games now can be reset, CDDA music no longer breaks after loading another ISO. - * Fixed a race condition between 920 and 940 which sometimes caused CDDA music not to play. - + Savestates implemented for Sega/Mega CD. - + PCM sound added. - * Some mixer code rewritten in asm. 22kHz and 11kHz sound rates are now supported in - Mega CD mode (but mp3s must still be 44kHz stereo). - + Timer emulation added. - * CDC DMA tansfers fixed. Snatcher and probably some more games now boot. - * 2M word RAM -> VDP transfers fixed, no more corruption in Ecco and some other games. - -1.10 - + GP2X: Added experimental Sega CD support. - + GP2X: Added partial gmv movie playback support. - -0.964 (2006-12-03) - * GP2X: Fixed a sound buffer underflow issue on lower sample rate modes, which was - happening for NTSC games and causing sound clicks. - * GP2X: Redone key config to better support USB joysticks (now multiple joysticks - should be useable and configurable). - + GP2X: Added save confirmation option. - + GP2X: Added 940 CPU crash detection. - + ALL: UIQ3 port added. - -0.963 - * GP2X: Gamma-reset-on-entering-menu bug fixed. - * GP2X: Recompiled PicoDrive with gcc profiling option set as described here: - http://www.gp32x.com/board/index.php?showtopic=28490 - -0.962 - * GP2X: Fixed an issue with incorrect sounds in some games when dualcore operation - was enabled (for example punch sound in SOR). - * GP2X: Limited max volume to 90, because higher values often cause distortions. 
- * GP2X: Fixed a bug with lower res scaling. - * GP2X: Gamma is now reset on exit. - -0.96 - * ALL: Severely optimized MAME's YM2612 core, part of it is now rewritten in asm. - + GP2X: The YM2612's code now can be run in GP2X's ARM940T CPU, what causes large - performance increase. - * ALL: Accurate renderers are slightly faster now. - + GP2X: Using quadruple buffering instead of doublebuffer now, also updated - framelimitter, this should eliminate some scrolling and tearing problems. - * GP2X: Fixed some flickering issues of 8bit accurate renderer. - + GP2X: craigix's RAM timings now can be enabled in the menu (see advanced options). - + GP2X: Added ability to save config for specific games only. - + GP2X: Gamma control added (using GP2X's hardware capabilities for this). - * GP2X: Volume keys are now configurable. - + GP2X: GnoStiC added USB joystick support, I made it possible to use it for - player 2 control (currently untested). - * GP2X: squidgehack is now applied through kernel module (cleaner way). - -0.95 - * ALL: Fixed a bug in sprite renderer which was causing slowdowns for some games. - + GP2X: Added command line support - + GP2X: Added optional hardware scaling for lower-res games like Shining Force. - * ALL: Sound chips are now sampled 2 times per frame. This fixed some games which - had missing sounds (Vectorman 2 1st level, Thunder Force 3 water level, - etc.). - + ALL: Added another accurate 8-bit renderer which is slightly faster and made it - default. - -0.945 - + GP2X: Added frame limiter for frameskipped modes. - * GP2X: Increased brightness a bit (unused pixel bits now also contain data). - * GP2X: Suidgehack was not applied correctly (was applied before allocating some - high memory and had no effect). - -0.94 - + Added GP2X port. - * Improved interrupt timing, Mazin Saga and Burning Force now works. - * Rewritten renderer code to better suit GP2X, should be faster on other - ports too. 
- + Added support for banking used by 12-in-1 and 4-in-1 ROMs (thanks Haze). - + Added some protection device faking, used by some unlicensed games like - Super Bubble Bobble, King of Fighters, Elf Wor, ... (thanks to Haze again) - + Added primitive Virtua Racing SVP faking, so menus can be seen now. - -0.93 - * Fixed a problem with P900/P910 key configuration in FC mode. - * Improved shadow/hilight mode emulation. Still not perfect, but should be - enough for most games. - + Save state slots added. - + Region selector added. - -0.92 - VDP changes: - * VDP emulation is now more accurate (fixes flickering in Chase HQ II, - Super Hang-On and some other problems in other games). - * HV counter emulation is now much more accurate. Fixes the Asterix games, - line in Road Rash 3, etc. - * Minor sprite and layer scroll masking bugs fixed. - + Added partial interlace mode renderer (Sonic 2 vs mode) - * Fixed a crash in both renderers when certain size window layers were used. - + Added emulation of shadow/hilight operator sprites. Other shadow/hilight - effects are still unemulated. - + Sprite emulation is more accurate, sprite limit is emulated. - + Added "accurate sprites" option, which always draws sprites in correct - order and emulates sprite collision bit, but is significantly slower. - - Emulation changes: - * Improved interrupt handling, added deferred interrupt emulation - (Lemmings, etc). - + Added serial EEPROM SRAM support (Wonder Boy in Monster World, - Megaman - The Wily Wars and many EA sports games like NBA Jam). - + Implemented ROM banking for Super Street Fighter II - The New Challengers - * Updated to the latest version of DrZ80 core, integrated memory handlers - in it for better performance. A noticeable performance increase, but save - states may not work from the previous version (you can only use them with - sound disabled in that case). - + SRAM word read handler was using incorrect byte order, fixed. 
- - Changes in Cyclone 0.0086: - + Added missing CHK opcode handler (used by SeaQuest DSV). - + Added missing TAS opcode handler (Gargoyles,Bubba N Stix,...). As in real genesis, - memory write-back phase is ignored (but can be enabled in config.h if needed). - + Added missing NBCD and TRAPV opcode handlers. - + Added missing addressing mode for CMP/EOR. - + Added some minor optimizations. - - Removed 216 handlers for 2927 opcodes which were generated for invalid addressing modes. - + Fixed flags for ASL, NEG, NEGX, DIVU, ADDX, SUBX, ROXR. - + Bugs fixed in MOVEP, LINK, ADDQ, DIVS handlers. - * Undocumented flags for CHK, ABCD, SBCD and NBCD are now emulated the same way as in Musashi. - + Added Uninitialized Interrupt emulation. - + Altered timing for about half of opcodes to match Musashi's. - -0.80 - * Nearly all VDP code was rewritten in ARM asm. Gives ~10-25% performance - increase (depends on game). - * Optimized 32-column renderer not to render tiles offscreen, games which - use 32-column display (like Shining Force) run ~50% faster. - + Added new "Alternative renderer", which gives another ~30-45% performance - increase (in addition to mentioned above), but works only with some games, - because it is missing some features (it uses tile-based rendering - instead of default line-based and disables H-ints). - + Added "fit2" display mode for all FC gamers. It always uses 208x146 for - P800 and 208x208 for all other phones. - + Added volume control for Motorolas (experimental). - - VDP changes: - + Added support for vertical window (used by Vapor Trail, Mercs, GRIND - Stormer and others). - + Added sprite masking (hiding), adds some speed. - + Added preliminary H counter emulation. Comix Zone and Sonic 3D Blast - special stage are now playable. - + Added column based vertical scrolling (Gunstar Heroes battleship level, - Sonic and Knuckles lava boss, etc). - - Emulation changes: - + Re-added and improved Z80 faking when Z80 is disabled. 
Many games now can - be played without enabling Z80 (Lost Vikings, Syndicate, etc), but some - still need it (International Superstar Soccer Deluxe). - * Improved ym2612 timers, Outrun music plays at correct speed, voices in - Earthworm Jim play better, more games play sound. - * I/O registers now remember their values (needed for Pirates! Gold) - + Added support for 6 button pad. - - Changes in Cyclone 0.0083wip: - + Added missing CHK opcode (used by SeaQuest DSV). - + Added missing TAS opcode (Gargoyles). As in real genesis, write-back phase - is ignored (but is enabled for other systems). - - Backported stuff from Snes9x: - * Fixed Pxxx jog up/down which were not working in game. - + Added an option to gzip save states to save space. - + The emulator now pauses whenever it is loosing focus, so it will now pause - when alarm/ponecall/battery low/... windows come up. - - Removed 'pause on phonecall' feature, as it is no longer needed. - + Video fix for asian A1000s. - -0.70 - * Started using tools from "Symbian GCC Improvement Project", which give - considerable speed increase (~4fps in "center 90" mode). - * Rewrote some drawing routines in ARM assembly (gives ~6 more fps in - "center 90" mode). - * Minor improvement to 0 and 180 "fit" modes. Now they look slightly better - and are faster. - * Minor stability improvements (emulator is less likely to crash). - + Added some background for OSD text for better readability. - + Added Pal/NTSC detection. This is needed for proper sound speed. - + Implemented Reesy's DrZ80 Z80 emu. Made some changes to it with hope to make - it faster. - + Implemented ym2612 emu from the MAME project. Runs well but sometimes sounds - a bit weird. Could be a little faster, so made some changes too. - + Implemented SN76489 emu from the MAME project. - + Added two separate sound output methods (mediaserver and cmaudiofb) with - autodetection (needs testing). 
- * Fixed VDP DMA fill emulation (as described in Charles MacDonald's docs), - fixes Contra and some other games. - -0.301 - Launcher: - * Launcher now starts emulation process from current directory, - not from hardcoded paths. - * Improved 'pause on call' feature, should hopefully work with Motorola phones. - -0.30 (2006-01-07) - Initial release. +#include "../AUTHORS" License ------- This program and it's code is released under the terms of MAME license: - - Redistribution and use of this code or any derivative works are permitted - provided that the following conditions are met: - - * Redistributions may not be sold, nor may they be used in a commercial - product or activity. - - * Redistributions that are modified from the original source must include the - complete source code, including the source code for all components used by a - binary built from the modified sources. However, as a special exception, the - source code distributed need not include anything that is normally distributed - (in either source or binary form) with the major components (compiler, kernel, - and so on) of the operating system on which the executable runs, unless that - component itself accompanies the executable. - - * Redistributions must reproduce the above copyright notice, this list of - conditions and the following disclaimer in the documentation and/or other - materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. +#include "../COPYING" SEGA/Genesis/MegaDrive/SEGA-CD/Mega-CD/32X are trademarks of Sega Enterprises Ltd. diff --git a/tools/textfilter.c b/tools/textfilter.c index 90354c27..c7a088b6 100644 --- a/tools/textfilter.c +++ b/tools/textfilter.c @@ -22,7 +22,6 @@ static int check_defines(const char **defs, int defcount, char *tdef) return 0; } - static void do_counters(char *str) { static int counter_id = -1, counter; @@ -42,12 +41,28 @@ static void do_counters(char *str) } } +static int my_fputs(char *s, FILE *stream) +{ + char *p; + + for (p = s + strlen(s) - 1; p >= s; p--) + if (!isspace(*p)) + break; + p++; + + /* use DOS endings for better viewer compatibility */ + memcpy(p, "\r\n", 3); + + return fputs(s, stream); +} int main(int argc, char *argv[]) { + char path[256], path_file[256]; char buff[1024]; FILE *fi, *fo; int skip_mode = 0, ifdef_level = 0, skip_level = 0, line = 0; + char *p; if (argc < 3) { @@ -62,13 +77,21 @@ int main(int argc, char *argv[]) return 2; } - fo = fopen(argv[2], "w"); + fo = fopen(argv[2], "wb"); if (fo == NULL) { printf("failed to open: %s\n", argv[2]); return 3; } + snprintf(path, sizeof(path), "%s", argv[1]); + for (p = path + strlen(path) - 1; p > path; p--) { + if (*p == '/' || *p == '\\') { + p[1] = 0; + break; + } + } + for (++line; !feof(fi); line++) { char *fgs; @@ -112,12 +135,16 @@ int main(int argc, char *argv[]) { char *pe, *p = buff + 9; FILE *ftmp; - if (skip_mode) continue; - while (*p && (*p == ' ' || 
*p == '\"')) p++; - for (pe = p + strlen(p) - 1; pe > p; pe--) + if (skip_mode) + continue; + while (*p && (*p == ' ' || *p == '\"')) + p++; + for (pe = p + strlen(p) - 1; pe > p; pe--) { if (isspace(*pe) || *pe == '\"') *pe = 0; else break; - ftmp = fopen(p, "r"); + } + snprintf(path_file, sizeof(path_file), "%s%s", path, p); + ftmp = fopen(path_file, "r"); if (ftmp == NULL) { printf("%i: error: failed to include \"%s\"\n", line, p); return 1; @@ -125,8 +152,9 @@ int main(int argc, char *argv[]) while (!feof(ftmp)) { fgs = fgets(buff, sizeof(buff), ftmp); - if (fgs == NULL) break; - fputs(buff, fo); + if (fgs == NULL) + break; + my_fputs(buff, fo); } fclose(ftmp); continue; @@ -138,7 +166,7 @@ int main(int argc, char *argv[]) if (!skip_mode) { do_counters(buff); - fputs(buff, fo); + my_fputs(buff, fo); } } From 9770f5316fdc015d989b1ef2811f0b07e9e8e9a7 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 11 Oct 2013 01:05:28 +0300 Subject: [PATCH 0019/1110] update gp2x for input changes --- pico/cd/gfx.c | 2 +- platform/gp2x/plat.c | 4 ++-- platform/libpicofe | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pico/cd/gfx.c b/pico/cd/gfx.c index 948402bd..a2c97be0 100644 --- a/pico/cd/gfx.c +++ b/pico/cd/gfx.c @@ -316,7 +316,7 @@ void gfx_start(unsigned int base) /* make sure 2M mode is enabled */ if (!(Pico_mcd->s68k_regs[3] & 0x04)) { - uint32 mask; + uint32 mask = 0; uint32 reg; /* trace vector pointer */ diff --git a/platform/gp2x/plat.c b/platform/gp2x/plat.c index e7f8f730..4d35cbd8 100644 --- a/platform/gp2x/plat.c +++ b/platform/gp2x/plat.c @@ -95,8 +95,8 @@ static const struct menu_keymap key_pbtn_map[] = { KEY_RIGHTBRACE, PBTN_R }, }; -static const struct in_evdev_pdata gp2x_evdev_pdata = { - .defbinds = in_gp2x_defbinds, +static const struct in_pdata gp2x_evdev_pdata = { + .defbinds = in_evdev_defbinds, .key_map = key_pbtn_map, .kmap_size = sizeof(key_pbtn_map) / sizeof(key_pbtn_map[0]), }; diff --git a/platform/libpicofe b/platform/libpicofe 
index d685ce46..8b4363e3 160000 --- a/platform/libpicofe +++ b/platform/libpicofe @@ -1 +1 @@ -Subproject commit d685ce4625e9f3b25b0852d31960cb429da06a9d +Subproject commit 8b4363e302e6bbcf41321ec70f8c033efed5840d From f47d0a28983cc999d2cc9d050b4badcd41652c27 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 12 Oct 2013 00:40:40 +0300 Subject: [PATCH 0020/1110] remove regs union due to compiler issues GP2X toolchains are padding the unions no matter what :( --- pico/cd/cdc.c | 184 ++++++++++++++++++++------------------- pico/cd/cdd.c | 92 ++++++++++---------- pico/cd/genplus_macros.h | 16 ---- pico/cd/memory.c | 2 +- pico/pico_int.h | 10 +-- 5 files changed, 144 insertions(+), 160 deletions(-) diff --git a/pico/cd/cdc.c b/pico/cd/cdc.c index 8b47b3dd..bf688f57 100644 --- a/pico/cd/cdc.c +++ b/pico/cd/cdc.c @@ -80,10 +80,10 @@ typedef struct { uint8 ifstat; uint8 ifctrl; - reg16_t dbc; - reg16_t dac; - reg16_t pt; - reg16_t wa; + uint16 dbc; + uint16 dac; + uint16 pt; + uint16 wa; uint8 ctrl[2]; uint8 head[2][4]; uint8 stat[4]; @@ -103,7 +103,7 @@ void cdc_init(void) void cdc_reset(void) { /* reset CDC register index */ - Pico_mcd->regs[0x04>>1].byte.l = 0x00; + Pico_mcd->s68k_regs[0x04+1] = 0x00; /* reset CDC registers */ cdc.ifstat = 0xff; @@ -216,7 +216,7 @@ int cdc_context_load_old(uint8 *state) old_load(stat, 67916); cdc.dma_w = 0; - switch (Pico_mcd->regs[0x04>>1].byte.h & 0x07) + switch (Pico_mcd->s68k_regs[0x04+0] & 0x07) { case 4: /* PCM RAM DMA */ cdc.dma_w = pcm_ram_dma_w; @@ -225,16 +225,16 @@ int cdc_context_load_old(uint8 *state) cdc.dma_w = prg_ram_dma_w; break; case 7: /* WORD-RAM DMA */ - if (Pico_mcd->regs[0x02 >> 1].byte.l & 0x04) + if (Pico_mcd->s68k_regs[0x02+1] & 0x04) { - if (Pico_mcd->regs[0x02 >> 1].byte.l & 0x01) + if (Pico_mcd->s68k_regs[0x02+1] & 0x01) cdc.dma_w = word_ram_0_dma_w; else cdc.dma_w = word_ram_1_dma_w; } else { - if (Pico_mcd->regs[0x02 >> 1].byte.l & 0x02) + if (Pico_mcd->s68k_regs[0x02+1] & 0x02) cdc.dma_w = 
word_ram_2M_dma_w; } break; @@ -247,7 +247,7 @@ int cdc_context_load_old(uint8 *state) static void do_dma(enum dma_type type, int words_in) { int dma_addr = (Pico_mcd->s68k_regs[0x0a] << 8) | Pico_mcd->s68k_regs[0x0b]; - int src_addr = cdc.dac.w & 0x3ffe; + int src_addr = cdc.dac & 0x3ffe; int dst_addr = dma_addr; int words = words_in; int dst_limit = 0; @@ -255,7 +255,7 @@ static void do_dma(enum dma_type type, int words_in) int len; elprintf(EL_CD, "dma %d %04x->%04x %x", - type, cdc.dac.w, dst_addr, words_in); + type, cdc.dac, dst_addr, words_in); switch (type) { @@ -331,7 +331,7 @@ static void do_dma(enum dma_type type, int words_in) update_dma: /* update DMA addresses */ - cdc.dac.w += words_in * 2; + cdc.dac += words_in * 2; if (type == pcm_ram_dma_w) dma_addr += words_in >> 1; else @@ -344,14 +344,14 @@ update_dma: void cdc_dma_update(void) { /* end of DMA transfer ? */ - //if (cdc.dbc.w < DMA_BYTES_PER_LINE) + //if (cdc.dbc < DMA_BYTES_PER_LINE) { /* transfer remaining words using 16-bit DMA */ - //cdc.dma_w((cdc.dbc.w + 1) >> 1); - do_dma(cdc.dma_w, (cdc.dbc.w + 1) >> 1); + //cdc.dma_w((cdc.dbc + 1) >> 1); + do_dma(cdc.dma_w, (cdc.dbc + 1) >> 1); /* reset data byte counter (DBCH bits 4-7 should be set to 1) */ - cdc.dbc.w = 0xf000; + cdc.dbc = 0xf000; /* clear !DTEN and !DTBSY */ cdc.ifstat |= (BIT_DTBSY | BIT_DTEN); @@ -363,7 +363,7 @@ void cdc_dma_update(void) if (cdc.ifctrl & BIT_DTEIEN) { /* level 5 interrupt enabled ? 
*/ - if (Pico_mcd->regs[0x32>>1].byte.l & PCDS_IEN5) + if (Pico_mcd->s68k_regs[0x32+1] & PCDS_IEN5) { /* update IRQ level */ elprintf(EL_INTS, "cdc DTE irq 5"); @@ -372,7 +372,7 @@ void cdc_dma_update(void) } /* clear DSR bit & set EDT bit (SCD register $04) */ - Pico_mcd->regs[0x04>>1].byte.h = (Pico_mcd->regs[0x04>>1].byte.h & 0x07) | 0x80; + Pico_mcd->s68k_regs[0x04+0] = (Pico_mcd->s68k_regs[0x04+0] & 0x07) | 0x80; /* disable DMA transfer */ cdc.dma_w = 0; @@ -384,7 +384,7 @@ void cdc_dma_update(void) cdc.dma_w(DMA_BYTES_PER_LINE >> 1); /* decrement data byte counter */ - cdc.dbc.w -= length; + cdc.dbc -= length; } #endif } @@ -407,7 +407,7 @@ int cdc_decoder_update(uint8 header[4]) if (cdc.ifctrl & BIT_DECIEN) { /* level 5 interrupt enabled ? */ - if (Pico_mcd->regs[0x32>>1].byte.l & PCDS_IEN5) + if (Pico_mcd->s68k_regs[0x32+1] & PCDS_IEN5) { /* update IRQ level */ elprintf(EL_INTS, "cdc DEC irq 5"); @@ -421,13 +421,13 @@ int cdc_decoder_update(uint8 header[4]) uint16 offset; /* increment block pointer */ - cdc.pt.w += 2352; + cdc.pt += 2352; /* increment write address */ - cdc.wa.w += 2352; + cdc.wa += 2352; /* CDC buffer address */ - offset = cdc.pt.w & 0x3fff; + offset = cdc.pt & 0x3fff; /* write CDD block header (4 bytes) */ memcpy(cdc.ram + offset, header, 4); @@ -454,9 +454,9 @@ int cdc_decoder_update(uint8 header[4]) void cdc_reg_w(unsigned char data) { #ifdef LOG_CDC - elprintf(EL_STATUS, "CDC register %X write 0x%04x", Pico_mcd->regs[0x04>>1].byte.l & 0x0F, data); + elprintf(EL_STATUS, "CDC register %X write 0x%04x", Pico_mcd->s68k_regs[0x04+1] & 0x0F, data); #endif - switch (Pico_mcd->regs[0x04>>1].byte.l & 0x0F) + switch (Pico_mcd->s68k_regs[0x04+1] & 0x0F) { case 0x01: /* IFCTRL */ { @@ -465,7 +465,7 @@ void cdc_reg_w(unsigned char data) ((data & BIT_DECIEN) && !(cdc.ifstat & BIT_DECI))) { /* level 5 interrupt enabled ? 
*/ - if (Pico_mcd->regs[0x32>>1].byte.l & PCDS_IEN5) + if (Pico_mcd->s68k_regs[0x32+1] & PCDS_IEN5) { /* update IRQ level */ elprintf(EL_INTS, "cdc pending irq 5"); @@ -486,28 +486,32 @@ void cdc_reg_w(unsigned char data) } cdc.ifctrl = data; - Pico_mcd->regs[0x04>>1].byte.l = 0x02; + Pico_mcd->s68k_regs[0x04+1] = 0x02; break; } case 0x02: /* DBCL */ - cdc.dbc.byte.l = data; - Pico_mcd->regs[0x04>>1].byte.l = 0x03; + cdc.dbc &= 0xff00; + cdc.dbc |= data; + Pico_mcd->s68k_regs[0x04+1] = 0x03; break; case 0x03: /* DBCH */ - cdc.dbc.byte.h = data; - Pico_mcd->regs[0x04>>1].byte.l = 0x04; + cdc.dbc &= 0x00ff; + cdc.dbc |= data << 8; + Pico_mcd->s68k_regs[0x04+1] = 0x04; break; case 0x04: /* DACL */ - cdc.dac.byte.l = data; - Pico_mcd->regs[0x04>>1].byte.l = 0x05; + cdc.dac &= 0xff00; + cdc.dac |= data; + Pico_mcd->s68k_regs[0x04+1] = 0x05; break; case 0x05: /* DACH */ - cdc.dac.byte.h = data; - Pico_mcd->regs[0x04>>1].byte.l = 0x06; + cdc.dac &= 0x00ff; + cdc.dac |= data << 8; + Pico_mcd->s68k_regs[0x04+1] = 0x06; break; case 0x06: /* DTRG */ @@ -519,15 +523,15 @@ void cdc_reg_w(unsigned char data) cdc.ifstat &= ~BIT_DTBSY; /* clear DBCH bits 4-7 */ - cdc.dbc.byte.h &= 0x0f; + cdc.dbc &= 0x0fff; /* clear EDT & DSR bits (SCD register $04) */ - Pico_mcd->regs[0x04>>1].byte.h &= 0x07; + Pico_mcd->s68k_regs[0x04+0] &= 0x07; cdc.dma_w = 0; /* setup data transfer destination */ - switch (Pico_mcd->regs[0x04>>1].byte.h & 0x07) + switch (Pico_mcd->s68k_regs[0x04+0] & 0x07) { case 2: /* MAIN-CPU host read */ case 3: /* SUB-CPU host read */ @@ -536,7 +540,7 @@ void cdc_reg_w(unsigned char data) cdc.ifstat &= ~BIT_DTEN; /* set DSR bit (register $04) */ - Pico_mcd->regs[0x04>>1].byte.h |= 0x40; + Pico_mcd->s68k_regs[0x04+0] |= 0x40; break; } @@ -555,10 +559,10 @@ void cdc_reg_w(unsigned char data) case 7: /* WORD-RAM DMA */ { /* check memory mode */ - if (Pico_mcd->regs[0x02 >> 1].byte.l & 0x04) + if (Pico_mcd->s68k_regs[0x02+1] & 0x04) { /* 1M mode */ - if (Pico_mcd->regs[0x02 >> 
1].byte.l & 0x01) + if (Pico_mcd->s68k_regs[0x02+1] & 0x01) { /* Word-RAM bank 0 is assigned to SUB-CPU */ cdc.dma_w = word_ram_0_dma_w; @@ -572,7 +576,7 @@ void cdc_reg_w(unsigned char data) else { /* 2M mode */ - if (Pico_mcd->regs[0x02 >> 1].byte.l & 0x02) + if (Pico_mcd->s68k_regs[0x02+1] & 0x02) { /* only process DMA if Word-RAM is assigned to SUB-CPU */ cdc.dma_w = word_ram_2M_dma_w; @@ -584,16 +588,16 @@ void cdc_reg_w(unsigned char data) default: /* invalid */ { elprintf(EL_ANOMALY, "invalid CDC tranfer destination (%d)", - Pico_mcd->regs[0x04>>1].byte.h & 0x07); + Pico_mcd->s68k_regs[0x04+0] & 0x07); break; } } if (cdc.dma_w) - pcd_event_schedule_s68k(PCD_EVENT_DMA, cdc.dbc.w / 2); + pcd_event_schedule_s68k(PCD_EVENT_DMA, cdc.dbc / 2); } - Pico_mcd->regs[0x04>>1].byte.l = 0x07; + Pico_mcd->s68k_regs[0x04+1] = 0x07; break; } @@ -603,7 +607,7 @@ void cdc_reg_w(unsigned char data) cdc.ifstat |= BIT_DTEI; /* clear DBCH bits 4-7 */ - cdc.dbc.byte.h &= 0x0f; + cdc.dbc &= 0x0fff; #if 0 /* no pending decoder interrupt ? 
*/ @@ -613,18 +617,20 @@ void cdc_reg_w(unsigned char data) SekInterruptClearS68k(5); } #endif - Pico_mcd->regs[0x04>>1].byte.l = 0x08; + Pico_mcd->s68k_regs[0x04+1] = 0x08; break; } case 0x08: /* WAL */ - cdc.wa.byte.l = data; - Pico_mcd->regs[0x04>>1].byte.l = 0x09; + cdc.wa &= 0xff00; + cdc.wa |= data; + Pico_mcd->s68k_regs[0x04+1] = 0x09; break; case 0x09: /* WAH */ - cdc.wa.byte.h = data; - Pico_mcd->regs[0x04>>1].byte.l = 0x0a; + cdc.wa &= 0x00ff; + cdc.wa |= data << 8; + Pico_mcd->s68k_regs[0x04+1] = 0x0a; break; case 0x0a: /* CTRL0 */ @@ -645,7 +651,7 @@ void cdc_reg_w(unsigned char data) } cdc.ctrl[0] = data; - Pico_mcd->regs[0x04>>1].byte.l = 0x0b; + Pico_mcd->s68k_regs[0x04+1] = 0x0b; break; } @@ -664,22 +670,24 @@ void cdc_reg_w(unsigned char data) } cdc.ctrl[1] = data; - Pico_mcd->regs[0x04>>1].byte.l = 0x0c; + Pico_mcd->s68k_regs[0x04+1] = 0x0c; break; } case 0x0c: /* PTL */ - cdc.pt.byte.l = data; - Pico_mcd->regs[0x04>>1].byte.l = 0x0d; + cdc.pt &= 0xff00; + cdc.pt |= data; + Pico_mcd->s68k_regs[0x04+1] = 0x0d; break; case 0x0d: /* PTH */ - cdc.pt.byte.h = data; - Pico_mcd->regs[0x04>>1].byte.l = 0x0e; + cdc.pt &= 0x00ff; + cdc.pt |= data << 8; + Pico_mcd->s68k_regs[0x04+1] = 0x0e; break; case 0x0e: /* CTRL2 (unused) */ - Pico_mcd->regs[0x04>>1].byte.l = 0x0f; + Pico_mcd->s68k_regs[0x04+1] = 0x0f; break; case 0x0f: /* RESET */ @@ -693,62 +701,62 @@ void cdc_reg_w(unsigned char data) unsigned char cdc_reg_r(void) { - switch (Pico_mcd->regs[0x04>>1].byte.l & 0x0F) + switch (Pico_mcd->s68k_regs[0x04+1] & 0x0F) { case 0x01: /* IFSTAT */ - Pico_mcd->regs[0x04>>1].byte.l = 0x02; + Pico_mcd->s68k_regs[0x04+1] = 0x02; return cdc.ifstat; case 0x02: /* DBCL */ - Pico_mcd->regs[0x04>>1].byte.l = 0x03; - return cdc.dbc.byte.l; + Pico_mcd->s68k_regs[0x04+1] = 0x03; + return cdc.dbc & 0xff; case 0x03: /* DBCH */ - Pico_mcd->regs[0x04>>1].byte.l = 0x04; - return cdc.dbc.byte.h; + Pico_mcd->s68k_regs[0x04+1] = 0x04; + return (cdc.dbc >> 8) & 0xff; case 0x04: /* 
HEAD0 */ - Pico_mcd->regs[0x04>>1].byte.l = 0x05; + Pico_mcd->s68k_regs[0x04+1] = 0x05; return cdc.head[cdc.ctrl[1] & BIT_SHDREN][0]; case 0x05: /* HEAD1 */ - Pico_mcd->regs[0x04>>1].byte.l = 0x06; + Pico_mcd->s68k_regs[0x04+1] = 0x06; return cdc.head[cdc.ctrl[1] & BIT_SHDREN][1]; case 0x06: /* HEAD2 */ - Pico_mcd->regs[0x04>>1].byte.l = 0x07; + Pico_mcd->s68k_regs[0x04+1] = 0x07; return cdc.head[cdc.ctrl[1] & BIT_SHDREN][2]; case 0x07: /* HEAD3 */ - Pico_mcd->regs[0x04>>1].byte.l = 0x08; + Pico_mcd->s68k_regs[0x04+1] = 0x08; return cdc.head[cdc.ctrl[1] & BIT_SHDREN][3]; case 0x08: /* PTL */ - Pico_mcd->regs[0x04>>1].byte.l = 0x09; - return cdc.pt.byte.l; + Pico_mcd->s68k_regs[0x04+1] = 0x09; + return cdc.pt & 0xff; case 0x09: /* PTH */ - Pico_mcd->regs[0x04>>1].byte.l = 0x0a; - return cdc.pt.byte.h; + Pico_mcd->s68k_regs[0x04+1] = 0x0a; + return (cdc.pt >> 8) & 0xff; case 0x0a: /* WAL */ - Pico_mcd->regs[0x04>>1].byte.l = 0x0b; - return cdc.wa.byte.l; + Pico_mcd->s68k_regs[0x04+1] = 0x0b; + return cdc.wa & 0xff; case 0x0b: /* WAH */ - Pico_mcd->regs[0x04>>1].byte.l = 0x0c; - return cdc.wa.byte.h; + Pico_mcd->s68k_regs[0x04+1] = 0x0c; + return (cdc.wa >> 8) & 0xff; case 0x0c: /* STAT0 */ - Pico_mcd->regs[0x04>>1].byte.l = 0x0d; + Pico_mcd->s68k_regs[0x04+1] = 0x0d; return cdc.stat[0]; case 0x0d: /* STAT1 (always return 0) */ - Pico_mcd->regs[0x04>>1].byte.l = 0x0e; + Pico_mcd->s68k_regs[0x04+1] = 0x0e; return 0x00; case 0x0e: /* STAT2 */ - Pico_mcd->regs[0x04>>1].byte.l = 0x0f; + Pico_mcd->s68k_regs[0x04+1] = 0x0f; return cdc.stat[2]; case 0x0f: /* STAT3 */ @@ -770,7 +778,7 @@ unsigned char cdc_reg_r(void) } #endif - Pico_mcd->regs[0x04>>1].byte.l = 0x00; + Pico_mcd->s68k_regs[0x04+1] = 0x00; return data; } @@ -785,24 +793,24 @@ unsigned short cdc_host_r(void) if (!(cdc.ifstat & BIT_DTEN)) { /* read data word from CDC RAM buffer */ - uint8 *datap = cdc.ram + (cdc.dac.w & 0x3ffe); + uint8 *datap = cdc.ram + (cdc.dac & 0x3ffe); uint16 data = (datap[0] << 8) | 
datap[1]; #ifdef LOG_CDC - error("CDC host read 0x%04x -> 0x%04x (dbc=0x%x) (%X)\n", cdc.dac.w, data, cdc.dbc.w, s68k.pc); + error("CDC host read 0x%04x -> 0x%04x (dbc=0x%x) (%X)\n", cdc.dac, data, cdc.dbc, s68k.pc); #endif /* increment data address counter */ - cdc.dac.w += 2; + cdc.dac += 2; /* decrement data byte counter */ - cdc.dbc.w -= 2; + cdc.dbc -= 2; /* end of transfer ? */ - if ((int16)cdc.dbc.w <= 0) + if ((int16)cdc.dbc <= 0) { /* reset data byte counter (DBCH bits 4-7 should be set to 1) */ - cdc.dbc.w = 0xf000; + cdc.dbc = 0xf000; /* clear !DTEN and !DTBSY */ cdc.ifstat |= (BIT_DTBSY | BIT_DTEN); @@ -814,7 +822,7 @@ unsigned short cdc_host_r(void) if (cdc.ifctrl & BIT_DTEIEN) { /* level 5 interrupt enabled ? */ - if (Pico_mcd->regs[0x32>>1].byte.l & PCDS_IEN5) + if (Pico_mcd->s68k_regs[0x32+1] & PCDS_IEN5) { /* update IRQ level */ elprintf(EL_INTS, "cdc DTE irq 5"); @@ -823,7 +831,7 @@ unsigned short cdc_host_r(void) } /* clear DSR bit & set EDT bit (SCD register $04) */ - Pico_mcd->regs[0x04>>1].byte.h = (Pico_mcd->regs[0x04>>1].byte.h & 0x07) | 0x80; + Pico_mcd->s68k_regs[0x04+0] = (Pico_mcd->s68k_regs[0x04+0] & 0x07) | 0x80; } return data; diff --git a/pico/cd/cdd.c b/pico/cd/cdd.c index 2e37b60e..c6b24b82 100644 --- a/pico/cd/cdd.c +++ b/pico/cd/cdd.c @@ -519,7 +519,7 @@ void cdd_read_audio(unsigned int samples) samples = blip_clocks_needed(blip[0], samples); /* audio track playing ? 
*/ - if (!Pico_mcd->regs[0x36>>1].byte.h && cdd.toc.tracks[cdd.index].fd) + if (!Pico_mcd->s68k_regs[0x36+0] && cdd.toc.tracks[cdd.index].fd) { int i, mul, delta; @@ -720,7 +720,7 @@ void cdd_update(void) if (cdd.lba >= cdd.toc.tracks[cdd.index].start) { /* audio track playing */ - Pico_mcd->regs[0x36>>1].byte.h = 0x00; + Pico_mcd->s68k_regs[0x36+0] = 0x00; } /* audio blocks are still sent to CDC as well as CD DAC/Fader */ @@ -752,7 +752,7 @@ void cdd_update(void) cdd.index++; /* PAUSE between tracks */ - Pico_mcd->regs[0x36>>1].byte.h = 0x01; + Pico_mcd->s68k_regs[0x36+0] = 0x01; /* seek to next audio track start */ #ifdef USE_LIBTREMOR @@ -806,7 +806,7 @@ void cdd_update(void) /* AUDIO track playing ? */ if (cdd.status == CD_PLAY) { - Pico_mcd->regs[0x36>>1].byte.h = 0x00; + Pico_mcd->s68k_regs[0x36+0] = 0x00; } } else if (cdd.lba < cdd.toc.tracks[cdd.index].start) @@ -837,7 +837,7 @@ void cdd_update(void) else if (cdd.index >= cdd.toc.last) { /* no AUDIO track playing */ - Pico_mcd->regs[0x36>>1].byte.h = 0x01; + Pico_mcd->s68k_regs[0x36+0] = 0x01; /* end of disc */ cdd.index = cdd.toc.last; @@ -850,7 +850,7 @@ void cdd_update(void) if (!cdd.index) { /* no AUDIO track playing */ - Pico_mcd->regs[0x36>>1].byte.h = 0x01; + Pico_mcd->s68k_regs[0x36+0] = 0x01; /* DATA track */ pm_seek(cdd.toc.tracks[0].fd, cdd.lba * cdd.sectorSize, SEEK_SET); @@ -894,21 +894,21 @@ void cdd_update(void) void cdd_process(void) { /* Process CDD command */ - switch (Pico_mcd->regs[0x42>>1].byte.h & 0x0f) + switch (Pico_mcd->s68k_regs[0x42+0] & 0x0f) { case 0x00: /* Drive Status */ { /* RS1-RS8 normally unchanged */ - Pico_mcd->regs[0x38>>1].byte.h = cdd.status; + Pico_mcd->s68k_regs[0x38+0] = cdd.status; /* unless RS1 indicated invalid track infos */ - if (Pico_mcd->regs[0x38>>1].byte.l == 0x0f) + if (Pico_mcd->s68k_regs[0x38+1] == 0x0f) { /* and SEEK has ended */ if (cdd.status != CD_SEEK) { /* then return valid track infos, e.g current track number in RS2-RS3 (fixes Lunar - The Silver 
Star) */ - Pico_mcd->regs[0x38>>1].byte.l = 0x02; + Pico_mcd->s68k_regs[0x38+1] = 0x02; set_reg16(0x3a, (cdd.index < cdd.toc.last) ? lut_BCD_16[cdd.index + 1] : 0x0A0A); } } @@ -921,7 +921,7 @@ void cdd_process(void) cdd.status = cdd.loaded ? CD_STOP : NO_DISC; /* no audio track playing */ - Pico_mcd->regs[0x36>>1].byte.h = 0x01; + Pico_mcd->s68k_regs[0x36+0] = 0x01; /* RS1-RS8 ignored, expects 0x0 ("no disc" ?) in RS0 once */ set_reg16(0x38, 0x0000); @@ -939,7 +939,7 @@ void cdd_process(void) /* Infos automatically retrieved by CDD processor from Q-Channel */ /* commands 0x00-0x02 (current block) and 0x03-0x05 (Lead-In) */ - switch (Pico_mcd->regs[0x44>>1].byte.l) + switch (Pico_mcd->s68k_regs[0x44+1]) { case 0x00: /* Current Absolute Time (MM:SS:FF) */ { @@ -948,7 +948,7 @@ void cdd_process(void) set_reg16(0x3a, lut_BCD_16[(lba/75)/60]); set_reg16(0x3c, lut_BCD_16[(lba/75)%60]); set_reg16(0x3e, lut_BCD_16[(lba%75)]); - Pico_mcd->regs[0x40>>1].byte.h = cdd.index ? 0x00 : 0x04; /* Current block flags in RS8 (bit0 = mute status, bit1: pre-emphasis status, bit2: track type) */ + Pico_mcd->s68k_regs[0x40+0] = cdd.index ? 0x00 : 0x04; /* Current block flags in RS8 (bit0 = mute status, bit1: pre-emphasis status, bit2: track type) */ break; } @@ -959,7 +959,7 @@ void cdd_process(void) set_reg16(0x3a, lut_BCD_16[(lba/75)/60]); set_reg16(0x3c, lut_BCD_16[(lba/75)%60]); set_reg16(0x3e, lut_BCD_16[(lba%75)]); - Pico_mcd->regs[0x40>>1].byte.h = cdd.index ? 0x00 : 0x04; /* Current block flags in RS8 (bit0 = mute status, bit1: pre-emphasis status, bit2: track type) */ + Pico_mcd->s68k_regs[0x40+0] = cdd.index ? 0x00 : 0x04; /* Current block flags in RS8 (bit0 = mute status, bit1: pre-emphasis status, bit2: track type) */ break; } @@ -969,7 +969,7 @@ void cdd_process(void) set_reg16(0x3a, (cdd.index < cdd.toc.last) ? lut_BCD_16[cdd.index + 1] : 0x0A0A); set_reg16(0x3c, 0x0000); set_reg16(0x3e, 0x0000); /* Disk Control Code (?) 
in RS6 */ - Pico_mcd->regs[0x40>>1].byte.h = 0x00; + Pico_mcd->s68k_regs[0x40+0] = 0x00; break; } @@ -980,7 +980,7 @@ void cdd_process(void) set_reg16(0x3a, lut_BCD_16[(lba/75)/60]); set_reg16(0x3c, lut_BCD_16[(lba/75)%60]); set_reg16(0x3e, lut_BCD_16[(lba%75)]); - Pico_mcd->regs[0x40>>1].byte.h = 0x00; + Pico_mcd->s68k_regs[0x40+0] = 0x00; break; } @@ -990,23 +990,23 @@ void cdd_process(void) set_reg16(0x3a, 0x0001); set_reg16(0x3c, lut_BCD_16[cdd.toc.last]); set_reg16(0x3e, 0x0000); /* Drive Version (?) in RS6-RS7 */ - Pico_mcd->regs[0x40>>1].byte.h = 0x00; /* Lead-In flags in RS8 (bit0 = mute status, bit1: pre-emphasis status, bit2: track type) */ + Pico_mcd->s68k_regs[0x40+0] = 0x00; /* Lead-In flags in RS8 (bit0 = mute status, bit1: pre-emphasis status, bit2: track type) */ break; } case 0x05: /* Track Start Time (MM:SS:FF) */ { - int track = Pico_mcd->regs[0x46>>1].byte.h * 10 + Pico_mcd->regs[0x46>>1].byte.l; + int track = Pico_mcd->s68k_regs[0x46+0] * 10 + Pico_mcd->s68k_regs[0x46+1]; int lba = cdd.toc.tracks[track-1].start + 150; set_reg16(0x38, (cdd.status << 8) | 0x05); set_reg16(0x3a, lut_BCD_16[(lba/75)/60]); set_reg16(0x3c, lut_BCD_16[(lba/75)%60]); set_reg16(0x3e, lut_BCD_16[(lba%75)]); - Pico_mcd->regs[0x40>>1].byte.h = track % 10; /* Track Number (low digit) */ + Pico_mcd->s68k_regs[0x40+0] = track % 10; /* Track Number (low digit) */ if (track == 1) { /* RS6 bit 3 is set for the first (DATA) track */ - Pico_mcd->regs[0x3e>>1].byte.h |= 0x08; + Pico_mcd->s68k_regs[0x3e + 0] |= 0x08; } break; } @@ -1014,7 +1014,7 @@ void cdd_process(void) default: { #ifdef LOG_ERROR - error("Unknown CDD Command %02X (%X)\n", Pico_mcd->regs[0x44>>1].byte.l, s68k.pc); + error("Unknown CDD Command %02X (%X)\n", Pico_mcd->s68k_regs[0x44+1], s68k.pc); #endif return; } @@ -1028,9 +1028,9 @@ void cdd_process(void) int index = 0; /* new LBA position */ - int lba = ((Pico_mcd->regs[0x44>>1].byte.h * 10 + Pico_mcd->regs[0x44>>1].byte.l) * 60 + - 
(Pico_mcd->regs[0x46>>1].byte.h * 10 + Pico_mcd->regs[0x46>>1].byte.l)) * 75 + - (Pico_mcd->regs[0x48>>1].byte.h * 10 + Pico_mcd->regs[0x48>>1].byte.l) - 150; + int lba = ((Pico_mcd->s68k_regs[0x44+0] * 10 + Pico_mcd->s68k_regs[0x44+1]) * 60 + + (Pico_mcd->s68k_regs[0x46+0] * 10 + Pico_mcd->s68k_regs[0x46+1])) * 75 + + (Pico_mcd->s68k_regs[0x48+0] * 10 + Pico_mcd->s68k_regs[0x48+1]) - 150; /* CD drive latency */ if (!cdd.latency) @@ -1119,7 +1119,7 @@ void cdd_process(void) #endif /* no audio track playing (yet) */ - Pico_mcd->regs[0x36>>1].byte.h = 0x01; + Pico_mcd->s68k_regs[0x36+0] = 0x01; /* update status */ cdd.status = CD_PLAY; @@ -1129,7 +1129,7 @@ void cdd_process(void) set_reg16(0x3a, (cdd.index < cdd.toc.last) ? lut_BCD_16[index + 1] : 0x0A0A); set_reg16(0x3c, 0x0000); set_reg16(0x3e, 0x0000); - Pico_mcd->regs[0x40>>1].byte.h = 0x00; + Pico_mcd->s68k_regs[0x40+0] = 0x00; break; } @@ -1139,9 +1139,9 @@ void cdd_process(void) int index = 0; /* new LBA position */ - int lba = ((Pico_mcd->regs[0x44>>1].byte.h * 10 + Pico_mcd->regs[0x44>>1].byte.l) * 60 + - (Pico_mcd->regs[0x46>>1].byte.h * 10 + Pico_mcd->regs[0x46>>1].byte.l)) * 75 + - (Pico_mcd->regs[0x48>>1].byte.h * 10 + Pico_mcd->regs[0x48>>1].byte.l) - 150; + int lba = ((Pico_mcd->s68k_regs[0x44+0] * 10 + Pico_mcd->s68k_regs[0x44+1]) * 60 + + (Pico_mcd->s68k_regs[0x46+0] * 10 + Pico_mcd->s68k_regs[0x46+1])) * 75 + + (Pico_mcd->s68k_regs[0x48+0] * 10 + Pico_mcd->s68k_regs[0x48+1]) - 150; /* CD drive seek time */ /* We are using similar linear model as above, although still not exactly accurate, */ @@ -1213,7 +1213,7 @@ void cdd_process(void) #endif /* no audio track playing */ - Pico_mcd->regs[0x36>>1].byte.h = 0x01; + Pico_mcd->s68k_regs[0x36+0] = 0x01; /* update status */ cdd.status = CD_SEEK; @@ -1230,17 +1230,17 @@ void cdd_process(void) case 0x06: /* Pause */ { /* no audio track playing */ - Pico_mcd->regs[0x36>>1].byte.h = 0x01; + Pico_mcd->s68k_regs[0x36+0] = 0x01; /* update status (RS1-RS8 
unchanged) */ - cdd.status = Pico_mcd->regs[0x38>>1].byte.h = CD_READY; + cdd.status = Pico_mcd->s68k_regs[0x38+0] = CD_READY; break; } case 0x07: /* Resume */ { /* update status (RS1-RS8 unchanged) */ - cdd.status = Pico_mcd->regs[0x38>>1].byte.h = CD_PLAY; + cdd.status = Pico_mcd->s68k_regs[0x38+0] = CD_PLAY; break; } @@ -1250,7 +1250,7 @@ void cdd_process(void) cdd.scanOffset = CD_SCAN_SPEED; /* update status (RS1-RS8 unchanged) */ - cdd.status = Pico_mcd->regs[0x38>>1].byte.h = CD_SCAN; + cdd.status = Pico_mcd->s68k_regs[0x38+0] = CD_SCAN; break; } @@ -1260,7 +1260,7 @@ void cdd_process(void) cdd.scanOffset = -CD_SCAN_SPEED; /* update status (RS1-RS8 unchanged) */ - cdd.status = Pico_mcd->regs[0x38>>1].byte.h = CD_SCAN; + cdd.status = Pico_mcd->s68k_regs[0x38+0] = CD_SCAN; break; } @@ -1273,17 +1273,17 @@ void cdd_process(void) /* also see US Patent nr. 5222054 for a detailled description of seeking operation using Track Jump */ /* no audio track playing */ - Pico_mcd->regs[0x36>>1].byte.h = 0x01; + Pico_mcd->s68k_regs[0x36+0] = 0x01; /* update status (RS1-RS8 unchanged) */ - cdd.status = Pico_mcd->regs[0x38>>1].byte.h = CD_READY; + cdd.status = Pico_mcd->s68k_regs[0x38+0] = CD_READY; break; } case 0x0c: /* Close Tray */ { /* no audio track playing */ - Pico_mcd->regs[0x36>>1].byte.h = 0x01; + Pico_mcd->s68k_regs[0x36+0] = 0x01; /* update status */ cdd.status = cdd.loaded ? 
CD_STOP : NO_DISC; @@ -1304,7 +1304,7 @@ void cdd_process(void) case 0x0d: /* Open Tray */ { /* no audio track playing */ - Pico_mcd->regs[0x36>>1].byte.h = 0x01; + Pico_mcd->s68k_regs[0x36+0] = 0x01; /* update status (RS1-RS8 ignored) */ cdd.status = CD_OPEN; @@ -1323,17 +1323,17 @@ void cdd_process(void) #ifdef LOG_CDD error("Unknown CDD Command !!!\n"); #endif - Pico_mcd->regs[0x38>>1].byte.h = cdd.status; + Pico_mcd->s68k_regs[0x38+0] = cdd.status; break; } /* only compute checksum when necessary */ - Pico_mcd->regs[0x40>>1].byte.l = - ~(Pico_mcd->regs[0x38>>1].byte.h + Pico_mcd->regs[0x38>>1].byte.l + - Pico_mcd->regs[0x3a>>1].byte.h + Pico_mcd->regs[0x3a>>1].byte.l + - Pico_mcd->regs[0x3c>>1].byte.h + Pico_mcd->regs[0x3c>>1].byte.l + - Pico_mcd->regs[0x3e>>1].byte.h + Pico_mcd->regs[0x3e>>1].byte.l + - Pico_mcd->regs[0x40>>1].byte.h) & 0x0f; + Pico_mcd->s68k_regs[0x40 + 1] = + ~(Pico_mcd->s68k_regs[0x38 + 0] + Pico_mcd->s68k_regs[0x38 + 1] + + Pico_mcd->s68k_regs[0x3a + 0] + Pico_mcd->s68k_regs[0x3a + 1] + + Pico_mcd->s68k_regs[0x3c + 0] + Pico_mcd->s68k_regs[0x3c + 1] + + Pico_mcd->s68k_regs[0x3e + 0] + Pico_mcd->s68k_regs[0x3e + 1] + + Pico_mcd->s68k_regs[0x40 + 0]) & 0x0f; } // vim:shiftwidth=2:ts=2:expandtab diff --git a/pico/cd/genplus_macros.h b/pico/cd/genplus_macros.h index 04c381a7..8ac5d35b 100644 --- a/pico/cd/genplus_macros.h +++ b/pico/cd/genplus_macros.h @@ -12,22 +12,6 @@ #define int16 signed short #define int32 signed int -typedef union -{ - uint16 w; - struct - { -#if 1 - uint8 l; - uint8 h; -#else - uint8 h; - uint8 l; -#endif - } byte; - -} reg16_t; - #define READ_BYTE(BASE, ADDR) (BASE)[(ADDR)^1] #define WRITE_BYTE(BASE, ADDR, VAL) (BASE)[(ADDR)^1] = (VAL) diff --git a/pico/cd/memory.c b/pico/cd/memory.c index acf29c64..e660e35b 100644 --- a/pico/cd/memory.c +++ b/pico/cd/memory.c @@ -385,7 +385,7 @@ void s68k_reg_write8(u32 a, u32 d) //dprintf("s68k CDC reg addr: %x", d&0xf); break; case 7: - cdc_reg_w(d); + cdc_reg_w(d & 0xff); return; 
case 0xa: elprintf(EL_CDREGS, "s68k set CDC dma addr"); diff --git a/pico/pico_int.h b/pico/pico_int.h index 48d52372..7447db94 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -434,15 +434,7 @@ typedef struct unsigned char pcm_ram[0x10000]; unsigned char pcm_ram_b[0x10][0x1000]; }; - union { - unsigned char s68k_regs[0x200]; // 110000: GA, not CPU regs - union { - struct { - unsigned char h; - unsigned char l; - } byte; - } regs[0x200/2]; - }; + unsigned char s68k_regs[0x200]; // 110000: GA, not CPU regs unsigned char bram[0x2000]; // 110200: 8K struct mcd_misc m; // 112200: misc struct mcd_pcm pcm; // 112240: From 2d6460655c774462193bbc893dce5a43e04b4ed7 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 12 Oct 2013 03:16:51 +0300 Subject: [PATCH 0021/1110] gp2x: hack to avoid 940 reinit ..it used to rely on upper layer to avoid YM2612Init_940() calls on menu entry --- platform/gp2x/940ctl.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/platform/gp2x/940ctl.c b/platform/gp2x/940ctl.c index d3769570..6ba13bd8 100644 --- a/platform/gp2x/940ctl.c +++ b/platform/gp2x/940ctl.c @@ -284,6 +284,12 @@ void sharedmem940_finish(void) void YM2612Init_940(int baseclock, int rate) { + static int oldrate; + + // HACK + if (Pico.m.frame_count > 0 && !crashed_940 && rate == oldrate) + return; + printf("YM2612Init_940()\n"); printf("Mem usage: shared_data: %i, shared_ctl: %i\n", sizeof(*shared_data), sizeof(*shared_ctl)); @@ -353,6 +359,8 @@ void YM2612Init_940(int baseclock, int rate) shared_ctl->baseclock = baseclock; shared_ctl->rate = rate; add_job_940(JOB940_INITALL); + + oldrate = rate; } From 9993e0d6dd65e7065e81cab76791c7c607cda0b2 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 12 Oct 2013 02:43:47 +0300 Subject: [PATCH 0022/1110] fix use of freed mem --- pico/cd/cd_image.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pico/cd/cd_image.c b/pico/cd/cd_image.c index 97c8f3f0..07b55ceb 100644 --- a/pico/cd/cd_image.c +++ 
b/pico/cd/cd_image.c @@ -183,7 +183,6 @@ int load_cd_image(const char *cd_img_name, int *type) elprintf(EL_STATUS, "Track %2i: %s %9i AUDIO %s", n, tmp_ext, length, cue_data->tracks[n].fname); } - cue_destroy(cue_data); goto finish; } @@ -260,6 +259,9 @@ finish: if (PicoCDLoadProgressCB != NULL) PicoCDLoadProgressCB(cd_img_name, 100); + if (cue_data != NULL) + cue_destroy(cue_data); + return 0; } From f7e40c9b2e6d5ca446cb8c2edf9e7601dd9c97cd Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 12 Oct 2013 22:05:04 +0300 Subject: [PATCH 0023/1110] frontend: eliminate osd_text dupes not only dupe code is bad, it's crasing too --- platform/common/emu.c | 20 +++++++++++++++++++- platform/common/emu.h | 2 ++ platform/gp2x/emu.c | 17 +---------------- platform/linux/emu.c | 40 ++-------------------------------------- platform/pandora/plat.c | 22 ++-------------------- 5 files changed, 26 insertions(+), 75 deletions(-) diff --git a/platform/common/emu.c b/platform/common/emu.c index 88fefd57..7f375a3c 100644 --- a/platform/common/emu.c +++ b/platform/common/emu.c @@ -738,8 +738,26 @@ mk_text_out(emu_text_out16_rot, unsigned short, 0xffff, #undef mk_text_out +void emu_osd_text16(int x, int y, const char *text) +{ + int len = strlen(text) * 8; + int i, h; -void update_movie(void) + len++; + if (x + len > g_screen_width) + len = g_screen_width - x; + + for (h = 0; h < 8; h++) { + unsigned short *p; + p = (unsigned short *)g_screen_ptr + + x + g_screen_width * (y + h); + for (i = len; i > 0; i--, p++) + *p = (*p >> 2) & 0x39e7; + } + emu_text_out16(x, y, text); +} + +static void update_movie(void) { int offs = Pico.m.frame_count*3 + 0x40; if (offs+3 > movie_size) { diff --git a/platform/common/emu.h b/platform/common/emu.h index 8f8f61f4..6e7c3991 100644 --- a/platform/common/emu.h +++ b/platform/common/emu.h @@ -129,6 +129,8 @@ void emu_text_out16(int x, int y, const char *text); void emu_text_out8_rot (int x, int y, const char *text); void emu_text_out16_rot(int x, int y, const 
char *text); +void emu_osd_text16(int x, int y, const char *text); + void emu_make_path(char *buff, const char *end, int size); void emu_update_input(void); void emu_get_game_name(char *str150); diff --git a/platform/gp2x/emu.c b/platform/gp2x/emu.c index 74bcbcb3..244f15d9 100644 --- a/platform/gp2x/emu.c +++ b/platform/gp2x/emu.c @@ -128,21 +128,6 @@ static void osd_text8(int x, int y, const char *text) emu_text_out8(x, y, text); } -static void osd_text16(int x, int y, const char *text) -{ - int len = strlen(text)*8; - int *p, i, h, offs; - - len = (len+1) >> 1; - for (h = 0; h < 8; h++) { - offs = (x + g_screen_width * (y+h)) & ~1; - p = (int *) ((short *)g_screen_ptr + offs); - for (i = len; i; i--, p++) - *p = (*p >> 2) & 0x39e7; - } - emu_text_out16(x, y, text); -} - static void osd_text8_rot(int x, int y, const char *text) { int len = strlen(text) * 8; @@ -540,7 +525,7 @@ static void vid_reset_mode(void) PicoDrawSetCallbacks(emu_scan_begin, emu_scan_end); if (is_16bit_mode()) - osd_text = (currentConfig.EmuOpt & EOPT_WIZ_TEAR_FIX) ? osd_text16_rot : osd_text16; + osd_text = (currentConfig.EmuOpt & EOPT_WIZ_TEAR_FIX) ? osd_text16_rot : emu_osd_text16; else osd_text = (currentConfig.EmuOpt & EOPT_WIZ_TEAR_FIX) ? 
osd_text8_rot : osd_text8; diff --git a/platform/linux/emu.c b/platform/linux/emu.c index 9e45fb13..c22f4fd0 100644 --- a/platform/linux/emu.c +++ b/platform/linux/emu.c @@ -34,41 +34,6 @@ void pemu_validate_config(void) PicoOpt &= ~POPT_EN_DRC; } -// FIXME: dupes from GP2X, need cleanup -static void (*osd_text)(int x, int y, const char *text); - -/* -static void osd_text8(int x, int y, const char *text) -{ - int len = strlen(text)*8; - int *p, i, h, offs; - - len = (len+3) >> 2; - for (h = 0; h < 8; h++) { - offs = (x + g_screen_width * (y+h)) & ~3; - p = (int *) ((char *)g_screen_ptr + offs); - for (i = len; i; i--, p++) - *p = 0xe0e0e0e0; - } - emu_text_out8(x, y, text); -} -*/ - -static void osd_text16(int x, int y, const char *text) -{ - int len = strlen(text)*8; - int *p, i, h, offs; - - len = (len+1) >> 1; - for (h = 0; h < 8; h++) { - offs = (x + g_screen_width * (y+h)) & ~1; - p = (int *) ((short *)g_screen_ptr + offs); - for (i = len; i; i--, p++) - *p = (*p >> 2) & 0x39e7; - } - emu_text_out16(x, y, text); -} - static void draw_cd_leds(void) { int led_reg, pitch, scr_offs, led_offs; @@ -115,9 +80,9 @@ void pemu_finalize_frame(const char *fps, const char *notice) if (notice || (currentConfig.EmuOpt & EOPT_SHOW_FPS)) { if (notice) - osd_text(4, g_screen_height - 8, notice); + emu_osd_text16(4, g_screen_height - 8, notice); if (currentConfig.EmuOpt & EOPT_SHOW_FPS) - osd_text(g_screen_width - 60, g_screen_height - 8, fps); + emu_osd_text16(g_screen_width - 60, g_screen_height - 8, fps); } if ((PicoAHW & PAHW_MCD) && (currentConfig.EmuOpt & EOPT_EN_CD_LEDS)) draw_cd_leds(); @@ -215,7 +180,6 @@ void emu_video_mode_change(int start_line, int line_count, int is_32cols) void pemu_loop_prep(void) { apply_renderer(); - osd_text = osd_text16; } void pemu_loop_end(void) diff --git a/platform/pandora/plat.c b/platform/pandora/plat.c index f82d7027..8f8eac6c 100644 --- a/platform/pandora/plat.c +++ b/platform/pandora/plat.c @@ -135,24 +135,6 @@ void 
pemu_validate_config(void) currentConfig.CPUclock = plat_target_cpu_clock_get(); } -static void osd_text(int x, int y, const char *text) -{ - int len = strlen(text)*8; - int i, h; - - len++; - if (x + len > g_screen_width) - len = g_screen_width - x; - - for (h = 0; h < 8; h++) { - unsigned short *p; - p = (unsigned short *)g_screen_ptr + x + g_screen_width*(y + h); - for (i = len; i > 0; i--, p++) - *p = (*p>>2) & 0x39e7; - } - emu_text_out16(x, y, text); -} - static void draw_cd_leds(void) { int old_reg; @@ -182,9 +164,9 @@ static void draw_cd_leds(void) void pemu_finalize_frame(const char *fps, const char *notice) { if (notice && notice[0]) - osd_text(2, g_osd_y, notice); + emu_osd_text16(2, g_osd_y, notice); if (fps && fps[0] && (currentConfig.EmuOpt & EOPT_SHOW_FPS)) - osd_text(g_osd_fps_x, g_osd_y, fps); + emu_osd_text16(g_osd_fps_x, g_osd_y, fps); if ((PicoAHW & PAHW_MCD) && (currentConfig.EmuOpt & EOPT_EN_CD_LEDS)) draw_cd_leds(); } From 74e770b1ecb6f0f0e506fd20c511c801249b5f5f Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 11 Oct 2013 00:16:37 +0300 Subject: [PATCH 0024/1110] release 1.91 --- ChangeLog | 4 +++- configure | 2 +- platform/base_readme.txt | 13 +++---------- platform/common/menu_pico.c | 2 +- platform/common/plat_sdl.c | 2 ++ platform/common/version.h | 2 +- platform/gp2x/Makefile | 8 ++++---- platform/pandora/Makefile | 6 +++--- 8 files changed, 18 insertions(+), 21 deletions(-) diff --git a/ChangeLog b/ChangeLog index ba840f4f..a8ba44eb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,7 @@ -1.91 (2013-10-) +1.91 (2013-10-12) + Added OpenDingux support (Paul Cercueil). + * Save directory changed to ~/.picodrive/ for generic platform build + (Paul Cercueil). + Revived GP2X/Caanoo/Wiz support. + Switched to cleaner CD controller code from Eke-Eke's Genesis Plus GX. 
* Fixed overflow issue where cd emulation would break after diff --git a/configure b/configure index 29b45e1f..96cea7e2 100755 --- a/configure +++ b/configure @@ -52,7 +52,7 @@ CC="${CC-${CROSS_COMPILE}gcc}" CXX="${CXX-${CROSS_COMPILE}g++}" AS="${AS-${CROSS_COMPILE}as}" STRIP="${STRIP-${CROSS_COMPILE}strip}" -SDL_CONFIG="`$CC --print-sysroot`/usr/bin/sdl-config" +SDL_CONFIG="`$CC --print-sysroot 2> /dev/null || true`/usr/bin/sdl-config" MAIN_LDLIBS="$LDLIBS -lm" config_mak="config.mak" diff --git a/platform/base_readme.txt b/platform/base_readme.txt index 7a3bd897..c37ed037 100644 --- a/platform/base_readme.txt +++ b/platform/base_readme.txt @@ -281,13 +281,6 @@ This option enables CD audio playback. This enables 8 channel PCM sound source. It is required for some games to run, because they monitor state of this audio chip. -@@2. "ReadAhead buffer" -This option can prefetch more data from the CD image than requested by game -(to avoid accessing card later), what can improve performance in some cases. -#ifndef PSP -"OFF" is the recommended setting. -#endif - @@2. "Save RAM cart" Here you can enable 64K RAM cart. Format it in BIOS if you do. @@ -373,9 +366,9 @@ for sound (i.e. to generate YM2612 samples) to improve performance noticeably. It also decodes MP3s in Sega/Mega CD mode. #endif -@@4. "SVP dynarec" -This enables dynamic recompilation for SVP chip emulated for Virtua Racing game, -what improves it's emulation performance greatly. +@@4. "Enable dynarecs" +This enables dynamic recompilation for SH2 and SVP CPU code, +what improves emulation performance greatly. 
Key configuration diff --git a/platform/common/menu_pico.c b/platform/common/menu_pico.c index 32a3fdbf..b127077a 100644 --- a/platform/common/menu_pico.c +++ b/platform/common/menu_pico.c @@ -414,7 +414,7 @@ static menu_entry e_menu_cd_options[] = mee_onoff_h("CDDA audio", MA_CDOPT_CDDA, PicoOpt, POPT_EN_MCD_CDDA, h_cdda), mee_onoff_h("PCM audio", MA_CDOPT_PCM, PicoOpt, POPT_EN_MCD_PCM, h_cdpcm), mee_onoff_h("SaveRAM cart", MA_CDOPT_SAVERAM, PicoOpt, POPT_EN_MCD_RAMCART, h_srcart), - mee_onoff_h("Scale/Rot. fx (slow)", MA_CDOPT_SCALEROT_CHIP, PicoOpt, POPT_EN_MCD_GFX, h_scfx), + mee_onoff_h("Scale/Rot. fx", MA_CDOPT_SCALEROT_CHIP, PicoOpt, POPT_EN_MCD_GFX, h_scfx), mee_end, }; diff --git a/platform/common/plat_sdl.c b/platform/common/plat_sdl.c index 4404691a..3387b952 100644 --- a/platform/common/plat_sdl.c +++ b/platform/common/plat_sdl.c @@ -17,6 +17,8 @@ #include "input_pico.h" #include "version.h" +#include + static void *shadow_fb; const struct in_default_bind in_sdl_defbinds[] __attribute__((weak)) = { diff --git a/platform/common/version.h b/platform/common/version.h index ec86a564..01e9b7d0 100644 --- a/platform/common/version.h +++ b/platform/common/version.h @@ -1 +1 @@ -#define VERSION "1.90" +#define VERSION "1.91" diff --git a/platform/gp2x/Makefile b/platform/gp2x/Makefile index 9d90ecc4..e7b4326b 100644 --- a/platform/gp2x/Makefile +++ b/platform/gp2x/Makefile @@ -10,10 +10,10 @@ endif all: rel -../../tools/textfilter: +../../tools/textfilter: ../../tools/textfilter.c make -C ../../tools/ -readme.txt: ../../tools/textfilter ../base_readme.txt +readme.txt: ../../tools/textfilter ../base_readme.txt ../../ChangeLog ../../tools/textfilter ../base_readme.txt $@ GP2X CODE940 = code940/pico940_v3.bin @@ -27,8 +27,8 @@ rel: ../../PicoDrive PicoDrive.gpe $(CODE940) readme.txt ../game_def.cfg \ cp PicoDrive.ini out/ cp skin/*.png out/PicoDrive/skin/ cp skin/*.txt out/PicoDrive/skin/ - mkdir out/bin_to_cso_mp3 - cp ../../tools/bin_to_cso_mp3/* 
out/bin_to_cso_mp3/ + #mkdir out/bin_to_cso_mp3 + #cp ../../tools/bin_to_cso_mp3/* out/bin_to_cso_mp3/ cd out && zip -9 -r ../../../PicoDrive_$(VER).zip * rm -rf out diff --git a/platform/pandora/Makefile b/platform/pandora/Makefile index 59071692..4240897a 100644 --- a/platform/pandora/Makefile +++ b/platform/pandora/Makefile @@ -12,10 +12,10 @@ PND_MAKE ?= $(HOME)/dev/pnd/src/pandora-libraries/testdata/scripts/pnd_make.sh all: rel -../../tools/textfilter: +../../tools/textfilter: ../../tools/textfilter.c make -C ../../tools/ -/tmp/readme.txt: ../../tools/textfilter ../base_readme.txt +readme.txt: ../../tools/textfilter ../base_readme.txt ../../ChangeLog ../../tools/textfilter ../base_readme.txt $@ PANDORA /tmp/PicoDrive.pxml: PicoDrive.pxml.template FORCE @@ -24,7 +24,7 @@ all: rel rel: ../../PicoDrive PicoDrive.sh picorestore \ PicoDrive.png PicoDrive_p.png \ ../../pico/carthw.cfg skin \ - /tmp/readme.txt /tmp/PicoDrive.pxml + readme.txt /tmp/PicoDrive.pxml rm -rf out mkdir out cp -r $^ out/ From 0fd6751db80565b9c0c1ba66e37c647fc724e6f2 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 13 Oct 2013 18:46:48 +0300 Subject: [PATCH 0025/1110] libretro: empty srm prevention hack --- platform/libretro.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/platform/libretro.c b/platform/libretro.c index ee15d9c8..23f0694e 100644 --- a/platform/libretro.c +++ b/platform/libretro.c @@ -741,14 +741,25 @@ void *retro_get_memory_data(unsigned id) size_t retro_get_memory_size(unsigned id) { + unsigned int i; + int sum; + if (id != RETRO_MEMORY_SAVE_RAM) return 0; if (PicoAHW & PAHW_MCD) // bram return 0x2000; - else + + if (Pico.m.frame_count == 0) return SRam.size; + + // if game doesn't write to sram, don't report it to + // libretro so that RA doesn't write out zeroed .srm + for (i = 0, sum = 0; i < SRam.size; i++) + sum |= SRam.data[i]; + + return (sum != 0) ? 
SRam.size : 0; } void retro_reset(void) From 21299f18ad706a17222bc4bc2ae31b1cbd033fc5 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 7 Dec 2013 02:03:38 +0200 Subject: [PATCH 0026/1110] try to deal with some famec compiler issues --- cpu/fame/famec.c | 12 +++++++++++- jni/Android.mk | 3 ++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/cpu/fame/famec.c b/cpu/fame/famec.c index 2f10540d..9e9dc153 100644 --- a/cpu/fame/famec.c +++ b/cpu/fame/famec.c @@ -35,7 +35,17 @@ #define PICODRIVE_HACK // Options // - +#ifndef FAMEC_NO_GOTOS +// computed gotos is a GNU extension +#ifndef __GNUC__ +#define FAMEC_NO_GOTOS +#endif +// as of 3.3, clang takes over 3h to compile this in computed goto mode.. +#ifdef __clang__ +#define FAMEC_NO_GOTOS +#endif +#endif + #undef INLINE #ifdef _MSC_VER #define INLINE diff --git a/jni/Android.mk b/jni/Android.mk index 72a81634..122b18e8 100644 --- a/jni/Android.mk +++ b/jni/Android.mk @@ -73,8 +73,9 @@ LOCAL_SRC_FILES += $(R)unzip/unzip.c $(R)unzip/unzip_stream.c LOCAL_C_INCLUDES += $(R) +# note: don't use -O3, causes some NDKs run out of memory while compiling FAME +LOCAL_CFLAGS += -Wall -O2 -ffast-math -DNDEBUG LOCAL_CFLAGS += $(addprefix -D,$(DEFINES)) -LOCAL_CFLAGS += -Wall -O3 -ffast-math -DNDEBUG LOCAL_LDLIBS := -llog include $(BUILD_SHARED_LIBRARY) From 1c67828540de2f5c8579eab6d79eb3b330266212 Mon Sep 17 00:00:00 2001 From: twinaphex Date: Sat, 30 Nov 2013 02:31:42 +0100 Subject: [PATCH 0027/1110] (iOS) Add min SDK version flags --- Makefile.libretro | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Makefile.libretro b/Makefile.libretro index 87360482..e6229d51 100644 --- a/Makefile.libretro +++ b/Makefile.libretro @@ -39,12 +39,12 @@ else ifeq ($(platform), ios) TARGET := $(TARGET_NAME)_libretro_ios.dylib SHARED := -dynamiclib - CC = clang -arch armv7 -isysroot $(IOSSDK) - CXX = clang++ -arch armv7 -isysroot $(IOSSDK) - CC_AS = perl ./tools/gas-preprocessor.pl $(CC) - CFLAGS += 
-mcpu=cortex-a8 -mtune=cortex-a8 -mfpu=neon -marm + CC = clang -arch armv7 -isysroot $(IOSSDK) -miphoneos-version-min=5.0 + CXX = clang++ -arch armv7 -isysroot $(IOSSDK) -miphoneos-version-min=5.0 + CC_AS = perl ./tools/gas-preprocessor.pl $(CC) -miphoneos-version-min=5.0 + CFLAGS += -mcpu=cortex-a8 -mtune=cortex-a8 -mfpu=neon -marm ASFLAGS += -mcpu=cortex-a8 -mtune=cortex-a8 -mfpu=neon - CFLAGS += -DIOS + CFLAGS += -DIOS -miphoneos-version-min=5.0 ARCH := arm From f9020f359ce58d56770c55d0ddb5fbb386cf2346 Mon Sep 17 00:00:00 2001 From: twinaphex Date: Tue, 10 Dec 2013 22:25:38 +0100 Subject: [PATCH 0028/1110] (Clang) Linker fix --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index c199b853..d31779cc 100644 --- a/Makefile +++ b/Makefile @@ -3,8 +3,10 @@ CFLAGS += -Wall -ggdb -falign-functions=2 CFLAGS += -I. ifndef DEBUG CFLAGS += -O2 -DNDEBUG -ffunction-sections +ifeq ($(findstring clang,$(CC)),) LDFLAGS += -Wl,--gc-sections endif +endif #CFLAGS += -DEVT_LOG #CFLAGS += -DDRC_CMP #cpu_cmp = 1 From f814c3f6f890d68d5c97256080ba522e31f6ce44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9s=20Su=C3=A1rez?= Date: Wed, 30 Jul 2014 09:24:59 -0500 Subject: [PATCH 0029/1110] fix libretro-super build fix libretro-super build --- Makefile.libretro | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.libretro b/Makefile.libretro index e6229d51..96e4fe91 100644 --- a/Makefile.libretro +++ b/Makefile.libretro @@ -126,7 +126,7 @@ ifneq (,$(findstring armasm,$(platform))) endif ARCH = arm else - TARGET := $(TARGET_NAME)_retro.dll + TARGET := $(TARGET_NAME)_libretro.dll CC = gcc LD_FLAGS := -fPIC SHARED := -shared -static-libgcc -static-libstdc++ From 1dfbe19da9d3e1be38522d07985682f6fae1e669 Mon Sep 17 00:00:00 2001 From: EXL Date: Tue, 21 Apr 2015 16:32:16 +0600 Subject: [PATCH 0030/1110] Fix avcodec compile errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
mp3_libavcodec.c:141:31: error: ‘CODEC_ID_MP3’ undeclared (first use in this function) mp3_libavcodec.c:141:31: error: type of formal parameter 1 is incomplete --- platform/common/mp3_libavcodec.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/platform/common/mp3_libavcodec.c b/platform/common/mp3_libavcodec.c index 2d3c799c..9a528c1e 100644 --- a/platform/common/mp3_libavcodec.c +++ b/platform/common/mp3_libavcodec.c @@ -16,6 +16,11 @@ #include "../libpicofe/lprintf.h" #include "mp3.h" +#if LIBAVCODEC_VERSION_MAJOR < 55 +#define AVCodecID CodecID +#define AV_CODEC_ID_MP3 CODEC_ID_MP3 +#endif + static AVCodecContext *ctx; /* avoid compile time linking to libavcodec due to huge list of it's deps.. @@ -94,7 +99,7 @@ int mp3dec_decode(FILE *f, int *file_pos, int file_len) int mp3dec_start(FILE *f, int fpos_start) { void (*avcodec_register_all)(void); - AVCodec *(*avcodec_find_decoder)(enum CodecID id); + AVCodec *(*avcodec_find_decoder)(enum AVCodecID id); AVCodecContext *(*avcodec_alloc_context)(void); int (*avcodec_open)(AVCodecContext *avctx, AVCodec *codec); void (*av_free)(void *ptr); @@ -137,8 +142,7 @@ int mp3dec_start(FILE *f, int fpos_start) //avcodec_init(); avcodec_register_all(); - // AV_CODEC_ID_MP3 ? 
- codec = avcodec_find_decoder(CODEC_ID_MP3); + codec = avcodec_find_decoder(AV_CODEC_ID_MP3); if (codec == NULL) { lprintf("mp3dec: codec missing\n"); return -1; From fbba0ff6a0ce56304c8b6c558dd298d3a03c99d0 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 5 Apr 2014 21:55:02 +0300 Subject: [PATCH 0031/1110] make memcpy32/memset32 take void * --- pico/misc.c | 13 +++++++++---- pico/pico_int.h | 4 ++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/pico/misc.c b/pico/misc.c index f9e85cda..a500ac8c 100644 --- a/pico/misc.c +++ b/pico/misc.c @@ -125,21 +125,26 @@ PICO_INTERNAL_ASM void memcpy16bswap(unsigned short *dest, void *src, int count) } #ifndef _ASM_MISC_C_AMIPS -PICO_INTERNAL_ASM void memcpy32(int *dest, int *src, int count) +PICO_INTERNAL_ASM void memcpy32(void *dest_in, const void *src_in, int count) { - intblock *bd = (intblock *) dest, *bs = (intblock *) src; + const intblock *bs = (intblock *) src_in; + intblock *bd = (intblock *) dest_in; + const int *src; + int *dest; for (; count >= sizeof(*bd)/4; count -= sizeof(*bd)/4) *bd++ = *bs++; - dest = (int *)bd; src = (int *)bs; + dest = (int *)bd; src = (const int *)bs; while (count--) *dest++ = *src++; } -PICO_INTERNAL_ASM void memset32(int *dest, int c, int count) +PICO_INTERNAL_ASM void memset32(void *dest_in, int c, int count) { + int *dest = dest_in; + for (; count >= 8; count -= 8, dest += 8) dest[0] = dest[1] = dest[2] = dest[3] = dest[4] = dest[5] = dest[6] = dest[7] = c; diff --git a/pico/pico_int.h b/pico/pico_int.h index 7447db94..c85319c0 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -770,8 +770,8 @@ extern int (*PicoDmaHook)(unsigned int source, int len, unsigned short **srcp, u // misc.c PICO_INTERNAL_ASM void memcpy16(unsigned short *dest, unsigned short *src, int count); PICO_INTERNAL_ASM void memcpy16bswap(unsigned short *dest, void *src, int count); -PICO_INTERNAL_ASM void memcpy32(int *dest, int *src, int count); // 32bit word count -PICO_INTERNAL_ASM void 
memset32(int *dest, int c, int count); +PICO_INTERNAL_ASM void memcpy32(void *dest, const void *src, int count); // 32bit word count +PICO_INTERNAL_ASM void memset32(void *dest, int c, int count); // eeprom.c void EEPROM_write8(unsigned int a, unsigned int d); From 9ddede0990ff21df51b72d7eb51b4b3a214afbad Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 3 Dec 2014 01:20:46 +0200 Subject: [PATCH 0032/1110] update to newer cyclone and libpicofe --- cpu/cyclone | 2 +- cpu/cyclone_config.h | 1 + platform/libpicofe | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/cpu/cyclone b/cpu/cyclone index 590d780f..355815eb 160000 --- a/cpu/cyclone +++ b/cpu/cyclone @@ -1 +1 @@ -Subproject commit 590d780f20871b29fdc803bd2c74b046fd2d0f28 +Subproject commit 355815ebb5b1c60916f66d043a43f3af7839589f diff --git a/cpu/cyclone_config.h b/cpu/cyclone_config.h index 13e2c5ea..ed3b257e 100644 --- a/cpu/cyclone_config.h +++ b/cpu/cyclone_config.h @@ -8,6 +8,7 @@ **/ +#define HAVE_ARMv6 1 #define USE_MS_SYNTAX 0 #define CYCLONE_FOR_GENESIS 2 #define COMPRESS_JUMPTABLE 0 diff --git a/platform/libpicofe b/platform/libpicofe index 8b4363e3..d1453cf7 160000 --- a/platform/libpicofe +++ b/platform/libpicofe @@ -1 +1 @@ -Subproject commit 8b4363e302e6bbcf41321ec70f8c033efed5840d +Subproject commit d1453cf7e6d5d6758cc5d72c6d3af7d37156bf72 From 0bc48495d3d3e00928d89ab35f95506262d60f92 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 29 Aug 2015 02:19:38 +0300 Subject: [PATCH 0033/1110] another try to reduce compiler problems with fame also update libpicofe --- Makefile | 4 ++++ platform/libpicofe | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d31779cc..8e2feee1 100644 --- a/Makefile +++ b/Makefile @@ -197,6 +197,10 @@ pico/cd/pcm.o: CFLAGS += -fno-strict-aliasing pico/cd/LC89510.o: CFLAGS += -fno-strict-aliasing pico/cd/gfx_cd.o: CFLAGS += -fno-strict-aliasing +# fame needs ~2GB of RAM to compile on gcc 4.8 +# on x86, this is reduced by 
~300MB when debug info is off (but not on ARM) +cpu/fame/famec.o: CFLAGS += -g0 + # random deps pico/carthw/svp/compiler.o : cpu/drc/emit_$(ARCH).c cpu/sh2/compiler.o : cpu/drc/emit_$(ARCH).c diff --git a/platform/libpicofe b/platform/libpicofe index d1453cf7..0415ebf1 160000 --- a/platform/libpicofe +++ b/platform/libpicofe @@ -1 +1 @@ -Subproject commit d1453cf7e6d5d6758cc5d72c6d3af7d37156bf72 +Subproject commit 0415ebf191d0d277d1dfba1bebe6f051a5fc89a8 From 05eb243d030a0a999cdbc660f80cf219a76d2049 Mon Sep 17 00:00:00 2001 From: Chips-fr Date: Sun, 25 Oct 2015 18:19:48 +0100 Subject: [PATCH 0034/1110] Enabe JIT for linux arm platform(Rpi...) --- platform/linux/emu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/platform/linux/emu.c b/platform/linux/emu.c index c22f4fd0..91115ad6 100644 --- a/platform/linux/emu.c +++ b/platform/linux/emu.c @@ -31,7 +31,9 @@ void pemu_validate_config(void) { extern int PicoOpt; // PicoOpt &= ~POPT_EXT_FM; +#ifndef __arm__ PicoOpt &= ~POPT_EN_DRC; +#endif } static void draw_cd_leds(void) From 8450a2f56172b22bf2b7e871f75ab8b8b4cd2732 Mon Sep 17 00:00:00 2001 From: Chips-fr Date: Wed, 20 Jan 2016 20:34:46 +0100 Subject: [PATCH 0035/1110] Add Rpi1 and Rpi2 support with GLES upscaling --- Makefile | 14 +++++++++++++- README | 6 ++++++ configure | 21 +++++++++++++++++++-- 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 8e2feee1..eecb7687 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ TARGET ?= PicoDrive CFLAGS += -Wall -ggdb -falign-functions=2 CFLAGS += -I. ifndef DEBUG -CFLAGS += -O2 -DNDEBUG -ffunction-sections +CFLAGS += -O3 -DNDEBUG -ffunction-sections ifeq ($(findstring clang,$(CC)),) LDFLAGS += -Wl,--gc-sections endif @@ -73,6 +73,18 @@ OBJS += platform/opendingux/inputmap.o # OpenDingux is a generic platform, really. 
PLATFORM := generic endif +ifeq ("$(PLATFORM)",$(filter "$(PLATFORM)","rpi1" "rpi2")) +CFLAGS += -DHAVE_GLES -DRASPBERRY +CFLAGS += -I/opt/vc/include/ -I/opt/vc/include/interface/vcos/pthreads/ -I/opt/vc/include/interface/vmcs_host/linux/ +LDFLAGS += -ldl -lbcm_host -L/opt/vc/lib -lEGL -lGLESv2 +OBJS += platform/linux/emu.o platform/linux/blit.o # FIXME +OBJS += platform/common/plat_sdl.o +OBJS += platform/libpicofe/plat_sdl.o platform/libpicofe/in_sdl.o +OBJS += platform/libpicofe/plat_dummy.o +OBJS += platform/libpicofe/gl.o +OBJS += platform/libpicofe/gl_platform.o +USE_FRONTEND = 1 +endif ifeq "$(PLATFORM)" "generic" OBJS += platform/linux/emu.o platform/linux/blit.o # FIXME OBJS += platform/common/plat_sdl.o diff --git a/README b/README index fbcecc13..3254fe21 100644 --- a/README +++ b/README @@ -13,3 +13,9 @@ then taken over and expanded by notaz. PicoDrive was the first emulator ever to properly emulate Virtua Racing and it's SVP chip. +How to compile on Raspbian Wheezy: + +export CC=gcc-4.8 +export CXX=g++-4.8 +./configure --platform=rpi2 +make \ No newline at end of file diff --git a/configure b/configure index 96cea7e2..6cc42545 100755 --- a/configure +++ b/configure @@ -31,7 +31,7 @@ check_define() # setting options to "yes" or "no" will make that choice default, # "" means "autodetect". 
-platform_list="generic pandora gp2x opendingux" +platform_list="generic pandora gp2x opendingux rpi1 rpi2" platform="generic" sound_driver_list="oss alsa sdl" sound_drivers="" @@ -44,6 +44,8 @@ need_sdl="no" need_xlib="no" # these are for known platforms optimize_cortexa8="no" +optimize_cortexa7="no" +optimize_arm1176jzf="no" optimize_arm926ej="no" optimize_arm920="no" @@ -67,6 +69,13 @@ set_platform() { platform=$1 case "$platform" in + rpi1) + optimize_arm1176jzf="yes" + ;; + rpi2) + optimize_cortexa7="yes" + have_arm_neon="yes" + ;; generic) ;; opendingux) @@ -159,6 +168,14 @@ arm*) CFLAGS="$CFLAGS -mcpu=cortex-a8 -mtune=cortex-a8" ASFLAGS="$ASFLAGS -mcpu=cortex-a8" fi + if [ "$optimize_cortexa7" = "yes" ]; then + CFLAGS="$CFLAGS -mcpu=cortex-a7" + ASFLAGS="$ASFLAGS -mcpu=cortex-a7" + fi + if [ "$optimize_arm1176jzf" = "yes" ]; then + CFLAGS="$CFLAGS -mcpu=arm1176jzf-s -mfloat-abi=hard" + ASFLAGS="$ASFLAGS -mcpu=arm1176jzf-s -mfloat-abi=hard" + fi if [ "$optimize_arm926ej" = "yes" ]; then CFLAGS="$CFLAGS -mcpu=arm926ej-s -mtune=arm926ej-s" ASFLAGS="$ASFLAGS -mcpu=arm926ej-s -mfloat-abi=softfp" @@ -229,7 +246,7 @@ arm*) esac case "$platform" in -generic | opendingux) +rpi1 | rpi2 | generic | opendingux) need_sdl="yes" ;; esac From 0270424fac34be5b63fb37caaaca60e378365d81 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 27 Jan 2016 02:11:16 +0200 Subject: [PATCH 0036/1110] adjust famec flags after O2 -> O3 change --- Makefile | 4 +++- README | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index eecb7687..70ce193a 100644 --- a/Makefile +++ b/Makefile @@ -211,7 +211,9 @@ pico/cd/gfx_cd.o: CFLAGS += -fno-strict-aliasing # fame needs ~2GB of RAM to compile on gcc 4.8 # on x86, this is reduced by ~300MB when debug info is off (but not on ARM) -cpu/fame/famec.o: CFLAGS += -g0 +# not using O3 and -fno-expensive-optimizations seems to also help, but you may +# want to remove this stuff for better performance if your compiler can 
handle it +cpu/fame/famec.o: CFLAGS += -g0 -O2 -fno-expensive-optimizations # random deps pico/carthw/svp/compiler.o : cpu/drc/emit_$(ARCH).c diff --git a/README b/README index 3254fe21..d9c10910 100644 --- a/README +++ b/README @@ -18,4 +18,4 @@ How to compile on Raspbian Wheezy: export CC=gcc-4.8 export CXX=g++-4.8 ./configure --platform=rpi2 -make \ No newline at end of file +make From b4c2331e793ba8ca719fc540f6f2eb1b9e53a0e6 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 17 Apr 2017 00:30:30 +0300 Subject: [PATCH 0037/1110] get rid of the silly unzip_stream ... and just use zlib's inflate. Needed for the next commit. --- Makefile | 2 +- pico/cart.c | 135 +++++++++++++++++++++++--------- unzip/unzip_stream.c | 178 ------------------------------------------- unzip/unzip_stream.h | 4 - 4 files changed, 101 insertions(+), 218 deletions(-) delete mode 100644 unzip/unzip_stream.c delete mode 100644 unzip/unzip_stream.h diff --git a/Makefile b/Makefile index 70ce193a..0d37237c 100644 --- a/Makefile +++ b/Makefile @@ -168,7 +168,7 @@ endif OBJS += zlib/gzio.o zlib/inffast.o zlib/inflate.o zlib/inftrees.o zlib/trees.o \ zlib/deflate.o zlib/crc32.o zlib/adler32.o zlib/zutil.o zlib/compress.o zlib/uncompr.o # unzip -OBJS += unzip/unzip.o unzip/unzip_stream.o +OBJS += unzip/unzip.o include platform/common/common.mak diff --git a/pico/cart.c b/pico/cart.c index a5c563d6..377ecbc2 100644 --- a/pico/cart.c +++ b/pico/cart.c @@ -11,7 +11,6 @@ #include "../zlib/zlib.h" #include "../cpu/debug.h" #include "../unzip/unzip.h" -#include "../unzip/unzip_stream.h" static int rom_alloc_size; @@ -90,6 +89,16 @@ static const char *get_ext(const char *path) return ext; } +struct zip_file { + pm_file file; + ZIP *zip; + struct zipent *entry; + z_stream stream; + unsigned char inbuf[16384]; + long start; + unsigned int pos; +}; + pm_file *pm_open(const char *path) { pm_file *file = NULL; @@ -102,10 +111,10 @@ pm_file *pm_open(const char *path) ext = get_ext(path); if (strcasecmp(ext, 
"zip") == 0) { + struct zip_file *zfile = NULL; struct zipent *zipentry; - gzFile gzf = NULL; ZIP *zipfile; - int i; + int i, ret; zipfile = openzip(path); if (zipfile != NULL) @@ -127,25 +136,29 @@ pm_file *pm_open(const char *path) goto zip_failed; found_rom_zip: - /* try to convert to gzip stream, so we could use standard gzio functions from zlib */ - gzf = zip2gz(zipfile, zipentry); - if (gzf == NULL) goto zip_failed; - - file = calloc(1, sizeof(*file)); - if (file == NULL) goto zip_failed; - file->file = zipfile; - file->param = gzf; - file->size = zipentry->uncompressed_size; - file->type = PMT_ZIP; - strncpy(file->ext, ext, sizeof(file->ext) - 1); - return file; + zfile = calloc(1, sizeof(*zfile)); + if (zfile == NULL) + goto zip_failed; + ret = seekcompresszip(zipfile, zipentry); + if (ret != 0) + goto zip_failed; + ret = inflateInit2(&zfile->stream, -15); + if (ret != Z_OK) { + elprintf(EL_STATUS, "zip: inflateInit2 %d", ret); + goto zip_failed; + } + zfile->zip = zipfile; + zfile->entry = zipentry; + zfile->start = ftell(zipfile->fp); + zfile->file.file = zfile; + zfile->file.size = zipentry->uncompressed_size; + zfile->file.type = PMT_ZIP; + strncpy(zfile->file.ext, ext, sizeof(zfile->file.ext) - 1); + return &zfile->file; zip_failed: - if (gzf) { - gzclose(gzf); - zipfile->fp = NULL; // gzclose() closed it - } closezip(zipfile); + free(zfile); return NULL; } } @@ -246,13 +259,33 @@ size_t pm_read(void *ptr, size_t bytes, pm_file *stream) } else if (stream->type == PMT_ZIP) { - gzFile gf = stream->param; - int err; - ret = gzread(gf, ptr, bytes); - err = gzerror2(gf); - if (ret > 0 && (err == Z_DATA_ERROR || err == Z_STREAM_END)) - /* we must reset stream pointer or else next seek/read fails */ - gzrewind(gf); + struct zip_file *z = stream->file; + + if (z->entry->compression_method == 0) { + int ret = fread(ptr, 1, bytes, z->zip->fp); + z->pos += ret; + return ret; + } + + z->stream.next_out = ptr; + z->stream.avail_out = bytes; + while 
(z->stream.avail_out != 0) { + if (z->stream.avail_in == 0) { + z->stream.avail_in = fread(z->inbuf, 1, sizeof(z->inbuf), z->zip->fp); + if (z->stream.avail_in == 0) + break; + z->stream.next_in = z->inbuf; + } + ret = inflate(&z->stream, Z_NO_FLUSH); + if (ret == Z_STREAM_END) + break; + if (ret != Z_OK) { + elprintf(EL_STATUS, "zip: inflate: %d", ret); + return 0; + } + } + z->pos += bytes - z->stream.avail_out; + return bytes - z->stream.avail_out; } else if (stream->type == PMT_CSO) { @@ -331,12 +364,45 @@ int pm_seek(pm_file *stream, long offset, int whence) } else if (stream->type == PMT_ZIP) { - if (PicoMessage != NULL && offset > 6*1024*1024) { - long pos = gztell((gzFile) stream->param); - if (offset < pos || offset - pos > 6*1024*1024) - PicoMessage("Decompressing data..."); + struct zip_file *z = stream->file; + unsigned int pos = z->pos; + int ret; + + switch (whence) + { + case SEEK_CUR: pos += offset; break; + case SEEK_SET: pos = offset; break; + case SEEK_END: pos = stream->size - offset; break; } - return gzseek((gzFile) stream->param, offset, whence); + if (z->entry->compression_method == 0) { + ret = fseek(z->zip->fp, z->start + pos, SEEK_SET); + if (ret == 0) + return (z->pos = pos); + return -1; + } + offset = pos - z->pos; + if (pos < z->pos) { + // full decompress from the start + fseek(z->zip->fp, z->start, SEEK_SET); + z->stream.avail_in = 0; + z->stream.next_in = z->inbuf; + inflateReset(&z->stream); + z->pos = 0; + offset = pos; + } + + if (PicoMessage != NULL && offset > 4 * 1024 * 1024) + PicoMessage("Decompressing data..."); + + while (offset > 0) { + char buf[16 * 1024]; + size_t l = offset > sizeof(buf) ? 
sizeof(buf) : offset; + ret = pm_read(buf, l, stream); + if (ret != l) + break; + offset -= l; + } + return z->pos; } else if (stream->type == PMT_CSO) { @@ -365,10 +431,9 @@ int pm_close(pm_file *fp) } else if (fp->type == PMT_ZIP) { - ZIP *zipfile = fp->file; - gzclose((gzFile) fp->param); - zipfile->fp = NULL; // gzclose() closed it - closezip(zipfile); + struct zip_file *z = fp->file; + inflateEnd(&z->stream); + closezip(z->zip); } else if (fp->type == PMT_CSO) { diff --git a/unzip/unzip_stream.c b/unzip/unzip_stream.c deleted file mode 100644 index 7f3e9354..00000000 --- a/unzip/unzip_stream.c +++ /dev/null @@ -1,178 +0,0 @@ -/* seekable zip */ - -#include "unzip.h" - -#include -#include -#include - -#include "zlib/zlib.h" - - -#define errormsg(str1,def,fname) printf("%s: " #def ": " str1 "\n", fname); - - -/* from gzio.c . Be careful with binary compatibility */ -typedef struct gz_stream { - z_stream stream; - int z_err; /* error code for last stream operation */ - int z_eof; /* set if end of input file */ - FILE *file; /* .gz file */ - Byte *inbuf; /* input buffer */ - Byte *outbuf; /* output buffer */ - uLong crc; /* crc32 of uncompressed data */ - char *msg; /* error message */ - char *path; /* path name for debugging only */ - int transparent; /* 1 if input file is not a .gz file */ - char mode; /* 'w' or 'r' */ - z_off_t start; /* start of compressed data in file (header skipped) */ - z_off_t in; /* bytes into deflate or inflate */ - z_off_t out; /* bytes out of deflate or inflate */ - int back; /* one character push-back */ - int last; /* true if push-back is last character */ -} gz_stream; - -#ifndef Z_BUFSIZE -# ifdef MAXSEG_64K -# define Z_BUFSIZE 4096 /* minimize memory usage for 16-bit DOS */ -# else -# define Z_BUFSIZE 16384 -# endif -#endif -#ifndef Z_PRINTF_BUFSIZE -# define Z_PRINTF_BUFSIZE 4096 -#endif - -#define ALLOC(size) malloc(size) - -int destroy OF((gz_stream *s)); - - -gzFile zip2gz(ZIP* zip, struct zipent* ent) -{ - int err; - 
gz_stream *s; - const char *path; - int transparent = 0; - uInt len; - - if (!zip || !ent) - return NULL; - - /* zip stuff */ - if (ent->compression_method == 0x0000) - { - /* file is not compressed, simply stored */ - - /* check if size are equal */ - if (ent->compressed_size != ent->uncompressed_size) { - errormsg("Wrong uncompressed size in store compression", ERROR_CORRUPT,zip->zip); - return NULL; - } - - transparent = 1; - } - else if (ent->compression_method == 0x0008) - { - /* file is compressed using "Deflate" method */ - if (ent->version_needed_to_extract > 0x14) { - errormsg("Version too new", ERROR_UNSUPPORTED,zip->zip); - return NULL; - } - - if (ent->os_needed_to_extract != 0x00) { - errormsg("OS not supported", ERROR_UNSUPPORTED,zip->zip); - return NULL; - } - - if (ent->disk_number_start != zip->number_of_this_disk) { - errormsg("Cannot span disks", ERROR_UNSUPPORTED,zip->zip); - return NULL; - } - - } else { - errormsg("Compression method unsupported", ERROR_UNSUPPORTED, zip->zip); - return NULL; - } - - /* seek to compressed data */ - if (seekcompresszip(zip,ent) != 0) { - return NULL; - } - - path = zip->zip; - - /* normal gzip init for read */ - s = (gz_stream *)ALLOC(sizeof(gz_stream)); - if (!s) return Z_NULL; - - s->stream.zalloc = (alloc_func)0; - s->stream.zfree = (free_func)0; - s->stream.opaque = (voidpf)0; - s->stream.next_in = s->inbuf = Z_NULL; - s->stream.next_out = s->outbuf = Z_NULL; - s->stream.avail_in = s->stream.avail_out = 0; - s->file = NULL; - s->z_err = Z_OK; - s->z_eof = 0; - s->in = 0; - s->out = 0; - s->back = EOF; - s->crc = crc32(0L, Z_NULL, 0); - s->msg = NULL; - s->transparent = transparent; - s->mode = 'r'; - - s->path = (char*)ALLOC(strlen(path)+1); - if (s->path == NULL) { - return destroy(s), (gzFile)Z_NULL; - } - strcpy(s->path, path); /* do this early for debugging */ - - s->stream.next_in = s->inbuf = (Byte*)ALLOC(Z_BUFSIZE); - - err = inflateInit2(&(s->stream), -MAX_WBITS); - /* windowBits is passed < 0 to 
tell that there is no zlib header. - * Note that in this case inflate *requires* an extra "dummy" byte - * after the compressed stream in order to complete decompression and - * return Z_STREAM_END. Here the gzip CRC32 ensures that 4 bytes are - * present after the compressed stream. - */ - if (err != Z_OK || s->inbuf == Z_NULL) { - return destroy(s), (gzFile)Z_NULL; - } - s->stream.avail_out = Z_BUFSIZE; - - errno = 0; - s->file = zip->fp; - if (s->file == NULL) { - return destroy(s), (gzFile)Z_NULL; - } - - /* check_header(s); */ - errno = 0; - len = (uInt)fread(s->inbuf, 1, Z_BUFSIZE, s->file); - if (len == 0 && ferror(s->file)) s->z_err = Z_ERRNO; - s->stream.avail_in += len; - s->stream.next_in = s->inbuf; - if (s->stream.avail_in < 2) { - return destroy(s), (gzFile)Z_NULL; - } - - s->start = ftell(s->file) - s->stream.avail_in; - - return (gzFile)s; -} - - -int gzerror2(gzFile file) -{ - gz_stream *s = (gz_stream*)file; - - if (s == NULL) - return Z_STREAM_ERROR; - - return s->z_err; -} - - diff --git a/unzip/unzip_stream.h b/unzip/unzip_stream.h deleted file mode 100644 index 59ff2faf..00000000 --- a/unzip/unzip_stream.h +++ /dev/null @@ -1,4 +0,0 @@ - -gzFile zip2gz(ZIP* zip, struct zipent* ent); -int gzerror2(gzFile file); - From 325ee167fd00e070f0e5ab6319880aa3ecbc3062 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 17 Apr 2017 00:34:00 +0300 Subject: [PATCH 0038/1110] use system's zlib except for libretro github issue #45 --- Makefile | 3 +++ configure | 6 +++--- pico/cart.c | 2 +- pico/state.c | 2 +- platform/gizmondo/menu.c | 2 +- platform/gp2x/emu.c | 2 +- platform/psp/menu.c | 2 +- unzip/unzip.c | 2 +- 8 files changed, 12 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index 0d37237c..30b246fd 100644 --- a/Makefile +++ b/Makefile @@ -164,9 +164,12 @@ else OBJS += platform/common/mp3_dummy.o endif +ifeq "$(PLATFORM)" "libretro" # zlib OBJS += zlib/gzio.o zlib/inffast.o zlib/inflate.o zlib/inftrees.o zlib/trees.o \ zlib/deflate.o 
zlib/crc32.o zlib/adler32.o zlib/zutil.o zlib/compress.o zlib/uncompr.o +CFLAGS += -Izlib +endif # unzip OBJS += unzip/unzip.o diff --git a/configure b/configure index 6cc42545..35130cb8 100755 --- a/configure +++ b/configure @@ -258,7 +258,7 @@ check_zlib() #include int main(void) { uncompress(0, 0, 0, 0); } EOF - compile_binary + compile_binary "$@" } check_libpng() @@ -308,8 +308,8 @@ EOF compile_object "$@" } -#MAIN_LDLIBS="$MAIN_LDLIBS -lz" -#check_zlib || fail "please install zlib (libz-dev)" +MAIN_LDLIBS="$MAIN_LDLIBS -lz" +check_zlib -lz || fail "please install zlib (libz-dev)" MAIN_LDLIBS="-lpng $MAIN_LDLIBS" check_libpng || fail "please install libpng (libpng-dev)" diff --git a/pico/cart.c b/pico/cart.c index 377ecbc2..bb452c0b 100644 --- a/pico/cart.c +++ b/pico/cart.c @@ -8,9 +8,9 @@ */ #include "pico_int.h" -#include "../zlib/zlib.h" #include "../cpu/debug.h" #include "../unzip/unzip.h" +#include static int rom_alloc_size; diff --git a/pico/state.c b/pico/state.c index 089957eb..88b8655c 100644 --- a/pico/state.c +++ b/pico/state.c @@ -7,7 +7,7 @@ */ #include "pico_int.h" -#include +#include #include "../cpu/sh2/sh2.h" #include "sound/ym2612.h" diff --git a/platform/gizmondo/menu.c b/platform/gizmondo/menu.c index d4f6bd49..dd6740a3 100644 --- a/platform/gizmondo/menu.c +++ b/platform/gizmondo/menu.c @@ -32,7 +32,7 @@ #include #include -#include +#include #define gizKeyUnkn "???" diff --git a/platform/gp2x/emu.c b/platform/gp2x/emu.c index 244f15d9..7741ba60 100644 --- a/platform/gp2x/emu.c +++ b/platform/gp2x/emu.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #ifdef BENCHMARK #define OSD_FPS_X 220 diff --git a/platform/psp/menu.c b/platform/psp/menu.c index 24c6d606..f01f0ae5 100644 --- a/platform/psp/menu.c +++ b/platform/psp/menu.c @@ -33,7 +33,7 @@ #include #include -#include +#include #define pspKeyUnkn "???" 
diff --git a/unzip/unzip.c b/unzip/unzip.c index ddf59aa6..10291652 100644 --- a/unzip/unzip.c +++ b/unzip/unzip.c @@ -5,7 +5,7 @@ #include #include -#include "zlib/zlib.h" +#include /* public globals */ //int gUnzipQuiet = 0; /* flag controls error messages */ From 01685eefb022ab2d13cc154a43807e9d20c77c03 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 17 Apr 2017 00:36:30 +0300 Subject: [PATCH 0039/1110] fix broken error path --- pico/cart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pico/cart.c b/pico/cart.c index bb452c0b..d091d623 100644 --- a/pico/cart.c +++ b/pico/cart.c @@ -569,7 +569,7 @@ int PicoCartLoad(pm_file *f,unsigned char **prom,unsigned int *psize,int is_sms) bytes_read = pm_read(rom,size,f); // Load up the rom if (bytes_read <= 0) { elprintf(EL_STATUS, "read failed"); - free(rom); + plat_munmap(rom, rom_alloc_size); return 3; } From 0e352905c7aa80b166933970abbcecfce96ad64e Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 17 Apr 2017 00:36:57 +0300 Subject: [PATCH 0040/1110] fix some warnings warning: array subscript is above array bounds [-Warray-bounds] --- pico/carthw/svp/ssp16.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pico/carthw/svp/ssp16.c b/pico/carthw/svp/ssp16.c index e8de0013..2fb40015 100644 --- a/pico/carthw/svp/ssp16.c +++ b/pico/carthw/svp/ssp16.c @@ -474,6 +474,8 @@ static int get_inc(int mode) static u32 pm_io(int reg, int write, u32 d) { + unsigned int *pmac; + if (ssp->emu_status & SSP_PMC_SET) { // this MUST be blind r or w @@ -484,7 +486,8 @@ static u32 pm_io(int reg, int write, u32 d) return 0; } elprintf(EL_SVP, "PM%i (%c) set to %08x @ %04x", reg, write ? 'w' : 'r', rPMC.v, GET_PPC_OFFS()); - ssp->pmac_read[write ? reg + 6 : reg] = rPMC.v; + pmac = write ? 
ssp->pmac_write : ssp->pmac_read; + pmac[reg] = rPMC.v; ssp->emu_status &= ~SSP_PMC_SET; if ((rPMC.v & 0x7fffff) == 0x1c8000 || (rPMC.v & 0x7fffff) == 0x1c8240) { elprintf(EL_SVP, "ssp IRAM copy from %06x to %04x", (ssp->RAM1[0]-1)<<1, (rPMC.v&0x7fff)<<1); @@ -573,7 +576,8 @@ static u32 pm_io(int reg, int write, u32 d) } // PMC value corresponds to last PMR accessed (not sure). - rPMC.v = ssp->pmac_read[write ? reg + 6 : reg]; + pmac = write ? ssp->pmac_write : ssp->pmac_read; + rPMC.v = pmac[reg]; return d; } From ad949800e42b25e498d58ff73e656df4c0bc2c6d Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 4 Aug 2017 00:36:13 +0300 Subject: [PATCH 0041/1110] fix a name clash with newer zlib --- pico/cart.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pico/cart.c b/pico/cart.c index d091d623..1ce80597 100644 --- a/pico/cart.c +++ b/pico/cart.c @@ -49,7 +49,7 @@ typedef struct _cso_struct } cso_struct; -static int uncompress2(void *dest, int destLen, void *source, int sourceLen) +static int uncompress_buf(void *dest, int destLen, void *source, int sourceLen) { z_stream stream; int err; @@ -326,7 +326,7 @@ size_t pm_read(void *ptr, size_t bytes, pm_file *stream) } cso->block_in_buff = block; } - rret = uncompress2(tmp_dst, 2048, cso->in_buff, read_len); + rret = uncompress_buf(tmp_dst, 2048, cso->in_buff, read_len); if (rret != 0) { elprintf(EL_STATUS, "cso: uncompress failed @ %08x with %i", read_pos, rret); break; From 14caeebcc7fc4a73731eec05c1c6150fa29d1374 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 4 Aug 2017 01:08:17 +0300 Subject: [PATCH 0042/1110] fix android build --- jni/Android.mk | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/jni/Android.mk b/jni/Android.mk index 122b18e8..1b8b9563 100644 --- a/jni/Android.mk +++ b/jni/Android.mk @@ -52,6 +52,9 @@ else use_sh2mame = 1 endif +# PD is currently not strict aliasing safe +LOCAL_CFLAGS += -fno-strict-aliasing + # sources SRCS_COMMON := DEFINES := @@ -69,7 +72,7 
@@ LOCAL_SRC_FILES += $(R)zlib/gzio.c $(R)zlib/inffast.c $(R)zlib/inflate.c \ $(R)zlib/crc32.c $(R)zlib/adler32.c $(R)zlib/zutil.c \ $(R)zlib/compress.c $(R)zlib/uncompr.c -LOCAL_SRC_FILES += $(R)unzip/unzip.c $(R)unzip/unzip_stream.c +LOCAL_SRC_FILES += $(R)unzip/unzip.c LOCAL_C_INCLUDES += $(R) From c5c5dd7132a11b1774a4e5ebbf68eb72bf8764a7 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 6 Aug 2017 01:48:25 +0300 Subject: [PATCH 0043/1110] cd: fix missed null terminator --- pico/cd/cd_image.c | 1 + 1 file changed, 1 insertion(+) diff --git a/pico/cd/cd_image.c b/pico/cd/cd_image.c index 07b55ceb..ae2ef0cd 100644 --- a/pico/cd/cd_image.c +++ b/pico/cd/cd_image.c @@ -61,6 +61,7 @@ static void to_upper(char *d, const char *s) else *d = *s; } + *d = 0; } // cdd.c uses lba - 150 From bc38f4d24fcdbbefbda10ef023c0ffbab4b208b8 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 6 Aug 2017 02:05:09 +0300 Subject: [PATCH 0044/1110] remove some dead code --- pico/draw.c | 40 ----------------- pico/draw_arm.S | 113 ------------------------------------------------ 2 files changed, 153 deletions(-) diff --git a/pico/draw.c b/pico/draw.c index a03d8873..ca9a140b 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -1197,46 +1197,6 @@ void PicoDoHighPal555(int sh) } } -#if 0 -static void FinalizeLineBGR444(int sh, int line) -{ - unsigned short *pd=DrawLineDest; - unsigned char *ps=HighCol+8; - unsigned short *pal=Pico.cram; - int len, i, t, mask=0xff; - - if (Pico.video.reg[12]&1) { - len = 320; - } else { - if(!(PicoOpt&POPT_DIS_32C_BORDER)) pd+=32; - len = 256; - } - - if(sh) { - pal=HighPal; - if(Pico.m.dirtyPal) { - blockcpy(pal, Pico.cram, 0x40*2); - // shadowed pixels - for(i = 0x3f; i >= 0; i--) - pal[0x40|i] = pal[0xc0|i] = (unsigned short)((pal[i]>>1)&0x0777); - // hilighted pixels - for(i = 0x3f; i >= 0; i--) { - t=pal[i]&0xeee;t+=0x444;if(t&0x10)t|=0xe;if(t&0x100)t|=0xe0;if(t&0x1000)t|=0xe00;t&=0xeee; - pal[0x80|i]=(unsigned short)t; - } - Pico.m.dirtyPal = 0; - } - } - - if (!sh 
&& (rendstatus & PDRAW_SPR_LO_ON_HI)) - mask=0x3f; // accurate sprites - - for(i = 0; i < len; i++) - pd[i] = pal[ps[i] & mask]; -} -#endif - - void FinalizeLine555(int sh, int line) { unsigned short *pd=DrawLineDest; diff --git a/pico/draw_arm.S b/pico/draw_arm.S index 0bf297b9..bbdda5b0 100644 --- a/pico/draw_arm.S +++ b/pico/draw_arm.S @@ -1479,119 +1479,6 @@ DrawWindow: .endm -.global FinalizeLineBGR444 @ int sh - -FinalizeLineBGR444: - stmfd sp!, {r4-r6,lr} - mov r6, r0 - ldr lr, =(Pico+0x22228) @ Pico.video - ldr r0, =DrawLineDest - ldrb r12, [lr, #12] - ldr r0, [r0] - sub r3, lr, #0x128 @ r3=Pico.cram - - tst r12, #1 - movne r2, #320/4 @ len - bne .fl_no32colBGR444 - ldr r4, =PicoOpt - mov r2, #256/4 - ldr r4, [r4] - tst r4, #0x100 - addeq r0, r0, #32*2 - -.fl_no32colBGR444: - tst r6, r6 - beq .fl_noshBGR444 - - ldr r4, =HighPal - - ldrb r12, [lr, #-0x1a] @ 0x2220e ~ dirtyPal - tst r12, r12 - moveq r3, r4 - beq .fl_noshBGR444 - mov r12, #0 - strb r12, [lr, #-0x1a] - - mov lr, #0x40/8 - @ copy pal: -.fl_loopcpBGR444: - ldmia r3!, {r1,r5,r6,r12} - subs lr, lr, #1 - stmia r4!, {r1,r5,r6,r12} - bne .fl_loopcpBGR444 - - @ shadowed pixels: - mov r12, #0x0077 - orr r12,r12,#0x0700 - orr r12,r12,r12,lsl #16 - sub r3, r3, #0x40*2 - add r5, r4, #0x80*2 - mov lr, #0x40/4 -.fl_loopcpBGR444_sh: - ldmia r3!, {r1,r6} - subs lr, lr, #1 - and r1, r12, r1, lsr #1 - and r6, r12, r6, lsr #1 - stmia r4!, {r1,r6} - stmia r5!, {r1,r6} - bne .fl_loopcpBGR444_sh - - @ hilighted pixels: - sub r3, r3, #0x40*2 - mov lr, #0x40/2 -.fl_loopcpBGR444_hi: - ldr r1, [r3], #4 - TileDoShHi2Pixels444 r1 - str r1, [r4], #4 - subs lr, lr, #1 - bne .fl_loopcpBGR444_hi - - sub r3, r4, #0x40*3*2 - mov r6, #1 - - -.fl_noshBGR444: - ldr r12,=rendstatus - eors r6, r6, #1 @ sh is 0 - ldr r12,[r12] - mov lr, #0xff - tstne r12,#PDRAW_ACC_SPRITES - -.if OVERRIDE_HIGHCOL - ldr r1, =HighCol - movne lr, #0x3f - ldr r1, [r1] - mov lr, lr, lsl #1 - add r1, r1, #8 -.else - ldr r1, =(HighCol+8) - movne lr, #0x3f - 
mov lr, lr, lsl #1 -.endif - -.fl_loopBGR444: - ldr r12, [r1], #4 - subs r2, r2, #1 - - and r4, lr, r12, lsl #1 - ldrh r4, [r3, r4] - and r5, lr, r12, lsr #7 - ldrh r5, [r3, r5] - and r6, lr, r12, lsr #15 - ldrh r6, [r3, r6] - and r12,lr, r12, lsr #23 - ldrh r12,[r3, r12] @ 1c.i. - orr r4, r4, r5, lsl #16 - orr r5, r6, r12,lsl #16 - - stmia r0!, {r4,r5} - bne .fl_loopBGR444 - - - ldmfd sp!, {r4-r6,lr} - bx lr - - @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ From ea38612fad50103e224a3d00492d40b7dcff9e94 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 6 Aug 2017 02:03:35 +0300 Subject: [PATCH 0045/1110] eliminate texrels (wip) --- pico/32x/draw.c | 4 +- pico/cd/gfx_dma.c | 2 +- pico/debug.c | 2 +- pico/draw.c | 183 ++++++++++++++++++++++++-------------------- pico/draw_arm.S | 182 ++++++++++++++++++++++--------------------- pico/mode4.c | 8 +- pico/pico.c | 3 + pico/pico.h | 6 +- pico/pico_cmn.c | 2 +- pico/pico_int.h | 15 +++- pico/pico_int_o32.h | 5 ++ pico/videoport.c | 12 +-- platform/gp2x/emu.c | 2 +- platform/psp/emu.c | 4 +- tools/Makefile | 5 +- tools/mkoffsets.c | 31 ++++++++ 16 files changed, 268 insertions(+), 198 deletions(-) create mode 100644 pico/pico_int_o32.h create mode 100644 tools/mkoffsets.c diff --git a/pico/32x/draw.c b/pico/32x/draw.c index 66f67a7e..9500e088 100644 --- a/pico/32x/draw.c +++ b/pico/32x/draw.c @@ -83,7 +83,7 @@ static void convert_pal555(int invert_prio) } // this is almost never used (Wiz and menu bg gen only) -void FinalizeLine32xRGB555(int sh, int line) +void FinalizeLine32xRGB555(int sh, int line, struct PicoEState *est) { unsigned short *pd = DrawLineDest; unsigned short *pal = Pico32xMem->pal_native; @@ -91,7 +91,7 @@ void FinalizeLine32xRGB555(int sh, int line) unsigned short *dram, *p32x; unsigned char mdbg; - FinalizeLine555(sh, line); + FinalizeLine555(sh, line, est); if ((Pico32x.vdp_regs[0] & P32XV_Mx) == 0 || // 32x blanking // XXX: how is 32col mode hadled by real hardware? 
diff --git a/pico/cd/gfx_dma.c b/pico/cd/gfx_dma.c index 44fa7ab2..a24a1c3c 100644 --- a/pico/cd/gfx_dma.c +++ b/pico/cd/gfx_dma.c @@ -37,7 +37,7 @@ PICO_INTERNAL void DmaSlowCell(unsigned int source, unsigned int a, int len, uns // AutoIncrement a=(u16)(a+inc); } - rendstatus |= PDRAW_SPRITES_MOVED; + Pico.est.rendstatus |= PDRAW_SPRITES_MOVED; break; case 3: // cram diff --git a/pico/debug.c b/pico/debug.c index 959331f8..91bff566 100644 --- a/pico/debug.c +++ b/pico/debug.c @@ -203,7 +203,7 @@ void PDebugShowPalette(unsigned short *screen, int stride) if (PicoAHW & PAHW_SMS) PicoDoHighPal555M4(); else - PicoDoHighPal555(1); + PicoDoHighPal555(1, 0, &Pico.est); Pico.m.dirtyPal = 1; screen += 16*stride+8; diff --git a/pico/draw.c b/pico/draw.c index ca9a140b..ff84be57 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -54,9 +54,8 @@ int HighPreSpr[80*2+1]; // slightly preprocessed sprites #define SPRL_LO_ABOVE_HI 0x10 // low priority sprites may be on top of hi unsigned char HighLnSpr[240][3 + MAX_LINE_SPRITES]; // sprite_count, ^flags, tile_count, [spritep]... 
-int rendstatus, rendstatus_old; +int rendstatus_old; int rendlines; -int DrawScanline; int PicoDrawMask = -1; static int skip_next_line=0; @@ -75,12 +74,15 @@ struct TileStrip // stuff available in asm: #ifdef _ASM_DRAW_C -void DrawWindow(int tstart, int tend, int prio, int sh); -void DrawAllSprites(unsigned char *sprited, int prio, int sh); -void DrawTilesFromCache(int *hc, int sh, int rlim); -void DrawSpritesSHi(unsigned char *sprited); -void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells); -void FinalizeLineBGR444(int sh, int line); +void DrawWindow(int tstart, int tend, int prio, int sh, + struct PicoEState *est); +void DrawAllSprites(unsigned char *sprited, int prio, int sh, + struct PicoEState *est); +void DrawTilesFromCache(int *hc, int sh, int rlim, + struct PicoEState *est); +void DrawSpritesSHi(unsigned char *sprited, struct PicoEState *est); +void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells, + struct PicoEState *est); void *blockcpy(void *dst, const void *src, size_t n); void blockcpy_or(void *dst, void *src, size_t n, int pat); #else @@ -258,7 +260,7 @@ static void DrawStrip(struct TileStrip *ts, int plane_sh, int cellskip) // terminate the cache list *ts->hc = 0; // if oldcode wasn't changed, it means all layer is hi priority - if (oldcode == -1) rendstatus |= PDRAW_PLANE_HI_PRIO; + if (oldcode == -1) Pico.est.rendstatus |= PDRAW_PLANE_HI_PRIO; } // this is messy @@ -266,7 +268,7 @@ void DrawStripVSRam(struct TileStrip *ts, int plane_sh, int cellskip) { int tilex,dx,code=0,addr=0,cell=0; int oldcode=-1,blank=-1; // The tile we know is blank - int pal=0,scan=DrawScanline; + int pal=0,scan=Pico.est.DrawScanline; // Draw tiles across screen: tilex=(-ts->hscroll)>>3; @@ -317,7 +319,7 @@ void DrawStripVSRam(struct TileStrip *ts, int plane_sh, int cellskip) // terminate the cache list *ts->hc = 0; - if (oldcode == -1) rendstatus |= PDRAW_PLANE_HI_PRIO; + if (oldcode == -1) Pico.est.rendstatus |= PDRAW_PLANE_HI_PRIO; } 
#endif @@ -374,7 +376,8 @@ void DrawStripInterlace(struct TileStrip *ts) // -------------------------------------------- #ifndef _ASM_DRAW_C -static void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells) +static void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells, + struct PicoEState *est) { struct PicoVideo *pvid=&Pico.video; const char shift[4]={5,6,5,7}; // 32,64 or 128 sized tilemaps (2 is invalid) @@ -401,7 +404,7 @@ static void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells) else ts.nametab=(pvid->reg[2]&0x38)<< 9; // A htab=pvid->reg[13]<<9; // Horizontal scroll table address - if ( pvid->reg[11]&2) htab+=DrawScanline<<1; // Offset by line + if ( pvid->reg[11]&2) htab+=est->DrawScanline<<1; // Offset by line if ((pvid->reg[11]&1)==0) htab&=~0xf; // Offset by tile htab+=plane_sh&1; // A or B @@ -413,7 +416,7 @@ static void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells) vscroll=Pico.vsram[plane_sh&1]; // Get vertical scroll value // Find the line in the name table - ts.line=(vscroll+(DrawScanline<<1))&((ymask<<1)|1); + ts.line=(vscroll+(est->DrawScanline<<1))&((ymask<<1)|1); ts.nametab+=(ts.line>>4)<DrawScanline)&ymask; ts.nametab+=(ts.line>>3)<reg[12]&1) { nametab=(pvid->reg[3]&0x3c)<<9; // 40-cell mode - nametab+=(DrawScanline>>3)<<6; + nametab+=(est->DrawScanline>>3)<<6; } else { nametab=(pvid->reg[3]&0x3e)<<9; // 32-cell mode - nametab+=(DrawScanline>>3)<<5; + nametab+=(est->DrawScanline>>3)<<5; } tilex=tstart<<1; - if (!(rendstatus & PDRAW_WND_DIFF_PRIO)) { + if (!(est->rendstatus & PDRAW_WND_DIFF_PRIO)) { // check the first tile code code=Pico.vram[nametab+tilex]; // if the whole window uses same priority (what is often the case), we may be able to skip this field @@ -465,7 +469,7 @@ static void DrawWindow(int tstart, int tend, int prio, int sh) // int *hcache } tend<<=1; - ty=(DrawScanline&7)<<1; // Y-Offset into tile + ty=(est->DrawScanline&7)<<1; // Y-Offset into tile // Draw tiles 
across screen: if (!sh) @@ -478,7 +482,7 @@ static void DrawWindow(int tstart, int tend, int prio, int sh) // int *hcache code=Pico.vram[nametab+tilex]; if (code==blank) continue; if ((code>>15) != prio) { - rendstatus |= PDRAW_WND_DIFF_PRIO; + est->rendstatus |= PDRAW_WND_DIFF_PRIO; continue; } @@ -504,7 +508,7 @@ static void DrawWindow(int tstart, int tend, int prio, int sh) // int *hcache code=Pico.vram[nametab+tilex]; if(code==blank) continue; if((code>>15) != prio) { - rendstatus |= PDRAW_WND_DIFF_PRIO; + est->rendstatus |= PDRAW_WND_DIFF_PRIO; continue; } @@ -538,23 +542,23 @@ static void DrawTilesFromCacheShPrep(void) // we can process whole line and then act as if sh/hi mode was off, // but leave lo pri op sprite markers alone int c = 320/4, *zb = (int *)(HighCol+8); - rendstatus |= PDRAW_SHHI_DONE; + Pico.est.rendstatus |= PDRAW_SHHI_DONE; while (c--) { *zb++ &= 0xbfbfbfbf; } } -static void DrawTilesFromCache(int *hc, int sh, int rlim) +static void DrawTilesFromCache(int *hc, int sh, int rlim, struct PicoEState *est) { int code, addr, dx; int pal; // *ts->hc++ = code | (dx<<16) | (ty<<25); // cache it - if (sh && (rendstatus & (PDRAW_SHHI_DONE|PDRAW_PLANE_HI_PRIO))) + if (sh && (est->rendstatus & (PDRAW_SHHI_DONE|PDRAW_PLANE_HI_PRIO))) { - if (!(rendstatus & PDRAW_SHHI_DONE)) + if (!(est->rendstatus & PDRAW_SHHI_DONE)) DrawTilesFromCacheShPrep(); sh = 0; } @@ -658,7 +662,7 @@ static void DrawSprite(int *sprite, int sh) height=(sy>>24)&7; // Width and height in tiles sy=(sy<<16)>>16; // Y - row=DrawScanline-sy; // Row of the sprite we are on + row=Pico.est.DrawScanline-sy; // Row of the sprite we are on if (code&0x1000) row=(height<<3)-1-row; // Flip Y @@ -706,7 +710,7 @@ static void DrawSpriteInterlace(unsigned int *sprite) width=(height>>2)&3; height&=3; width++; height++; // Width and height in tiles - row=(DrawScanline<<1)-sy; // Row of the sprite we are on + row=(Pico.est.DrawScanline<<1)-sy; // Row of the sprite we are on code=sprite[1]; 
sx=((code>>16)&0x1ff)-0x78; // X @@ -738,7 +742,7 @@ static void DrawSpriteInterlace(unsigned int *sprite) static void DrawAllSpritesInterlace(int pri, int sh) { struct PicoVideo *pvid=&Pico.video; - int i,u,table,link=0,sline=DrawScanline<<1; + int i,u,table,link=0,sline=Pico.est.DrawScanline<<1; unsigned int *sprites[80]; // Sprite index table=pvid->reg[5]&0x7f; @@ -790,7 +794,7 @@ static void DrawAllSpritesInterlace(int pri, int sh) * Index + 0 : hhhhvvvv ----hhvv yyyyyyyy yyyyyyyy // v, h: vert./horiz. size * Index + 4 : xxxxxxxx xxxxxxxx pccvhnnn nnnnnnnn // x: x coord + 8 */ -static void DrawSpritesSHi(unsigned char *sprited) +static void DrawSpritesSHi(unsigned char *sprited, const struct PicoEState *est) { int (*fTileFunc)(int sx,int addr,int pal); unsigned char *p; @@ -835,7 +839,7 @@ static void DrawSpritesSHi(unsigned char *sprited) height=(sy>>24)&7; // Width and height in tiles sy=(sy<<16)>>16; // Y - row=DrawScanline-sy; // Row of the sprite we are on + row=est->DrawScanline-sy; // Row of the sprite we are on if (code&0x1000) row=(height<<3)-1-row; // Flip Y @@ -867,7 +871,7 @@ static void DrawSpritesHiAS(unsigned char *sprited, int sh) cnt = sprited[0] & 0x7f; if (cnt == 0) return; - rendstatus |= PDRAW_SPR_LO_ON_HI; + Pico.est.rendstatus |= PDRAW_SPR_LO_ON_HI; p = &sprited[3]; @@ -906,7 +910,7 @@ static void DrawSpritesHiAS(unsigned char *sprited, int sh) height=(sy>>24)&7; // Width and height in tiles sy=(sy<<16)>>16; // Y - row=DrawScanline-sy; // Row of the sprite we are on + row=Pico.est.DrawScanline-sy; // Row of the sprite we are on if (code&0x1000) row=(height<<3)-1-row; // Flip Y @@ -942,7 +946,7 @@ static void DrawSpritesHiAS(unsigned char *sprited, int sh) /* nasty 2: sh operator pass */ sprited[0] = sh_cnt; - DrawSpritesSHi(sprited); + DrawSpritesSHi(sprited, &Pico.est); } @@ -954,7 +958,8 @@ static void DrawSpritesHiAS(unsigned char *sprited, int sh) void PrepareSprites(int full) { - struct PicoVideo *pvid=&Pico.video; + const struct 
PicoVideo *pvid=&Pico.video; + const struct PicoEState *est=&Pico.est; int u,link=0,sh; int table=0; int *pd = HighPreSpr; @@ -991,10 +996,11 @@ void PrepareSprites(int full) sy = (pack << 16) >> 16; height = (pack >> 24) & 0xf; - if (sy < max_lines && sy + (height<<3) > DrawScanline && // sprite onscreen (y)? + if (sy < max_lines && + sy + (height<<3) > est->DrawScanline && // sprite onscreen (y)? (sx > -24 || sx < max_width)) // onscreen x { - int y = (sy >= DrawScanline) ? sy : DrawScanline; + int y = (sy >= est->DrawScanline) ? sy : est->DrawScanline; int entry = ((pd - HighPreSpr) / 2) | ((code2>>8)&0x80); for (; y < sy + (height<<3) && y < max_lines; y++) { @@ -1047,7 +1053,7 @@ found:; sx = (code2>>16)&0x1ff; sx -= 0x78; // Get X coordinate + 8 - if (sy < max_lines && sy + (height<<3) > DrawScanline) // sprite onscreen (y)? + if (sy < max_lines && sy + (height<<3) > est->DrawScanline) // sprite onscreen (y)? { int entry, y, sx_min, onscr_x, maybe_op = 0; @@ -1057,7 +1063,7 @@ found:; maybe_op = SPRL_MAY_HAVE_OP; entry = ((pd - HighPreSpr) / 2) | ((code2>>8)&0x80); - y = (sy >= DrawScanline) ? sy : DrawScanline; + y = (sy >= est->DrawScanline) ? 
sy : est->DrawScanline; for (; y < sy + (height<<3) && y < max_lines; y++) { unsigned char *p = &HighLnSpr[y][0]; @@ -1110,16 +1116,17 @@ found:; } #ifndef _ASM_DRAW_C -static void DrawAllSprites(unsigned char *sprited, int prio, int sh) +static void DrawAllSprites(unsigned char *sprited, int prio, int sh, + struct PicoEState *est) { - int rs = rendstatus; + int rs = est->rendstatus; unsigned char *p; int cnt; if (rs & (PDRAW_SPRITES_MOVED|PDRAW_DIRTY_SPRITES)) { //elprintf(EL_STATUS, "PrepareSprites(%i)", (rs>>4)&1); PrepareSprites(rs & PDRAW_DIRTY_SPRITES); - rendstatus = rs & ~(PDRAW_SPRITES_MOVED|PDRAW_DIRTY_SPRITES); + est->rendstatus = rs & ~(PDRAW_SPRITES_MOVED|PDRAW_DIRTY_SPRITES); } cnt = sprited[0] & 0x7f; @@ -1159,7 +1166,7 @@ void BackFill(int reg7, int sh) unsigned short HighPal[0x100]; #ifndef _ASM_DRAW_C -void PicoDoHighPal555(int sh) +void PicoDoHighPal555(int sh, int line, struct PicoEState *est) { unsigned int *spal, *dpal; unsigned int t, i; @@ -1205,7 +1212,7 @@ void FinalizeLine555(int sh, int line) int len; if (Pico.m.dirtyPal) - PicoDoHighPal555(sh); + PicoDoHighPal555(sh, line, est); if (Pico.video.reg[12]&1) { len = 320; @@ -1217,7 +1224,7 @@ void FinalizeLine555(int sh, int line) { #ifndef PSP int i, mask=0xff; - if (!sh && (rendstatus & PDRAW_SPR_LO_ON_HI)) + if (!sh && (est->rendstatus & PDRAW_SPR_LO_ON_HI)) mask=0x3f; // accurate sprites, upper bits are priority stuff for (i = 0; i < len; i++) @@ -1225,7 +1232,7 @@ void FinalizeLine555(int sh, int line) #else extern void amips_clut(unsigned short *dst, unsigned char *src, unsigned short *pal, int count); extern void amips_clut_6bit(unsigned short *dst, unsigned char *src, unsigned short *pal, int count); - if (!sh && (rendstatus & PDRAW_SPR_LO_ON_HI)) + if (!sh && (est->rendstatus & PDRAW_SPR_LO_ON_HI)) amips_clut_6bit(pd, ps, pal, len); else amips_clut(pd, ps, pal, len); #endif @@ -1233,10 +1240,10 @@ void FinalizeLine555(int sh, int line) } #endif -static void FinalizeLine8bit(int sh, 
int line) +static void FinalizeLine8bit(int sh, int line, struct PicoEState *est) { unsigned char *pd = DrawLineDest; - int len, rs = rendstatus; + int len, rs = est->rendstatus; static int dirty_count; if (!sh && Pico.m.dirtyPal == 1) @@ -1246,7 +1253,7 @@ static void FinalizeLine8bit(int sh, int line) dirty_count = 1; else dirty_count++; rs |= PDRAW_SONIC_MODE; - rendstatus = rs; + est->rendstatus = rs; if (dirty_count == 3) { blockcpy(HighPal, Pico.cram, 0x40*2); } else if (dirty_count == 11) { @@ -1273,18 +1280,19 @@ static void FinalizeLine8bit(int sh, int line) } } -static void (*FinalizeLine)(int sh, int line); +static void (*FinalizeLine)(int sh, int line, struct PicoEState *est); // -------------------------------------------- static int DrawDisplay(int sh) { - unsigned char *sprited = &HighLnSpr[DrawScanline][0]; + struct PicoEState *est=&Pico.est; + unsigned char *sprited = &HighLnSpr[est->DrawScanline][0]; struct PicoVideo *pvid=&Pico.video; int win=0,edge=0,hvwind=0; int maxw,maxcells; - rendstatus &= ~(PDRAW_SHHI_DONE|PDRAW_PLANE_HI_PRIO); + est->rendstatus &= ~(PDRAW_SHHI_DONE|PDRAW_PLANE_HI_PRIO); if (pvid->reg[12]&1) { maxw = 328; maxcells = 40; @@ -1296,8 +1304,8 @@ static int DrawDisplay(int sh) win=pvid->reg[0x12]; edge=(win&0x1f)<<3; - if (win&0x80) { if (DrawScanline>=edge) hvwind=1; } - else { if (DrawScanline< edge) hvwind=1; } + if (win&0x80) { if (est->DrawScanline>=edge) hvwind=1; } + else { if (est->DrawScanline< edge) hvwind=1; } if (!hvwind) // we might have a vertical window here { @@ -1315,53 +1323,56 @@ static int DrawDisplay(int sh) /* - layer B low - */ if (PicoDrawMask & PDRAW_LAYERB_ON) - DrawLayer(1|(sh<<1), HighCacheB, 0, maxcells); + DrawLayer(1|(sh<<1), HighCacheB, 0, maxcells, est); /* - layer A low - */ if (!(PicoDrawMask & PDRAW_LAYERA_ON)); else if (hvwind == 1) - DrawWindow(0, maxcells>>1, 0, sh); + DrawWindow(0, maxcells>>1, 0, sh, est); else if (hvwind == 2) { - DrawLayer(0|(sh<<1), HighCacheA, (win&0x80) ? 
0 : edge<<1, (win&0x80) ? edge<<1 : maxcells); - DrawWindow( (win&0x80) ? edge : 0, (win&0x80) ? maxcells>>1 : edge, 0, sh); + DrawLayer(0|(sh<<1), HighCacheA, (win&0x80) ? 0 : edge<<1, (win&0x80) ? edge<<1 : maxcells, est); + DrawWindow( (win&0x80) ? edge : 0, (win&0x80) ? maxcells>>1 : edge, 0, sh, est); } else - DrawLayer(0|(sh<<1), HighCacheA, 0, maxcells); + DrawLayer(0|(sh<<1), HighCacheA, 0, maxcells, est); /* - sprites low - */ if (!(PicoDrawMask & PDRAW_SPRITES_LOW_ON)); - else if (rendstatus & PDRAW_INTERLACE) + else if (Pico.est.rendstatus & PDRAW_INTERLACE) DrawAllSpritesInterlace(0, sh); else if (sprited[1] & SPRL_HAVE_LO) - DrawAllSprites(sprited, 0, sh); + DrawAllSprites(sprited, 0, sh, est); /* - layer B hi - */ if ((PicoDrawMask & PDRAW_LAYERB_ON) && HighCacheB[0]) - DrawTilesFromCache(HighCacheB, sh, maxw); + DrawTilesFromCache(HighCacheB, sh, maxw, est); /* - layer A hi - */ if (!(PicoDrawMask & PDRAW_LAYERA_ON)); else if (hvwind == 1) - DrawWindow(0, maxcells>>1, 1, sh); + DrawWindow(0, maxcells>>1, 1, sh, est); else if (hvwind == 2) { - if (HighCacheA[0]) DrawTilesFromCache(HighCacheA, sh, (win&0x80) ? edge<<4 : maxw); - DrawWindow((win&0x80) ? edge : 0, (win&0x80) ? maxcells>>1 : edge, 1, sh); + if (HighCacheA[0]) + DrawTilesFromCache(HighCacheA, sh, (win&0x80) ? edge<<4 : maxw, est); + DrawWindow((win&0x80) ? edge : 0, (win&0x80) ? 
maxcells>>1 : edge, 1, sh, est); } else - if (HighCacheA[0]) DrawTilesFromCache(HighCacheA, sh, maxw); + if (HighCacheA[0]) + DrawTilesFromCache(HighCacheA, sh, maxw, est); /* - sprites hi - */ if (!(PicoDrawMask & PDRAW_SPRITES_HI_ON)); - else if (rendstatus & PDRAW_INTERLACE) + else if (Pico.est.rendstatus & PDRAW_INTERLACE) DrawAllSpritesInterlace(1, sh); // have sprites without layer pri bit ontop of sprites with that bit else if ((sprited[1] & 0xd0) == 0xd0 && (PicoOpt & POPT_ACC_SPRITES)) DrawSpritesHiAS(sprited, sh); else if (sh && (sprited[1] & SPRL_MAY_HAVE_OP)) - DrawSpritesSHi(sprited); + DrawSpritesSHi(sprited, est); else if (sprited[1] & SPRL_HAVE_HI) - DrawAllSprites(sprited, 1, 0); + DrawAllSprites(sprited, 1, 0, est); #if 0 { int *c, a, b; for (a = 0, c = HighCacheA; *c; c++, a++); for (b = 0, c = HighCacheB; *c; c++, b++); - printf("%i:%03i: a=%i, b=%i\n", Pico.m.frame_count, DrawScanline, a, b); + printf("%i:%03i: a=%i, b=%i\n", Pico.m.frame_count, + Pico.est.DrawScanline, a, b); } #endif @@ -1374,27 +1385,27 @@ PICO_INTERNAL void PicoFrameStart(void) int offs = 8, lines = 224; // prepare to do this frame - rendstatus = 0; + Pico.est.rendstatus = 0; if ((Pico.video.reg[12] & 6) == 6) - rendstatus |= PDRAW_INTERLACE; // interlace mode + Pico.est.rendstatus |= PDRAW_INTERLACE; // interlace mode if (!(Pico.video.reg[12] & 1)) - rendstatus |= PDRAW_32_COLS; + Pico.est.rendstatus |= PDRAW_32_COLS; if (Pico.video.reg[1] & 8) { offs = 0; lines = 240; } - if (rendstatus != rendstatus_old || lines != rendlines) { + if (Pico.est.rendstatus != rendstatus_old || lines != rendlines) { rendlines = lines; // mode_change() might reset rendstatus_old by calling SetColorFormat emu_video_mode_change((lines == 240) ? 0 : 8, lines, (Pico.video.reg[12] & 1) ? 
0 : 1); - rendstatus_old = rendstatus; + rendstatus_old = Pico.est.rendstatus; } HighCol = HighColBase + offs * HighColIncrement; DrawLineDest = (char *)DrawLineDestBase + offs * DrawLineDestIncrement; - DrawScanline = 0; + Pico.est.DrawScanline = 0; skip_next_line = 0; if (PicoOpt & POPT_ALT_RENDERER) @@ -1413,7 +1424,7 @@ static void DrawBlankedLine(int line, int offs, int sh, int bgc) BackFill(bgc, sh); if (FinalizeLine != NULL) - FinalizeLine(sh, line); + FinalizeLine(sh, line, &Pico.est); if (PicoScanEnd != NULL) PicoScanEnd(line + offs); @@ -1431,7 +1442,7 @@ static void PicoLine(int line, int offs, int sh, int bgc) return; } - DrawScanline = line; + Pico.est.DrawScanline = line; if (PicoScanBegin != NULL) skip = PicoScanBegin(line + offs); @@ -1446,7 +1457,7 @@ static void PicoLine(int line, int offs, int sh, int bgc) DrawDisplay(sh); if (FinalizeLine != NULL) - FinalizeLine(sh, line); + FinalizeLine(sh, line, &Pico.est); if (PicoScanEnd != NULL) skip_next_line = PicoScanEnd(line + offs); @@ -1466,7 +1477,7 @@ void PicoDrawSync(int to, int blank_last_line) if (rendlines != 240) offs = 8; - for (line = DrawScanline; line < to; line++) + for (line = Pico.est.DrawScanline; line < to; line++) { PicoLine(line, offs, sh, bgc); } @@ -1479,7 +1490,7 @@ void PicoDrawSync(int to, int blank_last_line) else PicoLine(line, offs, sh, bgc); line++; } - DrawScanline = line; + Pico.est.DrawScanline = line; pprof_end(draw); } @@ -1491,8 +1502,8 @@ void PicoDrawUpdateHighPal(void) if (PicoOpt & POPT_ALT_RENDERER) sh = 0; // no s/h support - PicoDoHighPal555(sh); - if (rendstatus & PDRAW_SONIC_MODE) { + PicoDoHighPal555(sh, 0, &Pico.est); + if (Pico.est.rendstatus & PDRAW_SONIC_MODE) { // FIXME? 
memcpy(HighPal + 0x40, HighPal, 0x40*2); memcpy(HighPal + 0x80, HighPal, 0x40*2); @@ -1528,7 +1539,7 @@ void PicoDrawSetOutBuf(void *dest, int increment) { DrawLineDestBase = dest; DrawLineDestIncrement = increment; - DrawLineDest = DrawLineDestBase + DrawScanline * increment; + DrawLineDest = DrawLineDestBase + Pico.est.DrawScanline * increment; } void PicoDrawSetInternalBuf(void *dest, int increment) @@ -1536,7 +1547,7 @@ void PicoDrawSetInternalBuf(void *dest, int increment) if (dest != NULL) { HighColBase = dest; HighColIncrement = increment; - HighCol = HighColBase + DrawScanline * increment; + HighCol = HighColBase + Pico.est.DrawScanline * increment; } else { HighColBase = DefHighCol; @@ -1560,3 +1571,5 @@ void PicoDrawSetCallbacks(int (*begin)(unsigned int num), int (*end)(unsigned in PicoScanEnd = end; } } + +// vim:ts=4:sw=4:expandtab diff --git a/pico/draw_arm.S b/pico/draw_arm.S index bbdda5b0..2c1db104 100644 --- a/pico/draw_arm.S +++ b/pico/draw_arm.S @@ -1,6 +1,6 @@ /* * assembly optimized versions of most funtions from draw.c - * (C) notaz, 2006-2010 + * (C) notaz, 2006-2010,2017 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. @@ -8,12 +8,11 @@ * this is highly specialized, be careful if changing related C code! 
*/ -.extern Pico +#include "pico_int_o32.h" + .extern PicoOpt .extern HighCol -.extern DrawScanline .extern HighSprZ -.extern rendstatus .extern HighPreSpr .extern DrawLineDest .extern DrawStripInterlace @@ -287,14 +286,16 @@ @ int cells; // 0x14 @ }; -@ void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells); +@ void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells, +@ struct PicoEState *est) .global DrawLayer DrawLayer: + ldr r12, [sp] @ est stmfd sp!, {r4-r11,lr} - ldr r11, =(Pico+0x22228) @ Pico.video + ldr r11, [r12, #OFS_Pico_video] mov r8, #1 ldrb r7, [r11, #16] @ ??vv??hh @@ -316,17 +317,17 @@ DrawLayer: cmp r10, #7 subge r10, r10, #1 @ r10=shift[width] (5,6,6,7) - @ calculate xmask: - mov r5, r8, lsl r10 - sub r5, r5, #1 @ r5=xmask + ldr r2, [r12, #OFS_DrawScanline] + ldr lr, [r12, #OFS_Pico_vram] @ Find name table: ands r0, r0, #1 ldreqb r12, [r11, #2] ldrneb r12, [r11, #4] - ldr r2, =DrawScanline @ trying to make good use of pipeline here - ldr lr, =(Pico+0x10000) @ lr=Pico.vram + @ calculate xmask: + mov r5, r8, lsl r10 + sub r5, r5, #1 @ r5=xmask moveq r12, r12, lsl #10 movne r12, r12, lsl #13 @@ -334,7 +335,6 @@ DrawLayer: ldrh r8, [r11, #12] ldrb r7, [r11, #11] - ldr r2, [r2] mov r4, r8, lsr #8 @ pvid->reg[13] mov r4, r4, lsl #10 @ htab=pvid->reg[13]<<9; (halfwords) @@ -504,12 +504,12 @@ DrawLayer: .dsloop_exit: tst r10, #1<<21 @ seen non hi-prio tile - ldreq r1, =rendstatus + ldr r1, [sp, #9*4] @ est mov r0, #0 - ldreq r2, [r1] + ldreq r2, [r1, #OFS_rendstatus] str r0, [r6] @ terminate the cache list orreq r2, r2, #PDRAW_PLANE_HI_PRIO @ had a layer with all hi-prio tiles - streq r2, [r1] + streq r2, [r1, #OFS_rendstatus] ldmfd sp!, {r4-r11,lr} bx lr @@ -522,9 +522,9 @@ DrawLayer: bic r8, r8, #0x3fc00000 orr r8, r8, r5, lsl #25 @ r8=(xmask[31:25]|had_output[24]|tilex[21:0]) - ldr r4, =DrawScanline + ldr r11, [sp, #9*4] @ est orr r5, r1, r10, lsl #24 - ldr r4, [r4] + ldr r4, [r11, #OFS_DrawScanline] sub r1, r3, #1 orr r5, 
r5, r4, lsl #16 @ r5=(shift_width[31:24]|scanline[23:16]|ymask[15:0]) and r1, r1, #7 @@ -679,12 +679,12 @@ DrawLayer: .dsloop_vs_exit: tst r8, #(1<<24) @ seen non hi-prio tile - ldreq r1, =rendstatus + ldr r1, [sp, #9*4] @ est mov r0, #0 - ldreq r2, [r1] + ldreq r2, [r1, #OFS_rendstatus] str r0, [r6] @ terminate the cache list orreq r2, r2, #PDRAW_PLANE_HI_PRIO @ had a layer with all hi-prio tiles - streq r2, [r1] + streq r2, [r1, #OFS_rendstatus] ldmfd sp!, {r4-r11,lr} bx lr @@ -770,11 +770,12 @@ BackFill: @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +@ void DrawTilesFromCache(int *hc, int sh, int rlim, struct PicoEState *est) -.global DrawTilesFromCache @ int *hc, int sh, int rlim +.global DrawTilesFromCache DrawTilesFromCache: - stmfd sp!, {r4-r8,r11,lr} + stmfd sp!, {r4-r9,r11,lr} @ cache some stuff to avoid mem access .if OVERRIDE_HIGHCOL @@ -785,7 +786,8 @@ DrawTilesFromCache: ldr r11,=HighCol mov r12,#0xf .endif - ldr lr, =(Pico+0x10000) @ lr=Pico.vram + ldr lr, [r3, #OFS_Pico_vram] + mov r9, r3 @ est mvn r5, #0 @ r5=prevcode=-1 ands r8, r1, #1 @@ -796,7 +798,7 @@ DrawTilesFromCache: .dtfc_loop: ldr r6, [r0], #4 @ read code movs r1, r6, lsr #16 @ r1=dx; - ldmeqfd sp!, {r4-r8,r11,pc} @ dx is never zero, this must be a terminator, return + ldmeqfd sp!, {r4-r9,r11,pc} @ dx is never zero, this must be a terminator, return bic r4, r1, #0xfe00 add r1, r11, r4 @ r1=pdest @@ -915,15 +917,14 @@ DrawTilesFromCache: @ check if we have detected layer covered with hi-prio tiles: .dtfc_check_rendflags: - ldr r1, =rendstatus - ldr r2, [r1] + ldr r2, [r9, #OFS_rendstatus] tst r2, #(PDRAW_PLANE_HI_PRIO|PDRAW_SHHI_DONE) beq .dtfc_loop bic r8, r8, #1 @ sh/hi mode off tst r2, #PDRAW_SHHI_DONE bne .dtfc_loop @ already processed orr r2, r2, #PDRAW_SHHI_DONE - str r2, [r1] + str r2, [r9, #OFS_rendstatus] add r1, r11,#8 mov r3, #320/4/4 @@ -948,7 +949,9 @@ DrawTilesFromCache: @ 
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -.global DrawSpritesSHi @ unsigned char *sprited +@ void DrawSpritesSHi(unsigned char *sprited, struct PicoEState *est) + +.global DrawSpritesSHi DrawSpritesSHi: ldr r3, [r0] @@ -956,7 +959,7 @@ DrawSpritesSHi: ands r3, r3, #0x7f bxeq lr - stmfd sp!, {r4-r11,lr} + stmfd sp!, {r1,r4-r11,lr} @ +est strb r12,[r0,#2] @ set end marker add r10,r0, #3 @ r10=HighLnSpr end add r10,r10,r3 @ r10=HighLnSpr end @@ -969,16 +972,16 @@ DrawSpritesSHi: ldr r11,=HighCol mov r12,#0xf .endif - ldr lr, =(Pico+0x10000) @ lr=Pico.vram + ldr lr, [r1, #OFS_Pico_vram] DrawSpriteSHi: @ draw next sprite ldrb r0, [r10,#-1]! + ldr r7, [sp] @ est ldr r1, =HighPreSpr -@ ldr r8, [sp, #-4] cmp r0, #0xff - ldmeqfd sp!, {r4-r11,pc} @ end of list + ldmeqfd sp!, {r1,r4-r11,pc} @ end of list and r0, r0, #0x7f add r0, r1, r0, lsl #3 @@ -997,15 +1000,14 @@ DrawSpriteSHi: bne DrawSpriteSHi @ non-operator low sprite, already drawn ldr r3, [r0] @ sprite[0] - ldr r7, =DrawScanline mov r6, r3, lsr #28 sub r6, r6, #1 @ r6=width-1 (inc later) mov r5, r3, lsr #24 and r5, r5, #7 @ r5=height + ldr r7, [r7, #OFS_DrawScanline] mov r0, r3, lsl #16 @ r4=sy<<16 (tmp) - ldr r7, [r7] sub r7, r7, r0, asr #16 @ r7=row=DrawScanline-sy tst r9, #0x1000 @@ -1128,34 +1130,34 @@ DrawSpriteSHi: @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -.global DrawAllSprites @ unsigned char *sprited, int prio, int sh +@ void DrawAllSprites(unsigned char *sprited, int prio, int sh, +@ struct PicoEState *est) + +.global DrawAllSprites DrawAllSprites: - ldr r3, =rendstatus orr r1, r2, r1, lsl #1 - ldr r12,[r3] + ldr r12,[r3, #OFS_rendstatus] tst r12,#(PDRAW_DIRTY_SPRITES|PDRAW_SPRITES_MOVED) beq das_no_prep - stmfd sp!, {r0,r1,lr} + stmfd sp!, {r0,r1,r3,lr} and r0, r12,#PDRAW_DIRTY_SPRITES bic r12,r12,#(PDRAW_DIRTY_SPRITES|PDRAW_SPRITES_MOVED) - str r12,[r3] + str r12,[r3, #OFS_rendstatus] bl PrepareSprites - ldmfd sp!, {r0,r1,lr} + 
ldmfd sp!, {r0,r1,r3,lr} das_no_prep: - ldr r3, [r0] - ands r3, r3, #0x7f + ldr r2, [r0] + ands r2, r2, #0x7f bxeq lr @ time to do some real work - stmfd sp!, {r4-r11,lr} + stmfd sp!, {r1,r3-r11,lr} @ +sh|prio<<1 +est mov r12,#0xff strb r12,[r0,#2] @ set end marker add r10,r0, #3 - add r10,r10,r3 @ r10=HighLnSpr end - - str r1, [sp, #-4] @ no calls after this point + add r10,r10,r2 @ r10=HighLnSpr end .if OVERRIDE_HIGHCOL ldr r11,=HighCol @@ -1165,29 +1167,27 @@ das_no_prep: ldr r11,=HighCol mov r12,#0xf .endif - ldr lr, =(Pico+0x10000) @ lr=Pico.vram + ldr lr, [r3, #OFS_Pico_vram] @ + 0 : hhhhvvvv ----hhvv yyyyyyyy yyyyyyyy // v, h: horiz. size @ + 4 : xxxxxxxx xxxxxxxx pccvhnnn nnnnnnnn // x: x coord + 8 -DrawSprite: @ was: unsigned int *sprite, int sh, int acc_sprites +DrawSprite: @ draw next sprite ldrb r0, [r10,#-1]! - ldr r1, =HighPreSpr - ldr r8, [sp, #-4] + ldr r8, [sp] @ sh|prio<<1 + ldr r7, [sp, #4] @ est mov r2, r0, lsr #7 cmp r0, #0xff - ldmeqfd sp!, {r4-r11,pc} @ end of list + ldmeqfd sp!, {r1,r3-r11,pc} @ end of list cmp r2, r8, lsr #1 bne DrawSprite @ wrong priority + ldr r1, =HighPreSpr and r0, r0, #0x7f add r0, r1, r0, lsl #3 -@ stmfd sp!, {r4-r9,r11,lr} -@ orr r8, r2, r1, lsl #4 - ldr r3, [r0] @ sprite[0] - ldr r7, =DrawScanline + ldr r7, [r7, #OFS_DrawScanline] mov r6, r3, lsr #28 sub r6, r6, #1 @ r6=width-1 (inc later) mov r5, r3, lsr #24 @@ -1195,7 +1195,6 @@ DrawSprite: @ was: unsigned int *sprite, int sh, int acc_sprites mov r4, r3, lsl #16 @ r4=sy<<16 (tmp) - ldr r7, [r7] ldr r9, [r0, #4] sub r7, r7, r4, asr #16 @ r7=row=DrawScanline-sy @@ -1307,17 +1306,21 @@ DrawSprite: @ was: unsigned int *sprite, int sh, int acc_sprites @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -.global DrawWindow @ int tstart, int tend, int prio, int sh // int *hcache +@ void DrawWindow(int tstart, int tend, int prio, int sh +@ struct PicoEState *est) + +.global DrawWindow DrawWindow: + ldr r12, [sp] @ est stmfd sp!, {r4-r11,lr} - 
ldr r11, =(Pico+0x22228) @ Pico.video - ldr r10, =DrawScanline - ldrb r12, [r11, #3] @ pvid->reg[3] + ldr r6, [r12, #OFS_Pico_video] + ldr r10, [r12, #OFS_DrawScanline] + mov r11, r12 @ est + ldrb r12, [r6, #3] @ pvid->reg[3] - ldr r10, [r10] - ldr r4, [r11, #12] + ldr r4, [r6, #12] mov r5, r10, lsr #3 and r10, r10, #7 mov r10, r10, lsl #1 @ r10=ty @@ -1331,9 +1334,8 @@ DrawWindow: addeq r12, r12, r5, lsl #6 @ nametab add r12, r12, r0, lsl #2 @ +starttile - ldr r6, =rendstatus - ldr lr, =(Pico+0x10000) @ lr=Pico.vram - ldr r6, [r6] + ldr lr, [r11, #OFS_Pico_vram] + ldr r6, [r11, #OFS_rendstatus] @ fetch the first code now ldrh r7, [lr, r12] @@ -1442,14 +1444,14 @@ DrawWindow: b .dw_shadow_done .dwloop_end: - ldr r0, =rendstatus - ldr r1, [r0] - and r6, r6, #PDRAW_WND_DIFF_PRIO - orr r1, r1, r6 - str r1, [r0] + and r2, r6, #PDRAW_WND_DIFF_PRIO + ldmfd sp!, {r4-r11,lr} + ldr r0, [sp] + ldr r1, [r0, #OFS_rendstatus] + orr r1, r1, r2 + str r1, [r0, #OFS_rendstatus] - ldmfd sp!, {r4-r11,r12} - bx r12 + bx lr @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@ -1531,12 +1533,15 @@ vidConvCpyRGB565: @ void *to, void *from, int pixels bx lr -.global PicoDoHighPal555 @ int sh +@ void PicoDoHighPal555(int sh, int line, struct PicoEState *est) + +.global PicoDoHighPal555 PicoDoHighPal555: - stmfd sp!, {r4-r9,lr} + stmfd sp!, {r4-r10,lr} + mov r10,r2 @ est mov r1, #0 - ldr r8, =(Pico+0x22228) @ Pico.video + ldr r8, [r10, #OFS_Pico_video] PicoDoHighPal555_nopush: orr r9, r1, r0, lsl #31 @ 0:called from FinalizeLine555, 31: s/h @@ -1598,17 +1603,20 @@ PicoDoHighPal555_nopush: PicoDoHighPal555_end: tst r9, #1 - ldmeqfd sp!, {r4-r9,pc} + ldmeqfd sp!, {r4-r10,pc} - ldr r8, =(Pico+0x22228) @ Pico.video + ldr r8, [r10, #OFS_Pico_video] b FinalizeLineRGB555_pal_done -.global FinalizeLine555 @ int sh +@ void FinalizeLine555(int sh, int line, struct PicoEState *est) + +.global FinalizeLine555 FinalizeLine555: - stmfd sp!, {r4-r9,lr} - ldr r8, 
=(Pico+0x22228) @ Pico.video + stmfd sp!, {r4-r10,lr} + mov r10,r2 @ est + ldr r8, [r10, #OFS_Pico_video] ldrb r2, [r8, #-0x1a] @ 0x2220e ~ dirtyPal mov r1, #1 @@ -1618,9 +1626,8 @@ FinalizeLine555: FinalizeLineRGB555_pal_done: ldr r3, =HighPal - ldr r12,=rendstatus + ldr r12, [r10, #OFS_rendstatus] eors r0, r0, #1 @ sh is 0 - ldr r12,[r12] mov lr, #0xff tstne r12,#PDRAW_ACC_SPRITES movne lr, #0x3f @@ -1691,12 +1698,11 @@ FinalizeLineRGB555_pal_done: stmia r0!, {r4,r5,r8,r12} bne .fl_loopRGB555 - ldmfd sp!, {r4-r9,lr} + ldmfd sp!, {r4-r10,lr} bx lr .fl_32scale_RGB555: - stmfd sp!, {r10} mov r9, #0x3900 @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007 orr r9, r9, #0x00e7 @@ -1757,8 +1763,7 @@ FinalizeLineRGB555_pal_done: stmia r0!, {r4,r5,r6,r8,r10} bne .fl_loop32scale_RGB555 - ldmfd sp!, {r10} - ldmfd sp!, {r4-r9,lr} + ldmfd sp!, {r4-r10,lr} bx lr #ifdef UNALIGNED_DRAWLINEDEST @@ -1804,7 +1809,7 @@ FinalizeLineRGB555_pal_done: strh r8, [r0], #2 - ldmfd sp!, {r4-r9,lr} + ldmfd sp!, {r4-r10,lr} bx lr @@ -1870,8 +1875,7 @@ FinalizeLineRGB555_pal_done: strh r4, [r0], #2 - ldmfd sp!, {r10} - ldmfd sp!, {r4-r9,lr} + ldmfd sp!, {r4-r10,lr} bx lr #endif /* UNALIGNED_DRAWLINEDEST */ diff --git a/pico/mode4.c b/pico/mode4.c index 22e30419..c945f6fe 100644 --- a/pico/mode4.c +++ b/pico/mode4.c @@ -200,7 +200,7 @@ void PicoFrameStartMode4(void) int lines = 192; skip_next_line = 0; screen_offset = 24; - rendstatus = PDRAW_32_COLS; + Pico.est.rendstatus = PDRAW_32_COLS; if ((Pico.video.reg[0] & 6) == 6 && (Pico.video.reg[1] & 0x18)) { if (Pico.video.reg[1] & 0x08) { @@ -213,9 +213,9 @@ void PicoFrameStartMode4(void) } } - if (rendstatus != rendstatus_old || lines != rendlines) { + if (Pico.est.rendstatus != rendstatus_old || lines != rendlines) { emu_video_mode_change(screen_offset, lines, 1); - rendstatus_old = rendstatus; + rendstatus_old = Pico.est.rendstatus; rendlines = lines; } @@ -277,7 +277,7 @@ static void FinalizeLineRGB555M4(int line) // standard FinalizeLine can 
finish it for us, // with features like scaling and such - FinalizeLine555(0, line); + FinalizeLine555(0, line, &Pico.est); } static void FinalizeLine8bitM4(int line) diff --git a/pico/pico.c b/pico/pico.c index 8535d486..1c58f227 100644 --- a/pico/pico.c +++ b/pico/pico.c @@ -36,6 +36,9 @@ void PicoInit(void) memset(&PicoPad,0,sizeof(PicoPad)); memset(&PicoPadInt,0,sizeof(PicoPadInt)); + Pico.est.Pico_video = &Pico.video; + Pico.est.Pico_vram = Pico.vram; + // Init CPUs: SekInit(); z80_init(); // init even if we aren't going to use it diff --git a/pico/pico.h b/pico/pico.h index d5416727..c033c2da 100644 --- a/pico/pico.h +++ b/pico/pico.h @@ -99,6 +99,8 @@ typedef enum { PI_ROM, PI_ISPAL, PI_IS40_CELL, PI_IS240_LINES } pint_t; typedef union { int vint; void *vptr; } pint_ret_t; void PicoGetInternal(pint_t which, pint_ret_t *ret); +struct PicoEState; + // cd/mcd.c extern void (*PicoMCDopenTray)(void); extern void (*PicoMCDcloseTray)(void); @@ -175,7 +177,7 @@ extern unsigned char *HighCol; #ifdef _ASM_DRAW_C void vidConvCpyRGB565(void *to, void *from, int pixels); #endif -void PicoDoHighPal555(int sh); +void PicoDoHighPal555(int sh, int line, struct PicoEState *est); extern int PicoDrawMask; #define PDRAW_LAYERB_ON (1<<2) #define PDRAW_LAYERA_ON (1<<3) @@ -192,7 +194,7 @@ extern int PicoDrawMask; #define PDRAW_PLANE_HI_PRIO (1<<6) // have layer with all hi prio tiles (mk3) #define PDRAW_SHHI_DONE (1<<7) // layer sh/hi already processed #define PDRAW_32_COLS (1<<8) // 32 column mode -extern int rendstatus, rendstatus_old; +extern int rendstatus_old; extern int rendlines; extern unsigned short HighPal[0x100]; diff --git a/pico/pico_cmn.c b/pico/pico_cmn.c index 341255a8..2de65d02 100644 --- a/pico/pico_cmn.c +++ b/pico/pico_cmn.c @@ -168,7 +168,7 @@ static int PicoFrameHints(void) if (!skip) { - if (DrawScanline < y) + if (Pico.est.DrawScanline < y) PicoDrawSync(y - 1, 0); #ifdef DRAW_FINISH_FUNC DRAW_FINISH_FUNC(); diff --git a/pico/pico_int.h b/pico/pico_int.h 
index c85319c0..da49e04b 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -328,6 +328,15 @@ struct PicoMS unsigned char pad[0x4e]; }; +// emu state and data for the asm code +struct PicoEState +{ + int DrawScanline; + int rendstatus; + void *Pico_video; + void *Pico_vram; +}; + // some assembly stuff depend on these, do not touch! struct Pico { @@ -348,6 +357,7 @@ struct Pico struct PicoMisc m; struct PicoVideo video; struct PicoMS ms; + struct PicoEState est; }; // sram @@ -577,10 +587,9 @@ int CM_compareRun(int cyc, int is_sub); PICO_INTERNAL void PicoFrameStart(void); void PicoDrawSync(int to, int blank_last_line); void BackFill(int reg7, int sh); -void FinalizeLine555(int sh, int line); +void FinalizeLine555(int sh, int line, struct PicoEState *est); extern int (*PicoScanBegin)(unsigned int num); extern int (*PicoScanEnd)(unsigned int num); -extern int DrawScanline; #define MAX_LINE_SPRITES 29 extern unsigned char HighLnSpr[240][3 + MAX_LINE_SPRITES]; extern void *DrawLineDestBase; @@ -856,7 +865,7 @@ void p32x_sh2_poll_event(SH2 *sh2, unsigned int flags, unsigned int m68k_cycles) // 32x/draw.c void PicoDrawSetOutFormat32x(pdso_t which, int use_32x_line_mode); -void FinalizeLine32xRGB555(int sh, int line); +void FinalizeLine32xRGB555(int sh, int line, struct PicoEState *est); void PicoDraw32xLayer(int offs, int lines, int mdbg); void PicoDraw32xLayerMdOnly(int offs, int lines); extern int (*PicoScan32xBegin)(unsigned int num); diff --git a/pico/pico_int_o32.h b/pico/pico_int_o32.h new file mode 100644 index 00000000..1cdc6b58 --- /dev/null +++ b/pico/pico_int_o32.h @@ -0,0 +1,5 @@ +/* autogenerated by ./tools/mkoffsets, do not edit */ +#define OFS_DrawScanline 0x00 +#define OFS_rendstatus 0x04 +#define OFS_Pico_video 0x08 +#define OFS_Pico_vram 0x0c diff --git a/pico/videoport.c b/pico/videoport.c index 6c876aea..9ddc1679 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -36,7 +36,7 @@ static void VideoWrite(u16 d) case 1: if(a&1) 
d=(u16)((d<<8)|(d>>8)); // If address is odd, bytes are swapped (which game needs this?) Pico.vram [(a>>1)&0x7fff]=d; if (a - ((unsigned)(Pico.video.reg[5]&0x7f) << 9) < 0x400) - rendstatus |= PDRAW_DIRTY_SPRITES; + Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; break; case 3: Pico.m.dirtyPal = 1; Pico.cram [(a>>1)&0x003f]=d; break; // wraps (Desert Strike) @@ -172,7 +172,7 @@ static void DmaSlow(int len) //if(pd >= pdend) pd-=0x8000; // should be good for RAM, bad for ROM } } - rendstatus |= PDRAW_DIRTY_SPRITES; + Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; break; case 3: // cram @@ -241,7 +241,7 @@ static void DmaCopy(int len) } // remember addr Pico.video.addr=a; - rendstatus |= PDRAW_DIRTY_SPRITES; + Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; } // check: Contra, Megaman @@ -280,7 +280,7 @@ static void DmaFill(int data) // update length Pico.video.reg[0x13] = Pico.video.reg[0x14] = 0; // Dino Dini's Soccer (E) (by Haze) - rendstatus |= PDRAW_DIRTY_SPRITES; + Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; } static void CommandDma(void) @@ -319,7 +319,7 @@ static void CommandChange(void) static void DrawSync(int blank_on) { if (Pico.m.scanline < 224 && !(PicoOpt & POPT_ALT_RENDERER) && - !PicoSkipFrame && DrawScanline <= Pico.m.scanline) { + !PicoSkipFrame && Pico.est.DrawScanline <= Pico.m.scanline) { //elprintf(EL_ANOMALY, "sync"); PicoDrawSync(Pico.m.scanline, blank_on); } @@ -412,7 +412,7 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) goto update_irq; case 0x05: //elprintf(EL_STATUS, "spritep moved to %04x", (unsigned)(Pico.video.reg[5]&0x7f) << 9); - if (d^dold) rendstatus |= PDRAW_SPRITES_MOVED; + if (d^dold) Pico.est.rendstatus |= PDRAW_SPRITES_MOVED; break; case 0x0c: // renderers should update their palettes if sh/hi mode is changed diff --git a/platform/gp2x/emu.c b/platform/gp2x/emu.c index 7741ba60..7e91361f 100644 --- a/platform/gp2x/emu.c +++ b/platform/gp2x/emu.c @@ -310,7 +310,7 @@ static int make_local_pal_md(int fast_mode) 
localPal[0xf0] = 0x00ffffff; pallen = 0x100; } - else if (rendstatus & PDRAW_SONIC_MODE) { // mid-frame palette changes + else if (Pico.est.rendstatus & PDRAW_SONIC_MODE) { // mid-frame palette changes bgr444_to_rgb32(localPal+0x40, HighPal); bgr444_to_rgb32(localPal+0x80, HighPal+0x40); } diff --git a/platform/psp/emu.c b/platform/psp/emu.c index 5a2e8809..8deb7fd3 100644 --- a/platform/psp/emu.c +++ b/platform/psp/emu.c @@ -223,7 +223,7 @@ static void do_pal_update(int allow_sh, int allow_as) localPal[0xe0] = 0; localPal[0xf0] = 0x001f; } - else if (allow_as && (rendstatus & PDRAW_SPR_LO_ON_HI)) + else if (allow_as && (Pico.est.rendstatus & PDRAW_SPR_LO_ON_HI)) { memcpy32((int *)dpal+0x80/2, (void *)localPal, 0x40*2/4); } @@ -250,7 +250,7 @@ static void EmuScanPrepare(void) if (Pico.m.dirtyPal) do_pal_update(1, 1); - if ((rendstatus & PDRAW_SPR_LO_ON_HI) && !(Pico.video.reg[0xC]&8)) + if ((Pico.est.rendstatus & PDRAW_SPR_LO_ON_HI) && !(Pico.video.reg[0xC]&8)) amips_clut_f = amips_clut_6bit; else amips_clut_f = amips_clut; } diff --git a/tools/Makefile b/tools/Makefile index 0c126cc1..28b748d4 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -1,6 +1,6 @@ CFLAGS = -Wall -ggdb -TARGETS = amalgamate textfilter +TARGETS = amalgamate textfilter mkoffsets OBJS = $(addsuffix .o,$(TARGETS)) all: $(TARGETS) @@ -8,3 +8,6 @@ all: $(TARGETS) clean: $(RM) $(TARGETS) $(OBJS) +mkoffsets: CFLAGS += -m32 -I.. 
+ +.PHONY: clean all diff --git a/tools/mkoffsets.c b/tools/mkoffsets.c new file mode 100644 index 00000000..4044ad3d --- /dev/null +++ b/tools/mkoffsets.c @@ -0,0 +1,31 @@ +#include <stdio.h> +#include <stddef.h> + +#include "../pico/pico_int.h" + +#define DUMP(f, field) \ + fprintf(f, "#define %-20s 0x%02x\n", \ + "OFS_" #field, \ + (int)offsetof(struct PicoEState, field)) + +int main(int argc, char *argv[]) +{ + char buf[128]; + FILE *f; + + snprintf(buf, sizeof(buf), "pico/pico_int_o%d.h", sizeof(void *) * 8); + f = fopen(buf, "w"); + if (!f) { + perror("fopen"); + return 1; + } + + fprintf(f, "/* autogenerated by %s, do not edit */\n", argv[0]); + DUMP(f, DrawScanline); + DUMP(f, rendstatus); + DUMP(f, Pico_video); + DUMP(f, Pico_vram); + fclose(f); + + return 0; +} From 99bdfd31b8708f8059fbc16ec8be90cb8d7c8bc3 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 6 Aug 2017 19:23:20 +0300 Subject: [PATCH 0046/1110] eliminate texrels (wip2) --- pico/32x/draw.c | 12 +++--- pico/debug.c | 5 +-- pico/draw.c | 68 +++++++++++++++-------------- pico/draw_arm.S | 95 +++++++++-------------------------------- pico/mode4.c | 16 +++---- pico/pico.c | 3 ++ pico/pico.h | 2 - pico/pico_int.h | 7 ++- pico/pico_int_o32.h | 8 +++- platform/gizmondo/emu.c | 11 +---- platform/gp2x/emu.c | 10 ++--- platform/psp/emu.c | 8 ++-- tools/mkoffsets.c | 4 ++ 13 files changed, 102 insertions(+), 147 deletions(-) diff --git a/pico/32x/draw.c b/pico/32x/draw.c index 9500e088..e20238e4 100644 --- a/pico/32x/draw.c +++ b/pico/32x/draw.c @@ -85,9 +85,9 @@ static void convert_pal555(int invert_prio) // this is almost never used (Wiz and menu bg gen only) void FinalizeLine32xRGB555(int sh, int line, struct PicoEState *est) { - unsigned short *pd = DrawLineDest; + unsigned short *pd = est->DrawLineDest; unsigned short *pal = Pico32xMem->pal_native; - unsigned char *pmd = HighCol + 8; + unsigned char *pmd = est->HighCol + 8; unsigned short *dram, *p32x; unsigned char mdbg; @@ -130,7 +130,7 @@ void FinalizeLine32xRGB555(int
sh, int line, struct PicoEState *est) #define PICOSCAN_PRE \ PicoScan32xBegin(l + (lines_sft_offs & 0xff)); \ - dst = DrawLineDest; \ + dst = Pico.est.DrawLineDest; \ #define PICOSCAN_POST \ PicoScan32xEnd(l + (lines_sft_offs & 0xff)); \ @@ -228,7 +228,7 @@ void PicoDraw32xLayer(int offs, int lines, int md_bg) int lines_sft_offs; int which_func; - DrawLineDest = (char *)DrawLineDestBase + offs * DrawLineDestIncrement; + Pico.est.DrawLineDest = (char *)DrawLineDestBase + offs * DrawLineDestIncrement; dram = Pico32xMem->dram[Pico32x.vdp_regs[0x0a/2] & P32XV_FS]; if (Pico32xDrawMode == PDM32X_BOTH) { @@ -266,7 +266,7 @@ do_it: if (Pico32x.vdp_regs[2 / 2] & P32XV_SFT) lines_sft_offs |= 1 << 8; - do_loop[which_func](DrawLineDest, dram, lines_sft_offs, md_bg); + do_loop[which_func](Pico.est.DrawLineDest, dram, lines_sft_offs, md_bg); } // mostly unused, games tend to keep 32X layer on @@ -292,7 +292,7 @@ void PicoDraw32xLayerMdOnly(int offs, int lines) for (l = 0; l < lines; l++) { if (have_scan) { PicoScan32xBegin(l + offs); - dst = DrawLineDest + poffs; + dst = Pico.est.DrawLineDest + poffs; } for (p = 0; p < plen; p += 4) { dst[p + 0] = pal[*pmd++]; diff --git a/pico/debug.c b/pico/debug.c index 91bff566..7cc3c325 100644 --- a/pico/debug.c +++ b/pico/debug.c @@ -20,13 +20,12 @@ char *PDebugMain(void) { struct PicoVideo *pv=&Pico.video; unsigned char *reg=pv->reg, r; - extern int HighPreSpr[]; int i, sprites_lo, sprites_hi; char *dstrp; sprites_lo = sprites_hi = 0; - for (i = 0; HighPreSpr[i] != 0; i+=2) - if (HighPreSpr[i+1] & 0x8000) + for (i = 0; Pico.est.HighPreSpr[i] != 0; i+=2) + if (Pico.est.HighPreSpr[i+1] & 0x8000) sprites_hi++; else sprites_lo++; diff --git a/pico/draw.c b/pico/draw.c index ff84be57..7dbdc34e 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -35,18 +35,16 @@ int (*PicoScanBegin)(unsigned int num) = NULL; int (*PicoScanEnd) (unsigned int num) = NULL; static unsigned char DefHighCol[8+320+8]; -unsigned char *HighCol = DefHighCol; static unsigned 
char *HighColBase = DefHighCol; static int HighColIncrement; static unsigned int DefOutBuff[320*2/2]; -void *DrawLineDest = DefOutBuff; // pointer to dest buffer where to draw this line to void *DrawLineDestBase = DefOutBuff; int DrawLineDestIncrement; static int HighCacheA[41+1]; // caches for high layers static int HighCacheB[41+1]; -int HighPreSpr[80*2+1]; // slightly preprocessed sprites +static int HighPreSpr[80*2+1]; // slightly preprocessed sprites #define SPRL_HAVE_HI 0x80 // have hi priority sprites #define SPRL_HAVE_LO 0x40 // *lo* @@ -100,7 +98,7 @@ void blockcpy_or(void *dst, void *src, size_t n, int pat) #define TileNormMaker(funcname,pix_func) \ static int funcname(int sx,int addr,int pal) \ { \ - unsigned char *pd = HighCol+sx; \ + unsigned char *pd = Pico.est.HighCol+sx; \ unsigned int pack=0; unsigned int t=0; \ \ pack=*(unsigned int *)(Pico.vram+addr); /* Get 8 pixels */ \ @@ -124,7 +122,7 @@ static int funcname(int sx,int addr,int pal) \ #define TileFlipMaker(funcname,pix_func) \ static int funcname(int sx,int addr,int pal) \ { \ - unsigned char *pd = HighCol+sx; \ + unsigned char *pd = Pico.est.HighCol+sx; \ unsigned int pack=0; unsigned int t=0; \ \ pack=*(unsigned int *)(Pico.vram+addr); /* Get 8 pixels */ \ @@ -515,7 +513,7 @@ static void DrawWindow(int tstart, int tend, int prio, int sh, pal=((code>>9)&0x30); if (prio) { - int *zb = (int *)(HighCol+8+(tilex<<3)); + int *zb = (int *)(est->HighCol+8+(tilex<<3)); *zb++ &= 0xbfbfbfbf; *zb &= 0xbfbfbfbf; } else { @@ -541,7 +539,7 @@ static void DrawTilesFromCacheShPrep(void) // as some layer has covered whole line with hi priority tiles, // we can process whole line and then act as if sh/hi mode was off, // but leave lo pri op sprite markers alone - int c = 320/4, *zb = (int *)(HighCol+8); + int c = 320/4, *zb = (int *)(Pico.est.HighCol+8); Pico.est.rendstatus |= PDRAW_SHHI_DONE; while (c--) { @@ -591,7 +589,7 @@ static void DrawTilesFromCache(int *hc, int sh, int rlim, struct PicoEState *est 
addr=(code&0x7ff)<<4; addr+=(unsigned int)code>>25; // y offset into tile dx=(code>>16)&0x1ff; - zb = HighCol+dx; + zb = est->HighCol+dx; *zb++ &= 0xbf; *zb++ &= 0xbf; *zb++ &= 0xbf; *zb++ &= 0xbf; *zb++ &= 0xbf; *zb++ &= 0xbf; *zb++ &= 0xbf; *zb++ &= 0xbf; @@ -607,7 +605,7 @@ static void DrawTilesFromCache(int *hc, int sh, int rlim, struct PicoEState *est last_cut_tile: { unsigned int t, pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels - unsigned char *pd = HighCol+dx; + unsigned char *pd = est->HighCol+dx; if (!pack) return; if (code&0x0800) { @@ -812,7 +810,7 @@ static void DrawSpritesSHi(unsigned char *sprited, const struct PicoEState *est) int offs, delta, width, height, row; offs = (p[cnt] & 0x7f) * 2; - sprite = HighPreSpr + offs; + sprite = est->HighPreSpr + offs; code = sprite[1]; pal = (code>>9)&0x30; @@ -936,7 +934,7 @@ static void DrawSpritesHiAS(unsigned char *sprited, int sh) /* nasty 1: remove 'sprite' flags */ { - int c = 320/4/4, *zb = (int *)(HighCol+8); + int c = 320/4/4, *zb = (int *)(Pico.est.HighCol+8); while (c--) { *zb++ &= 0x7f7f7f7f; *zb++ &= 0x7f7f7f7f; @@ -1147,7 +1145,7 @@ static void DrawAllSprites(unsigned char *sprited, int prio, int sh, // -------------------------------------------- -void BackFill(int reg7, int sh) +void BackFill(int reg7, int sh, struct PicoEState *est) { unsigned int back; @@ -1157,7 +1155,7 @@ void BackFill(int reg7, int sh) back|=back<<8; back|=back<<16; - memset32((int *)(HighCol+8), back, 320/4); + memset32((int *)(est->HighCol+8), back, 320/4); } #endif @@ -1204,10 +1202,10 @@ void PicoDoHighPal555(int sh, int line, struct PicoEState *est) } } -void FinalizeLine555(int sh, int line) +void FinalizeLine555(int sh, int line, struct PicoEState *est) { - unsigned short *pd=DrawLineDest; - unsigned char *ps=HighCol+8; + unsigned short *pd=est->DrawLineDest; + unsigned char *ps=est->HighCol+8; unsigned short *pal=HighPal; int len; @@ -1242,7 +1240,7 @@ void FinalizeLine555(int sh, int line) static void 
FinalizeLine8bit(int sh, int line, struct PicoEState *est) { - unsigned char *pd = DrawLineDest; + unsigned char *pd = est->DrawLineDest; int len, rs = est->rendstatus; static int dirty_count; @@ -1271,12 +1269,12 @@ static void FinalizeLine8bit(int sh, int line, struct PicoEState *est) if (!sh && (rs & PDRAW_SONIC_MODE)) { if (dirty_count >= 11) { - blockcpy_or(pd, HighCol+8, len, 0x80); + blockcpy_or(pd, est->HighCol+8, len, 0x80); } else { - blockcpy_or(pd, HighCol+8, len, 0x40); + blockcpy_or(pd, est->HighCol+8, len, 0x40); } } else { - blockcpy(pd, HighCol+8, len); + blockcpy(pd, est->HighCol+8, len); } } @@ -1403,8 +1401,8 @@ PICO_INTERNAL void PicoFrameStart(void) rendstatus_old = Pico.est.rendstatus; } - HighCol = HighColBase + offs * HighColIncrement; - DrawLineDest = (char *)DrawLineDestBase + offs * DrawLineDestIncrement; + Pico.est.HighCol = HighColBase + offs * HighColIncrement; + Pico.est.DrawLineDest = (char *)DrawLineDestBase + offs * DrawLineDestIncrement; Pico.est.DrawScanline = 0; skip_next_line = 0; @@ -1421,7 +1419,7 @@ static void DrawBlankedLine(int line, int offs, int sh, int bgc) if (PicoScanBegin != NULL) PicoScanBegin(line + offs); - BackFill(bgc, sh); + BackFill(bgc, sh, &Pico.est); if (FinalizeLine != NULL) FinalizeLine(sh, line, &Pico.est); @@ -1429,8 +1427,8 @@ static void DrawBlankedLine(int line, int offs, int sh, int bgc) if (PicoScanEnd != NULL) PicoScanEnd(line + offs); - HighCol += HighColIncrement; - DrawLineDest = (char *)DrawLineDest + DrawLineDestIncrement; + Pico.est.HighCol += HighColIncrement; + Pico.est.DrawLineDest = (char *)Pico.est.DrawLineDest + DrawLineDestIncrement; } static void PicoLine(int line, int offs, int sh, int bgc) @@ -1452,7 +1450,7 @@ static void PicoLine(int line, int offs, int sh, int bgc) } // Draw screen: - BackFill(bgc, sh); + BackFill(bgc, sh, &Pico.est); if (Pico.video.reg[1]&0x40) DrawDisplay(sh); @@ -1462,8 +1460,8 @@ static void PicoLine(int line, int offs, int sh, int bgc) if (PicoScanEnd != 
NULL) skip_next_line = PicoScanEnd(line + offs); - HighCol += HighColIncrement; - DrawLineDest = (char *)DrawLineDest + DrawLineDestIncrement; + Pico.est.HighCol += HighColIncrement; + Pico.est.DrawLineDest = (char *)Pico.est.DrawLineDest + DrawLineDestIncrement; } void PicoDrawSync(int to, int blank_last_line) @@ -1539,7 +1537,7 @@ void PicoDrawSetOutBuf(void *dest, int increment) { DrawLineDestBase = dest; DrawLineDestIncrement = increment; - DrawLineDest = DrawLineDestBase + Pico.est.DrawScanline * increment; + Pico.est.DrawLineDest = DrawLineDestBase + Pico.est.DrawScanline * increment; } void PicoDrawSetInternalBuf(void *dest, int increment) @@ -1547,7 +1545,7 @@ void PicoDrawSetInternalBuf(void *dest, int increment) if (dest != NULL) { HighColBase = dest; HighColIncrement = increment; - HighCol = HighColBase + Pico.est.DrawScanline * increment; + Pico.est.HighCol = HighColBase + Pico.est.DrawScanline * increment; } else { HighColBase = DefHighCol; @@ -1572,4 +1570,12 @@ void PicoDrawSetCallbacks(int (*begin)(unsigned int num), int (*end)(unsigned in } } -// vim:ts=4:sw=4:expandtab +void PicoDrawInit(void) +{ + Pico.est.DrawLineDest = DefOutBuff; + Pico.est.HighCol = HighColBase; + Pico.est.HighPreSpr = HighPreSpr; + rendstatus_old = -1; +} + +// vim:ts=2:sw=2:expandtab diff --git a/pico/draw_arm.S b/pico/draw_arm.S index 2c1db104..23c522b0 100644 --- a/pico/draw_arm.S +++ b/pico/draw_arm.S @@ -10,15 +10,7 @@ #include "pico_int_o32.h" -.extern PicoOpt -.extern HighCol -.extern HighSprZ -.extern HighPreSpr -.extern DrawLineDest .extern DrawStripInterlace -.extern HighCacheS_ptr - -.equiv OVERRIDE_HIGHCOL, 1 .equ PDRAW_SPRITES_MOVED, (1<<0) .equ PDRAW_WND_DIFF_PRIO, (1<<1) @@ -395,17 +387,12 @@ DrawLayer: sub r10,r10,r9, lsl #16 @ cells-=cellskip @ cache some stuff to avoid mem access -.if OVERRIDE_HIGHCOL - ldr r11,=HighCol + ldr r11,[sp, #9*4] @ est mov r0, #0xf - ldr r11,[r11] -.else - ldr r11,=HighCol - mov r0, #0xf -.endif + ldr r11,[r11, #OFS_HighCol] mvn 
r9, #0 @ r9=prevcode=-1 - add r1, r11, r7 @ r1=pdest + add r1, r11, r7 @ r1=pdest @ r4 & r7 are scratch in this loop @@ -548,14 +535,9 @@ DrawLayer: add r10,r10,r9, lsl #16 @ cell+=cellskip @ cache some stuff to avoid mem access -.if OVERRIDE_HIGHCOL - ldr r11,=HighCol + ldr r11,[sp, #9*4] @ est mov r0, #0xf - ldr r11,[r11] -.else - ldr r11,=HighCol - mov r0, #0xf -.endif + ldr r11,[r11, #OFS_HighCol] mvn r9, #0 @ r9=prevcode=-1 add r1, r11, r7 @ r1=pdest @@ -722,23 +704,17 @@ DrawLayer: @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +@ void BackFill(int reg7, int sh, struct PicoEState *est) -.global BackFill @ int reg7, int sh +.global BackFill BackFill: stmfd sp!, {r4-r9,lr} -.if OVERRIDE_HIGHCOL - ldr lr, =HighCol mov r0, r0, lsl #26 - ldr lr, [lr] + ldr lr, [r2, #OFS_HighCol] mov r0, r0, lsr #26 add lr, lr, #8 -.else - ldr lr, =(HighCol+8) - mov r0, r0, lsl #26 - mov r0, r0, lsr #26 -.endif orr r0, r0, r1, lsl #6 orr r0, r0, r0, lsl #8 @@ -764,8 +740,8 @@ BackFill: stmia lr!, {r0-r7} stmia lr!, {r0-r7} - ldmfd sp!, {r4-r9,r12} - bx r12 + ldmfd sp!, {r4-r9,lr} + bx lr @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@ -778,14 +754,8 @@ DrawTilesFromCache: stmfd sp!, {r4-r9,r11,lr} @ cache some stuff to avoid mem access -.if OVERRIDE_HIGHCOL - ldr r11,=HighCol + ldr r11,[r3, #OFS_HighCol] mov r12,#0xf - ldr r11,[r11] -.else - ldr r11,=HighCol - mov r12,#0xf -.endif ldr lr, [r3, #OFS_Pico_vram] mov r9, r3 @ est @@ -964,14 +934,8 @@ DrawSpritesSHi: add r10,r0, #3 @ r10=HighLnSpr end add r10,r10,r3 @ r10=HighLnSpr end -.if OVERRIDE_HIGHCOL - ldr r11,=HighCol + ldr r11,[r1, #OFS_HighCol] mov r12,#0xf - ldr r11,[r11] -.else - ldr r11,=HighCol - mov r12,#0xf -.endif ldr lr, [r1, #OFS_Pico_vram] @@ -979,7 +943,7 @@ DrawSpriteSHi: @ draw next sprite ldrb r0, [r10,#-1]! 
ldr r7, [sp] @ est - ldr r1, =HighPreSpr + ldr r1, [r7, #OFS_HighPreSpr] cmp r0, #0xff ldmeqfd sp!, {r1,r4-r11,pc} @ end of list and r0, r0, #0x7f @@ -1159,14 +1123,8 @@ das_no_prep: add r10,r0, #3 add r10,r10,r2 @ r10=HighLnSpr end -.if OVERRIDE_HIGHCOL - ldr r11,=HighCol + ldr r11,[r3, #OFS_HighCol] mov r12,#0xf - ldr r11,[r11] -.else - ldr r11,=HighCol - mov r12,#0xf -.endif ldr lr, [r3, #OFS_Pico_vram] @ + 0 : hhhhvvvv ----hhvv yyyyyyyy yyyyyyyy // v, h: horiz. size @@ -1182,7 +1140,7 @@ DrawSprite: ldmeqfd sp!, {r1,r3-r11,pc} @ end of list cmp r2, r8, lsr #1 bne DrawSprite @ wrong priority - ldr r1, =HighPreSpr + ldr r1, [r7, #OFS_HighPreSpr] and r0, r0, #0x7f add r0, r1, r0, lsl #3 @@ -1352,17 +1310,10 @@ DrawWindow: sub r8, r1, r0 @ cache some stuff to avoid mem access -.if OVERRIDE_HIGHCOL - ldr r11,=HighCol + ldr r11, [r11, #OFS_HighCol] mov r8, r8, lsl #1 @ cells - ldr r11,[r11] - mvn r9, #0 @ r9=prevcode=-1 add r11,r11,#8 -.else - ldr r11,=(HighCol+8) - mov r8, r8, lsl #1 @ cells mvn r9, #0 @ r9=prevcode=-1 -.endif add r1, r11, r0, lsl #4 @ r1=pdest mov r0, #0xf b .dwloop_enter @@ -1632,17 +1583,9 @@ FinalizeLineRGB555_pal_done: tstne r12,#PDRAW_ACC_SPRITES movne lr, #0x3f -.if OVERRIDE_HIGHCOL - ldr r1, =HighCol - ldr r0, =DrawLineDest - ldr r1, [r1] - ldr r0, [r0] + ldr r1, [r10, #OFS_HighCol] + ldr r0, [r10, #OFS_DrawLineDest] add r1, r1, #8 -.else - ldr r0, =DrawLineDest - ldr r1, =(HighCol+8) - ldr r0, [r0] -.endif ldrb r12, [r8, #12] mov lr, lr, lsl #1 @@ -1650,7 +1593,7 @@ FinalizeLineRGB555_pal_done: tst r12, #1 movne r2, #320/8 @ len bne .fl_no32colRGB555 - ldr r4, =PicoOpt + ldr r4, [r10, #OFS_PicoOpt] mov r2, #256/8 ldr r4, [r4] tst r4, #0x4000 diff --git a/pico/mode4.c b/pico/mode4.c index c945f6fe..1f9adff5 100644 --- a/pico/mode4.c +++ b/pico/mode4.c @@ -28,7 +28,7 @@ static int screen_offset; static int TileNormM4(int sx, int addr, int pal) { - unsigned char *pd = HighCol + sx; + unsigned char *pd = Pico.est.HighCol + sx; unsigned int 
pack, t; pack = *(unsigned int *)(Pico.vram + addr); /* Get 4 bitplanes / 8 pixels */ @@ -50,7 +50,7 @@ static int TileNormM4(int sx, int addr, int pal) static int TileFlipM4(int sx,int addr,int pal) { - unsigned char *pd = HighCol + sx; + unsigned char *pd = Pico.est.HighCol + sx; unsigned int pack, t; pack = *(unsigned int *)(Pico.vram + addr); /* Get 4 bitplanes / 8 pixels */ @@ -192,7 +192,7 @@ static void DrawDisplayM4(int scanline) if (pv->reg[0] & 0x20) // first column masked - ((int *)HighCol)[2] = ((int *)HighCol)[3] = 0xe0e0e0e0; + ((int *)Pico.est.HighCol)[2] = ((int *)Pico.est.HighCol)[3] = 0xe0e0e0e0; } void PicoFrameStartMode4(void) @@ -219,7 +219,7 @@ void PicoFrameStartMode4(void) rendlines = lines; } - DrawLineDest = (char *)DrawLineDestBase + screen_offset * DrawLineDestIncrement; + Pico.est.DrawLineDest = (char *)DrawLineDestBase + screen_offset * DrawLineDestIncrement; } void PicoLineMode4(int line) @@ -233,7 +233,7 @@ void PicoLineMode4(int line) skip_next_line = PicoScanBegin(line + screen_offset); // Draw screen: - BackFill(Pico.video.reg[7] & 0x0f, 0); + BackFill(Pico.video.reg[7] & 0x0f, 0, &Pico.est); if (Pico.video.reg[1] & 0x40) DrawDisplayM4(line); @@ -243,7 +243,7 @@ void PicoLineMode4(int line) if (PicoScanEnd != NULL) skip_next_line = PicoScanEnd(line + screen_offset); - DrawLineDest = (char *)DrawLineDest + DrawLineDestIncrement; + Pico.est.DrawLineDest = (char *)Pico.est.DrawLineDest + DrawLineDestIncrement; } void PicoDoHighPal555M4(void) @@ -282,12 +282,12 @@ static void FinalizeLineRGB555M4(int line) static void FinalizeLine8bitM4(int line) { - unsigned char *pd = DrawLineDest; + unsigned char *pd = Pico.est.DrawLineDest; if (!(PicoOpt & POPT_DIS_32C_BORDER)) pd += 32; - memcpy32((int *)pd, (int *)(HighCol+8), 256/4); + memcpy32((int *)pd, (int *)(Pico.est.HighCol+8), 256/4); } void PicoDrawSetOutputMode4(pdso_t which) diff --git a/pico/pico.c b/pico/pico.c index 1c58f227..4b05d6e0 100644 --- a/pico/pico.c +++ b/pico/pico.c @@ 
-38,6 +38,7 @@ void PicoInit(void) Pico.est.Pico_video = &Pico.video; Pico.est.Pico_vram = Pico.vram; + Pico.est.PicoOpt = &PicoOpt; // Init CPUs: SekInit(); @@ -46,6 +47,8 @@ void PicoInit(void) PicoInitMCD(); PicoSVPInit(); Pico32xInit(); + + PicoDrawInit(); } // to be called once on emu exit diff --git a/pico/pico.h b/pico/pico.h index c033c2da..6eb1dc0d 100644 --- a/pico/pico.h +++ b/pico/pico.h @@ -171,8 +171,6 @@ typedef enum void PicoDrawSetOutFormat(pdso_t which, int use_32x_line_mode); void PicoDrawSetOutBuf(void *dest, int increment); void PicoDrawSetCallbacks(int (*begin)(unsigned int num), int (*end)(unsigned int num)); -extern void *DrawLineDest; -extern unsigned char *HighCol; // utility #ifdef _ASM_DRAW_C void vidConvCpyRGB565(void *to, void *from, int pixels); diff --git a/pico/pico_int.h b/pico/pico_int.h index da49e04b..4dbfa659 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -333,8 +333,12 @@ struct PicoEState { int DrawScanline; int rendstatus; + void *DrawLineDest; // draw estination + unsigned char *HighCol; + int *HighPreSpr; void *Pico_video; void *Pico_vram; + int *PicoOpt; }; // some assembly stuff depend on these, do not touch! 
@@ -584,9 +588,10 @@ extern void (*PicoCartUnloadHook)(void); int CM_compareRun(int cyc, int is_sub); // draw.c +void PicoDrawInit(void); PICO_INTERNAL void PicoFrameStart(void); void PicoDrawSync(int to, int blank_last_line); -void BackFill(int reg7, int sh); +void BackFill(int reg7, int sh, struct PicoEState *est); void FinalizeLine555(int sh, int line, struct PicoEState *est); extern int (*PicoScanBegin)(unsigned int num); extern int (*PicoScanEnd)(unsigned int num); diff --git a/pico/pico_int_o32.h b/pico/pico_int_o32.h index 1cdc6b58..4094697d 100644 --- a/pico/pico_int_o32.h +++ b/pico/pico_int_o32.h @@ -1,5 +1,9 @@ /* autogenerated by ./tools/mkoffsets, do not edit */ #define OFS_DrawScanline 0x00 #define OFS_rendstatus 0x04 -#define OFS_Pico_video 0x08 -#define OFS_Pico_vram 0x0c +#define OFS_DrawLineDest 0x08 +#define OFS_HighCol 0x0c +#define OFS_HighPreSpr 0x10 +#define OFS_Pico_video 0x14 +#define OFS_Pico_vram 0x18 +#define OFS_PicoOpt 0x1c diff --git a/platform/gizmondo/emu.c b/platform/gizmondo/emu.c index 3115ebd3..3a6c861b 100644 --- a/platform/gizmondo/emu.c +++ b/platform/gizmondo/emu.c @@ -94,7 +94,7 @@ void pemu_prep_defconfig(void) static int EmuScanBegin16(unsigned int num) { - DrawLineDest = (unsigned short *) giz_screen + 321 * num; + Pico.est.DrawLineDest = (unsigned short *) giz_screen + 321 * num; if ((currentConfig.EmuOpt&0x4000) && (num&1) == 0) // (Pico.m.frame_count&1)) return 1; // skip next line @@ -105,7 +105,7 @@ static int EmuScanBegin16(unsigned int num) static int EmuScanBegin8(unsigned int num) { // draw like the fast renderer - HighCol = gfx_buffer + 328 * num; + Pico.est.HighCol = gfx_buffer + 328 * num; return 0; } @@ -122,13 +122,6 @@ static void osd_text(int x, int y, const char *text) emu_text_out16(x, y, text); } -/* -void log1(void *p1, void *p2) -{ - lprintf("%p %p %p\n", p1, p2, DrawLineDest); -} -*/ - static void cd_leds(void) { static int old_reg = 0; diff --git a/platform/gp2x/emu.c b/platform/gp2x/emu.c index 
7e91361f..683e2673 100644 --- a/platform/gp2x/emu.c +++ b/platform/gp2x/emu.c @@ -222,7 +222,7 @@ static unsigned char __attribute__((aligned(4))) rot_buff[320*4*2]; static int EmuScanBegin16_rot(unsigned int num) { - DrawLineDest = rot_buff + (num & 3) * 320 * 2; + Pico.est.DrawLineDest = rot_buff + (num & 3) * 320 * 2; return 0; } @@ -237,7 +237,7 @@ static int EmuScanEnd16_rot(unsigned int num) static int EmuScanBegin8_rot(unsigned int num) { - DrawLineDest = rot_buff + (num & 3) * 320; + Pico.est.DrawLineDest = rot_buff + (num & 3) * 320; return 0; } @@ -262,14 +262,14 @@ static int EmuScanBegin16_ld(unsigned int num) if (emu_scan_begin) return emu_scan_begin(ld_counter); else - DrawLineDest = (char *)g_screen_ptr + 320 * ld_counter * gp2x_current_bpp / 8; + Pico.est.DrawLineDest = (char *)g_screen_ptr + 320 * ld_counter * gp2x_current_bpp / 8; return 0; } static int EmuScanEnd16_ld(unsigned int num) { - void *oldline = DrawLineDest; + void *oldline = Pico.est.DrawLineDest; if (emu_scan_end) emu_scan_end(ld_counter); @@ -280,7 +280,7 @@ static int EmuScanEnd16_ld(unsigned int num) ld_left = ld_lines; EmuScanBegin16_ld(num); - memcpy32(DrawLineDest, oldline, 320 * gp2x_current_bpp / 8 / 4); + memcpy32(Pico.est.DrawLineDest, oldline, 320 * gp2x_current_bpp / 8 / 4); if (emu_scan_end) emu_scan_end(ld_counter); diff --git a/platform/psp/emu.c b/platform/psp/emu.c index 8deb7fd3..cb16d264 100644 --- a/platform/psp/emu.c +++ b/platform/psp/emu.c @@ -242,8 +242,8 @@ static void do_slowmode_lines(int line_to) static void EmuScanPrepare(void) { - HighCol = (unsigned char *)VRAM_CACHED_STUFF + 8; - if (!(Pico.video.reg[1]&8)) HighCol += 8*512; + Pico.est.HighCol = (unsigned char *)VRAM_CACHED_STUFF + 8; + if (!(Pico.video.reg[1]&8)) Pico.est.HighCol += 8*512; if (dynamic_palette > 0) dynamic_palette--; @@ -258,7 +258,7 @@ static void EmuScanPrepare(void) static int EmuScanSlowBegin(unsigned int num) { if (!dynamic_palette) - HighCol = (unsigned char *)VRAM_CACHED_STUFF + 
num * 512 + 8; + Pico.est.HighCol = (unsigned char *)VRAM_CACHED_STUFF + num * 512 + 8; return 0; } @@ -276,7 +276,7 @@ static int EmuScanSlowEnd(unsigned int num) if (dynamic_palette) { int line_len = (Pico.video.reg[12]&1) ? 320 : 256; void *dst = (char *)VRAM_STUFF + 512*240 + 512*2*num; - amips_clut_f(dst, HighCol + 8, localPal, line_len); + amips_clut_f(dst, Pico.est.HighCol + 8, localPal, line_len); } return 0; diff --git a/tools/mkoffsets.c b/tools/mkoffsets.c index 4044ad3d..e9eb3c5f 100644 --- a/tools/mkoffsets.c +++ b/tools/mkoffsets.c @@ -23,8 +23,12 @@ int main(int argc, char *argv[]) fprintf(f, "/* autogenerated by %s, do not edit */\n", argv[0]); DUMP(f, DrawScanline); DUMP(f, rendstatus); + DUMP(f, DrawLineDest); + DUMP(f, HighCol); + DUMP(f, HighPreSpr); DUMP(f, Pico_video); DUMP(f, Pico_vram); + DUMP(f, PicoOpt); fclose(f); return 0; From 88a8088545a6ffa72286ee4613cb512ea7cd2ecf Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 6 Aug 2017 19:23:36 +0300 Subject: [PATCH 0047/1110] some fps counter cosmetics --- platform/common/emu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/platform/common/emu.c b/platform/common/emu.c index 7f375a3c..6f42b73d 100644 --- a/platform/common/emu.c +++ b/platform/common/emu.c @@ -1427,7 +1427,7 @@ void emu_loop(void) printf("%s\n", fpsbuff); #else if (currentConfig.EmuOpt & EOPT_SHOW_FPS) - sprintf(fpsbuff, "%02i/%02i ", frames_shown, frames_done); + snprintf(fpsbuff, 8, "%02i/%02i ", frames_shown, frames_done); #endif frames_shown = frames_done = 0; timestamp_fps_x3 += ms_to_ticks(1000) * 3; From 6027c719ba015f2d820db3842148abbf62ea65a4 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 6 Aug 2017 21:21:35 +0300 Subject: [PATCH 0048/1110] configure: allow to override SDL_CONFIG --- configure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure b/configure index 35130cb8..1310ab2c 100755 --- a/configure +++ b/configure @@ -54,7 +54,7 @@ CC="${CC-${CROSS_COMPILE}gcc}" 
CXX="${CXX-${CROSS_COMPILE}g++}" AS="${AS-${CROSS_COMPILE}as}" STRIP="${STRIP-${CROSS_COMPILE}strip}" -SDL_CONFIG="`$CC --print-sysroot 2> /dev/null || true`/usr/bin/sdl-config" +test -n "$SDL_CONFIG" || SDL_CONFIG="`$CC --print-sysroot 2> /dev/null || true`/usr/bin/sdl-config" MAIN_LDLIBS="$LDLIBS -lm" config_mak="config.mak" From 98a27142346e05a4cef4fe89469bc09d5560edc1 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 6 Aug 2017 22:25:26 +0300 Subject: [PATCH 0049/1110] eliminate texrels, part 3 --- pico/32x/draw.c | 18 +++++----- pico/debug.c | 13 ++++---- pico/draw.c | 17 +++++----- pico/draw2.c | 73 ++++++++++++++++++++++------------------- pico/draw2_arm.S | 64 +++++++++++++++++++----------------- pico/draw_arm.S | 6 ++-- pico/mode4.c | 4 +-- pico/pico.c | 1 + pico/pico.h | 2 -- pico/pico_int.h | 5 ++- pico/pico_int_o32.h | 2 ++ platform/gizmondo/emu.c | 6 ++-- platform/gp2x/emu.c | 12 +++---- platform/linux/emu.c | 6 ++-- platform/psp/emu.c | 6 ++-- tools/mkoffsets.c | 2 ++ 16 files changed, 125 insertions(+), 112 deletions(-) diff --git a/pico/32x/draw.c b/pico/32x/draw.c index e20238e4..3e007ae0 100644 --- a/pico/32x/draw.c +++ b/pico/32x/draw.c @@ -141,9 +141,9 @@ static void do_loop_dc##name(unsigned short *dst, \ unsigned short *dram, int lines_sft_offs, int mdbg) \ { \ int inv_bit = (Pico32x.vdp_regs[0] & P32XV_PRI) ? 
0x8000 : 0; \ - unsigned char *pmd = PicoDraw2FB + \ + unsigned char *pmd = Pico.est.Draw2FB + \ 328 * (lines_sft_offs & 0xff) + 8; \ - unsigned short *palmd = HighPal; \ + unsigned short *palmd = Pico.est.HighPal; \ unsigned short *p32x; \ int lines = lines_sft_offs >> 16; \ int l; \ @@ -161,9 +161,9 @@ static void do_loop_pp##name(unsigned short *dst, \ unsigned short *dram, int lines_sft_offs, int mdbg) \ { \ unsigned short *pal = Pico32xMem->pal_native; \ - unsigned char *pmd = PicoDraw2FB + \ + unsigned char *pmd = Pico.est.Draw2FB + \ 328 * (lines_sft_offs & 0xff) + 8; \ - unsigned short *palmd = HighPal; \ + unsigned short *palmd = Pico.est.HighPal; \ unsigned char *p32x; \ int lines = lines_sft_offs >> 16; \ int l; \ @@ -182,9 +182,9 @@ static void do_loop_rl##name(unsigned short *dst, \ unsigned short *dram, int lines_sft_offs, int mdbg) \ { \ unsigned short *pal = Pico32xMem->pal_native; \ - unsigned char *pmd = PicoDraw2FB + \ + unsigned char *pmd = Pico.est.Draw2FB + \ 328 * (lines_sft_offs & 0xff) + 8; \ - unsigned short *palmd = HighPal; \ + unsigned short *palmd = Pico.est.HighPal; \ unsigned short *p32x; \ int lines = lines_sft_offs >> 16; \ int l; \ @@ -274,8 +274,8 @@ void PicoDraw32xLayerMdOnly(int offs, int lines) { int have_scan = PicoScan32xBegin != NULL && PicoScan32xEnd != NULL; unsigned short *dst = (void *)((char *)DrawLineDestBase + offs * DrawLineDestIncrement); - unsigned char *pmd = PicoDraw2FB + 328 * offs + 8; - unsigned short *pal = HighPal; + unsigned char *pmd = Pico.est.Draw2FB + 328 * offs + 8; + unsigned short *pal = Pico.est.HighPal; int poffs = 0, plen = 320; int l, p; @@ -322,7 +322,7 @@ void PicoDrawSetOutFormat32x(pdso_t which, int use_32x_line_mode) } // use the same layout as alt renderer - PicoDrawSetInternalBuf(PicoDraw2FB, 328); + PicoDrawSetInternalBuf(Pico.est.Draw2FB, 328); Pico32xDrawMode = (which == PDF_RGB555) ? 
PDM32X_32X_ONLY : PDM32X_BOTH; } diff --git a/pico/debug.c b/pico/debug.c index 7cc3c325..efcd3fde 100644 --- a/pico/debug.c +++ b/pico/debug.c @@ -196,29 +196,30 @@ void PDebugShowSpriteStats(unsigned short *screen, int stride) void PDebugShowPalette(unsigned short *screen, int stride) { + struct PicoEState *est = &Pico.est; int x, y; Pico.m.dirtyPal = 1; if (PicoAHW & PAHW_SMS) PicoDoHighPal555M4(); else - PicoDoHighPal555(1, 0, &Pico.est); + PicoDoHighPal555(1, 0, est); Pico.m.dirtyPal = 1; screen += 16*stride+8; for (y = 0; y < 8*4; y++) for (x = 0; x < 8*16; x++) - screen[x + y*stride] = HighPal[x/8 + (y/8)*16]; + screen[x + y*stride] = est->HighPal[x/8 + (y/8)*16]; screen += 160; for (y = 0; y < 8*4; y++) for (x = 0; x < 8*16; x++) - screen[x + y*stride] = HighPal[(x/8 + (y/8)*16) | 0x40]; + screen[x + y*stride] = est->HighPal[(x/8 + (y/8)*16) | 0x40]; screen += stride*48; for (y = 0; y < 8*4; y++) for (x = 0; x < 8*16; x++) - screen[x + y*stride] = HighPal[(x/8 + (y/8)*16) | 0x80]; + screen[x + y*stride] = est->HighPal[(x/8 + (y/8)*16) | 0x80]; } #if defined(DRAW2_OVERRIDE_LINE_WIDTH) @@ -263,9 +264,9 @@ void PDebugShowSprite(unsigned short *screen, int stride, int which) PicoFrameFull(); for (y = 0; y < 8*4; y++) { - unsigned char *ps = PicoDraw2FB + DRAW2_LINE_WIDTH*y + 8; + unsigned char *ps = Pico.est.Draw2FB + DRAW2_LINE_WIDTH*y + 8; for (x = 0; x < 8*4; x++) - if (ps[x]) screen[x] = HighPal[ps[x]], ps[x] = 0; + if (ps[x]) screen[x] = Pico.est.HighPal[ps[x]], ps[x] = 0; screen += stride; } diff --git a/pico/draw.c b/pico/draw.c index 7dbdc34e..9ad32dad 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -1161,8 +1161,6 @@ void BackFill(int reg7, int sh, struct PicoEState *est) // -------------------------------------------- -unsigned short HighPal[0x100]; - #ifndef _ASM_DRAW_C void PicoDoHighPal555(int sh, int line, struct PicoEState *est) { @@ -1172,7 +1170,7 @@ void PicoDoHighPal555(int sh, int line, struct PicoEState *est) Pico.m.dirtyPal = 0; spal = (void 
*)Pico.cram; - dpal = (void *)HighPal; + dpal = (void *)est->HighPal; for (i = 0; i < 0x40 / 2; i++) { t = spal[i]; @@ -1206,7 +1204,7 @@ void FinalizeLine555(int sh, int line, struct PicoEState *est) { unsigned short *pd=est->DrawLineDest; unsigned char *ps=est->HighCol+8; - unsigned short *pal=HighPal; + unsigned short *pal=est->HighPal; int len; if (Pico.m.dirtyPal) @@ -1253,9 +1251,9 @@ static void FinalizeLine8bit(int sh, int line, struct PicoEState *est) rs |= PDRAW_SONIC_MODE; est->rendstatus = rs; if (dirty_count == 3) { - blockcpy(HighPal, Pico.cram, 0x40*2); + blockcpy(est->HighPal, Pico.cram, 0x40*2); } else if (dirty_count == 11) { - blockcpy(HighPal+0x40, Pico.cram, 0x40*2); + blockcpy(est->HighPal+0x40, Pico.cram, 0x40*2); } } @@ -1496,15 +1494,16 @@ void PicoDrawSync(int to, int blank_last_line) // also works for fast renderer void PicoDrawUpdateHighPal(void) { + struct PicoEState *est = &Pico.est; int sh = (Pico.video.reg[0xC] & 8) >> 3; // shadow/hilight? if (PicoOpt & POPT_ALT_RENDERER) sh = 0; // no s/h support PicoDoHighPal555(sh, 0, &Pico.est); - if (Pico.est.rendstatus & PDRAW_SONIC_MODE) { + if (est->rendstatus & PDRAW_SONIC_MODE) { // FIXME? 
- memcpy(HighPal + 0x40, HighPal, 0x40*2); - memcpy(HighPal + 0x80, HighPal, 0x40*2); + memcpy(est->HighPal + 0x40, est->HighPal, 0x40*2); + memcpy(est->HighPal + 0x80, est->HighPal, 0x40*2); } } diff --git a/pico/draw2.c b/pico/draw2.c index 5730d7b6..1b8cdf5f 100644 --- a/pico/draw2.c +++ b/pico/draw2.c @@ -21,7 +21,6 @@ #endif static unsigned char PicoDraw2FB_[(8+320) * (8+240+8)]; -unsigned char *PicoDraw2FB = PicoDraw2FB_; static int HighCache2A[41*(TILE_ROWS+1)+1+1]; // caches for high layers static int HighCache2B[41*(TILE_ROWS+1)+1+1]; @@ -32,11 +31,12 @@ void (*PicoPrepareCram)()=0; // prepares PicoCramHigh for renderer to // stuff available in asm: #ifdef _ASM_DRAW_C -void BackFillFull(int reg7); -void DrawLayerFull(int plane, int *hcache, int planestart, int planeend); -void DrawTilesFromCacheF(int *hc); -void DrawWindowFull(int start, int end, int prio); -void DrawSpriteFull(unsigned int *sprite); +void BackFillFull(void *dst, int reg7); +void DrawLayerFull(int plane, int *hcache, int planestart, int planeend, + struct PicoEState *est); +void DrawTilesFromCacheF(int *hc, struct PicoEState *est); +void DrawWindowFull(int start, int end, int prio, struct PicoEState *est); +void DrawSpriteFull(unsigned int *sprite, struct PicoEState *est); #else @@ -134,11 +134,11 @@ static int TileXflipYflip(unsigned char *pd,int addr,unsigned char pal) // start: (tile_start<<16)|row_start, end: [same] -static void DrawWindowFull(int start, int end, int prio) +static void DrawWindowFull(int start, int end, int prio, struct PicoEState *est) { struct PicoVideo *pvid=&Pico.video; int nametab, nametab_step, trow, tilex, blank=-1, code; - unsigned char *scrpos = PicoDraw2FB; + unsigned char *scrpos = est->Draw2FB; int tile_start, tile_end; // in cells // parse ranges @@ -198,7 +198,8 @@ static void DrawWindowFull(int start, int end, int prio) } -static void DrawLayerFull(int plane, int *hcache, int planestart, int planeend) +static void DrawLayerFull(int plane, int *hcache, 
int planestart, int planeend, + struct PicoEState *est) { struct PicoVideo *pvid=&Pico.video; static char shift[4]={5,6,6,7}; // 32,64 or 128 sized tilemaps @@ -238,7 +239,7 @@ static void DrawLayerFull(int plane, int *hcache, int planestart, int planeend) if (plane==0) nametab=(pvid->reg[2]&0x38)<< 9; // A else nametab=(pvid->reg[4]&0x07)<<12; // B - scrpos = PicoDraw2FB; + scrpos = est->Draw2FB; scrpos+=8*LINE_WIDTH*(planestart-START_ROW); // Get vertical scroll value: @@ -305,14 +306,14 @@ static void DrawLayerFull(int plane, int *hcache, int planestart, int planeend) } -static void DrawTilesFromCacheF(int *hc) +static void DrawTilesFromCacheF(int *hc, struct PicoEState *est) { int code, addr, zero = 0; unsigned int prevy=0xFFFFFFFF; // unsigned short *pal; unsigned char pal; short blank=-1; // The tile we know is blank - unsigned char *scrpos = PicoDraw2FB, *pd = 0; + unsigned char *scrpos = est->Draw2FB, *pd = 0; // *hcache++ = code|(dx<<16)|(trow<<27); // cache it scrpos+=(*hc++)*LINE_WIDTH - START_ROW*LINE_WIDTH*8; @@ -344,7 +345,7 @@ static void DrawTilesFromCacheF(int *hc) // sx and sy are coords of virtual screen with 8pix borders on top and on left -static void DrawSpriteFull(unsigned int *sprite) +static void DrawSpriteFull(unsigned int *sprite, struct PicoEState *est) { int width=0,height=0; // unsigned short *pal=NULL; @@ -375,7 +376,7 @@ static void DrawSpriteFull(unsigned int *sprite) // goto first vertically visible tile while(sy <= START_ROW*8) { sy+=8; tile+=tdeltay; height--; } - scrpos = PicoDraw2FB; + scrpos = est->Draw2FB; scrpos+=(sy-START_ROW*8)*LINE_WIDTH; for (; height > 0; height--, sy+=8, tile+=tdeltay) @@ -466,29 +467,29 @@ static void DrawAllSpritesFull(int prio, int maxwidth) } // Go through sprites backwards: - for (i-- ;i>=0; i--) + for (i--; i >= 0; i--) { - DrawSpriteFull(sprites[i]); + DrawSpriteFull(sprites[i], &Pico.est); } } #ifndef _ASM_DRAW_C -static void BackFillFull(int reg7) +static void BackFillFull(void *dst, int reg7) 
{ unsigned int back; // Start with a background color: -// back=PicoCramHigh[reg7&0x3f]; back=reg7&0x3f; back|=back<<8; back|=back<<16; - memset32((int *)PicoDraw2FB, back, LINE_WIDTH*(8+(END_ROW-START_ROW)*8)/4); + memset32(dst, back, LINE_WIDTH*(8+(END_ROW-START_ROW)*8)/4); } #endif static void DrawDisplayFull(void) { + struct PicoEState *est = &Pico.est; struct PicoVideo *pvid=&Pico.video; int win, edge=0, hvwin=0; // LSb->MSb: hwin&plane, vwin&plane, full int planestart=START_ROW, planeend=END_ROW; // plane A start/end when window shares display with plane A (in tile rows or columns) @@ -551,55 +552,55 @@ static void DrawDisplayFull(void) HighCache2A[1] = HighCache2B[1] = 0; if (PicoDrawMask & PDRAW_LAYERB_ON) - DrawLayerFull(1, HighCache2B, START_ROW, (maxcolc<<16)|END_ROW); + DrawLayerFull(1, HighCache2B, START_ROW, (maxcolc<<16)|END_ROW, est); if (PicoDrawMask & PDRAW_LAYERA_ON) switch (hvwin) { case 4: // fullscreen window - DrawWindowFull(START_ROW, (maxcolc<<16)|END_ROW, 0); + DrawWindowFull(START_ROW, (maxcolc<<16)|END_ROW, 0, est); break; case 3: // we have plane A and both v and h windows - DrawLayerFull(0, HighCache2A, planestart, planeend); - DrawWindowFull( winstart&~0xff0000, (winend&~0xff0000)|(maxcolc<<16), 0); // h - DrawWindowFull((winstart&~0xff)|START_ROW, (winend&~0xff)|END_ROW, 0); // v + DrawLayerFull(0, HighCache2A, planestart, planeend, est); + DrawWindowFull( winstart&~0xff0000, (winend&~0xff0000)|(maxcolc<<16), 0, est); // h + DrawWindowFull((winstart&~0xff)|START_ROW, (winend&~0xff)|END_ROW, 0, est); // v break; case 2: case 1: // both window and plane A visible, window is vertical XOR horizontal - DrawLayerFull(0, HighCache2A, planestart, planeend); - DrawWindowFull(winstart, winend, 0); + DrawLayerFull(0, HighCache2A, planestart, planeend, est); + DrawWindowFull(winstart, winend, 0, est); break; default: // fullscreen plane A - DrawLayerFull(0, HighCache2A, START_ROW, (maxcolc<<16)|END_ROW); + DrawLayerFull(0, HighCache2A, 
START_ROW, (maxcolc<<16)|END_ROW, est); break; } if (PicoDrawMask & PDRAW_SPRITES_LOW_ON) DrawAllSpritesFull(0, maxw); - if (HighCache2B[1]) DrawTilesFromCacheF(HighCache2B); - if (HighCache2A[1]) DrawTilesFromCacheF(HighCache2A); + if (HighCache2B[1]) DrawTilesFromCacheF(HighCache2B, est); + if (HighCache2A[1]) DrawTilesFromCacheF(HighCache2A, est); if (PicoDrawMask & PDRAW_LAYERA_ON) switch (hvwin) { case 4: // fullscreen window - DrawWindowFull(START_ROW, (maxcolc<<16)|END_ROW, 1); + DrawWindowFull(START_ROW, (maxcolc<<16)|END_ROW, 1, est); break; case 3: // we have plane A and both v and h windows - DrawWindowFull( winstart&~0xff0000, (winend&~0xff0000)|(maxcolc<<16), 1); // h - DrawWindowFull((winstart&~0xff)|START_ROW, (winend&~0xff)|END_ROW, 1); // v + DrawWindowFull( winstart&~0xff0000, (winend&~0xff0000)|(maxcolc<<16), 1, est); // h + DrawWindowFull((winstart&~0xff)|START_ROW, (winend&~0xff)|END_ROW, 1, est); // v break; case 2: case 1: // both window and plane A visible, window is vertical XOR horizontal - DrawWindowFull(winstart, winend, 1); + DrawWindowFull(winstart, winend, 1, est); break; } if (PicoDrawMask & PDRAW_SPRITES_HI_ON) @@ -615,10 +616,14 @@ PICO_INTERNAL void PicoFrameFull() if (PicoPrepareCram) PicoPrepareCram(); // Draw screen: - BackFillFull(Pico.video.reg[7]); + BackFillFull(Pico.est.Draw2FB, Pico.video.reg[7]); if (Pico.video.reg[1] & 0x40) DrawDisplayFull(); pprof_end(draw); } +void PicoDraw2Init(void) +{ + Pico.est.Draw2FB = PicoDraw2FB_; +} diff --git a/pico/draw2_arm.S b/pico/draw2_arm.S index c37d059a..6e7e1ac0 100644 --- a/pico/draw2_arm.S +++ b/pico/draw2_arm.S @@ -8,8 +8,7 @@ * this is highly specialized, be careful if changing related C code! 
*/ -.extern Pico -.extern PicoDraw2FB +#include "pico_int_o32.h" @ define these constants in your include file: @ .equiv START_ROW, 1 @@ -25,16 +24,16 @@ .text .align 2 -.global BackFillFull @ int reg7 +@ void BackFillFull(void *dst, int reg7) + +.global BackFillFull BackFillFull: stmfd sp!, {r4-r9,lr} - ldr lr, =PicoDraw2FB @ lr=PicoDraw2FB - mov r0, r0, lsl #26 - ldr lr, [lr] + add lr, r0, #328*8 + mov r0, r1, lsl #26 mov r0, r0, lsr #26 - add lr, lr, #328*8 orr r0, r0, r0, lsl #8 orr r0, r0, r0, lsl #16 @@ -67,8 +66,8 @@ BackFillFull: bne .bff_loop - ldmfd sp!, {r4-r9,r12} - bx r12 + ldmfd sp!, {r4-r9,lr} + bx lr .pool @@ -343,17 +342,19 @@ BackFillFull: @ DrawLayerTiles(*hcache, *scrpos, (cells<<24)|(nametab<<9)|(vscroll&0x3ff)<<11|(shift[width]<<8)|planeend, (ymask<<24)|(planestart<<16)|[htab||hscroll] -@static void DrawLayerFull(int plane, int *hcache, int planestart, int planeend) +@ void DrawLayerFull(int plane, int *hcache, int planestart, int planeend, +@ struct PicoEState *est) .global DrawLayerFull DrawLayerFull: + ldr r12,[sp] @ est stmfd sp!, {r4-r11,lr} mov r6, r1 @ hcache - ldr r11, =(Pico+0x22228) @ Pico.video - ldr r10, =(Pico+0x10000) @ r10=Pico.vram + ldr r11, [r12, #OFS_Pico_video] + ldr r10, [r12, #OFS_Pico_vram] ldrb r5, [r11, #13] @ pvid->reg[13] ldrb r7, [r11, #11] @@ -402,9 +403,9 @@ DrawLayerFull: and r4, r4, #7 orr lr, lr, r4, lsl #13 @ lr|=nametab_bits{3}<<13 - ldr r11, =PicoDraw2FB @ r11=PicoDraw2FB + ldr r11,[sp, #9*4] @ est sub r4, r9, #(START_ROW<<24) - ldr r11, [r11] + ldr r11, [r11, #OFS_Draw2FB] mov r4, r4, asr #24 mov r7, #328*8 mla r11, r4, r7, r11 @ scrpos+=8*328*(planestart-START_ROW); @@ -571,8 +572,9 @@ DrawLayerFull: .pool +@ void DrawTilesFromCacheF(int *hc, struct PicoEState *est) -.global DrawTilesFromCacheF @ int *hc +.global DrawTilesFromCacheF DrawTilesFromCacheF: stmfd sp!, {r4-r10,lr} @@ -580,14 +582,13 @@ DrawTilesFromCacheF: mov r9, #0xff000000 @ r9=prevcode=-1 mvn r6, #0 @ r6=prevy=-1 - ldr r4, =PicoDraw2FB @ 
r4=PicoDraw2FB - ldr r1, [r0], #4 @ read y offset - ldr r4, [r4] + ldr r4, [r1, #OFS_Draw2FB] + ldr r2, [r0], #4 @ read y offset mov r7, #328 - mla r1, r7, r1, r4 - sub r12, r1, #(328*8*START_ROW) @ r12=scrpos + mla r2, r7, r2, r4 + sub r12, r2, #(328*8*START_ROW) @ r12=scrpos - ldr r10, =(Pico+0x10000) @ r10=Pico.vram + ldr r10, [r1, #OFS_Pico_vram] mov r8, r0 @ hc mov r0, #0xf @@ -666,12 +667,14 @@ DrawTilesFromCacheF: @ @@@@@@@@@@@@@@@ @ (tile_start<<16)|row_start -.global DrawWindowFull @ int tstart, int tend, int prio +@ void DrawWindowFull(int start, int end, int prio, struct PicoEState *est) + +.global DrawWindowFull DrawWindowFull: stmfd sp!, {r4-r11,lr} - ldr r11, =(Pico+0x22228) @ Pico.video + ldr r11, [r3, #OFS_Pico_video] ldrb r12, [r11, #3] @ pvid->reg[3] mov r12, r12, lsl #10 @@ -686,11 +689,11 @@ DrawWindowFull: and r4, r0, #0xff mla r12, r5, r4, r12 @ nametab += nametab_step*start; + ldr r10, [r3, #OFS_Pico_vram] mov r4, r0, lsr #16 @ r4=start_cell_h add r7, r12, r4, lsl #1 @ fetch the first code now - ldr r10, =(Pico+0x10000) @ lr=Pico.vram ldrh r7, [r10, r7] cmp r2, r7, lsr #15 ldmnefd sp!, {r4-r11,pc} @ hack: simply assume that whole window uses same priority @@ -704,11 +707,10 @@ DrawWindowFull: mov r9, #0xff000000 @ r9=prevcode=-1 - ldr r11, =PicoDraw2FB @ r11=scrpos + ldr r11, [r3, #OFS_Draw2FB] and r4, r0, #0xff - ldr r11, [r11] - sub r4, r4, #START_ROW add r11, r11, #328*8 + sub r4, r4, #START_ROW add r11, r11, #8 mov r7, #328*8 @@ -873,8 +875,9 @@ DrawWindowFull: b 52b .endm +@ void DrawSpriteFull(unsigned int *sprite, struct PicoEState *est) -.global DrawSpriteFull @ unsigned int *sprite +.global DrawSpriteFull DrawSpriteFull: stmfd sp!, {r4-r11,lr} @@ -902,9 +905,8 @@ DrawSpriteFull: and r3, lr, #0x6000 mov r3, r3, lsr #9 @ r3=pal=((code>>9)&0x30); - ldr r11, =PicoDraw2FB @ r11=scrpos - ldr r10, =(Pico+0x10000) @ r10=Pico.vram - ldr r11, [r11] + ldr r11, [r1, #OFS_Draw2FB] + ldr r10, [r1, #OFS_Pico_vram] sub r1, r12, #(START_ROW*8) mov 
r0, #328 mla r11, r1, r0, r11 @ scrpos+=(sy-START_ROW*8)*328; diff --git a/pico/draw_arm.S b/pico/draw_arm.S index 23c522b0..71db183c 100644 --- a/pico/draw_arm.S +++ b/pico/draw_arm.S @@ -1497,7 +1497,7 @@ PicoDoHighPal555: PicoDoHighPal555_nopush: orr r9, r1, r0, lsl #31 @ 0:called from FinalizeLine555, 31: s/h - ldr r0, =HighPal + add r0, r10, #OFS_HighPal mov r1, #0 strb r1, [r8, #-0x1a] @ 0x2220e ~ dirtyPal @@ -1513,7 +1513,7 @@ PicoDoHighPal555_nopush: tst r9, #(1<<31) beq PicoDoHighPal555_end - ldr r3, =HighPal + add r3, r10, #OFS_HighPal @ shadowed pixels: mov r12, #0x008e @@ -1575,7 +1575,7 @@ FinalizeLine555: bne PicoDoHighPal555_nopush FinalizeLineRGB555_pal_done: - ldr r3, =HighPal + add r3, r10, #OFS_HighPal ldr r12, [r10, #OFS_rendstatus] eors r0, r0, #1 @ sh is 0 diff --git a/pico/mode4.c b/pico/mode4.c index 1f9adff5..4abace6b 100644 --- a/pico/mode4.c +++ b/pico/mode4.c @@ -249,7 +249,7 @@ void PicoLineMode4(int line) void PicoDoHighPal555M4(void) { unsigned int *spal=(void *)Pico.cram; - unsigned int *dpal=(void *)HighPal; + unsigned int *dpal=(void *)Pico.est.HighPal; unsigned int t; int i; @@ -267,7 +267,7 @@ void PicoDoHighPal555M4(void) t |= (t >> 4) & 0x08610861; *dpal = t; } - HighPal[0xe0] = 0; + Pico.est.HighPal[0xe0] = 0; } static void FinalizeLineRGB555M4(int line) diff --git a/pico/pico.c b/pico/pico.c index 4b05d6e0..5951b595 100644 --- a/pico/pico.c +++ b/pico/pico.c @@ -49,6 +49,7 @@ void PicoInit(void) Pico32xInit(); PicoDrawInit(); + PicoDraw2Init(); } // to be called once on emu exit diff --git a/pico/pico.h b/pico/pico.h index 6eb1dc0d..e2dde1b2 100644 --- a/pico/pico.h +++ b/pico/pico.h @@ -194,7 +194,6 @@ extern int PicoDrawMask; #define PDRAW_32_COLS (1<<8) // 32 column mode extern int rendstatus_old; extern int rendlines; -extern unsigned short HighPal[0x100]; // draw.c void PicoDrawUpdateHighPal(void); @@ -202,7 +201,6 @@ void PicoDrawSetInternalBuf(void *dest, int line_increment); // draw2.c // stuff below is optional 
-extern unsigned char *PicoDraw2FB; // buffer for fast renderer in format (8+320)x(8+224+8) (eights for borders) extern unsigned short *PicoCramHigh; // pointer to CRAM buff (0x40 shorts), converted to native device color (works only with 16bit for now) extern void (*PicoPrepareCram)(); // prepares PicoCramHigh for renderer to use diff --git a/pico/pico_int.h b/pico/pico_int.h index 4dbfa659..5d7bfbc3 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -333,12 +333,14 @@ struct PicoEState { int DrawScanline; int rendstatus; - void *DrawLineDest; // draw estination + void *DrawLineDest; // draw destination unsigned char *HighCol; int *HighPreSpr; void *Pico_video; void *Pico_vram; int *PicoOpt; + unsigned char *Draw2FB; + unsigned short HighPal[0x100]; }; // some assembly stuff depend on these, do not touch! @@ -601,6 +603,7 @@ extern void *DrawLineDestBase; extern int DrawLineDestIncrement; // draw2.c +void PicoDraw2Init(void); PICO_INTERNAL void PicoFrameFull(); // mode4.c diff --git a/pico/pico_int_o32.h b/pico/pico_int_o32.h index 4094697d..dc7aaa2a 100644 --- a/pico/pico_int_o32.h +++ b/pico/pico_int_o32.h @@ -7,3 +7,5 @@ #define OFS_Pico_video 0x14 #define OFS_Pico_vram 0x18 #define OFS_PicoOpt 0x1c +#define OFS_Draw2FB 0x20 +#define OFS_HighPal 0x24 diff --git a/platform/gizmondo/emu.c b/platform/gizmondo/emu.c index 3a6c861b..30c6651b 100644 --- a/platform/gizmondo/emu.c +++ b/platform/gizmondo/emu.c @@ -155,11 +155,11 @@ static void blit(const char *fps, const char *notice) } // a hack for VR if (PicoAHW & PAHW_SVP) - memset32((int *)(PicoDraw2FB+328*8+328*223), 0xe0e0e0e0, 328); + memset32((int *)(Pico.est.Draw2FB+328*8+328*223), 0xe0e0e0e0, 328); if (!(Pico.video.reg[12]&1)) lines_flags|=0x10000; if (currentConfig.EmuOpt&0x4000) lines_flags|=0x40000; // (Pico.m.frame_count&1)?0x20000:0x40000; - vidCpy8to16((unsigned short *)giz_screen+321*8, PicoDraw2FB+328*8, localPal, lines_flags); + vidCpy8to16((unsigned short *)giz_screen+321*8, 
Pico.est.Draw2FB+328*8, localPal, lines_flags); } else if (!(emu_opt&0x80)) { @@ -187,7 +187,7 @@ static void blit(const char *fps, const char *notice) if (!(Pico.video.reg[12]&1)) lines_flags|=0x10000; if (currentConfig.EmuOpt&0x4000) lines_flags|=0x40000; // (Pico.m.frame_count&1)?0x20000:0x40000; - vidCpy8to16((unsigned short *)giz_screen+321*8, PicoDraw2FB+328*8, localPal, lines_flags); + vidCpy8to16((unsigned short *)giz_screen+321*8, Pico.est.Draw2FB+328*8, localPal, lines_flags); } if (notice || (emu_opt & 2)) { diff --git a/platform/gp2x/emu.c b/platform/gp2x/emu.c index 683e2673..c0bc71f4 100644 --- a/platform/gp2x/emu.c +++ b/platform/gp2x/emu.c @@ -7,8 +7,8 @@ * - 8bpp tile renderer * In 32x mode: * - 32x layer is overlayed on top of 16bpp one - * - line internal one done on PicoDraw2FB, then mixed with 32x - * - tile internal one done on PicoDraw2FB, then mixed with 32x + * - line internal one done on .Draw2FB, then mixed with 32x + * - tile internal one done on .Draw2FB, then mixed with 32x */ #include @@ -311,8 +311,8 @@ static int make_local_pal_md(int fast_mode) pallen = 0x100; } else if (Pico.est.rendstatus & PDRAW_SONIC_MODE) { // mid-frame palette changes - bgr444_to_rgb32(localPal+0x40, HighPal); - bgr444_to_rgb32(localPal+0x80, HighPal+0x40); + bgr444_to_rgb32(localPal+0x40, Pico.est.HighPal); + bgr444_to_rgb32(localPal+0x80, Pico.est.HighPal+0x40); } else memcpy32(localPal+0x80, localPal, 0x40); // for spr prio mess @@ -355,9 +355,9 @@ void pemu_finalize_frame(const char *fps, const char *notice) } // a hack for VR if (PicoAHW & PAHW_SVP) - memset32((int *)(PicoDraw2FB+328*8+328*223), 0xe0e0e0e0, 328); + memset32((int *)(Pico.est.Draw2FB+328*8+328*223), 0xe0e0e0e0, 328); // do actual copy - vidcpyM2(g_screen_ptr, PicoDraw2FB+328*8, + vidcpyM2(g_screen_ptr, Pico.est.Draw2FB+328*8, !(Pico.video.reg[12] & 1), !(PicoOpt & POPT_DIS_32C_BORDER)); } else if (get_renderer() == RT_8BIT_ACC) diff --git a/platform/linux/emu.c b/platform/linux/emu.c index 
91115ad6..5a97959b 100644 --- a/platform/linux/emu.c +++ b/platform/linux/emu.c @@ -70,8 +70,8 @@ void pemu_finalize_frame(const char *fps, const char *notice) { if (currentConfig.renderer != RT_16BIT && !(PicoAHW & PAHW_32X)) { unsigned short *pd = (unsigned short *)g_screen_ptr + 8 * g_screen_width; - unsigned char *ps = PicoDraw2FB + 328*8 + 8; - unsigned short *pal = HighPal; + unsigned char *ps = Pico.est.Draw2FB + 328*8 + 8; + unsigned short *pal = Pico.est.HighPal; int i, x; if (Pico.m.dirtyPal) PicoDrawUpdateHighPal(); @@ -101,7 +101,7 @@ static void apply_renderer(void) case RT_8BIT_ACC: PicoOpt &= ~POPT_ALT_RENDERER; PicoDrawSetOutFormat(PDF_8BIT, 0); - PicoDrawSetOutBuf(PicoDraw2FB + 8, 328); + PicoDrawSetOutBuf(Pico.est.Draw2FB + 8, 328); break; case RT_8BIT_FAST: PicoOpt |= POPT_ALT_RENDERER; diff --git a/platform/psp/emu.c b/platform/psp/emu.c index cb16d264..dd4381bb 100644 --- a/platform/psp/emu.c +++ b/platform/psp/emu.c @@ -36,7 +36,7 @@ int sceAudio_E0727056(int volume, void *buffer); // blocking output int sceAudioOutput2GetRestSample(); -//unsigned char *PicoDraw2FB = (unsigned char *)VRAM_CACHED_STUFF + 8; // +8 to be able to skip border with 1 quadword.. +//unsigned char *Draw2FB = (unsigned char *)VRAM_CACHED_STUFF + 8; // +8 to be able to skip border with 1 quadword.. 
int engineStateSuspend; #define PICO_PEN_ADJUST_X 4 @@ -400,9 +400,9 @@ void blit1(void) int i; unsigned char *pd; // clear top and bottom trash - for (pd = PicoDraw2FB+8, i = 8; i > 0; i--, pd += 512) + for (pd = Pico.est.Draw2FB+8, i = 8; i > 0; i--, pd += 512) memset32((int *)pd, 0xe0e0e0e0, 320/4); - for (pd = PicoDraw2FB+512*232+8, i = 8; i > 0; i--, pd += 512) + for (pd = Pico.est.Draw2FB+512*232+8, i = 8; i > 0; i--, pd += 512) memset32((int *)pd, 0xe0e0e0e0, 320/4); } diff --git a/tools/mkoffsets.c b/tools/mkoffsets.c index e9eb3c5f..a5c475f2 100644 --- a/tools/mkoffsets.c +++ b/tools/mkoffsets.c @@ -29,6 +29,8 @@ int main(int argc, char *argv[]) DUMP(f, Pico_video); DUMP(f, Pico_vram); DUMP(f, PicoOpt); + DUMP(f, Draw2FB); + DUMP(f, HighPal); fclose(f); return 0; From 34e424aa2709dfc457d751f55cdbf5b881ae3ef4 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 6 Aug 2017 23:02:28 +0300 Subject: [PATCH 0050/1110] remove dead code --- Makefile | 2 - Makefile.libretro | 2 - jni/Android.mk | 2 - pico/cd/mcd_arm.s | 185 ------------------------------------- platform/common/common.mak | 5 - platform/gizmondo/Makefile | 1 - 6 files changed, 197 deletions(-) delete mode 100644 pico/cd/mcd_arm.s diff --git a/Makefile b/Makefile index 30b246fd..ff8e0327 100644 --- a/Makefile +++ b/Makefile @@ -42,7 +42,6 @@ asm_memory ?= 1 asm_render ?= 1 asm_ym2612 ?= 1 asm_misc ?= 1 -asm_cdpico ?= 1 asm_cdmemory ?= 1 asm_mix ?= 1 else # if not arm @@ -54,7 +53,6 @@ endif ifneq "$(use_cyclone)" "1" # due to CPU stop flag access -asm_cdpico = 0 asm_cdmemory = 0 endif diff --git a/Makefile.libretro b/Makefile.libretro index 96e4fe91..5d5f4729 100644 --- a/Makefile.libretro +++ b/Makefile.libretro @@ -25,7 +25,6 @@ asm_memory = 0 asm_render = 0 asm_ym2612 = 0 asm_misc = 0 -asm_cdpico = 0 asm_cdmemory = 0 asm_mix = 0 @@ -138,7 +137,6 @@ asm_memory = 1 asm_render = 1 asm_ym2612 = 1 asm_misc = 1 -asm_cdpico = 1 asm_cdmemory = 1 asm_mix = 1 endif diff --git a/jni/Android.mk b/jni/Android.mk 
index 1b8b9563..91b31fd7 100644 --- a/jni/Android.mk +++ b/jni/Android.mk @@ -24,7 +24,6 @@ asm_memory = 0 asm_render = 0 asm_ym2612 = 0 asm_misc = 0 -asm_cdpico = 0 asm_cdmemory = 0 asm_mix = 0 @@ -43,7 +42,6 @@ ifeq ($(TARGET_ARCH),arm) asm_render = 1 asm_ym2612 = 1 asm_misc = 1 - asm_cdpico = 1 asm_cdmemory = 1 asm_mix = 1 else diff --git a/pico/cd/mcd_arm.s b/pico/cd/mcd_arm.s deleted file mode 100644 index 9159e28b..00000000 --- a/pico/cd/mcd_arm.s +++ /dev/null @@ -1,185 +0,0 @@ -@* -@* CPU scheduling code -@* (C) notaz, 2007-2008 -@* -@* This work is licensed under the terms of MAME license. -@* See COPYING file in the top-level directory. -@* - -@ SekRunPS runs PicoCpuCM68k and PicoCpuCS68k interleaved in steps of PS_STEP_M68K -@ cycles. This is done without calling CycloneRun and jumping directly to -@ Cyclone code to avoid pushing/popping all the registers every time. - - -.equiv PS_STEP_M68K, ((488<<16)/20) @ ~24 - -@ .extern is ignored by gas, we add these here just to see what we depend on. -.extern CycloneJumpTab -.extern CycloneDoInterrupt -.extern PicoCpuCM68k -.extern PicoCpuCS68k -.extern SekCycleAim -.extern SekCycleCnt -.extern SekCycleAimS68k -.extern SekCycleCntS68k - - -.text -.align 4 - - -.global SekRunPS @ cyc_m68k, cyc_s68k - -SekRunPS: - stmfd sp!, {r4-r8,r10,r11,lr} - sub sp, sp, #2*4 @ sp[0] = main_cycle_cnt, sp[4] = run_cycle_cnt - - @ override CycloneEnd for both contexts - ldr r7, =PicoCpuCM68k - ldr lr, =PicoCpuCS68k - ldr r2, =CycloneEnd_M68k - ldr r3, =CycloneEnd_S68k - str r2, [r7,#0x98] - str r3, [lr,#0x98] - - @ update aims - ldr r8, =SekCycleAim - ldr r10,=SekCycleAimS68k - ldr r2, [r8] - ldr r3, [r10] - add r2, r2, r0 - add r3, r3, r1 - str r2, [r8] - str r3, [r10] - - ldr r6, =CycloneJumpTab - ldr r1, =SekCycleCnt - ldr r0, =((488<<16)-PS_STEP_M68K) - str r6, [r7,#0x54] - str r6, [lr,#0x54] @ make copies to avoid literal pools - - @ schedule m68k for the first time.. 
- ldr r1, [r1] - str r0, [sp] @ main target 'left cycle' counter - sub r1, r2, r1 - subs r5, r1, r0, asr #16 - ble schedule_s68k @ m68k has not enough cycles - - str r5, [sp,#4] @ run_cycle_cnt - b CycloneRunLocal - - - -CycloneEnd_M68k: - ldr r3, =SekCycleCnt - ldr r0, [sp,#4] @ run_cycle_cnt - ldr r1, [r3] - str r4, [r7,#0x40] ;@ Save Current PC + Memory Base - strb r10,[r7,#0x46] ;@ Save Flags (NZCV) - sub r0, r0, r5 @ subtract leftover cycles (which should be negative) - add r0, r0, r1 - str r0, [r3] - -schedule_s68k: - ldr r8, =SekCycleCntS68k - ldr r10,=SekCycleAimS68k - ldr r3, [sp] - ldr r8, [r8] - ldr r10,[r10] - - sub r0, r10, r8 - mov r2, r3 - add r3, r3, r2, asr #1 - add r3, r3, r2, asr #3 @ cycn_s68k = (cycn + cycn/2 + cycn/8) - - subs r5, r0, r3, asr #16 - ble schedule_m68k @ s68k has not enough cycles - - ldr r7, =PicoCpuCS68k - str r5, [sp,#4] @ run_cycle_cnt - b CycloneRunLocal - - - -CycloneEnd_S68k: - ldr r3, =SekCycleCntS68k - ldr r0, [sp,#4] @ run_cycle_cnt - ldr r1, [r3] - str r4, [r7,#0x40] ;@ Save Current PC + Memory Base - strb r10,[r7,#0x46] ;@ Save Flags (NZCV) - sub r0, r0, r5 @ subtract leftover cycles (should be negative) - add r0, r0, r1 - str r0, [r3] - -schedule_m68k: - ldr r1, =PS_STEP_M68K - ldr r3, [sp] @ main_cycle_cnt - ldr r8, =SekCycleCnt - ldr r10,=SekCycleAim - subs r3, r3, r1 - bmi SekRunPS_end - - ldr r8, [r8] - ldr r10,[r10] - str r3, [sp] @ update main_cycle_cnt - sub r0, r10, r8 - - subs r5, r0, r3, asr #16 - ble schedule_s68k @ m68k has not enough cycles - - ldr r7, =PicoCpuCM68k - str r5, [sp,#4] @ run_cycle_cnt - b CycloneRunLocal - - - -SekRunPS_end: - ldr r7, =PicoCpuCM68k - ldr lr, =PicoCpuCS68k - mov r0, #0 - str r0, [r7,#0x98] @ remove CycloneEnd handler - str r0, [lr,#0x98] - @ return - add sp, sp, #2*4 - ldmfd sp!, {r4-r8,r10,r11,pc} - - - -CycloneRunLocal: - ;@ r0-3 = Temporary registers - ldr r4,[r7,#0x40] ;@ r4 = Current PC + Memory Base - ;@ r5 = Cycles - ;@ r6 = Opcode Jump table - ;@ r7 = Pointer to Cpu 
Context - ;@ r8 = Current Opcode - ldrb r10,[r7,#0x46];@ r10 = Flags (NZCV) - ldr r1,[r7,#0x44] ;@ get SR high and IRQ level - orr r10,r10,r10,lsl #28 ;@ r10 = Flags 0xf0000000, cpsr format - -;@ CheckInterrupt: - movs r0,r1,lsr #24 ;@ Get IRQ level - beq NoIntsLocal - cmp r0,#6 ;@ irq>6 ? - andle r1,r1,#7 ;@ Get interrupt mask - cmple r0,r1 ;@ irq<=6: Is irq<=mask ? - bgt CycloneDoInterrupt -NoIntsLocal: - -;@ Check if our processor is in special state -;@ and jump to opcode handler if not - ldr r0,[r7,#0x58] ;@ state_flags - ldrh r8,[r4],#2 ;@ Fetch first opcode - tst r0,#0x03 ;@ special state? - andeq r10,r10,#0xf0000000 - ldreq pc,[r6,r8,asl #2] ;@ Jump to opcode handler - -CycloneSpecial2: - tst r0,#2 ;@ tracing? - bne CycloneDoTrace -;@ stopped or halted - sub r4,r4,#2 - ldr r1,[r7,#0x98] - mov r5,#0 - bx r1 - -@ vim:filetype=armasm diff --git a/platform/common/common.mak b/platform/common/common.mak index fb59ecf3..0cf8a7e4 100644 --- a/platform/common/common.mak +++ b/platform/common/common.mak @@ -8,7 +8,6 @@ asm_memory = 0 asm_render = 0 asm_ym2612 = 0 asm_misc = 0 -asm_cdpico = 0 asm_cdmemory = 0 asm_mix = 0 endif @@ -61,10 +60,6 @@ DEFINES += _ASM_MISC_C SRCS_COMMON += $(R)pico/misc_arm.s SRCS_COMMON += $(R)pico/cd/misc_arm.s endif -ifeq "$(asm_cdpico)" "1" -DEFINES += _ASM_CD_PICO_C -SRCS_COMMON += $(R)pico/cd/mcd_arm.s -endif ifeq "$(asm_cdmemory)" "1" DEFINES += _ASM_CD_MEMORY_C SRCS_COMMON += $(R)pico/cd/memory_arm.s diff --git a/platform/gizmondo/Makefile b/platform/gizmondo/Makefile index d7aab76a..7df468d6 100644 --- a/platform/gizmondo/Makefile +++ b/platform/gizmondo/Makefile @@ -7,7 +7,6 @@ asm_memory = 1 asm_render = 1 asm_ym2612 = 1 asm_misc = 1 -asm_cdpico = 1 asm_cdmemory = 1 amalgamate = 0 #profile = 1 From ae7830aae15ce83a9256fb3f893efef00e53a105 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 6 Aug 2017 23:22:53 +0300 Subject: [PATCH 0051/1110] get some gp2x stuff out of the way for others --- Makefile | 3 +++ pico/sound/{mix_arm.s => 
mix_arm.S} | 3 +++ platform/common/common.mak | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) rename pico/sound/{mix_arm.s => mix_arm.S} (99%) diff --git a/Makefile b/Makefile index ff8e0327..ca7dc49d 100644 --- a/Makefile +++ b/Makefile @@ -200,6 +200,9 @@ tools/textfilter: tools/textfilter.c .s.o: $(CC) $(CFLAGS) -c $< -o $@ +.S.o: + $(CC) $(CFLAGS) -c $< -o $@ + # special flags - perhaps fix this someday instead? pico/draw.o: CFLAGS += -fno-strict-aliasing pico/draw2.o: CFLAGS += -fno-strict-aliasing diff --git a/pico/sound/mix_arm.s b/pico/sound/mix_arm.S similarity index 99% rename from pico/sound/mix_arm.s rename to pico/sound/mix_arm.S index 60a09495..5088e61b 100644 --- a/pico/sound/mix_arm.s +++ b/pico/sound/mix_arm.S @@ -299,6 +299,7 @@ m32_16_mo_no_unal2: bx lr +#ifdef __GP2X__ .data .align 4 @@ -366,4 +367,6 @@ m32_16l_st_l_no_unal2: ldmfd sp!, {r4-r9,lr} bx lr +#endif /* __GP2X__ */ + @ vim:filetype=armasm diff --git a/platform/common/common.mak b/platform/common/common.mak index 0cf8a7e4..f89d7920 100644 --- a/platform/common/common.mak +++ b/platform/common/common.mak @@ -69,7 +69,7 @@ DEFINES += _ASM_32X_DRAW SRCS_COMMON += $(R)pico/32x/draw_arm.s endif ifeq "$(asm_mix)" "1" -SRCS_COMMON += $(R)pico/sound/mix_arm.s +SRCS_COMMON += $(R)pico/sound/mix_arm.S endif endif # ARCH=arm From 20a10d3ed94e12bfc0f415e186f0da5bc4efac49 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 8 Aug 2017 02:25:37 +0300 Subject: [PATCH 0052/1110] android: solve "text segment is not shareable" Some asm is disabled and some performance is lost by this. 
--- cpu/cyclone | 2 +- jni/Android.mk | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/cpu/cyclone b/cpu/cyclone index 355815eb..66dda842 160000 --- a/cpu/cyclone +++ b/cpu/cyclone @@ -1 +1 @@ -Subproject commit 355815ebb5b1c60916f66d043a43f3af7839589f +Subproject commit 66dda842eae01f47f5389b931ec9567fb0bbb6a1 diff --git a/jni/Android.mk b/jni/Android.mk index 91b31fd7..042c1f74 100644 --- a/jni/Android.mk +++ b/jni/Android.mk @@ -34,15 +34,19 @@ ifeq ($(TARGET_ARCH),arm) endif use_cyclone = 1 - use_drz80 = 1 + + # texrels, -perf ~~8% + use_drz80 = 0 + use_cz80 = 1 + use_sh2drc = 1 use_svpdrc = 1 - asm_memory = 1 +# asm_memory = 1 # texrels, -perf negligible asm_render = 1 - asm_ym2612 = 1 +# asm_ym2612 = 1 # texrels, -perf ~~4% asm_misc = 1 - asm_cdmemory = 1 +# asm_cdmemory = 1 # texrels asm_mix = 1 else use_fame = 1 From 61290a355398cc8f17b183700f637dc65a8b8ab4 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 11 Aug 2017 02:12:57 +0300 Subject: [PATCH 0053/1110] drz80: drop fast_sp for compatibility no measurable improvement in most cases anyway --- cpu/DrZ80/drz80.s | 4 +++- pico/z80if.c | 19 +++++++++++++++++-- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/cpu/DrZ80/drz80.s b/cpu/DrZ80/drz80.s index b92b2c2d..c2a64df3 100644 --- a/cpu/DrZ80/drz80.s +++ b/cpu/DrZ80/drz80.s @@ -12,7 +12,7 @@ .global DrZ80Ver .equiv INTERRUPT_MODE, 0 ;@0 = Use internal int handler, 1 = Use Mames int handler - .equiv FAST_Z80SP, 1 ;@0 = Use mem functions for stack pointer, 1 = Use direct mem pointer + .equiv FAST_Z80SP, 0 ;@0 = Use mem functions for stack pointer, 1 = Use direct mem pointer .equiv UPDATE_CONTEXT, 0 .equiv DRZ80_XMAP, 1 .equiv DRZ80_XMAP_MORE_INLINE, 1 @@ -212,6 +212,7 @@ z80_bad_jump: mov z80pc,r0 ldmfd sp!,{r3,r12,pc} +.if FAST_Z80SP z80_xmap_rebase_sp: ldr r1,[cpucontext,#z80_read8] sub r2,r0,#1 @@ -228,6 +229,7 @@ z80_xmap_rebase_sp: ldr pc,[cpucontext,#z80_rebaseSP] mov z80sp,r0 ldmfd sp!,{r3,r12,pc} +.endif @ 
FAST_Z80SP .endif @ DRZ80_XMAP diff --git a/pico/z80if.c b/pico/z80if.c index 8db4aa5c..90b3f7a9 100644 --- a/pico/z80if.c +++ b/pico/z80if.c @@ -14,9 +14,11 @@ uptr z80_read_map [0x10000 >> Z80_MEM_SHIFT]; uptr z80_write_map[0x10000 >> Z80_MEM_SHIFT]; #ifdef _USE_DRZ80 -struct DrZ80 drZ80; +// this causes trouble in some cases, like doukutsu putting sp in bank area +// no perf difference for most, upto 1-2% for some others +//#define FAST_Z80SP -static u32 drz80_sp_base; +struct DrZ80 drZ80; static void drz80_load_pcsp(u32 pc, u32 sp) { @@ -28,6 +30,8 @@ static void drz80_load_pcsp(u32 pc, u32 sp) drZ80.Z80PC_BASE <<= 1; drZ80.Z80PC = drZ80.Z80PC_BASE + pc; } + drZ80.Z80SP = sp; +#ifdef FAST_Z80SP drZ80.Z80SP_BASE = z80_read_map[sp >> Z80_MEM_SHIFT]; if (drZ80.Z80SP_BASE & (1<<31)) { elprintf(EL_STATUS|EL_ANOMALY, "load_pcsp: bad SP: %04x", sp); @@ -37,6 +41,7 @@ static void drz80_load_pcsp(u32 pc, u32 sp) drZ80.Z80SP_BASE <<= 1; drZ80.Z80SP = drZ80.Z80SP_BASE + sp; } +#endif } // called only if internal xmap rebase fails @@ -47,13 +52,19 @@ static unsigned int dz80_rebase_pc(unsigned short pc) return drZ80.Z80PC_BASE; } +#ifdef FAST_Z80SP +static u32 drz80_sp_base; + static unsigned int dz80_rebase_sp(unsigned short sp) { elprintf(EL_STATUS|EL_ANOMALY, "dz80_rebase_sp: fail on %04x", sp); drZ80.Z80SP_BASE = z80_read_map[drz80_sp_base >> Z80_MEM_SHIFT] << 1; return drZ80.Z80SP_BASE + (1 << Z80_MEM_SHIFT) - 0x100; } +#else +#define dz80_rebase_sp NULL #endif +#endif // _USE_DRZ80 void z80_init(void) @@ -91,9 +102,11 @@ void z80_reset(void) drZ80.Z80IX = 0xFFFF << 16; drZ80.Z80IY = 0xFFFF << 16; */ +#ifdef FAST_Z80SP // drZ80 is locked in single bank drz80_sp_base = (PicoAHW & PAHW_SMS) ? 
0xc000 : 0x0000; drZ80.Z80SP_BASE = z80_read_map[drz80_sp_base >> Z80_MEM_SHIFT] << 1; +#endif if (PicoAHW & PAHW_SMS) drZ80.Z80SP = drZ80.Z80SP_BASE + 0xdff0; // simulate BIOS // XXX: since we use direct SP pointer, it might make sense to force it to RAM, @@ -286,3 +299,5 @@ void z80_debug(char *dstr) sprintf(dstr, "Z80 state: PC: %04x SP: %04x\n", (unsigned int)(CZ80.PC - CZ80.BasePC), CZ80.SP.W); #endif } + +// vim:ts=2:sw=2:expandtab From 6a5b1b362ecf78ce2925068a5d938d319ff583a3 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 18 Aug 2017 03:44:25 +0300 Subject: [PATCH 0054/1110] sh2: handle some branch exceptions --- cpu/sh2/compiler.c | 31 +++++++++++++++++++++++++++---- cpu/sh2/compiler.h | 1 + cpu/sh2/mame/sh2pico.c | 12 ++++++++++++ 3 files changed, 40 insertions(+), 4 deletions(-) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 25ba9d2f..3a2b708c 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -2590,8 +2590,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) default: default_: - elprintf_sh2(sh2, EL_ANOMALY, - "drc: illegal op %04x @ %08x", op, pc - 2); + if (!(op_flags[i] & OF_B_IN_DS)) + elprintf_sh2(sh2, EL_ANOMALY, + "drc: illegal op %04x @ %08x", op, pc - 2); tmp = rcache_get_reg(SHR_SP, RC_GR_RMW); emith_sub_r_imm(tmp, 4*2); @@ -2604,10 +2605,16 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // push PC rcache_get_reg_arg(0, SHR_SP); tmp = rcache_get_tmp_arg(1); - emith_move_r_imm(tmp, pc - 2); + if (drcf.pending_branch_indirect) { + tmp2 = rcache_get_reg(SHR_PC, RC_GR_READ); + emith_move_r_r(tmp, tmp2); + } + else + emith_move_r_imm(tmp, pc - 2); emit_memhandler_write(2); // obtain new PC - emit_memhandler_read_rr(SHR_PC, SHR_VBR, 4 * 4, 2); + v = (op_flags[i] & OF_B_IN_DS) ? 
6 : 4; + emit_memhandler_read_rr(SHR_PC, SHR_VBR, v * 4, 2); // indirect jump -> back to dispatcher rcache_flush(); emith_jump(sh2_drc_dispatcher); @@ -4062,6 +4069,22 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, is_slave ? 's' : 'm', op, pc); break; } + + if (op_flags[i] & OF_DELAY_OP) { + switch (opd->op) { + case OP_BRANCH: + case OP_BRANCH_CT: + case OP_BRANCH_CF: + case OP_BRANCH_R: + case OP_BRANCH_RF: + elprintf(EL_ANOMALY, "%csh2 drc: branch in DS @ %08x", + is_slave ? 's' : 'm', pc); + opd->op = OP_UNHANDLED; + op_flags[i] |= OF_B_IN_DS; + next_is_delay = 0; + break; + } + } } i_end = i; end_pc = pc; diff --git a/cpu/sh2/compiler.h b/cpu/sh2/compiler.h index ef1944b4..61d8d2da 100644 --- a/cpu/sh2/compiler.h +++ b/cpu/sh2/compiler.h @@ -20,6 +20,7 @@ void sh2_drc_frame(void); #define OF_BTARGET (1 << 1) #define OF_T_SET (1 << 2) // T is known to be set #define OF_T_CLEAR (1 << 3) // ... clear +#define OF_B_IN_DS (1 << 4) void scan_block(unsigned int base_pc, int is_slave, unsigned char *op_flags, unsigned int *end_pc, diff --git a/cpu/sh2/mame/sh2pico.c b/cpu/sh2/mame/sh2pico.c index a3ad9f47..174d4691 100644 --- a/cpu/sh2/mame/sh2pico.c +++ b/cpu/sh2/mame/sh2pico.c @@ -122,6 +122,18 @@ int sh2_execute_interpreter(SH2 *sh2, int cycles) { sh2->ppc = sh2->delay; opcode = RW(sh2, sh2->delay); + + // TODO: more branch types + if ((opcode >> 13) == 5) { // BRA/BSR + sh2->r[15] -= 4; + WL(sh2, sh2->r[15], sh2->sr); + sh2->r[15] -= 4; + WL(sh2, sh2->r[15], sh2->pc); + sh2->pc = RL(sh2, sh2->vbr + 6 * 4); + sh2->icount -= 5; + opcode = 9; // NOP + } + sh2->pc -= 2; } else From 61c4e5117aab08e03144f169f610275a2282cf75 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 19 Aug 2017 00:38:03 +0300 Subject: [PATCH 0055/1110] 32x: skip unnecessary bios work in cd mode --- pico/32x/32x.c | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/pico/32x/32x.c b/pico/32x/32x.c index 
26162e49..e62c8209 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -134,28 +134,31 @@ void p32x_reset_sh2s(void) // if we don't have BIOS set, perform it's work here. // MSH2 if (p32x_bios_m == NULL) { - unsigned int idl_src, idl_dst, idl_size; // initial data load - unsigned int vbr; - - // initial data - idl_src = HWSWAP(*(unsigned int *)(Pico.rom + 0x3d4)) & ~0xf0000000; - idl_dst = HWSWAP(*(unsigned int *)(Pico.rom + 0x3d8)) & ~0xf0000000; - idl_size= HWSWAP(*(unsigned int *)(Pico.rom + 0x3dc)); - if (idl_size > Pico.romsize || idl_src + idl_size > Pico.romsize || - idl_size > 0x40000 || idl_dst + idl_size > 0x40000 || (idl_src & 3) || (idl_dst & 3)) { - elprintf(EL_STATUS|EL_ANOMALY, "32x: invalid initial data ptrs: %06x -> %06x, %06x", - idl_src, idl_dst, idl_size); - } - else - memcpy(Pico32xMem->sdram + idl_dst, Pico.rom + idl_src, idl_size); - - // GBR/VBR - vbr = HWSWAP(*(unsigned int *)(Pico.rom + 0x3e8)); sh2_set_gbr(0, 0x20004000); - sh2_set_vbr(0, vbr); - // checksum and M_OK - Pico32x.regs[0x28 / 2] = *(unsigned short *)(Pico.rom + 0x18e); + if (!(PicoAHW & PAHW_MCD)) { + unsigned int idl_src, idl_dst, idl_size; // initial data load + unsigned int vbr; + + // initial data + idl_src = HWSWAP(*(unsigned int *)(Pico.rom + 0x3d4)) & ~0xf0000000; + idl_dst = HWSWAP(*(unsigned int *)(Pico.rom + 0x3d8)) & ~0xf0000000; + idl_size= HWSWAP(*(unsigned int *)(Pico.rom + 0x3dc)); + if (idl_size > Pico.romsize || idl_src + idl_size > Pico.romsize || + idl_size > 0x40000 || idl_dst + idl_size > 0x40000 || (idl_src & 3) || (idl_dst & 3)) { + elprintf(EL_STATUS|EL_ANOMALY, "32x: invalid initial data ptrs: %06x -> %06x, %06x", + idl_src, idl_dst, idl_size); + } + else + memcpy(Pico32xMem->sdram + idl_dst, Pico.rom + idl_src, idl_size); + + // VBR + vbr = HWSWAP(*(unsigned int *)(Pico.rom + 0x3e8)); + sh2_set_vbr(0, vbr); + + // checksum and M_OK + Pico32x.regs[0x28 / 2] = *(unsigned short *)(Pico.rom + 0x18e); + } // program will set M_OK } From 
0104fd873b44d7352336fa494246803addffb66a Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 12 Sep 2017 01:25:26 +0300 Subject: [PATCH 0056/1110] musashi: fix build on newer gcc --- cpu/musashi/m68kcpu.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpu/musashi/m68kcpu.h b/cpu/musashi/m68kcpu.h index 83e92c5f..63055cbe 100644 --- a/cpu/musashi/m68kcpu.h +++ b/cpu/musashi/m68kcpu.h @@ -1623,7 +1623,7 @@ INLINE void m68ki_stack_frame_buserr(uint sr) /* Format 8 stack frame (68010). * 68010 only. This is the 29 word bus/address error frame. */ -void m68ki_stack_frame_1000(uint pc, uint sr, uint vector) +INLINE void m68ki_stack_frame_1000(uint pc, uint sr, uint vector) { /* VERSION * NUMBER @@ -1677,7 +1677,7 @@ void m68ki_stack_frame_1000(uint pc, uint sr, uint vector) * if the error happens at an instruction boundary. * PC stacked is address of next instruction. */ -void m68ki_stack_frame_1010(uint sr, uint vector, uint pc) +INLINE void m68ki_stack_frame_1010(uint sr, uint vector, uint pc) { /* INTERNAL REGISTER */ m68ki_push_16(0); @@ -1724,7 +1724,7 @@ void m68ki_stack_frame_1010(uint sr, uint vector, uint pc) * if the error happens during instruction execution. * PC stacked is address of instruction in progress. 
*/ -void m68ki_stack_frame_1011(uint sr, uint vector, uint pc) +INLINE void m68ki_stack_frame_1011(uint sr, uint vector, uint pc) { /* INTERNAL REGISTERS (18 words) */ m68ki_push_32(0); @@ -1968,7 +1968,7 @@ m68k_read_memory_8(0x00ffff01); /* Service an interrupt request and start exception processing */ -void m68ki_exception_interrupt(uint int_level) +INLINE void m68ki_exception_interrupt(uint int_level) { uint vector; uint sr; From e01cf375cbae14cd9f881e54915379f7fe3b156c Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 12 Sep 2017 01:26:49 +0300 Subject: [PATCH 0057/1110] musashi: run same amount of cycles as other cores for consistency --- cpu/musashi/m68kcpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpu/musashi/m68kcpu.c b/cpu/musashi/m68kcpu.c index 72bb217f..38469f32 100644 --- a/cpu/musashi/m68kcpu.c +++ b/cpu/musashi/m68kcpu.c @@ -808,7 +808,7 @@ int m68k_execute(int num_cycles) // notaz m68ki_trace_t1(); - while(GET_CYCLES() >= 0) + while(GET_CYCLES() > 0) // do { /* Set tracing accodring to T1. 
(T0 is done inside instruction) */ From a39743e3153322ec0e18bb0d05cd16af1d9a6d79 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 13 Sep 2017 01:46:30 +0300 Subject: [PATCH 0058/1110] musashi: don't generate unneeded handlers at least move16 was incorrectly hooked up --- cpu/musashi/m68kmake.c | 5 +++++ pico/sek.c | 1 + 2 files changed, 6 insertions(+) diff --git a/cpu/musashi/m68kmake.c b/cpu/musashi/m68kmake.c index f3157340..361481b9 100644 --- a/cpu/musashi/m68kmake.c +++ b/cpu/musashi/m68kmake.c @@ -1043,6 +1043,11 @@ void process_opcode_handlers(FILE* filep) if(opinfo == NULL) error_exit("Unable to find matching table entry for %s", func_name); +#if 1 /* PD hack: 000 only */ + if (opinfo->cpus[0] == UNSPECIFIED_CH) + continue; +#endif + replace->length = 0; /* Generate opcode variants */ diff --git a/pico/sek.c b/pico/sek.c index 86a351f6..4525c7c9 100644 --- a/pico/sek.c +++ b/pico/sek.c @@ -549,6 +549,7 @@ breakloop: printf("D%d: %08x A%d: %08x\n", i, x68k->dar[i], i, x68k->dar[i + 8]); printf("PC: %08x, %08x\n", x68k->pc, x68k->pc_prev); + printf("SR: %04x\n", x68k->sr); PDebugDumpMem(); exit(1); From 8d998330d08dedb1d8f364c7714394da93c615ac Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 13 Sep 2017 01:55:25 +0300 Subject: [PATCH 0059/1110] famec: improve trace a bit or break it (nobody uses it anyway), just want m68k_opcode_sizes to pass --- cpu/fame/famec.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/cpu/fame/famec.c b/cpu/fame/famec.c index 9e9dc153..83719304 100644 --- a/cpu/fame/famec.c +++ b/cpu/fame/famec.c @@ -742,6 +742,9 @@ static FAMEC_EXTRA_INLINE u32 execute_exception(s32 vect, u32 oldPC, u32 oldSR) //u32 oldSR = GET_SR; m68kcontext.io_cycle_counter -= exception_cycle_table[vect]; +#ifdef FAMEC_EMULATE_TRACE + m68kcontext.execinfo &= ~FM68K_EMULATE_TRACE; +#endif PRE_IO @@ -763,6 +766,7 @@ static FAMEC_EXTRA_INLINE u32 execute_exception(s32 vect, u32 oldPC, u32 oldSR) /* adjust SR */ flag_S = M68K_SR_S; + flag_T 
= 0; #ifndef FAMEC_32BIT_PC newPC&=M68K_ADR_MASK @@ -916,12 +920,11 @@ famec_Exec: #ifdef FAMEC_EMULATE_TRACE if (m68kcontext.execinfo & FM68K_EMULATE_TRACE) { - m68kcontext.io_cycle_counter = cycles_needed; + m68kcontext.io_cycle_counter += cycles_needed; cycles_needed = 0; m68kcontext.execinfo &= ~FM68K_EMULATE_TRACE; m68kcontext.execinfo |= FM68K_DO_TRACE; SET_PC(execute_exception(M68K_TRACE_EX, GET_PC, GET_SR)); - flag_T=0; if (m68kcontext.io_cycle_counter > 0) { //NEXT @@ -933,7 +936,7 @@ famec_Exec: if (cycles_needed != 0) { u32 line; - m68kcontext.io_cycle_counter = cycles_needed; + m68kcontext.io_cycle_counter += cycles_needed; cycles_needed = 0; if (m68kcontext.io_cycle_counter <= 0) goto famec_End; line=interrupt_chk__(); From 5c5d89adbb0cd269828496350d49ddc8e7be589f Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 13 Sep 2017 01:57:37 +0300 Subject: [PATCH 0060/1110] famec: fix CHK --- cpu/fame/famec_opcodes.h | 129 +++++++++++++++++++-------------------- 1 file changed, 64 insertions(+), 65 deletions(-) diff --git a/cpu/fame/famec_opcodes.h b/cpu/fame/famec_opcodes.h index c690b45c..0d670040 100644 --- a/cpu/fame/famec_opcodes.h +++ b/cpu/fame/famec_opcodes.h @@ -19573,12 +19573,11 @@ RET(14) // CHK OPCODE(0x4180) { - u32 adr, res; - u32 src, dst; + s32 src, res; - src = DREGu16((Opcode >> 0) & 7); - res = DREGu16((Opcode >> 9) & 7); - if (((s32)res < 0) || (res > src)) + src = DREGs16((Opcode >> 0) & 7); + res = DREGs16((Opcode >> 9) & 7); + if ((res < 0) || (res > src)) { flag_N = res >> 8; SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); @@ -19589,14 +19588,14 @@ RET(10) // CHK OPCODE(0x4190) { - u32 adr, res; - u32 src, dst; + s32 src, res; + u32 adr; adr = AREG((Opcode >> 0) & 7); PRE_IO - READ_WORD_F(adr, src) - res = DREGu16((Opcode >> 9) & 7); - if (((s32)res < 0) || (res > src)) + READSX_WORD_F(adr, src) + res = DREGs16((Opcode >> 9) & 7); + if ((res < 0) || (res > src)) { flag_N = res >> 8; SET_PC(execute_exception(M68K_CHK_EX, GET_PC, 
GET_SR)); @@ -19608,15 +19607,15 @@ RET(14) // CHK OPCODE(0x4198) { - u32 adr, res; - u32 src, dst; + s32 src, res; + u32 adr; adr = AREG((Opcode >> 0) & 7); AREG((Opcode >> 0) & 7) += 2; PRE_IO - READ_WORD_F(adr, src) - res = DREGu16((Opcode >> 9) & 7); - if (((s32)res < 0) || (res > src)) + READSX_WORD_F(adr, src) + res = DREGs16((Opcode >> 9) & 7); + if ((res < 0) || (res > src)) { flag_N = res >> 8; SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); @@ -19628,15 +19627,15 @@ RET(14) // CHK OPCODE(0x41A0) { - u32 adr, res; - u32 src, dst; + s32 src, res; + u32 adr; adr = AREG((Opcode >> 0) & 7) - 2; AREG((Opcode >> 0) & 7) = adr; PRE_IO - READ_WORD_F(adr, src) - res = DREGu16((Opcode >> 9) & 7); - if (((s32)res < 0) || (res > src)) + READSX_WORD_F(adr, src) + res = DREGs16((Opcode >> 9) & 7); + if ((res < 0) || (res > src)) { flag_N = res >> 8; SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); @@ -19648,15 +19647,15 @@ RET(16) // CHK OPCODE(0x41A8) { - u32 adr, res; - u32 src, dst; + s32 src, res; + u32 adr; FETCH_SWORD(adr); adr += AREG((Opcode >> 0) & 7); PRE_IO - READ_WORD_F(adr, src) - res = DREGu16((Opcode >> 9) & 7); - if (((s32)res < 0) || (res > src)) + READSX_WORD_F(adr, src) + res = DREGs16((Opcode >> 9) & 7); + if ((res < 0) || (res > src)) { flag_N = res >> 8; SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); @@ -19668,15 +19667,15 @@ RET(18) // CHK OPCODE(0x41B0) { - u32 adr, res; - u32 src, dst; + s32 src, res; + u32 adr; adr = AREG((Opcode >> 0) & 7); DECODE_EXT_WORD PRE_IO - READ_WORD_F(adr, src) - res = DREGu16((Opcode >> 9) & 7); - if (((s32)res < 0) || (res > src)) + READSX_WORD_F(adr, src) + res = DREGs16((Opcode >> 9) & 7); + if ((res < 0) || (res > src)) { flag_N = res >> 8; SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); @@ -19688,14 +19687,14 @@ RET(20) // CHK OPCODE(0x41B8) { - u32 adr, res; - u32 src, dst; + s32 src, res; + u32 adr; FETCH_SWORD(adr); PRE_IO - READ_WORD_F(adr, src) - res = DREGu16((Opcode >> 9) & 
7); - if (((s32)res < 0) || (res > src)) + READSX_WORD_F(adr, src) + res = DREGs16((Opcode >> 9) & 7); + if ((res < 0) || (res > src)) { flag_N = res >> 8; SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); @@ -19707,14 +19706,14 @@ RET(18) // CHK OPCODE(0x41B9) { - u32 adr, res; - u32 src, dst; + s32 src, res; + u32 adr; FETCH_LONG(adr); PRE_IO - READ_WORD_F(adr, src) - res = DREGu16((Opcode >> 9) & 7); - if (((s32)res < 0) || (res > src)) + READSX_WORD_F(adr, src) + res = DREGs16((Opcode >> 9) & 7); + if ((res < 0) || (res > src)) { flag_N = res >> 8; SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); @@ -19726,15 +19725,15 @@ RET(22) // CHK OPCODE(0x41BA) { - u32 adr, res; - u32 src, dst; + s32 src, res; + u32 adr; adr = GET_SWORD + GET_PC; PC++; PRE_IO - READ_WORD_F(adr, src) - res = DREGu16((Opcode >> 9) & 7); - if (((s32)res < 0) || (res > src)) + READSX_WORD_F(adr, src) + res = DREGs16((Opcode >> 9) & 7); + if ((res < 0) || (res > src)) { flag_N = res >> 8; SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); @@ -19746,15 +19745,15 @@ RET(18) // CHK OPCODE(0x41BB) { - u32 adr, res; - u32 src, dst; + s32 src, res; + u32 adr; adr = GET_PC; DECODE_EXT_WORD PRE_IO - READ_WORD_F(adr, src) - res = DREGu16((Opcode >> 9) & 7); - if (((s32)res < 0) || (res > src)) + READSX_WORD_F(adr, src) + res = DREGs16((Opcode >> 9) & 7); + if ((res < 0) || (res > src)) { flag_N = res >> 8; SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); @@ -19766,12 +19765,12 @@ RET(20) // CHK OPCODE(0x41BC) { - u32 adr, res; - u32 src, dst; + s32 src, res; + u32 adr; - FETCH_WORD(src); - res = DREGu16((Opcode >> 9) & 7); - if (((s32)res < 0) || (res > src)) + FETCH_SWORD(src); + res = DREGs16((Opcode >> 9) & 7); + if ((res < 0) || (res > src)) { flag_N = res >> 8; SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); @@ -19783,15 +19782,15 @@ RET(14) // CHK OPCODE(0x419F) { - u32 adr, res; - u32 src, dst; + s32 src, res; + u32 adr; adr = AREG(7); AREG(7) += 2; PRE_IO - 
READ_WORD_F(adr, src) - res = DREGu16((Opcode >> 9) & 7); - if (((s32)res < 0) || (res > src)) + READSX_WORD_F(adr, src) + res = DREGs16((Opcode >> 9) & 7); + if ((res < 0) || (res > src)) { flag_N = res >> 8; SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); @@ -19803,15 +19802,15 @@ RET(14) // CHK OPCODE(0x41A7) { - u32 adr, res; - u32 src, dst; + s32 src, res; + u32 adr; adr = AREG(7) - 2; AREG(7) = adr; PRE_IO - READ_WORD_F(adr, src) - res = DREGu16((Opcode >> 9) & 7); - if (((s32)res < 0) || (res > src)) + READSX_WORD_F(adr, src) + res = DREGs16((Opcode >> 9) & 7); + if ((res < 0) || (res > src)) { flag_N = res >> 8; SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); From f6aa2456a483b54566442d28d18a9f2fb2740125 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 13 Sep 2017 01:58:44 +0300 Subject: [PATCH 0061/1110] famec: adjust divu timing it's wrong, but I need it to be consistent with other cores now --- cpu/fame/famec_opcodes.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpu/fame/famec_opcodes.h b/cpu/fame/famec_opcodes.h index 0d670040..bd2efb02 100644 --- a/cpu/fame/famec_opcodes.h +++ b/cpu/fame/famec_opcodes.h @@ -27418,7 +27418,7 @@ OPCODE(0x80F9) { SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV -RET(162) +RET(152) #else RET(22) #endif @@ -27434,7 +27434,7 @@ RET(22) { flag_V = M68K_SR_V; #ifdef USE_CYCLONE_TIMING_DIV -RET(162) +RET(152) #else RET(82) #endif @@ -27447,7 +27447,7 @@ RET(162) DREGu32((Opcode >> 9) & 7) = res; } #ifdef USE_CYCLONE_TIMING_DIV -RET(162) +RET(152) #else RET(102) #endif @@ -27516,7 +27516,7 @@ OPCODE(0x80FB) { SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV -RET(160) +RET(150) #else RET(20) #endif @@ -27532,7 +27532,7 @@ RET(20) { flag_V = M68K_SR_V; #ifdef USE_CYCLONE_TIMING_DIV -RET(160) +RET(150) #else RET(80) #endif @@ -27545,7 +27545,7 @@ RET(160) DREGu32((Opcode >> 9) & 7) = res; } 
#ifdef USE_CYCLONE_TIMING_DIV -RET(160) +RET(150) #else RET(100) #endif From c6e1e9779a5652b58ae638e40f4b01ccb223b0d0 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 20 Sep 2017 23:37:58 +0300 Subject: [PATCH 0062/1110] 68k cores: fix bcd instructions passing flamewing's test now --- Makefile | 2 + cpu/cyclone | 2 +- cpu/fame/famec_opcodes.h | 280 ++++++++++++++++++++++----------------- cpu/musashi/m68k_in.c | 190 +++++++++++++------------- 4 files changed, 263 insertions(+), 211 deletions(-) diff --git a/Makefile b/Makefile index ca7dc49d..8a95d992 100644 --- a/Makefile +++ b/Makefile @@ -217,7 +217,9 @@ pico/cd/gfx_cd.o: CFLAGS += -fno-strict-aliasing # on x86, this is reduced by ~300MB when debug info is off (but not on ARM) # not using O3 and -fno-expensive-optimizations seems to also help, but you may # want to remove this stuff for better performance if your compiler can handle it +ifndef DEBUG cpu/fame/famec.o: CFLAGS += -g0 -O2 -fno-expensive-optimizations +endif # random deps pico/carthw/svp/compiler.o : cpu/drc/emit_$(ARCH).c diff --git a/cpu/cyclone b/cpu/cyclone index 66dda842..b889883d 160000 --- a/cpu/cyclone +++ b/cpu/cyclone @@ -1 +1 @@ -Subproject commit 66dda842eae01f47f5389b931ec9567fb0bbb6a1 +Subproject commit b889883d36b2d247488c82d79d1eaab4dd41d236 diff --git a/cpu/fame/famec_opcodes.h b/cpu/fame/famec_opcodes.h index bd2efb02..99ba3791 100644 --- a/cpu/fame/famec_opcodes.h +++ b/cpu/fame/famec_opcodes.h @@ -16936,18 +16936,20 @@ OPCODE(0x4800) u32 adr, res; u32 src, dst; - res = DREGu8((Opcode >> 0) & 7); - res = 0x9a - res - ((flag_X >> M68K_SR_X_SFT) & 1); + dst = DREGu8((Opcode >> 0) & 7); + res = -dst - ((flag_X >> M68K_SR_X_SFT) & 1); - if (res != 0x9a) + if (res != 0) { - if ((res & 0x0f) == 0xa) res = (res & 0xf0) + 0x10; - res &= 0xFF; - DREGu8((Opcode >> 0) & 7) = res; + flag_V = res; + if (((res|dst) & 0x0f) == 0) res = (res & 0xf0) + 6; + res = (res + 0x9a) & 0xFF; + DREGu8((Opcode >> 0) & 7) = res; + flag_V &= ~res; flag_NotZ |= 
res; flag_X = flag_C = M68K_SR_C; } - else flag_X = flag_C = 0; + else flag_V = flag_X = flag_C = 0; flag_N = res; RET(6) } @@ -16960,18 +16962,20 @@ OPCODE(0x4810) adr = AREG((Opcode >> 0) & 7); PRE_IO - READ_BYTE_F(adr, res) - res = 0x9a - res - ((flag_X >> M68K_SR_X_SFT) & 1); + READ_BYTE_F(adr, dst) + res = -dst - ((flag_X >> M68K_SR_X_SFT) & 1); - if (res != 0x9a) + if (res != 0) { - if ((res & 0x0f) == 0xa) res = (res & 0xf0) + 0x10; - res &= 0xFF; - WRITE_BYTE_F(adr, res) + flag_V = res; + if (((res|dst) & 0x0f) == 0) res = (res & 0xf0) + 6; + res = (res + 0x9a) & 0xFF; + WRITE_BYTE_F(adr, res) + flag_V &= ~res; flag_NotZ |= res; flag_X = flag_C = M68K_SR_C; } - else flag_X = flag_C = 0; + else flag_V = flag_X = flag_C = 0; flag_N = res; POST_IO RET(12) @@ -16986,18 +16990,20 @@ OPCODE(0x4818) adr = AREG((Opcode >> 0) & 7); AREG((Opcode >> 0) & 7) += 1; PRE_IO - READ_BYTE_F(adr, res) - res = 0x9a - res - ((flag_X >> M68K_SR_X_SFT) & 1); + READ_BYTE_F(adr, dst) + res = -dst - ((flag_X >> M68K_SR_X_SFT) & 1); - if (res != 0x9a) + if (res != 0) { - if ((res & 0x0f) == 0xa) res = (res & 0xf0) + 0x10; - res &= 0xFF; - WRITE_BYTE_F(adr, res) + flag_V = res; + if (((res|dst) & 0x0f) == 0) res = (res & 0xf0) + 6; + res = (res + 0x9a) & 0xFF; + WRITE_BYTE_F(adr, res) + flag_V &= ~res; flag_NotZ |= res; flag_X = flag_C = M68K_SR_C; } - else flag_X = flag_C = 0; + else flag_V = flag_X = flag_C = 0; flag_N = res; POST_IO RET(12) @@ -17012,18 +17018,20 @@ OPCODE(0x4820) adr = AREG((Opcode >> 0) & 7) - 1; AREG((Opcode >> 0) & 7) = adr; PRE_IO - READ_BYTE_F(adr, res) - res = 0x9a - res - ((flag_X >> M68K_SR_X_SFT) & 1); + READ_BYTE_F(adr, dst) + res = -dst - ((flag_X >> M68K_SR_X_SFT) & 1); - if (res != 0x9a) + if (res != 0) { - if ((res & 0x0f) == 0xa) res = (res & 0xf0) + 0x10; - res &= 0xFF; - WRITE_BYTE_F(adr, res) + flag_V = res; + if (((res|dst) & 0x0f) == 0) res = (res & 0xf0) + 6; + res = (res + 0x9a) & 0xFF; + WRITE_BYTE_F(adr, res) + flag_V &= ~res; flag_NotZ |= 
res; flag_X = flag_C = M68K_SR_C; } - else flag_X = flag_C = 0; + else flag_V = flag_X = flag_C = 0; flag_N = res; POST_IO RET(14) @@ -17038,18 +17046,20 @@ OPCODE(0x4828) FETCH_SWORD(adr); adr += AREG((Opcode >> 0) & 7); PRE_IO - READ_BYTE_F(adr, res) - res = 0x9a - res - ((flag_X >> M68K_SR_X_SFT) & 1); + READ_BYTE_F(adr, dst) + res = -dst - ((flag_X >> M68K_SR_X_SFT) & 1); - if (res != 0x9a) + if (res != 0) { - if ((res & 0x0f) == 0xa) res = (res & 0xf0) + 0x10; - res &= 0xFF; - WRITE_BYTE_F(adr, res) + flag_V = res; + if (((res|dst) & 0x0f) == 0) res = (res & 0xf0) + 6; + res = (res + 0x9a) & 0xFF; + WRITE_BYTE_F(adr, res) + flag_V &= ~res; flag_NotZ |= res; flag_X = flag_C = M68K_SR_C; } - else flag_X = flag_C = 0; + else flag_V = flag_X = flag_C = 0; flag_N = res; POST_IO RET(16) @@ -17064,18 +17074,20 @@ OPCODE(0x4830) adr = AREG((Opcode >> 0) & 7); DECODE_EXT_WORD PRE_IO - READ_BYTE_F(adr, res) - res = 0x9a - res - ((flag_X >> M68K_SR_X_SFT) & 1); + READ_BYTE_F(adr, dst) + res = -dst - ((flag_X >> M68K_SR_X_SFT) & 1); - if (res != 0x9a) + if (res != 0) { - if ((res & 0x0f) == 0xa) res = (res & 0xf0) + 0x10; - res &= 0xFF; - WRITE_BYTE_F(adr, res) + flag_V = res; + if (((res|dst) & 0x0f) == 0) res = (res & 0xf0) + 6; + res = (res + 0x9a) & 0xFF; + WRITE_BYTE_F(adr, res) + flag_V &= ~res; flag_NotZ |= res; flag_X = flag_C = M68K_SR_C; } - else flag_X = flag_C = 0; + else flag_V = flag_X = flag_C = 0; flag_N = res; POST_IO RET(18) @@ -17089,18 +17101,20 @@ OPCODE(0x4838) FETCH_SWORD(adr); PRE_IO - READ_BYTE_F(adr, res) - res = 0x9a - res - ((flag_X >> M68K_SR_X_SFT) & 1); + READ_BYTE_F(adr, dst) + res = -dst - ((flag_X >> M68K_SR_X_SFT) & 1); - if (res != 0x9a) + if (res != 0) { - if ((res & 0x0f) == 0xa) res = (res & 0xf0) + 0x10; - res &= 0xFF; - WRITE_BYTE_F(adr, res) + flag_V = res; + if (((res|dst) & 0x0f) == 0) res = (res & 0xf0) + 6; + res = (res + 0x9a) & 0xFF; + WRITE_BYTE_F(adr, res) + flag_V &= ~res; flag_NotZ |= res; flag_X = flag_C = M68K_SR_C; } 
- else flag_X = flag_C = 0; + else flag_V = flag_X = flag_C = 0; flag_N = res; POST_IO RET(16) @@ -17114,18 +17128,20 @@ OPCODE(0x4839) FETCH_LONG(adr); PRE_IO - READ_BYTE_F(adr, res) - res = 0x9a - res - ((flag_X >> M68K_SR_X_SFT) & 1); + READ_BYTE_F(adr, dst) + res = -dst - ((flag_X >> M68K_SR_X_SFT) & 1); - if (res != 0x9a) + if (res != 0) { - if ((res & 0x0f) == 0xa) res = (res & 0xf0) + 0x10; - res &= 0xFF; - WRITE_BYTE_F(adr, res) + flag_V = res; + if (((res|dst) & 0x0f) == 0) res = (res & 0xf0) + 6; + res = (res + 0x9a) & 0xFF; + WRITE_BYTE_F(adr, res) + flag_V &= ~res; flag_NotZ |= res; flag_X = flag_C = M68K_SR_C; } - else flag_X = flag_C = 0; + else flag_V = flag_X = flag_C = 0; flag_N = res; POST_IO RET(20) @@ -17140,18 +17156,20 @@ OPCODE(0x481F) adr = AREG(7); AREG(7) += 2; PRE_IO - READ_BYTE_F(adr, res) - res = 0x9a - res - ((flag_X >> M68K_SR_X_SFT) & 1); + READ_BYTE_F(adr, dst) + res = -dst - ((flag_X >> M68K_SR_X_SFT) & 1); - if (res != 0x9a) + if (res != 0) { - if ((res & 0x0f) == 0xa) res = (res & 0xf0) + 0x10; - res &= 0xFF; - WRITE_BYTE_F(adr, res) + flag_V = res; + if (((res|dst) & 0x0f) == 0) res = (res & 0xf0) + 6; + res = (res + 0x9a) & 0xFF; + WRITE_BYTE_F(adr, res) + flag_V &= ~res; flag_NotZ |= res; flag_X = flag_C = M68K_SR_C; } - else flag_X = flag_C = 0; + else flag_V = flag_X = flag_C = 0; flag_N = res; POST_IO RET(12) @@ -17166,18 +17184,20 @@ OPCODE(0x4827) adr = AREG(7) - 2; AREG(7) = adr; PRE_IO - READ_BYTE_F(adr, res) - res = 0x9a - res - ((flag_X >> M68K_SR_X_SFT) & 1); + READ_BYTE_F(adr, dst) + res = -dst - ((flag_X >> M68K_SR_X_SFT) & 1); - if (res != 0x9a) + if (res != 0) { - if ((res & 0x0f) == 0xa) res = (res & 0xf0) + 0x10; - res &= 0xFF; - WRITE_BYTE_F(adr, res) + flag_V = res; + if (((res|dst) & 0x0f) == 0) res = (res & 0xf0) + 6; + res = (res + 0x9a) & 0xFF; + WRITE_BYTE_F(adr, res) + flag_V &= ~res; flag_NotZ |= res; flag_X = flag_C = M68K_SR_C; } - else flag_X = flag_C = 0; + else flag_V = flag_X = flag_C = 0; flag_N 
= res; POST_IO RET(14) @@ -26933,19 +26953,19 @@ OPCODE(0x8100) { u32 adr, res; u32 src, dst; + u32 corf = 0; src = DREGu8((Opcode >> 0) & 7); dst = DREGu8((Opcode >> 9) & 7); res = (dst & 0xF) - (src & 0xF) - ((flag_X >> M68K_SR_X_SFT) & 1); - if (res > 9) res -= 6; + if (res > 0xF) corf = 6; res += (dst & 0xF0) - (src & 0xF0); - if (res > 0x99) - { - res += 0xA0; - flag_X = flag_C = M68K_SR_C; - } - else flag_X = flag_C = 0; - flag_NotZ |= res & 0xFF; + flag_V = res; + flag_X = flag_C = (s32)res < (s32)corf ? M68K_SR_C : 0; + if (res > 0xff) res += 0xA0; + res = (res - corf) & 0xFF; + flag_V &= ~res; + flag_NotZ |= res; flag_N = res; DREGu8((Opcode >> 9) & 7) = res; RET(6) @@ -26956,6 +26976,7 @@ OPCODE(0x8108) { u32 adr, res; u32 src, dst; + u32 corf = 0; adr = AREG((Opcode >> 0) & 7) - 1; AREG((Opcode >> 0) & 7) = adr; @@ -26965,15 +26986,14 @@ OPCODE(0x8108) AREG((Opcode >> 9) & 7) = adr; READ_BYTE_F(adr, dst) res = (dst & 0xF) - (src & 0xF) - ((flag_X >> M68K_SR_X_SFT) & 1); - if (res > 9) res -= 6; + if (res > 0xF) corf = 6; res += (dst & 0xF0) - (src & 0xF0); - if (res > 0x99) - { - res += 0xA0; - flag_X = flag_C = M68K_SR_C; - } - else flag_X = flag_C = 0; - flag_NotZ |= res & 0xFF; + flag_V = res; + flag_X = flag_C = (s32)res < (s32)corf ? M68K_SR_C : 0; + if (res > 0xff) res += 0xA0; + res = (res - corf) & 0xFF; + flag_V &= ~res; + flag_NotZ |= res; flag_N = res; WRITE_BYTE_F(adr, res) POST_IO @@ -26985,6 +27005,7 @@ OPCODE(0x810F) { u32 adr, res; u32 src, dst; + u32 corf = 0; adr = AREG(7) - 2; AREG(7) = adr; @@ -26994,15 +27015,14 @@ OPCODE(0x810F) AREG((Opcode >> 9) & 7) = adr; READ_BYTE_F(adr, dst) res = (dst & 0xF) - (src & 0xF) - ((flag_X >> M68K_SR_X_SFT) & 1); - if (res > 9) res -= 6; + if (res > 0xF) corf = 6; res += (dst & 0xF0) - (src & 0xF0); - if (res > 0x99) - { - res += 0xA0; - flag_X = flag_C = M68K_SR_C; - } - else flag_X = flag_C = 0; - flag_NotZ |= res & 0xFF; + flag_V = res; + flag_X = flag_C = (s32)res < (s32)corf ? 
M68K_SR_C : 0; + if (res > 0xff) res += 0xA0; + res = (res - corf) & 0xFF; + flag_V &= ~res; + flag_NotZ |= res; flag_N = res; WRITE_BYTE_F(adr, res) POST_IO @@ -27014,6 +27034,7 @@ OPCODE(0x8F08) { u32 adr, res; u32 src, dst; + u32 corf = 0; adr = AREG((Opcode >> 0) & 7) - 1; AREG((Opcode >> 0) & 7) = adr; @@ -27023,15 +27044,14 @@ OPCODE(0x8F08) AREG(7) = adr; READ_BYTE_F(adr, dst) res = (dst & 0xF) - (src & 0xF) - ((flag_X >> M68K_SR_X_SFT) & 1); - if (res > 9) res -= 6; + if (res > 0xF) corf = 6; res += (dst & 0xF0) - (src & 0xF0); - if (res > 0x99) - { - res += 0xA0; - flag_X = flag_C = M68K_SR_C; - } - else flag_X = flag_C = 0; - flag_NotZ |= res & 0xFF; + flag_V = res; + flag_X = flag_C = (s32)res < (s32)corf ? M68K_SR_C : 0; + if (res > 0xff) res += 0xA0; + res = (res - corf) & 0xFF; + flag_V &= ~res; + flag_NotZ |= res; flag_N = res; WRITE_BYTE_F(adr, res) POST_IO @@ -27043,6 +27063,7 @@ OPCODE(0x8F0F) { u32 adr, res; u32 src, dst; + u32 corf = 0; adr = AREG(7) - 2; AREG(7) = adr; @@ -27052,15 +27073,14 @@ OPCODE(0x8F0F) AREG(7) = adr; READ_BYTE_F(adr, dst) res = (dst & 0xF) - (src & 0xF) - ((flag_X >> M68K_SR_X_SFT) & 1); - if (res > 9) res -= 6; + if (res > 0xF) corf = 6; res += (dst & 0xF0) - (src & 0xF0); - if (res > 0x99) - { - res += 0xA0; - flag_X = flag_C = M68K_SR_C; - } - else flag_X = flag_C = 0; - flag_NotZ |= res & 0xFF; + flag_V = res; + flag_X = flag_C = (s32)res < (s32)corf ? 
M68K_SR_C : 0; + if (res > 0xff) res += 0xA0; + res = (res - corf) & 0xFF; + flag_V &= ~res; + flag_NotZ |= res; flag_N = res; WRITE_BYTE_F(adr, res) POST_IO @@ -34120,18 +34140,22 @@ OPCODE(0xC100) { u32 adr, res; u32 src, dst; + u32 corf = 0; src = DREGu8((Opcode >> 0) & 7); dst = DREGu8((Opcode >> 9) & 7); res = (dst & 0xF) + (src & 0xF) + ((flag_X >> M68K_SR_X_SFT) & 1); - if (res > 9) res += 6; + if (res > 9) corf = 6; res += (dst & 0xF0) + (src & 0xF0); - if (res > 0x99) + flag_V = ~res; + res += corf; + if (res > 0x9F) { res -= 0xA0; flag_X = flag_C = M68K_SR_C; } else flag_X = flag_C = 0; + flag_V &= res; flag_NotZ |= res & 0xFF; flag_N = res; DREGu8((Opcode >> 9) & 7) = res; @@ -34143,6 +34167,7 @@ OPCODE(0xC108) { u32 adr, res; u32 src, dst; + u32 corf = 0; adr = AREG((Opcode >> 0) & 7) - 1; AREG((Opcode >> 0) & 7) = adr; @@ -34152,14 +34177,17 @@ OPCODE(0xC108) AREG((Opcode >> 9) & 7) = adr; READ_BYTE_F(adr, dst) res = (dst & 0xF) + (src & 0xF) + ((flag_X >> M68K_SR_X_SFT) & 1); - if (res > 9) res += 6; + if (res > 9) corf = 6; res += (dst & 0xF0) + (src & 0xF0); - if (res > 0x99) + flag_V = ~res; + res += corf; + if (res > 0x9F) { res -= 0xA0; flag_X = flag_C = M68K_SR_C; } else flag_X = flag_C = 0; + flag_V &= res; flag_NotZ |= res & 0xFF; flag_N = res; WRITE_BYTE_F(adr, res) @@ -34172,6 +34200,7 @@ OPCODE(0xC10F) { u32 adr, res; u32 src, dst; + u32 corf = 0; adr = AREG(7) - 2; AREG(7) = adr; @@ -34181,14 +34210,17 @@ OPCODE(0xC10F) AREG((Opcode >> 9) & 7) = adr; READ_BYTE_F(adr, dst) res = (dst & 0xF) + (src & 0xF) + ((flag_X >> M68K_SR_X_SFT) & 1); - if (res > 9) res += 6; + if (res > 9) corf = 6; res += (dst & 0xF0) + (src & 0xF0); - if (res > 0x99) + flag_V = ~res; + res += corf; + if (res > 0x9F) { res -= 0xA0; flag_X = flag_C = M68K_SR_C; } else flag_X = flag_C = 0; + flag_V &= res; flag_NotZ |= res & 0xFF; flag_N = res; WRITE_BYTE_F(adr, res) @@ -34201,6 +34233,7 @@ OPCODE(0xCF08) { u32 adr, res; u32 src, dst; + u32 corf = 0; adr = AREG((Opcode 
>> 0) & 7) - 1; AREG((Opcode >> 0) & 7) = adr; @@ -34210,14 +34243,17 @@ OPCODE(0xCF08) AREG(7) = adr; READ_BYTE_F(adr, dst) res = (dst & 0xF) + (src & 0xF) + ((flag_X >> M68K_SR_X_SFT) & 1); - if (res > 9) res += 6; + if (res > 9) corf = 6; res += (dst & 0xF0) + (src & 0xF0); - if (res > 0x99) + flag_V = ~res; + res += corf; + if (res > 0x9F) { res -= 0xA0; flag_X = flag_C = M68K_SR_C; } else flag_X = flag_C = 0; + flag_V &= res; flag_NotZ |= res & 0xFF; flag_N = res; WRITE_BYTE_F(adr, res) @@ -34230,6 +34266,7 @@ OPCODE(0xCF0F) { u32 adr, res; u32 src, dst; + u32 corf = 0; adr = AREG(7) - 2; AREG(7) = adr; @@ -34239,14 +34276,17 @@ OPCODE(0xCF0F) AREG(7) = adr; READ_BYTE_F(adr, dst) res = (dst & 0xF) + (src & 0xF) + ((flag_X >> M68K_SR_X_SFT) & 1); - if (res > 9) res += 6; + if (res > 9) corf = 6; res += (dst & 0xF0) + (src & 0xF0); - if (res > 0x99) + flag_V = ~res; + res += corf; + if (res > 0x9F) { res -= 0xA0; flag_X = flag_C = M68K_SR_C; } else flag_X = flag_C = 0; + flag_V &= res; flag_NotZ |= res & 0xFF; flag_N = res; WRITE_BYTE_F(adr, res) diff --git a/cpu/musashi/m68k_in.c b/cpu/musashi/m68k_in.c index 5c9cc066..d1756a53 100644 --- a/cpu/musashi/m68k_in.c +++ b/cpu/musashi/m68k_in.c @@ -918,13 +918,15 @@ M68KMAKE_OP(abcd, 8, rr, .) 
uint src = DY; uint dst = *r_dst; uint res = LOW_NIBBLE(src) + LOW_NIBBLE(dst) + XFLAG_AS_1(); - - FLAG_V = ~res; /* Undefined V behavior */ + uint corf = 0; if(res > 9) - res += 6; + corf = 6; res += HIGH_NIBBLE(src) + HIGH_NIBBLE(dst); - FLAG_X = FLAG_C = (res > 0x99) << 8; + FLAG_V = ~res; /* Undefined V behavior */ + + res += corf; + FLAG_X = FLAG_C = (res > 0x9f) << 8; if(FLAG_C) res -= 0xa0; @@ -944,13 +946,15 @@ M68KMAKE_OP(abcd, 8, mm, ax7) uint ea = EA_A7_PD_8(); uint dst = m68ki_read_8(ea); uint res = LOW_NIBBLE(src) + LOW_NIBBLE(dst) + XFLAG_AS_1(); - - FLAG_V = ~res; /* Undefined V behavior */ + uint corf = 0; if(res > 9) - res += 6; + corf = 6; res += HIGH_NIBBLE(src) + HIGH_NIBBLE(dst); - FLAG_X = FLAG_C = (res > 0x99) << 8; + FLAG_V = ~res; /* Undefined V behavior */ + + res += corf; + FLAG_X = FLAG_C = (res > 0x9f) << 8; if(FLAG_C) res -= 0xa0; @@ -970,13 +974,15 @@ M68KMAKE_OP(abcd, 8, mm, ay7) uint ea = EA_AX_PD_8(); uint dst = m68ki_read_8(ea); uint res = LOW_NIBBLE(src) + LOW_NIBBLE(dst) + XFLAG_AS_1(); - - FLAG_V = ~res; /* Undefined V behavior */ + uint corf = 0; if(res > 9) - res += 6; + corf = 6; res += HIGH_NIBBLE(src) + HIGH_NIBBLE(dst); - FLAG_X = FLAG_C = (res > 0x99) << 8; + FLAG_V = ~res; /* Undefined V behavior */ + + res += corf; + FLAG_X = FLAG_C = (res > 0x9f) << 8; if(FLAG_C) res -= 0xa0; @@ -996,13 +1002,15 @@ M68KMAKE_OP(abcd, 8, mm, axy7) uint ea = EA_A7_PD_8(); uint dst = m68ki_read_8(ea); uint res = LOW_NIBBLE(src) + LOW_NIBBLE(dst) + XFLAG_AS_1(); - - FLAG_V = ~res; /* Undefined V behavior */ + uint corf = 0; if(res > 9) - res += 6; + corf = 6; res += HIGH_NIBBLE(src) + HIGH_NIBBLE(dst); - FLAG_X = FLAG_C = (res > 0x99) << 8; + FLAG_V = ~res; /* Undefined V behavior */ + + res += corf; + FLAG_X = FLAG_C = (res > 0x9f) << 8; if(FLAG_C) res -= 0xa0; @@ -1022,13 +1030,15 @@ M68KMAKE_OP(abcd, 8, mm, .) 
uint ea = EA_AX_PD_8(); uint dst = m68ki_read_8(ea); uint res = LOW_NIBBLE(src) + LOW_NIBBLE(dst) + XFLAG_AS_1(); - - FLAG_V = ~res; /* Undefined V behavior */ + uint corf = 0; if(res > 9) - res += 6; + corf = 6; res += HIGH_NIBBLE(src) + HIGH_NIBBLE(dst); - FLAG_X = FLAG_C = (res > 0x99) << 8; + FLAG_V = ~res; /* Undefined V behavior */ + + res += corf; + FLAG_X = FLAG_C = (res > 0x9f) << 8; if(FLAG_C) res -= 0xa0; @@ -7794,19 +7804,19 @@ M68KMAKE_OP(mull, 32, ., .) M68KMAKE_OP(nbcd, 8, ., d) { uint* r_dst = &DY; - uint dst = *r_dst; - uint res = MASK_OUT_ABOVE_8(0x9a - dst - XFLAG_AS_1()); + uint dst = MASK_OUT_ABOVE_8(*r_dst); + uint res = -dst - XFLAG_AS_1(); - if(res != 0x9a) + if(res != 0) { - FLAG_V = ~res; /* Undefined V behavior */ + FLAG_V = res; /* Undefined V behavior */ - if((res & 0x0f) == 0xa) - res = (res & 0xf0) + 0x10; + if(((res|dst) & 0x0f) == 0) + res = (res & 0xf0) + 6; - res = MASK_OUT_ABOVE_8(res); + res = MASK_OUT_ABOVE_8(res + 0x9a); - FLAG_V &= res; /* Undefined V behavior part II */ + FLAG_V &= ~res; /* Undefined V behavior part II */ *r_dst = MASK_OUT_BELOW_8(*r_dst) | res; @@ -7828,18 +7838,18 @@ M68KMAKE_OP(nbcd, 8, ., .) { uint ea = M68KMAKE_GET_EA_AY_8; uint dst = m68ki_read_8(ea); - uint res = MASK_OUT_ABOVE_8(0x9a - dst - XFLAG_AS_1()); + uint res = -dst - XFLAG_AS_1(); - if(res != 0x9a) + if(res != 0) { - FLAG_V = ~res; /* Undefined V behavior */ + FLAG_V = res; /* Undefined V behavior */ - if((res & 0x0f) == 0xa) - res = (res & 0xf0) + 0x10; + if(((res|dst) & 0x0f) == 0) + res = (res & 0xf0) + 6; - res = MASK_OUT_ABOVE_8(res); + res = MASK_OUT_ABOVE_8(res + 0x9a); - FLAG_V &= res; /* Undefined V behavior part II */ + FLAG_V &= ~res; /* Undefined V behavior part II */ m68ki_write_8(ea, MASK_OUT_ABOVE_8(res)); @@ -9359,26 +9369,26 @@ M68KMAKE_OP(sbcd, 8, rr, .) 
uint src = DY; uint dst = *r_dst; uint res = LOW_NIBBLE(dst) - LOW_NIBBLE(src) - XFLAG_AS_1(); + uint corf = 0; -// FLAG_V = ~res; /* Undefined V behavior */ - FLAG_V = VFLAG_CLEAR; /* Undefined in Motorola's M68000PM/AD rev.1 and safer to assume cleared. */ - - if(res > 9) - res -= 6; + if(res > 0xf) + corf = 6; res += HIGH_NIBBLE(dst) - HIGH_NIBBLE(src); - if(res > 0x99) + FLAG_V = res; /* Undefined V behavior */ + if(res > 0xff) { res += 0xa0; FLAG_X = FLAG_C = CFLAG_SET; - FLAG_N = NFLAG_SET; /* Undefined in Motorola's M68000PM/AD rev.1 and safer to follow carry. */ } + else if(res < corf) + FLAG_X = FLAG_C = CFLAG_SET; else - FLAG_N = FLAG_X = FLAG_C = 0; + FLAG_X = FLAG_C = 0; - res = MASK_OUT_ABOVE_8(res); + res = MASK_OUT_ABOVE_8(res - corf); -// FLAG_V &= res; /* Undefined V behavior part II */ -// FLAG_N = NFLAG_8(res); /* Undefined N behavior */ + FLAG_V &= ~res; /* Undefined V behavior part II */ + FLAG_N = NFLAG_8(res); /* Undefined N behavior */ FLAG_Z |= res; *r_dst = MASK_OUT_BELOW_8(*r_dst) | res; @@ -9391,26 +9401,26 @@ M68KMAKE_OP(sbcd, 8, mm, ax7) uint ea = EA_A7_PD_8(); uint dst = m68ki_read_8(ea); uint res = LOW_NIBBLE(dst) - LOW_NIBBLE(src) - XFLAG_AS_1(); + uint corf = 0; -// FLAG_V = ~res; /* Undefined V behavior */ - FLAG_V = VFLAG_CLEAR; /* Undefined in Motorola's M68000PM/AD rev.1 and safer to return zero. */ - - if(res > 9) - res -= 6; + if(res > 0xf) + corf = 6; res += HIGH_NIBBLE(dst) - HIGH_NIBBLE(src); - if(res > 0x99) + FLAG_V = res; /* Undefined V behavior */ + if(res > 0xff) { res += 0xa0; FLAG_X = FLAG_C = CFLAG_SET; - FLAG_N = NFLAG_SET; /* Undefined in Motorola's M68000PM/AD rev.1 and safer to follow carry. 
*/ } + else if(res < corf) + FLAG_X = FLAG_C = CFLAG_SET; else - FLAG_N = FLAG_X = FLAG_C = 0; + FLAG_X = FLAG_C = 0; - res = MASK_OUT_ABOVE_8(res); + res = MASK_OUT_ABOVE_8(res - corf); -// FLAG_V &= res; /* Undefined V behavior part II */ -// FLAG_N = NFLAG_8(res); /* Undefined N behavior */ + FLAG_V &= ~res; /* Undefined V behavior part II */ + FLAG_N = NFLAG_8(res); /* Undefined N behavior */ FLAG_Z |= res; m68ki_write_8(ea, res); @@ -9423,26 +9433,26 @@ M68KMAKE_OP(sbcd, 8, mm, ay7) uint ea = EA_AX_PD_8(); uint dst = m68ki_read_8(ea); uint res = LOW_NIBBLE(dst) - LOW_NIBBLE(src) - XFLAG_AS_1(); + uint corf = 0; -// FLAG_V = ~res; /* Undefined V behavior */ - FLAG_V = VFLAG_CLEAR; /* Undefined in Motorola's M68000PM/AD rev.1 and safer to return zero. */ - - if(res > 9) - res -= 6; + if(res > 0xf) + corf = 6; res += HIGH_NIBBLE(dst) - HIGH_NIBBLE(src); - if(res > 0x99) + FLAG_V = res; /* Undefined V behavior */ + if(res > 0xff) { res += 0xa0; FLAG_X = FLAG_C = CFLAG_SET; - FLAG_N = NFLAG_SET; /* Undefined in Motorola's M68000PM/AD rev.1 and safer to follow carry. */ } + else if(res < corf) + FLAG_X = FLAG_C = CFLAG_SET; else - FLAG_N = FLAG_X = FLAG_C = 0; + FLAG_X = FLAG_C = 0; - res = MASK_OUT_ABOVE_8(res); + res = MASK_OUT_ABOVE_8(res - corf); -// FLAG_V &= res; /* Undefined V behavior part II */ -// FLAG_N = NFLAG_8(res); /* Undefined N behavior */ + FLAG_V &= ~res; /* Undefined V behavior part II */ + FLAG_N = NFLAG_8(res); /* Undefined N behavior */ FLAG_Z |= res; m68ki_write_8(ea, res); @@ -9455,26 +9465,26 @@ M68KMAKE_OP(sbcd, 8, mm, axy7) uint ea = EA_A7_PD_8(); uint dst = m68ki_read_8(ea); uint res = LOW_NIBBLE(dst) - LOW_NIBBLE(src) - XFLAG_AS_1(); + uint corf = 0; -// FLAG_V = ~res; /* Undefined V behavior */ - FLAG_V = VFLAG_CLEAR; /* Undefined in Motorola's M68000PM/AD rev.1 and safer to return zero. 
*/ - - if(res > 9) - res -= 6; + if(res > 0xf) + corf = 6; res += HIGH_NIBBLE(dst) - HIGH_NIBBLE(src); - if(res > 0x99) + FLAG_V = res; /* Undefined V behavior */ + if(res > 0xff) { res += 0xa0; FLAG_X = FLAG_C = CFLAG_SET; - FLAG_N = NFLAG_SET; /* Undefined in Motorola's M68000PM/AD rev.1 and safer to follow carry. */ } + else if(res < corf) + FLAG_X = FLAG_C = CFLAG_SET; else - FLAG_N = FLAG_X = FLAG_C = 0; + FLAG_X = FLAG_C = 0; - res = MASK_OUT_ABOVE_8(res); + res = MASK_OUT_ABOVE_8(res - corf); -// FLAG_V &= res; /* Undefined V behavior part II */ -// FLAG_N = NFLAG_8(res); /* Undefined N behavior */ + FLAG_V &= ~res; /* Undefined V behavior part II */ + FLAG_N = NFLAG_8(res); /* Undefined N behavior */ FLAG_Z |= res; m68ki_write_8(ea, res); @@ -9487,26 +9497,26 @@ M68KMAKE_OP(sbcd, 8, mm, .) uint ea = EA_AX_PD_8(); uint dst = m68ki_read_8(ea); uint res = LOW_NIBBLE(dst) - LOW_NIBBLE(src) - XFLAG_AS_1(); + uint corf = 0; -// FLAG_V = ~res; /* Undefined V behavior */ - FLAG_V = VFLAG_CLEAR; /* Undefined in Motorola's M68000PM/AD rev.1 and safer to return zero. */ - - if(res > 9) - res -= 6; + if(res > 0xf) + corf = 6; res += HIGH_NIBBLE(dst) - HIGH_NIBBLE(src); - if(res > 0x99) + FLAG_V = res; /* Undefined V behavior */ + if(res > 0xff) { res += 0xa0; FLAG_X = FLAG_C = CFLAG_SET; - FLAG_N = NFLAG_SET; /* Undefined in Motorola's M68000PM/AD rev.1 and safer to follow carry. 
*/ } + else if(res < corf) + FLAG_X = FLAG_C = CFLAG_SET; else - FLAG_N = FLAG_X = FLAG_C = 0; + FLAG_X = FLAG_C = 0; - res = MASK_OUT_ABOVE_8(res); + res = MASK_OUT_ABOVE_8(res - corf); -// FLAG_V &= res; /* Undefined V behavior part II */ -// FLAG_N = NFLAG_8(res); /* Undefined N behavior */ + FLAG_V &= ~res; /* Undefined V behavior part II */ + FLAG_N = NFLAG_8(res); /* Undefined N behavior */ FLAG_Z |= res; m68ki_write_8(ea, res); From 2b15cea82e68d32f5e926ab065ac233b89cfc200 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 20 Sep 2017 23:39:20 +0300 Subject: [PATCH 0063/1110] workaround idledet breaking op test --- cpu/fame/famec_opcodes.h | 4 ++++ pico/sek.c | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/cpu/fame/famec_opcodes.h b/cpu/fame/famec_opcodes.h index 99ba3791..3ba72751 100644 --- a/cpu/fame/famec_opcodes.h +++ b/cpu/fame/famec_opcodes.h @@ -18574,6 +18574,10 @@ RET(8) // ILLEGAL OPCODE(0x4AFC) { +#ifdef PICODRIVE_HACK + extern void SekFinishIdleDet(void); + SekFinishIdleDet(); +#endif SET_PC(execute_exception(M68K_ILLEGAL_INSTRUCTION_EX, GET_PC-2, GET_SR)); RET(0) } diff --git a/pico/sek.c b/pico/sek.c index 4525c7c9..8bf0341b 100644 --- a/pico/sek.c +++ b/pico/sek.c @@ -60,6 +60,8 @@ static int SekUnrecognizedOpcode() PicoCpuCM68k.state_flags |= 1; return 1; } + // happened once - may happen again + SekFinishIdleDet(); #ifdef EMU_M68K // debugging cyclone { extern int have_illegal; @@ -421,6 +423,8 @@ int SekRegisterIdlePatch(unsigned int pc, int oldop, int newop, void *ctx) void SekFinishIdleDet(void) { + if (idledet_count < 0) + return; #ifdef EMU_C68K CycloneFinishIdle(); #endif @@ -439,6 +443,7 @@ void SekFinishIdleDet(void) else elprintf(EL_STATUS|EL_IDLE, "idle: don't know how to restore %04x", *op); } + idledet_count = -1; } From 1dd0871f208e59792aff088ea41f471673eeaa7e Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 20 Sep 2017 23:43:38 +0300 Subject: [PATCH 0064/1110] memory: allow SRAM word writes Was allowing both low and high 
byte writes by byte, but not word, which was stupid. --- pico/memory.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pico/memory.c b/pico/memory.c index 4f38e5e2..5be66f5c 100644 --- a/pico/memory.c +++ b/pico/memory.c @@ -448,11 +448,14 @@ static void PicoWrite16_sram(u32 a, u32 d) EEPROM_write16(d); } else { - // XXX: hardware could easily use MSB too.. u8 *pm = (u8 *)(SRam.data - SRam.start + a); - if (*pm != (u8)d) { + if (pm[0] != (u8)(d >> 8)) { SRam.changed = 1; - *pm = (u8)d; + pm[0] = (u8)(d >> 8); + } + if (pm[1] != (u8)d) { + SRam.changed = 1; + pm[1] = (u8)d; } } } From 97253703211c5c80dc43f8c7ee5e15ab14031325 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 20 Sep 2017 23:46:28 +0300 Subject: [PATCH 0065/1110] update libpicofe --- platform/libpicofe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/platform/libpicofe b/platform/libpicofe index 0415ebf1..e7f58005 160000 --- a/platform/libpicofe +++ b/platform/libpicofe @@ -1 +1 @@ -Subproject commit 0415ebf191d0d277d1dfba1bebe6f051a5fc89a8 +Subproject commit e7f580052c03fa3f4603051c1b718be4bd8b2db7 From 8c2137f11f1b73f35f688f068de89b40b1f10484 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 22 Sep 2017 00:46:42 +0300 Subject: [PATCH 0066/1110] import SPI EEPROM from Genesis-Plus-GX Author: EkeEke --- pico/carthw/eeprom_spi.c | 358 +++++++++++++++++++++++++++++++++++++++ pico/carthw/eeprom_spi.h | 47 +++++ 2 files changed, 405 insertions(+) create mode 100644 pico/carthw/eeprom_spi.c create mode 100644 pico/carthw/eeprom_spi.h diff --git a/pico/carthw/eeprom_spi.c b/pico/carthw/eeprom_spi.c new file mode 100644 index 00000000..921eb6ab --- /dev/null +++ b/pico/carthw/eeprom_spi.c @@ -0,0 +1,358 @@ +/**************************************************************************** + * Genesis Plus + * SPI Serial EEPROM (25xxx/95xxx) support + * + * Copyright (C) 2012 Eke-Eke (Genesis Plus GX) + * + * Redistribution and use of this code or any derivative works are 
permitted + * provided that the following conditions are met: + * + * - Redistributions may not be sold, nor may they be used in a commercial + * product or activity. + * + * - Redistributions that are modified from the original source must include the + * complete source code, including the source code for all components used by a + * binary built from the modified sources. However, as a special exception, the + * source code distributed need not include anything that is normally distributed + * (in either source or binary form) with the major components (compiler, kernel, + * and so on) of the operating system on which the executable runs, unless that + * component itself accompanies the executable. + * + * - Redistributions must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other + * materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + ****************************************************************************************/ + +#include "shared.h" + +/* max supported size 64KB (25x512/95x512) */ +#define SIZE_MASK 0xffff +#define PAGE_MASK 0x7f + +/* hard-coded board implementation (!WP pin not used) */ +#define BIT_DATA (0) +#define BIT_CLK (1) +#define BIT_HOLD (2) +#define BIT_CS (3) + +typedef enum +{ + STANDBY, + GET_OPCODE, + GET_ADDRESS, + WRITE_BYTE, + READ_BYTE +} T_STATE_SPI; + +typedef struct +{ + uint8 cs; /* !CS line state */ + uint8 clk; /* SCLK line state */ + uint8 out; /* SO line state */ + uint8 status; /* status register */ + uint8 opcode; /* 8-bit opcode */ + uint8 buffer; /* 8-bit data buffer */ + uint16 addr; /* 16-bit address */ + uint32 cycles; /* current operation cycle */ + T_STATE_SPI state; /* current operation state */ +} T_EEPROM_SPI; + +static T_EEPROM_SPI spi_eeprom; + +void eeprom_spi_init() +{ + /* reset eeprom state */ + memset(&spi_eeprom, 0, sizeof(T_EEPROM_SPI)); + spi_eeprom.out = 1; + spi_eeprom.state = GET_OPCODE; + + /* enable backup RAM */ + sram.custom = 2; + sram.on = 1; +} + +void eeprom_spi_write(unsigned char data) +{ + /* Make sure !HOLD is high */ + if (data & (1 << BIT_HOLD)) + { + /* Check !CS state */ + if (data & (1 << BIT_CS)) + { + /* !CS high -> end of current operation */ + spi_eeprom.cycles = 0; + spi_eeprom.out = 1; + spi_eeprom.opcode = 0; + spi_eeprom.state = GET_OPCODE; + } + else + { + /* !CS low -> process current operation */ + switch (spi_eeprom.state) + { + case GET_OPCODE: + { + /* latch data on CLK positive edge */ + if ((data & (1 << BIT_CLK)) && !spi_eeprom.clk) + { + /* 8-bit opcode buffer */ + spi_eeprom.opcode |= ((data >> BIT_DATA) & 1); + spi_eeprom.cycles++; + + /* last bit ? 
*/ + if (spi_eeprom.cycles == 8) + { + /* reset cycles count */ + spi_eeprom.cycles = 0; + + /* Decode instruction */ + switch (spi_eeprom.opcode) + { + case 0x01: + { + /* WRITE STATUS */ + spi_eeprom.buffer = 0; + spi_eeprom.state = WRITE_BYTE; + break; + } + + case 0x02: + { + /* WRITE BYTE */ + spi_eeprom.addr = 0; + spi_eeprom.state = GET_ADDRESS; + break; + } + + case 0x03: + { + /* READ BYTE */ + spi_eeprom.addr = 0; + spi_eeprom.state = GET_ADDRESS; + break; + } + + case 0x04: + { + /* WRITE DISABLE */ + spi_eeprom.status &= ~0x02; + spi_eeprom.state = STANDBY; + break; + } + + case 0x05: + { + /* READ STATUS */ + spi_eeprom.buffer = spi_eeprom.status; + spi_eeprom.state = READ_BYTE; + break; + } + + case 0x06: + { + /* WRITE ENABLE */ + spi_eeprom.status |= 0x02; + spi_eeprom.state = STANDBY; + break; + } + + default: + { + /* specific instructions (not supported) */ + spi_eeprom.state = STANDBY; + break; + } + } + } + else + { + /* shift opcode value */ + spi_eeprom.opcode = spi_eeprom.opcode << 1; + } + } + break; + } + + case GET_ADDRESS: + { + /* latch data on CLK positive edge */ + if ((data & (1 << BIT_CLK)) && !spi_eeprom.clk) + { + /* 16-bit address */ + spi_eeprom.addr |= ((data >> BIT_DATA) & 1); + spi_eeprom.cycles++; + + /* last bit ? 
*/ + if (spi_eeprom.cycles == 16) + { + /* reset cycles count */ + spi_eeprom.cycles = 0; + + /* mask unused address bits */ + spi_eeprom.addr &= SIZE_MASK; + + /* operation type */ + if (spi_eeprom.opcode & 0x01) + { + /* READ operation */ + spi_eeprom.buffer = sram.sram[spi_eeprom.addr]; + spi_eeprom.state = READ_BYTE; + } + else + { + /* WRITE operation */ + spi_eeprom.buffer = 0; + spi_eeprom.state = WRITE_BYTE; + } + } + else + { + /* shift address value */ + spi_eeprom.addr = spi_eeprom.addr << 1; + } + } + break; + } + + case WRITE_BYTE: + { + /* latch data on CLK positive edge */ + if ((data & (1 << BIT_CLK)) && !spi_eeprom.clk) + { + /* 8-bit data buffer */ + spi_eeprom.buffer |= ((data >> BIT_DATA) & 1); + spi_eeprom.cycles++; + + /* last bit ? */ + if (spi_eeprom.cycles == 8) + { + /* reset cycles count */ + spi_eeprom.cycles = 0; + + /* write data to destination */ + if (spi_eeprom.opcode & 0x01) + { + /* update status register */ + spi_eeprom.status = (spi_eeprom.status & 0x02) | (spi_eeprom.buffer & 0x0c); + + /* wait for operation end */ + spi_eeprom.state = STANDBY; + } + else + { + /* Memory Array (write-protected) */ + if (spi_eeprom.status & 2) + { + /* check array protection bits (BP0, BP1) */ + switch ((spi_eeprom.status >> 2) & 0x03) + { + case 0x01: + { + /* $C000-$FFFF (sector #3) is protected */ + if (spi_eeprom.addr < 0xC000) + { + sram.sram[spi_eeprom.addr] = spi_eeprom.buffer; + } + break; + } + + case 0x02: + { + /* $8000-$FFFF (sectors #2 and #3) is protected */ + if (spi_eeprom.addr < 0x8000) + { + sram.sram[spi_eeprom.addr] = spi_eeprom.buffer; + } + break; + } + + case 0x03: + { + /* $0000-$FFFF (all sectors) is protected */ + break; + } + + default: + { + /* no sectors protected */ + sram.sram[spi_eeprom.addr] = spi_eeprom.buffer; + break; + } + } + } + + /* reset data buffer */ + spi_eeprom.buffer = 0; + + /* increase array address (sequential writes are limited within the same page) */ + spi_eeprom.addr = (spi_eeprom.addr & 
~PAGE_MASK) | ((spi_eeprom.addr + 1) & PAGE_MASK); + } + } + else + { + /* shift data buffer value */ + spi_eeprom.buffer = spi_eeprom.buffer << 1; + } + } + break; + } + + case READ_BYTE: + { + /* output data on CLK positive edge */ + if ((data & (1 << BIT_CLK)) && !spi_eeprom.clk) + { + /* read out bits */ + spi_eeprom.out = (spi_eeprom.buffer >> (7 - spi_eeprom.cycles)) & 1; + spi_eeprom.cycles++; + + /* last bit ? */ + if (spi_eeprom.cycles == 8) + { + /* reset cycles count */ + spi_eeprom.cycles = 0; + + /* read from memory array ? */ + if (spi_eeprom.opcode == 0x03) + { + /* read next array byte */ + spi_eeprom.addr = (spi_eeprom.addr + 1) & SIZE_MASK; + spi_eeprom.buffer = sram.sram[spi_eeprom.addr]; + } + } + } + break; + } + + default: + { + /* wait for !CS low->high transition */ + break; + } + } + } + } + + /* update input lines */ + spi_eeprom.cs = (data >> BIT_CS) & 1; + spi_eeprom.clk = (data >> BIT_CLK) & 1; +} + +unsigned int eeprom_spi_read(unsigned int address) +{ + return (spi_eeprom.out << BIT_DATA); +} + diff --git a/pico/carthw/eeprom_spi.h b/pico/carthw/eeprom_spi.h new file mode 100644 index 00000000..1001e6e8 --- /dev/null +++ b/pico/carthw/eeprom_spi.h @@ -0,0 +1,47 @@ +/**************************************************************************** + * Genesis Plus + * SPI Serial EEPROM (25XX512 only) support + * + * Copyright (C) 2012 Eke-Eke (Genesis Plus GX) + * + * Redistribution and use of this code or any derivative works are permitted + * provided that the following conditions are met: + * + * - Redistributions may not be sold, nor may they be used in a commercial + * product or activity. + * + * - Redistributions that are modified from the original source must include the + * complete source code, including the source code for all components used by a + * binary built from the modified sources. 
However, as a special exception, the + * source code distributed need not include anything that is normally distributed + * (in either source or binary form) with the major components (compiler, kernel, + * and so on) of the operating system on which the executable runs, unless that + * component itself accompanies the executable. + * + * - Redistributions must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other + * materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + ****************************************************************************************/ + +#ifndef _EEPROM_SPI_H_ +#define _EEPROM_SPI_H_ + +/* Function prototypes */ +extern void eeprom_spi_init(); +extern void eeprom_spi_write(unsigned char data); +extern unsigned int eeprom_spi_read(unsigned int address); + +#endif From 6a47c2d4fbd413d86c7f6a0cac8e4a4a163f3713 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 22 Sep 2017 02:00:50 +0300 Subject: [PATCH 0067/1110] integrate SPI EEPROM --- pico/carthw/carthw.c | 21 +++++++++++++++++++-- pico/carthw/eeprom_spi.c | 22 ++++++++++++---------- pico/carthw/eeprom_spi.h | 2 +- platform/common/common.mak | 1 + 4 files changed, 33 insertions(+), 13 deletions(-) diff --git a/pico/carthw/carthw.c b/pico/carthw/carthw.c index 06936fbf..0d6b9c5c 100644 --- a/pico/carthw/carthw.c +++ b/pico/carthw/carthw.c @@ -8,6 +8,7 @@ #include "../pico_int.h" #include "../memory.h" +#include "eeprom_spi.h" /* The SSFII mapper */ @@ -270,6 +271,7 @@ static carthw_state_chunk carthw_pier_state[] = { { CHUNK_CARTHW, sizeof(pier_regs), pier_regs }, { CHUNK_CARTHW + 1, sizeof(pier_dump_prot), &pier_dump_prot }, + { CHUNK_CARTHW + 2, 0, NULL }, // filled later { 0, 0, NULL } }; @@ -308,7 +310,8 @@ static void carthw_pier_write8(u32 a, u32 d) base = d << 19; goto do_map; case 0x09: - // TODO + SRam.changed = 1; + eeprom_spi_write(d); break; case 0x0b: // eeprom read @@ -346,7 +349,7 @@ static u32 carthw_pier_read8(u32 a) return PicoRead8_io(a); if (a == 0xa1300b) - return 0; // TODO + return eeprom_spi_read(a); elprintf(EL_UIO, "pier r8 [%06x] @%06x", a, SekPc); return 0; @@ -415,10 +418,13 @@ static void carthw_pier_reset(void) pier_regs[1] = pier_regs[2] = pier_regs[3] = 0; pier_dump_prot = 3; carthw_pier_statef(); + eeprom_spi_init(NULL); } void carthw_pier_startup(void) { + void *eeprom_state; + int eeprom_size = 0; int i; elprintf(EL_STATUS, "Pier Solar mapper startup"); @@ -434,6 +440,16 @@ void carthw_pier_startup(void) for (i = 0; i < 
M68K_BANK_SIZE; i += 0x8000) memcpy(Pico.rom + Pico.romsize + i, Pico.rom, 0x8000); + // save EEPROM + eeprom_state = eeprom_spi_init(&eeprom_size); + SRam.flags = 0; + SRam.size = 0x10000; + SRam.data = calloc(1, SRam.size); + if (!SRam.data) + SRam.size = 0; + carthw_pier_state[2].ptr = eeprom_state; + carthw_pier_state[2].size = eeprom_size; + PicoCartMemSetup = carthw_pier_mem_setup; PicoResetHook = carthw_pier_reset; PicoLoadStateHook = carthw_pier_statef; @@ -679,3 +695,4 @@ void carthw_prot_lk3_startup(void) PicoCartMemSetup = carthw_prot_lk3_mem_setup; } +// vim:ts=2:sw=2:expandtab diff --git a/pico/carthw/eeprom_spi.c b/pico/carthw/eeprom_spi.c index 921eb6ab..9643067f 100644 --- a/pico/carthw/eeprom_spi.c +++ b/pico/carthw/eeprom_spi.c @@ -36,7 +36,9 @@ * ****************************************************************************************/ -#include "shared.h" +#include "../pico_int.h" +#include "../cd/genplus_macros.h" +#include "eeprom_spi.h" /* max supported size 64KB (25x512/95x512) */ #define SIZE_MASK 0xffff @@ -72,16 +74,16 @@ typedef struct static T_EEPROM_SPI spi_eeprom; -void eeprom_spi_init() +void *eeprom_spi_init(int *size) { /* reset eeprom state */ memset(&spi_eeprom, 0, sizeof(T_EEPROM_SPI)); spi_eeprom.out = 1; spi_eeprom.state = GET_OPCODE; - /* enable backup RAM */ - sram.custom = 2; - sram.on = 1; + if (size) + *size = sizeof(T_EEPROM_SPI); + return &spi_eeprom; } void eeprom_spi_write(unsigned char data) @@ -208,7 +210,7 @@ void eeprom_spi_write(unsigned char data) if (spi_eeprom.opcode & 0x01) { /* READ operation */ - spi_eeprom.buffer = sram.sram[spi_eeprom.addr]; + spi_eeprom.buffer = SRam.data[spi_eeprom.addr]; spi_eeprom.state = READ_BYTE; } else @@ -264,7 +266,7 @@ void eeprom_spi_write(unsigned char data) /* $C000-$FFFF (sector #3) is protected */ if (spi_eeprom.addr < 0xC000) { - sram.sram[spi_eeprom.addr] = spi_eeprom.buffer; + SRam.data[spi_eeprom.addr] = spi_eeprom.buffer; } break; } @@ -274,7 +276,7 @@ void 
eeprom_spi_write(unsigned char data) /* $8000-$FFFF (sectors #2 and #3) is protected */ if (spi_eeprom.addr < 0x8000) { - sram.sram[spi_eeprom.addr] = spi_eeprom.buffer; + SRam.data[spi_eeprom.addr] = spi_eeprom.buffer; } break; } @@ -288,7 +290,7 @@ void eeprom_spi_write(unsigned char data) default: { /* no sectors protected */ - sram.sram[spi_eeprom.addr] = spi_eeprom.buffer; + SRam.data[spi_eeprom.addr] = spi_eeprom.buffer; break; } } @@ -330,7 +332,7 @@ void eeprom_spi_write(unsigned char data) { /* read next array byte */ spi_eeprom.addr = (spi_eeprom.addr + 1) & SIZE_MASK; - spi_eeprom.buffer = sram.sram[spi_eeprom.addr]; + spi_eeprom.buffer = SRam.data[spi_eeprom.addr]; } } } diff --git a/pico/carthw/eeprom_spi.h b/pico/carthw/eeprom_spi.h index 1001e6e8..2d60e0f4 100644 --- a/pico/carthw/eeprom_spi.h +++ b/pico/carthw/eeprom_spi.h @@ -40,7 +40,7 @@ #define _EEPROM_SPI_H_ /* Function prototypes */ -extern void eeprom_spi_init(); +extern void *eeprom_spi_init(int *size); extern void eeprom_spi_write(unsigned char data); extern unsigned int eeprom_spi_read(unsigned int address); diff --git a/platform/common/common.mak b/platform/common/common.mak index f89d7920..32b7a649 100644 --- a/platform/common/common.mak +++ b/platform/common/common.mak @@ -102,6 +102,7 @@ endif SRCS_COMMON += $(R)pico/pico/pico.c $(R)pico/pico/memory.c $(R)pico/pico/xpcm.c # carthw SRCS_COMMON += $(R)pico/carthw/carthw.c +SRCS_COMMON += $(R)pico/carthw/eeprom_spi.c # SVP SRCS_COMMON += $(R)pico/carthw/svp/svp.c $(R)pico/carthw/svp/memory.c \ $(R)pico/carthw/svp/ssp16.c From 7feeb8806243f9f0bc6c3d6e957b97d12cecb7df Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 22 Sep 2017 02:16:15 +0300 Subject: [PATCH 0068/1110] make dma honour banking I was sure I had it done before, turns out not. 
--- pico/memory.h | 11 +++++++++++ pico/videoport.c | 5 +++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/pico/memory.h b/pico/memory.h index afca0826..4260a359 100644 --- a/pico/memory.h +++ b/pico/memory.h @@ -133,6 +133,17 @@ void name(u32 a, u32 d) \ } \ } +static __inline void *m68k_dma_source(u32 a) +{ + uptr v; + a &= 0x00fffffe; + v = m68k_read16_map[a >> M68K_MEM_SHIFT]; + if (map_flag_set(v)) + return NULL; + else + return (void *)((v << 1) + a); +} + // 32x typedef struct { uptr addr; // stores (membase >> 1) or ((handler >> 1) | (1<<31)) diff --git a/pico/videoport.c b/pico/videoport.c index 9ddc1679..35ece480 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -8,6 +8,7 @@ */ #include "pico_int.h" +#include "memory.h" int line_base_cycles; extern const unsigned char hcounts_32[]; @@ -134,10 +135,10 @@ static void DmaSlow(int len) // if we have DmaHook, let it handle ROM because of possible DMA delay if (PicoDmaHook && PicoDmaHook(source, len, &pd, &pdend)); else if (source%04x: invalid src", Pico.video.type, source, a); return; } From 0c7d1ba332b26f4ac67199e8ecbb826651f8512a Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 24 Sep 2017 01:27:31 +0300 Subject: [PATCH 0069/1110] some dma improvements no idea if anything needs this, shouldn't hurt either --- pico/carthw/svp/svp.c | 17 ++- pico/memory.h | 16 ++- pico/pico.c | 3 +- pico/pico_int.h | 2 +- pico/videoport.c | 266 +++++++++++++++++++++--------------------- 5 files changed, 156 insertions(+), 148 deletions(-) diff --git a/pico/carthw/svp/svp.c b/pico/carthw/svp/svp.c index 1bccb3f6..8861de04 100644 --- a/pico/carthw/svp/svp.c +++ b/pico/carthw/svp/svp.c @@ -91,23 +91,20 @@ static void PicoSVPLine(void) } -static int PicoSVPDma(unsigned int source, int len, unsigned short **srcp, unsigned short **limitp) +static int PicoSVPDma(unsigned int source, int len, unsigned short **base, unsigned int *mask) { if (source < Pico.romsize) // Rom { - source -= 2; - *srcp = (unsigned short 
*)(Pico.rom + (source&~1)); - *limitp = (unsigned short *)(Pico.rom + Pico.romsize); - return 1; + *base = (unsigned short *)(Pico.rom + (source & 0xfe0000)); + *mask = 0x1ffff; + return source - 2; } else if ((source & 0xfe0000) == 0x300000) { elprintf(EL_VDPDMA|EL_SVP, "SVP DmaSlow from %06x, len=%i", source, len); - source &= 0x1fffe; - source -= 2; - *srcp = (unsigned short *)(svp->dram + source); - *limitp = (unsigned short *)(svp->dram + sizeof(svp->dram)); - return 1; + *base = (unsigned short *)svp->dram; + *mask = 0x1ffff; + return source - 2; } else elprintf(EL_VDPDMA|EL_SVP|EL_ANOMALY, "SVP FIXME unhandled DmaSlow from %06x, len=%i", source, len); diff --git a/pico/memory.h b/pico/memory.h index 4260a359..ae7ae50d 100644 --- a/pico/memory.h +++ b/pico/memory.h @@ -133,17 +133,25 @@ void name(u32 a, u32 d) \ } \ } +#ifdef NEED_DMA_SOURCE // meh + static __inline void *m68k_dma_source(u32 a) { + u8 *base; uptr v; - a &= 0x00fffffe; v = m68k_read16_map[a >> M68K_MEM_SHIFT]; - if (map_flag_set(v)) - return NULL; + if (map_flag_set(v)) { + if (a >= Pico.romsize) // Rom + return NULL; + base = Pico.rom; + } else - return (void *)((v << 1) + a); + base = (void *)(v << 1); + return base + (a & 0xfe0000); } +#endif + // 32x typedef struct { uptr addr; // stores (membase >> 1) or ((handler >> 1) | (1<<31)) diff --git a/pico/pico.c b/pico/pico.c index 5951b595..23f7efb2 100644 --- a/pico/pico.c +++ b/pico/pico.c @@ -276,7 +276,8 @@ PICO_INTERNAL int CheckDMA(void) Pico.m.dma_xfers -= xfers_can; } - elprintf(EL_VDPDMA, "~Dma %i op=%i can=%i burn=%i [%i]", Pico.m.dma_xfers, dma_op1, xfers_can, burn, SekCyclesDone()); + elprintf(EL_VDPDMA, "~Dma %i op=%i can=%i burn=%i [%u]", + Pico.m.dma_xfers, dma_op1, xfers_can, burn, SekCyclesDone()); //dprintf("~aim: %i, cnt: %i", SekCycleAim, SekCycleCnt); return burn; } diff --git a/pico/pico_int.h b/pico/pico_int.h index 5d7bfbc3..794fcd1f 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -782,7 +782,7 @@ extern int 
line_base_cycles; PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d); PICO_INTERNAL_ASM unsigned int PicoVideoRead(unsigned int a); PICO_INTERNAL_ASM unsigned int PicoVideoRead8(unsigned int a); -extern int (*PicoDmaHook)(unsigned int source, int len, unsigned short **srcp, unsigned short **limitp); +extern int (*PicoDmaHook)(unsigned int source, int len, unsigned short **base, unsigned int *mask); // misc.c PICO_INTERNAL_ASM void memcpy16(unsigned short *dest, unsigned short *src, int count); diff --git a/pico/videoport.c b/pico/videoport.c index 35ece480..3f26d581 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -8,6 +8,7 @@ */ #include "pico_int.h" +#define NEED_DMA_SOURCE #include "memory.h" int line_base_cycles; @@ -21,7 +22,7 @@ typedef unsigned int u32; #define UTYPES_DEFINED #endif -int (*PicoDmaHook)(unsigned int source, int len, unsigned short **srcp, unsigned short **limitp) = NULL; +int (*PicoDmaHook)(unsigned int source, int len, unsigned short **base, unsigned int *mask) = NULL; static __inline void AutoIncrement(void) { @@ -73,104 +74,92 @@ static int GetDmaLength(void) // 16-bit words to transfer: len =pvid->reg[0x13]; len|=pvid->reg[0x14]<<8; - // Charles MacDonald: - if(!len) len = 0xffff; + len = ((len - 1) & 0xffff) + 1; return len; } -static void DmaSlow(int len) +static void DmaSlow(int len, unsigned int source) { - u16 *pd=0, *pdend, *r; - unsigned int a=Pico.video.addr, a2, d; - unsigned char inc=Pico.video.reg[0xf]; - unsigned int source; + u32 inc = Pico.video.reg[0xf]; + u32 a = Pico.video.addr; + u16 *r, *base = NULL; + u32 mask = 0x1ffff; - source =Pico.video.reg[0x15]<<1; - source|=Pico.video.reg[0x16]<<9; - source|=Pico.video.reg[0x17]<<17; - - elprintf(EL_VDPDMA, "DmaSlow[%i] %06x->%04x len %i inc=%i blank %i [%i] @ %06x", + elprintf(EL_VDPDMA, "DmaSlow[%i] %06x->%04x len %i inc=%i blank %i [%u] @ %06x", Pico.video.type, source, a, len, inc, (Pico.video.status&8)||!(Pico.video.reg[1]&0x40), SekCyclesDone(), 
SekPc); Pico.m.dma_xfers += len; + if (Pico.m.dma_xfers < len) // lame 16bit var + Pico.m.dma_xfers = ~0; SekCyclesBurnRun(CheckDMA()); - if ((source&0xe00000)==0xe00000) { // Ram - pd=(u16 *)(Pico.ram+(source&0xfffe)); - pdend=(u16 *)(Pico.ram+0x10000); + if ((source & 0xe00000) == 0xe00000) { // Ram + base = (u16 *)Pico.ram; + mask = 0xffff; } else if (PicoAHW & PAHW_MCD) { - elprintf(EL_VDPDMA, "DmaSlow CD, r3=%02x", Pico_mcd->s68k_regs[3]); - if(source<0x20000) { // Bios area - pd=(u16 *)(Pico_mcd->bios+(source&~1)); - pdend=(u16 *)(Pico_mcd->bios+0x20000); - } else if ((source&0xfc0000)==0x200000) { // Word Ram - source -= 2; - if (!(Pico_mcd->s68k_regs[3]&4)) { // 2M mode - pd=(u16 *)(Pico_mcd->word_ram2M+(source&0x3fffe)); - pdend=(u16 *)(Pico_mcd->word_ram2M+0x40000); + u8 r3 = Pico_mcd->s68k_regs[3]; + elprintf(EL_VDPDMA, "DmaSlow CD, r3=%02x", r3); + if (source < 0x20000) { // Bios area + base = (u16 *)Pico_mcd->bios; + } else if ((source & 0xfc0000) == 0x200000) { // Word Ram + if (!(r3 & 4)) { // 2M mode + base = (u16 *)(Pico_mcd->word_ram2M + (source & 0x20000)); } else { if (source < 0x220000) { // 1M mode - int bank = Pico_mcd->s68k_regs[3]&1; - pd=(u16 *)(Pico_mcd->word_ram1M[bank]+(source&0x1fffe)); - pdend=(u16 *)(Pico_mcd->word_ram1M[bank]+0x20000); + int bank = r3 & 1; + base = (u16 *)(Pico_mcd->word_ram1M[bank]); } else { - DmaSlowCell(source, a, len, inc); + DmaSlowCell(source - 2, a, len, inc); return; } } - } else if ((source&0xfe0000)==0x020000) { // Prg Ram - u8 *prg_ram = Pico_mcd->prg_ram_b[Pico_mcd->s68k_regs[3]>>6]; - pd=(u16 *)(prg_ram+(source&0x1fffe)); - pdend=(u16 *)(prg_ram+0x20000); - } else { - elprintf(EL_VDPDMA|EL_ANOMALY, "DmaSlow[%i] %06x->%04x: FIXME: unsupported src", Pico.video.type, source, a); - return; + source -= 2; + } else if ((source & 0xfe0000) == 0x020000) { // Prg Ram + base = (u16 *)Pico_mcd->prg_ram_b[r3 >> 6]; + source -= 2; // XXX: test } } else { // if we have DmaHook, let it handle ROM because of possible 
DMA delay - if (PicoDmaHook && PicoDmaHook(source, len, &pd, &pdend)); - else if (source%04x: invalid src", Pico.video.type, source, a); - return; - } + u32 source2; + if (PicoDmaHook && (source2 = PicoDmaHook(source, len, &base, &mask))) + source = source2; + else // Rom + base = m68k_dma_source(source); + } + if (!base) { + elprintf(EL_VDPDMA|EL_ANOMALY, "DmaSlow[%i] %06x->%04x: invalid src", Pico.video.type, source, a); + return; } - // overflow protection, might break something.. - if (len > pdend - pd) { - len = pdend - pd; - elprintf(EL_VDPDMA|EL_ANOMALY, "DmaSlow overflow"); - } + // operate in words + source >>= 1; + mask >>= 1; switch (Pico.video.type) { case 1: // vram r = Pico.vram; - if (inc == 2 && !(a&1) && a+len*2 < 0x10000) + if (inc == 2 && !(a & 1) && a + len * 2 < 0x10000 + && !(((source + len - 1) ^ source) & ~mask)) { // most used DMA mode - memcpy16(r + (a>>1), pd, len); - a += len*2; + memcpy((char *)r + a, base + (source & mask), len * 2); + a += len * 2; } else { for(; len; len--) { - d=*pd++; - if(a&1) d=(d<<8)|(d>>8); - r[a>>1] = (u16)d; // will drop the upper bits + u16 d = base[source++ & mask]; + if(a & 1) d=(d<<8)|(d>>8); + r[a >> 1] = d; // AutoIncrement - a=(u16)(a+inc); - // didn't src overlap? - //if(pd >= pdend) pd-=0x8000; // should be good for RAM, bad for ROM + a = (u16)(a + inc); } } Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; @@ -179,32 +168,22 @@ static void DmaSlow(int len) case 3: // cram Pico.m.dirtyPal = 1; r = Pico.cram; - for(a2=a&0x7f; len; len--) + for (; len; len--) { - r[a2>>1] = (u16)*pd++; // bit 0 is ignored + r[(a / 2) & 0x3f] = base[source++ & mask]; // AutoIncrement - a2+=inc; - // didn't src overlap? - //if(pd >= pdend) pd-=0x8000; - // good dest? 
- if(a2 >= 0x80) break; // Todds Adventures in Slime World / Andre Agassi tennis + a += inc; } - a=(a&0xff00)|a2; break; - case 5: // vsram[a&0x003f]=d; + case 5: // vsram r = Pico.vsram; - for(a2=a&0x7f; len; len--) + for (; len; len--) { - r[a2>>1] = (u16)*pd++; + r[(a / 2) & 0x3f] = base[source++ & mask]; // AutoIncrement - a2+=inc; - // didn't src overlap? - //if(pd >= pdend) pd-=0x8000; - // good dest? - if(a2 >= 0x80) break; + a += inc; } - a=(a&0xff00)|a2; break; default: @@ -220,23 +199,21 @@ static void DmaCopy(int len) { u16 a=Pico.video.addr; unsigned char *vr = (unsigned char *) Pico.vram; - unsigned char *vrs; unsigned char inc=Pico.video.reg[0xf]; int source; elprintf(EL_VDPDMA, "DmaCopy len %i [%i]", len, SekCyclesDone()); Pico.m.dma_xfers += len; + if (Pico.m.dma_xfers < len) + Pico.m.dma_xfers = ~0; Pico.video.status |= 2; // dma busy source =Pico.video.reg[0x15]; source|=Pico.video.reg[0x16]<<8; - vrs=vr+source; - - if (source+len > 0x10000) len=0x10000-source; // clip?? 
for (; len; len--) { - vr[a] = *vrs++; + vr[a] = vr[source++ & 0xffff]; // AutoIncrement a=(u16)(a+inc); } @@ -245,57 +222,85 @@ static void DmaCopy(int len) Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; } -// check: Contra, Megaman -// note: this is still inaccurate -static void DmaFill(int data) +static NOINLINE void DmaFill(int data) { - int len; unsigned short a=Pico.video.addr; unsigned char *vr=(unsigned char *) Pico.vram; unsigned char high = (unsigned char) (data >> 8); unsigned char inc=Pico.video.reg[0xf]; + int source; + int len, l; - len=GetDmaLength(); + len = GetDmaLength(); elprintf(EL_VDPDMA, "DmaFill len %i inc %i [%i]", len, inc, SekCyclesDone()); Pico.m.dma_xfers += len; + if (Pico.m.dma_xfers < len) // lame 16bit var + Pico.m.dma_xfers = ~0; Pico.video.status |= 2; // dma busy - // from Charles MacDonald's genvdp.txt: - // Write lower byte to address specified - vr[a] = (unsigned char) data; - a=(u16)(a+inc); + switch (Pico.video.type) + { + case 1: // vram + for (l = len; l; l--) { + // Write upper byte to adjacent address + // (here we are byteswapped, so address is already 'adjacent') + vr[a] = high; - if (!inc) len=1; - - for (; len; len--) { - // Write upper byte to adjacent address - // (here we are byteswapped, so address is already 'adjacent') - vr[a] = high; - - // Increment address register - a=(u16)(a+inc); + // Increment address register + a = (u16)(a + inc); + } + break; + case 3: // cram + case 5: { // vsram + // TODO: needs fifo; anyone using these? 
+ static int once; + if (!once++) + elprintf(EL_STATUS|EL_ANOMALY|EL_VDPDMA, "TODO: cram/vsram fill"); + } + default: + a += len * inc; + break; } + // remember addr - Pico.video.addr=a; - // update length - Pico.video.reg[0x13] = Pico.video.reg[0x14] = 0; // Dino Dini's Soccer (E) (by Haze) + Pico.video.addr = a; + // register update + Pico.video.reg[0x13] = Pico.video.reg[0x14] = 0; + source = Pico.video.reg[0x15]; + source |= Pico.video.reg[0x16] << 8; + source += len; + Pico.video.reg[0x15] = source; + Pico.video.reg[0x16] = source >> 8; Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; } -static void CommandDma(void) +static NOINLINE void CommandDma(void) { struct PicoVideo *pvid=&Pico.video; - int len=0,method=0; + u32 len, method; + u32 source; if ((pvid->reg[1]&0x10)==0) return; // DMA not enabled - len=GetDmaLength(); + len = GetDmaLength(); + source =Pico.video.reg[0x15]; + source|=Pico.video.reg[0x16] << 8; + source|=Pico.video.reg[0x17] << 16; method=pvid->reg[0x17]>>6; - if (method< 2) DmaSlow(len); // 68000 to VDP - if (method==3) DmaCopy(len); // VRAM Copy + if (method < 2) + DmaSlow(len, source << 1); // 68000 to VDP + else if (method == 3) + DmaCopy(len); // VRAM Copy + else + return; + + source += len; + Pico.video.reg[0x13] = Pico.video.reg[0x14] = 0; + Pico.video.reg[0x15] = source; + Pico.video.reg[0x16] = source >> 8; } static void CommandChange(void) @@ -312,9 +317,6 @@ static void CommandChange(void) addr =(cmd>>16)&0x3fff; addr|=(cmd<<14)&0xc000; pvid->addr=(unsigned short)addr; - - // Check for dma: - if (cmd&0x80) CommandDma(); } static void DrawSync(int blank_on) @@ -348,27 +350,23 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) pvid->pending=0; } - // If a DMA fill has been set up, do it - if ((pvid->command&0x80) && (pvid->reg[1]&0x10) && (pvid->reg[0x17]>>6)==2) + // preliminary FIFO emulation for Chaos Engine, The (E) + if (!(pvid->status&8) && (pvid->reg[1]&0x40) && !(PicoOpt&POPT_DIS_VDP_FIFO)) // active 
display? { - DmaFill(d); - } - else - { - // preliminary FIFO emulation for Chaos Engine, The (E) - if (!(pvid->status&8) && (pvid->reg[1]&0x40) && !(PicoOpt&POPT_DIS_VDP_FIFO)) // active display? - { - pvid->status&=~0x200; // FIFO no longer empty - pvid->lwrite_cnt++; - if (pvid->lwrite_cnt >= 4) pvid->status|=0x100; // FIFO full - if (pvid->lwrite_cnt > 4) { - SekCyclesBurnRun(32); // penalty // 488/12-8 - } - elprintf(EL_ASVDP, "VDP data write: %04x [%06x] {%i} #%i @ %06x", d, Pico.video.addr, - Pico.video.type, pvid->lwrite_cnt, SekPc); + pvid->status&=~0x200; // FIFO no longer empty + pvid->lwrite_cnt++; + if (pvid->lwrite_cnt >= 4) pvid->status|=0x100; // FIFO full + if (pvid->lwrite_cnt > 4) { + SekCyclesBurnRun(32); // penalty // 488/12-8 } - VideoWrite(d); + elprintf(EL_ASVDP, "VDP data write: %04x [%06x] {%i} #%i @ %06x", d, Pico.video.addr, + Pico.video.type, pvid->lwrite_cnt, SekPc); } + VideoWrite(d); + + if ((pvid->command&0x80) && (pvid->reg[1]&0x10) && (pvid->reg[0x17]>>6)==2) + DmaFill(d); + return; } @@ -376,12 +374,16 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) { if (pvid->pending) { - if (d & 0x80) DrawSync(0); // only need sync for DMA // Low word of command: - pvid->command&=0xffff0000; - pvid->command|=d; - pvid->pending=0; + pvid->command &= 0xffff0000; + pvid->command |= d; + pvid->pending = 0; CommandChange(); + // Check for dma: + if (d & 0x80) { + DrawSync(0); + CommandDma(); + } } else { From 6d8782a1f9d9d9d51b8e5f5426895576d26fc1be Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 25 Sep 2017 03:00:20 +0300 Subject: [PATCH 0070/1110] don't miss the sprite check DrawAllSprites depends on HighLnSpr, so prepare in DrawAllSprites was a bad idea as lots of things may have changed --- pico/draw.c | 19 +++++++++---------- pico/draw_arm.S | 11 ----------- 2 files changed, 9 insertions(+), 21 deletions(-) diff --git a/pico/draw.c b/pico/draw.c index 9ad32dad..7b4dd99b 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ 
-954,7 +954,7 @@ static void DrawSpritesHiAS(unsigned char *sprited, int sh) // Index + 0 : hhhhvvvv ----hhvv yyyyyyyy yyyyyyyy // v, h: vert./horiz. size // Index + 4 : xxxxxxxx xxxxxxxx pccvhnnn nnnnnnnn // x: x coord + 8 -void PrepareSprites(int full) +static NOINLINE void PrepareSprites(int full) { const struct PicoVideo *pvid=&Pico.video; const struct PicoEState *est=&Pico.est; @@ -1117,16 +1117,9 @@ found:; static void DrawAllSprites(unsigned char *sprited, int prio, int sh, struct PicoEState *est) { - int rs = est->rendstatus; unsigned char *p; int cnt; - if (rs & (PDRAW_SPRITES_MOVED|PDRAW_DIRTY_SPRITES)) { - //elprintf(EL_STATUS, "PrepareSprites(%i)", (rs>>4)&1); - PrepareSprites(rs & PDRAW_DIRTY_SPRITES); - est->rendstatus = rs & ~(PDRAW_SPRITES_MOVED|PDRAW_DIRTY_SPRITES); - } - cnt = sprited[0] & 0x7f; if (cnt == 0) return; @@ -1288,6 +1281,12 @@ static int DrawDisplay(int sh) int win=0,edge=0,hvwind=0; int maxw,maxcells; + if (est->rendstatus & (PDRAW_SPRITES_MOVED|PDRAW_DIRTY_SPRITES)) { + // elprintf(EL_STATUS, "PrepareSprites(%i)", (est->rendstatus>>4)&1); + PrepareSprites(est->rendstatus & PDRAW_DIRTY_SPRITES); + est->rendstatus &= ~(PDRAW_SPRITES_MOVED|PDRAW_DIRTY_SPRITES); + } + est->rendstatus &= ~(PDRAW_SHHI_DONE|PDRAW_PLANE_HI_PRIO); if (pvid->reg[12]&1) { @@ -1331,7 +1330,7 @@ static int DrawDisplay(int sh) DrawLayer(0|(sh<<1), HighCacheA, 0, maxcells, est); /* - sprites low - */ if (!(PicoDrawMask & PDRAW_SPRITES_LOW_ON)); - else if (Pico.est.rendstatus & PDRAW_INTERLACE) + else if (est->rendstatus & PDRAW_INTERLACE) DrawAllSpritesInterlace(0, sh); else if (sprited[1] & SPRL_HAVE_LO) DrawAllSprites(sprited, 0, sh, est); @@ -1352,7 +1351,7 @@ static int DrawDisplay(int sh) DrawTilesFromCache(HighCacheA, sh, maxw, est); /* - sprites hi - */ if (!(PicoDrawMask & PDRAW_SPRITES_HI_ON)); - else if (Pico.est.rendstatus & PDRAW_INTERLACE) + else if (est->rendstatus & PDRAW_INTERLACE) DrawAllSpritesInterlace(1, sh); // have sprites without layer pri 
bit ontop of sprites with that bit else if ((sprited[1] & 0xd0) == 0xd0 && (PicoOpt & POPT_ACC_SPRITES)) diff --git a/pico/draw_arm.S b/pico/draw_arm.S index 71db183c..023b0a2f 100644 --- a/pico/draw_arm.S +++ b/pico/draw_arm.S @@ -1101,17 +1101,6 @@ DrawSpriteSHi: DrawAllSprites: orr r1, r2, r1, lsl #1 - ldr r12,[r3, #OFS_rendstatus] - tst r12,#(PDRAW_DIRTY_SPRITES|PDRAW_SPRITES_MOVED) - beq das_no_prep - stmfd sp!, {r0,r1,r3,lr} - and r0, r12,#PDRAW_DIRTY_SPRITES - bic r12,r12,#(PDRAW_DIRTY_SPRITES|PDRAW_SPRITES_MOVED) - str r12,[r3, #OFS_rendstatus] - bl PrepareSprites - ldmfd sp!, {r0,r1,r3,lr} - -das_no_prep: ldr r2, [r0] ands r2, r2, #0x7f bxeq lr From c6b118c0ad6b51e36855601de0913437833ba63b Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 25 Sep 2017 23:34:34 +0300 Subject: [PATCH 0071/1110] fame: always update the fetch map should just get rid of that thing, it's annoying --- pico/32x/memory.c | 6 ------ pico/cd/memory.c | 17 ----------------- pico/memory.c | 28 ++++++++++++++++++++++++---- 3 files changed, 24 insertions(+), 27 deletions(-) diff --git a/pico/32x/memory.c b/pico/32x/memory.c index 8b9254f0..2f3499ce 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -1179,12 +1179,6 @@ static void bank_switch(int b) cpu68k_map_set(m68k_read16_map, 0x900000, 0x900000 + rs - 1, Pico.rom + bank, 0); elprintf(EL_32X, "bank %06x-%06x -> %06x", 0x900000, 0x900000 + rs - 1, bank); - -#ifdef EMU_F68K - // setup FAME fetchmap - for (rs = 0x90; rs < 0xa0; rs++) - PicoCpuFM68k.Fetch[rs] = (unsigned long)Pico.rom + bank - 0x900000; -#endif } // ----------------------------------------------------------------- diff --git a/pico/cd/memory.c b/pico/cd/memory.c index e660e35b..fbc7c476 100644 --- a/pico/cd/memory.c +++ b/pico/cd/memory.c @@ -1071,23 +1071,6 @@ static void remap_word_ram(u32 r3) cpu68k_map_set(s68k_write8_map, 0x080000, 0x0bffff, s68k_dec_write8[b0 ^ 1][m], 1); cpu68k_map_set(s68k_write16_map, 0x080000, 0x0bffff, s68k_dec_write16[b0 ^ 1][m], 1); } - 
-#ifdef EMU_F68K - // update fetchmap.. - int i; - if (!(r3 & 4)) - { - for (i = M68K_FETCHBANK1*2/16; (i<<(24-FAMEC_FETCHBITS)) < 0x240000; i++) - PicoCpuFM68k.Fetch[i] = (unsigned long)Pico_mcd->word_ram2M - 0x200000; - } - else - { - for (i = M68K_FETCHBANK1*2/16; (i<<(24-FAMEC_FETCHBITS)) < 0x220000; i++) - PicoCpuFM68k.Fetch[i] = (unsigned long)Pico_mcd->word_ram1M[r3 & 1] - 0x200000; - for (i = M68K_FETCHBANK1*0x0c/0x100; (i<<(24-FAMEC_FETCHBITS)) < 0x0e0000; i++) - PicoCpuFS68k.Fetch[i] = (unsigned long)Pico_mcd->word_ram1M[(r3&1)^1] - 0x0c0000; - } -#endif } void pcd_state_loaded_mem(void) diff --git a/pico/memory.c b/pico/memory.c index 5be66f5c..83fe1819 100644 --- a/pico/memory.c +++ b/pico/memory.c @@ -63,6 +63,17 @@ void cpu68k_map_set(uptr *map, int start_addr, int end_addr, const void *func_or_mh, int is_func) { xmap_set(map, M68K_MEM_SHIFT, start_addr, end_addr, func_or_mh, is_func); +#ifdef EMU_F68K + // setup FAME fetchmap + if (!is_func) + { + int shiftout = 24 - FAMEC_FETCHBITS; + int i = start_addr >> shiftout; + uptr base = (uptr)func_or_mh - (i << shiftout); + for (; i <= (end_addr >> shiftout); i++) + PicoCpuFM68k.Fetch[i] = base; + } +#endif } // more specialized/optimized function (does same as above) @@ -89,6 +100,17 @@ void cpu68k_map_all_ram(int start_addr, int end_addr, void *ptr, int is_sub) addr >>= 1; for (i = start_addr >> shift; i <= end_addr >> shift; i++) r8map[i] = r16map[i] = w8map[i] = w16map[i] = addr; +#ifdef EMU_F68K + // setup FAME fetchmap + { + M68K_CONTEXT *ctx = is_sub ? 
&PicoCpuFS68k : &PicoCpuFM68k; + int shiftout = 24 - FAMEC_FETCHBITS; + i = start_addr >> shiftout; + addr = (uptr)ptr - (i << shiftout); + for (; i <= (end_addr >> shiftout); i++) + ctx->Fetch[i] = addr; + } +#endif } static u32 m68k_unmapped_read8(u32 a) @@ -799,14 +821,12 @@ PICO_INTERNAL void PicoMemSetup(void) { int i; // by default, point everything to first 64k of ROM - for (i = 0; i < M68K_FETCHBANK1; i++) + for (i = 0; i < M68K_FETCHBANK1 * 0xe0 / 0x100; i++) PicoCpuFM68k.Fetch[i] = (unsigned long)Pico.rom - (i<<(24-FAMEC_FETCHBITS)); // now real ROM for (i = 0; i < M68K_FETCHBANK1 && (i<<(24-FAMEC_FETCHBITS)) < Pico.romsize; i++) PicoCpuFM68k.Fetch[i] = (unsigned long)Pico.rom; - // .. and RAM - for (i = M68K_FETCHBANK1*14/16; i < M68K_FETCHBANK1; i++) - PicoCpuFM68k.Fetch[i] = (unsigned long)Pico.ram - (i<<(24-FAMEC_FETCHBITS)); + // RAM already set } #endif #ifdef EMU_M68K From f50806540979413d06aedc47a195e3fa4ba8bde6 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 25 Sep 2017 23:37:54 +0300 Subject: [PATCH 0072/1110] relax the bank check there is 512K padding on load --- pico/carthw/carthw.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/pico/carthw/carthw.c b/pico/carthw/carthw.c index 0d6b9c5c..a8b4333f 100644 --- a/pico/carthw/carthw.c +++ b/pico/carthw/carthw.c @@ -11,6 +11,16 @@ #include "eeprom_spi.h" +static int have_bank(u32 base) +{ + // the loader allocs in 512K quantities + if (base >= Pico.romsize) { + elprintf(EL_ANOMALY|EL_STATUS, "carthw: missing bank @ %06x", base); + return 0; + } + return 1; +} + /* The SSFII mapper */ static unsigned char ssf2_banks[8]; @@ -36,10 +46,8 @@ static void carthw_ssf2_write8(u32 a, u32 d) ssf2_banks[a >> 1] = d; base = d << 19; target = a << 18; - if (base + 0x80000 > Pico.romsize) { - elprintf(EL_ANOMALY|EL_STATUS, "ssf2: missing bank @ %06x", base); + if (!have_bank(base)) return; - } cpu68k_map_set(m68k_read8_map, target, target + 0x80000 - 1, Pico.rom + base, 0); 
cpu68k_map_set(m68k_read16_map, target, target + 0x80000 - 1, Pico.rom + base, 0); @@ -324,10 +332,9 @@ static void carthw_pier_write8(u32 a, u32 d) return; do_map: - if (base + 0x80000 > Pico.romsize) { - elprintf(EL_ANOMALY|EL_STATUS, "pier: missing bank @ %06x", base); + if (!have_bank(base)) return; - } + cpu68k_map_set(m68k_read8_map, target, target + 0x80000 - 1, Pico.rom + base, 0); cpu68k_map_set(m68k_read16_map, target, target + 0x80000 - 1, Pico.rom + base, 0); } From 40d22a8e7610b2fdde5cfe6178605262ef0d61d7 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 25 Sep 2017 23:41:34 +0300 Subject: [PATCH 0073/1110] recognize the MED ssf2 header --- Makefile | 3 +++ pico/carthw.cfg | 4 ++++ pico/carthw_cfg.c | 5 ++++- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8a95d992..45e3532b 100644 --- a/Makefile +++ b/Makefile @@ -221,6 +221,9 @@ ifndef DEBUG cpu/fame/famec.o: CFLAGS += -g0 -O2 -fno-expensive-optimizations endif +pico/carthw_cfg.c: pico/carthw.cfg + tools/make_carthw_c $< $@ + # random deps pico/carthw/svp/compiler.o : cpu/drc/emit_$(ARCH).c cpu/sh2/compiler.o : cpu/drc/emit_$(ARCH).c diff --git a/pico/carthw.cfg b/pico/carthw.cfg index 72d4a9a1..20742e0e 100644 --- a/pico/carthw.cfg +++ b/pico/carthw.cfg @@ -70,6 +70,10 @@ check_str = 0x32b74c, "Bishop Level" prop = force_6btn # The SSF2 mapper +[Mega Everdrive] +check_str = 0x100, "SEGA SSF" +hw = ssf2_mapper + [Super Street Fighter II - The New Challengers (U)] check_str = 0x150, "SUPER STREET FIGHTER2 The New Challengers" hw = ssf2_mapper diff --git a/pico/carthw_cfg.c b/pico/carthw_cfg.c index 91ddec74..2fdd1991 100644 --- a/pico/carthw_cfg.c +++ b/pico/carthw_cfg.c @@ -1,4 +1,4 @@ -/* generated by ./tools/make_carthw_c, do not modify */ +/* generated by tools/make_carthw_c, do not modify */ static const char builtin_carthw_cfg[] = "[]\n" "check_str=0x150,\"Virtua Racing\"\n" @@ -28,6 +28,9 @@ static const char builtin_carthw_cfg[] = 
"check_str=0x32b74c,\"Bishop Level\"\n" "prop=force_6btn\n" "[]\n" + "check_str=0x100,\"SEGA SSF\"\n" + "hw=ssf2_mapper\n" + "[]\n" "check_str=0x150,\"SUPER STREET FIGHTER2 The New Challengers\"\n" "hw=ssf2_mapper\n" "prop=no_sram\n" From b0e08dff9a4ff71333321b931d4155479f35428e Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 26 Sep 2017 02:59:31 +0300 Subject: [PATCH 0074/1110] allow limited z80 vdp access lame and totally wrong timing --- pico/memory.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pico/memory.c b/pico/memory.c index 83fe1819..cb1b5ac3 100644 --- a/pico/memory.c +++ b/pico/memory.c @@ -1165,7 +1165,9 @@ void PicoWrite16_32x(u32 a, u32 d) {} static unsigned char z80_md_vdp_read(unsigned short a) { - // TODO? + if ((a & 0x00e0) == 0x0000) + return PicoVideoRead8(a); // FIXME: depends on 68k cycles + elprintf(EL_ANOMALY, "z80 invalid r8 [%06x] %02x", a, 0xff); return 0xff; } @@ -1192,13 +1194,13 @@ static void z80_md_ym2612_write(unsigned int a, unsigned char data) static void z80_md_vdp_br_write(unsigned int a, unsigned char data) { - // TODO: allow full VDP access if ((a&0xfff9) == 0x7f11) // 7f11 7f13 7f15 7f17 { if (PicoOpt & POPT_EN_PSG) SN76496Write(data); return; } + // at least VDP data writes hang my machine if ((a>>8) == 0x60) { From b71cbbf705cfd1c2519754142cd4b62cf9e0a2f5 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 27 Sep 2017 00:20:52 +0300 Subject: [PATCH 0075/1110] some support for 128k mode --- pico/pico_int.h | 3 ++- pico/videoport.c | 41 +++++++++++++++++++++++++++++++++-------- 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/pico/pico_int.h b/pico/pico_int.h index 794fcd1f..15ab616f 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -292,7 +292,8 @@ struct PicoVideo unsigned char pending_ints; // pending interrupts: ??VH???? 
signed char lwrite_cnt; // VDP write count during active display line unsigned short v_counter; // V-counter - unsigned char pad[0x10]; + unsigned char addr_u; + unsigned char pad[0x0f]; }; struct PicoMisc diff --git a/pico/videoport.c b/pico/videoport.c index 3f26d581..b14e70f8 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -29,6 +29,13 @@ static __inline void AutoIncrement(void) Pico.video.addr=(unsigned short)(Pico.video.addr+Pico.video.reg[0xf]); } +static NOINLINE void VideoWrite128(u32 a, u16 d) +{ + // nasty + a = ((a & 2) >> 1) | ((a & 0x400) >> 9) | (a & 0x3FC) | ((a & 0x1F800) >> 1); + ((u8 *)Pico.vram)[a] = d; +} + static void VideoWrite(u16 d) { unsigned int a=Pico.video.addr; @@ -43,6 +50,10 @@ static void VideoWrite(u16 d) case 3: Pico.m.dirtyPal = 1; Pico.cram [(a>>1)&0x003f]=d; break; // wraps (Desert Strike) case 5: Pico.vsram[(a>>1)&0x003f]=d; break; + case 0x81: + a |= Pico.video.addr_u << 16; + VideoWrite128(a, d); + break; //default:elprintf(EL_ANOMALY, "VDP write %04x with bad type %i", d, Pico.video.type); break; } @@ -186,6 +197,17 @@ static void DmaSlow(int len, unsigned int source) } break; + case 0x81: // vram 128k + a |= Pico.video.addr_u << 16; + for(; len; len--) + { + VideoWrite128(a, base[source++ & mask]); + // AutoIncrement + a = (a + inc) & 0x1ffff; + } + Pico.video.addr_u = a >> 16; + break; + default: if (Pico.video.type != 0 || (EL_LOGMASK & EL_VDPDMA)) elprintf(EL_VDPDMA|EL_ANOMALY, "DMA with bad type %i", Pico.video.type); @@ -303,20 +325,23 @@ static NOINLINE void CommandDma(void) Pico.video.reg[0x16] = source >> 8; } -static void CommandChange(void) +static NOINLINE void CommandChange(void) { - struct PicoVideo *pvid=&Pico.video; - unsigned int cmd=0,addr=0; + struct PicoVideo *pvid = &Pico.video; + unsigned int cmd, addr; - cmd=pvid->command; + cmd = pvid->command; // Get type of transfer 0xc0000030 (v/c/vsram read/write) - pvid->type=(unsigned char)(((cmd>>2)&0xc)|(cmd>>30)); + pvid->type = (u8)(((cmd >> 2) & 
0xc) | (cmd >> 30)); + if (pvid->type == 1) // vram + pvid->type |= pvid->reg[1] & 0x80; // 128k // Get address 0x3fff0003 - addr =(cmd>>16)&0x3fff; - addr|=(cmd<<14)&0xc000; - pvid->addr=(unsigned short)addr; + addr = (cmd >> 16) & 0x3fff; + addr |= (cmd << 14) & 0xc000; + pvid->addr = (u16)addr; + pvid->addr_u = (u8)((cmd >> 2) & 1); } static void DrawSync(int blank_on) From eced0190981a424f9910a7052765023474483b33 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 27 Sep 2017 02:59:50 +0300 Subject: [PATCH 0076/1110] scroll size improvement --- pico/draw.c | 7 +++++-- pico/draw_arm.S | 4 +++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/pico/draw.c b/pico/draw.c index 7b4dd99b..2aa37f8d 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -394,8 +394,11 @@ static void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells, ts.xmask=(1<1) ymask =0x0ff; + switch (width) { + case 1: ymask &= 0x1ff; break; + case 2: ymask = 0x007; break; + case 3: ymask = 0x0ff; break; + } // Find name table: if (plane_sh&1) ts.nametab=(pvid->reg[4]&0x07)<<12; // B diff --git a/pico/draw_arm.S b/pico/draw_arm.S index 023b0a2f..e19abd88 100644 --- a/pico/draw_arm.S +++ b/pico/draw_arm.S @@ -303,7 +303,9 @@ DrawLayer: cmp r10, #1 biclt r1, r1, #0xfc00 biceq r1, r1, #0xfe00 - bicgt r1, r1, #0xff00 @ r1=ymask=(height<<8)|0xff; ...; // Y Mask in pixels + cmp r10, #2 + moveq r1, #0x0007 + movgt r1, #0x00ff @ r1=ymask=(height<<8)|0xff; ...; // Y Mask in pixels add r10, r10, #5 cmp r10, #7 From 1a08dec0e0d3667556ce61307d055b8d19a02034 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 28 Sep 2017 03:19:36 +0300 Subject: [PATCH 0077/1110] simplify tile drawing cuts away 1126 bytes of code on x86 --- pico/draw.c | 207 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 119 insertions(+), 88 deletions(-) diff --git a/pico/draw.c b/pico/draw.c index 2aa37f8d..a649e8c1 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -96,56 +96,41 @@ void blockcpy_or(void *dst, void 
*src, size_t n, int pat) #define TileNormMaker(funcname,pix_func) \ -static int funcname(int sx,int addr,int pal) \ +static void funcname(int sx, unsigned int pack, int pal) \ { \ - unsigned char *pd = Pico.est.HighCol+sx; \ - unsigned int pack=0; unsigned int t=0; \ + unsigned char *pd = Pico.est.HighCol + sx; \ + unsigned int t; \ \ - pack=*(unsigned int *)(Pico.vram+addr); /* Get 8 pixels */ \ - if (pack) \ - { \ - t=(pack&0x0000f000)>>12; pix_func(0); \ - t=(pack&0x00000f00)>> 8; pix_func(1); \ - t=(pack&0x000000f0)>> 4; pix_func(2); \ - t=(pack&0x0000000f) ; pix_func(3); \ - t=(pack&0xf0000000)>>28; pix_func(4); \ - t=(pack&0x0f000000)>>24; pix_func(5); \ - t=(pack&0x00f00000)>>20; pix_func(6); \ - t=(pack&0x000f0000)>>16; pix_func(7); \ - return 0; \ - } \ - \ - return 1; /* Tile blank */ \ + t = (pack&0x0000f000)>>12; pix_func(0); \ + t = (pack&0x00000f00)>> 8; pix_func(1); \ + t = (pack&0x000000f0)>> 4; pix_func(2); \ + t = (pack&0x0000000f) ; pix_func(3); \ + t = (pack&0xf0000000)>>28; pix_func(4); \ + t = (pack&0x0f000000)>>24; pix_func(5); \ + t = (pack&0x00f00000)>>20; pix_func(6); \ + t = (pack&0x000f0000)>>16; pix_func(7); \ } - #define TileFlipMaker(funcname,pix_func) \ -static int funcname(int sx,int addr,int pal) \ +static void funcname(int sx, unsigned int pack, int pal) \ { \ - unsigned char *pd = Pico.est.HighCol+sx; \ - unsigned int pack=0; unsigned int t=0; \ + unsigned char *pd = Pico.est.HighCol + sx; \ + unsigned int t; \ \ - pack=*(unsigned int *)(Pico.vram+addr); /* Get 8 pixels */ \ - if (pack) \ - { \ - t=(pack&0x000f0000)>>16; pix_func(0); \ - t=(pack&0x00f00000)>>20; pix_func(1); \ - t=(pack&0x0f000000)>>24; pix_func(2); \ - t=(pack&0xf0000000)>>28; pix_func(3); \ - t=(pack&0x0000000f) ; pix_func(4); \ - t=(pack&0x000000f0)>> 4; pix_func(5); \ - t=(pack&0x00000f00)>> 8; pix_func(6); \ - t=(pack&0x0000f000)>>12; pix_func(7); \ - return 0; \ - } \ - \ - return 1; /* Tile blank */ \ + t = (pack&0x000f0000)>>16; pix_func(0); \ + t = 
(pack&0x00f00000)>>20; pix_func(1); \ + t = (pack&0x0f000000)>>24; pix_func(2); \ + t = (pack&0xf0000000)>>28; pix_func(3); \ + t = (pack&0x0000000f) ; pix_func(4); \ + t = (pack&0x000000f0)>> 4; pix_func(5); \ + t = (pack&0x00000f00)>> 8; pix_func(6); \ + t = (pack&0x0000f000)>>12; pix_func(7); \ } #ifdef _ASM_DRAW_C_AMIPS -int TileNorm(int sx,int addr,int pal); -int TileFlip(int sx,int addr,int pal); +int TileNorm(int sx, unsigned int pack, int pal); +int TileFlip(int sx, unsigned int pack, int pal); #else #define pix_just_write(x) \ @@ -228,7 +213,7 @@ static void DrawStrip(struct TileStrip *ts, int plane_sh, int cellskip) for (; cells > 0; dx+=8,tilex++,cells--) { - int zero=0; + unsigned int pack; code=Pico.vram[ts->nametab+(tilex&ts->xmask)]; if (code==blank) continue; @@ -249,10 +234,14 @@ static void DrawStrip(struct TileStrip *ts, int plane_sh, int cellskip) pal=((code>>9)&0x30)|sh; } - if (code&0x0800) zero=TileFlip(dx,addr,pal); - else zero=TileNorm(dx,addr,pal); + pack = *(unsigned int *)(Pico.vram + addr); + if (!pack) { + blank = code; + continue; + } - if (zero) blank=code; // We know this tile is blank now + if (code & 0x0800) TileFlip(dx, pack, pal); + else TileNorm(dx, pack, pal); } // terminate the cache list @@ -262,7 +251,7 @@ static void DrawStrip(struct TileStrip *ts, int plane_sh, int cellskip) } // this is messy -void DrawStripVSRam(struct TileStrip *ts, int plane_sh, int cellskip) +static void DrawStripVSRam(struct TileStrip *ts, int plane_sh, int cellskip) { int tilex,dx,code=0,addr=0,cell=0; int oldcode=-1,blank=-1; // The tile we know is blank @@ -278,7 +267,8 @@ void DrawStripVSRam(struct TileStrip *ts, int plane_sh, int cellskip) for (; cell < ts->cells; dx+=8,tilex++,cell++) { - int zero=0,nametabadd,ty; + int nametabadd, ty; + unsigned int pack; //if((cell&1)==0) { @@ -309,10 +299,14 @@ void DrawStripVSRam(struct TileStrip *ts, int plane_sh, int cellskip) pal=((code>>9)&0x30)|((plane_sh<<5)&0x40); } - if (code&0x0800) 
zero=TileFlip(dx,addr,pal); - else zero=TileNorm(dx,addr,pal); + pack = *(unsigned int *)(Pico.vram + addr); + if (!pack) { + blank = code; + continue; + } - if (zero) blank=code; // We know this tile is blank now + if (code & 0x0800) TileFlip(dx, pack, pal); + else TileNorm(dx, pack, pal); } // terminate the cache list @@ -339,7 +333,7 @@ void DrawStripInterlace(struct TileStrip *ts) for (; cells; dx+=8,tilex++,cells--) { - int zero=0; + unsigned int pack; code=Pico.vram[ts->nametab+(tilex&ts->xmask)]; if (code==blank) continue; @@ -361,10 +355,14 @@ void DrawStripInterlace(struct TileStrip *ts) pal=((code>>9)&0x30); } - if (code&0x0800) zero=TileFlip(dx,addr,pal); - else zero=TileNorm(dx,addr,pal); + pack = *(unsigned int *)(Pico.vram + addr); + if (!pack) { + blank = code; + continue; + } - if (zero) blank=code; // We know this tile is blank now + if (code & 0x0800) TileFlip(dx, pack, pal); + else TileNorm(dx, pack, pal); } // terminate the cache list @@ -477,7 +475,8 @@ static void DrawWindow(int tstart, int tend, int prio, int sh, { for (; tilex < tend; tilex++) { - int addr=0,zero=0; + unsigned int pack; + int dx, addr; int pal; code=Pico.vram[nametab+tilex]; @@ -487,23 +486,29 @@ static void DrawWindow(int tstart, int tend, int prio, int sh, continue; } - pal=((code>>9)&0x30); - // Get tile address/2: addr=(code&0x7ff)<<4; if (code&0x1000) addr+=14-ty; else addr+=ty; // Y-flip - if (code&0x0800) zero=TileFlip(8+(tilex<<3),addr,pal); - else zero=TileNorm(8+(tilex<<3),addr,pal); + pack = *(unsigned int *)(Pico.vram + addr); + if (!pack) { + blank = code; + continue; + } - if (zero) blank=code; // We know this tile is blank now + pal = ((code >> 9) & 0x30); + dx = 8 + (tilex << 3); + + if (code & 0x0800) TileFlip(dx, pack, pal); + else TileNorm(dx, pack, pal); } } else { for (; tilex < tend; tilex++) { - int addr=0,zero=0; + unsigned int pack; + int dx, addr; int pal; code=Pico.vram[nametab+tilex]; @@ -527,10 +532,16 @@ static void DrawWindow(int tstart, int 
tend, int prio, int sh, addr=(code&0x7ff)<<4; if (code&0x1000) addr+=14-ty; else addr+=ty; // Y-flip - if (code&0x0800) zero=TileFlip(8+(tilex<<3),addr,pal); - else zero=TileNorm(8+(tilex<<3),addr,pal); + pack = *(unsigned int *)(Pico.vram + addr); + if (!pack) { + blank = code; + continue; + } - if (zero) blank=code; // We know this tile is blank now + dx = 8 + (tilex << 3); + + if (code & 0x0800) TileFlip(dx, pack, pal); + else TileNorm(dx, pack, pal); } } } @@ -553,6 +564,7 @@ static void DrawTilesFromCacheShPrep(void) static void DrawTilesFromCache(int *hc, int sh, int rlim, struct PicoEState *est) { int code, addr, dx; + unsigned int pack; int pal; // *ts->hc++ = code | (dx<<16) | (ty<<25); // cache it @@ -568,26 +580,31 @@ static void DrawTilesFromCache(int *hc, int sh, int rlim, struct PicoEState *est { short blank=-1; // The tile we know is blank while ((code=*hc++)) { - int zero; if((short)code == blank) continue; // Get tile address/2: addr=(code&0x7ff)<<4; addr+=(unsigned int)code>>25; // y offset into tile - dx=(code>>16)&0x1ff; - pal=((code>>9)&0x30); - if (rlim-dx < 0) goto last_cut_tile; + pack = *(unsigned int *)(Pico.vram + addr); + if (!pack) { + blank = (short)code; + continue; + } - if (code&0x0800) zero=TileFlip(dx,addr,pal); - else zero=TileNorm(dx,addr,pal); + dx = (code >> 16) & 0x1ff; + pal = ((code >> 9) & 0x30); + if (rlim-dx < 0) + goto last_cut_tile; - if (zero) blank=(short)code; + if (code & 0x0800) TileFlip(dx, pack, pal); + else TileNorm(dx, pack, pal); } } else { while ((code=*hc++)) { unsigned char *zb; + // Get tile address/2: addr=(code&0x7ff)<<4; addr+=(unsigned int)code>>25; // y offset into tile @@ -596,20 +613,26 @@ static void DrawTilesFromCache(int *hc, int sh, int rlim, struct PicoEState *est *zb++ &= 0xbf; *zb++ &= 0xbf; *zb++ &= 0xbf; *zb++ &= 0xbf; *zb++ &= 0xbf; *zb++ &= 0xbf; *zb++ &= 0xbf; *zb++ &= 0xbf; - pal=((code>>9)&0x30); - if (rlim-dx < 0) goto last_cut_tile; + pack = *(unsigned int *)(Pico.vram + addr); + if 
(!pack) + continue; - if (code&0x0800) TileFlip(dx,addr,pal); - else TileNorm(dx,addr,pal); + pal = ((code >> 9) & 0x30); + if (rlim - dx < 0) + goto last_cut_tile; + + if (code & 0x0800) TileFlip(dx, pack, pal); + else TileNorm(dx, pack, pal); } } return; last_cut_tile: + // for vertical window cutoff { - unsigned int t, pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels - unsigned char *pd = est->HighCol+dx; - if (!pack) return; + unsigned char *pd = est->HighCol + dx; + unsigned int t; + if (code&0x0800) { switch (rlim-dx+8) @@ -653,7 +676,7 @@ static void DrawSprite(int *sprite, int sh) int pal; int tile=0,delta=0; int sx, sy; - int (*fTileFunc)(int sx,int addr,int pal); + void (*fTileFunc)(int sx, unsigned int pack, int pal); // parse the sprite data sy=sprite[0]; @@ -687,11 +710,13 @@ static void DrawSprite(int *sprite, int sh) for (; width; width--,sx+=8,tile+=delta) { + unsigned int pack; + if(sx<=0) continue; if(sx>=328) break; // Offscreen - tile&=0x7fff; // Clip tile address - fTileFunc(sx,tile,pal); + pack = *(unsigned int *)(Pico.vram + (tile & 0x7fff)); + fTileFunc(sx, pack, pal); } } #endif @@ -730,12 +755,14 @@ static void DrawSpriteInterlace(unsigned int *sprite) for (; width; width--,sx+=8,tile+=delta) { + unsigned int pack; + if(sx<=0) continue; if(sx>=328) break; // Offscreen - tile&=0x7fff; // Clip tile address - if (code&0x0800) TileFlip(sx,tile,pal); - else TileNorm(sx,tile,pal); + pack = *(unsigned int *)(Pico.vram + (tile & 0x7fff)); + if (code & 0x0800) TileFlip(sx, pack, pal); + else TileNorm(sx, pack, pal); } } @@ -797,7 +824,7 @@ static void DrawAllSpritesInterlace(int pri, int sh) */ static void DrawSpritesSHi(unsigned char *sprited, const struct PicoEState *est) { - int (*fTileFunc)(int sx,int addr,int pal); + void (*fTileFunc)(int sx, unsigned int pack, int pal); unsigned char *p; int cnt; @@ -853,11 +880,13 @@ static void DrawSpritesSHi(unsigned char *sprited, const struct PicoEState *est) for (; width; 
width--,sx+=8,tile+=delta) { + unsigned int pack; + if(sx<=0) continue; if(sx>=328) break; // Offscreen - tile&=0x7fff; // Clip tile address - fTileFunc(sx,tile,pal); + pack = *(unsigned int *)(Pico.vram + (tile & 0x7fff)); + fTileFunc(sx, pack, pal); } } } @@ -865,7 +894,7 @@ static void DrawSpritesSHi(unsigned char *sprited, const struct PicoEState *est) static void DrawSpritesHiAS(unsigned char *sprited, int sh) { - int (*fTileFunc)(int sx,int addr,int pal); + void (*fTileFunc)(int sx, unsigned int pack, int pal); unsigned char *p; int entry, cnt, sh_cnt = 0; @@ -925,11 +954,13 @@ static void DrawSpritesHiAS(unsigned char *sprited, int sh) pal |= 0x80; for (; width; width--,sx+=8,tile+=delta) { + unsigned int pack; + if(sx<=0) continue; if(sx>=328) break; // Offscreen - tile&=0x7fff; // Clip tile address - fTileFunc(sx,tile,pal); + pack = *(unsigned int *)(Pico.vram + (tile & 0x7fff)); + fTileFunc(sx, pack, pal); } } From e0bcb7a90d06b295b1ca989b6ad70412912cca5b Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 28 Sep 2017 03:16:21 +0300 Subject: [PATCH 0078/1110] some support for vdp debug reg --- pico/32x/32x.c | 2 +- pico/32x/draw.c | 2 +- pico/debug.c | 6 ++- pico/draw.c | 102 +++++++++++++++++++++++++++--------- pico/draw2.c | 10 ++-- pico/draw_arm.S | 19 ++++--- pico/mode4.c | 6 +-- pico/pico.h | 6 --- pico/pico_int.h | 13 ++++- pico/videoport.c | 38 ++++++++++++-- platform/common/menu_pico.c | 25 +++++---- 11 files changed, 160 insertions(+), 69 deletions(-) diff --git a/pico/32x/32x.c b/pico/32x/32x.c index e62c8209..51817c0d 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -230,7 +230,7 @@ static void p32x_start_blank(void) // XXX: no proper handling of 32col mode.. 
if ((Pico32x.vdp_regs[0] & P32XV_Mx) != 0 && // 32x not blanking (Pico.video.reg[12] & 1) && // 40col mode - (PicoDrawMask & PDRAW_32X_ON)) + (!(Pico.video.debug_p & PVD_KILL_32X))) { int md_bg = Pico.video.reg[7] & 0x3f; diff --git a/pico/32x/draw.c b/pico/32x/draw.c index 3e007ae0..f8021502 100644 --- a/pico/32x/draw.c +++ b/pico/32x/draw.c @@ -96,7 +96,7 @@ void FinalizeLine32xRGB555(int sh, int line, struct PicoEState *est) if ((Pico32x.vdp_regs[0] & P32XV_Mx) == 0 || // 32x blanking // XXX: how is 32col mode hadled by real hardware? !(Pico.video.reg[12] & 1) || // 32col mode - !(PicoDrawMask & PDRAW_32X_ON)) + (Pico.video.debug_p & PVD_KILL_32X)) { return; } diff --git a/pico/debug.c b/pico/debug.c index efcd3fde..e3ef83be 100644 --- a/pico/debug.c +++ b/pico/debug.c @@ -233,6 +233,7 @@ void PDebugShowSprite(unsigned short *screen, int stride, int which) struct PicoVideo *pvid=&Pico.video; int table=0,u,link=0,*sprite=0,*fsprite,oldsprite[2]; int x,y,max_sprites = 80, oldcol, oldreg; + unsigned char olddbg; if (!(pvid->reg[12]&1)) max_sprites = 64; @@ -257,9 +258,10 @@ void PDebugShowSprite(unsigned short *screen, int stride, int which) fsprite[1] = (sprite[1] & ~0x01ff8000) | 0x800000; oldreg = pvid->reg[7]; oldcol = Pico.cram[0]; + olddbg = pvid->debug_p; pvid->reg[7] = 0; Pico.cram[0] = 0; - PicoDrawMask = PDRAW_SPRITES_LOW_ON; + pvid->debug_p = PVD_KILL_A | PVD_KILL_B; PicoFrameFull(); for (y = 0; y < 8*4; y++) @@ -274,7 +276,7 @@ void PDebugShowSprite(unsigned short *screen, int stride, int which) fsprite[1] = oldsprite[1]; pvid->reg[7] = oldreg; Pico.cram[0] = oldcol; - PicoDrawMask = -1; + pvid->debug_p = olddbg; } #define dump_ram(ram,fname) \ diff --git a/pico/draw.c b/pico/draw.c index a649e8c1..5e8d86b4 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -46,6 +46,10 @@ static int HighCacheA[41+1]; // caches for high layers static int HighCacheB[41+1]; static int HighPreSpr[80*2+1]; // slightly preprocessed sprites +#define LF_PLANE_1 (1 << 0) +#define 
LF_SH (1 << 1) // must be = 2 +#define LF_FORCE (1 << 2) + #define SPRL_HAVE_HI 0x80 // have hi priority sprites #define SPRL_HAVE_LO 0x40 // *lo* #define SPRL_MAY_HAVE_OP 0x20 // may have operator sprites on the line @@ -54,12 +58,9 @@ unsigned char HighLnSpr[240][3 + MAX_LINE_SPRITES]; // sprite_count, ^flags, til int rendstatus_old; int rendlines; -int PicoDrawMask = -1; static int skip_next_line=0; -//unsigned short ppt[] = { 0x0f11, 0x0ff1, 0x01f1, 0x011f, 0x01ff, 0x0f1f, 0x0f0e, 0x0e7c }; - struct TileStrip { int nametab; // Position in VRAM of name table (for this tile line) @@ -192,18 +193,24 @@ TileFlipMaker(TileFlipAS_noop, pix_sh_as_noop) TileNormMaker(TileNormAS_onlymark, pix_sh_as_onlymark) TileFlipMaker(TileFlipAS_onlymark, pix_sh_as_onlymark) +// mark pixel as sprite pixel (AS) +#define pix_and(x) \ + pd[x] = (pd[x] & 0xc0) | (pd[x] & (pal | t)) + +TileNormMaker(TileNorm_and, pix_and) +TileFlipMaker(TileFlip_and, pix_and) // -------------------------------------------- #ifndef _ASM_DRAW_C -static void DrawStrip(struct TileStrip *ts, int plane_sh, int cellskip) +static void DrawStrip(struct TileStrip *ts, int lflags, int cellskip) { int tilex,dx,ty,code=0,addr=0,cells; int oldcode=-1,blank=-1; // The tile we know is blank int pal=0,sh; // Draw tiles across screen: - sh=(plane_sh<<5)&0x40; + sh = (lflags & LF_SH) << 5; // 0x40 tilex=((-ts->hscroll)>>3)+cellskip; ty=(ts->line&7)<<1; // Y-Offset into tile dx=((ts->hscroll-1)&7)+1; @@ -211,13 +218,14 @@ static void DrawStrip(struct TileStrip *ts, int plane_sh, int cellskip) if(dx != 8) cells++; // have hscroll, need to draw 1 cell more dx+=cellskip<<3; - for (; cells > 0; dx+=8,tilex++,cells--) + for (; cells > 0; dx+=8, tilex++, cells--) { unsigned int pack; - code=Pico.vram[ts->nametab+(tilex&ts->xmask)]; - if (code==blank) continue; - if (code>>15) { // high priority tile + code = Pico.vram[ts->nametab + (tilex & ts->xmask)]; + if (code == blank) + continue; + if ((code >> 15) | (lflags & LF_FORCE)) { 
// high priority tile int cval = code | (dx<<16) | (ty<<25); if(code&0x1000) cval^=7<<26; *ts->hc++ = cval; // cache it @@ -580,10 +588,11 @@ static void DrawTilesFromCache(int *hc, int sh, int rlim, struct PicoEState *est { short blank=-1; // The tile we know is blank while ((code=*hc++)) { - if((short)code == blank) continue; + if (!(code & 0x8000) || (short)code == blank) + continue; // Get tile address/2: - addr=(code&0x7ff)<<4; - addr+=(unsigned int)code>>25; // y offset into tile + addr = (code & 0x7ff) << 4; + addr += code >> 25; // y offset into tile pack = *(unsigned int *)(Pico.vram + addr); if (!pack) { @@ -721,6 +730,27 @@ static void DrawSprite(int *sprite, int sh) } #endif +static NOINLINE void DrawTilesFromCacheForced(const int *hc) +{ + int code, addr, dx; + unsigned int pack; + int pal; + + // *ts->hc++ = code | (dx<<16) | (ty<<25); + while ((code = *hc++)) { + // Get tile address/2: + addr = (code & 0x7ff) << 4; + addr += (code >> 25) & 0x0e; // y offset into tile + + dx = (code >> 16) & 0x1ff; + pal = ((code >> 9) & 0x30); + pack = *(unsigned int *)(Pico.vram + addr); + + if (code & 0x0800) TileFlip_and(dx, pack, pal); + else TileNorm_and(dx, pack, pal); + } +} + static void DrawSpriteInterlace(unsigned int *sprite) { int width=0,height=0; @@ -767,7 +797,7 @@ static void DrawSpriteInterlace(unsigned int *sprite) } -static void DrawAllSpritesInterlace(int pri, int sh) +static NOINLINE void DrawAllSpritesInterlace(int pri, int sh) { struct PicoVideo *pvid=&Pico.video; int i,u,table,link=0,sline=Pico.est.DrawScanline<<1; @@ -1312,8 +1342,8 @@ static int DrawDisplay(int sh) struct PicoEState *est=&Pico.est; unsigned char *sprited = &HighLnSpr[est->DrawScanline][0]; struct PicoVideo *pvid=&Pico.video; - int win=0,edge=0,hvwind=0; - int maxw,maxcells; + int win=0, edge=0, hvwind=0, lflags; + int maxw, maxcells; if (est->rendstatus & (PDRAW_SPRITES_MOVED|PDRAW_DIRTY_SPRITES)) { // elprintf(EL_STATUS, "PrepareSprites(%i)", (est->rendstatus>>4)&1); @@ 
-1351,29 +1381,40 @@ static int DrawDisplay(int sh) } /* - layer B low - */ - if (PicoDrawMask & PDRAW_LAYERB_ON) - DrawLayer(1|(sh<<1), HighCacheB, 0, maxcells, est); + if (!(pvid->debug_p & PVD_KILL_B)) { + lflags = LF_PLANE_1 | (sh << 1); + if (pvid->debug_p & PVD_FORCE_B) + lflags |= LF_FORCE; + DrawLayer(lflags, HighCacheB, 0, maxcells, est); + } /* - layer A low - */ - if (!(PicoDrawMask & PDRAW_LAYERA_ON)); + lflags = 0 | (sh << 1); + if (pvid->debug_p & PVD_FORCE_A) + lflags |= LF_FORCE; + if (pvid->debug_p & PVD_KILL_A) + ; else if (hvwind == 1) DrawWindow(0, maxcells>>1, 0, sh, est); else if (hvwind == 2) { - DrawLayer(0|(sh<<1), HighCacheA, (win&0x80) ? 0 : edge<<1, (win&0x80) ? edge<<1 : maxcells, est); - DrawWindow( (win&0x80) ? edge : 0, (win&0x80) ? maxcells>>1 : edge, 0, sh, est); - } else - DrawLayer(0|(sh<<1), HighCacheA, 0, maxcells, est); + DrawLayer(lflags, HighCacheA, (win&0x80) ? 0 : edge<<1, (win&0x80) ? edge<<1 : maxcells, est); + DrawWindow( (win&0x80) ? edge : 0, (win&0x80) ? 
maxcells>>1 : edge, 0, sh, est); + } + else + DrawLayer(lflags, HighCacheA, 0, maxcells, est); /* - sprites low - */ - if (!(PicoDrawMask & PDRAW_SPRITES_LOW_ON)); + if (pvid->debug_p & PVD_KILL_S_LO) + ; else if (est->rendstatus & PDRAW_INTERLACE) DrawAllSpritesInterlace(0, sh); else if (sprited[1] & SPRL_HAVE_LO) DrawAllSprites(sprited, 0, sh, est); /* - layer B hi - */ - if ((PicoDrawMask & PDRAW_LAYERB_ON) && HighCacheB[0]) + if (!(pvid->debug_p & PVD_KILL_B) && HighCacheB[0]) DrawTilesFromCache(HighCacheB, sh, maxw, est); /* - layer A hi - */ - if (!(PicoDrawMask & PDRAW_LAYERA_ON)); + if (pvid->debug_p & PVD_KILL_A) + ; else if (hvwind == 1) DrawWindow(0, maxcells>>1, 1, sh, est); else if (hvwind == 2) { @@ -1384,7 +1425,8 @@ static int DrawDisplay(int sh) if (HighCacheA[0]) DrawTilesFromCache(HighCacheA, sh, maxw, est); /* - sprites hi - */ - if (!(PicoDrawMask & PDRAW_SPRITES_HI_ON)); + if (pvid->debug_p & PVD_KILL_S_HI) + ; else if (est->rendstatus & PDRAW_INTERLACE) DrawAllSpritesInterlace(1, sh); // have sprites without layer pri bit ontop of sprites with that bit @@ -1395,6 +1437,11 @@ static int DrawDisplay(int sh) else if (sprited[1] & SPRL_HAVE_HI) DrawAllSprites(sprited, 1, 0, est); + if (pvid->debug_p & PVD_FORCE_B) + DrawTilesFromCacheForced(HighCacheB); + else if (pvid->debug_p & PVD_FORCE_A) + DrawTilesFromCacheForced(HighCacheA); + #if 0 { int *c, a, b; @@ -1480,6 +1527,9 @@ static void PicoLine(int line, int offs, int sh, int bgc) return; } + if (Pico.video.debug_p & (PVD_FORCE_A | PVD_FORCE_B)) + bgc = 0x3f; + // Draw screen: BackFill(bgc, sh, &Pico.est); if (Pico.video.reg[1]&0x40) diff --git a/pico/draw2.c b/pico/draw2.c index 1b8cdf5f..db155fa2 100644 --- a/pico/draw2.c +++ b/pico/draw2.c @@ -551,9 +551,9 @@ static void DrawDisplayFull(void) if (hvwin==1) { winend|=maxcolc<<16; planeend|=maxcolc<<16; } HighCache2A[1] = HighCache2B[1] = 0; - if (PicoDrawMask & PDRAW_LAYERB_ON) + if (!(pvid->debug_p & PVD_KILL_B)) DrawLayerFull(1, 
HighCache2B, START_ROW, (maxcolc<<16)|END_ROW, est); - if (PicoDrawMask & PDRAW_LAYERA_ON) switch (hvwin) + if (!(pvid->debug_p & PVD_KILL_A)) switch (hvwin) { case 4: // fullscreen window @@ -579,12 +579,12 @@ static void DrawDisplayFull(void) DrawLayerFull(0, HighCache2A, START_ROW, (maxcolc<<16)|END_ROW, est); break; } - if (PicoDrawMask & PDRAW_SPRITES_LOW_ON) + if (!(pvid->debug_p & PVD_KILL_S_LO)) DrawAllSpritesFull(0, maxw); if (HighCache2B[1]) DrawTilesFromCacheF(HighCache2B, est); if (HighCache2A[1]) DrawTilesFromCacheF(HighCache2A, est); - if (PicoDrawMask & PDRAW_LAYERA_ON) switch (hvwin) + if (!(pvid->debug_p & PVD_KILL_A)) switch (hvwin) { case 4: // fullscreen window @@ -603,7 +603,7 @@ static void DrawDisplayFull(void) DrawWindowFull(winstart, winend, 1, est); break; } - if (PicoDrawMask & PDRAW_SPRITES_HI_ON) + if (!(pvid->debug_p & PVD_KILL_S_HI)) DrawAllSpritesFull(1, maxw); } diff --git a/pico/draw_arm.S b/pico/draw_arm.S index e19abd88..fae07b8b 100644 --- a/pico/draw_arm.S +++ b/pico/draw_arm.S @@ -278,7 +278,7 @@ @ int cells; // 0x14 @ }; -@ void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells, +@ void DrawLayer(int lflags, int *hcache, int cellskip, int maxcells, @ struct PicoEState *est) .global DrawLayer @@ -293,8 +293,8 @@ DrawLayer: ldrb r7, [r11, #16] @ ??vv??hh mov r6, r1 @ hcache - orr r9, r3, r0, lsl #30 - orr r9, r9, r2, lsl #8 @ r9=sh[31]|cellskip[15:8]|maxcells[7:0] (tmp) + orr r9, r3, r0, lsl #29 @ r9=force[31]|sh[30]|plane[29] + orr r9, r9, r2, lsl #8 @ |cellskip[15:8]|maxcells[7:0] (tmp) mov r1, r7, lsl #4 orr r1, r1, #0x00ff @@ -375,10 +375,11 @@ DrawLayer: and r1, r1, #7 add r7, r1, #1 @ r7=dx=((ts->hscroll-1)&7)+1 - tst r9, #1<<31 + movs r3, r9, lsl #1 @ (force[31]|sh[30]) << 1 mov r3, #0 - orrne r10,r10, #1<<23 @ r10=(cells<<24|sh<<23|hi_not_empty<<22|had_output<<21|ty) - movne r3, #0x40 @ default to shadowed pal on sh mode + orrmi r10,r10, #1<<23 @ r10=cells[31:24]|sh[23]|hi_not_empty[22] + orrcs r10,r10, 
#1<<20 @ |had_output[21]|force[20]|ty[15:0] + movmi r3, #0x40 @ default to shadowed pal on sh mode cmp r7, #8 addne r10,r10, #0x01000000 @ we will loop cells+1 times if there is scroll @@ -413,6 +414,7 @@ DrawLayer: add r8, r8, #1 tst r7, #0x8000 + tsteq r10, #1<<20 @ force? bne .DrawStrip_hiprio cmp r7, r9 @@ -480,7 +482,7 @@ DrawLayer: orr r10, r10, #1<<22 .DrawStrip_hiprio: - tst r10, #0x00c00000 + tst r10, #0x00d00000 @ sh[23]|hi_not_empty[22]|force[20] beq .DrawStrip_hiprio_maybempt sub r0, r1, r11 orr r7, r7, r0, lsl #16 @@ -774,7 +776,8 @@ DrawTilesFromCache: bic r4, r1, #0xfe00 add r1, r11, r4 @ r1=pdest - mov r7, r6, lsl #16 + movs r7, r6, lsl #16 + bpl .dtfc_loop @ !(code & 0x8000) cmp r5, r7, lsr #16 beq .dtfc_samecode @ if (code==prevcode) diff --git a/pico/mode4.c b/pico/mode4.c index 4abace6b..bda11888 100644 --- a/pico/mode4.c +++ b/pico/mode4.c @@ -179,15 +179,15 @@ static void DrawDisplayM4(int scanline) dx += cellskip << 3; // low priority tiles - if (PicoDrawMask & PDRAW_LAYERB_ON) + if (!(pv->debug_p & PVD_KILL_B)) draw_strip(nametab, dx, cells, tilex | 0x0000 | (ty << 16)); // sprites - if (PicoDrawMask & PDRAW_SPRITES_LOW_ON) + if (!(pv->debug_p & PVD_KILL_S_LO)) draw_sprites(scanline); // high priority tiles (use virtual layer switch just for fun) - if (PicoDrawMask & PDRAW_LAYERA_ON) + if (!(pv->debug_p & PVD_KILL_A)) draw_strip(nametab, dx, cells, tilex | 0x1000 | (ty << 16)); if (pv->reg[0] & 0x20) diff --git a/pico/pico.h b/pico/pico.h index e2dde1b2..527498ee 100644 --- a/pico/pico.h +++ b/pico/pico.h @@ -176,12 +176,6 @@ void PicoDrawSetCallbacks(int (*begin)(unsigned int num), int (*end)(unsigned in void vidConvCpyRGB565(void *to, void *from, int pixels); #endif void PicoDoHighPal555(int sh, int line, struct PicoEState *est); -extern int PicoDrawMask; -#define PDRAW_LAYERB_ON (1<<2) -#define PDRAW_LAYERA_ON (1<<3) -#define PDRAW_SPRITES_LOW_ON (1<<4) -#define PDRAW_SPRITES_HI_ON (1<<7) -#define PDRAW_32X_ON (1<<8) // internals #define 
PDRAW_SPRITES_MOVED (1<<0) // (asm) #define PDRAW_WND_DIFF_PRIO (1<<1) // not all window tiles use same priority diff --git a/pico/pico_int.h b/pico/pico_int.h index 15ab616f..2249148f 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -281,6 +281,15 @@ extern SH2 sh2s[2]; #define OSC_NTSC 53693100 #define OSC_PAL 53203424 +#define PVD_KILL_A (1 << 0) +#define PVD_KILL_B (1 << 1) +#define PVD_KILL_S_LO (1 << 2) +#define PVD_KILL_S_HI (1 << 3) +#define PVD_KILL_32X (1 << 4) +#define PVD_FORCE_A (1 << 5) +#define PVD_FORCE_B (1 << 6) +#define PVD_FORCE_S (1 << 7) + struct PicoVideo { unsigned char reg[0x20]; @@ -292,8 +301,10 @@ struct PicoVideo unsigned char pending_ints; // pending interrupts: ??VH???? signed char lwrite_cnt; // VDP write count during active display line unsigned short v_counter; // V-counter + unsigned short debug; // raw debug register + unsigned char debug_p; // ... parsed: PVD_* unsigned char addr_u; - unsigned char pad[0x0f]; + unsigned char pad[0x0c]; }; struct PicoMisc diff --git a/pico/videoport.c b/pico/videoport.c index b14e70f8..fe96139f 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -361,8 +361,9 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) // elprintf(EL_STATUS, "PicoVideoWrite [%06x] %04x", a, d); a&=0x1c; - if (a==0x00) // Data port 0 or 2 + switch (a) { + case 0x00: // Data port 0 or 2 // try avoiding the sync.. 
if (Pico.m.scanline < 224 && (pvid->reg[1]&0x40) && !(!pvid->pending && @@ -392,11 +393,9 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) if ((pvid->command&0x80) && (pvid->reg[1]&0x10) && (pvid->reg[0x17]>>6)==2) DmaFill(d); - return; - } + break; - if (a==0x04) // Control (command) port 4 or 6 - { + case 0x04: // Control (command) port 4 or 6 if (pvid->pending) { // Low word of command: @@ -473,6 +472,35 @@ update_irq: pvid->pending=1; } } + break; + + // case 0x08: // 08 0a - HV counter - lock up + // case 0x0c: // 0c 0e - HV counter - lock up + // case 0x10: // 10 12 - PSG - handled by caller + // case 0x14: // 14 16 - PSG - handled by caller + // case 0x18: // 18 1a - no effect? + case 0x1c: // 1c 1e - debug + pvid->debug = d; + pvid->debug_p = 0; + if (d & (1 << 6)) { + pvid->debug_p |= PVD_KILL_A | PVD_KILL_B; + pvid->debug_p |= PVD_KILL_S_LO | PVD_KILL_S_HI; + } + switch ((d >> 7) & 3) { + case 1: + pvid->debug_p &= ~(PVD_KILL_S_LO | PVD_KILL_S_HI); + pvid->debug_p |= PVD_FORCE_S; + break; + case 2: + pvid->debug_p &= ~PVD_KILL_A; + pvid->debug_p |= PVD_FORCE_A; + break; + case 3: + pvid->debug_p &= ~PVD_KILL_B; + pvid->debug_p |= PVD_FORCE_B; + break; + } + break; } } diff --git a/platform/common/menu_pico.c b/platform/common/menu_pico.c index b127077a..cc116382 100644 --- a/platform/common/menu_pico.c +++ b/platform/common/menu_pico.c @@ -14,7 +14,7 @@ #include "input_pico.h" #include "version.h" -#include +#include #include #ifdef PANDORA @@ -803,11 +803,13 @@ static void draw_text_debug(const char *str, int skip, int from) static void draw_frame_debug(void) { char layer_str[48] = "layers: "; - if (PicoDrawMask & PDRAW_LAYERB_ON) memcpy(layer_str + 8, "B", 1); - if (PicoDrawMask & PDRAW_LAYERA_ON) memcpy(layer_str + 10, "A", 1); - if (PicoDrawMask & PDRAW_SPRITES_LOW_ON) memcpy(layer_str + 12, "spr_lo", 6); - if (PicoDrawMask & PDRAW_SPRITES_HI_ON) memcpy(layer_str + 19, "spr_hi", 6); - if (PicoDrawMask & PDRAW_32X_ON) 
memcpy(layer_str + 26, "32x", 4); + struct PicoVideo *pv = &Pico.video; + + if (!(pv->debug_p & PVD_KILL_B)) memcpy(layer_str + 8, "B", 1); + if (!(pv->debug_p & PVD_KILL_A)) memcpy(layer_str + 10, "A", 1); + if (!(pv->debug_p & PVD_KILL_S_LO)) memcpy(layer_str + 12, "spr_lo", 6); + if (!(pv->debug_p & PVD_KILL_S_HI)) memcpy(layer_str + 19, "spr_hi", 6); + if (!(pv->debug_p & PVD_KILL_32X)) memcpy(layer_str + 26, "32x", 4); pemu_forced_frame(1, 0); make_bg(1); @@ -818,6 +820,7 @@ static void draw_frame_debug(void) static void debug_menu_loop(void) { + struct PicoVideo *pv = &Pico.video; int inp, mode = 0; int spr_offs = 0, dumped = 0; char *tmp; @@ -878,11 +881,11 @@ static void debug_menu_loop(void) } break; case 1: - if (inp & PBTN_LEFT) PicoDrawMask ^= PDRAW_LAYERB_ON; - if (inp & PBTN_RIGHT) PicoDrawMask ^= PDRAW_LAYERA_ON; - if (inp & PBTN_DOWN) PicoDrawMask ^= PDRAW_SPRITES_LOW_ON; - if (inp & PBTN_UP) PicoDrawMask ^= PDRAW_SPRITES_HI_ON; - if (inp & PBTN_MA2) PicoDrawMask ^= PDRAW_32X_ON; + if (inp & PBTN_LEFT) pv->debug_p ^= PVD_KILL_B; + if (inp & PBTN_RIGHT) pv->debug_p ^= PVD_KILL_A; + if (inp & PBTN_DOWN) pv->debug_p ^= PVD_KILL_S_LO; + if (inp & PBTN_UP) pv->debug_p ^= PVD_KILL_S_HI; + if (inp & PBTN_MA2) pv->debug_p ^= PVD_KILL_32X; if (inp & PBTN_MOK) { PsndOut = NULL; // just in case PicoSkipFrame = 1; From 6d28fb5023d53282209ade40fdd30f46905aacbf Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 30 Sep 2017 00:19:55 +0300 Subject: [PATCH 0079/1110] fix ym2612 asm, rework EG this should be split, but I'm lazy EG saves ~900 bytes --- pico/sound/ym2612.c | 108 +++++++++++----------------- pico/sound/ym2612.h | 14 ++-- pico/sound/ym2612_arm.s | 152 +++++++++++++++++----------------------- 3 files changed, 117 insertions(+), 157 deletions(-) diff --git a/pico/sound/ym2612.c b/pico/sound/ym2612.c index efe5054e..5c52785d 100644 --- a/pico/sound/ym2612.c +++ b/pico/sound/ym2612.c @@ -739,83 +739,57 @@ INLINE int advance_lfo(int lfo_ampm, UINT32 lfo_cnt_old, 
UINT32 lfo_cnt) return lfo_ampm; } -#define EG_INC_VAL() \ - ((1 << ((pack >> ((eg_cnt>>shift)&7)*3)&7)) >> 1) - -INLINE UINT32 update_eg_phase(FM_SLOT *SLOT, UINT32 eg_cnt) +INLINE void update_eg_phase(UINT16 *vol_out, FM_SLOT *SLOT, UINT32 eg_cnt) { INT32 volume = SLOT->volume; + UINT32 pack = SLOT->eg_pack[SLOT->state - 1]; + UINT32 shift = pack >> 24; + INT32 eg_inc_val; - switch(SLOT->state) + if (eg_cnt & ((1 << shift) - 1)) + return; + + eg_inc_val = pack >> ((eg_cnt >> shift) & 7) * 3; + eg_inc_val = (1 << (eg_inc_val & 7)) >> 1; + + switch (SLOT->state) { - case EG_ATT: /* attack phase */ + case EG_ATT: /* attack phase */ + volume += ( ~volume * eg_inc_val ) >> 4; + if ( volume <= MIN_ATT_INDEX ) { - UINT32 pack = SLOT->eg_pack_ar; - UINT32 shift = pack>>24; - if ( !(eg_cnt & ((1<>4; - - if (volume <= MIN_ATT_INDEX) - { - volume = MIN_ATT_INDEX; - SLOT->state = EG_DEC; - } - } - break; + volume = MIN_ATT_INDEX; + SLOT->state = EG_DEC; } + break; - case EG_DEC: /* decay phase */ + case EG_DEC: /* decay phase */ + volume += eg_inc_val; + if ( volume >= (INT32) SLOT->sl ) + SLOT->state = EG_SUS; + break; + + case EG_SUS: /* sustain phase */ + volume += eg_inc_val; + if ( volume >= MAX_ATT_INDEX ) { - UINT32 pack = SLOT->eg_pack_d1r; - UINT32 shift = pack>>24; - if ( !(eg_cnt & ((1<= (INT32) SLOT->sl ) - SLOT->state = EG_SUS; - } - break; + volume = MAX_ATT_INDEX; + /* do not change SLOT->state (verified on real chip) */ } + break; - case EG_SUS: /* sustain phase */ + case EG_REL: /* release phase */ + volume += eg_inc_val; + if ( volume >= MAX_ATT_INDEX ) { - UINT32 pack = SLOT->eg_pack_d2r; - UINT32 shift = pack>>24; - if ( !(eg_cnt & ((1<= MAX_ATT_INDEX ) - { - volume = MAX_ATT_INDEX; - /* do not change SLOT->state (verified on real chip) */ - } - } - break; - } - - case EG_REL: /* release phase */ - { - UINT32 pack = SLOT->eg_pack_rr; - UINT32 shift = pack>>24; - if ( !(eg_cnt & ((1<= MAX_ATT_INDEX ) - { - volume = MAX_ATT_INDEX; - SLOT->state = EG_OFF; - 
} - } - break; + volume = MAX_ATT_INDEX; + SLOT->state = EG_OFF; } + break; } SLOT->volume = volume; - return SLOT->tl + ((UINT32)volume); /* tl is 7bit<<3, volume 0-1023 (0-2039 total) */ + *vol_out = SLOT->tl + volume; /* tl is 7bit<<3, volume 0-1023 (0-2039 total) */ } #endif @@ -873,10 +847,10 @@ static void chan_render_loop(chan_rend_context *ct, int *buffer, int length) ct->eg_timer -= EG_TIMER_OVERFLOW; ct->eg_cnt++; - if (ct->CH->SLOT[SLOT1].state != EG_OFF) ct->vol_out1 = update_eg_phase(&ct->CH->SLOT[SLOT1], ct->eg_cnt); - if (ct->CH->SLOT[SLOT2].state != EG_OFF) ct->vol_out2 = update_eg_phase(&ct->CH->SLOT[SLOT2], ct->eg_cnt); - if (ct->CH->SLOT[SLOT3].state != EG_OFF) ct->vol_out3 = update_eg_phase(&ct->CH->SLOT[SLOT3], ct->eg_cnt); - if (ct->CH->SLOT[SLOT4].state != EG_OFF) ct->vol_out4 = update_eg_phase(&ct->CH->SLOT[SLOT4], ct->eg_cnt); + if (ct->CH->SLOT[SLOT1].state != EG_OFF) update_eg_phase(&ct->vol_out1, &ct->CH->SLOT[SLOT1], ct->eg_cnt); + if (ct->CH->SLOT[SLOT2].state != EG_OFF) update_eg_phase(&ct->vol_out2, &ct->CH->SLOT[SLOT2], ct->eg_cnt); + if (ct->CH->SLOT[SLOT3].state != EG_OFF) update_eg_phase(&ct->vol_out3, &ct->CH->SLOT[SLOT3], ct->eg_cnt); + if (ct->CH->SLOT[SLOT4].state != EG_OFF) update_eg_phase(&ct->vol_out4, &ct->CH->SLOT[SLOT4], ct->eg_cnt); } if (ct->pack & 4) continue; /* output disabled */ @@ -1071,7 +1045,7 @@ static void chan_render_loop(chan_rend_context *ct, int *buffer, int length) } else { buffer[scounter] += smp; } - ct->algo = 8; // algo is only used in asm, here only bit3 is used + ct->algo |= 8; } /* update phase counters AFTER output calculations */ diff --git a/pico/sound/ym2612.h b/pico/sound/ym2612.h index f5e98a05..73a36a84 100644 --- a/pico/sound/ym2612.h +++ b/pico/sound/ym2612.h @@ -43,10 +43,16 @@ typedef struct INT16 volume; /* #0x1a envelope counter | need_save */ UINT32 sl; /* #0x1c sustain level:sl_table[SL] */ - UINT32 eg_pack_ar; /* #0x20 (attack state) */ - UINT32 eg_pack_d1r; /* #0x24 (decay state) 
*/ - UINT32 eg_pack_d2r; /* #0x28 (sustain state) */ - UINT32 eg_pack_rr; /* #0x2c (release state) */ + /* asm relies on this order: */ + union { + struct { + UINT32 eg_pack_rr; /* #0x20 1 (release state) */ + UINT32 eg_pack_d2r; /* #0x24 2 (sustain state) */ + UINT32 eg_pack_d1r; /* #0x28 3 (decay state) */ + UINT32 eg_pack_ar; /* #0x2c 4 (attack state) */ + }; + UINT32 eg_pack[4]; + }; } FM_SLOT; diff --git a/pico/sound/ym2612_arm.s b/pico/sound/ym2612_arm.s index 7db31220..9c436d41 100644 --- a/pico/sound/ym2612_arm.s +++ b/pico/sound/ym2612_arm.s @@ -30,103 +30,73 @@ .equiv EG_TIMER_OVERFLOW, (3*(1<= (INT32) SLOT->sl ) + strgeb r3, [r5,#0x17] @ state + b 10f + +4: @ EG_ATT + subs r3, r3, #1 @ eg_inc_val_shift - 1 + mov r2, #0 + mvnpl r2, r0 + mov r2, r2, lsl r3 + add r0, r0, r2, asr #4 cmp r0, #0 @ if (volume <= MIN_ATT_INDEX) movle r3, #EG_DEC strleb r3, [r5,#0x17] @ state movle r0, #0 - b 4f - -1: @ EG_DEC - ldr r2, [r5,#0x24] @ eg_pack_d1r (1ci) - mov r0, r2, lsr #24 - mov r3, r3, lsl r0 - sub r3, r3, #1 - tst r1, r3 - bne 5f @ do smth for tl problem (set on init?) - mov r3, r1, lsr r0 - ldrh r0, [r5,#0x1a] @ volume - and r3, r3, #7 - add r3, r3, r3, lsl #1 - mov r3, r2, lsr r3 - and r3, r3, #7 @ shift for eg_inc calculation - mov r2, #1 - mov r3, r2, lsl r3 - ldr r2, [r5,#0x1c] @ sl (can be 16bit?) - add r0, r0, r3, asr #1 - cmp r0, r2 @ if ( volume >= (INT32) SLOT->sl ) - movge r3, #EG_SUS - strgeb r3, [r5,#0x17] @ state - b 4f + b 10f 2: @ EG_SUS - ldr r2, [r5,#0x28] @ eg_pack_d2r (1ci) - mov r0, r2, lsr #24 - mov r3, r3, lsl r0 - sub r3, r3, #1 - tst r1, r3 - bne 5f @ do smth for tl problem (set on init?) 
- mov r3, r1, lsr r0 - ldrh r0, [r5,#0x1a] @ volume - and r3, r3, #7 - add r3, r3, r3, lsl #1 - mov r3, r2, lsr r3 - and r3, r3, #7 @ shift for eg_inc calculation - mov r2, #1 - mov r3, r2, lsl r3 - add r0, r0, r3, asr #1 mov r2, #1024 sub r2, r2, #1 @ r2 = MAX_ATT_INDEX cmp r0, r2 @ if ( volume >= MAX_ATT_INDEX ) movge r0, r2 - b 4f + b 10f -3: @ EG_REL - ldr r2, [r5,#0x2c] @ eg_pack_rr (1ci) - mov r0, r2, lsr #24 - mov r3, r3, lsl r0 - sub r3, r3, #1 - tst r1, r3 - bne 5f @ do smth for tl problem (set on init?) - mov r3, r1, lsr r0 - ldrh r0, [r5,#0x1a] @ volume - and r3, r3, #7 - add r3, r3, r3, lsl #1 - mov r3, r2, lsr r3 - and r3, r3, #7 @ shift for eg_inc calculation - mov r2, #1 - mov r3, r2, lsl r3 - add r0, r0, r3, asr #1 +1: @ EG_REL mov r2, #1024 sub r2, r2, #1 @ r2 = MAX_ATT_INDEX cmp r0, r2 @ if ( volume >= MAX_ATT_INDEX ) @@ -134,7 +104,7 @@ movge r3, #EG_OFF strgeb r3, [r5,#0x17] @ state -4: +10: @ finish ldrh r3, [r5,#0x18] @ tl strh r0, [r5,#0x1a] @ volume .if \slot == SLOT1 @@ -157,7 +127,7 @@ orr r7, r0, r7, lsr #16 .endif -5: +0: @ EG_OFF .endm @@ -187,28 +157,30 @@ tstne r12, #(1<<(\slot+8)) .if \slot == SLOT1 mov r1, r6, lsl #16 - mov r1, r1, lsr #17 + mov r1, r1, lsr #16 .elseif \slot == SLOT2 - mov r1, r6, lsr #17 + mov r1, r6, lsr #16 .elseif \slot == SLOT3 mov r1, r7, lsl #16 - mov r1, r1, lsr #17 + mov r1, r1, lsr #16 .elseif \slot == SLOT4 - mov r1, r7, lsr #17 + mov r1, r7, lsr #16 .endif andne r2, r12, #0xc0 movne r2, r2, lsr #6 addne r2, r2, #24 addne r1, r1, r12, lsr r2 + bic r1, r1, #1 .endm +@ \r=sin/result, r1=env, r3=ym_tl_tab .macro lookup_tl r tst \r, #0x100 eorne \r, \r, #0xff @ if (sin & 0x100) sin = 0xff - (sin&0xff); tst \r, #0x200 and \r, \r, #0xff - orr \r, \r, r1, lsl #8 + orr \r, \r, r1, lsl #7 mov \r, \r, lsl #1 ldrh \r, [r3, \r] @ 2ci if ne rsbne \r, \r, #0 @@ -345,9 +317,9 @@ make_eg_out SLOT3 cmp r1, #ENV_QUIET ldr r2, [lr, #0x38] @ mem (for future) - movcs r0, r2 + mov r0, #0 bcs 0f - ldr r0, [lr, #0x18] @ 1ci + 
ldr r0, [lr, #0x18] @ phase3 mov r0, r0, lsr #16 lookup_tl r0 @ r0=c2 @@ -370,13 +342,13 @@ cmp r1, #ENV_QUIET movcs r2, #0 bcs 2f - ldr r2, [lr, #0x14] + ldr r2, [lr, #0x14] @ phase2 mov r5, r10, lsr #17 add r2, r5, r2, lsr #16 lookup_tl r2 @ r2=mem 2: - str r2, [lr, #0x38] @ mem + str r2, [lr, #0x38] @ mem .endm @@ -541,9 +513,9 @@ movne r0, r0, asr #16 movne r0, r0, lsl r2 - ldr r2, [lr, #0x10] + ldr r2, [lr, #0x10] @ phase1 + add r0, r0, r2 mov r0, r0, lsr #16 - add r0, r0, r2, lsr #16 lookup_tl r0 mov r10,r10,lsl #16 @ ct->op1_out <<= 16; mov r0, r0, lsl #16 @@ -759,11 +731,18 @@ chan_render_loop: crl_loop_lfo: add r0, lr, #0x30 ldmia r0, {r1,r2} + + subs r4, r4, #0x100 + bmi crl_loop_end + add r2, r2, r1 str r2, [lr, #0x30] + @ r12=lfo_ampm[31:16], r1=lfo_cnt_old, r2=lfo_cnt advance_lfo_m + add r4, r4, #0x100 + crl_loop: subs r4, r4, #0x100 bmi crl_loop_end @@ -859,7 +838,6 @@ crl_algo6: crl_algo7: upd_algo7_m - .pool crl_algo_done: @@ -917,6 +895,7 @@ crl_do_phase: crl_loop_end: +@ stmia lr, {r6,r7} @ save volumes (for debug) str r8, [lr, #0x44] @ eg_timer str r12, [lr, #0x4c] @ pack (for lfo_ampm) str r4, [lr, #0x50] @ was_update @@ -925,3 +904,4 @@ crl_loop_end: .pool +@ vim:filetype=armasm From 151df6adf9d63c9b8c8a61946243800610ac3a65 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 7 Oct 2017 01:36:22 +0300 Subject: [PATCH 0080/1110] simple ym2612 fixes --- pico/sound/ym2612.c | 37 +++++++++---------------------------- 1 file changed, 9 insertions(+), 28 deletions(-) diff --git a/pico/sound/ym2612.c b/pico/sound/ym2612.c index 5c52785d..e712278b 100644 --- a/pico/sound/ym2612.c +++ b/pico/sound/ym2612.c @@ -1693,22 +1693,19 @@ int YM2612Write_(unsigned int a, unsigned int v) v &= 0xff; /* adjust to 8 bit bus */ - switch( a&3){ + switch( a & 3 ){ case 0: /* address port 0 */ + case 2: /* address port 1 */ ym2612.OPN.ST.address = v; - ym2612.addr_A1 = 0; - ret=0; + ym2612.addr_A1 = (a & 2) >> 1; + ret = 0; break; - case 1: /* data port 0 */ - if 
(ym2612.addr_A1 != 0) { - ret=0; - break; /* verified on real YM2608 */ - } + case 1: + case 3: /* data port */ + addr = ym2612.OPN.ST.address | ((int)ym2612.addr_A1 << 8); - addr = ym2612.OPN.ST.address; - - switch( addr & 0xf0 ) + switch( addr & 0x1f0 ) { case 0x20: /* 0x20-0x2f Mode */ switch( addr ) @@ -1721,6 +1718,7 @@ int YM2612Write_(unsigned int a, unsigned int v) else { ym2612.OPN.lfo_inc = 0; + ym2612.OPN.lfo_cnt = 0; } break; #if 0 // handled elsewhere @@ -1790,23 +1788,6 @@ int YM2612Write_(unsigned int a, unsigned int v) ret = OPNWriteReg(addr,v); } break; - - case 2: /* address port 1 */ - ym2612.OPN.ST.address = v; - ym2612.addr_A1 = 1; - ret=0; - break; - - case 3: /* data port 1 */ - if (ym2612.addr_A1 != 1) { - ret=0; - break; /* verified on real YM2608 */ - } - - addr = ym2612.OPN.ST.address | 0x100; - - ret = OPNWriteReg(addr, v); - break; } return ret; From e23f4494fb0b5bc31f8f8a61db88642c33f85516 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 30 Sep 2017 00:17:04 +0300 Subject: [PATCH 0081/1110] state: setup banks before CPUs on load --- pico/state.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pico/state.c b/pico/state.c index 88b8655c..3cd41b2b 100644 --- a/pico/state.c +++ b/pico/state.c @@ -589,6 +589,9 @@ readend: if (PicoAHW & PAHW_32X) Pico32xStateLoaded(1); + if (PicoLoadStateHook != NULL) + PicoLoadStateHook(); + // must unpack 68k and z80 after banks are set up if (!(PicoAHW & PAHW_SMS)) SekUnpackCpu(buff_m68k, 0); @@ -683,8 +686,6 @@ static int pico_state_internal(void *afile, int is_save) ret = state_load_legacy(afile); } - if (PicoLoadStateHook != NULL) - PicoLoadStateHook(); Pico.m.dirtyPal = 1; } From 4f2cdbf551ad1a7f487b65b4754cbf7983e80b8a Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 30 Sep 2017 02:23:13 +0300 Subject: [PATCH 0082/1110] clean up dac code a bit --- pico/debug.c | 26 ++++++------- pico/memory.c | 8 ++-- pico/pico_cmn.c | 13 ++----- pico/pico_int.h | 1 + pico/sms.c | 2 + 
pico/sound/sound.c | 92 ++++++++++++++++++--------------------------- pico/sound/ym2612.c | 1 + 7 files changed, 62 insertions(+), 81 deletions(-) diff --git a/pico/debug.c b/pico/debug.c index e3ef83be..e780262e 100644 --- a/pico/debug.c +++ b/pico/debug.c @@ -366,31 +366,31 @@ void PDebugZ80Frame(void) } z80_resetCycles(); - emustatus &= ~1; + PsndStartFrame(); - if (Pico.m.z80Run && !Pico.m.z80_reset && (PicoOpt&POPT_EN_Z80)) - PicoSyncZ80(line_sample*488); - if (ym2612.dacen && PsndDacLine <= line_sample) - PsndDoDAC(line_sample); + if (/*Pico.m.z80Run &&*/ !Pico.m.z80_reset && (PicoOpt&POPT_EN_Z80)) + PicoSyncZ80(SekCycleCnt + line_sample * 488); if (PsndOut) PsndGetSamples(line_sample); - if (Pico.m.z80Run && !Pico.m.z80_reset && (PicoOpt&POPT_EN_Z80)) { - PicoSyncZ80(224*488); + if (/*Pico.m.z80Run &&*/ !Pico.m.z80_reset && (PicoOpt&POPT_EN_Z80)) { + PicoSyncZ80(SekCycleCnt + 224 * 488); z80_int(); } - if (ym2612.dacen && PsndDacLine <= 224) - PsndDoDAC(224); if (PsndOut) PsndGetSamples(224); // sync z80 - if (Pico.m.z80Run && !Pico.m.z80_reset && (PicoOpt&POPT_EN_Z80)) - PicoSyncZ80(Pico.m.pal ? 151809 : 127671); // cycles adjusted for converter - if (PsndOut && ym2612.dacen && PsndDacLine <= lines-1) - PsndDoDAC(lines-1); + if (/*Pico.m.z80Run &&*/ !Pico.m.z80_reset && (PicoOpt&POPT_EN_Z80)) { + SekCycleCnt += Pico.m.pal ? 
151809 : 127671; // cycles adjusted for converter + PicoSyncZ80(SekCycleCnt); + } + if (PsndOut && ym2612.dacen && PsndDacLine < lines) + PsndDoDAC(lines - 1); + PsndDoPSG(lines - 1); timers_cycle(); + SekCycleAim = SekCycleCnt; } void PDebugCPUStep(void) diff --git a/pico/memory.c b/pico/memory.c index cb1b5ac3..fb5fa9be 100644 --- a/pico/memory.c +++ b/pico/memory.c @@ -932,7 +932,7 @@ static int ym2612_write_local(u32 a, u32 d, int is_from_z80) int scanline = get_scanline(is_from_z80); //elprintf(EL_STATUS, "%03i -> %03i dac w %08x z80 %i", PsndDacLine, scanline, d, is_from_z80); ym2612.dacout = ((int)d - 0x80) << 6; - if (PsndOut && ym2612.dacen && scanline >= PsndDacLine) + if (ym2612.dacen) PsndDoDAC(scanline); return 0; } @@ -1016,8 +1016,10 @@ static int ym2612_write_local(u32 a, u32 d, int is_from_z80) } case 0x2b: { /* DAC Sel (YM2612) */ int scanline = get_scanline(is_from_z80); - ym2612.dacen = d & 0x80; - if (d & 0x80) PsndDacLine = scanline; + if (ym2612.dacen != (d & 0x80)) { + ym2612.dacen = d & 0x80; + PsndDacLine = scanline; + } #ifdef __GP2X__ if (PicoOpt & POPT_EXT_FM) YM2612Write_940(a, d, scanline); #endif diff --git a/pico/pico_cmn.c b/pico/pico_cmn.c index 2de65d02..b39cfdb2 100644 --- a/pico/pico_cmn.c +++ b/pico/pico_cmn.c @@ -85,8 +85,7 @@ static int PicoFrameHints(void) } z80_resetCycles(); - PsndDacLine = 0; - emustatus &= ~1; + PsndStartFrame(); pv->status&=~0x88; // clear V-Int, come out of vblank @@ -145,8 +144,6 @@ static int PicoFrameHints(void) if (Pico.m.z80Run && !Pico.m.z80_reset && (PicoOpt&POPT_EN_Z80)) PicoSyncZ80(cycles); - if (ym2612.dacen && PsndDacLine <= y) - PsndDoDAC(y); #ifdef PICO_CD if (PicoAHW & PAHW_MCD) pcd_sync_s68k(cycles, 0); @@ -230,11 +227,7 @@ static int PicoFrameHints(void) // get samples from sound chips if (y == 224 && PsndOut) - { - if (ym2612.dacen && PsndDacLine <= y) - PsndDoDAC(y); PsndGetSamples(y); - } // Run scanline: CPUS_RUN(CYCLES_M68K_LINE - CYCLES_M68K_VINT_LAG - CYCLES_M68K_ASD); @@ -269,8 
+262,8 @@ static int PicoFrameHints(void) cycles = SekCyclesDone(); if (Pico.m.z80Run && !Pico.m.z80_reset && (PicoOpt&POPT_EN_Z80)) PicoSyncZ80(cycles); - if (PsndOut && ym2612.dacen && PsndDacLine <= lines-1) - PsndDoDAC(lines-1); + if (PsndOut && ym2612.dacen && PsndDacLine < lines) + PsndDoDAC(lines - 1); #ifdef PICO_CD if (PicoAHW & PAHW_MCD) diff --git a/pico/pico_int.h b/pico/pico_int.h index 2249148f..551dcbc7 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -820,6 +820,7 @@ PICO_INTERNAL_ASM void wram_1M_to_2M(unsigned char *m); // sound/sound.c PICO_INTERNAL void PsndReset(void); +PICO_INTERNAL void PsndStartFrame(void); PICO_INTERNAL void PsndDoDAC(int line_to); PICO_INTERNAL void PsndClear(void); PICO_INTERNAL void PsndGetSamples(int y); diff --git a/pico/sms.c b/pico/sms.c index 8c44d51f..7dc796ef 100644 --- a/pico/sms.c +++ b/pico/sms.c @@ -258,6 +258,8 @@ void PicoFrameMS(void) int nmi; int y; + PsndStartFrame(); + nmi = (PicoPad[0] >> 7) & 1; if (!Pico.ms.nmi_state && nmi) z80_nmi(); diff --git a/pico/sound/sound.c b/pico/sound/sound.c index 69c1be03..8f88dd7c 100644 --- a/pico/sound/sound.c +++ b/pico/sound/sound.c @@ -14,15 +14,13 @@ #include "../cd/cue.h" #include "mix.h" -#define SIMPLE_WRITE_SOUND 0 - void (*PsndMix_32_to_16l)(short *dest, int *src, int count) = mix_32_to_16l_stereo; // master int buffer to mix to static int PsndBuffer[2*(44100+100)/50]; // dac -static unsigned short dac_info[312+4]; // pppppppp ppppllll, p - pos in buff, l - length to write for this sample +static unsigned short dac_info[312+4]; // pos in sample buffer // cdda output buffer short cdda_out_buffer[2*1152]; @@ -34,6 +32,7 @@ int PsndLen_exc_add=0; // this is for non-integer sample counts per line, eg. 
22 int PsndLen_exc_cnt=0; int PsndDacLine=0; short *PsndOut=NULL; // PCM data buffer +static int PsndLen_use; // timers int timer_a_next_oflow, timer_a_step; // in z80 cycles @@ -57,14 +56,12 @@ static void dac_recalculate(void) for(i=226; i != 225; i++) { if (i >= lines) i = 0; - len = 0; if(dac_cnt < 0) { - len=1; pos++; dac_cnt += lines; } dac_cnt -= PsndLen; - dac_info[i] = (pos<<4)|len; + dac_info[i] = pos; } } else @@ -86,24 +83,12 @@ static void dac_recalculate(void) len++; } dac_cnt += PsndLen; - dac_info[i] = (pos<<4)|len; - pos+=len; + pos += len; + dac_info[i] = pos; } - // last sample - for(len = 0, i = pos; i < PsndLen; i++) len++; - if (PsndLen_exc_add) len++; - dac_info[224] = (pos<<4)|len; } - mid = (dac_info[lines-1] & 0xfff0) + ((dac_info[lines-1] & 0xf) << 4); for (i = lines; i < sizeof(dac_info) / sizeof(dac_info[0]); i++) - dac_info[i] = mid; - //for(i=len=0; i < lines; i++) { - // printf("%03i : %03i : %i\n", i, dac_info[i]>>4, dac_info[i]&0xf); - // len+=dac_info[i]&0xf; - //} - //printf("rate is %i, len %f\n", PsndRate, (double)PsndRate/(Pico.m.pal ? 
50.0 : 60.0)); - //printf("len total: %i, last pos: %i\n", len, pos); - //exit(8); + dac_info[i] = dac_info[0]; } @@ -163,6 +148,21 @@ void PsndRerate(int preserve_state) } +PICO_INTERNAL void PsndStartFrame(void) +{ + // compensate for float part of PsndLen + PsndLen_use = PsndLen; + PsndLen_exc_cnt += PsndLen_exc_add; + if (PsndLen_exc_cnt >= 0x10000) { + PsndLen_exc_cnt -= 0x10000; + PsndLen_use++; + } + + PsndDacLine = 0; + emustatus &= ~1; + dac_info[224] = PsndLen_use; +} + PICO_INTERNAL void PsndDoDAC(int line_to) { int pos, pos1, len; @@ -172,12 +172,16 @@ PICO_INTERNAL void PsndDoDAC(int line_to) if (line_to >= 312) line_to = 311; + pos = dac_info[line_from]; + pos1 = dac_info[line_to + 1]; + len = pos1 - pos; + if (len <= 0) + return; + PsndDacLine = line_to + 1; - pos =dac_info[line_from]>>4; - pos1=dac_info[line_to]; - len = ((pos1>>4)-pos) + (pos1&0xf); - if (!len) return; + if (!PsndOut) + return; if (PicoOpt & POPT_EN_STEREO) { short *d = PsndOut + pos*2; @@ -260,17 +264,6 @@ static int PsndRender(int offset, int length) pprof_start(sound); -#if !SIMPLE_WRITE_SOUND - if (offset == 0) { // should happen once per frame - // compensate for float part of PsndLen - PsndLen_exc_cnt += PsndLen_exc_add; - if (PsndLen_exc_cnt >= 0x10000) { - PsndLen_exc_cnt -= 0x10000; - length++; - } - } -#endif - // PSG if (PicoOpt & POPT_EN_PSG) SN76496Update(PsndOut+offset, length, stereo); @@ -322,20 +315,16 @@ static int PsndRender(int offset, int length) // to be called on 224 or line_sample scanlines only PICO_INTERNAL void PsndGetSamples(int y) { -#if SIMPLE_WRITE_SOUND - if (y != 224) return; - PsndRender(0, PsndLen); - if (PicoWriteSound) - PicoWriteSound(PsndLen * ((PicoOpt & POPT_EN_STEREO) ? 
4 : 2)); - PsndClear(); -#else static int curr_pos = 0; + if (ym2612.dacen && PsndDacLine < y) + PsndDoDAC(y - 1); + if (y == 224) { if (emustatus & 2) curr_pos += PsndRender(curr_pos, PsndLen-PsndLen/2); - else curr_pos = PsndRender(0, PsndLen); + else curr_pos = PsndRender(0, PsndLen_use); if (emustatus & 1) emustatus |= 2; else emustatus &= ~2; @@ -343,28 +332,20 @@ PICO_INTERNAL void PsndGetSamples(int y) PicoWriteSound(curr_pos * ((PicoOpt & POPT_EN_STEREO) ? 4 : 2)); // clear sound buffer PsndClear(); + PsndDacLine = 224; + dac_info[224] = 0; } else if (emustatus & 3) { emustatus|= 2; emustatus&=~1; curr_pos = PsndRender(0, PsndLen/2); } -#endif } PICO_INTERNAL void PsndGetSamplesMS(void) { int stereo = (PicoOpt & 8) >> 3; - int length = PsndLen; - -#if !SIMPLE_WRITE_SOUND - // compensate for float part of PsndLen - PsndLen_exc_cnt += PsndLen_exc_add; - if (PsndLen_exc_cnt >= 0x10000) { - PsndLen_exc_cnt -= 0x10000; - length++; - } -#endif + int length = PsndLen_use; // PSG if (PicoOpt & POPT_EN_PSG) @@ -382,3 +363,4 @@ PICO_INTERNAL void PsndGetSamplesMS(void) PsndClear(); } +// vim:shiftwidth=2:ts=2:expandtab diff --git a/pico/sound/ym2612.c b/pico/sound/ym2612.c index e712278b..0867f558 100644 --- a/pico/sound/ym2612.c +++ b/pico/sound/ym2612.c @@ -1679,6 +1679,7 @@ void YM2612ResetChip_(void) for(i = 0x26 ; i >= 0x20 ; i-- ) OPNWriteReg(i,0); /* DAC mode clear */ ym2612.dacen = 0; + ym2612.dacout = 0; ym2612.addr_A1 = 0; } From 5103774f6de1cd757dcff2fbfe9a2ac845143dd7 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 30 Sep 2017 18:14:20 +0300 Subject: [PATCH 0083/1110] sn76496: simplify writes --- pico/sound/sn76496.c | 120 ++++++++++++++----------------------------- 1 file changed, 39 insertions(+), 81 deletions(-) diff --git a/pico/sound/sn76496.c b/pico/sound/sn76496.c index 8474c703..b2127594 100644 --- a/pico/sound/sn76496.c +++ b/pico/sound/sn76496.c @@ -73,96 +73,54 @@ int *sn76496_regs; void SN76496Write(int data) { struct SN76496 *R = &ono_sn; - 
int n; - + int n, r, c; /* update the output buffer before changing the registers */ //stream_update(R->Channel,0); + r = R->LastRegister; if (data & 0x80) - { - int r = (data & 0x70) >> 4; - int c = r/2; + r = R->LastRegister = (data & 0x70) >> 4; + c = r / 2; - R->LastRegister = r; - R->Register[r] = (R->Register[r] & 0x3f0) | (data & 0x0f); - switch (r) - { - case 0: /* tone 0 : frequency */ - case 2: /* tone 1 : frequency */ - case 4: /* tone 2 : frequency */ - R->Period[c] = R->UpdateStep * R->Register[r]; - if (R->Period[c] == 0) R->Period[c] = R->UpdateStep; - if (r == 4) - { - /* update noise shift frequency */ - if ((R->Register[6] & 0x03) == 0x03) - R->Period[3] = 2 * R->Period[2]; - } - break; - case 1: /* tone 0 : volume */ - case 3: /* tone 1 : volume */ - case 5: /* tone 2 : volume */ - case 7: /* noise : volume */ - R->Volume[c] = R->VolTable[data & 0x0f]; - break; - case 6: /* noise : frequency, mode */ - { - int n = R->Register[6]; - R->NoiseFB = (n & 4) ? FB_WNOISE : FB_PNOISE; - n &= 3; - /* N/512,N/1024,N/2048,Tone #3 output */ - R->Period[3] = ((n&3) == 3) ? 
2 * R->Period[2] : (R->UpdateStep << (5+(n&3))); - - /* reset noise shifter */ - R->RNG = NG_PRESET; - R->Output[3] = R->RNG & 1; - } - break; - } - } + if (!(data & 0x80) && (r == 0 || r == 2 || r == 4)) + // data byte (tone only) + R->Register[r] = (R->Register[r] & 0x0f) | ((data & 0x3f) << 4); else + R->Register[r] = (R->Register[r] & 0x3f0) | (data & 0x0f); + + data = R->Register[r]; + switch (r) { - int r = R->LastRegister; - int c = r/2; + case 0: /* tone 0 : frequency */ + case 2: /* tone 1 : frequency */ + case 4: /* tone 2 : frequency */ + R->Period[c] = R->UpdateStep * data; + if (R->Period[c] == 0) R->Period[c] = R->UpdateStep; + if (r == 4) + { + /* update noise shift frequency */ + if ((R->Register[6] & 0x03) == 0x03) + R->Period[3] = 2 * R->Period[2]; + } + break; + case 1: /* tone 0 : volume */ + case 3: /* tone 1 : volume */ + case 5: /* tone 2 : volume */ + case 7: /* noise : volume */ + R->Volume[c] = R->VolTable[data & 0x0f]; + break; + case 6: /* noise : frequency, mode */ + n = data; + R->NoiseFB = (n & 4) ? FB_WNOISE : FB_PNOISE; + n &= 3; + /* N/512,N/1024,N/2048,Tone #3 output */ + R->Period[3] = (n == 3) ? 
2 * R->Period[2] : (R->UpdateStep << (5 + n)); - switch (r) - { - case 0: /* tone 0 : frequency */ - case 2: /* tone 1 : frequency */ - case 4: /* tone 2 : frequency */ - R->Register[r] = (R->Register[r] & 0x0f) | ((data & 0x3f) << 4); - R->Period[c] = R->UpdateStep * R->Register[r]; - if (R->Period[c] == 0) R->Period[c] = R->UpdateStep; - if (r == 4) - { - /* update noise shift frequency */ - if ((R->Register[6] & 0x03) == 0x03) - R->Period[3] = 2 * R->Period[2]; - } - break; - case 1: /* tone 0 : volume */ - case 3: /* tone 1 : volume */ - case 5: /* tone 2 : volume */ - case 7: /* noise : volume */ - R->Volume[c] = R->VolTable[data & 0x0f]; - R->Register[r] = (R->Register[r] & 0x3f0) | (data & 0x0f); - break; - case 6: /* noise : frequency, mode */ - { - R->Register[r] = (R->Register[r] & 0x3f0) | (data & 0x0f); - n = R->Register[6]; - R->NoiseFB = (n & 4) ? FB_WNOISE : FB_PNOISE; - n &= 3; - /* N/512,N/1024,N/2048,Tone #3 output */ - R->Period[3] = ((n&3) == 3) ? 2 * R->Period[2] : (R->UpdateStep << (5+(n&3))); - - /* reset noise shifter */ - R->RNG = NG_PRESET; - R->Output[3] = R->RNG & 1; - } - break; - } + /* reset noise shifter */ + R->RNG = NG_PRESET; + R->Output[3] = R->RNG & 1; + break; } } From 5d638db094e6677240fb4766f2168c7b0791b677 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 30 Sep 2017 22:55:57 +0300 Subject: [PATCH 0084/1110] handle frequent psg updates --- pico/memory.c | 38 +++++++++++++++++++++++++++----------- pico/pico_cmn.c | 2 ++ pico/pico_int.h | 3 ++- pico/sound/sound.c | 44 +++++++++++++++++++++++++++++++++++--------- 4 files changed, 66 insertions(+), 21 deletions(-) diff --git a/pico/memory.c b/pico/memory.c index fb5fa9be..0907696a 100644 --- a/pico/memory.c +++ b/pico/memory.c @@ -385,6 +385,28 @@ void NOINLINE ctl_write_z80reset(u32 d) } } +static int get_scanline(int is_from_z80); + +static void psg_write_68k(u32 d) +{ + // look for volume write and update if needed + if ((d & 0x90) == 0x90 && PsndPsgLine < Pico.m.scanline) + 
PsndDoPSG(Pico.m.scanline); + + SN76496Write(d); +} + +static void psg_write_z80(u32 d) +{ + if ((d & 0x90) == 0x90) { + int scanline = get_scanline(1); + if (PsndPsgLine < scanline) + PsndDoPSG(scanline); + } + + SN76496Write(d); +} + // ----------------------------------------------------------------- #ifndef _ASM_MEMORY_C @@ -528,8 +550,7 @@ static void PicoWrite8_z80(u32 a, u32 d) } // TODO: probably other VDP access too? Maybe more mirrors? if ((a & 0x7ff9) == 0x7f11) { // PSG Sound - if (PicoOpt & POPT_EN_PSG) - SN76496Write(d); + psg_write_68k(d); return; } if ((a & 0x7f00) == 0x6000) // Z80 BANK register @@ -703,8 +724,7 @@ static u32 PicoRead16_vdp(u32 a) static void PicoWrite8_vdp(u32 a, u32 d) { if ((a & 0x00f9) == 0x0011) { // PSG Sound - if (PicoOpt & POPT_EN_PSG) - SN76496Write(d); + psg_write_68k(d); return; } if ((a & 0x00e0) == 0x0000) { @@ -718,11 +738,8 @@ static void PicoWrite8_vdp(u32 a, u32 d) static void PicoWrite16_vdp(u32 a, u32 d) { - if ((a & 0x00f9) == 0x0010) { // PSG Sound - if (PicoOpt & POPT_EN_PSG) - SN76496Write(d); - return; - } + if ((a & 0x00f9) == 0x0010) // PSG Sound + psg_write_68k(d); if ((a & 0x00e0) == 0x0000) { PicoVideoWrite(a, d); return; @@ -1198,8 +1215,7 @@ static void z80_md_vdp_br_write(unsigned int a, unsigned char data) { if ((a&0xfff9) == 0x7f11) // 7f11 7f13 7f15 7f17 { - if (PicoOpt & POPT_EN_PSG) - SN76496Write(data); + psg_write_z80(data); return; } // at least VDP data writes hang my machine diff --git a/pico/pico_cmn.c b/pico/pico_cmn.c index b39cfdb2..78fdd120 100644 --- a/pico/pico_cmn.c +++ b/pico/pico_cmn.c @@ -264,6 +264,8 @@ static int PicoFrameHints(void) PicoSyncZ80(cycles); if (PsndOut && ym2612.dacen && PsndDacLine < lines) PsndDoDAC(lines - 1); + if (PsndOut && PsndPsgLine < lines) + PsndDoPSG(lines - 1); #ifdef PICO_CD if (PicoAHW & PAHW_MCD) diff --git a/pico/pico_int.h b/pico/pico_int.h index 551dcbc7..12769315 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -822,10 +822,11 @@ 
PICO_INTERNAL_ASM void wram_1M_to_2M(unsigned char *m); PICO_INTERNAL void PsndReset(void); PICO_INTERNAL void PsndStartFrame(void); PICO_INTERNAL void PsndDoDAC(int line_to); +PICO_INTERNAL void PsndDoPSG(int line_to); PICO_INTERNAL void PsndClear(void); PICO_INTERNAL void PsndGetSamples(int y); PICO_INTERNAL void PsndGetSamplesMS(void); -extern int PsndDacLine; +extern int PsndDacLine, PsndPsgLine; // sms.c #ifndef NO_SMS diff --git a/pico/sound/sound.c b/pico/sound/sound.c index 8f88dd7c..510a9aba 100644 --- a/pico/sound/sound.c +++ b/pico/sound/sound.c @@ -19,7 +19,7 @@ void (*PsndMix_32_to_16l)(short *dest, int *src, int count) = mix_32_to_16l_ster // master int buffer to mix to static int PsndBuffer[2*(44100+100)/50]; -// dac +// dac, psg static unsigned short dac_info[312+4]; // pos in sample buffer // cdda output buffer @@ -30,7 +30,7 @@ int PsndRate=0; int PsndLen=0; // number of mono samples, multiply by 2 for stereo int PsndLen_exc_add=0; // this is for non-integer sample counts per line, eg. 
22050/60 int PsndLen_exc_cnt=0; -int PsndDacLine=0; +int PsndDacLine, PsndPsgLine; short *PsndOut=NULL; // PCM data buffer static int PsndLen_use; @@ -158,7 +158,7 @@ PICO_INTERNAL void PsndStartFrame(void) PsndLen_use++; } - PsndDacLine = 0; + PsndDacLine = PsndPsgLine = 0; emustatus &= ~1; dac_info[224] = PsndLen_use; } @@ -185,13 +185,42 @@ PICO_INTERNAL void PsndDoDAC(int line_to) if (PicoOpt & POPT_EN_STEREO) { short *d = PsndOut + pos*2; - for (; len > 0; len--, d+=2) *d = dout; + for (; len > 0; len--, d+=2) *d += dout; } else { short *d = PsndOut + pos; - for (; len > 0; len--, d++) *d = dout; + for (; len > 0; len--, d++) *d += dout; } } +PICO_INTERNAL void PsndDoPSG(int line_to) +{ + int line_from = PsndPsgLine; + int pos, pos1, len; + int stereo = 0; + + if (line_to >= 312) + line_to = 311; + + pos = dac_info[line_from]; + pos1 = dac_info[line_to + 1]; + len = pos1 - pos; + //elprintf(EL_STATUS, "%3d %3d %3d %3d %3d", + // pos, pos1, len, line_from, line_to); + if (len <= 0) + return; + + PsndPsgLine = line_to + 1; + + if (!PsndOut || !(PicoOpt & POPT_EN_PSG)) + return; + + if (PicoOpt & POPT_EN_STEREO) { + stereo = 1; + pos <<= 1; + } + SN76496Update(PsndOut + pos, len, stereo); +} + // cdda static void cdda_raw_update(int *buffer, int length) { @@ -264,10 +293,6 @@ static int PsndRender(int offset, int length) pprof_start(sound); - // PSG - if (PicoOpt & POPT_EN_PSG) - SN76496Update(PsndOut+offset, length, stereo); - if (PicoAHW & PAHW_PICO) { PicoPicoPCMUpdate(PsndOut+offset, length, stereo); return length; @@ -319,6 +344,7 @@ PICO_INTERNAL void PsndGetSamples(int y) if (ym2612.dacen && PsndDacLine < y) PsndDoDAC(y - 1); + PsndDoPSG(y - 1); if (y == 224) { From 12f89605e36fe5e901b28bf85f246faa128cbdfe Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 1 Oct 2017 00:03:54 +0300 Subject: [PATCH 0085/1110] improve fast forward for cd still broken, but a little bit better --- platform/common/emu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git 
a/platform/common/emu.c b/platform/common/emu.c index 6f42b73d..0a1b59c0 100644 --- a/platform/common/emu.c +++ b/platform/common/emu.c @@ -973,6 +973,9 @@ void emu_set_fastforward(int set_on) currentConfig.EmuOpt = set_EmuOpt; PsndRerate(1); is_on = 0; + // mainly to unbreak pcm + if (PicoAHW & PAHW_MCD) + pcd_state_loaded(); } } From 75b84e4b7c446cf42a2838834b5d50a4059709fe Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 1 Oct 2017 00:59:44 +0300 Subject: [PATCH 0086/1110] slightly better z80 vdp reads --- pico/memory.c | 32 +++++++++++++++--- pico/memory_amips.s | 2 +- pico/pico_int.h | 7 +++- pico/videoport.c | 79 ++++++++++++++++++++++++++------------------- 4 files changed, 79 insertions(+), 41 deletions(-) diff --git a/pico/memory.c b/pico/memory.c index 0907696a..34ef99d6 100644 --- a/pico/memory.c +++ b/pico/memory.c @@ -703,10 +703,21 @@ void PicoWrite16_io(u32 a, u32 d) // VDP area (0xc00000 - 0xdfffff) // TODO: verify if lower byte goes to PSG on word writes -static u32 PicoRead8_vdp(u32 a) +u32 PicoRead8_vdp(u32 a) { - if ((a & 0x00e0) == 0x0000) - return PicoVideoRead8(a); + if ((a & 0x00f0) == 0x0000) { + switch (a & 0x0d) + { + case 0x00: return PicoVideoRead8DataH(); + case 0x01: return PicoVideoRead8DataL(); + case 0x04: return PicoVideoRead8CtlH(); + case 0x05: return PicoVideoRead8CtlL(); + case 0x08: + case 0x0c: return PicoVideoRead8HV_H(); + case 0x09: + case 0x0d: return PicoVideoRead8HV_L(); + } + } elprintf(EL_UIO|EL_ANOMALY, "68k bad read [%06x] @%06x", a, SekPc); return 0; @@ -1184,8 +1195,19 @@ void PicoWrite16_32x(u32 a, u32 d) {} static unsigned char z80_md_vdp_read(unsigned short a) { - if ((a & 0x00e0) == 0x0000) - return PicoVideoRead8(a); // FIXME: depends on 68k cycles + if ((a & 0x00f0) == 0x0000) { + switch (a & 0x0d) + { + case 0x00: return PicoVideoRead8DataH(); + case 0x01: return PicoVideoRead8DataL(); + case 0x04: return PicoVideoRead8CtlH(); + case 0x05: return PicoVideoRead8CtlL(); + case 0x08: + case 0x0c: return 
get_scanline(1); // FIXME: make it proper + case 0x09: + case 0x0d: return Pico.m.rotate++; + } + } elprintf(EL_ANOMALY, "z80 invalid r8 [%06x] %02x", a, 0xff); return 0xff; diff --git a/pico/memory_amips.s b/pico/memory_amips.s index 4f09198f..b11c2e28 100644 --- a/pico/memory_amips.s +++ b/pico/memory_amips.s @@ -468,7 +468,7 @@ m_read8_vdp: or $t0, $t1 bnez $t0, m_read_null # invalid address nop - j PicoVideoRead8 + j PicoRead8_vdp nop m_read8_ram: diff --git a/pico/pico_int.h b/pico/pico_int.h index 12769315..97b7b0e4 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -793,7 +793,12 @@ void ym2612_unpack_state(void); extern int line_base_cycles; PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d); PICO_INTERNAL_ASM unsigned int PicoVideoRead(unsigned int a); -PICO_INTERNAL_ASM unsigned int PicoVideoRead8(unsigned int a); +unsigned char PicoVideoRead8DataH(void); +unsigned char PicoVideoRead8DataL(void); +unsigned char PicoVideoRead8CtlH(void); +unsigned char PicoVideoRead8CtlL(void); +unsigned char PicoVideoRead8HV_H(void); +unsigned char PicoVideoRead8HV_L(void); extern int (*PicoDmaHook)(unsigned int source, int len, unsigned short **base, unsigned int *mask); // misc.c diff --git a/pico/videoport.c b/pico/videoport.c index fe96139f..0af4bbcc 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -563,43 +563,54 @@ PICO_INTERNAL_ASM unsigned int PicoVideoRead(unsigned int a) return 0; } -unsigned int PicoVideoRead8(unsigned int a) +unsigned char PicoVideoRead8DataH(void) { - unsigned int d; - a&=0x1d; + return VideoRead() >> 8; +} - switch (a) - { - case 0: return VideoRead() >> 8; - case 1: return VideoRead() & 0xff; - case 4: // control port/status reg - d = Pico.video.status >> 8; - if (d&1) Pico.video.status&=~0x100; // FIFO no longer full - Pico.video.pending = 0; - elprintf(EL_SR, "SR read (h): %02x @ %06x", d, SekPc); - return d; - case 5: - d = Pico.video.status & 0xff; - //if (PicoOpt&POPT_ALT_RENDERER) d|=0x0020; // sprite 
collision (Shadow of the Beast) - d |= ((Pico.video.reg[1]&0x40)^0x40) >> 3; // set V-Blank if display is disabled - d |= (Pico.video.pending_ints&0x20)<<2; // V-int pending? - if (SekCyclesDone() - line_base_cycles >= 488-88) d |= 4; // H-Blank - Pico.video.pending = 0; - elprintf(EL_SR, "SR read (l): %02x @ %06x", d, SekPc); - return d; - case 8: // hv counter - elprintf(EL_HVCNT, "vcounter: %02x (%i) @ %06x", Pico.video.v_counter, SekCyclesDone(), SekPc); - return Pico.video.v_counter; - case 9: - d = (SekCyclesDone() - line_base_cycles) & 0x1ff; // FIXME - if (Pico.video.reg[12]&1) - d = hcounts_40[d]; - else d = hcounts_32[d]; - elprintf(EL_HVCNT, "hcounter: %02x (%i) @ %06x", d, SekCyclesDone(), SekPc); - return d; - } +unsigned char PicoVideoRead8DataL(void) +{ + return VideoRead(); +} - return 0; +// FIXME: broken mess +unsigned char PicoVideoRead8CtlH(void) +{ + u8 d = (u8)(Pico.video.status >> 8); + if (d & 1) + Pico.video.status &= ~0x100; // FIFO no longer full + Pico.video.pending = 0; + elprintf(EL_SR, "SR read (h): %02x @ %06x", d, SekPc); + return d; +} + +unsigned char PicoVideoRead8CtlL(void) +{ + u8 d = (u8)Pico.video.status; + //if (PicoOpt&POPT_ALT_RENDERER) d|=0x0020; // sprite collision (Shadow of the Beast) + d |= ((Pico.video.reg[1]&0x40)^0x40) >> 3; // set V-Blank if display is disabled + d |= (Pico.video.pending_ints&0x20)<<2; // V-int pending? 
+ if (SekCyclesDone() - line_base_cycles >= 488-88) d |= 4; // H-Blank + Pico.video.pending = 0; + elprintf(EL_SR, "SR read (l): %02x @ %06x", d, SekPc); + return d; +} + +unsigned char PicoVideoRead8HV_H(void) +{ + elprintf(EL_HVCNT, "vcounter: %02x (%i) @ %06x", Pico.video.v_counter, SekCyclesDone(), SekPc); + return Pico.video.v_counter; +} + +// FIXME: broken +unsigned char PicoVideoRead8HV_L(void) +{ + u32 d = (SekCyclesDone() - line_base_cycles) & 0x1ff; // FIXME + if (Pico.video.reg[12]&1) + d = hcounts_40[d]; + else d = hcounts_32[d]; + elprintf(EL_HVCNT, "hcounter: %02x (%i) @ %06x", d, SekCyclesDone(), SekPc); + return d; } // vim:shiftwidth=2:ts=2:expandtab From d1b8bcc63466f6982685df518db8fef399f9cf91 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 1 Oct 2017 01:29:08 +0300 Subject: [PATCH 0087/1110] some poor timing improvement attempts --- pico/memory.c | 8 ++++++-- pico/pico_int.h | 2 ++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/pico/memory.c b/pico/memory.c index 34ef99d6..3900208b 100644 --- a/pico/memory.c +++ b/pico/memory.c @@ -1195,6 +1195,8 @@ void PicoWrite16_32x(u32 a, u32 d) {} static unsigned char z80_md_vdp_read(unsigned short a) { + z80_subCLeft(2); + if ((a & 0x00f0) == 0x0000) { switch (a & 0x0d) { @@ -1218,8 +1220,10 @@ static unsigned char z80_md_bank_read(unsigned short a) unsigned int addr68k; unsigned char ret; - addr68k = Pico.m.z80_bank68k<<15; - addr68k += a & 0x7fff; + z80_subCLeft(3); + + addr68k = Pico.m.z80_bank68k << 15; + addr68k |= a & 0x7fff; ret = m68k_read8(addr68k); diff --git a/pico/pico_int.h b/pico/pico_int.h index 97b7b0e4..f439d6af 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -185,6 +185,7 @@ extern struct DrZ80 drZ80; #define z80_nmi() drZ80.Z80IF |= 8 #define z80_cyclesLeft drZ80.cycles +#define z80_subCLeft(c) drZ80.cycles -= c #define z80_pc() (drZ80.Z80PC - drZ80.Z80PC_BASE) #elif defined(_USE_CZ80) @@ -196,6 +197,7 @@ extern struct DrZ80 drZ80; #define z80_nmi() 
Cz80_Set_IRQ(&CZ80, IRQ_LINE_NMI, 0) #define z80_cyclesLeft (CZ80.ICount - CZ80.ExtraCycles) +#define z80_subCLeft(c) CZ80.ICount -= c #define z80_pc() Cz80_Get_Reg(&CZ80, CZ80_PC) #else From 8f80007bcc94faae7c369d4ed8a07a9d35c31c72 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 1 Oct 2017 02:42:24 +0300 Subject: [PATCH 0088/1110] fix underalloc --- pico/sek.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pico/sek.c b/pico/sek.c index 8bf0341b..51765287 100644 --- a/pico/sek.c +++ b/pico/sek.c @@ -302,7 +302,8 @@ void SekRegisterIdleHit(unsigned int pc) void SekInitIdleDet(void) { - unsigned short **tmp = realloc(idledet_ptrs, 0x200*4); + unsigned short **tmp; + tmp = realloc(idledet_ptrs, 0x200 * sizeof(tmp[0])); if (tmp == NULL) { free(idledet_ptrs); idledet_ptrs = NULL; @@ -410,7 +411,8 @@ int SekRegisterIdlePatch(unsigned int pc, int oldop, int newop, void *ctx) } if (idledet_count >= 0x200 && (idledet_count & 0x1ff) == 0) { - unsigned short **tmp = realloc(idledet_ptrs, (idledet_count+0x200)*4); + unsigned short **tmp; + tmp = realloc(idledet_ptrs, (idledet_count+0x200) * sizeof(tmp[0])); if (tmp == NULL) return 1; idledet_ptrs = tmp; From e42a47e2086e6512519dd86af420363498302f49 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 2 Oct 2017 23:38:12 +0300 Subject: [PATCH 0089/1110] some more risky timing changes But should be closer to the real thing. Let's see what this will break... 
--- pico/cd/mcd.c | 2 +- pico/debug.c | 2 +- pico/draw.c | 7 +-- pico/memory.c | 1 - pico/pico.c | 30 +++++------ pico/pico/pico.c | 2 +- pico/pico_cmn.c | 132 +++++++++++++++++++++++++++++---------------- pico/pico_int.h | 23 ++++++-- pico/sound/sound.c | 10 ++-- pico/state.c | 1 + 10 files changed, 129 insertions(+), 81 deletions(-) diff --git a/pico/cd/mcd.c b/pico/cd/mcd.c index 687dfc3c..6d23604c 100644 --- a/pico/cd/mcd.c +++ b/pico/cd/mcd.c @@ -146,7 +146,7 @@ static void pcd_set_cycle_mult(void) { // ~1.63 for NTSC, ~1.645 for PAL if (Pico.m.pal) - mcd_m68k_cycle_mult = ((12500000ull << 16) / (50*312*488)); + mcd_m68k_cycle_mult = ((12500000ull << 16) / (50*313*488)); else mcd_m68k_cycle_mult = ((12500000ull << 16) / (60*262*488)) + 1; } diff --git a/pico/debug.c b/pico/debug.c index e780262e..af5241c1 100644 --- a/pico/debug.c +++ b/pico/debug.c @@ -358,7 +358,7 @@ void PDebugZ80Frame(void) return; if (Pico.m.pal) { - lines = 312; + lines = 313; line_sample = 68; } else { lines = 262; diff --git a/pico/draw.c b/pico/draw.c index 5e8d86b4..e2e4fb49 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -1553,13 +1553,14 @@ void PicoDrawSync(int to, int blank_last_line) pprof_start(draw); - if (rendlines != 240) + if (rendlines != 240) { offs = 8; + if (to > 223) + to = 223; + } for (line = Pico.est.DrawScanline; line < to; line++) - { PicoLine(line, offs, sh, bgc); - } // last line if (line <= to) diff --git a/pico/memory.c b/pico/memory.c index 3900208b..5d1d25f7 100644 --- a/pico/memory.c +++ b/pico/memory.c @@ -539,7 +539,6 @@ static void PicoWrite8_z80(u32 a, u32 d) } if ((a & 0x4000) == 0x0000) { // z80 RAM - SekCyclesBurnRun(2); // FIXME hack Pico.zram[a & 0x1fff] = (u8)d; return; } diff --git a/pico/pico.c b/pico/pico.c index 23f7efb2..f0c54d17 100644 --- a/pico/pico.c +++ b/pico/pico.c @@ -22,7 +22,6 @@ int PicoAutoRgnOrder; struct PicoSRAM SRam; int emustatus; // rapid_ym2612, multi_ym_updates -int scanlines_total; void (*PicoWriteSound)(int len) = NULL; // 
called at the best time to send sound buffer (PsndOut) to hardware void (*PicoResetHook)(void) = NULL; @@ -227,29 +226,24 @@ void PicoLoopPrepare(void) // force setting possibly changed.. Pico.m.pal = (PicoRegionOverride == 2 || PicoRegionOverride == 8) ? 1 : 0; - // FIXME: PAL has 313 scanlines.. - scanlines_total = Pico.m.pal ? 312 : 262; - Pico.m.dirtyPal = 1; rendstatus_old = -1; } - -// dma2vram settings are just hacks to unglitch Legend of Galahad (needs <= 104 to work) -// same for Outrunners (92-121, when active is set to 24) -// 96 is VR hack +// this table is wrong and should be removed +// keeping it for now to compensate wrong timing elswhere, mainly for Outrunners static const int dma_timings[] = { - 167, 167, 166, 83, // vblank: 32cell: dma2vram dma2[vs|c]ram vram_fill vram_copy - 102, 205, 204, 102, // vblank: 40cell: - 16, 16, 15, 8, // active: 32cell: - 24, 18, 17, 9 // ... + 83, 166, 83, 83, // vblank: 32cell: dma2vram dma2[vs|c]ram vram_fill vram_copy + 102, 204, 102, 102, // vblank: 40cell: + 8, 16, 8, 8, // active: 32cell: + 17, 18, 9, 9 // ... }; static const int dma_bsycles[] = { - (488<<8)/167, (488<<8)/167, (488<<8)/166, (488<<8)/83, - (488<<8)/102, (488<<8)/233, (488<<8)/204, (488<<8)/102, - (488<<8)/16, (488<<8)/16, (488<<8)/15, (488<<8)/8, - (488<<8)/24, (488<<8)/18, (488<<8)/17, (488<<8)/9 + (488<<8)/83, (488<<8)/166, (488<<8)/83, (488<<8)/83, + (488<<8)/102, (488<<8)/204, (488<<8)/102, (488<<8)/102, + (488<<8)/8, (488<<8)/16, (488<<8)/8, (488<<8)/8, + (488<<8)/9, (488<<8)/18, (488<<8)/9, (488<<8)/9 }; // grossly inaccurate.. 
FIXME FIXXXMEE @@ -304,8 +298,8 @@ PICO_INTERNAL void PicoSyncZ80(unsigned int m68k_cycles_done) pprof_start(z80); elprintf(EL_BUSREQ, "z80 sync %i (%u|%u -> %u|%u)", cnt, - z80_cycle_cnt, z80_cycle_cnt / 288, - z80_cycle_aim, z80_cycle_aim / 288); + z80_cycle_cnt, z80_cycle_cnt / 228, + z80_cycle_aim, z80_cycle_aim / 228); if (cnt > 0) z80_cycle_cnt += z80_run(cnt); diff --git a/pico/pico/pico.c b/pico/pico/pico.c index bff5a2a4..d893f9df 100644 --- a/pico/pico/pico.c +++ b/pico/pico/pico.c @@ -23,7 +23,7 @@ PICO_INTERNAL void PicoReratePico(void) { int rate = guessed_rates[PicoPicohw.r12 & 7]; if (Pico.m.pal) - fifo_bytes_line = (rate<<16)/50/312/2; + fifo_bytes_line = (rate<<16)/50/313/2; else fifo_bytes_line = (rate<<16)/60/262/2; PicoPicoPCMRerate(rate); } diff --git a/pico/pico_cmn.c b/pico/pico_cmn.c index 78fdd120..b44bfba2 100644 --- a/pico/pico_cmn.c +++ b/pico/pico_cmn.c @@ -8,7 +8,6 @@ #define CYCLES_M68K_LINE 488 // suitable for both PAL/NTSC #define CYCLES_M68K_VINT_LAG 68 -#define CYCLES_M68K_ASD 148 // pad delay (for 6 button pads) #define PAD_DELAY() { \ @@ -53,13 +52,28 @@ static void SekSyncM68k(void) static inline void SekRunM68k(int cyc) { SekCycleAim += cyc; + cyc = SekCycleAim - SekCycleCnt; + if (cyc <= 0) + return; + SekCycleCnt += cyc >> 6; // refresh slowdowns SekSyncM68k(); } +static void do_hint(struct PicoVideo *pv) +{ + pv->pending_ints |= 0x10; + if (pv->reg[0] & 0x10) { + elprintf(EL_INTS, "hint: @ %06x [%u]", SekPc, SekCyclesDone()); + SekInterrupt(4); + } +} + static int PicoFrameHints(void) { - struct PicoVideo *pv=&Pico.video; - int lines, y, lines_vis = 224, line_sample, skip, vcnt_wrap; + struct PicoVideo *pv = &Pico.video; + int line_sample = Pico.m.pal ? 
68 : 93; + int lines, y, lines_vis, skip; + int vcnt_wrap, vcnt_adj; unsigned int cycles; int hint; // Hint counter @@ -77,25 +91,15 @@ static int PicoFrameHints(void) } else skip=PicoSkipFrame; - if (Pico.m.pal) { - line_sample = 68; - if (pv->reg[1]&8) lines_vis = 240; - } else { - line_sample = 93; - } - z80_resetCycles(); PsndStartFrame(); - pv->status&=~0x88; // clear V-Int, come out of vblank + // Load H-Int counter + hint = (pv->status & PVS_ACTIVE) ? pv->hint_cnt : pv->reg[10]; - hint=pv->reg[10]; // Load H-Int counter - //dprintf("-hint: %i", hint); + pv->status |= PVS_ACTIVE; - // This is to make active scan longer (needed for Double Dragon 2, mainly) - CPUS_RUN(CYCLES_M68K_ASD); - - for (y = 0; y < lines_vis; y++) + for (y = 0; ; y++) { pv->v_counter = Pico.m.scanline = y; if ((pv->reg[12]&6) == 6) { // interlace mode 2 @@ -104,24 +108,23 @@ static int PicoFrameHints(void) pv->v_counter &= 0xff; } + if ((y == 224 && !(pv->reg[1] & 8)) || y == 240) + break; + // VDP FIFO pv->lwrite_cnt -= 12; if (pv->lwrite_cnt <= 0) { - pv->lwrite_cnt=0; - Pico.video.status|=0x200; + pv->lwrite_cnt = 0; + Pico.video.status |= SR_EMPT; } PAD_DELAY(); // H-Interrupts: - if (--hint < 0) // y <= lines_vis: Comix Zone, Golden Axe + if (--hint < 0) { - hint=pv->reg[10]; // Reload H-Int counter - pv->pending_ints|=0x10; - if (pv->reg[0]&0x10) { - elprintf(EL_INTS, "hint: @ %06x [%i]", SekPc, SekCyclesDone()); - SekInterrupt(4); - } + hint = pv->reg[10]; // Reload H-Int counter + do_hint(pv); } // decide if we draw this line @@ -163,6 +166,10 @@ static int PicoFrameHints(void) pevt_log_m68k_o(EVT_NEXT_LINE); } + lines_vis = (pv->reg[1] & 8) ? 
240 : 224; + if (y == lines_vis) + pv->status &= ~PVS_ACTIVE; + if (!skip) { if (Pico.est.DrawScanline < y) @@ -172,29 +179,22 @@ static int PicoFrameHints(void) #endif } - // V-int line (224 or 240) - Pico.m.scanline = y; - pv->v_counter = 0xe0; // bad for 240 mode - if ((pv->reg[12]&6) == 6) pv->v_counter = 0xc1; - // VDP FIFO - pv->lwrite_cnt=0; - Pico.video.status|=0x200; + pv->lwrite_cnt = 0; + Pico.video.status |= SR_EMPT; memcpy(PicoPadInt, PicoPad, sizeof(PicoPadInt)); PAD_DELAY(); - // Last H-Int: + // Last H-Int (normally): if (--hint < 0) { - hint=pv->reg[10]; // Reload H-Int counter - pv->pending_ints|=0x10; - //printf("rhint: %i @ %06x [%i|%i]\n", hint, SekPc, y, SekCyclesDone()); - if (pv->reg[0]&0x10) SekInterrupt(4); + hint = pv->reg[10]; // Reload H-Int counter + do_hint(pv); } - pv->status|=0x08; // go into vblank - pv->pending_ints|=0x20; + pv->status |= SR_VB; // go into vblank + pv->pending_ints |= 0x20; // the following SekRun is there for several reasons: // there must be a delay after vblank bit is set and irq is asserted (Mazin Saga) @@ -204,8 +204,8 @@ static int PicoFrameHints(void) if (Pico.m.dma_xfers) SekCyclesBurn(CheckDMA()); CPUS_RUN(CYCLES_M68K_VINT_LAG); - if (pv->reg[1]&0x20) { - elprintf(EL_INTS, "vint: @ %06x [%i]", SekPc, SekCyclesDone()); + if (pv->reg[1] & 0x20) { + elprintf(EL_INTS, "vint: @ %06x [%u]", SekPc, SekCyclesDone()); SekInterrupt(6); } @@ -230,25 +230,39 @@ static int PicoFrameHints(void) PsndGetSamples(y); // Run scanline: - CPUS_RUN(CYCLES_M68K_LINE - CYCLES_M68K_VINT_LAG - CYCLES_M68K_ASD); + CPUS_RUN(CYCLES_M68K_LINE - CYCLES_M68K_VINT_LAG); if (PicoLineHook) PicoLineHook(); pevt_log_m68k_o(EVT_NEXT_LINE); - lines = scanlines_total; - vcnt_wrap = Pico.m.pal ? 
0x103 : 0xEB; // based on Gens, TODO: verify + if (Pico.m.pal) { + lines = 313; + vcnt_wrap = 0x103; + vcnt_adj = 57; + } + else { + lines = 262; + vcnt_wrap = 0xEB; + vcnt_adj = 6; + } - for (y++; y < lines; y++) + for (y++; y < lines - 1; y++) { pv->v_counter = Pico.m.scanline = y; if (y >= vcnt_wrap) - pv->v_counter -= Pico.m.pal ? 56 : 6; + pv->v_counter -= vcnt_adj; if ((pv->reg[12]&6) == 6) pv->v_counter = (pv->v_counter << 1) | 1; pv->v_counter &= 0xff; PAD_DELAY(); + if ((pv->status & PVS_ACTIVE) && --hint < 0) + { + hint = pv->reg[10]; // Reload H-Int counter + do_hint(pv); + } + // Run scanline: line_base_cycles = SekCyclesDone(); if (Pico.m.dma_xfers) SekCyclesBurn(CheckDMA()); @@ -258,6 +272,28 @@ static int PicoFrameHints(void) pevt_log_m68k_o(EVT_NEXT_LINE); } + pv->status &= ~SR_VB; + + // last scanline + Pico.m.scanline = y; + pv->v_counter = 0xff; + + PAD_DELAY(); + + if ((pv->status & PVS_ACTIVE) && --hint < 0) + { + hint = pv->reg[10]; // Reload H-Int counter + do_hint(pv); + } + + // Run scanline: + line_base_cycles = SekCyclesDone(); + if (Pico.m.dma_xfers) SekCyclesBurn(CheckDMA()); + CPUS_RUN(CYCLES_M68K_LINE); + + if (PicoLineHook) PicoLineHook(); + pevt_log_m68k_o(EVT_NEXT_LINE); + // sync cpus cycles = SekCyclesDone(); if (Pico.m.z80Run && !Pico.m.z80_reset && (PicoOpt&POPT_EN_Z80)) @@ -276,6 +312,8 @@ static int PicoFrameHints(void) #endif timers_cycle(); + pv->hint_cnt = hint; + return 0; } diff --git a/pico/pico_int.h b/pico/pico_int.h index f439d6af..743ecb48 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -283,6 +283,7 @@ extern SH2 sh2s[2]; #define OSC_NTSC 53693100 #define OSC_PAL 53203424 +// PicoVideo.debug_p #define PVD_KILL_A (1 << 0) #define PVD_KILL_B (1 << 1) #define PVD_KILL_S_LO (1 << 2) @@ -292,6 +293,20 @@ extern SH2 sh2s[2]; #define PVD_FORCE_B (1 << 6) #define PVD_FORCE_S (1 << 7) +// PicoVideo.status, not part of real SR +#define SR_PAL (1 << 0) +#define SR_DMA (1 << 1) +#define SR_HB (1 << 2) +#define SR_VB (1 
<< 3) +#define SR_ODD (1 << 4) +#define SR_C (1 << 5) +#define SR_SOVR (1 << 6) +#define SR_F (1 << 7) +#define SR_FULL (1 << 8) +#define SR_EMPT (1 << 9) +// not part of real SR +#define PVS_ACTIVE (1 << 16) + struct PicoVideo { unsigned char reg[0x20]; @@ -299,14 +314,15 @@ struct PicoVideo unsigned char pending; // 1 if waiting for second half of 32-bit command unsigned char type; // Command type (v/c/vsram read/write) unsigned short addr; // Read/Write address - int status; // Status bits + unsigned int status; // Status bits (SR) and extra flags unsigned char pending_ints; // pending interrupts: ??VH???? signed char lwrite_cnt; // VDP write count during active display line unsigned short v_counter; // V-counter unsigned short debug; // raw debug register unsigned char debug_p; // ... parsed: PVD_* - unsigned char addr_u; - unsigned char pad[0x0c]; + unsigned char addr_u; // bit16 of .addr + unsigned char hint_cnt; + unsigned char pad[0x0b]; }; struct PicoMisc @@ -684,7 +700,6 @@ extern struct Pico Pico; extern struct PicoSRAM SRam; extern int PicoPadInt[2]; extern int emustatus; -extern int scanlines_total; extern void (*PicoResetHook)(void); extern void (*PicoLineHook)(void); PICO_INTERNAL int CheckDMA(void); diff --git a/pico/sound/sound.c b/pico/sound/sound.c index 510a9aba..a67ebcca 100644 --- a/pico/sound/sound.c +++ b/pico/sound/sound.c @@ -44,7 +44,7 @@ extern int *sn76496_regs; static void dac_recalculate(void) { - int i, dac_cnt, pos, len, lines = Pico.m.pal ? 312 : 262, mid = Pico.m.pal ? 68 : 93; + int i, dac_cnt, pos, len, lines = Pico.m.pal ? 313 : 262, mid = Pico.m.pal ? 
68 : 93; if (PsndLen <= lines) { @@ -169,8 +169,8 @@ PICO_INTERNAL void PsndDoDAC(int line_to) int dout = ym2612.dacout; int line_from = PsndDacLine; - if (line_to >= 312) - line_to = 311; + if (line_to >= 313) + line_to = 312; pos = dac_info[line_from]; pos1 = dac_info[line_to + 1]; @@ -198,8 +198,8 @@ PICO_INTERNAL void PsndDoPSG(int line_to) int pos, pos1, len; int stereo = 0; - if (line_to >= 312) - line_to = 311; + if (line_to >= 313) + line_to = 312; pos = dac_info[line_from]; pos1 = dac_info[line_to + 1]; diff --git a/pico/state.c b/pico/state.c index 3cd41b2b..f4e685fb 100644 --- a/pico/state.c +++ b/pico/state.c @@ -687,6 +687,7 @@ static int pico_state_internal(void *afile, int is_save) } Pico.m.dirtyPal = 1; + Pico.video.status &= ~(SR_VB | SR_F); } return ret; From 9db6a54485501b56b0f2f5db4d093c38fe495bda Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 3 Oct 2017 23:00:39 +0300 Subject: [PATCH 0090/1110] menu: add reset hotkey --- platform/common/config_file.c | 2 +- platform/common/emu.c | 2 ++ platform/common/input_pico.h | 4 +++- platform/common/menu_pico.c | 1 + platform/common/plat_sdl.c | 1 + platform/pandora/plat.c | 1 + 6 files changed, 9 insertions(+), 2 deletions(-) diff --git a/platform/common/config_file.c b/platform/common/config_file.c index 97369cb4..e66d4e1c 100644 --- a/platform/common/config_file.c +++ b/platform/common/config_file.c @@ -417,7 +417,7 @@ static void keys_parse_all(FILE *f) acts = parse_bind_val(val, &type); if (acts == -1) { lprintf("config: unhandled action \"%s\"\n", val); - return; + continue; } mystrip(var + 5); diff --git a/platform/common/emu.c b/platform/common/emu.c index 0a1b59c0..6a8c940b 100644 --- a/platform/common/emu.c +++ b/platform/common/emu.c @@ -1141,6 +1141,8 @@ static void run_events_ui(unsigned int which) emu_status_msg("SAVE SLOT %i [%s]", state_slot, emu_check_save_file(state_slot, NULL) ? 
"USED" : "FREE"); } + if (which & PEV_RESET) + emu_reset_game(); if (which & PEV_MENU) engineState = PGS_Menu; } diff --git a/platform/common/input_pico.h b/platform/common/input_pico.h index c2730b52..c0501d33 100644 --- a/platform/common/input_pico.h +++ b/platform/common/input_pico.h @@ -28,6 +28,7 @@ #define PEVB_PICO_PNEXT 21 #define PEVB_PICO_PPREV 20 #define PEVB_PICO_SWINP 19 +#define PEVB_RESET 18 #define PEV_VOL_DOWN (1 << PEVB_VOL_DOWN) #define PEV_VOL_UP (1 << PEVB_VOL_UP) @@ -41,7 +42,8 @@ #define PEV_PICO_PNEXT (1 << PEVB_PICO_PNEXT) #define PEV_PICO_PPREV (1 << PEVB_PICO_PPREV) #define PEV_PICO_SWINP (1 << PEVB_PICO_SWINP) +#define PEV_RESET (1 << PEVB_RESET) -#define PEV_MASK 0x7ff80000 +#define PEV_MASK 0x7ffc0000 #endif /* INCLUDE_c48097f3ff2a6a9af1cce8fd7a9b3f0c */ diff --git a/platform/common/menu_pico.c b/platform/common/menu_pico.c index cc116382..f928f054 100644 --- a/platform/common/menu_pico.c +++ b/platform/common/menu_pico.c @@ -312,6 +312,7 @@ me_bind_action emuctrl_actions[] = { "Volume Down ", PEV_VOL_DOWN }, { "Volume Up ", PEV_VOL_UP }, { "Fast forward ", PEV_FF }, + { "Reset Game ", PEV_RESET }, { "Enter Menu ", PEV_MENU }, { "Pico Next page ", PEV_PICO_PNEXT }, { "Pico Prev page ", PEV_PICO_PPREV }, diff --git a/platform/common/plat_sdl.c b/platform/common/plat_sdl.c index 3387b952..1b617d5a 100644 --- a/platform/common/plat_sdl.c +++ b/platform/common/plat_sdl.c @@ -35,6 +35,7 @@ const struct in_default_bind in_sdl_defbinds[] __attribute__((weak)) = { { SDLK_RETURN, IN_BINDTYPE_PLAYER12, GBTN_START }, { SDLK_f, IN_BINDTYPE_PLAYER12, GBTN_MODE }, { SDLK_ESCAPE, IN_BINDTYPE_EMU, PEVB_MENU }, + { SDLK_TAB, IN_BINDTYPE_EMU, PEVB_RESET }, { SDLK_F1, IN_BINDTYPE_EMU, PEVB_STATE_SAVE }, { SDLK_F2, IN_BINDTYPE_EMU, PEVB_STATE_LOAD }, { SDLK_F3, IN_BINDTYPE_EMU, PEVB_SSLOT_PREV }, diff --git a/platform/pandora/plat.c b/platform/pandora/plat.c index 8f8eac6c..9cd5fc5b 100644 --- a/platform/pandora/plat.c +++ b/platform/pandora/plat.c @@ 
-74,6 +74,7 @@ static struct in_default_bind in_evdev_defbinds[] = { KEY_S, IN_BINDTYPE_PLAYER12, GBTN_B }, { KEY_D, IN_BINDTYPE_PLAYER12, GBTN_C }, { KEY_ENTER, IN_BINDTYPE_PLAYER12, GBTN_START }, + { KEY_R, IN_BINDTYPE_EMU, PEVB_RESET }, { KEY_F, IN_BINDTYPE_EMU, PEVB_FF }, { KEY_BACKSPACE,IN_BINDTYPE_EMU, PEVB_FF }, { KEY_BACKSLASH,IN_BINDTYPE_EMU, PEVB_MENU }, From ebd70cb5d9b32eb6548f92e03639db5c0683100f Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 3 Oct 2017 23:02:47 +0300 Subject: [PATCH 0091/1110] fix various logging issues --- pico/carthw/svp/memory.c | 2 +- pico/debug.c | 23 ++++++++++++++++++++--- pico/memory.c | 8 ++++---- pico/sek.c | 12 ++++++------ pico/videoport.c | 14 +++++++------- 5 files changed, 38 insertions(+), 21 deletions(-) diff --git a/pico/carthw/svp/memory.c b/pico/carthw/svp/memory.c index b37985e9..c7216112 100644 --- a/pico/carthw/svp/memory.c +++ b/pico/carthw/svp/memory.c @@ -87,7 +87,7 @@ static u32 PicoRead16_svpr(u32 a) a15004_looping = 0; if (!a15004_looping) - elprintf(EL_SVP, "SVP r%i: [%06x] %04x @%06x", realsize, a, d, SekPc); + elprintf(EL_SVP, "SVP r: [%06x] %04x @%06x", a, d, SekPc); if (a == 0xa15004 && !(d&1)) { if (!a15004_looping) diff --git a/pico/debug.c b/pico/debug.c index af5241c1..eb775bf6 100644 --- a/pico/debug.c +++ b/pico/debug.c @@ -8,6 +8,7 @@ #include "pico_int.h" #include "sound/ym2612.h" +#include "memory.h" #include "debug.h" #define bit(r, x) ((r>>x)&1) @@ -42,7 +43,7 @@ char *PDebugMain(void) !!(SRam.flags & SRF_ENABLED), !!(SRam.flags & SRF_EEPROM), SRam.eeprom_type); MVP; sprintf(dstrp, "sram range: %06x-%06x, reg: %02x\n", SRam.start, SRam.end, Pico.m.sram_reg); MVP; sprintf(dstrp, "pend int: v:%i, h:%i, vdp status: %04x\n", bit(pv->pending_ints,5), bit(pv->pending_ints,4), pv->status); MVP; - sprintf(dstrp, "pal: %i, hw: %02x, frame#: %i, cycles: %i\n", Pico.m.pal, Pico.m.hardware, Pico.m.frame_count, SekCyclesDone()); MVP; + sprintf(dstrp, "pal: %i, hw: %02x, frame#: %i, cycles: %u\n", 
Pico.m.pal, Pico.m.hardware, Pico.m.frame_count, SekCyclesDone()); MVP; sprintf(dstrp, "M68k: PC: %06x, SR: %04x, irql: %i\n", SekPc, SekSr, SekIrqLevel); MVP; for (r = 0; r < 8; r++) { sprintf(dstrp, "d%i=%08x, a%i=%08x\n", r, SekDar(r), r, SekDar(r+8)); MVP; @@ -279,7 +280,7 @@ void PDebugShowSprite(unsigned short *screen, int stride, int which) pvid->debug_p = olddbg; } -#define dump_ram(ram,fname) \ +#define dump_ram_m(ram,fname,mode) \ { \ unsigned short *sram = (unsigned short *) ram; \ FILE *f; \ @@ -287,7 +288,7 @@ void PDebugShowSprite(unsigned short *screen, int stride, int which) \ for (i = 0; i < sizeof(ram)/2; i++) \ sram[i] = (sram[i]<<8) | (sram[i]>>8); \ - f = fopen(fname, "wb"); \ + f = fopen(fname, mode); \ if (f) { \ fwrite(ram, 1, sizeof(ram), f); \ fclose(f); \ @@ -296,6 +297,9 @@ void PDebugShowSprite(unsigned short *screen, int stride, int which) sram[i] = (sram[i]<<8) | (sram[i]>>8); \ } +#define dump_ram(ram,fname) \ + dump_ram_m(ram,fname,"wb") + #define dump_ram_noswab(ram,fname) \ { \ FILE *f; \ @@ -308,6 +312,19 @@ void PDebugShowSprite(unsigned short *screen, int stride, int which) void PDebugDumpMem(void) { +#if 0 + char buf[1 << M68K_MEM_SHIFT]; + unsigned int a; + for (a = 0; ; a++) { + uptr v = m68k_read16_map[a]; + if (map_flag_set(v)) + break; + v <<= 1; + v += a << M68K_MEM_SHIFT; + memcpy(buf, (void *)v, sizeof(buf)); + dump_ram_m(buf, "dumps/cart.bin", a ? 
"ab" : "wb"); + } +#endif dump_ram_noswab(Pico.zram, "dumps/zram.bin"); dump_ram(Pico.cram, "dumps/cram.bin"); diff --git a/pico/memory.c b/pico/memory.c index 5d1d25f7..858ab65f 100644 --- a/pico/memory.c +++ b/pico/memory.c @@ -341,7 +341,7 @@ static int z80_cycles_from_68k(void) void NOINLINE ctl_write_z80busreq(u32 d) { d&=1; d^=1; - elprintf(EL_BUSREQ, "set_zrun: %i->%i [%i] @%06x", Pico.m.z80Run, d, SekCyclesDone(), SekPc); + elprintf(EL_BUSREQ, "set_zrun: %i->%i [%u] @%06x", Pico.m.z80Run, d, SekCyclesDone(), SekPc); if (d ^ Pico.m.z80Run) { if (d) @@ -363,7 +363,7 @@ void NOINLINE ctl_write_z80busreq(u32 d) void NOINLINE ctl_write_z80reset(u32 d) { d&=1; d^=1; - elprintf(EL_BUSREQ, "set_zreset: %i->%i [%i] @%06x", Pico.m.z80_reset, d, SekCyclesDone(), SekPc); + elprintf(EL_BUSREQ, "set_zreset: %i->%i [%u] @%06x", Pico.m.z80_reset, d, SekCyclesDone(), SekPc); if (d ^ Pico.m.z80_reset) { if (d) @@ -593,7 +593,7 @@ u32 PicoRead8_io(u32 a) if ((a & 0xff01) == 0x1100) { // z80 busreq (verified) d |= (Pico.m.z80Run | Pico.m.z80_reset) & 1; - elprintf(EL_BUSREQ, "get_zrun: %02x [%i] @%06x", d, SekCyclesDone(), SekPc); + elprintf(EL_BUSREQ, "get_zrun: %02x [%u] @%06x", d, SekCyclesDone(), SekPc); } goto end; } @@ -628,7 +628,7 @@ u32 PicoRead16_io(u32 a) if ((a & 0xff00) == 0x1100) { // z80 busreq d |= ((Pico.m.z80Run | Pico.m.z80_reset) & 1) << 8; - elprintf(EL_BUSREQ, "get_zrun: %04x [%i] @%06x", d, SekCyclesDone(), SekPc); + elprintf(EL_BUSREQ, "get_zrun: %04x [%u] @%06x", d, SekCyclesDone(), SekPc); } goto end; } diff --git a/pico/sek.c b/pico/sek.c index 51765287..c91374bb 100644 --- a/pico/sek.c +++ b/pico/sek.c @@ -36,8 +36,8 @@ M68K_CONTEXT PicoCpuFM68k; static int SekIntAck(int level) { // try to emulate VDP's reaction to 68000 int ack - if (level == 4) { Pico.video.pending_ints = 0; elprintf(EL_INTS, "hack: @ %06x [%i]", SekPc, SekCycleCnt); } - else if(level == 6) { Pico.video.pending_ints &= ~0x20; elprintf(EL_INTS, "vack: @ %06x [%i]", SekPc, 
SekCycleCnt); } + if (level == 4) { Pico.video.pending_ints = 0; elprintf(EL_INTS, "hack: @ %06x [%u]", SekPc, SekCycleCnt); } + else if(level == 6) { Pico.video.pending_ints &= ~0x20; elprintf(EL_INTS, "vack: @ %06x [%u]", SekPc, SekCycleCnt); } PicoCpuCM68k.irq = 0; return CYCLONE_INT_ACK_AUTOVECTOR; } @@ -76,8 +76,8 @@ static int SekUnrecognizedOpcode() #ifdef EMU_M68K static int SekIntAckM68K(int level) { - if (level == 4) { Pico.video.pending_ints = 0; elprintf(EL_INTS, "hack: @ %06x [%i]", SekPc, SekCycleCnt); } - else if(level == 6) { Pico.video.pending_ints &= ~0x20; elprintf(EL_INTS, "vack: @ %06x [%i]", SekPc, SekCycleCnt); } + if (level == 4) { Pico.video.pending_ints = 0; elprintf(EL_INTS, "hack: @ %06x [%u]", SekPc, SekCycleCnt); } + else if(level == 6) { Pico.video.pending_ints &= ~0x20; elprintf(EL_INTS, "vack: @ %06x [%u]", SekPc, SekCycleCnt); } CPU_INT_LEVEL = 0; return M68K_INT_ACK_AUTOVECTOR; } @@ -94,11 +94,11 @@ static void SekIntAckF68K(unsigned level) { if (level == 4) { Pico.video.pending_ints = 0; - elprintf(EL_INTS, "hack: @ %06x [%i]", SekPc, SekCyclesDone()); + elprintf(EL_INTS, "hack: @ %06x [%u]", SekPc, SekCyclesDone()); } else if(level == 6) { Pico.video.pending_ints &= ~0x20; - elprintf(EL_INTS, "vack: @ %06x [%i]", SekPc, SekCyclesDone()); + elprintf(EL_INTS, "vack: @ %06x [%u]", SekPc, SekCyclesDone()); } PicoCpuFM68k.interrupts[0] = 0; } diff --git a/pico/videoport.c b/pico/videoport.c index 0af4bbcc..1f57f2d4 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -223,7 +223,7 @@ static void DmaCopy(int len) unsigned char *vr = (unsigned char *) Pico.vram; unsigned char inc=Pico.video.reg[0xf]; int source; - elprintf(EL_VDPDMA, "DmaCopy len %i [%i]", len, SekCyclesDone()); + elprintf(EL_VDPDMA, "DmaCopy len %i [%u]", len, SekCyclesDone()); Pico.m.dma_xfers += len; if (Pico.m.dma_xfers < len) @@ -254,7 +254,7 @@ static NOINLINE void DmaFill(int data) int len, l; len = GetDmaLength(); - elprintf(EL_VDPDMA, "DmaFill len %i inc %i 
[%i]", len, inc, SekCyclesDone()); + elprintf(EL_VDPDMA, "DmaFill len %i inc %i [%u]", len, inc, SekCyclesDone()); Pico.m.dma_xfers += len; if (Pico.m.dma_xfers < len) // lame 16bit var @@ -430,11 +430,11 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) switch (num) { case 0x00: - elprintf(EL_INTSW, "hint_onoff: %i->%i [%i] pend=%i @ %06x", (dold&0x10)>>4, + elprintf(EL_INTSW, "hint_onoff: %i->%i [%u] pend=%i @ %06x", (dold&0x10)>>4, (d&0x10)>>4, SekCyclesDone(), (pvid->pending_ints&0x10)>>4, SekPc); goto update_irq; case 0x01: - elprintf(EL_INTSW, "vint_onoff: %i->%i [%i] pend=%i @ %06x", (dold&0x20)>>5, + elprintf(EL_INTSW, "vint_onoff: %i->%i [%u] pend=%i @ %06x", (dold&0x20)>>5, (d&0x20)>>5, SekCyclesDone(), (pvid->pending_ints&0x20)>>5, SekPc); goto update_irq; case 0x05: @@ -551,7 +551,7 @@ PICO_INTERNAL_ASM unsigned int PicoVideoRead(unsigned int a) d = hcounts_40[d]; else d = hcounts_32[d]; - elprintf(EL_HVCNT, "hv: %02x %02x (%i) @ %06x", d, Pico.video.v_counter, SekCyclesDone(), SekPc); + elprintf(EL_HVCNT, "hv: %02x %02x [%u] @ %06x", d, Pico.video.v_counter, SekCyclesDone(), SekPc); return d | (Pico.video.v_counter << 8); } @@ -598,7 +598,7 @@ unsigned char PicoVideoRead8CtlL(void) unsigned char PicoVideoRead8HV_H(void) { - elprintf(EL_HVCNT, "vcounter: %02x (%i) @ %06x", Pico.video.v_counter, SekCyclesDone(), SekPc); + elprintf(EL_HVCNT, "vcounter: %02x [%u] @ %06x", Pico.video.v_counter, SekCyclesDone(), SekPc); return Pico.video.v_counter; } @@ -609,7 +609,7 @@ unsigned char PicoVideoRead8HV_L(void) if (Pico.video.reg[12]&1) d = hcounts_40[d]; else d = hcounts_32[d]; - elprintf(EL_HVCNT, "hcounter: %02x (%i) @ %06x", d, SekCyclesDone(), SekPc); + elprintf(EL_HVCNT, "hcounter: %02x [%u] @ %06x", d, SekCyclesDone(), SekPc); return d; } From 3162a7104cbb9c1046a3d780dfc74bbc684bdc5b Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 4 Oct 2017 02:23:27 +0300 Subject: [PATCH 0092/1110] adjust z80 timing a bit --- pico/memory.c | 18 
+++++++++--------- pico/pico.c | 21 ++++++++------------- pico/pico_cmn.c | 3 ++- pico/pico_int.h | 23 +++++++++++++---------- 4 files changed, 32 insertions(+), 33 deletions(-) diff --git a/pico/memory.c b/pico/memory.c index 858ab65f..436f5453 100644 --- a/pico/memory.c +++ b/pico/memory.c @@ -331,11 +331,10 @@ NOINLINE void io_ports_write(u32 a, u32 d) Pico.ioports[a] = d; } -// lame.. static int z80_cycles_from_68k(void) { - return z80_cycle_aim - + cycles_68k_to_z80(SekCyclesDone() - last_z80_sync); + int m68k_cnt = SekCyclesDone() - timing.m68c_frame_start; + return cycles_68k_to_z80(m68k_cnt); } void NOINLINE ctl_write_z80busreq(u32 d) @@ -346,7 +345,7 @@ void NOINLINE ctl_write_z80busreq(u32 d) { if (d) { - z80_cycle_cnt = z80_cycles_from_68k(); + timing.z80c_cnt = z80_cycles_from_68k() + 2; } else { @@ -378,7 +377,7 @@ void NOINLINE ctl_write_z80reset(u32 d) } else { - z80_cycle_cnt = z80_cycles_from_68k(); + timing.z80c_cnt = z80_cycles_from_68k() + 2; z80_reset(); } Pico.m.z80_reset = d; @@ -896,10 +895,11 @@ static void m68k_mem_setup(void) static int get_scanline(int is_from_z80) { if (is_from_z80) { - int cycles = z80_cyclesDone(); - while (cycles - z80_scanline_cycles >= 228) - z80_scanline++, z80_scanline_cycles += 228; - return z80_scanline; + int mclk_z80 = z80_cyclesDone() * 15; + int mclk_line = timing.z80_scanline * 488 * 7; + while (mclk_z80 - mclk_line >= 488 * 7) + timing.z80_scanline++, mclk_line += 488 * 7; + return timing.z80_scanline; } return Pico.m.scanline; diff --git a/pico/pico.c b/pico/pico.c index f0c54d17..0404367c 100644 --- a/pico/pico.c +++ b/pico/pico.c @@ -23,6 +23,8 @@ int PicoAutoRgnOrder; struct PicoSRAM SRam; int emustatus; // rapid_ym2612, multi_ym_updates +struct PicoTiming timing; + void (*PicoWriteSound)(int len) = NULL; // called at the best time to send sound buffer (PsndOut) to hardware void (*PicoResetHook)(void) = NULL; void (*PicoLineHook)(void) = NULL; @@ -278,31 +280,24 @@ PICO_INTERNAL int CheckDMA(void) 
#include "pico_cmn.c" -unsigned int last_z80_sync; /* in 68k cycles */ -int z80_cycle_cnt; -int z80_cycle_aim; -int z80_scanline; -int z80_scanline_cycles; /* cycles done until z80_scanline */ - /* sync z80 to 68k */ PICO_INTERNAL void PicoSyncZ80(unsigned int m68k_cycles_done) { int m68k_cnt; int cnt; - m68k_cnt = m68k_cycles_done - last_z80_sync; - z80_cycle_aim += cycles_68k_to_z80(m68k_cnt); - cnt = z80_cycle_aim - z80_cycle_cnt; - last_z80_sync = m68k_cycles_done; + m68k_cnt = m68k_cycles_done - timing.m68c_frame_start; + timing.z80c_aim = cycles_68k_to_z80(m68k_cnt); + cnt = timing.z80c_aim - timing.z80c_cnt; pprof_start(z80); elprintf(EL_BUSREQ, "z80 sync %i (%u|%u -> %u|%u)", cnt, - z80_cycle_cnt, z80_cycle_cnt / 228, - z80_cycle_aim, z80_cycle_aim / 228); + timing.z80c_cnt, timing.z80c_cnt * 15 / 7 / 488, + timing.z80c_aim, timing.z80c_aim * 15 / 7 / 488); if (cnt > 0) - z80_cycle_cnt += z80_run(cnt); + timing.z80c_cnt += z80_run(cnt); pprof_end(z80); } diff --git a/pico/pico_cmn.c b/pico/pico_cmn.c index b44bfba2..01d57a72 100644 --- a/pico/pico_cmn.c +++ b/pico/pico_cmn.c @@ -78,7 +78,6 @@ static int PicoFrameHints(void) int hint; // Hint counter pevt_log_m68k_o(EVT_FRAME_START); - pv->v_counter = Pico.m.scanline = 0; if ((PicoOpt&POPT_ALT_RENDERER) && !PicoSkipFrame && (pv->reg[1]&0x40)) { // fast rend., display enabled // draw a frame just after vblank in alternative render mode @@ -91,6 +90,8 @@ static int PicoFrameHints(void) } else skip=PicoSkipFrame; + timing.m68c_frame_start = SekCyclesDone(); + pv->v_counter = Pico.m.scanline = 0; z80_resetCycles(); PsndStartFrame(); diff --git a/pico/pico_int.h b/pico/pico_int.h index 743ecb48..89cccc79 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -211,20 +211,13 @@ extern struct DrZ80 drZ80; #define Z80_STATE_SIZE 0x60 -extern unsigned int last_z80_sync; -extern int z80_cycle_cnt; /* 'done' z80 cycles before z80_run() */ -extern int z80_cycle_aim; -extern int z80_scanline; -extern int 
z80_scanline_cycles; /* cycles done until z80_scanline */ - #define z80_resetCycles() \ - last_z80_sync = SekCyclesDone(); \ - z80_cycle_cnt = z80_cycle_aim = z80_scanline = z80_scanline_cycles = 0; + timing.z80c_cnt = timing.z80c_aim = timing.z80_scanline = 0 #define z80_cyclesDone() \ - (z80_cycle_aim - z80_cyclesLeft) + (timing.z80c_aim - z80_cyclesLeft) -#define cycles_68k_to_z80(x) ((x)*957 >> 11) +#define cycles_68k_to_z80(x) ((x) * 3823 >> 13) // ----------------------- SH2 CPU ----------------------- @@ -599,6 +592,15 @@ struct Pico32xMem unsigned short pwm_fifo[2][4]; // [0] - current raw, others - fifo entries }; +struct PicoTiming +{ + unsigned int m68c_frame_start; // m68k cycles + unsigned int z80c_cnt; // z80 cycles done (this frame) + unsigned int z80c_aim; + int z80_scanline; +}; +extern struct PicoTiming timing; + // area.c extern void (*PicoLoadStateHook)(void); @@ -1092,3 +1094,4 @@ void pevt_dump(void); #endif // PICO_INTERNAL_INCLUDED +// vim:shiftwidth=2:ts=2:expandtab From 334f00e24ed55a3e259d1a9d42c4530a770b9a56 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 4 Oct 2017 02:58:48 +0300 Subject: [PATCH 0093/1110] vdp timing hacks --- pico/pico_cmn.c | 34 +++++++++++++++++++++++----------- pico/videoport.c | 12 +++++------- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/pico/pico_cmn.c b/pico/pico_cmn.c index 01d57a72..0907b930 100644 --- a/pico/pico_cmn.c +++ b/pico/pico_cmn.c @@ -68,10 +68,28 @@ static void do_hint(struct PicoVideo *pv) } } +static void do_timing_hacks_as(struct PicoVideo *pv, int vdp_slots) +{ + pv->lwrite_cnt += vdp_slots - Pico.m.dma_xfers * 2; // wrong *2 + if (pv->lwrite_cnt > vdp_slots) + pv->lwrite_cnt = vdp_slots; + else if (pv->lwrite_cnt < 0) + pv->lwrite_cnt = 0; + if (Pico.m.dma_xfers) + SekCyclesBurn(CheckDMA()); +} + +static void do_timing_hacks_vb(void) +{ + if (Pico.m.dma_xfers) + SekCyclesBurn(CheckDMA()); +} + static int PicoFrameHints(void) { struct PicoVideo *pv = &Pico.video; int 
line_sample = Pico.m.pal ? 68 : 93; + int vdp_slots = (Pico.video.reg[12] & 1) ? 18 : 16; int lines, y, lines_vis, skip; int vcnt_wrap, vcnt_adj; unsigned int cycles; @@ -112,13 +130,6 @@ static int PicoFrameHints(void) if ((y == 224 && !(pv->reg[1] & 8)) || y == 240) break; - // VDP FIFO - pv->lwrite_cnt -= 12; - if (pv->lwrite_cnt <= 0) { - pv->lwrite_cnt = 0; - Pico.video.status |= SR_EMPT; - } - PAD_DELAY(); // H-Interrupts: @@ -160,7 +171,7 @@ static int PicoFrameHints(void) // Run scanline: line_base_cycles = SekCyclesDone(); - if (Pico.m.dma_xfers) SekCyclesBurn(CheckDMA()); + do_timing_hacks_as(pv, vdp_slots); CPUS_RUN(CYCLES_M68K_LINE); if (PicoLineHook) PicoLineHook(); @@ -202,7 +213,7 @@ static int PicoFrameHints(void) // also delay between F bit (bit 7) is set in SR and IRQ happens (Ex-Mutants) // also delay between last H-int and V-int (Golden Axe 3) line_base_cycles = SekCyclesDone(); - if (Pico.m.dma_xfers) SekCyclesBurn(CheckDMA()); + do_timing_hacks_vb(); CPUS_RUN(CYCLES_M68K_VINT_LAG); if (pv->reg[1] & 0x20) { @@ -266,7 +277,7 @@ static int PicoFrameHints(void) // Run scanline: line_base_cycles = SekCyclesDone(); - if (Pico.m.dma_xfers) SekCyclesBurn(CheckDMA()); + do_timing_hacks_vb(); CPUS_RUN(CYCLES_M68K_LINE); if (PicoLineHook) PicoLineHook(); @@ -278,6 +289,7 @@ static int PicoFrameHints(void) // last scanline Pico.m.scanline = y; pv->v_counter = 0xff; + pv->lwrite_cnt = 0; PAD_DELAY(); @@ -289,7 +301,7 @@ static int PicoFrameHints(void) // Run scanline: line_base_cycles = SekCyclesDone(); - if (Pico.m.dma_xfers) SekCyclesBurn(CheckDMA()); + do_timing_hacks_as(pv, vdp_slots); CPUS_RUN(CYCLES_M68K_LINE); if (PicoLineHook) PicoLineHook(); diff --git a/pico/videoport.c b/pico/videoport.c index 1f57f2d4..4da3ce3a 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -377,14 +377,12 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) } // preliminary FIFO emulation for Chaos Engine, The (E) - if (!(pvid->status&8) && 
(pvid->reg[1]&0x40) && !(PicoOpt&POPT_DIS_VDP_FIFO)) // active display? + if (!(pvid->status & SR_VB) && (pvid->reg[1] & 0x40) && !(PicoOpt&POPT_DIS_VDP_FIFO)) // active display? { - pvid->status&=~0x200; // FIFO no longer empty - pvid->lwrite_cnt++; - if (pvid->lwrite_cnt >= 4) pvid->status|=0x100; // FIFO full - if (pvid->lwrite_cnt > 4) { - SekCyclesBurnRun(32); // penalty // 488/12-8 - } + int use = pvid->type == 1 ? 2 : 1; + pvid->lwrite_cnt -= use; + if (pvid->lwrite_cnt < 0) + SekCyclesLeft = 0; elprintf(EL_ASVDP, "VDP data write: %04x [%06x] {%i} #%i @ %06x", d, Pico.video.addr, Pico.video.type, pvid->lwrite_cnt, SekPc); } From 4aedc593008ca6e9230b700cec6483c3ecd73bef Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 4 Oct 2017 23:48:57 +0300 Subject: [PATCH 0094/1110] drop unmaintained mips code can always be brought back if anyone has proof it's faster --- pico/draw.c | 7 - pico/draw_amips.s | 1756 --------------------------------------------- 2 files changed, 1763 deletions(-) delete mode 100644 pico/draw_amips.s diff --git a/pico/draw.c b/pico/draw.c index e2e4fb49..5ecbca80 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -129,19 +129,12 @@ static void funcname(int sx, unsigned int pack, int pal) \ } -#ifdef _ASM_DRAW_C_AMIPS -int TileNorm(int sx, unsigned int pack, int pal); -int TileFlip(int sx, unsigned int pack, int pal); -#else - #define pix_just_write(x) \ if (t) pd[x]=pal|t TileNormMaker(TileNorm,pix_just_write) TileFlipMaker(TileFlip,pix_just_write) -#endif - #ifndef _ASM_DRAW_C // draw a sprite pixel, process operator colors diff --git a/pico/draw_amips.s b/pico/draw_amips.s deleted file mode 100644 index fa7906ee..00000000 --- a/pico/draw_amips.s +++ /dev/null @@ -1,1756 +0,0 @@ -#* -#* several drawing related functions for Allegrex MIPS -#* (C) notaz, 2007-2008 -#* -#* This work is licensed under the terms of MAME license. -#* See COPYING file in the top-level directory. -#* -#* this is highly specialized, be careful if changing related C code! 
-#* - -.set noreorder # don't reorder any instructions -.set noat # don't use $at - -.text -.align 4 - -# void amips_clut(unsigned short *dst, unsigned char *src, unsigned short *pal, int count) - -.global amips_clut - -amips_clut: - srl $a3, 2 -amips_clut_loop: - lbu $t0, 0($a1) # tried lw here, no improvement noticed - lbu $t1, 1($a1) - lbu $t2, 2($a1) - lbu $t3, 3($a1) - sll $t0, 1 - sll $t1, 1 - sll $t2, 1 - sll $t3, 1 - addu $t0, $a2 - addu $t1, $a2 - addu $t2, $a2 - addu $t3, $a2 - lhu $t0, 0($t0) - lhu $t1, 0($t1) - lhu $t2, 0($t2) - lhu $t3, 0($t3) - ins $t0, $t1, 16, 16 # ins rt, rs, pos, size - Insert size bits starting - ins $t2, $t3, 16, 16 # from the LSB of rs into rt starting at position pos - sw $t0, 0($a0) - sw $t2, 4($a0) - addiu $a0, 8 - addiu $a3, -1 - bnez $a3, amips_clut_loop - addiu $a1, 4 - jr $ra - nop - - -.global amips_clut_6bit - -amips_clut_6bit: - srl $a3, 2 - li $t4, 0 - li $t5, 0 - li $t6, 0 - li $t7, 0 -amips_clut_loop6: - lbu $t0, 0($a1) # tried lw here, no improvement noticed - lbu $t1, 1($a1) - lbu $t2, 2($a1) - lbu $t3, 3($a1) - ins $t4, $t0, 1, 6 - ins $t5, $t1, 1, 6 - ins $t6, $t2, 1, 6 - ins $t7, $t3, 1, 6 - addu $t0, $t4, $a2 - addu $t1, $t5, $a2 - addu $t2, $t6, $a2 - addu $t3, $t7, $a2 - lhu $t0, 0($t0) - lhu $t1, 0($t1) - lhu $t2, 0($t2) - lhu $t3, 0($t3) - ins $t0, $t1, 16, 16 # ins rt, rs, pos, size - Insert size bits starting - ins $t2, $t3, 16, 16 # from the LSB of rs into rt starting at position pos - sw $t0, 0($a0) - sw $t2, 4($a0) - addiu $a0, 8 - addiu $a3, -1 - bnez $a3, amips_clut_loop6 - addiu $a1, 4 - jr $ra - nop - - -# $a0 - pd, $a1 - tile word, $a2 - pal -# ext rt, rs, pos, size // Extract size bits from position pos in rs and store in rt - -.macro TilePixelPrep shift dreg offs -.if \shift - ext \dreg, $a1, \shift, 4 -.else - andi \dreg, $a1, 0xf -.endif -.if \offs - sltu $t8, $0, \dreg - ins $t9, $t8, \offs, 1 -.else - sltu $t9, $0, \dreg -.endif -.endm - -.macro TileStartCode - sll $a1, $a1, 1 - lui $t1, 
%hi(Pico+0x10000) - addu $a1, $a1, $t1 - lw $a1, %lo(Pico+0x10000)($a1) # Pico.vram + addr - beqz $a1, TileEmpty - rotr $t1, $a1, 4 - beq $t1, $a1, SingleColor - and $v0, $0 # not empty tile -.endm - -.macro TileEndCode - xori $t8, $t9, 0xff - beqz $t8, tile11111111 # common case - lui $v1, %hi(HighCol) - lui $t8, %hi(TileTable) - ins $t8, $t9, 2, 8 - lw $t8, %lo(TileTable)($t8) - lw $v1, %lo(HighCol)($v1) - jr $t8 - addu $a0, $v1 -.endm - - -.global TileNorm - -TileNorm: - TileStartCode - TilePixelPrep 12, $t0, 0 - TilePixelPrep 8, $t1, 1 - TilePixelPrep 4, $t2, 2 - TilePixelPrep 0, $t3, 3 - TilePixelPrep 28, $t4, 4 - TilePixelPrep 24, $t5, 5 - TilePixelPrep 20, $t6, 6 - TilePixelPrep 16, $t7, 7 - TileEndCode - - -.global TileFlip - -TileFlip: - TileStartCode - TilePixelPrep 16, $t0, 0 - TilePixelPrep 20, $t1, 1 - TilePixelPrep 24, $t2, 2 - TilePixelPrep 28, $t3, 3 - TilePixelPrep 0, $t4, 4 - TilePixelPrep 4, $t5, 5 - TilePixelPrep 8, $t6, 6 - TilePixelPrep 12, $t7, 7 - TileEndCode - - -SingleColor: - lui $t9, %hi(HighCol) - lw $t9, %lo(HighCol)($t9) - andi $t0, $a1, 0xf - or $t0, $t0, $a2 - addu $a0, $t9 - sb $t0, 0($a0) - sb $t0, 1($a0) - sb $t0, 2($a0) - sb $t0, 3($a0) - sb $t0, 4($a0) - sb $t0, 5($a0) - sb $t0, 6($a0) - jr $ra - sb $t0, 7($a0) - -TileEmpty: - jr $ra - or $v0, $0, 1 # empty tile - -tile11111111: - lw $v1, %lo(HighCol)($v1) - or $t0, $t0, $a2 - addu $a0, $v1 - sb $t0, 0($a0) -tile11111110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile11111100: - or $t2, $t2, $a2 - sb $t2, 2($a0) -tile11111000: - or $t3, $t3, $a2 - sb $t3, 3($a0) -tile11110000: - or $t4, $t4, $a2 - sb $t4, 4($a0) -tile11100000: - or $t5, $t5, $a2 - sb $t5, 5($a0) -tile11000000: - or $t6, $t6, $a2 - sb $t6, 6($a0) -tile10000000: - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11111101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - 
sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11111011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile11111010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11111001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11110111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile11110110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile11110100: - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11110101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11110011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile11110010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11110001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11101111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile11101110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile11101100: - or $t2, $t2, $a2 - sb $t2, 2($a0) -tile11101000: - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11101101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 
2($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11101011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile11101010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11101001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11100111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile11100110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile11100100: - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11100101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11100011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile11100010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11100001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11011111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile11011110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile11011100: - or $t2, $t2, $a2 - sb $t2, 2($a0) -tile11011000: - or $t3, $t3, $a2 - sb $t3, 3($a0) -tile11010000: - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11011101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t6, $t6, $a2 - sb 
$t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11011011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile11011010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11011001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11010111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile11010110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile11010100: - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11010101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11010011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile11010010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11010001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11001111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile11001110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile11001100: - or $t2, $t2, $a2 - sb $t2, 2($a0) -tile11001000: - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11001101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11001011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile11001010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t3, $t3, $a2 - sb $t3, 
3($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11001001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11000111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile11000110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile11000100: - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11000101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11000011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile11000010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11000001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10111111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile10111110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile10111100: - or $t2, $t2, $a2 - sb $t2, 2($a0) -tile10111000: - or $t3, $t3, $a2 - sb $t3, 3($a0) -tile10110000: - or $t4, $t4, $a2 - sb $t4, 4($a0) -tile10100000: - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10111101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10111011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile10111010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10111001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t7, $t7, $a2 - 
jr $ra - sb $t7, 7($a0) -tile10110111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile10110110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile10110100: - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10110101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10110011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile10110010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10110001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10101111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile10101110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile10101100: - or $t2, $t2, $a2 - sb $t2, 2($a0) -tile10101000: - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10101101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10101011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile10101010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10101001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10100111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile10100110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile10100100: - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) 
-tile10100101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10100011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile10100010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10100001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10011111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile10011110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile10011100: - or $t2, $t2, $a2 - sb $t2, 2($a0) -tile10011000: - or $t3, $t3, $a2 - sb $t3, 3($a0) -tile10010000: - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10011101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10011011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile10011010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10011001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10010111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile10010110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile10010100: - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10010101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10010011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile10010010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10010001: - or $t0, $t0, $a2 - sb 
$t0, 0($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10001111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile10001110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile10001100: - or $t2, $t2, $a2 - sb $t2, 2($a0) -tile10001000: - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10001101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10001011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile10001010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10001001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10000111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile10000110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile10000100: - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10000101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10000011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile10000010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10000001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile01111111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile01111110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile01111100: - or $t2, $t2, $a2 - sb $t2, 2($a0) -tile01111000: - or $t3, $t3, $a2 - sb $t3, 3($a0) -tile01110000: - or $t4, $t4, $a2 - sb $t4, 4($a0) -tile01100000: - or $t5, $t5, $a2 - sb $t5, 5($a0) -tile01000000: - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile00000000: -tile01111101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - jr 
$ra - sb $t6, 6($a0) -tile01111011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile01111010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01111001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01110111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile01110110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile01110100: - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01110101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01110011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile01110010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01110001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01101111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile01101110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile01101100: - or $t2, $t2, $a2 - sb $t2, 2($a0) -tile01101000: - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01101101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01101011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile01101010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t5, $t5, $a2 - sb $t5, 
5($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01101001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01100111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile01100110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile01100100: - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01100101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01100011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile01100010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01100001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01011111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile01011110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile01011100: - or $t2, $t2, $a2 - sb $t2, 2($a0) -tile01011000: - or $t3, $t3, $a2 - sb $t3, 3($a0) -tile01010000: - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01011101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01011011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile01011010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01011001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01010111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile01010110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile01010100: - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t4, $t4, $a2 
- sb $t4, 4($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01010101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01010011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile01010010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01010001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01001111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile01001110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile01001100: - or $t2, $t2, $a2 - sb $t2, 2($a0) -tile01001000: - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01001101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01001011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile01001010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01001001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01000111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile01000110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile01000100: - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01000101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01000011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile01000010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01000001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile00111111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile00111110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile00111100: - or $t2, $t2, $a2 - sb $t2, 
2($a0) -tile00111000: - or $t3, $t3, $a2 - sb $t3, 3($a0) -tile00110000: - or $t4, $t4, $a2 - sb $t4, 4($a0) -tile00100000: - or $t5, $t5, $a2 - jr $ra - sb $t5, 5($a0) -tile00111101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - jr $ra - sb $t5, 5($a0) -tile00111011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile00111010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - jr $ra - sb $t5, 5($a0) -tile00111001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - jr $ra - sb $t5, 5($a0) -tile00110111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile00110110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile00110100: - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - jr $ra - sb $t5, 5($a0) -tile00110101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - jr $ra - sb $t5, 5($a0) -tile00110011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile00110010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - jr $ra - sb $t5, 5($a0) -tile00110001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - jr $ra - sb $t5, 5($a0) -tile00101111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile00101110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile00101100: - or $t2, $t2, $a2 - sb $t2, 2($a0) -tile00101000: - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t5, $t5, $a2 - jr $ra - sb $t5, 5($a0) -tile00101101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t5, $t5, $a2 - jr $ra - sb $t5, 5($a0) -tile00101011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile00101010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - 
or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t5, $t5, $a2 - jr $ra - sb $t5, 5($a0) -tile00101001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t5, $t5, $a2 - jr $ra - sb $t5, 5($a0) -tile00100111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile00100110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile00100100: - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t5, $t5, $a2 - jr $ra - sb $t5, 5($a0) -tile00100101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t5, $t5, $a2 - jr $ra - sb $t5, 5($a0) -tile00100011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile00100010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t5, $t5, $a2 - jr $ra - sb $t5, 5($a0) -tile00100001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t5, $t5, $a2 - jr $ra - sb $t5, 5($a0) -tile00011111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile00011110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile00011100: - or $t2, $t2, $a2 - sb $t2, 2($a0) -tile00011000: - or $t3, $t3, $a2 - sb $t3, 3($a0) -tile00010000: - or $t4, $t4, $a2 - jr $ra - sb $t4, 4($a0) -tile00011101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - jr $ra - sb $t4, 4($a0) -tile00011011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile00011010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - jr $ra - sb $t4, 4($a0) -tile00011001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - jr $ra - sb $t4, 4($a0) -tile00010111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile00010110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile00010100: - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t4, $t4, $a2 - jr $ra - sb $t4, 4($a0) -tile00010101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t4, $t4, $a2 - jr $ra - sb $t4, 4($a0) -tile00010011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile00010010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t4, $t4, $a2 - jr $ra - sb 
$t4, 4($a0) -tile00010001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t4, $t4, $a2 - jr $ra - sb $t4, 4($a0) -tile00001111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile00001110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile00001100: - or $t2, $t2, $a2 - sb $t2, 2($a0) -tile00001000: - or $t3, $t3, $a2 - jr $ra - sb $t3, 3($a0) -tile00001101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t3, $t3, $a2 - jr $ra - sb $t3, 3($a0) -tile00001011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile00001010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t3, $t3, $a2 - jr $ra - sb $t3, 3($a0) -tile00001001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t3, $t3, $a2 - jr $ra - sb $t3, 3($a0) -tile00000111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile00000110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile00000100: - or $t2, $t2, $a2 - jr $ra - sb $t2, 2($a0) -tile00000101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - jr $ra - sb $t2, 2($a0) -tile00000011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile00000010: - or $t1, $t1, $a2 - jr $ra - sb $t1, 1($a0) -tile00000001: - or $t0, $t0, $a2 - jr $ra - sb $t0, 0($a0) - -.data -.align 4 - -TileTable: - .long 000000000000, tile00000001, tile00000010, tile00000011, tile00000100, tile00000101, tile00000110, tile00000111 - .long tile00001000, tile00001001, tile00001010, tile00001011, tile00001100, tile00001101, tile00001110, tile00001111 - .long tile00010000, tile00010001, tile00010010, tile00010011, tile00010100, tile00010101, tile00010110, tile00010111 - .long tile00011000, tile00011001, tile00011010, tile00011011, tile00011100, tile00011101, tile00011110, tile00011111 - .long tile00100000, tile00100001, tile00100010, tile00100011, tile00100100, tile00100101, tile00100110, tile00100111 - .long tile00101000, tile00101001, tile00101010, tile00101011, tile00101100, tile00101101, tile00101110, tile00101111 - .long tile00110000, tile00110001, tile00110010, tile00110011, tile00110100, tile00110101, tile00110110, tile00110111 - .long 
tile00111000, tile00111001, tile00111010, tile00111011, tile00111100, tile00111101, tile00111110, tile00111111 - .long tile01000000, tile01000001, tile01000010, tile01000011, tile01000100, tile01000101, tile01000110, tile01000111 - .long tile01001000, tile01001001, tile01001010, tile01001011, tile01001100, tile01001101, tile01001110, tile01001111 - .long tile01010000, tile01010001, tile01010010, tile01010011, tile01010100, tile01010101, tile01010110, tile01010111 - .long tile01011000, tile01011001, tile01011010, tile01011011, tile01011100, tile01011101, tile01011110, tile01011111 - .long tile01100000, tile01100001, tile01100010, tile01100011, tile01100100, tile01100101, tile01100110, tile01100111 - .long tile01101000, tile01101001, tile01101010, tile01101011, tile01101100, tile01101101, tile01101110, tile01101111 - .long tile01110000, tile01110001, tile01110010, tile01110011, tile01110100, tile01110101, tile01110110, tile01110111 - .long tile01111000, tile01111001, tile01111010, tile01111011, tile01111100, tile01111101, tile01111110, tile01111111 - .long tile10000000, tile10000001, tile10000010, tile10000011, tile10000100, tile10000101, tile10000110, tile10000111 - .long tile10001000, tile10001001, tile10001010, tile10001011, tile10001100, tile10001101, tile10001110, tile10001111 - .long tile10010000, tile10010001, tile10010010, tile10010011, tile10010100, tile10010101, tile10010110, tile10010111 - .long tile10011000, tile10011001, tile10011010, tile10011011, tile10011100, tile10011101, tile10011110, tile10011111 - .long tile10100000, tile10100001, tile10100010, tile10100011, tile10100100, tile10100101, tile10100110, tile10100111 - .long tile10101000, tile10101001, tile10101010, tile10101011, tile10101100, tile10101101, tile10101110, tile10101111 - .long tile10110000, tile10110001, tile10110010, tile10110011, tile10110100, tile10110101, tile10110110, tile10110111 - .long tile10111000, tile10111001, tile10111010, tile10111011, tile10111100, tile10111101, 
tile10111110, tile10111111 - .long tile11000000, tile11000001, tile11000010, tile11000011, tile11000100, tile11000101, tile11000110, tile11000111 - .long tile11001000, tile11001001, tile11001010, tile11001011, tile11001100, tile11001101, tile11001110, tile11001111 - .long tile11010000, tile11010001, tile11010010, tile11010011, tile11010100, tile11010101, tile11010110, tile11010111 - .long tile11011000, tile11011001, tile11011010, tile11011011, tile11011100, tile11011101, tile11011110, tile11011111 - .long tile11100000, tile11100001, tile11100010, tile11100011, tile11100100, tile11100101, tile11100110, tile11100111 - .long tile11101000, tile11101001, tile11101010, tile11101011, tile11101100, tile11101101, tile11101110, tile11101111 - .long tile11110000, tile11110001, tile11110010, tile11110011, tile11110100, tile11110101, tile11110110, tile11110111 - .long tile11111000, tile11111001, tile11111010, tile11111011, tile11111100, tile11111101, tile11111110, tile11111111 - -# vim:filetype=mips From e64886365da9e882b54e06ab683ee4db60171e32 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 6 Oct 2017 01:34:24 +0300 Subject: [PATCH 0095/1110] drop legacy save support the format has been changed like 10 years ago --- pico/state.c | 61 ++++------------------------------------------------ pico/z80if.c | 35 +++--------------------------- 2 files changed, 7 insertions(+), 89 deletions(-) diff --git a/pico/state.c b/pico/state.c index f4e685fb..fce247a0 100644 --- a/pico/state.c +++ b/pico/state.c @@ -78,54 +78,6 @@ static void *open_save_file(const char *fname, int is_save) return afile; } -// legacy savestate loading -#define SCANP(f, x) areaRead(&Pico.x, sizeof(Pico.x), 1, f) - -static int state_load_legacy(void *file) -{ - unsigned char head[32]; - unsigned char cpu[0x60]; - unsigned char cpu_z80[Z80_STATE_SIZE]; - void *ym2612_regs; - int ok; - - memset(&cpu,0,sizeof(cpu)); - memset(&cpu_z80,0,sizeof(cpu_z80)); - - memset(head, 0, sizeof(head)); - areaRead(head, sizeof(head), 
1, file); - if (strcmp((char *)head, "Pico") != 0) - return -1; - - elprintf(EL_STATUS, "legacy savestate"); - - // Scan all the memory areas: - SCANP(file, ram); - SCANP(file, vram); - SCANP(file, zram); - SCANP(file, cram); - SCANP(file, vsram); - - // Pack, scan and unpack the cpu data: - areaRead(cpu, sizeof(cpu), 1, file); - SekUnpackCpu(cpu, 0); - - SCANP(file, m); - SCANP(file, video); - - ok = areaRead(cpu_z80, sizeof(cpu_z80), 1, file) == sizeof(cpu_z80); - // do not unpack if we fail to load z80 state - if (!ok) z80_reset(); - else z80_unpack(cpu_z80); - - ym2612_regs = YM2612GetRegs(); - areaRead(sn76496_regs, 28*4, 1, file); - areaRead(ym2612_regs, 0x200+4, 1, file); - ym2612_unpack_state(); - - return 0; -} - // --------------------------------------------------------------------------- typedef enum { @@ -610,6 +562,9 @@ readend: pcd_state_loaded(); } + Pico.m.dirtyPal = 1; + Pico.video.status &= ~(SR_VB | SR_F); + retval = 0; out: @@ -679,16 +634,8 @@ static int pico_state_internal(void *afile, int is_save) if (is_save) ret = state_save(afile); - else { + else ret = state_load(afile); - if (ret != 0) { - areaSeek(afile, 0, SEEK_SET); - ret = state_load_legacy(afile); - } - - Pico.m.dirtyPal = 1; - Pico.video.status &= ~(SR_VB | SR_F); - } return ret; } diff --git a/pico/z80if.c b/pico/z80if.c index 90b3f7a9..b69495e7 100644 --- a/pico/z80if.c +++ b/pico/z80if.c @@ -119,29 +119,6 @@ void z80_reset(void) #endif } -/* save state stuff */ -static int z80_unpack_legacy(const void *data) -{ -#if defined(_USE_DRZ80) - if (*(int *)data == 0x015A7244) { // "DrZ" v1 save? - u32 pc, sp; - memcpy(&drZ80, data+4, 0x54); - pc = (drZ80.Z80PC - drZ80.Z80PC_BASE) & 0xffff; - sp = (drZ80.Z80SP - drZ80.Z80SP_BASE) & 0xffff; - // update bases - drz80_load_pcsp(pc, sp); - return 0; - } -#elif defined(_USE_CZ80) - if (*(int *)data == 0x00007a43) { // "Cz" save? 
- memcpy(&CZ80, data+8, offsetof(cz80_struc, BasePC)); - Cz80_Set_Reg(&CZ80, CZ80_PC, *(int *)(data+4)); - return 0; - } -#endif - return -1; -} - struct z80sr_main { u8 a, f; u8 b, c; @@ -226,9 +203,7 @@ int z80_unpack(const void *data) { const struct z80_state *s = data; if (strcmp(s->magic, "Z80") != 0) { - if (z80_unpack_legacy(data) != 0) - goto fail; - elprintf(EL_STATUS, "legacy z80 state"); + elprintf(EL_STATUS, "legacy z80 state - ignored"); return 0; } @@ -278,13 +253,9 @@ int z80_unpack(const void *data) Cz80_Set_Reg(&CZ80, CZ80_IRQ, s->irq_pending ? HOLD_LINE : CLEAR_LINE); return 0; } +#else + return 0; #endif - -fail: - elprintf(EL_STATUS|EL_ANOMALY, "z80_unpack failed"); - z80_reset(); - z80_int(); - return -1; } void z80_exit(void) From 88fd63ad10faa746ef9d7ad7d98a72e51fe2aa86 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 5 Oct 2017 02:06:21 +0300 Subject: [PATCH 0096/1110] split memories away from Pico saves ~3k of code on ARM because Pico no longer crosses ldr limit --- pico/32x/memory.c | 2 +- pico/32x/pwm.c | 6 +- pico/cart.c | 72 +++++++-------- pico/carthw/carthw.c | 12 +-- pico/carthw/eeprom_spi.c | 10 +- pico/cd/gfx_dma.c | 6 +- pico/cd/mcd.c | 50 +++++----- pico/cd/memory.c | 18 ++-- pico/cd/{memory_arm.s => memory_arm.S} | 57 ++++++------ pico/debug.c | 38 ++++---- pico/draw.c | 56 ++++++------ pico/draw2.c | 24 ++--- pico/draw2_arm.S | 40 ++++---- pico/draw_arm.S | 116 ++++++++++++------------ pico/eeprom.c | 38 ++++---- pico/memory.c | 95 +++++++++---------- pico/{memory_amips.s => memory_amips.S} | 64 ++++++------- pico/{memory_arm.s => memory_arm.S} | 90 +++++++++--------- pico/mode4.c | 10 +- pico/pico.c | 42 ++++----- pico/pico_cmn.c | 28 +++--- pico/pico_int.h | 83 +++++++++-------- pico/pico_int_o32.h | 37 +++++--- pico/sek.c | 29 +++--- pico/sms.c | 20 ++-- pico/state.c | 50 +++++----- pico/videoport.c | 54 +++++------ platform/common/common.mak | 4 +- platform/common/emu.c | 16 ++-- tools/mkoffsets.c | 53 ++++++++--- 30 files 
changed, 633 insertions(+), 587 deletions(-) rename pico/cd/{memory_arm.s => memory_arm.S} (93%) rename pico/{memory_amips.s => memory_amips.S} (93%) rename pico/{memory_arm.s => memory_arm.S} (73%) diff --git a/pico/32x/memory.c b/pico/32x/memory.c index 2f3499ce..f3b70067 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -1159,7 +1159,7 @@ static void bank_switch(int b) return; bank = b << 20; - if ((Pico.m.sram_reg & SRR_MAPPED) && bank == SRam.start) { + if ((Pico.m.sram_reg & SRR_MAPPED) && bank == Pico.sv.start) { bank_map_handler(); return; } diff --git a/pico/32x/pwm.c b/pico/32x/pwm.c index 1aba914f..50735642 100644 --- a/pico/32x/pwm.c +++ b/pico/32x/pwm.c @@ -326,11 +326,11 @@ void p32x_pwm_state_loaded(void) p32x_pwm_ctl_changed(); // for old savestates - cycles_diff_sh2 = SekCycleCnt * 3 - Pico32x.pwm_cycle_p; + cycles_diff_sh2 = Pico.t.m68c_cnt * 3 - Pico32x.pwm_cycle_p; if (cycles_diff_sh2 >= pwm_cycles || cycles_diff_sh2 < 0) { Pico32x.pwm_irq_cnt = pwm_irq_reload; - Pico32x.pwm_cycle_p = SekCycleCnt * 3; - p32x_pwm_schedule(SekCycleCnt); + Pico32x.pwm_cycle_p = Pico.t.m68c_cnt * 3; + p32x_pwm_schedule(Pico.t.m68c_cnt); } } diff --git a/pico/cart.c b/pico/cart.c index 1ce80597..f17ce5e7 100644 --- a/pico/cart.c +++ b/pico/cart.c @@ -617,9 +617,9 @@ int PicoCartInsert(unsigned char *rom, unsigned int romsize, const char *carthw_ Pico.rom=rom; Pico.romsize=romsize; - if (SRam.data) { - free(SRam.data); - SRam.data = NULL; + if (Pico.sv.data) { + free(Pico.sv.data); + Pico.sv.data = NULL; } if (PicoCartUnloadHook != NULL) { @@ -949,8 +949,8 @@ static void parse_carthw(const char *carthw_cfg, int *fill_sram) elprintf(EL_STATUS, "carthw:%d: bad sram_range: %08x - %08x", line, start, end); goto bad_nomsg; } - SRam.start = start; - SRam.end = end; + Pico.sv.start = start; + Pico.sv.end = end; continue; } else if (is_expr("prop", &p)) { @@ -959,9 +959,9 @@ static void parse_carthw(const char *carthw_cfg, int *fill_sram) rstrip(p); if (strcmp(p, 
"no_sram") == 0) - SRam.flags &= ~SRF_ENABLED; + Pico.sv.flags &= ~SRF_ENABLED; else if (strcmp(p, "no_eeprom") == 0) - SRam.flags &= ~SRF_EEPROM; + Pico.sv.flags &= ~SRF_EEPROM; else if (strcmp(p, "filled_sram") == 0) *fill_sram = 1; else if (strcmp(p, "force_6btn") == 0) @@ -982,8 +982,8 @@ static void parse_carthw(const char *carthw_cfg, int *fill_sram) type = strtoul(p, &r, 0); if (r == p || type < 0) goto bad; - SRam.eeprom_type = type; - SRam.flags |= SRF_EEPROM; + Pico.sv.eeprom_type = type; + Pico.sv.flags |= SRF_EEPROM; continue; } else if (is_expr("eeprom_lines", &p)) { @@ -998,9 +998,9 @@ static void parse_carthw(const char *carthw_cfg, int *fill_sram) sda_out < 0 || sda_out > 15) goto bad; - SRam.eeprom_bit_cl = scl; - SRam.eeprom_bit_in = sda_in; - SRam.eeprom_bit_out= sda_out; + Pico.sv.eeprom_bit_cl = scl; + Pico.sv.eeprom_bit_in = sda_in; + Pico.sv.eeprom_bit_out= sda_out; continue; } else if ((tmp = is_expr("prot_ro_value16", &p)) || is_expr("prot_rw_value16", &p)) { @@ -1040,54 +1040,54 @@ static void PicoCartDetect(const char *carthw_cfg) { int fill_sram = 0; - memset(&SRam, 0, sizeof(SRam)); + memset(&Pico.sv, 0, sizeof(Pico.sv)); if (Pico.rom[0x1B1] == 'R' && Pico.rom[0x1B0] == 'A') { - SRam.start = rom_read32(0x1B4) & ~0xff000001; // align - SRam.end = (rom_read32(0x1B8) & ~0xff000000) | 1; + Pico.sv.start = rom_read32(0x1B4) & ~0xff000001; // align + Pico.sv.end = (rom_read32(0x1B8) & ~0xff000000) | 1; if (Pico.rom[0x1B2] & 0x40) // EEPROM - SRam.flags |= SRF_EEPROM; - SRam.flags |= SRF_ENABLED; + Pico.sv.flags |= SRF_EEPROM; + Pico.sv.flags |= SRF_ENABLED; } - if (SRam.end == 0 || SRam.start > SRam.end) + if (Pico.sv.end == 0 || Pico.sv.start > Pico.sv.end) { // some games may have bad headers, like S&K and Sonic3 // note: majority games use 0x200000 as starting address, but there are some which // use something else (0x300000 by HardBall '95). Luckily they have good headers. 
- SRam.start = 0x200000; - SRam.end = 0x203FFF; - SRam.flags |= SRF_ENABLED; + Pico.sv.start = 0x200000; + Pico.sv.end = 0x203FFF; + Pico.sv.flags |= SRF_ENABLED; } // set EEPROM defaults, in case it gets detected - SRam.eeprom_type = 0; // 7bit (24C01) - SRam.eeprom_bit_cl = 1; - SRam.eeprom_bit_in = 0; - SRam.eeprom_bit_out= 0; + Pico.sv.eeprom_type = 0; // 7bit (24C01) + Pico.sv.eeprom_bit_cl = 1; + Pico.sv.eeprom_bit_in = 0; + Pico.sv.eeprom_bit_out= 0; if (carthw_cfg != NULL) parse_carthw(carthw_cfg, &fill_sram); - if (SRam.flags & SRF_ENABLED) + if (Pico.sv.flags & SRF_ENABLED) { - if (SRam.flags & SRF_EEPROM) - SRam.size = 0x2000; + if (Pico.sv.flags & SRF_EEPROM) + Pico.sv.size = 0x2000; else - SRam.size = SRam.end - SRam.start + 1; + Pico.sv.size = Pico.sv.end - Pico.sv.start + 1; - SRam.data = calloc(SRam.size, 1); - if (SRam.data == NULL) - SRam.flags &= ~SRF_ENABLED; + Pico.sv.data = calloc(Pico.sv.size, 1); + if (Pico.sv.data == NULL) + Pico.sv.flags &= ~SRF_ENABLED; - if (SRam.eeprom_type == 1) // 1 == 0 in PD EEPROM code - SRam.eeprom_type = 0; + if (Pico.sv.eeprom_type == 1) // 1 == 0 in PD EEPROM code + Pico.sv.eeprom_type = 0; } - if ((SRam.flags & SRF_ENABLED) && fill_sram) + if ((Pico.sv.flags & SRF_ENABLED) && fill_sram) { elprintf(EL_STATUS, "SRAM fill"); - memset(SRam.data, 0xff, SRam.size); + memset(Pico.sv.data, 0xff, Pico.sv.size); } // Unusual region 'code' diff --git a/pico/carthw/carthw.c b/pico/carthw/carthw.c index a8b4333f..a18e5a41 100644 --- a/pico/carthw/carthw.c +++ b/pico/carthw/carthw.c @@ -318,7 +318,7 @@ static void carthw_pier_write8(u32 a, u32 d) base = d << 19; goto do_map; case 0x09: - SRam.changed = 1; + Pico.sv.changed = 1; eeprom_spi_write(d); break; case 0x0b: @@ -449,11 +449,11 @@ void carthw_pier_startup(void) // save EEPROM eeprom_state = eeprom_spi_init(&eeprom_size); - SRam.flags = 0; - SRam.size = 0x10000; - SRam.data = calloc(1, SRam.size); - if (!SRam.data) - SRam.size = 0; + Pico.sv.flags = 0; + Pico.sv.size 
= 0x10000; + Pico.sv.data = calloc(1, Pico.sv.size); + if (!Pico.sv.data) + Pico.sv.size = 0; carthw_pier_state[2].ptr = eeprom_state; carthw_pier_state[2].size = eeprom_size; diff --git a/pico/carthw/eeprom_spi.c b/pico/carthw/eeprom_spi.c index 9643067f..b250afff 100644 --- a/pico/carthw/eeprom_spi.c +++ b/pico/carthw/eeprom_spi.c @@ -210,7 +210,7 @@ void eeprom_spi_write(unsigned char data) if (spi_eeprom.opcode & 0x01) { /* READ operation */ - spi_eeprom.buffer = SRam.data[spi_eeprom.addr]; + spi_eeprom.buffer = Pico.sv.data[spi_eeprom.addr]; spi_eeprom.state = READ_BYTE; } else @@ -266,7 +266,7 @@ void eeprom_spi_write(unsigned char data) /* $C000-$FFFF (sector #3) is protected */ if (spi_eeprom.addr < 0xC000) { - SRam.data[spi_eeprom.addr] = spi_eeprom.buffer; + Pico.sv.data[spi_eeprom.addr] = spi_eeprom.buffer; } break; } @@ -276,7 +276,7 @@ void eeprom_spi_write(unsigned char data) /* $8000-$FFFF (sectors #2 and #3) is protected */ if (spi_eeprom.addr < 0x8000) { - SRam.data[spi_eeprom.addr] = spi_eeprom.buffer; + Pico.sv.data[spi_eeprom.addr] = spi_eeprom.buffer; } break; } @@ -290,7 +290,7 @@ void eeprom_spi_write(unsigned char data) default: { /* no sectors protected */ - SRam.data[spi_eeprom.addr] = spi_eeprom.buffer; + Pico.sv.data[spi_eeprom.addr] = spi_eeprom.buffer; break; } } @@ -332,7 +332,7 @@ void eeprom_spi_write(unsigned char data) { /* read next array byte */ spi_eeprom.addr = (spi_eeprom.addr + 1) & SIZE_MASK; - spi_eeprom.buffer = SRam.data[spi_eeprom.addr]; + spi_eeprom.buffer = Pico.sv.data[spi_eeprom.addr]; } } } diff --git a/pico/cd/gfx_dma.c b/pico/cd/gfx_dma.c index a24a1c3c..7dfe4bc9 100644 --- a/pico/cd/gfx_dma.c +++ b/pico/cd/gfx_dma.c @@ -26,7 +26,7 @@ PICO_INTERNAL void DmaSlowCell(unsigned int source, unsigned int a, int len, uns switch (Pico.video.type) { case 1: // vram - r = Pico.vram; + r = PicoMem.vram; for(; len; len--) { asrc = cell_map(source >> 2) << 2; @@ -42,7 +42,7 @@ PICO_INTERNAL void DmaSlowCell(unsigned int 
source, unsigned int a, int len, uns case 3: // cram Pico.m.dirtyPal = 1; - r = Pico.cram; + r = PicoMem.cram; for(a2=a&0x7f; len; len--) { asrc = cell_map(source >> 2) << 2; @@ -58,7 +58,7 @@ PICO_INTERNAL void DmaSlowCell(unsigned int source, unsigned int a, int len, uns break; case 5: // vsram[a&0x003f]=d; - r = Pico.vsram; + r = PicoMem.vsram; for(a2=a&0x7f; len; len--) { asrc = cell_map(source >> 2) << 2; diff --git a/pico/cd/mcd.c b/pico/cd/mcd.c index 6d23604c..043b4a25 100644 --- a/pico/cd/mcd.c +++ b/pico/cd/mcd.c @@ -76,16 +76,16 @@ PICO_INTERNAL int PicoResetMCD(void) { // reset button doesn't affect MCD hardware - // use SRam.data for RAM cart + // use Pico.sv.data for RAM cart if (PicoOpt & POPT_EN_MCD_RAMCART) { - if (SRam.data == NULL) - SRam.data = calloc(1, 0x12000); + if (Pico.sv.data == NULL) + Pico.sv.data = calloc(1, 0x12000); } - else if (SRam.data != NULL) { - free(SRam.data); - SRam.data = NULL; + else if (Pico.sv.data != NULL) { + free(Pico.sv.data); + Pico.sv.data = NULL; } - SRam.start = SRam.end = 0; // unused + Pico.sv.start = Pico.sv.end = 0; // unused return 0; } @@ -95,17 +95,17 @@ static void SekRunM68kOnce(void) int cyc_do; pevt_log_m68k_o(EVT_RUN_START); - if ((cyc_do = SekCycleAim - SekCycleCnt) > 0) { - SekCycleCnt += cyc_do; + if ((cyc_do = Pico.t.m68c_aim - Pico.t.m68c_cnt) > 0) { + Pico.t.m68c_cnt += cyc_do; #if defined(EMU_C68K) PicoCpuCM68k.cycles = cyc_do; CycloneRun(&PicoCpuCM68k); - SekCycleCnt -= PicoCpuCM68k.cycles; + Pico.t.m68c_cnt -= PicoCpuCM68k.cycles; #elif defined(EMU_M68K) - SekCycleCnt += m68k_execute(cyc_do) - cyc_do; + Pico.t.m68c_cnt += m68k_execute(cyc_do) - cyc_do; #elif defined(EMU_F68K) - SekCycleCnt += fm68k_emulate(cyc_do, 0) - cyc_do; + Pico.t.m68c_cnt += fm68k_emulate(cyc_do, 0) - cyc_do; #endif } @@ -316,39 +316,39 @@ static void SekSyncM68k(void); void pcd_run_cpus_normal(int m68k_cycles) { - SekCycleAim += m68k_cycles; + Pico.t.m68c_aim += m68k_cycles; if (SekShouldInterrupt() || 
Pico_mcd->m.m68k_poll_cnt < 12) Pico_mcd->m.m68k_poll_cnt = 0; else if (Pico_mcd->m.m68k_poll_cnt >= 16) { - int s68k_left = pcd_sync_s68k(SekCycleAim, 1); + int s68k_left = pcd_sync_s68k(Pico.t.m68c_aim, 1); if (s68k_left <= 0) { elprintf(EL_CDPOLL, "m68k poll [%02x] x%d @%06x", Pico_mcd->m.m68k_poll_a, Pico_mcd->m.m68k_poll_cnt, SekPc); - SekCycleCnt = SekCycleAim; + Pico.t.m68c_cnt = Pico.t.m68c_aim; return; } - SekCycleCnt = SekCycleAim - (s68k_left * 40220 >> 16); + Pico.t.m68c_cnt = Pico.t.m68c_aim - (s68k_left * 40220 >> 16); } - while (CYCLES_GT(SekCycleAim, SekCycleCnt)) { + while (CYCLES_GT(Pico.t.m68c_aim, Pico.t.m68c_cnt)) { SekRunM68kOnce(); if (Pico_mcd->m.need_sync) { Pico_mcd->m.need_sync = 0; - pcd_sync_s68k(SekCycleCnt, 0); + pcd_sync_s68k(Pico.t.m68c_cnt, 0); } } } void pcd_run_cpus_lockstep(int m68k_cycles) { - unsigned int target = SekCycleAim + m68k_cycles; + unsigned int target = Pico.t.m68c_aim + m68k_cycles; do { - SekCycleAim += 8; + Pico.t.m68c_aim += 8; SekSyncM68k(); - pcd_sync_s68k(SekCycleAim, 0); - } while (CYCLES_GT(target, SekCycleAim)); + pcd_sync_s68k(Pico.t.m68c_aim, 0); + } while (CYCLES_GT(target, Pico.t.m68c_aim)); - SekCycleAim = target; + Pico.t.m68c_aim = target; } #define PICO_CD @@ -364,7 +364,7 @@ void pcd_prepare_frame(void) // need this because we can't have direct mapping between // master<->slave cycle counters because of overflows - mcd_m68k_cycle_base = SekCycleAim; + mcd_m68k_cycle_base = Pico.t.m68c_aim; mcd_s68k_cycle_base = SekCycleAimS68k; } @@ -390,7 +390,7 @@ void pcd_state_loaded(void) Pico_mcd->pcm_regs_dirty = 1; // old savestates.. 
- cycles = pcd_cycles_m68k_to_s68k(SekCycleAim); + cycles = pcd_cycles_m68k_to_s68k(Pico.t.m68c_aim); diff = cycles - SekCycleAimS68k; if (diff < -1000 || diff > 1000) { SekCycleCntS68k = SekCycleAimS68k = cycles; diff --git a/pico/cd/memory.c b/pico/cd/memory.c index fbc7c476..94b81b7b 100644 --- a/pico/cd/memory.c +++ b/pico/cd/memory.c @@ -602,14 +602,14 @@ static u32 PicoReadM68k8_ramc(u32 a) { u32 d = 0; if (a == 0x400001) { - if (SRam.data != NULL) + if (Pico.sv.data != NULL) d = 3; // 64k cart return d; } if ((a & 0xfe0000) == 0x600000) { - if (SRam.data != NULL) - d = SRam.data[((a >> 1) & 0xffff) + 0x2000]; + if (Pico.sv.data != NULL) + d = Pico.sv.data[((a >> 1) & 0xffff) + 0x2000]; return d; } @@ -629,9 +629,9 @@ static u32 PicoReadM68k16_ramc(u32 a) static void PicoWriteM68k8_ramc(u32 a, u32 d) { if ((a & 0xfe0000) == 0x600000) { - if (SRam.data != NULL && (Pico_mcd->m.bcram_reg & 1)) { - SRam.data[((a>>1) & 0xffff) + 0x2000] = d; - SRam.changed = 1; + if (Pico.sv.data != NULL && (Pico_mcd->m.bcram_reg & 1)) { + Pico.sv.data[((a>>1) & 0xffff) + 0x2000] = d; + Pico.sv.changed = 1; } return; } @@ -877,7 +877,7 @@ static u32 PicoReadS68k16_bram(u32 a) static void PicoWriteS68k8_bram(u32 a, u32 d) { Pico_mcd->bram[(a >> 1) & 0x1fff] = d; - SRam.changed = 1; + Pico.sv.changed = 1; } static void PicoWriteS68k16_bram(u32 a, u32 d) @@ -886,7 +886,7 @@ static void PicoWriteS68k16_bram(u32 a, u32 d) a = (a >> 1) & 0x1fff; Pico_mcd->bram[a++] = d; Pico_mcd->bram[a++] = d >> 8; // TODO: verify.. - SRam.changed = 1; + Pico.sv.changed = 1; } #ifndef _ASM_CD_MEMORY_C @@ -1175,7 +1175,7 @@ PICO_INTERNAL void PicoMemSetupCD(void) PicoCpuFM68k.Fetch[i] = (unsigned long)Pico.rom; // .. 
and RAM for (i = M68K_FETCHBANK1*14/16; i < M68K_FETCHBANK1; i++) - PicoCpuFM68k.Fetch[i] = (unsigned long)Pico.ram - (i<<(24-FAMEC_FETCHBITS)); + PicoCpuFM68k.Fetch[i] = (unsigned long)PicoMem.ram - (i<<(24-FAMEC_FETCHBITS)); // S68k // PRG RAM is default for (i = 0; i < M68K_FETCHBANK1; i++) diff --git a/pico/cd/memory_arm.s b/pico/cd/memory_arm.S similarity index 93% rename from pico/cd/memory_arm.s rename to pico/cd/memory_arm.S index f3a1372a..23f0b94c 100644 --- a/pico/cd/memory_arm.s +++ b/pico/cd/memory_arm.S @@ -6,6 +6,8 @@ @* See COPYING file in the top-level directory. @* +#include "../pico_int_o32.h" + .equiv PCM_STEP_SHIFT, 11 .text @@ -127,9 +129,9 @@ PicoReadM68k8_cell1: @ 0x220000 - 0x23ffff, cell arranged mov r3, #0x0e0000 0: cell_map - ldr r1, =(Pico+0x22200) + ldr r1, =Pico add r0, r0, r3 - ldr r1, [r1] + ldr r1, [r1, #OFS_Pico_rom] @ Pico.mcd (used everywhere) eor r0, r0, #1 ldrb r0, [r1, r0] bx lr @@ -140,9 +142,9 @@ PicoRead8_mcd_io: cmp r1, #0x2000 @ a120xx? bne PicoRead8_io - ldr r1, =(Pico+0x22200) + ldr r1, =Pico and r0, r0, #0x3f - ldr r1, [r1] @ Pico.mcd (used everywhere) + ldr r1, [r1, #OFS_Pico_rom] @ Pico.mcd cmp r0, #0x0e ldrlt pc, [pc, r0, lsl #2] b m_m68k_read8_hi @@ -237,9 +239,9 @@ PicoReadM68k16_cell1: @ 0x220000 - 0x23ffff, cell arranged mov r3, #0x0e0000 0: cell_map - ldr r1, =(Pico+0x22200) + ldr r1, =Pico add r0, r0, r3 - ldr r1, [r1] + ldr r1, [r1, #OFS_Pico_rom] @ Pico.mcd bic r0, r0, #1 ldrh r0, [r1, r0] bx lr @@ -251,9 +253,9 @@ PicoRead16_mcd_io: bne PicoRead16_io m_m68k_read16_m68k_regs: - ldr r1, =(Pico+0x22200) + ldr r1, =Pico and r0, r0, #0x3e - ldr r1, [r1] @ Pico.mcd (used everywhere) + ldr r1, [r1, #OFS_Pico_rom] @ Pico.mcd cmp r0, #0x0e ldrlt pc, [pc, r0, lsl #1] b m_m68k_read16_hi @@ -328,8 +330,9 @@ PicoWriteM68k8_cell1: @ 0x220000 - 0x23ffff, cell arranged 0: mov r3, r1 cell_map - ldr r2, =(Pico+0x22200) + ldr r2, =Pico add r0, r0, r12 + ldr r2, [r2, #OFS_Pico_rom] @ Pico.mcd ldr r2, [r2] eor r0, r0, #1 strb 
r3, [r2, r0] @@ -355,9 +358,9 @@ PicoWriteM68k16_cell1: @ 0x220000 - 0x23ffff, cell arranged 0: mov r3, r1 cell_map - ldr r1, =(Pico+0x22200) + ldr r1, =Pico add r0, r0, r12 - ldr r1, [r1] + ldr r1, [r1, #OFS_Pico_rom] @ Pico.mcd bic r0, r0, #1 strh r3, [r1, r0] bx lr @@ -397,9 +400,9 @@ PicoReadS68k8_dec0: @ 0x080000 - 0x0bffff PicoReadS68k8_dec1: mov r3, #0x0a0000 @ + ^ / 2 0: - ldr r2, =(Pico+0x22200) + ldr r2, =Pico eor r0, r0, #2 - ldr r2, [r2] + ldr r2, [r2, #OFS_Pico_rom] @ Pico.mcd movs r0, r0, lsr #1 @ +4-6 <<16 add r2, r2, r3 @ map to our address ldrb r0, [r2, r0] @@ -429,8 +432,8 @@ m_s68k_read8_regs: bx lr m_s68k_read8_comm: - ldr r1, =(Pico+0x22200) - ldr r1, [r1] + ldr r1, =Pico + ldr r1, [r1, #OFS_Pico_rom] @ Pico.mcd add r1, r1, #0x110000 ldrb r1, [r1, r0] bic r0, r0, #1 @@ -442,9 +445,9 @@ m_s68k_read8_pcm: bne m_read_null @ must not trash r3 and r12 - ldr r1, =(Pico+0x22200) + ldr r1, =Pico bic r0, r0, #0xff0000 - ldr r1, [r1] + ldr r1, [r1, #OFS_Pico_rom] @ Pico.mcd mov r2, #0x110000 orr r2, r2, #0x002200 cmp r0, #0x2000 @@ -477,9 +480,9 @@ PicoReadS68k16_dec0: @ 0x080000 - 0x0bffff PicoReadS68k16_dec1: mov r3, #0x0a0000 @ + ^ / 2 0: - ldr r2, =(Pico+0x22200) + ldr r2, =Pico eor r0, r0, #2 - ldr r2, [r2] + ldr r2, [r2, #OFS_Pico_rom] @ Pico.mcd mov r0, r0, lsr #1 @ +4-6 <<16 add r2, r2, r3 @ map to our address ldrb r0, [r2, r0] @@ -508,9 +511,9 @@ m_s68k_read16_regs: .macro m_s68k_write8_2M_decode - ldr r2, =(Pico+0x22200) + ldr r2, =Pico eor r0, r0, #2 - ldr r2, [r2] @ Pico.rom + ldr r2, [r2, #OFS_Pico_rom] @ Pico.mcd movs r0, r0, lsr #1 @ +4-6 <<16 add r2, r2, r3 @ map to our address .endm @@ -592,9 +595,9 @@ m_s68k_write8_pcm: bxlt lr m_s68k_write8_pcm_ram: - ldr r3, =(Pico+0x22200) + ldr r3, =Pico bic r0, r0, #0x00e000 - ldr r3, [r3] + ldr r3, [r3, #OFS_Pico_rom] @ Pico.mcd mov r0, r0, lsr #1 add r2, r3, #0x110000 add r2, r2, #0x002200 @@ -611,9 +614,9 @@ m_s68k_write8_pcm_ram: .macro m_s68k_write16_2M_decode - ldr r2, =(Pico+0x22200) + ldr 
r2, =Pico eor r0, r0, #2 - ldr r2, [r2] + ldr r2, [r2, #OFS_Pico_rom] @ Pico.mcd mov r0, r0, lsr #1 @ +4-6 <<16 add r2, r2, r3 @ map to our address .endm @@ -692,9 +695,9 @@ m_s68k_write16_regs: bne s68k_reg_write16 m_s68k_write16_regs_spec: @ special case - ldr r2, =(Pico+0x22200) + ldr r2, =Pico mov r0, #0x110000 - ldr r2, [r2] + ldr r2, [r2, #OFS_Pico_rom] @ Pico.mcd add r0, r0, #0x00000f strb r1, [r2, r0] @ if (a == 0xe) s68k_regs[0xf] = d; bx lr diff --git a/pico/debug.c b/pico/debug.c index eb775bf6..82ae8a55 100644 --- a/pico/debug.c +++ b/pico/debug.c @@ -40,8 +40,8 @@ char *PDebugMain(void) sprintf(dstrp, "mode set 4: %02x\n", (r=reg[0xC])); MVP; sprintf(dstrp, "interlace: %i%i, cells: %i, shadow: %i\n", bit(r,2), bit(r,1), (r&0x80) ? 40 : 32, bit(r,3)); MVP; sprintf(dstrp, "scroll size: w: %i, h: %i SRAM: %i; eeprom: %i (%i)\n", reg[0x10]&3, (reg[0x10]&0x30)>>4, - !!(SRam.flags & SRF_ENABLED), !!(SRam.flags & SRF_EEPROM), SRam.eeprom_type); MVP; - sprintf(dstrp, "sram range: %06x-%06x, reg: %02x\n", SRam.start, SRam.end, Pico.m.sram_reg); MVP; + !!(Pico.sv.flags & SRF_ENABLED), !!(Pico.sv.flags & SRF_EEPROM), Pico.sv.eeprom_type); MVP; + sprintf(dstrp, "sram range: %06x-%06x, reg: %02x\n", Pico.sv.start, Pico.sv.end, Pico.m.sram_reg); MVP; sprintf(dstrp, "pend int: v:%i, h:%i, vdp status: %04x\n", bit(pv->pending_ints,5), bit(pv->pending_ints,4), pv->status); MVP; sprintf(dstrp, "pal: %i, hw: %02x, frame#: %i, cycles: %u\n", Pico.m.pal, Pico.m.hardware, Pico.m.frame_count, SekCyclesDone()); MVP; sprintf(dstrp, "M68k: PC: %06x, SR: %04x, irql: %i\n", SekPc, SekSr, SekIrqLevel); MVP; @@ -117,7 +117,7 @@ char *PDebugSpriteList(void) unsigned int *sprite; int code, code2, sx, sy, height; - sprite=(unsigned int *)(Pico.vram+((table+(link<<2))&0x7ffc)); // Find sprite + sprite=(unsigned int *)(PicoMem.vram+((table+(link<<2))&0x7ffc)); // Find sprite // get sprite info code = sprite[0]; @@ -245,23 +245,23 @@ void PDebugShowSprite(unsigned short *screen, int 
stride, int which) for (u=0; u < max_sprites && u <= which; u++) { - sprite=(int *)(Pico.vram+((table+(link<<2))&0x7ffc)); // Find sprite + sprite=(int *)(PicoMem.vram+((table+(link<<2))&0x7ffc)); // Find sprite link=(sprite[0]>>16)&0x7f; if (!link) break; // End of sprites } if (u >= max_sprites) return; - fsprite = (int *)(Pico.vram+(table&0x7ffc)); + fsprite = (int *)(PicoMem.vram+(table&0x7ffc)); oldsprite[0] = fsprite[0]; oldsprite[1] = fsprite[1]; fsprite[0] = (sprite[0] & ~0x007f01ff) | 0x000080; fsprite[1] = (sprite[1] & ~0x01ff8000) | 0x800000; oldreg = pvid->reg[7]; - oldcol = Pico.cram[0]; + oldcol = PicoMem.cram[0]; olddbg = pvid->debug_p; pvid->reg[7] = 0; - Pico.cram[0] = 0; + PicoMem.cram[0] = 0; pvid->debug_p = PVD_KILL_A | PVD_KILL_B; PicoFrameFull(); @@ -276,7 +276,7 @@ void PDebugShowSprite(unsigned short *screen, int stride, int which) fsprite[0] = oldsprite[0]; fsprite[1] = oldsprite[1]; pvid->reg[7] = oldreg; - Pico.cram[0] = oldcol; + PicoMem.cram[0] = oldcol; pvid->debug_p = olddbg; } @@ -325,18 +325,18 @@ void PDebugDumpMem(void) dump_ram_m(buf, "dumps/cart.bin", a ? 
"ab" : "wb"); } #endif - dump_ram_noswab(Pico.zram, "dumps/zram.bin"); - dump_ram(Pico.cram, "dumps/cram.bin"); + dump_ram_noswab(PicoMem.zram, "dumps/zram.bin"); + dump_ram(PicoMem.cram, "dumps/cram.bin"); if (PicoAHW & PAHW_SMS) { - dump_ram_noswab(Pico.vramb, "dumps/vram.bin"); + dump_ram_noswab(PicoMem.vramb, "dumps/vram.bin"); } else { - dump_ram(Pico.ram, "dumps/ram.bin"); - dump_ram(Pico.vram, "dumps/vram.bin"); - dump_ram(Pico.vsram,"dumps/vsram.bin"); + dump_ram(PicoMem.ram, "dumps/ram.bin"); + dump_ram(PicoMem.vram, "dumps/vram.bin"); + dump_ram(PicoMem.vsram,"dumps/vsram.bin"); } if (PicoAHW & PAHW_MCD) @@ -386,12 +386,12 @@ void PDebugZ80Frame(void) PsndStartFrame(); if (/*Pico.m.z80Run &&*/ !Pico.m.z80_reset && (PicoOpt&POPT_EN_Z80)) - PicoSyncZ80(SekCycleCnt + line_sample * 488); + PicoSyncZ80(Pico.t.m68c_cnt + line_sample * 488); if (PsndOut) PsndGetSamples(line_sample); if (/*Pico.m.z80Run &&*/ !Pico.m.z80_reset && (PicoOpt&POPT_EN_Z80)) { - PicoSyncZ80(SekCycleCnt + 224 * 488); + PicoSyncZ80(Pico.t.m68c_cnt + 224 * 488); z80_int(); } if (PsndOut) @@ -399,15 +399,15 @@ void PDebugZ80Frame(void) // sync z80 if (/*Pico.m.z80Run &&*/ !Pico.m.z80_reset && (PicoOpt&POPT_EN_Z80)) { - SekCycleCnt += Pico.m.pal ? 151809 : 127671; // cycles adjusted for converter - PicoSyncZ80(SekCycleCnt); + Pico.t.m68c_cnt += Pico.m.pal ? 
151809 : 127671; // cycles adjusted for converter + PicoSyncZ80(Pico.t.m68c_cnt); } if (PsndOut && ym2612.dacen && PsndDacLine < lines) PsndDoDAC(lines - 1); PsndDoPSG(lines - 1); timers_cycle(); - SekCycleAim = SekCycleCnt; + Pico.t.m68c_aim = Pico.t.m68c_cnt; } void PDebugCPUStep(void) diff --git a/pico/draw.c b/pico/draw.c index 5ecbca80..6408f205 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -215,7 +215,7 @@ static void DrawStrip(struct TileStrip *ts, int lflags, int cellskip) { unsigned int pack; - code = Pico.vram[ts->nametab + (tilex & ts->xmask)]; + code = PicoMem.vram[ts->nametab + (tilex & ts->xmask)]; if (code == blank) continue; if ((code >> 15) | (lflags & LF_FORCE)) { // high priority tile @@ -235,7 +235,7 @@ static void DrawStrip(struct TileStrip *ts, int lflags, int cellskip) pal=((code>>9)&0x30)|sh; } - pack = *(unsigned int *)(Pico.vram + addr); + pack = *(unsigned int *)(PicoMem.vram + addr); if (!pack) { blank = code; continue; @@ -274,7 +274,7 @@ static void DrawStripVSRam(struct TileStrip *ts, int plane_sh, int cellskip) //if((cell&1)==0) { int line,vscroll; - vscroll=Pico.vsram[(plane_sh&1)+(cell&~1)]; + vscroll=PicoMem.vsram[(plane_sh&1)+(cell&~1)]; // Find the line in the name table line=(vscroll+scan)&ts->line&0xffff; // ts->line is really ymask .. 
@@ -282,7 +282,7 @@ static void DrawStripVSRam(struct TileStrip *ts, int plane_sh, int cellskip) ty=(line&7)<<1; // Y-Offset into tile } - code=Pico.vram[ts->nametab+nametabadd+(tilex&ts->xmask)]; + code=PicoMem.vram[ts->nametab+nametabadd+(tilex&ts->xmask)]; if (code==blank) continue; if (code>>15) { // high priority tile int cval = code | (dx<<16) | (ty<<25); @@ -300,7 +300,7 @@ static void DrawStripVSRam(struct TileStrip *ts, int plane_sh, int cellskip) pal=((code>>9)&0x30)|((plane_sh<<5)&0x40); } - pack = *(unsigned int *)(Pico.vram + addr); + pack = *(unsigned int *)(PicoMem.vram + addr); if (!pack) { blank = code; continue; @@ -336,7 +336,7 @@ void DrawStripInterlace(struct TileStrip *ts) { unsigned int pack; - code=Pico.vram[ts->nametab+(tilex&ts->xmask)]; + code = PicoMem.vram[ts->nametab + (tilex & ts->xmask)]; if (code==blank) continue; if (code>>15) { // high priority tile int cval = (code&0xfc00) | (dx<<16) | (ty<<25); @@ -356,7 +356,7 @@ void DrawStripInterlace(struct TileStrip *ts) pal=((code>>9)&0x30); } - pack = *(unsigned int *)(Pico.vram + addr); + pack = *(unsigned int *)(PicoMem.vram + addr); if (!pack) { blank = code; continue; @@ -409,11 +409,11 @@ static void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells, htab+=plane_sh&1; // A or B // Get horizontal scroll value, will be masked later - ts.hscroll=Pico.vram[htab&0x7fff]; + ts.hscroll = PicoMem.vram[htab & 0x7fff]; if((pvid->reg[12]&6) == 6) { // interlace mode 2 - vscroll=Pico.vsram[plane_sh&1]; // Get vertical scroll value + vscroll = PicoMem.vsram[plane_sh & 1]; // Get vertical scroll value // Find the line in the name table ts.line=(vscroll+(est->DrawScanline<<1))&((ymask<<1)|1); @@ -426,7 +426,7 @@ static void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells, ts.line=ymask|(shift[width]<<24); // save some stuff instead of line DrawStripVSRam(&ts, plane_sh, cellskip); } else { - vscroll=Pico.vsram[plane_sh&1]; // Get vertical scroll value + vscroll = 
PicoMem.vsram[plane_sh & 1]; // Get vertical scroll value // Find the line in the name table ts.line=(vscroll+est->DrawScanline)&ymask; @@ -463,7 +463,7 @@ static void DrawWindow(int tstart, int tend, int prio, int sh, if (!(est->rendstatus & PDRAW_WND_DIFF_PRIO)) { // check the first tile code - code=Pico.vram[nametab+tilex]; + code = PicoMem.vram[nametab + tilex]; // if the whole window uses same priority (what is often the case), we may be able to skip this field if ((code>>15) != prio) return; } @@ -480,7 +480,7 @@ static void DrawWindow(int tstart, int tend, int prio, int sh, int dx, addr; int pal; - code=Pico.vram[nametab+tilex]; + code = PicoMem.vram[nametab + tilex]; if (code==blank) continue; if ((code>>15) != prio) { est->rendstatus |= PDRAW_WND_DIFF_PRIO; @@ -491,7 +491,7 @@ static void DrawWindow(int tstart, int tend, int prio, int sh, addr=(code&0x7ff)<<4; if (code&0x1000) addr+=14-ty; else addr+=ty; // Y-flip - pack = *(unsigned int *)(Pico.vram + addr); + pack = *(unsigned int *)(PicoMem.vram + addr); if (!pack) { blank = code; continue; @@ -512,7 +512,7 @@ static void DrawWindow(int tstart, int tend, int prio, int sh, int dx, addr; int pal; - code=Pico.vram[nametab+tilex]; + code = PicoMem.vram[nametab + tilex]; if(code==blank) continue; if((code>>15) != prio) { est->rendstatus |= PDRAW_WND_DIFF_PRIO; @@ -533,7 +533,7 @@ static void DrawWindow(int tstart, int tend, int prio, int sh, addr=(code&0x7ff)<<4; if (code&0x1000) addr+=14-ty; else addr+=ty; // Y-flip - pack = *(unsigned int *)(Pico.vram + addr); + pack = *(unsigned int *)(PicoMem.vram + addr); if (!pack) { blank = code; continue; @@ -587,7 +587,7 @@ static void DrawTilesFromCache(int *hc, int sh, int rlim, struct PicoEState *est addr = (code & 0x7ff) << 4; addr += code >> 25; // y offset into tile - pack = *(unsigned int *)(Pico.vram + addr); + pack = *(unsigned int *)(PicoMem.vram + addr); if (!pack) { blank = (short)code; continue; @@ -615,7 +615,7 @@ static void DrawTilesFromCache(int 
*hc, int sh, int rlim, struct PicoEState *est *zb++ &= 0xbf; *zb++ &= 0xbf; *zb++ &= 0xbf; *zb++ &= 0xbf; *zb++ &= 0xbf; *zb++ &= 0xbf; *zb++ &= 0xbf; *zb++ &= 0xbf; - pack = *(unsigned int *)(Pico.vram + addr); + pack = *(unsigned int *)(PicoMem.vram + addr); if (!pack) continue; @@ -717,7 +717,7 @@ static void DrawSprite(int *sprite, int sh) if(sx<=0) continue; if(sx>=328) break; // Offscreen - pack = *(unsigned int *)(Pico.vram + (tile & 0x7fff)); + pack = *(unsigned int *)(PicoMem.vram + (tile & 0x7fff)); fTileFunc(sx, pack, pal); } } @@ -737,7 +737,7 @@ static NOINLINE void DrawTilesFromCacheForced(const int *hc) dx = (code >> 16) & 0x1ff; pal = ((code >> 9) & 0x30); - pack = *(unsigned int *)(Pico.vram + addr); + pack = *(unsigned int *)(PicoMem.vram + addr); if (code & 0x0800) TileFlip_and(dx, pack, pal); else TileNorm_and(dx, pack, pal); @@ -783,7 +783,7 @@ static void DrawSpriteInterlace(unsigned int *sprite) if(sx<=0) continue; if(sx>=328) break; // Offscreen - pack = *(unsigned int *)(Pico.vram + (tile & 0x7fff)); + pack = *(unsigned int *)(PicoMem.vram + (tile & 0x7fff)); if (code & 0x0800) TileFlip(sx, pack, pal); else TileNorm(sx, pack, pal); } @@ -805,7 +805,7 @@ static NOINLINE void DrawAllSpritesInterlace(int pri, int sh) unsigned int *sprite; int code, sx, sy, height; - sprite=(unsigned int *)(Pico.vram+((table+(link<<2))&0x7ffc)); // Find sprite + sprite=(unsigned int *)(PicoMem.vram+((table+(link<<2))&0x7ffc)); // Find sprite // get sprite info code = sprite[0]; @@ -908,7 +908,7 @@ static void DrawSpritesSHi(unsigned char *sprited, const struct PicoEState *est) if(sx<=0) continue; if(sx>=328) break; // Offscreen - pack = *(unsigned int *)(Pico.vram + (tile & 0x7fff)); + pack = *(unsigned int *)(PicoMem.vram + (tile & 0x7fff)); fTileFunc(sx, pack, pal); } } @@ -982,7 +982,7 @@ static void DrawSpritesHiAS(unsigned char *sprited, int sh) if(sx<=0) continue; if(sx>=328) break; // Offscreen - pack = *(unsigned int *)(Pico.vram + (tile & 0x7fff)); + 
pack = *(unsigned int *)(PicoMem.vram + (tile & 0x7fff)); fTileFunc(sx, pack, pal); } } @@ -1042,7 +1042,7 @@ static NOINLINE void PrepareSprites(int full) unsigned int *sprite; int code2, sx, sy, height; - sprite=(unsigned int *)(Pico.vram+((table+(link<<2))&0x7ffc)); // Find sprite + sprite=(unsigned int *)(PicoMem.vram+((table+(link<<2))&0x7ffc)); // Find sprite // parse sprite info code2 = sprite[1]; @@ -1095,7 +1095,7 @@ found:; unsigned int *sprite; int code, code2, sx, sy, hv, height, width; - sprite=(unsigned int *)(Pico.vram+((table+(link<<2))&0x7ffc)); // Find sprite + sprite=(unsigned int *)(PicoMem.vram+((table+(link<<2))&0x7ffc)); // Find sprite // parse sprite info code = sprite[0]; @@ -1219,7 +1219,7 @@ void PicoDoHighPal555(int sh, int line, struct PicoEState *est) Pico.m.dirtyPal = 0; - spal = (void *)Pico.cram; + spal = (void *)PicoMem.cram; dpal = (void *)est->HighPal; for (i = 0; i < 0x40 / 2; i++) { @@ -1301,9 +1301,9 @@ static void FinalizeLine8bit(int sh, int line, struct PicoEState *est) rs |= PDRAW_SONIC_MODE; est->rendstatus = rs; if (dirty_count == 3) { - blockcpy(est->HighPal, Pico.cram, 0x40*2); + blockcpy(est->HighPal, PicoMem.cram, 0x40*2); } else if (dirty_count == 11) { - blockcpy(est->HighPal+0x40, Pico.cram, 0x40*2); + blockcpy(est->HighPal+0x40, PicoMem.cram, 0x40*2); } } diff --git a/pico/draw2.c b/pico/draw2.c index db155fa2..f0e0518e 100644 --- a/pico/draw2.c +++ b/pico/draw2.c @@ -25,7 +25,7 @@ static unsigned char PicoDraw2FB_[(8+320) * (8+240+8)]; static int HighCache2A[41*(TILE_ROWS+1)+1+1]; // caches for high layers static int HighCache2B[41*(TILE_ROWS+1)+1+1]; -unsigned short *PicoCramHigh=Pico.cram; // pointer to CRAM buff (0x40 shorts), converted to native device color (works only with 16bit for now) +unsigned short *PicoCramHigh=PicoMem.cram; // pointer to CRAM buff (0x40 shorts), converted to native device color (works only with 16bit for now) void (*PicoPrepareCram)()=0; // prepares PicoCramHigh for renderer to use 
@@ -46,7 +46,7 @@ static int TileXnormYnorm(unsigned char *pd,int addr,unsigned char pal) int i; for(i=8; i; i--, addr+=2, pd += LINE_WIDTH) { - pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels + pack=*(unsigned int *)(PicoMem.vram+addr); // Get 8 pixels if(!pack) continue; t=pack&0x0000f000; if (t) pd[0]=(unsigned char)((t>>12)|pal); @@ -69,7 +69,7 @@ static int TileXflipYnorm(unsigned char *pd,int addr,unsigned char pal) int i; for(i=8; i; i--, addr+=2, pd += LINE_WIDTH) { - pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels + pack=*(unsigned int *)(PicoMem.vram+addr); // Get 8 pixels if(!pack) continue; t=pack&0x000f0000; if (t) pd[0]=(unsigned char)((t>>16)|pal); @@ -92,7 +92,7 @@ static int TileXnormYflip(unsigned char *pd,int addr,unsigned char pal) addr+=14; for(i=8; i; i--, addr-=2, pd += LINE_WIDTH) { - pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels + pack=*(unsigned int *)(PicoMem.vram+addr); // Get 8 pixels if(!pack) continue; t=pack&0x0000f000; if (t) pd[0]=(unsigned char)((t>>12)|pal); @@ -116,7 +116,7 @@ static int TileXflipYflip(unsigned char *pd,int addr,unsigned char pal) addr+=14; for(i=8; i; i--, addr-=2, pd += LINE_WIDTH) { - pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels + pack=*(unsigned int *)(PicoMem.vram+addr); // Get 8 pixels if(!pack) continue; t=pack&0x000f0000; if (t) pd[0]=(unsigned char)((t>>16)|pal); @@ -161,7 +161,7 @@ static void DrawWindowFull(int start, int end, int prio, struct PicoEState *est) nametab += nametab_step*start; // check priority - code=Pico.vram[nametab+tile_start]; + code=PicoMem.vram[nametab+tile_start]; if ((code>>15) != prio) return; // hack: just assume that whole window uses same priority scrpos+=8*LINE_WIDTH+8; @@ -175,7 +175,7 @@ static void DrawWindowFull(int start, int end, int prio, struct PicoEState *est) // unsigned short *pal=NULL; unsigned char pal; - code=Pico.vram[nametab+tilex]; + code=PicoMem.vram[nametab+tilex]; if (code==blank) continue; // Get tile address/2: @@ 
-222,7 +222,7 @@ static void DrawLayerFull(int plane, int *hcache, int planestart, int planeend, if(!(pvid->reg[11]&3)) { // full screen scroll // Get horizontal scroll value - hscroll=Pico.vram[htab&0x7fff]; + hscroll=PicoMem.vram[htab&0x7fff]; htab = 0; // this marks that we don't have to update scroll value } @@ -243,7 +243,7 @@ static void DrawLayerFull(int plane, int *hcache, int planestart, int planeend, scrpos+=8*LINE_WIDTH*(planestart-START_ROW); // Get vertical scroll value: - vscroll=Pico.vsram[plane]&0x1ff; + vscroll=PicoMem.vsram[plane]&0x1ff; scrpos+=(8-(vscroll&7))*LINE_WIDTH; if(vscroll&7) planeend++; // we have vertically clipped tiles due to vscroll, so we need 1 more row @@ -262,7 +262,7 @@ static void DrawLayerFull(int plane, int *hcache, int planestart, int planeend, if(htab) { int htaddr=htab+(trow<<4); if(trow) htaddr-=(vscroll&7)<<1; - hscroll=Pico.vram[htaddr&0x7fff]; + hscroll=PicoMem.vram[htaddr&0x7fff]; } // Draw tiles across screen: @@ -276,7 +276,7 @@ static void DrawLayerFull(int plane, int *hcache, int planestart, int planeend, // unsigned short *pal=NULL; unsigned char pal; - code=Pico.vram[nametab_row+(tilex&xmask)]; + code=PicoMem.vram[nametab_row+(tilex&xmask)]; if (code==blank) continue; if (code>>15) { // high priority tile @@ -422,7 +422,7 @@ static void DrawAllSpritesFull(int prio, int maxwidth) unsigned int *sprite=NULL; int code, code2, sx, sy, height; - sprite=(unsigned int *)(Pico.vram+((table+(link<<2))&0x7ffc)); // Find sprite + sprite=(unsigned int *)(PicoMem.vram+((table+(link<<2))&0x7ffc)); // Find sprite // get sprite info code = sprite[0]; diff --git a/pico/draw2_arm.S b/pico/draw2_arm.S index 6e7e1ac0..1575653c 100644 --- a/pico/draw2_arm.S +++ b/pico/draw2_arm.S @@ -353,10 +353,10 @@ DrawLayerFull: mov r6, r1 @ hcache - ldr r11, [r12, #OFS_Pico_video] - ldr r10, [r12, #OFS_Pico_vram] - ldrb r5, [r11, #13] @ pvid->reg[13] - ldrb r7, [r11, #11] + ldr r11, [r12, #OFS_EST_Pico] + ldr r10, [r12, #OFS_EST_PicoMem_vram] 
+ ldrb r5, [r11, #OFS_Pico_video_reg+13] @ pvid->reg[13] + ldrb r7, [r11, #OFS_Pico_video_reg+11] sub lr, r3, r2 and lr, lr, #0x00ff0000 @ lr=cells @@ -366,7 +366,7 @@ DrawLayerFull: bic r5, r5, #0x00ff0000 @ just in case tst r7, #3 @ full screen scroll? (if ==0) - ldrb r7, [r11, #16] @ ??hh??ww + ldrb r7, [r11, #OFS_Pico_video_reg+16] @ ??hh??ww ldreqh r5, [r10, r5] biceq r5, r5, #0x0000fc00 @ r5=hscroll (0-0x3ff) movne r5, r5, lsr #1 @@ -397,15 +397,15 @@ DrawLayerFull: @ Find name table: tst r0, r0 - ldreqb r4, [r11, #2] + ldreqb r4, [r11, #OFS_Pico_video_reg+2] moveq r4, r4, lsr #3 - ldrneb r4, [r11, #4] + ldrneb r4, [r11, #OFS_Pico_video_reg+4] and r4, r4, #7 orr lr, lr, r4, lsl #13 @ lr|=nametab_bits{3}<<13 ldr r11,[sp, #9*4] @ est sub r4, r9, #(START_ROW<<24) - ldr r11, [r11, #OFS_Draw2FB] + ldr r11, [r11, #OFS_EST_Draw2FB] mov r4, r4, asr #24 mov r7, #328*8 mla r11, r4, r7, r11 @ scrpos+=8*328*(planestart-START_ROW); @@ -582,13 +582,13 @@ DrawTilesFromCacheF: mov r9, #0xff000000 @ r9=prevcode=-1 mvn r6, #0 @ r6=prevy=-1 - ldr r4, [r1, #OFS_Draw2FB] + ldr r4, [r1, #OFS_EST_Draw2FB] ldr r2, [r0], #4 @ read y offset mov r7, #328 mla r2, r7, r2, r4 sub r12, r2, #(328*8*START_ROW) @ r12=scrpos - ldr r10, [r1, #OFS_Pico_vram] + ldr r10, [r1, #OFS_EST_PicoMem_vram] mov r8, r0 @ hc mov r0, #0xf @@ -674,11 +674,11 @@ DrawTilesFromCacheF: DrawWindowFull: stmfd sp!, {r4-r11,lr} - ldr r11, [r3, #OFS_Pico_video] - ldrb r12, [r11, #3] @ pvid->reg[3] + ldr r11, [r3, #OFS_EST_Pico] + ldrb r12, [r11, #OFS_Pico_video_reg+3] @ pvid->reg[3] mov r12, r12, lsl #10 - ldr r4, [r11, #12] + ldr r4, [r11, #OFS_Pico_video_reg+12] mov r5, #1 @ nametab_step tst r4, #1 @ 40 cell mode? 
andne r12, r12, #0xf000 @ 0x3c<<10 @@ -689,7 +689,7 @@ DrawWindowFull: and r4, r0, #0xff mla r12, r5, r4, r12 @ nametab += nametab_step*start; - ldr r10, [r3, #OFS_Pico_vram] + ldr r10, [r3, #OFS_EST_PicoMem_vram] mov r4, r0, lsr #16 @ r4=start_cell_h add r7, r12, r4, lsl #1 @@ -707,7 +707,7 @@ DrawWindowFull: mov r9, #0xff000000 @ r9=prevcode=-1 - ldr r11, [r3, #OFS_Draw2FB] + ldr r11, [r3, #OFS_EST_Draw2FB] and r4, r0, #0xff add r11, r11, #328*8 sub r4, r4, #START_ROW @@ -760,7 +760,8 @@ DrawWindowFull: tst r9, #0x080000 @ hflip? bne .dwf_hflip - @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf + @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=PicoMem.vram) + @ r2,r4,r7: scratch, r0=0xf Tile 0, 0 b .dwfloop @@ -870,7 +871,8 @@ DrawWindowFull: cmp r6, #(END_ROW*8+8) bge 52b - @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf + @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=PicoMem.vram) + @ r2,r4,r7: scratch, r0=0xf Tile \hflip, \vflip b 52b .endm @@ -905,8 +907,8 @@ DrawSpriteFull: and r3, lr, #0x6000 mov r3, r3, lsr #9 @ r3=pal=((code>>9)&0x30); - ldr r11, [r1, #OFS_Draw2FB] - ldr r10, [r1, #OFS_Pico_vram] + ldr r11, [r1, #OFS_EST_Draw2FB] + ldr r10, [r1, #OFS_EST_PicoMem_vram] sub r1, r12, #(START_ROW*8) mov r0, #328 mla r11, r1, r0, r11 @ scrpos+=(sy-START_ROW*8)*328; diff --git a/pico/draw_arm.S b/pico/draw_arm.S index fae07b8b..d53e04bd 100644 --- a/pico/draw_arm.S +++ b/pico/draw_arm.S @@ -287,10 +287,10 @@ DrawLayer: ldr r12, [sp] @ est stmfd sp!, {r4-r11,lr} - ldr r11, [r12, #OFS_Pico_video] + ldr r11, [r12, #OFS_EST_Pico] mov r8, #1 - ldrb r7, [r11, #16] @ ??vv??hh + ldrb r7, [r11, #OFS_Pico_video_reg+16] @ ??vv??hh mov r6, r1 @ hcache orr r9, r3, r0, lsl #29 @ r9=force[31]|sh[30]|plane[29] @@ -311,13 +311,13 @@ DrawLayer: cmp r10, #7 subge r10, r10, #1 @ r10=shift[width] (5,6,6,7) - ldr r2, [r12, #OFS_DrawScanline] - ldr lr, [r12, #OFS_Pico_vram] + ldr r2, [r12, #OFS_EST_DrawScanline] + ldr lr, [r12, 
#OFS_EST_PicoMem_vram] @ Find name table: ands r0, r0, #1 - ldreqb r12, [r11, #2] - ldrneb r12, [r11, #4] + ldreqb r12, [r11, #OFS_Pico_video_reg+2] + ldrneb r12, [r11, #OFS_Pico_video_reg+4] @ calculate xmask: mov r5, r8, lsl r10 @@ -327,8 +327,8 @@ DrawLayer: movne r12, r12, lsl #13 and r12, r12, #(7<<13) @ r12=(ts->nametab<<1) (halfword compliant) - ldrh r8, [r11, #12] - ldrb r7, [r11, #11] + ldrh r8, [r11, #OFS_Pico_video_reg+12] + ldrb r7, [r11, #OFS_Pico_video_reg+11] mov r4, r8, lsr #8 @ pvid->reg[13] mov r4, r4, lsl #10 @ htab=pvid->reg[13]<<9; (halfwords) @@ -345,7 +345,7 @@ DrawLayer: @ Get vertical scroll value: add r7, lr, #0x012000 - add r7, r7, #0x000180 @ r7=Pico.vsram (Pico+0x22180) + add r7, r7, #0x000180 @ r7=PicoMem.vsram (PicoMem+0x22180) ldr r7, [r7] tst r8, #2 @@ -392,7 +392,7 @@ DrawLayer: @ cache some stuff to avoid mem access ldr r11,[sp, #9*4] @ est mov r0, #0xf - ldr r11,[r11, #OFS_HighCol] + ldr r11,[r11, #OFS_EST_HighCol] mvn r9, #0 @ r9=prevcode=-1 add r1, r11, r7 @ r1=pdest @@ -497,10 +497,10 @@ DrawLayer: tst r10, #1<<21 @ seen non hi-prio tile ldr r1, [sp, #9*4] @ est mov r0, #0 - ldreq r2, [r1, #OFS_rendstatus] + ldreq r2, [r1, #OFS_EST_rendstatus] str r0, [r6] @ terminate the cache list orreq r2, r2, #PDRAW_PLANE_HI_PRIO @ had a layer with all hi-prio tiles - streq r2, [r1, #OFS_rendstatus] + streq r2, [r1, #OFS_EST_rendstatus] ldmfd sp!, {r4-r11,lr} bx lr @@ -515,7 +515,7 @@ DrawLayer: ldr r11, [sp, #9*4] @ est orr r5, r1, r10, lsl #24 - ldr r4, [r11, #OFS_DrawScanline] + ldr r4, [r11, #OFS_EST_DrawScanline] sub r1, r3, #1 orr r5, r5, r4, lsl #16 @ r5=(shift_width[31:24]|scanline[23:16]|ymask[15:0]) and r1, r1, #7 @@ -541,7 +541,7 @@ DrawLayer: @ cache some stuff to avoid mem access ldr r11,[sp, #9*4] @ est mov r0, #0xf - ldr r11,[r11, #OFS_HighCol] + ldr r11,[r11, #OFS_EST_HighCol] mvn r9, #0 @ r9=prevcode=-1 add r1, r11, r7 @ r1=pdest @@ -557,7 +557,7 @@ DrawLayer: @ calc offset and read tileline code to r7, also calc ty add 
r7, lr, #0x012000 - add r7, r7, #0x000180 @ r7=Pico.vsram (Pico+0x22180) + add r7, r7, #0x000180 @ r7=PicoMem.vsram (PicoMem+0x22180) add r7, r7, r10,asr #23 @ vsram + ((cell&~1)<<1) bic r7, r7, #3 tst r10,#0x8000 @ plane1? @@ -576,7 +576,7 @@ DrawLayer: mov r4, r4, lsl r7 @ nametabadd and r7, r8, r8, lsr #25 - add r7, lr, r7, lsl #1 @ Pico.vram+((tilex&ts->xmask) as halfwords) + add r7, lr, r7, lsl #1 @ PicoMem.vram+((tilex&ts->xmask) as halfwords) add r7, r7, r4, lsl #1 ldrh r7, [r7, r12] @ r7=code (int, but from unsigned, no sign extend) @@ -598,7 +598,7 @@ DrawLayer: mov r2, r2, lsr #17 eorcs r2, r2, #0x0e @ if (code&0x1000) addr^=0xe; - ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels + ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(PicoMem.vram+addr); // Get 8 pixels bic r7, r3, #0x3f and r3, r9, #0x6000 @@ -656,7 +656,7 @@ DrawLayer: add r2, r2, r10, lsl #17 mov r2, r2, lsr #17 eorcs r2, r2, #0x0e @ if (code&0x1000) addr^=0xe; - ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels + ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(PicoMem.vram+addr); // Get 8 pixels mov r9, r7 @ remember code tst r2, r2 orrne r10, r10, #1<<22 @@ -667,10 +667,10 @@ DrawLayer: tst r8, #(1<<24) @ seen non hi-prio tile ldr r1, [sp, #9*4] @ est mov r0, #0 - ldreq r2, [r1, #OFS_rendstatus] + ldreq r2, [r1, #OFS_EST_rendstatus] str r0, [r6] @ terminate the cache list orreq r2, r2, #PDRAW_PLANE_HI_PRIO @ had a layer with all hi-prio tiles - streq r2, [r1, #OFS_rendstatus] + streq r2, [r1, #OFS_EST_rendstatus] ldmfd sp!, {r4-r11,lr} bx lr @@ -716,7 +716,7 @@ BackFill: stmfd sp!, {r4-r9,lr} mov r0, r0, lsl #26 - ldr lr, [r2, #OFS_HighCol] + ldr lr, [r2, #OFS_EST_HighCol] mov r0, r0, lsr #26 add lr, lr, #8 @@ -758,9 +758,9 @@ DrawTilesFromCache: stmfd sp!, {r4-r9,r11,lr} @ cache some stuff to avoid mem access - ldr r11,[r3, #OFS_HighCol] + ldr r11,[r3, #OFS_EST_HighCol] mov r12,#0xf - ldr lr, [r3, #OFS_Pico_vram] + ldr 
lr, [r3, #OFS_EST_PicoMem_vram] mov r9, r3 @ est mvn r5, #0 @ r5=prevcode=-1 @@ -892,14 +892,14 @@ DrawTilesFromCache: @ check if we have detected layer covered with hi-prio tiles: .dtfc_check_rendflags: - ldr r2, [r9, #OFS_rendstatus] + ldr r2, [r9, #OFS_EST_rendstatus] tst r2, #(PDRAW_PLANE_HI_PRIO|PDRAW_SHHI_DONE) beq .dtfc_loop bic r8, r8, #1 @ sh/hi mode off tst r2, #PDRAW_SHHI_DONE bne .dtfc_loop @ already processed orr r2, r2, #PDRAW_SHHI_DONE - str r2, [r9, #OFS_rendstatus] + str r2, [r9, #OFS_EST_rendstatus] add r1, r11,#8 mov r3, #320/4/4 @@ -939,16 +939,16 @@ DrawSpritesSHi: add r10,r0, #3 @ r10=HighLnSpr end add r10,r10,r3 @ r10=HighLnSpr end - ldr r11,[r1, #OFS_HighCol] + ldr r11,[r1, #OFS_EST_HighCol] mov r12,#0xf - ldr lr, [r1, #OFS_Pico_vram] + ldr lr, [r1, #OFS_EST_PicoMem_vram] DrawSpriteSHi: @ draw next sprite ldrb r0, [r10,#-1]! ldr r7, [sp] @ est - ldr r1, [r7, #OFS_HighPreSpr] + ldr r1, [r7, #OFS_EST_HighPreSpr] cmp r0, #0xff ldmeqfd sp!, {r1,r4-r11,pc} @ end of list and r0, r0, #0x7f @@ -974,7 +974,7 @@ DrawSpriteSHi: mov r5, r3, lsr #24 and r5, r5, #7 @ r5=height - ldr r7, [r7, #OFS_DrawScanline] + ldr r7, [r7, #OFS_EST_DrawScanline] mov r0, r3, lsl #16 @ r4=sy<<16 (tmp) sub r7, r7, r0, asr #16 @ r7=row=DrawScanline-sy @@ -1015,7 +1015,7 @@ DrawSpriteSHi: mov r8, r8, lsl #17 mov r8, r8, lsr #17 @ tile&=0x7fff; // Clip tile address - ldr r2, [lr, r8, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels + ldr r2, [lr, r8, lsl #1] @ pack=*(unsigned int *)(PicoMem.vram+addr); // Get 8 pixels add r1, r11, r0 @ r1=pdest tst r2, r2 beq .dsprShi_loop @@ -1117,9 +1117,9 @@ DrawAllSprites: add r10,r0, #3 add r10,r10,r2 @ r10=HighLnSpr end - ldr r11,[r3, #OFS_HighCol] + ldr r11,[r3, #OFS_EST_HighCol] mov r12,#0xf - ldr lr, [r3, #OFS_Pico_vram] + ldr lr, [r3, #OFS_EST_PicoMem_vram] @ + 0 : hhhhvvvv ----hhvv yyyyyyyy yyyyyyyy // v, h: horiz. 
size @ + 4 : xxxxxxxx xxxxxxxx pccvhnnn nnnnnnnn // x: x coord + 8 @@ -1134,12 +1134,12 @@ DrawSprite: ldmeqfd sp!, {r1,r3-r11,pc} @ end of list cmp r2, r8, lsr #1 bne DrawSprite @ wrong priority - ldr r1, [r7, #OFS_HighPreSpr] + ldr r1, [r7, #OFS_EST_HighPreSpr] and r0, r0, #0x7f add r0, r1, r0, lsl #3 ldr r3, [r0] @ sprite[0] - ldr r7, [r7, #OFS_DrawScanline] + ldr r7, [r7, #OFS_EST_DrawScanline] mov r6, r3, lsr #28 sub r6, r6, #1 @ r6=width-1 (inc later) mov r5, r3, lsr #24 @@ -1198,7 +1198,7 @@ DrawSprite: mov r8, r8, lsl #17 mov r8, r8, lsr #17 @ tile&=0x7fff; // Clip tile address - ldr r2, [lr, r8, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels + ldr r2, [lr, r8, lsl #1] @ pack=*(unsigned int *)(PicoMem.vram+addr); // Get 8 pixels add r1, r11, r0 @ r1=pdest tst r2, r2 beq .dspr_loop @@ -1267,12 +1267,12 @@ DrawWindow: ldr r12, [sp] @ est stmfd sp!, {r4-r11,lr} - ldr r6, [r12, #OFS_Pico_video] - ldr r10, [r12, #OFS_DrawScanline] + ldr r6, [r12, #OFS_EST_Pico] + ldr r10, [r12, #OFS_EST_DrawScanline] mov r11, r12 @ est - ldrb r12, [r6, #3] @ pvid->reg[3] + ldrb r12, [r6, #OFS_Pico_video_reg+3] @ pvid->reg[3] - ldr r4, [r6, #12] + ldr r4, [r6, #OFS_Pico_video_reg+12] mov r5, r10, lsr #3 and r10, r10, #7 mov r10, r10, lsl #1 @ r10=ty @@ -1286,8 +1286,8 @@ DrawWindow: addeq r12, r12, r5, lsl #6 @ nametab add r12, r12, r0, lsl #2 @ +starttile - ldr lr, [r11, #OFS_Pico_vram] - ldr r6, [r11, #OFS_rendstatus] + ldr lr, [r11, #OFS_EST_PicoMem_vram] + ldr r6, [r11, #OFS_EST_rendstatus] @ fetch the first code now ldrh r7, [lr, r12] @@ -1304,7 +1304,7 @@ DrawWindow: sub r8, r1, r0 @ cache some stuff to avoid mem access - ldr r11, [r11, #OFS_HighCol] + ldr r11, [r11, #OFS_EST_HighCol] mov r8, r8, lsl #1 @ cells add r11,r11,#8 mvn r9, #0 @ r9=prevcode=-1 @@ -1392,9 +1392,9 @@ DrawWindow: and r2, r6, #PDRAW_WND_DIFF_PRIO ldmfd sp!, {r4-r11,lr} ldr r0, [sp] - ldr r1, [r0, #OFS_rendstatus] + ldr r1, [r0, #OFS_EST_rendstatus] orr r1, r1, r2 - str r1, [r0, 
#OFS_rendstatus] + str r1, [r0, #OFS_EST_rendstatus] bx lr @@ -1486,17 +1486,17 @@ PicoDoHighPal555: stmfd sp!, {r4-r10,lr} mov r10,r2 @ est mov r1, #0 - ldr r8, [r10, #OFS_Pico_video] + ldr r8, [r10, #OFS_EST_Pico] PicoDoHighPal555_nopush: orr r9, r1, r0, lsl #31 @ 0:called from FinalizeLine555, 31: s/h - add r0, r10, #OFS_HighPal + add r0, r10, #OFS_EST_HighPal mov r1, #0 - strb r1, [r8, #-0x1a] @ 0x2220e ~ dirtyPal + strb r1, [r8, #OFS_Pico_m_dirtyPal] - sub r1, r8, #0x128 @ r1=Pico.cram + ldr r1, [r10, #OFS_EST_PicoMem_cram] mov r2, #0x40 mov r8, #0x0061 orr r8, r8, #0x0800 @@ -1507,7 +1507,7 @@ PicoDoHighPal555_nopush: tst r9, #(1<<31) beq PicoDoHighPal555_end - add r3, r10, #OFS_HighPal + add r3, r10, #OFS_EST_HighPal @ shadowed pixels: mov r12, #0x008e @@ -1550,7 +1550,7 @@ PicoDoHighPal555_end: tst r9, #1 ldmeqfd sp!, {r4-r10,pc} - ldr r8, [r10, #OFS_Pico_video] + ldr r8, [r10, #OFS_EST_Pico] b FinalizeLineRGB555_pal_done @@ -1561,33 +1561,33 @@ PicoDoHighPal555_end: FinalizeLine555: stmfd sp!, {r4-r10,lr} mov r10,r2 @ est - ldr r8, [r10, #OFS_Pico_video] + ldr r8, [r10, #OFS_EST_Pico] - ldrb r2, [r8, #-0x1a] @ 0x2220e ~ dirtyPal + ldrb r2, [r8, #OFS_Pico_m_dirtyPal] mov r1, #1 tst r2, r2 bne PicoDoHighPal555_nopush FinalizeLineRGB555_pal_done: - add r3, r10, #OFS_HighPal + add r3, r10, #OFS_EST_HighPal - ldr r12, [r10, #OFS_rendstatus] + ldr r12, [r10, #OFS_EST_rendstatus] eors r0, r0, #1 @ sh is 0 mov lr, #0xff tstne r12,#PDRAW_ACC_SPRITES movne lr, #0x3f - ldr r1, [r10, #OFS_HighCol] - ldr r0, [r10, #OFS_DrawLineDest] + ldr r1, [r10, #OFS_EST_HighCol] + ldr r0, [r10, #OFS_EST_DrawLineDest] add r1, r1, #8 - ldrb r12, [r8, #12] + ldrb r12, [r8, #OFS_Pico_video_reg+12] mov lr, lr, lsl #1 tst r12, #1 movne r2, #320/8 @ len bne .fl_no32colRGB555 - ldr r4, [r10, #OFS_PicoOpt] + ldr r4, [r10, #OFS_EST_PicoOpt] mov r2, #256/8 ldr r4, [r4] tst r4, #0x4000 diff --git a/pico/eeprom.c b/pico/eeprom.c index 16f6c05c..d727766d 100644 --- a/pico/eeprom.c +++ 
b/pico/eeprom.c @@ -42,14 +42,14 @@ static void EEPROM_write_do(unsigned int d) // ???? ??la (l=SCL, a=SDA) { // we are started and SCL went high - next cycle scyc++; // pre-increment - if(SRam.eeprom_type) { + if(Pico.sv.eeprom_type) { // X24C02+ if((ssa&1) && scyc == 18) { scyc = 9; saddr++; // next address in read mode - /*if(SRam.eeprom_type==2) saddr&=0xff; else*/ saddr&=0x1fff; // mask + /*if(Pico.sv.eeprom_type==2) saddr&=0xff; else*/ saddr&=0x1fff; // mask } - else if(SRam.eeprom_type == 2 && scyc == 27) scyc = 18; + else if(Pico.sv.eeprom_type == 2 && scyc == 27) scyc = 18; else if(scyc == 36) scyc = 27; } else { // X24C01 @@ -63,29 +63,29 @@ static void EEPROM_write_do(unsigned int d) // ???? ??la (l=SCL, a=SDA) else if((sreg & 8) && (sreg & 2) && !(d&2)) { // we are started and SCL went low (falling edge) - if(SRam.eeprom_type) { + if(Pico.sv.eeprom_type) { // X24C02+ if(scyc == 9 || scyc == 18 || scyc == 27); // ACK cycles - else if( (SRam.eeprom_type == 3 && scyc > 27) || (SRam.eeprom_type == 2 && scyc > 18) ) { + else if( (Pico.sv.eeprom_type == 3 && scyc > 27) || (Pico.sv.eeprom_type == 2 && scyc > 18) ) { if(!(ssa&1)) { // data write - unsigned char *pm=SRam.data+saddr; + unsigned char *pm=Pico.sv.data+saddr; *pm <<= 1; *pm |= d&1; if(scyc == 26 || scyc == 35) { saddr=(saddr&~0xf)|((saddr+1)&0xf); // only 4 (?) 
lowest bits are incremented elprintf(EL_EEPROM, "eeprom: write done, addr inc to: %x, last byte=%02x", saddr, *pm); } - SRam.changed = 1; + Pico.sv.changed = 1; } } else if(scyc > 9) { if(!(ssa&1)) { // we latch another addr bit saddr<<=1; - if(SRam.eeprom_type == 2) saddr&=0xff; else saddr&=0x1fff; // mask + if(Pico.sv.eeprom_type == 2) saddr&=0xff; else saddr&=0x1fff; // mask saddr|=d&1; if(scyc==17||scyc==26) { elprintf(EL_EEPROM, "eeprom: addr reg done: %x", saddr); - if(scyc==17&&SRam.eeprom_type==2) { saddr&=0xff; saddr|=(ssa<<7)&0x700; } // add device bits too + if(scyc==17&&Pico.sv.eeprom_type==2) { saddr&=0xff; saddr|=(ssa<<7)&0x700; } // add device bits too } } } else { @@ -99,13 +99,13 @@ static void EEPROM_write_do(unsigned int d) // ???? ??la (l=SCL, a=SDA) else if(scyc > 9) { if(!(saddr&1)) { // data write - unsigned char *pm=SRam.data+(saddr>>1); + unsigned char *pm=Pico.sv.data+(saddr>>1); *pm <<= 1; *pm |= d&1; if(scyc == 17) { saddr=(saddr&0xf9)|((saddr+2)&6); // only 2 lowest bits are incremented elprintf(EL_EEPROM, "eeprom: write done, addr inc to: %x, last byte=%02x", saddr>>1, *pm); } - SRam.changed = 1; + Pico.sv.changed = 1; } } else { // we latch another addr bit @@ -129,11 +129,11 @@ static void EEPROM_upd_pending(unsigned int d) sreg &= ~0xc0; // SCL - d1 = (d >> SRam.eeprom_bit_cl) & 1; + d1 = (d >> Pico.sv.eeprom_bit_cl) & 1; sreg |= d1 << 7; // SDA in - d1 = (d >> SRam.eeprom_bit_in) & 1; + d1 = (d >> Pico.sv.eeprom_bit_in) & 1; sreg |= d1 << 6; Pico.m.eeprom_status = (unsigned char) sreg; @@ -190,23 +190,23 @@ unsigned int EEPROM_read(void) } else if (scyc > 9 && scyc < 18) { // started and first command word received shift = 17-scyc; - if (SRam.eeprom_type) { + if (Pico.sv.eeprom_type) { // X24C02+ if (ssa&1) { elprintf(EL_EEPROM, "eeprom: read: addr %02x, cycle %i, reg %02x", saddr, scyc, sreg); - if (shift==0) elprintf(EL_EEPROM, "eeprom: read done, byte %02x", SRam.data[saddr]); - d = (SRam.data[saddr]>>shift)&1; + if (shift==0) 
elprintf(EL_EEPROM, "eeprom: read done, byte %02x", Pico.sv.data[saddr]); + d = (Pico.sv.data[saddr]>>shift)&1; } } else { // X24C01 if (saddr&1) { elprintf(EL_EEPROM, "eeprom: read: addr %02x, cycle %i, reg %02x", saddr>>1, scyc, sreg); - if (shift==0) elprintf(EL_EEPROM, "eeprom: read done, byte %02x", SRam.data[saddr>>1]); - d = (SRam.data[saddr>>1]>>shift)&1; + if (shift==0) elprintf(EL_EEPROM, "eeprom: read done, byte %02x", Pico.sv.data[saddr>>1]); + d = (Pico.sv.data[saddr>>1]>>shift)&1; } } } - return (d << SRam.eeprom_bit_out); + return (d << Pico.sv.eeprom_bit_out); } diff --git a/pico/memory.c b/pico/memory.c index 436f5453..e747552f 100644 --- a/pico/memory.c +++ b/pico/memory.c @@ -265,8 +265,8 @@ static port_read_func *port_readers[3] = { static NOINLINE u32 port_read(int i) { - u32 data_reg = Pico.ioports[i + 1]; - u32 ctrl_reg = Pico.ioports[i + 4] | 0x80; + u32 data_reg = PicoMem.ioports[i + 1]; + u32 ctrl_reg = PicoMem.ioports[i + 4] | 0x80; u32 in, out; out = data_reg & ctrl_reg; @@ -310,7 +310,7 @@ NOINLINE u32 io_ports_read(u32 a) case 1: d = port_read(0); break; case 2: d = port_read(1); break; case 3: d = port_read(2); break; - default: d = Pico.ioports[a]; break; // IO ports can be used as RAM + default: d = PicoMem.ioports[a]; break; // IO ports can be used as RAM } return d; } @@ -323,17 +323,17 @@ NOINLINE void io_ports_write(u32 a, u32 d) if (1 <= a && a <= 2) { Pico.m.padDelay[a - 1] = 0; - if (!(Pico.ioports[a] & 0x40) && (d & 0x40)) + if (!(PicoMem.ioports[a] & 0x40) && (d & 0x40)) Pico.m.padTHPhase[a - 1]++; } // certain IO ports can be used as RAM - Pico.ioports[a] = d; + PicoMem.ioports[a] = d; } static int z80_cycles_from_68k(void) { - int m68k_cnt = SekCyclesDone() - timing.m68c_frame_start; + int m68k_cnt = SekCyclesDone() - Pico.t.m68c_frame_start; return cycles_68k_to_z80(m68k_cnt); } @@ -345,7 +345,7 @@ void NOINLINE ctl_write_z80busreq(u32 d) { if (d) { - timing.z80c_cnt = z80_cycles_from_68k() + 2; + Pico.t.z80c_cnt = 
z80_cycles_from_68k() + 2; } else { @@ -377,7 +377,7 @@ void NOINLINE ctl_write_z80reset(u32 d) } else { - timing.z80c_cnt = z80_cycles_from_68k() + 2; + Pico.t.z80c_cnt = z80_cycles_from_68k() + 2; z80_reset(); } Pico.m.z80_reset = d; @@ -414,14 +414,14 @@ static void psg_write_z80(u32 d) static u32 PicoRead8_sram(u32 a) { u32 d; - if (SRam.start <= a && a <= SRam.end && (Pico.m.sram_reg & SRR_MAPPED)) + if (Pico.sv.start <= a && a <= Pico.sv.end && (Pico.m.sram_reg & SRR_MAPPED)) { - if (SRam.flags & SRF_EEPROM) { + if (Pico.sv.flags & SRF_EEPROM) { d = EEPROM_read(); if (!(a & 1)) d >>= 8; } else - d = *(u8 *)(SRam.data - SRam.start + a); + d = *(u8 *)(Pico.sv.data - Pico.sv.start + a); elprintf(EL_SRAMIO, "sram r8 [%06x] %02x @ %06x", a, d, SekPc); return d; } @@ -436,12 +436,12 @@ static u32 PicoRead8_sram(u32 a) static u32 PicoRead16_sram(u32 a) { u32 d; - if (SRam.start <= a && a <= SRam.end && (Pico.m.sram_reg & SRR_MAPPED)) + if (Pico.sv.start <= a && a <= Pico.sv.end && (Pico.m.sram_reg & SRR_MAPPED)) { - if (SRam.flags & SRF_EEPROM) + if (Pico.sv.flags & SRF_EEPROM) d = EEPROM_read(); else { - u8 *pm = (u8 *)(SRam.data - SRam.start + a); + u8 *pm = (u8 *)(Pico.sv.data - Pico.sv.start + a); d = pm[0] << 8; d |= pm[1]; } @@ -459,20 +459,20 @@ static u32 PicoRead16_sram(u32 a) static void PicoWrite8_sram(u32 a, u32 d) { - if (a > SRam.end || a < SRam.start || !(Pico.m.sram_reg & SRR_MAPPED)) { + if (a > Pico.sv.end || a < Pico.sv.start || !(Pico.m.sram_reg & SRR_MAPPED)) { m68k_unmapped_write8(a, d); return; } elprintf(EL_SRAMIO, "sram w8 [%06x] %02x @ %06x", a, d & 0xff, SekPc); - if (SRam.flags & SRF_EEPROM) + if (Pico.sv.flags & SRF_EEPROM) { EEPROM_write8(a, d); } else { - u8 *pm = (u8 *)(SRam.data - SRam.start + a); + u8 *pm = (u8 *)(Pico.sv.data - Pico.sv.start + a); if (*pm != (u8)d) { - SRam.changed = 1; + Pico.sv.changed = 1; *pm = (u8)d; } } @@ -480,24 +480,24 @@ static void PicoWrite8_sram(u32 a, u32 d) static void PicoWrite16_sram(u32 a, u32 d) 
{ - if (a > SRam.end || a < SRam.start || !(Pico.m.sram_reg & SRR_MAPPED)) { + if (a > Pico.sv.end || a < Pico.sv.start || !(Pico.m.sram_reg & SRR_MAPPED)) { m68k_unmapped_write16(a, d); return; } elprintf(EL_SRAMIO, "sram w16 [%06x] %04x @ %06x", a, d & 0xffff, SekPc); - if (SRam.flags & SRF_EEPROM) + if (Pico.sv.flags & SRF_EEPROM) { EEPROM_write16(d); } else { - u8 *pm = (u8 *)(SRam.data - SRam.start + a); + u8 *pm = (u8 *)(Pico.sv.data - Pico.sv.start + a); if (pm[0] != (u8)(d >> 8)) { - SRam.changed = 1; + Pico.sv.changed = 1; pm[0] = (u8)(d >> 8); } if (pm[1] != (u8)d) { - SRam.changed = 1; + Pico.sv.changed = 1; pm[1] = (u8)d; } } @@ -515,7 +515,7 @@ static u32 PicoRead8_z80(u32 a) } if ((a & 0x4000) == 0x0000) - d = Pico.zram[a & 0x1fff]; + d = PicoMem.zram[a & 0x1fff]; else if ((a & 0x6000) == 0x4000) // 0x4000-0x5fff d = ym2612_read_local_68k(); else @@ -538,7 +538,7 @@ static void PicoWrite8_z80(u32 a, u32 d) } if ((a & 0x4000) == 0x0000) { // z80 RAM - Pico.zram[a & 0x1fff] = (u8)d; + PicoMem.zram[a & 0x1fff] = (u8)d; return; } if ((a & 0x6000) == 0x4000) { // FM Sound @@ -765,7 +765,7 @@ static void m68k_mem_setup(void); PICO_INTERNAL void PicoMemSetup(void) { - int mask, rs, a; + int mask, rs, sstart, a; // setup the memory map cpu68k_map_set(m68k_read8_map, 0x000000, 0xffffff, m68k_unmapped_read8, 1); @@ -781,15 +781,16 @@ PICO_INTERNAL void PicoMemSetup(void) cpu68k_map_set(m68k_read16_map, 0x000000, rs - 1, Pico.rom, 0); // Common case of on-cart (save) RAM, usually at 0x200000-... 
- if ((SRam.flags & SRF_ENABLED) && SRam.data != NULL) { - rs = SRam.end - SRam.start; + if ((Pico.sv.flags & SRF_ENABLED) && Pico.sv.data != NULL) { + sstart = Pico.sv.start; + rs = Pico.sv.end - sstart; rs = (rs + mask) & ~mask; - if (SRam.start + rs >= 0x1000000) - rs = 0x1000000 - SRam.start; - cpu68k_map_set(m68k_read8_map, SRam.start, SRam.start + rs - 1, PicoRead8_sram, 1); - cpu68k_map_set(m68k_read16_map, SRam.start, SRam.start + rs - 1, PicoRead16_sram, 1); - cpu68k_map_set(m68k_write8_map, SRam.start, SRam.start + rs - 1, PicoWrite8_sram, 1); - cpu68k_map_set(m68k_write16_map, SRam.start, SRam.start + rs - 1, PicoWrite16_sram, 1); + if (sstart + rs >= 0x1000000) + rs = 0x1000000 - sstart; + cpu68k_map_set(m68k_read8_map, sstart, sstart + rs - 1, PicoRead8_sram, 1); + cpu68k_map_set(m68k_read16_map, sstart, sstart + rs - 1, PicoRead16_sram, 1); + cpu68k_map_set(m68k_write8_map, sstart, sstart + rs - 1, PicoWrite8_sram, 1); + cpu68k_map_set(m68k_write16_map, sstart, sstart + rs - 1, PicoWrite16_sram, 1); } // Z80 region @@ -816,10 +817,10 @@ PICO_INTERNAL void PicoMemSetup(void) // RAM and it's mirrors for (a = 0xe00000; a < 0x1000000; a += 0x010000) { - cpu68k_map_set(m68k_read8_map, a, a + 0xffff, Pico.ram, 0); - cpu68k_map_set(m68k_read16_map, a, a + 0xffff, Pico.ram, 0); - cpu68k_map_set(m68k_write8_map, a, a + 0xffff, Pico.ram, 0); - cpu68k_map_set(m68k_write16_map, a, a + 0xffff, Pico.ram, 0); + cpu68k_map_set(m68k_read8_map, a, a + 0xffff, PicoMem.ram, 0); + cpu68k_map_set(m68k_read16_map, a, a + 0xffff, PicoMem.ram, 0); + cpu68k_map_set(m68k_write8_map, a, a + 0xffff, PicoMem.ram, 0); + cpu68k_map_set(m68k_write16_map, a, a + 0xffff, PicoMem.ram, 0); } // Setup memory callbacks: @@ -896,10 +897,10 @@ static int get_scanline(int is_from_z80) { if (is_from_z80) { int mclk_z80 = z80_cyclesDone() * 15; - int mclk_line = timing.z80_scanline * 488 * 7; + int mclk_line = Pico.t.z80_scanline * 488 * 7; while (mclk_z80 - mclk_line >= 488 * 7) - 
timing.z80_scanline++, mclk_line += 488 * 7; - return timing.z80_scanline; + Pico.t.z80_scanline++, mclk_line += 488 * 7; + return Pico.t.z80_scanline; } return Pico.m.scanline; @@ -1282,14 +1283,14 @@ static void z80_md_out(unsigned short p, unsigned char d) static void z80_mem_setup(void) { - z80_map_set(z80_read_map, 0x0000, 0x1fff, Pico.zram, 0); - z80_map_set(z80_read_map, 0x2000, 0x3fff, Pico.zram, 0); + z80_map_set(z80_read_map, 0x0000, 0x1fff, PicoMem.zram, 0); + z80_map_set(z80_read_map, 0x2000, 0x3fff, PicoMem.zram, 0); z80_map_set(z80_read_map, 0x4000, 0x5fff, ym2612_read_local_z80, 1); z80_map_set(z80_read_map, 0x6000, 0x7fff, z80_md_vdp_read, 1); z80_map_set(z80_read_map, 0x8000, 0xffff, z80_md_bank_read, 1); - z80_map_set(z80_write_map, 0x0000, 0x1fff, Pico.zram, 0); - z80_map_set(z80_write_map, 0x2000, 0x3fff, Pico.zram, 0); + z80_map_set(z80_write_map, 0x0000, 0x1fff, PicoMem.zram, 0); + z80_map_set(z80_write_map, 0x2000, 0x3fff, PicoMem.zram, 0); z80_map_set(z80_write_map, 0x4000, 0x5fff, z80_md_ym2612_write, 1); z80_map_set(z80_write_map, 0x6000, 0x7fff, z80_md_vdp_br_write, 1); z80_map_set(z80_write_map, 0x8000, 0xffff, z80_md_bank_write, 1); @@ -1299,8 +1300,8 @@ static void z80_mem_setup(void) drZ80.z80_out = z80_md_out; #endif #ifdef _USE_CZ80 - Cz80_Set_Fetch(&CZ80, 0x0000, 0x1fff, (FPTR)Pico.zram); // main RAM - Cz80_Set_Fetch(&CZ80, 0x2000, 0x3fff, (FPTR)Pico.zram); // mirror + Cz80_Set_Fetch(&CZ80, 0x0000, 0x1fff, (FPTR)PicoMem.zram); // main RAM + Cz80_Set_Fetch(&CZ80, 0x2000, 0x3fff, (FPTR)PicoMem.zram); // mirror Cz80_Set_INPort(&CZ80, z80_md_in); Cz80_Set_OUTPort(&CZ80, z80_md_out); #endif diff --git a/pico/memory_amips.s b/pico/memory_amips.S similarity index 93% rename from pico/memory_amips.s rename to pico/memory_amips.S index b11c2e28..7ae25922 100644 --- a/pico/memory_amips.s +++ b/pico/memory_amips.S @@ -8,6 +8,8 @@ # OUT OF DATE +#include "pico_int_o32.h" + .set noreorder .set noat @@ -184,8 +186,8 @@ m_read32_table: 
PicoMemReset: - lui $v1, %hi(Pico+0x22204) - lw $v1, %lo(Pico+0x22204)($v1) # romsize + lui $v1, %hi(Pico+OFS_Pico_romsize) + lw $v1, %lo(Pico+OFS_Pico_romsize)($v1) # romsize lui $t0, 8 addu $v1, $t0 addiu $v1, -1 @@ -235,12 +237,11 @@ m_read_neg1: jr $ra addiu $v0, $0, 0xffff -# loads &Pico.rom to $t3 +# loads &Pico to $t3 .macro m_read_rom_try_sram is200000 size - lui $t2, %hi(SRam) - addiu $t2, %lo(SRam) - lui $t3, %hi(Pico+0x22200) - lw $t1, 8($t2) # SRam.end + lui $t2, %hi(Pico) + addiu $t2, %lo(Pico) + lw $t1, OFS_Pico_sv_end($t2) .if \is200000 ins $a0, $0, 19, 13 lui $t4, 0x20 @@ -248,12 +249,11 @@ m_read_neg1: .endif subu $t4, $a0, $t1 bgtz $t4, 1f - addiu $t3, %lo(Pico+0x22200) - lw $t1, 4($t2) # SRam.start + lw $t1, OFS_Pico_sv_start($t2) subu $t4, $t1, $a0 bgtz $t4, 1f nop - lb $t1, 0x11($t3) # Pico.m.sram_reg + lb $t1, OFS_Pico_m_sram_reg($t2) andi $t4, $t1, 5 beqz $t4, 1f nop @@ -288,8 +288,8 @@ m_read_neg1: .endm .macro m_read8_rom sect - lui $t0, %hi(Pico+0x22200) - lw $t0, %lo(Pico+0x22200)($t0) # rom + lui $t0, %hi(Pico+OFS_Pico_rom) + lw $t0, %lo(Pico+OFS_Pico_rom)($t0) # rom xori $a0, 1 ins $a0, $0, 19, 13 .if \sect @@ -388,15 +388,15 @@ m_read8_misc_io: nop m_read8_misc_hwreg: - lui $v0, %hi(Pico+0x2220f) + lui $v0, %hi(Pico+OFS_Pico_m_hardware) jr $ra - lb $v0, %lo(Pico+0x2220f)($v0) + lb $v0, %lo(Pico+OFS_Pico_m_hardware)($v0) m_read8_misc_ioports: - lui $v0, %hi(Pico+0x22000) + lui $v0, %hi(PicoMem+0x22000) ins $v0, $t0, 0, 5 jr $ra - lb $v0, %lo(Pico+0x22000)($v0) + lb $v0, %lo(PicoMem+0x22000)($v0) m_read8_misc2: lui $t0, 0xa1 @@ -423,10 +423,10 @@ m_read8_z80_misc: nop m_read8_fake_ym2612: - lb $v0, %lo(Pico+0x22208)($t0) # Pico.m.rotate + lb $v0, %lo(Pico+OFS_Pico_m_rotate)($t0) addiu $t1, $v0, 1 jr $ra - sb $t1, %lo(Pico+0x22208)($t0) + sb $t1, %lo(Pico+OFS_Pico_m_rotate)($t0) # delay slot friendly .macro m_read8_call16 funcname is_func_ptr=0 @@ -472,11 +472,11 @@ m_read8_vdp: nop m_read8_ram: - lui $t0, %hi(Pico) + lui $t0, 
%hi(PicoMem) ins $t0, $a0, 0, 16 xori $t0, 1 jr $ra - lb $v0, %lo(Pico)($t0) + lb $v0, %lo(PicoMem)($t0) m_read8_above_rom: # might still be SRam (Micro Machines, HardBall '95) @@ -486,8 +486,8 @@ m_read8_above_rom: # ############################################################################# .macro m_read16_rom sect - lui $t0, %hi(Pico+0x22200) - lw $t0, %lo(Pico+0x22200)($t0) # rom + lui $t0, %hi(Pico+OFS_Pico_rom) + lw $t0, %lo(Pico+OFS_Pico_rom)($t0) # rom ins $a0, $0, 0, 1 ins $a0, $0, 19, 13 .if \sect @@ -583,11 +583,11 @@ m_read16_vdp: nop m_read16_ram: - lui $t0, %hi(Pico) + lui $t0, %hi(PicoMem) ins $a0, $0, 0, 1 ins $t0, $a0, 0, 16 jr $ra - lh $v0, %lo(Pico)($t0) + lh $v0, %lo(PicoMem)($t0) m_read16_above_rom: # might still be SRam @@ -600,8 +600,8 @@ m_read16_above_rom: # ############################################################################# .macro m_read32_rom sect - lui $t0, %hi(Pico+0x22200) - lw $t0, %lo(Pico+0x22200)($t0) # rom + lui $t0, %hi(Pico+OFS_Pico_rom) + lw $t0, %lo(Pico+OFS_Pico_rom)($t0) # rom ins $a0, $0, 0, 1 ins $a0, $0, 19, 13 .if \sect @@ -723,11 +723,11 @@ m_read32_vdp: m_read32_call16 PicoVideoRead m_read32_ram: - lui $t0, %hi(Pico) + lui $t0, %hi(PicoMem) ins $a0, $0, 0, 1 ins $t0, $a0, 0, 16 - lh $v1, %lo(Pico)($t0) - lh $v0, %lo(Pico+2)($t0) + lh $v1, %lo(PicoMem)($t0) + lh $v0, %lo(PicoMem+2)($t0) jr $ra ins $v0, $v1, 16, 16 @@ -771,11 +771,11 @@ PicoWriteRomHW_SSF2: # u32 a, u32 d bnez $a0, pwr_banking # sram register - lui $t0, %hi(Pico+0x22211) - lb $t1, %lo(Pico+0x22211)($t0) # Pico.m.sram_reg + lui $t0, %hi(Pico+OFS_Pico_m_sram_reg) + lb $t1, %lo(Pico+OFS_Pico_m_sram_reg)($t0) # Pico.m.sram_reg ins $t1, $a1, 0, 2 jr $ra - sb $t1, %lo(Pico+0x22211)($t0) + sb $t1, %lo(Pico+OFS_Pico_m_sram_reg)($t0) pwr_banking: andi $a1, 0x1f diff --git a/pico/memory_arm.s b/pico/memory_arm.S similarity index 73% rename from pico/memory_arm.s rename to pico/memory_arm.S index f6d7f79d..87846d6c 100644 --- a/pico/memory_arm.s +++ 
b/pico/memory_arm.S @@ -6,6 +6,8 @@ * See COPYING file in the top-level directory. */ +#include "pico_int_o32.h" + .equ SRR_MAPPED, (1 << 0) .equ SRR_READONLY, (1 << 1) .equ SRF_EEPROM, (1 << 1) @@ -21,35 +23,32 @@ .global PicoWrite8_io .global PicoWrite16_io -PicoRead8_sram: @ u32 a, u32 d - ldr r2, =(SRam) - ldr r3, =(Pico+0x22200) - ldr r1, [r2, #8] @ SRam.end +PicoRead8_sram: @ u32 a + ldr r3, =Pico + ldr r1, [r3, #OFS_Pico_sv_end] cmp r0, r1 bgt m_read8_nosram - ldr r1, [r2, #4] @ SRam.start - cmp r0, r1 + ldr r2, [r3, #OFS_Pico_sv_start] + cmp r0, r2 blt m_read8_nosram - ldrb r1, [r3, #0x11] @ Pico.m.sram_reg + ldrb r1, [r3, #OFS_Pico_m_sram_reg] tst r1, #SRR_MAPPED beq m_read8_nosram - ldr r1, [r2, #0x0c] + ldr r1, [r3, #OFS_Pico_sv_flags] tst r1, #SRF_EEPROM bne m_read8_eeprom - ldr r1, [r2, #4] @ SRam.start - ldr r2, [r2] @ SRam.data - sub r0, r0, r1 - add r0, r0, r2 - ldrb r0, [r0] + ldr r1, [r3, #OFS_Pico_sv_data] + sub r0, r0, r2 + ldrb r0, [r0, r1] bx lr m_read8_nosram: - ldr r1, [r3, #4] @ romsize + ldr r1, [r3, #OFS_Pico_romsize] cmp r0, r1 movgt r0, #0 bxgt lr @ bad location @ XXX: banking unfriendly - ldr r1, [r3] + ldr r1, [r3, #OFS_Pico_rom] eor r0, r0, #1 ldrb r0, [r1, r0] bx lr @@ -63,7 +62,7 @@ m_read8_eeprom: bx lr -PicoRead8_io: @ u32 a, u32 d +PicoRead8_io: @ u32 a bic r2, r0, #0x001f @ most commonly we get i/o port read, cmp r2, #0xa10000 @ so check for it first beq io_ports_read @@ -73,11 +72,11 @@ m_read8_not_io: cmp r2, #0x1000 bne m_read8_not_brq - ldr r3, =(Pico+0x22200) + ldr r3, =Pico mov r1, r0 - ldr r0, [r3, #8] @ Pico.m.rotate + ldr r0, [r3, #OFS_Pico_m_rotate] add r0, r0, #1 - strb r0, [r3, #8] + strb r0, [r3, #OFS_Pico_m_rotate] eor r0, r0, r0, lsl #6 tst r1, #1 @@ -87,8 +86,8 @@ m_read8_not_io: cmp r2, #0x1100 bxne lr @ not busreq - ldrb r1, [r3, #(8+0x01)] @ Pico.m.z80Run - ldrb r2, [r3, #(8+0x0f)] @ Pico.m.z80_reset + ldrb r1, [r3, #OFS_Pico_m_z80Run] + ldrb r2, [r3, #OFS_Pico_m_z80_reset] orr r0, r0, r1 orr r0, r0, r2 bx lr 
@@ -104,36 +103,33 @@ m_read8_not_brq: @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ PicoRead16_sram: @ u32 a, u32 d - ldr r2, =(SRam) - ldr r3, =(Pico+0x22200) - ldr r1, [r2, #8] @ SRam.end + ldr r3, =Pico + ldr r1, [r3, #OFS_Pico_sv_end] cmp r0, r1 bgt m_read16_nosram - ldr r1, [r2, #4] @ SRam.start - cmp r0, r1 + ldr r2, [r3, #OFS_Pico_sv_start] + cmp r0, r2 blt m_read16_nosram - ldrb r1, [r3, #0x11] @ Pico.m.sram_reg + ldrb r1, [r3, #OFS_Pico_m_sram_reg] tst r1, #SRR_MAPPED beq m_read16_nosram - ldr r1, [r2, #0x0c] + ldr r1, [r3, #OFS_Pico_sv_flags] tst r1, #SRF_EEPROM bne EEPROM_read - ldr r1, [r2, #4] @ SRam.start - ldr r2, [r2] @ SRam.data - sub r0, r0, r1 - add r0, r0, r2 - ldrb r1, [r0], #1 - ldrb r0, [r0] + ldr r1, [r3, #OFS_Pico_sv_data] + sub r0, r0, r2 + ldrb r1, [r0, r1]! + ldrb r0, [r0, #1] orr r0, r0, r1, lsl #8 bx lr m_read16_nosram: - ldr r1, [r3, #4] @ romsize + ldr r1, [r3, #OFS_Pico_romsize] cmp r0, r1 movgt r0, #0 bxgt lr @ bad location @ XXX: banking unfriendly - ldr r1, [r3] + ldr r1, [r3, #OFS_Pico_rom] ldrh r0, [r1, r0] bx lr @@ -152,19 +148,19 @@ m_read16_not_io: cmp r2, #0x1000 bne m_read16_not_brq - ldr r3, =(Pico+0x22200) + ldr r3, =Pico and r2, r0, #0xff00 - ldr r0, [r3, #8] @ Pico.m.rotate + ldr r0, [r3, #OFS_Pico_m_rotate] add r0, r0, #1 - strb r0, [r3, #8] + strb r0, [r3, #OFS_Pico_m_rotate] eor r0, r0, r0, lsl #5 eor r0, r0, r0, lsl #8 bic r0, r0, #0x100 @ bit8 defined in this area cmp r2, #0x1100 bxne lr @ not busreq - ldrb r1, [r3, #(8+0x01)] @ Pico.m.z80Run - ldrb r2, [r3, #(8+0x0f)] @ Pico.m.z80_reset + ldrb r1, [r3, #OFS_Pico_m_z80Run] + ldrb r2, [r3, #OFS_Pico_m_z80_reset] orr r0, r0, r1, lsl #8 orr r0, r0, r2, lsl #8 bx lr @@ -202,12 +198,12 @@ m_write8_not_z80ctl: eor r2, r2, #0x003000 eors r2, r2, #0x0000f1 bne m_write8_not_sreg - ldr r3, =(Pico+0x22200) - ldrb r2, [r3, #(8+9)] @ Pico.m.sram_reg + ldr r3, =Pico + ldrb r2, [r3, #OFS_Pico_m_sram_reg] and r1, r1, #(SRR_MAPPED|SRR_READONLY) 
bic r2, r2, #(SRR_MAPPED|SRR_READONLY) orr r2, r2, r1 - strb r2, [r3, #(8+9)] + strb r2, [r3, #OFS_Pico_m_sram_reg] bx lr m_write8_not_sreg: @@ -239,12 +235,12 @@ m_write16_not_z80ctl: eor r2, r2, #0x003000 eors r2, r2, #0x0000f0 bne m_write16_not_sreg - ldr r3, =(Pico+0x22200) - ldrb r2, [r3, #(8+9)] @ Pico.m.sram_reg + ldr r3, =Pico + ldrb r2, [r3, #OFS_Pico_m_sram_reg] and r1, r1, #(SRR_MAPPED|SRR_READONLY) bic r2, r2, #(SRR_MAPPED|SRR_READONLY) orr r2, r2, r1 - strb r2, [r3, #(8+9)] + strb r2, [r3, #OFS_Pico_m_sram_reg] bx lr m_write16_not_sreg: diff --git a/pico/mode4.c b/pico/mode4.c index bda11888..55e6d104 100644 --- a/pico/mode4.c +++ b/pico/mode4.c @@ -31,7 +31,7 @@ static int TileNormM4(int sx, int addr, int pal) unsigned char *pd = Pico.est.HighCol + sx; unsigned int pack, t; - pack = *(unsigned int *)(Pico.vram + addr); /* Get 4 bitplanes / 8 pixels */ + pack = *(unsigned int *)(PicoMem.vram + addr); /* Get 4 bitplanes / 8 pixels */ if (pack) { PLANAR_PIXEL(0, 0) @@ -53,7 +53,7 @@ static int TileFlipM4(int sx,int addr,int pal) unsigned char *pd = Pico.est.HighCol + sx; unsigned int pack, t; - pack = *(unsigned int *)(Pico.vram + addr); /* Get 4 bitplanes / 8 pixels */ + pack = *(unsigned int *)(PicoMem.vram + addr); /* Get 4 bitplanes / 8 pixels */ if (pack) { PLANAR_PIXEL(0, 7) @@ -83,7 +83,7 @@ static void draw_sprites(int scanline) if (pv->reg[0] & 8) xoff = 0; - sat = (unsigned char *)Pico.vram + ((pv->reg[5] & 0x7e) << 7); + sat = (unsigned char *)PicoMem.vram + ((pv->reg[5] & 0x7e) << 7); if (pv->reg[1] & 2) { addr_mask = 0xfe; h = 16; } else { @@ -161,7 +161,7 @@ static void DrawDisplayM4(int scanline) line -= 224; // Find name table: - nametab = Pico.vram; + nametab = PicoMem.vram; nametab += (pv->reg[2] & 0x0e) << (10-1); nametab += (line>>3) << (6-1); @@ -248,7 +248,7 @@ void PicoLineMode4(int line) void PicoDoHighPal555M4(void) { - unsigned int *spal=(void *)Pico.cram; + unsigned int *spal=(void *)PicoMem.cram; unsigned int *dpal=(void 
*)Pico.est.HighPal; unsigned int t; int i; diff --git a/pico/pico.c b/pico/pico.c index 0404367c..bcd8aa1b 100644 --- a/pico/pico.c +++ b/pico/pico.c @@ -11,6 +11,7 @@ #include "sound/ym2612.h" struct Pico Pico; +struct PicoMem PicoMem; int PicoOpt; int PicoSkipFrame; // skip rendering frame? int PicoPad[2]; // Joypads, format is MXYZ SACB RLDU @@ -20,11 +21,8 @@ int PicoQuirks; // game-specific quirks int PicoRegionOverride; // override the region detection 0: Auto, 1: Japan NTSC, 2: Japan PAL, 4: US, 8: Europe int PicoAutoRgnOrder; -struct PicoSRAM SRam; int emustatus; // rapid_ym2612, multi_ym_updates -struct PicoTiming timing; - void (*PicoWriteSound)(int len) = NULL; // called at the best time to send sound buffer (PsndOut) to hardware void (*PicoResetHook)(void) = NULL; void (*PicoLineHook)(void) = NULL; @@ -34,11 +32,13 @@ void PicoInit(void) { // Blank space for state: memset(&Pico,0,sizeof(Pico)); + memset(&PicoMem,0,sizeof(PicoMem)); memset(&PicoPad,0,sizeof(PicoPad)); memset(&PicoPadInt,0,sizeof(PicoPadInt)); - Pico.est.Pico_video = &Pico.video; - Pico.est.Pico_vram = Pico.vram; + Pico.est.Pico = &Pico; + Pico.est.PicoMem_vram = PicoMem.vram; + Pico.est.PicoMem_cram = PicoMem.cram; Pico.est.PicoOpt = &PicoOpt; // Init CPUs: @@ -61,18 +61,18 @@ void PicoExit(void) PicoCartUnload(); z80_exit(); - if (SRam.data) - free(SRam.data); + if (Pico.sv.data) + free(Pico.sv.data); pevt_dump(); } void PicoPower(void) { Pico.m.frame_count = 0; - SekCycleCnt = SekCycleAim = 0; + Pico.t.m68c_cnt = Pico.t.m68c_aim = 0; // clear all memory of the emulated machine - memset(&Pico.ram,0,(unsigned char *)&Pico.rom - Pico.ram); + memset(&PicoMem,0,sizeof(PicoMem)); memset(&Pico.video,0,sizeof(Pico.video)); memset(&Pico.m,0,sizeof(Pico.m)); @@ -81,7 +81,7 @@ void PicoPower(void) z80_reset(); // my MD1 VA6 console has this in IO - Pico.ioports[1] = Pico.ioports[2] = Pico.ioports[3] = 0xff; + PicoMem.ioports[1] = PicoMem.ioports[2] = PicoMem.ioports[3] = 0xff; // default VDP 
register values (based on Fusion) Pico.video.reg[0] = Pico.video.reg[1] = 0x04; @@ -211,12 +211,12 @@ int PicoReset(void) // reset sram state; enable sram access by default if it doesn't overlap with ROM Pico.m.sram_reg = 0; - if ((SRam.flags & SRF_EEPROM) || Pico.romsize <= SRam.start) + if ((Pico.sv.flags & SRF_EEPROM) || Pico.romsize <= Pico.sv.start) Pico.m.sram_reg |= SRR_MAPPED; - if (SRam.flags & SRF_ENABLED) - elprintf(EL_STATUS, "sram: %06x - %06x; eeprom: %i", SRam.start, SRam.end, - !!(SRam.flags & SRF_EEPROM)); + if (Pico.sv.flags & SRF_ENABLED) + elprintf(EL_STATUS, "sram: %06x - %06x; eeprom: %i", Pico.sv.start, Pico.sv.end, + !!(Pico.sv.flags & SRF_EEPROM)); return 0; } @@ -274,7 +274,7 @@ PICO_INTERNAL int CheckDMA(void) elprintf(EL_VDPDMA, "~Dma %i op=%i can=%i burn=%i [%u]", Pico.m.dma_xfers, dma_op1, xfers_can, burn, SekCyclesDone()); - //dprintf("~aim: %i, cnt: %i", SekCycleAim, SekCycleCnt); + //dprintf("~aim: %i, cnt: %i", Pico.t.m68c_aim, Pico.t.m68c_cnt); return burn; } @@ -286,18 +286,18 @@ PICO_INTERNAL void PicoSyncZ80(unsigned int m68k_cycles_done) int m68k_cnt; int cnt; - m68k_cnt = m68k_cycles_done - timing.m68c_frame_start; - timing.z80c_aim = cycles_68k_to_z80(m68k_cnt); - cnt = timing.z80c_aim - timing.z80c_cnt; + m68k_cnt = m68k_cycles_done - Pico.t.m68c_frame_start; + Pico.t.z80c_aim = cycles_68k_to_z80(m68k_cnt); + cnt = Pico.t.z80c_aim - Pico.t.z80c_cnt; pprof_start(z80); elprintf(EL_BUSREQ, "z80 sync %i (%u|%u -> %u|%u)", cnt, - timing.z80c_cnt, timing.z80c_cnt * 15 / 7 / 488, - timing.z80c_aim, timing.z80c_aim * 15 / 7 / 488); + Pico.t.z80c_cnt, Pico.t.z80c_cnt * 15 / 7 / 488, + Pico.t.z80c_aim, Pico.t.z80c_aim * 15 / 7 / 488); if (cnt > 0) - timing.z80c_cnt += z80_run(cnt); + Pico.t.z80c_cnt += z80_run(cnt); pprof_end(z80); } diff --git a/pico/pico_cmn.c b/pico/pico_cmn.c index 0907b930..a4507577 100644 --- a/pico/pico_cmn.c +++ b/pico/pico_cmn.c @@ -21,24 +21,24 @@ SekRunM68k(m68k_cycles) #endif -// sync m68k to SekCycleAim 
+// sync m68k to Pico.t.m68c_aim static void SekSyncM68k(void) { int cyc_do; pprof_start(m68k); pevt_log_m68k_o(EVT_RUN_START); - while ((cyc_do = SekCycleAim - SekCycleCnt) > 0) { - SekCycleCnt += cyc_do; + while ((cyc_do = Pico.t.m68c_aim - Pico.t.m68c_cnt) > 0) { + Pico.t.m68c_cnt += cyc_do; #if defined(EMU_C68K) PicoCpuCM68k.cycles = cyc_do; CycloneRun(&PicoCpuCM68k); - SekCycleCnt -= PicoCpuCM68k.cycles; + Pico.t.m68c_cnt -= PicoCpuCM68k.cycles; #elif defined(EMU_M68K) - SekCycleCnt += m68k_execute(cyc_do) - cyc_do; + Pico.t.m68c_cnt += m68k_execute(cyc_do) - cyc_do; #elif defined(EMU_F68K) - SekCycleCnt += fm68k_emulate(cyc_do, 0) - cyc_do; + Pico.t.m68c_cnt += fm68k_emulate(cyc_do, 0) - cyc_do; #endif } @@ -51,11 +51,11 @@ static void SekSyncM68k(void) static inline void SekRunM68k(int cyc) { - SekCycleAim += cyc; - cyc = SekCycleAim - SekCycleCnt; + Pico.t.m68c_aim += cyc; + cyc = Pico.t.m68c_aim - Pico.t.m68c_cnt; if (cyc <= 0) return; - SekCycleCnt += cyc >> 6; // refresh slowdowns + Pico.t.m68c_cnt += cyc >> 6; // refresh slowdowns SekSyncM68k(); } @@ -108,7 +108,7 @@ static int PicoFrameHints(void) } else skip=PicoSkipFrame; - timing.m68c_frame_start = SekCyclesDone(); + Pico.t.m68c_frame_start = SekCyclesDone(); pv->v_counter = Pico.m.scanline = 0; z80_resetCycles(); PsndStartFrame(); @@ -170,7 +170,7 @@ static int PicoFrameHints(void) } // Run scanline: - line_base_cycles = SekCyclesDone(); + Pico.t.m68c_line_start = SekCyclesDone(); do_timing_hacks_as(pv, vdp_slots); CPUS_RUN(CYCLES_M68K_LINE); @@ -212,7 +212,7 @@ static int PicoFrameHints(void) // there must be a delay after vblank bit is set and irq is asserted (Mazin Saga) // also delay between F bit (bit 7) is set in SR and IRQ happens (Ex-Mutants) // also delay between last H-int and V-int (Golden Axe 3) - line_base_cycles = SekCyclesDone(); + Pico.t.m68c_line_start = SekCyclesDone(); do_timing_hacks_vb(); CPUS_RUN(CYCLES_M68K_VINT_LAG); @@ -276,7 +276,7 @@ static int PicoFrameHints(void) } // 
Run scanline: - line_base_cycles = SekCyclesDone(); + Pico.t.m68c_line_start = SekCyclesDone(); do_timing_hacks_vb(); CPUS_RUN(CYCLES_M68K_LINE); @@ -300,7 +300,7 @@ static int PicoFrameHints(void) } // Run scanline: - line_base_cycles = SekCyclesDone(); + Pico.t.m68c_line_start = SekCyclesDone(); do_timing_hacks_as(pv, vdp_slots); CPUS_RUN(CYCLES_M68K_LINE); diff --git a/pico/pico_int.h b/pico/pico_int.h index 89cccc79..765afaff 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -128,26 +128,18 @@ extern m68ki_cpu_core PicoCpuMM68k, PicoCpuMS68k; #endif #endif // EMU_M68K -// while running, cnt represents target of current timeslice -// while not in SekRun(), it's actual cycles done -// (but always use SekCyclesDone() if you need current position) -// cnt may change if timeslice is ended prematurely or extended, -// so we use SekCycleAim for the actual target -extern unsigned int SekCycleCnt; -extern unsigned int SekCycleAim; - // number of cycles done (can be checked anywhere) -#define SekCyclesDone() (SekCycleCnt - SekCyclesLeft) +#define SekCyclesDone() (Pico.t.m68c_cnt - SekCyclesLeft) // burn cycles while not in SekRun() and while in -#define SekCyclesBurn(c) SekCycleCnt += c +#define SekCyclesBurn(c) Pico.t.m68c_cnt += c #define SekCyclesBurnRun(c) { \ SekCyclesLeft -= c; \ } // note: sometimes may extend timeslice to delay an irq #define SekEndRun(after) { \ - SekCycleCnt -= SekCyclesLeft - (after); \ + Pico.t.m68c_cnt -= SekCyclesLeft - (after); \ SekCyclesLeft = after; \ } @@ -212,10 +204,10 @@ extern struct DrZ80 drZ80; #define Z80_STATE_SIZE 0x60 #define z80_resetCycles() \ - timing.z80c_cnt = timing.z80c_aim = timing.z80_scanline = 0 + Pico.t.z80c_cnt = Pico.t.z80c_aim = Pico.t.z80_scanline = 0 #define z80_cyclesDone() \ - (timing.z80c_aim - z80_cyclesLeft) + (Pico.t.z80c_aim - z80_cyclesLeft) #define cycles_68k_to_z80(x) ((x) * 3823 >> 13) @@ -359,15 +351,16 @@ struct PicoEState void *DrawLineDest; // draw destination unsigned char *HighCol; int 
*HighPreSpr; - void *Pico_video; - void *Pico_vram; + struct Pico *Pico; + void *PicoMem_vram; + void *PicoMem_cram; int *PicoOpt; unsigned char *Draw2FB; unsigned short HighPal[0x100]; }; -// some assembly stuff depend on these, do not touch! -struct Pico +// some assembly stuff still depends on these, do not touch! +struct PicoMem { unsigned char ram[0x10000]; // 0x00000 scratch ram union { // vram is byteswapped for easier reads when drawing @@ -379,14 +372,6 @@ struct Pico unsigned char pad[0xf0]; // unused unsigned short cram[0x40]; // 0x22100 unsigned short vsram[0x40]; // 0x22180 - - unsigned char *rom; // 0x22200 - unsigned int romsize; // 0x22204 (on 32bits) - - struct PicoMisc m; - struct PicoVideo video; - struct PicoMS ms; - struct PicoEState est; }; // sram @@ -396,7 +381,7 @@ struct Pico #define SRF_ENABLED (1 << 0) #define SRF_EEPROM (1 << 1) -struct PicoSRAM +struct PicoCartSave { unsigned char *data; // actual data unsigned int start; // start address in 68k address space @@ -412,6 +397,38 @@ struct PicoSRAM unsigned int size; }; +struct PicoTiming +{ + // while running, cnt represents target of current timeslice + // while not in SekRun(), it's actual cycles done + // (but always use SekCyclesDone() if you need current position) + // _cnt may change if timeslice is ended prematurely or extended, + // so we use _aim for the actual target + unsigned int m68c_cnt; + unsigned int m68c_aim; + unsigned int m68c_frame_start; // m68k cycles + unsigned int m68c_line_start; + + unsigned int z80c_cnt; // z80 cycles done (this frame) + unsigned int z80c_aim; + int z80_scanline; +}; + +// run tools/mkoffsets pico/pico_int_o32.h if you change these +// careful with savestate compat +struct Pico +{ + struct PicoVideo video; + struct PicoMisc m; + struct PicoTiming t; + struct PicoCartSave sv; + struct PicoEState est; + struct PicoMS ms; + + unsigned char *rom; + unsigned int romsize; +}; + // MCD #define PCM_MIXBUF_LEN ((12500000 / 384) / 50 + 1) @@ -592,15 
+609,6 @@ struct Pico32xMem unsigned short pwm_fifo[2][4]; // [0] - current raw, others - fifo entries }; -struct PicoTiming -{ - unsigned int m68c_frame_start; // m68k cycles - unsigned int z80c_cnt; // z80 cycles done (this frame) - unsigned int z80c_aim; - int z80_scanline; -}; -extern struct PicoTiming timing; - // area.c extern void (*PicoLoadStateHook)(void); @@ -699,7 +707,7 @@ void pcd_state_loaded_mem(void); // pico.c extern struct Pico Pico; -extern struct PicoSRAM SRam; +extern struct PicoMem PicoMem; extern int PicoPadInt[2]; extern int emustatus; extern void (*PicoResetHook)(void); @@ -809,7 +817,6 @@ void ym2612_unpack_state(void); // videoport.c -extern int line_base_cycles; PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d); PICO_INTERNAL_ASM unsigned int PicoVideoRead(unsigned int a); unsigned char PicoVideoRead8DataH(void); @@ -898,7 +905,7 @@ void p32x_event_schedule_sh2(SH2 *sh2, enum p32x_event event, int after); void p32x_schedule_hint(SH2 *sh2, int m68k_cycles); // 32x/memory.c -struct Pico32xMem *Pico32xMem; +extern struct Pico32xMem *Pico32xMem; unsigned int PicoRead8_32x(unsigned int a); unsigned int PicoRead16_32x(unsigned int a); void PicoWrite8_32x(unsigned int a, unsigned int d); diff --git a/pico/pico_int_o32.h b/pico/pico_int_o32.h index dc7aaa2a..720a5ee4 100644 --- a/pico/pico_int_o32.h +++ b/pico/pico_int_o32.h @@ -1,11 +1,26 @@ -/* autogenerated by ./tools/mkoffsets, do not edit */ -#define OFS_DrawScanline 0x00 -#define OFS_rendstatus 0x04 -#define OFS_DrawLineDest 0x08 -#define OFS_HighCol 0x0c -#define OFS_HighPreSpr 0x10 -#define OFS_Pico_video 0x14 -#define OFS_Pico_vram 0x18 -#define OFS_PicoOpt 0x1c -#define OFS_Draw2FB 0x20 -#define OFS_HighPal 0x24 +/* autogenerated by tools/mkoffsets, do not edit */ +#define OFS_Pico_video_reg 0x0000 +#define OFS_Pico_m_rotate 0x0040 +#define OFS_Pico_m_z80Run 0x0041 +#define OFS_Pico_m_dirtyPal 0x0046 +#define OFS_Pico_m_hardware 0x0047 +#define OFS_Pico_m_z80_reset 
0x004f +#define OFS_Pico_m_sram_reg 0x0049 +#define OFS_Pico_sv 0x007c +#define OFS_Pico_sv_data 0x007c +#define OFS_Pico_sv_start 0x0080 +#define OFS_Pico_sv_end 0x0084 +#define OFS_Pico_sv_flags 0x0088 +#define OFS_Pico_rom 0x031c +#define OFS_Pico_romsize 0x0320 +#define OFS_EST_DrawScanline 0x00 +#define OFS_EST_rendstatus 0x04 +#define OFS_EST_DrawLineDest 0x08 +#define OFS_EST_HighCol 0x0c +#define OFS_EST_HighPreSpr 0x10 +#define OFS_EST_Pico 0x14 +#define OFS_EST_PicoMem_vram 0x18 +#define OFS_EST_PicoMem_cram 0x1c +#define OFS_EST_PicoOpt 0x20 +#define OFS_EST_Draw2FB 0x24 +#define OFS_EST_HighPal 0x28 diff --git a/pico/sek.c b/pico/sek.c index c91374bb..f599d9b1 100644 --- a/pico/sek.c +++ b/pico/sek.c @@ -10,11 +10,6 @@ #include "pico_int.h" #include "memory.h" - -unsigned int SekCycleCnt; -unsigned int SekCycleAim; - - /* context */ // Cyclone 68000 #ifdef EMU_C68K @@ -36,8 +31,8 @@ M68K_CONTEXT PicoCpuFM68k; static int SekIntAck(int level) { // try to emulate VDP's reaction to 68000 int ack - if (level == 4) { Pico.video.pending_ints = 0; elprintf(EL_INTS, "hack: @ %06x [%u]", SekPc, SekCycleCnt); } - else if(level == 6) { Pico.video.pending_ints &= ~0x20; elprintf(EL_INTS, "vack: @ %06x [%u]", SekPc, SekCycleCnt); } + if (level == 4) { Pico.video.pending_ints = 0; elprintf(EL_INTS, "hack: @ %06x [%u]", SekPc, Pico.t.m68c_cnt); } + else if(level == 6) { Pico.video.pending_ints &= ~0x20; elprintf(EL_INTS, "vack: @ %06x [%u]", SekPc, Pico.t.m68c_cnt); } PicoCpuCM68k.irq = 0; return CYCLONE_INT_ACK_AUTOVECTOR; } @@ -76,8 +71,8 @@ static int SekUnrecognizedOpcode() #ifdef EMU_M68K static int SekIntAckM68K(int level) { - if (level == 4) { Pico.video.pending_ints = 0; elprintf(EL_INTS, "hack: @ %06x [%u]", SekPc, SekCycleCnt); } - else if(level == 6) { Pico.video.pending_ints &= ~0x20; elprintf(EL_INTS, "vack: @ %06x [%u]", SekPc, SekCycleCnt); } + if (level == 4) { Pico.video.pending_ints = 0; elprintf(EL_INTS, "hack: @ %06x [%u]", SekPc, Pico.t.m68c_cnt); 
} + else if(level == 6) { Pico.video.pending_ints &= ~0x20; elprintf(EL_INTS, "vack: @ %06x [%u]", SekPc, Pico.t.m68c_cnt); } CPU_INT_LEVEL = 0; return M68K_INT_ACK_AUTOVECTOR; } @@ -168,17 +163,17 @@ PICO_INTERNAL int SekReset(void) void SekStepM68k(void) { - SekCycleAim=SekCycleCnt+1; + Pico.t.m68c_aim = Pico.t.m68c_cnt + 1; #if defined(EMU_CORE_DEBUG) - SekCycleCnt+=CM_compareRun(1, 0); + Pico.t.m68c_cnt += CM_compareRun(1, 0); #elif defined(EMU_C68K) PicoCpuCM68k.cycles=1; CycloneRun(&PicoCpuCM68k); - SekCycleCnt+=1-PicoCpuCM68k.cycles; + Pico.t.m68c_cnt += 1 - PicoCpuCM68k.cycles; #elif defined(EMU_M68K) - SekCycleCnt+=m68k_execute(1); + Pico.t.m68c_cnt += m68k_execute(1); #elif defined(EMU_F68K) - SekCycleCnt+=fm68k_emulate(1, 0); + Pico.t.m68c_cnt += fm68k_emulate(1, 0); #endif } @@ -228,7 +223,7 @@ PICO_INTERNAL void SekPackCpu(unsigned char *cpu, int is_sub) *(unsigned int *)(cpu+0x40) = pc; *(unsigned int *)(cpu+0x50) = - is_sub ? SekCycleCntS68k : SekCycleCnt; + is_sub ? SekCycleCntS68k : Pico.t.m68c_cnt; } PICO_INTERNAL void SekUnpackCpu(const unsigned char *cpu, int is_sub) @@ -268,7 +263,7 @@ PICO_INTERNAL void SekUnpackCpu(const unsigned char *cpu, int is_sub) if (is_sub) SekCycleCntS68k = *(unsigned int *)(cpu+0x50); else - SekCycleCnt = *(unsigned int *)(cpu+0x50); + Pico.t.m68c_cnt = *(unsigned int *)(cpu+0x50); } @@ -467,7 +462,7 @@ void SekTrace(int is_s68k) struct ref_68k *x68k = &ref_68ks[is_s68k]; u32 pc = is_s68k ? SekPcS68k : SekPc; u32 sr = is_s68k ? SekSrS68k : SekSr; - u32 cycles = is_s68k ? SekCycleCntS68k : SekCycleCnt; + u32 cycles = is_s68k ? 
SekCycleCntS68k : Pico.t.m68c_cnt; u32 r; u8 cmd; #ifdef CPU_CMP_W diff --git a/pico/sms.c b/pico/sms.c index 7dc796ef..a2351b0f 100644 --- a/pico/sms.c +++ b/pico/sms.c @@ -23,7 +23,7 @@ static unsigned char vdp_data_read(void) struct PicoVideo *pv = &Pico.video; unsigned char d; - d = Pico.vramb[pv->addr]; + d = PicoMem.vramb[pv->addr]; pv->addr = (pv->addr + 1) & 0x3fff; pv->pending = 0; return d; @@ -44,10 +44,10 @@ static void vdp_data_write(unsigned char d) struct PicoVideo *pv = &Pico.video; if (pv->type == 3) { - Pico.cram[pv->addr & 0x1f] = d; + PicoMem.cram[pv->addr & 0x1f] = d; Pico.m.dirtyPal = 1; } else { - Pico.vramb[pv->addr] = d; + PicoMem.vramb[pv->addr] = d; } pv->addr = (pv->addr + 1) & 0x3fff; @@ -180,7 +180,7 @@ static void xwrite(unsigned int a, unsigned char d) { elprintf(EL_IO, "z80 write [%04x] %02x", a, d); if (a >= 0xc000) - Pico.zram[a & 0x1fff] = d; + PicoMem.zram[a & 0x1fff] = d; if (a >= 0xfff8) write_bank(a, d); } @@ -195,7 +195,7 @@ void PicoPowerMS(void) { int s, tmp; - memset(&Pico.ram,0,(unsigned char *)&Pico.rom - Pico.ram); + memset(&PicoMem,0,sizeof(PicoMem)); memset(&Pico.video,0,sizeof(Pico.video)); memset(&Pico.m,0,sizeof(Pico.m)); Pico.m.pal = 0; @@ -219,11 +219,11 @@ void PicoPowerMS(void) void PicoMemSetupMS(void) { z80_map_set(z80_read_map, 0x0000, 0xbfff, Pico.rom, 0); - z80_map_set(z80_read_map, 0xc000, 0xdfff, Pico.zram, 0); - z80_map_set(z80_read_map, 0xe000, 0xffff, Pico.zram, 0); + z80_map_set(z80_read_map, 0xc000, 0xdfff, PicoMem.zram, 0); + z80_map_set(z80_read_map, 0xe000, 0xffff, PicoMem.zram, 0); z80_map_set(z80_write_map, 0x0000, 0xbfff, xwrite, 1); - z80_map_set(z80_write_map, 0xc000, 0xdfff, Pico.zram, 0); + z80_map_set(z80_write_map, 0xc000, 0xdfff, PicoMem.zram, 0); z80_map_set(z80_write_map, 0xe000, 0xffff, xwrite, 1); #ifdef _USE_DRZ80 @@ -232,8 +232,8 @@ void PicoMemSetupMS(void) #endif #ifdef _USE_CZ80 Cz80_Set_Fetch(&CZ80, 0x0000, 0xbfff, (FPTR)Pico.rom); - Cz80_Set_Fetch(&CZ80, 0xc000, 0xdfff, 
(FPTR)Pico.zram); - Cz80_Set_Fetch(&CZ80, 0xe000, 0xffff, (FPTR)Pico.zram); + Cz80_Set_Fetch(&CZ80, 0xc000, 0xdfff, (FPTR)PicoMem.zram); + Cz80_Set_Fetch(&CZ80, 0xe000, 0xffff, (FPTR)PicoMem.zram); Cz80_Set_INPort(&CZ80, z80_sms_in); Cz80_Set_OUTPort(&CZ80, z80_sms_out); #endif diff --git a/pico/state.c b/pico/state.c index fce247a0..8a2f2aa0 100644 --- a/pico/state.c +++ b/pico/state.c @@ -230,9 +230,9 @@ static int state_save(void *file) memset(buff, 0, sizeof(buff)); SekPackCpu(buff, 0); CHECKED_WRITE_BUFF(CHUNK_M68K, buff); - CHECKED_WRITE_BUFF(CHUNK_RAM, Pico.ram); - CHECKED_WRITE_BUFF(CHUNK_VSRAM, Pico.vsram); - CHECKED_WRITE_BUFF(CHUNK_IOPORTS, Pico.ioports); + CHECKED_WRITE_BUFF(CHUNK_RAM, PicoMem.ram); + CHECKED_WRITE_BUFF(CHUNK_VSRAM, PicoMem.vsram); + CHECKED_WRITE_BUFF(CHUNK_IOPORTS, PicoMem.ioports); ym2612_pack_state(); CHECKED_WRITE(CHUNK_FM, 0x200+4, ym2612_regs); } @@ -240,9 +240,9 @@ static int state_save(void *file) CHECKED_WRITE_BUFF(CHUNK_SMS, Pico.ms); } - CHECKED_WRITE_BUFF(CHUNK_VRAM, Pico.vram); - CHECKED_WRITE_BUFF(CHUNK_ZRAM, Pico.zram); - CHECKED_WRITE_BUFF(CHUNK_CRAM, Pico.cram); + CHECKED_WRITE_BUFF(CHUNK_VRAM, PicoMem.vram); + CHECKED_WRITE_BUFF(CHUNK_ZRAM, PicoMem.zram); + CHECKED_WRITE_BUFF(CHUNK_CRAM, PicoMem.cram); CHECKED_WRITE_BUFF(CHUNK_MISC, Pico.m); CHECKED_WRITE_BUFF(CHUNK_VIDEO, Pico.video); @@ -421,14 +421,14 @@ static int state_load(void *file) CHECKED_READ_BUFF(buff_z80); break; - case CHUNK_RAM: CHECKED_READ_BUFF(Pico.ram); break; - case CHUNK_VRAM: CHECKED_READ_BUFF(Pico.vram); break; - case CHUNK_ZRAM: CHECKED_READ_BUFF(Pico.zram); break; - case CHUNK_CRAM: CHECKED_READ_BUFF(Pico.cram); break; - case CHUNK_VSRAM: CHECKED_READ_BUFF(Pico.vsram); break; + case CHUNK_RAM: CHECKED_READ_BUFF(PicoMem.ram); break; + case CHUNK_VRAM: CHECKED_READ_BUFF(PicoMem.vram); break; + case CHUNK_ZRAM: CHECKED_READ_BUFF(PicoMem.zram); break; + case CHUNK_CRAM: CHECKED_READ_BUFF(PicoMem.cram); break; + case CHUNK_VSRAM: 
CHECKED_READ_BUFF(PicoMem.vsram); break; case CHUNK_MISC: CHECKED_READ_BUFF(Pico.m); break; case CHUNK_VIDEO: CHECKED_READ_BUFF(Pico.video); break; - case CHUNK_IOPORTS: CHECKED_READ_BUFF(Pico.ioports); break; + case CHUNK_IOPORTS: CHECKED_READ_BUFF(PicoMem.ioports); break; case CHUNK_PSG: CHECKED_READ2(28*4, sn76496_regs); break; case CHUNK_FM: ym2612_regs = YM2612GetRegs(); @@ -553,7 +553,7 @@ readend: z80_unpack(buff_z80); // due to dep from 68k cycles.. - SekCycleAim = SekCycleCnt; + Pico.t.m68c_aim = Pico.t.m68c_cnt; if (PicoAHW & PAHW_32X) Pico32xStateLoaded(0); if (PicoAHW & PAHW_MCD) @@ -596,9 +596,9 @@ static int state_load_gfx(void *file) switch (buff[0]) { - case CHUNK_VRAM: CHECKED_READ_BUFF(Pico.vram); found++; break; - case CHUNK_CRAM: CHECKED_READ_BUFF(Pico.cram); found++; break; - case CHUNK_VSRAM: CHECKED_READ_BUFF(Pico.vsram); found++; break; + case CHUNK_VRAM: CHECKED_READ_BUFF(PicoMem.vram); found++; break; + case CHUNK_CRAM: CHECKED_READ_BUFF(PicoMem.cram); found++; break; + case CHUNK_VSRAM: CHECKED_READ_BUFF(PicoMem.vsram); found++; break; case CHUNK_VIDEO: CHECKED_READ_BUFF(Pico.video); found++; break; #ifndef NO_32X @@ -679,10 +679,10 @@ int PicoStateLoadGfx(const char *fname) if (ret != 0) { // assume legacy areaSeek(afile, 0x10020, SEEK_SET); // skip header and RAM - areaRead(Pico.vram, 1, sizeof(Pico.vram), afile); + areaRead(PicoMem.vram, 1, sizeof(PicoMem.vram), afile); areaSeek(afile, 0x2000, SEEK_CUR); - areaRead(Pico.cram, 1, sizeof(Pico.cram), afile); - areaRead(Pico.vsram, 1, sizeof(Pico.vsram), afile); + areaRead(PicoMem.cram, 1, sizeof(PicoMem.cram), afile); + areaRead(PicoMem.vsram, 1, sizeof(PicoMem.vsram), afile); areaSeek(afile, 0x221a0, SEEK_SET); areaRead(&Pico.video, 1, sizeof(Pico.video), afile); } @@ -715,9 +715,9 @@ void *PicoTmpStateSave(void) if (t == NULL) return NULL; - memcpy(t->vram, Pico.vram, sizeof(Pico.vram)); - memcpy(t->cram, Pico.cram, sizeof(Pico.cram)); - memcpy(t->vsram, Pico.vsram, sizeof(Pico.vsram)); 
+ memcpy(t->vram, PicoMem.vram, sizeof(PicoMem.vram)); + memcpy(t->cram, PicoMem.cram, sizeof(PicoMem.cram)); + memcpy(t->vsram, PicoMem.vsram, sizeof(PicoMem.vsram)); memcpy(&t->video, &Pico.video, sizeof(Pico.video)); #ifndef NO_32X @@ -737,9 +737,9 @@ void PicoTmpStateRestore(void *data) if (t == NULL) return; - memcpy(Pico.vram, t->vram, sizeof(Pico.vram)); - memcpy(Pico.cram, t->cram, sizeof(Pico.cram)); - memcpy(Pico.vsram, t->vsram, sizeof(Pico.vsram)); + memcpy(PicoMem.vram, t->vram, sizeof(PicoMem.vram)); + memcpy(PicoMem.cram, t->cram, sizeof(PicoMem.cram)); + memcpy(PicoMem.vsram, t->vsram, sizeof(PicoMem.vsram)); memcpy(&Pico.video, &t->video, sizeof(Pico.video)); Pico.m.dirtyPal = 1; diff --git a/pico/videoport.c b/pico/videoport.c index 4da3ce3a..072a941b 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -11,7 +11,6 @@ #define NEED_DMA_SOURCE #include "memory.h" -int line_base_cycles; extern const unsigned char hcounts_32[]; extern const unsigned char hcounts_40[]; @@ -33,23 +32,24 @@ static NOINLINE void VideoWrite128(u32 a, u16 d) { // nasty a = ((a & 2) >> 1) | ((a & 0x400) >> 9) | (a & 0x3FC) | ((a & 0x1F800) >> 1); - ((u8 *)Pico.vram)[a] = d; + ((u8 *)PicoMem.vram)[a] = d; } static void VideoWrite(u16 d) { - unsigned int a=Pico.video.addr; + unsigned int a = Pico.video.addr; switch (Pico.video.type) { - case 1: if(a&1) d=(u16)((d<<8)|(d>>8)); // If address is odd, bytes are swapped (which game needs this?) 
- Pico.vram [(a>>1)&0x7fff]=d; + case 1: if (a & 1) + d = (u16)((d << 8) | (d >> 8)); + PicoMem.vram [(a >> 1) & 0x7fff] = d; if (a - ((unsigned)(Pico.video.reg[5]&0x7f) << 9) < 0x400) Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; break; case 3: Pico.m.dirtyPal = 1; - Pico.cram [(a>>1)&0x003f]=d; break; // wraps (Desert Strike) - case 5: Pico.vsram[(a>>1)&0x003f]=d; break; + PicoMem.cram [(a >> 1) & 0x3f] = d; break; + case 5: PicoMem.vsram[(a >> 1) & 0x3f] = d; break; case 0x81: a |= Pico.video.addr_u << 16; VideoWrite128(a, d); @@ -68,9 +68,9 @@ static unsigned int VideoRead(void) switch (Pico.video.type) { - case 0: d=Pico.vram [a&0x7fff]; break; - case 8: d=Pico.cram [a&0x003f]; break; - case 4: d=Pico.vsram[a&0x003f]; break; + case 0: d=PicoMem.vram [a & 0x7fff]; break; + case 8: d=PicoMem.cram [a & 0x003f]; break; + case 4: d=PicoMem.vsram[a & 0x003f]; break; default:elprintf(EL_ANOMALY, "VDP read with bad type %i", Pico.video.type); break; } @@ -106,7 +106,7 @@ static void DmaSlow(int len, unsigned int source) SekCyclesBurnRun(CheckDMA()); if ((source & 0xe00000) == 0xe00000) { // Ram - base = (u16 *)Pico.ram; + base = (u16 *)PicoMem.ram; mask = 0xffff; } else if (PicoAHW & PAHW_MCD) @@ -154,7 +154,7 @@ static void DmaSlow(int len, unsigned int source) switch (Pico.video.type) { case 1: // vram - r = Pico.vram; + r = PicoMem.vram; if (inc == 2 && !(a & 1) && a + len * 2 < 0x10000 && !(((source + len - 1) ^ source) & ~mask)) { @@ -178,7 +178,7 @@ static void DmaSlow(int len, unsigned int source) case 3: // cram Pico.m.dirtyPal = 1; - r = Pico.cram; + r = PicoMem.cram; for (; len; len--) { r[(a / 2) & 0x3f] = base[source++ & mask]; @@ -188,7 +188,7 @@ static void DmaSlow(int len, unsigned int source) break; case 5: // vsram - r = Pico.vsram; + r = PicoMem.vsram; for (; len; len--) { r[(a / 2) & 0x3f] = base[source++ & mask]; @@ -219,9 +219,9 @@ static void DmaSlow(int len, unsigned int source) static void DmaCopy(int len) { - u16 a=Pico.video.addr; - unsigned 
char *vr = (unsigned char *) Pico.vram; - unsigned char inc=Pico.video.reg[0xf]; + u16 a = Pico.video.addr; + u8 *vr = (u8 *)PicoMem.vram; + u8 inc = Pico.video.reg[0xf]; int source; elprintf(EL_VDPDMA, "DmaCopy len %i [%u]", len, SekCyclesDone()); @@ -246,10 +246,10 @@ static void DmaCopy(int len) static NOINLINE void DmaFill(int data) { - unsigned short a=Pico.video.addr; - unsigned char *vr=(unsigned char *) Pico.vram; - unsigned char high = (unsigned char) (data >> 8); - unsigned char inc=Pico.video.reg[0xf]; + u16 a = Pico.video.addr; + u8 *vr = (u8 *)PicoMem.vram; + u8 high = (u8)(data >> 8); + u8 inc = Pico.video.reg[0xf]; int source; int len, l; @@ -367,7 +367,7 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) // try avoiding the sync.. if (Pico.m.scanline < 224 && (pvid->reg[1]&0x40) && !(!pvid->pending && - ((pvid->command & 0xc00000f0) == 0x40000010 && Pico.vsram[pvid->addr>>1] == d)) + ((pvid->command & 0xc00000f0) == 0x40000010 && PicoMem.vsram[pvid->addr>>1] == d)) ) DrawSync(0); @@ -421,7 +421,7 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) return; } - if (num == 1 && !(d&0x40) && SekCyclesDone() - line_base_cycles <= 488-390) + if (num == 1 && !(d&0x40) && SekCyclesDone() - Pico.t.m68c_line_start <= 488-390) blank_on = 1; DrawSync(blank_on); pvid->reg[num]=(unsigned char)d; @@ -512,7 +512,7 @@ PICO_INTERNAL_ASM unsigned int PicoVideoRead(unsigned int a) unsigned int d; d=pv->status; //if (PicoOpt&POPT_ALT_RENDERER) d|=0x0020; // sprite collision (Shadow of the Beast) - if (SekCyclesDone() - line_base_cycles >= 488-88) + if (SekCyclesDone() - Pico.t.m68c_line_start >= 488-88) d|=0x0004; // H-Blank (Sonic3 vs) d |= ((pv->reg[1]&0x40)^0x40) >> 3; // set V-Blank if display is disabled @@ -544,7 +544,7 @@ PICO_INTERNAL_ASM unsigned int PicoVideoRead(unsigned int a) { unsigned int d; - d = (SekCyclesDone() - line_base_cycles) & 0x1ff; // FIXME + d = (SekCyclesDone() - Pico.t.m68c_line_start) & 0x1ff; // 
FIXME if (Pico.video.reg[12]&1) d = hcounts_40[d]; else d = hcounts_32[d]; @@ -588,7 +588,7 @@ unsigned char PicoVideoRead8CtlL(void) //if (PicoOpt&POPT_ALT_RENDERER) d|=0x0020; // sprite collision (Shadow of the Beast) d |= ((Pico.video.reg[1]&0x40)^0x40) >> 3; // set V-Blank if display is disabled d |= (Pico.video.pending_ints&0x20)<<2; // V-int pending? - if (SekCyclesDone() - line_base_cycles >= 488-88) d |= 4; // H-Blank + if (SekCyclesDone() - Pico.t.m68c_line_start >= 488-88) d |= 4; // H-Blank Pico.video.pending = 0; elprintf(EL_SR, "SR read (l): %02x @ %06x", d, SekPc); return d; @@ -603,7 +603,7 @@ unsigned char PicoVideoRead8HV_H(void) // FIXME: broken unsigned char PicoVideoRead8HV_L(void) { - u32 d = (SekCyclesDone() - line_base_cycles) & 0x1ff; // FIXME + u32 d = (SekCyclesDone() - Pico.t.m68c_line_start) & 0x1ff; // FIXME if (Pico.video.reg[12]&1) d = hcounts_40[d]; else d = hcounts_32[d]; diff --git a/platform/common/common.mak b/platform/common/common.mak index 32b7a649..cd92f54b 100644 --- a/platform/common/common.mak +++ b/platform/common/common.mak @@ -49,7 +49,7 @@ SRCS_COMMON += $(R)pico/draw_arm.S $(R)pico/draw2_arm.S endif ifeq "$(asm_memory)" "1" DEFINES += _ASM_MEMORY_C -SRCS_COMMON += $(R)pico/memory_arm.s +SRCS_COMMON += $(R)pico/memory_arm.S endif ifeq "$(asm_ym2612)" "1" DEFINES += _ASM_YM2612_C @@ -62,7 +62,7 @@ SRCS_COMMON += $(R)pico/cd/misc_arm.s endif ifeq "$(asm_cdmemory)" "1" DEFINES += _ASM_CD_MEMORY_C -SRCS_COMMON += $(R)pico/cd/memory_arm.s +SRCS_COMMON += $(R)pico/cd/memory_arm.S endif ifeq "$(asm_32xdraw)" "1" DEFINES += _ASM_32X_DRAW diff --git a/platform/common/emu.c b/platform/common/emu.c index 6a8c940b..9535bfd2 100644 --- a/platform/common/emu.c +++ b/platform/common/emu.c @@ -889,7 +889,7 @@ int emu_save_load_game(int load, int sram) { if (PicoOpt & POPT_EN_MCD_RAMCART) { sram_size = 0x12000; - sram_data = SRam.data; + sram_data = Pico.sv.data; if (sram_data) memcpy32((int *)sram_data, (int *)Pico_mcd->bram, 
0x2000/4); } else { @@ -898,11 +898,11 @@ int emu_save_load_game(int load, int sram) truncate = 0; // the .brm may contain RAM cart data after normal brm } } else { - sram_size = SRam.size; - sram_data = SRam.data; + sram_size = Pico.sv.size; + sram_data = Pico.sv.data; } if (sram_data == NULL) - return 0; // SRam forcefully disabled for this game + return 0; // cart saves forcefully disabled for this game if (load) { @@ -1262,9 +1262,9 @@ void emu_init(void) void emu_finish(void) { // save SRAM - if ((currentConfig.EmuOpt & EOPT_EN_SRAM) && SRam.changed) { + if ((currentConfig.EmuOpt & EOPT_EN_SRAM) && Pico.sv.changed) { emu_save_load_game(0, 1); - SRam.changed = 0; + Pico.sv.changed = 0; } if (!(currentConfig.EmuOpt & EOPT_NO_AUTOSVCFG)) { @@ -1514,10 +1514,10 @@ void emu_loop(void) emu_set_fastforward(0); // save SRAM - if ((currentConfig.EmuOpt & EOPT_EN_SRAM) && SRam.changed) { + if ((currentConfig.EmuOpt & EOPT_EN_SRAM) && Pico.sv.changed) { plat_status_msg_busy_first("Writing SRAM/BRAM..."); emu_save_load_game(0, 1); - SRam.changed = 0; + Pico.sv.changed = 0; } pemu_loop_end(); diff --git a/tools/mkoffsets.c b/tools/mkoffsets.c index a5c475f2..b371cf4c 100644 --- a/tools/mkoffsets.c +++ b/tools/mkoffsets.c @@ -3,10 +3,22 @@ #include "../pico/pico_int.h" -#define DUMP(f, field) \ +#define DUMP(f, prefix, type, field) \ fprintf(f, "#define %-20s 0x%02x\n", \ - "OFS_" #field, \ - (int)offsetof(struct PicoEState, field)) + prefix #field, (int)offsetof(type, field)) + +#define DUMP_P(f, field) \ + fprintf(f, "#define %-20s 0x%04x\n", \ + "OFS_Pico_" #field, (char *)&p.field - (char *)&p) + +#define DUMP_PS(f, s1, field) \ + fprintf(f, "#define %-20s 0x%04x\n", \ + "OFS_Pico_" #s1 "_" #field, (char *)&p.s1.field - (char *)&p) + +#define DUMP_EST(f, field) \ + DUMP(f, "OFS_EST_", struct PicoEState, field) + +extern struct Pico p; int main(int argc, char *argv[]) { @@ -21,16 +33,31 @@ int main(int argc, char *argv[]) } fprintf(f, "/* autogenerated by %s, do not edit 
*/\n", argv[0]); - DUMP(f, DrawScanline); - DUMP(f, rendstatus); - DUMP(f, DrawLineDest); - DUMP(f, HighCol); - DUMP(f, HighPreSpr); - DUMP(f, Pico_video); - DUMP(f, Pico_vram); - DUMP(f, PicoOpt); - DUMP(f, Draw2FB); - DUMP(f, HighPal); + DUMP_PS(f, video, reg); + DUMP_PS(f, m, rotate); + DUMP_PS(f, m, z80Run); + DUMP_PS(f, m, dirtyPal); + DUMP_PS(f, m, hardware); + DUMP_PS(f, m, z80_reset); + DUMP_PS(f, m, sram_reg); + DUMP_P (f, sv); + DUMP_PS(f, sv, data); + DUMP_PS(f, sv, start); + DUMP_PS(f, sv, end); + DUMP_PS(f, sv, flags); + DUMP_P (f, rom); + DUMP_P (f, romsize); + DUMP_EST(f, DrawScanline); + DUMP_EST(f, rendstatus); + DUMP_EST(f, DrawLineDest); + DUMP_EST(f, HighCol); + DUMP_EST(f, HighPreSpr); + DUMP_EST(f, Pico); + DUMP_EST(f, PicoMem_vram); + DUMP_EST(f, PicoMem_cram); + DUMP_EST(f, PicoOpt); + DUMP_EST(f, Draw2FB); + DUMP_EST(f, HighPal); fclose(f); return 0; From 2b46e6c1054e1551d0edaecd8164cdaebc0c09b1 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 6 Oct 2017 01:31:11 +0300 Subject: [PATCH 0097/1110] fix t574 reset --- pico/carthw/carthw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pico/carthw/carthw.c b/pico/carthw/carthw.c index a18e5a41..cd47e7c7 100644 --- a/pico/carthw/carthw.c +++ b/pico/carthw/carthw.c @@ -423,7 +423,6 @@ static void carthw_pier_reset(void) { pier_regs[0] = 1; pier_regs[1] = pier_regs[2] = pier_regs[3] = 0; - pier_dump_prot = 3; carthw_pier_statef(); eeprom_spi_init(NULL); } @@ -443,6 +442,8 @@ void carthw_pier_startup(void) return; } + pier_dump_prot = 3; + // create dump protection bank for (i = 0; i < M68K_BANK_SIZE; i += 0x8000) memcpy(Pico.rom + Pico.romsize + i, Pico.rom, 0x8000); From e015ad3c1b8430b6c217c5cd3384f95389878e1a Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 6 Oct 2017 01:26:12 +0300 Subject: [PATCH 0098/1110] update libpicofe --- platform/libpicofe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/platform/libpicofe b/platform/libpicofe index e7f58005..448ec62f 
160000 --- a/platform/libpicofe +++ b/platform/libpicofe @@ -1 +1 @@ -Subproject commit e7f580052c03fa3f4603051c1b718be4bd8b2db7 +Subproject commit 448ec62f85a90e8a27368ddc05057a5a714944b8 From 22814963ccbcba3a08ca49656fd80dc40de64710 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 7 Oct 2017 00:08:05 +0300 Subject: [PATCH 0099/1110] some irq hacks notes to self (derived from Nemesis): H32: 0x10A .. 0x127 0x1D2 .. 0x1FF 0x000 0x001 pclk 30 46 1 |0x109 mclk 770 | 2650 = 3420 68kclk 110 378.5 ~= 488.5 H40: 0x14A .. 0x16C 0x1C9 .. 0x1FF 0x000 0x001 pclk 35 55 1 mclk 35*8 4*8+314+10+19*8 |0x149*8 788 | 2632 = 3420 68kclk 112.5 376 ~= 488.5 --- pico/cd/sek.c | 6 ++---- pico/pico_cmn.c | 6 ++++-- pico/pico_int.h | 10 +++------- pico/sek.c | 36 ++++++++++++++++++++---------------- pico/videoport.c | 19 ++++++++++--------- 5 files changed, 39 insertions(+), 38 deletions(-) diff --git a/pico/cd/sek.c b/pico/cd/sek.c index d4914901..42fea930 100644 --- a/pico/cd/sek.c +++ b/pico/cd/sek.c @@ -178,10 +178,8 @@ PICO_INTERNAL int SekInterruptS68k(int irq) PicoCpuCS68k.irq=real_irq; #endif #ifdef EMU_M68K - void *oldcontext = m68ki_cpu_p; - m68k_set_context(&PicoCpuMS68k); - m68k_set_irq(real_irq); - m68k_set_context(oldcontext); + // avoid m68k_set_irq() for delaying to work + PicoCpuMS68k.int_level = real_irq << 8; #endif #ifdef EMU_F68K PicoCpuFS68k.interrupts[0]=real_irq; diff --git a/pico/pico_cmn.c b/pico/pico_cmn.c index a4507577..509c8775 100644 --- a/pico/pico_cmn.c +++ b/pico/pico_cmn.c @@ -7,7 +7,7 @@ */ #define CYCLES_M68K_LINE 488 // suitable for both PAL/NTSC -#define CYCLES_M68K_VINT_LAG 68 +#define CYCLES_M68K_VINT_LAG 112 // pad delay (for 6 button pads) #define PAD_DELAY() { \ @@ -206,7 +206,6 @@ static int PicoFrameHints(void) } pv->status |= SR_VB; // go into vblank - pv->pending_ints |= 0x20; // the following SekRun is there for several reasons: // there must be a delay after vblank bit is set and irq is asserted (Mazin Saga) @@ -216,7 +215,10 @@ static int 
PicoFrameHints(void) do_timing_hacks_vb(); CPUS_RUN(CYCLES_M68K_VINT_LAG); + pv->pending_ints |= 0x20; if (pv->reg[1] & 0x20) { + Pico.t.m68c_aim = Pico.t.m68c_cnt + 11; // HACK + SekSyncM68k(); elprintf(EL_INTS, "vint: @ %06x [%u]", SekPc, SekCyclesDone()); SekInterrupt(6); } diff --git a/pico/pico_int.h b/pico/pico_int.h index 765afaff..bbfc5cb3 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -117,13 +117,9 @@ extern m68ki_cpu_core PicoCpuMM68k, PicoCpuMS68k; #define SekNotPolling PicoCpuMM68k.not_polling #define SekNotPollingS68k PicoCpuMS68k.not_polling -#define SekInterrupt(irq) { \ - void *oldcontext = m68ki_cpu_p; \ - m68k_set_context(&PicoCpuMM68k); \ - m68k_set_irq(irq); \ - m68k_set_context(oldcontext); \ -} -#define SekIrqLevel (PicoCpuMM68k.int_level >> 8) +// avoid m68k_set_irq() for delaying to work +#define SekInterrupt(irq) PicoCpuMM68k.int_level = (irq) << 8 +#define SekIrqLevel (PicoCpuMM68k.int_level >> 8) #endif #endif // EMU_M68K diff --git a/pico/sek.c b/pico/sek.c index f599d9b1..031c5491 100644 --- a/pico/sek.c +++ b/pico/sek.c @@ -25,15 +25,29 @@ M68K_CONTEXT PicoCpuFM68k; #endif +static int do_ack(int level) +{ + struct PicoVideo *pv = &Pico.video; + + elprintf(EL_INTS, "%cack: @ %06x [%u], p=%02x", + level == 6 ? 
'v' : 'h', SekPc, SekCyclesDone(), pv->pending_ints); + // the VDP doesn't look at the 68k level + if (pv->pending_ints & pv->reg[1] & 0x20) { + pv->pending_ints &= ~0x20; + return (pv->reg[0] & pv->pending_ints & 0x10) >> 2; + } + else if (pv->pending_ints & pv->reg[0] & 0x10) + pv->pending_ints &= ~0x10; + + return 0; +} + /* callbacks */ #ifdef EMU_C68K // interrupt acknowledgment static int SekIntAck(int level) { - // try to emulate VDP's reaction to 68000 int ack - if (level == 4) { Pico.video.pending_ints = 0; elprintf(EL_INTS, "hack: @ %06x [%u]", SekPc, Pico.t.m68c_cnt); } - else if(level == 6) { Pico.video.pending_ints &= ~0x20; elprintf(EL_INTS, "vack: @ %06x [%u]", SekPc, Pico.t.m68c_cnt); } - PicoCpuCM68k.irq = 0; + PicoCpuCM68k.irq = do_ack(level); return CYCLONE_INT_ACK_AUTOVECTOR; } @@ -71,9 +85,7 @@ static int SekUnrecognizedOpcode() #ifdef EMU_M68K static int SekIntAckM68K(int level) { - if (level == 4) { Pico.video.pending_ints = 0; elprintf(EL_INTS, "hack: @ %06x [%u]", SekPc, Pico.t.m68c_cnt); } - else if(level == 6) { Pico.video.pending_ints &= ~0x20; elprintf(EL_INTS, "vack: @ %06x [%u]", SekPc, Pico.t.m68c_cnt); } - CPU_INT_LEVEL = 0; + CPU_INT_LEVEL = do_ack(level) << 8; return M68K_INT_ACK_AUTOVECTOR; } @@ -87,15 +99,7 @@ static int SekTasCallback(void) #ifdef EMU_F68K static void SekIntAckF68K(unsigned level) { - if (level == 4) { - Pico.video.pending_ints = 0; - elprintf(EL_INTS, "hack: @ %06x [%u]", SekPc, SekCyclesDone()); - } - else if(level == 6) { - Pico.video.pending_ints &= ~0x20; - elprintf(EL_INTS, "vack: @ %06x [%u]", SekPc, SekCyclesDone()); - } - PicoCpuFM68k.interrupts[0] = 0; + PicoCpuFM68k.interrupts[0] = do_ack(level); } #endif diff --git a/pico/videoport.c b/pico/videoport.c index 072a941b..22b83850 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -357,10 +357,10 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) { struct PicoVideo *pvid=&Pico.video; - //if (Pico.m.scanline < 224) - // 
elprintf(EL_STATUS, "PicoVideoWrite [%06x] %04x", a, d); - a&=0x1c; + //elprintf(EL_STATUS, "PicoVideoWrite [%06x] %04x [%u] @ %06x", + // a, d, SekCyclesDone(), SekPc); + a &= 0x1c; switch (a) { case 0x00: // Data port 0 or 2 @@ -383,8 +383,8 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) pvid->lwrite_cnt -= use; if (pvid->lwrite_cnt < 0) SekCyclesLeft = 0; - elprintf(EL_ASVDP, "VDP data write: %04x [%06x] {%i} #%i @ %06x", d, Pico.video.addr, - Pico.video.type, pvid->lwrite_cnt, SekPc); + elprintf(EL_ASVDP, "VDP data write: [%04x] %04x [%u] {%i} #%i @ %06x", + Pico.video.addr, d, SekCyclesDone(), Pico.video.type, pvid->lwrite_cnt, SekPc); } VideoWrite(d); @@ -451,14 +451,15 @@ update_irq: // update IRQ level if (!SekShouldInterrupt()) // hack { - int lines, pints, irq=0; + int lines, pints, irq = 0; lines = (pvid->reg[1] & 0x20) | (pvid->reg[0] & 0x10); - pints = (pvid->pending_ints&lines); + pints = pvid->pending_ints & lines; if (pints & 0x20) irq = 6; else if (pints & 0x10) irq = 4; SekInterrupt(irq); // update line - if (irq) SekEndRun(24); // make it delayed + // this is broken because cost of current insn isn't known here + if (irq) SekEndRun(21); // make it delayed } #endif } @@ -521,7 +522,7 @@ PICO_INTERNAL_ASM unsigned int PicoVideoRead(unsigned int a) pv->pending = 0; // ctrl port reads clear write-pending flag (Charles MacDonald) - elprintf(EL_SR, "SR read: %04x @ %06x", d, SekPc); + elprintf(EL_SR, "SR read: %04x [%u] @ %06x", d, SekCyclesDone(), SekPc); return d; } From 0e4bde9b2d981c264343aa66809f2f26ce7843c8 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 10 Oct 2017 01:13:48 +0300 Subject: [PATCH 0100/1110] rework sr note to self: h32 0x10A .. 0x127 0x1D2 .. 0x1FF 0x000 .. 0x109 pclk 30 | 46 | 266 = 342 hbset 0x126 ... 0x009 pclk 29 | 1 + 46 + 10 | 256 mclk 290 | 570 | 2560 = 3420 68kclk 41.4 81.4 365.7 ~= 488.5 h40 0x14A .. 0x16C 0x1C9 .. 0x1FF 0x000 .. 0x149 pclk 35 | 55 | 330 = 420 hbset 0x166 ... 
0x00A pclk 28 | 7 + 55 + 11 | 319 mclk 28*8 | 7*8 4*8+314+10+(18+11)*8 | 319*8 = 3420 68kclk 32 92 364.5 ~= 488.5 --- pico/pico_cmn.c | 16 +++++++++------- pico/pico_int.h | 1 + pico/sek.c | 1 + pico/state.c | 2 ++ pico/videoport.c | 45 ++++++++++++++++++++------------------------- 5 files changed, 33 insertions(+), 32 deletions(-) diff --git a/pico/pico_cmn.c b/pico/pico_cmn.c index 509c8775..aad8406e 100644 --- a/pico/pico_cmn.c +++ b/pico/pico_cmn.c @@ -108,7 +108,7 @@ static int PicoFrameHints(void) } else skip=PicoSkipFrame; - Pico.t.m68c_frame_start = SekCyclesDone(); + Pico.t.m68c_frame_start = Pico.t.m68c_aim; pv->v_counter = Pico.m.scanline = 0; z80_resetCycles(); PsndStartFrame(); @@ -170,7 +170,7 @@ static int PicoFrameHints(void) } // Run scanline: - Pico.t.m68c_line_start = SekCyclesDone(); + Pico.t.m68c_line_start = Pico.t.m68c_aim; do_timing_hacks_as(pv, vdp_slots); CPUS_RUN(CYCLES_M68K_LINE); @@ -205,16 +205,17 @@ static int PicoFrameHints(void) do_hint(pv); } - pv->status |= SR_VB; // go into vblank + pv->status |= SR_VB | PVS_VB2; // go into vblank // the following SekRun is there for several reasons: // there must be a delay after vblank bit is set and irq is asserted (Mazin Saga) // also delay between F bit (bit 7) is set in SR and IRQ happens (Ex-Mutants) // also delay between last H-int and V-int (Golden Axe 3) - Pico.t.m68c_line_start = SekCyclesDone(); + Pico.t.m68c_line_start = Pico.t.m68c_aim; do_timing_hacks_vb(); CPUS_RUN(CYCLES_M68K_VINT_LAG); + pv->status |= SR_F; pv->pending_ints |= 0x20; if (pv->reg[1] & 0x20) { Pico.t.m68c_aim = Pico.t.m68c_cnt + 11; // HACK @@ -278,7 +279,7 @@ static int PicoFrameHints(void) } // Run scanline: - Pico.t.m68c_line_start = SekCyclesDone(); + Pico.t.m68c_line_start = Pico.t.m68c_aim; do_timing_hacks_vb(); CPUS_RUN(CYCLES_M68K_LINE); @@ -286,7 +287,8 @@ static int PicoFrameHints(void) pevt_log_m68k_o(EVT_NEXT_LINE); } - pv->status &= ~SR_VB; + pv->status &= ~(SR_VB | PVS_VB2); + pv->status |= 
((pv->reg[1] >> 3) ^ SR_VB) & SR_VB; // forced blanking // last scanline Pico.m.scanline = y; @@ -302,7 +304,7 @@ static int PicoFrameHints(void) } // Run scanline: - Pico.t.m68c_line_start = SekCyclesDone(); + Pico.t.m68c_line_start = Pico.t.m68c_aim; do_timing_hacks_as(pv, vdp_slots); CPUS_RUN(CYCLES_M68K_LINE); diff --git a/pico/pico_int.h b/pico/pico_int.h index bbfc5cb3..848da5df 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -287,6 +287,7 @@ extern SH2 sh2s[2]; #define SR_EMPT (1 << 9) // not part of real SR #define PVS_ACTIVE (1 << 16) +#define PVS_VB2 (1 << 17) // ignores forced blanking struct PicoVideo { diff --git a/pico/sek.c b/pico/sek.c index 031c5491..8fece1a3 100644 --- a/pico/sek.c +++ b/pico/sek.c @@ -34,6 +34,7 @@ static int do_ack(int level) // the VDP doesn't look at the 68k level if (pv->pending_ints & pv->reg[1] & 0x20) { pv->pending_ints &= ~0x20; + pv->status &= ~SR_F; return (pv->reg[0] & pv->pending_ints & 0x10) >> 2; } else if (pv->pending_ints & pv->reg[0] & 0x10) diff --git a/pico/state.c b/pico/state.c index 8a2f2aa0..69e8be06 100644 --- a/pico/state.c +++ b/pico/state.c @@ -564,6 +564,8 @@ readend: Pico.m.dirtyPal = 1; Pico.video.status &= ~(SR_VB | SR_F); + Pico.video.status |= ((Pico.video.reg[1] >> 3) ^ SR_VB) & SR_VB; + Pico.video.status |= (Pico.video.pending_ints << 2) & SR_F; retval = 0; diff --git a/pico/videoport.c b/pico/videoport.c index 22b83850..b5e3f86c 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -376,8 +376,7 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) pvid->pending=0; } - // preliminary FIFO emulation for Chaos Engine, The (E) - if (!(pvid->status & SR_VB) && (pvid->reg[1] & 0x40) && !(PicoOpt&POPT_DIS_VDP_FIFO)) // active display? + if (!(pvid->status & SR_VB) && !(PicoOpt&POPT_DIS_VDP_FIFO)) { int use = pvid->type == 1 ? 
2 : 1; pvid->lwrite_cnt -= use; @@ -434,6 +433,9 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) case 0x01: elprintf(EL_INTSW, "vint_onoff: %i->%i [%u] pend=%i @ %06x", (dold&0x20)>>5, (d&0x20)>>5, SekCyclesDone(), (pvid->pending_ints&0x20)>>5, SekPc); + if (!(pvid->status & PVS_VB2)) + pvid->status &= ~SR_VB; + pvid->status |= ((d >> 3) ^ SR_VB) & SR_VB; // forced blanking goto update_irq; case 0x05: //elprintf(EL_STATUS, "spritep moved to %04x", (unsigned)(Pico.video.reg[5]&0x7f) << 9); @@ -503,25 +505,25 @@ update_irq: } } +static u32 SrLow(const struct PicoVideo *pv) +{ + unsigned int c, d = pv->status; + + c = SekCyclesDone() - Pico.t.m68c_line_start - 39; + if (c < 92) + d |= SR_HB; + return d; +} + PICO_INTERNAL_ASM unsigned int PicoVideoRead(unsigned int a) { - a&=0x1c; + a &= 0x1c; - if (a==0x04) // control port + if (a == 0x04) // control port { - struct PicoVideo *pv=&Pico.video; - unsigned int d; - d=pv->status; - //if (PicoOpt&POPT_ALT_RENDERER) d|=0x0020; // sprite collision (Shadow of the Beast) - if (SekCyclesDone() - Pico.t.m68c_line_start >= 488-88) - d|=0x0004; // H-Blank (Sonic3 vs) - - d |= ((pv->reg[1]&0x40)^0x40) >> 3; // set V-Blank if display is disabled - d |= (pv->pending_ints&0x20)<<2; // V-int pending? 
- if (d&0x100) pv->status&=~0x100; // FIFO no longer full - - pv->pending = 0; // ctrl port reads clear write-pending flag (Charles MacDonald) - + struct PicoVideo *pv = &Pico.video; + unsigned int d = SrLow(pv); + pv->pending = 0; elprintf(EL_SR, "SR read: %04x [%u] @ %06x", d, SekCyclesDone(), SekPc); return d; } @@ -572,12 +574,9 @@ unsigned char PicoVideoRead8DataL(void) return VideoRead(); } -// FIXME: broken mess unsigned char PicoVideoRead8CtlH(void) { u8 d = (u8)(Pico.video.status >> 8); - if (d & 1) - Pico.video.status &= ~0x100; // FIFO no longer full Pico.video.pending = 0; elprintf(EL_SR, "SR read (h): %02x @ %06x", d, SekPc); return d; @@ -585,11 +584,7 @@ unsigned char PicoVideoRead8CtlH(void) unsigned char PicoVideoRead8CtlL(void) { - u8 d = (u8)Pico.video.status; - //if (PicoOpt&POPT_ALT_RENDERER) d|=0x0020; // sprite collision (Shadow of the Beast) - d |= ((Pico.video.reg[1]&0x40)^0x40) >> 3; // set V-Blank if display is disabled - d |= (Pico.video.pending_ints&0x20)<<2; // V-int pending? 
- if (SekCyclesDone() - Pico.t.m68c_line_start >= 488-88) d |= 4; // H-Blank + u8 d = SrLow(&Pico.video); Pico.video.pending = 0; elprintf(EL_SR, "SR read (l): %02x @ %06x", d, SekPc); return d; From c041308933a54fce3b1e98b0228e19f96475ae40 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 10 Oct 2017 02:00:28 +0300 Subject: [PATCH 0101/1110] fame: fix roxr also take interrupts on exit, like other cores do --- cpu/fame/famec.c | 2 +- cpu/fame/famec_opcodes.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpu/fame/famec.c b/cpu/fame/famec.c index 83719304..db4eae69 100644 --- a/cpu/fame/famec.c +++ b/cpu/fame/famec.c @@ -938,7 +938,7 @@ famec_Exec: u32 line; m68kcontext.io_cycle_counter += cycles_needed; cycles_needed = 0; - if (m68kcontext.io_cycle_counter <= 0) goto famec_End; + //if (m68kcontext.io_cycle_counter <= 0) goto famec_End; line=interrupt_chk__(); if (line>0) { diff --git a/cpu/fame/famec_opcodes.h b/cpu/fame/famec_opcodes.h index 3ba72751..2df6d78a 100644 --- a/cpu/fame/famec_opcodes.h +++ b/cpu/fame/famec_opcodes.h @@ -38009,7 +38009,7 @@ OPCODE(0xE0B0) { if (sft == 1) res = (src >> 1) | ((flag_X & M68K_SR_X) << (32 - (M68K_SR_X_SFT + 1))); else res = (src >> sft) | (src << (33 - sft)) | (((flag_X & M68K_SR_X) << (32 - (M68K_SR_X_SFT + 1))) >> (sft - 1)); - flag_X = (src >> (32 - sft)) << M68K_SR_X_SFT; + flag_X = (src >> (sft - 1)) << M68K_SR_X_SFT; } else res = src; flag_C = flag_X; From cf07a88d6eaa0e7719c498ef838ef175512a2bc4 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 8 Oct 2017 22:33:04 +0300 Subject: [PATCH 0102/1110] fix cell scroll drawing --- pico/draw.c | 6 +++++- pico/draw2_arm.S | 14 +++++++++++--- pico/draw_arm.S | 31 ++++++++++++++++++++++--------- pico/pico_int.h | 7 +++---- pico/pico_int_o32.h | 2 ++ tools/mkoffsets.c | 5 +++++ 6 files changed, 48 insertions(+), 17 deletions(-) diff --git a/pico/draw.c b/pico/draw.c index 6408f205..bb051b6d 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -261,7 +261,11 @@ static 
void DrawStripVSRam(struct TileStrip *ts, int plane_sh, int cellskip) // Draw tiles across screen: tilex=(-ts->hscroll)>>3; dx=((ts->hscroll-1)&7)+1; - if(dx != 8) cell--; // have hscroll, start with negative cell + if (ts->hscroll & 0x0f) { + int adj = ((ts->hscroll ^ dx) >> 3) & 1; + cell -= adj + 1; + ts->cells -= adj; + } cell+=cellskip; tilex+=cellskip; dx+=cellskip<<3; diff --git a/pico/draw2_arm.S b/pico/draw2_arm.S index 1575653c..6b110b32 100644 --- a/pico/draw2_arm.S +++ b/pico/draw2_arm.S @@ -73,8 +73,17 @@ BackFillFull: @ -------- some macros -------- +@ helpers +.macro add_c24 d s c + add \d, \s, #(\c & 0x00ff00) +.if \c & 0x0000ff + add \d, \d, #(\c & 0x0000ff) +.endif +.if \c & 0xff0000 + add \d, \d, #(\c & 0xff0000) +.endif +.endm -@ helper @ TileLineSinglecol (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r0: pixels8_old .macro TileLineSinglecol notsinglecol=0 and r2, r2, #0xf @ #0x0000000f @@ -411,8 +420,7 @@ DrawLayerFull: mla r11, r4, r7, r11 @ scrpos+=8*328*(planestart-START_ROW); @ Get vertical scroll value: - add r7, r10, #0x012000 - add r7, r7, #0x000180 @ r7=Pico.vsram (Pico+0x22180) + add_c24 r7, r10, (OFS_PMEM_vsram-OFS_PMEM_vram) ldr r7, [r7] tst r0, r0 moveq r7, r7, lsl #22 diff --git a/pico/draw_arm.S b/pico/draw_arm.S index d53e04bd..29af1c13 100644 --- a/pico/draw_arm.S +++ b/pico/draw_arm.S @@ -19,7 +19,17 @@ .equ PDRAW_PLANE_HI_PRIO, (1<<6) .equ PDRAW_SHHI_DONE, (1<<7) -@ helper +@ helpers +.macro add_c24 d s c + add \d, \s, #(\c & 0x00ff00) +.if \c & 0x0000ff + add \d, \d, #(\c & 0x0000ff) +.endif +.if \c & 0xff0000 + add \d, \d, #(\c & 0xff0000) +.endif +.endm + .macro TilePixel pat lsrr offs .if !\lsrr ands r4, \pat, r2 @@ -344,8 +354,7 @@ DrawLayer: bne .DrawStrip_vsscroll @ Get vertical scroll value: - add r7, lr, #0x012000 - add r7, r7, #0x000180 @ r7=PicoMem.vsram (PicoMem+0x22180) + add_c24 r7, lr, (OFS_PMEM_vsram-OFS_PMEM_vram) ldr r7, [r7] tst r8, #2 @@ -522,17 +531,22 @@ DrawLayer: add r7, r1, #1 @ 
r7=dx=((ts->hscroll-1)&7)+1 mov r10,r9, lsl #16 + orr r10,r10, #0xff000000 @ will be adjusted on entering loop tst r0, #1 orrne r10,r10, #0x8000 + tst r3, #0x0f @ hscroll & 0x0f? + beq 0f + eor r3, r3, r7 + sub r10,r10, #1<<24 @ cell-- // start from negative for hscroll + tst r3, #0x08 + subne r10,r10, #1<<16 @ cells-- + subne r10,r10, #1<<24 @ cell-- // even more negative +0: tst r9, #1<<31 mov r3, #0 - orr r10,r10, #0xff000000 @ will be adjusted on entering loop orrne r10,r10, #1<<23 @ r10=(cell[31:24]|sh[23]|hi_not_empty[22]|cells_max[21:16]|plane[15]|ty[14:0]) movne r3, #0x40 @ default to shadowed pal on sh mode - cmp r7, #8 - subne r10,r10, #0x01000000 @ have hscroll, start with negative cell - and r9, r9, #0xff00 add r8, r8, r9, lsr #8 @ tilex+=cellskip add r7, r7, r9, lsr #5 @ dx+=cellskip<<3; @@ -556,8 +570,7 @@ DrawLayer: ble .dsloop_vs_exit @ calc offset and read tileline code to r7, also calc ty - add r7, lr, #0x012000 - add r7, r7, #0x000180 @ r7=PicoMem.vsram (PicoMem+0x22180) + add_c24 r7, lr, (OFS_PMEM_vsram-OFS_PMEM_vram) add r7, r7, r10,asr #23 @ vsram + ((cell&~1)<<1) bic r7, r7, #3 tst r10,#0x8000 @ plane1? diff --git a/pico/pico_int.h b/pico/pico_int.h index 848da5df..369bb5d7 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -356,7 +356,6 @@ struct PicoEState unsigned short HighPal[0x100]; }; -// some assembly stuff still depends on these, do not touch! 
struct PicoMem { unsigned char ram[0x10000]; // 0x00000 scratch ram @@ -366,9 +365,9 @@ struct PicoMem }; unsigned char zram[0x2000]; // 0x20000 Z80 ram unsigned char ioports[0x10]; // XXX: fix asm and mv - unsigned char pad[0xf0]; // unused - unsigned short cram[0x40]; // 0x22100 - unsigned short vsram[0x40]; // 0x22180 + unsigned short cram[0x40]; // 0x22010 + unsigned char pad[0x70]; // 0x22050 DrawStripVSRam reads 0 from here + unsigned short vsram[0x40]; // 0x22100 }; // sram diff --git a/pico/pico_int_o32.h b/pico/pico_int_o32.h index 720a5ee4..ca3004f3 100644 --- a/pico/pico_int_o32.h +++ b/pico/pico_int_o32.h @@ -24,3 +24,5 @@ #define OFS_EST_PicoOpt 0x20 #define OFS_EST_Draw2FB 0x24 #define OFS_EST_HighPal 0x28 +#define OFS_PMEM_vram 0x10000 +#define OFS_PMEM_vsram 0x22100 diff --git a/tools/mkoffsets.c b/tools/mkoffsets.c index b371cf4c..7e57383a 100644 --- a/tools/mkoffsets.c +++ b/tools/mkoffsets.c @@ -18,6 +18,9 @@ #define DUMP_EST(f, field) \ DUMP(f, "OFS_EST_", struct PicoEState, field) +#define DUMP_PMEM(f, field) \ + DUMP(f, "OFS_PMEM_", struct PicoMem, field) + extern struct Pico p; int main(int argc, char *argv[]) @@ -58,6 +61,8 @@ int main(int argc, char *argv[]) DUMP_EST(f, PicoOpt); DUMP_EST(f, Draw2FB); DUMP_EST(f, HighPal); + DUMP_PMEM(f, vram); + DUMP_PMEM(f, vsram); fclose(f); return 0; From 0bc89554854606315659cb395c5b57346c13d0ff Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 4 Oct 2017 23:01:56 +0300 Subject: [PATCH 0103/1110] release 1.92 --- platform/common/version.h | 2 +- platform/pandora/Makefile | 6 +++--- platform/pandora/PicoDrive.pxml.template | 2 +- platform/pandora/make_pxml.sh | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/platform/common/version.h b/platform/common/version.h index 01e9b7d0..ce4223b5 100644 --- a/platform/common/version.h +++ b/platform/common/version.h @@ -1 +1 @@ -#define VERSION "1.91" +#define VERSION "1.92" diff --git a/platform/pandora/Makefile b/platform/pandora/Makefile index 
4240897a..a181e098 100644 --- a/platform/pandora/Makefile +++ b/platform/pandora/Makefile @@ -15,8 +15,8 @@ all: rel ../../tools/textfilter: ../../tools/textfilter.c make -C ../../tools/ -readme.txt: ../../tools/textfilter ../base_readme.txt ../../ChangeLog - ../../tools/textfilter ../base_readme.txt $@ PANDORA +#readme.txt: ../../tools/textfilter ../base_readme.txt ../../ChangeLog +# ../../tools/textfilter ../base_readme.txt $@ PANDORA /tmp/PicoDrive.pxml: PicoDrive.pxml.template FORCE ./make_pxml.sh $< $@ @@ -24,7 +24,7 @@ readme.txt: ../../tools/textfilter ../base_readme.txt ../../ChangeLog rel: ../../PicoDrive PicoDrive.sh picorestore \ PicoDrive.png PicoDrive_p.png \ ../../pico/carthw.cfg skin \ - readme.txt /tmp/PicoDrive.pxml + ../../README /tmp/PicoDrive.pxml rm -rf out mkdir out cp -r $^ out/ diff --git a/platform/pandora/PicoDrive.pxml.template b/platform/pandora/PicoDrive.pxml.template index 220d48ea..635d59cc 100644 --- a/platform/pandora/PicoDrive.pxml.template +++ b/platform/pandora/PicoDrive.pxml.template @@ -33,7 +33,7 @@ - + diff --git a/platform/pandora/make_pxml.sh b/platform/pandora/make_pxml.sh index 2267c14a..3fd03a23 100755 --- a/platform/pandora/make_pxml.sh +++ b/platform/pandora/make_pxml.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash set -e verfile=../common/version.h From 84162df45ca32c27cc56be3c98b6f91abc00586e Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 13 Oct 2017 00:40:30 +0300 Subject: [PATCH 0104/1110] build: drop function-sections this has provoked people to do some horrible ifdeffery in libretro fork, while the benefit is minimal, if any. --- Makefile | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 45e3532b..588475bd 100644 --- a/Makefile +++ b/Makefile @@ -2,10 +2,7 @@ TARGET ?= PicoDrive CFLAGS += -Wall -ggdb -falign-functions=2 CFLAGS += -I. 
ifndef DEBUG -CFLAGS += -O3 -DNDEBUG -ffunction-sections -ifeq ($(findstring clang,$(CC)),) -LDFLAGS += -Wl,--gc-sections -endif +CFLAGS += -O3 -DNDEBUG endif #CFLAGS += -DEVT_LOG #CFLAGS += -DDRC_CMP From 65e4c57baa06bb5e221de15c17ef87e5754dfd60 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 13 Oct 2017 00:40:45 +0300 Subject: [PATCH 0105/1110] build: some clean ups --- Makefile | 17 ++++++----------- pico/cd/memory_arm.S | 1 - 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/Makefile b/Makefile index 588475bd..ff008506 100644 --- a/Makefile +++ b/Makefile @@ -1,15 +1,15 @@ TARGET ?= PicoDrive -CFLAGS += -Wall -ggdb -falign-functions=2 +CFLAGS += -Wall -g CFLAGS += -I. ifndef DEBUG CFLAGS += -O3 -DNDEBUG endif -#CFLAGS += -DEVT_LOG -#CFLAGS += -DDRC_CMP -#cpu_cmp = 1 -#drc_debug = 7 -#profile = 1 +# This is actually needed, bevieve me. +# If you really have to disable this, set NO_ALIGN_FUNCTIONS elsewhere. +ifndef NO_ALIGN_FUNCTIONS +CFLAGS += -falign-functions=2 +endif all: config.mak target_ @@ -48,11 +48,6 @@ endif -include Makefile.local -ifneq "$(use_cyclone)" "1" -# due to CPU stop flag access -asm_cdmemory = 0 -endif - ifeq "$(PLATFORM)" "opendingux" opk: $(TARGET).opk diff --git a/pico/cd/memory_arm.S b/pico/cd/memory_arm.S index 23f0b94c..fe82ecb9 100644 --- a/pico/cd/memory_arm.S +++ b/pico/cd/memory_arm.S @@ -59,7 +59,6 @@ .extern s68k_poll_detect .extern pcd_pcm_write .extern pcd_pcm_read -.extern PicoCpuCS68k .extern PicoRead8_io .extern PicoRead16_io .extern PicoWrite8_io From 9e38b1f0d61b1f3d22cb253921afad8c84691bc4 Mon Sep 17 00:00:00 2001 From: David Date: Tue, 24 Nov 2015 13:49:31 +0100 Subject: [PATCH 0106/1110] Fix the lprintf method (libretro side) The lprintf method used the format string instead of the buffer build from va_list --- pico/media.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pico/media.c b/pico/media.c index 3ba45324..b7fa7f56 100644 --- a/pico/media.c +++ b/pico/media.c @@ -244,7 
+244,7 @@ enum media_type_e PicoLoadMedia(const char *filename, rom = pm_open(rom_fname); if (rom == NULL) { - lprintf("Failed to open ROM"); + lprintf("Failed to open ROM\n"); media_type = PM_ERROR; goto out; } @@ -252,9 +252,9 @@ enum media_type_e PicoLoadMedia(const char *filename, ret = PicoCartLoad(rom, &rom_data, &rom_size, (PicoAHW & PAHW_SMS) ? 1 : 0); pm_close(rom); if (ret != 0) { - if (ret == 2) lprintf("Out of memory"); - else if (ret == 3) lprintf("Read failed"); - else lprintf("PicoCartLoad() failed."); + if (ret == 2) lprintf("Out of memory\n"); + else if (ret == 3) lprintf("Read failed\n"); + else lprintf("PicoCartLoad() failed.\n"); media_type = PM_ERROR; goto out; } From 8655fd046255007513449f76cf2d9f5efd84f49e Mon Sep 17 00:00:00 2001 From: iLag Date: Wed, 22 Mar 2017 14:19:35 -0700 Subject: [PATCH 0107/1110] Add support for single-line cheats and PAR codes. notaz: include required header --- pico/patch.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/pico/patch.c b/pico/patch.c index b961e802..40e8372c 100644 --- a/pico/patch.c +++ b/pico/patch.c @@ -22,6 +22,7 @@ */ #include "pico_int.h" +#include "memory.h" #include "patch.h" struct patch @@ -142,7 +143,7 @@ static void hex_decode(const char *code, struct patch *result) /* THIS is the function you call from the MegaDrive or whatever. This figures * out whether it's a genie or hex code, depunctuates it, and calls the proper * decoder. 
*/ -static void decode(const char* code, struct patch* result) +void decode(const char* code, struct patch* result) { int len = strlen(code), i, j; char code_to_pass[16], *x; @@ -299,6 +300,8 @@ void PicoPatchPrepare(void) PicoPatches[i].addr &= ~1; if (PicoPatches[i].addr < Pico.romsize) PicoPatches[i].data_old = *(unsigned short *)(Pico.rom + PicoPatches[i].addr); + else + PicoPatches[i].data_old = (unsigned short) m68k_read16(PicoPatches[i].addr); if (strstr(PicoPatches[i].name, "AUTO")) PicoPatches[i].active = 1; } @@ -328,7 +331,15 @@ void PicoPatchApply(void) } else { - /* TODO? */ + if (PicoPatches[i].active) + m68k_write16(PicoPatches[i].addr,PicoPatches[i].data); + else { + // if current addr is not patched by older patch, write back original val + for (u = 0; u < i; u++) + if (PicoPatches[u].addr == addr) break; + if (u == i) + m68k_write16(PicoPatches[i].addr,PicoPatches[i].data_old); + } } } } From ed4a2193f7d42088cfb909868dfeaa4f74e98535 Mon Sep 17 00:00:00 2001 From: iLag Date: Sat, 25 Mar 2017 19:29:00 -0700 Subject: [PATCH 0108/1110] Add support for Master System cheats. notaz: drop wrong code (PicoWrite8_z80 are 68k functions, not for sms) --- pico/patch.c | 384 +++++++++++++++++++++++++++++++++++++-------------- pico/patch.h | 1 + 2 files changed, 278 insertions(+), 107 deletions(-) diff --git a/pico/patch.c b/pico/patch.c index 40e8372c..09626160 100644 --- a/pico/patch.c +++ b/pico/patch.c @@ -29,12 +29,13 @@ struct patch { unsigned int addr; unsigned short data; + unsigned char comp; }; struct patch_inst *PicoPatches = NULL; int PicoPatchCount = 0; -static char genie_chars[] = "AaBbCcDdEeFfGgHhJjKkLlMmNnPpRrSsTtVvWwXxYyZz0O1I2233445566778899"; +static char genie_chars_md[] = "AaBbCcDdEeFfGgHhJjKkLlMmNnPpRrSsTtVvWwXxYyZz0O1I2233445566778899"; /* genie_decode * This function converts a Game Genie code to an address:data pair. @@ -48,20 +49,23 @@ static char genie_chars[] = "AaBbCcDdEeFfGgHhJjKkLlMmNnPpRrSsTtVvWwXxYyZz0O1I223 * by result. 
If an error results, both the address and data will be set to -1. */ -static void genie_decode(const char* code, struct patch* result) +static void genie_decode_md(const char* code, struct patch* result) { int i = 0, n; char* x; - for(; i < 8; ++i) + for(; i < 9; ++i) { + /* Skip i=4; it's going to be the separating hyphen */ + if (i==4) continue; + /* If strchr returns NULL, we were given a bad character */ - if(!(x = strchr(genie_chars, code[i]))) + if(!(x = strchr(genie_chars_md, code[i]))) { result->addr = -1; result->data = -1; return; } - n = (x - genie_chars) >> 1; + n = (x - genie_chars_md) >> 1; /* Now, based on which character this is, fit it into the result */ switch(i) { @@ -82,21 +86,21 @@ static void genie_decode(const char* code, struct patch* result) /* BCDE ____ ____ ___A ____ ____ : ____ ____ ____ ____ */ result->addr |= (n & 0xF) << 20 | (n >> 4) << 8; break; - case 4: + case 5: /* ____ ABCD ____ ____ ____ ____ : ___E ____ ____ ____ */ result->data |= (n & 1) << 12; result->addr |= (n >> 1) << 16; break; - case 5: + case 6: /* ____ ____ ____ ____ ____ ____ : E___ ABCD ____ ____ */ result->data |= (n & 1) << 15 | (n >> 1) << 8; break; - case 6: + case 7: /* ____ ____ ____ ____ CDE_ ____ : _AB_ ____ ____ ____ */ result->data |= (n >> 3) << 13; result->addr |= (n & 7) << 5; break; - case 7: + case 8: /* ____ ____ ____ ____ ___A BCDE : ____ ____ ____ ____ */ result->addr |= n; break; @@ -114,30 +118,165 @@ static void genie_decode(const char* code, struct patch* result) static char hex_chars[] = "00112233445566778899AaBbCcDdEeFf"; -static void hex_decode(const char *code, struct patch *result) +static void hex_decode_md(const char *code, struct patch *result) { char *x; int i; /* 6 digits for address */ for(i = 0; i < 6; ++i) + { + if(!(x = strchr(hex_chars, code[i]))) { - if(!(x = strchr(hex_chars, code[i]))) - { - result->addr = result->data = -1; - return; - } - result->addr = (result->addr << 4) | ((x - hex_chars) >> 1); + result->addr = 
result->data = -1; + return; } + result->addr = (result->addr << 4) | ((x - hex_chars) >> 1); + } /* 4 digits for data */ - for(i = 6; i < 10; ++i) + for(i = 7; i < 11; ++i) + { + if(!(x = strchr(hex_chars, code[i]))) { + if (i==8) break; + result->addr = result->data = -1; + return; + } + result->data = (result->data << 4) | ((x - hex_chars) >> 1); + } +} + +void genie_decode_ms(const char *code, struct patch *result) +{ + char *x; + int i; + /* 2 digits for data */ + for(i=0;i<2;++i) + { + if(!(x = strchr(hex_chars, code[i]))) + { + result->addr = result->data = -1; + return; + } + result->data = (result->data << 4) | ((x - hex_chars) >> 1); + } + /* 4 digits for address */ + for(i=2;i<7;++i) + { + /* 4th character is hyphen and can be skipped*/ + if (i==3) continue; + if(!(x = strchr(hex_chars, code[i]))) + { + result->addr = result->data = -1; + return; + } + result->addr = (result->addr << 4) | ((x - hex_chars) >> 1); + } + /* Correct the address */ + result->addr = ((result->addr >> 4) | (result->addr << 12 & 0xF000)) ^ 0xF000; + /* Optional: 3 digits for comp */ + if (code[8]){ + for(i=8;i<11;++i) + { + if (i==9) continue; /* 2nd character is ignored */ if(!(x = strchr(hex_chars, code[i]))) { result->addr = result->data = -1; return; } - result->data = (result->data << 4) | ((x - hex_chars) >> 1); + result->comp = (result->comp << 4) | ((x - hex_chars) >> 1); } + /* Correct the comp */ + result->comp = ((result->comp >> 2) | ((result->comp << 6) & 0xC0)) ^ 0xBA; + } +} + +void ar_decode_ms(const char *code, struct patch *result){ + char *x; + int i; + /* 2 digits of padding*/ + /* 4 digits for address */ + for(i=2;i<7;++i) + { + /* 5th character is hyphen and can be skipped*/ + if (i==4) continue; + if(!(x = strchr(hex_chars, code[i]))) + { + result->addr = result->data = -1; + return; + } + result->addr = (result->addr << 4) | ((x - hex_chars) >> 1); + } + /* 2 digits for data */ + for(i=7;i<9;++i) + { + if(!(x = strchr(hex_chars, code[i]))) + { + 
result->addr = result->data = -1; + return; + } + result->data = (result->data << 4) | ((x - hex_chars) >> 1); + } +} + +void fusion_ram_decode(const char *code, struct patch *result){ + char *x; + int i; + /* 4 digits for address */ + for(i=0;i<4;++i) + { + if(!(x = strchr(hex_chars, code[i]))) + { + result->addr = result->data = -1; + return; + } + result->addr = (result->addr << 4) | ((x - hex_chars) >> 1); + } + /* Skip the ':' */ + /* 2 digits for data */ + for(i=5;i<7;++i) + { + if(!(x = strchr(hex_chars, code[i]))) + { + result->addr = result->data = -1; + return; + } + result->data = (result->data << 4) | ((x - hex_chars) >> 1); + } +} + +void fusion_rom_decode(const char *code, struct patch *result){ + char *x; + int i; + /* 2 digits for comp */ + for(i=0;i<2;++i) + { + if(!(x = strchr(hex_chars, code[i]))) + { + result->addr = result->data = -1; + return; + } + result->comp = (result->comp << 4) | ((x - hex_chars) >> 1); + } + /* 4 digits for address */ + for(i=2;i<6;++i) + { + if(!(x = strchr(hex_chars, code[i]))) + { + result->addr = result->data = -1; + return; + } + result->addr = (result->addr << 4) | ((x - hex_chars) >> 1); + } + /* 2 digits for data */ + for(i=7;i<9;++i) + { + if(!(x = strchr(hex_chars, code[i]))) + { + result->addr = result->data = -1; + return; + } + result->data = (result->data << 4) | ((x - hex_chars) >> 1); + } } /* THIS is the function you call from the MegaDrive or whatever. This figures @@ -145,75 +284,76 @@ static void hex_decode(const char *code, struct patch *result) * decoder. 
*/ void decode(const char* code, struct patch* result) { - int len = strlen(code), i, j; - char code_to_pass[16], *x; - const char *ad, *da; - int adl, dal; + int len = strlen(code); /* Initialize the result */ - result->addr = result->data = 0; + result->addr = result->data = result->comp = 0; - /* Just assume 8 char long string to be Game Genie code */ - if (len == 8) + if(!(PicoAHW & PAHW_SMS)) { - genie_decode(code, result); - return; - } + //If Genesis - /* If it's 9 chars long and the 5th is a hyphen, we have a Game Genie - * code. */ + //Game Genie if(len == 9 && code[4] == '-') { - /* Remove the hyphen and pass to genie_decode */ - code_to_pass[0] = code[0]; - code_to_pass[1] = code[1]; - code_to_pass[2] = code[2]; - code_to_pass[3] = code[3]; - code_to_pass[4] = code[5]; - code_to_pass[5] = code[6]; - code_to_pass[6] = code[7]; - code_to_pass[7] = code[8]; - code_to_pass[8] = '\0'; - genie_decode(code_to_pass, result); + genie_decode_md(code, result); return; } - /* Otherwise, we assume it's a hex code. - * Find the colon so we know where address ends and data starts. If there's - * no colon, then we haven't a code at all! 
*/ - if(!(x = strchr(code, ':'))) goto bad_code; - ad = code; da = x + 1; adl = x - code; dal = len - adl - 1; + //Master + else if(len >=9 && code[6] == ':') + { + hex_decode_md(code, result); + } - /* If a section is empty or too long, toss it */ - if(adl == 0 || adl > 6 || dal == 0 || dal > 4) goto bad_code; + else + { + goto bad_code; + } + } else { + //If Master System - /* Pad the address with zeros, then fill it with the value */ - for(i = 0; i < (6 - adl); ++i) code_to_pass[i] = '0'; - for(j = 0; i < 6; ++i, ++j) code_to_pass[i] = ad[j]; + //Genie + if(len == 11 && code[3] == '-' && code[7] == '-') + { + genie_decode_ms(code, result); + } - /* Do the same for data */ - for(i = 6; i < (10 - dal); ++i) code_to_pass[i] = '0'; - for(j = 0; i < 10; ++i, ++j) code_to_pass[i] = da[j]; + //AR + else if(len == 9 && code[4] == '-') + { + ar_decode_ms(code, result); + } - code_to_pass[10] = '\0'; + //Fusion RAM + else if(len == 7 && code[4] == ':') + { + fusion_ram_decode(code, result); + } + + //Fusion ROM + else if(len == 9 && code[6] == ':') + { + fusion_rom_decode(code, result); + } + + else + { + goto bad_code; + } + + //Convert RAM address space to Genesis location. + if (result->addr>=0xC000) + result->addr= 0xFF0000 | (0x1FFF & result->addr); + } - /* Decode and goodbye */ - hex_decode(code_to_pass, result); return; -bad_code: - - /* AGH! Invalid code! 
*/ + bad_code: result->data = result->addr = -1; return; } - - -unsigned int PicoRead16(unsigned int a); -void PicoWrite16(unsigned int a, unsigned short d); - - void PicoPatchUnload(void) { if (PicoPatches != NULL) @@ -293,54 +433,84 @@ int PicoPatchLoad(const char *fname) /* to be called when the Rom is loaded and byteswapped */ void PicoPatchPrepare(void) { - int i; + int i; + int addr; - for (i = 0; i < PicoPatchCount; i++) - { - PicoPatches[i].addr &= ~1; - if (PicoPatches[i].addr < Pico.romsize) - PicoPatches[i].data_old = *(unsigned short *)(Pico.rom + PicoPatches[i].addr); - else - PicoPatches[i].data_old = (unsigned short) m68k_read16(PicoPatches[i].addr); - if (strstr(PicoPatches[i].name, "AUTO")) - PicoPatches[i].active = 1; - } + for (i = 0; i < PicoPatchCount; i++) + { + addr=PicoPatches[i].addr; + addr &= ~1; + if (addr < Pico.romsize) + PicoPatches[i].data_old = *(unsigned short *)(Pico.rom + addr); + else + { + if(!(PicoAHW & PAHW_SMS)) + PicoPatches[i].data_old = (unsigned short) m68k_read16(addr); + else + ; // wrong: PicoPatches[i].data_old = (unsigned char) PicoRead8_z80(addr); + } + if (strstr(PicoPatches[i].name, "AUTO")) + PicoPatches[i].active = 1; + } } void PicoPatchApply(void) { - int i, u; - unsigned int addr; + int i, u; + unsigned int addr; - for (i = 0; i < PicoPatchCount; i++) + for (i = 0; i < PicoPatchCount; i++) + { + addr = PicoPatches[i].addr; + + if (addr < Pico.romsize) + { + if (PicoPatches[i].active) + { + if (!(PicoAHW & PAHW_SMS)) + *(unsigned short *)(Pico.rom + addr) = PicoPatches[i].data; + else if (!PicoPatches[i].comp || PicoPatches[i].comp == *(char *)(Pico.rom + addr)) + *(char *)(Pico.rom + addr) = (char) PicoPatches[i].data; + } + else + { + // if current addr is not patched by older patch, write back original val + for (u = 0; u < i; u++) + if (PicoPatches[u].addr == addr) break; + if (u == i) { - addr = PicoPatches[i].addr; - if (addr < Pico.romsize) - { - if (PicoPatches[i].active) - *(unsigned short 
*)(Pico.rom + addr) = PicoPatches[i].data; - else { - // if current addr is not patched by older patch, write back original val - for (u = 0; u < i; u++) - if (PicoPatches[u].addr == addr) break; - if (u == i) - *(unsigned short *)(Pico.rom + addr) = PicoPatches[i].data_old; - } - // fprintf(stderr, "patched %i: %06x:%04x\n", PicoPatches[i].active, addr, - // *(unsigned short *)(Pico.rom + addr)); - } - else - { - if (PicoPatches[i].active) - m68k_write16(PicoPatches[i].addr,PicoPatches[i].data); - else { - // if current addr is not patched by older patch, write back original val - for (u = 0; u < i; u++) - if (PicoPatches[u].addr == addr) break; - if (u == i) - m68k_write16(PicoPatches[i].addr,PicoPatches[i].data_old); - } - } + if (!(PicoAHW & PAHW_SMS)) + *(unsigned short *)(Pico.rom + addr) = PicoPatches[i].data_old; + else + *(char *)(Pico.rom + addr) = (char) PicoPatches[i].data_old; } + } + // fprintf(stderr, "patched %i: %06x:%04x\n", PicoPatches[i].active, addr, + // *(unsigned short *)(Pico.rom + addr)); + } + else + { + if (PicoPatches[i].active) + { + if (!(PicoAHW & PAHW_SMS)) + m68k_write16(addr,PicoPatches[i].data); + else + ;// wrong: PicoWrite8_z80(addr,PicoPatches[i].data); + } + else + { + // if current addr is not patched by older patch, write back original val + for (u = 0; u < i; u++) + if (PicoPatches[u].addr == addr) break; + if (u == i) + { + if (!(PicoAHW & PAHW_SMS)) + m68k_write16(PicoPatches[i].addr,PicoPatches[i].data_old); + else + ;// wrong: PicoWrite8_z80(PicoPatches[i].addr,PicoPatches[i].data_old); + } + } + } + } } diff --git a/pico/patch.h b/pico/patch.h index 6e9420f8..9ab86d36 100644 --- a/pico/patch.h +++ b/pico/patch.h @@ -13,6 +13,7 @@ struct patch_inst unsigned int addr; unsigned short data; unsigned short data_old; + unsigned char comp; }; extern struct patch_inst *PicoPatches; From 9a570a67ca07aca39037024d37db1ad33712dbf1 Mon Sep 17 00:00:00 2001 From: iLag Date: Sat, 25 Mar 2017 19:35:36 -0700 Subject: [PATCH 0109/1110] 
Restore support for short GG cheats. --- pico/patch.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pico/patch.c b/pico/patch.c index 09626160..7f5d594b 100644 --- a/pico/patch.c +++ b/pico/patch.c @@ -174,7 +174,7 @@ void genie_decode_ms(const char *code, struct patch *result) /* Correct the address */ result->addr = ((result->addr >> 4) | (result->addr << 12 & 0xF000)) ^ 0xF000; /* Optional: 3 digits for comp */ - if (code[8]){ + if (code[8]=='-'){ for(i=8;i<11;++i) { if (i==9) continue; /* 2nd character is ignored */ @@ -314,7 +314,7 @@ void decode(const char* code, struct patch* result) //If Master System //Genie - if(len == 11 && code[3] == '-' && code[7] == '-') + if(len >= 7 && code[3] == '-') { genie_decode_ms(code, result); } From 126eb5f46978a7e97aae210971f1364ee548a646 Mon Sep 17 00:00:00 2001 From: iLag Date: Sat, 25 Mar 2017 20:28:08 -0700 Subject: [PATCH 0110/1110] Fix remaining bugs and fix indentation --- pico/patch.c | 263 ++++++++++++++++++++++++++------------------------- 1 file changed, 132 insertions(+), 131 deletions(-) diff --git a/pico/patch.c b/pico/patch.c index 7f5d594b..d534a2c9 100644 --- a/pico/patch.c +++ b/pico/patch.c @@ -27,9 +27,9 @@ struct patch { - unsigned int addr; - unsigned short data; - unsigned char comp; + unsigned int addr; + unsigned short data; + unsigned char comp; }; struct patch_inst *PicoPatches = NULL; @@ -174,14 +174,15 @@ void genie_decode_ms(const char *code, struct patch *result) /* Correct the address */ result->addr = ((result->addr >> 4) | (result->addr << 12 & 0xF000)) ^ 0xF000; /* Optional: 3 digits for comp */ - if (code[8]=='-'){ + if (code[7]=='-') + { for(i=8;i<11;++i) { if (i==9) continue; /* 2nd character is ignored */ if(!(x = strchr(hex_chars, code[i]))) { - result->addr = result->data = -1; - return; + result->addr = result->data = -1; + return; } result->comp = (result->comp << 4) | ((x - hex_chars) >> 1); } @@ -356,161 +357,161 @@ void decode(const char* code, struct 
patch* result) void PicoPatchUnload(void) { - if (PicoPatches != NULL) - { - free(PicoPatches); - PicoPatches = NULL; - } - PicoPatchCount = 0; + if (PicoPatches != NULL) + { + free(PicoPatches); + PicoPatches = NULL; + } + PicoPatchCount = 0; } int PicoPatchLoad(const char *fname) { - FILE *f; - char buff[256]; - struct patch pt; - int array_len = 0; + FILE *f; + char buff[256]; + struct patch pt; + int array_len = 0; - PicoPatchUnload(); + PicoPatchUnload(); - f = fopen(fname, "r"); - if (f == NULL) - { - return -1; - } + f = fopen(fname, "r"); + if (f == NULL) + { + return -1; + } - while (fgets(buff, sizeof(buff), f)) - { - int llen, clen; + while (fgets(buff, sizeof(buff), f)) + { + int llen, clen; - llen = strlen(buff); - for (clen = 0; clen < llen; clen++) - if (isspace_(buff[clen])) - break; - buff[clen] = 0; + llen = strlen(buff); + for (clen = 0; clen < llen; clen++) + if (isspace_(buff[clen])) + break; + buff[clen] = 0; - if (clen > 11 || clen < 8) - continue; + if (clen > 11 || clen < 8) + continue; - decode(buff, &pt); - if (pt.addr == (unsigned int)-1 || pt.data == (unsigned short)-1) - continue; + decode(buff, &pt); + if (pt.addr == (unsigned int)-1 || pt.data == (unsigned short)-1) + continue; - /* code was good, add it */ - if (array_len < PicoPatchCount + 1) - { - void *ptr; - array_len *= 2; - array_len++; - ptr = realloc(PicoPatches, array_len * sizeof(PicoPatches[0])); - if (ptr == NULL) break; - PicoPatches = ptr; - } - strcpy(PicoPatches[PicoPatchCount].code, buff); - /* strip */ - for (clen++; clen < llen; clen++) - if (!isspace_(buff[clen])) - break; - for (llen--; llen > 0; llen--) - if (!isspace_(buff[llen])) - break; - buff[llen+1] = 0; - strncpy(PicoPatches[PicoPatchCount].name, buff + clen, 51); - PicoPatches[PicoPatchCount].name[51] = 0; - PicoPatches[PicoPatchCount].active = 0; - PicoPatches[PicoPatchCount].addr = pt.addr; - PicoPatches[PicoPatchCount].data = pt.data; - PicoPatches[PicoPatchCount].data_old = 0; - PicoPatchCount++; - 
// fprintf(stderr, "loaded patch #%i: %06x:%04x \"%s\"\n", PicoPatchCount-1, pt.addr, pt.data, - // PicoPatches[PicoPatchCount-1].name); - } - fclose(f); + /* code was good, add it */ + if (array_len < PicoPatchCount + 1) + { + void *ptr; + array_len *= 2; + array_len++; + ptr = realloc(PicoPatches, array_len * sizeof(PicoPatches[0])); + if (ptr == NULL) break; + PicoPatches = ptr; + } + strcpy(PicoPatches[PicoPatchCount].code, buff); + /* strip */ + for (clen++; clen < llen; clen++) + if (!isspace_(buff[clen])) + break; + for (llen--; llen > 0; llen--) + if (!isspace_(buff[llen])) + break; + buff[llen+1] = 0; + strncpy(PicoPatches[PicoPatchCount].name, buff + clen, 51); + PicoPatches[PicoPatchCount].name[51] = 0; + PicoPatches[PicoPatchCount].active = 0; + PicoPatches[PicoPatchCount].addr = pt.addr; + PicoPatches[PicoPatchCount].data = pt.data; + PicoPatches[PicoPatchCount].data_old = 0; + PicoPatchCount++; + // fprintf(stderr, "loaded patch #%i: %06x:%04x \"%s\"\n", PicoPatchCount-1, pt.addr, pt.data, + // PicoPatches[PicoPatchCount-1].name); + } + fclose(f); - return 0; + return 0; } /* to be called when the Rom is loaded and byteswapped */ void PicoPatchPrepare(void) { - int i; - int addr; + int i; + int addr; - for (i = 0; i < PicoPatchCount; i++) - { - addr=PicoPatches[i].addr; - addr &= ~1; - if (addr < Pico.romsize) - PicoPatches[i].data_old = *(unsigned short *)(Pico.rom + addr); - else - { - if(!(PicoAHW & PAHW_SMS)) - PicoPatches[i].data_old = (unsigned short) m68k_read16(addr); + for (i = 0; i < PicoPatchCount; i++) + { + addr=PicoPatches[i].addr; + addr &= ~1; + if (addr < Pico.romsize) + PicoPatches[i].data_old = *(unsigned short *)(Pico.rom + addr); else - ; // wrong: PicoPatches[i].data_old = (unsigned char) PicoRead8_z80(addr); - } - if (strstr(PicoPatches[i].name, "AUTO")) - PicoPatches[i].active = 1; - } + { + if(!(PicoAHW & PAHW_SMS)) + PicoPatches[i].data_old = (unsigned short) m68k_read16(addr); + else + ;// wrong: PicoPatches[i].data_old = 
(unsigned char) PicoRead8_z80(addr); + } + if (strstr(PicoPatches[i].name, "AUTO")) + PicoPatches[i].active = 1; + } } void PicoPatchApply(void) { - int i, u; - unsigned int addr; + int i, u; + unsigned int addr; - for (i = 0; i < PicoPatchCount; i++) - { - addr = PicoPatches[i].addr; + for (i = 0; i < PicoPatchCount; i++) + { + addr = PicoPatches[i].addr; - if (addr < Pico.romsize) - { - if (PicoPatches[i].active) + if (addr < Pico.romsize) { - if (!(PicoAHW & PAHW_SMS)) - *(unsigned short *)(Pico.rom + addr) = PicoPatches[i].data; - else if (!PicoPatches[i].comp || PicoPatches[i].comp == *(char *)(Pico.rom + addr)) - *(char *)(Pico.rom + addr) = (char) PicoPatches[i].data; - } - else - { - // if current addr is not patched by older patch, write back original val - for (u = 0; u < i; u++) - if (PicoPatches[u].addr == addr) break; - if (u == i) - { - if (!(PicoAHW & PAHW_SMS)) - *(unsigned short *)(Pico.rom + addr) = PicoPatches[i].data_old; - else - *(char *)(Pico.rom + addr) = (char) PicoPatches[i].data_old; - } - } + if (PicoPatches[i].active) + { + if (!(PicoAHW & PAHW_SMS)) + *(unsigned short *)(Pico.rom + addr) = PicoPatches[i].data; + else if (!PicoPatches[i].comp || PicoPatches[i].comp == *(char *)(Pico.rom + addr)) + *(char *)(Pico.rom + addr) = (char) PicoPatches[i].data; + } + else + { + // if current addr is not patched by older patch, write back original val + for (u = 0; u < i; u++) + if (PicoPatches[u].addr == addr) break; + if (u == i) + { + if (!(PicoAHW & PAHW_SMS)) + *(unsigned short *)(Pico.rom + addr) = PicoPatches[i].data_old; + else + *(char *)(Pico.rom + addr) = (char) PicoPatches[i].data_old; + } + } // fprintf(stderr, "patched %i: %06x:%04x\n", PicoPatches[i].active, addr, - // *(unsigned short *)(Pico.rom + addr)); - } - else - { - if (PicoPatches[i].active) - { - if (!(PicoAHW & PAHW_SMS)) - m68k_write16(addr,PicoPatches[i].data); - else - ;// wrong: PicoWrite8_z80(addr,PicoPatches[i].data); + // *(unsigned short *)(Pico.rom + addr)); } 
else { - // if current addr is not patched by older patch, write back original val - for (u = 0; u < i; u++) - if (PicoPatches[u].addr == addr) break; - if (u == i) - { - if (!(PicoAHW & PAHW_SMS)) - m68k_write16(PicoPatches[i].addr,PicoPatches[i].data_old); - else - ;// wrong: PicoWrite8_z80(PicoPatches[i].addr,PicoPatches[i].data_old); - } + if (PicoPatches[i].active) + { + if (!(PicoAHW & PAHW_SMS)) + m68k_write16(addr,PicoPatches[i].data); + else + ;// wrong: PicoWrite8_z80(addr,PicoPatches[i].data); + } + else + { + // if current addr is not patched by older patch, write back original val + for (u = 0; u < i; u++) + if (PicoPatches[u].addr == addr) break; + if (u == i) + { + if (!(PicoAHW & PAHW_SMS)) + m68k_write16(PicoPatches[i].addr,PicoPatches[i].data_old); + else + ;// wrong: PicoWrite8_z80(PicoPatches[i].addr,PicoPatches[i].data_old); + } + } } - } - } + } } From 7612bf90bef4f54e60865db057040b62c289ea34 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 13 Oct 2017 00:39:51 +0300 Subject: [PATCH 0111/1110] re-import all libretro code from its fork Verbatim copy from https://github.com/libretro/picodrive/ commit 9ae88ef15ff00cacc3877c7ecc13b0092bab50b8 , so look there for the history of libretro specific changes. Unfortunately there is too much noise and divergence to merge this in a proper way. 
--- Makefile | 6 +- Makefile.libretro | 520 ++++- jni/Android.mk | 11 +- platform/libretro.c | 915 -------- platform/libretro.h | 787 ------- platform/libretro/3ds/3ds_utils.c | 78 + platform/libretro/3ds/3ds_utils.h | 16 + platform/libretro/libretro.c | 1421 ++++++++++++ platform/libretro/libretro.h | 1926 +++++++++++++++++ platform/libretro/msvc/msvc-2003-xbox1.bat | 47 + platform/libretro/msvc/msvc-2010-360.bat | 124 ++ platform/libretro/msvc/msvc-2010.bat | 124 ++ platform/libretro/msvc/msvc-2010.sln | 20 + platform/libretro/msvc/msvc-2010/libretro.def | 27 + .../libretro/msvc/msvc-2010/msvc-2010.vcxproj | 157 ++ .../msvc/msvc-2010/msvc-2010.vcxproj.filters | 277 +++ platform/libretro/psp/draw_amips.s | 1756 +++++++++++++++ 17 files changed, 6402 insertions(+), 1810 deletions(-) delete mode 100644 platform/libretro.c delete mode 100644 platform/libretro.h create mode 100644 platform/libretro/3ds/3ds_utils.c create mode 100644 platform/libretro/3ds/3ds_utils.h create mode 100644 platform/libretro/libretro.c create mode 100644 platform/libretro/libretro.h create mode 100644 platform/libretro/msvc/msvc-2003-xbox1.bat create mode 100644 platform/libretro/msvc/msvc-2010-360.bat create mode 100644 platform/libretro/msvc/msvc-2010.bat create mode 100644 platform/libretro/msvc/msvc-2010.sln create mode 100644 platform/libretro/msvc/msvc-2010/libretro.def create mode 100644 platform/libretro/msvc/msvc-2010/msvc-2010.vcxproj create mode 100644 platform/libretro/msvc/msvc-2010/msvc-2010.vcxproj.filters create mode 100644 platform/libretro/psp/draw_amips.s diff --git a/Makefile b/Makefile index ff008506..f0cd64ef 100644 --- a/Makefile +++ b/Makefile @@ -111,7 +111,7 @@ USE_FRONTEND = 1 PLATFORM_MP3 = 1 endif ifeq "$(PLATFORM)" "libretro" -OBJS += platform/libretro.o +OBJS += platform/libretro/libretro.o endif ifeq "$(USE_FRONTEND)" "1" @@ -181,7 +181,11 @@ clean: $(RM) -r .opk_data $(TARGET): $(OBJS) +ifeq ($(STATIC_LINKING), 1) + $(AR) rcs $@ $^ +else $(CC) -o $@ 
$(CFLAGS) $^ $(LDFLAGS) $(LDLIBS) +endif pprof: platform/linux/pprof.c $(CC) -O2 -ggdb -DPPROF -DPPROF_TOOL -I../../ -I. $^ -o $@ diff --git a/Makefile.libretro b/Makefile.libretro index 5d5f4729..14f2dc50 100644 --- a/Makefile.libretro +++ b/Makefile.libretro @@ -1,16 +1,20 @@ # Makefile for PicoDrive (libretro) ifeq ($(platform),) -platform = unix -ifeq ($(shell uname -a),) - platform = win -else ifneq ($(findstring MINGW,$(shell uname -a)),) - platform = win -else ifneq ($(findstring Darwin,$(shell uname -a)),) - platform = osx -else ifneq ($(findstring win,$(shell uname -a)),) - platform = win -endif + platform = unix + ifeq ($(shell uname -a),) + platform = win + else ifneq ($(findstring MINGW,$(shell uname -a)),) + platform = win + else ifneq ($(findstring Darwin,$(shell uname -a)),) + platform = osx + arch = intel + ifeq ($(shell uname -p),powerpc) + arch = ppc + endif + else ifneq ($(findstring win,$(shell uname -a)),) + platform = win + endif endif CC ?= gcc @@ -19,7 +23,13 @@ AS ?= as CC_AS ?= $(CC) CFLAGS ?= +STATIC_LINKING:= 0 TARGET_NAME := picodrive +LIBM := -lm +GIT_VERSION := " $(shell git rev-parse --short HEAD || echo unknown)" +ifneq ($(GIT_VERSION)," unknown") + CFLAGS += -DGIT_VERSION=\"$(GIT_VERSION)\" +endif asm_memory = 0 asm_render = 0 @@ -28,108 +38,410 @@ asm_misc = 0 asm_cdmemory = 0 asm_mix = 0 +fpic := + +ifeq ($(STATIC_LINKING),1) +EXT=a +endif + +# Unix ifeq ($(platform), unix) - TARGET := $(TARGET_NAME)_libretro.so - SHARED := -shared + EXT ?= so + TARGET := $(TARGET_NAME)_libretro.$(EXT) + fpic := -fPIC + SHARED := -shared + DONT_COMPILE_IN_ZLIB = 1 + CFLAGS += -DFAMEC_NO_GOTOS + +# Portable Linux +else ifeq ($(platform), linux-portable) + EXT ?= so + TARGET := $(TARGET_NAME)_libretro.$(EXT) + SHARED := -shared -nostdlib + fpic := -fPIC + LIBM := + DONT_COMPILE_IN_ZLIB = 1 + CFLAGS += -DFAMEC_NO_GOTOS + +# OS X else ifeq ($(platform), osx) - TARGET := $(TARGET_NAME)_libretro.dylib - SHARED := -dynamiclib -else ifeq ($(platform), 
ios) - TARGET := $(TARGET_NAME)_libretro_ios.dylib - SHARED := -dynamiclib + EXT ?= dylib + TARGET := $(TARGET_NAME)_libretro.$(EXT) + SHARED := -dynamiclib + fpic := -fPIC + APPLE := 1 + arch = intel + ifeq ($(shell uname -p),powerpc) + arch = ppc + endif + ifeq ($(arch),ppc) + CFLAGS += -DBLARGG_BIG_ENDIAN=1 -D__ppc__ -DFAMEC_NO_GOTOS + endif + OSXVER = `sw_vers -productVersion | cut -d. -f 2` + OSX_LT_MAVERICKS = `(( $(OSXVER) <= 9)) && echo "YES"` + SHARED += -mmacosx-version-min=10.1 - CC = clang -arch armv7 -isysroot $(IOSSDK) -miphoneos-version-min=5.0 - CXX = clang++ -arch armv7 -isysroot $(IOSSDK) -miphoneos-version-min=5.0 - CC_AS = perl ./tools/gas-preprocessor.pl $(CC) -miphoneos-version-min=5.0 - CFLAGS += -mcpu=cortex-a8 -mtune=cortex-a8 -mfpu=neon -marm - ASFLAGS += -mcpu=cortex-a8 -mtune=cortex-a8 -mfpu=neon - CFLAGS += -DIOS -miphoneos-version-min=5.0 +else ifeq ($(platform), staticios) + TARGET := $(TARGET_NAME)_libretro_ios.a + APPLE := 1 + ifeq ($(IOSSDK),) + IOSSDK := $(shell xcodebuild -version -sdk iphoneos Path) + endif + CC = clang -arch armv7 -arch arm64 -isysroot $(IOSSDK) + CXX = clang++ -arch armv7 -arch arm64 -isysroot $(IOSSDK) + CC_AS = perl ./tools/gas-preprocessor.pl $(CC) + CFLAGS += -marm + CFLAGS += -DIOS - ARCH := arm + CC += -miphoneos-version-min=8.0 + CXX += -miphoneos-version-min=8.0 + CC_AS += -miphoneos-version-min=8.0 + CFLAGS += -miphoneos-version-min=8.0 + ARCH := arm - use_cyclone = 0 - use_fame = 1 - use_drz80 = 0 - use_cz80 = 1 + STATIC_LINKING = 1 + use_cyclone = 0 + use_fame = 1 + use_drz80 = 0 + use_cz80 = 1 + use_sh2drc = 0 + use_svpdrc = 0 + +# iOS +else ifneq (,$(findstring ios,$(platform))) + TARGET := $(TARGET_NAME)_libretro_ios.dylib + SHARED := -dynamiclib + fpic := -fPIC + APPLE := 1 + ifeq ($(IOSSDK),) + IOSSDK := $(shell xcodebuild -version -sdk iphoneos Path) + endif + CC = clang -arch armv7 -isysroot $(IOSSDK) + CXX = clang++ -arch armv7 -isysroot $(IOSSDK) + CC_AS = perl ./tools/gas-preprocessor.pl 
$(CC) + CFLAGS += -mcpu=cortex-a8 -mtune=cortex-a8 -mfpu=neon -marm + ASFLAGS += -mcpu=cortex-a8 -mtune=cortex-a8 -mfpu=neon + CFLAGS += -DIOS + +ifeq ($(platform),ios9) + CC += -miphoneos-version-min=8.0 + CXX += -miphoneos-version-min=8.0 + CC_AS += -miphoneos-version-min=8.0 + CFLAGS += -miphoneos-version-min=8.0 +else + CC += -miphoneos-version-min=5.0 + CXX += -miphoneos-version-min=5.0 + CC_AS += -miphoneos-version-min=5.0 + CFLAGS += -miphoneos-version-min=5.0 +endif + ARCH := arm + + use_cyclone = 0 + use_fame = 1 + use_drz80 = 0 + use_cz80 = 1 + use_sh2drc = 1 + use_svpdrc = 1 + +# PS3 +else ifeq ($(platform), ps3) + TARGET := $(TARGET_NAME)_libretro_$(platform).a + CC = $(CELL_SDK)/host-win32/ppu/bin/ppu-lv2-gcc.exe + AR = $(CELL_SDK)/host-win32/ppu/bin/ppu-lv2-ar.exe + CFLAGS += -DBLARGG_BIG_ENDIAN=1 -D__ppc__ -DFAMEC_NO_GOTOS + STATIC_LINKING = 1 + NO_MMAP = 1 + DONT_COMPILE_IN_ZLIB = 1 + + asm_memory = 0 + asm_render = 0 + asm_ym2612 = 0 + asm_misc = 0 + asm_cdpico = 0 + asm_cdmemory = 0 + asm_mix = 0 + use_cyclone = 0 + use_fame = 1 + use_drz80 = 0 + use_cz80 = 1 + +# sncps3 +else ifeq ($(platform), sncps3) + TARGET := $(TARGET_NAME)_libretro_ps3.a + CC = $(CELL_SDK)/host-win32/sn/bin/ps3ppusnc.exe + AR = $(CELL_SDK)/host-win32/sn/bin/ps3snarl.exe + CFLAGS += -DBLARGG_BIG_ENDIAN=1 -D__ppc__ -DFAMEC_NO_GOTOS + STATIC_LINKING = 1 + NO_MMAP = 1 + DONT_COMPILE_IN_ZLIB = 1 + + asm_memory = 0 + asm_render = 0 + asm_ym2612 = 0 + asm_misc = 0 + asm_cdpico = 0 + asm_cdmemory = 0 + asm_mix = 0 + use_cyclone = 0 + use_fame = 1 + use_drz80 = 0 + use_cz80 = 1 + +# Lightweight PS3 Homebrew SDK +else ifeq ($(platform), psl1ght) + TARGET := $(TARGET_NAME)_libretro_$(platform).a + CC = $(PS3DEV)/ppu/bin/ppu-gcc$(EXE_EXT) + AR = $(PS3DEV)/ppu/bin/ppu-ar$(EXE_EXT) + CFLAGS += -DBLARGG_BIG_ENDIAN=1 -D__ppc__ -DFAMEC_NO_GOTOS + STATIC_LINKING = 1 + NO_MMAP = 1 + DONT_COMPILE_IN_ZLIB = 1 + + asm_memory = 0 + asm_render = 0 + asm_ym2612 = 0 + asm_misc = 0 + asm_cdpico = 0 + 
asm_cdmemory = 0 + asm_mix = 0 + use_cyclone = 0 + use_fame = 1 + use_drz80 = 0 + use_cz80 = 1 + +# PSP +else ifeq ($(platform), psp1) + TARGET := $(TARGET_NAME)_libretro_$(platform).a + CC = psp-gcc$(EXE_EXT) + AR = psp-ar$(EXE_EXT) + CFLAGS += -G0 -ftracer + CFLAGS += -DPSP -D_ASM_DRAW_C_AMIPS + STATIC_LINKING = 1 + NO_MMAP = 1 + DONT_COMPILE_IN_ZLIB = 1 + + asm_memory = 0 + asm_render = 1 + asm_ym2612 = 0 + asm_misc = 0 + asm_cdpico = 0 + asm_cdmemory = 0 + asm_mix = 0 + use_cyclone = 0 + use_fame = 1 + use_drz80 = 0 + use_cz80 = 1 + + OBJS +=platform/libretro/psp/draw_amips.o + +# CTR (3DS) +else ifeq ($(platform), ctr) + TARGET := $(TARGET_NAME)_libretro_$(platform).a + CC = $(DEVKITARM)/bin/arm-none-eabi-gcc$(EXE_EXT) + CXX = $(DEVKITARM)/bin/arm-none-eabi-g++$(EXE_EXT) + AR = $(DEVKITARM)/bin/arm-none-eabi-ar$(EXE_EXT) + CFLAGS += -DARM11 -D_3DS + CFLAGS += -march=armv6k -mtune=mpcore -mfloat-abi=hard -marm -mfpu=vfp + CFLAGS += -Wall -mword-relocations + CFLAGS += -fomit-frame-pointer -ffast-math + STATIC_LINKING = 1 + NO_MMAP = 1 + DONT_COMPILE_IN_ZLIB = 1 + ARCH = arm + ARM_ASM = 1 + + asm_memory = 1 + asm_render = 1 + asm_ym2612 = 1 + asm_misc = 1 + asm_cdpico = 1 + asm_cdmemory = 1 + asm_mix = 1 + + use_cyclone = 1 + use_fame = 0 + use_drz80 = 1 + use_cz80 = 0 use_sh2drc = 1 use_svpdrc = 1 -else ifeq ($(platform), ps3) - TARGET := $(TARGET_NAME)_libretro_ps3.a - CC = $(CELL_SDK)/host-win32/ppu/bin/ppu-lv2-gcc.exe - AR = $(CELL_SDK)/host-win32/ppu/bin/ppu-lv2-ar.exe - CFLAGS += -DBLARGG_BIG_ENDIAN=1 -D__ppc__ -else ifeq ($(platform), sncps3) - TARGET := $(TARGET_NAME)_libretro_ps3.a - CC = $(CELL_SDK)/host-win32/sn/bin/ps3ppusnc.exe - AR = $(CELL_SDK)/host-win32/sn/bin/ps3snarl.exe - CFLAGS += -DBLARGG_BIG_ENDIAN=1 -D__ppc__ -else ifeq ($(platform), psl1ght) - TARGET := $(TARGET_NAME)_libretro_psl1ght.a - CC = $(PS3DEV)/ppu/bin/ppu-gcc$(EXE_EXT) - AR = $(PS3DEV)/ppu/bin/ppu-ar$(EXE_EXT) - CFLAGS += -DBLARGG_BIG_ENDIAN=1 -D__ppc__ -else ifeq ($(platform), 
psp1) - TARGET := $(TARGET_NAME)_libretro_psp1.a - CC = psp-gcc$(EXE_EXT) - AR = psp-ar$(EXE_EXT) - CFLAGS += -DPSP -G0 -else ifeq ($(platform), xenon) - TARGET := $(TARGET_NAME)_libretro_xenon360.a - CC = xenon-gcc$(EXE_EXT) - AR = xenon-ar$(EXE_EXT) - CFLAGS += -D__LIBXENON__ -m32 -D__ppc__ -else ifeq ($(platform), ngc) - TARGET := $(TARGET_NAME)_libretro_ngc.a - CC = $(DEVKITPPC)/bin/powerpc-eabi-gcc$(EXE_EXT) - AR = $(DEVKITPPC)/bin/powerpc-eabi-ar$(EXE_EXT) - CFLAGS += -DGEKKO -DHW_DOL -mrvl -mcpu=750 -meabi -mhard-float -DBLARGG_BIG_ENDIAN=1 -D__ppc__ -else ifeq ($(platform), wii) - TARGET := libretro_$(TARGET_NAME)_wii.a - CC = $(DEVKITPPC)/bin/powerpc-eabi-gcc$(EXE_EXT) - AR = $(DEVKITPPC)/bin/powerpc-eabi-ar$(EXE_EXT) - CFLAGS += -DGEKKO -DHW_RVL -mrvl -mcpu=750 -meabi -mhard-float -DBLARGG_BIG_ENDIAN=1 -D__ppc__ -else ifeq ($(platform), qnx) - TARGET := $(TARGET_NAME)_libretro_qnx.so - CC = qcc -Vgcc_ntoarmv7le - CC_AS = $(CC) - CFLAGS += -DBASE_ADDR_FIXED=0 -D__BLACKBERRY_QNX__ -marm -mcpu=cortex-a9 -mtune=cortex-a9 -mfpu=neon -mfloat-abi=softfp - ASFLAGS += -mcpu=cortex-a9 -mfpu=neon -mfloat-abi=softfp + OBJS +=platform/libretro/3ds/3ds_utils.o +# Raspberry Pi (original model) Raspbian +else ifeq ($(platform), raspberrypi) + CFLAGS += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6j + CFLAGS += -Wall -mword-relocations + CFLAGS += -fomit-frame-pointer -ffast-math ARCH = arm ARM_ASM = 1 -else ifneq (,$(findstring armv,$(platform))) + TARGET := $(TARGET_NAME)_libretro.so - SHARED := -shared -Wl,--no-undefined -ifneq (,$(findstring cortexa8,$(platform))) - CFLAGS += -marm -mcpu=cortex-a8 - ASFLAGS += -mcpu=cortex-a8 -else ifneq (,$(findstring cortexa9,$(platform))) - CFLAGS += -marm -mcpu=cortex-a9 - ASFLAGS += -mcpu=cortex-a9 -endif - CFLAGS += -marm -ifneq (,$(findstring neon,$(platform))) - CFLAGS += -mfpu=neon - ASFLAGS += -mfpu=neon -endif -ifneq (,$(findstring softfloat,$(platform))) - CFLAGS += -mfloat-abi=softfp - ASFLAGS += -mfloat-abi=softfp -else 
ifneq (,$(findstring hardfloat,$(platform))) - CFLAGS += -mfloat-abi=hard - ASFLAGS += -mfloat-abi=hard -endif -ifneq (,$(findstring armasm,$(platform))) - ARM_ASM = 1 -endif - ARCH = arm + SHARED := -shared + fpic := -fPIC + DONT_COMPILE_IN_ZLIB = 1 + + asm_memory = 1 + asm_render = 1 + asm_ym2612 = 1 + asm_misc = 1 + asm_cdpico = 1 + asm_cdmemory = 1 + asm_mix = 1 + + use_cyclone = 1 + use_fame = 0 + use_drz80 = 1 + use_cz80 = 0 + use_sh2drc = 1 + use_svpdrc = 1 + +# Vita +else ifeq ($(platform), vita) + TARGET := $(TARGET_NAME)_libretro_$(platform).a + CC = arm-vita-eabi-gcc$(EXE_EXT) + AR = arm-vita-eabi-ar$(EXE_EXT) + CFLAGS += -DVITA + CFLAGS += -marm -mfpu=neon -mcpu=cortex-a9 -march=armv7-a -mfloat-abi=hard -ffast-math + CFLAGS += -fno-asynchronous-unwind-tables -ftree-vectorize -funroll-loops + CFLAGS += -mword-relocations -fno-unwind-tables + CFLAGS += -fno-optimize-sibling-calls + STATIC_LINKING = 1 + NO_MMAP = 1 + DONT_COMPILE_IN_ZLIB = 1 + ARCH = arm + + asm_memory = 1 + asm_render = 1 + asm_ym2612 = 1 + asm_misc = 1 + asm_cdpico = 1 + asm_cdmemory = 1 + asm_mix = 1 + use_cyclone = 1 + use_fame = 0 + use_drz80 = 1 + use_cz80 = 0 + use_sh2drc = 1 + use_svpdrc = 1 + +# Xbox 360 +else ifeq ($(platform), xenon) + TARGET := $(TARGET_NAME)_libretro_xenon360.a + CC = xenon-gcc$(EXE_EXT) + AR = xenon-ar$(EXE_EXT) + CFLAGS += -D__LIBXENON__ -m32 -D__ppc__ + +# Nintendo Game Cube +else ifeq ($(platform), ngc) + TARGET := $(TARGET_NAME)_libretro_$(platform).a + CC = $(DEVKITPPC)/bin/powerpc-eabi-gcc$(EXE_EXT) + AR = $(DEVKITPPC)/bin/powerpc-eabi-ar$(EXE_EXT) + CFLAGS += -DGEKKO -DHW_DOL -mrvl -mcpu=750 -meabi -mhard-float -DBLARGG_BIG_ENDIAN=1 -D__ppc__ + +# Nintendo Wii +else ifeq ($(platform), wii) + TARGET := $(TARGET_NAME)_libretro_$(platform).a + CC = $(DEVKITPPC)/bin/powerpc-eabi-gcc$(EXE_EXT) + AR = $(DEVKITPPC)/bin/powerpc-eabi-ar$(EXE_EXT) + CFLAGS += -DGEKKO -DHW_RVL -mrvl -mcpu=750 -meabi -mhard-float -DBLARGG_BIG_ENDIAN=1 -D__ppc__ + +# QNX +else ifeq 
($(platform), qnx) + TARGET := $(TARGET_NAME)_libretro_$(platform).so + fpic := -fPIC + CC = qcc -Vgcc_ntoarmv7le + CC_AS = $(CC) + CFLAGS += -DBASE_ADDR_FIXED=0 -D__BLACKBERRY_QNX__ -marm -mcpu=cortex-a9 -mtune=cortex-a9 -mfpu=neon -mfloat-abi=softfp + ASFLAGS += -mcpu=cortex-a9 -mfpu=neon -mfloat-abi=softfp + ARCH = arm + ARM_ASM = 1 + + use_cyclone = 0 + use_fame = 1 + use_drz80 = 0 + use_cz80 = 1 + use_sh2drc = 1 + use_svpdrc = 1 + +# ARM +else ifneq (,$(findstring armv,$(platform))) + TARGET := $(TARGET_NAME)_libretro.so + SHARED := -shared -Wl,--no-undefined,-Bsymbolic + fpic := -fPIC + ifneq (,$(findstring cortexa5,$(platform))) + CFLAGS += -marm -mcpu=cortex-a5 + ASFLAGS += -mcpu=cortex-a5 + else ifneq (,$(findstring cortexa8,$(platform))) + CFLAGS += -marm -mcpu=cortex-a8 + ASFLAGS += -mcpu=cortex-a8 + else ifneq (,$(findstring cortexa9,$(platform))) + CFLAGS += -marm -mcpu=cortex-a9 + ASFLAGS += -mcpu=cortex-a9 + else ifneq (,$(findstring cortexa15a7,$(platform))) + CFLAGS += -marm -mcpu=cortex-a15.cortex-a7 + ASFLAGS += -mcpu=cortex-a15.cortex-a7 + else + CFLAGS += -marm + endif + ifneq (,$(findstring neon,$(platform))) + CFLAGS += -mfpu=neon + ASFLAGS += -mfpu=neon + endif + ifneq (,$(findstring softfloat,$(platform))) + CFLAGS += -mfloat-abi=softfp + ASFLAGS += -mfloat-abi=softfp + else ifneq (,$(findstring hardfloat,$(platform))) + CFLAGS += -mfloat-abi=hard + ASFLAGS += -mfloat-abi=hard + endif + ifneq (,$(findstring armasm,$(platform))) + ARM_ASM = 1 + endif + ARCH = arm + +# Emscripten +else ifeq ($(platform), emscripten) + TARGET := $(TARGET_NAME)_libretro_$(platform).bc + STATIC_LINKING = 1 + DONT_COMPILE_IN_ZLIB = 1 + +# GCW0 +else ifeq ($(platform), gcw0) + TARGET := $(TARGET_NAME)_libretro.so + CC = /opt/gcw0-toolchain/usr/bin/mipsel-linux-gcc + AR = /opt/gcw0-toolchain/usr/bin/mipsel-linux-ar + SHARED := -shared -nostdlib + fpic := -fPIC + LIBM := + DONT_COMPILE_IN_ZLIB = 1 + CFLAGS += -ffast-math -march=mips32 -mtune=mips32r2 -mhard-float + 
+ asm_memory = 0 + asm_render = 0 + asm_ym2612 = 0 + asm_misc = 0 + asm_cdpico = 0 + asm_cdmemory = 0 + asm_mix = 0 + use_cyclone = 0 + use_fame = 1 + use_drz80 = 0 + use_cz80 = 1 + +# Windows else - TARGET := $(TARGET_NAME)_libretro.dll - CC = gcc - LD_FLAGS := -fPIC - SHARED := -shared -static-libgcc -static-libstdc++ - CFLAGS += -D__WIN32__ -D__WIN32_LIBRETRO__ + TARGET := $(TARGET_NAME)_libretro.dll + CC = gcc + fpic := -fPIC + SHARED := -shared -static-libgcc -static-libstdc++ + CFLAGS += -D__WIN32__ -D__WIN32_LIBRETRO__ + +endif + +CFLAGS += -DNO_ZLIB + +ifeq ($(NO_MMAP),1) + CFLAGS += -DNO_MMAP endif ifeq ($(ARM_ASM),1) @@ -141,10 +453,14 @@ asm_cdmemory = 1 asm_mix = 1 endif -CFLAGS += -fPIC -LDLIBS += -lm +CFLAGS += $(fpic) + +ifeq ($(findstring Haiku,$(shell uname -a)),) + LDLIBS += $(LIBM) +endif + SHARED ?= -shared -LDFLAGS += $(SHARED) +LDFLAGS += $(SHARED) $(fpic) PLATFORM = libretro NO_CONFIG_MAK = yes diff --git a/jni/Android.mk b/jni/Android.mk index 042c1f74..a0f5dc8d 100644 --- a/jni/Android.mk +++ b/jni/Android.mk @@ -2,12 +2,13 @@ LOCAL_PATH := $(call my-dir) include $(CLEAR_VARS) -ifeq ($(NEON_BUILD)$(TARGET_ARCH_ABI),1armeabi-v7a) - LOCAL_MODULE := retro_picodrive-neon -else - LOCAL_MODULE := retro_picodrive +GIT_VERSION := " $(shell git rev-parse --short HEAD || echo unknown)" +ifneq ($(GIT_VERSION)," unknown") + LOCAL_CFLAGS += -DGIT_VERSION=\"$(GIT_VERSION)\" endif +LOCAL_MODULE := retro + R := ../ FR := $(LOCAL_PATH)/$(R) @@ -64,7 +65,7 @@ ARCH := $(TARGET_ARCH) include $(R)platform/common/common.mak LOCAL_SRC_FILES += $(SRCS_COMMON) -LOCAL_SRC_FILES += $(R)platform/libretro.c +LOCAL_SRC_FILES += $(R)platform/libretro/libretro.c LOCAL_SRC_FILES += $(R)platform/common/mp3.c LOCAL_SRC_FILES += $(R)platform/common/mp3_dummy.c diff --git a/platform/libretro.c b/platform/libretro.c deleted file mode 100644 index 23f0694e..00000000 --- a/platform/libretro.c +++ /dev/null @@ -1,915 +0,0 @@ -/* - * libretro core glue for PicoDrive - * (C) notaz, 
2013 - * - * This work is licensed under the terms of MAME license. - * See COPYING file in the top-level directory. - */ - -#define _GNU_SOURCE 1 // mremap -#include -#include -#include -#ifndef _WIN32 -#include -#else -#include -#include -#include -#endif -#include -#ifdef __MACH__ -#include -#endif - -#include -#include -#include "common/input_pico.h" -#include "common/version.h" -#include "libretro.h" - -static retro_video_refresh_t video_cb; -static retro_input_poll_t input_poll_cb; -static retro_input_state_t input_state_cb; -static retro_environment_t environ_cb; -static retro_audio_sample_batch_t audio_batch_cb; - -static FILE *emu_log; - -#define VOUT_MAX_WIDTH 320 -#define VOUT_MAX_HEIGHT 240 -static void *vout_buf; -static int vout_width, vout_height, vout_offset; - -static short __attribute__((aligned(4))) sndBuffer[2*44100/50]; - -static void snd_write(int len); - -#ifdef _WIN32 -#define SLASH '\\' -#else -#define SLASH '/' -#endif - -/* functions called by the core */ - -void cache_flush_d_inval_i(void *start, void *end) -{ -#ifdef __arm__ -#if defined(__BLACKBERRY_QNX__) - msync(start, end - start, MS_SYNC | MS_CACHE_ONLY | MS_INVALIDATE_ICACHE); -#elif defined(__MACH__) - size_t len = (char *)end - (char *)start; - sys_dcache_flush(start, len); - sys_icache_invalidate(start, len); -#else - __clear_cache(start, end); -#endif -#endif -} - -#ifdef _WIN32 -/* mmap() replacement for Windows - * - * Author: Mike Frysinger - * Placed into the public domain - */ - -/* References: - * CreateFileMapping: http://msdn.microsoft.com/en-us/library/aa366537(VS.85).aspx - * CloseHandle: http://msdn.microsoft.com/en-us/library/ms724211(VS.85).aspx - * MapViewOfFile: http://msdn.microsoft.com/en-us/library/aa366761(VS.85).aspx - * UnmapViewOfFile: http://msdn.microsoft.com/en-us/library/aa366882(VS.85).aspx - */ - -#define PROT_READ 0x1 -#define PROT_WRITE 0x2 -/* This flag is only available in WinXP+ */ -#ifdef FILE_MAP_EXECUTE -#define PROT_EXEC 0x4 -#else -#define 
PROT_EXEC 0x0 -#define FILE_MAP_EXECUTE 0 -#endif - -#define MAP_SHARED 0x01 -#define MAP_PRIVATE 0x02 -#define MAP_ANONYMOUS 0x20 -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FAILED ((void *) -1) - -#ifdef __USE_FILE_OFFSET64 -# define DWORD_HI(x) (x >> 32) -# define DWORD_LO(x) ((x) & 0xffffffff) -#else -# define DWORD_HI(x) (0) -# define DWORD_LO(x) (x) -#endif - -static void *mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset) -{ - if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) - return MAP_FAILED; - if (fd == -1) { - if (!(flags & MAP_ANON) || offset) - return MAP_FAILED; - } else if (flags & MAP_ANON) - return MAP_FAILED; - - DWORD flProtect; - if (prot & PROT_WRITE) { - if (prot & PROT_EXEC) - flProtect = PAGE_EXECUTE_READWRITE; - else - flProtect = PAGE_READWRITE; - } else if (prot & PROT_EXEC) { - if (prot & PROT_READ) - flProtect = PAGE_EXECUTE_READ; - else if (prot & PROT_EXEC) - flProtect = PAGE_EXECUTE; - } else - flProtect = PAGE_READONLY; - - off_t end = length + offset; - HANDLE mmap_fd, h; - if (fd == -1) - mmap_fd = INVALID_HANDLE_VALUE; - else - mmap_fd = (HANDLE)_get_osfhandle(fd); - h = CreateFileMapping(mmap_fd, NULL, flProtect, DWORD_HI(end), DWORD_LO(end), NULL); - if (h == NULL) - return MAP_FAILED; - - DWORD dwDesiredAccess; - if (prot & PROT_WRITE) - dwDesiredAccess = FILE_MAP_WRITE; - else - dwDesiredAccess = FILE_MAP_READ; - if (prot & PROT_EXEC) - dwDesiredAccess |= FILE_MAP_EXECUTE; - if (flags & MAP_PRIVATE) - dwDesiredAccess |= FILE_MAP_COPY; - void *ret = MapViewOfFile(h, dwDesiredAccess, DWORD_HI(offset), DWORD_LO(offset), length); - if (ret == NULL) { - CloseHandle(h); - ret = MAP_FAILED; - } - return ret; -} - -static void munmap(void *addr, size_t length) -{ - UnmapViewOfFile(addr); - /* ruh-ro, we leaked handle from CreateFileMapping() ... 
*/ -} -#endif - -#ifndef MAP_ANONYMOUS -#define MAP_ANONYMOUS MAP_ANON -#endif - -void *plat_mmap(unsigned long addr, size_t size, int need_exec, int is_fixed) -{ - int flags = MAP_PRIVATE | MAP_ANONYMOUS; - void *req, *ret; - - req = (void *)addr; - ret = mmap(req, size, PROT_READ | PROT_WRITE, flags, -1, 0); - if (ret == MAP_FAILED) { - lprintf("mmap(%08lx, %zd) failed: %d\n", addr, size, errno); - return NULL; - } - - if (addr != 0 && ret != (void *)addr) { - lprintf("warning: wanted to map @%08lx, got %p\n", - addr, ret); - - if (is_fixed) { - munmap(ret, size); - return NULL; - } - } - - return ret; -} - -void *plat_mremap(void *ptr, size_t oldsize, size_t newsize) -{ -#ifdef __linux__ - void *ret = mremap(ptr, oldsize, newsize, 0); - if (ret == MAP_FAILED) - return NULL; - - return ret; -#else - void *tmp, *ret; - size_t preserve_size; - - preserve_size = oldsize; - if (preserve_size > newsize) - preserve_size = newsize; - tmp = malloc(preserve_size); - if (tmp == NULL) - return NULL; - memcpy(tmp, ptr, preserve_size); - - munmap(ptr, oldsize); - ret = mmap(ptr, newsize, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (ret == MAP_FAILED) { - free(tmp); - return NULL; - } - memcpy(ret, tmp, preserve_size); - free(tmp); - return ret; -#endif -} - -void plat_munmap(void *ptr, size_t size) -{ - if (ptr != NULL) - munmap(ptr, size); -} - -int plat_mem_set_exec(void *ptr, size_t size) -{ -#ifdef _WIN32 - int ret = VirtualProtect(ptr,size,PAGE_EXECUTE_READWRITE,0); - if (ret == 0) - lprintf("mprotect(%p, %zd) failed: %d\n", ptr, size, 0); -#else - int ret = mprotect(ptr, size, PROT_READ | PROT_WRITE | PROT_EXEC); - if (ret != 0) - lprintf("mprotect(%p, %zd) failed: %d\n", ptr, size, errno); -#endif - return ret; -} - -void emu_video_mode_change(int start_line, int line_count, int is_32cols) -{ - memset(vout_buf, 0, 320 * 240 * 2); - vout_width = is_32cols ? 
256 : 320; - PicoDrawSetOutBuf(vout_buf, vout_width * 2); - - vout_height = line_count; - vout_offset = vout_width * start_line; -} - -void emu_32x_startup(void) -{ -} - -#ifndef ANDROID - -void lprintf(const char *fmt, ...) -{ - va_list list; - - va_start(list, fmt); - fprintf(emu_log, "PicoDrive: "); - vfprintf(emu_log, fmt, list); - va_end(list); - fflush(emu_log); -} - -#else - -#include - -void lprintf(const char *fmt, ...) -{ - va_list list; - - va_start(list, fmt); - __android_log_vprint(ANDROID_LOG_INFO, "PicoDrive", fmt, list); - va_end(list); -} - -#endif - -/* libretro */ -void retro_set_environment(retro_environment_t cb) -{ - static const struct retro_variable vars[] = { - //{ "region", "Region; Auto|NTSC|PAL" }, - { "picodrive_input1", "Input device 1; 3 button pad|6 button pad|None" }, - { "picodrive_input2", "Input device 2; 3 button pad|6 button pad|None" }, - { "picodrive_sprlim", "No sprite limit; disabled|enabled" }, - { "picodrive_ramcart", "MegaCD RAM cart; disabled|enabled" }, -#ifdef DRC_SH2 - { "picodrive_drc", "Dynamic recompilers; enabled|disabled" }, -#endif - { NULL, NULL }, - }; - - environ_cb = cb; - - cb(RETRO_ENVIRONMENT_SET_VARIABLES, (void *)vars); -} - -void retro_set_video_refresh(retro_video_refresh_t cb) { video_cb = cb; } -void retro_set_audio_sample(retro_audio_sample_t cb) { (void)cb; } -void retro_set_audio_sample_batch(retro_audio_sample_batch_t cb) { audio_batch_cb = cb; } -void retro_set_input_poll(retro_input_poll_t cb) { input_poll_cb = cb; } -void retro_set_input_state(retro_input_state_t cb) { input_state_cb = cb; } - -unsigned retro_api_version(void) -{ - return RETRO_API_VERSION; -} - -void retro_set_controller_port_device(unsigned port, unsigned device) -{ -} - -void retro_get_system_info(struct retro_system_info *info) -{ - memset(info, 0, sizeof(*info)); - info->library_name = "PicoDrive"; - info->library_version = VERSION; - info->valid_extensions = "bin|gen|smd|md|32x|cue|iso|sms"; - info->need_fullpath = 
true; -} - -void retro_get_system_av_info(struct retro_system_av_info *info) -{ - memset(info, 0, sizeof(*info)); - info->timing.fps = Pico.m.pal ? 50 : 60; - info->timing.sample_rate = 44100; - info->geometry.base_width = 320; - info->geometry.base_height = vout_height; - info->geometry.max_width = VOUT_MAX_WIDTH; - info->geometry.max_height = VOUT_MAX_HEIGHT; - info->geometry.aspect_ratio = 0.0f; -} - -/* savestates */ -struct savestate_state { - const char *load_buf; - char *save_buf; - size_t size; - size_t pos; -}; - -size_t state_read(void *p, size_t size, size_t nmemb, void *file) -{ - struct savestate_state *state = file; - size_t bsize = size * nmemb; - - if (state->pos + bsize > state->size) { - lprintf("savestate error: %u/%u\n", - state->pos + bsize, state->size); - bsize = state->size - state->pos; - if ((int)bsize <= 0) - return 0; - } - - memcpy(p, state->load_buf + state->pos, bsize); - state->pos += bsize; - return bsize; -} - -size_t state_write(void *p, size_t size, size_t nmemb, void *file) -{ - struct savestate_state *state = file; - size_t bsize = size * nmemb; - - if (state->pos + bsize > state->size) { - lprintf("savestate error: %u/%u\n", - state->pos + bsize, state->size); - bsize = state->size - state->pos; - if ((int)bsize <= 0) - return 0; - } - - memcpy(state->save_buf + state->pos, p, bsize); - state->pos += bsize; - return bsize; -} - -size_t state_skip(void *p, size_t size, size_t nmemb, void *file) -{ - struct savestate_state *state = file; - size_t bsize = size * nmemb; - - state->pos += bsize; - return bsize; -} - -size_t state_eof(void *file) -{ - struct savestate_state *state = file; - - return state->pos >= state->size; -} - -int state_fseek(void *file, long offset, int whence) -{ - struct savestate_state *state = file; - - switch (whence) { - case SEEK_SET: - state->pos = offset; - break; - case SEEK_CUR: - state->pos += offset; - break; - case SEEK_END: - state->pos = state->size + offset; - break; - } - return 
(int)state->pos; -} - -/* savestate sizes vary wildly depending if cd/32x or - * carthw is active, so run the whole thing to get size */ -size_t retro_serialize_size(void) -{ - struct savestate_state state = { 0, }; - int ret; - - ret = PicoStateFP(&state, 1, NULL, state_skip, NULL, state_fseek); - if (ret != 0) - return 0; - - return state.pos; -} - -bool retro_serialize(void *data, size_t size) -{ - struct savestate_state state = { 0, }; - int ret; - - state.save_buf = data; - state.size = size; - state.pos = 0; - - ret = PicoStateFP(&state, 1, NULL, state_write, - NULL, state_fseek); - return ret == 0; -} - -bool retro_unserialize(const void *data, size_t size) -{ - struct savestate_state state = { 0, }; - int ret; - - state.load_buf = data; - state.size = size; - state.pos = 0; - - ret = PicoStateFP(&state, 0, state_read, NULL, - state_eof, state_fseek); - return ret == 0; -} - -/* cheats - TODO */ -void retro_cheat_reset(void) -{ -} - -void retro_cheat_set(unsigned index, bool enabled, const char *code) -{ -} - -/* multidisk support */ -static bool disk_ejected; -static unsigned int disk_current_index; -static unsigned int disk_count; -static struct disks_state { - char *fname; -} disks[8]; - -static bool disk_set_eject_state(bool ejected) -{ - // TODO? - disk_ejected = ejected; - return true; -} - -static bool disk_get_eject_state(void) -{ - return disk_ejected; -} - -static unsigned int disk_get_image_index(void) -{ - return disk_current_index; -} - -static bool disk_set_image_index(unsigned int index) -{ - enum cd_img_type cd_type; - int ret; - - if (index >= sizeof(disks) / sizeof(disks[0])) - return false; - - if (disks[index].fname == NULL) { - lprintf("missing disk #%u\n", index); - - // RetroArch specifies "no disk" with index == count, - // so don't fail here.. 
- disk_current_index = index; - return true; - } - - lprintf("switching to disk %u: \"%s\"\n", index, - disks[index].fname); - - ret = -1; - cd_type = PicoCdCheck(disks[index].fname, NULL); - if (cd_type != CIT_NOT_CD) - ret = cdd_load(disks[index].fname, cd_type); - if (ret != 0) { - lprintf("Load failed, invalid CD image?\n"); - return 0; - } - - disk_current_index = index; - return true; -} - -static unsigned int disk_get_num_images(void) -{ - return disk_count; -} - -static bool disk_replace_image_index(unsigned index, - const struct retro_game_info *info) -{ - bool ret = true; - - if (index >= sizeof(disks) / sizeof(disks[0])) - return false; - - if (disks[index].fname != NULL) - free(disks[index].fname); - disks[index].fname = NULL; - - if (info != NULL) { - disks[index].fname = strdup(info->path); - if (index == disk_current_index) - ret = disk_set_image_index(index); - } - - return ret; -} - -static bool disk_add_image_index(void) -{ - if (disk_count >= sizeof(disks) / sizeof(disks[0])) - return false; - - disk_count++; - return true; -} - -static struct retro_disk_control_callback disk_control = { - .set_eject_state = disk_set_eject_state, - .get_eject_state = disk_get_eject_state, - .get_image_index = disk_get_image_index, - .set_image_index = disk_set_image_index, - .get_num_images = disk_get_num_images, - .replace_image_index = disk_replace_image_index, - .add_image_index = disk_add_image_index, -}; - -static void disk_tray_open(void) -{ - lprintf("cd tray open\n"); - disk_ejected = 1; -} - -static void disk_tray_close(void) -{ - lprintf("cd tray close\n"); - disk_ejected = 0; -} - - -static const char * const biosfiles_us[] = { - "us_scd2_9306", "SegaCDBIOS9303", "us_scd1_9210", "bios_CD_U" -}; -static const char * const biosfiles_eu[] = { - "eu_mcd2_9306", "eu_mcd2_9303", "eu_mcd1_9210", "bios_CD_E" -}; -static const char * const biosfiles_jp[] = { - "jp_mcd2_921222", "jp_mcd1_9112", "jp_mcd1_9111", "bios_CD_J" -}; - -static void make_system_path(char 
*buf, size_t buf_size, - const char *name, const char *ext) -{ - const char *dir = NULL; - - if (environ_cb(RETRO_ENVIRONMENT_GET_SYSTEM_DIRECTORY, &dir) && dir) { - snprintf(buf, buf_size, "%s%c%s%s", dir, SLASH, name, ext); - } - else { - snprintf(buf, buf_size, "%s%s", name, ext); - } -} - -static const char *find_bios(int *region, const char *cd_fname) -{ - const char * const *files; - static char path[256]; - int i, count; - FILE *f = NULL; - - if (*region == 4) { // US - files = biosfiles_us; - count = sizeof(biosfiles_us) / sizeof(char *); - } else if (*region == 8) { // EU - files = biosfiles_eu; - count = sizeof(biosfiles_eu) / sizeof(char *); - } else if (*region == 1 || *region == 2) { - files = biosfiles_jp; - count = sizeof(biosfiles_jp) / sizeof(char *); - } else { - return NULL; - } - - for (i = 0; i < count; i++) - { - make_system_path(path, sizeof(path), files[i], ".bin"); - f = fopen(path, "rb"); - if (f != NULL) - break; - - make_system_path(path, sizeof(path), files[i], ".zip"); - f = fopen(path, "rb"); - if (f != NULL) - break; - } - - if (f != NULL) { - lprintf("using bios: %s\n", path); - fclose(f); - return path; - } - - return NULL; -} - -bool retro_load_game(const struct retro_game_info *info) -{ - enum media_type_e media_type; - static char carthw_path[256]; - size_t i; - - enum retro_pixel_format fmt = RETRO_PIXEL_FORMAT_RGB565; - if (!environ_cb(RETRO_ENVIRONMENT_SET_PIXEL_FORMAT, &fmt)) { - lprintf("RGB565 support required, sorry\n"); - return false; - } - - if (info == NULL || info->path == NULL) { - lprintf("info->path required\n"); - return false; - } - - for (i = 0; i < sizeof(disks) / sizeof(disks[0]); i++) { - if (disks[i].fname != NULL) { - free(disks[i].fname); - disks[i].fname = NULL; - } - } - - disk_current_index = 0; - disk_count = 1; - disks[0].fname = strdup(info->path); - - make_system_path(carthw_path, sizeof(carthw_path), "carthw", ".cfg"); - - media_type = PicoLoadMedia(info->path, carthw_path, - find_bios, NULL); - - 
switch (media_type) { - case PM_BAD_DETECT: - lprintf("Failed to detect ROM/CD image type.\n"); - return false; - case PM_BAD_CD: - lprintf("Invalid CD image\n"); - return false; - case PM_BAD_CD_NO_BIOS: - lprintf("Missing BIOS\n"); - return false; - case PM_ERROR: - lprintf("Load error\n"); - return false; - default: - break; - } - - PicoLoopPrepare(); - - PicoWriteSound = snd_write; - memset(sndBuffer, 0, sizeof(sndBuffer)); - PsndOut = sndBuffer; - PsndRerate(0); - - return true; -} - -bool retro_load_game_special(unsigned game_type, const struct retro_game_info *info, size_t num_info) -{ - return false; -} - -void retro_unload_game(void) -{ -} - -unsigned retro_get_region(void) -{ - return Pico.m.pal ? RETRO_REGION_PAL : RETRO_REGION_NTSC; -} - -void *retro_get_memory_data(unsigned id) -{ - if (id != RETRO_MEMORY_SAVE_RAM) - return NULL; - - if (PicoAHW & PAHW_MCD) - return Pico_mcd->bram; - else - return SRam.data; -} - -size_t retro_get_memory_size(unsigned id) -{ - unsigned int i; - int sum; - - if (id != RETRO_MEMORY_SAVE_RAM) - return 0; - - if (PicoAHW & PAHW_MCD) - // bram - return 0x2000; - - if (Pico.m.frame_count == 0) - return SRam.size; - - // if game doesn't write to sram, don't report it to - // libretro so that RA doesn't write out zeroed .srm - for (i = 0, sum = 0; i < SRam.size; i++) - sum |= SRam.data[i]; - - return (sum != 0) ? 
SRam.size : 0; -} - -void retro_reset(void) -{ - PicoReset(); -} - -static const unsigned short retro_pico_map[] = { - [RETRO_DEVICE_ID_JOYPAD_B] = 1 << GBTN_B, - [RETRO_DEVICE_ID_JOYPAD_Y] = 1 << GBTN_A, - [RETRO_DEVICE_ID_JOYPAD_SELECT] = 1 << GBTN_MODE, - [RETRO_DEVICE_ID_JOYPAD_START] = 1 << GBTN_START, - [RETRO_DEVICE_ID_JOYPAD_UP] = 1 << GBTN_UP, - [RETRO_DEVICE_ID_JOYPAD_DOWN] = 1 << GBTN_DOWN, - [RETRO_DEVICE_ID_JOYPAD_LEFT] = 1 << GBTN_LEFT, - [RETRO_DEVICE_ID_JOYPAD_RIGHT] = 1 << GBTN_RIGHT, - [RETRO_DEVICE_ID_JOYPAD_A] = 1 << GBTN_C, - [RETRO_DEVICE_ID_JOYPAD_X] = 1 << GBTN_Y, - [RETRO_DEVICE_ID_JOYPAD_L] = 1 << GBTN_X, - [RETRO_DEVICE_ID_JOYPAD_R] = 1 << GBTN_Z, -}; -#define RETRO_PICO_MAP_LEN (sizeof(retro_pico_map) / sizeof(retro_pico_map[0])) - -static void snd_write(int len) -{ - audio_batch_cb(PsndOut, len / 4); -} - -static enum input_device input_name_to_val(const char *name) -{ - if (strcmp(name, "3 button pad") == 0) - return PICO_INPUT_PAD_3BTN; - if (strcmp(name, "6 button pad") == 0) - return PICO_INPUT_PAD_6BTN; - if (strcmp(name, "None") == 0) - return PICO_INPUT_NOTHING; - - lprintf("invalid picodrive_input: '%s'\n", name); - return PICO_INPUT_PAD_3BTN; -} - -static void update_variables(void) -{ - struct retro_variable var; - - var.value = NULL; - var.key = "picodrive_input1"; - if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) - PicoSetInputDevice(0, input_name_to_val(var.value)); - - var.value = NULL; - var.key = "picodrive_input2"; - if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) - PicoSetInputDevice(1, input_name_to_val(var.value)); - - var.value = NULL; - var.key = "picodrive_sprlim"; - if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { - if (strcmp(var.value, "enabled") == 0) - PicoOpt |= POPT_DIS_SPRITE_LIM; - else - PicoOpt &= ~POPT_DIS_SPRITE_LIM; - } - - var.value = NULL; - var.key = "picodrive_ramcart"; - if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { 
- if (strcmp(var.value, "enabled") == 0) - PicoOpt |= POPT_EN_MCD_RAMCART; - else - PicoOpt &= ~POPT_EN_MCD_RAMCART; - } - -#ifdef DRC_SH2 - var.value = NULL; - var.key = "picodrive_drc"; - if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { - if (strcmp(var.value, "enabled") == 0) - PicoOpt |= POPT_EN_DRC; - else - PicoOpt &= ~POPT_EN_DRC; - } -#endif -} - -void retro_run(void) -{ - bool updated = false; - int pad, i; - - if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE_UPDATE, &updated) && updated) - update_variables(); - - input_poll_cb(); - - PicoPad[0] = PicoPad[1] = 0; - for (pad = 0; pad < 2; pad++) - for (i = 0; i < RETRO_PICO_MAP_LEN; i++) - if (input_state_cb(pad, RETRO_DEVICE_JOYPAD, 0, i)) - PicoPad[pad] |= retro_pico_map[i]; - - PicoFrame(); - - video_cb((short *)vout_buf + vout_offset, - vout_width, vout_height, vout_width * 2); -} - -void retro_init(void) -{ - int level; - -#ifdef IOS - emu_log = fopen("/User/Documents/PicoDrive.log", "w"); - if (emu_log == NULL) - emu_log = fopen("PicoDrive.log", "w"); - if (emu_log == NULL) -#endif - emu_log = stdout; - - level = 0; - environ_cb(RETRO_ENVIRONMENT_SET_PERFORMANCE_LEVEL, &level); - - environ_cb(RETRO_ENVIRONMENT_SET_DISK_CONTROL_INTERFACE, &disk_control); - - PicoOpt = POPT_EN_STEREO|POPT_EN_FM|POPT_EN_PSG|POPT_EN_Z80 - | POPT_EN_MCD_PCM|POPT_EN_MCD_CDDA|POPT_EN_MCD_GFX - | POPT_EN_32X|POPT_EN_PWM - | POPT_ACC_SPRITES|POPT_DIS_32C_BORDER; -#ifdef __arm__ - PicoOpt |= POPT_EN_DRC; -#endif - PsndRate = 44100; - PicoAutoRgnOrder = 0x184; // US, EU, JP - - vout_width = 320; - vout_height = 240; - vout_buf = malloc(VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT * 2); - - PicoInit(); - PicoDrawSetOutFormat(PDF_RGB555, 0); - PicoDrawSetOutBuf(vout_buf, vout_width * 2); - - //PicoMessage = plat_status_msg_busy_next; - PicoMCDopenTray = disk_tray_open; - PicoMCDcloseTray = disk_tray_close; - - update_variables(); -} - -void retro_deinit(void) -{ - PicoExit(); -} diff --git a/platform/libretro.h 
b/platform/libretro.h deleted file mode 100644 index ff4f4fd9..00000000 --- a/platform/libretro.h +++ /dev/null @@ -1,787 +0,0 @@ -/* Copyright (C) 2010-2013 The RetroArch team - * - * --------------------------------------------------------------------------------------- - * The following license statement only applies to this libretro API header (libretro.h). - * --------------------------------------------------------------------------------------- - * - * Permission is hereby granted, free of charge, - * to any person obtaining a copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation the rights to - * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, - * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef LIBRETRO_H__ -#define LIBRETRO_H__ - -#include -#include -#include - -// Hack applied for MSVC when compiling in C89 mode as it isn't C99 compliant. -#ifdef __cplusplus -extern "C" { -#else -#if defined(_MSC_VER) && !defined(SN_TARGET_PS3) && !defined(__cplusplus) -#define bool unsigned char -#define true 1 -#define false 0 -#else -#include -#endif -#endif - -// Used for checking API/ABI mismatches that can break libretro implementations. 
-// It is not incremented for compatible changes to the API. -#define RETRO_API_VERSION 1 - -// Libretro's fundamental device abstractions. -#define RETRO_DEVICE_MASK 0xff -#define RETRO_DEVICE_NONE 0 - -// The JOYPAD is called RetroPad. It is essentially a Super Nintendo controller, -// but with additional L2/R2/L3/R3 buttons, similar to a PS1 DualShock. -#define RETRO_DEVICE_JOYPAD 1 - -// The mouse is a simple mouse, similar to Super Nintendo's mouse. -// X and Y coordinates are reported relatively to last poll (poll callback). -// It is up to the libretro implementation to keep track of where the mouse pointer is supposed to be on the screen. -// The frontend must make sure not to interfere with its own hardware mouse pointer. -#define RETRO_DEVICE_MOUSE 2 - -// KEYBOARD device lets one poll for raw key pressed. -// It is poll based, so input callback will return with the current pressed state. -#define RETRO_DEVICE_KEYBOARD 3 - -// Lightgun X/Y coordinates are reported relatively to last poll, similar to mouse. -#define RETRO_DEVICE_LIGHTGUN 4 - -// The ANALOG device is an extension to JOYPAD (RetroPad). -// Similar to DualShock it adds two analog sticks. -// This is treated as a separate device type as it returns values in the full analog range -// of [-0x8000, 0x7fff]. Positive X axis is right. Positive Y axis is down. -// Only use ANALOG type when polling for analog values of the axes. -#define RETRO_DEVICE_ANALOG 5 - -// Abstracts the concept of a pointing mechanism, e.g. touch. -// This allows libretro to query in absolute coordinates where on the screen a mouse (or something similar) is being placed. -// For a touch centric device, coordinates reported are the coordinates of the press. -// -// Coordinates in X and Y are reported as: -// [-0x7fff, 0x7fff]: -0x7fff corresponds to the far left/top of the screen, -// and 0x7fff corresponds to the far right/bottom of the screen. 
-// The "screen" is here defined as area that is passed to the frontend and later displayed on the monitor. -// The frontend is free to scale/resize this screen as it sees fit, however, -// (X, Y) = (-0x7fff, -0x7fff) will correspond to the top-left pixel of the game image, etc. -// -// To check if the pointer coordinates are valid (e.g. a touch display actually being touched), -// PRESSED returns 1 or 0. -// If using a mouse, PRESSED will usually correspond to the left mouse button. -// PRESSED will only return 1 if the pointer is inside the game screen. -// -// For multi-touch, the index variable can be used to successively query more presses. -// If index = 0 returns true for _PRESSED, coordinates can be extracted -// with _X, _Y for index = 0. One can then query _PRESSED, _X, _Y with index = 1, and so on. -// Eventually _PRESSED will return false for an index. No further presses are registered at this point. -#define RETRO_DEVICE_POINTER 6 - -// These device types are specializations of the base types above. -// They should only be used in retro_set_controller_type() to inform libretro implementations -// about use of a very specific device type. -// -// In input state callback, however, only the base type should be used in the 'device' field. -#define RETRO_DEVICE_JOYPAD_MULTITAP ((1 << 8) | RETRO_DEVICE_JOYPAD) -#define RETRO_DEVICE_LIGHTGUN_SUPER_SCOPE ((1 << 8) | RETRO_DEVICE_LIGHTGUN) -#define RETRO_DEVICE_LIGHTGUN_JUSTIFIER ((2 << 8) | RETRO_DEVICE_LIGHTGUN) -#define RETRO_DEVICE_LIGHTGUN_JUSTIFIERS ((3 << 8) | RETRO_DEVICE_LIGHTGUN) - -// Buttons for the RetroPad (JOYPAD). -// The placement of these is equivalent to placements on the Super Nintendo controller. -// L2/R2/L3/R3 buttons correspond to the PS1 DualShock. 
-#define RETRO_DEVICE_ID_JOYPAD_B 0 -#define RETRO_DEVICE_ID_JOYPAD_Y 1 -#define RETRO_DEVICE_ID_JOYPAD_SELECT 2 -#define RETRO_DEVICE_ID_JOYPAD_START 3 -#define RETRO_DEVICE_ID_JOYPAD_UP 4 -#define RETRO_DEVICE_ID_JOYPAD_DOWN 5 -#define RETRO_DEVICE_ID_JOYPAD_LEFT 6 -#define RETRO_DEVICE_ID_JOYPAD_RIGHT 7 -#define RETRO_DEVICE_ID_JOYPAD_A 8 -#define RETRO_DEVICE_ID_JOYPAD_X 9 -#define RETRO_DEVICE_ID_JOYPAD_L 10 -#define RETRO_DEVICE_ID_JOYPAD_R 11 -#define RETRO_DEVICE_ID_JOYPAD_L2 12 -#define RETRO_DEVICE_ID_JOYPAD_R2 13 -#define RETRO_DEVICE_ID_JOYPAD_L3 14 -#define RETRO_DEVICE_ID_JOYPAD_R3 15 - -// Index / Id values for ANALOG device. -#define RETRO_DEVICE_INDEX_ANALOG_LEFT 0 -#define RETRO_DEVICE_INDEX_ANALOG_RIGHT 1 -#define RETRO_DEVICE_ID_ANALOG_X 0 -#define RETRO_DEVICE_ID_ANALOG_Y 1 - -// Id values for MOUSE. -#define RETRO_DEVICE_ID_MOUSE_X 0 -#define RETRO_DEVICE_ID_MOUSE_Y 1 -#define RETRO_DEVICE_ID_MOUSE_LEFT 2 -#define RETRO_DEVICE_ID_MOUSE_RIGHT 3 - -// Id values for LIGHTGUN types. -#define RETRO_DEVICE_ID_LIGHTGUN_X 0 -#define RETRO_DEVICE_ID_LIGHTGUN_Y 1 -#define RETRO_DEVICE_ID_LIGHTGUN_TRIGGER 2 -#define RETRO_DEVICE_ID_LIGHTGUN_CURSOR 3 -#define RETRO_DEVICE_ID_LIGHTGUN_TURBO 4 -#define RETRO_DEVICE_ID_LIGHTGUN_PAUSE 5 -#define RETRO_DEVICE_ID_LIGHTGUN_START 6 - -// Id values for POINTER. -#define RETRO_DEVICE_ID_POINTER_X 0 -#define RETRO_DEVICE_ID_POINTER_Y 1 -#define RETRO_DEVICE_ID_POINTER_PRESSED 2 - -// Returned from retro_get_region(). -#define RETRO_REGION_NTSC 0 -#define RETRO_REGION_PAL 1 - -// Passed to retro_get_memory_data/size(). -// If the memory type doesn't apply to the implementation NULL/0 can be returned. -#define RETRO_MEMORY_MASK 0xff - -// Regular save ram. This ram is usually found on a game cartridge, backed up by a battery. -// If save game data is too complex for a single memory buffer, -// the SYSTEM_DIRECTORY environment callback can be used. 
-#define RETRO_MEMORY_SAVE_RAM 0 - -// Some games have a built-in clock to keep track of time. -// This memory is usually just a couple of bytes to keep track of time. -#define RETRO_MEMORY_RTC 1 - -// System ram lets a frontend peek into a game systems main RAM. -#define RETRO_MEMORY_SYSTEM_RAM 2 - -// Video ram lets a frontend peek into a game systems video RAM (VRAM). -#define RETRO_MEMORY_VIDEO_RAM 3 - -// Special memory types. -#define RETRO_MEMORY_SNES_BSX_RAM ((1 << 8) | RETRO_MEMORY_SAVE_RAM) -#define RETRO_MEMORY_SNES_BSX_PRAM ((2 << 8) | RETRO_MEMORY_SAVE_RAM) -#define RETRO_MEMORY_SNES_SUFAMI_TURBO_A_RAM ((3 << 8) | RETRO_MEMORY_SAVE_RAM) -#define RETRO_MEMORY_SNES_SUFAMI_TURBO_B_RAM ((4 << 8) | RETRO_MEMORY_SAVE_RAM) -#define RETRO_MEMORY_SNES_GAME_BOY_RAM ((5 << 8) | RETRO_MEMORY_SAVE_RAM) -#define RETRO_MEMORY_SNES_GAME_BOY_RTC ((6 << 8) | RETRO_MEMORY_RTC) - -// Special game types passed into retro_load_game_special(). -// Only used when multiple ROMs are required. -#define RETRO_GAME_TYPE_BSX 0x101 -#define RETRO_GAME_TYPE_BSX_SLOTTED 0x102 -#define RETRO_GAME_TYPE_SUFAMI_TURBO 0x103 -#define RETRO_GAME_TYPE_SUPER_GAME_BOY 0x104 - -// Keysyms used for ID in input state callback when polling RETRO_KEYBOARD. 
-enum retro_key -{ - RETROK_UNKNOWN = 0, - RETROK_FIRST = 0, - RETROK_BACKSPACE = 8, - RETROK_TAB = 9, - RETROK_CLEAR = 12, - RETROK_RETURN = 13, - RETROK_PAUSE = 19, - RETROK_ESCAPE = 27, - RETROK_SPACE = 32, - RETROK_EXCLAIM = 33, - RETROK_QUOTEDBL = 34, - RETROK_HASH = 35, - RETROK_DOLLAR = 36, - RETROK_AMPERSAND = 38, - RETROK_QUOTE = 39, - RETROK_LEFTPAREN = 40, - RETROK_RIGHTPAREN = 41, - RETROK_ASTERISK = 42, - RETROK_PLUS = 43, - RETROK_COMMA = 44, - RETROK_MINUS = 45, - RETROK_PERIOD = 46, - RETROK_SLASH = 47, - RETROK_0 = 48, - RETROK_1 = 49, - RETROK_2 = 50, - RETROK_3 = 51, - RETROK_4 = 52, - RETROK_5 = 53, - RETROK_6 = 54, - RETROK_7 = 55, - RETROK_8 = 56, - RETROK_9 = 57, - RETROK_COLON = 58, - RETROK_SEMICOLON = 59, - RETROK_LESS = 60, - RETROK_EQUALS = 61, - RETROK_GREATER = 62, - RETROK_QUESTION = 63, - RETROK_AT = 64, - RETROK_LEFTBRACKET = 91, - RETROK_BACKSLASH = 92, - RETROK_RIGHTBRACKET = 93, - RETROK_CARET = 94, - RETROK_UNDERSCORE = 95, - RETROK_BACKQUOTE = 96, - RETROK_a = 97, - RETROK_b = 98, - RETROK_c = 99, - RETROK_d = 100, - RETROK_e = 101, - RETROK_f = 102, - RETROK_g = 103, - RETROK_h = 104, - RETROK_i = 105, - RETROK_j = 106, - RETROK_k = 107, - RETROK_l = 108, - RETROK_m = 109, - RETROK_n = 110, - RETROK_o = 111, - RETROK_p = 112, - RETROK_q = 113, - RETROK_r = 114, - RETROK_s = 115, - RETROK_t = 116, - RETROK_u = 117, - RETROK_v = 118, - RETROK_w = 119, - RETROK_x = 120, - RETROK_y = 121, - RETROK_z = 122, - RETROK_DELETE = 127, - - RETROK_KP0 = 256, - RETROK_KP1 = 257, - RETROK_KP2 = 258, - RETROK_KP3 = 259, - RETROK_KP4 = 260, - RETROK_KP5 = 261, - RETROK_KP6 = 262, - RETROK_KP7 = 263, - RETROK_KP8 = 264, - RETROK_KP9 = 265, - RETROK_KP_PERIOD = 266, - RETROK_KP_DIVIDE = 267, - RETROK_KP_MULTIPLY = 268, - RETROK_KP_MINUS = 269, - RETROK_KP_PLUS = 270, - RETROK_KP_ENTER = 271, - RETROK_KP_EQUALS = 272, - - RETROK_UP = 273, - RETROK_DOWN = 274, - RETROK_RIGHT = 275, - RETROK_LEFT = 276, - RETROK_INSERT = 277, - RETROK_HOME = 278, 
- RETROK_END = 279, - RETROK_PAGEUP = 280, - RETROK_PAGEDOWN = 281, - - RETROK_F1 = 282, - RETROK_F2 = 283, - RETROK_F3 = 284, - RETROK_F4 = 285, - RETROK_F5 = 286, - RETROK_F6 = 287, - RETROK_F7 = 288, - RETROK_F8 = 289, - RETROK_F9 = 290, - RETROK_F10 = 291, - RETROK_F11 = 292, - RETROK_F12 = 293, - RETROK_F13 = 294, - RETROK_F14 = 295, - RETROK_F15 = 296, - - RETROK_NUMLOCK = 300, - RETROK_CAPSLOCK = 301, - RETROK_SCROLLOCK = 302, - RETROK_RSHIFT = 303, - RETROK_LSHIFT = 304, - RETROK_RCTRL = 305, - RETROK_LCTRL = 306, - RETROK_RALT = 307, - RETROK_LALT = 308, - RETROK_RMETA = 309, - RETROK_LMETA = 310, - RETROK_LSUPER = 311, - RETROK_RSUPER = 312, - RETROK_MODE = 313, - RETROK_COMPOSE = 314, - - RETROK_HELP = 315, - RETROK_PRINT = 316, - RETROK_SYSREQ = 317, - RETROK_BREAK = 318, - RETROK_MENU = 319, - RETROK_POWER = 320, - RETROK_EURO = 321, - RETROK_UNDO = 322, - - RETROK_LAST, - - RETROK_DUMMY = INT_MAX // Ensure sizeof(enum) == sizeof(int) -}; - -enum retro_mod -{ - RETROKMOD_NONE = 0x0000, - - RETROKMOD_SHIFT = 0x01, - RETROKMOD_CTRL = 0x02, - RETROKMOD_ALT = 0x04, - RETROKMOD_META = 0x08, - - RETROKMOD_NUMLOCK = 0x10, - RETROKMOD_CAPSLOCK = 0x20, - RETROKMOD_SCROLLOCK = 0x40, - - RETROKMOD_DUMMY = INT_MAX // Ensure sizeof(enum) == sizeof(int) -}; - -// If set, this call is not part of the public libretro API yet. It can change or be removed at any time. -#define RETRO_ENVIRONMENT_EXPERIMENTAL 0x10000 - -// Environment commands. -#define RETRO_ENVIRONMENT_SET_ROTATION 1 // const unsigned * -- - // Sets screen rotation of graphics. - // Is only implemented if rotation can be accelerated by hardware. - // Valid values are 0, 1, 2, 3, which rotates screen by 0, 90, 180, 270 degrees - // counter-clockwise respectively. - // -#define RETRO_ENVIRONMENT_GET_OVERSCAN 2 // bool * -- - // Boolean value whether or not the implementation should use overscan, or crop away overscan. 
- // -#define RETRO_ENVIRONMENT_GET_CAN_DUPE 3 // bool * -- - // Boolean value whether or not frontend supports frame duping, - // passing NULL to video frame callback. - // -// Environ 4, 5 are no longer supported (GET_VARIABLE / SET_VARIABLES), and reserved to avoid possible ABI clash. -#define RETRO_ENVIRONMENT_SET_MESSAGE 6 // const struct retro_message * -- - // Sets a message to be displayed in implementation-specific manner for a certain amount of 'frames'. - // Should not be used for trivial messages, which should simply be logged to stderr. -#define RETRO_ENVIRONMENT_SHUTDOWN 7 // N/A (NULL) -- - // Requests the frontend to shutdown. - // Should only be used if game has a specific - // way to shutdown the game from a menu item or similar. - // -#define RETRO_ENVIRONMENT_SET_PERFORMANCE_LEVEL 8 - // const unsigned * -- - // Gives a hint to the frontend how demanding this implementation - // is on a system. E.g. reporting a level of 2 means - // this implementation should run decently on all frontends - // of level 2 and up. - // - // It can be used by the frontend to potentially warn - // about too demanding implementations. - // - // The levels are "floating", but roughly defined as: - // 0: Low-powered embedded devices such as Raspberry Pi - // 1: 6th generation consoles, such as Wii/Xbox 1, and phones, tablets, etc. - // 2: 7th generation consoles, such as PS3/360, with sub-par CPUs. - // 3: Modern desktop/laptops with reasonably powerful CPUs. - // 4: High-end desktops with very powerful CPUs. - // - // This function can be called on a per-game basis, - // as certain games an implementation can play might be - // particularily demanding. - // If called, it should be called in retro_load_game(). - // -#define RETRO_ENVIRONMENT_GET_SYSTEM_DIRECTORY 9 - // const char ** -- - // Returns the "system" directory of the frontend. - // This directory can be used to store system specific ROMs such as BIOSes, configuration data, etc. 
- // The returned value can be NULL. - // If so, no such directory is defined, - // and it's up to the implementation to find a suitable directory. - // -#define RETRO_ENVIRONMENT_SET_PIXEL_FORMAT 10 - // const enum retro_pixel_format * -- - // Sets the internal pixel format used by the implementation. - // The default pixel format is RETRO_PIXEL_FORMAT_0RGB1555. - // This pixel format however, is deprecated (see enum retro_pixel_format). - // If the call returns false, the frontend does not support this pixel format. - // This function should be called inside retro_load_game() or retro_get_system_av_info(). - // -#define RETRO_ENVIRONMENT_SET_INPUT_DESCRIPTORS 11 - // const struct retro_input_descriptor * -- - // Sets an array of retro_input_descriptors. - // It is up to the frontend to present this in a usable way. - // The array is terminated by retro_input_descriptor::description being set to NULL. - // This function can be called at any time, but it is recommended to call it as early as possible. -#define RETRO_ENVIRONMENT_SET_KEYBOARD_CALLBACK 12 - // const struct retro_keyboard_callback * -- - // Sets a callback function used to notify core about keyboard events. - // -#define RETRO_ENVIRONMENT_SET_DISK_CONTROL_INTERFACE 13 - // const struct retro_disk_control_callback * -- - // Sets an interface which frontend can use to eject and insert disk images. - // This is used for games which consist of multiple images and must be manually - // swapped out by the user (e.g. PSX). -#define RETRO_ENVIRONMENT_SET_HW_RENDER (14 | RETRO_ENVIRONMENT_EXPERIMENTAL) - // struct retro_hw_render_callback * -- - // NOTE: This call is currently very experimental, and should not be considered part of the public API. - // The interface could be changed or removed at any time. - // Sets an interface to let a libretro core render with hardware acceleration. - // Should be called in retro_load_game(). 
- // If successful, libretro cores will be able to render to a frontend-provided framebuffer. - // The size of this framebuffer will be at least as large as max_width/max_height provided in get_av_info(). - // If HW rendering is used, pass only RETRO_HW_FRAME_BUFFER_VALID or NULL to retro_video_refresh_t. -#define RETRO_ENVIRONMENT_GET_VARIABLE 15 - // struct retro_variable * -- - // Interface to aquire user-defined information from environment - // that cannot feasibly be supported in a multi-system way. - // 'key' should be set to a key which has already been set by SET_VARIABLES. - // 'data' will be set to a value or NULL. - // -#define RETRO_ENVIRONMENT_SET_VARIABLES 16 - // const struct retro_variable * -- - // Allows an implementation to signal the environment - // which variables it might want to check for later using GET_VARIABLE. - // This allows the frontend to present these variables to a user dynamically. - // This should be called as early as possible (ideally in retro_set_environment). - // - // 'data' points to an array of retro_variable structs terminated by a { NULL, NULL } element. - // retro_variable::key should be namespaced to not collide with other implementations' keys. E.g. A core called 'foo' should use keys named as 'foo_option'. - // retro_variable::value should contain a human readable description of the key as well as a '|' delimited list of expected values. - // The number of possible options should be very limited, i.e. it should be feasible to cycle through options without a keyboard. - // First entry should be treated as a default. - // - // Example entry: - // { "foo_option", "Speed hack coprocessor X; false|true" } - // - // Text before first ';' is description. This ';' must be followed by a space, and followed by a list of possible values split up with '|'. - // Only strings are operated on. The possible values will generally be displayed and stored as-is by the frontend. 
- // -#define RETRO_ENVIRONMENT_GET_VARIABLE_UPDATE 17 - // bool * -- - // Result is set to true if some variables are updated by - // frontend since last call to RETRO_ENVIRONMENT_GET_VARIABLE. - // Variables should be queried with GET_VARIABLE. - // -#define RETRO_ENVIRONMENT_SET_SUPPORT_NO_GAME 18 - // const bool * -- - // If true, the libretro implementation supports calls to retro_load_game() with NULL as argument. - // Used by cores which can run without particular game data. - // This should be called within retro_set_environment() only. - - -// Pass this to retro_video_refresh_t if rendering to hardware. -// Passing NULL to retro_video_refresh_t is still a frame dupe as normal. -#define RETRO_HW_FRAME_BUFFER_VALID ((void*)-1) - -// Invalidates the current HW context. -// If called, all GPU resources must be reinitialized. -// Usually called when frontend reinits video driver. -// Also called first time video driver is initialized, allowing libretro core to init resources. -typedef void (*retro_hw_context_reset_t)(void); -// Gets current framebuffer which is to be rendered to. Could change every frame potentially. -typedef uintptr_t (*retro_hw_get_current_framebuffer_t)(void); - -// Get a symbol from HW context. -typedef void (*retro_proc_address_t)(void); -typedef retro_proc_address_t (*retro_hw_get_proc_address_t)(const char *sym); - -enum retro_hw_context_type -{ - RETRO_HW_CONTEXT_NONE = 0, - RETRO_HW_CONTEXT_OPENGL, // OpenGL 2.x. Latest version available before 3.x+. - RETRO_HW_CONTEXT_OPENGLES2, // GLES 2.0 - - RETRO_HW_CONTEXT_DUMMY = INT_MAX -}; - -struct retro_hw_render_callback -{ - enum retro_hw_context_type context_type; // Which API to use. Set by libretro core. - retro_hw_context_reset_t context_reset; // Set by libretro core. - retro_hw_get_current_framebuffer_t get_current_framebuffer; // Set by frontend. - retro_hw_get_proc_address_t get_proc_address; // Set by frontend. 
- bool depth; // Set if render buffers should have depth component attached. -}; - -// Callback type passed in RETRO_ENVIRONMENT_SET_KEYBOARD_CALLBACK. Called by the frontend in response to keyboard events. -// down is set if the key is being pressed, or false if it is being released. -// keycode is the RETROK value of the char. -// character is the text character of the pressed key. (UTF-32). -// key_modifiers is a set of RETROKMOD values or'ed together. -typedef void (*retro_keyboard_event_t)(bool down, unsigned keycode, uint32_t character, uint16_t key_modifiers); - -struct retro_keyboard_callback -{ - retro_keyboard_event_t callback; -}; - -// Callbacks for RETRO_ENVIRONMENT_SET_DISK_CONTROL_INTERFACE. -// Should be set for implementations which can swap out multiple disk images in runtime. -// If the implementation can do this automatically, it should strive to do so. -// However, there are cases where the user must manually do so. -// -// Overview: To swap a disk image, eject the disk image with set_eject_state(true). -// Set the disk index with set_image_index(index). Insert the disk again with set_eject_state(false). - -// If ejected is true, "ejects" the virtual disk tray. -// When ejected, the disk image index can be set. -typedef bool (*retro_set_eject_state_t)(bool ejected); -// Gets current eject state. The initial state is 'not ejected'. -typedef bool (*retro_get_eject_state_t)(void); -// Gets current disk index. First disk is index 0. -// If return value is >= get_num_images(), no disk is currently inserted. -typedef unsigned (*retro_get_image_index_t)(void); -// Sets image index. Can only be called when disk is ejected. -// The implementation supports setting "no disk" by using an index >= get_num_images(). -typedef bool (*retro_set_image_index_t)(unsigned index); -// Gets total number of images which are available to use. -typedef unsigned (*retro_get_num_images_t)(void); -// -// Replaces the disk image associated with index. 
-// Arguments to pass in info have same requirements as retro_load_game(). -// Virtual disk tray must be ejected when calling this. -// Replacing a disk image with info = NULL will remove the disk image from the internal list. -// As a result, calls to get_image_index() can change. -// -// E.g. replace_image_index(1, NULL), and previous get_image_index() returned 4 before. -// Index 1 will be removed, and the new index is 3. -struct retro_game_info; -typedef bool (*retro_replace_image_index_t)(unsigned index, const struct retro_game_info *info); -// Adds a new valid index (get_num_images()) to the internal disk list. -// This will increment subsequent return values from get_num_images() by 1. -// This image index cannot be used until a disk image has been set with replace_image_index. -typedef bool (*retro_add_image_index_t)(void); - -struct retro_disk_control_callback -{ - retro_set_eject_state_t set_eject_state; - retro_get_eject_state_t get_eject_state; - - retro_get_image_index_t get_image_index; - retro_set_image_index_t set_image_index; - retro_get_num_images_t get_num_images; - - retro_replace_image_index_t replace_image_index; - retro_add_image_index_t add_image_index; -}; - -enum retro_pixel_format -{ - // 0RGB1555, native endian. 0 bit must be set to 0. - // This pixel format is default for compatibility concerns only. - // If a 15/16-bit pixel format is desired, consider using RGB565. - RETRO_PIXEL_FORMAT_0RGB1555 = 0, - - // XRGB8888, native endian. X bits are ignored. - RETRO_PIXEL_FORMAT_XRGB8888 = 1, - - // RGB565, native endian. This pixel format is the recommended format to use if a 15/16-bit format is desired - // as it is the pixel format that is typically available on a wide range of low-power devices. - // It is also natively supported in APIs like OpenGL ES. - RETRO_PIXEL_FORMAT_RGB565 = 2, - - // Ensure sizeof() == sizeof(int). - RETRO_PIXEL_FORMAT_UNKNOWN = INT_MAX -}; - -struct retro_message -{ - const char *msg; // Message to be displayed. 
- unsigned frames; // Duration in frames of message. -}; - -// Describes how the libretro implementation maps a libretro input bind -// to its internal input system through a human readable string. -// This string can be used to better let a user configure input. -struct retro_input_descriptor -{ - // Associates given parameters with a description. - unsigned port; - unsigned device; - unsigned index; - unsigned id; - - const char *description; // Human readable description for parameters. - // The pointer must remain valid until retro_unload_game() is called. -}; - -struct retro_system_info -{ - // All pointers are owned by libretro implementation, and pointers must remain valid until retro_deinit() is called. - - const char *library_name; // Descriptive name of library. Should not contain any version numbers, etc. - const char *library_version; // Descriptive version of core. - - const char *valid_extensions; // A string listing probably rom extensions the core will be able to load, separated with pipe. - // I.e. "bin|rom|iso". - // Typically used for a GUI to filter out extensions. - - bool need_fullpath; // If true, retro_load_game() is guaranteed to provide a valid pathname in retro_game_info::path. - // ::data and ::size are both invalid. - // If false, ::data and ::size are guaranteed to be valid, but ::path might not be valid. - // This is typically set to true for libretro implementations that must load from file. - // Implementations should strive for setting this to false, as it allows the frontend to perform patching, etc. - - bool block_extract; // If true, the frontend is not allowed to extract any archives before loading the real ROM. - // Necessary for certain libretro implementations that load games from zipped archives. -}; - -struct retro_game_geometry -{ - unsigned base_width; // Nominal video width of game. - unsigned base_height; // Nominal video height of game. - unsigned max_width; // Maximum possible width of game. 
- unsigned max_height; // Maximum possible height of game. - - float aspect_ratio; // Nominal aspect ratio of game. If aspect_ratio is <= 0.0, - // an aspect ratio of base_width / base_height is assumed. - // A frontend could override this setting if desired. -}; - -struct retro_system_timing -{ - double fps; // FPS of video content. - double sample_rate; // Sampling rate of audio. -}; - -struct retro_system_av_info -{ - struct retro_game_geometry geometry; - struct retro_system_timing timing; -}; - -struct retro_variable -{ - const char *key; // Variable to query in RETRO_ENVIRONMENT_GET_VARIABLE. - // If NULL, obtains the complete environment string if more complex parsing is necessary. - // The environment string is formatted as key-value pairs delimited by semicolons as so: - // "key1=value1;key2=value2;..." - const char *value; // Value to be obtained. If key does not exist, it is set to NULL. -}; - -struct retro_game_info -{ - const char *path; // Path to game, UTF-8 encoded. Usually used as a reference. - // May be NULL if rom was loaded from stdin or similar. - // retro_system_info::need_fullpath guaranteed that this path is valid. - const void *data; // Memory buffer of loaded game. Will be NULL if need_fullpath was set. - size_t size; // Size of memory buffer. - const char *meta; // String of implementation specific meta-data. -}; - -// Callbacks -// -// Environment callback. Gives implementations a way of performing uncommon tasks. Extensible. -typedef bool (*retro_environment_t)(unsigned cmd, void *data); - -// Render a frame. Pixel format is 15-bit 0RGB1555 native endian unless changed (see RETRO_ENVIRONMENT_SET_PIXEL_FORMAT). -// Width and height specify dimensions of buffer. -// Pitch specifices length in bytes between two lines in buffer. -// For performance reasons, it is highly recommended to have a frame that is packed in memory, i.e. pitch == width * byte_per_pixel. 
-// Certain graphic APIs, such as OpenGL ES, do not like textures that are not packed in memory. -typedef void (*retro_video_refresh_t)(const void *data, unsigned width, unsigned height, size_t pitch); - -// Renders a single audio frame. Should only be used if implementation generates a single sample at a time. -// Format is signed 16-bit native endian. -typedef void (*retro_audio_sample_t)(int16_t left, int16_t right); -// Renders multiple audio frames in one go. One frame is defined as a sample of left and right channels, interleaved. -// I.e. int16_t buf[4] = { l, r, l, r }; would be 2 frames. -// Only one of the audio callbacks must ever be used. -typedef size_t (*retro_audio_sample_batch_t)(const int16_t *data, size_t frames); - -// Polls input. -typedef void (*retro_input_poll_t)(void); -// Queries for input for player 'port'. device will be masked with RETRO_DEVICE_MASK. -// Specialization of devices such as RETRO_DEVICE_JOYPAD_MULTITAP that have been set with retro_set_controller_port_device() -// will still use the higher level RETRO_DEVICE_JOYPAD to request input. -typedef int16_t (*retro_input_state_t)(unsigned port, unsigned device, unsigned index, unsigned id); - -// Sets callbacks. retro_set_environment() is guaranteed to be called before retro_init(). -// The rest of the set_* functions are guaranteed to have been called before the first call to retro_run() is made. -void retro_set_environment(retro_environment_t); -void retro_set_video_refresh(retro_video_refresh_t); -void retro_set_audio_sample(retro_audio_sample_t); -void retro_set_audio_sample_batch(retro_audio_sample_batch_t); -void retro_set_input_poll(retro_input_poll_t); -void retro_set_input_state(retro_input_state_t); - -// Library global initialization/deinitialization. -void retro_init(void); -void retro_deinit(void); - -// Must return RETRO_API_VERSION. Used to validate ABI compatibility when the API is revised. -unsigned retro_api_version(void); - -// Gets statically known system info. 
Pointers provided in *info must be statically allocated. -// Can be called at any time, even before retro_init(). -void retro_get_system_info(struct retro_system_info *info); - -// Gets information about system audio/video timings and geometry. -// Can be called only after retro_load_game() has successfully completed. -// NOTE: The implementation of this function might not initialize every variable if needed. -// E.g. geom.aspect_ratio might not be initialized if core doesn't desire a particular aspect ratio. -void retro_get_system_av_info(struct retro_system_av_info *info); - -// Sets device to be used for player 'port'. -void retro_set_controller_port_device(unsigned port, unsigned device); - -// Resets the current game. -void retro_reset(void); - -// Runs the game for one video frame. -// During retro_run(), input_poll callback must be called at least once. -// -// If a frame is not rendered for reasons where a game "dropped" a frame, -// this still counts as a frame, and retro_run() should explicitly dupe a frame if GET_CAN_DUPE returns true. -// In this case, the video callback can take a NULL argument for data. -void retro_run(void); - -// Returns the amount of data the implementation requires to serialize internal state (save states). -// Beetween calls to retro_load_game() and retro_unload_game(), the returned size is never allowed to be larger than a previous returned value, to -// ensure that the frontend can allocate a save state buffer once. -size_t retro_serialize_size(void); - -// Serializes internal state. If failed, or size is lower than retro_serialize_size(), it should return false, true otherwise. -bool retro_serialize(void *data, size_t size); -bool retro_unserialize(const void *data, size_t size); - -void retro_cheat_reset(void); -void retro_cheat_set(unsigned index, bool enabled, const char *code); - -// Loads a game. -bool retro_load_game(const struct retro_game_info *game); - -// Loads a "special" kind of game. 
Should not be used except in extreme cases. -bool retro_load_game_special( - unsigned game_type, - const struct retro_game_info *info, size_t num_info -); - -// Unloads a currently loaded game. -void retro_unload_game(void); - -// Gets region of game. -unsigned retro_get_region(void); - -// Gets region of memory. -void *retro_get_memory_data(unsigned id); -size_t retro_get_memory_size(unsigned id); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/platform/libretro/3ds/3ds_utils.c b/platform/libretro/3ds/3ds_utils.c new file mode 100644 index 00000000..e0f76ca2 --- /dev/null +++ b/platform/libretro/3ds/3ds_utils.c @@ -0,0 +1,78 @@ + +#include "3ds_utils.h" + +typedef int (*ctr_callback_type)(void); + +int srvGetServiceHandle(unsigned int* out, const char* name); +int svcCloseHandle(unsigned int handle); +int svcBackdoor(ctr_callback_type); + + +static void ctr_enable_all_svc_kernel(void) +{ + __asm__ volatile("cpsid aif"); + + unsigned int* svc_access_control = *(*(unsigned int***)0xFFFF9000 + 0x22) - 0x6; + + svc_access_control[0]=0xFFFFFFFE; + svc_access_control[1]=0xFFFFFFFF; + svc_access_control[2]=0xFFFFFFFF; + svc_access_control[3]=0x3FFFFFFF; +} + + +static void ctr_invalidate_ICache_kernel(void) +{ + __asm__ volatile( + "cpsid aif\n\t" + "mov r0, #0\n\t" + "mcr p15, 0, r0, c7, c5, 0\n\t"); +} + +static void ctr_flush_DCache_kernel(void) +{ + __asm__ volatile( + "cpsid aif\n\t" + "mov r0, #0\n\t" + "mcr p15, 0, r0, c7, c10, 0\n\t"); + +} + + +static void ctr_enable_all_svc(void) +{ + svcBackdoor((ctr_callback_type)ctr_enable_all_svc_kernel); +} + +void ctr_invalidate_ICache(void) +{ +// __asm__ volatile("svc 0x2E\n\t"); + svcBackdoor((ctr_callback_type)ctr_invalidate_ICache_kernel); + +} + +void ctr_flush_DCache(void) +{ +// __asm__ volatile("svc 0x4B\n\t"); + svcBackdoor((ctr_callback_type)ctr_flush_DCache_kernel); +} + + +void ctr_flush_invalidate_cache(void) +{ + ctr_flush_DCache(); + ctr_invalidate_ICache(); +} + +int ctr_svchack_init(void) +{ + 
extern unsigned int __service_ptr; + + if(__service_ptr) + return 0; + + /* CFW */ + ctr_enable_all_svc(); + return 1; +} + diff --git a/platform/libretro/3ds/3ds_utils.h b/platform/libretro/3ds/3ds_utils.h new file mode 100644 index 00000000..fe97985c --- /dev/null +++ b/platform/libretro/3ds/3ds_utils.h @@ -0,0 +1,16 @@ +#ifndef _3DS_UTILS_H +#define _3DS_UTILS_H + +void ctr_invalidate_ICache(void); +void ctr_flush_DCache(void); + +void ctr_flush_invalidate_cache(void); + +int ctr_svchack_init(void); + +#include <stdio.h> +#define DEBUG_HOLD() do{printf("%s@%s:%d.\n",__FUNCTION__, __FILE__, __LINE__);fflush(stdout);wait_for_input();}while(0) + +void wait_for_input(void); + +#endif // _3DS_UTILS_H diff --git a/platform/libretro/libretro.c b/platform/libretro/libretro.c new file mode 100644 index 00000000..934b6859 --- /dev/null +++ b/platform/libretro/libretro.c @@ -0,0 +1,1421 @@ +/* + * libretro core glue for PicoDrive + * (C) notaz, 2013 + * (C) aliaspider, 2016 + * (C) Daniel De Matteis, 2013 + * + * This work is licensed under the terms of MAME license. + * See COPYING file in the top-level directory. 
+ */ + +#define _GNU_SOURCE 1 // mremap +#include +#include +#include +#ifndef _WIN32 +#ifndef NO_MMAP +#include +#endif +#else +#include +#include +#include +#endif +#include +#ifdef __MACH__ +#include +#endif + +#ifdef _3DS +#include "3ds/3ds_utils.h" +#define MEMOP_MAP 4 +#define MEMOP_UNMAP 5 +#define MEMOP_PROT 6 + +int svcDuplicateHandle(unsigned int* out, unsigned int original); +int svcCloseHandle(unsigned int handle); +int svcControlProcessMemory(unsigned int process, void* addr0, void* addr1, + unsigned int size, unsigned int type, unsigned int perm); +void* linearMemAlign(size_t size, size_t alignment); +void linearFree(void* mem); + +static int ctr_svchack_successful = 0; + +#elif defined(VITA) +#define TARGET_SIZE_2 24 // 2^24 = 16 megabytes + +#include +static int sceBlock; +int getVMBlock(); +int _newlib_vm_size_user = 1 << TARGET_SIZE_2; + +#endif + +#include +#include +#include +#include "../common/input_pico.h" +#include "../common/version.h" +#include "libretro.h" + +static retro_log_printf_t log_cb; +static retro_video_refresh_t video_cb; +static retro_input_poll_t input_poll_cb; +static retro_input_state_t input_state_cb; +static retro_environment_t environ_cb; +static retro_audio_sample_batch_t audio_batch_cb; + +#define VOUT_MAX_WIDTH 320 +#define VOUT_MAX_HEIGHT 240 + +static const float VOUT_PAR = 0.0; +static const float VOUT_4_3 = (224.0f * (4.0f / 3.0f)); +static const float VOUT_CRT = (224.0f * 1.29911f); + +bool show_overscan = false; + +static void *vout_buf; +static int vout_width, vout_height, vout_offset; +static float user_vout_width = 0.0; + +#ifdef _MSC_VER +static short sndBuffer[2*44100/50]; +#else +static short __attribute__((aligned(4))) sndBuffer[2*44100/50]; +#endif + +static void snd_write(int len); + +#ifdef _WIN32 +#define SLASH '\\' +#else +#define SLASH '/' +#endif + +/* functions called by the core */ + +void cache_flush_d_inval_i(void *start, void *end) +{ +#ifdef __arm__ + size_t len = (char *)end - (char *)start; 
+#if defined(__BLACKBERRY_QNX__) + msync(start, end - start, MS_SYNC | MS_CACHE_ONLY | MS_INVALIDATE_ICACHE); +#elif defined(__MACH__) + sys_dcache_flush(start, len); + sys_icache_invalidate(start, len); +#elif defined(_3DS) + ctr_flush_invalidate_cache(); +#elif defined(VITA) + sceKernelSyncVMDomain(sceBlock, start, len); +#else + __clear_cache(start, end); +#endif +#endif +} + +#ifdef _WIN32 +/* mmap() replacement for Windows + * + * Author: Mike Frysinger + * Placed into the public domain + */ + +/* References: + * CreateFileMapping: http://msdn.microsoft.com/en-us/library/aa366537(VS.85).aspx + * CloseHandle: http://msdn.microsoft.com/en-us/library/ms724211(VS.85).aspx + * MapViewOfFile: http://msdn.microsoft.com/en-us/library/aa366761(VS.85).aspx + * UnmapViewOfFile: http://msdn.microsoft.com/en-us/library/aa366882(VS.85).aspx + */ + +#define PROT_READ 0x1 +#define PROT_WRITE 0x2 +/* This flag is only available in WinXP+ */ +#ifdef FILE_MAP_EXECUTE +#define PROT_EXEC 0x4 +#else +#define PROT_EXEC 0x0 +#define FILE_MAP_EXECUTE 0 +#endif + +#define MAP_SHARED 0x01 +#define MAP_PRIVATE 0x02 +#define MAP_ANONYMOUS 0x20 +#define MAP_ANON MAP_ANONYMOUS +#define MAP_FAILED ((void *) -1) + +#ifdef __USE_FILE_OFFSET64 +# define DWORD_HI(x) (x >> 32) +# define DWORD_LO(x) ((x) & 0xffffffff) +#else +# define DWORD_HI(x) (0) +# define DWORD_LO(x) (x) +#endif + +static void *mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset) +{ + uint32_t flProtect, dwDesiredAccess; + off_t end; + HANDLE mmap_fd, h; + void *ret; + + if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) + return MAP_FAILED; + if (fd == -1) { + if (!(flags & MAP_ANON) || offset) + return MAP_FAILED; + } else if (flags & MAP_ANON) + return MAP_FAILED; + + if (prot & PROT_WRITE) { + if (prot & PROT_EXEC) + flProtect = PAGE_EXECUTE_READWRITE; + else + flProtect = PAGE_READWRITE; + } else if (prot & PROT_EXEC) { + if (prot & PROT_READ) + flProtect = PAGE_EXECUTE_READ; + else if (prot & 
PROT_EXEC) + flProtect = PAGE_EXECUTE; + } else + flProtect = PAGE_READONLY; + + end = length + offset; + + if (fd == -1) + mmap_fd = INVALID_HANDLE_VALUE; + else + mmap_fd = (HANDLE)_get_osfhandle(fd); + h = CreateFileMapping(mmap_fd, NULL, flProtect, DWORD_HI(end), DWORD_LO(end), NULL); + if (h == NULL) + return MAP_FAILED; + + if (prot & PROT_WRITE) + dwDesiredAccess = FILE_MAP_WRITE; + else + dwDesiredAccess = FILE_MAP_READ; + if (prot & PROT_EXEC) + dwDesiredAccess |= FILE_MAP_EXECUTE; + if (flags & MAP_PRIVATE) + dwDesiredAccess |= FILE_MAP_COPY; + ret = MapViewOfFile(h, dwDesiredAccess, DWORD_HI(offset), DWORD_LO(offset), length); + if (ret == NULL) { + CloseHandle(h); + ret = MAP_FAILED; + } + return ret; +} + +static void munmap(void *addr, size_t length) +{ + UnmapViewOfFile(addr); + /* ruh-ro, we leaked handle from CreateFileMapping() ... */ +} +#elif defined(NO_MMAP) +#define PROT_EXEC 0x04 +#define MAP_FAILED 0 +#define PROT_READ 0 +#define PROT_WRITE 0 +#define MAP_PRIVATE 0 +#define MAP_ANONYMOUS 0 + +void* mmap(void *desired_addr, size_t len, int mmap_prot, int mmap_flags, int fildes, size_t off) +{ + return malloc(len); +} + +void munmap(void *base_addr, size_t len) +{ + free(base_addr); +} + +int mprotect(void *addr, size_t len, int prot) +{ + /* stub - not really needed at this point since this codepath has no dynarecs */ + return 0; +} + +#endif + +#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS MAP_ANON +#endif + +#ifdef _3DS +typedef struct +{ + unsigned int requested_map; + void* buffer; +}pico_mmap_t; + +pico_mmap_t pico_mmaps[] = { + {0x02000000, 0}, + {0x06000000, 0}, + {NULL, 0} +}; + +void *plat_mmap(unsigned long addr, size_t size, int need_exec, int is_fixed) +{ + (void)is_fixed; + + if (ctr_svchack_successful) + { + pico_mmap_t* pico_mmap; + + for (pico_mmap = pico_mmaps; pico_mmap->requested_map; pico_mmap++) + { + if ((pico_mmap->requested_map == addr)) + { + unsigned int ptr_aligned, tmp; + unsigned int currentHandle; + unsigned int 
perm = 0b011; + + if (need_exec) + perm = 0b111; + + size = (size + 0xFFF) & ~0xFFF; + pico_mmap->buffer = malloc(size + 0x1000); + ptr_aligned = (((unsigned int)pico_mmap->buffer) + 0xFFF) & ~0xFFF; + + svcDuplicateHandle(&currentHandle, 0xFFFF8001); + + if(svcControlProcessMemory(currentHandle, pico_mmap->requested_map, ptr_aligned, size, MEMOP_MAP, perm) < 0) + { + if (log_cb) + log_cb(RETRO_LOG_ERROR, "could not map memory @0x%08X\n", pico_mmap->requested_map); + exit(1); + } + + svcCloseHandle(currentHandle); + return (void*)pico_mmap->requested_map; + } + } + } + + return malloc(size); +} + +void *plat_mremap(void *ptr, size_t oldsize, size_t newsize) +{ + if (ctr_svchack_successful) + { + pico_mmap_t* pico_mmap; + + for (pico_mmap = pico_mmaps; pico_mmap->requested_map; pico_mmap++) + { + if ((pico_mmap->requested_map == (unsigned int)ptr)) + { + unsigned int ptr_aligned; + unsigned int currentHandle; + void* tmp; + + oldsize = (oldsize + 0xFFF) & ~0xFFF; + newsize = (newsize + 0xFFF) & ~0xFFF; + ptr_aligned = (((unsigned int)pico_mmap->buffer) + 0xFFF) & ~0xFFF; + + svcDuplicateHandle(&currentHandle, 0xFFFF8001); + + svcControlProcessMemory(currentHandle, pico_mmap->requested_map, ptr_aligned, oldsize, MEMOP_UNMAP, 0b011); + + tmp = realloc(pico_mmap->buffer, newsize + 0x1000); + if(!tmp) + return NULL; + + pico_mmap->buffer = tmp; + ptr_aligned = (((unsigned int)pico_mmap->buffer) + 0xFFF) & ~0xFFF; + + svcControlProcessMemory(currentHandle, pico_mmap->requested_map, ptr_aligned, newsize, MEMOP_MAP, 0x3); + + svcCloseHandle(currentHandle); + + return ptr; + } + } + } + + return realloc(ptr, newsize); + +} +void plat_munmap(void *ptr, size_t size) +{ + if (ctr_svchack_successful) + { + pico_mmap_t* pico_mmap; + + for (pico_mmap = pico_mmaps; pico_mmap->requested_map; pico_mmap++) + { + if ((pico_mmap->requested_map == (unsigned int)ptr)) + { + unsigned int ptr_aligned; + unsigned int currentHandle; + + size = (size + 0xFFF) & ~0xFFF; + ptr_aligned = (((unsigned 
int)pico_mmap->buffer) + 0xFFF) & ~0xFFF; + + svcDuplicateHandle(&currentHandle, 0xFFFF8001); + + svcControlProcessMemory(currentHandle, (void*)pico_mmap->requested_map, (void*)ptr_aligned, size, MEMOP_UNMAP, 0b011); + + svcCloseHandle(currentHandle); + + free(pico_mmap->buffer); + pico_mmap->buffer = NULL; + return; + } + } + } + + free(ptr); +} + +#else +void *plat_mmap(unsigned long addr, size_t size, int need_exec, int is_fixed) +{ + int flags = MAP_PRIVATE | MAP_ANONYMOUS; + void *req, *ret; + + req = (void *)addr; + ret = mmap(req, size, PROT_READ | PROT_WRITE, flags, -1, 0); + if (ret == MAP_FAILED) { + if (log_cb) + log_cb(RETRO_LOG_ERROR, "mmap(%08lx, %zd) failed: %d\n", addr, size, errno); + return NULL; + } + + if (addr != 0 && ret != (void *)addr) { + if (log_cb) + log_cb(RETRO_LOG_WARN, "warning: wanted to map @%08lx, got %p\n", + addr, ret); + + if (is_fixed) { + munmap(ret, size); + return NULL; + } + } + + return ret; +} + +void *plat_mremap(void *ptr, size_t oldsize, size_t newsize) +{ +#ifdef __linux__ + void *ret = mremap(ptr, oldsize, newsize, 0); + if (ret == MAP_FAILED) + return NULL; + + return ret; +#else + void *tmp, *ret; + size_t preserve_size; + + preserve_size = oldsize; + if (preserve_size > newsize) + preserve_size = newsize; + tmp = malloc(preserve_size); + if (tmp == NULL) + return NULL; + memcpy(tmp, ptr, preserve_size); + + munmap(ptr, oldsize); + ret = mmap(ptr, newsize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (ret == MAP_FAILED) { + free(tmp); + return NULL; + } + memcpy(ret, tmp, preserve_size); + free(tmp); + return ret; +#endif +} + +void plat_munmap(void *ptr, size_t size) +{ + if (ptr != NULL) + munmap(ptr, size); +} +#endif + +int plat_mem_set_exec(void *ptr, size_t size) +{ +#ifdef _WIN32 + int ret = VirtualProtect(ptr,size,PAGE_EXECUTE_READWRITE,0); + if (ret == 0 && log_cb) + log_cb(RETRO_LOG_ERROR, "mprotect(%p, %zd) failed: %d\n", ptr, size, 0); +#elif defined(_3DS) + int ret = -1; + if 
(ctr_svchack_successful) + { + unsigned int currentHandle; + svcDuplicateHandle(&currentHandle, 0xFFFF8001); + ret = svcControlProcessMemory(currentHandle, ptr, 0x0, + size, MEMOP_PROT, 0b111); + svcCloseHandle(currentHandle); + ctr_flush_invalidate_cache(); + + } + else + { + if (log_cb) + log_cb(RETRO_LOG_ERROR, "plat_mem_set_exec called with no svcControlProcessMemory access\n"); + exit(1); + } + +#elif defined(VITA) + int ret = sceKernelOpenVMDomain(); +#else + int ret = mprotect(ptr, size, PROT_READ | PROT_WRITE | PROT_EXEC); + if (ret != 0 && log_cb) + log_cb(RETRO_LOG_ERROR, "mprotect(%p, %zd) failed: %d\n", ptr, size, errno); +#endif + return ret; +} + +void emu_video_mode_change(int start_line, int line_count, int is_32cols) +{ + memset(vout_buf, 0, 320 * 240 * 2); + vout_width = is_32cols ? 256 : 320; + PicoDrawSetOutBuf(vout_buf, vout_width * 2); + if (show_overscan == true) line_count += 16; + if (show_overscan == true) start_line -= 8; + + vout_height = line_count; + vout_offset = vout_width * start_line; + + // Update the geometry + struct retro_system_av_info av_info; + retro_get_system_av_info(&av_info); + environ_cb(RETRO_ENVIRONMENT_SET_GEOMETRY, &av_info); +} + +void emu_32x_startup(void) +{ +} + +void lprintf(const char *fmt, ...) +{ + char buffer[256]; + va_list ap; + va_start(ap, fmt); + vsprintf(buffer, fmt, ap); + /* TODO - add 'level' param for warning/error messages? 
*/ + if (log_cb) + log_cb(RETRO_LOG_INFO, "%s", buffer); + va_end(ap); +} + +/* libretro */ +void retro_set_environment(retro_environment_t cb) +{ + static const struct retro_variable vars[] = { + { "picodrive_input1", "Input device 1; 3 button pad|6 button pad|None" }, + { "picodrive_input2", "Input device 2; 3 button pad|6 button pad|None" }, + { "picodrive_sprlim", "No sprite limit; disabled|enabled" }, + { "picodrive_ramcart", "MegaCD RAM cart; disabled|enabled" }, + { "picodrive_region", "Region; Auto|Japan NTSC|Japan PAL|US|Europe" }, + { "picodrive_region_fps", "Region FPS; Auto|NTSC|PAL" }, + { "picodrive_aspect", "Core-provided aspect ratio; PAR|4/3|CRT" }, + { "picodrive_overscan", "Show Overscan; disabled|enabled" }, +#ifdef DRC_SH2 + { "picodrive_drc", "Dynamic recompilers; enabled|disabled" }, +#endif + { NULL, NULL }, + }; + + environ_cb = cb; + + cb(RETRO_ENVIRONMENT_SET_VARIABLES, (void *)vars); +} + +void retro_set_video_refresh(retro_video_refresh_t cb) { video_cb = cb; } +void retro_set_audio_sample(retro_audio_sample_t cb) { (void)cb; } +void retro_set_audio_sample_batch(retro_audio_sample_batch_t cb) { audio_batch_cb = cb; } +void retro_set_input_poll(retro_input_poll_t cb) { input_poll_cb = cb; } +void retro_set_input_state(retro_input_state_t cb) { input_state_cb = cb; } + +unsigned retro_api_version(void) +{ + return RETRO_API_VERSION; +} + +void retro_set_controller_port_device(unsigned port, unsigned device) +{ +} + +void retro_get_system_info(struct retro_system_info *info) +{ + memset(info, 0, sizeof(*info)); + info->library_name = "PicoDrive"; +#ifndef GIT_VERSION +#define GIT_VERSION "" +#endif + info->library_version = VERSION GIT_VERSION; + info->valid_extensions = "bin|gen|smd|md|32x|cue|iso|sms"; + info->need_fullpath = true; +} + +void retro_get_system_av_info(struct retro_system_av_info *info) +{ + memset(info, 0, sizeof(*info)); + info->timing.fps = Pico.m.pal ? 
50 : 60; + info->timing.sample_rate = 44100; + info->geometry.base_width = vout_width; + info->geometry.base_height = vout_height; + info->geometry.max_width = vout_width; + info->geometry.max_height = vout_height; + + float common_width = vout_width; + if (user_vout_width != 0) + common_width = user_vout_width; + + info->geometry.aspect_ratio = common_width / vout_height; +} + +/* savestates */ +struct savestate_state { + const char *load_buf; + char *save_buf; + size_t size; + size_t pos; +}; + +size_t state_read(void *p, size_t size, size_t nmemb, void *file) +{ + struct savestate_state *state = file; + size_t bsize = size * nmemb; + + if (state->pos + bsize > state->size) { + if (log_cb) + log_cb(RETRO_LOG_ERROR, "savestate error: %u/%u\n", + state->pos + bsize, state->size); + bsize = state->size - state->pos; + if ((int)bsize <= 0) + return 0; + } + + memcpy(p, state->load_buf + state->pos, bsize); + state->pos += bsize; + return bsize; +} + +size_t state_write(void *p, size_t size, size_t nmemb, void *file) +{ + struct savestate_state *state = file; + size_t bsize = size * nmemb; + + if (state->pos + bsize > state->size) { + if (log_cb) + log_cb(RETRO_LOG_ERROR, "savestate error: %u/%u\n", + state->pos + bsize, state->size); + bsize = state->size - state->pos; + if ((int)bsize <= 0) + return 0; + } + + memcpy(state->save_buf + state->pos, p, bsize); + state->pos += bsize; + return bsize; +} + +size_t state_skip(void *p, size_t size, size_t nmemb, void *file) +{ + struct savestate_state *state = file; + size_t bsize = size * nmemb; + + state->pos += bsize; + return bsize; +} + +size_t state_eof(void *file) +{ + struct savestate_state *state = file; + + return state->pos >= state->size; +} + +int state_fseek(void *file, long offset, int whence) +{ + struct savestate_state *state = file; + + switch (whence) { + case SEEK_SET: + state->pos = offset; + break; + case SEEK_CUR: + state->pos += offset; + break; + case SEEK_END: + state->pos = state->size + offset; + 
break; + } + return (int)state->pos; +} + +/* savestate sizes vary wildly depending if cd/32x or + * carthw is active, so run the whole thing to get size */ +size_t retro_serialize_size(void) +{ + struct savestate_state state = { 0, }; + int ret; + + ret = PicoStateFP(&state, 1, NULL, state_skip, NULL, state_fseek); + if (ret != 0) + return 0; + + return state.pos; +} + +bool retro_serialize(void *data, size_t size) +{ + struct savestate_state state = { 0, }; + int ret; + + state.save_buf = data; + state.size = size; + state.pos = 0; + + ret = PicoStateFP(&state, 1, NULL, state_write, + NULL, state_fseek); + return ret == 0; +} + +bool retro_unserialize(const void *data, size_t size) +{ + struct savestate_state state = { 0, }; + int ret; + + state.load_buf = data; + state.size = size; + state.pos = 0; + + ret = PicoStateFP(&state, 0, state_read, NULL, + state_eof, state_fseek); + return ret == 0; +} + +typedef struct patch +{ + unsigned int addr; + unsigned short data; + unsigned char comp; +} patch; + +extern void decode(char *buff, patch *dest); +extern uint16_t m68k_read16(uint32_t a); +extern void m68k_write16(uint32_t a, uint16_t d); + +void retro_cheat_reset(void) +{ + int i=0; + unsigned int addr; + + for (i = 0; i < PicoPatchCount; i++) + { + addr = PicoPatches[i].addr; + if (addr < Pico.romsize) { + if (PicoPatches[i].active) + *(unsigned short *)(Pico.rom + addr) = PicoPatches[i].data_old; + } else { + if (PicoPatches[i].active) + m68k_write16(PicoPatches[i].addr,PicoPatches[i].data_old); + } + } + + PicoPatchUnload(); +} + +void retro_cheat_set(unsigned index, bool enabled, const char *code) +{ + patch pt; + int array_len = PicoPatchCount; + char codeCopy[256]; + char *buff; + + if (code=='\0') return; + strcpy(codeCopy,code); + buff = strtok(codeCopy,"+"); + + while (buff != NULL) + { + decode(buff, &pt); + if (pt.addr == (uint32_t) -1 || pt.data == (uint16_t) -1) + { + log_cb(RETRO_LOG_ERROR,"CHEATS: Invalid code: %s\n",buff); + return; + } + + /* code 
was good, add it */ + if (array_len < PicoPatchCount + 1) + { + void *ptr; + array_len *= 2; + array_len++; + ptr = realloc(PicoPatches, array_len * sizeof(PicoPatches[0])); + if (ptr == NULL) { + log_cb(RETRO_LOG_ERROR,"CHEATS: Failed to allocate memory for: %s\n",buff); + return; + } + PicoPatches = ptr; + } + strcpy(PicoPatches[PicoPatchCount].code, buff); + + PicoPatches[PicoPatchCount].active = enabled; + PicoPatches[PicoPatchCount].addr = pt.addr; + PicoPatches[PicoPatchCount].data = pt.data; + PicoPatches[PicoPatchCount].comp = pt.comp; + if (PicoPatches[PicoPatchCount].addr < Pico.romsize) + PicoPatches[PicoPatchCount].data_old = *(uint16_t *)(Pico.rom + PicoPatches[PicoPatchCount].addr); + else + PicoPatches[PicoPatchCount].data_old = (uint16_t) m68k_read16(PicoPatches[PicoPatchCount].addr); + PicoPatchCount++; + + buff = strtok(NULL,"+"); + } +} + +/* multidisk support */ +static bool disk_ejected; +static unsigned int disk_current_index; +static unsigned int disk_count; +static struct disks_state { + char *fname; +} disks[8]; + +static bool disk_set_eject_state(bool ejected) +{ + // TODO? + disk_ejected = ejected; + return true; +} + +static bool disk_get_eject_state(void) +{ + return disk_ejected; +} + +static unsigned int disk_get_image_index(void) +{ + return disk_current_index; +} + +static bool disk_set_image_index(unsigned int index) +{ + enum cd_img_type cd_type; + int ret; + + if (index >= sizeof(disks) / sizeof(disks[0])) + return false; + + if (disks[index].fname == NULL) { + if (log_cb) + log_cb(RETRO_LOG_ERROR, "missing disk #%u\n", index); + + // RetroArch specifies "no disk" with index == count, + // so don't fail here.. 
+ disk_current_index = index; + return true; + } + + if (log_cb) + log_cb(RETRO_LOG_INFO, "switching to disk %u: \"%s\"\n", index, + disks[index].fname); + + ret = -1; + cd_type = PicoCdCheck(disks[index].fname, NULL); + if (cd_type != CIT_NOT_CD) + ret = cdd_load(disks[index].fname, cd_type); + if (ret != 0) { + if (log_cb) + log_cb(RETRO_LOG_ERROR, "Load failed, invalid CD image?\n"); + return 0; + } + + disk_current_index = index; + return true; +} + +static unsigned int disk_get_num_images(void) +{ + return disk_count; +} + +static bool disk_replace_image_index(unsigned index, + const struct retro_game_info *info) +{ + bool ret = true; + + if (index >= sizeof(disks) / sizeof(disks[0])) + return false; + + if (disks[index].fname != NULL) + free(disks[index].fname); + disks[index].fname = NULL; + + if (info != NULL) { + disks[index].fname = strdup(info->path); + if (index == disk_current_index) + ret = disk_set_image_index(index); + } + + return ret; +} + +static bool disk_add_image_index(void) +{ + if (disk_count >= sizeof(disks) / sizeof(disks[0])) + return false; + + disk_count++; + return true; +} + +static struct retro_disk_control_callback disk_control = { + disk_set_eject_state, + disk_get_eject_state, + disk_get_image_index, + disk_set_image_index, + disk_get_num_images, + disk_replace_image_index, + disk_add_image_index, +}; + +static void disk_tray_open(void) +{ + if (log_cb) + log_cb(RETRO_LOG_INFO, "cd tray open\n"); + disk_ejected = 1; +} + +static void disk_tray_close(void) +{ + if (log_cb) + log_cb(RETRO_LOG_INFO, "cd tray close\n"); + disk_ejected = 0; +} + + +static const char * const biosfiles_us[] = { + "us_scd2_9306", "SegaCDBIOS9303", "us_scd1_9210", "bios_CD_U" +}; +static const char * const biosfiles_eu[] = { + "eu_mcd2_9306", "eu_mcd2_9303", "eu_mcd1_9210", "bios_CD_E" +}; +static const char * const biosfiles_jp[] = { + "jp_mcd2_921222", "jp_mcd1_9112", "jp_mcd1_9111", "bios_CD_J" +}; + +static void make_system_path(char *buf, size_t 
buf_size, + const char *name, const char *ext) +{ + const char *dir = NULL; + + if (environ_cb(RETRO_ENVIRONMENT_GET_SYSTEM_DIRECTORY, &dir) && dir) { + snprintf(buf, buf_size, "%s%c%s%s", dir, SLASH, name, ext); + } + else { + snprintf(buf, buf_size, "%s%s", name, ext); + } +} + +static const char *find_bios(int *region, const char *cd_fname) +{ + const char * const *files; + static char path[256]; + int i, count; + FILE *f = NULL; + + if (*region == 4) { // US + files = biosfiles_us; + count = sizeof(biosfiles_us) / sizeof(char *); + } else if (*region == 8) { // EU + files = biosfiles_eu; + count = sizeof(biosfiles_eu) / sizeof(char *); + } else if (*region == 1 || *region == 2) { + files = biosfiles_jp; + count = sizeof(biosfiles_jp) / sizeof(char *); + } else { + return NULL; + } + + for (i = 0; i < count; i++) + { + make_system_path(path, sizeof(path), files[i], ".bin"); + f = fopen(path, "rb"); + if (f != NULL) + break; + + make_system_path(path, sizeof(path), files[i], ".zip"); + f = fopen(path, "rb"); + if (f != NULL) + break; + } + + if (f != NULL) { + if (log_cb) + log_cb(RETRO_LOG_INFO, "using bios: %s\n", path); + fclose(f); + return path; + } + + return NULL; +} + +static void sram_reset() +{ + SRam.data = NULL; + SRam.start = 0; + SRam.end = 0; + SRam.flags = '\0'; + SRam.unused2 = '\0'; + SRam.changed = '\0' ; + SRam.eeprom_type = '\0'; + SRam.unused3 = '\0'; + SRam.eeprom_bit_cl = '\0'; + SRam.eeprom_bit_in = '\0'; + SRam.eeprom_bit_out = '\0'; + SRam.size = 0; +} + +bool retro_load_game(const struct retro_game_info *info) +{ + enum media_type_e media_type; + static char carthw_path[256]; + size_t i; + + struct retro_input_descriptor desc[] = { + { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_LEFT, "D-Pad Left" }, + { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_UP, "D-Pad Up" }, + { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_DOWN, "D-Pad Down" }, + { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_RIGHT, "D-Pad Right" }, + { 0, 
RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_B, "B" }, + { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_A, "C" }, + { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_X, "Y" }, + { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_Y, "A" }, + { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L, "X" }, + { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R, "Z" }, + { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_SELECT,"Mode" }, + { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_START, "Start" }, + + { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_LEFT, "D-Pad Left" }, + { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_UP, "D-Pad Up" }, + { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_DOWN, "D-Pad Down" }, + { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_RIGHT, "D-Pad Right" }, + { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_B, "B" }, + { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_A, "C" }, + { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_X, "Y" }, + { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_Y, "A" }, + { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L, "X" }, + { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R, "Z" }, + { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_SELECT,"Mode" }, + { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_START, "Start" }, + + { 0 }, + }; + + struct retro_input_descriptor desc_sms[] = { + { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_LEFT, "D-Pad Left" }, + { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_UP, "D-Pad Up" }, + { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_DOWN, "D-Pad Down" }, + { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_RIGHT, "D-Pad Right" }, + { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_B, "Button 1 Start" }, + { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_A, "Button 2" }, + { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_START, "Button Pause" }, + + { 1, RETRO_DEVICE_JOYPAD, 0, 
RETRO_DEVICE_ID_JOYPAD_LEFT, "D-Pad Left" }, + { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_UP, "D-Pad Up" }, + { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_DOWN, "D-Pad Down" }, + { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_RIGHT, "D-Pad Right" }, + { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_B, "Button 1 Start" }, + { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_A, "Button 2" }, + { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_START, "Button Pause" }, + + { 0 }, + }; + + sram_reset(); + + enum retro_pixel_format fmt = RETRO_PIXEL_FORMAT_RGB565; + if (!environ_cb(RETRO_ENVIRONMENT_SET_PIXEL_FORMAT, &fmt)) { + if (log_cb) + log_cb(RETRO_LOG_ERROR, "RGB565 support required, sorry\n"); + return false; + } + + if (info == NULL || info->path == NULL) { + if (log_cb) + log_cb(RETRO_LOG_ERROR, "info->path required\n"); + return false; + } + + for (i = 0; i < sizeof(disks) / sizeof(disks[0]); i++) { + if (disks[i].fname != NULL) { + free(disks[i].fname); + disks[i].fname = NULL; + } + } + + disk_current_index = 0; + disk_count = 1; + disks[0].fname = strdup(info->path); + + make_system_path(carthw_path, sizeof(carthw_path), "carthw", ".cfg"); + + media_type = PicoLoadMedia(info->path, carthw_path, + find_bios, NULL); + + switch (media_type) { + case PM_BAD_DETECT: + if (log_cb) + log_cb(RETRO_LOG_ERROR, "Failed to detect ROM/CD image type.\n"); + return false; + case PM_BAD_CD: + if (log_cb) + log_cb(RETRO_LOG_ERROR, "Invalid CD image\n"); + return false; + case PM_BAD_CD_NO_BIOS: + if (log_cb) + log_cb(RETRO_LOG_ERROR, "Missing BIOS\n"); + return false; + case PM_ERROR: + if (log_cb) + log_cb(RETRO_LOG_ERROR, "Load error\n"); + return false; + default: + break; + } + + if (media_type == PM_MARK3) + environ_cb(RETRO_ENVIRONMENT_SET_INPUT_DESCRIPTORS, desc_sms); + else + environ_cb(RETRO_ENVIRONMENT_SET_INPUT_DESCRIPTORS, desc); + + PicoLoopPrepare(); + + PicoWriteSound = snd_write; + memset(sndBuffer, 0, sizeof(sndBuffer)); + 
PsndOut = sndBuffer; + PsndRerate(0); + + return true; +} + +bool retro_load_game_special(unsigned game_type, const struct retro_game_info *info, size_t num_info) +{ + return false; +} + +void retro_unload_game(void) +{ +} + +unsigned retro_get_region(void) +{ + return Pico.m.pal ? RETRO_REGION_PAL : RETRO_REGION_NTSC; +} + +void *retro_get_memory_data(unsigned type) +{ + uint8_t* data; + + switch(type) + { + case RETRO_MEMORY_SAVE_RAM: + if (PicoAHW & PAHW_MCD) + data = Pico_mcd->bram; + else + data = SRam.data; + break; + case RETRO_MEMORY_SYSTEM_RAM: + if (PicoAHW & PAHW_SMS) + data = Pico.zram; + else + data = Pico.ram; + break; + default: + data = NULL; + break; + } + + return data; +} + +size_t retro_get_memory_size(unsigned type) +{ + unsigned int i; + int sum; + + switch(type) + { + case RETRO_MEMORY_SAVE_RAM: + if (PicoAHW & PAHW_MCD) + // bram + return 0x2000; + + if (Pico.m.frame_count == 0) + return SRam.size; + + // if game doesn't write to sram, don't report it to + // libretro so that RA doesn't write out zeroed .srm + for (i = 0, sum = 0; i < SRam.size; i++) + sum |= SRam.data[i]; + + return (sum != 0) ? 
SRam.size : 0; + + case RETRO_MEMORY_SYSTEM_RAM: + if (PicoAHW & PAHW_SMS) + return 0x2000; + else + return sizeof(Pico.ram); + + default: + return 0; + } + +} + +void retro_reset(void) +{ + PicoReset(); +} + +static const unsigned short retro_pico_map[] = { + 1 << GBTN_B, + 1 << GBTN_A, + 1 << GBTN_MODE, + 1 << GBTN_START, + 1 << GBTN_UP, + 1 << GBTN_DOWN, + 1 << GBTN_LEFT, + 1 << GBTN_RIGHT, + 1 << GBTN_C, + 1 << GBTN_Y, + 1 << GBTN_X, + 1 << GBTN_Z, +}; +#define RETRO_PICO_MAP_LEN (sizeof(retro_pico_map) / sizeof(retro_pico_map[0])) + +static void snd_write(int len) +{ + audio_batch_cb(PsndOut, len / 4); +} + +static enum input_device input_name_to_val(const char *name) +{ + if (strcmp(name, "3 button pad") == 0) + return PICO_INPUT_PAD_3BTN; + if (strcmp(name, "6 button pad") == 0) + return PICO_INPUT_PAD_6BTN; + if (strcmp(name, "None") == 0) + return PICO_INPUT_NOTHING; + + if (log_cb) + log_cb(RETRO_LOG_WARN, "invalid picodrive_input: '%s'\n", name); + return PICO_INPUT_PAD_3BTN; +} + +static void update_variables(void) +{ + struct retro_variable var; + + var.value = NULL; + var.key = "picodrive_input1"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + PicoSetInputDevice(0, input_name_to_val(var.value)); + + var.value = NULL; + var.key = "picodrive_input2"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + PicoSetInputDevice(1, input_name_to_val(var.value)); + + var.value = NULL; + var.key = "picodrive_sprlim"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { + if (strcmp(var.value, "enabled") == 0) + PicoOpt |= POPT_DIS_SPRITE_LIM; + else + PicoOpt &= ~POPT_DIS_SPRITE_LIM; + } + + var.value = NULL; + var.key = "picodrive_ramcart"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { + if (strcmp(var.value, "enabled") == 0) + PicoOpt |= POPT_EN_MCD_RAMCART; + else + PicoOpt &= ~POPT_EN_MCD_RAMCART; + } + + int OldPicoRegionOverride = PicoRegionOverride; + var.value = NULL; + 
var.key = "picodrive_region"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { + if (strcmp(var.value, "Auto") == 0) + PicoRegionOverride = 0; + else if (strcmp(var.value, "Japan NTSC") == 0) + PicoRegionOverride = 1; + else if (strcmp(var.value, "Japan PAL") == 0) + PicoRegionOverride = 2; + else if (strcmp(var.value, "US") == 0) + PicoRegionOverride = 4; + else if (strcmp(var.value, "Europe") == 0) + PicoRegionOverride = 8; + } + + int OldPicoRegionFPSOverride = PicoRegionFPSOverride; + var.value = NULL; + var.key = "picodrive_region_fps"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { + if (strcmp(var.value, "Auto") == 0) + PicoRegionFPSOverride = 0; + else if (strcmp(var.value, "NTSC") == 0) + PicoRegionFPSOverride = 1; + else if (strcmp(var.value, "PAL") == 0) + PicoRegionFPSOverride = 2; + } + + // Update region, fps and sound flags if needed + if (PicoRegionOverride != OldPicoRegionOverride || + PicoRegionFPSOverride != OldPicoRegionFPSOverride) + { + PicoDetectRegion(); + PicoLoopPrepare(); + PsndRerate(1); + } + + float old_user_vout_width = user_vout_width; + var.value = NULL; + var.key = "picodrive_aspect"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { + if (strcmp(var.value, "4/3") == 0) + user_vout_width = VOUT_4_3; + else if (strcmp(var.value, "CRT") == 0) + user_vout_width = VOUT_CRT; + else + user_vout_width = VOUT_PAR; + } + + var.value = NULL; + var.key = "picodrive_overscan"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { + if (strcmp(var.value, "enabled") == 0) + show_overscan = true; + else + show_overscan = false; + } + + if (user_vout_width != old_user_vout_width) + { + // Update the geometry + struct retro_system_av_info av_info; + retro_get_system_av_info(&av_info); + environ_cb(RETRO_ENVIRONMENT_SET_GEOMETRY, &av_info); + } + +#ifdef DRC_SH2 + var.value = NULL; + var.key = "picodrive_drc"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && 
var.value) { + if (strcmp(var.value, "enabled") == 0) + PicoOpt |= POPT_EN_DRC; + else + PicoOpt &= ~POPT_EN_DRC; + } +#endif +#ifdef _3DS + if(!ctr_svchack_successful) + PicoOpt &= ~POPT_EN_DRC; +#endif +} + +void retro_run(void) +{ + bool updated = false; + int pad, i; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE_UPDATE, &updated) && updated) + update_variables(); + + input_poll_cb(); + + PicoPad[0] = PicoPad[1] = 0; + for (pad = 0; pad < 2; pad++) + for (i = 0; i < RETRO_PICO_MAP_LEN; i++) + if (input_state_cb(pad, RETRO_DEVICE_JOYPAD, 0, i)) + PicoPad[pad] |= retro_pico_map[i]; + + PicoPatchApply(); + PicoFrame(); + + video_cb((short *)vout_buf + vout_offset, + vout_width, vout_height, vout_width * 2); +} + +static void check_system_specs(void) +{ + /* TODO - set different performance level for 32X - 6 for ARM dynarec, higher for interpreter core */ + unsigned level = 5; + environ_cb(RETRO_ENVIRONMENT_SET_PERFORMANCE_LEVEL, &level); +} + +void retro_init(void) +{ + struct retro_log_callback log; + int level; + + level = 0; + environ_cb(RETRO_ENVIRONMENT_SET_PERFORMANCE_LEVEL, &level); + + if (environ_cb(RETRO_ENVIRONMENT_GET_LOG_INTERFACE, &log)) + log_cb = log.log; + else + log_cb = NULL; + + environ_cb(RETRO_ENVIRONMENT_SET_DISK_CONTROL_INTERFACE, &disk_control); + +#ifdef _3DS + ctr_svchack_successful = ctr_svchack_init(); +#elif defined(VITA) + sceBlock = getVMBlock(); +#endif + + PicoOpt = POPT_EN_STEREO|POPT_EN_FM|POPT_EN_PSG|POPT_EN_Z80 + | POPT_EN_MCD_PCM|POPT_EN_MCD_CDDA|POPT_EN_MCD_GFX + | POPT_EN_32X|POPT_EN_PWM + | POPT_ACC_SPRITES|POPT_DIS_32C_BORDER; +#ifdef __arm__ +#ifdef _3DS + if (ctr_svchack_successful) +#endif + PicoOpt |= POPT_EN_DRC; +#endif + PsndRate = 44100; + PicoAutoRgnOrder = 0x184; // US, EU, JP + + vout_width = 320; + vout_height = 240; +#ifdef _3DS + vout_buf = linearMemAlign(VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT * 2, 0x80); +#else + vout_buf = malloc(VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT * 2); +#endif + + PicoInit(); + 
PicoDrawSetOutFormat(PDF_RGB555, 0); + PicoDrawSetOutBuf(vout_buf, vout_width * 2); + + //PicoMessage = plat_status_msg_busy_next; + PicoMCDopenTray = disk_tray_open; + PicoMCDcloseTray = disk_tray_close; + + update_variables(); +} + +void retro_deinit(void) +{ +#ifdef _3DS + linearFree(vout_buf); +#else + free(vout_buf); +#endif + vout_buf = NULL; + PicoExit(); +} diff --git a/platform/libretro/libretro.h b/platform/libretro/libretro.h new file mode 100644 index 00000000..16c274a1 --- /dev/null +++ b/platform/libretro/libretro.h @@ -0,0 +1,1926 @@ +/* Copyright (C) 2010-2014 The RetroArch team + * + * --------------------------------------------------------------------------------------- + * The following license statement only applies to this libretro API header (libretro.h). + * --------------------------------------------------------------------------------------- + * + * Permission is hereby granted, free of charge, + * to any person obtaining a copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef LIBRETRO_H__ +#define LIBRETRO_H__ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef __cplusplus +#if defined(_MSC_VER) && !defined(SN_TARGET_PS3) +/* Hack applied for MSVC when compiling in C89 mode + * as it isn't C99-compliant. */ +#define bool unsigned char +#define true 1 +#define false 0 +#else +#include +#endif +#endif + +/* Used for checking API/ABI mismatches that can break libretro + * implementations. + * It is not incremented for compatible changes to the API. + */ +#define RETRO_API_VERSION 1 + +/* + * Libretro's fundamental device abstractions. + * + * Libretro's input system consists of some standardized device types, + * such as a joypad (with/without analog), mouse, keyboard, lightgun + * and a pointer. + * + * The functionality of these devices is fixed, and individual cores + * map their own concept of a controller to libretro's abstractions. + * This makes it possible for frontends to map the abstract types to a + * real input device, without having to worry about binding input + * correctly to arbitrary controller layouts. + */ + +#define RETRO_DEVICE_TYPE_SHIFT 8 +#define RETRO_DEVICE_MASK ((1 << RETRO_DEVICE_TYPE_SHIFT) - 1) +#define RETRO_DEVICE_SUBCLASS(base, id) (((id + 1) << RETRO_DEVICE_TYPE_SHIFT) | base) + +/* Input disabled. */ +#define RETRO_DEVICE_NONE 0 + +/* The JOYPAD is called RetroPad. It is essentially a Super Nintendo + * controller, but with additional L2/R2/L3/R3 buttons, similar to a + * PS1 DualShock. */ +#define RETRO_DEVICE_JOYPAD 1 + +/* The mouse is a simple mouse, similar to Super Nintendo's mouse. + * X and Y coordinates are reported relative to the last poll (poll callback). + * It is up to the libretro implementation to keep track of where the mouse + * pointer is supposed to be on the screen. + * The frontend must make sure not to interfere with its own hardware + * mouse pointer. 
+ */ +#define RETRO_DEVICE_MOUSE 2 + +/* KEYBOARD device lets one poll for raw key pressed. + * It is poll based, so input callback will return with the current + * pressed state. + * For event/text based keyboard input, see + * RETRO_ENVIRONMENT_SET_KEYBOARD_CALLBACK. + */ +#define RETRO_DEVICE_KEYBOARD 3 + +/* Lightgun X/Y coordinates are reported relatively to last poll, + * similar to mouse. */ +#define RETRO_DEVICE_LIGHTGUN 4 + +/* The ANALOG device is an extension to JOYPAD (RetroPad). + * Similar to DualShock it adds two analog sticks. + * This is treated as a separate device type as it returns values in the + * full analog range of [-0x8000, 0x7fff]. Positive X axis is right. + * Positive Y axis is down. + * Only use ANALOG type when polling for analog values of the axes. + */ +#define RETRO_DEVICE_ANALOG 5 + +/* Abstracts the concept of a pointing mechanism, e.g. touch. + * This allows libretro to query in absolute coordinates where on the + * screen a mouse (or something similar) is being placed. + * For a touch centric device, coordinates reported are the coordinates + * of the press. + * + * Coordinates in X and Y are reported as: + * [-0x7fff, 0x7fff]: -0x7fff corresponds to the far left/top of the screen, + * and 0x7fff corresponds to the far right/bottom of the screen. + * The "screen" is here defined as area that is passed to the frontend and + * later displayed on the monitor. + * + * The frontend is free to scale/resize this screen as it sees fit, however, + * (X, Y) = (-0x7fff, -0x7fff) will correspond to the top-left pixel of the + * game image, etc. + * + * To check if the pointer coordinates are valid (e.g. a touch display + * actually being touched), PRESSED returns 1 or 0. + * + * If using a mouse on a desktop, PRESSED will usually correspond to the + * left mouse button, but this is a frontend decision. + * PRESSED will only return 1 if the pointer is inside the game screen. 
+ * + * For multi-touch, the index variable can be used to successively query + * more presses. + * If index = 0 returns true for _PRESSED, coordinates can be extracted + * with _X, _Y for index = 0. One can then query _PRESSED, _X, _Y with + * index = 1, and so on. + * Eventually _PRESSED will return false for an index. No further presses + * are registered at this point. */ +#define RETRO_DEVICE_POINTER 6 + +/* Buttons for the RetroPad (JOYPAD). + * The placement of these is equivalent to placements on the + * Super Nintendo controller. + * L2/R2/L3/R3 buttons correspond to the PS1 DualShock. */ +#define RETRO_DEVICE_ID_JOYPAD_B 0 +#define RETRO_DEVICE_ID_JOYPAD_Y 1 +#define RETRO_DEVICE_ID_JOYPAD_SELECT 2 +#define RETRO_DEVICE_ID_JOYPAD_START 3 +#define RETRO_DEVICE_ID_JOYPAD_UP 4 +#define RETRO_DEVICE_ID_JOYPAD_DOWN 5 +#define RETRO_DEVICE_ID_JOYPAD_LEFT 6 +#define RETRO_DEVICE_ID_JOYPAD_RIGHT 7 +#define RETRO_DEVICE_ID_JOYPAD_A 8 +#define RETRO_DEVICE_ID_JOYPAD_X 9 +#define RETRO_DEVICE_ID_JOYPAD_L 10 +#define RETRO_DEVICE_ID_JOYPAD_R 11 +#define RETRO_DEVICE_ID_JOYPAD_L2 12 +#define RETRO_DEVICE_ID_JOYPAD_R2 13 +#define RETRO_DEVICE_ID_JOYPAD_L3 14 +#define RETRO_DEVICE_ID_JOYPAD_R3 15 + +/* Index / Id values for ANALOG device. */ +#define RETRO_DEVICE_INDEX_ANALOG_LEFT 0 +#define RETRO_DEVICE_INDEX_ANALOG_RIGHT 1 +#define RETRO_DEVICE_ID_ANALOG_X 0 +#define RETRO_DEVICE_ID_ANALOG_Y 1 + +/* Id values for MOUSE. */ +#define RETRO_DEVICE_ID_MOUSE_X 0 +#define RETRO_DEVICE_ID_MOUSE_Y 1 +#define RETRO_DEVICE_ID_MOUSE_LEFT 2 +#define RETRO_DEVICE_ID_MOUSE_RIGHT 3 +#define RETRO_DEVICE_ID_MOUSE_WHEELUP 4 +#define RETRO_DEVICE_ID_MOUSE_WHEELDOWN 5 +#define RETRO_DEVICE_ID_MOUSE_MIDDLE 6 + +/* Id values for LIGHTGUN types. 
*/ +#define RETRO_DEVICE_ID_LIGHTGUN_X 0 +#define RETRO_DEVICE_ID_LIGHTGUN_Y 1 +#define RETRO_DEVICE_ID_LIGHTGUN_TRIGGER 2 +#define RETRO_DEVICE_ID_LIGHTGUN_CURSOR 3 +#define RETRO_DEVICE_ID_LIGHTGUN_TURBO 4 +#define RETRO_DEVICE_ID_LIGHTGUN_PAUSE 5 +#define RETRO_DEVICE_ID_LIGHTGUN_START 6 + +/* Id values for POINTER. */ +#define RETRO_DEVICE_ID_POINTER_X 0 +#define RETRO_DEVICE_ID_POINTER_Y 1 +#define RETRO_DEVICE_ID_POINTER_PRESSED 2 + +/* Returned from retro_get_region(). */ +#define RETRO_REGION_NTSC 0 +#define RETRO_REGION_PAL 1 + +/* Id values for LANGUAGE */ +enum retro_language +{ + RETRO_LANGUAGE_ENGLISH = 0, + RETRO_LANGUAGE_JAPANESE = 1, + RETRO_LANGUAGE_FRENCH = 2, + RETRO_LANGUAGE_SPANISH = 3, + RETRO_LANGUAGE_GERMAN = 4, + RETRO_LANGUAGE_ITALIAN = 5, + RETRO_LANGUAGE_DUTCH = 6, + RETRO_LANGUAGE_PORTUGUESE = 7, + RETRO_LANGUAGE_RUSSIAN = 8, + RETRO_LANGUAGE_KOREAN = 9, + RETRO_LANGUAGE_CHINESE_TRADITIONAL = 10, + RETRO_LANGUAGE_CHINESE_SIMPLIFIED = 11, + RETRO_LANGUAGE_LAST, + + /* Ensure sizeof(enum) == sizeof(int) */ + RETRO_LANGUAGE_DUMMY = INT_MAX +}; + +/* Passed to retro_get_memory_data/size(). + * If the memory type doesn't apply to the + * implementation NULL/0 can be returned. + */ +#define RETRO_MEMORY_MASK 0xff + +/* Regular save RAM. This RAM is usually found on a game cartridge, + * backed up by a battery. + * If save game data is too complex for a single memory buffer, + * the SAVE_DIRECTORY (preferably) or SYSTEM_DIRECTORY environment + * callback can be used. */ +#define RETRO_MEMORY_SAVE_RAM 0 + +/* Some games have a built-in clock to keep track of time. + * This memory is usually just a couple of bytes to keep track of time. + */ +#define RETRO_MEMORY_RTC 1 + +/* System RAM lets a frontend peek into a game system's main RAM. */ +#define RETRO_MEMORY_SYSTEM_RAM 2 + +/* Video RAM lets a frontend peek into a game system's video RAM (VRAM). 
*/ +#define RETRO_MEMORY_VIDEO_RAM 3 + +/* Keysyms used for ID in input state callback when polling RETRO_KEYBOARD. */ +enum retro_key +{ + RETROK_UNKNOWN = 0, + RETROK_FIRST = 0, + RETROK_BACKSPACE = 8, + RETROK_TAB = 9, + RETROK_CLEAR = 12, + RETROK_RETURN = 13, + RETROK_PAUSE = 19, + RETROK_ESCAPE = 27, + RETROK_SPACE = 32, + RETROK_EXCLAIM = 33, + RETROK_QUOTEDBL = 34, + RETROK_HASH = 35, + RETROK_DOLLAR = 36, + RETROK_AMPERSAND = 38, + RETROK_QUOTE = 39, + RETROK_LEFTPAREN = 40, + RETROK_RIGHTPAREN = 41, + RETROK_ASTERISK = 42, + RETROK_PLUS = 43, + RETROK_COMMA = 44, + RETROK_MINUS = 45, + RETROK_PERIOD = 46, + RETROK_SLASH = 47, + RETROK_0 = 48, + RETROK_1 = 49, + RETROK_2 = 50, + RETROK_3 = 51, + RETROK_4 = 52, + RETROK_5 = 53, + RETROK_6 = 54, + RETROK_7 = 55, + RETROK_8 = 56, + RETROK_9 = 57, + RETROK_COLON = 58, + RETROK_SEMICOLON = 59, + RETROK_LESS = 60, + RETROK_EQUALS = 61, + RETROK_GREATER = 62, + RETROK_QUESTION = 63, + RETROK_AT = 64, + RETROK_LEFTBRACKET = 91, + RETROK_BACKSLASH = 92, + RETROK_RIGHTBRACKET = 93, + RETROK_CARET = 94, + RETROK_UNDERSCORE = 95, + RETROK_BACKQUOTE = 96, + RETROK_a = 97, + RETROK_b = 98, + RETROK_c = 99, + RETROK_d = 100, + RETROK_e = 101, + RETROK_f = 102, + RETROK_g = 103, + RETROK_h = 104, + RETROK_i = 105, + RETROK_j = 106, + RETROK_k = 107, + RETROK_l = 108, + RETROK_m = 109, + RETROK_n = 110, + RETROK_o = 111, + RETROK_p = 112, + RETROK_q = 113, + RETROK_r = 114, + RETROK_s = 115, + RETROK_t = 116, + RETROK_u = 117, + RETROK_v = 118, + RETROK_w = 119, + RETROK_x = 120, + RETROK_y = 121, + RETROK_z = 122, + RETROK_DELETE = 127, + + RETROK_KP0 = 256, + RETROK_KP1 = 257, + RETROK_KP2 = 258, + RETROK_KP3 = 259, + RETROK_KP4 = 260, + RETROK_KP5 = 261, + RETROK_KP6 = 262, + RETROK_KP7 = 263, + RETROK_KP8 = 264, + RETROK_KP9 = 265, + RETROK_KP_PERIOD = 266, + RETROK_KP_DIVIDE = 267, + RETROK_KP_MULTIPLY = 268, + RETROK_KP_MINUS = 269, + RETROK_KP_PLUS = 270, + RETROK_KP_ENTER = 271, + RETROK_KP_EQUALS = 272, + + 
RETROK_UP = 273, + RETROK_DOWN = 274, + RETROK_RIGHT = 275, + RETROK_LEFT = 276, + RETROK_INSERT = 277, + RETROK_HOME = 278, + RETROK_END = 279, + RETROK_PAGEUP = 280, + RETROK_PAGEDOWN = 281, + + RETROK_F1 = 282, + RETROK_F2 = 283, + RETROK_F3 = 284, + RETROK_F4 = 285, + RETROK_F5 = 286, + RETROK_F6 = 287, + RETROK_F7 = 288, + RETROK_F8 = 289, + RETROK_F9 = 290, + RETROK_F10 = 291, + RETROK_F11 = 292, + RETROK_F12 = 293, + RETROK_F13 = 294, + RETROK_F14 = 295, + RETROK_F15 = 296, + + RETROK_NUMLOCK = 300, + RETROK_CAPSLOCK = 301, + RETROK_SCROLLOCK = 302, + RETROK_RSHIFT = 303, + RETROK_LSHIFT = 304, + RETROK_RCTRL = 305, + RETROK_LCTRL = 306, + RETROK_RALT = 307, + RETROK_LALT = 308, + RETROK_RMETA = 309, + RETROK_LMETA = 310, + RETROK_LSUPER = 311, + RETROK_RSUPER = 312, + RETROK_MODE = 313, + RETROK_COMPOSE = 314, + + RETROK_HELP = 315, + RETROK_PRINT = 316, + RETROK_SYSREQ = 317, + RETROK_BREAK = 318, + RETROK_MENU = 319, + RETROK_POWER = 320, + RETROK_EURO = 321, + RETROK_UNDO = 322, + + RETROK_LAST, + + RETROK_DUMMY = INT_MAX /* Ensure sizeof(enum) == sizeof(int) */ +}; + +enum retro_mod +{ + RETROKMOD_NONE = 0x0000, + + RETROKMOD_SHIFT = 0x01, + RETROKMOD_CTRL = 0x02, + RETROKMOD_ALT = 0x04, + RETROKMOD_META = 0x08, + + RETROKMOD_NUMLOCK = 0x10, + RETROKMOD_CAPSLOCK = 0x20, + RETROKMOD_SCROLLOCK = 0x40, + + RETROKMOD_DUMMY = INT_MAX /* Ensure sizeof(enum) == sizeof(int) */ +}; + +/* If set, this call is not part of the public libretro API yet. It can + * change or be removed at any time. */ +#define RETRO_ENVIRONMENT_EXPERIMENTAL 0x10000 +/* Environment callback to be used internally in frontend. */ +#define RETRO_ENVIRONMENT_PRIVATE 0x20000 + +/* Environment commands. */ +#define RETRO_ENVIRONMENT_SET_ROTATION 1 /* const unsigned * -- + * Sets screen rotation of graphics. + * Is only implemented if rotation can be accelerated by hardware. + * Valid values are 0, 1, 2, 3, which rotates screen by 0, 90, 180, + * 270 degrees counter-clockwise respectively. 
+ */ +#define RETRO_ENVIRONMENT_GET_OVERSCAN 2 /* bool * -- + * Boolean value whether or not the implementation should use overscan, + * or crop away overscan. + */ +#define RETRO_ENVIRONMENT_GET_CAN_DUPE 3 /* bool * -- + * Boolean value whether or not frontend supports frame duping, + * passing NULL to video frame callback. + */ + + /* Environ 4, 5 are no longer supported (GET_VARIABLE / SET_VARIABLES), + * and are reserved to avoid possible ABI clash. + */ + +#define RETRO_ENVIRONMENT_SET_MESSAGE 6 /* const struct retro_message * -- + * Sets a message to be displayed in implementation-specific manner + * for a certain number of 'frames'. + * Should not be used for trivial messages, which should simply be + * logged via RETRO_ENVIRONMENT_GET_LOG_INTERFACE (or as a + * fallback, stderr). + */ +#define RETRO_ENVIRONMENT_SHUTDOWN 7 /* N/A (NULL) -- + * Requests the frontend to shut down. + * Should only be used if game has a specific + * way to shut down the game from a menu item or similar. + */ +#define RETRO_ENVIRONMENT_SET_PERFORMANCE_LEVEL 8 + /* const unsigned * -- + * Gives a hint to the frontend how demanding this implementation + * is on a system. E.g. reporting a level of 2 means + * this implementation should run decently on all frontends + * of level 2 and up. + * + * It can be used by the frontend to potentially warn + * about too demanding implementations. + * + * The levels are "floating". + * + * This function can be called on a per-game basis, + * as certain games an implementation can play might be + * particularly demanding. + * If called, it should be called in retro_load_game(). + */ +#define RETRO_ENVIRONMENT_GET_SYSTEM_DIRECTORY 9 + /* const char ** -- + * Returns the "system" directory of the frontend. + * This directory can be used to store system specific + * content such as BIOSes, configuration data, etc. + * The returned value can be NULL. 
+ * If so, no such directory is defined, + * and it's up to the implementation to find a suitable directory. + * + * NOTE: Some cores used this folder also for "save" data such as + * memory cards, etc, for lack of a better place to put it. + * This is now discouraged, and if possible, cores should try to + * use the new GET_SAVE_DIRECTORY. + */ +#define RETRO_ENVIRONMENT_SET_PIXEL_FORMAT 10 + /* const enum retro_pixel_format * -- + * Sets the internal pixel format used by the implementation. + * The default pixel format is RETRO_PIXEL_FORMAT_0RGB1555. + * This pixel format however, is deprecated (see enum retro_pixel_format). + * If the call returns false, the frontend does not support this pixel + * format. + * + * This function should be called inside retro_load_game() or + * retro_get_system_av_info(). + */ +#define RETRO_ENVIRONMENT_SET_INPUT_DESCRIPTORS 11 + /* const struct retro_input_descriptor * -- + * Sets an array of retro_input_descriptors. + * It is up to the frontend to present this in a usable way. + * The array is terminated by retro_input_descriptor::description + * being set to NULL. + * This function can be called at any time, but it is recommended + * to call it as early as possible. + */ +#define RETRO_ENVIRONMENT_SET_KEYBOARD_CALLBACK 12 + /* const struct retro_keyboard_callback * -- + * Sets a callback function used to notify core about keyboard events. + */ +#define RETRO_ENVIRONMENT_SET_DISK_CONTROL_INTERFACE 13 + /* const struct retro_disk_control_callback * -- + * Sets an interface which frontend can use to eject and insert + * disk images. + * This is used for games which consist of multiple images and + * must be manually swapped out by the user (e.g. PSX). + */ +#define RETRO_ENVIRONMENT_SET_HW_RENDER 14 + /* struct retro_hw_render_callback * -- + * Sets an interface to let a libretro core render with + * hardware acceleration. + * Should be called in retro_load_game(). 
+ * If successful, libretro cores will be able to render to a + * frontend-provided framebuffer. + * The size of this framebuffer will be at least as large as + * max_width/max_height provided in get_av_info(). + * If HW rendering is used, pass only RETRO_HW_FRAME_BUFFER_VALID or + * NULL to retro_video_refresh_t. + */ +#define RETRO_ENVIRONMENT_GET_VARIABLE 15 + /* struct retro_variable * -- + * Interface to acquire user-defined information from environment + * that cannot feasibly be supported in a multi-system way. + * 'key' should be set to a key which has already been set by + * SET_VARIABLES. + * 'data' will be set to a value or NULL. + */ +#define RETRO_ENVIRONMENT_SET_VARIABLES 16 + /* const struct retro_variable * -- + * Allows an implementation to signal the environment + * which variables it might want to check for later using + * GET_VARIABLE. + * This allows the frontend to present these variables to + * a user dynamically. + * This should be called as early as possible (ideally in + * retro_set_environment). + * + * 'data' points to an array of retro_variable structs + * terminated by a { NULL, NULL } element. + * retro_variable::key should be namespaced to not collide + * with other implementations' keys. E.g. A core called + * 'foo' should use keys named as 'foo_option'. + * retro_variable::value should contain a human readable + * description of the key as well as a '|' delimited list + * of expected values. + * + * The number of possible options should be very limited, + * i.e. it should be feasible to cycle through options + * without a keyboard. + * + * First entry should be treated as a default. + * + * Example entry: + * { "foo_option", "Speed hack coprocessor X; false|true" } + * + * Text before first ';' is description. This ';' must be + * followed by a space, and followed by a list of possible + * values split up with '|'. + * + * Only strings are operated on. 
The possible values will + * generally be displayed and stored as-is by the frontend. + */ +#define RETRO_ENVIRONMENT_GET_VARIABLE_UPDATE 17 + /* bool * -- + * Result is set to true if some variables are updated by + * frontend since last call to RETRO_ENVIRONMENT_GET_VARIABLE. + * Variables should be queried with GET_VARIABLE. + */ +#define RETRO_ENVIRONMENT_SET_SUPPORT_NO_GAME 18 + /* const bool * -- + * If true, the libretro implementation supports calls to + * retro_load_game() with NULL as argument. + * Used by cores which can run without particular game data. + * This should be called within retro_set_environment() only. + */ +#define RETRO_ENVIRONMENT_GET_LIBRETRO_PATH 19 + /* const char ** -- + * Retrieves the absolute path from where this libretro + * implementation was loaded. + * NULL is returned if the libretro was loaded statically + * (i.e. linked statically to frontend), or if the path cannot be + * determined. + * Mostly useful in cooperation with SET_SUPPORT_NO_GAME as assets can + * be loaded without ugly hacks. + */ + + /* Environment 20 was an obsolete version of SET_AUDIO_CALLBACK. + * It was not used by any known core at the time, + * and was removed from the API. */ +#define RETRO_ENVIRONMENT_SET_AUDIO_CALLBACK 22 + /* const struct retro_audio_callback * -- + * Sets an interface which is used to notify a libretro core about audio + * being available for writing. + * The callback can be called from any thread, so a core using this must + * have a thread safe audio implementation. + * It is intended for games where audio and video are completely + * asynchronous and audio can be generated on the fly. + * This interface is not recommended for use with emulators which have + * highly synchronous audio. + * + * The callback only notifies about writability; the libretro core still + * has to call the normal audio callbacks + * to write audio. The audio callbacks must be called from within the + * notification callback. 
+ * The amount of audio data to write is up to the implementation. + * Generally, the audio callback will be called continuously in a loop. + * + * Due to thread safety guarantees and lack of sync between audio and + * video, a frontend can selectively disallow this interface based on + * internal configuration. A core using this interface must also + * implement the "normal" audio interface. + * + * A libretro core using SET_AUDIO_CALLBACK should also make use of + * SET_FRAME_TIME_CALLBACK. + */ +#define RETRO_ENVIRONMENT_SET_FRAME_TIME_CALLBACK 21 + /* const struct retro_frame_time_callback * -- + * Lets the core know how much time has passed since last + * invocation of retro_run(). + * The frontend can tamper with the timing to fake fast-forward, + * slow-motion, frame stepping, etc. + * In this case the delta time will use the reference value + * in frame_time_callback. + */ +#define RETRO_ENVIRONMENT_GET_RUMBLE_INTERFACE 23 + /* struct retro_rumble_interface * -- + * Gets an interface which is used by a libretro core to set + * state of rumble motors in controllers. + * A strong and weak motor is supported, and they can be + * controlled independently. + */ +#define RETRO_ENVIRONMENT_GET_INPUT_DEVICE_CAPABILITIES 24 + /* uint64_t * -- + * Gets a bitmask telling which device types are expected to be + * handled properly in a call to retro_input_state_t. + * Devices which are not handled or recognized always return + * 0 in retro_input_state_t. + * Example bitmask: caps = (1 << RETRO_DEVICE_JOYPAD) | (1 << RETRO_DEVICE_ANALOG). + * Should only be called in retro_run(). + */ +#define RETRO_ENVIRONMENT_GET_SENSOR_INTERFACE (25 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* struct retro_sensor_interface * -- + * Gets access to the sensor interface. + * The purpose of this interface is to allow + * setting state related to sensors such as polling rate, + * enabling/disabling it entirely, etc. + * Reading sensor state is done via the normal + * input_state_callback API.
+ */ +#define RETRO_ENVIRONMENT_GET_CAMERA_INTERFACE (26 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* struct retro_camera_callback * -- + * Gets an interface to a video camera driver. + * A libretro core can use this interface to get access to a + * video camera. + * New video frames are delivered in a callback in same + * thread as retro_run(). + * + * GET_CAMERA_INTERFACE should be called in retro_load_game(). + * + * Depending on the camera implementation used, camera frames + * will be delivered as a raw framebuffer, + * or as an OpenGL texture directly. + * + * The core has to tell the frontend here which types of + * buffers can be handled properly. + * An OpenGL texture can only be handled when using a + * libretro GL core (SET_HW_RENDER). + * It is recommended to use a libretro GL core when + * using camera interface. + * + * The camera is not started automatically. The retrieved start/stop + * functions must be used to explicitly + * start and stop the camera driver. + */ +#define RETRO_ENVIRONMENT_GET_LOG_INTERFACE 27 + /* struct retro_log_callback * -- + * Gets an interface for logging. This is useful for + * logging in a cross-platform way + * as certain platforms cannot use stderr for logging. + * It also allows the frontend to + * show logging information in a more suitable way. + * If this interface is not used, libretro cores should + * log to stderr as desired. + */ +#define RETRO_ENVIRONMENT_GET_PERF_INTERFACE 28 + /* struct retro_perf_callback * -- + * Gets an interface for performance counters. This is useful + * for performance logging in a cross-platform way and for detecting + * architecture-specific features, such as SIMD support. + */ +#define RETRO_ENVIRONMENT_GET_LOCATION_INTERFACE 29 + /* struct retro_location_callback * -- + * Gets access to the location interface. + * The purpose of this interface is to be able to retrieve + * location-based information from the host device, + * such as current latitude / longitude.
+ */ +#define RETRO_ENVIRONMENT_GET_CONTENT_DIRECTORY 30 + /* const char ** -- + * Returns the "content" directory of the frontend. + * This directory can be used to store specific assets that the + * core relies upon, such as art assets, + * input data, etc etc. + * The returned value can be NULL. + * If so, no such directory is defined, + * and it's up to the implementation to find a suitable directory. + */ +#define RETRO_ENVIRONMENT_GET_SAVE_DIRECTORY 31 + /* const char ** -- + * Returns the "save" directory of the frontend. + * This directory can be used to store SRAM, memory cards, + * high scores, etc, if the libretro core + * cannot use the regular memory interface (retro_get_memory_data()). + * + * NOTE: libretro cores used to check GET_SYSTEM_DIRECTORY for + * similar things before. + * They should still check GET_SYSTEM_DIRECTORY if they want to + * be backwards compatible. + * The path here can be NULL. It should only be non-NULL if the + * frontend user has set a specific save path. + */ +#define RETRO_ENVIRONMENT_SET_SYSTEM_AV_INFO 32 + /* const struct retro_system_av_info * -- + * Sets a new av_info structure. This can only be called from + * within retro_run(). + * This should *only* be used if the core is completely altering the + * internal resolutions, aspect ratios, timings, sampling rate, etc. + * Calling this can require a full reinitialization of video/audio + * drivers in the frontend, + * + * so it is important to call it very sparingly, and usually only with + * the users explicit consent. + * An eventual driver reinitialize will happen so that video and + * audio callbacks + * happening after this call within the same retro_run() call will + * target the newly initialized driver. + * + * This callback makes it possible to support configurable resolutions + * in games, which can be useful to + * avoid setting the "worst case" in max_width/max_height. 
+ * + * ***HIGHLY RECOMMENDED*** Do not call this callback every time + * resolution changes in an emulator core if it's + * expected to be a temporary change, for the reasons of possible + * driver reinitialization. + * This call is not a free pass for not trying to provide + * correct values in retro_get_system_av_info(). If you need to change + * things like aspect ratio or nominal width/height, + * use RETRO_ENVIRONMENT_SET_GEOMETRY, which is a softer variant + * of SET_SYSTEM_AV_INFO. + * + * If this returns false, the frontend does not acknowledge a + * changed av_info struct. + */ +#define RETRO_ENVIRONMENT_SET_PROC_ADDRESS_CALLBACK 33 + /* const struct retro_get_proc_address_interface * -- + * Allows a libretro core to announce support for the + * get_proc_address() interface. + * This interface allows for a standard way to extend libretro where + * use of environment calls are too indirect, + * e.g. for cases where the frontend wants to call directly into the core. + * + * If a core wants to expose this interface, SET_PROC_ADDRESS_CALLBACK + * **MUST** be called from within retro_set_environment(). + */ +#define RETRO_ENVIRONMENT_SET_SUBSYSTEM_INFO 34 + /* const struct retro_subsystem_info * -- + * This environment call introduces the concept of libretro "subsystems". + * A subsystem is a variant of a libretro core which supports + * different kinds of games. + * The purpose of this is to support e.g. emulators which might + * have special needs, e.g. Super Nintendo's Super GameBoy, Sufami Turbo. + * It can also be used to pick among subsystems in an explicit way + * if the libretro implementation is a multi-system emulator itself. + * + * Loading a game via a subsystem is done with retro_load_game_special(), + * and this environment call allows a libretro core to expose which + * subsystems are supported for use with retro_load_game_special(). 
+ * A core passes an array of retro_game_special_info which is terminated + * with a zeroed out retro_game_special_info struct. + * + * If a core wants to use this functionality, SET_SUBSYSTEM_INFO + * **MUST** be called from within retro_set_environment(). + */ +#define RETRO_ENVIRONMENT_SET_CONTROLLER_INFO 35 + /* const struct retro_controller_info * -- + * This environment call lets a libretro core tell the frontend + * which controller types are recognized in calls to + * retro_set_controller_port_device(). + * + * Some emulators such as Super Nintendo + * support multiple lightgun types which must be specifically + * selected from. + * It is therefore sometimes necessary for a frontend to be able + * to tell the core about a special kind of input device which is + * not covered by the libretro input API. + * + * In order for a frontend to understand the workings of an input device, + * it must be a specialized type + * of the generic device types already defined in the libretro API. + * + * Which devices are supported can vary per input port. + * The core must pass an array of const struct retro_controller_info which + * is terminated with a blanked out struct. Each element of the struct + * corresponds to an ascending port index to + * retro_set_controller_port_device(). + * Even if special device types are set in the libretro core, + * libretro should only poll input based on the base input device types. + */ +#define RETRO_ENVIRONMENT_SET_MEMORY_MAPS (36 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* const struct retro_memory_map * -- + * This environment call lets a libretro core tell the frontend + * about the memory maps this core emulates. + * This can be used to implement, for example, cheats in a core-agnostic way. + * + * Should only be used by emulators; it doesn't make much sense for + * anything else. + * It is recommended to expose all relevant pointers through + * retro_get_memory_* as well. + * + * Can be called from retro_init and retro_load_game. 
+ */ +#define RETRO_ENVIRONMENT_SET_GEOMETRY 37 + /* const struct retro_game_geometry * -- + * This environment call is similar to SET_SYSTEM_AV_INFO for changing + * video parameters, but provides a guarantee that drivers will not be + * reinitialized. + * This can only be called from within retro_run(). + * + * The purpose of this call is to allow a core to alter nominal + * width/heights as well as aspect ratios on-the-fly, which can be + * useful for some emulators to change in run-time. + * + * max_width/max_height arguments are ignored and cannot be changed + * with this call as this could potentially require a reinitialization or a + * non-constant time operation. + * If max_width/max_height are to be changed, SET_SYSTEM_AV_INFO is required. + * + * A frontend must guarantee that this environment call completes in + * constant time. + */ +#define RETRO_ENVIRONMENT_GET_USERNAME 38 + /* const char ** + * Returns the specified username of the frontend, if specified by the user. + * This username can be used as a nickname for a core that has online facilities + * or any other mode where personalization of the user is desirable. + * The returned value can be NULL. + * If this environ callback is used by a core that requires a valid username, + * a default username should be specified by the core. + */ +#define RETRO_ENVIRONMENT_GET_LANGUAGE 39 + /* unsigned * -- + * Returns the specified language of the frontend, if specified by the user. + * It can be used by the core for localization purposes. + */ + +#define RETRO_MEMDESC_CONST (1 << 0) /* The frontend will never change this memory area once retro_load_game has returned. */ +#define RETRO_MEMDESC_BIGENDIAN (1 << 1) /* The memory area contains big endian data. Default is little endian. */ +#define RETRO_MEMDESC_ALIGN_2 (1 << 16) /* All memory access in this area is aligned to their own size, or 2, whichever is smaller. 
*/ +#define RETRO_MEMDESC_ALIGN_4 (2 << 16) +#define RETRO_MEMDESC_ALIGN_8 (3 << 16) +#define RETRO_MEMDESC_MINSIZE_2 (1 << 24) /* All memory in this region is accessed at least 2 bytes at a time. */ +#define RETRO_MEMDESC_MINSIZE_4 (2 << 24) +#define RETRO_MEMDESC_MINSIZE_8 (3 << 24) +struct retro_memory_descriptor +{ + uint64_t flags; + + /* Pointer to the start of the relevant ROM or RAM chip. + * It's strongly recommended to use 'offset' if possible, rather than + * doing math on the pointer. + * + * If the same byte is mapped by multiple descriptors, their descriptors + * must have the same pointer. + * If 'start' does not point to the first byte in the pointer, put the + * difference in 'offset' instead. + * + * May be NULL if there's nothing usable here (e.g. hardware registers and + * open bus). No flags should be set if the pointer is NULL. + * It's recommended to minimize the number of descriptors if possible, + * but not mandatory. */ + void *ptr; + size_t offset; + + /* This is the location in the emulated address space + * where the mapping starts. */ + size_t start; + + /* Which bits must be same as in 'start' for this mapping to apply. + * The first memory descriptor to claim a certain byte is the one + * that applies. + * A bit which is set in 'start' must also be set in this. + * Can be zero, in which case each byte is assumed mapped exactly once. + * In this case, 'len' must be a power of two. */ + size_t select; + + /* If this is nonzero, the set bits are assumed not connected to the + * memory chip's address pins. */ + size_t disconnect; + + /* This one tells the size of the current memory area. + * If, after start+disconnect are applied, the address is higher than + * this, the highest bit of the address is cleared. + * + * If the address is still too high, the next highest bit is cleared. + * Can be zero, in which case it's assumed to be infinite (as limited + * by 'select' and 'disconnect').
*/ + size_t len; + + /* To go from emulated address to physical address, the following + * order applies: + * Subtract 'start', pick off 'disconnect', apply 'len', add 'offset'. + * + * The address space name must consist of only a-zA-Z0-9_-, + * should be as short as feasible (maximum length is 8 plus the NUL), + * and may not be any other address space plus one or more 0-9A-F + * at the end. + * However, multiple memory descriptors for the same address space are + * allowed, and the address space name can be empty. NULL is treated + * as empty. + * + * Address space names are case sensitive, but avoid lowercase if possible. + * The same pointer may exist in multiple address spaces. + * + * Examples: + * blank+blank - valid (multiple things may be mapped in the same namespace) + * 'Sp'+'Sp' - valid (multiple things may be mapped in the same namespace) + * 'A'+'B' - valid (neither is a prefix of each other) + * 'S'+blank - valid ('S' is not in 0-9A-F) + * 'a'+blank - valid ('a' is not in 0-9A-F) + * 'a'+'A' - valid (neither is a prefix of each other) + * 'AR'+blank - valid ('R' is not in 0-9A-F) + * 'ARB'+blank - valid (the B can't be part of the address either, because + * there is no namespace 'AR') + * blank+'B' - not valid, because it's ambiguous which address space B1234 + * would refer to. + * The length can't be used for that purpose; the frontend may want + * to append arbitrary data to an address, without a separator. */ + const char *addrspace; +}; + +/* The frontend may use the largest value of 'start'+'select' in a + * certain namespace to infer the size of the address space. + * + * If the address space is larger than that, a mapping with .ptr=NULL + * should be at the end of the array, with .select set to all ones for + * as long as the address space is big.
+ * + * Sample descriptors (minus .ptr, and RETRO_MEMFLAG_ on the flags): + * SNES WRAM: + * .start=0x7E0000, .len=0x20000 + * (Note that this must be mapped before the ROM in most cases; some of the + * ROM mappers + * try to claim $7E0000, or at least $7E8000.) + * SNES SPC700 RAM: + * .addrspace="S", .len=0x10000 + * SNES WRAM mirrors: + * .flags=MIRROR, .start=0x000000, .select=0xC0E000, .len=0x2000 + * .flags=MIRROR, .start=0x800000, .select=0xC0E000, .len=0x2000 + * SNES WRAM mirrors, alternate equivalent descriptor: + * .flags=MIRROR, .select=0x40E000, .disconnect=~0x1FFF + * (Various similar constructions can be created by combining parts of + * the above two.) + * SNES LoROM (512KB, mirrored a couple of times): + * .flags=CONST, .start=0x008000, .select=0x408000, .disconnect=0x8000, .len=512*1024 + * .flags=CONST, .start=0x400000, .select=0x400000, .disconnect=0x8000, .len=512*1024 + * SNES HiROM (4MB): + * .flags=CONST, .start=0x400000, .select=0x400000, .len=4*1024*1024 + * .flags=CONST, .offset=0x8000, .start=0x008000, .select=0x408000, .len=4*1024*1024 + * SNES ExHiROM (8MB): + * .flags=CONST, .offset=0, .start=0xC00000, .select=0xC00000, .len=4*1024*1024 + * .flags=CONST, .offset=4*1024*1024, .start=0x400000, .select=0xC00000, .len=4*1024*1024 + * .flags=CONST, .offset=0x8000, .start=0x808000, .select=0xC08000, .len=4*1024*1024 + * .flags=CONST, .offset=4*1024*1024+0x8000, .start=0x008000, .select=0xC08000, .len=4*1024*1024 + * Clarify the size of the address space: + * .ptr=NULL, .select=0xFFFFFF + * .len can be implied by .select in many of them, but was included for clarity. + */ + +struct retro_memory_map +{ + const struct retro_memory_descriptor *descriptors; + unsigned num_descriptors; +}; + +struct retro_controller_description +{ + /* Human-readable description of the controller. Even if using a generic + * input device type, this can be set to the particular device type the + * core uses. 
*/ + const char *desc; + + /* Device type passed to retro_set_controller_port_device(). If the device + * type is a sub-class of a generic input device type, use the + * RETRO_DEVICE_SUBCLASS macro to create an ID. + * + * E.g. RETRO_DEVICE_SUBCLASS(RETRO_DEVICE_JOYPAD, 1). */ + unsigned id; +}; + +struct retro_controller_info +{ + const struct retro_controller_description *types; + unsigned num_types; +}; + +struct retro_subsystem_memory_info +{ + /* The extension associated with a memory type, e.g. "psram". */ + const char *extension; + + /* The memory type for retro_get_memory(). This should be at + * least 0x100 to avoid conflict with standardized + * libretro memory types. */ + unsigned type; +}; + +struct retro_subsystem_rom_info +{ + /* Describes what the content is (SGB BIOS, GB ROM, etc). */ + const char *desc; + + /* Same definition as retro_get_system_info(). */ + const char *valid_extensions; + + /* Same definition as retro_get_system_info(). */ + bool need_fullpath; + + /* Same definition as retro_get_system_info(). */ + bool block_extract; + + /* This is set if the content is required to load a game. + * If this is set to false, a zeroed-out retro_game_info can be passed. */ + bool required; + + /* Content can have multiple associated persistent + * memory types (retro_get_memory()). */ + const struct retro_subsystem_memory_info *memory; + unsigned num_memory; +}; + +struct retro_subsystem_info +{ + /* Human-readable string of the subsystem type, e.g. "Super GameBoy" */ + const char *desc; + + /* A computer friendly short string identifier for the subsystem type. + * This name must be [a-z]. + * E.g. if desc is "Super GameBoy", this can be "sgb". + * This identifier can be used for command-line interfaces, etc. + */ + const char *ident; + + /* Infos for each content file. The first entry is assumed to be the + * "most significant" content for frontend purposes. + * E.g. 
with Super GameBoy, the first content should be the GameBoy ROM, + * as it is the most "significant" content to a user. + * If a frontend creates new file paths based on the content used + * (e.g. savestates), it should use the path for the first ROM to do so. */ + const struct retro_subsystem_rom_info *roms; + + /* Number of content files associated with a subsystem. */ + unsigned num_roms; + + /* The type passed to retro_load_game_special(). */ + unsigned id; +}; + +typedef void (*retro_proc_address_t)(void); + +/* libretro API extension functions: + * (None here so far). + * + * Get a symbol from a libretro core. + * Cores should only return symbols which are actual + * extensions to the libretro API. + * + * Frontends should not use this to obtain symbols to standard + * libretro entry points (static linking or dlsym). + * + * The symbol name must be equal to the function name, + * e.g. if void retro_foo(void); exists, the symbol must be called "retro_foo". + * The returned function pointer must be cast to the corresponding type. + */ +typedef retro_proc_address_t (*retro_get_proc_address_t)(const char *sym); + +struct retro_get_proc_address_interface +{ + retro_get_proc_address_t get_proc_address; +}; + +enum retro_log_level +{ + RETRO_LOG_DEBUG = 0, + RETRO_LOG_INFO, + RETRO_LOG_WARN, + RETRO_LOG_ERROR, + + RETRO_LOG_DUMMY = INT_MAX +}; + +/* Logging function. Takes log level argument as well. 
*/ +typedef void (*retro_log_printf_t)(enum retro_log_level level, + const char *fmt, ...); + +struct retro_log_callback +{ + retro_log_printf_t log; +}; + +/* Performance related functions */ + +/* ID values for SIMD CPU features */ +#define RETRO_SIMD_SSE (1 << 0) +#define RETRO_SIMD_SSE2 (1 << 1) +#define RETRO_SIMD_VMX (1 << 2) +#define RETRO_SIMD_VMX128 (1 << 3) +#define RETRO_SIMD_AVX (1 << 4) +#define RETRO_SIMD_NEON (1 << 5) +#define RETRO_SIMD_SSE3 (1 << 6) +#define RETRO_SIMD_SSSE3 (1 << 7) +#define RETRO_SIMD_MMX (1 << 8) +#define RETRO_SIMD_MMXEXT (1 << 9) +#define RETRO_SIMD_SSE4 (1 << 10) +#define RETRO_SIMD_SSE42 (1 << 11) +#define RETRO_SIMD_AVX2 (1 << 12) +#define RETRO_SIMD_VFPU (1 << 13) +#define RETRO_SIMD_PS (1 << 14) +#define RETRO_SIMD_AES (1 << 15) + +typedef uint64_t retro_perf_tick_t; +typedef int64_t retro_time_t; + +struct retro_perf_counter +{ + const char *ident; + retro_perf_tick_t start; + retro_perf_tick_t total; + retro_perf_tick_t call_cnt; + + bool registered; +}; + +/* Returns current time in microseconds. + * Tries to use the most accurate timer available. + */ +typedef retro_time_t (*retro_perf_get_time_usec_t)(void); + +/* A simple counter. Usually nanoseconds, but can also be CPU cycles. + * Can be used directly if desired (when creating a more sophisticated + * performance counter system). + * */ +typedef retro_perf_tick_t (*retro_perf_get_counter_t)(void); + +/* Returns a bit-mask of detected CPU features (RETRO_SIMD_*). */ +typedef uint64_t (*retro_get_cpu_features_t)(void); + +/* Asks frontend to log and/or display the state of performance counters. + * Performance counters can always be poked into manually as well. + */ +typedef void (*retro_perf_log_t)(void); + +/* Register a performance counter. + * ident field must be set with a discrete value and other values in + * retro_perf_counter must be 0. + * Registering can be called multiple times. 
To avoid calling to + * frontend redundantly, you can check registered field first. */ +typedef void (*retro_perf_register_t)(struct retro_perf_counter *counter); + +/* Starts a registered counter. */ +typedef void (*retro_perf_start_t)(struct retro_perf_counter *counter); + +/* Stops a registered counter. */ +typedef void (*retro_perf_stop_t)(struct retro_perf_counter *counter); + +/* For convenience it can be useful to wrap register, start and stop in macros. + * E.g.: + * #ifdef LOG_PERFORMANCE + * #define RETRO_PERFORMANCE_INIT(perf_cb, name) static struct retro_perf_counter name = {#name}; if (!name.registered) perf_cb.perf_register(&(name)) + * #define RETRO_PERFORMANCE_START(perf_cb, name) perf_cb.perf_start(&(name)) + * #define RETRO_PERFORMANCE_STOP(perf_cb, name) perf_cb.perf_stop(&(name)) + * #else + * ... Blank macros ... + * #endif + * + * These can then be used mid-functions around code snippets. + * + * extern struct retro_perf_callback perf_cb; * Somewhere in the core. + * + * void do_some_heavy_work(void) + * { + * RETRO_PERFORMANCE_INIT(cb, work_1); + * RETRO_PERFORMANCE_START(cb, work_1); + * heavy_work_1(); + * RETRO_PERFORMANCE_STOP(cb, work_1); + * + * RETRO_PERFORMANCE_INIT(cb, work_2); + * RETRO_PERFORMANCE_START(cb, work_2); + * heavy_work_2(); + * RETRO_PERFORMANCE_STOP(cb, work_2); + * } + * + * void retro_deinit(void) + * { + * perf_cb.perf_log(); * Log all perf counters here for example. + * } + */ + +struct retro_perf_callback +{ + retro_perf_get_time_usec_t get_time_usec; + retro_get_cpu_features_t get_cpu_features; + + retro_perf_get_counter_t get_perf_counter; + retro_perf_register_t perf_register; + retro_perf_start_t perf_start; + retro_perf_stop_t perf_stop; + retro_perf_log_t perf_log; +}; + +/* FIXME: Document the sensor API and work out behavior. + * It will be marked as experimental until then.
+ */ +enum retro_sensor_action +{ + RETRO_SENSOR_ACCELEROMETER_ENABLE = 0, + RETRO_SENSOR_ACCELEROMETER_DISABLE, + + RETRO_SENSOR_DUMMY = INT_MAX +}; + +/* Id values for SENSOR types. */ +#define RETRO_SENSOR_ACCELEROMETER_X 0 +#define RETRO_SENSOR_ACCELEROMETER_Y 1 +#define RETRO_SENSOR_ACCELEROMETER_Z 2 + +typedef bool (*retro_set_sensor_state_t)(unsigned port, + enum retro_sensor_action action, unsigned rate); + +typedef float (*retro_sensor_get_input_t)(unsigned port, unsigned id); + +struct retro_sensor_interface +{ + retro_set_sensor_state_t set_sensor_state; + retro_sensor_get_input_t get_sensor_input; +}; + +enum retro_camera_buffer +{ + RETRO_CAMERA_BUFFER_OPENGL_TEXTURE = 0, + RETRO_CAMERA_BUFFER_RAW_FRAMEBUFFER, + + RETRO_CAMERA_BUFFER_DUMMY = INT_MAX +}; + +/* Starts the camera driver. Can only be called in retro_run(). */ +typedef bool (*retro_camera_start_t)(void); + +/* Stops the camera driver. Can only be called in retro_run(). */ +typedef void (*retro_camera_stop_t)(void); + +/* Callback which signals when the camera driver is initialized + * and/or deinitialized. + * retro_camera_start_t can be called in initialized callback. + */ +typedef void (*retro_camera_lifetime_status_t)(void); + +/* A callback for raw framebuffer data. buffer points to an XRGB8888 buffer. + * Width, height and pitch are similar to retro_video_refresh_t. + * First pixel is top-left origin. + */ +typedef void (*retro_camera_frame_raw_framebuffer_t)(const uint32_t *buffer, + unsigned width, unsigned height, size_t pitch); + +/* A callback for when OpenGL textures are used. + * + * texture_id is a texture owned by camera driver. + * Its state or content should be considered immutable, except for things like + * texture filtering and clamping. + * + * texture_target is the texture target for the GL texture. + * These can include e.g. GL_TEXTURE_2D, GL_TEXTURE_RECTANGLE, and possibly + * more depending on extensions. 
+ * + * affine points to a packed 3x3 column-major matrix used to apply an affine + * transform to texture coordinates. (affine_matrix * vec3(coord_x, coord_y, 1.0)) + * After transform, normalized texture coord (0, 0) should be bottom-left + * and (1, 1) should be top-right (or (width, height) for RECTANGLE). + * + * GL-specific typedefs are avoided here to avoid relying on gl.h in + * the API definition. + */ +typedef void (*retro_camera_frame_opengl_texture_t)(unsigned texture_id, + unsigned texture_target, const float *affine); + +struct retro_camera_callback +{ + /* Set by libretro core. + * Example bitmask: caps = (1 << RETRO_CAMERA_BUFFER_OPENGL_TEXTURE) | (1 << RETRO_CAMERA_BUFFER_RAW_FRAMEBUFFER). + */ + uint64_t caps; + + unsigned width; /* Desired resolution for camera. Is only used as a hint. */ + unsigned height; + retro_camera_start_t start; /* Set by frontend. */ + retro_camera_stop_t stop; /* Set by frontend. */ + + /* Set by libretro core if raw framebuffer callbacks will be used. */ + retro_camera_frame_raw_framebuffer_t frame_raw_framebuffer; + /* Set by libretro core if OpenGL texture callbacks will be used. */ + retro_camera_frame_opengl_texture_t frame_opengl_texture; + + /* Set by libretro core. Called after camera driver is initialized and + * ready to be started. + * Can be NULL, in which this callback is not called. + */ + retro_camera_lifetime_status_t initialized; + + /* Set by libretro core. Called right before camera driver is + * deinitialized. + * Can be NULL, in which this callback is not called. + */ + retro_camera_lifetime_status_t deinitialized; +}; + +/* Sets the interval of time and/or distance at which to update/poll + * location-based data. + * + * To ensure compatibility with all location-based implementations, + * values for both interval_ms and interval_distance should be provided. + * + * interval_ms is the interval expressed in milliseconds. + * interval_distance is the distance interval expressed in meters. 
+ */ +typedef void (*retro_location_set_interval_t)(unsigned interval_ms, + unsigned interval_distance); + +/* Start location services. The device will start listening for changes to the + * current location at regular intervals (which are defined with + * retro_location_set_interval_t). */ +typedef bool (*retro_location_start_t)(void); + +/* Stop location services. The device will stop listening for changes + * to the current location. */ +typedef void (*retro_location_stop_t)(void); + +/* Get the position of the current location. Will set parameters to + * 0 if no new location update has happened since the last time. */ +typedef bool (*retro_location_get_position_t)(double *lat, double *lon, + double *horiz_accuracy, double *vert_accuracy); + +/* Callback which signals when the location driver is initialized + * and/or deinitialized. + * retro_location_start_t can be called in initialized callback. + */ +typedef void (*retro_location_lifetime_status_t)(void); + +struct retro_location_callback +{ + retro_location_start_t start; + retro_location_stop_t stop; + retro_location_get_position_t get_position; + retro_location_set_interval_t set_interval; + + retro_location_lifetime_status_t initialized; + retro_location_lifetime_status_t deinitialized; +}; + +enum retro_rumble_effect +{ + RETRO_RUMBLE_STRONG = 0, + RETRO_RUMBLE_WEAK = 1, + + RETRO_RUMBLE_DUMMY = INT_MAX +}; + +/* Sets rumble state for joypad plugged in port 'port'. + * Rumble effects are controlled independently, + * and setting e.g. strong rumble does not override weak rumble. + * Strength has a range of [0, 0xffff]. + * + * Returns true if rumble state request was honored. + * Calling this before first retro_run() is likely to return false. */ +typedef bool (*retro_set_rumble_state_t)(unsigned port, + enum retro_rumble_effect effect, uint16_t strength); + +struct retro_rumble_interface +{ + retro_set_rumble_state_t set_rumble_state; +}; + +/* Notifies libretro that audio data should be written. 
*/ +typedef void (*retro_audio_callback_t)(void); + +/* True: Audio driver in frontend is active, and callback is + * expected to be called regularily. + * False: Audio driver in frontend is paused or inactive. + * Audio callback will not be called until set_state has been + * called with true. + * Initial state is false (inactive). + */ +typedef void (*retro_audio_set_state_callback_t)(bool enabled); + +struct retro_audio_callback +{ + retro_audio_callback_t callback; + retro_audio_set_state_callback_t set_state; +}; + +/* Notifies a libretro core of time spent since last invocation + * of retro_run() in microseconds. + * + * It will be called right before retro_run() every frame. + * The frontend can tamper with timing to support cases like + * fast-forward, slow-motion and framestepping. + * + * In those scenarios the reference frame time value will be used. */ +typedef int64_t retro_usec_t; +typedef void (*retro_frame_time_callback_t)(retro_usec_t usec); +struct retro_frame_time_callback +{ + retro_frame_time_callback_t callback; + /* Represents the time of one frame. It is computed as + * 1000000 / fps, but the implementation will resolve the + * rounding to ensure that framestepping, etc is exact. */ + retro_usec_t reference; +}; + +/* Pass this to retro_video_refresh_t if rendering to hardware. + * Passing NULL to retro_video_refresh_t is still a frame dupe as normal. + * */ +#define RETRO_HW_FRAME_BUFFER_VALID ((void*)-1) + +/* Invalidates the current HW context. + * Any GL state is lost, and must not be deinitialized explicitly. + * If explicit deinitialization is desired by the libretro core, + * it should implement context_destroy callback. + * If called, all GPU resources must be reinitialized. + * Usually called when frontend reinits video driver. + * Also called first time video driver is initialized, + * allowing libretro core to initialize resources. 
+ */ +typedef void (*retro_hw_context_reset_t)(void); + +/* Gets current framebuffer which is to be rendered to. + * Could change every frame potentially. + */ +typedef uintptr_t (*retro_hw_get_current_framebuffer_t)(void); + +/* Get a symbol from HW context. */ +typedef retro_proc_address_t (*retro_hw_get_proc_address_t)(const char *sym); + +enum retro_hw_context_type +{ + RETRO_HW_CONTEXT_NONE = 0, + /* OpenGL 2.x. Driver can choose to use latest compatibility context. */ + RETRO_HW_CONTEXT_OPENGL = 1, + /* OpenGL ES 2.0. */ + RETRO_HW_CONTEXT_OPENGLES2 = 2, + /* Modern desktop core GL context. Use version_major/ + * version_minor fields to set GL version. */ + RETRO_HW_CONTEXT_OPENGL_CORE = 3, + /* OpenGL ES 3.0 */ + RETRO_HW_CONTEXT_OPENGLES3 = 4, + /* OpenGL ES 3.1+. Set version_major/version_minor. For GLES2 and GLES3, + * use the corresponding enums directly. */ + RETRO_HW_CONTEXT_OPENGLES_VERSION = 5, + + RETRO_HW_CONTEXT_DUMMY = INT_MAX +}; + +struct retro_hw_render_callback +{ + /* Which API to use. Set by libretro core. */ + enum retro_hw_context_type context_type; + + /* Called when a context has been created or when it has been reset. + * An OpenGL context is only valid after context_reset() has been called. + * + * When context_reset is called, OpenGL resources in the libretro + * implementation are guaranteed to be invalid. + * + * It is possible that context_reset is called multiple times during an + * application lifecycle. + * If context_reset is called without any notification (context_destroy), + * the OpenGL context was lost and resources should just be recreated + * without any attempt to "free" old resources. + */ + retro_hw_context_reset_t context_reset; + + /* Set by frontend. */ + retro_hw_get_current_framebuffer_t get_current_framebuffer; + + /* Set by frontend. */ + retro_hw_get_proc_address_t get_proc_address; + + /* Set if render buffers should have depth component attached. 
*/ + bool depth; + + /* Set if stencil buffers should be attached. */ + bool stencil; + + /* If depth and stencil are true, a packed 24/8 buffer will be added. + * Only attaching stencil is invalid and will be ignored. */ + + /* Use conventional bottom-left origin convention. If false, + * standard libretro top-left origin semantics are used. */ + bool bottom_left_origin; + + /* Major version number for core GL context or GLES 3.1+. */ + unsigned version_major; + + /* Minor version number for core GL context or GLES 3.1+. */ + unsigned version_minor; + + /* If this is true, the frontend will go very far to avoid + * resetting context in scenarios like toggling fullscreen, etc. + */ + bool cache_context; + + /* The reset callback might still be called in extreme situations + * such as if the context is lost beyond recovery. + * + * For optimal stability, set this to false, and allow context to be + * reset at any time. + */ + + /* A callback to be called before the context is destroyed in a + * controlled way by the frontend. */ + retro_hw_context_reset_t context_destroy; + + /* OpenGL resources can be deinitialized cleanly at this step. + * context_destroy can be set to NULL, in which resources will + * just be destroyed without any notification. + * + * Even when context_destroy is non-NULL, it is possible that + * context_reset is called without any destroy notification. + * This happens if context is lost by external factors (such as + * notified by GL_ARB_robustness). + * + * In this case, the context is assumed to be already dead, + * and the libretro implementation must not try to free any OpenGL + * resources in the subsequent context_reset. + */ + + /* Creates a debug context. */ + bool debug_context; +}; + +/* Callback type passed in RETRO_ENVIRONMENT_SET_KEYBOARD_CALLBACK. + * Called by the frontend in response to keyboard events. + * down is set if the key is being pressed, or false if it is being released. + * keycode is the RETROK value of the char. 
+ * character is the text character of the pressed key. (UTF-32). + * key_modifiers is a set of RETROKMOD values or'ed together. + * + * The pressed/keycode state can be indepedent of the character. + * It is also possible that multiple characters are generated from a + * single keypress. + * Keycode events should be treated separately from character events. + * However, when possible, the frontend should try to synchronize these. + * If only a character is posted, keycode should be RETROK_UNKNOWN. + * + * Similarily if only a keycode event is generated with no corresponding + * character, character should be 0. + */ +typedef void (*retro_keyboard_event_t)(bool down, unsigned keycode, + uint32_t character, uint16_t key_modifiers); + +struct retro_keyboard_callback +{ + retro_keyboard_event_t callback; +}; + +/* Callbacks for RETRO_ENVIRONMENT_SET_DISK_CONTROL_INTERFACE. + * Should be set for implementations which can swap out multiple disk + * images in runtime. + * + * If the implementation can do this automatically, it should strive to do so. + * However, there are cases where the user must manually do so. + * + * Overview: To swap a disk image, eject the disk image with + * set_eject_state(true). + * Set the disk index with set_image_index(index). Insert the disk again + * with set_eject_state(false). + */ + +/* If ejected is true, "ejects" the virtual disk tray. + * When ejected, the disk image index can be set. + */ +typedef bool (*retro_set_eject_state_t)(bool ejected); + +/* Gets current eject state. The initial state is 'not ejected'. */ +typedef bool (*retro_get_eject_state_t)(void); + +/* Gets current disk index. First disk is index 0. + * If return value is >= get_num_images(), no disk is currently inserted. + */ +typedef unsigned (*retro_get_image_index_t)(void); + +/* Sets image index. Can only be called when disk is ejected. + * The implementation supports setting "no disk" by using an + * index >= get_num_images(). 
+ */ +typedef bool (*retro_set_image_index_t)(unsigned index); + +/* Gets total number of images which are available to use. */ +typedef unsigned (*retro_get_num_images_t)(void); + +struct retro_game_info; + +/* Replaces the disk image associated with index. + * Arguments to pass in info have same requirements as retro_load_game(). + * Virtual disk tray must be ejected when calling this. + * + * Replacing a disk image with info = NULL will remove the disk image + * from the internal list. + * As a result, calls to get_image_index() can change. + * + * E.g. replace_image_index(1, NULL), and previous get_image_index() + * returned 4 before. + * Index 1 will be removed, and the new index is 3. + */ +typedef bool (*retro_replace_image_index_t)(unsigned index, + const struct retro_game_info *info); + +/* Adds a new valid index (get_num_images()) to the internal disk list. + * This will increment subsequent return values from get_num_images() by 1. + * This image index cannot be used until a disk image has been set + * with replace_image_index. */ +typedef bool (*retro_add_image_index_t)(void); + +struct retro_disk_control_callback +{ + retro_set_eject_state_t set_eject_state; + retro_get_eject_state_t get_eject_state; + + retro_get_image_index_t get_image_index; + retro_set_image_index_t set_image_index; + retro_get_num_images_t get_num_images; + + retro_replace_image_index_t replace_image_index; + retro_add_image_index_t add_image_index; +}; + +enum retro_pixel_format +{ + /* 0RGB1555, native endian. + * 0 bit must be set to 0. + * This pixel format is default for compatibility concerns only. + * If a 15/16-bit pixel format is desired, consider using RGB565. */ + RETRO_PIXEL_FORMAT_0RGB1555 = 0, + + /* XRGB8888, native endian. + * X bits are ignored. */ + RETRO_PIXEL_FORMAT_XRGB8888 = 1, + + /* RGB565, native endian. 
+ * This pixel format is the recommended format to use if a 15/16-bit + * format is desired as it is the pixel format that is typically + * available on a wide range of low-power devices. + * + * It is also natively supported in APIs like OpenGL ES. */ + RETRO_PIXEL_FORMAT_RGB565 = 2, + + /* Ensure sizeof() == sizeof(int). */ + RETRO_PIXEL_FORMAT_UNKNOWN = INT_MAX +}; + +struct retro_message +{ + const char *msg; /* Message to be displayed. */ + unsigned frames; /* Duration in frames of message. */ +}; + +/* Describes how the libretro implementation maps a libretro input bind + * to its internal input system through a human readable string. + * This string can be used to better let a user configure input. */ +struct retro_input_descriptor +{ + /* Associates given parameters with a description. */ + unsigned port; + unsigned device; + unsigned index; + unsigned id; + + /* Human readable description for parameters. + * The pointer must remain valid until + * retro_unload_game() is called. */ + const char *description; +}; + +struct retro_system_info +{ + /* All pointers are owned by libretro implementation, and pointers must + * remain valid until retro_deinit() is called. */ + + const char *library_name; /* Descriptive name of library. Should not + * contain any version numbers, etc. */ + const char *library_version; /* Descriptive version of core. */ + + const char *valid_extensions; /* A string listing probably content + * extensions the core will be able to + * load, separated with pipe. + * I.e. "bin|rom|iso". + * Typically used for a GUI to filter + * out extensions. */ + + /* If true, retro_load_game() is guaranteed to provide a valid pathname + * in retro_game_info::path. + * ::data and ::size are both invalid. + * + * If false, ::data and ::size are guaranteed to be valid, but ::path + * might not be valid. + * + * This is typically set to true for libretro implementations that must + * load from file. 
+ * Implementations should strive for setting this to false, as it allows + * the frontend to perform patching, etc. */ + bool need_fullpath; + + /* If true, the frontend is not allowed to extract any archives before + * loading the real content. + * Necessary for certain libretro implementations that load games + * from zipped archives. */ + bool block_extract; +}; + +struct retro_game_geometry +{ + unsigned base_width; /* Nominal video width of game. */ + unsigned base_height; /* Nominal video height of game. */ + unsigned max_width; /* Maximum possible width of game. */ + unsigned max_height; /* Maximum possible height of game. */ + + float aspect_ratio; /* Nominal aspect ratio of game. If + * aspect_ratio is <= 0.0, an aspect ratio + * of base_width / base_height is assumed. + * A frontend could override this setting, + * if desired. */ +}; + +struct retro_system_timing +{ + double fps; /* FPS of video content. */ + double sample_rate; /* Sampling rate of audio. */ +}; + +struct retro_system_av_info +{ + struct retro_game_geometry geometry; + struct retro_system_timing timing; +}; + +struct retro_variable +{ + /* Variable to query in RETRO_ENVIRONMENT_GET_VARIABLE. + * If NULL, obtains the complete environment string if more + * complex parsing is necessary. + * The environment string is formatted as key-value pairs + * delimited by semicolons as so: + * "key1=value1;key2=value2;..." + */ + const char *key; + + /* Value to be obtained. If key does not exist, it is set to NULL. */ + const char *value; +}; + +struct retro_game_info +{ + const char *path; /* Path to game, UTF-8 encoded. + * Usually used as a reference. + * May be NULL if rom was loaded from stdin + * or similar. + * retro_system_info::need_fullpath guaranteed + * that this path is valid. */ + const void *data; /* Memory buffer of loaded game. Will be NULL + * if need_fullpath was set. */ + size_t size; /* Size of memory buffer. */ + const char *meta; /* String of implementation specific meta-data. 
*/ +}; + +/* Callbacks */ + +/* Environment callback. Gives implementations a way of performing + * uncommon tasks. Extensible. */ +typedef bool (*retro_environment_t)(unsigned cmd, void *data); + +/* Render a frame. Pixel format is 15-bit 0RGB1555 native endian + * unless changed (see RETRO_ENVIRONMENT_SET_PIXEL_FORMAT). + * + * Width and height specify dimensions of buffer. + * Pitch specifices length in bytes between two lines in buffer. + * + * For performance reasons, it is highly recommended to have a frame + * that is packed in memory, i.e. pitch == width * byte_per_pixel. + * Certain graphic APIs, such as OpenGL ES, do not like textures + * that are not packed in memory. + */ +typedef void (*retro_video_refresh_t)(const void *data, unsigned width, + unsigned height, size_t pitch); + +/* Renders a single audio frame. Should only be used if implementation + * generates a single sample at a time. + * Format is signed 16-bit native endian. + */ +typedef void (*retro_audio_sample_t)(int16_t left, int16_t right); + +/* Renders multiple audio frames in one go. + * + * One frame is defined as a sample of left and right channels, interleaved. + * I.e. int16_t buf[4] = { l, r, l, r }; would be 2 frames. + * Only one of the audio callbacks must ever be used. + */ +typedef size_t (*retro_audio_sample_batch_t)(const int16_t *data, + size_t frames); + +/* Polls input. */ +typedef void (*retro_input_poll_t)(void); + +/* Queries for input for player 'port'. device will be masked with + * RETRO_DEVICE_MASK. + * + * Specialization of devices such as RETRO_DEVICE_JOYPAD_MULTITAP that + * have been set with retro_set_controller_port_device() + * will still use the higher level RETRO_DEVICE_JOYPAD to request input. + */ +typedef int16_t (*retro_input_state_t)(unsigned port, unsigned device, + unsigned index, unsigned id); + +/* Sets callbacks. retro_set_environment() is guaranteed to be called + * before retro_init(). 
+ * + * The rest of the set_* functions are guaranteed to have been called + * before the first call to retro_run() is made. */ +void retro_set_environment(retro_environment_t); +void retro_set_video_refresh(retro_video_refresh_t); +void retro_set_audio_sample(retro_audio_sample_t); +void retro_set_audio_sample_batch(retro_audio_sample_batch_t); +void retro_set_input_poll(retro_input_poll_t); +void retro_set_input_state(retro_input_state_t); + +/* Library global initialization/deinitialization. */ +void retro_init(void); +void retro_deinit(void); + +/* Must return RETRO_API_VERSION. Used to validate ABI compatibility + * when the API is revised. */ +unsigned retro_api_version(void); + +/* Gets statically known system info. Pointers provided in *info + * must be statically allocated. + * Can be called at any time, even before retro_init(). */ +void retro_get_system_info(struct retro_system_info *info); + +/* Gets information about system audio/video timings and geometry. + * Can be called only after retro_load_game() has successfully completed. + * NOTE: The implementation of this function might not initialize every + * variable if needed. + * E.g. geom.aspect_ratio might not be initialized if core doesn't + * desire a particular aspect ratio. */ +void retro_get_system_av_info(struct retro_system_av_info *info); + +/* Sets device to be used for player 'port'. + * By default, RETRO_DEVICE_JOYPAD is assumed to be plugged into all + * available ports. + * Setting a particular device type is not a guarantee that libretro cores + * will only poll input based on that particular device type. It is only a + * hint to the libretro core when a core cannot automatically detect the + * appropriate input device type on its own. It is also relevant when a + * core can change its behavior depending on device type. */ +void retro_set_controller_port_device(unsigned port, unsigned device); + +/* Resets the current game. 
*/ +void retro_reset(void); + +/* Runs the game for one video frame. + * During retro_run(), input_poll callback must be called at least once. + * + * If a frame is not rendered for reasons where a game "dropped" a frame, + * this still counts as a frame, and retro_run() should explicitly dupe + * a frame if GET_CAN_DUPE returns true. + * In this case, the video callback can take a NULL argument for data. + */ +void retro_run(void); + +/* Returns the amount of data the implementation requires to serialize + * internal state (save states). + * Between calls to retro_load_game() and retro_unload_game(), the + * returned size is never allowed to be larger than a previous returned + * value, to ensure that the frontend can allocate a save state buffer once. + */ +size_t retro_serialize_size(void); + +/* Serializes internal state. If failed, or size is lower than + * retro_serialize_size(), it should return false, true otherwise. */ +bool retro_serialize(void *data, size_t size); +bool retro_unserialize(const void *data, size_t size); + +void retro_cheat_reset(void); +void retro_cheat_set(unsigned index, bool enabled, const char *code); + +/* Loads a game. */ +bool retro_load_game(const struct retro_game_info *game); + +/* Loads a "special" kind of game. Should not be used, + * except in extreme cases. */ +bool retro_load_game_special( + unsigned game_type, + const struct retro_game_info *info, size_t num_info +); + +/* Unloads a currently loaded game. */ +void retro_unload_game(void); + +/* Gets region of game. */ +unsigned retro_get_region(void); + +/* Gets region of memory. 
*/ +void *retro_get_memory_data(unsigned id); +size_t retro_get_memory_size(unsigned id); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/platform/libretro/msvc/msvc-2003-xbox1.bat b/platform/libretro/msvc/msvc-2003-xbox1.bat new file mode 100644 index 00000000..91d69ff5 --- /dev/null +++ b/platform/libretro/msvc/msvc-2003-xbox1.bat @@ -0,0 +1,47 @@ +@SET VSINSTALLDIR=C:\Program Files\Microsoft Visual Studio .NET 2003\Common7\IDE +@SET VCINSTALLDIR=C:\Program Files\Microsoft Visual Studio .NET 2003 +@SET FrameworkDir=C:\WINDOWS\Microsoft.NET\Framework +@SET FrameworkVersion=v1.1.4322 +@SET FrameworkSDKDir=C:\Program Files\Microsoft Visual Studio .NET 2003\SDK\v1.1 +@rem Root of Visual Studio common files. + +@if "%VSINSTALLDIR%"=="" goto Usage +@if "%VCINSTALLDIR%"=="" set VCINSTALLDIR=%VSINSTALLDIR% + +@rem +@rem Root of Visual Studio ide installed files. +@rem +@set DevEnvDir=%VSINSTALLDIR% + +@rem +@rem Root of Visual C++ installed files. +@rem +@set MSVCDir=%VCINSTALLDIR%\VC7 + +@rem +@echo Setting environment for using Microsoft Visual Studio .NET 2003 tools. +@echo (If you have another version of Visual Studio or Visual C++ installed and wish +@echo to use its tools from the command line, run vcvars32.bat for that version.) +@rem + +@REM %VCINSTALLDIR%\Common7\Tools dir is added only for real setup. + +@set PATH=%DevEnvDir%;%MSVCDir%\BIN;%VCINSTALLDIR%\Common7\Tools;%VCINSTALLDIR%\Common7\Tools\bin\prerelease;%VCINSTALLDIR%\Common7\Tools\bin;%FrameworkSDKDir%\bin;%FrameworkDir%\%FrameworkVersion%;%PATH%; +@set INCLUDE=%MSVCDir%\ATLMFC\INCLUDE;%MSVCDir%\INCLUDE;%FrameworkSDKDir%\include;%INCLUDE%;%XDK%\xbox\include +@set LIB=%MSVCDir%\ATLMFC\LIB;%MSVCDir%\LIB;%MSVCDir%\PlatformSDK\lib;%XDK%\lib;%XDK%\xbox\lib;%LIB% + +@goto end + +:Usage + +@echo. VSINSTALLDIR variable is not set. +@echo. 
+@echo SYNTAX: %0 + +@goto end + +:end + +devenv /clean Release_LTCG msvc-2003-xbox1.sln +devenv /build Release_LTCG msvc-2003-xbox1.sln +exit diff --git a/platform/libretro/msvc/msvc-2010-360.bat b/platform/libretro/msvc/msvc-2010-360.bat new file mode 100644 index 00000000..3ca14052 --- /dev/null +++ b/platform/libretro/msvc/msvc-2010-360.bat @@ -0,0 +1,124 @@ +@echo off + +@echo Setting environment for using Microsoft Visual Studio 2010 x86 tools. + +@call :GetVSCommonToolsDir +@if "%VS100COMNTOOLS%"=="" goto error_no_VS100COMNTOOLSDIR + +@call "%VS100COMNTOOLS%VCVarsQueryRegistry.bat" 32bit No64bit + +@if "%VSINSTALLDIR%"=="" goto error_no_VSINSTALLDIR +@if "%FrameworkDir32%"=="" goto error_no_FrameworkDIR32 +@if "%FrameworkVersion32%"=="" goto error_no_FrameworkVer32 +@if "%Framework35Version%"=="" goto error_no_Framework35Version + +@set FrameworkDir=%FrameworkDir32% +@set FrameworkVersion=%FrameworkVersion32% + +@if not "%WindowsSdkDir%" == "" ( + @set "PATH=%WindowsSdkDir%bin\NETFX 4.0 Tools;%WindowsSdkDir%bin;%PATH%" + @set "INCLUDE=%WindowsSdkDir%include;%INCLUDE%" + @set "LIB=%WindowsSdkDir%lib;%LIB%" +) + +@rem +@rem Root of Visual Studio IDE installed files. 
+@rem +@set DevEnvDir=%VSINSTALLDIR%Common7\IDE\ + +@rem PATH +@rem ---- +@if exist "%VSINSTALLDIR%Team Tools\Performance Tools" ( + @set "PATH=%VSINSTALLDIR%Team Tools\Performance Tools;%PATH%" +) +@if exist "%ProgramFiles%\HTML Help Workshop" set PATH=%ProgramFiles%\HTML Help Workshop;%PATH% +@if exist "%ProgramFiles(x86)%\HTML Help Workshop" set PATH=%ProgramFiles(x86)%\HTML Help Workshop;%PATH% +@if exist "%VCINSTALLDIR%VCPackages" set PATH=%VCINSTALLDIR%VCPackages;%PATH% +@set PATH=%FrameworkDir%%Framework35Version%;%PATH% +@set PATH=%FrameworkDir%%FrameworkVersion%;%PATH% +@set PATH=%VSINSTALLDIR%Common7\Tools;%PATH% +@if exist "%VCINSTALLDIR%BIN" set PATH=%VCINSTALLDIR%BIN;%PATH% +@set PATH=%DevEnvDir%;%PATH% + +@if exist "%VSINSTALLDIR%VSTSDB\Deploy" ( + @set "PATH=%VSINSTALLDIR%VSTSDB\Deploy;%PATH%" +) + +@if not "%FSHARPINSTALLDIR%" == "" ( + @set "PATH=%FSHARPINSTALLDIR%;%PATH%" +) + +@rem INCLUDE +@rem ------- +@if exist "%VCINSTALLDIR%ATLMFC\INCLUDE" set INCLUDE=%VCINSTALLDIR%ATLMFC\INCLUDE;%INCLUDE% +@if exist "%VCINSTALLDIR%INCLUDE" set INCLUDE=%VCINSTALLDIR%INCLUDE;%INCLUDE% + +@rem LIB +@rem --- +@if exist "%VCINSTALLDIR%ATLMFC\LIB" set LIB=%VCINSTALLDIR%ATLMFC\LIB;%LIB% +@if exist "%VCINSTALLDIR%LIB" set LIB=%VCINSTALLDIR%LIB;%LIB% + +@rem LIBPATH +@rem ------- +@if exist "%VCINSTALLDIR%ATLMFC\LIB" set LIBPATH=%VCINSTALLDIR%ATLMFC\LIB;%LIBPATH% +@if exist "%VCINSTALLDIR%LIB" set LIBPATH=%VCINSTALLDIR%LIB;%LIBPATH% +@set LIBPATH=%FrameworkDir%%Framework35Version%;%LIBPATH% +@set LIBPATH=%FrameworkDir%%FrameworkVersion%;%LIBPATH% + +@goto end + +@REM ----------------------------------------------------------------------- +:GetVSCommonToolsDir +@set VS100COMNTOOLS= +@call :GetVSCommonToolsDirHelper32 HKLM > nul 2>&1 +@if errorlevel 1 call :GetVSCommonToolsDirHelper32 HKCU > nul 2>&1 +@if errorlevel 1 call :GetVSCommonToolsDirHelper64 HKLM > nul 2>&1 +@if errorlevel 1 call :GetVSCommonToolsDirHelper64 HKCU > nul 2>&1 +@exit /B 0 + 
+:GetVSCommonToolsDirHelper32 +@for /F "tokens=1,2*" %%i in ('reg query "%1\SOFTWARE\Microsoft\VisualStudio\SxS\VS7" /v "10.0"') DO ( + @if "%%i"=="10.0" ( + @SET "VS100COMNTOOLS=%%k" + ) +) +@if "%VS100COMNTOOLS%"=="" exit /B 1 +@SET "VS100COMNTOOLS=%VS100COMNTOOLS%Common7\Tools\" +@exit /B 0 + +:GetVSCommonToolsDirHelper64 +@for /F "tokens=1,2*" %%i in ('reg query "%1\SOFTWARE\Wow6432Node\Microsoft\VisualStudio\SxS\VS7" /v "10.0"') DO ( + @if "%%i"=="10.0" ( + @SET "VS100COMNTOOLS=%%k" + ) +) +@if "%VS100COMNTOOLS%"=="" exit /B 1 +@SET "VS100COMNTOOLS=%VS100COMNTOOLS%Common7\Tools\" +@exit /B 0 + +@REM ----------------------------------------------------------------------- +:error_no_VS100COMNTOOLSDIR +@echo ERROR: Cannot determine the location of the VS Common Tools folder. +@goto end + +:error_no_VSINSTALLDIR +@echo ERROR: Cannot determine the location of the VS installation. +@goto end + +:error_no_FrameworkDIR32 +@echo ERROR: Cannot determine the location of the .NET Framework 32bit installation. +@goto end + +:error_no_FrameworkVer32 +@echo ERROR: Cannot determine the version of the .NET Framework 32bit installation. +@goto end + +:error_no_Framework35Version +@echo ERROR: Cannot determine the .NET Framework 3.5 version. +@goto end + +:end + +msbuild msvc-2010-360.sln /p:Configuration=Release_LTCG /target:clean +msbuild msvc-2010-360.sln /p:Configuration=Release_LTCG +exit diff --git a/platform/libretro/msvc/msvc-2010.bat b/platform/libretro/msvc/msvc-2010.bat new file mode 100644 index 00000000..a4b08228 --- /dev/null +++ b/platform/libretro/msvc/msvc-2010.bat @@ -0,0 +1,124 @@ +@echo off + +@echo Setting environment for using Microsoft Visual Studio 2010 x86 tools. 
+ +@call :GetVSCommonToolsDir +@if "%VS100COMNTOOLS%"=="" goto error_no_VS100COMNTOOLSDIR + +@call "%VS100COMNTOOLS%VCVarsQueryRegistry.bat" 32bit No64bit + +@if "%VSINSTALLDIR%"=="" goto error_no_VSINSTALLDIR +@if "%FrameworkDir32%"=="" goto error_no_FrameworkDIR32 +@if "%FrameworkVersion32%"=="" goto error_no_FrameworkVer32 +@if "%Framework35Version%"=="" goto error_no_Framework35Version + +@set FrameworkDir=%FrameworkDir32% +@set FrameworkVersion=%FrameworkVersion32% + +@if not "%WindowsSdkDir%" == "" ( + @set "PATH=%WindowsSdkDir%bin\NETFX 4.0 Tools;%WindowsSdkDir%bin;%PATH%" + @set "INCLUDE=%WindowsSdkDir%include;%INCLUDE%" + @set "LIB=%WindowsSdkDir%lib;%LIB%" +) + +@rem +@rem Root of Visual Studio IDE installed files. +@rem +@set DevEnvDir=%VSINSTALLDIR%Common7\IDE\ + +@rem PATH +@rem ---- +@if exist "%VSINSTALLDIR%Team Tools\Performance Tools" ( + @set "PATH=%VSINSTALLDIR%Team Tools\Performance Tools;%PATH%" +) +@if exist "%ProgramFiles%\HTML Help Workshop" set PATH=%ProgramFiles%\HTML Help Workshop;%PATH% +@if exist "%ProgramFiles(x86)%\HTML Help Workshop" set PATH=%ProgramFiles(x86)%\HTML Help Workshop;%PATH% +@if exist "%VCINSTALLDIR%VCPackages" set PATH=%VCINSTALLDIR%VCPackages;%PATH% +@set PATH=%FrameworkDir%%Framework35Version%;%PATH% +@set PATH=%FrameworkDir%%FrameworkVersion%;%PATH% +@set PATH=%VSINSTALLDIR%Common7\Tools;%PATH% +@if exist "%VCINSTALLDIR%BIN" set PATH=%VCINSTALLDIR%BIN;%PATH% +@set PATH=%DevEnvDir%;%PATH% + +@if exist "%VSINSTALLDIR%VSTSDB\Deploy" ( + @set "PATH=%VSINSTALLDIR%VSTSDB\Deploy;%PATH%" +) + +@if not "%FSHARPINSTALLDIR%" == "" ( + @set "PATH=%FSHARPINSTALLDIR%;%PATH%" +) + +@rem INCLUDE +@rem ------- +@if exist "%VCINSTALLDIR%ATLMFC\INCLUDE" set INCLUDE=%VCINSTALLDIR%ATLMFC\INCLUDE;%INCLUDE% +@if exist "%VCINSTALLDIR%INCLUDE" set INCLUDE=%VCINSTALLDIR%INCLUDE;%INCLUDE% + +@rem LIB +@rem --- +@if exist "%VCINSTALLDIR%ATLMFC\LIB" set LIB=%VCINSTALLDIR%ATLMFC\LIB;%LIB% +@if exist "%VCINSTALLDIR%LIB" set 
LIB=%VCINSTALLDIR%LIB;%LIB% + +@rem LIBPATH +@rem ------- +@if exist "%VCINSTALLDIR%ATLMFC\LIB" set LIBPATH=%VCINSTALLDIR%ATLMFC\LIB;%LIBPATH% +@if exist "%VCINSTALLDIR%LIB" set LIBPATH=%VCINSTALLDIR%LIB;%LIBPATH% +@set LIBPATH=%FrameworkDir%%Framework35Version%;%LIBPATH% +@set LIBPATH=%FrameworkDir%%FrameworkVersion%;%LIBPATH% + +@goto end + +@REM ----------------------------------------------------------------------- +:GetVSCommonToolsDir +@set VS100COMNTOOLS= +@call :GetVSCommonToolsDirHelper32 HKLM > nul 2>&1 +@if errorlevel 1 call :GetVSCommonToolsDirHelper32 HKCU > nul 2>&1 +@if errorlevel 1 call :GetVSCommonToolsDirHelper64 HKLM > nul 2>&1 +@if errorlevel 1 call :GetVSCommonToolsDirHelper64 HKCU > nul 2>&1 +@exit /B 0 + +:GetVSCommonToolsDirHelper32 +@for /F "tokens=1,2*" %%i in ('reg query "%1\SOFTWARE\Microsoft\VisualStudio\SxS\VS7" /v "10.0"') DO ( + @if "%%i"=="10.0" ( + @SET "VS100COMNTOOLS=%%k" + ) +) +@if "%VS100COMNTOOLS%"=="" exit /B 1 +@SET "VS100COMNTOOLS=%VS100COMNTOOLS%Common7\Tools\" +@exit /B 0 + +:GetVSCommonToolsDirHelper64 +@for /F "tokens=1,2*" %%i in ('reg query "%1\SOFTWARE\Wow6432Node\Microsoft\VisualStudio\SxS\VS7" /v "10.0"') DO ( + @if "%%i"=="10.0" ( + @SET "VS100COMNTOOLS=%%k" + ) +) +@if "%VS100COMNTOOLS%"=="" exit /B 1 +@SET "VS100COMNTOOLS=%VS100COMNTOOLS%Common7\Tools\" +@exit /B 0 + +@REM ----------------------------------------------------------------------- +:error_no_VS100COMNTOOLSDIR +@echo ERROR: Cannot determine the location of the VS Common Tools folder. +@goto end + +:error_no_VSINSTALLDIR +@echo ERROR: Cannot determine the location of the VS installation. +@goto end + +:error_no_FrameworkDIR32 +@echo ERROR: Cannot determine the location of the .NET Framework 32bit installation. +@goto end + +:error_no_FrameworkVer32 +@echo ERROR: Cannot determine the version of the .NET Framework 32bit installation. +@goto end + +:error_no_Framework35Version +@echo ERROR: Cannot determine the .NET Framework 3.5 version. 
+@goto end + +:end + +msbuild msvc-2010.sln /p:Configuration=Release /target:clean +msbuild msvc-2010.sln /p:Configuration=Release +exit diff --git a/platform/libretro/msvc/msvc-2010.sln b/platform/libretro/msvc/msvc-2010.sln new file mode 100644 index 00000000..e2db4a95 --- /dev/null +++ b/platform/libretro/msvc/msvc-2010.sln @@ -0,0 +1,20 @@ + +Microsoft Visual Studio Solution File, Format Version 11.00 +# Visual Studio 2010 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "msvc-2010", "msvc-2010\msvc-2010.vcxproj", "{D4156C25-0E30-4407-9198-1F51EF74AA84}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Win32 = Debug|Win32 + Release|Win32 = Release|Win32 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {D4156C25-0E30-4407-9198-1F51EF74AA84}.Debug|Win32.ActiveCfg = Debug|Win32 + {D4156C25-0E30-4407-9198-1F51EF74AA84}.Debug|Win32.Build.0 = Debug|Win32 + {D4156C25-0E30-4407-9198-1F51EF74AA84}.Release|Win32.ActiveCfg = Release|Win32 + {D4156C25-0E30-4407-9198-1F51EF74AA84}.Release|Win32.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/platform/libretro/msvc/msvc-2010/libretro.def b/platform/libretro/msvc/msvc-2010/libretro.def new file mode 100644 index 00000000..70f6699a --- /dev/null +++ b/platform/libretro/msvc/msvc-2010/libretro.def @@ -0,0 +1,27 @@ +LIBRARY "msvc-2010" +EXPORTS +retro_set_environment +retro_set_video_refresh +retro_set_audio_sample +retro_set_audio_sample_batch +retro_set_input_poll +retro_set_input_state +retro_init +retro_deinit +retro_api_version +retro_get_system_info +retro_get_system_av_info +retro_set_controller_port_device +retro_reset +retro_run +retro_serialize_size +retro_serialize +retro_unserialize +retro_cheat_reset +retro_cheat_set +retro_load_game +retro_load_game_special +retro_unload_game +retro_get_region +retro_get_memory_data 
+retro_get_memory_size diff --git a/platform/libretro/msvc/msvc-2010/msvc-2010.vcxproj b/platform/libretro/msvc/msvc-2010/msvc-2010.vcxproj new file mode 100644 index 00000000..4b8784a3 --- /dev/null +++ b/platform/libretro/msvc/msvc-2010/msvc-2010.vcxproj @@ -0,0 +1,157 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + + {D4156C25-0E30-4407-9198-1F51EF74AA84} + Win32Proj + msvc2010 + + + + DynamicLibrary + true + Unicode + + + DynamicLibrary + false + true + Unicode + + + + + + + + + + + + + true + $(SolutionDir)msvc-2010\$(Configuration)\ + + + false + $(SolutionDir)msvc-2010\$(Configuration)\ + + + + + + Level3 + Disabled + WIN32;_DEBUG;_WINDOWS;_USRDLL;MSVC2010_EXPORTS;%(PreprocessorDefinitions);INLINE=_inline;_CRT_SECURE_NO_WARNINGS;EMU_F68K;_USE_CZ80;NO_ZLIB;FAMEC_NO_GOTOS + CompileAsC + $(SolutionDir)\..\..\..\;$(SolutionDIr)\..\..\..\pico;%(AdditionalIncludeDirectories) + + + Windows + true + libretro.def + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_WINDOWS;_USRDLL;MSVC2010_EXPORTS;%(PreprocessorDefinitions);INLINE=_inline;_CRT_SECURE_NO_WARNINGS;EMU_F68K;_USE_CZ80;NO_ZLIB;FAMEC_NO_GOTOS + CompileAsC + $(SolutionDir)\..\..\..\;$(SolutionDIr)\..\..\..\pico;%(AdditionalIncludeDirectories) + + + Windows + true + true + true + libretro.def + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/platform/libretro/msvc/msvc-2010/msvc-2010.vcxproj.filters b/platform/libretro/msvc/msvc-2010/msvc-2010.vcxproj.filters new file mode 100644 index 00000000..1a70e495 --- /dev/null +++ b/platform/libretro/msvc/msvc-2010/msvc-2010.vcxproj.filters @@ -0,0 +1,277 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + 
rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + {13ad8d51-3614-47ce-9d0d-8eb47a4cfabe} + + + {56e5d1cc-a749-46f0-9c75-e26037b4e2b3} + + + {ab1e9796-fcf3-49c2-92f2-cbce4ad50f7f} + + + {d7cd40e2-d074-4967-84ad-89488a9eed11} + + + {76c63342-13b7-413c-b44b-52ef07b4dccc} + + + {04bd626c-6833-49c7-8256-dc94935efe03} + + + {3b94bd08-c15d-46a4-9672-094f4cafbc06} + + + {403b507e-7278-436e-b8a5-5a0deb70dfae} + + + {27323686-5607-4502-9488-ac65c90e6969} + + + {2e0a2f96-c25d-473e-9456-5e25b6eb8036} + + + {a208ee7f-75c1-4ff9-9ed5-ea2d42832fc6} + + + {04862576-b191-4769-a0f8-bb6400cfa861} + + + {337acc4a-3fe4-4547-b655-058d31318ffc} + + + {63c3bec2-54b1-4831-a420-5e1aa120738b} + + + {85be1810-42b8-4ec7-bbd5-6c7d1dc5b763} + + + {055bac11-1f11-4fe7-be7b-09ebaeab74d5} + + + {dd1911b8-6d08-42aa-ab21-0ba1154613e1} + + + {a635c355-0290-4923-84c6-8290ea8b0065} + + + + + Source Files\platform\libretro + + + Source Files\platform\common + + + Source Files\platform\common + + + Source Files\zlib + + + Source Files\zlib + + + Source Files\zlib + + + Source Files\zlib + + + Source Files\zlib + + + Source Files\zlib + + + Source Files\zlib + + + Source Files\zlib + + + Source Files\zlib + + + Source Files\zlib + + + Source Files\zlib + + + Source Files\zlib + + + Source Files\zlib + + + Source Files\unzip + + + Source Files\unzip + + + Source Files\pico + + + Source Files\pico + + + Source Files\pico + + + Source Files\pico + + + Source Files\pico + + + Source Files\pico + + + Source Files\pico + + + Source Files\pico + + + Source Files\pico + + + Source Files\pico + + + Source Files\pico + + + Source Files\pico + + + Source Files\pico + + + Source Files\pico + + + Source Files\pico + + + Source Files\pico + + + Source Files\pico + + + Source Files\pico\cd + + + Source Files\pico\cd + + + Source Files\pico\cd + + + Source Files\pico\cd + + + Source Files\pico\cd + + + Source Files\pico\cd + + + Source Files\pico\cd + + + Source Files\pico\cd + + + 
Source Files\pico\cd + + + Source Files\pico\cd + + + Source Files\pico\cd + + + Source Files\pico\cd + + + Source Files\pico\32x + + + Source Files\pico\32x + + + Source Files\pico\32x + + + Source Files\pico\32x + + + Source Files\pico\carthw + + + Source Files\pico\carthw\svp + + + Source Files\pico\carthw\svp + + + Source Files\pico\carthw\svp + + + Source Files\pico\sound + + + Source Files\pico\sound + + + Source Files\pico\sound + + + Source Files\pico\sound + + + Source Files\cpu\famec + + + Source Files\cpu\cz80 + + + Source Files\cpu\drc + + + Source Files\cpu\sh2 + + + Source Files\cpu\sh2\mame + + + Source Files + + + Source Files\pico\pico + + + Source Files\pico\pico + + + Source Files\pico\pico + + + \ No newline at end of file diff --git a/platform/libretro/psp/draw_amips.s b/platform/libretro/psp/draw_amips.s new file mode 100644 index 00000000..fa7906ee --- /dev/null +++ b/platform/libretro/psp/draw_amips.s @@ -0,0 +1,1756 @@ +#* +#* several drawing related functions for Allegrex MIPS +#* (C) notaz, 2007-2008 +#* +#* This work is licensed under the terms of MAME license. +#* See COPYING file in the top-level directory. +#* +#* this is highly specialized, be careful if changing related C code! 
+#* + +.set noreorder # don't reorder any instructions +.set noat # don't use $at + +.text +.align 4 + +# void amips_clut(unsigned short *dst, unsigned char *src, unsigned short *pal, int count) + +.global amips_clut + +amips_clut: + srl $a3, 2 +amips_clut_loop: + lbu $t0, 0($a1) # tried lw here, no improvement noticed + lbu $t1, 1($a1) + lbu $t2, 2($a1) + lbu $t3, 3($a1) + sll $t0, 1 + sll $t1, 1 + sll $t2, 1 + sll $t3, 1 + addu $t0, $a2 + addu $t1, $a2 + addu $t2, $a2 + addu $t3, $a2 + lhu $t0, 0($t0) + lhu $t1, 0($t1) + lhu $t2, 0($t2) + lhu $t3, 0($t3) + ins $t0, $t1, 16, 16 # ins rt, rs, pos, size - Insert size bits starting + ins $t2, $t3, 16, 16 # from the LSB of rs into rt starting at position pos + sw $t0, 0($a0) + sw $t2, 4($a0) + addiu $a0, 8 + addiu $a3, -1 + bnez $a3, amips_clut_loop + addiu $a1, 4 + jr $ra + nop + + +.global amips_clut_6bit + +amips_clut_6bit: + srl $a3, 2 + li $t4, 0 + li $t5, 0 + li $t6, 0 + li $t7, 0 +amips_clut_loop6: + lbu $t0, 0($a1) # tried lw here, no improvement noticed + lbu $t1, 1($a1) + lbu $t2, 2($a1) + lbu $t3, 3($a1) + ins $t4, $t0, 1, 6 + ins $t5, $t1, 1, 6 + ins $t6, $t2, 1, 6 + ins $t7, $t3, 1, 6 + addu $t0, $t4, $a2 + addu $t1, $t5, $a2 + addu $t2, $t6, $a2 + addu $t3, $t7, $a2 + lhu $t0, 0($t0) + lhu $t1, 0($t1) + lhu $t2, 0($t2) + lhu $t3, 0($t3) + ins $t0, $t1, 16, 16 # ins rt, rs, pos, size - Insert size bits starting + ins $t2, $t3, 16, 16 # from the LSB of rs into rt starting at position pos + sw $t0, 0($a0) + sw $t2, 4($a0) + addiu $a0, 8 + addiu $a3, -1 + bnez $a3, amips_clut_loop6 + addiu $a1, 4 + jr $ra + nop + + +# $a0 - pd, $a1 - tile word, $a2 - pal +# ext rt, rs, pos, size // Extract size bits from position pos in rs and store in rt + +.macro TilePixelPrep shift dreg offs +.if \shift + ext \dreg, $a1, \shift, 4 +.else + andi \dreg, $a1, 0xf +.endif +.if \offs + sltu $t8, $0, \dreg + ins $t9, $t8, \offs, 1 +.else + sltu $t9, $0, \dreg +.endif +.endm + +.macro TileStartCode + sll $a1, $a1, 1 + lui $t1, 
%hi(Pico+0x10000) + addu $a1, $a1, $t1 + lw $a1, %lo(Pico+0x10000)($a1) # Pico.vram + addr + beqz $a1, TileEmpty + rotr $t1, $a1, 4 + beq $t1, $a1, SingleColor + and $v0, $0 # not empty tile +.endm + +.macro TileEndCode + xori $t8, $t9, 0xff + beqz $t8, tile11111111 # common case + lui $v1, %hi(HighCol) + lui $t8, %hi(TileTable) + ins $t8, $t9, 2, 8 + lw $t8, %lo(TileTable)($t8) + lw $v1, %lo(HighCol)($v1) + jr $t8 + addu $a0, $v1 +.endm + + +.global TileNorm + +TileNorm: + TileStartCode + TilePixelPrep 12, $t0, 0 + TilePixelPrep 8, $t1, 1 + TilePixelPrep 4, $t2, 2 + TilePixelPrep 0, $t3, 3 + TilePixelPrep 28, $t4, 4 + TilePixelPrep 24, $t5, 5 + TilePixelPrep 20, $t6, 6 + TilePixelPrep 16, $t7, 7 + TileEndCode + + +.global TileFlip + +TileFlip: + TileStartCode + TilePixelPrep 16, $t0, 0 + TilePixelPrep 20, $t1, 1 + TilePixelPrep 24, $t2, 2 + TilePixelPrep 28, $t3, 3 + TilePixelPrep 0, $t4, 4 + TilePixelPrep 4, $t5, 5 + TilePixelPrep 8, $t6, 6 + TilePixelPrep 12, $t7, 7 + TileEndCode + + +SingleColor: + lui $t9, %hi(HighCol) + lw $t9, %lo(HighCol)($t9) + andi $t0, $a1, 0xf + or $t0, $t0, $a2 + addu $a0, $t9 + sb $t0, 0($a0) + sb $t0, 1($a0) + sb $t0, 2($a0) + sb $t0, 3($a0) + sb $t0, 4($a0) + sb $t0, 5($a0) + sb $t0, 6($a0) + jr $ra + sb $t0, 7($a0) + +TileEmpty: + jr $ra + or $v0, $0, 1 # empty tile + +tile11111111: + lw $v1, %lo(HighCol)($v1) + or $t0, $t0, $a2 + addu $a0, $v1 + sb $t0, 0($a0) +tile11111110: + or $t1, $t1, $a2 + sb $t1, 1($a0) +tile11111100: + or $t2, $t2, $a2 + sb $t2, 2($a0) +tile11111000: + or $t3, $t3, $a2 + sb $t3, 3($a0) +tile11110000: + or $t4, $t4, $a2 + sb $t4, 4($a0) +tile11100000: + or $t5, $t5, $a2 + sb $t5, 5($a0) +tile11000000: + or $t6, $t6, $a2 + sb $t6, 6($a0) +tile10000000: + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile11111101: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t6, $t6, $a2 + 
sb $t6, 6($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile11111011: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile11111010: + or $t1, $t1, $a2 + sb $t1, 1($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t6, $t6, $a2 + sb $t6, 6($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile11111001: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t6, $t6, $a2 + sb $t6, 6($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile11110111: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile11110110: + or $t1, $t1, $a2 + sb $t1, 1($a0) +tile11110100: + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t6, $t6, $a2 + sb $t6, 6($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile11110101: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t6, $t6, $a2 + sb $t6, 6($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile11110011: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile11110010: + or $t1, $t1, $a2 + sb $t1, 1($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t6, $t6, $a2 + sb $t6, 6($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile11110001: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t6, $t6, $a2 + sb $t6, 6($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile11101111: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile11101110: + or $t1, $t1, $a2 + sb $t1, 1($a0) +tile11101100: + or $t2, $t2, $a2 + sb $t2, 2($a0) +tile11101000: + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t6, $t6, $a2 + sb $t6, 6($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile11101101: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t2, $t2, $a2 + sb $t2, 
2($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t6, $t6, $a2 + sb $t6, 6($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile11101011: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile11101010: + or $t1, $t1, $a2 + sb $t1, 1($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t6, $t6, $a2 + sb $t6, 6($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile11101001: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t6, $t6, $a2 + sb $t6, 6($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile11100111: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile11100110: + or $t1, $t1, $a2 + sb $t1, 1($a0) +tile11100100: + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t6, $t6, $a2 + sb $t6, 6($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile11100101: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t6, $t6, $a2 + sb $t6, 6($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile11100011: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile11100010: + or $t1, $t1, $a2 + sb $t1, 1($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t6, $t6, $a2 + sb $t6, 6($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile11100001: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t6, $t6, $a2 + sb $t6, 6($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile11011111: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile11011110: + or $t1, $t1, $a2 + sb $t1, 1($a0) +tile11011100: + or $t2, $t2, $a2 + sb $t2, 2($a0) +tile11011000: + or $t3, $t3, $a2 + sb $t3, 3($a0) +tile11010000: + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t6, $t6, $a2 + sb $t6, 6($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile11011101: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t6, $t6, $a2 + sb 
$t6, 6($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile11011011: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile11011010: + or $t1, $t1, $a2 + sb $t1, 1($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t6, $t6, $a2 + sb $t6, 6($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile11011001: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t6, $t6, $a2 + sb $t6, 6($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile11010111: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile11010110: + or $t1, $t1, $a2 + sb $t1, 1($a0) +tile11010100: + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t6, $t6, $a2 + sb $t6, 6($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile11010101: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t6, $t6, $a2 + sb $t6, 6($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile11010011: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile11010010: + or $t1, $t1, $a2 + sb $t1, 1($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t6, $t6, $a2 + sb $t6, 6($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile11010001: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t6, $t6, $a2 + sb $t6, 6($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile11001111: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile11001110: + or $t1, $t1, $a2 + sb $t1, 1($a0) +tile11001100: + or $t2, $t2, $a2 + sb $t2, 2($a0) +tile11001000: + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t6, $t6, $a2 + sb $t6, 6($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile11001101: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t6, $t6, $a2 + sb $t6, 6($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile11001011: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile11001010: + or $t1, $t1, $a2 + sb $t1, 1($a0) + or $t3, $t3, $a2 + sb $t3, 
3($a0) + or $t6, $t6, $a2 + sb $t6, 6($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile11001001: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t6, $t6, $a2 + sb $t6, 6($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile11000111: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile11000110: + or $t1, $t1, $a2 + sb $t1, 1($a0) +tile11000100: + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t6, $t6, $a2 + sb $t6, 6($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile11000101: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t6, $t6, $a2 + sb $t6, 6($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile11000011: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile11000010: + or $t1, $t1, $a2 + sb $t1, 1($a0) + or $t6, $t6, $a2 + sb $t6, 6($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile11000001: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t6, $t6, $a2 + sb $t6, 6($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile10111111: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile10111110: + or $t1, $t1, $a2 + sb $t1, 1($a0) +tile10111100: + or $t2, $t2, $a2 + sb $t2, 2($a0) +tile10111000: + or $t3, $t3, $a2 + sb $t3, 3($a0) +tile10110000: + or $t4, $t4, $a2 + sb $t4, 4($a0) +tile10100000: + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile10111101: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile10111011: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile10111010: + or $t1, $t1, $a2 + sb $t1, 1($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile10111001: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t7, $t7, $a2 + 
jr $ra + sb $t7, 7($a0) +tile10110111: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile10110110: + or $t1, $t1, $a2 + sb $t1, 1($a0) +tile10110100: + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile10110101: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile10110011: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile10110010: + or $t1, $t1, $a2 + sb $t1, 1($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile10110001: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile10101111: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile10101110: + or $t1, $t1, $a2 + sb $t1, 1($a0) +tile10101100: + or $t2, $t2, $a2 + sb $t2, 2($a0) +tile10101000: + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile10101101: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile10101011: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile10101010: + or $t1, $t1, $a2 + sb $t1, 1($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile10101001: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile10100111: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile10100110: + or $t1, $t1, $a2 + sb $t1, 1($a0) +tile10100100: + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) 
+tile10100101: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile10100011: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile10100010: + or $t1, $t1, $a2 + sb $t1, 1($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile10100001: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile10011111: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile10011110: + or $t1, $t1, $a2 + sb $t1, 1($a0) +tile10011100: + or $t2, $t2, $a2 + sb $t2, 2($a0) +tile10011000: + or $t3, $t3, $a2 + sb $t3, 3($a0) +tile10010000: + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile10011101: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile10011011: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile10011010: + or $t1, $t1, $a2 + sb $t1, 1($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile10011001: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile10010111: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile10010110: + or $t1, $t1, $a2 + sb $t1, 1($a0) +tile10010100: + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile10010101: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile10010011: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile10010010: + or $t1, $t1, $a2 + sb $t1, 1($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile10010001: + or $t0, $t0, $a2 + sb 
$t0, 0($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile10001111: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile10001110: + or $t1, $t1, $a2 + sb $t1, 1($a0) +tile10001100: + or $t2, $t2, $a2 + sb $t2, 2($a0) +tile10001000: + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile10001101: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile10001011: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile10001010: + or $t1, $t1, $a2 + sb $t1, 1($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile10001001: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile10000111: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile10000110: + or $t1, $t1, $a2 + sb $t1, 1($a0) +tile10000100: + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile10000101: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile10000011: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile10000010: + or $t1, $t1, $a2 + sb $t1, 1($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile10000001: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t7, $t7, $a2 + jr $ra + sb $t7, 7($a0) +tile01111111: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile01111110: + or $t1, $t1, $a2 + sb $t1, 1($a0) +tile01111100: + or $t2, $t2, $a2 + sb $t2, 2($a0) +tile01111000: + or $t3, $t3, $a2 + sb $t3, 3($a0) +tile01110000: + or $t4, $t4, $a2 + sb $t4, 4($a0) +tile01100000: + or $t5, $t5, $a2 + sb $t5, 5($a0) +tile01000000: + or $t6, $t6, $a2 + jr $ra + sb $t6, 6($a0) +tile00000000: +tile01111101: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t6, $t6, $a2 + jr 
$ra + sb $t6, 6($a0) +tile01111011: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile01111010: + or $t1, $t1, $a2 + sb $t1, 1($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t6, $t6, $a2 + jr $ra + sb $t6, 6($a0) +tile01111001: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t6, $t6, $a2 + jr $ra + sb $t6, 6($a0) +tile01110111: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile01110110: + or $t1, $t1, $a2 + sb $t1, 1($a0) +tile01110100: + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t6, $t6, $a2 + jr $ra + sb $t6, 6($a0) +tile01110101: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t6, $t6, $a2 + jr $ra + sb $t6, 6($a0) +tile01110011: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile01110010: + or $t1, $t1, $a2 + sb $t1, 1($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t6, $t6, $a2 + jr $ra + sb $t6, 6($a0) +tile01110001: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t6, $t6, $a2 + jr $ra + sb $t6, 6($a0) +tile01101111: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile01101110: + or $t1, $t1, $a2 + sb $t1, 1($a0) +tile01101100: + or $t2, $t2, $a2 + sb $t2, 2($a0) +tile01101000: + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t6, $t6, $a2 + jr $ra + sb $t6, 6($a0) +tile01101101: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t6, $t6, $a2 + jr $ra + sb $t6, 6($a0) +tile01101011: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile01101010: + or $t1, $t1, $a2 + sb $t1, 1($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t5, $t5, $a2 + sb $t5, 
5($a0) + or $t6, $t6, $a2 + jr $ra + sb $t6, 6($a0) +tile01101001: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t6, $t6, $a2 + jr $ra + sb $t6, 6($a0) +tile01100111: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile01100110: + or $t1, $t1, $a2 + sb $t1, 1($a0) +tile01100100: + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t6, $t6, $a2 + jr $ra + sb $t6, 6($a0) +tile01100101: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t6, $t6, $a2 + jr $ra + sb $t6, 6($a0) +tile01100011: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile01100010: + or $t1, $t1, $a2 + sb $t1, 1($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t6, $t6, $a2 + jr $ra + sb $t6, 6($a0) +tile01100001: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t5, $t5, $a2 + sb $t5, 5($a0) + or $t6, $t6, $a2 + jr $ra + sb $t6, 6($a0) +tile01011111: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile01011110: + or $t1, $t1, $a2 + sb $t1, 1($a0) +tile01011100: + or $t2, $t2, $a2 + sb $t2, 2($a0) +tile01011000: + or $t3, $t3, $a2 + sb $t3, 3($a0) +tile01010000: + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t6, $t6, $a2 + jr $ra + sb $t6, 6($a0) +tile01011101: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t6, $t6, $a2 + jr $ra + sb $t6, 6($a0) +tile01011011: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile01011010: + or $t1, $t1, $a2 + sb $t1, 1($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t6, $t6, $a2 + jr $ra + sb $t6, 6($a0) +tile01011001: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t6, $t6, $a2 + jr $ra + sb $t6, 6($a0) +tile01010111: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile01010110: + or $t1, $t1, $a2 + sb $t1, 1($a0) +tile01010100: + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t4, $t4, $a2 
+ sb $t4, 4($a0) + or $t6, $t6, $a2 + jr $ra + sb $t6, 6($a0) +tile01010101: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t6, $t6, $a2 + jr $ra + sb $t6, 6($a0) +tile01010011: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile01010010: + or $t1, $t1, $a2 + sb $t1, 1($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t6, $t6, $a2 + jr $ra + sb $t6, 6($a0) +tile01010001: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t6, $t6, $a2 + jr $ra + sb $t6, 6($a0) +tile01001111: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile01001110: + or $t1, $t1, $a2 + sb $t1, 1($a0) +tile01001100: + or $t2, $t2, $a2 + sb $t2, 2($a0) +tile01001000: + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t6, $t6, $a2 + jr $ra + sb $t6, 6($a0) +tile01001101: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t6, $t6, $a2 + jr $ra + sb $t6, 6($a0) +tile01001011: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile01001010: + or $t1, $t1, $a2 + sb $t1, 1($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t6, $t6, $a2 + jr $ra + sb $t6, 6($a0) +tile01001001: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t6, $t6, $a2 + jr $ra + sb $t6, 6($a0) +tile01000111: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile01000110: + or $t1, $t1, $a2 + sb $t1, 1($a0) +tile01000100: + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t6, $t6, $a2 + jr $ra + sb $t6, 6($a0) +tile01000101: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t6, $t6, $a2 + jr $ra + sb $t6, 6($a0) +tile01000011: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile01000010: + or $t1, $t1, $a2 + sb $t1, 1($a0) + or $t6, $t6, $a2 + jr $ra + sb $t6, 6($a0) +tile01000001: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t6, $t6, $a2 + jr $ra + sb $t6, 6($a0) +tile00111111: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile00111110: + or $t1, $t1, $a2 + sb $t1, 1($a0) +tile00111100: + or $t2, $t2, $a2 + sb $t2, 
2($a0) +tile00111000: + or $t3, $t3, $a2 + sb $t3, 3($a0) +tile00110000: + or $t4, $t4, $a2 + sb $t4, 4($a0) +tile00100000: + or $t5, $t5, $a2 + jr $ra + sb $t5, 5($a0) +tile00111101: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t5, $t5, $a2 + jr $ra + sb $t5, 5($a0) +tile00111011: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile00111010: + or $t1, $t1, $a2 + sb $t1, 1($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t5, $t5, $a2 + jr $ra + sb $t5, 5($a0) +tile00111001: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t5, $t5, $a2 + jr $ra + sb $t5, 5($a0) +tile00110111: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile00110110: + or $t1, $t1, $a2 + sb $t1, 1($a0) +tile00110100: + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t5, $t5, $a2 + jr $ra + sb $t5, 5($a0) +tile00110101: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t5, $t5, $a2 + jr $ra + sb $t5, 5($a0) +tile00110011: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile00110010: + or $t1, $t1, $a2 + sb $t1, 1($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t5, $t5, $a2 + jr $ra + sb $t5, 5($a0) +tile00110001: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t4, $t4, $a2 + sb $t4, 4($a0) + or $t5, $t5, $a2 + jr $ra + sb $t5, 5($a0) +tile00101111: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile00101110: + or $t1, $t1, $a2 + sb $t1, 1($a0) +tile00101100: + or $t2, $t2, $a2 + sb $t2, 2($a0) +tile00101000: + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t5, $t5, $a2 + jr $ra + sb $t5, 5($a0) +tile00101101: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t5, $t5, $a2 + jr $ra + sb $t5, 5($a0) +tile00101011: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile00101010: + or $t1, $t1, $a2 + sb $t1, 1($a0) + 
or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t5, $t5, $a2 + jr $ra + sb $t5, 5($a0) +tile00101001: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t5, $t5, $a2 + jr $ra + sb $t5, 5($a0) +tile00100111: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile00100110: + or $t1, $t1, $a2 + sb $t1, 1($a0) +tile00100100: + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t5, $t5, $a2 + jr $ra + sb $t5, 5($a0) +tile00100101: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t5, $t5, $a2 + jr $ra + sb $t5, 5($a0) +tile00100011: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile00100010: + or $t1, $t1, $a2 + sb $t1, 1($a0) + or $t5, $t5, $a2 + jr $ra + sb $t5, 5($a0) +tile00100001: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t5, $t5, $a2 + jr $ra + sb $t5, 5($a0) +tile00011111: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile00011110: + or $t1, $t1, $a2 + sb $t1, 1($a0) +tile00011100: + or $t2, $t2, $a2 + sb $t2, 2($a0) +tile00011000: + or $t3, $t3, $a2 + sb $t3, 3($a0) +tile00010000: + or $t4, $t4, $a2 + jr $ra + sb $t4, 4($a0) +tile00011101: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t4, $t4, $a2 + jr $ra + sb $t4, 4($a0) +tile00011011: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile00011010: + or $t1, $t1, $a2 + sb $t1, 1($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t4, $t4, $a2 + jr $ra + sb $t4, 4($a0) +tile00011001: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t3, $t3, $a2 + sb $t3, 3($a0) + or $t4, $t4, $a2 + jr $ra + sb $t4, 4($a0) +tile00010111: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile00010110: + or $t1, $t1, $a2 + sb $t1, 1($a0) +tile00010100: + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t4, $t4, $a2 + jr $ra + sb $t4, 4($a0) +tile00010101: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t4, $t4, $a2 + jr $ra + sb $t4, 4($a0) +tile00010011: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile00010010: + or $t1, $t1, $a2 + sb $t1, 1($a0) + or $t4, $t4, $a2 + jr $ra + sb 
$t4, 4($a0) +tile00010001: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t4, $t4, $a2 + jr $ra + sb $t4, 4($a0) +tile00001111: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile00001110: + or $t1, $t1, $a2 + sb $t1, 1($a0) +tile00001100: + or $t2, $t2, $a2 + sb $t2, 2($a0) +tile00001000: + or $t3, $t3, $a2 + jr $ra + sb $t3, 3($a0) +tile00001101: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t2, $t2, $a2 + sb $t2, 2($a0) + or $t3, $t3, $a2 + jr $ra + sb $t3, 3($a0) +tile00001011: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile00001010: + or $t1, $t1, $a2 + sb $t1, 1($a0) + or $t3, $t3, $a2 + jr $ra + sb $t3, 3($a0) +tile00001001: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t3, $t3, $a2 + jr $ra + sb $t3, 3($a0) +tile00000111: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile00000110: + or $t1, $t1, $a2 + sb $t1, 1($a0) +tile00000100: + or $t2, $t2, $a2 + jr $ra + sb $t2, 2($a0) +tile00000101: + or $t0, $t0, $a2 + sb $t0, 0($a0) + or $t2, $t2, $a2 + jr $ra + sb $t2, 2($a0) +tile00000011: + or $t0, $t0, $a2 + sb $t0, 0($a0) +tile00000010: + or $t1, $t1, $a2 + jr $ra + sb $t1, 1($a0) +tile00000001: + or $t0, $t0, $a2 + jr $ra + sb $t0, 0($a0) + +.data +.align 4 + +TileTable: + .long 000000000000, tile00000001, tile00000010, tile00000011, tile00000100, tile00000101, tile00000110, tile00000111 + .long tile00001000, tile00001001, tile00001010, tile00001011, tile00001100, tile00001101, tile00001110, tile00001111 + .long tile00010000, tile00010001, tile00010010, tile00010011, tile00010100, tile00010101, tile00010110, tile00010111 + .long tile00011000, tile00011001, tile00011010, tile00011011, tile00011100, tile00011101, tile00011110, tile00011111 + .long tile00100000, tile00100001, tile00100010, tile00100011, tile00100100, tile00100101, tile00100110, tile00100111 + .long tile00101000, tile00101001, tile00101010, tile00101011, tile00101100, tile00101101, tile00101110, tile00101111 + .long tile00110000, tile00110001, tile00110010, tile00110011, tile00110100, tile00110101, tile00110110, tile00110111 + .long 
tile00111000, tile00111001, tile00111010, tile00111011, tile00111100, tile00111101, tile00111110, tile00111111 + .long tile01000000, tile01000001, tile01000010, tile01000011, tile01000100, tile01000101, tile01000110, tile01000111 + .long tile01001000, tile01001001, tile01001010, tile01001011, tile01001100, tile01001101, tile01001110, tile01001111 + .long tile01010000, tile01010001, tile01010010, tile01010011, tile01010100, tile01010101, tile01010110, tile01010111 + .long tile01011000, tile01011001, tile01011010, tile01011011, tile01011100, tile01011101, tile01011110, tile01011111 + .long tile01100000, tile01100001, tile01100010, tile01100011, tile01100100, tile01100101, tile01100110, tile01100111 + .long tile01101000, tile01101001, tile01101010, tile01101011, tile01101100, tile01101101, tile01101110, tile01101111 + .long tile01110000, tile01110001, tile01110010, tile01110011, tile01110100, tile01110101, tile01110110, tile01110111 + .long tile01111000, tile01111001, tile01111010, tile01111011, tile01111100, tile01111101, tile01111110, tile01111111 + .long tile10000000, tile10000001, tile10000010, tile10000011, tile10000100, tile10000101, tile10000110, tile10000111 + .long tile10001000, tile10001001, tile10001010, tile10001011, tile10001100, tile10001101, tile10001110, tile10001111 + .long tile10010000, tile10010001, tile10010010, tile10010011, tile10010100, tile10010101, tile10010110, tile10010111 + .long tile10011000, tile10011001, tile10011010, tile10011011, tile10011100, tile10011101, tile10011110, tile10011111 + .long tile10100000, tile10100001, tile10100010, tile10100011, tile10100100, tile10100101, tile10100110, tile10100111 + .long tile10101000, tile10101001, tile10101010, tile10101011, tile10101100, tile10101101, tile10101110, tile10101111 + .long tile10110000, tile10110001, tile10110010, tile10110011, tile10110100, tile10110101, tile10110110, tile10110111 + .long tile10111000, tile10111001, tile10111010, tile10111011, tile10111100, tile10111101, 
tile10111110, tile10111111 + .long tile11000000, tile11000001, tile11000010, tile11000011, tile11000100, tile11000101, tile11000110, tile11000111 + .long tile11001000, tile11001001, tile11001010, tile11001011, tile11001100, tile11001101, tile11001110, tile11001111 + .long tile11010000, tile11010001, tile11010010, tile11010011, tile11010100, tile11010101, tile11010110, tile11010111 + .long tile11011000, tile11011001, tile11011010, tile11011011, tile11011100, tile11011101, tile11011110, tile11011111 + .long tile11100000, tile11100001, tile11100010, tile11100011, tile11100100, tile11100101, tile11100110, tile11100111 + .long tile11101000, tile11101001, tile11101010, tile11101011, tile11101100, tile11101101, tile11101110, tile11101111 + .long tile11110000, tile11110001, tile11110010, tile11110011, tile11110100, tile11110101, tile11110110, tile11110111 + .long tile11111000, tile11111001, tile11111010, tile11111011, tile11111100, tile11111101, tile11111110, tile11111111 + +# vim:filetype=mips From bce144211cd70e1be78b7c1b7424fb4609de1ac8 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 13 Oct 2017 23:30:44 +0300 Subject: [PATCH 0112/1110] some portability cleanups --- cpu/drc/cmn.c | 2 +- cpu/sh2/sh2.h | 6 +----- pico/pico_int.h | 20 +------------------- pico/pico_port.h | 18 ++++++++++++++++++ 4 files changed, 21 insertions(+), 25 deletions(-) create mode 100644 pico/pico_port.h diff --git a/cpu/drc/cmn.c b/cpu/drc/cmn.c index 37f17ce9..acff42c8 100644 --- a/cpu/drc/cmn.c +++ b/cpu/drc/cmn.c @@ -10,7 +10,7 @@ #include #include "cmn.h" -u8 __attribute__((aligned(4096))) tcache[DRC_TCACHE_SIZE]; +u8 ALIGNED(4096) tcache[DRC_TCACHE_SIZE]; void drc_cmn_init(void) diff --git a/cpu/sh2/sh2.h b/cpu/sh2/sh2.h index 1394f94a..49695b1e 100644 --- a/cpu/sh2/sh2.h +++ b/cpu/sh2/sh2.h @@ -1,11 +1,7 @@ #ifndef __SH2_H__ #define __SH2_H__ -#if !defined(REGPARM) && defined(__i386__) -#define REGPARM(x) __attribute__((regparm(x))) -#else -#define REGPARM(x) -#endif +#include 
"../../pico/pico_port.h" // registers - matches structure order typedef enum { diff --git a/pico/pico_int.h b/pico/pico_int.h index 369bb5d7..d4fc8357 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -13,6 +13,7 @@ #include #include #include +#include "pico_port.h" #include "pico.h" #include "carthw/carthw.h" @@ -1072,25 +1073,6 @@ void pevt_dump(void); #define pevt_dump() #endif -// misc -#ifdef _MSC_VER -#define cdprintf -#else -#define cdprintf(x...) -#endif - -#if defined(__GNUC__) && defined(__i386__) -#define REGPARM(x) __attribute__((regparm(x))) -#else -#define REGPARM(x) -#endif - -#ifdef __GNUC__ -#define NOINLINE __attribute__((noinline)) -#else -#define NOINLINE -#endif - #ifdef __cplusplus } // End of extern "C" #endif diff --git a/pico/pico_port.h b/pico/pico_port.h new file mode 100644 index 00000000..f1d95a56 --- /dev/null +++ b/pico/pico_port.h @@ -0,0 +1,18 @@ +#ifndef PICO_PORT_INCLUDED +#define PICO_PORT_INCLUDED + +#if defined(__GNUC__) && defined(__i386__) +#define REGPARM(x) __attribute__((regparm(x))) +#else +#define REGPARM(x) +#endif + +#ifdef __GNUC__ +#define NOINLINE __attribute__((noinline)) +#define ALIGNED(n) __attribute__((aligned(n))) +#else +#define NOINLINE +#define ALIGNED(n) +#endif + +#endif // PICO_PORT_INCLUDED From a4fa71d4da571b206789805e3a3a5a37bc6ae1f1 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 13 Oct 2017 23:31:22 +0300 Subject: [PATCH 0113/1110] libretro: drop sram clear It's done by the core. If the core is missing something, core itself needs to be fixed. 
--- pico/pico.c | 5 +++-- platform/libretro/libretro.c | 18 ------------------ 2 files changed, 3 insertions(+), 20 deletions(-) diff --git a/pico/pico.c b/pico/pico.c index bcd8aa1b..bf9df268 100644 --- a/pico/pico.c +++ b/pico/pico.c @@ -61,8 +61,9 @@ void PicoExit(void) PicoCartUnload(); z80_exit(); - if (Pico.sv.data) - free(Pico.sv.data); + free(Pico.sv.data); + Pico.sv.data = NULL; + Pico.sv.start = Pico.sv.end = 0; pevt_dump(); } diff --git a/platform/libretro/libretro.c b/platform/libretro/libretro.c index 934b6859..3502b5eb 100644 --- a/platform/libretro/libretro.c +++ b/platform/libretro/libretro.c @@ -959,22 +959,6 @@ static const char *find_bios(int *region, const char *cd_fname) return NULL; } -static void sram_reset() -{ - SRam.data = NULL; - SRam.start = 0; - SRam.end = 0; - SRam.flags = '\0'; - SRam.unused2 = '\0'; - SRam.changed = '\0' ; - SRam.eeprom_type = '\0'; - SRam.unused3 = '\0'; - SRam.eeprom_bit_cl = '\0'; - SRam.eeprom_bit_in = '\0'; - SRam.eeprom_bit_out = '\0'; - SRam.size = 0; -} - bool retro_load_game(const struct retro_game_info *info) { enum media_type_e media_type; @@ -1031,8 +1015,6 @@ bool retro_load_game(const struct retro_game_info *info) { 0 }, }; - sram_reset(); - enum retro_pixel_format fmt = RETRO_PIXEL_FORMAT_RGB565; if (!environ_cb(RETRO_ENVIRONMENT_SET_PIXEL_FORMAT, &fmt)) { if (log_cb) From 28a5b3923203571bb99b38a3045c24e4c1d8a05f Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 13 Oct 2017 23:36:36 +0300 Subject: [PATCH 0114/1110] libretro: update for core changes also drops the broken "fps override" feature --- platform/libretro/libretro.c | 39 +++++++++--------------------------- 1 file changed, 9 insertions(+), 30 deletions(-) diff --git a/platform/libretro/libretro.c b/platform/libretro/libretro.c index 3502b5eb..b6833741 100644 --- a/platform/libretro/libretro.c +++ b/platform/libretro/libretro.c @@ -512,7 +512,6 @@ void retro_set_environment(retro_environment_t cb) { "picodrive_sprlim", "No sprite limit; 
disabled|enabled" }, { "picodrive_ramcart", "MegaCD RAM cart; disabled|enabled" }, { "picodrive_region", "Region; Auto|Japan NTSC|Japan PAL|US|Europe" }, - { "picodrive_region_fps", "Region FPS; Auto|NTSC|PAL" }, { "picodrive_aspect", "Core-provided aspect ratio; PAR|4/3|CRT" }, { "picodrive_overscan", "Show Overscan; disabled|enabled" }, #ifdef DRC_SH2 @@ -1104,13 +1103,13 @@ void *retro_get_memory_data(unsigned type) if (PicoAHW & PAHW_MCD) data = Pico_mcd->bram; else - data = SRam.data; + data = Pico.sv.data; break; case RETRO_MEMORY_SYSTEM_RAM: if (PicoAHW & PAHW_SMS) - data = Pico.zram; + data = PicoMem.zram; else - data = Pico.ram; + data = PicoMem.ram; break; default: data = NULL; @@ -1133,20 +1132,20 @@ size_t retro_get_memory_size(unsigned type) return 0x2000; if (Pico.m.frame_count == 0) - return SRam.size; + return Pico.sv.size; // if game doesn't write to sram, don't report it to // libretro so that RA doesn't write out zeroed .srm - for (i = 0, sum = 0; i < SRam.size; i++) - sum |= SRam.data[i]; + for (i = 0, sum = 0; i < Pico.sv.size; i++) + sum |= Pico.sv.data[i]; - return (sum != 0) ? SRam.size : 0; + return (sum != 0) ? 
Pico.sv.size : 0; case RETRO_MEMORY_SYSTEM_RAM: if (PicoAHW & PAHW_SMS) return 0x2000; else - return sizeof(Pico.ram); + return sizeof(PicoMem.ram); default: return 0; @@ -1242,21 +1241,8 @@ static void update_variables(void) PicoRegionOverride = 8; } - int OldPicoRegionFPSOverride = PicoRegionFPSOverride; - var.value = NULL; - var.key = "picodrive_region_fps"; - if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { - if (strcmp(var.value, "Auto") == 0) - PicoRegionFPSOverride = 0; - else if (strcmp(var.value, "NTSC") == 0) - PicoRegionFPSOverride = 1; - else if (strcmp(var.value, "PAL") == 0) - PicoRegionFPSOverride = 2; - } - // Update region, fps and sound flags if needed - if (PicoRegionOverride != OldPicoRegionOverride || - PicoRegionFPSOverride != OldPicoRegionFPSOverride) + if (Pico.rom && PicoRegionOverride != OldPicoRegionOverride) { PicoDetectRegion(); PicoLoopPrepare(); @@ -1331,13 +1317,6 @@ void retro_run(void) vout_width, vout_height, vout_width * 2); } -static void check_system_specs(void) -{ - /* TODO - set different performance level for 32X - 6 for ARM dynarec, higher for interpreter core */ - unsigned level = 5; - environ_cb(RETRO_ENVIRONMENT_SET_PERFORMANCE_LEVEL, &level); -} - void retro_init(void) { struct retro_log_callback log; From df9251536deed37b18d10b8bc3502ee39006a320 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 13 Oct 2017 23:36:51 +0300 Subject: [PATCH 0115/1110] libretro: satisfy vita's dynarec needs in a cleaner way --- cpu/drc/cmn.c | 14 ++++++++++---- cpu/drc/cmn.h | 2 +- pico/pico.h | 3 +++ platform/libretro/libretro.c | 28 +++++++++++++++++++++------- platform/linux/emu.c | 4 ++++ 5 files changed, 39 insertions(+), 12 deletions(-) diff --git a/cpu/drc/cmn.c b/cpu/drc/cmn.c index acff42c8..27ff9812 100644 --- a/cpu/drc/cmn.c +++ b/cpu/drc/cmn.c @@ -10,14 +10,20 @@ #include #include "cmn.h" -u8 ALIGNED(4096) tcache[DRC_TCACHE_SIZE]; - +u8 ALIGNED(4096) tcache_default[DRC_TCACHE_SIZE]; +u8 *tcache; void 
drc_cmn_init(void) { - int ret = plat_mem_set_exec(tcache, sizeof(tcache)); + int ret; + + tcache = plat_mem_get_for_drc(DRC_TCACHE_SIZE); + if (tcache == NULL) + tcache = tcache_default; + + ret = plat_mem_set_exec(tcache, DRC_TCACHE_SIZE); elprintf(EL_STATUS, "drc_cmn_init: %p, %zd bytes: %d", - tcache, sizeof(tcache), ret); + tcache, DRC_TCACHE_SIZE, ret); #ifdef __arm__ if (PicoOpt & POPT_EN_DRC) diff --git a/cpu/drc/cmn.h b/cpu/drc/cmn.h index 4737b74d..8953edd1 100644 --- a/cpu/drc/cmn.h +++ b/cpu/drc/cmn.h @@ -5,7 +5,7 @@ typedef unsigned int u32; #define DRC_TCACHE_SIZE (2*1024*1024) -extern u8 tcache[DRC_TCACHE_SIZE]; +extern u8 *tcache; void drc_cmn_init(void); void drc_cmn_cleanup(void); diff --git a/pico/pico.h b/pico/pico.h index 527498ee..f1687f51 100644 --- a/pico/pico.h +++ b/pico/pico.h @@ -34,6 +34,9 @@ extern void cache_flush_d_inval_i(void *start_addr, void *end_addr); extern void *plat_mmap(unsigned long addr, size_t size, int need_exec, int is_fixed); extern void *plat_mremap(void *ptr, size_t oldsize, size_t newsize); extern void plat_munmap(void *ptr, size_t size); + +// memory for the dynarec; plat_mem_get_for_drc() can just return NULL +extern void *plat_mem_get_for_drc(size_t size); extern int plat_mem_set_exec(void *ptr, size_t size); // this one should handle display mode changes diff --git a/platform/libretro/libretro.c b/platform/libretro/libretro.c index b6833741..e7f588a1 100644 --- a/platform/libretro/libretro.c +++ b/platform/libretro/libretro.c @@ -98,6 +98,7 @@ void cache_flush_d_inval_i(void *start, void *end) { #ifdef __arm__ size_t len = (char *)end - (char *)start; + (void)len; #if defined(__BLACKBERRY_QNX__) msync(start, end - start, MS_SYNC | MS_CACHE_ONLY | MS_INVALIDATE_ICACHE); #elif defined(__MACH__) @@ -435,14 +436,25 @@ void plat_munmap(void *ptr, size_t size) } #endif +// if NULL is returned, static buffer is used +void *plat_mem_get_for_drc(size_t size) +{ + void *mem = NULL; +#ifdef VITA + 
sceKernelGetMemBlockBase(sceBlock, &mem); +#endif + return mem; +} + int plat_mem_set_exec(void *ptr, size_t size) { -#ifdef _WIN32 - int ret = VirtualProtect(ptr,size,PAGE_EXECUTE_READWRITE,0); - if (ret == 0 && log_cb) - log_cb(RETRO_LOG_ERROR, "mprotect(%p, %zd) failed: %d\n", ptr, size, 0); -#elif defined(_3DS) int ret = -1; +#ifdef _WIN32 + ret = VirtualProtect(ptr, size, PAGE_EXECUTE_READWRITE, 0); + if (ret == 0 && log_cb) + log_cb(RETRO_LOG_ERROR, "VirtualProtect(%p, %d) failed: %d\n", ptr, (int)size, + GetLastError()); +#elif defined(_3DS) if (ctr_svchack_successful) { unsigned int currentHandle; @@ -461,9 +473,9 @@ int plat_mem_set_exec(void *ptr, size_t size) } #elif defined(VITA) - int ret = sceKernelOpenVMDomain(); + ret = sceKernelOpenVMDomain(); #else - int ret = mprotect(ptr, size, PROT_READ | PROT_WRITE | PROT_EXEC); + ret = mprotect(ptr, size, PROT_READ | PROT_WRITE | PROT_EXEC); if (ret != 0 && log_cb) log_cb(RETRO_LOG_ERROR, "mprotect(%p, %zd) failed: %d\n", ptr, size, errno); #endif @@ -1380,3 +1392,5 @@ void retro_deinit(void) vout_buf = NULL; PicoExit(); } + +// vim:shiftwidth=3:ts=3:expandtab diff --git a/platform/linux/emu.c b/platform/linux/emu.c index 5a97959b..aee8d44c 100644 --- a/platform/linux/emu.c +++ b/platform/linux/emu.c @@ -203,3 +203,7 @@ void plat_wait_till_us(unsigned int us_to) } } +void *plat_mem_get_for_drc(size_t size) +{ + return NULL; +} From b5f5dc1fad9a7876a2d146b1c61d06e64435a86e Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 13 Oct 2017 23:38:09 +0300 Subject: [PATCH 0116/1110] android: make armeabi buildable --- cpu/cyclone_config.h | 2 ++ cpu/cyclone_config_armv4.h | 2 ++ jni/Android.mk | 7 +++++++ platform/common/common.mak | 6 ++++-- 4 files changed, 15 insertions(+), 2 deletions(-) create mode 100644 cpu/cyclone_config_armv4.h diff --git a/cpu/cyclone_config.h b/cpu/cyclone_config.h index ed3b257e..23205171 100644 --- a/cpu/cyclone_config.h +++ b/cpu/cyclone_config.h @@ -8,7 +8,9 @@ **/ +#ifndef HAVE_ARMv6 
#define HAVE_ARMv6 1 +#endif #define USE_MS_SYNTAX 0 #define CYCLONE_FOR_GENESIS 2 #define COMPRESS_JUMPTABLE 0 diff --git a/cpu/cyclone_config_armv4.h b/cpu/cyclone_config_armv4.h new file mode 100644 index 00000000..2319029b --- /dev/null +++ b/cpu/cyclone_config_armv4.h @@ -0,0 +1,2 @@ +#define HAVE_ARMv6 0 +#include "cyclone_config.h" diff --git a/jni/Android.mk b/jni/Android.mk index a0f5dc8d..bc817026 100644 --- a/jni/Android.mk +++ b/jni/Android.mk @@ -49,6 +49,13 @@ ifeq ($(TARGET_ARCH),arm) asm_misc = 1 # asm_cdmemory = 1 # texrels asm_mix = 1 + +# for armeabi to build... +CYCLONE_CONFIG = cyclone_config_armv4.h + +$(cleantarget):: + $(MAKE) -C $(FR)cpu/cyclone/ clean + else use_fame = 1 use_cz80 = 1 diff --git a/platform/common/common.mak b/platform/common/common.mak index cd92f54b..89e46051 100644 --- a/platform/common/common.mak +++ b/platform/common/common.mak @@ -173,13 +173,15 @@ ifeq "$(use_cyclone)" "1" $(FR)pico/pico.c: $(FR)cpu/cyclone/Cyclone.h endif +CYCLONE_CONFIG ?= cyclone_config.h + $(FR)cpu/cyclone/Cyclone.h: @echo "Cyclone submodule is missing, please run 'git submodule update --init'" @false -$(FR)cpu/cyclone/Cyclone.s: $(FR)cpu/cyclone_config.h +$(FR)cpu/cyclone/Cyclone.s: $(FR)cpu/$(CYCLONE_CONFIG) @echo building Cyclone... - @make -C $(R)cpu/cyclone/ CONFIG_FILE=../cyclone_config.h + @make -C $(R)cpu/cyclone/ CONFIG_FILE=../$(CYCLONE_CONFIG) $(FR)cpu/cyclone/Cyclone.s: $(FR)cpu/cyclone/*.cpp $(FR)cpu/cyclone/*.h From e9a11abb3c8237286fd7b6c4b5048d9eec9b1a1b Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 13 Oct 2017 23:49:13 +0300 Subject: [PATCH 0117/1110] drop some unnecessary inlines apparently somebody compiles with msvc? 
--- cpu/debug.h | 2 +- cpu/sh2/compiler.c | 2 +- cpu/sh2/mame/sh2.c | 1 + cpu/sh2/mame/sh2pico.c | 4 ++-- cpu/sh2/sh2.h | 2 +- pico/32x/32x.c | 2 +- pico/pico_cmn.c | 2 +- 7 files changed, 8 insertions(+), 7 deletions(-) diff --git a/cpu/debug.h b/cpu/debug.h index 5aaa60ee..63728ad6 100644 --- a/cpu/debug.h +++ b/cpu/debug.h @@ -26,6 +26,6 @@ int pdb_net_connect(const char *host, const char *port); #else -static inline int pdb_net_connect(const char *host, const char *port) {return 0;} +static __inline int pdb_net_connect(const char *host, const char *port) {return 0;} #endif diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 3a2b708c..4403378e 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -1017,7 +1017,7 @@ static void rcache_unlock_all(void) reg_temp[i].flags &= ~HRF_LOCKED; } -static inline u32 rcache_used_hreg_mask(void) +static u32 rcache_used_hreg_mask(void) { u32 mask = 0; int i; diff --git a/cpu/sh2/mame/sh2.c b/cpu/sh2/mame/sh2.c index 81203e7b..2fb964b6 100644 --- a/cpu/sh2/mame/sh2.c +++ b/cpu/sh2/mame/sh2.c @@ -108,6 +108,7 @@ //#include "debugger.h" //#include "sh2.h" //#include "sh2comn.h" +#undef INLINE #define INLINE static //CPU_DISASSEMBLE( sh2 ); diff --git a/cpu/sh2/mame/sh2pico.c b/cpu/sh2/mame/sh2pico.c index 174d4691..636ebc6f 100644 --- a/cpu/sh2/mame/sh2pico.c +++ b/cpu/sh2/mame/sh2pico.c @@ -21,7 +21,7 @@ typedef unsigned char UINT8; // this nasty conversion is needed for drc-expecting memhandlers #define MAKE_READFUNC(name, cname) \ -static inline unsigned int name(SH2 *sh2, unsigned int a) \ +static __inline unsigned int name(SH2 *sh2, unsigned int a) \ { \ unsigned int ret; \ sh2->sr |= sh2->icount << 12; \ @@ -32,7 +32,7 @@ static inline unsigned int name(SH2 *sh2, unsigned int a) \ } #define MAKE_WRITEFUNC(name, cname) \ -static inline void name(SH2 *sh2, unsigned int a, unsigned int d) \ +static __inline void name(SH2 *sh2, unsigned int a, unsigned int d) \ { \ sh2->sr |= sh2->icount << 12; \ cname(a, d, sh2); 
\ diff --git a/cpu/sh2/sh2.h b/cpu/sh2/sh2.h index 49695b1e..e945354d 100644 --- a/cpu/sh2/sh2.h +++ b/cpu/sh2/sh2.h @@ -88,7 +88,7 @@ void sh2_unpack(SH2 *sh2, const unsigned char *buff); int sh2_execute_drc(SH2 *sh2c, int cycles); int sh2_execute_interpreter(SH2 *sh2c, int cycles); -static inline int sh2_execute(SH2 *sh2, int cycles, int use_drc) +static __inline int sh2_execute(SH2 *sh2, int cycles, int use_drc) { int ret; diff --git a/pico/32x/32x.c b/pico/32x/32x.c index 51817c0d..b20ebf30 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -366,7 +366,7 @@ static void p32x_run_events(unsigned int until) oldest, event_time_next); } -static inline void run_sh2(SH2 *sh2, int m68k_cycles) +static void run_sh2(SH2 *sh2, int m68k_cycles) { int cycles, done; diff --git a/pico/pico_cmn.c b/pico/pico_cmn.c index aad8406e..27a66cdf 100644 --- a/pico/pico_cmn.c +++ b/pico/pico_cmn.c @@ -49,7 +49,7 @@ static void SekSyncM68k(void) pprof_end(m68k); } -static inline void SekRunM68k(int cyc) +static __inline void SekRunM68k(int cyc) { Pico.t.m68c_aim += cyc; cyc = Pico.t.m68c_aim - Pico.t.m68c_cnt; From adb98333d7ab029a61d79c1e437bbea7111ec09d Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 13 Oct 2017 23:53:15 +0300 Subject: [PATCH 0118/1110] drop draw_amips from libretro too There's no proof it's any faster, it's only a maintenance burden. See also 4aedc593008ca6e9230b700cec6483c3ecd73bef . 
--- Makefile.libretro | 4 +- platform/libretro/psp/draw_amips.s | 1756 ---------------------------- 2 files changed, 1 insertion(+), 1759 deletions(-) delete mode 100644 platform/libretro/psp/draw_amips.s diff --git a/Makefile.libretro b/Makefile.libretro index 14f2dc50..b35a0dac 100644 --- a/Makefile.libretro +++ b/Makefile.libretro @@ -215,7 +215,7 @@ else ifeq ($(platform), psp1) CC = psp-gcc$(EXE_EXT) AR = psp-ar$(EXE_EXT) CFLAGS += -G0 -ftracer - CFLAGS += -DPSP -D_ASM_DRAW_C_AMIPS + CFLAGS += -DPSP STATIC_LINKING = 1 NO_MMAP = 1 DONT_COMPILE_IN_ZLIB = 1 @@ -232,8 +232,6 @@ else ifeq ($(platform), psp1) use_drz80 = 0 use_cz80 = 1 - OBJS +=platform/libretro/psp/draw_amips.o - # CTR (3DS) else ifeq ($(platform), ctr) TARGET := $(TARGET_NAME)_libretro_$(platform).a diff --git a/platform/libretro/psp/draw_amips.s b/platform/libretro/psp/draw_amips.s deleted file mode 100644 index fa7906ee..00000000 --- a/platform/libretro/psp/draw_amips.s +++ /dev/null @@ -1,1756 +0,0 @@ -#* -#* several drawing related functions for Allegrex MIPS -#* (C) notaz, 2007-2008 -#* -#* This work is licensed under the terms of MAME license. -#* See COPYING file in the top-level directory. -#* -#* this is highly specialized, be careful if changing related C code! 
-#* - -.set noreorder # don't reorder any instructions -.set noat # don't use $at - -.text -.align 4 - -# void amips_clut(unsigned short *dst, unsigned char *src, unsigned short *pal, int count) - -.global amips_clut - -amips_clut: - srl $a3, 2 -amips_clut_loop: - lbu $t0, 0($a1) # tried lw here, no improvement noticed - lbu $t1, 1($a1) - lbu $t2, 2($a1) - lbu $t3, 3($a1) - sll $t0, 1 - sll $t1, 1 - sll $t2, 1 - sll $t3, 1 - addu $t0, $a2 - addu $t1, $a2 - addu $t2, $a2 - addu $t3, $a2 - lhu $t0, 0($t0) - lhu $t1, 0($t1) - lhu $t2, 0($t2) - lhu $t3, 0($t3) - ins $t0, $t1, 16, 16 # ins rt, rs, pos, size - Insert size bits starting - ins $t2, $t3, 16, 16 # from the LSB of rs into rt starting at position pos - sw $t0, 0($a0) - sw $t2, 4($a0) - addiu $a0, 8 - addiu $a3, -1 - bnez $a3, amips_clut_loop - addiu $a1, 4 - jr $ra - nop - - -.global amips_clut_6bit - -amips_clut_6bit: - srl $a3, 2 - li $t4, 0 - li $t5, 0 - li $t6, 0 - li $t7, 0 -amips_clut_loop6: - lbu $t0, 0($a1) # tried lw here, no improvement noticed - lbu $t1, 1($a1) - lbu $t2, 2($a1) - lbu $t3, 3($a1) - ins $t4, $t0, 1, 6 - ins $t5, $t1, 1, 6 - ins $t6, $t2, 1, 6 - ins $t7, $t3, 1, 6 - addu $t0, $t4, $a2 - addu $t1, $t5, $a2 - addu $t2, $t6, $a2 - addu $t3, $t7, $a2 - lhu $t0, 0($t0) - lhu $t1, 0($t1) - lhu $t2, 0($t2) - lhu $t3, 0($t3) - ins $t0, $t1, 16, 16 # ins rt, rs, pos, size - Insert size bits starting - ins $t2, $t3, 16, 16 # from the LSB of rs into rt starting at position pos - sw $t0, 0($a0) - sw $t2, 4($a0) - addiu $a0, 8 - addiu $a3, -1 - bnez $a3, amips_clut_loop6 - addiu $a1, 4 - jr $ra - nop - - -# $a0 - pd, $a1 - tile word, $a2 - pal -# ext rt, rs, pos, size // Extract size bits from position pos in rs and store in rt - -.macro TilePixelPrep shift dreg offs -.if \shift - ext \dreg, $a1, \shift, 4 -.else - andi \dreg, $a1, 0xf -.endif -.if \offs - sltu $t8, $0, \dreg - ins $t9, $t8, \offs, 1 -.else - sltu $t9, $0, \dreg -.endif -.endm - -.macro TileStartCode - sll $a1, $a1, 1 - lui $t1, 
%hi(Pico+0x10000) - addu $a1, $a1, $t1 - lw $a1, %lo(Pico+0x10000)($a1) # Pico.vram + addr - beqz $a1, TileEmpty - rotr $t1, $a1, 4 - beq $t1, $a1, SingleColor - and $v0, $0 # not empty tile -.endm - -.macro TileEndCode - xori $t8, $t9, 0xff - beqz $t8, tile11111111 # common case - lui $v1, %hi(HighCol) - lui $t8, %hi(TileTable) - ins $t8, $t9, 2, 8 - lw $t8, %lo(TileTable)($t8) - lw $v1, %lo(HighCol)($v1) - jr $t8 - addu $a0, $v1 -.endm - - -.global TileNorm - -TileNorm: - TileStartCode - TilePixelPrep 12, $t0, 0 - TilePixelPrep 8, $t1, 1 - TilePixelPrep 4, $t2, 2 - TilePixelPrep 0, $t3, 3 - TilePixelPrep 28, $t4, 4 - TilePixelPrep 24, $t5, 5 - TilePixelPrep 20, $t6, 6 - TilePixelPrep 16, $t7, 7 - TileEndCode - - -.global TileFlip - -TileFlip: - TileStartCode - TilePixelPrep 16, $t0, 0 - TilePixelPrep 20, $t1, 1 - TilePixelPrep 24, $t2, 2 - TilePixelPrep 28, $t3, 3 - TilePixelPrep 0, $t4, 4 - TilePixelPrep 4, $t5, 5 - TilePixelPrep 8, $t6, 6 - TilePixelPrep 12, $t7, 7 - TileEndCode - - -SingleColor: - lui $t9, %hi(HighCol) - lw $t9, %lo(HighCol)($t9) - andi $t0, $a1, 0xf - or $t0, $t0, $a2 - addu $a0, $t9 - sb $t0, 0($a0) - sb $t0, 1($a0) - sb $t0, 2($a0) - sb $t0, 3($a0) - sb $t0, 4($a0) - sb $t0, 5($a0) - sb $t0, 6($a0) - jr $ra - sb $t0, 7($a0) - -TileEmpty: - jr $ra - or $v0, $0, 1 # empty tile - -tile11111111: - lw $v1, %lo(HighCol)($v1) - or $t0, $t0, $a2 - addu $a0, $v1 - sb $t0, 0($a0) -tile11111110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile11111100: - or $t2, $t2, $a2 - sb $t2, 2($a0) -tile11111000: - or $t3, $t3, $a2 - sb $t3, 3($a0) -tile11110000: - or $t4, $t4, $a2 - sb $t4, 4($a0) -tile11100000: - or $t5, $t5, $a2 - sb $t5, 5($a0) -tile11000000: - or $t6, $t6, $a2 - sb $t6, 6($a0) -tile10000000: - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11111101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - 
sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11111011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile11111010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11111001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11110111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile11110110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile11110100: - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11110101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11110011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile11110010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11110001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11101111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile11101110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile11101100: - or $t2, $t2, $a2 - sb $t2, 2($a0) -tile11101000: - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11101101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 
2($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11101011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile11101010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11101001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11100111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile11100110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile11100100: - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11100101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11100011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile11100010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11100001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11011111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile11011110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile11011100: - or $t2, $t2, $a2 - sb $t2, 2($a0) -tile11011000: - or $t3, $t3, $a2 - sb $t3, 3($a0) -tile11010000: - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11011101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t6, $t6, $a2 - sb 
$t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11011011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile11011010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11011001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11010111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile11010110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile11010100: - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11010101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11010011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile11010010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11010001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11001111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile11001110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile11001100: - or $t2, $t2, $a2 - sb $t2, 2($a0) -tile11001000: - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11001101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11001011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile11001010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t3, $t3, $a2 - sb $t3, 
3($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11001001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11000111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile11000110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile11000100: - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11000101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11000011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile11000010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile11000001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t6, $t6, $a2 - sb $t6, 6($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10111111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile10111110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile10111100: - or $t2, $t2, $a2 - sb $t2, 2($a0) -tile10111000: - or $t3, $t3, $a2 - sb $t3, 3($a0) -tile10110000: - or $t4, $t4, $a2 - sb $t4, 4($a0) -tile10100000: - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10111101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10111011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile10111010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10111001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t7, $t7, $a2 - 
jr $ra - sb $t7, 7($a0) -tile10110111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile10110110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile10110100: - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10110101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10110011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile10110010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10110001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10101111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile10101110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile10101100: - or $t2, $t2, $a2 - sb $t2, 2($a0) -tile10101000: - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10101101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10101011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile10101010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10101001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10100111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile10100110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile10100100: - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) 
-tile10100101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10100011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile10100010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10100001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10011111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile10011110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile10011100: - or $t2, $t2, $a2 - sb $t2, 2($a0) -tile10011000: - or $t3, $t3, $a2 - sb $t3, 3($a0) -tile10010000: - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10011101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10011011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile10011010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10011001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10010111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile10010110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile10010100: - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10010101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10010011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile10010010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10010001: - or $t0, $t0, $a2 - sb 
$t0, 0($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10001111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile10001110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile10001100: - or $t2, $t2, $a2 - sb $t2, 2($a0) -tile10001000: - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10001101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10001011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile10001010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10001001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10000111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile10000110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile10000100: - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10000101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10000011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile10000010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile10000001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t7, $t7, $a2 - jr $ra - sb $t7, 7($a0) -tile01111111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile01111110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile01111100: - or $t2, $t2, $a2 - sb $t2, 2($a0) -tile01111000: - or $t3, $t3, $a2 - sb $t3, 3($a0) -tile01110000: - or $t4, $t4, $a2 - sb $t4, 4($a0) -tile01100000: - or $t5, $t5, $a2 - sb $t5, 5($a0) -tile01000000: - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile00000000: -tile01111101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - jr 
$ra - sb $t6, 6($a0) -tile01111011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile01111010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01111001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01110111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile01110110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile01110100: - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01110101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01110011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile01110010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01110001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01101111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile01101110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile01101100: - or $t2, $t2, $a2 - sb $t2, 2($a0) -tile01101000: - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01101101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01101011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile01101010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t5, $t5, $a2 - sb $t5, 
5($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01101001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01100111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile01100110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile01100100: - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01100101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01100011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile01100010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01100001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t5, $t5, $a2 - sb $t5, 5($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01011111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile01011110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile01011100: - or $t2, $t2, $a2 - sb $t2, 2($a0) -tile01011000: - or $t3, $t3, $a2 - sb $t3, 3($a0) -tile01010000: - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01011101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01011011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile01011010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01011001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01010111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile01010110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile01010100: - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t4, $t4, $a2 
- sb $t4, 4($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01010101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01010011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile01010010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01010001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01001111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile01001110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile01001100: - or $t2, $t2, $a2 - sb $t2, 2($a0) -tile01001000: - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01001101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01001011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile01001010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01001001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01000111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile01000110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile01000100: - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01000101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01000011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile01000010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile01000001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t6, $t6, $a2 - jr $ra - sb $t6, 6($a0) -tile00111111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile00111110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile00111100: - or $t2, $t2, $a2 - sb $t2, 
2($a0) -tile00111000: - or $t3, $t3, $a2 - sb $t3, 3($a0) -tile00110000: - or $t4, $t4, $a2 - sb $t4, 4($a0) -tile00100000: - or $t5, $t5, $a2 - jr $ra - sb $t5, 5($a0) -tile00111101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - jr $ra - sb $t5, 5($a0) -tile00111011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile00111010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - jr $ra - sb $t5, 5($a0) -tile00111001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - jr $ra - sb $t5, 5($a0) -tile00110111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile00110110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile00110100: - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - jr $ra - sb $t5, 5($a0) -tile00110101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - jr $ra - sb $t5, 5($a0) -tile00110011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile00110010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - jr $ra - sb $t5, 5($a0) -tile00110001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t4, $t4, $a2 - sb $t4, 4($a0) - or $t5, $t5, $a2 - jr $ra - sb $t5, 5($a0) -tile00101111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile00101110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile00101100: - or $t2, $t2, $a2 - sb $t2, 2($a0) -tile00101000: - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t5, $t5, $a2 - jr $ra - sb $t5, 5($a0) -tile00101101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t5, $t5, $a2 - jr $ra - sb $t5, 5($a0) -tile00101011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile00101010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - 
or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t5, $t5, $a2 - jr $ra - sb $t5, 5($a0) -tile00101001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t5, $t5, $a2 - jr $ra - sb $t5, 5($a0) -tile00100111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile00100110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile00100100: - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t5, $t5, $a2 - jr $ra - sb $t5, 5($a0) -tile00100101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t5, $t5, $a2 - jr $ra - sb $t5, 5($a0) -tile00100011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile00100010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t5, $t5, $a2 - jr $ra - sb $t5, 5($a0) -tile00100001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t5, $t5, $a2 - jr $ra - sb $t5, 5($a0) -tile00011111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile00011110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile00011100: - or $t2, $t2, $a2 - sb $t2, 2($a0) -tile00011000: - or $t3, $t3, $a2 - sb $t3, 3($a0) -tile00010000: - or $t4, $t4, $a2 - jr $ra - sb $t4, 4($a0) -tile00011101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - jr $ra - sb $t4, 4($a0) -tile00011011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile00011010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - jr $ra - sb $t4, 4($a0) -tile00011001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t3, $t3, $a2 - sb $t3, 3($a0) - or $t4, $t4, $a2 - jr $ra - sb $t4, 4($a0) -tile00010111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile00010110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile00010100: - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t4, $t4, $a2 - jr $ra - sb $t4, 4($a0) -tile00010101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t4, $t4, $a2 - jr $ra - sb $t4, 4($a0) -tile00010011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile00010010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t4, $t4, $a2 - jr $ra - sb 
$t4, 4($a0) -tile00010001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t4, $t4, $a2 - jr $ra - sb $t4, 4($a0) -tile00001111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile00001110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile00001100: - or $t2, $t2, $a2 - sb $t2, 2($a0) -tile00001000: - or $t3, $t3, $a2 - jr $ra - sb $t3, 3($a0) -tile00001101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - sb $t2, 2($a0) - or $t3, $t3, $a2 - jr $ra - sb $t3, 3($a0) -tile00001011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile00001010: - or $t1, $t1, $a2 - sb $t1, 1($a0) - or $t3, $t3, $a2 - jr $ra - sb $t3, 3($a0) -tile00001001: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t3, $t3, $a2 - jr $ra - sb $t3, 3($a0) -tile00000111: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile00000110: - or $t1, $t1, $a2 - sb $t1, 1($a0) -tile00000100: - or $t2, $t2, $a2 - jr $ra - sb $t2, 2($a0) -tile00000101: - or $t0, $t0, $a2 - sb $t0, 0($a0) - or $t2, $t2, $a2 - jr $ra - sb $t2, 2($a0) -tile00000011: - or $t0, $t0, $a2 - sb $t0, 0($a0) -tile00000010: - or $t1, $t1, $a2 - jr $ra - sb $t1, 1($a0) -tile00000001: - or $t0, $t0, $a2 - jr $ra - sb $t0, 0($a0) - -.data -.align 4 - -TileTable: - .long 000000000000, tile00000001, tile00000010, tile00000011, tile00000100, tile00000101, tile00000110, tile00000111 - .long tile00001000, tile00001001, tile00001010, tile00001011, tile00001100, tile00001101, tile00001110, tile00001111 - .long tile00010000, tile00010001, tile00010010, tile00010011, tile00010100, tile00010101, tile00010110, tile00010111 - .long tile00011000, tile00011001, tile00011010, tile00011011, tile00011100, tile00011101, tile00011110, tile00011111 - .long tile00100000, tile00100001, tile00100010, tile00100011, tile00100100, tile00100101, tile00100110, tile00100111 - .long tile00101000, tile00101001, tile00101010, tile00101011, tile00101100, tile00101101, tile00101110, tile00101111 - .long tile00110000, tile00110001, tile00110010, tile00110011, tile00110100, tile00110101, tile00110110, tile00110111 - .long 
tile00111000, tile00111001, tile00111010, tile00111011, tile00111100, tile00111101, tile00111110, tile00111111 - .long tile01000000, tile01000001, tile01000010, tile01000011, tile01000100, tile01000101, tile01000110, tile01000111 - .long tile01001000, tile01001001, tile01001010, tile01001011, tile01001100, tile01001101, tile01001110, tile01001111 - .long tile01010000, tile01010001, tile01010010, tile01010011, tile01010100, tile01010101, tile01010110, tile01010111 - .long tile01011000, tile01011001, tile01011010, tile01011011, tile01011100, tile01011101, tile01011110, tile01011111 - .long tile01100000, tile01100001, tile01100010, tile01100011, tile01100100, tile01100101, tile01100110, tile01100111 - .long tile01101000, tile01101001, tile01101010, tile01101011, tile01101100, tile01101101, tile01101110, tile01101111 - .long tile01110000, tile01110001, tile01110010, tile01110011, tile01110100, tile01110101, tile01110110, tile01110111 - .long tile01111000, tile01111001, tile01111010, tile01111011, tile01111100, tile01111101, tile01111110, tile01111111 - .long tile10000000, tile10000001, tile10000010, tile10000011, tile10000100, tile10000101, tile10000110, tile10000111 - .long tile10001000, tile10001001, tile10001010, tile10001011, tile10001100, tile10001101, tile10001110, tile10001111 - .long tile10010000, tile10010001, tile10010010, tile10010011, tile10010100, tile10010101, tile10010110, tile10010111 - .long tile10011000, tile10011001, tile10011010, tile10011011, tile10011100, tile10011101, tile10011110, tile10011111 - .long tile10100000, tile10100001, tile10100010, tile10100011, tile10100100, tile10100101, tile10100110, tile10100111 - .long tile10101000, tile10101001, tile10101010, tile10101011, tile10101100, tile10101101, tile10101110, tile10101111 - .long tile10110000, tile10110001, tile10110010, tile10110011, tile10110100, tile10110101, tile10110110, tile10110111 - .long tile10111000, tile10111001, tile10111010, tile10111011, tile10111100, tile10111101, 
tile10111110, tile10111111 - .long tile11000000, tile11000001, tile11000010, tile11000011, tile11000100, tile11000101, tile11000110, tile11000111 - .long tile11001000, tile11001001, tile11001010, tile11001011, tile11001100, tile11001101, tile11001110, tile11001111 - .long tile11010000, tile11010001, tile11010010, tile11010011, tile11010100, tile11010101, tile11010110, tile11010111 - .long tile11011000, tile11011001, tile11011010, tile11011011, tile11011100, tile11011101, tile11011110, tile11011111 - .long tile11100000, tile11100001, tile11100010, tile11100011, tile11100100, tile11100101, tile11100110, tile11100111 - .long tile11101000, tile11101001, tile11101010, tile11101011, tile11101100, tile11101101, tile11101110, tile11101111 - .long tile11110000, tile11110001, tile11110010, tile11110011, tile11110100, tile11110101, tile11110110, tile11110111 - .long tile11111000, tile11111001, tile11111010, tile11111011, tile11111100, tile11111101, tile11111110, tile11111111 - -# vim:filetype=mips From 24aab4da7352b5cecad4e09b0dcc0807b14786f2 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 14 Oct 2017 21:28:24 +0300 Subject: [PATCH 0119/1110] let it build on msvc supposedly for the original XBox? 
--- pico/32x/32x.c | 6 +-- pico/32x/draw.c | 2 +- pico/cd/mcd.c | 12 +++-- pico/draw.c | 2 +- pico/pico_port.h | 7 +++ platform/libretro/libretro.c | 13 +++-- .../libretro/msvc/msvc-2010/msvc-2010.vcxproj | 52 ++++++++++++++----- .../msvc/msvc-2010/msvc-2010.vcxproj.filters | 34 ++++++------ 8 files changed, 84 insertions(+), 44 deletions(-) diff --git a/pico/32x/32x.c b/pico/32x/32x.c index b20ebf30..1c166cee 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -297,9 +297,9 @@ typedef void (event_cb)(unsigned int now); unsigned int p32x_event_times[P32X_EVENT_COUNT]; static unsigned int event_time_next; static event_cb *p32x_event_cbs[P32X_EVENT_COUNT] = { - [P32X_EVENT_PWM] = p32x_pwm_irq_event, - [P32X_EVENT_FILLEND] = fillend_event, - [P32X_EVENT_HINT] = hint_event, + p32x_pwm_irq_event, // P32X_EVENT_PWM + fillend_event, // P32X_EVENT_FILLEND + hint_event, // P32X_EVENT_HINT }; // schedule event at some time 'after', in m68k clocks diff --git a/pico/32x/draw.c b/pico/32x/draw.c index f8021502..ee541bd9 100644 --- a/pico/32x/draw.c +++ b/pico/32x/draw.c @@ -292,7 +292,7 @@ void PicoDraw32xLayerMdOnly(int offs, int lines) for (l = 0; l < lines; l++) { if (have_scan) { PicoScan32xBegin(l + offs); - dst = Pico.est.DrawLineDest + poffs; + dst = (unsigned short *)Pico.est.DrawLineDest + poffs; } for (p = 0; p < plen; p += 4) { dst[p + 0] = pal[*pmd++]; diff --git a/pico/cd/mcd.c b/pico/cd/mcd.c index 043b4a25..af320bd0 100644 --- a/pico/cd/mcd.c +++ b/pico/cd/mcd.c @@ -30,9 +30,11 @@ PICO_INTERNAL void PicoExitMCD(void) PICO_INTERNAL void PicoPowerMCD(void) { + int fmt_size; + SekCycleCntS68k = SekCycleAimS68k = 0; - int fmt_size = sizeof(formatted_bram); + fmt_size = sizeof(formatted_bram); memset(Pico_mcd->prg_ram, 0, sizeof(Pico_mcd->prg_ram)); memset(Pico_mcd->word_ram2M, 0, sizeof(Pico_mcd->word_ram2M)); memset(Pico_mcd->pcm_ram, 0, sizeof(Pico_mcd->pcm_ram)); @@ -200,10 +202,10 @@ typedef void (event_cb)(unsigned int now); unsigned int 
pcd_event_times[PCD_EVENT_COUNT]; static unsigned int event_time_next; static event_cb *pcd_event_cbs[PCD_EVENT_COUNT] = { - [PCD_EVENT_CDC] = pcd_cdc_event, - [PCD_EVENT_TIMER3] = pcd_int3_timer_event, - [PCD_EVENT_GFX] = gfx_update, - [PCD_EVENT_DMA] = pcd_dma_event, + pcd_cdc_event, // PCD_EVENT_CDC + pcd_int3_timer_event, // PCD_EVENT_TIMER3 + gfx_update, // PCD_EVENT_GFX + pcd_dma_event, // PCD_EVENT_DMA }; void pcd_event_schedule(unsigned int now, enum pcd_event event, int after) diff --git a/pico/draw.c b/pico/draw.c index bb051b6d..83010a49 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -1617,7 +1617,7 @@ void PicoDrawSetOutBuf(void *dest, int increment) { DrawLineDestBase = dest; DrawLineDestIncrement = increment; - Pico.est.DrawLineDest = DrawLineDestBase + Pico.est.DrawScanline * increment; + Pico.est.DrawLineDest = (char *)DrawLineDestBase + Pico.est.DrawScanline * increment; } void PicoDrawSetInternalBuf(void *dest, int increment) diff --git a/pico/pico_port.h b/pico/pico_port.h index f1d95a56..605778d8 100644 --- a/pico/pico_port.h +++ b/pico/pico_port.h @@ -15,4 +15,11 @@ #define ALIGNED(n) #endif +#ifdef _MSC_VER +#define snprintf _snprintf +#define strcasecmp _stricmp +#define strncasecmp _strnicmp +#define strdup _strdup +#endif + #endif // PICO_PORT_INCLUDED diff --git a/platform/libretro/libretro.c b/platform/libretro/libretro.c index e7f588a1..7896ffbf 100644 --- a/platform/libretro/libretro.c +++ b/platform/libretro/libretro.c @@ -484,6 +484,8 @@ int plat_mem_set_exec(void *ptr, size_t size) void emu_video_mode_change(int start_line, int line_count, int is_32cols) { + struct retro_system_av_info av_info; + memset(vout_buf, 0, 320 * 240 * 2); vout_width = is_32cols ? 
256 : 320; PicoDrawSetOutBuf(vout_buf, vout_width * 2); @@ -494,7 +496,6 @@ void emu_video_mode_change(int start_line, int line_count, int is_32cols) vout_offset = vout_width * start_line; // Update the geometry - struct retro_system_av_info av_info; retro_get_system_av_info(&av_info); environ_cb(RETRO_ENVIRONMENT_SET_GEOMETRY, &av_info); } @@ -566,6 +567,8 @@ void retro_get_system_info(struct retro_system_info *info) void retro_get_system_av_info(struct retro_system_av_info *info) { + float common_width; + memset(info, 0, sizeof(*info)); info->timing.fps = Pico.m.pal ? 50 : 60; info->timing.sample_rate = 44100; @@ -574,7 +577,7 @@ void retro_get_system_av_info(struct retro_system_av_info *info) info->geometry.max_width = vout_width; info->geometry.max_height = vout_height; - float common_width = vout_width; + common_width = vout_width; if (user_vout_width != 0) common_width = user_vout_width; @@ -1208,6 +1211,8 @@ static enum input_device input_name_to_val(const char *name) static void update_variables(void) { struct retro_variable var; + int OldPicoRegionOverride; + float old_user_vout_width; var.value = NULL; var.key = "picodrive_input1"; @@ -1237,7 +1242,7 @@ static void update_variables(void) PicoOpt &= ~POPT_EN_MCD_RAMCART; } - int OldPicoRegionOverride = PicoRegionOverride; + OldPicoRegionOverride = PicoRegionOverride; var.value = NULL; var.key = "picodrive_region"; if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { @@ -1261,7 +1266,7 @@ static void update_variables(void) PsndRerate(1); } - float old_user_vout_width = user_vout_width; + old_user_vout_width = user_vout_width; var.value = NULL; var.key = "picodrive_aspect"; if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { diff --git a/platform/libretro/msvc/msvc-2010/msvc-2010.vcxproj b/platform/libretro/msvc/msvc-2010/msvc-2010.vcxproj index 4b8784a3..e9bde075 100644 --- a/platform/libretro/msvc/msvc-2010/msvc-2010.vcxproj +++ 
b/platform/libretro/msvc/msvc-2010/msvc-2010.vcxproj @@ -51,9 +51,10 @@ Level3 Disabled - WIN32;_DEBUG;_WINDOWS;_USRDLL;MSVC2010_EXPORTS;%(PreprocessorDefinitions);INLINE=_inline;_CRT_SECURE_NO_WARNINGS;EMU_F68K;_USE_CZ80;NO_ZLIB;FAMEC_NO_GOTOS + WIN32;_DEBUG;_WINDOWS;_USRDLL;MSVC2010_EXPORTS;%(PreprocessorDefinitions);_CRT_SECURE_NO_WARNINGS;EMU_F68K;_USE_CZ80;FAMEC_NO_GOTOS CompileAsC - $(SolutionDir)\..\..\..\;$(SolutionDIr)\..\..\..\pico;%(AdditionalIncludeDirectories) + $(SolutionDir)\..\..\..\;$(SolutionDIr)\..\..\..\pico;$(SolutionDIr)\..\..\..\zlib;%(AdditionalIncludeDirectories) + 4018;4090;4101;4146;4244 Windows @@ -69,9 +70,10 @@ MaxSpeed true true - WIN32;NDEBUG;_WINDOWS;_USRDLL;MSVC2010_EXPORTS;%(PreprocessorDefinitions);INLINE=_inline;_CRT_SECURE_NO_WARNINGS;EMU_F68K;_USE_CZ80;NO_ZLIB;FAMEC_NO_GOTOS + WIN32;NDEBUG;_WINDOWS;_USRDLL;MSVC2010_EXPORTS;%(PreprocessorDefinitions);_CRT_SECURE_NO_WARNINGS;EMU_F68K;_USE_CZ80;FAMEC_NO_GOTOS CompileAsC - $(SolutionDir)\..\..\..\;$(SolutionDIr)\..\..\..\pico;%(AdditionalIncludeDirectories) + $(SolutionDir)\..\..\..\;$(SolutionDIr)\..\..\..\pico;$(SolutionDIr)\..\..\..\zlib;%(AdditionalIncludeDirectories) + 4018;4090;4101;4146;4244 Windows @@ -88,13 +90,23 @@ - - + + $(IntDir)\32x\ + $(IntDir)\32x\ + + + $(IntDir)\32x\ + $(IntDir)\32x\ + - + + + $(IntDir)\svp\ + $(IntDir)\svp\ + @@ -106,10 +118,19 @@ - - + + $(IntDir)\cd\ + $(IntDir)\cd\ + + + $(IntDir)\cd\ + $(IntDir)\cd\ + - + + $(IntDir)\cd\ + $(IntDir)\cd\ + @@ -120,8 +141,14 @@ - - + + $(IntDir)\pico\ + $(IntDir)\pico\ + + + $(IntDir)\pico\ + $(IntDir)\pico\ + @@ -133,7 +160,6 @@ - diff --git a/platform/libretro/msvc/msvc-2010/msvc-2010.vcxproj.filters b/platform/libretro/msvc/msvc-2010/msvc-2010.vcxproj.filters index 1a70e495..a1c0e0f6 100644 --- a/platform/libretro/msvc/msvc-2010/msvc-2010.vcxproj.filters +++ b/platform/libretro/msvc/msvc-2010/msvc-2010.vcxproj.filters @@ -120,9 +120,6 @@ Source Files\unzip - - Source Files\unzip - Source Files\pico @@ 
-198,22 +195,22 @@ Source Files\pico\cd - + Source Files\pico\cd - + Source Files\pico\cd Source Files\pico\cd - + Source Files\pico\cd Source Files\pico\32x - + Source Files\pico\32x @@ -225,7 +222,10 @@ Source Files\pico\carthw - + + Source Files\pico\carthw + + Source Files\pico\carthw\svp @@ -261,17 +261,17 @@ Source Files\cpu\sh2\mame - - Source Files - - - Source Files\pico\pico - - - Source Files\pico\pico - Source Files\pico\pico + + Source Files\pico\pico + + + Source Files\pico\pico + + + Source Files\pico\32x + \ No newline at end of file From ba11a48115de2d25531ddd5fec841ebee42166bd Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 14 Oct 2017 02:13:40 +0300 Subject: [PATCH 0120/1110] fix clang build yet another workaround for it... --- pico/cd/cdd.c | 2 ++ pico/cd/memory.c | 7 ++++++- platform/common/menu_pico.c | 2 -- platform/libpicofe | 2 +- 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/pico/cd/cdd.c b/pico/cd/cdd.c index c6b24b82..501d09e8 100644 --- a/pico/cd/cdd.c +++ b/pico/cd/cdd.c @@ -122,12 +122,14 @@ static const uint32 toc_ffightj[29] = 14553, 9834, 10542, 1699, 1792, 1781, 3783, 3052 }; +#if 0 /* supported WAVE file header (16-bit stereo samples @44.1kHz) */ static const unsigned char waveHeader[32] = { 0x57,0x41,0x56,0x45,0x66,0x6d,0x74,0x20,0x10,0x00,0x00,0x00,0x01,0x00,0x02,0x00, 0x44,0xac,0x00,0x00,0x10,0xb1,0x02,0x00,0x04,0x00,0x10,0x00,0x64,0x61,0x74,0x61 }; +#endif #ifdef USE_LIBTREMOR #ifdef DISABLE_MANY_OGG_OPEN_FILES diff --git a/pico/cd/memory.c b/pico/cd/memory.c index 94b81b7b..6890b576 100644 --- a/pico/cd/memory.c +++ b/pico/cd/memory.c @@ -1165,7 +1165,12 @@ PICO_INTERNAL void PicoMemSetupCD(void) // setup FAME fetchmap { +#ifdef __clang__ + volatile // prevent strange relocs from clang +#endif + unsigned long ptr_ram = (unsigned long)PicoMem.ram; int i; + // M68k // by default, point everything to fitst 64k of ROM (BIOS) for (i = 0; i < M68K_FETCHBANK1; i++) @@ -1175,7 +1180,7 @@ PICO_INTERNAL void 
PicoMemSetupCD(void) PicoCpuFM68k.Fetch[i] = (unsigned long)Pico.rom; // .. and RAM for (i = M68K_FETCHBANK1*14/16; i < M68K_FETCHBANK1; i++) - PicoCpuFM68k.Fetch[i] = (unsigned long)PicoMem.ram - (i<<(24-FAMEC_FETCHBITS)); + PicoCpuFM68k.Fetch[i] = ptr_ram - (i<<(24-FAMEC_FETCHBITS)); // S68k // PRG RAM is default for (i = 0; i < M68K_FETCHBANK1; i++) diff --git a/platform/common/menu_pico.c b/platform/common/menu_pico.c index f928f054..0f9fa612 100644 --- a/platform/common/menu_pico.c +++ b/platform/common/menu_pico.c @@ -406,8 +406,6 @@ static const char h_srcart[] = "Emulate the save RAM cartridge accessory\n" "most games don't need this"; static const char h_scfx[] = "Emulate scale/rotate ASIC chip for graphics effects\n" "disable to improve performance"; -static const char h_bsync[] = "More accurate mode for CPUs (needed for some games)\n" - "disable to improve performance"; static menu_entry e_menu_cd_options[] = { diff --git a/platform/libpicofe b/platform/libpicofe index 448ec62f..f287890d 160000 --- a/platform/libpicofe +++ b/platform/libpicofe @@ -1 +1 @@ -Subproject commit 448ec62f85a90e8a27368ddc05057a5a714944b8 +Subproject commit f287890d65ad36ca75bb71d05745693ae78b1490 From fdcfd323747dceacb1ec2e86919d437896635bb7 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 14 Oct 2017 21:10:25 +0300 Subject: [PATCH 0121/1110] get rid of custom memcpy funcs not used for anything important, just a maintenance burden --- pico/misc.c | 45 ------------------------- pico/misc_amips.s | 74 ------------------------------------------ pico/misc_arm.s | 70 --------------------------------------- pico/mode4.c | 2 +- pico/pico_int.h | 2 -- platform/common/emu.c | 4 +-- platform/gp2x/940ctl.c | 2 +- platform/gp2x/emu.c | 4 +-- platform/psp/emu.c | 2 +- 9 files changed, 7 insertions(+), 198 deletions(-) diff --git a/pico/misc.c b/pico/misc.c index a500ac8c..47842e3f 100644 --- a/pico/misc.c +++ b/pico/misc.c @@ -87,35 +87,6 @@ const unsigned char hcounts_32[] = { #ifndef 
_ASM_MISC_C -typedef struct -{ - int b0; - int b1; - int b2; - int b3; - int b4; - int b5; - int b6; - int b7; -} intblock; - -PICO_INTERNAL_ASM void memcpy16(unsigned short *dest, unsigned short *src, int count) -{ - if ((((long)dest | (long)src) & 3) == 0) - { - if (count >= 32) { - memcpy32((int *)dest, (int *)src, count/2); - count&=1; - } else { - for (; count >= 2; count -= 2, dest+=2, src+=2) - *(int *)dest = *(int *)src; - } - } - while (count--) - *dest++ = *src++; -} - - PICO_INTERNAL_ASM void memcpy16bswap(unsigned short *dest, void *src, int count) { unsigned char *src_ = src; @@ -125,22 +96,6 @@ PICO_INTERNAL_ASM void memcpy16bswap(unsigned short *dest, void *src, int count) } #ifndef _ASM_MISC_C_AMIPS -PICO_INTERNAL_ASM void memcpy32(void *dest_in, const void *src_in, int count) -{ - const intblock *bs = (intblock *) src_in; - intblock *bd = (intblock *) dest_in; - const int *src; - int *dest; - - for (; count >= sizeof(*bd)/4; count -= sizeof(*bd)/4) - *bd++ = *bs++; - - dest = (int *)bd; src = (const int *)bs; - while (count--) - *dest++ = *src++; -} - - PICO_INTERNAL_ASM void memset32(void *dest_in, int c, int count) { int *dest = dest_in; diff --git a/pico/misc_amips.s b/pico/misc_amips.s index 3aa70c77..7c3f1ade 100644 --- a/pico/misc_amips.s +++ b/pico/misc_amips.s @@ -101,78 +101,4 @@ ms32u_return: nop -.globl memcpy32 # int *dest, int *src, int count - -memcpy32: -mc32_aloop: - andi $t0, $a0, 0x3f - beqz $t0, mc32_bloop_prep - nop - lw $t1, 0($a1) - addiu $a2, -1 - sw $t1, 0($a0) - beqz $a2, mc32_return - addiu $a0, 4 - j mc32_aloop - addiu $a1, 4 - -mc32_bloop_prep: - srl $t0, $a2, 4 # we will do 64 bytes per iteration (cache line) - beqz $t0, mc32_bloop_end - -mc32_bloop: - addiu $t0, -1 - cache 0x18, ($a0) # create dirty exclusive - lw $t2, 0x00($a1) - lw $t3, 0x04($a1) - lw $t4, 0x08($a1) - lw $t5, 0x0c($a1) - lw $t6, 0x10($a1) - lw $t7, 0x14($a1) - lw $t8, 0x18($a1) - lw $t9, 0x1c($a1) - sw $t2, 0x00($a0) - sw $t3, 0x04($a0) - sw $t4, 
0x08($a0) - sw $t5, 0x0c($a0) - sw $t6, 0x10($a0) - sw $t7, 0x14($a0) - sw $t8, 0x18($a0) - sw $t9, 0x1c($a0) - lw $t2, 0x20($a1) - lw $t3, 0x24($a1) - lw $t4, 0x28($a1) - lw $t5, 0x2c($a1) - lw $t6, 0x30($a1) - lw $t7, 0x34($a1) - lw $t8, 0x38($a1) - lw $t9, 0x3c($a1) - sw $t2, 0x20($a0) - sw $t3, 0x24($a0) - sw $t4, 0x28($a0) - sw $t5, 0x2c($a0) - sw $t6, 0x30($a0) - sw $t7, 0x34($a0) - sw $t8, 0x38($a0) - sw $t9, 0x3c($a0) - addiu $a0, 0x40 - bnez $t0, mc32_bloop - addiu $a1, 0x40 - -mc32_bloop_end: - andi $a2, $a2, 0x0f - beqz $a2, mc32_return - -mc32_cloop: - lw $t1, 0($a1) - addiu $a2, -1 - addiu $a1, 4 - sw $t1, 0($a0) - bnez $a2, mc32_cloop - addiu $a0, 4 - -mc32_return: - jr $ra - nop - # vim:filetype=mips diff --git a/pico/misc_arm.s b/pico/misc_arm.s index 15662a7b..56c74019 100644 --- a/pico/misc_arm.s +++ b/pico/misc_arm.s @@ -6,44 +6,6 @@ * See COPYING file in the top-level directory. */ -.global memcpy16 @ unsigned short *dest, unsigned short *src, int count - -memcpy16: - eor r3, r0, r1 - tst r3, #2 - bne mcp16_cant_align - - tst r0, #2 - ldrneh r3, [r1], #2 - subne r2, r2, #1 - strneh r3, [r0], #2 - - subs r2, r2, #4 - bmi mcp16_fin - -mcp16_loop: - ldmia r1!, {r3,r12} - subs r2, r2, #4 - stmia r0!, {r3,r12} - bpl mcp16_loop - -mcp16_fin: - tst r2, #2 - ldrne r3, [r1], #4 - strne r3, [r0], #4 - ands r2, r2, #1 - bxeq lr - -mcp16_cant_align: - ldrh r3, [r1], #2 - subs r2, r2, #1 - strh r3, [r0], #2 - bne mcp16_cant_align - - bx lr - - - @ 0x12345678 -> 0x34127856 @ r4=temp, lr=0x00ff00ff .macro bswap reg @@ -52,7 +14,6 @@ mcp16_cant_align: orr \reg, \reg, r4, lsl #8 .endm - @ dest must be halfword aligned, src can be unaligned .global memcpy16bswap @ unsigned short *dest, void *src, int count @@ -121,37 +82,6 @@ mcp16bs_cant_align2: bx lr - -.global memcpy32 @ int *dest, int *src, int count - -memcpy32: - stmfd sp!, {r4,lr} - - subs r2, r2, #4 - bmi mcp32_fin - -mcp32_loop: - ldmia r1!, {r3,r4,r12,lr} - subs r2, r2, #4 - stmia r0!, {r3,r4,r12,lr} - 
bpl mcp32_loop - -mcp32_fin: - tst r2, #3 - ldmeqfd sp!, {r4,pc} - tst r2, #1 - ldrne r3, [r1], #4 - strne r3, [r0], #4 - -mcp32_no_unal1: - tst r2, #2 - ldmneia r1!, {r3,r12} - ldmfd sp!, {r4,lr} - stmneia r0!, {r3,r12} - bx lr - - - .global memset32 @ int *dest, int c, int count memset32: diff --git a/pico/mode4.c b/pico/mode4.c index 55e6d104..0f3d766b 100644 --- a/pico/mode4.c +++ b/pico/mode4.c @@ -287,7 +287,7 @@ static void FinalizeLine8bitM4(int line) if (!(PicoOpt & POPT_DIS_32C_BORDER)) pd += 32; - memcpy32((int *)pd, (int *)(Pico.est.HighCol+8), 256/4); + memcpy(pd, Pico.est.HighCol + 8, 256); } void PicoDrawSetOutputMode4(pdso_t which) diff --git a/pico/pico_int.h b/pico/pico_int.h index d4fc8357..e3bf03a8 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -825,9 +825,7 @@ unsigned char PicoVideoRead8HV_L(void); extern int (*PicoDmaHook)(unsigned int source, int len, unsigned short **base, unsigned int *mask); // misc.c -PICO_INTERNAL_ASM void memcpy16(unsigned short *dest, unsigned short *src, int count); PICO_INTERNAL_ASM void memcpy16bswap(unsigned short *dest, void *src, int count); -PICO_INTERNAL_ASM void memcpy32(void *dest, const void *src, int count); // 32bit word count PICO_INTERNAL_ASM void memset32(void *dest, int c, int count); // eeprom.c diff --git a/platform/common/emu.c b/platform/common/emu.c index 9535bfd2..85e1ba77 100644 --- a/platform/common/emu.c +++ b/platform/common/emu.c @@ -891,7 +891,7 @@ int emu_save_load_game(int load, int sram) sram_size = 0x12000; sram_data = Pico.sv.data; if (sram_data) - memcpy32((int *)sram_data, (int *)Pico_mcd->bram, 0x2000/4); + memcpy(sram_data, Pico_mcd->bram, 0x2000); } else { sram_size = 0x2000; sram_data = Pico_mcd->bram; @@ -913,7 +913,7 @@ int emu_save_load_game(int load, int sram) ret = ret > 0 ? 
0 : -1; fclose(sramFile); if ((PicoAHW & PAHW_MCD) && (PicoOpt&POPT_EN_MCD_RAMCART)) - memcpy32((int *)Pico_mcd->bram, (int *)sram_data, 0x2000/4); + memcpy(Pico_mcd->bram, sram_data, 0x2000); } else { // sram save needs some special processing // see if we have anything to save diff --git a/platform/gp2x/940ctl.c b/platform/gp2x/940ctl.c index 6ba13bd8..ff79c88a 100644 --- a/platform/gp2x/940ctl.c +++ b/platform/gp2x/940ctl.c @@ -391,7 +391,7 @@ int YM2612UpdateOne_940(int *buffer, int length, int stereo, int is_buf_empty) ym_active_chs = shared_ctl->ym_active_chs; // mix in ym buffer. is_buf_empty means nobody mixed there anything yet and it may contain trash - if (is_buf_empty && ym_active_chs) memcpy32(buffer, ym_buf, length<writebuffsel == 1) { diff --git a/platform/gp2x/emu.c b/platform/gp2x/emu.c index c0bc71f4..01fd1629 100644 --- a/platform/gp2x/emu.c +++ b/platform/gp2x/emu.c @@ -280,7 +280,7 @@ static int EmuScanEnd16_ld(unsigned int num) ld_left = ld_lines; EmuScanBegin16_ld(num); - memcpy32(Pico.est.DrawLineDest, oldline, 320 * gp2x_current_bpp / 8 / 4); + memcpy(Pico.est.DrawLineDest, oldline, 320 * gp2x_current_bpp / 8); if (emu_scan_end) emu_scan_end(ld_counter); @@ -315,7 +315,7 @@ static int make_local_pal_md(int fast_mode) bgr444_to_rgb32(localPal+0x80, Pico.est.HighPal+0x40); } else - memcpy32(localPal+0x80, localPal, 0x40); // for spr prio mess + memcpy(localPal + 0x80, localPal, 0x40 * 4); // for spr prio mess return pallen; } diff --git a/platform/psp/emu.c b/platform/psp/emu.c index dd4381bb..8373a21a 100644 --- a/platform/psp/emu.c +++ b/platform/psp/emu.c @@ -225,7 +225,7 @@ static void do_pal_update(int allow_sh, int allow_as) } else if (allow_as && (Pico.est.rendstatus & PDRAW_SPR_LO_ON_HI)) { - memcpy32((int *)dpal+0x80/2, (void *)localPal, 0x40*2/4); + memcpy(dpal + 0x80/2, localPal, 0x40*2); } } From 12f23dac6f91eb707f985ef00a5d48e9e5ef8838 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 15 Oct 2017 00:45:55 +0300 Subject: [PATCH 
0122/1110] famec: split fm68k_emulate in FAMEC_NO_GOTOS mode at least --- cpu/fame/fame.h | 19 ++++++++++--- cpu/fame/famec.c | 73 ++++++++++++++++++++++++++++++++++++++---------- pico/cd/mcd.c | 5 ++-- pico/cd/sek.c | 3 +- pico/debugCPU.c | 2 +- pico/pico_cmn.c | 2 +- pico/pico_int.h | 2 +- pico/sek.c | 11 +++----- 8 files changed, 83 insertions(+), 34 deletions(-) diff --git a/cpu/fame/fame.h b/cpu/fame/fame.h index 2f9d8508..93172c87 100644 --- a/cpu/fame/fame.h +++ b/cpu/fame/fame.h @@ -145,18 +145,29 @@ typedef struct extern M68K_CONTEXT *g_m68kcontext; +typedef enum +{ + fm68k_reason_emulate = 0, + fm68k_reason_init, + fm68k_reason_idle_install, + fm68k_reason_idle_remove, +} fm68k_call_reason; + /************************/ /* Function definition */ /************************/ /* General purpose functions */ void fm68k_init(void); -int fm68k_reset(void); -int fm68k_emulate(int n, int idle_mode); -int fm68k_would_interrupt(void); // to be called from fm68k_emulate() +int fm68k_reset(M68K_CONTEXT *ctx); +int fm68k_emulate(M68K_CONTEXT *ctx, int n, fm68k_call_reason reason); +int fm68k_would_interrupt(M68K_CONTEXT *ctx); // to be called from fm68k_emulate() -unsigned fm68k_get_pc(M68K_CONTEXT *context); +unsigned fm68k_get_pc(M68K_CONTEXT *ctx); +// PICODRIVE_HACK +int fm68k_idle_install(void); +int fm68k_idle_remove(void); #ifdef __cplusplus } diff --git a/cpu/fame/famec.c b/cpu/fame/famec.c index db4eae69..508a12f7 100644 --- a/cpu/fame/famec.c +++ b/cpu/fame/famec.c @@ -556,7 +556,7 @@ M68K_CONTEXT *g_m68kcontext; static u32 initialised = 0; #ifdef PICODRIVE_HACK -extern M68K_CONTEXT PicoCpuFM68k, PicoCpuFS68k; +extern M68K_CONTEXT PicoCpuFS68k; #endif /* Custom function handler */ @@ -640,6 +640,7 @@ static const s32 exception_cycle_table[256] = 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4 }; +static int init_jump_table(void); /***********************/ /* core main functions */ @@ -656,8 +657,8 @@ void fm68k_init(void) puts("Initializing 
FAME..."); #endif - if (!initialised) - fm68k_emulate(0, 0); + if (!initialised) + init_jump_table(); #ifdef FAMEC_DEBUG puts("FAME initialized."); @@ -673,10 +674,12 @@ void fm68k_init(void) /* M68K_NO_SUP_ADDR_SPACE (2): No se puede resetear porque no hay mapa */ /* de memoria supervisor de extraccion de opcodes */ /******************************************************************************/ -int fm68k_reset(void) +int fm68k_reset(M68K_CONTEXT *ctx) { if (!initialised) - fm68k_emulate(0, 0); + init_jump_table(); + + g_m68kcontext = ctx; // Si la CPU esta en ejecucion, salir con M68K_RUNNING if (m68kcontext.execinfo & M68K_RUNNING) @@ -731,7 +734,7 @@ static FAMEC_EXTRA_INLINE s32 interrupt_chk__(void) return 0; } -int fm68k_would_interrupt(void) +int fm68k_would_interrupt(M68K_CONTEXT *ctx) { return interrupt_chk__(); } @@ -808,7 +811,7 @@ static FAMEC_EXTRA_INLINE u32 execute_exception_group_0(s32 vect, s32 addr, u16 // main exec function ////////////////////// -int fm68k_emulate(s32 cycles, int idle_mode) +int fm68k_emulate(M68K_CONTEXT *ctx, s32 cycles, fm68k_call_reason reason) { #ifndef FAMEC_NO_GOTOS u32 Opcode; @@ -820,17 +823,23 @@ int fm68k_emulate(s32 cycles, int idle_mode) u32 flag_NotZ; u32 flag_N; u32 flag_X; -#endif - if (!initialised) + switch (reason) { + case fm68k_reason_init: goto init_jump_table; - } - #ifdef PICODRIVE_HACK - if (idle_mode == 1) goto idle_install; - else if (idle_mode == 2) goto idle_remove; + case fm68k_reason_idle_install: + goto idle_install; + case fm68k_reason_idle_remove: + goto idle_remove; #endif + case fm68k_reason_emulate: + break; + } +#endif // FAMEC_NO_GOTOS + + g_m68kcontext = ctx; // won't emulate double fault // if (m68kcontext.execinfo & M68K_FAULTED) return -1; @@ -975,7 +984,13 @@ famec_End: return cycles - m68kcontext.io_cycle_counter; +#ifndef FAMEC_NO_GOTOS init_jump_table: +#else +} + +static int init_jump_table(void) +#endif { u32 i, j; @@ -5005,7 +5020,12 @@ init_jump_table: JumpTable[fake_op_base] 
= JumpTable[fake_op_base|0x0200] = CAST_OP(0x4AFC); \ JumpTable[real_op] = CAST_OP(normal_handler) +#ifndef FAMEC_NO_GOTOS idle_install: +#else +int fm68k_idle_install(void) +#endif +{ // printf("install..\n"); INSTALL_IDLE(0x71fa, 0x66fa, idle_detector_bcc8, 0x6601_idle, 0x6601); INSTALL_IDLE(0x71f8, 0x66f8, idle_detector_bcc8, 0x6601_idle, 0x6601); @@ -5018,8 +5038,14 @@ idle_install: INSTALL_IDLE(0x7dfe, 0x60fe, idle_detector_bcc8, 0x6001_idle, 0x6001); INSTALL_IDLE(0x7dfc, 0x60fc, idle_detector_bcc8, 0x6001_idle, 0x6001); return 0; +} +#ifndef FAMEC_NO_GOTOS idle_remove: +#else +int fm68k_idle_remove(void) +#endif +{ // printf("remove..\n"); UNDO_IDLE(0x71fa, 0x66fa, 0x6601); UNDO_IDLE(0x71f8, 0x66f8, 0x6601); @@ -5032,9 +5058,26 @@ idle_remove: UNDO_IDLE(0x7dfe, 0x60fe, 0x6001); UNDO_IDLE(0x7dfc, 0x60fc, 0x6001); return 0; +} +#endif // PICODRIVE_HACK -#endif +#ifndef FAMEC_NO_GOTOS } -void *get_jumptab(void) { return JumpTable; } +static int init_jump_table(void) +{ + return fm68k_emulate(NULL, 0, fm68k_reason_init); +} +#ifdef PICODRIVE_HACK +int fm68k_idle_install(void) +{ + return fm68k_emulate(NULL, 0, fm68k_reason_idle_install); +} + +int fm68k_idle_remove(void) +{ + return fm68k_emulate(NULL, 0, fm68k_reason_idle_remove); +} +#endif +#endif // FAMEC_NO_GOTOS diff --git a/pico/cd/mcd.c b/pico/cd/mcd.c index af320bd0..929b57f3 100644 --- a/pico/cd/mcd.c +++ b/pico/cd/mcd.c @@ -107,7 +107,7 @@ static void SekRunM68kOnce(void) #elif defined(EMU_M68K) Pico.t.m68c_cnt += m68k_execute(cyc_do) - cyc_do; #elif defined(EMU_F68K) - Pico.t.m68c_cnt += fm68k_emulate(cyc_do, 0) - cyc_do; + Pico.t.m68c_cnt += fm68k_emulate(&PicoCpuFM68k, cyc_do, 0) - cyc_do; #endif } @@ -138,8 +138,7 @@ static void SekRunS68k(unsigned int to) SekCycleCntS68k += m68k_execute(cyc_do) - cyc_do; m68k_set_context(&PicoCpuMM68k); #elif defined(EMU_F68K) - g_m68kcontext = &PicoCpuFS68k; - SekCycleCntS68k += fm68k_emulate(cyc_do, 0) - cyc_do; + SekCycleCntS68k += fm68k_emulate(&PicoCpuFS68k, 
cyc_do, 0) - cyc_do; g_m68kcontext = &PicoCpuFM68k; #endif } diff --git a/pico/cd/sek.c b/pico/cd/sek.c index 42fea930..6f54801b 100644 --- a/pico/cd/sek.c +++ b/pico/cd/sek.c @@ -151,8 +151,7 @@ PICO_INTERNAL int SekResetS68k(void) #ifdef EMU_F68K { void *oldcontext = g_m68kcontext; - g_m68kcontext = &PicoCpuFS68k; - fm68k_reset(); + fm68k_reset(&PicoCpuFS68k); g_m68kcontext = oldcontext; } #endif diff --git a/pico/debugCPU.c b/pico/debugCPU.c index fd312142..36e71a7c 100644 --- a/pico/debugCPU.c +++ b/pico/debugCPU.c @@ -49,7 +49,7 @@ static int otherRun(void) CycloneRun(currentC68k); return 1-currentC68k->cycles; #elif defined(EMU_F68K) - return fm68k_emulate(1, 0); + return fm68k_emulate(g_m68kcontext, 1, 0); #endif } diff --git a/pico/pico_cmn.c b/pico/pico_cmn.c index 27a66cdf..d2a1d94c 100644 --- a/pico/pico_cmn.c +++ b/pico/pico_cmn.c @@ -38,7 +38,7 @@ static void SekSyncM68k(void) #elif defined(EMU_M68K) Pico.t.m68c_cnt += m68k_execute(cyc_do) - cyc_do; #elif defined(EMU_F68K) - Pico.t.m68c_cnt += fm68k_emulate(cyc_do, 0) - cyc_do; + Pico.t.m68c_cnt += fm68k_emulate(&PicoCpuFM68k, cyc_do, 0) - cyc_do; #endif } diff --git a/pico/pico_int.h b/pico/pico_int.h index e3bf03a8..f356d40d 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -81,7 +81,7 @@ extern M68K_CONTEXT PicoCpuFM68k, PicoCpuFS68k; } #define SekIsStoppedM68k() (PicoCpuFM68k.execinfo&FM68K_HALTED) #define SekIsStoppedS68k() (PicoCpuFS68k.execinfo&FM68K_HALTED) -#define SekShouldInterrupt() fm68k_would_interrupt() +#define SekShouldInterrupt() fm68k_would_interrupt(&PicoCpuFM68k) #define SekNotPolling PicoCpuFM68k.not_polling #define SekNotPollingS68k PicoCpuFS68k.not_polling diff --git a/pico/sek.c b/pico/sek.c index 8fece1a3..c76a3e8e 100644 --- a/pico/sek.c +++ b/pico/sek.c @@ -157,10 +157,7 @@ PICO_INTERNAL int SekReset(void) REG_USP = 0; // ? 
#endif #ifdef EMU_F68K - { - g_m68kcontext = &PicoCpuFM68k; - fm68k_reset(); - } + fm68k_reset(&PicoCpuFM68k); #endif return 0; @@ -178,7 +175,7 @@ void SekStepM68k(void) #elif defined(EMU_M68K) Pico.t.m68c_cnt += m68k_execute(1); #elif defined(EMU_F68K) - Pico.t.m68c_cnt += fm68k_emulate(1, 0); + Pico.t.m68c_cnt += fm68k_emulate(&PicoCpuFM68k, 1, 0); #endif } @@ -320,7 +317,7 @@ void SekInitIdleDet(void) CycloneInitIdle(); #endif #ifdef EMU_F68K - fm68k_emulate(0, 1); + fm68k_idle_install(); #endif } @@ -431,7 +428,7 @@ void SekFinishIdleDet(void) CycloneFinishIdle(); #endif #ifdef EMU_F68K - fm68k_emulate(0, 2); + fm68k_idle_remove(); #endif while (idledet_count > 0) { From 7669591e0876778fc4f3977b145c012f2e3a12e9 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 15 Oct 2017 01:15:00 +0300 Subject: [PATCH 0123/1110] famec: eliminate global context ptr saves like 25-35K of .text current compile resource usage on i5-6600K: cpu mem gcc 5.4.0: 17.0 1.1g clang 3.8: 1686 2.3g FAMEC_NO_GOTOS: gcc 5.4.0: 8.4 0.4g clang 3.8: 20.0 0.15g vs2008/O2: ~1800 ? vs2008/O1: ~720 ? 
--- cpu/fame/fame.h | 4 +- cpu/fame/famec.c | 253 +++++++++++------------ cpu/fame/famec_opcodes.h | 426 +++++++++++++++++++-------------------- pico/cd/mcd.c | 1 - pico/cd/sek.c | 19 +- pico/debugCPU.c | 1 + pico/sek.c | 13 +- 7 files changed, 346 insertions(+), 371 deletions(-) diff --git a/cpu/fame/fame.h b/cpu/fame/fame.h index 93172c87..0baabf26 100644 --- a/cpu/fame/fame.h +++ b/cpu/fame/fame.h @@ -143,8 +143,6 @@ typedef struct unsigned long Fetch[M68K_FETCHBANK1]; } M68K_CONTEXT; -extern M68K_CONTEXT *g_m68kcontext; - typedef enum { fm68k_reason_emulate = 0, @@ -163,7 +161,7 @@ int fm68k_reset(M68K_CONTEXT *ctx); int fm68k_emulate(M68K_CONTEXT *ctx, int n, fm68k_call_reason reason); int fm68k_would_interrupt(M68K_CONTEXT *ctx); // to be called from fm68k_emulate() -unsigned fm68k_get_pc(M68K_CONTEXT *ctx); +unsigned int fm68k_get_pc(const M68K_CONTEXT *ctx); // PICODRIVE_HACK int fm68k_idle_install(void); diff --git a/cpu/fame/famec.c b/cpu/fame/famec.c index 508a12f7..41620944 100644 --- a/cpu/fame/famec.c +++ b/cpu/fame/famec.c @@ -228,21 +228,21 @@ typedef signed int s32; // internals core macros ///////////////////////// -#define DREG(X) (m68kcontext.dreg[(X)].D) -#define DREGu32(X) (m68kcontext.dreg[(X)].D) -#define DREGs32(X) (m68kcontext.dreg[(X)].SD) -#define DREGu16(X) (m68kcontext.dreg[(X)].W) -#define DREGs16(X) (m68kcontext.dreg[(X)].SW) -#define DREGu8(X) (m68kcontext.dreg[(X)].B) -#define DREGs8(X) (m68kcontext.dreg[(X)].SB) +#define DREG(X) (ctx->dreg[(X)].D) +#define DREGu32(X) (ctx->dreg[(X)].D) +#define DREGs32(X) (ctx->dreg[(X)].SD) +#define DREGu16(X) (ctx->dreg[(X)].W) +#define DREGs16(X) (ctx->dreg[(X)].SW) +#define DREGu8(X) (ctx->dreg[(X)].B) +#define DREGs8(X) (ctx->dreg[(X)].SB) -#define AREG(X) (m68kcontext.areg[(X)].D) -#define AREGu32(X) (m68kcontext.areg[(X)].D) -#define AREGs32(X) (m68kcontext.areg[(X)].SD) -#define AREGu16(X) (m68kcontext.areg[(X)].W) -#define AREGs16(X) (m68kcontext.areg[(X)].SW) +#define AREG(X) 
(ctx->areg[(X)].D) +#define AREGu32(X) (ctx->areg[(X)].D) +#define AREGs32(X) (ctx->areg[(X)].SD) +#define AREGu16(X) (ctx->areg[(X)].W) +#define AREGs16(X) (ctx->areg[(X)].SW) -#define ASP (m68kcontext.asp) +#define ASP (ctx->asp) #define LSL(A, C) ((A) << (C)) #define LSR(A, C) ((A) >> (C)) @@ -271,39 +271,39 @@ typedef signed int s32; #ifdef FAMEC_ROLL_INLINE #define RET(A) \ - m68kcontext.io_cycle_counter -= (A); \ - if (m68kcontext.io_cycle_counter <= 0) goto famec_Exec_End; \ + ctx->io_cycle_counter -= (A); \ + if (ctx->io_cycle_counter <= 0) goto famec_Exec_End; \ NEXT #else #define RET(A) \ - m68kcontext.io_cycle_counter -= (A); \ - if (m68kcontext.io_cycle_counter <= 0) goto famec_Exec_End; \ + ctx->io_cycle_counter -= (A); \ + if (ctx->io_cycle_counter <= 0) goto famec_Exec_End; \ goto famec_Exec; #endif #define RET0() \ - m68kcontext.io_cycle_counter = -6; \ + ctx->io_cycle_counter = -6; \ goto famec_End; #else #define NEXT \ - do{ \ - FETCH_WORD(Opcode); \ - JumpTable[Opcode](); \ - }while(m68kcontext.io_cycle_counter>0); + do { \ + FETCH_WORD(Opcode); \ + JumpTable[Opcode](ctx); \ + } while (ctx->io_cycle_counter > 0); #define RET(A) \ - m68kcontext.io_cycle_counter -= (A); \ + ctx->io_cycle_counter -= (A); \ return; #define RET0() \ - m68kcontext.io_cycle_counter = -6; \ + ctx->io_cycle_counter = -6; \ return; #endif -#define M68K_PPL (m68kcontext.sr >> 8) & 7 +#define M68K_PPL (ctx->sr >> 8) & 7 #define GET_PC \ (u32)((uptr)PC - BasePC) @@ -321,7 +321,7 @@ typedef signed int s32; { \ u32 pc = A; \ FORCE_ALIGNMENT(pc); \ - BasePC = m68kcontext.Fetch[(pc >> M68K_FETCHSFT) & M68K_FETCHMASK]; \ + BasePC = ctx->Fetch[(pc >> M68K_FETCHSFT) & M68K_FETCHMASK]; \ PC = (u16*)((pc & M68K_ADR_MASK) + BasePC); \ } @@ -331,7 +331,7 @@ typedef signed int s32; { \ u32 pc = A; \ FORCE_ALIGNMENT(pc); \ - BasePC = m68kcontext.Fetch[(pc >> M68K_FETCHSFT) & M68K_FETCHMASK]; \ + BasePC = ctx->Fetch[(pc >> M68K_FETCHSFT) & M68K_FETCHMASK]; \ BasePC -= pc & 0xFF000000; \ PC 
= (u16*)(pc + BasePC); \ } @@ -346,29 +346,29 @@ typedef signed int s32; // CCnt = io_cycle_counter; #define READ_BYTE_F(A, D) \ - D = m68kcontext.read_byte(A) & 0xFF; + D = ctx->read_byte(A) & 0xFF; #define READ_WORD_F(A, D) \ - D = m68kcontext.read_word(A) & 0xFFFF; + D = ctx->read_word(A) & 0xFFFF; #define READ_LONG_F(A, D) \ - D = m68kcontext.read_long(A); + D = ctx->read_long(A); #define READSX_LONG_F READ_LONG_F #define WRITE_LONG_F(A, D) \ - m68kcontext.write_long(A, D); + ctx->write_long(A, D); #define WRITE_LONG_DEC_F(A, D) \ - m68kcontext.write_word((A) + 2, (D) & 0xFFFF); \ - m68kcontext.write_word((A), (D) >> 16); + ctx->write_word((A) + 2, (D) & 0xFFFF); \ + ctx->write_word((A), (D) >> 16); #define PUSH_32_F(D) \ AREG(7) -= 4; \ - m68kcontext.write_long(AREG(7), D); + ctx->write_long(AREG(7), D); #define POP_32_F(D) \ - D = m68kcontext.read_long(AREG(7)); \ + D = ctx->read_long(AREG(7)); \ AREG(7) += 4; #ifndef FAME_BIG_ENDIAN @@ -440,23 +440,23 @@ typedef signed int s32; #endif #define READSX_BYTE_F(A, D) \ - D = (s8)m68kcontext.read_byte(A); + D = (s8)ctx->read_byte(A); #define READSX_WORD_F(A, D) \ - D = (s16)m68kcontext.read_word(A); + D = (s16)ctx->read_word(A); #define WRITE_BYTE_F(A, D) \ - m68kcontext.write_byte(A, D); + ctx->write_byte(A, D); #define WRITE_WORD_F(A, D) \ - m68kcontext.write_word(A, D); + ctx->write_word(A, D); #define PUSH_16_F(D) \ - m68kcontext.write_word(AREG(7) -= 2, D); \ + ctx->write_word(AREG(7) -= 2, D); \ #define POP_16_F(D) \ - D = (u16)m68kcontext.read_word(AREG(7)); \ + D = (u16)ctx->read_word(AREG(7)); \ AREG(7) += 2; #define GET_CCR \ @@ -501,17 +501,17 @@ typedef signed int s32; #endif #define CHECK_INT_TO_JUMP(CLK) \ - if (interrupt_chk__()) \ + if (interrupt_chk__(ctx)) \ { \ - cycles_needed=m68kcontext.io_cycle_counter-(CLK); \ - m68kcontext.io_cycle_counter=(CLK); \ + cycles_needed=ctx->io_cycle_counter-(CLK); \ + ctx->io_cycle_counter=(CLK); \ } #ifdef FAMEC_CHECK_BRANCHES #ifdef FAMEC_NO_GOTOS -#define 
CHECK_BRANCH_EXCEPTION_GOTO_END m68kcontext.io_cycle_counter=0; return; +#define CHECK_BRANCH_EXCEPTION_GOTO_END ctx->io_cycle_counter=0; return; #else #define CHECK_BRANCH_EXCEPTION_GOTO_END goto famec_Exec_End; #endif @@ -520,8 +520,8 @@ typedef signed int s32; if ((_PC_)&1) \ { \ u32 new_PC, pr_PC=GET_PC; \ - m68kcontext.execinfo |= FM68K_EMULATE_GROUP_0; \ - new_PC = execute_exception_group_0(M68K_ADDRESS_ERROR_EX, 0, pr_PC, 0x12 ); \ + ctx->execinfo |= FM68K_EMULATE_GROUP_0; \ + new_PC = execute_exception_group_0(ctx, M68K_ADDRESS_ERROR_EX, 0, pr_PC, 0x12 ); \ SET_PC(new_PC); \ CHECK_BRANCH_EXCEPTION_GOTO_END \ } @@ -529,30 +529,25 @@ typedef signed int s32; #define CHECK_BRANCH_EXCEPTION(_PC_) #endif +#ifdef FAMEC_NO_GOTOS +#define Opcode ctx->Opcode +#define cycles_needed ctx->cycles_needed +#define PC ctx->PC +#define BasePC ctx->BasePC +#define flag_C ctx->flag_C +#define flag_V ctx->flag_V +#define flag_NotZ ctx->flag_NotZ +#define flag_N ctx->flag_N +#define flag_X ctx->flag_X +#endif + +#define flag_T ctx->flag_T +#define flag_S ctx->flag_S +#define flag_I ctx->flag_I // global variable /////////////////// -/* Current CPU context */ -M68K_CONTEXT *g_m68kcontext; -#define m68kcontext (*g_m68kcontext) - -#ifdef FAMEC_NO_GOTOS -#define Opcode m68kcontext.Opcode -#define cycles_needed m68kcontext.cycles_needed -#define PC m68kcontext.PC -#define BasePC m68kcontext.BasePC -#define flag_C m68kcontext.flag_C -#define flag_V m68kcontext.flag_V -#define flag_NotZ m68kcontext.flag_NotZ -#define flag_N m68kcontext.flag_N -#define flag_X m68kcontext.flag_X -#endif - -#define flag_T m68kcontext.flag_T -#define flag_S m68kcontext.flag_S -#define flag_I m68kcontext.flag_I - static u32 initialised = 0; #ifdef PICODRIVE_HACK @@ -560,7 +555,7 @@ extern M68K_CONTEXT PicoCpuFS68k; #endif /* Custom function handler */ -typedef void (*opcode_func)(void); +typedef void (*opcode_func)(M68K_CONTEXT *ctx); static opcode_func JumpTable[0x10000]; @@ -679,30 +674,28 @@ int 
fm68k_reset(M68K_CONTEXT *ctx) if (!initialised) init_jump_table(); - g_m68kcontext = ctx; - // Si la CPU esta en ejecucion, salir con M68K_RUNNING - if (m68kcontext.execinfo & M68K_RUNNING) + if (ctx->execinfo & M68K_RUNNING) return M68K_RUNNING; // Resetear registros - //memset(&m68kcontext.dreg[0], 0, 16*4); + //memset(&ctx->dreg[0], 0, 16*4); // Resetear interrupts, execinfo y ASP - m68kcontext.interrupts[0] = 0; - m68kcontext.execinfo = 0; + ctx->interrupts[0] = 0; + ctx->execinfo = 0; ASP = 0; // Fijar registro de estado - m68kcontext.sr = (m68kcontext.sr & 0xff) | 0x2700; + ctx->sr = (ctx->sr & 0xff) | 0x2700; // Obtener puntero de pila inicial y PC - AREG(7) = m68kcontext.read_long(0); - m68kcontext.pc = m68kcontext.read_long(4); + AREG(7) = ctx->read_long(0); + ctx->pc = ctx->read_long(4); #ifdef FAMEC_DEBUG puts("Reset 68k done!\n"); - printf("PC = 0x%08X\n",m68kcontext.pc); + printf("PC = 0x%08X\n",ctx->pc); #endif return M68K_OK; @@ -714,39 +707,39 @@ int fm68k_reset(M68K_CONTEXT *ctx) /* No recibe parametros */ /* Retorna 68k PC */ /****************************************************************************/ -u32 fm68k_get_pc(M68K_CONTEXT *context) +u32 fm68k_get_pc(const M68K_CONTEXT *ctx) { #ifdef FAMEC_NO_GOTOS - return (context->execinfo & M68K_RUNNING)?(uptr)PC-BasePC:context->pc; + return (ctx->execinfo & M68K_RUNNING)?(uptr)PC-BasePC:ctx->pc; #else - return context->pc; // approximate PC in this mode + return ctx->pc; // approximate PC in this mode #endif } ////////////////////////// // Chequea las interrupciones y las inicia -static FAMEC_EXTRA_INLINE s32 interrupt_chk__(void) +static FAMEC_EXTRA_INLINE s32 interrupt_chk__(M68K_CONTEXT *ctx) { - if (m68kcontext.interrupts[0] > flag_I) - return m68kcontext.interrupts[0]; + if (ctx->interrupts[0] > flag_I) + return ctx->interrupts[0]; return 0; } int fm68k_would_interrupt(M68K_CONTEXT *ctx) { - return interrupt_chk__(); + return interrupt_chk__(ctx); } -static FAMEC_EXTRA_INLINE u32 
execute_exception(s32 vect, u32 oldPC, u32 oldSR) +static FAMEC_EXTRA_INLINE u32 execute_exception(M68K_CONTEXT *ctx, s32 vect, u32 oldPC, u32 oldSR) { u32 newPC; //u32 oldSR = GET_SR; - m68kcontext.io_cycle_counter -= exception_cycle_table[vect]; + ctx->io_cycle_counter -= exception_cycle_table[vect]; #ifdef FAMEC_EMULATE_TRACE - m68kcontext.execinfo &= ~FM68K_EMULATE_TRACE; + ctx->execinfo &= ~FM68K_EMULATE_TRACE; #endif PRE_IO @@ -785,12 +778,12 @@ static FAMEC_EXTRA_INLINE u32 execute_exception(s32 vect, u32 oldPC, u32 oldSR) return newPC; } -static FAMEC_EXTRA_INLINE u32 execute_exception_group_0(s32 vect, s32 addr, u16 spec_info, u32 oldSR) +static FAMEC_EXTRA_INLINE u32 execute_exception_group_0(M68K_CONTEXT *ctx, s32 vect, s32 addr, u16 spec_info, u32 oldSR) { u32 newPC; u16 inst_reg = 0; - newPC = execute_exception(vect, addr, oldSR); - //if (!(m68kcontext.icust_handler && m68kcontext.icust_handler[vect])) + newPC = execute_exception(ctx, vect, addr, oldSR); + //if (!(ctx->icust_handler && ctx->icust_handler[vect])) { PUSH_16_F(inst_reg); PUSH_32_F(addr); @@ -802,7 +795,7 @@ static FAMEC_EXTRA_INLINE u32 execute_exception_group_0(s32 vect, s32 addr, u16 #ifdef FAMEC_NO_GOTOS -#define OPCODE(N_OP) static void OP_##N_OP(void) +#define OPCODE(N_OP) static void OP_##N_OP(M68K_CONTEXT *ctx) #define CAST_OP(N_OP) (opcode_func)&OP_##N_OP #include "famec_opcodes.h" #endif @@ -839,21 +832,19 @@ int fm68k_emulate(M68K_CONTEXT *ctx, s32 cycles, fm68k_call_reason reason) } #endif // FAMEC_NO_GOTOS - g_m68kcontext = ctx; - // won't emulate double fault - // if (m68kcontext.execinfo & M68K_FAULTED) return -1; + // if (ctx->execinfo & M68K_FAULTED) return -1; // Cache PPL flag_I = M68K_PPL; - if (m68kcontext.execinfo & FM68K_HALTED) + if (ctx->execinfo & FM68K_HALTED) { - if (interrupt_chk__() <= 0) + if (interrupt_chk__(ctx) <= 0) { return cycles; } - m68kcontext.execinfo &= ~FM68K_HALTED; + ctx->execinfo &= ~FM68K_HALTED; } #ifdef FAMEC_DEBUG @@ -861,13 +852,13 @@ int 
fm68k_emulate(M68K_CONTEXT *ctx, s32 cycles, fm68k_call_reason reason) #endif /* Poner la CPU en estado de ejecucion */ - m68kcontext.execinfo |= M68K_RUNNING; + ctx->execinfo |= M68K_RUNNING; // Cache SR - SET_SR(m68kcontext.sr) + SET_SR(ctx->sr) // Fijar PC - SET_PC(m68kcontext.pc) + SET_PC(ctx->pc) #ifdef FAMEC_DEBUG printf("PC: %p\n",PC); @@ -875,33 +866,33 @@ int fm68k_emulate(M68K_CONTEXT *ctx, s32 cycles, fm68k_call_reason reason) #endif /* guardar ciclos de ejecucion solicitados */ - m68kcontext.io_cycle_counter = cycles; + ctx->io_cycle_counter = cycles; cycles_needed = 0; #ifdef FAMEC_EMULATE_TRACE - if (!(m68kcontext.execinfo & FM68K_EMULATE_TRACE)) + if (!(ctx->execinfo & FM68K_EMULATE_TRACE)) #endif { - s32 line=interrupt_chk__(); + s32 line=interrupt_chk__(ctx); if (line>0) { /* comprobar si hay rutina de acknowledge */ - if (m68kcontext.iack_handler != NULL) - m68kcontext.iack_handler(line); + if (ctx->iack_handler != NULL) + ctx->iack_handler(line); else - m68kcontext.interrupts[0] = 0; + ctx->interrupts[0] = 0; - SET_PC(execute_exception(line + 0x18, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, line + 0x18, GET_PC, GET_SR)); flag_I = (u32)line; - if (m68kcontext.io_cycle_counter <= 0) goto famec_End; + if (ctx->io_cycle_counter <= 0) goto famec_End; } #ifdef FAMEC_EMULATE_TRACE else if (flag_T) { - m68kcontext.execinfo |= FM68K_EMULATE_TRACE; - cycles_needed = m68kcontext.io_cycle_counter; - m68kcontext.io_cycle_counter=0; + ctx->execinfo |= FM68K_EMULATE_TRACE; + cycles_needed = ctx->io_cycle_counter; + ctx->io_cycle_counter=0; } #endif } @@ -927,14 +918,14 @@ famec_Exec: #endif #ifdef FAMEC_EMULATE_TRACE - if (m68kcontext.execinfo & FM68K_EMULATE_TRACE) + if (ctx->execinfo & FM68K_EMULATE_TRACE) { - m68kcontext.io_cycle_counter += cycles_needed; + ctx->io_cycle_counter += cycles_needed; cycles_needed = 0; - m68kcontext.execinfo &= ~FM68K_EMULATE_TRACE; - m68kcontext.execinfo |= FM68K_DO_TRACE; - SET_PC(execute_exception(M68K_TRACE_EX, 
GET_PC, GET_SR)); - if (m68kcontext.io_cycle_counter > 0) + ctx->execinfo &= ~FM68K_EMULATE_TRACE; + ctx->execinfo |= FM68K_DO_TRACE; + SET_PC(execute_exception(ctx, M68K_TRACE_EX, GET_PC, GET_SR)); + if (ctx->io_cycle_counter > 0) { //NEXT goto famec_Exec; @@ -945,24 +936,24 @@ famec_Exec: if (cycles_needed != 0) { u32 line; - m68kcontext.io_cycle_counter += cycles_needed; + ctx->io_cycle_counter += cycles_needed; cycles_needed = 0; - //if (m68kcontext.io_cycle_counter <= 0) goto famec_End; - line=interrupt_chk__(); + //if (ctx->io_cycle_counter <= 0) goto famec_End; + line=interrupt_chk__(ctx); if (line>0) { - if (m68kcontext.iack_handler != NULL) - m68kcontext.iack_handler(line); + if (ctx->iack_handler != NULL) + ctx->iack_handler(line); else - m68kcontext.interrupts[0] = 0; + ctx->interrupts[0] = 0; - SET_PC(execute_exception(line + 0x18, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, line + 0x18, GET_PC, GET_SR)); flag_I = (u32)line; } #ifdef FAMEC_EMULATE_TRACE if (!(flag_T)) #endif - if (m68kcontext.io_cycle_counter > 0) + if (ctx->io_cycle_counter > 0) { //NEXT goto famec_Exec; @@ -970,19 +961,19 @@ famec_Exec: } famec_End: - m68kcontext.sr = GET_SR; - m68kcontext.pc = GET_PC; + ctx->sr = GET_SR; + ctx->pc = GET_PC; - m68kcontext.execinfo &= ~M68K_RUNNING; + ctx->execinfo &= ~M68K_RUNNING; #ifdef FAMEC_DEBUG printf("En really end...\n"); printf("PC: %p\n",PC); printf("BasePC: 0x%08x\n",BasePC); - printf("pc: 0x%08x\n",m68kcontext.pc); + printf("pc: 0x%08x\n",ctx->pc); #endif - return cycles - m68kcontext.io_cycle_counter; + return cycles - ctx->io_cycle_counter; #ifndef FAMEC_NO_GOTOS init_jump_table: diff --git a/cpu/fame/famec_opcodes.h b/cpu/fame/famec_opcodes.h index 2df6d78a..e069e9ea 100644 --- a/cpu/fame/famec_opcodes.h +++ b/cpu/fame/famec_opcodes.h @@ -1,6 +1,6 @@ #ifdef PICODRIVE_HACK -#define NOT_POLLING g_m68kcontext->not_polling = 1; +#define NOT_POLLING ctx->not_polling = 1; #else #define NOT_POLLING #endif @@ -644,7 +644,7 @@ 
OPCODE(0x007C) } else { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); #ifdef USE_CYCLONE_TIMING RET(0) #else @@ -1302,7 +1302,7 @@ OPCODE(0x027C) } else { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); RET(4) } RET(20) @@ -1952,7 +1952,7 @@ OPCODE(0x0A7C) } else { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); RET(0) } RET(20) @@ -5484,8 +5484,8 @@ OPCODE(0x1008) u32 src, dst; // can't read byte from Ax registers ! - m68kcontext.execinfo |= M68K_FAULTED; - m68kcontext.io_cycle_counter = 0; + ctx->execinfo |= M68K_FAULTED; + ctx->io_cycle_counter = 0; /* goto famec_Exec_End; flag_C = 0; @@ -5504,8 +5504,8 @@ OPCODE(0x1088) u32 src, dst; // can't read byte from Ax registers ! - m68kcontext.execinfo |= M68K_FAULTED; - m68kcontext.io_cycle_counter = 0; + ctx->execinfo |= M68K_FAULTED; + ctx->io_cycle_counter = 0; /* goto famec_Exec_End; flag_C = 0; @@ -5527,8 +5527,8 @@ OPCODE(0x10C8) u32 src, dst; // can't read byte from Ax registers ! - m68kcontext.execinfo |= M68K_FAULTED; - m68kcontext.io_cycle_counter = 0; + ctx->execinfo |= M68K_FAULTED; + ctx->io_cycle_counter = 0; /* goto famec_Exec_End; flag_C = 0; @@ -5551,8 +5551,8 @@ OPCODE(0x1108) u32 src, dst; // can't read byte from Ax registers ! - m68kcontext.execinfo |= M68K_FAULTED; - m68kcontext.io_cycle_counter = 0; + ctx->execinfo |= M68K_FAULTED; + ctx->io_cycle_counter = 0; /* goto famec_Exec_End; flag_C = 0; @@ -5575,8 +5575,8 @@ OPCODE(0x1148) u32 src, dst; // can't read byte from Ax registers ! 
- m68kcontext.execinfo |= M68K_FAULTED; - m68kcontext.io_cycle_counter = 0; + ctx->execinfo |= M68K_FAULTED; + ctx->io_cycle_counter = 0; /* goto famec_Exec_End; flag_C = 0; @@ -5599,8 +5599,8 @@ OPCODE(0x1188) u32 src, dst; // can't read byte from Ax registers ! - m68kcontext.execinfo |= M68K_FAULTED; - m68kcontext.io_cycle_counter = 0; + ctx->execinfo |= M68K_FAULTED; + ctx->io_cycle_counter = 0; /* goto famec_Exec_End; flag_C = 0; @@ -5623,8 +5623,8 @@ OPCODE(0x11C8) u32 src, dst; // can't read byte from Ax registers ! - m68kcontext.execinfo |= M68K_FAULTED; - m68kcontext.io_cycle_counter = 0; + ctx->execinfo |= M68K_FAULTED; + ctx->io_cycle_counter = 0; /* goto famec_Exec_End; flag_C = 0; @@ -5646,8 +5646,8 @@ OPCODE(0x13C8) u32 src, dst; // can't read byte from Ax registers ! - m68kcontext.execinfo |= M68K_FAULTED; - m68kcontext.io_cycle_counter = 0; + ctx->execinfo |= M68K_FAULTED; + ctx->io_cycle_counter = 0; /* goto famec_Exec_End; flag_C = 0; @@ -5669,8 +5669,8 @@ OPCODE(0x1EC8) u32 src, dst; // can't read byte from Ax registers ! - m68kcontext.execinfo |= M68K_FAULTED; - m68kcontext.io_cycle_counter = 0; + ctx->execinfo |= M68K_FAULTED; + ctx->io_cycle_counter = 0; /* goto famec_Exec_End; flag_C = 0; @@ -5693,8 +5693,8 @@ OPCODE(0x1F08) u32 src, dst; // can't read byte from Ax registers ! 
- m68kcontext.execinfo |= M68K_FAULTED; - m68kcontext.io_cycle_counter = 0; + ctx->execinfo |= M68K_FAULTED; + ctx->io_cycle_counter = 0; /* goto famec_Exec_End; flag_C = 0; @@ -16570,7 +16570,7 @@ OPCODE(0x46C0) } else { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); RET(4) } RET(12) @@ -16599,7 +16599,7 @@ OPCODE(0x46D0) } else { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); RET(4) } RET(16) @@ -16629,7 +16629,7 @@ OPCODE(0x46D8) } else { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); RET(4) } RET(16) @@ -16659,7 +16659,7 @@ OPCODE(0x46E0) } else { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); RET(4) } RET(18) @@ -16689,7 +16689,7 @@ OPCODE(0x46E8) } else { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); RET(4) } RET(20) @@ -16719,7 +16719,7 @@ OPCODE(0x46F0) } else { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); RET(4) } RET(22) @@ -16749,7 +16749,7 @@ OPCODE(0x46F8) } else { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); RET(4) } RET(20) @@ -16778,7 +16778,7 @@ OPCODE(0x46F9) } else { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); RET(4) } RET(24) @@ -16808,7 +16808,7 @@ OPCODE(0x46FA) } else { - 
SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); RET(4) } RET(20) @@ -16838,7 +16838,7 @@ OPCODE(0x46FB) } else { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); RET(4) } RET(22) @@ -16864,7 +16864,7 @@ OPCODE(0x46FC) } else { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); RET(4) } RET(16) @@ -16894,7 +16894,7 @@ OPCODE(0x46DF) } else { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); RET(4) } RET(16) @@ -16924,7 +16924,7 @@ OPCODE(0x46E7) } else { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); RET(4) } RET(18) @@ -17337,7 +17337,7 @@ OPCODE(0x4890) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(8) #else @@ -17369,7 +17369,7 @@ OPCODE(0x48A0) } while (res >>= 1); AREG((Opcode >> 0) & 7) = adr; POST_IO - m68kcontext.io_cycle_counter -= (dst - adr) * 2; + ctx->io_cycle_counter -= (dst - adr) * 2; RET(8) } @@ -17397,7 +17397,7 @@ OPCODE(0x48A8) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(12) #else @@ -17429,7 +17429,7 @@ OPCODE(0x48B0) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(14) #else @@ -17460,7 +17460,7 @@ OPCODE(0x48B8) psrc++; } while (res >>= 1); POST_IO - 
m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(12) #else @@ -17491,7 +17491,7 @@ OPCODE(0x48B9) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(16) #else @@ -17523,7 +17523,7 @@ OPCODE(0x48A7) } while (res >>= 1); AREG(7) = adr; POST_IO - m68kcontext.io_cycle_counter -= (dst - adr) * 2; + ctx->io_cycle_counter -= (dst - adr) * 2; RET(8) } @@ -17550,7 +17550,7 @@ OPCODE(0x48D0) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(8) #else @@ -17582,7 +17582,7 @@ OPCODE(0x48E0) } while (res >>= 1); AREG((Opcode >> 0) & 7) = adr; POST_IO - m68kcontext.io_cycle_counter -= (dst - adr) * 2; + ctx->io_cycle_counter -= (dst - adr) * 2; RET(8) } @@ -17610,7 +17610,7 @@ OPCODE(0x48E8) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(12) #else @@ -17642,7 +17642,7 @@ OPCODE(0x48F0) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(14) #else @@ -17673,7 +17673,7 @@ OPCODE(0x48F8) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(12) #else @@ -17704,7 +17704,7 @@ OPCODE(0x48F9) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(16) #else @@ -17736,7 +17736,7 @@ OPCODE(0x48E7) } while (res >>= 1); AREG(7) = adr; POST_IO - m68kcontext.io_cycle_counter -= (dst - adr) * 2; + ctx->io_cycle_counter -= (dst - adr) * 2; RET(8) } @@ -18319,7 +18319,7 @@ 
OPCODE(0x4AD0) flag_NotZ = res; flag_N = res; #ifdef PICODRIVE_HACK - if (g_m68kcontext == &PicoCpuFS68k) { + if (ctx == &PicoCpuFS68k) { res |= 0x80; WRITE_BYTE_F(adr, res); } @@ -18349,7 +18349,7 @@ OPCODE(0x4AD8) flag_N = res; #ifdef PICODRIVE_HACK - if (g_m68kcontext == &PicoCpuFS68k) { + if (ctx == &PicoCpuFS68k) { res |= 0x80; WRITE_BYTE_F(adr, res); } @@ -18379,7 +18379,7 @@ OPCODE(0x4AE0) flag_N = res; #ifdef PICODRIVE_HACK - if (g_m68kcontext == &PicoCpuFS68k) { + if (ctx == &PicoCpuFS68k) { res |= 0x80; WRITE_BYTE_F(adr, res); } @@ -18409,7 +18409,7 @@ OPCODE(0x4AE8) flag_N = res; #ifdef PICODRIVE_HACK - if (g_m68kcontext == &PicoCpuFS68k) { + if (ctx == &PicoCpuFS68k) { res |= 0x80; WRITE_BYTE_F(adr, res); } @@ -18439,7 +18439,7 @@ OPCODE(0x4AF0) flag_N = res; #ifdef PICODRIVE_HACK - if (g_m68kcontext == &PicoCpuFS68k) { + if (ctx == &PicoCpuFS68k) { res |= 0x80; WRITE_BYTE_F(adr, res); } @@ -18468,7 +18468,7 @@ OPCODE(0x4AF8) flag_N = res; #ifdef PICODRIVE_HACK - if (g_m68kcontext == &PicoCpuFS68k) { + if (ctx == &PicoCpuFS68k) { res |= 0x80; WRITE_BYTE_F(adr, res); } @@ -18497,7 +18497,7 @@ OPCODE(0x4AF9) flag_N = res; #ifdef PICODRIVE_HACK - if (g_m68kcontext == &PicoCpuFS68k) { + if (ctx == &PicoCpuFS68k) { res |= 0x80; WRITE_BYTE_F(adr, res); } @@ -18527,7 +18527,7 @@ OPCODE(0x4ADF) flag_N = res; #ifdef PICODRIVE_HACK - if (g_m68kcontext == &PicoCpuFS68k) { + if (ctx == &PicoCpuFS68k) { res |= 0x80; WRITE_BYTE_F(adr, res); } @@ -18557,7 +18557,7 @@ OPCODE(0x4AE7) flag_N = res; #ifdef PICODRIVE_HACK - if (g_m68kcontext == &PicoCpuFS68k) { + if (ctx == &PicoCpuFS68k) { res |= 0x80; WRITE_BYTE_F(adr, res); } @@ -18578,21 +18578,21 @@ OPCODE(0x4AFC) extern void SekFinishIdleDet(void); SekFinishIdleDet(); #endif - SET_PC(execute_exception(M68K_ILLEGAL_INSTRUCTION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ILLEGAL_INSTRUCTION_EX, GET_PC-2, GET_SR)); RET(0) } // ILLEGAL A000-AFFF OPCODE(0xA000) { - SET_PC(execute_exception(M68K_1010_EX, 
GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_1010_EX, GET_PC-2, GET_SR)); RET(0) } // ILLEGAL F000-FFFF OPCODE(0xF000) { - SET_PC(execute_exception(M68K_1111_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_1111_EX, GET_PC-2, GET_SR)); RET(0) // 4 already taken by exc. handler } @@ -18619,7 +18619,7 @@ OPCODE(0x4C90) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(12) #else @@ -18651,7 +18651,7 @@ OPCODE(0x4C98) } while (res >>= 1); AREG((Opcode >> 0) & 7) = adr; POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; RET(12) } @@ -18679,7 +18679,7 @@ OPCODE(0x4CA8) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(16) #else @@ -18711,7 +18711,7 @@ OPCODE(0x4CB0) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(18) #else @@ -18742,7 +18742,7 @@ OPCODE(0x4CB8) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(16) #else @@ -18773,7 +18773,7 @@ OPCODE(0x4CB9) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(20) #else @@ -18805,7 +18805,7 @@ OPCODE(0x4CBA) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(16) #else @@ -18837,7 +18837,7 @@ OPCODE(0x4CBB) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(18) #else @@ -18869,7 
+18869,7 @@ OPCODE(0x4C9F) } while (res >>= 1); AREG(7) = adr; POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; RET(12) } @@ -18896,7 +18896,7 @@ OPCODE(0x4CD0) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(12) #else @@ -18928,7 +18928,7 @@ OPCODE(0x4CD8) } while (res >>= 1); AREG((Opcode >> 0) & 7) = adr; POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; RET(12) } @@ -18956,7 +18956,7 @@ OPCODE(0x4CE8) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(16) #else @@ -18988,7 +18988,7 @@ OPCODE(0x4CF0) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(18) #else @@ -19019,7 +19019,7 @@ OPCODE(0x4CF8) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(16) #else @@ -19050,7 +19050,7 @@ OPCODE(0x4CF9) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(20) #else @@ -19082,7 +19082,7 @@ OPCODE(0x4CFA) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(16) #else @@ -19114,7 +19114,7 @@ OPCODE(0x4CFB) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(18) #else @@ -19146,14 +19146,14 @@ OPCODE(0x4CDF) } while (res >>= 1); AREG(7) = adr; POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + 
ctx->io_cycle_counter -= (adr - dst) * 2; RET(12) } // TRAP OPCODE(0x4E40) { - SET_PC(execute_exception(M68K_TRAP_BASE_EX + (Opcode & 0xF), GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_TRAP_BASE_EX + (Opcode & 0xF), GET_PC, GET_SR)); RET(4) } @@ -19224,7 +19224,7 @@ OPCODE(0x4E60) if (!flag_S) { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); RET(4) } res = AREGu32((Opcode >> 0) & 7); @@ -19240,7 +19240,7 @@ OPCODE(0x4E68) if (!flag_S) { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); RET(4) } res = ASP; @@ -19256,11 +19256,11 @@ OPCODE(0x4E70) if (!flag_S) { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); RET(4) } PRE_IO - if (m68kcontext.reset_handler) m68kcontext.reset_handler(); + if (ctx->reset_handler) ctx->reset_handler(); // CPU->Reset_CallBack(); POST_IO RET(132) @@ -19280,7 +19280,7 @@ OPCODE(0x4E72) if (!flag_S) { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); RET(4) } FETCH_WORD(res); @@ -19292,7 +19292,7 @@ OPCODE(0x4E72) AREG(7) = ASP; ASP = res; } - m68kcontext.execinfo |= FM68K_HALTED; + ctx->execinfo |= FM68K_HALTED; RET0() } @@ -19304,7 +19304,7 @@ OPCODE(0x4E73) if (!flag_S) { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); RET(4) } PRE_IO @@ -19319,7 +19319,7 @@ OPCODE(0x4E73) ASP = res; } POST_IO - m68kcontext.execinfo &= ~(FM68K_EMULATE_GROUP_0|FM68K_EMULATE_TRACE|FM68K_DO_TRACE); + ctx->execinfo &= ~(FM68K_EMULATE_GROUP_0|FM68K_EMULATE_TRACE|FM68K_DO_TRACE); CHECK_INT_TO_JUMP(20) RET(20) } @@ 
-19342,7 +19342,7 @@ RET(16) OPCODE(0x4E76) { if (flag_V & 0x80) - SET_PC(execute_exception(M68K_TRAPV_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_TRAPV_EX, GET_PC, GET_SR)); RET(4) } @@ -19604,7 +19604,7 @@ OPCODE(0x4180) if ((res < 0) || (res > src)) { flag_N = res >> 8; - SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_CHK_EX, GET_PC, GET_SR)); } RET(10) } @@ -19622,7 +19622,7 @@ OPCODE(0x4190) if ((res < 0) || (res > src)) { flag_N = res >> 8; - SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_CHK_EX, GET_PC, GET_SR)); } POST_IO RET(14) @@ -19642,7 +19642,7 @@ OPCODE(0x4198) if ((res < 0) || (res > src)) { flag_N = res >> 8; - SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_CHK_EX, GET_PC, GET_SR)); } POST_IO RET(14) @@ -19662,7 +19662,7 @@ OPCODE(0x41A0) if ((res < 0) || (res > src)) { flag_N = res >> 8; - SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_CHK_EX, GET_PC, GET_SR)); } POST_IO RET(16) @@ -19682,7 +19682,7 @@ OPCODE(0x41A8) if ((res < 0) || (res > src)) { flag_N = res >> 8; - SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_CHK_EX, GET_PC, GET_SR)); } POST_IO RET(18) @@ -19702,7 +19702,7 @@ OPCODE(0x41B0) if ((res < 0) || (res > src)) { flag_N = res >> 8; - SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_CHK_EX, GET_PC, GET_SR)); } POST_IO RET(20) @@ -19721,7 +19721,7 @@ OPCODE(0x41B8) if ((res < 0) || (res > src)) { flag_N = res >> 8; - SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_CHK_EX, GET_PC, GET_SR)); } POST_IO RET(18) @@ -19740,7 +19740,7 @@ OPCODE(0x41B9) if ((res < 0) || (res > src)) { flag_N = res >> 8; - SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_CHK_EX, GET_PC, 
GET_SR)); } POST_IO RET(22) @@ -19760,7 +19760,7 @@ OPCODE(0x41BA) if ((res < 0) || (res > src)) { flag_N = res >> 8; - SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_CHK_EX, GET_PC, GET_SR)); } POST_IO RET(18) @@ -19780,7 +19780,7 @@ OPCODE(0x41BB) if ((res < 0) || (res > src)) { flag_N = res >> 8; - SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_CHK_EX, GET_PC, GET_SR)); } POST_IO RET(20) @@ -19797,7 +19797,7 @@ OPCODE(0x41BC) if ((res < 0) || (res > src)) { flag_N = res >> 8; - SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_CHK_EX, GET_PC, GET_SR)); } POST_IO RET(14) @@ -19817,7 +19817,7 @@ OPCODE(0x419F) if ((res < 0) || (res > src)) { flag_N = res >> 8; - SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_CHK_EX, GET_PC, GET_SR)); } POST_IO RET(14) @@ -19837,7 +19837,7 @@ OPCODE(0x41A7) if ((res < 0) || (res > src)) { flag_N = res >> 8; - SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_CHK_EX, GET_PC, GET_SR)); } POST_IO RET(16) @@ -25051,7 +25051,7 @@ OPCODE(0x6201) if (flag_NotZ && (!(flag_C & 0x100))) { PC += ((s8)(Opcode & 0xFE)) >> 1; - m68kcontext.io_cycle_counter -= 2; + ctx->io_cycle_counter -= 2; } RET(8) } @@ -25065,7 +25065,7 @@ OPCODE(0x6301) if ((!flag_NotZ) || (flag_C & 0x100)) { PC += ((s8)(Opcode & 0xFE)) >> 1; - m68kcontext.io_cycle_counter -= 2; + ctx->io_cycle_counter -= 2; } RET(8) } @@ -25079,7 +25079,7 @@ OPCODE(0x6401) if (!(flag_C & 0x100)) { PC += ((s8)(Opcode & 0xFE)) >> 1; - m68kcontext.io_cycle_counter -= 2; + ctx->io_cycle_counter -= 2; } RET(8) } @@ -25093,7 +25093,7 @@ OPCODE(0x6501) if (flag_C & 0x100) { PC += ((s8)(Opcode & 0xFE)) >> 1; - m68kcontext.io_cycle_counter -= 2; + ctx->io_cycle_counter -= 2; } RET(8) } @@ -25107,7 +25107,7 @@ OPCODE(0x6601) if (flag_NotZ) { PC += ((s8)(Opcode & 0xFE)) >> 1; - 
m68kcontext.io_cycle_counter -= 2; + ctx->io_cycle_counter -= 2; } RET(8) } @@ -25121,7 +25121,7 @@ OPCODE(0x6701) if (!flag_NotZ) { PC += ((s8)(Opcode & 0xFE)) >> 1; - m68kcontext.io_cycle_counter -= 2; + ctx->io_cycle_counter -= 2; } RET(8) } @@ -25135,7 +25135,7 @@ OPCODE(0x6801) if (!(flag_V & 0x80)) { PC += ((s8)(Opcode & 0xFE)) >> 1; - m68kcontext.io_cycle_counter -= 2; + ctx->io_cycle_counter -= 2; } RET(8) } @@ -25149,7 +25149,7 @@ OPCODE(0x6901) if (flag_V & 0x80) { PC += ((s8)(Opcode & 0xFE)) >> 1; - m68kcontext.io_cycle_counter -= 2; + ctx->io_cycle_counter -= 2; } RET(8) } @@ -25163,7 +25163,7 @@ OPCODE(0x6A01) if (!(flag_N & 0x80)) { PC += ((s8)(Opcode & 0xFE)) >> 1; - m68kcontext.io_cycle_counter -= 2; + ctx->io_cycle_counter -= 2; } RET(8) } @@ -25177,7 +25177,7 @@ OPCODE(0x6B01) if (flag_N & 0x80) { PC += ((s8)(Opcode & 0xFE)) >> 1; - m68kcontext.io_cycle_counter -= 2; + ctx->io_cycle_counter -= 2; } RET(8) } @@ -25191,7 +25191,7 @@ OPCODE(0x6C01) if (!((flag_N ^ flag_V) & 0x80)) { PC += ((s8)(Opcode & 0xFE)) >> 1; - m68kcontext.io_cycle_counter -= 2; + ctx->io_cycle_counter -= 2; } RET(8) } @@ -25205,7 +25205,7 @@ OPCODE(0x6D01) if ((flag_N ^ flag_V) & 0x80) { PC += ((s8)(Opcode & 0xFE)) >> 1; - m68kcontext.io_cycle_counter -= 2; + ctx->io_cycle_counter -= 2; } RET(8) } @@ -25219,7 +25219,7 @@ OPCODE(0x6E01) if (flag_NotZ && (!((flag_N ^ flag_V) & 0x80))) { PC += ((s8)(Opcode & 0xFE)) >> 1; - m68kcontext.io_cycle_counter -= 2; + ctx->io_cycle_counter -= 2; } RET(8) } @@ -25233,7 +25233,7 @@ OPCODE(0x6F01) if ((!flag_NotZ) || ((flag_N ^ flag_V) & 0x80)) { PC += ((s8)(Opcode & 0xFE)) >> 1; - m68kcontext.io_cycle_counter -= 2; + ctx->io_cycle_counter -= 2; } RET(8) } @@ -27100,7 +27100,7 @@ OPCODE(0x80C0) src = DREGu16((Opcode >> 0) & 7); if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV RET(140) #else @@ -27148,7 
+27148,7 @@ OPCODE(0x80D0) READ_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV RET(144) #else @@ -27197,7 +27197,7 @@ OPCODE(0x80D8) READ_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV RET(144) #else @@ -27246,7 +27246,7 @@ OPCODE(0x80E0) READ_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV RET(146) #else @@ -27295,7 +27295,7 @@ OPCODE(0x80E8) READ_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV RET(148) #else @@ -27344,7 +27344,7 @@ OPCODE(0x80F0) READ_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV RET(150) #else @@ -27392,7 +27392,7 @@ OPCODE(0x80F8) READ_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV RET(148) #else @@ -27440,7 +27440,7 @@ OPCODE(0x80F9) READ_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV RET(152) #else @@ -27489,7 +27489,7 @@ OPCODE(0x80FA) READ_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, 
GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV RET(148) #else @@ -27538,7 +27538,7 @@ OPCODE(0x80FB) READ_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV RET(150) #else @@ -27584,7 +27584,7 @@ OPCODE(0x80FC) FETCH_WORD(src); if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV RET(144) #else @@ -27633,7 +27633,7 @@ OPCODE(0x80DF) READ_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV RET(144) #else @@ -27682,7 +27682,7 @@ OPCODE(0x80E7) READ_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV RET(146) #else @@ -27728,7 +27728,7 @@ OPCODE(0x81C0) src = (s32)DREGs16((Opcode >> 0) & 7); if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV goto end81C0; #endif @@ -27768,7 +27768,7 @@ goto end81C0; DREGu32((Opcode >> 9) & 7) = res; } #ifdef USE_CYCLONE_TIMING_DIV -end81C0: m68kcontext.io_cycle_counter -= 50; +end81C0: ctx->io_cycle_counter -= 50; #endif RET(108) } @@ -27784,7 +27784,7 @@ OPCODE(0x81D0) READSX_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV goto end81D0; #endif @@ -27824,7 +27824,7 @@ goto end81D0; DREGu32((Opcode >> 9) & 7) = res; } #ifdef USE_CYCLONE_TIMING_DIV -end81D0: m68kcontext.io_cycle_counter -= 50; 
+end81D0: ctx->io_cycle_counter -= 50; #endif RET(112) } @@ -27841,7 +27841,7 @@ OPCODE(0x81D8) READSX_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV goto end81D8; #endif @@ -27881,7 +27881,7 @@ goto end81D8; DREGu32((Opcode >> 9) & 7) = res; } #ifdef USE_CYCLONE_TIMING_DIV -end81D8: m68kcontext.io_cycle_counter -= 50; +end81D8: ctx->io_cycle_counter -= 50; #endif RET(112) } @@ -27898,7 +27898,7 @@ OPCODE(0x81E0) READSX_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV goto end81E0; #endif @@ -27938,7 +27938,7 @@ goto end81E0; DREGu32((Opcode >> 9) & 7) = res; } #ifdef USE_CYCLONE_TIMING_DIV -end81E0: m68kcontext.io_cycle_counter -= 50; +end81E0: ctx->io_cycle_counter -= 50; #endif RET(114) } @@ -27955,7 +27955,7 @@ OPCODE(0x81E8) READSX_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV goto end81E8; #endif @@ -27995,7 +27995,7 @@ goto end81E8; DREGu32((Opcode >> 9) & 7) = res; } #ifdef USE_CYCLONE_TIMING_DIV -end81E8: m68kcontext.io_cycle_counter -= 50; +end81E8: ctx->io_cycle_counter -= 50; #endif RET(116) } @@ -28012,7 +28012,7 @@ OPCODE(0x81F0) READSX_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV goto end81F0; #endif @@ -28052,7 +28052,7 @@ goto end81F0; DREGu32((Opcode >> 9) & 7) = res; } #ifdef USE_CYCLONE_TIMING_DIV -end81F0: m68kcontext.io_cycle_counter -= 50; +end81F0: ctx->io_cycle_counter -= 50; #endif RET(118) } @@ -28068,7 +28068,7 @@ OPCODE(0x81F8) 
READSX_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV goto end81F8; #endif @@ -28108,7 +28108,7 @@ goto end81F8; DREGu32((Opcode >> 9) & 7) = res; } #ifdef USE_CYCLONE_TIMING_DIV -end81F8: m68kcontext.io_cycle_counter -= 50; +end81F8: ctx->io_cycle_counter -= 50; #endif RET(116) } @@ -28124,7 +28124,7 @@ OPCODE(0x81F9) READSX_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV goto end81F9; #endif @@ -28164,7 +28164,7 @@ goto end81F9; DREGu32((Opcode >> 9) & 7) = res; } #ifdef USE_CYCLONE_TIMING_DIV -end81F9: m68kcontext.io_cycle_counter -= 50; +end81F9: ctx->io_cycle_counter -= 50; #endif RET(120) } @@ -28181,7 +28181,7 @@ OPCODE(0x81FA) READSX_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV goto end81FA; #endif @@ -28221,7 +28221,7 @@ goto end81FA; DREGu32((Opcode >> 9) & 7) = res; } #ifdef USE_CYCLONE_TIMING_DIV -end81FA: m68kcontext.io_cycle_counter -= 50; +end81FA: ctx->io_cycle_counter -= 50; #endif RET(116) } @@ -28238,7 +28238,7 @@ OPCODE(0x81FB) READSX_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV goto end81FB; #endif @@ -28278,7 +28278,7 @@ goto end81FB; DREGu32((Opcode >> 9) & 7) = res; } #ifdef USE_CYCLONE_TIMING_DIV -end81FB: m68kcontext.io_cycle_counter -= 50; +end81FB: ctx->io_cycle_counter -= 50; #endif RET(118) } @@ -28292,7 +28292,7 @@ OPCODE(0x81FC) FETCH_SWORD(src); if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + 
SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV goto end81FC; #endif @@ -28332,7 +28332,7 @@ goto end81FC; DREGu32((Opcode >> 9) & 7) = res; } #ifdef USE_CYCLONE_TIMING_DIV -end81FC: m68kcontext.io_cycle_counter -= 50; +end81FC: ctx->io_cycle_counter -= 50; #endif RET(112) } @@ -28349,7 +28349,7 @@ OPCODE(0x81DF) READSX_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV goto end81DF; #endif @@ -28389,7 +28389,7 @@ goto end81DF; DREGu32((Opcode >> 9) & 7) = res; } #ifdef USE_CYCLONE_TIMING_DIV -end81DF: m68kcontext.io_cycle_counter -= 50; +end81DF: ctx->io_cycle_counter -= 50; #endif RET(112) } @@ -28406,7 +28406,7 @@ OPCODE(0x81E7) READSX_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV goto end81E7; #endif @@ -28446,7 +28446,7 @@ goto end81E7; DREGu32((Opcode >> 9) & 7) = res; } #ifdef USE_CYCLONE_TIMING_DIV -end81E7: m68kcontext.io_cycle_counter -= 50; +end81E7: ctx->io_cycle_counter -= 50; #endif RET(114) } @@ -28475,8 +28475,8 @@ OPCODE(0x9008) u32 src, dst; // can't read byte from Ax registers ! - m68kcontext.execinfo |= M68K_FAULTED; - m68kcontext.io_cycle_counter = 0; + ctx->execinfo |= M68K_FAULTED; + ctx->io_cycle_counter = 0; /* goto famec_Exec_End; dst = DREGu8((Opcode >> 9) & 7); @@ -30649,8 +30649,8 @@ OPCODE(0xB008) u32 src, dst; // can't read byte from Ax registers ! - m68kcontext.execinfo |= M68K_FAULTED; - m68kcontext.io_cycle_counter = 0; + ctx->execinfo |= M68K_FAULTED; + ctx->io_cycle_counter = 0; /* goto famec_Exec_End; dst = DREGu8((Opcode >> 9) & 7); @@ -34966,8 +34966,8 @@ OPCODE(0xD008) u32 src, dst; // can't read byte from Ax registers ! 
- m68kcontext.execinfo |= M68K_FAULTED; - m68kcontext.io_cycle_counter = 0; + ctx->execinfo |= M68K_FAULTED; + ctx->io_cycle_counter = 0; /* goto famec_Exec_End; dst = DREGu8((Opcode >> 9) & 7); @@ -37126,7 +37126,7 @@ OPCODE(0xE000) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = (s32)DREGs8((Opcode >> 0) & 7); flag_V = 0; flag_X = flag_C = src << ((M68K_SR_C_SFT + 1) - sft); @@ -37146,7 +37146,7 @@ OPCODE(0xE040) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = (s32)DREGs16((Opcode >> 0) & 7); flag_V = 0; flag_X = flag_C = src << ((M68K_SR_C_SFT + 1) - sft); @@ -37166,7 +37166,7 @@ OPCODE(0xE080) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = (s32)DREGs32((Opcode >> 0) & 7); flag_V = 0; flag_X = flag_C = src << ((M68K_SR_C_SFT + 1) - sft); @@ -37186,7 +37186,7 @@ OPCODE(0xE008) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu8((Opcode >> 0) & 7); flag_N = flag_V = 0; flag_X = flag_C = src << ((M68K_SR_C_SFT + 1) - sft); @@ -37205,7 +37205,7 @@ OPCODE(0xE048) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu16((Opcode >> 0) & 7); flag_N = flag_V = 0; flag_X = flag_C = src << ((M68K_SR_C_SFT + 1) - sft); @@ -37224,7 +37224,7 @@ OPCODE(0xE088) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu32((Opcode >> 0) & 7); flag_N = flag_V = 0; flag_X = flag_C = src << ((M68K_SR_C_SFT + 1) - sft); @@ -37243,7 +37243,7 @@ OPCODE(0xE010) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = 
DREGu8((Opcode >> 0) & 7); src |= (flag_X & M68K_SR_X) << 0; res = (src >> sft) | (src << (9 - sft)); @@ -37264,7 +37264,7 @@ OPCODE(0xE050) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu16((Opcode >> 0) & 7); src |= (flag_X & M68K_SR_X) << 8; res = (src >> sft) | (src << (17 - sft)); @@ -37285,7 +37285,7 @@ OPCODE(0xE090) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu32((Opcode >> 0) & 7); flag_C = src << ((M68K_SR_C_SFT + 1) - sft); if (sft == 1) res = (src >> 1) | ((flag_X & M68K_SR_X) << (32 - (M68K_SR_X_SFT + 1))); @@ -37307,7 +37307,7 @@ OPCODE(0xE018) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu8((Opcode >> 0) & 7); flag_V = 0; flag_C = src << ((M68K_SR_C_SFT + 1) - sft); @@ -37327,7 +37327,7 @@ OPCODE(0xE058) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu16((Opcode >> 0) & 7); flag_V = 0; flag_C = src << ((M68K_SR_C_SFT + 1) - sft); @@ -37347,7 +37347,7 @@ OPCODE(0xE098) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu32((Opcode >> 0) & 7); flag_V = 0; flag_C = src << ((M68K_SR_C_SFT + 1) - sft); @@ -37367,7 +37367,7 @@ OPCODE(0xE100) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu8((Opcode >> 0) & 7); if (sft < 8) { @@ -37406,7 +37406,7 @@ OPCODE(0xE140) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu16((Opcode >> 0) & 7); flag_X = flag_C = src >> (8 - sft); res = src << sft; @@ -37431,7 +37431,7 @@ OPCODE(0xE180) u32 sft; sft = (((Opcode 
>> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu32((Opcode >> 0) & 7); flag_X = flag_C = src >> (24 - sft); res = src << sft; @@ -37456,7 +37456,7 @@ OPCODE(0xE108) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu8((Opcode >> 0) & 7); flag_V = 0; flag_X = flag_C = src << (0 + sft); @@ -37476,7 +37476,7 @@ OPCODE(0xE148) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu16((Opcode >> 0) & 7); flag_V = 0; flag_X = flag_C = src >> (8 - sft); @@ -37496,7 +37496,7 @@ OPCODE(0xE188) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu32((Opcode >> 0) & 7); flag_V = 0; flag_X = flag_C = src >> (24 - sft); @@ -37516,7 +37516,7 @@ OPCODE(0xE110) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu8((Opcode >> 0) & 7); src |= (flag_X & M68K_SR_X) << 0; res = (src << sft) | (src >> (9 - sft)); @@ -37537,7 +37537,7 @@ OPCODE(0xE150) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu16((Opcode >> 0) & 7); src |= (flag_X & M68K_SR_X) << 8; res = (src << sft) | (src >> (17 - sft)); @@ -37558,7 +37558,7 @@ OPCODE(0xE190) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu32((Opcode >> 0) & 7); flag_C = src >> ((32 - M68K_SR_C_SFT) - sft); if (sft == 1) res = (src << 1) | ((flag_X & M68K_SR_X) >> ((M68K_SR_X_SFT + 1) - 1)); @@ -37580,7 +37580,7 @@ OPCODE(0xE118) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu8((Opcode >> 0) & 7); 
flag_V = 0; flag_C = src << (0 + sft); @@ -37600,7 +37600,7 @@ OPCODE(0xE158) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu16((Opcode >> 0) & 7); flag_V = 0; flag_C = src >> (8 - sft); @@ -37620,7 +37620,7 @@ OPCODE(0xE198) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu32((Opcode >> 0) & 7); flag_V = 0; flag_C = src >> (24 - sft); @@ -37643,7 +37643,7 @@ OPCODE(0xE020) src = (s32)DREGs8((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; if (sft < 8) { flag_V = 0; @@ -37696,7 +37696,7 @@ OPCODE(0xE060) src = (s32)DREGs16((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; if (sft < 16) { flag_V = 0; @@ -37754,7 +37754,7 @@ OPCODE(0xE0A0) src = (s32)DREGs32((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; if (sft < 32) { flag_V = 0; @@ -37808,7 +37808,7 @@ OPCODE(0xE028) src = DREGu8((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; if (sft <= 8) { flag_N = flag_V = 0; @@ -37847,7 +37847,7 @@ OPCODE(0xE068) src = DREGu16((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; if (sft <= 16) { flag_N = flag_V = 0; @@ -37891,7 +37891,7 @@ OPCODE(0xE0A8) src = DREGu32((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; if (sft < 32) { flag_N = flag_V = 0; @@ -37933,7 +37933,7 @@ OPCODE(0xE030) src = DREGu8((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; sft %= 9; src |= (flag_X & M68K_SR_X) << 0; @@ -37965,7 +37965,7 @@ OPCODE(0xE070) src = DREGu16((Opcode >> 0) & 7); if (sft) { - 
m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; sft %= 17; src |= (flag_X & M68K_SR_X) << 8; @@ -38002,7 +38002,7 @@ OPCODE(0xE0B0) src = DREGu32((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; sft %= 33; if (sft != 0) @@ -38040,7 +38040,7 @@ OPCODE(0xE038) src = DREGu8((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; sft &= 0x07; flag_C = src << (M68K_SR_C_SFT - ((sft - 1) & 7)); @@ -38071,7 +38071,7 @@ OPCODE(0xE078) src = DREGu16((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; sft &= 0x0F; flag_C = (src >> ((sft - 1) & 15)) << M68K_SR_C_SFT; @@ -38107,7 +38107,7 @@ OPCODE(0xE0B8) src = DREGu32((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; sft &= 0x1F; flag_C = (src >> ((sft - 1) & 31)) << M68K_SR_C_SFT; @@ -38139,7 +38139,7 @@ OPCODE(0xE120) src = DREGu8((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; if (sft < 8) { flag_X = flag_C = (src << sft) >> 0; @@ -38187,7 +38187,7 @@ OPCODE(0xE160) src = DREGu16((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; if (sft < 16) { flag_X = flag_C = (src << sft) >> 8; @@ -38240,7 +38240,7 @@ OPCODE(0xE1A0) src = DREGu32((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; if (sft < 32) { flag_X = flag_C = (src >> (32 - sft)) << M68K_SR_C_SFT; @@ -38289,7 +38289,7 @@ OPCODE(0xE128) src = DREGu8((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; if (sft <= 8) { flag_X = flag_C = (src << sft) >> 0; @@ -38329,7 +38329,7 @@ OPCODE(0xE168) src = DREGu16((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 
2; + ctx->io_cycle_counter -= sft * 2; if (sft <= 16) { flag_X = flag_C = (src << sft) >> 8; @@ -38374,7 +38374,7 @@ OPCODE(0xE1A8) src = DREGu32((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; if (sft < 32) { flag_X = flag_C = (src >> (32 - sft)) << M68K_SR_C_SFT; @@ -38417,7 +38417,7 @@ OPCODE(0xE130) src = DREGu8((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; sft %= 9; src |= (flag_X & M68K_SR_X) << 0; @@ -38449,7 +38449,7 @@ OPCODE(0xE170) src = DREGu16((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; sft %= 17; src |= (flag_X & M68K_SR_X) << 8; @@ -38486,7 +38486,7 @@ OPCODE(0xE1B0) src = DREGu32((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; sft %= 33; if (sft != 0) @@ -38524,7 +38524,7 @@ OPCODE(0xE138) src = DREGu8((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; if (sft &= 0x07) { flag_C = (src << sft) >> 0; @@ -38562,7 +38562,7 @@ OPCODE(0xE178) src = DREGu16((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; if (sft &= 0x0F) { flag_C = (src << sft) >> 8; @@ -38605,7 +38605,7 @@ OPCODE(0xE1B8) src = DREGu32((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; if (sft &= 0x1F) { flag_C = (src >> (32 - sft)) << M68K_SR_C_SFT; @@ -40112,7 +40112,7 @@ OPCODE(idle_detector_bcc8) if ( Opcode & 0x0100) newop |= 0x400; // beq if (!(Opcode & 0x0f00)) newop |= 0xc00; // bra - ret = SekRegisterIdlePatch(GET_PC - 2, Opcode, newop, &m68kcontext); + ret = SekRegisterIdlePatch(GET_PC - 2, Opcode, newop, ctx); switch (ret) { case 0: PC[-1] = newop; break; @@ -40128,7 +40128,7 @@ end: if (cond_true) { PC = dest_pc; - m68kcontext.io_cycle_counter -= 2; + 
ctx->io_cycle_counter -= 2; } RET(8) } diff --git a/pico/cd/mcd.c b/pico/cd/mcd.c index 929b57f3..72488343 100644 --- a/pico/cd/mcd.c +++ b/pico/cd/mcd.c @@ -139,7 +139,6 @@ static void SekRunS68k(unsigned int to) m68k_set_context(&PicoCpuMM68k); #elif defined(EMU_F68K) SekCycleCntS68k += fm68k_emulate(&PicoCpuFS68k, cyc_do, 0) - cyc_do; - g_m68kcontext = &PicoCpuFM68k; #endif } diff --git a/pico/cd/sek.c b/pico/cd/sek.c index 6f54801b..baf0d9e5 100644 --- a/pico/cd/sek.c +++ b/pico/cd/sek.c @@ -117,15 +117,10 @@ PICO_INTERNAL void SekInitS68k(void) } #endif #ifdef EMU_F68K - { - void *oldcontext = g_m68kcontext; - g_m68kcontext = &PicoCpuFS68k; - memset(&PicoCpuFS68k, 0, sizeof(PicoCpuFS68k)); - fm68k_init(); - PicoCpuFS68k.iack_handler = SekIntAckFS68k; - PicoCpuFS68k.sr = 0x2704; // Z flag - g_m68kcontext = oldcontext; - } + memset(&PicoCpuFS68k, 0, sizeof(PicoCpuFS68k)); + fm68k_init(); + PicoCpuFS68k.iack_handler = SekIntAckFS68k; + PicoCpuFS68k.sr = 0x2704; // Z flag #endif } @@ -149,11 +144,7 @@ PICO_INTERNAL int SekResetS68k(void) } #endif #ifdef EMU_F68K - { - void *oldcontext = g_m68kcontext; - fm68k_reset(&PicoCpuFS68k); - g_m68kcontext = oldcontext; - } + fm68k_reset(&PicoCpuFS68k); #endif return 0; diff --git a/pico/debugCPU.c b/pico/debugCPU.c index 36e71a7c..128f4144 100644 --- a/pico/debugCPU.c +++ b/pico/debugCPU.c @@ -30,6 +30,7 @@ static struct Cyclone *currentC68k = NULL; #define other_is_stopped() (currentC68k->state_flags&1) #define other_is_tracing() ((currentC68k->state_flags&2)?1:0) #elif defined(EMU_F68K) +static struct M68K_CONTEXT *g_m68kcontext; #define other_set_sub(s) g_m68kcontext=(s)?&PicoCpuFS68k:&PicoCpuFM68k; #define other_get_sr() g_m68kcontext->sr #define other_dar(i) ((unsigned int*)g_m68kcontext->dreg)[i] diff --git a/pico/sek.c b/pico/sek.c index c76a3e8e..a40e3081 100644 --- a/pico/sek.c +++ b/pico/sek.c @@ -128,15 +128,10 @@ PICO_INTERNAL void SekInit(void) } #endif #ifdef EMU_F68K - { - void *oldcontext = g_m68kcontext; - 
g_m68kcontext = &PicoCpuFM68k; - memset(&PicoCpuFM68k, 0, sizeof(PicoCpuFM68k)); - fm68k_init(); - PicoCpuFM68k.iack_handler = SekIntAckF68K; - PicoCpuFM68k.sr = 0x2704; // Z flag - g_m68kcontext = oldcontext; - } + memset(&PicoCpuFM68k, 0, sizeof(PicoCpuFM68k)); + fm68k_init(); + PicoCpuFM68k.iack_handler = SekIntAckF68K; + PicoCpuFM68k.sr = 0x2704; // Z flag #endif } From 1c25c32c114f8cb76674287d2c85ee5677bfacf7 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 17 Oct 2017 00:53:35 +0300 Subject: [PATCH 0124/1110] sms: improve sr a bit --- pico/mode4.c | 11 ++++++++++- pico/sms.c | 9 ++++++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/pico/mode4.c b/pico/mode4.c index 0f3d766b..a13c38b0 100644 --- a/pico/mode4.c +++ b/pico/mode4.c @@ -91,7 +91,7 @@ static void draw_sprites(int scanline) } sprite_base = (pv->reg[6] & 4) << (13-2-1); - for (i = s = 0; i < 64 && s < 8; i++) + for (i = s = 0; i < 64; i++) { int y; y = sat[i] + 1; @@ -99,6 +99,10 @@ static void draw_sprites(int scanline) break; if (y + h <= scanline || scanline < y) continue; // not on this line + if (s >= 8) { + pv->status |= SR_SOVR; + break; + } sprites_x[s] = xoff + sat[0x80 + i*2]; sprites_addr[s] = sprite_base + ((sat[0x80 + i*2 + 1] & addr_mask) << (5-1)) + @@ -106,6 +110,10 @@ static void draw_sprites(int scanline) s++; } + // really half-assed but better than nothing + if (s > 1) + pv->status |= SR_C; + // now draw all sprites backwards for (--s; s >= 0; s--) TileNormM4(sprites_x[s], sprites_addr[s], 0x10); @@ -300,3 +308,4 @@ void PicoDrawSetOutputMode4(pdso_t which) } } +// vim:shiftwidth=2:ts=2:expandtab diff --git a/pico/sms.c b/pico/sms.c index a2351b0f..9e80c984 100644 --- a/pico/sms.c +++ b/pico/sms.c @@ -31,9 +31,12 @@ static unsigned char vdp_data_read(void) static unsigned char vdp_ctl_read(void) { - unsigned char d = Pico.video.pending_ints << 7; - Pico.video.pending = 0; - Pico.video.pending_ints = 0; + struct PicoVideo *pv = &Pico.video; + unsigned char d; + + 
d = pv->status | (pv->pending_ints << 7); + pv->pending = pv->pending_ints = 0; + pv->status = 0; elprintf(EL_SR, "VDP sr: %02x", d); return d; From eaa147519f92c1460bb0fb317d700fec63cf00ef Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 17 Oct 2017 01:01:26 +0300 Subject: [PATCH 0125/1110] sms: more md-consistent drawing --- pico/mode4.c | 76 +++++++++++++++++++++++----------------------------- 1 file changed, 34 insertions(+), 42 deletions(-) diff --git a/pico/mode4.c b/pico/mode4.c index a13c38b0..37f752bb 100644 --- a/pico/mode4.c +++ b/pico/mode4.c @@ -26,48 +26,34 @@ static int screen_offset; pd[x] = pal|t; \ } -static int TileNormM4(int sx, int addr, int pal) +static void TileNormM4(int sx, unsigned int pack, int pal) { unsigned char *pd = Pico.est.HighCol + sx; - unsigned int pack, t; + unsigned int t; - pack = *(unsigned int *)(PicoMem.vram + addr); /* Get 4 bitplanes / 8 pixels */ - if (pack) - { - PLANAR_PIXEL(0, 0) - PLANAR_PIXEL(1, 1) - PLANAR_PIXEL(2, 2) - PLANAR_PIXEL(3, 3) - PLANAR_PIXEL(4, 4) - PLANAR_PIXEL(5, 5) - PLANAR_PIXEL(6, 6) - PLANAR_PIXEL(7, 7) - return 0; - } - - return 1; /* Tile blank */ + PLANAR_PIXEL(0, 0) + PLANAR_PIXEL(1, 1) + PLANAR_PIXEL(2, 2) + PLANAR_PIXEL(3, 3) + PLANAR_PIXEL(4, 4) + PLANAR_PIXEL(5, 5) + PLANAR_PIXEL(6, 6) + PLANAR_PIXEL(7, 7) } -static int TileFlipM4(int sx,int addr,int pal) +static void TileFlipM4(int sx, unsigned int pack, int pal) { unsigned char *pd = Pico.est.HighCol + sx; - unsigned int pack, t; + unsigned int t; - pack = *(unsigned int *)(PicoMem.vram + addr); /* Get 4 bitplanes / 8 pixels */ - if (pack) - { - PLANAR_PIXEL(0, 7) - PLANAR_PIXEL(1, 6) - PLANAR_PIXEL(2, 5) - PLANAR_PIXEL(3, 4) - PLANAR_PIXEL(4, 3) - PLANAR_PIXEL(5, 2) - PLANAR_PIXEL(6, 1) - PLANAR_PIXEL(7, 0) - return 0; - } - - return 1; /* Tile blank */ + PLANAR_PIXEL(0, 7) + PLANAR_PIXEL(1, 6) + PLANAR_PIXEL(2, 5) + PLANAR_PIXEL(3, 4) + PLANAR_PIXEL(4, 3) + PLANAR_PIXEL(5, 2) + PLANAR_PIXEL(6, 1) + PLANAR_PIXEL(7, 0) } static void 
draw_sprites(int scanline) @@ -75,6 +61,7 @@ static void draw_sprites(int scanline) struct PicoVideo *pv = &Pico.video; unsigned int sprites_addr[8]; unsigned int sprites_x[8]; + unsigned int pack; unsigned char *sat; int xoff = 8; // relative to HighCol, which is (screen - 8) int sprite_base, addr_mask; @@ -115,8 +102,10 @@ static void draw_sprites(int scanline) pv->status |= SR_C; // now draw all sprites backwards - for (--s; s >= 0; s--) - TileNormM4(sprites_x[s], sprites_addr[s], 0x10); + for (--s; s >= 0; s--) { + pack = *(unsigned int *)(PicoMem.vram + sprites_addr[s]); + TileNormM4(sprites_x[s], pack, 0x10); + } } // tilex_ty_prio merged to reduce register pressure @@ -128,7 +117,8 @@ static void draw_strip(const unsigned short *nametab, int dx, int cells, int til // Draw tiles across screen: for (; cells > 0; dx += 8, tilex_ty_prio++, cells--) { - int code, zero; + unsigned int pack; + int code; code = nametab[tilex_ty_prio & 0x1f]; if (code == blank) @@ -147,11 +137,13 @@ static void draw_strip(const unsigned short *nametab, int dx, int cells, int til pal = (code>>7) & 0x10; } - if (code&0x0200) zero = TileFlipM4(dx, addr, pal); - else zero = TileNormM4(dx, addr, pal); - - if (zero) - blank = code; // We know this tile is blank now + pack = *(unsigned int *)(PicoMem.vram + addr); /* Get 4 bitplanes / 8 pixels */ + if (pack == 0) { + blank = code; + continue; + } + if (code & 0x0200) TileFlipM4(dx, pack, pal); + else TileNormM4(dx, pack, pal); } } From 759c9d38464a56c4ad5b68a0d9adb38c22d06861 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 19 Oct 2017 02:44:56 +0300 Subject: [PATCH 0126/1110] pandora: fix build Fixes: df9251536de "libretro: satisfy vita's dynarec needs in a cleaner way" --- cpu/sh2/compiler.c | 2 ++ platform/gp2x/plat.c | 5 +++++ platform/pandora/plat.c | 5 +++++ 3 files changed, 12 insertions(+) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 4403378e..7bd9de0e 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -1017,6 
+1017,7 @@ static void rcache_unlock_all(void) reg_temp[i].flags &= ~HRF_LOCKED; } +#ifdef DRC_CMP static u32 rcache_used_hreg_mask(void) { u32 mask = 0; @@ -1028,6 +1029,7 @@ static u32 rcache_used_hreg_mask(void) return mask; } +#endif static void rcache_clean(void) { diff --git a/platform/gp2x/plat.c b/platform/gp2x/plat.c index 4d35cbd8..614ab7f0 100644 --- a/platform/gp2x/plat.c +++ b/platform/gp2x/plat.c @@ -179,6 +179,11 @@ void plat_video_menu_leave(void) { } +void *plat_mem_get_for_drc(size_t size) +{ + return NULL; +} + void plat_early_init(void) { // just use gettimeofday until plat_init() diff --git a/platform/pandora/plat.c b/platform/pandora/plat.c index 9cd5fc5b..2e9457a4 100644 --- a/platform/pandora/plat.c +++ b/platform/pandora/plat.c @@ -442,6 +442,11 @@ void plat_wait_till_us(unsigned int us_to) */ } +void *plat_mem_get_for_drc(size_t size) +{ + return NULL; +} + void plat_early_init(void) { } From 93f9619ed819dee07948416c98ca2f1c70a22666 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 19 Oct 2017 02:38:20 +0300 Subject: [PATCH 0127/1110] rearrange globals scripted find/replace gives slightly better code on ARM, less unnecessary asm, ~400 bytes saved --- cpu/drc/cmn.c | 2 +- pico/32x/32x.c | 20 +++--- pico/32x/memory.c | 50 +++++++++------ pico/cart.c | 18 +++--- pico/carthw/svp/svp.c | 17 ++--- pico/cd/gfx.c | 2 +- pico/cd/mcd.c | 2 +- pico/cd/memory.c | 2 +- pico/cd/pcm.c | 2 +- pico/debug.c | 18 +++--- pico/draw.c | 16 ++--- pico/media.c | 18 +++--- pico/memory.c | 55 ++++++---------- pico/memory_arm.S | 38 ++--------- pico/mode4.c | 2 +- pico/patch.c | 12 ++-- pico/pico.c | 64 ++++++++----------- pico/pico.h | 27 +++++--- pico/pico/memory.c | 4 +- pico/pico/pico.c | 2 +- pico/pico_cmn.c | 20 +++--- pico/pico_int.h | 6 +- pico/sek.c | 4 +- pico/sms.c | 10 +-- pico/sound/sound.c | 48 +++++++------- pico/sound/ym2612.h | 10 +-- pico/state.c | 30 ++++----- pico/videoport.c | 8 +-- pico/z80if.c | 6 +- platform/common/config_file.c | 22 +++---- 
platform/common/emu.c | 116 +++++++++++++++++----------------- platform/common/menu_pico.c | 72 ++++++++++----------- platform/common/mp3.c | 2 +- platform/gizmondo/emu.c | 54 ++++++++-------- platform/gizmondo/menu.c | 78 +++++++++++------------ platform/gp2x/940ctl.c | 4 +- platform/gp2x/emu.c | 48 +++++++------- platform/gp2x/menu.c | 2 +- platform/libretro/libretro.c | 46 +++++++------- platform/linux/emu.c | 16 ++--- platform/pandora/plat.c | 2 +- platform/psp/emu.c | 54 ++++++++-------- platform/psp/menu.c | 66 +++++++++---------- platform/psp/mp3.c | 2 +- platform/win32/main.c | 2 +- platform/win32/plat.c | 4 +- tools/mkoffsets.c | 2 +- 47 files changed, 532 insertions(+), 573 deletions(-) diff --git a/cpu/drc/cmn.c b/cpu/drc/cmn.c index 27ff9812..3f174a03 100644 --- a/cpu/drc/cmn.c +++ b/cpu/drc/cmn.c @@ -26,7 +26,7 @@ void drc_cmn_init(void) tcache, DRC_TCACHE_SIZE, ret); #ifdef __arm__ - if (PicoOpt & POPT_EN_DRC) + if (PicoIn.opt & POPT_EN_DRC) { static int test_done; if (!test_done) diff --git a/pico/32x/32x.c b/pico/32x/32x.c index 1c166cee..c10e1486 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -99,7 +99,7 @@ void Pico32xStartup(void) elprintf(EL_STATUS|EL_32X, "32X startup"); // TODO: OOM handling - PicoAHW |= PAHW_32X; + PicoIn.AHW |= PAHW_32X; sh2_init(&msh2, 0, &ssh2); msh2.irq_callback = sh2_irq_cb; sh2_init(&ssh2, 1, &msh2); @@ -136,7 +136,7 @@ void p32x_reset_sh2s(void) if (p32x_bios_m == NULL) { sh2_set_gbr(0, 0x20004000); - if (!(PicoAHW & PAHW_MCD)) { + if (!(PicoIn.AHW & PAHW_MCD)) { unsigned int idl_src, idl_dst, idl_size; // initial data load unsigned int vbr; @@ -200,12 +200,12 @@ void PicoUnload32x(void) sh2_finish(&msh2); sh2_finish(&ssh2); - PicoAHW &= ~PAHW_32X; + PicoIn.AHW &= ~PAHW_32X; } void PicoReset32x(void) { - if (PicoAHW & PAHW_32X) { + if (PicoIn.AHW & PAHW_32X) { p32x_trigger_irq(NULL, SekCyclesDone(), P32XI_VRES); p32x_sh2_poll_event(&msh2, SH2_IDLE_STATES, 0); p32x_sh2_poll_event(&ssh2, SH2_IDLE_STATES, 0); @@ 
-216,13 +216,13 @@ void PicoReset32x(void) static void p32x_start_blank(void) { - if (Pico32xDrawMode != PDM32X_OFF && !PicoSkipFrame) { + if (Pico32xDrawMode != PDM32X_OFF && !PicoIn.skipFrame) { int offs, lines; pprof_start(draw); offs = 8; lines = 224; - if ((Pico.video.reg[1] & 8) && !(PicoOpt & POPT_ALT_RENDERER)) { + if ((Pico.video.reg[1] & 8) && !(PicoIn.opt & POPT_ALT_RENDERER)) { offs = 0; lines = 240; } @@ -376,7 +376,7 @@ static void run_sh2(SH2 *sh2, int m68k_cycles) elprintf_sh2(sh2, EL_32X, "+run %u %d @%08x", sh2->m68krcycles_done, cycles, sh2->pc); - done = sh2_execute(sh2, cycles, PicoOpt & POPT_EN_DRC); + done = sh2_execute(sh2, cycles, PicoIn.opt & POPT_EN_DRC); sh2->m68krcycles_done += C_SH2_TO_M68K(*sh2, done); sh2->state &= ~SH2_STATE_RUN; @@ -521,13 +521,13 @@ void sync_sh2s_lockstep(unsigned int m68k_target) } #define CPUS_RUN(m68k_cycles) do { \ - if (PicoAHW & PAHW_MCD) \ + if (PicoIn.AHW & PAHW_MCD) \ pcd_run_cpus(m68k_cycles); \ else \ SekRunM68k(m68k_cycles); \ \ if ((Pico32x.emu_flags & P32XF_Z80_32X_IO) && Pico.m.z80Run \ - && !Pico.m.z80_reset && (PicoOpt & POPT_EN_Z80)) \ + && !Pico.m.z80_reset && (PicoIn.opt & POPT_EN_Z80)) \ PicoSyncZ80(SekCyclesDone()); \ if (Pico32x.emu_flags & (P32XF_68KCPOLL|P32XF_68KVPOLL)) \ p32x_sync_sh2s(SekCyclesDone()); \ @@ -550,7 +550,7 @@ void PicoFrame32x(void) p32x_sh2_poll_event(&msh2, SH2_STATE_VPOLL, 0); p32x_sh2_poll_event(&ssh2, SH2_STATE_VPOLL, 0); - if (PicoAHW & PAHW_MCD) + if (PicoIn.AHW & PAHW_MCD) pcd_prepare_frame(); PicoFrameStart(); diff --git a/pico/32x/memory.c b/pico/32x/memory.c index f3b70067..399c8bd1 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -787,7 +787,7 @@ static u32 PicoRead8_32x_on(u32 a) } if ((a & 0xfc00) != 0x5000) { - if (PicoAHW & PAHW_MCD) + if (PicoIn.AHW & PAHW_MCD) return PicoRead8_mcd_io(a); else return PicoRead8_io(a); @@ -831,7 +831,7 @@ static u32 PicoRead16_32x_on(u32 a) } if ((a & 0xfc00) != 0x5000) { - if (PicoAHW & PAHW_MCD) + if (PicoIn.AHW 
& PAHW_MCD) return PicoRead16_mcd_io(a); else return PicoRead16_io(a); @@ -871,7 +871,7 @@ static void PicoWrite8_32x_on(u32 a, u32 d) } if ((a & 0xfc00) != 0x5000) { - if (PicoAHW & PAHW_MCD) + if (PicoIn.AHW & PAHW_MCD) PicoWrite8_mcd_io(a, d); else PicoWrite8_io(a, d); @@ -909,7 +909,7 @@ static void PicoWrite16_32x_on(u32 a, u32 d) } if ((a & 0xfc00) != 0x5000) { - if (PicoAHW & PAHW_MCD) + if (PicoIn.AHW & PAHW_MCD) PicoWrite16_mcd_io(a, d); else PicoWrite16_io(a, d); @@ -938,15 +938,18 @@ static void PicoWrite16_32x_on(u32 a, u32 d) u32 PicoRead8_32x(u32 a) { u32 d = 0; - if ((a & 0xffc0) == 0x5100) { // a15100 - // regs are always readable - d = ((u8 *)Pico32x.regs)[(a & 0x3f) ^ 1]; - goto out; - } - if ((a & 0xfffc) == 0x30ec) { // a130ec - d = str_mars[a & 3]; - goto out; + if (PicoIn.opt & POPT_EN_32X) { + if ((a & 0xffc0) == 0x5100) { // a15100 + // regs are always readable + d = ((u8 *)Pico32x.regs)[(a & 0x3f) ^ 1]; + goto out; + } + + if ((a & 0xfffc) == 0x30ec) { // a130ec + d = str_mars[a & 3]; + goto out; + } } elprintf(EL_UIO, "m68k unmapped r8 [%06x] @%06x", a, SekPc); @@ -960,14 +963,17 @@ out: u32 PicoRead16_32x(u32 a) { u32 d = 0; - if ((a & 0xffc0) == 0x5100) { // a15100 - d = Pico32x.regs[(a & 0x3f) / 2]; - goto out; - } - if ((a & 0xfffc) == 0x30ec) { // a130ec - d = !(a & 2) ? ('M'<<8)|'A' : ('R'<<8)|'S'; - goto out; + if (PicoIn.opt & POPT_EN_32X) { + if ((a & 0xffc0) == 0x5100) { // a15100 + d = Pico32x.regs[(a & 0x3f) / 2]; + goto out; + } + + if ((a & 0xfffc) == 0x30ec) { // a130ec + d = !(a & 2) ? 
('M'<<8)|'A' : ('R'<<8)|'S'; + goto out; + } } elprintf(EL_UIO, "m68k unmapped r16 [%06x] @%06x", a, SekPc); @@ -980,7 +986,8 @@ out: void PicoWrite8_32x(u32 a, u32 d) { - if ((a & 0xffc0) == 0x5100) { // a15100 + if ((PicoIn.opt & POPT_EN_32X) && (a & 0xffc0) == 0x5100) // a15100 + { u16 *r = Pico32x.regs; elprintf(EL_32X, "m68k 32x w8 [%06x] %02x @%06x", a, d & 0xff, SekPc); @@ -1008,7 +1015,8 @@ void PicoWrite8_32x(u32 a, u32 d) void PicoWrite16_32x(u32 a, u32 d) { - if ((a & 0xffc0) == 0x5100) { // a15100 + if ((PicoIn.opt & POPT_EN_32X) && (a & 0xffc0) == 0x5100) // a15100 + { u16 *r = Pico32x.regs; elprintf(EL_UIO, "m68k 32x w16 [%06x] %04x @%06x", a, d & 0xffff, SekPc); diff --git a/pico/cart.c b/pico/cart.c index f17ce5e7..3148f36d 100644 --- a/pico/cart.c +++ b/pico/cart.c @@ -576,9 +576,9 @@ int PicoCartLoad(pm_file *f,unsigned char **prom,unsigned int *psize,int is_sms) if (!is_sms) { // maybe we are loading MegaCD BIOS? - if (!(PicoAHW & PAHW_MCD) && size == 0x20000 && (!strncmp((char *)rom+0x124, "BOOT", 4) || + if (!(PicoIn.AHW & PAHW_MCD) && size == 0x20000 && (!strncmp((char *)rom+0x124, "BOOT", 4) || !strncmp((char *)rom+0x128, "BOOT", 4))) { - PicoAHW |= PAHW_MCD; + PicoIn.AHW |= PAHW_MCD; } // Check for SMD: @@ -628,7 +628,7 @@ int PicoCartInsert(unsigned char *rom, unsigned int romsize, const char *carthw_ } pdb_cleanup(); - PicoAHW &= PAHW_MCD|PAHW_SMS; + PicoIn.AHW &= PAHW_MCD|PAHW_SMS; PicoCartMemSetup = NULL; PicoDmaHook = NULL; @@ -637,13 +637,13 @@ int PicoCartInsert(unsigned char *rom, unsigned int romsize, const char *carthw_ PicoLoadStateHook = NULL; carthw_chunks = NULL; - if (!(PicoAHW & (PAHW_MCD|PAHW_SMS))) + if (!(PicoIn.AHW & (PAHW_MCD|PAHW_SMS))) PicoCartDetect(carthw_cfg); // setup correct memory map for loaded ROM - switch (PicoAHW) { + switch (PicoIn.AHW) { default: - elprintf(EL_STATUS|EL_ANOMALY, "starting in unknown hw configuration: %x", PicoAHW); + elprintf(EL_STATUS|EL_ANOMALY, "starting in unknown hw configuration: %x", 
PicoIn.AHW); case 0: case PAHW_SVP: PicoMemSetup(); break; case PAHW_MCD: PicoMemSetupCD(); break; @@ -654,7 +654,7 @@ int PicoCartInsert(unsigned char *rom, unsigned int romsize, const char *carthw_ if (PicoCartMemSetup != NULL) PicoCartMemSetup(); - if (PicoAHW & PAHW_SMS) + if (PicoIn.AHW & PAHW_SMS) PicoPowerMS(); else PicoPower(); @@ -681,7 +681,7 @@ void PicoCartUnload(void) PicoCartUnloadHook = NULL; } - if (PicoAHW & PAHW_32X) + if (PicoIn.AHW & PAHW_32X) PicoUnload32x(); if (Pico.rom != NULL) { @@ -965,7 +965,7 @@ static void parse_carthw(const char *carthw_cfg, int *fill_sram) else if (strcmp(p, "filled_sram") == 0) *fill_sram = 1; else if (strcmp(p, "force_6btn") == 0) - PicoQuirks |= PQUIRK_FORCE_6BTN; + PicoIn.quirks |= PQUIRK_FORCE_6BTN; else { elprintf(EL_STATUS, "carthw:%d: unsupported prop: %s", line, p); goto bad_nomsg; diff --git a/pico/carthw/svp/svp.c b/pico/carthw/svp/svp.c index 8861de04..c50eb850 100644 --- a/pico/carthw/svp/svp.c +++ b/pico/carthw/svp/svp.c @@ -30,8 +30,9 @@ #include #include "compiler.h" +#define SVP_CYCLES_LINE 850 + svp_t *svp = NULL; -int PicoSVPCycles = 850; // cycles/line, just a guess static int svp_dyn_ready = 0; /* save state stuff */ @@ -57,7 +58,7 @@ static void PicoSVPReset(void) memcpy(svp->iram_rom + 0x800, Pico.rom + 0x800, 0x20000 - 0x800); ssp1601_reset(&svp->ssp1601); #ifdef _SVP_DRC - if ((PicoOpt & POPT_EN_DRC) && svp_dyn_ready) + if ((PicoIn.opt & POPT_EN_DRC) && svp_dyn_ready) ssp1601_dyn_reset(&svp->ssp1601); #endif } @@ -77,17 +78,17 @@ static void PicoSVPLine(void) #endif #ifdef _SVP_DRC - if ((PicoOpt & POPT_EN_DRC) && svp_dyn_ready) - ssp1601_dyn_run(PicoSVPCycles * count); + if ((PicoIn.opt & POPT_EN_DRC) && svp_dyn_ready) + ssp1601_dyn_run(SVP_CYCLES_LINE * count); else #endif { - ssp1601_run(PicoSVPCycles * count); + ssp1601_run(SVP_CYCLES_LINE * count); svp_dyn_ready = 0; // just in case } // test mode - //if (Pico.m.frame_count == 13) PicoPad[0] |= 0xff; + //if (Pico.m.frame_count == 13) 
PicoIn.pad[0] |= 0xff; } @@ -148,7 +149,7 @@ void PicoSVPStartup(void) // init SVP compiler svp_dyn_ready = 0; #ifdef _SVP_DRC - if (PicoOpt & POPT_EN_DRC) { + if (PicoIn.opt & POPT_EN_DRC) { if (ssp1601_dyn_startup()) return; svp_dyn_ready = 1; @@ -167,6 +168,6 @@ void PicoSVPStartup(void) svp_states[1].ptr = svp->dram; svp_states[2].ptr = &svp->ssp1601; carthw_chunks = svp_states; - PicoAHW |= PAHW_SVP; + PicoIn.AHW |= PAHW_SVP; } diff --git a/pico/cd/gfx.c b/pico/cd/gfx.c index a2c97be0..e3434e11 100644 --- a/pico/cd/gfx.c +++ b/pico/cd/gfx.c @@ -434,7 +434,7 @@ void gfx_update(unsigned int cycles) pcd_event_schedule(cycles, PCD_EVENT_GFX, 5 * w * lines); } - if (PicoOpt & POPT_EN_MCD_GFX) + if (PicoIn.opt & POPT_EN_MCD_GFX) { /* render lines */ while (lines--) diff --git a/pico/cd/mcd.c b/pico/cd/mcd.c index 72488343..51d9d403 100644 --- a/pico/cd/mcd.c +++ b/pico/cd/mcd.c @@ -79,7 +79,7 @@ PICO_INTERNAL int PicoResetMCD(void) // reset button doesn't affect MCD hardware // use Pico.sv.data for RAM cart - if (PicoOpt & POPT_EN_MCD_RAMCART) { + if (PicoIn.opt & POPT_EN_MCD_RAMCART) { if (Pico.sv.data == NULL) Pico.sv.data = calloc(1, 0x12000); } diff --git a/pico/cd/memory.c b/pico/cd/memory.c index 6890b576..22694389 100644 --- a/pico/cd/memory.c +++ b/pico/cd/memory.c @@ -1099,7 +1099,7 @@ PICO_INTERNAL void PicoMemSetupCD(void) // main68k map (BIOS mapped by PicoMemSetup()): // RAM cart - if (PicoOpt & POPT_EN_MCD_RAMCART) { + if (PicoIn.opt & POPT_EN_MCD_RAMCART) { cpu68k_map_set(m68k_read8_map, 0x400000, 0x7fffff, PicoReadM68k8_ramc, 1); cpu68k_map_set(m68k_read16_map, 0x400000, 0x7fffff, PicoReadM68k16_ramc, 1); cpu68k_map_set(m68k_write8_map, 0x400000, 0x7fffff, PicoWriteM68k8_ramc, 1); diff --git a/pico/cd/pcm.c b/pico/cd/pcm.c index 77bfa300..27fb2ac9 100644 --- a/pico/cd/pcm.c +++ b/pico/cd/pcm.c @@ -127,7 +127,7 @@ void pcd_pcm_update(int *buf32, int length, int stereo) pcd_pcm_sync(SekCyclesDoneS68k()); - if (!Pico_mcd->pcm_mixbuf_dirty || !(PicoOpt & 
POPT_EN_MCD_PCM)) + if (!Pico_mcd->pcm_mixbuf_dirty || !(PicoIn.opt & POPT_EN_MCD_PCM)) goto out; step = (Pico_mcd->pcm_mixpos << 16) / length; diff --git a/pico/debug.c b/pico/debug.c index 82ae8a55..d4fb575a 100644 --- a/pico/debug.c +++ b/pico/debug.c @@ -201,7 +201,7 @@ void PDebugShowPalette(unsigned short *screen, int stride) int x, y; Pico.m.dirtyPal = 1; - if (PicoAHW & PAHW_SMS) + if (PicoIn.AHW & PAHW_SMS) PicoDoHighPal555M4(); else PicoDoHighPal555(1, 0, est); @@ -328,7 +328,7 @@ void PDebugDumpMem(void) dump_ram_noswab(PicoMem.zram, "dumps/zram.bin"); dump_ram(PicoMem.cram, "dumps/cram.bin"); - if (PicoAHW & PAHW_SMS) + if (PicoIn.AHW & PAHW_SMS) { dump_ram_noswab(PicoMem.vramb, "dumps/vram.bin"); } @@ -339,7 +339,7 @@ void PDebugDumpMem(void) dump_ram(PicoMem.vsram,"dumps/vsram.bin"); } - if (PicoAHW & PAHW_MCD) + if (PicoIn.AHW & PAHW_MCD) { dump_ram(Pico_mcd->prg_ram, "dumps/prg_ram.bin"); if (Pico_mcd->s68k_regs[3]&4) // 1M mode? @@ -355,7 +355,7 @@ void PDebugDumpMem(void) } #ifndef NO_32X - if (PicoAHW & PAHW_32X) + if (PicoIn.AHW & PAHW_32X) { dump_ram(Pico32xMem->sdram, "dumps/sdram.bin"); dump_ram(Pico32xMem->dram[0], "dumps/dram0.bin"); @@ -371,7 +371,7 @@ void PDebugZ80Frame(void) { int lines, line_sample; - if (PicoAHW & PAHW_SMS) + if (PicoIn.AHW & PAHW_SMS) return; if (Pico.m.pal) { @@ -385,12 +385,12 @@ void PDebugZ80Frame(void) z80_resetCycles(); PsndStartFrame(); - if (/*Pico.m.z80Run &&*/ !Pico.m.z80_reset && (PicoOpt&POPT_EN_Z80)) + if (/*Pico.m.z80Run &&*/ !Pico.m.z80_reset && (PicoIn.opt&POPT_EN_Z80)) PicoSyncZ80(Pico.t.m68c_cnt + line_sample * 488); if (PsndOut) PsndGetSamples(line_sample); - if (/*Pico.m.z80Run &&*/ !Pico.m.z80_reset && (PicoOpt&POPT_EN_Z80)) { + if (/*Pico.m.z80Run &&*/ !Pico.m.z80_reset && (PicoIn.opt&POPT_EN_Z80)) { PicoSyncZ80(Pico.t.m68c_cnt + 224 * 488); z80_int(); } @@ -398,7 +398,7 @@ void PDebugZ80Frame(void) PsndGetSamples(224); // sync z80 - if (/*Pico.m.z80Run &&*/ !Pico.m.z80_reset && 
(PicoOpt&POPT_EN_Z80)) { + if (/*Pico.m.z80Run &&*/ !Pico.m.z80_reset && (PicoIn.opt&POPT_EN_Z80)) { Pico.t.m68c_cnt += Pico.m.pal ? 151809 : 127671; // cycles adjusted for converter PicoSyncZ80(Pico.t.m68c_cnt); } @@ -412,7 +412,7 @@ void PDebugZ80Frame(void) void PDebugCPUStep(void) { - if (PicoAHW & PAHW_SMS) + if (PicoIn.AHW & PAHW_SMS) z80_run_nr(1); else SekStepM68k(); diff --git a/pico/draw.c b/pico/draw.c index 83010a49..0939f3fe 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -1027,7 +1027,7 @@ static NOINLINE void PrepareSprites(int full) if (!(Pico.video.reg[12]&1)) max_sprites = 64, max_line_sprites = 16, max_width = 264; - if (PicoOpt & POPT_DIS_SPRITE_LIM) + if (PicoIn.opt & POPT_DIS_SPRITE_LIM) max_line_sprites = MAX_LINE_SPRITES; if (pvid->reg[1]&8) max_lines = 240; @@ -1267,7 +1267,7 @@ void FinalizeLine555(int sh, int line, struct PicoEState *est) if (Pico.video.reg[12]&1) { len = 320; } else { - if (!(PicoOpt&POPT_DIS_32C_BORDER)) pd+=32; + if (!(PicoIn.opt&POPT_DIS_32C_BORDER)) pd+=32; len = 256; } @@ -1314,7 +1314,7 @@ static void FinalizeLine8bit(int sh, int line, struct PicoEState *est) if (Pico.video.reg[12]&1) { len = 320; } else { - if (!(PicoOpt & POPT_DIS_32C_BORDER)) + if (!(PicoIn.opt & POPT_DIS_32C_BORDER)) pd += 32; len = 256; } @@ -1427,7 +1427,7 @@ static int DrawDisplay(int sh) else if (est->rendstatus & PDRAW_INTERLACE) DrawAllSpritesInterlace(1, sh); // have sprites without layer pri bit ontop of sprites with that bit - else if ((sprited[1] & 0xd0) == 0xd0 && (PicoOpt & POPT_ACC_SPRITES)) + else if ((sprited[1] & 0xd0) == 0xd0 && (PicoIn.opt & POPT_ACC_SPRITES)) DrawSpritesHiAS(sprited, sh); else if (sh && (sprited[1] & SPRL_MAY_HAVE_OP)) DrawSpritesSHi(sprited, est); @@ -1481,7 +1481,7 @@ PICO_INTERNAL void PicoFrameStart(void) Pico.est.DrawScanline = 0; skip_next_line = 0; - if (PicoOpt & POPT_ALT_RENDERER) + if (PicoIn.opt & POPT_ALT_RENDERER) return; if (Pico.m.dirtyPal) @@ -1577,7 +1577,7 @@ void PicoDrawUpdateHighPal(void) { 
struct PicoEState *est = &Pico.est; int sh = (Pico.video.reg[0xC] & 8) >> 3; // shadow/hilight? - if (PicoOpt & POPT_ALT_RENDERER) + if (PicoIn.opt & POPT_ALT_RENDERER) sh = 0; // no s/h support PicoDoHighPal555(sh, 0, &Pico.est); @@ -1597,7 +1597,7 @@ void PicoDrawSetOutFormat(pdso_t which, int use_32x_line_mode) break; case PDF_RGB555: - if ((PicoAHW & PAHW_32X) && use_32x_line_mode) + if ((PicoIn.AHW & PAHW_32X) && use_32x_line_mode) FinalizeLine = FinalizeLine32xRGB555; else FinalizeLine = FinalizeLine555; @@ -1640,7 +1640,7 @@ void PicoDrawSetCallbacks(int (*begin)(unsigned int num), int (*end)(unsigned in PicoScan32xBegin = NULL; PicoScan32xEnd = NULL; - if ((PicoAHW & PAHW_32X) && FinalizeLine != FinalizeLine32xRGB555) { + if ((PicoIn.AHW & PAHW_32X) && FinalizeLine != FinalizeLine32xRGB555) { PicoScan32xBegin = begin; PicoScan32xEnd = end; } diff --git a/pico/media.c b/pico/media.c index b7fa7f56..f9b053bd 100644 --- a/pico/media.c +++ b/pico/media.c @@ -209,11 +209,11 @@ enum media_type_e PicoLoadMedia(const char *filename, if (media_type == PM_BAD_DETECT) goto out; - if ((PicoAHW & PAHW_MCD) && Pico_mcd != NULL) + if ((PicoIn.AHW & PAHW_MCD) && Pico_mcd != NULL) cdd_unload(); PicoCartUnload(); - PicoAHW = 0; - PicoQuirks = 0; + PicoIn.AHW = 0; + PicoIn.quirks = 0; if (media_type == PM_CD) { @@ -230,7 +230,7 @@ enum media_type_e PicoLoadMedia(const char *filename, goto out; } - PicoAHW |= PAHW_MCD; + PicoIn.AHW |= PAHW_MCD; } else { media_type = PM_BAD_CD; @@ -239,7 +239,7 @@ enum media_type_e PicoLoadMedia(const char *filename, } else if (media_type == PM_MARK3) { lprintf("detected SMS ROM\n"); - PicoAHW = PAHW_SMS; + PicoIn.AHW = PAHW_SMS; } rom = pm_open(rom_fname); @@ -249,7 +249,7 @@ enum media_type_e PicoLoadMedia(const char *filename, goto out; } - ret = PicoCartLoad(rom, &rom_data, &rom_size, (PicoAHW & PAHW_SMS) ? 1 : 0); + ret = PicoCartLoad(rom, &rom_data, &rom_size, (PicoIn.AHW & PAHW_SMS) ? 
1 : 0); pm_close(rom); if (ret != 0) { if (ret == 2) lprintf("Out of memory\n"); @@ -266,7 +266,7 @@ enum media_type_e PicoLoadMedia(const char *filename, goto out; } - if (!(PicoAHW & PAHW_SMS)) { + if (!(PicoIn.AHW & PAHW_SMS)) { unsigned short *d = (unsigned short *)(rom_data + 4); if ((((d[0] << 16) | d[1]) & 0xffffff) >= (int)rom_size) { lprintf("bad reset vector\n"); @@ -276,7 +276,7 @@ enum media_type_e PicoLoadMedia(const char *filename, } // load config for this ROM (do this before insert to get correct region) - if (!(PicoAHW & PAHW_MCD)) { + if (!(PicoIn.AHW & PAHW_MCD)) { memcpy(media_id_header, rom_data + 0x100, sizeof(media_id_header)); if (do_region_override != NULL) do_region_override(filename); @@ -300,7 +300,7 @@ enum media_type_e PicoLoadMedia(const char *filename, Pico.m.ncart_in = 1; } - if (PicoQuirks & PQUIRK_FORCE_6BTN) + if (PicoIn.quirks & PQUIRK_FORCE_6BTN) PicoSetInputDevice(0, PICO_INPUT_PAD_6BTN); out: diff --git a/pico/memory.c b/pico/memory.c index e747552f..b49956e3 100644 --- a/pico/memory.c +++ b/pico/memory.c @@ -210,7 +210,7 @@ void cyclone_crashed(u32 pc, struct Cyclone *context) static u32 read_pad_3btn(int i, u32 out_bits) { - u32 pad = ~PicoPadInt[i]; // Get inverse of pad MXYZ SACB RLDU + u32 pad = ~PicoIn.padInt[i]; // Get inverse of pad MXYZ SACB RLDU u32 value; if (out_bits & 0x40) // TH @@ -224,7 +224,7 @@ static u32 read_pad_3btn(int i, u32 out_bits) static u32 read_pad_6btn(int i, u32 out_bits) { - u32 pad = ~PicoPadInt[i]; // Get inverse of pad MXYZ SACB RLDU + u32 pad = ~PicoIn.padInt[i]; // Get inverse of pad MXYZ SACB RLDU int phase = Pico.m.padTHPhase[i]; u32 value; @@ -349,7 +349,7 @@ void NOINLINE ctl_write_z80busreq(u32 d) } else { - if ((PicoOpt&POPT_EN_Z80) && !Pico.m.z80_reset) { + if ((PicoIn.opt & POPT_EN_Z80) && !Pico.m.z80_reset) { pprof_start(m68k); PicoSyncZ80(SekCyclesDone()); pprof_end_sub(m68k); @@ -367,7 +367,7 @@ void NOINLINE ctl_write_z80reset(u32 d) { if (d) { - if ((PicoOpt&POPT_EN_Z80) && 
Pico.m.z80Run) { + if ((PicoIn.opt & POPT_EN_Z80) && Pico.m.z80Run) { pprof_start(m68k); PicoSyncZ80(SekCyclesDone()); pprof_end_sub(m68k); @@ -542,8 +542,8 @@ static void PicoWrite8_z80(u32 a, u32 d) return; } if ((a & 0x6000) == 0x4000) { // FM Sound - if (PicoOpt & POPT_EN_FM) - emustatus |= ym2612_write_local(a&3, d&0xff, 0)&1; + if (PicoIn.opt & POPT_EN_FM) + Pico.m.status |= ym2612_write_local(a & 3, d & 0xff, 0) & 1; return; } // TODO: probably other VDP access too? Maybe more mirrors? @@ -597,12 +597,8 @@ u32 PicoRead8_io(u32 a) goto end; } - if (PicoOpt & POPT_EN_32X) { - d = PicoRead8_32x(a); - goto end; - } + d = PicoRead8_32x(a); - d = m68k_unmapped_read8(a); end: return d; } @@ -632,12 +628,8 @@ u32 PicoRead16_io(u32 a) goto end; } - if (PicoOpt & POPT_EN_32X) { - d = PicoRead16_32x(a); - goto end; - } + d = PicoRead16_32x(a); - d = m68k_unmapped_read16(a); end: return d; } @@ -662,12 +654,7 @@ void PicoWrite8_io(u32 a, u32 d) Pico.m.sram_reg |= (u8)(d & 3); return; } - if (PicoOpt & POPT_EN_32X) { - PicoWrite8_32x(a, d); - return; - } - - m68k_unmapped_write8(a, d); + PicoWrite8_32x(a, d); } void PicoWrite16_io(u32 a, u32 d) @@ -690,11 +677,7 @@ void PicoWrite16_io(u32 a, u32 d) Pico.m.sram_reg |= (u8)(d & 3); return; } - if (PicoOpt & POPT_EN_32X) { - PicoWrite16_32x(a, d); - return; - } - m68k_unmapped_write16(a, d); + PicoWrite16_32x(a, d); } #endif // _ASM_MEMORY_C @@ -971,7 +954,7 @@ static int ym2612_write_local(u32 a, u32 d, int is_from_z80) ym2612.OPN.ST.address = d; ym2612.addr_A1 = 0; #ifdef __GP2X__ - if (PicoOpt & POPT_EXT_FM) YM2612Write_940(a, d, -1); + if (PicoIn.opt & POPT_EXT_FM) YM2612Write_940(a, d, -1); #endif return 0; @@ -1036,7 +1019,7 @@ static int ym2612_write_local(u32 a, u32 d, int is_from_z80) if ((d ^ old_mode) & 0xc0) { #ifdef __GP2X__ - if (PicoOpt & POPT_EXT_FM) return YM2612Write_940(a, d, get_scanline(is_from_z80)); + if (PicoIn.opt & POPT_EXT_FM) return YM2612Write_940(a, d, get_scanline(is_from_z80)); #endif return 
1; } @@ -1049,7 +1032,7 @@ static int ym2612_write_local(u32 a, u32 d, int is_from_z80) PsndDacLine = scanline; } #ifdef __GP2X__ - if (PicoOpt & POPT_EXT_FM) YM2612Write_940(a, d, scanline); + if (PicoIn.opt & POPT_EXT_FM) YM2612Write_940(a, d, scanline); #endif return 0; } @@ -1060,7 +1043,7 @@ static int ym2612_write_local(u32 a, u32 d, int is_from_z80) ym2612.OPN.ST.address = d; ym2612.addr_A1 = 1; #ifdef __GP2X__ - if (PicoOpt & POPT_EXT_FM) YM2612Write_940(a, d, -1); + if (PicoIn.opt & POPT_EXT_FM) YM2612Write_940(a, d, -1); #endif return 0; @@ -1074,7 +1057,7 @@ static int ym2612_write_local(u32 a, u32 d, int is_from_z80) } #ifdef __GP2X__ - if (PicoOpt & POPT_EXT_FM) + if (PicoIn.opt & POPT_EXT_FM) return YM2612Write_940(a, d, get_scanline(is_from_z80)); #endif return YM2612Write_(a, d); @@ -1123,7 +1106,7 @@ void ym2612_pack_state(void) elprintf(EL_YMTIMER, "save: timer b %i/%i", tbt >> 16, tbc); #ifdef __GP2X__ - if (PicoOpt & POPT_EXT_FM) + if (PicoIn.opt & POPT_EXT_FM) YM2612PicoStateSave2_940(tat, tbt); else #endif @@ -1158,7 +1141,7 @@ void ym2612_unpack_state(void) } #ifdef __GP2X__ - if (PicoOpt & POPT_EXT_FM) + if (PicoIn.opt & POPT_EXT_FM) ret = YM2612PicoStateLoad2_940(&tat, &tbt); else #endif @@ -1233,8 +1216,8 @@ static unsigned char z80_md_bank_read(unsigned short a) static void z80_md_ym2612_write(unsigned int a, unsigned char data) { - if (PicoOpt & POPT_EN_FM) - emustatus |= ym2612_write_local(a, data, 1) & 1; + if (PicoIn.opt & POPT_EN_FM) + Pico.m.status |= ym2612_write_local(a, data, 1) & 1; } static void z80_md_vdp_br_write(unsigned int a, unsigned char data) diff --git a/pico/memory_arm.S b/pico/memory_arm.S index 87846d6c..bfe8ca10 100644 --- a/pico/memory_arm.S +++ b/pico/memory_arm.S @@ -70,7 +70,7 @@ PicoRead8_io: @ u32 a m_read8_not_io: and r2, r0, #0xfc00 cmp r2, #0x1000 - bne m_read8_not_brq + bne PicoRead8_32x ldr r3, =Pico mov r1, r0 @@ -92,14 +92,6 @@ m_read8_not_io: orr r0, r0, r2 bx lr -m_read8_not_brq: - ldr r2, =PicoOpt - 
ldr r2, [r2] - tst r2, #POPT_EN_32X - bne PicoRead8_32x - mov r0, #0 - bx lr - @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ PicoRead16_sram: @ u32 a, u32 d @@ -146,7 +138,7 @@ PicoRead16_io: @ u32 a, u32 d m_read16_not_io: and r2, r0, #0xfc00 cmp r2, #0x1000 - bne m_read16_not_brq + bne PicoRead16_32x ldr r3, =Pico and r2, r0, #0xff00 @@ -165,14 +157,6 @@ m_read16_not_io: orr r0, r0, r2, lsl #8 bx lr -m_read16_not_brq: - ldr r2, =PicoOpt - ldr r2, [r2] - tst r2, #POPT_EN_32X - bne PicoRead16_32x - mov r0, #0 - bx lr - @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ PicoWrite8_io: @ u32 a, u32 d @@ -197,7 +181,7 @@ m_write8_not_z80ctl: eor r2, r0, #0xa10000 eor r2, r2, #0x003000 eors r2, r2, #0x0000f1 - bne m_write8_not_sreg + bne PicoWrite8_32x ldr r3, =Pico ldrb r2, [r3, #OFS_Pico_m_sram_reg] and r1, r1, #(SRR_MAPPED|SRR_READONLY) @@ -206,13 +190,6 @@ m_write8_not_z80ctl: strb r2, [r3, #OFS_Pico_m_sram_reg] bx lr -m_write8_not_sreg: - ldr r2, =PicoOpt - ldr r2, [r2] - tst r2, #POPT_EN_32X - bne PicoWrite8_32x - bx lr - @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ PicoWrite16_io: @ u32 a, u32 d @@ -234,7 +211,7 @@ m_write16_not_z80ctl: eor r2, r0, #0xa10000 eor r2, r2, #0x003000 eors r2, r2, #0x0000f0 - bne m_write16_not_sreg + bne PicoWrite16_32x ldr r3, =Pico ldrb r2, [r3, #OFS_Pico_m_sram_reg] and r1, r1, #(SRR_MAPPED|SRR_READONLY) @@ -243,13 +220,6 @@ m_write16_not_z80ctl: strb r2, [r3, #OFS_Pico_m_sram_reg] bx lr -m_write16_not_sreg: - ldr r2, =PicoOpt - ldr r2, [r2] - tst r2, #POPT_EN_32X - bne PicoWrite16_32x - bx lr - .pool @ vim:filetype=armasm diff --git a/pico/mode4.c b/pico/mode4.c index 37f752bb..8c063857 100644 --- a/pico/mode4.c +++ b/pico/mode4.c @@ -284,7 +284,7 @@ static void FinalizeLine8bitM4(int line) { unsigned char *pd = Pico.est.DrawLineDest; - if (!(PicoOpt & POPT_DIS_32C_BORDER)) + if (!(PicoIn.opt & POPT_DIS_32C_BORDER)) pd += 32; 
memcpy(pd, Pico.est.HighCol + 8, 256); diff --git a/pico/patch.c b/pico/patch.c index d534a2c9..c0c5b00f 100644 --- a/pico/patch.c +++ b/pico/patch.c @@ -290,7 +290,7 @@ void decode(const char* code, struct patch* result) /* Initialize the result */ result->addr = result->data = result->comp = 0; - if(!(PicoAHW & PAHW_SMS)) + if(!(PicoIn.AHW & PAHW_SMS)) { //If Genesis @@ -445,7 +445,7 @@ void PicoPatchPrepare(void) PicoPatches[i].data_old = *(unsigned short *)(Pico.rom + addr); else { - if(!(PicoAHW & PAHW_SMS)) + if(!(PicoIn.AHW & PAHW_SMS)) PicoPatches[i].data_old = (unsigned short) m68k_read16(addr); else ;// wrong: PicoPatches[i].data_old = (unsigned char) PicoRead8_z80(addr); @@ -468,7 +468,7 @@ void PicoPatchApply(void) { if (PicoPatches[i].active) { - if (!(PicoAHW & PAHW_SMS)) + if (!(PicoIn.AHW & PAHW_SMS)) *(unsigned short *)(Pico.rom + addr) = PicoPatches[i].data; else if (!PicoPatches[i].comp || PicoPatches[i].comp == *(char *)(Pico.rom + addr)) *(char *)(Pico.rom + addr) = (char) PicoPatches[i].data; @@ -480,7 +480,7 @@ void PicoPatchApply(void) if (PicoPatches[u].addr == addr) break; if (u == i) { - if (!(PicoAHW & PAHW_SMS)) + if (!(PicoIn.AHW & PAHW_SMS)) *(unsigned short *)(Pico.rom + addr) = PicoPatches[i].data_old; else *(char *)(Pico.rom + addr) = (char) PicoPatches[i].data_old; @@ -493,7 +493,7 @@ void PicoPatchApply(void) { if (PicoPatches[i].active) { - if (!(PicoAHW & PAHW_SMS)) + if (!(PicoIn.AHW & PAHW_SMS)) m68k_write16(addr,PicoPatches[i].data); else ;// wrong: PicoWrite8_z80(addr,PicoPatches[i].data); @@ -505,7 +505,7 @@ void PicoPatchApply(void) if (PicoPatches[u].addr == addr) break; if (u == i) { - if (!(PicoAHW & PAHW_SMS)) + if (!(PicoIn.AHW & PAHW_SMS)) m68k_write16(PicoPatches[i].addr,PicoPatches[i].data_old); else ;// wrong: PicoWrite8_z80(PicoPatches[i].addr,PicoPatches[i].data_old); diff --git a/pico/pico.c b/pico/pico.c index bf9df268..2e561c61 100644 --- a/pico/pico.c +++ b/pico/pico.c @@ -12,16 +12,7 @@ struct Pico Pico; 
struct PicoMem PicoMem; -int PicoOpt; -int PicoSkipFrame; // skip rendering frame? -int PicoPad[2]; // Joypads, format is MXYZ SACB RLDU -int PicoPadInt[2]; // internal copy -int PicoAHW; // active addon hardware: PAHW_* -int PicoQuirks; // game-specific quirks -int PicoRegionOverride; // override the region detection 0: Auto, 1: Japan NTSC, 2: Japan PAL, 4: US, 8: Europe -int PicoAutoRgnOrder; - -int emustatus; // rapid_ym2612, multi_ym_updates +PicoInterface PicoIn; void (*PicoWriteSound)(int len) = NULL; // called at the best time to send sound buffer (PsndOut) to hardware void (*PicoResetHook)(void) = NULL; @@ -33,13 +24,13 @@ void PicoInit(void) // Blank space for state: memset(&Pico,0,sizeof(Pico)); memset(&PicoMem,0,sizeof(PicoMem)); - memset(&PicoPad,0,sizeof(PicoPad)); - memset(&PicoPadInt,0,sizeof(PicoPadInt)); + memset(&PicoIn.pad,0,sizeof(PicoIn.pad)); + memset(&PicoIn.padInt,0,sizeof(PicoIn.padInt)); Pico.est.Pico = &Pico; Pico.est.PicoMem_vram = PicoMem.vram; Pico.est.PicoMem_cram = PicoMem.cram; - Pico.est.PicoOpt = &PicoOpt; + Pico.est.PicoOpt = &PicoIn.opt; // Init CPUs: SekInit(); @@ -56,7 +47,7 @@ void PicoInit(void) // to be called once on emu exit void PicoExit(void) { - if (PicoAHW & PAHW_MCD) + if (PicoIn.AHW & PAHW_MCD) PicoExitMCD(); PicoCartUnload(); z80_exit(); @@ -89,10 +80,10 @@ void PicoPower(void) Pico.video.reg[0xc] = 0x81; Pico.video.reg[0xf] = 0x02; - if (PicoAHW & PAHW_MCD) + if (PicoIn.AHW & PAHW_MCD) PicoPowerMCD(); - if (PicoOpt & POPT_EN_32X) + if (PicoIn.opt & POPT_EN_32X) PicoPower32x(); PicoReset(); @@ -103,9 +94,9 @@ PICO_INTERNAL void PicoDetectRegion(void) int support=0, hw=0, i; unsigned char pal=0; - if (PicoRegionOverride) + if (PicoIn.regionOverride) { - support = PicoRegionOverride; + support = PicoIn.regionOverride; } else { @@ -138,10 +129,10 @@ PICO_INTERNAL void PicoDetectRegion(void) } // auto detection order override - if (PicoAutoRgnOrder) { - if (((PicoAutoRgnOrder>>0)&0xf) & support) support = 
(PicoAutoRgnOrder>>0)&0xf; - else if (((PicoAutoRgnOrder>>4)&0xf) & support) support = (PicoAutoRgnOrder>>4)&0xf; - else if (((PicoAutoRgnOrder>>8)&0xf) & support) support = (PicoAutoRgnOrder>>8)&0xf; + if (PicoIn.autoRgnOrder) { + if (((PicoIn.autoRgnOrder>>0)&0xf) & support) support = (PicoIn.autoRgnOrder>>0)&0xf; + else if (((PicoIn.autoRgnOrder>>4)&0xf) & support) support = (PicoIn.autoRgnOrder>>4)&0xf; + else if (((PicoIn.autoRgnOrder>>8)&0xf) & support) support = (PicoIn.autoRgnOrder>>8)&0xf; } // Try to pick the best hardware value for English/50hz: @@ -161,17 +152,16 @@ int PicoReset(void) return 1; #if defined(CPU_CMP_R) || defined(CPU_CMP_W) || defined(DRC_CMP) - PicoOpt |= POPT_DIS_VDP_FIFO|POPT_DIS_IDLE_DET; + PicoIn.opt |= POPT_DIS_VDP_FIFO|POPT_DIS_IDLE_DET; #endif /* must call now, so that banking is reset, and correct vectors get fetched */ if (PicoResetHook) PicoResetHook(); - memset(&PicoPadInt,0,sizeof(PicoPadInt)); - emustatus = 0; + memset(&PicoIn.padInt, 0, sizeof(PicoIn.padInt)); - if (PicoAHW & PAHW_SMS) { + if (PicoIn.AHW & PAHW_SMS) { PicoResetMS(); return 0; } @@ -180,7 +170,7 @@ int PicoReset(void) // ..but do not reset SekCycle* to not desync with addons // s68k doesn't have the TAS quirk, so we just globally set normal TAS handler in MCD mode (used by Batman games). 
- SekSetRealTAS(PicoAHW & PAHW_MCD); + SekSetRealTAS(PicoIn.AHW & PAHW_MCD); Pico.m.dirtyPal = 1; @@ -193,21 +183,21 @@ int PicoReset(void) PsndReset(); // pal must be known here // create an empty "dma" to cause 68k exec start at random frame location - if (Pico.m.dma_xfers == 0 && !(PicoOpt & POPT_DIS_VDP_FIFO)) + if (Pico.m.dma_xfers == 0 && !(PicoIn.opt & POPT_DIS_VDP_FIFO)) Pico.m.dma_xfers = rand() & 0x1fff; SekFinishIdleDet(); - if (PicoAHW & PAHW_MCD) { + if (PicoIn.AHW & PAHW_MCD) { PicoResetMCD(); return 0; } // reinit, so that checksum checks pass - if (!(PicoOpt & POPT_DIS_IDLE_DET)) + if (!(PicoIn.opt & POPT_DIS_IDLE_DET)) SekInitIdleDet(); - if (PicoOpt & POPT_EN_32X) + if (PicoIn.opt & POPT_EN_32X) PicoReset32x(); // reset sram state; enable sram access by default if it doesn't overlap with ROM @@ -225,9 +215,9 @@ int PicoReset(void) // flush config changes before emu loop starts void PicoLoopPrepare(void) { - if (PicoRegionOverride) + if (PicoIn.regionOverride) // force setting possibly changed.. - Pico.m.pal = (PicoRegionOverride == 2 || PicoRegionOverride == 8) ? 1 : 0; + Pico.m.pal = (PicoIn.regionOverride == 2 || PicoIn.regionOverride == 8) ? 
1 : 0; Pico.m.dirtyPal = 1; rendstatus_old = -1; @@ -310,17 +300,17 @@ void PicoFrame(void) Pico.m.frame_count++; - if (PicoAHW & PAHW_SMS) { + if (PicoIn.AHW & PAHW_SMS) { PicoFrameMS(); goto end; } - if (PicoAHW & PAHW_32X) { + if (PicoIn.AHW & PAHW_32X) { PicoFrame32x(); // also does MCD+32X goto end; } - if (PicoAHW & PAHW_MCD) { + if (PicoIn.AHW & PAHW_MCD) { PicoFrameMCD(); goto end; } @@ -336,7 +326,7 @@ end: void PicoFrameDrawOnly(void) { - if (!(PicoAHW & PAHW_SMS)) { + if (!(PicoIn.AHW & PAHW_SMS)) { PicoFrameStart(); PicoDrawSync(223, 0); } else { diff --git a/pico/pico.h b/pico/pico.h index f1687f51..be02ef3a 100644 --- a/pico/pico.h +++ b/pico/pico.h @@ -72,22 +72,34 @@ extern void *p32x_bios_g, *p32x_bios_m, *p32x_bios_s; #define POPT_DIS_IDLE_DET (1<<19) #define POPT_EN_32X (1<<20) #define POPT_EN_PWM (1<<21) -extern int PicoOpt; // bitfield #define PAHW_MCD (1<<0) #define PAHW_32X (1<<1) #define PAHW_SVP (1<<2) #define PAHW_PICO (1<<3) #define PAHW_SMS (1<<4) -extern int PicoAHW; // Pico active hw #define PQUIRK_FORCE_6BTN (1<<0) -extern int PicoQuirks; -extern int PicoSkipFrame; // skip rendering frame, but still do sound (if enabled) and emulation stuff -extern int PicoRegionOverride; // override the region detection 0: auto, 1: Japan NTSC, 2: Japan PAL, 4: US, 8: Europe -extern int PicoAutoRgnOrder; // packed priority list of regions, for example 0x148 means this detection order: EUR, USA, JAP -extern int PicoSVPCycles; +// the emulator is configured and some status is reported +// through this global state (not saved in savestates) +typedef struct +{ + unsigned int opt; // POPT_* bitfield + + unsigned short pad[2]; // Joypads, format is MXYZ SACB RLDU + unsigned short padInt[2]; // internal copy + unsigned short AHW; // active addon hardware: PAHW_* bitfield + + unsigned short skipFrame; // skip rendering frame, but still do sound (if enabled) and emulation stuff + unsigned short regionOverride; // override the region detection 0: auto, 1: Japan 
NTSC, 2: Japan PAL, 4: US, 8: Europe + unsigned short autoRgnOrder; // packed priority list of regions, for example 0x148 means this detection order: EUR, USA, JAP + + unsigned short quirks; // game-specific quirks: PQUIRK_* +} PicoInterface; + +extern PicoInterface PicoIn; + void PicoInit(void); void PicoExit(void); void PicoPower(void); @@ -95,7 +107,6 @@ int PicoReset(void); void PicoLoopPrepare(void); void PicoFrame(void); void PicoFrameDrawOnly(void); -extern int PicoPad[2]; // Joypads, format is MXYZ SACB RLDU extern void (*PicoWriteSound)(int bytes); // called once per frame at the best time to send sound buffer (PsndOut) to hardware extern void (*PicoMessage)(const char *msg); // callback to output text message from emu typedef enum { PI_ROM, PI_ISPAL, PI_IS40_CELL, PI_IS240_LINES } pint_t; diff --git a/pico/pico/memory.c b/pico/pico/memory.c index 7c3a6463..ae262076 100644 --- a/pico/pico/memory.c +++ b/pico/pico/memory.c @@ -35,8 +35,8 @@ static u32 PicoRead8_pico(u32 a) { case 0x01: d = PicoPicohw.r1; break; case 0x03: - d = PicoPad[0]&0x1f; // d-pad - d |= (PicoPad[0]&0x20) << 2; // pen push -> C + d = PicoIn.pad[0]&0x1f; // d-pad + d |= (PicoIn.pad[0]&0x20) << 2; // pen push -> C d = ~d; break; diff --git a/pico/pico/pico.c b/pico/pico/pico.c index d893f9df..077b9b21 100644 --- a/pico/pico/pico.c +++ b/pico/pico/pico.c @@ -86,7 +86,7 @@ PICO_INTERNAL void PicoInitPico(void) PicoLineHook = PicoLinePico; PicoResetHook = PicoResetPico; - PicoAHW = PAHW_PICO; + PicoIn.AHW = PAHW_PICO; memset(&PicoPicohw, 0, sizeof(PicoPicohw)); PicoPicohw.pen_pos[0] = 0x03c + 320/2; PicoPicohw.pen_pos[1] = 0x200 + 240/2; diff --git a/pico/pico_cmn.c b/pico/pico_cmn.c index d2a1d94c..50b8ced1 100644 --- a/pico/pico_cmn.c +++ b/pico/pico_cmn.c @@ -97,7 +97,7 @@ static int PicoFrameHints(void) pevt_log_m68k_o(EVT_FRAME_START); - if ((PicoOpt&POPT_ALT_RENDERER) && !PicoSkipFrame && (pv->reg[1]&0x40)) { // fast rend., display enabled + if ((PicoIn.opt&POPT_ALT_RENDERER) && 
!PicoIn.skipFrame && (pv->reg[1]&0x40)) { // fast rend., display enabled // draw a frame just after vblank in alternative render mode // yes, this will cause 1 frame lag, but this is inaccurate mode anyway. PicoFrameFull(); @@ -106,7 +106,7 @@ static int PicoFrameHints(void) #endif skip = 1; } - else skip=PicoSkipFrame; + else skip=PicoIn.skipFrame; Pico.t.m68c_frame_start = Pico.t.m68c_aim; pv->v_counter = Pico.m.scanline = 0; @@ -140,7 +140,7 @@ static int PicoFrameHints(void) } // decide if we draw this line - if (!skip && (PicoOpt & POPT_ALT_RENDERER)) + if (!skip && (PicoIn.opt & POPT_ALT_RENDERER)) { // find the right moment for frame renderer, when display is no longer blanked if ((pv->reg[1]&0x40) || y > 100) { @@ -157,10 +157,10 @@ static int PicoFrameHints(void) { cycles = SekCyclesDone(); - if (Pico.m.z80Run && !Pico.m.z80_reset && (PicoOpt&POPT_EN_Z80)) + if (Pico.m.z80Run && !Pico.m.z80_reset && (PicoIn.opt&POPT_EN_Z80)) PicoSyncZ80(cycles); #ifdef PICO_CD - if (PicoAHW & PAHW_MCD) + if (PicoIn.AHW & PAHW_MCD) pcd_sync_s68k(cycles, 0); #endif #ifdef PICO_32X @@ -195,7 +195,7 @@ static int PicoFrameHints(void) pv->lwrite_cnt = 0; Pico.video.status |= SR_EMPT; - memcpy(PicoPadInt, PicoPad, sizeof(PicoPadInt)); + memcpy(PicoIn.padInt, PicoIn.pad, sizeof(PicoIn.padInt)); PAD_DELAY(); // Last H-Int (normally): @@ -225,14 +225,14 @@ static int PicoFrameHints(void) } cycles = SekCyclesDone(); - if (Pico.m.z80Run && !Pico.m.z80_reset && (PicoOpt&POPT_EN_Z80)) { + if (Pico.m.z80Run && !Pico.m.z80_reset && (PicoIn.opt&POPT_EN_Z80)) { PicoSyncZ80(cycles); elprintf(EL_INTS, "zint"); z80_int(); } #ifdef PICO_CD - if (PicoAHW & PAHW_MCD) + if (PicoIn.AHW & PAHW_MCD) pcd_sync_s68k(cycles, 0); #endif #ifdef PICO_32X @@ -313,7 +313,7 @@ static int PicoFrameHints(void) // sync cpus cycles = SekCyclesDone(); - if (Pico.m.z80Run && !Pico.m.z80_reset && (PicoOpt&POPT_EN_Z80)) + if (Pico.m.z80Run && !Pico.m.z80_reset && (PicoIn.opt&POPT_EN_Z80)) PicoSyncZ80(cycles); if 
(PsndOut && ym2612.dacen && PsndDacLine < lines) PsndDoDAC(lines - 1); @@ -321,7 +321,7 @@ static int PicoFrameHints(void) PsndDoPSG(lines - 1); #ifdef PICO_CD - if (PicoAHW & PAHW_MCD) + if (PicoIn.AHW & PAHW_MCD) pcd_sync_s68k(cycles, 0); #endif #ifdef PICO_32X diff --git a/pico/pico_int.h b/pico/pico_int.h index f356d40d..fa7979cc 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -327,7 +327,7 @@ struct PicoMisc unsigned char eeprom_cycle; // EEPROM cycle number unsigned char eeprom_slave; // EEPROM slave word for X24C02 and better SRAMs unsigned char eeprom_status; - unsigned char pad2; + unsigned char status; // rapid_ym2612, multi_ym_updates unsigned short dma_xfers; // 18 unsigned char eeprom_wb[2]; // EEPROM latch/write buffer unsigned int frame_count; // 1c for movies and idle det @@ -352,7 +352,7 @@ struct PicoEState struct Pico *Pico; void *PicoMem_vram; void *PicoMem_cram; - int *PicoOpt; + unsigned int *PicoOpt; unsigned char *Draw2FB; unsigned short HighPal[0x100]; }; @@ -705,8 +705,6 @@ void pcd_state_loaded_mem(void); // pico.c extern struct Pico Pico; extern struct PicoMem PicoMem; -extern int PicoPadInt[2]; -extern int emustatus; extern void (*PicoResetHook)(void); extern void (*PicoLineHook)(void); PICO_INTERNAL int CheckDMA(void); diff --git a/pico/sek.c b/pico/sek.c index a40e3081..84c048e3 100644 --- a/pico/sek.c +++ b/pico/sek.c @@ -335,7 +335,7 @@ int SekIsIdleCode(unsigned short *dst, int bytes) (*dst & 0xc1ff) == 0x0038 || // move.x ($xxxx.w), dX (*dst & 0xf13f) == 0xb038) // cmp.x ($xxxx.w), dX return 1; - if (PicoAHW & (PAHW_MCD|PAHW_32X)) + if (PicoIn.AHW & (PAHW_MCD|PAHW_32X)) break; // with no addons, there should be no need to wait // for byte change anywhere @@ -362,7 +362,7 @@ int SekIsIdleCode(unsigned short *dst, int bytes) return 1; break; case 12: - if (PicoAHW & (PAHW_MCD|PAHW_32X)) + if (PicoIn.AHW & (PAHW_MCD|PAHW_32X)) break; if ( (*dst & 0xf1f8) == 0x3010 && // move.w (aX), dX (dst[1]&0xf100) == 0x0000 && // arithmetic 
diff --git a/pico/sms.c b/pico/sms.c index 9e80c984..748c3265 100644 --- a/pico/sms.c +++ b/pico/sms.c @@ -108,12 +108,12 @@ static unsigned char z80_sms_in(unsigned short a) break; case 0xc0: /* I/O port A and B */ - d = ~((PicoPad[0] & 0x3f) | (PicoPad[1] << 6)); + d = ~((PicoIn.pad[0] & 0x3f) | (PicoIn.pad[1] << 6)); break; case 0xc1: /* I/O port B and miscellaneous */ d = (Pico.ms.io_ctl & 0x80) | ((Pico.ms.io_ctl << 1) & 0x40) | 0x30; - d |= ~(PicoPad[1] >> 2) & 0x0f; + d |= ~(PicoIn.pad[1] >> 2) & 0x0f; break; } @@ -133,7 +133,7 @@ static void z80_sms_out(unsigned short a, unsigned char d) case 0x40: case 0x41: - if (PicoOpt & POPT_EN_PSG) + if (PicoIn.opt & POPT_EN_PSG) SN76496Write(d); break; @@ -255,7 +255,7 @@ void PicoFrameMS(void) int lines = is_pal ? 313 : 262; int cycles_line = is_pal ? 58020 : 58293; /* (226.6 : 227.7) * 256 */ int cycles_done = 0, cycles_aim = 0; - int skip = PicoSkipFrame; + int skip = PicoIn.skipFrame; int lines_vis = 192; int hint; // Hint counter int nmi; @@ -263,7 +263,7 @@ void PicoFrameMS(void) PsndStartFrame(); - nmi = (PicoPad[0] >> 7) & 1; + nmi = (PicoIn.pad[0] >> 7) & 1; if (!Pico.ms.nmi_state && nmi) z80_nmi(); Pico.ms.nmi_state = nmi; diff --git a/pico/sound/sound.c b/pico/sound/sound.c index a67ebcca..56ffe3f8 100644 --- a/pico/sound/sound.c +++ b/pico/sound/sound.c @@ -141,9 +141,9 @@ void PsndRerate(int preserve_state) PsndClear(); // set mixer - PsndMix_32_to_16l = (PicoOpt & POPT_EN_STEREO) ? mix_32_to_16l_stereo : mix_32_to_16_mono; + PsndMix_32_to_16l = (PicoIn.opt & POPT_EN_STEREO) ? 
mix_32_to_16l_stereo : mix_32_to_16_mono; - if (PicoAHW & PAHW_PICO) + if (PicoIn.AHW & PAHW_PICO) PicoReratePico(); } @@ -159,7 +159,7 @@ PICO_INTERNAL void PsndStartFrame(void) } PsndDacLine = PsndPsgLine = 0; - emustatus &= ~1; + Pico.m.status &= ~1; dac_info[224] = PsndLen_use; } @@ -183,7 +183,7 @@ PICO_INTERNAL void PsndDoDAC(int line_to) if (!PsndOut) return; - if (PicoOpt & POPT_EN_STEREO) { + if (PicoIn.opt & POPT_EN_STEREO) { short *d = PsndOut + pos*2; for (; len > 0; len--, d+=2) *d += dout; } else { @@ -211,10 +211,10 @@ PICO_INTERNAL void PsndDoPSG(int line_to) PsndPsgLine = line_to + 1; - if (!PsndOut || !(PicoOpt & POPT_EN_PSG)) + if (!PsndOut || !(PicoIn.opt & POPT_EN_PSG)) return; - if (PicoOpt & POPT_EN_STEREO) { + if (PicoIn.opt & POPT_EN_STEREO) { stereo = 1; pos <<= 1; } @@ -272,7 +272,7 @@ PICO_INTERNAL void PsndClear(void) { int len = PsndLen; if (PsndLen_exc_add) len++; - if (PicoOpt & POPT_EN_STEREO) + if (PicoIn.opt & POPT_EN_STEREO) memset32((int *) PsndOut, 0, len); // assume PsndOut to be aligned else { short *out = PsndOut; @@ -287,19 +287,19 @@ static int PsndRender(int offset, int length) { int buf32_updated = 0; int *buf32 = PsndBuffer+offset; - int stereo = (PicoOpt & 8) >> 3; + int stereo = (PicoIn.opt & 8) >> 3; offset <<= stereo; pprof_start(sound); - if (PicoAHW & PAHW_PICO) { + if (PicoIn.AHW & PAHW_PICO) { PicoPicoPCMUpdate(PsndOut+offset, length, stereo); return length; } // Add in the stereo FM buffer - if (PicoOpt & POPT_EN_FM) { + if (PicoIn.opt & POPT_EN_FM) { buf32_updated = YM2612UpdateOne(buf32, length, stereo, 1); } else memset32(buf32, 0, length<cdda_stream != NULL && !(Pico_mcd->s68k_regs[0x36] & 1)) { @@ -326,7 +326,7 @@ static int PsndRender(int offset, int length) cdda_raw_update(buf32, length); } - if ((PicoAHW & PAHW_32X) && (PicoOpt & POPT_EN_PWM)) + if ((PicoIn.AHW & PAHW_32X) && (PicoIn.opt & POPT_EN_PWM)) p32x_pwm_update(buf32, length, stereo); // convert + limit to normal 16bit output @@ -348,33 +348,33 
@@ PICO_INTERNAL void PsndGetSamples(int y) if (y == 224) { - if (emustatus & 2) + if (Pico.m.status & 2) curr_pos += PsndRender(curr_pos, PsndLen-PsndLen/2); else curr_pos = PsndRender(0, PsndLen_use); - if (emustatus & 1) - emustatus |= 2; - else emustatus &= ~2; + if (Pico.m.status & 1) + Pico.m.status |= 2; + else Pico.m.status &= ~2; if (PicoWriteSound) - PicoWriteSound(curr_pos * ((PicoOpt & POPT_EN_STEREO) ? 4 : 2)); + PicoWriteSound(curr_pos * ((PicoIn.opt & POPT_EN_STEREO) ? 4 : 2)); // clear sound buffer PsndClear(); PsndDacLine = 224; dac_info[224] = 0; } - else if (emustatus & 3) { - emustatus|= 2; - emustatus&=~1; + else if (Pico.m.status & 3) { + Pico.m.status |= 2; + Pico.m.status &= ~1; curr_pos = PsndRender(0, PsndLen/2); } } PICO_INTERNAL void PsndGetSamplesMS(void) { - int stereo = (PicoOpt & 8) >> 3; + int stereo = (PicoIn.opt & 8) >> 3; int length = PsndLen_use; // PSG - if (PicoOpt & POPT_EN_PSG) + if (PicoIn.opt & POPT_EN_PSG) SN76496Update(PsndOut, length, stereo); // upmix to "stereo" if needed @@ -385,7 +385,7 @@ PICO_INTERNAL void PsndGetSamplesMS(void) } if (PicoWriteSound != NULL) - PicoWriteSound(length * ((PicoOpt & POPT_EN_STEREO) ? 4 : 2)); + PicoWriteSound(length * ((PicoIn.opt & POPT_EN_STEREO) ? 
4 : 2)); PsndClear(); } diff --git a/pico/sound/ym2612.h b/pico/sound/ym2612.h index 73a36a84..a2921b22 100644 --- a/pico/sound/ym2612.h +++ b/pico/sound/ym2612.h @@ -176,20 +176,20 @@ int YM2612PicoStateLoad2(int *tat, int *tbt); #else /* GP2X specific */ #include "../../platform/gp2x/940ctl.h" -extern int PicoOpt; +extern int PicoIn.opt; #define YM2612Init(baseclock,rate) { \ - if (PicoOpt&0x200) YM2612Init_940(baseclock, rate); \ + if (PicoIn.opt&0x200) YM2612Init_940(baseclock, rate); \ else YM2612Init_(baseclock, rate); \ } #define YM2612ResetChip() { \ - if (PicoOpt&0x200) YM2612ResetChip_940(); \ + if (PicoIn.opt&0x200) YM2612ResetChip_940(); \ else YM2612ResetChip_(); \ } #define YM2612UpdateOne(buffer,length,stereo,is_buf_empty) \ - (PicoOpt&0x200) ? YM2612UpdateOne_940(buffer, length, stereo, is_buf_empty) : \ + (PicoIn.opt&0x200) ? YM2612UpdateOne_940(buffer, length, stereo, is_buf_empty) : \ YM2612UpdateOne_(buffer, length, stereo, is_buf_empty); #define YM2612PicoStateLoad() { \ - if (PicoOpt&0x200) YM2612PicoStateLoad_940(); \ + if (PicoIn.opt&0x200) YM2612PicoStateLoad_940(); \ else YM2612PicoStateLoad_(); \ } #endif /* __GP2X__ */ diff --git a/pico/state.c b/pico/state.c index 69e8be06..100f4237 100644 --- a/pico/state.c +++ b/pico/state.c @@ -226,7 +226,7 @@ static int state_save(void *file) areaWrite("PicoSEXT", 1, 8, file); areaWrite(&ver, 1, 4, file); - if (!(PicoAHW & PAHW_SMS)) { + if (!(PicoIn.AHW & PAHW_SMS)) { memset(buff, 0, sizeof(buff)); SekPackCpu(buff, 0); CHECKED_WRITE_BUFF(CHUNK_M68K, buff); @@ -250,7 +250,7 @@ static int state_save(void *file) CHECKED_WRITE_BUFF(CHUNK_Z80, buff_z80); CHECKED_WRITE(CHUNK_PSG, 28*4, sn76496_regs); - if (PicoAHW & PAHW_MCD) + if (PicoIn.AHW & PAHW_MCD) { buf2 = malloc(CHUNK_LIMIT_W); if (buf2 == NULL) @@ -287,7 +287,7 @@ static int state_save(void *file) } #ifndef NO_32X - if (PicoAHW & PAHW_32X) + if (PicoIn.AHW & PAHW_32X) { unsigned char cpubuff[SH2_STATE_SIZE]; @@ -406,9 +406,9 @@ static int 
state_load(void *file) CHECKED_READ(1, &chunk); CHECKED_READ(4, &len); if (len < 0 || len > 1024*512) R_ERROR_RETURN("bad length"); - if (CHUNK_S68K <= chunk && chunk <= CHUNK_MISC_CD && !(PicoAHW & PAHW_MCD)) + if (CHUNK_S68K <= chunk && chunk <= CHUNK_MISC_CD && !(PicoIn.AHW & PAHW_MCD)) R_ERROR_RETURN("cd chunk in non CD state?"); - if (CHUNK_32X_FIRST <= chunk && chunk <= CHUNK_32X_LAST && !(PicoAHW & PAHW_32X)) + if (CHUNK_32X_FIRST <= chunk && chunk <= CHUNK_32X_LAST && !(PicoIn.AHW & PAHW_32X)) Pico32xStartup(); switch (chunk) @@ -535,28 +535,28 @@ breakswitch: } readend: - if (PicoAHW & PAHW_SMS) + if (PicoIn.AHW & PAHW_SMS) PicoStateLoadedMS(); - if (PicoAHW & PAHW_32X) + if (PicoIn.AHW & PAHW_32X) Pico32xStateLoaded(1); if (PicoLoadStateHook != NULL) PicoLoadStateHook(); // must unpack 68k and z80 after banks are set up - if (!(PicoAHW & PAHW_SMS)) + if (!(PicoIn.AHW & PAHW_SMS)) SekUnpackCpu(buff_m68k, 0); - if (PicoAHW & PAHW_MCD) + if (PicoIn.AHW & PAHW_MCD) SekUnpackCpu(buff_s68k, 1); z80_unpack(buff_z80); // due to dep from 68k cycles.. 
Pico.t.m68c_aim = Pico.t.m68c_cnt; - if (PicoAHW & PAHW_32X) + if (PicoIn.AHW & PAHW_32X) Pico32xStateLoaded(0); - if (PicoAHW & PAHW_MCD) + if (PicoIn.AHW & PAHW_MCD) { SekCycleAimS68k = SekCycleCntS68k; pcd_state_loaded(); @@ -579,7 +579,7 @@ static int state_load_gfx(void *file) int ver, len, found = 0, to_find = 4; char buff[8]; - if (PicoAHW & PAHW_32X) + if (PicoIn.AHW & PAHW_32X) to_find += 2; g_read_offs = 0; @@ -593,7 +593,7 @@ static int state_load_gfx(void *file) CHECKED_READ(1, buff); CHECKED_READ(4, &len); if (len < 0 || len > 1024*512) R_ERROR_RETURN("bad length"); - if (buff[0] > CHUNK_FM && buff[0] <= CHUNK_MISC_CD && !(PicoAHW & PAHW_MCD)) + if (buff[0] > CHUNK_FM && buff[0] <= CHUNK_MISC_CD && !(PicoIn.AHW & PAHW_MCD)) R_ERROR_RETURN("cd chunk in non CD state?"); switch (buff[0]) @@ -723,7 +723,7 @@ void *PicoTmpStateSave(void) memcpy(&t->video, &Pico.video, sizeof(Pico.video)); #ifndef NO_32X - if (PicoAHW & PAHW_32X) { + if (PicoIn.AHW & PAHW_32X) { memcpy(&t->t32x.p32x, &Pico32x, sizeof(Pico32x)); memcpy(t->t32x.dram, Pico32xMem->dram, sizeof(Pico32xMem->dram)); memcpy(t->t32x.pal, Pico32xMem->pal, sizeof(Pico32xMem->pal)); @@ -746,7 +746,7 @@ void PicoTmpStateRestore(void *data) Pico.m.dirtyPal = 1; #ifndef NO_32X - if (PicoAHW & PAHW_32X) { + if (PicoIn.AHW & PAHW_32X) { memcpy(&Pico32x, &t->t32x.p32x, sizeof(Pico32x)); memcpy(Pico32xMem->dram, t->t32x.dram, sizeof(Pico32xMem->dram)); memcpy(Pico32xMem->pal, t->t32x.pal, sizeof(Pico32xMem->pal)); diff --git a/pico/videoport.c b/pico/videoport.c index b5e3f86c..355489e9 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -109,7 +109,7 @@ static void DmaSlow(int len, unsigned int source) base = (u16 *)PicoMem.ram; mask = 0xffff; } - else if (PicoAHW & PAHW_MCD) + else if (PicoIn.AHW & PAHW_MCD) { u8 r3 = Pico_mcd->s68k_regs[3]; elprintf(EL_VDPDMA, "DmaSlow CD, r3=%02x", r3); @@ -346,8 +346,8 @@ static NOINLINE void CommandChange(void) static void DrawSync(int blank_on) { - if 
(Pico.m.scanline < 224 && !(PicoOpt & POPT_ALT_RENDERER) && - !PicoSkipFrame && Pico.est.DrawScanline <= Pico.m.scanline) { + if (Pico.m.scanline < 224 && !(PicoIn.opt & POPT_ALT_RENDERER) && + !PicoIn.skipFrame && Pico.est.DrawScanline <= Pico.m.scanline) { //elprintf(EL_ANOMALY, "sync"); PicoDrawSync(Pico.m.scanline, blank_on); } @@ -376,7 +376,7 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) pvid->pending=0; } - if (!(pvid->status & SR_VB) && !(PicoOpt&POPT_DIS_VDP_FIFO)) + if (!(pvid->status & SR_VB) && !(PicoIn.opt&POPT_DIS_VDP_FIFO)) { int use = pvid->type == 1 ? 2 : 1; pvid->lwrite_cnt -= use; diff --git a/pico/z80if.c b/pico/z80if.c index b69495e7..da2043e4 100644 --- a/pico/z80if.c +++ b/pico/z80if.c @@ -104,17 +104,17 @@ void z80_reset(void) */ #ifdef FAST_Z80SP // drZ80 is locked in single bank - drz80_sp_base = (PicoAHW & PAHW_SMS) ? 0xc000 : 0x0000; + drz80_sp_base = (PicoIn.AHW & PAHW_SMS) ? 0xc000 : 0x0000; drZ80.Z80SP_BASE = z80_read_map[drz80_sp_base >> Z80_MEM_SHIFT] << 1; #endif - if (PicoAHW & PAHW_SMS) + if (PicoIn.AHW & PAHW_SMS) drZ80.Z80SP = drZ80.Z80SP_BASE + 0xdff0; // simulate BIOS // XXX: since we use direct SP pointer, it might make sense to force it to RAM, // but we'll rely on built-in stack protection for now #endif #ifdef _USE_CZ80 Cz80_Reset(&CZ80); - if (PicoAHW & PAHW_SMS) + if (PicoIn.AHW & PAHW_SMS) Cz80_Set_Reg(&CZ80, CZ80_SP, 0xdff0); #endif } diff --git a/platform/common/config_file.c b/platform/common/config_file.c index e66d4e1c..2d1186d7 100644 --- a/platform/common/config_file.c +++ b/platform/common/config_file.c @@ -266,9 +266,9 @@ static int custom_read(menu_entry *me, const char *var, const char *val) if (*tmp == 'Z' || *tmp == 'z') tmp++; while (*tmp == ' ') tmp++; if (strcasecmp(tmp, "stereo") == 0) { - PicoOpt |= POPT_EN_STEREO; + PicoIn.opt |= POPT_EN_STEREO; } else if (strcasecmp(tmp, "mono") == 0) { - PicoOpt &= ~POPT_EN_STEREO; + PicoIn.opt &= ~POPT_EN_STEREO; } else return 0; return 
1; @@ -279,31 +279,31 @@ static int custom_read(menu_entry *me, const char *var, const char *val) { const char *p = val + 5, *end = val + strlen(val); int i; - PicoRegionOverride = PicoAutoRgnOrder = 0; + PicoIn.regionOverride = PicoIn.autoRgnOrder = 0; for (i = 0; p < end && i < 3; i++) { while (*p == ' ') p++; if (p[0] == 'J' && p[1] == 'P') { - PicoAutoRgnOrder |= 1 << (i*4); + PicoIn.autoRgnOrder |= 1 << (i*4); } else if (p[0] == 'U' && p[1] == 'S') { - PicoAutoRgnOrder |= 4 << (i*4); + PicoIn.autoRgnOrder |= 4 << (i*4); } else if (p[0] == 'E' && p[1] == 'U') { - PicoAutoRgnOrder |= 8 << (i*4); + PicoIn.autoRgnOrder |= 8 << (i*4); } while (*p != ' ' && *p != 0) p++; if (*p == 0) break; } } else if (strcasecmp(val, "Auto") == 0) { - PicoRegionOverride = 0; + PicoIn.regionOverride = 0; } else if (strcasecmp(val, "Japan NTSC") == 0) { - PicoRegionOverride = 1; + PicoIn.regionOverride = 1; } else if (strcasecmp(val, "Japan PAL") == 0) { - PicoRegionOverride = 2; + PicoIn.regionOverride = 2; } else if (strcasecmp(val, "USA") == 0) { - PicoRegionOverride = 4; + PicoIn.regionOverride = 4; } else if (strcasecmp(val, "Europe") == 0) { - PicoRegionOverride = 8; + PicoIn.regionOverride = 8; } else return 0; return 1; diff --git a/platform/common/emu.c b/platform/common/emu.c index 85e1ba77..822fec73 100644 --- a/platform/common/emu.c +++ b/platform/common/emu.c @@ -171,8 +171,8 @@ static const char *find_bios(int *region, const char *cd_fname) ret = emu_read_config(cd_fname, 0); if (!ret) emu_read_config(NULL, 0); - if (PicoRegionOverride) { - *region = PicoRegionOverride; + if (PicoIn.regionOverride) { + *region = PicoIn.regionOverride; lprintf("override region to %s\n", *region != 4 ? (*region == 8 ? 
"EU" : "JAP") : "USA"); } @@ -266,16 +266,16 @@ static char *emu_make_rom_id(const char *fname) static char id_string[3+0xe*3+0x3*3+0x30*3+3]; int pos, swab = 1; - if (PicoAHW & PAHW_MCD) { + if (PicoIn.AHW & PAHW_MCD) { strcpy(id_string, "CD|"); swab = 0; } - else if (PicoAHW & PAHW_SMS) + else if (PicoIn.AHW & PAHW_SMS) strcpy(id_string, "MS|"); else strcpy(id_string, "MD|"); pos = 3; - if (!(PicoAHW & PAHW_SMS)) { + if (!(PicoIn.AHW & PAHW_SMS)) { pos += extract_text(id_string + pos, media_id_header + 0x80, 0x0e, swab); // serial id_string[pos] = '|'; pos++; pos += extract_text(id_string + pos, media_id_header + 0xf0, 0x03, swab); // region @@ -296,7 +296,7 @@ static char *emu_make_rom_id(const char *fname) // buffer must be at least 150 byte long void emu_get_game_name(char *str150) { - int ret, swab = (PicoAHW & PAHW_MCD) ? 0 : 1; + int ret, swab = (PicoIn.AHW & PAHW_MCD) ? 0 : 1; char *s, *d; ret = extract_text(str150, media_id_header + 0x50, 0x30, swab); // overseas name @@ -315,22 +315,22 @@ static void system_announce(void) const char *sys_name, *tv_standard, *extra = ""; int fps; - if (PicoAHW & PAHW_SMS) { + if (PicoIn.AHW & PAHW_SMS) { sys_name = "Master System"; #ifdef NO_SMS extra = " [no support]"; #endif - } else if (PicoAHW & PAHW_PICO) { + } else if (PicoIn.AHW & PAHW_PICO) { sys_name = "Pico"; - } else if ((PicoAHW & (PAHW_32X|PAHW_MCD)) == (PAHW_32X|PAHW_MCD)) { + } else if ((PicoIn.AHW & (PAHW_32X|PAHW_MCD)) == (PAHW_32X|PAHW_MCD)) { sys_name = "32X + Mega CD"; if ((Pico.m.hardware & 0xc0) == 0x80) sys_name = "32X + Sega CD"; - } else if (PicoAHW & PAHW_MCD) { + } else if (PicoIn.AHW & PAHW_MCD) { sys_name = "Mega CD"; if ((Pico.m.hardware & 0xc0) == 0x80) sys_name = "Sega CD"; - } else if (PicoAHW & PAHW_32X) { + } else if (PicoIn.AHW & PAHW_32X) { sys_name = "32X"; } else { sys_name = "MegaDrive"; @@ -451,7 +451,7 @@ int emu_reload_rom(const char *rom_fname_in) } // make quirks visible in UI - if (PicoQuirks & PQUIRK_FORCE_6BTN) + if 
(PicoIn.quirks & PQUIRK_FORCE_6BTN) currentConfig.input_dev0 = PICO_INPUT_PAD_6BTN; menu_romload_end(); @@ -470,12 +470,12 @@ int emu_reload_rom(const char *rom_fname_in) PicoSetInputDevice(0, indev); PicoSetInputDevice(1, indev); - PicoOpt |= POPT_DIS_VDP_FIFO; // no VDP fifo timing + PicoIn.opt |= POPT_DIS_VDP_FIFO; // no VDP fifo timing if (movie_data[0xF] >= 'A') { if (movie_data[0x16] & 0x80) { - PicoRegionOverride = 8; + PicoIn.regionOverride = 8; } else { - PicoRegionOverride = 4; + PicoIn.regionOverride = 4; } PicoReset(); // TODO: bits 6 & 5 @@ -486,7 +486,7 @@ int emu_reload_rom(const char *rom_fname_in) else { system_announce(); - PicoOpt &= ~POPT_DIS_VDP_FIFO; + PicoIn.opt &= ~POPT_DIS_VDP_FIFO; } strncpy(rom_fname_loaded, rom_fname, sizeof(rom_fname_loaded)-1); @@ -603,10 +603,10 @@ void emu_prep_defconfig(void) void emu_set_defconfig(void) { memcpy(&currentConfig, &defaultConfig, sizeof(currentConfig)); - PicoOpt = currentConfig.s_PicoOpt; + PicoIn.opt = currentConfig.s_PicoOpt; PsndRate = currentConfig.s_PsndRate; - PicoRegionOverride = currentConfig.s_PicoRegion; - PicoAutoRgnOrder = currentConfig.s_PicoAutoRgnOrder; + PicoIn.regionOverride = currentConfig.s_PicoRegion; + PicoIn.autoRgnOrder = currentConfig.s_PicoAutoRgnOrder; } int emu_read_config(const char *rom_fname, int no_defaults) @@ -767,20 +767,20 @@ static void update_movie(void) lprintf("END OF MOVIE.\n"); } else { // MXYZ SACB RLDU - PicoPad[0] = ~movie_data[offs] & 0x8f; // ! SCBA RLDU - if(!(movie_data[offs] & 0x10)) PicoPad[0] |= 0x40; // C - if(!(movie_data[offs] & 0x20)) PicoPad[0] |= 0x10; // A - if(!(movie_data[offs] & 0x40)) PicoPad[0] |= 0x20; // B - PicoPad[1] = ~movie_data[offs+1] & 0x8f; // ! SCBA RLDU - if(!(movie_data[offs+1] & 0x10)) PicoPad[1] |= 0x40; // C - if(!(movie_data[offs+1] & 0x20)) PicoPad[1] |= 0x10; // A - if(!(movie_data[offs+1] & 0x40)) PicoPad[1] |= 0x20; // B - PicoPad[0] |= (~movie_data[offs+2] & 0x0A) << 8; // ! 
MZYX - if(!(movie_data[offs+2] & 0x01)) PicoPad[0] |= 0x0400; // X - if(!(movie_data[offs+2] & 0x04)) PicoPad[0] |= 0x0100; // Z - PicoPad[1] |= (~movie_data[offs+2] & 0xA0) << 4; // ! MZYX - if(!(movie_data[offs+2] & 0x10)) PicoPad[1] |= 0x0400; // X - if(!(movie_data[offs+2] & 0x40)) PicoPad[1] |= 0x0100; // Z + PicoIn.pad[0] = ~movie_data[offs] & 0x8f; // ! SCBA RLDU + if(!(movie_data[offs] & 0x10)) PicoIn.pad[0] |= 0x40; // C + if(!(movie_data[offs] & 0x20)) PicoIn.pad[0] |= 0x10; // A + if(!(movie_data[offs] & 0x40)) PicoIn.pad[0] |= 0x20; // B + PicoIn.pad[1] = ~movie_data[offs+1] & 0x8f; // ! SCBA RLDU + if(!(movie_data[offs+1] & 0x10)) PicoIn.pad[1] |= 0x40; // C + if(!(movie_data[offs+1] & 0x20)) PicoIn.pad[1] |= 0x10; // A + if(!(movie_data[offs+1] & 0x40)) PicoIn.pad[1] |= 0x20; // B + PicoIn.pad[0] |= (~movie_data[offs+2] & 0x0A) << 8; // ! MZYX + if(!(movie_data[offs+2] & 0x01)) PicoIn.pad[0] |= 0x0400; // X + if(!(movie_data[offs+2] & 0x04)) PicoIn.pad[0] |= 0x0100; // Z + PicoIn.pad[1] |= (~movie_data[offs+2] & 0xA0) << 4; // ! MZYX + if(!(movie_data[offs+2] & 0x10)) PicoIn.pad[1] |= 0x0400; // X + if(!(movie_data[offs+2] & 0x40)) PicoIn.pad[1] |= 0x0100; // Z } } @@ -809,9 +809,9 @@ char *emu_get_save_fname(int load, int is_sram, int slot, int *time) if (is_sram) { - strcpy(ext, (PicoAHW & PAHW_MCD) ? ".brm" : ".srm"); + strcpy(ext, (PicoIn.AHW & PAHW_MCD) ? ".brm" : ".srm"); romfname_ext(saveFname, sizeof(static_buff), - (PicoAHW & PAHW_MCD) ? "brm"PATH_SEP : "srm"PATH_SEP, ext); + (PicoIn.AHW & PAHW_MCD) ? 
"brm"PATH_SEP : "srm"PATH_SEP, ext); if (!load) return saveFname; @@ -885,9 +885,9 @@ int emu_save_load_game(int load, int sram) int sram_size; unsigned char *sram_data; int truncate = 1; - if (PicoAHW & PAHW_MCD) + if (PicoIn.AHW & PAHW_MCD) { - if (PicoOpt & POPT_EN_MCD_RAMCART) { + if (PicoIn.opt & POPT_EN_MCD_RAMCART) { sram_size = 0x12000; sram_data = Pico.sv.data; if (sram_data) @@ -912,7 +912,7 @@ int emu_save_load_game(int load, int sram) ret = fread(sram_data, 1, sram_size, sramFile); ret = ret > 0 ? 0 : -1; fclose(sramFile); - if ((PicoAHW & PAHW_MCD) && (PicoOpt&POPT_EN_MCD_RAMCART)) + if ((PicoIn.AHW & PAHW_MCD) && (PicoIn.opt&POPT_EN_MCD_RAMCART)) memcpy(Pico_mcd->bram, sram_data, 0x2000); } else { // sram save needs some special processing @@ -974,7 +974,7 @@ void emu_set_fastforward(int set_on) PsndRerate(1); is_on = 0; // mainly to unbreak pcm - if (PicoAHW & PAHW_MCD) + if (PicoIn.AHW & PAHW_MCD) pcd_state_loaded(); } } @@ -1034,11 +1034,11 @@ void run_events_pico(unsigned int events) return; /* handle other input modes */ - if (PicoPad[0] & 1) pico_pen_y--; - if (PicoPad[0] & 2) pico_pen_y++; - if (PicoPad[0] & 4) pico_pen_x--; - if (PicoPad[0] & 8) pico_pen_x++; - PicoPad[0] &= ~0x0f; // release UDLR + if (PicoIn.pad[0] & 1) pico_pen_y--; + if (PicoIn.pad[0] & 2) pico_pen_y++; + if (PicoIn.pad[0] & 4) pico_pen_x--; + if (PicoIn.pad[0] & 8) pico_pen_x++; + PicoIn.pad[0] &= ~0x0f; // release UDLR lim_x = (Pico.video.reg[12]&1) ? 319 : 255; if (pico_pen_y < 8) @@ -1057,7 +1057,7 @@ void run_events_pico(unsigned int events) PicoPicohw.pen_pos[1] = pico_inp_mode == 1 ? 
(0x2f8 + pico_pen_y) : (0x1fc + pico_pen_y); } -static void do_turbo(int *pad, int acts) +static void do_turbo(unsigned short *pad, int acts) { static int turbo_pad = 0; static unsigned char turbo_cnt[3] = { 0, 0, 0 }; @@ -1159,13 +1159,13 @@ void emu_update_input(void) pl_actions[0] = actions[IN_BINDTYPE_PLAYER12]; pl_actions[1] = actions[IN_BINDTYPE_PLAYER12] >> 16; - PicoPad[0] = pl_actions[0] & 0xfff; - PicoPad[1] = pl_actions[1] & 0xfff; + PicoIn.pad[0] = pl_actions[0] & 0xfff; + PicoIn.pad[1] = pl_actions[1] & 0xfff; if (pl_actions[0] & 0x7000) - do_turbo(&PicoPad[0], pl_actions[0]); + do_turbo(&PicoIn.pad[0], pl_actions[0]); if (pl_actions[1] & 0x7000) - do_turbo(&PicoPad[1], pl_actions[1]); + do_turbo(&PicoIn.pad[1], pl_actions[1]); events = actions[IN_BINDTYPE_EMU] & PEV_MASK; @@ -1181,7 +1181,7 @@ void emu_update_input(void) events &= ~prev_events; - if (PicoAHW == PAHW_PICO) + if (PicoIn.AHW == PAHW_PICO) run_events_pico(events); if (events) run_events_ui(events); @@ -1202,14 +1202,14 @@ static void mkdir_path(char *path_with_reserve, int pos, const char *name) void emu_cmn_forced_frame(int no_scale, int do_emu) { - int po_old = PicoOpt; + int po_old = PicoIn.opt; memset32(g_screen_ptr, 0, g_screen_width * g_screen_height * 2 / 4); - PicoOpt &= ~POPT_ALT_RENDERER; - PicoOpt |= POPT_ACC_SPRITES; + PicoIn.opt &= ~POPT_ALT_RENDERER; + PicoIn.opt |= POPT_ACC_SPRITES; if (!no_scale) - PicoOpt |= POPT_EN_SOFTSCALE; + PicoIn.opt |= POPT_EN_SOFTSCALE; PicoDrawSetOutFormat(PDF_RGB555, 1); Pico.m.dirtyPal = 1; @@ -1218,7 +1218,7 @@ void emu_cmn_forced_frame(int no_scale, int do_emu) else PicoFrameDrawOnly(); - PicoOpt = po_old; + PicoIn.opt = po_old; } void emu_init(void) @@ -1293,7 +1293,7 @@ void emu_sound_start(void) if (currentConfig.EmuOpt & EOPT_EN_SOUND) { - int is_stereo = (PicoOpt & POPT_EN_STEREO) ? 1 : 0; + int is_stereo = (PicoIn.opt & POPT_EN_STEREO) ? 1 : 0; PsndRerate(Pico.m.frame_count ? 
1 : 0); @@ -1470,9 +1470,9 @@ void emu_loop(void) emu_update_input(); if (skip) { int do_audio = diff > -target_frametime_x3 * 2; - PicoSkipFrame = do_audio ? 1 : 2; + PicoIn.skipFrame = do_audio ? 1 : 2; PicoFrame(); - PicoSkipFrame = 0; + PicoIn.skipFrame = 0; } else { PicoFrame(); diff --git a/platform/common/menu_pico.c b/platform/common/menu_pico.c index 0f9fa612..bd2b9159 100644 --- a/platform/common/menu_pico.c +++ b/platform/common/menu_pico.c @@ -281,7 +281,7 @@ static void menu_loop_patches(void) // -------------- key config -------------- -// PicoPad[] format: MXYZ SACB RLDU +// PicoIn.pad[] format: MXYZ SACB RLDU me_bind_action me_ctrl_actions[] = { { "UP ", 0x0001 }, @@ -410,10 +410,10 @@ static const char h_scfx[] = "Emulate scale/rotate ASIC chip for graphics effe static menu_entry e_menu_cd_options[] = { mee_onoff_h("CD LEDs", MA_CDOPT_LEDS, currentConfig.EmuOpt, EOPT_EN_CD_LEDS, h_cdleds), - mee_onoff_h("CDDA audio", MA_CDOPT_CDDA, PicoOpt, POPT_EN_MCD_CDDA, h_cdda), - mee_onoff_h("PCM audio", MA_CDOPT_PCM, PicoOpt, POPT_EN_MCD_PCM, h_cdpcm), - mee_onoff_h("SaveRAM cart", MA_CDOPT_SAVERAM, PicoOpt, POPT_EN_MCD_RAMCART, h_srcart), - mee_onoff_h("Scale/Rot. fx", MA_CDOPT_SCALEROT_CHIP, PicoOpt, POPT_EN_MCD_GFX, h_scfx), + mee_onoff_h("CDDA audio", MA_CDOPT_CDDA, PicoIn.opt, POPT_EN_MCD_CDDA, h_cdda), + mee_onoff_h("PCM audio", MA_CDOPT_PCM, PicoIn.opt, POPT_EN_MCD_PCM, h_cdpcm), + mee_onoff_h("SaveRAM cart", MA_CDOPT_SAVERAM, PicoIn.opt, POPT_EN_MCD_RAMCART, h_srcart), + mee_onoff_h("Scale/Rot. 
fx", MA_CDOPT_SCALEROT_CHIP, PicoIn.opt, POPT_EN_MCD_GFX, h_scfx), mee_end, }; @@ -464,9 +464,9 @@ static const char h_sh2cycles[] = "Cycles/millisecond (similar to DOSBox)\n" static menu_entry e_menu_32x_options[] = { - mee_onoff_h ("32X enabled", MA_32XOPT_ENABLE_32X, PicoOpt, POPT_EN_32X, h_32x_enable), + mee_onoff_h ("32X enabled", MA_32XOPT_ENABLE_32X, PicoIn.opt, POPT_EN_32X, h_32x_enable), mee_enum ("32X renderer", MA_32XOPT_RENDERER, currentConfig.renderer32x, renderer_names32x), - mee_onoff_h ("PWM sound", MA_32XOPT_PWM, PicoOpt, POPT_EN_PWM, h_pwm), + mee_onoff_h ("PWM sound", MA_32XOPT_PWM, PicoIn.opt, POPT_EN_PWM, h_pwm), mee_cust_h ("Master SH2 cycles", MA_32XOPT_MSH2_CYCLES, mh_opt_sh2cycles, mgn_opt_sh2cycles, h_sh2cycles), mee_cust_h ("Slave SH2 cycles", MA_32XOPT_SSH2_CYCLES, mh_opt_sh2cycles, mgn_opt_sh2cycles, h_sh2cycles), mee_end, @@ -491,15 +491,15 @@ static int menu_loop_32x_options(int id, int keys) static menu_entry e_menu_adv_options[] = { mee_onoff ("SRAM/BRAM saves", MA_OPT_SRAM_STATES, currentConfig.EmuOpt, EOPT_EN_SRAM), - mee_onoff ("Disable sprite limit", MA_OPT2_NO_SPRITE_LIM, PicoOpt, POPT_DIS_SPRITE_LIM), - mee_onoff ("Emulate Z80", MA_OPT2_ENABLE_Z80, PicoOpt, POPT_EN_Z80), - mee_onoff ("Emulate YM2612 (FM)", MA_OPT2_ENABLE_YM2612, PicoOpt, POPT_EN_FM), - mee_onoff ("Emulate SN76496 (PSG)", MA_OPT2_ENABLE_SN76496,PicoOpt, POPT_EN_PSG), + mee_onoff ("Disable sprite limit", MA_OPT2_NO_SPRITE_LIM, PicoIn.opt, POPT_DIS_SPRITE_LIM), + mee_onoff ("Emulate Z80", MA_OPT2_ENABLE_Z80, PicoIn.opt, POPT_EN_Z80), + mee_onoff ("Emulate YM2612 (FM)", MA_OPT2_ENABLE_YM2612, PicoIn.opt, POPT_EN_FM), + mee_onoff ("Emulate SN76496 (PSG)", MA_OPT2_ENABLE_SN76496,PicoIn.opt, POPT_EN_PSG), mee_onoff ("gzip savestates", MA_OPT2_GZIP_STATES, currentConfig.EmuOpt, EOPT_GZIP_SAVES), mee_onoff ("Don't save last used ROM", MA_OPT2_NO_LAST_ROM, currentConfig.EmuOpt, EOPT_NO_AUTOSVCFG), - mee_onoff ("Disable idle loop patching",MA_OPT2_NO_IDLE_LOOPS,PicoOpt, 
POPT_DIS_IDLE_DET), + mee_onoff ("Disable idle loop patching",MA_OPT2_NO_IDLE_LOOPS,PicoIn.opt, POPT_DIS_IDLE_DET), mee_onoff ("Disable frame limiter", MA_OPT2_NO_FRAME_LIMIT,currentConfig.EmuOpt, EOPT_NO_FRMLIMIT), - mee_onoff ("Enable dynarecs", MA_OPT2_DYNARECS, PicoOpt, POPT_EN_DRC), + mee_onoff ("Enable dynarecs", MA_OPT2_DYNARECS, PicoIn.opt, POPT_EN_DRC), mee_onoff ("Status line in main menu", MA_OPT2_STATUS_LINE, currentConfig.EmuOpt, EOPT_SHOW_RTC), MENU_OPTIONS_ADV mee_end, @@ -556,15 +556,15 @@ static int sndrate_prevnext(int rate, int dir) i += dir ? 1 : -1; if (i > 4) { - if (!(PicoOpt & POPT_EN_STEREO)) { - PicoOpt |= POPT_EN_STEREO; + if (!(PicoIn.opt & POPT_EN_STEREO)) { + PicoIn.opt |= POPT_EN_STEREO; return rates[0]; } return rates[4]; } if (i < 0) { - if (PicoOpt & POPT_EN_STEREO) { - PicoOpt &= ~POPT_EN_STEREO; + if (PicoIn.opt & POPT_EN_STEREO) { + PicoIn.opt &= ~POPT_EN_STEREO; return rates[4]; } return rates[0]; @@ -579,24 +579,24 @@ static void region_prevnext(int right) int i; if (right) { - if (!PicoRegionOverride) { + if (!PicoIn.regionOverride) { for (i = 0; i < 6; i++) - if (rgn_orders[i] == PicoAutoRgnOrder) break; - if (i < 5) PicoAutoRgnOrder = rgn_orders[i+1]; - else PicoRegionOverride=1; + if (rgn_orders[i] == PicoIn.autoRgnOrder) break; + if (i < 5) PicoIn.autoRgnOrder = rgn_orders[i+1]; + else PicoIn.regionOverride=1; } else - PicoRegionOverride <<= 1; - if (PicoRegionOverride > 8) - PicoRegionOverride = 8; + PicoIn.regionOverride <<= 1; + if (PicoIn.regionOverride > 8) + PicoIn.regionOverride = 8; } else { - if (!PicoRegionOverride) { + if (!PicoIn.regionOverride) { for (i = 0; i < 6; i++) - if (rgn_orders[i] == PicoAutoRgnOrder) break; - if (i > 0) PicoAutoRgnOrder = rgn_orders[i-1]; + if (rgn_orders[i] == PicoIn.autoRgnOrder) break; + if (i > 0) PicoIn.autoRgnOrder = rgn_orders[i-1]; } else - PicoRegionOverride >>= 1; + PicoIn.regionOverride >>= 1; } } @@ -667,7 +667,7 @@ static const char *mgn_opt_sound(int id, int *offs) { 
const char *str2; *offs = -8; - str2 = (PicoOpt & POPT_EN_STEREO) ? "stereo" : "mono"; + str2 = (PicoIn.opt & POPT_EN_STEREO) ? "stereo" : "mono"; sprintf(static_buff, "%5iHz %s", PsndRate, str2); return static_buff; } @@ -676,7 +676,7 @@ static const char *mgn_opt_region(int id, int *offs) { static const char *names[] = { "Auto", " Japan NTSC", " Japan PAL", " USA", " Europe" }; static const char *names_short[] = { "", " JP", " JP", " US", " EU" }; - int code = PicoRegionOverride; + int code = PicoIn.regionOverride; int u, i = 0; *offs = -6; @@ -689,7 +689,7 @@ static const char *mgn_opt_region(int id, int *offs) } else { strcpy(static_buff, "Auto:"); for (u = 0; u < 3; u++) { - code = (PicoAutoRgnOrder >> u*4) & 0xf; + code = (PicoIn.autoRgnOrder >> u*4) & 0xf; for (i = 0; code; code >>= 1, i++) ; strcat(static_buff, names_short[i]); @@ -887,9 +887,9 @@ static void debug_menu_loop(void) if (inp & PBTN_MA2) pv->debug_p ^= PVD_KILL_32X; if (inp & PBTN_MOK) { PsndOut = NULL; // just in case - PicoSkipFrame = 1; + PicoIn.skipFrame = 1; PicoFrame(); - PicoSkipFrame = 0; + PicoIn.skipFrame = 0; while (inp & PBTN_MOK) inp = in_menu_wait_any(NULL, -1); } break; @@ -1023,7 +1023,7 @@ static int main_menu_handler(int id, int keys) } break; case MA_MAIN_CHANGE_CD: - if (PicoAHW & PAHW_MCD) { + if (PicoIn.AHW & PAHW_MCD) { // if cd is loaded, cdd_unload() triggers eject and // returns 1, else we'll select and load new CD here if (!cdd_unload()) @@ -1081,7 +1081,7 @@ void menu_loop(void) me_enable(e_menu_main, MA_MAIN_SAVE_STATE, PicoGameLoaded); me_enable(e_menu_main, MA_MAIN_LOAD_STATE, PicoGameLoaded); me_enable(e_menu_main, MA_MAIN_RESET_GAME, PicoGameLoaded); - me_enable(e_menu_main, MA_MAIN_CHANGE_CD, PicoAHW & PAHW_MCD); + me_enable(e_menu_main, MA_MAIN_CHANGE_CD, PicoIn.AHW & PAHW_MCD); me_enable(e_menu_main, MA_MAIN_PATCHES, PicoPatches != NULL); menu_enter(PicoGameLoaded); @@ -1168,7 +1168,7 @@ void menu_update_msg(const char *msg) /* hidden options for config 
engine only */ static menu_entry e_menu_hidden[] = { - mee_onoff("Accurate sprites", MA_OPT_ACC_SPRITES, PicoOpt, 0x080), + mee_onoff("Accurate sprites", MA_OPT_ACC_SPRITES, PicoIn.opt, 0x080), mee_onoff("autoload savestates", MA_OPT_AUTOLOAD_SAVE, g_autostateld_opt, 1), mee_end, }; diff --git a/platform/common/mp3.c b/platform/common/mp3.c index b2bcaf40..6c823ad4 100644 --- a/platform/common/mp3.c +++ b/platform/common/mp3.c @@ -115,7 +115,7 @@ void mp3_start_play(void *f_, int pos1024) cdda_out_pos = 0; decoder_active = 0; - if (!(PicoOpt & POPT_EN_MCD_CDDA) || f == NULL) // cdda disabled or no file? + if (!(PicoIn.opt & POPT_EN_MCD_CDDA) || f == NULL) // cdda disabled or no file? return; fseek(f, 0, SEEK_END); diff --git a/platform/gizmondo/emu.c b/platform/gizmondo/emu.c index 30c6651b..96a49cf5 100644 --- a/platform/gizmondo/emu.c +++ b/platform/gizmondo/emu.c @@ -145,7 +145,7 @@ static void blit(const char *fps, const char *notice) { int emu_opt = currentConfig.EmuOpt; - if (PicoOpt&0x10) + if (PicoIn.opt&0x10) { int lines_flags = 224; // 8bit fast renderer @@ -154,7 +154,7 @@ static void blit(const char *fps, const char *notice) vidConvCpyRGB565(localPal, Pico.cram, 0x40); } // a hack for VR - if (PicoAHW & PAHW_SVP) + if (PicoIn.AHW & PAHW_SVP) memset32((int *)(Pico.est.Draw2FB+328*8+328*223), 0xe0e0e0e0, 328); if (!(Pico.video.reg[12]&1)) lines_flags|=0x10000; if (currentConfig.EmuOpt&0x4000) @@ -196,7 +196,7 @@ static void blit(const char *fps, const char *notice) if (emu_opt & 2) osd_text(OSD_FPS_X, h, fps); } - if ((emu_opt & 0x400) && (PicoAHW & PAHW_MCD)) + if ((emu_opt & 0x400) && (PicoIn.AHW & PAHW_MCD)) cd_leds(); } @@ -220,7 +220,7 @@ static void vidResetMode(void) { giz_screen = fb_lock(1); - if (PicoOpt&0x10) { + if (PicoIn.opt&0x10) { } else if (currentConfig.EmuOpt&0x80) { PicoDrawSetOutFormat(PDF_RGB555, 0); PicoDrawSetCallbacks(EmuScanBegin16, NULL); @@ -228,7 +228,7 @@ static void vidResetMode(void) PicoDrawSetOutFormat(PDF_NONE, 0); 
PicoDrawSetCallbacks(EmuScanBegin8, NULL); } - if ((PicoOpt&0x10) || !(currentConfig.EmuOpt&0x80)) { + if ((PicoIn.opt&0x10) || !(currentConfig.EmuOpt&0x80)) { // setup pal for 8-bit modes localPal[0xc0] = 0x0600; localPal[0xd0] = 0xc000; @@ -278,21 +278,21 @@ static void updateSound(int len) static void SkipFrame(void) { - PicoSkipFrame=1; + PicoIn.skipFrame=1; PicoFrame(); - PicoSkipFrame=0; + PicoIn.skipFrame=0; } /* forced frame to front buffer */ void pemu_forced_frame(int no_scale, int do_emu) { - int po_old = PicoOpt; + int po_old = PicoIn.opt; int eo_old = currentConfig.EmuOpt; - PicoOpt &= ~0x10; - PicoOpt |= POPT_ACC_SPRITES; + PicoIn.opt &= ~0x10; + PicoIn.opt |= POPT_ACC_SPRITES; if (!no_scale) - PicoOpt |= POPT_EN_SOFTSCALE; + PicoIn.opt |= POPT_EN_SOFTSCALE; currentConfig.EmuOpt |= 0x80; if (giz_screen == NULL) @@ -306,7 +306,7 @@ void pemu_forced_frame(int no_scale, int do_emu) fb_unlock(); giz_screen = NULL; - PicoOpt = po_old; + PicoIn.opt = po_old; currentConfig.EmuOpt = eo_old; } @@ -350,12 +350,12 @@ static void RunEvents(unsigned int which) } if (which & 0x0400) // switch renderer { - if (PicoOpt&0x10) { PicoOpt&=~0x10; currentConfig.EmuOpt |= 0x80; } - else { PicoOpt|= 0x10; currentConfig.EmuOpt &= ~0x80; } + if (PicoIn.opt&0x10) { PicoIn.opt&=~0x10; currentConfig.EmuOpt |= 0x80; } + else { PicoIn.opt|= 0x10; currentConfig.EmuOpt &= ~0x80; } vidResetMode(); - if (PicoOpt&0x10) { + if (PicoIn.opt&0x10) { strcpy(noticeMsg, " 8bit fast renderer"); } else if (currentConfig.EmuOpt&0x80) { strcpy(noticeMsg, "16bit accurate renderer"); @@ -392,11 +392,11 @@ static void updateKeys(void) keys &= CONFIGURABLE_KEYS; - PicoPad[0] = allActions[0] & 0xfff; - PicoPad[1] = allActions[1] & 0xfff; + PicoIn.pad[0] = allActions[0] & 0xfff; + PicoIn.pad[1] = allActions[1] & 0xfff; - if (allActions[0] & 0x7000) emu_DoTurbo(&PicoPad[0], allActions[0]); - if (allActions[1] & 0x7000) emu_DoTurbo(&PicoPad[1], allActions[1]); + if (allActions[0] & 0x7000) 
emu_DoTurbo(&PicoIn.pad[0], allActions[0]); + if (allActions[1] & 0x7000) emu_DoTurbo(&PicoIn.pad[1], allActions[1]); events = (allActions[0] | allActions[1]) >> 16; @@ -455,8 +455,8 @@ void pemu_loop(void) // make sure we are in correct mode vidResetMode(); - if (currentConfig.scaling) PicoOpt|=0x4000; - else PicoOpt&=~0x4000; + if (currentConfig.scaling) PicoIn.opt|=0x4000; + else PicoIn.opt&=~0x4000; Pico.m.dirtyPal = 1; oldmodes = ((Pico.video.reg[12]&1)<<2) ^ 0xc; @@ -466,17 +466,17 @@ void pemu_loop(void) reset_timing = 1; // prepare CD buffer - if (PicoAHW & PAHW_MCD) PicoCDBufferInit(); + if (PicoIn.AHW & PAHW_MCD) PicoCDBufferInit(); // prepare sound stuff PsndOut = NULL; if (currentConfig.EmuOpt & 4) { int ret, snd_excess_add, stereo; - if (PsndRate != PsndRate_old || (PicoOpt&0x0b) != (PicoOpt_old&0x0b) || Pico.m.pal != pal_old) { + if (PsndRate != PsndRate_old || (PicoIn.opt&0x0b) != (PicoOpt_old&0x0b) || Pico.m.pal != pal_old) { PsndRerate(Pico.m.frame_count ? 1 : 0); } - stereo=(PicoOpt&8)>>3; + stereo=(PicoIn.opt&8)>>3; snd_excess_add = ((PsndRate - PsndLen*target_fps)<<16) / target_fps; snd_cbuf_samples = (PsndRate< 22050) co = 11; - if (PicoOpt&8) shift++; + if (PicoIn.opt&8) shift++; if (audio_skew < 0) { adj = -((-audio_skew) >> shift); if (audio_skew > -(6<>=1; @@ -674,7 +674,7 @@ void pemu_loop(void) } - if (PicoAHW & PAHW_MCD) PicoCDBufferFree(); + if (PicoIn.AHW & PAHW_MCD) PicoCDBufferFree(); if (PsndOut != NULL) { PsndOut = snd_cbuff = NULL; diff --git a/platform/gizmondo/menu.c b/platform/gizmondo/menu.c index dd6740a3..232701ad 100644 --- a/platform/gizmondo/menu.c +++ b/platform/gizmondo/menu.c @@ -501,7 +501,7 @@ static void draw_savestate_bg(int slot) } if (file) { - if (PicoAHW & 1) { + if (PicoIn.AHW & 1) { PicoCdLoadStateGfx(file); } else { areaSeek(file, 0x10020, SEEK_SET); // skip header and RAM in state file @@ -703,7 +703,7 @@ menu_entry ctrlopt_entries[] = { "Player 1", MB_NONE, MA_CTRL_PLAYER1, NULL, 0, 0, 0, 1, 0 }, { "Player 
2", MB_NONE, MA_CTRL_PLAYER2, NULL, 0, 0, 0, 1, 0 }, { "Emulator controls", MB_NONE, MA_CTRL_EMU, NULL, 0, 0, 0, 1, 0 }, - { "6 button pad", MB_ONOFF, MA_OPT_6BUTTON_PAD, &PicoOpt, 0x020, 0, 0, 1, 1 }, + { "6 button pad", MB_ONOFF, MA_OPT_6BUTTON_PAD, &PicoIn.opt, 0x020, 0, 0, 1, 1 }, { "Turbo rate", MB_RANGE, MA_CTRL_TURBO_RATE, &currentConfig.turbo_rate, 0, 1, 30, 1, 1 }, { "Done", MB_NONE, MA_CTRL_DONE, NULL, 0, 0, 0, 1, 0 }, }; @@ -756,7 +756,7 @@ static void kc_sel_loop(void) if (inp & PBTN_UP ) { menu_sel--; if (menu_sel < 0) menu_sel = menu_sel_max; } if (inp & PBTN_DOWN) { menu_sel++; if (menu_sel > menu_sel_max) menu_sel = 0; } if (inp & PBTN_PLAY) { - int is_6button = PicoOpt & 0x020; + int is_6button = PicoIn.opt & 0x020; switch (selected_id) { case MA_CTRL_PLAYER1: key_config_loop(me_ctrl_actions, is_6button ? 15 : 11, 0); return; case MA_CTRL_PLAYER2: key_config_loop(me_ctrl_actions, is_6button ? 15 : 11, 1); return; @@ -779,12 +779,12 @@ menu_entry cdopt_entries[] = { NULL, MB_NONE, MA_CDOPT_TESTBIOS_EUR, NULL, 0, 0, 0, 1, 0 }, { NULL, MB_NONE, MA_CDOPT_TESTBIOS_JAP, NULL, 0, 0, 0, 1, 0 }, { "CD LEDs", MB_ONOFF, MA_CDOPT_LEDS, &currentConfig.EmuOpt, 0x0400, 0, 0, 1, 1 }, - { "CDDA audio", MB_ONOFF, MA_CDOPT_CDDA, &PicoOpt, 0x0800, 0, 0, 1, 1 }, - { "PCM audio", MB_ONOFF, MA_CDOPT_PCM, &PicoOpt, 0x0400, 0, 0, 1, 1 }, + { "CDDA audio", MB_ONOFF, MA_CDOPT_CDDA, &PicoIn.opt, 0x0800, 0, 0, 1, 1 }, + { "PCM audio", MB_ONOFF, MA_CDOPT_PCM, &PicoIn.opt, 0x0400, 0, 0, 1, 1 }, { NULL, MB_NONE, MA_CDOPT_READAHEAD, NULL, 0, 0, 0, 1, 1 }, - { "SaveRAM cart", MB_ONOFF, MA_CDOPT_SAVERAM, &PicoOpt, 0x8000, 0, 0, 1, 1 }, - { "Scale/Rot. fx (slow)", MB_ONOFF, MA_CDOPT_SCALEROT_CHIP,&PicoOpt, 0x1000, 0, 0, 1, 1 }, - { "Better sync (slow)", MB_ONOFF, MA_CDOPT_BETTER_SYNC, &PicoOpt, 0x2000, 0, 0, 1, 1 }, + { "SaveRAM cart", MB_ONOFF, MA_CDOPT_SAVERAM, &PicoIn.opt, 0x8000, 0, 0, 1, 1 }, + { "Scale/Rot. 
fx (slow)", MB_ONOFF, MA_CDOPT_SCALEROT_CHIP,&PicoIn.opt, 0x1000, 0, 0, 1, 1 }, + { "Better sync (slow)", MB_ONOFF, MA_CDOPT_BETTER_SYNC, &PicoIn.opt, 0x2000, 0, 0, 1, 1 }, { "done", MB_NONE, MA_CDOPT_DONE, NULL, 0, 0, 0, 1, 0 }, }; @@ -927,16 +927,16 @@ static void cd_menu_loop_options(void) menu_entry opt2_entries[] = { - { "Disable sprite limit", MB_ONOFF, MA_OPT2_NO_SPRITE_LIM, &PicoOpt, 0x40000, 0, 0, 1, 1 }, - { "Emulate Z80", MB_ONOFF, MA_OPT2_ENABLE_Z80, &PicoOpt, 0x00004, 0, 0, 1, 1 }, - { "Emulate YM2612 (FM)", MB_ONOFF, MA_OPT2_ENABLE_YM2612, &PicoOpt, 0x00001, 0, 0, 1, 1 }, - { "Emulate SN76496 (PSG)", MB_ONOFF, MA_OPT2_ENABLE_SN76496,&PicoOpt, 0x00002, 0, 0, 1, 1 }, + { "Disable sprite limit", MB_ONOFF, MA_OPT2_NO_SPRITE_LIM, &PicoIn.opt, 0x40000, 0, 0, 1, 1 }, + { "Emulate Z80", MB_ONOFF, MA_OPT2_ENABLE_Z80, &PicoIn.opt, 0x00004, 0, 0, 1, 1 }, + { "Emulate YM2612 (FM)", MB_ONOFF, MA_OPT2_ENABLE_YM2612, &PicoIn.opt, 0x00001, 0, 0, 1, 1 }, + { "Emulate SN76496 (PSG)", MB_ONOFF, MA_OPT2_ENABLE_SN76496,&PicoIn.opt, 0x00002, 0, 0, 1, 1 }, { "Double buffering", MB_ONOFF, MA_OPT2_DBLBUFF, &currentConfig.EmuOpt, 0x8000, 0, 0, 1, 1 }, { "Wait for V-sync (slow)", MB_ONOFF, MA_OPT2_VSYNC, &currentConfig.EmuOpt, 0x2000, 0, 0, 1, 1 }, { "gzip savestates", MB_ONOFF, MA_OPT2_GZIP_STATES, &currentConfig.EmuOpt, 0x0008, 0, 0, 1, 1 }, { "Don't save last used ROM", MB_ONOFF, MA_OPT2_NO_LAST_ROM, &currentConfig.EmuOpt, 0x0020, 0, 0, 1, 1 }, - { "SVP dynarec", MB_ONOFF, MA_OPT2_SVP_DYNAREC, &PicoOpt, 0x20000, 0, 0, 1, 1 }, - { "Disable idle loop patching",MB_ONOFF, MA_OPT2_NO_IDLE_LOOPS, &PicoOpt, 0x80000, 0, 0, 1, 1 }, + { "SVP dynarec", MB_ONOFF, MA_OPT2_SVP_DYNAREC, &PicoIn.opt, 0x20000, 0, 0, 1, 1 }, + { "Disable idle loop patching",MB_ONOFF, MA_OPT2_NO_IDLE_LOOPS, &PicoIn.opt, 0x80000, 0, 0, 1, 1 }, { "done", MB_NONE, MA_OPT2_DONE, NULL, 0, 0, 0, 1, 0 }, }; @@ -1001,7 +1001,7 @@ static void amenu_loop_options(void) menu_entry opt_entries[] = { { NULL, MB_NONE, MA_OPT_RENDERER, NULL, 0, 0, 
0, 1, 1 }, - { "Accurate sprites", MB_ONOFF, MA_OPT_ACC_SPRITES, &PicoOpt, 0x080, 0, 0, 0, 1 }, + { "Accurate sprites", MB_ONOFF, MA_OPT_ACC_SPRITES, &PicoIn.opt, 0x080, 0, 0, 0, 1 }, { "Scanline mode (faster)", MB_ONOFF, MA_OPT_INTERLACED, &currentConfig.EmuOpt, 0x4000, 0, 0, 1, 1 }, { "Scale low res mode", MB_ONOFF, MA_OPT_SCALING, &currentConfig.scaling, 0x0001, 0, 3, 1, 1 }, { "Show FPS", MB_ONOFF, MA_OPT_SHOW_FPS, &currentConfig.EmuOpt, 0x0002, 0, 0, 1, 1 }, @@ -1030,7 +1030,7 @@ static void menu_opt_cust_draw(const menu_entry *entry, int x, int y, void *para switch (entry->id) { case MA_OPT_RENDERER: - if (PicoOpt&0x10) + if (PicoIn.opt&0x10) str = " 8bit fast"; else if (currentConfig.EmuOpt&0x80) str = "16bit accurate"; @@ -1045,11 +1045,11 @@ static void menu_opt_cust_draw(const menu_entry *entry, int x, int y, void *para text_out16(x, y, "Frameskip %s", str24); break; case MA_OPT_SOUND_QUALITY: - str = (PicoOpt&0x08)?"stereo":"mono"; + str = (PicoIn.opt&0x08)?"stereo":"mono"; text_out16(x, y, "Sound Quality: %5iHz %s", PsndRate, str); break; case MA_OPT_REGION: - text_out16(x, y, "Region: %s", me_region_name(PicoRegionOverride, PicoAutoRgnOrder)); + text_out16(x, y, "Region: %s", me_region_name(PicoIn.regionOverride, PicoIn.autoRgnOrder)); break; case MA_OPT_CONFIRM_STATES: switch ((currentConfig.EmuOpt >> 9) & 5) { @@ -1108,31 +1108,31 @@ static void region_prevnext(int right) static int rgn_orders[] = { 0x148, 0x184, 0x814, 0x418, 0x841, 0x481 }; int i; if (right) { - if (!PicoRegionOverride) { + if (!PicoIn.regionOverride) { for (i = 0; i < 6; i++) - if (rgn_orders[i] == PicoAutoRgnOrder) break; - if (i < 5) PicoAutoRgnOrder = rgn_orders[i+1]; - else PicoRegionOverride=1; + if (rgn_orders[i] == PicoIn.autoRgnOrder) break; + if (i < 5) PicoIn.autoRgnOrder = rgn_orders[i+1]; + else PicoIn.regionOverride=1; } - else PicoRegionOverride<<=1; - if (PicoRegionOverride > 8) PicoRegionOverride = 8; + else PicoIn.regionOverride<<=1; + if (PicoIn.regionOverride > 8) 
PicoIn.regionOverride = 8; } else { - if (!PicoRegionOverride) { + if (!PicoIn.regionOverride) { for (i = 0; i < 6; i++) - if (rgn_orders[i] == PicoAutoRgnOrder) break; - if (i > 0) PicoAutoRgnOrder = rgn_orders[i-1]; + if (rgn_orders[i] == PicoIn.autoRgnOrder) break; + if (i > 0) PicoIn.autoRgnOrder = rgn_orders[i-1]; } - else PicoRegionOverride>>=1; + else PicoIn.regionOverride>>=1; } } static void menu_options_save(void) { - if (PicoRegionOverride) { + if (PicoIn.regionOverride) { // force setting possibly changed.. - Pico.m.pal = (PicoRegionOverride == 2 || PicoRegionOverride == 8) ? 1 : 0; + Pico.m.pal = (PicoIn.regionOverride == 2 || PicoIn.regionOverride == 8) ? 1 : 0; } - if (!(PicoOpt & 0x20)) { + if (!(PicoIn.opt & 0x20)) { // unbind XYZ MODE, just in case unbind_action(0xf00); } @@ -1162,28 +1162,28 @@ static int menu_loop_options(void) switch (selected_id) { case MA_OPT_RENDERER: if (inp & PBTN_LEFT) { - if ((PicoOpt&0x10) || !(currentConfig.EmuOpt &0x80)) { - PicoOpt&= ~0x10; + if ((PicoIn.opt&0x10) || !(currentConfig.EmuOpt &0x80)) { + PicoIn.opt&= ~0x10; currentConfig.EmuOpt |= 0x80; } } else { - if (!(PicoOpt&0x10) || (currentConfig.EmuOpt &0x80)) { - PicoOpt|= 0x10; + if (!(PicoIn.opt&0x10) || (currentConfig.EmuOpt &0x80)) { + PicoIn.opt|= 0x10; currentConfig.EmuOpt &= ~0x80; } } break; case MA_OPT_SOUND_QUALITY: if ((inp & PBTN_RIGHT) && PsndRate == 44100 && - !(PicoOpt&0x08)) + !(PicoIn.opt&0x08)) { PsndRate = 11025; - PicoOpt |= 8; + PicoIn.opt |= 8; } else if ((inp & PBTN_LEFT) && PsndRate == 11025 && - (PicoOpt&0x08) && !(PicoAHW&1)) + (PicoIn.opt&0x08) && !(PicoIn.AHW&1)) { PsndRate = 44100; - PicoOpt &= ~8; + PicoIn.opt &= ~8; } else PsndRate = sndrate_prevnext(PsndRate, inp & PBTN_RIGHT); break; diff --git a/platform/gp2x/940ctl.c b/platform/gp2x/940ctl.c index ff79c88a..99c25d64 100644 --- a/platform/gp2x/940ctl.c +++ b/platform/gp2x/940ctl.c @@ -424,7 +424,7 @@ int YM2612UpdateOne_940(int *buffer, int length, int stereo, int is_buf_empty) 
int mp3dec_decode(FILE *f, int *file_pos, int file_len) { - if (!(PicoOpt & POPT_EXT_FM)) { + if (!(PicoIn.opt & POPT_EXT_FM)) { //mp3_update_local(buffer, length, stereo); return 0; } @@ -456,7 +456,7 @@ int mp3dec_decode(FILE *f, int *file_pos, int file_len) int mp3dec_start(FILE *f, int fpos_start) { - if (!(PicoOpt & POPT_EXT_FM)) { + if (!(PicoIn.opt & POPT_EXT_FM)) { //mp3_start_play_local(f, pos); return -1; } diff --git a/platform/gp2x/emu.c b/platform/gp2x/emu.c index 01fd1629..3b3a234b 100644 --- a/platform/gp2x/emu.c +++ b/platform/gp2x/emu.c @@ -70,7 +70,7 @@ void pemu_prep_defconfig(void) void pemu_validate_config(void) { if (gp2x_dev_id != GP2X_DEV_GP2X) - PicoOpt &= ~POPT_EXT_FM; + PicoIn.opt &= ~POPT_EXT_FM; if (gp2x_dev_id != GP2X_DEV_WIZ) currentConfig.EmuOpt &= ~EOPT_WIZ_TEAR_FIX; @@ -83,7 +83,7 @@ void pemu_validate_config(void) static int get_renderer(void) { - if (PicoAHW & PAHW_32X) + if (PicoIn.AHW & PAHW_32X) return currentConfig.renderer32x; else return currentConfig.renderer; @@ -92,14 +92,14 @@ static int get_renderer(void) static void change_renderer(int diff) { int *r; - if (PicoAHW & PAHW_32X) + if (PicoIn.AHW & PAHW_32X) r = &currentConfig.renderer32x; else r = &currentConfig.renderer; *r += diff; // 8bpp fast is not there (yet?) 
- if ((PicoAHW & PAHW_SMS) && *r == RT_8BIT_FAST) + if ((PicoIn.AHW & PAHW_SMS) && *r == RT_8BIT_FAST) (*r)++; if (*r >= RT_COUNT) @@ -109,7 +109,7 @@ static void change_renderer(int diff) } #define is_16bit_mode() \ - (get_renderer() == RT_16BIT || (PicoAHW & PAHW_32X)) + (get_renderer() == RT_16BIT || (PicoIn.AHW & PAHW_32X)) static void (*osd_text)(int x, int y, const char *text); @@ -201,7 +201,7 @@ static void draw_pico_ptr(void) x = pico_pen_x + PICO_PEN_ADJUST_X; y = pico_pen_y + PICO_PEN_ADJUST_Y; - if (!(Pico.video.reg[12]&1) && !(PicoOpt & POPT_DIS_32C_BORDER)) + if (!(Pico.video.reg[12]&1) && !(PicoIn.opt & POPT_DIS_32C_BORDER)) x += 32; if (currentConfig.EmuOpt & EOPT_WIZ_TEAR_FIX) { @@ -231,7 +231,7 @@ static int EmuScanEnd16_rot(unsigned int num) if ((num & 3) != 3) return 0; rotated_blit16(g_screen_ptr, rot_buff, num + 1, - !(Pico.video.reg[12] & 1) && !(PicoOpt & POPT_EN_SOFTSCALE)); + !(Pico.video.reg[12] & 1) && !(PicoIn.opt & POPT_EN_SOFTSCALE)); return 0; } @@ -342,7 +342,7 @@ void pemu_finalize_frame(const char *fps, const char *notice) int emu_opt = currentConfig.EmuOpt; int ret; - if (PicoAHW & PAHW_32X) + if (PicoIn.AHW & PAHW_32X) ; // nothing to do else if (get_renderer() == RT_8BIT_FAST) { @@ -354,11 +354,11 @@ void pemu_finalize_frame(const char *fps, const char *notice) gp2x_video_setpalette(localPal, ret); } // a hack for VR - if (PicoAHW & PAHW_SVP) + if (PicoIn.AHW & PAHW_SVP) memset32((int *)(Pico.est.Draw2FB+328*8+328*223), 0xe0e0e0e0, 328); // do actual copy vidcpyM2(g_screen_ptr, Pico.est.Draw2FB+328*8, - !(Pico.video.reg[12] & 1), !(PicoOpt & POPT_DIS_32C_BORDER)); + !(Pico.video.reg[12] & 1), !(PicoIn.opt & POPT_DIS_32C_BORDER)); } else if (get_renderer() == RT_8BIT_ACC) { @@ -375,9 +375,9 @@ void pemu_finalize_frame(const char *fps, const char *notice) osd_text(4, osd_y, notice); if (emu_opt & EOPT_SHOW_FPS) osd_text(osd_fps_x, osd_y, fps); - if ((PicoAHW & PAHW_MCD) && (emu_opt & EOPT_EN_CD_LEDS)) + if ((PicoIn.AHW & 
PAHW_MCD) && (emu_opt & EOPT_EN_CD_LEDS)) draw_cd_leds(); - if (PicoAHW & PAHW_PICO) + if (PicoIn.AHW & PAHW_PICO) draw_pico_ptr(); } @@ -472,7 +472,7 @@ static void vid_reset_mode(void) int gp2x_mode = 16; int renderer = get_renderer(); - PicoOpt &= ~POPT_ALT_RENDERER; + PicoIn.opt &= ~POPT_ALT_RENDERER; emu_scan_begin = NULL; emu_scan_end = NULL; @@ -487,7 +487,7 @@ static void vid_reset_mode(void) gp2x_mode = 8; break; case RT_8BIT_FAST: - PicoOpt |= POPT_ALT_RENDERER; + PicoIn.opt |= POPT_ALT_RENDERER; PicoDrawSetOutFormat(PDF_NONE, 0); vidcpyM2 = vidcpy_m2; gp2x_mode = 8; @@ -497,7 +497,7 @@ static void vid_reset_mode(void) break; } - if (PicoAHW & PAHW_32X) { + if (PicoIn.AHW & PAHW_32X) { // Wiz 16bit is an exception, uses line rendering due to rotation mess if (renderer == RT_16BIT && (currentConfig.EmuOpt & EOPT_WIZ_TEAR_FIX)) { PicoDrawSetOutFormat(PDF_RGB555, 1); @@ -510,7 +510,7 @@ static void vid_reset_mode(void) } if (currentConfig.EmuOpt & EOPT_WIZ_TEAR_FIX) { - if ((PicoAHW & PAHW_32X) || renderer == RT_16BIT) { + if ((PicoIn.AHW & PAHW_32X) || renderer == RT_16BIT) { emu_scan_begin = EmuScanBegin16_rot; emu_scan_end = EmuScanEnd16_rot; } @@ -549,12 +549,12 @@ static void vid_reset_mode(void) Pico.m.dirtyPal = 1; - PicoOpt &= ~POPT_EN_SOFTSCALE; + PicoIn.opt &= ~POPT_EN_SOFTSCALE; if (currentConfig.scaling == EOPT_SCALE_SW) - PicoOpt |= POPT_EN_SOFTSCALE; + PicoIn.opt |= POPT_EN_SOFTSCALE; // palette converters for 8bit modes - make_local_pal = (PicoAHW & PAHW_SMS) ? make_local_pal_sms : make_local_pal_md; + make_local_pal = (PicoIn.AHW & PAHW_SMS) ? 
make_local_pal_sms : make_local_pal_md; } void emu_video_mode_change(int start_line, int line_count, int is_32cols) @@ -569,10 +569,10 @@ void emu_video_mode_change(int start_line, int line_count, int is_32cols) osd_y = 232; /* set up hwscaling here */ - PicoOpt &= ~POPT_DIS_32C_BORDER; + PicoIn.opt &= ~POPT_DIS_32C_BORDER; if (is_32cols && currentConfig.scaling == EOPT_SCALE_HW) { scalex = 256; - PicoOpt |= POPT_DIS_32C_BORDER; + PicoIn.opt |= POPT_DIS_32C_BORDER; osd_fps_x = OSD_FPS_X - 64; } @@ -607,7 +607,7 @@ void plat_video_toggle_renderer(int change, int is_menu_call) vid_reset_mode(); rendstatus_old = -1; - if (PicoAHW & PAHW_32X) + if (PicoIn.AHW & PAHW_32X) emu_status_msg(renderer_names32x[get_renderer()]); else emu_status_msg(renderer_names[get_renderer()]); @@ -626,7 +626,7 @@ static void RunEventsPico(unsigned int events) if (ret > 35000) { if (pdown_frames++ > 5) - PicoPad[0] |= 0x20; + PicoIn.pad[0] |= 0x20; pico_pen_x = px; pico_pen_y = py; @@ -654,7 +654,7 @@ void plat_update_volume(int has_changed, int is_up) gp2x_soc_t soc; soc = soc_detect(); - if ((PicoOpt & POPT_EN_STEREO) && soc == SOCID_MMSP2) + if ((PicoIn.opt & POPT_EN_STEREO) && soc == SOCID_MMSP2) need_low_volume = 1; if (has_changed) diff --git a/platform/gp2x/menu.c b/platform/gp2x/menu.c index 7d7ac93d..8e1da968 100644 --- a/platform/gp2x/menu.c +++ b/platform/gp2x/menu.c @@ -13,7 +13,7 @@ const char *men_scaling_opts[] = { "OFF", "software", "hardware", NULL }; mee_onoff ("Vsync", MA_OPT2_VSYNC, currentConfig.EmuOpt, EOPT_VSYNC), #define MENU_OPTIONS_ADV \ - mee_onoff ("Use second CPU for sound", MA_OPT_ARM940_SOUND, PicoOpt, POPT_EXT_FM), \ + mee_onoff ("Use second CPU for sound", MA_OPT_ARM940_SOUND, PicoIn.opt, POPT_EXT_FM), \ static menu_entry e_menu_adv_options[]; diff --git a/platform/libretro/libretro.c b/platform/libretro/libretro.c index 7896ffbf..33ede0d2 100644 --- a/platform/libretro/libretro.c +++ b/platform/libretro/libretro.c @@ -1115,13 +1115,13 @@ void 
*retro_get_memory_data(unsigned type) switch(type) { case RETRO_MEMORY_SAVE_RAM: - if (PicoAHW & PAHW_MCD) + if (PicoIn.AHW & PAHW_MCD) data = Pico_mcd->bram; else data = Pico.sv.data; break; case RETRO_MEMORY_SYSTEM_RAM: - if (PicoAHW & PAHW_SMS) + if (PicoIn.AHW & PAHW_SMS) data = PicoMem.zram; else data = PicoMem.ram; @@ -1142,7 +1142,7 @@ size_t retro_get_memory_size(unsigned type) switch(type) { case RETRO_MEMORY_SAVE_RAM: - if (PicoAHW & PAHW_MCD) + if (PicoIn.AHW & PAHW_MCD) // bram return 0x2000; @@ -1157,7 +1157,7 @@ size_t retro_get_memory_size(unsigned type) return (sum != 0) ? Pico.sv.size : 0; case RETRO_MEMORY_SYSTEM_RAM: - if (PicoAHW & PAHW_SMS) + if (PicoIn.AHW & PAHW_SMS) return 0x2000; else return sizeof(PicoMem.ram); @@ -1228,38 +1228,38 @@ static void update_variables(void) var.key = "picodrive_sprlim"; if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { if (strcmp(var.value, "enabled") == 0) - PicoOpt |= POPT_DIS_SPRITE_LIM; + PicoIn.opt |= POPT_DIS_SPRITE_LIM; else - PicoOpt &= ~POPT_DIS_SPRITE_LIM; + PicoIn.opt &= ~POPT_DIS_SPRITE_LIM; } var.value = NULL; var.key = "picodrive_ramcart"; if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { if (strcmp(var.value, "enabled") == 0) - PicoOpt |= POPT_EN_MCD_RAMCART; + PicoIn.opt |= POPT_EN_MCD_RAMCART; else - PicoOpt &= ~POPT_EN_MCD_RAMCART; + PicoIn.opt &= ~POPT_EN_MCD_RAMCART; } - OldPicoRegionOverride = PicoRegionOverride; + OldPicoRegionOverride = PicoIn.regionOverride; var.value = NULL; var.key = "picodrive_region"; if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { if (strcmp(var.value, "Auto") == 0) - PicoRegionOverride = 0; + PicoIn.regionOverride = 0; else if (strcmp(var.value, "Japan NTSC") == 0) - PicoRegionOverride = 1; + PicoIn.regionOverride = 1; else if (strcmp(var.value, "Japan PAL") == 0) - PicoRegionOverride = 2; + PicoIn.regionOverride = 2; else if (strcmp(var.value, "US") == 0) - PicoRegionOverride = 4; + PicoIn.regionOverride = 
4; else if (strcmp(var.value, "Europe") == 0) - PicoRegionOverride = 8; + PicoIn.regionOverride = 8; } // Update region, fps and sound flags if needed - if (Pico.rom && PicoRegionOverride != OldPicoRegionOverride) + if (Pico.rom && PicoIn.regionOverride != OldPicoRegionOverride) { PicoDetectRegion(); PicoLoopPrepare(); @@ -1300,14 +1300,14 @@ static void update_variables(void) var.key = "picodrive_drc"; if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { if (strcmp(var.value, "enabled") == 0) - PicoOpt |= POPT_EN_DRC; + PicoIn.opt |= POPT_EN_DRC; else - PicoOpt &= ~POPT_EN_DRC; + PicoIn.opt &= ~POPT_EN_DRC; } #endif #ifdef _3DS if(!ctr_svchack_successful) - PicoOpt &= ~POPT_EN_DRC; + PicoIn.opt &= ~POPT_EN_DRC; #endif } @@ -1321,11 +1321,11 @@ void retro_run(void) input_poll_cb(); - PicoPad[0] = PicoPad[1] = 0; + PicoIn.pad[0] = PicoIn.pad[1] = 0; for (pad = 0; pad < 2; pad++) for (i = 0; i < RETRO_PICO_MAP_LEN; i++) if (input_state_cb(pad, RETRO_DEVICE_JOYPAD, 0, i)) - PicoPad[pad] |= retro_pico_map[i]; + PicoIn.pad[pad] |= retro_pico_map[i]; PicoPatchApply(); PicoFrame(); @@ -1355,7 +1355,7 @@ void retro_init(void) sceBlock = getVMBlock(); #endif - PicoOpt = POPT_EN_STEREO|POPT_EN_FM|POPT_EN_PSG|POPT_EN_Z80 + PicoIn.opt = POPT_EN_STEREO|POPT_EN_FM|POPT_EN_PSG|POPT_EN_Z80 | POPT_EN_MCD_PCM|POPT_EN_MCD_CDDA|POPT_EN_MCD_GFX | POPT_EN_32X|POPT_EN_PWM | POPT_ACC_SPRITES|POPT_DIS_32C_BORDER; @@ -1363,10 +1363,10 @@ void retro_init(void) #ifdef _3DS if (ctr_svchack_successful) #endif - PicoOpt |= POPT_EN_DRC; + PicoIn.opt |= POPT_EN_DRC; #endif PsndRate = 44100; - PicoAutoRgnOrder = 0x184; // US, EU, JP + PicoIn.autoRgnOrder = 0x184; // US, EU, JP vout_width = 320; vout_height = 240; diff --git a/platform/linux/emu.c b/platform/linux/emu.c index aee8d44c..ddde8dab 100644 --- a/platform/linux/emu.c +++ b/platform/linux/emu.c @@ -29,10 +29,8 @@ void pemu_prep_defconfig(void) void pemu_validate_config(void) { - extern int PicoOpt; -// PicoOpt &= 
~POPT_EXT_FM; #ifndef __arm__ - PicoOpt &= ~POPT_EN_DRC; + PicoIn.opt &= ~POPT_EN_DRC; #endif } @@ -68,7 +66,7 @@ static void draw_cd_leds(void) void pemu_finalize_frame(const char *fps, const char *notice) { - if (currentConfig.renderer != RT_16BIT && !(PicoAHW & PAHW_32X)) { + if (currentConfig.renderer != RT_16BIT && !(PicoIn.AHW & PAHW_32X)) { unsigned short *pd = (unsigned short *)g_screen_ptr + 8 * g_screen_width; unsigned char *ps = Pico.est.Draw2FB + 328*8 + 8; unsigned short *pal = Pico.est.HighPal; @@ -86,7 +84,7 @@ void pemu_finalize_frame(const char *fps, const char *notice) if (currentConfig.EmuOpt & EOPT_SHOW_FPS) emu_osd_text16(g_screen_width - 60, g_screen_height - 8, fps); } - if ((PicoAHW & PAHW_MCD) && (currentConfig.EmuOpt & EOPT_EN_CD_LEDS)) + if ((PicoIn.AHW & PAHW_MCD) && (currentConfig.EmuOpt & EOPT_EN_CD_LEDS)) draw_cd_leds(); } @@ -94,22 +92,22 @@ static void apply_renderer(void) { switch (currentConfig.renderer) { case RT_16BIT: - PicoOpt &= ~POPT_ALT_RENDERER; + PicoIn.opt &= ~POPT_ALT_RENDERER; PicoDrawSetOutFormat(PDF_RGB555, 0); PicoDrawSetOutBuf(g_screen_ptr, g_screen_width * 2); break; case RT_8BIT_ACC: - PicoOpt &= ~POPT_ALT_RENDERER; + PicoIn.opt &= ~POPT_ALT_RENDERER; PicoDrawSetOutFormat(PDF_8BIT, 0); PicoDrawSetOutBuf(Pico.est.Draw2FB + 8, 328); break; case RT_8BIT_FAST: - PicoOpt |= POPT_ALT_RENDERER; + PicoIn.opt |= POPT_ALT_RENDERER; PicoDrawSetOutFormat(PDF_NONE, 0); break; } - if (PicoAHW & PAHW_32X) + if (PicoIn.AHW & PAHW_32X) PicoDrawSetOutBuf(g_screen_ptr, g_screen_width * 2); } diff --git a/platform/pandora/plat.c b/platform/pandora/plat.c index 2e9457a4..a2e70eb8 100644 --- a/platform/pandora/plat.c +++ b/platform/pandora/plat.c @@ -168,7 +168,7 @@ void pemu_finalize_frame(const char *fps, const char *notice) emu_osd_text16(2, g_osd_y, notice); if (fps && fps[0] && (currentConfig.EmuOpt & EOPT_SHOW_FPS)) emu_osd_text16(g_osd_fps_x, g_osd_y, fps); - if ((PicoAHW & PAHW_MCD) && (currentConfig.EmuOpt & EOPT_EN_CD_LEDS)) 
+ if ((PicoIn.AHW & PAHW_MCD) && (currentConfig.EmuOpt & EOPT_EN_CD_LEDS)) draw_cd_leds(); } diff --git a/platform/psp/emu.c b/platform/psp/emu.c index 8373a21a..8f77f395 100644 --- a/platform/psp/emu.c +++ b/platform/psp/emu.c @@ -309,7 +309,7 @@ static void blitscreen_clut(void) blit_16bit_mode = 0; } - if ((PicoOpt&0x10) && Pico.m.dirtyPal) + if ((PicoIn.opt&0x10) && Pico.m.dirtyPal) do_pal_update(0, 0); sceKernelDcacheWritebackAll(); @@ -395,7 +395,7 @@ static void dbg_text(void) /* called after rendering is done, but frame emulation is not finished */ void blit1(void) { - if (PicoOpt&0x10) + if (PicoIn.opt&0x10) { int i; unsigned char *pd; @@ -406,7 +406,7 @@ void blit1(void) memset32((int *)pd, 0xe0e0e0e0, 320/4); } - if (PicoAHW & PAHW_PICO) + if (PicoIn.AHW & PAHW_PICO) draw_pico_ptr(); blitscreen_clut(); @@ -424,7 +424,7 @@ static void blit2(const char *fps, const char *notice, int lagging_behind) //dbg_text(); - if ((emu_opt & 0x400) && (PicoAHW & PAHW_MCD)) + if ((emu_opt & 0x400) && (PicoIn.AHW & PAHW_MCD)) cd_leds(); if (currentConfig.EmuOpt & 0x2000) { // want vsync @@ -571,10 +571,10 @@ void pemu_sound_start(void) samples_made = samples_done = 0; - if (PsndRate != PsndRate_old || (PicoOpt&0x0b) != (PicoOpt_old&0x0b) || Pico.m.pal != pal_old) { + if (PsndRate != PsndRate_old || (PicoIn.opt&0x0b) != (PicoOpt_old&0x0b) || Pico.m.pal != pal_old) { PsndRerate(Pico.m.frame_count ? 1 : 0); } - stereo=(PicoOpt&8)>>3; + stereo=(PicoIn.opt&8)>>3; samples_block = Pico.m.pal ? SOUND_BLOCK_SIZE_PAL : SOUND_BLOCK_SIZE_NTSC; if (PsndRate <= 22050) samples_block /= 2; @@ -597,7 +597,7 @@ void pemu_sound_start(void) samples_made = samples_block; // send 1 empty block first.. 
PsndOut = sndBuffer; PsndRate_old = PsndRate; - PicoOpt_old = PicoOpt; + PicoOpt_old = PicoIn.opt; pal_old = Pico.m.pal; } } @@ -664,20 +664,20 @@ static void writeSound(int len) static void SkipFrame(void) { - PicoSkipFrame=1; + PicoIn.skipFrame=1; PicoFrame(); - PicoSkipFrame=0; + PicoIn.skipFrame=0; } void pemu_forced_frame(int no_scale, int do_emu) { - int po_old = PicoOpt; + int po_old = PicoIn.opt; int eo_old = currentConfig.EmuOpt; - PicoOpt &= ~POPT_ALT_RENDERER; - PicoOpt |= POPT_ACC_SPRITES; + PicoIn.opt &= ~POPT_ALT_RENDERER; + PicoIn.opt |= POPT_ACC_SPRITES; if (!no_scale) - PicoOpt |= POPT_EN_SOFTSCALE; + PicoIn.opt |= POPT_EN_SOFTSCALE; currentConfig.EmuOpt |= 0x80; vidResetMode(); @@ -692,7 +692,7 @@ void pemu_forced_frame(int no_scale, int do_emu) blit1(); sceGuSync(0,0); - PicoOpt = po_old; + PicoIn.opt = po_old; currentConfig.EmuOpt = eo_old; } @@ -703,7 +703,7 @@ static void RunEventsPico(unsigned int events, unsigned int keys) if (pico_inp_mode != 0) { - PicoPad[0] &= ~0x0f; // release UDLR + PicoIn.pad[0] &= ~0x0f; // release UDLR if (keys & PBTN_UP) { pico_pen_y--; if (pico_pen_y < 8) pico_pen_y = 8; } if (keys & PBTN_DOWN) { pico_pen_y++; if (pico_pen_y > 224-PICO_PEN_ADJUST_Y) pico_pen_y = 224-PICO_PEN_ADJUST_Y; } if (keys & PBTN_LEFT) { pico_pen_x--; if (pico_pen_x < 0) pico_pen_x = 0; } @@ -754,12 +754,12 @@ static void RunEvents(unsigned int which) } if (which & 0x0400) // switch renderer { - if (PicoOpt&0x10) { PicoOpt&=~0x10; currentConfig.EmuOpt |= 0x80; } - else { PicoOpt|= 0x10; currentConfig.EmuOpt &= ~0x80; } + if (PicoIn.opt&0x10) { PicoIn.opt&=~0x10; currentConfig.EmuOpt |= 0x80; } + else { PicoIn.opt|= 0x10; currentConfig.EmuOpt &= ~0x80; } vidResetMode(); - if (PicoOpt & POPT_ALT_RENDERER) + if (PicoIn.opt & POPT_ALT_RENDERER) emu_status_msg("fast renderer"); else if (currentConfig.EmuOpt&0x80) emu_status_msg("accurate renderer"); @@ -794,11 +794,11 @@ static void updateKeys(void) keys &= CONFIGURABLE_KEYS; - PicoPad[0] = 
allActions[0] & 0xfff; - PicoPad[1] = allActions[1] & 0xfff; + PicoIn.pad[0] = allActions[0] & 0xfff; + PicoIn.pad[1] = allActions[1] & 0xfff; - if (allActions[0] & 0x7000) emu_DoTurbo(&PicoPad[0], allActions[0]); - if (allActions[1] & 0x7000) emu_DoTurbo(&PicoPad[1], allActions[1]); + if (allActions[0] & 0x7000) emu_DoTurbo(&PicoIn.pad[0], allActions[0]); + if (allActions[1] & 0x7000) emu_DoTurbo(&PicoIn.pad[1], allActions[1]); events = (allActions[0] | allActions[1]) >> 16; @@ -809,7 +809,7 @@ static void updateKeys(void) events &= ~prevEvents; - if (PicoAHW == PAHW_PICO) + if (PicoIn.AHW == PAHW_PICO) RunEventsPico(events, keys); if (events) RunEvents(events); if (movie_data) emu_updateMovie(); @@ -861,7 +861,7 @@ void pemu_loop(void) target_frametime = Pico.m.pal ? (1000000<<8)/50 : (1000000<<8)/60+1; reset_timing = 1; - if (PicoAHW & PAHW_MCD) { + if (PicoIn.AHW & PAHW_MCD) { // prepare CD buffer PicoCDBufferInit(); // mp3... @@ -986,7 +986,7 @@ void pemu_loop(void) updateKeys(); - if (!(PicoOpt&0x10)) + if (!(PicoIn.opt&0x10)) EmuScanPrepare(); PicoFrame(); @@ -1019,7 +1019,7 @@ void pemu_loop(void) emu_set_fastforward(0); - if (PicoAHW & PAHW_MCD) PicoCDBufferFree(); + if (PicoIn.AHW & PAHW_MCD) PicoCDBufferFree(); if (PsndOut != NULL) { pemu_sound_stop(); @@ -1039,7 +1039,7 @@ void pemu_loop(void) void emu_HandleResume(void) { - if (!(PicoAHW & PAHW_MCD)) return; + if (!(PicoIn.AHW & PAHW_MCD)) return; // reopen first CD track if (Pico_mcd->TOC.Tracks[0].F != NULL) diff --git a/platform/psp/menu.c b/platform/psp/menu.c index f01f0ae5..de63d345 100644 --- a/platform/psp/menu.c +++ b/platform/psp/menu.c @@ -506,7 +506,7 @@ static void draw_savestate_bg(int slot) } if (file) { - if (PicoAHW & PAHW_MCD) { + if (PicoIn.AHW & PAHW_MCD) { PicoCdLoadStateGfx(file); } else { areaSeek(file, 0x10020, SEEK_SET); // skip header and RAM in state file @@ -708,7 +708,7 @@ menu_entry ctrlopt_entries[] = { "Player 1", MB_NONE, MA_CTRL_PLAYER1, NULL, 0, 0, 0, 1, 0 }, {
"Player 2", MB_NONE, MA_CTRL_PLAYER2, NULL, 0, 0, 0, 1, 0 }, { "Emulator controls", MB_NONE, MA_CTRL_EMU, NULL, 0, 0, 0, 1, 0 }, - { "6 button pad", MB_ONOFF, MA_OPT_6BUTTON_PAD, &PicoOpt, 0x020, 0, 0, 1, 1 }, + { "6 button pad", MB_ONOFF, MA_OPT_6BUTTON_PAD, &PicoIn.opt, 0x020, 0, 0, 1, 1 }, { "Turbo rate", MB_RANGE, MA_CTRL_TURBO_RATE, &currentConfig.turbo_rate, 0, 1, 30, 1, 1 }, { "Done", MB_NONE, MA_CTRL_DONE, NULL, 0, 0, 0, 1, 0 }, }; @@ -763,7 +763,7 @@ static void kc_sel_loop(void) if (inp & PBTN_UP ) { menu_sel--; if (menu_sel < 0) menu_sel = menu_sel_max; } if (inp & PBTN_DOWN) { menu_sel++; if (menu_sel > menu_sel_max) menu_sel = 0; } if (inp & PBTN_CIRCLE) { - int is_6button = PicoOpt & POPT_6BTN_PAD; + int is_6button = PicoIn.opt & POPT_6BTN_PAD; switch (selected_id) { case MA_CTRL_PLAYER1: key_config_loop(me_ctrl_actions, is_6button ? 15 : 11, 0); return; case MA_CTRL_PLAYER2: key_config_loop(me_ctrl_actions, is_6button ? 15 : 11, 1); return; @@ -786,12 +786,12 @@ menu_entry cdopt_entries[] = { NULL, MB_NONE, MA_CDOPT_TESTBIOS_EUR, NULL, 0, 0, 0, 1, 0 }, { NULL, MB_NONE, MA_CDOPT_TESTBIOS_JAP, NULL, 0, 0, 0, 1, 0 }, { "CD LEDs", MB_ONOFF, MA_CDOPT_LEDS, &currentConfig.EmuOpt, 0x0400, 0, 0, 1, 1 }, - { "CDDA audio", MB_ONOFF, MA_CDOPT_CDDA, &PicoOpt, 0x0800, 0, 0, 1, 1 }, - { "PCM audio", MB_ONOFF, MA_CDOPT_PCM, &PicoOpt, 0x0400, 0, 0, 1, 1 }, + { "CDDA audio", MB_ONOFF, MA_CDOPT_CDDA, &PicoIn.opt, 0x0800, 0, 0, 1, 1 }, + { "PCM audio", MB_ONOFF, MA_CDOPT_PCM, &PicoIn.opt, 0x0400, 0, 0, 1, 1 }, { NULL, MB_NONE, MA_CDOPT_READAHEAD, NULL, 0, 0, 0, 1, 1 }, - { "SaveRAM cart", MB_ONOFF, MA_CDOPT_SAVERAM, &PicoOpt, 0x8000, 0, 0, 1, 1 }, - { "Scale/Rot. fx (slow)", MB_ONOFF, MA_CDOPT_SCALEROT_CHIP,&PicoOpt, 0x1000, 0, 0, 1, 1 }, - { "Better sync (slow)", MB_ONOFF, MA_CDOPT_BETTER_SYNC, &PicoOpt, 0x2000, 0, 0, 1, 1 }, + { "SaveRAM cart", MB_ONOFF, MA_CDOPT_SAVERAM, &PicoIn.opt, 0x8000, 0, 0, 1, 1 }, + { "Scale/Rot.
fx (slow)", MB_ONOFF, MA_CDOPT_SCALEROT_CHIP,&PicoIn.opt, 0x1000, 0, 0, 1, 1 }, + { "Better sync (slow)", MB_ONOFF, MA_CDOPT_BETTER_SYNC, &PicoIn.opt, 0x2000, 0, 0, 1, 1 }, { "done", MB_NONE, MA_CDOPT_DONE, NULL, 0, 0, 0, 1, 0 }, }; @@ -1115,14 +1115,14 @@ static void dispmenu_loop_options(void) menu_entry opt2_entries[] = { - { "Disable sprite limit", MB_ONOFF, MA_OPT2_NO_SPRITE_LIM, &PicoOpt, 0x40000, 0, 0, 1, 1 }, - { "Emulate Z80", MB_ONOFF, MA_OPT2_ENABLE_Z80, &PicoOpt, 0x00004, 0, 0, 1, 1 }, - { "Emulate YM2612 (FM)", MB_ONOFF, MA_OPT2_ENABLE_YM2612, &PicoOpt, 0x00001, 0, 0, 1, 1 }, - { "Emulate SN76496 (PSG)", MB_ONOFF, MA_OPT2_ENABLE_SN76496, &PicoOpt, 0x00002, 0, 0, 1, 1 }, + { "Disable sprite limit", MB_ONOFF, MA_OPT2_NO_SPRITE_LIM, &PicoIn.opt, 0x40000, 0, 0, 1, 1 }, + { "Emulate Z80", MB_ONOFF, MA_OPT2_ENABLE_Z80, &PicoIn.opt, 0x00004, 0, 0, 1, 1 }, + { "Emulate YM2612 (FM)", MB_ONOFF, MA_OPT2_ENABLE_YM2612, &PicoIn.opt, 0x00001, 0, 0, 1, 1 }, + { "Emulate SN76496 (PSG)", MB_ONOFF, MA_OPT2_ENABLE_SN76496, &PicoIn.opt, 0x00002, 0, 0, 1, 1 }, { "gzip savestates", MB_ONOFF, MA_OPT2_GZIP_STATES, &currentConfig.EmuOpt, 0x00008, 0, 0, 1, 1 }, { "Don't save last used ROM", MB_ONOFF, MA_OPT2_NO_LAST_ROM, &currentConfig.EmuOpt, 0x00020, 0, 0, 1, 1 }, { "Status line in main menu", MB_ONOFF, MA_OPT2_STATUS_LINE, &currentConfig.EmuOpt, 0x20000, 0, 0, 1, 1 }, - { "Disable idle loop patching",MB_ONOFF, MA_OPT2_NO_IDLE_LOOPS, &PicoOpt, 0x80000, 0, 0, 1, 1 }, + { "Disable idle loop patching",MB_ONOFF, MA_OPT2_NO_IDLE_LOOPS, &PicoIn.opt, 0x80000, 0, 0, 1, 1 }, { "Disable frame limiter", MB_ONOFF, MA_OPT2_NO_FRAME_LIMIT, &currentConfig.EmuOpt, 0x40000, 0, 0, 1, 1 }, { "done", MB_NONE, MA_OPT2_DONE, NULL, 0, 0, 0, 1, 0 }, }; @@ -1182,7 +1182,7 @@ static void amenu_loop_options(void) menu_entry opt_entries[] = { { NULL, MB_NONE, MA_OPT_RENDERER, NULL, 0, 0, 0, 1, 1 }, - { "Accurate sprites", MB_ONOFF, MA_OPT_ACC_SPRITES, &PicoOpt, 0x080, 0, 0, 0, 1 }, + { "Accurate sprites", MB_ONOFF,
MA_OPT_ACC_SPRITES, &PicoIn.opt, 0x080, 0, 0, 0, 1 }, { "Show FPS", MB_ONOFF, MA_OPT_SHOW_FPS, &currentConfig.EmuOpt, 0x0002, 0, 0, 1, 1 }, { NULL, MB_RANGE, MA_OPT_FRAMESKIP, &currentConfig.Frameskip, 0, -1, 16, 1, 1 }, { "Enable sound", MB_ONOFF, MA_OPT_ENABLE_SOUND, &currentConfig.EmuOpt, 0x0004, 0, 0, 1, 1 }, @@ -1211,7 +1211,7 @@ static void menu_opt_cust_draw(const menu_entry *entry, int x, int y, void *para switch (entry->id) { case MA_OPT_RENDERER: - if (PicoOpt & 0x10) + if (PicoIn.opt & 0x10) str = "fast"; else if (currentConfig.EmuOpt & 0x80) str = "accurate"; @@ -1226,11 +1226,11 @@ static void menu_opt_cust_draw(const menu_entry *entry, int x, int y, void *para text_out16(x, y, "Frameskip %s", str24); break; case MA_OPT_SOUND_QUALITY: - str = (PicoOpt&0x08)?"stereo":"mono"; + str = (PicoIn.opt&0x08)?"stereo":"mono"; text_out16(x, y, "Sound Quality: %5iHz %s", PsndRate, str); break; case MA_OPT_REGION: - text_out16(x, y, "Region: %s", me_region_name(PicoRegionOverride, PicoAutoRgnOrder)); + text_out16(x, y, "Region: %s", me_region_name(PicoIn.regionOverride, PicoIn.autoRgnOrder)); break; case MA_OPT_CONFIRM_STATES: switch ((currentConfig.EmuOpt >> 9) & 5) { @@ -1291,31 +1291,31 @@ static void region_prevnext(int right) static int rgn_orders[] = { 0x148, 0x184, 0x814, 0x418, 0x841, 0x481 }; int i; if (right) { - if (!PicoRegionOverride) { + if (!PicoIn.regionOverride) { for (i = 0; i < 6; i++) - if (rgn_orders[i] == PicoAutoRgnOrder) break; - if (i < 5) PicoAutoRgnOrder = rgn_orders[i+1]; - else PicoRegionOverride=1; + if (rgn_orders[i] == PicoIn.autoRgnOrder) break; + if (i < 5) PicoIn.autoRgnOrder = rgn_orders[i+1]; + else PicoIn.regionOverride=1; } - else PicoRegionOverride<<=1; - if (PicoRegionOverride > 8) PicoRegionOverride = 8; + else PicoIn.regionOverride<<=1; + if (PicoIn.regionOverride > 8) PicoIn.regionOverride = 8; } else { - if (!PicoRegionOverride) { + if (!PicoIn.regionOverride) { for (i = 0; i < 6; i++) - if (rgn_orders[i] == PicoAutoRgnOrder) break; - if
(i > 0) PicoAutoRgnOrder = rgn_orders[i-1]; + if (rgn_orders[i] == PicoIn.autoRgnOrder) break; + if (i > 0) PicoIn.autoRgnOrder = rgn_orders[i-1]; } - else PicoRegionOverride>>=1; + else PicoIn.regionOverride>>=1; } } static void menu_options_save(void) { - if (PicoRegionOverride) { + if (PicoIn.regionOverride) { // force setting possibly changed.. - Pico.m.pal = (PicoRegionOverride == 2 || PicoRegionOverride == 8) ? 1 : 0; + Pico.m.pal = (PicoIn.regionOverride == 2 || PicoIn.regionOverride == 8) ? 1 : 0; } - if (!(PicoOpt & POPT_6BTN_PAD)) { + if (!(PicoIn.opt & POPT_6BTN_PAD)) { // unbind XYZ MODE, just in case unbind_action(0xf00); } @@ -1344,11 +1344,11 @@ static int menu_loop_options(void) if (!me_process(opt_entries, OPT_ENTRY_COUNT, selected_id, (inp&PBTN_RIGHT) ? 1 : 0)) { switch (selected_id) { case MA_OPT_RENDERER: - if ((PicoOpt & 0x10) || !(currentConfig.EmuOpt & 0x80)) { - PicoOpt &= ~0x10; + if ((PicoIn.opt & 0x10) || !(currentConfig.EmuOpt & 0x80)) { + PicoIn.opt &= ~0x10; currentConfig.EmuOpt |= 0x80; } else { - PicoOpt |= 0x10; + PicoIn.opt |= 0x10; currentConfig.EmuOpt &= ~0x80; } break; diff --git a/platform/psp/mp3.c b/platform/psp/mp3.c index 6cffc629..4ea3bdd7 100644 --- a/platform/psp/mp3.c +++ b/platform/psp/mp3.c @@ -466,7 +466,7 @@ int mp3_get_offset(void) // 0-1023 unsigned int offs1024 = 0; int cdda_on; - cdda_on = (PicoAHW & PAHW_MCD) && (PicoOpt&0x800) && !(Pico_mcd->s68k_regs[0x36] & 1) && + cdda_on = (PicoIn.AHW & PAHW_MCD) && (PicoIn.opt&0x800) && !(Pico_mcd->s68k_regs[0x36] & 1) && (Pico_mcd->scd.Status_CDC & 1) && mp3_handle >= 0; if (cdda_on) { diff --git a/platform/win32/main.c b/platform/win32/main.c index cf2523a2..eb55e0e0 100644 --- a/platform/win32/main.c +++ b/platform/win32/main.c @@ -131,7 +131,7 @@ static HBITMAP png2hb(const char *fname, int is_480) static void PrepareForROM(void) { unsigned char *rom_data = NULL; - int i, ret, show = PicoAHW & PAHW_PICO; + int i, ret, show = PicoIn.AHW & PAHW_PICO; 
PicoGetInternal(PI_ROM, (pint_ret_t *) &rom_data); EnableMenuItem(mmain, 2, MF_BYPOSITION|(show ? MF_ENABLED : MF_GRAYED)); diff --git a/platform/win32/plat.c b/platform/win32/plat.c index e8f53898..a9cceb79 100644 --- a/platform/win32/plat.c +++ b/platform/win32/plat.c @@ -123,7 +123,7 @@ static int sndbuff[2*44100/50/2 + 4]; static void update_sound(int len) { /* avoid writing audio when lagging behind to prevent audio lag */ - if (PicoSkipFrame != 2) + if (PicoIn.skipFrame != 2) DSoundUpdate(sndbuff, (currentConfig.EmuOpt & EOPT_NO_FRMLIMIT) ? 0 : 1); } @@ -139,7 +139,7 @@ void pemu_sound_start(void) { PsndRerate(0); - ret = DSoundInit(FrameWnd, PsndRate, (PicoOpt & POPT_EN_STEREO) ? 1 : 0, PsndLen); + ret = DSoundInit(FrameWnd, PsndRate, (PicoIn.opt & POPT_EN_STEREO) ? 1 : 0, PsndLen); if (ret != 0) { lprintf("dsound init failed\n"); return; diff --git a/tools/mkoffsets.c b/tools/mkoffsets.c index 7e57383a..297a1969 100644 --- a/tools/mkoffsets.c +++ b/tools/mkoffsets.c @@ -58,7 +58,7 @@ int main(int argc, char *argv[]) DUMP_EST(f, Pico); DUMP_EST(f, PicoMem_vram); DUMP_EST(f, PicoMem_cram); - DUMP_EST(f, PicoOpt); + DUMP_EST(f, PicoIn.opt); DUMP_EST(f, Draw2FB); DUMP_EST(f, HighPal); DUMP_PMEM(f, vram); From 35f2b65ef708e7afc922ceda8d00b716de289610 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 20 Oct 2017 00:41:12 +0300 Subject: [PATCH 0128/1110] add 68k overclocking support --- pico/pico.h | 1 + pico/pico_cmn.c | 13 +++++++++++-- pico/pico_port.h | 2 ++ platform/common/emu.c | 1 + platform/common/emu.h | 1 + platform/common/menu_pico.c | 6 ++++++ platform/common/menu_pico.h | 1 + platform/libretro/libretro.c | 10 ++++++++++ 8 files changed, 33 insertions(+), 2 deletions(-) diff --git a/pico/pico.h b/pico/pico.h index be02ef3a..2d63d184 100644 --- a/pico/pico.h +++ b/pico/pico.h @@ -96,6 +96,7 @@ typedef struct unsigned short autoRgnOrder; // packed priority list of regions, for example 0x148 means this detection order: EUR, USA, JAP unsigned short quirks; 
// game-specific quirks: PQUIRK_* + unsigned short overclockM68k; // overclock the emulated 68k, in % } PicoInterface; extern PicoInterface PicoIn; diff --git a/pico/pico_cmn.c b/pico/pico_cmn.c index 50b8ced1..fc12a767 100644 --- a/pico/pico_cmn.c +++ b/pico/pico_cmn.c @@ -81,7 +81,7 @@ static void do_timing_hacks_as(struct PicoVideo *pv, int vdp_slots) static void do_timing_hacks_vb(void) { - if (Pico.m.dma_xfers) + if (unlikely(Pico.m.dma_xfers)) SekCyclesBurn(CheckDMA()); } @@ -272,7 +272,7 @@ static int PicoFrameHints(void) PAD_DELAY(); - if ((pv->status & PVS_ACTIVE) && --hint < 0) + if (unlikely(pv->status & PVS_ACTIVE) && --hint < 0) { hint = pv->reg[10]; // Reload H-Int counter do_hint(pv); @@ -287,6 +287,15 @@ static int PicoFrameHints(void) pevt_log_m68k_o(EVT_NEXT_LINE); } + if (unlikely(PicoIn.overclockM68k)) { + unsigned int l = PicoIn.overclockM68k * lines / 100; + while (l-- > 0) { + Pico.t.m68c_cnt -= CYCLES_M68K_LINE; + do_timing_hacks_vb(); + SekSyncM68k(); + } + } + pv->status &= ~(SR_VB | PVS_VB2); pv->status |= ((pv->reg[1] >> 3) ^ SR_VB) & SR_VB; // forced blanking diff --git a/pico/pico_port.h b/pico/pico_port.h index 605778d8..70802202 100644 --- a/pico/pico_port.h +++ b/pico/pico_port.h @@ -10,9 +10,11 @@ #ifdef __GNUC__ #define NOINLINE __attribute__((noinline)) #define ALIGNED(n) __attribute__((aligned(n))) +#define unlikely(x) __builtin_expect((x), 0) #else #define NOINLINE #define ALIGNED(n) +#define unlikely(x) (x) #endif #ifdef _MSC_VER diff --git a/platform/common/emu.c b/platform/common/emu.c index 822fec73..407ed599 100644 --- a/platform/common/emu.c +++ b/platform/common/emu.c @@ -652,6 +652,7 @@ int emu_read_config(const char *rom_fname, int no_defaults) } pemu_validate_config(); + PicoIn.overclockM68k = currentConfig.overclock_68k; // some sanity checks #ifdef PSP diff --git a/platform/common/emu.h b/platform/common/emu.h index 6e7c3991..9a5ae660 100644 --- a/platform/common/emu.h +++ b/platform/common/emu.h @@ -74,6 +74,7 @@ 
typedef struct _currentConfig_t { int analog_deadzone; int msh2_khz; int ssh2_khz; + int overclock_68k; } currentConfig_t; extern currentConfig_t currentConfig, defaultConfig; diff --git a/platform/common/menu_pico.c b/platform/common/menu_pico.c index bd2b9159..1d73d4a7 100644 --- a/platform/common/menu_pico.c +++ b/platform/common/menu_pico.c @@ -488,10 +488,13 @@ static int menu_loop_32x_options(int id, int keys) // ------------ adv options menu ------------ +static const char h_ovrclk[] = "Will break some games, keep at 0"; + static menu_entry e_menu_adv_options[] = { mee_onoff ("SRAM/BRAM saves", MA_OPT_SRAM_STATES, currentConfig.EmuOpt, EOPT_EN_SRAM), mee_onoff ("Disable sprite limit", MA_OPT2_NO_SPRITE_LIM, PicoIn.opt, POPT_DIS_SPRITE_LIM), + mee_range_h ("Overclock M68k (%)", MA_OPT2_OVERCLOCK_M68K,currentConfig.overclock_68k, 0, 1000, h_ovrclk), mee_onoff ("Emulate Z80", MA_OPT2_ENABLE_Z80, PicoIn.opt, POPT_EN_Z80), mee_onoff ("Emulate YM2612 (FM)", MA_OPT2_ENABLE_YM2612, PicoIn.opt, POPT_EN_FM), mee_onoff ("Emulate SN76496 (PSG)", MA_OPT2_ENABLE_SN76496,PicoIn.opt, POPT_EN_PSG), @@ -508,7 +511,10 @@ static menu_entry e_menu_adv_options[] = static int menu_loop_adv_options(int id, int keys) { static int sel = 0; + me_loop(e_menu_adv_options, &sel); + PicoIn.overclockM68k = currentConfig.overclock_68k; // int vs short + return 0; } diff --git a/platform/common/menu_pico.h b/platform/common/menu_pico.h index c5edde3d..595989e8 100644 --- a/platform/common/menu_pico.h +++ b/platform/common/menu_pico.h @@ -57,6 +57,7 @@ typedef enum MA_OPT2_DYNARECS, MA_OPT2_NO_SPRITE_LIM, MA_OPT2_NO_IDLE_LOOPS, + MA_OPT2_OVERCLOCK_M68K, MA_OPT2_DONE, MA_OPT3_SCALE, /* psp (all OPT3) */ MA_OPT3_HSCALE32, diff --git a/platform/libretro/libretro.c b/platform/libretro/libretro.c index 33ede0d2..99f0f3b5 100644 --- a/platform/libretro/libretro.c +++ b/platform/libretro/libretro.c @@ -10,6 +10,7 @@ #define _GNU_SOURCE 1 // mremap #include +#include #include #include #ifndef _WIN32 
@@ -527,6 +528,7 @@ void retro_set_environment(retro_environment_t cb) { "picodrive_region", "Region; Auto|Japan NTSC|Japan PAL|US|Europe" }, { "picodrive_aspect", "Core-provided aspect ratio; PAR|4/3|CRT" }, { "picodrive_overscan", "Show Overscan; disabled|enabled" }, + { "picodrive_overclk68k", "68k overclock; disabled|+25%|+50%|+75%|+100%|+200%|+400%" }, #ifdef DRC_SH2 { "picodrive_drc", "Dynamic recompilers; enabled|disabled" }, #endif @@ -1295,6 +1297,14 @@ static void update_variables(void) environ_cb(RETRO_ENVIRONMENT_SET_GEOMETRY, &av_info); } + var.value = NULL; + var.key = "picodrive_overclk68k"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { + PicoIn.overclockM68k = 0; + if (var.value[0] == '+') + PicoIn.overclockM68k = atoi(var.value + 1); + } + #ifdef DRC_SH2 var.value = NULL; var.key = "picodrive_drc"; From ee5d41a1edcfd0ac2fece688df2d476628a8b333 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 20 Oct 2017 00:57:34 +0300 Subject: [PATCH 0129/1110] pandora: mark prerelease versions so they can live along released ones --- platform/pandora/PicoDrive.pxml.template | 6 +++--- platform/pandora/make_pxml.sh | 9 ++++++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/platform/pandora/PicoDrive.pxml.template b/platform/pandora/PicoDrive.pxml.template index 635d59cc..96d7b1ec 100644 --- a/platform/pandora/PicoDrive.pxml.template +++ b/platform/pandora/PicoDrive.pxml.template @@ -7,11 +7,11 @@ - + - PicoDrive @major@.@minor@ + PicoDrive @major@.@minor@@build_post@ - PicoDrive @major@.@minor@ + PicoDrive @major@.@minor@@build_post@ Genesis/MegaDrive/SegaCD/32x Emulator. diff --git a/platform/pandora/make_pxml.sh b/platform/pandora/make_pxml.sh index 3fd03a23..2fcf8334 100755 --- a/platform/pandora/make_pxml.sh +++ b/platform/pandora/make_pxml.sh @@ -8,10 +8,13 @@ major=`head -n 1 $verfile | sed 's/.*"\([0-9]*\)\.\([0-9]*\).*/\1/g'` minor=`head -n 1 $verfile | sed 's/.*"\([0-9]*\)\.\([0-9]*\).*/\2/g'` # lame, I know.. 
build=`git describe HEAD | grep -- - | sed -e 's/.*\-\(.*\)\-.*/\1/'` +test -n "$build" && build_post="-$build" test -n "$build" || build=0 trap "rm -f $2" ERR -sed 's/@major@/'$major'/' "$1" > "$2" -sed -i 's/@minor@/'$minor'/' "$2" -sed -i 's/@build@/'$build'/' "$2" +sed -e 's/@major@/'$major'/' \ + -e 's/@minor@/'$minor'/' \ + -e 's/@build@/'$build'/' \ + -e 's/@build_post@/'$build_post'/' \ + "$1" > "$2" From eef77d7a8b0b0d47e9559e40d8cb6407ea39b0f2 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 21 Oct 2017 00:02:38 +0300 Subject: [PATCH 0130/1110] handle 'bad' dma better --- pico/pico.c | 5 ++--- pico/videoport.c | 14 +++++++++----- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/pico/pico.c b/pico/pico.c index 2e561c61..76c5fe92 100644 --- a/pico/pico.c +++ b/pico/pico.c @@ -253,10 +253,9 @@ PICO_INTERNAL int CheckDMA(void) xfers_can = dma_timings[dma_op]; if(xfers <= xfers_can) { - if(dma_op&2) Pico.video.status&=~2; // dma no longer busy - else { + Pico.video.status &= ~SR_DMA; + if (!(dma_op & 2)) burn = xfers * dma_bsycles[dma_op] >> 8; // have to be approximate because can't afford division.. 
- } Pico.m.dma_xfers = 0; } else { if(!(dma_op&2)) burn = 488; diff --git a/pico/videoport.c b/pico/videoport.c index 355489e9..9def819d 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -100,7 +100,7 @@ static void DmaSlow(int len, unsigned int source) Pico.video.type, source, a, len, inc, (Pico.video.status&8)||!(Pico.video.reg[1]&0x40), SekCyclesDone(), SekPc); - Pico.m.dma_xfers += len; + Pico.m.dma_xfers = len; if (Pico.m.dma_xfers < len) // lame 16bit var Pico.m.dma_xfers = ~0; SekCyclesBurnRun(CheckDMA()); @@ -225,10 +225,10 @@ static void DmaCopy(int len) int source; elprintf(EL_VDPDMA, "DmaCopy len %i [%u]", len, SekCyclesDone()); - Pico.m.dma_xfers += len; + Pico.m.dma_xfers = len; if (Pico.m.dma_xfers < len) Pico.m.dma_xfers = ~0; - Pico.video.status |= 2; // dma busy + Pico.video.status |= SR_DMA; source =Pico.video.reg[0x15]; source|=Pico.video.reg[0x16]<<8; @@ -256,10 +256,10 @@ static NOINLINE void DmaFill(int data) len = GetDmaLength(); elprintf(EL_VDPDMA, "DmaFill len %i inc %i [%u]", len, inc, SekCyclesDone()); - Pico.m.dma_xfers += len; + Pico.m.dma_xfers = len; if (Pico.m.dma_xfers < len) // lame 16bit var Pico.m.dma_xfers = ~0; - Pico.video.status |= 2; // dma busy + Pico.video.status |= SR_DMA; switch (Pico.video.type) { @@ -306,6 +306,10 @@ static NOINLINE void CommandDma(void) if ((pvid->reg[1]&0x10)==0) return; // DMA not enabled + if (Pico.m.dma_xfers) + elprintf(EL_VDPDMA|EL_ANOMALY, "Dma overlap, left=%d @ %06x", + Pico.m.dma_xfers, SekPc); + len = GetDmaLength(); source =Pico.video.reg[0x15]; source|=Pico.video.reg[0x16] << 8; From 075672bf9f028490174bd3fbebe957a47a10b09d Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 22 Oct 2017 00:39:43 +0300 Subject: [PATCH 0131/1110] sms: do psg like md does --- pico/sms.c | 16 ++++++++++------ pico/sound/sound.c | 9 ++++----- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/pico/sms.c b/pico/sms.c index 748c3265..6955a7d8 100644 --- a/pico/sms.c +++ b/pico/sms.c @@ -8,10 +8,8 
@@ /* * TODO: * - start in a state as if BIOS ran - * - remaining status flags (OVR/COL) * - RAM support in mapper * - region support - * - SN76496 DAC-like usage * - H counter */ #include "pico_int.h" @@ -133,8 +131,9 @@ static void z80_sms_out(unsigned short a, unsigned char d) case 0x40: case 0x41: - if (PicoIn.opt & POPT_EN_PSG) - SN76496Write(d); + if ((d & 0x90) == 0x90 && PsndPsgLine < Pico.m.scanline) + PsndDoPSG(Pico.m.scanline); + SN76496Write(d); break; case 0x80: @@ -300,12 +299,16 @@ void PicoFrameMS(void) } } + // 224 because of how it's done for MD... + if (y == 224 && PsndOut) + PsndGetSamplesMS(); + cycles_aim += cycles_line; cycles_done += z80_run((cycles_aim - cycles_done) >> 8) << 8; } - if (PsndOut) - PsndGetSamplesMS(); + if (PsndOut && PsndPsgLine < lines) + PsndDoPSG(lines - 1); } void PicoFrameDrawOnlyMS(void) @@ -319,3 +322,4 @@ void PicoFrameDrawOnlyMS(void) PicoLineMode4(y); } +// vim:ts=2:sw=2:expandtab diff --git a/pico/sound/sound.c b/pico/sound/sound.c index 56ffe3f8..e799e936 100644 --- a/pico/sound/sound.c +++ b/pico/sound/sound.c @@ -370,15 +370,12 @@ PICO_INTERNAL void PsndGetSamples(int y) PICO_INTERNAL void PsndGetSamplesMS(void) { - int stereo = (PicoIn.opt & 8) >> 3; int length = PsndLen_use; - // PSG - if (PicoIn.opt & POPT_EN_PSG) - SN76496Update(PsndOut, length, stereo); + PsndDoPSG(223); // upmix to "stereo" if needed - if (stereo) { + if (PicoIn.opt & POPT_EN_STEREO) { int i, *p; for (i = length, p = (void *)PsndOut; i > 0; i--, p++) *p |= *p << 16; @@ -387,6 +384,8 @@ PICO_INTERNAL void PsndGetSamplesMS(void) if (PicoWriteSound != NULL) PicoWriteSound(length * ((PicoIn.opt & POPT_EN_STEREO) ? 
4 : 2)); PsndClear(); + + dac_info[224] = 0; } // vim:shiftwidth=2:ts=2:expandtab From 6311a3baf533d4a034e51dfe38e8cb213a1bf442 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 22 Oct 2017 02:04:26 +0300 Subject: [PATCH 0132/1110] move more globals to PicoInterface similar reasons as before --- pico/cart.c | 4 +- pico/cd/cdd.c | 8 +-- pico/cd/mcd.c | 3 - pico/debug.c | 6 +- pico/memory.c | 80 ++++++++++++----------- pico/pico.c | 5 +- pico/pico.h | 17 ++--- pico/pico/xpcm.c | 2 +- pico/pico_cmn.c | 8 +-- pico/pico_int.h | 33 ++++++---- pico/pico_int_o32.h | 14 ++--- pico/sms.c | 6 +- pico/sound/sound.c | 115 +++++++++++++++------------------- platform/common/config_file.c | 6 +- platform/common/emu.c | 26 ++++---- platform/common/menu_pico.c | 6 +- platform/common/mp3.c | 4 +- platform/gizmondo/emu.c | 48 +++++++------- platform/gizmondo/menu.c | 12 ++-- platform/gp2x/940ctl.c | 10 +-- platform/gp2x/emu.c | 8 +-- platform/libretro/libretro.c | 14 ++--- platform/psp/emu.c | 34 +++++----- platform/psp/menu.c | 4 +- platform/psp/mp3.c | 8 +-- platform/win32/plat.c | 8 +-- tools/mkoffsets.c | 2 +- 27 files changed, 244 insertions(+), 247 deletions(-) diff --git a/pico/cart.c b/pico/cart.c index 3148f36d..58a9a68f 100644 --- a/pico/cart.c +++ b/pico/cart.c @@ -391,8 +391,8 @@ int pm_seek(pm_file *stream, long offset, int whence) offset = pos; } - if (PicoMessage != NULL && offset > 4 * 1024 * 1024) - PicoMessage("Decompressing data..."); + if (PicoIn.osdMessage != NULL && offset > 4 * 1024 * 1024) + PicoIn.osdMessage("Decompressing data..."); while (offset > 0) { char buf[16 * 1024]; diff --git a/pico/cd/cdd.c b/pico/cd/cdd.c index 501d09e8..35edaa80 100644 --- a/pico/cd/cdd.c +++ b/pico/cd/cdd.c @@ -1297,8 +1297,8 @@ void cdd_process(void) set_reg16(0x3e, 0x0000); set_reg16(0x40, 0x000f); - if (PicoMCDcloseTray) - PicoMCDcloseTray(); + if (PicoIn.mcdTrayClose) + PicoIn.mcdTrayClose(); return; } @@ -1316,8 +1316,8 @@ void cdd_process(void) set_reg16(0x3e, 0x0000); 
set_reg16(0x40, ~CD_OPEN & 0x0f); - if (PicoMCDopenTray) - PicoMCDopenTray(); + if (PicoIn.mcdTrayOpen) + PicoIn.mcdTrayOpen(); return; } diff --git a/pico/cd/mcd.c b/pico/cd/mcd.c index 51d9d403..5e3629a3 100644 --- a/pico/cd/mcd.c +++ b/pico/cd/mcd.c @@ -15,9 +15,6 @@ static unsigned int mcd_m68k_cycle_mult; static unsigned int mcd_m68k_cycle_base; static unsigned int mcd_s68k_cycle_base; -void (*PicoMCDopenTray)(void) = NULL; -void (*PicoMCDcloseTray)(void) = NULL; - PICO_INTERNAL void PicoInitMCD(void) { diff --git a/pico/debug.c b/pico/debug.c index d4fb575a..50cbaf38 100644 --- a/pico/debug.c +++ b/pico/debug.c @@ -387,14 +387,14 @@ void PDebugZ80Frame(void) if (/*Pico.m.z80Run &&*/ !Pico.m.z80_reset && (PicoIn.opt&POPT_EN_Z80)) PicoSyncZ80(Pico.t.m68c_cnt + line_sample * 488); - if (PsndOut) + if (PicoIn.sndOut) PsndGetSamples(line_sample); if (/*Pico.m.z80Run &&*/ !Pico.m.z80_reset && (PicoIn.opt&POPT_EN_Z80)) { PicoSyncZ80(Pico.t.m68c_cnt + 224 * 488); z80_int(); } - if (PsndOut) + if (PicoIn.sndOut) PsndGetSamples(224); // sync z80 @@ -402,7 +402,7 @@ void PDebugZ80Frame(void) Pico.t.m68c_cnt += Pico.m.pal ? 
151809 : 127671; // cycles adjusted for converter PicoSyncZ80(Pico.t.m68c_cnt); } - if (PsndOut && ym2612.dacen && PsndDacLine < lines) + if (PicoIn.sndOut && ym2612.dacen && Pico.snd.dac_line < lines) PsndDoDAC(lines - 1); PsndDoPSG(lines - 1); diff --git a/pico/memory.c b/pico/memory.c index b49956e3..c633c89b 100644 --- a/pico/memory.c +++ b/pico/memory.c @@ -389,7 +389,7 @@ static int get_scanline(int is_from_z80); static void psg_write_68k(u32 d) { // look for volume write and update if needed - if ((d & 0x90) == 0x90 && PsndPsgLine < Pico.m.scanline) + if ((d & 0x90) == 0x90 && Pico.snd.psg_line < Pico.m.scanline) PsndDoPSG(Pico.m.scanline); SN76496Write(d); @@ -399,7 +399,7 @@ static void psg_write_z80(u32 d) { if ((d & 0x90) == 0x90) { int scanline = get_scanline(1); - if (PsndPsgLine < scanline) + if (Pico.snd.psg_line < scanline) PsndDoPSG(scanline); } @@ -895,41 +895,41 @@ void ym2612_sync_timers(int z80_cycles, int mode_old, int mode_new) int xcycles = z80_cycles << 8; /* check for overflows */ - if ((mode_old & 4) && xcycles > timer_a_next_oflow) + if ((mode_old & 4) && xcycles > Pico.t.timer_a_next_oflow) ym2612.OPN.ST.status |= 1; - if ((mode_old & 8) && xcycles > timer_b_next_oflow) + if ((mode_old & 8) && xcycles > Pico.t.timer_b_next_oflow) ym2612.OPN.ST.status |= 2; /* update timer a */ if (mode_old & 1) - while (xcycles > timer_a_next_oflow) - timer_a_next_oflow += timer_a_step; + while (xcycles > Pico.t.timer_a_next_oflow) + Pico.t.timer_a_next_oflow += Pico.t.timer_a_step; if ((mode_old ^ mode_new) & 1) // turning on/off { if (mode_old & 1) - timer_a_next_oflow = TIMER_NO_OFLOW; + Pico.t.timer_a_next_oflow = TIMER_NO_OFLOW; else - timer_a_next_oflow = xcycles + timer_a_step; + Pico.t.timer_a_next_oflow = xcycles + Pico.t.timer_a_step; } if (mode_new & 1) - elprintf(EL_YMTIMER, "timer a upd to %i @ %i", timer_a_next_oflow>>8, z80_cycles); + elprintf(EL_YMTIMER, "timer a upd to %i @ %i", Pico.t.timer_a_next_oflow>>8, z80_cycles); /* update timer 
b */ if (mode_old & 2) - while (xcycles > timer_b_next_oflow) - timer_b_next_oflow += timer_b_step; + while (xcycles > Pico.t.timer_b_next_oflow) + Pico.t.timer_b_next_oflow += Pico.t.timer_b_step; if ((mode_old ^ mode_new) & 2) { if (mode_old & 2) - timer_b_next_oflow = TIMER_NO_OFLOW; + Pico.t.timer_b_next_oflow = TIMER_NO_OFLOW; else - timer_b_next_oflow = xcycles + timer_b_step; + Pico.t.timer_b_next_oflow = xcycles + Pico.t.timer_b_step; } if (mode_new & 2) - elprintf(EL_YMTIMER, "timer b upd to %i @ %i", timer_b_next_oflow>>8, z80_cycles); + elprintf(EL_YMTIMER, "timer b upd to %i @ %i", Pico.t.timer_b_next_oflow>>8, z80_cycles); } // ym2612 DAC and timer I/O handlers for z80 @@ -941,7 +941,7 @@ static int ym2612_write_local(u32 a, u32 d, int is_from_z80) if (a == 1 && ym2612.OPN.ST.address == 0x2a) /* DAC data */ { int scanline = get_scanline(is_from_z80); - //elprintf(EL_STATUS, "%03i -> %03i dac w %08x z80 %i", PsndDacLine, scanline, d, is_from_z80); + //elprintf(EL_STATUS, "%03i -> %03i dac w %08x z80 %i", Pico.snd.dac_line, scanline, d, is_from_z80); ym2612.dacout = ((int)d - 0x80) << 6; if (ym2612.dacen) PsndDoDAC(scanline); @@ -977,13 +977,13 @@ static int ym2612_write_local(u32 a, u32 d, int is_from_z80) ym2612.OPN.ST.TA = TAnew; //ym2612.OPN.ST.TAC = (1024-TAnew)*18; //ym2612.OPN.ST.TAT = 0; - timer_a_step = TIMER_A_TICK_ZCYCLES * (1024 - TAnew); + Pico.t.timer_a_step = TIMER_A_TICK_ZCYCLES * (1024 - TAnew); if (ym2612.OPN.ST.mode & 1) { // this is not right, should really be done on overflow only int cycles = is_from_z80 ? 
z80_cyclesDone() : z80_cycles_from_68k(); - timer_a_next_oflow = (cycles << 8) + timer_a_step; + Pico.t.timer_a_next_oflow = (cycles << 8) + Pico.t.timer_a_step; } - elprintf(EL_YMTIMER, "timer a set to %i, %i", 1024 - TAnew, timer_a_next_oflow>>8); + elprintf(EL_YMTIMER, "timer a set to %i, %i", 1024 - TAnew, Pico.t.timer_a_next_oflow>>8); } return 0; } @@ -993,12 +993,12 @@ static int ym2612_write_local(u32 a, u32 d, int is_from_z80) ym2612.OPN.ST.TB = d; //ym2612.OPN.ST.TBC = (256-d) * 288; //ym2612.OPN.ST.TBT = 0; - timer_b_step = TIMER_B_TICK_ZCYCLES * (256 - d); // 262800 + Pico.t.timer_b_step = TIMER_B_TICK_ZCYCLES * (256 - d); // 262800 if (ym2612.OPN.ST.mode & 2) { int cycles = is_from_z80 ? z80_cyclesDone() : z80_cycles_from_68k(); - timer_b_next_oflow = (cycles << 8) + timer_b_step; + Pico.t.timer_b_next_oflow = (cycles << 8) + Pico.t.timer_b_step; } - elprintf(EL_YMTIMER, "timer b set to %i, %i", 256 - d, timer_b_next_oflow>>8); + elprintf(EL_YMTIMER, "timer b set to %i, %i", 256 - d, Pico.t.timer_b_next_oflow>>8); } return 0; case 0x27: { /* mode, timer control */ @@ -1029,7 +1029,7 @@ static int ym2612_write_local(u32 a, u32 d, int is_from_z80) int scanline = get_scanline(is_from_z80); if (ym2612.dacen != (d & 0x80)) { ym2612.dacen = d & 0x80; - PsndDacLine = scanline; + Pico.snd.dac_line = scanline; } #ifdef __GP2X__ if (PicoIn.opt & POPT_EXT_FM) YM2612Write_940(a, d, scanline); @@ -1065,9 +1065,9 @@ static int ym2612_write_local(u32 a, u32 d, int is_from_z80) #define ym2612_read_local() \ - if (xcycles >= timer_a_next_oflow) \ + if (xcycles >= Pico.t.timer_a_next_oflow) \ ym2612.OPN.ST.status |= (ym2612.OPN.ST.mode >> 2) & 1; \ - if (xcycles >= timer_b_next_oflow) \ + if (xcycles >= Pico.t.timer_b_next_oflow) \ ym2612.OPN.ST.status |= (ym2612.OPN.ST.mode >> 2) & 2 static u32 ym2612_read_local_z80(void) @@ -1076,8 +1076,9 @@ static u32 ym2612_read_local_z80(void) ym2612_read_local(); - elprintf(EL_YMTIMER, "timer z80 read %i, sched %i, %i @ %i|%i", 
ym2612.OPN.ST.status, - timer_a_next_oflow>>8, timer_b_next_oflow>>8, xcycles >> 8, (xcycles >> 8) / 228); + elprintf(EL_YMTIMER, "timer z80 read %i, sched %i, %i @ %i|%i", + ym2612.OPN.ST.status, Pico.t.timer_a_next_oflow >> 8, + Pico.t.timer_b_next_oflow >> 8, xcycles >> 8, (xcycles >> 8) / 228); return ym2612.OPN.ST.status; } @@ -1087,8 +1088,9 @@ static u32 ym2612_read_local_68k(void) ym2612_read_local(); - elprintf(EL_YMTIMER, "timer 68k read %i, sched %i, %i @ %i|%i", ym2612.OPN.ST.status, - timer_a_next_oflow>>8, timer_b_next_oflow>>8, xcycles >> 8, (xcycles >> 8) / 228); + elprintf(EL_YMTIMER, "timer 68k read %i, sched %i, %i @ %i|%i", + ym2612.OPN.ST.status, Pico.t.timer_a_next_oflow >> 8, + Pico.t.timer_b_next_oflow >> 8, xcycles >> 8, (xcycles >> 8) / 228); return ym2612.OPN.ST.status; } @@ -1098,10 +1100,12 @@ void ym2612_pack_state(void) int tac, tat = 0, tbc, tbt = 0; tac = 1024 - ym2612.OPN.ST.TA; tbc = 256 - ym2612.OPN.ST.TB; - if (timer_a_next_oflow != TIMER_NO_OFLOW) - tat = (int)((double)(timer_a_step - timer_a_next_oflow) / (double)timer_a_step * tac * 65536); - if (timer_b_next_oflow != TIMER_NO_OFLOW) - tbt = (int)((double)(timer_b_step - timer_b_next_oflow) / (double)timer_b_step * tbc * 65536); + if (Pico.t.timer_a_next_oflow != TIMER_NO_OFLOW) + tat = (int)((double)(Pico.t.timer_a_step - Pico.t.timer_a_next_oflow) + / (double)Pico.t.timer_a_step * tac * 65536); + if (Pico.t.timer_b_next_oflow != TIMER_NO_OFLOW) + tbt = (int)((double)(Pico.t.timer_b_step - Pico.t.timer_b_next_oflow) + / (double)Pico.t.timer_b_step * tbc * 65536); elprintf(EL_YMTIMER, "save: timer a %i/%i", tat >> 16, tac); elprintf(EL_YMTIMER, "save: timer b %i/%i", tbt >> 16, tbc); @@ -1154,15 +1158,15 @@ void ym2612_unpack_state(void) tac = (1024 - ym2612.OPN.ST.TA) << 16; tbc = (256 - ym2612.OPN.ST.TB) << 16; if (ym2612.OPN.ST.mode & 1) - timer_a_next_oflow = (int)((double)(tac - tat) / (double)tac * timer_a_step); + Pico.t.timer_a_next_oflow = (int)((double)(tac - tat) / 
(double)tac * Pico.t.timer_a_step); else - timer_a_next_oflow = TIMER_NO_OFLOW; + Pico.t.timer_a_next_oflow = TIMER_NO_OFLOW; if (ym2612.OPN.ST.mode & 2) - timer_b_next_oflow = (int)((double)(tbc - tbt) / (double)tbc * timer_b_step); + Pico.t.timer_b_next_oflow = (int)((double)(tbc - tbt) / (double)tbc * Pico.t.timer_b_step); else - timer_b_next_oflow = TIMER_NO_OFLOW; - elprintf(EL_YMTIMER, "load: %i/%i, timer_a_next_oflow %i", tat>>16, tac>>16, timer_a_next_oflow >> 8); - elprintf(EL_YMTIMER, "load: %i/%i, timer_b_next_oflow %i", tbt>>16, tbc>>16, timer_b_next_oflow >> 8); + Pico.t.timer_b_next_oflow = TIMER_NO_OFLOW; + elprintf(EL_YMTIMER, "load: %i/%i, timer_a_next_oflow %i", tat>>16, tac>>16, Pico.t.timer_a_next_oflow >> 8); + elprintf(EL_YMTIMER, "load: %i/%i, timer_b_next_oflow %i", tbt>>16, tbc>>16, Pico.t.timer_b_next_oflow >> 8); } #if defined(NO_32X) && defined(_ASM_MEMORY_C) diff --git a/pico/pico.c b/pico/pico.c index 76c5fe92..f6b43cd6 100644 --- a/pico/pico.c +++ b/pico/pico.c @@ -14,7 +14,6 @@ struct Pico Pico; struct PicoMem PicoMem; PicoInterface PicoIn; -void (*PicoWriteSound)(int len) = NULL; // called at the best time to send sound buffer (PsndOut) to hardware void (*PicoResetHook)(void) = NULL; void (*PicoLineHook)(void) = NULL; @@ -344,6 +343,4 @@ void PicoGetInternal(pint_t which, pint_ret_t *r) } } -// callback to output message from emu -void (*PicoMessage)(const char *msg)=NULL; - +// vim:ts=2:sw=2:expandtab diff --git a/pico/pico.h b/pico/pico.h index 2d63d184..f22ef606 100644 --- a/pico/pico.h +++ b/pico/pico.h @@ -97,6 +97,15 @@ typedef struct unsigned short quirks; // game-specific quirks: PQUIRK_* unsigned short overclockM68k; // overclock the emulated 68k, in % + + int sndRate; // rate in Hz + short *sndOut; // PCM output buffer + void (*writeSound)(int len); // write .sndOut callback, called once per frame + + void (*osdMessage)(const char *msg); // output OSD message from emu, optional + + void (*mcdTrayOpen)(void); + void 
(*mcdTrayClose)(void); } PicoInterface; extern PicoInterface PicoIn; @@ -108,18 +117,12 @@ int PicoReset(void); void PicoLoopPrepare(void); void PicoFrame(void); void PicoFrameDrawOnly(void); -extern void (*PicoWriteSound)(int bytes); // called once per frame at the best time to send sound buffer (PsndOut) to hardware -extern void (*PicoMessage)(const char *msg); // callback to output text message from emu typedef enum { PI_ROM, PI_ISPAL, PI_IS40_CELL, PI_IS240_LINES } pint_t; typedef union { int vint; void *vptr; } pint_ret_t; void PicoGetInternal(pint_t which, pint_ret_t *ret); struct PicoEState; -// cd/mcd.c -extern void (*PicoMCDopenTray)(void); -extern void (*PicoMCDcloseTray)(void); - // pico.c #define XPCM_BUFFER_SIZE (320+160) typedef struct @@ -230,8 +233,6 @@ void Pico32xSetClocks(int msh2_hz, int ssh2_hz); #define PICO_SSH2_HZ ((int)(7670442.0 * 2.4)) // sound.c -extern int PsndRate,PsndLen; -extern short *PsndOut; extern void (*PsndMix_32_to_16l)(short *dest, int *src, int count); void PsndRerate(int preserve_state); diff --git a/pico/pico/xpcm.c b/pico/pico/xpcm.c index 0109dcf9..ee204464 100644 --- a/pico/pico/xpcm.c +++ b/pico/pico/xpcm.c @@ -50,7 +50,7 @@ PICO_INTERNAL void PicoPicoPCMReset(void) PICO_INTERNAL void PicoPicoPCMRerate(int xpcm_rate) { - stepsamples = (PsndRate<<10)/xpcm_rate; + stepsamples = (PicoIn.sndRate<<10)/xpcm_rate; } #define XSHIFT 6 diff --git a/pico/pico_cmn.c b/pico/pico_cmn.c index fc12a767..95b6b103 100644 --- a/pico/pico_cmn.c +++ b/pico/pico_cmn.c @@ -153,7 +153,7 @@ static int PicoFrameHints(void) } // get samples from sound chips - if ((y == 224 || y == line_sample) && PsndOut) + if ((y == 224 || y == line_sample) && PicoIn.sndOut) { cycles = SekCyclesDone(); @@ -241,7 +241,7 @@ static int PicoFrameHints(void) #endif // get samples from sound chips - if (y == 224 && PsndOut) + if (y == 224 && PicoIn.sndOut) PsndGetSamples(y); // Run scanline: @@ -324,9 +324,9 @@ static int PicoFrameHints(void) cycles = 
SekCyclesDone(); if (Pico.m.z80Run && !Pico.m.z80_reset && (PicoIn.opt&POPT_EN_Z80)) PicoSyncZ80(cycles); - if (PsndOut && ym2612.dacen && PsndDacLine < lines) + if (PicoIn.sndOut && ym2612.dacen && Pico.snd.dac_line < lines) PsndDoDAC(lines - 1); - if (PsndOut && PsndPsgLine < lines) + if (PicoIn.sndOut && Pico.snd.psg_line < lines) PsndDoPSG(lines - 1); #ifdef PICO_CD diff --git a/pico/pico_int.h b/pico/pico_int.h index fa7979cc..bb27922b 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -409,6 +409,19 @@ struct PicoTiming unsigned int z80c_cnt; // z80 cycles done (this frame) unsigned int z80c_aim; int z80_scanline; + + int timer_a_next_oflow, timer_a_step; // in z80 cycles + int timer_b_next_oflow, timer_b_step; +}; + +struct PicoSound +{ + short len; // number of mono samples + short len_use; // adjusted + int len_e_add; // for non-int samples/frame + int len_e_cnt; + short dac_line; + short psg_line; }; // run tools/mkoffsets pico/pico_int_o32.h if you change these @@ -419,6 +432,7 @@ struct Pico struct PicoMisc m; struct PicoTiming t; struct PicoCartSave sv; + struct PicoSound snd; struct PicoEState est; struct PicoMS ms; @@ -781,10 +795,6 @@ void SekInterruptClearS68k(int irq); // sound/sound.c extern short cdda_out_buffer[2*1152]; -extern int PsndLen_exc_cnt; -extern int PsndLen_exc_add; -extern int timer_a_next_oflow, timer_a_step; // in z80 cycles -extern int timer_b_next_oflow, timer_b_step; void cdda_start_play(int lba_base, int lba_offset, int lb_len); @@ -799,16 +809,16 @@ void ym2612_unpack_state(void); #define TIMER_B_TICK_ZCYCLES 262800 // 275251 broken, see Dai Makaimura #define timers_cycle() \ - if (timer_a_next_oflow > 0 && timer_a_next_oflow < TIMER_NO_OFLOW) \ - timer_a_next_oflow -= Pico.m.pal ? 70938*256 : 59659*256; \ - if (timer_b_next_oflow > 0 && timer_b_next_oflow < TIMER_NO_OFLOW) \ - timer_b_next_oflow -= Pico.m.pal ? 
70938*256 : 59659*256; \ + if (Pico.t.timer_a_next_oflow > 0 && Pico.t.timer_a_next_oflow < TIMER_NO_OFLOW) \ + Pico.t.timer_a_next_oflow -= Pico.m.pal ? 70938*256 : 59659*256; \ + if (Pico.t.timer_b_next_oflow > 0 && Pico.t.timer_b_next_oflow < TIMER_NO_OFLOW) \ + Pico.t.timer_b_next_oflow -= Pico.m.pal ? 70938*256 : 59659*256; \ ym2612_sync_timers(0, ym2612.OPN.ST.mode, ym2612.OPN.ST.mode); #define timers_reset() \ - timer_a_next_oflow = timer_b_next_oflow = TIMER_NO_OFLOW; \ - timer_a_step = TIMER_A_TICK_ZCYCLES * 1024; \ - timer_b_step = TIMER_B_TICK_ZCYCLES * 256; + Pico.t.timer_a_next_oflow = Pico.t.timer_b_next_oflow = TIMER_NO_OFLOW; \ + Pico.t.timer_a_step = TIMER_A_TICK_ZCYCLES * 1024; \ + Pico.t.timer_b_step = TIMER_B_TICK_ZCYCLES * 256; // videoport.c @@ -850,7 +860,6 @@ PICO_INTERNAL void PsndDoPSG(int line_to); PICO_INTERNAL void PsndClear(void); PICO_INTERNAL void PsndGetSamples(int y); PICO_INTERNAL void PsndGetSamplesMS(void); -extern int PsndDacLine, PsndPsgLine; // sms.c #ifndef NO_SMS diff --git a/pico/pico_int_o32.h b/pico/pico_int_o32.h index ca3004f3..25c64f43 100644 --- a/pico/pico_int_o32.h +++ b/pico/pico_int_o32.h @@ -6,13 +6,13 @@ #define OFS_Pico_m_hardware 0x0047 #define OFS_Pico_m_z80_reset 0x004f #define OFS_Pico_m_sram_reg 0x0049 -#define OFS_Pico_sv 0x007c -#define OFS_Pico_sv_data 0x007c -#define OFS_Pico_sv_start 0x0080 -#define OFS_Pico_sv_end 0x0084 -#define OFS_Pico_sv_flags 0x0088 -#define OFS_Pico_rom 0x031c -#define OFS_Pico_romsize 0x0320 +#define OFS_Pico_sv 0x008c +#define OFS_Pico_sv_data 0x008c +#define OFS_Pico_sv_start 0x0090 +#define OFS_Pico_sv_end 0x0094 +#define OFS_Pico_sv_flags 0x0098 +#define OFS_Pico_rom 0x033c +#define OFS_Pico_romsize 0x0340 #define OFS_EST_DrawScanline 0x00 #define OFS_EST_rendstatus 0x04 #define OFS_EST_DrawLineDest 0x08 diff --git a/pico/sms.c b/pico/sms.c index 6955a7d8..ac81c2b2 100644 --- a/pico/sms.c +++ b/pico/sms.c @@ -131,7 +131,7 @@ static void z80_sms_out(unsigned short a, 
unsigned char d) case 0x40: case 0x41: - if ((d & 0x90) == 0x90 && PsndPsgLine < Pico.m.scanline) + if ((d & 0x90) == 0x90 && Pico.snd.psg_line < Pico.m.scanline) PsndDoPSG(Pico.m.scanline); SN76496Write(d); break; @@ -300,14 +300,14 @@ void PicoFrameMS(void) } // 224 because of how it's done for MD... - if (y == 224 && PsndOut) + if (y == 224 && PicoIn.sndOut) PsndGetSamplesMS(); cycles_aim += cycles_line; cycles_done += z80_run((cycles_aim - cycles_done) >> 8) << 8; } - if (PsndOut && PsndPsgLine < lines) + if (PicoIn.sndOut && Pico.snd.psg_line < lines) PsndDoPSG(lines - 1); } diff --git a/pico/sound/sound.c b/pico/sound/sound.c index e799e936..fc71b741 100644 --- a/pico/sound/sound.c +++ b/pico/sound/sound.c @@ -25,31 +25,20 @@ static unsigned short dac_info[312+4]; // pos in sample buffer // cdda output buffer short cdda_out_buffer[2*1152]; -// for Pico -int PsndRate=0; -int PsndLen=0; // number of mono samples, multiply by 2 for stereo -int PsndLen_exc_add=0; // this is for non-integer sample counts per line, eg. 22050/60 -int PsndLen_exc_cnt=0; -int PsndDacLine, PsndPsgLine; -short *PsndOut=NULL; // PCM data buffer -static int PsndLen_use; - -// timers -int timer_a_next_oflow, timer_a_step; // in z80 cycles -int timer_b_next_oflow, timer_b_step; - // sn76496 extern int *sn76496_regs; static void dac_recalculate(void) { - int i, dac_cnt, pos, len, lines = Pico.m.pal ? 313 : 262, mid = Pico.m.pal ? 68 : 93; + int lines = Pico.m.pal ? 313 : 262; + int mid = Pico.m.pal ? 
68 : 93; + int i, dac_cnt, pos, len; - if (PsndLen <= lines) + if (Pico.snd.len <= lines) { // shrinking algo - dac_cnt = -PsndLen; + dac_cnt = -Pico.snd.len; len=1; pos=0; dac_info[225] = 1; @@ -60,14 +49,14 @@ static void dac_recalculate(void) pos++; dac_cnt += lines; } - dac_cnt -= PsndLen; + dac_cnt -= Pico.snd.len; dac_info[i] = pos; } } else { // stretching - dac_cnt = PsndLen; + dac_cnt = Pico.snd.len; pos=0; for(i = 225; i != 224; i++) { @@ -78,11 +67,11 @@ static void dac_recalculate(void) len++; } if (i == mid) // midpoint - while(pos+len < PsndLen/2) { + while(pos+len < Pico.snd.len/2) { dac_cnt -= lines; len++; } - dac_cnt += PsndLen; + dac_cnt += Pico.snd.len; pos += len; dac_info[i] = pos; } @@ -112,7 +101,7 @@ void PsndRerate(int preserve_state) ym2612_pack_state(); memcpy(state, YM2612GetRegs(), 0x204); } - YM2612Init(Pico.m.pal ? OSC_PAL/7 : OSC_NTSC/7, PsndRate); + YM2612Init(Pico.m.pal ? OSC_PAL/7 : OSC_NTSC/7, PicoIn.sndRate); if (preserve_state) { // feed it back it's own registers, just like after loading state memcpy(YM2612GetRegs(), state, 0x204); @@ -120,16 +109,16 @@ void PsndRerate(int preserve_state) } if (preserve_state) memcpy(state, sn76496_regs, 28*4); // remember old state - SN76496_init(Pico.m.pal ? OSC_PAL/15 : OSC_NTSC/15, PsndRate); + SN76496_init(Pico.m.pal ? 
OSC_PAL/15 : OSC_NTSC/15, PicoIn.sndRate); if (preserve_state) memcpy(sn76496_regs, state, 28*4); // restore old state if (state) free(state); - // calculate PsndLen - PsndLen=PsndRate / target_fps; - PsndLen_exc_add=((PsndRate - PsndLen*target_fps)<<16) / target_fps; - PsndLen_exc_cnt=0; + // calculate Pico.snd.len + Pico.snd.len = PicoIn.sndRate / target_fps; + Pico.snd.len_e_add = ((PicoIn.sndRate - Pico.snd.len * target_fps) << 16) / target_fps; + Pico.snd.len_e_cnt = 0; // recalculate dac info dac_recalculate(); @@ -137,7 +126,7 @@ void PsndRerate(int preserve_state) // clear all buffers memset32(PsndBuffer, 0, sizeof(PsndBuffer)/4); memset(cdda_out_buffer, 0, sizeof(cdda_out_buffer)); - if (PsndOut) + if (PicoIn.sndOut) PsndClear(); // set mixer @@ -150,24 +139,24 @@ void PsndRerate(int preserve_state) PICO_INTERNAL void PsndStartFrame(void) { - // compensate for float part of PsndLen - PsndLen_use = PsndLen; - PsndLen_exc_cnt += PsndLen_exc_add; - if (PsndLen_exc_cnt >= 0x10000) { - PsndLen_exc_cnt -= 0x10000; - PsndLen_use++; + // compensate for float part of Pico.snd.len + Pico.snd.len_use = Pico.snd.len; + Pico.snd.len_e_cnt += Pico.snd.len_e_add; + if (Pico.snd.len_e_cnt >= 0x10000) { + Pico.snd.len_e_cnt -= 0x10000; + Pico.snd.len_use++; } - PsndDacLine = PsndPsgLine = 0; + Pico.snd.dac_line = Pico.snd.psg_line = 0; Pico.m.status &= ~1; - dac_info[224] = PsndLen_use; + dac_info[224] = Pico.snd.len_use; } PICO_INTERNAL void PsndDoDAC(int line_to) { int pos, pos1, len; int dout = ym2612.dacout; - int line_from = PsndDacLine; + int line_from = Pico.snd.dac_line; if (line_to >= 313) line_to = 312; @@ -178,23 +167,23 @@ PICO_INTERNAL void PsndDoDAC(int line_to) if (len <= 0) return; - PsndDacLine = line_to + 1; + Pico.snd.dac_line = line_to + 1; - if (!PsndOut) + if (!PicoIn.sndOut) return; if (PicoIn.opt & POPT_EN_STEREO) { - short *d = PsndOut + pos*2; + short *d = PicoIn.sndOut + pos*2; for (; len > 0; len--, d+=2) *d += dout; } else { - short *d = 
PsndOut + pos; + short *d = PicoIn.sndOut + pos; for (; len > 0; len--, d++) *d += dout; } } PICO_INTERNAL void PsndDoPSG(int line_to) { - int line_from = PsndPsgLine; + int line_from = Pico.snd.psg_line; int pos, pos1, len; int stereo = 0; @@ -209,16 +198,16 @@ PICO_INTERNAL void PsndDoPSG(int line_to) if (len <= 0) return; - PsndPsgLine = line_to + 1; + Pico.snd.psg_line = line_to + 1; - if (!PsndOut || !(PicoIn.opt & POPT_EN_PSG)) + if (!PicoIn.sndOut || !(PicoIn.opt & POPT_EN_PSG)) return; if (PicoIn.opt & POPT_EN_STEREO) { stereo = 1; pos <<= 1; } - SN76496Update(PsndOut + pos, len, stereo); + SN76496Update(PicoIn.sndOut + pos, len, stereo); } // cdda @@ -227,8 +216,8 @@ static void cdda_raw_update(int *buffer, int length) int ret, cdda_bytes, mult = 1; cdda_bytes = length*4; - if (PsndRate <= 22050 + 100) mult = 2; - if (PsndRate < 22050 - 100) mult = 4; + if (PicoIn.sndRate <= 22050 + 100) mult = 2; + if (PicoIn.sndRate < 22050 - 100) mult = 4; cdda_bytes *= mult; ret = pm_read(cdda_out_buffer, cdda_bytes, Pico_mcd->cdda_stream); @@ -270,12 +259,12 @@ void cdda_start_play(int lba_base, int lba_offset, int lb_len) PICO_INTERNAL void PsndClear(void) { - int len = PsndLen; - if (PsndLen_exc_add) len++; + int len = Pico.snd.len; + if (Pico.snd.len_e_add) len++; if (PicoIn.opt & POPT_EN_STEREO) - memset32((int *) PsndOut, 0, len); // assume PsndOut to be aligned + memset32((int *) PicoIn.sndOut, 0, len); // assume PicoIn.sndOut to be aligned else { - short *out = PsndOut; + short *out = PicoIn.sndOut; if ((long)out & 2) { *out++ = 0; len--; } memset32((int *) out, 0, len/2); if (len & 1) out[len-1] = 0; @@ -294,7 +283,7 @@ static int PsndRender(int offset, int length) pprof_start(sound); if (PicoIn.AHW & PAHW_PICO) { - PicoPicoPCMUpdate(PsndOut+offset, length, stereo); + PicoPicoPCMUpdate(PicoIn.sndOut+offset, length, stereo); return length; } @@ -330,7 +319,7 @@ static int PsndRender(int offset, int length) p32x_pwm_update(buf32, length, stereo); // convert + 
limit to normal 16bit output - PsndMix_32_to_16l(PsndOut+offset, buf32, length); + PsndMix_32_to_16l(PicoIn.sndOut+offset, buf32, length); pprof_end(sound); @@ -342,47 +331,47 @@ PICO_INTERNAL void PsndGetSamples(int y) { static int curr_pos = 0; - if (ym2612.dacen && PsndDacLine < y) + if (ym2612.dacen && Pico.snd.dac_line < y) PsndDoDAC(y - 1); PsndDoPSG(y - 1); if (y == 224) { if (Pico.m.status & 2) - curr_pos += PsndRender(curr_pos, PsndLen-PsndLen/2); - else curr_pos = PsndRender(0, PsndLen_use); + curr_pos += PsndRender(curr_pos, Pico.snd.len-Pico.snd.len/2); + else curr_pos = PsndRender(0, Pico.snd.len_use); if (Pico.m.status & 1) Pico.m.status |= 2; else Pico.m.status &= ~2; - if (PicoWriteSound) - PicoWriteSound(curr_pos * ((PicoIn.opt & POPT_EN_STEREO) ? 4 : 2)); + if (PicoIn.writeSound) + PicoIn.writeSound(curr_pos * ((PicoIn.opt & POPT_EN_STEREO) ? 4 : 2)); // clear sound buffer PsndClear(); - PsndDacLine = 224; + Pico.snd.dac_line = 224; dac_info[224] = 0; } else if (Pico.m.status & 3) { Pico.m.status |= 2; Pico.m.status &= ~1; - curr_pos = PsndRender(0, PsndLen/2); + curr_pos = PsndRender(0, Pico.snd.len/2); } } PICO_INTERNAL void PsndGetSamplesMS(void) { - int length = PsndLen_use; + int length = Pico.snd.len_use; PsndDoPSG(223); // upmix to "stereo" if needed if (PicoIn.opt & POPT_EN_STEREO) { int i, *p; - for (i = length, p = (void *)PsndOut; i > 0; i--, p++) + for (i = length, p = (void *)PicoIn.sndOut; i > 0; i--, p++) *p |= *p << 16; } - if (PicoWriteSound != NULL) - PicoWriteSound(length * ((PicoIn.opt & POPT_EN_STEREO) ? 4 : 2)); + if (PicoIn.writeSound != NULL) + PicoIn.writeSound(length * ((PicoIn.opt & POPT_EN_STEREO) ? 
4 : 2)); PsndClear(); dac_info[224] = 0; diff --git a/platform/common/config_file.c b/platform/common/config_file.c index 2d1186d7..1b5c5172 100644 --- a/platform/common/config_file.c +++ b/platform/common/config_file.c @@ -259,9 +259,9 @@ static int custom_read(menu_entry *me, const char *var, const char *val) case MA_OPT_SOUND_QUALITY: if (strcasecmp(var, "Sound Quality") != 0) return 0; - PsndRate = strtoul(val, &tmp, 10); - if (PsndRate < 8000 || PsndRate > 44100) - PsndRate = 22050; + PicoIn.sndRate = strtoul(val, &tmp, 10); + if (PicoIn.sndRate < 8000 || PicoIn.sndRate > 44100) + PicoIn.sndRate = 22050; if (*tmp == 'H' || *tmp == 'h') tmp++; if (*tmp == 'Z' || *tmp == 'z') tmp++; while (*tmp == ' ') tmp++; diff --git a/platform/common/emu.c b/platform/common/emu.c index 407ed599..b4db4c67 100644 --- a/platform/common/emu.c +++ b/platform/common/emu.c @@ -604,7 +604,7 @@ void emu_set_defconfig(void) { memcpy(&currentConfig, &defaultConfig, sizeof(currentConfig)); PicoIn.opt = currentConfig.s_PicoOpt; - PsndRate = currentConfig.s_PsndRate; + PicoIn.sndRate = currentConfig.s_PsndRate; PicoIn.regionOverride = currentConfig.s_PicoRegion; PicoIn.autoRgnOrder = currentConfig.s_PicoAutoRgnOrder; } @@ -958,10 +958,10 @@ void emu_set_fastforward(int set_on) static int set_Frameskip, set_EmuOpt, is_on = 0; if (set_on && !is_on) { - set_PsndOut = PsndOut; + set_PsndOut = PicoIn.sndOut; set_Frameskip = currentConfig.Frameskip; set_EmuOpt = currentConfig.EmuOpt; - PsndOut = NULL; + PicoIn.sndOut = NULL; currentConfig.Frameskip = 8; currentConfig.EmuOpt &= ~4; currentConfig.EmuOpt |= 0x40000; @@ -969,7 +969,7 @@ void emu_set_fastforward(int set_on) emu_status_msg("FAST FORWARD"); } else if (!set_on && is_on) { - PsndOut = set_PsndOut; + PicoIn.sndOut = set_PsndOut; currentConfig.Frameskip = set_Frameskip; currentConfig.EmuOpt = set_EmuOpt; PsndRerate(1); @@ -1253,9 +1253,9 @@ void emu_init(void) config_readlrom(path); PicoInit(); - PicoMessage = plat_status_msg_busy_next; - 
PicoMCDopenTray = emu_tray_open; - PicoMCDcloseTray = emu_tray_close; + PicoIn.osdMessage = plat_status_msg_busy_next; + PicoIn.mcdTrayOpen = emu_tray_open; + PicoIn.mcdTrayClose = emu_tray_close; sndout_init(); } @@ -1285,12 +1285,12 @@ void emu_finish(void) static void snd_write_nonblocking(int len) { - sndout_write_nb(PsndOut, len); + sndout_write_nb(PicoIn.sndOut, len); } void emu_sound_start(void) { - PsndOut = NULL; + PicoIn.sndOut = NULL; if (currentConfig.EmuOpt & EOPT_EN_SOUND) { @@ -1299,12 +1299,12 @@ void emu_sound_start(void) PsndRerate(Pico.m.frame_count ? 1 : 0); printf("starting audio: %i len: %i stereo: %i, pal: %i\n", - PsndRate, PsndLen, is_stereo, Pico.m.pal); - sndout_start(PsndRate, is_stereo); - PicoWriteSound = snd_write_nonblocking; + PicoIn.sndRate, Pico.snd.len, is_stereo, Pico.m.pal); + sndout_start(PicoIn.sndRate, is_stereo); + PicoIn.writeSound = snd_write_nonblocking; plat_update_volume(0, 0); memset(sndBuffer, 0, sizeof(sndBuffer)); - PsndOut = sndBuffer; + PicoIn.sndOut = sndBuffer; } } diff --git a/platform/common/menu_pico.c b/platform/common/menu_pico.c index 1d73d4a7..969fc8e2 100644 --- a/platform/common/menu_pico.c +++ b/platform/common/menu_pico.c @@ -610,7 +610,7 @@ static int mh_opt_misc(int id, int keys) { switch (id) { case MA_OPT_SOUND_QUALITY: - PsndRate = sndrate_prevnext(PsndRate, keys & PBTN_RIGHT); + PicoIn.sndRate = sndrate_prevnext(PicoIn.sndRate, keys & PBTN_RIGHT); break; case MA_OPT_REGION: region_prevnext(keys & PBTN_RIGHT); @@ -674,7 +674,7 @@ static const char *mgn_opt_sound(int id, int *offs) const char *str2; *offs = -8; str2 = (PicoIn.opt & POPT_EN_STEREO) ? 
"stereo" : "mono"; - sprintf(static_buff, "%5iHz %s", PsndRate, str2); + sprintf(static_buff, "%5iHz %s", PicoIn.sndRate, str2); return static_buff; } @@ -892,7 +892,7 @@ static void debug_menu_loop(void) if (inp & PBTN_UP) pv->debug_p ^= PVD_KILL_S_HI; if (inp & PBTN_MA2) pv->debug_p ^= PVD_KILL_32X; if (inp & PBTN_MOK) { - PsndOut = NULL; // just in case + PicoIn.sndOut = NULL; // just in case PicoIn.skipFrame = 1; PicoFrame(); PicoIn.skipFrame = 0; diff --git a/platform/common/mp3.c b/platform/common/mp3.c index 6c823ad4..c84962cc 100644 --- a/platform/common/mp3.c +++ b/platform/common/mp3.c @@ -167,11 +167,11 @@ void mp3_update(int *buffer, int length, int stereo) return; length_mp3 = length; - if (PsndRate <= 11025 + 100) { + if (PicoIn.sndRate <= 11025 + 100) { mix_samples = mix_16h_to_32_s2; length_mp3 <<= 2; shr = 2; } - else if (PsndRate <= 22050 + 100) { + else if (PicoIn.sndRate <= 22050 + 100) { mix_samples = mix_16h_to_32_s1; length_mp3 <<= 1; shr = 1; } diff --git a/platform/gizmondo/emu.c b/platform/gizmondo/emu.c index 96a49cf5..86c473c2 100644 --- a/platform/gizmondo/emu.c +++ b/platform/gizmondo/emu.c @@ -266,12 +266,12 @@ static void stdbg(const char *fmt, ...) 
static void updateSound(int len) { snd_all_samples += len / 2; - PsndOut += len / 2; - if (PsndOut - snd_cbuff >= snd_cbuf_samples) + PicoIn.sndOut += len / 2; + if (PicoIn.sndOut - snd_cbuff >= snd_cbuf_samples) { - //if (PsndOut - snd_cbuff != snd_cbuf_samples) - // stdbg("snd diff is %i, not %i", PsndOut - snd_cbuff, snd_cbuf_samples); - PsndOut = snd_cbuff; + //if (PicoIn.sndOut - snd_cbuff != snd_cbuf_samples) + // stdbg("snd diff is %i, not %i", PicoIn.sndOut - snd_cbuff, snd_cbuf_samples); + PicoIn.sndOut = snd_cbuff; } } @@ -317,7 +317,7 @@ static void RunEvents(unsigned int which) { int do_it = 1; - if (PsndOut != NULL) + if (PicoIn.sndOut != NULL) FrameworkAudio_SetPause(1); if (giz_screen == NULL) giz_screen = fb_lock(1); @@ -344,7 +344,7 @@ static void RunEvents(unsigned int which) Sleep(0); } - if (PsndOut != NULL) + if (PicoIn.sndOut != NULL) FrameworkAudio_SetPause(0); reset_timing = 1; } @@ -401,7 +401,7 @@ static void updateKeys(void) events = (allActions[0] | allActions[1]) >> 16; // volume is treated in special way and triggered every frame - if ((events & 0x6000) && PsndOut != NULL) + if ((events & 0x6000) && PicoIn.sndOut != NULL) { int vol = currentConfig.volume; if (events & 0x2000) { @@ -469,19 +469,19 @@ void pemu_loop(void) if (PicoIn.AHW & PAHW_MCD) PicoCDBufferInit(); // prepare sound stuff - PsndOut = NULL; + PicoIn.sndOut = NULL; if (currentConfig.EmuOpt & 4) { int ret, snd_excess_add, stereo; - if (PsndRate != PsndRate_old || (PicoIn.opt&0x0b) != (PicoOpt_old&0x0b) || Pico.m.pal != pal_old) { + if (PicoIn.sndRate != PsndRate_old || (PicoIn.opt&0x0b) != (PicoOpt_old&0x0b) || Pico.m.pal != pal_old) { PsndRerate(Pico.m.frame_count ? 
1 : 0); } stereo=(PicoIn.opt&8)>>3; - snd_excess_add = ((PsndRate - PsndLen*target_fps)<<16) / target_fps; - snd_cbuf_samples = (PsndRate< 22050) co = 11; + if (PicoIn.sndRate == 22050) co = 10; + if (PicoIn.sndRate > 22050) co = 11; if (PicoIn.opt&8) shift++; if (audio_skew < 0) { adj = -((-audio_skew) >> shift); @@ -600,7 +600,7 @@ void pemu_loop(void) for (i = 0; i < currentConfig.Frameskip; i++) { updateKeys(); SkipFrame(); frames_done++; - if (PsndOut) { // do framelimitting if sound is enabled + if (PicoIn.sndOut) { // do framelimitting if sound is enabled int tval_diff; tval = GetTickCount(); tval_diff = (int)(tval - tval_thissec) << 8; @@ -660,7 +660,7 @@ void pemu_loop(void) if (currentConfig.Frameskip < 0 && tval_diff - lim_time >= (300<<8)) // slowdown detection reset_timing = 1; - else if (PsndOut != NULL || currentConfig.Frameskip < 0) + else if (PicoIn.sndOut != NULL || currentConfig.Frameskip < 0) { // sleep if we are still too fast if (tval_diff < lim_time) @@ -676,8 +676,8 @@ void pemu_loop(void) if (PicoIn.AHW & PAHW_MCD) PicoCDBufferFree(); - if (PsndOut != NULL) { - PsndOut = snd_cbuff = NULL; + if (PicoIn.sndOut != NULL) { + PicoIn.sndOut = snd_cbuff = NULL; FrameworkAudio_Close(); } diff --git a/platform/gizmondo/menu.c b/platform/gizmondo/menu.c index 232701ad..51f032f0 100644 --- a/platform/gizmondo/menu.c +++ b/platform/gizmondo/menu.c @@ -1046,7 +1046,7 @@ static void menu_opt_cust_draw(const menu_entry *entry, int x, int y, void *para break; case MA_OPT_SOUND_QUALITY: str = (PicoIn.opt&0x08)?"stereo":"mono"; - text_out16(x, y, "Sound Quality: %5iHz %s", PsndRate, str); + text_out16(x, y, "Sound Quality: %5iHz %s", PicoIn.sndRate, str); break; case MA_OPT_REGION: text_out16(x, y, "Region: %s", me_region_name(PicoIn.regionOverride, PicoIn.autoRgnOrder)); @@ -1174,18 +1174,18 @@ static int menu_loop_options(void) } break; case MA_OPT_SOUND_QUALITY: - if ((inp & PBTN_RIGHT) && PsndRate == 44100 && + if ((inp & PBTN_RIGHT) && PicoIn.sndRate == 
44100 && !(PicoIn.opt&0x08)) { - PsndRate = 11025; + PicoIn.sndRate = 11025; PicoIn.opt |= 8; - } else if ((inp & PBTN_LEFT) && PsndRate == 11025 && + } else if ((inp & PBTN_LEFT) && PicoIn.sndRate == 11025 && (PicoIn.opt&0x08) && !(PicoIn.AHW&1)) { - PsndRate = 44100; + PicoIn.sndRate = 44100; PicoIn.opt &= ~8; } else - PsndRate = sndrate_prevnext(PsndRate, inp & PBTN_RIGHT); + PicoIn.sndRate = sndrate_prevnext(PicoIn.sndRate, inp & PBTN_RIGHT); break; case MA_OPT_REGION: region_prevnext(inp & PBTN_RIGHT); diff --git a/platform/gp2x/940ctl.c b/platform/gp2x/940ctl.c index 99c25d64..c270bfee 100644 --- a/platform/gp2x/940ctl.c +++ b/platform/gp2x/940ctl.c @@ -402,9 +402,9 @@ int YM2612UpdateOne_940(int *buffer, int length, int stereo, int is_buf_empty) writebuff_ptr = 0; /* predict sample counter for next frame */ - if (PsndLen_exc_add) { - length = PsndLen; - if (PsndLen_exc_cnt + PsndLen_exc_add >= 0x10000) length++; + if (Pico.snd.len_e_add) { + length = Pico.snd.len; + if (Pico.snd.len_e_cnt + Pico.snd.len_e_add >= 0x10000) length++; } /* give 940 ym job */ @@ -463,11 +463,11 @@ int mp3dec_start(FILE *f, int fpos_start) if (loaded_mp3 != f) { - if (PicoMessage != NULL) + if (PicoIn.osdMessage != NULL) { fseek(f, 0, SEEK_END); if (ftell(f) > 2*1024*1024) - PicoMessage("Loading MP3..."); + PicoIn.osdMessage("Loading MP3..."); } fseek(f, 0, SEEK_SET); fread(mp3_mem, 1, MP3_SIZE_MAX, f); diff --git a/platform/gp2x/emu.c b/platform/gp2x/emu.c index 3b3a234b..7e9a132f 100644 --- a/platform/gp2x/emu.c +++ b/platform/gp2x/emu.c @@ -692,7 +692,7 @@ void pemu_sound_start(void) { soc = soc_detect(); if (soc == SOCID_POLLUX) { - PsndRate = pollux_get_real_snd_rate(PsndRate); + PicoIn.sndRate = pollux_get_real_snd_rate(PicoIn.sndRate); PsndRerate(Pico.m.frame_count ? 
1 : 0); } @@ -707,10 +707,10 @@ void pemu_sound_stop(void) int i; /* get back from Pollux pain */ - PsndRate += 1000; + PicoIn.sndRate += 1000; for (i = 0; i < ARRAY_SIZE(sound_rates); i++) { - if (PsndRate >= sound_rates[i]) { - PsndRate = sound_rates[i]; + if (PicoIn.sndRate >= sound_rates[i]) { + PicoIn.sndRate = sound_rates[i]; break; } } diff --git a/platform/libretro/libretro.c b/platform/libretro/libretro.c index 99f0f3b5..2bdd07d2 100644 --- a/platform/libretro/libretro.c +++ b/platform/libretro/libretro.c @@ -1088,9 +1088,9 @@ bool retro_load_game(const struct retro_game_info *info) PicoLoopPrepare(); - PicoWriteSound = snd_write; + PicoIn.writeSound = snd_write; memset(sndBuffer, 0, sizeof(sndBuffer)); - PsndOut = sndBuffer; + PicoIn.sndOut = sndBuffer; PsndRerate(0); return true; @@ -1193,7 +1193,7 @@ static const unsigned short retro_pico_map[] = { static void snd_write(int len) { - audio_batch_cb(PsndOut, len / 4); + audio_batch_cb(PicoIn.sndOut, len / 4); } static enum input_device input_name_to_val(const char *name) @@ -1375,7 +1375,7 @@ void retro_init(void) #endif PicoIn.opt |= POPT_EN_DRC; #endif - PsndRate = 44100; + PicoIn.sndRate = 44100; PicoIn.autoRgnOrder = 0x184; // US, EU, JP vout_width = 320; @@ -1390,9 +1390,9 @@ void retro_init(void) PicoDrawSetOutFormat(PDF_RGB555, 0); PicoDrawSetOutBuf(vout_buf, vout_width * 2); - //PicoMessage = plat_status_msg_busy_next; - PicoMCDopenTray = disk_tray_open; - PicoMCDcloseTray = disk_tray_close; + //PicoIn.osdMessage = plat_status_msg_busy_next; + PicoIn.mcdTrayOpen = disk_tray_open; + PicoIn.mcdTrayClose = disk_tray_close; update_variables(); } diff --git a/platform/psp/emu.c b/platform/psp/emu.c index 8f77f395..0656f581 100644 --- a/platform/psp/emu.c +++ b/platform/psp/emu.c @@ -571,32 +571,32 @@ void pemu_sound_start(void) samples_made = samples_done = 0; - if (PsndRate != PsndRate_old || (PicoIn.opt&0x0b) != (PicoOpt_old&0x0b) || Pico.m.pal != pal_old) { + if (PicoIn.sndRate != PsndRate_old || 
(PicoIn.opt&0x0b) != (PicoOpt_old&0x0b) || Pico.m.pal != pal_old) { PsndRerate(Pico.m.frame_count ? 1 : 0); } stereo=(PicoIn.opt&8)>>3; samples_block = Pico.m.pal ? SOUND_BLOCK_SIZE_PAL : SOUND_BLOCK_SIZE_NTSC; - if (PsndRate <= 22050) samples_block /= 2; + if (PicoIn.sndRate <= 22050) samples_block /= 2; sndBuffer_endptr = &sndBuffer[samples_block*SOUND_BLOCK_COUNT]; lprintf("starting audio: %i, len: %i, stereo: %i, pal: %i, block samples: %i\n", - PsndRate, PsndLen, stereo, Pico.m.pal, samples_block); + PicoIn.sndRate, Pico.snd.len, stereo, Pico.m.pal, samples_block); // while (sceAudioOutput2GetRestSample() > 0) psp_msleep(100); // sceAudio_5C37C0AE(); - ret = sceAudio_38553111(samples_block/2, PsndRate, 2); // seems to not need that stupid 64byte alignment + ret = sceAudio_38553111(samples_block/2, PicoIn.sndRate, 2); // seems to not need that stupid 64byte alignment if (ret < 0) { lprintf("sceAudio_38553111() failed: %i\n", ret); emu_status_msg("sound init failed (%i), snd disabled", ret); currentConfig.EmuOpt &= ~EOPT_EN_SOUND; } else { - PicoWriteSound = writeSound; + PicoIn.writeSound = writeSound; memset32((int *)(void *)sndBuffer, 0, sizeof(sndBuffer)/4); snd_playptr = sndBuffer_endptr - samples_block; samples_made = samples_block; // send 1 empty block first.. 
- PsndOut = sndBuffer; - PsndRate_old = PsndRate; + PicoIn.sndOut = sndBuffer; + PsndRate_old = PicoIn.sndRate; PicoOpt_old = PicoIn.opt; pal_old = Pico.m.pal; } @@ -641,16 +641,16 @@ static void writeSound(int len) { int ret; - PsndOut += len / 2; - /*if (PsndOut > sndBuffer_endptr) { - memcpy32((int *)(void *)sndBuffer, (int *)endptr, (PsndOut - endptr + 1) / 2); - PsndOut = &sndBuffer[PsndOut - endptr]; + PicoIn.sndOut += len / 2; + /*if (PicoIn.sndOut > sndBuffer_endptr) { + memcpy32((int *)(void *)sndBuffer, (int *)endptr, (PicoIn.sndOut - endptr + 1) / 2); + PicoIn.sndOut = &sndBuffer[PicoIn.sndOut - endptr]; lprintf("mov\n"); } else*/ - if (PsndOut > sndBuffer_endptr) lprintf("snd oflow %i!\n", PsndOut - sndBuffer_endptr); - if (PsndOut >= sndBuffer_endptr) - PsndOut = sndBuffer; + if (PicoIn.sndOut > sndBuffer_endptr) lprintf("snd oflow %i!\n", PicoIn.sndOut - sndBuffer_endptr); + if (PicoIn.sndOut >= sndBuffer_endptr) + PicoIn.sndOut = sndBuffer; // signal the snd thread samples_made += len / 2; @@ -873,7 +873,7 @@ void pemu_loop(void) } // prepare sound stuff - PsndOut = NULL; + PicoIn.sndOut = NULL; if (currentConfig.EmuOpt & EOPT_EN_SOUND) { pemu_sound_start(); @@ -1021,9 +1021,9 @@ void pemu_loop(void) if (PicoIn.AHW & PAHW_MCD) PicoCDBufferFree(); - if (PsndOut != NULL) { + if (PicoIn.sndOut != NULL) { pemu_sound_stop(); - PsndOut = NULL; + PicoIn.sndOut = NULL; } // save SRAM diff --git a/platform/psp/menu.c b/platform/psp/menu.c index de63d345..ab022f97 100644 --- a/platform/psp/menu.c +++ b/platform/psp/menu.c @@ -1227,7 +1227,7 @@ static void menu_opt_cust_draw(const menu_entry *entry, int x, int y, void *para break; case MA_OPT_SOUND_QUALITY: str = (PicoIn.opt&0x08)?"stereo":"mono"; - text_out16(x, y, "Sound Quality: %5iHz %s", PsndRate, str); + text_out16(x, y, "Sound Quality: %5iHz %s", PicoIn.sndRate, str); break; case MA_OPT_REGION: text_out16(x, y, "Region: %s", me_region_name(PicoIn.regionOverride, PicoIn.autoRgnOrder)); @@ -1353,7 +1353,7 
@@ static int menu_loop_options(void) } break; case MA_OPT_SOUND_QUALITY: - PsndRate = sndrate_prevnext(PsndRate, inp & PBTN_RIGHT); + PicoIn.sndRate = sndrate_prevnext(PicoIn.sndRate, inp & PBTN_RIGHT); break; case MA_OPT_REGION: region_prevnext(inp & PBTN_RIGHT); diff --git a/platform/psp/mp3.c b/platform/psp/mp3.c index 4ea3bdd7..18624399 100644 --- a/platform/psp/mp3.c +++ b/platform/psp/mp3.c @@ -403,8 +403,8 @@ void mp3_update(int *buffer, int length, int stereo) if (mp3_handle < 0 || mp3_src_pos >= mp3_src_size) return; length_mp3 = length; - if (PsndRate == 22050) length_mp3 <<= 1; // mp3s are locked to 44100Hz stereo - else if (PsndRate == 11025) length_mp3 <<= 2; // so make length 44100ish + if (PicoIn.sndRate == 22050) length_mp3 <<= 1; // mp3s are locked to 44100Hz stereo + else if (PicoIn.sndRate == 11025) length_mp3 <<= 2; // so make length 44100ish /* do we have to wait? */ if (mp3_job_started && mp3_samples_ready < length_mp3) @@ -420,8 +420,8 @@ void mp3_update(int *buffer, int length, int stereo) { int shr = 0; void (*mix_samples)(int *dest_buf, short *mp3_buf, int count) = mix_16h_to_32; - if (PsndRate == 22050) { mix_samples = mix_16h_to_32_s1; shr = 1; } - else if (PsndRate == 11025) { mix_samples = mix_16h_to_32_s2; shr = 2; } + if (PicoIn.sndRate == 22050) { mix_samples = mix_16h_to_32_s1; shr = 1; } + else if (PicoIn.sndRate == 11025) { mix_samples = mix_16h_to_32_s2; shr = 2; } if (1152 - mp3_buffer_offs >= length_mp3) { mix_samples(buffer, mp3_mix_buffer[mp3_play_bufsel] + mp3_buffer_offs*2, length<<1); diff --git a/platform/win32/plat.c b/platform/win32/plat.c index a9cceb79..8abb0626 100644 --- a/platform/win32/plat.c +++ b/platform/win32/plat.c @@ -131,7 +131,7 @@ void pemu_sound_start(void) { int ret; - PsndOut = NULL; + PicoIn.sndOut = NULL; currentConfig.EmuOpt &= ~EOPT_EXT_FRMLIMIT; // prepare sound stuff @@ -139,14 +139,14 @@ void pemu_sound_start(void) { PsndRerate(0); - ret = DSoundInit(FrameWnd, PsndRate, (PicoIn.opt & 
POPT_EN_STEREO) ? 1 : 0, PsndLen); + ret = DSoundInit(FrameWnd, PicoIn.sndRate, (PicoIn.opt & POPT_EN_STEREO) ? 1 : 0, Pico.snd.len); if (ret != 0) { lprintf("dsound init failed\n"); return; } - PsndOut = (void *)sndbuff; - PicoWriteSound = update_sound; + PicoIn.sndOut = (void *)sndbuff; + PicoIn.writeSound = update_sound; currentConfig.EmuOpt |= EOPT_EXT_FRMLIMIT; } } diff --git a/tools/mkoffsets.c b/tools/mkoffsets.c index 297a1969..7e57383a 100644 --- a/tools/mkoffsets.c +++ b/tools/mkoffsets.c @@ -58,7 +58,7 @@ int main(int argc, char *argv[]) DUMP_EST(f, Pico); DUMP_EST(f, PicoMem_vram); DUMP_EST(f, PicoMem_cram); - DUMP_EST(f, PicoIn.opt); + DUMP_EST(f, PicoOpt); DUMP_EST(f, Draw2FB); DUMP_EST(f, HighPal); DUMP_PMEM(f, vram); From 31fbc691a10d9d3119fed9624ffcbd1a52784502 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 26 Nov 2017 20:19:40 +0200 Subject: [PATCH 0133/1110] 32x: remove some comm hacks they can (and do) easily break things --- pico/32x/32x.c | 3 +++ pico/32x/memory.c | 36 +++++++++++------------------------- pico/pico_int.h | 4 ++-- 3 files changed, 16 insertions(+), 27 deletions(-) diff --git a/pico/32x/32x.c b/pico/32x/32x.c index c10e1486..3743eb99 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -502,6 +502,9 @@ void sync_sh2s_normal(unsigned int m68k_target) if (CYCLES_GT(m68k_target, ssh2.m68krcycles_done)) ssh2.m68krcycles_done = m68k_target; } + + // everyone is in sync now + Pico32x.comm_dirty = 0; } #define STEP_68K 24 diff --git a/pico/32x/memory.c b/pico/32x/memory.c index 399c8bd1..6eb9d2b4 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -191,12 +191,10 @@ static u32 p32x_reg_read16(u32 a) int comreg = 1 << (a & 0x0f) / 2; if (cycles - msh2.m68krcycles_done > 244 - || (Pico32x.comm_dirty_68k & comreg)) + || (Pico32x.comm_dirty & comreg)) p32x_sync_sh2s(cycles); - if (Pico32x.comm_dirty_sh2 & comreg) - Pico32x.comm_dirty_sh2 &= ~comreg; - else if (m68k_poll_detect(a, cycles, P32XF_68KCPOLL)) { + if (m68k_poll_detect(a, 
cycles, P32XF_68KCPOLL)) { SekSetStop(1); SekEndRun(16); } @@ -388,14 +386,13 @@ static void p32x_reg_write8(u32 a, u32 d) if (REG8IN16(r, a) == d) return; - comreg = 1 << (a & 0x0f) / 2; - if (Pico32x.comm_dirty_68k & comreg) - p32x_sync_sh2s(cycles); + p32x_sync_sh2s(cycles); REG8IN16(r, a) = d; p32x_sh2_poll_event(&sh2s[0], SH2_STATE_CPOLL, cycles); p32x_sh2_poll_event(&sh2s[1], SH2_STATE_CPOLL, cycles); - Pico32x.comm_dirty_68k |= comreg; + comreg = 1 << (a & 0x0f) / 2; + Pico32x.comm_dirty |= comreg; if (cycles - (int)msh2.m68krcycles_done > 120) p32x_sync_sh2s(cycles); @@ -451,20 +448,13 @@ static void p32x_reg_write16(u32 a, u32 d) int cycles = SekCyclesDone(); int comreg; - if (r[a / 2] == d) - return; - - comreg = 1 << (a & 0x0f) / 2; - if (Pico32x.comm_dirty_68k & comreg) - p32x_sync_sh2s(cycles); + p32x_sync_sh2s(cycles); r[a / 2] = d; p32x_sh2_poll_event(&sh2s[0], SH2_STATE_CPOLL, cycles); p32x_sh2_poll_event(&sh2s[1], SH2_STATE_CPOLL, cycles); - Pico32x.comm_dirty_68k |= comreg; - - if (cycles - (int)msh2.m68krcycles_done > 120) - p32x_sync_sh2s(cycles); + comreg = 1 << (a & 0x0f) / 2; + Pico32x.comm_dirty |= comreg; return; } // PWM @@ -601,11 +591,7 @@ static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2) // comm port if ((a & 0x30) == 0x20) { - int comreg = 1 << (a & 0x0f) / 2; - if (Pico32x.comm_dirty_68k & comreg) - Pico32x.comm_dirty_68k &= ~comreg; - else - sh2_poll_detect(sh2, a, SH2_STATE_CPOLL, 3); + sh2_poll_detect(sh2, a, SH2_STATE_CPOLL, 3); sh2s_sync_on_read(sh2); return r[a / 2]; } @@ -708,7 +694,7 @@ static void p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2) p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, sh2_cycles_done_m68k(sh2)); comreg = 1 << (a & 0x0f) / 2; - Pico32x.comm_dirty_sh2 |= comreg; + Pico32x.comm_dirty |= comreg; return; } @@ -733,7 +719,7 @@ static void p32x_sh2reg_write16(u32 a, u32 d, SH2 *sh2) p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, sh2_cycles_done_m68k(sh2)); comreg = 1 << (a & 0x0f) / 2; - 
Pico32x.comm_dirty_sh2 |= comreg; + Pico32x.comm_dirty |= comreg; return; } // PWM diff --git a/pico/pico_int.h b/pico/pico_int.h index bb27922b..25c728a4 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -582,8 +582,8 @@ struct Pico32x unsigned int dmac0_fifo_ptr; unsigned short vdp_fbcr_fake; unsigned short pad2; - unsigned char comm_dirty_68k; - unsigned char comm_dirty_sh2; + unsigned char comm_dirty; + unsigned char pad3; // was comm_dirty_sh2 unsigned char pwm_irq_cnt; unsigned char pad1; unsigned short pwm_p[2]; // pwm pos in fifo From 6c2041fea0f4624daa988b5edd2bd4b7f9c381a7 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 28 Nov 2017 02:04:08 +0200 Subject: [PATCH 0134/1110] 32x: add other timing hacks For sdram sync, like NJTE. Still bad, but don't have a better solution for now (or ever?). --- pico/32x/32x.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pico/32x/32x.c b/pico/32x/32x.c index 3743eb99..9bfbefac 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -422,6 +422,9 @@ void p32x_sync_other_sh2(SH2 *sh2, unsigned int m68k_target) } } +#define STEP_LS 24 +#define STEP_N 440 + #define sync_sh2s_normal p32x_sync_sh2s //#define sync_sh2s_lockstep p32x_sync_sh2s @@ -451,6 +454,8 @@ void sync_sh2s_normal(unsigned int m68k_target) target = m68k_target; if (event_time_next && CYCLES_GT(target, event_time_next)) target = event_time_next; + if (CYCLES_GT(target, now + STEP_N)) + target = now + STEP_N; while (CYCLES_GT(target, now)) { @@ -507,8 +512,6 @@ void sync_sh2s_normal(unsigned int m68k_target) Pico32x.comm_dirty = 0; } -#define STEP_68K 24 - void sync_sh2s_lockstep(unsigned int m68k_target) { unsigned int mcycles; @@ -518,7 +521,7 @@ void sync_sh2s_lockstep(unsigned int m68k_target) mcycles = ssh2.m68krcycles_done; while (mcycles < m68k_target) { - mcycles += STEP_68K; + mcycles += STEP_LS; sync_sh2s_normal(mcycles); } } From eefdb8a5598b9343f2c96e4b2ffcd46c688c0f86 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 29 
Nov 2017 02:31:19 +0200 Subject: [PATCH 0135/1110] 32x: improve 68k bios handling --- pico/32x/memory.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/pico/32x/memory.c b/pico/32x/memory.c index 6eb9d2b4..706d820e 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -1644,23 +1644,37 @@ static void get_bios(void) Byteswap(Pico32xMem->m68k_rom, p32x_bios_g, sizeof(Pico32xMem->m68k_rom)); } else { + static const u16 andb[] = { 0x0239, 0x00fe, 0x00a1, 0x5107 }; + static const u16 p_d4[] = { + 0x48e7, 0x8040, // movem.l d0/a1, -(sp) + 0x227c, 0x00a1, 0x30f1, // movea.l #0xa130f1, a1 + 0x7007, // moveq.l #7, d0 + 0x12d8, //0: move.b (a0)+, (a1)+ + 0x5289, // addq.l #1, a1 + 0x51c8, 0xfffa, // dbra d0, 0b + 0x0239, 0x00fe, 0x00a1, // and.b #0xfe, (0xa15107).l + 0x5107, + 0x4cdf, 0x0201 // movem.l (sp)+, d0/a1 + }; + // generate 68k ROM ps = (u16 *)Pico32xMem->m68k_rom; pl = (u32 *)ps; for (i = 1; i < 0xc0/4; i++) pl[i] = HWSWAP(0x880200 + (i - 1) * 6); + pl[0x70/4] = 0; // fill with nops for (i = 0xc0/2; i < 0x100/2; i++) ps[i] = 0x4e71; -#if 0 - ps[0xc0/2] = 0x46fc; - ps[0xc2/2] = 0x2700; // move #0x2700,sr - ps[0xfe/2] = 0x60fe; // jump to self -#else + // c0: don't need to care about RV - not emulated + ps[0xc8/2] = 0x1280; // move.b d0, (a1) + memcpy(ps + 0xca/2, andb, sizeof(andb)); // and.b #0xfe, (a15107) + ps[0xd2/2] = 0x4e75; // rts + // d4: + memcpy(ps + 0xd4/2, p_d4, sizeof(p_d4)); ps[0xfe/2] = 0x4e75; // rts -#endif } // fill remaining m68k_rom page with game ROM memcpy(Pico32xMem->m68k_rom_bank + sizeof(Pico32xMem->m68k_rom), From 7e056c853a0ae81cbfc33135c3802b55eaab5b17 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 2 Dec 2017 00:10:08 +0200 Subject: [PATCH 0136/1110] stop IdleDet from affecting save states --- pico/state.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pico/state.c b/pico/state.c index 100f4237..5092ddcb 100644 --- a/pico/state.c +++ b/pico/state.c @@ -227,6 +227,9 @@ static int 
state_save(void *file) areaWrite(&ver, 1, 4, file); if (!(PicoIn.AHW & PAHW_SMS)) { + // the patches can cause incompatible saves with no-idle + SekFinishIdleDet(); + memset(buff, 0, sizeof(buff)); SekPackCpu(buff, 0); CHECKED_WRITE_BUFF(CHUNK_M68K, buff); @@ -235,6 +238,9 @@ static int state_save(void *file) CHECKED_WRITE_BUFF(CHUNK_IOPORTS, PicoMem.ioports); ym2612_pack_state(); CHECKED_WRITE(CHUNK_FM, 0x200+4, ym2612_regs); + + if (!(PicoIn.opt & POPT_DIS_IDLE_DET)) + SekInitIdleDet(); } else { CHECKED_WRITE_BUFF(CHUNK_SMS, Pico.ms); From f0ed9e38ad8e9cde9bfce6ec385cb3fddc5d0fee Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 2 Dec 2017 23:55:27 +0200 Subject: [PATCH 0137/1110] drc: rm overlapped block entry points otherwise we get duplicates in hash tables --- cpu/sh2/compiler.c | 65 ++++++++++++++++++++++++++++++------------ cpu/sh2/mame/sh2dasm.c | 1 + 2 files changed, 47 insertions(+), 19 deletions(-) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 7bd9de0e..f5525533 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -419,7 +419,7 @@ static void add_to_block_list(struct block_list **blist, struct block_desc *bloc static void rm_from_block_list(struct block_list **blist, struct block_desc *block) { struct block_list *prev = NULL, *current = *blist; - for (; current != NULL; prev = current, current = current->next) { + for (; current != NULL; current = current->next) { if (current->block == block) { if (prev == NULL) *blist = current->next; @@ -428,6 +428,7 @@ static void rm_from_block_list(struct block_list **blist, struct block_desc *blo free(current); return; } + prev = current; } dbg(1, "can't rm block %p (%08x-%08x)", block, block->addr, block->addr + block->size); @@ -514,6 +515,29 @@ missing: dbg(1, "rm_from_hashlist: be %p %08x missing?", be, be->pc); } +static void unregister_links(struct block_entry *be, int tcache_id) +{ + struct block_link *bl_unresolved = unresolved_links[tcache_id]; + struct block_link *bl, *bl_next; + + for 
(bl = be->links; bl != NULL; ) { + bl_next = bl->next; + bl->next = bl_unresolved; + bl_unresolved = bl; + bl = bl_next; + } + be->links = NULL; + unresolved_links[tcache_id] = bl_unresolved; +} + +// unlike sh2_smc_rm_block, the block stays and can still be accessed +// by other already directly linked blocks, just not preferred +static void kill_block_entry(struct block_entry *be, int tcache_id) +{ + rm_from_hashlist(be, tcache_id); + unregister_links(be, tcache_id); +} + static struct block_desc *dr_add_block(u32 addr, u16 size_lit, u16 size_nolit, int is_slave, int *blk_id) { @@ -524,8 +548,10 @@ static struct block_desc *dr_add_block(u32 addr, u16 size_lit, // do a lookup to get tcache_id and override check be = dr_get_entry(addr, is_slave, &tcache_id); - if (be != NULL) - dbg(1, "block override for %08x", addr); + if (be != NULL) { + dbg(1, "block override for %08x, was %p", addr, be->tcache_ptr); + kill_block_entry(be, tcache_id); + } bcount = &block_counts[tcache_id]; if (*bcount >= block_max_counts[tcache_id]) { @@ -1480,13 +1506,22 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // make block entry v = block->entry_count; - if (v < ARRAY_SIZE(block->entryp)) { + if (v < ARRAY_SIZE(block->entryp)) + { + struct block_entry *be_old; + block->entryp[v].pc = pc; block->entryp[v].tcache_ptr = tcache_ptr; block->entryp[v].links = NULL; #if (DRC_DEBUG & 2) block->entryp[v].block = block; #endif + be_old = dr_get_entry(pc, sh2->is_slave, &tcache_id); + if (be_old != NULL) { + dbg(1, "entry override for %08x, was %p", pc, be_old->tcache_ptr); + kill_block_entry(be_old, tcache_id); + } + add_to_hashlist(&block->entryp[v], tcache_id); block->entry_count++; @@ -2992,13 +3027,12 @@ static void sh2_generate_utils(void) #endif } -static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 ram_mask) +static void sh2_smc_rm_block(struct block_desc *bd, int tcache_id, u32 ram_mask) { - struct block_link *bl, *bl_next, *bl_unresolved; u32 
i, addr, end_addr; void *tmp; - dbg(2, " killing entry %08x-%08x-%08x, blkid %d,%d", + dbg(2, " killing block %08x-%08x-%08x, blkid %d,%d", bd->addr, bd->addr + bd->size_nolit, bd->addr + bd->size, tcache_id, bd - block_tables[tcache_id]); if (bd->addr == 0 || bd->entry_count == 0) { @@ -3015,7 +3049,6 @@ static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 ram } tmp = tcache_ptr; - bl_unresolved = unresolved_links[tcache_id]; // remove from hash table, make incoming links unresolved // XXX: maybe patch branches w/flush instead? @@ -3031,22 +3064,16 @@ static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 ram host_instructions_updated(bd->entryp[i].tcache_ptr, tcache_ptr); - for (bl = bd->entryp[i].links; bl != NULL; ) { - bl_next = bl->next; - bl->next = bl_unresolved; - bl_unresolved = bl; - bl = bl_next; - } + unregister_links(&bd->entryp[i], tcache_id); } tcache_ptr = tmp; - unresolved_links[tcache_id] = bl_unresolved; bd->addr = bd->size = bd->size_nolit = 0; bd->entry_count = 0; } -static void sh2_smc_rm_block(u32 a, u16 *drc_ram_blk, int tcache_id, u32 shift, u32 mask) +static void sh2_smc_rm_blocks(u32 a, u16 *drc_ram_blk, int tcache_id, u32 shift, u32 mask) { struct block_list **blist = NULL, *entry; u32 from = ~0, to = 0, end_addr, taddr, i; @@ -3064,7 +3091,7 @@ static void sh2_smc_rm_block(u32 a, u16 *drc_ram_blk, int tcache_id, u32 shift, if (to < end_addr) to = end_addr; - sh2_smc_rm_block_entry(block, tcache_id, mask); + sh2_smc_rm_block(block, tcache_id, mask); if (a >= block->addr + block->size_nolit) literal_disabled_frames = 3; @@ -3110,13 +3137,13 @@ static void sh2_smc_rm_block(u32 a, u16 *drc_ram_blk, int tcache_id, u32 shift, void sh2_drc_wcheck_ram(unsigned int a, int val, int cpuid) { dbg(2, "%csh2 smc check @%08x", cpuid ? 
's' : 'm', a); - sh2_smc_rm_block(a, Pico32xMem->drcblk_ram, 0, SH2_DRCBLK_RAM_SHIFT, 0x3ffff); + sh2_smc_rm_blocks(a, Pico32xMem->drcblk_ram, 0, SH2_DRCBLK_RAM_SHIFT, 0x3ffff); } void sh2_drc_wcheck_da(unsigned int a, int val, int cpuid) { dbg(2, "%csh2 smc check @%08x", cpuid ? 's' : 'm', a); - sh2_smc_rm_block(a, Pico32xMem->drcblk_da[cpuid], + sh2_smc_rm_blocks(a, Pico32xMem->drcblk_da[cpuid], 1 + cpuid, SH2_DRCBLK_DA_SHIFT, 0xfff); } diff --git a/cpu/sh2/mame/sh2dasm.c b/cpu/sh2/mame/sh2dasm.c index 21a32e68..3fa25e92 100644 --- a/cpu/sh2/mame/sh2dasm.c +++ b/cpu/sh2/mame/sh2dasm.c @@ -610,6 +610,7 @@ unsigned DasmSH2(char *buffer, unsigned pc, UINT16 opcode) case 14: flags = op1110(buffer,pc,opcode); break; default: flags = op1111(buffer,pc,opcode); break; } + (void)flags; return 0;//2 | flags | DASMFLAG_SUPPORTED; } From d602fd4f739acca7145b4208134da15fad2a6c6e Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 3 Dec 2017 03:40:13 +0200 Subject: [PATCH 0138/1110] drc: ignore cache-through on smc check --- cpu/sh2/compiler.c | 59 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 14 deletions(-) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index f5525533..0e8b65b3 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -47,10 +47,11 @@ #define MAX_LOCAL_BRANCHES 32 // debug stuff -// 1 - warnings/errors -// 2 - block info/smc -// 4 - asm -// 8 - runtime block entry log +// 01 - warnings/errors +// 02 - block info/smc +// 04 - asm +// 08 - runtime block entry log +// 10 - smc self-check // { #ifndef DRC_DEBUG #define DRC_DEBUG 0 @@ -1549,6 +1550,22 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emit_move_r_imm32(SHR_PC, pc); rcache_clean(); +#if (DRC_DEBUG & 0x10) + rcache_get_reg_arg(0, SHR_PC); + tmp = emit_memhandler_read(2); + tmp2 = rcache_get_tmp(); + tmp3 = rcache_get_tmp(); + emith_move_r_imm(tmp2, FETCH32(pc)); + emith_move_r_imm(tmp3, 0); + emith_cmp_r_r(tmp, tmp2); + 
EMITH_SJMP_START(DCOND_EQ); + emith_read_r_r_offs_c(DCOND_NE, tmp3, tmp3, 0); // crash + EMITH_SJMP_END(DCOND_EQ); + rcache_free_tmp(tmp); + rcache_free_tmp(tmp2); + rcache_free_tmp(tmp3); +#endif + // check cycles sr = rcache_get_reg(SHR_SR, RC_GR_READ); emith_cmp_r_imm(sr, 0); @@ -3073,26 +3090,39 @@ static void sh2_smc_rm_block(struct block_desc *bd, int tcache_id, u32 ram_mask) bd->entry_count = 0; } +/* +04205:243: == msh2 block #0,200 060017a8-060017f0 -> 0x27cb9c + 060017a8 d11c MOV.L @($70,PC),R1 ; @$0600181c + +04230:261: msh2 xsh w32 [260017a8] d225e304 +04230:261: msh2 smc check @260017a8 +04239:226: = ssh2 enter 060017a8 0x27cb9c, c=173 +*/ static void sh2_smc_rm_blocks(u32 a, u16 *drc_ram_blk, int tcache_id, u32 shift, u32 mask) { struct block_list **blist = NULL, *entry; - u32 from = ~0, to = 0, end_addr, taddr, i; struct block_desc *block; + u32 start_addr, end_addr, taddr, i; + u32 from = ~0, to = 0; + + // ignore cache-through + a &= ~0x20000000; blist = &inval_lookup[tcache_id][(a & mask) / INVAL_PAGE_SIZE]; entry = *blist; while (entry != NULL) { block = entry->block; - end_addr = block->addr + block->size; - if (block->addr <= a && a < end_addr) { + start_addr = block->addr & ~0x20000000; + end_addr = start_addr + block->size; + if (start_addr <= a && a < end_addr) { // get addr range that includes all removed blocks - if (from > block->addr) - from = block->addr; + if (from > start_addr) + from = start_addr; if (to < end_addr) to = end_addr; sh2_smc_rm_block(block, tcache_id, mask); - if (a >= block->addr + block->size_nolit) + if (a >= start_addr + block->size_nolit) literal_disabled_frames = 3; // entry lost, restart search @@ -3115,12 +3145,13 @@ static void sh2_smc_rm_blocks(u32 a, u16 *drc_ram_blk, int tcache_id, u32 shift, for (; entry != NULL; entry = entry->next) { block = entry->block; - if (block->addr > a) { - if (to > block->addr) - to = block->addr; + start_addr = block->addr & ~0x20000000; + if (start_addr > a) { + if (to > 
start_addr) + to = start_addr; } else { - end_addr = block->addr + block->size; + end_addr = start_addr + block->size; if (from < end_addr) from = end_addr; } From 00468b0a9b651d930c06d565f3ffc7e704f0e57c Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 3 Dec 2017 17:39:59 +0200 Subject: [PATCH 0139/1110] drc: do lit check before size_nolit is cleared --- cpu/sh2/compiler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 0e8b65b3..e18f43c8 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -3121,9 +3121,9 @@ static void sh2_smc_rm_blocks(u32 a, u16 *drc_ram_blk, int tcache_id, u32 shift, if (to < end_addr) to = end_addr; - sh2_smc_rm_block(block, tcache_id, mask); if (a >= start_addr + block->size_nolit) literal_disabled_frames = 3; + sh2_smc_rm_block(block, tcache_id, mask); // entry lost, restart search entry = *blist; From 98a3d79ba2d05e8883f8e3761f866cb4fa17c214 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 3 Dec 2017 17:41:30 +0200 Subject: [PATCH 0140/1110] drc: arm: use movw/movt it's about time... 
--- cpu/drc/emit_arm.c | 18 ++++++++++++++++++ cpu/sh2/compiler.c | 1 + 2 files changed, 19 insertions(+) diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index 64face12..eb5f332e 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -243,6 +243,11 @@ #define EOP_MSR_IMM(ror2,imm) EOP_C_MSR_IMM(A_COND_AL,ror2,imm) #define EOP_MSR_REG(rm) EOP_C_MSR_REG(A_COND_AL,rm) +#define EOP_MOVW(rd,imm) \ + EMIT(0xe3000000 | ((rd)<<12) | ((imm)&0xfff) | (((imm)<<4)&0xf0000)) + +#define EOP_MOVT(rd,imm) \ + EMIT(0xe3400000 | ((rd)<<12) | (((imm)>>16)&0xfff) | (((imm)>>12)&0xf0000)) // XXX: AND, RSB, *C, will break if 1 insn is not enough static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int imm) @@ -257,6 +262,19 @@ static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int imm = ~imm; op = A_OP_MVN; } +#ifdef HAVE_ARMV7 + for (v = imm, ror2 = 0; v && !(v & 3); v >>= 2) + ror2--; + if (v >> 8) { + /* 2+ insns needed - prefer movw/movt */ + if (op == A_OP_MVN) + imm = ~imm; + EOP_MOVW(rd, imm); + if (imm & 0xffff0000) + EOP_MOVT(rd, imm); + return; + } +#endif break; case A_OP_EOR: diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index e18f43c8..a7c71c30 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -29,6 +29,7 @@ #include #include "../../pico/pico_int.h" +#include "../../pico/arm_features.h" #include "sh2.h" #include "compiler.h" #include "../drc/cmn.h" From 91be5ebd10d16f1e5cf40fe1f115ed7f6b464743 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 3 Dec 2017 18:23:27 +0200 Subject: [PATCH 0141/1110] don't do idle log - it's not useful --- pico/pico_int.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pico/pico_int.h b/pico/pico_int.h index 25c728a4..0b50e4b9 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -983,7 +983,7 @@ static __inline int isspace_(int c) // emulation event logging #ifndef EL_LOGMASK # ifdef __x86_64__ // HACK -# define EL_LOGMASK (EL_STATUS|EL_IDLE|EL_ANOMALY) +# 
define EL_LOGMASK (EL_STATUS|EL_ANOMALY) # else # define EL_LOGMASK (EL_STATUS) # endif From 898d51a7fd1c090799ac88869fa0daf096fd702f Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 11 Dec 2017 02:53:17 +0200 Subject: [PATCH 0142/1110] drc: revive x86 dynarec, support x86-64 --- Makefile | 8 +- Makefile.libretro | 2 + cpu/drc/cmn.h | 2 + cpu/drc/emit_arm.c | 13 +++ cpu/drc/emit_x86.c | 256 +++++++++++++++++++++++++++++-------------- cpu/sh2/compiler.c | 112 ++++++++++++------- cpu/sh2/sh2.c | 9 +- cpu/sh2/sh2.h | 2 +- platform/libpicofe | 2 +- platform/linux/emu.c | 2 +- 10 files changed, 285 insertions(+), 123 deletions(-) diff --git a/Makefile b/Makefile index f0cd64ef..997609da 100644 --- a/Makefile +++ b/Makefile @@ -44,6 +44,9 @@ asm_mix ?= 1 else # if not arm use_fame ?= 1 use_cz80 ?= 1 +ifneq (,$(findstring 86,$(ARCH))) +use_sh2drc ?= 1 +endif endif -include Makefile.local @@ -221,8 +224,9 @@ pico/carthw_cfg.c: pico/carthw.cfg tools/make_carthw_c $< $@ # random deps -pico/carthw/svp/compiler.o : cpu/drc/emit_$(ARCH).c -cpu/sh2/compiler.o : cpu/drc/emit_$(ARCH).c +pico/carthw/svp/compiler.o : cpu/drc/emit_arm.c +cpu/sh2/compiler.o : cpu/drc/emit_arm.c +cpu/sh2/compiler.o : cpu/drc/emit_x86.c cpu/sh2/mame/sh2pico.o : cpu/sh2/mame/sh2.c pico/pico.o pico/cd/mcd.o pico/32x/32x.o : pico/pico_cmn.c pico/pico_int.h pico/memory.o pico/cd/memory.o pico/32x/memory.o : pico/pico_int.h pico/memory.h diff --git a/Makefile.libretro b/Makefile.libretro index b35a0dac..05e8bd8d 100644 --- a/Makefile.libretro +++ b/Makefile.libretro @@ -52,6 +52,7 @@ ifeq ($(platform), unix) SHARED := -shared DONT_COMPILE_IN_ZLIB = 1 CFLAGS += -DFAMEC_NO_GOTOS + use_sh2drc = 1 # Portable Linux else ifeq ($(platform), linux-portable) @@ -62,6 +63,7 @@ else ifeq ($(platform), linux-portable) LIBM := DONT_COMPILE_IN_ZLIB = 1 CFLAGS += -DFAMEC_NO_GOTOS + use_sh2drc = 1 # OS X else ifeq ($(platform), osx) diff --git a/cpu/drc/cmn.h b/cpu/drc/cmn.h index 8953edd1..39463566 100644 --- 
a/cpu/drc/cmn.h +++ b/cpu/drc/cmn.h @@ -1,7 +1,9 @@ typedef unsigned char u8; typedef signed char s8; typedef unsigned short u16; +typedef signed short s16; typedef unsigned int u32; +typedef signed int s32; #define DRC_TCACHE_SIZE (2*1024*1024) diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index eb5f332e..7b5566dd 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -6,6 +6,7 @@ * See COPYING file in the top-level directory. */ #define CONTEXT_REG 11 +#define RET_REG 0 // XXX: tcache_ptr type for SVP and SH2 compilers differs.. #define EMIT_PTR(ptr, x) \ @@ -369,6 +370,9 @@ static int emith_xbranch(int cond, void *target, int is_call) #define EMITH_NOTHING1(cond) \ (void)(cond) +#define EMITH_SJMP_DECL_() +#define EMITH_SJMP_START_(cond) EMITH_NOTHING1(cond) +#define EMITH_SJMP_END_(cond) EMITH_NOTHING1(cond) #define EMITH_SJMP_START(cond) EMITH_NOTHING1(cond) #define EMITH_SJMP_END(cond) EMITH_NOTHING1(cond) #define EMITH_SJMP3_START(cond) EMITH_NOTHING1(cond) @@ -378,6 +382,9 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_move_r_r(d, s) \ EOP_MOV_REG_SIMPLE(d, s) +#define emith_move_r_r_ptr(d, s) \ + emith_move_r_r(d, s) + #define emith_mvn_r_r(d, s) \ EOP_MVN_REG(A_COND_AL,0,d,s,A_AM1_LSL,0) @@ -521,6 +528,9 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_add_r_r_imm(d, s, imm) \ emith_op_imm2(A_COND_AL, 0, A_OP_ADD, d, s, imm) +#define emith_add_r_r_ptr_imm(d, s, imm) \ + emith_add_r_r_imm(d, s, imm) + #define emith_sub_r_r_imm(d, s, imm) \ emith_op_imm2(A_COND_AL, 0, A_OP_SUB, d, s, imm) @@ -611,6 +621,9 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_ctx_read(r, offs) \ emith_read_r_r_offs(r, CONTEXT_REG, offs) +#define emith_ctx_read_ptr(r, offs) \ + emith_ctx_read(r, offs) + #define emith_ctx_write(r, offs) \ EOP_STR_IMM(r, CONTEXT_REG, offs) diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 754c27fe..5362d9bd 100644 --- a/cpu/drc/emit_x86.c 
+++ b/cpu/drc/emit_x86.c @@ -16,6 +16,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define CONTEXT_REG xBP +#define RET_REG xAX #define ICOND_JO 0x00 #define ICOND_JNO 0x01 @@ -53,15 +54,15 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define EMIT_PTR(ptr, val, type) \ *(type *)(ptr) = val -#define EMIT(val, type) { \ +#define EMIT(val, type) do { \ EMIT_PTR(tcache_ptr, val, type); \ tcache_ptr += sizeof(type); \ -} +} while (0) -#define EMIT_OP(op) { \ +#define EMIT_OP(op) do { \ COUNT_OP; \ EMIT(op, u8); \ -} +} while (0) #define EMIT_MODRM(mod,r,rm) \ EMIT(((mod)<<6) | ((r)<<3) | (rm), u8) @@ -69,6 +70,9 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define EMIT_SIB(scale,index,base) \ EMIT(((scale)<<6) | ((index)<<3) | (base), u8) +#define EMIT_REX(w,r,x,b) \ + EMIT(0x40 | ((w)<<3) | ((r)<<2) | ((x)<<1) | (b), u8) + #define EMIT_OP_MODRM(op,mod,r,rm) do { \ EMIT_OP(op); \ EMIT_MODRM(mod, r, rm); \ @@ -90,6 +94,11 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_move_r_r(dst, src) \ EMIT_OP_MODRM(0x8b, 3, dst, src) +#define emith_move_r_r_ptr(dst, src) do { \ + EMIT_REX_FOR_PTR(); \ + EMIT_OP_MODRM(0x8b, 3, dst, src); \ +} while (0) + #define emith_add_r_r(d, s) \ EMIT_OP_MODRM(0x01, 3, s, d) @@ -118,34 +127,34 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; EMIT_OP_MODRM(0x39, 3, s, d) // fake teq - test equivalence - get_flags(d ^ s) -#define emith_teq_r_r(d, s) { \ +#define emith_teq_r_r(d, s) do { \ emith_push(d); \ emith_eor_r_r(d, s); \ emith_pop(d); \ -} +} while (0) -#define emith_mvn_r_r(d, s) { \ +#define emith_mvn_r_r(d, s) do { \ if (d != s) \ emith_move_r_r(d, s); \ EMIT_OP_MODRM(0xf7, 3, 2, d); /* NOT d */ \ -} +} while (0) -#define emith_negc_r_r(d, s) { \ +#define emith_negc_r_r(d, s) do { \ int tmp_ = rcache_get_tmp(); \ emith_move_r_imm(tmp_, 0); \ emith_sbc_r_r(tmp_, s); \ emith_move_r_r(d, tmp_); \ rcache_free_tmp(tmp_); \ -} +} while (0) -#define emith_neg_r_r(d, s) { \ +#define 
emith_neg_r_r(d, s) do { \ if (d != s) \ emith_move_r_r(d, s); \ EMIT_OP_MODRM(0xf7, 3, 3, d); /* NEG d */ \ -} +} while (0) // _r_r_r -#define emith_add_r_r_r(d, s1, s2) { \ +#define emith_add_r_r_r(d, s1, s2) do { \ if (d == s1) { \ emith_add_r_r(d, s2); \ } else if (d == s2) { \ @@ -154,9 +163,9 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; emith_move_r_r(d, s1); \ emith_add_r_r(d, s2); \ } \ -} +} while (0) -#define emith_eor_r_r_r(d, s1, s2) { \ +#define emith_eor_r_r_r(d, s1, s2) do { \ if (d == s1) { \ emith_eor_r_r(d, s2); \ } else if (d == s2) { \ @@ -165,29 +174,29 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; emith_move_r_r(d, s1); \ emith_eor_r_r(d, s2); \ } \ -} +} while (0) // _r_r_shift -#define emith_or_r_r_lsl(d, s, lslimm) { \ +#define emith_or_r_r_lsl(d, s, lslimm) do { \ int tmp_ = rcache_get_tmp(); \ emith_lsl(tmp_, s, lslimm); \ emith_or_r_r(d, tmp_); \ rcache_free_tmp(tmp_); \ -} +} while (0) // d != s -#define emith_eor_r_r_lsr(d, s, lsrimm) { \ +#define emith_eor_r_r_lsr(d, s, lsrimm) do { \ emith_push(s); \ emith_lsr(s, s, lsrimm); \ emith_eor_r_r(d, s); \ emith_pop(s); \ -} +} while (0) // _r_imm -#define emith_move_r_imm(r, imm) { \ +#define emith_move_r_imm(r, imm) do { \ EMIT_OP(0xb8 + (r)); \ EMIT(imm, u32); \ -} +} while (0) #define emith_move_r_imm_s8(r, imm) \ emith_move_r_imm(r, (u32)(signed int)(signed char)(imm)) @@ -212,7 +221,14 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_and_r_imm(r, imm) \ emith_arith_r_imm(4, r, imm) -#define emith_sub_r_imm(r, imm) \ +/* used for sub cycles after test, so retain flags with lea */ +#define emith_sub_r_imm(r, imm) do { \ + assert(r != xSP); \ + EMIT_OP_MODRM(0x8d, 2, r, r); \ + EMIT(-(s32)(imm), s32); \ +} while (0) + +#define emith_subf_r_imm(r, imm) \ emith_arith_r_imm(5, r, imm) #define emith_eor_r_imm(r, imm) \ @@ -231,20 +247,20 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; emith_arith_r_imm(4, r, ~(imm)) // fake conditionals 
(using SJMP instead) -#define emith_move_r_imm_c(cond, r, imm) { \ +#define emith_move_r_imm_c(cond, r, imm) do { \ (void)(cond); \ emith_move_r_imm(r, imm); \ -} +} while (0) -#define emith_add_r_imm_c(cond, r, imm) { \ +#define emith_add_r_imm_c(cond, r, imm) do { \ (void)(cond); \ emith_add_r_imm(r, imm); \ -} +} while (0) -#define emith_sub_r_imm_c(cond, r, imm) { \ +#define emith_sub_r_imm_c(cond, r, imm) do { \ (void)(cond); \ emith_sub_r_imm(r, imm); \ -} +} while (0) #define emith_or_r_imm_c(cond, r, imm) \ emith_or_r_imm(r, imm) @@ -274,26 +290,40 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_ret_c(cond) \ emith_ret() -// _r_r_imm -#define emith_add_r_r_imm(d, s, imm) { \ - if (d != s) \ - emith_move_r_r(d, s); \ - emith_add_r_imm(d, imm); \ -} +// _r_r_imm - use lea +#define emith_add_r_r_imm(d, s, imm) do { \ + assert(s != xSP); \ + EMIT_OP_MODRM(0x8d, 2, d, s); /* lea */ \ + EMIT(imm, s32); \ +} while (0) -#define emith_and_r_r_imm(d, s, imm) { \ +#define emith_add_r_r_ptr_imm(d, s, imm) do { \ + if (s != xSP) { \ + EMIT_REX_FOR_PTR(); \ + EMIT_OP_MODRM(0x8d, 2, d, s); /* lea */ \ + } \ + else { \ + if (d != s) \ + emith_move_r_r_ptr(d, s); \ + EMIT_REX_FOR_PTR(); \ + EMIT_OP_MODRM(0x81, 3, 0, d); /* add */ \ + } \ + EMIT(imm, s32); \ +} while (0) + +#define emith_and_r_r_imm(d, s, imm) do { \ if (d != s) \ emith_move_r_r(d, s); \ emith_and_r_imm(d, imm); \ -} +} while (0) // shift -#define emith_shift(op, d, s, cnt) { \ +#define emith_shift(op, d, s, cnt) do { \ if (d != s) \ emith_move_r_r(d, s); \ EMIT_OP_MODRM(0xc1, 3, op, d); \ EMIT(cnt, u8); \ -} +} while (0) #define emith_lsl(d, s, cnt) \ emith_shift(4, d, s, cnt) @@ -320,10 +350,10 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_push(r) \ EMIT_OP(0x50 + (r)) -#define emith_push_imm(imm) { \ +#define emith_push_imm(imm) do { \ EMIT_OP(0x68); \ EMIT(imm, u32); \ -} +} while (0) #define emith_pop(r) \ EMIT_OP(0x58 + (r)) @@ -349,13 +379,14 @@ enum { xAX 
= 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; emith_asr(d, d, 32 - (bits)); \ } -#define emith_setc(r) { \ +#define emith_setc(r) do { \ + assert(is_abcdx(r)); \ EMIT_OP(0x0f); \ EMIT_OP_MODRM(0x92, 3, 0, r); /* SETC r */ \ -} +} while (0) // XXX: stupid mess -#define emith_mul_(op, dlo, dhi, s1, s2) { \ +#define emith_mul_(op, dlo, dhi, s1, s2) do { \ int rmr; \ if (dlo != xAX && dhi != xAX) \ emith_push(xAX); \ @@ -381,7 +412,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; emith_pop(xDX); \ if (dlo != xAX && dhi != xAX) \ emith_pop(xAX); \ -} +} while (0) #define emith_mul_u64(dlo, dhi, s1, s2) \ emith_mul_(4, dlo, dhi, s1, s2) /* MUL */ @@ -393,20 +424,19 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; emith_mul_(4, d, -1, s1, s2) // (dlo,dhi) += signed(s1) * signed(s2) -#define emith_mula_s64(dlo, dhi, s1, s2) { \ +#define emith_mula_s64(dlo, dhi, s1, s2) do { \ emith_push(dhi); \ emith_push(dlo); \ emith_mul_(5, dlo, dhi, s1, s2); \ EMIT_OP_MODRM(0x03, 0, dlo, 4); \ - EMIT_SIB(0, 4, 4); /* add dlo, [esp] */ \ + EMIT_SIB(0, 4, 4); /* add dlo, [xsp] */ \ EMIT_OP_MODRM(0x13, 1, dhi, 4); \ EMIT_SIB(0, 4, 4); \ - EMIT(4, u8); /* adc dhi, [esp+4] */ \ - emith_add_r_imm(xSP, 4*2); \ -} + EMIT(sizeof(void *), u8); /* adc dhi, [xsp+{4,8}] */ \ + emith_add_r_r_ptr_imm(xSP, xSP, sizeof(void *) * 2); \ +} while (0) // "flag" instructions are the same -#define emith_subf_r_imm emith_sub_r_imm #define emith_addf_r_r emith_add_r_r #define emith_subf_r_r emith_sub_r_r #define emith_adcf_r_r emith_adc_r_r @@ -464,19 +494,24 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; rcache_free_tmp(r_); \ } while (0) -#define emith_read16_r_r_offs(r, rs, offs) { \ +#define emith_read16_r_r_offs(r, rs, offs) do { \ EMIT(0x66, u8); /* operand override */ \ emith_read_r_r_offs(r, rs, offs); \ -} +} while (0) -#define emith_write16_r_r_offs(r, rs, offs) { \ +#define emith_write16_r_r_offs(r, rs, offs) do { \ EMIT(0x66, u8); \ emith_write_r_r_offs(r, rs, offs); \ -} +} 
while (0) #define emith_ctx_read(r, offs) \ emith_read_r_r_offs(r, CONTEXT_REG, offs) +#define emith_ctx_read_ptr(r, offs) do { \ + EMIT_REX_FOR_PTR(); \ + emith_deref_op(0x8b, r, CONTEXT_REG, offs); \ +} while (0) + #define emith_ctx_write(r, offs) \ emith_write_r_r_offs(r, CONTEXT_REG, offs) @@ -499,7 +534,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; } #define emith_jump(ptr) { \ - u32 disp = (u32)(ptr) - ((u32)tcache_ptr + 5); \ + u32 disp = (u8 *)(ptr) - ((u8 *)tcache_ptr + 5); \ EMIT_OP(0xe9); \ EMIT(disp, u32); \ } @@ -507,30 +542,30 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_jump_patchable(target) \ emith_jump(target) -#define emith_jump_cond(cond, ptr) { \ - u32 disp = (u32)(ptr) - ((u32)tcache_ptr + 6); \ +#define emith_jump_cond(cond, ptr) do { \ + u32 disp = (u8 *)(ptr) - ((u8 *)tcache_ptr + 6); \ EMIT(0x0f, u8); \ EMIT_OP(0x80 | (cond)); \ EMIT(disp, u32); \ -} +} while (0) #define emith_jump_cond_patchable(cond, target) \ emith_jump_cond(cond, target) #define emith_jump_patch(ptr, target) do { \ - u32 disp_ = (u32)(target) - ((u32)(ptr) + 4); \ + u32 disp_ = (u8 *)(target) - ((u8 *)(ptr) + 4); \ u32 offs_ = (*(u8 *)(ptr) == 0x0f) ? 
2 : 1; \ EMIT_PTR((u8 *)(ptr) + offs_, disp_ - offs_, u32); \ } while (0) #define emith_jump_at(ptr, target) { \ - u32 disp_ = (u32)(target) - ((u32)(ptr) + 5); \ + u32 disp_ = (u8 *)(target) - ((u8 *)(ptr) + 5); \ EMIT_PTR(ptr, 0xe9, u8); \ EMIT_PTR((u8 *)(ptr) + 1, disp_, u32); \ } #define emith_call(ptr) { \ - u32 disp = (u32)(ptr) - ((u32)tcache_ptr + 5); \ + u32 disp = (u8 *)(ptr) - ((u8 *)tcache_ptr + 5); \ EMIT_OP(0xe8); \ EMIT(disp, u32); \ } @@ -541,10 +576,10 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_call_reg(r) \ EMIT_OP_MODRM(0xff, 3, 2, r) -#define emith_call_ctx(offs) { \ +#define emith_call_ctx(offs) do { \ EMIT_OP_MODRM(0xff, 2, 2, CONTEXT_REG); \ EMIT(offs, u32); \ -} +} while (0) #define emith_ret() \ EMIT_OP(0xc3) @@ -552,10 +587,10 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_jump_reg(r) \ EMIT_OP_MODRM(0xff, 3, 4, r) -#define emith_jump_ctx(offs) { \ +#define emith_jump_ctx(offs) do { \ EMIT_OP_MODRM(0xff, 2, 4, CONTEXT_REG); \ EMIT(offs, u32); \ -} +} while (0) #define emith_push_ret() @@ -584,6 +619,15 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; // "simple" jump (no more then a few insns) // ARM will use conditional instructions here +#define EMITH_SJMP_DECL_() \ + u8 *cond_ptr + +#define EMITH_SJMP_START_(cond) \ + JMP8_POS(cond_ptr) + +#define EMITH_SJMP_END_(cond) \ + JMP8_EMIT(cond, cond_ptr) + #define EMITH_SJMP_START EMITH_JMP_START #define EMITH_SJMP_END EMITH_JMP_END @@ -591,20 +635,55 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define EMITH_SJMP3_MID EMITH_JMP3_MID #define EMITH_SJMP3_END EMITH_JMP3_END -#define emith_pass_arg_r(arg, reg) { \ +#define emith_pass_arg_r(arg, reg) do { \ int rd = 7; \ host_arg2reg(rd, arg); \ - emith_move_r_r(rd, reg); \ -} + emith_move_r_r_ptr(rd, reg); \ +} while (0) -#define emith_pass_arg_imm(arg, imm) { \ +#define emith_pass_arg_imm(arg, imm) do { \ int rd = 7; \ host_arg2reg(rd, arg); \ emith_move_r_imm(rd, imm); \ -} +} 
while (0) #define host_instructions_updated(base, end) +#ifdef __x86_64__ + +#define PTR_SCALE 3 +#define NA_TMP_REG xCX // non-arg tmp from reg_temp[] + +#define EMIT_REX_FOR_PTR() \ + EMIT_REX(1,0,0,0) + +#define host_arg2reg(rd, arg) \ + switch (arg) { \ + case 0: rd = xDI; break; \ + case 1: rd = xSI; break; \ + case 2: rd = xDX; break; \ + } + +#define emith_sh2_drc_entry() { \ + emith_push(xBX); \ + emith_push(xBP); \ + emith_push(xSI); /* to align */ \ +} + +#define emith_sh2_drc_exit() { \ + emith_pop(xSI); \ + emith_pop(xBP); \ + emith_pop(xBX); \ + emith_ret(); \ +} + +#else + +#define PTR_SCALE 2 +#define NA_TMP_REG xBX // non-arg tmp from reg_temp[] + +#define EMIT_REX_FOR_PTR() + #define host_arg2reg(rd, arg) \ switch (arg) { \ case 0: rd = xAX; break; \ @@ -612,7 +691,6 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; case 2: rd = xCX; break; \ } -/* SH2 drc specific */ #define emith_sh2_drc_entry() { \ emith_push(xBX); \ emith_push(xBP); \ @@ -628,15 +706,33 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; emith_ret(); \ } -// assumes EBX is free temporary +#endif + +#define emith_save_caller_regs(mask) do { \ + if ((mask) & (1 << xAX)) emith_push(xAX); \ + if ((mask) & (1 << xCX)) emith_push(xCX); \ + if ((mask) & (1 << xDX)) emith_push(xDX); \ + if ((mask) & (1 << xSI)) emith_push(xSI); \ + if ((mask) & (1 << xDI)) emith_push(xDI); \ +} while (0) + +#define emith_restore_caller_regs(mask) do { \ + if ((mask) & (1 << xDI)) emith_pop(xDI); \ + if ((mask) & (1 << xSI)) emith_pop(xSI); \ + if ((mask) & (1 << xDX)) emith_pop(xDX); \ + if ((mask) & (1 << xCX)) emith_pop(xCX); \ + if ((mask) & (1 << xAX)) emith_pop(xAX); \ +} while (0) + #define emith_sh2_wcall(a, tab) { \ int arg2_; \ host_arg2reg(arg2_, 2); \ - emith_lsr(xBX, a, SH2_WRITE_SHIFT); \ - EMIT_OP_MODRM(0x8b, 0, xBX, 4); \ - EMIT_SIB(2, xBX, tab); /* mov ebx, [tab + ebx * 4] */ \ - emith_move_r_r(arg2_, CONTEXT_REG); \ - emith_jump_reg(xBX); \ + emith_lsr(NA_TMP_REG, a, 
SH2_WRITE_SHIFT); \ + EMIT_REX_FOR_PTR(); \ + EMIT_OP_MODRM(0x8b, 0, NA_TMP_REG, 4); \ + EMIT_SIB(PTR_SCALE, NA_TMP_REG, tab); /* mov tmp, [tab + tmp * {4,8}] */ \ + emith_move_r_r_ptr(arg2_, CONTEXT_REG); \ + emith_jump_reg(NA_TMP_REG); \ } #define emith_sh2_dtbf_loop() { \ @@ -697,8 +793,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; JMP8_EMIT(ICOND_JE, jmp0); /* do_sub: */ \ emith_sub_r_r(rn, rm); \ JMP8_EMIT_NC(jmp1); /* done: */ \ - emith_setc(tmp_); \ - EMIT_OP_MODRM(0x31, 3, tmp_, sr); /* T = Q1 ^ Q2 */ \ + emith_adc_r_r(tmp_, tmp_); \ + emith_eor_r_r(sr, tmp_); \ rcache_free_tmp(tmp_); \ } diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index a7c71c30..3b6b45af 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -326,6 +326,27 @@ static temp_reg_t reg_temp[] = { { xDX, }, }; +#elif defined(__x86_64__) +#include "../drc/emit_x86.c" + +static const int reg_map_g2h[] = { + -1, -1, -1, -1, + -1, -1, -1, -1, + -1, -1, -1, -1, + -1, -1, -1, -1, + -1, -1, -1, xBX, + -1, -1, -1, -1, +}; + +// ax, cx, dx are usually temporaries by convention +static temp_reg_t reg_temp[] = { + { xAX, }, + { xCX, }, + { xDX, }, + { xSI, }, + { xDI, }, +}; + #else #error unsupported arch #endif @@ -919,13 +940,12 @@ do_alloc: return tr->hreg; } -static int rcache_get_arg_id(int arg) +static int rcache_get_hr_id(int hr) { - int i, r = 0; - host_arg2reg(r, arg); + int i; for (i = 0; i < ARRAY_SIZE(reg_temp); i++) - if (reg_temp[i].hreg == r) + if (reg_temp[i].hreg == hr) break; if (i == ARRAY_SIZE(reg_temp)) // can't happen @@ -938,7 +958,7 @@ static int rcache_get_arg_id(int arg) gconst_check_evict(reg_temp[i].greg); } else if (reg_temp[i].type == HR_TEMP) { - printf("arg %d reg %d already used, aborting\n", arg, r); + printf("host reg %d already used, aborting\n", hr); exit(1); } @@ -948,6 +968,13 @@ static int rcache_get_arg_id(int arg) return i; } +static int rcache_get_arg_id(int arg) +{ + int r = 0; + host_arg2reg(r, arg); + return rcache_get_hr_id(r); +} + 
// get a reg to be used as function arg static int rcache_get_tmp_arg(int arg) { @@ -957,6 +984,15 @@ static int rcache_get_tmp_arg(int arg) return reg_temp[id].hreg; } +// ... as return value after a call +static int rcache_get_tmp_ret(void) +{ + int id = rcache_get_hr_id(RET_REG); + reg_temp[id].type = HR_TEMP; + + return reg_temp[id].hreg; +} + // same but caches a reg. RC_GR_READ only. static int rcache_get_reg_arg(int arg, sh2_reg_e r) { @@ -1104,8 +1140,8 @@ static int emit_get_rbase_and_offs(u32 a, u32 *offs) // XXX: could use some related reg hr = rcache_get_tmp(); - emith_ctx_read(hr, poffs); - emith_add_r_imm(hr, a & mask & ~0xff); + emith_ctx_read_ptr(hr, poffs); + emith_add_r_r_ptr_imm(hr, hr, a & mask & ~0xff); *offs = a & 0xff; // XXX: ARM oriented.. return hr; } @@ -1154,7 +1190,7 @@ static int emit_memhandler_read_(int size, int ram_check) emith_ctx_write(reg_map_g2h[SHR_SR], SHR_SR * 4); arg1 = rcache_get_tmp_arg(1); - emith_move_r_r(arg1, CONTEXT_REG); + emith_move_r_r_ptr(arg1, CONTEXT_REG); #if 0 // can't do this because of unmapped reads // ndef PDB_NET @@ -1208,8 +1244,7 @@ static int emit_memhandler_read_(int size, int ram_check) if (reg_map_g2h[SHR_SR] != -1) emith_ctx_read(reg_map_g2h[SHR_SR], SHR_SR * 4); - // assuming arg0 and retval reg matches - return rcache_get_tmp_arg(0); + return rcache_get_tmp_ret(); } static int emit_memhandler_read(int size) @@ -1279,7 +1314,7 @@ static void emit_memhandler_write(int size) emith_call(sh2_drc_write16); break; case 2: // 32 - emith_move_r_r(ctxr, CONTEXT_REG); + emith_move_r_r_ptr(ctxr, CONTEXT_REG); emith_call(sh2_drc_write32); break; } @@ -1351,26 +1386,23 @@ static void emit_do_static_regs(int is_write, int tmpr) } } +/* just after lookup function, jump to address returned */ static void emit_block_entry(void) { - int arg0; - - host_arg2reg(arg0, 0); - #if (DRC_DEBUG & 8) || defined(PDB) int arg1, arg2; host_arg2reg(arg1, 1); host_arg2reg(arg2, 2); emit_do_static_regs(1, arg2); - 
emith_move_r_r(arg1, CONTEXT_REG); + emith_move_r_r_ptr(arg1, CONTEXT_REG); emith_move_r_r(arg2, rcache_get_reg(SHR_SR, RC_GR_READ)); emith_call(sh2_drc_log_entry); rcache_invalidate(); #endif - emith_tst_r_r(arg0, arg0); + emith_tst_r_r(RET_REG, RET_REG); EMITH_SJMP_START(DCOND_EQ); - emith_jump_reg_c(DCOND_NE, arg0); + emith_jump_reg_c(DCOND_NE, RET_REG); EMITH_SJMP_END(DCOND_EQ); } @@ -2703,14 +2735,18 @@ end_op: struct op_data *opd_b = (op_flags[i] & OF_DELAY_OP) ? &ops[i-1] : opd; u32 target_pc = opd_b->imm; - int cond = -1; + int cond = -1, ncond = -1; void *target = NULL; + EMITH_SJMP_DECL_(); sr = rcache_get_reg(SHR_SR, RC_GR_RMW); FLUSH_CYCLES(sr); + rcache_clean(); - if (opd_b->op != OP_BRANCH) + if (opd_b->op != OP_BRANCH) { cond = (opd_b->op == OP_BRANCH_CF) ? DCOND_EQ : DCOND_NE; + ncond = (opd_b->op == OP_BRANCH_CF) ? DCOND_NE : DCOND_EQ; + } if (cond != -1) { int ctaken = (op_flags[i] & OF_DELAY_OP) ? 1 : 2; @@ -2719,9 +2755,9 @@ end_op: else emith_tst_r_imm(sr, T); + EMITH_SJMP_START_(ncond); emith_sub_r_imm_c(cond, sr, ctaken<<12); } - rcache_clean(); #if LINK_BRANCHES if (find_in_array(branch_target_pc, branch_target_count, target_pc) >= 0) @@ -2750,8 +2786,10 @@ end_op: return NULL; } - if (cond != -1) + if (cond != -1) { emith_jump_cond_patchable(cond, target); + EMITH_SJMP_END_(ncond); + } else { emith_jump_patchable(target); rcache_invalidate(); @@ -2906,18 +2944,18 @@ static void sh2_generate_utils(void) rcache_invalidate(); emith_ctx_read(arg0, SHR_PC * 4); emith_ctx_read(arg1, offsetof(SH2, is_slave)); - emith_add_r_r_imm(arg2, CONTEXT_REG, offsetof(SH2, drc_tmp)); + emith_add_r_r_ptr_imm(arg2, CONTEXT_REG, offsetof(SH2, drc_tmp)); emith_call(dr_lookup_block); emit_block_entry(); // lookup failed, call sh2_translate() - emith_move_r_r(arg0, CONTEXT_REG); + emith_move_r_r_ptr(arg0, CONTEXT_REG); emith_ctx_read(arg1, offsetof(SH2, drc_tmp)); // tcache_id emith_call(sh2_translate); emit_block_entry(); // sh2_translate() failed, flush cache and 
retry emith_ctx_read(arg0, offsetof(SH2, drc_tmp)); emith_call(flush_tcache); - emith_move_r_r(arg0, CONTEXT_REG); + emith_move_r_r_ptr(arg0, CONTEXT_REG); emith_ctx_read(arg1, offsetof(SH2, drc_tmp)); emith_call(sh2_translate); emit_block_entry(); @@ -2944,13 +2982,13 @@ static void sh2_generate_utils(void) emith_add_r_imm(tmp, 4); tmp = rcache_get_reg_arg(1, SHR_SR); emith_clear_msb(tmp, tmp, 22); - emith_move_r_r(arg2, CONTEXT_REG); + emith_move_r_r_ptr(arg2, CONTEXT_REG); emith_call(p32x_sh2_write32); // XXX: use sh2_drc_write32? rcache_invalidate(); // push PC rcache_get_reg_arg(0, SHR_SP); emith_ctx_read(arg1, SHR_PC * 4); - emith_move_r_r(arg2, CONTEXT_REG); + emith_move_r_r_ptr(arg2, CONTEXT_REG); emith_call(p32x_sh2_write32); rcache_invalidate(); // update I, cycles, do callback @@ -2960,16 +2998,16 @@ static void sh2_generate_utils(void) emith_or_r_r_lsl(sr, arg1, I_SHIFT); emith_sub_r_imm(sr, 13 << 12); // at least 13 cycles rcache_flush(); - emith_move_r_r(arg0, CONTEXT_REG); + emith_move_r_r_ptr(arg0, CONTEXT_REG); emith_call_ctx(offsetof(SH2, irq_callback)); // vector = sh2->irq_callback(sh2, level); // obtain new PC - emith_lsl(arg0, arg0, 2); + emith_lsl(arg0, RET_REG, 2); emith_ctx_read(arg1, SHR_VBR * 4); emith_add_r_r(arg0, arg1); - emit_memhandler_read(2); - emith_ctx_write(arg0, SHR_PC * 4); -#ifdef __i386__ - emith_add_r_imm(xSP, 4); // fix stack + tmp = emit_memhandler_read(2); + emith_ctx_write(tmp, SHR_PC * 4); +#if defined(__i386__) || defined(__x86_64__) + emith_add_r_r_ptr_imm(xSP, xSP, sizeof(void *)); // fix stack #endif emith_jump(sh2_drc_dispatcher); rcache_invalidate(); @@ -2977,19 +3015,19 @@ static void sh2_generate_utils(void) // sh2_drc_entry(SH2 *sh2) sh2_drc_entry = (void *)tcache_ptr; emith_sh2_drc_entry(); - emith_move_r_r(CONTEXT_REG, arg0); // move ctx, arg0 + emith_move_r_r_ptr(CONTEXT_REG, arg0); // move ctx, arg0 emit_do_static_regs(0, arg2); emith_call(sh2_drc_test_irq); emith_jump(sh2_drc_dispatcher); // 
sh2_drc_write8(u32 a, u32 d) sh2_drc_write8 = (void *)tcache_ptr; - emith_ctx_read(arg2, offsetof(SH2, write8_tab)); + emith_ctx_read_ptr(arg2, offsetof(SH2, write8_tab)); emith_sh2_wcall(arg0, arg2); // sh2_drc_write16(u32 a, u32 d) sh2_drc_write16 = (void *)tcache_ptr; - emith_ctx_read(arg2, offsetof(SH2, write16_tab)); + emith_ctx_read_ptr(arg2, offsetof(SH2, write16_tab)); emith_sh2_wcall(arg0, arg2); #ifdef PDB_NET @@ -3015,7 +3053,7 @@ static void sh2_generate_utils(void) emith_ctx_read(arg2, offsetof(SH2, pdb_io_csum[1])); \ emith_adc_r_imm(arg2, 0x01000000); \ emith_ctx_write(arg2, offsetof(SH2, pdb_io_csum[1])); \ - emith_move_r_r(arg2, CONTEXT_REG); \ + emith_move_r_r_ptr(arg2, CONTEXT_REG); \ emith_jump(func); \ func = tmp; \ } diff --git a/cpu/sh2/sh2.c b/cpu/sh2/sh2.c index 23b8fc0a..403c4c70 100644 --- a/cpu/sh2/sh2.c +++ b/cpu/sh2/sh2.c @@ -237,7 +237,7 @@ static void dump_regs(SH2 *sh2) printf("%csh2 SR: %03x PR: %08x\n", csh2, sh2->sr, sh2->pr); } -void do_sh2_cmp(SH2 *current) +void REGPARM(1) do_sh2_cmp(SH2 *current) { static int current_slave; static u32 current_val; @@ -251,6 +251,13 @@ void do_sh2_cmp(SH2 *current) int cycles; int i, ret; +#if 0 + sr = current->sr; + current->sr &= 0x3f3; + do_sh2_trace(current, (signed int)sr >> 12); + current->sr = sr; + return; +#endif sh2ref[1].is_slave = 1; while (1) { diff --git a/cpu/sh2/sh2.h b/cpu/sh2/sh2.h index e945354d..407270f1 100644 --- a/cpu/sh2/sh2.h +++ b/cpu/sh2/sh2.h @@ -118,7 +118,7 @@ void REGPARM(3) p32x_sh2_write32(unsigned int a, unsigned int d, SH2 *sh2); // debug #ifdef DRC_CMP void do_sh2_trace(SH2 *current, int cycles); -void do_sh2_cmp(SH2 *current); +void REGPARM(1) do_sh2_cmp(SH2 *current); #endif #endif /* __SH2_H__ */ diff --git a/platform/libpicofe b/platform/libpicofe index f287890d..21082d0b 160000 --- a/platform/libpicofe +++ b/platform/libpicofe @@ -1 +1 @@ -Subproject commit f287890d65ad36ca75bb71d05745693ae78b1490 +Subproject commit 
21082d0b2b9910727770674cef9b68b9e97a3155 diff --git a/platform/linux/emu.c b/platform/linux/emu.c index ddde8dab..c470a771 100644 --- a/platform/linux/emu.c +++ b/platform/linux/emu.c @@ -29,7 +29,7 @@ void pemu_prep_defconfig(void) void pemu_validate_config(void) { -#ifndef __arm__ +#if !defined(__arm__) && !defined(__i386__) && !defined(__x86_64__) PicoIn.opt &= ~POPT_EN_DRC; #endif } From 9c0ac970438d6962bd9e8074bc3b26060a7482ba Mon Sep 17 00:00:00 2001 From: Chips-fr Date: Mon, 25 Dec 2017 17:38:36 +0100 Subject: [PATCH 0143/1110] Support latest raspberry GLES library name --- Makefile | 8 +++++++- README | 7 ------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index 997609da..2620a704 100644 --- a/Makefile +++ b/Makefile @@ -69,7 +69,13 @@ endif ifeq ("$(PLATFORM)",$(filter "$(PLATFORM)","rpi1" "rpi2")) CFLAGS += -DHAVE_GLES -DRASPBERRY CFLAGS += -I/opt/vc/include/ -I/opt/vc/include/interface/vcos/pthreads/ -I/opt/vc/include/interface/vmcs_host/linux/ -LDFLAGS += -ldl -lbcm_host -L/opt/vc/lib -lEGL -lGLESv2 +LDFLAGS += -ldl -lbcm_host -L/opt/vc/lib +# Stupid renaming occured in latest raspbian... +ifneq (,$(wildcard /opt/vc/lib/libbrcmGLESv2.so)) +LDFLAGS += -lbrcmEGL -lbrcmGLESv2 +else +LDFLAGS += -lEGL -lGLESv2 +endif OBJS += platform/linux/emu.o platform/linux/blit.o # FIXME OBJS += platform/common/plat_sdl.o OBJS += platform/libpicofe/plat_sdl.o platform/libpicofe/in_sdl.o diff --git a/README b/README index d9c10910..af1bbfcc 100644 --- a/README +++ b/README @@ -12,10 +12,3 @@ then taken over and expanded by notaz. PicoDrive was the first emulator ever to properly emulate Virtua Racing and it's SVP chip. 
- -How to compile on Raspbian Wheezy: - -export CC=gcc-4.8 -export CXX=g++-4.8 -./configure --platform=rpi2 -make From 9cdfc191b60cb8881552d01f84e21bdcbb48758d Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 5 Apr 2014 22:08:06 +0300 Subject: [PATCH 0144/1110] add a pitch variable 32x is still broken --- platform/common/emu.c | 16 ++++++++------ platform/common/emu.h | 1 + platform/common/menu_pico.c | 43 ++++++++++++++++++++----------------- platform/common/plat_sdl.c | 14 ++++++------ platform/gp2x/plat.c | 1 + platform/libpicofe | 2 +- platform/linux/emu.c | 16 +++++++------- platform/linux/io.c | 6 ++++-- platform/pandora/menu.c | 2 +- platform/pandora/plat.c | 2 +- platform/win32/plat.c | 2 +- 11 files changed, 59 insertions(+), 46 deletions(-) diff --git a/platform/common/emu.c b/platform/common/emu.c index b4db4c67..0a9f0890 100644 --- a/platform/common/emu.c +++ b/platform/common/emu.c @@ -42,6 +42,7 @@ void *g_screen_ptr; int g_screen_width = 320; int g_screen_height = 240; +int g_screen_ppitch = 320; // pitch in pixels const char *PicoConfigFile = "config2.cfg"; currentConfig_t currentConfig, defaultConfig; @@ -730,12 +731,12 @@ void name(int x, int y, const char *text) \ } \ } -mk_text_out(emu_text_out8, unsigned char, 0xf0, g_screen_ptr, 1, g_screen_width) -mk_text_out(emu_text_out16, unsigned short, 0xffff, g_screen_ptr, 1, g_screen_width) +mk_text_out(emu_text_out8, unsigned char, 0xf0, g_screen_ptr, 1, g_screen_ppitch) +mk_text_out(emu_text_out16, unsigned short, 0xffff, g_screen_ptr, 1, g_screen_ppitch) mk_text_out(emu_text_out8_rot, unsigned char, 0xf0, - (char *)g_screen_ptr + (g_screen_width - 1) * g_screen_height, -g_screen_height, 1) + (char *)g_screen_ptr + (g_screen_ppitch - 1) * g_screen_height, -g_screen_height, 1) mk_text_out(emu_text_out16_rot, unsigned short, 0xffff, - (short *)g_screen_ptr + (g_screen_width - 1) * g_screen_height, -g_screen_height, 1) + (short *)g_screen_ptr + (g_screen_ppitch - 1) * g_screen_height, -g_screen_height, 
1) #undef mk_text_out @@ -751,7 +752,7 @@ void emu_osd_text16(int x, int y, const char *text) for (h = 0; h < 8; h++) { unsigned short *p; p = (unsigned short *)g_screen_ptr - + x + g_screen_width * (y + h); + + x + g_screen_ppitch * (y + h); for (i = len; i > 0; i--, p++) *p = (*p >> 2) & 0x39e7; } @@ -1204,8 +1205,11 @@ static void mkdir_path(char *path_with_reserve, int pos, const char *name) void emu_cmn_forced_frame(int no_scale, int do_emu) { int po_old = PicoIn.opt; + int y; - memset32(g_screen_ptr, 0, g_screen_width * g_screen_height * 2 / 4); + for (y = 0; y < g_screen_height; y++) + memset32((short *)g_screen_ptr + g_screen_ppitch * y, 0, + g_screen_width * 2 / 4); PicoIn.opt &= ~POPT_ALT_RENDERER; PicoIn.opt |= POPT_ACC_SPRITES; diff --git a/platform/common/emu.h b/platform/common/emu.h index 9a5ae660..1e751f89 100644 --- a/platform/common/emu.h +++ b/platform/common/emu.h @@ -16,6 +16,7 @@ extern void *g_screen_ptr; extern int g_screen_width; extern int g_screen_height; +extern int g_screen_ppitch; // pitch in pixels #define EOPT_EN_SRAM (1<<0) #define EOPT_SHOW_FPS (1<<1) diff --git a/platform/common/menu_pico.c b/platform/common/menu_pico.c index 969fc8e2..ab91e1c2 100644 --- a/platform/common/menu_pico.c +++ b/platform/common/menu_pico.c @@ -107,7 +107,7 @@ static void make_bg(int no_scale) (g_menuscreen_w / 2 - w / 2); // darken the active framebuffer - for (; h > 0; dst += g_menuscreen_w, src += g_screen_width, h--) + for (; h > 0; dst += g_menuscreen_w, src += g_screen_ppitch, h--) menu_darken_bg(dst, src, w, 1); } @@ -167,8 +167,8 @@ static void load_progress_cb(int percent) len = g_menuscreen_w; menu_draw_begin(0, 1); - dst = (unsigned short *)g_menuscreen_ptr + g_menuscreen_w * me_sfont_h * 2; - for (ln = me_sfont_h - 2; ln > 0; ln--, dst += g_menuscreen_w) + dst = (unsigned short *)g_menuscreen_ptr + g_menuscreen_pp * me_sfont_h * 2; + for (ln = me_sfont_h - 2; ln > 0; ln--, dst += g_menuscreen_pp) memset(dst, 0xff, len * 2); menu_draw_end(); 
} @@ -179,17 +179,18 @@ static void cdload_progress_cb(const char *fname, int percent) unsigned short *dst; menu_draw_begin(0, 1); - dst = (unsigned short *)g_menuscreen_ptr + g_menuscreen_w * me_sfont_h * 2; - memset(dst, 0xff, g_menuscreen_w * (me_sfont_h - 2) * 2); + dst = (unsigned short *)g_menuscreen_ptr + g_menuscreen_pp * me_sfont_h * 2; + + menuscreen_memset_lines(dst, 0xff, me_sfont_h - 2); smalltext_out16(1, 3 * me_sfont_h, "Processing CD image / MP3s", 0xffff); smalltext_out16(1, 4 * me_sfont_h, fname, 0xffff); - dst += g_menuscreen_w * me_sfont_h * 3; + dst += g_menuscreen_pp * me_sfont_h * 3; if (len > g_menuscreen_w) len = g_menuscreen_w; - for (ln = (me_sfont_h - 2); ln > 0; ln--, dst += g_menuscreen_w) + for (ln = (me_sfont_h - 2); ln > 0; ln--, dst += g_menuscreen_pp) memset(dst, 0xff, len * 2); menu_draw_end(); @@ -848,13 +849,15 @@ static void debug_menu_loop(void) break; case 2: pemu_forced_frame(1, 0); make_bg(1); - PDebugShowSpriteStats((unsigned short *)g_menuscreen_ptr + (g_menuscreen_h/2 - 240/2)*g_menuscreen_w + - g_menuscreen_w/2 - 320/2, g_menuscreen_w); + PDebugShowSpriteStats((unsigned short *)g_menuscreen_ptr + + (g_menuscreen_h/2 - 240/2) * g_menuscreen_pp + + g_menuscreen_w/2 - 320/2, g_menuscreen_pp); break; - case 3: memset(g_menuscreen_ptr, 0, g_menuscreen_w * g_menuscreen_h * 2); - PDebugShowPalette(g_menuscreen_ptr, g_menuscreen_w); - PDebugShowSprite((unsigned short *)g_menuscreen_ptr + g_menuscreen_w*120 + g_menuscreen_w/2 + 16, - g_menuscreen_w, spr_offs); + case 3: menuscreen_memset_lines(g_menuscreen_ptr, 0, g_menuscreen_h); + PDebugShowPalette(g_menuscreen_ptr, g_menuscreen_pp); + PDebugShowSprite((unsigned short *)g_menuscreen_ptr + + g_menuscreen_pp * 120 + g_menuscreen_w / 2 + 16, + g_menuscreen_pp, spr_offs); draw_text_debug(PDebugSpriteList(), spr_offs, 6); break; case 4: tmp = PDebug32x(); @@ -974,23 +977,23 @@ static void menu_main_draw_status(void) return; /* battery info */ - bp += (me_mfont_h * 2 + 2) * 
g_screen_width + g_screen_width - me_mfont_w * 3 - 3; + bp += (me_mfont_h * 2 + 2) * g_screen_ppitch + g_screen_width - me_mfont_w * 3 - 3; for (i = 0; i < me_mfont_w * 2; i++) bp[i] = menu_text_color; for (i = 0; i < me_mfont_w * 2; i++) - bp[i + g_screen_width * bat_h] = menu_text_color; + bp[i + g_screen_ppitch * bat_h] = menu_text_color; for (i = 0; i <= bat_h; i++) - bp[i * g_screen_width] = - bp[i * g_screen_width + me_mfont_w * 2] = menu_text_color; + bp[i * g_screen_ppitch] = + bp[i * g_screen_ppitch + me_mfont_w * 2] = menu_text_color; for (i = 2; i < bat_h - 1; i++) - bp[i * g_screen_width - 1] = - bp[i * g_screen_width - 2] = menu_text_color; + bp[i * g_screen_ppitch - 1] = + bp[i * g_screen_ppitch - 2] = menu_text_color; w = me_mfont_w * 2 - 1; wfill = batt_val * w / 100; for (u = 1; u < bat_h; u++) for (i = 0; i < wfill; i++) - bp[(w - i) + g_screen_width * u] = menu_text_color; + bp[(w - i) + g_screen_ppitch * u] = menu_text_color; } static int main_menu_handler(int id, int keys) diff --git a/platform/common/plat_sdl.c b/platform/common/plat_sdl.c index 1b617d5a..3948cc41 100644 --- a/platform/common/plat_sdl.c +++ b/platform/common/plat_sdl.c @@ -158,19 +158,19 @@ void plat_video_flip(void) SDL_LockYUVOverlay(plat_sdl_overlay); rgb565_to_uyvy(plat_sdl_overlay->pixels[0], shadow_fb, - g_screen_width * g_screen_height); + g_screen_ppitch * g_screen_height); SDL_UnlockYUVOverlay(plat_sdl_overlay); SDL_DisplayYUVOverlay(plat_sdl_overlay, &dstrect); } else if (plat_sdl_gl_active) { - gl_flip(shadow_fb, g_screen_width, g_screen_height); + gl_flip(shadow_fb, g_screen_ppitch, g_screen_height); } else { if (SDL_MUSTLOCK(plat_sdl_screen)) SDL_UnlockSurface(plat_sdl_screen); SDL_Flip(plat_sdl_screen); g_screen_ptr = plat_sdl_screen->pixels; - PicoDrawSetOutBuf(g_screen_ptr, g_screen_width * 2); + PicoDrawSetOutBuf(g_screen_ptr, g_screen_ppitch * 2); } } @@ -204,13 +204,13 @@ void plat_video_menu_end(void) SDL_LockYUVOverlay(plat_sdl_overlay); 
rgb565_to_uyvy(plat_sdl_overlay->pixels[0], shadow_fb, - g_menuscreen_w * g_menuscreen_h); + g_menuscreen_pp * g_menuscreen_h); SDL_UnlockYUVOverlay(plat_sdl_overlay); SDL_DisplayYUVOverlay(plat_sdl_overlay, &dstrect); } else if (plat_sdl_gl_active) { - gl_flip(g_menuscreen_ptr, g_menuscreen_w, g_menuscreen_h); + gl_flip(g_menuscreen_ptr, g_menuscreen_pp, g_menuscreen_h); } else { if (SDL_MUSTLOCK(plat_sdl_screen)) @@ -237,7 +237,7 @@ void plat_video_loop_prepare(void) SDL_LockSurface(plat_sdl_screen); g_screen_ptr = plat_sdl_screen->pixels; } - PicoDrawSetOutBuf(g_screen_ptr, g_screen_width * 2); + PicoDrawSetOutBuf(g_screen_ptr, g_screen_ppitch * 2); } void plat_early_init(void) @@ -265,6 +265,7 @@ void plat_init(void) g_menuscreen_w = plat_sdl_screen->w; g_menuscreen_h = plat_sdl_screen->h; + g_menuscreen_pp = g_menuscreen_w; g_menuscreen_ptr = NULL; shadow_size = g_menuscreen_w * g_menuscreen_h * 2; @@ -280,6 +281,7 @@ void plat_init(void) g_screen_width = 320; g_screen_height = 240; + g_screen_ppitch = 320; g_screen_ptr = shadow_fb; in_sdl_init(&in_sdl_platform_data, plat_sdl_event_handler); diff --git a/platform/gp2x/plat.c b/platform/gp2x/plat.c index 614ab7f0..4fcf76f4 100644 --- a/platform/gp2x/plat.c +++ b/platform/gp2x/plat.c @@ -208,6 +208,7 @@ void plat_init(void) g_menuscreen_w = 320; g_menuscreen_h = 240; + g_menuscreen_pp = g_menuscreen_w; gp2x_memset_all_buffers(0, 0, 320*240*2); gp2x_make_fb_bufferable(1); diff --git a/platform/libpicofe b/platform/libpicofe index 21082d0b..2b27288e 160000 --- a/platform/libpicofe +++ b/platform/libpicofe @@ -1 +1 @@ -Subproject commit 21082d0b2b9910727770674cef9b68b9e97a3155 +Subproject commit 2b27288eb44ef9247d2a948a207d3ac9b835421a diff --git a/platform/linux/emu.c b/platform/linux/emu.c index c470a771..5d4432fa 100644 --- a/platform/linux/emu.c +++ b/platform/linux/emu.c @@ -67,7 +67,7 @@ static void draw_cd_leds(void) void pemu_finalize_frame(const char *fps, const char *notice) { if (currentConfig.renderer 
!= RT_16BIT && !(PicoIn.AHW & PAHW_32X)) { - unsigned short *pd = (unsigned short *)g_screen_ptr + 8 * g_screen_width; + unsigned short *pd = (unsigned short *)g_screen_ptr + 8 * g_screen_ppitch; unsigned char *ps = Pico.est.Draw2FB + 328*8 + 8; unsigned short *pal = Pico.est.HighPal; int i, x; @@ -94,7 +94,7 @@ static void apply_renderer(void) case RT_16BIT: PicoIn.opt &= ~POPT_ALT_RENDERER; PicoDrawSetOutFormat(PDF_RGB555, 0); - PicoDrawSetOutBuf(g_screen_ptr, g_screen_width * 2); + PicoDrawSetOutBuf(g_screen_ptr, g_screen_ppitch * 2); break; case RT_8BIT_ACC: PicoIn.opt &= ~POPT_ALT_RENDERER; @@ -108,7 +108,7 @@ static void apply_renderer(void) } if (PicoIn.AHW & PAHW_32X) - PicoDrawSetOutBuf(g_screen_ptr, g_screen_width * 2); + PicoDrawSetOutBuf(g_screen_ptr, g_screen_ppitch * 2); } void plat_video_toggle_renderer(int change, int is_menu) @@ -127,8 +127,8 @@ void plat_video_toggle_renderer(int change, int is_menu) void plat_status_msg_clear(void) { - unsigned short *d = (unsigned short *)g_screen_ptr + g_screen_width * g_screen_height; - int l = g_screen_width * 8; + unsigned short *d = (unsigned short *)g_screen_ptr + g_screen_ppitch * g_screen_height; + int l = g_screen_ppitch * 8; memset32((int *)(d - l), 0, l * 2 / 4); } @@ -143,7 +143,7 @@ void plat_status_msg_busy_next(const char *msg) void plat_status_msg_busy_first(const char *msg) { -// memset32(g_screen_ptr, 0, g_screen_width * g_screen_height * 2 / 4); +// memset32(g_screen_ptr, 0, g_screen_ppitch * g_screen_height * 2 / 4); plat_status_msg_busy_next(msg); } @@ -153,7 +153,7 @@ void plat_update_volume(int has_changed, int is_up) void pemu_forced_frame(int no_scale, int do_emu) { - PicoDrawSetOutBuf(g_screen_ptr, g_screen_width * 2); + PicoDrawSetOutBuf(g_screen_ptr, g_screen_ppitch * 2); PicoDrawSetCallbacks(NULL, NULL); Pico.m.dirtyPal = 1; @@ -174,7 +174,7 @@ void plat_debug_cat(char *str) void emu_video_mode_change(int start_line, int line_count, int is_32cols) { // clear whole screen in all 
buffers - memset32(g_screen_ptr, 0, g_screen_width * g_screen_height * 2 / 4); + memset32(g_screen_ptr, 0, g_screen_ppitch * g_screen_height * 2 / 4); } void pemu_loop_prep(void) diff --git a/platform/linux/io.c b/platform/linux/io.c index edc5f6eb..57601ccc 100644 --- a/platform/linux/io.c +++ b/platform/linux/io.c @@ -134,7 +134,7 @@ static void xlib_update(void) XLockDisplay(xlib_display); xstatus = XPutImage(xlib_display, xlib_window, DefaultGC(xlib_display, 0), ximage, - 0, 0, 0, 0, g_screen_width, g_screen_height); + 0, 0, 0, 0, g_screen_ppitch, g_screen_height); if (xstatus != 0) fprintf(stderr, "XPutImage %d\n", xstatus); @@ -260,6 +260,7 @@ static void realloc_screen(void) int size = scr_w * scr_h * 2; g_screen_width = g_menuscreen_w = scr_w; g_screen_height = g_menuscreen_h = scr_h; + g_screen_ppitch = g_menuscreen_pp = scr_w; g_screen_ptr = realloc(g_screen_ptr, size); g_menubg_ptr = realloc(g_menubg_ptr, size); memset(g_screen_ptr, 0, size); @@ -275,7 +276,7 @@ void plat_video_flip(void) if (ximage == NULL) return; - pixel_count = g_screen_width * g_screen_height; + pixel_count = g_screen_ppitch * g_screen_height; image = (void *)ximage->data; if (current_bpp == 8) @@ -332,6 +333,7 @@ void plat_init(void) exit(1); g_screen_width = g_menuscreen_w = w; g_screen_height = g_menuscreen_h = h; + g_screen_ppitch = g_menuscreen_pp = w; g_menubg_ptr = realloc(g_menubg_ptr, w * g_screen_height * 2); #else realloc_screen(); diff --git a/platform/pandora/menu.c b/platform/pandora/menu.c index 9f451feb..867b5c0d 100644 --- a/platform/pandora/menu.c +++ b/platform/pandora/menu.c @@ -18,7 +18,7 @@ static int menu_loop_cscaler(int id, int keys) for (;;) { menu_draw_begin(0, 1); - memset(g_menuscreen_ptr, 0, g_menuscreen_w * g_menuscreen_h * 2); + menuscreen_memset_lines(g_menuscreen_ptr, 0, g_menuscreen_h); text_out16(2, 480 - 18, "%dx%d | d-pad to resize, R+d-pad to move", g_layer_cw, g_layer_ch); menu_draw_end(); diff --git a/platform/pandora/plat.c 
b/platform/pandora/plat.c index a2e70eb8..5f10b17b 100644 --- a/platform/pandora/plat.c +++ b/platform/pandora/plat.c @@ -488,7 +488,7 @@ void plat_init(void) exit(1); } - g_menuscreen_w = w; + g_menuscreen_w = g_menuscreen_pp = w; g_menuscreen_h = h; g_menuscreen_ptr = vout_fbdev_flip(main_fb); diff --git a/platform/win32/plat.c b/platform/win32/plat.c index 8abb0626..54af8292 100644 --- a/platform/win32/plat.c +++ b/platform/win32/plat.c @@ -75,7 +75,7 @@ void pemu_validate_config(void) void pemu_loop_prep(void) { PicoDrawSetOutFormat(PDF_RGB555, 1); - PicoDrawSetOutBuf(g_screen_ptr, g_screen_width * 2); + PicoDrawSetOutBuf(g_screen_ptr, g_screen_ppitch * 2); pemu_sound_start(); } From a0b95da11240b4e936e4bab64a002db394130bb0 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 3 Jan 2018 03:00:32 +0200 Subject: [PATCH 0145/1110] libretro: pass required arg to VirtualProtect --- platform/libretro/libretro.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/platform/libretro/libretro.c b/platform/libretro/libretro.c index 2bdd07d2..42d30c2d 100644 --- a/platform/libretro/libretro.c +++ b/platform/libretro/libretro.c @@ -451,7 +451,8 @@ int plat_mem_set_exec(void *ptr, size_t size) { int ret = -1; #ifdef _WIN32 - ret = VirtualProtect(ptr, size, PAGE_EXECUTE_READWRITE, 0); + DWORD oldProtect = 0; + ret = VirtualProtect(ptr, size, PAGE_EXECUTE_READWRITE, &oldProtect); if (ret == 0 && log_cb) log_cb(RETRO_LOG_ERROR, "VirtualProtect(%p, %d) failed: %d\n", ptr, (int)size, GetLastError()); From 48c9e01be8ad93a7902e22f9ad07aba4527e6572 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 6 Jan 2018 21:29:59 +0200 Subject: [PATCH 0146/1110] improve 64bit portability for win64 mostly --- cpu/cz80/cz80.h | 8 +++++++- cpu/cz80/cz80macro.h | 2 +- cpu/fame/fame.h | 10 ++++++++-- cpu/fame/famec.c | 2 +- cpu/musashi/m68kcpu.h | 1 + pico/32x/draw.c | 2 +- pico/32x/memory.c | 4 ++-- pico/cd/memory.c | 12 ++++++------ pico/memory.c | 4 ++-- pico/memory.h | 4 +++- 
pico/pico_int.h | 1 - pico/pico_port.h | 6 ++++++ pico/sound/sound.c | 2 +- platform/libretro/libretro.c | 10 +++------- 14 files changed, 42 insertions(+), 26 deletions(-) diff --git a/cpu/cz80/cz80.h b/cpu/cz80/cz80.h index 9b97d3b4..f3f79b39 100644 --- a/cpu/cz80/cz80.h +++ b/cpu/cz80/cz80.h @@ -11,6 +11,12 @@ #ifndef CZ80_H #define CZ80_H +// uintptr_t +#include +#ifndef _MSC_VER +#include +#endif + #ifdef __cplusplus extern "C" { #endif @@ -44,7 +50,7 @@ extern "C" { #endif #ifndef FPTR -#define FPTR unsigned long +#define FPTR uintptr_t #endif /*************************************/ diff --git a/cpu/cz80/cz80macro.h b/cpu/cz80/cz80macro.h index 5adca13a..2e21d409 100644 --- a/cpu/cz80/cz80macro.h +++ b/cpu/cz80/cz80macro.h @@ -73,7 +73,7 @@ #define WRITE_MEM8(A, D) { \ unsigned short a = A; \ unsigned char d = D; \ - unsigned long v = z80_write_map[a >> Z80_MEM_SHIFT]; \ + uptr v = z80_write_map[a >> Z80_MEM_SHIFT]; \ if (map_flag_set(v)) \ ((z80_write_f *)(v << 1))(a, d); \ else \ diff --git a/cpu/fame/fame.h b/cpu/fame/fame.h index 0baabf26..aba9316e 100644 --- a/cpu/fame/fame.h +++ b/cpu/fame/fame.h @@ -9,6 +9,12 @@ #ifndef __FAME_H__ #define __FAME_H__ +// uintptr_t +#include +#ifndef _MSC_VER +#include +#endif + #ifdef __cplusplus extern "C" { #endif @@ -127,7 +133,7 @@ typedef struct signed int cycles_needed; unsigned short *PC; - unsigned long BasePC; + uintptr_t BasePC; unsigned int flag_C; unsigned int flag_V; unsigned int flag_NotZ; @@ -140,7 +146,7 @@ typedef struct unsigned char not_polling; unsigned char pad[3]; - unsigned long Fetch[M68K_FETCHBANK1]; + uintptr_t Fetch[M68K_FETCHBANK1]; } M68K_CONTEXT; typedef enum diff --git a/cpu/fame/famec.c b/cpu/fame/famec.c index 41620944..60a9e550 100644 --- a/cpu/fame/famec.c +++ b/cpu/fame/famec.c @@ -94,7 +94,7 @@ #define s16 signed short #define u32 unsigned int #define s32 signed int -#define uptr unsigned long +#define uptr uintptr_t /* typedef unsigned char u8; diff --git a/cpu/musashi/m68kcpu.h 
b/cpu/musashi/m68kcpu.h index 63055cbe..a1dff7de 100644 --- a/cpu/musashi/m68kcpu.h +++ b/cpu/musashi/m68kcpu.h @@ -142,6 +142,7 @@ /* Exception Vectors handled by emulation */ #define EXCEPTION_BUS_ERROR 2 /* This one is not emulated! */ #define EXCEPTION_ADDRESS_ERROR 3 /* This one is partially emulated (doesn't stack a proper frame yet) */ +#undef EXCEPTION_ILLEGAL_INSTRUCTION #define EXCEPTION_ILLEGAL_INSTRUCTION 4 #define EXCEPTION_ZERO_DIVIDE 5 #define EXCEPTION_CHK 6 diff --git a/pico/32x/draw.c b/pico/32x/draw.c index ee541bd9..2287e246 100644 --- a/pico/32x/draw.c +++ b/pico/32x/draw.c @@ -58,7 +58,7 @@ static void convert_pal555(int invert_prio) unsigned short t; \ int i; \ for (i = 320; i > 0; i--, pd++, p32x++, pmd++) { \ - t = pal[*(unsigned char *)((long)p32x ^ 1)]; \ + t = pal[*(unsigned char *)((uintptr_t)p32x ^ 1)]; \ if ((t & 0x20) || (*pmd & 0x3f) == mdbg) \ *pd = t; \ else \ diff --git a/pico/32x/memory.c b/pico/32x/memory.c index 706d820e..d9f18db5 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -1786,9 +1786,9 @@ void PicoMemSetup32x(void) cpu68k_map_set(m68k_write16_map, 0x880000, 0x880000 + rs - 1, PicoWrite16_cart, 1); #ifdef EMU_F68K // setup FAME fetchmap - PicoCpuFM68k.Fetch[0] = (unsigned long)Pico32xMem->m68k_rom; + PicoCpuFM68k.Fetch[0] = (uptr)Pico32xMem->m68k_rom; for (rs = 0x88; rs < 0x90; rs++) - PicoCpuFM68k.Fetch[rs] = (unsigned long)Pico.rom - 0x880000; + PicoCpuFM68k.Fetch[rs] = (uptr)Pico.rom - 0x880000; #endif // 32X ROM (banked) diff --git a/pico/cd/memory.c b/pico/cd/memory.c index 22694389..1c5dcf94 100644 --- a/pico/cd/memory.c +++ b/pico/cd/memory.c @@ -1168,29 +1168,29 @@ PICO_INTERNAL void PicoMemSetupCD(void) #ifdef __clang__ volatile // prevent strange relocs from clang #endif - unsigned long ptr_ram = (unsigned long)PicoMem.ram; + unsigned long ptr_ram = (uptr)PicoMem.ram; int i; // M68k // by default, point everything to fitst 64k of ROM (BIOS) for (i = 0; i < M68K_FETCHBANK1; i++) - 
PicoCpuFM68k.Fetch[i] = (unsigned long)Pico.rom - (i<<(24-FAMEC_FETCHBITS)); + PicoCpuFM68k.Fetch[i] = (uptr)Pico.rom - (i<<(24-FAMEC_FETCHBITS)); // now real ROM (BIOS) for (i = 0; i < M68K_FETCHBANK1 && (i<<(24-FAMEC_FETCHBITS)) < Pico.romsize; i++) - PicoCpuFM68k.Fetch[i] = (unsigned long)Pico.rom; + PicoCpuFM68k.Fetch[i] = (uptr)Pico.rom; // .. and RAM for (i = M68K_FETCHBANK1*14/16; i < M68K_FETCHBANK1; i++) PicoCpuFM68k.Fetch[i] = ptr_ram - (i<<(24-FAMEC_FETCHBITS)); // S68k // PRG RAM is default for (i = 0; i < M68K_FETCHBANK1; i++) - PicoCpuFS68k.Fetch[i] = (unsigned long)Pico_mcd->prg_ram - (i<<(24-FAMEC_FETCHBITS)); + PicoCpuFS68k.Fetch[i] = (uptr)Pico_mcd->prg_ram - (i<<(24-FAMEC_FETCHBITS)); // real PRG RAM for (i = 0; i < M68K_FETCHBANK1 && (i<<(24-FAMEC_FETCHBITS)) < 0x80000; i++) - PicoCpuFS68k.Fetch[i] = (unsigned long)Pico_mcd->prg_ram; + PicoCpuFS68k.Fetch[i] = (uptr)Pico_mcd->prg_ram; // WORD RAM 2M area for (i = M68K_FETCHBANK1*0x08/0x100; i < M68K_FETCHBANK1 && (i<<(24-FAMEC_FETCHBITS)) < 0xc0000; i++) - PicoCpuFS68k.Fetch[i] = (unsigned long)Pico_mcd->word_ram2M - 0x80000; + PicoCpuFS68k.Fetch[i] = (uptr)Pico_mcd->word_ram2M - 0x80000; // remap_word_ram() will setup word ram for both } #endif diff --git a/pico/memory.c b/pico/memory.c index c633c89b..a31a08e9 100644 --- a/pico/memory.c +++ b/pico/memory.c @@ -832,10 +832,10 @@ PICO_INTERNAL void PicoMemSetup(void) int i; // by default, point everything to first 64k of ROM for (i = 0; i < M68K_FETCHBANK1 * 0xe0 / 0x100; i++) - PicoCpuFM68k.Fetch[i] = (unsigned long)Pico.rom - (i<<(24-FAMEC_FETCHBITS)); + PicoCpuFM68k.Fetch[i] = (uptr)Pico.rom - (i<<(24-FAMEC_FETCHBITS)); // now real ROM for (i = 0; i < M68K_FETCHBANK1 && (i<<(24-FAMEC_FETCHBITS)) < Pico.romsize; i++) - PicoCpuFM68k.Fetch[i] = (unsigned long)Pico.rom; + PicoCpuFM68k.Fetch[i] = (uptr)Pico.rom; // RAM already set } #endif diff --git a/pico/memory.h b/pico/memory.h index ae7ae50d..eb440dd4 100644 --- a/pico/memory.h +++ 
b/pico/memory.h @@ -1,9 +1,11 @@ // memory map related stuff +#include "pico_port.h" + typedef unsigned char u8; typedef unsigned short u16; typedef unsigned int u32; -typedef unsigned long uptr; // unsigned pointer-sized int +typedef uintptr_t uptr; // unsigned pointer-sized int #define M68K_MEM_SHIFT 16 // minimum size we can map diff --git a/pico/pico_int.h b/pico/pico_int.h index 0b50e4b9..da0fbb90 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -11,7 +11,6 @@ #define PICO_INTERNAL_INCLUDED #include -#include #include #include "pico_port.h" #include "pico.h" diff --git a/pico/pico_port.h b/pico/pico_port.h index 70802202..e26e6ca2 100644 --- a/pico/pico_port.h +++ b/pico/pico_port.h @@ -1,6 +1,12 @@ #ifndef PICO_PORT_INCLUDED #define PICO_PORT_INCLUDED +// provide size_t, uintptr_t +#include +#if !(defined(_MSC_VER) && _MSC_VER < 1800) +#include +#endif + #if defined(__GNUC__) && defined(__i386__) #define REGPARM(x) __attribute__((regparm(x))) #else diff --git a/pico/sound/sound.c b/pico/sound/sound.c index fc71b741..95aac128 100644 --- a/pico/sound/sound.c +++ b/pico/sound/sound.c @@ -265,7 +265,7 @@ PICO_INTERNAL void PsndClear(void) memset32((int *) PicoIn.sndOut, 0, len); // assume PicoIn.sndOut to be aligned else { short *out = PicoIn.sndOut; - if ((long)out & 2) { *out++ = 0; len--; } + if ((uintptr_t)out & 2) { *out++ = 0; len--; } memset32((int *) out, 0, len/2); if (len & 1) out[len-1] = 0; } diff --git a/platform/libretro/libretro.c b/platform/libretro/libretro.c index 42d30c2d..9111048f 100644 --- a/platform/libretro/libretro.c +++ b/platform/libretro/libretro.c @@ -79,11 +79,7 @@ static void *vout_buf; static int vout_width, vout_height, vout_offset; static float user_vout_width = 0.0; -#ifdef _MSC_VER -static short sndBuffer[2*44100/50]; -#else -static short __attribute__((aligned(4))) sndBuffer[2*44100/50]; -#endif +static short ALIGNED(4) sndBuffer[2*44100/50]; static void snd_write(int len); @@ -375,7 +371,7 @@ void *plat_mmap(unsigned 
long addr, size_t size, int need_exec, int is_fixed) int flags = MAP_PRIVATE | MAP_ANONYMOUS; void *req, *ret; - req = (void *)addr; + req = (void *)(uintptr_t)addr; ret = mmap(req, size, PROT_READ | PROT_WRITE, flags, -1, 0); if (ret == MAP_FAILED) { if (log_cb) @@ -383,7 +379,7 @@ void *plat_mmap(unsigned long addr, size_t size, int need_exec, int is_fixed) return NULL; } - if (addr != 0 && ret != (void *)addr) { + if (addr != 0 && ret != (void *)(uintptr_t)addr) { if (log_cb) log_cb(RETRO_LOG_WARN, "warning: wanted to map @%08lx, got %p\n", addr, ret); From fda2f31020bf0d6cf7b5dd70ec01cf390b7e1483 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 7 Jan 2018 01:20:00 +0200 Subject: [PATCH 0147/1110] drc: support ms ABI --- cpu/drc/emit_arm.c | 3 ++ cpu/drc/emit_x86.c | 103 +++++++++++++++++++++++++++++++++++---------- cpu/sh2/compiler.c | 21 ++++++--- 3 files changed, 99 insertions(+), 28 deletions(-) diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index 7b5566dd..91b47402 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -436,6 +436,9 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_tst_r_r(d, s) \ EOP_TST_REG(A_COND_AL,d,s,A_AM1_LSL,0) +#define emith_tst_r_r_ptr(d, s) \ + emith_tst_r_r(d, s) + #define emith_teq_r_r(d, s) \ EOP_TEQ_REG(A_COND_AL,d,s,A_AM1_LSL,0) diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 5362d9bd..865aab4b 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -64,20 +64,35 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; EMIT(op, u8); \ } while (0) -#define EMIT_MODRM(mod,r,rm) \ - EMIT(((mod)<<6) | ((r)<<3) | (rm), u8) +#define EMIT_MODRM(mod, r, rm) do { \ + assert((mod) < 4u); \ + assert((r) < 8u); \ + assert((rm) < 8u); \ + EMIT(((mod)<<6) | ((r)<<3) | (rm), u8); \ +} while (0) -#define EMIT_SIB(scale,index,base) \ - EMIT(((scale)<<6) | ((index)<<3) | (base), u8) +#define EMIT_SIB(scale, index, base) do { \ + assert((scale) < 4u); \ + assert((index) < 8u); \ + 
assert((base) < 8u); \ + EMIT(((scale)<<6) | ((index)<<3) | (base), u8); \ +} while (0) + +#define EMIT_SIB64(scale, index, base) \ + EMIT_SIB(scale, (index) & ~8u, (base) & ~8u) #define EMIT_REX(w,r,x,b) \ EMIT(0x40 | ((w)<<3) | ((r)<<2) | ((x)<<1) | (b), u8) #define EMIT_OP_MODRM(op,mod,r,rm) do { \ EMIT_OP(op); \ - EMIT_MODRM(mod, r, rm); \ + EMIT_MODRM(mod, (r), rm); \ } while (0) +// 64bit friendly, rm when everything is converted +#define EMIT_OP_MODRM64(op, mod, r, rm) \ + EMIT_OP_MODRM(op, mod, (r) & ~8u, (rm) & ~8u) + #define JMP8_POS(ptr) \ ptr = tcache_ptr; \ tcache_ptr += 2 @@ -95,8 +110,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; EMIT_OP_MODRM(0x8b, 3, dst, src) #define emith_move_r_r_ptr(dst, src) do { \ - EMIT_REX_FOR_PTR(); \ - EMIT_OP_MODRM(0x8b, 3, dst, src); \ + EMIT_REX_IF(1, dst, src); \ + EMIT_OP_MODRM64(0x8b, 3, dst, src); \ } while (0) #define emith_add_r_r(d, s) \ @@ -123,6 +138,11 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_tst_r_r(d, s) \ EMIT_OP_MODRM(0x85, 3, s, d) /* TEST */ +#define emith_tst_r_r_ptr(d, s) do { \ + EMIT_REX_IF(1, s, d); \ + EMIT_OP_MODRM64(0x85, 3, s, d); /* TEST */ \ +} while (0) + #define emith_cmp_r_r(d, s) \ EMIT_OP_MODRM(0x39, 3, s, d) @@ -298,15 +318,15 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; } while (0) #define emith_add_r_r_ptr_imm(d, s, imm) do { \ - if (s != xSP) { \ - EMIT_REX_FOR_PTR(); \ - EMIT_OP_MODRM(0x8d, 2, d, s); /* lea */ \ + if ((s) != xSP) { \ + EMIT_REX_IF(1, d, s); \ + EMIT_OP_MODRM64(0x8d, 2, d, s); /* lea */ \ } \ else { \ if (d != s) \ emith_move_r_r_ptr(d, s); \ - EMIT_REX_FOR_PTR(); \ - EMIT_OP_MODRM(0x81, 3, 0, d); /* add */ \ + EMIT_REX_IF(1, 0, d); \ + EMIT_OP_MODRM64(0x81, 3, 0, d); /* add */ \ } \ EMIT(imm, s32); \ } while (0) @@ -455,10 +475,10 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_deref_op(op, r, rs, offs) do { \ /* mov r <-> [ebp+#offs] */ \ if ((offs) >= 0x80) { \ - EMIT_OP_MODRM(op, 2, r, rs); 
\ + EMIT_OP_MODRM64(op, 2, r, rs); \ EMIT(offs, u32); \ } else { \ - EMIT_OP_MODRM(op, 1, r, rs); \ + EMIT_OP_MODRM64(op, 1, r, rs); \ EMIT(offs, u8); \ } \ } while (0) @@ -508,7 +528,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; emith_read_r_r_offs(r, CONTEXT_REG, offs) #define emith_ctx_read_ptr(r, offs) do { \ - EMIT_REX_FOR_PTR(); \ + EMIT_REX_IF(1, r, CONTEXT_REG); \ emith_deref_op(0x8b, r, CONTEXT_REG, offs); \ } while (0) @@ -652,10 +672,16 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #ifdef __x86_64__ #define PTR_SCALE 3 -#define NA_TMP_REG xCX // non-arg tmp from reg_temp[] +#define NA_TMP_REG xAX // non-arg tmp from reg_temp[] -#define EMIT_REX_FOR_PTR() \ - EMIT_REX(1,0,0,0) +#define EMIT_REX_IF(w, r, rm) do { \ + int r_ = (r) > 7 ? 1 : 0; \ + int rm_ = (rm) > 7 ? 1 : 0; \ + if ((w) | r_ | rm_) \ + EMIT_REX(1, r_, 0, rm_); \ +} while (0) + +#ifndef _WIN32 #define host_arg2reg(rd, arg) \ switch (arg) { \ @@ -677,12 +703,43 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; emith_ret(); \ } -#else +#else // _WIN32 + +#define host_arg2reg(rd, arg) \ + switch (arg) { \ + case 0: rd = xCX; break; \ + case 1: rd = xDX; break; \ + case 2: rd = 8; break; \ + } + +#define emith_sh2_drc_entry() { \ + emith_push(xBX); \ + emith_push(xBP); \ + emith_push(xSI); \ + emith_push(xDI); \ + emith_add_r_r_ptr_imm(xSP, xSP, -8*5); \ +} + +#define emith_sh2_drc_exit() { \ + emith_add_r_r_ptr_imm(xSP, xSP, 8*5); \ + emith_pop(xDI); \ + emith_pop(xSI); \ + emith_pop(xBP); \ + emith_pop(xBX); \ + emith_ret(); \ +} + +#endif // _WIN32 + +#else // !__x86_64__ #define PTR_SCALE 2 #define NA_TMP_REG xBX // non-arg tmp from reg_temp[] -#define EMIT_REX_FOR_PTR() +#define EMIT_REX_IF(w, r, rm) do { \ + assert((u32)(r) < 8u); \ + assert((u32)(rm) < 8u); \ +} while (0) #define host_arg2reg(rd, arg) \ switch (arg) { \ @@ -728,9 +785,9 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; int arg2_; \ host_arg2reg(arg2_, 2); \ emith_lsr(NA_TMP_REG, a, 
SH2_WRITE_SHIFT); \ - EMIT_REX_FOR_PTR(); \ - EMIT_OP_MODRM(0x8b, 0, NA_TMP_REG, 4); \ - EMIT_SIB(PTR_SCALE, NA_TMP_REG, tab); /* mov tmp, [tab + tmp * {4,8}] */ \ + EMIT_REX_IF(1, NA_TMP_REG, tab); \ + EMIT_OP_MODRM64(0x8b, 0, NA_TMP_REG, 4); \ + EMIT_SIB64(PTR_SCALE, NA_TMP_REG, tab); /* mov tmp, [tab + tmp * {4,8}] */ \ emith_move_r_r_ptr(arg2_, CONTEXT_REG); \ emith_jump_reg(NA_TMP_REG); \ } diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 3b6b45af..089f3ef1 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -313,9 +313,9 @@ static const int reg_map_g2h[] = { xSI,-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, - -1, -1, -1, xDI, - -1, -1, -1, -1, + -1, -1, -1, -1, // r12 .. sp + -1, -1, -1, xDI, // SHR_PC, SHR_PPC, SHR_PR, SHR_SR, + -1, -1, -1, -1, // SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL, }; // ax, cx, dx are usually temporaries by convention @@ -330,12 +330,21 @@ static temp_reg_t reg_temp[] = { #include "../drc/emit_x86.c" static const int reg_map_g2h[] = { +#ifndef _WIN32 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, // r12 .. sp + -1, -1, -1, xBX, // SHR_PC, SHR_PPC, SHR_PR, SHR_SR, + -1, -1, -1, -1, // SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL, +#else + xDI,-1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, xBX, -1, -1, -1, -1, + -1, -1, -1, -1, // r12 .. 
sp + -1, -1, -1, xBX, // SHR_PC, SHR_PPC, SHR_PR, SHR_SR, + -1, -1, -1, -1, // SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL, +#endif }; // ax, cx, dx are usually temporaries by convention @@ -344,7 +353,9 @@ static temp_reg_t reg_temp[] = { { xCX, }, { xDX, }, { xSI, }, +#ifndef _WIN32 { xDI, }, +#endif }; #else @@ -1400,7 +1411,7 @@ static void emit_block_entry(void) emith_call(sh2_drc_log_entry); rcache_invalidate(); #endif - emith_tst_r_r(RET_REG, RET_REG); + emith_tst_r_r_ptr(RET_REG, RET_REG); EMITH_SJMP_START(DCOND_EQ); emith_jump_reg_c(DCOND_NE, RET_REG); EMITH_SJMP_END(DCOND_EQ); From 8b9dbcde387f567a154e15ca14a0c6e5b3efa3ef Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 21 Jan 2018 18:55:38 +0200 Subject: [PATCH 0148/1110] 32x: implement standard/ssf2 mapper --- cpu/sh2/compiler.c | 5 ++- pico/32x/memory.c | 85 +++++++++++++++++++++++++++++++------------- pico/cart.c | 12 +++++-- pico/carthw/carthw.c | 75 ++++++++++++++++++++++---------------- pico/carthw/carthw.h | 6 +++- pico/pico_int.h | 2 ++ 6 files changed, 126 insertions(+), 59 deletions(-) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 089f3ef1..aa41a84d 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -2912,8 +2912,10 @@ end_op: tcache_id, blkid_main, tcache_ptr - tcache_bases[tcache_id], tcache_sizes[tcache_id], insns_compiled, host_insn_count, (float)host_insn_count / insns_compiled); - if ((sh2->pc & 0xc6000000) == 0x02000000) // ROM + if ((sh2->pc & 0xc6000000) == 0x02000000) { // ROM dbg(2, " hash collisions %d/%d", hash_collisions, block_counts[tcache_id]); + Pico32x.emu_flags |= P32XF_DRC_ROM_C; + } /* printf("~~~\n"); tcache_dsm_ptrs[tcache_id] = block_entry_ptr; @@ -3292,6 +3294,7 @@ void sh2_drc_flush_all(void) flush_tcache(0); flush_tcache(1); flush_tcache(2); + Pico32x.emu_flags &= ~P32XF_DRC_ROM_C; } void sh2_drc_mem_setup(SH2 *sh2) diff --git a/pico/32x/memory.c b/pico/32x/memory.c index d9f18db5..ef9e3b70 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -47,7 
+47,7 @@ static const char str_mars[] = "MARS"; void *p32x_bios_g, *p32x_bios_m, *p32x_bios_s; struct Pico32xMem *Pico32xMem; -static void bank_switch(int b); +static void bank_switch_rom_68k(int b); // addressing byte in 16bit reg #define REG8IN16(ptr, offs) ((u8 *)ptr)[(offs) ^ 1] @@ -276,7 +276,7 @@ static void p32x_reg_write8(u32 a, u32 d) d &= 3; if (r[0x04 / 2] != d) { r[0x04 / 2] = d; - bank_switch(d); + bank_switch_rom_68k(d); } return; case 0x06: // ignored, always 0 @@ -862,7 +862,7 @@ static void PicoWrite8_32x_on(u32 a, u32 d) else PicoWrite8_io(a, d); if (a == 0xa130f1) - bank_switch(Pico32x.regs[4 / 2]); + bank_switch_rom_68k(Pico32x.regs[4 / 2]); return; } @@ -900,7 +900,7 @@ static void PicoWrite16_32x_on(u32 a, u32 d) else PicoWrite16_io(a, d); if (a == 0xa130f0) - bank_switch(Pico32x.regs[4 / 2]); + bank_switch_rom_68k(Pico32x.regs[4 / 2]); return; } @@ -1145,9 +1145,9 @@ static void bank_map_handler(void) cpu68k_map_set(m68k_read16_map, 0x900000, 0x9fffff, PicoRead16_bank, 1); } -static void bank_switch(int b) +static void bank_switch_rom_68k(int b) { - unsigned int rs, bank; + unsigned int rs, bank, bank2; if (Pico.m.ncart_in) return; @@ -1164,15 +1164,25 @@ static void bank_switch(int b) return; } - // 32X ROM (unbanked, XXX: consider mirroring?) + // 32X ROM (XXX: consider mirroring?) 
rs = (Pico.romsize + M68K_BANK_MASK) & ~M68K_BANK_MASK; - rs -= bank; - if (rs > 0x100000) - rs = 0x100000; - cpu68k_map_set(m68k_read8_map, 0x900000, 0x900000 + rs - 1, Pico.rom + bank, 0); - cpu68k_map_set(m68k_read16_map, 0x900000, 0x900000 + rs - 1, Pico.rom + bank, 0); - - elprintf(EL_32X, "bank %06x-%06x -> %06x", 0x900000, 0x900000 + rs - 1, bank); + if (!carthw_ssf2_active) { + rs -= bank; + if (rs > 0x100000) + rs = 0x100000; + cpu68k_map_set(m68k_read8_map, 0x900000, 0x900000 + rs - 1, Pico.rom + bank, 0); + cpu68k_map_set(m68k_read16_map, 0x900000, 0x900000 + rs - 1, Pico.rom + bank, 0); + elprintf(EL_32X, "bank %06x-%06x -> %06x", 0x900000, 0x900000 + rs - 1, bank); + } + else { + bank = bank >> 19; + bank2 = carthw_ssf2_banks[bank + 0] << 19; + cpu68k_map_set(m68k_read8_map, 0x900000, 0x97ffff, Pico.rom + bank2, 0); + cpu68k_map_set(m68k_read16_map, 0x900000, 0x97ffff, Pico.rom + bank2, 0); + bank2 = carthw_ssf2_banks[bank + 1] << 19; + cpu68k_map_set(m68k_read8_map, 0x980000, 0x9fffff, Pico.rom + bank2, 0); + cpu68k_map_set(m68k_read16_map, 0x980000, 0x9fffff, Pico.rom + bank2, 0); + } } // ----------------------------------------------------------------- @@ -1234,6 +1244,13 @@ static u32 sh2_read8_da(u32 a, SH2 *sh2) return sh2->data_array[(a & 0xfff) ^ 1]; } +// for ssf2 +static u32 sh2_read8_rom(u32 a, SH2 *sh2) +{ + u32 bank = carthw_ssf2_banks[(a >> 19) & 7] << 19; + return Pico.rom[(bank + (a & 0x7ffff)) ^ 1]; +} + // read16 static u32 sh2_read16_unmapped(u32 a, SH2 *sh2) { @@ -1284,6 +1301,12 @@ static u32 sh2_read16_da(u32 a, SH2 *sh2) return ((u16 *)sh2->data_array)[(a & 0xfff) / 2]; } +static u32 sh2_read16_rom(u32 a, SH2 *sh2) +{ + u32 bank = carthw_ssf2_banks[(a >> 19) & 7] << 19; + return *(u16 *)(Pico.rom + bank + (a & 0x7fffe)); +} + // writes static void REGPARM(3) sh2_write_ignore(u32 a, u32 d, SH2 *sh2) { @@ -1750,6 +1773,19 @@ void Pico32xSwapDRAM(int b) sh2_write16_map[0x04/2] = sh2_write16_map[0x24/2] = b ? 
sh2_write16_dram1 : sh2_write16_dram0; } +static void bank_switch_rom_sh2(void) +{ + if (!carthw_ssf2_active) { + // easy + sh2_read8_map[0x02/2].addr = sh2_read8_map[0x22/2].addr = + sh2_read16_map[0x02/2].addr = sh2_read16_map[0x22/2].addr = MAP_MEMORY(Pico.rom); + } + else { + sh2_read8_map[0x02/2].addr = sh2_read8_map[0x22/2].addr = MAP_HANDLER(sh2_read8_rom); + sh2_read16_map[0x02/2].addr = sh2_read16_map[0x22/2].addr = MAP_HANDLER(sh2_read16_rom); + } +} + void PicoMemSetup32x(void) { unsigned int rs; @@ -1784,15 +1820,9 @@ void PicoMemSetup32x(void) cpu68k_map_set(m68k_read16_map, 0x880000, 0x880000 + rs - 1, Pico.rom, 0); cpu68k_map_set(m68k_write8_map, 0x880000, 0x880000 + rs - 1, PicoWrite8_cart, 1); cpu68k_map_set(m68k_write16_map, 0x880000, 0x880000 + rs - 1, PicoWrite16_cart, 1); -#ifdef EMU_F68K - // setup FAME fetchmap - PicoCpuFM68k.Fetch[0] = (uptr)Pico32xMem->m68k_rom; - for (rs = 0x88; rs < 0x90; rs++) - PicoCpuFM68k.Fetch[rs] = (uptr)Pico.rom - 0x880000; -#endif // 32X ROM (banked) - bank_switch(0); + bank_switch_rom_68k(0); cpu68k_map_set(m68k_write8_map, 0x900000, 0x9fffff, PicoWrite8_bank, 1); cpu68k_map_set(m68k_write16_map, 0x900000, 0x9fffff, PicoWrite16_bank, 1); } @@ -1827,8 +1857,7 @@ void PicoMemSetup32x(void) sh2_write8_map[0x00/2] = sh2_write8_map[0x20/2] = sh2_write8_cs0; sh2_write16_map[0x00/2] = sh2_write16_map[0x20/2] = sh2_write16_cs0; // CS1 - ROM - sh2_read8_map[0x02/2].addr = sh2_read8_map[0x22/2].addr = - sh2_read16_map[0x02/2].addr = sh2_read16_map[0x22/2].addr = MAP_MEMORY(Pico.rom); + bank_switch_rom_sh2(); sh2_read8_map[0x02/2].mask = sh2_read8_map[0x22/2].mask = sh2_read16_map[0x02/2].mask = sh2_read16_map[0x22/2].mask = 0x3fffff; // FIXME // CS2 - DRAM - done by Pico32xSwapDRAM() @@ -1868,9 +1897,17 @@ void PicoMemSetup32x(void) z80_map_set(z80_write_map, 0x8000, 0xffff, z80_md_bank_write_32x, 1); } +void p32x_update_banks(void) +{ + bank_switch_rom_68k(Pico32x.regs[4 / 2]); + bank_switch_rom_sh2(); + if 
(Pico32x.emu_flags & P32XF_DRC_ROM_C) + sh2_drc_flush_all(); +} + void Pico32xMemStateLoaded(void) { - bank_switch(Pico32x.regs[4 / 2]); + bank_switch_rom_68k(Pico32x.regs[4 / 2]); Pico32xSwapDRAM((Pico32x.vdp_regs[0x0a / 2] & P32XV_FS) ^ P32XV_FS); memset(Pico32xMem->pwm, 0, sizeof(Pico32xMem->pwm)); Pico32x.dirty_pal = 1; diff --git a/pico/cart.c b/pico/cart.c index 58a9a68f..9fcb01a4 100644 --- a/pico/cart.c +++ b/pico/cart.c @@ -778,7 +778,8 @@ static int is_expr(const char *expr, char **pr) #include "carthw_cfg.c" -static void parse_carthw(const char *carthw_cfg, int *fill_sram) +static void parse_carthw(const char *carthw_cfg, int *fill_sram, + int *hw_detected) { int line = 0, any_checks_passed = 0, skip_sect = 0; const char *s, *builtin = builtin_carthw_cfg; @@ -902,6 +903,7 @@ static void parse_carthw(const char *carthw_cfg, int *fill_sram) if (is_expr("hw", &p)) { if (!any_checks_passed) goto no_checks; + *hw_detected = 1; rstrip(p); if (strcmp(p, "svp") == 0) @@ -925,6 +927,7 @@ static void parse_carthw(const char *carthw_cfg, int *fill_sram) else { elprintf(EL_STATUS, "carthw:%d: unsupported mapper: %s", line, p); skip_sect = 1; + *hw_detected = 0; } continue; } @@ -1038,6 +1041,7 @@ no_checks: */ static void PicoCartDetect(const char *carthw_cfg) { + int carthw_detected = 0; int fill_sram = 0; memset(&Pico.sv, 0, sizeof(Pico.sv)); @@ -1067,7 +1071,11 @@ static void PicoCartDetect(const char *carthw_cfg) Pico.sv.eeprom_bit_out= 0; if (carthw_cfg != NULL) - parse_carthw(carthw_cfg, &fill_sram); + parse_carthw(carthw_cfg, &fill_sram, &carthw_detected); + + // assume the standard mapper for large roms + if (!carthw_detected && Pico.romsize > 0x400000) + carthw_ssf2_startup(); if (Pico.sv.flags & SRF_ENABLED) { diff --git a/pico/carthw/carthw.c b/pico/carthw/carthw.c index cd47e7c7..41dc6bd8 100644 --- a/pico/carthw/carthw.c +++ b/pico/carthw/carthw.c @@ -21,63 +21,76 @@ static int have_bank(u32 base) return 1; } -/* The SSFII mapper */ -static unsigned 
char ssf2_banks[8]; +/* standard/ssf2 mapper */ +int carthw_ssf2_active; +unsigned char carthw_ssf2_banks[8]; static carthw_state_chunk carthw_ssf2_state[] = { - { CHUNK_CARTHW, sizeof(ssf2_banks), &ssf2_banks }, - { 0, 0, NULL } + { CHUNK_CARTHW, sizeof(carthw_ssf2_banks), &carthw_ssf2_banks }, + { 0, 0, NULL } }; static void carthw_ssf2_write8(u32 a, u32 d) { - u32 target, base; + u32 target, base; - if ((a & 0xfffff0) != 0xa130f0) { - PicoWrite8_io(a, d); - return; - } + if ((a & 0xfffff0) != 0xa130f0) { + PicoWrite8_io(a, d); + return; + } - a &= 0x0e; - if (a == 0) - return; + a &= 0x0e; + if (a == 0) + return; + if (carthw_ssf2_banks[a >> 1] == d) + return; - ssf2_banks[a >> 1] = d; - base = d << 19; - target = a << 18; - if (!have_bank(base)) - return; + base = d << 19; + target = a << 18; + if (!have_bank(base)) + return; + carthw_ssf2_banks[a >> 1] = d; - cpu68k_map_set(m68k_read8_map, target, target + 0x80000 - 1, Pico.rom + base, 0); - cpu68k_map_set(m68k_read16_map, target, target + 0x80000 - 1, Pico.rom + base, 0); + cpu68k_map_set(m68k_read8_map, target, target + 0x80000 - 1, Pico.rom + base, 0); + cpu68k_map_set(m68k_read16_map, target, target + 0x80000 - 1, Pico.rom + base, 0); + if (PicoIn.AHW & PAHW_32X) + p32x_update_banks(); } static void carthw_ssf2_mem_setup(void) { - cpu68k_map_set(m68k_write8_map, 0xa10000, 0xa1ffff, carthw_ssf2_write8, 1); + cpu68k_map_set(m68k_write8_map, 0xa10000, 0xa1ffff, carthw_ssf2_write8, 1); } static void carthw_ssf2_statef(void) { - int i; - for (i = 1; i < 8; i++) - carthw_ssf2_write8(0xa130f0 | (i << 1), ssf2_banks[i]); + int i; + for (i = 1; i < 8; i++) + carthw_ssf2_write8(0xa130f0 | (i << 1), carthw_ssf2_banks[i]); +} + +static void carthw_ssf2_unload(void) +{ + memset(carthw_ssf2_banks, 0, sizeof(carthw_ssf2_banks)); + carthw_ssf2_active = 0; } void carthw_ssf2_startup(void) { - int i; + int i; - elprintf(EL_STATUS, "SSF2 mapper startup"); + elprintf(EL_STATUS, "SSF2 mapper startup"); - // default map - for 
(i = 0; i < 8; i++) - ssf2_banks[i] = i; + // default map + for (i = 0; i < 8; i++) + carthw_ssf2_banks[i] = i; - PicoCartMemSetup = carthw_ssf2_mem_setup; - PicoLoadStateHook = carthw_ssf2_statef; - carthw_chunks = carthw_ssf2_state; + PicoCartMemSetup = carthw_ssf2_mem_setup; + PicoLoadStateHook = carthw_ssf2_statef; + PicoCartUnloadHook = carthw_ssf2_unload; + carthw_chunks = carthw_ssf2_state; + carthw_ssf2_active = 1; } diff --git a/pico/carthw/carthw.h b/pico/carthw/carthw.h index 869a5cd6..640e0ef4 100644 --- a/pico/carthw/carthw.h +++ b/pico/carthw/carthw.h @@ -14,8 +14,12 @@ void PicoSVPInit(void); void PicoSVPStartup(void); void PicoSVPMemSetup(void); -/* misc */ +/* standard/ssf2 mapper */ +extern int carthw_ssf2_active; +extern unsigned char carthw_ssf2_banks[8]; void carthw_ssf2_startup(void); + +/* misc */ void carthw_Xin1_startup(void); void carthw_realtec_startup(void); void carthw_radica_startup(void); diff --git a/pico/pico_int.h b/pico/pico_int.h index da0fbb90..cb1c21d2 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -546,6 +546,7 @@ typedef struct #define P32XF_68KCPOLL (1 << 0) #define P32XF_68KVPOLL (1 << 1) #define P32XF_Z80_32X_IO (1 << 7) // z80 does 32x io +#define P32XF_DRC_ROM_C (1 << 8) // cached code from ROM #define P32XI_VRES (1 << 14/2) // IRL/2 #define P32XI_VINT (1 << 12/2) @@ -914,6 +915,7 @@ void PicoWrite16_32x(unsigned int a, unsigned int d); void PicoMemSetup32x(void); void Pico32xSwapDRAM(int b); void Pico32xMemStateLoaded(void); +void p32x_update_banks(void); void p32x_m68k_poll_event(unsigned int flags); void p32x_sh2_poll_event(SH2 *sh2, unsigned int flags, unsigned int m68k_cycles); From 310d973b9e4f25c0359d5a489d6c061ea989a92a Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 21 Jan 2018 19:03:51 +0200 Subject: [PATCH 0149/1110] don't spam DMA message VR US triggers this --- pico/videoport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pico/videoport.c b/pico/videoport.c index 
9def819d..aadba24f 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -307,7 +307,7 @@ static NOINLINE void CommandDma(void) if ((pvid->reg[1]&0x10)==0) return; // DMA not enabled if (Pico.m.dma_xfers) - elprintf(EL_VDPDMA|EL_ANOMALY, "Dma overlap, left=%d @ %06x", + elprintf(EL_VDPDMA, "Dma overlap, left=%d @ %06x", Pico.m.dma_xfers, SekPc); len = GetDmaLength(); From 8fde2033ac268ee9fe198f5d1c9d0b964f2763ab Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 23 Jan 2018 02:39:01 +0200 Subject: [PATCH 0150/1110] 32x: implement standard/ssf2 mapper, part 2 Turns out wasn't actually hooked in. --- pico/32x/memory.c | 75 +++++++++++++++++++++++++++++++++++++------- pico/carthw/carthw.c | 16 +++++++--- pico/carthw/carthw.h | 1 + 3 files changed, 75 insertions(+), 17 deletions(-) diff --git a/pico/32x/memory.c b/pico/32x/memory.c index ef9e3b70..eff0ab07 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -49,6 +49,9 @@ struct Pico32xMem *Pico32xMem; static void bank_switch_rom_68k(int b); +static void (*m68k_write8_io)(u32 a, u32 d); +static void (*m68k_write16_io)(u32 a, u32 d); + // addressing byte in 16bit reg #define REG8IN16(ptr, offs) ((u8 *)ptr)[(offs) ^ 1] @@ -857,12 +860,7 @@ static void PicoWrite8_32x_on(u32 a, u32 d) } if ((a & 0xfc00) != 0x5000) { - if (PicoIn.AHW & PAHW_MCD) - PicoWrite8_mcd_io(a, d); - else - PicoWrite8_io(a, d); - if (a == 0xa130f1) - bank_switch_rom_68k(Pico32x.regs[4 / 2]); + m68k_write8_io(a, d); return; } @@ -884,6 +882,27 @@ static void PicoWrite8_32x_on(u32 a, u32 d) elprintf(EL_UIO, "m68k unmapped w8 [%06x] %02x @%06x", a, d & 0xff, SekPc); } +static void PicoWrite8_32x_on_io(u32 a, u32 d) +{ + PicoWrite8_io(a, d); + if (a == 0xa130f1) + bank_switch_rom_68k(Pico32x.regs[4 / 2]); +} + +static void PicoWrite8_32x_on_io_cd(u32 a, u32 d) +{ + PicoWrite8_mcd_io(a, d); + if (a == 0xa130f1) + bank_switch_rom_68k(Pico32x.regs[4 / 2]); +} + +static void PicoWrite8_32x_on_io_ssf2(u32 a, u32 d) +{ + carthw_ssf2_write8(a, d); + if ((a 
& ~0x0e) == 0xa130f1) + bank_switch_rom_68k(Pico32x.regs[4 / 2]); +} + static void PicoWrite16_32x_on(u32 a, u32 d) { if ((a & 0xfc00) == 0x5000) @@ -895,12 +914,7 @@ static void PicoWrite16_32x_on(u32 a, u32 d) } if ((a & 0xfc00) != 0x5000) { - if (PicoIn.AHW & PAHW_MCD) - PicoWrite16_mcd_io(a, d); - else - PicoWrite16_io(a, d); - if (a == 0xa130f0) - bank_switch_rom_68k(Pico32x.regs[4 / 2]); + m68k_write16_io(a, d); return; } @@ -920,6 +934,29 @@ static void PicoWrite16_32x_on(u32 a, u32 d) elprintf(EL_UIO, "m68k unmapped w16 [%06x] %04x @%06x", a, d & 0xffff, SekPc); } +static void PicoWrite16_32x_on_io(u32 a, u32 d) +{ + PicoWrite16_io(a, d); + if (a == 0xa130f0) + bank_switch_rom_68k(Pico32x.regs[4 / 2]); +} + +static void PicoWrite16_32x_on_io_cd(u32 a, u32 d) +{ + PicoWrite16_mcd_io(a, d); + if (a == 0xa130f0) + bank_switch_rom_68k(Pico32x.regs[4 / 2]); +} + +static void PicoWrite16_32x_on_io_ssf2(u32 a, u32 d) +{ + PicoWrite16_io(a, d); + if ((a & ~0x0f) == 0xa130f0) { + carthw_ssf2_write8(a + 1, d); + bank_switch_rom_68k(Pico32x.regs[4 / 2]); + } +} + // before ADEN u32 PicoRead8_32x(u32 a) { @@ -1833,6 +1870,20 @@ void PicoMemSetup32x(void) cpu68k_map_set(m68k_write8_map, 0xa10000, 0xa1ffff, PicoWrite8_32x_on, 1); cpu68k_map_set(m68k_write16_map, 0xa10000, 0xa1ffff, PicoWrite16_32x_on, 1); + // TODO: cd + carthw + if (PicoIn.AHW & PAHW_MCD) { + m68k_write8_io = PicoWrite8_32x_on_io_cd; + m68k_write16_io = PicoWrite16_32x_on_io_cd; + } + else if (carthw_ssf2_active) { + m68k_write8_io = PicoWrite8_32x_on_io_ssf2; + m68k_write16_io = PicoWrite16_32x_on_io_ssf2; + } + else { + m68k_write8_io = PicoWrite8_32x_on_io; + m68k_write16_io = PicoWrite16_32x_on_io; + } + // SH2 maps: A31,A30,A29,CS1,CS0 // all unmapped by default for (i = 0; i < ARRAY_SIZE(sh2_read8_map); i++) { diff --git a/pico/carthw/carthw.c b/pico/carthw/carthw.c index 41dc6bd8..2fed8220 100644 --- a/pico/carthw/carthw.c +++ b/pico/carthw/carthw.c @@ -31,11 +31,11 @@ static carthw_state_chunk 
carthw_ssf2_state[] = { 0, 0, NULL } }; -static void carthw_ssf2_write8(u32 a, u32 d) +void carthw_ssf2_write8(u32 a, u32 d) { u32 target, base; - if ((a & 0xfffff0) != 0xa130f0) { + if ((a & ~0x0e) != 0xa130f1) { PicoWrite8_io(a, d); return; } @@ -54,13 +54,19 @@ static void carthw_ssf2_write8(u32 a, u32 d) cpu68k_map_set(m68k_read8_map, target, target + 0x80000 - 1, Pico.rom + base, 0); cpu68k_map_set(m68k_read16_map, target, target + 0x80000 - 1, Pico.rom + base, 0); - if (PicoIn.AHW & PAHW_32X) - p32x_update_banks(); +} + +void carthw_ssf2_write16(u32 a, u32 d) +{ + PicoWrite16_io(a, d); + if ((a & ~0x0f) == 0xa130f0) + carthw_ssf2_write8(a + 1, d); } static void carthw_ssf2_mem_setup(void) { - cpu68k_map_set(m68k_write8_map, 0xa10000, 0xa1ffff, carthw_ssf2_write8, 1); + cpu68k_map_set(m68k_write8_map, 0xa10000, 0xa1ffff, carthw_ssf2_write8, 1); + cpu68k_map_set(m68k_write16_map, 0xa10000, 0xa1ffff, carthw_ssf2_write16, 1); } static void carthw_ssf2_statef(void) diff --git a/pico/carthw/carthw.h b/pico/carthw/carthw.h index 640e0ef4..7303f60a 100644 --- a/pico/carthw/carthw.h +++ b/pico/carthw/carthw.h @@ -18,6 +18,7 @@ void PicoSVPMemSetup(void); extern int carthw_ssf2_active; extern unsigned char carthw_ssf2_banks[8]; void carthw_ssf2_startup(void); +void carthw_ssf2_write8(unsigned int a, unsigned int d); /* misc */ void carthw_Xin1_startup(void); From cf83610baa16d21842e8097a7e87f284e2519584 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 27 Jan 2018 19:41:57 +0200 Subject: [PATCH 0151/1110] sms: improve irq handling --- pico/pico_int.h | 4 +++- pico/sms.c | 29 +++++++++++++++++++++++++---- pico/z80if.c | 7 ++++++- 3 files changed, 34 insertions(+), 6 deletions(-) diff --git a/pico/pico_int.h b/pico/pico_int.h index cb1c21d2..7225cab8 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -169,7 +169,7 @@ extern struct DrZ80 drZ80; #define z80_run(cycles) ((cycles) - DrZ80Run(&drZ80, cycles)) #define z80_run_nr(cycles) DrZ80Run(&drZ80, cycles) #define 
z80_int() drZ80.Z80_IRQ = 1 -#define z80_int() drZ80.Z80_IRQ = 1 +#define z80_int_assert(a) drZ80.Z80_IRQ = (a) #define z80_nmi() drZ80.Z80IF |= 8 #define z80_cyclesLeft drZ80.cycles @@ -182,6 +182,7 @@ extern struct DrZ80 drZ80; #define z80_run(cycles) Cz80_Exec(&CZ80, cycles) #define z80_run_nr(cycles) Cz80_Exec(&CZ80, cycles) #define z80_int() Cz80_Set_IRQ(&CZ80, 0, HOLD_LINE) +#define z80_int_assert(a) Cz80_Set_IRQ(&CZ80, 0, (a) ? ASSERT_LINE : CLEAR_LINE) #define z80_nmi() Cz80_Set_IRQ(&CZ80, IRQ_LINE_NMI, 0) #define z80_cyclesLeft (CZ80.ICount - CZ80.ExtraCycles) @@ -193,6 +194,7 @@ extern struct DrZ80 drZ80; #define z80_run(cycles) (cycles) #define z80_run_nr(cycles) #define z80_int() +#define z80_int_assert(a) #define z80_nmi() #endif diff --git a/pico/sms.c b/pico/sms.c index ac81c2b2..286b8bf1 100644 --- a/pico/sms.c +++ b/pico/sms.c @@ -32,6 +32,7 @@ static unsigned char vdp_ctl_read(void) struct PicoVideo *pv = &Pico.video; unsigned char d; + z80_int_assert(0); d = pv->status | (pv->pending_ints << 7); pv->pending = pv->pending_ints = 0; pv->status = 0; @@ -55,14 +56,34 @@ static void vdp_data_write(unsigned char d) pv->pending = 0; } -static void vdp_ctl_write(unsigned char d) +static NOINLINE void vdp_reg_write(struct PicoVideo *pv, u8 a, u8 d) +{ + int l; + + pv->reg[a] = d; + switch (a) { + case 0: + l = pv->pending_ints & (d >> 3) & 2; + elprintf(EL_INTS, "hint %d", l); + z80_int_assert(l); + break; + case 1: + l = pv->pending_ints & (d >> 5) & 1; + elprintf(EL_INTS, "vint %d", l); + z80_int_assert(l); + break; + } +} + +static void vdp_ctl_write(u8 d) { struct PicoVideo *pv = &Pico.video; if (pv->pending) { if ((d >> 6) == 2) { - pv->reg[d & 0x0f] = pv->addr; elprintf(EL_IO, " VDP r%02x=%02x", d & 0x0f, pv->addr & 0xff); + if (pv->reg[d & 0x0f] != (u8)pv->addr) + vdp_reg_write(pv, d & 0x0f, pv->addr); } pv->type = d >> 6; pv->addr &= 0x00ff; @@ -287,7 +308,7 @@ void PicoFrameMS(void) pv->pending_ints |= 2; if (pv->reg[0] & 0x10) { 
elprintf(EL_INTS, "hint"); - z80_int(); + z80_int_assert(1); } } } @@ -295,7 +316,7 @@ void PicoFrameMS(void) pv->pending_ints |= 1; if (pv->reg[1] & 0x20) { elprintf(EL_INTS, "vint"); - z80_int(); + z80_int_assert(1); } } diff --git a/pico/z80if.c b/pico/z80if.c index da2043e4..419d061f 100644 --- a/pico/z80if.c +++ b/pico/z80if.c @@ -52,6 +52,8 @@ static unsigned int dz80_rebase_pc(unsigned short pc) return drZ80.Z80PC_BASE; } +static void dz80_noop_irq_ack(void) {} + #ifdef FAST_Z80SP static u32 drz80_sp_base; @@ -107,8 +109,11 @@ void z80_reset(void) drz80_sp_base = (PicoIn.AHW & PAHW_SMS) ? 0xc000 : 0x0000; drZ80.Z80SP_BASE = z80_read_map[drz80_sp_base >> Z80_MEM_SHIFT] << 1; #endif - if (PicoIn.AHW & PAHW_SMS) + drZ80.z80_irq_callback = NULL; // use auto-clear + if (PicoIn.AHW & PAHW_SMS) { drZ80.Z80SP = drZ80.Z80SP_BASE + 0xdff0; // simulate BIOS + drZ80.z80_irq_callback = dz80_noop_irq_ack; + } // XXX: since we use direct SP pointer, it might make sense to force it to RAM, // but we'll rely on built-in stack protection for now #endif From 6e05b76b6a1889e4bab809a5f31042121276692f Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 27 Jan 2018 20:03:13 +0200 Subject: [PATCH 0152/1110] add a header fixup --- Makefile | 1 + pico/carthw.cfg | 5 +++++ pico/carthw_cfg.c | 3 +++ 3 files changed, 9 insertions(+) diff --git a/Makefile b/Makefile index 2620a704..78fa5386 100644 --- a/Makefile +++ b/Makefile @@ -236,4 +236,5 @@ cpu/sh2/compiler.o : cpu/drc/emit_x86.c cpu/sh2/mame/sh2pico.o : cpu/sh2/mame/sh2.c pico/pico.o pico/cd/mcd.o pico/32x/32x.o : pico/pico_cmn.c pico/pico_int.h pico/memory.o pico/cd/memory.o pico/32x/memory.o : pico/pico_int.h pico/memory.h +# pico/cart.o : pico/carthw_cfg.c cpu/fame/famec.o: cpu/fame/famec.c cpu/fame/famec_opcodes.h diff --git a/pico/carthw.cfg b/pico/carthw.cfg index 20742e0e..c0ad6bce 100644 --- a/pico/carthw.cfg +++ b/pico/carthw.cfg @@ -63,6 +63,11 @@ prop = filled_sram check_str = 0x150, "MICRO MACHINES II" prop = filled_sram 
+# bad headers +[HardBall III] +check_str = 0x150, " HardBall III" +sram_range = 0x200000,0x20ffff + # X-Men proto [X-Men (prototype)] check_str = 0x150, "32X SAMPLE PROGRAM" diff --git a/pico/carthw_cfg.c b/pico/carthw_cfg.c index 2fdd1991..2ce6d29f 100644 --- a/pico/carthw_cfg.c +++ b/pico/carthw_cfg.c @@ -24,6 +24,9 @@ static const char builtin_carthw_cfg[] = "check_str=0x150,\"MICRO MACHINES II\"\n" "prop=filled_sram\n" "[]\n" + "check_str=0x150,\" HardBall III\"\n" + "sram_range=0x200000,0x20ffff\n" + "[]\n" "check_str=0x150,\"32X SAMPLE PROGRAM\"\n" "check_str=0x32b74c,\"Bishop Level\"\n" "prop=force_6btn\n" From ee3c39efd2cf51cd654b6240f6fb595673f10f45 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 27 Jan 2018 23:46:37 +0200 Subject: [PATCH 0153/1110] fix DrawSpritesHiAS --- pico/draw.c | 198 +++++++++++++++++++++++++-------------------- pico/pico.h | 1 - platform/psp/emu.c | 6 +- 3 files changed, 111 insertions(+), 94 deletions(-) diff --git a/pico/draw.c b/pico/draw.c index 0939f3fe..680de3da 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -13,20 +13,19 @@ * - "sonic mode" for midline palette changes (8bit mode only) * - accurate sprites (AS) [+ s/h] * - * AS and s/h both use upper bits for both priority and shadow/hilight flags. + * s/h uses upper bits for both priority and shadow/hilight flags. * "sonic mode" is autodetected, shadow/hilight is enabled by emulated game. * AS is enabled by user and takes priority over "sonic mode". 
* * since renderer always draws line in 8bit mode, there are 2 spare bits: - * b \ mode: s/h as sonic - * 00 normal - pal index - * 01 shadow - pal index - * 10 hilight+op spr spr pal index - * 11 shadow +op spr - pal index + * b \ mode: s/h sonic + * 00 normal pal index + * 01 shadow pal index + * 10 hilight+op spr pal index + * 11 shadow +op spr pal index * * not handled properly: * - hilight op on shadow tile - * - AS + s/h (s/h sprite flag interferes with and cleared by AS code) */ #include "pico_int.h" @@ -95,11 +94,8 @@ void blockcpy_or(void *dst, void *src, size_t n, int pat) #define blockcpy memcpy #endif - -#define TileNormMaker(funcname,pix_func) \ -static void funcname(int sx, unsigned int pack, int pal) \ +#define TileNormMaker_(pix_func) \ { \ - unsigned char *pd = Pico.est.HighCol + sx; \ unsigned int t; \ \ t = (pack&0x0000f000)>>12; pix_func(0); \ @@ -112,10 +108,8 @@ static void funcname(int sx, unsigned int pack, int pal) \ t = (pack&0x000f0000)>>16; pix_func(7); \ } -#define TileFlipMaker(funcname,pix_func) \ -static void funcname(int sx, unsigned int pack, int pal) \ +#define TileFlipMaker_(pix_func) \ { \ - unsigned char *pd = Pico.est.HighCol + sx; \ unsigned int t; \ \ t = (pack&0x000f0000)>>16; pix_func(0); \ @@ -128,6 +122,21 @@ static void funcname(int sx, unsigned int pack, int pal) \ t = (pack&0x0000f000)>>12; pix_func(7); \ } +#define TileNormMaker(funcname, pix_func) \ +static void funcname(unsigned char *pd, unsigned int pack, int pal) \ +TileNormMaker_(pix_func) + +#define TileFlipMaker(funcname, pix_func) \ +static void funcname(unsigned char *pd, unsigned int pack, int pal) \ +TileFlipMaker_(pix_func) + +#define TileNormMakerAS(funcname, pix_func) \ +static void funcname(unsigned char *pd, unsigned char *mb, unsigned int pack, int pal) \ +TileNormMaker_(pix_func) + +#define TileFlipMakerAS(funcname, pix_func) \ +static void funcname(unsigned char *pd, unsigned char *mb, unsigned int pack, int pal) \ +TileFlipMaker_(pix_func) 
#define pix_just_write(x) \ if (t) pd[x]=pal|t @@ -155,11 +164,15 @@ TileFlipMaker(TileFlipSH, pix_sh) TileNormMaker(TileNormSH_markop, pix_sh_markop) TileFlipMaker(TileFlipSH_markop, pix_sh_markop) +#endif + // process operator pixels only, apply only on low pri tiles and other op pixels #define pix_sh_onlyop(x) \ if (t>=0xe && (pd[x]&0xc0)) \ pd[x]=(pd[x]&0x3f)|(t<<6); /* c0 shadow, 80 hilight */ \ +#ifndef _ASM_DRAW_C + TileNormMaker(TileNormSH_onlyop_lp, pix_sh_onlyop) TileFlipMaker(TileFlipSH_onlyop_lp, pix_sh_onlyop) @@ -167,26 +180,39 @@ TileFlipMaker(TileFlipSH_onlyop_lp, pix_sh_onlyop) // draw a sprite pixel (AS) #define pix_as(x) \ - if (t && !(pd[x]&0x80)) pd[x]=pal|t + if (t & mb[x]) mb[x] = 0, pd[x] = pal | t -TileNormMaker(TileNormAS, pix_as) -TileFlipMaker(TileFlipAS, pix_as) +TileNormMakerAS(TileNormAS, pix_as) +TileFlipMakerAS(TileFlipAS, pix_as) -// draw a sprite pixel, skip operator colors (AS) -#define pix_sh_as_noop(x) \ - if (t && t < 0xe && !(pd[x]&0x80)) pd[x]=pal|t +// draw a sprite pixel, process operator colors (AS) +#define pix_sh_as(x) \ + if (t & mb[x]) { \ + mb[x] = 0; \ + if (t>=0xe) pd[x]=(pd[x]&0x3f)|(t<<6); /* c0 shadow, 80 hilight */ \ + else pd[x] = pal | t; \ + } -TileNormMaker(TileNormAS_noop, pix_sh_as_noop) -TileFlipMaker(TileFlipAS_noop, pix_sh_as_noop) +TileNormMakerAS(TileNormSH_AS, pix_sh_as) +TileFlipMakerAS(TileFlipSH_AS, pix_sh_as) + +#define pix_sh_as_onlyop(x) \ + if (t & mb[x]) { \ + mb[x] = 0; \ + pix_sh_onlyop(x); \ + } + +TileNormMakerAS(TileNormSH_AS_onlyop_lp, pix_sh_as_onlyop) +TileFlipMakerAS(TileFlipSH_AS_onlyop_lp, pix_sh_as_onlyop) // mark pixel as sprite pixel (AS) #define pix_sh_as_onlymark(x) \ - if (t) pd[x]|=0x80 + if (t) mb[x] = 0 -TileNormMaker(TileNormAS_onlymark, pix_sh_as_onlymark) -TileFlipMaker(TileFlipAS_onlymark, pix_sh_as_onlymark) +TileNormMakerAS(TileNormAS_onlymark, pix_sh_as_onlymark) +TileFlipMakerAS(TileFlipAS_onlymark, pix_sh_as_onlymark) -// mark pixel as sprite pixel (AS) +// 
forced both layer draw (through debug reg) #define pix_and(x) \ pd[x] = (pd[x] & 0xc0) | (pd[x] & (pal | t)) @@ -198,6 +224,7 @@ TileFlipMaker(TileFlip_and, pix_and) #ifndef _ASM_DRAW_C static void DrawStrip(struct TileStrip *ts, int lflags, int cellskip) { + unsigned char *pd = Pico.est.HighCol; int tilex,dx,ty,code=0,addr=0,cells; int oldcode=-1,blank=-1; // The tile we know is blank int pal=0,sh; @@ -241,8 +268,8 @@ static void DrawStrip(struct TileStrip *ts, int lflags, int cellskip) continue; } - if (code & 0x0800) TileFlip(dx, pack, pal); - else TileNorm(dx, pack, pal); + if (code & 0x0800) TileFlip(pd + dx, pack, pal); + else TileNorm(pd + dx, pack, pal); } // terminate the cache list @@ -254,6 +281,7 @@ static void DrawStrip(struct TileStrip *ts, int lflags, int cellskip) // this is messy static void DrawStripVSRam(struct TileStrip *ts, int plane_sh, int cellskip) { + unsigned char *pd = Pico.est.HighCol; int tilex,dx,code=0,addr=0,cell=0; int oldcode=-1,blank=-1; // The tile we know is blank int pal=0,scan=Pico.est.DrawScanline; @@ -310,8 +338,8 @@ static void DrawStripVSRam(struct TileStrip *ts, int plane_sh, int cellskip) continue; } - if (code & 0x0800) TileFlip(dx, pack, pal); - else TileNorm(dx, pack, pal); + if (code & 0x0800) TileFlip(pd + dx, pack, pal); + else TileNorm(pd + dx, pack, pal); } // terminate the cache list @@ -325,6 +353,7 @@ static #endif void DrawStripInterlace(struct TileStrip *ts) { + unsigned char *pd = Pico.est.HighCol; int tilex=0,dx=0,ty=0,code=0,addr=0,cells; int oldcode=-1,blank=-1; // The tile we know is blank int pal=0; @@ -366,8 +395,8 @@ void DrawStripInterlace(struct TileStrip *ts) continue; } - if (code & 0x0800) TileFlip(dx, pack, pal); - else TileNorm(dx, pack, pal); + if (code & 0x0800) TileFlip(pd + dx, pack, pal); + else TileNorm(pd + dx, pack, pal); } // terminate the cache list @@ -447,7 +476,8 @@ static void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells, static void DrawWindow(int tstart, int 
tend, int prio, int sh, struct PicoEState *est) { - struct PicoVideo *pvid=&Pico.video; + unsigned char *pd = Pico.est.HighCol; + struct PicoVideo *pvid = &Pico.video; int tilex,ty,nametab,code=0; int blank=-1; // The tile we know is blank @@ -504,8 +534,8 @@ static void DrawWindow(int tstart, int tend, int prio, int sh, pal = ((code >> 9) & 0x30); dx = 8 + (tilex << 3); - if (code & 0x0800) TileFlip(dx, pack, pal); - else TileNorm(dx, pack, pal); + if (code & 0x0800) TileFlip(pd + dx, pack, pal); + else TileNorm(pd + dx, pack, pal); } } else @@ -545,8 +575,8 @@ static void DrawWindow(int tstart, int tend, int prio, int sh, dx = 8 + (tilex << 3); - if (code & 0x0800) TileFlip(dx, pack, pal); - else TileNorm(dx, pack, pal); + if (code & 0x0800) TileFlip(pd + dx, pack, pal); + else TileNorm(pd + dx, pack, pal); } } } @@ -568,6 +598,7 @@ static void DrawTilesFromCacheShPrep(void) static void DrawTilesFromCache(int *hc, int sh, int rlim, struct PicoEState *est) { + unsigned char *pd = Pico.est.HighCol; int code, addr, dx; unsigned int pack; int pal; @@ -602,8 +633,8 @@ static void DrawTilesFromCache(int *hc, int sh, int rlim, struct PicoEState *est if (rlim-dx < 0) goto last_cut_tile; - if (code & 0x0800) TileFlip(dx, pack, pal); - else TileNorm(dx, pack, pal); + if (code & 0x0800) TileFlip(pd + dx, pack, pal); + else TileNorm(pd + dx, pack, pal); } } else @@ -627,8 +658,8 @@ static void DrawTilesFromCache(int *hc, int sh, int rlim, struct PicoEState *est if (rlim - dx < 0) goto last_cut_tile; - if (code & 0x0800) TileFlip(dx, pack, pal); - else TileNorm(dx, pack, pal); + if (code & 0x0800) TileFlip(pd + dx, pack, pal); + else TileNorm(pd + dx, pack, pal); } } return; @@ -636,9 +667,9 @@ static void DrawTilesFromCache(int *hc, int sh, int rlim, struct PicoEState *est last_cut_tile: // for vertical window cutoff { - unsigned char *pd = est->HighCol + dx; unsigned int t; + pd += dx; if (code&0x0800) { switch (rlim-dx+8) @@ -677,12 +708,13 @@ last_cut_tile: static void 
DrawSprite(int *sprite, int sh) { + void (*fTileFunc)(unsigned char *pd, unsigned int pack, int pal); + unsigned char *pd = Pico.est.HighCol; int width=0,height=0; int row=0,code=0; int pal; int tile=0,delta=0; int sx, sy; - void (*fTileFunc)(int sx, unsigned int pack, int pal); // parse the sprite data sy=sprite[0]; @@ -722,13 +754,14 @@ static void DrawSprite(int *sprite, int sh) if(sx>=328) break; // Offscreen pack = *(unsigned int *)(PicoMem.vram + (tile & 0x7fff)); - fTileFunc(sx, pack, pal); + fTileFunc(pd + sx, pack, pal); } } #endif static NOINLINE void DrawTilesFromCacheForced(const int *hc) { + unsigned char *pd = Pico.est.HighCol; int code, addr, dx; unsigned int pack; int pal; @@ -743,13 +776,14 @@ static NOINLINE void DrawTilesFromCacheForced(const int *hc) pal = ((code >> 9) & 0x30); pack = *(unsigned int *)(PicoMem.vram + addr); - if (code & 0x0800) TileFlip_and(dx, pack, pal); - else TileNorm_and(dx, pack, pal); + if (code & 0x0800) TileFlip_and(pd + dx, pack, pal); + else TileNorm_and(pd + dx, pack, pal); } } static void DrawSpriteInterlace(unsigned int *sprite) { + unsigned char *pd = Pico.est.HighCol; int width=0,height=0; int row=0,code=0; int pal; @@ -788,8 +822,8 @@ static void DrawSpriteInterlace(unsigned int *sprite) if(sx>=328) break; // Offscreen pack = *(unsigned int *)(PicoMem.vram + (tile & 0x7fff)); - if (code & 0x0800) TileFlip(sx, pack, pal); - else TileNorm(sx, pack, pal); + if (code & 0x0800) TileFlip(pd + sx, pack, pal); + else TileNorm(pd + sx, pack, pal); } } @@ -851,7 +885,8 @@ static NOINLINE void DrawAllSpritesInterlace(int pri, int sh) */ static void DrawSpritesSHi(unsigned char *sprited, const struct PicoEState *est) { - void (*fTileFunc)(int sx, unsigned int pack, int pal); + void (*fTileFunc)(unsigned char *pd, unsigned int pack, int pal); + unsigned char *pd = Pico.est.HighCol; unsigned char *p; int cnt; @@ -913,7 +948,7 @@ static void DrawSpritesSHi(unsigned char *sprited, const struct PicoEState *est) if(sx>=328) 
break; // Offscreen pack = *(unsigned int *)(PicoMem.vram + (tile & 0x7fff)); - fTileFunc(sx, pack, pal); + fTileFunc(pd + sx, pack, pal); } } } @@ -921,15 +956,17 @@ static void DrawSpritesSHi(unsigned char *sprited, const struct PicoEState *est) static void DrawSpritesHiAS(unsigned char *sprited, int sh) { - void (*fTileFunc)(int sx, unsigned int pack, int pal); + void (*fTileFunc)(unsigned char *pd, unsigned char *mb, + unsigned int pack, int pal); + unsigned char *pd = Pico.est.HighCol; + unsigned char mb[8+320+8]; unsigned char *p; - int entry, cnt, sh_cnt = 0; + int entry, cnt; cnt = sprited[0] & 0x7f; if (cnt == 0) return; - Pico.est.rendstatus |= PDRAW_SPR_LO_ON_HI; - + memset(mb, 0xff, sizeof(mb)); p = &sprited[3]; // Go through sprites: @@ -943,22 +980,26 @@ static void DrawSpritesHiAS(unsigned char *sprited, int sh) code = sprite[1]; pal = (code>>9)&0x30; - if (code & 0x8000) // hi priority + if (sh && pal == 0x30) { - if (sh && pal == 0x30) + if (code & 0x8000) // hi priority { - if (code&0x800) fTileFunc=TileFlipAS_noop; - else fTileFunc=TileNormAS_noop; + if (code&0x800) fTileFunc = TileFlipSH_AS; + else fTileFunc = TileNormSH_AS; } else { - if (code&0x800) fTileFunc=TileFlipAS; - else fTileFunc=TileNormAS; + if (code&0x800) fTileFunc = TileFlipSH_AS_onlyop_lp; + else fTileFunc = TileNormSH_AS_onlyop_lp; } } else { - if (code&0x800) fTileFunc=TileFlipAS_onlymark; - else fTileFunc=TileNormAS_onlymark; + if (code & 0x8000) // hi priority + { + if (code&0x800) fTileFunc = TileFlipAS; + else fTileFunc = TileNormAS; + } else { + if (code&0x800) fTileFunc = TileFlipAS_onlymark; + else fTileFunc = TileNormAS_onlymark; + } } - if (sh && pal == 0x30) - p[sh_cnt++] = offs / 2; // re-save for sh/hi pass // parse remaining sprite data sy=sprite[0]; @@ -978,7 +1019,6 @@ static void DrawSpritesHiAS(unsigned char *sprited, int sh) tile &= 0x7ff; tile<<=4; tile+=(row&7)<<1; // Tile address delta<<=4; // Delta of address - pal |= 0x80; for (; width; 
width--,sx+=8,tile+=delta) { unsigned int pack; @@ -987,25 +1027,9 @@ static void DrawSpritesHiAS(unsigned char *sprited, int sh) if(sx>=328) break; // Offscreen pack = *(unsigned int *)(PicoMem.vram + (tile & 0x7fff)); - fTileFunc(sx, pack, pal); + fTileFunc(pd + sx, mb + sx, pack, pal); } } - - if (!sh || !(sprited[1]&SPRL_MAY_HAVE_OP)) return; - - /* nasty 1: remove 'sprite' flags */ - { - int c = 320/4/4, *zb = (int *)(Pico.est.HighCol+8); - while (c--) - { - *zb++ &= 0x7f7f7f7f; *zb++ &= 0x7f7f7f7f; - *zb++ &= 0x7f7f7f7f; *zb++ &= 0x7f7f7f7f; - } - } - - /* nasty 2: sh operator pass */ - sprited[0] = sh_cnt; - DrawSpritesSHi(sprited, &Pico.est); } @@ -1272,17 +1296,15 @@ void FinalizeLine555(int sh, int line, struct PicoEState *est) } { -#ifndef PSP - int i, mask=0xff; - if (!sh && (est->rendstatus & PDRAW_SPR_LO_ON_HI)) - mask=0x3f; // accurate sprites, upper bits are priority stuff +#if 1 + int i; for (i = 0; i < len; i++) - pd[i] = pal[ps[i] & mask]; + pd[i] = pal[ps[i]]; #else extern void amips_clut(unsigned short *dst, unsigned char *src, unsigned short *pal, int count); extern void amips_clut_6bit(unsigned short *dst, unsigned char *src, unsigned short *pal, int count); - if (!sh && (est->rendstatus & PDRAW_SPR_LO_ON_HI)) + if (!sh) amips_clut_6bit(pd, ps, pal, len); else amips_clut(pd, ps, pal, len); #endif diff --git a/pico/pico.h b/pico/pico.h index f22ef606..ac1550d4 100644 --- a/pico/pico.h +++ b/pico/pico.h @@ -197,7 +197,6 @@ void PicoDoHighPal555(int sh, int line, struct PicoEState *est); // internals #define PDRAW_SPRITES_MOVED (1<<0) // (asm) #define PDRAW_WND_DIFF_PRIO (1<<1) // not all window tiles use same priority -#define PDRAW_SPR_LO_ON_HI (1<<2) // seen sprites without layer pri bit ontop spr. 
with that bit #define PDRAW_INTERLACE (1<<3) #define PDRAW_DIRTY_SPRITES (1<<4) // (asm) #define PDRAW_SONIC_MODE (1<<5) // mid-frame palette changes for 8bit renderer diff --git a/platform/psp/emu.c b/platform/psp/emu.c index 0656f581..5c0cb57f 100644 --- a/platform/psp/emu.c +++ b/platform/psp/emu.c @@ -223,10 +223,6 @@ static void do_pal_update(int allow_sh, int allow_as) localPal[0xe0] = 0; localPal[0xf0] = 0x001f; } - else if (allow_as && (Pico.est.rendstatus & PDRAW_SPR_LO_ON_HI)) - { - memcpy(dpal + 0x80/2, localPal, 0x40*2); - } } static void do_slowmode_lines(int line_to) @@ -250,7 +246,7 @@ static void EmuScanPrepare(void) if (Pico.m.dirtyPal) do_pal_update(1, 1); - if ((Pico.est.rendstatus & PDRAW_SPR_LO_ON_HI) && !(Pico.video.reg[0xC]&8)) + if (!(Pico.video.reg[0xC] & 8)) amips_clut_f = amips_clut_6bit; else amips_clut_f = amips_clut; } From 0a0073dc9fae7d867d737623d30917cc4b285b16 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 28 Jan 2018 19:13:01 +0200 Subject: [PATCH 0154/1110] fix a corner case with h-int --- pico/pico_cmn.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pico/pico_cmn.c b/pico/pico_cmn.c index 95b6b103..1f89da90 100644 --- a/pico/pico_cmn.c +++ b/pico/pico_cmn.c @@ -113,9 +113,7 @@ static int PicoFrameHints(void) z80_resetCycles(); PsndStartFrame(); - // Load H-Int counter - hint = (pv->status & PVS_ACTIVE) ? 
pv->hint_cnt : pv->reg[10]; - + hint = pv->hint_cnt; pv->status |= PVS_ACTIVE; for (y = 0; ; y++) @@ -306,11 +304,14 @@ static int PicoFrameHints(void) PAD_DELAY(); - if ((pv->status & PVS_ACTIVE) && --hint < 0) - { - hint = pv->reg[10]; // Reload H-Int counter - do_hint(pv); + if (unlikely(pv->status & PVS_ACTIVE)) { + if (--hint < 0) { + hint = pv->reg[10]; // Reload H-Int counter + do_hint(pv); + } } + else + hint = pv->reg[10]; // Run scanline: Pico.t.m68c_line_start = Pico.t.m68c_aim; From 2d2e57b2cf580eeea067c667183f8354d75b0238 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 28 Jan 2018 19:59:58 +0200 Subject: [PATCH 0155/1110] fix some issues with menu bg --- platform/common/menu_pico.c | 5 +++-- platform/common/plat_sdl.c | 2 +- platform/libpicofe | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/platform/common/menu_pico.c b/platform/common/menu_pico.c index ab91e1c2..7b0cd78c 100644 --- a/platform/common/menu_pico.c +++ b/platform/common/menu_pico.c @@ -73,6 +73,7 @@ static void make_bg(int no_scale) { unsigned short *src = (void *)g_menubg_src_ptr; int w = g_screen_width, h = g_screen_height; + int pp = g_screen_ppitch; short *dst; int x, y; @@ -87,7 +88,7 @@ static void make_bg(int no_scale) d += (g_menuscreen_h / 2 - h * 2 / 2) * g_menuscreen_w / 2; d += (g_menuscreen_w / 2 - w * 2 / 2) / 2; - for (y = 0; y < h; y++, src += w, d += g_menuscreen_w*2/2) { + for (y = 0; y < h; y++, src += pp, d += g_menuscreen_w*2/2) { for (x = 0; x < w; x++) { t = src[x]; t = ((t & 0xf79e)>>1) - ((t & 0xc618)>>3); @@ -107,7 +108,7 @@ static void make_bg(int no_scale) (g_menuscreen_w / 2 - w / 2); // darken the active framebuffer - for (; h > 0; dst += g_menuscreen_w, src += g_screen_ppitch, h--) + for (; h > 0; dst += g_menuscreen_w, src += pp, h--) menu_darken_bg(dst, src, w, 1); } diff --git a/platform/common/plat_sdl.c b/platform/common/plat_sdl.c index 3948cc41..4446f72e 100644 --- a/platform/common/plat_sdl.c +++ b/platform/common/plat_sdl.c @@ 
-273,7 +273,7 @@ void plat_init(void) shadow_size = 320 * 480 * 2; shadow_fb = malloc(shadow_size); - g_menubg_ptr = malloc(shadow_size); + g_menubg_ptr = calloc(1, shadow_size); if (shadow_fb == NULL || g_menubg_ptr == NULL) { fprintf(stderr, "OOM\n"); exit(1); diff --git a/platform/libpicofe b/platform/libpicofe index 2b27288e..f8cd6a08 160000 --- a/platform/libpicofe +++ b/platform/libpicofe @@ -1 +1 @@ -Subproject commit 2b27288eb44ef9247d2a948a207d3ac9b835421a +Subproject commit f8cd6a082bb9c228397a0436f28818b74d8e9636 From fd587b673e51820d82cfeec142048a97faeafd05 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 31 Jan 2018 23:44:45 +0200 Subject: [PATCH 0156/1110] ctr/3ds: attempt to improve cache flushing --- platform/libretro/3ds/3ds_utils.c | 45 +++++++++---------------------- platform/libretro/3ds/3ds_utils.h | 3 --- 2 files changed, 13 insertions(+), 35 deletions(-) diff --git a/platform/libretro/3ds/3ds_utils.c b/platform/libretro/3ds/3ds_utils.c index e0f76ca2..7fe47639 100644 --- a/platform/libretro/3ds/3ds_utils.c +++ b/platform/libretro/3ds/3ds_utils.c @@ -20,48 +20,29 @@ static void ctr_enable_all_svc_kernel(void) svc_access_control[3]=0x3FFFFFFF; } - -static void ctr_invalidate_ICache_kernel(void) -{ - __asm__ volatile( - "cpsid aif\n\t" - "mov r0, #0\n\t" - "mcr p15, 0, r0, c7, c5, 0\n\t"); -} - -static void ctr_flush_DCache_kernel(void) -{ - __asm__ volatile( - "cpsid aif\n\t" - "mov r0, #0\n\t" - "mcr p15, 0, r0, c7, c10, 0\n\t"); - -} - - static void ctr_enable_all_svc(void) { svcBackdoor((ctr_callback_type)ctr_enable_all_svc_kernel); } -void ctr_invalidate_ICache(void) +static void ctr_clean_invalidate_kernel(void) { -// __asm__ volatile("svc 0x2E\n\t"); - svcBackdoor((ctr_callback_type)ctr_invalidate_ICache_kernel); - + __asm__ volatile( + "mrs r1, cpsr\n" + "cpsid aif\n" // disable interrupts + "mov r0, #0\n" + "mcr p15, 0, r0, c7, c10, 0\n" // clean dcache + "mcr p15, 0, r0, c7, c10, 4\n" // DSB + "mcr p15, 0, r0, c7, c5, 0\n" // invalidate 
icache+BTAC + "msr cpsr_cx, r1\n" // restore interrupts + ::: "r0", "r1"); } -void ctr_flush_DCache(void) -{ -// __asm__ volatile("svc 0x4B\n\t"); - svcBackdoor((ctr_callback_type)ctr_flush_DCache_kernel); -} - - void ctr_flush_invalidate_cache(void) { - ctr_flush_DCache(); - ctr_invalidate_ICache(); +// __asm__ volatile("svc 0x2E\n\t"); +// __asm__ volatile("svc 0x4B\n\t"); + svcBackdoor((ctr_callback_type)ctr_clean_invalidate_kernel); } int ctr_svchack_init(void) diff --git a/platform/libretro/3ds/3ds_utils.h b/platform/libretro/3ds/3ds_utils.h index fe97985c..ae917b0e 100644 --- a/platform/libretro/3ds/3ds_utils.h +++ b/platform/libretro/3ds/3ds_utils.h @@ -1,9 +1,6 @@ #ifndef _3DS_UTILS_H #define _3DS_UTILS_H -void ctr_invalidate_ICache(void); -void ctr_flush_DCache(void); - void ctr_flush_invalidate_cache(void); int ctr_svchack_init(void); From 079bc1bf446e57d81ffaa87dab66b5e90f0f6f01 Mon Sep 17 00:00:00 2001 From: orbea Date: Sun, 1 Apr 2018 19:43:22 -0700 Subject: [PATCH 0157/1110] Makefile: Build with optimizations if DEBUG=0 --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 78fa5386..34f5d403 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,8 @@ TARGET ?= PicoDrive +DEBUG ?= 0 CFLAGS += -Wall -g CFLAGS += -I. 
-ifndef DEBUG +ifeq "$(DEBUG)" "0" CFLAGS += -O3 -DNDEBUG endif From c7397eda552b329779e342538965eea47ca113c0 Mon Sep 17 00:00:00 2001 From: orbea Date: Mon, 2 Apr 2018 16:18:52 -0700 Subject: [PATCH 0158/1110] Makefile: Update one more DEBUG conditional --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 34f5d403..2903a689 100644 --- a/Makefile +++ b/Makefile @@ -223,7 +223,7 @@ pico/cd/gfx_cd.o: CFLAGS += -fno-strict-aliasing # on x86, this is reduced by ~300MB when debug info is off (but not on ARM) # not using O3 and -fno-expensive-optimizations seems to also help, but you may # want to remove this stuff for better performance if your compiler can handle it -ifndef DEBUG +ifeq "$(DEBUG)" "0" cpu/fame/famec.o: CFLAGS += -g0 -O2 -fno-expensive-optimizations endif From cdb923c40d4816469c2161b0f5127c2cfd8ec706 Mon Sep 17 00:00:00 2001 From: orbea Date: Tue, 3 Apr 2018 10:41:26 -0700 Subject: [PATCH 0159/1110] libretro: Allow setting GIT_VERSION. 
--- Makefile.libretro | 2 +- jni/Android.mk | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.libretro b/Makefile.libretro index 05e8bd8d..1e07d50f 100644 --- a/Makefile.libretro +++ b/Makefile.libretro @@ -26,7 +26,7 @@ CFLAGS ?= STATIC_LINKING:= 0 TARGET_NAME := picodrive LIBM := -lm -GIT_VERSION := " $(shell git rev-parse --short HEAD || echo unknown)" +GIT_VERSION ?= " $(shell git rev-parse --short HEAD || echo unknown)" ifneq ($(GIT_VERSION)," unknown") CFLAGS += -DGIT_VERSION=\"$(GIT_VERSION)\" endif diff --git a/jni/Android.mk b/jni/Android.mk index bc817026..9252d9f9 100644 --- a/jni/Android.mk +++ b/jni/Android.mk @@ -2,7 +2,7 @@ LOCAL_PATH := $(call my-dir) include $(CLEAR_VARS) -GIT_VERSION := " $(shell git rev-parse --short HEAD || echo unknown)" +GIT_VERSION ?= " $(shell git rev-parse --short HEAD || echo unknown)" ifneq ($(GIT_VERSION)," unknown") LOCAL_CFLAGS += -DGIT_VERSION=\"$(GIT_VERSION)\" endif From 7ddd8501a72bd53a98083e7137a2e26f7d615a7f Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 25 Jan 2019 01:31:56 +0200 Subject: [PATCH 0160/1110] release 1.93 just because orbea wants a release tarball --- cpu/cyclone | 2 +- platform/common/version.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpu/cyclone b/cpu/cyclone index b889883d..5fc93bdd 160000 --- a/cpu/cyclone +++ b/cpu/cyclone @@ -1 +1 @@ -Subproject commit b889883d36b2d247488c82d79d1eaab4dd41d236 +Subproject commit 5fc93bddb71461abb7619cf506d6f15ba8a675f3 diff --git a/platform/common/version.h b/platform/common/version.h index ce4223b5..f65ba1ed 100644 --- a/platform/common/version.h +++ b/platform/common/version.h @@ -1 +1 @@ -#define VERSION "1.92" +#define VERSION "1.93" From c79d0bb90f98b1b3add067d87758b3269758078a Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 15 Mar 2019 20:51:51 +0100 Subject: [PATCH 0161/1110] fix gp2x compilation (using linaro arm gcc 4.7 on ubuntu) --- Makefile | 5 ++++- pico/sound/ym2612.h | 9 ++++----- 
platform/common/common.mak | 2 +- platform/gp2x/emu.c | 6 +++--- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index 2903a689..0df41f5d 100644 --- a/Makefile +++ b/Makefile @@ -119,9 +119,12 @@ OBJS += platform/gp2x/vid_pollux.o OBJS += platform/gp2x/warm.o USE_FRONTEND = 1 PLATFORM_MP3 = 1 +PLATFORM_ZLIB = 1 +HAVE_ARMv6 = 0 endif ifeq "$(PLATFORM)" "libretro" OBJS += platform/libretro/libretro.o +PLATFORM_ZLIB = 1 endif ifeq "$(USE_FRONTEND)" "1" @@ -164,7 +167,7 @@ else OBJS += platform/common/mp3_dummy.o endif -ifeq "$(PLATFORM)" "libretro" +ifeq "$(PLATFORM_ZLIB)" "1" # zlib OBJS += zlib/gzio.o zlib/inffast.o zlib/inflate.o zlib/inftrees.o zlib/trees.o \ zlib/deflate.o zlib/crc32.o zlib/adler32.o zlib/zutil.o zlib/compress.o zlib/uncompr.o diff --git a/pico/sound/ym2612.h b/pico/sound/ym2612.h index a2921b22..bbe6b1a4 100644 --- a/pico/sound/ym2612.h +++ b/pico/sound/ym2612.h @@ -176,20 +176,19 @@ int YM2612PicoStateLoad2(int *tat, int *tbt); #else /* GP2X specific */ #include "../../platform/gp2x/940ctl.h" -extern int PicoIn.opt; #define YM2612Init(baseclock,rate) { \ - if (PicoIn.opt&0x200) YM2612Init_940(baseclock, rate); \ + if (PicoIn.opt&POPT_EXT_FM) YM2612Init_940(baseclock, rate); \ else YM2612Init_(baseclock, rate); \ } #define YM2612ResetChip() { \ - if (PicoIn.opt&0x200) YM2612ResetChip_940(); \ + if (PicoIn.opt&POPT_EXT_FM) YM2612ResetChip_940(); \ else YM2612ResetChip_(); \ } #define YM2612UpdateOne(buffer,length,stereo,is_buf_empty) \ - (PicoIn.opt&0x200) ? YM2612UpdateOne_940(buffer, length, stereo, is_buf_empty) : \ + (PicoIn.opt&POPT_EXT_FM) ? 
YM2612UpdateOne_940(buffer, length, stereo, is_buf_empty) : \ YM2612UpdateOne_(buffer, length, stereo, is_buf_empty); #define YM2612PicoStateLoad() { \ - if (PicoIn.opt&0x200) YM2612PicoStateLoad_940(); \ + if (PicoIn.opt&POPT_EXT_FM) YM2612PicoStateLoad_940(); \ else YM2612PicoStateLoad_(); \ } #endif /* __GP2X__ */ diff --git a/platform/common/common.mak b/platform/common/common.mak index 89e46051..e329c247 100644 --- a/platform/common/common.mak +++ b/platform/common/common.mak @@ -181,7 +181,7 @@ $(FR)cpu/cyclone/Cyclone.h: $(FR)cpu/cyclone/Cyclone.s: $(FR)cpu/$(CYCLONE_CONFIG) @echo building Cyclone... - @make -C $(R)cpu/cyclone/ CONFIG_FILE=../$(CYCLONE_CONFIG) + @make -C $(R)cpu/cyclone/ CONFIG_FILE=../$(CYCLONE_CONFIG) HAVE_ARMv6=$(HAVE_ARMv6) $(FR)cpu/cyclone/Cyclone.s: $(FR)cpu/cyclone/*.cpp $(FR)cpu/cyclone/*.h diff --git a/platform/gp2x/emu.c b/platform/gp2x/emu.c index 7e9a132f..18d8a57e 100644 --- a/platform/gp2x/emu.c +++ b/platform/gp2x/emu.c @@ -298,12 +298,12 @@ static int make_local_pal_md(int fast_mode) { int pallen = 0xc0; - bgr444_to_rgb32(localPal, Pico.cram); + bgr444_to_rgb32(localPal, PicoMem.cram); if (fast_mode) return 0x40; if (Pico.video.reg[0xC] & 8) { // shadow/hilight mode - bgr444_to_rgb32_sh(localPal, Pico.cram); + bgr444_to_rgb32_sh(localPal, PicoMem.cram); localPal[0xc0] = 0x0000c000; localPal[0xd0] = 0x00c00000; localPal[0xe0] = 0x00000000; // reserved pixels for OSD @@ -322,7 +322,7 @@ static int make_local_pal_md(int fast_mode) static int make_local_pal_sms(int fast_mode) { - unsigned short *spal = Pico.cram; + unsigned short *spal = PicoMem.cram; unsigned int *dpal = (void *)localPal; unsigned int i, t; From 340e528ff8cb08e8e0be64aa4ca82b2aa5b4de9a Mon Sep 17 00:00:00 2001 From: kub Date: Mon, 18 Mar 2019 23:14:07 +0100 Subject: [PATCH 0162/1110] make gp2x mp3 playback functional (need to unpack and compile helix decoder separately in platform/common/helix) --- Makefile | 4 +- platform/common/helix/Makefile | 42 ++++++++ 
platform/common/helix/lib.c | 122 ++++++++++++++++++++++++ platform/common/mp3.c | 27 ------ platform/common/mp3.h | 4 +- platform/common/mp3_helix.c | 37 ++++++- platform/common/mp3_sync.c | 27 ++++++ platform/gp2x/940ctl.c | 6 +- platform/gp2x/code940/940.c | 13 ++- platform/gp2x/code940/Makefile | 39 +++++--- platform/gp2x/code940/mp3test.c | 2 +- platform/gp2x/code940/uClibc/memset.s | 2 +- platform/gp2x/code940/uClibc/wrappers.c | 8 ++ 13 files changed, 274 insertions(+), 59 deletions(-) create mode 100644 platform/common/helix/Makefile create mode 100644 platform/common/helix/lib.c create mode 100644 platform/common/mp3_sync.c diff --git a/Makefile b/Makefile index 0df41f5d..dff08b2b 100644 --- a/Makefile +++ b/Makefile @@ -159,8 +159,10 @@ endif endif # USE_FRONTEND -OBJS += platform/common/mp3.o +OBJS += platform/common/mp3.o platform/common/mp3_sync.o ifeq "$(PLATFORM_MP3)" "1" +platform/common/mp3_helix.o: CFLAGS += -Iplatform/libpicofe +OBJS += platform/common/mp3_helix.o else ifeq "$(HAVE_LIBAVCODEC)" "1" OBJS += platform/common/mp3_libavcodec.o else diff --git a/platform/common/helix/Makefile b/platform/common/helix/Makefile new file mode 100644 index 00000000..0021ea8e --- /dev/null +++ b/platform/common/helix/Makefile @@ -0,0 +1,42 @@ +CROSS ?= arm-linux-gnueabi- + +CC = $(CROSS)gcc +AS = $(CROSS)as +AR = $(CROSS)ar +TOOLCHAIN = $(notdir $(CROSS)) + +CFLAGS += -Ipub -O2 -Wall -fstrict-aliasing -ffast-math +ifneq ($(findstring arm-,$(TOOLCHAIN)),) +CFLAGS += -mcpu=arm940t -mtune=arm940t -mfloat-abi=soft -mfpu=fpa -mabi=apcs-gnu -mno-thumb-interwork +ASFLAGS = -mcpu=arm940t -mfloat-abi=soft -mfpu=fpa -mabi=apcs-gnu +OBJS += real/arm/asmpoly_gcc.o +else +CFLAGS += -m32 +ASFLAGS += -m32 +OBJS += real/polyphase.o +endif + +LIB = $(TOOLCHAIN)helix_mp3.a +SHLIB = $(TOOLCHAIN)helix_mp3.so + +all: $(LIB) $(SHLIB) + + +OBJS += mp3dec.o mp3tabs.o +#OBJS += ipp/bitstream.o ipp/buffers.o ipp/dequant.o ipp/huffman.o ipp/imdct.o ipp/subband.o +OBJS += 
real/bitstream.o real/buffers.o real/dct32.o real/dequant.o real/dqchan.o real/huffman.o +OBJS += real/hufftabs.o real/imdct.o real/scalfact.o real/stproc.o real/subband.o real/trigtabs.o + +OBJS += lib.o + +real/arm/asmpoly_gcc.o: real/arm/asmpoly_gcc.s + $(CC) -o $@ $(ASFLAGS) -c $< + +$(LIB) : $(OBJS) + $(AR) r $@ $^ +$(SHLIB) : $(OBJS) /home/build/opt/open2x/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1/libgcc.a + $(CC) -o $@ -nostdlib -shared $(CFLAGS) $^ + +clean: + $(RM) -f $(OBJS) + diff --git a/platform/common/helix/lib.c b/platform/common/helix/lib.c new file mode 100644 index 00000000..d7c511be --- /dev/null +++ b/platform/common/helix/lib.c @@ -0,0 +1,122 @@ +#include +#include + +// libgcc has this with gcc 4.x +void raise(int sig) +{ +} + +// very limited heap functions for helix decoder + +static char heap[65000] __attribute__((aligned(16))); +static long heap_offs; + +void __malloc_init(void) +{ + heap_offs = 0; +} + +void *malloc(size_t size) +{ + void *chunk = heap + heap_offs; + size = (size+15) & ~15; + if (heap_offs + size > sizeof(heap)) + return NULL; + else { + heap_offs += size; + return chunk; + } +} + +void free(void *chunk) +{ + if (chunk == heap) + heap_offs = 0; +} + +#if 0 +void *memcpy (void *dest, const void *src, size_t n) +{ + char *_dest = dest; + const char *_src = src; + while (n--) *_dest++ = *_src++; + return dest; +} + +void *memmove (void *dest, const void *src, size_t n) +{ + char *_dest = dest+n; + const char *_src = src+n; + if (dest <= src || dest >= _src) + return memcpy(dest, src, n); + while (n--) *--_dest = *--_src; + return dest; +} +#else +/* memcpy/memmove in C with some simple optimizations. + * ATTN does dirty aliasing tricks with undefined behaviour by standard. + * (this works fine with gcc, though...) 
+ */ +void *memcpy(void *dest, const void *src, size_t n) +{ + struct _16 { uint32_t a[4]; }; + union { const void *v; char *c; uint64_t *l; struct _16 *s; } + ss = { src }, ds = { dest }; + const int lm = sizeof(uint32_t)-1; + + if ((((unsigned)ss.c ^ (unsigned)ds.c) & lm) == 0) { + /* fast copy if pointers have the same aligment */ + while (((unsigned)ss.c & lm) && n > 0) /* align to word */ + *ds.c++ = *ss.c++, n--; + while (n >= sizeof(struct _16)) /* copy 16 bytes blocks */ + *ds.s++ = *ss.s++, n -= sizeof(struct _16); + if (n >= sizeof(uint64_t)) /* copy leftover 8 byte block */ + *ds.l++ = *ss.l++, n -= sizeof(uint64_t); + } else { + /* byte copy if pointers are unaligned */ + while (n >= 8) { /* copy 8 byte blocks */ + *ds.c++ = *ss.c++, n--; *ds.c++ = *ss.c++, n--; + *ds.c++ = *ss.c++, n--; *ds.c++ = *ss.c++, n--; + *ds.c++ = *ss.c++, n--; *ds.c++ = *ss.c++, n--; + *ds.c++ = *ss.c++, n--; *ds.c++ = *ss.c++, n--; + } + } + /* copy max. 8 leftover bytes */ + while (n > 0) + *ds.c++ = *ss.c++, n--; + return dest; +} + +void *memmove (void *dest, const void *src, size_t n) +{ + struct _16 { uint32_t a[4]; }; + union { const void *v; char *c; uint64_t *l; struct _16 *s; } + ss = { src+n }, ds = { dest+n }; + const int lm = sizeof(uint32_t)-1; + + if (dest <= src || dest >= src+n) + return memcpy(dest, src, n); + + if ((((unsigned)ss.c ^ (unsigned)ds.c) & lm) == 0) { + /* fast copy if pointers have the same aligment */ + while (((unsigned)ss.c & lm) && n > 0) + *--ds.c = *--ss.c, n--; + while (n >= sizeof(struct _16)) + *--ds.s = *--ss.s, n -= sizeof(struct _16); + if (n >= sizeof(uint64_t)) + *--ds.l = *--ss.l, n -= sizeof(uint64_t); + } else { + /* byte copy if pointers are unaligned */ + while (n >= 8) { + *--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--; + *--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--; + *--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--; + *--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--; + } + } + /* copy max. 
8 leftover bytes */ + while (n > 0) + *--ds.c = *--ss.c, n--; + return dest; +} +#endif diff --git a/platform/common/mp3.c b/platform/common/mp3.c index c84962cc..346e0195 100644 --- a/platform/common/mp3.c +++ b/platform/common/mp3.c @@ -21,33 +21,6 @@ unsigned short mpeg1_l3_bitrates[16] = { 0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320 }; -int mp3_find_sync_word(const unsigned char *buf, int size) -{ - const unsigned char *p, *pe; - - /* find byte-aligned syncword - need 12 (MPEG 1,2) or 11 (MPEG 2.5) matching bits */ - for (p = buf, pe = buf + size - 3; p <= pe; p++) - { - int pn; - if (p[0] != 0xff) - continue; - pn = p[1]; - if ((pn & 0xf8) != 0xf8 || // currently must be MPEG1 - (pn & 6) == 0) { // invalid layer - p++; continue; - } - pn = p[2]; - if ((pn & 0xf0) < 0x20 || (pn & 0xf0) == 0xf0 || // bitrates - (pn & 0x0c) != 0) { // not 44kHz - continue; - } - - return p - buf; - } - - return -1; -} - static int try_get_bitrate(unsigned char *buf, int buf_size) { int offs1, offs = 0; diff --git a/platform/common/mp3.h b/platform/common/mp3.h index eb66db88..4a2b230b 100644 --- a/platform/common/mp3.h +++ b/platform/common/mp3.h @@ -12,8 +12,8 @@ int mp3dec_decode(FILE *f, int *file_pos, int file_len); extern unsigned short mpeg1_l3_bitrates[16]; #ifdef __GP2X__ -void mp3_update_local(int *buffer, int length, int stereo); -void mp3_start_play_local(void *f, int pos); +int _mp3dec_start(FILE *f, int fpos_start); +int _mp3dec_decode(FILE *f, int *file_pos, int file_len); #endif #endif // __COMMON_MP3_H__ diff --git a/platform/common/mp3_helix.c b/platform/common/mp3_helix.c index b2785298..75be8df3 100644 --- a/platform/common/mp3_helix.c +++ b/platform/common/mp3_helix.c @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -20,10 +21,15 @@ static HMP3Decoder mp3dec; static unsigned char mp3_input_buffer[2 * 1024]; #ifdef __GP2X__ -#define mp3_update mp3_update_local -#define mp3_start_play mp3_start_play_local +#define 
mp3dec_decode _mp3dec_decode +#define mp3dec_start _mp3dec_start #endif +static void *libhelix; +HMP3Decoder (*p_MP3InitDecoder)(void); +void (*p_MP3FreeDecoder)(HMP3Decoder); +int (*p_MP3Decode)(HMP3Decoder, unsigned char **, int *, short *, int); + int mp3dec_decode(FILE *f, int *file_pos, int file_len) { unsigned char *readPtr; @@ -51,7 +57,7 @@ int mp3dec_decode(FILE *f, int *file_pos, int file_len) bytesLeft -= offset; had_err = err; - err = MP3Decode(mp3dec, &readPtr, &bytesLeft, cdda_out_buffer, 0); + err = p_MP3Decode(mp3dec, &readPtr, &bytesLeft, cdda_out_buffer, 0); if (err) { if (err == ERR_MP3_MAINDATA_UNDERFLOW && !had_err) { // just need another frame @@ -86,10 +92,31 @@ int mp3dec_decode(FILE *f, int *file_pos, int file_len) int mp3dec_start(FILE *f, int fpos_start) { + if (libhelix == NULL) { + libhelix = dlopen("./libhelix.so", RTLD_NOW); + if (libhelix == NULL) { + lprintf("mp3dec: load libhelix.so: %s\n", dlerror()); + return -1; + } + + p_MP3InitDecoder = dlsym(libhelix, "MP3InitDecoder"); + p_MP3FreeDecoder = dlsym(libhelix, "MP3FreeDecoder"); + p_MP3Decode = dlsym(libhelix, "MP3Decode"); + + if (p_MP3InitDecoder == NULL || p_MP3FreeDecoder == NULL + || p_MP3Decode == NULL) + { + lprintf("mp3dec: missing symbol(s) in libhelix.so\n"); + dlclose(libhelix); + libhelix = NULL; + return -1; + } + } + // must re-init decoder for new track if (mp3dec) - MP3FreeDecoder(mp3dec); - mp3dec = MP3InitDecoder(); + p_MP3FreeDecoder(mp3dec); + mp3dec = p_MP3InitDecoder(); return (mp3dec == 0) ? 
-1 : 0; } diff --git a/platform/common/mp3_sync.c b/platform/common/mp3_sync.c new file mode 100644 index 00000000..509c259d --- /dev/null +++ b/platform/common/mp3_sync.c @@ -0,0 +1,27 @@ + +int mp3_find_sync_word(const unsigned char *buf, int size) +{ + const unsigned char *p, *pe; + + /* find byte-aligned syncword - need 12 (MPEG 1,2) or 11 (MPEG 2.5) matching bits */ + for (p = buf, pe = buf + size - 3; p <= pe; p++) + { + int pn; + if (p[0] != 0xff) + continue; + pn = p[1]; + if ((pn & 0xf8) != 0xf8 || // currently must be MPEG1 + (pn & 6) == 0) { // invalid layer + p++; continue; + } + pn = p[2]; + if ((pn & 0xf0) < 0x20 || (pn & 0xf0) == 0xf0 || // bitrates + (pn & 0x0c) != 0) { // not 44kHz + continue; + } + + return p - buf; + } + + return -1; +} diff --git a/platform/gp2x/940ctl.c b/platform/gp2x/940ctl.c index c270bfee..31408d05 100644 --- a/platform/gp2x/940ctl.c +++ b/platform/gp2x/940ctl.c @@ -425,8 +425,7 @@ int YM2612UpdateOne_940(int *buffer, int length, int stereo, int is_buf_empty) int mp3dec_decode(FILE *f, int *file_pos, int file_len) { if (!(PicoIn.opt & POPT_EXT_FM)) { - //mp3_update_local(buffer, length, stereo); - return 0; + return _mp3dec_decode(f, file_pos, file_len); } // check if playback was started, track not ended @@ -457,8 +456,7 @@ int mp3dec_decode(FILE *f, int *file_pos, int file_len) int mp3dec_start(FILE *f, int fpos_start) { if (!(PicoIn.opt & POPT_EXT_FM)) { - //mp3_start_play_local(f, pos); - return -1; + return _mp3dec_start(f, fpos_start); } if (loaded_mp3 != f) diff --git a/platform/gp2x/code940/940.c b/platform/gp2x/code940/940.c index 760816eb..f79db1e5 100644 --- a/platform/gp2x/code940/940.c +++ b/platform/gp2x/code940/940.c @@ -2,7 +2,7 @@ // (c) Copyright 2006-2007, Grazvydas "notaz" Ignotas #include "940shared.h" -#include "../../common/mp3.h" +#include "../../common/helix/pub/mp3dec.h" static _940_data_t *shared_data = (_940_data_t *) 0x00100000; static _940_ctl_t *shared_ctl = (_940_ctl_t *) 0x00200000; @@ -19,7 
+19,7 @@ void drain_wb(void); // is changed by other core just before we update it void set_if_not_changed(int *val, int oldval, int newval); -void _memcpy(void *dst, const void *src, int count); +extern void *memcpy(void *dest, const void *src, unsigned long n); // asm volatile ("mov r0, #0" ::: "r0"); // asm volatile ("mcr p15, 0, r0, c7, c6, 0" ::: "r0"); /* flush dcache */ @@ -153,6 +153,8 @@ void Main940(void) int job = 0; ym2612_940 = &shared_data->ym2612; +// extern unsigned __bss_start__, __bss_end__; +// memset(&__bss_start__, 0, &__bss_end__ - &__bss_start__); for (;;) { @@ -167,6 +169,7 @@ void Main940(void) shared_ctl->writebuff0[0] = shared_ctl->writebuff1[0] = 0xffff; YM2612Init_(shared_ctl->baseclock, shared_ctl->rate); /* Helix mp3 decoder */ + __malloc_init(); shared_data->mp3dec = MP3InitDecoder(); break; @@ -185,7 +188,7 @@ void Main940(void) case JOB940_PICOSTATESAVE2: YM2612PicoStateSave2(0, 0); - _memcpy(shared_ctl->writebuff0, ym2612_940->REGS, 0x200); + memcpy(shared_ctl->writebuff0, ym2612_940->REGS, 0x200); break; case JOB940_PICOSTATELOAD2_PREP: @@ -193,7 +196,7 @@ void Main940(void) break; case JOB940_PICOSTATELOAD2: - _memcpy(ym2612_940->REGS, shared_ctl->writebuff0, 0x200); + memcpy(ym2612_940->REGS, shared_ctl->writebuff0, 0x200); YM2612PicoStateLoad2(0, 0); break; @@ -207,6 +210,7 @@ void Main940(void) case JOB940_MP3RESET: if (shared_data->mp3dec) MP3FreeDecoder(shared_data->mp3dec); + __malloc_init(); shared_data->mp3dec = MP3InitDecoder(); break; } @@ -215,4 +219,3 @@ void Main940(void) dcache_clean(); } } - diff --git a/platform/gp2x/code940/Makefile b/platform/gp2x/code940/Makefile index e327d136..8561551b 100644 --- a/platform/gp2x/code940/Makefile +++ b/platform/gp2x/code940/Makefile @@ -1,17 +1,23 @@ # you may or may not need to change this -#devkit_path = x:/stuff/dev/devkitgp2x/ -devkit_path ?= $(HOME)/opt/devkitGP2X/ -lgcc_path = $(devkit_path)lib/gcc/arm-linux/4.0.3/ -CROSS = arm-linux- +#devkit_path ?= 
$(HOME)/opt/devkitGP2X/ +#lgcc_path = $(devkit_path)lib/gcc/arm-linux/4.0.3/ #CROSS = $(devkit_path)bin/arm-linux- +#devkit_path ?= $(HOME)/opt/open2x +#lgcc_path = $(devkit_path)/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1/ +#CROSS ?= $(devkit_path)/gcc-4.1.1-glibc-2.3.6/bin/arm-open2x-linux- +#devkit_path ?= $(HOME)/opt/arm-unknown-linux-gnu +#lgcc_path = $(HOME)/opt/open2x/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1/ +#CROSS ?= $(devkit_path)/bin/arm-unknown-linux-gnu- +lgcc_path = $(HOME)/opt/open2x/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1/ +CROSS ?= arm-linux-gnueabi- # settings #up = 1 -CFLAGS += -O2 -Wall -fomit-frame-pointer -fstrict-aliasing -ffast-math -CFLAGS += -I../.. -I. -D__GP2X__ -DARM -CFLAGS += -mcpu=arm940t -mtune=arm940t -LDFLAGS = -static -s -e code940 -Ttext 0x0 -L$(lgcc_path) -lgcc +CFLAGS += -O2 -Wall -mno-thumb-interwork -fstrict-aliasing -ffast-math +CFLAGS += -I../../common/helix/pub -I../../.. -I. -D__GP2X__ -DARM +CFLAGS += -mcpu=arm940t -mtune=arm940t -mabi=apcs-gnu -mfloat-abi=soft -mfpu=fpa +LDFLAGS = -static -e code940 -Ttext 0x0 -L$(lgcc_path) -lgcc GCC = $(CROSS)gcc STRIP = $(CROSS)strip @@ -36,7 +42,9 @@ all: $(BIN) # stuff for 940 core # init, emu_control, emu -OBJS940 += 940init.o 940.o 940ym2612.o memcpy.o misc_arm.o mp3.o +OBJS940 += 940init.o 940.o 940ym2612.o misc_arm.o mp3_sync.o +# the asm memcpy code crashes job LOAD2 on 940. Possibly a globbered reg? 
+# OBJS940 += memcpy.o # the asm code seems to be faster when run on 920, but not on 940 for some reason # OBJS940 += ../../Pico/sound/ym2612_asm.o @@ -44,12 +52,13 @@ OBJS940 += 940init.o 940.o 940ym2612.o memcpy.o misc_arm.o mp3.o OBJS940 += uClibc/memset.o uClibc/s_floor.o uClibc/e_pow.o uClibc/e_sqrt.o uClibc/s_fabs.o OBJS940 += uClibc/s_scalbn.o uClibc/s_copysign.o uClibc/k_sin.o uClibc/k_cos.o uClibc/s_sin.o OBJS940 += uClibc/e_rem_pio2.o uClibc/k_rem_pio2.o uClibc/e_log.o uClibc/wrappers.o +LIBHELIX ?= ../../common/helix/$(notdir $(CROSS))helix_mp3.a $(BIN) : code940.elf @echo ">>>" $@ $(OBJCOPY) -O binary $< $@ -code940.elf : $(OBJS940) ../../common/helix/$(CROSS)helix-mp3.a +code940.elf : $(OBJS940) $(LIBHELIX) @echo ">>>" $@ $(LD) $^ $(LDFLAGS) -o $@ -Map code940.map @@ -64,8 +73,12 @@ misc_arm.o : ../../../pico/misc_arm.s @echo ">>>" $@ $(GCC) $(CFLAGS) -DEXTERNAL_YM2612 -c $< -o $@ -../../common/helix/helix_mp3.a: - @make -C ../../common/helix/ +mp3_sync.o: ../../common/mp3_sync.c + @echo ">>>" $@ + $(GCC) $(CFLAGS) -Os -DCODE940 -c $< -o $@ + +$(LIBHELIX): + @$(MAKE) -C ../../common/helix/ CROSS=$(CROSS) up: $(BIN) @@ -82,7 +95,7 @@ tidy: ## OBJSMP3T = mp3test.o ../gp2x.o ../asmutils.o ../usbjoy.o -mp3test.gpe : $(OBJSMP3T) ../helix/helix_mp3.a +mp3test.gpe : $(OBJSMP3T) $(LIBHELIX) $(GCC) -static -o $@ $^ $(STRIP) $@ @cp -v $@ /mnt/gp2x/mnt/sd diff --git a/platform/gp2x/code940/mp3test.c b/platform/gp2x/code940/mp3test.c index 9072d858..cd2a6651 100644 --- a/platform/gp2x/code940/mp3test.c +++ b/platform/gp2x/code940/mp3test.c @@ -13,7 +13,7 @@ //#include "emu.h" //#include "menu.h" #include "../asmutils.h" -#include "../helix/pub/mp3dec.h" +#include "../../helix/pub/mp3dec.h" /* we will need some gp2x internals here */ extern volatile unsigned short *gp2x_memregs; /* from minimal library rlyeh */ diff --git a/platform/gp2x/code940/uClibc/memset.s b/platform/gp2x/code940/uClibc/memset.s index 0923014c..80cdcb58 100644 --- 
a/platform/gp2x/code940/uClibc/memset.s +++ b/platform/gp2x/code940/uClibc/memset.s @@ -22,7 +22,7 @@ .text .global memset .type memset,%function - .align 4 + .align 2 memset: mov a4, a1 diff --git a/platform/gp2x/code940/uClibc/wrappers.c b/platform/gp2x/code940/uClibc/wrappers.c index cc4e269e..ce95a48c 100644 --- a/platform/gp2x/code940/uClibc/wrappers.c +++ b/platform/gp2x/code940/uClibc/wrappers.c @@ -4,9 +4,17 @@ double pow(double x, double y) { return __ieee754_pow(x, y); } +double __pow_finite(double x, double y) +{ + return __ieee754_pow(x, y); +} double log(double x) { return __ieee754_log(x); } +double __log_finite(double x) +{ + return __ieee754_log(x); +} From 2c479106af9fd2a037c9eb6118ed0909c0fc1400 Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 20 Mar 2019 19:07:16 +0100 Subject: [PATCH 0163/1110] arm asm syntax fixes for open2x --- Makefile | 5 +++-- pico/carthw/svp/stub_arm.S | 16 ++++++++-------- pico/videoport.c | 6 ------ 3 files changed, 11 insertions(+), 16 deletions(-) diff --git a/Makefile b/Makefile index dff08b2b..95d4b5d8 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,9 @@ TARGET ?= PicoDrive DEBUG ?= 0 -CFLAGS += -Wall -g +CFLAGS += -Wall -ggdb -ffunction-sections -fdata-sections CFLAGS += -I. ifeq "$(DEBUG)" "0" -CFLAGS += -O3 -DNDEBUG +CFLAGS += -O2 -finline-functions -DNDEBUG endif # This is actually needed, bevieve me. 
@@ -11,6 +11,7 @@ endif ifndef NO_ALIGN_FUNCTIONS CFLAGS += -falign-functions=2 endif +LDFLAGS += -Wl,--gc-sections all: config.mak target_ diff --git a/pico/carthw/svp/stub_arm.S b/pico/carthw/svp/stub_arm.S index 9d5c5fa1..736d459b 100644 --- a/pico/carthw/svp/stub_arm.S +++ b/pico/carthw/svp/stub_arm.S @@ -8,7 +8,7 @@ #include "../../arm_features.h" -.syntax unified +@.syntax unified .text .align 2 @@ -281,8 +281,8 @@ ssp_hle_902_loop: bgt ssp_hle_902_loop tst r12, #1 - ldrhne r0, [r2], #2 - strhne r0, [r3], #2 + ldrneh r0, [r2], #2 + strneh r0, [r3], #2 ldr r0, [r7, #SSP_OFFS_IRAM_ROM] add r1, r7, #0x200 @@ -501,7 +501,7 @@ FUNCTION(ssp_hle_07_036): mov r12, #0x4000 orr r12,r12,#0x0018 subs r12,r3, r12 - subsne r12,r12,#0x0400 + subnes r12,r12,#0x0400 blne tr_unhandled orr r2, r2, r2, lsl #16 @@ -510,7 +510,7 @@ FUNCTION(ssp_hle_07_036): hle_07_036_no_ovrwr: tst r1, #2 - strhne r2, [r1], #0x3e @ align + strneh r2, [r1], #0x3e @ align subne r0, r0, #1 subs r0, r0, #4 blt hle_07_036_l2 @@ -525,7 +525,7 @@ hle_07_036_l2: tst r0, #2 strne r2, [r1], #0x40 tst r0, #1 - strhne r2, [r1], #2 + strneh r2, [r1], #2 b hle_07_036_end_copy hle_07_036_ovrwr: @@ -562,10 +562,10 @@ hle_07_036_ol1: hle_07_036_ol2: tst r0, #1 - ldrhne r3, [r1] + ldrneh r3, [r1] andne r3, r3, r12 orrne r3, r3, r2 - strhne r3, [r1], #2 + strneh r3, [r1], #2 hle_07_036_end_copy: ldr r2, [r7, #SSP_OFFS_DRAM] diff --git a/pico/videoport.c b/pico/videoport.c index aadba24f..cd76dc04 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -14,12 +14,6 @@ extern const unsigned char hcounts_32[]; extern const unsigned char hcounts_40[]; -#ifndef UTYPES_DEFINED -typedef unsigned char u8; -typedef unsigned short u16; -typedef unsigned int u32; -#define UTYPES_DEFINED -#endif int (*PicoDmaHook)(unsigned int source, int len, unsigned short **base, unsigned int *mask) = NULL; From e0396782f39758bf6e917410b90280b56f611be5 Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 20 Mar 2019 19:08:29 +0100 Subject: [PATCH 
0164/1110] config templates for gp2x, caanoo, dingux either with system toolchain (open2x,gph,opendingux) or ubuntu arm(gcc 4.7 is highest possible),mips --- config.caanoo | 16 ++++++++++++++++ config.caanoo47 | 16 ++++++++++++++++ config.dingux | 16 ++++++++++++++++ config.dingux54 | 16 ++++++++++++++++ config.gp2x | 16 ++++++++++++++++ config.gp2x47 | 16 ++++++++++++++++ 6 files changed, 96 insertions(+) create mode 100644 config.caanoo create mode 100644 config.caanoo47 create mode 100644 config.dingux create mode 100644 config.dingux54 create mode 100644 config.gp2x create mode 100644 config.gp2x47 diff --git a/config.caanoo b/config.caanoo new file mode 100644 index 00000000..8e62573c --- /dev/null +++ b/config.caanoo @@ -0,0 +1,16 @@ +# Automatically generated by configure +# Configured with: './configure' '--platform=gp2x' +CC = arm-gph-linux-gnueabi-gcc +CXX = arm-gph-linux-gnueabi-g++ +AS = arm-gph-linux-gnueabi-as +STRIP = arm-gph-linux-gnueabi-strip +CFLAGS += -mfloat-abi=soft -mcpu=arm920t -mtune=arm920t -fno-stack-protector -D__GP2X__ -DGPERF +CFLAGS += -fno-gcse -funswitch-loops -fweb -ftree-loop-im #-fpredictive-commoning -ftree-loop-distribution -frename-registers +CFLAGS += -I/home/build/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/include -I/home/build/src/gp2x/armroot-eabi/include +ASFLAGS += -mfloat-abi=soft -mcpu=arm920t +LDFLAGS += -B/home/build/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/lib/gcc/arm-gph-linux-gnueabi/4.2.4 -B/home/build/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L/home/build/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L/home/build/src/gp2x/armroot-eabi/lib -static +LDLIBS += -lpng -lm -ldl -lprofiler -lstdc++ + +ARCH = arm +PLATFORM = gp2x +SOUND_DRIVERS = oss diff --git a/config.caanoo47 b/config.caanoo47 new file mode 100644 index 00000000..f3efde0f --- /dev/null +++ b/config.caanoo47 @@ -0,0 +1,16 @@ +# 
Automatically generated by configure +# Configured with: './configure' '--platform=gp2x' +CC = arm-linux-gnueabi-gcc +CXX = arm-linux-gnueabi-g++ +AS = arm-linux-gnueabi-as +STRIP = arm-linux-gnueabi-strip +CFLAGS += -mfloat-abi=soft -mcpu=arm920t -mtune=arm920t -Wno-unused-result -fno-stack-protector -D__GP2X__ +CFLAGS += -fno-gcse -funswitch-loops -fweb -ftree-loop-im #-fpredictive-commoning -ftree-loop-distribution -frename-registers +CFLAGS += -I/home/build/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/include -I/home/build/src/gp2x/armroot-eabi/include +ASFLAGS += -mfloat-abi=soft -mcpu=arm920t +LDFLAGS += -B/home/build/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/lib/gcc/arm-gph-linux-gnueabi/4.2.4 -B/home/build/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L/home/build/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -static +LDLIBS += -lpng -lm -ldl + +ARCH = arm +PLATFORM = gp2x +SOUND_DRIVERS = oss diff --git a/config.dingux b/config.dingux new file mode 100644 index 00000000..6611991c --- /dev/null +++ b/config.dingux @@ -0,0 +1,16 @@ +# Automatically generated by configure +# Configured with: './configure' '--platform=generic' +CC = mipsel-linux-gcc +CXX = mipsel-linux-g++ +AS = mipsel-linux-as +STRIP = mipsel-linux-strip +CFLAGS += -I/home/build/opt/opendingux-toolchain/usr/include/ +CFLAGS += -I/home/build/opt/opendingux-toolchain/usr/include/SDL +CFLAGS += -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -fno-stack-protector +ASFLAGS += +LDFLAGS += +LDLIBS += -B/home/build/opt/opendingux-toolchain/usr/lib -Wl,-rpath-link=/home/build/opt/opendingux-toolchain/usr/lib -Wl,-rpath-link=/home/build/opt/opendingux-toolchain/lib -lSDL -lasound -lpng -lm -lstdc++ -ldl + +ARCH = mipsel +PLATFORM = opendingux +SOUND_DRIVERS = sdl diff --git a/config.dingux54 b/config.dingux54 new file mode 100644 index 00000000..96e55014 --- /dev/null +++ b/config.dingux54 @@ -0,0 
+1,16 @@ +# Automatically generated by configure +# Configured with: './configure' '--platform=generic' +CC = mipsel-linux-gnu-gcc +CXX = mipsel-linux-gnu-g++ +AS = mipsel-linux-gnu-as +STRIP = mipsel-linux-gnu-strip +CFLAGS += -I/home/build/opt/opendingux-toolchain/usr/include/ +CFLAGS += -I/home/build/opt/opendingux-toolchain/usr/include/SDL +CFLAGS += -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -fno-stack-protector +ASFLAGS += +LDFLAGS += +LDLIBS += -B/home/build/opt/opendingux-toolchain/usr/lib -B/home/build/opt/opendingux-toolchain/lib -Wl,-rpath-link=/home/build/opt/opendingux-toolchain/usr/lib -Wl,-rpath-link=/home/build/opt/opendingux-toolchain/lib -lSDL -lasound -lpng -lz -lm -lstdc++ -ldl + +ARCH = mipsel +PLATFORM = opendingux +SOUND_DRIVERS = sdl diff --git a/config.gp2x b/config.gp2x new file mode 100644 index 00000000..de3e47c4 --- /dev/null +++ b/config.gp2x @@ -0,0 +1,16 @@ +# Automatically generated by configure +# Configured with: './configure' '--platform=gp2x' +CC = arm-open2x-linux-gcc +CXX = arm-open2x-linux-g++ +AS = arm-open2x-linux-as +STRIP = arm-open2x-linux-strip +CFLAGS += -msoft-float -mcpu=arm920t -mtune=arm920t -D__GP2X__ +CFLAGS += -I/home/build/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -I/home/build/src/gp2x/armroot/include +CFLAGS += -fno-gcse -funswitch-loops -fweb -ftree-loop-im #-fpredictive-commoning -ftree-loop-distribution -frename-registers +ASFLAGS += -mcpu=arm920t -mfloat-abi=soft +LDFLAGS += --sysroot /home/build/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux -L/home/build/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L/home/build/src/gp2x/armroot/lib -static +LDLIBS += -lpng -lm -ldl + +ARCH = arm +PLATFORM = gp2x +SOUND_DRIVERS = oss diff --git a/config.gp2x47 b/config.gp2x47 new file mode 100644 index 00000000..1022166d --- /dev/null +++ b/config.gp2x47 @@ -0,0 +1,16 @@ +# Automatically generated by configure +# Configured with: './configure' '--platform=gp2x' +CC = 
arm-linux-gnueabi-gcc +CXX = arm-linux-gnueabi-g++ +AS = arm-linux-gnueabi-as +STRIP = arm-linux-gnueabi-strip +CFLAGS += -mabi=apcs-gnu -mno-thumb-interwork -mfloat-abi=soft -mfpu=fpa -mcpu=arm920t -mtune=arm920t -Wno-unused-result -fno-stack-protector -D__GP2X__ +CFLAGS += -I/home/build/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -I/home/build/src/gp2x/armroot/include +CFLAGS += -fno-gcse -funswitch-loops -fweb -ftree-loop-im #-fpredictive-commoning -ftree-loop-distribution -frename-registers +ASFLAGS += -mabi=apcs-gnu -mfloat-abi=soft -mfpu=fpa -mcpu=arm920t +LDFLAGS += -mabi=apcs-gnu -mfpu=fpa -B/home/build/opt/open2x/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1 -B/home/build/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L/home/build/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L/home/build/src/gp2x/armroot/lib -static +LDLIBS += -lpng -lm -ldl + +ARCH = arm +PLATFORM = gp2x +SOUND_DRIVERS = oss From 78d817c37006a557174594071d6390987ea8f09c Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 20 Mar 2019 19:08:46 +0100 Subject: [PATCH 0165/1110] arm asm memory access functions for m/s68k --- pico/cd/memory.c | 2 + pico/cd/memory_arm.S | 113 +++++++++++++++++++++++++++++++++++++++---- pico/memory.c | 3 ++ pico/memory_arm.S | 96 ++++++++++++++++++++++++++++++++++++ 4 files changed, 205 insertions(+), 9 deletions(-) diff --git a/pico/cd/memory.c b/pico/cd/memory.c index 1c5dcf94..e6486840 100644 --- a/pico/cd/memory.c +++ b/pico/cd/memory.c @@ -14,12 +14,14 @@ uptr s68k_read16_map [0x1000000 >> M68K_MEM_SHIFT]; uptr s68k_write8_map [0x1000000 >> M68K_MEM_SHIFT]; uptr s68k_write16_map[0x1000000 >> M68K_MEM_SHIFT]; +#ifndef _ASM_CD_MEMORY_C MAKE_68K_READ8(s68k_read8, s68k_read8_map) MAKE_68K_READ16(s68k_read16, s68k_read16_map) MAKE_68K_READ32(s68k_read32, s68k_read16_map) MAKE_68K_WRITE8(s68k_write8, s68k_write8_map) MAKE_68K_WRITE16(s68k_write16, s68k_write16_map) MAKE_68K_WRITE32(s68k_write32, s68k_write16_map) +#endif // 
----------------------------------------------------------------- diff --git a/pico/cd/memory_arm.S b/pico/cd/memory_arm.S index fe82ecb9..335f3624 100644 --- a/pico/cd/memory_arm.S +++ b/pico/cd/memory_arm.S @@ -178,9 +178,9 @@ m_m68k_read8_r02: bx lr m_m68k_read8_r03: add r1, r1, #0x110000 - push {r1, lr} + stmfd sp!, {r1, lr} bl m68k_comm_check - pop {r1, lr} + ldmfd sp!, {r1, lr} ldrb r0, [r1, #3] and r0, r0, #0xc7 bx lr @@ -219,10 +219,10 @@ m_m68k_read8_hi: add r1, r1, #0x110000 movge r0, #0 bxge lr - add r1, r0 - push {r1, lr} + add r1, r1, r0 + stmfd sp!, {r1, lr} bl m68k_comm_check - pop {r1, lr} + ldmfd sp!, {r1, lr} ldrb r0, [r1] bx lr @@ -275,9 +275,9 @@ m_m68k_read16_r00: bx lr m_m68k_read16_r02: add r1, r1, #0x110000 - push {r1, lr} + stmfd sp!, {r1, lr} bl m68k_comm_check - pop {r1, lr} + ldmfd sp!, {r1, lr} ldrb r2, [r1, #3] ldrb r0, [r1, #2] and r2, r2, #0xc7 @@ -307,9 +307,9 @@ m_m68k_read16_hi: bxge lr add r1, r0, r1 - push {r1, lr} + stmfd sp!, {r1, lr} bl m68k_comm_check - pop {r0, lr} + ldmfd sp!, {r0, lr} ldrh r0, [r0] mov r1, r0, lsr #8 and r0, r0, #0xff @@ -701,6 +701,101 @@ m_s68k_write16_regs_spec: @ special case strb r1, [r2, r0] @ if (a == 0xe) s68k_regs[0xf] = d; bx lr +.global s68k_read8 +.global s68k_read16 +.global s68k_write8 +.global s68k_write16 + +s68k_read8: + ldr r3, =s68k_read8_map + bic r0, r0, #0xff000000 + mov r2, r0, lsr #16 + ldr r3, [r3, r2, lsl #2] + eor r2, r0, #1 + movs r3, r3, lsl #1 + ldrccb r0, [r3, r2] + bxcc lr + bx r3 + +s68k_read16: + ldr r3, =s68k_read16_map + bic r0, r0, #0xff000000 + mov r2, r0, lsr #16 + ldr r3, [r3, r2, lsl #2] + bic r0, r0, #1 + movs r3, r3, lsl #1 + ldrcch r0, [r3, r0] + bxcc lr + bx r3 + +s68k_read32: + ldr r3, =s68k_read16_map + bic r0, r0, #0xff000000 + mov r2, r0, lsr #16 + ldr r3, [r3, r2, lsl #2] + bic r0, r0, #1 + movs r3, r3, lsl #1 + ldrcch r1, [r3, r0]! 
+ ldrcch r0, [r3, #2] + orrcc r0, r0, r1, lsl #16 + bxcc lr + + stmfd sp!, {r0, r3, r4, lr} + mov lr, pc + bx r3 + ldmfd sp!, {r1, r3} + str r0, [sp] + add r0, r1, #2 + mov lr, pc + bx r3 + ldmfd sp!, {r1, lr} + mov r0, r0, lsl #16 + mov r1, r1, lsl #16 + orr r0, r1, r0, lsr #16 + bx lr + +s68k_write8: + ldr r3, =s68k_write8_map + bic r0, r0, #0xff000000 + mov r2, r0, lsr #16 + ldr r3, [r3, r2, lsl #2] + eor r2, r0, #1 + movs r3, r3, lsl #1 + strccb r1, [r3, r2] + bxcc lr + bx r3 + +s68k_write16: + ldr r3, =s68k_write16_map + bic r0, r0, #0xff000000 + mov r2, r0, lsr #16 + ldr r3, [r3, r2, lsl #2] + bic r0, r0, #1 + movs r3, r3, lsl #1 + strcch r1, [r3, r0] + bxcc lr + bx r3 + +s68k_write32: + ldr r3, =s68k_write16_map + bic r0, r0, #0xff000000 + mov r2, r0, lsr #16 + ldr r3, [r3, r2, lsl #2] + bic r0, r0, #1 + movs r3, r3, lsl #1 + movcc r2, r1, lsr #16 + strcch r2, [r3, r0]! + strcch r1, [r3, #2] + bxcc lr + + stmfd sp!, {r0, r1, r3, lr} + mov r1, r1, lsr #16 + mov lr, pc + bx r3 + ldmfd sp!, {r0, r1, r3, lr} + add r0, r0, #2 + bx r3 + .pool @ vim:filetype=armasm diff --git a/pico/memory.c b/pico/memory.c index a31a08e9..cc82f789 100644 --- a/pico/memory.c +++ b/pico/memory.c @@ -163,12 +163,14 @@ void m68k_map_unmap(int start_addr, int end_addr) m68k_write16_map[i] = (addr >> 1) | MAP_FLAG; } +#ifndef _ASM_MEMORY_C MAKE_68K_READ8(m68k_read8, m68k_read8_map) MAKE_68K_READ16(m68k_read16, m68k_read16_map) MAKE_68K_READ32(m68k_read32, m68k_read16_map) MAKE_68K_WRITE8(m68k_write8, m68k_write8_map) MAKE_68K_WRITE16(m68k_write16, m68k_write16_map) MAKE_68K_WRITE32(m68k_write32, m68k_write16_map) +#endif // ----------------------------------------------------------------- @@ -420,6 +422,7 @@ static u32 PicoRead8_sram(u32 a) d = EEPROM_read(); if (!(a & 1)) d >>= 8; + d &= 0xff; } else d = *(u8 *)(Pico.sv.data - Pico.sv.start + a); elprintf(EL_SRAMIO, "sram r8 [%06x] %02x @ %06x", a, d, SekPc); diff --git a/pico/memory_arm.S b/pico/memory_arm.S index bfe8ca10..117cea0b 
100644 --- a/pico/memory_arm.S +++ b/pico/memory_arm.S @@ -59,6 +59,7 @@ m_read8_eeprom: ldmfd sp!,{r1,lr} tst r1, #1 moveq r0, r0, lsr #8 + and r0, r0, #0xff bx lr @@ -220,6 +221,101 @@ m_write16_not_z80ctl: strb r2, [r3, #OFS_Pico_m_sram_reg] bx lr +.global m68k_read8 +.global m68k_read16 +.global m68k_write8 +.global m68k_write16 + +m68k_read8: + ldr r3, =m68k_read8_map + bic r0, r0, #0xff000000 + mov r2, r0, lsr #16 + ldr r3, [r3, r2, lsl #2] + eor r2, r0, #1 + movs r3, r3, lsl #1 + ldrccb r0, [r3, r2] + bxcc lr + bx r3 + +m68k_read16: + ldr r3, =m68k_read16_map + bic r0, r0, #0xff000000 + mov r2, r0, lsr #16 + ldr r3, [r3, r2, lsl #2] + bic r0, r0, #1 + movs r3, r3, lsl #1 + ldrcch r0, [r3, r0] + bxcc lr + bx r3 + +m68k_read32: + ldr r3, =m68k_read16_map + bic r0, r0, #0xff000000 + mov r2, r0, lsr #16 + ldr r3, [r3, r2, lsl #2] + bic r0, r0, #1 + movs r3, r3, lsl #1 + ldrcch r1, [r3, r0]! + ldrcch r0, [r3, #2] + orrcc r0, r0, r1, lsl #16 + bxcc lr + + stmfd sp!, {r0, r3, r4, lr} + mov lr, pc + bx r3 + ldmfd sp!, {r1, r3} + str r0, [sp] + add r0, r1, #2 + mov lr, pc + bx r3 + ldmfd sp!, {r1, lr} + mov r0, r0, lsl #16 + mov r1, r1, lsl #16 + orr r0, r1, r0, lsr #16 + bx lr + +m68k_write8: + ldr r3, =m68k_write8_map + bic r0, r0, #0xff000000 + mov r2, r0, lsr #16 + ldr r3, [r3, r2, lsl #2] + eor r2, r0, #1 + movs r3, r3, lsl #1 + strccb r1, [r3, r2] + bxcc lr + bx r3 + +m68k_write16: + ldr r3, =m68k_write16_map + bic r0, r0, #0xff000000 + mov r2, r0, lsr #16 + ldr r3, [r3, r2, lsl #2] + bic r0, r0, #1 + movs r3, r3, lsl #1 + strcch r1, [r3, r0] + bxcc lr + bx r3 + +m68k_write32: + ldr r3, =m68k_write16_map + bic r0, r0, #0xff000000 + mov r2, r0, lsr #16 + ldr r3, [r3, r2, lsl #2] + bic r0, r0, #1 + movs r3, r3, lsl #1 + movcc r2, r1, lsr #16 + strcch r2, [r3, r0]! 
+ strcch r1, [r3, #2] + bxcc lr + + stmfd sp!, {r0, r1, r3, lr} + mov r1, r1, lsr #16 + mov lr, pc + bx r3 + ldmfd sp!, {r0, r1, r3, lr} + add r0, r0, #2 + bx r3 + .pool @ vim:filetype=armasm From c1d15f7397aa32eaac6d20b452c969d84712e0e9 Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 20 Mar 2019 20:33:43 +0100 Subject: [PATCH 0166/1110] config for x86 (32 bit only, for SH2 drc), add/revive profiling --- Makefile | 8 +++++++- config.caanoo | 2 +- config.x86 | 15 +++++++++++++++ platform/common/common.mak | 4 ++++ platform/common/main.c | 7 +++++++ platform/linux/pprof.c | 39 +++++++++++++++++++++++++++++++++----- platform/linux/pprof.h | 31 ++++++++++++++++++++++-------- 7 files changed, 91 insertions(+), 15 deletions(-) create mode 100644 config.x86 diff --git a/Makefile b/Makefile index 95d4b5d8..4d3288d9 100644 --- a/Makefile +++ b/Makefile @@ -13,6 +13,10 @@ CFLAGS += -falign-functions=2 endif LDFLAGS += -Wl,--gc-sections +# profiling +pprof ?= 0 +gperf ?= 0 + all: config.mak target_ ifndef NO_CONFIG_MAK @@ -118,6 +122,8 @@ OBJS += platform/gp2x/emu.o OBJS += platform/gp2x/vid_mmsp2.o OBJS += platform/gp2x/vid_pollux.o OBJS += platform/gp2x/warm.o +OBJS += platform/gp2x/host_dasm.o +OBJS += cpu/sh2/mame/sh2dasm.o USE_FRONTEND = 1 PLATFORM_MP3 = 1 PLATFORM_ZLIB = 1 @@ -204,7 +210,7 @@ else endif pprof: platform/linux/pprof.c - $(CC) -O2 -ggdb -DPPROF -DPPROF_TOOL -I../../ -I. $^ -o $@ + $(CC) $(CFLAGS) -O2 -ggdb -DPPROF -DPPROF_TOOL -I../../ -I. 
$^ -o $@ $(LDFLAGS) $(LDLIBS) tools/textfilter: tools/textfilter.c make -C tools/ textfilter diff --git a/config.caanoo b/config.caanoo index 8e62573c..39edb5db 100644 --- a/config.caanoo +++ b/config.caanoo @@ -9,7 +9,7 @@ CFLAGS += -fno-gcse -funswitch-loops -fweb -ftree-loop-im #-fpredictive-commonin CFLAGS += -I/home/build/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/include -I/home/build/src/gp2x/armroot-eabi/include ASFLAGS += -mfloat-abi=soft -mcpu=arm920t LDFLAGS += -B/home/build/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/lib/gcc/arm-gph-linux-gnueabi/4.2.4 -B/home/build/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L/home/build/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L/home/build/src/gp2x/armroot-eabi/lib -static -LDLIBS += -lpng -lm -ldl -lprofiler -lstdc++ +LDLIBS += -lpng -lm -ldl ARCH = arm PLATFORM = gp2x diff --git a/config.x86 b/config.x86 new file mode 100644 index 00000000..24f9d209 --- /dev/null +++ b/config.x86 @@ -0,0 +1,15 @@ +# Automatically generated by configure +# Configured with: './configure' '--platform=generic' +CC = gcc +CXX = g++ +AS = as +STRIP = strip +CFLAGS += -I/usr/include/SDL -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -m32 #-DGPERF -pg +ASFLAGS += +LDFLAGS += -m32 #-pg +LDLIBS += -L$(HOME)/opt/binutils-i386/usr/lib/ -lbfd-2.24-multiarch -lopcodes-2.24-multiarch +LDLIBS += -L/usr/lib/i386-linux-gnu/debug -L/home/build/opt/lib32 -lSDL-1.2 -lasound -lpng -lz -lm -ldl + +ARCH = x86 +PLATFORM = generic +SOUND_DRIVERS = oss alsa sdl diff --git a/platform/common/common.mak b/platform/common/common.mak index e329c247..29945d48 100644 --- a/platform/common/common.mak +++ b/platform/common/common.mak @@ -40,6 +40,10 @@ ifeq "$(pprof)" "1" DEFINES += PPROF SRCS_COMMON += $(R)platform/linux/pprof.c endif +ifeq "$(gperf)" "1" +DEFINES += GPERF +LDFLAGS += -lprofiler -lstdc++ +endif # ARM asm stuff ifeq "$(ARCH)" "arm" diff 
--git a/platform/common/main.c b/platform/common/main.c index 52676676..e7b04466 100644 --- a/platform/common/main.c +++ b/platform/common/main.c @@ -90,6 +90,10 @@ int main(int argc, char *argv[]) emu_init(); menu_init(); +#ifdef GPERF + ProfilerStart("gperf.out"); +#endif + engineState = PGS_Menu; if (argc > 1) @@ -145,6 +149,9 @@ int main(int argc, char *argv[]) } endloop: +#ifdef GPERF + ProfilerStop(); +#endif emu_finish(); plat_finish(); diff --git a/platform/linux/pprof.c b/platform/linux/pprof.c index e1ecd1fd..6c7c0ff9 100644 --- a/platform/linux/pprof.c +++ b/platform/linux/pprof.c @@ -1,21 +1,46 @@ #include #include #include +#include #include #include #include +#include #include +int rc_mem[pp_total_points]; + struct pp_counters *pp_counters; +int *refcounts = rc_mem; static int shmemid; +static unsigned long devMem; +volatile unsigned long *gp2x_memregl; +volatile unsigned short *gp2x_memregs; + void pprof_init(void) { int this_is_new_shmem = 1; key_t shmemkey; void *shmem; +#if 0 + devMem = open("/dev/mem", O_RDWR); + if (devMem == -1) + { + perror("pprof: open failed"); + return; + } + gp2x_memregl = (unsigned long *)mmap(0, 0x10000, PROT_READ|PROT_WRITE, MAP_SHARED, devMem, 0xc0000000); + if (gp2x_memregl == (unsigned long *)-1) + { + perror("pprof: mmap failed"); + return; + } + gp2x_memregs = (unsigned short *)gp2x_memregl; +#endif + #ifndef PPROF_TOOL unsigned int tmp = pprof_get_one(); printf("pprof: measured diff is %u\n", pprof_get_one() - tmp); @@ -28,11 +53,11 @@ void pprof_init(void) return; } -#ifndef PPROF_TOOL +//#ifndef PPROF_TOOL shmemid = shmget(shmemkey, sizeof(*pp_counters), IPC_CREAT | IPC_EXCL | 0644); if (shmemid == -1) -#endif +//#endif { shmemid = shmget(shmemkey, sizeof(*pp_counters), 0644); @@ -76,15 +101,18 @@ static const struct { IT(draw), IT(sound), IT(m68k), + IT(s68k), + IT(mem68), IT(z80), IT(msh2), IT(ssh2), + IT(memsh), IT(dummy), }; int main(int argc, char *argv[]) { - unsigned long long old[pp_total_points], 
new[pp_total_points]; + pp_type old[pp_total_points], new[pp_total_points]; int base = 0; int l, i; @@ -107,11 +135,12 @@ int main(int argc, char *argv[]) memcpy(new, pp_counters->counter, sizeof(new)); for (i = 0; i < ARRAY_SIZE(pp_tab); i++) { - unsigned long long idiff = new[i] - old[i]; - unsigned long long bdiff = (new[base] - old[base]) | 1; + pp_type idiff = new[i] - old[i]; + pp_type bdiff = (new[base] - old[base]) | 1; printf("%6.2f ", (double)idiff * 100.0 / bdiff); } printf("\n"); + fflush(stdout); memcpy(old, new, sizeof(old)); if (argc < 3) diff --git a/platform/linux/pprof.h b/platform/linux/pprof.h index cccbcbd5..91fd5b09 100644 --- a/platform/linux/pprof.h +++ b/platform/linux/pprof.h @@ -7,21 +7,22 @@ enum pprof_points { pp_draw, pp_sound, pp_m68k, + pp_s68k, + pp_mem68, pp_z80, pp_msh2, pp_ssh2, + pp_memsh, pp_dummy, pp_total_points }; -struct pp_counters -{ - unsigned long long counter[pp_total_points]; -}; - extern struct pp_counters *pp_counters; +extern int *refcounts; #ifdef __i386__ +typedef unsigned long long pp_type; + static __attribute__((always_inline)) inline unsigned int pprof_get_one(void) { unsigned long long ret; @@ -31,24 +32,38 @@ static __attribute__((always_inline)) inline unsigned int pprof_get_one(void) #define unglitch_timer(x) #elif defined(__GP2X__) +typedef unsigned long pp_type; + +#if 0 // XXX: MMSP2 only, timer sometimes seems to return lower vals? 
extern volatile unsigned long *gp2x_memregl; #define pprof_get_one() (unsigned int)gp2x_memregl[0x0a00 >> 2] #define unglitch_timer(di) \ if ((signed int)(di) < 0) di = 0 +#else +extern unsigned int (*gp2x_get_ticks_us)(void); +#define pprof_get_one() gp2x_get_ticks_us() +#define unglitch_timer(di) \ + if ((signed int)(di) < 0) di = 0 +#endif #else #error no timer #endif +struct pp_counters +{ + pp_type counter[pp_total_points]; +}; + #define pprof_start(point) { \ - unsigned int pp_start_##point = pprof_get_one() + unsigned int pp_start_##point = pprof_get_one(); refcounts[pp_##point]++ #define pprof_end(point) \ { \ unsigned int di = pprof_get_one() - pp_start_##point; \ unglitch_timer(di); \ - pp_counters->counter[pp_##point] += di; \ + if (!--refcounts[pp_##point]) pp_counters->counter[pp_##point] += di; \ } \ } @@ -57,7 +72,7 @@ extern volatile unsigned long *gp2x_memregl; { \ unsigned int di = pprof_get_one() - pp_start_##point; \ unglitch_timer(di); \ - pp_counters->counter[pp_##point] -= di; \ + if (--refcounts[pp_##point]) pp_counters->counter[pp_##point] -= di; \ } \ } From ce322c1e3744a4b88cee8b784b75501140749a58 Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 20 Mar 2019 20:34:01 +0100 Subject: [PATCH 0167/1110] bfd-less arm disassembler for gph --- cpu/sh2/mame/sh2dasm.c | 2 +- platform/common/disarm.c | 481 ++++++++++++++++++++++++++++++++++++++ platform/common/disarm.h | 28 +++ platform/gp2x/host_dasm.c | 85 +++++++ 4 files changed, 595 insertions(+), 1 deletion(-) create mode 100644 platform/common/disarm.c create mode 100644 platform/common/disarm.h create mode 100644 platform/gp2x/host_dasm.c diff --git a/cpu/sh2/mame/sh2dasm.c b/cpu/sh2/mame/sh2dasm.c index 3fa25e92..0ecb7f45 100644 --- a/cpu/sh2/mame/sh2dasm.c +++ b/cpu/sh2/mame/sh2dasm.c @@ -465,7 +465,7 @@ static UINT32 op1000(char *buffer, UINT32 pc, UINT16 opcode) sprintf(buffer, "MOV.B @($%02X,%s),R0", (opcode & 15), regname[Rm]); break; case 5: - sprintf(buffer, "MOV.W @($%02X,%s),R0", 
(opcode & 15), regname[Rm]); + sprintf(buffer, "MOV.W @($%02X,%s),R0", (opcode & 15) * 2, regname[Rm]); break; case 8: sprintf(buffer, "CMP/EQ #$%02X,R0", (opcode & 0xff)); diff --git a/platform/common/disarm.c b/platform/common/disarm.c new file mode 100644 index 00000000..2e7c04e7 --- /dev/null +++ b/platform/common/disarm.c @@ -0,0 +1,481 @@ +/* + * Copyright (c) 2012 Wojtek Kaniewski + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include + +#define IMM_FORMAT "0x%x" +//#define IMM_FORMAT "%d" +#define ADDR_FORMAT "0x%x" + +static inline unsigned int rol(unsigned int value, unsigned int shift) +{ + shift &= 31; + + return (value >> shift) | (value << (32 - shift)); +} + +static inline const char *condition(unsigned int insn) +{ + const char *conditions[16] = { "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc", "hi", "ls", "ge", "lt", "gt", "le", "", "nv" }; + return conditions[(insn >> 28) & 0x0f]; +} + +static inline const char *register_name(unsigned int reg) +{ + const char *register_names[16] = { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "sp", "lr", "pc" }; + return register_names[reg & 0x0f]; +} + +static const char *register_list(unsigned int list, char *buf, size_t buf_len) +{ + int i; + + buf[0] = 0; + + for (i = 0; i < 16; i++) + { + if ((list >> i) & 1) + { + snprintf(buf + strlen(buf), buf_len - strlen(buf), "%s%s", (buf[0] == 0) ? "" : ",", register_name(i)); + } + } + + return buf; +} + +static const char *shift(unsigned int insn, char *buf, size_t buf_len) +{ + unsigned int imm = (insn >> 7) & 0x1f; + const char *rn = register_name(insn >> 8); + unsigned int type = (insn >> 4) & 0x07; + + switch (type) + { + case 0: + snprintf(buf, buf_len, (imm != 0) ? ",lsl #%d" : "", imm); + break; + case 1: + snprintf(buf, buf_len, ",lsl %s", rn); + break; + case 2: + snprintf(buf, buf_len, ",lsr #%d", imm ? imm : 32); + break; + case 3: + snprintf(buf, buf_len, ",lsr %s", rn); + break; + case 4: + snprintf(buf, buf_len, ",asr #%d", imm ? imm : 32); + break; + case 5: + snprintf(buf, buf_len, ",asr %s", rn); + break; + case 6: + snprintf(buf, buf_len, (imm != 0) ? 
",ror #%d" : ",rrx", imm); + break; + case 7: + snprintf(buf, buf_len, ",ror %s", rn); + break; + } + + return buf; +} + +static const char *immediate(unsigned int imm, int negative, int show_if_zero, char *buf, size_t buf_len) +{ + if (imm || show_if_zero) + { + snprintf(buf, buf_len, ",#%s" IMM_FORMAT, (negative) ? "-" : "", imm); + return buf; + } + + return ""; +} + +static int data_processing(unsigned int pc, unsigned int insn, char *buf, size_t buf_len) +{ + unsigned int oper = (insn >> 21) & 15; + const char *names[16] = { "and", "eor", "sub", "rsb", "add", "adc", "sbc", "rsc", "tst", "teq", "cmp", "cmn", "orr", "mov", "bic", "mvn" }; + const char *name; + const char *s; + unsigned int rd; + unsigned int rn; + int is_move = ((oper == 13) || (oper == 15)); + int is_test = ((oper >= 8) && (oper <= 11)); + char tmp_buf[64]; + + name = names[oper]; + s = ((insn >> 20) & 1) ? "s" : ""; + rn = (insn >> 16) & 15; + rd = (insn >> 12) & 15; + + /* mov r0,r0,r0 is a nop */ + if (insn == 0xe1a00000) + { + snprintf(buf, buf_len, "nop"); + return 1; + } + + /* mrs */ + if ((insn & 0x0fbf0fff) == 0x010f0000) + { + const char *psr = ((insn >> 22) & 1) ? "spsr" : "cpsr"; + const char *rd = register_name(insn >> 12); + + snprintf(buf, buf_len, "mrs%s %s,%s", condition(insn), rd, psr); + + return 1; + } + + /* msr flag only*/ + if ((insn & 0x0db0f000) == 0x0120f000) + { + const char *psr = ((insn >> 22) & 1) ? 
"spsr" : "cpsr"; + const char *suffix; + + switch ((insn >> 16) & 15) + { + case 9: + suffix = ""; + break; + case 8: + suffix = "_f"; + break; + case 1: + suffix = "_c"; + break; + default: + return 0; + } + + if ((insn >> 25) & 1) + { + unsigned int imm = rol(insn & 0x000000ff, ((insn >> 8) & 15) * 2); + + snprintf(buf, buf_len, "msr%s %s%s,#" IMM_FORMAT, condition(insn), psr, suffix, imm); + } + else + { + const char *rm = register_name(insn >> 0); + + if (((insn >> 4) & 255) != 0) + { + return 0; + } + + snprintf(buf, buf_len, "msr%s %s%s,%s", condition(insn), psr, suffix, rm); + } + + return 1; + } + + if (((insn >> 25) & 1) == 0) + { + unsigned int rm; + + rm = (insn & 15); + + if (is_move) + { + snprintf(buf, buf_len, "%s%s%s %s,%s%s", name, condition(insn), s, register_name(rd), register_name(rm), shift(insn, tmp_buf, sizeof(tmp_buf))); + } + else if (is_test) + { + snprintf(buf, buf_len, "%s%s %s,%s%s", name, condition(insn), register_name(rn), register_name(rm), shift(insn, tmp_buf, sizeof(tmp_buf))); + } + else + { + snprintf(buf, buf_len, "%s%s%s %s,%s,%s%s", name, condition(insn), s, register_name(rd), register_name(rn), register_name(rm), shift(insn, tmp_buf, sizeof(tmp_buf))); + } + } + else + { + unsigned int imm; + + imm = rol(insn & 0x000000ff, ((insn >> 8) & 15) * 2); + + if (is_move) + { + snprintf(buf, buf_len, "%s%s%s %s%s", name, condition(insn), s, register_name(rd), immediate(imm, 0, 1, tmp_buf, sizeof(tmp_buf))); + } + else if (is_test) + { + snprintf(buf, buf_len, "%s%s %s%s", name, condition(insn), register_name(rn), immediate(imm, 0, 1, tmp_buf, sizeof(tmp_buf))); + } + else + { + snprintf(buf, buf_len, "%s%s%s %s,%s%s", name, condition(insn), s, register_name(rd), register_name(rn), immediate(imm, 0, 1, tmp_buf, sizeof(tmp_buf))); + } + } + + return 1; +} + +static int branch(unsigned int pc, unsigned int insn, char *buf, size_t buf_len) +{ + const char *link = ((insn >> 24) & 1) ? 
"l" : ""; + unsigned int address; + unsigned int offset; + + offset = insn & 0x00ffffff; + + if ((offset & 0x00800000) != 0) + { + offset |= 0xff000000; + } + + address = pc + 8 + (offset << 2); + + snprintf(buf, buf_len, "b%s%s " ADDR_FORMAT, link, condition(insn), address); + + return 1; +} + +static int multiply(unsigned int pc, unsigned int insn, char *buf, size_t buf_len) +{ + const char *rd = register_name(insn >> 16); + const char *rn = register_name(insn >> 12); + const char *rs = register_name(insn >> 8); + const char *rm = register_name(insn >> 0); + const char *s = ((insn >> 20) & 1) ? "s" : ""; + int mla = (insn >> 21) & 1; + + snprintf(buf, buf_len, (mla) ? "mla%s%s %s,%s,%s,%s" : "mul%s%s %s,%s,%s", condition(insn), s, rd, rm, rs, rn); + + return 1; +} + +static int multiply_long(unsigned int pc, unsigned int insn, char *buf, size_t buf_len) +{ + const char *rh = register_name(insn >> 16); + const char *rl = register_name(insn >> 12); + const char *rs = register_name(insn >> 8); + const char *rm = register_name(insn >> 0); + const char *u = ((insn >> 22) & 1) ? "s" : "u"; + const char *s = ((insn >> 20) & 1) ? "s" : ""; + const char *name = ((insn >> 21) & 1) ? "mlal" : "mull"; + + snprintf(buf, buf_len, "%s%s%s%s %s,%s,%s,%s", u, name, condition(insn), s, rl, rh, rm, rs); + + return 1; +} + +static int single_data_swap(unsigned int pc, unsigned int insn, char *buf, size_t buf_len) +{ + const char *rn = register_name(insn >> 16); + const char *rd = register_name(insn >> 12); + const char *rm = register_name(insn >> 0); + const char *b = ((insn >> 22) & 1) ? "b" : ""; + + snprintf(buf, buf_len, "swp%s%s %s,%s,[%s]", condition(insn), b, rd, rm, rn); + + return 1; +} + +static int branch_and_exchange(unsigned int pc, unsigned int insn, char *buf, size_t buf_len) +{ + const char *rn = register_name(insn >> 0); + const char *l = ((insn >> 5) & 1) ? 
"l" : ""; + + snprintf(buf, buf_len, "b%sx%s %s", l, condition(insn), rn); + + return 1; +} + +static int halfword_data_transfer(unsigned int pc, unsigned int insn, char *buf, size_t buf_len) +{ + const char *rn = register_name(insn >> 16); + const char *rd = register_name(insn >> 12); + const char *name = ((insn >> 20) & 1) ? "ldr" : "str"; + const char *w = ((insn >> 21) & 1) ? "!" : ""; + int sign = (insn >> 23) & 1; + int pre = (insn >> 24) & 1; + const char *suffix = ""; + char tmp_buf[64]; + + switch ((insn >> 5) & 3) + { + case 0: + name = "swp"; + break; + case 1: + suffix = "h"; + break; + case 2: + suffix = "sb"; + break; + case 3: + suffix = "sh"; + break; + } + + if ((insn >> 22) & 1) + { + unsigned int imm = ((insn >> 4) & 0xf0) | (insn & 0x0f); + + snprintf(buf, buf_len, (pre) ? "%s%s%s %s,[%s%s]%s" : "%s%s%s %s,[%s],%s%s", name, condition(insn), suffix, rd, rn, immediate(imm, !sign, 0, tmp_buf, sizeof(tmp_buf)), w); + } + else + { + const char *rm = register_name(insn >> 0); + + snprintf(buf, buf_len, (pre) ? "%s%s%s %s,[%s,%s%s]%s" : "%s%s%s %s,[%s],%s%s%s", name, condition(insn), suffix, rd, rn, sign ? "" : "-", rm, w); + } + + return 1; +} + +static int single_data_transfer(unsigned int pc, unsigned int insn, char *buf, size_t buf_len) +{ + const char *rn = register_name(insn >> 16); + const char *rd = register_name(insn >> 12); + const char *name = ((insn >> 20) & 1) ? "ldr" : "str"; + const char *w = ((insn >> 21) & 1) ? "!" : ""; + const char *b = ((insn >> 22) & 1) ? "b" : ""; + int sign = (insn >> 23) & 1; + int pre = (insn >> 24) & 1; + char tmp_buf[64]; + + if ((insn >> 25) & 1) + { + const char *rm = register_name(insn >> 0); + + snprintf(buf, buf_len, (pre) ? "%s%s%s %s,[%s,%s%s%s]%s" : "%s%s%s %s,[%s],%s%s%s%s", name, condition(insn), b, rd, rn, sign ? "" : "-", rm, shift(insn, tmp_buf, sizeof(tmp_buf)), w); + } + else + { + unsigned int imm = insn & 0x00000fff; + + snprintf(buf, buf_len, (pre) ? 
"%s%s%s %s,[%s%s]%s" : "%s%s%s %s,[%s]%s%s", name, condition(insn), b, rd, rn, immediate(imm, !sign, 0, tmp_buf, sizeof(tmp_buf)), w); + } + + return 1; +} + +static int block_data_transfer(unsigned int pc, unsigned int insn, char *buf, size_t buf_len) +{ + const char *s = ((insn >> 22) & 1) ? "^" : ""; + const char *w = ((insn >> 21) & 1) ? "!" : ""; + int load = (insn >> 20) & 1; + const char *name = (load) ? "ldm" : "stm"; + const char *ldm_stubs[4] = { "fa", "fd", "ea", "ed" }; + const char *stm_stubs[4] = { "ed", "ea", "fd", "fa" }; + int stub_idx = (insn >> 23) & 3; + const char *stub = (load) ? ldm_stubs[stub_idx] : stm_stubs[stub_idx]; + char tmp_buf[64]; + + snprintf(buf, buf_len, "%s%s%s %s%s, {%s}%s", name, condition(insn), stub, register_name(insn >> 16), w, register_list(insn & 0xffff, tmp_buf, sizeof(tmp_buf)), s); + + return 1; +} + +static int coprocessor_data_transfer(unsigned int pc, unsigned int insn, char *buf, size_t buf_len) +{ + const char *name = ((insn >> 20) & 1) ? "ldc" : "stc"; + const char *rn = register_name(insn >> 16); + int sign = (insn >> 23) & 1; + const char *l = ((insn >> 22) & 1) ? "l" : ""; + const char *w = ((insn >> 21) & 1) ? "!" : ""; + int pre = (insn >> 24) & 1; + unsigned int cp = (insn >> 8) & 15; + unsigned int cd = (insn >> 12) & 15; + unsigned int imm = (insn >> 0) & 255; + char tmp_buf[64]; + + snprintf(buf, buf_len, (pre) ? 
"%s%s%s p%d,cr%d,[%s%s]%s" : "%s%s%s p%d,cr%d,[%s]%s%s", name, condition(insn), l, cp, cd, rn, immediate(imm, !sign, 0, tmp_buf, sizeof(tmp_buf)), w); + + return 1; +} + +static int coprocessor_data_operation(unsigned int pc, unsigned int insn, char *buf, size_t buf_len) +{ + snprintf(buf, buf_len, "cdp%s p%d,%d,cr%d,cr%d,cr%d,{%d}", condition(insn), (insn >> 8) & 15, (insn >> 20) & 15, (insn >> 12) & 15, (insn >> 16) & 15, (insn >> 0) & 15, (insn >> 5) & 7); + + return 1; +} + +static int coprocessor_register_transfer(unsigned int pc, unsigned int insn, char *buf, size_t buf_len) +{ + const char *name = ((insn >> 20) & 1) ? "mrc" : "mcr"; + unsigned int cn = (insn >> 16) & 15; + const char *rd = register_name(insn >> 12); + unsigned int expr1 = (insn >> 21) & 7; + unsigned int expr2 = (insn >> 5) & 7; + unsigned int cp = (insn >> 8) & 15; + unsigned int cm = (insn >> 0) & 15; + + snprintf(buf, buf_len, "%s%s p%d,%d,%s,cr%d,cr%d,{%d}", name, condition(insn), cp, expr1, rd, cn, cm, expr2); + + return 1; +} + +static int software_interrupt(unsigned int pc, unsigned int insn, char *buf, size_t buf_len) +{ + snprintf(buf, buf_len, "swi%s %u", condition(insn), insn & 0x00ffffff); + + return 1; +} + +int disarm(unsigned int pc, unsigned int insn, char *buf, size_t buf_len) +{ + if ((insn & 0x0fffffd0) == 0x012fff10) + return branch_and_exchange(pc, insn, buf, buf_len); + + if ((insn & 0x0fb00ff0) == 0x01000090) + return single_data_swap(pc, insn, buf, buf_len); + + if ((insn & 0x0fc000f0) == 0x00000090) + return multiply(pc, insn, buf, buf_len); + + if ((insn & 0x0f8000f0) == 0x00800090) + return multiply_long(pc, insn, buf, buf_len); + + if ((insn & 0x0f000010) == 0x0e000000) + return coprocessor_data_operation(pc, insn, buf, buf_len); + + if ((insn & 0x0f000010) == 0x0e000010) + return coprocessor_register_transfer(pc, insn, buf, buf_len); + + if ((insn & 0x0f000000) == 0x0f000000) + return software_interrupt(pc, insn, buf, buf_len); + + if ((insn & 0x0e000090) == 
0x00000090) + return halfword_data_transfer(pc, insn, buf, buf_len); + + if ((insn & 0x0e000000) == 0x08000000) + return block_data_transfer(pc, insn, buf, buf_len); + + if ((insn & 0x0e000000) == 0x0a000000) + return branch(pc, insn, buf, buf_len); + + if ((insn & 0x0e000000) == 0x0c000000) + return coprocessor_data_transfer(pc, insn, buf, buf_len); + + if ((insn & 0x0c000000) == 0x00000000) + return data_processing(pc, insn, buf, buf_len); + + if ((insn & 0x0c000000) == 0x04000000) + return single_data_transfer(pc, insn, buf, buf_len); + + return 0; +} + diff --git a/platform/common/disarm.h b/platform/common/disarm.h new file mode 100644 index 00000000..2ea4ccc3 --- /dev/null +++ b/platform/common/disarm.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2012 Wojtek Kaniewski + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef DISARM_H +#define DISARM_H + +int disarm(unsigned int pc, unsigned int insn, char *buf, unsigned int buf_len); + +#endif /* DISARM_H */ diff --git a/platform/gp2x/host_dasm.c b/platform/gp2x/host_dasm.c new file mode 100644 index 00000000..5e1fc218 --- /dev/null +++ b/platform/gp2x/host_dasm.c @@ -0,0 +1,85 @@ +#include +#include +#include + +#include "../common/disarm.c" + + +/* symbols */ +typedef struct { const char *name; void *value; } asymbol; + +static asymbol **symbols; +static long symcount, symstorage = 8; + +static const char *lookup_name(void *addr) +{ + asymbol **sptr = symbols; + int i; + + for (i = 0; i < symcount; i++) { + asymbol *sym = *sptr++; + + if (addr == sym->value) + return sym->name; + } + + return NULL; +} + +void host_dasm(void *addr, int len) +{ + void *end = (char *)addr + len; + const char *name; + char buf[64]; + long insn, symaddr; + + while (addr < end) { + name = lookup_name(addr); + if (name != NULL) + printf("%s:\n", name); + + insn = *(long *)addr; + printf(" %08lx %08lx ", (long)addr, insn); + if(disarm((unsigned)addr, insn, buf, sizeof(buf))) { + symaddr = 0; + if ((insn & 0xe000000) == 0xa000000) { + symaddr = (long)addr + 8 + ((long)(insn << 8) >> 6); + name = lookup_name((void *)symaddr); + } + if (symaddr && name) + printf("%s <%s>\n", buf, name); + else if (symaddr && !name) + printf("%s \n", buf); + else + printf("%s\n", buf); + } else + printf("unknown\n"); + addr = (char *)addr + sizeof(long); + } +} + +void host_dasm_new_symbol_(void *addr, const char *name) +{ + asymbol *sym, **tmp; + + if (symbols == NULL) + symbols = malloc(symstorage); + if (symstorage <= symcount * sizeof(symbols[0])) { + tmp = realloc(symbols, symstorage * 2); + if (tmp == NULL) + return; + symstorage *= 2; + symbols = tmp; + } + + symbols[symcount] = calloc(sizeof(*symbols[0]), 1); + if (symbols[symcount] == NULL) + return; + + // a HACK (should use correct section), but ohwell + sym = symbols[symcount]; + sym->value = addr; + 
sym->name = name; + symcount++; +} + From 5a5d765c23f79ae483f03ca88c47302c07db1693 Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 20 Mar 2019 20:34:19 +0100 Subject: [PATCH 0168/1110] bugfix for 32x --- pico/32x/sh2soc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/pico/32x/sh2soc.c b/pico/32x/sh2soc.c index 62423d13..b5300119 100644 --- a/pico/32x/sh2soc.c +++ b/pico/32x/sh2soc.c @@ -87,6 +87,7 @@ static void dmac_transfer_one(SH2 *sh2, struct dma_chan *chan) case 0: d = p32x_sh2_read8(chan->sar, sh2); p32x_sh2_write8(chan->dar, d, sh2); + break; case 1: d = p32x_sh2_read16(chan->sar, sh2); p32x_sh2_write16(chan->dar, d, sh2); From f5939109c409ec906c2c005656e3c0281fa11741 Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 20 Mar 2019 21:12:10 +0100 Subject: [PATCH 0169/1110] sh2 drc host disassembler integration for gp2x --- Makefile | 2 -- config.x86 | 3 +-- cpu/sh2/compiler.c | 2 +- pico/sound/mix.c | 16 +++++++++++++--- platform/common/common.mak | 12 ++++++++++-- .../{gp2x/host_dasm.c => common/host_dasm_arm.c} | 2 +- 6 files changed, 26 insertions(+), 11 deletions(-) rename platform/{gp2x/host_dasm.c => common/host_dasm_arm.c} (98%) diff --git a/Makefile b/Makefile index 4d3288d9..ddbd71a1 100644 --- a/Makefile +++ b/Makefile @@ -122,8 +122,6 @@ OBJS += platform/gp2x/emu.o OBJS += platform/gp2x/vid_mmsp2.o OBJS += platform/gp2x/vid_pollux.o OBJS += platform/gp2x/warm.o -OBJS += platform/gp2x/host_dasm.o -OBJS += cpu/sh2/mame/sh2dasm.o USE_FRONTEND = 1 PLATFORM_MP3 = 1 PLATFORM_ZLIB = 1 diff --git a/config.x86 b/config.x86 index 24f9d209..d463157e 100644 --- a/config.x86 +++ b/config.x86 @@ -4,10 +4,9 @@ CC = gcc CXX = g++ AS = as STRIP = strip -CFLAGS += -I/usr/include/SDL -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -m32 #-DGPERF -pg +CFLAGS += -I/usr/include/SDL -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -m32 # -pg ASFLAGS += LDFLAGS += -m32 #-pg -LDLIBS += -L$(HOME)/opt/binutils-i386/usr/lib/ -lbfd-2.24-multiarch -lopcodes-2.24-multiarch LDLIBS += 
-L/usr/lib/i386-linux-gnu/debug -L/home/build/opt/lib32 -lSDL-1.2 -lasound -lpng -lz -lm -ldl ARCH = x86 diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index aa41a84d..c6522f37 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -2744,7 +2744,7 @@ end_op: if (drcf.pending_branch_direct) { struct op_data *opd_b = - (op_flags[i] & OF_DELAY_OP) ? &ops[i-1] : opd; + (op_flags[i] & OF_DELAY_OP) ? opd-1 : opd; u32 target_pc = opd_b->imm; int cond = -1, ncond = -1; void *target = NULL; diff --git a/pico/sound/mix.c b/pico/sound/mix.c index 636edb55..202ba355 100644 --- a/pico/sound/mix.c +++ b/pico/sound/mix.c @@ -15,16 +15,17 @@ else if ( val < min ) val = min; \ } +int mix_32_to_16l_level; -void mix_32_to_16l_stereo(short *dest, int *src, int count) +void mix_32_to_16l_stereo_core(short *dest, int *src, int count, int level) { int l, r; for (; count > 0; count--) { l = r = *dest; - l += *src++; - r += *src++; + l += *src++ >> level; + r += *src++ >> level; Limit( l, MAXOUT, MINOUT ); Limit( r, MAXOUT, MINOUT ); *dest++ = l; @@ -32,6 +33,15 @@ void mix_32_to_16l_stereo(short *dest, int *src, int count) } } +void mix_32_to_16l_stereo_lvl(short *dest, int *src, int count) +{ + mix_32_to_16l_stereo_core(dest, src, count, mix_32_to_16l_level); +} + +void mix_32_to_16l_stereo(short *dest, int *src, int count) +{ + mix_32_to_16l_stereo_core(dest, src, count, 0); +} void mix_32_to_16_mono(short *dest, int *src, int count) { diff --git a/platform/common/common.mak b/platform/common/common.mak index 29945d48..197e8677 100644 --- a/platform/common/common.mak +++ b/platform/common/common.mak @@ -161,8 +161,16 @@ SRCS_COMMON += $(R)cpu/sh2/compiler.c ifdef drc_debug DEFINES += DRC_DEBUG=$(drc_debug) SRCS_COMMON += $(R)cpu/sh2/mame/sh2dasm.c -SRCS_COMMON += $(R)platform/libpicofe/linux/host_dasm.c -LDFLAGS += -lbfd -lopcodes -liberty +DASM = $(R)platform/libpicofe/linux/host_dasm.c +DASMLIBS = -lbfd -lopcodes -liberty +ifeq "$(ARCH)" "arm" +ifeq ($(filter_out $(shell 
$(CC) --print-file-name=libbfd.so),"/"),) +DASM = $(R)platform/common/host_dasm_arm.c +DASMLIBS = +endif +endif +SRCS_COMMON += $(DASM) +LDFLAGS += $(DASMLIBS) endif endif # use_sh2drc SRCS_COMMON += $(R)cpu/sh2/mame/sh2pico.c diff --git a/platform/gp2x/host_dasm.c b/platform/common/host_dasm_arm.c similarity index 98% rename from platform/gp2x/host_dasm.c rename to platform/common/host_dasm_arm.c index 5e1fc218..7951b7d9 100644 --- a/platform/gp2x/host_dasm.c +++ b/platform/common/host_dasm_arm.c @@ -2,7 +2,7 @@ #include #include -#include "../common/disarm.c" +#include "disarm.c" /* symbols */ From 2fa02d5a63e4b6dea2d6ed809507480576f6bba0 Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 20 Mar 2019 23:39:45 +0100 Subject: [PATCH 0170/1110] improved sh2 clock handling, bug fixing + small improvement to drc emitters --- cpu/drc/emit_arm.c | 73 +++++++++++++++++++++++++--------------------- cpu/drc/emit_x86.c | 21 +++++++------ cpu/sh2/sh2.c | 2 +- cpu/sh2/sh2.h | 4 +-- pico/32x/32x.c | 29 +++++++++++------- pico/32x/memory.c | 2 +- pico/cd/mcd.c | 2 ++ pico/pico_int.h | 6 ++-- 8 files changed, 77 insertions(+), 62 deletions(-) diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index 91b47402..89582e8d 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -86,7 +86,7 @@ #define A_OP_TST 0x8 #define A_OP_TEQ 0x9 #define A_OP_CMP 0xa -#define A_OP_CMN 0xa +#define A_OP_CMN 0xb #define A_OP_ORR 0xc #define A_OP_MOV 0xd #define A_OP_BIC 0xe @@ -250,7 +250,16 @@ #define EOP_MOVT(rd,imm) \ EMIT(0xe3400000 | ((rd)<<12) | (((imm)>>16)&0xfff) | (((imm)>>12)&0xf0000)) -// XXX: AND, RSB, *C, will break if 1 insn is not enough +static int count_bits(unsigned val) +{ + val = (val & 0x55555555) + ((val >> 1) & 0x55555555); + val = (val & 0x33333333) + ((val >> 2) & 0x33333333); + val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f); + val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff); + return (val & 0xffff) + (val >> 16); +} + +// XXX: RSB, *S will break if 1 insn is not 
enough static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int imm) { int ror2; @@ -259,23 +268,11 @@ static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int switch (op) { case A_OP_MOV: rn = 0; - if (~imm < 0x10000) { + // count bits in imm and use MVN if more bits 1 than 0 + if (count_bits(imm) > 16) { imm = ~imm; op = A_OP_MVN; } -#ifdef HAVE_ARMV7 - for (v = imm, ror2 = 0; v && !(v & 3); v >>= 2) - ror2--; - if (v >> 8) { - /* 2+ insns needed - prefer movw/movt */ - if (op == A_OP_MVN) - imm = ~imm; - EOP_MOVW(rd, imm); - if (imm & 0xffff0000) - EOP_MOVT(rd, imm); - return; - } -#endif break; case A_OP_EOR: @@ -283,27 +280,37 @@ static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int case A_OP_ADD: case A_OP_ORR: case A_OP_BIC: - if (s == 0 && imm == 0) + if (s == 0 && imm == 0 && rd == rn) return; break; } - for (v = imm, ror2 = 0; ; ror2 -= 8/2) { - /* shift down to get 'best' rot2 */ - for (; v && !(v & 3); v >>= 2) - ror2--; - - EOP_C_DOP_IMM(cond, op, s, rn, rd, ror2 & 0x0f, v & 0xff); - - v >>= 8; - if (v == 0) - break; - if (op == A_OP_MOV) - op = A_OP_ORR; - if (op == A_OP_MVN) + again: + v = imm, ror2 = 32/2; // arm imm shift is ROR, so rotate for best fit + while ((v >> 24) && !(v & 0xc0)) + v = (v << 2) | (v >> 30), ror2++; + do { + // shift down to get 'best' rot2 + while (v > 0xff && !(v & 3)) + v >>= 2, ror2--; + // AND must fit into 1 insn. 
if not, use BIC + if (op == A_OP_AND && v != (v & 0xff)) { + imm = ~imm; op = A_OP_BIC; + goto again; + } + EOP_C_DOP_IMM(cond, op, s, rn, rd, ror2 & 0xf, v & 0xff); + + switch (op) { + case A_OP_MOV: op = A_OP_ORR; break; + case A_OP_MVN: op = A_OP_BIC; break; + case A_OP_ADC: op = A_OP_ADD; break; + case A_OP_SBC: op = A_OP_SUB; break; + } rn = rd; - } + + v >>= 8, ror2 -= 8/2; + } while (v); } #define emith_op_imm(cond, s, op, r, imm) \ @@ -491,7 +498,7 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_cmp_r_imm(r, imm) { \ u32 op = A_OP_CMP, imm_ = imm; \ if (~imm_ < 0x100) { \ - imm_ = ~imm_; \ + imm_ = -imm_; \ op = A_OP_CMN; \ } \ emith_top_imm(A_COND_AL, op, r, imm); \ @@ -652,12 +659,10 @@ static int emith_xbranch(int cond, void *target, int is_call) if ((count) <= 8) { \ t = (count) - 8; \ t = (0xff << t) & 0xff; \ - EOP_BIC_IMM(d,s,8/2,t); \ EOP_C_DOP_IMM(cond,A_OP_BIC,0,s,d,8/2,t); \ } else if ((count) >= 24) { \ t = (count) - 24; \ t = 0xff >> t; \ - EOP_AND_IMM(d,s,0,t); \ EOP_C_DOP_IMM(cond,A_OP_AND,0,s,d,0,t); \ } else { \ EOP_MOV_REG(cond,0,d,s,A_AM1_LSL,count); \ diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 865aab4b..e5f2adef 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -421,13 +421,10 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; rmr = s2; \ } \ EMIT_OP_MODRM(0xf7, 3, op, rmr); /* xMUL rmr */ \ - /* XXX: using push/pop for the case of edx->eax; eax->edx */ \ - if (dhi != xDX && dhi != -1) \ - emith_push(xDX); \ if (dlo != xAX) \ - emith_move_r_r(dlo, xAX); \ - if (dhi != xDX && dhi != -1) \ - emith_pop(dhi); \ + EMIT_OP(0x90 + (dlo)); /* XCHG eax, dlo */ \ + if (dhi != xDX && dhi != -1 && !(dhi == xAX && dlo == xDX)) \ + emith_move_r_r(dhi, (dlo == xDX ? 
xAX : xDX)); \ if (dlo != xDX && dhi != xDX) \ emith_pop(xDX); \ if (dlo != xAX && dhi != xAX) \ @@ -474,12 +471,12 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_deref_op(op, r, rs, offs) do { \ /* mov r <-> [ebp+#offs] */ \ - if ((offs) >= 0x80) { \ + if (abs(offs) >= 0x80) { \ EMIT_OP_MODRM64(op, 2, r, rs); \ EMIT(offs, u32); \ } else { \ EMIT_OP_MODRM64(op, 1, r, rs); \ - EMIT(offs, u8); \ + EMIT((u8)offs, u8); \ } \ } while (0) @@ -496,7 +493,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; int r_ = r; \ if (!is_abcdx(r)) \ r_ = rcache_get_tmp(); \ - emith_deref_op(0x8a, r_, rs, offs); \ + EMIT(0x0f, u8); \ + emith_deref_op(0xb6, r_, rs, offs); \ if ((r) != r_) { \ emith_move_r_r(r, r_); \ rcache_free_tmp(r_); \ @@ -515,8 +513,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; } while (0) #define emith_read16_r_r_offs(r, rs, offs) do { \ - EMIT(0x66, u8); /* operand override */ \ - emith_read_r_r_offs(r, rs, offs); \ + EMIT(0x0f, u8); \ + emith_deref_op(0xb7, r, rs, offs); \ } while (0) #define emith_write16_r_r_offs(r, rs, offs) do { \ @@ -688,6 +686,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; case 0: rd = xDI; break; \ case 1: rd = xSI; break; \ case 2: rd = xDX; break; \ + case 2: rd = xBX; break; \ } #define emith_sh2_drc_entry() { \ diff --git a/cpu/sh2/sh2.c b/cpu/sh2/sh2.c index 403c4c70..ba260718 100644 --- a/cpu/sh2/sh2.c +++ b/cpu/sh2/sh2.c @@ -84,7 +84,7 @@ int sh2_irl_irq(SH2 *sh2, int level, int nested_call) // do this to avoid missing irqs that other SH2 might clear int vector = sh2->irq_callback(sh2, level); sh2_do_irq(sh2, level, vector); - sh2->m68krcycles_done += C_SH2_TO_M68K(*sh2, 13); + sh2->m68krcycles_done += C_SH2_TO_M68K(sh2, 13); } else sh2->test_irq = 1; diff --git a/cpu/sh2/sh2.h b/cpu/sh2/sh2.h index 407270f1..69abf8cd 100644 --- a/cpu/sh2/sh2.h +++ b/cpu/sh2/sh2.h @@ -72,9 +72,9 @@ typedef struct SH2_ #define CYCLE_MULT_SHIFT 10 #define C_M68K_TO_SH2(xsh2, c) \ - ((int)((c) * 
(xsh2).mult_m68k_to_sh2) >> CYCLE_MULT_SHIFT) + ((int)((long long)(c) * (xsh2)->mult_m68k_to_sh2) >> CYCLE_MULT_SHIFT) #define C_SH2_TO_M68K(xsh2, c) \ - ((int)((c + 3) * (xsh2).mult_sh2_to_m68k) >> CYCLE_MULT_SHIFT) + ((int)((long long)(c+3) * (xsh2)->mult_sh2_to_m68k) >> CYCLE_MULT_SHIFT) int sh2_init(SH2 *sh2, int is_slave, SH2 *other_sh2); void sh2_finish(SH2 *sh2); diff --git a/pico/32x/32x.c b/pico/32x/32x.c index 9bfbefac..3ee8c2ea 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -254,8 +254,8 @@ static void p32x_start_blank(void) } p32x_trigger_irq(NULL, SekCyclesDone(), P32XI_VINT); - p32x_sh2_poll_event(&msh2, SH2_STATE_VPOLL, 0); - p32x_sh2_poll_event(&ssh2, SH2_STATE_VPOLL, 0); + p32x_sh2_poll_event(&msh2, SH2_STATE_VPOLL, SekCyclesDone()); + p32x_sh2_poll_event(&ssh2, SH2_STATE_VPOLL, SekCyclesDone()); } void p32x_schedule_hint(SH2 *sh2, int m68k_cycles) @@ -323,8 +323,12 @@ void p32x_event_schedule_sh2(SH2 *sh2, enum p32x_event event, int after) p32x_event_schedule(now, event, after); - left_to_next = (event_time_next - now) * 3; - sh2_end_run(sh2, left_to_next); + left_to_next = C_M68K_TO_SH2(sh2, (int)(event_time_next - now)); + if (sh2_cycles_left(sh2) > left_to_next) { + if (left_to_next < 1) + left_to_next = 1; + sh2_end_run(sh2, left_to_next); + } } static void p32x_run_events(unsigned int until) @@ -372,13 +376,13 @@ static void run_sh2(SH2 *sh2, int m68k_cycles) pevt_log_sh2_o(sh2, EVT_RUN_START); sh2->state |= SH2_STATE_RUN; - cycles = C_M68K_TO_SH2(*sh2, m68k_cycles); + cycles = C_M68K_TO_SH2(sh2, m68k_cycles); elprintf_sh2(sh2, EL_32X, "+run %u %d @%08x", sh2->m68krcycles_done, cycles, sh2->pc); done = sh2_execute(sh2, cycles, PicoIn.opt & POPT_EN_DRC); - sh2->m68krcycles_done += C_SH2_TO_M68K(*sh2, done); + sh2->m68krcycles_done += C_SH2_TO_M68K(sh2, done); sh2->state &= ~SH2_STATE_RUN; pevt_log_sh2_o(sh2, EVT_RUN_END); elprintf_sh2(sh2, EL_32X, "-run %u %d", @@ -412,8 +416,7 @@ void p32x_sync_other_sh2(SH2 *sh2, unsigned int 
m68k_target) // there might be new event to schedule current sh2 to if (event_time_next) { - left_to_event = event_time_next - m68k_target; - left_to_event *= 3; + left_to_event = C_M68K_TO_SH2(sh2, (int)(event_time_next - m68k_target)); if (sh2_cycles_left(sh2) > left_to_event) { if (left_to_event < 1) left_to_event = 1; @@ -446,6 +449,7 @@ void sync_sh2s_normal(unsigned int m68k_target) now = ssh2.m68krcycles_done; timer_cycles = now; + pprof_start(m68k); while (CYCLES_GT(m68k_target, now)) { if (event_time_next && CYCLES_GE(now, event_time_next)) @@ -463,6 +467,7 @@ void sync_sh2s_normal(unsigned int m68k_target) target - msh2.m68krcycles_done, target - ssh2.m68krcycles_done, m68k_target - now, Pico32x.emu_flags); + pprof_start(ssh2); if (!(ssh2.state & SH2_IDLE_STATES)) { cycles = target - ssh2.m68krcycles_done; if (cycles > 0) { @@ -472,7 +477,9 @@ void sync_sh2s_normal(unsigned int m68k_target) target = event_time_next; } } + pprof_end(ssh2); + pprof_start(msh2); if (!(msh2.state & SH2_IDLE_STATES)) { cycles = target - msh2.m68krcycles_done; if (cycles > 0) { @@ -482,6 +489,7 @@ void sync_sh2s_normal(unsigned int m68k_target) target = event_time_next; } } + pprof_end(msh2); now = target; if (!(msh2.state & SH2_IDLE_STATES)) { @@ -497,6 +505,7 @@ void sync_sh2s_normal(unsigned int m68k_target) p32x_timers_do(now - timer_cycles); timer_cycles = now; } + pprof_end_sub(m68k); // advance idle CPUs if (msh2.state & SH2_IDLE_STATES) { @@ -553,8 +562,8 @@ void PicoFrame32x(void) if (!(Pico32x.sh2_regs[0] & 0x80)) p32x_schedule_hint(NULL, SekCyclesDone()); - p32x_sh2_poll_event(&msh2, SH2_STATE_VPOLL, 0); - p32x_sh2_poll_event(&ssh2, SH2_STATE_VPOLL, 0); + p32x_sh2_poll_event(&msh2, SH2_STATE_VPOLL, SekCyclesDone()); + p32x_sh2_poll_event(&ssh2, SH2_STATE_VPOLL, SekCyclesDone()); if (PicoIn.AHW & PAHW_MCD) pcd_prepare_frame(); diff --git a/pico/32x/memory.c b/pico/32x/memory.c index eff0ab07..d815853d 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -146,7 
+146,7 @@ static void sh2s_sync_on_read(SH2 *sh2) cycles = sh2_cycles_done(sh2); if (cycles > 600) - p32x_sync_other_sh2(sh2, sh2->m68krcycles_done + cycles / 3); + p32x_sync_other_sh2(sh2, sh2->m68krcycles_done + C_SH2_TO_M68K(sh2, cycles)); } // SH2 faking diff --git a/pico/cd/mcd.c b/pico/cd/mcd.c index 5e3629a3..8a2f230d 100644 --- a/pico/cd/mcd.c +++ b/pico/cd/mcd.c @@ -125,6 +125,7 @@ static void SekRunS68k(unsigned int to) if (SekShouldInterrupt()) Pico_mcd->m.s68k_poll_a = 0; + pprof_start(s68k); SekCycleCntS68k += cyc_do; #if defined(EMU_C68K) PicoCpuCS68k.cycles = cyc_do; @@ -137,6 +138,7 @@ static void SekRunS68k(unsigned int to) #elif defined(EMU_F68K) SekCycleCntS68k += fm68k_emulate(&PicoCpuFS68k, cyc_do, 0) - cyc_do; #endif + pprof_end(s68k); } static void pcd_set_cycle_mult(void) diff --git a/pico/pico_int.h b/pico/pico_int.h index 7225cab8..cca7f954 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -241,11 +241,11 @@ extern SH2 sh2s[2]; # define sh2_pc(sh2) (sh2)->pc #endif -#define sh2_cycles_done(sh2) ((int)(sh2)->cycles_timeslice - sh2_cycles_left(sh2)) +#define sh2_cycles_done(sh2) ((unsigned)(sh2)->cycles_timeslice - sh2_cycles_left(sh2)) #define sh2_cycles_done_t(sh2) \ - ((sh2)->m68krcycles_done * 3 + sh2_cycles_done(sh2)) + (unsigned)(C_M68K_TO_SH2(sh2, (sh2)->m68krcycles_done) + sh2_cycles_done(sh2)) #define sh2_cycles_done_m68k(sh2) \ - ((sh2)->m68krcycles_done + (sh2_cycles_done(sh2) / 3)) + (unsigned)((sh2)->m68krcycles_done + C_SH2_TO_M68K(sh2, sh2_cycles_done(sh2))) #define sh2_reg(c, x) (c) ? ssh2.r[x] : msh2.r[x] #define sh2_gbr(c) (c) ? 
ssh2.gbr : msh2.gbr From 122afd9d37fa6b0ce7b9dfc369c8a907c590f34d Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 22 Mar 2019 20:17:08 +0100 Subject: [PATCH 0171/1110] substituted tool to obtain target structure offsets (for asm) --- Makefile | 9 ++--- pico/pico_int_o32.h | 28 ---------------- tools/Makefile | 7 ++-- tools/mkoffsets.sh | 82 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 89 insertions(+), 37 deletions(-) delete mode 100644 pico/pico_int_o32.h create mode 100755 tools/mkoffsets.sh diff --git a/Makefile b/Makefile index ddbd71a1..a46c2a43 100644 --- a/Makefile +++ b/Makefile @@ -47,6 +47,7 @@ asm_ym2612 ?= 1 asm_misc ?= 1 asm_cdmemory ?= 1 asm_mix ?= 1 +asm_32xdraw ?= 0 # currently defunct else # if not arm use_fame ?= 1 use_cz80 ?= 1 @@ -194,10 +195,10 @@ LDFLAGS += -Wl,-Map=$(TARGET).map endif -target_: $(TARGET) +target_: pico/pico_int_o32.h $(TARGET) clean: - $(RM) $(TARGET) $(OBJS) + $(RM) $(TARGET) $(OBJS) pico/pico_int_o32.h $(RM) -r .opk_data $(TARGET): $(OBJS) @@ -210,8 +211,8 @@ endif pprof: platform/linux/pprof.c $(CC) $(CFLAGS) -O2 -ggdb -DPPROF -DPPROF_TOOL -I../../ -I. 
$^ -o $@ $(LDFLAGS) $(LDLIBS) -tools/textfilter: tools/textfilter.c - make -C tools/ textfilter +pico/pico_int_o32.h:: tools/mkoffsets.sh + make -C tools/ XCC="$(CC)" XCFLAGS="$(CFLAGS)" .s.o: $(CC) $(CFLAGS) -c $< -o $@ diff --git a/pico/pico_int_o32.h b/pico/pico_int_o32.h deleted file mode 100644 index 25c64f43..00000000 --- a/pico/pico_int_o32.h +++ /dev/null @@ -1,28 +0,0 @@ -/* autogenerated by tools/mkoffsets, do not edit */ -#define OFS_Pico_video_reg 0x0000 -#define OFS_Pico_m_rotate 0x0040 -#define OFS_Pico_m_z80Run 0x0041 -#define OFS_Pico_m_dirtyPal 0x0046 -#define OFS_Pico_m_hardware 0x0047 -#define OFS_Pico_m_z80_reset 0x004f -#define OFS_Pico_m_sram_reg 0x0049 -#define OFS_Pico_sv 0x008c -#define OFS_Pico_sv_data 0x008c -#define OFS_Pico_sv_start 0x0090 -#define OFS_Pico_sv_end 0x0094 -#define OFS_Pico_sv_flags 0x0098 -#define OFS_Pico_rom 0x033c -#define OFS_Pico_romsize 0x0340 -#define OFS_EST_DrawScanline 0x00 -#define OFS_EST_rendstatus 0x04 -#define OFS_EST_DrawLineDest 0x08 -#define OFS_EST_HighCol 0x0c -#define OFS_EST_HighPreSpr 0x10 -#define OFS_EST_Pico 0x14 -#define OFS_EST_PicoMem_vram 0x18 -#define OFS_EST_PicoMem_cram 0x1c -#define OFS_EST_PicoOpt 0x20 -#define OFS_EST_Draw2FB 0x24 -#define OFS_EST_HighPal 0x28 -#define OFS_PMEM_vram 0x10000 -#define OFS_PMEM_vsram 0x22100 diff --git a/tools/Makefile b/tools/Makefile index 28b748d4..752cd6b2 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -1,13 +1,10 @@ -CFLAGS = -Wall -ggdb - -TARGETS = amalgamate textfilter mkoffsets +TARGETS = amalgamate textfilter OBJS = $(addsuffix .o,$(TARGETS)) all: $(TARGETS) + CC="$(XCC)" CFLAGS="$(XCFLAGS)" ./mkoffsets.sh ../pico clean: $(RM) $(TARGETS) $(OBJS) -mkoffsets: CFLAGS += -m32 -I.. 
- .PHONY: clean all diff --git a/tools/mkoffsets.sh b/tools/mkoffsets.sh new file mode 100755 index 00000000..60088f21 --- /dev/null +++ b/tools/mkoffsets.sh @@ -0,0 +1,82 @@ +# usage: mkoffsets +# automatically compute structure offsets for gcc targets in ELF format + +CC=${CC:-gcc} + +# endianess of target (automagically determined below) +ENDIAN= + +compile_rodata () +{ + $CC $CFLAGS -I .. -c /tmp/getoffs.c -o /tmp/getoffs.o || exit 1 + rosect=$(readelf -S /tmp/getoffs.o | grep '\.rodata' | + sed 's/^[^.]*././;s/ .*//') + objcopy --dump-section $rosect=/tmp/getoffs.ro /tmp/getoffs.o || exit 1 + ro=$(xxd -ps /tmp/getoffs.ro) + if [ "$ENDIAN" = "le" ]; then + # swap needed for le target + hex="" + for b in $(echo $ro | sed 's/\([0-9a-f]\{2\}\)/\1 /g'); do + hex=$b$hex; + done + else + hex=$ro + fi + rodata=$(printf "%d" 0x$hex) +} + +get_define () # prefix struct member member... +{ + prefix=$1; shift + struct=$1; shift + field=$(echo $* | sed 's/ /./g') + name=$(echo $* | sed 's/ /_/g') + echo '#include "pico/pico_int.h"' > /tmp/getoffs.c + echo "static const struct $struct p;" >> /tmp/getoffs.c + echo "const int offs = (char *)&p.$field - (char*)&p;" >>/tmp/getoffs.c + compile_rodata + line=$(printf "#define %-20s 0x%04x" $prefix$name $rodata) +} + +# determine endianess +echo "const int one = 1;" >/tmp/getoffs.c +compile_rodata +ENDIAN=$(if [ "$rodata" -eq 1 ]; then echo be; else echo le; fi) +# determine output file +echo "const int vsz = sizeof(void *);" >/tmp/getoffs.c +compile_rodata +fn="${1:-.}/pico_int_o$((8*$rodata)).h" +# output header +echo "/* autogenerated by mkoffset.sh, do not edit */" >$fn +echo "/* target endianess: $ENDIAN, compiled with: $CC $CFLAGS */" >>$fn +# output offsets +get_define OFS_Pico_ Pico video reg ; echo "$line" >>$fn +get_define OFS_Pico_ Pico m rotate ; echo "$line" >>$fn +get_define OFS_Pico_ Pico m z80Run ; echo "$line" >>$fn +get_define OFS_Pico_ Pico m dirtyPal ; echo "$line" >>$fn +get_define OFS_Pico_ Pico m hardware ; 
echo "$line" >>$fn +get_define OFS_Pico_ Pico m z80_reset ; echo "$line" >>$fn +get_define OFS_Pico_ Pico m sram_reg ; echo "$line" >>$fn +get_define OFS_Pico_ Pico sv ; echo "$line" >>$fn +get_define OFS_Pico_ Pico sv data ; echo "$line" >>$fn +get_define OFS_Pico_ Pico sv start ; echo "$line" >>$fn +get_define OFS_Pico_ Pico sv end ; echo "$line" >>$fn +get_define OFS_Pico_ Pico sv flags ; echo "$line" >>$fn +get_define OFS_Pico_ Pico rom ; echo "$line" >>$fn +get_define OFS_Pico_ Pico romsize ; echo "$line" >>$fn +get_define OFS_Pico_ Pico est ; echo "$line" >>$fn + +get_define OFS_EST_ PicoEState DrawScanline ; echo "$line" >>$fn +get_define OFS_EST_ PicoEState rendstatus ; echo "$line" >>$fn +get_define OFS_EST_ PicoEState DrawLineDest ; echo "$line" >>$fn +get_define OFS_EST_ PicoEState HighCol ; echo "$line" >>$fn +get_define OFS_EST_ PicoEState HighPreSpr ; echo "$line" >>$fn +get_define OFS_EST_ PicoEState Pico ; echo "$line" >>$fn +get_define OFS_EST_ PicoEState PicoMem_vram ; echo "$line" >>$fn +get_define OFS_EST_ PicoEState PicoMem_cram ; echo "$line" >>$fn +get_define OFS_EST_ PicoEState PicoOpt ; echo "$line" >>$fn +get_define OFS_EST_ PicoEState Draw2FB ; echo "$line" >>$fn +get_define OFS_EST_ PicoEState HighPal ; echo "$line" >>$fn + +get_define OFS_PMEM_ PicoMem vram ; echo "$line" >>$fn +get_define OFS_PMEM_ PicoMem vsram ; echo "$line" >>$fn From 064cc6d1037fd74b3a1b0062bbe05d1211994684 Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 22 Mar 2019 20:18:33 +0100 Subject: [PATCH 0172/1110] kludges for wwf raw, nfl --- cpu/sh2/compiler.c | 8 ++++++++ pico/32x/memory.c | 15 +++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index c6522f37..aa3e772c 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -4233,6 +4233,14 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, if (opd->imm < end_pc + MAX_LITERAL_OFFSET) { if (end_literals < opd->imm + opd->size * 2) 
end_literals = opd->imm + opd->size * 2; + if (opd->size == 2) { + // tweak for NFL: treat a 32bit literal as an address and check if it + // points to the literal space. In that case handle it like MOVA. + tmp = FETCH32(opd->imm) & ~0x20000000; // MUST ignore wt bit here + if (tmp >= end_pc && tmp < end_pc + MAX_LITERAL_OFFSET) + if (lowest_mova == 0 || tmp < lowest_mova) + lowest_mova = tmp; + } } } } diff --git a/pico/32x/memory.c b/pico/32x/memory.c index d815853d..c6b89a22 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -1487,6 +1487,20 @@ static void REGPARM(3) sh2_write16_da(u32 a, u32 d, SH2 *sh2) ((u16 *)sh2->data_array)[a1 / 2] = d; } +static void REGPARM(3) sh2_write16_rom(u32 a, u32 d, SH2 *sh2) +{ + u32 a1 = a & 0x3fffff; + // tweak for WWF Raw: does writes to ROM area, and it doesn't work without + // allowing this. + // Presumably the write goes to the CPU cache and is read back from there, + // but it would be extremely costly to emulate cache behaviour. Just allow + // writes to that region, hoping that the original ROM values are never used. 
+ if ((a1 & 0x3e0000) == 0x3e0000) + ((u16 *)sh2->p_rom)[a1 / 2] = d; + else + sh2_write16_unmapped(a, d, sh2); +} + typedef u32 (sh2_read_handler)(u32 a, SH2 *sh2); typedef void REGPARM(3) (sh2_write_handler)(u32 a, u32 d, SH2 *sh2); @@ -1911,6 +1925,7 @@ void PicoMemSetup32x(void) bank_switch_rom_sh2(); sh2_read8_map[0x02/2].mask = sh2_read8_map[0x22/2].mask = sh2_read16_map[0x02/2].mask = sh2_read16_map[0x22/2].mask = 0x3fffff; // FIXME + sh2_write16_map[0x02/2] = sh2_write16_map[0x22/2] = sh2_write16_rom; // CS2 - DRAM - done by Pico32xSwapDRAM() sh2_read8_map[0x04/2].mask = sh2_read8_map[0x24/2].mask = sh2_read16_map[0x04/2].mask = sh2_read16_map[0x24/2].mask = 0x01ffff; From 23eef37f25adf19b092fc0fa06e26abe1f3eb355 Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 22 Mar 2019 23:02:11 +0100 Subject: [PATCH 0173/1110] revamped 32X draw arm asm code --- Makefile | 2 +- pico/32x/{draw_arm.s => draw_arm.S} | 46 ++++++++++++++++------------- 2 files changed, 26 insertions(+), 22 deletions(-) rename pico/32x/{draw_arm.s => draw_arm.S} (90%) diff --git a/Makefile b/Makefile index a46c2a43..a0e63a47 100644 --- a/Makefile +++ b/Makefile @@ -47,7 +47,7 @@ asm_ym2612 ?= 1 asm_misc ?= 1 asm_cdmemory ?= 1 asm_mix ?= 1 -asm_32xdraw ?= 0 # currently defunct +asm_32xdraw ?= 1 else # if not arm use_fame ?= 1 use_cz80 ?= 1 diff --git a/pico/32x/draw_arm.s b/pico/32x/draw_arm.S similarity index 90% rename from pico/32x/draw_arm.s rename to pico/32x/draw_arm.S index ba66fbf1..e91f9893 100644 --- a/pico/32x/draw_arm.s +++ b/pico/32x/draw_arm.S @@ -6,9 +6,10 @@ @* See COPYING file in the top-level directory. 
@* +#include "pico/pico_int_o32.h" + .extern Pico32x -.extern PicoDraw2FB -.extern HighPal +.extern Pico .equiv P32XV_PRI, (1<< 7) @@ -22,11 +23,11 @@ Pico32xNativePal: .align 2 -.macro call_scan_prep cond +.macro call_scan_prep cond est @ &Pico.est .if \cond ldr r4, =PicoScan32xBegin ldr r5, =PicoScan32xEnd - ldr r6, =DrawLineDest + ldr r6, [\est, #OFS_EST_DrawLineDest] ldr r4, [r4] ldr r5, [r5] stmfd sp!, {r4,r5,r6} @@ -70,19 +71,20 @@ Pico32xNativePal: \name: stmfd sp!, {r4-r11,lr} + ldr lr,=Pico ldr r10,=Pico32x - ldr r11,=PicoDraw2FB + ldr r11, [lr, #OFS_Pico_est+OFS_EST_Draw2FB] ldr r10,[r10, #0x40] @ Pico32x.vdp_regs[0] - ldr r11,[r11] - ldr r9, =HighPal @ palmd + add r9, lr, #OFS_Pico_est+OFS_EST_HighPal @ palmd + and r4, r2, #0xff mov r5, #328 - lsl r3, #26 @ mdbg << 26 + mov r3, r3, lsl #26 @ mdbg << 26 mla r11,r4,r5,r11 @ r11 = pmd = PicoDraw2FB + offs*328: md data tst r10,#P32XV_PRI moveq r10,#0 movne r10,#0x8000 @ r10 = inv_bit - call_scan_prep \call_scan + call_scan_prep \call_scan lr mov r4, #0 @ line b 1f @ loop_outer_entry @@ -139,16 +141,17 @@ Pico32xNativePal: \name: stmfd sp!, {r4-r11,lr} - ldr r11,=PicoDraw2FB + ldr lr,=Pico ldr r10,=Pico32xNativePal - ldr r11,[r11] + ldr r11, [lr, #OFS_Pico_est+OFS_EST_Draw2FB] ldr r10,[r10] - ldr r9, =HighPal @ palmd + add r9, lr, #OFS_Pico_est+OFS_EST_HighPal @ palmd + and r4, r2, #0xff mov r5, #328 - lsl r3, #26 @ mdbg << 26 + mov r3, r3, lsl #26 @ mdbg << 26 mla r11,r4,r5,r11 @ r11 = pmd = PicoDraw2FB + offs*328: md data - call_scan_prep \call_scan + call_scan_prep \call_scan lr mov r4, #0 @ line b 1f @ loop_outer_entry @@ -179,8 +182,8 @@ Pico32xNativePal: ldrneb r8, [r5, #2]! @ r7,r8 - pixel 0,1 index subs r6, r6, #1 blt 0b @ loop_outer - cmp r7, r8 - beq 5f @ check_fill @ +8 +@ cmp r7, r8 @ is this really improving things? 
+@ beq 5f @ check_fill @ +8 3: @ no_fill: mov r12,r7, lsl #1 @@ -297,16 +300,17 @@ Pico32xNativePal: \name: stmfd sp!, {r4-r11,lr} - ldr r11,=PicoDraw2FB + ldr lr,=Pico ldr r10,=Pico32xNativePal - ldr r11,[r11] + ldr r11, [lr, #OFS_Pico_est+OFS_EST_Draw2FB] ldr r10,[r10] - ldr r9, =HighPal @ palmd + add r9, lr, #OFS_Pico_est+OFS_EST_HighPal @ palmd + and r4, r2, #0xff mov r5, #328 - lsl r3, #26 @ mdbg << 26 + mov r3, r3, lsl #26 @ mdbg << 26 mla r11,r4,r5,r11 @ r11 = pmd = PicoDraw2FB + offs*328: md data - call_scan_prep \call_scan + call_scan_prep \call_scan lr mov r4, #0 @ line b 1f @ loop_outer_entry From b1a047c9267065bb85095761a3192ce2874d7383 Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 22 Mar 2019 23:03:26 +0100 Subject: [PATCH 0174/1110] reworked palette and buffer handling due to some 32X bugs --- pico/32x/draw.c | 52 +++++++++----- pico/draw.c | 135 +++++++++++++++++++++++++++---------- pico/draw_arm.S | 22 ++---- pico/pico_int.h | 3 + pico/videoport.c | 4 +- platform/common/common.mak | 2 +- platform/common/emu.c | 2 + platform/gizmondo/emu.c | 25 ++++--- platform/gizmondo/menu.c | 4 +- platform/gp2x/emu.c | 76 +++++++++++---------- platform/linux/emu.c | 6 +- platform/psp/emu.c | 33 +++++++-- platform/psp/menu.c | 2 +- 13 files changed, 236 insertions(+), 130 deletions(-) diff --git a/pico/32x/draw.c b/pico/32x/draw.c index 2287e246..4bdbc89a 100644 --- a/pico/32x/draw.c +++ b/pico/32x/draw.c @@ -11,6 +11,9 @@ int (*PicoScan32xBegin)(unsigned int num); int (*PicoScan32xEnd)(unsigned int num); int Pico32xDrawMode; +void *DrawLineDestBase32x; +int DrawLineDestIncrement32x; + static void convert_pal555(int invert_prio) { unsigned int *ps = (void *)Pico32xMem->pal; @@ -228,13 +231,11 @@ void PicoDraw32xLayer(int offs, int lines, int md_bg) int lines_sft_offs; int which_func; - Pico.est.DrawLineDest = (char *)DrawLineDestBase + offs * DrawLineDestIncrement; + Pico.est.DrawLineDest = (char *)DrawLineDestBase32x + offs * DrawLineDestIncrement32x; dram = 
Pico32xMem->dram[Pico32x.vdp_regs[0x0a/2] & P32XV_FS]; - if (Pico32xDrawMode == PDM32X_BOTH) { - if (Pico.m.dirtyPal) - PicoDrawUpdateHighPal(); - } + if (Pico32xDrawMode == PDM32X_BOTH) + PicoDrawUpdateHighPal(); if ((Pico32x.vdp_regs[0] & P32XV_Mx) == 2) { @@ -273,20 +274,21 @@ do_it: void PicoDraw32xLayerMdOnly(int offs, int lines) { int have_scan = PicoScan32xBegin != NULL && PicoScan32xEnd != NULL; - unsigned short *dst = (void *)((char *)DrawLineDestBase + offs * DrawLineDestIncrement); + unsigned short *dst = (void *)((char *)DrawLineDestBase32x + offs * DrawLineDestIncrement32x); unsigned char *pmd = Pico.est.Draw2FB + 328 * offs + 8; unsigned short *pal = Pico.est.HighPal; int poffs = 0, plen = 320; int l, p; if (!(Pico.video.reg[12] & 1)) { - // 32col mode + // 32col mode. for some render modes MD pixel data carries an offset + if (!(PicoIn.opt & (POPT_ALT_RENDERER|POPT_DIS_32C_BORDER))) + pmd += 32; poffs = 32; plen = 256; } - if (Pico.m.dirtyPal) - PicoDrawUpdateHighPal(); + PicoDrawUpdateHighPal(); dst += poffs; for (l = 0; l < lines; l++) { @@ -300,7 +302,7 @@ void PicoDraw32xLayerMdOnly(int offs, int lines) dst[p + 2] = pal[*pmd++]; dst[p + 3] = pal[*pmd++]; } - dst = (void *)((char *)dst + DrawLineDestIncrement); + dst = (void *)((char *)dst + DrawLineDestIncrement32x); pmd += 328 - plen; if (have_scan) PicoScan32xEnd(l + offs); @@ -314,16 +316,32 @@ void PicoDrawSetOutFormat32x(pdso_t which, int use_32x_line_mode) Pico32xNativePal = Pico32xMem->pal_native; #endif - if (which == PDF_RGB555 && use_32x_line_mode) { - // we'll draw via FinalizeLine32xRGB555 (rare) + if (which == PDF_RGB555) { + // need CLUT pixels in PicoDraw2FB for layer transparency + PicoDrawSetInternalBuf(Pico.est.Draw2FB, 328); + PicoDrawSetOutBufMD(DrawLineDestBase32x, DrawLineDestIncrement32x); + } else { + // use the same layout as alt renderer PicoDrawSetInternalBuf(NULL, 0); - Pico32xDrawMode = PDM32X_OFF; - return; + PicoDrawSetOutBufMD(Pico.est.Draw2FB + 8, 328); } - // use 
the same layout as alt renderer - PicoDrawSetInternalBuf(Pico.est.Draw2FB, 328); - Pico32xDrawMode = (which == PDF_RGB555) ? PDM32X_32X_ONLY : PDM32X_BOTH; + if (use_32x_line_mode) + // we'll draw via FinalizeLine32xRGB555 (rare) + Pico32xDrawMode = PDM32X_OFF; + else + // in RGB555 mode the 32x layer is drawn over the MD layer, in the other + // modes 32x and MD layer are merged together by the 32x renderer + Pico32xDrawMode = (which == PDF_RGB555) ? PDM32X_32X_ONLY : PDM32X_BOTH; +} + +void PicoDrawSetOutBuf32X(void *dest, int increment) +{ + DrawLineDestBase32x = dest; + DrawLineDestIncrement32x = increment; + // in RGB555 mode this buffer is also used by the MD renderer + if (Pico32xDrawMode != PDM32X_BOTH) + PicoDrawSetOutBufMD(DrawLineDestBase32x, DrawLineDestIncrement32x); } // vim:shiftwidth=2:ts=2:expandtab diff --git a/pico/draw.c b/pico/draw.c index 680de3da..e345a28d 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -1239,6 +1239,49 @@ void BackFill(int reg7, int sh, struct PicoEState *est) // -------------------------------------------- +void PicoDoHighPal555_8bit(int sh, int line, struct PicoEState *est) +{ + unsigned int *spal, *dpal; + unsigned int cnt = (sh ? 1 : est->SonicPalCount+1); + unsigned int t, i; + + // reset dirty only if there are no outstanding changes + if (Pico.m.dirtyPal == 2) + Pico.m.dirtyPal = 0; + + // In Sonic render mode palettes were backed up in SonicPal + spal = (void *)est->SonicPal; + dpal = (void *)est->HighPal; + + // additional palettes stored after in-frame changes + for (i = 0; i < cnt * 0x40 / 2; i++) { + t = spal[i]; +#ifdef USE_BGR555 + t = ((t & 0x000e000e)<< 1) | ((t & 0x00e000e0)<<3) | ((t & 0x0e000e00)<<4); +#else + t = ((t & 0x000e000e)<<12) | ((t & 0x00e000e0)<<3) | ((t & 0x0e000e00)>>7); +#endif + // treat it like it was 4-bit per channel, since in s/h mode it somewhat is that.
+ // otherwise intensity difference between this and s/h will be wrong + t |= (t >> 4) & 0x08610861; // 0x18e318e3 + dpal[i] = t; + } + + // norm: xxx0, sh: 0xxx, hi: 0xxx + 7 + if (sh) + { + // shadowed pixels + for (i = 0; i < 0x40 / 2; i++) + dpal[0x40/2 | i] = dpal[0xc0/2 | i] = (dpal[i] >> 1) & 0x738e738e; + // hilighted pixels + for (i = 0; i < 0x40 / 2; i++) { + t = ((dpal[i] >> 1) & 0x738e738e) + 0x738e738e; // 0x7bef7bef; + t |= (t >> 4) & 0x08610861; + dpal[0x80/2 | i] = t; + } + } +} + #ifndef _ASM_DRAW_C void PicoDoHighPal555(int sh, int line, struct PicoEState *est) { @@ -1285,8 +1328,7 @@ void FinalizeLine555(int sh, int line, struct PicoEState *est) unsigned short *pal=est->HighPal; int len; - if (Pico.m.dirtyPal) - PicoDoHighPal555(sh, line, est); + PicoDrawUpdateHighPal(); if (Pico.video.reg[12]&1) { len = 320; @@ -1315,22 +1357,21 @@ void FinalizeLine555(int sh, int line, struct PicoEState *est) static void FinalizeLine8bit(int sh, int line, struct PicoEState *est) { unsigned char *pd = est->DrawLineDest; - int len, rs = est->rendstatus; - static int dirty_count; + int len; + static int dirty_line; - if (!sh && Pico.m.dirtyPal == 1) + if (Pico.m.dirtyPal == 1) { // a hack for mid-frame palette changes - if (!(rs & PDRAW_SONIC_MODE)) - dirty_count = 1; - else dirty_count++; - rs |= PDRAW_SONIC_MODE; - est->rendstatus = rs; - if (dirty_count == 3) { - blockcpy(est->HighPal, PicoMem.cram, 0x40*2); - } else if (dirty_count == 11) { - blockcpy(est->HighPal+0x40, PicoMem.cram, 0x40*2); + if (!(est->rendstatus & PDRAW_SONIC_MODE) || line - dirty_line > 4) { + // store a maximum of 3 additional palettes in SonicPal + if (est->SonicPalCount < 3) + est->SonicPalCount ++; + dirty_line = line; + est->rendstatus |= PDRAW_SONIC_MODE; } + blockcpy(est->SonicPal+est->SonicPalCount*0x40, PicoMem.cram, 0x40*2); + Pico.m.dirtyPal = 2; } if (Pico.video.reg[12]&1) { @@ -1341,12 +1382,9 @@ static void FinalizeLine8bit(int sh, int line, struct PicoEState *est) len = 
256; } - if (!sh && (rs & PDRAW_SONIC_MODE)) { - if (dirty_count >= 11) { - blockcpy_or(pd, est->HighCol+8, len, 0x80); - } else { - blockcpy_or(pd, est->HighCol+8, len, 0x40); - } + if (!sh && (est->rendstatus & PDRAW_SONIC_MODE)) { + // select active backup palette + blockcpy_or(pd, est->HighCol+8, len, est->SonicPalCount*0x40); } else { blockcpy(pd, est->HighCol+8, len); } @@ -1478,6 +1516,7 @@ static int DrawDisplay(int sh) PICO_INTERNAL void PicoFrameStart(void) { int offs = 8, lines = 224; + int dirty = ((Pico.est.rendstatus & PDRAW_SONIC_MODE) || Pico.m.dirtyPal); // prepare to do this frame Pico.est.rendstatus = 0; @@ -1503,11 +1542,16 @@ PICO_INTERNAL void PicoFrameStart(void) Pico.est.DrawScanline = 0; skip_next_line = 0; + if (FinalizeLine == FinalizeLine8bit) { + // make a backup of the current palette in case Sonic mode is detected later + Pico.est.SonicPalCount = 0; + Pico.m.dirtyPal = (dirty ? 2 : 0); // mark as dirty but already copied + blockcpy(Pico.est.SonicPal, PicoMem.cram, 0x40*2); + } + if (PicoIn.opt & POPT_ALT_RENDERER) return; - if (Pico.m.dirtyPal) - Pico.m.dirtyPal = 2; // reset dirty if needed PrepareSprites(1); } @@ -1598,15 +1642,21 @@ void PicoDrawSync(int to, int blank_last_line) void PicoDrawUpdateHighPal(void) { struct PicoEState *est = &Pico.est; - int sh = (Pico.video.reg[0xC] & 8) >> 3; // shadow/hilight? - if (PicoIn.opt & POPT_ALT_RENDERER) - sh = 0; // no s/h support + if (Pico.m.dirtyPal) { + int sh = (Pico.video.reg[0xC] & 8) >> 3; // shadow/hilight? + if ((PicoIn.opt & POPT_ALT_RENDERER) | (est->rendstatus & PDRAW_SONIC_MODE)) + sh = 0; // no s/h support - PicoDoHighPal555(sh, 0, &Pico.est); - if (est->rendstatus & PDRAW_SONIC_MODE) { - // FIXME? 
- memcpy(est->HighPal + 0x40, est->HighPal, 0x40*2); - memcpy(est->HighPal + 0x80, est->HighPal, 0x40*2); + if (FinalizeLine == FinalizeLine8bit) + PicoDoHighPal555_8bit(sh, 0, est); + else + PicoDoHighPal555(sh, 0, est); + + // cover for sprite priority bits if not in s/h or sonic mode + if (!sh && !(est->rendstatus & PDRAW_SONIC_MODE)) { + blockcpy(est->HighPal+0x40, est->HighPal, 0x40*2); + blockcpy(est->HighPal+0x80, est->HighPal, 0x80*2); + } } } @@ -1629,17 +1679,33 @@ void PicoDrawSetOutFormat(pdso_t which, int use_32x_line_mode) FinalizeLine = NULL; break; } - PicoDrawSetOutFormat32x(which, use_32x_line_mode); + if (PicoIn.AHW & PAHW_32X) + PicoDrawSetOutFormat32x(which, use_32x_line_mode); PicoDrawSetOutputMode4(which); rendstatus_old = -1; } +void PicoDrawSetOutBufMD(void *dest, int increment) +{ + if (dest != NULL) { + DrawLineDestBase = dest; + DrawLineDestIncrement = increment; + Pico.est.DrawLineDest = DrawLineDestBase + Pico.est.DrawScanline * increment; + } + else { + DrawLineDestBase = DefOutBuff; + DrawLineDestIncrement = 0; + Pico.est.DrawLineDest = DefOutBuff; + } +} + // note: may be called on the middle of frame void PicoDrawSetOutBuf(void *dest, int increment) { - DrawLineDestBase = dest; - DrawLineDestIncrement = increment; - Pico.est.DrawLineDest = (char *)DrawLineDestBase + Pico.est.DrawScanline * increment; + if (PicoIn.AHW & PAHW_32X) + PicoDrawSetOutBuf32X(dest, increment); + else + PicoDrawSetOutBufMD(dest, increment); } void PicoDrawSetInternalBuf(void *dest, int increment) @@ -1652,6 +1718,7 @@ void PicoDrawSetInternalBuf(void *dest, int increment) else { HighColBase = DefHighCol; HighColIncrement = 0; + Pico.est.HighCol = DefHighCol; } } diff --git a/pico/draw_arm.S b/pico/draw_arm.S index 29af1c13..3bc27033 100644 --- a/pico/draw_arm.S +++ b/pico/draw_arm.S @@ -1498,11 +1498,9 @@ vidConvCpyRGB565: @ void *to, void *from, int pixels PicoDoHighPal555: stmfd sp!, {r4-r10,lr} mov r10,r2 @ est - mov r1, #0 ldr r8, [r10, #OFS_EST_Pico] 
-PicoDoHighPal555_nopush: - orr r9, r1, r0, lsl #31 @ 0:called from FinalizeLine555, 31: s/h + mov r9, r0 add r0, r10, #OFS_EST_HighPal @@ -1517,7 +1515,7 @@ PicoDoHighPal555_nopush: vidConvCpyRGB565_local - tst r9, #(1<<31) + cmp r9, #0 beq PicoDoHighPal555_end add r3, r10, #OFS_EST_HighPal @@ -1560,11 +1558,7 @@ PicoDoHighPal555_nopush: mov r0, #1 PicoDoHighPal555_end: - tst r9, #1 - ldmeqfd sp!, {r4-r10,pc} - - ldr r8, [r10, #OFS_EST_Pico] - b FinalizeLineRGB555_pal_done + ldmfd sp!, {r4-r10,pc} @ void FinalizeLine555(int sh, int line, struct PicoEState *est) @@ -1576,19 +1570,11 @@ FinalizeLine555: mov r10,r2 @ est ldr r8, [r10, #OFS_EST_Pico] - ldrb r2, [r8, #OFS_Pico_m_dirtyPal] - mov r1, #1 - tst r2, r2 - bne PicoDoHighPal555_nopush + bl PicoDrawUpdateHighPal -FinalizeLineRGB555_pal_done: add r3, r10, #OFS_EST_HighPal - ldr r12, [r10, #OFS_EST_rendstatus] - eors r0, r0, #1 @ sh is 0 mov lr, #0xff - tstne r12,#PDRAW_ACC_SPRITES - movne lr, #0x3f ldr r1, [r10, #OFS_EST_HighCol] ldr r0, [r10, #OFS_EST_DrawLineDest] diff --git a/pico/pico_int.h b/pico/pico_int.h index cca7f954..f6d8b37f 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -356,6 +356,8 @@ struct PicoEState unsigned int *PicoOpt; unsigned char *Draw2FB; unsigned short HighPal[0x100]; + unsigned short SonicPal[0x100]; + int SonicPalCount; }; struct PicoMem @@ -923,6 +925,7 @@ void p32x_sh2_poll_event(SH2 *sh2, unsigned int flags, unsigned int m68k_cycles) // 32x/draw.c void PicoDrawSetOutFormat32x(pdso_t which, int use_32x_line_mode); +void PicoDrawSetOutBuf32X(void *dest, int increment); void FinalizeLine32xRGB555(int sh, int line, struct PicoEState *est); void PicoDraw32xLayer(int offs, int lines, int mdbg); void PicoDraw32xLayerMdOnly(int offs, int lines); diff --git a/pico/videoport.c b/pico/videoport.c index cd76dc04..d18c2cf9 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -41,7 +41,7 @@ static void VideoWrite(u16 d) if (a - ((unsigned)(Pico.video.reg[5]&0x7f) << 9) < 0x400) 
Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; break; - case 3: Pico.m.dirtyPal = 1; + case 3: if (PicoMem.cram [(a >> 1) & 0x3f] != d) Pico.m.dirtyPal = 1; PicoMem.cram [(a >> 1) & 0x3f] = d; break; case 5: PicoMem.vsram[(a >> 1) & 0x3f] = d; break; case 0x81: @@ -441,7 +441,7 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) break; case 0x0c: // renderers should update their palettes if sh/hi mode is changed - if ((d^dold)&8) Pico.m.dirtyPal = 2; + if ((d^dold)&8) Pico.m.dirtyPal = 1; break; } return; diff --git a/platform/common/common.mak b/platform/common/common.mak index 197e8677..2f676abc 100644 --- a/platform/common/common.mak +++ b/platform/common/common.mak @@ -70,7 +70,7 @@ SRCS_COMMON += $(R)pico/cd/memory_arm.S endif ifeq "$(asm_32xdraw)" "1" DEFINES += _ASM_32X_DRAW -SRCS_COMMON += $(R)pico/32x/draw_arm.s +SRCS_COMMON += $(R)pico/32x/draw_arm.S endif ifeq "$(asm_mix)" "1" SRCS_COMMON += $(R)pico/sound/mix_arm.S diff --git a/platform/common/emu.c b/platform/common/emu.c index 0a9f0890..da03bff1 100644 --- a/platform/common/emu.c +++ b/platform/common/emu.c @@ -1407,8 +1407,10 @@ void emu_loop(void) { notice_msg_time = 0; plat_status_msg_clear(); +#ifndef __GP2X__ plat_video_flip(); plat_status_msg_clear(); /* Do it again in case of double buffering */ +#endif notice_msg = NULL; } else { diff --git a/platform/gizmondo/emu.c b/platform/gizmondo/emu.c index 86c473c2..fcf27125 100644 --- a/platform/gizmondo/emu.c +++ b/platform/gizmondo/emu.c @@ -155,7 +155,7 @@ static void blit(const char *fps, const char *notice) } // a hack for VR if (PicoIn.AHW & PAHW_SVP) - memset32((int *)(Pico.est.Draw2FB+328*8+328*223), 0xe0e0e0e0, 328); + memset((int *)(Pico.est.Draw2FB+328*8+328*223), 0xe0e0e0e0, 328*4); if (!(Pico.video.reg[12]&1)) lines_flags|=0x10000; if (currentConfig.EmuOpt&0x4000) lines_flags|=0x40000; // (Pico.m.frame_count&1)?0x20000:0x40000; @@ -166,22 +166,25 @@ static void blit(const char *fps, const char *notice) int lines_flags; 
// 8bit accurate renderer if (Pico.m.dirtyPal) { - Pico.m.dirtyPal = 0; - vidConvCpyRGB565(localPal, Pico.cram, 0x40); + if (Pico.m.dirtyPal == 2) + Pico.m.dirtyPal = 0; + /* no support + switch (Pico.est.SonicPalCount) { + case 3: vidConvCpyRGB565(localPal+0xc0, Pico.est.SonicPal+0xc0, 0x40); + case 2: vidConvCpyRGB565(localPal+0x80, Pico.est.SonicPal+0x80, 0x40); + case 1: vidConvCpyRGB565(localPal+0x40, Pico.est.SonicPal+0x40, 0x40); + default://vidConvCpyRGB565(localPal, Pico.est.SonicPal, 0x40); + } */ + vidConvCpyRGB565(localPal, Pico.est.SonicPal, 0x40); if (Pico.video.reg[0xC]&8) { // shadow/hilight mode - //vidConvCpyRGB32sh(localPal+0x40, Pico.cram, 0x40); - //vidConvCpyRGB32hi(localPal+0x80, Pico.cram, 0x40); // TODO? - memcpy32((void *)(localPal+0xc0), (void *)(localPal+0x40), 0x40*2/4); + //vidConvCpyRGB32sh(localPal+0x40, Pico.est.SonicPal, 0x40); + //vidConvCpyRGB32hi(localPal+0x80, Pico.est.SonicPal, 0x40); // TODO? + memcpy((void *)(localPal+0xc0), (void *)(localPal+0x40), 0x40*2); localPal[0xc0] = 0x0600; localPal[0xd0] = 0xc000; localPal[0xe0] = 0x0000; // reserved pixels for OSD localPal[0xf0] = 0xffff; } - /* no support - else if (rendstatus & 0x20) { // mid-frame palette changes - vidConvCpyRGB565(localPal+0x40, HighPal, 0x40); - vidConvCpyRGB565(localPal+0x80, HighPal+0x40, 0x40); - } */ } lines_flags = (Pico.video.reg[1]&8) ? 
240 : 224; if (!(Pico.video.reg[12]&1)) lines_flags|=0x10000; diff --git a/platform/gizmondo/menu.c b/platform/gizmondo/menu.c index 51f032f0..1045f47b 100644 --- a/platform/gizmondo/menu.c +++ b/platform/gizmondo/menu.c @@ -54,7 +54,7 @@ static unsigned int inp_prev = 0; void menu_draw_begin(int use_bgbuff) { if (use_bgbuff) - memcpy32((int *)menu_screen, (int *)bg_buffer, 321*240*2/4); + memcpy((int *)menu_screen, (int *)bg_buffer, 321*240*2); } @@ -66,7 +66,7 @@ void menu_draw_end(void) lprintf("%s: Framework2D_LockBuffer() returned NULL\n", __FUNCTION__); return; } - memcpy32(giz_screen, (int *)menu_screen, 321*240*2/4); + memcpy(giz_screen, (int *)menu_screen, 321*240*2); fb_unlock(); giz_screen = NULL; fb_flip(); diff --git a/platform/gp2x/emu.c b/platform/gp2x/emu.c index 18d8a57e..450ac080 100644 --- a/platform/gp2x/emu.c +++ b/platform/gp2x/emu.c @@ -291,32 +291,45 @@ static int EmuScanEnd16_ld(unsigned int num) } static int localPal[0x100]; +static int localPalSize; + static void (*vidcpyM2)(void *dest, void *src, int m32col, int with_32c_border); static int (*make_local_pal)(int fast_mode); static int make_local_pal_md(int fast_mode) { - int pallen = 0xc0; + int pallen = 0x100; - bgr444_to_rgb32(localPal, PicoMem.cram); - if (fast_mode) - return 0x40; - - if (Pico.video.reg[0xC] & 8) { // shadow/hilight mode - bgr444_to_rgb32_sh(localPal, PicoMem.cram); - localPal[0xc0] = 0x0000c000; - localPal[0xd0] = 0x00c00000; - localPal[0xe0] = 0x00000000; // reserved pixels for OSD - localPal[0xf0] = 0x00ffffff; - pallen = 0x100; + if (fast_mode) { + bgr444_to_rgb32(localPal, PicoMem.cram); + pallen = 0x40; + Pico.m.dirtyPal = 0; } else if (Pico.est.rendstatus & PDRAW_SONIC_MODE) { // mid-frame palette changes - bgr444_to_rgb32(localPal+0x40, Pico.est.HighPal); - bgr444_to_rgb32(localPal+0x80, Pico.est.HighPal+0x40); + switch (Pico.est.SonicPalCount) { + case 3: bgr444_to_rgb32(localPal+0xc0, Pico.est.SonicPal+0xc0); + case 2: bgr444_to_rgb32(localPal+0x80, 
Pico.est.SonicPal+0x80); + case 1: bgr444_to_rgb32(localPal+0x40, Pico.est.SonicPal+0x40); + default:bgr444_to_rgb32(localPal, Pico.est.SonicPal); + } + pallen = (Pico.est.SonicPalCount+1)*0x40; } - else - memcpy(localPal + 0x80, localPal, 0x40 * 4); // for spr prio mess + else if (Pico.video.reg[0xC] & 8) { // shadow/hilight mode + bgr444_to_rgb32(localPal, Pico.est.SonicPal); + bgr444_to_rgb32_sh(localPal, Pico.est.SonicPal); + } + else { + bgr444_to_rgb32(localPal, Pico.est.SonicPal); + memcpy(localPal+0x40, localPal, 0x40*4); // for spr prio mess + memcpy(localPal+0x80, localPal, 0x80*4); // for spr prio mess + } + localPal[0xc0] = 0x0000c000; + localPal[0xd0] = 0x00c00000; + localPal[0xe0] = 0x00000000; // reserved pixels for OSD + localPal[0xf0] = 0x00ffffff; + if (Pico.m.dirtyPal == 2) + Pico.m.dirtyPal = 0; return pallen; } @@ -334,25 +347,21 @@ static int make_local_pal_sms(int fast_mode) *dpal++ = t; } + Pico.m.dirtyPal = 0; return 0x40; } void pemu_finalize_frame(const char *fps, const char *notice) { int emu_opt = currentConfig.EmuOpt; - int ret; if (PicoIn.AHW & PAHW_32X) - ; // nothing to do + localPalSize = 0; // nothing to do else if (get_renderer() == RT_8BIT_FAST) { // 8bit fast renderer - if (Pico.m.dirtyPal) { - Pico.m.dirtyPal = 0; - ret = make_local_pal(1); - // feed new palette to our device - gp2x_video_setpalette(localPal, ret); - } + if (Pico.m.dirtyPal) + localPalSize = make_local_pal(1); // a hack for VR if (PicoIn.AHW & PAHW_SVP) memset32((int *)(Pico.est.Draw2FB+328*8+328*223), 0xe0e0e0e0, 328); @@ -364,12 +373,9 @@ void pemu_finalize_frame(const char *fps, const char *notice) { // 8bit accurate renderer if (Pico.m.dirtyPal) - { - Pico.m.dirtyPal = 0; - ret = make_local_pal(0); - gp2x_video_setpalette(localPal, ret); - } + localPalSize = make_local_pal(0); } + else localPalSize = 0; // no palette in 16bit mode if (notice) osd_text(4, osd_y, notice); @@ -385,6 +391,10 @@ void plat_video_flip(void) { int stride = g_screen_width; 
gp2x_video_flip(); + // switching the palette takes immediate effect, whilst flipping only + // takes effect with the next vsync; unavoidable flicker may occur! + if (localPalSize) + gp2x_video_setpalette(localPal, localPalSize); if (is_16bit_mode()) stride *= 2; @@ -502,9 +512,6 @@ static void vid_reset_mode(void) if (renderer == RT_16BIT && (currentConfig.EmuOpt & EOPT_WIZ_TEAR_FIX)) { PicoDrawSetOutFormat(PDF_RGB555, 1); } - else { - PicoDrawSetOutFormat(PDF_NONE, 0); - } PicoDrawSetOutBuf(g_screen_ptr, g_screen_width * 2); gp2x_mode = 16; } @@ -537,10 +544,7 @@ static void vid_reset_mode(void) localPal[0xe0] = 0x00000000; // reserved pixels for OSD localPal[0xf0] = 0x00ffffff; gp2x_video_setpalette(localPal, 0x100); - gp2x_memset_all_buffers(0, 0xe0, 320*240); } - else - gp2x_memset_all_buffers(0, 0, 320*240*2); if (currentConfig.EmuOpt & EOPT_WIZ_TEAR_FIX) gp2x_mode = -gp2x_mode; @@ -723,6 +727,8 @@ void pemu_forced_frame(int no_scale, int do_emu) PicoDrawSetCallbacks(NULL, NULL); Pico.m.dirtyPal = 1; + if (!no_scale) + no_scale = currentConfig.scaling == EOPT_SCALE_NONE; emu_cmn_forced_frame(no_scale, do_emu); g_menubg_src_ptr = g_screen_ptr; diff --git a/platform/linux/emu.c b/platform/linux/emu.c index 5d4432fa..8af5afa8 100644 --- a/platform/linux/emu.c +++ b/platform/linux/emu.c @@ -71,8 +71,8 @@ void pemu_finalize_frame(const char *fps, const char *notice) unsigned char *ps = Pico.est.Draw2FB + 328*8 + 8; unsigned short *pal = Pico.est.HighPal; int i, x; - if (Pico.m.dirtyPal) - PicoDrawUpdateHighPal(); + + PicoDrawUpdateHighPal(); for (i = 0; i < 224; i++, ps += 8) for (x = 0; x < 320; x++) *pd++ = pal[*ps++]; @@ -109,6 +109,8 @@ static void apply_renderer(void) if (PicoIn.AHW & PAHW_32X) PicoDrawSetOutBuf(g_screen_ptr, g_screen_ppitch * 2); + + Pico.m.dirtyPal = 1; } void plat_video_toggle_renderer(int change, int is_menu) diff --git a/platform/psp/emu.c b/platform/psp/emu.c index 5c0cb57f..917ecc8d 100644 --- a/platform/psp/emu.c +++ 
b/platform/psp/emu.c @@ -201,13 +201,22 @@ static void do_pal_update(int allow_sh, int allow_as) //for (i = 0x3f/2; i >= 0; i--) // dpal[i] = ((spal[i]&0x000f000f)<< 1)|((spal[i]&0x00f000f0)<<3)|((spal[i]&0x0f000f00)<<4); - do_pal_convert(localPal, Pico.cram, currentConfig.gamma, currentConfig.gamma2); - - Pico.m.dirtyPal = 0; - need_pal_upload = 1; - - if (allow_sh && (Pico.video.reg[0xC]&8)) // shadow/hilight? + if ((currentConfig.EmuOpt&0x80) || (PicoOpt&0x10)) { + do_pal_convert(localPal, Pico.cram, currentConfig.gamma, currentConfig.gamma2); + Pico.m.dirtyPal = 0; + } + else if (Pico.est.rendstatus&0x20) { + switch (Pico.est.SonicPalCount) { + case 3: do_pal_convert(localPal+0xc0, Pico.est.SonicPal+0xc0, currentConfig.gamma, currentConfig.gamma2); + case 2: do_pal_convert(localPal+0x80, Pico.est.SonicPal+0x80, currentConfig.gamma, currentConfig.gamma2); + case 1: do_pal_convert(localPal+0x40, Pico.est.SonicPal+0x40, currentConfig.gamma, currentConfig.gamma2); + default:do_pal_convert(localPal, Pico.est.SonicPal, currentConfig.gamma, currentConfig.gamma2); + } + } + else if (allow_sh && (Pico.video.reg[0xC]&8)) // shadow/hilight? 
+ { + do_pal_convert(localPal, Pico.est.SonicPal, currentConfig.gamma, currentConfig.gamma2); // shadowed pixels for (i = 0x3f/2; i >= 0; i--) dpal[0x20|i] = dpal[0x60|i] = (dpal[i]>>1)&0x7bcf7bcf; @@ -223,6 +232,16 @@ static void do_pal_update(int allow_sh, int allow_as) localPal[0xe0] = 0; localPal[0xf0] = 0x001f; } + else if (allow_as && (Pico.est.rendstatus & PDRAW_SPR_LO_ON_HI)) + { + do_pal_convert(localPal, Pico.est.SonicPal, currentConfig.gamma, currentConfig.gamma2); + memcpy((int *)dpal+0x40/2, (void *)localPal, 0x40*2); + memcpy((int *)dpal+0x80/2, (void *)localPal, 0x80*2); + } + + if (Pico.m.dirtyPal == 2) + Pico.m.dirtyPal = 0; + need_pal_upload = 1; } static void do_slowmode_lines(int line_to) @@ -639,7 +658,7 @@ static void writeSound(int len) PicoIn.sndOut += len / 2; /*if (PicoIn.sndOut > sndBuffer_endptr) { - memcpy32((int *)(void *)sndBuffer, (int *)endptr, (PicoIn.sndOut - endptr + 1) / 2); + memcpy((int *)(void *)sndBuffer, (int *)endptr, (PicoIn.sndOut - endptr + 1) * 2); PicoIn.sndOut = &sndBuffer[PicoIn.sndOut - endptr]; lprintf("mov\n"); } diff --git a/platform/psp/menu.c b/platform/psp/menu.c index ab022f97..fc31b8e7 100644 --- a/platform/psp/menu.c +++ b/platform/psp/menu.c @@ -59,7 +59,7 @@ void menu_draw_begin(void) // int i; // for (i = 272; i >= 0; i--, dst += 512, src += 480) - // memcpy32((int *)dst, (int *)src, 480*2/4); + // memcpy((int *)dst, (int *)src, 480*2); sceGuSync(0,0); // sync with prev sceGuStart(GU_DIRECT, guCmdList); From 32feba7458a9497f27d72e219cf177774c09ce45 Mon Sep 17 00:00:00 2001 From: kub Date: Mon, 25 Mar 2019 19:31:32 +0100 Subject: [PATCH 0175/1110] minor changes --- pico/32x/32x.c | 4 +- pico/m68kif_cyclone.s | 8 +-- pico/pico_int.h | 3 +- pico/sms.c | 2 +- platform/common/helix/lib.c | 67 +------------------ platform/common/memcpy.c | 125 ++++++++++++++++++++++++++++++++++++ platform/common/plat_sdl.c | 9 ++- platform/common/version.h | 2 +- tools/mkoffsets.sh | 9 ++- 9 files changed, 151 insertions(+), 
78 deletions(-) create mode 100644 platform/common/memcpy.c diff --git a/pico/32x/32x.c b/pico/32x/32x.c index 3ee8c2ea..a15cb112 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -194,11 +194,11 @@ void PicoPower32x(void) void PicoUnload32x(void) { + sh2_finish(&msh2); + sh2_finish(&ssh2); if (Pico32xMem != NULL) plat_munmap(Pico32xMem, sizeof(*Pico32xMem)); Pico32xMem = NULL; - sh2_finish(&msh2); - sh2_finish(&ssh2); PicoIn.AHW &= ~PAHW_32X; } diff --git a/pico/m68kif_cyclone.s b/pico/m68kif_cyclone.s index a0a508cd..3a9621dc 100644 --- a/pico/m68kif_cyclone.s +++ b/pico/m68kif_cyclone.s @@ -87,19 +87,19 @@ cyclone_fetch32: orrcc r0, r1, r0, lsl #16 bxcc lr - stmfd sp!,{r0,r1,lr} + stmfd sp!,{r0,r1,r2,lr} mov lr, pc bx r1 mov r2, r0, lsl #16 - ldmia sp, {r0,r1} + ldmfd sp!, {r0,r1} str r2, [sp] add r0, r0, #2 mov lr, pc bx r1 - ldr r1, [sp] + ldmfd sp!, {r1,lr} mov r0, r0, lsl #16 orr r0, r1, r0, lsr #16 - ldmfd sp!,{r1,r2,pc} + bx lr cyclone_write8: @ u32 a, u8 d diff --git a/pico/pico_int.h b/pico/pico_int.h index f6d8b37f..4d599ce8 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -241,7 +241,7 @@ extern SH2 sh2s[2]; # define sh2_pc(sh2) (sh2)->pc #endif -#define sh2_cycles_done(sh2) ((unsigned)(sh2)->cycles_timeslice - sh2_cycles_left(sh2)) +#define sh2_cycles_done(sh2) (unsigned)((int)(sh2)->cycles_timeslice - sh2_cycles_left(sh2)) #define sh2_cycles_done_t(sh2) \ (unsigned)(C_M68K_TO_SH2(sh2, (sh2)->m68krcycles_done) + sh2_cycles_done(sh2)) #define sh2_cycles_done_m68k(sh2) \ @@ -650,6 +650,7 @@ PICO_INTERNAL void PicoFrameStart(void); void PicoDrawSync(int to, int blank_last_line); void BackFill(int reg7, int sh, struct PicoEState *est); void FinalizeLine555(int sh, int line, struct PicoEState *est); +void PicoDrawSetOutBufMD(void *dest, int increment); extern int (*PicoScanBegin)(unsigned int num); extern int (*PicoScanEnd)(unsigned int num); #define MAX_LINE_SPRITES 29 diff --git a/pico/sms.c b/pico/sms.c index 286b8bf1..2800e209 100644 --- 
a/pico/sms.c +++ b/pico/sms.c @@ -46,8 +46,8 @@ static void vdp_data_write(unsigned char d) struct PicoVideo *pv = &Pico.video; if (pv->type == 3) { + if (PicoMem.cram[pv->addr & 0x1f] != d) Pico.m.dirtyPal = 1; PicoMem.cram[pv->addr & 0x1f] = d; - Pico.m.dirtyPal = 1; } else { PicoMem.vramb[pv->addr] = d; } diff --git a/platform/common/helix/lib.c b/platform/common/helix/lib.c index d7c511be..d2b05898 100644 --- a/platform/common/helix/lib.c +++ b/platform/common/helix/lib.c @@ -53,70 +53,5 @@ void *memmove (void *dest, const void *src, size_t n) return dest; } #else -/* memcpy/memmove in C with some simple optimizations. - * ATTN does dirty aliasing tricks with undefined behaviour by standard. - * (this works fine with gcc, though...) - */ -void *memcpy(void *dest, const void *src, size_t n) -{ - struct _16 { uint32_t a[4]; }; - union { const void *v; char *c; uint64_t *l; struct _16 *s; } - ss = { src }, ds = { dest }; - const int lm = sizeof(uint32_t)-1; - - if ((((unsigned)ss.c ^ (unsigned)ds.c) & lm) == 0) { - /* fast copy if pointers have the same aligment */ - while (((unsigned)ss.c & lm) && n > 0) /* align to word */ - *ds.c++ = *ss.c++, n--; - while (n >= sizeof(struct _16)) /* copy 16 bytes blocks */ - *ds.s++ = *ss.s++, n -= sizeof(struct _16); - if (n >= sizeof(uint64_t)) /* copy leftover 8 byte block */ - *ds.l++ = *ss.l++, n -= sizeof(uint64_t); - } else { - /* byte copy if pointers are unaligned */ - while (n >= 8) { /* copy 8 byte blocks */ - *ds.c++ = *ss.c++, n--; *ds.c++ = *ss.c++, n--; - *ds.c++ = *ss.c++, n--; *ds.c++ = *ss.c++, n--; - *ds.c++ = *ss.c++, n--; *ds.c++ = *ss.c++, n--; - *ds.c++ = *ss.c++, n--; *ds.c++ = *ss.c++, n--; - } - } - /* copy max. 
8 leftover bytes */ - while (n > 0) - *ds.c++ = *ss.c++, n--; - return dest; -} - -void *memmove (void *dest, const void *src, size_t n) -{ - struct _16 { uint32_t a[4]; }; - union { const void *v; char *c; uint64_t *l; struct _16 *s; } - ss = { src+n }, ds = { dest+n }; - const int lm = sizeof(uint32_t)-1; - - if (dest <= src || dest >= src+n) - return memcpy(dest, src, n); - - if ((((unsigned)ss.c ^ (unsigned)ds.c) & lm) == 0) { - /* fast copy if pointers have the same aligment */ - while (((unsigned)ss.c & lm) && n > 0) - *--ds.c = *--ss.c, n--; - while (n >= sizeof(struct _16)) - *--ds.s = *--ss.s, n -= sizeof(struct _16); - if (n >= sizeof(uint64_t)) - *--ds.l = *--ss.l, n -= sizeof(uint64_t); - } else { - /* byte copy if pointers are unaligned */ - while (n >= 8) { - *--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--; - *--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--; - *--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--; - *--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--; - } - } - /* copy max. 8 leftover bytes */ - while (n > 0) - *--ds.c = *--ss.c, n--; - return dest; -} +#include "../memcpy.c" #endif diff --git a/platform/common/memcpy.c b/platform/common/memcpy.c new file mode 100644 index 00000000..b99de4ae --- /dev/null +++ b/platform/common/memcpy.c @@ -0,0 +1,125 @@ +/* + * (C) 2018 Kai-Uwe Bloem + * + * 32bit ARM/MIPS optimized C implementation of memcpy and memmove, designed for + * good performance with gcc. + * - if src and dest have the same alignment, 4-word copy is used. + * - if src and dest are unaligned to each other, still loads word data and + * stores correctly shifted word data (for all but the first and last bytes + * to avoid under/overstepping the src region). + * + * ATTN does dirty aliasing tricks with undefined behaviour by standard. + * (however, this was needed to improve the generated code). + * ATTN uses struct assignment, which only works if the compiler is inlining + * this (else it would probably call memcpy :-)).
+ */ +#include +#include + +#include +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define _L_ >> +#define _U_ << +#else +#define _L_ << +#define _U_ >> +#endif + +void *memcpy(void *dest, const void *src, size_t n) +{ + struct _16 { uint32_t a[4]; }; + union { const void *v; uint8_t *c; uint32_t *i; uint64_t *l; struct _16 *s; } + ss = { src }, ds = { dest }; + const int lm = sizeof(uint32_t)-1; + + /* align src to word */ + while (((unsigned)ss.c & lm) && n > 0) + *ds.c++ = *ss.c++, n--; + if (((unsigned)ds.c & lm) == 0) { + /* fast copy if pointers have the same aligment */ + while (n >= sizeof(struct _16)) /* copy 16 bytes blocks */ + *ds.s++ = *ss.s++, n -= sizeof(struct _16); + if (n >= sizeof(uint64_t)) /* copy leftover 8 byte block */ + *ds.l++ = *ss.l++, n -= sizeof(uint64_t); + } else if (n >= 2*sizeof(uint32_t)) { + /* unaligned data big enough to avoid overstepping src */ + uint32_t v1, v2, b, s; + /* align dest to word */ + while (((unsigned)ds.c & lm) && n > 0) + *ds.c++ = *ss.c++, n--; + /* copy loop: load aligned words and store shifted words */ + b = (unsigned)ss.c & lm, s = b*8; ss.c -= b; + v1 = *ss.i++, v2 = *ss.i++; + while (n >= 3*sizeof(uint32_t)) { + *ds.i++ = (v1 _L_ s) | (v2 _U_ (32-s)); v1 = *ss.i++; + *ds.i++ = (v2 _L_ s) | (v1 _U_ (32-s)); v2 = *ss.i++; + n -= 2*sizeof(uint32_t); + } + /* data for one more store is already loaded */ + if (n >= sizeof(uint32_t)) { + *ds.i++ = (v1 _L_ s) | (v2 _U_ (32-s)); + n -= sizeof(uint32_t); + ss.c += sizeof(uint32_t); + } + ss.c += b - 2*sizeof(uint32_t); + } + /* copy 0-7 leftover bytes */ + while (n >= 4) { + *ds.c++ = *ss.c++, n--; *ds.c++ = *ss.c++, n--; + *ds.c++ = *ss.c++, n--; *ds.c++ = *ss.c++, n--; + } + while (n > 0) + *ds.c++ = *ss.c++, n--; + return dest; +} + +void *memmove (void *dest, const void *src, size_t n) +{ + struct _16 { uint32_t a[4]; }; + union { const void *v; uint8_t *c; uint32_t *i; uint64_t *l; struct _16 *s; } + ss = { src+n }, ds = { dest+n }; + const int lm = 
sizeof(uint32_t)-1; + + if (dest <= src || dest >= src+n) + return memcpy(dest, src, n); + + /* align src to word */ + while (((unsigned)ss.c & lm) && n > 0) + *--ds.c = *--ss.c, n--; + if (((unsigned)ds.c & lm) == 0) { + /* fast copy if pointers have the same aligment */ + while (n >= sizeof(struct _16)) /* copy 16 byte blocks */ + *--ds.s = *--ss.s, n -= sizeof(struct _16); + if (n >= sizeof(uint64_t)) /* copy leftover 8 byte block */ + *--ds.l = *--ss.l, n -= sizeof(uint64_t); + } else if (n >= 2*sizeof(uint32_t)) { + /* unaligned data big enough to avoid understepping src */ + uint32_t v1, v2, b, s; + /* align dest to word */ + while (((unsigned)ds.c & lm) && n > 0) + *--ds.c = *--ss.c, n--; + /* copy loop: load aligned words and store shifted words */ + b = (unsigned)ss.c & lm, s = b*8; ss.c += b; + v1 = *--ss.i, v2 = *--ss.i; + while (n >= 3*sizeof(uint32_t)) { + *--ds.i = (v1 _U_ s) | (v2 _L_ (32-s)); v1 = *--ss.i; + *--ds.i = (v2 _U_ s) | (v1 _L_ (32-s)); v2 = *--ss.i; + n -= 2*sizeof(uint32_t); + } + /* data for one more store is already loaded */ + if (n >= sizeof(uint32_t)) { + *--ds.i = (v1 _U_ s) | (v2 _L_ (32-s)); + n -= sizeof(uint32_t); + ss.c -= sizeof(uint32_t); + } + ss.c -= b - 2*sizeof(uint32_t); + } + /* copy 0-7 leftover bytes */ + while (n >= 4) { + *--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--; + *--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--; + } + while (n > 0) + *--ds.c = *--ss.c, n--; + return dest; +} diff --git a/platform/common/plat_sdl.c b/platform/common/plat_sdl.c index 4446f72e..ef99af2a 100644 --- a/platform/common/plat_sdl.c +++ b/platform/common/plat_sdl.c @@ -89,6 +89,7 @@ static const struct in_pdata in_sdl_platform_data = { /* YUV stuff */ static int yuv_ry[32], yuv_gy[32], yuv_by[32]; static unsigned char yuv_u[32 * 2], yuv_v[32 * 2]; +static int yuv_y[256]; void bgr_to_uyvy_init(void) { @@ -119,6 +120,10 @@ void bgr_to_uyvy_init(void) v = 255; yuv_v[i + 32] = v; } + // valid Y range seems to be 16..235 + for (i = 0; i < 
256; i++) { + yuv_y[i] = 16 + 219 * i / 32; + } } void rgb565_to_uyvy(void *d, const void *s, int pixels) @@ -143,8 +148,8 @@ void rgb565_to_uyvy(void *d, const void *s, int pixels) u = yu[b0 - y0]; v = yv[r0 - y0]; // valid Y range seems to be 16..235 - y0 = 16 + 219 * y0 / 31; - y1 = 16 + 219 * y1 / 31; + y0 = yuv_y[y0]; + y1 = yuv_y[y1]; *dst = (y1 << 24) | (v << 16) | (y0 << 8) | u; } diff --git a/platform/common/version.h b/platform/common/version.h index f65ba1ed..8b3adbf8 100644 --- a/platform/common/version.h +++ b/platform/common/version.h @@ -1 +1 @@ -#define VERSION "1.93" +#define VERSION "1.93+" diff --git a/tools/mkoffsets.sh b/tools/mkoffsets.sh index 60088f21..90e65867 100755 --- a/tools/mkoffsets.sh +++ b/tools/mkoffsets.sh @@ -1,16 +1,21 @@ -# usage: mkoffsets # automatically compute structure offsets for gcc targets in ELF format +# (C) 2018 Kai-Uwe Bloem. This work is placed in the public domain. +# +# usage: mkoffsets CC=${CC:-gcc} # endianess of target (automagically determined below) ENDIAN= +# compile with target C compiler and extract value from .rodata section compile_rodata () { $CC $CFLAGS -I .. -c /tmp/getoffs.c -o /tmp/getoffs.o || exit 1 + # find the name of the .rodata section (in case -fdata-sections is used) rosect=$(readelf -S /tmp/getoffs.o | grep '\.rodata' | sed 's/^[^.]*././;s/ .*//') + # read out .rodata section as hex string (should be only 4 or 8 bytes) objcopy --dump-section $rosect=/tmp/getoffs.ro /tmp/getoffs.o || exit 1 ro=$(xxd -ps /tmp/getoffs.ro) if [ "$ENDIAN" = "le" ]; then @@ -22,9 +27,11 @@ compile_rodata () else hex=$ro fi + # extract decimal value from hex string rodata=$(printf "%d" 0x$hex) } +# determine member offset and create #define get_define () # prefix struct member member... 
{ prefix=$1; shift From b804d9543be943a02714ce7fa74ef5a898c9f5a4 Mon Sep 17 00:00:00 2001 From: kub Date: Mon, 25 Mar 2019 20:23:04 +0100 Subject: [PATCH 0176/1110] DRC: reworked scan_block (fix register usage masks, better block and literals detection) --- cpu/drc/cmn.h | 2 +- cpu/drc/emit_x86.c | 2 +- cpu/sh2/compiler.c | 243 +++++++++++++++++++++++++++++---------------- 3 files changed, 162 insertions(+), 85 deletions(-) diff --git a/cpu/drc/cmn.h b/cpu/drc/cmn.h index 39463566..7d50d33d 100644 --- a/cpu/drc/cmn.h +++ b/cpu/drc/cmn.h @@ -5,7 +5,7 @@ typedef signed short s16; typedef unsigned int u32; typedef signed int s32; -#define DRC_TCACHE_SIZE (2*1024*1024) +#define DRC_TCACHE_SIZE (4*1024*1024) extern u8 *tcache; diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index e5f2adef..c5f4e865 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -686,7 +686,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; case 0: rd = xDI; break; \ case 1: rd = xSI; break; \ case 2: rd = xDX; break; \ - case 2: rd = xBX; break; \ + case 3: rd = xBX; break; \ } #define emith_sh2_drc_entry() { \ diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index aa3e772c..ece3b13e 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -43,9 +43,9 @@ #define MAX_BLOCK_SIZE (BLOCK_INSN_LIMIT * 6 * 6) // max literal offset from the block end -#define MAX_LITERAL_OFFSET 32*2 +#define MAX_LITERAL_OFFSET 0x200 // max. 
MOVA, MOV @(PC) offset #define MAX_LITERALS (BLOCK_INSN_LIMIT / 4) -#define MAX_LOCAL_BRANCHES 32 +#define MAX_LOCAL_BRANCHES (BLOCK_INSN_LIMIT / 4) // debug stuff // 01 - warnings/errors @@ -98,8 +98,10 @@ static int insns_compiled, hash_collisions, host_insn_count; #define BITMASK3(v0,v1,v2) (BITMASK2(v0,v1) | (1 << (v2))) #define BITMASK4(v0,v1,v2,v3) (BITMASK3(v0,v1,v2) | (1 << (v3))) #define BITMASK5(v0,v1,v2,v3,v4) (BITMASK4(v0,v1,v2,v3) | (1 << (v4))) +#define BITMASK6(v0,v1,v2,v3,v4,v5) (BITMASK5(v0,v1,v2,v3,v4) | (1 << (v5))) -#define SHR_T SHR_SR // might make them separate someday +#define SHR_T SHR_SR // might make them separate someday +#define SHR_MEM 31 static struct op_data { u8 op; @@ -115,6 +117,7 @@ static struct op_data { enum op_types { OP_UNHANDLED = 0, OP_BRANCH, + OP_BRANCH_N, // conditional known not to be taken OP_BRANCH_CT, // conditional, branch if T set OP_BRANCH_CF, // conditional, branch if T clear OP_BRANCH_R, // indirect @@ -125,6 +128,8 @@ enum op_types { OP_MOVA, OP_SLEEP, OP_RTE, + OP_TRAPA, + OP_UNDEFINED, }; #ifdef DRC_SH2 @@ -1672,6 +1677,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) tmp2 = ops[i-1].op == OP_BRANCH_CT ? 
DCOND_NE : DCOND_EQ; emith_move_r_imm_c(tmp2, tmp, ops[i-1].imm); break; + case OP_BRANCH_N: + emit_move_r_imm32(SHR_PC, pc); + break; // case OP_BRANCH_R OP_BRANCH_RF - PC already loaded } } @@ -1684,6 +1692,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) switch (opd->op) { + case OP_BRANCH_N: + goto end_op; + case OP_BRANCH: case OP_BRANCH_CT: case OP_BRANCH_CF: @@ -1734,6 +1745,32 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) drcf.pending_branch_indirect = 1; goto end_op; + case OP_UNDEFINED: + elprintf_sh2(sh2, EL_ANOMALY, + "drc: illegal op %04x @ %08x", op, pc - 2); + opd->imm = 4; + // fallthrough + case OP_TRAPA: + tmp = rcache_get_reg(SHR_SP, RC_GR_RMW); + emith_sub_r_imm(tmp, 4*2); + // push SR + tmp = rcache_get_reg_arg(0, SHR_SP); + emith_add_r_imm(tmp, 4); + tmp = rcache_get_reg_arg(1, SHR_SR); + emith_clear_msb(tmp, tmp, 22); + emit_memhandler_write(2); + // push PC + rcache_get_reg_arg(0, SHR_SP); + tmp = rcache_get_tmp_arg(1); + emith_move_r_imm(tmp, pc); + emit_memhandler_write(2); + // obtain new PC + emit_memhandler_read_rr(SHR_PC, SHR_VBR, opd->imm * 4, 2); + // indirect jump -> back to dispatcher + rcache_flush(); + emith_jump(sh2_drc_dispatcher); + goto end_op; + case OP_LOAD_POOL: #if PROPAGATE_CONSTANTS if (opd->imm != 0 && opd->imm < end_literals @@ -2610,26 +2647,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) tmp = (op & 0x300) >> 8; emit_memhandler_read_rr(SHR_R0, SHR_GBR, (op & 0xff) << tmp, tmp); goto end_op; - case 0x0300: // TRAPA #imm 11000011iiiiiiii - tmp = rcache_get_reg(SHR_SP, RC_GR_RMW); - emith_sub_r_imm(tmp, 4*2); - // push SR - tmp = rcache_get_reg_arg(0, SHR_SP); - emith_add_r_imm(tmp, 4); - tmp = rcache_get_reg_arg(1, SHR_SR); - emith_clear_msb(tmp, tmp, 22); - emit_memhandler_write(2); - // push PC - rcache_get_reg_arg(0, SHR_SP); - tmp = rcache_get_tmp_arg(1); - emith_move_r_imm(tmp, pc); - emit_memhandler_write(2); - // obtain new PC - 
emit_memhandler_read_rr(SHR_PC, SHR_VBR, (op & 0xff) * 4, 2); - // indirect jump -> back to dispatcher - rcache_flush(); - emith_jump(sh2_drc_dispatcher); - goto end_op; case 0x0800: // TST #imm,R0 11001000iiiiiiii tmp = rcache_get_reg(SHR_R0, RC_GR_READ); sr = rcache_get_reg(SHR_SR, RC_GR_RMW); @@ -3446,13 +3463,15 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, u16 *dr_pc_base; u32 pc, op, tmp; u32 end_pc, end_literals = 0; + u32 lowest_literal = 0; u32 lowest_mova = 0; struct op_data *opd; int next_is_delay = 0; int end_block = 0; int i, i_end; - memset(op_flags, 0, BLOCK_INSN_LIMIT); + memset(op_flags, 0, sizeof(*op_flags) * BLOCK_INSN_LIMIT); + op_flags[0] |= OF_BTARGET; // block start is always a target dr_pc_base = dr_get_pc_base(base_pc, is_slave); @@ -3473,6 +3492,9 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, } else if (end_block || i >= BLOCK_INSN_LIMIT - 2) break; + else if ((lowest_mova && lowest_mova <= pc) || + (lowest_literal && lowest_literal <= pc)) + break; // text area collides with data area op = FETCH_OP(pc); switch ((op & 0xf000) >> 12) @@ -3506,18 +3528,22 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, // BSRF Rm 0000mmmm00000011 opd->op = OP_BRANCH_RF; opd->rm = GET_Rn(); - opd->source = BITMASK1(opd->rm); + opd->source = BITMASK2(SHR_PC, opd->rm); opd->dest = BITMASK1(SHR_PC); if (!(op & 0x20)) opd->dest |= BITMASK1(SHR_PR); opd->cycles = 2; next_is_delay = 1; - end_block = 1; + if (!(opd->dest & BITMASK1(SHR_PR))) + end_block = !(op_flags[i+1+next_is_delay] & OF_BTARGET); + else + op_flags[i+1+next_is_delay] |= OF_BTARGET; break; case 0x04: // MOV.B Rm,@(R0,Rn) 0000nnnnmmmm0100 case 0x05: // MOV.W Rm,@(R0,Rn) 0000nnnnmmmm0101 case 0x06: // MOV.L Rm,@(R0,Rn) 0000nnnnmmmm0110 opd->source = BITMASK3(GET_Rm(), SHR_R0, GET_Rn()); + opd->dest = BITMASK1(SHR_MEM); break; case 0x07: // MUL.L Rm,Rn 0000nnnnmmmm0111 @@ -3594,7 +3620,7 @@ void scan_block(u32 
base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, opd->dest = BITMASK1(SHR_PC); opd->cycles = 2; next_is_delay = 1; - end_block = 1; + end_block = !(op_flags[i+1+next_is_delay] & OF_BTARGET); break; case 1: // SLEEP 0000000000011011 opd->op = OP_SLEEP; @@ -3603,10 +3629,10 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, case 2: // RTE 0000000000101011 opd->op = OP_RTE; opd->source = BITMASK1(SHR_SP); - opd->dest = BITMASK2(SHR_SR, SHR_PC); + opd->dest = BITMASK3(SHR_SP, SHR_SR, SHR_PC); opd->cycles = 4; next_is_delay = 1; - end_block = 1; + end_block = !(op_flags[i+1+next_is_delay] & OF_BTARGET); break; default: goto undefined; @@ -3615,11 +3641,11 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, case 0x0c: // MOV.B @(R0,Rm),Rn 0000nnnnmmmm1100 case 0x0d: // MOV.W @(R0,Rm),Rn 0000nnnnmmmm1101 case 0x0e: // MOV.L @(R0,Rm),Rn 0000nnnnmmmm1110 - opd->source = BITMASK2(GET_Rm(), SHR_R0); + opd->source = BITMASK3(GET_Rm(), SHR_R0, SHR_MEM); opd->dest = BITMASK1(GET_Rn()); break; case 0x0f: // MAC.L @Rm+,@Rn+ 0000nnnnmmmm1111 - opd->source = BITMASK5(GET_Rm(), GET_Rn(), SHR_SR, SHR_MACL, SHR_MACH); + opd->source = BITMASK6(GET_Rm(), GET_Rn(), SHR_SR, SHR_MACL, SHR_MACH, SHR_MEM); opd->dest = BITMASK4(GET_Rm(), GET_Rn(), SHR_MACL, SHR_MACH); opd->cycles = 3; break; @@ -3631,8 +3657,8 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, ///////////////////////////////////////////// case 0x01: // MOV.L Rm,@(disp,Rn) 0001nnnnmmmmdddd - opd->source = BITMASK1(GET_Rm()); - opd->source = BITMASK1(GET_Rn()); + opd->source = BITMASK2(GET_Rm(), GET_Rn()); + opd->dest = BITMASK1(SHR_MEM); opd->imm = (op & 0x0f) * 4; break; @@ -3643,14 +3669,14 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, case 0x00: // MOV.B Rm,@Rn 0010nnnnmmmm0000 case 0x01: // MOV.W Rm,@Rn 0010nnnnmmmm0001 case 0x02: // MOV.L Rm,@Rn 0010nnnnmmmm0010 - opd->source = BITMASK1(GET_Rm()); - opd->source 
= BITMASK1(GET_Rn()); + opd->source = BITMASK2(GET_Rm(), GET_Rn()); + opd->dest = BITMASK1(SHR_MEM); break; case 0x04: // MOV.B Rm,@-Rn 0010nnnnmmmm0100 case 0x05: // MOV.W Rm,@-Rn 0010nnnnmmmm0101 case 0x06: // MOV.L Rm,@-Rn 0010nnnnmmmm0110 opd->source = BITMASK2(GET_Rm(), GET_Rn()); - opd->dest = BITMASK1(GET_Rn()); + opd->dest = BITMASK2(GET_Rn(), SHR_MEM); break; case 0x07: // DIV0S Rm,Rn 0010nnnnmmmm0111 opd->source = BITMASK2(GET_Rm(), GET_Rn()); @@ -3791,7 +3817,7 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, goto undefined; } opd->source = BITMASK2(GET_Rn(), tmp); - opd->dest = BITMASK1(GET_Rn()); + opd->dest = BITMASK2(GET_Rn(), SHR_MEM); break; case 0x04: case 0x05: @@ -3843,7 +3869,7 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, default: goto undefined; } - opd->source = BITMASK1(GET_Rn()); + opd->source = BITMASK2(GET_Rn(), SHR_MEM); opd->dest = BITMASK2(GET_Rn(), tmp); break; case 0x08: @@ -3899,11 +3925,14 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, opd->dest |= BITMASK1(SHR_PC); opd->cycles = 2; next_is_delay = 1; - end_block = 1; + if (!(opd->dest & BITMASK1(SHR_PR))) + end_block = !(op_flags[i+1+next_is_delay] & OF_BTARGET); + else + op_flags[i+1+next_is_delay] |= OF_BTARGET; break; case 1: // TAS.B @Rn 0100nnnn00011011 - opd->source = BITMASK1(GET_Rn()); - opd->dest = BITMASK1(SHR_T); + opd->source = BITMASK2(GET_Rn(), SHR_MEM); + opd->dest = BITMASK2(SHR_T, SHR_MEM); opd->cycles = 4; break; default: @@ -3931,7 +3960,7 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, break; case 0x0f: // MAC.W @Rm+,@Rn+ 0100nnnnmmmm1111 - opd->source = BITMASK5(GET_Rm(), GET_Rn(), SHR_SR, SHR_MACL, SHR_MACH); + opd->source = BITMASK6(GET_Rm(), GET_Rn(), SHR_SR, SHR_MACL, SHR_MACH, SHR_MEM); opd->dest = BITMASK4(GET_Rm(), GET_Rn(), SHR_MACL, SHR_MACH); opd->cycles = 3; break; @@ -3943,7 +3972,7 @@ void scan_block(u32 base_pc, int is_slave, u8 
*op_flags, u32 *end_pc_out, ///////////////////////////////////////////// case 0x05: // MOV.L @(disp,Rm),Rn 0101nnnnmmmmdddd - opd->source = BITMASK1(GET_Rm()); + opd->source = BITMASK2(GET_Rm(), SHR_MEM); opd->dest = BITMASK1(GET_Rn()); opd->imm = (op & 0x0f) * 4; break; @@ -3955,12 +3984,14 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, case 0x04: // MOV.B @Rm+,Rn 0110nnnnmmmm0100 case 0x05: // MOV.W @Rm+,Rn 0110nnnnmmmm0101 case 0x06: // MOV.L @Rm+,Rn 0110nnnnmmmm0110 - opd->dest = BITMASK1(GET_Rm()); + opd->dest = BITMASK2(GET_Rm(), GET_Rn()); + opd->source = BITMASK2(GET_Rm(), SHR_MEM); + break; case 0x00: // MOV.B @Rm,Rn 0110nnnnmmmm0000 case 0x01: // MOV.W @Rm,Rn 0110nnnnmmmm0001 case 0x02: // MOV.L @Rm,Rn 0110nnnnmmmm0010 - opd->source = BITMASK1(GET_Rm()); - opd->dest |= BITMASK1(GET_Rn()); + opd->dest = BITMASK1(GET_Rn()); + opd->source = BITMASK2(GET_Rm(), SHR_MEM); break; case 0x0a: // NEGC Rm,Rn 0110nnnnmmmm1010 opd->source = BITMASK2(GET_Rm(), SHR_T); @@ -3997,19 +4028,21 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, { case 0x0000: // MOV.B R0,@(disp,Rn) 10000000nnnndddd opd->source = BITMASK2(GET_Rm(), SHR_R0); + opd->dest = BITMASK1(SHR_MEM); opd->imm = (op & 0x0f); break; case 0x0100: // MOV.W R0,@(disp,Rn) 10000001nnnndddd opd->source = BITMASK2(GET_Rm(), SHR_R0); + opd->dest = BITMASK1(SHR_MEM); opd->imm = (op & 0x0f) * 2; break; case 0x0400: // MOV.B @(disp,Rm),R0 10000100mmmmdddd - opd->source = BITMASK1(GET_Rm()); + opd->source = BITMASK2(GET_Rm(), SHR_MEM); opd->dest = BITMASK1(SHR_R0); opd->imm = (op & 0x0f); break; case 0x0500: // MOV.W @(disp,Rm),R0 10000101mmmmdddd - opd->source = BITMASK1(GET_Rm()); + opd->source = BITMASK2(GET_Rm(), SHR_MEM); opd->dest = BITMASK1(SHR_R0); opd->imm = (op & 0x0f) * 2; break; @@ -4025,7 +4058,7 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, case 0x0900: // BT label 10001001dddddddd case 0x0b00: // BF label 
10001011dddddddd opd->op = (op & 0x0200) ? OP_BRANCH_CF : OP_BRANCH_CT; - opd->source = BITMASK1(SHR_T); + opd->source = BITMASK2(SHR_PC, SHR_T); opd->dest = BITMASK1(SHR_PC); opd->imm = ((signed int)(op << 24) >> 23); opd->imm += pc + 4; @@ -4045,13 +4078,16 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, if (op_flags[i] & OF_DELAY_OP) { if (ops[i-1].op == OP_BRANCH) tmp = ops[i-1].imm; - else + else if (ops[i-1].op != OP_BRANCH_N) tmp = 0; } - opd->source = BITMASK1(SHR_PC); + opd->source = BITMASK2(SHR_PC, SHR_MEM); opd->dest = BITMASK1(GET_Rn()); - if (tmp) + if (tmp) { opd->imm = tmp + 2 + (op & 0xff) * 2; + if (lowest_literal == 0 || opd->imm < lowest_literal) + lowest_literal = opd->imm; + } opd->size = 1; break; @@ -4062,14 +4098,21 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, case 0x0a: // BRA label 1010dddddddddddd opd->op = OP_BRANCH; + opd->source = BITMASK1(SHR_PC); opd->dest |= BITMASK1(SHR_PC); opd->imm = ((signed int)(op << 20) >> 19); opd->imm += pc + 4; opd->cycles = 2; next_is_delay = 1; - end_block = 1; - if (base_pc <= opd->imm && opd->imm < base_pc + BLOCK_INSN_LIMIT * 2) - op_flags[(opd->imm - base_pc) / 2] |= OF_BTARGET; + if (!(opd->dest & BITMASK1(SHR_PR))) { + if (base_pc <= opd->imm && opd->imm < base_pc + BLOCK_INSN_LIMIT * 2) { + op_flags[(opd->imm - base_pc) / 2] |= OF_BTARGET; + if (opd->imm <= pc) + end_block = !(op_flags[i+1+next_is_delay] & OF_BTARGET); + } else + end_block = !(op_flags[i+1+next_is_delay] & OF_BTARGET); + } else + op_flags[i+1+next_is_delay] |= OF_BTARGET; break; ///////////////////////////////////////////// @@ -4080,23 +4123,25 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, case 0x0100: // MOV.W R0,@(disp,GBR) 11000001dddddddd case 0x0200: // MOV.L R0,@(disp,GBR) 11000010dddddddd opd->source = BITMASK2(SHR_GBR, SHR_R0); + opd->dest = BITMASK1(SHR_MEM); opd->size = (op & 0x300) >> 8; opd->imm = (op & 0xff) << opd->size; 
break; case 0x0400: // MOV.B @(disp,GBR),R0 11000100dddddddd case 0x0500: // MOV.W @(disp,GBR),R0 11000101dddddddd case 0x0600: // MOV.L @(disp,GBR),R0 11000110dddddddd - opd->source = BITMASK1(SHR_GBR); + opd->source = BITMASK2(SHR_GBR, SHR_MEM); opd->dest = BITMASK1(SHR_R0); opd->size = (op & 0x300) >> 8; opd->imm = (op & 0xff) << opd->size; break; case 0x0300: // TRAPA #imm 11000011iiiiiiii - opd->source = BITMASK2(SHR_PC, SHR_SR); - opd->dest = BITMASK1(SHR_PC); - opd->imm = (op & 0xff) * 4; + opd->op = OP_TRAPA; + opd->source = BITMASK3(SHR_SP, SHR_PC, SHR_SR); + opd->dest = BITMASK2(SHR_SP, SHR_PC); + opd->imm = (op & 0xff); opd->cycles = 8; - end_block = 1; // FIXME + op_flags[i+1] |= OF_BTARGET; break; case 0x0700: // MOVA @(disp,PC),R0 11000111dddddddd opd->op = OP_MOVA; @@ -4104,7 +4149,7 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, if (op_flags[i] & OF_DELAY_OP) { if (ops[i-1].op == OP_BRANCH) tmp = ops[i-1].imm; - else + else if (ops[i-1].op != OP_BRANCH_N) tmp = 0; } opd->dest = BITMASK1(SHR_R0); @@ -4134,7 +4179,7 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, opd->imm = op & 0xff; break; case 0x0c00: // TST.B #imm,@(R0,GBR) 11001100iiiiiiii - opd->source = BITMASK2(SHR_GBR, SHR_R0); + opd->source = BITMASK3(SHR_GBR, SHR_R0, SHR_MEM); opd->dest = BITMASK1(SHR_T); opd->imm = op & 0xff; opd->cycles = 3; @@ -4142,7 +4187,8 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, case 0x0d00: // AND.B #imm,@(R0,GBR) 11001101iiiiiiii case 0x0e00: // XOR.B #imm,@(R0,GBR) 11001110iiiiiiii case 0x0f00: // OR.B #imm,@(R0,GBR) 11001111iiiiiiii - opd->source = BITMASK2(SHR_GBR, SHR_R0); + opd->source = BITMASK3(SHR_GBR, SHR_R0, SHR_MEM); + opd->dest = BITMASK1(SHR_MEM); opd->imm = op & 0xff; opd->cycles = 3; break; @@ -4159,13 +4205,16 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, if (op_flags[i] & OF_DELAY_OP) { if (ops[i-1].op == OP_BRANCH) tmp = 
ops[i-1].imm; - else + else if (ops[i-1].op != OP_BRANCH_N) tmp = 0; } - opd->source = BITMASK1(SHR_PC); + opd->source = BITMASK2(SHR_PC, SHR_MEM); opd->dest = BITMASK1(GET_Rn()); - if (tmp) + if (tmp) { opd->imm = (tmp + 2 + (op & 0xff) * 4) & ~3; + if (lowest_literal == 0 || opd->imm < lowest_literal) + lowest_literal = opd->imm; + } opd->size = 2; break; @@ -4180,6 +4229,10 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, undefined: elprintf(EL_ANOMALY, "%csh2 drc: unhandled op %04x @ %08x", is_slave ? 's' : 'm', op, pc); + opd->op = OP_UNDEFINED; + // an unhandled instruction is probably not code if it's not the 1st insn + if (!(op_flags[i] & OF_DELAY_OP) && pc != base_pc) + goto end; break; } @@ -4199,10 +4252,12 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, } } } +end: i_end = i; end_pc = pc; // 2nd pass: some analysis + lowest_literal = end_literals = lowest_mova = 0; for (i = 0; i < i_end; i++) { opd = &ops[i]; @@ -4217,22 +4272,39 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, else op_flags[i + 1] |= op_flags[i] & (OF_T_SET | OF_T_CLEAR); - if ((opd->op == OP_BRANCH_CT && (op_flags[i] & OF_T_SET)) - || (opd->op == OP_BRANCH_CF && (op_flags[i] & OF_T_CLEAR))) - { + if ((opd->op == OP_BRANCH_CT && (op_flags[i] & OF_T_CLEAR)) || + (opd->op == OP_BRANCH_CF && (op_flags[i] & OF_T_SET))) + opd->op = OP_BRANCH_N; + else if ((opd->op == OP_BRANCH_CT && (op_flags[i] & OF_T_SET)) || + (opd->op == OP_BRANCH_CF && (op_flags[i] & OF_T_CLEAR))) { opd->op = OP_BRANCH; - opd->cycles = 3; - i_end = i + 1; - if (op_flags[i + 1] & OF_DELAY_OP) { + if (op_flags[i + 1] & OF_DELAY_OP) opd->cycles = 2; - i_end++; + else + opd->cycles = 3; + } + // "overscan" detection: unreachable code after unconditional branch + // this can happen if the insn after a forward branch isn't a local target + if (opd->op == OP_BRANCH || opd->op == OP_BRANCH_R || opd->op == OP_BRANCH_RF) { + if (op_flags[i + 1] 
& OF_DELAY_OP) { + if (i_end > i + 2 && !(op_flags[i + 2] & OF_BTARGET)) + i_end = i + 2; + } else { + if (i_end > i + 1 && !(op_flags[i + 1] & OF_BTARGET)) + i_end = i + 1; } } - else if (opd->op == OP_LOAD_POOL) - { - if (opd->imm < end_pc + MAX_LITERAL_OFFSET) { + + // literal pool size detection + if (opd->op == OP_MOVA && opd->imm >= base_pc) + if (lowest_mova == 0 || opd->imm < lowest_mova) + lowest_mova = opd->imm; + if (opd->op == OP_LOAD_POOL) { + if (opd->imm >= base_pc && opd->imm < end_pc + MAX_LITERAL_OFFSET) { if (end_literals < opd->imm + opd->size * 2) end_literals = opd->imm + opd->size * 2; + if (lowest_literal == 0 || lowest_literal > opd->imm) + lowest_literal = opd->imm; if (opd->size == 2) { // tweak for NFL: treat a 32bit literal as an address and check if it // points to the literal space. In that case handle it like MOVA. @@ -4245,26 +4317,31 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, } } end_pc = base_pc + i_end * 2; - if (end_literals < end_pc) - end_literals = end_pc; // end_literals is used to decide to inline a literal or not // XXX: need better detection if this actually is used in write + if (lowest_literal >= base_pc) { + if (lowest_literal < end_pc) { + dbg(1, "warning: lowest_literal=%08x < end_pc=%08x", lowest_literal, end_pc); + // TODO: does this always mean end_pc covers data? 
+ } + } if (lowest_mova >= base_pc) { if (lowest_mova < end_literals) { - dbg(1, "mova for %08x, block %08x", lowest_mova, base_pc); - end_literals = end_pc; + dbg(1, "warning: mova=%08x < end_literals=%08x", lowest_mova, end_literals); + end_literals = lowest_mova; } if (lowest_mova < end_pc) { - dbg(1, "warning: mova inside of blk for %08x, block %08x", - lowest_mova, base_pc); + dbg(1, "warning: mova=%08x < end_pc=%08x", lowest_mova, end_pc); end_literals = end_pc; } } + if (lowest_literal >= end_literals) + lowest_literal = end_literals; *end_pc_out = end_pc; if (end_literals_out != NULL) - *end_literals_out = end_literals; + *end_literals_out = (end_literals ?: end_pc); } // vim:shiftwidth=2:ts=2:expandtab From 1db36a7a0754ac03e824dd21fda9f85147eab6d2 Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 26 Mar 2019 22:01:27 +0100 Subject: [PATCH 0177/1110] sh2 drc: sh2 addr modes generalization, more const propagation, code gen optimizations --- cpu/drc/emit_arm.c | 69 ++++++ cpu/drc/emit_x86.c | 86 +++++++- cpu/sh2/compiler.c | 507 +++++++++++++++++++++------------------------ 3 files changed, 378 insertions(+), 284 deletions(-) diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index 89582e8d..86d8a41d 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -193,6 +193,11 @@ #define EOP_STRH_SIMPLE(rd,rn) EOP_C_AM3_IMM(A_COND_AL,1,0,rn,rd,0,1,0) #define EOP_STRH_REG( rd,rn,rm) EOP_C_AM3_REG(A_COND_AL,1,0,rn,rd,0,1,rm) +#define EOP_LDRSB_IMM2(cond,rd,rn,offset_8) EOP_C_AM3_IMM(cond,(offset_8) >= 0,1,rn,rd,1,0,abs(offset_8)) +#define EOP_LDRSB_REG2(cond,rd,rn,rm) EOP_C_AM3_REG(cond,1,1,rn,rd,1,0,rm) +#define EOP_LDRSH_IMM2(cond,rd,rn,offset_8) EOP_C_AM3_IMM(cond,(offset_8) >= 0,1,rn,rd,1,1,abs(offset_8)) +#define EOP_LDRSH_REG2(cond,rd,rn,rm) EOP_C_AM3_REG(cond,1,1,rn,rd,1,1,rm) + /* ldm and stm */ #define EOP_XXM(cond,p,u,s,w,l,rn,list) \ EMIT(((cond)<<28) | (1<<27) | ((p)<<24) | ((u)<<23) | ((s)<<22) | ((w)<<21) | ((l)<<20) | ((rn)<<16) | (list)) @@ -382,6 
+387,9 @@ static int emith_xbranch(int cond, void *target, int is_call) #define EMITH_SJMP_END_(cond) EMITH_NOTHING1(cond) #define EMITH_SJMP_START(cond) EMITH_NOTHING1(cond) #define EMITH_SJMP_END(cond) EMITH_NOTHING1(cond) +#define EMITH_SJMP2_START(cond) EMITH_NOTHING1(cond) +#define EMITH_SJMP2_MID(cond) EMITH_JMP_START((cond)^1) // inverse cond +#define EMITH_SJMP2_END(cond) EMITH_JMP_END((cond)^1) #define EMITH_SJMP3_START(cond) EMITH_NOTHING1(cond) #define EMITH_SJMP3_MID(cond) EMITH_NOTHING1(cond) #define EMITH_SJMP3_END() @@ -398,6 +406,9 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_add_r_r_r_lsl(d, s1, s2, lslimm) \ EOP_ADD_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm) +#define emith_addf_r_r_r_lsr(d, s1, s2, lslimm) \ + EOP_ADD_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSR,lslimm) + #define emith_or_r_r_r_lsl(d, s1, s2, lslimm) \ EOP_ORR_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm) @@ -476,6 +487,9 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_adc_r_imm(r, imm) \ emith_op_imm(A_COND_AL, 0, A_OP_ADC, r, imm) +#define emith_adcf_r_imm(r, imm) \ + emith_op_imm(A_COND_AL, 1, A_OP_ADC, r, (imm)) + #define emith_sub_r_imm(r, imm) \ emith_op_imm(A_COND_AL, 0, A_OP_SUB, r, imm) @@ -606,6 +620,8 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_mul_s64(dlo, dhi, s1, s2) \ EOP_C_SMULL(A_COND_AL,0,dhi,dlo,s1,s2) +#define emith_mula_s64_c(cond, dlo, dhi, s1, s2) \ + EOP_C_SMLAL(cond,0,dhi,dlo,s1,s2) #define emith_mula_s64(dlo, dhi, s1, s2) \ EOP_C_SMLAL(A_COND_AL,0,dhi,dlo,s1,s2) @@ -622,9 +638,13 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_read_r_r_offs(r, rs, offs) \ emith_read_r_r_offs_c(A_COND_AL, r, rs, offs) +#define emith_read8s_r_r_offs(r, rs, offs) \ + EOP_LDRSB_IMM2(A_COND_AL, r, rs, offs) #define emith_read8_r_r_offs(r, rs, offs) \ emith_read8_r_r_offs_c(A_COND_AL, r, rs, offs) +#define emith_read16s_r_r_offs(r, rs, offs) \ + 
EOP_LDRSH_IMM2(A_COND_AL, r, rs, offs) #define emith_read16_r_r_offs(r, rs, offs) \ emith_read16_r_r_offs_c(A_COND_AL, r, rs, offs) @@ -851,3 +871,52 @@ static int emith_xbranch(int cond, void *target, int is_call) JMP_EMIT(A_COND_AL, jmp1); /* done: */ \ } +/* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */ +#define emith_sh2_macl(ml, mh, rn, rm, sr) do { \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP2_START(DCOND_NE); \ + emith_mula_s64_c(DCOND_EQ, ml, mh, rn, rm); \ + EMITH_SJMP2_MID(DCOND_NE); \ + /* MACH top 16 bits unused if saturated. sign ext for overfl detect */ \ + emith_sext(mh, mh, 16); \ + emith_mula_s64(ml, mh, rn, rm); \ + /* overflow if top 17 bits of MACH aren't all 1 or 0 */ \ + /* to check: add MACH[15] to MACH[31:16]. this is 0 if no overflow */ \ + emith_asrf(rn, mh, 16); /* sum = (MACH>>16) + ((MACH>>15)&1) */ \ + emith_adcf_r_imm(rn, 0); /* (MACH>>15) is in carry after shift */ \ + EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> ov */ \ + emith_move_r_imm_c(DCOND_NE, ml, 0x0000); /* -overflow */ \ + emith_move_r_imm_c(DCOND_NE, mh, 0x8000); \ + EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> +ovl */ \ + emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0xffffffff */ \ + emith_sub_r_imm_c(DCOND_GT, mh, 1); /* 0x00007fff */ \ + EMITH_SJMP_END(DCOND_LE); \ + EMITH_SJMP_END(DCOND_EQ); \ + EMITH_SJMP2_END(DCOND_NE); \ +} while (0) + +/* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */ +#define emith_sh2_macw(ml, mh, rn, rm, sr) do { \ + emith_sext(rn, rn, 16); \ + emith_sext(rm, rm, 16); \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP2_START(DCOND_NE); \ + emith_mula_s64_c(DCOND_EQ, ml, mh, rn, rm); \ + EMITH_SJMP2_MID(DCOND_NE); \ + /* XXX: MACH should be untouched when S is set? */ \ + emith_asr(mh, ml, 31); /* sign ext MACL to MACH for ovrfl check */ \ + emith_mula_s64(ml, mh, rn, rm); \ + /* overflow if top 33 bits of MACH:MACL aren't all 1 or 0 */ \ + /* to check: add MACL[31] to MACH. 
this is 0 if no overflow */ \ + emith_addf_r_r_r_lsr(mh, mh, ml, 31); /* sum = MACH + ((MACL>>31)&1) */\ + EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \ + /* XXX: LSB signalling only in SH1, or in SH2 too? */ \ + emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \ + emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* negative ovrfl */ \ + EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> positive ovrfl */ \ + emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0x7fffffff */ \ + EMITH_SJMP_END(DCOND_LE); \ + EMITH_SJMP_END(DCOND_EQ); \ + EMITH_SJMP2_END(DCOND_NE); \ +} while (0) + diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index c5f4e865..01702e0c 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -241,14 +241,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_and_r_imm(r, imm) \ emith_arith_r_imm(4, r, imm) -/* used for sub cycles after test, so retain flags with lea */ -#define emith_sub_r_imm(r, imm) do { \ - assert(r != xSP); \ - EMIT_OP_MODRM(0x8d, 2, r, r); \ - EMIT(-(s32)(imm), s32); \ -} while (0) - -#define emith_subf_r_imm(r, imm) \ +#define emith_sub_r_imm(r, imm) \ emith_arith_r_imm(5, r, imm) #define emith_eor_r_imm(r, imm) \ @@ -454,6 +447,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; } while (0) // "flag" instructions are the same +#define emith_adcf_r_imm emith_adc_r_imm +#define emith_subf_r_imm emith_sub_r_imm #define emith_addf_r_r emith_add_r_r #define emith_subf_r_r emith_sub_r_r #define emith_adcf_r_r emith_adc_r_r @@ -501,6 +496,18 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; } \ } while (0) +#define emith_read8s_r_r_offs(r, rs, offs) do { \ + int r_ = r; \ + if (!is_abcdx(r)) \ + r_ = rcache_get_tmp(); \ + EMIT(0x0f, u8); \ + emith_deref_op(0xbe, r_, rs, offs); \ + if ((r) != r_) { \ + emith_move_r_r(r, r_); \ + rcache_free_tmp(r_); \ + } \ +} while (0) + #define emith_write8_r_r_offs(r, rs, offs) do {\ int r_ = r; \ if (!is_abcdx(r)) { \ @@ -517,6 +524,11 @@ enum { xAX = 0, xCX, 
xDX, xBX, xSP, xBP, xSI, xDI }; emith_deref_op(0xb7, r, rs, offs); \ } while (0) +#define emith_read16s_r_r_offs(r, rs, offs) do { \ + EMIT(0x0f, u8); \ + emith_deref_op(0xbf, r, rs, offs); \ +} while (0) + #define emith_write16_r_r_offs(r, rs, offs) do { \ EMIT(0x66, u8); \ emith_write_r_r_offs(r, rs, offs); \ @@ -653,6 +665,13 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define EMITH_SJMP3_MID EMITH_JMP3_MID #define EMITH_SJMP3_END EMITH_JMP3_END +#define EMITH_SJMP2_START(cond) \ + EMITH_SJMP3_START(cond) +#define EMITH_SJMP2_MID(cond) \ + EMITH_SJMP3_MID(cond) +#define EMITH_SJMP2_END(cond) \ + EMITH_SJMP3_END() + #define emith_pass_arg_r(arg, reg) do { \ int rd = 7; \ host_arg2reg(rd, arg); \ @@ -854,3 +873,54 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; rcache_free_tmp(tmp_); \ } +/* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */ +#define emith_sh2_macl(ml, mh, rn, rm, sr) do { \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* MACH top 16 bits unused if saturated. sign ext for overfl detect */ \ + emith_sext(mh, mh, 16); \ + EMITH_SJMP_END(DCOND_EQ); \ + emith_mula_s64(ml, mh, rn, rm); \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* overflow if top 17 bits of MACH aren't all 1 or 0 */ \ + /* to check: add MACH[15] to MACH[31:16]. 
this is 0 if no overflow */ \ + emith_asrf(rn, mh, 16); /* sum = (MACH>>16) + ((MACH>>15)&1) */ \ + emith_adcf_r_imm(rn, 0); /* (MACH>>15) is in carry after shift */ \ + EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> ov */ \ + emith_move_r_imm_c(DCOND_NE, ml, 0x0000); /* -overflow */ \ + emith_move_r_imm_c(DCOND_NE, mh, 0x8000); \ + EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> +ovl */ \ + emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0xffffffff */ \ + emith_sub_r_imm_c(DCOND_GT, mh, 1); /* 0x00007fff */ \ + EMITH_SJMP_END(DCOND_LE); \ + EMITH_SJMP_END(DCOND_EQ); \ + EMITH_SJMP_END(DCOND_EQ); \ +} while (0) + +/* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */ +#define emith_sh2_macw(ml, mh, rn, rm, sr) do { \ + emith_sext(rn, rn, 16); \ + emith_sext(rm, rm, 16); \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* XXX: MACH should be untouched when S is set? */ \ + emith_asr(mh, ml, 31); /* sign ext MACL to MACH for ovrfl check */ \ + EMITH_SJMP_END(DCOND_EQ); \ + emith_mula_s64(ml, mh, rn, rm); \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* overflow if top 33 bits of MACH:MACL aren't all 1 or 0 */ \ + /* to check: add MACL[31] to MACH. this is 0 if no overflow */ \ + emith_lsr(rn, ml, 31); \ + emith_addf_r_r(rn, mh); /* sum = MACH + ((MACL>>31)&1) */ \ + EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \ + /* XXX: LSB signalling only in SH1, or in SH2 too? 
*/ \ + emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \ + emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* negative ovrfl */ \ + EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> positive ovrfl */ \ + emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0x7fffffff */ \ + EMITH_SJMP_END(DCOND_LE); \ + EMITH_SJMP_END(DCOND_EQ); \ + EMITH_SJMP_END(DCOND_EQ); \ +} while (0) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index ece3b13e..af6ca9cd 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -102,6 +102,7 @@ static int insns_compiled, hash_collisions, host_insn_count; #define SHR_T SHR_SR // might make them separate someday #define SHR_MEM 31 +#define SHR_TMP -1 static struct op_data { u8 op; @@ -391,6 +392,12 @@ static void REGPARM(2) (*sh2_drc_write16)(u32 a, u32 d); static void REGPARM(3) (*sh2_drc_write32)(u32 a, u32 d, SH2 *sh2); // address space stuff +static int dr_is_rom(u32 a) +{ + // tweak for WWF Raw which writes data to some high ROM addresses + return (a & 0xc6000000) == 0x02000000 && (a & 0x3f0000) < 0x3e0000; +} + static int dr_ctx_get_mem_ptr(u32 a, u32 *mask) { int poffs = -1; @@ -1162,6 +1169,26 @@ static int emit_get_rbase_and_offs(u32 a, u32 *offs) return hr; } +// read const data from const ROM address +static int emit_get_rom_data(sh2_reg_e r, u32 offs, int size, u32 *val) +{ + u32 tmp; + + *val = 0; + if (gconst_get(r, &tmp)) { + tmp += offs; + if (dr_is_rom(tmp)) { + switch (size) { + case 0: *val = (s8)p32x_sh2_read8(tmp, sh2s); break; // 8 + case 1: *val = (s16)p32x_sh2_read16(tmp, sh2s); break; // 16 + case 2: *val = p32x_sh2_read32(tmp, sh2s); break; // 32 + } + return 1; + } + } + return 0; +} + static void emit_move_r_imm32(sh2_reg_e dst, u32 imm) { #if PROPAGATE_CONSTANTS @@ -1174,10 +1201,19 @@ static void emit_move_r_imm32(sh2_reg_e dst, u32 imm) static void emit_move_r_r(sh2_reg_e dst, sh2_reg_e src) { - int hr_d = rcache_get_reg(dst, RC_GR_WRITE); - int hr_s = rcache_get_reg(src, RC_GR_READ); + int hr_d, hr_s; + u32 val; 
- emith_move_r_r(hr_d, hr_s); +#if PROPAGATE_CONSTANTS + if (gconst_get(src, &val)) + gconst_new(dst, val); + else +#endif + { + hr_s = rcache_get_reg(src, RC_GR_READ); + hr_d = rcache_get_reg(dst, RC_GR_WRITE); + emith_move_r_r(hr_d, hr_s); + } } // T must be clear, and comparison done just before this @@ -1188,15 +1224,10 @@ static void emit_or_t_if_eq(int srr) EMITH_SJMP_END(DCOND_NE); } -// arguments must be ready -// reg cache must be clean before call -static int emit_memhandler_read_(int size, int ram_check) +// rd = @(arg0) +static int emit_memhandler_read(int size) { int arg1; -#if 0 - int arg0; - host_arg2reg(arg0, 0); -#endif rcache_clean(); @@ -1207,53 +1238,10 @@ static int emit_memhandler_read_(int size, int ram_check) arg1 = rcache_get_tmp_arg(1); emith_move_r_r_ptr(arg1, CONTEXT_REG); - -#if 0 // can't do this because of unmapped reads - // ndef PDB_NET - if (ram_check && Pico.rom == (void *)0x02000000 && Pico32xMem->sdram == (void *)0x06000000) { - int tmp = rcache_get_tmp(); - emith_and_r_r_imm(tmp, arg0, 0xfb000000); - emith_cmp_r_imm(tmp, 0x02000000); - switch (size) { - case 0: // 8 - EMITH_SJMP3_START(DCOND_NE); - emith_eor_r_imm_c(DCOND_EQ, arg0, 1); - emith_read8_r_r_offs_c(DCOND_EQ, arg0, arg0, 0); - EMITH_SJMP3_MID(DCOND_NE); - emith_call_cond(DCOND_NE, sh2_drc_read8); - EMITH_SJMP3_END(); - break; - case 1: // 16 - EMITH_SJMP3_START(DCOND_NE); - emith_read16_r_r_offs_c(DCOND_EQ, arg0, arg0, 0); - EMITH_SJMP3_MID(DCOND_NE); - emith_call_cond(DCOND_NE, sh2_drc_read16); - EMITH_SJMP3_END(); - break; - case 2: // 32 - EMITH_SJMP3_START(DCOND_NE); - emith_read_r_r_offs_c(DCOND_EQ, arg0, arg0, 0); - emith_ror_c(DCOND_EQ, arg0, arg0, 16); - EMITH_SJMP3_MID(DCOND_NE); - emith_call_cond(DCOND_NE, sh2_drc_read32); - EMITH_SJMP3_END(); - break; - } - } - else -#endif - { - switch (size) { - case 0: // 8 - emith_call(sh2_drc_read8); - break; - case 1: // 16 - emith_call(sh2_drc_read16); - break; - case 2: // 32 - emith_call(sh2_drc_read32); - break; 
- } + switch (size) { + case 0: emith_call(sh2_drc_read8); break; // 8 + case 1: emith_call(sh2_drc_read16); break; // 16 + case 2: emith_call(sh2_drc_read32); break; // 32 } rcache_invalidate(); @@ -1263,28 +1251,56 @@ static int emit_memhandler_read_(int size, int ram_check) return rcache_get_tmp_ret(); } -static int emit_memhandler_read(int size) +// @(arg0) = arg1 +static void emit_memhandler_write(int size) { - return emit_memhandler_read_(size, 1); + int arg2; + + if (reg_map_g2h[SHR_SR] != -1) + emith_ctx_write(reg_map_g2h[SHR_SR], SHR_SR * 4); + + rcache_clean(); + + arg2 = rcache_get_tmp_arg(2); + emith_move_r_r_ptr(arg2, CONTEXT_REG); + switch (size) { + case 0: emith_call(sh2_drc_write8); break; // 8 + case 1: emith_call(sh2_drc_write16); break; // 16 + case 2: emith_call(sh2_drc_write32); break; // 32 + } + + rcache_invalidate(); + if (reg_map_g2h[SHR_SR] != -1) + emith_ctx_read(reg_map_g2h[SHR_SR], SHR_SR * 4); } +// rd = @(Rs,#offs) static int emit_memhandler_read_rr(sh2_reg_e rd, sh2_reg_e rs, u32 offs, int size) { - int hr, hr2, ram_check = 1; + int hr, hr2; u32 val, offs2; + if (emit_get_rom_data(rs, offs, size, &val)) { + if (rd == SHR_TMP) { + hr2 = rcache_get_tmp(); + emith_move_r_imm(hr2, val); + } else { + gconst_new(rd, val); + hr2 = rcache_get_reg(rd, RC_GR_RMW); + } + return hr2; + } + if (gconst_get(rs, &val)) { hr = emit_get_rbase_and_offs(val + offs, &offs2); if (hr != -1) { hr2 = rcache_get_reg(rd, RC_GR_WRITE); switch (size) { case 0: // 8 - emith_read8_r_r_offs(hr2, hr, offs2 ^ 1); - emith_sext(hr2, hr2, 8); + emith_read8s_r_r_offs(hr2, hr, offs2 ^ 1); break; case 1: // 16 - emith_read16_r_r_offs(hr2, hr, offs2); - emith_sext(hr2, hr2, 16); + emith_read16s_r_r_offs(hr2, hr, offs2); break; case 2: // 32 emith_read_r_r_offs(hr2, hr, offs2); @@ -1294,14 +1310,17 @@ static int emit_memhandler_read_rr(sh2_reg_e rd, sh2_reg_e rs, u32 offs, int siz rcache_free_tmp(hr); return hr2; } - - ram_check = 0; } - hr = rcache_get_reg_arg(0, rs); - if 
(offs != 0) - emith_add_r_imm(hr, offs); - hr = emit_memhandler_read_(size, ram_check); + if (gconst_get(rs, &val)) { + hr = rcache_get_tmp_arg(0); + emith_move_r_imm(hr, val + offs); + } else { + hr = rcache_get_reg_arg(0, rs); + if (offs) + emith_add_r_imm(hr, offs); + } + hr = emit_memhandler_read(size); hr2 = rcache_get_reg(rd, RC_GR_WRITE); if (size != 2) { emith_sext(hr2, hr, (size == 1) ? 16 : 8); @@ -1312,45 +1331,78 @@ static int emit_memhandler_read_rr(sh2_reg_e rd, sh2_reg_e rs, u32 offs, int siz return hr2; } -static void emit_memhandler_write(int size) +// @(Rs,#offs) = rd +static void emit_memhandler_write_rr(sh2_reg_e rd, sh2_reg_e rs, u32 offs, int size) { - int ctxr; - host_arg2reg(ctxr, 2); - if (reg_map_g2h[SHR_SR] != -1) - emith_ctx_write(reg_map_g2h[SHR_SR], SHR_SR * 4); + int hr; + u32 val; - rcache_clean(); + rcache_get_reg_arg(1, rd); - switch (size) { - case 0: // 8 - // XXX: consider inlining sh2_drc_write8 - emith_call(sh2_drc_write8); - break; - case 1: // 16 - emith_call(sh2_drc_write16); - break; - case 2: // 32 - emith_move_r_r_ptr(ctxr, CONTEXT_REG); - emith_call(sh2_drc_write32); - break; - } + if (gconst_get(rs, &val)) { + hr = rcache_get_tmp_arg(0); + emith_move_r_imm(hr, val + offs); + } else if (offs) { + hr = rcache_get_reg_arg(0, rs); + emith_add_r_imm(hr, offs); + } else + rcache_get_reg_arg(0, rs); - rcache_invalidate(); - if (reg_map_g2h[SHR_SR] != -1) - emith_ctx_read(reg_map_g2h[SHR_SR], SHR_SR * 4); + emit_memhandler_write(size); } -// @(Rx,Ry) -static int emit_indirect_indexed_read(int rx, int ry, int size) +// rd = @(Rx,Ry) +static int emit_indirect_indexed_read(sh2_reg_e rd, sh2_reg_e rx, sh2_reg_e ry, int size) { + int hr, hr2; int a0, t; +#if PROPAGATE_CONSTANTS + u32 offs; + + if (gconst_get(ry, &offs)) + return emit_memhandler_read_rr(rd, rx, offs, size); + if (gconst_get(rx, &offs)) + return emit_memhandler_read_rr(rd, ry, offs, size); +#endif a0 = rcache_get_reg_arg(0, rx); t = rcache_get_reg(ry, RC_GR_READ); 
emith_add_r_r(a0, t); - return emit_memhandler_read(size); + hr = emit_memhandler_read(size); + if (rd != SHR_TMP) + hr2 = rcache_get_reg(rd, RC_GR_WRITE); + else + hr2 = hr; + + if (size != 2) { // 16, 8 + emith_sext(hr2, hr, size ? 16 : 8); + } else if (hr != hr2) // 32 + emith_move_r_r(hr2, hr); + + if (hr != hr2) + rcache_free_tmp(hr); + return hr2; } -// read @Rn, @rm +// @(Rx,Ry) = rd +static void emit_indirect_indexed_write(sh2_reg_e rd, sh2_reg_e rx, sh2_reg_e ry, int size) +{ + int a0, t; +#if PROPAGATE_CONSTANTS + u32 offs; + + if (gconst_get(ry, &offs)) + return emit_memhandler_write_rr(rd, rx, offs, size); + if (gconst_get(rx, &offs)) + return emit_memhandler_write_rr(rd, ry, offs, size); +#endif + rcache_get_reg_arg(1, rd); + a0 = rcache_get_reg_arg(0, rx); + t = rcache_get_reg(ry, RC_GR_READ); + emith_add_r_r(a0, t); + emit_memhandler_write(size); +} + +// @Rn+,@Rm+ static void emit_indirect_read_double(u32 *rnr, u32 *rmr, int rn, int rm, int size) { int tmp; @@ -1670,8 +1722,8 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) break; case OP_BRANCH_CT: case OP_BRANCH_CF: - tmp = rcache_get_reg(SHR_PC, RC_GR_WRITE); sr = rcache_get_reg(SHR_SR, RC_GR_READ); + tmp = rcache_get_reg(SHR_PC, RC_GR_WRITE); emith_move_r_imm(tmp, pc); emith_tst_r_imm(sr, T); tmp2 = ops[i-1].op == OP_BRANCH_CT ? 
DCOND_NE : DCOND_EQ; @@ -1706,23 +1758,34 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case OP_BRANCH_R: if (opd->dest & BITMASK1(SHR_PR)) emit_move_r_imm32(SHR_PR, pc + 2); - emit_move_r_r(SHR_PC, opd->rm); - drcf.pending_branch_indirect = 1; + if (gconst_get(opd->rm, &tmp)) { + opd->imm = tmp; + drcf.pending_branch_direct = 1; + } else { + emit_move_r_r(SHR_PC, opd->rm); + ops[i+1].source |= SHR_PC; // need PC for jump after delay slot + drcf.pending_branch_indirect = 1; + } goto end_op; case OP_BRANCH_RF: - tmp = rcache_get_reg(SHR_PC, RC_GR_WRITE); - tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ); - if (opd->dest & BITMASK1(SHR_PR)) { - tmp3 = rcache_get_reg(SHR_PR, RC_GR_WRITE); - emith_move_r_imm(tmp3, pc + 2); - emith_add_r_r_r(tmp, tmp2, tmp3); + if (gconst_get(GET_Rn(), &tmp)) { + if (opd->dest & BITMASK1(SHR_PR)) + emit_move_r_imm32(SHR_PR, pc + 2); + opd->imm = pc + 2 + tmp; + drcf.pending_branch_direct = 1; + } else { + tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ); + tmp = rcache_get_reg(SHR_PC, RC_GR_WRITE); + emith_move_r_imm(tmp, pc + 2); + if (opd->dest & BITMASK1(SHR_PR)) { + tmp3 = rcache_get_reg(SHR_PR, RC_GR_WRITE); + emith_move_r_r(tmp3, tmp); + } + emith_add_r_r(tmp, tmp2); + ops[i+1].source |= SHR_PC; // need PC for jump after delay slot + drcf.pending_branch_indirect = 1; } - else { - emith_move_r_r(tmp, tmp2); - emith_add_r_imm(tmp, pc + 2); - } - drcf.pending_branch_indirect = 1; goto end_op; case OP_SLEEP: @@ -1767,6 +1830,8 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // obtain new PC emit_memhandler_read_rr(SHR_PC, SHR_VBR, opd->imm * 4, 2); // indirect jump -> back to dispatcher + sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + FLUSH_CYCLES(sr); rcache_flush(); emith_jump(sh2_drc_dispatcher); goto end_op; @@ -1780,7 +1845,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) if (opd->size == 2) tmp = FETCH32(opd->imm); else - tmp = (u32)(int)(signed short)FETCH_OP(opd->imm); + tmp = 
(s16)FETCH_OP(opd->imm); gconst_new(GET_Rn(), tmp); } else @@ -1812,9 +1877,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case OP_MOVA: if (opd->imm != 0) emit_move_r_imm32(SHR_R0, opd->imm); - else { - tmp = rcache_get_reg(SHR_R0, RC_GR_WRITE); + else { // delay slot case, pc can have either value tmp2 = rcache_get_reg(SHR_PC, RC_GR_READ); + tmp = rcache_get_reg(SHR_R0, RC_GR_WRITE); emith_add_r_r_imm(tmp, tmp2, 2 + (op & 0xff) * 4); emith_bic_r_imm(tmp, 3); } @@ -1828,7 +1893,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) switch (op & 0x0f) { case 0x02: - tmp = rcache_get_reg(GET_Rn(), RC_GR_WRITE); switch (GET_Fx()) { case 0: // STC SR,Rn 0000nnnn00000010 @@ -1844,6 +1908,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) goto default_; } tmp3 = rcache_get_reg(tmp2, RC_GR_READ); + tmp = rcache_get_reg(GET_Rn(), RC_GR_WRITE); emith_move_r_r(tmp, tmp3); if (tmp2 == SHR_SR) emith_clear_msb(tmp, tmp, 22); // reserved bits defined by ISA as 0 @@ -1851,12 +1916,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x04: // MOV.B Rm,@(R0,Rn) 0000nnnnmmmm0100 case 0x05: // MOV.W Rm,@(R0,Rn) 0000nnnnmmmm0101 case 0x06: // MOV.L Rm,@(R0,Rn) 0000nnnnmmmm0110 - rcache_clean(); - tmp = rcache_get_reg_arg(1, GET_Rm()); - tmp2 = rcache_get_reg_arg(0, SHR_R0); - tmp3 = rcache_get_reg(GET_Rn(), RC_GR_READ); - emith_add_r_r(tmp2, tmp3); - emit_memhandler_write(op & 3); + emit_indirect_indexed_write(GET_Rm(), SHR_R0, GET_Rn(), op & 3); goto end_op; case 0x07: // MUL.L Rm,Rn 0000nnnnmmmm0111 @@ -1903,7 +1963,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } goto end_op; case 0x0a: - tmp = rcache_get_reg(GET_Rn(), RC_GR_WRITE); switch (GET_Fx()) { case 0: // STS MACH,Rn 0000nnnn00001010 @@ -1918,50 +1977,21 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) default: goto default_; } - tmp2 = rcache_get_reg(tmp2, RC_GR_READ); - emith_move_r_r(tmp, tmp2); + 
emit_move_r_r(GET_Rn(), tmp2); goto end_op; case 0x0c: // MOV.B @(R0,Rm),Rn 0000nnnnmmmm1100 case 0x0d: // MOV.W @(R0,Rm),Rn 0000nnnnmmmm1101 case 0x0e: // MOV.L @(R0,Rm),Rn 0000nnnnmmmm1110 - tmp = emit_indirect_indexed_read(SHR_R0, GET_Rm(), op & 3); - tmp2 = rcache_get_reg(GET_Rn(), RC_GR_WRITE); - if ((op & 3) != 2) { - emith_sext(tmp2, tmp, (op & 1) ? 16 : 8); - } else - emith_move_r_r(tmp2, tmp); - rcache_free_tmp(tmp); + emit_indirect_indexed_read(GET_Rn(), SHR_R0, GET_Rm(), op & 3); goto end_op; case 0x0f: // MAC.L @Rm+,@Rn+ 0000nnnnmmmm1111 emit_indirect_read_double(&tmp, &tmp2, GET_Rn(), GET_Rm(), 2); - tmp4 = rcache_get_reg(SHR_MACH, RC_GR_RMW); - /* MS 16 MAC bits unused if saturated */ + sr = rcache_get_reg(SHR_SR, RC_GR_READ); - emith_tst_r_imm(sr, S); - EMITH_SJMP_START(DCOND_EQ); - emith_clear_msb_c(DCOND_NE, tmp4, tmp4, 16); - EMITH_SJMP_END(DCOND_EQ); - rcache_unlock(sr); - tmp3 = rcache_get_reg(SHR_MACL, RC_GR_RMW); // might evict SR - emith_mula_s64(tmp3, tmp4, tmp, tmp2); + tmp3 = rcache_get_reg(SHR_MACL, RC_GR_RMW); + tmp4 = rcache_get_reg(SHR_MACH, RC_GR_RMW); + emith_sh2_macl(tmp3, tmp4, tmp, tmp2, sr); rcache_free_tmp(tmp2); - sr = rcache_get_reg(SHR_SR, RC_GR_READ); // reget just in case - emith_tst_r_imm(sr, S); - - EMITH_JMP_START(DCOND_EQ); - emith_asr(tmp, tmp4, 15); - emith_cmp_r_imm(tmp, -1); // negative overflow (0x80000000..0xffff7fff) - EMITH_SJMP_START(DCOND_GE); - emith_move_r_imm_c(DCOND_LT, tmp4, 0x8000); - emith_move_r_imm_c(DCOND_LT, tmp3, 0x0000); - EMITH_SJMP_END(DCOND_GE); - emith_cmp_r_imm(tmp, 0); // positive overflow (0x00008000..0x7fffffff) - EMITH_SJMP_START(DCOND_LE); - emith_move_r_imm_c(DCOND_GT, tmp4, 0x00007fff); - emith_move_r_imm_c(DCOND_GT, tmp3, 0xffffffff); - EMITH_SJMP_END(DCOND_LE); - EMITH_JMP_END(DCOND_EQ); - rcache_free_tmp(tmp); goto end_op; } @@ -1970,12 +2000,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) ///////////////////////////////////////////// case 0x01: // MOV.L 
Rm,@(disp,Rn) 0001nnnnmmmmdddd - rcache_clean(); - tmp = rcache_get_reg_arg(0, GET_Rn()); - tmp2 = rcache_get_reg_arg(1, GET_Rm()); - if (op & 0x0f) - emith_add_r_imm(tmp, (op & 0x0f) * 4); - emit_memhandler_write(2); + emit_memhandler_write_rr(GET_Rm(), GET_Rn(), (op & 0x0f) * 4, 2); goto end_op; case 0x02: @@ -1984,20 +2009,14 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x00: // MOV.B Rm,@Rn 0010nnnnmmmm0000 case 0x01: // MOV.W Rm,@Rn 0010nnnnmmmm0001 case 0x02: // MOV.L Rm,@Rn 0010nnnnmmmm0010 - rcache_clean(); - rcache_get_reg_arg(0, GET_Rn()); - rcache_get_reg_arg(1, GET_Rm()); - emit_memhandler_write(op & 3); + emit_memhandler_write_rr(GET_Rm(), GET_Rn(), 0, op & 3); goto end_op; case 0x04: // MOV.B Rm,@-Rn 0010nnnnmmmm0100 case 0x05: // MOV.W Rm,@-Rn 0010nnnnmmmm0101 case 0x06: // MOV.L Rm,@-Rn 0010nnnnmmmm0110 - rcache_get_reg_arg(1, GET_Rm()); // for Rm == Rn tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); emith_sub_r_imm(tmp, (1 << (op & 3))); - rcache_clean(); - rcache_get_reg_arg(0, GET_Rn()); - emit_memhandler_write(op & 3); + emit_memhandler_write_rr(GET_Rm(), GET_Rn(), 0, op & 3); goto end_op; case 0x07: // DIV0S Rm,Rn 0010nnnnmmmm0111 sr = rcache_get_reg(SHR_SR, RC_GR_RMW); @@ -2132,8 +2151,8 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // Q2 = carry(Rn -= Rm) // Q = M ^ Q1 ^ Q2 // T = (Q == M) = !(Q ^ M) = !(Q1 ^ Q2) - tmp2 = rcache_get_reg(GET_Rn(), RC_GR_RMW); tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ); + tmp2 = rcache_get_reg(GET_Rn(), RC_GR_RMW); sr = rcache_get_reg(SHR_SR, RC_GR_RMW); emith_tpop_carry(sr, 0); emith_adcf_r_r(tmp2, tmp2); @@ -2228,20 +2247,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) goto end_op; case 1: // DT Rn 0100nnnn00010000 sr = rcache_get_reg(SHR_SR, RC_GR_RMW); -#if 0 // scheduling needs tuning - if (FETCH_OP(pc) == 0x8bfd) { // BF #-2 - if (gconst_get(GET_Rn(), &tmp)) { - // XXX: limit burned cycles - emit_move_r_imm32(GET_Rn(), 0); - emith_or_r_imm(sr, 
T); - cycles += tmp * 4 + 1; // +1 syncs with noconst version, not sure why - skip_op = 1; - } - else - emith_sh2_dtbf_loop(); - goto end_op; - } -#endif tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); emith_bic_r_imm(sr, T); emith_subf_r_imm(tmp, 1); @@ -2370,17 +2375,14 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) default: goto default_; } - rcache_get_reg_arg(0, GET_Rn()); - tmp2 = emit_memhandler_read(2); if (tmp == SHR_SR) { + tmp2 = emit_memhandler_read_rr(SHR_TMP, GET_Rn(), 0, 2); sr = rcache_get_reg(SHR_SR, RC_GR_RMW); emith_write_sr(sr, tmp2); + rcache_free_tmp(tmp2); drcf.test_irq = 1; - } else { - tmp = rcache_get_reg(tmp, RC_GR_WRITE); - emith_move_r_r(tmp, tmp2); - } - rcache_free_tmp(tmp2); + } else + emit_memhandler_read_rr(tmp, GET_Rn(), 0, 2); tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); emith_add_r_imm(tmp, 4); goto end_op; @@ -2440,7 +2442,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_bic_r_imm(sr, T); emith_cmp_r_imm(tmp, 0); emit_or_t_if_eq(sr); - rcache_clean(); emith_or_r_imm(tmp, 0x80); tmp2 = rcache_get_tmp_arg(1); // assuming it differs to tmp emith_move_r_r(tmp2, tmp); @@ -2480,28 +2481,11 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x0f: // MAC.W @Rm+,@Rn+ 0100nnnnmmmm1111 emit_indirect_read_double(&tmp, &tmp2, GET_Rn(), GET_Rm(), 1); - emith_sext(tmp, tmp, 16); - emith_sext(tmp2, tmp2, 16); + sr = rcache_get_reg(SHR_SR, RC_GR_READ); tmp3 = rcache_get_reg(SHR_MACL, RC_GR_RMW); tmp4 = rcache_get_reg(SHR_MACH, RC_GR_RMW); - emith_mula_s64(tmp3, tmp4, tmp, tmp2); + emith_sh2_macw(tmp3, tmp4, tmp, tmp2, sr); rcache_free_tmp(tmp2); - // XXX: MACH should be untouched when S is set? 
- sr = rcache_get_reg(SHR_SR, RC_GR_READ); - emith_tst_r_imm(sr, S); - EMITH_JMP_START(DCOND_EQ); - - emith_asr(tmp, tmp3, 31); - emith_eorf_r_r(tmp, tmp4); // tmp = ((signed)macl >> 31) ^ mach - EMITH_JMP_START(DCOND_EQ); - emith_move_r_imm(tmp3, 0x80000000); - emith_tst_r_r(tmp4, tmp4); - EMITH_SJMP_START(DCOND_MI); - emith_sub_r_imm_c(DCOND_PL, tmp3, 1); // positive - EMITH_SJMP_END(DCOND_MI); - EMITH_JMP_END(DCOND_EQ); - - EMITH_JMP_END(DCOND_EQ); rcache_free_tmp(tmp); goto end_op; } @@ -2600,13 +2584,8 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) { case 0x0000: // MOV.B R0,@(disp,Rn) 10000000nnnndddd case 0x0100: // MOV.W R0,@(disp,Rn) 10000001nnnndddd - rcache_clean(); - tmp = rcache_get_reg_arg(0, GET_Rm()); - tmp2 = rcache_get_reg_arg(1, SHR_R0); - tmp3 = (op & 0x100) >> 8; - if (op & 0x0f) - emith_add_r_imm(tmp, (op & 0x0f) << tmp3); - emit_memhandler_write(tmp3); + tmp = (op & 0x100) >> 8; + emit_memhandler_write_rr(SHR_R0, GET_Rm(), (op & 0x0f) << tmp, tmp); goto end_op; case 0x0400: // MOV.B @(disp,Rm),R0 10000100mmmmdddd case 0x0500: // MOV.W @(disp,Rm),R0 10000101mmmmdddd @@ -2615,14 +2594,11 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) goto end_op; case 0x0800: // CMP/EQ #imm,R0 10001000iiiiiiii // XXX: could use cmn - tmp = rcache_get_tmp(); tmp2 = rcache_get_reg(0, RC_GR_READ); sr = rcache_get_reg(SHR_SR, RC_GR_RMW); - emith_move_r_imm_s8(tmp, op & 0xff); emith_bic_r_imm(sr, T); - emith_cmp_r_r(tmp2, tmp); + emith_cmp_r_imm(tmp2, (s8)(op & 0xff)); emit_or_t_if_eq(sr); - rcache_free_tmp(tmp); goto end_op; } goto default_; @@ -2634,12 +2610,8 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x0000: // MOV.B R0,@(disp,GBR) 11000000dddddddd case 0x0100: // MOV.W R0,@(disp,GBR) 11000001dddddddd case 0x0200: // MOV.L R0,@(disp,GBR) 11000010dddddddd - rcache_clean(); - tmp = rcache_get_reg_arg(0, SHR_GBR); - tmp2 = rcache_get_reg_arg(1, SHR_R0); - tmp3 = (op & 0x300) >> 8; - 
emith_add_r_imm(tmp, (op & 0xff) << tmp3); - emit_memhandler_write(tmp3); + tmp = (op & 0x300) >> 8; + emit_memhandler_write_rr(SHR_R0, SHR_GBR, (op & 0xff) << tmp, tmp); goto end_op; case 0x0400: // MOV.B @(disp,GBR),R0 11000100dddddddd case 0x0500: // MOV.W @(disp,GBR),R0 11000101dddddddd @@ -2667,7 +2639,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_or_r_imm(tmp, op & 0xff); goto end_op; case 0x0c00: // TST.B #imm,@(R0,GBR) 11001100iiiiiiii - tmp = emit_indirect_indexed_read(SHR_R0, SHR_GBR, 0); + tmp = emit_indirect_indexed_read(SHR_TMP, SHR_R0, SHR_GBR, 0); sr = rcache_get_reg(SHR_SR, RC_GR_RMW); emith_bic_r_imm(sr, T); emith_tst_r_imm(tmp, op & 0xff); @@ -2675,15 +2647,15 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) rcache_free_tmp(tmp); goto end_op; case 0x0d00: // AND.B #imm,@(R0,GBR) 11001101iiiiiiii - tmp = emit_indirect_indexed_read(SHR_R0, SHR_GBR, 0); + tmp = emit_indirect_indexed_read(SHR_TMP, SHR_R0, SHR_GBR, 0); emith_and_r_imm(tmp, op & 0xff); goto end_rmw_op; case 0x0e00: // XOR.B #imm,@(R0,GBR) 11001110iiiiiiii - tmp = emit_indirect_indexed_read(SHR_R0, SHR_GBR, 0); + tmp = emit_indirect_indexed_read(SHR_TMP, SHR_R0, SHR_GBR, 0); emith_eor_r_imm(tmp, op & 0xff); goto end_rmw_op; case 0x0f00: // OR.B #imm,@(R0,GBR) 11001111iiiiiiii - tmp = emit_indirect_indexed_read(SHR_R0, SHR_GBR, 0); + tmp = emit_indirect_indexed_read(SHR_TMP, SHR_R0, SHR_GBR, 0); emith_or_r_imm(tmp, op & 0xff); end_rmw_op: tmp2 = rcache_get_tmp_arg(1); @@ -2708,32 +2680,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) if (!(op_flags[i] & OF_B_IN_DS)) elprintf_sh2(sh2, EL_ANOMALY, "drc: illegal op %04x @ %08x", op, pc - 2); - - tmp = rcache_get_reg(SHR_SP, RC_GR_RMW); - emith_sub_r_imm(tmp, 4*2); - // push SR - tmp = rcache_get_reg_arg(0, SHR_SP); - emith_add_r_imm(tmp, 4); - tmp = rcache_get_reg_arg(1, SHR_SR); - emith_clear_msb(tmp, tmp, 22); - emit_memhandler_write(2); - // push PC - rcache_get_reg_arg(0, 
SHR_SP); - tmp = rcache_get_tmp_arg(1); - if (drcf.pending_branch_indirect) { - tmp2 = rcache_get_reg(SHR_PC, RC_GR_READ); - emith_move_r_r(tmp, tmp2); - } - else - emith_move_r_imm(tmp, pc - 2); - emit_memhandler_write(2); - // obtain new PC - v = (op_flags[i] & OF_B_IN_DS) ? 6 : 4; - emit_memhandler_read_rr(SHR_PC, SHR_VBR, v * 4, 2); - // indirect jump -> back to dispatcher - rcache_flush(); - emith_jump(sh2_drc_dispatcher); - break; + exit(1); } end_op: @@ -2754,6 +2701,8 @@ end_op: emit_move_r_imm32(SHR_PC, pc); rcache_flush(); emith_call(sh2_drc_test_irq); + if (pc < end_pc) // mark next insns as entry point for RTE + op_flags[i+1] |= OF_BTARGET; drcf.test_irq = 0; } @@ -2763,36 +2712,37 @@ end_op: struct op_data *opd_b = (op_flags[i] & OF_DELAY_OP) ? opd-1 : opd; u32 target_pc = opd_b->imm; - int cond = -1, ncond = -1; + int cond = -1; void *target = NULL; - EMITH_SJMP_DECL_(); + int ctaken = 0; + if (opd_b->op == OP_BRANCH_CT || opd_b->op == OP_BRANCH_CF) { + ctaken = (op_flags[i] & OF_DELAY_OP) ? 1 : 2; + } + cycles += ctaken; // assume branch taken sr = rcache_get_reg(SHR_SR, RC_GR_RMW); FLUSH_CYCLES(sr); rcache_clean(); - if (opd_b->op != OP_BRANCH) { + // emit condition test for conditional branch + if (opd_b->op == OP_BRANCH_CT || opd_b->op == OP_BRANCH_CF) { cond = (opd_b->op == OP_BRANCH_CF) ? DCOND_EQ : DCOND_NE; - ncond = (opd_b->op == OP_BRANCH_CF) ? DCOND_NE : DCOND_EQ; - } - if (cond != -1) { - int ctaken = (op_flags[i] & OF_DELAY_OP) ? 1 : 2; - if (delay_dep_fw & BITMASK1(SHR_T)) emith_tst_r_imm(sr, T_save); else emith_tst_r_imm(sr, T); - - EMITH_SJMP_START_(ncond); - emith_sub_r_imm_c(cond, sr, ctaken<<12); } + // no modification of host status/flags between here and branching! 
#if LINK_BRANCHES - if (find_in_array(branch_target_pc, branch_target_count, target_pc) >= 0) + v = find_in_array(branch_target_pc, branch_target_count, target_pc); + if (v >= 0) { // local branch - // XXX: jumps back can be linked already - if (branch_patch_count < MAX_LOCAL_BRANCHES) { + if (branch_target_ptr[v]) { + // jumps back can be linked here since host PC is already known + target = branch_target_ptr[v]; + } else if (branch_patch_count < MAX_LOCAL_BRANCHES) { target = tcache_ptr; branch_patch_pc[branch_patch_count] = target_pc; branch_patch_ptr[branch_patch_count] = target; @@ -2801,9 +2751,8 @@ end_op: else dbg(1, "warning: too many local branches"); } - - if (target == NULL) #endif + if (target == NULL) { // can't resolve branch locally, make a block exit emit_move_r_imm32(SHR_PC, target_pc); @@ -2816,13 +2765,16 @@ end_op: if (cond != -1) { emith_jump_cond_patchable(cond, target); - EMITH_SJMP_END_(ncond); } else { emith_jump_patchable(target); rcache_invalidate(); } + // branch not taken, correct cycle count + if (ctaken) + emith_add_r_imm(sr, ctaken << 12); + drcf.pending_branch_direct = 0; } else if (drcf.pending_branch_indirect) { @@ -2851,6 +2803,9 @@ end_op: { void *target; + s32 tmp = rcache_get_reg(SHR_SR, RC_GR_RMW); + FLUSH_CYCLES(tmp); + emit_move_r_imm32(SHR_PC, pc); rcache_flush(); From 9031406131c35c297c9b6df7aa0343b1f99582bb Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 27 Mar 2019 20:24:48 +0100 Subject: [PATCH 0178/1110] add 32bit memory access functions for SH2 --- cpu/sh2/compiler.c | 8 +- cpu/sh2/sh2.h | 9 ++- pico/32x/memory.c | 177 ++++++++++++++++++++++++++++++++++----------- pico/32x/sh2soc.c | 4 +- 4 files changed, 150 insertions(+), 48 deletions(-) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index af6ca9cd..3c82420e 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -2906,7 +2906,6 @@ static void sh2_generate_utils(void) { int arg0, arg1, arg2, sr, tmp; - sh2_drc_write32 = p32x_sh2_write32; sh2_drc_read8 = 
p32x_sh2_read8; sh2_drc_read16 = p32x_sh2_read16; sh2_drc_read32 = p32x_sh2_read32; @@ -3015,6 +3014,11 @@ static void sh2_generate_utils(void) emith_ctx_read_ptr(arg2, offsetof(SH2, write16_tab)); emith_sh2_wcall(arg0, arg2); + // sh2_drc_write32(u32 a, u32 d) + sh2_drc_write32 = (void *)tcache_ptr; + emith_ctx_read_ptr(arg2, offsetof(SH2, write32_tab)); + emith_sh2_wcall(arg0, arg2); + #ifdef PDB_NET // debug #define MAKE_READ_WRAPPER(func) { \ @@ -3053,7 +3057,6 @@ static void sh2_generate_utils(void) host_dasm_new_symbol(sh2_drc_read8); host_dasm_new_symbol(sh2_drc_read16); host_dasm_new_symbol(sh2_drc_read32); - host_dasm_new_symbol(sh2_drc_write32); #endif #endif @@ -3065,6 +3068,7 @@ static void sh2_generate_utils(void) host_dasm_new_symbol(sh2_drc_test_irq); host_dasm_new_symbol(sh2_drc_write8); host_dasm_new_symbol(sh2_drc_write16); + host_dasm_new_symbol(sh2_drc_write32); #endif } diff --git a/cpu/sh2/sh2.h b/cpu/sh2/sh2.h index 69abf8cd..7faa844b 100644 --- a/cpu/sh2/sh2.h +++ b/cpu/sh2/sh2.h @@ -12,6 +12,7 @@ typedef enum { typedef struct SH2_ { + // registers. this MUST correlate with enum sh2_reg_e. 
unsigned int r[16]; // 00 unsigned int pc; // 40 unsigned int ppc; @@ -21,17 +22,19 @@ typedef struct SH2_ unsigned int mach, macl; // 58 // common - const void *read8_map; // 60 + const void *read8_map; const void *read16_map; + const void *read32_map; const void **write8_tab; const void **write16_tab; + const void **write32_tab; // drc stuff - int drc_tmp; // 70 + int drc_tmp; int irq_cycles; void *p_bios; // convenience pointers void *p_da; - void *p_sdram; // 80 + void *p_sdram; void *p_rom; unsigned int pdb_io_csum[2]; diff --git a/pico/32x/memory.c b/pico/32x/memory.c index c6b89a22..d399d758 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -1087,8 +1087,8 @@ static void m68k_write8_dram1_ow(u32 a, u32 d) return; \ } \ /* overwrite */ \ - if (!(d & 0xff00)) d |= *pd & 0xff00; \ if (!(d & 0x00ff)) d |= *pd & 0x00ff; \ + if (!(d & 0xff00)) d |= *pd & 0xff00; \ *pd = d; static void m68k_write16_dram0_ow(u32 a, u32 d) @@ -1344,6 +1344,31 @@ static u32 sh2_read16_rom(u32 a, SH2 *sh2) return *(u16 *)(Pico.rom + bank + (a & 0x7fffe)); } +static u32 sh2_read32_unmapped(u32 a, SH2 *sh2) +{ + elprintf_sh2(sh2, EL_32X, "unmapped r32 [%08x] %08x @%06x", + a, 0, sh2_pc(sh2)); + return 0; +} + +static u32 sh2_read32_cs0(u32 a, SH2 *sh2) +{ + return (sh2_read16_cs0(a, sh2) << 16) | sh2_read16_cs0(a + 2, sh2); +} + +static u32 sh2_read32_da(u32 a, SH2 *sh2) +{ + u32 d = *(u32 *)(sh2->data_array + (a & 0xfff)); + return (d << 16) | (d >> 16); +} + +static u32 sh2_read32_rom(u32 a, SH2 *sh2) +{ + u32 bank = carthw_ssf2_banks[(a >> 19) & 7] << 19; + u32 d = *(u32 *)(Pico.rom + bank + (a & 0x7fffc)); + return (d << 16) | (d >> 16); +} + // writes static void REGPARM(3) sh2_write_ignore(u32 a, u32 d, SH2 *sh2) { @@ -1501,6 +1526,73 @@ static void REGPARM(3) sh2_write16_rom(u32 a, u32 d, SH2 *sh2) sh2_write16_unmapped(a, d, sh2); } +static void REGPARM(3) sh2_write32_unmapped(u32 a, u32 d, SH2 *sh2) +{ + elprintf_sh2(sh2, EL_32X, "unmapped w32 [%08x] %08x @%06x", + a, d, 
sh2_pc(sh2)); +} + +static void REGPARM(3) sh2_write32_cs0(u32 a, u32 d, SH2 *sh2) +{ + sh2_write16_cs0(a, d >> 16, sh2); + sh2_write16_cs0(a + 2, d, sh2); +} + +#define sh2_write32_dramN(n) \ + u32 *pd = (u32 *)&Pico32xMem->dram[n][(a & 0x1ffff) / 2]; \ + if (!(a & 0x20000)) { \ + *pd = (d << 16) | (d >> 16); \ + return; \ + } \ + /* overwrite */ \ + u8 *pb = (u8 *)pd; \ + if (d & 0x000000ff) pb[2] = d; \ + if (d & 0x0000ff00) pb[3] = d >> 8; \ + if (d & 0x00ff0000) pb[0] = d >> 16; \ + if (d & 0xff000000) pb[1] = d >> 24; \ + +static void REGPARM(3) sh2_write32_dram0(u32 a, u32 d, SH2 *sh2) +{ + sh2_write32_dramN(0); +} + +static void REGPARM(3) sh2_write32_dram1(u32 a, u32 d, SH2 *sh2) +{ + sh2_write32_dramN(1); +} + +static void REGPARM(3) sh2_write32_sdram(u32 a, u32 d, SH2 *sh2) +{ + u32 a1 = a & 0x3ffff; + *(u32 *)(sh2->p_sdram + a1) = (d << 16) | (d >> 16); +#ifdef DRC_SH2 + unsigned short *p = &Pico32xMem->drcblk_ram[a1 >> SH2_DRCBLK_RAM_SHIFT]; + if (p[0]) + sh2_drc_wcheck_ram(a, p[0], sh2->is_slave); + if (p[1]) + sh2_drc_wcheck_ram(a, p[1], sh2->is_slave); +#endif +} + +static void REGPARM(3) sh2_write32_da(u32 a, u32 d, SH2 *sh2) +{ + u32 a1 = a & 0xfff; + *(u32 *)(sh2->data_array + a1) = (d << 16) | (d >> 16); +#ifdef DRC_SH2 + int id = sh2->is_slave; + unsigned short *p = &Pico32xMem->drcblk_da[id][a1 >> SH2_DRCBLK_DA_SHIFT]; + if (p[0]) + sh2_drc_wcheck_da(a, p[0], id); + if (p[1]) + sh2_drc_wcheck_da(a, p[1], id); +#endif +} + +static void REGPARM(3) sh2_write32_rom(u32 a, u32 d, SH2 *sh2) +{ + sh2_write16_rom(a, d >> 16, sh2); + sh2_write16_rom(a + 2, d, sh2); +} typedef u32 (sh2_read_handler)(u32 a, SH2 *sh2); typedef void REGPARM(3) (sh2_write_handler)(u32 a, u32 d, SH2 *sh2); @@ -1534,30 +1626,21 @@ u32 REGPARM(2) p32x_sh2_read16(u32 a, SH2 *sh2) if (map_flag_set(p)) return ((sh2_read_handler *)(p << 1))(a, sh2); else - return *(u16 *)((p << 1) + ((a & sh2_map->mask) & ~1)); + return *(u16 *)((p << 1) + (a & sh2_map->mask)); } u32 REGPARM(2) 
p32x_sh2_read32(u32 a, SH2 *sh2) { - const sh2_memmap *sh2_map = sh2->read16_map; - sh2_read_handler *handler; - u32 offs; + const sh2_memmap *sh2_map = sh2->read32_map; uptr p; - offs = SH2MAP_ADDR2OFFS_R(a); - sh2_map += offs; + sh2_map += SH2MAP_ADDR2OFFS_R(a); p = sh2_map->addr; if (!map_flag_set(p)) { - // XXX: maybe 32bit access instead with ror? - u16 *pd = (u16 *)((p << 1) + ((a & sh2_map->mask) & ~1)); - return (pd[0] << 16) | pd[1]; - } - - if (offs == SH2MAP_ADDR2OFFS_R(0xffffc000)) - return sh2_peripheral_read32(a, sh2); - - handler = (sh2_read_handler *)(p << 1); - return (handler(a, sh2) << 16) | handler(a + 2, sh2); + u32 *pd = (u32 *)((p << 1) + (a & sh2_map->mask)); + return (*pd << 16) | (*pd >> 16); + } else + return ((sh2_read_handler *)(p << 1))(a, sh2); } void REGPARM(3) p32x_sh2_write8(u32 a, u32 d, SH2 *sh2) @@ -1580,20 +1663,11 @@ void REGPARM(3) p32x_sh2_write16(u32 a, u32 d, SH2 *sh2) void REGPARM(3) p32x_sh2_write32(u32 a, u32 d, SH2 *sh2) { - const void **sh2_wmap = sh2->write16_tab; + const void **sh2_wmap = sh2->write32_tab; sh2_write_handler *wh; - u32 offs; - offs = SH2MAP_ADDR2OFFS_W(a); - - if (offs == SH2MAP_ADDR2OFFS_W(0xffffc000)) { - sh2_peripheral_write32(a, d, sh2); - return; - } - - wh = sh2_wmap[offs]; - wh(a, d >> 16, sh2); - wh(a + 2, d, sh2); + wh = sh2_wmap[SH2MAP_ADDR2OFFS_W(a)]; + wh(a, d, sh2); } // ----------------------------------------------------------------- @@ -1801,9 +1875,9 @@ static void get_bios(void) #define MAP_MEMORY(m) ((uptr)(m) >> 1) #define MAP_HANDLER(h) ( ((uptr)(h) >> 1) | ((uptr)1 << (sizeof(uptr) * 8 - 1)) ) -static sh2_memmap sh2_read8_map[0x80], sh2_read16_map[0x80]; +static sh2_memmap sh2_read8_map[0x80], sh2_read16_map[0x80], sh2_read32_map[0x80]; // for writes we are using handlers only -static sh2_write_handler *sh2_write8_map[0x80], *sh2_write16_map[0x80]; +static sh2_write_handler *sh2_write8_map[0x80], *sh2_write16_map[0x80], *sh2_write32_map[0x80]; void Pico32xSwapDRAM(int b) { @@ 
-1818,10 +1892,12 @@ void Pico32xSwapDRAM(int b) // SH2 sh2_read8_map[0x04/2].addr = sh2_read8_map[0x24/2].addr = - sh2_read16_map[0x04/2].addr = sh2_read16_map[0x24/2].addr = MAP_MEMORY(Pico32xMem->dram[b]); + sh2_read16_map[0x04/2].addr = sh2_read16_map[0x24/2].addr = + sh2_read32_map[0x04/2].addr = sh2_read32_map[0x24/2].addr = MAP_MEMORY(Pico32xMem->dram[b]); sh2_write8_map[0x04/2] = sh2_write8_map[0x24/2] = b ? sh2_write8_dram1 : sh2_write8_dram0; sh2_write16_map[0x04/2] = sh2_write16_map[0x24/2] = b ? sh2_write16_dram1 : sh2_write16_dram0; + sh2_write32_map[0x04/2] = sh2_write32_map[0x24/2] = b ? sh2_write32_dram1 : sh2_write32_dram0; } static void bank_switch_rom_sh2(void) @@ -1829,11 +1905,13 @@ static void bank_switch_rom_sh2(void) if (!carthw_ssf2_active) { // easy sh2_read8_map[0x02/2].addr = sh2_read8_map[0x22/2].addr = - sh2_read16_map[0x02/2].addr = sh2_read16_map[0x22/2].addr = MAP_MEMORY(Pico.rom); + sh2_read16_map[0x02/2].addr = sh2_read16_map[0x22/2].addr = + sh2_read32_map[0x02/2].addr = sh2_read32_map[0x22/2].addr = MAP_MEMORY(Pico.rom); } else { sh2_read8_map[0x02/2].addr = sh2_read8_map[0x22/2].addr = MAP_HANDLER(sh2_read8_rom); sh2_read16_map[0x02/2].addr = sh2_read16_map[0x22/2].addr = MAP_HANDLER(sh2_read16_rom); + sh2_read32_map[0x02/2].addr = sh2_read32_map[0x22/2].addr = MAP_HANDLER(sh2_read32_rom); } } @@ -1903,58 +1981,75 @@ void PicoMemSetup32x(void) for (i = 0; i < ARRAY_SIZE(sh2_read8_map); i++) { sh2_read8_map[i].addr = MAP_HANDLER(sh2_read8_unmapped); sh2_read16_map[i].addr = MAP_HANDLER(sh2_read16_unmapped); + sh2_read32_map[i].addr = MAP_HANDLER(sh2_read32_unmapped); } for (i = 0; i < ARRAY_SIZE(sh2_write8_map); i++) { sh2_write8_map[i] = sh2_write8_unmapped; sh2_write16_map[i] = sh2_write16_unmapped; + sh2_write32_map[i] = sh2_write32_unmapped; } // "purge area" for (i = 0x40; i <= 0x5f; i++) { sh2_write8_map[i >> 1] = - sh2_write16_map[i >> 1] = sh2_write_ignore; + sh2_write16_map[i >> 1] = + sh2_write32_map[i >> 1] = 
sh2_write_ignore; } // CS0 sh2_read8_map[0x00/2].addr = sh2_read8_map[0x20/2].addr = MAP_HANDLER(sh2_read8_cs0); sh2_read16_map[0x00/2].addr = sh2_read16_map[0x20/2].addr = MAP_HANDLER(sh2_read16_cs0); + sh2_read32_map[0x00/2].addr = sh2_read32_map[0x20/2].addr = MAP_HANDLER(sh2_read32_cs0); sh2_write8_map[0x00/2] = sh2_write8_map[0x20/2] = sh2_write8_cs0; sh2_write16_map[0x00/2] = sh2_write16_map[0x20/2] = sh2_write16_cs0; + sh2_write32_map[0x00/2] = sh2_write32_map[0x20/2] = sh2_write32_cs0; // CS1 - ROM bank_switch_rom_sh2(); - sh2_read8_map[0x02/2].mask = sh2_read8_map[0x22/2].mask = - sh2_read16_map[0x02/2].mask = sh2_read16_map[0x22/2].mask = 0x3fffff; // FIXME + sh2_read8_map[0x02/2].mask = sh2_read8_map[0x22/2].mask = 0x3fffff; // FIXME + sh2_read16_map[0x02/2].mask = sh2_read16_map[0x22/2].mask = 0x3ffffe; // FIXME + sh2_read32_map[0x02/2].mask = sh2_read32_map[0x22/2].mask = 0x3ffffc; // FIXME sh2_write16_map[0x02/2] = sh2_write16_map[0x22/2] = sh2_write16_rom; + sh2_write32_map[0x02/2] = sh2_write32_map[0x22/2] = sh2_write32_rom; // CS2 - DRAM - done by Pico32xSwapDRAM() - sh2_read8_map[0x04/2].mask = sh2_read8_map[0x24/2].mask = - sh2_read16_map[0x04/2].mask = sh2_read16_map[0x24/2].mask = 0x01ffff; + sh2_read8_map[0x04/2].mask = sh2_read8_map[0x24/2].mask = 0x01ffff; + sh2_read16_map[0x04/2].mask = sh2_read16_map[0x24/2].mask = 0x01fffe; + sh2_read32_map[0x04/2].mask = sh2_read32_map[0x24/2].mask = 0x01fffc; // CS3 - SDRAM sh2_read8_map[0x06/2].addr = sh2_read8_map[0x26/2].addr = - sh2_read16_map[0x06/2].addr = sh2_read16_map[0x26/2].addr = MAP_MEMORY(Pico32xMem->sdram); + sh2_read16_map[0x06/2].addr = sh2_read16_map[0x26/2].addr = + sh2_read32_map[0x06/2].addr = sh2_read32_map[0x26/2].addr = MAP_MEMORY(Pico32xMem->sdram); sh2_write8_map[0x06/2] = sh2_write8_sdram; sh2_write8_map[0x26/2] = sh2_write8_sdram_wt; sh2_write16_map[0x06/2] = sh2_write16_map[0x26/2] = sh2_write16_sdram; - sh2_read8_map[0x06/2].mask = sh2_read8_map[0x26/2].mask = - 
sh2_read16_map[0x06/2].mask = sh2_read16_map[0x26/2].mask = 0x03ffff; + sh2_write32_map[0x06/2] = sh2_write32_map[0x26/2] = sh2_write32_sdram; + sh2_read8_map[0x06/2].mask = sh2_read8_map[0x26/2].mask = 0x03ffff; + sh2_read16_map[0x06/2].mask = sh2_read16_map[0x26/2].mask = 0x03fffe; + sh2_read32_map[0x06/2].mask = sh2_read32_map[0x26/2].mask = 0x03fffc; // SH2 data array sh2_read8_map[0xc0/2].addr = MAP_HANDLER(sh2_read8_da); sh2_read16_map[0xc0/2].addr = MAP_HANDLER(sh2_read16_da); + sh2_read32_map[0xc0/2].addr = MAP_HANDLER(sh2_read32_da); sh2_write8_map[0xc0/2] = sh2_write8_da; sh2_write16_map[0xc0/2] = sh2_write16_da; + sh2_write32_map[0xc0/2] = sh2_write32_da; // SH2 IO sh2_read8_map[0xff/2].addr = MAP_HANDLER(sh2_peripheral_read8); sh2_read16_map[0xff/2].addr = MAP_HANDLER(sh2_peripheral_read16); + sh2_read32_map[0xff/2].addr = MAP_HANDLER(sh2_peripheral_read32); sh2_write8_map[0xff/2] = sh2_peripheral_write8; sh2_write16_map[0xff/2] = sh2_peripheral_write16; + sh2_write32_map[0xff/2] = sh2_peripheral_write32; // map DRAM area, both 68k and SH2 Pico32xSwapDRAM(1); msh2.read8_map = ssh2.read8_map = sh2_read8_map; msh2.read16_map = ssh2.read16_map = sh2_read16_map; + msh2.read32_map = ssh2.read32_map = sh2_read32_map; msh2.write8_tab = ssh2.write8_tab = (const void **)(void *)sh2_write8_map; msh2.write16_tab = ssh2.write16_tab = (const void **)(void *)sh2_write16_map; + msh2.write32_tab = ssh2.write32_tab = (const void **)(void *)sh2_write32_map; sh2_drc_mem_setup(&msh2); sh2_drc_mem_setup(&ssh2); diff --git a/pico/32x/sh2soc.c b/pico/32x/sh2soc.c index b5300119..0f75d9b4 100644 --- a/pico/32x/sh2soc.c +++ b/pico/32x/sh2soc.c @@ -244,7 +244,7 @@ u32 sh2_peripheral_read16(u32 a, SH2 *sh2) u16 *r = (void *)sh2->peri_regs; u32 d; - a &= 0x1ff; + a &= 0x1fe; d = r[(a / 2) ^ 1]; elprintf_sh2(sh2, EL_32XP, "peri r16 [%08x] %04x @%06x", @@ -343,7 +343,7 @@ void REGPARM(3) sh2_peripheral_write16(u32 a, u32 d, SH2 *sh2) elprintf_sh2(sh2, EL_32XP, "peri w16 [%08x] %04x 
@%06x", a, d, sh2_pc(sh2)); - a &= 0x1ff; + a &= 0x1fe; // evil WDT if (a == 0x80) { From ff0eaa11d9480eb2e97da16d5562aaa022310cbb Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 27 Mar 2019 21:58:32 +0100 Subject: [PATCH 0179/1110] move saving SH2 SR into memory access and do so only if needed --- cpu/sh2/compiler.c | 14 +++++----- cpu/sh2/compiler.h | 28 +++++++++++++++++++- pico/32x/memory.c | 64 +++++++++++++++++++++++++++++++--------------- pico/32x/sh2soc.c | 5 ++++ 4 files changed, 84 insertions(+), 27 deletions(-) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 3c82420e..3c5ce5b9 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -1231,11 +1231,11 @@ static int emit_memhandler_read(int size) rcache_clean(); +#ifndef DCR_SR_REG // must writeback cycles for poll detection stuff - // FIXME: rm if (reg_map_g2h[SHR_SR] != -1) emith_ctx_write(reg_map_g2h[SHR_SR], SHR_SR * 4); - +#endif arg1 = rcache_get_tmp_arg(1); emith_move_r_r_ptr(arg1, CONTEXT_REG); switch (size) { @@ -1244,10 +1244,10 @@ static int emit_memhandler_read(int size) case 2: emith_call(sh2_drc_read32); break; // 32 } rcache_invalidate(); - +#ifndef DCR_SR_REG if (reg_map_g2h[SHR_SR] != -1) emith_ctx_read(reg_map_g2h[SHR_SR], SHR_SR * 4); - +#endif return rcache_get_tmp_ret(); } @@ -1255,10 +1255,10 @@ static int emit_memhandler_read(int size) static void emit_memhandler_write(int size) { int arg2; - +#ifndef DCR_SR_REG if (reg_map_g2h[SHR_SR] != -1) emith_ctx_write(reg_map_g2h[SHR_SR], SHR_SR * 4); - +#endif rcache_clean(); arg2 = rcache_get_tmp_arg(2); @@ -1270,8 +1270,10 @@ static void emit_memhandler_write(int size) } rcache_invalidate(); +#ifndef DCR_SR_REG if (reg_map_g2h[SHR_SR] != -1) emith_ctx_read(reg_map_g2h[SHR_SR], SHR_SR * 4); +#endif } // rd = @(Rs,#offs) diff --git a/cpu/sh2/compiler.h b/cpu/sh2/compiler.h index 61d8d2da..70fdbf4e 100644 --- a/cpu/sh2/compiler.h +++ b/cpu/sh2/compiler.h @@ -13,7 +13,7 @@ void sh2_drc_frame(void); #define sh2_drc_frame() #endif 
-#define BLOCK_INSN_LIMIT 128 +#define BLOCK_INSN_LIMIT 1024 /* op_flags */ #define OF_DELAY_OP (1 << 0) @@ -25,3 +25,29 @@ void sh2_drc_frame(void); void scan_block(unsigned int base_pc, int is_slave, unsigned char *op_flags, unsigned int *end_pc, unsigned int *end_literals); + +#if defined(DRC_SH2) +// direct access to some host CPU registers used by the DRC +// XXX MUST match definitions in cpu/sh2/compiler.c +#if defined(_arm__) +#define DRC_SR_REG r10 +#elif defined(__i386__) +#define DRC_SR_REG edi +#else +#warning "direct DRC register access not available for this host" +#endif + +#ifdef DCR_SR_REG +#define DRC_DECLARE_SR register int sh2_sr asm(#DCR_SR_REG) +#define DRC_SAVE_SR(sh2) \ + if ((sh2->state & (SH2_STATE_RUN|SH2_STATE_BUSY)) == SH2_STATE_RUN) \ + sh2->sr = sh2_sr; +#define DRC_RESTORE_SR(sh2) \ + if ((sh2->state & (SH2_STATE_RUN|SH2_STATE_BUSY)) == SH2_STATE_RUN) \ + sh2_sr = sh2->sr; +#else +#define DRC_DECLARE_SR +#define DRC_SAVE_SR(sh2) +#define DRC_RESTORE_SR(sh2) +#endif +#endif diff --git a/pico/32x/memory.c b/pico/32x/memory.c index d399d758..f82b9f99 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -40,7 +40,9 @@ */ #include "../pico_int.h" #include "../memory.h" + #include "../../cpu/sh2/compiler.h" +DRC_DECLARE_SR; static const char str_mars[] = "MARS"; @@ -1237,6 +1239,7 @@ static u32 sh2_read8_unmapped(u32 a, SH2 *sh2) static u32 sh2_read8_cs0(u32 a, SH2 *sh2) { u32 d = 0; + DRC_SAVE_SR(sh2); sh2_burn_cycles(sh2, 1*2); @@ -1252,18 +1255,19 @@ static u32 sh2_read8_cs0(u32 a, SH2 *sh2) goto out_16to8; } - // TODO: mirroring? - if (!sh2->is_slave && a < sizeof(Pico32xMem->sh2_rom_m)) - return Pico32xMem->sh2_rom_m.b[a ^ 1]; - if (sh2->is_slave && a < sizeof(Pico32xMem->sh2_rom_s)) - return Pico32xMem->sh2_rom_s.b[a ^ 1]; - if ((a & 0x3fe00) == 0x4200) { d = Pico32xMem->pal[(a & 0x1ff) / 2]; goto out_16to8; } - return sh2_read8_unmapped(a, sh2); + // TODO: mirroring? 
+ if (!sh2->is_slave && a < sizeof(Pico32xMem->sh2_rom_m)) + d = Pico32xMem->sh2_rom_m.b[a ^ 1]; + else if (sh2->is_slave && a < sizeof(Pico32xMem->sh2_rom_s)) + d = Pico32xMem->sh2_rom_s.b[a ^ 1]; + else + d = sh2_read8_unmapped(a, sh2); + goto out; out_16to8: if (a & 1) @@ -1271,8 +1275,10 @@ out_16to8: else d >>= 8; +out: elprintf_sh2(sh2, EL_32X, "r8 [%08x] %02x @%06x", a, d, sh2_pc(sh2)); + DRC_RESTORE_SR(sh2); return d; } @@ -1299,13 +1305,14 @@ static u32 sh2_read16_unmapped(u32 a, SH2 *sh2) static u32 sh2_read16_cs0(u32 a, SH2 *sh2) { u32 d = 0; + DRC_SAVE_SR(sh2); sh2_burn_cycles(sh2, 1*2); if ((a & 0x3ffc0) == 0x4000) { d = p32x_sh2reg_read16(a, sh2); if (!(EL_LOGMASK & EL_PWM) && (a & 0x30) == 0x30) // hide PWM - return d; + goto out_noprint; goto out; } @@ -1315,21 +1322,23 @@ static u32 sh2_read16_cs0(u32 a, SH2 *sh2) goto out; } - if (!sh2->is_slave && a < sizeof(Pico32xMem->sh2_rom_m)) - return Pico32xMem->sh2_rom_m.w[a / 2]; - if (sh2->is_slave && a < sizeof(Pico32xMem->sh2_rom_s)) - return Pico32xMem->sh2_rom_s.w[a / 2]; - if ((a & 0x3fe00) == 0x4200) { d = Pico32xMem->pal[(a & 0x1ff) / 2]; goto out; } - return sh2_read16_unmapped(a, sh2); + if (!sh2->is_slave && a < sizeof(Pico32xMem->sh2_rom_m)) + d = Pico32xMem->sh2_rom_m.w[a / 2]; + else if (sh2->is_slave && a < sizeof(Pico32xMem->sh2_rom_s)) + d = Pico32xMem->sh2_rom_s.w[a / 2]; + else + d = sh2_read16_unmapped(a, sh2); out: elprintf_sh2(sh2, EL_32X, "r16 [%08x] %04x @%06x", a, d, sh2_pc(sh2)); +out_noprint: + DRC_RESTORE_SR(sh2); return d; } @@ -1383,6 +1392,7 @@ static void REGPARM(3) sh2_write8_unmapped(u32 a, u32 d, SH2 *sh2) static void REGPARM(3) sh2_write8_cs0(u32 a, u32 d, SH2 *sh2) { + DRC_SAVE_SR(sh2); elprintf_sh2(sh2, EL_32X, "w8 [%08x] %02x @%06x", a, d & 0xff, sh2_pc(sh2)); @@ -1390,16 +1400,24 @@ static void REGPARM(3) sh2_write8_cs0(u32 a, u32 d, SH2 *sh2) if ((a & 0x3fff0) == 0x4100) { sh2->poll_addr = 0; p32x_vdp_write8(a, d); - return; + goto out; + } + + if ((a & 0x3fe00) 
== 0x4200) { + ((u8 *)Pico32xMem->pal)[(a & 0x1ff) ^ 1] = d; + Pico32x.dirty_pal = 1; + goto out; } } if ((a & 0x3ffc0) == 0x4000) { p32x_sh2reg_write8(a, d, sh2); - return; + goto out; } sh2_write8_unmapped(a, d, sh2); +out: + DRC_RESTORE_SR(sh2); } static void REGPARM(3) sh2_write8_dram0(u32 a, u32 d, SH2 *sh2) @@ -1426,8 +1444,11 @@ static void REGPARM(3) sh2_write8_sdram(u32 a, u32 d, SH2 *sh2) static void REGPARM(3) sh2_write8_sdram_wt(u32 a, u32 d, SH2 *sh2) { // xmen sync hack.. - if (a < 0x26000200) + if (a < 0x26000200) { + DRC_SAVE_SR(sh2); sh2_end_run(sh2, 32); + DRC_RESTORE_SR(sh2); + } sh2_write8_sdram(a, d, sh2); } @@ -1453,6 +1474,7 @@ static void REGPARM(3) sh2_write16_unmapped(u32 a, u32 d, SH2 *sh2) static void REGPARM(3) sh2_write16_cs0(u32 a, u32 d, SH2 *sh2) { + DRC_SAVE_SR(sh2); if (((EL_LOGMASK & EL_PWM) || (a & 0x30) != 0x30)) // hide PWM elprintf_sh2(sh2, EL_32X, "w16 [%08x] %04x @%06x", a, d & 0xffff, sh2_pc(sh2)); @@ -1461,22 +1483,24 @@ static void REGPARM(3) sh2_write16_cs0(u32 a, u32 d, SH2 *sh2) if ((a & 0x3fff0) == 0x4100) { sh2->poll_addr = 0; p32x_vdp_write16(a, d, sh2); - return; + goto out; } if ((a & 0x3fe00) == 0x4200) { Pico32xMem->pal[(a & 0x1ff) / 2] = d; Pico32x.dirty_pal = 1; - return; + goto out; } } if ((a & 0x3ffc0) == 0x4000) { p32x_sh2reg_write16(a, d, sh2); - return; + goto out; } sh2_write16_unmapped(a, d, sh2); +out: + DRC_RESTORE_SR(sh2); } static void REGPARM(3) sh2_write16_dram0(u32 a, u32 d, SH2 *sh2) diff --git a/pico/32x/sh2soc.c b/pico/32x/sh2soc.c index 0f75d9b4..f8e657f5 100644 --- a/pico/32x/sh2soc.c +++ b/pico/32x/sh2soc.c @@ -25,6 +25,9 @@ #include "../pico_int.h" #include "../memory.h" +#include "../../cpu/sh2/compiler.h" +DRC_DECLARE_SR; + // DMAC handling struct dma_chan { unsigned int sar, dar; // src, dst addr @@ -413,10 +416,12 @@ void REGPARM(3) sh2_peripheral_write32(u32 a, u32 d, SH2 *sh2) if (!(dmac->dmaor & DMA_DME)) return; + DRC_SAVE_SR(sh2); if ((dmac->chan[0].chcr & (DMA_TE|DMA_DE)) == 
DMA_DE) dmac_trigger(sh2, &dmac->chan[0]); if ((dmac->chan[1].chcr & (DMA_TE|DMA_DE)) == DMA_DE) dmac_trigger(sh2, &dmac->chan[1]); + DRC_RESTORE_SR(sh2); } } From e267031a50851a7f4a3851a46bce8e2ce057ec41 Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 29 Mar 2019 18:36:44 +0100 Subject: [PATCH 0180/1110] debug stuff, bug fixing --- cpu/drc/emit_arm.c | 10 +-- cpu/sh2/compiler.c | 219 +++++++++++++++++++++++++++++++++++---------- cpu/sh2/compiler.h | 4 +- pico/32x/memory.c | 14 +-- 4 files changed, 185 insertions(+), 62 deletions(-) diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index 86d8a41d..632d476e 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -510,12 +510,12 @@ static int emith_xbranch(int cond, void *target, int is_call) emith_top_imm(A_COND_AL, A_OP_TST, r, imm) #define emith_cmp_r_imm(r, imm) { \ - u32 op = A_OP_CMP, imm_ = imm; \ - if (~imm_ < 0x100) { \ - imm_ = -imm_; \ - op = A_OP_CMN; \ + u32 op_ = A_OP_CMP, imm_ = (u8)imm; \ + if ((s8)imm_ < 0) { \ + imm_ = (u8)-imm_; \ + op_ = A_OP_CMN; \ } \ - emith_top_imm(A_COND_AL, op, r, imm); \ + emith_top_imm(A_COND_AL, op_, r, imm_); \ } #define emith_subf_r_imm(r, imm) \ diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 3c5ce5b9..800e9d32 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -53,6 +53,9 @@ // 04 - asm // 08 - runtime block entry log // 10 - smc self-check +// 100 - write trace +// 200 - compare trace +// 400 - print block entry backtrace // { #ifndef DRC_DEBUG #define DRC_DEBUG 0 @@ -73,6 +76,7 @@ static int insns_compiled, hash_collisions, host_insn_count; #define dbg(...) #endif + /// #define FETCH_OP(pc) \ dr_pc_base[(pc) / 2] @@ -147,13 +151,86 @@ static char sh2dasm_buff[64]; #define do_host_disasm(x) #endif -#if (DRC_DEBUG & 8) || defined(PDB) +#if (DRC_DEBUG & (8|256|512|1024)) || defined(PDB) + +#define SH2_DUMP(sh2, reason) { \ + char ms = (sh2)->is_slave ? 
's' : 'm'; \ + printf("%csh2 %s %08x\n", ms, reason, (sh2)->pc); \ + printf("%csh2 r0-7 %08x %08x %08x %08x %08x %08x %08x %08x\n", ms, \ + (sh2)->r[0], (sh2)->r[1], (sh2)->r[2], (sh2)->r[3], \ + (sh2)->r[4], (sh2)->r[5], (sh2)->r[6], (sh2)->r[7]); \ + printf("%csh2 r8-15 %08x %08x %08x %08x %08x %08x %08x %08x\n", ms, \ + (sh2)->r[8], (sh2)->r[9], (sh2)->r[10], (sh2)->r[11], \ + (sh2)->r[12], (sh2)->r[13], (sh2)->r[14], (sh2)->r[15]); \ + printf("%csh2 pc-ml %08x %08x %08x %08x %08x %08x %08x %08x\n", ms, \ + (sh2)->pc, (sh2)->ppc, (sh2)->pr, (sh2)->sr&0x3ff, \ + (sh2)->gbr, (sh2)->vbr, (sh2)->mach, (sh2)->macl); \ + printf("%csh2 tmp-p %08x %08x %08x %08x %08x %08x %08x %08x\n", ms, \ + (sh2)->drc_tmp, (sh2)->irq_cycles, \ + (sh2)->pdb_io_csum[0], (sh2)->pdb_io_csum[1], (sh2)->state, \ + (sh2)->poll_addr, (sh2)->poll_cycles, (sh2)->poll_cnt); \ +} +static SH2 csh2[2][4]; static void REGPARM(3) *sh2_drc_log_entry(void *block, SH2 *sh2, u32 sr) { if (block != NULL) { dbg(8, "= %csh2 enter %08x %p, c=%d", sh2->is_slave ? 
's' : 'm', sh2->pc, block, (signed int)sr >> 12); +#if defined PDB pdb_step(sh2, sh2->pc); +#elif (DRC_DEBUG & 256) + { + static FILE *trace[2]; + int idx = sh2->is_slave; +if (sh2 != &sh2s[0] && sh2 != &sh2s[1]) printf("sh2 %p?\n",sh2); + if (!trace[0]) { + truncate("pico.trace", 0); + trace[0] = fopen("pico.trace0", "wb"); + trace[1] = fopen("pico.trace1", "wb"); + } + if (csh2[idx][0].pc != sh2->pc) { + fwrite(sh2, offsetof(SH2, read8_map), 1, trace[idx]); + fwrite(&sh2->pdb_io_csum, sizeof(sh2->pdb_io_csum), 1, trace[idx]); + memcpy(&csh2[idx][0], sh2, offsetof(SH2, icount)); + } + } +#elif (DRC_DEBUG & 512) + { + static FILE *trace[2]; + static SH2 fsh2; + int idx = sh2->is_slave; + if (!trace[0]) { + trace[0] = fopen("pico.trace0", "rb"); + trace[1] = fopen("pico.trace1", "rb"); + } + if (csh2[idx][0].pc != sh2->pc) { + if (!fread(&fsh2, offsetof(SH2, read8_map), 1, trace[idx]) || + !fread(&fsh2.pdb_io_csum, sizeof(sh2->pdb_io_csum), 1, trace[idx])) { + printf("trace eof at %08lx\n",ftell(trace[idx])); + exit(1); + } + fsh2.sr = (fsh2.sr & 0xfff) | (sh2->sr & ~0xfff); + fsh2.is_slave = idx; + if (memcmp(&fsh2, sh2, offsetof(SH2, read8_map)) || + 0)//memcmp(&fsh2.pdb_io_csum, &sh2->pdb_io_csum, sizeof(sh2->pdb_io_csum))) + { + printf("difference at %08lx!\n",ftell(trace[idx])); + SH2_DUMP(&fsh2, "file"); + SH2_DUMP(sh2, "current"); + SH2_DUMP(&csh2[idx][0], "previous"); + exit(1); + } + csh2[idx][0] = fsh2; + } + } +#elif (DRC_DEBUG & 1024) + { + int x = sh2->is_slave, i; + for (i = 0; i < ARRAY_SIZE(csh2[x]); i++) + memcpy(&csh2[x][i], &csh2[x][i+1], offsetof(SH2, icount)); + memcpy(&csh2[x][3], sh2, offsetof(SH2, icount)); + } +#endif } return block; } @@ -759,13 +836,18 @@ static u32 dr_gcregs_mask; static u32 dr_gcregs_dirty; #if PROPAGATE_CONSTANTS +static void gconst_set(sh2_reg_e r, u32 val) +{ + dr_gcregs_mask |= 1 << r; + dr_gcregs[r] = val; +} + static void gconst_new(sh2_reg_e r, u32 val) { int i; - dr_gcregs_mask |= 1 << r; + gconst_set(r, val); 
dr_gcregs_dirty |= 1 << r; - dr_gcregs[r] = val; // throw away old r that we might have cached for (i = ARRAY_SIZE(reg_temp) - 1; i >= 0; i--) { @@ -818,6 +900,17 @@ static void gconst_kill(sh2_reg_e r) dr_gcregs_dirty &= ~(1 << r); } +#if PROPAGATE_CONSTANTS +static void gconst_copy(sh2_reg_e rd, sh2_reg_e rs) +{ + u32 val; + + gconst_kill(rd); + if (gconst_get(rs, &val)) + gconst_set(rd, val); +} +#endif + static void gconst_clean(void) { int i; @@ -1104,7 +1197,7 @@ static void rcache_unlock_all(void) reg_temp[i].flags &= ~HRF_LOCKED; } -#ifdef DRC_CMP +#if (DRC_DEBUG & (8|256|512|1024)) || defined(DRC_CMP) static u32 rcache_used_hreg_mask(void) { u32 mask = 0; @@ -1202,18 +1295,13 @@ static void emit_move_r_imm32(sh2_reg_e dst, u32 imm) static void emit_move_r_r(sh2_reg_e dst, sh2_reg_e src) { int hr_d, hr_s; - u32 val; + hr_s = rcache_get_reg(src, RC_GR_READ); + hr_d = rcache_get_reg(dst, RC_GR_WRITE); + emith_move_r_r(hr_d, hr_s); #if PROPAGATE_CONSTANTS - if (gconst_get(src, &val)) - gconst_new(dst, val); - else + gconst_copy(dst, src); #endif - { - hr_s = rcache_get_reg(src, RC_GR_READ); - hr_d = rcache_get_reg(dst, RC_GR_WRITE); - emith_move_r_r(hr_d, hr_s); - } } // T must be clear, and comparison done just before this @@ -1231,7 +1319,7 @@ static int emit_memhandler_read(int size) rcache_clean(); -#ifndef DCR_SR_REG +#ifndef DRC_SR_REG // must writeback cycles for poll detection stuff if (reg_map_g2h[SHR_SR] != -1) emith_ctx_write(reg_map_g2h[SHR_SR], SHR_SR * 4); @@ -1244,7 +1332,7 @@ static int emit_memhandler_read(int size) case 2: emith_call(sh2_drc_read32); break; // 32 } rcache_invalidate(); -#ifndef DCR_SR_REG +#ifndef DRC_SR_REG if (reg_map_g2h[SHR_SR] != -1) emith_ctx_read(reg_map_g2h[SHR_SR], SHR_SR * 4); #endif @@ -1255,7 +1343,7 @@ static int emit_memhandler_read(int size) static void emit_memhandler_write(int size) { int arg2; -#ifndef DCR_SR_REG +#ifndef DRC_SR_REG if (reg_map_g2h[SHR_SR] != -1) emith_ctx_write(reg_map_g2h[SHR_SR], SHR_SR * 
4); #endif @@ -1270,7 +1358,7 @@ static void emit_memhandler_write(int size) } rcache_invalidate(); -#ifndef DCR_SR_REG +#ifndef DRC_SR_REG if (reg_map_g2h[SHR_SR] != -1) emith_ctx_read(reg_map_g2h[SHR_SR], SHR_SR * 4); #endif @@ -1287,8 +1375,8 @@ static int emit_memhandler_read_rr(sh2_reg_e rd, sh2_reg_e rs, u32 offs, int siz hr2 = rcache_get_tmp(); emith_move_r_imm(hr2, val); } else { - gconst_new(rd, val); - hr2 = rcache_get_reg(rd, RC_GR_RMW); + emit_move_r_imm32(rd, val); + hr2 = rcache_get_reg(rd, RC_GR_READ); } return hr2; } @@ -1296,7 +1384,10 @@ static int emit_memhandler_read_rr(sh2_reg_e rd, sh2_reg_e rs, u32 offs, int siz if (gconst_get(rs, &val)) { hr = emit_get_rbase_and_offs(val + offs, &offs2); if (hr != -1) { - hr2 = rcache_get_reg(rd, RC_GR_WRITE); + if (rd == SHR_TMP) + hr2 = rcache_get_tmp(); + else + hr2 = rcache_get_reg(rd, RC_GR_WRITE); switch (size) { case 0: // 8 emith_read8s_r_r_offs(hr2, hr, offs2 ^ 1); @@ -1323,13 +1414,18 @@ static int emit_memhandler_read_rr(sh2_reg_e rd, sh2_reg_e rs, u32 offs, int siz emith_add_r_imm(hr, offs); } hr = emit_memhandler_read(size); - hr2 = rcache_get_reg(rd, RC_GR_WRITE); - if (size != 2) { - emith_sext(hr2, hr, (size == 1) ? 16 : 8); - } else - emith_move_r_r(hr2, hr); - rcache_free_tmp(hr); + if (rd == SHR_TMP) + hr2 = hr; + else + hr2 = rcache_get_reg(rd, RC_GR_WRITE); + if (rd != SHR_TMP && size != 2) { + emith_sext(hr2, hr, (size == 1) ? 16 : 8); + } else if (hr != hr2) + emith_move_r_r(hr2, hr); + + if (hr != hr2) + rcache_free_tmp(hr); return hr2; } @@ -1339,6 +1435,7 @@ static void emit_memhandler_write_rr(sh2_reg_e rd, sh2_reg_e rs, u32 offs, int s int hr; u32 val; + rcache_clean(); // XXX rcache_get_reg_arg(1, rd); if (gconst_get(rs, &val)) { @@ -1375,7 +1472,7 @@ static int emit_indirect_indexed_read(sh2_reg_e rd, sh2_reg_e rx, sh2_reg_e ry, else hr2 = hr; - if (size != 2) { // 16, 8 + if (rd != SHR_TMP && size != 2) { // 16, 8 emith_sext(hr2, hr, size ? 
16 : 8); } else if (hr != hr2) // 32 emith_move_r_r(hr2, hr); @@ -1397,6 +1494,7 @@ static void emit_indirect_indexed_write(sh2_reg_e rd, sh2_reg_e rx, sh2_reg_e ry if (gconst_get(rx, &offs)) return emit_memhandler_write_rr(rd, ry, offs, size); #endif + rcache_clean(); // XXX rcache_get_reg_arg(1, rd); a0 = rcache_get_reg_arg(0, rx); t = rcache_get_reg(ry, RC_GR_READ); @@ -1459,17 +1557,6 @@ static void emit_do_static_regs(int is_write, int tmpr) /* just after lookup function, jump to address returned */ static void emit_block_entry(void) { -#if (DRC_DEBUG & 8) || defined(PDB) - int arg1, arg2; - host_arg2reg(arg1, 1); - host_arg2reg(arg2, 2); - - emit_do_static_regs(1, arg2); - emith_move_r_r_ptr(arg1, CONTEXT_REG); - emith_move_r_r(arg2, rcache_get_reg(SHR_SR, RC_GR_READ)); - emith_call(sh2_drc_log_entry); - rcache_invalidate(); -#endif emith_tst_r_r_ptr(RET_REG, RET_REG); EMITH_SJMP_START(DCOND_EQ); emith_jump_reg_c(DCOND_NE, RET_REG); @@ -1675,6 +1762,24 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_jump_cond(DCOND_LE, sh2_drc_exit); do_host_disasm(tcache_id); rcache_unlock_all(); + +#if (DRC_DEBUG & (8|256|512|1024)) + emit_move_r_imm32(SHR_PC, pc); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + FLUSH_CYCLES(sr); + rcache_clean(); + tmp = rcache_used_hreg_mask(); + emith_save_caller_regs(tmp); + emit_do_static_regs(1, 0); + rcache_get_reg_arg(2, SHR_SR); + tmp2 = rcache_get_tmp_arg(0); + tmp3 = rcache_get_tmp_arg(1); + emith_move_r_imm(tmp2, (u32)tcache_ptr); + emith_move_r_r_ptr(tmp3,CONTEXT_REG); + emith_call(sh2_drc_log_entry); + emith_restore_caller_regs(tmp); + rcache_invalidate(); +#endif } #ifdef DRC_CMP @@ -1729,7 +1834,10 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_move_r_imm(tmp, pc); emith_tst_r_imm(sr, T); tmp2 = ops[i-1].op == OP_BRANCH_CT ? DCOND_NE : DCOND_EQ; + tmp3 = ops[i-1].op == OP_BRANCH_CT ? 
DCOND_EQ : DCOND_NE; + EMITH_SJMP_START(tmp3); emith_move_r_imm_c(tmp2, tmp, ops[i-1].imm); + EMITH_SJMP_END(tmp3); break; case OP_BRANCH_N: emit_move_r_imm32(SHR_PC, pc); @@ -1765,7 +1873,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) drcf.pending_branch_direct = 1; } else { emit_move_r_r(SHR_PC, opd->rm); - ops[i+1].source |= SHR_PC; // need PC for jump after delay slot drcf.pending_branch_indirect = 1; } goto end_op; @@ -1785,7 +1892,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_move_r_r(tmp3, tmp); } emith_add_r_r(tmp, tmp2); - ops[i+1].source |= SHR_PC; // need PC for jump after delay slot drcf.pending_branch_indirect = 1; } goto end_op; @@ -1813,7 +1919,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case OP_UNDEFINED: elprintf_sh2(sh2, EL_ANOMALY, "drc: illegal op %04x @ %08x", op, pc - 2); - opd->imm = 4; + opd->imm = (op_flags[i] & OF_B_IN_DS) ? 6 : 4; // fallthrough case OP_TRAPA: tmp = rcache_get_reg(SHR_SP, RC_GR_RMW); @@ -1827,7 +1933,13 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // push PC rcache_get_reg_arg(0, SHR_SP); tmp = rcache_get_tmp_arg(1); - emith_move_r_imm(tmp, pc); + if (op == OP_TRAPA) + emith_move_r_imm(tmp, pc); + else if (drcf.pending_branch_indirect) { + tmp2 = rcache_get_reg(SHR_PC, RC_GR_READ); + emith_move_r_r(tmp, tmp2); + } else + emith_move_r_imm(tmp, pc - 2); emit_memhandler_write(2); // obtain new PC emit_memhandler_read_rr(SHR_PC, SHR_VBR, opd->imm * 4, 2); @@ -1988,7 +2100,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) goto end_op; case 0x0f: // MAC.L @Rm+,@Rn+ 0000nnnnmmmm1111 emit_indirect_read_double(&tmp, &tmp2, GET_Rn(), GET_Rm(), 2); - sr = rcache_get_reg(SHR_SR, RC_GR_READ); tmp3 = rcache_get_reg(SHR_MACL, RC_GR_RMW); tmp4 = rcache_get_reg(SHR_MACH, RC_GR_RMW); @@ -2087,12 +2198,12 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x0e: // MULU.W Rm,Rn 0010nnnnmmmm1110 case 0x0f: // 
MULS.W Rm,Rn 0010nnnnmmmm1111 tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ); + tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ); tmp = rcache_get_reg(SHR_MACL, RC_GR_WRITE); if (op & 1) { emith_sext(tmp, tmp2, 16); } else emith_clear_msb(tmp, tmp2, 16); - tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ); tmp2 = rcache_get_tmp(); if (op & 1) { emith_sext(tmp2, tmp3, 16); @@ -2308,7 +2419,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } tmp2 = rcache_get_reg(GET_Rn(), RC_GR_RMW); emith_sub_r_imm(tmp2, 4); - rcache_clean(); + rcache_clean(); // XXX rcache_get_reg_arg(0, GET_Rn()); tmp3 = rcache_get_reg_arg(1, tmp); if (tmp == SHR_SR) @@ -2444,6 +2555,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_bic_r_imm(sr, T); emith_cmp_r_imm(tmp, 0); emit_or_t_if_eq(sr); + rcache_clean(); // XXX emith_or_r_imm(tmp, 0x80); tmp2 = rcache_get_tmp_arg(1); // assuming it differs to tmp emith_move_r_r(tmp2, tmp); @@ -2596,7 +2708,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) goto end_op; case 0x0800: // CMP/EQ #imm,R0 10001000iiiiiiii // XXX: could use cmn - tmp2 = rcache_get_reg(0, RC_GR_READ); + tmp2 = rcache_get_reg(SHR_R0, RC_GR_READ); sr = rcache_get_reg(SHR_SR, RC_GR_RMW); emith_bic_r_imm(sr, T); emith_cmp_r_imm(tmp2, (s8)(op & 0xff)); @@ -2679,10 +2791,11 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) default: default_: - if (!(op_flags[i] & OF_B_IN_DS)) + if (!(op_flags[i] & OF_B_IN_DS)) { elprintf_sh2(sh2, EL_ANOMALY, "drc: illegal op %04x @ %08x", op, pc - 2); exit(1); + } } end_op: @@ -3268,6 +3381,15 @@ void block_stats(void) void sh2_drc_flush_all(void) { +#if (DRC_DEBUG & 1024) + int i; + printf("backtrace master:\n"); + for (i = 0; i < ARRAY_SIZE(csh2[0]); i++) + SH2_DUMP(&csh2[0][i], "bt msh2"); + printf("backtrace slave:\n"); + for (i = 0; i < ARRAY_SIZE(csh2[1]); i++) + SH2_DUMP(&csh2[1][i], "bt ssh2"); +#endif block_stats(); flush_tcache(0); flush_tcache(1); @@ -4200,13 +4322,14 @@ void 
scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, if (op_flags[i] & OF_DELAY_OP) { switch (opd->op) { case OP_BRANCH: + case OP_BRANCH_N: case OP_BRANCH_CT: case OP_BRANCH_CF: case OP_BRANCH_R: case OP_BRANCH_RF: elprintf(EL_ANOMALY, "%csh2 drc: branch in DS @ %08x", is_slave ? 's' : 'm', pc); - opd->op = OP_UNHANDLED; + opd->op = OP_UNDEFINED; op_flags[i] |= OF_B_IN_DS; next_is_delay = 0; break; diff --git a/cpu/sh2/compiler.h b/cpu/sh2/compiler.h index 70fdbf4e..c9cf7ab0 100644 --- a/cpu/sh2/compiler.h +++ b/cpu/sh2/compiler.h @@ -29,9 +29,9 @@ void scan_block(unsigned int base_pc, int is_slave, #if defined(DRC_SH2) // direct access to some host CPU registers used by the DRC // XXX MUST match definitions in cpu/sh2/compiler.c -#if defined(_arm__) +#if defined(__arm__) #define DRC_SR_REG r10 -#elif defined(__i386__) +#elif defined(__i386__) || defined(__x86_64__) #define DRC_SR_REG edi #else #warning "direct DRC register access not available for this host" diff --git a/pico/32x/memory.c b/pico/32x/memory.c index f82b9f99..8f2a7c2f 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -1344,7 +1344,7 @@ out_noprint: static u32 sh2_read16_da(u32 a, SH2 *sh2) { - return ((u16 *)sh2->data_array)[(a & 0xfff) / 2]; + return ((u16 *)sh2->data_array)[(a & 0xffe) / 2]; } static u32 sh2_read16_rom(u32 a, SH2 *sh2) @@ -1367,7 +1367,7 @@ static u32 sh2_read32_cs0(u32 a, SH2 *sh2) static u32 sh2_read32_da(u32 a, SH2 *sh2) { - u32 d = *(u32 *)(sh2->data_array + (a & 0xfff)); + u32 d = *((u32 *)sh2->data_array + (a & 0xffc)/4); return (d << 16) | (d >> 16); } @@ -1587,28 +1587,28 @@ static void REGPARM(3) sh2_write32_dram1(u32 a, u32 d, SH2 *sh2) static void REGPARM(3) sh2_write32_sdram(u32 a, u32 d, SH2 *sh2) { - u32 a1 = a & 0x3ffff; + u32 a1 = a & 0x3fffc; *(u32 *)(sh2->p_sdram + a1) = (d << 16) | (d >> 16); #ifdef DRC_SH2 unsigned short *p = &Pico32xMem->drcblk_ram[a1 >> SH2_DRCBLK_RAM_SHIFT]; if (p[0]) sh2_drc_wcheck_ram(a, p[0], sh2->is_slave); if 
(p[1]) - sh2_drc_wcheck_ram(a, p[1], sh2->is_slave); + sh2_drc_wcheck_ram(a+2, p[1], sh2->is_slave); #endif } static void REGPARM(3) sh2_write32_da(u32 a, u32 d, SH2 *sh2) { - u32 a1 = a & 0xfff; - *(u32 *)(sh2->data_array + a1) = (d << 16) | (d >> 16); + u32 a1 = a & 0xffc; + *((u32 *)sh2->data_array + a1/4) = (d << 16) | (d >> 16); #ifdef DRC_SH2 int id = sh2->is_slave; unsigned short *p = &Pico32xMem->drcblk_da[id][a1 >> SH2_DRCBLK_DA_SHIFT]; if (p[0]) sh2_drc_wcheck_da(a, p[0], id); if (p[1]) - sh2_drc_wcheck_da(a, p[1], id); + sh2_drc_wcheck_da(a+2, p[1], id); #endif } From 4f4e9bf3bd0637256849a249e959f12c44e3cd3e Mon Sep 17 00:00:00 2001 From: kub Date: Mon, 1 Apr 2019 23:39:58 +0200 Subject: [PATCH 0181/1110] overhaul of the register cache (improves generated code by some 10+%) --- cpu/drc/emit_arm.c | 55 +- cpu/drc/emit_x86.c | 91 +- cpu/sh2/compiler.c | 1972 +++++++++++++++++++++++++++++--------------- cpu/sh2/compiler.h | 4 +- 4 files changed, 1455 insertions(+), 667 deletions(-) diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index 632d476e..4421c641 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -5,6 +5,7 @@ * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. 
*/ +#define HOST_REGS 16 #define CONTEXT_REG 11 #define RET_REG 0 @@ -406,9 +407,24 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_add_r_r_r_lsl(d, s1, s2, lslimm) \ EOP_ADD_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm) +#define emith_addf_r_r_r_lsl(d, s1, s2, lslimm) \ + EOP_ADD_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSL,lslimm) + #define emith_addf_r_r_r_lsr(d, s1, s2, lslimm) \ EOP_ADD_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSR,lslimm) +#define emith_adcf_r_r_r_lsl(d, s1, s2, lslimm) \ + EOP_ADC_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSL,lslimm) + +#define emith_sub_r_r_r_lsl(d, s1, s2, lslimm) \ + EOP_SUB_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm) + +#define emith_subf_r_r_r_lsl(d, s1, s2, lslimm) \ + EOP_SUB_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSL,lslimm) + +#define emith_sbcf_r_r_r_lsl(d, s1, s2, lslimm) \ + EOP_SBC_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSL,lslimm) + #define emith_or_r_r_r_lsl(d, s1, s2, lslimm) \ EOP_ORR_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm) @@ -418,6 +434,9 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_eor_r_r_r_lsr(d, s1, s2, lsrimm) \ EOP_EOR_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSR,lsrimm) +#define emith_and_r_r_r_lsl(d, s1, s2, lslimm) \ + EOP_AND_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm) + #define emith_or_r_r_lsl(d, s, lslimm) \ emith_or_r_r_r_lsl(d, d, s, lslimm) @@ -427,12 +446,30 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_add_r_r_r(d, s1, s2) \ emith_add_r_r_r_lsl(d, s1, s2, 0) +#define emith_addf_r_r_r(d, s1, s2) \ + emith_addf_r_r_r_lsl(d, s1, s2, 0) + +#define emith_adcf_r_r_r(d, s1, s2) \ + emith_adcf_r_r_r_lsl(d, s1, s2, 0) + +#define emith_sub_r_r_r(d, s1, s2) \ + emith_sub_r_r_r_lsl(d, s1, s2, 0) + +#define emith_subf_r_r_r(d, s1, s2) \ + emith_subf_r_r_r_lsl(d, s1, s2, 0) + +#define emith_sbcf_r_r_r(d, s1, s2) \ + emith_sbcf_r_r_r_lsl(d, s1, s2, 0) + #define emith_or_r_r_r(d, s1, s2) \ emith_or_r_r_r_lsl(d, s1, s2, 0) #define emith_eor_r_r_r(d, s1, s2) \ 
emith_eor_r_r_r_lsl(d, s1, s2, 0) +#define emith_and_r_r_r(d, s1, s2) \ + emith_and_r_r_r_lsl(d, s1, s2, 0) + #define emith_add_r_r(d, s) \ emith_add_r_r_r(d, d, s) @@ -539,11 +576,14 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_bic_r_imm_c(cond, r, imm) \ emith_op_imm(cond, 0, A_OP_BIC, r, imm) +#define emith_tst_r_imm_c(cond, r, imm) \ + emith_top_imm(cond, A_OP_TST, r, imm) + #define emith_move_r_imm_s8(r, imm) { \ - if ((imm) & 0x80) \ - EOP_MVN_IMM(r, 0, ((imm) ^ 0xff)); \ + if ((s8)(imm) < 0) \ + EOP_MVN_IMM(r, 0, ((u8)(imm) ^ 0xff)); \ else \ - EOP_MOV_IMM(r, 0, imm); \ + EOP_MOV_IMM(r, 0, (u8)imm); \ } #define emith_and_r_r_imm(d, s, imm) \ @@ -558,6 +598,15 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_sub_r_r_imm(d, s, imm) \ emith_op_imm2(A_COND_AL, 0, A_OP_SUB, d, s, imm) +#define emith_subf_r_r_imm(d, s, imm) \ + emith_op_imm2(A_COND_AL, 1, A_OP_SUB, d, s, (imm)) + +#define emith_or_r_r_imm(d, s, imm) \ + emith_op_imm2(A_COND_AL, 0, A_OP_ORR, d, s, (imm)) + +#define emith_eor_r_r_imm(d, s, imm) \ + emith_op_imm2(A_COND_AL, 0, A_OP_EOR, d, s, (imm)) + #define emith_neg_r_r(d, s) \ EOP_RSB_IMM(d, s, 0, 0) diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 01702e0c..4f9dd5a7 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -15,6 +15,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; +#define HOST_REGS 8 #define CONTEXT_REG xBP #define RET_REG xAX @@ -185,6 +186,61 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; } \ } while (0) +#define emith_sub_r_r_r(d, s1, s2) do { \ + if (d == s1) { \ + emith_sub_r_r(d, s2); \ + } else if (d == s2) { \ + emith_sub_r_r(d, s1); \ + } else { \ + emith_move_r_r(d, s1); \ + emith_sub_r_r(d, s2); \ + } \ +} while (0) + +#define emith_adc_r_r_r(d, s1, s2) do { \ + if (d == s1) { \ + emith_adc_r_r(d, s2); \ + } else if (d == s2) { \ + emith_adc_r_r(d, s1); \ + } else { \ + emith_move_r_r(d, s1); \ + emith_adc_r_r(d, s2); \ + } 
\ +} while (0) + +#define emith_sbc_r_r_r(d, s1, s2) do { \ + if (d == s1) { \ + emith_sbc_r_r(d, s2); \ + } else if (d == s2) { \ + emith_sbc_r_r(d, s1); \ + } else { \ + emith_move_r_r(d, s1); \ + emith_sbc_r_r(d, s2); \ + } \ +} while (0) + +#define emith_and_r_r_r(d, s1, s2) do { \ + if (d == s1) { \ + emith_and_r_r(d, s2); \ + } else if (d == s2) { \ + emith_and_r_r(d, s1); \ + } else { \ + emith_move_r_r(d, s1); \ + emith_and_r_r(d, s2); \ + } \ +} while (0) + +#define emith_or_r_r_r(d, s1, s2) do { \ + if (d == s1) { \ + emith_or_r_r(d, s2); \ + } else if (d == s2) { \ + emith_or_r_r(d, s1); \ + } else { \ + emith_move_r_r(d, s1); \ + emith_or_r_r(d, s2); \ + } \ +} while (0) + #define emith_eor_r_r_r(d, s1, s2) do { \ if (d == s1) { \ emith_eor_r_r(d, s2); \ @@ -281,6 +337,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; emith_eor_r_imm(r, imm) #define emith_bic_r_imm_c(cond, r, imm) \ emith_bic_r_imm(r, imm) +#define emith_tst_r_imm_c(cond, r, imm) \ + emith_tst_r_imm(r, imm) #define emith_ror_c(cond, d, s, cnt) \ emith_ror(d, s, cnt) @@ -324,12 +382,33 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; EMIT(imm, s32); \ } while (0) +#define emith_sub_r_r_imm(d, s, imm) do { \ + if (d != s) \ + emith_move_r_r(d, s); \ + if (imm) \ + emith_sub_r_imm(d, imm); \ +} while (0) + #define emith_and_r_r_imm(d, s, imm) do { \ if (d != s) \ emith_move_r_r(d, s); \ emith_and_r_imm(d, imm); \ } while (0) +#define emith_or_r_r_imm(d, s, imm) do { \ + if (d != s) \ + emith_move_r_r(d, s); \ + if ((s32)imm != 0) \ + emith_or_r_imm(d, imm); \ +} while (0) + +#define emith_eor_r_r_imm(d, s, imm) do { \ + if (d != s) \ + emith_move_r_r(d, s); \ + if ((s32)imm != 0) \ + emith_eor_r_imm(d, imm); \ +} while (0) + // shift #define emith_shift(op, d, s, cnt) do { \ if (d != s) \ @@ -456,6 +535,14 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_eorf_r_r emith_eor_r_r #define emith_negcf_r_r emith_negc_r_r +#define emith_subf_r_r_imm 
emith_sub_r_r_imm +#define emith_addf_r_r_r emith_add_r_r_r +#define emith_subf_r_r_r emith_sub_r_r_r +#define emith_adcf_r_r_r emith_adc_r_r_r +#define emith_sbcf_r_r_r emith_sbc_r_r_r +#define emith_eorf_r_r_r emith_eor_r_r_r +#define emith_addf_r_r_r_lsr emith_add_r_r_r_lsr + #define emith_lslf emith_lsl #define emith_lsrf emith_lsr #define emith_asrf emith_asr @@ -705,7 +792,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; case 0: rd = xDI; break; \ case 1: rd = xSI; break; \ case 2: rd = xDX; break; \ - case 3: rd = xBX; break; \ + default: rd = xCX; break; \ } #define emith_sh2_drc_entry() { \ @@ -728,6 +815,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; case 0: rd = xCX; break; \ case 1: rd = xDX; break; \ case 2: rd = 8; break; \ + default: rd = 9; break; \ } #define emith_sh2_drc_entry() { \ @@ -764,6 +852,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; case 0: rd = xAX; break; \ case 1: rd = xDX; break; \ case 2: rd = xCX; break; \ + default: rd = xBX; break; \ } #define emith_sh2_drc_entry() { \ diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 800e9d32..1b300cc3 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -38,6 +38,8 @@ // features #define PROPAGATE_CONSTANTS 1 #define LINK_BRANCHES 1 +#define ALIAS_REGISTERS 1 +#define REMAP_REGISTER 1 // limits (per block) #define MAX_BLOCK_SIZE (BLOCK_INSN_LIMIT * 6 * 6) @@ -103,6 +105,7 @@ static int insns_compiled, hash_collisions, host_insn_count; #define BITMASK4(v0,v1,v2,v3) (BITMASK3(v0,v1,v2) | (1 << (v3))) #define BITMASK5(v0,v1,v2,v3,v4) (BITMASK4(v0,v1,v2,v3) | (1 << (v4))) #define BITMASK6(v0,v1,v2,v3,v4,v5) (BITMASK5(v0,v1,v2,v3,v4) | (1 << (v5))) +#define BITRANGE(v0,v1) (BITMASK1(v1+1)-BITMASK1(v0)) // set with v0..v1 #define SHR_T SHR_SR // might make them separate someday #define SHR_MEM 31 @@ -137,6 +140,11 @@ enum op_types { OP_UNDEFINED, }; +#define OP_ISBRANCH(op) (BITRANGE(OP_BRANCH, OP_BRANCH_RF) & BITMASK1(op)) +#define OP_ISBRAUC(op) 
(BITMASK4(OP_BRANCH, OP_BRANCH_R, OP_BRANCH_RF, OP_RTE) \ + & BITMASK1(op)) +#define OP_ISBRACND(op) (BITMASK2(OP_BRANCH_CT, OP_BRANCH_CF) & BITMASK1(op)) + #ifdef DRC_SH2 static int literal_disabled_frames; @@ -294,9 +302,9 @@ static int block_counts[TCACHE_BUFFERS]; // we have block_link_pool to avoid using mallocs static const int block_link_pool_max_counts[TCACHE_BUFFERS] = { - 4*1024, - 256, - 256, + 16*1024, + 4*256, + 4*256, }; static struct block_link *block_link_pool[TCACHE_BUFFERS]; static int block_link_pool_counts[TCACHE_BUFFERS]; @@ -332,119 +340,148 @@ static struct block_entry **hash_tables[TCACHE_BUFFERS]; // host register tracking enum { HR_FREE, - HR_CACHED, // 'val' has sh2_reg_e -// HR_CONST, // 'val' has a constant + HR_STATIC, // vreg has a static mapping + HR_CACHED, // vreg has sh2_reg_e HR_TEMP, // reg used for temp storage -}; +} cach_reg_type; enum { - HRF_DIRTY = 1 << 0, // reg has "dirty" value to be written to ctx - HRF_LOCKED = 1 << 1, // HR_CACHED can't be evicted -}; + HRF_DIRTY = 1 << 0, // has "dirty" value to be written to ctx + HRF_LOCKED = 1 << 1, // can't be evicted + HRF_TEMP = 1 << 2, // is for temps and args + HRF_REG = 1 << 3, // is for sh2 regs +} cache_reg_flags; typedef struct { - u32 hreg:5; // "host" reg - u32 greg:5; // "guest" reg - u32 type:3; - u32 flags:3; - u32 stamp:16; // kind of a timestamp -} temp_reg_t; + u8 hreg; // "host" reg + u8 flags:4; // TEMP or REG? 
+ u8 type:4; + u16 stamp; // kind of a timestamp + u32 gregs; // "guest" reg mask +} cache_reg_t; -// note: reg_temp[] must have at least the amount of -// registers used by handlers in worst case (currently 4) +// guest register tracking +enum { + GRF_DIRTY = 1 << 0, // reg has "dirty" value to be written to ctx + GRF_CONST = 1 << 1, // reg has a constant + GRF_CDIRTY = 1 << 2, // constant not yet written to ctx + GRF_STATIC = 1 << 3, // reg has static mapping to vreg +} guest_reg_flags; + +typedef struct { + u16 flags; // guest flags: is constant, is dirty? + s8 sreg; // cache reg for static mapping + s8 vreg; // cache_reg this is currently mapped to, -1 if not mapped + u32 val; // value if this is constant +} guest_reg_t; + + +// note: cache_regs[] must have at least the amount of +// HRF_REG registers used by handlers in worst case (currently 4) #ifdef __arm__ #include "../drc/emit_arm.c" -#ifndef __MACH__ - -static const int reg_map_g2h[] = { - 4, 5, 6, 7, - 8, -1, -1, -1, - -1, -1, -1, -1, - -1, -1, -1, 9, // r12 .. sp - -1, -1, -1, 10, // SHR_PC, SHR_PPC, SHR_PR, SHR_SR, - -1, -1, -1, -1, // SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL, -}; - +// register assigment goes by ABI convention. All caller save registers are TEMP +// the others are either static or REG. SR must be static, R0 very recommended +static guest_reg_t guest_regs[] = { + // SHR_R0 .. SHR_SP +#ifndef __MACH__ // no r9.. + { GRF_STATIC, 8 }, { GRF_STATIC, 9 }, { 0 } , { 0 } , #else - -// no r9.. -static const int reg_map_g2h[] = { - 4, 5, 6, 7, - -1, -1, -1, -1, - -1, -1, -1, -1, - -1, -1, -1, 8, // r12 .. 
sp - -1, -1, -1, 10, // SHR_PC, SHR_PPC, SHR_PR, SHR_SR, - -1, -1, -1, -1, // SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL, + { GRF_STATIC, 8 }, { 0 } , { 0 } , { 0 } , +#endif + { 0 } , { 0 } , { 0 } , { 0 } , + { 0 } , { 0 } , { 0 } , { 0 } , + { 0 } , { 0 } , { 0 } , { 0 } , + // SHR_PC, SHR_PPC, SHR_PR, SHR_SR, + // SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL, + { 0 } , { 0 } , { 0 } , { GRF_STATIC, 10 }, + { 0 } , { 0 } , { 0 } , { 0 } , }; +// NB first TEMP, then REG. alloc/evict algorithm depends on this +static cache_reg_t cache_regs[] = { + { 12, HRF_TEMP }, + { 14, HRF_TEMP }, + { 0, HRF_TEMP }, + { 1, HRF_TEMP }, + { 2, HRF_TEMP }, + { 3, HRF_TEMP }, + { 8, HRF_LOCKED }, +#ifndef __MACH__ // no r9.. + { 9, HRF_LOCKED }, #endif - -static temp_reg_t reg_temp[] = { - { 0, }, - { 1, }, - { 12, }, - { 14, }, - { 2, }, - { 3, }, + { 10, HRF_LOCKED }, + { 4, HRF_REG }, + { 5, HRF_REG }, + { 6, HRF_REG }, + { 7, HRF_REG }, }; #elif defined(__i386__) #include "../drc/emit_x86.c" -static const int reg_map_g2h[] = { - xSI,-1, -1, -1, - -1, -1, -1, -1, - -1, -1, -1, -1, - -1, -1, -1, -1, // r12 .. sp - -1, -1, -1, xDI, // SHR_PC, SHR_PPC, SHR_PR, SHR_SR, - -1, -1, -1, -1, // SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL, +static guest_reg_t guest_regs[] = { + // SHR_R0 .. 
SHR_SP + {GRF_STATIC, xSI}, { 0 } , { 0 } , { 0 } , + { 0 } , { 0 } , { 0 } , { 0 } , + { 0 } , { 0 } , { 0 } , { 0 } , + { 0 } , { 0 } , { 0 } , { 0 } , + // SHR_PC, SHR_PPC, SHR_PR, SHR_SR, + // SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL, + { 0 } , { 0 } , { 0 } , {GRF_STATIC, xDI}, + { 0 } , { 0 } , { 0 } , { 0 } , }; // ax, cx, dx are usually temporaries by convention -static temp_reg_t reg_temp[] = { - { xAX, }, - { xBX, }, - { xCX, }, - { xDX, }, +static cache_reg_t cache_regs[] = { + { xBX, HRF_REG|HRF_TEMP }, + { xCX, HRF_REG|HRF_TEMP }, + { xDX, HRF_REG|HRF_TEMP }, + { xAX, HRF_REG|HRF_TEMP }, + { xSI, HRF_LOCKED }, + { xDI, HRF_LOCKED }, }; #elif defined(__x86_64__) #include "../drc/emit_x86.c" -static const int reg_map_g2h[] = { +static guest_reg_t guest_regs[] = { + // SHR_R0 .. SHR_SP #ifndef _WIN32 - -1, -1, -1, -1, - -1, -1, -1, -1, - -1, -1, -1, -1, - -1, -1, -1, -1, // r12 .. sp - -1, -1, -1, xBX, // SHR_PC, SHR_PPC, SHR_PR, SHR_SR, - -1, -1, -1, -1, // SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL, + { 0 } , { 0 } , { 0 } , { 0 } , #else - xDI,-1, -1, -1, - -1, -1, -1, -1, - -1, -1, -1, -1, - -1, -1, -1, -1, // r12 .. 
sp - -1, -1, -1, xBX, // SHR_PC, SHR_PPC, SHR_PR, SHR_SR, - -1, -1, -1, -1, // SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL, + {GRF_STATIC, xDI}, { 0 } , { 0 } , { 0 } , #endif + { 0 } , { 0 } , { 0 } , { 0 } , + { 0 } , { 0 } , { 0 } , { 0 } , + { 0 } , { 0 } , { 0 } , { 0 } , + // SHR_PC, SHR_PPC, SHR_PR, SHR_SR, + // SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL, + { 0 } , { 0 } , { 0 } , {GRF_STATIC, xBX}, + { 0 } , { 0 } , { 0 } , { 0 } , }; // ax, cx, dx are usually temporaries by convention -static temp_reg_t reg_temp[] = { - { xAX, }, - { xCX, }, - { xDX, }, - { xSI, }, +static cache_reg_t cache_regs[] = { + { xCX, HRF_REG|HRF_TEMP }, + { xDX, HRF_REG|HRF_TEMP }, + { xAX, HRF_REG|HRF_TEMP }, + { xSI, HRF_REG|HRF_TEMP }, #ifndef _WIN32 - { xDI, }, + { xDI, HRF_REG|HRF_TEMP }, +#else + { xDI, HRF_LOCKED }, #endif + { xBX, HRF_LOCKED }, }; #else #error unsupported arch #endif +static signed char reg_map_host[HOST_REGS]; + #define T 0x00000001 #define S 0x00000002 #define I 0x000000f0 @@ -468,6 +505,11 @@ static void REGPARM(2) (*sh2_drc_write8)(u32 a, u32 d); static void REGPARM(2) (*sh2_drc_write16)(u32 a, u32 d); static void REGPARM(3) (*sh2_drc_write32)(u32 a, u32 d, SH2 *sh2); +// flags for memory access +#define MF_SIZEMASK 0x03 // size of access +#define MF_POSTINCR 0x10 // post increment (for read_rr) +#define MF_PREDECR MF_POSTINCR // pre decrement (for write_rr) + // address space stuff static int dr_is_rom(u32 a) { @@ -801,12 +843,13 @@ static void dr_link_blocks(struct block_entry *be, int tcache_id) #endif } -#define ADD_TO_ARRAY(array, count, item, failcode) \ +#define ADD_TO_ARRAY(array, count, item, failcode) { \ if (count >= ARRAY_SIZE(array)) { \ dbg(1, "warning: " #array " overflow"); \ failcode; \ - } \ - array[count++] = item; + } else \ + array[count++] = item; \ +} static int find_in_array(u32 *array, size_t size, u32 what) { @@ -820,6 +863,11 @@ static int find_in_array(u32 *array, size_t size, u32 what) // 
--------------------------------------------------------------- +// NB rcache allocation dependencies: +// - get_reg_arg/get_tmp_arg first (might evict other regs just allocated) +// - get_reg(..., NULL) before get_reg(..., &x) if it might get the same reg +// - get_reg(..., RC_GR_READ/RMW, ...) before WRITE (might evict needed reg) + // register cache / constant propagation stuff typedef enum { RC_GR_READ, @@ -827,43 +875,57 @@ typedef enum { RC_GR_RMW, } rc_gr_mode; -static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking); +static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr); +static void rcache_remove_vreg_alias(int x, sh2_reg_e r); -// guest regs with constants -static u32 dr_gcregs[24]; -// a mask of constant/dirty regs -static u32 dr_gcregs_mask; -static u32 dr_gcregs_dirty; +#define RCACHE_DUMP(msg) { \ + cache_reg_t *cp; \ + guest_reg_t *gp; \ + int i; \ + printf("cache dump %s:\n",msg); \ + printf("cache_regs:\n"); \ + for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { \ + cp = &cache_regs[i]; \ + if (cp->type != HR_FREE || cp->gregs) \ + printf("%d: hr=%d t=%d f=%x m=%x\n", i, cp->hreg, cp->type, cp->flags, cp->gregs); \ + } \ + printf("guest_regs:\n"); \ + for (i = 0; i < ARRAY_SIZE(guest_regs); i++) { \ + gp = &guest_regs[i]; \ + if (gp->vreg != -1 || gp->sreg >= 0) \ + printf("%d: v=%d f=%x s=%d\n", i, gp->vreg, gp->flags, gp->sreg); \ + } \ +} #if PROPAGATE_CONSTANTS static void gconst_set(sh2_reg_e r, u32 val) { - dr_gcregs_mask |= 1 << r; - dr_gcregs[r] = val; + guest_regs[r].flags |= GRF_CONST; + guest_regs[r].val = val; } static void gconst_new(sh2_reg_e r, u32 val) { - int i; - gconst_set(r, val); - dr_gcregs_dirty |= 1 << r; + guest_regs[r].flags |= GRF_CDIRTY; // throw away old r that we might have cached - for (i = ARRAY_SIZE(reg_temp) - 1; i >= 0; i--) { - if ((reg_temp[i].type == HR_CACHED) && - reg_temp[i].greg == r) { - reg_temp[i].type = HR_FREE; - reg_temp[i].flags = 0; - } - } + if 
(guest_regs[r].vreg >= 0) + rcache_remove_vreg_alias(guest_regs[r].vreg, r); +} + +static void gconst_copy(sh2_reg_e rd, sh2_reg_e rs) +{ + guest_regs[rd].flags &= ~(GRF_CONST|GRF_CDIRTY); + if (guest_regs[rs].flags & GRF_CONST) + gconst_set(rd, guest_regs[rs].val); } #endif static int gconst_get(sh2_reg_e r, u32 *val) { - if (dr_gcregs_mask & (1 << r)) { - *val = dr_gcregs[r]; + if (guest_regs[r].flags & GRF_CONST) { + *val = guest_regs[r].val; return 1; } return 0; @@ -871,7 +933,7 @@ static int gconst_get(sh2_reg_e r, u32 *val) static int gconst_check(sh2_reg_e r) { - if ((dr_gcregs_mask | dr_gcregs_dirty) & (1 << r)) + if (guest_regs[r].flags & (GRF_CONST|GRF_CDIRTY)) return 1; return 0; } @@ -879,68 +941,182 @@ static int gconst_check(sh2_reg_e r) // update hr if dirty, else do nothing static int gconst_try_read(int hr, sh2_reg_e r) { - if (dr_gcregs_dirty & (1 << r)) { - emith_move_r_imm(hr, dr_gcregs[r]); - dr_gcregs_dirty &= ~(1 << r); + if (guest_regs[r].flags & GRF_CDIRTY) { + emith_move_r_imm(hr, guest_regs[r].val); + guest_regs[r].flags &= ~GRF_CDIRTY; return 1; } return 0; } -static void gconst_check_evict(sh2_reg_e r) +static u32 gconst_dirty_mask(void) { - if (dr_gcregs_mask & (1 << r)) - // no longer cached in reg, make dirty again - dr_gcregs_dirty |= 1 << r; + u32 mask = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(guest_regs); i++) + if (guest_regs[i].flags & GRF_CDIRTY) + mask |= (1 << i); + return mask; } static void gconst_kill(sh2_reg_e r) { - dr_gcregs_mask &= ~(1 << r); - dr_gcregs_dirty &= ~(1 << r); + guest_regs[r].flags &= ~(GRF_CONST|GRF_CDIRTY); } -#if PROPAGATE_CONSTANTS -static void gconst_copy(sh2_reg_e rd, sh2_reg_e rs) -{ - u32 val; - - gconst_kill(rd); - if (gconst_get(rs, &val)) - gconst_set(rd, val); -} -#endif - static void gconst_clean(void) { int i; - for (i = 0; i < ARRAY_SIZE(dr_gcregs); i++) - if (dr_gcregs_dirty & (1 << i)) { + for (i = 0; i < ARRAY_SIZE(guest_regs); i++) + if (guest_regs[i].flags & GRF_CDIRTY) { // using 
RC_GR_READ here: it will call gconst_try_read, // cache the reg and mark it dirty. - rcache_get_reg_(i, RC_GR_READ, 0); + rcache_get_reg_(i, RC_GR_READ, 0, NULL); } } static void gconst_invalidate(void) { - dr_gcregs_mask = dr_gcregs_dirty = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(guest_regs); i++) + guest_regs[i].flags &= ~(GRF_CONST|GRF_CDIRTY); } static u16 rcache_counter; +static u32 rcache_static; +static u32 rcache_locked; +static u32 rcache_hint_soon; +static u32 rcache_hint_late; +#define rcache_hint (rcache_hint_soon|rcache_hint_late) -static temp_reg_t *rcache_evict(void) +// binary search approach, since we don't have CLZ on ARM920T +#define FOR_ALL_BITS_SET_DO(mask, bit, code) { \ + u32 __mask = mask; \ + for (bit = 31; bit >= 0 && mask; bit--, __mask <<= 1) { \ + if (!(__mask & (0xffff << 16))) \ + bit -= 16, __mask <<= 16; \ + if (!(__mask & (0xff << 24))) \ + bit -= 8, __mask <<= 8; \ + if (!(__mask & (0xf << 28))) \ + bit -= 4, __mask <<= 4; \ + if (!(__mask & (0x3 << 30))) \ + bit -= 2, __mask <<= 2; \ + if (!(__mask & (0x1 << 31))) \ + bit -= 1, __mask <<= 1; \ + if (__mask & (0x1 << 31)) { \ + code; \ + } \ + } \ +} + +static void rcache_unmap_vreg(int x) { - // evict reg with oldest stamp - int i, oldest = -1; + int i; + + FOR_ALL_BITS_SET_DO(cache_regs[x].gregs, i, + guest_regs[i].vreg = -1); + if (cache_regs[x].type != HR_STATIC) + cache_regs[x].type = HR_FREE; + cache_regs[x].gregs = 0; + cache_regs[x].flags &= (HRF_REG|HRF_TEMP); +} + +static void rcache_clean_vreg(int x) +{ + int r; + + if (cache_regs[x].flags & HRF_DIRTY) { // writeback + cache_regs[x].flags &= ~HRF_DIRTY; + FOR_ALL_BITS_SET_DO(cache_regs[x].gregs, r, + if (guest_regs[r].flags & GRF_DIRTY) { + if (guest_regs[r].flags & GRF_STATIC) { + if (guest_regs[r].vreg != guest_regs[r].sreg) { + if (!(cache_regs[guest_regs[r].sreg].flags & HRF_LOCKED)) { + // statically mapped reg not in its sreg. 
move back to sreg + rcache_clean_vreg(guest_regs[r].sreg); + rcache_unmap_vreg(guest_regs[r].sreg); + emith_move_r_r(cache_regs[guest_regs[r].sreg].hreg, cache_regs[guest_regs[r].vreg].hreg); + rcache_remove_vreg_alias(x, r); + cache_regs[guest_regs[r].sreg].gregs = (1 << r); + guest_regs[r].vreg = guest_regs[r].sreg; + } else { + // must evict since sreg is locked + emith_ctx_write(cache_regs[x].hreg, r * 4); + guest_regs[r].vreg = -1; + } + } + } else + emith_ctx_write(cache_regs[x].hreg, r * 4); + } + guest_regs[r].flags &= ~GRF_DIRTY;) + } +} + +static void rcache_remove_vreg_alias(int x, sh2_reg_e r) +{ + cache_regs[x].gregs &= ~(1 << r); + if (!cache_regs[x].gregs) { + // no reg mapped -> free vreg + if (cache_regs[x].type != HR_STATIC) + cache_regs[x].type = HR_FREE; + cache_regs[x].flags &= (HRF_REG|HRF_TEMP); + } + guest_regs[r].vreg = -1; +} + +static void rcache_evict_vreg(int x) +{ + rcache_clean_vreg(x); + rcache_unmap_vreg(x); +} + +static void rcache_evict_vreg_aliases(int x, sh2_reg_e r) +{ + cache_regs[x].gregs &= ~(1 << r); + rcache_evict_vreg(x); + cache_regs[x].gregs = (1 << r); + if (cache_regs[x].type != HR_STATIC) + cache_regs[x].type = HR_CACHED; + if (guest_regs[r].flags & GRF_DIRTY) + cache_regs[x].flags |= HRF_DIRTY; +} + +static cache_reg_t *rcache_evict(void) +{ + // evict reg with oldest stamp (only for HRF_REG, no temps) + int i, i_prio, oldest = -1, prio = 0; u16 min_stamp = (u16)-1; - for (i = 0; i < ARRAY_SIZE(reg_temp); i++) { - if (reg_temp[i].type == HR_CACHED && !(reg_temp[i].flags & HRF_LOCKED) && - reg_temp[i].stamp <= min_stamp) { - min_stamp = reg_temp[i].stamp; + for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { + // consider only unlocked REG + if (!(cache_regs[i].flags & HRF_REG) || (cache_regs[i].flags & HRF_LOCKED)) + continue; + if (cache_regs[i].type == HR_FREE || (cache_regs[i].type == HR_TEMP)) { oldest = i; + break; + } + if (cache_regs[i].type == HR_CACHED) { + if (rcache_locked & cache_regs[i].gregs) + // REGs 
needed for the current insn + i_prio = 1; + else if (rcache_hint_soon & cache_regs[i].gregs) + // REGs needed in some future insn + i_prio = 2; + else if (rcache_hint_late & cache_regs[i].gregs) + // REGs needed in some future insn + i_prio = 3; + else + // REGs not needed soon + i_prio = 4; + + if (prio < i_prio || (prio == i_prio && cache_regs[i].stamp < min_stamp)) { + min_stamp = cache_regs[i].stamp; + oldest = i; + prio = i_prio; + } } } @@ -949,110 +1125,254 @@ static temp_reg_t *rcache_evict(void) exit(1); } - i = oldest; - if (reg_temp[i].type == HR_CACHED) { - if (reg_temp[i].flags & HRF_DIRTY) - // writeback - emith_ctx_write(reg_temp[i].hreg, reg_temp[i].greg * 4); - gconst_check_evict(reg_temp[i].greg); - } + if (cache_regs[oldest].type == HR_CACHED) + rcache_evict_vreg(oldest); + cache_regs[oldest].type = HR_FREE; + cache_regs[oldest].flags &= (HRF_TEMP|HRF_REG); + cache_regs[oldest].gregs = 0; - reg_temp[i].type = HR_FREE; - reg_temp[i].flags = 0; - return ®_temp[i]; + return &cache_regs[oldest]; } -static int get_reg_static(sh2_reg_e r, rc_gr_mode mode) +#if REMAP_REGISTER +// maps a host register to a REG +static int rcache_map_reg(sh2_reg_e r, int hr, int mode) { - int i = reg_map_g2h[r]; - if (i != -1) { - if (mode != RC_GR_WRITE) - gconst_try_read(i, r); + int i; + + gconst_kill(r); + + // lookup the TEMP hr maps to + i = reg_map_host[hr]; + if (i < 0) { + // must not happen + printf("invalid host register %d\n", hr); + exit(1); } - return i; + + // deal with statically mapped regs + if (mode == RC_GR_RMW && (guest_regs[r].flags & GRF_STATIC)) { + if (guest_regs[r].vreg == guest_regs[r].sreg) { + // STATIC in its sreg with no aliases, and some processing pending + if (cache_regs[guest_regs[r].vreg].gregs == 1 << r) + return cache_regs[guest_regs[r].vreg].hreg; + } else if (!cache_regs[guest_regs[r].sreg].gregs) + // STATIC not in its sreg, with sreg available -> move it + i = guest_regs[r].sreg; + } + + // remove old mappings of r and i if one 
exists + if (guest_regs[r].vreg >= 0) + rcache_remove_vreg_alias(guest_regs[r].vreg, r); + if (cache_regs[i].type == HR_CACHED) + rcache_unmap_vreg(i); + // set new mappping + if (cache_regs[i].type != HR_STATIC) + cache_regs[i].type = HR_CACHED; + cache_regs[i].gregs = 1 << r; + cache_regs[i].flags &= (HRF_TEMP|HRF_REG); + cache_regs[i].stamp = ++rcache_counter; + cache_regs[i].flags |= HRF_DIRTY|HRF_LOCKED; + guest_regs[r].flags |= GRF_DIRTY; + guest_regs[r].vreg = i; + return cache_regs[i].hreg; } +// remap vreg from a TEMP to a REG if it is hinted (upcoming TEMP invalidation) +static void rcache_remap_vreg(int r) +{ + int i, j, free = -1, cached = -1, hinted = -1; + u16 min_stamp_cached = (u16)-1, min_stamp_hinted = -1; + + // r must be a vreg + if (cache_regs[r].type != HR_CACHED) + return; + // if r is already a REG or isn't used, clean here to avoid data loss on inval + if ((cache_regs[r].flags & HRF_REG) || !(rcache_hint & cache_regs[r].gregs)) { + rcache_clean_vreg(r); + return; + } + + // find REG, either free or unused temp or oldest cached + for (i = 0; i < ARRAY_SIZE(cache_regs) && free < 0; i++) { + if ((cache_regs[i].flags & HRF_TEMP) || (cache_regs[i].flags & HRF_LOCKED)) + continue; + if (cache_regs[i].type == HR_FREE || cache_regs[i].type == HR_TEMP) + free = i; + if (cache_regs[i].type == HR_CACHED && !(rcache_hint & cache_regs[i].gregs)) { + if (cache_regs[i].stamp < min_stamp_cached) { + min_stamp_cached = cache_regs[i].stamp; + cached = i; + } + } + if (cache_regs[i].type == HR_CACHED && !(rcache_hint_soon & cache_regs[i].gregs) + && (rcache_hint_soon & cache_regs[r].gregs)) + if (cache_regs[i].stamp < min_stamp_hinted) { + min_stamp_hinted = cache_regs[i].stamp; + hinted = i; + } + } + + if (free >= 0) { + i = free; + } else if (cached >= 0 && cached != r) { + i = cached; + rcache_evict_vreg(i); + } else if (hinted >= 0 && hinted != r) { + i = hinted; + rcache_evict_vreg(i); + } else { + rcache_clean_vreg(r); + return; + } + + // set new 
mapping and remove old one + cache_regs[i].type = HR_CACHED; + cache_regs[i].gregs = cache_regs[r].gregs; + cache_regs[i].flags &= (HRF_TEMP|HRF_REG); + cache_regs[i].flags |= cache_regs[r].flags & ~(HRF_TEMP|HRF_REG); + cache_regs[i].stamp = cache_regs[r].stamp; + emith_move_r_r(cache_regs[i].hreg, cache_regs[r].hreg); + for (j = 0; j < ARRAY_SIZE(guest_regs); j++) + if (guest_regs[j].vreg == r) + guest_regs[j].vreg = i; + cache_regs[r].type = HR_FREE; + cache_regs[r].flags &= (HRF_TEMP|HRF_REG); + cache_regs[r].gregs = 0; +} +#endif + // note: must not be called when doing conditional code -static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking) +static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr) { - temp_reg_t *tr; - int i, ret; - - // maybe statically mapped? - ret = get_reg_static(r, mode); - if (ret != -1) - goto end; + cache_reg_t *tr = NULL; + int i, h, split = -1; rcache_counter++; // maybe already cached? // if so, prefer against gconst (they must be in sync) - for (i = ARRAY_SIZE(reg_temp) - 1; i >= 0; i--) { - if (reg_temp[i].type == HR_CACHED && reg_temp[i].greg == r) { - reg_temp[i].stamp = rcache_counter; - if (mode != RC_GR_READ) - reg_temp[i].flags |= HRF_DIRTY; - ret = reg_temp[i].hreg; + i = guest_regs[r].vreg; + if ((guest_regs[r].flags & GRF_STATIC) && i != guest_regs[r].sreg && + !(cache_regs[guest_regs[r].sreg].flags & HRF_LOCKED) && + (i < 0 || mode != RC_GR_READ) && + !((rcache_hint_soon|rcache_locked) & cache_regs[guest_regs[r].sreg].gregs)) { + // good opportunity to relocate a remapped STATIC + h = guest_regs[r].sreg; + rcache_evict_vreg(h); + tr = &cache_regs[h]; + if (i >= 0) { + if (mode != RC_GR_WRITE) { + if (hr) + *hr = cache_regs[i].hreg; + else + emith_move_r_r(cache_regs[h].hreg, cache_regs[i].hreg); + hr = NULL; + } + rcache_remove_vreg_alias(guest_regs[r].vreg, r); + } else if (mode != RC_GR_WRITE) { + if (gconst_try_read(tr->hreg, r)) { + tr->flags |= HRF_DIRTY; + 
guest_regs[r].flags |= GRF_DIRTY; + } else + emith_ctx_read(tr->hreg, r * 4); + } + guest_regs[r].vreg = guest_regs[r].sreg; + tr->gregs = 1 << r; + goto end; + } else if (i >= 0) { + if (mode == RC_GR_READ || !(cache_regs[i].gregs & ~(1 << r))) { + // either only reading, or no multiple mapping + tr = &cache_regs[i]; + goto end; + } + // split if aliases needed rsn, or already locked, or r is STATIC in sreg + if (((rcache_hint|rcache_locked) & cache_regs[i].gregs & ~(1 << r)) || + (cache_regs[i].flags & HRF_LOCKED) || + (cache_regs[i].type == HR_STATIC && !(guest_regs[r].flags & GRF_STATIC))) { + // need to split up. take reg out here to avoid unnecessary writebacks + cache_regs[i].gregs &= ~(1 << r); + split = i; + } else { + // aliases not needed anytime soon, remove them + // XXX split aliases away if writing and static and not locked and hinted? + rcache_evict_vreg_aliases(i, r); + tr = &cache_regs[i]; goto end; } } - // use any free reg - for (i = ARRAY_SIZE(reg_temp) - 1; i >= 0; i--) { - if (reg_temp[i].type == HR_FREE) { - tr = ®_temp[i]; - goto do_alloc; + // get a free reg, but use temps only if r is not needed soon + for (i = ARRAY_SIZE(cache_regs) - 1; i >= 0; i--) { + if ((cache_regs[i].type == HR_FREE || + (cache_regs[i].type == HR_TEMP && !(cache_regs[i].flags & HRF_LOCKED))) && + (!(rcache_hint & (1 << r)) || (cache_regs[i].flags & HRF_REG))) { + tr = &cache_regs[i]; + break; } } - tr = rcache_evict(); + if (!tr) + tr = rcache_evict(); -do_alloc: tr->type = HR_CACHED; - if (do_locking) - tr->flags |= HRF_LOCKED; - if (mode != RC_GR_READ) - tr->flags |= HRF_DIRTY; - tr->greg = r; - tr->stamp = rcache_counter; - ret = tr->hreg; + tr->gregs = 1 << r; + guest_regs[r].vreg = tr - cache_regs; if (mode != RC_GR_WRITE) { - if (gconst_check(r)) { - if (gconst_try_read(ret, r)) - tr->flags |= HRF_DIRTY; - } - else + if (gconst_try_read(tr->hreg, r)) { + tr->flags |= HRF_DIRTY; + guest_regs[r].flags |= GRF_DIRTY; + } else if (split >= 0) { + if (hr) { + 
cache_regs[split].flags |= HRF_LOCKED; + *hr = cache_regs[split].hreg; + hr = NULL; + } else if (tr->hreg != cache_regs[split].hreg) + emith_move_r_r(tr->hreg, cache_regs[split].hreg); + } else emith_ctx_read(tr->hreg, r * 4); } end: - if (mode != RC_GR_READ) + if (hr) + *hr = tr->hreg; + if (do_locking) + tr->flags |= HRF_LOCKED; + tr->stamp = rcache_counter; + if (mode != RC_GR_READ) { + tr->flags |= HRF_DIRTY; + guest_regs[r].flags |= GRF_DIRTY; gconst_kill(r); + } - return ret; + return tr->hreg; } -static int rcache_get_reg(sh2_reg_e r, rc_gr_mode mode) +static int rcache_get_reg(sh2_reg_e r, rc_gr_mode mode, int *hr) { - return rcache_get_reg_(r, mode, 1); + return rcache_get_reg_(r, mode, 1, hr); } static int rcache_get_tmp(void) { - temp_reg_t *tr; + cache_reg_t *tr = NULL; int i; - for (i = 0; i < ARRAY_SIZE(reg_temp); i++) - if (reg_temp[i].type == HR_FREE) { - tr = ®_temp[i]; - goto do_alloc; + // use any free reg, but prefer TEMP regs + for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { + if (cache_regs[i].type == HR_FREE || + (cache_regs[i].type == HR_TEMP && !(cache_regs[i].flags & HRF_LOCKED))) { + tr = &cache_regs[i]; + break; } + } - tr = rcache_evict(); + if (!tr) + tr = rcache_evict(); -do_alloc: tr->type = HR_TEMP; + tr->flags |= HRF_LOCKED; return tr->hreg; } @@ -1060,192 +1380,421 @@ static int rcache_get_hr_id(int hr) { int i; - for (i = 0; i < ARRAY_SIZE(reg_temp); i++) - if (reg_temp[i].hreg == hr) - break; - - if (i == ARRAY_SIZE(reg_temp)) // can't happen + i = reg_map_host[hr]; + if (i < 0) // can't happen exit(1); - if (reg_temp[i].type == HR_CACHED) { - // writeback - if (reg_temp[i].flags & HRF_DIRTY) - emith_ctx_write(reg_temp[i].hreg, reg_temp[i].greg * 4); - gconst_check_evict(reg_temp[i].greg); - } - else if (reg_temp[i].type == HR_TEMP) { +#if REMAP_REGISTER + if (cache_regs[i].type == HR_CACHED) + rcache_remap_vreg(i); +#endif + if (cache_regs[i].type == HR_CACHED) + rcache_evict_vreg(i); + else if (cache_regs[i].type == HR_TEMP && 
(cache_regs[i].flags & HRF_LOCKED)) { printf("host reg %d already used, aborting\n", hr); exit(1); } - reg_temp[i].type = HR_FREE; - reg_temp[i].flags = 0; - return i; } static int rcache_get_arg_id(int arg) { - int r = 0; - host_arg2reg(r, arg); - return rcache_get_hr_id(r); + int hr = 0; + + host_arg2reg(hr, arg); + return rcache_get_hr_id(hr); } // get a reg to be used as function arg static int rcache_get_tmp_arg(int arg) { int id = rcache_get_arg_id(arg); - reg_temp[id].type = HR_TEMP; + cache_regs[id].type = HR_TEMP; + cache_regs[id].flags |= HRF_LOCKED; - return reg_temp[id].hreg; + return cache_regs[id].hreg; } // ... as return value after a call static int rcache_get_tmp_ret(void) { int id = rcache_get_hr_id(RET_REG); - reg_temp[id].type = HR_TEMP; + cache_regs[id].type = HR_TEMP; + cache_regs[id].flags |= HRF_LOCKED; - return reg_temp[id].hreg; + return cache_regs[id].hreg; } -// same but caches a reg. RC_GR_READ only. -static int rcache_get_reg_arg(int arg, sh2_reg_e r) +// same but caches a reg if access is readonly (announced by hr being NULL) +static int rcache_get_reg_arg(int arg, sh2_reg_e r, int *hr) { int i, srcr, dstr, dstid; - int dirty = 0, src_dirty = 0; + int dirty = 0, src_dirty = 0, is_const = 0, is_cached = 0; + u32 val; + host_arg2reg(dstr, arg); - dstid = rcache_get_arg_id(arg); - dstr = reg_temp[dstid].hreg; + i = guest_regs[r].vreg; + if (i >= 0 && cache_regs[i].type == HR_CACHED && cache_regs[i].hreg == dstr) + // r is already in arg + dstid = i; + else + dstid = rcache_get_arg_id(arg); + dstr = cache_regs[dstid].hreg; - // maybe already statically mapped? 
- srcr = get_reg_static(r, RC_GR_READ); - if (srcr != -1) - goto do_cache; + if (rcache_hint & (1 << r)) { + // r is needed later on anyway + srcr = rcache_get_reg_(r, RC_GR_READ, 0, NULL); + is_cached = (cache_regs[reg_map_host[srcr]].type == HR_CACHED); + } else if ((guest_regs[r].flags & GRF_CDIRTY) && gconst_get(r, &val)) { + // r has an uncomitted const - load into arg, but keep constant uncomitted + srcr = dstr; + is_const = 1; + } else if ((i = guest_regs[r].vreg) >= 0) { + // maybe already cached? + srcr = cache_regs[i].hreg; + is_cached = (cache_regs[reg_map_host[srcr]].type == HR_CACHED); + } else { + // must read either const or from ctx + srcr = dstr; + if (rcache_static & (1 << r)) + srcr = rcache_get_reg_(r, RC_GR_READ, 0, NULL); + else if (gconst_try_read(srcr, r)) + dirty = 1; + else + emith_ctx_read(srcr, r * 4); + } - // maybe already cached? - for (i = ARRAY_SIZE(reg_temp) - 1; i >= 0; i--) { - if ((reg_temp[i].type == HR_CACHED) && - reg_temp[i].greg == r) - { - srcr = reg_temp[i].hreg; - if (reg_temp[i].flags & HRF_DIRTY) - src_dirty = 1; - goto do_cache; + if (is_cached) { + i = reg_map_host[srcr]; + if (srcr == dstr) { // evict aliases here since it is reallocated below + if (guest_regs[r].flags & GRF_STATIC) // move STATIC back to its sreg + rcache_clean_vreg(guest_regs[r].vreg); +#if REMAP_REGISTER + rcache_remap_vreg(i); +#endif + if (cache_regs[i].type == HR_CACHED) + rcache_evict_vreg(i); + } + else if (hr != NULL) // must lock srcr if not copied here + cache_regs[i].flags |= HRF_LOCKED; + if (guest_regs[r].flags & GRF_DIRTY) + src_dirty = 1; + } + + cache_regs[dstid].type = HR_TEMP; + if (is_const) { + // uncomitted constant + emith_move_r_imm(srcr, val); + } else if (dstr != srcr) { + // arg is a copy of cached r + if (hr == NULL) + emith_move_r_r(dstr, srcr); + } else if (hr != NULL) { + // caller will modify arg, so it will soon be out of sync with r + if (dirty || src_dirty) + emith_ctx_write(dstr, r * 4); // must clean since arg 
will be modified + } else if (guest_regs[r].vreg < 0) { + // keep arg as vreg for r + cache_regs[dstid].type = HR_CACHED; + cache_regs[dstid].gregs = 1 << r; + guest_regs[r].vreg = dstid; + if (dirty || src_dirty) { // mark as modifed for cleaning later on + cache_regs[dstid].flags |= HRF_DIRTY; + guest_regs[r].flags |= GRF_DIRTY; } } - // must read - srcr = dstr; - if (gconst_check(r)) { - if (gconst_try_read(srcr, r)) - dirty = 1; - } - else - emith_ctx_read(srcr, r * 4); + if (hr) + *hr = srcr; -do_cache: - if (dstr != srcr) - emith_move_r_r(dstr, srcr); -#if 1 - else - dirty |= src_dirty; - - if (dirty) - // must clean, callers might want to modify the arg before call - emith_ctx_write(dstr, r * 4); -#else - if (dirty) - reg_temp[dstid].flags |= HRF_DIRTY; -#endif - - reg_temp[dstid].stamp = ++rcache_counter; - reg_temp[dstid].type = HR_CACHED; - reg_temp[dstid].greg = r; - reg_temp[dstid].flags |= HRF_LOCKED; + cache_regs[dstid].stamp = ++rcache_counter; + cache_regs[dstid].flags |= HRF_LOCKED; return dstr; } static void rcache_free_tmp(int hr) { - int i; - for (i = 0; i < ARRAY_SIZE(reg_temp); i++) - if (reg_temp[i].hreg == hr) - break; - - if (i == ARRAY_SIZE(reg_temp) || reg_temp[i].type != HR_TEMP) { - printf("rcache_free_tmp fail: #%i hr %d, type %d\n", i, hr, reg_temp[i].type); + int i = reg_map_host[hr]; + if (i < 0 || cache_regs[i].type != HR_TEMP) { + printf("rcache_free_tmp fail: #%i hr %d, type %d\n", i, hr, cache_regs[i].type); return; } - reg_temp[i].type = HR_FREE; - reg_temp[i].flags = 0; + cache_regs[i].type = HR_FREE; + cache_regs[i].flags &= (HRF_REG|HRF_TEMP); +} + +// saves temporary result either in REG or in drctmp +static int rcache_save_tmp(int hr) +{ + int i, free = -1, cached = -1; + u16 min_stamp = (u16)-1; + + // find REG, either free or unlocked temp or oldest non-hinted cached + for (i = 0; i < ARRAY_SIZE(cache_regs) && free < 0; i++) { + if ((cache_regs[i].flags & HRF_TEMP) || (cache_regs[i].flags & HRF_LOCKED)) + continue; + if 
(cache_regs[i].type == HR_FREE || cache_regs[i].type == HR_TEMP) + free = i; + if (cache_regs[i].type == HR_CACHED && + !((rcache_hint | rcache_locked) & cache_regs[i].gregs)) { + if (cache_regs[i].stamp < min_stamp) { + min_stamp = cache_regs[i].stamp; + cached = i; + } + } + } + + if (free >= 0) + i = free; + else if (cached >= 0) { + i = cached; + rcache_evict_vreg(i); + } else { + // if none is available, store in drctmp + emith_ctx_write(hr, offsetof(SH2, drc_tmp)); + rcache_free_tmp(hr); + return -1; + } + + cache_regs[i].type = HR_CACHED; + cache_regs[i].gregs = 0; // not storing any guest register + cache_regs[i].flags &= (HRF_TEMP|HRF_REG); + cache_regs[i].flags |= HRF_LOCKED; + cache_regs[i].stamp = ++rcache_counter; + emith_move_r_r(cache_regs[i].hreg, hr); + rcache_free_tmp(hr); + return i; +} + +static int rcache_restore_tmp(int r) +{ + int hr; + + // find REG with tmp store: cached but with no gregs + if (r >= 0) { + if (cache_regs[r].type != HR_CACHED || cache_regs[r].gregs) { + printf("invalid tmp storage %d\n", r); + exit(1); + } + // found, transform to a TEMP + cache_regs[r].type = HR_TEMP; + cache_regs[r].flags |= HRF_LOCKED; + return cache_regs[r].hreg; + } + + // if not available, create a TEMP store and fetch from drctmp + hr = rcache_get_tmp(); + emith_ctx_read(hr, offsetof(SH2, drc_tmp)); + + return hr; } static void rcache_unlock(int hr) { - int i; - for (i = 0; i < ARRAY_SIZE(reg_temp); i++) - if (reg_temp[i].type == HR_CACHED && reg_temp[i].hreg == hr) - reg_temp[i].flags &= ~HRF_LOCKED; + if (hr >= 0) { + cache_regs[hr].flags &= ~HRF_LOCKED; + rcache_locked &= ~cache_regs[hr].gregs; + } } static void rcache_unlock_all(void) { int i; - for (i = 0; i < ARRAY_SIZE(reg_temp); i++) - reg_temp[i].flags &= ~HRF_LOCKED; + for (i = 0; i < ARRAY_SIZE(cache_regs); i++) + cache_regs[i].flags &= ~HRF_LOCKED; } -#if (DRC_DEBUG & (8|256|512|1024)) || defined(DRC_CMP) -static u32 rcache_used_hreg_mask(void) +static inline void rcache_set_locked(u32 
mask) +{ + rcache_locked = mask & ~rcache_static; +} + +static inline void rcache_set_hint_soon(u32 mask) +{ + rcache_hint_soon = mask & ~rcache_static; +} + +static inline void rcache_set_hint_late(u32 mask) +{ + rcache_hint_late = mask & ~rcache_static; +} + +static inline int rcache_is_hinted(sh2_reg_e r) +{ + // consider static REGs as always hinted, since they are always there + return ((rcache_hint | rcache_static) & (1 << r)); +} + +static inline int rcache_is_cached(sh2_reg_e r) +{ + // consider static REGs as always hinted, since they are always there + return (guest_regs[r].vreg >= 0); +} + +static inline u32 rcache_used_hreg_mask(void) { u32 mask = 0; int i; - for (i = 0; i < ARRAY_SIZE(reg_temp); i++) - if (reg_temp[i].type != HR_FREE) - mask |= 1 << reg_temp[i].hreg; + for (i = 0; i < ARRAY_SIZE(cache_regs); i++) + if (cache_regs[i].type != HR_FREE) + mask |= 1 << cache_regs[i].hreg; + + return mask & ~rcache_static; +} + +static inline u32 rcache_dirty_mask(void) +{ + u32 mask = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(guest_regs); i++) + if (guest_regs[i].flags & GRF_DIRTY) + mask |= 1 << i; + mask |= gconst_dirty_mask(); return mask; } + +static inline u32 rcache_reg_mask(void) +{ + u32 mask = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(cache_regs); i++) + if (cache_regs[i].type == HR_CACHED) + mask |= cache_regs[i].gregs; + + return mask; +} + +static void rcache_clean_tmp(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(cache_regs); i++) + if (cache_regs[i].type == HR_CACHED && (cache_regs[i].flags & HRF_TEMP)) +#if REMAP_REGISTER + rcache_remap_vreg(i); +#else + rcache_clean_vreg(i); #endif +} + +static void rcache_clean_mask(u32 mask) +{ + int i; + + // XXX consider gconst? 
+ if (!(mask &= ~rcache_static & ~gconst_dirty_mask())) + return; + + // clean only vregs where all aliases are covered by the mask + for (i = 0; i < ARRAY_SIZE(cache_regs); i++) + if (cache_regs[i].type == HR_CACHED && + (cache_regs[i].gregs & mask) && !(cache_regs[i].gregs & ~mask)) + rcache_clean_vreg(i); +} static void rcache_clean(void) { int i; gconst_clean(); - for (i = 0; i < ARRAY_SIZE(reg_temp); i++) - if (reg_temp[i].type == HR_CACHED && (reg_temp[i].flags & HRF_DIRTY)) { - // writeback - emith_ctx_write(reg_temp[i].hreg, reg_temp[i].greg * 4); - reg_temp[i].flags &= ~HRF_DIRTY; + for (i = ARRAY_SIZE(cache_regs)-1; i >= 0; i--) + if (cache_regs[i].type == HR_CACHED || cache_regs[i].type == HR_STATIC) + rcache_clean_vreg(i); +} + +static void rcache_invalidate_tmp(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { + if (cache_regs[i].flags & HRF_TEMP) { + if (cache_regs[i].type == HR_CACHED) + rcache_unmap_vreg(i); + cache_regs[i].type = HR_FREE; + cache_regs[i].flags &= (HRF_TEMP|HRF_REG); + cache_regs[i].gregs = 0; } + } } static void rcache_invalidate(void) { int i; - for (i = 0; i < ARRAY_SIZE(reg_temp); i++) { - reg_temp[i].type = HR_FREE; - reg_temp[i].flags = 0; + + for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { + cache_regs[i].flags &= (HRF_TEMP|HRF_REG); + if (cache_regs[i].type != HR_STATIC) + cache_regs[i].type = HR_FREE; + cache_regs[i].gregs = 0; } + for (i = 0; i < ARRAY_SIZE(guest_regs); i++) { + guest_regs[i].flags &= GRF_STATIC; + if (!(guest_regs[i].flags & GRF_STATIC)) + guest_regs[i].vreg = -1; + else { + if (guest_regs[i].vreg < 0) + emith_ctx_read(cache_regs[guest_regs[i].sreg].hreg, i*4); + else if (guest_regs[i].vreg != guest_regs[i].sreg) + emith_move_r_r(cache_regs[guest_regs[i].sreg].hreg, + cache_regs[guest_regs[i].vreg].hreg); + cache_regs[guest_regs[i].sreg].gregs = 1 << i; + guest_regs[i].vreg = guest_regs[i].sreg; + } + }; rcache_counter = 0; + rcache_hint_soon = rcache_hint_late = 0; gconst_invalidate(); 
} static void rcache_flush(void) { + rcache_unlock_all(); rcache_clean(); rcache_invalidate(); } +static void rcache_init(void) +{ + static int once = 1; + int i; + + // init is executed on every rom load, but this must only be executed once... + if (once) { + memset(reg_map_host, -1, sizeof(reg_map_host)); + for (i = 0; i < ARRAY_SIZE(cache_regs); i++) + reg_map_host[cache_regs[i].hreg] = i; + + for (i = 0; i < ARRAY_SIZE(guest_regs); i++) + if (guest_regs[i].flags & GRF_STATIC) { + rcache_static |= (1 << i); + guest_regs[i].sreg = reg_map_host[guest_regs[i].sreg]; + cache_regs[guest_regs[i].sreg].type = HR_STATIC; + } else + guest_regs[i].sreg = -1; + once = 0; + } + + for (i = 0; i < ARRAY_SIZE(guest_regs); i++) + if (guest_regs[i].flags & GRF_STATIC) { + guest_regs[i].vreg = guest_regs[i].sreg; + cache_regs[guest_regs[i].sreg].gregs = (1 << i); + } + + rcache_invalidate(); +} + // --------------------------------------------------------------- -static int emit_get_rbase_and_offs(u32 a, u32 *offs) +static int emit_get_rbase_and_offs(SH2 *sh2, u32 a, u32 *offs) { + u32 omask = 0xff; // offset mask, XXX: ARM oriented.. u32 mask = 0; int poffs; int hr; @@ -1254,11 +1803,19 @@ static int emit_get_rbase_and_offs(u32 a, u32 *offs) if (poffs == -1) return -1; - // XXX: could use some related reg hr = rcache_get_tmp(); - emith_ctx_read_ptr(hr, poffs); - emith_add_r_r_ptr_imm(hr, hr, a & mask & ~0xff); - *offs = a & 0xff; // XXX: ARM oriented.. + if (mask < 0x1000) { + // can't access data array or BIOS directly from ROM or SDRAM, + // since code may run on both SH2s (if the tcache_id would be known...) 
+ emith_ctx_read(hr, poffs); + if (a & mask & ~omask) + emith_add_r_imm(hr, a & mask & ~omask); + } else { + // known fixed host address + a = (a & mask) + *(u32 *)((char *)sh2 + poffs); + emith_move_r_imm(hr, (a & ~omask)); + } + *offs = a & omask; return hr; } @@ -1271,7 +1828,7 @@ static int emit_get_rom_data(sh2_reg_e r, u32 offs, int size, u32 *val) if (gconst_get(r, &tmp)) { tmp += offs; if (dr_is_rom(tmp)) { - switch (size) { + switch (size & MF_SIZEMASK) { case 0: *val = (s8)p32x_sh2_read8(tmp, sh2s); break; // 8 case 1: *val = (s16)p32x_sh2_read16(tmp, sh2s); break; // 16 case 2: *val = p32x_sh2_read32(tmp, sh2s); break; // 32 @@ -1287,7 +1844,7 @@ static void emit_move_r_imm32(sh2_reg_e dst, u32 imm) #if PROPAGATE_CONSTANTS gconst_new(dst, imm); #else - int hr = rcache_get_reg(dst, RC_GR_WRITE); + int hr = rcache_get_reg(dst, RC_GR_WRITE, NULL); emith_move_r_imm(hr, imm); #endif } @@ -1296,12 +1853,36 @@ static void emit_move_r_r(sh2_reg_e dst, sh2_reg_e src) { int hr_d, hr_s; - hr_s = rcache_get_reg(src, RC_GR_READ); - hr_d = rcache_get_reg(dst, RC_GR_WRITE); - emith_move_r_r(hr_d, hr_s); + if (guest_regs[src].vreg >= 0 || gconst_check(src) || rcache_is_hinted(src)) { + hr_s = rcache_get_reg(src, RC_GR_READ, NULL); +#if ALIAS_REGISTERS + // check for aliasing + int i = guest_regs[src].vreg; + if (guest_regs[dst].vreg != i) { + // remove possible old mapping of dst + if (guest_regs[dst].vreg >= 0) + rcache_remove_vreg_alias(guest_regs[dst].vreg, dst); + // make dst an alias of src + cache_regs[i].gregs |= (1 << dst); + cache_regs[i].flags |= HRF_DIRTY; + guest_regs[dst].flags |= GRF_DIRTY; + guest_regs[dst].vreg = i; + gconst_kill(dst); #if PROPAGATE_CONSTANTS - gconst_copy(dst, src); + gconst_copy(dst, src); #endif + return; + } +#endif + hr_d = rcache_get_reg(dst, RC_GR_WRITE, NULL); + emith_move_r_r(hr_d, hr_s); +#if PROPAGATE_CONSTANTS + gconst_copy(dst, src); +#endif + } else { + hr_d = rcache_get_reg(dst, RC_GR_WRITE, NULL); + emith_ctx_read(hr_d, 
src * 4); + } } // T must be clear, and comparison done just before this @@ -1317,25 +1898,22 @@ static int emit_memhandler_read(int size) { int arg1; - rcache_clean(); - + rcache_clean_tmp(); #ifndef DRC_SR_REG // must writeback cycles for poll detection stuff - if (reg_map_g2h[SHR_SR] != -1) - emith_ctx_write(reg_map_g2h[SHR_SR], SHR_SR * 4); + if (guest_regs[SHR_SR].vreg != -1) + rcache_evict_vreg(guest_regs[SHR_SR].vreg); #endif + arg1 = rcache_get_tmp_arg(1); emith_move_r_r_ptr(arg1, CONTEXT_REG); - switch (size) { + switch (size & MF_SIZEMASK) { case 0: emith_call(sh2_drc_read8); break; // 8 case 1: emith_call(sh2_drc_read16); break; // 16 case 2: emith_call(sh2_drc_read32); break; // 32 } - rcache_invalidate(); -#ifndef DRC_SR_REG - if (reg_map_g2h[SHR_SR] != -1) - emith_ctx_read(reg_map_g2h[SHR_SR], SHR_SR * 4); -#endif + + rcache_invalidate_tmp(); return rcache_get_tmp_ret(); } @@ -1343,52 +1921,52 @@ static int emit_memhandler_read(int size) static void emit_memhandler_write(int size) { int arg2; + + rcache_clean_tmp(); #ifndef DRC_SR_REG - if (reg_map_g2h[SHR_SR] != -1) - emith_ctx_write(reg_map_g2h[SHR_SR], SHR_SR * 4); + if (guest_regs[SHR_SR].vreg != -1) + rcache_evict_vreg(guest_regs[SHR_SR].vreg); #endif - rcache_clean(); arg2 = rcache_get_tmp_arg(2); emith_move_r_r_ptr(arg2, CONTEXT_REG); - switch (size) { + switch (size & MF_SIZEMASK) { case 0: emith_call(sh2_drc_write8); break; // 8 case 1: emith_call(sh2_drc_write16); break; // 16 case 2: emith_call(sh2_drc_write32); break; // 32 } - rcache_invalidate(); -#ifndef DRC_SR_REG - if (reg_map_g2h[SHR_SR] != -1) - emith_ctx_read(reg_map_g2h[SHR_SR], SHR_SR * 4); -#endif + rcache_invalidate_tmp(); } -// rd = @(Rs,#offs) -static int emit_memhandler_read_rr(sh2_reg_e rd, sh2_reg_e rs, u32 offs, int size) +// rd = @(Rs,#offs); rd < 0 -> return a temp +static int emit_memhandler_read_rr(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rs, u32 offs, int size) { int hr, hr2; u32 val, offs2; +#if PROPAGATE_CONSTANTS if 
(emit_get_rom_data(rs, offs, size, &val)) { if (rd == SHR_TMP) { hr2 = rcache_get_tmp(); emith_move_r_imm(hr2, val); } else { emit_move_r_imm32(rd, val); - hr2 = rcache_get_reg(rd, RC_GR_READ); + hr2 = rcache_get_reg(rd, RC_GR_READ, NULL); } + if ((size & MF_POSTINCR) && gconst_get(rs, &val)) + gconst_new(rs, val + (1 << (size & MF_SIZEMASK))); return hr2; } if (gconst_get(rs, &val)) { - hr = emit_get_rbase_and_offs(val + offs, &offs2); + hr = emit_get_rbase_and_offs(sh2, val + offs, &offs2); if (hr != -1) { if (rd == SHR_TMP) hr2 = rcache_get_tmp(); else - hr2 = rcache_get_reg(rd, RC_GR_WRITE); - switch (size) { + hr2 = rcache_get_reg(rd, RC_GR_WRITE, NULL); + switch (size & MF_SIZEMASK) { case 0: // 8 emith_read8s_r_r_offs(hr2, hr, offs2 ^ 1); break; @@ -1401,142 +1979,170 @@ static int emit_memhandler_read_rr(sh2_reg_e rd, sh2_reg_e rs, u32 offs, int siz break; } rcache_free_tmp(hr); + if (size & MF_POSTINCR) + gconst_new(rs, val + (1 << (size & MF_SIZEMASK))); return hr2; } } - - if (gconst_get(rs, &val)) { +#endif + if (gconst_get(rs, &val) && (!(size & MF_POSTINCR) /*|| !(rcache_hint_soon & (1 << rs))*/)) { hr = rcache_get_tmp_arg(0); emith_move_r_imm(hr, val + offs); - } else { - hr = rcache_get_reg_arg(0, rs); - if (offs) - emith_add_r_imm(hr, offs); - } - hr = emit_memhandler_read(size); + if (size & MF_POSTINCR) + gconst_new(rs, val + (1 << (size & MF_SIZEMASK))); + } else if (offs || (size & MF_POSTINCR)) { + hr = rcache_get_reg_arg(0, rs, &hr2); + if (offs || hr != hr2) + emith_add_r_r_imm(hr, hr2, offs); + if (size & MF_POSTINCR) { + hr = rcache_get_reg(rs, RC_GR_WRITE, NULL); + emith_add_r_r_imm(hr, hr2, 1 << (size & MF_SIZEMASK)); + } + } else + rcache_get_reg_arg(0, rs, NULL); + hr = emit_memhandler_read(size); + + size &= MF_SIZEMASK; if (rd == SHR_TMP) hr2 = hr; else - hr2 = rcache_get_reg(rd, RC_GR_WRITE); +#if REMAP_REGISTER + hr2 = rcache_map_reg(rd, hr, size != 2 ? 
RC_GR_RMW : RC_GR_WRITE); +#else + hr2 = rcache_get_reg(rd, RC_GR_WRITE, NULL); +#endif - if (rd != SHR_TMP && size != 2) { - emith_sext(hr2, hr, (size == 1) ? 16 : 8); - } else if (hr != hr2) + if (rd != SHR_TMP && size != 2) { // 16, 8 + emith_sext(hr2, hr, size ? 16 : 8); + } else if (hr != hr2) // 32 emith_move_r_r(hr2, hr); - if (hr != hr2) rcache_free_tmp(hr); return hr2; } -// @(Rs,#offs) = rd -static void emit_memhandler_write_rr(sh2_reg_e rd, sh2_reg_e rs, u32 offs, int size) +// @(Rs,#offs) = rd; rd < 0 -> write arg1 +static void emit_memhandler_write_rr(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rs, u32 offs, int size) { - int hr; + int hr, hr2; u32 val; - rcache_clean(); // XXX - rcache_get_reg_arg(1, rd); + if (rd == SHR_TMP) { + host_arg2reg(hr2, 1); + } else if ((size & MF_PREDECR) && rd == rs) { // must avoid caching rd in arg1 + hr2 = rcache_get_reg_arg(1, rd, &hr); + if (hr != hr2) emith_move_r_r(hr2, hr); + } else + hr2 = rcache_get_reg_arg(1, rd, NULL); - if (gconst_get(rs, &val)) { + if (gconst_get(rs, &val) && (!(size & MF_PREDECR) /*|| !(rcache_hint_soon & (1 << rs))*/)) { + if (size & MF_PREDECR) { + val -= 1 << (size & MF_SIZEMASK); + gconst_new(rs, val); + } hr = rcache_get_tmp_arg(0); emith_move_r_imm(hr, val + offs); - } else if (offs) { - hr = rcache_get_reg_arg(0, rs); - emith_add_r_imm(hr, offs); + } else if (offs || (size & MF_PREDECR)) { + if (size & MF_PREDECR) { + hr = rcache_get_reg(rs, RC_GR_RMW, &hr2); + emith_sub_r_r_imm(hr, hr2, 1 << (size & MF_SIZEMASK)); + } + hr = rcache_get_reg_arg(0, rs, &hr2); + if (offs || hr != hr2) + emith_add_r_r_imm(hr, hr2, offs); } else - rcache_get_reg_arg(0, rs); + rcache_get_reg_arg(0, rs, NULL); emit_memhandler_write(size); } -// rd = @(Rx,Ry) -static int emit_indirect_indexed_read(sh2_reg_e rd, sh2_reg_e rx, sh2_reg_e ry, int size) +// rd = @(Rx,Ry); rd < 0 -> return a temp +static int emit_indirect_indexed_read(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rx, sh2_reg_e ry, int size) { int hr, hr2; - int a0, t; 
+ int tx, ty; #if PROPAGATE_CONSTANTS u32 offs; if (gconst_get(ry, &offs)) - return emit_memhandler_read_rr(rd, rx, offs, size); + return emit_memhandler_read_rr(sh2, rd, rx, offs, size); if (gconst_get(rx, &offs)) - return emit_memhandler_read_rr(rd, ry, offs, size); + return emit_memhandler_read_rr(sh2, rd, ry, offs, size); #endif - a0 = rcache_get_reg_arg(0, rx); - t = rcache_get_reg(ry, RC_GR_READ); - emith_add_r_r(a0, t); + hr = rcache_get_reg_arg(0, rx, &tx); + ty = rcache_get_reg(ry, RC_GR_READ, NULL); + emith_add_r_r_r(hr, tx, ty); hr = emit_memhandler_read(size); + + size &= MF_SIZEMASK; if (rd != SHR_TMP) - hr2 = rcache_get_reg(rd, RC_GR_WRITE); +#if REMAP_REGISTER + hr2 = rcache_map_reg(rd, hr, size != 2 ? RC_GR_RMW : RC_GR_WRITE); +#else + hr2 = rcache_get_reg(rd, RC_GR_WRITE, NULL); +#endif else hr2 = hr; - if (rd != SHR_TMP && size != 2) { // 16, 8 + if (rd != SHR_TMP && size != 2) { // 16, 8 emith_sext(hr2, hr, size ? 16 : 8); } else if (hr != hr2) // 32 emith_move_r_r(hr2, hr); - if (hr != hr2) rcache_free_tmp(hr); return hr2; } -// @(Rx,Ry) = rd -static void emit_indirect_indexed_write(sh2_reg_e rd, sh2_reg_e rx, sh2_reg_e ry, int size) +// @(Rx,Ry) = rd; rd < 0 -> write arg1 +static void emit_indirect_indexed_write(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rx, sh2_reg_e ry, int size) { - int a0, t; + int hr, tx, ty; #if PROPAGATE_CONSTANTS u32 offs; if (gconst_get(ry, &offs)) - return emit_memhandler_write_rr(rd, rx, offs, size); + return emit_memhandler_write_rr(sh2, rd, rx, offs, size); if (gconst_get(rx, &offs)) - return emit_memhandler_write_rr(rd, ry, offs, size); + return emit_memhandler_write_rr(sh2, rd, ry, offs, size); #endif - rcache_clean(); // XXX - rcache_get_reg_arg(1, rd); - a0 = rcache_get_reg_arg(0, rx); - t = rcache_get_reg(ry, RC_GR_READ); - emith_add_r_r(a0, t); + if (rd != SHR_TMP) + rcache_get_reg_arg(1, rd, NULL); + hr = rcache_get_reg_arg(0, rx, &tx); + ty = rcache_get_reg(ry, RC_GR_READ, NULL); + emith_add_r_r_r(hr, tx, ty); 
emit_memhandler_write(size); } // @Rn+,@Rm+ -static void emit_indirect_read_double(u32 *rnr, u32 *rmr, int rn, int rm, int size) +static void emit_indirect_read_double(SH2 *sh2, int *rnr, int *rmr, sh2_reg_e rn, sh2_reg_e rm, int size) { int tmp; - rcache_get_reg_arg(0, rn); - tmp = emit_memhandler_read(size); - emith_ctx_write(tmp, offsetof(SH2, drc_tmp)); - rcache_free_tmp(tmp); - tmp = rcache_get_reg(rn, RC_GR_RMW); - emith_add_r_imm(tmp, 1 << size); - rcache_unlock(tmp); - - rcache_get_reg_arg(0, rm); - *rmr = emit_memhandler_read(size); - *rnr = rcache_get_tmp(); - emith_ctx_read(*rnr, offsetof(SH2, drc_tmp)); - tmp = rcache_get_reg(rm, RC_GR_RMW); - emith_add_r_imm(tmp, 1 << size); - rcache_unlock(tmp); + // unlock rn, rm here to avoid REG shortage in MAC operation + tmp = emit_memhandler_read_rr(sh2, SHR_TMP, rn, 0, size | MF_POSTINCR); + rcache_unlock(guest_regs[rn].vreg); + tmp = rcache_save_tmp(tmp); + *rmr = emit_memhandler_read_rr(sh2, SHR_TMP, rm, 0, size | MF_POSTINCR); + rcache_unlock(guest_regs[rm].vreg); + *rnr = rcache_restore_tmp(tmp); } static void emit_do_static_regs(int is_write, int tmpr) { int i, r, count; - for (i = 0; i < ARRAY_SIZE(reg_map_g2h); i++) { - r = reg_map_g2h[i]; - if (r == -1) + for (i = 0; i < ARRAY_SIZE(guest_regs); i++) { + if (guest_regs[i].flags & GRF_STATIC) + r = cache_regs[guest_regs[i].vreg].hreg; + else continue; - for (count = 1; i < ARRAY_SIZE(reg_map_g2h) - 1; i++, r++) { - if (reg_map_g2h[i + 1] != r + 1) + for (count = 1; i < ARRAY_SIZE(guest_regs) - 1; i++, r++) { + if ((guest_regs[i + 1].flags & GRF_STATIC) && + cache_regs[guest_regs[i + 1].vreg].hreg == r + 1) + count++; + else break; - count++; } if (count > 1) { @@ -1606,9 +2212,10 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) struct op_data *opd; int blkid_main = 0; int skip_op = 0; - u32 tmp, tmp2; + int tmp, tmp2; int cycles; int i, v; + u32 u; int op; base_pc = sh2->pc; @@ -1625,8 +2232,8 @@ static void REGPARM(2) *sh2_translate(SH2 
*sh2, int tcache_id) tcache_ptr = tcache_ptrs[tcache_id]; // predict tcache overflow - tmp = tcache_ptr - tcache_bases[tcache_id]; - if (tmp > tcache_sizes[tcache_id] - MAX_BLOCK_SIZE) { + u = tcache_ptr - tcache_bases[tcache_id]; + if (u > tcache_sizes[tcache_id] - MAX_BLOCK_SIZE) { dbg(1, "tcache %d overflow", tcache_id); return NULL; } @@ -1673,7 +2280,8 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) for (i = 0; pc < end_pc; i++) { u32 delay_dep_fw = 0, delay_dep_bk = 0; - u32 tmp3, tmp4, sr; + int tmp3, tmp4; + u32 sr; opd = &ops[i]; op = FETCH_OP(pc); @@ -1691,7 +2299,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) { if (pc != base_pc) { - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); FLUSH_CYCLES(sr); rcache_flush(); @@ -1741,7 +2349,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) rcache_clean(); #if (DRC_DEBUG & 0x10) - rcache_get_reg_arg(0, SHR_PC); + rcache_get_reg_arg(0, SHR_PC, NULL); tmp = emit_memhandler_read(2); tmp2 = rcache_get_tmp(); tmp3 = rcache_get_tmp(); @@ -1757,7 +2365,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) #endif // check cycles - sr = rcache_get_reg(SHR_SR, RC_GR_READ); + sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); emith_cmp_r_imm(sr, 0); emith_jump_cond(DCOND_LE, sh2_drc_exit); do_host_disasm(tcache_id); @@ -1765,27 +2373,27 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) #if (DRC_DEBUG & (8|256|512|1024)) emit_move_r_imm32(SHR_PC, pc); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); FLUSH_CYCLES(sr); rcache_clean(); tmp = rcache_used_hreg_mask(); emith_save_caller_regs(tmp); emit_do_static_regs(1, 0); - rcache_get_reg_arg(2, SHR_SR); + rcache_get_reg_arg(2, SHR_SR, NULL); tmp2 = rcache_get_tmp_arg(0); tmp3 = rcache_get_tmp_arg(1); emith_move_r_imm(tmp2, (u32)tcache_ptr); emith_move_r_r_ptr(tmp3,CONTEXT_REG); emith_call(sh2_drc_log_entry); 
emith_restore_caller_regs(tmp); - rcache_invalidate(); + rcache_invalidate_tmp(); #endif } #ifdef DRC_CMP if (!(op_flags[i] & OF_DELAY_OP)) { emit_move_r_imm32(SHR_PC, pc); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); FLUSH_CYCLES(sr); rcache_clean(); @@ -1811,7 +2419,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) delay_dep_fw = opd->dest & ops[i-1].source; delay_dep_bk = opd->source & ops[i-1].dest; if (delay_dep_fw & BITMASK1(SHR_T)) { - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); DELAY_SAVE_T(sr); } if (delay_dep_bk & BITMASK1(SHR_PC)) { @@ -1820,8 +2428,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) elprintf_sh2(sh2, EL_ANOMALY, "drc: illegal slot insn %04x @ %08x?", op, pc - 2); } + // store PC for MOVA/MOV @PC address calculation if (opd->imm != 0) - ; // addr already resolved somehow + ; // case OP_BRANCH - addr already resolved in scan_block else { switch (ops[i-1].op) { case OP_BRANCH: @@ -1829,8 +2438,8 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) break; case OP_BRANCH_CT: case OP_BRANCH_CF: - sr = rcache_get_reg(SHR_SR, RC_GR_READ); - tmp = rcache_get_reg(SHR_PC, RC_GR_WRITE); + sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); + tmp = rcache_get_reg(SHR_PC, RC_GR_WRITE, NULL); emith_move_r_imm(tmp, pc); emith_tst_r_imm(sr, T); tmp2 = ops[i-1].op == OP_BRANCH_CT ? DCOND_NE : DCOND_EQ; @@ -1839,7 +2448,8 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_move_r_imm_c(tmp2, tmp, ops[i-1].imm); EMITH_SJMP_END(tmp3); break; - case OP_BRANCH_N: + case OP_BRANCH_N: // BT/BF known not to be taken + // XXX could modify opd->imm instead? 
emit_move_r_imm32(SHR_PC, pc); break; // case OP_BRANCH_R OP_BRANCH_RF - PC already loaded @@ -1850,13 +2460,46 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // dbg(1, "unhandled delay_dep_fw: %x", delay_dep_fw & ~BITMASK1(SHR_T)); if (delay_dep_bk & ~BITMASK2(SHR_PC, SHR_PR)) dbg(1, "unhandled delay_dep_bk: %x", delay_dep_bk); + rcache_set_hint_soon(0); + rcache_set_hint_late(0); } + else + { + // inform cache about future register usage + u32 late = 0; // regs read by future ops + u32 write = 0; // regs written to (to detect write before read) + u32 soon = 0; // regs read soon + tmp = OP_ISBRANCH(opd[0].op); // branch insn detected + for (v = 1; v <= 9; v++) { + // no sense in looking any further than the next rcache flush + if (pc + 2*v < end_pc && !(op_flags[i+v] & OF_BTARGET) && + (!tmp || (op_flags[i+v] & OF_DELAY_OP))) { + late |= opd[v].source & ~write; + // ignore source regs after they have been written to + write |= opd[v].dest; + } else { + // upcoming rcache_flush, start writing back unused dirty stuff + tmp2 = write|opd[0].source|opd[0].dest; // insn may change reg aliases + rcache_clean_mask(rcache_dirty_mask() & ~tmp2); + break; + } + // XXX must also include test-irq locations! 
+ tmp |= (OP_ISBRANCH(opd[v].op) || opd[v].op == OP_RTE || + opd[v].op == OP_TRAPA || opd[v].op == OP_UNDEFINED); + // regs needed in the next few instructions + if (v <= 4) + soon = late; + } + rcache_set_hint_soon(late); // insns 1-3 + rcache_set_hint_late(late & ~soon); // insns 4-9 + } + rcache_set_locked(opd[0].source); // try not to evict src regs for this op switch (opd->op) { case OP_BRANCH_N: + // never taken, just use up cycles goto end_op; - case OP_BRANCH: case OP_BRANCH_CT: case OP_BRANCH_CF: @@ -1868,8 +2511,8 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case OP_BRANCH_R: if (opd->dest & BITMASK1(SHR_PR)) emit_move_r_imm32(SHR_PR, pc + 2); - if (gconst_get(opd->rm, &tmp)) { - opd->imm = tmp; + if (gconst_get(opd->rm, &u)) { + opd->imm = u; drcf.pending_branch_direct = 1; } else { emit_move_r_r(SHR_PC, opd->rm); @@ -1878,17 +2521,17 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) goto end_op; case OP_BRANCH_RF: - if (gconst_get(GET_Rn(), &tmp)) { + if (gconst_get(GET_Rn(), &u)) { if (opd->dest & BITMASK1(SHR_PR)) emit_move_r_imm32(SHR_PR, pc + 2); - opd->imm = pc + 2 + tmp; + opd->imm = pc + 2 + u; drcf.pending_branch_direct = 1; } else { - tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ); - tmp = rcache_get_reg(SHR_PC, RC_GR_WRITE); + tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); + tmp = rcache_get_reg(SHR_PC, RC_GR_WRITE, NULL); emith_move_r_imm(tmp, pc + 2); if (opd->dest & BITMASK1(SHR_PR)) { - tmp3 = rcache_get_reg(SHR_PR, RC_GR_WRITE); + tmp3 = rcache_get_reg(SHR_PR, RC_GR_WRITE, NULL); emith_move_r_r(tmp3, tmp); } emith_add_r_r(tmp, tmp2); @@ -1896,22 +2539,18 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } goto end_op; - case OP_SLEEP: + case OP_SLEEP: // SLEEP 0000000000011011 printf("TODO sleep\n"); goto end_op; - case OP_RTE: + case OP_RTE: // RTE 0000000000101011 // pop PC - emit_memhandler_read_rr(SHR_PC, SHR_SP, 0, 2); + emit_memhandler_read_rr(sh2, SHR_PC, SHR_SP, 0, 2 | 
MF_POSTINCR); // pop SR - tmp = rcache_get_reg_arg(0, SHR_SP); - emith_add_r_imm(tmp, 4); - tmp = emit_memhandler_read(2); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + tmp = emit_memhandler_read_rr(sh2, SHR_TMP, SHR_SP, 0, 2 | MF_POSTINCR); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_write_sr(sr, tmp); rcache_free_tmp(tmp); - tmp = rcache_get_reg(SHR_SP, RC_GR_RMW); - emith_add_r_imm(tmp, 4*2); drcf.test_irq = 1; drcf.pending_branch_indirect = 1; goto end_op; @@ -1921,30 +2560,27 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) "drc: illegal op %04x @ %08x", op, pc - 2); opd->imm = (op_flags[i] & OF_B_IN_DS) ? 6 : 4; // fallthrough - case OP_TRAPA: - tmp = rcache_get_reg(SHR_SP, RC_GR_RMW); - emith_sub_r_imm(tmp, 4*2); + case OP_TRAPA: // TRAPA #imm 11000011iiiiiiii // push SR - tmp = rcache_get_reg_arg(0, SHR_SP); - emith_add_r_imm(tmp, 4); - tmp = rcache_get_reg_arg(1, SHR_SR); - emith_clear_msb(tmp, tmp, 22); - emit_memhandler_write(2); + tmp = rcache_get_reg_arg(1, SHR_SR, &tmp2); + emith_clear_msb(tmp, tmp2, 22); + emit_memhandler_write_rr(sh2, SHR_TMP, SHR_SP, 0, 2 | MF_PREDECR); // push PC - rcache_get_reg_arg(0, SHR_SP); - tmp = rcache_get_tmp_arg(1); - if (op == OP_TRAPA) + if (op == OP_TRAPA) { + tmp = rcache_get_tmp_arg(1); emith_move_r_imm(tmp, pc); - else if (drcf.pending_branch_indirect) { - tmp2 = rcache_get_reg(SHR_PC, RC_GR_READ); - emith_move_r_r(tmp, tmp2); - } else + } else if (drcf.pending_branch_indirect) { + tmp = rcache_get_reg_arg(1, SHR_PC, NULL); + } else { + tmp = rcache_get_tmp_arg(1); emith_move_r_imm(tmp, pc - 2); - emit_memhandler_write(2); + } + emith_move_r_imm(tmp, pc); + emit_memhandler_write_rr(sh2, SHR_TMP, SHR_SP, 0, 2 | MF_PREDECR); // obtain new PC - emit_memhandler_read_rr(SHR_PC, SHR_VBR, opd->imm * 4, 2); + emit_memhandler_read_rr(sh2, SHR_PC, SHR_VBR, opd->imm * 4, 2); // indirect jump -> back to dispatcher - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); 
FLUSH_CYCLES(sr); rcache_flush(); emith_jump(sh2_drc_dispatcher); @@ -1952,25 +2588,27 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case OP_LOAD_POOL: #if PROPAGATE_CONSTANTS - if (opd->imm != 0 && opd->imm < end_literals - && literal_addr_count < MAX_LITERALS) + if ((opd->imm && opd->imm >= base_pc && opd->imm < end_literals) || + dr_is_rom(opd->imm)) { ADD_TO_ARRAY(literal_addr, literal_addr_count, opd->imm,); if (opd->size == 2) - tmp = FETCH32(opd->imm); + u = FETCH32(opd->imm); else - tmp = (s16)FETCH_OP(opd->imm); - gconst_new(GET_Rn(), tmp); + u = (s16)FETCH_OP(opd->imm); + // tweak for Blackthorne: avoid stack overwriting + if (GET_Rn() == SHR_SP && u == 0x0603f800) u = 0x0603f880; + gconst_new(GET_Rn(), u); } else #endif { - tmp = rcache_get_tmp_arg(0); - if (opd->imm != 0) + if (opd->imm != 0) { + tmp = rcache_get_tmp_arg(0); emith_move_r_imm(tmp, opd->imm); - else { - // have to calculate read addr from PC - tmp2 = rcache_get_reg(SHR_PC, RC_GR_READ); + } else { + // have to calculate read addr from PC for delay slot + tmp = rcache_get_reg_arg(0, SHR_PC, &tmp2); if (opd->size == 2) { emith_add_r_r_imm(tmp, tmp2, 2 + (op & 0xff) * 4); emith_bic_r_imm(tmp, 3); @@ -1979,21 +2617,27 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_add_r_r_imm(tmp, tmp2, 2 + (op & 0xff) * 2); } tmp2 = emit_memhandler_read(opd->size); - tmp3 = rcache_get_reg(GET_Rn(), RC_GR_WRITE); - if (opd->size == 2) - emith_move_r_r(tmp3, tmp2); - else +#if REMAP_REGISTER + tmp3 = rcache_map_reg(GET_Rn(), tmp2, opd->size != 2 ? 
RC_GR_RMW : RC_GR_WRITE); +#else + tmp3 = rcache_get_reg(GET_Rn(), RC_GR_WRITE, NULL); +#endif + if (opd->size != 2) { emith_sext(tmp3, tmp2, 16); - rcache_free_tmp(tmp2); + } else if (tmp3 != tmp2) + emith_move_r_r(tmp3, tmp2); + if (tmp3 != tmp2) + rcache_free_tmp(tmp2); } goto end_op; - case OP_MOVA: + case OP_MOVA: // MOVA @(disp,PC),R0 11000111dddddddd if (opd->imm != 0) emit_move_r_imm32(SHR_R0, opd->imm); - else { // delay slot case, pc can have either value - tmp2 = rcache_get_reg(SHR_PC, RC_GR_READ); - tmp = rcache_get_reg(SHR_R0, RC_GR_WRITE); + else { + // have to calculate addr from PC for delay slot + tmp2 = rcache_get_reg(SHR_PC, RC_GR_READ, NULL); + tmp = rcache_get_reg(SHR_R0, RC_GR_WRITE, NULL); emith_add_r_r_imm(tmp, tmp2, 2 + (op & 0xff) * 4); emith_bic_r_imm(tmp, 3); } @@ -2021,33 +2665,34 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) default: goto default_; } - tmp3 = rcache_get_reg(tmp2, RC_GR_READ); - tmp = rcache_get_reg(GET_Rn(), RC_GR_WRITE); - emith_move_r_r(tmp, tmp3); - if (tmp2 == SHR_SR) - emith_clear_msb(tmp, tmp, 22); // reserved bits defined by ISA as 0 + if (tmp2 == SHR_SR) { + sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); + tmp = rcache_get_reg(GET_Rn(), RC_GR_WRITE, NULL); + emith_clear_msb(tmp, sr, 22); // reserved bits defined by ISA as 0 + } else + emit_move_r_r(GET_Rn(), tmp2); goto end_op; case 0x04: // MOV.B Rm,@(R0,Rn) 0000nnnnmmmm0100 case 0x05: // MOV.W Rm,@(R0,Rn) 0000nnnnmmmm0101 case 0x06: // MOV.L Rm,@(R0,Rn) 0000nnnnmmmm0110 - emit_indirect_indexed_write(GET_Rm(), SHR_R0, GET_Rn(), op & 3); + emit_indirect_indexed_write(sh2, GET_Rm(), SHR_R0, GET_Rn(), op & 3); goto end_op; case 0x07: // MUL.L Rm,Rn 0000nnnnmmmm0111 - tmp = rcache_get_reg(GET_Rn(), RC_GR_READ); - tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ); - tmp3 = rcache_get_reg(SHR_MACL, RC_GR_WRITE); + tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); + tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); + tmp3 = rcache_get_reg(SHR_MACL, 
RC_GR_WRITE, NULL); emith_mul(tmp3, tmp2, tmp); goto end_op; case 0x08: switch (GET_Fx()) { case 0: // CLRT 0000000000001000 - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_bic_r_imm(sr, T); break; case 1: // SETT 0000000000011000 - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_or_r_imm(sr, T); break; case 2: // CLRMAC 0000000000101000 @@ -2064,12 +2709,12 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0: // NOP 0000000000001001 break; case 1: // DIV0U 0000000000011001 - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_bic_r_imm(sr, M|Q|T); break; case 2: // MOVT Rn 0000nnnn00101001 - sr = rcache_get_reg(SHR_SR, RC_GR_READ); - tmp2 = rcache_get_reg(GET_Rn(), RC_GR_WRITE); + sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); + tmp2 = rcache_get_reg(GET_Rn(), RC_GR_WRITE, NULL); emith_clear_msb(tmp2, sr, 31); break; default: @@ -2096,13 +2741,13 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x0c: // MOV.B @(R0,Rm),Rn 0000nnnnmmmm1100 case 0x0d: // MOV.W @(R0,Rm),Rn 0000nnnnmmmm1101 case 0x0e: // MOV.L @(R0,Rm),Rn 0000nnnnmmmm1110 - emit_indirect_indexed_read(GET_Rn(), SHR_R0, GET_Rm(), op & 3); + emit_indirect_indexed_read(sh2, GET_Rn(), SHR_R0, GET_Rm(), op & 3); goto end_op; case 0x0f: // MAC.L @Rm+,@Rn+ 0000nnnnmmmm1111 - emit_indirect_read_double(&tmp, &tmp2, GET_Rn(), GET_Rm(), 2); - sr = rcache_get_reg(SHR_SR, RC_GR_READ); - tmp3 = rcache_get_reg(SHR_MACL, RC_GR_RMW); - tmp4 = rcache_get_reg(SHR_MACH, RC_GR_RMW); + emit_indirect_read_double(sh2, &tmp, &tmp2, GET_Rn(), GET_Rm(), 2); + sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); + tmp3 = rcache_get_reg(SHR_MACL, RC_GR_RMW, NULL); + tmp4 = rcache_get_reg(SHR_MACH, RC_GR_RMW, NULL); emith_sh2_macl(tmp3, tmp4, tmp, tmp2, sr); rcache_free_tmp(tmp2); rcache_free_tmp(tmp); @@ -2113,7 +2758,7 @@ static void REGPARM(2) 
*sh2_translate(SH2 *sh2, int tcache_id) ///////////////////////////////////////////// case 0x01: // MOV.L Rm,@(disp,Rn) 0001nnnnmmmmdddd - emit_memhandler_write_rr(GET_Rm(), GET_Rn(), (op & 0x0f) * 4, 2); + emit_memhandler_write_rr(sh2, GET_Rm(), GET_Rn(), (op & 0x0f) * 4, 2); goto end_op; case 0x02: @@ -2122,19 +2767,17 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x00: // MOV.B Rm,@Rn 0010nnnnmmmm0000 case 0x01: // MOV.W Rm,@Rn 0010nnnnmmmm0001 case 0x02: // MOV.L Rm,@Rn 0010nnnnmmmm0010 - emit_memhandler_write_rr(GET_Rm(), GET_Rn(), 0, op & 3); + emit_memhandler_write_rr(sh2, GET_Rm(), GET_Rn(), 0, op & 3); goto end_op; case 0x04: // MOV.B Rm,@-Rn 0010nnnnmmmm0100 case 0x05: // MOV.W Rm,@-Rn 0010nnnnmmmm0101 case 0x06: // MOV.L Rm,@-Rn 0010nnnnmmmm0110 - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); - emith_sub_r_imm(tmp, (1 << (op & 3))); - emit_memhandler_write_rr(GET_Rm(), GET_Rn(), 0, op & 3); + emit_memhandler_write_rr(sh2, GET_Rm(), GET_Rn(), 0, (op & 3) | MF_PREDECR); goto end_op; case 0x07: // DIV0S Rm,Rn 0010nnnnmmmm0111 - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); - tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ); - tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); + tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); emith_bic_r_imm(sr, M|Q|T); emith_tst_r_imm(tmp2, (1<<31)); EMITH_SJMP_START(DCOND_EQ); @@ -2150,56 +2793,69 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) EMITH_SJMP_END(DCOND_PL); goto end_op; case 0x08: // TST Rm,Rn 0010nnnnmmmm1000 - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); - tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ); - tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); + tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); emith_bic_r_imm(sr, T); emith_tst_r_r(tmp2, tmp3); emit_or_t_if_eq(sr); goto end_op; case 
0x09: // AND Rm,Rn 0010nnnnmmmm1001 - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); - tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ); - emith_and_r_r(tmp, tmp2); + if (GET_Rm() != GET_Rn()) { + tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); + tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3); + emith_and_r_r_r(tmp, tmp3, tmp2); + } goto end_op; case 0x0a: // XOR Rm,Rn 0010nnnnmmmm1010 - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); - tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ); - emith_eor_r_r(tmp, tmp2); +#if PROPAGATE_CONSTANTS + if (GET_Rn() == GET_Rm()) { + gconst_new(GET_Rn(), 0); + goto end_op; + } +#endif + tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); + tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3); + emith_eor_r_r_r(tmp, tmp3, tmp2); goto end_op; case 0x0b: // OR Rm,Rn 0010nnnnmmmm1011 - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); - tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ); - emith_or_r_r(tmp, tmp2); + if (GET_Rm() != GET_Rn()) { + tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); + tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3); + emith_or_r_r_r(tmp, tmp3, tmp2); + } goto end_op; case 0x0c: // CMP/STR Rm,Rn 0010nnnnmmmm1100 tmp = rcache_get_tmp(); - tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ); - tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ); + tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); + tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); emith_eor_r_r_r(tmp, tmp2, tmp3); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_bic_r_imm(sr, T); emith_tst_r_imm(tmp, 0x000000ff); - emit_or_t_if_eq(sr); - emith_tst_r_imm(tmp, 0x0000ff00); - emit_or_t_if_eq(sr); - emith_tst_r_imm(tmp, 0x00ff0000); - emit_or_t_if_eq(sr); - emith_tst_r_imm(tmp, 0xff000000); + EMITH_SJMP_START(DCOND_EQ); + emith_tst_r_imm_c(DCOND_NE, tmp, 0x0000ff00); + EMITH_SJMP_START(DCOND_EQ); + emith_tst_r_imm_c(DCOND_NE, tmp, 0x00ff0000); + EMITH_SJMP_START(DCOND_EQ); + emith_tst_r_imm_c(DCOND_NE, tmp, 0xff000000); + 
EMITH_SJMP_END(DCOND_EQ); + EMITH_SJMP_END(DCOND_EQ); + EMITH_SJMP_END(DCOND_EQ); emit_or_t_if_eq(sr); rcache_free_tmp(tmp); goto end_op; case 0x0d: // XTRCT Rm,Rn 0010nnnnmmmm1101 - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); - tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ); - emith_lsr(tmp, tmp, 16); + tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); + tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3); + emith_lsr(tmp, tmp3, 16); emith_or_r_r_lsl(tmp, tmp2, 16); goto end_op; case 0x0e: // MULU.W Rm,Rn 0010nnnnmmmm1110 case 0x0f: // MULS.W Rm,Rn 0010nnnnmmmm1111 - tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ); - tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ); - tmp = rcache_get_reg(SHR_MACL, RC_GR_WRITE); + tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); + tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); + tmp = rcache_get_reg(SHR_MACL, RC_GR_WRITE, NULL); if (op & 1) { emith_sext(tmp, tmp2, 16); } else @@ -2224,9 +2880,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x03: // CMP/GE Rm,Rn 0011nnnnmmmm0011 case 0x06: // CMP/HI Rm,Rn 0011nnnnmmmm0110 case 0x07: // CMP/GT Rm,Rn 0011nnnnmmmm0111 - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); - tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ); - tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); + tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); emith_bic_r_imm(sr, T); emith_cmp_r_r(tmp2, tmp3); switch (op & 0x07) @@ -2264,11 +2920,11 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // Q2 = carry(Rn -= Rm) // Q = M ^ Q1 ^ Q2 // T = (Q == M) = !(Q ^ M) = !(Q1 ^ Q2) - tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ); - tmp2 = rcache_get_reg(GET_Rn(), RC_GR_RMW); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); + tmp2 = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_tpop_carry(sr, 0); - emith_adcf_r_r(tmp2, 
tmp2); + emith_adcf_r_r_r(tmp2, tmp, tmp); emith_tpush_carry(sr, 0); // keep Q1 in T for now tmp4 = rcache_get_tmp(); emith_and_r_r_imm(tmp4, sr, M); @@ -2289,55 +2945,61 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_eor_r_imm(sr, T); // T = !(Q1 ^ Q2) goto end_op; case 0x05: // DMULU.L Rm,Rn 0011nnnnmmmm0101 - tmp = rcache_get_reg(GET_Rn(), RC_GR_READ); - tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ); - tmp3 = rcache_get_reg(SHR_MACL, RC_GR_WRITE); - tmp4 = rcache_get_reg(SHR_MACH, RC_GR_WRITE); + tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); + tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); + tmp3 = rcache_get_reg(SHR_MACL, RC_GR_WRITE, NULL); + tmp4 = rcache_get_reg(SHR_MACH, RC_GR_WRITE, NULL); emith_mul_u64(tmp3, tmp4, tmp, tmp2); goto end_op; case 0x08: // SUB Rm,Rn 0011nnnnmmmm1000 +#if PROPAGATE_CONSTANTS + if (GET_Rn() == GET_Rm()) { + gconst_new(GET_Rn(), 0); + goto end_op; + } +#endif case 0x0c: // ADD Rm,Rn 0011nnnnmmmm1100 - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); - tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ); + tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); + tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3); if (op & 4) { - emith_add_r_r(tmp, tmp2); + emith_add_r_r_r(tmp, tmp3, tmp2); } else - emith_sub_r_r(tmp, tmp2); + emith_sub_r_r_r(tmp, tmp3, tmp2); goto end_op; case 0x0a: // SUBC Rm,Rn 0011nnnnmmmm1010 case 0x0e: // ADDC Rm,Rn 0011nnnnmmmm1110 - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); - tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); + tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); if (op & 4) { // adc emith_tpop_carry(sr, 0); - emith_adcf_r_r(tmp, tmp2); + emith_adcf_r_r_r(tmp, tmp3, tmp2); emith_tpush_carry(sr, 0); } else { emith_tpop_carry(sr, 1); - emith_sbcf_r_r(tmp, tmp2); + emith_sbcf_r_r_r(tmp, tmp3, tmp2); emith_tpush_carry(sr, 1); } goto end_op; case 
0x0b: // SUBV Rm,Rn 0011nnnnmmmm1011 case 0x0f: // ADDV Rm,Rn 0011nnnnmmmm1111 - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); - tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); + tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_bic_r_imm(sr, T); if (op & 4) { - emith_addf_r_r(tmp, tmp2); + emith_addf_r_r_r(tmp, tmp3, tmp2); } else - emith_subf_r_r(tmp, tmp2); + emith_subf_r_r_r(tmp, tmp3, tmp2); EMITH_SJMP_START(DCOND_VC); emith_or_r_imm_c(DCOND_VS, sr, T); EMITH_SJMP_END(DCOND_VC); goto end_op; case 0x0d: // DMULS.L Rm,Rn 0011nnnnmmmm1101 - tmp = rcache_get_reg(GET_Rn(), RC_GR_READ); - tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ); - tmp3 = rcache_get_reg(SHR_MACL, RC_GR_WRITE); - tmp4 = rcache_get_reg(SHR_MACH, RC_GR_WRITE); + tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); + tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); + tmp3 = rcache_get_reg(SHR_MACL, RC_GR_WRITE, NULL); + tmp4 = rcache_get_reg(SHR_MACH, RC_GR_WRITE, NULL); emith_mul_s64(tmp3, tmp4, tmp, tmp2); goto end_op; } @@ -2352,17 +3014,17 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) { case 0: // SHLL Rn 0100nnnn00000000 case 2: // SHAL Rn 0100nnnn00100000 - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_tpop_carry(sr, 0); // dummy - emith_lslf(tmp, tmp, 1); + emith_lslf(tmp, tmp2, 1); emith_tpush_carry(sr, 0); goto end_op; case 1: // DT Rn 0100nnnn00010000 - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_bic_r_imm(sr, T); - emith_subf_r_imm(tmp, 1); + tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); + emith_subf_r_r_imm(tmp, tmp2, 1); emit_or_t_if_eq(sr); goto end_op; } @@ -2372,18 +3034,18 @@ static 
void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) { case 0: // SHLR Rn 0100nnnn00000001 case 2: // SHAR Rn 0100nnnn00100001 - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_tpop_carry(sr, 0); // dummy if (op & 0x20) { - emith_asrf(tmp, tmp, 1); + emith_asrf(tmp, tmp2, 1); } else - emith_lsrf(tmp, tmp, 1); + emith_lsrf(tmp, tmp2, 1); emith_tpush_carry(sr, 0); goto end_op; case 1: // CMP/PZ Rn 0100nnnn00010001 - tmp = rcache_get_reg(GET_Rn(), RC_GR_READ); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_bic_r_imm(sr, T); emith_cmp_r_imm(tmp, 0); EMITH_SJMP_START(DCOND_LT); @@ -2417,14 +3079,12 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) default: goto default_; } - tmp2 = rcache_get_reg(GET_Rn(), RC_GR_RMW); - emith_sub_r_imm(tmp2, 4); - rcache_clean(); // XXX - rcache_get_reg_arg(0, GET_Rn()); - tmp3 = rcache_get_reg_arg(1, tmp); - if (tmp == SHR_SR) - emith_clear_msb(tmp3, tmp3, 22); // reserved bits defined by ISA as 0 - emit_memhandler_write(2); + tmp3 = rcache_get_reg_arg(1, tmp, &tmp4); + if (tmp == SHR_SR) { + emith_clear_msb(tmp3, tmp4, 22); // reserved bits defined by ISA as 0 + } else if (tmp3 != tmp4) + emith_move_r_r(tmp3, tmp4); + emit_memhandler_write_rr(sh2, SHR_TMP, GET_Rn(), 0, 2 | MF_PREDECR); goto end_op; case 0x04: case 0x05: @@ -2432,19 +3092,19 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) { case 0x04: // ROTL Rn 0100nnnn00000100 case 0x05: // ROTR Rn 0100nnnn00000101 - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_tpop_carry(sr, 0); // dummy if (op & 1) { - emith_rorf(tmp, tmp, 1); + emith_rorf(tmp, tmp2, 1); } else - 
emith_rolf(tmp, tmp, 1); + emith_rolf(tmp, tmp2, 1); emith_tpush_carry(sr, 0); goto end_op; case 0x24: // ROTCL Rn 0100nnnn00100100 case 0x25: // ROTCR Rn 0100nnnn00100101 - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, NULL); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_tpop_carry(sr, 0); if (op & 1) { emith_rorcf(tmp); @@ -2453,8 +3113,8 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_tpush_carry(sr, 0); goto end_op; case 0x15: // CMP/PL Rn 0100nnnn00010101 - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_bic_r_imm(sr, T); emith_cmp_r_imm(tmp, 0); EMITH_SJMP_START(DCOND_LE); @@ -2489,15 +3149,13 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) goto default_; } if (tmp == SHR_SR) { - tmp2 = emit_memhandler_read_rr(SHR_TMP, GET_Rn(), 0, 2); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + tmp2 = emit_memhandler_read_rr(sh2, SHR_TMP, GET_Rn(), 0, 2 | MF_POSTINCR); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_write_sr(sr, tmp2); rcache_free_tmp(tmp2); drcf.test_irq = 1; } else - emit_memhandler_read_rr(tmp, GET_Rn(), 0, 2); - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); - emith_add_r_imm(tmp, 4); + emit_memhandler_read_rr(sh2, tmp, GET_Rn(), 0, 2 | MF_POSTINCR); goto end_op; case 0x08: case 0x09: @@ -2521,11 +3179,11 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) default: goto default_; } - tmp2 = rcache_get_reg(GET_Rn(), RC_GR_RMW); + tmp2 = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3); if (op & 1) { - emith_lsr(tmp2, tmp2, tmp); + emith_lsr(tmp2, tmp3, tmp); } else - emith_lsl(tmp2, tmp2, tmp); + emith_lsl(tmp2, tmp3, tmp); goto end_op; case 0x0a: switch (GET_Fx()) @@ -2549,18 +3207,17 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) { case 1: // TAS.B @Rn 
0100nnnn00011011 // XXX: is TAS working on 32X? - rcache_get_reg_arg(0, GET_Rn()); + rcache_get_reg_arg(0, GET_Rn(), NULL); tmp = emit_memhandler_read(0); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_bic_r_imm(sr, T); emith_cmp_r_imm(tmp, 0); emit_or_t_if_eq(sr); - rcache_clean(); // XXX emith_or_r_imm(tmp, 0x80); tmp2 = rcache_get_tmp_arg(1); // assuming it differs to tmp emith_move_r_r(tmp2, tmp); rcache_free_tmp(tmp); - rcache_get_reg_arg(0, GET_Rn()); + rcache_get_reg_arg(0, GET_Rn(), NULL); emit_memhandler_write(0); break; default: @@ -2568,7 +3225,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } goto end_op; case 0x0e: - tmp = rcache_get_reg(GET_Rn(), RC_GR_READ); switch (GET_Fx()) { case 0: // LDC Rm,SR 0100mmmm00001110 @@ -2584,20 +3240,19 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) goto default_; } if (tmp2 == SHR_SR) { - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); emith_write_sr(sr, tmp); drcf.test_irq = 1; - } else { - tmp2 = rcache_get_reg(tmp2, RC_GR_WRITE); - emith_move_r_r(tmp2, tmp); - } + } else + emit_move_r_r(tmp2, GET_Rn()); goto end_op; case 0x0f: // MAC.W @Rm+,@Rn+ 0100nnnnmmmm1111 - emit_indirect_read_double(&tmp, &tmp2, GET_Rn(), GET_Rm(), 1); - sr = rcache_get_reg(SHR_SR, RC_GR_READ); - tmp3 = rcache_get_reg(SHR_MACL, RC_GR_RMW); - tmp4 = rcache_get_reg(SHR_MACH, RC_GR_RMW); + emit_indirect_read_double(sh2, &tmp, &tmp2, GET_Rn(), GET_Rm(), 1); + sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); + tmp3 = rcache_get_reg(SHR_MACL, RC_GR_RMW, NULL); + tmp4 = rcache_get_reg(SHR_MACH, RC_GR_RMW, NULL); emith_sh2_macw(tmp3, tmp4, tmp, tmp2, sr); rcache_free_tmp(tmp2); rcache_free_tmp(tmp); @@ -2608,7 +3263,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) ///////////////////////////////////////////// case 0x05: // MOV.L @(disp,Rm),Rn 
0101nnnnmmmmdddd - emit_memhandler_read_rr(GET_Rn(), GET_Rm(), (op & 0x0f) * 4, 2); + emit_memhandler_read_rr(sh2, GET_Rn(), GET_Rm(), (op & 0x0f) * 4, 2); goto end_op; ///////////////////////////////////////////// @@ -2621,21 +3276,17 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x04: // MOV.B @Rm+,Rn 0110nnnnmmmm0100 case 0x05: // MOV.W @Rm+,Rn 0110nnnnmmmm0101 case 0x06: // MOV.L @Rm+,Rn 0110nnnnmmmm0110 - emit_memhandler_read_rr(GET_Rn(), GET_Rm(), 0, op & 3); - if ((op & 7) >= 4 && GET_Rn() != GET_Rm()) { - tmp = rcache_get_reg(GET_Rm(), RC_GR_RMW); - emith_add_r_imm(tmp, (1 << (op & 3))); - } + tmp = ((op & 7) >= 4 && GET_Rn() != GET_Rm()) ? MF_POSTINCR : 0; + emit_memhandler_read_rr(sh2, GET_Rn(), GET_Rm(), 0, (op & 3) | tmp); + goto end_op; + case 0x03: // MOV Rm,Rn 0110nnnnmmmm0011 + emit_move_r_r(GET_Rn(), GET_Rm()); goto end_op; - case 0x03: case 0x07 ... 0x0f: - tmp = rcache_get_reg(GET_Rm(), RC_GR_READ); - tmp2 = rcache_get_reg(GET_Rn(), RC_GR_WRITE); + tmp = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); + tmp2 = rcache_get_reg(GET_Rn(), RC_GR_WRITE, NULL); switch (op & 0x0f) { - case 0x03: // MOV Rm,Rn 0110nnnnmmmm0011 - emith_move_r_r(tmp2, tmp); - break; case 0x07: // NOT Rm,Rn 0110nnnnmmmm0111 emith_mvn_r_r(tmp2, tmp); break; @@ -2657,7 +3308,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_rol(tmp2, tmp, 16); break; case 0x0a: // NEGC Rm,Rn 0110nnnnmmmm1010 - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_tpop_carry(sr, 1); emith_negcf_r_r(tmp2, tmp); emith_tpush_carry(sr, 1); @@ -2685,11 +3336,11 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) ///////////////////////////////////////////// case 0x07: // ADD #imm,Rn 0111nnnniiiiiiii - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); + tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); if (op & 0x80) { // adding negative - emith_sub_r_imm(tmp, -op & 0xff); + emith_sub_r_r_imm(tmp, tmp2, -op & 
0xff); } else - emith_add_r_imm(tmp, op & 0xff); + emith_add_r_r_imm(tmp, tmp2, op & 0xff); goto end_op; ///////////////////////////////////////////// @@ -2699,17 +3350,16 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x0000: // MOV.B R0,@(disp,Rn) 10000000nnnndddd case 0x0100: // MOV.W R0,@(disp,Rn) 10000001nnnndddd tmp = (op & 0x100) >> 8; - emit_memhandler_write_rr(SHR_R0, GET_Rm(), (op & 0x0f) << tmp, tmp); + emit_memhandler_write_rr(sh2, SHR_R0, GET_Rm(), (op & 0x0f) << tmp, tmp); goto end_op; case 0x0400: // MOV.B @(disp,Rm),R0 10000100mmmmdddd case 0x0500: // MOV.W @(disp,Rm),R0 10000101mmmmdddd tmp = (op & 0x100) >> 8; - emit_memhandler_read_rr(SHR_R0, GET_Rm(), (op & 0x0f) << tmp, tmp); + emit_memhandler_read_rr(sh2, SHR_R0, GET_Rm(), (op & 0x0f) << tmp, tmp); goto end_op; case 0x0800: // CMP/EQ #imm,R0 10001000iiiiiiii - // XXX: could use cmn - tmp2 = rcache_get_reg(SHR_R0, RC_GR_READ); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + tmp2 = rcache_get_reg(SHR_R0, RC_GR_READ, NULL); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_bic_r_imm(sr, T); emith_cmp_r_imm(tmp2, (s8)(op & 0xff)); emit_or_t_if_eq(sr); @@ -2725,60 +3375,62 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x0100: // MOV.W R0,@(disp,GBR) 11000001dddddddd case 0x0200: // MOV.L R0,@(disp,GBR) 11000010dddddddd tmp = (op & 0x300) >> 8; - emit_memhandler_write_rr(SHR_R0, SHR_GBR, (op & 0xff) << tmp, tmp); + emit_memhandler_write_rr(sh2, SHR_R0, SHR_GBR, (op & 0xff) << tmp, tmp); goto end_op; case 0x0400: // MOV.B @(disp,GBR),R0 11000100dddddddd case 0x0500: // MOV.W @(disp,GBR),R0 11000101dddddddd case 0x0600: // MOV.L @(disp,GBR),R0 11000110dddddddd tmp = (op & 0x300) >> 8; - emit_memhandler_read_rr(SHR_R0, SHR_GBR, (op & 0xff) << tmp, tmp); + emit_memhandler_read_rr(sh2, SHR_R0, SHR_GBR, (op & 0xff) << tmp, tmp); goto end_op; case 0x0800: // TST #imm,R0 11001000iiiiiiii - tmp = rcache_get_reg(SHR_R0, RC_GR_READ); - sr = 
rcache_get_reg(SHR_SR, RC_GR_RMW); + tmp = rcache_get_reg(SHR_R0, RC_GR_READ, NULL); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_bic_r_imm(sr, T); emith_tst_r_imm(tmp, op & 0xff); emit_or_t_if_eq(sr); goto end_op; case 0x0900: // AND #imm,R0 11001001iiiiiiii - tmp = rcache_get_reg(SHR_R0, RC_GR_RMW); - emith_and_r_imm(tmp, op & 0xff); + tmp = rcache_get_reg(SHR_R0, RC_GR_RMW, &tmp2); + emith_and_r_r_imm(tmp, tmp2, (op & 0xff)); goto end_op; case 0x0a00: // XOR #imm,R0 11001010iiiiiiii - tmp = rcache_get_reg(SHR_R0, RC_GR_RMW); - emith_eor_r_imm(tmp, op & 0xff); + if (op & 0xff) { + tmp = rcache_get_reg(SHR_R0, RC_GR_RMW, &tmp2); + emith_eor_r_r_imm(tmp, tmp2, (op & 0xff)); + } goto end_op; case 0x0b00: // OR #imm,R0 11001011iiiiiiii - tmp = rcache_get_reg(SHR_R0, RC_GR_RMW); - emith_or_r_imm(tmp, op & 0xff); + if (op & 0xff) { + tmp = rcache_get_reg(SHR_R0, RC_GR_RMW, &tmp2); + emith_or_r_r_imm(tmp, tmp2, (op & 0xff)); + } goto end_op; case 0x0c00: // TST.B #imm,@(R0,GBR) 11001100iiiiiiii - tmp = emit_indirect_indexed_read(SHR_TMP, SHR_R0, SHR_GBR, 0); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + tmp = emit_indirect_indexed_read(sh2, SHR_TMP, SHR_R0, SHR_GBR, 0); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_bic_r_imm(sr, T); emith_tst_r_imm(tmp, op & 0xff); emit_or_t_if_eq(sr); rcache_free_tmp(tmp); goto end_op; case 0x0d00: // AND.B #imm,@(R0,GBR) 11001101iiiiiiii - tmp = emit_indirect_indexed_read(SHR_TMP, SHR_R0, SHR_GBR, 0); - emith_and_r_imm(tmp, op & 0xff); + tmp = emit_indirect_indexed_read(sh2, SHR_TMP, SHR_R0, SHR_GBR, 0); + tmp2 = rcache_get_tmp_arg(1); + emith_and_r_r_imm(tmp2, tmp, (op & 0xff)); goto end_rmw_op; case 0x0e00: // XOR.B #imm,@(R0,GBR) 11001110iiiiiiii - tmp = emit_indirect_indexed_read(SHR_TMP, SHR_R0, SHR_GBR, 0); - emith_eor_r_imm(tmp, op & 0xff); + tmp = emit_indirect_indexed_read(sh2, SHR_TMP, SHR_R0, SHR_GBR, 0); + tmp2 = rcache_get_tmp_arg(1); + emith_eor_r_r_imm(tmp2, tmp, (op & 0xff)); goto end_rmw_op; case 
0x0f00: // OR.B #imm,@(R0,GBR) 11001111iiiiiiii - tmp = emit_indirect_indexed_read(SHR_TMP, SHR_R0, SHR_GBR, 0); - emith_or_r_imm(tmp, op & 0xff); - end_rmw_op: + tmp = emit_indirect_indexed_read(sh2, SHR_TMP, SHR_R0, SHR_GBR, 0); tmp2 = rcache_get_tmp_arg(1); - emith_move_r_r(tmp2, tmp); + emith_or_r_r_imm(tmp2, tmp, (op & 0xff)); + end_rmw_op: rcache_free_tmp(tmp); - tmp3 = rcache_get_reg_arg(0, SHR_GBR); - tmp4 = rcache_get_reg(SHR_R0, RC_GR_READ); - emith_add_r_r(tmp3, tmp4); - emit_memhandler_write(0); + emit_indirect_indexed_write(sh2, SHR_TMP, SHR_R0, SHR_GBR, 0); goto end_op; } goto default_; @@ -2786,7 +3438,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) ///////////////////////////////////////////// case 0x0e: // MOV #imm,Rn 1110nnnniiiiiiii - emit_move_r_imm32(GET_Rn(), (u32)(signed int)(signed char)op); + emit_move_r_imm32(GET_Rn(), (s8)op); goto end_op; default: @@ -2810,7 +3462,7 @@ end_op: // test irq? if (drcf.test_irq && !drcf.pending_branch_direct) { - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); FLUSH_CYCLES(sr); if (!drcf.pending_branch_indirect) emit_move_r_imm32(SHR_PC, pc); @@ -2831,16 +3483,16 @@ end_op: void *target = NULL; int ctaken = 0; - if (opd_b->op == OP_BRANCH_CT || opd_b->op == OP_BRANCH_CF) { + if (OP_ISBRACND(opd_b->op)) { ctaken = (op_flags[i] & OF_DELAY_OP) ? 1 : 2; } cycles += ctaken; // assume branch taken - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); FLUSH_CYCLES(sr); rcache_clean(); // emit condition test for conditional branch - if (opd_b->op == OP_BRANCH_CT || opd_b->op == OP_BRANCH_CF) { + if (OP_ISBRACND(opd_b->op)) { cond = (opd_b->op == OP_BRANCH_CF) ? 
DCOND_EQ : DCOND_NE; if (delay_dep_fw & BITMASK1(SHR_T)) emith_tst_r_imm(sr, T_save); @@ -2893,7 +3545,7 @@ end_op: drcf.pending_branch_direct = 0; } else if (drcf.pending_branch_indirect) { - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); FLUSH_CYCLES(sr); rcache_flush(); emith_jump(sh2_drc_dispatcher); @@ -2903,22 +3555,17 @@ end_op: do_host_disasm(tcache_id); } - tmp = rcache_get_reg(SHR_SR, RC_GR_RMW); - FLUSH_CYCLES(tmp); - rcache_flush(); - // check the last op if (op_flags[i-1] & OF_DELAY_OP) opd = &ops[i-2]; else opd = &ops[i-1]; - if (opd->op != OP_BRANCH && opd->op != OP_BRANCH_R - && opd->op != OP_BRANCH_RF && opd->op != OP_RTE) + if (! OP_ISBRAUC(opd->op)) { void *target; - s32 tmp = rcache_get_reg(SHR_SR, RC_GR_RMW); + s32 tmp = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); FLUSH_CYCLES(tmp); emit_move_r_imm32(SHR_PC, pc); @@ -2975,8 +3622,8 @@ end_op: // mark literals for (i = 0; i < literal_addr_count; i++) { - tmp = literal_addr[i]; - drc_ram_blk[(tmp & mask) >> shift] = 1; + u = literal_addr[i]; + drc_ram_blk[(u & mask) >> shift] = 1; } // add to invalidation lookup lists @@ -3037,7 +3684,7 @@ static void sh2_generate_utils(void) // sh2_drc_dispatcher(void) sh2_drc_dispatcher = (void *)tcache_ptr; - sr = rcache_get_reg(SHR_SR, RC_GR_READ); + sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); emith_cmp_r_imm(sr, 0); emith_jump_cond(DCOND_LT, sh2_drc_exit); rcache_invalidate(); @@ -3065,7 +3712,7 @@ static void sh2_generate_utils(void) // assumes it's called from main function (may jump to dispatcher) sh2_drc_test_irq = (void *)tcache_ptr; emith_ctx_read(arg1, offsetof(SH2, pending_level)); - sr = rcache_get_reg(SHR_SR, RC_GR_READ); + sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); emith_lsr(arg0, sr, I_SHIFT); emith_and_r_imm(arg0, 0x0f); emith_cmp_r_r(arg1, arg0); // pending_level > ((sr >> 4) & 0x0f)? 
@@ -3073,26 +3720,26 @@ static void sh2_generate_utils(void) emith_ret_c(DCOND_LE); // nope, return EMITH_SJMP_END(DCOND_GT); // adjust SP - tmp = rcache_get_reg(SHR_SP, RC_GR_RMW); + tmp = rcache_get_reg(SHR_SP, RC_GR_RMW, NULL); emith_sub_r_imm(tmp, 4*2); rcache_clean(); // push SR - tmp = rcache_get_reg_arg(0, SHR_SP); + tmp = rcache_get_reg_arg(0, SHR_SP, NULL); emith_add_r_imm(tmp, 4); - tmp = rcache_get_reg_arg(1, SHR_SR); + tmp = rcache_get_reg_arg(1, SHR_SR, NULL); emith_clear_msb(tmp, tmp, 22); emith_move_r_r_ptr(arg2, CONTEXT_REG); emith_call(p32x_sh2_write32); // XXX: use sh2_drc_write32? rcache_invalidate(); // push PC - rcache_get_reg_arg(0, SHR_SP); + rcache_get_reg_arg(0, SHR_SP, NULL); emith_ctx_read(arg1, SHR_PC * 4); emith_move_r_r_ptr(arg2, CONTEXT_REG); emith_call(p32x_sh2_write32); rcache_invalidate(); // update I, cycles, do callback emith_ctx_read(arg1, offsetof(SH2, pending_level)); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_bic_r_imm(sr, I); emith_or_r_r_lsl(sr, arg1, I_SHIFT); emith_sub_r_imm(sr, 13 << 12); // at least 13 cycles @@ -3441,6 +4088,7 @@ int sh2_drc_init(SH2 *sh2) memset(block_link_pool_counts, 0, sizeof(block_link_pool_counts)); drc_cmn_init(); + rcache_init(); tcache_ptr = tcache; sh2_generate_utils(); host_instructions_updated(tcache, tcache_ptr); @@ -4102,7 +4750,7 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, case 0x07: // ADD #imm,Rn 0111nnnniiiiiiii opd->source = opd->dest = BITMASK1(GET_Rn()); - opd->imm = (int)(signed char)op; + opd->imm = (s8)op; break; ///////////////////////////////////////////// @@ -4132,7 +4780,7 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, case 0x0800: // CMP/EQ #imm,R0 10001000iiiiiiii opd->source = BITMASK1(SHR_R0); opd->dest = BITMASK1(SHR_T); - opd->imm = (int)(signed char)op; + opd->imm = (s8)op; break; case 0x0d00: // BT/S label 10001101dddddddd case 0x0f00: // BF/S label 
10001111dddddddd @@ -4305,7 +4953,7 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, case 0x0e: // MOV #imm,Rn 1110nnnniiiiiiii opd->dest = BITMASK1(GET_Rn()); - opd->imm = (u32)(signed int)(signed char)op; + opd->imm = (s8)op; break; default: @@ -4369,7 +5017,7 @@ end: } // "overscan" detection: unreachable code after unconditional branch // this can happen if the insn after a forward branch isn't a local target - if (opd->op == OP_BRANCH || opd->op == OP_BRANCH_R || opd->op == OP_BRANCH_RF) { + if (OP_ISBRAUC(opd->op)) { if (op_flags[i + 1] & OF_DELAY_OP) { if (i_end > i + 2 && !(op_flags[i + 2] & OF_BTARGET)) i_end = i + 2; diff --git a/cpu/sh2/compiler.h b/cpu/sh2/compiler.h index c9cf7ab0..b690435c 100644 --- a/cpu/sh2/compiler.h +++ b/cpu/sh2/compiler.h @@ -31,8 +31,10 @@ void scan_block(unsigned int base_pc, int is_slave, // XXX MUST match definitions in cpu/sh2/compiler.c #if defined(__arm__) #define DRC_SR_REG r10 -#elif defined(__i386__) || defined(__x86_64__) +#elif defined(__i386__) #define DRC_SR_REG edi +#elif defined(__x86_64__) +#define DRC_SR_REG ebx #else #warning "direct DRC register access not available for this host" #endif From 6822ba9d646d09a091e9e9b03a7cefc3ff77e620 Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 3 Apr 2019 23:21:20 +0200 Subject: [PATCH 0182/1110] sh2 memory interface optimzations --- cpu/drc/emit_arm.c | 38 +++++- cpu/drc/emit_x86.c | 68 +++++++++- cpu/sh2/compiler.c | 123 ++++++++++------- cpu/sh2/compiler.h | 4 +- cpu/sh2/sh2.h | 3 + pico/32x/memory.c | 180 ++++++++++++------------- pico/32x/memory_arm.S | 305 ++++++++++++++++++++++++++++++++++++++++++ pico/32x/sh2soc.c | 6 +- pico/pico_int.h | 6 +- tools/mkoffsets.sh | 9 ++ 10 files changed, 585 insertions(+), 157 deletions(-) create mode 100644 pico/32x/memory_arm.S diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index 4421c641..c255a8b8 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -65,6 +65,9 @@ #define DCOND_VS A_COND_VS 
#define DCOND_VC A_COND_VC +#define DCOND_CS A_COND_HS +#define DCOND_CC A_COND_LO + /* addressing mode 1 */ #define A_AM1_LSL 0 #define A_AM1_LSR 1 @@ -184,8 +187,10 @@ #define EOP_STR_SIMPLE(rd,rn) EOP_C_AM2_IMM(A_COND_AL,1,0,0,rn,rd,0) #define EOP_LDR_REG_LSL(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,0,1,rn,rd,shift_imm,A_AM1_LSL,rm) +#define EOP_LDRB_REG_LSL(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,1,1,rn,rd,shift_imm,A_AM1_LSL,rm); #define EOP_LDRH_IMM2(cond,rd,rn,offset_8) EOP_C_AM3_IMM(cond,1,1,rn,rd,0,1,offset_8) +#define EOP_LDRH_REG2(cond,rd,rn,rm) EOP_C_AM3_REG(cond,1,1,rn,rd,0,1,rm) #define EOP_LDRH_IMM( rd,rn,offset_8) EOP_C_AM3_IMM(A_COND_AL,1,1,rn,rd,0,1,offset_8) #define EOP_LDRH_SIMPLE(rd,rn) EOP_C_AM3_IMM(A_COND_AL,1,1,rn,rd,0,1,0) @@ -479,6 +484,8 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_adc_r_r(d, s) \ EOP_ADC_REG(A_COND_AL,0,d,d,s,A_AM1_LSL,0) +#define emith_and_r_r_c(cond, d, s) \ + EOP_AND_REG(cond,0,d,d,s,A_AM1_LSL,0) #define emith_and_r_r(d, s) \ EOP_AND_REG(A_COND_AL,0,d,d,s,A_AM1_LSL,0) @@ -677,12 +684,24 @@ static int emith_xbranch(int cond, void *target, int is_call) // misc #define emith_read_r_r_offs_c(cond, r, rs, offs) \ EOP_LDR_IMM2(cond, r, rs, offs) +#define emith_read_r_r_r_c(cond, r, rs, rm) \ + EOP_LDR_REG_LSL(cond, r, rs, rm, 0) +#define emith_read_r_r_r(r, rs, rm) \ + EOP_LDR_REG_LSL(A_COND_AL, r, rs, rm, 0) #define emith_read8_r_r_offs_c(cond, r, rs, offs) \ EOP_LDRB_IMM2(cond, r, rs, offs) +#define emith_read8_r_r_r_c(cond, r, rs, rm) \ + EOP_LDRB_REG_LSL(cond, r, rs, rm, 0) +#define emith_read8_r_r_r(r, rs, rm) \ + EOP_LDRB_REG_LSL(A_COND_AL, r, rs, rm, 0) #define emith_read16_r_r_offs_c(cond, r, rs, offs) \ EOP_LDRH_IMM2(cond, r, rs, offs) +#define emith_read16_r_r_r_c(cond, r, rs, rm) \ + EOP_LDRH_REG2(cond, r, rs, rm) +#define emith_read16_r_r_r(r, rs, rm) \ + EOP_LDRH_REG2(A_COND_AL, r, rs, rm) #define emith_read_r_r_offs(r, rs, offs) \ emith_read_r_r_offs_c(A_COND_AL, r, 
rs, offs) @@ -844,11 +863,20 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_sh2_drc_exit() \ EOP_LDMFD_SP(A_R4M|A_R5M|A_R6M|A_R7M|A_R8M|A_R9M|A_R10M|A_R11M|A_R12M|A_R15M) -#define emith_sh2_wcall(a, tab) { \ - emith_lsr(12, a, SH2_WRITE_SHIFT); \ - EOP_LDR_REG_LSL(A_COND_AL,12,tab,12,2); \ - emith_move_r_r(2, CONTEXT_REG); \ - emith_jump_reg(12); \ +// assumes a is in arg0, tab, func and mask are temp +#define emith_sh2_rcall(a, tab, func, mask) { \ + emith_lsr(mask, a, SH2_READ_SHIFT); \ + EOP_ADD_REG_LSL(tab, tab, mask, 3); \ + EOP_LDMIA(tab, (1<> WRT_SHIFT */ \ + EMIT_REX_IF(1, func, tab); \ + EMIT_OP_MODRM64(0x8b, 0, func, 4); \ + EMIT_SIB64(PTR_SCALE, func, tab); /* mov tmp, [tab + tmp * {4,8}] */ \ emith_move_r_r_ptr(arg2_, CONTEXT_REG); \ - emith_jump_reg(NA_TMP_REG); \ + emith_jump_reg(func); \ } #define emith_sh2_dtbf_loop() { \ diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 1b300cc3..bfd98e2b 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -328,7 +328,7 @@ struct block_list { static struct block_list **inval_lookup[TCACHE_BUFFERS]; static const int hash_table_sizes[TCACHE_BUFFERS] = { - 0x1000, + 0x4000, 0x100, 0x100, }; @@ -498,12 +498,12 @@ static void (*sh2_drc_dispatcher)(void); static void (*sh2_drc_exit)(void); static void (*sh2_drc_test_irq)(void); -static u32 REGPARM(2) (*sh2_drc_read8)(u32 a, SH2 *sh2); -static u32 REGPARM(2) (*sh2_drc_read16)(u32 a, SH2 *sh2); -static u32 REGPARM(2) (*sh2_drc_read32)(u32 a, SH2 *sh2); +static u32 REGPARM(1) (*sh2_drc_read8)(u32 a); +static u32 REGPARM(1) (*sh2_drc_read16)(u32 a); +static u32 REGPARM(1) (*sh2_drc_read32)(u32 a); static void REGPARM(2) (*sh2_drc_write8)(u32 a, u32 d); static void REGPARM(2) (*sh2_drc_write16)(u32 a, u32 d); -static void REGPARM(3) (*sh2_drc_write32)(u32 a, u32 d, SH2 *sh2); +static void REGPARM(2) (*sh2_drc_write32)(u32 a, u32 d); // flags for memory access #define MF_SIZEMASK 0x03 // size of access @@ -787,7 +787,7 @@ static 
void *dr_prepare_ext_branch(u32 pc, int is_slave, int tcache_id) cnt = i + 1; if (cnt >= block_link_pool_max_counts[tcache_id]) { dbg(1, "bl overflow for tcache %d", tcache_id); - return NULL; + return sh2_drc_dispatcher; } bl += cnt; block_link_pool_counts[tcache_id]++; @@ -848,7 +848,7 @@ static void dr_link_blocks(struct block_entry *be, int tcache_id) dbg(1, "warning: " #array " overflow"); \ failcode; \ } else \ - array[count++] = item; \ + array[count++] = item; \ } static int find_in_array(u32 *array, size_t size, u32 what) @@ -1806,7 +1806,7 @@ static int emit_get_rbase_and_offs(SH2 *sh2, u32 a, u32 *offs) hr = rcache_get_tmp(); if (mask < 0x1000) { // can't access data array or BIOS directly from ROM or SDRAM, - // since code may run on both SH2s (if the tcache_id would be known...) + // since code may run on both SH2s (tcache_id of translation block needed)) emith_ctx_read(hr, poffs); if (a & mask & ~omask) emith_add_r_imm(hr, a & mask & ~omask); @@ -1896,8 +1896,6 @@ static void emit_or_t_if_eq(int srr) // rd = @(arg0) static int emit_memhandler_read(int size) { - int arg1; - rcache_clean_tmp(); #ifndef DRC_SR_REG // must writeback cycles for poll detection stuff @@ -1905,8 +1903,6 @@ static int emit_memhandler_read(int size) rcache_evict_vreg(guest_regs[SHR_SR].vreg); #endif - arg1 = rcache_get_tmp_arg(1); - emith_move_r_r_ptr(arg1, CONTEXT_REG); switch (size & MF_SIZEMASK) { case 0: emith_call(sh2_drc_read8); break; // 8 case 1: emith_call(sh2_drc_read16); break; // 16 @@ -1920,16 +1916,12 @@ static int emit_memhandler_read(int size) // @(arg0) = arg1 static void emit_memhandler_write(int size) { - int arg2; - rcache_clean_tmp(); #ifndef DRC_SR_REG if (guest_regs[SHR_SR].vreg != -1) rcache_evict_vreg(guest_regs[SHR_SR].vreg); #endif - arg2 = rcache_get_tmp_arg(2); - emith_move_r_r_ptr(arg2, CONTEXT_REG); switch (size & MF_SIZEMASK) { case 0: emith_call(sh2_drc_write8); break; // 8 case 1: emith_call(sh2_drc_write16); break; // 16 @@ -2372,7 +2364,6 @@ 
static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) rcache_unlock_all(); #if (DRC_DEBUG & (8|256|512|1024)) - emit_move_r_imm32(SHR_PC, pc); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); FLUSH_CYCLES(sr); rcache_clean(); @@ -2392,7 +2383,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) #ifdef DRC_CMP if (!(op_flags[i] & OF_DELAY_OP)) { - emit_move_r_imm32(SHR_PC, pc); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); FLUSH_CYCLES(sr); rcache_clean(); @@ -3666,16 +3656,69 @@ end_op: static void sh2_generate_utils(void) { - int arg0, arg1, arg2, sr, tmp; - - sh2_drc_read8 = p32x_sh2_read8; - sh2_drc_read16 = p32x_sh2_read16; - sh2_drc_read32 = p32x_sh2_read32; + int arg0, arg1, arg2, arg3, sr, tmp; host_arg2reg(arg0, 0); host_arg2reg(arg1, 1); host_arg2reg(arg2, 2); + host_arg2reg(arg3, 3); emith_move_r_r(arg0, arg0); // nop + emith_move_r_r(arg1, arg1); // nop + emith_move_r_r(arg2, arg2); // nop + emith_move_r_r(arg3, arg3); // nop + + // sh2_drc_write8(u32 a, u32 d) + sh2_drc_write8 = (void *)tcache_ptr; + emith_ctx_read_ptr(arg2, offsetof(SH2, write8_tab)); + emith_sh2_wcall(arg0, arg1, arg2, arg3); + + // sh2_drc_write16(u32 a, u32 d) + sh2_drc_write16 = (void *)tcache_ptr; + emith_ctx_read_ptr(arg2, offsetof(SH2, write16_tab)); + emith_sh2_wcall(arg0, arg1, arg2, arg3); + + // sh2_drc_write32(u32 a, u32 d) + sh2_drc_write32 = (void *)tcache_ptr; + emith_ctx_read_ptr(arg2, offsetof(SH2, write32_tab)); + emith_sh2_wcall(arg0, arg1, arg2, arg3); + + // d = sh2_drc_read8(u32 a) + sh2_drc_read8 = (void *)tcache_ptr; + emith_ctx_read_ptr(arg1, offsetof(SH2, read8_map)); + emith_sh2_rcall(arg0, arg1, arg2, arg3); + EMITH_SJMP_START(DCOND_CS); + emith_and_r_r_c(DCOND_CC, arg0, arg3); + emith_eor_r_imm_c(DCOND_CC, arg0, 1); + emith_read8_r_r_r_c(DCOND_CC, RET_REG, arg0, arg2); + emith_ret_c(DCOND_CC); + EMITH_SJMP_END(DCOND_CS); + emith_move_r_r_ptr(arg1, CONTEXT_REG); + emith_jump_reg(arg2); + + // d = sh2_drc_read16(u32 a) + sh2_drc_read16 = 
(void *)tcache_ptr; + emith_ctx_read_ptr(arg1, offsetof(SH2, read16_map)); + emith_sh2_rcall(arg0, arg1, arg2, arg3); + EMITH_SJMP_START(DCOND_CS); + emith_and_r_r_c(DCOND_CC, arg0, arg3); + emith_read16_r_r_r_c(DCOND_CC, RET_REG, arg0, arg2); + emith_ret_c(DCOND_CC); + EMITH_SJMP_END(DCOND_CS); + emith_move_r_r_ptr(arg1, CONTEXT_REG); + emith_jump_reg(arg2); + + // d = sh2_drc_read32(u32 a) + sh2_drc_read32 = (void *)tcache_ptr; + emith_ctx_read_ptr(arg1, offsetof(SH2, read32_map)); + emith_sh2_rcall(arg0, arg1, arg2, arg3); + EMITH_SJMP_START(DCOND_CS); + emith_and_r_r_c(DCOND_CC, arg0, arg3); + emith_read_r_r_r_c(DCOND_CC, RET_REG, arg0, arg2); + emith_ror_c(DCOND_CC, RET_REG, RET_REG, 16); + emith_ret_c(DCOND_CC); + EMITH_SJMP_END(DCOND_CS); + emith_move_r_r_ptr(arg1, CONTEXT_REG); + emith_jump_reg(arg2); // sh2_drc_exit(void) sh2_drc_exit = (void *)tcache_ptr; @@ -3766,21 +3809,6 @@ static void sh2_generate_utils(void) emith_call(sh2_drc_test_irq); emith_jump(sh2_drc_dispatcher); - // sh2_drc_write8(u32 a, u32 d) - sh2_drc_write8 = (void *)tcache_ptr; - emith_ctx_read_ptr(arg2, offsetof(SH2, write8_tab)); - emith_sh2_wcall(arg0, arg2); - - // sh2_drc_write16(u32 a, u32 d) - sh2_drc_write16 = (void *)tcache_ptr; - emith_ctx_read_ptr(arg2, offsetof(SH2, write16_tab)); - emith_sh2_wcall(arg0, arg2); - - // sh2_drc_write32(u32 a, u32 d) - sh2_drc_write32 = (void *)tcache_ptr; - emith_ctx_read_ptr(arg2, offsetof(SH2, write32_tab)); - emith_sh2_wcall(arg0, arg2); - #ifdef PDB_NET // debug #define MAKE_READ_WRAPPER(func) { \ @@ -3815,11 +3843,6 @@ static void sh2_generate_utils(void) MAKE_WRITE_WRAPPER(sh2_drc_write8); MAKE_WRITE_WRAPPER(sh2_drc_write16); MAKE_WRITE_WRAPPER(sh2_drc_write32); -#if (DRC_DEBUG & 4) - host_dasm_new_symbol(sh2_drc_read8); - host_dasm_new_symbol(sh2_drc_read16); - host_dasm_new_symbol(sh2_drc_read32); -#endif #endif rcache_invalidate(); @@ -3831,6 +3854,9 @@ static void sh2_generate_utils(void) host_dasm_new_symbol(sh2_drc_write8); 
host_dasm_new_symbol(sh2_drc_write16); host_dasm_new_symbol(sh2_drc_write32); + host_dasm_new_symbol(sh2_drc_read8); + host_dasm_new_symbol(sh2_drc_read16); + host_dasm_new_symbol(sh2_drc_read32); #endif } @@ -3955,14 +3981,15 @@ static void sh2_smc_rm_blocks(u32 a, u16 *drc_ram_blk, int tcache_id, u32 shift, } } -void sh2_drc_wcheck_ram(unsigned int a, int val, int cpuid) +void sh2_drc_wcheck_ram(unsigned int a, int val, SH2 *sh2) { - dbg(2, "%csh2 smc check @%08x", cpuid ? 's' : 'm', a); + dbg(2, "%csh2 smc check @%08x", sh2->is_slave ? 's' : 'm', a); sh2_smc_rm_blocks(a, Pico32xMem->drcblk_ram, 0, SH2_DRCBLK_RAM_SHIFT, 0x3ffff); } -void sh2_drc_wcheck_da(unsigned int a, int val, int cpuid) +void sh2_drc_wcheck_da(unsigned int a, int val, SH2 *sh2) { + int cpuid = sh2->is_slave; dbg(2, "%csh2 smc check @%08x", cpuid ? 's' : 'm', a); sh2_smc_rm_blocks(a, Pico32xMem->drcblk_da[cpuid], 1 + cpuid, SH2_DRCBLK_DA_SHIFT, 0xfff); @@ -4051,6 +4078,9 @@ void sh2_drc_mem_setup(SH2 *sh2) sh2->p_da = sh2->data_array; sh2->p_sdram = Pico32xMem->sdram; sh2->p_rom = Pico.rom; + // sh2->p_dram filled in dram bank switching + sh2->p_drcblk_da = Pico32xMem->drcblk_da[!!sh2->is_slave]; + sh2->p_drcblk_ram = Pico32xMem->drcblk_ram; } void sh2_drc_frame(void) @@ -4103,6 +4133,7 @@ int sh2_drc_init(SH2 *sh2) // disasm the utils tcache_dsm_ptrs[0] = tcache; do_host_disasm(0); + fflush(stdout); #endif #if (DRC_DEBUG & 1) hash_collisions = 0; diff --git a/cpu/sh2/compiler.h b/cpu/sh2/compiler.h index b690435c..6a8596b8 100644 --- a/cpu/sh2/compiler.h +++ b/cpu/sh2/compiler.h @@ -1,7 +1,7 @@ int sh2_drc_init(SH2 *sh2); void sh2_drc_finish(SH2 *sh2); -void sh2_drc_wcheck_ram(unsigned int a, int val, int cpuid); -void sh2_drc_wcheck_da(unsigned int a, int val, int cpuid); +void sh2_drc_wcheck_ram(unsigned int a, int val, SH2 *sh2); +void sh2_drc_wcheck_da(unsigned int a, int val, SH2 *sh2); #ifdef DRC_SH2 void sh2_drc_mem_setup(SH2 *sh2); diff --git a/cpu/sh2/sh2.h b/cpu/sh2/sh2.h index 
7faa844b..a073d43f 100644 --- a/cpu/sh2/sh2.h +++ b/cpu/sh2/sh2.h @@ -36,6 +36,9 @@ typedef struct SH2_ void *p_da; void *p_sdram; void *p_rom; + void *p_dram; + void *p_drcblk_da; + void *p_drcblk_ram; unsigned int pdb_io_csum[2]; #define SH2_STATE_RUN (1 << 0) // to prevent recursion diff --git a/pico/32x/memory.c b/pico/32x/memory.c index 8f2a7c2f..47329835 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -1066,41 +1066,41 @@ void PicoWrite16_32x(u32 a, u32 d) } /* quirk: in both normal and overwrite areas only nonzero values go through */ -#define sh2_write8_dramN(n) \ +#define sh2_write8_dramN(p, a, d) \ if ((d & 0xff) != 0) { \ - u8 *dram = (u8 *)Pico32xMem->dram[n]; \ + u8 *dram = (u8 *)p; \ dram[(a & 0x1ffff) ^ 1] = d; \ } static void m68k_write8_dram0_ow(u32 a, u32 d) { - sh2_write8_dramN(0); + sh2_write8_dramN(Pico32xMem->dram[0], a, d); } static void m68k_write8_dram1_ow(u32 a, u32 d) { - sh2_write8_dramN(1); + sh2_write8_dramN(Pico32xMem->dram[1], a, d); } -#define sh2_write16_dramN(n) \ - u16 *pd = &Pico32xMem->dram[n][(a & 0x1ffff) / 2]; \ +#define sh2_write16_dramN(p, a, d) \ + u16 *pd = &((u16 *)p)[(a & 0x1ffff) / 2]; \ if (!(a & 0x20000)) { \ *pd = d; \ - return; \ - } \ - /* overwrite */ \ - if (!(d & 0x00ff)) d |= *pd & 0x00ff; \ - if (!(d & 0xff00)) d |= *pd & 0xff00; \ - *pd = d; + } else { \ + u16 v = *pd; /* overwrite */ \ + if (!(d & 0x00ff)) d |= v & 0x00ff; \ + if (!(d & 0xff00)) d |= v & 0xff00; \ + *pd = d; \ + } static void m68k_write16_dram0_ow(u32 a, u32 d) { - sh2_write16_dramN(0); + sh2_write16_dramN(Pico32xMem->dram[0], a, d); } static void m68k_write16_dram1_ow(u32 a, u32 d) { - sh2_write16_dramN(1); + sh2_write16_dramN(Pico32xMem->dram[1], a, d); } // ----------------------------------------------------------------- @@ -1229,14 +1229,14 @@ static void bank_switch_rom_68k(int b) // ----------------------------------------------------------------- // read8 -static u32 sh2_read8_unmapped(u32 a, SH2 *sh2) +static REGPARM(2) 
u32 sh2_read8_unmapped(u32 a, SH2 *sh2) { elprintf_sh2(sh2, EL_32X, "unmapped r8 [%08x] %02x @%06x", a, 0, sh2_pc(sh2)); return 0; } -static u32 sh2_read8_cs0(u32 a, SH2 *sh2) +static u32 REGPARM(2) sh2_read8_cs0(u32 a, SH2 *sh2) { u32 d = 0; DRC_SAVE_SR(sh2); @@ -1282,27 +1282,28 @@ out: return d; } -static u32 sh2_read8_da(u32 a, SH2 *sh2) +static u32 REGPARM(2) sh2_read8_da(u32 a, SH2 *sh2) { return sh2->data_array[(a & 0xfff) ^ 1]; } // for ssf2 -static u32 sh2_read8_rom(u32 a, SH2 *sh2) +static u32 REGPARM(2) sh2_read8_rom(u32 a, SH2 *sh2) { u32 bank = carthw_ssf2_banks[(a >> 19) & 7] << 19; - return Pico.rom[(bank + (a & 0x7ffff)) ^ 1]; + u8 *p = sh2->p_rom; + return p[(bank + (a & 0x7ffff)) ^ 1]; } // read16 -static u32 sh2_read16_unmapped(u32 a, SH2 *sh2) +static u32 REGPARM(2) sh2_read16_unmapped(u32 a, SH2 *sh2) { elprintf_sh2(sh2, EL_32X, "unmapped r16 [%08x] %04x @%06x", a, 0, sh2_pc(sh2)); return 0; } -static u32 sh2_read16_cs0(u32 a, SH2 *sh2) +static u32 REGPARM(2) sh2_read16_cs0(u32 a, SH2 *sh2) { u32 d = 0; DRC_SAVE_SR(sh2); @@ -1342,39 +1343,41 @@ out_noprint: return d; } -static u32 sh2_read16_da(u32 a, SH2 *sh2) +static u32 REGPARM(2) sh2_read16_da(u32 a, SH2 *sh2) { return ((u16 *)sh2->data_array)[(a & 0xffe) / 2]; } -static u32 sh2_read16_rom(u32 a, SH2 *sh2) +static u32 REGPARM(2) sh2_read16_rom(u32 a, SH2 *sh2) { u32 bank = carthw_ssf2_banks[(a >> 19) & 7] << 19; - return *(u16 *)(Pico.rom + bank + (a & 0x7fffe)); + u16 *p = sh2->p_rom; + return p[(bank + (a & 0x7fffe)) / 2]; } -static u32 sh2_read32_unmapped(u32 a, SH2 *sh2) +static u32 REGPARM(2) sh2_read32_unmapped(u32 a, SH2 *sh2) { elprintf_sh2(sh2, EL_32X, "unmapped r32 [%08x] %08x @%06x", a, 0, sh2_pc(sh2)); return 0; } -static u32 sh2_read32_cs0(u32 a, SH2 *sh2) +static u32 REGPARM(2) sh2_read32_cs0(u32 a, SH2 *sh2) { return (sh2_read16_cs0(a, sh2) << 16) | sh2_read16_cs0(a + 2, sh2); } -static u32 sh2_read32_da(u32 a, SH2 *sh2) +static u32 REGPARM(2) sh2_read32_da(u32 a, SH2 *sh2) { 
u32 d = *((u32 *)sh2->data_array + (a & 0xffc)/4); return (d << 16) | (d >> 16); } -static u32 sh2_read32_rom(u32 a, SH2 *sh2) +static u32 REGPARM(2) sh2_read32_rom(u32 a, SH2 *sh2) { u32 bank = carthw_ssf2_banks[(a >> 19) & 7] << 19; - u32 d = *(u32 *)(Pico.rom + bank + (a & 0x7fffc)); + u32 *p = sh2->p_rom; + u32 d = p[(bank + (a & 0x7fffc)) / 4]; return (d << 16) | (d >> 16); } @@ -1420,25 +1423,21 @@ out: DRC_RESTORE_SR(sh2); } -static void REGPARM(3) sh2_write8_dram0(u32 a, u32 d, SH2 *sh2) +static void REGPARM(3) sh2_write8_dram(u32 a, u32 d, SH2 *sh2) { - sh2_write8_dramN(0); -} - -static void REGPARM(3) sh2_write8_dram1(u32 a, u32 d, SH2 *sh2) -{ - sh2_write8_dramN(1); + sh2_write8_dramN(sh2->p_dram, a, d); } static void REGPARM(3) sh2_write8_sdram(u32 a, u32 d, SH2 *sh2) { u32 a1 = a & 0x3ffff; #ifdef DRC_SH2 - int t = Pico32xMem->drcblk_ram[a1 >> SH2_DRCBLK_RAM_SHIFT]; + u16 *p = sh2->p_drcblk_ram; + int t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; if (t) - sh2_drc_wcheck_ram(a, t, sh2->is_slave); + sh2_drc_wcheck_ram(a, t, sh2); #endif - Pico32xMem->sdram[a1 ^ 1] = d; + ((u8 *)sh2->p_sdram)[a1 ^ 1] = d; } static void REGPARM(3) sh2_write8_sdram_wt(u32 a, u32 d, SH2 *sh2) @@ -1457,10 +1456,10 @@ static void REGPARM(3) sh2_write8_da(u32 a, u32 d, SH2 *sh2) { u32 a1 = a & 0xfff; #ifdef DRC_SH2 - int id = sh2->is_slave; - int t = Pico32xMem->drcblk_da[id][a1 >> SH2_DRCBLK_DA_SHIFT]; + u16 *p = sh2->p_drcblk_da; + int t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; if (t) - sh2_drc_wcheck_da(a, t, id); + sh2_drc_wcheck_da(a, t, sh2); #endif sh2->data_array[a1 ^ 1] = d; } @@ -1503,42 +1502,38 @@ out: DRC_RESTORE_SR(sh2); } -static void REGPARM(3) sh2_write16_dram0(u32 a, u32 d, SH2 *sh2) +static void REGPARM(3) sh2_write16_dram(u32 a, u32 d, SH2 *sh2) { - sh2_write16_dramN(0); -} - -static void REGPARM(3) sh2_write16_dram1(u32 a, u32 d, SH2 *sh2) -{ - sh2_write16_dramN(1); + sh2_write16_dramN(sh2->p_dram, a, d); } static void REGPARM(3) sh2_write16_sdram(u32 a, u32 d, SH2 *sh2) { - 
u32 a1 = a & 0x3ffff; + u32 a1 = a & 0x3fffe; #ifdef DRC_SH2 - int t = Pico32xMem->drcblk_ram[a1 >> SH2_DRCBLK_RAM_SHIFT]; + u16 *p = sh2->p_drcblk_ram; + int t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; if (t) - sh2_drc_wcheck_ram(a, t, sh2->is_slave); + sh2_drc_wcheck_ram(a, t, sh2); #endif - ((u16 *)Pico32xMem->sdram)[a1 / 2] = d; + ((u16 *)sh2->p_sdram)[a1 / 2] = d; } static void REGPARM(3) sh2_write16_da(u32 a, u32 d, SH2 *sh2) { - u32 a1 = a & 0xfff; + u32 a1 = a & 0xffe; #ifdef DRC_SH2 - int id = sh2->is_slave; - int t = Pico32xMem->drcblk_da[id][a1 >> SH2_DRCBLK_DA_SHIFT]; + u16 *p = sh2->p_drcblk_da; + int t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; if (t) - sh2_drc_wcheck_da(a, t, id); + sh2_drc_wcheck_da(a, t, sh2); #endif ((u16 *)sh2->data_array)[a1 / 2] = d; } static void REGPARM(3) sh2_write16_rom(u32 a, u32 d, SH2 *sh2) { - u32 a1 = a & 0x3fffff; + u32 a1 = a & 0x3ffffe; // tweak for WWF Raw: does writes to ROM area, and it doesn't work without // allowing this. // Presumably the write goes to the CPU cache and is read back from there, @@ -1562,54 +1557,53 @@ static void REGPARM(3) sh2_write32_cs0(u32 a, u32 d, SH2 *sh2) sh2_write16_cs0(a + 2, d, sh2); } -#define sh2_write32_dramN(n) \ - u32 *pd = (u32 *)&Pico32xMem->dram[n][(a & 0x1ffff) / 2]; \ +#define sh2_write32_dramN(p, a, d) \ + u32 *pd = &((u32 *)p)[(a & 0x1ffff) / 4]; \ if (!(a & 0x20000)) { \ *pd = (d << 16) | (d >> 16); \ - return; \ - } \ - /* overwrite */ \ - u8 *pb = (u8 *)pd; \ - if (d & 0x000000ff) pb[2] = d; \ - if (d & 0x0000ff00) pb[3] = d >> 8; \ - if (d & 0x00ff0000) pb[0] = d >> 16; \ - if (d & 0xff000000) pb[1] = d >> 24; \ + } else { \ + /* overwrite */ \ + u32 v = *pd, m = 0; d = (d << 16) | (d >> 16) ; \ + if (!(d & 0x000000ff)) m |= 0x000000ff; \ + if (!(d & 0x0000ff00)) m |= 0x0000ff00; \ + if (!(d & 0x00ff0000)) m |= 0x00ff0000; \ + if (!(d & 0xff000000)) m |= 0xff000000; \ + *pd = d | (v&m); \ + } -static void REGPARM(3) sh2_write32_dram0(u32 a, u32 d, SH2 *sh2) +static void REGPARM(3) 
sh2_write32_dram(u32 a, u32 d, SH2 *sh2) { - sh2_write32_dramN(0); -} - -static void REGPARM(3) sh2_write32_dram1(u32 a, u32 d, SH2 *sh2) -{ - sh2_write32_dramN(1); + sh2_write32_dramN(sh2->p_dram, a, d); } static void REGPARM(3) sh2_write32_sdram(u32 a, u32 d, SH2 *sh2) { u32 a1 = a & 0x3fffc; - *(u32 *)(sh2->p_sdram + a1) = (d << 16) | (d >> 16); #ifdef DRC_SH2 - unsigned short *p = &Pico32xMem->drcblk_ram[a1 >> SH2_DRCBLK_RAM_SHIFT]; - if (p[0]) - sh2_drc_wcheck_ram(a, p[0], sh2->is_slave); - if (p[1]) - sh2_drc_wcheck_ram(a+2, p[1], sh2->is_slave); + u16 *p = sh2->p_drcblk_ram; + int t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; + if (t) + sh2_drc_wcheck_ram(a, t, sh2); + int u = p[(a1+2) >> SH2_DRCBLK_RAM_SHIFT]; + if (u) + sh2_drc_wcheck_ram(a+2, u, sh2); #endif + *(u32 *)(sh2->p_sdram + a1) = (d << 16) | (d >> 16); } static void REGPARM(3) sh2_write32_da(u32 a, u32 d, SH2 *sh2) { u32 a1 = a & 0xffc; - *((u32 *)sh2->data_array + a1/4) = (d << 16) | (d >> 16); #ifdef DRC_SH2 - int id = sh2->is_slave; - unsigned short *p = &Pico32xMem->drcblk_da[id][a1 >> SH2_DRCBLK_DA_SHIFT]; - if (p[0]) - sh2_drc_wcheck_da(a, p[0], id); - if (p[1]) - sh2_drc_wcheck_da(a+2, p[1], id); + u16 *p = sh2->p_drcblk_da; + int t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; + if (t) + sh2_drc_wcheck_da(a, t, sh2); + int u = p[(a1+2) >> SH2_DRCBLK_DA_SHIFT]; + if (u) + sh2_drc_wcheck_da(a+2, u, sh2); #endif + *((u32 *)sh2->data_array + a1/4) = (d << 16) | (d >> 16); } static void REGPARM(3) sh2_write32_rom(u32 a, u32 d, SH2 *sh2) @@ -1919,9 +1913,7 @@ void Pico32xSwapDRAM(int b) sh2_read16_map[0x04/2].addr = sh2_read16_map[0x24/2].addr = sh2_read32_map[0x04/2].addr = sh2_read32_map[0x24/2].addr = MAP_MEMORY(Pico32xMem->dram[b]); - sh2_write8_map[0x04/2] = sh2_write8_map[0x24/2] = b ? sh2_write8_dram1 : sh2_write8_dram0; - sh2_write16_map[0x04/2] = sh2_write16_map[0x24/2] = b ? sh2_write16_dram1 : sh2_write16_dram0; - sh2_write32_map[0x04/2] = sh2_write32_map[0x24/2] = b ? 
sh2_write32_dram1 : sh2_write32_dram0; + msh2.p_dram = ssh2.p_dram = Pico32xMem->dram[b]; // DRC conveniance ptr } static void bank_switch_rom_sh2(void) @@ -2035,10 +2027,14 @@ void PicoMemSetup32x(void) sh2_read32_map[0x02/2].mask = sh2_read32_map[0x22/2].mask = 0x3ffffc; // FIXME sh2_write16_map[0x02/2] = sh2_write16_map[0x22/2] = sh2_write16_rom; sh2_write32_map[0x02/2] = sh2_write32_map[0x22/2] = sh2_write32_rom; - // CS2 - DRAM - done by Pico32xSwapDRAM() + // CS2 - DRAM sh2_read8_map[0x04/2].mask = sh2_read8_map[0x24/2].mask = 0x01ffff; sh2_read16_map[0x04/2].mask = sh2_read16_map[0x24/2].mask = 0x01fffe; sh2_read32_map[0x04/2].mask = sh2_read32_map[0x24/2].mask = 0x01fffc; + sh2_write8_map[0x04/2] = sh2_write8_map[0x24/2] = sh2_write8_dram; + sh2_write16_map[0x04/2] = sh2_write16_map[0x24/2] = sh2_write16_dram; + sh2_write32_map[0x04/2] = sh2_write32_map[0x24/2] = sh2_write32_dram; + // CS3 - SDRAM sh2_read8_map[0x06/2].addr = sh2_read8_map[0x26/2].addr = sh2_read16_map[0x06/2].addr = sh2_read16_map[0x26/2].addr = diff --git a/pico/32x/memory_arm.S b/pico/32x/memory_arm.S new file mode 100644 index 00000000..90c86ddf --- /dev/null +++ b/pico/32x/memory_arm.S @@ -0,0 +1,305 @@ +/* + * PicoDrive 32X memory access functions, assembler version + * (C) KUB, 2018 + * + * This work is licensed under the terms of MAME license. + * See COPYING file in the top-level directory. + */ + +#include "../pico_int_o32.h" + +@ 32X bank sizes... 
TODO this should somehow come from an include file +.equ SH2_ROM_SHIFT, 10 @ 0x003fffff +.equ SH2_RAM_SHIFT, 14 @ 0x0003ffff +.equ SH2_DRAM_SHIFT,15 @ 0x0001ffff +.equ SH2_DA_SHIFT, 20 @ 0x00000fff + +.equ SH2_DRAM_OW, 1<<(32-SH2_DRAM_SHIFT) @ DRAM overwrite mode bit + +.text + +@ u32 a +.global sh2_read8_rom +.global sh2_read8_sdram +.global sh2_read8_da +.global sh2_read8_dram +.global sh2_read16_rom +.global sh2_read16_sdram +.global sh2_read16_da +.global sh2_read16_dram +.global sh2_read32_rom +.global sh2_read32_sdram +.global sh2_read32_da +.global sh2_read32_dram + +@ u32 a, u32 d +.global sh2_write8_sdram +.global sh2_write8_da +.global sh2_write8_dram +.global sh2_write16_sdram +.global sh2_write16_da +.global sh2_write16_dram +.global sh2_write32_sdram +.global sh2_write32_da +.global sh2_write32_dram + +sh2_read8_rom: + ldr ip, [r1, #OFS_SH2_p_rom] + eor r0, r0, #1 + lsl r0, #SH2_ROM_SHIFT + ldrb r0, [ip, r0, lsr #SH2_ROM_SHIFT] + bx lr + +sh2_read8_sdram: + ldr ip, [r1, #OFS_SH2_p_sdram] + eor r0, r0, #1 + lsl r0, #SH2_RAM_SHIFT + ldrb r0, [ip, r0, lsr #SH2_RAM_SHIFT] + bx lr + +sh2_read8_da: + ldr ip, [r1, #OFS_SH2_p_da] + eor r0, r0, #1 + lsl r0, #SH2_DA_SHIFT + ldrb r0, [ip, r0, lsr #SH2_DA_SHIFT] + bx lr + +sh2_read8_dram: + ldr ip, [r1, #OFS_SH2_p_dram] + eor r0, r0, #1 + lsl r0, #SH2_DRAM_SHIFT + ldrb r0, [ip, r0, lsr #SH2_DRAM_SHIFT] + bx lr + +sh2_read16_rom: + ldr ip, [r1, #OFS_SH2_p_rom] + lsl r0, #SH2_ROM_SHIFT + lsr r0, #SH2_ROM_SHIFT + ldrh r0, [ip, r0] + bx lr + +sh2_read16_sdram: + ldr ip, [r1, #OFS_SH2_p_sdram] + lsl r0, #SH2_RAM_SHIFT + lsr r0, #SH2_RAM_SHIFT + ldrh r0, [ip, r0] + bx lr + +sh2_read16_da: + ldr ip, [r1, #OFS_SH2_p_da] + lsl r0, #SH2_DA_SHIFT + lsr r0, #SH2_DA_SHIFT + ldrh r0, [ip, r0] + bx lr + +sh2_read16_dram: + ldr ip, [r1, #OFS_SH2_p_dram] + lsl r0, #SH2_DRAM_SHIFT + lsr r0, #SH2_DRAM_SHIFT + ldrh r0, [ip, r0] + bx lr + +sh2_read32_rom: + ldr ip, [r1, #OFS_SH2_p_rom] + lsl r0, #SH2_ROM_SHIFT + ldr r0, [ip, r0, lsr 
#SH2_ROM_SHIFT] + ror r0, r0, #16 + bx lr + +sh2_read32_sdram: + ldr ip, [r1, #OFS_SH2_p_sdram] + lsl r0, #SH2_RAM_SHIFT + ldr r0, [ip, r0, lsr #SH2_RAM_SHIFT] + ror r0, r0, #16 + bx lr + +sh2_read32_da: + ldr ip, [r1, #OFS_SH2_p_da] + lsl r0, #SH2_DA_SHIFT + ldr r0, [ip, r0, lsr #SH2_DA_SHIFT] + ror r0, r0, #16 + bx lr + +sh2_read32_dram: + ldr ip, [r1, #OFS_SH2_p_dram] + lsl r0, #SH2_DRAM_SHIFT + ldr r0, [ip, r0, lsr #SH2_DRAM_SHIFT] + ror r0, r0, #16 + bx lr + +sh2_write8_sdram: + @ preserve r0 and r2 for tail call + ldr ip, [r2, #OFS_SH2_p_sdram] + eor r3, r0, #1 + lsl r3, #SH2_RAM_SHIFT + strb r1, [ip, r3, lsr #SH2_RAM_SHIFT] +#ifdef DRC_SH2 + ldr ip, [r2, #OFS_SH2_p_drcblk_ram] + ldrb r1, [ip, r3, lsr #SH2_RAM_SHIFT+1] + bic r0, r0, #1 + cmp r1, #0 + bxeq lr + b sh2_drc_wcheck_ram +#else + bx lr +#endif + +sh2_write8_da: + @ preserve r0 and r2 for tail call + ldr ip, [r2, #OFS_SH2_p_da] + eor r3, r0, #1 + lsl r3, #SH2_DA_SHIFT + strb r1, [ip, r3, lsr #SH2_DA_SHIFT] +#ifdef DRC_SH2 + ldr ip, [r2, #OFS_SH2_p_drcblk_da] + ldrb r1, [ip, r3, lsr #SH2_DA_SHIFT+1] + bic r0, r0, #1 + cmp r1, #0 + bxeq lr + b sh2_drc_wcheck_da +#else + bx lr +#endif + +sh2_write8_dram: + tst r1, #0xff + ldrne ip, [r2, #OFS_SH2_p_dram] + eorne r3, r0, #1 + lslne r3, #SH2_DRAM_SHIFT + strneb r1, [ip, r3, lsr #SH2_DRAM_SHIFT] + bx lr + +sh2_write16_sdram: + @ preserve r0 and r2 for tail call + ldr ip, [r2, #OFS_SH2_p_sdram] + lsl r3, r0, #SH2_RAM_SHIFT + lsr r3, r3, #SH2_RAM_SHIFT + strh r1, [ip, r3] +#ifdef DRC_SH2 + ldr ip, [r2, #OFS_SH2_p_drcblk_ram] + ldrb r1, [ip, r3, lsr #1] + cmp r1, #0 + bxeq lr + b sh2_drc_wcheck_ram +#else + bx lr +#endif + +sh2_write16_da: + @ preserve r0 and r2 for tail call + ldr ip, [r2, #OFS_SH2_p_da] + lsl r3, r0, #SH2_DA_SHIFT + lsr r3, r3, #SH2_DA_SHIFT + strh r1, [ip, r3] +#ifdef DRC_SH2 + ldr ip, [r2, #OFS_SH2_p_drcblk_da] + ldrb r1, [ip, r3, lsr #1] + cmp r1, #0 + bxeq lr + b sh2_drc_wcheck_da +#else + bx lr +#endif + +sh2_write16_dram: + ldr ip, 
[r2, #OFS_SH2_p_dram] + tst r0, #SH2_DRAM_OW + lsl r3, r0, #SH2_DRAM_SHIFT + lsr r3, r3, #SH2_DRAM_SHIFT + streqh r1, [ip, r3] + bxeq lr + add ip, ip, r3 + tst r1, #0xff + strneb r1, [ip, #0] + tst r1, #0xff00 + lsrne r1, r1, #8 + strneb r1, [ip, #1] + bx lr + +sh2_write32_sdram: + @ preserve r0 and r2 for tail call + ldr ip, [r2, #OFS_SH2_p_sdram] + ror r1, r1, #16 + lsl r3, r0, #SH2_RAM_SHIFT + str r1, [ip, r3, lsr #SH2_RAM_SHIFT] +#ifdef DRC_SH2 + ldr ip, [r2, #OFS_SH2_p_drcblk_ram] + ldrb r1, [ip, r3, lsr #SH2_RAM_SHIFT+1]! + cmp r1, #0 + beq 1f + stmfd sp!, {r0, r1, r2, ip} + bl sh2_drc_wcheck_ram + ldmfd sp!, {r0, r1, r2, ip} +1: ldrb r1, [ip, #1] + cmp r1, #0 + bxeq lr + add r0, r0, #2 + b sh2_drc_wcheck_ram +#else + bx lr +#endif + +sh2_write32_da: + @ preserve r0 and r2 for tail call + ldr ip, [r2, #OFS_SH2_p_da] + ror r1, r1, #16 + lsl r3, r0, #SH2_DA_SHIFT + str r1, [ip, r3, lsr #SH2_DA_SHIFT] +#ifdef DRC_SH2 + ldr ip, [r2, #OFS_SH2_p_drcblk_da] + ldrb r1, [ip, r3, lsr #SH2_DA_SHIFT+1]! 
+ cmp r1, #0 + beq 1f + stmfd sp!, {r0, r1, r2, ip} + bl sh2_drc_wcheck_da + ldmfd sp!, {r0, r1, r2, ip} +1: ldrb r1, [ip, #1] + cmp r1, #0 + bxeq lr + add r0, r0, #2 + b sh2_drc_wcheck_da +#else + bx lr +#endif + +sh2_write32_dram: + ldr ip, [r2, #OFS_SH2_p_dram] + tst r0, #SH2_DRAM_OW + lsl r3, r0, #SH2_DRAM_SHIFT + roreq r1, r1, #16 + streq r1, [ip, r3, lsr #SH2_DRAM_SHIFT] + bxeq lr +#if 1 + ldr r0, [ip, r3, lsr #SH2_DRAM_SHIFT] + ror r1, r1, #16 + mov r2, #0 + tst r1, #0x00ff0000 + orrne r2, r2, #0x00ff0000 + tst r1, #0xff000000 + orrne r2, r2, #0xff000000 + tst r1, #0x000000ff + orrne r2, r2, #0x000000ff + tst r1, #0x0000ff00 + orrne r2, r2, #0x0000ff00 + bic r0, r0, r2 + orr r0, r0, r1 + str r0, [ip, r3, lsr #SH2_DRAM_SHIFT] +#else + add ip, ip, r3, lsr #SH2_DRAM_SHIFT + tst r1, #0x00ff0000 + lsrne r3, r1, #16 + strneb r3, [ip, #0] + tst r1, #0xff000000 + lsrne r3, r1, #24 + strneb r3, [ip, #1] + tst r1, #0x000000ff + strneb r1, [ip, #2] + tst r1, #0x0000ff00 + lsrne r3, r1, #8 + strneb r3, [ip, #3] +#endif + bx lr + +.pool + +@ vim:filetype=armasm diff --git a/pico/32x/sh2soc.c b/pico/32x/sh2soc.c index f8e657f5..4aae2a04 100644 --- a/pico/32x/sh2soc.c +++ b/pico/32x/sh2soc.c @@ -229,7 +229,7 @@ void sh2_peripheral_reset(SH2 *sh2) // SH2 internal peripheral memhandlers // we keep them in little endian format -u32 sh2_peripheral_read8(u32 a, SH2 *sh2) +u32 REGPARM(2) sh2_peripheral_read8(u32 a, SH2 *sh2) { u8 *r = (void *)sh2->peri_regs; u32 d; @@ -242,7 +242,7 @@ u32 sh2_peripheral_read8(u32 a, SH2 *sh2) return d; } -u32 sh2_peripheral_read16(u32 a, SH2 *sh2) +u32 REGPARM(2) sh2_peripheral_read16(u32 a, SH2 *sh2) { u16 *r = (void *)sh2->peri_regs; u32 d; @@ -255,7 +255,7 @@ u32 sh2_peripheral_read16(u32 a, SH2 *sh2) return d; } -u32 sh2_peripheral_read32(u32 a, SH2 *sh2) +u32 REGPARM(2) sh2_peripheral_read32(u32 a, SH2 *sh2) { u32 d; a &= 0x1fc; diff --git a/pico/pico_int.h b/pico/pico_int.h index 4d599ce8..497649b6 100644 --- a/pico/pico_int.h +++ 
b/pico/pico_int.h @@ -958,9 +958,9 @@ void p32x_dreq1_trigger(void); void p32x_timers_recalc(void); void p32x_timers_do(unsigned int m68k_slice); void sh2_peripheral_reset(SH2 *sh2); -unsigned int sh2_peripheral_read8(unsigned int a, SH2 *sh2); -unsigned int sh2_peripheral_read16(unsigned int a, SH2 *sh2); -unsigned int sh2_peripheral_read32(unsigned int a, SH2 *sh2); +unsigned int REGPARM(2) sh2_peripheral_read8(unsigned int a, SH2 *sh2); +unsigned int REGPARM(2) sh2_peripheral_read16(unsigned int a, SH2 *sh2); +unsigned int REGPARM(2) sh2_peripheral_read32(unsigned int a, SH2 *sh2); void REGPARM(3) sh2_peripheral_write8(unsigned int a, unsigned int d, SH2 *sh2); void REGPARM(3) sh2_peripheral_write16(unsigned int a, unsigned int d, SH2 *sh2); void REGPARM(3) sh2_peripheral_write32(unsigned int a, unsigned int d, SH2 *sh2); diff --git a/tools/mkoffsets.sh b/tools/mkoffsets.sh index 90e65867..13e55495 100755 --- a/tools/mkoffsets.sh +++ b/tools/mkoffsets.sh @@ -87,3 +87,12 @@ get_define OFS_EST_ PicoEState HighPal ; echo "$line" >>$fn get_define OFS_PMEM_ PicoMem vram ; echo "$line" >>$fn get_define OFS_PMEM_ PicoMem vsram ; echo "$line" >>$fn + +get_define OFS_SH2_ SH2_ is_slave ; echo "$line" >>$fn +get_define OFS_SH2_ SH2_ p_bios ; echo "$line" >>$fn +get_define OFS_SH2_ SH2_ p_da ; echo "$line" >>$fn +get_define OFS_SH2_ SH2_ p_sdram ; echo "$line" >>$fn +get_define OFS_SH2_ SH2_ p_rom ; echo "$line" >>$fn +get_define OFS_SH2_ SH2_ p_dram ; echo "$line" >>$fn +get_define OFS_SH2_ SH2_ p_drcblk_da ; echo "$line" >>$fn +get_define OFS_SH2_ SH2_ p_drcblk_ram ; echo "$line" >>$fn From d760c90f3a5537231ff0aeaec308ea149f150ba8 Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 4 Apr 2019 20:29:39 +0200 Subject: [PATCH 0183/1110] added branch cache to sh2 drc to improve cross-tcache jump speed --- cpu/drc/emit_arm.c | 15 +++++- cpu/drc/emit_x86.c | 29 +++++++++++- cpu/sh2/compiler.c | 112 +++++++++++++++++++++++++++++++++++++++------ cpu/sh2/sh2.h | 3 ++ 4 files changed, 
143 insertions(+), 16 deletions(-) diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index c255a8b8..3f782bb6 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -179,6 +179,7 @@ /* ldr and str */ #define EOP_LDR_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,1,0,1,rn,rd,offset_12) #define EOP_LDRB_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,1,1,1,rn,rd,offset_12) +#define EOP_STR_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,(offset_12) >= 0,0,0,rn,rd,abs(offset_12)) #define EOP_LDR_IMM( rd,rn,offset_12) EOP_C_AM2_IMM(A_COND_AL,1,0,1,rn,rd,offset_12) #define EOP_LDR_NEGIMM(rd,rn,offset_12) EOP_C_AM2_IMM(A_COND_AL,0,0,1,rn,rd,offset_12) @@ -478,6 +479,9 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_add_r_r(d, s) \ emith_add_r_r_r(d, d, s) +#define emith_add_r_r_ptr(d, s) \ + emith_add_r_r_r(d, d, s) + #define emith_sub_r_r(d, s) \ EOP_SUB_REG(A_COND_AL,0,d,d,s,A_AM1_LSL,0) @@ -684,6 +688,8 @@ static int emith_xbranch(int cond, void *target, int is_call) // misc #define emith_read_r_r_offs_c(cond, r, rs, offs) \ EOP_LDR_IMM2(cond, r, rs, offs) +#define emith_read_r_r_offs_ptr_c(cond, r, rs, offs) \ + emith_read_r_r_offs_c(cond, r, rs, offs) #define emith_read_r_r_r_c(cond, r, rs, rm) \ EOP_LDR_REG_LSL(cond, r, rs, rm, 0) #define emith_read_r_r_r(r, rs, rm) \ @@ -716,8 +722,15 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_read16_r_r_offs(r, rs, offs) \ emith_read16_r_r_offs_c(A_COND_AL, r, rs, offs) +#define emith_write_r_r_offs_c(cond, r, rs, offs) \ + EOP_STR_IMM2(cond, r, rs, offs) +#define emith_write_r_r_offs_ptr_c(cond, r, rs, offs) \ + emith_write_r_r_offs_c(cond, r, rs, offs) + +#define emith_ctx_read_c(cond, r, offs) \ + emith_read_r_r_offs_c(cond, r, CONTEXT_REG, offs) #define emith_ctx_read(r, offs) \ - emith_read_r_r_offs(r, CONTEXT_REG, offs) + emith_ctx_read_c(A_COND_AL, r, offs) #define emith_ctx_read_ptr(r, offs) \ emith_ctx_read(r, offs) diff --git a/cpu/drc/emit_x86.c 
b/cpu/drc/emit_x86.c index 816e9294..58476a94 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -122,7 +122,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; EMIT_OP_MODRM(0x01, 3, s, d) #define emith_add_r_r_ptr(d, s) do { \ - EMIT_REX_IF(1, dst, src); \ + EMIT_REX_IF(1, s, d); \ EMIT_OP_MODRM64(0x01, 3, s, d); \ } while (0) @@ -260,6 +260,21 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; } \ } while (0) +// _r_r_r_shift +#define emith_add_r_r_r_lsl(d, s1, s2, lslimm) do { \ + int tmp_ = rcache_get_tmp(); \ + emith_lsl(tmp_, s2, lslimm); \ + emith_add_r_r_r(d, s1, tmp_); \ + rcache_free_tmp(tmp_); \ +} while (0) + +#define emith_add_r_r_r_lsr(d, s1, s2, lslimm) do { \ + int tmp_ = rcache_get_tmp(); \ + emith_lsr(tmp_, s2, lslimm); \ + emith_add_r_r_r(d, s1, tmp_); \ + rcache_free_tmp(tmp_); \ +} while (0) + // _r_r_shift #define emith_or_r_r_lsl(d, s, lslimm) do { \ int tmp_ = rcache_get_tmp(); \ @@ -361,8 +376,12 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_read_r_r_offs_c(cond, r, rs, offs) \ emith_read_r_r_offs(r, rs, offs) +#define emith_read_r_r_offs_ptr_c(cond, r, rs, offs) \ + emith_read_r_r_offs_ptr(r, rs, offs) #define emith_write_r_r_offs_c(cond, r, rs, offs) \ emith_write_r_r_offs(r, rs, offs) +#define emith_write_r_r_offs_ptr_c(cond, r, rs, offs) \ + emith_write_r_r_offs_ptr(r, rs, offs) #define emith_read8_r_r_offs_c(cond, r, rs, offs) \ emith_read8_r_r_offs(r, rs, offs) #define emith_write8_r_r_offs_c(cond, r, rs, offs) \ @@ -583,9 +602,15 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_read_r_r_offs(r, rs, offs) \ emith_deref_op(0x8b, r, rs, offs) +#define emith_read_r_r_offs_ptr(r, rs, offs) \ + EMIT_REX_IF(1, r, rs); \ + emith_deref_op(0x8b, r, rs, offs) #define emith_write_r_r_offs(r, rs, offs) \ emith_deref_op(0x89, r, rs, offs) +#define emith_write_r_r_offs_ptr(r, rs, offs) \ + EMIT_REX_IF(1, r, rs); \ + emith_deref_op(0x89, r, rs, offs) // note: don't use prefixes on 
this #define emith_read8_r_r_offs(r, rs, offs) do { \ @@ -664,6 +689,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_ctx_read(r, offs) \ emith_read_r_r_offs(r, CONTEXT_REG, offs) +#define emith_ctx_read_c(cond, r, offs) \ + emith_ctx_read(r, offs) #define emith_ctx_read_ptr(r, offs) do { \ EMIT_REX_IF(1, r, CONTEXT_REG); \ diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index bfd98e2b..d54d204e 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -38,6 +38,7 @@ // features #define PROPAGATE_CONSTANTS 1 #define LINK_BRANCHES 1 +#define BRANCH_CACHE 1 #define ALIAS_REGISTERS 1 #define REMAP_REGISTER 1 @@ -57,10 +58,11 @@ // 10 - smc self-check // 100 - write trace // 200 - compare trace -// 400 - print block entry backtrace +// 400 - block entry backtraceA on exit +// 800 - state dump on exit // { #ifndef DRC_DEBUG -#define DRC_DEBUG 0 +#define DRC_DEBUG 0x800 #endif #if DRC_DEBUG @@ -159,8 +161,6 @@ static char sh2dasm_buff[64]; #define do_host_disasm(x) #endif -#if (DRC_DEBUG & (8|256|512|1024)) || defined(PDB) - #define SH2_DUMP(sh2, reason) { \ char ms = (sh2)->is_slave ? 
's' : 'm'; \ printf("%csh2 %s %08x\n", ms, reason, (sh2)->pc); \ @@ -178,6 +178,8 @@ static char sh2dasm_buff[64]; (sh2)->pdb_io_csum[0], (sh2)->pdb_io_csum[1], (sh2)->state, \ (sh2)->poll_addr, (sh2)->poll_cycles, (sh2)->poll_cnt); \ } + +#if (DRC_DEBUG & (8|256|512|1024)) || defined(PDB) static SH2 csh2[2][4]; static void REGPARM(3) *sh2_drc_log_entry(void *block, SH2 *sh2, u32 sr) { @@ -631,6 +633,14 @@ static void REGPARM(1) flush_tcache(int tcid) memset(Pico32xMem->drcblk_da[tcid - 1], 0, sizeof(Pico32xMem->drcblk_da[0])); } +#if BRANCH_CACHE + if (tcid) + memset32(sh2s[tcid-1].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4); + else { + memset32(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4); + memset32(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)/4); + } +#endif #if (DRC_DEBUG & 4) tcache_dsm_ptrs[tcid] = tcache_bases[tcid]; #endif @@ -3727,14 +3737,35 @@ static void sh2_generate_utils(void) // sh2_drc_dispatcher(void) sh2_drc_dispatcher = (void *)tcache_ptr; - sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); - emith_cmp_r_imm(sr, 0); - emith_jump_cond(DCOND_LT, sh2_drc_exit); - rcache_invalidate(); emith_ctx_read(arg0, SHR_PC * 4); +#if BRANCH_CACHE + // check if PC is in branch target cache + emith_and_r_r_imm(arg1, arg0, (ARRAY_SIZE(sh2s->branch_cache)-1)*4); + // TODO implement emith_add_r_r_r_lsl_ptr, saves one insn on 32bit ARM + emith_lsl(arg1, arg1, sizeof(void *) == 8 ? 
2 : 1); + emith_add_r_r_ptr(arg1, CONTEXT_REG); + emith_read_r_r_offs(arg2, arg1, offsetof(SH2, branch_cache)); + emith_cmp_r_r(arg2, arg0); + EMITH_SJMP_START(DCOND_NE); + emith_read_r_r_offs_ptr_c(DCOND_EQ, RET_REG, arg1, offsetof(SH2, branch_cache) + sizeof(void *)); + emith_jump_reg_c(DCOND_EQ, RET_REG); + EMITH_SJMP_END(DCOND_NE); +#endif emith_ctx_read(arg1, offsetof(SH2, is_slave)); emith_add_r_r_ptr_imm(arg2, CONTEXT_REG, offsetof(SH2, drc_tmp)); emith_call(dr_lookup_block); +#if BRANCH_CACHE + // store PC and block entry ptr (in arg0) in branch target cache + emith_tst_r_r_ptr(RET_REG, RET_REG); + EMITH_SJMP_START(DCOND_EQ); + emith_ctx_read_c(DCOND_NE, arg2, SHR_PC * 4); + emith_and_r_r_imm(arg1, arg2, (ARRAY_SIZE(sh2s->branch_cache)-1)*4); + emith_lsl(arg1, arg1, sizeof(void *) == 8 ? 2 : 1); + emith_add_r_r_ptr(arg1, CONTEXT_REG); + emith_write_r_r_offs_c(DCOND_NE, arg2, arg1, offsetof(SH2, branch_cache)); + emith_write_r_r_offs_ptr_c(DCOND_NE, RET_REG, arg1, offsetof(SH2, branch_cache) + sizeof(void *)); + EMITH_SJMP_END(DCOND_EQ); +#endif emit_block_entry(); // lookup failed, call sh2_translate() emith_move_r_r_ptr(arg0, CONTEXT_REG); @@ -3904,6 +3935,15 @@ static void sh2_smc_rm_block(struct block_desc *bd, int tcache_id, u32 ram_mask) bd->addr = bd->size = bd->size_nolit = 0; bd->entry_count = 0; + +#if BRANCH_CACHE + if (tcache_id) + memset32(sh2s[tcache_id-1].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4); + else { + memset32(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4); + memset32(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)/4); + } +#endif } /* @@ -4015,9 +4055,9 @@ int sh2_execute_drc(SH2 *sh2c, int cycles) return ret_cycles; } -#if (DRC_DEBUG & 2) -void block_stats(void) +static void block_stats(void) { +#if (DRC_DEBUG & 2) int c, b, i, total = 0; printf("block stats:\n"); @@ -4048,12 +4088,10 @@ void block_stats(void) for (b = 0; b < ARRAY_SIZE(block_tables); b++) for (i = 0; i < block_counts[b]; i++) 
block_tables[b][i].refcount = 0; -} -#else -#define block_stats() #endif +} -void sh2_drc_flush_all(void) +static void backtrace(void) { #if (DRC_DEBUG & 1024) int i; @@ -4064,6 +4102,52 @@ void sh2_drc_flush_all(void) for (i = 0; i < ARRAY_SIZE(csh2[1]); i++) SH2_DUMP(&csh2[1][i], "bt ssh2"); #endif +} + +static void state_dump(void) +{ +#if (DRC_DEBUG & 2048) + int i; + + SH2_DUMP(&sh2s[0], "master"); + printf("VBR msh2: %x\n", sh2s[0].vbr); + for (i = 0; i < 0x60; i++) { + printf("%08x ",p32x_sh2_read32(sh2s[0].vbr + i*4, &sh2s[0])); + if ((i+1) % 8 == 0) printf("\n"); + } + printf("stack msh2: %x\n", sh2s[0].r[15]); + for (i = -0x30; i < 0x30; i++) { + printf("%08x ",p32x_sh2_read32(sh2s[0].r[15] + i*4, &sh2s[0])); + if ((i+1) % 8 == 0) printf("\n"); + } + printf("branch cache master:\n"); + for (i = 0; i < ARRAY_SIZE(sh2s[0].branch_cache); i++) { + printf("%08x ",sh2s[0].branch_cache[i].pc); + if ((i+1) % 8 == 0) printf("\n"); + } + SH2_DUMP(&sh2s[1], "slave"); + printf("VBR ssh2: %x\n", sh2s[1].vbr); + for (i = 0; i < 0x60; i++) { + printf("%08x ",p32x_sh2_read32(sh2s[1].vbr + i*4, &sh2s[1])); + if ((i+1) % 8 == 0) printf("\n"); + } + printf("stack ssh2: %x\n", sh2s[1].r[15]); + for (i = -0x30; i < 0x30; i++) { + printf("%08x ",p32x_sh2_read32(sh2s[1].r[15] + i*4, &sh2s[1])); + if ((i+1) % 8 == 0) printf("\n"); + } + printf("branch cache slave:\n"); + for (i = 0; i < ARRAY_SIZE(sh2s[1].branch_cache); i++) { + printf("%08x ",sh2s[1].branch_cache[i].pc); + if ((i+1) % 8 == 0) printf("\n"); + } +#endif +} + +void sh2_drc_flush_all(void) +{ + backtrace(); + state_dump(); block_stats(); flush_tcache(0); flush_tcache(1); diff --git a/cpu/sh2/sh2.h b/cpu/sh2/sh2.h index a073d43f..e53bbf05 100644 --- a/cpu/sh2/sh2.h +++ b/cpu/sh2/sh2.h @@ -50,6 +50,9 @@ typedef struct SH2_ int poll_cycles; int poll_cnt; + // DRC branch cache. 
size must be 2^n and <=128 + struct { unsigned int pc; void *code; } branch_cache[128]; + // interpreter stuff int icount; // cycles left in current timeslice unsigned int ea; From f133766faa06ca6abde2f831c94f04bbaff0f3fc Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 5 Apr 2019 21:01:03 +0200 Subject: [PATCH 0184/1110] overhaul of translation cache and sh2 literals handling --- cpu/sh2/compiler.c | 859 +++++++++++++++++++++++++++------------------ cpu/sh2/compiler.h | 12 +- pico/32x/memory.c | 12 +- pico/pico_int.h | 6 +- 4 files changed, 525 insertions(+), 364 deletions(-) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index d54d204e..e6ce3474 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -62,7 +62,7 @@ // 800 - state dump on exit // { #ifndef DRC_DEBUG -#define DRC_DEBUG 0x800 +#define DRC_DEBUG 0 #endif #if DRC_DEBUG @@ -149,8 +149,6 @@ enum op_types { #ifdef DRC_SH2 -static int literal_disabled_frames; - #if (DRC_DEBUG & 4) static u8 *tcache_dsm_ptrs[3]; static char sh2dasm_buff[64]; @@ -261,6 +259,7 @@ static const int tcache_sizes[TCACHE_BUFFERS] = { static u8 *tcache_bases[TCACHE_BUFFERS]; static u8 *tcache_ptrs[TCACHE_BUFFERS]; +static u8 *tcache_limit[TCACHE_BUFFERS]; // ptr for code emiters static u8 *tcache_ptr; @@ -270,14 +269,21 @@ static u8 *tcache_ptr; struct block_link { u32 target_pc; void *jump; // insn address - struct block_link *next; // either in block_entry->links or + struct block_link *next; // either in block_entry->links or unresolved + struct block_link *o_next; // ...in block_entry->o_links + struct block_link *prev; + struct block_link *o_prev; + struct block_entry *target;// target block this is linked in (be->links) + int tcache_id; }; struct block_entry { u32 pc; - void *tcache_ptr; // translated block for above PC - struct block_entry *next; // next block in hash_table with same pc hash - struct block_link *links; // links to this entry + u8 *tcache_ptr; // translated block for above PC + struct block_entry 
*next; // chain in hash_table with same pc hash + struct block_entry *prev; + struct block_link *links; // incoming links to this entry + struct block_link *o_links;// outgoing links from this entry #if (DRC_DEBUG & 2) struct block_desc *block; #endif @@ -285,8 +291,12 @@ struct block_entry { struct block_desc { u32 addr; // block start SH2 PC address - u16 size; // ..of recompiled insns+lit. pool - u16 size_nolit; // same without literals + u32 addr_lit; // block start SH2 literal pool addr + int size; // ..of recompiled insns + int size_lit; // ..of (insns+)literal pool + u8 *tcache_ptr; // start address of block in cache + u16 active; // actively used or deactivated? + struct block_list *list; #if (DRC_DEBUG & 2) int refcount; #endif @@ -301,6 +311,7 @@ static const int block_max_counts[TCACHE_BUFFERS] = { }; static struct block_desc *block_tables[TCACHE_BUFFERS]; static int block_counts[TCACHE_BUFFERS]; +static int block_limit[TCACHE_BUFFERS]; // we have block_link_pool to avoid using mallocs static const int block_link_pool_max_counts[TCACHE_BUFFERS] = { @@ -310,7 +321,8 @@ static const int block_link_pool_max_counts[TCACHE_BUFFERS] = { }; static struct block_link *block_link_pool[TCACHE_BUFFERS]; static int block_link_pool_counts[TCACHE_BUFFERS]; -static struct block_link *unresolved_links[TCACHE_BUFFERS]; +static struct block_link **unresolved_links[TCACHE_BUFFERS]; +static struct block_link *blink_free[TCACHE_BUFFERS]; // used for invalidation static const int ram_sizes[TCACHE_BUFFERS] = { @@ -323,7 +335,11 @@ static const int ram_sizes[TCACHE_BUFFERS] = { struct block_list { struct block_desc *block; struct block_list *next; + struct block_list *prev; + struct block_list **head; + struct block_list *l_next; }; +struct block_list *blist_free; // array of pointers to block_lists for RAM and 2 data arrays // each array has len: sizeof(mem) / INVAL_PAGE_SIZE @@ -573,41 +589,59 @@ static struct block_entry *dr_get_entry(u32 pc, int is_slave, int *tcache_id) // 
block management static void add_to_block_list(struct block_list **blist, struct block_desc *block) { - struct block_list *added = malloc(sizeof(*added)); + struct block_list *added; + + if (blist_free) { + added = blist_free; + blist_free = added->next; + } else { + added = malloc(sizeof(*added)); + } if (!added) { elprintf(EL_ANOMALY, "drc OOM (1)"); return; } added->block = block; + added->l_next = block->list; + block->list = added; + added->head = blist; + + added->prev = NULL; + if (*blist) + (*blist)->prev = added; added->next = *blist; *blist = added; } -static void rm_from_block_list(struct block_list **blist, struct block_desc *block) +static void rm_from_block_lists(struct block_desc *block) { - struct block_list *prev = NULL, *current = *blist; - for (; current != NULL; current = current->next) { - if (current->block == block) { - if (prev == NULL) - *blist = current->next; - else - prev->next = current->next; - free(current); - return; - } - prev = current; + struct block_list *entry; + + entry = block->list; + while (entry != NULL) { + if (entry->prev != NULL) + entry->prev->next = entry->next; + else + *(entry->head) = entry->next; + if (entry->next != NULL) + entry->next->prev = entry->prev; + + entry->next = blist_free; + blist_free = entry; + + entry = entry->l_next; } - dbg(1, "can't rm block %p (%08x-%08x)", - block, block->addr, block->addr + block->size); + block->list = NULL; } static void rm_block_list(struct block_list **blist) { - struct block_list *tmp, *current = *blist; + struct block_list *next, *current = *blist; while (current != NULL) { - tmp = current; - current = current->next; - free(tmp); + next = current->next; + current->next = blist_free; + blist_free = current; + current = next; } *blist = NULL; } @@ -615,32 +649,37 @@ static void rm_block_list(struct block_list **blist) static void REGPARM(1) flush_tcache(int tcid) { int i; +#if (DRC_DEBUG & 1) + int tc_used, bl_used; - dbg(1, "tcache #%d flush! 
(%d/%d, bds %d/%d)", tcid, - tcache_ptrs[tcid] - tcache_bases[tcid], tcache_sizes[tcid], - block_counts[tcid], block_max_counts[tcid]); + tc_used = tcache_sizes[tcid] - (tcache_limit[tcid] - tcache_ptrs[tcid]); + bl_used = block_max_counts[tcid] - (block_limit[tcid] - block_counts[tcid]); + elprintf(EL_STATUS, "tcache #%d flush! (%d/%d, bds %d/%d)", tcid, tc_used, + tcache_sizes[tcid], bl_used, block_max_counts[tcid]); +#endif block_counts[tcid] = 0; + block_limit[tcid] = block_max_counts[tcid] - 1; block_link_pool_counts[tcid] = 0; - unresolved_links[tcid] = NULL; + blink_free[tcid] = NULL; + memset(unresolved_links[tcid], 0, sizeof(*unresolved_links[0]) * hash_table_sizes[tcid]); memset(hash_tables[tcid], 0, sizeof(*hash_tables[0]) * hash_table_sizes[tcid]); tcache_ptrs[tcid] = tcache_bases[tcid]; - if (Pico32xMem != NULL) { - if (tcid == 0) // ROM, RAM - memset(Pico32xMem->drcblk_ram, 0, - sizeof(Pico32xMem->drcblk_ram)); - else - memset(Pico32xMem->drcblk_da[tcid - 1], 0, - sizeof(Pico32xMem->drcblk_da[0])); - } -#if BRANCH_CACHE - if (tcid) - memset32(sh2s[tcid-1].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4); - else { - memset32(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4); - memset32(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)/4); + tcache_limit[tcid] = tcache_bases[tcid] + tcache_sizes[tcid]; + if (Pico32xMem->sdram != NULL) { + if (tcid == 0) { // ROM, RAM + memset(Pico32xMem->drcblk_ram, 0, sizeof(Pico32xMem->drcblk_ram)); + memset(Pico32xMem->drclit_ram, 0, sizeof(Pico32xMem->drclit_ram)); + memset(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache)); + memset(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)); + } else { + memset(Pico32xMem->drcblk_ram, 0, sizeof(Pico32xMem->drcblk_ram)); + memset(Pico32xMem->drclit_ram, 0, sizeof(Pico32xMem->drclit_ram)); + memset(Pico32xMem->drcblk_da[tcid - 1], 0, sizeof(Pico32xMem->drcblk_da[tcid - 1])); + memset(Pico32xMem->drclit_da[tcid - 1], 0, 
sizeof(Pico32xMem->drclit_da[tcid - 1])); + memset(sh2s[tcid - 1].branch_cache, -1, sizeof(sh2s[0].branch_cache)); } -#endif + } #if (DRC_DEBUG & 4) tcache_dsm_ptrs[tcid] = tcache_bases[tcid]; #endif @@ -652,69 +691,222 @@ static void REGPARM(1) flush_tcache(int tcid) static void add_to_hashlist(struct block_entry *be, int tcache_id) { u32 tcmask = hash_table_sizes[tcache_id] - 1; + struct block_entry **head = &HASH_FUNC(hash_tables[tcache_id], be->pc, tcmask); - be->next = HASH_FUNC(hash_tables[tcache_id], be->pc, tcmask); - HASH_FUNC(hash_tables[tcache_id], be->pc, tcmask) = be; - -#if (DRC_DEBUG & 2) - if (be->next != NULL) { - printf(" %08x: hash collision with %08x\n", - be->pc, be->next->pc); - hash_collisions++; - } -#endif + be->prev = NULL; + if (*head) + (*head)->prev = be; + be->next = *head; + *head = be; } static void rm_from_hashlist(struct block_entry *be, int tcache_id) { u32 tcmask = hash_table_sizes[tcache_id] - 1; - struct block_entry *cur, *prev; - - cur = HASH_FUNC(hash_tables[tcache_id], be->pc, tcmask); - if (cur == NULL) - goto missing; + struct block_entry **head = &HASH_FUNC(hash_tables[tcache_id], be->pc, tcmask); - if (be == cur) { // first - HASH_FUNC(hash_tables[tcache_id], be->pc, tcmask) = be->next; - return; +#if DRC_DEBUG & 1 + struct block_entry *current = be; + while (current->prev != NULL) + current = current->prev; + if (current != *head) + dbg(1, "rm_from_hashlist @%p: be %p %08x missing?", head, be, be->pc); +#endif + + if (be->prev != NULL) + be->prev->next = be->next; + else + *head = be->next; + if (be->next != NULL) + be->next->prev = be->prev; +} + + +static void add_to_hashlist_unresolved(struct block_link *bl, int tcache_id) +{ + u32 tcmask = hash_table_sizes[tcache_id] - 1; + struct block_link **head = &HASH_FUNC(unresolved_links[tcache_id], bl->target_pc, tcmask); + + bl->target = NULL; // marker for not resolved + bl->prev = NULL; + if (*head) + (*head)->prev = bl; + bl->next = *head; + *head = bl; +} + +static void 
rm_from_hashlist_unresolved(struct block_link *bl, int tcache_id) +{ + u32 tcmask = hash_table_sizes[tcache_id] - 1; + struct block_link **head = &HASH_FUNC(unresolved_links[tcache_id], bl->target_pc, tcmask); + +#if DRC_DEBUG & 1 + struct block_link *current = bl; + while (current->prev != NULL) + current = current->prev; + if (current != *head) + dbg(1, "rm_from_hashlist unresolved @%p: bl %p %p %08x missing?", head, bl, bl->target, bl->target_pc); +#endif + + if (bl->prev != NULL) + bl->prev->next = bl->next; + else + *head = bl->next; + if (bl->next != NULL) + bl->next->prev = bl->prev; +} + +static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nolit); +static void dr_free_oldest_block(int tcache_id) +{ + struct block_desc *bd; + + if (block_limit[tcache_id] >= block_max_counts[tcache_id]) { + // block desc wrap around + block_limit[tcache_id] = 0; + } + bd = &block_tables[tcache_id][block_limit[tcache_id]]; + + if (bd->tcache_ptr && bd->tcache_ptr < tcache_ptrs[tcache_id]) { + // cache wrap around + tcache_ptrs[tcache_id] = bd->tcache_ptr; } - for (prev = cur, cur = cur->next; cur != NULL; cur = cur->next) { - if (cur == be) { - prev->next = cur->next; - return; + if (bd->addr && bd->entry_count) + sh2_smc_rm_block_entry(bd, tcache_id, 0); + + block_limit[tcache_id]++; + if (block_limit[tcache_id] >= block_max_counts[tcache_id]) + block_limit[tcache_id] = 0; + bd = &block_tables[tcache_id][block_limit[tcache_id]]; + if (bd->tcache_ptr >= tcache_ptrs[tcache_id]) + tcache_limit[tcache_id] = bd->tcache_ptr; + else + tcache_limit[tcache_id] = tcache_bases[tcache_id] + tcache_sizes[tcache_id]; +} + +static u8 *dr_prepare_cache(int tcache_id, int insn_count) +{ +#if BRANCH_CACHE + u8 *limit = tcache_limit[tcache_id]; +#endif + + // if no block desc available + if (block_counts[tcache_id] == block_limit[tcache_id]) + dr_free_oldest_block(tcache_id); + + // while not enough cache space left (limit - tcache_ptr < max space needed) + while 
(tcache_limit[tcache_id] - tcache_ptrs[tcache_id] < insn_count * 128) + dr_free_oldest_block(tcache_id); + +#if BRANCH_CACHE + if (limit != tcache_limit[tcache_id]) { + if (tcache_id) + memset32(sh2s[tcache_id-1].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4); + else { + memset32(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4); + memset32(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)/4); } } - -missing: - dbg(1, "rm_from_hashlist: be %p %08x missing?", be, be->pc); +#endif + return (u8 *)tcache_ptrs[tcache_id]; } -static void unregister_links(struct block_entry *be, int tcache_id) +static void dr_mark_memory(int mark, struct block_desc *block, int tcache_id, u32 nolit) { - struct block_link *bl_unresolved = unresolved_links[tcache_id]; - struct block_link *bl, *bl_next; + u8 *drc_ram_blk = NULL, *lit_ram_blk = NULL; + u32 addr, end, mask = 0, shift = 0, idx; - for (bl = be->links; bl != NULL; ) { - bl_next = bl->next; - bl->next = bl_unresolved; - bl_unresolved = bl; - bl = bl_next; + // mark memory blocks as containing compiled code + if ((block->addr & 0xc7fc0000) == 0x06000000 + || (block->addr & 0xfffff000) == 0xc0000000) + { + if (tcache_id != 0) { + // data array + drc_ram_blk = Pico32xMem->drcblk_da[tcache_id-1]; + lit_ram_blk = Pico32xMem->drclit_da[tcache_id-1]; + shift = SH2_DRCBLK_DA_SHIFT; + } + else { + // SDRAM + drc_ram_blk = Pico32xMem->drcblk_ram; + lit_ram_blk = Pico32xMem->drclit_ram; + shift = SH2_DRCBLK_RAM_SHIFT; + } + mask = ram_sizes[tcache_id] - 1; + + // mark recompiled insns + addr = block->addr & ~((1 << shift) - 1); + end = block->addr + block->size; + for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift)) + drc_ram_blk[idx++] += mark; + + // mark literal pool + if (addr < (block->addr_lit & ~((1 << shift) - 1))) + addr = block->addr_lit & ~((1 << shift) - 1); + end = block->addr_lit + block->size_lit; + for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift)) + drc_ram_blk[idx++] += mark; 
+ + // mark for literals disabled + if (nolit) { + addr = nolit & ~((1 << shift) - 1); + end = block->addr_lit + block->size_lit; + for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift)) + lit_ram_blk[idx++] = 1; + } + + if (mark < 0) + rm_from_block_lists(block); + else { + // add to invalidation lookup lists + addr = block->addr & ~(INVAL_PAGE_SIZE - 1); + end = block->addr + block->size; + for (idx = (addr & mask) / INVAL_PAGE_SIZE; addr < end; addr += INVAL_PAGE_SIZE) + add_to_block_list(&inval_lookup[tcache_id][idx++], block); + + if (addr < (block->addr_lit & ~(INVAL_PAGE_SIZE - 1))) + addr = block->addr_lit & ~(INVAL_PAGE_SIZE - 1); + end = block->addr_lit + block->size_lit; + for (idx = (addr & mask) / INVAL_PAGE_SIZE; addr < end; addr += INVAL_PAGE_SIZE) + add_to_block_list(&inval_lookup[tcache_id][idx++], block); + } } - be->links = NULL; - unresolved_links[tcache_id] = bl_unresolved; } -// unlike sh2_smc_rm_block, the block stays and can still be accessed -// by other already directly linked blocks, just not preferred -static void kill_block_entry(struct block_entry *be, int tcache_id) +static u32 dr_check_nolit(u32 start, u32 end, int tcache_id) { - rm_from_hashlist(be, tcache_id); - unregister_links(be, tcache_id); + u8 *lit_ram_blk = NULL; + u32 mask = 0, shift = 0, addr, idx; + + if ((start & 0xc7fc0000) == 0x06000000 + || (start & 0xfffff000) == 0xc0000000) + { + if (tcache_id != 0) { + // data array + lit_ram_blk = Pico32xMem->drclit_da[tcache_id-1]; + shift = SH2_DRCBLK_DA_SHIFT; + } + else { + // SDRAM + lit_ram_blk = Pico32xMem->drclit_ram; + shift = SH2_DRCBLK_RAM_SHIFT; + } + mask = ram_sizes[tcache_id] - 1; + + addr = start & ~((1 << shift) - 1); + for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift)) + if (lit_ram_blk[idx++]) + break; + + return (addr < start ? start : addr > end ? 
end : addr); + } + + return end; } -static struct block_desc *dr_add_block(u32 addr, u16 size_lit, - u16 size_nolit, int is_slave, int *blk_id) +static struct block_desc *dr_add_block(u32 addr, int size, + u32 addr_lit, int size_lit, int is_slave, int *blk_id) { struct block_entry *be; struct block_desc *bd; @@ -723,26 +915,27 @@ static struct block_desc *dr_add_block(u32 addr, u16 size_lit, // do a lookup to get tcache_id and override check be = dr_get_entry(addr, is_slave, &tcache_id); - if (be != NULL) { - dbg(1, "block override for %08x, was %p", addr, be->tcache_ptr); - kill_block_entry(be, tcache_id); - } + if (be != NULL) + dbg(1, "block override for %08x", addr); bcount = &block_counts[tcache_id]; - if (*bcount >= block_max_counts[tcache_id]) { + if (*bcount == block_limit[tcache_id]) { dbg(1, "bd overflow for tcache %d", tcache_id); return NULL; } bd = &block_tables[tcache_id][*bcount]; bd->addr = addr; - bd->size = size_lit; - bd->size_nolit = size_nolit; + bd->size = size; + bd->addr_lit = addr_lit; + bd->size_lit = size_lit; + bd->tcache_ptr = tcache_ptr; + bd->active = 1; bd->entry_count = 1; bd->entryp[0].pc = addr; bd->entryp[0].tcache_ptr = tcache_ptr; - bd->entryp[0].links = NULL; + bd->entryp[0].links = bd->entryp[0].o_links = NULL; #if (DRC_DEBUG & 2) bd->entryp[0].block = bd; bd->refcount = 0; @@ -751,6 +944,8 @@ static struct block_desc *dr_add_block(u32 addr, u16 size_lit, *blk_id = *bcount; (*bcount)++; + if (*bcount >= block_max_counts[tcache_id]) + *bcount = 0; return bd; } @@ -777,43 +972,47 @@ static void *dr_failure(void) exit(1); } -static void *dr_prepare_ext_branch(u32 pc, int is_slave, int tcache_id) +static void *dr_prepare_ext_branch(struct block_entry *owner, u32 pc, int is_slave, int tcache_id) { #if LINK_BRANCHES struct block_link *bl = block_link_pool[tcache_id]; int cnt = block_link_pool_counts[tcache_id]; struct block_entry *be = NULL; int target_tcache_id; - int i; be = dr_get_entry(pc, is_slave, &target_tcache_id); - if 
(target_tcache_id != tcache_id) + if (target_tcache_id && target_tcache_id != tcache_id) return sh2_drc_dispatcher; - // if pool has been freed, reuse - for (i = cnt - 1; i >= 0; i--) - if (bl[i].target_pc != 0) - break; - cnt = i + 1; - if (cnt >= block_link_pool_max_counts[tcache_id]) { + if (blink_free[tcache_id] != NULL) { + bl = blink_free[tcache_id]; + blink_free[tcache_id] = bl->next; + } else if (cnt >= block_link_pool_max_counts[tcache_id]) { dbg(1, "bl overflow for tcache %d", tcache_id); return sh2_drc_dispatcher; + } else { + bl += cnt; + block_link_pool_counts[tcache_id] = cnt+1; } - bl += cnt; - block_link_pool_counts[tcache_id]++; + bl->tcache_id = tcache_id; bl->target_pc = pc; bl->jump = tcache_ptr; + bl->o_next = owner->o_links; + owner->o_links = bl; if (be != NULL) { - dbg(2, "- early link from %p to pc %08x", bl->jump, pc); + dbg(2, "- early link from %p to pc %08x entry %p", bl->jump, pc, be->tcache_ptr); + bl->target = be; + bl->prev = NULL; + if (be->links) + be->links->prev = bl; bl->next = be->links; be->links = bl; return be->tcache_ptr; } else { - bl->next = unresolved_links[tcache_id]; - unresolved_links[tcache_id] = bl; + add_to_hashlist_unresolved(bl, tcache_id); return sh2_drc_dispatcher; } #else @@ -824,30 +1023,28 @@ static void *dr_prepare_ext_branch(u32 pc, int is_slave, int tcache_id) static void dr_link_blocks(struct block_entry *be, int tcache_id) { #if LINK_BRANCHES - struct block_link *first = unresolved_links[tcache_id]; - struct block_link *bl, *prev, *tmp; + u32 tcmask = hash_table_sizes[tcache_id] - 1; u32 pc = be->pc; + struct block_link **head = &HASH_FUNC(unresolved_links[tcache_id], pc, tcmask); + struct block_link *bl = *head, *next; - for (bl = prev = first; bl != NULL; ) { + while (bl != NULL) { + next = bl->next; if (bl->target_pc == pc) { - dbg(2, "- link from %p to pc %08x", bl->jump, pc); - emith_jump_patch(bl->jump, tcache_ptr); - + dbg(2, "- link from %p to pc %08x entry %p", bl->jump, pc, be->tcache_ptr); 
// move bl from unresolved_links to block_entry - tmp = bl->next; + rm_from_hashlist_unresolved(bl, tcache_id); + + emith_jump_patch(bl->jump, be->tcache_ptr); + bl->target = be; + bl->prev = NULL; + if (be->links) + be->links->prev = bl; bl->next = be->links; be->links = bl; - - if (bl == first) - first = prev = bl = tmp; - else - prev->next = bl = tmp; - continue; } - prev = bl; - bl = bl->next; + bl = next; } - unresolved_links[tcache_id] = first; // could sync arm caches here, but that's unnecessary #endif @@ -1954,7 +2151,7 @@ static int emit_memhandler_read_rr(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rs, u32 off emith_move_r_imm(hr2, val); } else { emit_move_r_imm32(rd, val); - hr2 = rcache_get_reg(rd, RC_GR_READ, NULL); + hr2 = rcache_get_reg(rd, RC_GR_RMW, NULL); } if ((size & MF_POSTINCR) && gconst_get(rs, &val)) gconst_new(rs, val + (1 << (size & MF_SIZEMASK))); @@ -2202,14 +2399,14 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) u32 test_irq:1; u32 pending_branch_direct:1; u32 pending_branch_indirect:1; - u32 literals_disabled:1; } drcf = { 0, }; // PC of current, first, last SH2 insn u32 pc, base_pc, end_pc; - u32 end_literals; + u32 base_literals, end_literals; void *block_entry_ptr; struct block_desc *block; + struct block_entry *entry; u16 *dr_pc_base; struct op_data *opd; int blkid_main = 0; @@ -2221,7 +2418,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) int op; base_pc = sh2->pc; - drcf.literals_disabled = literal_disabled_frames != 0; // get base/validate PC dr_pc_base = dr_get_pc_base(base_pc, sh2->is_slave); @@ -2231,31 +2427,11 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) exit(1); } - tcache_ptr = tcache_ptrs[tcache_id]; - - // predict tcache overflow - u = tcache_ptr - tcache_bases[tcache_id]; - if (u > tcache_sizes[tcache_id] - MAX_BLOCK_SIZE) { - dbg(1, "tcache %d overflow", tcache_id); - return NULL; - } - // initial passes to disassemble and analyze the block - scan_block(base_pc, 
sh2->is_slave, op_flags, &end_pc, &end_literals); - - if (drcf.literals_disabled) - end_literals = end_pc; - - block = dr_add_block(base_pc, end_literals - base_pc, - end_pc - base_pc, sh2->is_slave, &blkid_main); - if (block == NULL) - return NULL; - - block_entry_ptr = tcache_ptr; - dbg(2, "== %csh2 block #%d,%d %08x-%08x -> %p", sh2->is_slave ? 's' : 'm', - tcache_id, blkid_main, base_pc, end_pc, block_entry_ptr); - - dr_link_blocks(&block->entryp[0], tcache_id); + scan_block(base_pc, sh2->is_slave, op_flags, &end_pc, &base_literals, &end_literals); + end_literals = dr_check_nolit(base_literals, end_literals, tcache_id); + if (base_literals == end_literals) // map empty lit section to end of code + base_literals = end_literals = end_pc; // collect branch_targets that don't land on delay slots for (pc = base_pc, i = 0; pc < end_pc; i++, pc += 2) { @@ -2272,6 +2448,20 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) memset(branch_target_ptr, 0, sizeof(branch_target_ptr[0]) * branch_target_count); } + tcache_ptr = dr_prepare_cache(tcache_id, (end_pc - base_pc) / 2); +#if (DRC_DEBUG & 4) + tcache_dsm_ptrs[tcache_id] = tcache_ptr; +#endif + + block = dr_add_block(base_pc, end_pc - base_pc, base_literals, + end_literals - base_literals, sh2->is_slave, &blkid_main); + if (block == NULL) + return NULL; + + block_entry_ptr = tcache_ptr; + dbg(2, "== %csh2 block #%d,%d %08x-%08x -> %p", sh2->is_slave ? 
's' : 'm', + tcache_id, blkid_main, base_pc, end_pc, block_entry_ptr); + // clear stale state after compile errors rcache_invalidate(); @@ -2307,41 +2497,36 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // make block entry v = block->entry_count; + entry = &block->entryp[v]; if (v < ARRAY_SIZE(block->entryp)) { - struct block_entry *be_old; - - block->entryp[v].pc = pc; - block->entryp[v].tcache_ptr = tcache_ptr; - block->entryp[v].links = NULL; + entry = &block->entryp[v]; + entry->pc = pc; + entry->tcache_ptr = tcache_ptr; + entry->links = entry->o_links = NULL; #if (DRC_DEBUG & 2) - block->entryp[v].block = block; + entry->block = block; #endif - be_old = dr_get_entry(pc, sh2->is_slave, &tcache_id); - if (be_old != NULL) { - dbg(1, "entry override for %08x, was %p", pc, be_old->tcache_ptr); - kill_block_entry(be_old, tcache_id); - } - - add_to_hashlist(&block->entryp[v], tcache_id); + add_to_hashlist(entry, tcache_id); block->entry_count++; dbg(2, "-- %csh2 block #%d,%d entry %08x -> %p", sh2->is_slave ? 
's' : 'm', tcache_id, blkid_main, pc, tcache_ptr); - - // since we made a block entry, link any other blocks - // that jump to current pc - dr_link_blocks(&block->entryp[v], tcache_id); } else { dbg(1, "too many entryp for block #%d,%d pc=%08x", tcache_id, blkid_main, pc); } - - do_host_disasm(tcache_id); + } else { + entry = block->entryp; } + // since we made a block entry, link any other blocks that jump to it + dr_link_blocks(entry, tcache_id); + if (!tcache_id) // can safely link from cpu-local to global memory + dr_link_blocks(entry, sh2->is_slave?2:1); + v = find_in_array(branch_target_pc, branch_target_count, pc); if (v >= 0) branch_target_ptr[v] = tcache_ptr; @@ -2370,8 +2555,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); emith_cmp_r_imm(sr, 0); emith_jump_cond(DCOND_LE, sh2_drc_exit); - do_host_disasm(tcache_id); - rcache_unlock_all(); #if (DRC_DEBUG & (8|256|512|1024)) sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); @@ -2389,6 +2572,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_restore_caller_regs(tmp); rcache_invalidate_tmp(); #endif + + do_host_disasm(tcache_id); + rcache_unlock_all(); } #ifdef DRC_CMP @@ -2556,8 +2742,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) goto end_op; case OP_UNDEFINED: - elprintf_sh2(sh2, EL_ANOMALY, - "drc: illegal op %04x @ %08x", op, pc - 2); + elprintf_sh2(sh2, EL_ANOMALY, "drc: unhandled op %04x @ %08x", op, pc-2); opd->imm = (op_flags[i] & OF_B_IN_DS) ? 
6 : 4; // fallthrough case OP_TRAPA: // TRAPA #imm 11000011iiiiiiii @@ -3525,7 +3710,7 @@ end_op: emit_move_r_imm32(SHR_PC, target_pc); rcache_clean(); - target = dr_prepare_ext_branch(target_pc, sh2->is_slave, tcache_id); + target = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id); if (target == NULL) return NULL; } @@ -3571,7 +3756,7 @@ end_op: emit_move_r_imm32(SHR_PC, pc); rcache_flush(); - target = dr_prepare_ext_branch(pc, sh2->is_slave, tcache_id); + target = dr_prepare_ext_branch(block->entryp, pc, sh2->is_slave, tcache_id); if (target == NULL) return NULL; emith_jump_patchable(target); @@ -3594,45 +3779,7 @@ end_op: emith_jump_patch(branch_patch_ptr[i], target); } - // mark memory blocks as containing compiled code - // override any overlay blocks as they become unreachable anyway - if ((block->addr & 0xc7fc0000) == 0x06000000 - || (block->addr & 0xfffff000) == 0xc0000000) - { - u16 *drc_ram_blk = NULL; - u32 addr, mask = 0, shift = 0; - - if (tcache_id != 0) { - // data array, BIOS - drc_ram_blk = Pico32xMem->drcblk_da[sh2->is_slave]; - shift = SH2_DRCBLK_DA_SHIFT; - mask = 0xfff; - } - else { - // SDRAM - drc_ram_blk = Pico32xMem->drcblk_ram; - shift = SH2_DRCBLK_RAM_SHIFT; - mask = 0x3ffff; - } - - // mark recompiled insns - drc_ram_blk[(base_pc & mask) >> shift] = 1; - for (pc = base_pc; pc < end_pc; pc += 2) - drc_ram_blk[(pc & mask) >> shift] = 1; - - // mark literals - for (i = 0; i < literal_addr_count; i++) { - u = literal_addr[i]; - drc_ram_blk[(u & mask) >> shift] = 1; - } - - // add to invalidation lookup lists - addr = base_pc & ~(INVAL_PAGE_SIZE - 1); - for (; addr < end_literals; addr += INVAL_PAGE_SIZE) { - i = (addr & mask) / INVAL_PAGE_SIZE; - add_to_block_list(&inval_lookup[tcache_id][i], block); - } - } + dr_mark_memory(1, block, tcache_id, 0); tcache_ptrs[tcache_id] = tcache_ptr; @@ -3640,10 +3787,8 @@ end_op: do_host_disasm(tcache_id); - if (drcf.literals_disabled && literal_addr_count) - dbg(1, 
"literals_disabled && literal_addr_count?"); - dbg(2, " block #%d,%d tcache %d/%d, insns %d -> %d %.3f", - tcache_id, blkid_main, + dbg(2, " block #%d,%d -> %p tcache %d/%d, insns %d -> %d %.3f", + tcache_id, blkid_main, tcache_ptr, tcache_ptr - tcache_bases[tcache_id], tcache_sizes[tcache_id], insns_compiled, host_insn_count, (float)host_insn_count / insns_compiled); if ((sh2->pc & 0xc6000000) == 0x02000000) { // ROM @@ -3657,7 +3802,7 @@ end_op: printf("~~~\n"); */ -#if (DRC_DEBUG & 4) +#if (DRC_DEBUG) fflush(stdout); #endif @@ -3772,13 +3917,6 @@ static void sh2_generate_utils(void) emith_ctx_read(arg1, offsetof(SH2, drc_tmp)); // tcache_id emith_call(sh2_translate); emit_block_entry(); - // sh2_translate() failed, flush cache and retry - emith_ctx_read(arg0, offsetof(SH2, drc_tmp)); - emith_call(flush_tcache); - emith_move_r_r_ptr(arg0, CONTEXT_REG); - emith_ctx_read(arg1, offsetof(SH2, drc_tmp)); - emith_call(sh2_translate); - emit_block_entry(); // XXX: can't translate, fail emith_call(dr_failure); @@ -3891,51 +4029,105 @@ static void sh2_generate_utils(void) #endif } -static void sh2_smc_rm_block(struct block_desc *bd, int tcache_id, u32 ram_mask) +static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nolit) { - u32 i, addr, end_addr; - void *tmp; + struct block_link *bl; + u32 i; - dbg(2, " killing block %08x-%08x-%08x, blkid %d,%d", - bd->addr, bd->addr + bd->size_nolit, bd->addr + bd->size, + dbg(2, " killing entry %08x-%08x,%08x-%08x, blkid %d,%d", + bd->addr, bd->addr + bd->size, bd->addr_lit, bd->addr_lit + bd->size_lit, tcache_id, bd - block_tables[tcache_id]); if (bd->addr == 0 || bd->entry_count == 0) { dbg(1, " killing dead block!? 
%08x", bd->addr); return; } - // remove from inval_lookup - addr = bd->addr & ~(INVAL_PAGE_SIZE - 1); - end_addr = bd->addr + bd->size; - for (; addr < end_addr; addr += INVAL_PAGE_SIZE) { - i = (addr & ram_mask) / INVAL_PAGE_SIZE; - rm_from_block_list(&inval_lookup[tcache_id][i], bd); - } - - tmp = tcache_ptr; - - // remove from hash table, make incoming links unresolved - // XXX: maybe patch branches w/flush instead? + // remove from hash table, make incoming links unresolved, revoke outgoing links for (i = 0; i < bd->entry_count; i++) { - rm_from_hashlist(&bd->entryp[i], tcache_id); + if (bd->active) + rm_from_hashlist(&bd->entryp[i], tcache_id); - // since we never reuse tcache space of dead blocks, - // insert jump to dispatcher for blocks that are linked to this - tcache_ptr = bd->entryp[i].tcache_ptr; - emit_move_r_imm32(SHR_PC, bd->entryp[i].pc); - rcache_flush(); - emith_jump(sh2_drc_dispatcher); + for (bl = bd->entryp[i].o_links; bl != NULL; ) { + struct block_link *bl_next = bl->o_next; + if (bl->target) { + if (bl->prev) + bl->prev->next = bl->next; + else + bl->target->links = bl->next; + if (bl->next) + bl->next->prev = bl->prev; + bl->target = NULL; + } else if (bd->active) + rm_from_hashlist_unresolved(bl, tcache_id); + // free bl + bl->jump = NULL; + bl->next = blink_free[bl->tcache_id]; + blink_free[bl->tcache_id] = bl; + bl = bl_next; + } + bd->entryp[i].o_links = NULL; - host_instructions_updated(bd->entryp[i].tcache_ptr, tcache_ptr); + for (bl = bd->entryp[i].links; bl != NULL; ) { + struct block_link *bl_next = bl->next; + dbg(2, "- unlink from %p to pc %08x", bl->jump, bl->target_pc); + emith_jump_patch(bl->jump, sh2_drc_dispatcher); + // update cpu caches since the previous jump target doesn't exist anymore + host_instructions_updated(bl->jump, bl->jump+4); - unregister_links(&bd->entryp[i], tcache_id); + add_to_hashlist_unresolved(bl, tcache_id); + bl = bl_next; + } + bd->entryp[i].links = NULL; } - tcache_ptr = tmp; + if (bd->active) + 
dr_mark_memory(-1, bd, tcache_id, nolit); - bd->addr = bd->size = bd->size_nolit = 0; + bd->addr = bd->size = bd->addr_lit = bd->size_lit = 0; bd->entry_count = 0; + bd->active = 0; + rm_from_block_lists(bd); +} +static void sh2_smc_rm_blocks(u32 a, int tcache_id, u32 shift) +{ + struct block_list **blist, *entry, *next; + u32 mask = ram_sizes[tcache_id] - 1; + u32 wtmask = ~0x20000000; // writethrough area mask + u32 start_addr, end_addr; + u32 start_lit, end_lit; + struct block_desc *block; +#if (DRC_DEBUG & 2) + int removed = 0; +#endif + + // need to check cached and writethrough area + a &= wtmask; + blist = &inval_lookup[tcache_id][(a & mask) / INVAL_PAGE_SIZE]; + entry = *blist; + while (entry != NULL) { + next = entry->next; + block = entry->block; + start_addr = block->addr & wtmask; + end_addr = start_addr + block->size; + start_lit = block->addr_lit & wtmask; + end_lit = start_lit + block->size_lit; + if ((start_addr <= a && a < end_addr) || + (start_lit <= a && a < end_lit)) + { + dbg(2, "smc remove @%08x", a); + end_addr = (start_lit <= a && block->size_lit ? 
a : 0); + sh2_smc_rm_block_entry(block, tcache_id, end_addr); +#if (DRC_DEBUG & 2) + removed = 1; +#endif + } + entry = next; + } +#if (DRC_DEBUG & 2) + if (!removed) + dbg(2, "rm_blocks called @%08x, no work?", a); +#endif #if BRANCH_CACHE if (tcache_id) memset32(sh2s[tcache_id-1].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4); @@ -3946,93 +4138,17 @@ static void sh2_smc_rm_block(struct block_desc *bd, int tcache_id, u32 ram_mask) #endif } -/* -04205:243: == msh2 block #0,200 060017a8-060017f0 -> 0x27cb9c - 060017a8 d11c MOV.L @($70,PC),R1 ; @$0600181c - -04230:261: msh2 xsh w32 [260017a8] d225e304 -04230:261: msh2 smc check @260017a8 -04239:226: = ssh2 enter 060017a8 0x27cb9c, c=173 -*/ -static void sh2_smc_rm_blocks(u32 a, u16 *drc_ram_blk, int tcache_id, u32 shift, u32 mask) -{ - struct block_list **blist = NULL, *entry; - struct block_desc *block; - u32 start_addr, end_addr, taddr, i; - u32 from = ~0, to = 0; - - // ignore cache-through - a &= ~0x20000000; - - blist = &inval_lookup[tcache_id][(a & mask) / INVAL_PAGE_SIZE]; - entry = *blist; - while (entry != NULL) { - block = entry->block; - start_addr = block->addr & ~0x20000000; - end_addr = start_addr + block->size; - if (start_addr <= a && a < end_addr) { - // get addr range that includes all removed blocks - if (from > start_addr) - from = start_addr; - if (to < end_addr) - to = end_addr; - - if (a >= start_addr + block->size_nolit) - literal_disabled_frames = 3; - sh2_smc_rm_block(block, tcache_id, mask); - - // entry lost, restart search - entry = *blist; - continue; - } - entry = entry->next; - } - - if (from >= to) - return; - - // update range around a to match latest state - from &= ~(INVAL_PAGE_SIZE - 1); - to |= (INVAL_PAGE_SIZE - 1); - for (taddr = from; taddr < to; taddr += INVAL_PAGE_SIZE) { - i = (taddr & mask) / INVAL_PAGE_SIZE; - entry = inval_lookup[tcache_id][i]; - - for (; entry != NULL; entry = entry->next) { - block = entry->block; - - start_addr = block->addr & ~0x20000000; - if 
(start_addr > a) { - if (to > start_addr) - to = start_addr; - } - else { - end_addr = start_addr + block->size; - if (from < end_addr) - from = end_addr; - } - } - } - - // clear code marks - if (from < to) { - u16 *p = drc_ram_blk + ((from & mask) >> shift); - memset(p, 0, (to - from) >> (shift - 1)); - } -} - void sh2_drc_wcheck_ram(unsigned int a, int val, SH2 *sh2) { - dbg(2, "%csh2 smc check @%08x", sh2->is_slave ? 's' : 'm', a); - sh2_smc_rm_blocks(a, Pico32xMem->drcblk_ram, 0, SH2_DRCBLK_RAM_SHIFT, 0x3ffff); + dbg(2, "%csh2 smc check @%08x v=%d", sh2->is_slave ? 's' : 'm', a, val); + sh2_smc_rm_blocks(a, 0, SH2_DRCBLK_RAM_SHIFT); } void sh2_drc_wcheck_da(unsigned int a, int val, SH2 *sh2) { int cpuid = sh2->is_slave; - dbg(2, "%csh2 smc check @%08x", cpuid ? 's' : 'm', a); - sh2_smc_rm_blocks(a, Pico32xMem->drcblk_da[cpuid], - 1 + cpuid, SH2_DRCBLK_DA_SHIFT, 0xfff); + dbg(2, "%csh2 smc check @%08x v=%d", cpuid ? 's' : 'm', a, val); + sh2_smc_rm_blocks(a, 1 + cpuid, SH2_DRCBLK_DA_SHIFT); } int sh2_execute_drc(SH2 *sh2c, int cycles) @@ -4061,10 +4177,14 @@ static void block_stats(void) int c, b, i, total = 0; printf("block stats:\n"); - for (b = 0; b < ARRAY_SIZE(block_tables); b++) + for (b = 0; b < ARRAY_SIZE(block_tables); b++) { for (i = 0; i < block_counts[b]; i++) if (block_tables[b][i].addr != 0) total += block_tables[b][i].refcount; + for (i = block_limit[b]; i < block_max_counts[b]; i++) + if (block_tables[b][i].addr != 0) + total += block_tables[b][i].refcount; + } for (c = 0; c < 10; c++) { struct block_desc *blk, *maxb = NULL; @@ -4077,17 +4197,27 @@ static void block_stats(void) maxb = blk; } } + for (i = block_limit[b]; i < block_max_counts[b]; i++) { + blk = &block_tables[b][i]; + if (blk->addr != 0 && blk->refcount > max) { + max = blk->refcount; + maxb = blk; + } + } } if (maxb == NULL) break; - printf("%08x %9d %2.3f%%\n", maxb->addr, maxb->refcount, + printf("%08x %p %9d %2.3f%%\n", maxb->addr, maxb->tcache_ptr, maxb->refcount, 
(double)maxb->refcount / total * 100.0); maxb->refcount = 0; } - for (b = 0; b < ARRAY_SIZE(block_tables); b++) + for (b = 0; b < ARRAY_SIZE(block_tables); b++) { for (i = 0; i < block_counts[b]; i++) block_tables[b][i].refcount = 0; + for (i = block_limit[b]; i < block_max_counts[b]; i++) + block_tables[b][i].refcount = 0; + } #endif } @@ -4169,8 +4299,6 @@ void sh2_drc_mem_setup(SH2 *sh2) void sh2_drc_frame(void) { - if (literal_disabled_frames > 0) - literal_disabled_frames--; } int sh2_drc_init(SH2 *sh2) @@ -4197,9 +4325,19 @@ int sh2_drc_init(SH2 *sh2) hash_tables[i] = calloc(hash_table_sizes[i], sizeof(*hash_tables[0])); if (hash_tables[i] == NULL) goto fail; + + unresolved_links[i] = calloc(hash_table_sizes[i], sizeof(*unresolved_links[0])); + if (unresolved_links[i] == NULL) + goto fail; } memset(block_counts, 0, sizeof(block_counts)); + for (i = 0; i < ARRAY_SIZE(block_counts); i++) { + block_limit[i] = block_max_counts[i] - 1; + } memset(block_link_pool_counts, 0, sizeof(block_link_pool_counts)); + for (i = 0; i < ARRAY_SIZE(blink_free); i++) { + blink_free[i] = NULL; + } drc_cmn_init(); rcache_init(); @@ -4208,8 +4346,11 @@ int sh2_drc_init(SH2 *sh2) host_instructions_updated(tcache, tcache_ptr); tcache_bases[0] = tcache_ptrs[0] = tcache_ptr; - for (i = 1; i < ARRAY_SIZE(tcache_bases); i++) + tcache_limit[0] = tcache_bases[0] + tcache_sizes[0] - (tcache_ptr-tcache); + for (i = 1; i < ARRAY_SIZE(tcache_bases); i++) { tcache_bases[i] = tcache_ptrs[i] = tcache_bases[i - 1] + tcache_sizes[i - 1]; + tcache_limit[i] = tcache_bases[i] + tcache_sizes[i]; + } #if (DRC_DEBUG & 4) for (i = 0; i < ARRAY_SIZE(block_tables); i++) @@ -4233,6 +4374,7 @@ fail: void sh2_drc_finish(SH2 *sh2) { + struct block_list *bl, *bn; int i; if (block_tables[0] == NULL) @@ -4243,19 +4385,28 @@ void sh2_drc_finish(SH2 *sh2) for (i = 0; i < TCACHE_BUFFERS; i++) { #if (DRC_DEBUG & 4) printf("~~~ tcache %d\n", i); +#if 0 tcache_dsm_ptrs[i] = tcache_bases[i]; tcache_ptr = tcache_ptrs[i]; 
do_host_disasm(i); + if (tcache_limit[i] < tcache_bases[i] + tcache_sizes[i]) { + tcache_dsm_ptrs[i] = tcache_limit[i]; + tcache_ptr = tcache_bases[i] + tcache_sizes[i]; + do_host_disasm(i); + } +#endif + printf("max links: %d\n", block_link_pool_counts[i]); #endif if (block_tables[i] != NULL) free(block_tables[i]); block_tables[i] = NULL; - if (block_link_pool[i] == NULL) + if (block_link_pool[i] != NULL) free(block_link_pool[i]); block_link_pool[i] = NULL; + blink_free[i] = NULL; - if (inval_lookup[i] == NULL) + if (inval_lookup[i] != NULL) free(inval_lookup[i]); inval_lookup[i] = NULL; @@ -4265,6 +4416,12 @@ void sh2_drc_finish(SH2 *sh2) } } + for (bl = blist_free; bl; bl = bn) { + bn = bl->next; + free(bl); + } + blist_free = NULL; + drc_cmn_cleanup(); } @@ -4304,7 +4461,7 @@ static void *dr_get_pc_base(u32 pc, int is_slave) } void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, - u32 *end_literals_out) + u32 *base_literals_out, u32 *end_literals_out) { u16 *dr_pc_base; u32 pc, op, tmp; @@ -5073,8 +5230,6 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, default: undefined: - elprintf(EL_ANOMALY, "%csh2 drc: unhandled op %04x @ %08x", - is_slave ? 
's' : 'm', op, pc); opd->op = OP_UNDEFINED; // an unhandled instruction is probably not code if it's not the 1st insn if (!(op_flags[i] & OF_DELAY_OP) && pc != base_pc) @@ -5187,6 +5342,8 @@ end: lowest_literal = end_literals; *end_pc_out = end_pc; + if (base_literals_out != NULL) + *base_literals_out = (lowest_literal ?: end_pc); if (end_literals_out != NULL) *end_literals_out = (end_literals ?: end_pc); } diff --git a/cpu/sh2/compiler.h b/cpu/sh2/compiler.h index 6a8596b8..36dfd945 100644 --- a/cpu/sh2/compiler.h +++ b/cpu/sh2/compiler.h @@ -24,7 +24,7 @@ void sh2_drc_frame(void); void scan_block(unsigned int base_pc, int is_slave, unsigned char *op_flags, unsigned int *end_pc, - unsigned int *end_literals); + unsigned int *base_literals, unsigned int *end_literals); #if defined(DRC_SH2) // direct access to some host CPU registers used by the DRC @@ -39,13 +39,15 @@ void scan_block(unsigned int base_pc, int is_slave, #warning "direct DRC register access not available for this host" #endif -#ifdef DCR_SR_REG -#define DRC_DECLARE_SR register int sh2_sr asm(#DCR_SR_REG) +#ifdef DRC_SR_REG +#define __DRC_DECLARE_SR(SR) register int sh2_sr asm(#SR) +#define _DRC_DECLARE_SR(SR) __DRC_DECLARE_SR(SR) +#define DRC_DECLARE_SR _DRC_DECLARE_SR(DRC_SR_REG) #define DRC_SAVE_SR(sh2) \ - if ((sh2->state & (SH2_STATE_RUN|SH2_STATE_BUSY)) == SH2_STATE_RUN) \ + if ((sh2->state & (SH2_STATE_RUN)) == SH2_STATE_RUN) \ sh2->sr = sh2_sr; #define DRC_RESTORE_SR(sh2) \ - if ((sh2->state & (SH2_STATE_RUN|SH2_STATE_BUSY)) == SH2_STATE_RUN) \ + if ((sh2->state & (SH2_STATE_RUN)) == SH2_STATE_RUN) \ sh2_sr = sh2->sr; #else #define DRC_DECLARE_SR diff --git a/pico/32x/memory.c b/pico/32x/memory.c index 47329835..30d0e4d5 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -1432,7 +1432,7 @@ static void REGPARM(3) sh2_write8_sdram(u32 a, u32 d, SH2 *sh2) { u32 a1 = a & 0x3ffff; #ifdef DRC_SH2 - u16 *p = sh2->p_drcblk_ram; + u8 *p = sh2->p_drcblk_ram; int t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; 
if (t) sh2_drc_wcheck_ram(a, t, sh2); @@ -1456,7 +1456,7 @@ static void REGPARM(3) sh2_write8_da(u32 a, u32 d, SH2 *sh2) { u32 a1 = a & 0xfff; #ifdef DRC_SH2 - u16 *p = sh2->p_drcblk_da; + u8 *p = sh2->p_drcblk_da; int t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; if (t) sh2_drc_wcheck_da(a, t, sh2); @@ -1511,7 +1511,7 @@ static void REGPARM(3) sh2_write16_sdram(u32 a, u32 d, SH2 *sh2) { u32 a1 = a & 0x3fffe; #ifdef DRC_SH2 - u16 *p = sh2->p_drcblk_ram; + u8 *p = sh2->p_drcblk_ram; int t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; if (t) sh2_drc_wcheck_ram(a, t, sh2); @@ -1523,7 +1523,7 @@ static void REGPARM(3) sh2_write16_da(u32 a, u32 d, SH2 *sh2) { u32 a1 = a & 0xffe; #ifdef DRC_SH2 - u16 *p = sh2->p_drcblk_da; + u8 *p = sh2->p_drcblk_da; int t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; if (t) sh2_drc_wcheck_da(a, t, sh2); @@ -1580,7 +1580,7 @@ static void REGPARM(3) sh2_write32_sdram(u32 a, u32 d, SH2 *sh2) { u32 a1 = a & 0x3fffc; #ifdef DRC_SH2 - u16 *p = sh2->p_drcblk_ram; + u8 *p = sh2->p_drcblk_ram; int t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; if (t) sh2_drc_wcheck_ram(a, t, sh2); @@ -1595,7 +1595,7 @@ static void REGPARM(3) sh2_write32_da(u32 a, u32 d, SH2 *sh2) { u32 a1 = a & 0xffc; #ifdef DRC_SH2 - u16 *p = sh2->p_drcblk_da; + u8 *p = sh2->p_drcblk_da; int t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; if (t) sh2_drc_wcheck_da(a, t, sh2); diff --git a/pico/pico_int.h b/pico/pico_int.h index 497649b6..13338242 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -599,7 +599,8 @@ struct Pico32xMem { unsigned char sdram[0x40000]; #ifdef DRC_SH2 - unsigned short drcblk_ram[1 << (18 - SH2_DRCBLK_RAM_SHIFT)]; + unsigned char drcblk_ram[1 << (18 - SH2_DRCBLK_RAM_SHIFT)]; + unsigned char drclit_ram[1 << (18 - SH2_DRCBLK_RAM_SHIFT)]; #endif unsigned short dram[2][0x20000/2]; // AKA fb union { @@ -607,7 +608,8 @@ struct Pico32xMem unsigned char m68k_rom_bank[0x10000]; // M68K_BANK_SIZE }; #ifdef DRC_SH2 - unsigned short drcblk_da[2][1 << (12 - SH2_DRCBLK_DA_SHIFT)]; + unsigned char drcblk_da[2][1 << (12 - 
SH2_DRCBLK_DA_SHIFT)]; + unsigned char drclit_da[2][1 << (12 - SH2_DRCBLK_DA_SHIFT)]; #endif union { unsigned char b[0x800]; From d40a5af495db6c91d9c4469ac650bc95e6b7a4d5 Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 16 Apr 2019 20:37:52 +0200 Subject: [PATCH 0185/1110] various small improvements and fixes --- Makefile | 6 +- config.caanoo | 6 +- config.caanoo47 | 4 +- config.dingux | 6 +- config.dingux54 | 6 +- config.gp2x | 4 +- config.gp2x47 | 4 +- config.i386 | 14 +++ config.x86 | 8 +- cpu/cz80/cz80.c | 1 + cpu/drc/cmn.h | 6 - cpu/drc/emit_arm.c | 56 +++++++--- cpu/drc/emit_x86.c | 68 +++++++----- cpu/sh2/compiler.c | 194 ++++++++++++++++++++++++--------- cpu/sh2/mame/sh2pico.c | 2 +- cpu/sh2/sh2.h | 4 +- pico/32x/32x.c | 28 ++--- pico/32x/draw_arm.S | 20 ++-- pico/32x/memory.c | 8 +- pico/32x/memory_arm.S | 76 ++++++------- pico/cd/gfx_dma.c | 4 - pico/cd/memory_arm.S | 2 +- pico/draw2_arm.S | 2 +- pico/draw_arm.S | 2 +- pico/memory.h | 5 - pico/memory_amips.S | 2 +- pico/memory_arm.S | 2 +- pico/pico_int.h | 18 ++- platform/common/common.mak | 1 + platform/common/memcpy.c | 37 ++++--- platform/gp2x/code940/memcpy.s | 12 +- tools/mkoffsets.sh | 5 +- 32 files changed, 372 insertions(+), 241 deletions(-) create mode 100644 config.i386 diff --git a/Makefile b/Makefile index a0e63a47..d82961eb 100644 --- a/Makefile +++ b/Makefile @@ -195,10 +195,10 @@ LDFLAGS += -Wl,-Map=$(TARGET).map endif -target_: pico/pico_int_o32.h $(TARGET) +target_: pico/pico_int_offs.h $(TARGET) clean: - $(RM) $(TARGET) $(OBJS) pico/pico_int_o32.h + $(RM) $(TARGET) $(OBJS) pico/pico_int_offs.h $(RM) -r .opk_data $(TARGET): $(OBJS) @@ -211,7 +211,7 @@ endif pprof: platform/linux/pprof.c $(CC) $(CFLAGS) -O2 -ggdb -DPPROF -DPPROF_TOOL -I../../ -I. 
$^ -o $@ $(LDFLAGS) $(LDLIBS) -pico/pico_int_o32.h:: tools/mkoffsets.sh +pico/pico_int_offs.h:: tools/mkoffsets.sh make -C tools/ XCC="$(CC)" XCFLAGS="$(CFLAGS)" .s.o: diff --git a/config.caanoo b/config.caanoo index 39edb5db..dd053bc5 100644 --- a/config.caanoo +++ b/config.caanoo @@ -4,11 +4,11 @@ CC = arm-gph-linux-gnueabi-gcc CXX = arm-gph-linux-gnueabi-g++ AS = arm-gph-linux-gnueabi-as STRIP = arm-gph-linux-gnueabi-strip -CFLAGS += -mfloat-abi=soft -mcpu=arm920t -mtune=arm920t -fno-stack-protector -D__GP2X__ -DGPERF +CFLAGS += -mfloat-abi=soft -mcpu=arm920t -mtune=arm920t -fno-stack-protector -D__GP2X__ CFLAGS += -fno-gcse -funswitch-loops -fweb -ftree-loop-im #-fpredictive-commoning -ftree-loop-distribution -frename-registers -CFLAGS += -I/home/build/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/include -I/home/build/src/gp2x/armroot-eabi/include +CFLAGS += -I${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/include -I${HOME}/src/gp2x/armroot-eabi/include ASFLAGS += -mfloat-abi=soft -mcpu=arm920t -LDFLAGS += -B/home/build/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/lib/gcc/arm-gph-linux-gnueabi/4.2.4 -B/home/build/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L/home/build/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L/home/build/src/gp2x/armroot-eabi/lib -static +LDFLAGS += -B${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/lib/gcc/arm-gph-linux-gnueabi/4.2.4 -B${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L${HOME}/src/gp2x/armroot-eabi/lib -static LDLIBS += -lpng -lm -ldl ARCH = arm diff --git a/config.caanoo47 b/config.caanoo47 index f3efde0f..2c0ee5af 100644 --- a/config.caanoo47 +++ b/config.caanoo47 @@ -6,9 +6,9 @@ AS = arm-linux-gnueabi-as STRIP = arm-linux-gnueabi-strip CFLAGS 
+= -mfloat-abi=soft -mcpu=arm920t -mtune=arm920t -Wno-unused-result -fno-stack-protector -D__GP2X__ CFLAGS += -fno-gcse -funswitch-loops -fweb -ftree-loop-im #-fpredictive-commoning -ftree-loop-distribution -frename-registers -CFLAGS += -I/home/build/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/include -I/home/build/src/gp2x/armroot-eabi/include +CFLAGS += -I${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/include -I${HOME}/src/gp2x/armroot-eabi/include ASFLAGS += -mfloat-abi=soft -mcpu=arm920t -LDFLAGS += -B/home/build/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/lib/gcc/arm-gph-linux-gnueabi/4.2.4 -B/home/build/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L/home/build/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -static +LDFLAGS += -B${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/lib/gcc/arm-gph-linux-gnueabi/4.2.4 -B${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -static LDLIBS += -lpng -lm -ldl ARCH = arm diff --git a/config.dingux b/config.dingux index 6611991c..8aca06a6 100644 --- a/config.dingux +++ b/config.dingux @@ -4,12 +4,12 @@ CC = mipsel-linux-gcc CXX = mipsel-linux-g++ AS = mipsel-linux-as STRIP = mipsel-linux-strip -CFLAGS += -I/home/build/opt/opendingux-toolchain/usr/include/ -CFLAGS += -I/home/build/opt/opendingux-toolchain/usr/include/SDL +CFLAGS += -I${HOME}/opt/opendingux-toolchain/usr/include/ +CFLAGS += -I${HOME}/opt/opendingux-toolchain/usr/include/SDL CFLAGS += -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -fno-stack-protector ASFLAGS += LDFLAGS += -LDLIBS += -B/home/build/opt/opendingux-toolchain/usr/lib -Wl,-rpath-link=/home/build/opt/opendingux-toolchain/usr/lib -Wl,-rpath-link=/home/build/opt/opendingux-toolchain/lib -lSDL -lasound -lpng -lm -lstdc++ -ldl 
+LDLIBS += -B${HOME}/opt/opendingux-toolchain/usr/lib -Wl,-rpath-link=${HOME}/opt/opendingux-toolchain/usr/lib -Wl,-rpath-link=${HOME}/opt/opendingux-toolchain/lib -lSDL -lasound -lpng -lm -lstdc++ -ldl ARCH = mipsel PLATFORM = opendingux diff --git a/config.dingux54 b/config.dingux54 index 96e55014..5f292652 100644 --- a/config.dingux54 +++ b/config.dingux54 @@ -4,12 +4,12 @@ CC = mipsel-linux-gnu-gcc CXX = mipsel-linux-gnu-g++ AS = mipsel-linux-gnu-as STRIP = mipsel-linux-gnu-strip -CFLAGS += -I/home/build/opt/opendingux-toolchain/usr/include/ -CFLAGS += -I/home/build/opt/opendingux-toolchain/usr/include/SDL +CFLAGS += -I${HOME}/opt/opendingux-toolchain/usr/include/ +CFLAGS += -I${HOME}/opt/opendingux-toolchain/usr/include/SDL CFLAGS += -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -fno-stack-protector ASFLAGS += LDFLAGS += -LDLIBS += -B/home/build/opt/opendingux-toolchain/usr/lib -B/home/build/opt/opendingux-toolchain/lib -Wl,-rpath-link=/home/build/opt/opendingux-toolchain/usr/lib -Wl,-rpath-link=/home/build/opt/opendingux-toolchain/lib -lSDL -lasound -lpng -lz -lm -lstdc++ -ldl +LDLIBS += -B${HOME}/opt/opendingux-toolchain/usr/lib -B${HOME}/opt/opendingux-toolchain/lib -Wl,-rpath-link=${HOME}/opt/opendingux-toolchain/usr/lib -Wl,-rpath-link=${HOME}/opt/opendingux-toolchain/lib -lSDL -lasound -lpng -lz -lm -lstdc++ -ldl ARCH = mipsel PLATFORM = opendingux diff --git a/config.gp2x b/config.gp2x index de3e47c4..248d73aa 100644 --- a/config.gp2x +++ b/config.gp2x @@ -5,10 +5,10 @@ CXX = arm-open2x-linux-g++ AS = arm-open2x-linux-as STRIP = arm-open2x-linux-strip CFLAGS += -msoft-float -mcpu=arm920t -mtune=arm920t -D__GP2X__ -CFLAGS += -I/home/build/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -I/home/build/src/gp2x/armroot/include +CFLAGS += -I${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -I${HOME}/src/gp2x/armroot/include CFLAGS += -fno-gcse -funswitch-loops -fweb -ftree-loop-im #-fpredictive-commoning -ftree-loop-distribution 
-frename-registers ASFLAGS += -mcpu=arm920t -mfloat-abi=soft -LDFLAGS += --sysroot /home/build/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux -L/home/build/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L/home/build/src/gp2x/armroot/lib -static +LDFLAGS += --sysroot ${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux -L${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L${HOME}/src/gp2x/armroot/lib -static LDLIBS += -lpng -lm -ldl ARCH = arm diff --git a/config.gp2x47 b/config.gp2x47 index 1022166d..21769ada 100644 --- a/config.gp2x47 +++ b/config.gp2x47 @@ -5,10 +5,10 @@ CXX = arm-linux-gnueabi-g++ AS = arm-linux-gnueabi-as STRIP = arm-linux-gnueabi-strip CFLAGS += -mabi=apcs-gnu -mno-thumb-interwork -mfloat-abi=soft -mfpu=fpa -mcpu=arm920t -mtune=arm920t -Wno-unused-result -fno-stack-protector -D__GP2X__ -CFLAGS += -I/home/build/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -I/home/build/src/gp2x/armroot/include +CFLAGS += -I${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -I${HOME}/src/gp2x/armroot/include CFLAGS += -fno-gcse -funswitch-loops -fweb -ftree-loop-im #-fpredictive-commoning -ftree-loop-distribution -frename-registers ASFLAGS += -mabi=apcs-gnu -mfloat-abi=soft -mfpu=fpa -mcpu=arm920t -LDFLAGS += -mabi=apcs-gnu -mfpu=fpa -B/home/build/opt/open2x/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1 -B/home/build/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L/home/build/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L/home/build/src/gp2x/armroot/lib -static +LDFLAGS += -mabi=apcs-gnu -mfpu=fpa -B${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1 -B${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L${HOME}/src/gp2x/armroot/lib -static LDLIBS += -lpng -lm -ldl ARCH = arm diff --git a/config.i386 b/config.i386 new file mode 100644 index 00000000..ce07b103 --- /dev/null +++ 
b/config.i386 @@ -0,0 +1,14 @@ +# Automatically generated by configure +# Configured with: './configure' '--platform=generic' +CC = gcc +CXX = g++ +AS = as +STRIP = strip +CFLAGS += -I/usr/include/SDL -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -m32 # -pg +ASFLAGS += +LDFLAGS += -m32 #-pg +LDLIBS += -L/usr/lib/i386-linux-gnu -L${HOME}/opt/lib32 -lSDL-1.2 -lasound -lpng -lz -lm -ldl + +ARCH = i386 +PLATFORM = generic +SOUND_DRIVERS = oss alsa sdl diff --git a/config.x86 b/config.x86 index d463157e..287b82d3 100644 --- a/config.x86 +++ b/config.x86 @@ -4,11 +4,11 @@ CC = gcc CXX = g++ AS = as STRIP = strip -CFLAGS += -I/usr/include/SDL -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -m32 # -pg +CFLAGS += -I/usr/include/SDL -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result # -pg ASFLAGS += -LDFLAGS += -m32 #-pg -LDLIBS += -L/usr/lib/i386-linux-gnu/debug -L/home/build/opt/lib32 -lSDL-1.2 -lasound -lpng -lz -lm -ldl +LDFLAGS += #-pg +LDLIBS += -L/usr/lib/x86_64-linux-gnu -lSDL-1.2 -lasound -lpng -lz -lm -ldl -ARCH = x86 +ARCH = x86_64 PLATFORM = generic SOUND_DRIVERS = oss alsa sdl diff --git a/cpu/cz80/cz80.c b/cpu/cz80/cz80.c index 61ca5f84..0326b0b8 100644 --- a/cpu/cz80/cz80.c +++ b/cpu/cz80/cz80.c @@ -14,6 +14,7 @@ #include "cz80.h" #if PICODRIVE_HACKS +#include #include #endif diff --git a/cpu/drc/cmn.h b/cpu/drc/cmn.h index 7d50d33d..bad02a1b 100644 --- a/cpu/drc/cmn.h +++ b/cpu/drc/cmn.h @@ -1,9 +1,3 @@ -typedef unsigned char u8; -typedef signed char s8; -typedef unsigned short u16; -typedef signed short s16; -typedef unsigned int u32; -typedef signed int s32; #define DRC_TCACHE_SIZE (4*1024*1024) diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index 3f782bb6..4744b127 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -177,26 +177,25 @@ #define EOP_C_AM3_REG(cond,u,l,rn,rd,s,h,rm) EOP_C_AM3(cond,u,0,l,rn,rd,s,h,rm) /* ldr and str */ -#define EOP_LDR_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,1,0,1,rn,rd,offset_12) -#define 
EOP_LDRB_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,1,1,1,rn,rd,offset_12) +#define EOP_LDR_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,(offset_12) >= 0,0,1,rn,rd,abs(offset_12)) +#define EOP_LDRB_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,(offset_12) >= 0,1,1,rn,rd,abs(offset_12)) #define EOP_STR_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,(offset_12) >= 0,0,0,rn,rd,abs(offset_12)) -#define EOP_LDR_IMM( rd,rn,offset_12) EOP_C_AM2_IMM(A_COND_AL,1,0,1,rn,rd,offset_12) -#define EOP_LDR_NEGIMM(rd,rn,offset_12) EOP_C_AM2_IMM(A_COND_AL,0,0,1,rn,rd,offset_12) +#define EOP_LDR_IMM( rd,rn,offset_12) EOP_C_AM2_IMM(A_COND_AL,(offset_12) >= 0,0,1,rn,rd,abs(offset_12)) #define EOP_LDR_SIMPLE(rd,rn) EOP_C_AM2_IMM(A_COND_AL,1,0,1,rn,rd,0) -#define EOP_STR_IMM( rd,rn,offset_12) EOP_C_AM2_IMM(A_COND_AL,1,0,0,rn,rd,offset_12) +#define EOP_STR_IMM( rd,rn,offset_12) EOP_C_AM2_IMM(A_COND_AL,(offset_12) >= 0,0,0,rn,rd,abs(offset_12)) #define EOP_STR_SIMPLE(rd,rn) EOP_C_AM2_IMM(A_COND_AL,1,0,0,rn,rd,0) #define EOP_LDR_REG_LSL(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,0,1,rn,rd,shift_imm,A_AM1_LSL,rm) #define EOP_LDRB_REG_LSL(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,1,1,rn,rd,shift_imm,A_AM1_LSL,rm); -#define EOP_LDRH_IMM2(cond,rd,rn,offset_8) EOP_C_AM3_IMM(cond,1,1,rn,rd,0,1,offset_8) +#define EOP_LDRH_IMM2(cond,rd,rn,offset_8) EOP_C_AM3_IMM(cond,(offset_8) >= 0,1,rn,rd,0,1,abs(offset_8)) #define EOP_LDRH_REG2(cond,rd,rn,rm) EOP_C_AM3_REG(cond,1,1,rn,rd,0,1,rm) -#define EOP_LDRH_IMM( rd,rn,offset_8) EOP_C_AM3_IMM(A_COND_AL,1,1,rn,rd,0,1,offset_8) +#define EOP_LDRH_IMM( rd,rn,offset_8) EOP_C_AM3_IMM(A_COND_AL,(offset_8) >= 0,1,rn,rd,0,1,abs(offset_8)) #define EOP_LDRH_SIMPLE(rd,rn) EOP_C_AM3_IMM(A_COND_AL,1,1,rn,rd,0,1,0) #define EOP_LDRH_REG( rd,rn,rm) EOP_C_AM3_REG(A_COND_AL,1,1,rn,rd,0,1,rm) -#define EOP_STRH_IMM( rd,rn,offset_8) EOP_C_AM3_IMM(A_COND_AL,1,0,rn,rd,0,1,offset_8) +#define EOP_STRH_IMM( rd,rn,offset_8) EOP_C_AM3_IMM(A_COND_AL,(offset_8) >= 
0,0,rn,rd,0,1,abs(offset_8)) #define EOP_STRH_SIMPLE(rd,rn) EOP_C_AM3_IMM(A_COND_AL,1,0,rn,rd,0,1,0) #define EOP_STRH_REG( rd,rn,rm) EOP_C_AM3_REG(A_COND_AL,1,0,rn,rd,0,1,rm) @@ -285,11 +284,29 @@ static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int imm = ~imm; op = A_OP_MVN; } +#ifdef HAVE_ARMV7 + for (v = imm, ror2 = 0; v && !(v & 3); v >>= 2) + ror2--; + if (v >> 8) { + /* 2+ insns needed - prefer movw/movt */ + if (op == A_OP_MVN) + imm = ~imm; + EOP_MOVW(rd, imm); + if (imm & 0xffff0000) + EOP_MOVT(rd, imm); + return; + } +#endif break; - case A_OP_EOR: case A_OP_SUB: case A_OP_ADD: + // count bits in imm and swap ADD and SUB if more bits 1 than 0 + if (s == 0 && count_bits(imm) > 16) { + imm = -imm; + op ^= (A_OP_ADD^A_OP_SUB); + } + case A_OP_EOR: case A_OP_ORR: case A_OP_BIC: if (s == 0 && imm == 0 && rd == rn) @@ -412,6 +429,8 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_add_r_r_r_lsl(d, s1, s2, lslimm) \ EOP_ADD_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm) +#define emith_add_r_r_r_lsl_ptr(d, s1, s2, lslimm) \ + emith_add_r_r_r_lsl(d, s1, s2, lslimm) #define emith_addf_r_r_r_lsl(d, s1, s2, lslimm) \ EOP_ADD_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSL,lslimm) @@ -483,7 +502,7 @@ static int emith_xbranch(int cond, void *target, int is_call) emith_add_r_r_r(d, d, s) #define emith_sub_r_r(d, s) \ - EOP_SUB_REG(A_COND_AL,0,d,d,s,A_AM1_LSL,0) + emith_sub_r_r_r(d, d, s) #define emith_adc_r_r(d, s) \ EOP_ADC_REG(A_COND_AL,0,d,d,s,A_AM1_LSL,0) @@ -529,6 +548,9 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_move_r_imm(r, imm) \ emith_op_imm(A_COND_AL, 0, A_OP_MOV, r, imm) +#define emith_move_r_ptr_imm(r, imm) \ + emith_move_r_imm(r, (u32)(imm)) + #define emith_add_r_imm(r, imm) \ emith_op_imm(A_COND_AL, 0, A_OP_ADD, r, imm) @@ -536,7 +558,7 @@ static int emith_xbranch(int cond, void *target, int is_call) emith_op_imm(A_COND_AL, 0, A_OP_ADC, r, imm) #define emith_adcf_r_imm(r, imm) \ - 
emith_op_imm(A_COND_AL, 1, A_OP_ADC, r, (imm)) + emith_op_imm(A_COND_AL, 1, A_OP_ADC, r, imm) #define emith_sub_r_imm(r, imm) \ emith_op_imm(A_COND_AL, 0, A_OP_SUB, r, imm) @@ -610,13 +632,13 @@ static int emith_xbranch(int cond, void *target, int is_call) emith_op_imm2(A_COND_AL, 0, A_OP_SUB, d, s, imm) #define emith_subf_r_r_imm(d, s, imm) \ - emith_op_imm2(A_COND_AL, 1, A_OP_SUB, d, s, (imm)) + emith_op_imm2(A_COND_AL, 1, A_OP_SUB, d, s, imm) #define emith_or_r_r_imm(d, s, imm) \ - emith_op_imm2(A_COND_AL, 0, A_OP_ORR, d, s, (imm)) + emith_op_imm2(A_COND_AL, 0, A_OP_ORR, d, s, imm) #define emith_eor_r_r_imm(d, s, imm) \ - emith_op_imm2(A_COND_AL, 0, A_OP_EOR, d, s, (imm)) + emith_op_imm2(A_COND_AL, 0, A_OP_EOR, d, s, imm) #define emith_neg_r_r(d, s) \ EOP_RSB_IMM(d, s, 0, 0) @@ -758,7 +780,7 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_clear_msb_c(cond, d, s, count) { \ u32 t; \ if ((count) <= 8) { \ - t = (count) - 8; \ + t = 8 - (count); \ t = (0xff << t) & 0xff; \ EOP_C_DOP_IMM(cond,A_OP_BIC,0,s,d,8/2,t); \ } else if ((count) >= 24) { \ @@ -880,7 +902,9 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_sh2_rcall(a, tab, func, mask) { \ emith_lsr(mask, a, SH2_READ_SHIFT); \ EOP_ADD_REG_LSL(tab, tab, mask, 3); \ - EOP_LDMIA(tab, (1<is_slave; -if (sh2 != &sh2s[0] && sh2 != &sh2s[1]) printf("sh2 %p?\n",sh2); if (!trace[0]) { truncate("pico.trace", 0); trace[0] = fopen("pico.trace0", "wb"); @@ -199,7 +199,8 @@ if (sh2 != &sh2s[0] && sh2 != &sh2s[1]) printf("sh2 %p?\n",sh2); if (csh2[idx][0].pc != sh2->pc) { fwrite(sh2, offsetof(SH2, read8_map), 1, trace[idx]); fwrite(&sh2->pdb_io_csum, sizeof(sh2->pdb_io_csum), 1, trace[idx]); - memcpy(&csh2[idx][0], sh2, offsetof(SH2, icount)); + memcpy(&csh2[idx][0], sh2, offsetof(SH2, poll_cnt)+4); + csh2[idx][0].is_slave = idx; } } #elif (DRC_DEBUG & 512) @@ -234,9 +235,10 @@ if (sh2 != &sh2s[0] && sh2 != &sh2s[1]) printf("sh2 %p?\n",sh2); #elif (DRC_DEBUG & 1024) 
{ int x = sh2->is_slave, i; - for (i = 0; i < ARRAY_SIZE(csh2[x]); i++) - memcpy(&csh2[x][i], &csh2[x][i+1], offsetof(SH2, icount)); - memcpy(&csh2[x][3], sh2, offsetof(SH2, icount)); + for (i = 0; i < ARRAY_SIZE(csh2[x])-1; i++) + memcpy(&csh2[x][i], &csh2[x][i+1], offsetof(SH2, poll_cnt)+4); + memcpy(&csh2[x][ARRAY_SIZE(csh2[x])-1], sh2, offsetof(SH2, poll_cnt)+4); + csh2[x][0].is_slave = x; } #endif } @@ -252,9 +254,9 @@ if (sh2 != &sh2s[0] && sh2 != &sh2s[1]) printf("sh2 %p?\n",sh2); // and can be discarded early // XXX: need to tune sizes static const int tcache_sizes[TCACHE_BUFFERS] = { - DRC_TCACHE_SIZE * 6 / 8, // ROM (rarely used), DRAM - DRC_TCACHE_SIZE / 8, // BIOS, data array in master sh2 - DRC_TCACHE_SIZE / 8, // ... slave + DRC_TCACHE_SIZE * 14 / 16, // ROM (rarely used), DRAM + DRC_TCACHE_SIZE / 16, // BIOS, data array in master sh2 + DRC_TCACHE_SIZE / 16, // ... slave }; static u8 *tcache_bases[TCACHE_BUFFERS]; @@ -287,6 +289,9 @@ struct block_entry { #if (DRC_DEBUG & 2) struct block_desc *block; #endif +#if (DRC_DEBUG & 32) + int entry_count; +#endif }; struct block_desc { @@ -698,6 +703,14 @@ static void add_to_hashlist(struct block_entry *be, int tcache_id) (*head)->prev = be; be->next = *head; *head = be; + +#if (DRC_DEBUG & 2) + if (be->next != NULL) { + printf(" %08x: entry hash collision with %08x\n", + be->pc, be->next->pc); + hash_collisions++; + } +#endif } static void rm_from_hashlist(struct block_entry *be, int tcache_id) @@ -727,6 +740,14 @@ static void add_to_hashlist_unresolved(struct block_link *bl, int tcache_id) u32 tcmask = hash_table_sizes[tcache_id] - 1; struct block_link **head = &HASH_FUNC(unresolved_links[tcache_id], bl->target_pc, tcmask); +#if DRC_DEBUG & 1 + struct block_link *current = *head; + while (current != NULL && current != bl) + current = current->next; + if (current == bl) + dbg(1, "add_to_hashlist_unresolved @%p: bl %p %p %08x already in?", head, bl, bl->target, bl->target_pc); +#endif + bl->target = NULL; // 
marker for not resolved bl->prev = NULL; if (*head) @@ -745,7 +766,7 @@ static void rm_from_hashlist_unresolved(struct block_link *bl, int tcache_id) while (current->prev != NULL) current = current->prev; if (current != *head) - dbg(1, "rm_from_hashlist unresolved @%p: bl %p %p %08x missing?", head, bl, bl->target, bl->target_pc); + dbg(1, "rm_from_hashlist_unresolved @%p: bl %p %p %08x missing?", head, bl, bl->target, bl->target_pc); #endif if (bl->prev != NULL) @@ -980,10 +1001,12 @@ static void *dr_prepare_ext_branch(struct block_entry *owner, u32 pc, int is_sla struct block_entry *be = NULL; int target_tcache_id; + // get the target block entry be = dr_get_entry(pc, is_slave, &target_tcache_id); if (target_tcache_id && target_tcache_id != tcache_id) return sh2_drc_dispatcher; + // get a block link if (blink_free[tcache_id] != NULL) { bl = blink_free[tcache_id]; blink_free[tcache_id] = bl->next; @@ -995,6 +1018,7 @@ static void *dr_prepare_ext_branch(struct block_entry *owner, u32 pc, int is_sla block_link_pool_counts[tcache_id] = cnt+1; } + // prepare link and add to ougoing list of owner bl->tcache_id = tcache_id; bl->target_pc = pc; bl->jump = tcache_ptr; @@ -1940,6 +1964,7 @@ static void rcache_invalidate(void) cache_regs[i].type = HR_FREE; cache_regs[i].gregs = 0; } + for (i = 0; i < ARRAY_SIZE(guest_regs); i++) { guest_regs[i].flags &= GRF_STATIC; if (!(guest_regs[i].flags & GRF_STATIC)) @@ -1953,7 +1978,8 @@ static void rcache_invalidate(void) cache_regs[guest_regs[i].sreg].gregs = 1 << i; guest_regs[i].vreg = guest_regs[i].sreg; } - }; + } + rcache_counter = 0; rcache_hint_soon = rcache_hint_late = 0; @@ -2005,6 +2031,7 @@ static int emit_get_rbase_and_offs(SH2 *sh2, u32 a, u32 *offs) u32 mask = 0; int poffs; int hr; + unsigned long la; poffs = dr_ctx_get_mem_ptr(a, &mask); if (poffs == -1) @@ -2014,15 +2041,16 @@ static int emit_get_rbase_and_offs(SH2 *sh2, u32 a, u32 *offs) if (mask < 0x1000) { // can't access data array or BIOS directly from ROM or 
SDRAM, // since code may run on both SH2s (tcache_id of translation block needed)) - emith_ctx_read(hr, poffs); + emith_ctx_read_ptr(hr, poffs); if (a & mask & ~omask) - emith_add_r_imm(hr, a & mask & ~omask); + emith_add_r_r_ptr_imm(hr, hr, a & mask & ~omask); + *offs = a & omask; } else { // known fixed host address - a = (a & mask) + *(u32 *)((char *)sh2 + poffs); - emith_move_r_imm(hr, (a & ~omask)); + la = (unsigned long)*(void **)((char *)sh2 + poffs) + (a & mask); + *offs = la & omask; + emith_move_r_ptr_imm(hr, la & ~omask); } - *offs = a & omask; return hr; } @@ -2392,8 +2420,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) void *branch_patch_ptr[MAX_LOCAL_BRANCHES]; u32 branch_patch_pc[MAX_LOCAL_BRANCHES]; int branch_patch_count = 0; - u32 literal_addr[MAX_LITERALS]; - int literal_addr_count = 0; u8 op_flags[BLOCK_INSN_LIMIT]; struct { u32 test_irq:1; @@ -2473,7 +2499,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) { u32 delay_dep_fw = 0, delay_dep_bk = 0; int tmp3, tmp4; - u32 sr; + int sr; opd = &ops[i]; op = FETCH_OP(pc); @@ -2487,7 +2513,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) pc, op, sh2dasm_buff); #endif - if ((op_flags[i] & OF_BTARGET) || pc == base_pc) + if (op_flags[i] & OF_BTARGET) { if (pc != base_pc) { @@ -2517,6 +2543,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) else { dbg(1, "too many entryp for block #%d,%d pc=%08x", tcache_id, blkid_main, pc); + break; } } else { entry = block->entryp; @@ -2537,10 +2564,10 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) #if (DRC_DEBUG & 0x10) rcache_get_reg_arg(0, SHR_PC, NULL); - tmp = emit_memhandler_read(2); + tmp = emit_memhandler_read(1); tmp2 = rcache_get_tmp(); tmp3 = rcache_get_tmp(); - emith_move_r_imm(tmp2, FETCH32(pc)); + emith_move_r_imm(tmp2, (s16)FETCH_OP(pc)); emith_move_r_imm(tmp3, 0); emith_cmp_r_r(tmp, tmp2); EMITH_SJMP_START(DCOND_EQ); @@ -2556,9 +2583,20 @@ static void REGPARM(2) 
*sh2_translate(SH2 *sh2, int tcache_id) emith_cmp_r_imm(sr, 0); emith_jump_cond(DCOND_LE, sh2_drc_exit); +#if (DRC_DEBUG & 32) + // block hit counter + tmp = rcache_get_tmp_arg(0); + tmp2 = rcache_get_tmp_arg(1); + emith_move_r_ptr_imm(tmp, (uptr)entry); + emith_read_r_r_offs(tmp2, tmp, offsetof(struct block_entry, entry_count)); + emith_add_r_imm(tmp2, 1); + emith_write_r_r_offs(tmp2, tmp, offsetof(struct block_entry, entry_count)); + rcache_free_tmp(tmp); + rcache_free_tmp(tmp2); +#endif + #if (DRC_DEBUG & (8|256|512|1024)) sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); - FLUSH_CYCLES(sr); rcache_clean(); tmp = rcache_used_hreg_mask(); emith_save_caller_regs(tmp); @@ -2566,7 +2604,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) rcache_get_reg_arg(2, SHR_SR, NULL); tmp2 = rcache_get_tmp_arg(0); tmp3 = rcache_get_tmp_arg(1); - emith_move_r_imm(tmp2, (u32)tcache_ptr); + emith_move_r_ptr_imm(tmp2, tcache_ptr); emith_move_r_r_ptr(tmp3,CONTEXT_REG); emith_call(sh2_drc_log_entry); emith_restore_caller_regs(tmp); @@ -2776,7 +2814,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) if ((opd->imm && opd->imm >= base_pc && opd->imm < end_literals) || dr_is_rom(opd->imm)) { - ADD_TO_ARRAY(literal_addr, literal_addr_count, opd->imm,); if (opd->size == 2) u = FETCH32(opd->imm); else @@ -2862,8 +2899,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x06: // MOV.L Rm,@(R0,Rn) 0000nnnnmmmm0110 emit_indirect_indexed_write(sh2, GET_Rm(), SHR_R0, GET_Rn(), op & 3); goto end_op; - case 0x07: - // MUL.L Rm,Rn 0000nnnnmmmm0111 + case 0x07: // MUL.L Rm,Rn 0000nnnnmmmm0111 tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); tmp3 = rcache_get_reg(SHR_MACL, RC_GR_WRITE, NULL); @@ -2941,8 +2977,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) goto default_; ///////////////////////////////////////////// - case 0x01: - // MOV.L Rm,@(disp,Rn) 0001nnnnmmmmdddd + case 
0x01: // MOV.L Rm,@(disp,Rn) 0001nnnnmmmmdddd emit_memhandler_write_rr(sh2, GET_Rm(), GET_Rn(), (op & 0x0f) * 4, 2); goto end_op; @@ -3346,19 +3381,16 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x09: switch (GET_Fx()) { - case 0: - // SHLL2 Rn 0100nnnn00001000 - // SHLR2 Rn 0100nnnn00001001 + case 0: // SHLL2 Rn 0100nnnn00001000 + // SHLR2 Rn 0100nnnn00001001 tmp = 2; break; - case 1: - // SHLL8 Rn 0100nnnn00011000 - // SHLR8 Rn 0100nnnn00011001 + case 1: // SHLL8 Rn 0100nnnn00011000 + // SHLR8 Rn 0100nnnn00011001 tmp = 8; break; - case 2: - // SHLL16 Rn 0100nnnn00101000 - // SHLR16 Rn 0100nnnn00101001 + case 2: // SHLL16 Rn 0100nnnn00101000 + // SHLR16 Rn 0100nnnn00101001 tmp = 16; break; default: @@ -3432,8 +3464,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } else emit_move_r_r(tmp2, GET_Rn()); goto end_op; - case 0x0f: - // MAC.W @Rm+,@Rn+ 0100nnnnmmmm1111 + case 0x0f: // MAC.W @Rm+,@Rn+ 0100nnnnmmmm1111 emit_indirect_read_double(sh2, &tmp, &tmp2, GET_Rn(), GET_Rm(), 1); sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); tmp3 = rcache_get_reg(SHR_MACL, RC_GR_RMW, NULL); @@ -3446,8 +3477,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) goto default_; ///////////////////////////////////////////// - case 0x05: - // MOV.L @(disp,Rm),Rn 0101nnnnmmmmdddd + case 0x05: // MOV.L @(disp,Rm),Rn 0101nnnnmmmmdddd emit_memhandler_read_rr(sh2, GET_Rn(), GET_Rm(), (op & 0x0f) * 4, 2); goto end_op; @@ -3519,8 +3549,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) goto default_; ///////////////////////////////////////////// - case 0x07: - // ADD #imm,Rn 0111nnnniiiiiiii + case 0x07: // ADD #imm,Rn 0111nnnniiiiiiii tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); if (op & 0x80) { // adding negative emith_sub_r_r_imm(tmp, tmp2, -op & 0xff); @@ -3621,8 +3650,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) goto default_; ///////////////////////////////////////////// - case 0x0e: 
- // MOV #imm,Rn 1110nnnniiiiiiii + case 0x0e: // MOV #imm,Rn 1110nnnniiiiiiii emit_move_r_imm32(GET_Rn(), (s8)op); goto end_op; @@ -3886,9 +3914,7 @@ static void sh2_generate_utils(void) #if BRANCH_CACHE // check if PC is in branch target cache emith_and_r_r_imm(arg1, arg0, (ARRAY_SIZE(sh2s->branch_cache)-1)*4); - // TODO implement emith_add_r_r_r_lsl_ptr, saves one insn on 32bit ARM - emith_lsl(arg1, arg1, sizeof(void *) == 8 ? 2 : 1); - emith_add_r_r_ptr(arg1, CONTEXT_REG); + emith_add_r_r_r_lsl_ptr(arg1, CONTEXT_REG, arg1, sizeof(void *) == 8 ? 2 : 1); emith_read_r_r_offs(arg2, arg1, offsetof(SH2, branch_cache)); emith_cmp_r_r(arg2, arg0); EMITH_SJMP_START(DCOND_NE); @@ -3905,8 +3931,7 @@ static void sh2_generate_utils(void) EMITH_SJMP_START(DCOND_EQ); emith_ctx_read_c(DCOND_NE, arg2, SHR_PC * 4); emith_and_r_r_imm(arg1, arg2, (ARRAY_SIZE(sh2s->branch_cache)-1)*4); - emith_lsl(arg1, arg1, sizeof(void *) == 8 ? 2 : 1); - emith_add_r_r_ptr(arg1, CONTEXT_REG); + emith_add_r_r_r_lsl_ptr(arg1, CONTEXT_REG, arg1, sizeof(void *) == 8 ? 
2 : 1); emith_write_r_r_offs_c(DCOND_NE, arg2, arg1, offsetof(SH2, branch_cache)); emith_write_r_r_offs_ptr_c(DCOND_NE, RET_REG, arg1, offsetof(SH2, branch_cache) + sizeof(void *)); EMITH_SJMP_END(DCOND_EQ); @@ -4174,7 +4199,8 @@ int sh2_execute_drc(SH2 *sh2c, int cycles) static void block_stats(void) { #if (DRC_DEBUG & 2) - int c, b, i, total = 0; + int c, b, i; + long total = 0; printf("block stats:\n"); for (b = 0; b < ARRAY_SIZE(block_tables); b++) { @@ -4185,8 +4211,9 @@ static void block_stats(void) if (block_tables[b][i].addr != 0) total += block_tables[b][i].refcount; } + printf("total: %ld\n",total); - for (c = 0; c < 10; c++) { + for (c = 0; c < 20; c++) { struct block_desc *blk, *maxb = NULL; int max = 0; for (b = 0; b < ARRAY_SIZE(block_tables); b++) { @@ -4221,6 +4248,63 @@ static void block_stats(void) #endif } +void entry_stats(void) +{ +#if (DRC_DEBUG & 32) + int c, b, i, j; + long total = 0; + + printf("block entry stats:\n"); + for (b = 0; b < ARRAY_SIZE(block_tables); b++) { + for (i = 0; i < block_counts[b]; i++) + for (j = 0; j < block_tables[b][i].entry_count; j++) + total += block_tables[b][i].entryp[j].entry_count; + for (i = block_limit[b]; i < block_max_counts[b]; i++) + for (j = 0; j < block_tables[b][i].entry_count; j++) + total += block_tables[b][i].entryp[j].entry_count; + } + printf("total: %ld\n",total); + + for (c = 0; c < 20; c++) { + struct block_desc *blk; + struct block_entry *maxb = NULL; + int max = 0; + for (b = 0; b < ARRAY_SIZE(block_tables); b++) { + for (i = 0; i < block_counts[b]; i++) { + blk = &block_tables[b][i]; + for (j = 0; j < blk->entry_count; j++) + if (blk->entryp[j].entry_count > max) { + max = blk->entryp[j].entry_count; + maxb = &blk->entryp[j]; + } + } + for (i = block_limit[b]; i < block_max_counts[b]; i++) { + blk = &block_tables[b][i]; + for (j = 0; j < blk->entry_count; j++) + if (blk->entryp[j].entry_count > max) { + max = blk->entryp[j].entry_count; + maxb = &blk->entryp[j]; + } + } + } + if (maxb == 
NULL) + break; + printf("%08x %p %9d %2.3f%%\n", maxb->pc, maxb->tcache_ptr, maxb->entry_count, + (double)100 * maxb->entry_count / total); + maxb->entry_count = 0; + } + + for (b = 0; b < ARRAY_SIZE(block_tables); b++) { + for (i = 0; i < block_counts[b]; i++) + for (j = 0; j < block_tables[b][i].entry_count; j++) + block_tables[b][i].entryp[j].entry_count = 0; + for (i = block_limit[b]; i < block_max_counts[b]; i++) + for (j = 0; j < block_tables[b][i].entry_count; j++) + block_tables[b][i].entryp[j].entry_count = 0; + } +#endif +} + static void backtrace(void) { #if (DRC_DEBUG & 1024) @@ -4279,6 +4363,7 @@ void sh2_drc_flush_all(void) backtrace(); state_dump(); block_stats(); + entry_stats(); flush_tcache(0); flush_tcache(1); flush_tcache(2); @@ -4364,6 +4449,7 @@ int sh2_drc_init(SH2 *sh2) hash_collisions = 0; #endif } + memset(sh2->branch_cache, -1, sizeof(sh2->branch_cache)); return 0; diff --git a/cpu/sh2/mame/sh2pico.c b/cpu/sh2/mame/sh2pico.c index 636ebc6f..f9d30d77 100644 --- a/cpu/sh2/mame/sh2pico.c +++ b/cpu/sh2/mame/sh2pico.c @@ -214,7 +214,7 @@ int sh2_execute_interpreter(SH2 *sh2, int cycles) if (sh2->pc < *base_pc || sh2->pc >= *end_pc) { *base_pc = sh2->pc; scan_block(*base_pc, sh2->is_slave, - op_flags, end_pc, NULL); + op_flags, end_pc, NULL, NULL); } if ((op_flags[(sh2->pc - *base_pc) / 2] & OF_BTARGET) || sh2->pc == *base_pc diff --git a/cpu/sh2/sh2.h b/cpu/sh2/sh2.h index e53bbf05..5a0661ea 100644 --- a/cpu/sh2/sh2.h +++ b/cpu/sh2/sh2.h @@ -81,9 +81,9 @@ typedef struct SH2_ #define CYCLE_MULT_SHIFT 10 #define C_M68K_TO_SH2(xsh2, c) \ - ((int)((long long)(c) * (xsh2)->mult_m68k_to_sh2) >> CYCLE_MULT_SHIFT) + (int)(((unsigned long long)(c) * (xsh2)->mult_m68k_to_sh2) >> CYCLE_MULT_SHIFT) #define C_SH2_TO_M68K(xsh2, c) \ - ((int)((long long)(c+3) * (xsh2)->mult_sh2_to_m68k) >> CYCLE_MULT_SHIFT) + (int)(((unsigned long long)(c+3U) * (xsh2)->mult_sh2_to_m68k) >> CYCLE_MULT_SHIFT) int sh2_init(SH2 *sh2, int is_slave, SH2 *other_sh2); void 
sh2_finish(SH2 *sh2); diff --git a/pico/32x/32x.c b/pico/32x/32x.c index a15cb112..4e8377eb 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -30,7 +30,7 @@ static int REGPARM(2) sh2_irq_cb(SH2 *sh2, int level) } // MUST specify active_sh2 when called from sh2 memhandlers -void p32x_update_irls(SH2 *active_sh2, int m68k_cycles) +void p32x_update_irls(SH2 *active_sh2, unsigned int m68k_cycles) { int irqs, mlvl = 0, slvl = 0; int mrun, srun; @@ -50,18 +50,18 @@ void p32x_update_irls(SH2 *active_sh2, int m68k_cycles) slvl++; slvl *= 2; - mrun = sh2_irl_irq(&msh2, mlvl, active_sh2 == &msh2); + mrun = sh2_irl_irq(&msh2, mlvl, msh2.state & SH2_STATE_RUN); if (mrun) { p32x_sh2_poll_event(&msh2, SH2_IDLE_STATES, m68k_cycles); - if (active_sh2 == &msh2) - sh2_end_run(active_sh2, 1); + if (msh2.state & SH2_STATE_RUN) + sh2_end_run(&msh2, 1); } - srun = sh2_irl_irq(&ssh2, slvl, active_sh2 == &ssh2); + srun = sh2_irl_irq(&ssh2, slvl, ssh2.state & SH2_STATE_RUN); if (srun) { p32x_sh2_poll_event(&ssh2, SH2_IDLE_STATES, m68k_cycles); - if (active_sh2 == &ssh2) - sh2_end_run(active_sh2, 1); + if (ssh2.state & SH2_STATE_RUN) + sh2_end_run(&ssh2, 1); } elprintf(EL_32X, "update_irls: m %d/%d, s %d/%d", mlvl, mrun, slvl, srun); @@ -70,7 +70,7 @@ void p32x_update_irls(SH2 *active_sh2, int m68k_cycles) // the mask register is inconsistent, CMD is supposed to be a mask, // while others are actually irq trigger enables? // TODO: test on hw.. 
-void p32x_trigger_irq(SH2 *sh2, int m68k_cycles, unsigned int mask) +void p32x_trigger_irq(SH2 *sh2, unsigned int m68k_cycles, unsigned int mask) { Pico32x.sh2irqs |= mask & P32XI_VRES; Pico32x.sh2irqi[0] |= mask & (Pico32x.sh2irq_mask[0] << 3); @@ -79,7 +79,7 @@ void p32x_trigger_irq(SH2 *sh2, int m68k_cycles, unsigned int mask) p32x_update_irls(sh2, m68k_cycles); } -void p32x_update_cmd_irq(SH2 *sh2, int m68k_cycles) +void p32x_update_cmd_irq(SH2 *sh2, unsigned int m68k_cycles) { if ((Pico32x.sh2irq_mask[0] & 2) && (Pico32x.regs[2 / 2] & 1)) Pico32x.sh2irqi[0] |= P32XI_CMD; @@ -207,8 +207,8 @@ void PicoReset32x(void) { if (PicoIn.AHW & PAHW_32X) { p32x_trigger_irq(NULL, SekCyclesDone(), P32XI_VRES); - p32x_sh2_poll_event(&msh2, SH2_IDLE_STATES, 0); - p32x_sh2_poll_event(&ssh2, SH2_IDLE_STATES, 0); + p32x_sh2_poll_event(&msh2, SH2_IDLE_STATES, SekCyclesDone()); + p32x_sh2_poll_event(&ssh2, SH2_IDLE_STATES, SekCyclesDone()); p32x_pwm_ctl_changed(); p32x_timers_recalc(); } @@ -258,7 +258,7 @@ static void p32x_start_blank(void) p32x_sh2_poll_event(&ssh2, SH2_STATE_VPOLL, SekCyclesDone()); } -void p32x_schedule_hint(SH2 *sh2, int m68k_cycles) +void p32x_schedule_hint(SH2 *sh2, unsigned int m68k_cycles) { // rather rough, 32x hint is useless in practice int after; @@ -370,9 +370,9 @@ static void p32x_run_events(unsigned int until) oldest, event_time_next); } -static void run_sh2(SH2 *sh2, int m68k_cycles) +static void run_sh2(SH2 *sh2, unsigned int m68k_cycles) { - int cycles, done; + unsigned int cycles, done; pevt_log_sh2_o(sh2, EVT_RUN_START); sh2->state |= SH2_STATE_RUN; diff --git a/pico/32x/draw_arm.S b/pico/32x/draw_arm.S index e91f9893..c59fa8f5 100644 --- a/pico/32x/draw_arm.S +++ b/pico/32x/draw_arm.S @@ -6,7 +6,7 @@ @* See COPYING file in the top-level directory. 
@* -#include "pico/pico_int_o32.h" +#include "pico/pico_int_offs.h" .extern Pico32x .extern Pico @@ -74,7 +74,7 @@ Pico32xNativePal: ldr lr,=Pico ldr r10,=Pico32x ldr r11, [lr, #OFS_Pico_est+OFS_EST_Draw2FB] - ldr r10,[r10, #0x40] @ Pico32x.vdp_regs[0] + ldrh r10,[r10, #0x40] @ Pico32x.vdp_regs[0] add r9, lr, #OFS_Pico_est+OFS_EST_HighPal @ palmd and r4, r2, #0xff @@ -118,6 +118,8 @@ Pico32xNativePal: mov r7, r7, lsl #1 ldreqh r12,[r9, r7] streqh r12,[r0], #2 @ *dst++ = palmd[*pmd] +.else + addeq r0, r0, #2 .endif beq 2b @ loop_inner @@ -182,8 +184,8 @@ Pico32xNativePal: ldrneb r8, [r5, #2]! @ r7,r8 - pixel 0,1 index subs r6, r6, #1 blt 0b @ loop_outer -@ cmp r7, r8 @ is this really improving things? -@ beq 5f @ check_fill @ +8 + cmp r7, r8 @ is this really improving things? + beq 5f @ check_fill @ +8 3: @ no_fill: mov r12,r7, lsl #1 @@ -242,7 +244,7 @@ Pico32xNativePal: beq 6b 7: @ count_done - sub r5, r5, #4 @ undo readahead + sub r5, r5, #4 @ undo readahead @ fix alignment and check type sub r8, r5, lr @@ -268,14 +270,14 @@ Pico32xNativePal: b 2b @ loop_inner 9: @ bg_mode: - ldrb r12,[r11],#1 @ MD pixel + ldrb r12,[r11],#1 @ MD pixel 0,1 ldrb lr, [r11],#1 - cmp r3, lr, lsl #26 @ MD has bg pixel? + cmp r3, r12,lsl #26 @ MD pixel 0 has bg? .if \do_md mov r12,r12,lsl #1 ldrneh r12,[r9, r12] @ t = palmd[*pmd] moveq r12,r7 - cmp r3, lr, lsl #26 + cmp r3, lr, lsl #26 @ MD pixel 1 has bg? mov lr, lr, lsl #1 ldrneh lr, [r9, lr] moveq lr, r7 @@ -283,7 +285,7 @@ Pico32xNativePal: strh lr, [r0], #2 .else streqh r7, [r0] - cmp r3, lr, lsl #26 + cmp r3, lr, lsl #26 @ MD pixel 1 has bg? 
streqh r7, [r0, #2] add r0, r0, #4 .endif diff --git a/pico/32x/memory.c b/pico/32x/memory.c index 30d0e4d5..6a3b2222 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -398,9 +398,6 @@ static void p32x_reg_write8(u32 a, u32 d) p32x_sh2_poll_event(&sh2s[1], SH2_STATE_CPOLL, cycles); comreg = 1 << (a & 0x0f) / 2; Pico32x.comm_dirty |= comreg; - - if (cycles - (int)msh2.m68krcycles_done > 120) - p32x_sync_sh2s(cycles); return; } } @@ -453,6 +450,9 @@ static void p32x_reg_write16(u32 a, u32 d) int cycles = SekCyclesDone(); int comreg; + if (r[a / 2] == d) + return; + p32x_sync_sh2s(cycles); r[a / 2] = d; @@ -685,7 +685,7 @@ static void p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2) case 0x3f: return; pwm_write: - p32x_pwm_write16(a & ~1, d, sh2, 0); + p32x_pwm_write16(a & ~1, d, sh2, sh2_cycles_done_m68k(sh2)); return; } diff --git a/pico/32x/memory_arm.S b/pico/32x/memory_arm.S index 90c86ddf..1082c7b7 100644 --- a/pico/32x/memory_arm.S +++ b/pico/32x/memory_arm.S @@ -6,7 +6,7 @@ * See COPYING file in the top-level directory. */ -#include "../pico_int_o32.h" +#include "../pico_int_offs.h" @ 32X bank sizes... 
TODO this should somehow come from an include file .equ SH2_ROM_SHIFT, 10 @ 0x003fffff @@ -46,92 +46,92 @@ sh2_read8_rom: ldr ip, [r1, #OFS_SH2_p_rom] eor r0, r0, #1 - lsl r0, #SH2_ROM_SHIFT + mov r0, r0, lsl #SH2_ROM_SHIFT ldrb r0, [ip, r0, lsr #SH2_ROM_SHIFT] bx lr sh2_read8_sdram: ldr ip, [r1, #OFS_SH2_p_sdram] eor r0, r0, #1 - lsl r0, #SH2_RAM_SHIFT + mov r0, r0, lsl #SH2_RAM_SHIFT ldrb r0, [ip, r0, lsr #SH2_RAM_SHIFT] bx lr sh2_read8_da: ldr ip, [r1, #OFS_SH2_p_da] eor r0, r0, #1 - lsl r0, #SH2_DA_SHIFT + mov r0, r0, lsl #SH2_DA_SHIFT ldrb r0, [ip, r0, lsr #SH2_DA_SHIFT] bx lr sh2_read8_dram: ldr ip, [r1, #OFS_SH2_p_dram] eor r0, r0, #1 - lsl r0, #SH2_DRAM_SHIFT + mov r0, r0, lsl #SH2_DRAM_SHIFT ldrb r0, [ip, r0, lsr #SH2_DRAM_SHIFT] bx lr sh2_read16_rom: ldr ip, [r1, #OFS_SH2_p_rom] - lsl r0, #SH2_ROM_SHIFT - lsr r0, #SH2_ROM_SHIFT + mov r0, r0, lsl #SH2_ROM_SHIFT + mov r0, r0, lsr #SH2_ROM_SHIFT ldrh r0, [ip, r0] bx lr sh2_read16_sdram: ldr ip, [r1, #OFS_SH2_p_sdram] - lsl r0, #SH2_RAM_SHIFT - lsr r0, #SH2_RAM_SHIFT + mov r0, r0, lsl #SH2_RAM_SHIFT + mov r0, r0, lsr #SH2_RAM_SHIFT ldrh r0, [ip, r0] bx lr sh2_read16_da: ldr ip, [r1, #OFS_SH2_p_da] - lsl r0, #SH2_DA_SHIFT - lsr r0, #SH2_DA_SHIFT + mov r0, r0, lsl #SH2_DA_SHIFT + mov r0, r0, lsr #SH2_DA_SHIFT ldrh r0, [ip, r0] bx lr sh2_read16_dram: ldr ip, [r1, #OFS_SH2_p_dram] - lsl r0, #SH2_DRAM_SHIFT - lsr r0, #SH2_DRAM_SHIFT + mov r0, r0, lsl #SH2_DRAM_SHIFT + mov r0, r0, lsr #SH2_DRAM_SHIFT ldrh r0, [ip, r0] bx lr sh2_read32_rom: ldr ip, [r1, #OFS_SH2_p_rom] - lsl r0, #SH2_ROM_SHIFT + mov r0, r0, lsl #SH2_ROM_SHIFT ldr r0, [ip, r0, lsr #SH2_ROM_SHIFT] - ror r0, r0, #16 + mov r0, r0, ror #16 bx lr sh2_read32_sdram: ldr ip, [r1, #OFS_SH2_p_sdram] - lsl r0, #SH2_RAM_SHIFT + mov r0, r0, lsl #SH2_RAM_SHIFT ldr r0, [ip, r0, lsr #SH2_RAM_SHIFT] - ror r0, r0, #16 + mov r0, r0, ror #16 bx lr sh2_read32_da: ldr ip, [r1, #OFS_SH2_p_da] - lsl r0, #SH2_DA_SHIFT + mov r0, r0, lsl #SH2_DA_SHIFT ldr r0, [ip, r0, lsr 
#SH2_DA_SHIFT] - ror r0, r0, #16 + mov r0, r0, ror #16 bx lr sh2_read32_dram: ldr ip, [r1, #OFS_SH2_p_dram] - lsl r0, #SH2_DRAM_SHIFT + mov r0, r0, lsl #SH2_DRAM_SHIFT ldr r0, [ip, r0, lsr #SH2_DRAM_SHIFT] - ror r0, r0, #16 + mov r0, r0, ror #16 bx lr sh2_write8_sdram: @ preserve r0 and r2 for tail call ldr ip, [r2, #OFS_SH2_p_sdram] eor r3, r0, #1 - lsl r3, #SH2_RAM_SHIFT + mov r3, r3, lsl #SH2_RAM_SHIFT strb r1, [ip, r3, lsr #SH2_RAM_SHIFT] #ifdef DRC_SH2 ldr ip, [r2, #OFS_SH2_p_drcblk_ram] @@ -148,7 +148,7 @@ sh2_write8_da: @ preserve r0 and r2 for tail call ldr ip, [r2, #OFS_SH2_p_da] eor r3, r0, #1 - lsl r3, #SH2_DA_SHIFT + mov r3, r3, lsl #SH2_DA_SHIFT strb r1, [ip, r3, lsr #SH2_DA_SHIFT] #ifdef DRC_SH2 ldr ip, [r2, #OFS_SH2_p_drcblk_da] @@ -165,15 +165,15 @@ sh2_write8_dram: tst r1, #0xff ldrne ip, [r2, #OFS_SH2_p_dram] eorne r3, r0, #1 - lslne r3, #SH2_DRAM_SHIFT + movne r3, r3, lsl #SH2_DRAM_SHIFT strneb r1, [ip, r3, lsr #SH2_DRAM_SHIFT] bx lr sh2_write16_sdram: @ preserve r0 and r2 for tail call ldr ip, [r2, #OFS_SH2_p_sdram] - lsl r3, r0, #SH2_RAM_SHIFT - lsr r3, r3, #SH2_RAM_SHIFT + mov r3, r0, lsl #SH2_RAM_SHIFT + mov r3, r3, lsr #SH2_RAM_SHIFT strh r1, [ip, r3] #ifdef DRC_SH2 ldr ip, [r2, #OFS_SH2_p_drcblk_ram] @@ -188,8 +188,8 @@ sh2_write16_sdram: sh2_write16_da: @ preserve r0 and r2 for tail call ldr ip, [r2, #OFS_SH2_p_da] - lsl r3, r0, #SH2_DA_SHIFT - lsr r3, r3, #SH2_DA_SHIFT + mov r3, r0, lsl #SH2_DA_SHIFT + mov r3, r3, lsr #SH2_DA_SHIFT strh r1, [ip, r3] #ifdef DRC_SH2 ldr ip, [r2, #OFS_SH2_p_drcblk_da] @@ -204,23 +204,23 @@ sh2_write16_da: sh2_write16_dram: ldr ip, [r2, #OFS_SH2_p_dram] tst r0, #SH2_DRAM_OW - lsl r3, r0, #SH2_DRAM_SHIFT - lsr r3, r3, #SH2_DRAM_SHIFT + mov r3, r0, lsl #SH2_DRAM_SHIFT + mov r3, r3, lsr #SH2_DRAM_SHIFT streqh r1, [ip, r3] bxeq lr add ip, ip, r3 tst r1, #0xff strneb r1, [ip, #0] tst r1, #0xff00 - lsrne r1, r1, #8 + movne r1, r1, lsr #8 strneb r1, [ip, #1] bx lr sh2_write32_sdram: @ preserve r0 and r2 for tail 
call ldr ip, [r2, #OFS_SH2_p_sdram] - ror r1, r1, #16 - lsl r3, r0, #SH2_RAM_SHIFT + mov r1, r1, ror #16 + mov r3, r0, lsl #SH2_RAM_SHIFT str r1, [ip, r3, lsr #SH2_RAM_SHIFT] #ifdef DRC_SH2 ldr ip, [r2, #OFS_SH2_p_drcblk_ram] @@ -242,8 +242,8 @@ sh2_write32_sdram: sh2_write32_da: @ preserve r0 and r2 for tail call ldr ip, [r2, #OFS_SH2_p_da] - ror r1, r1, #16 - lsl r3, r0, #SH2_DA_SHIFT + mov r1, r1, ror #16 + mov r3, r0, lsl #SH2_DA_SHIFT str r1, [ip, r3, lsr #SH2_DA_SHIFT] #ifdef DRC_SH2 ldr ip, [r2, #OFS_SH2_p_drcblk_da] @@ -265,13 +265,13 @@ sh2_write32_da: sh2_write32_dram: ldr ip, [r2, #OFS_SH2_p_dram] tst r0, #SH2_DRAM_OW - lsl r3, r0, #SH2_DRAM_SHIFT - roreq r1, r1, #16 + mov r3, r0, lsl #SH2_DRAM_SHIFT + moveq r1, r1, ror #16 streq r1, [ip, r3, lsr #SH2_DRAM_SHIFT] bxeq lr #if 1 ldr r0, [ip, r3, lsr #SH2_DRAM_SHIFT] - ror r1, r1, #16 + mov r1, r1, ror #16 mov r2, #0 tst r1, #0x00ff0000 orrne r2, r2, #0x00ff0000 diff --git a/pico/cd/gfx_dma.c b/pico/cd/gfx_dma.c index 7dfe4bc9..ff93a2dc 100644 --- a/pico/cd/gfx_dma.c +++ b/pico/cd/gfx_dma.c @@ -10,10 +10,6 @@ #include "cell_map.c" -#ifndef UTYPES_DEFINED -typedef unsigned short u16; -#endif - // check: Heart of the alien, jaguar xj 220 PICO_INTERNAL void DmaSlowCell(unsigned int source, unsigned int a, int len, unsigned char inc) { diff --git a/pico/cd/memory_arm.S b/pico/cd/memory_arm.S index 335f3624..04920b62 100644 --- a/pico/cd/memory_arm.S +++ b/pico/cd/memory_arm.S @@ -6,7 +6,7 @@ @* See COPYING file in the top-level directory. @* -#include "../pico_int_o32.h" +#include "../pico_int_offs.h" .equiv PCM_STEP_SHIFT, 11 diff --git a/pico/draw2_arm.S b/pico/draw2_arm.S index 6b110b32..6b094495 100644 --- a/pico/draw2_arm.S +++ b/pico/draw2_arm.S @@ -8,7 +8,7 @@ * this is highly specialized, be careful if changing related C code! 
*/ -#include "pico_int_o32.h" +#include "pico_int_offs.h" @ define these constants in your include file: @ .equiv START_ROW, 1 diff --git a/pico/draw_arm.S b/pico/draw_arm.S index 3bc27033..2efc804c 100644 --- a/pico/draw_arm.S +++ b/pico/draw_arm.S @@ -8,7 +8,7 @@ * this is highly specialized, be careful if changing related C code! */ -#include "pico_int_o32.h" +#include "pico_int_offs.h" .extern DrawStripInterlace diff --git a/pico/memory.h b/pico/memory.h index eb440dd4..d55267ba 100644 --- a/pico/memory.h +++ b/pico/memory.h @@ -2,11 +2,6 @@ #include "pico_port.h" -typedef unsigned char u8; -typedef unsigned short u16; -typedef unsigned int u32; -typedef uintptr_t uptr; // unsigned pointer-sized int - #define M68K_MEM_SHIFT 16 // minimum size we can map #define M68K_BANK_SIZE (1 << M68K_MEM_SHIFT) diff --git a/pico/memory_amips.S b/pico/memory_amips.S index 7ae25922..7932c2c9 100644 --- a/pico/memory_amips.S +++ b/pico/memory_amips.S @@ -8,7 +8,7 @@ # OUT OF DATE -#include "pico_int_o32.h" +#include "pico_int_offs.h" .set noreorder .set noat diff --git a/pico/memory_arm.S b/pico/memory_arm.S index 117cea0b..07d6a128 100644 --- a/pico/memory_arm.S +++ b/pico/memory_arm.S @@ -6,7 +6,7 @@ * See COPYING file in the top-level directory. 
*/ -#include "pico_int_o32.h" +#include "pico_int_offs.h" .equ SRR_MAPPED, (1 << 0) .equ SRR_READONLY, (1 << 1) diff --git a/pico/pico_int.h b/pico/pico_int.h index 13338242..831bfc72 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -33,6 +33,14 @@ extern "C" { #endif +typedef unsigned char u8; +typedef signed char s8; +typedef unsigned short u16; +typedef signed short s16; +typedef unsigned int u32; +typedef signed int s32; +typedef uintptr_t uptr; // unsigned pointer-sized int + // ----------------------- 68000 CPU ----------------------- #ifdef EMU_C68K #include "../cpu/cyclone/Cyclone.h" @@ -427,7 +435,7 @@ struct PicoSound short psg_line; }; -// run tools/mkoffsets pico/pico_int_o32.h if you change these +// run tools/mkoffsets pico/pico_int_offs.h if you change these // careful with savestate compat struct Pico { @@ -905,13 +913,13 @@ void PicoFrame32x(void); void Pico32xStateLoaded(int is_early); void p32x_sync_sh2s(unsigned int m68k_target); void p32x_sync_other_sh2(SH2 *sh2, unsigned int m68k_target); -void p32x_update_irls(SH2 *active_sh2, int m68k_cycles); -void p32x_trigger_irq(SH2 *sh2, int m68k_cycles, unsigned int mask); -void p32x_update_cmd_irq(SH2 *sh2, int m68k_cycles); +void p32x_update_irls(SH2 *active_sh2, unsigned int m68k_cycles); +void p32x_trigger_irq(SH2 *sh2, unsigned int m68k_cycles, unsigned int mask); +void p32x_update_cmd_irq(SH2 *sh2, unsigned int m68k_cycles); void p32x_reset_sh2s(void); void p32x_event_schedule(unsigned int now, enum p32x_event event, int after); void p32x_event_schedule_sh2(SH2 *sh2, enum p32x_event event, int after); -void p32x_schedule_hint(SH2 *sh2, int m68k_cycles); +void p32x_schedule_hint(SH2 *sh2, unsigned int m68k_cycles); // 32x/memory.c extern struct Pico32xMem *Pico32xMem; diff --git a/platform/common/common.mak b/platform/common/common.mak index 2f676abc..b4a5759c 100644 --- a/platform/common/common.mak +++ b/platform/common/common.mak @@ -9,6 +9,7 @@ asm_render = 0 asm_ym2612 = 0 asm_misc = 0 
asm_cdmemory = 0 +asm_32xdraw = 0 asm_mix = 0 endif diff --git a/platform/common/memcpy.c b/platform/common/memcpy.c index b99de4ae..1cd74175 100644 --- a/platform/common/memcpy.c +++ b/platform/common/memcpy.c @@ -9,7 +9,7 @@ * to avoid under/overstepping the src region). * * ATTN does dirty aliasing tricks with undefined behaviour by standard. - * (however, this was needed to improve the generated code). + * (however, this improved the generated code). * ATTN uses struct assignment, which only works if the compiler is inlining * this (else it would probably call memcpy :-)). */ @@ -33,22 +33,24 @@ void *memcpy(void *dest, const void *src, size_t n) const int lm = sizeof(uint32_t)-1; /* align src to word */ - while (((unsigned)ss.c & lm) && n > 0) + while (((uintptr_t)ss.c & lm) && n > 0) *ds.c++ = *ss.c++, n--; - if (((unsigned)ds.c & lm) == 0) { + if (((uintptr_t)ds.c & lm) == 0) { /* fast copy if pointers have the same aligment */ - while (n >= sizeof(struct _16)) /* copy 16 bytes blocks */ + while (n >= sizeof(struct _16)) /* copy 16 byte blocks */ *ds.s++ = *ss.s++, n -= sizeof(struct _16); if (n >= sizeof(uint64_t)) /* copy leftover 8 byte block */ *ds.l++ = *ss.l++, n -= sizeof(uint64_t); +// if (n >= sizeof(uint32_t)) /* copy leftover 4 byte block */ +// *ds.i++ = *ss.i++, n -= sizeof(uint32_t); } else if (n >= 2*sizeof(uint32_t)) { /* unaligned data big enough to avoid overstepping src */ uint32_t v1, v2, b, s; /* align dest to word */ - while (((unsigned)ds.c & lm) && n > 0) + while (((uintptr_t)ds.c & lm) && n > 0) *ds.c++ = *ss.c++, n--; /* copy loop: load aligned words and store shifted words */ - b = (unsigned)ss.c & lm, s = b*8; ss.c -= b; + b = (uintptr_t)ss.c & lm, s = b*8; ss.c -= b; v1 = *ss.i++, v2 = *ss.i++; while (n >= 3*sizeof(uint32_t)) { *ds.i++ = (v1 _L_ s) | (v2 _U_ (32-s)); v1 = *ss.i++; @@ -78,28 +80,35 @@ void *memmove (void *dest, const void *src, size_t n) struct _16 { uint32_t a[4]; }; union { const void *v; uint8_t *c; uint32_t 
*i; uint64_t *l; struct _16 *s; } ss = { src+n }, ds = { dest+n }; + size_t pd = dest > src ? dest - src : src - dest; const int lm = sizeof(uint32_t)-1; if (dest <= src || dest >= src+n) return memcpy(dest, src, n); /* align src to word */ - while (((unsigned)ss.c & lm) && n > 0) + while (((uintptr_t)ss.c & lm) && n > 0) *--ds.c = *--ss.c, n--; - if (((unsigned)ds.c & lm) == 0) { + /* take care not to copy multi-byte data if it overlaps */ + if (((uintptr_t)ds.c & lm) == 0) { /* fast copy if pointers have the same aligment */ - while (n >= sizeof(struct _16)) /* copy 16 byte blocks */ + while (n >= sizeof(struct _16) && pd >= sizeof(struct _16)) + /* copy 16 bytes blocks if no overlap */ *--ds.s = *--ss.s, n -= sizeof(struct _16); - if (n >= sizeof(uint64_t)) /* copy leftover 8 byte block */ + while (n >= sizeof(uint64_t) && pd >= sizeof(uint64_t)) + /* copy leftover 8 byte blocks if no overlap */ *--ds.l = *--ss.l, n -= sizeof(uint64_t); - } else if (n >= 2*sizeof(uint32_t)) { + while (n >= sizeof(uint32_t) && pd >= sizeof(uint32_t)) + /* copy leftover 4 byte blocks if no overlap */ + *--ds.i = *--ss.i, n -= sizeof(uint32_t); + } else if (n >= 2*sizeof(uint32_t) && pd >= 2*sizeof(uint32_t)) { /* unaligned data big enough to avoid understepping src */ uint32_t v1, v2, b, s; /* align dest to word */ - while (((unsigned)ds.c & lm) && n > 0) + while (((uintptr_t)ds.c & lm) && n > 0) *--ds.c = *--ss.c, n--; /* copy loop: load aligned words and store shifted words */ - b = (unsigned)ss.c & lm, s = b*8; ss.c += b; + b = (uintptr_t)ss.c & lm, s = b*8; ss.c += b; v1 = *--ss.i, v2 = *--ss.i; while (n >= 3*sizeof(uint32_t)) { *--ds.i = (v1 _U_ s) | (v2 _L_ (32-s)); v1 = *--ss.i; @@ -114,7 +123,7 @@ void *memmove (void *dest, const void *src, size_t n) } ss.c -= b - 2*sizeof(uint32_t); } - /* copy 0-7 leftover bytes */ + /* copy 0-7 leftover bytes (or upto everything if ptrs are too close) */ while (n >= 4) { *--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, 
n--; *--ds.c = *--ss.c, n--; diff --git a/platform/gp2x/code940/memcpy.s b/platform/gp2x/code940/memcpy.s index 282762fd..1350639a 100644 --- a/platform/gp2x/code940/memcpy.s +++ b/platform/gp2x/code940/memcpy.s @@ -114,14 +114,12 @@ subs r2, r2, #0x14 blt Lmemcpy_fl32 /* less than 32 bytes (12 from above) */ stmdb sp!, {r4, r7, r8, r9, r10} /* borrow r4 */ -/* blat 64 bytes at a time */ +/* blat 32 bytes at a time */ /* XXX for really big copies perhaps we should use more registers */ Lmemcpy_floop32: ldmia r1!, {r3, r4, r7, r8, r9, r10, r12, lr} stmia r0!, {r3, r4, r7, r8, r9, r10, r12, lr} -ldmia r1!, {r3, r4, r7, r8, r9, r10, r12, lr} -stmia r0!, {r3, r4, r7, r8, r9, r10, r12, lr} -subs r2, r2, #0x40 +subs r2, r2, #0x20 bge Lmemcpy_floop32 cmn r2, #0x10 @@ -314,14 +312,12 @@ stmdb sp!, {r4, r7, r8, r9, r10, lr} subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */ blt Lmemcpy_bl32 -/* blat 64 bytes at a time */ +/* blat 32 bytes at a time */ /* XXX for really big copies perhaps we should use more registers */ Lmemcpy_bloop32: ldmdb r1!, {r3, r4, r7, r8, r9, r10, r12, lr} stmdb r0!, {r3, r4, r7, r8, r9, r10, r12, lr} -ldmdb r1!, {r3, r4, r7, r8, r9, r10, r12, lr} -stmdb r0!, {r3, r4, r7, r8, r9, r10, r12, lr} -subs r2, r2, #0x40 +subs r2, r2, #0x20 bge Lmemcpy_bloop32 Lmemcpy_bl32: diff --git a/tools/mkoffsets.sh b/tools/mkoffsets.sh index 13e55495..6d68a1bc 100755 --- a/tools/mkoffsets.sh +++ b/tools/mkoffsets.sh @@ -49,11 +49,8 @@ get_define () # prefix struct member member... 
echo "const int one = 1;" >/tmp/getoffs.c compile_rodata ENDIAN=$(if [ "$rodata" -eq 1 ]; then echo be; else echo le; fi) -# determine output file -echo "const int vsz = sizeof(void *);" >/tmp/getoffs.c -compile_rodata -fn="${1:-.}/pico_int_o$((8*$rodata)).h" # output header +fn="${1:-.}/pico_int_offs.h" echo "/* autogenerated by mkoffset.sh, do not edit */" >$fn echo "/* target endianess: $ENDIAN, compiled with: $CC $CFLAGS */" >>$fn # output offsets From 47ee54b8732ae897287f92fb076ed0047999e411 Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 25 Apr 2019 18:56:26 +0200 Subject: [PATCH 0186/1110] sh2 drc, reuse blocks if already previously compiled (speedup for Virtua *) --- cpu/sh2/compiler.c | 144 ++++++++++++++++++++++++++++++++++++++------- cpu/sh2/compiler.h | 2 +- 2 files changed, 124 insertions(+), 22 deletions(-) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index f8f64ef6..fa0a6b71 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -300,6 +300,7 @@ struct block_desc { int size; // ..of recompiled insns int size_lit; // ..of (insns+)literal pool u8 *tcache_ptr; // start address of block in cache + u16 crc; // crc of insns and literals u16 active; // actively used or deactivated? 
struct block_list *list; #if (DRC_DEBUG & 2) @@ -346,6 +347,8 @@ struct block_list { }; struct block_list *blist_free; +static struct block_list *inactive_blocks[TCACHE_BUFFERS]; + // array of pointers to block_lists for RAM and 2 data arrays // each array has len: sizeof(mem) / INVAL_PAGE_SIZE static struct block_list **inval_lookup[TCACHE_BUFFERS]; @@ -691,6 +694,7 @@ static void REGPARM(1) flush_tcache(int tcid) for (i = 0; i < ram_sizes[tcid] / INVAL_PAGE_SIZE; i++) rm_block_list(&inval_lookup[tcid][i]); + rm_block_list(&inactive_blocks[tcid]); } static void add_to_hashlist(struct block_entry *be, int tcache_id) @@ -777,7 +781,7 @@ static void rm_from_hashlist_unresolved(struct block_link *bl, int tcache_id) bl->next->prev = bl->prev; } -static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nolit); +static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nolit, int free); static void dr_free_oldest_block(int tcache_id) { struct block_desc *bd; @@ -794,7 +798,7 @@ static void dr_free_oldest_block(int tcache_id) } if (bd->addr && bd->entry_count) - sh2_smc_rm_block_entry(bd, tcache_id, 0); + sh2_smc_rm_block_entry(bd, tcache_id, 0, 1); block_limit[tcache_id]++; if (block_limit[tcache_id] >= block_max_counts[tcache_id]) @@ -926,8 +930,32 @@ static u32 dr_check_nolit(u32 start, u32 end, int tcache_id) return end; } +static struct block_desc *dr_find_inactive_block(int tcache_id, u16 crc, + u32 addr, int size, u32 addr_lit, int size_lit) +{ + struct block_list **head = &inactive_blocks[tcache_id]; + struct block_list *prev = NULL, *current = *head; + + for (; current != NULL; prev = current, current = current->next) { + struct block_desc *block = current->block; + if (block->crc == crc && block->addr == addr && block->size == size && + block->addr_lit == addr_lit && block->size_lit == size_lit) + { + if (prev == NULL) + *head = current->next; + else + prev->next = current->next; + block->list = NULL; // should now be empty 
+ current->next = blist_free; + blist_free = current; + return block; + } + } + return NULL; +} + static struct block_desc *dr_add_block(u32 addr, int size, - u32 addr_lit, int size_lit, int is_slave, int *blk_id) + u32 addr_lit, int size_lit, u16 crc, int is_slave, int *blk_id) { struct block_entry *be; struct block_desc *bd; @@ -951,6 +979,7 @@ static struct block_desc *dr_add_block(u32 addr, int size, bd->addr_lit = addr_lit; bd->size_lit = size_lit; bd->tcache_ptr = tcache_ptr; + bd->crc = crc; bd->active = 1; bd->entry_count = 1; @@ -1074,6 +1103,34 @@ static void dr_link_blocks(struct block_entry *be, int tcache_id) #endif } +static void dr_link_outgoing(struct block_entry *be, int tcache_id, int is_slave) +{ +#if LINK_BRANCHES + struct block_link *bl; + int target_tcache_id; + + for (bl = be->o_links; bl; bl = bl->o_next) { + be = dr_get_entry(bl->target_pc, is_slave, &target_tcache_id); + if (!target_tcache_id || target_tcache_id == tcache_id) { + if (be) { + dbg(2, "- link from %p to pc %08x entry %p", bl->jump, bl->target_pc, be->tcache_ptr); + emith_jump_patch(bl->jump, be->tcache_ptr); + bl->target = be; + bl->prev = NULL; + if (be->links) + be->links->prev = bl; + bl->next = be->links; + be->links = bl; + } else { + emith_jump_patch(bl->jump, sh2_drc_dispatcher); + add_to_hashlist_unresolved(bl, tcache_id); + } + host_instructions_updated(bl->jump, bl->jump+4); + } + } +#endif +} + #define ADD_TO_ARRAY(array, count, item, failcode) { \ if (count >= ARRAY_SIZE(array)) { \ dbg(1, "warning: " #array " overflow"); \ @@ -2442,6 +2499,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) int i, v; u32 u; int op; + u16 crc; base_pc = sh2->pc; @@ -2454,11 +2512,37 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } // initial passes to disassemble and analyze the block - scan_block(base_pc, sh2->is_slave, op_flags, &end_pc, &base_literals, &end_literals); + crc = scan_block(base_pc, sh2->is_slave, op_flags, &end_pc, 
&base_literals, &end_literals); end_literals = dr_check_nolit(base_literals, end_literals, tcache_id); if (base_literals == end_literals) // map empty lit section to end of code base_literals = end_literals = end_pc; + // if there is already a translated but inactive block, reuse it + block = dr_find_inactive_block(tcache_id, crc, base_pc, end_pc - base_pc, + base_literals, end_literals - base_literals); + + if (block) { + // connect branches + dbg(2, "== %csh2 reuse block %08x-%08x,%08x-%08x -> %p", sh2->is_slave ? 's' : 'm', + base_pc, end_pc, base_literals, end_literals, block->entryp->tcache_ptr); + for (i = 0; i < block->entry_count; i++) { + entry = &block->entryp[i]; + add_to_hashlist(entry, tcache_id); +#if LINK_BRANCHES + // incoming branches + dr_link_blocks(entry, tcache_id); + if (!tcache_id) + dr_link_blocks(entry, sh2->is_slave?2:1); + // outgoing branches + dr_link_outgoing(entry, tcache_id, sh2->is_slave); +#endif + } + // mark memory for overwrite detection + dr_mark_memory(1, block, tcache_id, 0); + block->active = 1; + return block->entryp[0].tcache_ptr; + } + // collect branch_targets that don't land on delay slots for (pc = base_pc, i = 0; pc < end_pc; i++, pc += 2) { if (!(op_flags[i] & OF_BTARGET)) @@ -2480,13 +2564,14 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) #endif block = dr_add_block(base_pc, end_pc - base_pc, base_literals, - end_literals - base_literals, sh2->is_slave, &blkid_main); + end_literals - base_literals, crc, sh2->is_slave, &blkid_main); if (block == NULL) return NULL; block_entry_ptr = tcache_ptr; - dbg(2, "== %csh2 block #%d,%d %08x-%08x -> %p", sh2->is_slave ? 's' : 'm', - tcache_id, blkid_main, base_pc, end_pc, block_entry_ptr); + dbg(2, "== %csh2 block #%d,%d crc %04x %08x-%08x,%08x-%08x -> %p", sh2->is_slave ? 
's' : 'm', + tcache_id, blkid_main, crc, base_pc, end_pc, base_literals, end_literals, block_entry_ptr); + // clear stale state after compile errors rcache_invalidate(); @@ -4054,7 +4139,7 @@ static void sh2_generate_utils(void) #endif } -static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nolit) +static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nolit, int free) { struct block_link *bl; u32 i; @@ -4066,6 +4151,7 @@ static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nol dbg(1, " killing dead block!? %08x", bd->addr); return; } + free = free || nolit; // block is invalid if literals are overwritten // remove from hash table, make incoming links unresolved, revoke outgoing links for (i = 0; i < bd->entry_count; i++) { @@ -4073,7 +4159,6 @@ static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nol rm_from_hashlist(&bd->entryp[i], tcache_id); for (bl = bd->entryp[i].o_links; bl != NULL; ) { - struct block_link *bl_next = bl->o_next; if (bl->target) { if (bl->prev) bl->prev->next = bl->next; @@ -4084,13 +4169,8 @@ static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nol bl->target = NULL; } else if (bd->active) rm_from_hashlist_unresolved(bl, tcache_id); - // free bl - bl->jump = NULL; - bl->next = blink_free[bl->tcache_id]; - blink_free[bl->tcache_id] = bl; - bl = bl_next; + bl = bl->o_next; } - bd->entryp[i].o_links = NULL; for (bl = bd->entryp[i].links; bl != NULL; ) { struct block_link *bl_next = bl->next; @@ -4108,10 +4188,21 @@ static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nol if (bd->active) dr_mark_memory(-1, bd, tcache_id, nolit); - bd->addr = bd->size = bd->addr_lit = bd->size_lit = 0; - bd->entry_count = 0; + if (free) { + while ((bl = bd->entryp[0].o_links) != NULL) { + bd->entryp[0].o_links = bl->next; + bl->jump = NULL; + bl->next = blink_free[bl->tcache_id]; + blink_free[bl->tcache_id] = bl; + } + 
bd->entryp[0].o_links = NULL; + rm_from_block_lists(bd); + bd->addr = bd->size = bd->addr_lit = bd->size_lit = 0; + bd->entry_count = 0; + } else { + add_to_block_list(&inactive_blocks[tcache_id], bd); + } bd->active = 0; - rm_from_block_lists(bd); } static void sh2_smc_rm_blocks(u32 a, int tcache_id, u32 shift) @@ -4142,7 +4233,7 @@ static void sh2_smc_rm_blocks(u32 a, int tcache_id, u32 shift) { dbg(2, "smc remove @%08x", a); end_addr = (start_lit <= a && block->size_lit ? a : 0); - sh2_smc_rm_block_entry(block, tcache_id, end_addr); + sh2_smc_rm_block_entry(block, tcache_id, end_addr, 0); #if (DRC_DEBUG & 2) removed = 1; #endif @@ -4546,7 +4637,7 @@ static void *dr_get_pc_base(u32 pc, int is_slave) return (char *)ret - (pc & ~mask); } -void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, +u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, u32 *base_literals_out, u32 *end_literals_out) { u16 *dr_pc_base; @@ -4558,6 +4649,7 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, int next_is_delay = 0; int end_block = 0; int i, i_end; + u32 crc = 0; memset(op_flags, 0, sizeof(*op_flags) * BLOCK_INSN_LIMIT); op_flags[0] |= OF_BTARGET; // block start is always a target @@ -5346,8 +5438,9 @@ end: // 2nd pass: some analysis lowest_literal = end_literals = lowest_mova = 0; - for (i = 0; i < i_end; i++) { + for (i = 0, pc = base_pc; i < i_end; i++, pc += 2) { opd = &ops[i]; + crc += FETCH_OP(pc); // propagate T (TODO: DIV0U) if ((opd->op == OP_SETCLRT && !opd->imm) || opd->op == OP_BRANCH_CT) @@ -5427,11 +5520,20 @@ end: if (lowest_literal >= end_literals) lowest_literal = end_literals; + if (lowest_literal && end_literals) + for (pc = lowest_literal; pc < end_literals; pc += 2) + crc += FETCH_OP(pc); + *end_pc_out = end_pc; if (base_literals_out != NULL) *base_literals_out = (lowest_literal ?: end_pc); if (end_literals_out != NULL) *end_literals_out = (end_literals ?: end_pc); + + // crc overflow handling, 
twice to collect all overflows + crc = (crc & 0xffff) + (crc >> 16); + crc = (crc & 0xffff) + (crc >> 16); + return crc; } // vim:shiftwidth=2:ts=2:expandtab diff --git a/cpu/sh2/compiler.h b/cpu/sh2/compiler.h index 36dfd945..07e76cca 100644 --- a/cpu/sh2/compiler.h +++ b/cpu/sh2/compiler.h @@ -22,7 +22,7 @@ void sh2_drc_frame(void); #define OF_T_CLEAR (1 << 3) // ... clear #define OF_B_IN_DS (1 << 4) -void scan_block(unsigned int base_pc, int is_slave, +unsigned short scan_block(unsigned int base_pc, int is_slave, unsigned char *op_flags, unsigned int *end_pc, unsigned int *base_literals, unsigned int *end_literals); From 83bafe8e0b62ab02850011c443c1086d61e96d71 Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 25 Apr 2019 18:57:18 +0200 Subject: [PATCH 0187/1110] add literal pool to sh2 drc (for armv[456] without MOVT/W) --- cpu/drc/emit_arm.c | 125 ++++++++++++++++++++++++++++++------- cpu/drc/emit_x86.c | 3 + cpu/sh2/compiler.c | 10 ++- pico/carthw/svp/compiler.c | 1 + 4 files changed, 114 insertions(+), 25 deletions(-) diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index 4744b127..d8674a03 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -261,13 +261,30 @@ #define EOP_MOVT(rd,imm) \ EMIT(0xe3400000 | ((rd)<<12) | (((imm)>>16)&0xfff) | (((imm)>>12)&0xf0000)) -static int count_bits(unsigned val) +static inline int count_bits(unsigned val) { - val = (val & 0x55555555) + ((val >> 1) & 0x55555555); + val = val - ((val >> 1) & 0x55555555); val = (val & 0x33333333) + ((val >> 2) & 0x33333333); - val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f); - val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff); - return (val & 0xffff) + (val >> 16); + return (((val + (val >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24; +} + +// host literal pool; must be significantly smaller than 1024 (max LDR offset = 4096) +#define MAX_HOST_LITERALS 128 +static u32 literal_pool[MAX_HOST_LITERALS]; +static u32 *literal_insn[MAX_HOST_LITERALS]; +static int literal_pindex, 
literal_iindex; + +static int emith_pool_literal(u32 imm, int *offs) +{ + int idx = literal_pindex - 8; // max look behind in pool + // see if one of the last literals was the same (or close enough) + for (idx = (idx < 0 ? 0 : idx); idx < literal_pindex; idx++) + if (abs((int)(imm - literal_pool[idx])) <= 0xff) + break; + if (idx == literal_pindex) // store new literal + literal_pool[literal_pindex++] = imm; + *offs = imm - literal_pool[idx]; + return idx; } // XXX: RSB, *S will break if 1 insn is not enough @@ -275,6 +292,7 @@ static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int { int ror2; u32 v; + int i; switch (op) { case A_OP_MOV: @@ -284,19 +302,48 @@ static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int imm = ~imm; op = A_OP_MVN; } -#ifdef HAVE_ARMV7 - for (v = imm, ror2 = 0; v && !(v & 3); v >>= 2) - ror2--; - if (v >> 8) { - /* 2+ insns needed - prefer movw/movt */ + // count insns needed for mov/orr #imm + for (v = imm, ror2 = 0; (v >> 24) && ror2 < 32/2; ror2++) + v = (v << 2) | (v >> 30); + for (i = 2; i > 0; i--, v >>= 8) + while (v > 0xff && !(v & 3)) + v >>= 2; + if (v) { // 3+ insns needed... if (op == A_OP_MVN) imm = ~imm; +#ifdef HAVE_ARMV7 + // ...prefer movw/movt EOP_MOVW(rd, imm); if (imm & 0xffff0000) EOP_MOVT(rd, imm); +#else + // ...emit literal load + int idx, o; + if (literal_iindex >= MAX_HOST_LITERALS) { + elprintf(EL_STATUS|EL_SVP|EL_ANOMALY, + "pool overflow"); + exit(1); + } + idx = emith_pool_literal(imm, &o); + literal_insn[literal_iindex++] = (u32 *)tcache_ptr; + EOP_LDR_IMM2(cond, rd, 15, idx * sizeof(u32)); + if (o > 0) + EOP_C_DOP_IMM(cond, A_OP_ADD, 0, rd, rd, 0, o); + else if (o < 0) + EOP_C_DOP_IMM(cond, A_OP_SUB, 0, rd, rd, 0, -o); +#endif return; } -#endif + break; + + case A_OP_AND: + // AND must fit into 1 insn. 
if not, use BIC + for (v = imm, ror2 = 0; (v >> 8) && ror2 < 32/2; ror2++) + v = (v << 2) | (v >> 30); + if (v >> 8) { + imm = ~imm; + op = A_OP_BIC; + } break; case A_OP_SUB: @@ -314,20 +361,13 @@ static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int break; } - again: - v = imm, ror2 = 32/2; // arm imm shift is ROR, so rotate for best fit - while ((v >> 24) && !(v & 0xc0)) - v = (v << 2) | (v >> 30), ror2++; + // try to get the topmost byte empty to possibly save an insn + for (v = imm, ror2 = 0; (v >> 24) && ror2 < 32/2; ror2++) + v = (v << 2) | (v >> 30); do { // shift down to get 'best' rot2 while (v > 0xff && !(v & 3)) v >>= 2, ror2--; - // AND must fit into 1 insn. if not, use BIC - if (op == A_OP_AND && v != (v & 0xff)) { - imm = ~imm; - op = A_OP_BIC; - goto again; - } EOP_C_DOP_IMM(cond, op, s, rn, rd, ror2 & 0xf, v & 0xff); switch (op) { @@ -385,6 +425,47 @@ static int emith_xbranch(int cond, void *target, int is_call) return (u32 *)tcache_ptr - start_ptr; } +static void emith_pool_commit(int jumpover) +{ + int i, sz = literal_pindex * sizeof(u32); + u8 *pool = (u8 *)tcache_ptr; + + // nothing to commit if pool is empty + if (sz == 0) + return; + // need branch over pool if not at block end + if (jumpover) { + pool += sizeof(u32); + emith_xbranch(A_COND_AL, (u8 *)pool + sz, 0); + } + // safety check - pool must be after insns and reachable + if ((u32)(pool - (u8 *)literal_insn[0] + 8) > 0xfff) { + elprintf(EL_STATUS|EL_SVP|EL_ANOMALY, + "pool offset out of range"); + exit(1); + } + // copy pool and adjust addresses in insns accessing the pool + memcpy(pool, literal_pool, sz); + for (i = 0; i < literal_iindex; i++) { + *literal_insn[i] += (u8 *)pool - ((u8 *)literal_insn[i] + 8); + } + // count pool constants as insns for statistics + for (i = 0; i < literal_pindex; i++) + COUNT_OP; + + tcache_ptr = (void *)((u8 *)pool + sz); + literal_pindex = literal_iindex = 0; +} + +static inline void emith_pool_check(void) +{ + // check if 
pool must be committed + if (literal_iindex > MAX_HOST_LITERALS-4 || + (u8 *)tcache_ptr - (u8 *)literal_insn[0] > 0xe00) + // pool full, or displacement is approaching the limit + emith_pool_commit(1); +} + #define JMP_POS(ptr) \ ptr = tcache_ptr; \ tcache_ptr += sizeof(u32) @@ -769,7 +850,7 @@ static int emith_xbranch(int cond, void *target, int is_call) b_ = tmpr; \ } \ op(b_,v_); \ -} while(0) +} while (0) #define emith_ctx_read_multiple(r, offs, count, tmpr) \ emith_ctx_do_multiple(EOP_LDMIA, r, offs, count, tmpr) diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index ce13c618..1ac4ee01 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -1104,3 +1104,6 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; EMITH_SJMP_END(DCOND_EQ); \ EMITH_SJMP_END(DCOND_EQ); \ } while (0) + +#define emith_pool_check() /**/ +#define emith_pool_commit(j) /**/ diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index fa0a6b71..bc63e18b 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -369,7 +369,7 @@ enum { HR_STATIC, // vreg has a static mapping HR_CACHED, // vreg has sh2_reg_e HR_TEMP, // reg used for temp storage -} cach_reg_type; +} cache_reg_type; enum { HRF_DIRTY = 1 << 0, // has "dirty" value to be written to ctx @@ -2569,8 +2569,8 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) return NULL; block_entry_ptr = tcache_ptr; - dbg(2, "== %csh2 block #%d,%d crc %04x %08x-%08x,%08x-%08x -> %p", sh2->is_slave ? 's' : 'm', - tcache_id, blkid_main, crc, base_pc, end_pc, base_literals, end_literals, block_entry_ptr); + dbg(2, "== %csh2 block #%d,%d %08x-%08x,%08x-%08x -> %p", sh2->is_slave ? 
's' : 'm', + tcache_id, blkid_main, base_pc, end_pc, base_literals, end_literals, block_entry_ptr); // clear stale state after compile errors @@ -2715,6 +2715,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } #endif + emith_pool_check(); pc += 2; if (skip_op > 0) { @@ -3892,6 +3893,8 @@ end_op: emith_jump_patch(branch_patch_ptr[i], target); } + emith_pool_commit(0); + dr_mark_memory(1, block, tcache_id, 0); tcache_ptrs[tcache_id] = tcache_ptr; @@ -4124,6 +4127,7 @@ static void sh2_generate_utils(void) MAKE_WRITE_WRAPPER(sh2_drc_write32); #endif + emith_pool_commit(0); rcache_invalidate(); #if (DRC_DEBUG & 4) host_dasm_new_symbol(sh2_drc_entry); diff --git a/pico/carthw/svp/compiler.c b/pico/carthw/svp/compiler.c index b31197c2..1ec71e75 100644 --- a/pico/carthw/svp/compiler.c +++ b/pico/carthw/svp/compiler.c @@ -1795,6 +1795,7 @@ void *ssp_translate_block(int pc) tr_flush_dirty_ST(); tr_flush_dirty_pmcrs(); block_end = emit_block_epilogue(ccount, end_cond, jump_pc, pc); + emith_pool_commit(0); if (tcache_ptr - (u32 *)tcache > DRC_TCACHE_SIZE/4) { elprintf(EL_ANOMALY|EL_STATUS|EL_SVP, "tcache overflow!\n"); From 08626dab12e62ba1caf018a739c44073029606b4 Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 25 Apr 2019 19:02:29 +0200 Subject: [PATCH 0188/1110] speed improvement and fixes for 32x ARM asm draw --- pico/32x/draw.c | 5 -- pico/32x/draw_arm.S | 166 ++++++++++++++++++++++++++++++-------------- pico/draw.c | 4 +- platform/gp2x/emu.c | 2 +- tools/mkoffsets.sh | 1 + 5 files changed, 118 insertions(+), 60 deletions(-) diff --git a/pico/32x/draw.c b/pico/32x/draw.c index 4bdbc89a..372f27ef 100644 --- a/pico/32x/draw.c +++ b/pico/32x/draw.c @@ -311,11 +311,6 @@ void PicoDraw32xLayerMdOnly(int offs, int lines) void PicoDrawSetOutFormat32x(pdso_t which, int use_32x_line_mode) { -#ifdef _ASM_32X_DRAW - extern void *Pico32xNativePal; - Pico32xNativePal = Pico32xMem->pal_native; -#endif - if (which == PDF_RGB555) { // need CLUT pixels in PicoDraw2FB for 
layer transparency PicoDrawSetInternalBuf(Pico.est.Draw2FB, 328); diff --git a/pico/32x/draw_arm.S b/pico/32x/draw_arm.S index c59fa8f5..e0cdcbe5 100644 --- a/pico/32x/draw_arm.S +++ b/pico/32x/draw_arm.S @@ -13,12 +13,6 @@ .equiv P32XV_PRI, (1<< 7) -.bss -.align 2 -.global Pico32xNativePal -Pico32xNativePal: - .word 0 - .text .align 2 @@ -82,8 +76,8 @@ Pico32xNativePal: mov r3, r3, lsl #26 @ mdbg << 26 mla r11,r4,r5,r11 @ r11 = pmd = PicoDraw2FB + offs*328: md data tst r10,#P32XV_PRI - moveq r10,#0 - movne r10,#0x8000 @ r10 = inv_bit + movne r10,#0 + moveq r10,#0x8000 @ r10 = inv_bit call_scan_prep \call_scan lr mov r4, #0 @ line @@ -92,7 +86,6 @@ Pico32xNativePal: 0: @ loop_outer: call_scan_end \call_scan add r4, r4, #1 - sub r11,r11,#1 @ adjust for prev read cmp r4, r2, lsr #16 call_scan_fin_ge \call_scan ldmgefd sp!, {r4-r11,pc} @@ -106,31 +99,86 @@ Pico32xNativePal: add r5, r1, r12, lsl #1 @ p32x = dram + dram[l] 2: @ loop_inner: - ldrb r7, [r11], #1 @ MD pixel - subs r6, r6, #1 + ldrh r8, [r5], #2 + subs lr, r6, #1 blt 0b @ loop_outer - ldrh r8, [r5], #2 @ 32x pixel - cmp r3, r7, lsl #26 @ MD has bg pixel? - beq 3f @ draw32x - eor r12,r8, r10 - ands r12,r12,#0x8000 @ !((t ^ inv) & 0x8000) -.if \do_md - mov r7, r7, lsl #1 - ldreqh r12,[r9, r7] - streqh r12,[r0], #2 @ *dst++ = palmd[*pmd] -.else - addeq r0, r0, #2 -.endif - beq 2b @ loop_inner -3: @ draw32x: - and r12,r8, #0x03e0 +3: @ loop_innermost: + ldrh r7, [r5], #2 @ 32x pixel + subs lr, lr, #1 + cmpge r7, r8 + beq 3b @ loop_innermost + + sub r5, r5, #2 + add lr, lr, #1 + sub lr, r6, lr + sub r6, r6, lr + + eor r12,r8, r10 + tst r12, #0x8000 @ !((t ^ inv) & 0x8000) + bne 5f @ draw_md + + and r7 ,r8, #0x03e0 mov r8, r8, lsl #11 orr r8, r8, r8, lsr #(10+11) - orr r8, r8, r12,lsl #1 + orr r8, r8, r7 ,lsl #1 + bic r8, r8, #0x0020 @ kill prio bit + + add r11,r11,lr + tst r0, #2 @ dst unaligned? 
+ strneh r8, [r0], #2 + subne lr, lr, #1 + cmp lr, #0 + beq 2b @ loop_inner + mov r8, r8, lsl #16 + orr r12,r8, r8, lsr #16 + mov r8 ,r12 +4: @ draw_32x: + subs lr, lr, #4 @ store 4 pixels + stmgeia r0!, {r8, r12} + bgt 4b @ draw_32x + beq 2b @ loop_inner + adds lr, lr, #2 @ store 1-3 leftover pixels + strge r8, [r0], #4 + strneh r8, [r0], #2 + b 2b @ loop_inner + +5: @ draw_md: + subs lr, lr, #1 + ldrgeb r7, [r11], #1 @ MD pixel + blt 2b @ loop_inner + cmp r3, r7, lsl #26 @ MD has bg pixel? +.if \do_md + mov r7, r7, lsl #1 + ldrneh r7 ,[r9, r7] + strneh r7 ,[r0], #2 @ *dst++ = palmd[*pmd] +.else + addne r0, r0, #2 +.endif + bne 5b @ draw_md + + and r7 ,r8, #0x03e0 + mov r8, r8, lsl #11 + orr r8, r8, r8, lsr #(10+11) + orr r8, r8, r7 ,lsl #1 bic r8, r8, #0x0020 @ kill prio bit strh r8, [r0], #2 @ *dst++ = bgr2rgb(*p32x++) - b 2b @ loop_inner + +6: @ draw_md_32x: + subs lr, lr, #1 + ldrgeb r7, [r11], #1 @ MD pixel + blt 2b @ loop_inner + cmp r3, r7, lsl #26 @ MD has bg pixel? +.if \do_md + mov r7, r7, lsl #1 + ldrneh r7 ,[r9, r7] @ *dst++ = palmd[*pmd] + moveq r7 ,r8 @ *dst++ = bgr2rgb(*p32x++) + strh r7 ,[r0], #2 +.else + streqh r8, [r0] @ *dst++ = bgr2rgb(*p32x++) + add r0, r0, #2 +.endif + b 6b @ draw_md_32x .endm @@ -144,9 +192,11 @@ Pico32xNativePal: stmfd sp!, {r4-r11,lr} ldr lr,=Pico - ldr r10,=Pico32xNativePal + ldr r10,=Pico32xMem + ldr r9,=OFS_PMEM32x_pal_native + ldr r10, [r10] ldr r11, [lr, #OFS_Pico_est+OFS_EST_Draw2FB] - ldr r10,[r10] + add r10,r10,r9 add r9, lr, #OFS_Pico_est+OFS_EST_HighPal @ palmd and r4, r2, #0xff @@ -184,7 +234,7 @@ Pico32xNativePal: ldrneb r8, [r5, #2]! @ r7,r8 - pixel 0,1 index subs r6, r6, #1 blt 0b @ loop_outer - cmp r7, r8 @ is this really improving things? + cmp r7, r8 beq 5f @ check_fill @ +8 3: @ no_fill: @@ -204,11 +254,11 @@ Pico32xNativePal: ldrneh r7, [r9, r12] @ t = palmd[pmd[0]] tst lr, #0x20 ldrneb lr, [r11,#-1] @ MD pixel 1 - strh r7, [r0], #2 cmpne r3, lr, lsl #26 @ MD has bg pixel? 
mov lr, lr, lsl #1 ldrneh r8, [r9, lr] @ t = palmd[pmd[1]] - strh r8, [r0], #2 + orr r7, r7, r8, lsl #16 @ combine 2 pixels to optimize memory bandwidth + str r7, [r0], #4 @ (no write combining on ARM9) .else streqh r7, [r0] tst lr, #0x20 @@ -219,18 +269,21 @@ Pico32xNativePal: .endif b 2b @ loop_inner -5: @ check_fill +5: @ check_fill: @ count pixels, align if needed bic r12,r5, #1 + ldrh lr ,[r12, #2] @ only do this for at least 4 pixels ldrh r12,[r12] + orr r12,lr,r12, lsl #16 orr lr, r7, r7, lsl #8 + orr lr, lr, lr, lsl #16 cmp r12,lr bne 3b @ no_fill tst r5, #1 sub lr, r5, #2 @ starting r5 (32x render data start) - addeq r5, r5, #2 - addne r5, r5, #1 @ add for the check above + addeq r5, r5, #4 + addne r5, r5, #3 @ add for the check above add r6, r6, #1 @ restore from dec orr r7, r7, r7, lsl #8 6: @@ -240,11 +293,12 @@ Pico32xNativePal: ldrh r12,[r5], #2 bge 7f @ count_done cmp r8, r7 + subne r5, r5, #2 @ undo readahead cmpeq r12,r7 beq 6b -7: @ count_done - sub r5, r5, #4 @ undo readahead +7: @ count_done: + sub r5, r5, #2 @ undo readahead @ fix alignment and check type sub r8, r5, lr @@ -262,11 +316,15 @@ Pico32xNativePal: beq 9f @ bg_mode add r11,r11,r8 -8: - subs r8, r8, #2 - strgeh r7, [r0], #2 - strgeh r7, [r0], #2 - bgt 8b + orr r12,r7, r7, lsl #16 + mov r7 ,r12 +8: @ 32x_loop: + subs r8, r8, #4 @ store 4 pixels + stmgeia r0!, {r7, r12} + bgt 8b @ 32x_loop + beq 2b @ loop_inner + adds r8, r8, #2 + strge r7, [r0], #4 @ store 2 leftover pixels b 2b @ loop_inner 9: @ bg_mode: @@ -281,8 +339,8 @@ Pico32xNativePal: mov lr, lr, lsl #1 ldrneh lr, [r9, lr] moveq lr, r7 - strh r12,[r0], #2 - strh lr, [r0], #2 + orr r12,r12,lr, lsl #16 @ combine 2 pixels to optimize memory bandwidth + str r12,[r0], #4 @ (no write combining on ARM9) .else streqh r7, [r0] cmp r3, lr, lsl #26 @ MD pixel 1 has bg? 
@@ -303,9 +361,11 @@ Pico32xNativePal: stmfd sp!, {r4-r11,lr} ldr lr,=Pico - ldr r10,=Pico32xNativePal + ldr r10,=Pico32xMem + ldr r9,=OFS_PMEM32x_pal_native + ldr r10, [r10] ldr r11, [lr, #OFS_Pico_est+OFS_EST_Draw2FB] - ldr r10,[r10] + add r10,r10,r9 add r9, lr, #OFS_Pico_est+OFS_EST_HighPal @ palmd and r4, r2, #0xff @@ -320,7 +380,6 @@ Pico32xNativePal: 0: @ loop_outer: call_scan_end \call_scan add r4, r4, #1 - sub r11,r11,#1 @ adjust for prev read cmp r4, r2, lsr #16 call_scan_fin_ge \call_scan ldmgefd sp!, {r4-r11,pc} @@ -341,13 +400,13 @@ Pico32xNativePal: eor lr, lr, #0x20 3: @ loop_innermost: - ldrb r7, [r11], #1 @ MD pixel subs r6, r6, #1 + ldrgeb r7, [r11], #1 @ MD pixel blt 0b @ loop_outer - cmp r3, r7, lsl #26 @ MD has bg pixel? - mov r7, r7, lsl #1 - tstne lr, #0x20 + tst lr, #0x20 + cmpne r3, r7, lsl #26 @ MD has bg pixel? .if \do_md + mov r7, r7, lsl #1 ldrneh r12,[r9, r7] @ t = palmd[*pmd] streqh lr, [r0], #2 strneh r12,[r0], #2 @ *dst++ = t @@ -365,15 +424,18 @@ make_do_loop_dc do_loop_dc, 0, 0 make_do_loop_dc do_loop_dc_md, 0, 1 make_do_loop_dc do_loop_dc_scan, 1, 0 make_do_loop_dc do_loop_dc_scan_md, 1, 1 +.pool make_do_loop_pp do_loop_pp, 0, 0 make_do_loop_pp do_loop_pp_md, 0, 1 make_do_loop_pp do_loop_pp_scan, 1, 0 make_do_loop_pp do_loop_pp_scan_md, 1, 1 +.pool make_do_loop_rl do_loop_rl, 0, 0 make_do_loop_rl do_loop_rl_md, 0, 1 make_do_loop_rl do_loop_rl_scan, 1, 0 make_do_loop_rl do_loop_rl_scan_md, 1, 1 +.pool @ vim:filetype=armasm diff --git a/pico/draw.c b/pico/draw.c index e345a28d..4834d6bf 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -1364,8 +1364,8 @@ static void FinalizeLine8bit(int sh, int line, struct PicoEState *est) { // a hack for mid-frame palette changes if (!(est->rendstatus & PDRAW_SONIC_MODE) || line - dirty_line > 4) { - // store a maximum of 3 additional palettes in SonicPal - if (est->SonicPalCount < 3) + // store a maximum of 2 additional palettes in SonicPal + if (est->SonicPalCount < 2) est->SonicPalCount ++; 
dirty_line = line; est->rendstatus |= PDRAW_SONIC_MODE; diff --git a/platform/gp2x/emu.c b/platform/gp2x/emu.c index 450ac080..4ad90b83 100644 --- a/platform/gp2x/emu.c +++ b/platform/gp2x/emu.c @@ -328,7 +328,7 @@ static int make_local_pal_md(int fast_mode) localPal[0xe0] = 0x00000000; // reserved pixels for OSD localPal[0xf0] = 0x00ffffff; - if (Pico.m.dirtyPal == 2) + if (Pico.m.dirtyPal == 2) Pico.m.dirtyPal = 0; return pallen; } diff --git a/tools/mkoffsets.sh b/tools/mkoffsets.sh index 6d68a1bc..461fbfa7 100755 --- a/tools/mkoffsets.sh +++ b/tools/mkoffsets.sh @@ -84,6 +84,7 @@ get_define OFS_EST_ PicoEState HighPal ; echo "$line" >>$fn get_define OFS_PMEM_ PicoMem vram ; echo "$line" >>$fn get_define OFS_PMEM_ PicoMem vsram ; echo "$line" >>$fn +get_define OFS_PMEM32x_ Pico32xMem pal_native ; echo "$line" >>$fn get_define OFS_SH2_ SH2_ is_slave ; echo "$line" >>$fn get_define OFS_SH2_ SH2_ p_bios ; echo "$line" >>$fn From ed7e9150781e0d5c0f3a95a4910963ea821fbdf4 Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 25 Apr 2019 19:03:58 +0200 Subject: [PATCH 0189/1110] sh2 drc, improved constant handling and register allocator --- cpu/sh2/compiler.c | 283 ++++++++++++++++++++++++++++----------------- 1 file changed, 174 insertions(+), 109 deletions(-) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index bc63e18b..cd85b373 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -395,10 +395,10 @@ enum { } guest_reg_flags; typedef struct { - u16 flags; // guest flags: is constant, is dirty? + u8 flags; // guest flags: is constant, is dirty? 
s8 sreg; // cache reg for static mapping s8 vreg; // cache_reg this is currently mapped to, -1 if not mapped - u32 val; // value if this is constant + s8 cnst; // const index if this is constant } guest_reg_t; @@ -1153,7 +1153,7 @@ static int find_in_array(u32 *array, size_t size, u32 what) // NB rcache allocation dependencies: // - get_reg_arg/get_tmp_arg first (might evict other regs just allocated) -// - get_reg(..., NULL) before get_reg(..., &x) if it might get the same reg +// - get_reg(..., NULL) before get_reg(..., &hr) if it might get the same reg // - get_reg(..., RC_GR_READ/RMW, ...) before WRITE (might evict needed reg) // register cache / constant propagation stuff @@ -1163,7 +1163,15 @@ typedef enum { RC_GR_RMW, } rc_gr_mode; +typedef struct { + u32 gregs; + u32 val; +} gconst_t; + +gconst_t gconsts[ARRAY_SIZE(guest_regs)]; + static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr); +static void rcache_add_vreg_alias(int x, sh2_reg_e r); static void rcache_remove_vreg_alias(int x, sh2_reg_e r); #define RCACHE_DUMP(msg) { \ @@ -1185,101 +1193,6 @@ static void rcache_remove_vreg_alias(int x, sh2_reg_e r); } \ } -#if PROPAGATE_CONSTANTS -static void gconst_set(sh2_reg_e r, u32 val) -{ - guest_regs[r].flags |= GRF_CONST; - guest_regs[r].val = val; -} - -static void gconst_new(sh2_reg_e r, u32 val) -{ - gconst_set(r, val); - guest_regs[r].flags |= GRF_CDIRTY; - - // throw away old r that we might have cached - if (guest_regs[r].vreg >= 0) - rcache_remove_vreg_alias(guest_regs[r].vreg, r); -} - -static void gconst_copy(sh2_reg_e rd, sh2_reg_e rs) -{ - guest_regs[rd].flags &= ~(GRF_CONST|GRF_CDIRTY); - if (guest_regs[rs].flags & GRF_CONST) - gconst_set(rd, guest_regs[rs].val); -} -#endif - -static int gconst_get(sh2_reg_e r, u32 *val) -{ - if (guest_regs[r].flags & GRF_CONST) { - *val = guest_regs[r].val; - return 1; - } - return 0; -} - -static int gconst_check(sh2_reg_e r) -{ - if (guest_regs[r].flags & (GRF_CONST|GRF_CDIRTY)) - 
return 1; - return 0; -} - -// update hr if dirty, else do nothing -static int gconst_try_read(int hr, sh2_reg_e r) -{ - if (guest_regs[r].flags & GRF_CDIRTY) { - emith_move_r_imm(hr, guest_regs[r].val); - guest_regs[r].flags &= ~GRF_CDIRTY; - return 1; - } - return 0; -} - -static u32 gconst_dirty_mask(void) -{ - u32 mask = 0; - int i; - - for (i = 0; i < ARRAY_SIZE(guest_regs); i++) - if (guest_regs[i].flags & GRF_CDIRTY) - mask |= (1 << i); - return mask; -} - -static void gconst_kill(sh2_reg_e r) -{ - guest_regs[r].flags &= ~(GRF_CONST|GRF_CDIRTY); -} - -static void gconst_clean(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(guest_regs); i++) - if (guest_regs[i].flags & GRF_CDIRTY) { - // using RC_GR_READ here: it will call gconst_try_read, - // cache the reg and mark it dirty. - rcache_get_reg_(i, RC_GR_READ, 0, NULL); - } -} - -static void gconst_invalidate(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(guest_regs); i++) - guest_regs[i].flags &= ~(GRF_CONST|GRF_CDIRTY); -} - -static u16 rcache_counter; -static u32 rcache_static; -static u32 rcache_locked; -static u32 rcache_hint_soon; -static u32 rcache_hint_late; -#define rcache_hint (rcache_hint_soon|rcache_hint_late) - // binary search approach, since we don't have CLZ on ARM920T #define FOR_ALL_BITS_SET_DO(mask, bit, code) { \ u32 __mask = mask; \ @@ -1300,6 +1213,142 @@ static u32 rcache_hint_late; } \ } +#if PROPAGATE_CONSTANTS +static inline int gconst_alloc(sh2_reg_e r) +{ + int i, n = -1; + + for (i = 0; i < ARRAY_SIZE(gconsts); i++) { + if (gconsts[i].gregs & (1 << r)) + gconsts[i].gregs &= ~(1 << r); + if (gconsts[i].gregs == 0 && n < 0) + n = i; + } + if (n >= 0) + gconsts[n].gregs = (1 << r); + else + exit(1); // cannot happen - more constants than guest regs? 
+ return n; +} + +static void gconst_set(sh2_reg_e r, u32 val) +{ + int i = gconst_alloc(r); + + guest_regs[r].flags |= GRF_CONST; + guest_regs[r].cnst = i; + gconsts[i].val = val; +} + +static void gconst_new(sh2_reg_e r, u32 val) +{ + gconst_set(r, val); + guest_regs[r].flags |= GRF_CDIRTY; + + // throw away old r that we might have cached + if (guest_regs[r].vreg >= 0) + rcache_remove_vreg_alias(guest_regs[r].vreg, r); +} + +static void gconst_copy(sh2_reg_e rd, sh2_reg_e rs) +{ + if (guest_regs[rd].flags & GRF_CONST) { + guest_regs[rd].flags &= ~(GRF_CONST|GRF_CDIRTY); + gconsts[guest_regs[rd].cnst].gregs &= ~(1 << rd); + } + if (guest_regs[rs].flags & GRF_CONST) { + guest_regs[rd].flags |= GRF_CONST; + guest_regs[rd].cnst = guest_regs[rs].cnst; + gconsts[guest_regs[rd].cnst].gregs |= (1 << rd); + } +} +#endif + +static int gconst_get(sh2_reg_e r, u32 *val) +{ + if (guest_regs[r].flags & GRF_CONST) { + *val = gconsts[guest_regs[r].cnst].val; + return 1; + } + return 0; +} + +static int gconst_check(sh2_reg_e r) +{ + if (guest_regs[r].flags & (GRF_CONST|GRF_CDIRTY)) + return 1; + return 0; +} + +// update hr if dirty, else do nothing +static int gconst_try_read(int vreg, sh2_reg_e r) +{ + int i, x; + if (guest_regs[r].flags & GRF_CDIRTY) { + x = guest_regs[r].cnst; + emith_move_r_imm(cache_regs[vreg].hreg, gconsts[x].val); + FOR_ALL_BITS_SET_DO(gconsts[x].gregs, i, + { + if (guest_regs[i].vreg >= 0 && i != r) + rcache_remove_vreg_alias(guest_regs[i].vreg, i); + rcache_add_vreg_alias(vreg, i); + guest_regs[i].flags &= ~GRF_CDIRTY; + guest_regs[i].flags |= GRF_DIRTY; + }); + return 1; + } + return 0; +} + +static u32 gconst_dirty_mask(void) +{ + u32 mask = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(guest_regs); i++) + if (guest_regs[i].flags & GRF_CDIRTY) + mask |= (1 << i); + return mask; +} + +static void gconst_kill(sh2_reg_e r) +{ + if (guest_regs[r].flags &= ~(GRF_CONST|GRF_CDIRTY)) + gconsts[guest_regs[r].cnst].gregs &= ~(1 << r); + guest_regs[r].flags &= 
~(GRF_CONST|GRF_CDIRTY); +} + +static void gconst_clean(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(guest_regs); i++) + if (guest_regs[i].flags & GRF_CDIRTY) { + // using RC_GR_READ here: it will call gconst_try_read, + // cache the reg and mark it dirty. + rcache_get_reg_(i, RC_GR_READ, 0, NULL); + } +} + +static void gconst_invalidate(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(guest_regs); i++) { + if (guest_regs[i].flags & (GRF_CONST|GRF_CDIRTY)) + gconsts[guest_regs[i].cnst].gregs &= ~(1 << i); + guest_regs[i].flags &= ~(GRF_CONST|GRF_CDIRTY); + } +} + +static u16 rcache_counter; +static u32 rcache_static; +static u32 rcache_locked; +static u32 rcache_hint_soon; +static u32 rcache_hint_late; +static u32 rcache_hint_write; +#define rcache_hint (rcache_hint_soon|rcache_hint_late) + static void rcache_unmap_vreg(int x) { int i; @@ -1328,8 +1377,7 @@ static void rcache_clean_vreg(int x) rcache_unmap_vreg(guest_regs[r].sreg); emith_move_r_r(cache_regs[guest_regs[r].sreg].hreg, cache_regs[guest_regs[r].vreg].hreg); rcache_remove_vreg_alias(x, r); - cache_regs[guest_regs[r].sreg].gregs = (1 << r); - guest_regs[r].vreg = guest_regs[r].sreg; + rcache_add_vreg_alias(guest_regs[r].sreg, r); } else { // must evict since sreg is locked emith_ctx_write(cache_regs[x].hreg, r * 4); @@ -1343,6 +1391,12 @@ static void rcache_clean_vreg(int x) } } +static void rcache_add_vreg_alias(int x, sh2_reg_e r) +{ + cache_regs[x].gregs |= (1 << r); + guest_regs[r].vreg = x; +} + static void rcache_remove_vreg_alias(int x, sh2_reg_e r) { cache_regs[x].gregs &= ~(1 << r); @@ -1396,9 +1450,12 @@ static cache_reg_t *rcache_evict(void) else if (rcache_hint_late & cache_regs[i].gregs) // REGs needed in some future insn i_prio = 3; - else + else if ((rcache_hint_write & cache_regs[i].gregs) != cache_regs[i].gregs) // REGs not needed soon i_prio = 4; + else + // REGs soon overwritten anyway + i_prio = 5; if (prio < i_prio || (prio == i_prio && cache_regs[i].stamp < min_stamp)) { 
min_stamp = cache_regs[i].stamp; @@ -1549,6 +1606,7 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr h = guest_regs[r].sreg; rcache_evict_vreg(h); tr = &cache_regs[h]; + tr->gregs = 1 << r; if (i >= 0) { if (mode != RC_GR_WRITE) { if (hr) @@ -1559,14 +1617,13 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr } rcache_remove_vreg_alias(guest_regs[r].vreg, r); } else if (mode != RC_GR_WRITE) { - if (gconst_try_read(tr->hreg, r)) { + if (gconst_try_read(h, r)) { tr->flags |= HRF_DIRTY; guest_regs[r].flags |= GRF_DIRTY; } else emith_ctx_read(tr->hreg, r * 4); } guest_regs[r].vreg = guest_regs[r].sreg; - tr->gregs = 1 << r; goto end; } else if (i >= 0) { if (mode == RC_GR_READ || !(cache_regs[i].gregs & ~(1 << r))) { @@ -1608,7 +1665,7 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr guest_regs[r].vreg = tr - cache_regs; if (mode != RC_GR_WRITE) { - if (gconst_try_read(tr->hreg, r)) { + if (gconst_try_read(guest_regs[r].vreg, r)) { tr->flags |= HRF_DIRTY; guest_regs[r].flags |= GRF_DIRTY; } else if (split >= 0) { @@ -1747,7 +1804,7 @@ static int rcache_get_reg_arg(int arg, sh2_reg_e r, int *hr) srcr = dstr; if (rcache_static & (1 << r)) srcr = rcache_get_reg_(r, RC_GR_READ, 0, NULL); - else if (gconst_try_read(srcr, r)) + else if (gconst_try_read(guest_regs[r].vreg, r)) dirty = 1; else emith_ctx_read(srcr, r * 4); @@ -1780,8 +1837,10 @@ static int rcache_get_reg_arg(int arg, sh2_reg_e r, int *hr) emith_move_r_r(dstr, srcr); } else if (hr != NULL) { // caller will modify arg, so it will soon be out of sync with r - if (dirty || src_dirty) + if (dirty || src_dirty) { emith_ctx_write(dstr, r * 4); // must clean since arg will be modified + guest_regs[r].flags &= ~GRF_DIRTY; + } } else if (guest_regs[r].vreg < 0) { // keep arg as vreg for r cache_regs[dstid].type = HR_CACHED; @@ -1909,6 +1968,11 @@ static inline void rcache_set_hint_late(u32 mask) rcache_hint_late = mask 
& ~rcache_static; } +static inline void rcache_set_hint_write(u32 mask) +{ + rcache_hint_write = mask & ~rcache_static; +} + static inline int rcache_is_hinted(sh2_reg_e r) { // consider static REGs as always hinted, since they are always there @@ -2038,7 +2102,7 @@ static void rcache_invalidate(void) } rcache_counter = 0; - rcache_hint_soon = rcache_hint_late = 0; + rcache_hint_soon = rcache_hint_late = rcache_hint_write = 0; gconst_invalidate(); } @@ -2155,10 +2219,9 @@ static void emit_move_r_r(sh2_reg_e dst, sh2_reg_e src) if (guest_regs[dst].vreg >= 0) rcache_remove_vreg_alias(guest_regs[dst].vreg, dst); // make dst an alias of src - cache_regs[i].gregs |= (1 << dst); + rcache_add_vreg_alias(i, dst); cache_regs[i].flags |= HRF_DIRTY; guest_regs[dst].flags |= GRF_DIRTY; - guest_regs[dst].vreg = i; gconst_kill(dst); #if PROPAGATE_CONSTANTS gconst_copy(dst, src); @@ -2772,6 +2835,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) dbg(1, "unhandled delay_dep_bk: %x", delay_dep_bk); rcache_set_hint_soon(0); rcache_set_hint_late(0); + rcache_set_hint_write(0); } else { @@ -2802,6 +2866,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } rcache_set_hint_soon(late); // insns 1-3 rcache_set_hint_late(late & ~soon); // insns 4-9 + rcache_set_hint_write(write & ~(late|soon)); // next access is write } rcache_set_locked(opd[0].source); // try not to evict src regs for this op From aa4c4cb951d3ec16975d2d546c3cb3bbb56e94d2 Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 26 Apr 2019 18:53:21 +0200 Subject: [PATCH 0190/1110] sh2 drc, make B/W read functions signed (reduces generated code size) --- cpu/drc/emit_arm.c | 48 +++++++++++++++++++++++++++--------------- cpu/drc/emit_x86.c | 22 ++++++++++++++++--- cpu/sh2/compiler.c | 34 ++++++++++++------------------ cpu/sh2/compiler.h | 4 ++-- cpu/sh2/mame/sh2.c | 10 ++++----- cpu/sh2/mame/sh2pico.c | 8 +++---- pico/32x/memory.c | 19 +++++++++-------- 7 files changed, 85 insertions(+), 60 
deletions(-) diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index d8674a03..586f0a54 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -795,6 +795,8 @@ static inline void emith_pool_check(void) emith_read_r_r_offs_c(cond, r, rs, offs) #define emith_read_r_r_r_c(cond, r, rs, rm) \ EOP_LDR_REG_LSL(cond, r, rs, rm, 0) +#define emith_read_r_r_offs(r, rs, offs) \ + emith_read_r_r_offs_c(A_COND_AL, r, rs, offs) #define emith_read_r_r_r(r, rs, rm) \ EOP_LDR_REG_LSL(A_COND_AL, r, rs, rm, 0) @@ -802,28 +804,37 @@ static inline void emith_pool_check(void) EOP_LDRB_IMM2(cond, r, rs, offs) #define emith_read8_r_r_r_c(cond, r, rs, rm) \ EOP_LDRB_REG_LSL(cond, r, rs, rm, 0) +#define emith_read8_r_r_offs(r, rs, offs) \ + emith_read8_r_r_offs_c(A_COND_AL, r, rs, offs) #define emith_read8_r_r_r(r, rs, rm) \ - EOP_LDRB_REG_LSL(A_COND_AL, r, rs, rm, 0) + emith_read8_r_r_r_c(A_COND_AL, r, rs, rm) #define emith_read16_r_r_offs_c(cond, r, rs, offs) \ EOP_LDRH_IMM2(cond, r, rs, offs) #define emith_read16_r_r_r_c(cond, r, rs, rm) \ EOP_LDRH_REG2(cond, r, rs, rm) -#define emith_read16_r_r_r(r, rs, rm) \ - EOP_LDRH_REG2(A_COND_AL, r, rs, rm) - -#define emith_read_r_r_offs(r, rs, offs) \ - emith_read_r_r_offs_c(A_COND_AL, r, rs, offs) - -#define emith_read8s_r_r_offs(r, rs, offs) \ - EOP_LDRSB_IMM2(A_COND_AL, r, rs, offs) -#define emith_read8_r_r_offs(r, rs, offs) \ - emith_read8_r_r_offs_c(A_COND_AL, r, rs, offs) - -#define emith_read16s_r_r_offs(r, rs, offs) \ - EOP_LDRSH_IMM2(A_COND_AL, r, rs, offs) #define emith_read16_r_r_offs(r, rs, offs) \ emith_read16_r_r_offs_c(A_COND_AL, r, rs, offs) +#define emith_read16_r_r_r(r, rs, rm) \ + emith_read16_r_r_r_c(A_COND_AL, r, rs, rm) + +#define emith_read8s_r_r_offs_c(cond, r, rs, offs) \ + EOP_LDRSB_IMM2(cond, r, rs, offs) +#define emith_read8s_r_r_r_c(cond, r, rs, rm) \ + EOP_LDRSB_REG2(cond, r, rs, rm) +#define emith_read8s_r_r_offs(r, rs, offs) \ + emith_read8s_r_r_offs_c(A_COND_AL, r, rs, offs) +#define emith_read8s_r_r_r(r, 
rs, rm) \ + emith_read8s_r_r_r_c(A_COND_AL, r, rs, rm) + +#define emith_read16s_r_r_offs_c(cond, r, rs, offs) \ + EOP_LDRSH_IMM2(cond, r, rs, offs) +#define emith_read16s_r_r_r_c(cond, r, rs, rm) \ + EOP_LDRSH_REG2(cond, r, rs, rm) +#define emith_read16s_r_r_offs(r, rs, offs) \ + emith_read16s_r_r_offs_c(A_COND_AL, r, rs, offs) +#define emith_read16s_r_r_r(r, rs, rm) \ + emith_read16s_r_r_r_c(A_COND_AL, r, rs, rm) #define emith_write_r_r_offs_c(cond, r, rs, offs) \ EOP_STR_IMM2(cond, r, rs, offs) @@ -945,6 +956,11 @@ static inline void emith_pool_check(void) #define emith_call(target) \ emith_call_cond(A_COND_AL, target) +#define emith_call_reg(r) { \ + emith_move_r_r(14, 15); \ + EOP_C_BX(A_COND_AL, r); \ +} + #define emith_call_ctx(offs) { \ emith_move_r_r(14, 15); \ emith_jump_ctx(offs); \ @@ -1091,9 +1107,7 @@ static inline void emith_pool_check(void) } while (0) /* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */ -#define emith_sh2_macw(ml, mh, rn, rm, sr) do { \ - emith_sext(rn, rn, 16); \ - emith_sext(rm, rm, 16); \ +#define emith_sh2_macw(ml, mh, rn, rm, sr) do { \ emith_tst_r_imm(sr, S); \ EMITH_SJMP2_START(DCOND_NE); \ emith_mula_s64_c(DCOND_EQ, ml, mh, rn, rm); \ diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 1ac4ee01..5805aadd 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -397,8 +397,12 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_read8_r_r_r_c(cond, r, rs, rm) \ emith_read8_r_r_r(r, rs, rm) +#define emith_read8s_r_r_r_c(cond, r, rs, rm) \ + emith_read8s_r_r_r(r, rs, rm) #define emith_read16_r_r_r_c(cond, r, rs, rm) \ emith_read16_r_r_r(r, rs, rm) +#define emith_read16s_r_r_r_c(cond, r, rs, rm) \ + emith_read16s_r_r_r(r, rs, rm) #define emith_read_r_r_r_c(cond, r, rs, rm) \ emith_read_r_r_r(r, rs, rm) @@ -684,12 +688,24 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; EMIT_SIB(0, rs, rm); /* mov r, [rm + rs * 1] */ \ } while (0) +#define emith_read8s_r_r_r(r, rs, rm) 
do { \ + EMIT(0x0f, u8); \ + EMIT_OP_MODRM(0xbe, 0, r, 4); \ + EMIT_SIB(0, rs, rm); /* mov r, [rm + rs * 1] */ \ +} while (0) + #define emith_read16_r_r_r(r, rs, rm) do { \ EMIT(0x0f, u8); \ EMIT_OP_MODRM(0xb7, 0, r, 4); \ EMIT_SIB(0, rs, rm); /* mov r, [rm + rs * 1] */ \ } while (0) +#define emith_read16s_r_r_r(r, rs, rm) do { \ + EMIT(0x0f, u8); \ + EMIT_OP_MODRM(0xbf, 0, r, 4); \ + EMIT_SIB(0, rs, rm); /* mov r, [rm + rs * 1] */ \ +} while (0) + #define emith_read_r_r_r(r, rs, rm) do { \ EMIT_OP_MODRM(0x8b, 0, r, 4); \ EMIT_SIB(0, rs, rm); /* mov r, [rm + rs * 1] */ \ @@ -785,9 +801,11 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; EMIT(offs, u32); \ } while (0) -#define emith_push_ret() +#define emith_push_ret() \ + emith_push(xSI); /* to align */ #define emith_pop_and_ret() \ + emith_pop(xSI); \ emith_ret() #define EMITH_JMP_START(cond) { \ @@ -1080,8 +1098,6 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; /* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */ #define emith_sh2_macw(ml, mh, rn, rm, sr) do { \ - emith_sext(rn, rn, 16); \ - emith_sext(rm, rm, 16); \ emith_tst_r_imm(sr, S); \ EMITH_SJMP_START(DCOND_EQ); \ /* XXX: MACH should be untouched when S is set? */ \ diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index cd85b373..517be81c 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -2354,17 +2354,15 @@ static int emit_memhandler_read_rr(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rs, u32 off hr2 = hr; else #if REMAP_REGISTER - hr2 = rcache_map_reg(rd, hr, size != 2 ? RC_GR_RMW : RC_GR_WRITE); + hr2 = rcache_map_reg(rd, hr, RC_GR_WRITE); #else hr2 = rcache_get_reg(rd, RC_GR_WRITE, NULL); #endif - if (rd != SHR_TMP && size != 2) { // 16, 8 - emith_sext(hr2, hr, size ? 
16 : 8); - } else if (hr != hr2) // 32 + if (hr != hr2) { emith_move_r_r(hr2, hr); - if (hr != hr2) rcache_free_tmp(hr); + } return hr2; } @@ -2422,21 +2420,19 @@ static int emit_indirect_indexed_read(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rx, sh2_ hr = emit_memhandler_read(size); size &= MF_SIZEMASK; - if (rd != SHR_TMP) + if (rd == SHR_TMP) + hr2 = hr; + else #if REMAP_REGISTER - hr2 = rcache_map_reg(rd, hr, size != 2 ? RC_GR_RMW : RC_GR_WRITE); + hr2 = rcache_map_reg(rd, hr, RC_GR_WRITE); #else hr2 = rcache_get_reg(rd, RC_GR_WRITE, NULL); #endif - else - hr2 = hr; - if (rd != SHR_TMP && size != 2) { // 16, 8 - emith_sext(hr2, hr, size ? 16 : 8); - } else if (hr != hr2) // 32 + if (hr != hr2) { emith_move_r_r(hr2, hr); - if (hr != hr2) rcache_free_tmp(hr); + } return hr2; } @@ -2991,16 +2987,14 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } tmp2 = emit_memhandler_read(opd->size); #if REMAP_REGISTER - tmp3 = rcache_map_reg(GET_Rn(), tmp2, opd->size != 2 ? RC_GR_RMW : RC_GR_WRITE); + tmp3 = rcache_map_reg(GET_Rn(), tmp2, RC_GR_WRITE); #else tmp3 = rcache_get_reg(GET_Rn(), RC_GR_WRITE, NULL); #endif - if (opd->size != 2) { - emith_sext(tmp3, tmp2, 16); - } else if (tmp3 != tmp2) + if (tmp3 != tmp2) { emith_move_r_r(tmp3, tmp2); - if (tmp3 != tmp2) rcache_free_tmp(tmp2); + } } goto end_op; @@ -4025,7 +4019,7 @@ static void sh2_generate_utils(void) EMITH_SJMP_START(DCOND_CS); emith_and_r_r_c(DCOND_CC, arg0, arg3); emith_eor_r_imm_c(DCOND_CC, arg0, 1); - emith_read8_r_r_r_c(DCOND_CC, RET_REG, arg0, arg2); + emith_read8s_r_r_r_c(DCOND_CC, RET_REG, arg0, arg2); emith_ret_c(DCOND_CC); EMITH_SJMP_END(DCOND_CS); emith_move_r_r_ptr(arg1, CONTEXT_REG); @@ -4037,7 +4031,7 @@ static void sh2_generate_utils(void) emith_sh2_rcall(arg0, arg1, arg2, arg3); EMITH_SJMP_START(DCOND_CS); emith_and_r_r_c(DCOND_CC, arg0, arg3); - emith_read16_r_r_r_c(DCOND_CC, RET_REG, arg0, arg2); + emith_read16s_r_r_r_c(DCOND_CC, RET_REG, arg0, arg2); emith_ret_c(DCOND_CC); 
EMITH_SJMP_END(DCOND_CS); emith_move_r_r_ptr(arg1, CONTEXT_REG); diff --git a/cpu/sh2/compiler.h b/cpu/sh2/compiler.h index 07e76cca..d5cde520 100644 --- a/cpu/sh2/compiler.h +++ b/cpu/sh2/compiler.h @@ -44,10 +44,10 @@ unsigned short scan_block(unsigned int base_pc, int is_slave, #define _DRC_DECLARE_SR(SR) __DRC_DECLARE_SR(SR) #define DRC_DECLARE_SR _DRC_DECLARE_SR(DRC_SR_REG) #define DRC_SAVE_SR(sh2) \ - if ((sh2->state & (SH2_STATE_RUN)) == SH2_STATE_RUN) \ + if ((sh2->state & (SH2_STATE_RUN|SH2_STATE_SLEEP)) == SH2_STATE_RUN) \ sh2->sr = sh2_sr; #define DRC_RESTORE_SR(sh2) \ - if ((sh2->state & (SH2_STATE_RUN)) == SH2_STATE_RUN) \ + if ((sh2->state & (SH2_STATE_RUN|SH2_STATE_SLEEP)) == SH2_STATE_RUN) \ sh2_sr = sh2->sr; #else #define DRC_DECLARE_SR diff --git a/cpu/sh2/mame/sh2.c b/cpu/sh2/mame/sh2.c index 2fb964b6..fa49153a 100644 --- a/cpu/sh2/mame/sh2.c +++ b/cpu/sh2/mame/sh2.c @@ -372,7 +372,7 @@ INLINE void BRA(sh2_state *sh2, UINT32 d) #if BUSY_LOOP_HACKS if (disp == -2) { - UINT32 next_opcode = RW( sh2, sh2->ppc & AM ); + UINT32 next_opcode = (UINT32)(UINT16)RW( sh2, sh2->ppc & AM ); /* BRA $ * NOP */ @@ -802,7 +802,7 @@ INLINE void DT(sh2_state *sh2, UINT32 n) sh2->sr &= ~T; #if BUSY_LOOP_HACKS { - UINT32 next_opcode = RW( sh2, sh2->ppc & AM ); + UINT32 next_opcode = (UINT32)(UINT16)RW( sh2, sh2->ppc & AM ); /* DT Rn * BF $-2 */ @@ -1049,12 +1049,12 @@ INLINE void MAC_W(sh2_state *sh2, UINT32 m, UINT32 n) INT32 tempm, tempn, dest, src, ans; UINT32 templ; - tempn = (INT32) RW( sh2, sh2->r[n] ); + tempn = (INT32)(INT16) RW( sh2, sh2->r[n] ); sh2->r[n] += 2; - tempm = (INT32) RW( sh2, sh2->r[m] ); + tempm = (INT32)(INT16) RW( sh2, sh2->r[m] ); sh2->r[m] += 2; templ = sh2->macl; - tempm = ((INT32) (short) tempn * (INT32) (short) tempm); + tempm = (tempn * tempm); if ((INT32) sh2->macl >= 0) dest = 0; else diff --git a/cpu/sh2/mame/sh2pico.c b/cpu/sh2/mame/sh2pico.c index f9d30d77..467b2adc 100644 --- a/cpu/sh2/mame/sh2pico.c +++ b/cpu/sh2/mame/sh2pico.c @@ 
-121,7 +121,7 @@ int sh2_execute_interpreter(SH2 *sh2, int cycles) if (sh2->delay) { sh2->ppc = sh2->delay; - opcode = RW(sh2, sh2->delay); + opcode = (UINT32)(UINT16)RW(sh2, sh2->delay); // TODO: more branch types if ((opcode >> 13) == 5) { // BRA/BSR @@ -139,7 +139,7 @@ int sh2_execute_interpreter(SH2 *sh2, int cycles) else { sh2->ppc = sh2->pc; - opcode = RW(sh2, sh2->pc); + opcode = (UINT32)(UINT16)RW(sh2, sh2->pc); } sh2->delay = 0; @@ -232,13 +232,13 @@ int sh2_execute_interpreter(SH2 *sh2, int cycles) if (sh2->delay) { sh2->ppc = sh2->delay; - opcode = RW(sh2, sh2->delay); + opcode = (UINT32)(UINT16)RW(sh2, sh2->delay); sh2->pc -= 2; } else { sh2->ppc = sh2->pc; - opcode = RW(sh2, sh2->pc); + opcode = (UINT32)(UINT16)RW(sh2, sh2->pc); } sh2->delay = 0; diff --git a/pico/32x/memory.c b/pico/32x/memory.c index 6a3b2222..8a4b5365 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -1279,19 +1279,19 @@ out: elprintf_sh2(sh2, EL_32X, "r8 [%08x] %02x @%06x", a, d, sh2_pc(sh2)); DRC_RESTORE_SR(sh2); - return d; + return (s8)d; } static u32 REGPARM(2) sh2_read8_da(u32 a, SH2 *sh2) { - return sh2->data_array[(a & 0xfff) ^ 1]; + return (s8)sh2->data_array[(a & 0xfff) ^ 1]; } // for ssf2 static u32 REGPARM(2) sh2_read8_rom(u32 a, SH2 *sh2) { u32 bank = carthw_ssf2_banks[(a >> 19) & 7] << 19; - u8 *p = sh2->p_rom; + s8 *p = sh2->p_rom; return p[(bank + (a & 0x7ffff)) ^ 1]; } @@ -1340,18 +1340,18 @@ out: a, d, sh2_pc(sh2)); out_noprint: DRC_RESTORE_SR(sh2); - return d; + return (s16)d; } static u32 REGPARM(2) sh2_read16_da(u32 a, SH2 *sh2) { - return ((u16 *)sh2->data_array)[(a & 0xffe) / 2]; + return ((s16 *)sh2->data_array)[(a & 0xffe) / 2]; } static u32 REGPARM(2) sh2_read16_rom(u32 a, SH2 *sh2) { u32 bank = carthw_ssf2_banks[(a >> 19) & 7] << 19; - u16 *p = sh2->p_rom; + s16 *p = sh2->p_rom; return p[(bank + (a & 0x7fffe)) / 2]; } @@ -1364,7 +1364,8 @@ static u32 REGPARM(2) sh2_read32_unmapped(u32 a, SH2 *sh2) static u32 REGPARM(2) sh2_read32_cs0(u32 a, SH2 
*sh2) { - return (sh2_read16_cs0(a, sh2) << 16) | sh2_read16_cs0(a + 2, sh2); + u32 d1 = sh2_read16_cs0(a, sh2) << 16, d2 = sh2_read16_cs0(a + 2, sh2) << 16; + return d1 | (d2 >> 16); } static u32 REGPARM(2) sh2_read32_da(u32 a, SH2 *sh2) @@ -1631,7 +1632,7 @@ u32 REGPARM(2) p32x_sh2_read8(u32 a, SH2 *sh2) if (map_flag_set(p)) return ((sh2_read_handler *)(p << 1))(a, sh2); else - return *(u8 *)((p << 1) + ((a & sh2_map->mask) ^ 1)); + return *(s8 *)((p << 1) + ((a & sh2_map->mask) ^ 1)); } u32 REGPARM(2) p32x_sh2_read16(u32 a, SH2 *sh2) @@ -1644,7 +1645,7 @@ u32 REGPARM(2) p32x_sh2_read16(u32 a, SH2 *sh2) if (map_flag_set(p)) return ((sh2_read_handler *)(p << 1))(a, sh2); else - return *(u16 *)((p << 1) + (a & sh2_map->mask)); + return *(s16 *)((p << 1) + (a & sh2_map->mask)); } u32 REGPARM(2) p32x_sh2_read32(u32 a, SH2 *sh2) From e01deede1bfff1e6a36524ea2f5d72e4a73682f9 Mon Sep 17 00:00:00 2001 From: kub Date: Sun, 28 Apr 2019 23:42:02 +0200 Subject: [PATCH 0191/1110] sh2 drc, code emitter cleanup, add ARM reorder stage to reduce interlock --- cpu/drc/emit_arm.c | 316 ++++++++++++++++++++++++++----------- cpu/drc/emit_x86.c | 74 ++++----- cpu/sh2/compiler.c | 22 ++- pico/carthw/svp/compiler.c | 1 + 4 files changed, 285 insertions(+), 128 deletions(-) diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index 586f0a54..bfce29fa 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -14,22 +14,130 @@ do { \ *(u32 *)ptr = x; \ ptr = (void *)((u8 *)ptr + sizeof(u32)); \ - COUNT_OP; \ } while (0) -#define EMIT(x) EMIT_PTR(tcache_ptr, x) +// ARM special registers and peephole optimization flags +#define SP 13 // stack pointer +#define LR 14 // link (return address) +#define PC 15 // program counter +#define SR 16 // CPSR, status register +#define MEM 17 // memory access (src=LDR, dst=STR) +#define CYC1 20 // 1 cycle interlock (LDR, reg-cntrld shift) +#define CYC2 21 // 2+ cycles interlock (LDR[BH], MUL/MLA etc) +#define SWAP 31 // swapped +#define NO 32 // token 
for "no register" -#define A_R4M (1 << 4) -#define A_R5M (1 << 5) -#define A_R6M (1 << 6) -#define A_R7M (1 << 7) -#define A_R8M (1 << 8) -#define A_R9M (1 << 9) -#define A_R10M (1 << 10) -#define A_R11M (1 << 11) -#define A_R12M (1 << 12) -#define A_R14M (1 << 14) -#define A_R15M (1 << 15) +// bitmask builders +#define M1(x) (u32)(1ULL<<(x)) // u32 to have NO evaluate to 0 +#define M2(x,y) (M1(x)|M1(y)) +#define M3(x,y,z) (M2(x,y)|M1(z)) +#define M4(x,y,z,a) (M3(x,y,z)|M1(a)) +#define M5(x,y,z,a,b) (M4(x,y,z,a)|M1(b)) +#define M10(a,b,c,d,e,f,g,h,i,j) (M5(a,b,c,d,e)|M5(f,g,h,i,j)) + +// peephole optimizer. ATM only tries to reduce interlock +#define EMIT_CACHE_SIZE 3 +struct emit_op { + u32 op; + u32 src, dst; +}; + +// peephole cache, last commited insn + cache + next insn + empty insn = size+3 +static struct emit_op emit_cache[EMIT_CACHE_SIZE+3]; +static int emit_index; +#define emith_insn_ptr() (u8 *)((u32 *)tcache_ptr-emit_index) + +static int emith_pool_index(int tcache_offs); +static void emith_pool_adjust(int pool_index, int move_offs); + +static NOINLINE void EMIT(u32 op, u32 dst, u32 src) +{ + void *emit_ptr = (u32 *)tcache_ptr - emit_index; + int i; + + EMIT_PTR(tcache_ptr, op); // emit to keep tcache_ptr current + COUNT_OP; + // for conditional execution SR is always source + if (op < 0xe0000000 /*A_COND_AL << 28*/) + src |= M1(SR); + // put insn on back of queue + emit_cache[emit_index+1].op = op; + emit_cache[emit_index+1].src = src & ~M1(NO); // mask away the NO token + emit_cache[emit_index+1].dst = dst & ~M1(NO); + // move insn down in the queue as long as permitted by dependencies + for (i = emit_index-1; i > 0; i--) { + struct emit_op *ptr = &emit_cache[i]; + int deps = 0; + // never swap branch insns (changes semantics) + if ((ptr[0].dst | ptr[1].dst) & M1(PC)) + continue; + // dst deps between 0 and 1 must not be swapped, since any deps + // but [0].src & [1].src lead to changed semantics if swapped. 
+ if ((ptr[0].dst & ptr[1].src) || (ptr[1].dst & ptr[0].src) || + (ptr[0].dst & ptr[1].dst)) + continue; +#if 1 + // just move loads as far up as possible + deps -= !!(ptr[1].src & M1(MEM)); + deps += !!(ptr[0].src & M1(MEM)); +#elif 0 + // treat all dest->src deps as a potential interlock +#define DEP_INSN(x,y) !!(ptr[x].dst & ptr[y].src) + // insn sequence: -1, 0, 1, 2 + deps -= DEP_INSN(1,2) + DEP_INSN(-1,0); + deps -= !!(ptr[1].src & M1(MEM)); // favour moving LDR's down + // insn sequence: -1, 1, 0, 2 + deps += DEP_INSN(0,2) + DEP_INSN(-1,1); + deps += !!(ptr[0].src & M1(SWAP)); // penalise if swapped +#else + // calculate ARM920T interlock cycles +#define DEP_CYC1(x,y) ((ptr[x].dst & ptr[y].src)&&(ptr[x].src & M1(CYC1))) +#define DEP_CYC2(x,y) ((ptr[x].dst & ptr[y].src)&&(ptr[x].src & M1(CYC2))) +#define DEP_INSN(x,y,z) DEP_CYC1(x,y)+DEP_CYC1(y,z)+2*DEP_CYC2(x,y)+DEP_CYC2(x,z) + // insn sequence: -1, 0, 1, 2 + deps -= DEP_INSN(0,1,2) + DEP_INSN(-1,0,1); + deps -= !!(ptr[1].src & M1(MEM)); // favour moving LDR's down + // insn sequence: -1, 1, 0, 2 + deps += DEP_INSN(0,2,1) + DEP_INSN(-1,1,0); + deps += !!(ptr[0].src & M1(SWAP)); // penalise multiple swaps +#endif + // swap if fewer depencies + if (deps < 0) { + // swap insn reading PC only if uncomitted pool load + struct emit_op tmp; + int i0 = -1, i1 = -1; + if ((!(ptr[0].src & M1(PC)) || + (i0 = emith_pool_index(emit_index+2 - i)) >= 0) && + (!(ptr[1].src & M1(PC)) || + (i1 = emith_pool_index(emit_index+1 - i)) >= 0)) { + // not using PC, or pool load + emith_pool_adjust(i0, 1); + emith_pool_adjust(i1, -1); + tmp = ptr[0], ptr[0] = ptr[1], ptr[1] = tmp; + ptr[0].src |= M1(SWAP); + } + } + } + if (emit_index <= EMIT_CACHE_SIZE) { + // queue not yet full + emit_index++; + } else { + // commit oldest insn from cache + EMIT_PTR(emit_ptr, emit_cache[1].op); + for (i = 0; i <= emit_index; i++) + emit_cache[i] = emit_cache[i+1]; + } +} + +static void emith_flush(void) +{ + int i; + void *emit_ptr = tcache_ptr - 
emit_index*sizeof(u32); + + for (i = 1; i <= emit_index; i++) + EMIT_PTR(emit_ptr, emit_cache[i].op); + emit_index = 0; +} #define A_COND_AL 0xe #define A_COND_EQ 0x0 @@ -96,12 +204,20 @@ #define A_OP_BIC 0xe #define A_OP_MVN 0xf -#define EOP_C_DOP_X(cond,op,s,rn,rd,shifter_op) \ - EMIT(((cond)<<28) | ((op)<< 21) | ((s)<<20) | ((rn)<<16) | ((rd)<<12) | (shifter_op)) +// operation specific register usage in DOP +#define A_Rn(op,rn) (((op)&0xd)!=0xd ? rn:NO) // no rn for MOV,MVN +#define A_Rd(op,rd) (((op)&0xc)!=0x8 ? rd:NO) // no rd for TST,TEQ,CMP,CMN +// CSPR is dst if S set, CSPR is src if op is ADC/SBC/RSC or shift is RRX +#define A_Sd(s) ((s) ? SR:NO) +#define A_Sr(op,sop) (((op)>=0x5 && (op)<=0x7) || (sop)>>4==A_AM1_ROR<<1 ? SR:NO) -#define EOP_C_DOP_IMM( cond,op,s,rn,rd,ror2,imm8) EOP_C_DOP_X(cond,op,s,rn,rd,A_AM1_IMM(ror2,imm8)) -#define EOP_C_DOP_REG_XIMM(cond,op,s,rn,rd,shift_imm,shift_op,rm) EOP_C_DOP_X(cond,op,s,rn,rd,A_AM1_REG_XIMM(shift_imm,shift_op,rm)) -#define EOP_C_DOP_REG_XREG(cond,op,s,rn,rd,rs, shift_op,rm) EOP_C_DOP_X(cond,op,s,rn,rd,A_AM1_REG_XREG(rs, shift_op,rm)) +#define EOP_C_DOP_X(cond,op,s,rn,rd,sop,rm,rs) \ + EMIT(((cond)<<28) | ((op)<< 21) | ((s)<<20) | ((rn)<<16) | ((rd)<<12) | (sop), \ + M2(A_Rd(op,rd),A_Sd(s)), M5(A_Sr(op,sop),A_Rn(op,rn),rm,rs,rs==NO?NO:CYC1)) + +#define EOP_C_DOP_IMM( cond,op,s,rn,rd,ror2,imm8) EOP_C_DOP_X(cond,op,s,rn,rd,A_AM1_IMM(ror2,imm8), NO, NO) +#define EOP_C_DOP_REG_XIMM(cond,op,s,rn,rd,shift_imm,shift_op,rm) EOP_C_DOP_X(cond,op,s,rn,rd,A_AM1_REG_XIMM(shift_imm,shift_op,rm), rm, NO) +#define EOP_C_DOP_REG_XREG(cond,op,s,rn,rd,rs, shift_op,rm) EOP_C_DOP_X(cond,op,s,rn,rd,A_AM1_REG_XREG(rs, shift_op,rm), rm, rs) #define EOP_MOV_IMM(rd, ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_MOV,0, 0,rd,ror2,imm8) #define EOP_MVN_IMM(rd, ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_MVN,0, 0,rd,ror2,imm8) @@ -161,16 +277,17 @@ /* addressing mode 2 */ #define EOP_C_AM2_IMM(cond,u,b,l,rn,rd,offset_12) \ - EMIT(((cond)<<28) | 
0x05000000 | ((u)<<23) | ((b)<<22) | ((l)<<20) | ((rn)<<16) | ((rd)<<12) | (offset_12)) + EMIT(((cond)<<28) | 0x05000000 | ((u)<<23) | ((b)<<22) | ((l)<<20) | ((rn)<<16) | ((rd)<<12) | \ + ((offset_12) & 0xfff), M1(l?rd:MEM), M3(rn,l?MEM:rd,l?b?CYC2:CYC1:NO)) #define EOP_C_AM2_REG(cond,u,b,l,rn,rd,shift_imm,shift_op,rm) \ EMIT(((cond)<<28) | 0x07000000 | ((u)<<23) | ((b)<<22) | ((l)<<20) | ((rn)<<16) | ((rd)<<12) | \ - ((shift_imm)<<7) | ((shift_op)<<5) | (rm)) + A_AM1_REG_XIMM(shift_imm, shift_op, rm), M1(l?rd:MEM), M4(rn,rm,l?MEM:rd,l?b?CYC2:CYC1:NO)) /* addressing mode 3 */ #define EOP_C_AM3(cond,u,r,l,rn,rd,s,h,immed_reg) \ EMIT(((cond)<<28) | 0x01000090 | ((u)<<23) | ((r)<<22) | ((l)<<20) | ((rn)<<16) | ((rd)<<12) | \ - ((s)<<6) | ((h)<<5) | (immed_reg)) + ((s)<<6) | ((h)<<5) | (immed_reg), M1(l?rd:MEM), M4(rn,r?NO:immed_reg,l?MEM:rd,l?CYC2:NO)) #define EOP_C_AM3_IMM(cond,u,l,rn,rd,s,h,offset_8) EOP_C_AM3(cond,u,1,l,rn,rd,s,h,(((offset_8)&0xf0)<<4)|((offset_8)&0xf)) @@ -206,60 +323,61 @@ /* ldm and stm */ #define EOP_XXM(cond,p,u,s,w,l,rn,list) \ - EMIT(((cond)<<28) | (1<<27) | ((p)<<24) | ((u)<<23) | ((s)<<22) | ((w)<<21) | ((l)<<20) | ((rn)<<16) | (list)) + EMIT(((cond)<<28) | (1<<27) | ((p)<<24) | ((u)<<23) | ((s)<<22) | ((w)<<21) | ((l)<<20) | ((rn)<<16) | (list), \ + M2(rn,l?NO:MEM)|(l?list:0), M3(rn,l?MEM:NO,l?CYC2:NO)|(l?0:list)) #define EOP_STMIA(rb,list) EOP_XXM(A_COND_AL,0,1,0,0,0,rb,list) #define EOP_LDMIA(rb,list) EOP_XXM(A_COND_AL,0,1,0,0,1,rb,list) -#define EOP_STMFD_SP(list) EOP_XXM(A_COND_AL,1,0,0,1,0,13,list) -#define EOP_LDMFD_SP(list) EOP_XXM(A_COND_AL,0,1,0,1,1,13,list) +#define EOP_STMFD_SP(list) EOP_XXM(A_COND_AL,1,0,0,1,0,SP,list) +#define EOP_LDMFD_SP(list) EOP_XXM(A_COND_AL,0,1,0,1,1,SP,list) /* branches */ #define EOP_C_BX(cond,rm) \ - EMIT(((cond)<<28) | 0x012fff10 | (rm)) + EMIT(((cond)<<28) | 0x012fff10 | (rm), M1(PC), M1(rm)) #define EOP_C_B_PTR(ptr,cond,l,signed_immed_24) \ EMIT_PTR(ptr, ((cond)<<28) | 0x0a000000 | ((l)<<24) | 
(signed_immed_24)) #define EOP_C_B(cond,l,signed_immed_24) \ - EOP_C_B_PTR(tcache_ptr,cond,l,signed_immed_24) + EMIT(((cond)<<28) | 0x0a000000 | ((l)<<24) | (signed_immed_24), M2(PC,l?LR:NO), M1(PC)) #define EOP_B( signed_immed_24) EOP_C_B(A_COND_AL,0,signed_immed_24) #define EOP_BL(signed_immed_24) EOP_C_B(A_COND_AL,1,signed_immed_24) /* misc */ #define EOP_C_MUL(cond,s,rd,rs,rm) \ - EMIT(((cond)<<28) | ((s)<<20) | ((rd)<<16) | ((rs)<<8) | 0x90 | (rm)) + EMIT(((cond)<<28) | ((s)<<20) | ((rd)<<16) | ((rs)<<8) | 0x90 | (rm), M2(rd,s?SR:NO), M3(rs,rm,CYC2)) #define EOP_C_UMULL(cond,s,rdhi,rdlo,rs,rm) \ - EMIT(((cond)<<28) | 0x00800000 | ((s)<<20) | ((rdhi)<<16) | ((rdlo)<<12) | ((rs)<<8) | 0x90 | (rm)) + EMIT(((cond)<<28) | 0x00800000 | ((s)<<20) | ((rdhi)<<16) | ((rdlo)<<12) | ((rs)<<8) | 0x90 | (rm), M3(rdhi,rdlo,s?SR:NO), M3(rs,rm,CYC2)) #define EOP_C_SMULL(cond,s,rdhi,rdlo,rs,rm) \ - EMIT(((cond)<<28) | 0x00c00000 | ((s)<<20) | ((rdhi)<<16) | ((rdlo)<<12) | ((rs)<<8) | 0x90 | (rm)) + EMIT(((cond)<<28) | 0x00c00000 | ((s)<<20) | ((rdhi)<<16) | ((rdlo)<<12) | ((rs)<<8) | 0x90 | (rm), M3(rdhi,rdlo,s?SR:NO), M3(rs,rm,CYC2)) #define EOP_C_SMLAL(cond,s,rdhi,rdlo,rs,rm) \ - EMIT(((cond)<<28) | 0x00e00000 | ((s)<<20) | ((rdhi)<<16) | ((rdlo)<<12) | ((rs)<<8) | 0x90 | (rm)) + EMIT(((cond)<<28) | 0x00e00000 | ((s)<<20) | ((rdhi)<<16) | ((rdlo)<<12) | ((rs)<<8) | 0x90 | (rm), M3(rdhi,rdlo,s?SR:NO), M5(rs,rm,rdlo,rdhi,CYC2)) #define EOP_MUL(rd,rm,rs) EOP_C_MUL(A_COND_AL,0,rd,rs,rm) // note: rd != rm #define EOP_C_MRS(cond,rd) \ - EMIT(((cond)<<28) | 0x010f0000 | ((rd)<<12)) + EMIT(((cond)<<28) | 0x010f0000 | ((rd)<<12), M1(rd), M1(SR)) #define EOP_C_MSR_IMM(cond,ror2,imm) \ - EMIT(((cond)<<28) | 0x0328f000 | ((ror2)<<8) | (imm)) // cpsr_f + EMIT(((cond)<<28) | 0x0328f000 | ((ror2)<<8) | (imm), M1(SR), 0) // cpsr_f #define EOP_C_MSR_REG(cond,rm) \ - EMIT(((cond)<<28) | 0x0128f000 | (rm)) // cpsr_f + EMIT(((cond)<<28) | 0x0128f000 | (rm), M1(SR), M1(rm)) // cpsr_f #define 
EOP_MRS(rd) EOP_C_MRS(A_COND_AL,rd) #define EOP_MSR_IMM(ror2,imm) EOP_C_MSR_IMM(A_COND_AL,ror2,imm) #define EOP_MSR_REG(rm) EOP_C_MSR_REG(A_COND_AL,rm) #define EOP_MOVW(rd,imm) \ - EMIT(0xe3000000 | ((rd)<<12) | ((imm)&0xfff) | (((imm)<<4)&0xf0000)) + EMIT(0xe3000000 | ((rd)<<12) | ((imm)&0xfff) | (((imm)<<4)&0xf0000), M1(rd), NO) #define EOP_MOVT(rd,imm) \ - EMIT(0xe3400000 | ((rd)<<12) | (((imm)>>16)&0xfff) | (((imm)>>12)&0xf0000)) + EMIT(0xe3400000 | ((rd)<<12) | (((imm)>>16)&0xfff) | (((imm)>>12)&0xf0000), M1(rd), NO) static inline int count_bits(unsigned val) { @@ -326,7 +444,7 @@ static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int } idx = emith_pool_literal(imm, &o); literal_insn[literal_iindex++] = (u32 *)tcache_ptr; - EOP_LDR_IMM2(cond, rd, 15, idx * sizeof(u32)); + EOP_LDR_IMM2(cond, rd, PC, idx * sizeof(u32)); if (o > 0) EOP_C_DOP_IMM(cond, A_OP_ADD, 0, rd, rd, 0, o); else if (o < 0) @@ -411,10 +529,10 @@ static int emith_xbranch(int cond, void *target, int is_call) #ifdef __EPOC32__ // elprintf(EL_SVP, "emitting indirect jmp %08x->%08x", tcache_ptr, target); if (is_call) - EOP_ADD_IMM(14,15,0,8); // add lr,pc,#8 - EOP_C_AM2_IMM(cond,1,0,1,15,15,0); // ldrcc pc,[pc] - EOP_MOV_REG_SIMPLE(15,15); // mov pc, pc - EMIT((u32)target); + EOP_ADD_IMM(LR,PC,0,8); // add lr,pc,#8 + EOP_C_AM2_IMM(cond,1,0,1,PC,PC,0); // ldrcc pc,[pc] + EOP_MOV_REG_SIMPLE(PC,PC); // mov pc, pc + EMIT((u32)target,M1(PC),0); #else // should never happen elprintf(EL_STATUS|EL_SVP|EL_ANOMALY, "indirect jmp %08x->%08x", target, tcache_ptr); @@ -438,6 +556,7 @@ static void emith_pool_commit(int jumpover) pool += sizeof(u32); emith_xbranch(A_COND_AL, (u8 *)pool + sz, 0); } + emith_flush(); // safety check - pool must be after insns and reachable if ((u32)(pool - (u8 *)literal_insn[0] + 8) > 0xfff) { elprintf(EL_STATUS|EL_SVP|EL_ANOMALY, @@ -466,12 +585,30 @@ static inline void emith_pool_check(void) emith_pool_commit(1); } +static inline int emith_pool_index(int 
tcache_offs) +{ + u32 *ptr = (u32 *)tcache_ptr - tcache_offs; + int i; + + for (i = literal_iindex-1; i >= 0 && literal_insn[i] >= ptr; i--) + if (literal_insn[i] == ptr) + return i; + return -1; +} + +static inline void emith_pool_adjust(int pool_index, int move_offs) +{ + if (pool_index >= 0) + literal_insn[pool_index] += move_offs; +} + #define JMP_POS(ptr) \ ptr = tcache_ptr; \ - tcache_ptr += sizeof(u32) + EMIT(0,M1(PC),0); #define JMP_EMIT(cond, ptr) { \ u32 val_ = (u32 *)tcache_ptr - (u32 *)(ptr) - 2; \ + emith_flush(); \ EOP_C_B_PTR(ptr, cond, 0, val_ & 0xffffff); \ } @@ -660,14 +797,14 @@ static inline void emith_pool_check(void) #define emith_tst_r_imm(r, imm) \ emith_top_imm(A_COND_AL, A_OP_TST, r, imm) -#define emith_cmp_r_imm(r, imm) { \ +#define emith_cmp_r_imm(r, imm) do { \ u32 op_ = A_OP_CMP, imm_ = (u8)imm; \ if ((s8)imm_ < 0) { \ imm_ = (u8)-imm_; \ op_ = A_OP_CMN; \ } \ emith_top_imm(A_COND_AL, op_, r, imm_); \ -} +} while (0) #define emith_subf_r_imm(r, imm) \ emith_op_imm(A_COND_AL, 1, A_OP_SUB, r, imm) @@ -693,12 +830,12 @@ static inline void emith_pool_check(void) #define emith_tst_r_imm_c(cond, r, imm) \ emith_top_imm(cond, A_OP_TST, r, imm) -#define emith_move_r_imm_s8(r, imm) { \ +#define emith_move_r_imm_s8(r, imm) do { \ if ((s8)(imm) < 0) \ EOP_MVN_IMM(r, 0, ((u8)(imm) ^ 0xff)); \ else \ EOP_MOV_IMM(r, 0, (u8)imm); \ -} +} while (0) #define emith_and_r_r_imm(d, s, imm) \ emith_op_imm2(A_COND_AL, 0, A_OP_AND, d, s, imm) @@ -752,11 +889,11 @@ static inline void emith_pool_check(void) EOP_MOV_REG(A_COND_AL,1,d,s,A_AM1_ASR,cnt) // note: only C flag updated correctly -#define emith_rolf(d, s, cnt) { \ +#define emith_rolf(d, s, cnt) do { \ EOP_MOV_REG(A_COND_AL,1,d,s,A_AM1_ROR,32-(cnt)); \ /* we don't have ROL so we shift to get the right carry */ \ EOP_TST_REG(A_COND_AL,d,d,A_AM1_LSR,1); \ -} +} while (0) #define emith_rorf(d, s, cnt) \ EOP_MOV_REG(A_COND_AL,1,d,s,A_AM1_ROR,cnt) @@ -770,12 +907,12 @@ static inline void 
emith_pool_check(void) #define emith_negcf_r_r(d, s) \ EOP_C_DOP_IMM(A_COND_AL,A_OP_RSC,1,s,d,0,0) -#define emith_mul(d, s1, s2) { \ +#define emith_mul(d, s1, s2) do { \ if ((d) != (s1)) /* rd != rm limitation */ \ EOP_MUL(d, s1, s2); \ else \ EOP_MUL(d, s2, s1); \ -} +} while (0) #define emith_mul_u64(dlo, dhi, s1, s2) \ EOP_C_UMULL(A_COND_AL,0,dhi,dlo,s1,s2) @@ -855,7 +992,7 @@ static inline void emith_pool_check(void) #define emith_ctx_do_multiple(op, r, offs, count, tmpr) do { \ int v_, r_ = r, c_ = count, b_ = CONTEXT_REG; \ for (v_ = 0; c_; c_--, r_++) \ - v_ |= 1 << r_; \ + v_ |= M1(r_); \ if ((offs) != 0) { \ EOP_ADD_IMM(tmpr,CONTEXT_REG,30/2,(offs)>>2);\ b_ = tmpr; \ @@ -869,7 +1006,7 @@ static inline void emith_pool_check(void) #define emith_ctx_write_multiple(r, offs, count, tmpr) \ emith_ctx_do_multiple(EOP_STMIA, r, offs, count, tmpr) -#define emith_clear_msb_c(cond, d, s, count) { \ +#define emith_clear_msb_c(cond, d, s, count) do { \ u32 t; \ if ((count) <= 8) { \ t = 8 - (count); \ @@ -883,24 +1020,24 @@ static inline void emith_pool_check(void) EOP_MOV_REG(cond,0,d,s,A_AM1_LSL,count); \ EOP_MOV_REG(cond,0,d,d,A_AM1_LSR,count); \ } \ -} +} while (0) #define emith_clear_msb(d, s, count) \ emith_clear_msb_c(A_COND_AL, d, s, count) -#define emith_sext(d, s, bits) { \ +#define emith_sext(d, s, bits) do { \ EOP_MOV_REG_LSL(d,s,32 - (bits)); \ EOP_MOV_REG_ASR(d,d,32 - (bits)); \ -} +} while (0) -#define emith_do_caller_regs(mask, func) { \ +#define emith_do_caller_regs(mask, func) do { \ u32 _reg_mask = (mask) & 0x500f; \ if (_reg_mask) { \ if (__builtin_parity(_reg_mask) == 1) \ _reg_mask |= 0x10; /* eabi align */ \ func(_reg_mask); \ } \ -} +} while (0) #define emith_save_caller_regs(mask) \ emith_do_caller_regs(mask, EOP_STMFD_SP) @@ -933,10 +1070,11 @@ static inline void emith_pool_check(void) *ptr_ = (*ptr_ & 0xff000000) | (val_ & 0x00ffffff); \ } while (0) -#define emith_jump_at(ptr, target) { \ +#define emith_jump_at(ptr, target) do { \ u32 val_ = 
(u32 *)(target) - (u32 *)(ptr) - 2; \ + emith_flush(); \ EOP_C_B_PTR(ptr, A_COND_AL, 0, val_ & 0xffffff); \ -} +} while (0) #define emith_jump_reg_c(cond, r) \ EOP_C_BX(cond, r) @@ -945,7 +1083,7 @@ static inline void emith_pool_check(void) emith_jump_reg_c(A_COND_AL, r) #define emith_jump_ctx_c(cond, offs) \ - EOP_LDR_IMM2(cond,15,CONTEXT_REG,offs) + EOP_LDR_IMM2(cond,PC,CONTEXT_REG,offs) #define emith_jump_ctx(offs) \ emith_jump_ctx_c(A_COND_AL, offs) @@ -956,30 +1094,30 @@ static inline void emith_pool_check(void) #define emith_call(target) \ emith_call_cond(A_COND_AL, target) -#define emith_call_reg(r) { \ - emith_move_r_r(14, 15); \ +#define emith_call_reg(r) do { \ + emith_move_r_r(LR, PC); \ EOP_C_BX(A_COND_AL, r); \ -} +} while (0) -#define emith_call_ctx(offs) { \ - emith_move_r_r(14, 15); \ +#define emith_call_ctx(offs) do { \ + emith_move_r_r(LR, PC); \ emith_jump_ctx(offs); \ -} +} while (0) #define emith_ret_c(cond) \ - emith_jump_reg_c(cond, 14) + emith_jump_reg_c(cond, LR) #define emith_ret() \ emith_ret_c(A_COND_AL) #define emith_ret_to_ctx(offs) \ - emith_ctx_write(14, offs) + emith_ctx_write(LR, offs) #define emith_push_ret() \ - EOP_STMFD_SP(A_R14M) + EOP_STMFD_SP(M1(LR)) #define emith_pop_and_ret() \ - EOP_LDMFD_SP(A_R15M) + EOP_LDMFD_SP(M1(PC)) #define host_instructions_updated(base, end) \ cache_flush_d_inval_i(base, end) @@ -990,30 +1128,30 @@ static inline void emith_pool_check(void) /* SH2 drc specific */ /* pushes r12 for eabi alignment */ #define emith_sh2_drc_entry() \ - EOP_STMFD_SP(A_R4M|A_R5M|A_R6M|A_R7M|A_R8M|A_R9M|A_R10M|A_R11M|A_R12M|A_R14M) + EOP_STMFD_SP(M10(4,5,6,7,8,9,10,11,12,LR)) #define emith_sh2_drc_exit() \ - EOP_LDMFD_SP(A_R4M|A_R5M|A_R6M|A_R7M|A_R8M|A_R9M|A_R10M|A_R11M|A_R12M|A_R15M) + EOP_LDMFD_SP(M10(4,5,6,7,8,9,10,11,12,PC)) // assumes a is in arg0, tab, func and mask are temp -#define emith_sh2_rcall(a, tab, func, mask) { \ +#define emith_sh2_rcall(a, tab, func, mask) do { \ emith_lsr(mask, a, SH2_READ_SHIFT); \ 
EOP_ADD_REG_LSL(tab, tab, mask, 3); \ - if (func < mask) EOP_LDMIA(tab, (1<>= count; \ if (d != s) \ emith_move_r_r(d, s); \ emith_and_r_imm(d, t); \ -} +} while (0) -#define emith_clear_msb_c(cond, d, s, count) { \ +#define emith_clear_msb_c(cond, d, s, count) do { \ (void)(cond); \ emith_clear_msb(d, s, count); \ -} +} while (0) -#define emith_sext(d, s, bits) { \ +#define emith_sext(d, s, bits) do { \ emith_lsl(d, s, 32 - (bits)); \ emith_asr(d, d, 32 - (bits)); \ -} +} while (0) #define emith_setc(r) do { \ assert(is_abcdx(r)); \ @@ -737,16 +737,16 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; } while (0) // assumes EBX is free -#define emith_ret_to_ctx(offs) { \ +#define emith_ret_to_ctx(offs) do { \ emith_pop(xBX); \ emith_ctx_write(xBX, offs); \ -} +} while (0) -#define emith_jump(ptr) { \ +#define emith_jump(ptr) do { \ u32 disp = (u8 *)(ptr) - ((u8 *)tcache_ptr + 5); \ EMIT_OP(0xe9); \ EMIT(disp, u32); \ -} +} while (0) #define emith_jump_patchable(target) \ emith_jump(target) @@ -767,17 +767,17 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; EMIT_PTR((u8 *)(ptr) + offs_, disp_ - offs_, u32); \ } while (0) -#define emith_jump_at(ptr, target) { \ +#define emith_jump_at(ptr, target) do { \ u32 disp_ = (u8 *)(target) - ((u8 *)(ptr) + 5); \ EMIT_PTR(ptr, 0xe9, u8); \ EMIT_PTR((u8 *)(ptr) + 1, disp_, u32); \ -} +} while (0) -#define emith_call(ptr) { \ +#define emith_call(ptr) do { \ u32 disp = (u8 *)(ptr) - ((u8 *)tcache_ptr + 5); \ EMIT_OP(0xe8); \ EMIT(disp, u32); \ -} +} while (0) #define emith_call_cond(cond, ptr) \ emith_call(ptr) @@ -889,18 +889,18 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; default: rd = xCX; break; \ } -#define emith_sh2_drc_entry() { \ +#define emith_sh2_drc_entry() do { \ emith_push(xBX); \ emith_push(xBP); \ emith_push(xSI); /* to align */ \ -} +} while (0) -#define emith_sh2_drc_exit() { \ +#define emith_sh2_drc_exit() do { \ emith_pop(xSI); \ emith_pop(xBP); \ emith_pop(xBX); \ emith_ret(); \ -} +} 
while (0) #else // _WIN32 @@ -912,22 +912,22 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; default: rd = 9; break; \ } -#define emith_sh2_drc_entry() { \ +#define emith_sh2_drc_entry() do { \ emith_push(xBX); \ emith_push(xBP); \ emith_push(xSI); \ emith_push(xDI); \ emith_add_r_r_ptr_imm(xSP, xSP, -8*5); \ -} +} while (0) -#define emith_sh2_drc_exit() { \ +#define emith_sh2_drc_exit() do { \ emith_add_r_r_ptr_imm(xSP, xSP, 8*5); \ emith_pop(xDI); \ emith_pop(xSI); \ emith_pop(xBP); \ emith_pop(xBX); \ emith_ret(); \ -} +} while (0) #endif // _WIN32 @@ -949,20 +949,20 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; default: rd = xBX; break; \ } -#define emith_sh2_drc_entry() { \ +#define emith_sh2_drc_entry() do { \ emith_push(xBX); \ emith_push(xBP); \ emith_push(xSI); \ emith_push(xDI); \ -} +} while (0) -#define emith_sh2_drc_exit() { \ +#define emith_sh2_drc_exit() do { \ emith_pop(xDI); \ emith_pop(xSI); \ emith_pop(xBP); \ emith_pop(xBX); \ emith_ret(); \ -} +} while (0) #endif @@ -982,7 +982,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; if ((mask) & (1 << xAX)) emith_pop(xAX); \ } while (0) -#define emith_sh2_rcall(a, tab, func, mask) { \ +#define emith_sh2_rcall(a, tab, func, mask) do { \ emith_lsr(mask, a, SH2_READ_SHIFT); \ EMIT_REX_IF(1, mask, tab); \ EMIT_OP_MODRM64(0x8d, 0, tab, 4); \ @@ -995,9 +995,9 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; EMIT_OP_MODRM64(0x8b, 1, mask, tab); \ EMIT(1 << PTR_SCALE, u8); /* mov mask, [tab + {4,8}] */ \ emith_add_r_r_ptr(func, func); \ -} +} while (0) -#define emith_sh2_wcall(a, val, tab, func) { \ +#define emith_sh2_wcall(a, val, tab, func) do { \ int arg2_; \ host_arg2reg(arg2_, 2); \ emith_lsr(func, a, SH2_WRITE_SHIFT); /* tmp = a >> WRT_SHIFT */ \ @@ -1006,9 +1006,9 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; EMIT_SIB64(PTR_SCALE, func, tab); /* mov tmp, [tab + tmp * {4,8}] */ \ emith_move_r_r_ptr(arg2_, CONTEXT_REG); \ emith_jump_reg(func); \ -} +} while (0) 
-#define emith_sh2_dtbf_loop() { \ +#define emith_sh2_dtbf_loop() do { \ u8 *jmp0; /* negative cycles check */ \ u8 *jmp1; /* unsinged overflow check */ \ int cr, rn; \ @@ -1032,15 +1032,15 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; emith_move_r_imm(rn, 0); \ JMP8_EMIT(ICOND_JA, jmp1); \ rcache_free_tmp(tmp_); \ -} +} while (0) -#define emith_write_sr(sr, srcr) { \ +#define emith_write_sr(sr, srcr) do { \ int tmp_ = rcache_get_tmp(); \ emith_clear_msb(tmp_, srcr, 22); \ emith_bic_r_imm(sr, 0x3ff); \ emith_or_r_r(sr, tmp_); \ rcache_free_tmp(tmp_); \ -} +} while (0) #define emith_tpop_carry(sr, is_sub) \ emith_lsr(sr, sr, 1) @@ -1055,7 +1055,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; * t = carry(Rn -= Rm) * T ^= t */ -#define emith_sh2_div1_step(rn, rm, sr) { \ +#define emith_sh2_div1_step(rn, rm, sr) do { \ u8 *jmp0, *jmp1; \ int tmp_ = rcache_get_tmp(); \ emith_eor_r_r(tmp_, tmp_); \ @@ -1069,7 +1069,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; emith_adc_r_r(tmp_, tmp_); \ emith_eor_r_r(sr, tmp_); \ rcache_free_tmp(tmp_); \ -} +} while (0) /* mh:ml += rn*rm, does saturation if required by S bit. 
rn, rm must be TEMP */ #define emith_sh2_macl(ml, mh, rn, rm, sr) do { \ @@ -1123,3 +1123,5 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_pool_check() /**/ #define emith_pool_commit(j) /**/ +#define emith_insn_ptr() ((u8 *)tcache_ptr) +#define emith_flush() /**/ diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 517be81c..85ce799b 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -154,8 +154,8 @@ enum op_types { static u8 *tcache_dsm_ptrs[3]; static char sh2dasm_buff[64]; #define do_host_disasm(tcid) \ - host_dasm(tcache_dsm_ptrs[tcid], tcache_ptr - tcache_dsm_ptrs[tcid]); \ - tcache_dsm_ptrs[tcid] = tcache_ptr + host_dasm(tcache_dsm_ptrs[tcid], emith_insn_ptr() - tcache_dsm_ptrs[tcid]); \ + tcache_dsm_ptrs[tcid] = emith_insn_ptr() #else #define do_host_disasm(x) #endif @@ -2664,6 +2664,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); FLUSH_CYCLES(sr); rcache_flush(); + emith_flush(); // make block entry v = block->entry_count; @@ -3933,7 +3934,9 @@ end_op: if (target == NULL) return NULL; emith_jump_patchable(target); - } + } else + rcache_flush(); + emith_flush(); // link local branches for (i = 0; i < branch_patch_count; i++) { @@ -3996,21 +3999,25 @@ static void sh2_generate_utils(void) emith_move_r_r(arg1, arg1); // nop emith_move_r_r(arg2, arg2); // nop emith_move_r_r(arg3, arg3); // nop + emith_flush(); // sh2_drc_write8(u32 a, u32 d) sh2_drc_write8 = (void *)tcache_ptr; emith_ctx_read_ptr(arg2, offsetof(SH2, write8_tab)); emith_sh2_wcall(arg0, arg1, arg2, arg3); + emith_flush(); // sh2_drc_write16(u32 a, u32 d) sh2_drc_write16 = (void *)tcache_ptr; emith_ctx_read_ptr(arg2, offsetof(SH2, write16_tab)); emith_sh2_wcall(arg0, arg1, arg2, arg3); + emith_flush(); // sh2_drc_write32(u32 a, u32 d) sh2_drc_write32 = (void *)tcache_ptr; emith_ctx_read_ptr(arg2, offsetof(SH2, write32_tab)); emith_sh2_wcall(arg0, arg1, arg2, arg3); + emith_flush(); // d = 
sh2_drc_read8(u32 a) sh2_drc_read8 = (void *)tcache_ptr; @@ -4024,6 +4031,7 @@ static void sh2_generate_utils(void) EMITH_SJMP_END(DCOND_CS); emith_move_r_r_ptr(arg1, CONTEXT_REG); emith_jump_reg(arg2); + emith_flush(); // d = sh2_drc_read16(u32 a) sh2_drc_read16 = (void *)tcache_ptr; @@ -4036,6 +4044,7 @@ static void sh2_generate_utils(void) EMITH_SJMP_END(DCOND_CS); emith_move_r_r_ptr(arg1, CONTEXT_REG); emith_jump_reg(arg2); + emith_flush(); // d = sh2_drc_read32(u32 a) sh2_drc_read32 = (void *)tcache_ptr; @@ -4049,11 +4058,13 @@ static void sh2_generate_utils(void) EMITH_SJMP_END(DCOND_CS); emith_move_r_r_ptr(arg1, CONTEXT_REG); emith_jump_reg(arg2); + emith_flush(); // sh2_drc_exit(void) sh2_drc_exit = (void *)tcache_ptr; emit_do_static_regs(1, arg2); emith_sh2_drc_exit(); + emith_flush(); // sh2_drc_dispatcher(void) sh2_drc_dispatcher = (void *)tcache_ptr; @@ -4091,6 +4102,7 @@ static void sh2_generate_utils(void) emit_block_entry(); // XXX: can't translate, fail emith_call(dr_failure); + emith_flush(); // sh2_drc_test_irq(void) // assumes it's called from main function (may jump to dispatcher) @@ -4141,6 +4153,7 @@ static void sh2_generate_utils(void) #endif emith_jump(sh2_drc_dispatcher); rcache_invalidate(); + emith_flush(); // sh2_drc_entry(SH2 *sh2) sh2_drc_entry = (void *)tcache_ptr; @@ -4149,6 +4162,7 @@ static void sh2_generate_utils(void) emit_do_static_regs(0, arg2); emith_call(sh2_drc_test_irq); emith_jump(sh2_drc_dispatcher); + emith_flush(); #ifdef PDB_NET // debug @@ -4163,6 +4177,7 @@ static void sh2_generate_utils(void) emith_adc_r_imm(arg2, 0x01000000); \ emith_ctx_write(arg2, offsetof(SH2, pdb_io_csum[1])); \ emith_pop_and_ret(); \ + emith_flush(); \ func = tmp; \ } #define MAKE_WRITE_WRAPPER(func) { \ @@ -4175,6 +4190,7 @@ static void sh2_generate_utils(void) emith_ctx_write(arg2, offsetof(SH2, pdb_io_csum[1])); \ emith_move_r_r_ptr(arg2, CONTEXT_REG); \ emith_jump(func); \ + emith_flush(); \ func = tmp; \ } diff --git 
a/pico/carthw/svp/compiler.c b/pico/carthw/svp/compiler.c index 1ec71e75..06aa1791 100644 --- a/pico/carthw/svp/compiler.c +++ b/pico/carthw/svp/compiler.c @@ -1796,6 +1796,7 @@ void *ssp_translate_block(int pc) tr_flush_dirty_pmcrs(); block_end = emit_block_epilogue(ccount, end_cond, jump_pc, pc); emith_pool_commit(0); + emith_flush(); if (tcache_ptr - (u32 *)tcache > DRC_TCACHE_SIZE/4) { elprintf(EL_ANOMALY|EL_STATUS|EL_SVP, "tcache overflow!\n"); From 213b7f42c1439acf452a65dda0ebd5a18883914b Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 30 Apr 2019 21:18:12 +0200 Subject: [PATCH 0192/1110] sh2 drc, add loop detector, handle delay/idle loops --- cpu/drc/emit_arm.c | 35 +++++++++ cpu/drc/emit_x86.c | 56 ++++++++++++++ cpu/sh2/compiler.c | 177 ++++++++++++++++++++++++++++++++++++++------- cpu/sh2/compiler.h | 9 ++- 4 files changed, 247 insertions(+), 30 deletions(-) diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index bfce29fa..37d5cf1b 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -846,6 +846,9 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) #define emith_add_r_r_ptr_imm(d, s, imm) \ emith_add_r_r_imm(d, s, imm) +#define emith_sub_r_r_imm_c(cond, d, s, imm) \ + emith_op_imm2(cond, 0, A_OP_SUB, d, s, (imm)) + #define emith_sub_r_r_imm(d, s, imm) \ emith_op_imm2(A_COND_AL, 0, A_OP_SUB, d, s, imm) @@ -1172,6 +1175,38 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) rcache_free_tmp(tmp_); \ } while (0) +#define emith_sh2_delay_loop(cycles, reg) do { \ + int sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); \ + int t1 = rcache_get_tmp(); \ + int t2 = rcache_get_tmp(); \ + int t3 = rcache_get_tmp(); \ + /* if (sr < 0) return */ \ + emith_asrf(t2, sr, 12); \ + EMITH_JMP_START(DCOND_LE); \ + /* turns = sr.cycles / cycles */ \ + emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)) + 1); \ + emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */ \ + rcache_free_tmp(t3); \ + if (reg >= 0) { \ + /* if (reg <= turns) 
turns = reg-1 */ \ + t3 = rcache_get_reg(reg, RC_GR_RMW, NULL); \ + emith_cmp_r_r(t3, t2); \ + emith_sub_r_r_imm_c(DCOND_LE, t2, t3, 1); \ + /* if (reg <= 1) turns = 0 */ \ + emith_cmp_r_imm(t3, 1); \ + emith_move_r_imm_c(DCOND_LE, t2, 0); \ + /* reg -= turns */ \ + emith_sub_r_r(t3, t2); \ + } \ + /* sr.cycles -= turns * cycles; */ \ + emith_move_r_imm(t1, cycles); \ + emith_mul(t1, t2, t1); \ + emith_sub_r_r_r_lsl(sr, sr, t1, 12); \ + EMITH_JMP_END(DCOND_LE); \ + rcache_free_tmp(t1); \ + rcache_free_tmp(t2); \ +} while (0) + #define emith_write_sr(sr, srcr) do { \ emith_lsr(sr, sr, 10); \ emith_or_r_r_r_lsl(sr, sr, srcr, 22); \ diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 10528abd..b8354789 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -293,6 +293,20 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; rcache_free_tmp(tmp_); \ } while (0) +#define emith_sub_r_r_r_lsl(d, s1, s2, lslimm) do { \ + int tmp_ = rcache_get_tmp(); \ + emith_lsl(tmp_, s2, lslimm); \ + emith_sub_r_r_r(d, s1, tmp_); \ + rcache_free_tmp(tmp_); \ +} while (0) + +#define emith_or_r_r_r_lsl(d, s1, s2, lslimm) do { \ + int tmp_ = rcache_get_tmp(); \ + emith_lsl(tmp_, s2, lslimm); \ + emith_or_r_r_r(d, s1, tmp_); \ + rcache_free_tmp(tmp_); \ +} while (0) + // _r_r_shift #define emith_or_r_r_lsl(d, s, lslimm) do { \ int tmp_ = rcache_get_tmp(); \ @@ -394,6 +408,10 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; emith_ror(d, s, cnt) #define emith_and_r_r_c(cond, d, s) \ emith_and_r_r(d, s); +#define emith_add_r_r_imm_c(cond, d, s, imm) \ + emith_add_r_r_imm(d, s, imm); +#define emith_sub_r_r_imm_c(cond, d, s, imm) \ + emith_sub_r_r_imm(d, s, imm); #define emith_read8_r_r_r_c(cond, r, rs, rm) \ emith_read8_r_r_r(r, rs, rm) @@ -1034,6 +1052,44 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; rcache_free_tmp(tmp_); \ } while (0) +#define emith_sh2_delay_loop(cycles, reg) do { \ + int sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); \ + int t1 = 
rcache_get_tmp(); \ + int t2 = rcache_get_tmp(); \ + int t3 = rcache_get_tmp(); \ + if (t3 == xAX) { t3 = t1; t1 = xAX; } /* for MUL */ \ + if (t3 == xDX) { t3 = t2; t2 = xDX; } \ + /* if (sr < 0) return */ \ + emith_asrf(t2, sr, 12); \ + EMITH_JMP_START(DCOND_LE); \ + /* turns = sr.cycles / cycles */ \ + emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)) + 1); \ + emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */ \ + rcache_free_tmp(t3); \ + if (reg >= 0) { \ + /* if (reg <= turns) turns = reg-1 */ \ + t3 = rcache_get_reg(reg, RC_GR_RMW, NULL); \ + emith_cmp_r_r(t3, t2); \ + EMITH_SJMP_START(DCOND_GT); \ + emith_sub_r_r_imm_c(DCOND_LE, t2, t3, 1); \ + EMITH_SJMP_END(DCOND_GT); \ + /* if (reg <= 1) turns = 0 */ \ + emith_cmp_r_imm(t3, 1); \ + EMITH_SJMP_START(DCOND_GT); \ + emith_move_r_imm_c(DCOND_LE, t2, 0); \ + EMITH_SJMP_END(DCOND_GT); \ + /* reg -= turns */ \ + emith_sub_r_r(t3, t2); \ + } \ + /* sr.cycles -= turns * cycles; */ \ + emith_move_r_imm(t1, cycles); \ + emith_mul_u64(t1, t2, t1, t2); \ + emith_sub_r_r_r_lsl(sr, sr, t1, 12); \ + EMITH_JMP_END(DCOND_LE); \ + rcache_free_tmp(t1); \ + rcache_free_tmp(t2); \ +} while (0) + #define emith_write_sr(sr, srcr) do { \ int tmp_ = rcache_get_tmp(); \ emith_clear_msb(tmp_, srcr, 22); \ diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 85ce799b..fd75cc44 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -41,6 +41,7 @@ #define BRANCH_CACHE 1 #define ALIAS_REGISTERS 1 #define REMAP_REGISTER 1 +#define LOOP_DETECTION 1 // limits (per block) #define MAX_BLOCK_SIZE (BLOCK_INSN_LIMIT * 6 * 6) @@ -135,6 +136,7 @@ enum op_types { OP_BRANCH_RF, // indirect far (PC + Rm) OP_SETCLRT, // T flag set/clear OP_MOVE, // register move + OP_LOAD_CONST,// load const to register OP_LOAD_POOL, // literal pool load, imm is address OP_MOVA, OP_SLEEP, @@ -147,6 +149,9 @@ enum op_types { #define OP_ISBRAUC(op) (BITMASK4(OP_BRANCH, OP_BRANCH_R, OP_BRANCH_RF, OP_RTE) \ & BITMASK1(op)) #define OP_ISBRACND(op) 
(BITMASK2(OP_BRANCH_CT, OP_BRANCH_CF) & BITMASK1(op)) +#define OP_ISBRAIMM(op) (BITMASK3(OP_BRANCH, OP_BRANCH_CT, OP_BRANCH_CF) \ + & BITMASK1(op)) +#define OP_ISBRAIND(op) (BITMASK2(OP_BRANCH_R, OP_BRANCH_RF) & BITMASK1(op)) #ifdef DRC_SH2 @@ -2537,7 +2542,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) u32 branch_patch_pc[MAX_LOCAL_BRANCHES]; int branch_patch_count = 0; u8 op_flags[BLOCK_INSN_LIMIT]; - struct { + struct drcf { + int delay_reg:8; + u32 loop_type:8; u32 test_irq:1; u32 pending_branch_direct:1; u32 pending_branch_indirect:1; @@ -2556,7 +2563,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) int tmp, tmp2; int cycles; int i, v; - u32 u; + u32 u, m1, m2; int op; u16 crc; @@ -2603,14 +2610,64 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } // collect branch_targets that don't land on delay slots + m1 = m2 = v = op = 0; for (pc = base_pc, i = 0; pc < end_pc; i++, pc += 2) { - if (!(op_flags[i] & OF_BTARGET)) - continue; - if (op_flags[i] & OF_DELAY_OP) { + if (op_flags[i] & OF_DELAY_OP) op_flags[i] &= ~OF_BTARGET; - continue; + if (op_flags[i] & OF_BTARGET) + ADD_TO_ARRAY(branch_target_pc, branch_target_count, pc, ); +#if LOOP_DETECTION + // loop types detected: + // 1. target: ... BRA target -> idle loop + // 2. target: ... delay insn ... BF target -> delay loop + // 3. target: ... poll insn ... BF/BT target -> poll loop + // 4. target: ... poll insn ... BF/BT exit ... BRA target, exit: -> poll + // conditions: + // a. no further branch targets between target and back jump. + // b. no unconditional branch insn inside the loop. + // c. exactly one poll or delay insn is allowed inside a delay/poll loop + // (scan_block marks loops only if they meet conditions a through c) + // d. idle loops do not modify anything but PC,SR and contain no branches + // e. delay/poll loops do not modify anything but the concerned reg,PC,SR + // f. loading constants into registers inside the loop is allowed + // g. 
a delay/poll loop must have a conditional branch somewhere + // h. an idle loop must not have a conditional branch + if (op_flags[i] & OF_BTARGET) { + // possible loop entry point + drcf.loop_type = op_flags[i] & OF_LOOP; + drcf.pending_branch_direct = drcf.pending_branch_indirect = 0; + op = OF_IDLE_LOOP; // loop type + v = i; + m1 = m2 = 0; } - ADD_TO_ARRAY(branch_target_pc, branch_target_count, pc, break); + if (drcf.loop_type) { + // detect loop type, and store poll/delay register + if (op_flags[i] & OF_POLL_INSN) { + op = OF_POLL_LOOP; + m1 |= ops[i].dest; // loop poll/delay regs + } else if (op_flags[i] & OF_DELAY_INSN) { + op = OF_DELAY_LOOP; + m1 |= ops[i].dest; + } else if (ops[i].op != OP_LOAD_POOL && ops[i].op != OP_LOAD_CONST + && (ops[i].op != OP_MOVE || op != OF_POLL_LOOP)) { + // not (MOV @(PC) or MOV # or (MOV reg and poll)), condition f + m2 |= ops[i].dest; // regs modified by other insns + } + // branch detector + if (OP_ISBRAIMM(ops[i].op) && ops[i].imm == base_pc + 2*v) + drcf.pending_branch_direct = 1; // backward branch detected + if (OP_ISBRACND(ops[i].op)) + drcf.pending_branch_indirect = 1; // conditions g,h - cond.branch + // poll/idle loops terminate with their backwards branch to the loop start + if (drcf.pending_branch_direct && !(op_flags[i+1] & OF_DELAY_OP)) { + m2 &= ~(m1 | BITMASK2(SHR_PC, SHR_SR)); // conditions d,e + g,h + if (m2 || ((op == OF_IDLE_LOOP) == (drcf.pending_branch_indirect))) + op = 0; // conditions not met + op_flags[v] = (op_flags[v] & ~OF_LOOP) | op; // set loop type + drcf.loop_type = 0; + } + } +#endif } if (branch_target_count > 0) { @@ -2634,6 +2691,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // clear stale state after compile errors rcache_invalidate(); + drcf = (struct drcf) { 0 }; // ------------------------------------------------- // 3rd pass: actual compilation @@ -2653,8 +2711,14 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) #endif #if (DRC_DEBUG & 4) 
DasmSH2(sh2dasm_buff, pc, op); - printf("%c%08x %04x %s\n", (op_flags[i] & OF_BTARGET) ? '*' : ' ', - pc, op, sh2dasm_buff); + if (op_flags[i] & OF_BTARGET) { + if ((op_flags[i] & OF_LOOP) == OF_DELAY_LOOP) tmp3 = '+'; + else if ((op_flags[i] & OF_LOOP) == OF_POLL_LOOP) tmp3 = '='; + else if ((op_flags[i] & OF_LOOP) == OF_IDLE_LOOP) tmp3 = '~'; + else tmp3 = '*'; + } else if (drcf.loop_type) tmp3 = '.'; + else tmp3 = ' '; + printf("%c%08x %04x %s\n", tmp3, pc, op, sh2dasm_buff); #endif if (op_flags[i] & OF_BTARGET) @@ -2702,6 +2766,10 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) v = find_in_array(branch_target_pc, branch_target_count, pc); if (v >= 0) branch_target_ptr[v] = tcache_ptr; +#if LOOP_DETECTION + drcf.loop_type = op_flags[i] & OF_LOOP; + drcf.delay_reg = -1; +#endif // must update PC emit_move_r_imm32(SHR_PC, pc); @@ -3388,6 +3456,14 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) goto end_op; case 1: // DT Rn 0100nnnn00010000 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); +#if LOOP_DETECTION + if (drcf.loop_type == OF_DELAY_LOOP) { + if (drcf.delay_reg == -1) + drcf.delay_reg = GET_Rn(); + else + drcf.loop_type = 0; + } +#endif emith_bic_r_imm(sr, T); tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); emith_subf_r_r_imm(tmp, tmp2, 1); @@ -3832,7 +3908,7 @@ end_op: drcf.test_irq = 0; } - // branch handling (with/without delay) + // branch handling if (drcf.pending_branch_direct) { struct op_data *opd_b = @@ -3846,6 +3922,16 @@ end_op: ctaken = (op_flags[i] & OF_DELAY_OP) ? 
1 : 2; } cycles += ctaken; // assume branch taken +#if LOOP_DETECTION + if ((drcf.loop_type == OF_IDLE_LOOP || + (drcf.loop_type == OF_DELAY_LOOP && drcf.delay_reg >= 0))) + { + // idle or delay loop + emith_sh2_delay_loop(cycles, drcf.delay_reg); + drcf.loop_type = 0; + } +#endif + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); FLUSH_CYCLES(sr); rcache_clean(); @@ -3902,6 +3988,8 @@ end_op: emith_add_r_imm(sr, ctaken << 12); drcf.pending_branch_direct = 0; + if (target_pc >= base_pc && target_pc < pc) + drcf.loop_type = 0; } else if (drcf.pending_branch_indirect) { sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); @@ -3909,6 +3997,7 @@ end_op: rcache_flush(); emith_jump(sh2_drc_dispatcher); drcf.pending_branch_indirect = 0; + drcf.loop_type = 0; } do_host_disasm(tcache_id); @@ -4729,6 +4818,9 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, int end_block = 0; int i, i_end; u32 crc = 0; + // 2nd pass stuff + int last_btarget; // loop detector + enum { T_UNKNOWN, T_CLEAR, T_SET } t; // T propagation state memset(op_flags, 0, sizeof(*op_flags) * BLOCK_INSN_LIMIT); op_flags[0] |= OF_BTARGET; // block start is always a target @@ -4903,6 +4995,7 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, case 0x0e: // MOV.L @(R0,Rm),Rn 0000nnnnmmmm1110 opd->source = BITMASK3(GET_Rm(), SHR_R0, SHR_MEM); opd->dest = BITMASK1(GET_Rn()); + op_flags[i] |= OF_POLL_INSN; break; case 0x0f: // MAC.L @Rm+,@Rn+ 0000nnnnmmmm1111 opd->source = BITMASK6(GET_Rm(), GET_Rn(), SHR_SR, SHR_MACL, SHR_MACH, SHR_MEM); @@ -5027,6 +5120,7 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, case 1: // DT Rn 0100nnnn00010000 opd->source = BITMASK1(GET_Rn()); opd->dest = BITMASK2(GET_Rn(), SHR_T); + op_flags[i] |= OF_DELAY_INSN; break; default: goto undefined; @@ -5235,6 +5329,7 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, opd->source = BITMASK2(GET_Rm(), SHR_MEM); opd->dest = BITMASK1(GET_Rn()); opd->imm 
= (op & 0x0f) * 4; + op_flags[i] |= OF_POLL_INSN; break; ///////////////////////////////////////////// @@ -5252,6 +5347,7 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, case 0x02: // MOV.L @Rm,Rn 0110nnnnmmmm0010 opd->dest = BITMASK1(GET_Rn()); opd->source = BITMASK2(GET_Rm(), SHR_MEM); + op_flags[i] |= OF_POLL_INSN; break; case 0x0a: // NEGC Rm,Rn 0110nnnnmmmm1010 opd->source = BITMASK2(GET_Rm(), SHR_T); @@ -5394,6 +5490,7 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, opd->dest = BITMASK1(SHR_R0); opd->size = (op & 0x300) >> 8; opd->imm = (op & 0xff) << opd->size; + op_flags[i] |= OF_POLL_INSN; break; case 0x0300: // TRAPA #imm 11000011iiiiiiii opd->op = OP_TRAPA; @@ -5481,6 +5578,7 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, ///////////////////////////////////////////// case 0x0e: // MOV #imm,Rn 1110nnnniiiiiiii + opd->op = OP_LOAD_CONST; opd->dest = BITMASK1(GET_Rn()); opd->imm = (s8)op; break; @@ -5517,32 +5615,29 @@ end: // 2nd pass: some analysis lowest_literal = end_literals = lowest_mova = 0; + t = T_UNKNOWN; + last_btarget = 0; + op = 0; // delay/poll insns counter for (i = 0, pc = base_pc; i < i_end; i++, pc += 2) { opd = &ops[i]; crc += FETCH_OP(pc); // propagate T (TODO: DIV0U) - if ((opd->op == OP_SETCLRT && !opd->imm) || opd->op == OP_BRANCH_CT) - op_flags[i + 1] |= OF_T_CLEAR; - else if ((opd->op == OP_SETCLRT && opd->imm) || opd->op == OP_BRANCH_CF) - op_flags[i + 1] |= OF_T_SET; - if ((op_flags[i] & OF_BTARGET) || (opd->dest & BITMASK1(SHR_T))) - op_flags[i] &= ~(OF_T_SET | OF_T_CLEAR); - else - op_flags[i + 1] |= op_flags[i] & (OF_T_SET | OF_T_CLEAR); + t = T_UNKNOWN; - if ((opd->op == OP_BRANCH_CT && (op_flags[i] & OF_T_CLEAR)) || - (opd->op == OP_BRANCH_CF && (op_flags[i] & OF_T_SET))) - opd->op = OP_BRANCH_N; - else if ((opd->op == OP_BRANCH_CT && (op_flags[i] & OF_T_SET)) || - (opd->op == OP_BRANCH_CF && (op_flags[i] & OF_T_CLEAR))) { + if ((opd->op == 
OP_BRANCH_CT && t == T_SET) || + (opd->op == OP_BRANCH_CF && t == T_CLEAR)) { opd->op = OP_BRANCH; - if (op_flags[i + 1] & OF_DELAY_OP) - opd->cycles = 2; - else - opd->cycles = 3; - } + opd->cycles = (op_flags[i + 1] & OF_DELAY_OP) ? 2 : 3; + } else if ((opd->op == OP_BRANCH_CT && t == T_CLEAR) || + (opd->op == OP_BRANCH_CF && t == T_SET)) + opd->op = OP_BRANCH_N; + else if ((opd->op == OP_SETCLRT && !opd->imm) || opd->op == OP_BRANCH_CT) + t = T_CLEAR; + else if ((opd->op == OP_SETCLRT && opd->imm) || opd->op == OP_BRANCH_CF) + t = T_SET; + // "overscan" detection: unreachable code after unconditional branch // this can happen if the insn after a forward branch isn't a local target if (OP_ISBRAUC(opd->op)) { @@ -5575,6 +5670,32 @@ end: } } } +#if LOOP_DETECTION + // inner loop detection + // 1. a loop always starts with a branch target (for the backwards jump) + // 2. it doesn't contain more than one polling and/or delaying insn + // 3. it doesn't contain unconditional jumps + // 4. no overlapping of loops + if (op_flags[i] & OF_BTARGET) { + last_btarget = i; // possible loop starting point + op = 0; + } + // XXX let's hope nobody is putting a delay or poll insn in a delay slot :-/ + if (OP_ISBRAIMM(opd->op)) { + // BSR, BRA, BT, BF with immediate target + int i_tmp = (opd->imm - base_pc) / 2; // branch target, index in ops + if (i_tmp == last_btarget && op <= 1) { + op_flags[i_tmp] |= OF_LOOP; // conditions met -> mark loop + last_btarget = i+1; // condition 4 + } else if (opd->op == OP_BRANCH) + last_btarget = i+1; // condition 3 + } + else if (OP_ISBRAIND(opd->op)) + // BRAF, BSRF, JMP, JSR, register indirect. 
treat it as off-limits jump + last_btarget = i+1; // condition 3 + else if (op_flags[i] & (OF_POLL_INSN|OF_DELAY_INSN)) + op ++; // condition 2 +#endif } end_pc = base_pc + i_end * 2; diff --git a/cpu/sh2/compiler.h b/cpu/sh2/compiler.h index d5cde520..b098f6c6 100644 --- a/cpu/sh2/compiler.h +++ b/cpu/sh2/compiler.h @@ -18,9 +18,14 @@ void sh2_drc_frame(void); /* op_flags */ #define OF_DELAY_OP (1 << 0) #define OF_BTARGET (1 << 1) -#define OF_T_SET (1 << 2) // T is known to be set -#define OF_T_CLEAR (1 << 3) // ... clear +#define OF_LOOP (3 << 2) // NONE, IDLE, DELAY, POLL loop #define OF_B_IN_DS (1 << 4) +#define OF_DELAY_INSN (1 << 5) // DT, (TODO ADD+CMP?) +#define OF_POLL_INSN (1 << 6) // MOV @(...),Rn (no post increment), TST @(...) + +#define OF_IDLE_LOOP (1 << 2) +#define OF_DELAY_LOOP (2 << 2) +#define OF_POLL_LOOP (3 << 2) unsigned short scan_block(unsigned int base_pc, int is_slave, unsigned char *op_flags, unsigned int *end_pc, From 397ccdc6cf5d873b4399895d6e491ea38a598a88 Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 2 May 2019 23:16:55 +0200 Subject: [PATCH 0193/1110] sh2 drc, add detection for in-memory polling --- cpu/drc/emit_arm.c | 19 ++++-- cpu/drc/emit_x86.c | 37 +++++----- cpu/sh2/compiler.c | 94 +++++++++++++++++++++++--- cpu/sh2/sh2.h | 3 +- pico/32x/32x.c | 2 +- pico/32x/memory.c | 152 ++++++++++++++++++++++++++---------------- pico/32x/memory_arm.S | 23 ++----- pico/32x/sh2soc.c | 6 ++ pico/pico_int.h | 1 + 9 files changed, 224 insertions(+), 113 deletions(-) diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index 37d5cf1b..1b429b35 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -636,9 +636,13 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) #define EMITH_SJMP3_MID(cond) EMITH_NOTHING1(cond) #define EMITH_SJMP3_END() +#define emith_move_r_r_c(cond, d, s) \ + EOP_MOV_REG(cond,0,d,s,A_AM1_LSL,0) #define emith_move_r_r(d, s) \ - EOP_MOV_REG_SIMPLE(d, s) + emith_move_r_r_c(A_COND_AL, d, s) +#define 
emith_move_r_r_ptr_c(cond, d, s) \ + emith_move_r_r_c(cond, d, s) #define emith_move_r_r_ptr(d, s) \ emith_move_r_r(d, s) @@ -1116,11 +1120,16 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) #define emith_ret_to_ctx(offs) \ emith_ctx_write(LR, offs) -#define emith_push_ret() \ - EOP_STMFD_SP(M1(LR)) +/* pushes r12 for eabi alignment */ +#define emith_push_ret(r) do { \ + int r_ = (r >= 0 ? r : 12); \ + EOP_STMFD_SP(M2(r_,LR)); \ +} while (0) -#define emith_pop_and_ret() \ - EOP_LDMFD_SP(M1(PC)) +#define emith_pop_and_ret(r) do { \ + int r_ = (r >= 0 ? r : 12); \ + EOP_LDMFD_SP(M2(r_,PC)); \ +} while (0) #define host_instructions_updated(base, end) \ cache_flush_d_inval_i(base, end) diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index b8354789..9dd06262 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -381,21 +381,12 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; emith_arith_r_imm(4, r, ~(imm)) // fake conditionals (using SJMP instead) -#define emith_move_r_imm_c(cond, r, imm) do { \ - (void)(cond); \ - emith_move_r_imm(r, imm); \ -} while (0) - -#define emith_add_r_imm_c(cond, r, imm) do { \ - (void)(cond); \ - emith_add_r_imm(r, imm); \ -} while (0) - -#define emith_sub_r_imm_c(cond, r, imm) do { \ - (void)(cond); \ - emith_sub_r_imm(r, imm); \ -} while (0) - +#define emith_move_r_imm_c(cond, r, imm) \ + emith_move_r_imm(r, imm); +#define emith_add_r_imm_c(cond, r, imm) \ + emith_add_r_imm(r, imm); +#define emith_sub_r_imm_c(cond, r, imm) \ + emith_sub_r_imm(r, imm); #define emith_or_r_imm_c(cond, r, imm) \ emith_or_r_imm(r, imm) #define emith_eor_r_imm_c(cond, r, imm) \ @@ -404,6 +395,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; emith_bic_r_imm(r, imm) #define emith_tst_r_imm_c(cond, r, imm) \ emith_tst_r_imm(r, imm) +#define emith_move_r_r_ptr_c(cond, d, s) \ + emith_move_r_r_ptr(d, s) #define emith_ror_c(cond, d, s, cnt) \ emith_ror(d, s, cnt) #define emith_and_r_r_c(cond, d, s) \ @@ -819,12 +812,16 
@@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; EMIT(offs, u32); \ } while (0) -#define emith_push_ret() \ - emith_push(xSI); /* to align */ +#define emith_push_ret(r) do { \ + int r_ = (r >= 0 ? r : xSI); \ + emith_push(r_); /* always push to align */ \ +} while (0) -#define emith_pop_and_ret() \ - emith_pop(xSI); \ - emith_ret() +#define emith_pop_and_ret(r) do { \ + int r_ = (r >= 0 ? r : xSI); \ + emith_pop(r_); \ + emith_ret(); \ +} while (0) #define EMITH_JMP_START(cond) { \ u8 *cond_ptr; \ diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index fd75cc44..b7f54dd9 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -532,6 +532,9 @@ static void (*sh2_drc_test_irq)(void); static u32 REGPARM(1) (*sh2_drc_read8)(u32 a); static u32 REGPARM(1) (*sh2_drc_read16)(u32 a); static u32 REGPARM(1) (*sh2_drc_read32)(u32 a); +static u32 REGPARM(1) (*sh2_drc_read8_poll)(u32 a); +static u32 REGPARM(1) (*sh2_drc_read16_poll)(u32 a); +static u32 REGPARM(1) (*sh2_drc_read32_poll)(u32 a); static void REGPARM(2) (*sh2_drc_write8)(u32 a, u32 d); static void REGPARM(2) (*sh2_drc_write16)(u32 a, u32 d); static void REGPARM(2) (*sh2_drc_write32)(u32 a, u32 d); @@ -540,6 +543,7 @@ static void REGPARM(2) (*sh2_drc_write32)(u32 a, u32 d); #define MF_SIZEMASK 0x03 // size of access #define MF_POSTINCR 0x10 // post increment (for read_rr) #define MF_PREDECR MF_POSTINCR // pre decrement (for write_rr) +#define MF_POLLING 0x20 // include polling check in read // address space stuff static int dr_is_rom(u32 a) @@ -2263,11 +2267,18 @@ static int emit_memhandler_read(int size) rcache_evict_vreg(guest_regs[SHR_SR].vreg); #endif - switch (size & MF_SIZEMASK) { - case 0: emith_call(sh2_drc_read8); break; // 8 - case 1: emith_call(sh2_drc_read16); break; // 16 - case 2: emith_call(sh2_drc_read32); break; // 32 - } + if (size & MF_POLLING) + switch (size & MF_SIZEMASK) { + case 0: emith_call(sh2_drc_read8_poll); break; // 8 + case 1: emith_call(sh2_drc_read16_poll); break; // 16 + 
case 2: emith_call(sh2_drc_read32_poll); break; // 32 + } + else + switch (size & MF_SIZEMASK) { + case 0: emith_call(sh2_drc_read8); break; // 8 + case 1: emith_call(sh2_drc_read16); break; // 16 + case 2: emith_call(sh2_drc_read32); break; // 32 + } rcache_invalidate_tmp(); return rcache_get_tmp_ret(); @@ -2545,6 +2556,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) struct drcf { int delay_reg:8; u32 loop_type:8; + u32 polling:8; u32 test_irq:1; u32 pending_branch_direct:1; u32 pending_branch_indirect:1; @@ -2769,6 +2781,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) #if LOOP_DETECTION drcf.loop_type = op_flags[i] & OF_LOOP; drcf.delay_reg = -1; + drcf.polling = (drcf.loop_type == OF_POLL_LOOP ? MF_POLLING : 0); #endif // must update PC @@ -3176,7 +3189,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x0c: // MOV.B @(R0,Rm),Rn 0000nnnnmmmm1100 case 0x0d: // MOV.W @(R0,Rm),Rn 0000nnnnmmmm1101 case 0x0e: // MOV.L @(R0,Rm),Rn 0000nnnnmmmm1110 - emit_indirect_indexed_read(sh2, GET_Rn(), SHR_R0, GET_Rm(), op & 3); + emit_indirect_indexed_read(sh2, GET_Rn(), SHR_R0, GET_Rm(), (op & 3) | drcf.polling); goto end_op; case 0x0f: // MAC.L @Rm+,@Rn+ 0000nnnnmmmm1111 emit_indirect_read_double(sh2, &tmp, &tmp2, GET_Rn(), GET_Rm(), 2); @@ -3700,7 +3713,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) ///////////////////////////////////////////// case 0x05: // MOV.L @(disp,Rm),Rn 0101nnnnmmmmdddd - emit_memhandler_read_rr(sh2, GET_Rn(), GET_Rm(), (op & 0x0f) * 4, 2); + emit_memhandler_read_rr(sh2, GET_Rn(), GET_Rm(), (op & 0x0f) * 4, 2 | drcf.polling); goto end_op; ///////////////////////////////////////////// @@ -3713,7 +3726,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x04: // MOV.B @Rm+,Rn 0110nnnnmmmm0100 case 0x05: // MOV.W @Rm+,Rn 0110nnnnmmmm0101 case 0x06: // MOV.L @Rm+,Rn 0110nnnnmmmm0110 - tmp = ((op & 7) >= 4 && GET_Rn() != GET_Rm()) ? 
MF_POSTINCR : 0; + tmp = ((op & 7) >= 4 && GET_Rn() != GET_Rm()) ? MF_POSTINCR : drcf.polling; emit_memhandler_read_rr(sh2, GET_Rn(), GET_Rm(), 0, (op & 3) | tmp); goto end_op; case 0x03: // MOV Rm,Rn 0110nnnnmmmm0011 @@ -3791,7 +3804,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x0400: // MOV.B @(disp,Rm),R0 10000100mmmmdddd case 0x0500: // MOV.W @(disp,Rm),R0 10000101mmmmdddd tmp = (op & 0x100) >> 8; - emit_memhandler_read_rr(sh2, SHR_R0, GET_Rm(), (op & 0x0f) << tmp, tmp); + emit_memhandler_read_rr(sh2, SHR_R0, GET_Rm(), (op & 0x0f) << tmp, tmp | drcf.polling); goto end_op; case 0x0800: // CMP/EQ #imm,R0 10001000iiiiiiii tmp2 = rcache_get_reg(SHR_R0, RC_GR_READ, NULL); @@ -3817,7 +3830,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x0500: // MOV.W @(disp,GBR),R0 11000101dddddddd case 0x0600: // MOV.L @(disp,GBR),R0 11000110dddddddd tmp = (op & 0x300) >> 8; - emit_memhandler_read_rr(sh2, SHR_R0, SHR_GBR, (op & 0xff) << tmp, tmp); + emit_memhandler_read_rr(sh2, SHR_R0, SHR_GBR, (op & 0xff) << tmp, tmp | drcf.polling); goto end_op; case 0x0800: // TST #imm,R0 11001000iiiiiiii tmp = rcache_get_reg(SHR_R0, RC_GR_READ, NULL); @@ -3843,7 +3856,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } goto end_op; case 0x0c00: // TST.B #imm,@(R0,GBR) 11001100iiiiiiii - tmp = emit_indirect_indexed_read(sh2, SHR_TMP, SHR_R0, SHR_GBR, 0); + tmp = emit_indirect_indexed_read(sh2, SHR_TMP, SHR_R0, SHR_GBR, 0 | drcf.polling); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_bic_r_imm(sr, T); emith_tst_r_imm(tmp, op & 0xff); @@ -4149,6 +4162,56 @@ static void sh2_generate_utils(void) emith_jump_reg(arg2); emith_flush(); + // d = sh2_drc_read8_poll(u32 a) + sh2_drc_read8_poll = (void *)tcache_ptr; + emith_ctx_read_ptr(arg1, offsetof(SH2, read8_map)); + emith_sh2_rcall(arg0, arg1, arg2, arg3); + EMITH_SJMP_START(DCOND_CC); + emith_move_r_r_ptr_c(DCOND_CS, arg1, CONTEXT_REG); + emith_jump_reg_c(DCOND_CS, arg2); 
+ EMITH_SJMP_END(DCOND_CC); + emith_and_r_r_r(arg1, arg0, arg3); + emith_eor_r_imm(arg1, 1); + emith_read8s_r_r_r(arg1, arg1, arg2); + emith_push_ret(arg1); + emith_move_r_r_ptr(arg1, CONTEXT_REG); + emith_call(p32x_sh2_poll_memory); + emith_pop_and_ret(RET_REG); + emith_flush(); + + // d = sh2_drc_read16_poll(u32 a) + sh2_drc_read16_poll = (void *)tcache_ptr; + emith_ctx_read_ptr(arg1, offsetof(SH2, read16_map)); + emith_sh2_rcall(arg0, arg1, arg2, arg3); + EMITH_SJMP_START(DCOND_CC); + emith_move_r_r_ptr_c(DCOND_CS, arg1, CONTEXT_REG); + emith_jump_reg_c(DCOND_CS, arg2); + EMITH_SJMP_END(DCOND_CC); + emith_and_r_r_r(arg1, arg0, arg3); + emith_read16s_r_r_r(arg1, arg1, arg2); + emith_push_ret(arg1); + emith_move_r_r_ptr(arg1, CONTEXT_REG); + emith_call(p32x_sh2_poll_memory); + emith_pop_and_ret(RET_REG); + emith_flush(); + + // d = sh2_drc_read32_poll(u32 a) + sh2_drc_read32_poll = (void *)tcache_ptr; + emith_ctx_read_ptr(arg1, offsetof(SH2, read32_map)); + emith_sh2_rcall(arg0, arg1, arg2, arg3); + EMITH_SJMP_START(DCOND_CC); + emith_move_r_r_ptr_c(DCOND_CS, arg1, CONTEXT_REG); + emith_jump_reg_c(DCOND_CS, arg2); + EMITH_SJMP_END(DCOND_CC); + emith_and_r_r_r(arg1, arg0, arg3); + emith_read_r_r_r(arg1, arg1, arg2); + emith_ror(arg1, arg1, 16); + emith_push_ret(arg1); + emith_move_r_r_ptr(arg1, CONTEXT_REG); + emith_call(p32x_sh2_poll_memory); + emith_pop_and_ret(RET_REG); + emith_flush(); + // sh2_drc_exit(void) sh2_drc_exit = (void *)tcache_ptr; emit_do_static_regs(1, arg2); @@ -4289,6 +4352,9 @@ static void sh2_generate_utils(void) MAKE_WRITE_WRAPPER(sh2_drc_write8); MAKE_WRITE_WRAPPER(sh2_drc_write16); MAKE_WRITE_WRAPPER(sh2_drc_write32); + MAKE_READ_WRAPPER(sh2_drc_read8_poll); + MAKE_READ_WRAPPER(sh2_drc_read16_poll); + MAKE_READ_WRAPPER(sh2_drc_read32_poll); #endif emith_pool_commit(0); @@ -4304,6 +4370,9 @@ static void sh2_generate_utils(void) host_dasm_new_symbol(sh2_drc_read8); host_dasm_new_symbol(sh2_drc_read16); host_dasm_new_symbol(sh2_drc_read32); + 
host_dasm_new_symbol(sh2_drc_read8_poll); + host_dasm_new_symbol(sh2_drc_read16_poll); + host_dasm_new_symbol(sh2_drc_read32_poll); #endif } @@ -5396,11 +5465,13 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, opd->source = BITMASK2(GET_Rm(), SHR_MEM); opd->dest = BITMASK1(SHR_R0); opd->imm = (op & 0x0f); + op_flags[i] |= OF_POLL_INSN; break; case 0x0500: // MOV.W @(disp,Rm),R0 10000101mmmmdddd opd->source = BITMASK2(GET_Rm(), SHR_MEM); opd->dest = BITMASK1(SHR_R0); opd->imm = (op & 0x0f) * 2; + op_flags[i] |= OF_POLL_INSN; break; case 0x0800: // CMP/EQ #imm,R0 10001000iiiiiiii opd->source = BITMASK1(SHR_R0); @@ -5539,6 +5610,7 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, opd->source = BITMASK3(SHR_GBR, SHR_R0, SHR_MEM); opd->dest = BITMASK1(SHR_T); opd->imm = op & 0xff; + op_flags[i] |= OF_POLL_INSN; opd->cycles = 3; break; case 0x0d00: // AND.B #imm,@(R0,GBR) 11001101iiiiiiii diff --git a/cpu/sh2/sh2.h b/cpu/sh2/sh2.h index 5a0661ea..a3eb5b12 100644 --- a/cpu/sh2/sh2.h +++ b/cpu/sh2/sh2.h @@ -42,9 +42,10 @@ typedef struct SH2_ unsigned int pdb_io_csum[2]; #define SH2_STATE_RUN (1 << 0) // to prevent recursion -#define SH2_STATE_SLEEP (1 << 1) +#define SH2_STATE_SLEEP (1 << 1) // temporarily stopped (DMA, IO, ...) 
#define SH2_STATE_CPOLL (1 << 2) // polling comm regs #define SH2_STATE_VPOLL (1 << 3) // polling VDP +#define SH2_STATE_RPOLL (1 << 4) // polling address in SDRAM unsigned int state; unsigned int poll_addr; int poll_cycles; diff --git a/pico/32x/32x.c b/pico/32x/32x.c index 4e8377eb..19c6e0a6 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -12,7 +12,7 @@ struct Pico32x Pico32x; SH2 sh2s[2]; -#define SH2_IDLE_STATES (SH2_STATE_CPOLL|SH2_STATE_VPOLL|SH2_STATE_SLEEP) +#define SH2_IDLE_STATES (SH2_STATE_CPOLL|SH2_STATE_VPOLL|SH2_STATE_RPOLL|SH2_STATE_SLEEP) static int REGPARM(2) sh2_irq_cb(SH2 *sh2, int level) { diff --git a/pico/32x/memory.c b/pico/32x/memory.c index 8a4b5365..c385d141 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -61,29 +61,37 @@ static void (*m68k_write16_io)(u32 a, u32 d); #define POLL_THRESHOLD 3 static struct { - u32 addr, cycles; + u32 addr1, addr2, cycles; int cnt; } m68k_poll; static int m68k_poll_detect(u32 a, u32 cycles, u32 flags) { int ret = 0; + // support polling on 2 addresses - seen in Wolfenstein + int match = (a - m68k_poll.addr1 <= 2 || a - m68k_poll.addr2 <= 2); - if (a - 2 <= m68k_poll.addr && m68k_poll.addr <= a + 2 - && cycles - m68k_poll.cycles <= 64 && !SekNotPolling) + if (match && cycles - m68k_poll.cycles <= 64 && !SekNotPolling) { - if (m68k_poll.cnt++ > POLL_THRESHOLD) { + // detect split 32bit access by same cycle count, and ignore those + if (cycles != m68k_poll.cycles && m68k_poll.cnt++ > POLL_THRESHOLD) { if (!(Pico32x.emu_flags & flags)) { elprintf(EL_32X, "m68k poll addr %08x, cyc %u", a, cycles - m68k_poll.cycles); - ret = 1; } Pico32x.emu_flags |= flags; + ret = 1; } } else { + // reset poll state in case of restart by interrupt + Pico32x.emu_flags &= ~(P32XF_68KCPOLL|P32XF_68KVPOLL); + SekSetStop(0); m68k_poll.cnt = 0; - m68k_poll.addr = a; + if (!match) { + m68k_poll.addr2 = m68k_poll.addr1; + m68k_poll.addr1 = a; + } SekNotPolling = 0; } m68k_poll.cycles = cycles; @@ -99,15 +107,15 @@ void 
p32x_m68k_poll_event(u32 flags) Pico32x.emu_flags &= ~flags; SekSetStop(0); } - m68k_poll.addr = m68k_poll.cnt = 0; + m68k_poll.addr1 = m68k_poll.addr2 = m68k_poll.cnt = 0; } -static void sh2_poll_detect(SH2 *sh2, u32 a, u32 flags, int maxcnt) +static void NOINLINE sh2_poll_detect(u32 a, SH2 *sh2, u32 flags, int maxcnt) { - int cycles_left = sh2_cycles_left(sh2); + u32 cycles_done = sh2_cycles_done_t(sh2); - if (a == sh2->poll_addr && sh2->poll_cycles - cycles_left <= 10) { - if (sh2->poll_cnt++ > maxcnt) { + if (a - sh2->poll_addr <= 2 && CYCLES_GE(sh2->poll_cycles+20, cycles_done)) { + if (sh2->poll_cycles != cycles_done && ++sh2->poll_cnt >= maxcnt) { if (!(sh2->state & flags)) elprintf_sh2(sh2, EL_32X, "state: %02x->%02x", sh2->state, sh2->state | flags); @@ -115,16 +123,22 @@ static void sh2_poll_detect(SH2 *sh2, u32 a, u32 flags, int maxcnt) sh2->state |= flags; sh2_end_run(sh2, 1); pevt_log_sh2(sh2, EVT_POLL_START); - return; +#ifdef DRC_SH2 + if ((a & 0xc6000000) == 0x06000000) { + unsigned char *p = sh2->p_drcblk_ram; + p[(a & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT] |= 0x80; + } +#endif } } - else + else if (!(sh2->state & (SH2_STATE_CPOLL|SH2_STATE_VPOLL|SH2_STATE_RPOLL))) { sh2->poll_cnt = 0; - sh2->poll_addr = a; - sh2->poll_cycles = cycles_left; + sh2->poll_addr = a; + } + sh2->poll_cycles = cycles_done; } -void p32x_sh2_poll_event(SH2 *sh2, u32 flags, u32 m68k_cycles) +void NOINLINE p32x_sh2_poll_event(SH2 *sh2, u32 flags, u32 m68k_cycles) { if (sh2->state & flags) { elprintf_sh2(sh2, EL_32X, "state: %02x->%02x", sh2->state, @@ -134,10 +148,17 @@ void p32x_sh2_poll_event(SH2 *sh2, u32 flags, u32 m68k_cycles) sh2->m68krcycles_done = m68k_cycles; pevt_log_sh2_o(sh2, EVT_POLL_END); + sh2->state &= ~flags; +#ifdef DRC_SH2 + if ((sh2->poll_addr & 0xc6000000) == 0x06000000) { + unsigned char *p = sh2->p_drcblk_ram; + p[(sh2->poll_addr & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT] &= ~0x80; + } +#endif } - sh2->state &= ~flags; - sh2->poll_addr = sh2->poll_cycles = 
sh2->poll_cnt = 0; + if (!(sh2->state & (SH2_STATE_CPOLL|SH2_STATE_VPOLL|SH2_STATE_RPOLL))) + sh2->poll_addr = sh2->poll_cycles = sh2->poll_cnt = 0; } static void sh2s_sync_on_read(SH2 *sh2) @@ -151,6 +172,14 @@ static void sh2s_sync_on_read(SH2 *sh2) p32x_sync_other_sh2(sh2, sh2->m68krcycles_done + C_SH2_TO_M68K(sh2, cycles)); } +void p32x_sh2_poll_memory(unsigned int a, SH2 *sh2) +{ + DRC_SAVE_SR(sh2); + sh2_poll_detect(a, sh2, SH2_STATE_RPOLL, 5); + sh2s_sync_on_read(sh2); + DRC_RESTORE_SR(sh2); +} + // SH2 faking //#define FAKE_SH2 #ifdef FAKE_SH2 @@ -567,7 +596,7 @@ static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2) return (r[0] & P32XS_FM) | Pico32x.sh2_regs[0] | Pico32x.sh2irq_mask[sh2->is_slave]; case 0x04: // H count (often as comm too) - sh2_poll_detect(sh2, a, SH2_STATE_CPOLL, 3); + sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 7); sh2s_sync_on_read(sh2); return Pico32x.sh2_regs[4 / 2]; case 0x06: @@ -596,7 +625,7 @@ static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2) // comm port if ((a & 0x30) == 0x20) { - sh2_poll_detect(sh2, a, SH2_STATE_CPOLL, 3); + sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 7); sh2s_sync_on_read(sh2); return r[a / 2]; } @@ -614,7 +643,7 @@ static void p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2) u32 old; a &= 0x3f; - sh2->poll_addr = 0; + sh2->poll_cnt = 0; switch (a) { case 0x00: // FM @@ -695,6 +724,7 @@ static void p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2) return; REG8IN16(r, a) = d; + sh2_end_run(sh2, 1); p32x_m68k_poll_event(P32XF_68KCPOLL); p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, sh2_cycles_done_m68k(sh2)); @@ -711,7 +741,7 @@ static void p32x_sh2reg_write16(u32 a, u32 d, SH2 *sh2) { a &= 0x3e; - sh2->poll_addr = 0; + sh2->poll_cnt = 0; // comm if ((a & 0x30) == 0x20) { @@ -720,6 +750,7 @@ static void p32x_sh2reg_write16(u32 a, u32 d, SH2 *sh2) return; Pico32x.regs[a / 2] = d; + sh2_end_run(sh2, 1); p32x_m68k_poll_event(P32XF_68KCPOLL); p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, sh2_cycles_done_m68k(sh2)); @@ -1251,7 
+1282,7 @@ static u32 REGPARM(2) sh2_read8_cs0(u32 a, SH2 *sh2) if ((a & 0x3fff0) == 0x4100) { d = p32x_vdp_read16(a); - sh2_poll_detect(sh2, a, SH2_STATE_VPOLL, 7); + sh2_poll_detect(a, sh2, SH2_STATE_VPOLL, 9); goto out_16to8; } @@ -1319,7 +1350,7 @@ static u32 REGPARM(2) sh2_read16_cs0(u32 a, SH2 *sh2) if ((a & 0x3fff0) == 0x4100) { d = p32x_vdp_read16(a); - sh2_poll_detect(sh2, a, SH2_STATE_VPOLL, 7); + sh2_poll_detect(a, sh2, SH2_STATE_VPOLL, 9); goto out; } @@ -1383,6 +1414,28 @@ static u32 REGPARM(2) sh2_read32_rom(u32 a, SH2 *sh2) } // writes +#ifdef DRC_SH2 +void NOINLINE sh2_sdram_checks(u32 a, int t, SH2 *sh2) +{ + int v = t & ~0x80; + + if (v) + sh2_drc_wcheck_ram(a, v, sh2); + if (t & 0x80) { + DRC_SAVE_SR(sh2); + sh2_end_run(sh2, 1); + p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_RPOLL, sh2_cycles_done_m68k(sh2)); + DRC_RESTORE_SR(sh2); + } +} + +void inline sh2_da_checks(u32 a, int t, SH2 *sh2) +{ + if (t) + sh2_drc_wcheck_da(a, t, sh2); +} +#endif + static void REGPARM(3) sh2_write_ignore(u32 a, u32 d, SH2 *sh2) { } @@ -1402,7 +1455,7 @@ static void REGPARM(3) sh2_write8_cs0(u32 a, u32 d, SH2 *sh2) if (Pico32x.regs[0] & P32XS_FM) { if ((a & 0x3fff0) == 0x4100) { - sh2->poll_addr = 0; + sh2->poll_cnt = 0; p32x_vdp_write8(a, d); goto out; } @@ -1431,38 +1484,26 @@ static void REGPARM(3) sh2_write8_dram(u32 a, u32 d, SH2 *sh2) static void REGPARM(3) sh2_write8_sdram(u32 a, u32 d, SH2 *sh2) { - u32 a1 = a & 0x3ffff; + u32 a1 = (a & 0x3ffff) ^ 1; + ((u8 *)sh2->p_sdram)[a1] = d; #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_ram; int t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; if (t) - sh2_drc_wcheck_ram(a, t, sh2); + sh2_sdram_checks(a, t, sh2); #endif - ((u8 *)sh2->p_sdram)[a1 ^ 1] = d; -} - -static void REGPARM(3) sh2_write8_sdram_wt(u32 a, u32 d, SH2 *sh2) -{ - // xmen sync hack.. 
- if (a < 0x26000200) { - DRC_SAVE_SR(sh2); - sh2_end_run(sh2, 32); - DRC_RESTORE_SR(sh2); - } - - sh2_write8_sdram(a, d, sh2); } static void REGPARM(3) sh2_write8_da(u32 a, u32 d, SH2 *sh2) { - u32 a1 = a & 0xfff; + u32 a1 = (a & 0xfff) ^ 1; + sh2->data_array[a1] = d; #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_da; int t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; if (t) - sh2_drc_wcheck_da(a, t, sh2); + sh2_da_checks(a, t, sh2); #endif - sh2->data_array[a1 ^ 1] = d; } // write16 @@ -1481,7 +1522,7 @@ static void REGPARM(3) sh2_write16_cs0(u32 a, u32 d, SH2 *sh2) if (Pico32x.regs[0] & P32XS_FM) { if ((a & 0x3fff0) == 0x4100) { - sh2->poll_addr = 0; + sh2->poll_cnt = 0; p32x_vdp_write16(a, d, sh2); goto out; } @@ -1511,25 +1552,25 @@ static void REGPARM(3) sh2_write16_dram(u32 a, u32 d, SH2 *sh2) static void REGPARM(3) sh2_write16_sdram(u32 a, u32 d, SH2 *sh2) { u32 a1 = a & 0x3fffe; + ((u16 *)sh2->p_sdram)[a1 / 2] = d; #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_ram; int t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; if (t) - sh2_drc_wcheck_ram(a, t, sh2); + sh2_sdram_checks(a, t, sh2); #endif - ((u16 *)sh2->p_sdram)[a1 / 2] = d; } static void REGPARM(3) sh2_write16_da(u32 a, u32 d, SH2 *sh2) { u32 a1 = a & 0xffe; + ((u16 *)sh2->data_array)[a1 / 2] = d; #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_da; int t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; if (t) - sh2_drc_wcheck_da(a, t, sh2); + sh2_da_checks(a, t, sh2); #endif - ((u16 *)sh2->data_array)[a1 / 2] = d; } static void REGPARM(3) sh2_write16_rom(u32 a, u32 d, SH2 *sh2) @@ -1580,31 +1621,31 @@ static void REGPARM(3) sh2_write32_dram(u32 a, u32 d, SH2 *sh2) static void REGPARM(3) sh2_write32_sdram(u32 a, u32 d, SH2 *sh2) { u32 a1 = a & 0x3fffc; + *(u32 *)(sh2->p_sdram + a1) = (d << 16) | (d >> 16); #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_ram; int t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; if (t) - sh2_drc_wcheck_ram(a, t, sh2); + sh2_sdram_checks(a, t, sh2); int u = p[(a1+2) >> SH2_DRCBLK_RAM_SHIFT]; if (u) - sh2_drc_wcheck_ram(a+2, u, sh2); + sh2_sdram_checks(a+2, u, sh2); 
#endif - *(u32 *)(sh2->p_sdram + a1) = (d << 16) | (d >> 16); } static void REGPARM(3) sh2_write32_da(u32 a, u32 d, SH2 *sh2) { u32 a1 = a & 0xffc; + *((u32 *)sh2->data_array + a1/4) = (d << 16) | (d >> 16); #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_da; int t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; if (t) - sh2_drc_wcheck_da(a, t, sh2); + sh2_da_checks(a, t, sh2); int u = p[(a1+2) >> SH2_DRCBLK_DA_SHIFT]; if (u) - sh2_drc_wcheck_da(a+2, u, sh2); + sh2_da_checks(a+2, u, sh2); #endif - *((u32 *)sh2->data_array + a1/4) = (d << 16) | (d >> 16); } static void REGPARM(3) sh2_write32_rom(u32 a, u32 d, SH2 *sh2) @@ -2040,8 +2081,7 @@ void PicoMemSetup32x(void) sh2_read8_map[0x06/2].addr = sh2_read8_map[0x26/2].addr = sh2_read16_map[0x06/2].addr = sh2_read16_map[0x26/2].addr = sh2_read32_map[0x06/2].addr = sh2_read32_map[0x26/2].addr = MAP_MEMORY(Pico32xMem->sdram); - sh2_write8_map[0x06/2] = sh2_write8_sdram; - sh2_write8_map[0x26/2] = sh2_write8_sdram_wt; + sh2_write8_map[0x06/2] = sh2_write8_map[0x26/2] = sh2_write8_sdram; sh2_write16_map[0x06/2] = sh2_write16_map[0x26/2] = sh2_write16_sdram; sh2_write32_map[0x06/2] = sh2_write32_map[0x26/2] = sh2_write32_sdram; sh2_read8_map[0x06/2].mask = sh2_read8_map[0x26/2].mask = 0x03ffff; diff --git a/pico/32x/memory_arm.S b/pico/32x/memory_arm.S index 1082c7b7..b449370b 100644 --- a/pico/32x/memory_arm.S +++ b/pico/32x/memory_arm.S @@ -227,9 +227,9 @@ sh2_write32_sdram: ldrb r1, [ip, r3, lsr #SH2_RAM_SHIFT+1]! cmp r1, #0 beq 1f - stmfd sp!, {r0, r1, r2, ip} + stmfd sp!, {r0, r2, ip, lr} bl sh2_drc_wcheck_ram - ldmfd sp!, {r0, r1, r2, ip} + ldmfd sp!, {r0, r2, ip, lr} 1: ldrb r1, [ip, #1] cmp r1, #0 bxeq lr @@ -250,9 +250,9 @@ sh2_write32_da: ldrb r1, [ip, r3, lsr #SH2_DA_SHIFT+1]! 
cmp r1, #0 beq 1f - stmfd sp!, {r0, r1, r2, ip} + stmfd sp!, {r0, r2, ip, lr} bl sh2_drc_wcheck_da - ldmfd sp!, {r0, r1, r2, ip} + ldmfd sp!, {r0, r2, ip, lr} 1: ldrb r1, [ip, #1] cmp r1, #0 bxeq lr @@ -269,7 +269,6 @@ sh2_write32_dram: moveq r1, r1, ror #16 streq r1, [ip, r3, lsr #SH2_DRAM_SHIFT] bxeq lr -#if 1 ldr r0, [ip, r3, lsr #SH2_DRAM_SHIFT] mov r1, r1, ror #16 mov r2, #0 @@ -284,20 +283,6 @@ sh2_write32_dram: bic r0, r0, r2 orr r0, r0, r1 str r0, [ip, r3, lsr #SH2_DRAM_SHIFT] -#else - add ip, ip, r3, lsr #SH2_DRAM_SHIFT - tst r1, #0x00ff0000 - lsrne r3, r1, #16 - strneb r3, [ip, #0] - tst r1, #0xff000000 - lsrne r3, r1, #24 - strneb r3, [ip, #1] - tst r1, #0x000000ff - strneb r1, [ip, #2] - tst r1, #0x0000ff00 - lsrne r3, r1, #8 - strneb r3, [ip, #3] -#endif bx lr .pool diff --git a/pico/32x/sh2soc.c b/pico/32x/sh2soc.c index 4aae2a04..dd61a93b 100644 --- a/pico/32x/sh2soc.c +++ b/pico/32x/sh2soc.c @@ -138,6 +138,7 @@ static void dmac_trigger(SH2 *sh2, struct dma_chan *chan) if (chan->chcr & DMA_AR) { // auto-request transfer + sh2->state |= SH2_STATE_SLEEP; while ((int)chan->tcr > 0) dmac_transfer_one(sh2, chan); dmac_transfer_complete(sh2, chan); @@ -237,6 +238,7 @@ u32 REGPARM(2) sh2_peripheral_read8(u32 a, SH2 *sh2) a &= 0x1ff; d = PREG8(r, a); + sh2->poll_cnt = 0; elprintf_sh2(sh2, EL_32XP, "peri r8 [%08x] %02x @%06x", a | ~0x1ff, d, sh2_pc(sh2)); return d; @@ -250,6 +252,7 @@ u32 REGPARM(2) sh2_peripheral_read16(u32 a, SH2 *sh2) a &= 0x1fe; d = r[(a / 2) ^ 1]; + sh2->poll_cnt = 0; elprintf_sh2(sh2, EL_32XP, "peri r16 [%08x] %04x @%06x", a | ~0x1ff, d, sh2_pc(sh2)); return d; @@ -258,9 +261,11 @@ u32 REGPARM(2) sh2_peripheral_read16(u32 a, SH2 *sh2) u32 REGPARM(2) sh2_peripheral_read32(u32 a, SH2 *sh2) { u32 d; + a &= 0x1fc; d = sh2->peri_regs[a / 4]; + sh2->poll_cnt = 0; elprintf_sh2(sh2, EL_32XP, "peri r32 [%08x] %08x @%06x", a | ~0x1ff, d, sh2_pc(sh2)); return d; @@ -472,6 +477,7 @@ static void dreq1_do(SH2 *sh2, struct dma_chan *chan) if 
((chan->dar & ~0xf) != 0x20004030) elprintf(EL_32XP|EL_ANOMALY, "dreq1: bad dar?: %08x\n", chan->dar); + sh2->state |= SH2_STATE_SLEEP; dmac_transfer_one(sh2, chan); if (chan->tcr == 0) dmac_transfer_complete(sh2, chan); diff --git a/pico/pico_int.h b/pico/pico_int.h index 831bfc72..2c55c941 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -932,6 +932,7 @@ void Pico32xSwapDRAM(int b); void Pico32xMemStateLoaded(void); void p32x_update_banks(void); void p32x_m68k_poll_event(unsigned int flags); +void p32x_sh2_poll_memory(unsigned int a, SH2 *sh2); void p32x_sh2_poll_event(SH2 *sh2, unsigned int flags, unsigned int m68k_cycles); // 32x/draw.c From 49daa9e093314117d39090a738eb926822dacb12 Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 22 May 2019 21:01:00 +0200 Subject: [PATCH 0194/1110] sh2 drc, block management bugfixes and cleanup --- cpu/sh2/compiler.c | 159 ++++++++++++++++++++++----------------------- 1 file changed, 77 insertions(+), 82 deletions(-) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index b7f54dd9..2e6aa7c9 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -943,20 +943,14 @@ static struct block_desc *dr_find_inactive_block(int tcache_id, u16 crc, u32 addr, int size, u32 addr_lit, int size_lit) { struct block_list **head = &inactive_blocks[tcache_id]; - struct block_list *prev = NULL, *current = *head; + struct block_list *current; - for (; current != NULL; prev = current, current = current->next) { + for (current = *head; current != NULL; current = current->next) { struct block_desc *block = current->block; if (block->crc == crc && block->addr == addr && block->size == size && block->addr_lit == addr_lit && block->size_lit == size_lit) { - if (prev == NULL) - *head = current->next; - else - prev->next = current->next; - block->list = NULL; // should now be empty - current->next = blist_free; - blist_free = current; + rm_from_block_lists(block); return block; } } @@ -1031,6 +1025,47 @@ static void *dr_failure(void) exit(1); } 
+#if LINK_BRANCHES +static void dr_block_link(struct block_entry *be, struct block_link *bl, int emit_jump) +{ + dbg(2, "- %slink from %p to pc %08x entry %p", emit_jump ? "":"early ", + bl->jump, bl->target_pc, be->tcache_ptr); + + if (emit_jump) + emith_jump_patch(bl->jump, be->tcache_ptr); + // could sync arm caches here, but that's unnecessary + + // move bl to block_entry + bl->target = be; + bl->prev = NULL; + if (be->links) + be->links->prev = bl; + bl->next = be->links; + be->links = bl; +} + +static void dr_block_unlink(struct block_link *bl, int emit_jump) +{ + dbg(2,"- unlink from %p to pc %08x", bl->jump, bl->target_pc); + + if (bl->target) { + if (emit_jump) { + emith_jump_patch(bl->jump, sh2_drc_dispatcher); + // update cpu caches since the previous jump target doesn't exist anymore + host_instructions_updated(bl->jump, bl->jump+4); + } + + if (bl->prev) + bl->prev->next = bl->next; + else + bl->target->links = bl->next; + if (bl->next) + bl->next->prev = bl->prev; + bl->target = NULL; + } +} +#endif + static void *dr_prepare_ext_branch(struct block_entry *owner, u32 pc, int is_slave, int tcache_id) { #if LINK_BRANCHES @@ -1064,13 +1099,7 @@ static void *dr_prepare_ext_branch(struct block_entry *owner, u32 pc, int is_sla owner->o_links = bl; if (be != NULL) { - dbg(2, "- early link from %p to pc %08x entry %p", bl->jump, pc, be->tcache_ptr); - bl->target = be; - bl->prev = NULL; - if (be->links) - be->links->prev = bl; - bl->next = be->links; - be->links = bl; + dr_block_link(be, bl, 0); // jump not yet emitted by translate() return be->tcache_ptr; } else { @@ -1092,23 +1121,12 @@ static void dr_link_blocks(struct block_entry *be, int tcache_id) while (bl != NULL) { next = bl->next; - if (bl->target_pc == pc) { - dbg(2, "- link from %p to pc %08x entry %p", bl->jump, pc, be->tcache_ptr); - // move bl from unresolved_links to block_entry - rm_from_hashlist_unresolved(bl, tcache_id); - - emith_jump_patch(bl->jump, be->tcache_ptr); - bl->target = be; - 
bl->prev = NULL; - if (be->links) - be->links->prev = bl; - bl->next = be->links; - be->links = bl; + if (bl->target_pc == pc && (!bl->tcache_id || bl->tcache_id == tcache_id)) { + rm_from_hashlist_unresolved(bl, bl->tcache_id); + dr_block_link(be, bl, 1); } bl = next; } - - // could sync arm caches here, but that's unnecessary #endif } @@ -1119,22 +1137,13 @@ static void dr_link_outgoing(struct block_entry *be, int tcache_id, int is_slave int target_tcache_id; for (bl = be->o_links; bl; bl = bl->o_next) { - be = dr_get_entry(bl->target_pc, is_slave, &target_tcache_id); - if (!target_tcache_id || target_tcache_id == tcache_id) { - if (be) { - dbg(2, "- link from %p to pc %08x entry %p", bl->jump, bl->target_pc, be->tcache_ptr); - emith_jump_patch(bl->jump, be->tcache_ptr); - bl->target = be; - bl->prev = NULL; - if (be->links) - be->links->prev = bl; - bl->next = be->links; - be->links = bl; - } else { - emith_jump_patch(bl->jump, sh2_drc_dispatcher); - add_to_hashlist_unresolved(bl, tcache_id); + if (bl->target == NULL) { + be = dr_get_entry(bl->target_pc, is_slave, &target_tcache_id); + if (be != NULL && (!target_tcache_id || target_tcache_id == tcache_id)) { + // remove bl from unresolved_links (must've been since target was NULL) + rm_from_hashlist_unresolved(bl, bl->tcache_id); + dr_block_link(be, bl, 1); } - host_instructions_updated(bl->jump, bl->jump+4); } } #endif @@ -4381,65 +4390,48 @@ static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nol struct block_link *bl; u32 i; - dbg(2, " killing entry %08x-%08x,%08x-%08x, blkid %d,%d", + free = free || nolit; // block is invalid if literals are overwritten + dbg(2," %sing block %08x-%08x,%08x-%08x, blkid %d,%d", free?"delet":"disabl", bd->addr, bd->addr + bd->size, bd->addr_lit, bd->addr_lit + bd->size_lit, tcache_id, bd - block_tables[tcache_id]); if (bd->addr == 0 || bd->entry_count == 0) { dbg(1, " killing dead block!? 
%08x", bd->addr); return; } - free = free || nolit; // block is invalid if literals are overwritten - // remove from hash table, make incoming links unresolved, revoke outgoing links - for (i = 0; i < bd->entry_count; i++) { - if (bd->active) + // remove from hash table, make incoming links unresolved + if (bd->active) { + for (i = 0; i < bd->entry_count; i++) { rm_from_hashlist(&bd->entryp[i], tcache_id); - for (bl = bd->entryp[i].o_links; bl != NULL; ) { - if (bl->target) { - if (bl->prev) - bl->prev->next = bl->next; - else - bl->target->links = bl->next; - if (bl->next) - bl->next->prev = bl->prev; - bl->target = NULL; - } else if (bd->active) - rm_from_hashlist_unresolved(bl, tcache_id); - bl = bl->o_next; + while ((bl = bd->entryp[i].links) != NULL) { + dr_block_unlink(bl, 1); + add_to_hashlist_unresolved(bl, tcache_id); + } } - for (bl = bd->entryp[i].links; bl != NULL; ) { - struct block_link *bl_next = bl->next; - dbg(2, "- unlink from %p to pc %08x", bl->jump, bl->target_pc); - emith_jump_patch(bl->jump, sh2_drc_dispatcher); - // update cpu caches since the previous jump target doesn't exist anymore - host_instructions_updated(bl->jump, bl->jump+4); - - add_to_hashlist_unresolved(bl, tcache_id); - bl = bl_next; - } - bd->entryp[i].links = NULL; - } - - if (bd->active) dr_mark_memory(-1, bd, tcache_id, nolit); + add_to_block_list(&inactive_blocks[tcache_id], bd); + } + bd->active = 0; if (free) { - while ((bl = bd->entryp[0].o_links) != NULL) { - bd->entryp[0].o_links = bl->next; + // revoke outgoing links + for (bl = bd->entryp[0].o_links; bl != NULL; bl = bl->o_next) { + if (bl->target) + dr_block_unlink(bl, 0); + else + rm_from_hashlist_unresolved(bl, tcache_id); bl->jump = NULL; bl->next = blink_free[bl->tcache_id]; blink_free[bl->tcache_id] = bl; } bd->entryp[0].o_links = NULL; + // invalidate block rm_from_block_lists(bd); bd->addr = bd->size = bd->addr_lit = bd->size_lit = 0; bd->entry_count = 0; - } else { - 
add_to_block_list(&inactive_blocks[tcache_id], bd); } - bd->active = 0; } static void sh2_smc_rm_blocks(u32 a, int tcache_id, u32 shift) @@ -4454,10 +4446,12 @@ static void sh2_smc_rm_blocks(u32 a, int tcache_id, u32 shift) int removed = 0; #endif - // need to check cached and writethrough area + // ignore cache-through a &= wtmask; + blist = &inval_lookup[tcache_id][(a & mask) / INVAL_PAGE_SIZE]; entry = *blist; + // go through the block list for this range while (entry != NULL) { next = entry->next; block = entry->block; @@ -4465,6 +4459,7 @@ static void sh2_smc_rm_blocks(u32 a, int tcache_id, u32 shift) end_addr = start_addr + block->size; start_lit = block->addr_lit & wtmask; end_lit = start_lit + block->size_lit; + // disable/delete block if it covers the modified address if ((start_addr <= a && a < end_addr) || (start_lit <= a && a < end_lit)) { From adf39a13f9f62629d767f1f7f73f4f41fc590219 Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 22 May 2019 21:04:59 +0200 Subject: [PATCH 0195/1110] sh2 drc, register cache optimisations --- cpu/sh2/compiler.c | 321 ++++++++++++++++++++++++--------------------- pico/32x/memory.c | 26 ++++ pico/pico_int.h | 1 + 3 files changed, 197 insertions(+), 151 deletions(-) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 2e6aa7c9..9160c90c 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -138,10 +138,11 @@ enum op_types { OP_MOVE, // register move OP_LOAD_CONST,// load const to register OP_LOAD_POOL, // literal pool load, imm is address - OP_MOVA, - OP_SLEEP, - OP_RTE, - OP_TRAPA, + OP_MOVA, // MOVA instruction + OP_SLEEP, // SLEEP instruction + OP_RTE, // RTE instruction + OP_TRAPA, // TRAPA instruction + OP_LDC, // LDC instruction OP_UNDEFINED, }; @@ -552,31 +553,25 @@ static int dr_is_rom(u32 a) return (a & 0xc6000000) == 0x02000000 && (a & 0x3f0000) < 0x3e0000; } -static int dr_ctx_get_mem_ptr(u32 a, u32 *mask) +static int dr_ctx_get_mem_ptr(SH2 *sh2, u32 a, u32 *mask) { + void *memptr; int poffs = -1; - if 
((a & ~0x7ff) == 0) { - // BIOS + // check if region is mapped memory + memptr = p32x_sh2_get_mem_ptr(a, mask, sh2); + if (memptr == NULL /*|| (a & ((1 << SH2_READ_SHIFT)-1) & ~*mask) != 0*/) + return poffs; + + if (memptr == sh2->p_bios) // BIOS poffs = offsetof(SH2, p_bios); - *mask = 0x7ff; - } - else if ((a & 0xfffff000) == 0xc0000000) { - // data array + else if (memptr == sh2->p_da) // data array // FIXME: access sh2->data_array instead poffs = offsetof(SH2, p_da); - *mask = 0xfff; - } - else if ((a & 0xc6000000) == 0x06000000) { - // SDRAM + else if (memptr == sh2->p_sdram) // SDRAM poffs = offsetof(SH2, p_sdram); - *mask = 0x03ffff; - } - else if ((a & 0xc6000000) == 0x02000000) { - // ROM + else if (memptr == sh2->p_rom) // ROM poffs = offsetof(SH2, p_rom); - *mask = 0x3fffff; - } return poffs; } @@ -1365,6 +1360,7 @@ static u32 rcache_locked; static u32 rcache_hint_soon; static u32 rcache_hint_late; static u32 rcache_hint_write; +static u32 rcache_hint_clean; #define rcache_hint (rcache_hint_soon|rcache_hint_late) static void rcache_unmap_vreg(int x) @@ -1396,16 +1392,19 @@ static void rcache_clean_vreg(int x) emith_move_r_r(cache_regs[guest_regs[r].sreg].hreg, cache_regs[guest_regs[r].vreg].hreg); rcache_remove_vreg_alias(x, r); rcache_add_vreg_alias(guest_regs[r].sreg, r); + cache_regs[guest_regs[r].sreg].flags |= HRF_DIRTY; } else { // must evict since sreg is locked emith_ctx_write(cache_regs[x].hreg, r * 4); + guest_regs[r].flags &= ~GRF_DIRTY; guest_regs[r].vreg = -1; } } - } else + } else if (~rcache_hint_write & (1 << r)) { emith_ctx_write(cache_regs[x].hreg, r * 4); - } - guest_regs[r].flags &= ~GRF_DIRTY;) + guest_regs[r].flags &= ~GRF_DIRTY; + } + }) } } @@ -1654,7 +1653,7 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr (cache_regs[i].flags & HRF_LOCKED) || (cache_regs[i].type == HR_STATIC && !(guest_regs[r].flags & GRF_STATIC))) { // need to split up. 
take reg out here to avoid unnecessary writebacks - cache_regs[i].gregs &= ~(1 << r); + rcache_remove_vreg_alias(i, r); split = i; } else { // aliases not needed anytime soon, remove them @@ -1809,7 +1808,8 @@ static int rcache_get_reg_arg(int arg, sh2_reg_e r, int *hr) // r is needed later on anyway srcr = rcache_get_reg_(r, RC_GR_READ, 0, NULL); is_cached = (cache_regs[reg_map_host[srcr]].type == HR_CACHED); - } else if ((guest_regs[r].flags & GRF_CDIRTY) && gconst_get(r, &val)) { + } else if (!(rcache_hint_clean & (1 << r)) && + (guest_regs[r].flags & GRF_CDIRTY) && gconst_get(r, &val)) { // r has an uncomitted const - load into arg, but keep constant uncomitted srcr = dstr; is_const = 1; @@ -1822,7 +1822,7 @@ static int rcache_get_reg_arg(int arg, sh2_reg_e r, int *hr) srcr = dstr; if (rcache_static & (1 << r)) srcr = rcache_get_reg_(r, RC_GR_READ, 0, NULL); - else if (gconst_try_read(guest_regs[r].vreg, r)) + else if (gconst_try_read(dstid, r)) dirty = 1; else emith_ctx_read(srcr, r * 4); @@ -1856,14 +1856,18 @@ static int rcache_get_reg_arg(int arg, sh2_reg_e r, int *hr) } else if (hr != NULL) { // caller will modify arg, so it will soon be out of sync with r if (dirty || src_dirty) { - emith_ctx_write(dstr, r * 4); // must clean since arg will be modified - guest_regs[r].flags &= ~GRF_DIRTY; + if (~rcache_hint_write & (1 << r)) { + emith_ctx_write(dstr, r * 4); // must clean since arg will be modified + guest_regs[r].flags &= ~GRF_DIRTY; + } } - } else if (guest_regs[r].vreg < 0) { + } else { // keep arg as vreg for r cache_regs[dstid].type = HR_CACHED; - cache_regs[dstid].gregs = 1 << r; - guest_regs[r].vreg = dstid; + if (guest_regs[r].vreg < 0) { + cache_regs[dstid].gregs = 1 << r; + guest_regs[r].vreg = dstid; + } if (dirty || src_dirty) { // mark as modifed for cleaning later on cache_regs[dstid].flags |= HRF_DIRTY; guest_regs[r].flags |= GRF_DIRTY; @@ -2057,9 +2061,9 @@ static void rcache_clean_mask(u32 mask) { int i; - // XXX consider gconst? 
- if (!(mask &= ~rcache_static & ~gconst_dirty_mask())) + if (!(mask &= ~rcache_static)) return; + rcache_hint_clean |= mask; // clean only vregs where all aliases are covered by the mask for (i = 0; i < ARRAY_SIZE(cache_regs); i++) @@ -2120,7 +2124,7 @@ static void rcache_invalidate(void) } rcache_counter = 0; - rcache_hint_soon = rcache_hint_late = rcache_hint_write = 0; + rcache_hint_soon = rcache_hint_late = rcache_hint_write = rcache_hint_clean = 0; gconst_invalidate(); } @@ -2164,48 +2168,76 @@ static void rcache_init(void) // --------------------------------------------------------------- -static int emit_get_rbase_and_offs(SH2 *sh2, u32 a, u32 *offs) +// NB may return either REG or TEMP +static int emit_get_rbase_and_offs(SH2 *sh2, sh2_reg_e r, int rmod, u32 *offs) { - u32 omask = 0xff; // offset mask, XXX: ARM oriented.. + uptr omask = 0xff; // offset mask, XXX: ARM oriented.. u32 mask = 0; + u32 a; int poffs; - int hr; - unsigned long la; + int hr, hr2; + uptr la; - poffs = dr_ctx_get_mem_ptr(a, &mask); + // is r constant and points to a memory region? + if (! gconst_get(r, &a)) + return -1; + poffs = dr_ctx_get_mem_ptr(sh2, a, &mask); if (poffs == -1) return -1; - hr = rcache_get_tmp(); if (mask < 0x1000) { - // can't access data array or BIOS directly from ROM or SDRAM, - // since code may run on both SH2s (tcache_id of translation block needed)) + // data array or BIOS, can't safely access directly since translated code + // may run on both SH2s + hr = rcache_get_tmp(); emith_ctx_read_ptr(hr, poffs); + a += *offs; if (a & mask & ~omask) emith_add_r_r_ptr_imm(hr, hr, a & mask & ~omask); *offs = a & omask; + return hr; + } + + la = (uptr)*(void **)((char *)sh2 + poffs); + // accessing ROM or SDRAM, code location doesn't matter. The host address + // for these should be mmapped to be equal to the SH2 address. 
+ // if r is in rcache or needed soon anyway, and offs is relative to region + // use rcached const to avoid loading a literal on ARM + if ((guest_regs[r].vreg >= 0 || ((guest_regs[r].flags & GRF_CDIRTY) && + ((rcache_hint_soon|rcache_hint_clean) & (1 << r)))) && !(*offs & ~mask)) { + u32 odd = a & 1; // need to fix odd address for correct byte addressing + la -= (s32)((a & ~mask) - *offs - odd); // diff between reg and memory + // if reg is modified later on, allocate it RMW to remove aliases here + // else the aliases vreg stays locked and a vreg shortage may occur. + hr = hr2 = rcache_get_reg(r, rmod ? RC_GR_RMW : RC_GR_READ, NULL); + if ((la & ~omask) - odd) { + hr = rcache_get_tmp(); + emith_add_r_r_ptr_imm(hr, hr2, (la & ~omask) - odd); + } + *offs = (la & omask); } else { // known fixed host address - la = (unsigned long)*(void **)((char *)sh2 + poffs) + (a & mask); - *offs = la & omask; + la += (a + *offs) & mask; + hr = rcache_get_tmp(); emith_move_r_ptr_imm(hr, la & ~omask); + *offs = la & omask; } return hr; } // read const data from const ROM address -static int emit_get_rom_data(sh2_reg_e r, u32 offs, int size, u32 *val) +static int emit_get_rom_data(SH2 *sh2, sh2_reg_e r, u32 offs, int size, u32 *val) { - u32 tmp; + u32 a, mask; *val = 0; - if (gconst_get(r, &tmp)) { - tmp += offs; - if (dr_is_rom(tmp)) { + if (gconst_get(r, &a)) { + a += offs; + // check if rom is memory mapped (not bank switched), and address is in rom + if (dr_is_rom(a) && p32x_sh2_get_mem_ptr(a, &mask, sh2)) { switch (size & MF_SIZEMASK) { - case 0: *val = (s8)p32x_sh2_read8(tmp, sh2s); break; // 8 - case 1: *val = (s16)p32x_sh2_read16(tmp, sh2s); break; // 16 - case 2: *val = p32x_sh2_read32(tmp, sh2s); break; // 32 + case 0: *val = (s8)p32x_sh2_read8(a, sh2s); break; // 8 + case 1: *val = (s16)p32x_sh2_read16(a, sh2s); break; // 16 + case 2: *val = p32x_sh2_read32(a, sh2s); break; // 32 } return 1; } @@ -2315,10 +2347,10 @@ static void emit_memhandler_write(int size) static int 
emit_memhandler_read_rr(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rs, u32 offs, int size) { int hr, hr2; - u32 val, offs2; + u32 val; #if PROPAGATE_CONSTANTS - if (emit_get_rom_data(rs, offs, size, &val)) { + if (emit_get_rom_data(sh2, rs, offs, size, &val)) { if (rd == SHR_TMP) { hr2 = rcache_get_tmp(); emith_move_r_imm(hr2, val); @@ -2331,47 +2363,49 @@ static int emit_memhandler_read_rr(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rs, u32 off return hr2; } - if (gconst_get(rs, &val)) { - hr = emit_get_rbase_and_offs(sh2, val + offs, &offs2); - if (hr != -1) { - if (rd == SHR_TMP) - hr2 = rcache_get_tmp(); - else - hr2 = rcache_get_reg(rd, RC_GR_WRITE, NULL); - switch (size & MF_SIZEMASK) { - case 0: // 8 - emith_read8s_r_r_offs(hr2, hr, offs2 ^ 1); - break; - case 1: // 16 - emith_read16s_r_r_offs(hr2, hr, offs2); - break; - case 2: // 32 - emith_read_r_r_offs(hr2, hr, offs2); - emith_ror(hr2, hr2, 16); - break; - } - rcache_free_tmp(hr); - if (size & MF_POSTINCR) - gconst_new(rs, val + (1 << (size & MF_SIZEMASK))); - return hr2; + hr = emit_get_rbase_and_offs(sh2, rs, size & MF_POSTINCR, &offs); + if (hr != -1) { + if (rd == SHR_TMP) + hr2 = rcache_get_tmp(); + else + hr2 = rcache_get_reg(rd, RC_GR_WRITE, NULL); + switch (size & MF_SIZEMASK) { + case 0: emith_read8s_r_r_offs(hr2, hr, offs ^ 1); break; // 8 + case 1: emith_read16s_r_r_offs(hr2, hr, offs); break; // 16 + case 2: emith_read_r_r_offs(hr2, hr, offs); emith_ror(hr2, hr2, 16); break; } + if (cache_regs[reg_map_host[hr]].type == HR_TEMP) // may also return REG + rcache_free_tmp(hr); + if (size & MF_POSTINCR) { + int isgc = gconst_get(rs, &val); + if (!isgc || guest_regs[rs].vreg >= 0) { + // already loaded + hr = rcache_get_reg(rs, RC_GR_RMW, NULL); + emith_add_r_r_imm(hr, hr, 1 << (size & MF_SIZEMASK)); + if (isgc) + gconst_set(rs, val + (1 << (size & MF_SIZEMASK))); + } else + gconst_new(rs, val + (1 << (size & MF_SIZEMASK))); + } + return hr2; } #endif - if (gconst_get(rs, &val) && (!(size & MF_POSTINCR) /*|| 
!(rcache_hint_soon & (1 << rs))*/)) { + + if (gconst_get(rs, &val) && guest_regs[rs].vreg < 0 && !(rcache_hint_soon & (1 << rs))) { hr = rcache_get_tmp_arg(0); emith_move_r_imm(hr, val + offs); if (size & MF_POSTINCR) gconst_new(rs, val + (1 << (size & MF_SIZEMASK))); - } else if (offs || (size & MF_POSTINCR)) { + } else if (size & MF_POSTINCR) { + hr = rcache_get_tmp_arg(0); + hr2 = rcache_get_reg(rs, RC_GR_RMW, NULL); + emith_add_r_r_imm(hr, hr2, offs); + emith_add_r_imm(hr2, 1 << (size & MF_SIZEMASK)); + } else { hr = rcache_get_reg_arg(0, rs, &hr2); if (offs || hr != hr2) emith_add_r_r_imm(hr, hr2, offs); - if (size & MF_POSTINCR) { - hr = rcache_get_reg(rs, RC_GR_WRITE, NULL); - emith_add_r_r_imm(hr, hr2, 1 << (size & MF_SIZEMASK)); - } - } else - rcache_get_reg_arg(0, rs, NULL); + } hr = emit_memhandler_read(size); size &= MF_SIZEMASK; @@ -2405,7 +2439,7 @@ static void emit_memhandler_write_rr(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rs, u32 o } else hr2 = rcache_get_reg_arg(1, rd, NULL); - if (gconst_get(rs, &val) && (!(size & MF_PREDECR) /*|| !(rcache_hint_soon & (1 << rs))*/)) { + if (gconst_get(rs, &val) && guest_regs[rs].vreg < 0 && !(rcache_hint_soon & (1 << rs))) { if (size & MF_PREDECR) { val -= 1 << (size & MF_SIZEMASK); gconst_new(rs, val); @@ -2551,7 +2585,7 @@ static void emit_block_entry(void) cycles = 0; \ } -static void *dr_get_pc_base(u32 pc, int is_slave); +static void *dr_get_pc_base(u32 pc, SH2 *sh2); static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) { @@ -2591,7 +2625,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) base_pc = sh2->pc; // get base/validate PC - dr_pc_base = dr_get_pc_base(base_pc, sh2->is_slave); + dr_pc_base = dr_get_pc_base(base_pc, sh2); if (dr_pc_base == (void *)-1) { printf("invalid PC, aborting: %08x\n", base_pc); // FIXME: be less destructive @@ -2637,6 +2671,8 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) op_flags[i] &= ~OF_BTARGET; if (op_flags[i] & OF_BTARGET) 
ADD_TO_ARRAY(branch_target_pc, branch_target_count, pc, ); + if (ops[i].op == OP_LDC && (ops[i].dest & BITMASK1(SHR_SR)) && pc+2 < end_pc) + op_flags[i+1] |= OF_BTARGET; // RTE entrypoint in case of SR(IMASK) change #if LOOP_DETECTION // loop types detected: // 1. target: ... BRA target -> idle loop @@ -2930,7 +2966,8 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) u32 late = 0; // regs read by future ops u32 write = 0; // regs written to (to detect write before read) u32 soon = 0; // regs read soon - tmp = OP_ISBRANCH(opd[0].op); // branch insn detected + tmp = (OP_ISBRANCH(opd[0].op) || opd[0].op == OP_RTE || // branching insns + opd[0].op == OP_TRAPA || opd[0].op == OP_UNDEFINED); for (v = 1; v <= 9; v++) { // no sense in looking any further than the next rcache flush if (pc + 2*v < end_pc && !(op_flags[i+v] & OF_BTARGET) && @@ -2944,7 +2981,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) rcache_clean_mask(rcache_dirty_mask() & ~tmp2); break; } - // XXX must also include test-irq locations! 
tmp |= (OP_ISBRANCH(opd[v].op) || opd[v].op == OP_RTE || opd[v].op == OP_TRAPA || opd[v].op == OP_UNDEFINED); // regs needed in the next few instructions @@ -2953,7 +2989,8 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } rcache_set_hint_soon(late); // insns 1-3 rcache_set_hint_late(late & ~soon); // insns 4-9 - rcache_set_hint_write(write & ~(late|soon)); // next access is write + rcache_set_hint_write(write & ~(late|soon) & ~opd[0].source); + // overwritten without being used } rcache_set_locked(opd[0].source); // try not to evict src regs for this op @@ -2973,32 +3010,22 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case OP_BRANCH_R: if (opd->dest & BITMASK1(SHR_PR)) emit_move_r_imm32(SHR_PR, pc + 2); - if (gconst_get(opd->rm, &u)) { - opd->imm = u; - drcf.pending_branch_direct = 1; - } else { - emit_move_r_r(SHR_PC, opd->rm); - drcf.pending_branch_indirect = 1; - } + emit_move_r_r(SHR_PC, opd->rm); + drcf.pending_branch_indirect = 1; goto end_op; case OP_BRANCH_RF: - if (gconst_get(GET_Rn(), &u)) { - if (opd->dest & BITMASK1(SHR_PR)) - emit_move_r_imm32(SHR_PR, pc + 2); - opd->imm = pc + 2 + u; - drcf.pending_branch_direct = 1; - } else { - tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); - tmp = rcache_get_reg(SHR_PC, RC_GR_WRITE, NULL); - emith_move_r_imm(tmp, pc + 2); - if (opd->dest & BITMASK1(SHR_PR)) { - tmp3 = rcache_get_reg(SHR_PR, RC_GR_WRITE, NULL); - emith_move_r_r(tmp3, tmp); - } - emith_add_r_r(tmp, tmp2); - drcf.pending_branch_indirect = 1; + tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); + tmp = rcache_get_reg(SHR_PC, RC_GR_WRITE, NULL); + emith_move_r_imm(tmp, pc + 2); + if (opd->dest & BITMASK1(SHR_PR)) { + tmp3 = rcache_get_reg(SHR_PR, RC_GR_WRITE, NULL); + emith_move_r_r(tmp3, tmp); } + emith_add_r_r(tmp, tmp2); + if (gconst_get(GET_Rn(), &u)) + gconst_set(SHR_PC, pc + 2 + u); + drcf.pending_branch_indirect = 1; goto end_op; case OP_SLEEP: // SLEEP 0000000000011011 @@ -3041,10 +3068,7 @@ static 
void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // obtain new PC emit_memhandler_read_rr(sh2, SHR_PC, SHR_VBR, opd->imm * 4, 2); // indirect jump -> back to dispatcher - sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); - FLUSH_CYCLES(sr); - rcache_flush(); - emith_jump(sh2_drc_dispatcher); + drcf.pending_branch_indirect = 1; goto end_op; case OP_LOAD_POOL: @@ -3483,7 +3507,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) if (drcf.delay_reg == -1) drcf.delay_reg = GET_Rn(); else - drcf.loop_type = 0; + drcf.polling = drcf.loop_type = 0; } #endif emith_bic_r_imm(sr, T); @@ -3925,8 +3949,6 @@ end_op: emit_move_r_imm32(SHR_PC, pc); rcache_flush(); emith_call(sh2_drc_test_irq); - if (pc < end_pc) // mark next insns as entry point for RTE - op_flags[i+1] |= OF_BTARGET; drcf.test_irq = 0; } @@ -3950,7 +3972,7 @@ end_op: { // idle or delay loop emith_sh2_delay_loop(cycles, drcf.delay_reg); - drcf.loop_type = 0; + drcf.polling = drcf.loop_type = 0; } #endif @@ -4011,15 +4033,30 @@ end_op: drcf.pending_branch_direct = 0; if (target_pc >= base_pc && target_pc < pc) - drcf.loop_type = 0; + drcf.polling = drcf.loop_type = 0; } else if (drcf.pending_branch_indirect) { + struct op_data *opd_b = + (op_flags[i] & OF_DELAY_OP) ? 
opd-1 : opd; + void *target; + u32 target_pc; + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); FLUSH_CYCLES(sr); - rcache_flush(); - emith_jump(sh2_drc_dispatcher); + rcache_clean(); + if (gconst_get(SHR_PC, &target_pc)) { + // JMP const, treat like unconditional direct branch + target = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id); + if (target == NULL) + return NULL; + emith_jump_patchable(target); + } else { + // JMP + emith_jump(sh2_drc_dispatcher); + } + rcache_invalidate(); drcf.pending_branch_indirect = 0; - drcf.loop_type = 0; + drcf.polling = drcf.loop_type = 0; } do_host_disasm(tcache_id); @@ -4836,33 +4873,12 @@ void sh2_drc_finish(SH2 *sh2) #endif /* DRC_SH2 */ -static void *dr_get_pc_base(u32 pc, int is_slave) +static void *dr_get_pc_base(u32 pc, SH2 *sh2) { void *ret = NULL; u32 mask = 0; - if ((pc & ~0x7ff) == 0) { - // BIOS - ret = is_slave ? Pico32xMem->sh2_rom_s.w : Pico32xMem->sh2_rom_m.w; - mask = 0x7ff; - } - else if ((pc & 0xfffff000) == 0xc0000000) { - // data array - ret = sh2s[is_slave].data_array; - mask = 0xfff; - } - else if ((pc & 0xc6000000) == 0x06000000) { - // SDRAM - ret = Pico32xMem->sdram; - mask = 0x03ffff; - } - else if ((pc & 0xc6000000) == 0x02000000) { - // ROM - if ((pc & 0x3fffff) < Pico.romsize) - ret = Pico.rom; - mask = 0x3fffff; - } - + ret = p32x_sh2_get_mem_ptr(pc, &mask, sh2); if (ret == NULL) return (void *)-1; // NULL is valid value @@ -4889,7 +4905,7 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, memset(op_flags, 0, sizeof(*op_flags) * BLOCK_INSN_LIMIT); op_flags[0] |= OF_BTARGET; // block start is always a target - dr_pc_base = dr_get_pc_base(base_pc, is_slave); + dr_pc_base = dr_get_pc_base(base_pc, &sh2s[!!is_slave]); // 1st pass: disassemble for (i = 0, pc = base_pc; ; i++, pc += 2) { @@ -5274,14 +5290,17 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, break; case 0x07: // LDC.L @Rm+,SR 0100mmmm00000111 tmp = SHR_SR; + 
opd->op = OP_LDC; opd->cycles = 3; break; case 0x17: // LDC.L @Rm+,GBR 0100mmmm00010111 tmp = SHR_GBR; + opd->op = OP_LDC; opd->cycles = 3; break; case 0x27: // LDC.L @Rm+,VBR 0100mmmm00100111 tmp = SHR_VBR; + opd->op = OP_LDC; opd->cycles = 3; break; default: @@ -5372,7 +5391,7 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, default: goto undefined; } - opd->op = OP_MOVE; + opd->op = OP_LDC; opd->source = BITMASK1(GET_Rn()); opd->dest = BITMASK1(tmp); break; diff --git a/pico/32x/memory.c b/pico/32x/memory.c index c385d141..578c72f8 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -1730,6 +1730,32 @@ void REGPARM(3) p32x_sh2_write32(u32 a, u32 d, SH2 *sh2) wh(a, d, sh2); } +void *p32x_sh2_get_mem_ptr(u32 a, u32 *mask, SH2 *sh2) +{ + const sh2_memmap *mm = sh2->read8_map; + void *ret = (void *)-1; + u32 am; + + mm += a >> SH2_READ_SHIFT; + am = a & ((1 << SH2_READ_SHIFT)-1); + if (!map_flag_set(mm->addr) && !(am & ~mm->mask)) { + // directly mapped memory (SDRAM, ROM, data array) + ret = (void *)(mm->addr << 1); + *mask = mm->mask; + } else if ((a & ~0x7ff) == 0) { + // BIOS, has handler function since it shares its segment with I/O + ret = sh2->is_slave ? Pico32xMem->sh2_rom_s.w : Pico32xMem->sh2_rom_m.w; + *mask = 0x7ff; + } else if ((a & 0xc6000000) == 0x02000000) { + // banked ROM. 
Return bank address + u32 bank = carthw_ssf2_banks[(a >> 19) & 7] << 19; + ret = sh2->p_rom + bank; + *mask = 0x07ffff; + } + + return ret; +} + // ----------------------------------------------------------------- static void z80_md_bank_write_32x(unsigned int a, unsigned char d) diff --git a/pico/pico_int.h b/pico/pico_int.h index 2c55c941..4139e816 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -933,6 +933,7 @@ void Pico32xMemStateLoaded(void); void p32x_update_banks(void); void p32x_m68k_poll_event(unsigned int flags); void p32x_sh2_poll_memory(unsigned int a, SH2 *sh2); +void *p32x_sh2_get_mem_ptr(unsigned int a, unsigned int *mask, SH2 *sh2); void p32x_sh2_poll_event(SH2 *sh2, unsigned int flags, unsigned int m68k_cycles); // 32x/draw.c From 0495df5d0c752ea7086abee965153f3358dfb79c Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 22 May 2019 21:33:23 +0200 Subject: [PATCH 0196/1110] sh2 memory access improvements, revive ARM asm memory functions --- Makefile | 1 + pico/32x/memory.c | 211 +++++++++++++++++++++---------------- pico/32x/memory_arm.S | 23 ++-- platform/common/common.mak | 5 + 4 files changed, 140 insertions(+), 100 deletions(-) diff --git a/Makefile b/Makefile index d82961eb..1b2aab41 100644 --- a/Makefile +++ b/Makefile @@ -48,6 +48,7 @@ asm_misc ?= 1 asm_cdmemory ?= 1 asm_mix ?= 1 asm_32xdraw ?= 1 +asm_32xmemory ?= 1 else # if not arm use_fame ?= 1 use_cz80 ?= 1 diff --git a/pico/32x/memory.c b/pico/32x/memory.c index 578c72f8..e05d74c9 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -1313,11 +1313,6 @@ out: return (s8)d; } -static u32 REGPARM(2) sh2_read8_da(u32 a, SH2 *sh2) -{ - return (s8)sh2->data_array[(a & 0xfff) ^ 1]; -} - // for ssf2 static u32 REGPARM(2) sh2_read8_rom(u32 a, SH2 *sh2) { @@ -1374,11 +1369,6 @@ out_noprint: return (s16)d; } -static u32 REGPARM(2) sh2_read16_da(u32 a, SH2 *sh2) -{ - return ((s16 *)sh2->data_array)[(a & 0xffe) / 2]; -} - static u32 REGPARM(2) sh2_read16_rom(u32 a, SH2 *sh2) { u32 bank = 
carthw_ssf2_banks[(a >> 19) & 7] << 19; @@ -1399,12 +1389,6 @@ static u32 REGPARM(2) sh2_read32_cs0(u32 a, SH2 *sh2) return d1 | (d2 >> 16); } -static u32 REGPARM(2) sh2_read32_da(u32 a, SH2 *sh2) -{ - u32 d = *((u32 *)sh2->data_array + (a & 0xffc)/4); - return (d << 16) | (d >> 16); -} - static u32 REGPARM(2) sh2_read32_rom(u32 a, SH2 *sh2) { u32 bank = carthw_ssf2_banks[(a >> 19) & 7] << 19; @@ -1429,12 +1413,14 @@ void NOINLINE sh2_sdram_checks(u32 a, int t, SH2 *sh2) } } -void inline sh2_da_checks(u32 a, int t, SH2 *sh2) +#ifndef _ASM_32X_MEMORY_C +static void sh2_da_checks(u32 a, int t, SH2 *sh2) { if (t) sh2_drc_wcheck_da(a, t, sh2); } #endif +#endif static void REGPARM(3) sh2_write_ignore(u32 a, u32 d, SH2 *sh2) { @@ -1477,6 +1463,11 @@ out: DRC_RESTORE_SR(sh2); } +#ifdef _ASM_32X_MEMORY_C +extern void REGPARM(3) sh2_write8_dram(u32 a, u32 d, SH2 *sh2); +extern void REGPARM(3) sh2_write8_sdram(u32 a, u32 d, SH2 *sh2); +extern void REGPARM(3) sh2_write8_da(u32 a, u32 d, SH2 *sh2); +#else static void REGPARM(3) sh2_write8_dram(u32 a, u32 d, SH2 *sh2) { sh2_write8_dramN(sh2->p_dram, a, d); @@ -1505,6 +1496,7 @@ static void REGPARM(3) sh2_write8_da(u32 a, u32 d, SH2 *sh2) sh2_da_checks(a, t, sh2); #endif } +#endif // write16 static void REGPARM(3) sh2_write16_unmapped(u32 a, u32 d, SH2 *sh2) @@ -1544,6 +1536,11 @@ out: DRC_RESTORE_SR(sh2); } +#ifdef _ASM_32X_MEMORY_C +extern void REGPARM(3) sh2_write16_dram(u32 a, u32 d, SH2 *sh2); +extern void REGPARM(3) sh2_write16_sdram(u32 a, u32 d, SH2 *sh2); +extern void REGPARM(3) sh2_write16_da(u32 a, u32 d, SH2 *sh2); +#else static void REGPARM(3) sh2_write16_dram(u32 a, u32 d, SH2 *sh2) { sh2_write16_dramN(sh2->p_dram, a, d); @@ -1572,6 +1569,7 @@ static void REGPARM(3) sh2_write16_da(u32 a, u32 d, SH2 *sh2) sh2_da_checks(a, t, sh2); #endif } +#endif static void REGPARM(3) sh2_write16_rom(u32 a, u32 d, SH2 *sh2) { @@ -1613,6 +1611,11 @@ static void REGPARM(3) sh2_write32_cs0(u32 a, u32 d, SH2 *sh2) *pd = d | (v&m); \ } 
+#ifdef _ASM_32X_MEMORY_C +extern void REGPARM(3) sh2_write32_dram(u32 a, u32 d, SH2 *sh2); +extern void REGPARM(3) sh2_write32_sdram(u32 a, u32 d, SH2 *sh2); +extern void REGPARM(3) sh2_write32_da(u32 a, u32 d, SH2 *sh2); +#else static void REGPARM(3) sh2_write32_dram(u32 a, u32 d, SH2 *sh2) { sh2_write32_dramN(sh2->p_dram, a, d); @@ -1647,6 +1650,7 @@ static void REGPARM(3) sh2_write32_da(u32 a, u32 d, SH2 *sh2) sh2_da_checks(a+2, u, sh2); #endif } +#endif static void REGPARM(3) sh2_write32_rom(u32 a, u32 d, SH2 *sh2) { @@ -1670,10 +1674,10 @@ u32 REGPARM(2) p32x_sh2_read8(u32 a, SH2 *sh2) sh2_map += SH2MAP_ADDR2OFFS_R(a); p = sh2_map->addr; - if (map_flag_set(p)) - return ((sh2_read_handler *)(p << 1))(a, sh2); - else + if (!map_flag_set(p)) return *(s8 *)((p << 1) + ((a & sh2_map->mask) ^ 1)); + else + return ((sh2_read_handler *)(p << 1))(a, sh2); } u32 REGPARM(2) p32x_sh2_read16(u32 a, SH2 *sh2) @@ -1683,10 +1687,10 @@ u32 REGPARM(2) p32x_sh2_read16(u32 a, SH2 *sh2) sh2_map += SH2MAP_ADDR2OFFS_R(a); p = sh2_map->addr; - if (map_flag_set(p)) - return ((sh2_read_handler *)(p << 1))(a, sh2); - else + if (!map_flag_set(p)) return *(s16 *)((p << 1) + (a & sh2_map->mask)); + else + return ((sh2_read_handler *)(p << 1))(a, sh2); } u32 REGPARM(2) p32x_sh2_read32(u32 a, SH2 *sh2) @@ -1961,9 +1965,11 @@ static void get_bios(void) #define MAP_MEMORY(m) ((uptr)(m) >> 1) #define MAP_HANDLER(h) ( ((uptr)(h) >> 1) | ((uptr)1 << (sizeof(uptr) * 8 - 1)) ) -static sh2_memmap sh2_read8_map[0x80], sh2_read16_map[0x80], sh2_read32_map[0x80]; +static sh2_memmap msh2_read8_map[0x80], msh2_read16_map[0x80], msh2_read32_map[0x80]; +static sh2_memmap ssh2_read8_map[0x80], ssh2_read16_map[0x80], ssh2_read32_map[0x80]; // for writes we are using handlers only -static sh2_write_handler *sh2_write8_map[0x80], *sh2_write16_map[0x80], *sh2_write32_map[0x80]; +static sh2_write_handler *msh2_write8_map[0x80], *msh2_write16_map[0x80], *msh2_write32_map[0x80]; +static sh2_write_handler 
*ssh2_write8_map[0x80], *ssh2_write16_map[0x80], *ssh2_write32_map[0x80]; void Pico32xSwapDRAM(int b) { @@ -1977,25 +1983,35 @@ void Pico32xSwapDRAM(int b) b ? m68k_write16_dram1_ow : m68k_write16_dram0_ow, 1); // SH2 - sh2_read8_map[0x04/2].addr = sh2_read8_map[0x24/2].addr = - sh2_read16_map[0x04/2].addr = sh2_read16_map[0x24/2].addr = - sh2_read32_map[0x04/2].addr = sh2_read32_map[0x24/2].addr = MAP_MEMORY(Pico32xMem->dram[b]); + msh2_read8_map[0x04/2].addr = msh2_read8_map[0x24/2].addr = + msh2_read16_map[0x04/2].addr = msh2_read16_map[0x24/2].addr = + msh2_read32_map[0x04/2].addr = msh2_read32_map[0x24/2].addr = MAP_MEMORY(Pico32xMem->dram[b]); + ssh2_read8_map[0x04/2].addr = ssh2_read8_map[0x24/2].addr = + ssh2_read16_map[0x04/2].addr = ssh2_read16_map[0x24/2].addr = + ssh2_read32_map[0x04/2].addr = ssh2_read32_map[0x24/2].addr = MAP_MEMORY(Pico32xMem->dram[b]); msh2.p_dram = ssh2.p_dram = Pico32xMem->dram[b]; // DRC conveniance ptr + msh2.p_rom = ssh2.p_rom = Pico.rom; } static void bank_switch_rom_sh2(void) { if (!carthw_ssf2_active) { // easy - sh2_read8_map[0x02/2].addr = sh2_read8_map[0x22/2].addr = - sh2_read16_map[0x02/2].addr = sh2_read16_map[0x22/2].addr = - sh2_read32_map[0x02/2].addr = sh2_read32_map[0x22/2].addr = MAP_MEMORY(Pico.rom); + msh2_read8_map[0x02/2].addr = msh2_read8_map[0x22/2].addr = + msh2_read16_map[0x02/2].addr = msh2_read16_map[0x22/2].addr = + msh2_read32_map[0x02/2].addr = msh2_read32_map[0x22/2].addr = MAP_MEMORY(Pico.rom); + ssh2_read8_map[0x02/2].addr = ssh2_read8_map[0x22/2].addr = + ssh2_read16_map[0x02/2].addr = ssh2_read16_map[0x22/2].addr = + ssh2_read32_map[0x02/2].addr = ssh2_read32_map[0x22/2].addr = MAP_MEMORY(Pico.rom); } else { - sh2_read8_map[0x02/2].addr = sh2_read8_map[0x22/2].addr = MAP_HANDLER(sh2_read8_rom); - sh2_read16_map[0x02/2].addr = sh2_read16_map[0x22/2].addr = MAP_HANDLER(sh2_read16_rom); - sh2_read32_map[0x02/2].addr = sh2_read32_map[0x22/2].addr = MAP_HANDLER(sh2_read32_rom); + 
msh2_read8_map[0x02/2].addr = msh2_read8_map[0x22/2].addr = MAP_HANDLER(sh2_read8_rom); + msh2_read16_map[0x02/2].addr = msh2_read16_map[0x22/2].addr = MAP_HANDLER(sh2_read16_rom); + msh2_read32_map[0x02/2].addr = msh2_read32_map[0x22/2].addr = MAP_HANDLER(sh2_read32_rom); + ssh2_read8_map[0x02/2].addr = ssh2_read8_map[0x22/2].addr = MAP_HANDLER(sh2_read8_rom); + ssh2_read16_map[0x02/2].addr = ssh2_read16_map[0x22/2].addr = MAP_HANDLER(sh2_read16_rom); + ssh2_read32_map[0x02/2].addr = ssh2_read32_map[0x22/2].addr = MAP_HANDLER(sh2_read32_rom); } } @@ -2062,81 +2078,98 @@ void PicoMemSetup32x(void) // SH2 maps: A31,A30,A29,CS1,CS0 // all unmapped by default - for (i = 0; i < ARRAY_SIZE(sh2_read8_map); i++) { - sh2_read8_map[i].addr = MAP_HANDLER(sh2_read8_unmapped); - sh2_read16_map[i].addr = MAP_HANDLER(sh2_read16_unmapped); - sh2_read32_map[i].addr = MAP_HANDLER(sh2_read32_unmapped); + for (i = 0; i < ARRAY_SIZE(msh2_read8_map); i++) { + msh2_read8_map[i].addr = MAP_HANDLER(sh2_read8_unmapped); + msh2_read16_map[i].addr = MAP_HANDLER(sh2_read16_unmapped); + msh2_read32_map[i].addr = MAP_HANDLER(sh2_read32_unmapped); } - for (i = 0; i < ARRAY_SIZE(sh2_write8_map); i++) { - sh2_write8_map[i] = sh2_write8_unmapped; - sh2_write16_map[i] = sh2_write16_unmapped; - sh2_write32_map[i] = sh2_write32_unmapped; + for (i = 0; i < ARRAY_SIZE(msh2_write8_map); i++) { + msh2_write8_map[i] = sh2_write8_unmapped; + msh2_write16_map[i] = sh2_write16_unmapped; + msh2_write32_map[i] = sh2_write32_unmapped; } // "purge area" for (i = 0x40; i <= 0x5f; i++) { - sh2_write8_map[i >> 1] = - sh2_write16_map[i >> 1] = - sh2_write32_map[i >> 1] = sh2_write_ignore; + msh2_write8_map[i >> 1] = + msh2_write16_map[i >> 1] = + msh2_write32_map[i >> 1] = sh2_write_ignore; } // CS0 - sh2_read8_map[0x00/2].addr = sh2_read8_map[0x20/2].addr = MAP_HANDLER(sh2_read8_cs0); - sh2_read16_map[0x00/2].addr = sh2_read16_map[0x20/2].addr = MAP_HANDLER(sh2_read16_cs0); - sh2_read32_map[0x00/2].addr = 
sh2_read32_map[0x20/2].addr = MAP_HANDLER(sh2_read32_cs0); - sh2_write8_map[0x00/2] = sh2_write8_map[0x20/2] = sh2_write8_cs0; - sh2_write16_map[0x00/2] = sh2_write16_map[0x20/2] = sh2_write16_cs0; - sh2_write32_map[0x00/2] = sh2_write32_map[0x20/2] = sh2_write32_cs0; + msh2_read8_map[0x00/2].addr = msh2_read8_map[0x20/2].addr = MAP_HANDLER(sh2_read8_cs0); + msh2_read16_map[0x00/2].addr = msh2_read16_map[0x20/2].addr = MAP_HANDLER(sh2_read16_cs0); + msh2_read32_map[0x00/2].addr = msh2_read32_map[0x20/2].addr = MAP_HANDLER(sh2_read32_cs0); + msh2_write8_map[0x00/2] = msh2_write8_map[0x20/2] = sh2_write8_cs0; + msh2_write16_map[0x00/2] = msh2_write16_map[0x20/2] = sh2_write16_cs0; + msh2_write32_map[0x00/2] = msh2_write32_map[0x20/2] = sh2_write32_cs0; // CS1 - ROM bank_switch_rom_sh2(); - sh2_read8_map[0x02/2].mask = sh2_read8_map[0x22/2].mask = 0x3fffff; // FIXME - sh2_read16_map[0x02/2].mask = sh2_read16_map[0x22/2].mask = 0x3ffffe; // FIXME - sh2_read32_map[0x02/2].mask = sh2_read32_map[0x22/2].mask = 0x3ffffc; // FIXME - sh2_write16_map[0x02/2] = sh2_write16_map[0x22/2] = sh2_write16_rom; - sh2_write32_map[0x02/2] = sh2_write32_map[0x22/2] = sh2_write32_rom; + msh2_read8_map[0x02/2].mask = msh2_read8_map[0x22/2].mask = 0x3fffff; // FIXME + msh2_read16_map[0x02/2].mask = msh2_read16_map[0x22/2].mask = 0x3ffffe; // FIXME + msh2_read32_map[0x02/2].mask = msh2_read32_map[0x22/2].mask = 0x3ffffc; // FIXME + msh2_write16_map[0x02/2] = msh2_write16_map[0x22/2] = sh2_write16_rom; + msh2_write32_map[0x02/2] = msh2_write32_map[0x22/2] = sh2_write32_rom; // CS2 - DRAM - sh2_read8_map[0x04/2].mask = sh2_read8_map[0x24/2].mask = 0x01ffff; - sh2_read16_map[0x04/2].mask = sh2_read16_map[0x24/2].mask = 0x01fffe; - sh2_read32_map[0x04/2].mask = sh2_read32_map[0x24/2].mask = 0x01fffc; - sh2_write8_map[0x04/2] = sh2_write8_map[0x24/2] = sh2_write8_dram; - sh2_write16_map[0x04/2] = sh2_write16_map[0x24/2] = sh2_write16_dram; - sh2_write32_map[0x04/2] = sh2_write32_map[0x24/2] = 
sh2_write32_dram; + msh2_read8_map[0x04/2].mask = msh2_read8_map[0x24/2].mask = 0x01ffff; + msh2_read16_map[0x04/2].mask = msh2_read16_map[0x24/2].mask = 0x01fffe; + msh2_read32_map[0x04/2].mask = msh2_read32_map[0x24/2].mask = 0x01fffc; + msh2_write8_map[0x04/2] = msh2_write8_map[0x24/2] = sh2_write8_dram; + msh2_write16_map[0x04/2] = msh2_write16_map[0x24/2] = sh2_write16_dram; + msh2_write32_map[0x04/2] = msh2_write32_map[0x24/2] = sh2_write32_dram; // CS3 - SDRAM - sh2_read8_map[0x06/2].addr = sh2_read8_map[0x26/2].addr = - sh2_read16_map[0x06/2].addr = sh2_read16_map[0x26/2].addr = - sh2_read32_map[0x06/2].addr = sh2_read32_map[0x26/2].addr = MAP_MEMORY(Pico32xMem->sdram); - sh2_write8_map[0x06/2] = sh2_write8_map[0x26/2] = sh2_write8_sdram; - sh2_write16_map[0x06/2] = sh2_write16_map[0x26/2] = sh2_write16_sdram; - sh2_write32_map[0x06/2] = sh2_write32_map[0x26/2] = sh2_write32_sdram; - sh2_read8_map[0x06/2].mask = sh2_read8_map[0x26/2].mask = 0x03ffff; - sh2_read16_map[0x06/2].mask = sh2_read16_map[0x26/2].mask = 0x03fffe; - sh2_read32_map[0x06/2].mask = sh2_read32_map[0x26/2].mask = 0x03fffc; + msh2_read8_map[0x06/2].addr = msh2_read8_map[0x26/2].addr = + msh2_read16_map[0x06/2].addr = msh2_read16_map[0x26/2].addr = + msh2_read32_map[0x06/2].addr = msh2_read32_map[0x26/2].addr = MAP_MEMORY(Pico32xMem->sdram); + msh2_write8_map[0x06/2] = msh2_write8_map[0x26/2] = sh2_write8_sdram; + msh2_write16_map[0x06/2] = msh2_write16_map[0x26/2] = sh2_write16_sdram; + msh2_write32_map[0x06/2] = msh2_write32_map[0x26/2] = sh2_write32_sdram; + msh2_read8_map[0x06/2].mask = msh2_read8_map[0x26/2].mask = 0x03ffff; + msh2_read16_map[0x06/2].mask = msh2_read16_map[0x26/2].mask = 0x03fffe; + msh2_read32_map[0x06/2].mask = msh2_read32_map[0x26/2].mask = 0x03fffc; // SH2 data array - sh2_read8_map[0xc0/2].addr = MAP_HANDLER(sh2_read8_da); - sh2_read16_map[0xc0/2].addr = MAP_HANDLER(sh2_read16_da); - sh2_read32_map[0xc0/2].addr = MAP_HANDLER(sh2_read32_da); - 
sh2_write8_map[0xc0/2] = sh2_write8_da; - sh2_write16_map[0xc0/2] = sh2_write16_da; - sh2_write32_map[0xc0/2] = sh2_write32_da; + msh2_read8_map[0xc0/2].mask = 0x0fff; + msh2_read16_map[0xc0/2].mask = 0x0ffe; + msh2_read32_map[0xc0/2].mask = 0x0ffc; + msh2_write8_map[0xc0/2] = sh2_write8_da; + msh2_write16_map[0xc0/2] = sh2_write16_da; + msh2_write32_map[0xc0/2] = sh2_write32_da; // SH2 IO - sh2_read8_map[0xff/2].addr = MAP_HANDLER(sh2_peripheral_read8); - sh2_read16_map[0xff/2].addr = MAP_HANDLER(sh2_peripheral_read16); - sh2_read32_map[0xff/2].addr = MAP_HANDLER(sh2_peripheral_read32); - sh2_write8_map[0xff/2] = sh2_peripheral_write8; - sh2_write16_map[0xff/2] = sh2_peripheral_write16; - sh2_write32_map[0xff/2] = sh2_peripheral_write32; + msh2_read8_map[0xff/2].addr = MAP_HANDLER(sh2_peripheral_read8); + msh2_read16_map[0xff/2].addr = MAP_HANDLER(sh2_peripheral_read16); + msh2_read32_map[0xff/2].addr = MAP_HANDLER(sh2_peripheral_read32); + msh2_write8_map[0xff/2] = sh2_peripheral_write8; + msh2_write16_map[0xff/2] = sh2_peripheral_write16; + msh2_write32_map[0xff/2] = sh2_peripheral_write32; + + memcpy(ssh2_read8_map, msh2_read8_map, sizeof(msh2_read8_map)); + memcpy(ssh2_read16_map, msh2_read16_map, sizeof(msh2_read16_map)); + memcpy(ssh2_read32_map, msh2_read32_map, sizeof(msh2_read32_map)); + memcpy(ssh2_write8_map, msh2_write8_map, sizeof(msh2_write8_map)); + memcpy(ssh2_write16_map, msh2_write16_map, sizeof(msh2_write16_map)); + memcpy(ssh2_write32_map, msh2_write32_map, sizeof(msh2_write32_map)); + + msh2_read8_map[0xc0/2].addr = + msh2_read16_map[0xc0/2].addr = + msh2_read32_map[0xc0/2].addr = MAP_MEMORY(msh2.data_array); + ssh2_read8_map[0xc0/2].addr = + ssh2_read16_map[0xc0/2].addr = + ssh2_read32_map[0xc0/2].addr = MAP_MEMORY(ssh2.data_array); // map DRAM area, both 68k and SH2 Pico32xSwapDRAM(1); - msh2.read8_map = ssh2.read8_map = sh2_read8_map; - msh2.read16_map = ssh2.read16_map = sh2_read16_map; - msh2.read32_map = ssh2.read32_map = sh2_read32_map; 
- msh2.write8_tab = ssh2.write8_tab = (const void **)(void *)sh2_write8_map; - msh2.write16_tab = ssh2.write16_tab = (const void **)(void *)sh2_write16_map; - msh2.write32_tab = ssh2.write32_tab = (const void **)(void *)sh2_write32_map; + msh2.read8_map = msh2_read8_map; ssh2.read8_map = ssh2_read8_map; + msh2.read16_map = msh2_read16_map; ssh2.read16_map = ssh2_read16_map; + msh2.read32_map = msh2_read32_map; ssh2.read32_map = ssh2_read32_map; + msh2.write8_tab = (const void **)(void *)msh2_write8_map; + msh2.write16_tab = (const void **)(void *)msh2_write16_map; + msh2.write32_tab = (const void **)(void *)msh2_write32_map; + ssh2.write8_tab = (const void **)(void *)ssh2_write8_map; + ssh2.write16_tab = (const void **)(void *)ssh2_write16_map; + ssh2.write32_tab = (const void **)(void *)ssh2_write32_map; sh2_drc_mem_setup(&msh2); sh2_drc_mem_setup(&ssh2); diff --git a/pico/32x/memory_arm.S b/pico/32x/memory_arm.S index b449370b..379906a0 100644 --- a/pico/32x/memory_arm.S +++ b/pico/32x/memory_arm.S @@ -43,6 +43,7 @@ .global sh2_write32_da .global sh2_write32_dram +#if 0 sh2_read8_rom: ldr ip, [r1, #OFS_SH2_p_rom] eor r0, r0, #1 @@ -126,9 +127,10 @@ sh2_read32_dram: ldr r0, [ip, r0, lsr #SH2_DRAM_SHIFT] mov r0, r0, ror #16 bx lr +#endif sh2_write8_sdram: - @ preserve r0 and r2 for tail call + @ preserve r0,r2 for tail call ldr ip, [r2, #OFS_SH2_p_sdram] eor r3, r0, #1 mov r3, r3, lsl #SH2_RAM_SHIFT @@ -139,7 +141,7 @@ sh2_write8_sdram: bic r0, r0, #1 cmp r1, #0 bxeq lr - b sh2_drc_wcheck_ram + b sh2_sdram_checks #else bx lr #endif @@ -170,7 +172,7 @@ sh2_write8_dram: bx lr sh2_write16_sdram: - @ preserve r0 and r2 for tail call + @ preserve r0,r2 for tail call ldr ip, [r2, #OFS_SH2_p_sdram] mov r3, r0, lsl #SH2_RAM_SHIFT mov r3, r3, lsr #SH2_RAM_SHIFT @@ -180,7 +182,7 @@ sh2_write16_sdram: ldrb r1, [ip, r3, lsr #1] cmp r1, #0 bxeq lr - b sh2_drc_wcheck_ram + b sh2_sdram_checks #else bx lr #endif @@ -217,7 +219,7 @@ sh2_write16_dram: bx lr sh2_write32_sdram: - @ 
preserve r0 and r2 for tail call + @ preserve r0,r2 for tail call ldr ip, [r2, #OFS_SH2_p_sdram] mov r1, r1, ror #16 mov r3, r0, lsl #SH2_RAM_SHIFT @@ -228,13 +230,13 @@ sh2_write32_sdram: cmp r1, #0 beq 1f stmfd sp!, {r0, r2, ip, lr} - bl sh2_drc_wcheck_ram + b sh2_sdram_checks ldmfd sp!, {r0, r2, ip, lr} 1: ldrb r1, [ip, #1] + add r0, r0, #2 cmp r1, #0 bxeq lr - add r0, r0, #2 - b sh2_drc_wcheck_ram + b sh2_sdram_checks #else bx lr #endif @@ -254,9 +256,9 @@ sh2_write32_da: bl sh2_drc_wcheck_da ldmfd sp!, {r0, r2, ip, lr} 1: ldrb r1, [ip, #1] + add r0, r0, #2 cmp r1, #0 bxeq lr - add r0, r0, #2 b sh2_drc_wcheck_da #else bx lr @@ -266,11 +268,10 @@ sh2_write32_dram: ldr ip, [r2, #OFS_SH2_p_dram] tst r0, #SH2_DRAM_OW mov r3, r0, lsl #SH2_DRAM_SHIFT - moveq r1, r1, ror #16 + mov r1, r1, ror #16 streq r1, [ip, r3, lsr #SH2_DRAM_SHIFT] bxeq lr ldr r0, [ip, r3, lsr #SH2_DRAM_SHIFT] - mov r1, r1, ror #16 mov r2, #0 tst r1, #0x00ff0000 orrne r2, r2, #0x00ff0000 diff --git a/platform/common/common.mak b/platform/common/common.mak index b4a5759c..5be1521c 100644 --- a/platform/common/common.mak +++ b/platform/common/common.mak @@ -10,6 +10,7 @@ asm_ym2612 = 0 asm_misc = 0 asm_cdmemory = 0 asm_32xdraw = 0 +asm_32xmemory = 0 asm_mix = 0 endif @@ -73,6 +74,10 @@ ifeq "$(asm_32xdraw)" "1" DEFINES += _ASM_32X_DRAW SRCS_COMMON += $(R)pico/32x/draw_arm.S endif +ifeq "$(asm_32xmemory)" "1" +DEFINES += _ASM_32X_MEMORY_C +SRCS_COMMON += $(R)pico/32x/memory_arm.s +endif ifeq "$(asm_mix)" "1" SRCS_COMMON += $(R)pico/sound/mix_arm.S endif From e43998086cfd654cc463ef1928bffec1d4f80919 Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 22 May 2019 21:38:59 +0200 Subject: [PATCH 0197/1110] polling detection: communication poll fifo to avoid comm data loss --- cpu/sh2/compiler.c | 18 ++-- pico/32x/32x.c | 4 +- pico/32x/memory.c | 241 +++++++++++++++++++++++++++++++----------- pico/32x/memory_arm.S | 44 ++++---- pico/pico_int.h | 4 +- 5 files changed, 214 insertions(+), 97 deletions(-) diff 
--git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 9160c90c..6d8e5118 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -4220,9 +4220,9 @@ static void sh2_generate_utils(void) emith_eor_r_imm(arg1, 1); emith_read8s_r_r_r(arg1, arg1, arg2); emith_push_ret(arg1); - emith_move_r_r_ptr(arg1, CONTEXT_REG); - emith_call(p32x_sh2_poll_memory); - emith_pop_and_ret(RET_REG); + emith_move_r_r_ptr(arg2, CONTEXT_REG); + emith_call(p32x_sh2_poll_memory8); + emith_pop_and_ret(arg1); emith_flush(); // d = sh2_drc_read16_poll(u32 a) @@ -4236,9 +4236,9 @@ static void sh2_generate_utils(void) emith_and_r_r_r(arg1, arg0, arg3); emith_read16s_r_r_r(arg1, arg1, arg2); emith_push_ret(arg1); - emith_move_r_r_ptr(arg1, CONTEXT_REG); - emith_call(p32x_sh2_poll_memory); - emith_pop_and_ret(RET_REG); + emith_move_r_r_ptr(arg2, CONTEXT_REG); + emith_call(p32x_sh2_poll_memory16); + emith_pop_and_ret(arg1); emith_flush(); // d = sh2_drc_read32_poll(u32 a) @@ -4253,9 +4253,9 @@ static void sh2_generate_utils(void) emith_read_r_r_r(arg1, arg1, arg2); emith_ror(arg1, arg1, 16); emith_push_ret(arg1); - emith_move_r_r_ptr(arg1, CONTEXT_REG); - emith_call(p32x_sh2_poll_memory); - emith_pop_and_ret(RET_REG); + emith_move_r_r_ptr(arg2, CONTEXT_REG); + emith_call(p32x_sh2_poll_memory32); + emith_pop_and_ret(arg1); emith_flush(); // sh2_drc_exit(void) diff --git a/pico/32x/32x.c b/pico/32x/32x.c index 19c6e0a6..1511f3f7 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -471,7 +471,7 @@ void sync_sh2s_normal(unsigned int m68k_target) if (!(ssh2.state & SH2_IDLE_STATES)) { cycles = target - ssh2.m68krcycles_done; if (cycles > 0) { - run_sh2(&ssh2, cycles); + run_sh2(&ssh2, cycles > 20 ? 
cycles : 20); if (event_time_next && CYCLES_GT(target, event_time_next)) target = event_time_next; @@ -483,7 +483,7 @@ void sync_sh2s_normal(unsigned int m68k_target) if (!(msh2.state & SH2_IDLE_STATES)) { cycles = target - msh2.m68krcycles_done; if (cycles > 0) { - run_sh2(&msh2, cycles); + run_sh2(&msh2, cycles > 20 ? cycles : 20); if (event_time_next && CYCLES_GT(target, event_time_next)) target = event_time_next; diff --git a/pico/32x/memory.c b/pico/32x/memory.c index e05d74c9..a1ef42c2 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -58,7 +58,7 @@ static void (*m68k_write16_io)(u32 a, u32 d); #define REG8IN16(ptr, offs) ((u8 *)ptr)[(offs) ^ 1] // poll detection -#define POLL_THRESHOLD 3 +#define POLL_THRESHOLD 5 static struct { u32 addr1, addr2, cycles; @@ -74,7 +74,7 @@ static int m68k_poll_detect(u32 a, u32 cycles, u32 flags) if (match && cycles - m68k_poll.cycles <= 64 && !SekNotPolling) { // detect split 32bit access by same cycle count, and ignore those - if (cycles != m68k_poll.cycles && m68k_poll.cnt++ > POLL_THRESHOLD) { + if (cycles != m68k_poll.cycles && ++m68k_poll.cnt > POLL_THRESHOLD) { if (!(Pico32x.emu_flags & flags)) { elprintf(EL_32X, "m68k poll addr %08x, cyc %u", a, cycles - m68k_poll.cycles); @@ -114,8 +114,11 @@ static void NOINLINE sh2_poll_detect(u32 a, SH2 *sh2, u32 flags, int maxcnt) { u32 cycles_done = sh2_cycles_done_t(sh2); + // reading 2 consecutive 16bit values is probably a 32bit access. detect this + // by checking address (max 2 bytes away) and cycles (max 2 cycles later). + // no polling if more than 20 cycles have passed since last detect call. 
if (a - sh2->poll_addr <= 2 && CYCLES_GE(sh2->poll_cycles+20, cycles_done)) { - if (sh2->poll_cycles != cycles_done && ++sh2->poll_cnt >= maxcnt) { + if (CYCLES_GT(cycles_done,sh2->poll_cycles+2) && ++sh2->poll_cnt > maxcnt) { if (!(sh2->state & flags)) elprintf_sh2(sh2, EL_32X, "state: %02x->%02x", sh2->state, sh2->state | flags); @@ -124,6 +127,7 @@ static void NOINLINE sh2_poll_detect(u32 a, SH2 *sh2, u32 flags, int maxcnt) sh2_end_run(sh2, 1); pevt_log_sh2(sh2, EVT_POLL_START); #ifdef DRC_SH2 + // mark this as an address used for polling if SDRAM if ((a & 0xc6000000) == 0x06000000) { unsigned char *p = sh2->p_drcblk_ram; p[(a & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT] |= 0x80; @@ -149,12 +153,6 @@ void NOINLINE p32x_sh2_poll_event(SH2 *sh2, u32 flags, u32 m68k_cycles) pevt_log_sh2_o(sh2, EVT_POLL_END); sh2->state &= ~flags; -#ifdef DRC_SH2 - if ((sh2->poll_addr & 0xc6000000) == 0x06000000) { - unsigned char *p = sh2->p_drcblk_ram; - p[(sh2->poll_addr & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT] &= ~0x80; - } -#endif } if (!(sh2->state & (SH2_STATE_CPOLL|SH2_STATE_VPOLL|SH2_STATE_RPOLL))) @@ -172,12 +170,123 @@ static void sh2s_sync_on_read(SH2 *sh2) p32x_sync_other_sh2(sh2, sh2->m68krcycles_done + C_SH2_TO_M68K(sh2, cycles)); } -void p32x_sh2_poll_memory(unsigned int a, SH2 *sh2) +// poll fifo, stores writes to potential addresses used for polling. +// This is used to correctly deliver syncronisation data to the 3 cpus. The +// fifo stores 16 bit values, 8/32 bit accesses must be adapted accordingly. +#define PFIFO_SZ 4 +#define PFIFO_CNT 4 +struct sh2_poll_fifo { + u32 cycles; + u32 a; + u16 d; + u16 cpu; +} sh2_poll_fifo[PFIFO_CNT][PFIFO_SZ]; +unsigned sh2_poll_rd[PFIFO_CNT], sh2_poll_wr[PFIFO_CNT]; // ringbuffer pointers + +static NOINLINE u32 sh2_poll_read(u32 a, u32 d, unsigned int cycles, SH2* sh2) { + int hix = (a >> 1) % PFIFO_CNT; + struct sh2_poll_fifo *fifo = sh2_poll_fifo[hix]; + struct sh2_poll_fifo *p; + int cpu = sh2 ? 
sh2->is_slave+1 : 0; + unsigned idx; + + // fetch oldest write to address from fifo, but stop when reaching the present + idx = sh2_poll_rd[hix]; + while (idx != sh2_poll_wr[hix] && CYCLES_GE(cycles, fifo[idx].cycles)) { +// int oidx = idx; + p = &fifo[idx]; + idx = (idx+1) % PFIFO_SZ; + + if (CYCLES_GT(cycles, p->cycles+80)) { + // drop older fifo stores that may cause synchronisation problems. + // NB unfortunately this cycle diff is quite sensitive: + // observed in Brutal Unleashed: min 80, observed in Afterburner: max 110 + sh2_poll_rd[hix] = idx; + } else if (p->a == a) { + // replace current data with fifo value and discard fifo entry + if (cpu != p->cpu) { + d = p->d; + p->a = -1; +// if (oidx == sh2_poll_rd[hix]) +// sh2_poll_rd[hix] = idx; + } + break; + } + } + return d; +} + +static NOINLINE void sh2_poll_write(u32 a, u32 d, unsigned int cycles, SH2 *sh2) +{ + int hix = (a >> 1) % PFIFO_CNT; + struct sh2_poll_fifo *fifo = sh2_poll_fifo[hix]; + struct sh2_poll_fifo *p = &fifo[sh2_poll_wr[hix]]; + struct sh2_poll_fifo *q = &fifo[(sh2_poll_wr[hix]-1) % PFIFO_SZ]; + int cpu = sh2 ? sh2->is_slave+1 : 0; + + // fold 2 consecutive writes to the same address to avoid reading of + // intermediate values that may cause synchronisation problems. + // NB this can take an eternity on m68k: mov.b , needs + // 28 m68k-cycles (~80 sh2-cycles) to complete (observed in Metal Head) + if (q->a == a && !CYCLES_GT(cycles,q->cycles+30)) { + q->d = d; + } else { + // store write to poll address in fifo + sh2_poll_wr[hix] = (sh2_poll_wr[hix]+1) % PFIFO_SZ; + if (sh2_poll_wr[hix] == sh2_poll_rd[hix]) + // fifo overflow, discard oldest value + sh2_poll_rd[hix] = (sh2_poll_rd[hix]+1) % PFIFO_SZ; + *p = (struct sh2_poll_fifo){ .cycles = cycles, .a = a, .d = d, .cpu = cpu }; + } +} + +u32 REGPARM(3) p32x_sh2_poll_memory8(unsigned int a, u32 d, SH2 *sh2) +{ + int shift = (a & 1 ? 
0 : 8); + d = (s8)(p32x_sh2_poll_memory16(a & ~1, d << shift, sh2) >> shift); + return d; +} + +u32 REGPARM(3) p32x_sh2_poll_memory16(unsigned int a, u32 d, SH2 *sh2) +{ + unsigned char *p = sh2->p_drcblk_ram; + unsigned int cycles; + DRC_SAVE_SR(sh2); + // is this a synchronisation address? + if(p[(a & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT] & 0x80) { + sh2s_sync_on_read(sh2); + cycles = sh2_cycles_done_m68k(sh2); + // check poll fifo and sign-extend the result correctly + d = (s16)sh2_poll_read(a, d, cycles, sh2); + } + sh2_poll_detect(a, sh2, SH2_STATE_RPOLL, 5); - sh2s_sync_on_read(sh2); + DRC_RESTORE_SR(sh2); + return d; +} + +u32 REGPARM(3) p32x_sh2_poll_memory32(unsigned int a, u32 d, SH2 *sh2) +{ + unsigned char *p = sh2->p_drcblk_ram; + unsigned int cycles; + + DRC_SAVE_SR(sh2); + // is this a synchronisation address? + if(p[(a & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT] & 0x80) { + sh2s_sync_on_read(sh2); + cycles = sh2_cycles_done_m68k(sh2); + // check poll fifo and sign-extend the result correctly + d = sh2_poll_read(a, d, cycles, sh2) | + (sh2_poll_read(a+2, d >> 16, cycles, sh2) << 16); + } + + sh2_poll_detect(a, sh2, SH2_STATE_RPOLL, 5); + + DRC_RESTORE_SR(sh2); + return d; } // SH2 faking @@ -222,17 +331,15 @@ static u32 p32x_reg_read16(u32 a) #else if ((a & 0x30) == 0x20) { unsigned int cycles = SekCyclesDone(); - int comreg = 1 << (a & 0x0f) / 2; - if (cycles - msh2.m68krcycles_done > 244 - || (Pico32x.comm_dirty & comreg)) + if (cycles - msh2.m68krcycles_done > 244) p32x_sync_sh2s(cycles); if (m68k_poll_detect(a, cycles, P32XF_68KCPOLL)) { SekSetStop(1); SekEndRun(16); } - goto out; + return sh2_poll_read(a, Pico32x.regs[a / 2], cycles, NULL); } #endif @@ -415,18 +522,17 @@ static void p32x_reg_write8(u32 a, u32 d) if ((a & 0x30) == 0x20) { int cycles = SekCyclesDone(); - int comreg; if (REG8IN16(r, a) == d) return; - p32x_sync_sh2s(cycles); + if (cycles - (int)msh2.m68krcycles_done > 30) + p32x_sync_sh2s(cycles); REG8IN16(r, a) = d; 
p32x_sh2_poll_event(&sh2s[0], SH2_STATE_CPOLL, cycles); p32x_sh2_poll_event(&sh2s[1], SH2_STATE_CPOLL, cycles); - comreg = 1 << (a & 0x0f) / 2; - Pico32x.comm_dirty |= comreg; + sh2_poll_write(a & ~1, r[a / 2], cycles, NULL); return; } } @@ -477,18 +583,17 @@ static void p32x_reg_write16(u32 a, u32 d) // comm port if ((a & 0x30) == 0x20) { int cycles = SekCyclesDone(); - int comreg; - + if (r[a / 2] == d) return; - p32x_sync_sh2s(cycles); + if (cycles - (int)msh2.m68krcycles_done > 30) + p32x_sync_sh2s(cycles); r[a / 2] = d; p32x_sh2_poll_event(&sh2s[0], SH2_STATE_CPOLL, cycles); p32x_sh2_poll_event(&sh2s[1], SH2_STATE_CPOLL, cycles); - comreg = 1 << (a & 0x0f) / 2; - Pico32x.comm_dirty |= comreg; + sh2_poll_write(a, (u16)d, cycles, NULL); return; } // PWM @@ -596,9 +701,9 @@ static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2) return (r[0] & P32XS_FM) | Pico32x.sh2_regs[0] | Pico32x.sh2irq_mask[sh2->is_slave]; case 0x04: // H count (often as comm too) - sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 7); + sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9); sh2s_sync_on_read(sh2); - return Pico32x.sh2_regs[4 / 2]; + return sh2_poll_read(a, Pico32x.sh2_regs[4 / 2], sh2_cycles_done_m68k(sh2), sh2); case 0x06: return (r[a / 2] & ~P32XS_FULL) | 0x4000; case 0x08: // DREQ src @@ -625,9 +730,9 @@ static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2) // comm port if ((a & 0x30) == 0x20) { - sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 7); + sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9); sh2s_sync_on_read(sh2); - return r[a / 2]; + return sh2_poll_read(a, r[a / 2], sh2_cycles_done_m68k(sh2), sh2); } if ((a & 0x30) == 0x30) return p32x_pwm_read16(a, sh2, sh2_cycles_done_m68k(sh2)); @@ -671,10 +776,11 @@ static void p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2) case 0x05: // H count d &= 0xff; if (Pico32x.sh2_regs[4 / 2] != d) { + unsigned int cycles = sh2_cycles_done_m68k(sh2); Pico32x.sh2_regs[4 / 2] = d; - p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, - sh2_cycles_done_m68k(sh2)); sh2_end_run(sh2, 
4); + p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles); + sh2_poll_write(a & ~1, d, cycles, sh2); } return; case 0x30: @@ -719,17 +825,16 @@ static void p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2) } if ((a & 0x30) == 0x20) { - int comreg; + unsigned int cycles; if (REG8IN16(r, a) == d) return; REG8IN16(r, a) = d; + cycles = sh2_cycles_done_m68k(sh2); sh2_end_run(sh2, 1); p32x_m68k_poll_event(P32XF_68KCPOLL); - p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, - sh2_cycles_done_m68k(sh2)); - comreg = 1 << (a & 0x0f) / 2; - Pico32x.comm_dirty |= comreg; + p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles); + sh2_poll_write(a & ~1, r[a / 2], cycles, sh2); return; } @@ -745,17 +850,16 @@ static void p32x_sh2reg_write16(u32 a, u32 d, SH2 *sh2) // comm if ((a & 0x30) == 0x20) { - int comreg; + unsigned int cycles; if (Pico32x.regs[a / 2] == d) return; Pico32x.regs[a / 2] = d; + cycles = sh2_cycles_done_m68k(sh2); sh2_end_run(sh2, 1); p32x_m68k_poll_event(P32XF_68KCPOLL); - p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, - sh2_cycles_done_m68k(sh2)); - comreg = 1 << (a & 0x0f) / 2; - Pico32x.comm_dirty |= comreg; + p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles); + sh2_poll_write(a, d, cycles, sh2); return; } // PWM @@ -1399,25 +1503,42 @@ static u32 REGPARM(2) sh2_read32_rom(u32 a, SH2 *sh2) // writes #ifdef DRC_SH2 -void NOINLINE sh2_sdram_checks(u32 a, int t, SH2 *sh2) +static void NOINLINE sh2_sdram_poll(u32 a, u16 d, SH2 *sh2) { - int v = t & ~0x80; + unsigned cycles; - if (v) - sh2_drc_wcheck_ram(a, v, sh2); - if (t & 0x80) { - DRC_SAVE_SR(sh2); - sh2_end_run(sh2, 1); - p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_RPOLL, sh2_cycles_done_m68k(sh2)); - DRC_RESTORE_SR(sh2); - } + DRC_SAVE_SR(sh2); + sh2_end_run(sh2, 1); + cycles = sh2_cycles_done_m68k(sh2); + sh2_poll_write(a, d, cycles, sh2); + p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_RPOLL, cycles); + DRC_RESTORE_SR(sh2); +} + +void NOINLINE sh2_sdram_checks(u32 a, u32 
d, SH2 *sh2, int t) +{ + if (t & 0x80) + sh2_sdram_poll(a, d, sh2); + if (t & 0x7f) + sh2_drc_wcheck_ram(a, t & 0x7f, sh2); +} + +void NOINLINE sh2_sdram_checks_l(u32 a, u32 d, SH2 *sh2, int t) +{ + sh2_sdram_checks(a, d, sh2, t); + sh2_sdram_checks(a+2, d>>16, sh2, t>>16); } #ifndef _ASM_32X_MEMORY_C static void sh2_da_checks(u32 a, int t, SH2 *sh2) { - if (t) - sh2_drc_wcheck_da(a, t, sh2); + sh2_drc_wcheck_da(a, t, sh2); +} + +static void NOINLINE sh2_da_checks_l(u32 a, int t, SH2 *sh2) +{ + sh2_da_checks(a, t, sh2); + sh2_da_checks(a+2, t>>16, sh2); } #endif #endif @@ -1481,7 +1602,7 @@ static void REGPARM(3) sh2_write8_sdram(u32 a, u32 d, SH2 *sh2) u8 *p = sh2->p_drcblk_ram; int t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; if (t) - sh2_sdram_checks(a, t, sh2); + sh2_sdram_checks(a & ~1, ((u16 *)sh2->p_sdram)[a1 / 2], sh2, t); #endif } @@ -1554,7 +1675,7 @@ static void REGPARM(3) sh2_write16_sdram(u32 a, u32 d, SH2 *sh2) u8 *p = sh2->p_drcblk_ram; int t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; if (t) - sh2_sdram_checks(a, t, sh2); + sh2_sdram_checks(a, d, sh2, t); #endif } @@ -1628,11 +1749,9 @@ static void REGPARM(3) sh2_write32_sdram(u32 a, u32 d, SH2 *sh2) #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_ram; int t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; - if (t) - sh2_sdram_checks(a, t, sh2); int u = p[(a1+2) >> SH2_DRCBLK_RAM_SHIFT]; - if (u) - sh2_sdram_checks(a+2, u, sh2); + if (t|(u<<16)) + sh2_sdram_checks_l(a, d, sh2, t|(u<<16)); #endif } @@ -1643,11 +1762,9 @@ static void REGPARM(3) sh2_write32_da(u32 a, u32 d, SH2 *sh2) #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_da; int t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; - if (t) - sh2_da_checks(a, t, sh2); int u = p[(a1+2) >> SH2_DRCBLK_DA_SHIFT]; - if (u) - sh2_da_checks(a+2, u, sh2); + if (t|(u<<16)) + sh2_da_checks_l(a, t|(u<<16), sh2); #endif } #endif diff --git a/pico/32x/memory_arm.S b/pico/32x/memory_arm.S index 379906a0..48143ba9 100644 --- a/pico/32x/memory_arm.S +++ b/pico/32x/memory_arm.S @@ -130,17 +130,21 @@ sh2_read32_dram: #endif 
sh2_write8_sdram: - @ preserve r0,r2 for tail call + @ preserve r0-r2 for tail call ldr ip, [r2, #OFS_SH2_p_sdram] eor r3, r0, #1 mov r3, r3, lsl #SH2_RAM_SHIFT strb r1, [ip, r3, lsr #SH2_RAM_SHIFT] #ifdef DRC_SH2 ldr ip, [r2, #OFS_SH2_p_drcblk_ram] - ldrb r1, [ip, r3, lsr #SH2_RAM_SHIFT+1] - bic r0, r0, #1 - cmp r1, #0 + ldrb r3, [ip, r3, lsr #SH2_RAM_SHIFT+1] + cmp r3, #0 bxeq lr + ldr ip, [r2, #OFS_SH2_p_sdram] + bic r0, r0, #1 + mov r3, r0, lsl #SH2_RAM_SHIFT + mov r3, r3, lsr #SH2_RAM_SHIFT + ldrh r1, [ip, r3] b sh2_sdram_checks #else bx lr @@ -172,15 +176,15 @@ sh2_write8_dram: bx lr sh2_write16_sdram: - @ preserve r0,r2 for tail call + @ preserve r0-r2 for tail call ldr ip, [r2, #OFS_SH2_p_sdram] mov r3, r0, lsl #SH2_RAM_SHIFT mov r3, r3, lsr #SH2_RAM_SHIFT strh r1, [ip, r3] #ifdef DRC_SH2 ldr ip, [r2, #OFS_SH2_p_drcblk_ram] - ldrb r1, [ip, r3, lsr #1] - cmp r1, #0 + ldrb r3, [ip, r3, lsr #1] + cmp r3, #0 bxeq lr b sh2_sdram_checks #else @@ -219,24 +223,19 @@ sh2_write16_dram: bx lr sh2_write32_sdram: - @ preserve r0,r2 for tail call + @ preserve r0-r2 for tail call ldr ip, [r2, #OFS_SH2_p_sdram] mov r1, r1, ror #16 mov r3, r0, lsl #SH2_RAM_SHIFT str r1, [ip, r3, lsr #SH2_RAM_SHIFT] #ifdef DRC_SH2 ldr ip, [r2, #OFS_SH2_p_drcblk_ram] - ldrb r1, [ip, r3, lsr #SH2_RAM_SHIFT+1]! - cmp r1, #0 - beq 1f - stmfd sp!, {r0, r2, ip, lr} - b sh2_sdram_checks - ldmfd sp!, {r0, r2, ip, lr} -1: ldrb r1, [ip, #1] - add r0, r0, #2 - cmp r1, #0 + ldrb r3, [ip, r3, lsr #SH2_RAM_SHIFT+1]! + ldrb ip, [ip, #1] + orrs r3, r3, ip, lsl #16 bxeq lr - b sh2_sdram_checks + mov r1, r1, ror #16 + b sh2_sdram_checks_l #else bx lr #endif @@ -250,15 +249,14 @@ sh2_write32_da: #ifdef DRC_SH2 ldr ip, [r2, #OFS_SH2_p_drcblk_da] ldrb r1, [ip, r3, lsr #SH2_DA_SHIFT+1]! 
- cmp r1, #0 - beq 1f + ldrb ip, [ip, #1] + orrs r3, r1, ip, lsl #16 + bxeq lr stmfd sp!, {r0, r2, ip, lr} bl sh2_drc_wcheck_da ldmfd sp!, {r0, r2, ip, lr} -1: ldrb r1, [ip, #1] add r0, r0, #2 - cmp r1, #0 - bxeq lr + mov r1, ip b sh2_drc_wcheck_da #else bx lr diff --git a/pico/pico_int.h b/pico/pico_int.h index 4139e816..31fc702c 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -932,7 +932,9 @@ void Pico32xSwapDRAM(int b); void Pico32xMemStateLoaded(void); void p32x_update_banks(void); void p32x_m68k_poll_event(unsigned int flags); -void p32x_sh2_poll_memory(unsigned int a, SH2 *sh2); +unsigned int REGPARM(3) p32x_sh2_poll_memory8(unsigned int a, unsigned int d, SH2 *sh2); +unsigned int REGPARM(3) p32x_sh2_poll_memory16(unsigned int a, unsigned int d, SH2 *sh2); +unsigned int REGPARM(3) p32x_sh2_poll_memory32(unsigned int a, unsigned int d, SH2 *sh2); void *p32x_sh2_get_mem_ptr(unsigned int a, unsigned int *mask, SH2 *sh2); void p32x_sh2_poll_event(SH2 *sh2, unsigned int flags, unsigned int m68k_cycles); From 9e36dd0e0839d9b8dc6097ab545590c2fa3ed520 Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 22 May 2019 21:45:31 +0200 Subject: [PATCH 0198/1110] add xSR/RTS call stack cache to sh2 drc --- cpu/drc/emit_arm.c | 17 +++++ cpu/drc/emit_x86.c | 22 ++++++ cpu/sh2/compiler.c | 173 +++++++++++++++++++++++++++++++++++++++------ cpu/sh2/sh2.h | 2 + 4 files changed, 192 insertions(+), 22 deletions(-) diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index 1b429b35..9af2f453 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -304,7 +304,9 @@ static void emith_flush(void) #define EOP_STR_SIMPLE(rd,rn) EOP_C_AM2_IMM(A_COND_AL,1,0,0,rn,rd,0) #define EOP_LDR_REG_LSL(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,0,1,rn,rd,shift_imm,A_AM1_LSL,rm) +#define EOP_LDR_REG_LSL_WB(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,0,3,rn,rd,shift_imm,A_AM1_LSL,rm) #define EOP_LDRB_REG_LSL(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,1,1,rn,rd,shift_imm,A_AM1_LSL,rm); 
+#define EOP_STR_REG_LSL_WB(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,0,2,rn,rd,shift_imm,A_AM1_LSL,rm) #define EOP_LDRH_IMM2(cond,rd,rn,offset_8) EOP_C_AM3_IMM(cond,(offset_8) >= 0,1,rn,rd,0,1,abs(offset_8)) #define EOP_LDRH_REG2(cond,rd,rn,rm) EOP_C_AM3_REG(cond,1,1,rn,rd,0,1,rm) @@ -941,8 +943,12 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) EOP_LDR_REG_LSL(cond, r, rs, rm, 0) #define emith_read_r_r_offs(r, rs, offs) \ emith_read_r_r_offs_c(A_COND_AL, r, rs, offs) +#define emith_read_r_r_offs_ptr(r, rs, offs) \ + emith_read_r_r_offs_c(A_COND_AL, r, rs, offs) #define emith_read_r_r_r(r, rs, rm) \ EOP_LDR_REG_LSL(A_COND_AL, r, rs, rm, 0) +#define emith_read_r_r_r_wb(r, rs, rm) \ + EOP_LDR_REG_LSL_WB(A_COND_AL, r, rs, rm, 0) #define emith_read8_r_r_offs_c(cond, r, rs, offs) \ EOP_LDRB_IMM2(cond, r, rs, offs) @@ -984,6 +990,12 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) EOP_STR_IMM2(cond, r, rs, offs) #define emith_write_r_r_offs_ptr_c(cond, r, rs, offs) \ emith_write_r_r_offs_c(cond, r, rs, offs) +#define emith_write_r_r_offs(r, rs, offs) \ + emith_write_r_r_offs_c(A_COND_AL, r, rs, offs) +#define emith_write_r_r_offs_ptr(r, rs, offs) \ + emith_write_r_r_offs_c(A_COND_AL, r, rs, offs) +#define emith_write_r_r_r_wb(r, rs, rm) \ + EOP_STR_REG_LSL_WB(A_COND_AL, r, rs, rm, 0) #define emith_ctx_read_c(cond, r, offs) \ emith_read_r_r_offs_c(cond, r, CONTEXT_REG, offs) @@ -1111,6 +1123,11 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) emith_jump_ctx(offs); \ } while (0) +#define emith_call_link(r, target) do { \ + emith_move_r_r(r, PC); \ + emith_jump(target); \ +} while (0) + #define emith_ret_c(cond) \ emith_jump_reg_c(cond, LR) diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 9dd06262..edb34521 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -721,6 +721,20 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; EMIT_OP_MODRM(0x8b, 0, r, 4); \ EMIT_SIB(0, rs, rm); /* 
mov r, [rm + rs * 1] */ \ } while (0) +#define emith_read_r_r_r_wb(r, rs, rm) do { \ + emith_read_r_r_r(r, rs, rm); \ + emith_add_r_r_ptr(rs, rm); \ +} while (0) + +#define emith_write_r_r_r(r, rs, rm) do { \ + EMIT_OP_MODRM(0x89, 0, r, 4); \ + EMIT_SIB(0, rs, rm); /* mov [rm + rs * 1], r */ \ +} while (0) +#define emith_write_r_r_r_wb(r, rs, rm) do { \ + emith_write_r_r_r(r, rs, rm); \ + emith_add_r_r_ptr(rs, rm); \ +} while (0) + #define emith_ctx_read(r, offs) \ emith_read_r_r_offs(r, CONTEXT_REG, offs) @@ -801,6 +815,14 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; EMIT(offs, u32); \ } while (0) +#define emith_call_link(r, target) do { \ + EMIT_OP(0xe8); \ + EMIT(0, u32); /* call pc+0 */ \ + emith_pop(r); \ + emith_add_r_r_ptr_imm(r, r, 13); \ + emith_jump(target); \ +} while (0) + #define emith_ret() \ EMIT_OP(0xc3) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 6d8e5118..be6e3ee1 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -39,6 +39,7 @@ #define PROPAGATE_CONSTANTS 1 #define LINK_BRANCHES 1 #define BRANCH_CACHE 1 +#define CALL_STACK 0 #define ALIAS_REGISTERS 1 #define REMAP_REGISTER 1 #define LOOP_DETECTION 1 @@ -58,13 +59,14 @@ // 08 - runtime block entry log // 10 - smc self-check // 20 - runtime block entry counter +// 80 - branch cache statistics // 100 - write trace // 200 - compare trace // 400 - block entry backtrace on exit // 800 - state dump on exit // { #ifndef DRC_DEBUG -#define DRC_DEBUG 0 +#define DRC_DEBUG 0x0 #endif #if DRC_DEBUG @@ -369,6 +371,15 @@ static struct block_entry **hash_tables[TCACHE_BUFFERS]; #define HASH_FUNC(hash_tab, addr, mask) \ (hash_tab)[(((addr) >> 20) ^ ((addr) >> 2)) & (mask)] +#if (DRC_DEBUG & 128) +#if BRANCH_CACHE +int bchit, bcmiss; +#endif +#if CALL_STACK +int rchit, rcmiss; +#endif +#endif + // host register tracking enum { HR_FREE, @@ -527,6 +538,10 @@ static signed char reg_map_host[HOST_REGS]; static void REGPARM(1) (*sh2_drc_entry)(SH2 *sh2); static void 
(*sh2_drc_dispatcher)(void); +#if CALL_STACK +static void REGPARM(1) (*sh2_drc_dispatcher_call)(uptr host_pc); +static void (*sh2_drc_dispatcher_return)(void); +#endif static void (*sh2_drc_exit)(void); static void (*sh2_drc_test_irq)(void); @@ -684,12 +699,17 @@ static void REGPARM(1) flush_tcache(int tcid) memset(Pico32xMem->drclit_ram, 0, sizeof(Pico32xMem->drclit_ram)); memset(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache)); memset(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)); + memset(sh2s[0].rts_cache, -1, sizeof(sh2s[0].rts_cache)); + memset(sh2s[1].rts_cache, -1, sizeof(sh2s[1].rts_cache)); + sh2s[0].rts_cache_idx = sh2s[1].rts_cache_idx = 0; } else { memset(Pico32xMem->drcblk_ram, 0, sizeof(Pico32xMem->drcblk_ram)); memset(Pico32xMem->drclit_ram, 0, sizeof(Pico32xMem->drclit_ram)); memset(Pico32xMem->drcblk_da[tcid - 1], 0, sizeof(Pico32xMem->drcblk_da[tcid - 1])); memset(Pico32xMem->drclit_da[tcid - 1], 0, sizeof(Pico32xMem->drclit_da[tcid - 1])); memset(sh2s[tcid - 1].branch_cache, -1, sizeof(sh2s[0].branch_cache)); + memset(sh2s[tcid - 1].rts_cache, -1, sizeof(sh2s[0].rts_cache)); + sh2s[tcid - 1].rts_cache_idx = 0; } } #if (DRC_DEBUG & 4) @@ -816,9 +836,7 @@ static void dr_free_oldest_block(int tcache_id) static u8 *dr_prepare_cache(int tcache_id, int insn_count) { -#if BRANCH_CACHE u8 *limit = tcache_limit[tcache_id]; -#endif // if no block desc available if (block_counts[tcache_id] == block_limit[tcache_id]) @@ -828,16 +846,26 @@ static u8 *dr_prepare_cache(int tcache_id, int insn_count) while (tcache_limit[tcache_id] - tcache_ptrs[tcache_id] < insn_count * 128) dr_free_oldest_block(tcache_id); -#if BRANCH_CACHE if (limit != tcache_limit[tcache_id]) { +#if BRANCH_CACHE if (tcache_id) memset32(sh2s[tcache_id-1].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4); else { memset32(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4); memset32(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)/4); } - } #endif +#if CALL_STACK + 
if (tcache_id) { + memset32(sh2s[tcache_id-1].rts_cache, -1, sizeof(sh2s[0].rts_cache)/4); + sh2s[tcache_id-1].rts_cache_idx = 0; + } else { + memset32(sh2s[0].rts_cache, -1, sizeof(sh2s[0].rts_cache)/4); + memset32(sh2s[1].rts_cache, -1, sizeof(sh2s[1].rts_cache)/4); + sh2s[0].rts_cache_idx = sh2s[1].rts_cache_idx = 0; + } +#endif + } return (u8 *)tcache_ptrs[tcache_id]; } @@ -3955,16 +3983,14 @@ end_op: // branch handling if (drcf.pending_branch_direct) { - struct op_data *opd_b = - (op_flags[i] & OF_DELAY_OP) ? opd-1 : opd; + struct op_data *opd_b = (op_flags[i] & OF_DELAY_OP) ? opd-1 : opd; u32 target_pc = opd_b->imm; int cond = -1; void *target = NULL; int ctaken = 0; - if (OP_ISBRACND(opd_b->op)) { + if (OP_ISBRACND(opd_b->op)) ctaken = (op_flags[i] & OF_DELAY_OP) ? 1 : 2; - } cycles += ctaken; // assume branch taken #if LOOP_DETECTION if ((drcf.loop_type == OF_IDLE_LOOP || @@ -4014,15 +4040,21 @@ end_op: emit_move_r_imm32(SHR_PC, target_pc); rcache_clean(); - target = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id); - if (target == NULL) - return NULL; +#if CALL_STACK + if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) { + // BSR + tmp = rcache_get_tmp_arg(0); + emith_call_link(tmp, sh2_drc_dispatcher_call); + rcache_free_tmp(tmp); + } else +#endif + target = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id); } if (cond != -1) { emith_jump_cond_patchable(cond, target); } - else { + else if (target != NULL) { emith_jump_patchable(target); rcache_invalidate(); } @@ -4036,19 +4068,26 @@ end_op: drcf.polling = drcf.loop_type = 0; } else if (drcf.pending_branch_indirect) { - struct op_data *opd_b = - (op_flags[i] & OF_DELAY_OP) ? opd-1 : opd; void *target; u32 target_pc; sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); FLUSH_CYCLES(sr); rcache_clean(); +#if CALL_STACK + struct op_data *opd_b = (op_flags[i] & OF_DELAY_OP) ? 
opd-1 : opd; + if (opd_b->rm == SHR_PR) { + // RTS + emith_jump(sh2_drc_dispatcher_return); + } else if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) { + // JSR/BSRF + tmp = rcache_get_tmp_arg(0); + emith_call_link(tmp, sh2_drc_dispatcher_call); + } else +#endif if (gconst_get(SHR_PC, &target_pc)) { // JMP const, treat like unconditional direct branch target = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id); - if (target == NULL) - return NULL; emith_jump_patchable(target); } else { // JMP @@ -4264,6 +4303,20 @@ static void sh2_generate_utils(void) emith_sh2_drc_exit(); emith_flush(); +#if CALL_STACK + // sh2_drc_dispatcher_call(uptr host_pc) + sh2_drc_dispatcher_call = (void *)tcache_ptr; + emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx)); + emith_add_r_imm(arg2, 2*sizeof(void *)); + emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *)); + emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx)); + emith_add_r_r_ptr_imm(arg1, CONTEXT_REG, offsetof(SH2, rts_cache)); + emith_ctx_read(arg3, offsetof(SH2, pr)); + emith_write_r_r_r_wb(arg3, arg1, arg2); + emith_write_r_r_offs_ptr(arg0, arg1, sizeof(void *)); + emith_flush(); + // FALLTHROUGH +#endif // sh2_drc_dispatcher(void) sh2_drc_dispatcher = (void *)tcache_ptr; emith_ctx_read(arg0, SHR_PC * 4); @@ -4274,6 +4327,12 @@ static void sh2_generate_utils(void) emith_read_r_r_offs(arg2, arg1, offsetof(SH2, branch_cache)); emith_cmp_r_r(arg2, arg0); EMITH_SJMP_START(DCOND_NE); +#if (DRC_DEBUG & 128) + emith_move_r_ptr_imm(arg2, (uptr)&bchit); + emith_read_r_r_offs_c(DCOND_EQ, arg3, arg2, 0); + emith_add_r_imm_c(DCOND_EQ, arg3, 1); + emith_write_r_r_offs_c(DCOND_EQ, arg3, arg2, 0); +#endif emith_read_r_r_offs_ptr_c(DCOND_EQ, RET_REG, arg1, offsetof(SH2, branch_cache) + sizeof(void *)); emith_jump_reg_c(DCOND_EQ, RET_REG); EMITH_SJMP_END(DCOND_NE); @@ -4285,6 +4344,12 @@ static void sh2_generate_utils(void) // store PC and block entry ptr (in arg0) in branch target cache 
emith_tst_r_r_ptr(RET_REG, RET_REG); EMITH_SJMP_START(DCOND_EQ); +#if (DRC_DEBUG & 128) + emith_move_r_ptr_imm(arg2, (uptr)&bcmiss); + emith_read_r_r_offs_c(DCOND_NE, arg3, arg2, 0); + emith_add_r_imm_c(DCOND_NE, arg3, 1); + emith_write_r_r_offs_c(DCOND_NE, arg3, arg2, 0); +#endif emith_ctx_read_c(DCOND_NE, arg2, SHR_PC * 4); emith_and_r_r_imm(arg1, arg2, (ARRAY_SIZE(sh2s->branch_cache)-1)*4); emith_add_r_r_r_lsl_ptr(arg1, CONTEXT_REG, arg1, sizeof(void *) == 8 ? 2 : 1); @@ -4302,6 +4367,37 @@ static void sh2_generate_utils(void) emith_call(dr_failure); emith_flush(); +#if CALL_STACK + // sh2_drc_dispatcher_return(void) + sh2_drc_dispatcher_return = (void *)tcache_ptr; + emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx)); + emith_add_r_r_ptr_imm(arg1, CONTEXT_REG, offsetof(SH2, rts_cache)); + emith_ctx_read(arg0, offsetof(SH2, pc)); + emith_read_r_r_r_wb(arg3, arg1, arg2); + emith_cmp_r_r(arg0, arg3); +#if (DRC_DEBUG & 128) + EMITH_SJMP_START(DCOND_EQ); + emith_move_r_ptr_imm(arg2, (uptr)&rcmiss); + emith_read_r_r_offs_c(DCOND_NE, arg1, arg2, 0); + emith_add_r_imm_c(DCOND_NE, arg1, 1); + emith_write_r_r_offs_c(DCOND_NE, arg1, arg2, 0); + EMITH_SJMP_END(DCOND_EQ); +#endif + emith_jump_cond(DCOND_NE, sh2_drc_dispatcher); + emith_read_r_r_offs_ptr(arg0, arg1, sizeof(void *)); + emith_sub_r_imm(arg2, 2*sizeof(void *)); + emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *)); + emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx)); +#if (DRC_DEBUG & 128) + emith_move_r_ptr_imm(arg2, (uptr)&rchit); + emith_read_r_r_offs(arg1, arg2, 0); + emith_add_r_imm(arg1, 1); + emith_write_r_r_offs(arg1, arg2, 0); +#endif + emith_jump_reg(arg0); + emith_flush(); +#endif + // sh2_drc_test_irq(void) // assumes it's called from main function (may jump to dispatcher) sh2_drc_test_irq = (void *)tcache_ptr; @@ -4408,6 +4504,10 @@ static void sh2_generate_utils(void) #if (DRC_DEBUG & 4) host_dasm_new_symbol(sh2_drc_entry); host_dasm_new_symbol(sh2_drc_dispatcher); +#if 
CALL_STACK + host_dasm_new_symbol(sh2_drc_dispatcher_call); + host_dasm_new_symbol(sh2_drc_dispatcher_return); +#endif host_dasm_new_symbol(sh2_drc_exit); host_dasm_new_symbol(sh2_drc_test_irq); host_dasm_new_symbol(sh2_drc_write8); @@ -4521,6 +4621,16 @@ static void sh2_smc_rm_blocks(u32 a, int tcache_id, u32 shift) memset32(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)/4); } #endif +#if CALL_STACK + if (tcache_id) { + memset32(sh2s[tcache_id-1].rts_cache, -1, sizeof(sh2s[0].rts_cache)/4); + sh2s[tcache_id-1].rts_cache_idx = 0; + } else { + memset32(sh2s[0].rts_cache, -1, sizeof(sh2s[0].rts_cache)/4); + memset32(sh2s[1].rts_cache, -1, sizeof(sh2s[1].rts_cache)/4); + sh2s[0].rts_cache_idx = sh2s[1].rts_cache_idx = 0; + } +#endif } void sh2_drc_wcheck_ram(unsigned int a, int val, SH2 *sh2) @@ -4694,11 +4804,6 @@ static void state_dump(void) printf("%08x ",p32x_sh2_read32(sh2s[0].r[15] + i*4, &sh2s[0])); if ((i+1) % 8 == 0) printf("\n"); } - printf("branch cache master:\n"); - for (i = 0; i < ARRAY_SIZE(sh2s[0].branch_cache); i++) { - printf("%08x ",sh2s[0].branch_cache[i].pc); - if ((i+1) % 8 == 0) printf("\n"); - } SH2_DUMP(&sh2s[1], "slave"); printf("VBR ssh2: %x\n", sh2s[1].vbr); for (i = 0; i < 0x60; i++) { @@ -4710,12 +4815,33 @@ static void state_dump(void) printf("%08x ",p32x_sh2_read32(sh2s[1].r[15] + i*4, &sh2s[1])); if ((i+1) % 8 == 0) printf("\n"); } +#endif +} + +static void bcache_stats(void) +{ +#if (DRC_DEBUG & 128) + int i; +#if CALL_STACK + for (i = 1; i < ARRAY_SIZE(sh2s->rts_cache); i++) + if (sh2s[0].rts_cache[i].pc == -1 && sh2s[1].rts_cache[i].pc == -1) break; + + printf("return cache hits:%d misses:%d depth: %d\n", rchit, rcmiss, i); +#endif +#if BRANCH_CACHE + printf("branch cache hits:%d misses:%d\n", bchit, bcmiss); + printf("branch cache master:\n"); + for (i = 0; i < ARRAY_SIZE(sh2s[0].branch_cache); i++) { + printf("%08x ",sh2s[0].branch_cache[i].pc); + if ((i+1) % 8 == 0) printf("\n"); + } printf("branch cache slave:\n"); for 
(i = 0; i < ARRAY_SIZE(sh2s[1].branch_cache); i++) { printf("%08x ",sh2s[1].branch_cache[i].pc); if ((i+1) % 8 == 0) printf("\n"); } #endif +#endif } void sh2_drc_flush_all(void) @@ -4724,6 +4850,7 @@ void sh2_drc_flush_all(void) state_dump(); block_stats(); entry_stats(); + bcache_stats(); flush_tcache(0); flush_tcache(1); flush_tcache(2); @@ -4810,6 +4937,8 @@ int sh2_drc_init(SH2 *sh2) #endif } memset(sh2->branch_cache, -1, sizeof(sh2->branch_cache)); + memset(sh2->rts_cache, -1, sizeof(sh2->rts_cache)); + sh2->rts_cache_idx = 0; return 0; diff --git a/cpu/sh2/sh2.h b/cpu/sh2/sh2.h index a3eb5b12..cf830dfc 100644 --- a/cpu/sh2/sh2.h +++ b/cpu/sh2/sh2.h @@ -52,6 +52,8 @@ typedef struct SH2_ int poll_cnt; // DRC branch cache. size must be 2^n and <=128 + int rts_cache_idx; + struct { unsigned int pc; void *code; } rts_cache[16]; struct { unsigned int pc; void *code; } branch_cache[128]; // interpreter stuff From 39615f60791ad06ed817ba615bf9d32e0bee07aa Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 23 May 2019 19:04:31 +0200 Subject: [PATCH 0199/1110] sh2 drc, keep T bit in host flags as long as possible --- cpu/drc/emit_arm.c | 58 +++++++++++++++- cpu/drc/emit_x86.c | 38 +++++++++++ cpu/sh2/compiler.c | 165 ++++++++++++++++++++++++++------------------- 3 files changed, 190 insertions(+), 71 deletions(-) diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index 9af2f453..b7922a98 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -128,7 +128,7 @@ static NOINLINE void EMIT(u32 op, u32 dst, u32 src) emit_cache[i] = emit_cache[i+1]; } } - + static void emith_flush(void) { int i; @@ -156,6 +156,7 @@ static void emith_flush(void) #define A_COND_LE 0xd #define A_COND_CS A_COND_HS #define A_COND_CC A_COND_LO +#define A_COND_NV 0xf // Not Valid (aka NeVer :-) - ATTN: not a real condition! 
/* unified conditions */ #define DCOND_EQ A_COND_EQ @@ -414,6 +415,9 @@ static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int u32 v; int i; + if (cond == A_COND_NV) + return; + switch (op) { case A_OP_MOV: rn = 0; @@ -522,6 +526,9 @@ static int emith_xbranch(int cond, void *target, int is_call) int direct = is_offset_24(val); u32 *start_ptr = (u32 *)tcache_ptr; + if (cond == A_COND_NV) + return 0; // never taken + if (direct) { EOP_C_B(cond,is_call,val & 0xffffff); // b, bl target @@ -1328,3 +1335,52 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) EMITH_SJMP2_END(DCOND_NE); \ } while (0) +#ifdef T +// T bit handling +static int tcond = -1; + +#define emith_invert_cond(cond) \ + ((cond) ^ 1) + +#define emith_clr_t_cond(sr) \ + (void)sr + +#define emith_set_t_cond(sr, cond) \ + tcond = cond + +#define emith_get_t_cond() \ + tcond + +#define emith_invalidate_t() \ + tcond = -1 + +#define emith_set_t(sr, val) \ + tcond = ((val) ? A_COND_AL: A_COND_NV) + +static void emith_sync_t(sr) +{ + if (tcond == A_COND_AL) + emith_or_r_imm(sr, T); + else if (tcond == A_COND_NV) + emith_bic_r_imm(sr, T); + else if (tcond >= 0) { + emith_bic_r_imm_c(emith_invert_cond(tcond),sr, T); + emith_or_r_imm_c(tcond, sr, T); + } + tcond = -1; +} + +static int emith_tst_t(int sr, int tf) +{ + if (tcond < 0) { + emith_tst_r_imm(sr, T); + return tf ? DCOND_NE: DCOND_EQ; + } else if (tcond >= A_COND_AL) { + // MUST sync because A_COND_NV isn't a real condition + emith_sync_t(sr); + emith_tst_r_imm(sr, T); + return tf ? DCOND_NE: DCOND_EQ; + } else + return tf ? 
tcond : emith_invert_cond(tcond); +} +#endif diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index edb34521..32569404 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -1200,3 +1200,41 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_pool_commit(j) /**/ #define emith_insn_ptr() ((u8 *)tcache_ptr) #define emith_flush() /**/ + +#ifdef T +// T bit handling +#define emith_invert_cond(cond) \ + ((cond) ^ 1) + +static void emith_clr_t_cond(int sr) +{ + emith_bic_r_imm(sr, T); +} + +static void emith_set_t_cond(int sr, int cond) +{ + EMITH_SJMP_START(emith_invert_cond(cond)); + emith_or_r_imm_c(cond, sr, T); + EMITH_SJMP_END(emith_invert_cond(cond)); +} + +#define emith_get_t_cond() -1 + +#define emith_sync_t(sr) ((void)sr) + +#define emith_invalidate_t() + +static void emith_set_t(int sr, int val) +{ + if (val) + emith_or_r_imm(sr, T); + else + emith_bic_r_imm(sr, T); +} + +static int emith_tst_t(int sr, int tf) +{ + emith_tst_r_imm(sr, T); + return tf ? DCOND_NE: DCOND_EQ; +} +#endif diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index be6e3ee1..d441039b 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -117,6 +117,17 @@ static int insns_compiled, hash_collisions, host_insn_count; #define SHR_MEM 31 #define SHR_TMP -1 +#define T 0x00000001 +#define S 0x00000002 +#define I 0x000000f0 +#define Q 0x00000100 +#define M 0x00000200 +#define T_save 0x00000800 + +#define I_SHIFT 4 +#define Q_SHIFT 8 +#define M_SHIFT 9 + static struct op_data { u8 op; u8 cycles; @@ -525,17 +536,6 @@ static cache_reg_t cache_regs[] = { static signed char reg_map_host[HOST_REGS]; -#define T 0x00000001 -#define S 0x00000002 -#define I 0x000000f0 -#define Q 0x00000100 -#define M 0x00000200 -#define T_save 0x00000800 - -#define I_SHIFT 4 -#define Q_SHIFT 8 -#define M_SHIFT 9 - static void REGPARM(1) (*sh2_drc_entry)(SH2 *sh2); static void (*sh2_drc_dispatcher)(void); #if CALL_STACK @@ -2318,17 +2318,19 @@ static void emit_move_r_r(sh2_reg_e dst, 
sh2_reg_e src) } } -// T must be clear, and comparison done just before this -static void emit_or_t_if_eq(int srr) +static void emit_sync_t_to_sr(void) { - EMITH_SJMP_START(DCOND_NE); - emith_or_r_imm_c(DCOND_EQ, srr, T); - EMITH_SJMP_END(DCOND_NE); + // avoid reloading SR from context if there's nothing to do + if (emith_get_t_cond() >= 0) { + int sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + emith_sync_t(sr); + } } // rd = @(arg0) static int emit_memhandler_read(int size) { + emit_sync_t_to_sr(); rcache_clean_tmp(); #ifndef DRC_SR_REG // must writeback cycles for poll detection stuff @@ -2356,6 +2358,7 @@ static int emit_memhandler_read(int size) // @(arg0) = arg1 static void emit_memhandler_write(int size) { + emit_sync_t_to_sr(); rcache_clean_tmp(); #ifndef DRC_SR_REG if (guest_regs[SHR_SR].vreg != -1) @@ -2776,6 +2779,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // clear stale state after compile errors rcache_invalidate(); + emith_invalidate_t(); drcf = (struct drcf) { 0 }; // ------------------------------------------------- @@ -2812,6 +2816,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) { sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); FLUSH_CYCLES(sr); + emith_sync_t(sr); rcache_flush(); emith_flush(); @@ -2896,6 +2901,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) #if (DRC_DEBUG & (8|256|512|1024)) sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + emith_sync_t(sr); rcache_clean(); tmp = rcache_used_hreg_mask(); emith_save_caller_regs(tmp); @@ -2918,6 +2924,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) if (!(op_flags[i] & OF_DELAY_OP)) { sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); FLUSH_CYCLES(sr); + emith_sync_t(sr); rcache_clean(); tmp = rcache_used_hreg_mask(); @@ -2944,6 +2951,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) delay_dep_bk = opd->source & ops[i-1].dest; if (delay_dep_fw & BITMASK1(SHR_T)) { sr = rcache_get_reg(SHR_SR, RC_GR_RMW, 
NULL); + emith_sync_t(sr); DELAY_SAVE_T(sr); } if (delay_dep_bk & BITMASK1(SHR_PC)) { @@ -2965,9 +2973,8 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); tmp = rcache_get_reg(SHR_PC, RC_GR_WRITE, NULL); emith_move_r_imm(tmp, pc); - emith_tst_r_imm(sr, T); - tmp2 = ops[i-1].op == OP_BRANCH_CT ? DCOND_NE : DCOND_EQ; - tmp3 = ops[i-1].op == OP_BRANCH_CT ? DCOND_EQ : DCOND_NE; + tmp2 = emith_tst_t(sr, (ops[i-1].op == OP_BRANCH_CT)); + tmp3 = emith_invert_cond(tmp2); EMITH_SJMP_START(tmp3); emith_move_r_imm_c(tmp2, tmp, ops[i-1].imm); EMITH_SJMP_END(tmp3); @@ -3061,6 +3068,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) goto end_op; case OP_RTE: // RTE 0000000000101011 + emith_invalidate_t(); // pop PC emit_memhandler_read_rr(sh2, SHR_PC, SHR_SP, 0, 2 | MF_POSTINCR); // pop SR @@ -3079,6 +3087,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case OP_TRAPA: // TRAPA #imm 11000011iiiiiiii // push SR tmp = rcache_get_reg_arg(1, SHR_SR, &tmp2); + emith_sync_t(tmp2); emith_clear_msb(tmp, tmp2, 22); emit_memhandler_write_rr(sh2, SHR_TMP, SHR_SP, 0, 2 | MF_PREDECR); // push PC @@ -3177,6 +3186,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } if (tmp2 == SHR_SR) { sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); + emith_sync_t(sr); tmp = rcache_get_reg(GET_Rn(), RC_GR_WRITE, NULL); emith_clear_msb(tmp, sr, 22); // reserved bits defined by ISA as 0 } else @@ -3198,11 +3208,11 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) { case 0: // CLRT 0000000000001000 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); - emith_bic_r_imm(sr, T); + emith_set_t(sr, 0); break; case 1: // SETT 0000000000011000 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); - emith_or_r_imm(sr, T); + emith_set_t(sr, 1); break; case 2: // CLRMAC 0000000000101000 emit_move_r_imm32(SHR_MACL, 0); @@ -3219,10 +3229,12 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) 
break; case 1: // DIV0U 0000000000011001 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + emith_invalidate_t(); emith_bic_r_imm(sr, M|Q|T); break; case 2: // MOVT Rn 0000nnnn00101001 sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); + emith_sync_t(sr); tmp2 = rcache_get_reg(GET_Rn(), RC_GR_WRITE, NULL); emith_clear_msb(tmp2, sr, 31); break; @@ -3286,6 +3298,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); + emith_invalidate_t(); emith_bic_r_imm(sr, M|Q|T); emith_tst_r_imm(tmp2, (1<<31)); EMITH_SJMP_START(DCOND_EQ); @@ -3304,9 +3317,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); - emith_bic_r_imm(sr, T); + emith_clr_t_cond(sr); emith_tst_r_r(tmp2, tmp3); - emit_or_t_if_eq(sr); + emith_set_t_cond(sr, DCOND_EQ); goto end_op; case 0x09: // AND Rm,Rn 0010nnnnmmmm1001 if (GET_Rm() != GET_Rn()) { @@ -3339,7 +3352,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); emith_eor_r_r_r(tmp, tmp2, tmp3); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); - emith_bic_r_imm(sr, T); + emith_clr_t_cond(sr); emith_tst_r_imm(tmp, 0x000000ff); EMITH_SJMP_START(DCOND_EQ); emith_tst_r_imm_c(DCOND_NE, tmp, 0x0000ff00); @@ -3350,7 +3363,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) EMITH_SJMP_END(DCOND_EQ); EMITH_SJMP_END(DCOND_EQ); EMITH_SJMP_END(DCOND_EQ); - emit_or_t_if_eq(sr); + emith_set_t_cond(sr, DCOND_EQ); rcache_free_tmp(tmp); goto end_op; case 0x0d: // XTRCT Rm,Rn 0010nnnnmmmm1101 @@ -3391,32 +3404,24 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); tmp3 = 
rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); - emith_bic_r_imm(sr, T); + emith_clr_t_cond(sr); emith_cmp_r_r(tmp2, tmp3); switch (op & 0x07) { case 0x00: // CMP/EQ - emit_or_t_if_eq(sr); + emith_set_t_cond(sr, DCOND_EQ); break; case 0x02: // CMP/HS - EMITH_SJMP_START(DCOND_LO); - emith_or_r_imm_c(DCOND_HS, sr, T); - EMITH_SJMP_END(DCOND_LO); + emith_set_t_cond(sr, DCOND_HS); break; case 0x03: // CMP/GE - EMITH_SJMP_START(DCOND_LT); - emith_or_r_imm_c(DCOND_GE, sr, T); - EMITH_SJMP_END(DCOND_LT); + emith_set_t_cond(sr, DCOND_GE); break; case 0x06: // CMP/HI - EMITH_SJMP_START(DCOND_LS); - emith_or_r_imm_c(DCOND_HI, sr, T); - EMITH_SJMP_END(DCOND_LS); + emith_set_t_cond(sr, DCOND_HI); break; case 0x07: // CMP/GT - EMITH_SJMP_START(DCOND_LE); - emith_or_r_imm_c(DCOND_GT, sr, T); - EMITH_SJMP_END(DCOND_LE); + emith_set_t_cond(sr, DCOND_GT); break; } goto end_op; @@ -3431,6 +3436,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); tmp2 = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + emith_sync_t(sr); emith_tpop_carry(sr, 0); emith_adcf_r_r_r(tmp2, tmp, tmp); emith_tpush_carry(sr, 0); // keep Q1 in T for now @@ -3479,6 +3485,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + emith_sync_t(sr); if (op & 4) { // adc emith_tpop_carry(sr, 0); emith_adcf_r_r_r(tmp, tmp3, tmp2); @@ -3494,14 +3501,12 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); - emith_bic_r_imm(sr, T); + emith_clr_t_cond(sr); if (op & 4) { emith_addf_r_r_r(tmp, tmp3, tmp2); } else emith_subf_r_r_r(tmp, tmp3, tmp2); - EMITH_SJMP_START(DCOND_VC); - emith_or_r_imm_c(DCOND_VS, 
sr, T); - EMITH_SJMP_END(DCOND_VC); + emith_set_t_cond(sr, DCOND_VS); goto end_op; case 0x0d: // DMULS.L Rm,Rn 0011nnnnmmmm1101 tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); @@ -3524,6 +3529,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 2: // SHAL Rn 0100nnnn00100000 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + emith_sync_t(sr); emith_tpop_carry(sr, 0); // dummy emith_lslf(tmp, tmp2, 1); emith_tpush_carry(sr, 0); @@ -3538,10 +3544,10 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) drcf.polling = drcf.loop_type = 0; } #endif - emith_bic_r_imm(sr, T); tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); + emith_clr_t_cond(sr); emith_subf_r_r_imm(tmp, tmp2, 1); - emit_or_t_if_eq(sr); + emith_set_t_cond(sr, DCOND_EQ); goto end_op; } goto default_; @@ -3552,6 +3558,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 2: // SHAR Rn 0100nnnn00100001 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + emith_sync_t(sr); emith_tpop_carry(sr, 0); // dummy if (op & 0x20) { emith_asrf(tmp, tmp2, 1); @@ -3562,11 +3569,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 1: // CMP/PZ Rn 0100nnnn00010001 tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); - emith_bic_r_imm(sr, T); + emith_clr_t_cond(sr); emith_cmp_r_imm(tmp, 0); - EMITH_SJMP_START(DCOND_LT); - emith_or_r_imm_c(DCOND_GE, sr, T); - EMITH_SJMP_END(DCOND_LT); + emith_set_t_cond(sr, DCOND_GE); goto end_op; } goto default_; @@ -3597,6 +3602,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } tmp3 = rcache_get_reg_arg(1, tmp, &tmp4); if (tmp == SHR_SR) { + emith_sync_t(tmp4); emith_clear_msb(tmp3, tmp4, 22); // reserved bits defined by ISA as 0 } else if (tmp3 != tmp4) emith_move_r_r(tmp3, tmp4); @@ -3610,6 +3616,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int 
tcache_id) case 0x05: // ROTR Rn 0100nnnn00000101 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + emith_sync_t(sr); emith_tpop_carry(sr, 0); // dummy if (op & 1) { emith_rorf(tmp, tmp2, 1); @@ -3621,6 +3628,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x25: // ROTCR Rn 0100nnnn00100101 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, NULL); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + emith_sync_t(sr); emith_tpop_carry(sr, 0); if (op & 1) { emith_rorcf(tmp); @@ -3631,11 +3639,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x15: // CMP/PL Rn 0100nnnn00010101 tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); - emith_bic_r_imm(sr, T); + emith_clr_t_cond(sr); emith_cmp_r_imm(tmp, 0); - EMITH_SJMP_START(DCOND_LE); - emith_or_r_imm_c(DCOND_GT, sr, T); - EMITH_SJMP_END(DCOND_LE); + emith_set_t_cond(sr, DCOND_GT); goto end_op; } goto default_; @@ -3665,6 +3671,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) goto default_; } if (tmp == SHR_SR) { + emith_invalidate_t(); tmp2 = emit_memhandler_read_rr(sh2, SHR_TMP, GET_Rn(), 0, 2 | MF_POSTINCR); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_write_sr(sr, tmp2); @@ -3723,9 +3730,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) rcache_get_reg_arg(0, GET_Rn(), NULL); tmp = emit_memhandler_read(0); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); - emith_bic_r_imm(sr, T); + emith_clr_t_cond(sr); emith_cmp_r_imm(tmp, 0); - emit_or_t_if_eq(sr); + emith_set_t_cond(sr, DCOND_EQ); emith_or_r_imm(tmp, 0x80); tmp2 = rcache_get_tmp_arg(1); // assuming it differs to tmp emith_move_r_r(tmp2, tmp); @@ -3753,6 +3760,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) goto default_; } if (tmp2 == SHR_SR) { + emith_invalidate_t(); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); emith_write_sr(sr, 
tmp); @@ -3820,6 +3828,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) break; case 0x0a: // NEGC Rm,Rn 0110nnnnmmmm1010 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + emith_sync_t(sr); emith_tpop_carry(sr, 1); emith_negcf_r_r(tmp2, tmp); emith_tpush_carry(sr, 1); @@ -3870,9 +3879,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x0800: // CMP/EQ #imm,R0 10001000iiiiiiii tmp2 = rcache_get_reg(SHR_R0, RC_GR_READ, NULL); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); - emith_bic_r_imm(sr, T); + emith_clr_t_cond(sr); emith_cmp_r_imm(tmp2, (s8)(op & 0xff)); - emit_or_t_if_eq(sr); + emith_set_t_cond(sr, DCOND_EQ); goto end_op; } goto default_; @@ -3896,9 +3905,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x0800: // TST #imm,R0 11001000iiiiiiii tmp = rcache_get_reg(SHR_R0, RC_GR_READ, NULL); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); - emith_bic_r_imm(sr, T); + emith_clr_t_cond(sr); emith_tst_r_imm(tmp, op & 0xff); - emit_or_t_if_eq(sr); + emith_set_t_cond(sr, DCOND_EQ); goto end_op; case 0x0900: // AND #imm,R0 11001001iiiiiiii tmp = rcache_get_reg(SHR_R0, RC_GR_RMW, &tmp2); @@ -3919,9 +3928,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x0c00: // TST.B #imm,@(R0,GBR) 11001100iiiiiiii tmp = emit_indirect_indexed_read(sh2, SHR_TMP, SHR_R0, SHR_GBR, 0 | drcf.polling); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); - emith_bic_r_imm(sr, T); + emith_clr_t_cond(sr); emith_tst_r_imm(tmp, op & 0xff); - emit_or_t_if_eq(sr); + emith_set_t_cond(sr, DCOND_EQ); rcache_free_tmp(tmp); goto end_op; case 0x0d00: // AND.B #imm,@(R0,GBR) 11001101iiiiiiii @@ -3955,7 +3964,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) if (!(op_flags[i] & OF_B_IN_DS)) { elprintf_sh2(sh2, EL_ANOMALY, "drc: illegal op %04x @ %08x", op, pc - 2); - exit(1); + exit(1); } } @@ -3973,6 +3982,7 @@ end_op: if (drcf.test_irq && !drcf.pending_branch_direct) { sr = rcache_get_reg(SHR_SR, RC_GR_RMW, 
NULL); FLUSH_CYCLES(sr); + emith_sync_t(sr); if (!drcf.pending_branch_indirect) emit_move_r_imm32(SHR_PC, pc); rcache_flush(); @@ -3997,6 +4007,7 @@ end_op: (drcf.loop_type == OF_DELAY_LOOP && drcf.delay_reg >= 0))) { // idle or delay loop + emit_sync_t_to_sr(); emith_sh2_delay_loop(cycles, drcf.delay_reg); drcf.polling = drcf.loop_type = 0; } @@ -4009,11 +4020,20 @@ end_op: // emit condition test for conditional branch if (OP_ISBRACND(opd_b->op)) { cond = (opd_b->op == OP_BRANCH_CF) ? DCOND_EQ : DCOND_NE; - if (delay_dep_fw & BITMASK1(SHR_T)) + if (delay_dep_fw & BITMASK1(SHR_T)) { + emith_sync_t(sr); emith_tst_r_imm(sr, T_save); - else - emith_tst_r_imm(sr, T); - } + } else { + cond = emith_tst_t(sr, (opd_b->op == OP_BRANCH_CT)); + if (emith_get_t_cond() >= 0) { + if (opd_b->op == OP_BRANCH_CT) + emith_or_r_imm_c(cond, sr, T); + else + emith_bic_r_imm_c(cond, sr, T); + } + } + } else + emith_sync_t(sr); // no modification of host status/flags between here and branching! #if LINK_BRANCHES @@ -4062,6 +4082,9 @@ end_op: // branch not taken, correct cycle count if (ctaken) emith_add_r_imm(sr, ctaken << 12); + // set T bit to reflect branch not taken for OP_BRANCH_CT/CF + if (emith_get_t_cond() >= 0) // T is synced for all other cases + emith_set_t(sr, opd_b->op == OP_BRANCH_CF); drcf.pending_branch_direct = 0; if (target_pc >= base_pc && target_pc < pc) @@ -4073,6 +4096,7 @@ end_op: sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); FLUSH_CYCLES(sr); + emith_sync_t(sr); rcache_clean(); #if CALL_STACK struct op_data *opd_b = (op_flags[i] & OF_DELAY_OP) ? 
opd-1 : opd; @@ -4113,6 +4137,7 @@ end_op: s32 tmp = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); FLUSH_CYCLES(tmp); + emith_sync_t(tmp); emit_move_r_imm32(SHR_PC, pc); rcache_flush(); @@ -5553,7 +5578,7 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, case 0x06: // MOV.L @Rm+,Rn 0110nnnnmmmm0110 opd->dest = BITMASK2(GET_Rm(), GET_Rn()); opd->source = BITMASK2(GET_Rm(), SHR_MEM); - break; + break; case 0x00: // MOV.B @Rm,Rn 0110nnnnmmmm0000 case 0x01: // MOV.W @Rm,Rn 0110nnnnmmmm0001 case 0x02: // MOV.L @Rm,Rn 0110nnnnmmmm0010 @@ -5596,12 +5621,12 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, { case 0x0000: // MOV.B R0,@(disp,Rn) 10000000nnnndddd opd->source = BITMASK2(GET_Rm(), SHR_R0); - opd->dest = BITMASK1(SHR_MEM); + opd->dest = BITMASK1(SHR_MEM); opd->imm = (op & 0x0f); break; case 0x0100: // MOV.W R0,@(disp,Rn) 10000001nnnndddd opd->source = BITMASK2(GET_Rm(), SHR_R0); - opd->dest = BITMASK1(SHR_MEM); + opd->dest = BITMASK1(SHR_MEM); opd->imm = (op & 0x0f) * 2; break; case 0x0400: // MOV.B @(disp,Rm),R0 10000100mmmmdddd @@ -5760,7 +5785,7 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, case 0x0e00: // XOR.B #imm,@(R0,GBR) 11001110iiiiiiii case 0x0f00: // OR.B #imm,@(R0,GBR) 11001111iiiiiiii opd->source = BITMASK3(SHR_GBR, SHR_R0, SHR_MEM); - opd->dest = BITMASK1(SHR_MEM); + opd->dest = BITMASK1(SHR_MEM); opd->imm = op & 0xff; opd->cycles = 3; break; From 8141d7569450e156d1aa50c306872b538b31cfec Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 24 May 2019 21:52:03 +0200 Subject: [PATCH 0200/1110] sh2 drc, change utils abi to pass sh2 PC in arg0 (reduces compiled code size) --- cpu/drc/emit_arm.c | 4 +++ cpu/drc/emit_x86.c | 18 ++++++++++++ cpu/sh2/compiler.c | 68 +++++++++++++++++++++++++++------------------- 3 files changed, 62 insertions(+), 28 deletions(-) diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index b7922a98..c85a3d71 100644 --- a/cpu/drc/emit_arm.c +++ 
b/cpu/drc/emit_arm.c @@ -956,6 +956,8 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) EOP_LDR_REG_LSL(A_COND_AL, r, rs, rm, 0) #define emith_read_r_r_r_wb(r, rs, rm) \ EOP_LDR_REG_LSL_WB(A_COND_AL, r, rs, rm, 0) +#define emith_read_r_r_r_ptr_wb(r, rs, rm) \ + emith_read_r_r_r_wb(r, rs, rm) #define emith_read8_r_r_offs_c(cond, r, rs, offs) \ EOP_LDRB_IMM2(cond, r, rs, offs) @@ -1003,6 +1005,8 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) emith_write_r_r_offs_c(A_COND_AL, r, rs, offs) #define emith_write_r_r_r_wb(r, rs, rm) \ EOP_STR_REG_LSL_WB(A_COND_AL, r, rs, rm, 0) +#define emith_write_r_r_r_ptr_wb(r, rs, rm) \ + emith_write_r_r_r_wb(r, rs, rm) #define emith_ctx_read_c(cond, r, offs) \ emith_read_r_r_offs_c(cond, r, CONTEXT_REG, offs) diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 32569404..f71c5d42 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -721,19 +721,37 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; EMIT_OP_MODRM(0x8b, 0, r, 4); \ EMIT_SIB(0, rs, rm); /* mov r, [rm + rs * 1] */ \ } while (0) +#define emith_read_r_r_r_ptr(r, rs, rm) do { \ + EMIT_REX_IF(1, r, rs); \ + EMIT_OP_MODRM64(0x8b, 0, r, 4); \ + EMIT_SIB(0, rs, rm); /* mov r, [rm + rs * 1] */ \ +} while (0) #define emith_read_r_r_r_wb(r, rs, rm) do { \ emith_read_r_r_r(r, rs, rm); \ emith_add_r_r_ptr(rs, rm); \ } while (0) +#define emith_read_r_r_r_ptr_wb(r, rs, rm) do { \ + emith_read_r_r_r_ptr(r, rs, rm); \ + emith_add_r_r_ptr(rs, rm); \ +} while (0) #define emith_write_r_r_r(r, rs, rm) do { \ EMIT_OP_MODRM(0x89, 0, r, 4); \ EMIT_SIB(0, rs, rm); /* mov [rm + rs * 1], r */ \ } while (0) +#define emith_write_r_r_r_ptr(r, rs, rm) do { \ + EMIT_REX_IF(1, r, rs); \ + EMIT_OP_MODRM64(0x89, 0, r, 4); \ + EMIT_SIB(0, rs, rm); /* mov [rm + rs * 1], r */ \ +} while (0) #define emith_write_r_r_r_wb(r, rs, rm) do { \ emith_write_r_r_r(r, rs, rm); \ emith_add_r_r_ptr(rs, rm); \ } while (0) +#define emith_write_r_r_r_ptr_wb(r, 
rs, rm) do { \ + emith_write_r_r_r_ptr(r, rs, rm); \ + emith_add_r_r_ptr(rs, rm); \ +} while (0) #define emith_ctx_read(r, offs) \ diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index d441039b..f2a1f95b 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -537,12 +537,12 @@ static cache_reg_t cache_regs[] = { static signed char reg_map_host[HOST_REGS]; static void REGPARM(1) (*sh2_drc_entry)(SH2 *sh2); -static void (*sh2_drc_dispatcher)(void); +static void REGPARM(1) (*sh2_drc_dispatcher)(u32 pc); #if CALL_STACK -static void REGPARM(1) (*sh2_drc_dispatcher_call)(uptr host_pc); -static void (*sh2_drc_dispatcher_return)(void); +static void REGPARM(2) (*sh2_drc_dispatcher_call)(u32 pc, uptr host_pr); +static void REGPARM(1) (*sh2_drc_dispatcher_return)(u32 pc); #endif -static void (*sh2_drc_exit)(void); +static void REGPARM(1) (*sh2_drc_exit)(u32 pc); static void (*sh2_drc_test_irq)(void); static u32 REGPARM(1) (*sh2_drc_read8)(u32 a); @@ -2862,8 +2862,10 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) drcf.polling = (drcf.loop_type == OF_POLL_LOOP ? 
MF_POLLING : 0); #endif +#if DRC_DEBUG // must update PC emit_move_r_imm32(SHR_PC, pc); +#endif rcache_clean(); #if (DRC_DEBUG & 0x10) @@ -2883,9 +2885,12 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) #endif // check cycles + tmp = rcache_get_tmp_arg(0); sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); emith_cmp_r_imm(sr, 0); + emith_move_r_imm(tmp, pc); emith_jump_cond(DCOND_LE, sh2_drc_exit); + rcache_free_tmp(tmp); #if (DRC_DEBUG & 32) // block hit counter @@ -4057,13 +4062,15 @@ end_op: if (target == NULL) { // can't resolve branch locally, make a block exit - emit_move_r_imm32(SHR_PC, target_pc); rcache_clean(); + tmp = rcache_get_tmp_arg(0); + emith_move_r_imm(tmp, target_pc); + rcache_free_tmp(tmp); #if CALL_STACK if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) { // BSR - tmp = rcache_get_tmp_arg(0); + tmp = rcache_get_tmp_arg(1); emith_call_link(tmp, sh2_drc_dispatcher_call); rcache_free_tmp(tmp); } else @@ -4098,6 +4105,7 @@ end_op: FLUSH_CYCLES(sr); emith_sync_t(sr); rcache_clean(); + tmp = rcache_get_reg_arg(0, SHR_PC, NULL); #if CALL_STACK struct op_data *opd_b = (op_flags[i] & OF_DELAY_OP) ? 
opd-1 : opd; if (opd_b->rm == SHR_PR) { @@ -4105,7 +4113,7 @@ end_op: emith_jump(sh2_drc_dispatcher_return); } else if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) { // JSR/BSRF - tmp = rcache_get_tmp_arg(0); + tmp = rcache_get_tmp_arg(1); emith_call_link(tmp, sh2_drc_dispatcher_call); } else #endif @@ -4139,13 +4147,15 @@ end_op: FLUSH_CYCLES(tmp); emith_sync_t(tmp); - emit_move_r_imm32(SHR_PC, pc); - rcache_flush(); + rcache_clean(); + tmp = rcache_get_tmp_arg(0); + emith_move_r_imm(tmp, pc); target = dr_prepare_ext_branch(block->entryp, pc, sh2->is_slave, tcache_id); if (target == NULL) return NULL; emith_jump_patchable(target); + rcache_invalidate(); } else rcache_flush(); emith_flush(); @@ -4160,7 +4170,8 @@ end_op: // flush pc and go back to dispatcher (this should no longer happen) dbg(1, "stray branch to %08x %p", branch_patch_pc[i], tcache_ptr); target = tcache_ptr; - emit_move_r_imm32(SHR_PC, branch_patch_pc[i]); + tmp = rcache_get_tmp_arg(0); + emith_move_r_imm(tmp, branch_patch_pc[i]); rcache_flush(); emith_jump(sh2_drc_dispatcher); } @@ -4322,33 +4333,34 @@ static void sh2_generate_utils(void) emith_pop_and_ret(arg1); emith_flush(); - // sh2_drc_exit(void) + // sh2_drc_exit(u32 pc) sh2_drc_exit = (void *)tcache_ptr; + emith_ctx_write(arg0, SHR_PC * 4); emit_do_static_regs(1, arg2); emith_sh2_drc_exit(); emith_flush(); #if CALL_STACK - // sh2_drc_dispatcher_call(uptr host_pc) + // sh2_drc_dispatcher_call(u32 pc, uptr host_pr) sh2_drc_dispatcher_call = (void *)tcache_ptr; emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx)); emith_add_r_imm(arg2, 2*sizeof(void *)); emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *)); emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx)); - emith_add_r_r_ptr_imm(arg1, CONTEXT_REG, offsetof(SH2, rts_cache)); - emith_ctx_read(arg3, offsetof(SH2, pr)); - emith_write_r_r_r_wb(arg3, arg1, arg2); - emith_write_r_r_offs_ptr(arg0, arg1, sizeof(void *)); + emith_add_r_r_ptr_imm(arg3, CONTEXT_REG, 
offsetof(SH2, rts_cache) + sizeof(void *)); + emith_write_r_r_r_ptr_wb(arg1, arg2, arg3); + emith_ctx_read(arg3, SHR_PR * 4); + emith_write_r_r_offs(arg3, arg2, (s8)-sizeof(void *)); emith_flush(); // FALLTHROUGH #endif - // sh2_drc_dispatcher(void) + // sh2_drc_dispatcher(u32 pc) sh2_drc_dispatcher = (void *)tcache_ptr; - emith_ctx_read(arg0, SHR_PC * 4); + emith_ctx_write(arg0, SHR_PC * 4); #if BRANCH_CACHE // check if PC is in branch target cache - emith_and_r_r_imm(arg1, arg0, (ARRAY_SIZE(sh2s->branch_cache)-1)*4); - emith_add_r_r_r_lsl_ptr(arg1, CONTEXT_REG, arg1, sizeof(void *) == 8 ? 2 : 1); + emith_and_r_r_imm(arg1, arg0, (ARRAY_SIZE(sh2s->branch_cache)-1)*8); + emith_add_r_r_r_lsl_ptr(arg1, CONTEXT_REG, arg1, sizeof(void *) == 8 ? 1 : 0); emith_read_r_r_offs(arg2, arg1, offsetof(SH2, branch_cache)); emith_cmp_r_r(arg2, arg0); EMITH_SJMP_START(DCOND_NE); @@ -4376,8 +4388,8 @@ static void sh2_generate_utils(void) emith_write_r_r_offs_c(DCOND_NE, arg3, arg2, 0); #endif emith_ctx_read_c(DCOND_NE, arg2, SHR_PC * 4); - emith_and_r_r_imm(arg1, arg2, (ARRAY_SIZE(sh2s->branch_cache)-1)*4); - emith_add_r_r_r_lsl_ptr(arg1, CONTEXT_REG, arg1, sizeof(void *) == 8 ? 2 : 1); + emith_and_r_r_imm(arg1, arg2, (ARRAY_SIZE(sh2s->branch_cache)-1)*8); + emith_add_r_r_r_lsl_ptr(arg1, CONTEXT_REG, arg1, sizeof(void *) == 8 ? 
1 : 0); emith_write_r_r_offs_c(DCOND_NE, arg2, arg1, offsetof(SH2, branch_cache)); emith_write_r_r_offs_ptr_c(DCOND_NE, RET_REG, arg1, offsetof(SH2, branch_cache) + sizeof(void *)); EMITH_SJMP_END(DCOND_EQ); @@ -4393,11 +4405,10 @@ static void sh2_generate_utils(void) emith_flush(); #if CALL_STACK - // sh2_drc_dispatcher_return(void) + // sh2_drc_dispatcher_return(u32 pc) sh2_drc_dispatcher_return = (void *)tcache_ptr; emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx)); emith_add_r_r_ptr_imm(arg1, CONTEXT_REG, offsetof(SH2, rts_cache)); - emith_ctx_read(arg0, offsetof(SH2, pc)); emith_read_r_r_r_wb(arg3, arg1, arg2); emith_cmp_r_r(arg0, arg3); #if (DRC_DEBUG & 128) @@ -4462,11 +4473,11 @@ static void sh2_generate_utils(void) emith_move_r_r_ptr(arg0, CONTEXT_REG); emith_call_ctx(offsetof(SH2, irq_callback)); // vector = sh2->irq_callback(sh2, level); // obtain new PC - emith_lsl(arg0, RET_REG, 2); emith_ctx_read(arg1, SHR_VBR * 4); - emith_add_r_r(arg0, arg1); - tmp = emit_memhandler_read(2); - emith_ctx_write(tmp, SHR_PC * 4); + emith_add_r_r_r_lsl(arg0, arg1, RET_REG, 2); + emith_call(sh2_drc_read32); + if (arg0 != RET_REG) + emith_move_r_r(arg0, RET_REG); #if defined(__i386__) || defined(__x86_64__) emith_add_r_r_ptr_imm(xSP, xSP, sizeof(void *)); // fix stack #endif @@ -4480,6 +4491,7 @@ static void sh2_generate_utils(void) emith_move_r_r_ptr(CONTEXT_REG, arg0); // move ctx, arg0 emit_do_static_regs(0, arg2); emith_call(sh2_drc_test_irq); + emith_ctx_read(arg0, SHR_PC * 4); emith_jump(sh2_drc_dispatcher); emith_flush(); From 346153e08ed482c2b0694541b582f8674a2bf8af Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 28 May 2019 23:16:45 +0200 Subject: [PATCH 0201/1110] 32x DMA memory copy performance optimisation --- cpu/sh2/compiler.c | 9 ++--- pico/32x/memory.c | 85 ++++++++++++++++++++++++++++++++++++++++++---- pico/32x/sh2soc.c | 23 +++++++++++++ pico/pico_int.h | 1 + tools/mkoffsets.sh | 2 +- 5 files changed, 105 insertions(+), 15 deletions(-) diff --git 
a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index f2a1f95b..2a147a15 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -2261,7 +2261,7 @@ static int emit_get_rom_data(SH2 *sh2, sh2_reg_e r, u32 offs, int size, u32 *val if (gconst_get(r, &a)) { a += offs; // check if rom is memory mapped (not bank switched), and address is in rom - if (dr_is_rom(a) && p32x_sh2_get_mem_ptr(a, &mask, sh2)) { + if (dr_is_rom(a) && p32x_sh2_get_mem_ptr(a, &mask, sh2) != (void *)-1) { switch (size & MF_SIZEMASK) { case 0: *val = (s8)p32x_sh2_read8(a, sh2s); break; // 8 case 1: *val = (s16)p32x_sh2_read16(a, sh2s); break; // 16 @@ -4896,12 +4896,7 @@ void sh2_drc_flush_all(void) void sh2_drc_mem_setup(SH2 *sh2) { - // fill the convenience pointers - sh2->p_bios = sh2->is_slave ? Pico32xMem->sh2_rom_s.w : Pico32xMem->sh2_rom_m.w; - sh2->p_da = sh2->data_array; - sh2->p_sdram = Pico32xMem->sdram; - sh2->p_rom = Pico.rom; - // sh2->p_dram filled in dram bank switching + // fill the DRC-only convenience pointers sh2->p_drcblk_da = Pico32xMem->drcblk_da[!!sh2->is_slave]; sh2->p_drcblk_ram = Pico32xMem->drcblk_ram; } diff --git a/pico/32x/memory.c b/pico/32x/memory.c index a1ef42c2..70287a2c 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -1855,17 +1855,15 @@ void *p32x_sh2_get_mem_ptr(u32 a, u32 *mask, SH2 *sh2) { const sh2_memmap *mm = sh2->read8_map; void *ret = (void *)-1; - u32 am; - mm += a >> SH2_READ_SHIFT; - am = a & ((1 << SH2_READ_SHIFT)-1); - if (!map_flag_set(mm->addr) && !(am & ~mm->mask)) { + mm += SH2MAP_ADDR2OFFS_R(a); + if (!map_flag_set(mm->addr)) { // directly mapped memory (SDRAM, ROM, data array) ret = (void *)(mm->addr << 1); *mask = mm->mask; } else if ((a & ~0x7ff) == 0) { // BIOS, has handler function since it shares its segment with I/O - ret = sh2->is_slave ? Pico32xMem->sh2_rom_s.w : Pico32xMem->sh2_rom_m.w; + ret = sh2->p_bios; *mask = 0x7ff; } else if ((a & 0xc6000000) == 0x02000000) { // banked ROM. 
Return bank address @@ -1877,6 +1875,75 @@ void *p32x_sh2_get_mem_ptr(u32 a, u32 *mask, SH2 *sh2) return ret; } +int p32x_sh2_memcpy(u32 dst, u32 src, int count, int size, SH2 *sh2) +{ + u32 mask; + void *ps, *pd; + int len, i; + + // check if src and dst points to memory (rom/sdram/dram/da) + if ((pd = p32x_sh2_get_mem_ptr(dst, &mask, sh2)) == (void *)-1) + return 0; + if ((ps = p32x_sh2_get_mem_ptr(src, &mask, sh2)) == (void *)-1) + return 0; + ps += src & mask; + len = count * size; + + // DRAM in byte access is always in overwrite mode + if (pd == sh2->p_dram && size == 1) + dst |= 0x20000; + + // align dst to halfword + if (dst & 1) { + p32x_sh2_write8(dst, *(u8 *)((uptr)ps ^ 1), sh2); + ps++, dst++, len --; + } + + // copy data + if ((uptr)ps & 1) { + // unaligned, use halfword copy mode to reduce memory bandwidth + u16 *sp = (u16 *)(ps - 1); + u16 dl, dh = *sp++; + for (i = 0; i < (len & ~1); i += 2, dst += 2, sp++) { + dl = dh, dh = *sp; + p32x_sh2_write16(dst, (dh >> 8) | (dl << 8), sh2); + } + if (len & 1) + p32x_sh2_write8(dst, dh, sh2); + } else { + // dst and src at least halfword aligned + u16 *sp = (u16 *)ps; + // align dst to word + if ((dst & 2) && len >= 2) { + p32x_sh2_write16(dst, *sp++, sh2); + dst += 2, len -= 2; + } + if ((uptr)sp & 2) { + // halfword copy, using word writes to reduce memory bandwidth + u16 dl, dh; + for (i = 0; i < (len & ~3); i += 4, dst += 4, sp += 2) { + dl = sp[0], dh = sp[1]; + p32x_sh2_write32(dst, (dl << 16) | dh, sh2); + } + } else { + // word copy + u32 d; + for (i = 0; i < (len & ~3); i += 4, dst += 4, sp += 2) { + d = *(u32 *)sp; + p32x_sh2_write32(dst, (d << 16) | (d >> 16), sh2); + } + } + if (len & 2) { + p32x_sh2_write16(dst, *sp++, sh2); + dst += 2; + } + if (len & 1) + p32x_sh2_write8(dst, *sp >> 8, sh2); + } + + return count; +} + // ----------------------------------------------------------------- static void z80_md_bank_write_32x(unsigned int a, unsigned char d) @@ -2107,8 +2174,12 @@ void 
Pico32xSwapDRAM(int b) ssh2_read16_map[0x04/2].addr = ssh2_read16_map[0x24/2].addr = ssh2_read32_map[0x04/2].addr = ssh2_read32_map[0x24/2].addr = MAP_MEMORY(Pico32xMem->dram[b]); - msh2.p_dram = ssh2.p_dram = Pico32xMem->dram[b]; // DRC conveniance ptr - msh2.p_rom = ssh2.p_rom = Pico.rom; + // convenience ptrs + msh2.p_sdram = ssh2.p_sdram = Pico32xMem->sdram; + msh2.p_dram = ssh2.p_dram = Pico32xMem->dram[b]; + msh2.p_rom = ssh2.p_rom = Pico.rom; + msh2.p_bios = Pico32xMem->sh2_rom_m.w; msh2.p_da = msh2.data_array; + ssh2.p_bios = Pico32xMem->sh2_rom_s.w; ssh2.p_da = ssh2.data_array; } static void bank_switch_rom_sh2(void) diff --git a/pico/32x/sh2soc.c b/pico/32x/sh2soc.c index dd61a93b..66bdc478 100644 --- a/pico/32x/sh2soc.c +++ b/pico/32x/sh2soc.c @@ -129,6 +129,24 @@ static void dmac_transfer_one(SH2 *sh2, struct dma_chan *chan) chan->sar += size; } +// optimization for copying around memory with SH2 DMA +static void dmac_memcpy(struct dma_chan *chan, SH2 *sh2) +{ + u32 size = (chan->chcr >> 10) & 3, up = chan->chcr & (1 << 14); + int count; + + if (!up || chan->tcr < 4) + return; + if (size == 3) size = 2; // 4-word xfer mode still counts in words + // XXX check TCR being a multiple of 4 in 4-word xfer mode? + // XXX check alignment of sar/dar, generating a bus error if unaligned? + count = p32x_sh2_memcpy(chan->dar, chan->sar, chan->tcr, 1 << size, sh2); + + chan->sar += count << size; + chan->dar += count << size; + chan->tcr -= count; +} + // DMA trigger by SH2 register write static void dmac_trigger(SH2 *sh2, struct dma_chan *chan) { @@ -139,6 +157,11 @@ static void dmac_trigger(SH2 *sh2, struct dma_chan *chan) if (chan->chcr & DMA_AR) { // auto-request transfer sh2->state |= SH2_STATE_SLEEP; + if ((((chan->chcr >> 12) ^ (chan->chcr >> 14)) & 3) == 0 && + (((chan->chcr >> 14) ^ (chan->chcr >> 15)) & 1) == 1) { + // SM == DM and either DM0 or DM1 are set. 
check for mem to mem copy + dmac_memcpy(chan, sh2); + } while ((int)chan->tcr > 0) dmac_transfer_one(sh2, chan); dmac_transfer_complete(sh2, chan); diff --git a/pico/pico_int.h b/pico/pico_int.h index 31fc702c..36b36144 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -937,6 +937,7 @@ unsigned int REGPARM(3) p32x_sh2_poll_memory16(unsigned int a, unsigned int d, S unsigned int REGPARM(3) p32x_sh2_poll_memory32(unsigned int a, unsigned int d, SH2 *sh2); void *p32x_sh2_get_mem_ptr(unsigned int a, unsigned int *mask, SH2 *sh2); void p32x_sh2_poll_event(SH2 *sh2, unsigned int flags, unsigned int m68k_cycles); +int p32x_sh2_memcpy(unsigned int dst, unsigned int src, int count, int size, SH2 *sh2); // 32x/draw.c void PicoDrawSetOutFormat32x(pdso_t which, int use_32x_line_mode); diff --git a/tools/mkoffsets.sh b/tools/mkoffsets.sh index 461fbfa7..a573f7a4 100755 --- a/tools/mkoffsets.sh +++ b/tools/mkoffsets.sh @@ -89,7 +89,7 @@ get_define OFS_PMEM32x_ Pico32xMem pal_native ; echo "$line" >>$fn get_define OFS_SH2_ SH2_ is_slave ; echo "$line" >>$fn get_define OFS_SH2_ SH2_ p_bios ; echo "$line" >>$fn get_define OFS_SH2_ SH2_ p_da ; echo "$line" >>$fn -get_define OFS_SH2_ SH2_ p_sdram ; echo "$line" >>$fn +get_define OFS_SH2_ SH2_ p_sdram ; echo "$line" >>$fn get_define OFS_SH2_ SH2_ p_rom ; echo "$line" >>$fn get_define OFS_SH2_ SH2_ p_dram ; echo "$line" >>$fn get_define OFS_SH2_ SH2_ p_drcblk_da ; echo "$line" >>$fn From ee46642395c15c0aa5a3b33b1d9f2aac82e95c54 Mon Sep 17 00:00:00 2001 From: kub Date: Mon, 24 Jun 2019 20:09:15 +0200 Subject: [PATCH 0202/1110] sh2 drc, x86 code emitter: use x86-64 registers R8-R15 --- cpu/drc/emit_x86.c | 351 ++++++++++++++++++++++++++++----------------- cpu/sh2/compiler.c | 64 +++++---- 2 files changed, 254 insertions(+), 161 deletions(-) diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index f71c5d42..652b4989 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -13,9 +13,9 @@ */ #include -enum { xAX = 0, xCX, xDX, xBX, 
xSP, xBP, xSI, xDI }; +enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common + xR8, xR9, xR10, xR11, xR12, xR13, xR14, xR15 }; // x86-64 only -#define HOST_REGS 8 #define CONTEXT_REG xBP #define RET_REG xAX @@ -65,7 +65,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define EMIT_OP(op) do { \ COUNT_OP; \ - EMIT(op, u8); \ + if ((op) > 0xff) EMIT((op) >> 8, u8); \ + EMIT((u8)(op), u8); \ } while (0) #define EMIT_MODRM(mod, r, rm) do { \ @@ -110,50 +111,70 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; EMIT_PTR(ptr + 1, (tcache_ptr - (ptr+2)), u8) // _r_r -#define emith_move_r_r(dst, src) \ - EMIT_OP_MODRM(0x8b, 3, dst, src) +#define emith_move_r_r(dst, src) do {\ + EMIT_REX_IF(0, dst, src); \ + EMIT_OP_MODRM64(0x8b, 3, dst, src); \ +} while (0) #define emith_move_r_r_ptr(dst, src) do { \ EMIT_REX_IF(1, dst, src); \ EMIT_OP_MODRM64(0x8b, 3, dst, src); \ } while (0) -#define emith_add_r_r(d, s) \ - EMIT_OP_MODRM(0x01, 3, s, d) +#define emith_add_r_r(d, s) do { \ + EMIT_REX_IF(0, s, d); \ + EMIT_OP_MODRM64(0x01, 3, s, d); \ +} while (0) #define emith_add_r_r_ptr(d, s) do { \ EMIT_REX_IF(1, s, d); \ EMIT_OP_MODRM64(0x01, 3, s, d); \ } while (0) -#define emith_sub_r_r(d, s) \ - EMIT_OP_MODRM(0x29, 3, s, d) +#define emith_sub_r_r(d, s) do {\ + EMIT_REX_IF(0, s, d); \ + EMIT_OP_MODRM64(0x29, 3, s, d); \ +} while (0) -#define emith_adc_r_r(d, s) \ - EMIT_OP_MODRM(0x11, 3, s, d) +#define emith_adc_r_r(d, s) do { \ + EMIT_REX_IF(0, s, d); \ + EMIT_OP_MODRM64(0x11, 3, s, d); \ +} while (0) -#define emith_sbc_r_r(d, s) \ - EMIT_OP_MODRM(0x19, 3, s, d) /* SBB */ +#define emith_sbc_r_r(d, s) do { \ + EMIT_REX_IF(0, s, d); \ + EMIT_OP_MODRM64(0x19, 3, s, d); /* SBB */ \ +} while (0) -#define emith_or_r_r(d, s) \ - EMIT_OP_MODRM(0x09, 3, s, d) +#define emith_or_r_r(d, s) do { \ + EMIT_REX_IF(0, s, d); \ + EMIT_OP_MODRM64(0x09, 3, s, d); \ +} while (0) -#define emith_and_r_r(d, s) \ - EMIT_OP_MODRM(0x21, 3, s, d) +#define emith_and_r_r(d, s) do 
{ \ + EMIT_REX_IF(0, s, d); \ + EMIT_OP_MODRM64(0x21, 3, s, d); \ +} while (0) -#define emith_eor_r_r(d, s) \ - EMIT_OP_MODRM(0x31, 3, s, d) /* XOR */ +#define emith_eor_r_r(d, s) do { \ + EMIT_REX_IF(0, s, d); \ + EMIT_OP_MODRM64(0x31, 3, s, d); /* XOR */ \ +} while (0) -#define emith_tst_r_r(d, s) \ - EMIT_OP_MODRM(0x85, 3, s, d) /* TEST */ +#define emith_tst_r_r(d, s) do { \ + EMIT_REX_IF(0, s, d); \ + EMIT_OP_MODRM64(0x85, 3, s, d); /* TEST */ \ +} while (0) #define emith_tst_r_r_ptr(d, s) do { \ EMIT_REX_IF(1, s, d); \ EMIT_OP_MODRM64(0x85, 3, s, d); /* TEST */ \ } while (0) -#define emith_cmp_r_r(d, s) \ - EMIT_OP_MODRM(0x39, 3, s, d) +#define emith_cmp_r_r(d, s) do { \ + EMIT_REX_IF(0, s, d); \ + EMIT_OP_MODRM64(0x39, 3, s, d); \ +} while (0) // fake teq - test equivalence - get_flags(d ^ s) #define emith_teq_r_r(d, s) do { \ @@ -165,7 +186,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_mvn_r_r(d, s) do { \ if (d != s) \ emith_move_r_r(d, s); \ - EMIT_OP_MODRM(0xf7, 3, 2, d); /* NOT d */ \ + EMIT_REX_IF(0, 0, d); \ + EMIT_OP_MODRM64(0xf7, 3, 2, d); /* NOT d */ \ } while (0) #define emith_negc_r_r(d, s) do { \ @@ -179,7 +201,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_neg_r_r(d, s) do { \ if (d != s) \ emith_move_r_r(d, s); \ - EMIT_OP_MODRM(0xf7, 3, 3, d); /* NEG d */ \ + EMIT_REX_IF(0, 0, d); \ + EMIT_OP_MODRM64(0xf7, 3, 3, d); /* NEG d */ \ } while (0) // _r_r_r @@ -325,17 +348,18 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; // _r_imm #define emith_move_r_imm(r, imm) do { \ - EMIT_OP(0xb8 + (r)); \ + EMIT_REX_IF(0, 0, r); \ + EMIT_OP(0xb8 + ((r)&7)); \ EMIT(imm, u32); \ } while (0) #define emith_move_r_ptr_imm(r, imm) do { \ - if ((uint64_t)(imm) <= UINT32_MAX) \ + if ((uintptr_t)(imm) <= UINT32_MAX) \ emith_move_r_imm(r, (uintptr_t)(imm)); \ else { \ EMIT_REX_IF(1, 0, r); \ - EMIT_OP(0xb8 + (r)); \ - EMIT((uint64_t)(imm), uint64_t); \ + EMIT_OP(0xb8 + ((r)&7)); \ + EMIT((uintptr_t)(imm), 
uint64_t); \ } \ } while (0) @@ -343,7 +367,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; emith_move_r_imm(r, (u32)(signed int)(signed char)(imm)) #define emith_arith_r_imm(op, r, imm) do { \ - EMIT_OP_MODRM(0x81, 3, op, r); \ + EMIT_REX_IF(0, 0, r); \ + EMIT_OP_MODRM64(0x81, 3, op, r); \ EMIT(imm, u32); \ } while (0) @@ -372,7 +397,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; emith_arith_r_imm(7, r, imm) #define emith_tst_r_imm(r, imm) do { \ - EMIT_OP_MODRM(0xf7, 3, 0, r); \ + EMIT_REX_IF(0, 0, r); \ + EMIT_OP_MODRM64(0xf7, 3, 0, r); \ EMIT(imm, u32); \ } while (0) @@ -442,22 +468,14 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; // _r_r_imm - use lea #define emith_add_r_r_imm(d, s, imm) do { \ - assert(s != xSP); \ - EMIT_OP_MODRM(0x8d, 2, d, s); /* lea */ \ + EMIT_REX_IF(0, d, s); \ + emith_deref_modrm(0x8d, 2, d, s); \ EMIT(imm, s32); \ } while (0) #define emith_add_r_r_ptr_imm(d, s, imm) do { \ - if ((s) != xSP) { \ - EMIT_REX_IF(1, d, s); \ - EMIT_OP_MODRM64(0x8d, 2, d, s); /* lea */ \ - } \ - else { \ - if (d != s) \ - emith_move_r_r_ptr(d, s); \ - EMIT_REX_IF(1, 0, d); \ - EMIT_OP_MODRM64(0x81, 3, 0, d); /* add */ \ - } \ + EMIT_REX_IF(1, d, s); \ + emith_deref_modrm(0x8d, 2, d, s); \ EMIT(imm, s32); \ } while (0) @@ -493,7 +511,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_shift(op, d, s, cnt) do { \ if (d != s) \ emith_move_r_r(d, s); \ - EMIT_OP_MODRM(0xc1, 3, op, d); \ + EMIT_REX_IF(0, 0, d); \ + EMIT_OP_MODRM64(0xc1, 3, op, d); \ EMIT(cnt, u8); \ } while (0) @@ -512,26 +531,36 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_ror(d, s, cnt) \ emith_shift(1, d, s, cnt) -#define emith_rolc(r) \ - EMIT_OP_MODRM(0xd1, 3, 2, r) +#define emith_rolc(r) do { \ + EMIT_REX_IF(0, 0, r); \ + EMIT_OP_MODRM64(0xd1, 3, 2, r); \ +} while (0) -#define emith_rorc(r) \ - EMIT_OP_MODRM(0xd1, 3, 3, r) +#define emith_rorc(r) do { \ + EMIT_REX_IF(0, 0, r); \ + EMIT_OP_MODRM64(0xd1, 3, 3, r); \ 
+} while (0) // misc -#define emith_push(r) \ - EMIT_OP(0x50 + (r)) +#define emith_push(r) do { \ + EMIT_REX_IF(0, 0, r); \ + EMIT_OP(0x50 + ((r)&7)); \ +} while (0) #define emith_push_imm(imm) do { \ EMIT_OP(0x68); \ EMIT(imm, u32); \ } while (0) -#define emith_pop(r) \ - EMIT_OP(0x58 + (r)) +#define emith_pop(r) do { \ + EMIT_REX_IF(0, 0, r); \ + EMIT_OP(0x58 + ((r)&7)); \ +} while (0) -#define emith_neg_r(r) \ - EMIT_OP_MODRM(0xf7, 3, 3, r) +#define emith_neg_r(r) do { \ + EMIT_REX_IF(0, 0, r); \ + EMIT_OP_MODRM64(0xf7, 3, 3, r); \ +} while (0) #define emith_clear_msb(d, s, count) do { \ u32 t = (u32)-1; \ @@ -553,8 +582,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_setc(r) do { \ assert(is_abcdx(r)); \ - EMIT_OP(0x0f); \ - EMIT_OP_MODRM(0x92, 3, 0, r); /* SETC r */ \ + EMIT_REX_IF(0, 0, r); \ + EMIT_OP_MODRM64(0x0f92, 3, 0, r); /* SETC r */ \ } while (0) // XXX: stupid mess @@ -572,9 +601,12 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; emith_move_r_r(xAX, s1); \ rmr = s2; \ } \ - EMIT_OP_MODRM(0xf7, 3, op, rmr); /* xMUL rmr */ \ - if (dlo != xAX) \ - EMIT_OP(0x90 + (dlo)); /* XCHG eax, dlo */ \ + EMIT_REX_IF(0, 0, rmr); \ + EMIT_OP_MODRM64(0xf7, 3, op, rmr); /* xMUL rmr */ \ + if (dlo != xAX) { \ + EMIT_REX_IF(0, 0, dlo); \ + EMIT_OP(0x90 + ((dlo)&7)); /* XCHG eax, dlo */ \ + } \ if (dhi != xDX && dhi != -1 && !(dhi == xAX && dlo == xDX)) \ emith_move_r_r(dhi, (dlo == xDX ? 
xAX : xDX)); \ if (dlo != xDX && dhi != xDX) \ @@ -589,19 +621,30 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_mul_s64(dlo, dhi, s1, s2) \ emith_mul_(5, dlo, dhi, s1, s2) /* IMUL */ -#define emith_mul(d, s1, s2) \ - emith_mul_(4, d, -1, s1, s2) +#define emith_mul(d, s1, s2) do { \ + if (d == s1) { \ + EMIT_REX_IF(0, d, s2); \ + EMIT_OP_MODRM64(0x0faf, 3, d, s2); \ + } else if (d == s2) { \ + EMIT_REX_IF(0, d, s1); \ + EMIT_OP_MODRM64(0x0faf, 3, d, s1); \ + } else { \ + emith_move_r_r(d, s1); \ + EMIT_REX_IF(0, d, s2); \ + EMIT_OP_MODRM64(0x0faf, 3, d, s2); \ + } \ +} while (0) // (dlo,dhi) += signed(s1) * signed(s2) #define emith_mula_s64(dlo, dhi, s1, s2) do { \ emith_push(dhi); \ emith_push(dlo); \ emith_mul_(5, dlo, dhi, s1, s2); \ - EMIT_OP_MODRM(0x03, 0, dlo, 4); \ - EMIT_SIB(0, 4, 4); /* add dlo, [xsp] */ \ - EMIT_OP_MODRM(0x13, 1, dhi, 4); \ - EMIT_SIB(0, 4, 4); \ - EMIT(sizeof(void *), u8); /* adc dhi, [xsp+{4,8}] */ \ + EMIT_REX_IF(0, dlo, xSP); \ + emith_deref_modrm(0x03, 0, dlo, xSP); /* add dlo, [xsp] */ \ + EMIT_REX_IF(0, dhi, xSP); \ + emith_deref_modrm(0x13, 1, dhi, xSP); /* adc dhi, [xsp+{4,8}] */ \ + EMIT(sizeof(void *), u8); \ emith_add_r_r_ptr_imm(xSP, xSP, sizeof(void *) * 2); \ } while (0) @@ -631,100 +674,114 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_rolcf emith_rolc #define emith_rorcf emith_rorc +#define emith_deref_modrm(op, m, r, rs) do { \ + if (((rs) & 7) == 5 && m == 0) { /* xBP,xR13 not in mod 0, use mod 1 */\ + EMIT_OP_MODRM64(op, 1, r, rs); \ + EMIT(0, u8); \ + } else if (((rs) & 7) == 4) { /* xSP,xR12 must use SIB */ \ + EMIT_OP_MODRM64(op, m, r, 4); \ + EMIT_SIB64(0, 4, rs); \ + } else \ + EMIT_OP_MODRM64(op, m, r, rs); \ +} while (0) + #define emith_deref_op(op, r, rs, offs) do { \ /* mov r <-> [ebp+#offs] */ \ - if (abs(offs) >= 0x80) { \ - EMIT_OP_MODRM64(op, 2, r, rs); \ + if ((offs) == 0) { \ + emith_deref_modrm(op, 0, r, rs); \ + } else if (abs(offs) >= 0x80) { \ + 
emith_deref_modrm(op, 2, r, rs); \ EMIT(offs, u32); \ } else { \ - EMIT_OP_MODRM64(op, 1, r, rs); \ + emith_deref_modrm(op, 1, r, rs); \ EMIT((u8)offs, u8); \ } \ } while (0) -#define is_abcdx(r) (xAX <= (r) && (r) <= xDX) +#define is_abcdx(r) !((r) & ~0x3) -#define emith_read_r_r_offs(r, rs, offs) \ - emith_deref_op(0x8b, r, rs, offs) -#define emith_read_r_r_offs_ptr(r, rs, offs) \ +#define emith_read_r_r_offs(r, rs, offs) do { \ + EMIT_REX_IF(0, r, rs); \ + emith_deref_op(0x8b, r, rs, offs); \ +} while (0) +#define emith_read_r_r_offs_ptr(r, rs, offs) do { \ EMIT_REX_IF(1, r, rs); \ - emith_deref_op(0x8b, r, rs, offs) + emith_deref_op(0x8b, r, rs, offs); \ +} while (0) -#define emith_write_r_r_offs(r, rs, offs) \ - emith_deref_op(0x89, r, rs, offs) -#define emith_write_r_r_offs_ptr(r, rs, offs) \ +#define emith_write_r_r_offs(r, rs, offs) do { \ + EMIT_REX_IF(0, r, rs); \ + emith_deref_op(0x89, r, rs, offs); \ +} while (0) +#define emith_write_r_r_offs_ptr(r, rs, offs) do { \ EMIT_REX_IF(1, r, rs); \ - emith_deref_op(0x89, r, rs, offs) + emith_deref_op(0x89, r, rs, offs); \ +} while (0) #define emith_read8_r_r_offs(r, rs, offs) do { \ - EMIT(0x0f, u8); \ - emith_deref_op(0xb6, r, rs, offs); \ + EMIT_REX_IF(0, r, rs); \ + emith_deref_op(0x0fb6, r, rs, offs); \ } while (0) #define emith_read8s_r_r_offs(r, rs, offs) do { \ - EMIT(0x0f, u8); \ - emith_deref_op(0xbe, r, rs, offs); \ + EMIT_REX_IF(0, r, rs); \ + emith_deref_op(0x0fbe, r, rs, offs); \ } while (0) -// note: don't use prefixes on this #define emith_write8_r_r_offs(r, rs, offs) do {\ - int r_ = r; \ - if (!is_abcdx(r)) { \ - r_ = rcache_get_tmp(); \ - emith_move_r_r(r_, r); \ - } \ - emith_deref_op(0x88, r_, rs, offs); \ - if ((r) != r_) \ - rcache_free_tmp(r_); \ + EMIT_REX_IF(0, r, rs); \ + emith_deref_op(0x88, r, rs, offs); \ } while (0) #define emith_read16_r_r_offs(r, rs, offs) do { \ - EMIT(0x0f, u8); \ - emith_deref_op(0xb7, r, rs, offs); \ + EMIT_REX_IF(0, r, rs); \ + emith_deref_op(0x0fb7, r, rs, 
offs); \ } while (0) #define emith_read16s_r_r_offs(r, rs, offs) do { \ - EMIT(0x0f, u8); \ - emith_deref_op(0xbf, r, rs, offs); \ + EMIT_REX_IF(0, r, rs); \ + emith_deref_op(0x0fbf, r, rs, offs); \ } while (0) #define emith_write16_r_r_offs(r, rs, offs) do { \ - EMIT(0x66, u8); \ - emith_write_r_r_offs(r, rs, offs); \ + EMIT(0x66, u8); /* Intel SDM Vol 2a: REX must be closest to opcode */ \ + EMIT_REX_IF(0, r, rs); \ + emith_deref_op(0x89, r, rs, offs); \ } while (0) #define emith_read8_r_r_r(r, rs, rm) do { \ - EMIT(0x0f, u8); \ - EMIT_OP_MODRM(0xb6, 0, r, 4); \ - EMIT_SIB(0, rs, rm); /* mov r, [rm + rs * 1] */ \ + EMIT_XREX_IF(0, r, rm, rs); \ + EMIT_OP_MODRM64(0x0fb6, 0, r, 4); \ + EMIT_SIB64(0, rs, rm); /* mov r, [rm + rs * 1] */ \ } while (0) #define emith_read8s_r_r_r(r, rs, rm) do { \ - EMIT(0x0f, u8); \ - EMIT_OP_MODRM(0xbe, 0, r, 4); \ - EMIT_SIB(0, rs, rm); /* mov r, [rm + rs * 1] */ \ + EMIT_XREX_IF(0, r, rm, rs); \ + EMIT_OP_MODRM64(0x0fbe, 0, r, 4); \ + EMIT_SIB64(0, rs, rm); /* mov r, [rm + rs * 1] */ \ } while (0) #define emith_read16_r_r_r(r, rs, rm) do { \ - EMIT(0x0f, u8); \ - EMIT_OP_MODRM(0xb7, 0, r, 4); \ - EMIT_SIB(0, rs, rm); /* mov r, [rm + rs * 1] */ \ + EMIT_XREX_IF(0, r, rm, rs); \ + EMIT_OP_MODRM64(0x0fb7, 0, r, 4); \ + EMIT_SIB64(0, rs, rm); /* mov r, [rm + rs * 1] */ \ } while (0) #define emith_read16s_r_r_r(r, rs, rm) do { \ - EMIT(0x0f, u8); \ - EMIT_OP_MODRM(0xbf, 0, r, 4); \ - EMIT_SIB(0, rs, rm); /* mov r, [rm + rs * 1] */ \ + EMIT_XREX_IF(0, r, rm, rs); \ + EMIT_OP_MODRM64(0x0fbf, 0, r, 4); \ + EMIT_SIB64(0, rs, rm); /* mov r, [rm + rs * 1] */ \ } while (0) #define emith_read_r_r_r(r, rs, rm) do { \ - EMIT_OP_MODRM(0x8b, 0, r, 4); \ - EMIT_SIB(0, rs, rm); /* mov r, [rm + rs * 1] */ \ + EMIT_XREX_IF(0, r, rm, rs); \ + EMIT_OP_MODRM64(0x8b, 0, r, 4); \ + EMIT_SIB64(0, rs, rm); /* mov r, [rm + rs * 1] */ \ } while (0) #define emith_read_r_r_r_ptr(r, rs, rm) do { \ - EMIT_REX_IF(1, r, rs); \ + EMIT_XREX_IF(1, r, rm, rs); \ 
EMIT_OP_MODRM64(0x8b, 0, r, 4); \ - EMIT_SIB(0, rs, rm); /* mov r, [rm + rs * 1] */ \ + EMIT_SIB64(0, rs, rm); /* mov r, [rm + rs * 1] */ \ } while (0) #define emith_read_r_r_r_wb(r, rs, rm) do { \ emith_read_r_r_r(r, rs, rm); \ @@ -736,13 +793,14 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; } while (0) #define emith_write_r_r_r(r, rs, rm) do { \ - EMIT_OP_MODRM(0x89, 0, r, 4); \ - EMIT_SIB(0, rs, rm); /* mov [rm + rs * 1], r */ \ + EMIT_XREX_IF(0, r, rm, rs); \ + EMIT_OP_MODRM64(0x89, 0, r, 4); \ + EMIT_SIB64(0, rs, rm); /* mov [rm + rs * 1], r */ \ } while (0) #define emith_write_r_r_r_ptr(r, rs, rm) do { \ - EMIT_REX_IF(1, r, rs); \ + EMIT_XREX_IF(1, r, rm, rs); \ EMIT_OP_MODRM64(0x89, 0, r, 4); \ - EMIT_SIB(0, rs, rm); /* mov [rm + rs * 1], r */ \ + EMIT_SIB64(0, rs, rm); /* mov [rm + rs * 1], r */ \ } while (0) #define emith_write_r_r_r_wb(r, rs, rm) do { \ emith_write_r_r_r(r, rs, rm); \ @@ -796,8 +854,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_jump_cond(cond, ptr) do { \ u32 disp = (u8 *)(ptr) - ((u8 *)tcache_ptr + 6); \ - EMIT(0x0f, u8); \ - EMIT_OP(0x80 | (cond)); \ + EMIT_OP(0x0f80 | (cond)); \ EMIT(disp, u32); \ } while (0) @@ -924,15 +981,20 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #ifdef __x86_64__ +#define HOST_REGS 16 #define PTR_SCALE 3 #define NA_TMP_REG xAX // non-arg tmp from reg_temp[] -#define EMIT_REX_IF(w, r, rm) do { \ - int r_ = (r) > 7 ? 1 : 0; \ - int rm_ = (rm) > 7 ? 1 : 0; \ - if ((w) | r_ | rm_) \ - EMIT_REX(1, r_, 0, rm_); \ +#define EMIT_XREX_IF(w, r, rm, rs) do { \ + int xr_ = (r) > 7 ? 1 : 0; \ + int xb_ = (rm) > 7 ? 1 : 0; \ + int xx_ = (rs) > 7 ? 
1 : 0; \ + if ((w) | xr_ | xx_ | xb_) \ + EMIT_REX(w, xr_, xx_, xb_); \ } while (0) + +#define EMIT_REX_IF(w, r, rm) \ + EMIT_XREX_IF(w, r, rm, 0) #ifndef _WIN32 @@ -947,11 +1009,19 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_sh2_drc_entry() do { \ emith_push(xBX); \ emith_push(xBP); \ + emith_push(xR12); \ + emith_push(xR13); \ + emith_push(xR14); \ + emith_push(xR15); \ emith_push(xSI); /* to align */ \ } while (0) #define emith_sh2_drc_exit() do { \ emith_pop(xSI); \ + emith_pop(xR15); \ + emith_pop(xR14); \ + emith_pop(xR13); \ + emith_pop(xR12); \ emith_pop(xBP); \ emith_pop(xBX); \ emith_ret(); \ @@ -963,22 +1033,30 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; switch (arg) { \ case 0: rd = xCX; break; \ case 1: rd = xDX; break; \ - case 2: rd = 8; break; \ - default: rd = 9; break; \ + case 2: rd = xR8; break; \ + default: rd = xR9; break; \ } #define emith_sh2_drc_entry() do { \ emith_push(xBX); \ emith_push(xBP); \ + emith_push(xR12); \ + emith_push(xR13); \ + emith_push(xR14); \ + emith_push(xR15); \ emith_push(xSI); \ emith_push(xDI); \ - emith_add_r_r_ptr_imm(xSP, xSP, -8*5); \ + emith_add_r_r_ptr_imm(xSP, xSP, -8*5); /* align + ABI param area */ \ } while (0) #define emith_sh2_drc_exit() do { \ emith_add_r_r_ptr_imm(xSP, xSP, 8*5); \ emith_pop(xDI); \ emith_pop(xSI); \ + emith_pop(xR15); \ + emith_pop(xR14); \ + emith_pop(xR13); \ + emith_pop(xR12); \ emith_pop(xBP); \ emith_pop(xBX); \ emith_ret(); \ @@ -988,6 +1066,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #else // !__x86_64__ +#define HOST_REGS 8 #define PTR_SCALE 2 #define NA_TMP_REG xBX // non-arg tmp from reg_temp[] @@ -995,6 +1074,11 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; assert((u32)(r) < 8u); \ assert((u32)(rm) < 8u); \ } while (0) +#define EMIT_XREX_IF(w, r, rs, rm) do { \ + assert((u32)(r) < 8u); \ + assert((u32)(rs) < 8u); \ + assert((u32)(rm) < 8u); \ +} while (0) #define host_arg2reg(rd, arg) \ switch (arg) { \ @@ 
-1039,15 +1123,16 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_sh2_rcall(a, tab, func, mask) do { \ emith_lsr(mask, a, SH2_READ_SHIFT); \ - EMIT_REX_IF(1, mask, tab); \ + EMIT_XREX_IF(1, tab, tab, mask); \ EMIT_OP_MODRM64(0x8d, 0, tab, 4); \ EMIT_SIB64(PTR_SCALE, mask, tab); /* lea tab, [tab + mask * {4,8}] */ \ - EMIT_REX_IF(1, mask, tab); \ + EMIT_XREX_IF(1, tab, tab, mask); \ EMIT_OP_MODRM64(0x8d, 0, tab, 4); \ EMIT_SIB64(PTR_SCALE, mask, tab); /* lea tab, [tab + mask * {4,8}] */ \ - EMIT_REX_IF(1, func, tab); \ - EMIT_OP_MODRM64(0x8b, 0, func, tab); /* mov func, [tab] */ \ - EMIT_OP_MODRM64(0x8b, 1, mask, tab); \ + EMIT_REX_IF(1, func, tab); \ + emith_deref_modrm(0x8b, 0, func, tab); /* mov func, [tab] */ \ + EMIT_REX_IF(0, mask, tab); \ + emith_deref_modrm(0x8b, 1, mask, tab); \ EMIT(1 << PTR_SCALE, u8); /* mov mask, [tab + {4,8}] */ \ emith_add_r_r_ptr(func, func); \ } while (0) @@ -1056,7 +1141,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; int arg2_; \ host_arg2reg(arg2_, 2); \ emith_lsr(func, a, SH2_WRITE_SHIFT); /* tmp = a >> WRT_SHIFT */ \ - EMIT_REX_IF(1, func, tab); \ + EMIT_XREX_IF(1, func, tab, func); \ EMIT_OP_MODRM64(0x8b, 0, func, 4); \ EMIT_SIB64(PTR_SCALE, func, tab); /* mov tmp, [tab + tmp * {4,8}] */ \ emith_move_r_r_ptr(arg2_, CONTEXT_REG); \ diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 2a147a15..9932ce6f 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -1,6 +1,7 @@ /* * SH2 recompiler * (C) notaz, 2009,2010,2013 + * (C) kub, 2018,2019 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. @@ -430,13 +431,16 @@ typedef struct { } guest_reg_t; -// note: cache_regs[] must have at least the amount of -// HRF_REG registers used by handlers in worst case (currently 4) +// Note: cache_regs[] must have at least the amount of REG and TEMP registers +// used by handlers in worst case (currently 4). 
+// Register assignment goes by ABI convention. Caller save registers are TEMP, +// the others are either static or REG. SR must be static, R0 very recommended. +// TEMP registers first, REG last. alloc/evict algorithm depends on this. +// The 1st TEMP must not be RET_REG on x86 (it uses temps for some insns). +// XXX shouldn't this be somehow defined in the code emitters? #ifdef __arm__ #include "../drc/emit_arm.c" -// register assigment goes by ABI convention. All caller save registers are TEMP -// the others are either static or REG. SR must be static, R0 very recommended static guest_reg_t guest_regs[] = { // SHR_R0 .. SHR_SP #ifndef __MACH__ // no r9.. @@ -453,20 +457,21 @@ static guest_reg_t guest_regs[] = { { 0 } , { 0 } , { 0 } , { 0 } , }; -// NB first TEMP, then REG. alloc/evict algorithm depends on this +// OABI/EABI: params: r0-r3, return: r0-r1, temp: r12,r14, saved: r4-r8,r10,r11 +// SP,PC: r13,r15 must not be used. saved: r9 (for platform use, e.g. on OSx) static cache_reg_t cache_regs[] = { - { 12, HRF_TEMP }, + { 12, HRF_TEMP }, // temps { 14, HRF_TEMP }, - { 0, HRF_TEMP }, - { 1, HRF_TEMP }, + { 3, HRF_TEMP }, // params { 2, HRF_TEMP }, - { 3, HRF_TEMP }, - { 8, HRF_LOCKED }, + { 1, HRF_TEMP }, + { 0, HRF_TEMP }, // RET_REG + { 8, HRF_LOCKED }, // statics #ifndef __MACH__ // no r9.. { 9, HRF_LOCKED }, #endif { 10, HRF_LOCKED }, - { 4, HRF_REG }, + { 4, HRF_REG }, // other regs { 5, HRF_REG }, { 6, HRF_REG }, { 7, HRF_REG }, @@ -489,11 +494,11 @@ static guest_reg_t guest_regs[] = { // ax, cx, dx are usually temporaries by convention static cache_reg_t cache_regs[] = { - { xBX, HRF_REG|HRF_TEMP }, + { xBX, HRF_REG|HRF_TEMP }, // params { xCX, HRF_REG|HRF_TEMP }, { xDX, HRF_REG|HRF_TEMP }, - { xAX, HRF_REG|HRF_TEMP }, - { xSI, HRF_LOCKED }, + { xAX, HRF_REG|HRF_TEMP }, // return value + { xSI, HRF_LOCKED }, // statics { xDI, HRF_LOCKED }, }; @@ -502,11 +507,7 @@ static cache_reg_t cache_regs[] = { static guest_reg_t guest_regs[] = { // SHR_R0 .. 
SHR_SP -#ifndef _WIN32 - { 0 } , { 0 } , { 0 } , { 0 } , -#else - {GRF_STATIC, xDI}, { 0 } , { 0 } , { 0 } , -#endif + {GRF_STATIC,xR12}, { 0 } , { 0 } , { 0 } , { 0 } , { 0 } , { 0 } , { 0 } , { 0 } , { 0 } , { 0 } , { 0 } , { 0 } , { 0 } , { 0 } , { 0 } , @@ -516,18 +517,25 @@ static guest_reg_t guest_regs[] = { { 0 } , { 0 } , { 0 } , { 0 } , }; -// ax, cx, dx are usually temporaries by convention +// M$/SystemV ABI conventions: +// rbx,rbp,r12-r15 are preserved, rcx,rdx,rax,r8,r9,r10,r11 are temporaries +// rsi,rdi are preserved in M$ ABI, temporary in SystemV ABI +// parameters in rcx,rdx,r8,r9, SystemV ABI additionally uses rsi,rdi static cache_reg_t cache_regs[] = { - { xCX, HRF_REG|HRF_TEMP }, - { xDX, HRF_REG|HRF_TEMP }, - { xAX, HRF_REG|HRF_TEMP }, + { xR10,HRF_TEMP }, // temps + { xR11,HRF_TEMP }, + { xAX, HRF_TEMP }, // RET_REG + { xR8, HRF_TEMP }, // params + { xR9, HRF_TEMP }, + { xCX, HRF_TEMP }, + { xDX, HRF_TEMP }, { xSI, HRF_REG|HRF_TEMP }, -#ifndef _WIN32 { xDI, HRF_REG|HRF_TEMP }, -#else - { xDI, HRF_LOCKED }, -#endif - { xBX, HRF_LOCKED }, + { xBX, HRF_LOCKED }, // statics + { xR12,HRF_LOCKED }, + { xR13,HRF_REG }, // other regs + { xR14,HRF_REG }, + { xR15,HRF_REG }, }; #else From 1891e649e58c7a4499649cc29aded5da7713d4c2 Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 25 Jun 2019 20:15:48 +0200 Subject: [PATCH 0203/1110] 32X: memory access and polling bug fixes --- pico/32x/memory.c | 36 ++++++++++++++++++++++-------------- pico/32x/memory_arm.S | 14 ++++++-------- pico/32x/sh2soc.c | 6 +++++- 3 files changed, 33 insertions(+), 23 deletions(-) diff --git a/pico/32x/memory.c b/pico/32x/memory.c index 70287a2c..7148d41c 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -74,7 +74,7 @@ static int m68k_poll_detect(u32 a, u32 cycles, u32 flags) if (match && cycles - m68k_poll.cycles <= 64 && !SekNotPolling) { // detect split 32bit access by same cycle count, and ignore those - if (cycles != m68k_poll.cycles && ++m68k_poll.cnt > POLL_THRESHOLD) 
{ + if (cycles != m68k_poll.cycles && ++m68k_poll.cnt >= POLL_THRESHOLD) { if (!(Pico32x.emu_flags & flags)) { elprintf(EL_32X, "m68k poll addr %08x, cyc %u", a, cycles - m68k_poll.cycles); @@ -118,7 +118,7 @@ static void NOINLINE sh2_poll_detect(u32 a, SH2 *sh2, u32 flags, int maxcnt) // by checking address (max 2 bytes away) and cycles (max 2 cycles later). // no polling if more than 20 cycles have passed since last detect call. if (a - sh2->poll_addr <= 2 && CYCLES_GE(sh2->poll_cycles+20, cycles_done)) { - if (CYCLES_GT(cycles_done,sh2->poll_cycles+2) && ++sh2->poll_cnt > maxcnt) { + if (CYCLES_GT(cycles_done,sh2->poll_cycles+2) && ++sh2->poll_cnt >= maxcnt) { if (!(sh2->state & flags)) elprintf_sh2(sh2, EL_32X, "state: %02x->%02x", sh2->state, sh2->state | flags); @@ -131,6 +131,8 @@ static void NOINLINE sh2_poll_detect(u32 a, SH2 *sh2, u32 flags, int maxcnt) if ((a & 0xc6000000) == 0x06000000) { unsigned char *p = sh2->p_drcblk_ram; p[(a & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT] |= 0x80; + // mark next word too to enable poll fifo for 32bit access + p[((a+2) & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT] |= 0x80; } #endif } @@ -148,7 +150,7 @@ void NOINLINE p32x_sh2_poll_event(SH2 *sh2, u32 flags, u32 m68k_cycles) elprintf_sh2(sh2, EL_32X, "state: %02x->%02x", sh2->state, sh2->state & ~flags); - if (sh2->m68krcycles_done < m68k_cycles) + if (sh2->m68krcycles_done < m68k_cycles && !(sh2->state & SH2_STATE_RUN)) sh2->m68krcycles_done = m68k_cycles; pevt_log_sh2_o(sh2, EVT_POLL_END); @@ -174,12 +176,12 @@ static void sh2s_sync_on_read(SH2 *sh2) // This is used to correctly deliver syncronisation data to the 3 cpus. The // fifo stores 16 bit values, 8/32 bit accesses must be adapted accordingly. 
#define PFIFO_SZ 4 -#define PFIFO_CNT 4 +#define PFIFO_CNT 8 struct sh2_poll_fifo { u32 cycles; u32 a; u16 d; - u16 cpu; + int cpu; } sh2_poll_fifo[PFIFO_CNT][PFIFO_SZ]; unsigned sh2_poll_rd[PFIFO_CNT], sh2_poll_wr[PFIFO_CNT]; // ringbuffer pointers @@ -191,6 +193,7 @@ static NOINLINE u32 sh2_poll_read(u32 a, u32 d, unsigned int cycles, SH2* sh2) int cpu = sh2 ? sh2->is_slave+1 : 0; unsigned idx; + a &= ~0x20000000; // ignore writethrough bit // fetch oldest write to address from fifo, but stop when reaching the present idx = sh2_poll_rd[hix]; while (idx != sh2_poll_wr[hix] && CYCLES_GE(cycles, fifo[idx].cycles)) { @@ -225,6 +228,7 @@ static NOINLINE void sh2_poll_write(u32 a, u32 d, unsigned int cycles, SH2 *sh2) struct sh2_poll_fifo *q = &fifo[(sh2_poll_wr[hix]-1) % PFIFO_SZ]; int cpu = sh2 ? sh2->is_slave+1 : 0; + a &= ~0x20000000; // ignore writethrough bit // fold 2 consecutive writes to the same address to avoid reading of // intermediate values that may cause synchronisation problems. 
// NB this can take an eternity on m68k: mov.b , needs @@ -279,8 +283,8 @@ u32 REGPARM(3) p32x_sh2_poll_memory32(unsigned int a, u32 d, SH2 *sh2) sh2s_sync_on_read(sh2); cycles = sh2_cycles_done_m68k(sh2); // check poll fifo and sign-extend the result correctly - d = sh2_poll_read(a, d, cycles, sh2) | - (sh2_poll_read(a+2, d >> 16, cycles, sh2) << 16); + d = (sh2_poll_read(a, d >> 16, cycles, sh2) << 16) | + ((u16)sh2_poll_read(a+2, d, cycles, sh2)); } sh2_poll_detect(a, sh2, SH2_STATE_RPOLL, 5); @@ -1503,7 +1507,7 @@ static u32 REGPARM(2) sh2_read32_rom(u32 a, SH2 *sh2) // writes #ifdef DRC_SH2 -static void NOINLINE sh2_sdram_poll(u32 a, u16 d, SH2 *sh2) +static void NOINLINE sh2_sdram_poll(u32 a, u32 d, SH2 *sh2) { unsigned cycles; @@ -1525,8 +1529,8 @@ void NOINLINE sh2_sdram_checks(u32 a, u32 d, SH2 *sh2, int t) void NOINLINE sh2_sdram_checks_l(u32 a, u32 d, SH2 *sh2, int t) { - sh2_sdram_checks(a, d, sh2, t); - sh2_sdram_checks(a+2, d>>16, sh2, t>>16); + sh2_sdram_checks(a, d>>16, sh2, t); + sh2_sdram_checks(a+2, d, sh2, t>>16); } #ifndef _ASM_32X_MEMORY_C @@ -1568,6 +1572,7 @@ static void REGPARM(3) sh2_write8_cs0(u32 a, u32 d, SH2 *sh2) } if ((a & 0x3fe00) == 0x4200) { + sh2->poll_cnt = 0; ((u8 *)Pico32xMem->pal)[(a & 0x1ff) ^ 1] = d; Pico32x.dirty_pal = 1; goto out; @@ -1641,6 +1646,7 @@ static void REGPARM(3) sh2_write16_cs0(u32 a, u32 d, SH2 *sh2) } if ((a & 0x3fe00) == 0x4200) { + sh2->poll_cnt = 0; Pico32xMem->pal[(a & 0x1ff) / 2] = d; Pico32x.dirty_pal = 1; goto out; @@ -2175,11 +2181,7 @@ void Pico32xSwapDRAM(int b) ssh2_read32_map[0x04/2].addr = ssh2_read32_map[0x24/2].addr = MAP_MEMORY(Pico32xMem->dram[b]); // convenience ptrs - msh2.p_sdram = ssh2.p_sdram = Pico32xMem->sdram; msh2.p_dram = ssh2.p_dram = Pico32xMem->dram[b]; - msh2.p_rom = ssh2.p_rom = Pico.rom; - msh2.p_bios = Pico32xMem->sh2_rom_m.w; msh2.p_da = msh2.data_array; - ssh2.p_bios = Pico32xMem->sh2_rom_s.w; ssh2.p_da = ssh2.data_array; } static void bank_switch_rom_sh2(void) @@ -2359,6 
+2361,12 @@ void PicoMemSetup32x(void) ssh2.write16_tab = (const void **)(void *)ssh2_write16_map; ssh2.write32_tab = (const void **)(void *)ssh2_write32_map; + // convenience ptrs + msh2.p_sdram = ssh2.p_sdram = Pico32xMem->sdram; + msh2.p_rom = ssh2.p_rom = Pico.rom; + msh2.p_bios = Pico32xMem->sh2_rom_m.w; msh2.p_da = msh2.data_array; + ssh2.p_bios = Pico32xMem->sh2_rom_s.w; ssh2.p_da = ssh2.data_array; + sh2_drc_mem_setup(&msh2); sh2_drc_mem_setup(&ssh2); diff --git a/pico/32x/memory_arm.S b/pico/32x/memory_arm.S index 48143ba9..43a01958 100644 --- a/pico/32x/memory_arm.S +++ b/pico/32x/memory_arm.S @@ -18,7 +18,7 @@ .text -@ u32 a +@ u32 a, SH2 *sh2 .global sh2_read8_rom .global sh2_read8_sdram .global sh2_read8_da @@ -32,7 +32,7 @@ .global sh2_read32_da .global sh2_read32_dram -@ u32 a, u32 d +@ u32 a, u32 d, SH2 *sh2 .global sh2_write8_sdram .global sh2_write8_da .global sh2_write8_dram @@ -270,16 +270,14 @@ sh2_write32_dram: streq r1, [ip, r3, lsr #SH2_DRAM_SHIFT] bxeq lr ldr r0, [ip, r3, lsr #SH2_DRAM_SHIFT] - mov r2, #0 tst r1, #0x00ff0000 - orrne r2, r2, #0x00ff0000 + bicne r0, r0, #0x00ff0000 tst r1, #0xff000000 - orrne r2, r2, #0xff000000 + bicne r0, r0, #0xff000000 tst r1, #0x000000ff - orrne r2, r2, #0x000000ff + bicne r0, r0, #0x000000ff tst r1, #0x0000ff00 - orrne r2, r2, #0x0000ff00 - bic r0, r0, r2 + bicne r0, r0, #0x0000ff00 orr r0, r0, r1 str r0, [ip, r3, lsr #SH2_DRAM_SHIFT] bx lr diff --git a/pico/32x/sh2soc.c b/pico/32x/sh2soc.c index 66bdc478..1f19150e 100644 --- a/pico/32x/sh2soc.c +++ b/pico/32x/sh2soc.c @@ -137,6 +137,11 @@ static void dmac_memcpy(struct dma_chan *chan, SH2 *sh2) if (!up || chan->tcr < 4) return; + // XXX Mars Check Program fills a 64K buffer, then copies 32K longwords from + // DRAM to SDRAM in 4-longword mode, which is 128K. This overwrites a comm + // area in SDRAM, which is why the check fails. + // Is this a buswidth mismatch problem? 
As a kludge, usw 16-bit width xfers + if (size == 3 && (chan->sar & 0xdf000000) == 0x04000000) size = 1; if (size == 3) size = 2; // 4-word xfer mode still counts in words // XXX check TCR being a multiple of 4 in 4-word xfer mode? // XXX check alignment of sar/dar, generating a bus error if unaligned? @@ -500,7 +505,6 @@ static void dreq1_do(SH2 *sh2, struct dma_chan *chan) if ((chan->dar & ~0xf) != 0x20004030) elprintf(EL_32XP|EL_ANOMALY, "dreq1: bad dar?: %08x\n", chan->dar); - sh2->state |= SH2_STATE_SLEEP; dmac_transfer_one(sh2, chan); if (chan->tcr == 0) dmac_transfer_complete(sh2, chan); From 748b8187db89c5205db452bf80f4bdd3420441e9 Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 25 Jun 2019 20:23:45 +0200 Subject: [PATCH 0204/1110] SH2 drc: bug fixing and small speed improvements --- config.gp2x47 | 5 +- cpu/drc/cmn.h | 36 +++ cpu/drc/emit_arm.c | 38 ++- cpu/drc/emit_x86.c | 62 ++-- cpu/sh2/compiler.c | 299 +++++++++--------- cpu/sh2/compiler.h | 2 +- platform/common/common.mak | 2 +- .../common/{host_dasm_arm.c => host_dasm.c} | 13 +- 8 files changed, 254 insertions(+), 203 deletions(-) rename platform/common/{host_dasm_arm.c => host_dasm.c} (88%) diff --git a/config.gp2x47 b/config.gp2x47 index 21769ada..632515ee 100644 --- a/config.gp2x47 +++ b/config.gp2x47 @@ -4,9 +4,10 @@ CC = arm-linux-gnueabi-gcc CXX = arm-linux-gnueabi-g++ AS = arm-linux-gnueabi-as STRIP = arm-linux-gnueabi-strip -CFLAGS += -mabi=apcs-gnu -mno-thumb-interwork -mfloat-abi=soft -mfpu=fpa -mcpu=arm920t -mtune=arm920t -Wno-unused-result -fno-stack-protector -D__GP2X__ +CFLAGS += -mabi=apcs-gnu -mfloat-abi=soft -mfpu=fpa -mcpu=arm920t -mtune=arm920t +CFLAGS += -Wno-unused-result -D__GP2X__ -mno-thumb-interwork -fno-stack-protector -fno-common CFLAGS += -I${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -I${HOME}/src/gp2x/armroot/include -CFLAGS += -fno-gcse -funswitch-loops -fweb -ftree-loop-im #-fpredictive-commoning -ftree-loop-distribution -frename-registers +CFLAGS += 
-finline-limit=42 -fipa-pta -fno-ipa-sra -fno-ipa-pure-const ASFLAGS += -mabi=apcs-gnu -mfloat-abi=soft -mfpu=fpa -mcpu=arm920t LDFLAGS += -mabi=apcs-gnu -mfpu=fpa -B${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1 -B${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L${HOME}/src/gp2x/armroot/lib -static LDLIBS += -lpng -lm -ldl diff --git a/cpu/drc/cmn.h b/cpu/drc/cmn.h index bad02a1b..2eb52aad 100644 --- a/cpu/drc/cmn.h +++ b/cpu/drc/cmn.h @@ -6,3 +6,39 @@ extern u8 *tcache; void drc_cmn_init(void); void drc_cmn_cleanup(void); +#define BITMASK1(v0) (1 << (v0)) +#define BITMASK2(v0,v1) ((1 << (v0)) | (1 << (v1))) +#define BITMASK3(v0,v1,v2) (BITMASK2(v0,v1) | (1 << (v2))) +#define BITMASK4(v0,v1,v2,v3) (BITMASK3(v0,v1,v2) | (1 << (v3))) +#define BITMASK5(v0,v1,v2,v3,v4) (BITMASK4(v0,v1,v2,v3) | (1 << (v4))) +#define BITMASK6(v0,v1,v2,v3,v4,v5) (BITMASK5(v0,v1,v2,v3,v4) | (1 << (v5))) +#define BITRANGE(v0,v1) (BITMASK1(v1+1)-BITMASK1(v0)) // set with v0..v1 + +// binary search approach, since we don't have CLZ on ARM920T +#define FOR_ALL_BITS_SET_DO(mask, bit, code) { \ + u32 __mask = mask; \ + for (bit = 31; bit >= 0 && mask; bit--, __mask <<= 1) { \ + if (!(__mask & (0xffff << 16))) \ + bit -= 16, __mask <<= 16; \ + if (!(__mask & (0xff << 24))) \ + bit -= 8, __mask <<= 8; \ + if (!(__mask & (0xf << 28))) \ + bit -= 4, __mask <<= 4; \ + if (!(__mask & (0x3 << 30))) \ + bit -= 2, __mask <<= 2; \ + if (!(__mask & (0x1 << 31))) \ + bit -= 1, __mask <<= 1; \ + if (__mask & (0x1 << 31)) { \ + code; \ + } \ + } \ +} + +// inspired by https://graphics.stanford.edu/~seander/bithacks.html +static inline int count_bits(unsigned val) +{ + val = val - ((val >> 1) & 0x55555555); + val = (val & 0x33333333) + ((val >> 2) & 0x33333333); + return (((val + (val >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24; +} + diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index c85a3d71..0eb2d972 
100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -382,13 +382,6 @@ static void emith_flush(void) #define EOP_MOVT(rd,imm) \ EMIT(0xe3400000 | ((rd)<<12) | (((imm)>>16)&0xfff) | (((imm)>>12)&0xf0000), M1(rd), NO) -static inline int count_bits(unsigned val) -{ - val = val - ((val >> 1) & 0x55555555); - val = (val & 0x33333333) + ((val >> 2) & 0x33333333); - return (((val + (val >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24; -} - // host literal pool; must be significantly smaller than 1024 (max LDR offset = 4096) #define MAX_HOST_LITERALS 128 static u32 literal_pool[MAX_HOST_LITERALS]; @@ -429,18 +422,26 @@ static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int // count insns needed for mov/orr #imm for (v = imm, ror2 = 0; (v >> 24) && ror2 < 32/2; ror2++) v = (v << 2) | (v >> 30); +#ifdef HAVE_ARMV7 for (i = 2; i > 0; i--, v >>= 8) while (v > 0xff && !(v & 3)) v >>= 2; if (v) { // 3+ insns needed... if (op == A_OP_MVN) imm = ~imm; -#ifdef HAVE_ARMV7 // ...prefer movw/movt EOP_MOVW(rd, imm); if (imm & 0xffff0000) EOP_MOVT(rd, imm); + return; + } #else + for (i = 3; i > 0; i--, v >>= 8) + while (v > 0xff && !(v & 3)) + v >>= 2; + if (v) { // 4 insns needed... 
+ if (op == A_OP_MVN) + imm = ~imm; // ...emit literal load int idx, o; if (literal_iindex >= MAX_HOST_LITERALS) { @@ -455,9 +456,9 @@ static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int EOP_C_DOP_IMM(cond, A_OP_ADD, 0, rd, rd, 0, o); else if (o < 0) EOP_C_DOP_IMM(cond, A_OP_SUB, 0, rd, rd, 0, -o); -#endif return; } +#endif break; case A_OP_AND: @@ -544,7 +545,7 @@ static int emith_xbranch(int cond, void *target, int is_call) EMIT((u32)target,M1(PC),0); #else // should never happen - elprintf(EL_STATUS|EL_SVP|EL_ANOMALY, "indirect jmp %08x->%08x", target, tcache_ptr); + elprintf(EL_STATUS|EL_SVP|EL_ANOMALY, "indirect jmp %8p->%8p", target, tcache_ptr); exit(1); #endif } @@ -633,9 +634,6 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) #define EMITH_NOTHING1(cond) \ (void)(cond) -#define EMITH_SJMP_DECL_() -#define EMITH_SJMP_START_(cond) EMITH_NOTHING1(cond) -#define EMITH_SJMP_END_(cond) EMITH_NOTHING1(cond) #define EMITH_SJMP_START(cond) EMITH_NOTHING1(cond) #define EMITH_SJMP_END(cond) EMITH_NOTHING1(cond) #define EMITH_SJMP2_START(cond) EMITH_NOTHING1(cond) @@ -806,6 +804,9 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) #define emith_eor_r_imm(r, imm) \ emith_op_imm(A_COND_AL, 0, A_OP_EOR, r, imm) +#define emith_eor_r_imm_ptr(r, imm) \ + emith_eor_r_imm(r, imm) + // note: only use 8bit imm for these #define emith_tst_r_imm(r, imm) \ emith_top_imm(A_COND_AL, A_OP_TST, r, imm) @@ -837,6 +838,9 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) #define emith_eor_r_imm_c(cond, r, imm) \ emith_op_imm(cond, 0, A_OP_EOR, r, imm) +#define emith_eor_r_imm_ptr_c(cond, r, imm) \ + emith_eor_r_imm_c(cond, r, imm) + #define emith_bic_r_imm_c(cond, r, imm) \ emith_op_imm(cond, 0, A_OP_BIC, r, imm) @@ -1139,6 +1143,8 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) emith_jump(target); \ } while (0) +#define emith_call_cleanup() /**/ + #define emith_ret_c(cond) \ 
emith_jump_reg_c(cond, LR) @@ -1228,10 +1234,10 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) /* if (reg <= turns) turns = reg-1 */ \ t3 = rcache_get_reg(reg, RC_GR_RMW, NULL); \ emith_cmp_r_r(t3, t2); \ - emith_sub_r_r_imm_c(DCOND_LE, t2, t3, 1); \ + emith_sub_r_r_imm_c(DCOND_LS, t2, t3, 1); \ /* if (reg <= 1) turns = 0 */ \ emith_cmp_r_imm(t3, 1); \ - emith_move_r_imm_c(DCOND_LE, t2, 0); \ + emith_move_r_imm_c(DCOND_LS, t2, 0); \ /* reg -= turns */ \ emith_sub_r_r(t3, t2); \ } \ @@ -1361,7 +1367,7 @@ static int tcond = -1; #define emith_set_t(sr, val) \ tcond = ((val) ? A_COND_AL: A_COND_NV) -static void emith_sync_t(sr) +static void emith_sync_t(int sr) { if (tcond == A_COND_AL) emith_or_r_imm(sr, T); diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 652b4989..0a31d894 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -396,6 +396,12 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common #define emith_cmp_r_imm(r, imm) \ emith_arith_r_imm(7, r, imm) +#define emith_eor_r_imm_ptr(r, imm) do { \ + EMIT_REX_IF(1, 0, r); \ + EMIT_OP_MODRM64(0x81, 3, 6, r); \ + EMIT(imm, u32); \ +} while (0) + #define emith_tst_r_imm(r, imm) do { \ EMIT_REX_IF(0, 0, r); \ EMIT_OP_MODRM64(0xf7, 3, 0, r); \ @@ -417,6 +423,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common emith_or_r_imm(r, imm) #define emith_eor_r_imm_c(cond, r, imm) \ emith_eor_r_imm(r, imm) +#define emith_eor_r_imm_ptr_c(cond, r, imm) \ + emith_eor_r_imm_ptr(r, imm) #define emith_bic_r_imm_c(cond, r, imm) \ emith_bic_r_imm(r, imm) #define emith_tst_r_imm_c(cond, r, imm) \ @@ -589,9 +597,9 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common // XXX: stupid mess #define emith_mul_(op, dlo, dhi, s1, s2) do { \ int rmr; \ - if (dlo != xAX && dhi != xAX) \ + if (dlo != xAX && dhi != xAX && rcache_is_hreg_used(xAX)) \ emith_push(xAX); \ - if (dlo != xDX && dhi != xDX) \ + if (dlo != xDX && dhi != xDX && 
rcache_is_hreg_used(xDX)) \ emith_push(xDX); \ if ((s1) == xAX) \ rmr = s2; \ @@ -609,9 +617,9 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common } \ if (dhi != xDX && dhi != -1 && !(dhi == xAX && dlo == xDX)) \ emith_move_r_r(dhi, (dlo == xDX ? xAX : xDX)); \ - if (dlo != xDX && dhi != xDX) \ + if (dlo != xDX && dhi != xDX && rcache_is_hreg_used(xDX)) \ emith_pop(xDX); \ - if (dlo != xAX && dhi != xAX) \ + if (dlo != xAX && dhi != xAX && rcache_is_hreg_used(xAX)) \ emith_pop(xAX); \ } while (0) @@ -898,6 +906,9 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common emith_jump(target); \ } while (0) +#define emith_call_cleanup() \ + emith_add_r_r_ptr_imm(xSP, xSP, sizeof(void *)); // remove return addr + #define emith_ret() \ EMIT_OP(0xc3) @@ -912,10 +923,12 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common #define emith_push_ret(r) do { \ int r_ = (r >= 0 ? r : xSI); \ emith_push(r_); /* always push to align */ \ + emith_add_r_r_ptr_imm(xSP, xSP, -8*4); /* args shadow space */ \ } while (0) #define emith_pop_and_ret(r) do { \ int r_ = (r >= 0 ? 
r : xSI); \ + emith_add_r_r_ptr_imm(xSP, xSP, 8*4); /* args shadow space */ \ emith_pop(r_); \ emith_ret(); \ } while (0) @@ -942,15 +955,6 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common // "simple" jump (no more then a few insns) // ARM will use conditional instructions here -#define EMITH_SJMP_DECL_() \ - u8 *cond_ptr - -#define EMITH_SJMP_START_(cond) \ - JMP8_POS(cond_ptr) - -#define EMITH_SJMP_END_(cond) \ - JMP8_EMIT(cond, cond_ptr) - #define EMITH_SJMP_START EMITH_JMP_START #define EMITH_SJMP_END EMITH_JMP_END @@ -1046,7 +1050,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common emith_push(xR15); \ emith_push(xSI); \ emith_push(xDI); \ - emith_add_r_r_ptr_imm(xSP, xSP, -8*5); /* align + ABI param area */ \ + emith_add_r_r_ptr_imm(xSP, xSP, -8*5); /* align + args shadow space */ \ } while (0) #define emith_sh2_drc_exit() do { \ @@ -1106,19 +1110,17 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common #endif #define emith_save_caller_regs(mask) do { \ - if ((mask) & (1 << xAX)) emith_push(xAX); \ - if ((mask) & (1 << xCX)) emith_push(xCX); \ - if ((mask) & (1 << xDX)) emith_push(xDX); \ - if ((mask) & (1 << xSI)) emith_push(xSI); \ - if ((mask) & (1 << xDI)) emith_push(xDI); \ + int _c; u32 _m = mask & 0xfc7; /* AX, CX, DX, SI, DI, 8, 9, 10, 11 */ \ + if (__builtin_parity(_m) == 1) _m |= 0x8; /* BX for ABI align */ \ + for (_c = HOST_REGS; _m && _c >= 0; _m &= ~(1 << _c), _c--) \ + if (_m & (1 << _c)) emith_push(_c); \ } while (0) #define emith_restore_caller_regs(mask) do { \ - if ((mask) & (1 << xDI)) emith_pop(xDI); \ - if ((mask) & (1 << xSI)) emith_pop(xSI); \ - if ((mask) & (1 << xDX)) emith_pop(xDX); \ - if ((mask) & (1 << xCX)) emith_pop(xCX); \ - if ((mask) & (1 << xAX)) emith_pop(xAX); \ + int _c; u32 _m = mask & 0xfc7; \ + if (__builtin_parity(_m) == 1) _m |= 0x8; /* BX for ABI align */ \ + for (_c = 0; _m && _c < HOST_REGS; _m &= ~(1 << _c), _c++) \ + if (_m & (1 << _c)) 
emith_pop(_c); \ } while (0) #define emith_sh2_rcall(a, tab, func, mask) do { \ @@ -1192,14 +1194,14 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common /* if (reg <= turns) turns = reg-1 */ \ t3 = rcache_get_reg(reg, RC_GR_RMW, NULL); \ emith_cmp_r_r(t3, t2); \ - EMITH_SJMP_START(DCOND_GT); \ - emith_sub_r_r_imm_c(DCOND_LE, t2, t3, 1); \ - EMITH_SJMP_END(DCOND_GT); \ + EMITH_SJMP_START(DCOND_HI); \ + emith_sub_r_r_imm_c(DCOND_LS, t2, t3, 1); \ + EMITH_SJMP_END(DCOND_HI); \ /* if (reg <= 1) turns = 0 */ \ emith_cmp_r_imm(t3, 1); \ - EMITH_SJMP_START(DCOND_GT); \ - emith_move_r_imm_c(DCOND_LE, t2, 0); \ - EMITH_SJMP_END(DCOND_GT); \ + EMITH_SJMP_START(DCOND_HI); \ + emith_move_r_imm_c(DCOND_LS, t2, 0); \ + EMITH_SJMP_END(DCOND_HI); \ /* reg -= turns */ \ emith_sub_r_r(t3, t2); \ } \ diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 9932ce6f..c1ba3f32 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -106,14 +106,6 @@ static int insns_compiled, hash_collisions, host_insn_count; #define GET_Rn() \ ((op >> 8) & 0x0f) -#define BITMASK1(v0) (1 << (v0)) -#define BITMASK2(v0,v1) ((1 << (v0)) | (1 << (v1))) -#define BITMASK3(v0,v1,v2) (BITMASK2(v0,v1) | (1 << (v2))) -#define BITMASK4(v0,v1,v2,v3) (BITMASK3(v0,v1,v2) | (1 << (v3))) -#define BITMASK5(v0,v1,v2,v3,v4) (BITMASK4(v0,v1,v2,v3) | (1 << (v4))) -#define BITMASK6(v0,v1,v2,v3,v4,v5) (BITMASK5(v0,v1,v2,v3,v4) | (1 << (v5))) -#define BITRANGE(v0,v1) (BITMASK1(v1+1)-BITMASK1(v0)) // set with v0..v1 - #define SHR_T SHR_SR // might make them separate someday #define SHR_MEM 31 #define SHR_TMP -1 @@ -174,6 +166,7 @@ enum op_types { static u8 *tcache_dsm_ptrs[3]; static char sh2dasm_buff[64]; #define do_host_disasm(tcid) \ + emith_flush(); \ host_dasm(tcache_dsm_ptrs[tcid], emith_insn_ptr() - tcache_dsm_ptrs[tcid]); \ tcache_dsm_ptrs[tcid] = emith_insn_ptr() #else @@ -212,7 +205,6 @@ static void REGPARM(3) *sh2_drc_log_entry(void *block, SH2 *sh2, u32 sr) static FILE *trace[2]; int 
idx = sh2->is_slave; if (!trace[0]) { - truncate("pico.trace", 0); trace[0] = fopen("pico.trace0", "wb"); trace[1] = fopen("pico.trace1", "wb"); } @@ -286,7 +278,7 @@ static u8 *tcache_limit[TCACHE_BUFFERS]; // ptr for code emiters static u8 *tcache_ptr; -#define MAX_BLOCK_ENTRIES (BLOCK_INSN_LIMIT / 8) +#define MAX_BLOCK_ENTRIES (BLOCK_INSN_LIMIT / 6) struct block_link { u32 target_pc; @@ -330,32 +322,20 @@ struct block_desc { struct block_entry entryp[MAX_BLOCK_ENTRIES]; }; -static const int block_max_counts[TCACHE_BUFFERS] = { - 4*1024, - 256, - 256, -}; +#define BLOCK_MAX_COUNT(tcid) ((tcid) ? 256 : 16*256) static struct block_desc *block_tables[TCACHE_BUFFERS]; static int block_counts[TCACHE_BUFFERS]; static int block_limit[TCACHE_BUFFERS]; // we have block_link_pool to avoid using mallocs -static const int block_link_pool_max_counts[TCACHE_BUFFERS] = { - 16*1024, - 4*256, - 4*256, -}; +#define BLOCK_LINK_MAX_COUNT(tcid) ((tcid) ? 1024 : 16*1024) static struct block_link *block_link_pool[TCACHE_BUFFERS]; static int block_link_pool_counts[TCACHE_BUFFERS]; static struct block_link **unresolved_links[TCACHE_BUFFERS]; static struct block_link *blink_free[TCACHE_BUFFERS]; // used for invalidation -static const int ram_sizes[TCACHE_BUFFERS] = { - 0x40000, - 0x1000, - 0x1000, -}; +#define RAM_SIZE(tcid) ((tcid) ? 0x1000 : 0x40000) #define INVAL_PAGE_SIZE 0x100 struct block_list { @@ -373,15 +353,11 @@ static struct block_list *inactive_blocks[TCACHE_BUFFERS]; // each array has len: sizeof(mem) / INVAL_PAGE_SIZE static struct block_list **inval_lookup[TCACHE_BUFFERS]; -static const int hash_table_sizes[TCACHE_BUFFERS] = { - 0x4000, - 0x100, - 0x100, -}; +#define HASH_TABLE_SIZE(tcid) ((tcid) ? 
256 : 64*256) static struct block_entry **hash_tables[TCACHE_BUFFERS]; #define HASH_FUNC(hash_tab, addr, mask) \ - (hash_tab)[(((addr) >> 20) ^ ((addr) >> 2)) & (mask)] + (hash_tab)[((addr) >> 1) & (mask)] #if (DRC_DEBUG & 128) #if BRANCH_CACHE @@ -431,6 +407,10 @@ typedef struct { } guest_reg_t; +// possibly needed in code emitter +static int rcache_get_tmp(void); +static void rcache_free_tmp(int hr); + // Note: cache_regs[] must have at least the amount of REG and TEMP registers // used by handlers in worst case (currently 4). // Register assignment goes by ABI convention. Caller save registers are TEMP, @@ -583,13 +563,12 @@ static int dr_ctx_get_mem_ptr(SH2 *sh2, u32 a, u32 *mask) // check if region is mapped memory memptr = p32x_sh2_get_mem_ptr(a, mask, sh2); - if (memptr == NULL /*|| (a & ((1 << SH2_READ_SHIFT)-1) & ~*mask) != 0*/) + if (memptr == NULL) return poffs; if (memptr == sh2->p_bios) // BIOS poffs = offsetof(SH2, p_bios); else if (memptr == sh2->p_da) // data array - // FIXME: access sh2->data_array instead poffs = offsetof(SH2, p_da); else if (memptr == sh2->p_sdram) // SDRAM poffs = offsetof(SH2, p_sdram); @@ -602,16 +581,16 @@ static int dr_ctx_get_mem_ptr(SH2 *sh2, u32 a, u32 *mask) static struct block_entry *dr_get_entry(u32 pc, int is_slave, int *tcache_id) { struct block_entry *be; - u32 tcid = 0, mask; - - // data arrays have their own caches - if ((pc & 0xe0000000) == 0xc0000000 || (pc & ~0xfff) == 0) - tcid = 1 + is_slave; - + u32 tcid = 0; + + if ((pc & 0xe0000000) == 0xc0000000) + tcid = 1 + is_slave; // data array + if ((pc & ~0xfff) == 0) + tcid = 1 + is_slave; // BIOS *tcache_id = tcid; - mask = hash_table_sizes[tcid] - 1; - be = HASH_FUNC(hash_tables[tcid], pc, mask); + be = HASH_FUNC(hash_tables[tcid], pc, HASH_TABLE_SIZE(tcid) - 1); + if (be != NULL) // don't ask... 
gcc code generation hint for (; be != NULL; be = be->next) if (be->pc == pc) return be; @@ -688,17 +667,17 @@ static void REGPARM(1) flush_tcache(int tcid) int tc_used, bl_used; tc_used = tcache_sizes[tcid] - (tcache_limit[tcid] - tcache_ptrs[tcid]); - bl_used = block_max_counts[tcid] - (block_limit[tcid] - block_counts[tcid]); + bl_used = BLOCK_MAX_COUNT(tcid) - (block_limit[tcid] - block_counts[tcid]); elprintf(EL_STATUS, "tcache #%d flush! (%d/%d, bds %d/%d)", tcid, tc_used, - tcache_sizes[tcid], bl_used, block_max_counts[tcid]); + tcache_sizes[tcid], bl_used, BLOCK_MAX_COUNT(tcid)); #endif block_counts[tcid] = 0; - block_limit[tcid] = block_max_counts[tcid] - 1; + block_limit[tcid] = BLOCK_MAX_COUNT(tcid) - 1; block_link_pool_counts[tcid] = 0; blink_free[tcid] = NULL; - memset(unresolved_links[tcid], 0, sizeof(*unresolved_links[0]) * hash_table_sizes[tcid]); - memset(hash_tables[tcid], 0, sizeof(*hash_tables[0]) * hash_table_sizes[tcid]); + memset(unresolved_links[tcid], 0, sizeof(*unresolved_links[0]) * HASH_TABLE_SIZE(tcid)); + memset(hash_tables[tcid], 0, sizeof(*hash_tables[0]) * HASH_TABLE_SIZE(tcid)); tcache_ptrs[tcid] = tcache_bases[tcid]; tcache_limit[tcid] = tcache_bases[tcid] + tcache_sizes[tcid]; if (Pico32xMem->sdram != NULL) { @@ -724,14 +703,14 @@ static void REGPARM(1) flush_tcache(int tcid) tcache_dsm_ptrs[tcid] = tcache_bases[tcid]; #endif - for (i = 0; i < ram_sizes[tcid] / INVAL_PAGE_SIZE; i++) + for (i = 0; i < RAM_SIZE(tcid) / INVAL_PAGE_SIZE; i++) rm_block_list(&inval_lookup[tcid][i]); rm_block_list(&inactive_blocks[tcid]); } static void add_to_hashlist(struct block_entry *be, int tcache_id) { - u32 tcmask = hash_table_sizes[tcache_id] - 1; + u32 tcmask = HASH_TABLE_SIZE(tcache_id) - 1; struct block_entry **head = &HASH_FUNC(hash_tables[tcache_id], be->pc, tcmask); be->prev = NULL; @@ -751,7 +730,7 @@ static void add_to_hashlist(struct block_entry *be, int tcache_id) static void rm_from_hashlist(struct block_entry *be, int tcache_id) { - 
u32 tcmask = hash_table_sizes[tcache_id] - 1; + u32 tcmask = HASH_TABLE_SIZE(tcache_id) - 1; struct block_entry **head = &HASH_FUNC(hash_tables[tcache_id], be->pc, tcmask); #if DRC_DEBUG & 1 @@ -773,7 +752,7 @@ static void rm_from_hashlist(struct block_entry *be, int tcache_id) static void add_to_hashlist_unresolved(struct block_link *bl, int tcache_id) { - u32 tcmask = hash_table_sizes[tcache_id] - 1; + u32 tcmask = HASH_TABLE_SIZE(tcache_id) - 1; struct block_link **head = &HASH_FUNC(unresolved_links[tcache_id], bl->target_pc, tcmask); #if DRC_DEBUG & 1 @@ -794,7 +773,7 @@ static void add_to_hashlist_unresolved(struct block_link *bl, int tcache_id) static void rm_from_hashlist_unresolved(struct block_link *bl, int tcache_id) { - u32 tcmask = hash_table_sizes[tcache_id] - 1; + u32 tcmask = HASH_TABLE_SIZE(tcache_id) - 1; struct block_link **head = &HASH_FUNC(unresolved_links[tcache_id], bl->target_pc, tcmask); #if DRC_DEBUG & 1 @@ -818,7 +797,7 @@ static void dr_free_oldest_block(int tcache_id) { struct block_desc *bd; - if (block_limit[tcache_id] >= block_max_counts[tcache_id]) { + if (block_limit[tcache_id] >= BLOCK_MAX_COUNT(tcache_id)) { // block desc wrap around block_limit[tcache_id] = 0; } @@ -833,7 +812,7 @@ static void dr_free_oldest_block(int tcache_id) sh2_smc_rm_block_entry(bd, tcache_id, 0, 1); block_limit[tcache_id]++; - if (block_limit[tcache_id] >= block_max_counts[tcache_id]) + if (block_limit[tcache_id] >= BLOCK_MAX_COUNT(tcache_id)) block_limit[tcache_id] = 0; bd = &block_tables[tcache_id][block_limit[tcache_id]]; if (bd->tcache_ptr >= tcache_ptrs[tcache_id]) @@ -898,7 +877,7 @@ static void dr_mark_memory(int mark, struct block_desc *block, int tcache_id, u3 lit_ram_blk = Pico32xMem->drclit_ram; shift = SH2_DRCBLK_RAM_SHIFT; } - mask = ram_sizes[tcache_id] - 1; + mask = RAM_SIZE(tcache_id) - 1; // mark recompiled insns addr = block->addr & ~((1 << shift) - 1); @@ -957,7 +936,7 @@ static u32 dr_check_nolit(u32 start, u32 end, int tcache_id) 
lit_ram_blk = Pico32xMem->drclit_ram; shift = SH2_DRCBLK_RAM_SHIFT; } - mask = ram_sizes[tcache_id] - 1; + mask = RAM_SIZE(tcache_id) - 1; addr = start & ~((1 << shift) - 1); for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift)) @@ -1028,18 +1007,18 @@ static struct block_desc *dr_add_block(u32 addr, int size, *blk_id = *bcount; (*bcount)++; - if (*bcount >= block_max_counts[tcache_id]) + if (*bcount >= BLOCK_MAX_COUNT(tcache_id)) *bcount = 0; return bd; } -static void REGPARM(3) *dr_lookup_block(u32 pc, int is_slave, int *tcache_id) +static void REGPARM(3) *dr_lookup_block(u32 pc, SH2 *sh2, int *tcache_id) { struct block_entry *be = NULL; void *block = NULL; - be = dr_get_entry(pc, is_slave, tcache_id); + be = dr_get_entry(pc, sh2->is_slave, tcache_id); if (be != NULL) block = be->tcache_ptr; @@ -1114,7 +1093,7 @@ static void *dr_prepare_ext_branch(struct block_entry *owner, u32 pc, int is_sla if (blink_free[tcache_id] != NULL) { bl = blink_free[tcache_id]; blink_free[tcache_id] = bl->next; - } else if (cnt >= block_link_pool_max_counts[tcache_id]) { + } else if (cnt >= BLOCK_LINK_MAX_COUNT(tcache_id)) { dbg(1, "bl overflow for tcache %d", tcache_id); return sh2_drc_dispatcher; } else { @@ -1145,7 +1124,7 @@ static void *dr_prepare_ext_branch(struct block_entry *owner, u32 pc, int is_sla static void dr_link_blocks(struct block_entry *be, int tcache_id) { #if LINK_BRANCHES - u32 tcmask = hash_table_sizes[tcache_id] - 1; + u32 tcmask = HASH_TABLE_SIZE(tcache_id) - 1; u32 pc = be->pc; struct block_link **head = &HASH_FUNC(unresolved_links[tcache_id], pc, tcmask); struct block_link *bl = *head, *next; @@ -1188,7 +1167,7 @@ static void dr_link_outgoing(struct block_entry *be, int tcache_id, int is_slave array[count++] = item; \ } -static int find_in_array(u32 *array, size_t size, u32 what) +static inline int find_in_array(u32 *array, size_t size, u32 what) { size_t i; for (i = 0; i < size; i++) @@ -1198,6 +1177,23 @@ static int find_in_array(u32 *array, 
size_t size, u32 what) return -1; } +static int find_in_sorted_array(u32 *array, size_t size, u32 what) +{ + // binary search in sorted array + int left = 0, right = size-1; + while (left <= right) + { + int middle = (left + right) / 2; + if (array[middle] == what) + return middle; + else if (array[middle] < what) + left = middle + 1; + else + right = middle - 1; + } + return -1; +} + // --------------------------------------------------------------- // NB rcache allocation dependencies: @@ -1242,26 +1238,6 @@ static void rcache_remove_vreg_alias(int x, sh2_reg_e r); } \ } -// binary search approach, since we don't have CLZ on ARM920T -#define FOR_ALL_BITS_SET_DO(mask, bit, code) { \ - u32 __mask = mask; \ - for (bit = 31; bit >= 0 && mask; bit--, __mask <<= 1) { \ - if (!(__mask & (0xffff << 16))) \ - bit -= 16, __mask <<= 16; \ - if (!(__mask & (0xff << 24))) \ - bit -= 8, __mask <<= 8; \ - if (!(__mask & (0xf << 28))) \ - bit -= 4, __mask <<= 4; \ - if (!(__mask & (0x3 << 30))) \ - bit -= 2, __mask <<= 2; \ - if (!(__mask & (0x1 << 31))) \ - bit -= 1, __mask <<= 1; \ - if (__mask & (0x1 << 31)) { \ - code; \ - } \ - } \ -} - #if PROPAGATE_CONSTANTS static inline int gconst_alloc(sh2_reg_e r) { @@ -1319,6 +1295,7 @@ static int gconst_get(sh2_reg_e r, u32 *val) *val = gconsts[guest_regs[r].cnst].val; return 1; } + *val = 0; return 0; } @@ -2043,13 +2020,22 @@ static inline int rcache_is_cached(sh2_reg_e r) return (guest_regs[r].vreg >= 0); } +static inline int rcache_is_hreg_used(int hr) +{ + int x = reg_map_host[hr]; + // is hr in use? 
+ return cache_regs[x].type != HR_FREE && + (cache_regs[x].type != HR_TEMP || (cache_regs[x].flags & HRF_LOCKED)); +} + static inline u32 rcache_used_hreg_mask(void) { u32 mask = 0; int i; for (i = 0; i < ARRAY_SIZE(cache_regs); i++) - if (cache_regs[i].type != HR_FREE) + if ((cache_regs[i].flags & HRF_TEMP) && cache_regs[i].type != HR_FREE && + (cache_regs[i].type != HR_TEMP || (cache_regs[i].flags & HRF_LOCKED))) mask |= 1 << cache_regs[i].hreg; return mask & ~rcache_static; @@ -2137,6 +2123,8 @@ static void rcache_invalidate(void) { int i; + gconst_invalidate(); + for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { cache_regs[i].flags &= (HRF_TEMP|HRF_REG); if (cache_regs[i].type != HR_STATIC) @@ -2161,8 +2149,6 @@ static void rcache_invalidate(void) rcache_counter = 0; rcache_hint_soon = rcache_hint_late = rcache_hint_write = rcache_hint_clean = 0; - - gconst_invalidate(); } static void rcache_flush(void) @@ -2221,14 +2207,20 @@ static int emit_get_rbase_and_offs(SH2 *sh2, sh2_reg_e r, int rmod, u32 *offs) if (poffs == -1) return -1; - if (mask < 0x1000) { - // data array or BIOS, can't safely access directly since translated code - // may run on both SH2s + if (mask < 0x20000) { + // data array, BIOS, DRAM, can't safely access directly since host addr may + // change (BIOS,da code may run on either core, DRAM may be switched) hr = rcache_get_tmp(); - emith_ctx_read_ptr(hr, poffs); - a += *offs; - if (a & mask & ~omask) - emith_add_r_r_ptr_imm(hr, hr, a & mask & ~omask); + a = (a + *offs) & mask; + if (poffs == offsetof(SH2, p_da)) { + // access sh2->data_array directly + a += offsetof(SH2, data_array); + emith_add_r_r_ptr_imm(hr, CONTEXT_REG, a & ~omask); + } else { + emith_ctx_read_ptr(hr, poffs); + if (a & ~omask) + emith_add_r_r_ptr_imm(hr, hr, a & ~omask); + } *offs = a & omask; return hr; } @@ -2269,7 +2261,7 @@ static int emit_get_rom_data(SH2 *sh2, sh2_reg_e r, u32 offs, int size, u32 *val if (gconst_get(r, &a)) { a += offs; // check if rom is memory mapped 
(not bank switched), and address is in rom - if (dr_is_rom(a) && p32x_sh2_get_mem_ptr(a, &mask, sh2) != (void *)-1) { + if (dr_is_rom(a) && p32x_sh2_get_mem_ptr(a, &mask, sh2) == sh2->p_rom) { switch (size & MF_SIZEMASK) { case 0: *val = (s8)p32x_sh2_read8(a, sh2s); break; // 8 case 1: *val = (s16)p32x_sh2_read16(a, sh2s); break; // 16 @@ -2507,9 +2499,10 @@ static int emit_indirect_indexed_read(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rx, sh2_ #if PROPAGATE_CONSTANTS u32 offs; - if (gconst_get(ry, &offs)) + // if offs is larger than 0x01000000, it's most probably the base address part + if (gconst_get(ry, &offs) && offs < 0x01000000) return emit_memhandler_read_rr(sh2, rd, rx, offs, size); - if (gconst_get(rx, &offs)) + if (gconst_get(rx, &offs) && offs < 0x01000000) return emit_memhandler_read_rr(sh2, rd, ry, offs, size); #endif hr = rcache_get_reg_arg(0, rx, &tx); @@ -2541,9 +2534,10 @@ static void emit_indirect_indexed_write(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rx, sh #if PROPAGATE_CONSTANTS u32 offs; - if (gconst_get(ry, &offs)) + // if offs is larger than 0x01000000, it's most probably the base address part + if (gconst_get(ry, &offs) && offs < 0x01000000) return emit_memhandler_write_rr(sh2, rd, rx, offs, size); - if (gconst_get(rx, &offs)) + if (gconst_get(rx, &offs) && offs < 0x01000000) return emit_memhandler_write_rr(sh2, rd, ry, offs, size); #endif if (rd != SHR_TMP) @@ -2601,15 +2595,6 @@ static void emit_do_static_regs(int is_write, int tmpr) } } -/* just after lookup function, jump to address returned */ -static void emit_block_entry(void) -{ - emith_tst_r_r_ptr(RET_REG, RET_REG); - EMITH_SJMP_START(DCOND_EQ); - emith_jump_reg_c(DCOND_NE, RET_REG); - EMITH_SJMP_END(DCOND_EQ); -} - #define DELAY_SAVE_T(sr) { \ emith_bic_r_imm(sr, T_save); \ emith_tst_r_imm(sr, T); \ @@ -2861,7 +2846,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) if (!tcache_id) // can safely link from cpu-local to global memory dr_link_blocks(entry, sh2->is_slave?2:1); - v 
= find_in_array(branch_target_pc, branch_target_count, pc); + v = find_in_sorted_array(branch_target_pc, branch_target_count, pc); if (v >= 0) branch_target_ptr[v] = tcache_ptr; #if LOOP_DETECTION @@ -2870,14 +2855,15 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) drcf.polling = (drcf.loop_type == OF_POLL_LOOP ? MF_POLLING : 0); #endif -#if DRC_DEBUG +#if (DRC_DEBUG & ~7) // must update PC emit_move_r_imm32(SHR_PC, pc); #endif rcache_clean(); #if (DRC_DEBUG & 0x10) - rcache_get_reg_arg(0, SHR_PC, NULL); + tmp = rcache_get_tmp_arg(0); + emith_move_r_imm(tmp, pc); tmp = emit_memhandler_read(1); tmp2 = rcache_get_tmp(); tmp3 = rcache_get_tmp(); @@ -2896,7 +2882,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) tmp = rcache_get_tmp_arg(0); sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); emith_cmp_r_imm(sr, 0); - emith_move_r_imm(tmp, pc); + emith_move_r_imm_c(DCOND_LE, tmp, pc); emith_jump_cond(DCOND_LE, sh2_drc_exit); rcache_free_tmp(tmp); @@ -3104,7 +3090,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_clear_msb(tmp, tmp2, 22); emit_memhandler_write_rr(sh2, SHR_TMP, SHR_SP, 0, 2 | MF_PREDECR); // push PC - if (op == OP_TRAPA) { + if (opd->op == OP_TRAPA) { tmp = rcache_get_tmp_arg(1); emith_move_r_imm(tmp, pc); } else if (drcf.pending_branch_indirect) { @@ -3113,7 +3099,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) tmp = rcache_get_tmp_arg(1); emith_move_r_imm(tmp, pc - 2); } - emith_move_r_imm(tmp, pc); emit_memhandler_write_rr(sh2, SHR_TMP, SHR_SP, 0, 2 | MF_PREDECR); // obtain new PC emit_memhandler_read_rr(sh2, SHR_PC, SHR_VBR, opd->imm * 4, 2); @@ -3613,12 +3598,12 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) default: goto default_; } - tmp3 = rcache_get_reg_arg(1, tmp, &tmp4); if (tmp == SHR_SR) { + tmp3 = rcache_get_reg_arg(1, tmp, &tmp4); emith_sync_t(tmp4); emith_clear_msb(tmp3, tmp4, 22); // reserved bits defined by ISA as 0 - } else if (tmp3 != tmp4) - 
emith_move_r_r(tmp3, tmp4); + } else + tmp3 = rcache_get_reg_arg(1, tmp, NULL); emit_memhandler_write_rr(sh2, SHR_TMP, GET_Rn(), 0, 2 | MF_PREDECR); goto end_op; case 0x04: @@ -4050,7 +4035,7 @@ end_op: // no modification of host status/flags between here and branching! #if LINK_BRANCHES - v = find_in_array(branch_target_pc, branch_target_count, target_pc); + v = find_in_sorted_array(branch_target_pc, branch_target_count, target_pc); if (v >= 0) { // local branch @@ -4151,7 +4136,7 @@ end_op: { void *target; - s32 tmp = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + tmp = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); FLUSH_CYCLES(tmp); emith_sync_t(tmp); @@ -4172,7 +4157,7 @@ end_op: for (i = 0; i < branch_patch_count; i++) { void *target; int t; - t = find_in_array(branch_target_pc, branch_target_count, branch_patch_pc[i]); + t = find_in_sorted_array(branch_target_pc, branch_target_count, branch_patch_pc[i]); target = branch_target_ptr[t]; if (target == NULL) { // flush pc and go back to dispatcher (this should no longer happen) @@ -4256,8 +4241,8 @@ static void sh2_generate_utils(void) emith_sh2_rcall(arg0, arg1, arg2, arg3); EMITH_SJMP_START(DCOND_CS); emith_and_r_r_c(DCOND_CC, arg0, arg3); - emith_eor_r_imm_c(DCOND_CC, arg0, 1); - emith_read8s_r_r_r_c(DCOND_CC, RET_REG, arg0, arg2); + emith_eor_r_imm_ptr_c(DCOND_CC, arg0, 1); + emith_read8s_r_r_r_c(DCOND_CC, RET_REG, arg2, arg0); emith_ret_c(DCOND_CC); EMITH_SJMP_END(DCOND_CS); emith_move_r_r_ptr(arg1, CONTEXT_REG); @@ -4270,7 +4255,7 @@ static void sh2_generate_utils(void) emith_sh2_rcall(arg0, arg1, arg2, arg3); EMITH_SJMP_START(DCOND_CS); emith_and_r_r_c(DCOND_CC, arg0, arg3); - emith_read16s_r_r_r_c(DCOND_CC, RET_REG, arg0, arg2); + emith_read16s_r_r_r_c(DCOND_CC, RET_REG, arg2, arg0); emith_ret_c(DCOND_CC); EMITH_SJMP_END(DCOND_CS); emith_move_r_r_ptr(arg1, CONTEXT_REG); @@ -4283,7 +4268,7 @@ static void sh2_generate_utils(void) emith_sh2_rcall(arg0, arg1, arg2, arg3); EMITH_SJMP_START(DCOND_CS); 
emith_and_r_r_c(DCOND_CC, arg0, arg3); - emith_read_r_r_r_c(DCOND_CC, RET_REG, arg0, arg2); + emith_read_r_r_r_c(DCOND_CC, RET_REG, arg2, arg0); emith_ror_c(DCOND_CC, RET_REG, RET_REG, 16); emith_ret_c(DCOND_CC); EMITH_SJMP_END(DCOND_CS); @@ -4300,8 +4285,8 @@ static void sh2_generate_utils(void) emith_jump_reg_c(DCOND_CS, arg2); EMITH_SJMP_END(DCOND_CC); emith_and_r_r_r(arg1, arg0, arg3); - emith_eor_r_imm(arg1, 1); - emith_read8s_r_r_r(arg1, arg1, arg2); + emith_eor_r_imm_ptr(arg1, 1); + emith_read8s_r_r_r(arg1, arg2, arg1); emith_push_ret(arg1); emith_move_r_r_ptr(arg2, CONTEXT_REG); emith_call(p32x_sh2_poll_memory8); @@ -4317,7 +4302,7 @@ static void sh2_generate_utils(void) emith_jump_reg_c(DCOND_CS, arg2); EMITH_SJMP_END(DCOND_CC); emith_and_r_r_r(arg1, arg0, arg3); - emith_read16s_r_r_r(arg1, arg1, arg2); + emith_read16s_r_r_r(arg1, arg2, arg1); emith_push_ret(arg1); emith_move_r_r_ptr(arg2, CONTEXT_REG); emith_call(p32x_sh2_poll_memory16); @@ -4333,7 +4318,7 @@ static void sh2_generate_utils(void) emith_jump_reg_c(DCOND_CS, arg2); EMITH_SJMP_END(DCOND_CC); emith_and_r_r_r(arg1, arg0, arg3); - emith_read_r_r_r(arg1, arg1, arg2); + emith_read_r_r_r(arg1, arg2, arg1); emith_ror(arg1, arg1, 16); emith_push_ret(arg1); emith_move_r_r_ptr(arg2, CONTEXT_REG); @@ -4382,13 +4367,13 @@ static void sh2_generate_utils(void) emith_jump_reg_c(DCOND_EQ, RET_REG); EMITH_SJMP_END(DCOND_NE); #endif - emith_ctx_read(arg1, offsetof(SH2, is_slave)); + emith_move_r_r_ptr(arg1, CONTEXT_REG); emith_add_r_r_ptr_imm(arg2, CONTEXT_REG, offsetof(SH2, drc_tmp)); emith_call(dr_lookup_block); -#if BRANCH_CACHE // store PC and block entry ptr (in arg0) in branch target cache emith_tst_r_r_ptr(RET_REG, RET_REG); EMITH_SJMP_START(DCOND_EQ); +#if BRANCH_CACHE #if (DRC_DEBUG & 128) emith_move_r_ptr_imm(arg2, (uptr)&bcmiss); emith_read_r_r_offs_c(DCOND_NE, arg3, arg2, 0); @@ -4400,14 +4385,18 @@ static void sh2_generate_utils(void) emith_add_r_r_r_lsl_ptr(arg1, CONTEXT_REG, arg1, sizeof(void *) 
== 8 ? 1 : 0); emith_write_r_r_offs_c(DCOND_NE, arg2, arg1, offsetof(SH2, branch_cache)); emith_write_r_r_offs_ptr_c(DCOND_NE, RET_REG, arg1, offsetof(SH2, branch_cache) + sizeof(void *)); - EMITH_SJMP_END(DCOND_EQ); #endif - emit_block_entry(); + emith_jump_reg_c(DCOND_NE, RET_REG); + EMITH_SJMP_END(DCOND_EQ); // lookup failed, call sh2_translate() emith_move_r_r_ptr(arg0, CONTEXT_REG); emith_ctx_read(arg1, offsetof(SH2, drc_tmp)); // tcache_id emith_call(sh2_translate); - emit_block_entry(); +/* just after lookup function, jump to address returned */ + emith_tst_r_r_ptr(RET_REG, RET_REG); + EMITH_SJMP_START(DCOND_EQ); + emith_jump_reg_c(DCOND_NE, RET_REG); + EMITH_SJMP_END(DCOND_EQ); // XXX: can't translate, fail emith_call(dr_failure); emith_flush(); @@ -4486,9 +4475,7 @@ static void sh2_generate_utils(void) emith_call(sh2_drc_read32); if (arg0 != RET_REG) emith_move_r_r(arg0, RET_REG); -#if defined(__i386__) || defined(__x86_64__) - emith_add_r_r_ptr_imm(xSP, xSP, sizeof(void *)); // fix stack -#endif + emith_call_cleanup(); emith_jump(sh2_drc_dispatcher); rcache_invalidate(); emith_flush(); @@ -4581,6 +4568,7 @@ static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nol return; } +#if LINK_BRANCHES // remove from hash table, make incoming links unresolved if (bd->active) { for (i = 0; i < bd->entry_count; i++) { @@ -4596,8 +4584,10 @@ static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nol add_to_block_list(&inactive_blocks[tcache_id], bd); } bd->active = 0; +#endif if (free) { +#if LINK_BRANCHES // revoke outgoing links for (bl = bd->entryp[0].o_links; bl != NULL; bl = bl->o_next) { if (bl->target) @@ -4609,6 +4599,7 @@ static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nol blink_free[bl->tcache_id] = bl; } bd->entryp[0].o_links = NULL; +#endif // invalidate block rm_from_block_lists(bd); bd->addr = bd->size = bd->addr_lit = bd->size_lit = 0; @@ -4619,7 +4610,7 @@ static void 
sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nol static void sh2_smc_rm_blocks(u32 a, int tcache_id, u32 shift) { struct block_list **blist, *entry, *next; - u32 mask = ram_sizes[tcache_id] - 1; + u32 mask = RAM_SIZE(tcache_id) - 1; u32 wtmask = ~0x20000000; // writethrough area mask u32 start_addr, end_addr; u32 start_lit, end_lit; @@ -4722,7 +4713,7 @@ static void block_stats(void) for (i = 0; i < block_counts[b]; i++) if (block_tables[b][i].addr != 0) total += block_tables[b][i].refcount; - for (i = block_limit[b]; i < block_max_counts[b]; i++) + for (i = block_limit[b]; i < BLOCK_MAX_COUNT(b); i++) if (block_tables[b][i].addr != 0) total += block_tables[b][i].refcount; } @@ -4739,7 +4730,7 @@ static void block_stats(void) maxb = blk; } } - for (i = block_limit[b]; i < block_max_counts[b]; i++) { + for (i = block_limit[b]; i < BLOCK_MAX_COUNT(b); i++) { blk = &block_tables[b][i]; if (blk->addr != 0 && blk->refcount > max) { max = blk->refcount; @@ -4757,7 +4748,7 @@ static void block_stats(void) for (b = 0; b < ARRAY_SIZE(block_tables); b++) { for (i = 0; i < block_counts[b]; i++) block_tables[b][i].refcount = 0; - for (i = block_limit[b]; i < block_max_counts[b]; i++) + for (i = block_limit[b]; i < BLOCK_MAX_COUNT(b); i++) block_tables[b][i].refcount = 0; } #endif @@ -4774,7 +4765,7 @@ void entry_stats(void) for (i = 0; i < block_counts[b]; i++) for (j = 0; j < block_tables[b][i].entry_count; j++) total += block_tables[b][i].entryp[j].entry_count; - for (i = block_limit[b]; i < block_max_counts[b]; i++) + for (i = block_limit[b]; i < BLOCK_MAX_COUNT(b); i++) for (j = 0; j < block_tables[b][i].entry_count; j++) total += block_tables[b][i].entryp[j].entry_count; } @@ -4793,7 +4784,7 @@ void entry_stats(void) maxb = &blk->entryp[j]; } } - for (i = block_limit[b]; i < block_max_counts[b]; i++) { + for (i = block_limit[b]; i < BLOCK_MAX_COUNT(b); i++) { blk = &block_tables[b][i]; for (j = 0; j < blk->entry_count; j++) if 
(blk->entryp[j].entry_count > max) { @@ -4813,7 +4804,7 @@ void entry_stats(void) for (i = 0; i < block_counts[b]; i++) for (j = 0; j < block_tables[b][i].entry_count; j++) block_tables[b][i].entryp[j].entry_count = 0; - for (i = block_limit[b]; i < block_max_counts[b]; i++) + for (i = block_limit[b]; i < BLOCK_MAX_COUNT(b); i++) for (j = 0; j < block_tables[b][i].entry_count; j++) block_tables[b][i].entryp[j].entry_count = 0; } @@ -4871,7 +4862,15 @@ static void bcache_stats(void) for (i = 1; i < ARRAY_SIZE(sh2s->rts_cache); i++) if (sh2s[0].rts_cache[i].pc == -1 && sh2s[1].rts_cache[i].pc == -1) break; - printf("return cache hits:%d misses:%d depth: %d\n", rchit, rcmiss, i); + printf("return cache hits:%d misses:%d depth: %d index: %d/%d\n", rchit, rcmiss, i,sh2s[0].rts_cache_idx,sh2s[1].rts_cache_idx); + for (i = 0; i < ARRAY_SIZE(sh2s[0].rts_cache); i++) { + printf("%08x ",sh2s[0].rts_cache[i].pc); + if ((i+1) % 8 == 0) printf("\n"); + } + for (i = 0; i < ARRAY_SIZE(sh2s[1].rts_cache); i++) { + printf("%08x ",sh2s[1].rts_cache[i].pc); + if ((i+1) % 8 == 0) printf("\n"); + } #endif #if BRANCH_CACHE printf("branch cache hits:%d misses:%d\n", bchit, bcmiss); @@ -4920,31 +4919,31 @@ int sh2_drc_init(SH2 *sh2) if (block_tables[0] == NULL) { for (i = 0; i < TCACHE_BUFFERS; i++) { - block_tables[i] = calloc(block_max_counts[i], sizeof(*block_tables[0])); + block_tables[i] = calloc(BLOCK_MAX_COUNT(i), sizeof(*block_tables[0])); if (block_tables[i] == NULL) goto fail; // max 2 block links (exits) per block - block_link_pool[i] = calloc(block_link_pool_max_counts[i], + block_link_pool[i] = calloc(BLOCK_LINK_MAX_COUNT(i), sizeof(*block_link_pool[0])); if (block_link_pool[i] == NULL) goto fail; - inval_lookup[i] = calloc(ram_sizes[i] / INVAL_PAGE_SIZE, + inval_lookup[i] = calloc(RAM_SIZE(i) / INVAL_PAGE_SIZE, sizeof(inval_lookup[0])); if (inval_lookup[i] == NULL) goto fail; - hash_tables[i] = calloc(hash_table_sizes[i], sizeof(*hash_tables[0])); + hash_tables[i] = 
calloc(HASH_TABLE_SIZE(i), sizeof(*hash_tables[0])); if (hash_tables[i] == NULL) goto fail; - unresolved_links[i] = calloc(hash_table_sizes[i], sizeof(*unresolved_links[0])); + unresolved_links[i] = calloc(HASH_TABLE_SIZE(i), sizeof(*unresolved_links[0])); if (unresolved_links[i] == NULL) goto fail; } memset(block_counts, 0, sizeof(block_counts)); for (i = 0; i < ARRAY_SIZE(block_counts); i++) { - block_limit[i] = block_max_counts[i] - 1; + block_limit[i] = BLOCK_MAX_COUNT(i) - 1; } memset(block_link_pool_counts, 0, sizeof(block_link_pool_counts)); for (i = 0; i < ARRAY_SIZE(blink_free); i++) { @@ -5044,12 +5043,12 @@ void sh2_drc_finish(SH2 *sh2) static void *dr_get_pc_base(u32 pc, SH2 *sh2) { - void *ret = NULL; + void *ret; u32 mask = 0; ret = p32x_sh2_get_mem_ptr(pc, &mask, sh2); - if (ret == NULL) - return (void *)-1; // NULL is valid value + if (ret == (void *)-1) + return ret; return (char *)ret - (pc & ~mask); } diff --git a/cpu/sh2/compiler.h b/cpu/sh2/compiler.h index b098f6c6..38e47c0b 100644 --- a/cpu/sh2/compiler.h +++ b/cpu/sh2/compiler.h @@ -43,6 +43,7 @@ unsigned short scan_block(unsigned int base_pc, int is_slave, #else #warning "direct DRC register access not available for this host" #endif +#endif #ifdef DRC_SR_REG #define __DRC_DECLARE_SR(SR) register int sh2_sr asm(#SR) @@ -59,4 +60,3 @@ unsigned short scan_block(unsigned int base_pc, int is_slave, #define DRC_SAVE_SR(sh2) #define DRC_RESTORE_SR(sh2) #endif -#endif diff --git a/platform/common/common.mak b/platform/common/common.mak index 5be1521c..331e7124 100644 --- a/platform/common/common.mak +++ b/platform/common/common.mak @@ -171,7 +171,7 @@ DASM = $(R)platform/libpicofe/linux/host_dasm.c DASMLIBS = -lbfd -lopcodes -liberty ifeq "$(ARCH)" "arm" ifeq ($(filter_out $(shell $(CC) --print-file-name=libbfd.so),"/"),) -DASM = $(R)platform/common/host_dasm_arm.c +DASM = $(R)platform/common/host_dasm.c DASMLIBS = endif endif diff --git a/platform/common/host_dasm_arm.c 
b/platform/common/host_dasm.c similarity index 88% rename from platform/common/host_dasm_arm.c rename to platform/common/host_dasm.c index 7951b7d9..d0537ef6 100644 --- a/platform/common/host_dasm_arm.c +++ b/platform/common/host_dasm.c @@ -1,9 +1,15 @@ #include #include +#include #include +#ifdef __mips__ +#include "dismips.c" +#define disasm dismips +#else #include "disarm.c" - +#define disasm disarm +#endif /* symbols */ typedef struct { const char *name; void *value; } asymbol; @@ -40,7 +46,8 @@ void host_dasm(void *addr, int len) insn = *(long *)addr; printf(" %08lx %08lx ", (long)addr, insn); - if(disarm((unsigned)addr, insn, buf, sizeof(buf))) { + if(disasm((unsigned)addr, insn, buf, sizeof(buf))) + { symaddr = 0; if ((insn & 0xe000000) == 0xa000000) { symaddr = (long)addr + 8 + ((long)(insn << 8) >> 6); @@ -53,7 +60,7 @@ void host_dasm(void *addr, int len) else printf("%s\n", buf); } else - printf("unknown\n"); + printf("unknown (0x%08lx)\n", insn); addr = (char *)addr + sizeof(long); } } From 1747b6712da0238f7a9c9b6a4bc08337e3e4aa71 Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 25 Jun 2019 20:24:11 +0200 Subject: [PATCH 0205/1110] SH2 drc: register cache overhaul (bugfixing, speed, readability) --- cpu/sh2/compiler.c | 1188 ++++++++++++++++++++++++-------------------- 1 file changed, 662 insertions(+), 526 deletions(-) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index c1ba3f32..3b03d0c2 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -60,6 +60,7 @@ // 08 - runtime block entry log // 10 - smc self-check // 20 - runtime block entry counter +// 40 - rcache checking // 80 - branch cache statistics // 100 - write trace // 200 - compare trace @@ -67,7 +68,7 @@ // 800 - state dump on exit // { #ifndef DRC_DEBUG -#define DRC_DEBUG 0x0 +#define DRC_DEBUG 0//x8e7 #endif #if DRC_DEBUG @@ -152,13 +153,17 @@ enum op_types { OP_UNDEFINED, }; -#define OP_ISBRANCH(op) (BITRANGE(OP_BRANCH, OP_BRANCH_RF) & BITMASK1(op)) +// XXX consider trap insns: 
OP_TRAPA, OP_UNDEFINED? +#define OP_ISBRANCH(op) ((BITRANGE(OP_BRANCH, OP_BRANCH_RF)| BITMASK1(OP_RTE)) \ + & BITMASK1(op)) #define OP_ISBRAUC(op) (BITMASK4(OP_BRANCH, OP_BRANCH_R, OP_BRANCH_RF, OP_RTE) \ & BITMASK1(op)) -#define OP_ISBRACND(op) (BITMASK2(OP_BRANCH_CT, OP_BRANCH_CF) & BITMASK1(op)) +#define OP_ISBRACND(op) (BITMASK3(OP_BRANCH_CT, OP_BRANCH_CF, OP_BRANCH_N) \ + & BITMASK1(op)) #define OP_ISBRAIMM(op) (BITMASK3(OP_BRANCH, OP_BRANCH_CT, OP_BRANCH_CF) \ - & BITMASK1(op)) -#define OP_ISBRAIND(op) (BITMASK2(OP_BRANCH_R, OP_BRANCH_RF) & BITMASK1(op)) + & BITMASK1(op)) +#define OP_ISBRAIND(op) (BITMASK3(OP_BRANCH_R, OP_BRANCH_RF, OP_RTE) \ + & BITMASK1(op)) #ifdef DRC_SH2 @@ -192,7 +197,9 @@ static char sh2dasm_buff[64]; } #if (DRC_DEBUG & (8|256|512|1024)) || defined(PDB) +#if (DRC_DEBUG & (256|512|1024)) static SH2 csh2[2][8]; +#endif static void REGPARM(3) *sh2_drc_log_entry(void *block, SH2 *sh2, u32 sr) { if (block != NULL) { @@ -386,7 +393,8 @@ enum { typedef struct { u8 hreg; // "host" reg u8 flags:4; // TEMP or REG? - u8 type:4; + u8 type:2; // CACHED or TEMP? + u8 ref:2; // ref counter u16 stamp; // kind of a timestamp u32 gregs; // "guest" reg mask } cache_reg_t; @@ -415,8 +423,9 @@ static void rcache_free_tmp(int hr); // used by handlers in worst case (currently 4). // Register assignment goes by ABI convention. Caller save registers are TEMP, // the others are either static or REG. SR must be static, R0 very recommended. +// VBR, PC, PR must not be static (read from context in utils). // TEMP registers first, REG last. alloc/evict algorithm depends on this. -// The 1st TEMP must not be RET_REG on x86 (it uses temps for some insns). +// The 1st TEMP must not be RET_REG on platforms using temps in insns (eg. x86). // XXX shouldn't this be somehow defined in the code emitters? 
#ifdef __arm__ #include "../drc/emit_arm.c" @@ -438,7 +447,7 @@ static guest_reg_t guest_regs[] = { }; // OABI/EABI: params: r0-r3, return: r0-r1, temp: r12,r14, saved: r4-r8,r10,r11 -// SP,PC: r13,r15 must not be used. saved: r9 (for platform use, e.g. on OSx) +// SP,PC: r13,r15 must not be used. saved: r9 (for platform use, e.g. on ios) static cache_reg_t cache_regs[] = { { 12, HRF_TEMP }, // temps { 14, HRF_TEMP }, @@ -1216,26 +1225,75 @@ typedef struct { gconst_t gconsts[ARRAY_SIZE(guest_regs)]; static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr); +static inline int rcache_is_cached(sh2_reg_e r); static void rcache_add_vreg_alias(int x, sh2_reg_e r); static void rcache_remove_vreg_alias(int x, sh2_reg_e r); +static void rcache_evict_vreg(int x); +static void rcache_remap_vreg(int x); #define RCACHE_DUMP(msg) { \ cache_reg_t *cp; \ guest_reg_t *gp; \ int i; \ printf("cache dump %s:\n",msg); \ - printf("cache_regs:\n"); \ + printf(" cache_regs:\n"); \ for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { \ cp = &cache_regs[i]; \ - if (cp->type != HR_FREE || cp->gregs) \ - printf("%d: hr=%d t=%d f=%x m=%x\n", i, cp->hreg, cp->type, cp->flags, cp->gregs); \ + if (cp->type != HR_FREE || cp->gregs || (cp->flags & ~(HRF_REG|HRF_TEMP))) \ + printf(" %d: hr=%d t=%d f=%x c=%d m=%x\n", i, cp->hreg, cp->type, cp->flags, cp->ref, cp->gregs); \ } \ - printf("guest_regs:\n"); \ + printf(" guest_regs:\n"); \ for (i = 0; i < ARRAY_SIZE(guest_regs); i++) { \ gp = &guest_regs[i]; \ - if (gp->vreg != -1 || gp->sreg >= 0) \ - printf("%d: v=%d f=%x s=%d\n", i, gp->vreg, gp->flags, gp->sreg); \ + if (gp->vreg != -1 || gp->sreg >= 0 || gp->flags) \ + printf(" %d: v=%d f=%x s=%d c=%d\n", i, gp->vreg, gp->flags, gp->sreg, gp->cnst); \ } \ + printf(" gconsts:\n"); \ + for (i = 0; i < ARRAY_SIZE(gconsts); i++) { \ + if (gconsts[i].gregs) \ + printf(" %d: m=%x v=%x\n", i, gconsts[i].gregs, gconsts[i].val); \ + } \ +} + +#define RCACHE_CHECK(msg) { \ + cache_reg_t 
*cp; \ + guest_reg_t *gp; \ + int i, x, d = 0; \ + for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { \ + cp = &cache_regs[i]; \ + if (cp->type == HR_FREE || cp->type == HR_TEMP) continue; \ + /* check connectivity greg->vreg */ \ + FOR_ALL_BITS_SET_DO(cp->gregs, x, \ + if (guest_regs[x].vreg != i) \ + { d = 1; printf("cache check v=%d r=%d not connected?\n",i,x); } \ + ) \ + } \ + for (i = 0; i < ARRAY_SIZE(guest_regs); i++) { \ + gp = &guest_regs[i]; \ + if (gp->vreg != -1 && !(cache_regs[gp->vreg].gregs & (1 << i))) \ + { d = 1; printf("cache check r=%d v=%d not connected?\n", i, gp->vreg); }\ + if (gp->vreg != -1 && cache_regs[gp->vreg].type != HR_STATIC && cache_regs[gp->vreg].type != HR_CACHED) \ + { d = 1; printf("cache check r=%d v=%d wrong type?\n", i, gp->vreg); }\ + if ((gp->flags & GRF_CONST) && !(gconsts[gp->cnst].gregs & (1 << i))) \ + { d = 1; printf("cache check r=%d c=%d not connected?\n", i, gp->cnst); }\ + if ((gp->flags & GRF_CDIRTY) && (gp->vreg != -1 || !(gp->flags & GRF_CONST)) )\ + { d = 1; printf("cache check r=%d CDIRTY?\n", i); } \ + } \ + for (i = 0; i < ARRAY_SIZE(gconsts); i++) { \ + FOR_ALL_BITS_SET_DO(gconsts[i].gregs, x, \ + if (guest_regs[x].cnst != i || !(guest_regs[x].flags & GRF_CONST)) \ + { d = 1; printf("cache check c=%d v=%d not connected?\n",i,x); } \ + ) \ + } \ + if (d) RCACHE_DUMP(msg) \ +/* else { \ + printf("locked regs %s:\n",msg); \ + for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { \ + cp = &cache_regs[i]; \ + if (cp->flags & HRF_LOCKED) \ + printf(" %d: hr=%d t=%d f=%x c=%d m=%x\n", i, cp->hreg, cp->type, cp->flags, cp->ref, cp->gregs); \ + } \ + } */ \ } #if PROPAGATE_CONSTANTS @@ -1244,15 +1302,16 @@ static inline int gconst_alloc(sh2_reg_e r) int i, n = -1; for (i = 0; i < ARRAY_SIZE(gconsts); i++) { - if (gconsts[i].gregs & (1 << r)) - gconsts[i].gregs &= ~(1 << r); + gconsts[i].gregs &= ~(1 << r); if (gconsts[i].gregs == 0 && n < 0) n = i; } if (n >= 0) gconsts[n].gregs = (1 << r); - else + else { + printf("all 
gconst buffers in use, aborting\n"); exit(1); // cannot happen - more constants than guest regs? + } return n; } @@ -1274,19 +1333,6 @@ static void gconst_new(sh2_reg_e r, u32 val) if (guest_regs[r].vreg >= 0) rcache_remove_vreg_alias(guest_regs[r].vreg, r); } - -static void gconst_copy(sh2_reg_e rd, sh2_reg_e rs) -{ - if (guest_regs[rd].flags & GRF_CONST) { - guest_regs[rd].flags &= ~(GRF_CONST|GRF_CDIRTY); - gconsts[guest_regs[rd].cnst].gregs &= ~(1 << rd); - } - if (guest_regs[rs].flags & GRF_CONST) { - guest_regs[rd].flags |= GRF_CONST; - guest_regs[rd].cnst = guest_regs[rs].cnst; - gconsts[guest_regs[rd].cnst].gregs |= (1 << rd); - } -} #endif static int gconst_get(sh2_reg_e r, u32 *val) @@ -1310,17 +1356,22 @@ static int gconst_check(sh2_reg_e r) static int gconst_try_read(int vreg, sh2_reg_e r) { int i, x; + if (guest_regs[r].flags & GRF_CDIRTY) { x = guest_regs[r].cnst; emith_move_r_imm(cache_regs[vreg].hreg, gconsts[x].val); FOR_ALL_BITS_SET_DO(gconsts[x].gregs, i, { - if (guest_regs[i].vreg >= 0 && i != r) + if (guest_regs[i].vreg >= 0 && guest_regs[i].vreg != vreg) rcache_remove_vreg_alias(guest_regs[i].vreg, i); - rcache_add_vreg_alias(vreg, i); + if (guest_regs[i].vreg < 0) + rcache_add_vreg_alias(vreg, i); guest_regs[i].flags &= ~GRF_CDIRTY; guest_regs[i].flags |= GRF_DIRTY; }); + if (cache_regs[vreg].type != HR_STATIC) + cache_regs[vreg].type = HR_CACHED; + cache_regs[vreg].flags |= HRF_DIRTY; return 1; } return 0; @@ -1339,11 +1390,23 @@ static u32 gconst_dirty_mask(void) static void gconst_kill(sh2_reg_e r) { - if (guest_regs[r].flags &= ~(GRF_CONST|GRF_CDIRTY)) + if (guest_regs[r].flags & (GRF_CONST|GRF_CDIRTY)) gconsts[guest_regs[r].cnst].gregs &= ~(1 << r); guest_regs[r].flags &= ~(GRF_CONST|GRF_CDIRTY); } +static void gconst_copy(sh2_reg_e rd, sh2_reg_e rs) +{ + gconst_kill(rd); + if (guest_regs[rs].flags & GRF_CONST) { + guest_regs[rd].flags |= GRF_CONST; + if (guest_regs[rd].vreg < 0) + guest_regs[rd].flags |= GRF_CDIRTY; + 
guest_regs[rd].cnst = guest_regs[rs].cnst; + gconsts[guest_regs[rd].cnst].gregs |= (1 << rd); + } +} + static void gconst_clean(void) { int i; @@ -1367,25 +1430,76 @@ static void gconst_invalidate(void) } } + static u16 rcache_counter; -static u32 rcache_static; -static u32 rcache_locked; -static u32 rcache_hint_soon; -static u32 rcache_hint_late; -static u32 rcache_hint_write; -static u32 rcache_hint_clean; -#define rcache_hint (rcache_hint_soon|rcache_hint_late) +// SH2 register usage bitmasks +static u32 rcache_regs_static; // statically allocated regs +static u32 rcache_regs_now; // regs used in current insn +static u32 rcache_regs_soon; // regs used in the next few insns +static u32 rcache_regs_late; // regs used in later insns +static u32 rcache_regs_discard; // regs overwritten without being used +static u32 rcache_regs_clean; // regs needing cleaning +// combination masks XXX this seems obscure +#define rcache_regs_used (rcache_regs_soon|rcache_regs_late|rcache_regs_clean) +#define rcache_regs_nowused (rcache_regs_now|rcache_regs_used) +#define rcache_regs_nowsoon (rcache_regs_now|rcache_regs_soon) +#define rcache_regs_soonclean (rcache_regs_soon|rcache_regs_clean) + +static void rcache_ref_vreg(int x) +{ + if (x >= 0) { + cache_regs[x].ref ++; + cache_regs[x].flags |= HRF_LOCKED; + } +} + +static void rcache_unref_vreg(int x) +{ + if (x >= 0 && -- cache_regs[x].ref == 0) { + cache_regs[x].flags &= ~HRF_LOCKED; + } +} + +static void rcache_free_vreg(int x) +{ + if (cache_regs[x].type != HR_STATIC) + cache_regs[x].type = HR_FREE; + cache_regs[x].flags &= (HRF_REG|HRF_TEMP); + cache_regs[x].gregs = 0; + cache_regs[x].ref = 0; +} static void rcache_unmap_vreg(int x) { int i; FOR_ALL_BITS_SET_DO(cache_regs[x].gregs, i, + if (guest_regs[i].flags & GRF_DIRTY) { + // if a dirty reg is unmapped save its value to context + if (~rcache_regs_discard & (1 << i)) + emith_ctx_write(cache_regs[x].hreg, i * 4); + guest_regs[i].flags &= ~GRF_DIRTY; + } guest_regs[i].vreg = 
-1); - if (cache_regs[x].type != HR_STATIC) - cache_regs[x].type = HR_FREE; - cache_regs[x].gregs = 0; - cache_regs[x].flags &= (HRF_REG|HRF_TEMP); + rcache_free_vreg(x); +} + +static void rcache_move_vreg(int d, int x) +{ + int i; + + if (cache_regs[d].type != HR_STATIC) + cache_regs[d].type = HR_CACHED; + cache_regs[d].gregs = cache_regs[x].gregs; + cache_regs[d].flags &= (HRF_TEMP|HRF_REG); + cache_regs[d].flags |= cache_regs[x].flags & ~(HRF_TEMP|HRF_REG); + cache_regs[d].ref = 0; + cache_regs[d].stamp = cache_regs[x].stamp; + emith_move_r_r(cache_regs[d].hreg, cache_regs[x].hreg); + for (i = 0; i < ARRAY_SIZE(guest_regs); i++) + if (guest_regs[i].vreg == x) + guest_regs[i].vreg = d; + rcache_free_vreg(x); } static void rcache_clean_vreg(int x) @@ -1394,99 +1508,112 @@ static void rcache_clean_vreg(int x) if (cache_regs[x].flags & HRF_DIRTY) { // writeback cache_regs[x].flags &= ~HRF_DIRTY; + rcache_ref_vreg(x); FOR_ALL_BITS_SET_DO(cache_regs[x].gregs, r, if (guest_regs[r].flags & GRF_DIRTY) { if (guest_regs[r].flags & GRF_STATIC) { if (guest_regs[r].vreg != guest_regs[r].sreg) { if (!(cache_regs[guest_regs[r].sreg].flags & HRF_LOCKED)) { // statically mapped reg not in its sreg. 
move back to sreg - rcache_clean_vreg(guest_regs[r].sreg); - rcache_unmap_vreg(guest_regs[r].sreg); - emith_move_r_r(cache_regs[guest_regs[r].sreg].hreg, cache_regs[guest_regs[r].vreg].hreg); + rcache_evict_vreg(guest_regs[r].sreg); + emith_move_r_r(cache_regs[guest_regs[r].sreg].hreg, + cache_regs[guest_regs[r].vreg].hreg); rcache_remove_vreg_alias(x, r); rcache_add_vreg_alias(guest_regs[r].sreg, r); cache_regs[guest_regs[r].sreg].flags |= HRF_DIRTY; } else { // must evict since sreg is locked - emith_ctx_write(cache_regs[x].hreg, r * 4); + if (~rcache_regs_discard & (1 << r)) + emith_ctx_write(cache_regs[x].hreg, r * 4); guest_regs[r].flags &= ~GRF_DIRTY; - guest_regs[r].vreg = -1; + rcache_remove_vreg_alias(x, r); } - } - } else if (~rcache_hint_write & (1 << r)) { - emith_ctx_write(cache_regs[x].hreg, r * 4); + } else + cache_regs[x].flags |= HRF_DIRTY; + } else { + if (~rcache_regs_discard & (1 << r)) + emith_ctx_write(cache_regs[x].hreg, r * 4); guest_regs[r].flags &= ~GRF_DIRTY; } + rcache_regs_clean &= ~(1 << r); }) + rcache_unref_vreg(x); } +#if DRC_DEBUG & 64 + RCACHE_CHECK("after clean"); +#endif } static void rcache_add_vreg_alias(int x, sh2_reg_e r) { cache_regs[x].gregs |= (1 << r); guest_regs[r].vreg = x; + if (cache_regs[x].type != HR_STATIC) + cache_regs[x].type = HR_CACHED; } static void rcache_remove_vreg_alias(int x, sh2_reg_e r) { cache_regs[x].gregs &= ~(1 << r); - if (!cache_regs[x].gregs) { + if (!cache_regs[x].gregs) // no reg mapped -> free vreg - if (cache_regs[x].type != HR_STATIC) - cache_regs[x].type = HR_FREE; - cache_regs[x].flags &= (HRF_REG|HRF_TEMP); - } + rcache_free_vreg(x); guest_regs[r].vreg = -1; } static void rcache_evict_vreg(int x) { +#if REMAP_REGISTER + rcache_remap_vreg(x); +#else rcache_clean_vreg(x); +#endif rcache_unmap_vreg(x); } static void rcache_evict_vreg_aliases(int x, sh2_reg_e r) { - cache_regs[x].gregs &= ~(1 << r); + rcache_remove_vreg_alias(x, r); rcache_evict_vreg(x); - cache_regs[x].gregs = (1 << r); - 
if (cache_regs[x].type != HR_STATIC) - cache_regs[x].type = HR_CACHED; - if (guest_regs[r].flags & GRF_DIRTY) - cache_regs[x].flags |= HRF_DIRTY; + rcache_add_vreg_alias(x, r); } -static cache_reg_t *rcache_evict(void) +static int rcache_allocate(int what, int minprio) { // evict reg with oldest stamp (only for HRF_REG, no temps) int i, i_prio, oldest = -1, prio = 0; u16 min_stamp = (u16)-1; for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { - // consider only unlocked REG - if (!(cache_regs[i].flags & HRF_REG) || (cache_regs[i].flags & HRF_LOCKED)) + // consider only unlocked REG or non-TEMP + if (cache_regs[i].flags == 0 || (cache_regs[i].flags & HRF_LOCKED)) continue; - if (cache_regs[i].type == HR_FREE || (cache_regs[i].type == HR_TEMP)) { + if ((what > 0 && !(cache_regs[i].flags & HRF_REG)) || + (what == 0 && (cache_regs[i].flags & HRF_TEMP)) || + (what < 0 && !(cache_regs[i].flags & HRF_TEMP))) + continue; + if (cache_regs[i].type == HR_FREE || cache_regs[i].type == HR_TEMP) { + // REG is free + prio = 6; oldest = i; break; } if (cache_regs[i].type == HR_CACHED) { - if (rcache_locked & cache_regs[i].gregs) + if (rcache_regs_now & cache_regs[i].gregs) // REGs needed for the current insn i_prio = 1; - else if (rcache_hint_soon & cache_regs[i].gregs) - // REGs needed in some future insn + else if (rcache_regs_soon & cache_regs[i].gregs) + // REGs needed in the next insns i_prio = 2; - else if (rcache_hint_late & cache_regs[i].gregs) + else if (rcache_regs_late & cache_regs[i].gregs) // REGs needed in some future insn i_prio = 3; - else if ((rcache_hint_write & cache_regs[i].gregs) != cache_regs[i].gregs) - // REGs not needed soon + else if (!(~rcache_regs_discard & cache_regs[i].gregs)) + // REGs not needed in the foreseeable future i_prio = 4; else // REGs soon overwritten anyway i_prio = 5; - if (prio < i_prio || (prio == i_prio && cache_regs[i].stamp < min_stamp)) { min_stamp = cache_regs[i].stamp; oldest = i; @@ -1495,25 +1622,66 @@ static cache_reg_t 
*rcache_evict(void) } } - if (oldest == -1) { - printf("no registers to evict, aborting\n"); - exit(1); - } + + if (prio < minprio || oldest == -1) + return -1; if (cache_regs[oldest].type == HR_CACHED) rcache_evict_vreg(oldest); - cache_regs[oldest].type = HR_FREE; - cache_regs[oldest].flags &= (HRF_TEMP|HRF_REG); - cache_regs[oldest].gregs = 0; + else + rcache_free_vreg(oldest); - return &cache_regs[oldest]; + return oldest; +} + +static int rcache_allocate_vreg(int needed) +{ + int x; + + // get a free reg, but use temps only if r is not needed soon + for (x = ARRAY_SIZE(cache_regs) - 1; x >= 0; x--) { + if (cache_regs[x].flags && (cache_regs[x].type == HR_FREE || + (cache_regs[x].type == HR_TEMP && !(cache_regs[x].flags & HRF_LOCKED))) && + (!needed || (cache_regs[x].flags & HRF_REG))) + break; + } + + if (x < 0) + x = rcache_allocate(1, 0); + return x; +} + +static int rcache_allocate_nontemp(void) +{ + int x = rcache_allocate(0, 3); + return x; +} + +static int rcache_allocate_temp(void) +{ + int x; + + // use any free reg, but prefer TEMP regs + for (x = 0; x < ARRAY_SIZE(cache_regs); x++) { + if (cache_regs[x].flags && (cache_regs[x].type == HR_FREE || + (cache_regs[x].type == HR_TEMP && !(cache_regs[x].flags & HRF_LOCKED)))) + break; + } + + if (x >= ARRAY_SIZE(cache_regs)) + x = rcache_allocate(-1, 1); + if (x < 0) { + printf("no temp register available, aborting\n"); + exit(1); + } + return x; } #if REMAP_REGISTER // maps a host register to a REG static int rcache_map_reg(sh2_reg_e r, int hr, int mode) { - int i; + int x, i; gconst_kill(r); @@ -1527,11 +1695,13 @@ static int rcache_map_reg(sh2_reg_e r, int hr, int mode) // deal with statically mapped regs if (mode == RC_GR_RMW && (guest_regs[r].flags & GRF_STATIC)) { - if (guest_regs[r].vreg == guest_regs[r].sreg) { + x = guest_regs[r].sreg; + if (guest_regs[r].vreg == x) { // STATIC in its sreg with no aliases, and some processing pending - if (cache_regs[guest_regs[r].vreg].gregs == 1 << r) - return 
cache_regs[guest_regs[r].vreg].hreg; - } else if (!cache_regs[guest_regs[r].sreg].gregs) + if (cache_regs[x].gregs == 1 << r) + return cache_regs[x].hreg; + } else if (cache_regs[x].type == HR_FREE || + (cache_regs[x].type == HR_TEMP && !(cache_regs[x].flags & HRF_LOCKED))) // STATIC not in its sreg, with sreg available -> move it i = guest_regs[r].sreg; } @@ -1540,187 +1710,184 @@ static int rcache_map_reg(sh2_reg_e r, int hr, int mode) if (guest_regs[r].vreg >= 0) rcache_remove_vreg_alias(guest_regs[r].vreg, r); if (cache_regs[i].type == HR_CACHED) - rcache_unmap_vreg(i); + rcache_evict_vreg(i); // set new mappping if (cache_regs[i].type != HR_STATIC) cache_regs[i].type = HR_CACHED; cache_regs[i].gregs = 1 << r; cache_regs[i].flags &= (HRF_TEMP|HRF_REG); + cache_regs[i].ref = 0; cache_regs[i].stamp = ++rcache_counter; - cache_regs[i].flags |= HRF_DIRTY|HRF_LOCKED; + cache_regs[i].flags |= HRF_DIRTY; + rcache_ref_vreg(i); guest_regs[r].flags |= GRF_DIRTY; guest_regs[r].vreg = i; +#if DRC_DEBUG & 64 + RCACHE_CHECK("after map"); +#endif return cache_regs[i].hreg; } -// remap vreg from a TEMP to a REG if it is hinted (upcoming TEMP invalidation) -static void rcache_remap_vreg(int r) +// remap vreg from a TEMP to a REG if it will be used (upcoming TEMP invalidation) +static void rcache_remap_vreg(int x) { - int i, j, free = -1, cached = -1, hinted = -1; - u16 min_stamp_cached = (u16)-1, min_stamp_hinted = -1; + int d; - // r must be a vreg - if (cache_regs[r].type != HR_CACHED) + // x must be a cached vreg + if (cache_regs[x].type != HR_CACHED && cache_regs[x].type != HR_STATIC) return; - // if r is already a REG or isn't used, clean here to avoid data loss on inval - if ((cache_regs[r].flags & HRF_REG) || !(rcache_hint & cache_regs[r].gregs)) { - rcache_clean_vreg(r); + // don't do it if x is already a REG or isn't used or to be cleaned anyway + if ((cache_regs[x].flags & HRF_REG) || + !(rcache_regs_used & ~rcache_regs_clean & cache_regs[x].gregs)) { + // clean here 
to avoid data loss on invalidation + rcache_clean_vreg(x); return; } - // find REG, either free or unused temp or oldest cached - for (i = 0; i < ARRAY_SIZE(cache_regs) && free < 0; i++) { - if ((cache_regs[i].flags & HRF_TEMP) || (cache_regs[i].flags & HRF_LOCKED)) - continue; - if (cache_regs[i].type == HR_FREE || cache_regs[i].type == HR_TEMP) - free = i; - if (cache_regs[i].type == HR_CACHED && !(rcache_hint & cache_regs[i].gregs)) { - if (cache_regs[i].stamp < min_stamp_cached) { - min_stamp_cached = cache_regs[i].stamp; - cached = i; - } + if (cache_regs[x].flags & HRF_LOCKED) { + printf("remap vreg %d is locked\n", x); + exit(1); + } + + // allocate a non-TEMP vreg + rcache_ref_vreg(x); // lock to avoid evicting x + d = rcache_allocate_nontemp(); + rcache_unref_vreg(x); + if (d < 0) { + rcache_clean_vreg(x); + return; + } + + // move vreg to new location + rcache_move_vreg(d, x); +#if DRC_DEBUG & 64 + RCACHE_CHECK("after remap"); +#endif +} +#endif + +#if ALIAS_REGISTERS +static void rcache_alias_vreg(sh2_reg_e rd, sh2_reg_e rs) +{ + int x; + + // if s isn't constant, it must be in cache for aliasing + if (!gconst_check(rs)) + rcache_get_reg_(rs, RC_GR_READ, 0, NULL); + + // if d and s are not already aliased + x = guest_regs[rs].vreg; + if (guest_regs[rd].vreg != x) { + // remove possible old mapping of dst + if (guest_regs[rd].vreg >= 0) + rcache_remove_vreg_alias(guest_regs[rd].vreg, rd); + // make dst an alias of src + if (x >= 0) + rcache_add_vreg_alias(x, rd); + // if d is now in cache, it must be dirty + if (guest_regs[rd].vreg >= 0) { + x = guest_regs[rd].vreg; + cache_regs[x].flags |= HRF_DIRTY; + guest_regs[rd].flags |= GRF_DIRTY; } - if (cache_regs[i].type == HR_CACHED && !(rcache_hint_soon & cache_regs[i].gregs) - && (rcache_hint_soon & cache_regs[r].gregs)) - if (cache_regs[i].stamp < min_stamp_hinted) { - min_stamp_hinted = cache_regs[i].stamp; - hinted = i; - } } - if (free >= 0) { - i = free; - } else if (cached >= 0 && cached != r) { - i = 
cached; - rcache_evict_vreg(i); - } else if (hinted >= 0 && hinted != r) { - i = hinted; - rcache_evict_vreg(i); - } else { - rcache_clean_vreg(r); - return; - } - - // set new mapping and remove old one - cache_regs[i].type = HR_CACHED; - cache_regs[i].gregs = cache_regs[r].gregs; - cache_regs[i].flags &= (HRF_TEMP|HRF_REG); - cache_regs[i].flags |= cache_regs[r].flags & ~(HRF_TEMP|HRF_REG); - cache_regs[i].stamp = cache_regs[r].stamp; - emith_move_r_r(cache_regs[i].hreg, cache_regs[r].hreg); - for (j = 0; j < ARRAY_SIZE(guest_regs); j++) - if (guest_regs[j].vreg == r) - guest_regs[j].vreg = i; - cache_regs[r].type = HR_FREE; - cache_regs[r].flags &= (HRF_TEMP|HRF_REG); - cache_regs[r].gregs = 0; + gconst_copy(rd, rs); +#if DRC_DEBUG & 64 + RCACHE_CHECK("after alias"); +#endif } #endif // note: must not be called when doing conditional code static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr) { - cache_reg_t *tr = NULL; - int i, h, split = -1; + int src, dst, ali; + cache_reg_t *tr; - rcache_counter++; + dst = src = guest_regs[r].vreg; - // maybe already cached? - // if so, prefer against gconst (they must be in sync) - i = guest_regs[r].vreg; - if ((guest_regs[r].flags & GRF_STATIC) && i != guest_regs[r].sreg && + rcache_ref_vreg(src); // lock to avoid evicting src + // good opportunity to relocate a remapped STATIC? 
+ if ((guest_regs[r].flags & GRF_STATIC) && src != guest_regs[r].sreg && !(cache_regs[guest_regs[r].sreg].flags & HRF_LOCKED) && - (i < 0 || mode != RC_GR_READ) && - !((rcache_hint_soon|rcache_locked) & cache_regs[guest_regs[r].sreg].gregs)) { - // good opportunity to relocate a remapped STATIC - h = guest_regs[r].sreg; - rcache_evict_vreg(h); - tr = &cache_regs[h]; - tr->gregs = 1 << r; - if (i >= 0) { - if (mode != RC_GR_WRITE) { - if (hr) - *hr = cache_regs[i].hreg; - else - emith_move_r_r(cache_regs[h].hreg, cache_regs[i].hreg); - hr = NULL; - } - rcache_remove_vreg_alias(guest_regs[r].vreg, r); - } else if (mode != RC_GR_WRITE) { - if (gconst_try_read(h, r)) { - tr->flags |= HRF_DIRTY; - guest_regs[r].flags |= GRF_DIRTY; - } else - emith_ctx_read(tr->hreg, r * 4); - } - guest_regs[r].vreg = guest_regs[r].sreg; - goto end; - } else if (i >= 0) { - if (mode == RC_GR_READ || !(cache_regs[i].gregs & ~(1 << r))) { - // either only reading, or no multiple mapping - tr = &cache_regs[i]; - goto end; - } - // split if aliases needed rsn, or already locked, or r is STATIC in sreg - if (((rcache_hint|rcache_locked) & cache_regs[i].gregs & ~(1 << r)) || - (cache_regs[i].flags & HRF_LOCKED) || - (cache_regs[i].type == HR_STATIC && !(guest_regs[r].flags & GRF_STATIC))) { - // need to split up. take reg out here to avoid unnecessary writebacks - rcache_remove_vreg_alias(i, r); - split = i; - } else { - // aliases not needed anytime soon, remove them - // XXX split aliases away if writing and static and not locked and hinted? 
- rcache_evict_vreg_aliases(i, r); - tr = &cache_regs[i]; - goto end; + (src < 0 || mode != RC_GR_READ) && + !(rcache_regs_nowsoon & cache_regs[guest_regs[r].sreg].gregs)) { + dst = guest_regs[r].sreg; + rcache_evict_vreg(dst); + } else if (dst < 0) { + // allocate a cache register + if ((dst = rcache_allocate_vreg(rcache_regs_nowsoon & (1 << r))) < 0) { + printf("no registers to evict, aborting\n"); + exit(1); } } - - // get a free reg, but use temps only if r is not needed soon - for (i = ARRAY_SIZE(cache_regs) - 1; i >= 0; i--) { - if ((cache_regs[i].type == HR_FREE || - (cache_regs[i].type == HR_TEMP && !(cache_regs[i].flags & HRF_LOCKED))) && - (!(rcache_hint & (1 << r)) || (cache_regs[i].flags & HRF_REG))) { - tr = &cache_regs[i]; - break; - } - } - - if (!tr) - tr = rcache_evict(); - - tr->type = HR_CACHED; - tr->gregs = 1 << r; - guest_regs[r].vreg = tr - cache_regs; - - if (mode != RC_GR_WRITE) { - if (gconst_try_read(guest_regs[r].vreg, r)) { - tr->flags |= HRF_DIRTY; - guest_regs[r].flags |= GRF_DIRTY; - } else if (split >= 0) { - if (hr) { - cache_regs[split].flags |= HRF_LOCKED; - *hr = cache_regs[split].hreg; - hr = NULL; - } else if (tr->hreg != cache_regs[split].hreg) - emith_move_r_r(tr->hreg, cache_regs[split].hreg); - } else - emith_ctx_read(tr->hreg, r * 4); - } - -end: - if (hr) - *hr = tr->hreg; - if (do_locking) - tr->flags |= HRF_LOCKED; + tr = &cache_regs[dst]; tr->stamp = rcache_counter; + rcache_unref_vreg(src); + // remove r from src + if (src >= 0 && src != dst) + rcache_remove_vreg_alias(src, r); + + // if r has a constant it may have aliases + if (mode != RC_GR_WRITE && gconst_try_read(dst, r)) + src = dst; + + // if r will be modified, check for aliases being needed rsn + ali = tr->gregs & ~(1 << r); + if (mode != RC_GR_READ && src == dst && ali) { + int x = -1; + if (rcache_regs_nowsoon & ali) { + if (tr->type == HR_STATIC && guest_regs[r].sreg == dst && + !(tr->flags & HRF_LOCKED)) { + // split aliases if r is STATIC in sreg and 
dst isn't already locked + rcache_ref_vreg(dst); // lock to avoid evicting dst + if ((x = rcache_allocate_vreg(rcache_regs_nowsoon & ali)) >= 0) { + src = x; + rcache_move_vreg(src, dst); + } + rcache_unref_vreg(dst); + } else { + // split r + rcache_ref_vreg(src); // lock to avoid evicting src + if ((x = rcache_allocate_vreg(rcache_regs_nowsoon & (1 << r))) >= 0) { + dst = x; + tr = &cache_regs[dst]; + tr->stamp = rcache_counter; + } + rcache_unref_vreg(src); + } + } + if (x < 0) + // aliases not needed or no vreg available, remove them + rcache_evict_vreg_aliases(dst, r); + else if (src != dst) + rcache_remove_vreg_alias(src, r); + } + + // assign r to dst + rcache_add_vreg_alias(dst, r); + + // handle dst register transfer + if (src < 0 && mode != RC_GR_WRITE) + emith_ctx_read(tr->hreg, r * 4); + if (hr) { + *hr = (src >= 0 ? cache_regs[src].hreg : tr->hreg); + rcache_ref_vreg(reg_map_host[*hr]); + } else if (src >= 0 && cache_regs[src].hreg != tr->hreg) + emith_move_r_r(tr->hreg, cache_regs[src].hreg); + + // housekeeping + if (do_locking) + rcache_ref_vreg(dst); if (mode != RC_GR_READ) { tr->flags |= HRF_DIRTY; guest_regs[r].flags |= GRF_DIRTY; gconst_kill(r); } - +#if DRC_DEBUG & 64 + RCACHE_CHECK("after getreg"); +#endif return tr->hreg; } @@ -1731,38 +1898,25 @@ static int rcache_get_reg(sh2_reg_e r, rc_gr_mode mode, int *hr) static int rcache_get_tmp(void) { - cache_reg_t *tr = NULL; int i; - // use any free reg, but prefer TEMP regs - for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { - if (cache_regs[i].type == HR_FREE || - (cache_regs[i].type == HR_TEMP && !(cache_regs[i].flags & HRF_LOCKED))) { - tr = &cache_regs[i]; - break; - } - } + i = rcache_allocate_temp(); + rcache_ref_vreg(i); - if (!tr) - tr = rcache_evict(); - - tr->type = HR_TEMP; - tr->flags |= HRF_LOCKED; - return tr->hreg; + cache_regs[i].type = HR_TEMP; + return cache_regs[i].hreg; } -static int rcache_get_hr_id(int hr) +static int rcache_get_vreg_hr(int hr) { int i; i = reg_map_host[hr]; - 
if (i < 0) // can't happen + if (i < 0 || (cache_regs[i].flags & HRF_LOCKED)) { + printf("host register %d is locked\n", hr); exit(1); + } -#if REMAP_REGISTER - if (cache_regs[i].type == HR_CACHED) - rcache_remap_vreg(i); -#endif if (cache_regs[i].type == HR_CACHED) rcache_evict_vreg(i); else if (cache_regs[i].type == HR_TEMP && (cache_regs[i].flags & HRF_LOCKED)) { @@ -1773,167 +1927,110 @@ static int rcache_get_hr_id(int hr) return i; } -static int rcache_get_arg_id(int arg) +static int rcache_get_vreg_arg(int arg) { int hr = 0; host_arg2reg(hr, arg); - return rcache_get_hr_id(hr); + return rcache_get_vreg_hr(hr); } // get a reg to be used as function arg static int rcache_get_tmp_arg(int arg) { - int id = rcache_get_arg_id(arg); - cache_regs[id].type = HR_TEMP; - cache_regs[id].flags |= HRF_LOCKED; + int x = rcache_get_vreg_arg(arg); + cache_regs[x].type = HR_TEMP; + rcache_ref_vreg(x); - return cache_regs[id].hreg; + return cache_regs[x].hreg; } // ... as return value after a call static int rcache_get_tmp_ret(void) { - int id = rcache_get_hr_id(RET_REG); - cache_regs[id].type = HR_TEMP; - cache_regs[id].flags |= HRF_LOCKED; + int x = rcache_get_vreg_hr(RET_REG); + cache_regs[x].type = HR_TEMP; + rcache_ref_vreg(x); - return cache_regs[id].hreg; + return cache_regs[x].hreg; } // same but caches a reg if access is readonly (announced by hr being NULL) static int rcache_get_reg_arg(int arg, sh2_reg_e r, int *hr) { - int i, srcr, dstr, dstid; - int dirty = 0, src_dirty = 0, is_const = 0, is_cached = 0; + int i, srcr, dstr, dstid, keep; u32 val; host_arg2reg(dstr, arg); i = guest_regs[r].vreg; if (i >= 0 && cache_regs[i].type == HR_CACHED && cache_regs[i].hreg == dstr) - // r is already in arg + // r is already in arg, avoid evicting dstid = i; else - dstid = rcache_get_arg_id(arg); + dstid = rcache_get_vreg_arg(arg); dstr = cache_regs[dstid].hreg; - if (rcache_hint & (1 << r)) { + if (rcache_is_cached(r)) { // r is needed later on anyway srcr = rcache_get_reg_(r, 
RC_GR_READ, 0, NULL); - is_cached = (cache_regs[reg_map_host[srcr]].type == HR_CACHED); - } else if (!(rcache_hint_clean & (1 << r)) && - (guest_regs[r].flags & GRF_CDIRTY) && gconst_get(r, &val)) { + keep = 1; + } else if ((guest_regs[r].flags & GRF_CDIRTY) && gconst_get(r, &val)) { // r has an uncomitted const - load into arg, but keep constant uncomitted srcr = dstr; - is_const = 1; - } else if ((i = guest_regs[r].vreg) >= 0) { - // maybe already cached? - srcr = cache_regs[i].hreg; - is_cached = (cache_regs[reg_map_host[srcr]].type == HR_CACHED); + emith_move_r_imm(srcr, val); + keep = 0; } else { - // must read either const or from ctx + // must read from ctx srcr = dstr; - if (rcache_static & (1 << r)) - srcr = rcache_get_reg_(r, RC_GR_READ, 0, NULL); - else if (gconst_try_read(dstid, r)) - dirty = 1; - else - emith_ctx_read(srcr, r * 4); + emith_ctx_read(srcr, r * 4); + keep = 1; } - if (is_cached) { - i = reg_map_host[srcr]; - if (srcr == dstr) { // evict aliases here since it is reallocated below - if (guest_regs[r].flags & GRF_STATIC) // move STATIC back to its sreg - rcache_clean_vreg(guest_regs[r].vreg); -#if REMAP_REGISTER - rcache_remap_vreg(i); -#endif - if (cache_regs[i].type == HR_CACHED) - rcache_evict_vreg(i); - } - else if (hr != NULL) // must lock srcr if not copied here - cache_regs[i].flags |= HRF_LOCKED; - if (guest_regs[r].flags & GRF_DIRTY) - src_dirty = 1; - } + if (cache_regs[dstid].type == HR_CACHED) + rcache_evict_vreg(dstid); cache_regs[dstid].type = HR_TEMP; - if (is_const) { - // uncomitted constant - emith_move_r_imm(srcr, val); - } else if (dstr != srcr) { - // arg is a copy of cached r - if (hr == NULL) + if (hr == NULL) { + if (dstr != srcr) + // arg is a copy of cached r emith_move_r_r(dstr, srcr); - } else if (hr != NULL) { - // caller will modify arg, so it will soon be out of sync with r - if (dirty || src_dirty) { - if (~rcache_hint_write & (1 << r)) { - emith_ctx_write(dstr, r * 4); // must clean since arg will be modified 
- guest_regs[r].flags &= ~GRF_DIRTY; - } - } + else if (keep && guest_regs[r].vreg < 0) + // keep arg as vreg for r + rcache_add_vreg_alias(dstid, r); } else { - // keep arg as vreg for r - cache_regs[dstid].type = HR_CACHED; - if (guest_regs[r].vreg < 0) { - cache_regs[dstid].gregs = 1 << r; - guest_regs[r].vreg = dstid; - } - if (dirty || src_dirty) { // mark as modifed for cleaning later on - cache_regs[dstid].flags |= HRF_DIRTY; - guest_regs[r].flags |= GRF_DIRTY; - } + *hr = srcr; + if (dstr != srcr) // must lock srcr if not copied here + rcache_ref_vreg(reg_map_host[srcr]); } - if (hr) - *hr = srcr; - cache_regs[dstid].stamp = ++rcache_counter; - cache_regs[dstid].flags |= HRF_LOCKED; + rcache_ref_vreg(dstid); +#if DRC_DEBUG & 64 + RCACHE_CHECK("after getarg"); +#endif return dstr; } static void rcache_free_tmp(int hr) { int i = reg_map_host[hr]; + if (i < 0 || cache_regs[i].type != HR_TEMP) { printf("rcache_free_tmp fail: #%i hr %d, type %d\n", i, hr, cache_regs[i].type); - return; + exit(1); } - cache_regs[i].type = HR_FREE; - cache_regs[i].flags &= (HRF_REG|HRF_TEMP); + rcache_free_vreg(i); } // saves temporary result either in REG or in drctmp static int rcache_save_tmp(int hr) { - int i, free = -1, cached = -1; - u16 min_stamp = (u16)-1; + int i; // find REG, either free or unlocked temp or oldest non-hinted cached - for (i = 0; i < ARRAY_SIZE(cache_regs) && free < 0; i++) { - if ((cache_regs[i].flags & HRF_TEMP) || (cache_regs[i].flags & HRF_LOCKED)) - continue; - if (cache_regs[i].type == HR_FREE || cache_regs[i].type == HR_TEMP) - free = i; - if (cache_regs[i].type == HR_CACHED && - !((rcache_hint | rcache_locked) & cache_regs[i].gregs)) { - if (cache_regs[i].stamp < min_stamp) { - min_stamp = cache_regs[i].stamp; - cached = i; - } - } - } - - if (free >= 0) - i = free; - else if (cached >= 0) { - i = cached; - rcache_evict_vreg(i); - } else { + i = rcache_allocate_nontemp(); + if (i < 0) { // if none is available, store in drctmp emith_ctx_write(hr, 
offsetof(SH2, drc_tmp)); rcache_free_tmp(hr); @@ -1943,27 +2040,27 @@ static int rcache_save_tmp(int hr) cache_regs[i].type = HR_CACHED; cache_regs[i].gregs = 0; // not storing any guest register cache_regs[i].flags &= (HRF_TEMP|HRF_REG); - cache_regs[i].flags |= HRF_LOCKED; + cache_regs[i].ref = 0; cache_regs[i].stamp = ++rcache_counter; + rcache_ref_vreg(i); emith_move_r_r(cache_regs[i].hreg, hr); rcache_free_tmp(hr); return i; } -static int rcache_restore_tmp(int r) +static int rcache_restore_tmp(int x) { int hr; // find REG with tmp store: cached but with no gregs - if (r >= 0) { - if (cache_regs[r].type != HR_CACHED || cache_regs[r].gregs) { - printf("invalid tmp storage %d\n", r); + if (x >= 0) { + if (cache_regs[x].type != HR_CACHED || cache_regs[x].gregs) { + printf("invalid tmp storage %d\n", x); exit(1); } // found, transform to a TEMP - cache_regs[r].type = HR_TEMP; - cache_regs[r].flags |= HRF_LOCKED; - return cache_regs[r].hreg; + cache_regs[x].type = HR_TEMP; + return cache_regs[x].hreg; } // if not available, create a TEMP store and fetch from drctmp @@ -1973,51 +2070,57 @@ static int rcache_restore_tmp(int r) return hr; } -static void rcache_unlock(int hr) +static void rcache_free(int hr) { - if (hr >= 0) { - cache_regs[hr].flags &= ~HRF_LOCKED; - rcache_locked &= ~cache_regs[hr].gregs; + int x = reg_map_host[hr]; + if (cache_regs[x].type == HR_TEMP) + rcache_free_tmp(hr); + else + rcache_unref_vreg(x); +} + +static void rcache_unlock(int x) +{ + if (x >= 0) { + cache_regs[x].flags &= ~HRF_LOCKED; + cache_regs[x].ref = 0; +// rcache_regs_now &= ~cache_regs[x].gregs; } } static void rcache_unlock_all(void) { int i; - for (i = 0; i < ARRAY_SIZE(cache_regs); i++) + for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { cache_regs[i].flags &= ~HRF_LOCKED; + cache_regs[i].ref = 0; + } } -static inline void rcache_set_locked(u32 mask) +static inline void rcache_set_usage_now(u32 mask) { - rcache_locked = mask & ~rcache_static; + rcache_regs_now = mask; } -static 
inline void rcache_set_hint_soon(u32 mask) +static inline void rcache_set_usage_soon(u32 mask) { - rcache_hint_soon = mask & ~rcache_static; + rcache_regs_soon = mask; } -static inline void rcache_set_hint_late(u32 mask) +static inline void rcache_set_usage_late(u32 mask) { - rcache_hint_late = mask & ~rcache_static; + rcache_regs_late = mask; } -static inline void rcache_set_hint_write(u32 mask) +static inline void rcache_set_usage_discard(u32 mask) { - rcache_hint_write = mask & ~rcache_static; -} - -static inline int rcache_is_hinted(sh2_reg_e r) -{ - // consider static REGs as always hinted, since they are always there - return ((rcache_hint | rcache_static) & (1 << r)); + rcache_regs_discard = mask; } static inline int rcache_is_cached(sh2_reg_e r) { - // consider static REGs as always hinted, since they are always there - return (guest_regs[r].vreg >= 0); + // is r in cache or needed RSN? + return (guest_regs[r].vreg >= 0 || (rcache_regs_soonclean & (1 << r))); } static inline int rcache_is_hreg_used(int hr) @@ -2028,7 +2131,7 @@ static inline int rcache_is_hreg_used(int hr) (cache_regs[x].type != HR_TEMP || (cache_regs[x].flags & HRF_LOCKED)); } -static inline u32 rcache_used_hreg_mask(void) +static inline u32 rcache_used_hregs_mask(void) { u32 mask = 0; int i; @@ -2038,7 +2141,7 @@ static inline u32 rcache_used_hreg_mask(void) (cache_regs[i].type != HR_TEMP || (cache_regs[i].flags & HRF_LOCKED))) mask |= 1 << cache_regs[i].hreg; - return mask & ~rcache_static; + return mask; } static inline u32 rcache_dirty_mask(void) @@ -2054,13 +2157,13 @@ static inline u32 rcache_dirty_mask(void) return mask; } -static inline u32 rcache_reg_mask(void) +static inline u32 rcache_cached_mask(void) { u32 mask = 0; int i; for (i = 0; i < ARRAY_SIZE(cache_regs); i++) - if (cache_regs[i].type == HR_CACHED) + if (cache_regs[i].type == HR_CACHED || cache_regs[i].type == HR_STATIC) mask |= cache_regs[i].gregs; return mask; @@ -2070,26 +2173,40 @@ static void rcache_clean_tmp(void) 
{ int i; + rcache_regs_clean = (1 << ARRAY_SIZE(guest_regs)) - 1; for (i = 0; i < ARRAY_SIZE(cache_regs); i++) - if (cache_regs[i].type == HR_CACHED && (cache_regs[i].flags & HRF_TEMP)) + if (cache_regs[i].type == HR_CACHED && (cache_regs[i].flags & HRF_TEMP)) { + rcache_unlock(i); #if REMAP_REGISTER rcache_remap_vreg(i); #else rcache_clean_vreg(i); #endif + } + rcache_regs_clean = 0; } -static void rcache_clean_mask(u32 mask) +static void rcache_clean_masked(u32 mask) { - int i; + int i, r, hr; - if (!(mask &= ~rcache_static)) + if (!(mask &= ~rcache_regs_static)) return; - rcache_hint_clean |= mask; + rcache_regs_clean |= mask; - // clean only vregs where all aliases are covered by the mask + // clean constants where all aliases are covered by the mask + for (i = 0; i < ARRAY_SIZE(gconsts); i++) + if ((gconsts[i].gregs & mask) && !(gconsts[i].gregs & ~mask)) { + FOR_ALL_BITS_SET_DO(gconsts[i].gregs, r, + if (guest_regs[r].flags & GRF_CDIRTY) { + hr = rcache_get_reg_(r, RC_GR_READ, 0, NULL); + rcache_clean_vreg(reg_map_host[hr]); + break; + }); + } + // clean vregs where all aliases are covered by the mask for (i = 0; i < ARRAY_SIZE(cache_regs); i++) - if (cache_regs[i].type == HR_CACHED && + if ((cache_regs[i].type == HR_CACHED || cache_regs[i].type == HR_STATIC) && (cache_regs[i].gregs & mask) && !(cache_regs[i].gregs & ~mask)) rcache_clean_vreg(i); } @@ -2099,9 +2216,30 @@ static void rcache_clean(void) int i; gconst_clean(); + rcache_regs_clean = (1 << ARRAY_SIZE(guest_regs)) - 1; for (i = ARRAY_SIZE(cache_regs)-1; i >= 0; i--) if (cache_regs[i].type == HR_CACHED || cache_regs[i].type == HR_STATIC) rcache_clean_vreg(i); + + // relocate statics to their sregs (necessary before conditional jumps) + for (i = 0; i < ARRAY_SIZE(guest_regs); i++) { + if ((guest_regs[i].flags & GRF_STATIC) && + guest_regs[i].vreg != guest_regs[i].sreg) { + rcache_ref_vreg(guest_regs[i].vreg); + rcache_evict_vreg(guest_regs[i].sreg); + rcache_unref_vreg(guest_regs[i].vreg); + if 
(guest_regs[i].vreg < 0) + emith_ctx_read(cache_regs[guest_regs[i].sreg].hreg, i*4); + else + emith_move_r_r(cache_regs[guest_regs[i].sreg].hreg, + cache_regs[guest_regs[i].vreg].hreg); + cache_regs[guest_regs[i].sreg].gregs = 1 << i; + cache_regs[guest_regs[i].sreg].flags |= HRF_DIRTY; + guest_regs[i].flags |= GRF_DIRTY; + guest_regs[i].vreg = guest_regs[i].sreg; + } + } + rcache_regs_clean = 0; } static void rcache_invalidate_tmp(void) @@ -2110,11 +2248,11 @@ static void rcache_invalidate_tmp(void) for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { if (cache_regs[i].flags & HRF_TEMP) { + rcache_unlock(i); if (cache_regs[i].type == HR_CACHED) - rcache_unmap_vreg(i); - cache_regs[i].type = HR_FREE; - cache_regs[i].flags &= (HRF_TEMP|HRF_REG); - cache_regs[i].gregs = 0; + rcache_evict_vreg(i); + else + rcache_free_vreg(i); } } } @@ -2122,33 +2260,26 @@ static void rcache_invalidate_tmp(void) static void rcache_invalidate(void) { int i; - gconst_invalidate(); - for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { - cache_regs[i].flags &= (HRF_TEMP|HRF_REG); - if (cache_regs[i].type != HR_STATIC) - cache_regs[i].type = HR_FREE; - cache_regs[i].gregs = 0; - } + for (i = 0; i < ARRAY_SIZE(cache_regs); i++) + rcache_free_vreg(i); for (i = 0; i < ARRAY_SIZE(guest_regs); i++) { guest_regs[i].flags &= GRF_STATIC; if (!(guest_regs[i].flags & GRF_STATIC)) guest_regs[i].vreg = -1; else { - if (guest_regs[i].vreg < 0) - emith_ctx_read(cache_regs[guest_regs[i].sreg].hreg, i*4); - else if (guest_regs[i].vreg != guest_regs[i].sreg) - emith_move_r_r(cache_regs[guest_regs[i].sreg].hreg, - cache_regs[guest_regs[i].vreg].hreg); cache_regs[guest_regs[i].sreg].gregs = 1 << i; + cache_regs[guest_regs[i].sreg].flags |= HRF_DIRTY; + guest_regs[i].flags |= GRF_DIRTY; guest_regs[i].vreg = guest_regs[i].sreg; } } rcache_counter = 0; - rcache_hint_soon = rcache_hint_late = rcache_hint_write = rcache_hint_clean = 0; + rcache_regs_now = rcache_regs_soon = rcache_regs_late = 0; + rcache_regs_discard = 
rcache_regs_clean = 0; } static void rcache_flush(void) @@ -2171,7 +2302,7 @@ static void rcache_init(void) for (i = 0; i < ARRAY_SIZE(guest_regs); i++) if (guest_regs[i].flags & GRF_STATIC) { - rcache_static |= (1 << i); + rcache_regs_static |= (1 << i); guest_regs[i].sreg = reg_map_host[guest_regs[i].sreg]; cache_regs[guest_regs[i].sreg].type = HR_STATIC; } else @@ -2191,7 +2322,7 @@ static void rcache_init(void) // --------------------------------------------------------------- // NB may return either REG or TEMP -static int emit_get_rbase_and_offs(SH2 *sh2, sh2_reg_e r, int rmod, u32 *offs) +static int emit_get_rbase_and_offs(SH2 *sh2, sh2_reg_e r, int rmode, u32 *offs) { uptr omask = 0xff; // offset mask, XXX: ARM oriented.. u32 mask = 0; @@ -2225,21 +2356,19 @@ static int emit_get_rbase_and_offs(SH2 *sh2, sh2_reg_e r, int rmod, u32 *offs) return hr; } + // ROM, SDRAM. Host address should be mmapped to be equal to SH2 address. la = (uptr)*(void **)((char *)sh2 + poffs); - // accessing ROM or SDRAM, code location doesn't matter. The host address - // for these should be mmapped to be equal to the SH2 address. - // if r is in rcache or needed soon anyway, and offs is relative to region - // use rcached const to avoid loading a literal on ARM - if ((guest_regs[r].vreg >= 0 || ((guest_regs[r].flags & GRF_CDIRTY) && - ((rcache_hint_soon|rcache_hint_clean) & (1 << r)))) && !(*offs & ~mask)) { + + // if r is in rcache or needed soon anyway, and offs is relative to region, + // and address translation fits in add_ptr_imm (s32), then use rcached const + if (la == (s32)la && !(*offs & ~mask) && rcache_is_cached(r)) { u32 odd = a & 1; // need to fix odd address for correct byte addressing la -= (s32)((a & ~mask) - *offs - odd); // diff between reg and memory - // if reg is modified later on, allocate it RMW to remove aliases here - // else the aliases vreg stays locked and a vreg shortage may occur. - hr = hr2 = rcache_get_reg(r, rmod ? 
RC_GR_RMW : RC_GR_READ, NULL); + hr = hr2 = rcache_get_reg(r, rmode, NULL); if ((la & ~omask) - odd) { hr = rcache_get_tmp(); emith_add_r_r_ptr_imm(hr, hr2, (la & ~omask) - odd); + rcache_free(hr2); } *offs = (la & omask); } else { @@ -2285,39 +2414,55 @@ static void emit_move_r_imm32(sh2_reg_e dst, u32 imm) static void emit_move_r_r(sh2_reg_e dst, sh2_reg_e src) { - int hr_d, hr_s; - - if (guest_regs[src].vreg >= 0 || gconst_check(src) || rcache_is_hinted(src)) { - hr_s = rcache_get_reg(src, RC_GR_READ, NULL); + if (gconst_check(src) || rcache_is_cached(src)) { #if ALIAS_REGISTERS - // check for aliasing - int i = guest_regs[src].vreg; - if (guest_regs[dst].vreg != i) { - // remove possible old mapping of dst - if (guest_regs[dst].vreg >= 0) - rcache_remove_vreg_alias(guest_regs[dst].vreg, dst); - // make dst an alias of src - rcache_add_vreg_alias(i, dst); - cache_regs[i].flags |= HRF_DIRTY; - guest_regs[dst].flags |= GRF_DIRTY; - gconst_kill(dst); -#if PROPAGATE_CONSTANTS - gconst_copy(dst, src); -#endif - return; - } -#endif - hr_d = rcache_get_reg(dst, RC_GR_WRITE, NULL); + rcache_alias_vreg(dst, src); +#else + int hr_s = rcache_get_reg(src, RC_GR_READ, NULL); + int hr_d = rcache_get_reg(dst, RC_GR_WRITE, NULL); emith_move_r_r(hr_d, hr_s); -#if PROPAGATE_CONSTANTS gconst_copy(dst, src); #endif } else { - hr_d = rcache_get_reg(dst, RC_GR_WRITE, NULL); + int hr_d = rcache_get_reg(dst, RC_GR_WRITE, NULL); emith_ctx_read(hr_d, src * 4); } } +static void emit_add_r_imm(sh2_reg_e r, u32 imm) +{ + u32 val; + int isgc = gconst_get(r, &val); + int hr, hr2; + + if (!isgc || rcache_is_cached(r)) { + // not constant, or r is already in cache + hr = rcache_get_reg(r, RC_GR_RMW, &hr2); + emith_add_r_r_imm(hr, hr2, imm); + rcache_free(hr2); + if (isgc) + gconst_set(r, val + imm); + } else + gconst_new(r, val + imm); +} + +static void emit_sub_r_imm(sh2_reg_e r, u32 imm) +{ + u32 val; + int isgc = gconst_get(r, &val); + int hr, hr2; + + if (!isgc || rcache_is_cached(r)) { + 
// not constant, or r is already in cache + hr = rcache_get_reg(r, RC_GR_RMW, &hr2); + emith_sub_r_r_imm(hr, hr2, imm); + rcache_free(hr2); + if (isgc) + gconst_set(r, val - imm); + } else + gconst_new(r, val - imm); +} + static void emit_sync_t_to_sr(void) { // avoid reloading SR from context if there's nothing to do @@ -2335,8 +2480,9 @@ static int emit_memhandler_read(int size) #ifndef DRC_SR_REG // must writeback cycles for poll detection stuff if (guest_regs[SHR_SR].vreg != -1) - rcache_evict_vreg(guest_regs[SHR_SR].vreg); + rcache_unmap_vreg(guest_regs[SHR_SR].vreg); #endif + rcache_invalidate_tmp(); if (size & MF_POLLING) switch (size & MF_SIZEMASK) { @@ -2351,7 +2497,6 @@ static int emit_memhandler_read(int size) case 2: emith_call(sh2_drc_read32); break; // 32 } - rcache_invalidate_tmp(); return rcache_get_tmp_ret(); } @@ -2362,16 +2507,15 @@ static void emit_memhandler_write(int size) rcache_clean_tmp(); #ifndef DRC_SR_REG if (guest_regs[SHR_SR].vreg != -1) - rcache_evict_vreg(guest_regs[SHR_SR].vreg); + rcache_unmap_vreg(guest_regs[SHR_SR].vreg); #endif + rcache_invalidate_tmp(); switch (size & MF_SIZEMASK) { case 0: emith_call(sh2_drc_write8); break; // 8 case 1: emith_call(sh2_drc_write16); break; // 16 case 2: emith_call(sh2_drc_write32); break; // 32 } - - rcache_invalidate_tmp(); } // rd = @(Rs,#offs); rd < 0 -> return a temp @@ -2389,12 +2533,13 @@ static int emit_memhandler_read_rr(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rs, u32 off emit_move_r_imm32(rd, val); hr2 = rcache_get_reg(rd, RC_GR_RMW, NULL); } - if ((size & MF_POSTINCR) && gconst_get(rs, &val)) - gconst_new(rs, val + (1 << (size & MF_SIZEMASK))); + if (size & MF_POSTINCR) + emit_add_r_imm(rs, 1 << (size & MF_SIZEMASK)); return hr2; } - hr = emit_get_rbase_and_offs(sh2, rs, size & MF_POSTINCR, &offs); + val = size & MF_POSTINCR; + hr = emit_get_rbase_and_offs(sh2, rs, val ? 
RC_GR_RMW : RC_GR_READ, &offs); if (hr != -1) { if (rd == SHR_TMP) hr2 = rcache_get_tmp(); @@ -2405,24 +2550,14 @@ static int emit_memhandler_read_rr(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rs, u32 off case 1: emith_read16s_r_r_offs(hr2, hr, offs); break; // 16 case 2: emith_read_r_r_offs(hr2, hr, offs); emith_ror(hr2, hr2, 16); break; } - if (cache_regs[reg_map_host[hr]].type == HR_TEMP) // may also return REG - rcache_free_tmp(hr); - if (size & MF_POSTINCR) { - int isgc = gconst_get(rs, &val); - if (!isgc || guest_regs[rs].vreg >= 0) { - // already loaded - hr = rcache_get_reg(rs, RC_GR_RMW, NULL); - emith_add_r_r_imm(hr, hr, 1 << (size & MF_SIZEMASK)); - if (isgc) - gconst_set(rs, val + (1 << (size & MF_SIZEMASK))); - } else - gconst_new(rs, val + (1 << (size & MF_SIZEMASK))); - } + rcache_free(hr); + if (size & MF_POSTINCR) + emit_add_r_imm(rs, 1 << (size & MF_SIZEMASK)); return hr2; } #endif - if (gconst_get(rs, &val) && guest_regs[rs].vreg < 0 && !(rcache_hint_soon & (1 << rs))) { + if (gconst_get(rs, &val) && !rcache_is_cached(rs)) { hr = rcache_get_tmp_arg(0); emith_move_r_imm(hr, val + offs); if (size & MF_POSTINCR) @@ -2432,6 +2567,8 @@ static int emit_memhandler_read_rr(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rs, u32 off hr2 = rcache_get_reg(rs, RC_GR_RMW, NULL); emith_add_r_r_imm(hr, hr2, offs); emith_add_r_imm(hr2, 1 << (size & MF_SIZEMASK)); + if (gconst_get(rs, &val)) + gconst_set(rs, val + (1 << (size & MF_SIZEMASK))); } else { hr = rcache_get_reg_arg(0, rs, &hr2); if (offs || hr != hr2) @@ -2463,30 +2600,34 @@ static void emit_memhandler_write_rr(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rs, u32 o u32 val; if (rd == SHR_TMP) { - host_arg2reg(hr2, 1); + host_arg2reg(hr2, 1); // already locked and prepared by caller } else if ((size & MF_PREDECR) && rd == rs) { // must avoid caching rd in arg1 hr2 = rcache_get_reg_arg(1, rd, &hr); - if (hr != hr2) emith_move_r_r(hr2, hr); + if (hr != hr2) { + emith_move_r_r(hr2, hr); + rcache_free(hr2); + } } else hr2 = 
rcache_get_reg_arg(1, rd, NULL); + if (rd != SHR_TMP) + rcache_unlock(guest_regs[rd].vreg); // unlock in case rd is in arg0 - if (gconst_get(rs, &val) && guest_regs[rs].vreg < 0 && !(rcache_hint_soon & (1 << rs))) { + if (gconst_get(rs, &val) && !rcache_is_cached(rs)) { + hr = rcache_get_tmp_arg(0); if (size & MF_PREDECR) { val -= 1 << (size & MF_SIZEMASK); gconst_new(rs, val); } - hr = rcache_get_tmp_arg(0); emith_move_r_imm(hr, val + offs); } else if (offs || (size & MF_PREDECR)) { - if (size & MF_PREDECR) { - hr = rcache_get_reg(rs, RC_GR_RMW, &hr2); - emith_sub_r_r_imm(hr, hr2, 1 << (size & MF_SIZEMASK)); - } + if (size & MF_PREDECR) + emit_sub_r_imm(rs, 1 << (size & MF_SIZEMASK)); + rcache_unlock(guest_regs[rs].vreg); // unlock in case rs is in arg0 hr = rcache_get_reg_arg(0, rs, &hr2); if (offs || hr != hr2) emith_add_r_r_imm(hr, hr2, offs); } else - rcache_get_reg_arg(0, rs, NULL); + hr = rcache_get_reg_arg(0, rs, NULL); emit_memhandler_write(size); } @@ -2696,7 +2837,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) if (op_flags[i] & OF_BTARGET) ADD_TO_ARRAY(branch_target_pc, branch_target_count, pc, ); if (ops[i].op == OP_LDC && (ops[i].dest & BITMASK1(SHR_SR)) && pc+2 < end_pc) - op_flags[i+1] |= OF_BTARGET; // RTE entrypoint in case of SR(IMASK) change + op_flags[i+1] |= OF_BTARGET; // RTE entrypoint in case of SR.IMASK change #if LOOP_DETECTION // loop types detected: // 1. target: ... BRA target -> idle loop @@ -2855,10 +2996,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) drcf.polling = (drcf.loop_type == OF_POLL_LOOP ? 
MF_POLLING : 0); #endif -#if (DRC_DEBUG & ~7) - // must update PC - emit_move_r_imm32(SHR_PC, pc); -#endif rcache_clean(); #if (DRC_DEBUG & 0x10) @@ -2902,17 +3039,20 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_sync_t(sr); rcache_clean(); - tmp = rcache_used_hreg_mask(); + tmp = rcache_used_hregs_mask(); emith_save_caller_regs(tmp); emit_do_static_regs(1, 0); rcache_get_reg_arg(2, SHR_SR, NULL); tmp2 = rcache_get_tmp_arg(0); tmp3 = rcache_get_tmp_arg(1); + tmp4 = rcache_get_tmp_arg(3); emith_move_r_ptr_imm(tmp2, tcache_ptr); - emith_move_r_r_ptr(tmp3,CONTEXT_REG); + emith_move_r_r_ptr(tmp3, CONTEXT_REG); + emith_move_r_imm(tmp4, pc); + emith_ctx_write(tmp4, SHR_PC * 4); + rcache_invalidate_tmp(); emith_call(sh2_drc_log_entry); emith_restore_caller_regs(tmp); - rcache_invalidate_tmp(); #endif do_host_disasm(tcache_id); @@ -2924,9 +3064,10 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); FLUSH_CYCLES(sr); emith_sync_t(sr); + emit_move_r_imm32(SHR_PC, pc); rcache_clean(); - tmp = rcache_used_hreg_mask(); + tmp = rcache_used_hregs_mask(); emith_save_caller_regs(tmp); emit_do_static_regs(1, 0); emith_pass_arg_r(0, CONTEXT_REG); @@ -2990,43 +3131,33 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // dbg(1, "unhandled delay_dep_fw: %x", delay_dep_fw & ~BITMASK1(SHR_T)); if (delay_dep_bk & ~BITMASK2(SHR_PC, SHR_PR)) dbg(1, "unhandled delay_dep_bk: %x", delay_dep_bk); - rcache_set_hint_soon(0); - rcache_set_hint_late(0); - rcache_set_hint_write(0); } - else - { - // inform cache about future register usage - u32 late = 0; // regs read by future ops - u32 write = 0; // regs written to (to detect write before read) - u32 soon = 0; // regs read soon - tmp = (OP_ISBRANCH(opd[0].op) || opd[0].op == OP_RTE || // branching insns - opd[0].op == OP_TRAPA || opd[0].op == OP_UNDEFINED); - for (v = 1; v <= 9; v++) { - // no sense in 
looking any further than the next rcache flush - if (pc + 2*v < end_pc && !(op_flags[i+v] & OF_BTARGET) && - (!tmp || (op_flags[i+v] & OF_DELAY_OP))) { - late |= opd[v].source & ~write; - // ignore source regs after they have been written to - write |= opd[v].dest; - } else { - // upcoming rcache_flush, start writing back unused dirty stuff - tmp2 = write|opd[0].source|opd[0].dest; // insn may change reg aliases - rcache_clean_mask(rcache_dirty_mask() & ~tmp2); - break; - } - tmp |= (OP_ISBRANCH(opd[v].op) || opd[v].op == OP_RTE || - opd[v].op == OP_TRAPA || opd[v].op == OP_UNDEFINED); + + // inform cache about future register usage + u32 late = 0; // regs read by future ops + u32 write = 0; // regs written to (to detect write before read) + u32 soon = 0; // regs read soon + for (v = 1; v <= 9; v++) { + // no sense in looking any further than the next rcache flush + tmp = ((op_flags[i+v] & OF_BTARGET) || (op_flags[i+v-1] & OF_DELAY_OP) || + (OP_ISBRACND(opd[v-1].op) && !(op_flags[i+v] & OF_DELAY_OP))); + if (pc + 2*v <= end_pc && !tmp) { // (pc already incremented above) + late |= opd[v].source & ~write; + // ignore source regs after they have been written to + write |= opd[v].dest; // regs needed in the next few instructions if (v <= 4) soon = late; + } else { + // upcoming rcache_flush, start writing back unused dirty stuff + rcache_clean_masked(rcache_dirty_mask() & ~(write|opd[0].dest)); + break; } - rcache_set_hint_soon(late); // insns 1-3 - rcache_set_hint_late(late & ~soon); // insns 4-9 - rcache_set_hint_write(write & ~(late|soon) & ~opd[0].source); - // overwritten without being used } - rcache_set_locked(opd[0].source); // try not to evict src regs for this op + rcache_set_usage_now(opd[0].source); // current insn + rcache_set_usage_soon(late); // insns 1-3 + rcache_set_usage_late(late & ~soon); // insns 4-9 + rcache_set_usage_discard(write & ~(late|soon) & ~opd[0].source); switch (opd->op) { @@ -3069,7 +3200,8 @@ static void REGPARM(2) *sh2_translate(SH2 
*sh2, int tcache_id) case OP_RTE: // RTE 0000000000101011 emith_invalidate_t(); // pop PC - emit_memhandler_read_rr(sh2, SHR_PC, SHR_SP, 0, 2 | MF_POSTINCR); + tmp = emit_memhandler_read_rr(sh2, SHR_PC, SHR_SP, 0, 2 | MF_POSTINCR); + rcache_free(tmp); // pop SR tmp = emit_memhandler_read_rr(sh2, SHR_TMP, SHR_SP, 0, 2 | MF_POSTINCR); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); @@ -3853,11 +3985,10 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) ///////////////////////////////////////////// case 0x07: // ADD #imm,Rn 0111nnnniiiiiiii - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); - if (op & 0x80) { // adding negative - emith_sub_r_r_imm(tmp, tmp2, -op & 0xff); - } else - emith_add_r_r_imm(tmp, tmp2, op & 0xff); + if (op & 0x80) // adding negative + emit_sub_r_imm(GET_Rn(), (u8)-op); + else + emit_add_r_imm(GET_Rn(), (u8)op); goto end_op; ///////////////////////////////////////////// @@ -3968,6 +4099,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) end_op: rcache_unlock_all(); +#if DRC_DEBUG & 64 + RCACHE_CHECK("after insn"); +#endif cycles += opd->cycles; @@ -4007,6 +4141,7 @@ end_op: // idle or delay loop emit_sync_t_to_sr(); emith_sh2_delay_loop(cycles, drcf.delay_reg); + rcache_unlock_all(); // may lock delay_reg drcf.polling = drcf.loop_type = 0; } #endif @@ -4075,8 +4210,8 @@ end_op: emith_jump_cond_patchable(cond, target); } else if (target != NULL) { - emith_jump_patchable(target); rcache_invalidate(); + emith_jump_patchable(target); } // branch not taken, correct cycle count @@ -4099,6 +4234,7 @@ end_op: emith_sync_t(sr); rcache_clean(); tmp = rcache_get_reg_arg(0, SHR_PC, NULL); + rcache_invalidate(); #if CALL_STACK struct op_data *opd_b = (op_flags[i] & OF_DELAY_OP) ? 
opd-1 : opd; if (opd_b->rm == SHR_PR) { @@ -4108,6 +4244,7 @@ end_op: // JSR/BSRF tmp = rcache_get_tmp_arg(1); emith_call_link(tmp, sh2_drc_dispatcher_call); + rcache_free(tmp); } else #endif if (gconst_get(SHR_PC, &target_pc)) { @@ -4118,7 +4255,6 @@ end_op: // JMP emith_jump(sh2_drc_dispatcher); } - rcache_invalidate(); drcf.pending_branch_indirect = 0; drcf.polling = drcf.loop_type = 0; } @@ -4147,8 +4283,8 @@ end_op: target = dr_prepare_ext_branch(block->entryp, pc, sh2->is_slave, tcache_id); if (target == NULL) return NULL; - emith_jump_patchable(target); rcache_invalidate(); + emith_jump_patchable(target); } else rcache_flush(); emith_flush(); @@ -4452,14 +4588,14 @@ static void sh2_generate_utils(void) tmp = rcache_get_reg_arg(1, SHR_SR, NULL); emith_clear_msb(tmp, tmp, 22); emith_move_r_r_ptr(arg2, CONTEXT_REG); - emith_call(p32x_sh2_write32); // XXX: use sh2_drc_write32? rcache_invalidate(); + emith_call(p32x_sh2_write32); // XXX: use sh2_drc_write32? // push PC rcache_get_reg_arg(0, SHR_SP, NULL); emith_ctx_read(arg1, SHR_PC * 4); emith_move_r_r_ptr(arg2, CONTEXT_REG); - emith_call(p32x_sh2_write32); rcache_invalidate(); + emith_call(p32x_sh2_write32); // update I, cycles, do callback emith_ctx_read(arg1, offsetof(SH2, pending_level)); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); @@ -4476,8 +4612,8 @@ static void sh2_generate_utils(void) if (arg0 != RET_REG) emith_move_r_r(arg0, RET_REG); emith_call_cleanup(); - emith_jump(sh2_drc_dispatcher); rcache_invalidate(); + emith_jump(sh2_drc_dispatcher); emith_flush(); // sh2_drc_entry(SH2 *sh2) From d80a5fd2ab743382f734346760fadd2dc44955f1 Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 30 Jul 2019 20:55:48 +0200 Subject: [PATCH 0206/1110] sh2 drc: add mipsel backend for MIPS32 Release 1 (for JZ47xx) --- Makefile | 5 +- config.gcw0 | 16 + cpu/drc/emit_arm.c | 7 +- cpu/drc/emit_mips.c | 1464 ++++++++++++++++++++++++++++++++++++ cpu/drc/emit_x86.c | 7 +- cpu/sh2/compiler.c | 84 ++- cpu/sh2/compiler.h | 2 + 
platform/common/common.mak | 2 +- platform/common/disarm.c | 2 +- platform/common/disarm.h | 2 +- platform/common/dismips.c | 346 +++++++++ platform/common/dismips.h | 6 + platform/linux/emu.c | 2 +- 13 files changed, 1922 insertions(+), 23 deletions(-) create mode 100644 config.gcw0 create mode 100644 cpu/drc/emit_mips.c create mode 100644 platform/common/dismips.c create mode 100644 platform/common/dismips.h diff --git a/Makefile b/Makefile index 1b2aab41..62accf77 100644 --- a/Makefile +++ b/Makefile @@ -55,6 +55,9 @@ use_cz80 ?= 1 ifneq (,$(findstring 86,$(ARCH))) use_sh2drc ?= 1 endif +ifneq (,$(findstring mips,$(ARCH))) +use_sh2drc ?= 1 +endif endif -include Makefile.local @@ -245,7 +248,7 @@ pico/carthw_cfg.c: pico/carthw.cfg # random deps pico/carthw/svp/compiler.o : cpu/drc/emit_arm.c cpu/sh2/compiler.o : cpu/drc/emit_arm.c -cpu/sh2/compiler.o : cpu/drc/emit_x86.c +cpu/sh2/compiler.o : cpu/drc/emit_x86.c cpu/drc/emit_mips.c cpu/sh2/mame/sh2pico.o : cpu/sh2/mame/sh2.c pico/pico.o pico/cd/mcd.o pico/32x/32x.o : pico/pico_cmn.c pico/pico_int.h pico/memory.o pico/cd/memory.o pico/32x/memory.o : pico/pico_int.h pico/memory.h diff --git a/config.gcw0 b/config.gcw0 new file mode 100644 index 00000000..1d2ccef0 --- /dev/null +++ b/config.gcw0 @@ -0,0 +1,16 @@ +# Automatically generated by configure +# Configured with: './configure' '--platform=generic' +CC = mipsel-gcw0-linux-uclibc-gcc +CXX = mipsel-gcw0-linux-uclibc-g++ +AS = mipsel-gcw0-linux-uclibc-as +STRIP = mipsel-gcw0-linux-uclibc-strip +CFLAGS += -I${HOME}/opt/gcw0-toolchain/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include/ +CFLAGS += -I${HOME}/opt/gcw0-toolchain/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include/SDL +CFLAGS += -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -fno-stack-protector +ASFLAGS += +LDFLAGS += +LDLIBS += -B${HOME}/opt/gcw0-toolchain/usr/lib -Wl,-rpath-link=${HOME}/opt/gcw0-toolchain/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/lib 
-Wl,-rpath-link=${HOME}/opt/gcw0-toolchain/usr/mipsel-gcw0-linux-uclibc/sysroot/lib -lSDL -lasound -lpng -lz -lm -lstdc++ -ldl + +ARCH = mipsel +PLATFORM = opendingux +SOUND_DRIVERS = sdl diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index 0eb2d972..72542a3f 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -1098,11 +1098,14 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) #define emith_jump_cond_patchable(cond, target) \ emith_jump_cond(cond, target) -#define emith_jump_patch(ptr, target) do { \ +#define emith_jump_patch(ptr, target) ({ \ u32 *ptr_ = ptr; \ u32 val_ = (u32 *)(target) - ptr_ - 2; \ *ptr_ = (*ptr_ & 0xff000000) | (val_ & 0x00ffffff); \ -} while (0) + (u8 *)ptr; \ +}) + +#define emith_jump_patch_size() 4 #define emith_jump_at(ptr, target) do { \ u32 val_ = (u32 *)(target) - (u32 *)(ptr) - 2; \ diff --git a/cpu/drc/emit_mips.c b/cpu/drc/emit_mips.c new file mode 100644 index 00000000..f56b89a3 --- /dev/null +++ b/cpu/drc/emit_mips.c @@ -0,0 +1,1464 @@ +/* + * Basic macros to emit MIPS II/MIPS32 Release 1 instructions and some utils + * Copyright (C) 2019 kub + * + * This work is licensed under the terms of MAME license. + * See COPYING file in the top-level directory. + */ +#define HOST_REGS 32 +#define CONTEXT_REG 23 // s7 +#define RET_REG 2 // v0 + +// NB: the ubiquitous JZ74[46]0 uses MIPS32 Release 1, a slight MIPS II superset + +// registers usable for user code: r1-r25, others reserved or special +#define Z0 0 // zero register +#define GP 28 // global pointer +#define SP 29 // stack pointer +#define FP 30 // frame pointer +#define LR 31 // link register +// internally used by code emitter: +#define AT 1 // used to hold intermediate results +#define FNZ 15 // emulated processor flags: N (bit 31) ,Z (all bits) +#define FC 24 // emulated processor flags: C (bit 0), others 0 +#define FV 25 // emulated processor flags: Nt^Ns (bit 31). others ? 
+ + +// unified conditions; virtual, not corresponding to anything real on MIPS +#define DCOND_EQ 0x0 +#define DCOND_NE 0x1 +#define DCOND_HS 0x2 +#define DCOND_LO 0x3 +#define DCOND_MI 0x4 +#define DCOND_PL 0x5 +#define DCOND_VS 0x6 +#define DCOND_VC 0x7 +#define DCOND_HI 0x8 +#define DCOND_LS 0x9 +#define DCOND_GE 0xa +#define DCOND_LT 0xb +#define DCOND_GT 0xc +#define DCOND_LE 0xd + +#define DCOND_CS DCOND_LO +#define DCOND_CC DCOND_HS + +// unified insn +#define MIPS_INSN(op, rs, rt, rd, sa, fn) \ + (((op)<<26)|((rs)<<21)|((rt)<<16)|((rd)<<11)|((sa)<<6)|((fn)<<0)) + +#define _ 0 // marker for "field unused" +#define __(n) o##n // enum marker for "undefined" + +// opcode field (encoded in op) +enum { OP__FN=000, OP__RT, OP_J, OP_JAL, OP_BEQ, OP_BNE, OP_BLEZ, OP_BGTZ }; +enum { OP_ADDI=010, OP_ADDIU, OP_SLTI, OP_SLTIU, OP_ANDI, OP_ORI, OP_XORI, OP_LUI }; +enum { OP_LB=040, OP_LH, OP_LWL, OP_LW, OP_LBU, OP_LHU, OP_LWR }; +enum { OP_SB=050, OP_SH, OP_SWL, OP_SW, __(54), __(55), OP_SWR }; +// function field (encoded in fn if opcode = OP__FN) +enum { FN_SLL=000, __(01), FN_SRL, FN_SRA, FN_SLLV, __(05), FN_SRLV, FN_SRAV }; +enum { FN_MFHI=020, FN_MTHI, FN_MFLO, FN_MTLO }; +enum { FN_MULT=030, FN_MULTU, FN_DIV, FN_DIVU }; +enum { FN_ADD=040, FN_ADDU, FN_SUB, FN_SUBU, FN_AND, FN_OR, FN_XOR, FN_NOR }; +enum { FN_JR=010, FN_JALR, FN_MOVZ, FN_MOVN, FN_SYNC=017, FN_SLT=052, FN_SLTU }; +// rt field (encoded in rt if opcode = OP__RT) +enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 }; + +#define MIPS_NOP 000 // null operation: SLL r0, r0, #0 + +// arithmetic/logical + +#define MIPS_OP_REG(op, rd, rs, rt) \ + MIPS_INSN(OP__FN, rs, rt, rd, _, op) // R-type, SPECIAL +#define MIPS_OP_IMM(op, rt, rs, imm) \ + MIPS_INSN(op, rs, rt, _, _, (u16)(imm)) // I-type + +// rd = rt OP rs +#define MIPS_ADD_REG(rd, rs, rt) \ + MIPS_OP_REG(FN_ADDU, rd, rs, rt) +#define MIPS_SUB_REG(rd, rs, rt) \ + MIPS_OP_REG(FN_SUBU, rd, rs, rt) + +#define MIPS_NEG_REG(rd, rt) \ + 
MIPS_SUB_REG(rd, Z0, rt) + +#define MIPS_XOR_REG(rd, rs, rt) \ + MIPS_OP_REG(FN_XOR, rd, rs, rt) +#define MIPS_OR_REG(rd, rs, rt) \ + MIPS_OP_REG(FN_OR, rd, rs, rt) +#define MIPS_AND_REG(rd, rs, rt) \ + MIPS_OP_REG(FN_AND, rd, rs, rt) +#define MIPS_NOR_REG(rd, rs, rt) \ + MIPS_OP_REG(FN_NOR, rd, rs, rt) + +#define MIPS_MOVE_REG(rd, rs) \ + MIPS_OR_REG(rd, rs, Z0) +#define MIPS_MVN_REG(rd, rs) \ + MIPS_NOR_REG(rd, rs, Z0) + +// rd = rt SHIFT rs +#define MIPS_LSL_REG(rd, rt, rs) \ + MIPS_OP_REG(FN_SLLV, rd, rs, rt) +#define MIPS_LSR_REG(rd, rt, rs) \ + MIPS_OP_REG(FN_SRLV, rd, rs, rt) +#define MIPS_ASR_REG(rd, rt, rs) \ + MIPS_OP_REG(FN_SRAV, rd, rs, rt) + +// rd = (rs < rt) +#define MIPS_SLT_REG(rd, rs, rt) \ + MIPS_OP_REG(FN_SLT, rd, rs, rt) +#define MIPS_SLTU_REG(rd, rs, rt) \ + MIPS_OP_REG(FN_SLTU, rd, rs, rt) + +// rt = rs OP imm16 +#define MIPS_ADD_IMM(rt, rs, imm16) \ + MIPS_OP_IMM(OP_ADDIU, rt, rs, imm16) + +#define MIPS_XOR_IMM(rt, rs, imm16) \ + MIPS_OP_IMM(OP_XORI, rt, rs, imm16) +#define MIPS_OR_IMM(rt, rs, imm16) \ + MIPS_OP_IMM(OP_ORI, rt, rs, imm16) +#define MIPS_AND_IMM(rt, rs, imm16) \ + MIPS_OP_IMM(OP_ANDI, rt, rs, imm16) + +// rt = (imm16 << (0|16)) +#define MIPS_MOV_IMM(rt, imm16) \ + MIPS_OP_IMM(OP_ORI, rt, Z0, imm16) +#define MIPS_MOVT_IMM(rt, imm16) \ + MIPS_OP_IMM(OP_LUI, rt, _, imm16) + +// rd = rt SHIFT imm5 +#define MIPS_LSL_IMM(rd, rt, bits) \ + MIPS_INSN(OP__FN, _, rt, rd, bits, FN_SLL) +#define MIPS_LSR_IMM(rd, rt, bits) \ + MIPS_INSN(OP__FN, _, rt, rd, bits, FN_SRL) +#define MIPS_ASR_IMM(rd, rt, bits) \ + MIPS_INSN(OP__FN, _, rt, rd, bits, FN_SRA) + +// rt = (rs < imm16) +#define MIPS_SLT_IMM(rt, rs, imm16) \ + MIPS_OP_IMM(OP_SLTI, rt, rs, imm16) +#define MIPS_SLTU_IMM(rt, rs, imm16) \ + MIPS_OP_IMM(OP_SLTIU, rt, rs, imm16) + +// multiplication + +#define MIPS_MULT(rt, rs) \ + MIPS_OP_REG(FN_MULT, _, rs, rt) +#define MIPS_MULTU(rt, rs) \ + MIPS_OP_REG(FN_MULTU, _, rs, rt) +#define MIPS_MFLO(rd) \ + MIPS_OP_REG(FN_MFLO, rd, _, _) 
+#define MIPS_MFHI(rd) \ + MIPS_OP_REG(FN_MFHI, rd, _, _) + +// branching + +#define MIPS_J(abs26) \ + MIPS_INSN(OP_J, _,_,_,_, (abs26) >> 2) // J-type +#define MIPS_JAL(abs26) \ + MIPS_INSN(OP_JAL, _,_,_,_, (abs26) >> 2) +#define MIPS_JR(rs) \ + MIPS_OP_REG(FN_JR,_,rs,_) +#define MIPS_JALR(rd, rs) \ + MIPS_OP_REG(FN_JALR,rd,rs,_) + +// conditional branches; no condition code, these compare rs against rt or Z0 +#define MIPS_BEQ (OP_BEQ << 5) +#define MIPS_BNE (OP_BNE << 5) +#define MIPS_BLE (OP_BLEZ << 5) +#define MIPS_BGT (OP_BGTZ << 5) +#define MIPS_BLT ((OP__RT << 5)|RT_BLTZ) +#define MIPS_BGE ((OP__RT << 5)|RT_BGEZ) +#define MIPS_BGTL ((OP__RT << 5)|RT_BLTZAL) +#define MIPS_BGEL ((OP__RT << 5)|RT_BGEZAL) + +#define MIPS_BCONDZ(cond, rs, offs16) \ + MIPS_OP_IMM((cond >> 5), (cond & 0x1f), rs, (offs16) >> 2) +#define MIPS_B(offs16) \ + MIPS_BCONDZ(MIPS_BEQ, Z0, offs16) +#define MIPS_BL(offs16) \ + MIPS_BCONDZ(MIPS_BGEL, Z0, offs16) + +// load/store indexed base + +#define MIPS_LW(rt, rs, offs16) \ + MIPS_INSN(OP_LW, rs, rt, _,_, (u16)(offs16)) +#define MIPS_LH(rt, rs, offs16) \ + MIPS_INSN(OP_LH, rs, rt, _,_, (u16)(offs16)) +#define MIPS_LB(rt, rs, offs16) \ + MIPS_INSN(OP_LB, rs, rt, _,_, (u16)(offs16)) +#define MIPS_LHU(rt, rs, offs16) \ + MIPS_INSN(OP_LHU, rs, rt, _,_, (u16)(offs16)) +#define MIPS_LBU(rt, rs, offs16) \ + MIPS_INSN(OP_LBU, rs, rt, _,_, (u16)(offs16)) + +#define MIPS_SW(rt, rs, offs16) \ + MIPS_INSN(OP_SW, rs, rt, _,_, (u16)(offs16)) +#define MIPS_SH(rt, rs, offs16) \ + MIPS_INSN(OP_SH, rs, rt, _,_, (u16)(offs16)) +#define MIPS_SB(rt, rs, offs16) \ + MIPS_INSN(OP_SB, rs, rt, _,_, (u16)(offs16)) + +// XXX: tcache_ptr type for SVP and SH2 compilers differs.. 
+#define EMIT_PTR(ptr, x) \ + do { \ + *(u32 *)(ptr) = x; \ + ptr = (void *)((u8 *)(ptr) + sizeof(u32)); \ + } while (0) + +// FIFO for 2 instructions, for delay slot handling +u32 emith_last_insns[2] = { -1,-1 }; +int emith_last_idx; + +#define EMIT_PUSHOP() \ + do { \ + emith_last_idx ^= 1; \ + if (emith_last_insns[emith_last_idx] != -1) \ + EMIT_PTR(tcache_ptr, emith_last_insns[emith_last_idx]);\ + emith_last_insns[emith_last_idx] = -1; \ + } while (0) + +#define EMIT(op) \ + do { \ + EMIT_PUSHOP(); \ + emith_last_insns[emith_last_idx] = op; \ + COUNT_OP; \ + } while (0) + +#define emith_flush() \ + do { \ + int i; for (i = 0; i < 2; i++) EMIT_PUSHOP(); \ + } while (0) + +#define emith_insn_ptr() (u8 *)((u32 *)tcache_ptr + \ + (emith_last_insns[0] != -1) + (emith_last_insns[1] != -1)) + +// delay slot stuff +static int emith_is_j(u32 op) // J, JAL + { return ((op>>26) & 076) == OP_J; } +static int emith_is_jr(u32 op) // JR, JALR + { return (op>>26) == OP__FN && (op & 076) == FN_JR; } +static int emith_is_b(u32 op) // B + { return ((op>>26) & 074) == OP_BEQ || + ((op>>26) == OP__RT && ((op>>16) & 036) == RT_BLTZ); } +// register usage for dependency evaluation XXX better do this as in emit_arm? +static uint64_t emith_has_rs[3] = // OP__FN, OP__RT, others + { 0x00fffffffffa0ff0ULL, 0x000fff0fUL, 0xffffffff0f007f30ULL }; +static uint64_t emith_has_rt[3] = // OP__FN, OP__RT, others + { 0xff00fffffff00cffULL, 0x00000000UL, 0x8000ff0000000030ULL }; +static uint64_t emith_has_rd[3] = // OP__FN, OP__RT, others (rt instead of rd) + { 0xff00fffffff50fffULL, 0x00000000UL, 0x119100ff0f00ff00ULL }; +#define emith_has_(rx,ix,op,sa,m) \ + (emith_has_##rx[ix] & (1ULL << (((op)>>(sa)) & (m)))) +static int emith_rs(u32 op) + { if ((op>>26) == OP__FN) + return emith_has_(rs,0,op, 0,0x3f) ? (op>>21)&0x1f : 0; + if ((op>>26) == OP__RT) + return emith_has_(rs,1,op,16,0x1f) ? (op>>21)&0x1f : 0; + return emith_has_(rs,2,op,26,0x3f) ? 
(op>>21)&0x1f : 0; + } +static int emith_rt(u32 op) + { if ((op>>26) == OP__FN) + return emith_has_(rt,0,op, 0,0x3f) ? (op>>16)&0x1f : 0; + if ((op>>26) == OP__RT) + return 0; + return emith_has_(rt,2,op,26,0x3f) ? (op>>16)&0x1f : 0; + } +static int emith_rd(u32 op) + { if ((op>>26) == OP__FN) + return emith_has_(rd,0,op, 0,0x3f) ? (op>>11)&0x1f :-1; + if ((op>>26) == OP__RT) + return -1; + return emith_has_(rd,2,op,26,0x3f) ? (op>>16)&0x1f :-1; + } + +static int emith_b_isswap(u32 bop, u32 lop) +{ + if (emith_is_j(bop)) + return bop; + else if (emith_is_jr(bop) && emith_rd(lop) != emith_rs(bop)) + return bop; + else if (emith_is_b(bop) && emith_rd(lop) != emith_rs(bop)) + if ((bop & 0xffff) != 0x7fff) // displacement overflow? + return (bop & 0xffff0000) | ((bop & 0xffff)+1); + return 0; +} + +// emit branch, trying to fill the delay slot with one of the last insns +static void *emith_branch(u32 op) +{ + int idx = emith_last_idx; + u32 op1 = emith_last_insns[idx], op2 = emith_last_insns[idx^1]; + u32 bop = 0; + void *bp; + + // check last insn (op1) + if (op1 != -1 && op1) + bop = emith_b_isswap(op, op1); + // if not, check older insn (op2); mustn't interact with op1 to overtake + if (!bop && op2 != -1 && op2 && emith_rd(op1) != emith_rd(op2) && + emith_rs(op1) != emith_rd(op2) && emith_rt(op1) != emith_rd(op2) && + emith_rs(op2) != emith_rd(op1) && emith_rt(op2) != emith_rd(op1)) { + idx ^= 1; + bop = emith_b_isswap(op, op2); + } + + if (bop) { // can swap + if (emith_last_insns[idx^1] != -1) + EMIT_PTR(tcache_ptr, emith_last_insns[idx^1]); + bp = tcache_ptr; + EMIT_PTR(tcache_ptr, bop); COUNT_OP; + EMIT_PTR(tcache_ptr, emith_last_insns[idx]); + emith_last_insns[0] = emith_last_insns[1] = -1; + } else { // can't swap + emith_flush(); + bp = tcache_ptr; + EMIT_PTR(tcache_ptr, op); COUNT_OP; + EMIT_PTR(tcache_ptr, MIPS_NOP); COUNT_OP; + } + return bp; +} + +// if-then-else conditional execution helpers +#define JMP_POS(ptr) \ + ptr = 
emith_branch(MIPS_BCONDZ(cond_m, cond_r, 0)); + +#define JMP_EMIT(cond, ptr) { \ + u32 val_ = emith_insn_ptr() - (u8 *)(ptr) - 4; \ + EMIT_PTR(ptr, MIPS_BCONDZ(cond_m, cond_r, val_ & 0x0003ffff)); \ + emith_flush(); /* NO delay slot handling across jump targets */ \ +} + +#define JMP_EMIT_NC(ptr) { \ + u32 val_ = emith_insn_ptr() - (u8 *)(ptr) - 4; \ + EMIT_PTR(ptr, MIPS_B(val_ & 0x0003ffff)); \ + emith_flush(); \ +} + +#define EMITH_JMP_START(cond) { \ + int cond_r, cond_m = emith_cond_check(cond, &cond_r); \ + u8 *cond_ptr; \ + JMP_POS(cond_ptr) + +#define EMITH_JMP_END(cond) \ + JMP_EMIT(cond, cond_ptr); \ +} + +#define EMITH_JMP3_START(cond) { \ + int cond_r, cond_m = emith_cond_check(cond, &cond_r); \ + u8 *cond_ptr, *else_ptr; \ + JMP_POS(cond_ptr) + +#define EMITH_JMP3_MID(cond) \ + JMP_POS(else_ptr); \ + JMP_EMIT(cond, cond_ptr); + +#define EMITH_JMP3_END() \ + JMP_EMIT_NC(else_ptr); \ +} + +// "simple" jump (no more then a few insns) +// ARM32 will use conditional instructions here +#define EMITH_SJMP_START EMITH_JMP_START +#define EMITH_SJMP_END EMITH_JMP_END + +#define EMITH_SJMP3_START EMITH_JMP3_START +#define EMITH_SJMP3_MID EMITH_JMP3_MID +#define EMITH_SJMP3_END EMITH_JMP3_END + +#define EMITH_SJMP2_START(cond) \ + EMITH_SJMP3_START(cond) +#define EMITH_SJMP2_MID(cond) \ + EMITH_SJMP3_MID(cond) +#define EMITH_SJMP2_END(cond) \ + EMITH_SJMP3_END() + + +// flag register emulation. this is modelled after arm/x86. +// the FNZ register stores the result of the last flag setting operation for +// N and Z flag, used for EQ,NE,MI,PL branches. +// the FC register stores the C flag (used for HI,HS,LO,LS,CC,CS). +// the FV register stores information for V flag calculation (used for +// GT,GE,LT,LE,VC,VS). V flag is costly and only fully calculated when needed. +// the core registers may be temp registers, since the condition after calls +// is undefined anyway. + +// flag emulation creates 2 (ie cmp #0/beq) up to 9 (ie adcf/ble) extra insns. 
+// flag handling shortcuts may reduce this by 1-4 insns, see emith_cond_check() +int emith_flg_rs, emith_flg_rt; // registers used in FNZ=rs-rt (aka cmp_r_r) +int emith_flg_noV; // V flag known not to be set + +// store minimal cc information: rd, rt^rs, carry +// NB: the result *must* first go to FNZ, in case rd == rs or rd == rt. +// NB: for adcf and sbcf, carry-in must be dealt with separately (see there) +static void emith_set_arith_flags(int rd, int rt, int rs, s32 imm, int sub) +{ + if (sub && rd == FNZ && rt && rs) // is this cmp_r_r? + emith_flg_rs = rs, emith_flg_rt = rt; + else emith_flg_rs = emith_flg_rt = 0; + + if (sub) // C = sub:rt<rd, add:rd<rt + EMIT(MIPS_SLTU_REG(FC, rt, FNZ)); + else EMIT(MIPS_SLTU_REG(FC, FNZ, rt));// C in FC, bit 0 + + emith_flg_noV = 0; + if (rs > 0) // Nt^Ns + EMIT(MIPS_XOR_REG(FV, rt, rs)); + else if (imm < 0) + EMIT(MIPS_NOR_REG(FV, rt, Z0)); + else if (imm > 0) + EMIT(MIPS_OR_REG(FV, rt, Z0)); // Nt^Ns in FV, bit 31 + else emith_flg_noV = 1; // imm #0, never overflows + // full V = Nd^Nt^Ns^C calculation is deferred until really needed + + if (rd != FNZ) + EMIT(MIPS_MOVE_REG(rd, FNZ)); // N,Z via result value in FNZ +} + +// data processing, register +#define emith_move_r_r_ptr(d, s) \ + EMIT(MIPS_MOVE_REG(d, s)) +#define emith_move_r_r_ptr_c(cond, d, s) \ + emith_move_r_r_ptr(d, s) + +#define emith_move_r_r(d, s) \ + emith_move_r_r_ptr(d, s) +#define emith_move_r_r_c(cond, d, s) \ + emith_move_r_r(d, s) + +#define emith_mvn_r_r(d, s) \ + EMIT(MIPS_MVN_REG(d, s)) + +#define emith_add_r_r_r_lsl_ptr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ + EMIT(MIPS_ADD_REG(d, s1, AT)); \ + } else EMIT(MIPS_ADD_REG(d, s1, s2)); \ +} while (0) +#define emith_add_r_r_r_lsl(d, s1, s2, simm) \ + emith_add_r_r_r_lsl_ptr(d, s1, s2, simm) + +#define emith_add_r_r_r_lsr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSR_IMM(AT, s2, simm)); \ + EMIT(MIPS_ADD_REG(d, s1, AT)); \ + } else EMIT(MIPS_ADD_REG(d, s1, s2)); \ +} while (0) + +#define emith_addf_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ +
EMIT(MIPS_ADD_REG(FNZ, s1, AT)); \ + emith_set_arith_flags(d, s1, AT, 0, 0); \ + } else { \ + EMIT(MIPS_ADD_REG(FNZ, s1, s2)); \ + emith_set_arith_flags(d, s1, s2, 0, 0); \ + } \ +} while (0) + +#define emith_addf_r_r_r_lsr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSR_IMM(AT, s2, simm)); \ + EMIT(MIPS_ADD_REG(FNZ, s1, AT)); \ + emith_set_arith_flags(d, s1, AT, 0, 0); \ + } else { \ + EMIT(MIPS_ADD_REG(FNZ, s1, s2)); \ + emith_set_arith_flags(d, s1, s2, 0, 0); \ + } \ +} while (0) + +#define emith_sub_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ + EMIT(MIPS_SUB_REG(d, s1, AT)); \ + } else EMIT(MIPS_SUB_REG(d, s1, s2)); \ +} while (0) + +#define emith_subf_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ + EMIT(MIPS_SUB_REG(FNZ, s1, AT)); \ + emith_set_arith_flags(d, s1, AT, 0, 1); \ + } else { \ + EMIT(MIPS_SUB_REG(FNZ, s1, s2)); \ + emith_set_arith_flags(d, s1, s2, 0, 1); \ + } \ +} while (0) + +#define emith_or_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ + EMIT(MIPS_OR_REG(d, s1, AT)); \ + } else EMIT(MIPS_OR_REG(d, s1, s2)); \ +} while (0) + +#define emith_eor_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ + EMIT(MIPS_XOR_REG(d, s1, AT)); \ + } else EMIT(MIPS_XOR_REG(d, s1, s2)); \ +} while (0) + +#define emith_eor_r_r_r_lsr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSR_IMM(AT, s2, simm)); \ + EMIT(MIPS_XOR_REG(d, s1, AT)); \ + } else EMIT(MIPS_XOR_REG(d, s1, s2)); \ +} while (0) + +#define emith_and_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ + EMIT(MIPS_AND_REG(d, s1, AT)); \ + } else EMIT(MIPS_AND_REG(d, s1, s2)); \ +} while (0) + +#define emith_or_r_r_lsl(d, s, lslimm) \ + emith_or_r_r_r_lsl(d, d, s, lslimm) + +#define emith_eor_r_r_lsr(d, s, lsrimm) \ + emith_eor_r_r_r_lsr(d, d, s, lsrimm) + +#define emith_add_r_r_r(d, s1, s2) \ + 
emith_add_r_r_r_lsl(d, s1, s2, 0) + +#define emith_addf_r_r_r(d, s1, s2) \ + emith_addf_r_r_r_lsl(d, s1, s2, 0) + +#define emith_sub_r_r_r(d, s1, s2) \ + emith_sub_r_r_r_lsl(d, s1, s2, 0) + +#define emith_subf_r_r_r(d, s1, s2) \ + emith_subf_r_r_r_lsl(d, s1, s2, 0) + +#define emith_or_r_r_r(d, s1, s2) \ + emith_or_r_r_r_lsl(d, s1, s2, 0) + +#define emith_eor_r_r_r(d, s1, s2) \ + emith_eor_r_r_r_lsl(d, s1, s2, 0) + +#define emith_and_r_r_r(d, s1, s2) \ + emith_and_r_r_r_lsl(d, s1, s2, 0) + +#define emith_add_r_r_ptr(d, s) \ + emith_add_r_r_r_lsl_ptr(d, d, s, 0) +#define emith_add_r_r(d, s) \ + emith_add_r_r_r(d, d, s) + +#define emith_sub_r_r(d, s) \ + emith_sub_r_r_r(d, d, s) + +#define emith_neg_r_r(d, s) \ + EMIT(MIPS_NEG_REG(d, s)) + +#define emith_adc_r_r_r(d, s1, s2) do { \ + emith_add_r_r_r(AT, s1, FC); \ + emith_add_r_r_r(d, AT, s2); \ +} while (0) + +#define emith_adc_r_r(d, s) \ + emith_adc_r_r_r(d, d, s) + +// NB: the incoming C can cause its own outgoing C if s2+C=0 (or s1+C=0 FWIW) +// moreover, s2 is 0 if there is C, so no other C can be generated. 
+#define emith_adcf_r_r_r(d, s1, s2) do { \ + emith_add_r_r_r(FNZ, s2, FC); \ + EMIT(MIPS_SLTU_REG(AT, FNZ, FC)); \ + emith_add_r_r_r(FNZ, s1, FNZ); \ + emith_set_arith_flags(d, s1, s2, 0, 0); \ + emith_or_r_r(FC, AT); \ +} while (0) + +#define emith_sbcf_r_r_r(d, s1, s2) do { \ + emith_add_r_r_r(FNZ, s2, FC); \ + EMIT(MIPS_SLTU_REG(AT, FNZ, FC)); \ + emith_sub_r_r_r(FNZ, s1, FNZ); \ + emith_set_arith_flags(d, s1, s2, 0, 1); \ + emith_or_r_r(FC, AT); \ +} while (0) + +#define emith_and_r_r(d, s) \ + emith_and_r_r_r(d, d, s) +#define emith_and_r_r_c(cond, d, s) \ + emith_and_r_r(d, s) + +#define emith_or_r_r(d, s) \ + emith_or_r_r_r(d, d, s) + +#define emith_eor_r_r(d, s) \ + emith_eor_r_r_r(d, d, s) + +#define emith_tst_r_r_ptr(d, s) \ + emith_and_r_r_r(FNZ, d, s) +#define emith_tst_r_r(d, s) \ + emith_tst_r_r_ptr(d, s) + +#define emith_teq_r_r(d, s) \ + emith_eor_r_r_r(FNZ, d, s) + +#define emith_cmp_r_r(d, s) \ + emith_subf_r_r_r(FNZ, d, s) + +#define emith_addf_r_r(d, s) \ + emith_addf_r_r_r(d, d, s) + +#define emith_subf_r_r(d, s) \ + emith_subf_r_r_r(d, d, s) + +#define emith_adcf_r_r(d, s) \ + emith_adcf_r_r_r(d, d, s) + +#define emith_sbcf_r_r(d, s) \ + emith_sbcf_r_r_r(d, d, s) + +#define emith_negcf_r_r(d, s) \ + emith_sbcf_r_r_r(d, Z0, s) + + +// move immediate +static void emith_move_imm(int r, uintptr_t imm) +{ + if ((s16)imm != imm) { + int s = Z0; + if (imm >> 16) { + EMIT(MIPS_MOVT_IMM(r, imm >> 16)); + s = r; + } + if ((u16)imm) + EMIT(MIPS_OR_IMM(r, s, (u16)imm)); + } else + EMIT(MIPS_ADD_IMM(r, Z0, imm)); +} + +#define emith_move_r_ptr_imm(r, imm) \ + emith_move_imm(r, (uintptr_t)(imm)) + +#define emith_move_r_imm(r, imm) \ + emith_move_imm(r, (u32)(imm)) +#define emith_move_r_imm_c(cond, r, imm) \ + emith_move_r_imm(r, imm) + + +// arithmetic, immediate +static void emith_arith_imm(int op, int rd, int rs, u32 imm) +{ + if ((s16)imm != imm) { + emith_move_r_imm(AT, imm); + EMIT(MIPS_OP_REG(FN_ADD + (op-OP_ADDI), rd, rs, AT)); + } else if (imm || 
rd != rs) + EMIT(MIPS_OP_IMM(op, rd, rs, imm)); +} + +#define emith_add_r_imm(r, imm) \ + emith_add_r_r_imm(r, r, imm) +#define emith_add_r_imm_c(cond, r, imm) \ + emith_add_r_imm(r, imm) + +#define emith_addf_r_imm(r, imm) \ + emith_addf_r_r_imm(r, r, imm) /* in-place form: d == s == r, like emith_add_r_imm */ + +#define emith_sub_r_imm(r, imm) \ + emith_sub_r_r_imm(r, r, imm) +#define emith_sub_r_imm_c(cond, r, imm) \ + emith_sub_r_imm(r, imm) + +#define emith_subf_r_imm(r, imm) \ + emith_subf_r_r_imm(r, r, imm) + +#define emith_adc_r_imm(r, imm) \ + emith_adc_r_r_imm(r, r, imm) /* no trailing ';' - the caller supplies it */ + +#define emith_adcf_r_imm(r, imm) \ + emith_adcf_r_r_imm(r, r, imm) + +#define emith_cmp_r_imm(r, imm) \ + emith_subf_r_r_imm(FNZ, r, (s16)(imm)) + + +#define emith_add_r_r_ptr_imm(d, s, imm) \ + emith_arith_imm(OP_ADDIU, d, s, imm) + +#define emith_add_r_r_imm(d, s, imm) \ + emith_add_r_r_ptr_imm(d, s, imm) + +#define emith_addf_r_r_imm(d, s, imm) do { \ + emith_add_r_r_imm(FNZ, s, imm); \ + emith_set_arith_flags(d, s, 0, imm, 0); \ +} while (0) + +#define emith_adc_r_r_imm(d, s, imm) do { \ + emith_add_r_r_r(AT, s, FC); \ + emith_add_r_r_imm(d, AT, imm); \ +} while (0) + +#define emith_adcf_r_r_imm(d, s, imm) do { \ + emith_add_r_r_r(FNZ, s, FC); \ + EMIT(MIPS_SLTU_REG(AT, FNZ, FC)); \ + emith_add_r_r_imm(FNZ, FNZ, imm); \ + emith_set_arith_flags(d, s, 0, imm, 0); \ + emith_or_r_r(FC, AT); \ +} while (0) + +// NB: no SUBI in MIPS II, since ADDI takes a signed imm +#define emith_sub_r_r_imm(d, s, imm) \ + emith_add_r_r_imm(d, s, -(imm)) +#define emith_sub_r_r_imm_c(cond, d, s, imm) \ + emith_sub_r_r_imm(d, s, imm) + +#define emith_subf_r_r_imm(d, s, imm) do { \ + emith_sub_r_r_imm(FNZ, s, imm); \ + emith_set_arith_flags(d, s, 0, imm, 1); \ +} while (0) + +// logical, immediate +static void emith_log_imm(int op, int rd, int rs, u32 imm) +{ + if (imm >> 16) { + emith_move_r_imm(AT, imm); + EMIT(MIPS_OP_REG(FN_AND + (op-OP_ANDI), rd, rs, AT)); + } else if (op == OP_ANDI || imm || rd != rs) + EMIT(MIPS_OP_IMM(op, rd, rs, imm)); +} + +#define 
emith_and_r_imm(r, imm) \ + emith_log_imm(OP_ANDI, r, r, imm) + +#define emith_or_r_imm(r, imm) \ + emith_log_imm(OP_ORI, r, r, imm) +#define emith_or_r_imm_c(cond, r, imm) \ + emith_or_r_imm(r, imm) + +#define emith_eor_r_imm_ptr(r, imm) \ + emith_log_imm(OP_XORI, r, r, imm) +#define emith_eor_r_imm_ptr_c(cond, r, imm) \ + emith_eor_r_imm_ptr(r, imm) + +#define emith_eor_r_imm(r, imm) \ + emith_eor_r_imm_ptr(r, imm) +#define emith_eor_r_imm_c(cond, r, imm) \ + emith_eor_r_imm(r, imm) + +/* NB: BIC #imm not available in MIPS; use AND #~imm instead */ +#define emith_bic_r_imm(r, imm) \ + emith_log_imm(OP_ANDI, r, r, ~(imm)) +#define emith_bic_r_imm_c(cond, r, imm) \ + emith_bic_r_imm(r, imm) + +#define emith_tst_r_imm(r, imm) \ + emith_log_imm(OP_ANDI, FNZ, r, imm) +#define emith_tst_r_imm_c(cond, r, imm) \ + emith_tst_r_imm(r, imm) + +#define emith_and_r_r_imm(d, s, imm) \ + emith_log_imm(OP_ANDI, d, s, imm) + +#define emith_or_r_r_imm(d, s, imm) \ + emith_log_imm(OP_ORI, d, s, imm) + +#define emith_eor_r_r_imm(d, s, imm) \ + emith_log_imm(OP_XORI, d, s, imm) + +// shift +#define emith_lsl(d, s, cnt) \ + EMIT(MIPS_LSL_IMM(d, s, cnt)) + +#define emith_lsr(d, s, cnt) \ + EMIT(MIPS_LSR_IMM(d, s, cnt)) + +#define emith_asr(d, s, cnt) \ + EMIT(MIPS_ASR_IMM(d, s, cnt)) + +// NB: mips32r2 has ROT (SLR with R bit set) +#define emith_ror(d, s, cnt) do { \ + EMIT(MIPS_LSL_IMM(AT, s, 32-(cnt))); \ + EMIT(MIPS_LSR_IMM(d, s, cnt)); \ + EMIT(MIPS_OR_REG(d, d, AT)); \ +} while (0) +#define emith_ror_c(cond, d, s, cnt) \ + emith_ror(d, s, cnt) + +#define emith_rol(d, s, cnt) do { \ + EMIT(MIPS_LSR_IMM(AT, s, 32-(cnt))); \ + EMIT(MIPS_LSL_IMM(d, s, cnt)); \ + EMIT(MIPS_OR_REG(d, d, AT)); \ +} while (0) + +// NB: all flag setting shifts make V undefined +// NB: mips32r2 has EXT (useful for extracting C) +#define emith_lslf(d, s, cnt) do { \ + int _s = s; \ + if ((cnt) > 1) { \ + emith_lsl(d, s, cnt-1); \ + _s = d; \ + } \ + if ((cnt) > 0) { \ + emith_lsr(FC, _s, 31); \ + 
emith_lsl(d, _s, 1); \ + } \ + emith_move_r_r(FNZ, d); \ +} while (0) + +#define emith_lsrf(d, s, cnt) do { \ + int _s = s; \ + if ((cnt) > 1) { \ + emith_lsr(d, s, cnt-1); \ + _s = d; \ + } \ + if ((cnt) > 0) { \ + emith_and_r_r_imm(FC, _s, 1); \ + emith_lsr(d, _s, 1); \ + } \ + emith_move_r_r(FNZ, d); \ +} while (0) + +#define emith_asrf(d, s, cnt) do { \ + int _s = s; \ + if ((cnt) > 1) { \ + emith_asr(d, s, cnt-1); \ + _s = d; \ + } \ + if ((cnt) > 0) { \ + emith_and_r_r_imm(FC, _s, 1); \ + emith_asr(d, _s, 1); \ + } \ + emith_move_r_r(FNZ, d); \ +} while (0) + +#define emith_rolf(d, s, cnt) do { \ + emith_rol(d, s, cnt); \ + emith_and_r_r_imm(FC, d, 1); \ + emith_move_r_r(FNZ, d); \ +} while (0) + +#define emith_rorf(d, s, cnt) do { \ + emith_ror(d, s, cnt); \ + emith_lsr(FC, d, 31); \ + emith_move_r_r(FNZ, d); \ +} while (0) + +#define emith_rolcf(d) do { \ + emith_lsr(AT, d, 31); \ + emith_lsl(d, d, 1); \ + emith_or_r_r(d, FC); \ + emith_move_r_r(FC, AT); \ + emith_move_r_r(FNZ, d); \ +} while (0) + +#define emith_rorcf(d) do { \ + emith_and_r_r_imm(AT, d, 1); \ + emith_lsr(d, d, 1); \ + emith_lsl(FC, FC, 31); \ + emith_or_r_r(d, FC); \ + emith_move_r_r(FC, AT); \ + emith_move_r_r(FNZ, d); \ +} while (0) + +// signed/unsigned extend +// NB: mips32r2 has EXT and INS +#define emith_clear_msb(d, s, count) /* bits to clear */ do { \ + u32 t; \ + if ((count) > 16) { \ + t = (count) - 16; \ + t = 0xffff >> t; \ + emith_and_r_r_imm(d, s, t); \ + } else { \ + emith_lsl(d, s, count); \ + emith_lsr(d, d, count); \ + } \ +} while (0) +#define emith_clear_msb_c(cond, d, s, count) \ + emith_clear_msb(d, s, count) + +// NB: mips32r2 has SE[BH]H +#define emith_sext(d, s, count) /* bits to keep */ do { \ + emith_lsl(d, s, 32-(count)); \ + emith_asr(d, d, 32-(count)); \ +} while (0) + +// multiply Rd = Rn*Rm (+ Ra); NB: next 2 insns after MFLO/MFHI mustn't be MULT +static u8 *last_lohi; +static void emith_lohi_nops(void) +{ + u32 d; + while ((d = emith_insn_ptr() - 
last_lohi) < 8) EMIT(MIPS_NOP); /* d is u32: a negative distance wraps to a large value and ends the loop */ +} + +#define emith_mul(d, s1, s2) do { \ + emith_lohi_nops(); \ + EMIT(MIPS_MULTU(s1, s2)); \ + EMIT(MIPS_MFLO(d)); \ + last_lohi = emith_insn_ptr(); \ +} while (0) + +#define emith_mul_u64(dlo, dhi, s1, s2) do { \ + emith_lohi_nops(); \ + EMIT(MIPS_MULTU(s1, s2)); \ + EMIT(MIPS_MFLO(dlo)); \ + EMIT(MIPS_MFHI(dhi)); \ + last_lohi = emith_insn_ptr(); \ +} while (0) + +#define emith_mul_s64(dlo, dhi, s1, s2) do { \ + emith_lohi_nops(); \ + EMIT(MIPS_MULT(s1, s2)); \ + EMIT(MIPS_MFLO(dlo)); \ + EMIT(MIPS_MFHI(dhi)); \ + last_lohi = emith_insn_ptr(); \ +} while (0) + +#define emith_mula_s64(dlo, dhi, s1, s2) do { \ + int t_ = rcache_get_tmp(); \ + emith_lohi_nops(); \ + EMIT(MIPS_MULT(s1, s2)); \ + EMIT(MIPS_MFLO(AT)); \ + emith_add_r_r(dlo, AT); \ + EMIT(MIPS_SLTU_REG(t_, dlo, AT)); \ + EMIT(MIPS_MFHI(AT)); \ + last_lohi = emith_insn_ptr(); \ + emith_add_r_r(dhi, AT); \ + emith_add_r_r(dhi, t_); \ + rcache_free_tmp(t_); \ +} while (0) +#define emith_mula_s64_c(cond, dlo, dhi, s1, s2) \ + emith_mula_s64(dlo, dhi, s1, s2) + +// load/store. 
offs has 16 bits signed, which is currently sufficient +#define emith_read_r_r_offs_ptr(r, rs, offs) \ + EMIT(MIPS_LW(r, rs, offs)) +#define emith_read_r_r_offs_ptr_c(cond, r, rs, offs) \ + emith_read_r_r_offs_ptr(r, rs, offs) + +#define emith_read_r_r_offs(r, rs, offs) \ + emith_read_r_r_offs_ptr(r, rs, offs) +#define emith_read_r_r_offs_c(cond, r, rs, offs) \ + emith_read_r_r_offs(r, rs, offs) + +#define emith_read_r_r_r_ptr(r, rs, rm) do { \ + emith_add_r_r_r(AT, rs, rm); \ + EMIT(MIPS_LW(r, AT, 0)); \ +} while (0) + +#define emith_read_r_r_r(r, rs, rm) \ + emith_read_r_r_r_ptr(r, rs, rm) +#define emith_read_r_r_r_c(cond, r, rs, rm) \ + emith_read_r_r_r(r, rs, rm) + +#define emith_read_r_r_r_ptr_wb(r, rs, rm) do { \ + emith_add_r_r_r(rs, rs, rm); \ + EMIT(MIPS_LW(r, rs, 0)); \ +} while (0) +#define emith_read_r_r_r_wb(r, rs, rm) \ + emith_read_r_r_r_ptr_wb(r, rs, rm) + +#define emith_read8_r_r_offs(r, rs, offs) \ + EMIT(MIPS_LBU(r, rs, offs)) +#define emith_read8_r_r_offs_c(cond, r, rs, offs) \ + emith_read8_r_r_offs(r, rs, offs) + +#define emith_read8_r_r_r(r, rs, rm) do { \ + emith_add_r_r_r(AT, rs, rm); \ + EMIT(MIPS_LBU(r, AT, 0)); \ +} while (0) +#define emith_read8_r_r_r_c(cond, r, rs, rm) \ + emith_read8_r_r_r(r, rs, rm) + +#define emith_read16_r_r_offs(r, rs, offs) \ + EMIT(MIPS_LHU(r, rs, offs)) +#define emith_read16_r_r_offs_c(cond, r, rs, offs) \ + emith_read16_r_r_offs(r, rs, offs) + +#define emith_read16_r_r_r(r, rs, rm) do { \ + emith_add_r_r_r(AT, rs, rm); \ + EMIT(MIPS_LHU(r, AT, 0)); \ +} while (0) +#define emith_read16_r_r_r_c(cond, r, rs, rm) \ + emith_read16_r_r_r(r, rs, rm) + +#define emith_read8s_r_r_offs(r, rs, offs) \ + EMIT(MIPS_LB(r, rs, offs)) +#define emith_read8s_r_r_offs_c(cond, r, rs, offs) \ + emith_read8s_r_r_offs(r, rs, offs) + +#define emith_read8s_r_r_r(r, rs, rm) do { \ + emith_add_r_r_r(AT, rs, rm); \ + EMIT(MIPS_LB(r, AT, 0)); \ +} while (0) +#define emith_read8s_r_r_r_c(cond, r, rs, rm) \ + emith_read8s_r_r_r(r, rs, rm) + 
+#define emith_read16s_r_r_offs(r, rs, offs) \ + EMIT(MIPS_LH(r, rs, offs)) +#define emith_read16s_r_r_offs_c(cond, r, rs, offs) \ + emith_read16s_r_r_offs(r, rs, offs) + +#define emith_read16s_r_r_r(r, rs, rm) do { \ + emith_add_r_r_r(AT, rs, rm); \ + EMIT(MIPS_LH(r, AT, 0)); \ +} while (0) +#define emith_read16s_r_r_r_c(cond, r, rs, rm) \ + emith_read16s_r_r_r(r, rs, rm) + + +#define emith_write_r_r_offs_ptr(r, rs, offs) \ + EMIT(MIPS_SW(r, rs, offs)) +#define emith_write_r_r_offs_ptr_c(cond, r, rs, offs) \ + emith_write_r_r_offs_ptr(r, rs, offs) + +#define emith_write_r_r_r_ptr(r, rs, rm) do { \ + emith_add_r_r_r(AT, rs, rm); \ + EMIT(MIPS_SW(r, AT, 0)); \ +} while (0) +#define emith_write_r_r_r_ptr_c(cond, r, rs, rm) \ + emith_write_r_r_r_ptr(r, rs, rm) + +#define emith_write_r_r_offs(r, rs, offs) \ + emith_write_r_r_offs_ptr(r, rs, offs) +#define emith_write_r_r_offs_c(cond, r, rs, offs) \ + emith_write_r_r_offs(r, rs, offs) + +#define emith_write_r_r_r(r, rs, rm) \ + emith_write_r_r_r_ptr(r, rs, rm) +#define emith_write_r_r_r_c(cond, r, rs, rm) \ + emith_write_r_r_r(r, rs, rm) + +#define emith_write_r_r_r_ptr_wb(r, rs, rm) do { \ + emith_add_r_r_r(rs, rs, rm); \ + EMIT(MIPS_SW(r, rs, 0)); \ +} while (0) +#define emith_write_r_r_r_wb(r, rs, rm) \ + emith_write_r_r_r_ptr_wb(r, rs, rm) + +#define emith_ctx_read_ptr(r, offs) \ + emith_read_r_r_offs_ptr(r, CONTEXT_REG, offs) + +#define emith_ctx_read(r, offs) \ + emith_read_r_r_offs(r, CONTEXT_REG, offs) +#define emith_ctx_read_c(cond, r, offs) \ + emith_ctx_read(r, offs) + +#define emith_ctx_write_ptr(r, offs) \ + emith_write_r_r_offs_ptr(r, CONTEXT_REG, offs) + +#define emith_ctx_write(r, offs) \ + emith_write_r_r_offs(r, CONTEXT_REG, offs) + +#define emith_ctx_read_multiple(r, offs, cnt, tmpr) do { \ + int r_ = r, offs_ = offs, cnt_ = cnt; \ + for (; cnt_ > 0; r_++, offs_ += 4, cnt_--) \ + emith_ctx_read(r_, offs_); \ +} while (0) + +#define emith_ctx_write_multiple(r, offs, cnt, tmpr) do { \ + int r_ = r, 
offs_ = offs, cnt_ = cnt; \ + for (; cnt_ > 0; r_++, offs_ += 4, cnt_--) \ + emith_ctx_write(r_, offs_); \ +} while (0) + +// function call handling +#define emith_save_caller_regs(mask) do { \ + int _c; u32 _m = mask & 0x300fffc; /* r2-r15,r24-r25 */ \ + if (__builtin_parity(_m) == 1) _m |= 0x1; /* ABI align */ \ + int _s = count_bits(_m) * 4, _o = _s; \ + if (_s) emith_sub_r_imm(SP, _s); \ + for (_c = HOST_REGS; _m && _c >= 0; _m &= ~(1 << _c), _c--) \ + if (_m & (1 << _c)) \ + { _o -= 4; if (_c) emith_write_r_r_offs(_c, SP, _o); } \ +} while (0) + +#define emith_restore_caller_regs(mask) do { \ + int _c; u32 _m = mask & 0x300fffc; \ + if (__builtin_parity(_m) == 1) _m |= 0x1; \ + int _s = count_bits(_m) * 4, _o = 0; \ + for (_c = 0; _m && _c < HOST_REGS; _m &= ~(1 << _c), _c++) \ + if (_m & (1 << _c)) \ + { if (_c) emith_read_r_r_offs(_c, SP, _o); _o += 4; } \ + if (_s) emith_add_r_imm(SP, _s); \ +} while (0) + +#define host_arg2reg(rd, arg) \ + rd = (arg+4) + +#define emith_pass_arg_r(arg, reg) \ + emith_move_r_r(arg, reg) + +#define emith_pass_arg_imm(arg, imm) \ + emith_move_r_imm(arg, imm) + +// branching +#define emith_invert_branch(cond) /* inverted conditional branch */ \ + (((cond) >> 5) == OP__RT ? 
(cond) ^ 0x01 : (cond) ^ 0x20) + +// evaluate the emulated condition, returns a register/branch type pair +static int emith_cond_check(int cond, int *r) +{ + int b = 0; + + // shortcut for comparing 2 registers + if (emith_flg_rs || emith_flg_rt) switch (cond) { + case DCOND_LS: EMIT(MIPS_SLTU_REG(AT, emith_flg_rs, emith_flg_rt)); + *r = AT, b = MIPS_BEQ; break; // s <= t unsigned + case DCOND_HI: EMIT(MIPS_SLTU_REG(AT, emith_flg_rs, emith_flg_rt)); + *r = AT, b = MIPS_BNE; break; // s > t unsigned + case DCOND_LT: EMIT(MIPS_SLT_REG(AT, emith_flg_rt, emith_flg_rs)); + *r = AT, b = MIPS_BNE; break; // s < t + case DCOND_GE: EMIT(MIPS_SLT_REG(AT, emith_flg_rt, emith_flg_rs)); + *r = AT, b = MIPS_BEQ; break; // s >= t + case DCOND_LE: EMIT(MIPS_SLT_REG(AT, emith_flg_rs, emith_flg_rt)); + *r = AT, b = MIPS_BEQ; break; // s <= t + case DCOND_GT: EMIT(MIPS_SLT_REG(AT, emith_flg_rs, emith_flg_rt)); + *r = AT, b = MIPS_BNE; break; // s > t + } + + // shortcut for V known to be 0 + if (!b && emith_flg_noV) switch (cond) { + case DCOND_VS: *r = Z0; b = MIPS_BNE; break; // never + case DCOND_VC: *r = Z0; b = MIPS_BEQ; break; // always + case DCOND_LT: *r = FNZ, b = MIPS_BLT; break; // N + case DCOND_GE: *r = FNZ, b = MIPS_BGE; break; // !N + case DCOND_LE: *r = FNZ, b = MIPS_BLE; break; // N || Z + case DCOND_GT: *r = FNZ, b = MIPS_BGT; break; // !N && !Z + } + + // the full monty if no shortcut + if (!b) switch (cond) { + // conditions using NZ + case DCOND_EQ: *r = FNZ; b = MIPS_BEQ; break; // Z + case DCOND_NE: *r = FNZ; b = MIPS_BNE; break; // !Z + case DCOND_MI: *r = FNZ; b = MIPS_BLT; break; // N + case DCOND_PL: *r = FNZ; b = MIPS_BGE; break; // !N + // conditions using C + case DCOND_LO: *r = FC; b = MIPS_BNE; break; // C + case DCOND_HS: *r = FC; b = MIPS_BEQ; break; // !C + // conditions using CZ + case DCOND_LS: // C || Z + case DCOND_HI: // !C && !Z + EMIT(MIPS_ADD_IMM(AT, FC, (u16)-1)); // !C && !Z + EMIT(MIPS_AND_REG(AT, FNZ, AT)); + *r = AT, b = (cond == 
DCOND_HI ? MIPS_BNE : MIPS_BEQ); + break; + + // conditions using V + case DCOND_VS: // V + case DCOND_VC: // !V + EMIT(MIPS_XOR_REG(AT, FV, FNZ)); // V = Nt^Ns^Nd^C + EMIT(MIPS_LSR_IMM(AT, AT, 31)); + EMIT(MIPS_XOR_REG(AT, AT, FC)); + *r = AT, b = (cond == DCOND_VS ? MIPS_BNE : MIPS_BEQ); + break; + // conditions using VNZ + case DCOND_LT: // N^V + case DCOND_GE: // !(N^V) + EMIT(MIPS_LSR_IMM(AT, FV, 31)); // Nd^V = Nt^Ns^C + EMIT(MIPS_XOR_REG(AT, FC, AT)); + *r = AT, b = (cond == DCOND_LT ? MIPS_BNE : MIPS_BEQ); + break; + case DCOND_LE: // (N^V) || Z + case DCOND_GT: // !(N^V) && !Z + EMIT(MIPS_LSR_IMM(AT, FV, 31)); // Nd^V = Nt^Ns^C + EMIT(MIPS_XOR_REG(AT, FC, AT)); + EMIT(MIPS_ADD_IMM(AT, AT, (u16)-1)); // !(Nd^V) && !Z + EMIT(MIPS_AND_REG(AT, FNZ, AT)); + *r = AT, b = (cond == DCOND_GT ? MIPS_BNE : MIPS_BEQ); + break; + } + return b; +} + +// NB: assumes all targets are in the same 256MB segment +#define emith_jump(target) \ + emith_branch(MIPS_J((uintptr_t)target & 0x0fffffff)) +#define emith_jump_patchable(target) \ + emith_jump(target) + +// NB: MIPS conditional branches have only +/- 128KB range +#define emith_jump_cond(cond, target) do { \ + int r_, mcond_ = emith_cond_check(cond, &r_); \ + u32 disp_ = (u8 *)target - emith_insn_ptr() - 4; \ + if (disp_ >= 0xfffe0000 || disp_ <= 0x0001ffff) { /* can use near B */ \ + emith_branch(MIPS_BCONDZ(mcond_,r_,disp_ & 0x0003ffff)); \ + } else { /* far branch if near branch isn't possible */ \ + mcond_ = emith_invert_branch(mcond_); \ + u8 *bp = emith_branch(MIPS_BCONDZ(mcond_, r_, 0)); \ + emith_branch(MIPS_J((uintptr_t)target & 0x0fffffff)); \ + EMIT_PTR(bp, MIPS_BCONDZ(mcond_, r_, emith_insn_ptr()-bp-4)); \ + } \ +} while (0) + +#define emith_jump_cond_patchable(cond, target) do { \ + int r_, mcond_ = emith_cond_check(cond, &r_); \ + mcond_ = emith_invert_branch(mcond_); \ + u8 *bp = emith_branch(MIPS_BCONDZ(mcond_, r_, 0));\ + emith_branch(MIPS_J((uintptr_t)target & 0x0fffffff)); \ + EMIT_PTR(bp, 
MIPS_BCONDZ(mcond_, r_, emith_insn_ptr()-bp-4)); \ +} while (0) + +// NB: returns position of patch for cache maintenance +#define emith_jump_patch(ptr, target) ({ \ + u32 *ptr_ = (u32 *)ptr-1; /* must skip condition check code */ \ + while ((ptr_[0] & 0xf8000000) != OP_J << 26) ptr_ ++; \ + EMIT_PTR(ptr_, MIPS_J((uintptr_t)target & 0x0fffffff)); \ + (u8 *)(ptr_-1); \ +}) + +#define emith_jump_reg(r) \ + emith_branch(MIPS_JR(r)) +#define emith_jump_reg_c(cond, r) \ + emith_jump_reg(r) + +#define emith_jump_ctx(offs) do { \ + emith_ctx_read_ptr(AT, offs); \ + emith_jump_reg(AT); \ +} while (0) +#define emith_jump_ctx_c(cond, offs) \ + emith_jump_ctx(offs) + +#define emith_call(target) \ + emith_branch(MIPS_JAL((uintptr_t)target & 0x0fffffff)) +#define emith_call_cond(cond, target) \ + emith_call(target) + +#define emith_call_reg(r) \ + emith_branch(MIPS_JALR(LR, r)) + +#define emith_call_ctx(offs) do { \ + emith_ctx_read_ptr(AT, offs); \ + emith_call_reg(AT); \ +} while (0) + +#define emith_call_link(r, target) do { \ + EMIT(MIPS_BL(4)); EMIT(MIPS_ADD_IMM(r, LR, 8)); emith_flush(); \ + emith_branch(MIPS_J((uintptr_t)target & 0x0fffffff)); \ +} while (0) + +#define emith_call_cleanup() /**/ + +#define emith_ret() \ + emith_branch(MIPS_JR(LR)) +#define emith_ret_c(cond) \ + emith_ret() + +#define emith_ret_to_ctx(offs) \ + emith_ctx_write_ptr(LR, offs) + +// NB: ABI SP alignment is 8 for compatibility with MIPS IV +#define emith_push_ret(r) do { \ + emith_sub_r_imm(SP, 8+16); /* reserve new arg save area (16) */ \ + emith_write_r_r_offs(LR, SP, 4+16); \ + if ((r) >= 0) emith_write_r_r_offs(r, SP, 0+16); \ +} while (0) + +#define emith_pop_and_ret(r) do { \ + if ((r) >= 0) emith_read_r_r_offs(r, SP, 0+16); \ + emith_read_r_r_offs(LR, SP, 4+16); \ + emith_add_r_imm(SP, 8+16); \ + emith_ret(); \ +} while (0) + + +// emitter ABI stuff +#define emith_pool_check() /**/ +#define emith_pool_commit(j) /**/ +// NB: mips32r2 has SYNCI +#define host_instructions_updated(base, 
end) __builtin___clear_cache(base, end) +#define emith_jump_patch_size() 4 + +// SH2 drc specific +#define emith_sh2_drc_entry() do { \ + int _c; u32 _m = 0xd0ff0000; \ + if (__builtin_parity(_m) == 1) _m |= 0x1; /* ABI align for SP is 8 */ \ + int _s = count_bits(_m) * 4 + 16, _o = _s; /* 16 byte arg save area */ \ + if (_s) emith_sub_r_imm(SP, _s); \ + for (_c = HOST_REGS; _m && _c >= 0; _m &= ~(1 << _c), _c--) \ + if (_m & (1 << _c)) \ + { _o -= 4; if (_c) emith_write_r_r_offs(_c, SP, _o); } \ +} while (0) +#define emith_sh2_drc_exit() do { \ + int _c; u32 _m = 0xd0ff0000; \ + if (__builtin_parity(_m) == 1) _m |= 0x1; \ + int _s = count_bits(_m) * 4 + 16, _o = 16; \ + for (_c = 0; _m && _c < HOST_REGS; _m &= ~(1 << _c), _c++) \ + if (_m & (1 << _c)) \ + { if (_c) emith_read_r_r_offs(_c, SP, _o); _o += 4; } \ + if (_s) emith_add_r_imm(SP, _s); \ + emith_ret(); \ +} while (0) + +// NB: assumes a is in arg0, tab, func and mask are temp +#define emith_sh2_rcall(a, tab, func, mask) do { \ + emith_lsr(mask, a, SH2_READ_SHIFT); \ + emith_add_r_r_r_lsl_ptr(tab, tab, mask, 3); \ + emith_read_r_r_offs_ptr(func, tab, 0); \ + emith_read_r_r_offs(mask, tab, 4); \ + emith_addf_r_r_r/*_ptr*/(func, func, func); \ +} while (0) + +// NB: assumes a, val are in arg0 and arg1, tab and func are temp +#define emith_sh2_wcall(a, val, tab, func) do { \ + emith_lsr(func, a, SH2_WRITE_SHIFT); \ + emith_lsl(func, func, 2); \ + emith_read_r_r_r_ptr(func, tab, func); \ + emith_move_r_r_ptr(6, CONTEXT_REG); /* arg2 */ \ + emith_jump_reg(func); \ +} while (0) + +#define emith_sh2_delay_loop(cycles, reg) do { \ + int sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); \ + int t1 = rcache_get_tmp(); \ + int t2 = rcache_get_tmp(); \ + int t3 = rcache_get_tmp(); \ + /* if (sr < 0) return */ \ + emith_cmp_r_imm(sr, 0); \ + EMITH_JMP_START(DCOND_LE); \ + /* turns = sr.cycles / cycles */ \ + emith_asr(t2, sr, 12); \ + emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)) + 1); \ + emith_mul_u64(t1, t2, t2, 
t3); /* multiply by 1/x */ \ + rcache_free_tmp(t3); \ + if (reg >= 0) { \ + /* if (reg <= turns) turns = reg-1 */ \ + t3 = rcache_get_reg(reg, RC_GR_RMW, NULL); \ + emith_cmp_r_r(t3, t2); \ + EMITH_SJMP_START(DCOND_HI); \ + emith_sub_r_r_imm_c(DCOND_LS, t2, t3, 1); \ + EMITH_SJMP_END(DCOND_HI); \ + /* if (reg <= 1) turns = 0 */ \ + emith_cmp_r_imm(t3, 1); \ + EMITH_SJMP_START(DCOND_HI); \ + emith_move_r_imm_c(DCOND_LS, t2, 0); \ + EMITH_SJMP_END(DCOND_HI); \ + /* reg -= turns */ \ + emith_sub_r_r(t3, t2); \ + } \ + /* sr.cycles -= turns * cycles; */ \ + emith_move_r_imm(t1, cycles); \ + emith_mul(t1, t2, t1); \ + emith_sub_r_r_r_lsl(sr, sr, t1, 12); \ + EMITH_JMP_END(DCOND_LE); \ + rcache_free_tmp(t1); \ + rcache_free_tmp(t2); \ +} while (0) + +/* + * if Q + * t = carry(Rn += Rm) + * else + * t = carry(Rn -= Rm) + * T ^= t + */ +#define emith_sh2_div1_step(rn, rm, sr) do { \ + emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \ + EMITH_JMP3_START(DCOND_EQ); \ + emith_addf_r_r(rn, rm); \ + EMITH_JMP3_MID(DCOND_EQ); \ + emith_subf_r_r(rn, rm); \ + EMITH_JMP3_END(); \ + emith_eor_r_r(sr, FC); \ +} while (0) + +/* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */ +#define emith_sh2_macl(ml, mh, rn, rm, sr) do { \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* MACH top 16 bits unused if saturated. sign ext for overfl detect */ \ + emith_sext(mh, mh, 16); \ + EMITH_SJMP_END(DCOND_EQ); \ + emith_mula_s64(ml, mh, rn, rm); \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* overflow if top 17 bits of MACH aren't all 1 or 0 */ \ + /* to check: add MACH[15] to MACH[31:16]. 
this is 0 if no overflow */ \ + emith_asrf(rn, mh, 16); /* sum = (MACH>>16) + ((MACH>>15)&1) */ \ + emith_adcf_r_imm(rn, 0); /* (MACH>>15) is in carry after shift */ \ + EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> ov */ \ + emith_move_r_imm_c(DCOND_NE, ml, 0x0000); /* -overflow */ \ + emith_move_r_imm_c(DCOND_NE, mh, 0x8000); \ + EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> +ovl */ \ + emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0xffffffff */ \ + emith_sub_r_imm_c(DCOND_GT, mh, 1); /* 0x00007fff */ \ + EMITH_SJMP_END(DCOND_LE); \ + EMITH_SJMP_END(DCOND_EQ); \ + EMITH_SJMP_END(DCOND_EQ); \ +} while (0) + +/* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */ +#define emith_sh2_macw(ml, mh, rn, rm, sr) do { \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* XXX: MACH should be untouched when S is set? */ \ + emith_asr(mh, ml, 31); /* sign ext MACL to MACH for ovrfl check */ \ + EMITH_SJMP_END(DCOND_EQ); \ + emith_mula_s64(ml, mh, rn, rm); \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* overflow if top 33 bits of MACH:MACL aren't all 1 or 0 */ \ + /* to check: add MACL[31] to MACH. this is 0 if no overflow */ \ + emith_lsr(rn, ml, 31); \ + emith_addf_r_r(rn, mh); /* sum = MACH + ((MACL>>31)&1) */ \ + EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \ + /* XXX: LSB signalling only in SH1, or in SH2 too? 
*/ \ + emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \ + emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* negative ovrfl */ \ + EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> positive ovrfl */ \ + emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0x7fffffff */ \ + EMITH_SJMP_END(DCOND_LE); \ + EMITH_SJMP_END(DCOND_EQ); \ + EMITH_SJMP_END(DCOND_EQ); \ +} while (0) + +#define emith_write_sr(sr, srcr) do { \ + emith_lsr(sr, sr, 10); \ + emith_or_r_r_r_lsl(sr, sr, srcr, 22); \ + emith_ror(sr, sr, 22); \ +} while (0) + +#define emith_carry_to_t(sr, is_sub) do { /* sr = (sr >> 1) * 2 + FC */ \ + emith_lsr(sr, sr, 1); \ + emith_adc_r_r(sr, sr); \ +} while (0) + +#define emith_tpop_carry(sr, is_sub) do { \ + emith_and_r_r_imm(FC, sr, 1); \ + emith_lsr(sr, sr, 1); \ +} while (0) + +#define emith_tpush_carry(sr, is_sub) \ + emith_adc_r_r(sr, sr) + +#ifdef T +// T bit handling +#define emith_invert_cond(cond) \ + ((cond) ^ 1) + +static void emith_clr_t_cond(int sr) +{ + emith_bic_r_imm(sr, T); +} + +static void emith_set_t_cond(int sr, int cond) +{ + EMITH_SJMP_START(emith_invert_cond(cond)); + emith_or_r_imm_c(cond, sr, T); + EMITH_SJMP_END(emith_invert_cond(cond)); +} + +#define emith_get_t_cond() -1 + +#define emith_sync_t(sr) ((void)sr) + +#define emith_invalidate_t() + +static void emith_set_t(int sr, int val) +{ + if (val) + emith_or_r_imm(sr, T); + else + emith_bic_r_imm(sr, T); +} + +static int emith_tst_t(int sr, int tf) +{ + emith_tst_r_imm(sr, T); + return tf ? DCOND_NE: DCOND_EQ; +} +#endif diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 0a31d894..a40c0f8c 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -869,11 +869,14 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common #define emith_jump_cond_patchable(cond, target) \ emith_jump_cond(cond, target) -#define emith_jump_patch(ptr, target) do { \ +#define emith_jump_patch(ptr, target) ({ \ u32 disp_ = (u8 *)(target) - ((u8 *)(ptr) + 4); \ u32 offs_ = (*(u8 *)(ptr) == 0x0f) ? 
2 : 1; \ EMIT_PTR((u8 *)(ptr) + offs_, disp_ - offs_, u32); \ -} while (0) + ptr; \ +}) + +#define emith_jump_patch_size() 6 #define emith_jump_at(ptr, target) do { \ u32 disp_ = (u8 *)(target) - ((u8 *)(ptr) + 5); \ diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 3b03d0c2..01fc6ae1 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -466,6 +466,47 @@ static cache_reg_t cache_regs[] = { { 7, HRF_REG }, }; +#elif defined(__mips__) +#include "../drc/emit_mips.c" + +static guest_reg_t guest_regs[] = { + // SHR_R0 .. SHR_SP + {GRF_STATIC, 20} , {GRF_STATIC, 21} , { 0 } , { 0 } , + { 0 } , { 0 } , { 0 } , { 0 } , + { 0 } , { 0 } , { 0 } , { 0 } , + { 0 } , { 0 } , { 0 } , { 0 } , + // SHR_PC, SHR_PPC, SHR_PR, SHR_SR, + // SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL, + { 0 } , { 0 } , { 0 } , {GRF_STATIC, 22} , + { 0 } , { 0 } , { 0 } , { 0 } , +}; + +// MIPS ABI: params: r4-r7, return: r2-r3, temp: r1(at),r8-r15,r24-r25,r31(ra), +// saved: r16-r23,r30, reserved: r0(zero), r26-r27(irq), r28(gp), r29(sp) +// r1,r15,r24,r25 are used internally by the code emitter +static cache_reg_t cache_regs[] = { + { 14, HRF_TEMP }, // temps + { 13, HRF_TEMP }, + { 12, HRF_TEMP }, + { 11, HRF_TEMP }, + { 10, HRF_TEMP }, + { 9, HRF_TEMP }, + { 8, HRF_TEMP }, + { 7, HRF_TEMP }, // params + { 6, HRF_TEMP }, + { 5, HRF_TEMP }, + { 4, HRF_TEMP }, + { 3, HRF_TEMP }, // RET_REG + { 2, HRF_TEMP }, + { 22, HRF_LOCKED }, // statics + { 21, HRF_LOCKED }, + { 20, HRF_LOCKED }, + { 19, HRF_REG }, // other regs + { 18, HRF_REG }, + { 17, HRF_REG }, + { 16, HRF_REG }, +}; + #elif defined(__i386__) #include "../drc/emit_x86.c" @@ -1050,9 +1091,12 @@ static void dr_block_link(struct block_entry *be, struct block_link *bl, int emi dbg(2, "- %slink from %p to pc %08x entry %p", emit_jump ? 
"":"early ", bl->jump, bl->target_pc, be->tcache_ptr); - if (emit_jump) - emith_jump_patch(bl->jump, be->tcache_ptr); - // could sync arm caches here, but that's unnecessary + if (emit_jump) { + u8 *jump = emith_jump_patch(bl->jump, be->tcache_ptr); + // only needs sync if patch is possibly crossing cacheline (assume 16 byte) + if ((uintptr_t)jump >>4 != ((uintptr_t)jump+emith_jump_patch_size()-1) >>4) + host_instructions_updated(jump, jump+emith_jump_patch_size()); + } // move bl to block_entry bl->target = be; @@ -1069,9 +1113,9 @@ static void dr_block_unlink(struct block_link *bl, int emit_jump) if (bl->target) { if (emit_jump) { - emith_jump_patch(bl->jump, sh2_drc_dispatcher); + u8 *jump = emith_jump_patch(bl->jump, sh2_drc_dispatcher); // update cpu caches since the previous jump target doesn't exist anymore - host_instructions_updated(bl->jump, bl->jump+4); + host_instructions_updated(jump, jump+emith_jump_patch_size()); } if (bl->prev) @@ -4128,8 +4172,9 @@ end_op: struct op_data *opd_b = (op_flags[i] & OF_DELAY_OP) ? opd-1 : opd; u32 target_pc = opd_b->imm; int cond = -1; - void *target = NULL; int ctaken = 0; + void *target = NULL; + int patchable = 0; if (OP_ISBRACND(opd_b->op)) ctaken = (op_flags[i] & OF_DELAY_OP) ? 
1 : 2; @@ -4182,11 +4227,12 @@ end_op: branch_patch_pc[branch_patch_count] = target_pc; branch_patch_ptr[branch_patch_count] = target; branch_patch_count++; - } - else + patchable = 1; + } else dbg(1, "warning: too many local branches"); } #endif + if (target == NULL) { // can't resolve branch locally, make a block exit @@ -4204,14 +4250,24 @@ end_op: } else #endif target = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id); + patchable = 1; } - if (cond != -1) { - emith_jump_cond_patchable(cond, target); - } - else if (target != NULL) { - rcache_invalidate(); - emith_jump_patchable(target); + // create branch + if (patchable) { + if (cond != -1) + emith_jump_cond_patchable(cond, target); + else if (target != NULL) { + rcache_invalidate(); + emith_jump_patchable(target); + } + } else { + if (cond != -1) + emith_jump_cond(cond, target); + else if (target != NULL) { + rcache_invalidate(); + emith_jump(target); + } } // branch not taken, correct cycle count diff --git a/cpu/sh2/compiler.h b/cpu/sh2/compiler.h index 38e47c0b..09f4ae97 100644 --- a/cpu/sh2/compiler.h +++ b/cpu/sh2/compiler.h @@ -36,6 +36,8 @@ unsigned short scan_block(unsigned int base_pc, int is_slave, // XXX MUST match definitions in cpu/sh2/compiler.c #if defined(__arm__) #define DRC_SR_REG r10 +#elif defined(__mips__) +#define DRC_SR_REG s6 #elif defined(__i386__) #define DRC_SR_REG edi #elif defined(__x86_64__) diff --git a/platform/common/common.mak b/platform/common/common.mak index 331e7124..5afc0171 100644 --- a/platform/common/common.mak +++ b/platform/common/common.mak @@ -169,7 +169,7 @@ DEFINES += DRC_DEBUG=$(drc_debug) SRCS_COMMON += $(R)cpu/sh2/mame/sh2dasm.c DASM = $(R)platform/libpicofe/linux/host_dasm.c DASMLIBS = -lbfd -lopcodes -liberty -ifeq "$(ARCH)" "arm" +ifeq ("$(ARCH)",$(filter "$(ARCH)","arm" "mipsel")) ifeq ($(filter_out $(shell $(CC) --print-file-name=libbfd.so),"/"),) DASM = $(R)platform/common/host_dasm.c DASMLIBS = diff --git 
a/platform/common/disarm.c b/platform/common/disarm.c index 2e7c04e7..80655877 100644 --- a/platform/common/disarm.c +++ b/platform/common/disarm.c @@ -435,7 +435,7 @@ static int software_interrupt(unsigned int pc, unsigned int insn, char *buf, siz return 1; } -int disarm(unsigned int pc, unsigned int insn, char *buf, size_t buf_len) +int disarm(uintptr_t pc, uint32_t insn, char *buf, size_t buf_len) { if ((insn & 0x0fffffd0) == 0x012fff10) return branch_and_exchange(pc, insn, buf, buf_len); diff --git a/platform/common/disarm.h b/platform/common/disarm.h index 2ea4ccc3..b8634f68 100644 --- a/platform/common/disarm.h +++ b/platform/common/disarm.h @@ -23,6 +23,6 @@ #ifndef DISARM_H #define DISARM_H -int disarm(unsigned int pc, unsigned int insn, char *buf, unsigned int buf_len); +int disarm(uintptr_t pc, uint32_t insn, char *buf, size_t buf_len); #endif /* DISARM_H */ diff --git a/platform/common/dismips.c b/platform/common/dismips.c new file mode 100644 index 00000000..af71b095 --- /dev/null +++ b/platform/common/dismips.c @@ -0,0 +1,346 @@ +/* + * very basic mips disassembler for MIPS32/MIPS64 Release 1, only for picodrive + * Copyright (C) 2019 kub + * + * This work is licensed under the terms of MAME license. + * See COPYING file in the top-level directory. + */ + +// XXX unimplemented: SYSCALL, BREAK, SYNC, SDBBP, T*, CACHE, PREF, +// MOVF/MOVT, LWC*/LDC*, SWC*/SDC*, COP*. +// however, it's certainly good enough for anything picodrive DRC throws at it. 
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>

// Use the project header when it is reachable; the prototype below mirrors
// it, so this file also builds stand-alone (e.g. in a test harness).
#if defined(__has_include)
#if __has_include("dismips.h")
#include "dismips.h"
#endif
#endif

int dismips(uintptr_t pc, uint32_t insn, char *buf, unsigned int buflen);


// ABI names of the 32 general purpose registers, indexed by register number
static const char *const register_names[32] = {
	"$zero", "$at", "$v0", "$v1", "$a0", "$a1", "$a2", "$a3",
	"$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7",
	"$s0", "$s1", "$s2", "$s3", "$s4", "$s5", "$s6", "$s7",
	"$t8", "$t9", "$k0", "$k1", "$gp", "$sp", "$fp", "$ra"
};


// operand layout of an instruction; selects the output format in dismips()
enum insn_type {
	REG_DTS, REG_TS,	// 3, 2, or 1 regs
	REG_DS, REG_D, REG_S,
	S_IMM_DT,		// 2 regs with shift amount
	B_IMM_S, B_IMM_TS,	// pc-relative branches with 1 or 2 regs
	J_IMM,			// region-relative jump
	A_IMM_TS,		// arithmetic immediate with 1 or 2 regs
	L_IMM_T, L_IMM_TS,	// logical immediate with 2 regs
	M_IMM_TS,		// memory indexed with 2 regs
};

// one table entry: opcode (or function/rt sub-opcode), format, mnemonic
struct insn {
	unsigned char op;
	enum insn_type type;
	const char *name;
};

// ATTN: these arrays MUST be sorted by op (decode_insn does a binary search)

// major opcodes that select a secondary table
#define OP_SPECIAL	0x00
#define OP_REGIMM	0x01
#define OP_SPECIAL2	0x1C
// major opcode of ORI, needed for the "li" alias
#define OP_ORI		0x0D
// SPECIAL function code of OR, needed for the "move" alias
#define FN_OR		0x25

// instructions with opcode SPECIAL (R-type), keyed by function field
// NB: all REG_DTS entries are printed as "rd, rs, rt"; the canonical
// assembler order of the variable shifts (sllv & co) is "rd, rt, rs".
static const struct insn special_insns[] = {
	{0x00, S_IMM_DT, "sll"},
	{0x02, S_IMM_DT, "srl"},
	{0x03, S_IMM_DT, "sra"},
	{0x04, REG_DTS,  "sllv"},
	{0x06, REG_DTS,  "srlv"},
	{0x07, REG_DTS,  "srav"},
	{0x08, REG_S,    "jr"},
	{0x09, REG_DS,   "jalr"},
	{0x0A, REG_DTS,  "movz"},
	{0x0B, REG_DTS,  "movn"},
//	{0x0C, ,         "syscall"},
//	{0x0D, ,         "break"},
//	{0x0F, ,         "sync"},
	{0x10, REG_D,    "mfhi"},
	{0x11, REG_S,    "mthi"},
	{0x12, REG_D,    "mflo"},
	{0x13, REG_S,    "mtlo"},
	{0x14, REG_DTS,  "dsllv"},
	{0x16, REG_DTS,  "dsrlv"},
	{0x17, REG_DTS,  "dsrav"},
	{0x18, REG_TS,   "mult"},
	{0x19, REG_TS,   "multu"},
	{0x1A, REG_TS,   "div"},
	{0x1B, REG_TS,   "divu"},
	{0x1C, REG_TS,   "dmult"},
	{0x1D, REG_TS,   "dmultu"},
	{0x1E, REG_TS,   "ddiv"},
	{0x1F, REG_TS,   "ddivu"},
	{0x20, REG_DTS,  "add"},
	{0x21, REG_DTS,  "addu"},
	{0x22, REG_DTS,  "sub"},
	{0x23, REG_DTS,  "subu"},
	{0x24, REG_DTS,  "and"},
	{0x25, REG_DTS,  "or"},
	{0x26, REG_DTS,  "xor"},
	{0x27, REG_DTS,  "nor"},
	{0x2A, REG_DTS,  "slt"},
	{0x2B, REG_DTS,  "sltu"},
	{0x2C, REG_DTS,  "dadd"},
	{0x2D, REG_DTS,  "daddu"},
	{0x2E, REG_DTS,  "dsub"},
	{0x2F, REG_DTS,  "dsubu"},
//	{0x30, REG_TS,   "tge"},
//	{0x31, REG_TS,   "tgeu"},
//	{0x32, REG_TS,   "tlt"},
//	{0x33, REG_TS,   "tltu"},
//	{0x34, REG_TS,   "teq"},
//	{0x36, REG_TS,   "tne"},
	{0x38, S_IMM_DT, "dsll"},
	{0x3A, S_IMM_DT, "dsrl"},
	{0x3B, S_IMM_DT, "dsra"},
	{0x3C, S_IMM_DT, "dsll32"},
	{0x3E, S_IMM_DT, "dsrl32"},
	{0x3F, S_IMM_DT, "dsra32"},
};

// instructions with opcode SPECIAL2 (R-type), keyed by function field
static const struct insn special2_insns[] = {
	{0x00, REG_TS,  "madd"},
	{0x01, REG_TS,  "maddu"},
	{0x02, REG_DTS, "mul"},		// writes rd, unlike madd/msub
	{0x04, REG_TS,  "msub"},
	{0x05, REG_TS,  "msubu"},
	{0x20, REG_DS,  "clz"},
	{0x21, REG_DS,  "clo"},
	{0x24, REG_DS,  "dclz"},
	{0x25, REG_DS,  "dclo"},
};

// instructions with opcode REGIMM (I-type), keyed by the rt field
static const struct insn regimm_insns[] = {
	{0x00, B_IMM_S, "bltz"},
	{0x01, B_IMM_S, "bgez"},
	{0x02, B_IMM_S, "bltzl"},
	{0x03, B_IMM_S, "bgezl"},
//	{0x08, ,        "tgei"},
//	{0x09, ,        "tgeiu"},
//	{0x0A, ,        "tlti"},
//	{0x0B, ,        "tltiu"},
//	{0x0C, ,        "teqi"},
//	{0x0E, ,        "tnei"},
	{0x10, B_IMM_S, "bltzal"},
	{0x11, B_IMM_S, "bgezal"},
	{0x12, B_IMM_S, "bltzall"},
	{0x13, B_IMM_S, "bgezall"},
};

// instructions with other opcodes (I-type/J-type), keyed by major opcode
static const struct insn immediate_insns[] = {
	{0x02, J_IMM,    "j"},
	{0x03, J_IMM,    "jal"},
	{0x04, B_IMM_TS, "beq"},
	{0x05, B_IMM_TS, "bne"},
	{0x06, B_IMM_S,  "blez"},
	{0x07, B_IMM_S,  "bgtz"},
	{0x08, A_IMM_TS, "addi"},
	{0x09, A_IMM_TS, "addiu"},
	{0x0A, A_IMM_TS, "slti"},
	{0x0B, A_IMM_TS, "sltiu"},
	{0x0C, L_IMM_TS, "andi"},
	{0x0D, L_IMM_TS, "ori"},
	{0x0E, L_IMM_TS, "xori"},
	{0x0F, L_IMM_T,  "lui"},
	{0x14, B_IMM_TS, "beql"},
	{0x15, B_IMM_TS, "bnel"},
	{0x16, B_IMM_S,  "blezl"},
	{0x17, B_IMM_S,  "bgtzl"},
	{0x18, A_IMM_TS, "daddi"},
	{0x19, A_IMM_TS, "daddiu"},
	{0x1A, M_IMM_TS, "ldl"},
	{0x1B, M_IMM_TS, "ldr"},
	{0x20, M_IMM_TS, "lb"},
	{0x21, M_IMM_TS, "lh"},
	{0x22, M_IMM_TS, "lwl"},
	{0x23, M_IMM_TS, "lw"},
	{0x24, M_IMM_TS, "lbu"},
	{0x25, M_IMM_TS, "lhu"},
	{0x26, M_IMM_TS, "lwr"},
	{0x27, M_IMM_TS, "lwu"},
	{0x28, M_IMM_TS, "sb"},
	{0x29, M_IMM_TS, "sh"},
	{0x2A, M_IMM_TS, "swl"},
	{0x2B, M_IMM_TS, "sw"},
	{0x2C, M_IMM_TS, "sdl"},
	{0x2D, M_IMM_TS, "sdr"},
	{0x2E, M_IMM_TS, "swr"},
//	{0x2F, ,         "cache"},
	{0x30, M_IMM_TS, "ll"},
//	{0x31, ,         "lwc1"},
//	{0x32, ,         "lwc2"},
//	{0x33, ,         "pref"},
	{0x34, M_IMM_TS, "lld"},
//	{0x35, ,         "ldc1"},
//	{0x36, ,         "ldc2"},
	{0x37, M_IMM_TS, "ld"},
	{0x38, M_IMM_TS, "sc"},
//	{0x39, ,         "swc1"},
//	{0x3A, ,         "swc2"},
	{0x3C, M_IMM_TS, "scd"},
//	{0x3D, ,         "sdc1"},
//	{0x3E, ,         "sdc2"},
	{0x3F, M_IMM_TS, "sd"},
};

#define ARRAY_SIZE(a)	(sizeof(a) / sizeof((a)[0]))

// find the table entry describing insn; returns NULL if it isn't known
static const struct insn *decode_insn(uint32_t insn)
{
	const struct insn *table;
	uint32_t key = insn >> 26;
	int lo = 0, hi;

	// pick the table and the field acting as search key within it
	switch (key) {
	case OP_SPECIAL:
		key = insn & 0x3f;
		table = special_insns;
		hi = ARRAY_SIZE(special_insns) - 1;
		break;
	case OP_SPECIAL2:
		key = insn & 0x3f;
		table = special2_insns;
		hi = ARRAY_SIZE(special2_insns) - 1;
		break;
	case OP_REGIMM:
		key = (insn >> 16) & 0x1f;
		table = regimm_insns;
		hi = ARRAY_SIZE(regimm_insns) - 1;
		break;
	default:
		table = immediate_insns;
		hi = ARRAY_SIZE(immediate_insns) - 1;
		break;
	}

	// binary search, relies on the tables being sorted by op
	while (lo <= hi) {
		int mid = (lo + hi) / 2;

		if (table[mid].op == key)
			return &table[mid];
		if (table[mid].op < key)
			lo = mid + 1;
		else
			hi = mid - 1;
	}
	return NULL;
}

// calculate target for pc-relative branches (relative to the delay slot)
static unsigned long b_target(unsigned long pc, uint32_t insn)
{
	return pc + 4 + (int16_t)insn * 4;
}

// calculate target for region-relative branches (256MB region of pc)
static unsigned long j_target(unsigned long pc, uint32_t insn)
{
	return (pc & ~0x0fffffffL) | ((insn & 0x03ffffff) << 2);
}

// main disassembler function
//
// Writes the textual form of the instruction word insn, located at address
// pc, to buf (at most buflen bytes including the terminating NUL).
// Returns 1 if the instruction was recognized; otherwise the raw word is
// written in hex and 0 is returned.
int dismips(uintptr_t pc, uint32_t insn, char *buf, unsigned int buflen)
{
	const struct insn *pi = decode_insn(insn);
	const char *rs = register_names[(insn >> 21) & 0x1f];
	const char *rt = register_names[(insn >> 16) & 0x1f];
	const char *rd = register_names[(insn >> 11) & 0x1f];
	uint32_t opcode = insn >> 26;
	int sa = (insn >> 6) & 0x1f;
	int imm = (int16_t)insn;		// sign extended immediate
	unsigned uimm = (uint16_t)insn;		// zero extended immediate

	// snprintf must not be handed a NULL buffer with a non-zero size
	if (buf == NULL)
		buflen = 0;

	if (pi == NULL) {
		snprintf(buf, buflen, "0x%x", (unsigned)insn);
		return 0;
	}

	switch (pi->type) {
	case REG_DTS:
		// "or rd, rs, $zero" is the canonical register move
		if (opcode == OP_SPECIAL && (insn & 0x3f) == FN_OR &&
		    (insn & 0x1f0000) == 0)
			snprintf(buf, buflen, "move %s, %s", rd, rs);
		else
			snprintf(buf, buflen, "%s %s, %s, %s", pi->name, rd, rs, rt);
		break;
	case REG_TS:
		snprintf(buf, buflen, "%s %s, %s", pi->name, rs, rt);
		break;
	case REG_DS:
		snprintf(buf, buflen, "%s %s, %s", pi->name, rd, rs);
		break;
	case REG_D:
		snprintf(buf, buflen, "%s %s", pi->name, rd);
		break;
	case REG_S:
		snprintf(buf, buflen, "%s %s", pi->name, rs);
		break;
	case S_IMM_DT:
		// the all-zero word (sll $zero, $zero, 0) is the canonical nop
		if (insn == 0x00000000)
			snprintf(buf, buflen, "nop");
		else
			snprintf(buf, buflen, "%s %s, %s, %d", pi->name, rd, rt, sa);
		break;
	case B_IMM_S:
		snprintf(buf, buflen, "%s %s, 0x%lx", pi->name, rs, b_target(pc, insn));
		break;
	case B_IMM_TS:
		snprintf(buf, buflen, "%s %s, %s, 0x%lx", pi->name, rs, rt, b_target(pc, insn));
		break;
	case J_IMM:
		snprintf(buf, buflen, "%s 0x%lx", pi->name, j_target(pc, insn));
		break;
	case A_IMM_TS:
		// small values read better in decimal, large ones in hex
		if (abs(imm) < 1000)
			snprintf(buf, buflen, "%s %s, %s, %d", pi->name, rt, rs, imm);
		else
			snprintf(buf, buflen, "%s %s, %s, 0x%x", pi->name, rt, rs, (unsigned)imm);
		break;
	case L_IMM_T:
		snprintf(buf, buflen, "%s %s, 0x%x", pi->name, rt, uimm);
		break;
	case L_IMM_TS:
		// "ori rt, $zero, imm" is the canonical 16 bit load immediate
		if (opcode == OP_ORI && (insn & 0x03e00000) == 0)
			snprintf(buf, buflen, "li %s, 0x%x", rt, uimm);
		else
			snprintf(buf, buflen, "%s %s, %s, 0x%x", pi->name, rt, rs, uimm);
		break;
	case M_IMM_TS:
		snprintf(buf, buflen, "%s %s, %d(%s)", pi->name, rt, imm, rs);
		break;
	default:
		// table entry with a format this function doesn't know
		snprintf(buf, buflen, "0x%x", (unsigned)insn);
		return 0;
	}
	return 1;
}
cpu/drc/emit_arm.c -cpu/sh2/compiler.o : cpu/drc/emit_arm.c +cpu/sh2/compiler.o : cpu/drc/emit_arm.c cpu/drc/emit_arm64.c cpu/sh2/compiler.o : cpu/drc/emit_x86.c cpu/drc/emit_mips.c cpu/sh2/mame/sh2pico.o : cpu/sh2/mame/sh2.c pico/pico.o pico/cd/mcd.o pico/32x/32x.o : pico/pico_cmn.c pico/pico_int.h diff --git a/config.aarch64 b/config.aarch64 new file mode 100644 index 00000000..70a6fe30 --- /dev/null +++ b/config.aarch64 @@ -0,0 +1,15 @@ +# Automatically generated by configure +# Configured with: './configure' '--platform=generic' +CC = aarch64-linux-gnu-gcc +CXX = aarch64-linux-gnu-g++ +AS = aarch64-linux-gnu-as +STRIP = aarch64-linux-gnu-strip +CFLAGS += -I/usr/include/SDL +CFLAGS += -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -fno-stack-protector +ASFLAGS += +LDFLAGS += +LDLIBS += -lSDL -lasound -lpng -lz -lm -lstdc++ -ldl + +ARCH = aarch64 +PLATFORM = generic +SOUND_DRIVERS = alsa diff --git a/cpu/drc/emit_arm64.c b/cpu/drc/emit_arm64.c new file mode 100644 index 00000000..90010d80 --- /dev/null +++ b/cpu/drc/emit_arm64.c @@ -0,0 +1,1328 @@ +/* + * Basic macros to emit ARM A64 instructions and some utils + * Copyright (C) 2019 kub + * + * This work is licensed under the terms of MAME license. + * See COPYING file in the top-level directory. + */ +#define HOST_REGS 32 +#define CONTEXT_REG 19 +#define RET_REG 0 + +// R31 doesn't exist, it aliases either with zero or SP +#define SP 31 // stack pointer +#define Z0 31 // zero register +#define LR 30 // link register +#define FP 29 // frame pointer +#define PR 18 // platform register + +// All operations but ptr ops are using the lower 32 bits of the A64 registers. +// The upper 32 bits are only used in ptr ops. 
+ + +#define A64_COND_EQ 0x0 +#define A64_COND_NE 0x1 +#define A64_COND_HS 0x2 +#define A64_COND_LO 0x3 +#define A64_COND_MI 0x4 +#define A64_COND_PL 0x5 +#define A64_COND_VS 0x6 +#define A64_COND_VC 0x7 +#define A64_COND_HI 0x8 +#define A64_COND_LS 0x9 +#define A64_COND_GE 0xa +#define A64_COND_LT 0xb +#define A64_COND_GT 0xc +#define A64_COND_LE 0xd +#define A64_COND_CS A64_COND_HS +#define A64_COND_CC A64_COND_LO +#define A64_COND_AL 0xe +#define A64_COND_NV 0xf + +/* unified conditions */ +#define DCOND_EQ A64_COND_EQ +#define DCOND_NE A64_COND_NE +#define DCOND_MI A64_COND_MI +#define DCOND_PL A64_COND_PL +#define DCOND_HI A64_COND_HI +#define DCOND_HS A64_COND_HS +#define DCOND_LO A64_COND_LO +#define DCOND_GE A64_COND_GE +#define DCOND_GT A64_COND_GT +#define DCOND_LT A64_COND_LT +#define DCOND_LS A64_COND_LS +#define DCOND_LE A64_COND_LE +#define DCOND_VS A64_COND_VS +#define DCOND_VC A64_COND_VC + +#define DCOND_CS A64_COND_HS +#define DCOND_CC A64_COND_LO + + +// unified insn +#define A64_INSN(op, b29, b22, b21, b16, b12, b10, b5, b0) \ + (((op)<<25)|((b29)<<29)|((b22)<<22)|((b21)<<21)|((b16)<<16)|((b12)<<12)|((b10)<<10)|((b5)<<5)|((b0)<<0)) + +#define _ 0 // marker for "field unused" + +#define A64_NOP \ + A64_INSN(0xa,0x6,0x4,_,0x3,0x2,_,0,0x1f) // 0xd503201f + +// arithmetic/logical + +enum { OP_AND, OP_OR, OP_EOR, OP_ANDS, OP_ADD, OP_ADDS, OP_SUB, OP_SUBS }; +enum { ST_LSL, ST_LSR, ST_ASR, ST_ROR }; +enum { XT_UXTW=0x4, XT_UXTX=0x6, XT_LSL=0x7, XT_SXTW=0xc, XT_SXTX=0xe }; +#define OP_SZ64 (1 << 31) // bit for 64 bit op selection +#define OP_N64 (1 << 22) // N-bit for 64 bit logical immediate ops + +#define A64_OP_REG(op, n, rd, rn, rm, stype, simm) /* arith+logical, ST_ */ \ + A64_INSN(0x5,(op)&3,((op)&4)|stype,n,rm,_,simm,rn,rd) +#define A64_OP_XREG(op, rd, rn, rm, xtopt, simm) /* arith, XT_ */ \ + A64_INSN(0x5,(op)&3,0x4,1,rm,xtopt,simm,rn,rd) +#define A64_OP_IMM12(op, rd, rn, imm, lsl12) /* arith */ \ + 
A64_INSN(0x8,(op)&3,((op)&4)|lsl12,_,_,_,(imm)&0xfff,rn,rd) +#define A64_OP_IMMBM(op, rd, rn, immr, imms) /* logical */ \ + A64_INSN(0x9,(op)&3,0x0,_,immr,_,(imms)&0x3f,rn,rd) + +// rd = rn OP (rm SHIFT simm) +#define A64_ADD_REG(rd, rn, rm, stype, simm) \ + A64_OP_REG(OP_ADD,0,rd,rn,rm,stype,simm) +#define A64_ADDS_REG(rd, rn, rm, stype, simm) \ + A64_OP_REG(OP_ADDS,0,rd,rn,rm,stype,simm) +#define A64_SUB_REG(rd, rn, rm, stype, simm) \ + A64_OP_REG(OP_SUB,0,rd,rn,rm,stype,simm) +#define A64_SUBS_REG(rd, rn, rm, stype, simm) \ + A64_OP_REG(OP_SUBS,0,rd,rn,rm,stype,simm) + +#define A64_NEG_REG(rd, rm, stype, simm) \ + A64_SUB_REG(rd,Z0,rm,stype,simm) +#define A64_NEGS_REG(rd, rm, stype, simm) \ + A64_SUBS_REG(rd,Z0,rm,stype,simm) +#define A64_NEGC_REG(rd, rm) /* NGC */ \ + A64_SBC_REG(rd,Z0,rm,_) +#define A64_NEGCS_REG(rd, rm) /* NGCS */ \ + A64_SBCS_REG(rd,Z0,rm) +#define A64_CMP_REG(rn, rm, stype, simm) \ + A64_SUBS_REG(Z0, rn, rm, stype, simm) +#define A64_CMN_REG(rn, rm, stype, simm) \ + A64_ADDS_REG(Z0, rn, rm, stype, simm) + +#define A64_EOR_REG(rd, rn, rm, stype, simm) \ + A64_OP_REG(OP_EOR,0,rd,rn,rm,stype,simm) +#define A64_OR_REG(rd, rn, rm, stype, simm) \ + A64_OP_REG(OP_OR,0,rd,rn,rm,stype,simm) +#define A64_ORN_REG(rd, rn, rm, stype, simm) \ + A64_OP_REG(OP_OR,1,rd,rn,rm,stype,simm) +#define A64_AND_REG(rd, rn, rm, stype, simm) \ + A64_OP_REG(OP_AND,0,rd,rn,rm,stype,simm) +#define A64_ANDS_REG(rd, rn, rm, stype, simm) \ + A64_OP_REG(OP_ANDS,0,rd,rn,rm,stype,simm) +#define A64_BIC_REG(rd, rn, rm, stype, simm) \ + A64_OP_REG(OP_AND,1,rd,rn,rm,stype,simm) +#define A64_BICS_REG(rd, rn, rm, stype, simm) \ + A64_OP_REG(OP_ANDS,1,rd,rn,rm,stype,simm) + +#define A64_TST_REG(rn, rm, stype, simm) \ + A64_ANDS_REG(Z0, rn, rm, stype, simm) +#define A64_MOV_REG(rd, rm, stype, simm) \ + A64_OR_REG(rd, Z0, rm, stype, simm) +#define A64_MVN_REG(rd, rm, stype, simm) \ + A64_ORN_REG(rd, Z0, rm, stype, simm) + +// rd = rn OP (rm EXTEND simm) +#define A64_ADD_XREG(rd, rn, 
rm, xtopt, simm) \ + A64_OP_XREG(OP_ADD,rd,rn,rm,xtopt,simm) +#define A64_ADDS_XREG(rd, rn, rm, xtopt, simm) \ + A64_OP_XREG(OP_ADDS,rd,rn,rm,xtopt,simm) +#define A64_SUB_XREG(rd, rn, rm, xtopt, simm) \ + A64_OP_XREG(OP_SUB,rd,rn,rm,xtopt,simm) +#define A64_SUBS_XREG(rd, rn, rm, xtopt, simm) \ + A64_OP_XREG(OP_SUBS,rd,rn,rm,xtopt,simm) + +// rd = rn OP rm OP carry +#define A64_ADC_REG(rd, rn, rm) \ + A64_INSN(0xd,OP_ADD &3,0x0,_,rm,_,_,rn,rd) +#define A64_ADCS_REG(rd, rn, rm) \ + A64_INSN(0xd,OP_ADDS&3,0x0,_,rm,_,_,rn,rd) +#define A64_SBC_REG(rd, rn, rm, s) \ + A64_INSN(0xd,OP_SUB &3,0x0,_,rm,_,_,rn,rd) +#define A64_SBCS_REG(rd, rn, rm) \ + A64_INSN(0xd,OP_SUBS&3,0x0,_,rm,_,_,rn,rd) + +// rd = rn SHIFT rm +#define A64_LSL_REG(rd, rn, rm) /* LSLV */ \ + A64_INSN(0xd,0x0,0x3,_,rm,_,0x8,rn,rd) +#define A64_LSR_REG(rd, rn, rm) /* LSRV */ \ + A64_INSN(0xd,0x0,0x3,_,rm,_,0x9,rn,rd) +#define A64_ASR_REG(rd, rn, rm) /* ASRV */ \ + A64_INSN(0xd,0x0,0x3,_,rm,_,0xa,rn,rd) +#define A64_ROR_REG(rd, rn, rm) /* RORV */ \ + A64_INSN(0xd,0x0,0x3,_,rm,_,0xb,rn,rd) + +// rd = REVERSE(n) rn +#define A64_RBIT_REG(rd, rn) \ + A64_INSN(0xd,0x2,0x3,_,_,_,_,rn,rd) + +// rd = rn OP (imm12 << (0|12)) +#define A64_ADD_IMM(rd, rn, imm12, lsl12) \ + A64_OP_IMM12(OP_ADD, rd, rn, imm12, lsl12) +#define A64_ADDS_IMM(rd, rn, imm12, lsl12) \ + A64_OP_IMM12(OP_ADDS, rd, rn, imm12, lsl12) +#define A64_SUB_IMM(rd, rn, imm12, lsl12) \ + A64_OP_IMM12(OP_SUB, rd, rn, imm12, lsl12) +#define A64_SUBS_IMM(rd, rn, imm12, lsl12) \ + A64_OP_IMM12(OP_SUBS, rd, rn, imm12, lsl12) + +#define A64_CMP_IMM(rn, imm12, lsl12) \ + A64_SUBS_IMM(Z0,rn,imm12,lsl12) +#define A64_CMN_IMM(rn, imm12, lsl12) \ + A64_ADDS_IMM(Z0,rn,imm12,lsl12) + +// rd = rn OP immbm; immbm is a repeated special pattern of 2^n bits length +#define A64_EOR_IMM(rd, rn, immr, imms) \ + A64_OP_IMMBM(OP_EOR,rd,rn,immr,imms) +#define A64_OR_IMM(rd, rn, immr, imms) \ + A64_OP_IMMBM(OP_OR,rd,rn,immr,imms) +#define A64_AND_IMM(rd, rn, immr, imms) \ + A64_OP_IMMBM(OP_AND,rd,rn,immr,imms) +#define 
A64_ANDS_IMM(rd, rn, immr, imms) \ + A64_OP_IMMBM(OP_ANDS,rd,rn,immr,imms) +#define A64_TST_IMM(rn, immr, imms) \ + A64_OP_IMMBM(OP_ANDS,Z0,rn,immr,imms) +#define A64_MOV_IMM(rd, rn, immr, imms) \ + A64_OP_IMMBM(OP_OR,rd,Z0,immr,imms) + +// rd = (imm16 << (0|16|32|48)) +#define A64_MOVN_IMM(rd, imm16, lsl16) \ + A64_INSN(0x9,0x0,0x2,lsl16,_,_,_,(imm16)&0xffff,rd) +#define A64_MOVZ_IMM(rd, imm16, lsl16) \ + A64_INSN(0x9,0x2,0x2,lsl16,_,_,_,(imm16)&0xffff,rd) +#define A64_MOVK_IMM(rd, imm16, lsl16) \ + A64_INSN(0x9,0x3,0x2,lsl16,_,_,_,(imm16)&0xffff,rd) +#define A64_MOVT_IMM(rd, imm16, lsl16) \ + A64_INSN(0x9,0x3,0x2,lsl16,_,_,_,(imm16)&0xffff,rd) + +// rd = rn SHIFT imm6 +#define A64_LSL_IMM(rd, rn, bits) /* UBFM */ \ + A64_INSN(0x9,0x2,0x4,_,32-(bits),_,31-(bits),rn,rd) +#define A64_LSR_IMM(rd, rn, bits) /* UBFM */ \ + A64_INSN(0x9,0x2,0x4,_,bits,_,31,rn,rd) +#define A64_ASR_IMM(rd, rn, bits) /* SBFM */ \ + A64_INSN(0x9,0x0,0x4,_,bits,_,31,rn,rd) +#define A64_ROR_IMM(rd, rn, bits) /* EXTR */ \ + A64_INSN(0x9,0x0,0x6,_,rn,_,bits,rn,rd) + +#define A64_SXT_IMM(rd, rn, bits) \ + A64_INSN(0x9,0x0,0x4,0,0,_,bits-1,rn,rd) +#define A64_UXT_IMM(rd, rn, bits) \ + A64_INSN(0x9,0x2,0x4,0,0,_,bits-1,rn,rd) + +// multiplication + +#define A64_SMULL(rd, rn, rm) /* Xd = Wn*Wm (+ Xa) */ \ + A64_INSN(0xd,0x4,0x4,1,rm,_,Z0,rn,rd) +#define A64_SMADDL(rd, rn, rm, ra) \ + A64_INSN(0xd,0x4,0x4,1,rm,_,ra,rn,rd) +#define A64_UMULL(rd, rn, rm) \ + A64_INSN(0xd,0x4,0x6,1,rm,_,Z0,rn,rd) +#define A64_UMADDL(rd, rn, rm, ra) \ + A64_INSN(0xd,0x4,0x6,1,rm,_,ra,rn,rd) +#define A64_MUL(rd, rn, rm) /* Wd = Wn*Wm (+ Wa) */ \ + A64_INSN(0xd,0x0,0x4,0,rm,_,Z0,rn,rd) +#define A64_MADD(rd, rn, rm, ra) \ + A64_INSN(0xd,0x0,0x4,0,rm,_,ra,rn,rd) + +// branching + +#define A64_B(offs26) \ + A64_INSN(0xa,0x0,_,_,_,_,_,_,(offs26) >> 2) +#define A64_BL(offs26) \ + A64_INSN(0xa,0x4,_,_,_,_,_,_,(offs26) >> 2) +#define A64_BR(rn) \ + A64_INSN(0xb,0x6,_,_,0x1f,_,_,rn,_) +#define A64_BLR(rn) \ + 
A64_INSN(0xb,0x6,_,_,0x3f,_,_,rn,_) +#define A64_RET(rn) /* same as BR, but hint for cpu */ \ + A64_INSN(0xb,0x6,_,_,0x5f,_,_,rn,_) +#define A64_BCOND(cond, offs19) \ + A64_INSN(0xa,0x2,_,_,_,_,_,(offs19) >> 2,(cond)) + +// load pc-relative + +#define A64_LDRLIT_IMM(rd, offs19) \ + A64_INSN(0xc,0x0,0x0,_,_,_,_,(offs19) >> 2,rd) +#define A64_LDRXLIT_IMM(rd, offs19) \ + A64_INSN(0xc,0x2,0x0,_,_,_,_,(offs19) >> 2,rd) +#define A64_ADRXLIT_IMM(rd, offs21) \ + A64_INSN(0x8,(offs21)&3,0x0,_,_,_,_,(offs21) >> 2,rd) + +// load/store indexed base. Only the signed unscaled variant is used here. + +enum { LT_ST, LT_LD, LT_LDSX, LT_LDS }; +enum { AM_B=0x1, AM_H=0x3, AM_W=0x5, AM_X=0x7 }; +enum { AM_IDX, AM_IDXPOST, AM_IDXREG, AM_IDXPRE }; +#define A64_LDST_AM(ir,rm,optimm) (((ir)<<9)|((rm)<<4)|((optimm)&0x1ff)) +#define A64_OP_LDST(sz, op, am, mode, rm, rd) \ + A64_INSN(0xc,sz,op,_,_,am,mode,rm,rd) + +#define A64_LDSTX_IMM(rd, rn, offs9, ld, mode) \ + A64_OP_LDST(AM_X,ld,A64_LDST_AM(0,_,offs9),mode,rn,rd) +#define A64_LDST_IMM(rd, rn, offs9, ld, mode) \ + A64_OP_LDST(AM_W,ld,A64_LDST_AM(0,_,offs9),mode,rn,rd) +#define A64_LDSTH_IMM(rd, rn, offs9, ld, mode) \ + A64_OP_LDST(AM_H,ld,A64_LDST_AM(0,_,offs9),mode,rn,rd) +#define A64_LDSTB_IMM(rd, rn, offs9, ld, mode) \ + A64_OP_LDST(AM_B,ld,A64_LDST_AM(0,_,offs9),mode,rn,rd) + +// NB: pre/postindex isn't available with register offset +#define A64_LDSTX_REG(rd, rn, rm, ld, opt) \ + A64_OP_LDST(AM_X,ld,A64_LDST_AM(1,rm,opt),AM_IDXREG,rn,rd) +#define A64_LDST_REG(rd, rn, rm, ld, opt) \ + A64_OP_LDST(AM_W,ld,A64_LDST_AM(1,rm,opt),AM_IDXREG,rn,rd) +#define A64_LDSTH_REG(rd, rn, rm, ld, opt) \ + A64_OP_LDST(AM_H,ld,A64_LDST_AM(1,rm,opt),AM_IDXREG,rn,rd) +#define A64_LDSTB_REG(rd, rn, rm, ld, opt) \ + A64_OP_LDST(AM_B,ld,A64_LDST_AM(1,rm,opt),AM_IDXREG,rn,rd) + +#define A64_LDSTPX_IMM(rn, r1, r2, offs7, ld, mode) \ + A64_INSN(0x4,0x5,(mode<<1)|ld,_,_,(offs7)&0x3f8,r2,rn,r1) + +// 64 bit stuff for pointer handling + +#define 
A64_ADDX_XREG(rd, rn, rm, xtopt, simm) \ + OP_SZ64|A64_OP_XREG(OP_ADD,rd,rn,rm,xtopt,simm) +#define A64_ADDX_REG(rd, rn, rm, stype, simm) \ + OP_SZ64|A64_ADD_REG(rd, rn, rm, stype, simm) +#define A64_ADDXS_REG(rd, rn, rm, stype, simm) \ + OP_SZ64|A64_ADDS_REG(rd, rn, rm, stype, simm) +#define A64_ORX_REG(rd, rn, rm, stype, simm) \ + OP_SZ64|A64_OR_REG(rd, rn, rm, stype, simm) +#define A64_TSTX_REG(rn, rm, stype, simm) \ + OP_SZ64|A64_TST_REG(rn, rm, stype, simm) +#define A64_MOVX_REG(rd, rm, stype, simm) \ + OP_SZ64|A64_MOV_REG(rd, rm, stype, simm) +#define A64_ADDX_IMM(rd, rn, imm12) \ + OP_SZ64|A64_ADD_IMM(rd, rn, imm12, 0) +#define A64_EORX_IMM(rd, rn, immr, imms) \ + OP_SZ64|OP_N64|A64_EOR_IMM(rd, rn, immr, imms) +#define A64_UXTX_IMM(rd, rn, bits) \ + OP_SZ64|OP_N64|A64_UXT_IMM(rd, rn, bits) +#define A64_LSRX_IMM(rd, rn, bits) \ + OP_SZ64|OP_N64|A64_LSR_IMM(rd, rn, bits)|(63<<10) + + +// XXX: tcache_ptr type for SVP and SH2 compilers differs.. +#define EMIT_PTR(ptr, x) \ + do { \ + *(u32 *)(ptr) = x; \ + ptr = (void *)((u8 *)(ptr) + sizeof(u32)); \ + } while (0) + +#define EMIT(op) \ + do { \ + EMIT_PTR(tcache_ptr, op); \ + COUNT_OP; \ + } while (0) + + +// if-then-else conditional execution helpers +#define JMP_POS(ptr) \ + ptr = tcache_ptr; \ + EMIT(A64_B(0)); + +#define JMP_EMIT(cond, ptr) { \ + u32 val_ = (u8 *)tcache_ptr - (u8 *)(ptr); \ + EMIT_PTR(ptr, A64_BCOND(cond, val_ & 0x001fffff)); \ +} + +#define JMP_EMIT_NC(ptr) { \ + u32 val_ = (u8 *)tcache_ptr - (u8 *)(ptr); \ + EMIT_PTR(ptr, A64_B(val_ & 0x0fffffff)); \ +} + +#define EMITH_JMP_START(cond) { \ + u8 *cond_ptr; \ + JMP_POS(cond_ptr) + +#define EMITH_JMP_END(cond) \ + JMP_EMIT(cond, cond_ptr); \ +} + +#define EMITH_JMP3_START(cond) { \ + u8 *cond_ptr, *else_ptr; \ + JMP_POS(cond_ptr) + +#define EMITH_JMP3_MID(cond) \ + JMP_POS(else_ptr); \ + JMP_EMIT(cond, cond_ptr); + +#define EMITH_JMP3_END() \ + JMP_EMIT_NC(else_ptr); \ +} + +// "simple" jump (no more then a few insns) +// ARM32 will use 
conditional instructions here +#define EMITH_SJMP_START EMITH_JMP_START +#define EMITH_SJMP_END EMITH_JMP_END + +#define EMITH_SJMP3_START EMITH_JMP3_START +#define EMITH_SJMP3_MID EMITH_JMP3_MID +#define EMITH_SJMP3_END EMITH_JMP3_END + +#define EMITH_SJMP2_START(cond) \ + EMITH_SJMP3_START(cond) +#define EMITH_SJMP2_MID(cond) \ + EMITH_SJMP3_MID(cond) +#define EMITH_SJMP2_END(cond) \ + EMITH_SJMP3_END() + + +// data processing, register +#define emith_move_r_r_ptr(d, s) \ + EMIT(A64_MOVX_REG(d, s, ST_LSL, 0)) +#define emith_move_r_r_ptr_c(cond, d, s) \ + emith_move_r_r_ptr(d, s) + +#define emith_move_r_r(d, s) \ + EMIT(A64_MOV_REG(d, s, ST_LSL, 0)) +#define emith_move_r_r_c(cond, d, s) \ + emith_move_r_r(d, s) + +#define emith_mvn_r_r(d, s) \ + EMIT(A64_MVN_REG(d, s, ST_LSL, 0)) + +#define emith_add_r_r_r_lsl_ptr(d, s1, s2, simm) do { \ + if (simm < 4) EMIT(A64_ADDX_XREG(d, s1, s2, XT_SXTW, simm)); \ + else EMIT(A64_ADDX_REG(d, s1, s2, ST_LSL, simm)); \ +} while (0) +#define emith_add_r_r_r_lsl(d, s1, s2, simm) \ + EMIT(A64_ADD_REG(d, s1, s2, ST_LSL, simm)) + +#define emith_addf_r_r_r_lsl(d, s1, s2, simm) \ + EMIT(A64_ADDS_REG(d, s1, s2, ST_LSL, simm)) + +#define emith_addf_r_r_r_lsr(d, s1, s2, simm) \ + EMIT(A64_ADDS_REG(d, s1, s2, ST_LSR, simm)) + +#define emith_sub_r_r_r_lsl(d, s1, s2, simm) \ + EMIT(A64_SUB_REG(d, s1, s2, ST_LSL, simm)) + +#define emith_subf_r_r_r_lsl(d, s1, s2, simm) \ + EMIT(A64_SUBS_REG(d, s1, s2, ST_LSL, simm)) + +#define emith_or_r_r_r_lsl(d, s1, s2, simm) \ + EMIT(A64_OR_REG(d, s1, s2, ST_LSL, simm)) + +#define emith_eor_r_r_r_lsl(d, s1, s2, simm) \ + EMIT(A64_EOR_REG(d, s1, s2, ST_LSL, simm)) + +#define emith_eor_r_r_r_lsr(d, s1, s2, simm) \ + EMIT(A64_EOR_REG(d, s1, s2, ST_LSR, simm)) + +#define emith_and_r_r_r_lsl(d, s1, s2, simm) \ + EMIT(A64_AND_REG(d, s1, s2, ST_LSL, simm)) + +#define emith_or_r_r_lsl(d, s, lslimm) \ + emith_or_r_r_r_lsl(d, d, s, lslimm) + +#define emith_eor_r_r_lsr(d, s, lsrimm) \ + emith_eor_r_r_r_lsr(d, d, s, 
lsrimm) + +#define emith_add_r_r_r(d, s1, s2) \ + emith_add_r_r_r_lsl(d, s1, s2, 0) + +#define emith_addf_r_r_r(d, s1, s2) \ + emith_addf_r_r_r_lsl(d, s1, s2, 0) + +#define emith_sub_r_r_r(d, s1, s2) \ + emith_sub_r_r_r_lsl(d, s1, s2, 0) + +#define emith_subf_r_r_r(d, s1, s2) \ + emith_subf_r_r_r_lsl(d, s1, s2, 0) + +#define emith_or_r_r_r(d, s1, s2) \ + emith_or_r_r_r_lsl(d, s1, s2, 0) + +#define emith_eor_r_r_r(d, s1, s2) \ + emith_eor_r_r_r_lsl(d, s1, s2, 0) + +#define emith_and_r_r_r(d, s1, s2) \ + emith_and_r_r_r_lsl(d, s1, s2, 0) + +#define emith_add_r_r_ptr(d, s) \ + emith_add_r_r_r_lsl_ptr(d, d, s, 0) +#define emith_add_r_r(d, s) \ + emith_add_r_r_r(d, d, s) + +#define emith_sub_r_r(d, s) \ + emith_sub_r_r_r(d, d, s) + +#define emith_neg_r_r(d, s) \ + EMIT(A64_NEG_REG(d, s, ST_LSL, 0)) + +#define emith_adc_r_r_r(d, s1, s2) \ + EMIT(A64_ADC_REG(d, s1, s2)) + +#define emith_adc_r_r(d, s) \ + EMIT(A64_ADC_REG(d, d, s)) + +#define emith_adcf_r_r_r(d, s1, s2) \ + EMIT(A64_ADCS_REG(d, s1, s2)) + +#define emith_sbcf_r_r_r(d, s1, s2) \ + EMIT(A64_SBCS_REG(d, s1, s2)) + +#define emith_and_r_r(d, s) \ + emith_and_r_r_r(d, d, s) +#define emith_and_r_r_c(cond, d, s) \ + emith_and_r_r(d, s) + +#define emith_or_r_r(d, s) \ + emith_or_r_r_r(d, d, s) + +#define emith_eor_r_r(d, s) \ + emith_eor_r_r_r(d, d, s) + +#define emith_tst_r_r_ptr(d, s) \ + EMIT(A64_TSTX_REG(d, s, ST_LSL, 0)) +#define emith_tst_r_r(d, s) \ + EMIT(A64_TST_REG(d, s, ST_LSL, 0)) + +#define emith_teq_r_r(d, s) do { \ + int _t = rcache_get_tmp(); \ + emith_eor_r_r_r(_t, d, s); \ + emith_cmp_r_imm(_t, 0); \ + rcache_free_tmp(_t); \ +} while (0) + +#define emith_cmp_r_r(d, s) \ + EMIT(A64_CMP_REG(d, s, ST_LSL, 0)) + +#define emith_addf_r_r(d, s) \ + emith_addf_r_r_r(d, d, s) + +#define emith_subf_r_r(d, s) \ + emith_subf_r_r_r(d, d, s) + +#define emith_adcf_r_r(d, s) \ + emith_adcf_r_r_r(d, d, s) + +#define emith_sbcf_r_r(d, s) \ + emith_sbcf_r_r_r(d, d, s) + +#define emith_negcf_r_r(d, s) \ + 
emith_sbcf_r_r_r(d, Z0, s) + + +// move immediate + +static void emith_move_imm64(int r, int wx, int64_t imm) +{ + int sz64 = wx ? OP_SZ64:0; + int c, s; + + if (!imm) { + EMIT(sz64|A64_MOVZ_IMM(r, imm, 0)); + return; + } + if (imm && -imm == (u16)-imm) { + EMIT(sz64|A64_MOVN_IMM(r, ~imm, 0)); + return; + } + + for (c = s = 0; s < (wx ? 4:2) && imm; s++, imm >>= 16) + if ((u16)(imm)) { + if (c++) EMIT(sz64|A64_MOVK_IMM(r, imm, s)); + else EMIT(sz64|A64_MOVZ_IMM(r, imm, s)); + } +} + +#define emith_move_r_ptr_imm(r, imm) \ + emith_move_imm64(r, 1, (intptr_t)(imm)) + +#define emith_move_r_imm(r, imm) \ + emith_move_imm64(r, 0, (s32)(imm)) +#define emith_move_r_imm_c(cond, r, imm) \ + emith_move_r_imm(r, imm) + + +// arithmetic, immediate +static void emith_arith_imm(int op, int wx, int rd, int rn, s32 imm) +{ + u32 sz64 = wx ? OP_SZ64:0; + + if (imm < 0) { + op ^= (OP_ADD ^ OP_SUB); + imm = -imm; + } + if (imm == 0) { + // value 0, must emit if op is *S or source isn't dest + if ((op & 1) || rd != rn) + EMIT(sz64|A64_OP_IMM12(op, rd, rn, 0, 0)); + } else if (imm >> 24) { + // value too large + int _t = rcache_get_tmp(); + emith_move_r_imm(_t, imm); + EMIT(sz64|A64_OP_REG(op, 0, rd, rn, _t, ST_LSL, 0)); + rcache_free_tmp(_t); + } else { + int rs = rn; + if ((imm) & 0x000fff) { + EMIT(sz64|A64_OP_IMM12(op, rd, rs, imm, 0)); rs = rd; + } + if ((imm) & 0xfff000) { + EMIT(sz64|A64_OP_IMM12(op, rd, rs, imm >>12, 1)); + } + } +} + +#define emith_add_r_imm(r, imm) \ + emith_arith_imm(OP_ADD, 0, r, r, imm) +#define emith_add_r_imm_c(cond, r, imm) \ + emith_add_r_imm(r, imm) + +#define emith_addf_r_imm(r, imm) \ + emith_arith_imm(OP_ADDS, 0, r, r, imm) + +#define emith_sub_r_imm(r, imm) \ + emith_arith_imm(OP_SUB, 0, r, r, imm) +#define emith_sub_r_imm_c(cond, r, imm) \ + emith_sub_r_imm(r, imm) + +#define emith_subf_r_imm(r, imm) \ + emith_arith_imm(OP_SUBS, 0, r, r, imm) + + +#define emith_adc_r_imm(r, imm) do { \ + int _t = rcache_get_tmp(); \ + emith_move_r_imm(_t, imm); \ 
+ emith_adc_r_r(r, _t); \ + rcache_free_tmp(_t); \ +} while (0) + +#define emith_adcf_r_imm(r, imm) do { \ + int _t = rcache_get_tmp(); \ + emith_move_r_imm(_t, imm); \ + emith_adcf_r_r(r, _t); \ + rcache_free_tmp(_t); \ +} while (0) + +#define emith_cmp_r_imm(r, imm) do { \ + u32 op_ = OP_SUBS, imm_ = (u8)imm; \ + if ((s8)imm_ < 0) { \ + imm_ = (u8)-imm_; \ + op_ = OP_ADDS; \ + } \ + EMIT(A64_OP_IMM12(op_, Z0, r, imm_, 0)); \ +} while (0) + + +#define emith_add_r_r_ptr_imm(d, s, imm) \ + emith_arith_imm(OP_ADD, 1, d, s, imm) + +#define emith_add_r_r_imm(d, s, imm) \ + emith_arith_imm(OP_ADD, 0, d, s, imm) + +#define emith_sub_r_r_imm(d, s, imm) \ + emith_arith_imm(OP_SUB, 0, d, s, imm) +#define emith_sub_r_r_imm_c(cond, d, s, imm) \ + emith_sub_r_r_imm(d, s, imm) + +#define emith_subf_r_r_imm(d, s, imm) \ + emith_arith_imm(OP_SUBS, 0, d, s, imm) + + +// logical, immediate; the value describes a bitmask, see ARMv8 ArchRefMan +// NB: deal only with simple masks 0{n}1{m}0{o} or 1{n}0{m}1{o}, 0 16) { + emith_move_r_imm(_t, ~imm); + EMIT(sz64|A64_OP_REG(op, 1, rd, rn, _t, ST_LSL, 0)); + } else { + emith_move_r_imm(_t, imm); + EMIT(sz64|A64_OP_REG(op, 0, rd, rn, _t, ST_LSL, 0)); + } + rcache_free_tmp(_t); + } +} + +#define emith_and_r_imm(r, imm) \ + emith_log_imm(OP_AND, 0, r, r, imm) + +#define emith_or_r_imm(r, imm) \ + emith_log_imm(OP_OR, 0, r, r, imm) +#define emith_or_r_imm_c(cond, r, imm) \ + emith_or_r_imm(r, imm) + +#define emith_eor_r_imm_ptr(r, imm) \ + emith_log_imm(OP_EOR, 1, r, r, imm) +#define emith_eor_r_imm_ptr_c(cond, r, imm) \ + emith_eor_r_imm_ptr(r, imm) + +#define emith_eor_r_imm(r, imm) \ + emith_log_imm(OP_EOR, 0, r, r, imm) +#define emith_eor_r_imm_c(cond, r, imm) \ + emith_eor_r_imm(r, imm) + +/* NB: BIC #imm not available in A64; use AND #~imm instead */ +#define emith_bic_r_imm(r, imm) \ + emith_log_imm(OP_AND, 0, r, r, ~(imm)) +#define emith_bic_r_imm_c(cond, r, imm) \ + emith_bic_r_imm(r, imm) + +#define emith_tst_r_imm(r, imm) \ + 
emith_log_imm(OP_ANDS, 0, Z0, r, imm) +#define emith_tst_r_imm_c(cond, r, imm) \ + emith_tst_r_imm(r, imm) + +#define emith_and_r_r_imm(d, s, imm) \ + emith_log_imm(OP_AND, 0, d, s, imm) + +#define emith_or_r_r_imm(d, s, imm) \ + emith_log_imm(OP_OR, 0, d, s, imm) + +#define emith_eor_r_r_imm(d, s, imm) \ + emith_log_imm(OP_EOR, 0, d, s, imm) + + +// shift +#define emith_lsl(d, s, cnt) \ + EMIT(A64_LSL_IMM(d, s, cnt)) + +#define emith_lsr(d, s, cnt) \ + EMIT(A64_LSR_IMM(d, s, cnt)) + +#define emith_asr(d, s, cnt) \ + EMIT(A64_ASR_IMM(d, s, cnt)) + +#define emith_ror(d, s, cnt) \ + EMIT(A64_ROR_IMM(d, s, cnt)) +#define emith_ror_c(cond, d, s, cnt) \ + emith_ror(d, s, cnt) + +#define emith_rol(d, s, cnt) \ + EMIT(A64_ROR_IMM(d, s, 32-(cnt))) + +// NB: shift with carry not directly supported in A64 :-|. +#define emith_lslf(d, s, cnt) do { \ + if ((cnt) > 1) { \ + emith_lsl(d, s, cnt-1); \ + emith_addf_r_r_r(d, d, d); \ + } else if ((cnt) > 0) \ + emith_addf_r_r_r(d, s, s); \ +} while (0) + +#define emith_lsrf(d, s, cnt) do { \ + EMIT(A64_RBIT_REG(d, s)); \ + emith_lslf(d, d, cnt); \ + EMIT(A64_RBIT_REG(d, d)); \ +} while (0) + +#define emith_asrf(d, s, cnt) do { \ + int _s = s; \ + if ((cnt) > 1) { \ + emith_asr(d, s, cnt-1); \ + _s = d; \ + } \ + if ((cnt) > 0) { \ + emith_addf_r_r_r(Z0, _s, _s); \ + EMIT(A64_RBIT_REG(d, _s)); \ + emith_adcf_r_r_r(d, d, d); \ + EMIT(A64_RBIT_REG(d, d)); \ + } \ +} while (0) + +#define emith_rolf(d, s, cnt) do { \ + int _s = s; \ + if ((cnt) > 1) { \ + emith_rol(d, s, cnt-1); \ + _s = d; \ + } \ + if ((cnt) > 0) { \ + emith_addf_r_r_r(d, _s, _s); \ + emith_adc_r_r_r(d, d, Z0); \ + } \ +} while (0) + +#define emith_rorf(d, s, cnt) do { \ + if ((cnt) > 0) { \ + emith_ror(d, s, cnt); \ + emith_addf_r_r_r(Z0, d, d); \ + } \ +} while (0) + +#define emith_rolcf(d) \ + emith_adcf_r_r(d, d) + +#define emith_rorcf(d) do { \ + EMIT(A64_RBIT_REG(d, d)); \ + emith_adcf_r_r(d, d); \ + EMIT(A64_RBIT_REG(d, d)); \ +} while (0) + +// signed/unsigned 
extend +#define emith_clear_msb(d, s, count) /* bits to clear */ \ + EMIT(A64_UXT_IMM(d, s, 32-(count))) +#define emith_clear_msb_c(cond, d, s, count) \ + emith_clear_msb(d, s, count) + +#define emith_sext(d, s, count) /* bits to keep */ \ + EMIT(A64_SXT_IMM(d, s, count)) + +// multiply Rd = Rn*Rm (+ Ra) +#define emith_mul(d, s1, s2) \ + EMIT(A64_MUL(d, s1, s2)) + +// NB: must combine/split Xd from/into 2 Wd's; play safe and clear upper bits +#define emith_combine64(dlo, dhi) \ + EMIT(A64_UXTX_IMM(dlo, dlo, 32)); \ + EMIT(A64_ORX_REG(dlo, dlo, dhi, ST_LSL, 32)); + +#define emith_split64(dlo, dhi) \ + EMIT(A64_LSRX_IMM(dhi, dlo, 32)); \ + EMIT(A64_UXTX_IMM(dlo, dlo, 32)); + +#define emith_mul_u64(dlo, dhi, s1, s2) do { \ + EMIT(A64_UMULL(dlo, s1, s2)); \ + emith_split64(dlo, dhi); \ +} while (0) + +#define emith_mul_s64(dlo, dhi, s1, s2) do { \ + EMIT(A64_SMULL(dlo, s1, s2)); \ + emith_split64(dlo, dhi); \ +} while (0) + +#define emith_mula_s64(dlo, dhi, s1, s2) do { \ + emith_combine64(dlo, dhi); \ + EMIT(A64_SMADDL(dlo, s1, s2, dlo)); \ + emith_split64(dlo, dhi); \ +} while (0) +#define emith_mula_s64_c(cond, dlo, dhi, s1, s2) \ + emith_mula_s64(dlo, dhi, s1, s2) + +// load/store. 
offs has 9 bits signed, hence larger offs may use a temp +static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) +{ + if (o9 >= -256 && o9 < 256) { + EMIT(A64_OP_LDST(sz, ld, A64_LDST_AM(0,_,o9), mode, rn, rd)); + } else if (mode == AM_IDXPRE) { + emith_add_r_r_ptr_imm(rn, rn, o9); + EMIT(A64_OP_LDST(sz, ld, A64_LDST_AM(0,_,0), AM_IDX, rn, rd)); + } else if (mode == AM_IDXPOST) { + EMIT(A64_OP_LDST(sz, ld, A64_LDST_AM(0,_,0), AM_IDX, rn, rd)); + emith_add_r_r_ptr_imm(rn, rn, o9); + } else { + int _t = rcache_get_tmp(); + emith_add_r_r_ptr_imm(_t, rn, o9); + EMIT(A64_OP_LDST(sz, ld, A64_LDST_AM(0,_,0), AM_IDX, _t, rd)); + rcache_free_tmp(_t); + } +} + +#define emith_read_r_r_offs_ptr(r, rs, offs) \ + emith_ldst_offs(AM_X, r, rs, offs, LT_LD, AM_IDX) +#define emith_read_r_r_offs_ptr_c(cond, r, rs, offs) \ + emith_read_r_r_offs_ptr(r, rs, offs) + +#define emith_read_r_r_offs(r, rs, offs) \ + emith_ldst_offs(AM_W, r, rs, offs, LT_LD, AM_IDX) +#define emith_read_r_r_offs_c(cond, r, rs, offs) \ + emith_read_r_r_offs(r, rs, offs) + +#define emith_read_r_r_r_ptr(r, rs, rm) \ + EMIT(A64_LDSTX_REG(r, rs, rm, LT_LD, XT_SXTW)) + +#define emith_read_r_r_r(r, rs, rm) \ + EMIT(A64_LDST_REG(r, rs, rm, LT_LD, XT_SXTW)) +#define emith_read_r_r_r_c(cond, r, rs, rm) \ + emith_read_r_r_r(r, rs, rm) + +#define emith_read_r_r_r_ptr_wb(r, rs, rm) do { \ + emith_read_r_r_r_ptr(r, rs, rm); \ + emith_add_r_r_ptr(rs, rm); \ +} while (0) +#define emith_read_r_r_r_wb(r, rs, rm) do { \ + emith_read_r_r_r(r, rs, rm); \ + emith_add_r_r_ptr(rs, rm); \ +} while (0) + +#define emith_read8_r_r_offs(r, rs, offs) \ + emith_ldst_offs(AM_B, r, rs, offs, LT_LD, AM_IDX) +#define emith_read8_r_r_offs_c(cond, r, rs, offs) \ + emith_read8_r_r_offs(r, rs, offs) + +#define emith_read8_r_r_r(r, rs, rm) \ + EMIT(A64_LDSTB_REG(r, rs, rm, LT_LD, XT_SXTW)) +#define emith_read8_r_r_r_c(cond, r, rs, rm) \ + emith_read8_r_r_r(r, rs, rm) + +#define emith_read16_r_r_offs(r, rs, offs) \ + 
emith_ldst_offs(AM_H, r, rs, offs, LT_LD, AM_IDX) +#define emith_read16_r_r_offs_c(cond, r, rs, offs) \ + emith_read16_r_r_offs(r, rs, offs) + +#define emith_read16_r_r_r(r, rs, rm) \ + EMIT(A64_LDSTH_REG(r, rs, rm, LT_LD, XT_SXTW)) +#define emith_read16_r_r_r_c(cond, r, rs, rm) \ + emith_read16_r_r_r(r, rs, rm) + +#define emith_read8s_r_r_offs(r, rs, offs) \ + emith_ldst_offs(AM_B, r, rs, offs, LT_LDS, AM_IDX) +#define emith_read8s_r_r_offs_c(cond, r, rs, offs) \ + emith_read8s_r_r_offs(r, rs, offs) + +#define emith_read8s_r_r_r(r, rs, rm) \ + EMIT(A64_LDSTB_REG(r, rs, rm, LT_LDS, XT_SXTW)) +#define emith_read8s_r_r_r_c(cond, r, rs, rm) \ + emith_read8s_r_r_r(r, rs, rm) + +#define emith_read16s_r_r_offs(r, rs, offs) \ + emith_ldst_offs(AM_H, r, rs, offs, LT_LDS, AM_IDX) +#define emith_read16s_r_r_offs_c(cond, r, rs, offs) \ + emith_read16s_r_r_offs(r, rs, offs) + +#define emith_read16s_r_r_r(r, rs, rm) \ + EMIT(A64_LDSTH_REG(r, rs, rm, LT_LDS, XT_SXTW)) +#define emith_read16s_r_r_r_c(cond, r, rs, rm) \ + emith_read16s_r_r_r(r, rs, rm) + + +#define emith_write_r_r_offs_ptr(r, rs, offs) \ + emith_ldst_offs(AM_X, r, rs, offs, LT_ST, AM_IDX) +#define emith_write_r_r_offs_ptr_c(cond, r, rs, offs) \ + emith_write_r_r_offs_ptr(r, rs, offs) + +#define emith_write_r_r_r_ptr(r, rs, rm) \ + EMIT(A64_LDSTX_REG(r, rs, rm, LT_ST, XT_SXTW)) +#define emith_write_r_r_r_ptr_c(cond, r, rs, rm) \ + emith_write_r_r_r_ptr(r, rs, rm) + +#define emith_write_r_r_offs(r, rs, offs) \ + emith_ldst_offs(AM_W, r, rs, offs, LT_ST, AM_IDX) +#define emith_write_r_r_offs_c(cond, r, rs, offs) \ + emith_write_r_r_offs(r, rs, offs) + +#define emith_write_r_r_r(r, rs, rm) \ + EMIT(A64_LDST_REG(r, rs, rm, LT_ST, XT_SXTW)) +#define emith_write_r_r_r_c(cond, r, rs, rm) \ + emith_write_r_r_r(r, rs, rm) + +#define emith_write_r_r_r_ptr_wb(r, rs, rm) do { \ + emith_write_r_r_r_ptr(r, rs, rm); \ + emith_add_r_r_ptr(rs, rm); \ +} while (0) +#define emith_write_r_r_r_wb(r, rs, rm) do { \ + emith_write_r_r_r(r, 
rs, rm); \ + emith_add_r_r_ptr(rs, rm); \ +} while (0) + +#define emith_ctx_read_ptr(r, offs) \ + emith_read_r_r_offs_ptr(r, CONTEXT_REG, offs) + +#define emith_ctx_read(r, offs) \ + emith_read_r_r_offs(r, CONTEXT_REG, offs) +#define emith_ctx_read_c(cond, r, offs) \ + emith_ctx_read(r, offs) + +#define emith_ctx_write_ptr(r, offs) \ + emith_write_r_r_offs_ptr(r, CONTEXT_REG, offs) + +#define emith_ctx_write(r, offs) \ + emith_write_r_r_offs(r, CONTEXT_REG, offs) + +#define emith_ctx_read_multiple(r, offs, cnt, tmpr) do { \ + int r_ = r, offs_ = offs, cnt_ = cnt; \ + for (; cnt_ > 0; r_++, offs_ += 4, cnt_--) \ + emith_ctx_read(r_, offs_); \ +} while (0) + +#define emith_ctx_write_multiple(r, offs, cnt, tmpr) do { \ + int r_ = r, offs_ = offs, cnt_ = cnt; \ + for (; cnt_ > 0; r_++, offs_ += 4, cnt_--) \ + emith_ctx_write(r_, offs_); \ +} while (0) + +// push pairs; NB: SP must be 16 byte aligned (HW requirement!) +#define emith_push2(r1, r2) \ + EMIT(A64_LDSTPX_IMM(SP, r1, r2, -2*8, LT_ST, AM_IDXPRE)) +#define emith_pop2(r1, r2) \ + EMIT(A64_LDSTPX_IMM(SP, r1, r2, 2*8, LT_LD, AM_IDXPOST)) + +// function call handling +#define emith_save_caller_regs(mask) do { \ + int _c, _r1, _r2; u32 _m = mask & 0x3ffff; \ + if (__builtin_parity(_m) == 1) _m |= 0x40000; /* hardware align */ \ + for (_c = HOST_REGS, _r1 = -1; _m && _c >= 0; _m &= ~(1 << _c), _c--) \ + if (_m & (1 << _c)) { \ + _r2 = _r1, _r1 = _c; \ + if (_r2 != -1) { \ + emith_push2(_r1, _r2); \ + _r1 = -1; \ + } \ + } \ +} while (0) + +#define emith_restore_caller_regs(mask) do { \ + int _c, _r1, _r2; u32 _m = mask & 0x3ffff; \ + if (__builtin_parity(_m) == 1) _m |= 0x40000; /* hardware align */ \ + for (_c = 0, _r1 = -1; _m && _c < HOST_REGS; _m &= ~(1 << _c), _c++) \ + if (_m & (1 << _c)) { \ + _r2 = _r1, _r1 = _c; \ + if (_r2 != -1) { \ + emith_pop2(_r2, _r1); \ + _r1 = -1; \ + } \ + } \ +} while (0) + +#define host_arg2reg(rd, arg) \ + rd = arg + +#define emith_pass_arg_r(arg, reg) \ + emith_move_r_r(arg, 
reg) + +#define emith_pass_arg_imm(arg, imm) \ + emith_move_r_imm(arg, imm) + +// branching; NB: A64 B.cond has only +/- 1MB range +#define emith_bcond(ptr, patch, cond, target) do { \ + u32 disp_ = (u8 *)target - (u8 *)ptr; \ + if (disp_ >= 0xfff00000 || disp_ <= 0x000fffff) { /* can use near B.c */ \ + EMIT_PTR(ptr, A64_BCOND(cond, disp_ & 0x001fffff)); \ + if (patch) EMIT_PTR(ptr, A64_NOP); /* reserve space for far B */ \ + } else { /* far branch if near branch isn't possible */ \ + EMIT_PTR(ptr, A64_BCOND(emith_invert_cond(cond), 8)); \ + EMIT_PTR(ptr, A64_B((disp_ - 4) & 0x0fffffff)); \ + } \ +} while (0) + +#define emith_jump(target) do {\ + u32 disp_ = (u8 *)target - (u8 *)tcache_ptr; \ + EMIT(A64_B(disp_ & 0x0fffffff)); \ +} while (0) + +#define emith_jump_patchable(target) \ + emith_jump(target) + +#define emith_jump_cond(cond, target) \ + emith_bcond(tcache_ptr, 0, cond, target) + +#define emith_jump_cond_patchable(cond, target) \ + emith_bcond(tcache_ptr, 1, cond, target) + +#define emith_jump_patch(ptr, target) ({ \ + u32 *ptr_ = (u32 *)ptr; \ + u32 disp_ = (u8 *)(target) - (u8 *)(ptr_); \ + int cond_ = ptr_[0] & 0xf; \ + if ((ptr_[0] & 0xff000000) == 0x54000000) { /* B.cond */ \ + if (ptr_[1] != A64_NOP) cond_ = emith_invert_cond(cond_); \ + emith_bcond(ptr_, 1, cond_, target); \ + } else if (ptr_[0] & 0x80000000) \ + EMIT_PTR(ptr_, A64_BL((disp_) & 0x0fffffff)); \ + else EMIT_PTR(ptr_, A64_B((disp_) & 0x0fffffff)); \ + (u8 *)ptr; \ +}) + +#define emith_jump_reg(r) \ + EMIT(A64_BR(r)) +#define emith_jump_reg_c(cond, r) \ + emith_jump_reg(r) + +#define emith_jump_ctx(offs) do { \ + int _t = rcache_get_tmp(); \ + emith_ctx_read_ptr(_t, offs); \ + emith_jump_reg(_t); \ + rcache_free_tmp(_t); \ +} while (0) +#define emith_jump_ctx_c(cond, offs) \ + emith_jump_ctx(offs) + +#define emith_call(target) do { \ + u32 disp_ = (u8 *)target - (u8 *)tcache_ptr; \ + EMIT(A64_BL(disp_ & 0x0fffffff)); \ +} while (0) +#define emith_call_cond(cond, target) \ + 
emith_call(target) + +#define emith_call_reg(r) \ + EMIT(A64_BLR(r)) + +#define emith_call_ctx(offs) do { \ + int _t = rcache_get_tmp(); \ + emith_ctx_read_ptr(_t, offs); \ + emith_call_reg(_t); \ + rcache_free_tmp(_t); \ +} while (0) + +#define emith_call_link(r, target) do { \ + EMIT(A64_ADRXLIT_IMM(r, 8)); \ + emith_jump(target); \ +} while (0) + +#define emith_call_cleanup() /**/ + +#define emith_ret() \ + EMIT(A64_RET(LR)) +#define emith_ret_c(cond) \ + emith_ret() + +#define emith_ret_to_ctx(offs) \ + emith_ctx_write_ptr(LR, offs) + +// NB: pushes r or r18 for SP hardware alignment +#define emith_push_ret(r) do { \ + int r_ = (r >= 0 ? r : 18); \ + emith_push2(r_, LR); \ +} while (0) + +#define emith_pop_and_ret(r) do { \ + int r_ = (r >= 0 ? r : 18); \ + emith_pop2(r_, LR); \ + emith_ret(); \ +} while (0) + + +// emitter ABI stuff +#define emith_pool_check() /**/ +#define emith_pool_commit(j) /**/ +#define emith_insn_ptr() ((u8 *)tcache_ptr) +#define emith_flush() /**/ +#define host_instructions_updated(base, end) __builtin___clear_cache(base, end) +#define emith_jump_patch_size() 8 + + +// SH2 drc specific +#define emith_sh2_drc_entry() do { \ + emith_push2(LR, FP); \ + emith_push2(28, 27); \ + emith_push2(26, 25); \ + emith_push2(24, 23); \ + emith_push2(22, 21); \ + emith_push2(20, 19); \ +} while (0) +#define emith_sh2_drc_exit() do { \ + emith_pop2(20, 19); \ + emith_pop2(22, 21); \ + emith_pop2(24, 23); \ + emith_pop2(26, 25); \ + emith_pop2(28, 27); \ + emith_pop2(LR, FP); \ + emith_ret(); \ +} while (0) + +// NB: assumes a is in arg0, tab, func and mask are temp +#define emith_sh2_rcall(a, tab, func, mask) do { \ + emith_lsr(mask, a, SH2_READ_SHIFT); \ + EMIT(A64_ADDX_REG(tab, tab, mask, ST_LSL, 4)); \ + emith_read_r_r_offs_ptr(func, tab, 0); \ + emith_read_r_r_offs(mask, tab, 8); \ + EMIT(A64_ADDXS_REG(func, func, func, ST_LSL, 0)); \ +} while (0) + +// NB: assumes a, val are in arg0 and arg1, tab and func are temp +#define emith_sh2_wcall(a, val, 
tab, func) do { \ + emith_lsr(func, a, SH2_WRITE_SHIFT); \ + emith_lsl(func, func, 3); \ + emith_read_r_r_r_ptr(func, tab, func); \ + emith_move_r_r_ptr(2, CONTEXT_REG); /* arg2 */ \ + emith_jump_reg(func); \ +} while (0) + +#define emith_sh2_delay_loop(cycles, reg) do { \ + int sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); \ + int t1 = rcache_get_tmp(); \ + int t2 = rcache_get_tmp(); \ + int t3 = rcache_get_tmp(); \ + /* if (sr < 0) return */ \ + emith_asrf(t2, sr, 12); \ + EMITH_JMP_START(DCOND_LE); \ + /* turns = sr.cycles / cycles */ \ + emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)) + 1); \ + emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */ \ + rcache_free_tmp(t3); \ + if (reg >= 0) { \ + /* if (reg <= turns) turns = reg-1 */ \ + t3 = rcache_get_reg(reg, RC_GR_RMW, NULL); \ + emith_cmp_r_r(t3, t2); \ + EMITH_SJMP_START(DCOND_HI); \ + emith_sub_r_r_imm_c(DCOND_LS, t2, t3, 1); \ + EMITH_SJMP_END(DCOND_HI); \ + /* if (reg <= 1) turns = 0 */ \ + emith_cmp_r_imm(t3, 1); \ + EMITH_SJMP_START(DCOND_HI); \ + emith_move_r_imm_c(DCOND_LS, t2, 0); \ + EMITH_SJMP_END(DCOND_HI); \ + /* reg -= turns */ \ + emith_sub_r_r(t3, t2); \ + } \ + /* sr.cycles -= turns * cycles; */ \ + emith_move_r_imm(t1, cycles); \ + emith_mul(t1, t2, t1); \ + emith_sub_r_r_r_lsl(sr, sr, t1, 12); \ + EMITH_JMP_END(DCOND_LE); \ + rcache_free_tmp(t1); \ + rcache_free_tmp(t2); \ +} while (0) + +/* + * if Q + * t = carry(Rn += Rm) + * else + * t = carry(Rn -= Rm) + * T ^= t + */ +#define emith_sh2_div1_step(rn, rm, sr) do { \ + int tmp_ = rcache_get_tmp(); \ + emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \ + EMITH_SJMP3_START(DCOND_EQ); \ + emith_addf_r_r(rn, rm); \ + emith_adc_r_r_r(tmp_, Z0, Z0); \ + EMITH_SJMP3_MID(DCOND_EQ); \ + emith_subf_r_r(rn, rm); \ + emith_adc_r_r_r(tmp_, Z0, Z0); \ + emith_eor_r_imm(tmp_, 1); \ + EMITH_SJMP3_END(); \ + emith_eor_r_r(sr, tmp_); \ + rcache_free_tmp(tmp_); \ +} while (0) + +/* mh:ml += rn*rm, does saturation if required by S bit. 
rn, rm must be TEMP */ +#define emith_sh2_macl(ml, mh, rn, rm, sr) do { \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* MACH top 16 bits unused if saturated. sign ext for overfl detect */ \ + emith_sext(mh, mh, 16); \ + EMITH_SJMP_END(DCOND_EQ); \ + emith_mula_s64(ml, mh, rn, rm); \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* overflow if top 17 bits of MACH aren't all 1 or 0 */ \ + /* to check: add MACH[15] to MACH[31:16]. this is 0 if no overflow */ \ + emith_asrf(rn, mh, 16); /* sum = (MACH>>16) + ((MACH>>15)&1) */ \ + emith_adcf_r_imm(rn, 0); /* (MACH>>15) is in carry after shift */ \ + EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> ov */ \ + emith_move_r_imm_c(DCOND_NE, ml, 0x0000); /* -overflow */ \ + emith_move_r_imm_c(DCOND_NE, mh, 0x8000); \ + EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> +ovl */ \ + emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0xffffffff */ \ + emith_sub_r_imm_c(DCOND_GT, mh, 1); /* 0x00007fff */ \ + EMITH_SJMP_END(DCOND_LE); \ + EMITH_SJMP_END(DCOND_EQ); \ + EMITH_SJMP_END(DCOND_EQ); \ +} while (0) + +/* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */ +#define emith_sh2_macw(ml, mh, rn, rm, sr) do { \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* XXX: MACH should be untouched when S is set? */ \ + emith_asr(mh, ml, 31); /* sign ext MACL to MACH for ovrfl check */ \ + EMITH_SJMP_END(DCOND_EQ); \ + emith_mula_s64(ml, mh, rn, rm); \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* overflow if top 33 bits of MACH:MACL aren't all 1 or 0 */ \ + /* to check: add MACL[31] to MACH. this is 0 if no overflow */ \ + emith_lsr(rn, ml, 31); \ + emith_addf_r_r(rn, mh); /* sum = MACH + ((MACL>>31)&1) */ \ + EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \ + /* XXX: LSB signalling only in SH1, or in SH2 too? 
*/ \ + emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \ + emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* negative ovrfl */ \ + EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> positive ovrfl */ \ + emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0x7fffffff */ \ + EMITH_SJMP_END(DCOND_LE); \ + EMITH_SJMP_END(DCOND_EQ); \ + EMITH_SJMP_END(DCOND_EQ); \ +} while (0) + +#define emith_write_sr(sr, srcr) do { \ + emith_lsr(sr, sr, 10); \ + emith_or_r_r_r_lsl(sr, sr, srcr, 22); \ + emith_ror(sr, sr, 22); \ +} while (0) + +#define emith_carry_to_t(srr, is_sub) do { \ + emith_lsr(sr, sr, 1); \ + emith_adc_r_r(sr, sr); \ + if (is_sub) /* SUB has inverted C on ARM */ \ + emith_eor_r_imm(sr, 1); \ +} while (0) + +#define emith_tpop_carry(sr, is_sub) do { \ + if (is_sub) \ + emith_eor_r_imm(sr, 1); \ + emith_lsrf(sr, sr, 1); \ +} while (0) + +#define emith_tpush_carry(sr, is_sub) do { \ + emith_adc_r_r(sr, sr); \ + if (is_sub) \ + emith_eor_r_imm(sr, 1); \ +} while (0) + +#ifdef T +// T bit handling +#define emith_invert_cond(cond) \ + ((cond) ^ 1) + +static void emith_clr_t_cond(int sr) +{ + emith_bic_r_imm(sr, T); +} + +static void emith_set_t_cond(int sr, int cond) +{ + EMITH_SJMP_START(emith_invert_cond(cond)); + emith_or_r_imm_c(cond, sr, T); + EMITH_SJMP_END(emith_invert_cond(cond)); +} + +#define emith_get_t_cond() -1 + +#define emith_sync_t(sr) ((void)sr) + +#define emith_invalidate_t() + +static void emith_set_t(int sr, int val) +{ + if (val) + emith_or_r_imm(sr, T); + else + emith_bic_r_imm(sr, T); +} + +static int emith_tst_t(int sr, int tf) +{ + emith_tst_r_imm(sr, T); + return tf ? DCOND_NE: DCOND_EQ; +} +#endif diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 01fc6ae1..0083dc42 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -466,6 +466,56 @@ static cache_reg_t cache_regs[] = { { 7, HRF_REG }, }; +#elif defined(__aarch64__) +#include "../drc/emit_arm64.c" + +static guest_reg_t guest_regs[] = { + // SHR_R0 .. 
SHR_SP + { GRF_STATIC,20 }, { GRF_STATIC,21 }, { 0 } , { 0 } , + { 0 } , { 0 } , { 0 } , { 0 } , + { 0 } , { 0 } , { 0 } , { 0 } , + { 0 } , { 0 } , { 0 } , { 0 } , + // SHR_PC, SHR_PPC, SHR_PR, SHR_SR, + // SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL, + { 0 } , { 0 } , { 0 } , { GRF_STATIC, 22 }, + { 0 } , { 0 } , { 0 } , { 0 } , +}; + +// AAPCS64: params: r0-r7, return: r0-r1, temp: r8-r17, saved: r19-r29 +// saved: r18 (for platform use) +// since drc never needs more than 4 parameters, r4-r7 are treated as temp. +static cache_reg_t cache_regs[] = { + { 17, HRF_TEMP }, // temps + { 16, HRF_TEMP }, + { 15, HRF_TEMP }, + { 14, HRF_TEMP }, + { 13, HRF_TEMP }, + { 12, HRF_TEMP }, + { 11, HRF_TEMP }, + { 10, HRF_TEMP }, + { 9, HRF_TEMP }, + { 8, HRF_TEMP }, + { 7, HRF_TEMP }, + { 6, HRF_TEMP }, + { 5, HRF_TEMP }, + { 4, HRF_TEMP }, + { 3, HRF_TEMP }, // params + { 2, HRF_TEMP }, + { 1, HRF_TEMP }, + { 0, HRF_TEMP }, // RET_REG + { 22, HRF_LOCKED }, // statics + { 21, HRF_LOCKED }, + { 20, HRF_LOCKED }, + { 29, HRF_REG }, // other regs + { 28, HRF_REG }, + { 27, HRF_REG }, + { 26, HRF_REG }, + { 25, HRF_REG }, + { 24, HRF_REG }, + { 23, HRF_REG }, + { 22, HRF_REG }, +}; + #elif defined(__mips__) #include "../drc/emit_mips.c" diff --git a/cpu/sh2/compiler.h b/cpu/sh2/compiler.h index 09f4ae97..1ad922b7 100644 --- a/cpu/sh2/compiler.h +++ b/cpu/sh2/compiler.h @@ -36,6 +36,8 @@ unsigned short scan_block(unsigned int base_pc, int is_slave, // XXX MUST match definitions in cpu/sh2/compiler.c #if defined(__arm__) #define DRC_SR_REG r10 +#elif defined(__aarch64__) +#define DRC_SR_REG r22 #elif defined(__mips__) #define DRC_SR_REG s6 #elif defined(__i386__) From 8284ab710785099f861cdd10d7b1170eaf40828c Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 16 Aug 2019 15:14:41 +0200 Subject: [PATCH 0208/1110] various small fixes and optimsations --- Makefile | 8 ++++++++ Makefile.libretro | 3 ++- cpu/drc/emit_arm.c | 2 ++ cpu/drc/emit_arm64.c | 1 + cpu/drc/emit_mips.c | 5 +++-- 
cpu/drc/emit_x86.c | 2 ++ cpu/sh2/compiler.c | 16 ++++++++-------- pico/32x/memory.c | 27 ++++++++++++--------------- tools/mkoffsets.sh | 2 +- 9 files changed, 39 insertions(+), 27 deletions(-) diff --git a/Makefile b/Makefile index 5f64f713..96ccb6ca 100644 --- a/Makefile +++ b/Makefile @@ -236,6 +236,14 @@ pico/cd/cd_file.o: CFLAGS += -fno-strict-aliasing pico/cd/pcm.o: CFLAGS += -fno-strict-aliasing pico/cd/LC89510.o: CFLAGS += -fno-strict-aliasing pico/cd/gfx_cd.o: CFLAGS += -fno-strict-aliasing +ifeq (1,$(use_sh2drc)) +ifneq (,$(findstring -flto,$(CFLAGS))) +# if using the DRC, memory and sh2soc use a global register variable to avoid +# saving and reloading the SH2 SR. However, this collides with the use of LTO. +pico/32x/memory.o: CFLAGS += -fno-lto +pico/32x/sh2soc.o: CFLAGS += -fno-lto +endif +endif # fame needs ~2GB of RAM to compile on gcc 4.8 # on x86, this is reduced by ~300MB when debug info is off (but not on ARM) diff --git a/Makefile.libretro b/Makefile.libretro index 1e07d50f..51da9828 100644 --- a/Makefile.libretro +++ b/Makefile.libretro @@ -26,7 +26,7 @@ CFLAGS ?= STATIC_LINKING:= 0 TARGET_NAME := picodrive LIBM := -lm -GIT_VERSION ?= " $(shell git rev-parse --short HEAD || echo unknown)" +GIT_VERSION ?= $(shell git rev-parse --short HEAD || echo unknown) ifneq ($(GIT_VERSION)," unknown") CFLAGS += -DGIT_VERSION=\"$(GIT_VERSION)\" endif @@ -427,6 +427,7 @@ else ifeq ($(platform), gcw0) use_fame = 1 use_drz80 = 0 use_cz80 = 1 + use_sh2drc = 1 # Windows else diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index 72542a3f..a4aa2ec6 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -1174,6 +1174,8 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) #define host_arg2reg(rd, arg) \ rd = arg +#define emith_rw_offs_max() 0xff + /* SH2 drc specific */ /* pushes r12 for eabi alignment */ #define emith_sh2_drc_entry() \ diff --git a/cpu/drc/emit_arm64.c b/cpu/drc/emit_arm64.c index 90010d80..a67f6819 100644 --- 
a/cpu/drc/emit_arm64.c +++ b/cpu/drc/emit_arm64.c @@ -1117,6 +1117,7 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) #define emith_flush() /**/ #define host_instructions_updated(base, end) __builtin___clear_cache(base, end) #define emith_jump_patch_size() 8 +#define emith_rw_offs_max() 0xff // SH2 drc specific diff --git a/cpu/drc/emit_mips.c b/cpu/drc/emit_mips.c index f56b89a3..91d493b5 100644 --- a/cpu/drc/emit_mips.c +++ b/cpu/drc/emit_mips.c @@ -394,7 +394,7 @@ int emith_flg_noV; // V flag known not to be set // NB: for adcf and sbcf, carry-in must be dealt with separately (see there) static void emith_set_arith_flags(int rd, int rt, int rs, s32 imm, int sub) { - if (sub && rd == FNZ && rt && rs) // is this cmp_r_r? + if (sub && rd == FNZ && rt > AT && rs > AT) // is this cmp_r_r? emith_flg_rs = rs, emith_flg_rt = rt; else emith_flg_rs = emith_flg_rt = 0; @@ -858,7 +858,7 @@ static void emith_log_imm(int op, int rd, int rs, u32 imm) // NB: mips32r2 has EXT and INS #define emith_clear_msb(d, s, count) /* bits to clear */ do { \ u32 t; \ - if ((count) > 16) { \ + if ((count) >= 16) { \ t = (count) - 16; \ t = 0xffff >> t; \ emith_and_r_r_imm(d, s, t); \ @@ -1262,6 +1262,7 @@ static int emith_cond_check(int cond, int *r) // NB: mips32r2 has SYNCI #define host_instructions_updated(base, end) __builtin___clear_cache(base, end) #define emith_jump_patch_size() 4 +#define emith_rw_offs_max() 0x7fff // SH2 drc specific #define emith_sh2_drc_entry() do { \ diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index a40c0f8c..2177541c 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -986,6 +986,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common #define host_instructions_updated(base, end) +#define emith_rw_offs_max() 0xffffffff + #ifdef __x86_64__ #define HOST_REGS 16 diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 0083dc42..677c8adf 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c 
@@ -419,8 +419,8 @@ typedef struct { static int rcache_get_tmp(void); static void rcache_free_tmp(int hr); -// Note: cache_regs[] must have at least the amount of REG and TEMP registers -// used by handlers in worst case (currently 4). +// Note: cache_regs[] must have at least the amount of HRF_REG registers used +// by handlers in worst case (currently 4). // Register assignment goes by ABI convention. Caller save registers are TEMP, // the others are either static or REG. SR must be static, R0 very recommended. // VBR, PC, PR must not be static (read from context in utils). @@ -2418,7 +2418,7 @@ static void rcache_init(void) // NB may return either REG or TEMP static int emit_get_rbase_and_offs(SH2 *sh2, sh2_reg_e r, int rmode, u32 *offs) { - uptr omask = 0xff; // offset mask, XXX: ARM oriented.. + uptr omask = emith_rw_offs_max(); // offset mask u32 mask = 0; u32 a; int poffs; @@ -4447,7 +4447,7 @@ end_op: static void sh2_generate_utils(void) { - int arg0, arg1, arg2, arg3, sr, tmp; + int arg0, arg1, arg2, arg3, sr, tmp, tmp2; host_arg2reg(arg0, 0); host_arg2reg(arg1, 1); @@ -4689,18 +4689,18 @@ static void sh2_generate_utils(void) emith_sub_r_imm(tmp, 4*2); rcache_clean(); // push SR - tmp = rcache_get_reg_arg(0, SHR_SP, NULL); - emith_add_r_imm(tmp, 4); + tmp = rcache_get_reg_arg(0, SHR_SP,&tmp2); + emith_add_r_r_imm(tmp, tmp2, 4); tmp = rcache_get_reg_arg(1, SHR_SR, NULL); emith_clear_msb(tmp, tmp, 22); emith_move_r_r_ptr(arg2, CONTEXT_REG); - rcache_invalidate(); + rcache_invalidate_tmp(); emith_call(p32x_sh2_write32); // XXX: use sh2_drc_write32? 
// push PC rcache_get_reg_arg(0, SHR_SP, NULL); emith_ctx_read(arg1, SHR_PC * 4); emith_move_r_r_ptr(arg2, CONTEXT_REG); - rcache_invalidate(); + rcache_invalidate_tmp(); emith_call(p32x_sh2_write32); // update I, cycles, do callback emith_ctx_read(arg1, offsetof(SH2, pending_level)); diff --git a/pico/32x/memory.c b/pico/32x/memory.c index 7148d41c..8d5ca725 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -197,24 +197,19 @@ static NOINLINE u32 sh2_poll_read(u32 a, u32 d, unsigned int cycles, SH2* sh2) // fetch oldest write to address from fifo, but stop when reaching the present idx = sh2_poll_rd[hix]; while (idx != sh2_poll_wr[hix] && CYCLES_GE(cycles, fifo[idx].cycles)) { -// int oidx = idx; p = &fifo[idx]; idx = (idx+1) % PFIFO_SZ; - if (CYCLES_GT(cycles, p->cycles+80)) { - // drop older fifo stores that may cause synchronisation problems. - // NB unfortunately this cycle diff is quite sensitive: - // observed in Brutal Unleashed: min 80, observed in Afterburner: max 110 - sh2_poll_rd[hix] = idx; - } else if (p->a == a) { - // replace current data with fifo value and discard fifo entry - if (cpu != p->cpu) { + if (cpu != p->cpu) { + if (CYCLES_GT(cycles, p->cycles+80)) { + // drop older fifo stores that may cause synchronisation problems. + sh2_poll_rd[hix] = idx; + } else if (p->a == a) { + // replace current data with fifo value and discard fifo entry d = p->d; p->a = -1; -// if (oidx == sh2_poll_rd[hix]) -// sh2_poll_rd[hix] = idx; + break; } - break; } } return d; @@ -224,7 +219,6 @@ static NOINLINE void sh2_poll_write(u32 a, u32 d, unsigned int cycles, SH2 *sh2) { int hix = (a >> 1) % PFIFO_CNT; struct sh2_poll_fifo *fifo = sh2_poll_fifo[hix]; - struct sh2_poll_fifo *p = &fifo[sh2_poll_wr[hix]]; struct sh2_poll_fifo *q = &fifo[(sh2_poll_wr[hix]-1) % PFIFO_SZ]; int cpu = sh2 ? 
sh2->is_slave+1 : 0; @@ -233,15 +227,16 @@ static NOINLINE void sh2_poll_write(u32 a, u32 d, unsigned int cycles, SH2 *sh2) // intermediate values that may cause synchronisation problems. // NB this can take an eternity on m68k: mov.b , needs // 28 m68k-cycles (~80 sh2-cycles) to complete (observed in Metal Head) - if (q->a == a && !CYCLES_GT(cycles,q->cycles+30)) { + if (q->a == a && sh2_poll_wr[hix] != sh2_poll_rd[hix] && !CYCLES_GT(cycles,q->cycles+30)) { q->d = d; } else { // store write to poll address in fifo + fifo[sh2_poll_wr[hix]] = + (struct sh2_poll_fifo){ .cycles = cycles, .a = a, .d = d, .cpu = cpu }; sh2_poll_wr[hix] = (sh2_poll_wr[hix]+1) % PFIFO_SZ; if (sh2_poll_wr[hix] == sh2_poll_rd[hix]) // fifo overflow, discard oldest value sh2_poll_rd[hix] = (sh2_poll_rd[hix]+1) % PFIFO_SZ; - *p = (struct sh2_poll_fifo){ .cycles = cycles, .a = a, .d = d, .cpu = cpu }; } } @@ -2369,6 +2364,8 @@ void PicoMemSetup32x(void) sh2_drc_mem_setup(&msh2); sh2_drc_mem_setup(&ssh2); + memset(sh2_poll_rd, 0, sizeof(sh2_poll_rd)); + memset(sh2_poll_wr, 0, sizeof(sh2_poll_wr)); // z80 hack z80_map_set(z80_write_map, 0x8000, 0xffff, z80_md_bank_write_32x, 1); diff --git a/tools/mkoffsets.sh b/tools/mkoffsets.sh index a573f7a4..e7632593 100755 --- a/tools/mkoffsets.sh +++ b/tools/mkoffsets.sh @@ -11,7 +11,7 @@ ENDIAN= # compile with target C compiler and extract value from .rodata section compile_rodata () { - $CC $CFLAGS -I .. -c /tmp/getoffs.c -o /tmp/getoffs.o || exit 1 + $CC $CFLAGS -I .. 
-shared /tmp/getoffs.c -o /tmp/getoffs.o || exit 1 # find the name of the .rodata section (in case -fdata-sections is used) rosect=$(readelf -S /tmp/getoffs.o | grep '\.rodata' | sed 's/^[^.]*././;s/ .*//') From 4f06c0df56fceac974afef55abac34bbe2b478af Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 16 Aug 2019 17:25:23 +0200 Subject: [PATCH 0209/1110] fix for mkoffsets without multiarch binutils --- tools/mkoffsets.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/mkoffsets.sh b/tools/mkoffsets.sh index e7632593..3b4c076d 100755 --- a/tools/mkoffsets.sh +++ b/tools/mkoffsets.sh @@ -16,8 +16,8 @@ compile_rodata () rosect=$(readelf -S /tmp/getoffs.o | grep '\.rodata' | sed 's/^[^.]*././;s/ .*//') # read out .rodata section as hex string (should be only 4 or 8 bytes) - objcopy --dump-section $rosect=/tmp/getoffs.ro /tmp/getoffs.o || exit 1 - ro=$(xxd -ps /tmp/getoffs.ro) + ro=$(readelf -x $rosect /tmp/getoffs.o | grep '0x' | cut -c14-48 | + tr -d ' \n') if [ "$ENDIAN" = "le" ]; then # swap needed for le target hex="" From 906a1d182036a44d7821128d99b66c699baf0971 Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 20 Aug 2019 22:26:39 +0200 Subject: [PATCH 0210/1110] cleanup config files, copyright stuff --- config.aarch64 | 9 ++++----- config.caanoo | 8 ++++---- config.caanoo47 | 8 ++++---- config.dingux | 7 +++---- config.dingux54 | 7 +++---- config.gcw0 | 7 +++---- config.gp2x | 6 +++--- config.gp2x47 | 6 +++--- config.i386 | 8 ++++---- config.x86 | 8 ++++---- pico/32x/draw_arm.S | 1 + pico/32x/memory.c | 1 + pico/memory_arm.S | 1 + tools/mkoffsets.sh | 9 ++++++--- 14 files changed, 44 insertions(+), 42 deletions(-) diff --git a/config.aarch64 b/config.aarch64 index 70a6fe30..9631d64e 100644 --- a/config.aarch64 +++ b/config.aarch64 @@ -4,12 +4,11 @@ CC = aarch64-linux-gnu-gcc CXX = aarch64-linux-gnu-g++ AS = aarch64-linux-gnu-as STRIP = aarch64-linux-gnu-strip -CFLAGS += -I/usr/include/SDL -CFLAGS += -D_GNU_SOURCE=1 -D_REENTRANT 
-Wno-unused-result -fno-stack-protector +CFLAGS += -I/usr/include/SDL -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result ASFLAGS += -LDFLAGS += -LDLIBS += -lSDL -lasound -lpng -lz -lm -lstdc++ -ldl +LDFLAGS += # --sysroot ${HOME}/opt/aarch64/debian-arm64 +LDLIBS += -lSDL -lasound -lpng -lz -lm -ldl ARCH = aarch64 PLATFORM = generic -SOUND_DRIVERS = alsa +SOUND_DRIVERS = oss alsa sdl diff --git a/config.caanoo b/config.caanoo index dd053bc5..1ffc54da 100644 --- a/config.caanoo +++ b/config.caanoo @@ -4,11 +4,11 @@ CC = arm-gph-linux-gnueabi-gcc CXX = arm-gph-linux-gnueabi-g++ AS = arm-gph-linux-gnueabi-as STRIP = arm-gph-linux-gnueabi-strip -CFLAGS += -mfloat-abi=soft -mcpu=arm920t -mtune=arm920t -fno-stack-protector -D__GP2X__ -CFLAGS += -fno-gcse -funswitch-loops -fweb -ftree-loop-im #-fpredictive-commoning -ftree-loop-distribution -frename-registers -CFLAGS += -I${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/include -I${HOME}/src/gp2x/armroot-eabi/include +CFLAGS += -I${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/include -I${HOME}/src/gp2x/armroot-eabi/include -D__GP2X__ -Wno-unused-result +CFLAGS += -mfloat-abi=soft -mcpu=arm920t -mtune=arm920t -mno-thumb-interwork -fno-stack-protector -fno-common +CFLAGS += -finline-limit=42 -fipa-pta -fno-ipa-pure-const ASFLAGS += -mfloat-abi=soft -mcpu=arm920t -LDFLAGS += -B${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/lib/gcc/arm-gph-linux-gnueabi/4.2.4 -B${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L${HOME}/src/gp2x/armroot-eabi/lib -static +LDFLAGS += --sysroot ${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root -L${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L${HOME}/src/gp2x/armroot-eabi/lib -static LDLIBS += -lpng -lm -ldl ARCH = 
arm diff --git a/config.caanoo47 b/config.caanoo47 index 2c0ee5af..5bcf8608 100644 --- a/config.caanoo47 +++ b/config.caanoo47 @@ -4,11 +4,11 @@ CC = arm-linux-gnueabi-gcc CXX = arm-linux-gnueabi-g++ AS = arm-linux-gnueabi-as STRIP = arm-linux-gnueabi-strip -CFLAGS += -mfloat-abi=soft -mcpu=arm920t -mtune=arm920t -Wno-unused-result -fno-stack-protector -D__GP2X__ -CFLAGS += -fno-gcse -funswitch-loops -fweb -ftree-loop-im #-fpredictive-commoning -ftree-loop-distribution -frename-registers -CFLAGS += -I${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/include -I${HOME}/src/gp2x/armroot-eabi/include +CFLAGS += -I${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/include -I${HOME}/src/gp2x/armroot-eabi/include -D__GP2X__ -Wno-unused-result +CFLAGS += -mfloat-abi=soft -mcpu=arm920t -mtune=arm920t -mno-thumb-interwork -fno-stack-protector -fno-common +CFLAGS += -finline-limit=42 -fipa-pta -fno-ipa-sra -fno-ipa-pure-const ASFLAGS += -mfloat-abi=soft -mcpu=arm920t -LDFLAGS += -B${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/lib/gcc/arm-gph-linux-gnueabi/4.2.4 -B${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -static +LDFLAGS += -B${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/lib/gcc/arm-gph-linux-gnueabi/4.2.4 -B${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L${HOME}/src/gp2x/armroot-eabi/lib -static LDLIBS += -lpng -lm -ldl ARCH = arm diff --git a/config.dingux b/config.dingux index 8aca06a6..d1ec7fe5 100644 --- a/config.dingux +++ b/config.dingux @@ -4,12 +4,11 @@ CC = mipsel-linux-gcc CXX = mipsel-linux-g++ AS = mipsel-linux-as STRIP = mipsel-linux-strip -CFLAGS += -I${HOME}/opt/opendingux-toolchain/usr/include/ -CFLAGS += 
-I${HOME}/opt/opendingux-toolchain/usr/include/SDL +CFLAGS += -I${HOME}/opt/opendingux-toolchain/usr/include/ -I${HOME}/opt/opendingux-toolchain/usr/include/SDL CFLAGS += -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -fno-stack-protector ASFLAGS += -LDFLAGS += -LDLIBS += -B${HOME}/opt/opendingux-toolchain/usr/lib -Wl,-rpath-link=${HOME}/opt/opendingux-toolchain/usr/lib -Wl,-rpath-link=${HOME}/opt/opendingux-toolchain/lib -lSDL -lasound -lpng -lm -lstdc++ -ldl +LDFLAGS += --sysroot ${HOME}/opt/opendingux-toolchain -L${HOME}/opt/opendingux-toolchain/lib +LDLIBS += -lSDL -lasound -lpng -lz -lm -ldl ARCH = mipsel PLATFORM = opendingux diff --git a/config.dingux54 b/config.dingux54 index 5f292652..423cbd17 100644 --- a/config.dingux54 +++ b/config.dingux54 @@ -4,12 +4,11 @@ CC = mipsel-linux-gnu-gcc CXX = mipsel-linux-gnu-g++ AS = mipsel-linux-gnu-as STRIP = mipsel-linux-gnu-strip -CFLAGS += -I${HOME}/opt/opendingux-toolchain/usr/include/ -CFLAGS += -I${HOME}/opt/opendingux-toolchain/usr/include/SDL +CFLAGS += -I${HOME}/opt/opendingux-toolchain/usr/include/ -I${HOME}/opt/opendingux-toolchain/usr/include/SDL CFLAGS += -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -fno-stack-protector ASFLAGS += -LDFLAGS += -LDLIBS += -B${HOME}/opt/opendingux-toolchain/usr/lib -B${HOME}/opt/opendingux-toolchain/lib -Wl,-rpath-link=${HOME}/opt/opendingux-toolchain/usr/lib -Wl,-rpath-link=${HOME}/opt/opendingux-toolchain/lib -lSDL -lasound -lpng -lz -lm -lstdc++ -ldl +LDFLAGS += -B${HOME}/opt/opendingux-toolchain/usr/lib -B${HOME}/opt/opendingux-toolchain/lib -Wl,-rpath-link=${HOME}/opt/opendingux-toolchain/usr/lib -Wl,-rpath-link=${HOME}/opt/opendingux-toolchain/lib +LDLIBS += -lSDL -lasound -lpng -lz -lm -ldl ARCH = mipsel PLATFORM = opendingux diff --git a/config.gcw0 b/config.gcw0 index 1d2ccef0..78f7c3a4 100644 --- a/config.gcw0 +++ b/config.gcw0 @@ -4,12 +4,11 @@ CC = mipsel-gcw0-linux-uclibc-gcc CXX = mipsel-gcw0-linux-uclibc-g++ AS = mipsel-gcw0-linux-uclibc-as STRIP = 
mipsel-gcw0-linux-uclibc-strip -CFLAGS += -I${HOME}/opt/gcw0-toolchain/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include/ -CFLAGS += -I${HOME}/opt/gcw0-toolchain/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include/SDL +CFLAGS += -I${HOME}/opt/gcw0-toolchain/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include/ -I${HOME}/opt/gcw0-toolchain/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include/SDL CFLAGS += -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -fno-stack-protector ASFLAGS += -LDFLAGS += -LDLIBS += -B${HOME}/opt/gcw0-toolchain/usr/lib -Wl,-rpath-link=${HOME}/opt/gcw0-toolchain/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/lib -Wl,-rpath-link=${HOME}/opt/gcw0-toolchain/usr/mipsel-gcw0-linux-uclibc/sysroot/lib -lSDL -lasound -lpng -lz -lm -lstdc++ -ldl +LDFLAGS += --sysroot ${HOME}/opt/gcw0-toolchain/usr/mipsel-gcw0-linux-uclibc/sysroot +LDLIBS += -lSDL -lasound -lpng -lz -lm -ldl ARCH = mipsel PLATFORM = opendingux diff --git a/config.gp2x b/config.gp2x index 248d73aa..84d2f93d 100644 --- a/config.gp2x +++ b/config.gp2x @@ -4,9 +4,9 @@ CC = arm-open2x-linux-gcc CXX = arm-open2x-linux-g++ AS = arm-open2x-linux-as STRIP = arm-open2x-linux-strip -CFLAGS += -msoft-float -mcpu=arm920t -mtune=arm920t -D__GP2X__ -CFLAGS += -I${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -I${HOME}/src/gp2x/armroot/include -CFLAGS += -fno-gcse -funswitch-loops -fweb -ftree-loop-im #-fpredictive-commoning -ftree-loop-distribution -frename-registers +CFLAGS += -I${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -I${HOME}/src/gp2x/armroot/include -D__GP2X__ -Wno-unused-result +CFLAGS += -msoft-float -mcpu=arm920t -mtune=arm920t +CFLAGS += -finline-limit=42 -fipa-cp -fno-ipa-pure-const ASFLAGS += -mcpu=arm920t -mfloat-abi=soft LDFLAGS += --sysroot ${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux -L${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L${HOME}/src/gp2x/armroot/lib -static LDLIBS += -lpng -lm -ldl diff --git a/config.gp2x47 
b/config.gp2x47 index 632515ee..7ce3d9a9 100644 --- a/config.gp2x47 +++ b/config.gp2x47 @@ -4,12 +4,12 @@ CC = arm-linux-gnueabi-gcc CXX = arm-linux-gnueabi-g++ AS = arm-linux-gnueabi-as STRIP = arm-linux-gnueabi-strip +CFLAGS += -I${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -I${HOME}/src/gp2x/armroot/include -D__GP2X__ -Wno-unused-result CFLAGS += -mabi=apcs-gnu -mfloat-abi=soft -mfpu=fpa -mcpu=arm920t -mtune=arm920t -CFLAGS += -Wno-unused-result -D__GP2X__ -mno-thumb-interwork -fno-stack-protector -fno-common -CFLAGS += -I${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -I${HOME}/src/gp2x/armroot/include +CFLAGS += -mno-thumb-interwork -fno-stack-protector -fno-common CFLAGS += -finline-limit=42 -fipa-pta -fno-ipa-sra -fno-ipa-pure-const ASFLAGS += -mabi=apcs-gnu -mfloat-abi=soft -mfpu=fpa -mcpu=arm920t -LDFLAGS += -mabi=apcs-gnu -mfpu=fpa -B${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1 -B${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L${HOME}/src/gp2x/armroot/lib -static +LDFLAGS += -mabi=apcs-gnu -mfpu=fpa -B${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1 -B${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/usr/lib -L${HOME}/src/gp2x/armroot/lib -static LDLIBS += -lpng -lm -ldl ARCH = arm diff --git a/config.i386 b/config.i386 index ce07b103..9c8c2e65 100644 --- a/config.i386 +++ b/config.i386 @@ -4,11 +4,11 @@ CC = gcc CXX = g++ AS = as STRIP = strip -CFLAGS += -I/usr/include/SDL -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -m32 # -pg +CFLAGS += -I/usr/include/SDL -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -m32 ASFLAGS += -LDFLAGS += -m32 #-pg -LDLIBS += -L/usr/lib/i386-linux-gnu -L${HOME}/opt/lib32 -lSDL-1.2 -lasound -lpng -lz -lm -ldl +LDFLAGS += -m32 -L/usr/lib/i386-linux-gnu -L${HOME}/opt/lib32 +LDLIBS += 
-lSDL-1.2 -lasound -lpng -lz -lm -ldl ARCH = i386 PLATFORM = generic -SOUND_DRIVERS = oss alsa sdl +SOUND_DRIVERS = oss alsa sdl diff --git a/config.x86 b/config.x86 index 287b82d3..45440011 100644 --- a/config.x86 +++ b/config.x86 @@ -4,11 +4,11 @@ CC = gcc CXX = g++ AS = as STRIP = strip -CFLAGS += -I/usr/include/SDL -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result # -pg +CFLAGS += -I/usr/include/SDL -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result ASFLAGS += -LDFLAGS += #-pg -LDLIBS += -L/usr/lib/x86_64-linux-gnu -lSDL-1.2 -lasound -lpng -lz -lm -ldl +LDFLAGS += -L/usr/lib/x86_64-linux-gnu +LDLIBS += -lSDL-1.2 -lasound -lpng -lz -lm -ldl ARCH = x86_64 PLATFORM = generic -SOUND_DRIVERS = oss alsa sdl +SOUND_DRIVERS = oss alsa sdl diff --git a/pico/32x/draw_arm.S b/pico/32x/draw_arm.S index e0cdcbe5..f351d8e0 100644 --- a/pico/32x/draw_arm.S +++ b/pico/32x/draw_arm.S @@ -1,6 +1,7 @@ @* @* PicoDrive @* (C) notaz, 2010 +@* (C) kub, 2019 @* @* This work is licensed under the terms of MAME license. @* See COPYING file in the top-level directory. diff --git a/pico/32x/memory.c b/pico/32x/memory.c index 8d5ca725..3e11cbcb 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -1,6 +1,7 @@ /* * PicoDrive * (C) notaz, 2009,2010,2013 + * (C) kub, 2019 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. diff --git a/pico/memory_arm.S b/pico/memory_arm.S index 07d6a128..333780c1 100644 --- a/pico/memory_arm.S +++ b/pico/memory_arm.S @@ -1,6 +1,7 @@ /* * PicoDrive * (C) notaz, 2006-2009 + * (C) kub, 2019 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. diff --git a/tools/mkoffsets.sh b/tools/mkoffsets.sh index 3b4c076d..8f2d888c 100755 --- a/tools/mkoffsets.sh +++ b/tools/mkoffsets.sh @@ -11,7 +11,10 @@ ENDIAN= # compile with target C compiler and extract value from .rodata section compile_rodata () { - $CC $CFLAGS -I .. 
-shared /tmp/getoffs.c -o /tmp/getoffs.o || exit 1 + # $CC $CFLAGS -I .. -shared /tmp/getoffs.c -o /tmp/getoffs.o || exit 1 + echo 'void dummy(void) { asm(""::"r" (&val)); }' >> /tmp/getoffs.c + $CC $CFLAGS -I .. -nostdlib -Wl,-edummy /tmp/getoffs.c \ + -o /tmp/getoffs.o || exit 1 # find the name of the .rodata section (in case -fdata-sections is used) rosect=$(readelf -S /tmp/getoffs.o | grep '\.rodata' | sed 's/^[^.]*././;s/ .*//') @@ -40,13 +43,13 @@ get_define () # prefix struct member member... name=$(echo $* | sed 's/ /_/g') echo '#include "pico/pico_int.h"' > /tmp/getoffs.c echo "static const struct $struct p;" >> /tmp/getoffs.c - echo "const int offs = (char *)&p.$field - (char*)&p;" >>/tmp/getoffs.c + echo "const int val = (char *)&p.$field - (char*)&p;" >>/tmp/getoffs.c compile_rodata line=$(printf "#define %-20s 0x%04x" $prefix$name $rodata) } # determine endianess -echo "const int one = 1;" >/tmp/getoffs.c +echo "const int val = 1;" >/tmp/getoffs.c compile_rodata ENDIAN=$(if [ "$rodata" -eq 1 ]; then echo be; else echo le; fi) # output header From 0f7a30ede33ee915cc0e7dc86d9d9ee4b22f051c Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 21 Aug 2019 18:27:26 +0200 Subject: [PATCH 0211/1110] configuration changes and README --- Makefile | 4 +- README.md | 112 +++++++++++++++++++++++++++++++ config.dingux | 2 +- config.dingux54 | 2 +- config.gcw0 | 2 +- config.gp2x | 4 +- config.gp2x47 | 3 +- configure | 118 ++++++++++++++++++++------------- cpu/drc/emit_arm.c | 1 + cpu/drc/emit_x86.c | 1 + platform/common/helix/Makefile | 3 +- 11 files changed, 196 insertions(+), 56 deletions(-) create mode 100644 README.md diff --git a/Makefile b/Makefile index 96ccb6ca..c3bd6899 100644 --- a/Makefile +++ b/Makefile @@ -238,8 +238,8 @@ pico/cd/LC89510.o: CFLAGS += -fno-strict-aliasing pico/cd/gfx_cd.o: CFLAGS += -fno-strict-aliasing ifeq (1,$(use_sh2drc)) ifneq (,$(findstring -flto,$(CFLAGS))) -# if using the DRC, memory and sh2soc use a global register variable to avoid -# 
saving and reloading the SH2 SR. However, this collides with the use of LTO. +# if using the DRC, memory and sh2soc directly use the DRC register for SH2 SR +# to avoid saving and reloading it. However, this collides with the use of LTO. pico/32x/memory.o: CFLAGS += -fno-lto pico/32x/sh2soc.o: CFLAGS += -fno-lto endif diff --git a/README.md b/README.md new file mode 100644 index 00000000..d0d7259f --- /dev/null +++ b/README.md @@ -0,0 +1,112 @@ +This is my foray into dynamic recompilation using PicoDrive, a +Megadrive / Genesis / Sega CD / Mega CD / 32X / SMS emulator. + +I added support for MIPS (mips32r1) and ARM64 (aarch64) to the recompiler, as +well as spent much effort to optimize the code generated by the DRC. +I also optimized SH2 memory access inside the emulator, and did some work on +M68K/SH2 CPU synchronization to fix some problems and speed up the emulator. + +It got a bit out of hand. I ended up doing fixes and optimzations all over the +place, mainly for 32X and CD, 32X graphics handling, and probably some more, +see the commit history. + +### compiling + +I mainly worked with standalone PicoDrive versions as created by configure/make. +A list of platforms for which this is possible can be obtained with + +> configure --help + +If you want to build an executable for a unixoid platform not listed in the +platform list, just use + +> configure --platform=generic + +If DRC is available for the platform, it should be enabled automatically. 
+ +For other platforms using a cross-compiling toolchain I used this, +assuming $TC points to the appropriate cross compile toolchain directory: + +platform|toolchain|configure command +--------|---------|----------------- +gp2x,wiz,caanoo|open2x|CROSS_COMPILE=arm-open2x-linux- CFLAGS="-I$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -fno-stack-protector -fno-common -finline-limit=42" LDFLAGS="--sysroot $TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux -L$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib" ./configure --platform=gp2x +gp2x,wiz,caanoo|open2x with ubuntu arm gcc 4.7|CROSS_COMPILE=arm-linux-gnueabi- CFLAGS="-I$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -fno-stack-protector -fno-common -finline-limit=42 -fipa-pta" LDFLAGS="-B$TC/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1 -B$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib" ./configure --platform=gp2x +opendingux|opendingux|CROSS_COMPILE=mipsel-linux- CFLAGS="-I$TC/usr/include -I$TC/usr/include/SDL -fno-stack-protector -fno-common -finline-limit=42 -fipa-pta" LDFLAGS="--sysroot $TC -L$TC/lib" ./configure --platform=opendingux +opendingux|opendingux with ubuntu mips gcc 5.4|CROSS_COMPILE=mipsel-linux-gnu- CFLAGS="-I$TC/usr/include -I$TC/usr/include/SDL -fno-stack-protector -fno-common -finline-limit=42 -fipa-pta" LDFLAGS="-B$TC/usr/lib -B$TC/lib -Wl,-rpath-link=$TC/usr/lib -Wl,-rpath-link=$TC/lib" ./configure --platform=opendingux +gcw0|gcw0|CROSS_COMPILE=mipsel-gcw0-linux-uclibc- CFLAGS="-I$TC/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include -I$TC/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include/SDL -fno-stack-protector -fno-common -finline-limit=42 -fipa-pta" LDFLAGS="--sysroot $TC/usr/mipsel-gcw0-linux-uclibc/sysroot" ./configure --platform=gcw0 + +For gp2x, wiz, and caanoo you may need to compile libpng first, and additionally +this patch may need to be applied to the cpu/cyclone submodule: +> diff --git a/OpArith.cpp b/OpArith.cpp +> index 
96c7e0d..09517b8 100644 +> --- a/OpArith.cpp +> +++ b/OpArith.cpp +> @@ -425,7 +425,7 @@ int OpAbcd(int op) +> ot(" add r1,r1,r0\n"); +> ot(" add r1,r1,r6\n"); +> ot(" mov r12,r1\n"); +> - ot(" addhi r12,#6 ;@ Decimal adjust units\n"); +> + ot(" addhi r12,r12,#6 ;@ Decimal adjust units\n"); +> ot(" tst r1,#0x80\n"); +> ot(" orreq r10,r10,#0x10000000 ;@ Undefined V behavior\n"); +> ot(" cmp r12,#0x9f\n"); +> @@ -452,7 +452,7 @@ int OpAbcd(int op) +> ot(" cmp r1,r12\n"); +> ot(" orrlt r10,r10,#0x20000000 ;@ C\n"); +> ot(" cmp r1,#0xff\n"); +> - ot(" addhi r1,#0xa0\n"); +> + ot(" addhi r1,r1,#0xa0\n"); +> ot(" sub r12,r1,r12\n"); +> ot(" movs r0,r12,lsl #24\n"); +> ot(" bicmi r10,r10,#0x10000000 ;@ Undefined V behavior part II\n"); +> diff --git a/OpLogic.cpp b/OpLogic.cpp +> index 012e35a..d40d814 100644 +> --- a/OpLogic.cpp +> +++ b/OpLogic.cpp +> @@ -74,12 +74,12 @@ const char *TestCond(int m68k_cc, int invert) +> break; +> case 0x0e: // gt +> ot(" eor r0,r10,r10,lsl #3 ;@ gt: !Z && N == V\n"); +> - ot(" orrs r0,r10,lsl #1\n"); +> + ot(" orrs r0,r0,r10,lsl #1\n"); +> cond="pl", icond="mi"; +> break; +> case 0x0f: // le +> ot(" eor r0,r10,r10,lsl #3 ;@ le: Z || N != V\n"); +> - ot(" orrs r0,r10,lsl #1\n"); +> + ot(" orrs r0,r0,r10,lsl #1\n"); +> cond="mi", icond="pl"; +> break; +> default: + +After configure, compile with + +> make opk # for opendingux and gcw0 +> +> make # for anything else + +### helix MP3 decoder + +For 32 bit ARM platforms, there is the possibility to compile the helix MP3 +decoder into a shared library to be able to use MP3 audio files with CD games. +The helix source files aren't supplied because of licensing issues. However, if +you have obtained the sources, put them into the platform/common/helix +directory, set CROSS to your cross compiler prefix (e.g. arm-linux-gnueabi-) +and LIBGCC to your cross compiler's libgcc.a +(e.g. 
/usr/lib/gcc-cross/arm-linux-gnueabi/4.7/libgcc.a), and compile with + +> make -C platform/common/helix CROSS=$CROSS LIBGCC=$LIBGCC + +Copy the resulting ${CROSS}helix_mp3.so as libhelix.so to the directory where +the PicoDrive binary is. + +### installing + +You need to install the resulting binary onto your device manually. +For opendingux and gcw0, copy the opk to your SD card. +For gp2x, wiz and caanoo, the easiest way is to unpack +[PicoDrive_191.zip](http://notaz.gp2x.de/releases/PicoDrive/PicoDrive_191.zip) +on you SD card and replace the PicoDrive binary. + +Send bug reports, fixes etc to +Kai-Uwe Bloem diff --git a/config.dingux b/config.dingux index d1ec7fe5..b981bd3f 100644 --- a/config.dingux +++ b/config.dingux @@ -1,5 +1,5 @@ # Automatically generated by configure -# Configured with: './configure' '--platform=generic' +# Configured with: './configure' '--platform=opendingux' CC = mipsel-linux-gcc CXX = mipsel-linux-g++ AS = mipsel-linux-as diff --git a/config.dingux54 b/config.dingux54 index 423cbd17..a232d952 100644 --- a/config.dingux54 +++ b/config.dingux54 @@ -1,5 +1,5 @@ # Automatically generated by configure -# Configured with: './configure' '--platform=generic' +# Configured with: './configure' '--platform=opendingux' CC = mipsel-linux-gnu-gcc CXX = mipsel-linux-gnu-g++ AS = mipsel-linux-gnu-as diff --git a/config.gcw0 b/config.gcw0 index 78f7c3a4..cebe79a1 100644 --- a/config.gcw0 +++ b/config.gcw0 @@ -1,5 +1,5 @@ # Automatically generated by configure -# Configured with: './configure' '--platform=generic' +# Configured with: './configure' '--platform=gcw0' CC = mipsel-gcw0-linux-uclibc-gcc CXX = mipsel-gcw0-linux-uclibc-g++ AS = mipsel-gcw0-linux-uclibc-as diff --git a/config.gp2x b/config.gp2x index 84d2f93d..cf99bd77 100644 --- a/config.gp2x +++ b/config.gp2x @@ -4,8 +4,8 @@ CC = arm-open2x-linux-gcc CXX = arm-open2x-linux-g++ AS = arm-open2x-linux-as STRIP = arm-open2x-linux-strip -CFLAGS += 
-I${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -I${HOME}/src/gp2x/armroot/include -D__GP2X__ -Wno-unused-result -CFLAGS += -msoft-float -mcpu=arm920t -mtune=arm920t +CFLAGS += -I${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -I${HOME}/src/gp2x/armroot/include -D__GP2X__ +CFLAGS += -mfloat-abi=soft -mcpu=arm920t -mtune=arm920t -mno-thumb-interwork -fno-stack-protector -fno-common CFLAGS += -finline-limit=42 -fipa-cp -fno-ipa-pure-const ASFLAGS += -mcpu=arm920t -mfloat-abi=soft LDFLAGS += --sysroot ${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux -L${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L${HOME}/src/gp2x/armroot/lib -static diff --git a/config.gp2x47 b/config.gp2x47 index 7ce3d9a9..8a86e850 100644 --- a/config.gp2x47 +++ b/config.gp2x47 @@ -5,8 +5,7 @@ CXX = arm-linux-gnueabi-g++ AS = arm-linux-gnueabi-as STRIP = arm-linux-gnueabi-strip CFLAGS += -I${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -I${HOME}/src/gp2x/armroot/include -D__GP2X__ -Wno-unused-result -CFLAGS += -mabi=apcs-gnu -mfloat-abi=soft -mfpu=fpa -mcpu=arm920t -mtune=arm920t -CFLAGS += -mno-thumb-interwork -fno-stack-protector -fno-common +CFLAGS += -mabi=apcs-gnu -mfloat-abi=soft -mfpu=fpa -mcpu=arm920t -mtune=arm920t -mno-thumb-interwork -fno-stack-protector -fno-common CFLAGS += -finline-limit=42 -fipa-pta -fno-ipa-sra -fno-ipa-pure-const ASFLAGS += -mabi=apcs-gnu -mfloat-abi=soft -mfpu=fpa -mcpu=arm920t LDFLAGS += -mabi=apcs-gnu -mfpu=fpa -B${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1 -B${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/usr/lib -L${HOME}/src/gp2x/armroot/lib -static diff --git a/configure b/configure index 1310ab2c..c82fe205 100755 --- a/configure +++ b/configure @@ -22,6 +22,13 @@ compile_binary() $c >> config.log 2>&1 } +check_option() +{ + echo 'void test(void) { }' >$TMPC + compile_object $1 
|| return 1 + return 0 +} + check_define() { $CC -E -dD $CFLAGS pico/arm_features.h | grep -q $1 || return 1 @@ -31,17 +38,18 @@ check_define() # setting options to "yes" or "no" will make that choice default, # "" means "autodetect". -platform_list="generic pandora gp2x opendingux rpi1 rpi2" +platform_list="generic pandora gp2x wiz caanoo opendingux gcw0 rpi1 rpi2" platform="generic" sound_driver_list="oss alsa sdl" sound_drivers="" have_armv5="" have_armv6="" have_armv7="" +have_arm_oabi="" have_arm_neon="" have_libavcodec="" need_sdl="no" -need_xlib="no" +need_zlib="no" # these are for known platforms optimize_cortexa8="no" optimize_cortexa7="no" @@ -54,7 +62,7 @@ CC="${CC-${CROSS_COMPILE}gcc}" CXX="${CXX-${CROSS_COMPILE}g++}" AS="${AS-${CROSS_COMPILE}as}" STRIP="${STRIP-${CROSS_COMPILE}strip}" -test -n "$SDL_CONFIG" || SDL_CONFIG="`$CC --print-sysroot 2> /dev/null || true`/usr/bin/sdl-config" +test -n "$SDL_CONFIG" || SDL_CONFIG="`$CC $CFLAGS $LDFLAGS --print-sysroot 2> /dev/null || true`/usr/bin/sdl-config" MAIN_LDLIBS="$LDLIBS -lm" config_mak="config.mak" @@ -78,23 +86,27 @@ set_platform() ;; generic) ;; - opendingux) + opendingux | gcw0) sound_drivers="sdl" + # both are really an opendingux + platform="opendingux" ;; pandora) sound_drivers="oss alsa" optimize_cortexa8="yes" have_arm_neon="yes" ;; - gp2x) + gp2x | wiz | caanoo) sound_drivers="oss" optimize_arm920="yes" + # compile for OABI if toolchain provides it (faster code on caanoo) + have_arm_oabi="yes" + # always use static linking, since caanoo doesn't have OABI libs. 
Moreover, + # dynamic linking slows Wiz 1-10%, and libm on F100 isn't compatible + LDFLAGS="$LDFLAGS -static" + # unified binary for all of them CFLAGS="$CFLAGS -D__GP2X__" - if [ "$CROSS_COMPILE" = "arm-linux-" ]; then - # still using static, dynamic linking slows Wiz 1-10% - # also libm on F100 is not compatible - MAIN_LDLIBS="$MAIN_LDLIBS -static" - fi + platform="gp2x" ;; *) fail "unsupported platform: $platform" @@ -147,18 +159,11 @@ fi # fi #fi -# basic compiler test -cat > $TMPC <> $config_mak +# echo ' $(CC) $(CFLAGS) -E -c $^ -o /tmp/$(notdir $@).s' >> $config_mak +# echo ' $(AS) $(ASFLAGS) /tmp/$(notdir $@).s -o $@' >> $config_mak +#fi # use pandora's skin (for now) test -e skin || ln -s platform/pandora/skin skin diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index a4aa2ec6..1d70866c 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -1,6 +1,7 @@ /* * Basic macros to emit ARM instructions and some utils * Copyright (C) 2008,2009,2010 notaz + * Copyright (C) 2019 kub * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 2177541c..62288ff5 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -1,6 +1,7 @@ /* * Basic macros to emit x86 instructions and some utils * Copyright (C) 2008,2009,2010 notaz + * Copyright (C) 2019 kuv * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. 
diff --git a/platform/common/helix/Makefile b/platform/common/helix/Makefile index 0021ea8e..9fa4c1cc 100644 --- a/platform/common/helix/Makefile +++ b/platform/common/helix/Makefile @@ -4,6 +4,7 @@ CC = $(CROSS)gcc AS = $(CROSS)as AR = $(CROSS)ar TOOLCHAIN = $(notdir $(CROSS)) +LIBGCC ?= ${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1/libgcc.a CFLAGS += -Ipub -O2 -Wall -fstrict-aliasing -ffast-math ifneq ($(findstring arm-,$(TOOLCHAIN)),) @@ -34,7 +35,7 @@ real/arm/asmpoly_gcc.o: real/arm/asmpoly_gcc.s $(LIB) : $(OBJS) $(AR) r $@ $^ -$(SHLIB) : $(OBJS) /home/build/opt/open2x/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1/libgcc.a +$(SHLIB) : $(OBJS) $(LIBGCC) $(CC) -o $@ -nostdlib -shared $(CFLAGS) $^ clean: From e5274cc92d858a87f667df5d9b12af1746e423e8 Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 21 Aug 2019 18:43:28 +0200 Subject: [PATCH 0212/1110] pff... README, 2nd try --- README.md | 45 +++------------------------------------------ cyclone_gp2x.patch | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 42 deletions(-) create mode 100644 cyclone_gp2x.patch diff --git a/README.md b/README.md index d0d7259f..13ff1598 100644 --- a/README.md +++ b/README.md @@ -36,48 +36,9 @@ opendingux|opendingux with ubuntu mips gcc 5.4|CROSS_COMPILE=mipsel-linux-gnu- C gcw0|gcw0|CROSS_COMPILE=mipsel-gcw0-linux-uclibc- CFLAGS="-I$TC/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include -I$TC/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include/SDL -fno-stack-protector -fno-common -finline-limit=42 -fipa-pta" LDFLAGS="--sysroot $TC/usr/mipsel-gcw0-linux-uclibc/sysroot" ./configure --platform=gcw0 For gp2x, wiz, and caanoo you may need to compile libpng first, and additionally -this patch may need to be applied to the cpu/cyclone submodule: -> diff --git a/OpArith.cpp b/OpArith.cpp -> index 96c7e0d..09517b8 100644 -> --- a/OpArith.cpp -> +++ b/OpArith.cpp -> @@ -425,7 +425,7 @@ int OpAbcd(int op) -> ot(" add r1,r1,r0\n"); -> ot(" 
add r1,r1,r6\n"); -> ot(" mov r12,r1\n"); -> - ot(" addhi r12,#6 ;@ Decimal adjust units\n"); -> + ot(" addhi r12,r12,#6 ;@ Decimal adjust units\n"); -> ot(" tst r1,#0x80\n"); -> ot(" orreq r10,r10,#0x10000000 ;@ Undefined V behavior\n"); -> ot(" cmp r12,#0x9f\n"); -> @@ -452,7 +452,7 @@ int OpAbcd(int op) -> ot(" cmp r1,r12\n"); -> ot(" orrlt r10,r10,#0x20000000 ;@ C\n"); -> ot(" cmp r1,#0xff\n"); -> - ot(" addhi r1,#0xa0\n"); -> + ot(" addhi r1,r1,#0xa0\n"); -> ot(" sub r12,r1,r12\n"); -> ot(" movs r0,r12,lsl #24\n"); -> ot(" bicmi r10,r10,#0x10000000 ;@ Undefined V behavior part II\n"); -> diff --git a/OpLogic.cpp b/OpLogic.cpp -> index 012e35a..d40d814 100644 -> --- a/OpLogic.cpp -> +++ b/OpLogic.cpp -> @@ -74,12 +74,12 @@ const char *TestCond(int m68k_cc, int invert) -> break; -> case 0x0e: // gt -> ot(" eor r0,r10,r10,lsl #3 ;@ gt: !Z && N == V\n"); -> - ot(" orrs r0,r10,lsl #1\n"); -> + ot(" orrs r0,r0,r10,lsl #1\n"); -> cond="pl", icond="mi"; -> break; -> case 0x0f: // le -> ot(" eor r0,r10,r10,lsl #3 ;@ le: Z || N != V\n"); -> - ot(" orrs r0,r10,lsl #1\n"); -> + ot(" orrs r0,r0,r10,lsl #1\n"); -> cond="mi", icond="pl"; -> break; -> default: +cyclone_gp2x.patch may need to be applied to the cpu/cyclone submodule: + +> patch -d cpu/cyclone -p1 Date: Thu, 22 Aug 2019 22:57:42 +0200 Subject: [PATCH 0213/1110] bug fix in comm poll fifo, and back to -O3 --- Makefile | 2 +- README.md | 12 ++++++------ pico/32x/memory.c | 30 ++++++++++++++++++++++-------- 3 files changed, 29 insertions(+), 15 deletions(-) diff --git a/Makefile b/Makefile index c3bd6899..a3b2c96b 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ DEBUG ?= 0 CFLAGS += -Wall -ggdb -ffunction-sections -fdata-sections CFLAGS += -I. ifeq "$(DEBUG)" "0" -CFLAGS += -O2 -finline-functions -DNDEBUG +CFLAGS += -O3 -DNDEBUG endif # This is actually needed, bevieve me. 
diff --git a/README.md b/README.md index 13ff1598..aa0466d1 100644 --- a/README.md +++ b/README.md @@ -29,11 +29,11 @@ assuming $TC points to the appropriate cross compile toolchain directory: platform|toolchain|configure command --------|---------|----------------- -gp2x,wiz,caanoo|open2x|CROSS_COMPILE=arm-open2x-linux- CFLAGS="-I$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -fno-stack-protector -fno-common -finline-limit=42" LDFLAGS="--sysroot $TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux -L$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib" ./configure --platform=gp2x -gp2x,wiz,caanoo|open2x with ubuntu arm gcc 4.7|CROSS_COMPILE=arm-linux-gnueabi- CFLAGS="-I$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -fno-stack-protector -fno-common -finline-limit=42 -fipa-pta" LDFLAGS="-B$TC/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1 -B$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib" ./configure --platform=gp2x -opendingux|opendingux|CROSS_COMPILE=mipsel-linux- CFLAGS="-I$TC/usr/include -I$TC/usr/include/SDL -fno-stack-protector -fno-common -finline-limit=42 -fipa-pta" LDFLAGS="--sysroot $TC -L$TC/lib" ./configure --platform=opendingux -opendingux|opendingux with ubuntu mips gcc 5.4|CROSS_COMPILE=mipsel-linux-gnu- CFLAGS="-I$TC/usr/include -I$TC/usr/include/SDL -fno-stack-protector -fno-common -finline-limit=42 -fipa-pta" LDFLAGS="-B$TC/usr/lib -B$TC/lib -Wl,-rpath-link=$TC/usr/lib -Wl,-rpath-link=$TC/lib" ./configure --platform=opendingux -gcw0|gcw0|CROSS_COMPILE=mipsel-gcw0-linux-uclibc- CFLAGS="-I$TC/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include -I$TC/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include/SDL -fno-stack-protector -fno-common -finline-limit=42 -fipa-pta" LDFLAGS="--sysroot $TC/usr/mipsel-gcw0-linux-uclibc/sysroot" ./configure --platform=gcw0 +gp2x,wiz,caanoo|open2x|CROSS_COMPILE=arm-open2x-linux- CFLAGS="-I$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -finline-limit=42 -fno-unroll-loops 
-fno-stack-protector -fno-common" LDFLAGS="--sysroot $TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux -L$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib" ./configure --platform=gp2x +gp2x,wiz,caanoo|open2x with ubuntu arm gcc 4.7|CROSS_COMPILE=arm-linux-gnueabi- CFLAGS="-I$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -finline-limit=42 -fno-unroll-loops -fno-stack-protector -fno-common" LDFLAGS="-B$TC/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1 -B$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib" ./configure --platform=gp2x +opendingux|opendingux|CROSS_COMPILE=mipsel-linux- CFLAGS="-I$TC/usr/include -I$TC/usr/include/SDL" LDFLAGS="--sysroot $TC -L$TC/lib" ./configure --platform=opendingux +opendingux|opendingux with ubuntu mips gcc 5.4|CROSS_COMPILE=mipsel-linux-gnu- CFLAGS="-I$TC/usr/include -I$TC/usr/include/SDL" LDFLAGS="-B$TC/usr/lib -B$TC/lib -Wl,-rpath-link=$TC/usr/lib -Wl,-rpath-link=$TC/lib" ./configure --platform=opendingux +gcw0|gcw0|CROSS_COMPILE=mipsel-gcw0-linux-uclibc- CFLAGS="-I$TC/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include -I$TC/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include/SDL" LDFLAGS="--sysroot $TC/usr/mipsel-gcw0-linux-uclibc/sysroot" ./configure --platform=gcw0 For gp2x, wiz, and caanoo you may need to compile libpng first, and additionally cyclone_gp2x.patch may need to be applied to the cpu/cyclone submodule: @@ -67,7 +67,7 @@ You need to install the resulting binary onto your device manually. For opendingux and gcw0, copy the opk to your SD card. For gp2x, wiz and caanoo, the easiest way is to unpack [PicoDrive_191.zip](http://notaz.gp2x.de/releases/PicoDrive/PicoDrive_191.zip) -on you SD card and replace the PicoDrive binary. +on your SD card and replace the PicoDrive binary. 
Send bug reports, fixes etc to Kai-Uwe Bloem diff --git a/pico/32x/memory.c b/pico/32x/memory.c index 3e11cbcb..7f494e7a 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -191,7 +191,7 @@ static NOINLINE u32 sh2_poll_read(u32 a, u32 d, unsigned int cycles, SH2* sh2) int hix = (a >> 1) % PFIFO_CNT; struct sh2_poll_fifo *fifo = sh2_poll_fifo[hix]; struct sh2_poll_fifo *p; - int cpu = sh2 ? sh2->is_slave+1 : 0; + int cpu = sh2 ? sh2->is_slave : -1; unsigned idx; a &= ~0x20000000; // ignore writethrough bit @@ -204,7 +204,7 @@ static NOINLINE u32 sh2_poll_read(u32 a, u32 d, unsigned int cycles, SH2* sh2) if (cpu != p->cpu) { if (CYCLES_GT(cycles, p->cycles+80)) { // drop older fifo stores that may cause synchronisation problems. - sh2_poll_rd[hix] = idx; + p->a = -1; } else if (p->a == a) { // replace current data with fifo value and discard fifo entry d = p->d; @@ -221,24 +221,37 @@ static NOINLINE void sh2_poll_write(u32 a, u32 d, unsigned int cycles, SH2 *sh2) int hix = (a >> 1) % PFIFO_CNT; struct sh2_poll_fifo *fifo = sh2_poll_fifo[hix]; struct sh2_poll_fifo *q = &fifo[(sh2_poll_wr[hix]-1) % PFIFO_SZ]; - int cpu = sh2 ? sh2->is_slave+1 : 0; + int cpu = sh2 ? sh2->is_slave : -1; + unsigned rd = sh2_poll_rd[hix], wr = sh2_poll_wr[hix]; + unsigned idx, nrd; a &= ~0x20000000; // ignore writethrough bit + + // throw out any values written by other cpus, plus heading cancelled stuff + for (idx = nrd = wr; idx != rd; ) { + idx = (idx-1) % PFIFO_SZ; + if (fifo[idx].a == a && fifo[idx].cpu != cpu) { fifo[idx].a = -1; } + if (fifo[idx].a != -1) { nrd = idx; } + } + rd = nrd; + // fold 2 consecutive writes to the same address to avoid reading of // intermediate values that may cause synchronisation problems. 
// NB this can take an eternity on m68k: mov.b , needs // 28 m68k-cycles (~80 sh2-cycles) to complete (observed in Metal Head) - if (q->a == a && sh2_poll_wr[hix] != sh2_poll_rd[hix] && !CYCLES_GT(cycles,q->cycles+30)) { + if (q->a == a && rd != wr && !CYCLES_GT(cycles,q->cycles+30)) { q->d = d; } else { // store write to poll address in fifo - fifo[sh2_poll_wr[hix]] = + fifo[wr] = (struct sh2_poll_fifo){ .cycles = cycles, .a = a, .d = d, .cpu = cpu }; - sh2_poll_wr[hix] = (sh2_poll_wr[hix]+1) % PFIFO_SZ; - if (sh2_poll_wr[hix] == sh2_poll_rd[hix]) + wr = (wr+1) % PFIFO_SZ; + if (wr == rd) // fifo overflow, discard oldest value - sh2_poll_rd[hix] = (sh2_poll_rd[hix]+1) % PFIFO_SZ; + rd = (rd+1) % PFIFO_SZ; } + + sh2_poll_rd[hix] = rd; sh2_poll_wr[hix] = wr; } u32 REGPARM(3) p32x_sh2_poll_memory8(unsigned int a, u32 d, SH2 *sh2) @@ -2367,6 +2380,7 @@ void PicoMemSetup32x(void) sh2_drc_mem_setup(&ssh2); memset(sh2_poll_rd, 0, sizeof(sh2_poll_rd)); memset(sh2_poll_wr, 0, sizeof(sh2_poll_wr)); + memset(sh2_poll_fifo, -1, sizeof(sh2_poll_fifo)); // z80 hack z80_map_set(z80_write_map, 0x8000, 0xffff, z80_md_bank_write_32x, 1); From f740428b81340b35b0e12756e4876f8aac811478 Mon Sep 17 00:00:00 2001 From: kub Date: Sun, 25 Aug 2019 17:33:13 +0200 Subject: [PATCH 0214/1110] some drawing code C optimisations --- Makefile | 4 ++++ README.md | 4 ++-- pico/32x/draw.c | 41 ++++++++++++++++++++++++-------------- pico/draw.c | 10 ++++++++-- platform/common/plat_sdl.c | 35 +++++++++++++------------------- 5 files changed, 54 insertions(+), 40 deletions(-) diff --git a/Makefile b/Makefile index a3b2c96b..88b9238f 100644 --- a/Makefile +++ b/Makefile @@ -5,6 +5,10 @@ CFLAGS += -I. 
ifeq "$(DEBUG)" "0" CFLAGS += -O3 -DNDEBUG endif +ifeq ("$(PLATFORM)",$(filter "$(PLATFORM)","gp2x" "opendingux" "rpi1")) +# very small caches, avoid optimization options making the binary much bigger +CFLAGS += -finline-limit=42 -fno-unroll-loops -fno-ipa-cp-clone # -fno-ipa-cp +endif # This is actually needed, bevieve me. # If you really have to disable this, set NO_ALIGN_FUNCTIONS elsewhere. diff --git a/README.md b/README.md index aa0466d1..d7798231 100644 --- a/README.md +++ b/README.md @@ -29,8 +29,8 @@ assuming $TC points to the appropriate cross compile toolchain directory: platform|toolchain|configure command --------|---------|----------------- -gp2x,wiz,caanoo|open2x|CROSS_COMPILE=arm-open2x-linux- CFLAGS="-I$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -finline-limit=42 -fno-unroll-loops -fno-stack-protector -fno-common" LDFLAGS="--sysroot $TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux -L$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib" ./configure --platform=gp2x -gp2x,wiz,caanoo|open2x with ubuntu arm gcc 4.7|CROSS_COMPILE=arm-linux-gnueabi- CFLAGS="-I$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -finline-limit=42 -fno-unroll-loops -fno-stack-protector -fno-common" LDFLAGS="-B$TC/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1 -B$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib" ./configure --platform=gp2x +gp2x,wiz,caanoo|open2x|CROSS_COMPILE=arm-open2x-linux- CFLAGS="-I$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -fno-stack-protector -fno-common" LDFLAGS="--sysroot $TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux -L$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib" ./configure --platform=gp2x +gp2x,wiz,caanoo|open2x with ubuntu arm gcc 4.7|CROSS_COMPILE=arm-linux-gnueabi- CFLAGS="-I$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -fno-stack-protector -fno-common" LDFLAGS="-B$TC/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1 -B$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib 
-L$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib" ./configure --platform=gp2x opendingux|opendingux|CROSS_COMPILE=mipsel-linux- CFLAGS="-I$TC/usr/include -I$TC/usr/include/SDL" LDFLAGS="--sysroot $TC -L$TC/lib" ./configure --platform=opendingux opendingux|opendingux with ubuntu mips gcc 5.4|CROSS_COMPILE=mipsel-linux-gnu- CFLAGS="-I$TC/usr/include -I$TC/usr/include/SDL" LDFLAGS="-B$TC/usr/lib -B$TC/lib -Wl,-rpath-link=$TC/usr/lib -Wl,-rpath-link=$TC/lib" ./configure --platform=opendingux gcw0|gcw0|CROSS_COMPILE=mipsel-gcw0-linux-uclibc- CFLAGS="-I$TC/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include -I$TC/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include/SDL" LDFLAGS="--sysroot $TC/usr/mipsel-gcw0-linux-uclibc/sysroot" ./configure --platform=gcw0 diff --git a/pico/32x/draw.c b/pico/32x/draw.c index 372f27ef..4119f09d 100644 --- a/pico/32x/draw.c +++ b/pico/32x/draw.c @@ -42,16 +42,21 @@ static void convert_pal555(int invert_prio) const unsigned int m1 = 0x001f; \ const unsigned int m2 = 0x03e0; \ const unsigned int m3 = 0x7c00; \ - int i; \ + unsigned short t; \ + int i = 320; \ \ - for (i = 320; i > 0; i--, pd++, p32x++, pmd++) { \ - unsigned short t = *p32x; \ - if ((*pmd & 0x3f) != mdbg && !((t ^ inv) & 0x8000)) { \ - pmd_draw_code; \ - continue; \ + while (i > 0) { \ + for (; i > 0 && (*pmd & 0x3f) == mdbg; pd++, pmd++, i--) { \ + t = *p32x++; \ + *pd = ((t&m1) << 11) | ((t&m2) << 1) | ((t&m3) >> 10); \ + } \ + for (; i > 0 && (*pmd & 0x3f) != mdbg; pd++, pmd++, i--) { \ + t = *p32x++; \ + if ((t ^ inv) & 0x8000) \ + *pd = ((t&m1) << 11) | ((t&m2) << 1) | ((t&m3) >> 10); \ + else \ + pmd_draw_code; \ } \ - \ - *pd = ((t & m1) << 11) | ((t & m2) << 1) | ((t & m3) >> 10); \ } \ } @@ -59,15 +64,21 @@ static void convert_pal555(int invert_prio) #define do_line_pp(pd, p32x, pmd, pmd_draw_code) \ { \ unsigned short t; \ - int i; \ - for (i = 320; i > 0; i--, pd++, p32x++, pmd++) { \ - t = pal[*(unsigned char *)((uintptr_t)p32x ^ 1)]; \ - if ((t & 0x20) || (*pmd & 0x3f) 
== mdbg) \ + int i = 320; \ + while (i > 0) { \ + for (; i > 0 && (*pmd & 0x3f) == mdbg; pd++, pmd++, i--) { \ + t = pal[*(unsigned char *)((uintptr_t)(p32x++) ^ 1)]; \ *pd = t; \ - else \ - pmd_draw_code; \ + } \ + for (; i > 0 && (*pmd & 0x3f) != mdbg; pd++, pmd++, i--) { \ + t = pal[*(unsigned char *)((uintptr_t)(p32x++) ^ 1)]; \ + if (t & 0x20) \ + *pd = t; \ + else \ + pmd_draw_code; \ + } \ } \ -} +} // run length mode #define do_line_rl(pd, p32x, pmd, pmd_draw_code) \ diff --git a/pico/draw.c b/pico/draw.c index 4834d6bf..0bf7c3de 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -1341,8 +1341,14 @@ void FinalizeLine555(int sh, int line, struct PicoEState *est) #if 1 int i; - for (i = 0; i < len; i++) - pd[i] = pal[ps[i]]; + for (i = len; i > 0; i-=4) { + *pd++ = pal[*ps++]; + *pd++ = pal[*ps++]; + *pd++ = pal[*ps++]; + *pd++ = pal[*ps++]; + } +// for (i = 0; i < len; i++) +// pd[i] = pal[ps[i]]; #else extern void amips_clut(unsigned short *dst, unsigned char *src, unsigned short *pal, int count); extern void amips_clut_6bit(unsigned short *dst, unsigned char *src, unsigned short *pal, int count); diff --git a/platform/common/plat_sdl.c b/platform/common/plat_sdl.c index ef99af2a..bce4b084 100644 --- a/platform/common/plat_sdl.c +++ b/platform/common/plat_sdl.c @@ -89,7 +89,8 @@ static const struct in_pdata in_sdl_platform_data = { /* YUV stuff */ static int yuv_ry[32], yuv_gy[32], yuv_by[32]; static unsigned char yuv_u[32 * 2], yuv_v[32 * 2]; -static int yuv_y[256]; +static unsigned char yuv_y[256]; +static struct uyvy { unsigned int y:8; unsigned int vyu:24; } yuv_uyvy[65536]; void bgr_to_uyvy_init(void) { @@ -124,34 +125,26 @@ void bgr_to_uyvy_init(void) for (i = 0; i < 256; i++) { yuv_y[i] = 16 + 219 * i / 32; } + // everything combined into one large array for speed + for (i = 0; i < 65536; i++) { + int r = (i >> 11) & 0x1f, g = (i >> 6) & 0x1f, b = (i >> 0) & 0x1f; + int y = (yuv_ry[r] + yuv_gy[g] + yuv_by[b]) >> 16; + yuv_uyvy[i].y = yuv_y[y]; + 
yuv_uyvy[i].vyu = (yuv_v[r-y + 32] << 16) | (yuv_y[y] << 8) | yuv_u[b-y + 32]; + } } void rgb565_to_uyvy(void *d, const void *s, int pixels) { unsigned int *dst = d; const unsigned short *src = s; - const unsigned char *yu = yuv_u + 32; - const unsigned char *yv = yuv_v + 32; - int r0, g0, b0, r1, g1, b1; - int y0, y1, u, v; - for (; pixels > 0; src += 2, dst++, pixels -= 2) + for (; pixels > 0; src += 4, dst += 2, pixels -= 4) { - r0 = (src[0] >> 11) & 0x1f; - g0 = (src[0] >> 6) & 0x1f; - b0 = src[0] & 0x1f; - r1 = (src[1] >> 11) & 0x1f; - g1 = (src[1] >> 6) & 0x1f; - b1 = src[1] & 0x1f; - y0 = (yuv_ry[r0] + yuv_gy[g0] + yuv_by[b0]) >> 16; - y1 = (yuv_ry[r1] + yuv_gy[g1] + yuv_by[b1]) >> 16; - u = yu[b0 - y0]; - v = yv[r0 - y0]; - // valid Y range seems to be 16..235 - y0 = yuv_y[y0]; - y1 = yuv_y[y1]; - - *dst = (y1 << 24) | (v << 16) | (y0 << 8) | u; + struct uyvy *uyvy0 = yuv_uyvy + src[0], *uyvy1 = yuv_uyvy + src[1]; + struct uyvy *uyvy2 = yuv_uyvy + src[2], *uyvy3 = yuv_uyvy + src[3]; + dst[0] = (uyvy1->y << 24) | uyvy0->vyu; + dst[1] = (uyvy3->y << 24) | uyvy2->vyu; } } From fe344bd3d8ab717452ae54eea8b2fdfc91e79fda Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 31 Aug 2019 17:37:18 +0200 Subject: [PATCH 0215/1110] cleanup and microoptimizations in SH2 hw handling --- Makefile | 9 +- README.md | 4 +- cpu/drc/emit_arm64.c | 3 +- cpu/drc/emit_mips.c | 4 +- cpu/drc/emit_x86.c | 2 +- pico/32x/32x.c | 2 +- pico/32x/memory.c | 309 +++++++++++++++++++++++++----------------- pico/32x/memory_arm.S | 2 + pico/32x/pwm.c | 184 +++++++++++++------------ pico/32x/sh2soc.c | 32 +++-- pico/draw.c | 2 - pico/pico_int.h | 2 +- 12 files changed, 308 insertions(+), 247 deletions(-) diff --git a/Makefile b/Makefile index 88b9238f..47463d51 100644 --- a/Makefile +++ b/Makefile @@ -5,10 +5,6 @@ CFLAGS += -I. 
ifeq "$(DEBUG)" "0" CFLAGS += -O3 -DNDEBUG endif -ifeq ("$(PLATFORM)",$(filter "$(PLATFORM)","gp2x" "opendingux" "rpi1")) -# very small caches, avoid optimization options making the binary much bigger -CFLAGS += -finline-limit=42 -fno-unroll-loops -fno-ipa-cp-clone # -fno-ipa-cp -endif # This is actually needed, bevieve me. # If you really have to disable this, set NO_ALIGN_FUNCTIONS elsewhere. @@ -38,6 +34,11 @@ else # NO_CONFIG_MAK config.mak: endif +ifeq ("$(PLATFORM)",$(filter "$(PLATFORM)","gp2x" "opendingux" "rpi1")) +# very small caches, avoid optimization options making the binary much bigger +CFLAGS += -finline-limit=42 -fno-unroll-loops -fno-ipa-cp -fno-common -fno-stack-protector -ffast-math +endif + # default settings ifeq "$(ARCH)" "arm" use_cyclone ?= 1 diff --git a/README.md b/README.md index d7798231..8154f7dc 100644 --- a/README.md +++ b/README.md @@ -29,8 +29,8 @@ assuming $TC points to the appropriate cross compile toolchain directory: platform|toolchain|configure command --------|---------|----------------- -gp2x,wiz,caanoo|open2x|CROSS_COMPILE=arm-open2x-linux- CFLAGS="-I$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -fno-stack-protector -fno-common" LDFLAGS="--sysroot $TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux -L$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib" ./configure --platform=gp2x -gp2x,wiz,caanoo|open2x with ubuntu arm gcc 4.7|CROSS_COMPILE=arm-linux-gnueabi- CFLAGS="-I$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -fno-stack-protector -fno-common" LDFLAGS="-B$TC/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1 -B$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib" ./configure --platform=gp2x +gp2x,wiz,caanoo|open2x|CROSS_COMPILE=arm-open2x-linux- CFLAGS="-I$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include" LDFLAGS="--sysroot $TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux -L$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib" ./configure --platform=gp2x +gp2x,wiz,caanoo|open2x with ubuntu 
arm gcc 4.7|CROSS_COMPILE=arm-linux-gnueabi- CFLAGS="-I$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include" LDFLAGS="-B$TC/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1 -B$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib" ./configure --platform=gp2x opendingux|opendingux|CROSS_COMPILE=mipsel-linux- CFLAGS="-I$TC/usr/include -I$TC/usr/include/SDL" LDFLAGS="--sysroot $TC -L$TC/lib" ./configure --platform=opendingux opendingux|opendingux with ubuntu mips gcc 5.4|CROSS_COMPILE=mipsel-linux-gnu- CFLAGS="-I$TC/usr/include -I$TC/usr/include/SDL" LDFLAGS="-B$TC/usr/lib -B$TC/lib -Wl,-rpath-link=$TC/usr/lib -Wl,-rpath-link=$TC/lib" ./configure --platform=opendingux gcw0|gcw0|CROSS_COMPILE=mipsel-gcw0-linux-uclibc- CFLAGS="-I$TC/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include -I$TC/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include/SDL" LDFLAGS="--sysroot $TC/usr/mipsel-gcw0-linux-uclibc/sysroot" ./configure --platform=gcw0 diff --git a/cpu/drc/emit_arm64.c b/cpu/drc/emit_arm64.c index a67f6819..de587619 100644 --- a/cpu/drc/emit_arm64.c +++ b/cpu/drc/emit_arm64.c @@ -1163,9 +1163,10 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) int t2 = rcache_get_tmp(); \ int t3 = rcache_get_tmp(); \ /* if (sr < 0) return */ \ - emith_asrf(t2, sr, 12); \ + emith_cmp_r_imm(sr, 0); \ EMITH_JMP_START(DCOND_LE); \ /* turns = sr.cycles / cycles */ \ + emith_asr(t2, sr, 12); \ emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)) + 1); \ emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */ \ rcache_free_tmp(t3); \ diff --git a/cpu/drc/emit_mips.c b/cpu/drc/emit_mips.c index 91d493b5..e200db0a 100644 --- a/cpu/drc/emit_mips.c +++ b/cpu/drc/emit_mips.c @@ -560,8 +560,8 @@ static void emith_set_arith_flags(int rd, int rt, int rs, s32 imm, int sub) #define emith_adc_r_r(d, s) \ emith_adc_r_r_r(d, d, s) -// NB: the incoming C can cause its own outgoing C if s2+C=0 (or s1+C=0 FWIW) -// moreover, s2 is 0 if there is C, so 
no other C can be generated. +// NB: the incoming carry Cin can cause Cout if s2+Cin=0 (or s1+Cin=0 FWIW) +// moreover, if s2+Cin=0 caused Cout, s1+s2+Cin=s1+0 can't cause another Cout #define emith_adcf_r_r_r(d, s1, s2) do { \ emith_add_r_r_r(FNZ, s2, FC); \ EMIT(MIPS_SLTU_REG(AT, FNZ, FC)); \ diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 62288ff5..d515cd23 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -1,7 +1,7 @@ /* * Basic macros to emit x86 instructions and some utils * Copyright (C) 2008,2009,2010 notaz - * Copyright (C) 2019 kuv + * Copyright (C) 2019 kub * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. diff --git a/pico/32x/32x.c b/pico/32x/32x.c index 1511f3f7..e9d8ff6d 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -426,7 +426,7 @@ void p32x_sync_other_sh2(SH2 *sh2, unsigned int m68k_target) } #define STEP_LS 24 -#define STEP_N 440 +#define STEP_N 488 // one line #define sync_sh2s_normal p32x_sync_sh2s //#define sync_sh2s_lockstep p32x_sync_sh2s diff --git a/pico/32x/memory.c b/pico/32x/memory.c index 7f494e7a..e139910a 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -220,7 +220,7 @@ static NOINLINE void sh2_poll_write(u32 a, u32 d, unsigned int cycles, SH2 *sh2) { int hix = (a >> 1) % PFIFO_CNT; struct sh2_poll_fifo *fifo = sh2_poll_fifo[hix]; - struct sh2_poll_fifo *q = &fifo[(sh2_poll_wr[hix]-1) % PFIFO_SZ]; + struct sh2_poll_fifo *q; int cpu = sh2 ? 
sh2->is_slave : -1; unsigned rd = sh2_poll_rd[hix], wr = sh2_poll_wr[hix]; unsigned idx, nrd; @@ -230,8 +230,9 @@ static NOINLINE void sh2_poll_write(u32 a, u32 d, unsigned int cycles, SH2 *sh2) // throw out any values written by other cpus, plus heading cancelled stuff for (idx = nrd = wr; idx != rd; ) { idx = (idx-1) % PFIFO_SZ; - if (fifo[idx].a == a && fifo[idx].cpu != cpu) { fifo[idx].a = -1; } - if (fifo[idx].a != -1) { nrd = idx; } + q = &fifo[idx]; + if (q->cpu != cpu && q->a == a) { q->a = -1; } + if (q->a != -1) { nrd = idx; } } rd = nrd; @@ -239,7 +240,8 @@ static NOINLINE void sh2_poll_write(u32 a, u32 d, unsigned int cycles, SH2 *sh2) // intermediate values that may cause synchronisation problems. // NB this can take an eternity on m68k: mov.b , needs // 28 m68k-cycles (~80 sh2-cycles) to complete (observed in Metal Head) - if (q->a == a && rd != wr && !CYCLES_GT(cycles,q->cycles+30)) { + q = &fifo[(sh2_poll_wr[hix]-1) % PFIFO_SZ]; + if (rd != wr && q->a == a && !CYCLES_GT(cycles,q->cycles+30)) { q->d = d; } else { // store write to poll address in fifo @@ -493,6 +495,35 @@ static void p32x_reg_write8(u32 a, u32 d) case 0x1d: case 0x1e: case 0x1f: + return; + case 0x20: // comm port + case 0x21: + case 0x22: + case 0x23: + case 0x24: + case 0x25: + case 0x26: + case 0x27: + case 0x28: + case 0x29: + case 0x2a: + case 0x2b: + case 0x2c: + case 0x2d: + case 0x2e: + case 0x2f: + if (REG8IN16(r, a) != d) { + int cycles = SekCyclesDone(); + + if (cycles - (int)msh2.m68krcycles_done > 30) + p32x_sync_sh2s(cycles); + + REG8IN16(r, a) = d; + p32x_sh2_poll_event(&sh2s[0], SH2_STATE_CPOLL, cycles); + p32x_sh2_poll_event(&sh2s[1], SH2_STATE_CPOLL, cycles); + sh2_poll_write(a & ~1, r[a / 2], cycles, NULL); + } + return; case 0x30: return; case 0x31: // PWM control @@ -532,22 +563,6 @@ static void p32x_reg_write8(u32 a, u32 d) p32x_pwm_write16(a & ~1, d, NULL, SekCyclesDone()); return; } - - if ((a & 0x30) == 0x20) { - int cycles = SekCyclesDone(); - - if 
(REG8IN16(r, a) == d) - return; - - if (cycles - (int)msh2.m68krcycles_done > 30) - p32x_sync_sh2s(cycles); - - REG8IN16(r, a) = d; - p32x_sh2_poll_event(&sh2s[0], SH2_STATE_CPOLL, cycles); - p32x_sh2_poll_event(&sh2s[1], SH2_STATE_CPOLL, cycles); - sh2_poll_write(a & ~1, r[a / 2], cycles, NULL); - return; - } } static void p32x_reg_write16(u32 a, u32 d) @@ -558,61 +573,68 @@ static void p32x_reg_write16(u32 a, u32 d) // for things like bset on comm port m68k_poll.cnt = 0; - switch (a) { - case 0x00: // adapter ctl + switch (a/2) { + case 0x00/2: // adapter ctl if ((d ^ r[0]) & d & P32XS_nRES) p32x_reset_sh2s(); r[0] &= ~(P32XS_FM|P32XS_nRES|P32XS_ADEN); r[0] |= d & (P32XS_FM|P32XS_nRES|P32XS_ADEN); return; - case 0x08: // DREQ src + case 0x08/2: // DREQ src r[a / 2] = d & 0xff; return; - case 0x0a: + case 0x0a/2: r[a / 2] = d & ~1; return; - case 0x0c: // DREQ dest + case 0x0c/2: // DREQ dest r[a / 2] = d & 0xff; return; - case 0x0e: + case 0x0e/2: r[a / 2] = d; return; - case 0x10: // DREQ len + case 0x10/2: // DREQ len r[a / 2] = d & ~3; return; - case 0x12: // FIFO reg + case 0x12/2: // FIFO reg dreq0_write(r, d); return; - case 0x1a: // TV + mystery bit + case 0x1a/2: // TV + mystery bit r[a / 2] = d & 0x0101; return; - case 0x30: // PWM control + case 0x20/2: // comm port + case 0x22/2: + case 0x24/2: + case 0x26/2: + case 0x28/2: + case 0x2a/2: + case 0x2c/2: + case 0x2e/2: + if (r[a / 2] != d) { + int cycles = SekCyclesDone(); + + if (cycles - (int)msh2.m68krcycles_done > 30) + p32x_sync_sh2s(cycles); + + r[a / 2] = d; + p32x_sh2_poll_event(&sh2s[0], SH2_STATE_CPOLL, cycles); + p32x_sh2_poll_event(&sh2s[1], SH2_STATE_CPOLL, cycles); + sh2_poll_write(a, (u16)d, cycles, NULL); + } + return; + case 0x30/2: // PWM control d = (r[a / 2] & ~0x0f) | (d & 0x0f); r[a / 2] = d; p32x_pwm_write16(a, d, NULL, SekCyclesDone()); return; - } - - // comm port - if ((a & 0x30) == 0x20) { - int cycles = SekCyclesDone(); - - if (r[a / 2] == d) - return; - - if (cycles - 
(int)msh2.m68krcycles_done > 30) - p32x_sync_sh2s(cycles); - - r[a / 2] = d; - p32x_sh2_poll_event(&sh2s[0], SH2_STATE_CPOLL, cycles); - p32x_sh2_poll_event(&sh2s[1], SH2_STATE_CPOLL, cycles); - sh2_poll_write(a, (u16)d, cycles, NULL); - return; - } - // PWM - else if ((a & 0x30) == 0x30) { - p32x_pwm_write16(a, d, NULL, SekCyclesDone()); - return; + case 0x32/2: + case 0x34/2: + case 0x36/2: + case 0x38/2: + case 0x3a/2: + case 0x3c/2: + case 0x3e/2: + p32x_pwm_write16(a, d, NULL, SekCyclesDone()); + return; } p32x_reg_write8(a + 1, d); @@ -709,23 +731,23 @@ static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2) u16 *r = Pico32x.regs; a &= 0x3e; - switch (a) { - case 0x00: // adapter/irq ctl + switch (a/2) { + case 0x00/2: // adapter/irq ctl return (r[0] & P32XS_FM) | Pico32x.sh2_regs[0] | Pico32x.sh2irq_mask[sh2->is_slave]; - case 0x04: // H count (often as comm too) + case 0x04/2: // H count (often as comm too) sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9); sh2s_sync_on_read(sh2); return sh2_poll_read(a, Pico32x.sh2_regs[4 / 2], sh2_cycles_done_m68k(sh2), sh2); - case 0x06: + case 0x06/2: return (r[a / 2] & ~P32XS_FULL) | 0x4000; - case 0x08: // DREQ src - case 0x0a: - case 0x0c: // DREQ dst - case 0x0e: - case 0x10: // DREQ len + case 0x08/2: // DREQ src + case 0x0a/2: + case 0x0c/2: // DREQ dst + case 0x0e/2: + case 0x10/2: // DREQ len return r[a / 2]; - case 0x12: // DREQ FIFO - does this work on hw? + case 0x12/2: // DREQ FIFO - does this work on hw? if (Pico32x.dmac0_fifo_ptr > 0) { Pico32x.dmac0_fifo_ptr--; r[a / 2] = Pico32x.dmac_fifo[0]; @@ -733,23 +755,34 @@ static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2) Pico32x.dmac0_fifo_ptr * 2); } return r[a / 2]; - case 0x14: - case 0x16: - case 0x18: - case 0x1a: - case 0x1c: + case 0x14/2: + case 0x16/2: + case 0x18/2: + case 0x1a/2: + case 0x1c/2: return 0; // ? 
+ case 0x20/2: // comm port + case 0x22/2: + case 0x24/2: + case 0x26/2: + case 0x28/2: + case 0x2a/2: + case 0x2c/2: + case 0x2e/2: + sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9); + sh2s_sync_on_read(sh2); + return sh2_poll_read(a, r[a / 2], sh2_cycles_done_m68k(sh2), sh2); + case 0x30/2: // PWM + case 0x32/2: + case 0x34/2: + case 0x36/2: + case 0x38/2: + case 0x3a/2: + case 0x3c/2: + case 0x3e/2: + return p32x_pwm_read16(a, sh2, sh2_cycles_done_m68k(sh2)); } - // comm port - if ((a & 0x30) == 0x20) { - sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9); - sh2s_sync_on_read(sh2); - return sh2_poll_read(a, r[a / 2], sh2_cycles_done_m68k(sh2), sh2); - } - if ((a & 0x30) == 0x30) - return p32x_pwm_read16(a, sh2, sh2_cycles_done_m68k(sh2)); - elprintf_sh2(sh2, EL_32X|EL_ANOMALY, "unhandled sysreg r16 [%02x] @%08x", a, sh2_pc(sh2)); return 0; @@ -796,6 +829,32 @@ static void p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2) sh2_poll_write(a & ~1, d, cycles, sh2); } return; + case 0x20: // comm port + case 0x21: + case 0x22: + case 0x23: + case 0x24: + case 0x25: + case 0x26: + case 0x27: + case 0x28: + case 0x29: + case 0x2a: + case 0x2b: + case 0x2c: + case 0x2d: + case 0x2e: + case 0x2f: + if (REG8IN16(r, a) != d) { + unsigned int cycles = sh2_cycles_done_m68k(sh2); + + REG8IN16(r, a) = d; + sh2_end_run(sh2, 1); + p32x_m68k_poll_event(P32XF_68KCPOLL); + p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles); + sh2_poll_write(a & ~1, r[a / 2], cycles, sh2); + } + return; case 0x30: REG8IN16(r, a) = d & 0x0f; d = r[0x30 / 2]; @@ -837,20 +896,6 @@ static void p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2) return; } - if ((a & 0x30) == 0x20) { - unsigned int cycles; - if (REG8IN16(r, a) == d) - return; - - REG8IN16(r, a) = d; - cycles = sh2_cycles_done_m68k(sh2); - sh2_end_run(sh2, 1); - p32x_m68k_poll_event(P32XF_68KCPOLL); - p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles); - sh2_poll_write(a & ~1, r[a / 2], cycles, sh2); - return; - } - elprintf(EL_32X|EL_ANOMALY, 
"unhandled sysreg w8 [%02x] %02x @%08x", a, d, sh2_pc(sh2)); } @@ -861,49 +906,57 @@ static void p32x_sh2reg_write16(u32 a, u32 d, SH2 *sh2) sh2->poll_cnt = 0; - // comm - if ((a & 0x30) == 0x20) { - unsigned int cycles; - if (Pico32x.regs[a / 2] == d) - return; - - Pico32x.regs[a / 2] = d; - cycles = sh2_cycles_done_m68k(sh2); - sh2_end_run(sh2, 1); - p32x_m68k_poll_event(P32XF_68KCPOLL); - p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles); - sh2_poll_write(a, d, cycles, sh2); - return; - } - // PWM - else if ((a & 0x30) == 0x30) { - p32x_pwm_write16(a, d, sh2, sh2_cycles_done_m68k(sh2)); - return; - } - - switch (a) { - case 0: // FM + switch (a/2) { + case 0x00/2: // FM Pico32x.regs[0] &= ~P32XS_FM; Pico32x.regs[0] |= d & P32XS_FM; break; - case 0x14: + case 0x14/2: Pico32x.sh2irqs &= ~P32XI_VRES; goto irls; - case 0x16: + case 0x16/2: Pico32x.sh2irqi[sh2->is_slave] &= ~P32XI_VINT; goto irls; - case 0x18: + case 0x18/2: Pico32x.sh2irqi[sh2->is_slave] &= ~P32XI_HINT; goto irls; - case 0x1a: + case 0x1a/2: Pico32x.regs[2 / 2] &= ~(1 << sh2->is_slave); p32x_update_cmd_irq(sh2, 0); return; - case 0x1c: + case 0x1c/2: p32x_pwm_sync_to_sh2(sh2); Pico32x.sh2irqi[sh2->is_slave] &= ~P32XI_PWM; p32x_pwm_schedule_sh2(sh2); goto irls; + case 0x20/2: // comm port + case 0x22/2: + case 0x24/2: + case 0x26/2: + case 0x28/2: + case 0x2a/2: + case 0x2c/2: + case 0x2e/2: + if (Pico32x.regs[a / 2] != d) { + unsigned int cycles = sh2_cycles_done_m68k(sh2); + + Pico32x.regs[a / 2] = d; + sh2_end_run(sh2, 1); + p32x_m68k_poll_event(P32XF_68KCPOLL); + p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles); + sh2_poll_write(a, d, cycles, sh2); + } + return; + case 0x30/2: // PWM + case 0x32/2: + case 0x34/2: + case 0x36/2: + case 0x38/2: + case 0x3a/2: + case 0x3c/2: + case 0x3e/2: + p32x_pwm_write16(a, d, sh2, sh2_cycles_done_m68k(sh2)); + return; } p32x_sh2reg_write8(a | 1, d, sh2); @@ -1391,7 +1444,7 @@ static u32 REGPARM(2) sh2_read8_cs0(u32 a, SH2 *sh2) 
sh2_burn_cycles(sh2, 1*2); - // 0x3ffc0 is veridied + // 0x3ffc0 is verified if ((a & 0x3ffc0) == 0x4000) { d = p32x_sh2reg_read16(a, sh2); goto out_16to8; @@ -1573,6 +1626,11 @@ static void REGPARM(3) sh2_write8_cs0(u32 a, u32 d, SH2 *sh2) elprintf_sh2(sh2, EL_32X, "w8 [%08x] %02x @%06x", a, d & 0xff, sh2_pc(sh2)); + if ((a & 0x3ffc0) == 0x4000) { + p32x_sh2reg_write8(a, d, sh2); + goto out; + } + if (Pico32x.regs[0] & P32XS_FM) { if ((a & 0x3fff0) == 0x4100) { sh2->poll_cnt = 0; @@ -1588,11 +1646,6 @@ static void REGPARM(3) sh2_write8_cs0(u32 a, u32 d, SH2 *sh2) } } - if ((a & 0x3ffc0) == 0x4000) { - p32x_sh2reg_write8(a, d, sh2); - goto out; - } - sh2_write8_unmapped(a, d, sh2); out: DRC_RESTORE_SR(sh2); @@ -1647,6 +1700,11 @@ static void REGPARM(3) sh2_write16_cs0(u32 a, u32 d, SH2 *sh2) elprintf_sh2(sh2, EL_32X, "w16 [%08x] %04x @%06x", a, d & 0xffff, sh2_pc(sh2)); + if ((a & 0x3ffc0) == 0x4000) { + p32x_sh2reg_write16(a, d, sh2); + goto out; + } + if (Pico32x.regs[0] & P32XS_FM) { if ((a & 0x3fff0) == 0x4100) { sh2->poll_cnt = 0; @@ -1662,11 +1720,6 @@ static void REGPARM(3) sh2_write16_cs0(u32 a, u32 d, SH2 *sh2) } } - if ((a & 0x3ffc0) == 0x4000) { - p32x_sh2reg_write16(a, d, sh2); - goto out; - } - sh2_write16_unmapped(a, d, sh2); out: DRC_RESTORE_SR(sh2); diff --git a/pico/32x/memory_arm.S b/pico/32x/memory_arm.S index 43a01958..ba83a6bf 100644 --- a/pico/32x/memory_arm.S +++ b/pico/32x/memory_arm.S @@ -18,6 +18,7 @@ .text +#if 0 @ u32 a, SH2 *sh2 .global sh2_read8_rom .global sh2_read8_sdram @@ -31,6 +32,7 @@ .global sh2_read32_sdram .global sh2_read32_da .global sh2_read32_dram +#endif @ u32 a, u32 d, SH2 *sh2 .global sh2_write8_sdram diff --git a/pico/32x/pwm.c b/pico/32x/pwm.c index 50735642..1c1ec428 100644 --- a/pico/32x/pwm.c +++ b/pico/32x/pwm.c @@ -7,12 +7,15 @@ */ #include "../pico_int.h" -static int pwm_cycles; -static int pwm_mult; -static int pwm_ptr; -static int pwm_irq_reload; -static int pwm_doing_fifo; -static int pwm_silent; +static 
struct { + int cycles; + int mult; + int ptr; + int irq_reload; + int doing_fifo; + int silent; + short current[2]; +} pwm; void p32x_pwm_ctl_changed(void) { @@ -20,19 +23,19 @@ void p32x_pwm_ctl_changed(void) int cycles = Pico32x.regs[0x32 / 2]; cycles = (cycles - 1) & 0x0fff; - pwm_cycles = cycles; + pwm.cycles = cycles; // supposedly we should stop FIFO when xMd is 0, // but mars test disagrees - pwm_mult = 0; + pwm.mult = 0; if ((control & 0x0f) != 0) - pwm_mult = 0x10000 / cycles; + pwm.mult = 0x10000 / cycles; - pwm_irq_reload = (control & 0x0f00) >> 8; - pwm_irq_reload = ((pwm_irq_reload - 1) & 0x0f) + 1; + pwm.irq_reload = (control & 0x0f00) >> 8; + pwm.irq_reload = ((pwm.irq_reload - 1) & 0x0f) + 1; if (Pico32x.pwm_irq_cnt == 0) - Pico32x.pwm_irq_cnt = pwm_irq_reload; + Pico32x.pwm_irq_cnt = pwm.irq_reload; } static void do_pwm_irq(SH2 *sh2, unsigned int m68k_cycles) @@ -40,7 +43,7 @@ static void do_pwm_irq(SH2 *sh2, unsigned int m68k_cycles) p32x_trigger_irq(sh2, m68k_cycles, P32XI_PWM); if (Pico32x.regs[0x30 / 2] & P32XP_RTP) { - p32x_event_schedule(m68k_cycles, P32X_EVENT_PWM, pwm_cycles / 3 + 1); + p32x_event_schedule(m68k_cycles, P32X_EVENT_PWM, pwm.cycles / 3 + 1); // note: might recurse p32x_dreq1_trigger(); } @@ -50,14 +53,14 @@ static int convert_sample(unsigned int v) { if (v == 0) return 0; - if (v > pwm_cycles) - v = pwm_cycles; - return ((int)v - pwm_cycles / 2) * pwm_mult; + if (v > pwm.cycles) + v = pwm.cycles; + return (v * 2 - pwm.cycles) / 2 * pwm.mult; } #define consume_fifo(sh2, m68k_cycles) { \ int cycles_diff = ((m68k_cycles) * 3) - Pico32x.pwm_cycle_p; \ - if (cycles_diff >= pwm_cycles) \ + if (cycles_diff >= pwm.cycles) \ consume_fifo_do(sh2, m68k_cycles, cycles_diff); \ } @@ -69,67 +72,63 @@ static void consume_fifo_do(SH2 *sh2, unsigned int m68k_cycles, unsigned short *fifo_r = mem->pwm_fifo[1]; int sum = 0; - if (pwm_cycles == 0 || pwm_doing_fifo) + if (pwm.cycles == 0 || pwm.doing_fifo) return; elprintf(EL_PWM, "pwm: %u: consume 
%d/%d, %d,%d ptr %d", - m68k_cycles, sh2_cycles_diff, sh2_cycles_diff / pwm_cycles, - Pico32x.pwm_p[0], Pico32x.pwm_p[1], pwm_ptr); + m68k_cycles, sh2_cycles_diff, sh2_cycles_diff / pwm.cycles, + Pico32x.pwm_p[0], Pico32x.pwm_p[1], pwm.ptr); // this is for recursion from dreq1 writes - pwm_doing_fifo = 1; + pwm.doing_fifo = 1; - for (; sh2_cycles_diff >= pwm_cycles; sh2_cycles_diff -= pwm_cycles) + for (; sh2_cycles_diff >= pwm.cycles; sh2_cycles_diff -= pwm.cycles) { if (Pico32x.pwm_p[0] > 0) { - fifo_l[0] = fifo_l[1]; - fifo_l[1] = fifo_l[2]; - fifo_l[2] = fifo_l[3]; + mem->pwm_index[0] = (mem->pwm_index[0]+1) % 4; Pico32x.pwm_p[0]--; - mem->pwm_current[0] = convert_sample(fifo_l[0]); - sum += mem->pwm_current[0]; + pwm.current[0] = convert_sample(fifo_l[mem->pwm_index[0]]); + sum |=pwm.current[0]; } if (Pico32x.pwm_p[1] > 0) { - fifo_r[0] = fifo_r[1]; - fifo_r[1] = fifo_r[2]; - fifo_r[2] = fifo_r[3]; + mem->pwm_index[1] = (mem->pwm_index[1]+1) % 4; Pico32x.pwm_p[1]--; - mem->pwm_current[1] = convert_sample(fifo_r[0]); - sum += mem->pwm_current[1]; + pwm.current[1] = convert_sample(fifo_r[mem->pwm_index[1]]); + sum |= pwm.current[1]; } - mem->pwm[pwm_ptr * 2 ] = mem->pwm_current[0]; - mem->pwm[pwm_ptr * 2 + 1] = mem->pwm_current[1]; - pwm_ptr = (pwm_ptr + 1) & (PWM_BUFF_LEN - 1); + mem->pwm[pwm.ptr * 2 ] = pwm.current[0]; + mem->pwm[pwm.ptr * 2 + 1] = pwm.current[1]; + pwm.ptr = (pwm.ptr + 1) & (PWM_BUFF_LEN - 1); if (--Pico32x.pwm_irq_cnt == 0) { - Pico32x.pwm_irq_cnt = pwm_irq_reload; + Pico32x.pwm_irq_cnt = pwm.irq_reload; do_pwm_irq(sh2, m68k_cycles); } } Pico32x.pwm_cycle_p = m68k_cycles * 3 - sh2_cycles_diff; - pwm_doing_fifo = 0; + pwm.doing_fifo = 0; if (sum != 0) - pwm_silent = 0; + pwm.silent = 0; } static int p32x_pwm_schedule_(SH2 *sh2, unsigned int m68k_now) { - unsigned int sh2_now = m68k_now * 3; + unsigned int pwm_now = m68k_now * 3; int cycles_diff_sh2; - if (pwm_cycles == 0) + if (pwm.cycles == 0) return 0; - cycles_diff_sh2 = sh2_now - 
Pico32x.pwm_cycle_p; - if (cycles_diff_sh2 >= pwm_cycles) + cycles_diff_sh2 = pwm_now - Pico32x.pwm_cycle_p; + if (cycles_diff_sh2 >= pwm.cycles) consume_fifo_do(sh2, m68k_now, cycles_diff_sh2); if (!((Pico32x.sh2irq_mask[0] | Pico32x.sh2irq_mask[1]) & 1)) return 0; // masked by everyone - cycles_diff_sh2 = sh2_now - Pico32x.pwm_cycle_p; - return (Pico32x.pwm_irq_cnt * pwm_cycles + cycles_diff_sh2 = pwm_now - Pico32x.pwm_cycle_p; + return (Pico32x.pwm_irq_cnt * pwm.cycles - cycles_diff_sh2) / 3 + 1; } @@ -166,21 +165,21 @@ unsigned int p32x_pwm_read16(unsigned int a, SH2 *sh2, consume_fifo(sh2, m68k_cycles); a &= 0x0e; - switch (a) { - case 0: // control - case 2: // cycle + switch (a/2) { + case 0/2: // control + case 2/2: // cycle d = Pico32x.regs[(0x30 + a) / 2]; break; - case 4: // L ch + case 4/2: // L ch if (Pico32x.pwm_p[0] == 3) d |= P32XP_FULL; else if (Pico32x.pwm_p[0] == 0) d |= P32XP_EMPTY; break; - case 6: // R ch - case 8: // MONO + case 6/2: // R ch + case 8/2: // MONO if (Pico32x.pwm_p[1] == 3) d |= P32XP_FULL; else if (Pico32x.pwm_p[1] == 0) @@ -196,47 +195,53 @@ unsigned int p32x_pwm_read16(unsigned int a, SH2 *sh2, void p32x_pwm_write16(unsigned int a, unsigned int d, SH2 *sh2, unsigned int m68k_cycles) { + unsigned short *fifo; + int idx; + elprintf(EL_PWM, "pwm: %u: w16 %02x %04x (p %d %d)", m68k_cycles, a & 0x0e, d, Pico32x.pwm_p[0], Pico32x.pwm_p[1]); consume_fifo(sh2, m68k_cycles); a &= 0x0e; - if (a == 0) { // control - // avoiding pops.. - if ((Pico32x.regs[0x30 / 2] & 0x0f) == 0) - Pico32xMem->pwm_fifo[0][0] = Pico32xMem->pwm_fifo[1][0] = 0; - Pico32x.regs[0x30 / 2] = d; - p32x_pwm_ctl_changed(); - Pico32x.pwm_irq_cnt = pwm_irq_reload; // ? 
- } - else if (a == 2) { // cycle - Pico32x.regs[0x32 / 2] = d & 0x0fff; - p32x_pwm_ctl_changed(); - } - else if (a <= 8) { - d = (d - 1) & 0x0fff; - - if (a == 4 || a == 8) { // L ch or MONO - unsigned short *fifo = Pico32xMem->pwm_fifo[0]; - if (Pico32x.pwm_p[0] < 3) - Pico32x.pwm_p[0]++; - else { - fifo[1] = fifo[2]; - fifo[2] = fifo[3]; - } - fifo[Pico32x.pwm_p[0]] = d; - } - if (a == 6 || a == 8) { // R ch or MONO - unsigned short *fifo = Pico32xMem->pwm_fifo[1]; + switch (a/2) { + case 0/2: // control + // avoiding pops.. + if ((Pico32x.regs[0x30 / 2] & 0x0f) == 0) + Pico32xMem->pwm_fifo[0][0] = Pico32xMem->pwm_fifo[1][0] = 0; + Pico32x.regs[0x30 / 2] = d; + p32x_pwm_ctl_changed(); + Pico32x.pwm_irq_cnt = pwm.irq_reload; // ? + break; + case 2/2: // cycle + Pico32x.regs[0x32 / 2] = d & 0x0fff; + p32x_pwm_ctl_changed(); + break; + case 8/2: // MONO + case 6/2: // R ch + fifo = Pico32xMem->pwm_fifo[1]; + idx = Pico32xMem->pwm_index[1]; if (Pico32x.pwm_p[1] < 3) Pico32x.pwm_p[1]++; else { - fifo[1] = fifo[2]; - fifo[2] = fifo[3]; +// fifo[(idx+1) % 4] = fifo[idx]; + idx = (idx+1) % 4; + Pico32xMem->pwm_index[0] = idx; } - fifo[Pico32x.pwm_p[1]] = d; - } + fifo[(idx+Pico32x.pwm_p[1]) % 4] = (d - 1) & 0x0fff; + if (a != 8) break; // fallthrough if MONO + case 4/2: // L ch + fifo = Pico32xMem->pwm_fifo[0]; + idx = Pico32xMem->pwm_index[0]; + if (Pico32x.pwm_p[0] < 3) + Pico32x.pwm_p[0]++; + else { +// fifo[(idx+1) % 4] = fifo[idx]; + idx = (idx+1) % 4; + Pico32xMem->pwm_index[0] = idx; + } + fifo[(idx+Pico32x.pwm_p[0]) % 4] = (d - 1) & 0x0fff; + break; } } @@ -252,10 +257,10 @@ void p32x_pwm_update(int *buf32, int length, int stereo) xmd = Pico32x.regs[0x30 / 2] & 0x0f; if (xmd == 0 || xmd == 0x06 || xmd == 0x09 || xmd == 0x0f) goto out; // invalid? 
- if (pwm_silent) + if (pwm.silent) return; - step = (pwm_ptr << 16) / length; + step = (pwm.ptr << 16) / length; pwmb = Pico32xMem->pwm; if (stereo) @@ -310,13 +315,12 @@ void p32x_pwm_update(int *buf32, int length, int stereo) } } - elprintf(EL_PWM, "pwm_update: pwm_ptr %d, len %d, step %04x, done %d", - pwm_ptr, length, step, (pwmb - Pico32xMem->pwm) / 2); + elprintf(EL_PWM, "pwm_update: pwm.ptr %d, len %d, step %04x, done %d", + pwm.ptr, length, step, (pwmb - Pico32xMem->pwm) / 2); out: - pwm_ptr = 0; - pwm_silent = Pico32xMem->pwm_current[0] == 0 - && Pico32xMem->pwm_current[1] == 0; + pwm.ptr = 0; + pwm.silent = pwm.current[0] == 0 && pwm.current[1] == 0; } void p32x_pwm_state_loaded(void) @@ -327,8 +331,8 @@ void p32x_pwm_state_loaded(void) // for old savestates cycles_diff_sh2 = Pico.t.m68c_cnt * 3 - Pico32x.pwm_cycle_p; - if (cycles_diff_sh2 >= pwm_cycles || cycles_diff_sh2 < 0) { - Pico32x.pwm_irq_cnt = pwm_irq_reload; + if (cycles_diff_sh2 >= pwm.cycles || cycles_diff_sh2 < 0) { + Pico32x.pwm_irq_cnt = pwm.irq_reload; Pico32x.pwm_cycle_p = Pico.t.m68c_cnt * 3; p32x_pwm_schedule(Pico.t.m68c_cnt); } diff --git a/pico/32x/sh2soc.c b/pico/32x/sh2soc.c index 1f19150e..2b5a126c 100644 --- a/pico/32x/sh2soc.c +++ b/pico/32x/sh2soc.c @@ -399,6 +399,7 @@ void REGPARM(3) sh2_peripheral_write32(u32 a, u32 d, SH2 *sh2) { u32 *r = sh2->peri_regs; u32 old; + struct dmac *dmac; elprintf_sh2(sh2, EL_32XP, "peri w32 [%08x] %08x @%06x", a, d, sh2_pc(sh2)); @@ -439,22 +440,23 @@ void REGPARM(3) sh2_peripheral_write32(u32 a, u32 d, SH2 *sh2) else r[0x110 / 4] = r[0x114 / 4] = r[0x118 / 4] = r[0x11c / 4] = 0; // ? break; - } + // perhaps starting a DMA? + case 0x18c: + case 0x19c: + case 0x1b0: + dmac = (void *)&sh2->peri_regs[0x180 / 4]; + if (a == 0x1b0 && !((old ^ d) & d & DMA_DME)) + return; + if (!(dmac->dmaor & DMA_DME)) + return; - // perhaps starting a DMA? 
- if (a == 0x1b0 || a == 0x18c || a == 0x19c) { - struct dmac *dmac = (void *)&sh2->peri_regs[0x180 / 4]; - if (a == 0x1b0 && !((old ^ d) & d & DMA_DME)) - return; - if (!(dmac->dmaor & DMA_DME)) - return; - - DRC_SAVE_SR(sh2); - if ((dmac->chan[0].chcr & (DMA_TE|DMA_DE)) == DMA_DE) - dmac_trigger(sh2, &dmac->chan[0]); - if ((dmac->chan[1].chcr & (DMA_TE|DMA_DE)) == DMA_DE) - dmac_trigger(sh2, &dmac->chan[1]); - DRC_RESTORE_SR(sh2); + DRC_SAVE_SR(sh2); + if ((dmac->chan[0].chcr & (DMA_TE|DMA_DE)) == DMA_DE) + dmac_trigger(sh2, &dmac->chan[0]); + if ((dmac->chan[1].chcr & (DMA_TE|DMA_DE)) == DMA_DE) + dmac_trigger(sh2, &dmac->chan[1]); + DRC_RESTORE_SR(sh2); + break; } } diff --git a/pico/draw.c b/pico/draw.c index 0bf7c3de..7fd93f8e 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -1347,8 +1347,6 @@ void FinalizeLine555(int sh, int line, struct PicoEState *est) *pd++ = pal[*ps++]; *pd++ = pal[*ps++]; } -// for (i = 0; i < len; i++) -// pd[i] = pal[ps[i]]; #else extern void amips_clut(unsigned short *dst, unsigned char *src, unsigned short *pal, int count); extern void amips_clut_6bit(unsigned short *dst, unsigned char *src, unsigned short *pal, int count); diff --git a/pico/pico_int.h b/pico/pico_int.h index 36b36144..89acc4fb 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -630,8 +630,8 @@ struct Pico32xMem unsigned short pal[0x100]; unsigned short pal_native[0x100]; // converted to native (for renderer) signed short pwm[2*PWM_BUFF_LEN]; // PWM buffer for current frame - signed short pwm_current[2]; // current converted samples unsigned short pwm_fifo[2][4]; // [0] - current raw, others - fifo entries + unsigned pwm_index[2]; // ringbuffer index for pwm_fifo }; // area.c From f53e166cf471684e5325b595fb7bb65df7c5b093 Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 17 Sep 2019 22:48:32 +0200 Subject: [PATCH 0216/1110] various smallish optimizations, cleanups, and bug fixes --- Makefile | 4 +++- cpu/drc/emit_arm.c | 48 +++++++++++++++++++++++++++++++++++++++++--- 
cpu/drc/emit_arm64.c | 4 ++++ cpu/drc/emit_mips.c | 40 ++++++++++++++++++++++-------------- cpu/drc/emit_x86.c | 4 ++-- cpu/sh2/compiler.c | 24 +++++++++++++++++++--- cpu/sh2/compiler.h | 16 +++++++-------- pico/32x/32x.c | 4 ++-- 8 files changed, 109 insertions(+), 35 deletions(-) diff --git a/Makefile b/Makefile index 47463d51..63e9c833 100644 --- a/Makefile +++ b/Makefile @@ -36,7 +36,9 @@ endif ifeq ("$(PLATFORM)",$(filter "$(PLATFORM)","gp2x" "opendingux" "rpi1")) # very small caches, avoid optimization options making the binary much bigger -CFLAGS += -finline-limit=42 -fno-unroll-loops -fno-ipa-cp -fno-common -fno-stack-protector -ffast-math +CFLAGS += -finline-limit=42 -fno-unroll-loops -fno-ipa-cp +# this gets you about 20% better execution speed on 32bit arm/mips +CFLAGS += -fno-common -fno-stack-protector -fno-guess-branch-probability -fno-caller-saves -fno-tree-loop-if-convert -ffast-math endif # default settings diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index 1d70866c..66a5b065 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -36,6 +36,47 @@ #define M5(x,y,z,a,b) (M4(x,y,z,a)|M1(b)) #define M10(a,b,c,d,e,f,g,h,i,j) (M5(a,b,c,d,e)|M5(f,g,h,i,j)) +// sys_cacheflush always flushes whole pages, and it's rather expensive on ARMs +// hold a list of pending cache updates and merge requests to reduce cacheflush +static struct { void *base, *end; } pageflush[4]; +static unsigned pagesize = 4096; + +static void emith_update_cache(void) +{ + int i; + + for (i = 0; i < 4 && pageflush[i].base; i++) { + cache_flush_d_inval_i(pageflush[i].base, pageflush[i].end + pagesize-1); + pageflush[i].base = NULL; + } +} + +static inline void emith_update_add(void *base, void *end) +{ + void *p_base = (void *)((uintptr_t)(base) & ~(pagesize-1)); + void *p_end = (void *)((uintptr_t)(end ) & ~(pagesize-1)); + int i; + + for (i = 0; i < 4 && pageflush[i].base; i++) { + if (p_base <= pageflush[i].end+pagesize && p_end >= pageflush[i].end) { + if (p_base < 
pageflush[i].base) pageflush[i].base = p_base; + pageflush[i].end = p_end; + return; + } + if (p_base <= pageflush[i].base && p_end >= pageflush[i].base-pagesize) { + if (p_end > pageflush[i].end) pageflush[i].end = p_end; + pageflush[i].base = p_base; + return; + } + } + if (i == 4) { + /* list full and not mergeable -> flush list */ + emith_update_cache(); + i = 0; + } + pageflush[i].base = p_base, pageflush[i].end = p_end; +} + // peephole optimizer. ATM only tries to reduce interlock #define EMIT_CACHE_SIZE 3 struct emit_op { @@ -48,8 +89,8 @@ static struct emit_op emit_cache[EMIT_CACHE_SIZE+3]; static int emit_index; #define emith_insn_ptr() (u8 *)((u32 *)tcache_ptr-emit_index) -static int emith_pool_index(int tcache_offs); -static void emith_pool_adjust(int pool_index, int move_offs); +static inline int emith_pool_index(int tcache_offs); +static inline void emith_pool_adjust(int pool_index, int move_offs); static NOINLINE void EMIT(u32 op, u32 dst, u32 src) { @@ -1106,6 +1147,7 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) (u8 *)ptr; \ }) +#define emith_jump_cond_inrange(target) !0 #define emith_jump_patch_size() 4 #define emith_jump_at(ptr, target) do { \ @@ -1170,7 +1212,7 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) } while (0) #define host_instructions_updated(base, end) \ - cache_flush_d_inval_i(base, end) + emith_update_add(base, end) #define host_arg2reg(rd, arg) \ rd = arg diff --git a/cpu/drc/emit_arm64.c b/cpu/drc/emit_arm64.c index de587619..8ce2ef38 100644 --- a/cpu/drc/emit_arm64.c +++ b/cpu/drc/emit_arm64.c @@ -1038,6 +1038,9 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) #define emith_jump_cond_patchable(cond, target) \ emith_bcond(tcache_ptr, 1, cond, target) +#define emith_jump_cond_inrange(target) \ + !(((u8 *)target - (u8 *)tcache_ptr + 0x100000) >> 22) + #define emith_jump_patch(ptr, target) ({ \ u32 *ptr_ = (u32 *)ptr; \ u32 disp_ = (u8 *)(target) - (u8 
*)(ptr_); \ @@ -1116,6 +1119,7 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) #define emith_insn_ptr() ((u8 *)tcache_ptr) #define emith_flush() /**/ #define host_instructions_updated(base, end) __builtin___clear_cache(base, end) +#define emith_update_cache() /**/ #define emith_jump_patch_size() 8 #define emith_rw_offs_max() 0xff diff --git a/cpu/drc/emit_mips.c b/cpu/drc/emit_mips.c index e200db0a..0e85f92a 100644 --- a/cpu/drc/emit_mips.c +++ b/cpu/drc/emit_mips.c @@ -209,20 +209,25 @@ enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 }; // FIFO for 2 instructions, for delay slot handling u32 emith_last_insns[2] = { -1,-1 }; -int emith_last_idx; +int emith_last_idx, emith_last_cnt; #define EMIT_PUSHOP() \ do { \ emith_last_idx ^= 1; \ - if (emith_last_insns[emith_last_idx] != -1) \ - EMIT_PTR(tcache_ptr, emith_last_insns[emith_last_idx]);\ + if (emith_last_insns[emith_last_idx] != -1) { \ + u32 *p = (u32 *)tcache_ptr - emith_last_cnt; \ + EMIT_PTR(p, emith_last_insns[emith_last_idx]);\ + emith_last_cnt --; \ + } \ emith_last_insns[emith_last_idx] = -1; \ } while (0) #define EMIT(op) \ do { \ EMIT_PUSHOP(); \ + tcache_ptr = (void *)((u32 *)tcache_ptr + 1); \ emith_last_insns[emith_last_idx] = op; \ + emith_last_cnt ++; \ COUNT_OP; \ } while (0) @@ -231,8 +236,7 @@ int emith_last_idx; int i; for (i = 0; i < 2; i++) EMIT_PUSHOP(); \ } while (0) -#define emith_insn_ptr() (u8 *)((u32 *)tcache_ptr + \ - (emith_last_insns[0] != -1) + (emith_last_insns[1] != -1)) +#define emith_insn_ptr() (u8 *)((u32 *)tcache_ptr - emith_last_cnt) // delay slot stuff static int emith_is_j(u32 op) // J, JAL @@ -305,12 +309,14 @@ static void *emith_branch(u32 op) } if (bop) { // can swap + tcache_ptr = (void *)((u32 *)tcache_ptr - emith_last_cnt); if (emith_last_insns[idx^1] != -1) EMIT_PTR(tcache_ptr, emith_last_insns[idx^1]); bp = tcache_ptr; EMIT_PTR(tcache_ptr, bop); COUNT_OP; EMIT_PTR(tcache_ptr, emith_last_insns[idx]); 
emith_last_insns[0] = emith_last_insns[1] = -1; + emith_last_cnt = 0; } else { // can't swap emith_flush(); bp = tcache_ptr; @@ -325,13 +331,13 @@ static void *emith_branch(u32 op) ptr = emith_branch(MIPS_BCONDZ(cond_m, cond_r, 0)); #define JMP_EMIT(cond, ptr) { \ - u32 val_ = emith_insn_ptr() - (u8 *)(ptr) - 4; \ + u32 val_ = (u8 *)tcache_ptr - (u8 *)(ptr) - 4; \ EMIT_PTR(ptr, MIPS_BCONDZ(cond_m, cond_r, val_ & 0x0003ffff)); \ emith_flush(); /* NO delay slot handling across jump targets */ \ } #define JMP_EMIT_NC(ptr) { \ - u32 val_ = emith_insn_ptr() - (u8 *)(ptr) - 4; \ + u32 val_ = (u8 *)tcache_ptr - (u8 *)(ptr) - 4; \ EMIT_PTR(ptr, MIPS_B(val_ & 0x0003ffff)); \ emith_flush(); \ } @@ -881,14 +887,14 @@ static u8 *last_lohi; static void emith_lohi_nops(void) { u32 d; - while ((d = emith_insn_ptr() - last_lohi) < 8 && d >= 0) EMIT(MIPS_NOP); + while ((d = (u8 *)tcache_ptr - last_lohi) < 8 && d >= 0) EMIT(MIPS_NOP); } #define emith_mul(d, s1, s2) do { \ emith_lohi_nops(); \ EMIT(MIPS_MULTU(s1, s2)); \ EMIT(MIPS_MFLO(d)); \ - last_lohi = emith_insn_ptr(); \ + last_lohi = (u8 *)tcache_ptr; \ } while (0) #define emith_mul_u64(dlo, dhi, s1, s2) do { \ @@ -896,7 +902,7 @@ static void emith_lohi_nops(void) EMIT(MIPS_MULTU(s1, s2)); \ EMIT(MIPS_MFLO(dlo)); \ EMIT(MIPS_MFHI(dhi)); \ - last_lohi = emith_insn_ptr(); \ + last_lohi = (u8 *)tcache_ptr; \ } while (0) #define emith_mul_s64(dlo, dhi, s1, s2) do { \ @@ -904,7 +910,7 @@ static void emith_lohi_nops(void) EMIT(MIPS_MULT(s1, s2)); \ EMIT(MIPS_MFLO(dlo)); \ EMIT(MIPS_MFHI(dhi)); \ - last_lohi = emith_insn_ptr(); \ + last_lohi = (u8 *)tcache_ptr; \ } while (0) #define emith_mula_s64(dlo, dhi, s1, s2) do { \ @@ -915,7 +921,7 @@ static void emith_lohi_nops(void) emith_add_r_r(dlo, AT); \ EMIT(MIPS_SLTU_REG(t_, dlo, AT)); \ EMIT(MIPS_MFHI(AT)); \ - last_lohi = emith_insn_ptr(); \ + last_lohi = (u8 *)tcache_ptr; \ emith_add_r_r(dhi, AT); \ emith_add_r_r(dhi, t_); \ rcache_free_tmp(t_); \ @@ -1174,14 +1180,14 @@ static int 
emith_cond_check(int cond, int *r) // NB: MIPS conditional branches have only +/- 128KB range #define emith_jump_cond(cond, target) do { \ int r_, mcond_ = emith_cond_check(cond, &r_); \ - u32 disp_ = (u8 *)target - emith_insn_ptr() - 4; \ + u32 disp_ = (u8 *)target - (u8 *)tcache_ptr - 4; \ if (disp_ >= 0xfffe0000 || disp_ <= 0x0001ffff) { /* can use near B */ \ emith_branch(MIPS_BCONDZ(mcond_,r_,disp_ & 0x0003ffff)); \ } else { /* far branch if near branch isn't possible */ \ mcond_ = emith_invert_branch(mcond_); \ u8 *bp = emith_branch(MIPS_BCONDZ(mcond_, r_, 0)); \ emith_branch(MIPS_J((uintptr_t)target & 0x0fffffff)); \ - EMIT_PTR(bp, MIPS_BCONDZ(mcond_, r_, emith_insn_ptr()-bp-4)); \ + EMIT_PTR(bp, MIPS_BCONDZ(mcond_, r_, (u8 *)tcache_ptr-bp-4)); \ } \ } while (0) @@ -1190,9 +1196,12 @@ static int emith_cond_check(int cond, int *r) mcond_ = emith_invert_branch(mcond_); \ u8 *bp = emith_branch(MIPS_BCONDZ(mcond_, r_, 0));\ emith_branch(MIPS_J((uintptr_t)target & 0x0fffffff)); \ - EMIT_PTR(bp, MIPS_BCONDZ(mcond_, r_, emith_insn_ptr()-bp-4)); \ + EMIT_PTR(bp, MIPS_BCONDZ(mcond_, r_, (u8 *)tcache_ptr-bp-4)); \ } while (0) +#define emith_jump_cond_inrange(target) \ + !(((u8 *)target - (u8 *)tcache_ptr + 0x10000) >> 18) + // NB: returns position of patch for cache maintenance #define emith_jump_patch(ptr, target) ({ \ u32 *ptr_ = (u32 *)ptr-1; /* must skip condition check code */ \ @@ -1261,6 +1270,7 @@ static int emith_cond_check(int cond, int *r) #define emith_pool_commit(j) /**/ // NB: mips32r2 has SYNCI #define host_instructions_updated(base, end) __builtin___clear_cache(base, end) +#define emith_update_cache() /**/ #define emith_jump_patch_size() 4 #define emith_rw_offs_max() 0x7fff diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index d515cd23..caade3a6 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -877,6 +877,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common ptr; \ }) +#define emith_jump_cond_inrange(ptr) !0 
#define emith_jump_patch_size() 6 #define emith_jump_at(ptr, target) do { \ @@ -986,6 +987,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common } while (0) #define host_instructions_updated(base, end) +#define emith_update_cache() /**/ #define emith_rw_offs_max() 0xffffffff @@ -993,7 +995,6 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common #define HOST_REGS 16 #define PTR_SCALE 3 -#define NA_TMP_REG xAX // non-arg tmp from reg_temp[] #define EMIT_XREX_IF(w, r, rm, rs) do { \ int xr_ = (r) > 7 ? 1 : 0; \ @@ -1078,7 +1079,6 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common #define HOST_REGS 8 #define PTR_SCALE 2 -#define NA_TMP_REG xBX // non-arg tmp from reg_temp[] #define EMIT_REX_IF(w, r, rm) do { \ assert((u32)(r) < 8u); \ diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 677c8adf..6eaf7123 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -2920,6 +2920,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // mark memory for overwrite detection dr_mark_memory(1, block, tcache_id, 0); block->active = 1; + emith_update_cache(); return block->entryp[0].tcache_ptr; } @@ -3113,8 +3114,15 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) tmp = rcache_get_tmp_arg(0); sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); emith_cmp_r_imm(sr, 0); - emith_move_r_imm_c(DCOND_LE, tmp, pc); - emith_jump_cond(DCOND_LE, sh2_drc_exit); + if (emith_jump_cond_inrange(sh2_drc_exit)) { + emith_move_r_imm_c(DCOND_LE, tmp, pc); + emith_jump_cond(DCOND_LE, sh2_drc_exit); + } else { + EMITH_JMP_START(DCOND_GT); + emith_move_r_imm(tmp, pc); + emith_jump(sh2_drc_exit); + EMITH_JMP_END(DCOND_GT); + } rcache_free_tmp(tmp); #if (DRC_DEBUG & 32) @@ -3249,7 +3257,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } } rcache_set_usage_now(opd[0].source); // current insn - rcache_set_usage_soon(late); // insns 1-3 + rcache_set_usage_soon(soon); // insns 1-3 
rcache_set_usage_late(late & ~soon); // insns 4-9 rcache_set_usage_discard(write & ~(late|soon) & ~opd[0].source); @@ -4442,12 +4450,16 @@ end_op: fflush(stdout); #endif + emith_update_cache(); return block_entry_ptr; } static void sh2_generate_utils(void) { int arg0, arg1, arg2, arg3, sr, tmp, tmp2; +#if DRC_DEBUG + int hic = host_insn_count; // don't count utils for insn statistics +#endif host_arg2reg(arg0, 0); host_arg2reg(arg1, 1); @@ -4794,6 +4806,10 @@ static void sh2_generate_utils(void) host_dasm_new_symbol(sh2_drc_read16_poll); host_dasm_new_symbol(sh2_drc_read32_poll); #endif + +#if DRC_DEBUG + host_insn_count = hic; +#endif } static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nolit, int free) @@ -4847,6 +4863,7 @@ static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nol bd->addr = bd->size = bd->addr_lit = bd->size_lit = 0; bd->entry_count = 0; } + emith_update_cache(); } static void sh2_smc_rm_blocks(u32 a, int tcache_id, u32 shift) @@ -5197,6 +5214,7 @@ int sh2_drc_init(SH2 *sh2) tcache_ptr = tcache; sh2_generate_utils(); host_instructions_updated(tcache, tcache_ptr); + emith_update_cache(); tcache_bases[0] = tcache_ptrs[0] = tcache_ptr; tcache_limit[0] = tcache_bases[0] + tcache_sizes[0] - (tcache_ptr-tcache); diff --git a/cpu/sh2/compiler.h b/cpu/sh2/compiler.h index 1ad922b7..187ad716 100644 --- a/cpu/sh2/compiler.h +++ b/cpu/sh2/compiler.h @@ -33,26 +33,24 @@ unsigned short scan_block(unsigned int base_pc, int is_slave, #if defined(DRC_SH2) // direct access to some host CPU registers used by the DRC -// XXX MUST match definitions in cpu/sh2/compiler.c +// XXX MUST match definitions for SHR_SR in cpu/sh2/compiler.c #if defined(__arm__) -#define DRC_SR_REG r10 +#define DRC_SR_REG "r10" #elif defined(__aarch64__) -#define DRC_SR_REG r22 +#define DRC_SR_REG "r22" #elif defined(__mips__) -#define DRC_SR_REG s6 +#define DRC_SR_REG "s6" #elif defined(__i386__) -#define DRC_SR_REG edi +#define DRC_SR_REG 
"edi" #elif defined(__x86_64__) -#define DRC_SR_REG ebx +#define DRC_SR_REG "ebx" #else #warning "direct DRC register access not available for this host" #endif #endif #ifdef DRC_SR_REG -#define __DRC_DECLARE_SR(SR) register int sh2_sr asm(#SR) -#define _DRC_DECLARE_SR(SR) __DRC_DECLARE_SR(SR) -#define DRC_DECLARE_SR _DRC_DECLARE_SR(DRC_SR_REG) +#define DRC_DECLARE_SR register int sh2_sr asm(DRC_SR_REG) #define DRC_SAVE_SR(sh2) \ if ((sh2->state & (SH2_STATE_RUN|SH2_STATE_SLEEP)) == SH2_STATE_RUN) \ sh2->sr = sh2_sr; diff --git a/pico/32x/32x.c b/pico/32x/32x.c index e9d8ff6d..f6d1a153 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -471,7 +471,7 @@ void sync_sh2s_normal(unsigned int m68k_target) if (!(ssh2.state & SH2_IDLE_STATES)) { cycles = target - ssh2.m68krcycles_done; if (cycles > 0) { - run_sh2(&ssh2, cycles > 20 ? cycles : 20); + run_sh2(&ssh2, cycles > 20U ? cycles : 20U); if (event_time_next && CYCLES_GT(target, event_time_next)) target = event_time_next; @@ -483,7 +483,7 @@ void sync_sh2s_normal(unsigned int m68k_target) if (!(msh2.state & SH2_IDLE_STATES)) { cycles = target - msh2.m68krcycles_done; if (cycles > 0) { - run_sh2(&msh2, cycles > 20 ? cycles : 20); + run_sh2(&msh2, cycles > 20U ? 
cycles : 20U); if (event_time_next && CYCLES_GT(target, event_time_next)) target = event_time_next; From 58a444a29581e7304c97ce4b0dca79c00b3103a6 Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 17 Sep 2019 23:02:05 +0200 Subject: [PATCH 0217/1110] sh2 drc: rework of register cache to implement basic loop optimization --- cpu/sh2/compiler.c | 608 +++++++++++++++++++++++++++------------------ cpu/sh2/compiler.h | 3 +- 2 files changed, 370 insertions(+), 241 deletions(-) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 6eaf7123..f6fbadaf 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -44,6 +44,7 @@ #define ALIAS_REGISTERS 1 #define REMAP_REGISTER 1 #define LOOP_DETECTION 1 +#define LOOP_OPTIMIZER 1 // limits (per block) #define MAX_BLOCK_SIZE (BLOCK_INSN_LIMIT * 6 * 6) @@ -376,36 +377,41 @@ int rchit, rcmiss; #endif // host register tracking -enum { +enum cache_reg_htype { + HRT_TEMP = 1, // is for temps and args + HRT_REG = 2, // is for sh2 regs + HRT_STATIC = 2, // is for static mappings (same as HRT_REG) +}; + +enum cache_reg_flags { + HRF_DIRTY = 1 << 0, // has "dirty" value to be written to ctx + HRF_PINNED = 1 << 1, // has a pinned mapping +}; + +enum cache_reg_type { HR_FREE, - HR_STATIC, // vreg has a static mapping HR_CACHED, // vreg has sh2_reg_e HR_TEMP, // reg used for temp storage -} cache_reg_type; - -enum { - HRF_DIRTY = 1 << 0, // has "dirty" value to be written to ctx - HRF_LOCKED = 1 << 1, // can't be evicted - HRF_TEMP = 1 << 2, // is for temps and args - HRF_REG = 1 << 3, // is for sh2 regs -} cache_reg_flags; +}; typedef struct { u8 hreg; // "host" reg - u8 flags:4; // TEMP or REG? + u8 htype:2; // TEMP or REG? + u8 flags:2; // DIRTY, PINNED? u8 type:2; // CACHED or TEMP?
- u8 ref:2; // ref counter + u8 locked:2; // LOCKED reference counter u16 stamp; // kind of a timestamp u32 gregs; // "guest" reg mask } cache_reg_t; // guest register tracking -enum { +enum guest_reg_flags { GRF_DIRTY = 1 << 0, // reg has "dirty" value to be written to ctx GRF_CONST = 1 << 1, // reg has a constant GRF_CDIRTY = 1 << 2, // constant not yet written to ctx GRF_STATIC = 1 << 3, // reg has static mapping to vreg -} guest_reg_flags; + GRF_PINNED = 1 << 4, // reg has pinned mapping to vreg +}; typedef struct { u8 flags; // guest flags: is constant, is dirty? @@ -419,13 +425,14 @@ typedef struct { static int rcache_get_tmp(void); static void rcache_free_tmp(int hr); -// Note: cache_regs[] must have at least the amount of HRF_REG registers used +// Note: cache_regs[] must have at least the amount of REG/TEMP registers used // by handlers in worst case (currently 4). // Register assignment goes by ABI convention. Caller save registers are TEMP, // the others are either static or REG. SR must be static, R0 very recommended. +// XXX the static definition of SR MUST match that in compiler.h // VBR, PC, PR must not be static (read from context in utils). -// TEMP registers first, REG last. alloc/evict algorithm depends on this. -// The 1st TEMP must not be RET_REG on platforms using temps in insns (eg. x86). +// RET_REG/params should be first TEMPs to avoid allocation conflicts in calls. +// There MUST be at least 3 params and one non-RET_REG/param TEMP. // XXX shouldn't this be somehow defined in the code emitters? #ifdef __arm__ #include "../drc/emit_arm.c" @@ -449,21 +456,21 @@ static guest_reg_t guest_regs[] = { // OABI/EABI: params: r0-r3, return: r0-r1, temp: r12,r14, saved: r4-r8,r10,r11 // SP,PC: r13,r15 must not be used. saved: r9 (for platform use, e.g. 
on ios) static cache_reg_t cache_regs[] = { - { 12, HRF_TEMP }, // temps - { 14, HRF_TEMP }, - { 3, HRF_TEMP }, // params - { 2, HRF_TEMP }, - { 1, HRF_TEMP }, - { 0, HRF_TEMP }, // RET_REG - { 8, HRF_LOCKED }, // statics + { 0, HRT_TEMP }, // RET_REG, params + { 1, HRT_TEMP }, + { 2, HRT_TEMP }, // params + { 3, HRT_TEMP }, + { 12, HRT_TEMP }, // temps + { 14, HRT_TEMP }, + { 8, HRT_STATIC }, // statics #ifndef __MACH__ // no r9.. - { 9, HRF_LOCKED }, + { 9, HRT_STATIC }, #endif - { 10, HRF_LOCKED }, - { 4, HRF_REG }, // other regs - { 5, HRF_REG }, - { 6, HRF_REG }, - { 7, HRF_REG }, + { 10, HRT_STATIC }, + { 4, HRT_REG }, // other regs + { 5, HRT_REG }, + { 6, HRT_REG }, + { 7, HRT_REG }, }; #elif defined(__aarch64__) @@ -485,35 +492,34 @@ static guest_reg_t guest_regs[] = { // saved: r18 (for platform use) // since drc never needs more than 4 parameters, r4-r7 are treated as temp. static cache_reg_t cache_regs[] = { - { 17, HRF_TEMP }, // temps - { 16, HRF_TEMP }, - { 15, HRF_TEMP }, - { 14, HRF_TEMP }, - { 13, HRF_TEMP }, - { 12, HRF_TEMP }, - { 11, HRF_TEMP }, - { 10, HRF_TEMP }, - { 9, HRF_TEMP }, - { 8, HRF_TEMP }, - { 7, HRF_TEMP }, - { 6, HRF_TEMP }, - { 5, HRF_TEMP }, - { 4, HRF_TEMP }, - { 3, HRF_TEMP }, // params - { 2, HRF_TEMP }, - { 1, HRF_TEMP }, - { 0, HRF_TEMP }, // RET_REG - { 22, HRF_LOCKED }, // statics - { 21, HRF_LOCKED }, - { 20, HRF_LOCKED }, - { 29, HRF_REG }, // other regs - { 28, HRF_REG }, - { 27, HRF_REG }, - { 26, HRF_REG }, - { 25, HRF_REG }, - { 24, HRF_REG }, - { 23, HRF_REG }, - { 22, HRF_REG }, + { 0, HRT_TEMP }, // RET_REG, params + { 1, HRT_TEMP }, + { 2, HRT_TEMP }, // params + { 3, HRT_TEMP }, + { 4, HRT_TEMP }, // temps + { 5, HRT_TEMP }, + { 6, HRT_TEMP }, + { 7, HRT_TEMP }, + { 8, HRT_TEMP }, + { 9, HRT_TEMP }, + { 10, HRT_TEMP }, + { 11, HRT_TEMP }, + { 12, HRT_TEMP }, + { 13, HRT_TEMP }, + { 14, HRT_TEMP }, + { 15, HRT_TEMP }, + { 16, HRT_TEMP }, + { 17, HRT_TEMP }, + { 20, HRT_STATIC }, // statics + { 21, HRT_STATIC }, 
+ { 22, HRT_STATIC }, + { 23, HRT_REG }, // other regs + { 24, HRT_REG }, + { 25, HRT_REG }, + { 26, HRT_REG }, + { 27, HRT_REG }, + { 28, HRT_REG }, + { 29, HRT_REG }, }; #elif defined(__mips__) @@ -521,13 +527,13 @@ static cache_reg_t cache_regs[] = { static guest_reg_t guest_regs[] = { // SHR_R0 .. SHR_SP - {GRF_STATIC, 20} , {GRF_STATIC, 21} , { 0 } , { 0 } , + {GRF_STATIC, 16} , {GRF_STATIC, 17} , { 0 } , { 0 } , { 0 } , { 0 } , { 0 } , { 0 } , { 0 } , { 0 } , { 0 } , { 0 } , { 0 } , { 0 } , { 0 } , { 0 } , // SHR_PC, SHR_PPC, SHR_PR, SHR_SR, // SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL, - { 0 } , { 0 } , { 0 } , {GRF_STATIC, 22} , + { 0 } , { 0 } , { 0 } , {GRF_STATIC, 18} , { 0 } , { 0 } , { 0 } , { 0 } , }; @@ -535,26 +541,26 @@ static guest_reg_t guest_regs[] = { // saved: r16-r23,r30, reserved: r0(zero), r26-r27(irq), r28(gp), r29(sp) // r1,r15,r24,r25 are used internally by the code emitter static cache_reg_t cache_regs[] = { - { 14, HRF_TEMP }, // temps - { 13, HRF_TEMP }, - { 12, HRF_TEMP }, - { 11, HRF_TEMP }, - { 10, HRF_TEMP }, - { 9, HRF_TEMP }, - { 8, HRF_TEMP }, - { 7, HRF_TEMP }, // params - { 6, HRF_TEMP }, - { 5, HRF_TEMP }, - { 4, HRF_TEMP }, - { 3, HRF_TEMP }, // RET_REG - { 2, HRF_TEMP }, - { 22, HRF_LOCKED }, // statics - { 21, HRF_LOCKED }, - { 20, HRF_LOCKED }, - { 19, HRF_REG }, // other regs - { 18, HRF_REG }, - { 17, HRF_REG }, - { 16, HRF_REG }, + { 2, HRT_TEMP }, // RET_REG (v0-v1) + { 3, HRT_TEMP }, + { 4, HRT_TEMP }, // params (a0-a3) + { 5, HRT_TEMP }, + { 6, HRT_TEMP }, + { 7, HRT_TEMP }, + { 8, HRT_TEMP }, // temps (t0-t6) + { 9, HRT_TEMP }, + { 10, HRT_TEMP }, + { 11, HRT_TEMP }, + { 12, HRT_TEMP }, + { 13, HRT_TEMP }, + { 14, HRT_TEMP }, + { 16, HRT_STATIC }, // statics (s0-s2) + { 17, HRT_STATIC }, + { 18, HRT_STATIC }, + { 19, HRT_REG }, // other regs (s3-s6) + { 20, HRT_REG }, + { 21, HRT_REG }, + { 22, HRT_REG }, }; #elif defined(__i386__) @@ -572,14 +578,16 @@ static guest_reg_t guest_regs[] = { { 0 } , { 0 } , { 0 } , { 0 } 
, }; -// ax, cx, dx are usually temporaries by convention +// MS/SystemV ABI: ebx,esi,edi,ebp are preserved, eax,ecx,edx are temporaries +// DRC uses REGPARM to pass upto 3 parameters in registers eax,ecx,edx. +// To avoid conflicts with param passing ebx must be declared temp here. static cache_reg_t cache_regs[] = { - { xBX, HRF_REG|HRF_TEMP }, // params - { xCX, HRF_REG|HRF_TEMP }, - { xDX, HRF_REG|HRF_TEMP }, - { xAX, HRF_REG|HRF_TEMP }, // return value - { xSI, HRF_LOCKED }, // statics - { xDI, HRF_LOCKED }, + { xAX, HRT_TEMP }, // RET_REG, param + { xDX, HRT_TEMP }, // params + { xCX, HRT_TEMP }, + { xBX, HRT_TEMP }, // temp + { xSI, HRT_STATIC }, // statics + { xDI, HRT_STATIC }, }; #elif defined(__x86_64__) @@ -602,20 +610,20 @@ static guest_reg_t guest_regs[] = { // rsi,rdi are preserved in M$ ABI, temporary in SystemV ABI // parameters in rcx,rdx,r8,r9, SystemV ABI additionally uses rsi,rdi static cache_reg_t cache_regs[] = { - { xR10,HRF_TEMP }, // temps - { xR11,HRF_TEMP }, - { xAX, HRF_TEMP }, // RET_REG - { xR8, HRF_TEMP }, // params - { xR9, HRF_TEMP }, - { xCX, HRF_TEMP }, - { xDX, HRF_TEMP }, - { xSI, HRF_REG|HRF_TEMP }, - { xDI, HRF_REG|HRF_TEMP }, - { xBX, HRF_LOCKED }, // statics - { xR12,HRF_LOCKED }, - { xR13,HRF_REG }, // other regs - { xR14,HRF_REG }, - { xR15,HRF_REG }, + { xAX, HRT_TEMP }, // RET_REG + { xDX, HRT_TEMP }, // params + { xCX, HRT_TEMP }, + { xDI, HRT_TEMP }, + { xSI, HRT_TEMP }, + { xR8, HRT_TEMP }, + { xR9, HRT_TEMP }, + { xR10,HRT_TEMP }, // temps + { xR11,HRT_TEMP }, + { xBX, HRT_STATIC }, // statics + { xR12,HRT_STATIC }, + { xR13,HRT_REG }, // other regs + { xR14,HRT_REG }, + { xR15,HRT_REG }, }; #else @@ -1333,8 +1341,8 @@ static void rcache_remap_vreg(int x); printf(" cache_regs:\n"); \ for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { \ cp = &cache_regs[i]; \ - if (cp->type != HR_FREE || cp->gregs || (cp->flags & ~(HRF_REG|HRF_TEMP))) \ - printf(" %d: hr=%d t=%d f=%x c=%d m=%x\n", i, cp->hreg, cp->type, cp->flags, 
cp->ref, cp->gregs); \ + if (cp->type != HR_FREE || cp->gregs || cp->locked || cp->flags) \ + printf(" %d: hr=%d t=%d f=%x c=%d m=%x\n", i, cp->hreg, cp->type, cp->flags, cp->locked, cp->gregs); \ } \ printf(" guest_regs:\n"); \ for (i = 0; i < ARRAY_SIZE(guest_regs); i++) { \ @@ -1352,9 +1360,10 @@ static void rcache_remap_vreg(int x); #define RCACHE_CHECK(msg) { \ cache_reg_t *cp; \ guest_reg_t *gp; \ - int i, x, d = 0; \ + int i, x, m = 0, d = 0; \ for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { \ cp = &cache_regs[i]; \ + if (cp->flags & HRF_PINNED) m |= (1 << i); \ if (cp->type == HR_FREE || cp->type == HR_TEMP) continue; \ /* check connectivity greg->vreg */ \ FOR_ALL_BITS_SET_DO(cp->gregs, x, \ @@ -1366,12 +1375,17 @@ static void rcache_remap_vreg(int x); gp = &guest_regs[i]; \ if (gp->vreg != -1 && !(cache_regs[gp->vreg].gregs & (1 << i))) \ { d = 1; printf("cache check r=%d v=%d not connected?\n", i, gp->vreg); }\ - if (gp->vreg != -1 && cache_regs[gp->vreg].type != HR_STATIC && cache_regs[gp->vreg].type != HR_CACHED) \ + if (gp->vreg != -1 && cache_regs[gp->vreg].type != HR_CACHED) \ { d = 1; printf("cache check r=%d v=%d wrong type?\n", i, gp->vreg); }\ if ((gp->flags & GRF_CONST) && !(gconsts[gp->cnst].gregs & (1 << i))) \ { d = 1; printf("cache check r=%d c=%d not connected?\n", i, gp->cnst); }\ - if ((gp->flags & GRF_CDIRTY) && (gp->vreg != -1 || !(gp->flags & GRF_CONST)) )\ + if ((gp->flags & GRF_CDIRTY) && (gp->vreg != -1 || !(gp->flags & GRF_CONST)))\ { d = 1; printf("cache check r=%d CDIRTY?\n", i); } \ + if (gp->flags & GRF_PINNED) { \ + if (gp->sreg == -1 || !(cache_regs[gp->sreg].flags & HRF_PINNED))\ + { d = 1; printf("cache check r=%d v=%d not pinned?\n", i, gp->vreg); } \ + else m &= ~(1 << gp->sreg); \ + } \ } \ for (i = 0; i < ARRAY_SIZE(gconsts); i++) { \ FOR_ALL_BITS_SET_DO(gconsts[i].gregs, x, \ @@ -1379,13 +1393,15 @@ static void rcache_remap_vreg(int x); { d = 1; printf("cache check c=%d v=%d not connected?\n",i,x); } \ ) \ } \ + if (m) 
\ + { d = 1; printf("cache check m=%x pinning wrong?\n",m); } \ if (d) RCACHE_DUMP(msg) \ /* else { \ printf("locked regs %s:\n",msg); \ for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { \ cp = &cache_regs[i]; \ - if (cp->flags & HRF_LOCKED) \ - printf(" %d: hr=%d t=%d f=%x c=%d m=%x\n", i, cp->hreg, cp->type, cp->flags, cp->ref, cp->gregs); \ + if (cp->locked) \ + printf(" %d: hr=%d t=%d f=%x c=%d m=%x\n", i, cp->hreg, cp->type, cp->flags, cp->locked, cp->gregs); \ } \ } */ \ } @@ -1463,8 +1479,7 @@ static int gconst_try_read(int vreg, sh2_reg_e r) guest_regs[i].flags &= ~GRF_CDIRTY; guest_regs[i].flags |= GRF_DIRTY; }); - if (cache_regs[vreg].type != HR_STATIC) - cache_regs[vreg].type = HR_CACHED; + cache_regs[vreg].type = HR_CACHED; cache_regs[vreg].flags |= HRF_DIRTY; return 1; } @@ -1527,6 +1542,7 @@ static void gconst_invalidate(void) static u16 rcache_counter; // SH2 register usage bitmasks +static u32 rcache_hregs_reg; // regs of type HRT_REG (for pinning) static u32 rcache_regs_static; // statically allocated regs static u32 rcache_regs_now; // regs used in current insn static u32 rcache_regs_soon; // regs used in the next few insns @@ -1539,28 +1555,33 @@ static u32 rcache_regs_clean; // regs needing cleaning #define rcache_regs_nowsoon (rcache_regs_now|rcache_regs_soon) #define rcache_regs_soonclean (rcache_regs_soon|rcache_regs_clean) -static void rcache_ref_vreg(int x) +static void rcache_lock_vreg(int x) { if (x >= 0) { - cache_regs[x].ref ++; - cache_regs[x].flags |= HRF_LOCKED; + if (cache_regs[x].type == HR_FREE) { + printf("locking free vreg %x, aborting\n", x); + exit(1); + } + cache_regs[x].locked ++; } } -static void rcache_unref_vreg(int x) +static void rcache_unlock_vreg(int x) { - if (x >= 0 && -- cache_regs[x].ref == 0) { - cache_regs[x].flags &= ~HRF_LOCKED; + if (x >= 0) { + if (cache_regs[x].type == HR_FREE) { + printf("unlocking free vreg %x, aborting\n", x); + exit(1); + } + cache_regs[x].locked --; } } static void rcache_free_vreg(int x) 
{ - if (cache_regs[x].type != HR_STATIC) - cache_regs[x].type = HR_FREE; - cache_regs[x].flags &= (HRF_REG|HRF_TEMP); + cache_regs[x].type = cache_regs[x].locked ? HR_TEMP : HR_FREE; + cache_regs[x].flags &= HRF_PINNED; cache_regs[x].gregs = 0; - cache_regs[x].ref = 0; } static void rcache_unmap_vreg(int x) @@ -1582,12 +1603,11 @@ static void rcache_move_vreg(int d, int x) { int i; - if (cache_regs[d].type != HR_STATIC) - cache_regs[d].type = HR_CACHED; + cache_regs[d].type = HR_CACHED; cache_regs[d].gregs = cache_regs[x].gregs; - cache_regs[d].flags &= (HRF_TEMP|HRF_REG); - cache_regs[d].flags |= cache_regs[x].flags & ~(HRF_TEMP|HRF_REG); - cache_regs[d].ref = 0; + cache_regs[d].flags &= HRF_PINNED; + cache_regs[d].flags |= cache_regs[x].flags & ~HRF_PINNED; + cache_regs[d].locked = 0; cache_regs[d].stamp = cache_regs[x].stamp; emith_move_r_r(cache_regs[d].hreg, cache_regs[x].hreg); for (i = 0; i < ARRAY_SIZE(guest_regs); i++) @@ -1602,12 +1622,12 @@ static void rcache_clean_vreg(int x) if (cache_regs[x].flags & HRF_DIRTY) { // writeback cache_regs[x].flags &= ~HRF_DIRTY; - rcache_ref_vreg(x); + rcache_lock_vreg(x); FOR_ALL_BITS_SET_DO(cache_regs[x].gregs, r, if (guest_regs[r].flags & GRF_DIRTY) { - if (guest_regs[r].flags & GRF_STATIC) { + if (guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) { if (guest_regs[r].vreg != guest_regs[r].sreg) { - if (!(cache_regs[guest_regs[r].sreg].flags & HRF_LOCKED)) { + if (!(cache_regs[guest_regs[r].sreg].locked)) { // statically mapped reg not in its sreg. 
move back to sreg rcache_evict_vreg(guest_regs[r].sreg); emith_move_r_r(cache_regs[guest_regs[r].sreg].hreg, @@ -1623,7 +1643,7 @@ static void rcache_clean_vreg(int x) rcache_remove_vreg_alias(x, r); } } else - cache_regs[x].flags |= HRF_DIRTY; + cache_regs[x].flags |= HRF_DIRTY; } else { if (~rcache_regs_discard & (1 << r)) emith_ctx_write(cache_regs[x].hreg, r * 4); @@ -1631,8 +1651,9 @@ static void rcache_clean_vreg(int x) } rcache_regs_clean &= ~(1 << r); }) - rcache_unref_vreg(x); + rcache_unlock_vreg(x); } + #if DRC_DEBUG & 64 RCACHE_CHECK("after clean"); #endif @@ -1642,16 +1663,19 @@ static void rcache_add_vreg_alias(int x, sh2_reg_e r) { cache_regs[x].gregs |= (1 << r); guest_regs[r].vreg = x; - if (cache_regs[x].type != HR_STATIC) - cache_regs[x].type = HR_CACHED; + cache_regs[x].type = HR_CACHED; } static void rcache_remove_vreg_alias(int x, sh2_reg_e r) { cache_regs[x].gregs &= ~(1 << r); - if (!cache_regs[x].gregs) + if (!cache_regs[x].gregs) { // no reg mapped -> free vreg - rcache_free_vreg(x); + if (cache_regs[x].locked) + cache_regs[x].type = HR_TEMP; + else + rcache_free_vreg(x); + } guest_regs[r].vreg = -1; } @@ -1674,17 +1698,17 @@ static void rcache_evict_vreg_aliases(int x, sh2_reg_e r) static int rcache_allocate(int what, int minprio) { - // evict reg with oldest stamp (only for HRF_REG, no temps) + // evict reg with oldest stamp (only for HRT_REG, no temps) int i, i_prio, oldest = -1, prio = 0; u16 min_stamp = (u16)-1; - for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { - // consider only unlocked REG or non-TEMP - if (cache_regs[i].flags == 0 || (cache_regs[i].flags & HRF_LOCKED)) + for (i = ARRAY_SIZE(cache_regs)-1; i >= 0; i--) { + // consider only non-static, unpinned, unlocked REG or TEMP + if ((cache_regs[i].flags & HRF_PINNED) || cache_regs[i].locked) continue; - if ((what > 0 && !(cache_regs[i].flags & HRF_REG)) || - (what == 0 && (cache_regs[i].flags & HRF_TEMP)) || - (what < 0 && !(cache_regs[i].flags & HRF_TEMP))) + if ((what > 0 && 
!(cache_regs[i].htype & HRT_REG)) || // get a REG + (what == 0 && (cache_regs[i].htype & HRT_TEMP)) || // get a non-TEMP + (what < 0 && !(cache_regs[i].htype & HRT_TEMP))) // get a TEMP continue; if (cache_regs[i].type == HR_FREE || cache_regs[i].type == HR_TEMP) { // REG is free @@ -1731,17 +1755,18 @@ static int rcache_allocate(int what, int minprio) static int rcache_allocate_vreg(int needed) { int x; - - // get a free reg, but use temps only if r is not needed soon - for (x = ARRAY_SIZE(cache_regs) - 1; x >= 0; x--) { - if (cache_regs[x].flags && (cache_regs[x].type == HR_FREE || - (cache_regs[x].type == HR_TEMP && !(cache_regs[x].flags & HRF_LOCKED))) && - (!needed || (cache_regs[x].flags & HRF_REG))) - break; - } - - if (x < 0) + + if (needed) { + // needed soon, try getting a REG 1st, use a TEMP only if none is available x = rcache_allocate(1, 0); + if (x < 0) + x = rcache_allocate(-1, 1); + } else { + // not needed, try getting a TEMP 1st, use a REG only if none is available + x = rcache_allocate(-1, 1); + if (x < 0) + x = rcache_allocate(1, 0); + } return x; } @@ -1753,17 +1778,7 @@ static int rcache_allocate_nontemp(void) static int rcache_allocate_temp(void) { - int x; - - // use any free reg, but prefer TEMP regs - for (x = 0; x < ARRAY_SIZE(cache_regs); x++) { - if (cache_regs[x].flags && (cache_regs[x].type == HR_FREE || - (cache_regs[x].type == HR_TEMP && !(cache_regs[x].flags & HRF_LOCKED)))) - break; - } - - if (x >= ARRAY_SIZE(cache_regs)) - x = rcache_allocate(-1, 1); + int x = rcache_allocate(-1, 1); if (x < 0) { printf("no temp register available, aborting\n"); exit(1); @@ -1788,14 +1803,14 @@ static int rcache_map_reg(sh2_reg_e r, int hr, int mode) } // deal with statically mapped regs - if (mode == RC_GR_RMW && (guest_regs[r].flags & GRF_STATIC)) { + if (mode == RC_GR_RMW && (guest_regs[r].flags & (GRF_STATIC|GRF_PINNED))) { x = guest_regs[r].sreg; if (guest_regs[r].vreg == x) { // STATIC in its sreg with no aliases, and some processing 
pending if (cache_regs[x].gregs == 1 << r) return cache_regs[x].hreg; } else if (cache_regs[x].type == HR_FREE || - (cache_regs[x].type == HR_TEMP && !(cache_regs[x].flags & HRF_LOCKED))) + (cache_regs[x].type == HR_TEMP && !cache_regs[x].locked)) // STATIC not in its sreg, with sreg available -> move it i = guest_regs[r].sreg; } @@ -1806,14 +1821,13 @@ static int rcache_map_reg(sh2_reg_e r, int hr, int mode) if (cache_regs[i].type == HR_CACHED) rcache_evict_vreg(i); // set new mappping - if (cache_regs[i].type != HR_STATIC) - cache_regs[i].type = HR_CACHED; + cache_regs[i].type = HR_CACHED; cache_regs[i].gregs = 1 << r; - cache_regs[i].flags &= (HRF_TEMP|HRF_REG); - cache_regs[i].ref = 0; + cache_regs[i].flags &= HRF_PINNED; + cache_regs[i].locked = 0; cache_regs[i].stamp = ++rcache_counter; cache_regs[i].flags |= HRF_DIRTY; - rcache_ref_vreg(i); + rcache_lock_vreg(i); guest_regs[r].flags |= GRF_DIRTY; guest_regs[r].vreg = i; #if DRC_DEBUG & 64 @@ -1828,25 +1842,25 @@ static void rcache_remap_vreg(int x) int d; // x must be a cached vreg - if (cache_regs[x].type != HR_CACHED && cache_regs[x].type != HR_STATIC) + if (cache_regs[x].type != HR_CACHED) return; // don't do it if x is already a REG or isn't used or to be cleaned anyway - if ((cache_regs[x].flags & HRF_REG) || + if ((cache_regs[x].htype & HRT_REG) || !(rcache_regs_used & ~rcache_regs_clean & cache_regs[x].gregs)) { // clean here to avoid data loss on invalidation rcache_clean_vreg(x); return; } - if (cache_regs[x].flags & HRF_LOCKED) { + if (cache_regs[x].locked) { printf("remap vreg %d is locked\n", x); exit(1); } // allocate a non-TEMP vreg - rcache_ref_vreg(x); // lock to avoid evicting x + rcache_lock_vreg(x); // lock to avoid evicting x d = rcache_allocate_nontemp(); - rcache_unref_vreg(x); + rcache_unlock_vreg(x); if (d < 0) { rcache_clean_vreg(x); return; @@ -1901,10 +1915,10 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr dst = src = guest_regs[r].vreg; - 
rcache_ref_vreg(src); // lock to avoid evicting src + rcache_lock_vreg(src); // lock to avoid evicting src // good opportunity to relocate a remapped STATIC? - if ((guest_regs[r].flags & GRF_STATIC) && src != guest_regs[r].sreg && - !(cache_regs[guest_regs[r].sreg].flags & HRF_LOCKED) && + if ((guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) && src != guest_regs[r].sreg && + !cache_regs[guest_regs[r].sreg].locked && (src < 0 || mode != RC_GR_READ) && !(rcache_regs_nowsoon & cache_regs[guest_regs[r].sreg].gregs)) { dst = guest_regs[r].sreg; @@ -1918,10 +1932,10 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr } tr = &cache_regs[dst]; tr->stamp = rcache_counter; - rcache_unref_vreg(src); // remove r from src if (src >= 0 && src != dst) rcache_remove_vreg_alias(src, r); + rcache_unlock_vreg(src); // if r has a constant it may have aliases if (mode != RC_GR_WRITE && gconst_try_read(dst, r)) @@ -1932,24 +1946,26 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr if (mode != RC_GR_READ && src == dst && ali) { int x = -1; if (rcache_regs_nowsoon & ali) { - if (tr->type == HR_STATIC && guest_regs[r].sreg == dst && - !(tr->flags & HRF_LOCKED)) { + if ((guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) && + guest_regs[r].sreg == dst && !tr->locked) { // split aliases if r is STATIC in sreg and dst isn't already locked - rcache_ref_vreg(dst); // lock to avoid evicting dst - if ((x = rcache_allocate_vreg(rcache_regs_nowsoon & ali)) >= 0) { + rcache_lock_vreg(dst); // lock to avoid evicting dst + x = rcache_allocate_vreg(rcache_regs_nowsoon & ali); + rcache_unlock_vreg(dst); + if (x >= 0) { src = x; rcache_move_vreg(src, dst); } - rcache_unref_vreg(dst); } else { // split r - rcache_ref_vreg(src); // lock to avoid evicting src - if ((x = rcache_allocate_vreg(rcache_regs_nowsoon & (1 << r))) >= 0) { + rcache_lock_vreg(src); // lock to avoid evicting src + x = rcache_allocate_vreg(rcache_regs_nowsoon & (1 << r)); + 
rcache_unlock_vreg(src); + if (x >= 0) { dst = x; tr = &cache_regs[dst]; tr->stamp = rcache_counter; } - rcache_unref_vreg(src); } } if (x < 0) @@ -1967,13 +1983,13 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr emith_ctx_read(tr->hreg, r * 4); if (hr) { *hr = (src >= 0 ? cache_regs[src].hreg : tr->hreg); - rcache_ref_vreg(reg_map_host[*hr]); - } else if (src >= 0 && cache_regs[src].hreg != tr->hreg) + rcache_lock_vreg(src >= 0 ? src : dst); + } else if (src >= 0 && mode != RC_GR_WRITE && cache_regs[src].hreg != tr->hreg) emith_move_r_r(tr->hreg, cache_regs[src].hreg); // housekeeping if (do_locking) - rcache_ref_vreg(dst); + rcache_lock_vreg(dst); if (mode != RC_GR_READ) { tr->flags |= HRF_DIRTY; guest_regs[r].flags |= GRF_DIRTY; @@ -1990,14 +2006,42 @@ static int rcache_get_reg(sh2_reg_e r, rc_gr_mode mode, int *hr) return rcache_get_reg_(r, mode, 1, hr); } +static void rcache_pin_reg(sh2_reg_e r) +{ + int hr, x; + + // don't pin if static or already pinned + if (guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) + return; + + rcache_regs_soon |= (1 << r); // kludge to prevent allocation of a temp + hr = rcache_get_reg_(r, RC_GR_RMW, 0, NULL); + x = reg_map_host[hr]; + + // can only pin non-TEMPs + if (!(cache_regs[x].htype & HRT_TEMP)) { + guest_regs[r].flags |= GRF_PINNED; + cache_regs[x].flags |= HRF_PINNED; + guest_regs[r].sreg = x; + } +#if DRC_DEBUG & 64 + RCACHE_CHECK("after pin"); +#endif +} + static int rcache_get_tmp(void) { int i; i = rcache_allocate_temp(); - rcache_ref_vreg(i); + if (i < 0) { + printf("cannot allocate temp\n"); + exit(1); + } cache_regs[i].type = HR_TEMP; + rcache_lock_vreg(i); + return cache_regs[i].hreg; } @@ -2006,14 +2050,14 @@ static int rcache_get_vreg_hr(int hr) int i; i = reg_map_host[hr]; - if (i < 0 || (cache_regs[i].flags & HRF_LOCKED)) { + if (i < 0 || cache_regs[i].locked) { printf("host register %d is locked\n", hr); exit(1); } if (cache_regs[i].type == HR_CACHED) rcache_evict_vreg(i); - 
else if (cache_regs[i].type == HR_TEMP && (cache_regs[i].flags & HRF_LOCKED)) { + else if (cache_regs[i].type == HR_TEMP && cache_regs[i].locked) { printf("host reg %d already used, aborting\n", hr); exit(1); } @@ -2034,7 +2078,7 @@ static int rcache_get_tmp_arg(int arg) { int x = rcache_get_vreg_arg(arg); cache_regs[x].type = HR_TEMP; - rcache_ref_vreg(x); + rcache_lock_vreg(x); return cache_regs[x].hreg; } @@ -2044,7 +2088,7 @@ static int rcache_get_tmp_ret(void) { int x = rcache_get_vreg_hr(RET_REG); cache_regs[x].type = HR_TEMP; - rcache_ref_vreg(x); + rcache_lock_vreg(x); return cache_regs[x].hreg; } @@ -2094,11 +2138,11 @@ static int rcache_get_reg_arg(int arg, sh2_reg_e r, int *hr) } else { *hr = srcr; if (dstr != srcr) // must lock srcr if not copied here - rcache_ref_vreg(reg_map_host[srcr]); + rcache_lock_vreg(reg_map_host[srcr]); } cache_regs[dstid].stamp = ++rcache_counter; - rcache_ref_vreg(dstid); + rcache_lock_vreg(dstid); #if DRC_DEBUG & 64 RCACHE_CHECK("after getarg"); #endif @@ -2114,7 +2158,7 @@ static void rcache_free_tmp(int hr) exit(1); } - rcache_free_vreg(i); + rcache_unlock_vreg(i); } // saves temporary result either in REG or in drctmp @@ -2133,10 +2177,10 @@ static int rcache_save_tmp(int hr) cache_regs[i].type = HR_CACHED; cache_regs[i].gregs = 0; // not storing any guest register - cache_regs[i].flags &= (HRF_TEMP|HRF_REG); - cache_regs[i].ref = 0; + cache_regs[i].flags &= HRF_PINNED; + cache_regs[i].locked = 0; cache_regs[i].stamp = ++rcache_counter; - rcache_ref_vreg(i); + rcache_lock_vreg(i); emith_move_r_r(cache_regs[i].hreg, hr); rcache_free_tmp(hr); return i; @@ -2167,17 +2211,13 @@ static int rcache_restore_tmp(int x) static void rcache_free(int hr) { int x = reg_map_host[hr]; - if (cache_regs[x].type == HR_TEMP) - rcache_free_tmp(hr); - else - rcache_unref_vreg(x); + rcache_unlock_vreg(x); } static void rcache_unlock(int x) { if (x >= 0) { - cache_regs[x].flags &= ~HRF_LOCKED; - cache_regs[x].ref = 0; + cache_regs[x].locked = 0; 
// rcache_regs_now &= ~cache_regs[x].gregs; } } @@ -2185,10 +2225,34 @@ static void rcache_unlock(int x) static void rcache_unlock_all(void) { int i; - for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { - cache_regs[i].flags &= ~HRF_LOCKED; - cache_regs[i].ref = 0; + for (i = 0; i < ARRAY_SIZE(cache_regs); i++) + cache_regs[i].locked = 0; +} + +static void rcache_unpin_all(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(guest_regs); i++) { + if (guest_regs[i].flags & GRF_PINNED) { + guest_regs[i].flags &= ~GRF_PINNED; + cache_regs[guest_regs[i].sreg].flags &= ~HRF_PINNED; + guest_regs[i].sreg = -1; + } } +#if DRC_DEBUG & 64 + RCACHE_CHECK("after unpin"); +#endif +} + +static void rcache_save_pinned(void) +{ + int i; + + // save pinned regs to context + for (i = 0; i < ARRAY_SIZE(guest_regs); i++) + if ((guest_regs[i].flags & GRF_PINNED) && guest_regs[i].vreg >= 0) + emith_ctx_write(cache_regs[guest_regs[i].vreg].hreg, i * 4); } static inline void rcache_set_usage_now(u32 mask) @@ -2222,7 +2286,7 @@ static inline int rcache_is_hreg_used(int hr) int x = reg_map_host[hr]; // is hr in use? 
return cache_regs[x].type != HR_FREE && - (cache_regs[x].type != HR_TEMP || (cache_regs[x].flags & HRF_LOCKED)); + (cache_regs[x].type != HR_TEMP || cache_regs[x].locked); } static inline u32 rcache_used_hregs_mask(void) @@ -2231,8 +2295,8 @@ static inline u32 rcache_used_hregs_mask(void) int i; for (i = 0; i < ARRAY_SIZE(cache_regs); i++) - if ((cache_regs[i].flags & HRF_TEMP) && cache_regs[i].type != HR_FREE && - (cache_regs[i].type != HR_TEMP || (cache_regs[i].flags & HRF_LOCKED))) + if ((cache_regs[i].htype & HRT_TEMP) && cache_regs[i].type != HR_FREE && + (cache_regs[i].type != HR_TEMP || cache_regs[i].locked)) mask |= 1 << cache_regs[i].hreg; return mask; @@ -2257,7 +2321,7 @@ static inline u32 rcache_cached_mask(void) int i; for (i = 0; i < ARRAY_SIZE(cache_regs); i++) - if (cache_regs[i].type == HR_CACHED || cache_regs[i].type == HR_STATIC) + if (cache_regs[i].type == HR_CACHED) mask |= cache_regs[i].gregs; return mask; @@ -2269,7 +2333,7 @@ static void rcache_clean_tmp(void) rcache_regs_clean = (1 << ARRAY_SIZE(guest_regs)) - 1; for (i = 0; i < ARRAY_SIZE(cache_regs); i++) - if (cache_regs[i].type == HR_CACHED && (cache_regs[i].flags & HRF_TEMP)) { + if (cache_regs[i].type == HR_CACHED && (cache_regs[i].htype & HRT_TEMP)) { rcache_unlock(i); #if REMAP_REGISTER rcache_remap_vreg(i); @@ -2300,7 +2364,7 @@ static void rcache_clean_masked(u32 mask) } // clean vregs where all aliases are covered by the mask for (i = 0; i < ARRAY_SIZE(cache_regs); i++) - if ((cache_regs[i].type == HR_CACHED || cache_regs[i].type == HR_STATIC) && + if (cache_regs[i].type == HR_CACHED && (cache_regs[i].gregs & mask) && !(cache_regs[i].gregs & ~mask)) rcache_clean_vreg(i); } @@ -2312,23 +2376,24 @@ static void rcache_clean(void) rcache_regs_clean = (1 << ARRAY_SIZE(guest_regs)) - 1; for (i = ARRAY_SIZE(cache_regs)-1; i >= 0; i--) - if (cache_regs[i].type == HR_CACHED || cache_regs[i].type == HR_STATIC) + if (cache_regs[i].type == HR_CACHED) rcache_clean_vreg(i); // relocate statics 
to their sregs (necessary before conditional jumps) for (i = 0; i < ARRAY_SIZE(guest_regs); i++) { - if ((guest_regs[i].flags & GRF_STATIC) && + if ((guest_regs[i].flags & (GRF_STATIC|GRF_PINNED)) && guest_regs[i].vreg != guest_regs[i].sreg) { - rcache_ref_vreg(guest_regs[i].vreg); + rcache_lock_vreg(guest_regs[i].vreg); rcache_evict_vreg(guest_regs[i].sreg); - rcache_unref_vreg(guest_regs[i].vreg); + rcache_unlock_vreg(guest_regs[i].vreg); if (guest_regs[i].vreg < 0) emith_ctx_read(cache_regs[guest_regs[i].sreg].hreg, i*4); else emith_move_r_r(cache_regs[guest_regs[i].sreg].hreg, cache_regs[guest_regs[i].vreg].hreg); cache_regs[guest_regs[i].sreg].gregs = 1 << i; - cache_regs[guest_regs[i].sreg].flags |= HRF_DIRTY; + cache_regs[guest_regs[i].sreg].type = HR_CACHED; + cache_regs[guest_regs[i].sreg].flags |= HRF_DIRTY|HRF_PINNED; guest_regs[i].flags |= GRF_DIRTY; guest_regs[i].vreg = guest_regs[i].sreg; } @@ -2341,7 +2406,7 @@ static void rcache_invalidate_tmp(void) int i; for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { - if (cache_regs[i].flags & HRF_TEMP) { + if (cache_regs[i].htype & HRT_TEMP) { rcache_unlock(i); if (cache_regs[i].type == HR_CACHED) rcache_evict_vreg(i); @@ -2365,7 +2430,8 @@ static void rcache_invalidate(void) guest_regs[i].vreg = -1; else { cache_regs[guest_regs[i].sreg].gregs = 1 << i; - cache_regs[guest_regs[i].sreg].flags |= HRF_DIRTY; + cache_regs[guest_regs[i].sreg].type = HR_CACHED; + cache_regs[guest_regs[i].sreg].flags |= HRF_DIRTY|HRF_PINNED; guest_regs[i].flags |= GRF_DIRTY; guest_regs[i].vreg = guest_regs[i].sreg; } @@ -2391,26 +2457,26 @@ static void rcache_init(void) // init is executed on every rom load, but this must only be executed once... 
if (once) { memset(reg_map_host, -1, sizeof(reg_map_host)); - for (i = 0; i < ARRAY_SIZE(cache_regs); i++) + for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { reg_map_host[cache_regs[i].hreg] = i; + if (cache_regs[i].htype == HRT_REG) + rcache_hregs_reg |= (1 << i); + } for (i = 0; i < ARRAY_SIZE(guest_regs); i++) if (guest_regs[i].flags & GRF_STATIC) { rcache_regs_static |= (1 << i); guest_regs[i].sreg = reg_map_host[guest_regs[i].sreg]; - cache_regs[guest_regs[i].sreg].type = HR_STATIC; + rcache_hregs_reg &= ~(1 << guest_regs[i].sreg); } else guest_regs[i].sreg = -1; once = 0; } - for (i = 0; i < ARRAY_SIZE(guest_regs); i++) - if (guest_regs[i].flags & GRF_STATIC) { - guest_regs[i].vreg = guest_regs[i].sreg; - cache_regs[guest_regs[i].sreg].gregs = (1 << i); - } - rcache_invalidate(); +#if DRC_DEBUG & 64 + RCACHE_CHECK("after init"); +#endif } // --------------------------------------------------------------- @@ -2802,13 +2868,13 @@ static void emit_do_static_regs(int is_write, int tmpr) int i, r, count; for (i = 0; i < ARRAY_SIZE(guest_regs); i++) { - if (guest_regs[i].flags & GRF_STATIC) + if (guest_regs[i].flags & (GRF_STATIC|GRF_PINNED)) r = cache_regs[guest_regs[i].vreg].hreg; else continue; for (count = 1; i < ARRAY_SIZE(guest_regs) - 1; i++, r++) { - if ((guest_regs[i + 1].flags & GRF_STATIC) && + if ((guest_regs[i + 1].flags & (GRF_STATIC|GRF_PINNED)) && cache_regs[guest_regs[i + 1].vreg].hreg == r + 1) count++; else @@ -2863,6 +2929,12 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) u32 pending_branch_direct:1; u32 pending_branch_indirect:1; } drcf = { 0, }; +#if LOOP_OPTIMIZER + void *pinned_loop_ptr[MAX_LOCAL_BRANCHES/16]; + u32 pinned_loop_pc[MAX_LOCAL_BRANCHES/16]; + u32 pinned_loop_mask[MAX_LOCAL_BRANCHES/16]; + int pinned_loop_count = 0; +#endif // PC of current, first, last SH2 insn u32 pc, base_pc, end_pc; @@ -2877,7 +2949,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) int tmp, tmp2; int cycles; int i, v; - u32 
u, m1, m2; + u32 u, m1, m2, m3, m4; int op; u16 crc; @@ -2925,7 +2997,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } // collect branch_targets that don't land on delay slots - m1 = m2 = v = op = 0; + m1 = m2 = m3 = m4 = v = op = 0; for (pc = base_pc, i = 0; pc < end_pc; i++, pc += 2) { if (op_flags[i] & OF_DELAY_OP) op_flags[i] &= ~OF_BTARGET; @@ -2955,9 +3027,14 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) drcf.pending_branch_direct = drcf.pending_branch_indirect = 0; op = OF_IDLE_LOOP; // loop type v = i; - m1 = m2 = 0; + m1 = m2 = m3 = m4 = 0; + if (!drcf.loop_type) // reset basic loop it it isn't recognized as loop + op_flags[i] &= ~OF_BASIC_LOOP; } if (drcf.loop_type) { + // calculate reg masks for loop pinning + m4 |= ops[i].source & ~m3; + m3 |= ops[i].dest; // detect loop type, and store poll/delay register if (op_flags[i] & OF_POLL_INSN) { op = OF_POLL_LOOP; @@ -2971,8 +3048,12 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) m2 |= ops[i].dest; // regs modified by other insns } // branch detector - if (OP_ISBRAIMM(ops[i].op) && ops[i].imm == base_pc + 2*v) - drcf.pending_branch_direct = 1; // backward branch detected + if (OP_ISBRAIMM(ops[i].op)) { + if (ops[i].imm == base_pc + 2*v) + drcf.pending_branch_direct = 1; // backward branch detected + else + op_flags[v] &= ~OF_BASIC_LOOP; // no basic loop + } if (OP_ISBRACND(ops[i].op)) drcf.pending_branch_indirect = 1; // conditions g,h - cond.branch // poll/idle loops terminate with their backwards branch to the loop start @@ -2982,6 +3063,17 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) op = 0; // conditions not met op_flags[v] = (op_flags[v] & ~OF_LOOP) | op; // set loop type drcf.loop_type = 0; +#if LOOP_OPTIMIZER + if (op_flags[v] & OF_BASIC_LOOP) { + m3 &= ~rcache_regs_static & ~BITMASK4(SHR_PC, SHR_PR, SHR_SR, SHR_MEM); + if (m3 && count_bits(m3) < count_bits(rcache_hregs_reg) && + pinned_loop_count < 
ARRAY_SIZE(pinned_loop_pc)) { + pinned_loop_mask[pinned_loop_count] = m3; + pinned_loop_pc[pinned_loop_count++] = base_pc + 2*v; + } else + op_flags[v] &= ~OF_BASIC_LOOP; + } +#endif } } #endif @@ -3007,9 +3099,13 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // clear stale state after compile errors + rcache_unlock_all(); rcache_invalidate(); emith_invalidate_t(); drcf = (struct drcf) { 0 }; +#if LOOP_OPTIMIZER + pinned_loop_count = 0; +#endif // ------------------------------------------------- // 3rd pass: actual compilation @@ -3110,10 +3206,31 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) rcache_free_tmp(tmp3); #endif +#if LOOP_OPTIMIZER + if (op_flags[i] & OF_BASIC_LOOP) { + if (pinned_loop_pc[pinned_loop_count] == pc) { + // pin needed regs on loop entry + FOR_ALL_BITS_SET_DO(pinned_loop_mask[pinned_loop_count], v, rcache_pin_reg(v)); + pinned_loop_ptr[pinned_loop_count] = tcache_ptr; + } else + op_flags[i] &= ~OF_BASIC_LOOP; + } +#endif + // check cycles tmp = rcache_get_tmp_arg(0); sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); emith_cmp_r_imm(sr, 0); +#if LOOP_OPTIMIZER + // on drc exit pinned registers must be saved + if (op_flags[i] & OF_BASIC_LOOP) { + EMITH_JMP_START(DCOND_GT); + rcache_save_pinned(); + emith_move_r_imm(tmp, pc); + emith_jump(sh2_drc_exit); + EMITH_JMP_END(DCOND_GT); + } else +#endif if (emith_jump_cond_inrange(sh2_drc_exit)) { emith_move_r_imm_c(DCOND_LE, tmp, pc); emith_jump_cond(DCOND_LE, sh2_drc_exit); @@ -4237,14 +4354,13 @@ end_op: if (OP_ISBRACND(opd_b->op)) ctaken = (op_flags[i] & OF_DELAY_OP) ? 
1 : 2; cycles += ctaken; // assume branch taken -#if LOOP_DETECTION +#if LOOP_OPTIMIZER if ((drcf.loop_type == OF_IDLE_LOOP || (drcf.loop_type == OF_DELAY_LOOP && drcf.delay_reg >= 0))) { // idle or delay loop emit_sync_t_to_sr(); emith_sh2_delay_loop(cycles, drcf.delay_reg); - rcache_unlock_all(); // may lock delay_reg drcf.polling = drcf.loop_type = 0; } #endif @@ -4291,6 +4407,15 @@ end_op: } #endif + rcache_unlock_all(); // may lock delay_reg +#if LOOP_OPTIMIZER + if (target && pinned_loop_pc[pinned_loop_count] == target_pc) { + rcache_unpin_all(); + target = pinned_loop_ptr[pinned_loop_count]; + pinned_loop_count ++; + } +#endif + if (target == NULL) { // can't resolve branch locally, make a block exit @@ -4372,6 +4497,7 @@ end_op: drcf.pending_branch_indirect = 0; drcf.polling = drcf.loop_type = 0; } + rcache_unlock_all(); do_host_disasm(tcache_id); } @@ -6198,6 +6324,8 @@ end: if (OP_ISBRAIMM(opd->op)) { // BSR, BRA, BT, BF with immediate target int i_tmp = (opd->imm - base_pc) / 2; // branch target, index in ops + if (i_tmp == last_btarget) // candidate for basic loop optimizer + op_flags[i_tmp] |= OF_BASIC_LOOP; if (i_tmp == last_btarget && op <= 1) { op_flags[i_tmp] |= OF_LOOP; // conditions met -> mark loop last_btarget = i+1; // condition 4 diff --git a/cpu/sh2/compiler.h b/cpu/sh2/compiler.h index 187ad716..3565940d 100644 --- a/cpu/sh2/compiler.h +++ b/cpu/sh2/compiler.h @@ -22,6 +22,7 @@ void sh2_drc_frame(void); #define OF_B_IN_DS (1 << 4) #define OF_DELAY_INSN (1 << 5) // DT, (TODO ADD+CMP?) #define OF_POLL_INSN (1 << 6) // MOV @(...),Rn (no post increment), TST @(...) 
+#define OF_BASIC_LOOP (1 << 7) // pinnable loop without any branches in it #define OF_IDLE_LOOP (1 << 2) #define OF_DELAY_LOOP (2 << 2) @@ -39,7 +40,7 @@ unsigned short scan_block(unsigned int base_pc, int is_slave, #elif defined(__aarch64__) #define DRC_SR_REG "r22" #elif defined(__mips__) -#define DRC_SR_REG "s6" +#define DRC_SR_REG "s2" #elif defined(__i386__) #define DRC_SR_REG "edi" #elif defined(__x86_64__) From 36614252d942e2dedb856895396441665647b831 Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 19 Sep 2019 22:14:28 +0200 Subject: [PATCH 0218/1110] sh2 drc: improved RTS call stack cache --- cpu/drc/emit_arm.c | 26 +++------- cpu/drc/emit_arm64.c | 37 ++++---------- cpu/drc/emit_mips.c | 36 ++++---------- cpu/drc/emit_x86.c | 116 +++++++++++++++++++------------------------ cpu/sh2/compiler.c | 102 +++++++++++++++++++------------------ pico/32x/pwm.c | 2 +- 6 files changed, 130 insertions(+), 189 deletions(-) diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index 66a5b065..71a10922 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -1000,10 +1000,6 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) emith_read_r_r_offs_c(A_COND_AL, r, rs, offs) #define emith_read_r_r_r(r, rs, rm) \ EOP_LDR_REG_LSL(A_COND_AL, r, rs, rm, 0) -#define emith_read_r_r_r_wb(r, rs, rm) \ - EOP_LDR_REG_LSL_WB(A_COND_AL, r, rs, rm, 0) -#define emith_read_r_r_r_ptr_wb(r, rs, rm) \ - emith_read_r_r_r_wb(r, rs, rm) #define emith_read8_r_r_offs_c(cond, r, rs, offs) \ EOP_LDRB_IMM2(cond, r, rs, offs) @@ -1049,10 +1045,6 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) emith_write_r_r_offs_c(A_COND_AL, r, rs, offs) #define emith_write_r_r_offs_ptr(r, rs, offs) \ emith_write_r_r_offs_c(A_COND_AL, r, rs, offs) -#define emith_write_r_r_r_wb(r, rs, rm) \ - EOP_STR_REG_LSL_WB(A_COND_AL, r, rs, rm, 0) -#define emith_write_r_r_r_ptr_wb(r, rs, rm) \ - emith_write_r_r_r_wb(r, rs, rm) #define emith_ctx_read_c(cond, r, offs) \ 
emith_read_r_r_offs_c(cond, r, CONTEXT_REG, offs) @@ -1133,21 +1125,21 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) #define emith_jump_patchable(target) \ emith_jump(target) +#define emith_jump_patchable_size() 4 #define emith_jump_cond(cond, target) \ emith_xbranch(cond, target, 0) +#define emith_jump_cond_inrange(target) !0 #define emith_jump_cond_patchable(cond, target) \ emith_jump_cond(cond, target) -#define emith_jump_patch(ptr, target) ({ \ +#define emith_jump_patch(ptr, target, pos) do { \ u32 *ptr_ = ptr; \ u32 val_ = (u32 *)(target) - ptr_ - 2; \ *ptr_ = (*ptr_ & 0xff000000) | (val_ & 0x00ffffff); \ - (u8 *)ptr; \ -}) - -#define emith_jump_cond_inrange(target) !0 + if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)ptr; \ +} while (0) #define emith_jump_patch_size() 4 #define emith_jump_at(ptr, target) do { \ @@ -1184,11 +1176,6 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) emith_jump_ctx(offs); \ } while (0) -#define emith_call_link(r, target) do { \ - emith_move_r_r(r, PC); \ - emith_jump(target); \ -} while (0) - #define emith_call_cleanup() /**/ #define emith_ret_c(cond) \ @@ -1200,6 +1187,9 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) #define emith_ret_to_ctx(offs) \ emith_ctx_write(LR, offs) +#define emith_add_r_ret_imm(r, imm) \ + emith_add_r_r_ptr_imm(r, LR, imm) + /* pushes r12 for eabi alignment */ #define emith_push_ret(r) do { \ int r_ = (r >= 0 ? 
r : 12); \ diff --git a/cpu/drc/emit_arm64.c b/cpu/drc/emit_arm64.c index 8ce2ef38..72f53dd5 100644 --- a/cpu/drc/emit_arm64.c +++ b/cpu/drc/emit_arm64.c @@ -865,15 +865,6 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) #define emith_read_r_r_r_c(cond, r, rs, rm) \ emith_read_r_r_r(r, rs, rm) -#define emith_read_r_r_r_ptr_wb(r, rs, rm) do { \ - emith_read_r_r_r_ptr(r, rs, rm); \ - emith_add_r_r_ptr(rs, rm); \ -} while (0) -#define emith_read_r_r_r_wb(r, rs, rm) do { \ - emith_read_r_r_r(r, rs, rm); \ - emith_add_r_r_ptr(rs, rm); \ -} while (0) - #define emith_read8_r_r_offs(r, rs, offs) \ emith_ldst_offs(AM_B, r, rs, offs, LT_LD, AM_IDX) #define emith_read8_r_r_offs_c(cond, r, rs, offs) \ @@ -935,15 +926,6 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) #define emith_write_r_r_r_c(cond, r, rs, rm) \ emith_write_r_r_r(r, rs, rm) -#define emith_write_r_r_r_ptr_wb(r, rs, rm) do { \ - emith_write_r_r_r_ptr(r, rs, rm); \ - emith_add_r_r_ptr(rs, rm); \ -} while (0) -#define emith_write_r_r_r_wb(r, rs, rm) do { \ - emith_write_r_r_r(r, rs, rm); \ - emith_add_r_r_ptr(rs, rm); \ -} while (0) - #define emith_ctx_read_ptr(r, offs) \ emith_read_r_r_offs_ptr(r, CONTEXT_REG, offs) @@ -1031,6 +1013,7 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) #define emith_jump_patchable(target) \ emith_jump(target) +#define emith_jump_patchable_size() 4 #define emith_jump_cond(cond, target) \ emith_bcond(tcache_ptr, 0, cond, target) @@ -1039,9 +1022,9 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) emith_bcond(tcache_ptr, 1, cond, target) #define emith_jump_cond_inrange(target) \ - !(((u8 *)target - (u8 *)tcache_ptr + 0x100000) >> 22) + !(((u8 *)target - (u8 *)tcache_ptr + 0x100000) >> 21) -#define emith_jump_patch(ptr, target) ({ \ +#define emith_jump_patch(ptr, target, pos) do { \ u32 *ptr_ = (u32 *)ptr; \ u32 disp_ = (u8 *)(target) - (u8 *)(ptr_); \ int 
cond_ = ptr_[0] & 0xf; \ @@ -1051,8 +1034,9 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) } else if (ptr_[0] & 0x80000000) \ EMIT_PTR(ptr_, A64_BL((disp_) & 0x0fffffff)); \ else EMIT_PTR(ptr_, A64_B((disp_) & 0x0fffffff)); \ - (u8 *)ptr; \ -}) + if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)ptr; \ +} while (0) +#define emith_jump_patch_size() 8 #define emith_jump_reg(r) \ EMIT(A64_BR(r)) @@ -1085,11 +1069,6 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) rcache_free_tmp(_t); \ } while (0) -#define emith_call_link(r, target) do { \ - EMIT(A64_ADRXLIT_IMM(r, 8)); \ - emith_jump(target); \ -} while (0) - #define emith_call_cleanup() /**/ #define emith_ret() \ @@ -1100,6 +1079,9 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) #define emith_ret_to_ctx(offs) \ emith_ctx_write_ptr(LR, offs) +#define emith_add_r_ret_imm(r, imm) \ + emith_add_r_r_ptr_imm(r, LR, imm) + // NB: pushes r or r18 for SP hardware alignment #define emith_push_ret(r) do { \ int r_ = (r >= 0 ? 
r : 18); \ @@ -1120,7 +1102,6 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) #define emith_flush() /**/ #define host_instructions_updated(base, end) __builtin___clear_cache(base, end) #define emith_update_cache() /**/ -#define emith_jump_patch_size() 8 #define emith_rw_offs_max() 0xff diff --git a/cpu/drc/emit_mips.c b/cpu/drc/emit_mips.c index 0e85f92a..6ff134d9 100644 --- a/cpu/drc/emit_mips.c +++ b/cpu/drc/emit_mips.c @@ -950,13 +950,6 @@ static void emith_lohi_nops(void) #define emith_read_r_r_r_c(cond, r, rs, rm) \ emith_read_r_r_r(r, rs, rm) -#define emith_read_r_r_r_ptr_wb(r, rs, rm) do { \ - emith_add_r_r_r(rs, rs, rm); \ - EMIT(MIPS_LW(r, rs, 0)); \ -} while (0) -#define emith_read_r_r_r_wb(r, rs, rm) \ - emith_read_r_r_r_ptr_wb(r, rs, rm) - #define emith_read8_r_r_offs(r, rs, offs) \ EMIT(MIPS_LBU(r, rs, offs)) #define emith_read8_r_r_offs_c(cond, r, rs, offs) \ @@ -1028,13 +1021,6 @@ static void emith_lohi_nops(void) #define emith_write_r_r_r_c(cond, r, rs, rm) \ emith_write_r_r_r(r, rs, rm) -#define emith_write_r_r_r_ptr_wb(r, rs, rm) do { \ - emith_add_r_r_r(rs, rs, rm); \ - EMIT(MIPS_SW(r, rs, 0)); \ -} while (0) -#define emith_write_r_r_r_wb(r, rs, rm) \ - emith_write_r_r_r_ptr_wb(r, rs, rm) - #define emith_ctx_read_ptr(r, offs) \ emith_read_r_r_offs_ptr(r, CONTEXT_REG, offs) @@ -1176,6 +1162,7 @@ static int emith_cond_check(int cond, int *r) emith_branch(MIPS_J((uintptr_t)target & 0x0fffffff)) #define emith_jump_patchable(target) \ emith_jump(target) +#define emith_jump_patchable_size() 8 /* J+delayslot */ // NB: MIPS conditional branches have only +/- 128KB range #define emith_jump_cond(cond, target) do { \ @@ -1190,6 +1177,8 @@ static int emith_cond_check(int cond, int *r) EMIT_PTR(bp, MIPS_BCONDZ(mcond_, r_, (u8 *)tcache_ptr-bp-4)); \ } \ } while (0) +#define emith_jump_cond_inrange(target) \ + !(((u8 *)target - (u8 *)tcache_ptr + 0x20000) >> 18) #define emith_jump_cond_patchable(cond, target) do { \ int r_, 
mcond_ = emith_cond_check(cond, &r_); \ @@ -1199,16 +1188,14 @@ static int emith_cond_check(int cond, int *r) EMIT_PTR(bp, MIPS_BCONDZ(mcond_, r_, (u8 *)tcache_ptr-bp-4)); \ } while (0) -#define emith_jump_cond_inrange(target) \ - !(((u8 *)target - (u8 *)tcache_ptr + 0x10000) >> 18) - // NB: returns position of patch for cache maintenance -#define emith_jump_patch(ptr, target) ({ \ +#define emith_jump_patch(ptr, target, pos) do { \ u32 *ptr_ = (u32 *)ptr-1; /* must skip condition check code */ \ while ((ptr_[0] & 0xf8000000) != OP_J << 26) ptr_ ++; \ EMIT_PTR(ptr_, MIPS_J((uintptr_t)target & 0x0fffffff)); \ - (u8 *)(ptr_-1); \ -}) + if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)(ptr_-1); \ +} while (0) +#define emith_jump_patch_size() 4 #define emith_jump_reg(r) \ emith_branch(MIPS_JR(r)) @@ -1235,11 +1222,6 @@ static int emith_cond_check(int cond, int *r) emith_call_reg(AT); \ } while (0) -#define emith_call_link(r, target) do { \ - EMIT(MIPS_BL(4)); EMIT(MIPS_ADD_IMM(r, LR, 8)); emith_flush(); \ - emith_branch(MIPS_J((uintptr_t)target & 0x0fffffff)); \ -} while (0) - #define emith_call_cleanup() /**/ #define emith_ret() \ @@ -1250,6 +1232,9 @@ static int emith_cond_check(int cond, int *r) #define emith_ret_to_ctx(offs) \ emith_ctx_write_ptr(LR, offs) +#define emith_add_r_ret_imm(r, imm) \ + emith_add_r_r_ptr_imm(r, LR, imm) + // NB: ABI SP alignment is 8 for compatibility with MIPS IV #define emith_push_ret(r) do { \ emith_sub_r_imm(SP, 8+16); /* reserve new arg save area (16) */ \ @@ -1271,7 +1256,6 @@ static int emith_cond_check(int cond, int *r) // NB: mips32r2 has SYNCI #define host_instructions_updated(base, end) __builtin___clear_cache(base, end) #define emith_update_cache() /**/ -#define emith_jump_patch_size() 4 #define emith_rw_offs_max() 0x7fff // SH2 drc specific diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index caade3a6..d8b3a2dd 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -297,54 +297,61 @@ enum { xAX = 0, xCX, xDX, xBX, 
xSP, xBP, xSI, xDI, // x86-64,i386 common // _r_r_r_shift #define emith_add_r_r_r_lsl(d, s1, s2, lslimm) do { \ - int tmp_ = rcache_get_tmp(); \ - emith_lsl(tmp_, s2, lslimm); \ - emith_add_r_r_r(d, s1, tmp_); \ - rcache_free_tmp(tmp_); \ + if (lslimm) { \ + int tmp_ = rcache_get_tmp(); \ + emith_lsl(tmp_, s2, lslimm); \ + emith_add_r_r_r(d, s1, tmp_); \ + rcache_free_tmp(tmp_); \ + } else emith_add_r_r_r(d, s1, s2); \ } while (0) #define emith_add_r_r_r_lsl_ptr(d, s1, s2, lslimm) do { \ - int tmp_ = rcache_get_tmp(); \ - emith_lsl(tmp_, s2, lslimm); \ - emith_add_r_r_r_ptr(d, s1, tmp_); \ - rcache_free_tmp(tmp_); \ + if (lslimm) { \ + int tmp_ = rcache_get_tmp(); \ + emith_lsl(tmp_, s2, lslimm); \ + emith_add_r_r_r_ptr(d, s1, tmp_); \ + rcache_free_tmp(tmp_); \ + } else emith_add_r_r_r_ptr(d, s1, s2); \ } while (0) #define emith_add_r_r_r_lsr(d, s1, s2, lsrimm) do { \ - int tmp_ = rcache_get_tmp(); \ - emith_lsr(tmp_, s2, lsrimm); \ - emith_add_r_r_r(d, s1, tmp_); \ - rcache_free_tmp(tmp_); \ + if (lsrimm) { \ + int tmp_ = rcache_get_tmp(); \ + emith_lsr(tmp_, s2, lsrimm); \ + emith_add_r_r_r(d, s1, tmp_); \ + rcache_free_tmp(tmp_); \ + } else emith_add_r_r_r(d, s1, s2); \ } while (0) #define emith_sub_r_r_r_lsl(d, s1, s2, lslimm) do { \ - int tmp_ = rcache_get_tmp(); \ - emith_lsl(tmp_, s2, lslimm); \ - emith_sub_r_r_r(d, s1, tmp_); \ - rcache_free_tmp(tmp_); \ + if (lslimm) { \ + int tmp_ = rcache_get_tmp(); \ + emith_lsl(tmp_, s2, lslimm); \ + emith_sub_r_r_r(d, s1, tmp_); \ + rcache_free_tmp(tmp_); \ + } else emith_sub_r_r_r(d, s1, s2); \ } while (0) #define emith_or_r_r_r_lsl(d, s1, s2, lslimm) do { \ - int tmp_ = rcache_get_tmp(); \ - emith_lsl(tmp_, s2, lslimm); \ - emith_or_r_r_r(d, s1, tmp_); \ - rcache_free_tmp(tmp_); \ + if (lslimm) { \ + int tmp_ = rcache_get_tmp(); \ + emith_lsl(tmp_, s2, lslimm); \ + emith_or_r_r_r(d, s1, tmp_); \ + rcache_free_tmp(tmp_); \ + } else emith_or_r_r_r(d, s1, s2); \ } while (0) // _r_r_shift -#define emith_or_r_r_lsl(d, 
s, lslimm) do { \ - int tmp_ = rcache_get_tmp(); \ - emith_lsl(tmp_, s, lslimm); \ - emith_or_r_r(d, tmp_); \ - rcache_free_tmp(tmp_); \ -} while (0) +#define emith_or_r_r_lsl(d, s, lslimm) \ + emith_or_r_r_r_lsl(d, d, s, lslimm) -// d != s #define emith_eor_r_r_lsr(d, s, lsrimm) do { \ - emith_push(s); \ - emith_lsr(s, s, lsrimm); \ - emith_eor_r_r(d, s); \ - emith_pop(s); \ + if (lsrimm) { \ + int tmp_ = rcache_get_tmp(); \ + emith_lsr(tmp_, s, lsrimm); \ + emith_eor_r_r(d, tmp_); \ + rcache_free_tmp(tmp_); \ + } else emith_eor_r_r(d, s); \ } while (0) // _r_imm @@ -792,14 +799,6 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common EMIT_OP_MODRM64(0x8b, 0, r, 4); \ EMIT_SIB64(0, rs, rm); /* mov r, [rm + rs * 1] */ \ } while (0) -#define emith_read_r_r_r_wb(r, rs, rm) do { \ - emith_read_r_r_r(r, rs, rm); \ - emith_add_r_r_ptr(rs, rm); \ -} while (0) -#define emith_read_r_r_r_ptr_wb(r, rs, rm) do { \ - emith_read_r_r_r_ptr(r, rs, rm); \ - emith_add_r_r_ptr(rs, rm); \ -} while (0) #define emith_write_r_r_r(r, rs, rm) do { \ EMIT_XREX_IF(0, r, rm, rs); \ @@ -811,15 +810,6 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common EMIT_OP_MODRM64(0x89, 0, r, 4); \ EMIT_SIB64(0, rs, rm); /* mov [rm + rs * 1], r */ \ } while (0) -#define emith_write_r_r_r_wb(r, rs, rm) do { \ - emith_write_r_r_r(r, rs, rm); \ - emith_add_r_r_ptr(rs, rm); \ -} while (0) -#define emith_write_r_r_r_ptr_wb(r, rs, rm) do { \ - emith_write_r_r_r_ptr(r, rs, rm); \ - emith_add_r_r_ptr(rs, rm); \ -} while (0) - #define emith_ctx_read(r, offs) \ emith_read_r_r_offs(r, CONTEXT_REG, offs) @@ -846,10 +836,11 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common emith_ctx_write(r_, offs_); \ } while (0) -// assumes EBX is free #define emith_ret_to_ctx(offs) do { \ - emith_pop(xBX); \ - emith_ctx_write(xBX, offs); \ + int tmp_ = rcache_get_tmp(); \ + emith_pop(tmp_); \ + emith_ctx_write(tmp_, offs); \ + rcache_free_tmp(tmp_); \ } while (0) 
#define emith_jump(ptr) do { \ @@ -860,24 +851,24 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common #define emith_jump_patchable(target) \ emith_jump(target) +#define emith_jump_patchable_size() 5 /* JMP rel32 */ #define emith_jump_cond(cond, ptr) do { \ u32 disp = (u8 *)(ptr) - ((u8 *)tcache_ptr + 6); \ EMIT_OP(0x0f80 | (cond)); \ EMIT(disp, u32); \ } while (0) +#define emith_jump_cond_inrange(ptr) !0 #define emith_jump_cond_patchable(cond, target) \ emith_jump_cond(cond, target) -#define emith_jump_patch(ptr, target) ({ \ +#define emith_jump_patch(ptr, target, pos) do { \ u32 disp_ = (u8 *)(target) - ((u8 *)(ptr) + 4); \ u32 offs_ = (*(u8 *)(ptr) == 0x0f) ? 2 : 1; \ EMIT_PTR((u8 *)(ptr) + offs_, disp_ - offs_, u32); \ - ptr; \ -}) - -#define emith_jump_cond_inrange(ptr) !0 + if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)ptr; \ +} while (0) #define emith_jump_patch_size() 6 #define emith_jump_at(ptr, target) do { \ @@ -903,20 +894,17 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common EMIT(offs, u32); \ } while (0) -#define emith_call_link(r, target) do { \ - EMIT_OP(0xe8); \ - EMIT(0, u32); /* call pc+0 */ \ - emith_pop(r); \ - emith_add_r_r_ptr_imm(r, r, 13); \ - emith_jump(target); \ -} while (0) - #define emith_call_cleanup() \ emith_add_r_r_ptr_imm(xSP, xSP, sizeof(void *)); // remove return addr #define emith_ret() \ EMIT_OP(0xc3) +#define emith_add_r_ret_imm(r, imm) do { \ + emith_read_r_r_offs_ptr(r, xSP, 0); \ + emith_add_r_r_ptr_imm(r, r, imm); \ +} while (0) + #define emith_jump_reg(r) \ EMIT_OP_MODRM(0xff, 3, 4, r) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index f6fbadaf..ec8554cc 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -40,7 +40,7 @@ #define PROPAGATE_CONSTANTS 1 #define LINK_BRANCHES 1 #define BRANCH_CACHE 1 -#define CALL_STACK 0 +#define CALL_STACK 1 #define ALIAS_REGISTERS 1 #define REMAP_REGISTER 1 #define LOOP_DETECTION 1 @@ -635,7 +635,7 @@ static signed char 
reg_map_host[HOST_REGS]; static void REGPARM(1) (*sh2_drc_entry)(SH2 *sh2); static void REGPARM(1) (*sh2_drc_dispatcher)(u32 pc); #if CALL_STACK -static void REGPARM(2) (*sh2_drc_dispatcher_call)(u32 pc, uptr host_pr); +static u32 REGPARM(2) (*sh2_drc_dispatcher_call)(u32 pc); static void REGPARM(1) (*sh2_drc_dispatcher_return)(u32 pc); #endif static void REGPARM(1) (*sh2_drc_exit)(u32 pc); @@ -1150,7 +1150,8 @@ static void dr_block_link(struct block_entry *be, struct block_link *bl, int emi bl->jump, bl->target_pc, be->tcache_ptr); if (emit_jump) { - u8 *jump = emith_jump_patch(bl->jump, be->tcache_ptr); + u8 *jump; + emith_jump_patch(bl->jump, be->tcache_ptr, &jump); // only needs sync if patch is possibly crossing cacheline (assume 16 byte) if ((uintptr_t)jump >>4 != ((uintptr_t)jump+emith_jump_patch_size()-1) >>4) host_instructions_updated(jump, jump+emith_jump_patch_size()); @@ -1171,7 +1172,8 @@ static void dr_block_unlink(struct block_link *bl, int emit_jump) if (bl->target) { if (emit_jump) { - u8 *jump = emith_jump_patch(bl->jump, sh2_drc_dispatcher); + u8 *jump; + emith_jump_patch(bl->jump, sh2_drc_dispatcher, &jump); // update cpu caches since the previous jump target doesn't exist anymore host_instructions_updated(jump, jump+emith_jump_patch_size()); } @@ -1381,7 +1383,7 @@ static void rcache_remap_vreg(int x); { d = 1; printf("cache check r=%d c=%d not connected?\n", i, gp->cnst); }\ if ((gp->flags & GRF_CDIRTY) && (gp->vreg != -1 || !(gp->flags & GRF_CONST)))\ { d = 1; printf("cache check r=%d CDIRTY?\n", i); } \ - if (gp->flags & GRF_PINNED) { \ + if (gp->flags & (GRF_STATIC|GRF_PINNED)) { \ if (gp->sreg == -1 || !(cache_regs[gp->sreg].flags & HRF_PINNED))\ { d = 1; printf("cache check r=%d v=%d not pinned?\n", i, gp->vreg); } \ else m &= ~(1 << gp->sreg); \ @@ -4407,7 +4409,7 @@ end_op: } #endif - rcache_unlock_all(); // may lock delay_reg + rcache_unlock_all(); #if LOOP_OPTIMIZER if (target && pinned_loop_pc[pinned_loop_count] == target_pc) { 
rcache_unpin_all(); @@ -4427,30 +4429,26 @@ end_op: #if CALL_STACK if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) { // BSR - tmp = rcache_get_tmp_arg(1); - emith_call_link(tmp, sh2_drc_dispatcher_call); - rcache_free_tmp(tmp); - } else + emith_call(sh2_drc_dispatcher_call); + } #endif - target = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id); + + target = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id); patchable = 1; } // create branch - if (patchable) { - if (cond != -1) + if (cond != -1) { + if (patchable) emith_jump_cond_patchable(cond, target); - else if (target != NULL) { - rcache_invalidate(); - emith_jump_patchable(target); - } - } else { - if (cond != -1) + else emith_jump_cond(cond, target); - else if (target != NULL) { - rcache_invalidate(); + } else { + rcache_invalidate(); + if (patchable) + emith_jump_patchable(target); + else emith_jump(target); - } } // branch not taken, correct cycle count @@ -4476,14 +4474,14 @@ end_op: rcache_invalidate(); #if CALL_STACK struct op_data *opd_b = (op_flags[i] & OF_DELAY_OP) ? 
opd-1 : opd; + if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) { + // JSR/BSRF + emith_call(sh2_drc_dispatcher_call); + } + if (opd_b->rm == SHR_PR) { // RTS emith_jump(sh2_drc_dispatcher_return); - } else if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) { - // JSR/BSRF - tmp = rcache_get_tmp_arg(1); - emith_call_link(tmp, sh2_drc_dispatcher_call); - rcache_free(tmp); } else #endif if (gconst_get(SHR_PC, &target_pc)) { @@ -4544,7 +4542,7 @@ end_op: rcache_flush(); emith_jump(sh2_drc_dispatcher); } - emith_jump_patch(branch_patch_ptr[i], target); + emith_jump_patch(branch_patch_ptr[i], target, NULL); } emith_pool_commit(0); @@ -4713,20 +4711,6 @@ static void sh2_generate_utils(void) emith_sh2_drc_exit(); emith_flush(); -#if CALL_STACK - // sh2_drc_dispatcher_call(u32 pc, uptr host_pr) - sh2_drc_dispatcher_call = (void *)tcache_ptr; - emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx)); - emith_add_r_imm(arg2, 2*sizeof(void *)); - emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *)); - emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx)); - emith_add_r_r_ptr_imm(arg3, CONTEXT_REG, offsetof(SH2, rts_cache) + sizeof(void *)); - emith_write_r_r_r_ptr_wb(arg1, arg2, arg3); - emith_ctx_read(arg3, SHR_PR * 4); - emith_write_r_r_offs(arg3, arg2, (s8)-sizeof(void *)); - emith_flush(); - // FALLTHROUGH -#endif // sh2_drc_dispatcher(u32 pc) sh2_drc_dispatcher = (void *)tcache_ptr; emith_ctx_write(arg0, SHR_PC * 4); @@ -4782,35 +4766,49 @@ static void sh2_generate_utils(void) emith_flush(); #if CALL_STACK + // pc = sh2_drc_dispatcher_call(u32 pc) + sh2_drc_dispatcher_call = (void *)tcache_ptr; + emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx)); + emith_ctx_read(arg1, SHR_PR * 4); + emith_add_r_imm(arg2, 2*sizeof(void *)); + emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *)); + emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx)); + emith_add_r_r_r_lsl_ptr(arg2, CONTEXT_REG, arg2, 0); + emith_write_r_r_offs(arg1, arg2, 
offsetof(SH2, rts_cache)); + emith_add_r_ret_imm(arg1, emith_jump_patchable_size()); // skip jump_patchable for rts host address + emith_write_r_r_offs_ptr(arg1, arg2, offsetof(SH2, rts_cache) + sizeof(void *)); + emith_ret(); + emith_flush(); + // sh2_drc_dispatcher_return(u32 pc) sh2_drc_dispatcher_return = (void *)tcache_ptr; emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx)); - emith_add_r_r_ptr_imm(arg1, CONTEXT_REG, offsetof(SH2, rts_cache)); - emith_read_r_r_r_wb(arg3, arg1, arg2); + emith_add_r_r_r_lsl_ptr(arg1, CONTEXT_REG, arg2, 0); + emith_read_r_r_offs(arg3, arg1, offsetof(SH2, rts_cache)); emith_cmp_r_r(arg0, arg3); #if (DRC_DEBUG & 128) EMITH_SJMP_START(DCOND_EQ); - emith_move_r_ptr_imm(arg2, (uptr)&rcmiss); - emith_read_r_r_offs_c(DCOND_NE, arg1, arg2, 0); + emith_move_r_ptr_imm(arg3, (uptr)&rcmiss); + emith_read_r_r_offs_c(DCOND_NE, arg1, arg3, 0); emith_add_r_imm_c(DCOND_NE, arg1, 1); - emith_write_r_r_offs_c(DCOND_NE, arg1, arg2, 0); + emith_write_r_r_offs_c(DCOND_NE, arg1, arg3, 0); EMITH_SJMP_END(DCOND_EQ); #endif emith_jump_cond(DCOND_NE, sh2_drc_dispatcher); - emith_read_r_r_offs_ptr(arg0, arg1, sizeof(void *)); + emith_read_r_r_offs_ptr(arg0, arg1, offsetof(SH2, rts_cache) + sizeof(void *)); emith_sub_r_imm(arg2, 2*sizeof(void *)); emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *)); emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx)); #if (DRC_DEBUG & 128) - emith_move_r_ptr_imm(arg2, (uptr)&rchit); - emith_read_r_r_offs(arg1, arg2, 0); + emith_move_r_ptr_imm(arg3, (uptr)&rchit); + emith_read_r_r_offs(arg1, arg3, 0); emith_add_r_imm(arg1, 1); - emith_write_r_r_offs(arg1, arg2, 0); + emith_write_r_r_offs(arg1, arg3, 0); #endif emith_jump_reg(arg0); emith_flush(); #endif - + // sh2_drc_test_irq(void) // assumes it's called from main function (may jump to dispatcher) sh2_drc_test_irq = (void *)tcache_ptr; diff --git a/pico/32x/pwm.c b/pico/32x/pwm.c index 1c1ec428..0aa2f586 100644 --- a/pico/32x/pwm.c +++ b/pico/32x/pwm.c 
@@ -88,7 +88,7 @@ static void consume_fifo_do(SH2 *sh2, unsigned int m68k_cycles, mem->pwm_index[0] = (mem->pwm_index[0]+1) % 4; Pico32x.pwm_p[0]--; pwm.current[0] = convert_sample(fifo_l[mem->pwm_index[0]]); - sum |=pwm.current[0]; + sum |= pwm.current[0]; } if (Pico32x.pwm_p[1] > 0) { mem->pwm_index[1] = (mem->pwm_index[1]+1) % 4; From 06bc3c0693661afdaef6b63bc8e7dca4ca05851b Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 28 Sep 2019 16:39:26 +0200 Subject: [PATCH 0219/1110] sh2 drc: drc exit, block linking and branch handling revised --- cpu/drc/emit_arm.c | 28 ++- cpu/drc/emit_arm64.c | 65 +++--- cpu/drc/emit_mips.c | 60 +++-- cpu/drc/emit_x86.c | 27 ++- cpu/sh2/compiler.c | 528 ++++++++++++++++++++++++++++--------------- 5 files changed, 462 insertions(+), 246 deletions(-) diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index 71a10922..b8c6419c 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -631,8 +631,8 @@ static void emith_pool_commit(int jumpover) static inline void emith_pool_check(void) { // check if pool must be committed - if (literal_iindex > MAX_HOST_LITERALS-4 || - (u8 *)tcache_ptr - (u8 *)literal_insn[0] > 0xe00) + if (literal_iindex > MAX_HOST_LITERALS-4 || (literal_pindex && + (u8 *)tcache_ptr - (u8 *)literal_insn[0] > 0xe00)) // pool full, or displacement is approaching the limit emith_pool_commit(1); } @@ -889,11 +889,19 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) #define emith_tst_r_imm_c(cond, r, imm) \ emith_top_imm(cond, A_OP_TST, r, imm) -#define emith_move_r_imm_s8(r, imm) do { \ +#define emith_move_r_imm_s8_patchable(r, imm) do { \ + emith_flush(); \ if ((s8)(imm) < 0) \ - EOP_MVN_IMM(r, 0, ((u8)(imm) ^ 0xff)); \ + EOP_MVN_IMM(r, 0, (u8)~(imm)); \ else \ - EOP_MOV_IMM(r, 0, (u8)imm); \ + EOP_MOV_IMM(r, 0, (u8)(imm)); \ +} while (0) +#define emith_move_r_imm_s8_patch(ptr, imm) do { \ + u32 *ptr_ = (u32 *)ptr; u32 op_ = *ptr_ & 0xfe1ff000; \ + if ((s8)(imm) < 0) \ + EMIT_PTR(ptr_, op_ | 
(A_OP_MVN<<21) | (u8)~(imm));\ + else \ + EMIT_PTR(ptr_, op_ | (A_OP_MOV<<21) | (u8)(imm));\ } while (0) #define emith_and_r_r_imm(d, s, imm) \ @@ -1125,7 +1133,6 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) #define emith_jump_patchable(target) \ emith_jump(target) -#define emith_jump_patchable_size() 4 #define emith_jump_cond(cond, target) \ emith_xbranch(cond, target, 0) @@ -1135,18 +1142,19 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) emith_jump_cond(cond, target) #define emith_jump_patch(ptr, target, pos) do { \ - u32 *ptr_ = ptr; \ + u32 *ptr_ = (u32 *)ptr; \ u32 val_ = (u32 *)(target) - ptr_ - 2; \ *ptr_ = (*ptr_ & 0xff000000) | (val_ & 0x00ffffff); \ if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)ptr; \ } while (0) +#define emith_jump_patch_inrange(ptr, target) !0 #define emith_jump_patch_size() 4 #define emith_jump_at(ptr, target) do { \ u32 val_ = (u32 *)(target) - (u32 *)(ptr) - 2; \ - emith_flush(); \ EOP_C_B_PTR(ptr, A_COND_AL, 0, val_ & 0xffffff); \ } while (0) +#define emith_jump_at_size() 4 #define emith_jump_reg_c(cond, r) \ EOP_C_BX(cond, r) @@ -1187,8 +1195,8 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) #define emith_ret_to_ctx(offs) \ emith_ctx_write(LR, offs) -#define emith_add_r_ret_imm(r, imm) \ - emith_add_r_r_ptr_imm(r, LR, imm) +#define emith_add_r_ret(r) \ + emith_add_r_r_ptr(r, LR) /* pushes r12 for eabi alignment */ #define emith_push_ret(r) do { \ diff --git a/cpu/drc/emit_arm64.c b/cpu/drc/emit_arm64.c index 72f53dd5..688649b5 100644 --- a/cpu/drc/emit_arm64.c +++ b/cpu/drc/emit_arm64.c @@ -447,6 +447,8 @@ enum { AM_IDX, AM_IDXPOST, AM_IDXREG, AM_IDXPRE }; #define emith_eor_r_r_r(d, s1, s2) \ emith_eor_r_r_r_lsl(d, s1, s2, 0) +#define emith_add_r_r_r_ptr(d, s1, s2) \ + emith_add_r_r_r_lsl_ptr(d, s1, s2, 0) #define emith_and_r_r_r(d, s1, s2) \ emith_and_r_r_r_lsl(d, s1, s2, 0) @@ -546,6 +548,20 @@ static void emith_move_imm64(int r, int wx, int64_t imm) 
#define emith_move_r_imm_c(cond, r, imm) \ emith_move_r_imm(r, imm) +#define emith_move_r_imm_s8_patchable(r, imm) do { \ + if ((s8)(imm) < 0) \ + EMIT(A64_MOVN_IMM(r, ~(s8)(imm), 0)); \ + else \ + EMIT(A64_MOVZ_IMM(r, (s8)(imm), 0)); \ +} while (0) +#define emith_move_r_imm_s8_patch(ptr, imm) do { \ + u32 *ptr_ = (u32 *)ptr; \ + int r_ = *ptr_ & 0x1f; \ + if ((s8)(imm) < 0) \ + EMIT_PTR(ptr_, A64_MOVN_IMM(r_, ~(s8)(imm), 0)); \ + else \ + EMIT_PTR(ptr_, A64_MOVZ_IMM(r_, (s8)(imm), 0)); \ +} while (0) // arithmetic, immediate static void emith_arith_imm(int op, int wx, int rd, int rn, s32 imm) @@ -995,16 +1011,6 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) emith_move_r_imm(arg, imm) // branching; NB: A64 B.cond has only +/- 1MB range -#define emith_bcond(ptr, patch, cond, target) do { \ - u32 disp_ = (u8 *)target - (u8 *)ptr; \ - if (disp_ >= 0xfff00000 || disp_ <= 0x000fffff) { /* can use near B.c */ \ - EMIT_PTR(ptr, A64_BCOND(cond, disp_ & 0x001fffff)); \ - if (patch) EMIT_PTR(ptr, A64_NOP); /* reserve space for far B */ \ - } else { /* far branch if near branch isn't possible */ \ - EMIT_PTR(ptr, A64_BCOND(emith_invert_cond(cond), 8)); \ - EMIT_PTR(ptr, A64_B((disp_ - 4) & 0x0fffffff)); \ - } \ -} while (0) #define emith_jump(target) do {\ u32 disp_ = (u8 *)target - (u8 *)tcache_ptr; \ @@ -1013,30 +1019,37 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) #define emith_jump_patchable(target) \ emith_jump(target) -#define emith_jump_patchable_size() 4 -#define emith_jump_cond(cond, target) \ - emith_bcond(tcache_ptr, 0, cond, target) +#define emith_jump_cond(cond, target) do { \ + u32 disp_ = (u8 *)target - (u8 *)tcache_ptr; \ + EMIT(A64_BCOND(cond, disp_ & 0x001fffff)); \ +} while (0) #define emith_jump_cond_patchable(cond, target) \ - emith_bcond(tcache_ptr, 1, cond, target) + emith_jump_cond(cond, target) #define emith_jump_cond_inrange(target) \ !(((u8 *)target - (u8 *)tcache_ptr + 0x100000) 
>> 21) #define emith_jump_patch(ptr, target, pos) do { \ u32 *ptr_ = (u32 *)ptr; \ - u32 disp_ = (u8 *)(target) - (u8 *)(ptr_); \ - int cond_ = ptr_[0] & 0xf; \ - if ((ptr_[0] & 0xff000000) == 0x54000000) { /* B.cond */ \ - if (ptr_[1] != A64_NOP) cond_ = emith_invert_cond(cond_); \ - emith_bcond(ptr_, 1, cond_, target); \ - } else if (ptr_[0] & 0x80000000) \ - EMIT_PTR(ptr_, A64_BL((disp_) & 0x0fffffff)); \ - else EMIT_PTR(ptr_, A64_B((disp_) & 0x0fffffff)); \ - if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)ptr; \ + u32 disp_ = (u8 *)target - (u8 *)ptr, mask_; \ + if ((*ptr_ & 0xff000000) == 0x54000000) \ + mask_ = 0xff00001f, disp_ <<= 5; /* B.cond, range 21 bit */ \ + else mask_ = 0xfc000000; /* B[L], range 28 bit */ \ + EMIT_PTR(ptr_, (*ptr_ & mask_) | ((disp_ >> 2) & ~mask_)); \ + if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)(ptr_-1); \ } while (0) -#define emith_jump_patch_size() 8 + +#define emith_jump_patch_inrange(ptr, target) \ + !(((u8 *)target - (u8 *)ptr + 0x100000) >> 21) +#define emith_jump_patch_size() 4 + +#define emith_jump_at(ptr, target) do { \ + u32 disp_ = (u8 *)target - (u8 *)ptr; \ + EMIT_PTR(ptr, A64_B(disp_ & 0x0fffffff)); \ +} while (0) +#define emith_jump_at_size() 4 #define emith_jump_reg(r) \ EMIT(A64_BR(r)) @@ -1079,8 +1092,8 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) #define emith_ret_to_ctx(offs) \ emith_ctx_write_ptr(LR, offs) -#define emith_add_r_ret_imm(r, imm) \ - emith_add_r_r_ptr_imm(r, LR, imm) +#define emith_add_r_ret(r) \ + emith_add_r_r_r_ptr(r, LR, r) // NB: pushes r or r18 for SP hardware alignment #define emith_push_ret(r) do { \ diff --git a/cpu/drc/emit_mips.c b/cpu/drc/emit_mips.c index 6ff134d9..ad02ff24 100644 --- a/cpu/drc/emit_mips.c +++ b/cpu/drc/emit_mips.c @@ -285,7 +285,7 @@ static int emith_b_isswap(u32 bop, u32 lop) return bop; else if (emith_is_b(bop) && emith_rd(lop) != emith_rs(bop)) if ((bop & 0xffff) != 0x7fff) // displacement overflow? 
- return (bop & 0xffff0000) | ((bop & 0xffff)+1); + return (bop & 0xffff0000) | ((bop+1) & 0x0000ffff); return 0; } @@ -332,14 +332,14 @@ static void *emith_branch(u32 op) #define JMP_EMIT(cond, ptr) { \ u32 val_ = (u8 *)tcache_ptr - (u8 *)(ptr) - 4; \ - EMIT_PTR(ptr, MIPS_BCONDZ(cond_m, cond_r, val_ & 0x0003ffff)); \ emith_flush(); /* NO delay slot handling across jump targets */ \ + EMIT_PTR(ptr, MIPS_BCONDZ(cond_m, cond_r, val_ & 0x0003ffff)); \ } #define JMP_EMIT_NC(ptr) { \ u32 val_ = (u8 *)tcache_ptr - (u8 *)(ptr) - 4; \ - EMIT_PTR(ptr, MIPS_B(val_ & 0x0003ffff)); \ emith_flush(); \ + EMIT_PTR(ptr, MIPS_B(val_ & 0x0003ffff)); \ } #define EMITH_JMP_START(cond) { \ @@ -645,6 +645,13 @@ static void emith_move_imm(int r, uintptr_t imm) #define emith_move_r_imm_c(cond, r, imm) \ emith_move_r_imm(r, imm) +#define emith_move_r_imm_s8_patchable(r, imm) \ + EMIT(MIPS_ADD_IMM(r, Z0, (s8)(imm))) +#define emith_move_r_imm_s8_patch(ptr, imm) do { \ + u32 *ptr_ = (u32 *)ptr; \ + while (*ptr_ >> 26 != OP_ADDIU) ptr_++; \ + EMIT_PTR(ptr_, (*ptr_ & 0xffff0000) | (u16)(s8)(imm)); \ +} while (0) // arithmetic, immediate static void emith_arith_imm(int op, int rd, int rs, u32 imm) @@ -1162,41 +1169,44 @@ static int emith_cond_check(int cond, int *r) emith_branch(MIPS_J((uintptr_t)target & 0x0fffffff)) #define emith_jump_patchable(target) \ emith_jump(target) -#define emith_jump_patchable_size() 8 /* J+delayslot */ // NB: MIPS conditional branches have only +/- 128KB range #define emith_jump_cond(cond, target) do { \ int r_, mcond_ = emith_cond_check(cond, &r_); \ u32 disp_ = (u8 *)target - (u8 *)tcache_ptr - 4; \ - if (disp_ >= 0xfffe0000 || disp_ <= 0x0001ffff) { /* can use near B */ \ - emith_branch(MIPS_BCONDZ(mcond_,r_,disp_ & 0x0003ffff)); \ - } else { /* far branch if near branch isn't possible */ \ - mcond_ = emith_invert_branch(mcond_); \ - u8 *bp = emith_branch(MIPS_BCONDZ(mcond_, r_, 0)); \ - emith_branch(MIPS_J((uintptr_t)target & 0x0fffffff)); \ - EMIT_PTR(bp, 
MIPS_BCONDZ(mcond_, r_, (u8 *)tcache_ptr-bp-4)); \ - } \ + emith_branch(MIPS_BCONDZ(mcond_,r_,disp_ & 0x0003ffff)); \ } while (0) -#define emith_jump_cond_inrange(target) \ - !(((u8 *)target - (u8 *)tcache_ptr + 0x20000) >> 18) +#define emith_jump_cond_patchable(cond, target) \ + emith_jump_cond(cond, target) -#define emith_jump_cond_patchable(cond, target) do { \ - int r_, mcond_ = emith_cond_check(cond, &r_); \ - mcond_ = emith_invert_branch(mcond_); \ - u8 *bp = emith_branch(MIPS_BCONDZ(mcond_, r_, 0));\ - emith_branch(MIPS_J((uintptr_t)target & 0x0fffffff)); \ - EMIT_PTR(bp, MIPS_BCONDZ(mcond_, r_, (u8 *)tcache_ptr-bp-4)); \ -} while (0) +#define emith_jump_cond_inrange(target) \ + ((u8 *)target - (u8 *)tcache_ptr - 4 < 0x00020000U || \ + (u8 *)target - (u8 *)tcache_ptr - 4 >= 0xfffe0010U) // mind cond_check // NB: returns position of patch for cache maintenance #define emith_jump_patch(ptr, target, pos) do { \ u32 *ptr_ = (u32 *)ptr-1; /* must skip condition check code */ \ - while ((ptr_[0] & 0xf8000000) != OP_J << 26) ptr_ ++; \ - EMIT_PTR(ptr_, MIPS_J((uintptr_t)target & 0x0fffffff)); \ + u32 disp_, mask_; \ + while (!emith_is_j(*ptr_) && !emith_is_b(*ptr_)) ptr_ ++; \ + if (emith_is_b(*ptr_)) \ + mask_ = 0xffff0000, disp_ = (u8 *)target - (u8 *)ptr_ - 4; \ + else mask_ = 0xfc000000, disp_ = (uintptr_t)target; \ + EMIT_PTR(ptr_, (*ptr_ & mask_) | ((disp_ >> 2) & ~mask_)); \ if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)(ptr_-1); \ } while (0) + +#define emith_jump_patch_inrange(ptr, target) \ + ((u8 *)target - (u8 *)ptr - 4 < 0x00020000U || \ + (u8 *)target - (u8 *)ptr - 4 >= 0xfffe0010U) // mind cond_check #define emith_jump_patch_size() 4 +#define emith_jump_at(ptr, target) do { \ + u32 *ptr_ = (u32 *)ptr; \ + EMIT_PTR(ptr_, MIPS_J((uintptr_t)target & 0x0fffffff)); \ + EMIT_PTR(ptr_, MIPS_NOP); \ +} while (0) +#define emith_jump_at_size() 8 + #define emith_jump_reg(r) \ emith_branch(MIPS_JR(r)) #define emith_jump_reg_c(cond, r) \ @@ -1232,8 +1242,8 @@ 
static int emith_cond_check(int cond, int *r) #define emith_ret_to_ctx(offs) \ emith_ctx_write_ptr(LR, offs) -#define emith_add_r_ret_imm(r, imm) \ - emith_add_r_r_ptr_imm(r, LR, imm) +#define emith_add_r_ret(r) \ + emith_add_r_r_ptr(r, LR) // NB: ABI SP alignment is 8 for compatibility with MIPS IV #define emith_push_ret(r) do { \ diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index d8b3a2dd..451fa8d0 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -371,8 +371,16 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common } \ } while (0) -#define emith_move_r_imm_s8(r, imm) \ - emith_move_r_imm(r, (u32)(signed int)(signed char)(imm)) +#define emith_move_r_imm_s8_patchable(r, imm) do { \ + EMIT_REX_IF(0, 0, r); \ + EMIT_OP(0xb8 + ((r)&7)); \ + EMIT((s8)(imm), u32); \ +} while (0) +#define emith_move_r_imm_s8_patch(ptr, imm) do { \ + u8 *ptr_ = ptr; \ + while ((*ptr_ & 0xf8) != 0xb8) ptr_++; \ + EMIT_PTR(ptr_ + 1, (s8)(imm), u32); \ +} while (0) #define emith_arith_r_imm(op, r, imm) do { \ EMIT_REX_IF(0, 0, r); \ @@ -851,7 +859,6 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common #define emith_jump_patchable(target) \ emith_jump(target) -#define emith_jump_patchable_size() 5 /* JMP rel32 */ #define emith_jump_cond(cond, ptr) do { \ u32 disp = (u8 *)(ptr) - ((u8 *)tcache_ptr + 6); \ @@ -867,15 +874,17 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common u32 disp_ = (u8 *)(target) - ((u8 *)(ptr) + 4); \ u32 offs_ = (*(u8 *)(ptr) == 0x0f) ? 
2 : 1; \ EMIT_PTR((u8 *)(ptr) + offs_, disp_ - offs_, u32); \ - if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)ptr; \ + if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)ptr + offs_; \ } while (0) -#define emith_jump_patch_size() 6 +#define emith_jump_patch_size() 4 +#define emith_jump_patch_inrange(ptr, target) !0 #define emith_jump_at(ptr, target) do { \ u32 disp_ = (u8 *)(target) - ((u8 *)(ptr) + 5); \ EMIT_PTR(ptr, 0xe9, u8); \ EMIT_PTR((u8 *)(ptr) + 1, disp_, u32); \ } while (0) +#define emith_jump_at_size() 5 #define emith_call(ptr) do { \ u32 disp = (u8 *)(ptr) - ((u8 *)tcache_ptr + 5); \ @@ -900,9 +909,9 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common #define emith_ret() \ EMIT_OP(0xc3) -#define emith_add_r_ret_imm(r, imm) do { \ - emith_read_r_r_offs_ptr(r, xSP, 0); \ - emith_add_r_r_ptr_imm(r, r, imm); \ +#define emith_add_r_ret(r) do { \ + EMIT_REX_IF(1, r, xSP); \ + emith_deref_modrm(0x03, 0, r, xSP); /* add r, [xsp] */ \ } while (0) #define emith_jump_reg(r) \ @@ -974,7 +983,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common emith_move_r_imm(rd, imm); \ } while (0) -#define host_instructions_updated(base, end) +#define host_instructions_updated(base, end) (void)(base),(void)(end) #define emith_update_cache() /**/ #define emith_rw_offs_max() 0xffffffff diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index ec8554cc..932f21cf 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -69,7 +69,7 @@ // 800 - state dump on exit // { #ifndef DRC_DEBUG -#define DRC_DEBUG 0//x8e7 +#define DRC_DEBUG 0//x8c7 #endif #if DRC_DEBUG @@ -288,15 +288,19 @@ static u8 *tcache_ptr; #define MAX_BLOCK_ENTRIES (BLOCK_INSN_LIMIT / 6) +enum { BL_JMP=1, BL_LDJMP, BL_JCCBLX }; struct block_link { + short tcache_id; + short type; // BL_JMP et al u32 target_pc; void *jump; // insn address + void *blx; // block link/exit area if any + u8 jdisp[8]; // jump backup buffer struct block_link *next; // either in 
block_entry->links or unresolved struct block_link *o_next; // ...in block_entry->o_links struct block_link *prev; struct block_link *o_prev; struct block_entry *target;// target block this is linked in (be->links) - int tcache_id; }; struct block_entry { @@ -686,18 +690,24 @@ static int dr_ctx_get_mem_ptr(SH2 *sh2, u32 a, u32 *mask) return poffs; } -static struct block_entry *dr_get_entry(u32 pc, int is_slave, int *tcache_id) +static int dr_get_tcache_id(u32 pc, int is_slave) { - struct block_entry *be; u32 tcid = 0; if ((pc & 0xe0000000) == 0xc0000000) tcid = 1 + is_slave; // data array if ((pc & ~0xfff) == 0) tcid = 1 + is_slave; // BIOS - *tcache_id = tcid; + return tcid; +} - be = HASH_FUNC(hash_tables[tcid], pc, HASH_TABLE_SIZE(tcid) - 1); +static struct block_entry *dr_get_entry(u32 pc, int is_slave, int *tcache_id) +{ + struct block_entry *be; + + *tcache_id = dr_get_tcache_id(pc, is_slave); + + be = HASH_FUNC(hash_tables[*tcache_id], pc, HASH_TABLE_SIZE(*tcache_id) - 1); if (be != NULL) // don't ask... 
gcc code generation hint for (; be != NULL; be = be->next) if (be->pc == pc) @@ -1101,17 +1111,11 @@ static struct block_desc *dr_add_block(u32 addr, int size, bd->size_lit = size_lit; bd->tcache_ptr = tcache_ptr; bd->crc = crc; - bd->active = 1; - - bd->entry_count = 1; - bd->entryp[0].pc = addr; - bd->entryp[0].tcache_ptr = tcache_ptr; - bd->entryp[0].links = bd->entryp[0].o_links = NULL; + bd->active = 0; + bd->entry_count = 0; #if (DRC_DEBUG & 2) - bd->entryp[0].block = bd; bd->refcount = 0; #endif - add_to_hashlist(&bd->entryp[0], tcache_id); *blk_id = *bcount; (*bcount)++; @@ -1150,11 +1154,33 @@ static void dr_block_link(struct block_entry *be, struct block_link *bl, int emi bl->jump, bl->target_pc, be->tcache_ptr); if (emit_jump) { - u8 *jump; - emith_jump_patch(bl->jump, be->tcache_ptr, &jump); + u8 *jump = bl->jump; + int jsz = emith_jump_patch_size(); + if (bl->type == BL_JMP) { // patch: jump @entry + // inlined: @jump far jump to target + emith_jump_patch(jump, be->tcache_ptr, &jump); + } else if (bl->type == BL_LDJMP) { // write: jump @entry + // inlined: @jump far jump to target + emith_jump_at(jump, be->tcache_ptr); + jsz = emith_jump_at_size(); + } else if (bl->type == BL_JCCBLX) { // patch: jump cond -> jump @entry + if (emith_jump_patch_inrange(bl->jump, be->tcache_ptr)) { + // inlined: @jump near jumpcc to target + emith_jump_patch(jump, be->tcache_ptr, &jump); + } else { // dispatcher cond immediate + // via blx: @jump near jumpcc to blx; @blx far jump + emith_jump_patch(jump, bl->blx, &jump); + emith_jump_at(bl->blx, be->tcache_ptr); + if ((((uintptr_t)bl->blx & 0xf) + emith_jump_at_size()-1) > 0xf) + host_instructions_updated(bl->blx, bl->blx + emith_jump_at_size()-1); + } + } else { + printf("unknown BL type %d\n", bl->type); + exit(1); + } // only needs sync if patch is possibly crossing cacheline (assume 16 byte) - if ((uintptr_t)jump >>4 != ((uintptr_t)jump+emith_jump_patch_size()-1) >>4) - host_instructions_updated(jump, 
jump+emith_jump_patch_size()); + if ((((uintptr_t)jump & 0xf) + jsz-1) > 0xf) + host_instructions_updated(jump, jump + jsz-1); } // move bl to block_entry @@ -1172,10 +1198,26 @@ static void dr_block_unlink(struct block_link *bl, int emit_jump) if (bl->target) { if (emit_jump) { - u8 *jump; - emith_jump_patch(bl->jump, sh2_drc_dispatcher, &jump); + u8 *jump = bl->jump; + int jsz = emith_jump_patch_size(); + if (bl->type == BL_JMP) { // jump_patch @dispatcher + // inlined: @jump far jump to dispatcher + emith_jump_patch(jump, sh2_drc_dispatcher, &jump); + } else if (bl->type == BL_LDJMP) { // restore: load pc, jump @dispatcher + // inlined: @jump load target_pc, far jump to dispatcher + memcpy(jump, bl->jdisp, emith_jump_at_size()); + jsz = emith_jump_at_size(); + } else if (bl->type == BL_JCCBLX) { // jump cond @blx; @blx: load pc, jump + // via blx: @jump near jumpcc to blx; @blx load target_pc, far jump + emith_jump_patch(bl->jump, bl->blx, &jump); + memcpy(bl->blx, bl->jdisp, emith_jump_at_size()); + host_instructions_updated(bl->blx, bl->blx + emith_jump_at_size()-1); + } else { + printf("unknown BL type %d\n", bl->type); + exit(1); + } // update cpu caches since the previous jump target doesn't exist anymore - host_instructions_updated(jump, jump+emith_jump_patch_size()); + host_instructions_updated(jump, jump + jsz-1); } if (bl->prev) @@ -1189,18 +1231,17 @@ static void dr_block_unlink(struct block_link *bl, int emit_jump) } #endif -static void *dr_prepare_ext_branch(struct block_entry *owner, u32 pc, int is_slave, int tcache_id) +static struct block_link *dr_prepare_ext_branch(struct block_entry *owner, u32 pc, int is_slave, int tcache_id) { #if LINK_BRANCHES struct block_link *bl = block_link_pool[tcache_id]; int cnt = block_link_pool_counts[tcache_id]; - struct block_entry *be = NULL; int target_tcache_id; // get the target block entry - be = dr_get_entry(pc, is_slave, &target_tcache_id); + target_tcache_id = dr_get_tcache_id(pc, is_slave); if 
(target_tcache_id && target_tcache_id != tcache_id) - return sh2_drc_dispatcher; + return NULL; // get a block link if (blink_free[tcache_id] != NULL) { @@ -1208,29 +1249,24 @@ static void *dr_prepare_ext_branch(struct block_entry *owner, u32 pc, int is_sla blink_free[tcache_id] = bl->next; } else if (cnt >= BLOCK_LINK_MAX_COUNT(tcache_id)) { dbg(1, "bl overflow for tcache %d", tcache_id); - return sh2_drc_dispatcher; + return NULL; } else { bl += cnt; block_link_pool_counts[tcache_id] = cnt+1; } - // prepare link and add to ougoing list of owner + // prepare link and add to outgoing list of owner bl->tcache_id = tcache_id; bl->target_pc = pc; bl->jump = tcache_ptr; + bl->blx = NULL; bl->o_next = owner->o_links; owner->o_links = bl; - if (be != NULL) { - dr_block_link(be, bl, 0); // jump not yet emitted by translate() - return be->tcache_ptr; - } - else { - add_to_hashlist_unresolved(bl, tcache_id); - return sh2_drc_dispatcher; - } + add_to_hashlist_unresolved(bl, tcache_id); + return bl; #else - return sh2_drc_dispatcher; + return NULL; #endif } @@ -1272,6 +1308,27 @@ static void dr_link_outgoing(struct block_entry *be, int tcache_id, int is_slave #endif } +static void dr_activate_block(struct block_desc *bd, int tcache_id, int is_slave) +{ + int i; + + // connect branches + for (i = 0; i < bd->entry_count; i++) { + struct block_entry *entry = &bd->entryp[i]; + add_to_hashlist(entry, tcache_id); + // incoming branches + dr_link_blocks(entry, tcache_id); + if (!tcache_id) + dr_link_blocks(entry, is_slave?2:1); + // outgoing branches + dr_link_outgoing(entry, tcache_id, is_slave); + } + + // mark memory for overwrite detection + dr_mark_memory(1, bd, tcache_id, 0); + bd->active = 1; +} + #define ADD_TO_ARRAY(array, count, item, failcode) { \ if (count >= ARRAY_SIZE(array)) { \ dbg(1, "warning: " #array " overflow"); \ @@ -2422,6 +2479,7 @@ static void rcache_invalidate(void) { int i; gconst_invalidate(); + rcache_unlock_all(); for (i = 0; i < ARRAY_SIZE(cache_regs); 
i++) rcache_free_vreg(i); @@ -2446,7 +2504,6 @@ static void rcache_invalidate(void) static void rcache_flush(void) { - rcache_unlock_all(); rcache_clean(); rcache_invalidate(); } @@ -2916,13 +2973,22 @@ static void *dr_get_pc_base(u32 pc, SH2 *sh2); static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) { + // branch targets in current block u32 branch_target_pc[MAX_LOCAL_BRANCHES]; void *branch_target_ptr[MAX_LOCAL_BRANCHES]; int branch_target_count = 0; - void *branch_patch_ptr[MAX_LOCAL_BRANCHES]; + // unresolved local forward branches, for fixup at block end u32 branch_patch_pc[MAX_LOCAL_BRANCHES]; + void *branch_patch_ptr[MAX_LOCAL_BRANCHES]; int branch_patch_count = 0; + // external branch targets with a block link/exit area + u32 blx_target_pc[MAX_LOCAL_BRANCHES]; + void *blx_target_ptr[MAX_LOCAL_BRANCHES]; + struct block_link *blx_target_bl[MAX_LOCAL_BRANCHES]; + int blx_target_count = 0; + u8 op_flags[BLOCK_INSN_LIMIT]; + struct drcf { int delay_reg:8; u32 loop_type:8; @@ -2931,9 +2997,12 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) u32 pending_branch_direct:1; u32 pending_branch_indirect:1; } drcf = { 0, }; + #if LOOP_OPTIMIZER - void *pinned_loop_ptr[MAX_LOCAL_BRANCHES/16]; + // loops with pinned registers for optimzation + // pinned regs are like statics and don't need saving/restoring inside a loop u32 pinned_loop_pc[MAX_LOCAL_BRANCHES/16]; + void *pinned_loop_ptr[MAX_LOCAL_BRANCHES/16]; u32 pinned_loop_mask[MAX_LOCAL_BRANCHES/16]; int pinned_loop_count = 0; #endif @@ -2976,24 +3045,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) base_literals, end_literals - base_literals); if (block) { - // connect branches dbg(2, "== %csh2 reuse block %08x-%08x,%08x-%08x -> %p", sh2->is_slave ? 
's' : 'm', base_pc, end_pc, base_literals, end_literals, block->entryp->tcache_ptr); - for (i = 0; i < block->entry_count; i++) { - entry = &block->entryp[i]; - add_to_hashlist(entry, tcache_id); -#if LINK_BRANCHES - // incoming branches - dr_link_blocks(entry, tcache_id); - if (!tcache_id) - dr_link_blocks(entry, sh2->is_slave?2:1); - // outgoing branches - dr_link_outgoing(entry, tcache_id, sh2->is_slave); -#endif - } - // mark memory for overwrite detection - dr_mark_memory(1, block, tcache_id, 0); - block->active = 1; + dr_activate_block(block, tcache_id, sh2->is_slave); emith_update_cache(); return block->entryp[0].tcache_ptr; } @@ -3069,7 +3123,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) if (op_flags[v] & OF_BASIC_LOOP) { m3 &= ~rcache_regs_static & ~BITMASK4(SHR_PC, SHR_PR, SHR_SR, SHR_MEM); if (m3 && count_bits(m3) < count_bits(rcache_hregs_reg) && - pinned_loop_count < ARRAY_SIZE(pinned_loop_pc)) { + pinned_loop_count < ARRAY_SIZE(pinned_loop_pc)-1) { pinned_loop_mask[pinned_loop_count] = m3; pinned_loop_pc[pinned_loop_count++] = base_pc + 2*v; } else @@ -3080,6 +3134,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } #endif } + pinned_loop_pc[pinned_loop_count] = -1; if (branch_target_count > 0) { memset(branch_target_ptr, 0, sizeof(branch_target_ptr[0]) * branch_target_count); @@ -3101,7 +3156,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // clear stale state after compile errors - rcache_unlock_all(); rcache_invalidate(); emith_invalidate_t(); drcf = (struct drcf) { 0 }; @@ -3146,39 +3200,31 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_sync_t(sr); rcache_flush(); emith_flush(); - - // make block entry - v = block->entry_count; - entry = &block->entryp[v]; - if (v < ARRAY_SIZE(block->entryp)) - { - entry = &block->entryp[v]; - entry->pc = pc; - entry->tcache_ptr = tcache_ptr; - entry->links = entry->o_links = NULL; -#if (DRC_DEBUG & 2) - entry->block = block; 
-#endif - add_to_hashlist(entry, tcache_id); - block->entry_count++; - - dbg(2, "-- %csh2 block #%d,%d entry %08x -> %p", - sh2->is_slave ? 's' : 'm', tcache_id, blkid_main, - pc, tcache_ptr); - } - else { - dbg(1, "too many entryp for block #%d,%d pc=%08x", - tcache_id, blkid_main, pc); - break; - } - } else { - entry = block->entryp; } - // since we made a block entry, link any other blocks that jump to it - dr_link_blocks(entry, tcache_id); - if (!tcache_id) // can safely link from cpu-local to global memory - dr_link_blocks(entry, sh2->is_slave?2:1); + // make block entry + v = block->entry_count; + entry = &block->entryp[v]; + if (v < ARRAY_SIZE(block->entryp)) + { + entry = &block->entryp[v]; + entry->pc = pc; + entry->tcache_ptr = tcache_ptr; + entry->links = entry->o_links = NULL; +#if (DRC_DEBUG & 2) + entry->block = block; +#endif + block->entry_count++; + + dbg(2, "-- %csh2 block #%d,%d entry %08x -> %p", + sh2->is_slave ? 's' : 'm', tcache_id, blkid_main, + pc, tcache_ptr); + } + else { + dbg(1, "too many entryp for block #%d,%d pc=%08x", + tcache_id, blkid_main, pc); + break; + } v = find_in_sorted_array(branch_target_pc, branch_target_count, pc); if (v >= 0) @@ -3220,29 +3266,35 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) #endif // check cycles - tmp = rcache_get_tmp_arg(0); sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); emith_cmp_r_imm(sr, 0); + #if LOOP_OPTIMIZER - // on drc exit pinned registers must be saved + u8 *jp = NULL; if (op_flags[i] & OF_BASIC_LOOP) { - EMITH_JMP_START(DCOND_GT); + // if exiting a pinned loop pinned regs must be written back to ctx + // since they are reloaded in the loop entry code + jp = tcache_ptr; + emith_jump_cond_patchable(DCOND_GT, jp); // XXX need API for JMP_POS rcache_save_pinned(); - emith_move_r_imm(tmp, pc); - emith_jump(sh2_drc_exit); - EMITH_JMP_END(DCOND_GT); - } else -#endif - if (emith_jump_cond_inrange(sh2_drc_exit)) { - emith_move_r_imm_c(DCOND_LE, tmp, pc); - 
emith_jump_cond(DCOND_LE, sh2_drc_exit); - } else { - EMITH_JMP_START(DCOND_GT); - emith_move_r_imm(tmp, pc); - emith_jump(sh2_drc_exit); - EMITH_JMP_END(DCOND_GT); } - rcache_free_tmp(tmp); +#endif + if (blx_target_count < ARRAY_SIZE(blx_target_pc)) { + // exit via stub in blx table (saves some 1-3 insns in the main flow) + blx_target_pc[blx_target_count] = pc|1; + blx_target_bl[blx_target_count] = NULL; + blx_target_ptr[blx_target_count++] = tcache_ptr; + } else { + // blx table full, must inline exit code + tmp = rcache_get_tmp_arg(0); + emith_move_r_imm_c(DCOND_LE, tmp, pc); + rcache_free_tmp(tmp); + } + emith_jump_cond_patchable(DCOND_LE, tcache_ptr); +#if LOOP_OPTIMIZER + if (op_flags[i] & OF_BASIC_LOOP) + emith_jump_patch(jp, tcache_ptr, NULL); +#endif #if (DRC_DEBUG & 32) // block hit counter @@ -3880,7 +3932,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 2: // SHAL Rn 0100nnnn00100000 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); - emith_sync_t(sr); + emith_invalidate_t(); emith_tpop_carry(sr, 0); // dummy emith_lslf(tmp, tmp2, 1); emith_tpush_carry(sr, 0); @@ -3909,7 +3961,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 2: // SHAR Rn 0100nnnn00100001 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); - emith_sync_t(sr); + emith_invalidate_t(); emith_tpop_carry(sr, 0); // dummy if (op & 0x20) { emith_asrf(tmp, tmp2, 1); @@ -3967,7 +4019,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x05: // ROTR Rn 0100nnnn00000101 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); - emith_sync_t(sr); + emith_invalidate_t(); emith_tpop_carry(sr, 0); // dummy if (op & 1) { emith_rorf(tmp, tmp2, 1); @@ -4351,11 +4403,12 @@ end_op: int cond = -1; int ctaken = 0; void *target = NULL; - int patchable = 0; + struct block_link *bl = NULL; if (OP_ISBRACND(opd_b->op)) 
ctaken = (op_flags[i] & OF_DELAY_OP) ? 1 : 2; cycles += ctaken; // assume branch taken + #if LOOP_OPTIMIZER if ((drcf.loop_type == OF_IDLE_LOOP || (drcf.loop_type == OF_DELAY_LOOP && drcf.delay_reg >= 0))) @@ -4365,14 +4418,35 @@ end_op: emith_sh2_delay_loop(cycles, drcf.delay_reg); drcf.polling = drcf.loop_type = 0; } + + if (target_pc < pc && pinned_loop_pc[pinned_loop_count] == target_pc) { + // backward jump at end of optimized loop + rcache_unpin_all(); + target = pinned_loop_ptr[pinned_loop_count]; + pinned_loop_count ++; + } #endif sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); FLUSH_CYCLES(sr); + rcache_unlock_all(); rcache_clean(); - // emit condition test for conditional branch +#if CALL_STACK + void *rtsadd = NULL, *rtsret = NULL; + if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) { + // BSR - save rts data + tmp = rcache_get_tmp_arg(1); + rtsadd = tcache_ptr; + emith_move_r_imm_s8_patchable(tmp, 0); + rcache_invalidate_tmp(); + emith_call(sh2_drc_dispatcher_call); + rtsret = tcache_ptr; + } +#endif + if (OP_ISBRACND(opd_b->op)) { + // BT[S], BF[S] - emit condition test cond = (opd_b->op == OP_BRANCH_CF) ? 
DCOND_EQ : DCOND_NE; if (delay_dep_fw & BITMASK1(SHR_T)) { emith_sync_t(sr); @@ -4396,61 +4470,118 @@ end_op: { // local branch if (branch_target_ptr[v]) { - // jumps back can be linked here since host PC is already known + // local backward jump, link here now since host PC is already known target = branch_target_ptr[v]; + if (cond != -1) + emith_jump_cond(cond, target); + else { + emith_jump(target); + rcache_invalidate(); + } } else if (branch_patch_count < MAX_LOCAL_BRANCHES) { + // local forward jump target = tcache_ptr; branch_patch_pc[branch_patch_count] = target_pc; branch_patch_ptr[branch_patch_count] = target; branch_patch_count++; - patchable = 1; + if (cond != -1) + emith_jump_cond_patchable(cond, target); + else { + emith_jump_patchable(target); + rcache_invalidate(); + } } else dbg(1, "warning: too many local branches"); } #endif - rcache_unlock_all(); -#if LOOP_OPTIMIZER - if (target && pinned_loop_pc[pinned_loop_count] == target_pc) { - rcache_unpin_all(); - target = pinned_loop_ptr[pinned_loop_count]; - pinned_loop_count ++; - } -#endif - if (target == NULL) { // can't resolve branch locally, make a block exit - rcache_clean(); - tmp = rcache_get_tmp_arg(0); - emith_move_r_imm(tmp, target_pc); - rcache_free_tmp(tmp); + bl = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id); + if (cond != -1) { +#if 1 + if (bl) { + if (blx_target_count < ARRAY_SIZE(blx_target_pc)) { + // conditional jumps get a blx stub for the far jump + blx_target_pc[blx_target_count] = target_pc; + blx_target_bl[blx_target_count] = bl; + blx_target_ptr[blx_target_count++] = tcache_ptr; + bl->type = BL_JCCBLX; + target = tcache_ptr; + } else { + // blx table full, patch jump only + tmp = rcache_get_tmp_arg(0); + emith_move_r_imm(tmp, target_pc); + rcache_free_tmp(tmp); + bl->jump = tcache_ptr; + bl->type = BL_JMP; + target = sh2_drc_dispatcher; + } + emith_jump_cond_patchable(cond, target); + } else { + // cannot link, inline jump @dispatcher + 
EMITH_JMP_START(emith_invert_cond(cond)); + tmp = rcache_get_tmp_arg(0); + emith_move_r_imm(tmp, target_pc); + rcache_free_tmp(tmp); + target = sh2_drc_dispatcher; -#if CALL_STACK - if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) { - // BSR - emith_call(sh2_drc_dispatcher_call); - } -#endif + emith_jump(target); + EMITH_JMP_END(emith_invert_cond(cond)); + } +#elif 1 + // jump @dispatcher - ARM 32bit version with conditional execution + EMITH_SJMP_START(emith_invert_cond(cond)); + tmp = rcache_get_tmp_arg(0); + emith_move_r_imm_c(cond, tmp, target_pc); + rcache_free_tmp(tmp); + target = sh2_drc_dispatcher; - target = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id); - patchable = 1; - } - - // create branch - if (cond != -1) { - if (patchable) + if (bl) { + bl->jump = tcache_ptr; + bl->type = BL_JMP; + } emith_jump_cond_patchable(cond, target); - else - emith_jump_cond(cond, target); - } else { - rcache_invalidate(); - if (patchable) + EMITH_SJMP_END(emith_invert_cond(cond)); +#else + // jump @dispatcher - generic version (jump !cond @over, jump @trgt) + EMITH_JMP_START(emith_invert_cond(cond)); + if (bl) { + bl->jump = tcache_ptr; + bl->type = BL_LDJMP; + } + tmp = rcache_get_tmp_arg(0); + emith_move_r_imm(tmp, target_pc); + rcache_free_tmp(tmp); + target = sh2_drc_dispatcher; + emith_jump_patchable(target); - else - emith_jump(target); + EMITH_JMP_END(emith_invert_cond(cond)); +#endif + } else { + // unconditional, has the far jump inlined + if (bl) + bl->type = BL_LDJMP; + + tmp = rcache_get_tmp_arg(0); + emith_move_r_imm(tmp, target_pc); + rcache_free_tmp(tmp); + target = sh2_drc_dispatcher; + + emith_jump_patchable(target); + rcache_invalidate(); + } } + emith_flush(); + if (bl) + memcpy(bl->jdisp, bl->jump, emith_jump_at_size()); +#if CALL_STACK + if (rtsadd) + emith_move_r_imm_s8_patch(rtsadd, tcache_ptr - (u8 *)rtsret); +#endif + // branch not taken, correct cycle count if (ctaken) emith_add_r_imm(sr, ctaken << 12); @@ 
-4463,35 +4594,57 @@ end_op: drcf.polling = drcf.loop_type = 0; } else if (drcf.pending_branch_indirect) { - void *target; u32 target_pc; + struct block_link *bl = NULL; sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); FLUSH_CYCLES(sr); emith_sync_t(sr); rcache_clean(); + tmp = rcache_get_reg_arg(0, SHR_PC, NULL); - rcache_invalidate(); + #if CALL_STACK struct op_data *opd_b = (op_flags[i] & OF_DELAY_OP) ? opd-1 : opd; - if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) { - // JSR/BSRF - emith_call(sh2_drc_dispatcher_call); - } + void *rtsadd = NULL, *rtsret = NULL; + if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) { + // JSR, BSRF - save rts data + tmp = rcache_get_tmp_arg(1); + rtsadd = tcache_ptr; + emith_move_r_imm_s8_patchable(tmp, 0); + rcache_invalidate_tmp(); + emith_call(sh2_drc_dispatcher_call); + rtsret = tcache_ptr; + } +#endif + +#if CALL_STACK if (opd_b->rm == SHR_PR) { - // RTS + // RTS - restore rts data, else jump to dispatcher emith_jump(sh2_drc_dispatcher_return); } else #endif if (gconst_get(SHR_PC, &target_pc)) { - // JMP const, treat like unconditional direct branch - target = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id); - emith_jump_patchable(target); + // JMP, JSR, BRAF, BSRF const - treat like unconditional direct branch + bl = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id); + if (bl) { // pc already loaded somewhere else, can patch jump only + bl->type = BL_JMP; + bl->jump = tcache_ptr; + } + emith_jump_patchable(sh2_drc_dispatcher); } else { - // JMP + // JMP, JSR, BRAF, BSRF not const emith_jump(sh2_drc_dispatcher); } + rcache_invalidate(); + + emith_flush(); +#if CALL_STACK + if (rtsadd) + emith_move_r_imm_s8_patch(rtsadd, tcache_ptr - (u8 *)rtsret); +#endif + drcf.pending_branch_indirect = 0; drcf.polling = drcf.loop_type = 0; } @@ -4508,24 +4661,48 @@ end_op: if (! 
OP_ISBRAUC(opd->op)) { - void *target; + struct block_link *bl; tmp = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); FLUSH_CYCLES(tmp); emith_sync_t(tmp); rcache_clean(); + bl = dr_prepare_ext_branch(block->entryp, pc, sh2->is_slave, tcache_id); + if (bl) + bl->type = BL_LDJMP; tmp = rcache_get_tmp_arg(0); emith_move_r_imm(tmp, pc); - - target = dr_prepare_ext_branch(block->entryp, pc, sh2->is_slave, tcache_id); - if (target == NULL) - return NULL; + emith_jump_patchable(sh2_drc_dispatcher); rcache_invalidate(); - emith_jump_patchable(target); + emith_flush(); + if (bl) + memcpy(bl->jdisp, bl->jump, emith_jump_at_size()); } else rcache_flush(); + + // emit blx area + for (i = 0; i < blx_target_count; i++) { + void *target = (blx_target_pc[i] & 1 ? sh2_drc_exit : sh2_drc_dispatcher); + struct block_link *bl = blx_target_bl[i]; + + emith_pool_check(); + if (bl) + bl->blx = tcache_ptr; + emith_jump_patch(blx_target_ptr[i], tcache_ptr, NULL); + tmp = rcache_get_tmp_arg(0); + emith_move_r_imm(tmp, blx_target_pc[i] & ~1); + emith_jump(target); + rcache_invalidate(); + emith_flush(); + if (bl) + memcpy(bl->jdisp, bl->blx, emith_jump_at_size()); + } + emith_flush(); + do_host_disasm(tcache_id); + + emith_pool_commit(0); // link local branches for (i = 0; i < branch_patch_count; i++) { @@ -4539,20 +4716,18 @@ end_op: target = tcache_ptr; tmp = rcache_get_tmp_arg(0); emith_move_r_imm(tmp, branch_patch_pc[i]); - rcache_flush(); emith_jump(sh2_drc_dispatcher); + rcache_flush(); } emith_jump_patch(branch_patch_ptr[i], target, NULL); } - emith_pool_commit(0); - - dr_mark_memory(1, block, tcache_id, 0); - tcache_ptrs[tcache_id] = tcache_ptr; - host_instructions_updated(block_entry_ptr, tcache_ptr); + dr_activate_block(block, tcache_id, sh2->is_slave); + emith_update_cache(); + do_host_disasm(tcache_id); dbg(2, " block #%d,%d -> %p tcache %d/%d, insns %d -> %d %.3f", @@ -4574,7 +4749,6 @@ end_op: fflush(stdout); #endif - emith_update_cache(); return block_entry_ptr; } @@ -4769,14 
+4943,14 @@ static void sh2_generate_utils(void) // pc = sh2_drc_dispatcher_call(u32 pc) sh2_drc_dispatcher_call = (void *)tcache_ptr; emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx)); - emith_ctx_read(arg1, SHR_PR * 4); emith_add_r_imm(arg2, 2*sizeof(void *)); emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *)); emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx)); emith_add_r_r_r_lsl_ptr(arg2, CONTEXT_REG, arg2, 0); - emith_write_r_r_offs(arg1, arg2, offsetof(SH2, rts_cache)); - emith_add_r_ret_imm(arg1, emith_jump_patchable_size()); // skip jump_patchable for rts host address - emith_write_r_r_offs_ptr(arg1, arg2, offsetof(SH2, rts_cache) + sizeof(void *)); + emith_ctx_read(arg3, SHR_PR * 4); + emith_add_r_ret(arg1); + emith_write_r_r_offs_ptr(arg1, arg2, offsetof(SH2, rts_cache)+sizeof(void *)); + emith_write_r_r_offs(arg3, arg2, offsetof(SH2, rts_cache)); emith_ret(); emith_flush(); @@ -5378,10 +5552,8 @@ void sh2_drc_finish(SH2 *sh2) if (block_tables[0] == NULL) return; - sh2_drc_flush_all(); - - for (i = 0; i < TCACHE_BUFFERS; i++) { #if (DRC_DEBUG & 4) + for (i = 0; i < TCACHE_BUFFERS; i++) { printf("~~~ tcache %d\n", i); #if 0 tcache_dsm_ptrs[i] = tcache_bases[i]; @@ -5394,8 +5566,12 @@ void sh2_drc_finish(SH2 *sh2) } #endif printf("max links: %d\n", block_link_pool_counts[i]); + } #endif + sh2_drc_flush_all(); + + for (i = 0; i < TCACHE_BUFFERS; i++) { if (block_tables[i] != NULL) free(block_tables[i]); block_tables[i] = NULL; From 32818177bd7d5266dc08a4034373f20b444a3be6 Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 28 Sep 2019 17:12:56 +0200 Subject: [PATCH 0220/1110] sh2 drc: drc exit, block linking and branch handling revised (overlooked commit) --- cpu/sh2/compiler.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 932f21cf..2c9e5b7a 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -3270,7 +3270,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, 
int tcache_id) emith_cmp_r_imm(sr, 0); #if LOOP_OPTIMIZER - u8 *jp = NULL; + void *jp = NULL; if (op_flags[i] & OF_BASIC_LOOP) { // if exiting a pinned loop pinned regs must be written back to ctx // since they are reloaded in the loop entry code @@ -3292,8 +3292,10 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } emith_jump_cond_patchable(DCOND_LE, tcache_ptr); #if LOOP_OPTIMIZER - if (op_flags[i] & OF_BASIC_LOOP) + if (op_flags[i] & OF_BASIC_LOOP) { + emith_flush(); emith_jump_patch(jp, tcache_ptr, NULL); + } #endif #if (DRC_DEBUG & 32) From a0f5ba4067742849e3f9d56f3f89bb77b3124b04 Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 4 Oct 2019 17:11:18 +0200 Subject: [PATCH 0221/1110] sh2 drc: bug fixing and optimization in register cache and branch handling --- cpu/drc/emit_arm.c | 15 ++-- cpu/drc/emit_mips.c | 4 +- cpu/sh2/compiler.c | 214 +++++++++++++++++++------------------------- 3 files changed, 104 insertions(+), 129 deletions(-) diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index b8c6419c..ec2958b1 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -160,7 +160,12 @@ static NOINLINE void EMIT(u32 op, u32 dst, u32 src) } } } - if (emit_index <= EMIT_CACHE_SIZE) { + if (dst & M1(PC)) { + // commit everything if a branch insn is emitted + for (i = 1; i <= emit_index+1; i++) + EMIT_PTR(emit_ptr, emit_cache[i].op); + emit_index = 0; + } else if (emit_index <= EMIT_CACHE_SIZE) { // queue not yet full emit_index++; } else { @@ -654,13 +659,14 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) literal_insn[pool_index] += move_offs; } -#define JMP_POS(ptr) \ +#define JMP_POS(ptr) { \ ptr = tcache_ptr; \ - EMIT(0,M1(PC),0); + EMIT(0,M1(PC),0); \ +} #define JMP_EMIT(cond, ptr) { \ u32 val_ = (u32 *)tcache_ptr - (u32 *)(ptr) - 2; \ - emith_flush(); \ + emith_flush(); /* NO insn swapping across jump targets */ \ EOP_C_B_PTR(ptr, cond, 0, val_ & 0xffffff); \ } @@ -890,7 +896,6 @@ static inline void emith_pool_adjust(int 
pool_index, int move_offs) emith_top_imm(cond, A_OP_TST, r, imm) #define emith_move_r_imm_s8_patchable(r, imm) do { \ - emith_flush(); \ if ((s8)(imm) < 0) \ EOP_MVN_IMM(r, 0, (u8)~(imm)); \ else \ diff --git a/cpu/drc/emit_mips.c b/cpu/drc/emit_mips.c index ad02ff24..fadf5744 100644 --- a/cpu/drc/emit_mips.c +++ b/cpu/drc/emit_mips.c @@ -1249,11 +1249,11 @@ static int emith_cond_check(int cond, int *r) #define emith_push_ret(r) do { \ emith_sub_r_imm(SP, 8+16); /* reserve new arg save area (16) */ \ emith_write_r_r_offs(LR, SP, 4+16); \ - if ((r) >= 0) emith_write_r_r_offs(r, SP, 0+16); \ + if ((r) > 0) emith_write_r_r_offs(r, SP, 0+16); \ } while (0) #define emith_pop_and_ret(r) do { \ - if ((r) >= 0) emith_read_r_r_offs(r, SP, 0+16); \ + if ((r) > 0) emith_read_r_r_offs(r, SP, 0+16); \ emith_read_r_r_offs(LR, SP, 4+16); \ emith_add_r_imm(SP, 8+16); \ emith_ret(); \ diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 2c9e5b7a..449ae0e1 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -172,7 +172,6 @@ enum op_types { static u8 *tcache_dsm_ptrs[3]; static char sh2dasm_buff[64]; #define do_host_disasm(tcid) \ - emith_flush(); \ host_dasm(tcache_dsm_ptrs[tcid], emith_insn_ptr() - tcache_dsm_ptrs[tcid]); \ tcache_dsm_ptrs[tcid] = emith_insn_ptr() #else @@ -200,6 +199,7 @@ static char sh2dasm_buff[64]; #if (DRC_DEBUG & (8|256|512|1024)) || defined(PDB) #if (DRC_DEBUG & (256|512|1024)) static SH2 csh2[2][8]; +static FILE *trace[2]; #endif static void REGPARM(3) *sh2_drc_log_entry(void *block, SH2 *sh2, u32 sr) { @@ -210,7 +210,6 @@ static void REGPARM(3) *sh2_drc_log_entry(void *block, SH2 *sh2, u32 sr) pdb_step(sh2, sh2->pc); #elif (DRC_DEBUG & 256) { - static FILE *trace[2]; int idx = sh2->is_slave; if (!trace[0]) { trace[0] = fopen("pico.trace0", "wb"); @@ -225,7 +224,6 @@ static void REGPARM(3) *sh2_drc_log_entry(void *block, SH2 *sh2, u32 sr) } #elif (DRC_DEBUG & 512) { - static FILE *trace[2]; static SH2 fsh2; int idx = sh2->is_slave; if 
(!trace[0]) { @@ -1603,16 +1601,12 @@ static u16 rcache_counter; // SH2 register usage bitmasks static u32 rcache_hregs_reg; // regs of type HRT_REG (for pinning) static u32 rcache_regs_static; // statically allocated regs +static u32 rcache_regs_pinned; // pinned regs static u32 rcache_regs_now; // regs used in current insn static u32 rcache_regs_soon; // regs used in the next few insns static u32 rcache_regs_late; // regs used in later insns static u32 rcache_regs_discard; // regs overwritten without being used static u32 rcache_regs_clean; // regs needing cleaning -// combination masks XXX this seems obscure -#define rcache_regs_used (rcache_regs_soon|rcache_regs_late|rcache_regs_clean) -#define rcache_regs_nowused (rcache_regs_now|rcache_regs_used) -#define rcache_regs_nowsoon (rcache_regs_now|rcache_regs_soon) -#define rcache_regs_soonclean (rcache_regs_soon|rcache_regs_clean) static void rcache_lock_vreg(int x) { @@ -1677,6 +1671,7 @@ static void rcache_move_vreg(int d, int x) static void rcache_clean_vreg(int x) { + u32 rns = rcache_regs_now | rcache_regs_soon; int r; if (cache_regs[x].flags & HRF_DIRTY) { // writeback @@ -1685,23 +1680,18 @@ static void rcache_clean_vreg(int x) FOR_ALL_BITS_SET_DO(cache_regs[x].gregs, r, if (guest_regs[r].flags & GRF_DIRTY) { if (guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) { - if (guest_regs[r].vreg != guest_regs[r].sreg) { - if (!(cache_regs[guest_regs[r].sreg].locked)) { - // statically mapped reg not in its sreg. 
move back to sreg - rcache_evict_vreg(guest_regs[r].sreg); - emith_move_r_r(cache_regs[guest_regs[r].sreg].hreg, - cache_regs[guest_regs[r].vreg].hreg); - rcache_remove_vreg_alias(x, r); - rcache_add_vreg_alias(guest_regs[r].sreg, r); - cache_regs[guest_regs[r].sreg].flags |= HRF_DIRTY; - } else { - // must evict since sreg is locked - if (~rcache_regs_discard & (1 << r)) - emith_ctx_write(cache_regs[x].hreg, r * 4); - guest_regs[r].flags &= ~GRF_DIRTY; - rcache_remove_vreg_alias(x, r); - } + if (guest_regs[r].vreg != guest_regs[r].sreg && + !cache_regs[guest_regs[r].sreg].locked && + !(rns & cache_regs[guest_regs[r].sreg].gregs)) { + // statically mapped reg not in its sreg. move back to sreg + rcache_evict_vreg(guest_regs[r].sreg); + emith_move_r_r(cache_regs[guest_regs[r].sreg].hreg, + cache_regs[guest_regs[r].vreg].hreg); + rcache_remove_vreg_alias(x, r); + rcache_add_vreg_alias(guest_regs[r].sreg, r); + cache_regs[guest_regs[r].sreg].flags |= HRF_DIRTY; } else + // cannot remap. keep dirty for writeback in unmap cache_regs[x].flags |= HRF_DIRTY; } else { if (~rcache_regs_discard & (1 << r)) @@ -1815,17 +1805,9 @@ static int rcache_allocate_vreg(int needed) { int x; - if (needed) { - // needed soon, try getting a REG 1st, use a TEMP only if none is available - x = rcache_allocate(1, 0); - if (x < 0) - x = rcache_allocate(-1, 1); - } else { - // not needed, try getting a TEMP 1st, use a REG only if none is available + x = rcache_allocate(1, needed ? 
0 : 3); + if (x < 0) x = rcache_allocate(-1, 1); - if (x < 0) - x = rcache_allocate(1, 0); - } return x; } @@ -1838,10 +1820,6 @@ static int rcache_allocate_nontemp(void) static int rcache_allocate_temp(void) { int x = rcache_allocate(-1, 1); - if (x < 0) { - printf("no temp register available, aborting\n"); - exit(1); - } return x; } @@ -1898,6 +1876,7 @@ static int rcache_map_reg(sh2_reg_e r, int hr, int mode) // remap vreg from a TEMP to a REG if it will be used (upcoming TEMP invalidation) static void rcache_remap_vreg(int x) { + u32 rsl_d = rcache_regs_soon | rcache_regs_late; int d; // x must be a cached vreg @@ -1905,7 +1884,7 @@ static void rcache_remap_vreg(int x) return; // don't do it if x is already a REG or isn't used or to be cleaned anyway if ((cache_regs[x].htype & HRT_REG) || - !(rcache_regs_used & ~rcache_regs_clean & cache_regs[x].gregs)) { + !(rsl_d & cache_regs[x].gregs)) { // clean here to avoid data loss on invalidation rcache_clean_vreg(x); return; @@ -1971,20 +1950,22 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr { int src, dst, ali; cache_reg_t *tr; + u32 rsp_d = (rcache_regs_now | rcache_regs_soon | + rcache_regs_static | rcache_regs_pinned) & ~rcache_regs_discard; dst = src = guest_regs[r].vreg; rcache_lock_vreg(src); // lock to avoid evicting src // good opportunity to relocate a remapped STATIC? 
- if ((guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) && src != guest_regs[r].sreg && + if ((guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) && + src != guest_regs[r].sreg && (src < 0 || mode != RC_GR_READ) && !cache_regs[guest_regs[r].sreg].locked && - (src < 0 || mode != RC_GR_READ) && - !(rcache_regs_nowsoon & cache_regs[guest_regs[r].sreg].gregs)) { + !(rsp_d & cache_regs[guest_regs[r].sreg].gregs)) { dst = guest_regs[r].sreg; rcache_evict_vreg(dst); } else if (dst < 0) { // allocate a cache register - if ((dst = rcache_allocate_vreg(rcache_regs_nowsoon & (1 << r))) < 0) { + if ((dst = rcache_allocate_vreg(rsp_d & (1 << r))) < 0) { printf("no registers to evict, aborting\n"); exit(1); } @@ -2004,12 +1985,12 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr ali = tr->gregs & ~(1 << r); if (mode != RC_GR_READ && src == dst && ali) { int x = -1; - if (rcache_regs_nowsoon & ali) { + if (rsp_d & ali) { if ((guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) && guest_regs[r].sreg == dst && !tr->locked) { // split aliases if r is STATIC in sreg and dst isn't already locked rcache_lock_vreg(dst); // lock to avoid evicting dst - x = rcache_allocate_vreg(rcache_regs_nowsoon & ali); + x = rcache_allocate_vreg(rsp_d & ali); rcache_unlock_vreg(dst); if (x >= 0) { src = x; @@ -2018,7 +1999,7 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr } else { // split r rcache_lock_vreg(src); // lock to avoid evicting src - x = rcache_allocate_vreg(rcache_regs_nowsoon & (1 << r)); + x = rcache_allocate_vreg(rsp_d & (1 << r)); rcache_unlock_vreg(src); if (x >= 0) { dst = x; @@ -2082,6 +2063,7 @@ static void rcache_pin_reg(sh2_reg_e r) guest_regs[r].flags |= GRF_PINNED; cache_regs[x].flags |= HRF_PINNED; guest_regs[r].sreg = x; + rcache_regs_pinned |= (1 << r); } #if DRC_DEBUG & 64 RCACHE_CHECK("after pin"); @@ -2275,10 +2257,8 @@ static void rcache_free(int hr) static void rcache_unlock(int x) { - if (x >= 0) { + if (x >= 
0) cache_regs[x].locked = 0; -// rcache_regs_now &= ~cache_regs[x].gregs; - } } static void rcache_unlock_all(void) @@ -2297,6 +2277,7 @@ static void rcache_unpin_all(void) guest_regs[i].flags &= ~GRF_PINNED; cache_regs[guest_regs[i].sreg].flags &= ~HRF_PINNED; guest_regs[i].sreg = -1; + rcache_regs_pinned &= ~(1 << i); } } #if DRC_DEBUG & 64 @@ -2337,7 +2318,8 @@ static inline void rcache_set_usage_discard(u32 mask) static inline int rcache_is_cached(sh2_reg_e r) { // is r in cache or needed RSN? - return (guest_regs[r].vreg >= 0 || (rcache_regs_soonclean & (1 << r))); + u32 rsc = rcache_regs_soon | rcache_regs_clean; + return (guest_regs[r].vreg >= 0 || (rsc & (1 << r))); } static inline int rcache_is_hreg_used(int hr) @@ -2407,9 +2389,8 @@ static void rcache_clean_masked(u32 mask) { int i, r, hr; - if (!(mask &= ~rcache_regs_static)) - return; rcache_regs_clean |= mask; + mask = rcache_regs_clean; // clean constants where all aliases are covered by the mask for (i = 0; i < ARRAY_SIZE(gconsts); i++) @@ -2447,9 +2428,11 @@ static void rcache_clean(void) rcache_unlock_vreg(guest_regs[i].vreg); if (guest_regs[i].vreg < 0) emith_ctx_read(cache_regs[guest_regs[i].sreg].hreg, i*4); - else + else { emith_move_r_r(cache_regs[guest_regs[i].sreg].hreg, cache_regs[guest_regs[i].vreg].hreg); + rcache_remove_vreg_alias(guest_regs[i].vreg, i); + } cache_regs[guest_regs[i].sreg].gregs = 1 << i; cache_regs[guest_regs[i].sreg].type = HR_CACHED; cache_regs[guest_regs[i].sreg].flags |= HRF_DIRTY|HRF_PINNED; @@ -3134,7 +3117,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } #endif } - pinned_loop_pc[pinned_loop_count] = -1; if (branch_target_count > 0) { memset(branch_target_ptr, 0, sizeof(branch_target_ptr[0]) * branch_target_count); @@ -3160,6 +3142,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_invalidate_t(); drcf = (struct drcf) { 0 }; #if LOOP_OPTIMIZER + pinned_loop_pc[pinned_loop_count] = -1; pinned_loop_count = 0; #endif @@ 
-3292,10 +3275,8 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } emith_jump_cond_patchable(DCOND_LE, tcache_ptr); #if LOOP_OPTIMIZER - if (op_flags[i] & OF_BASIC_LOOP) { - emith_flush(); + if (op_flags[i] & OF_BASIC_LOOP) emith_jump_patch(jp, tcache_ptr, NULL); - } #endif #if (DRC_DEBUG & 32) @@ -3425,14 +3406,14 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) soon = late; } else { // upcoming rcache_flush, start writing back unused dirty stuff + rcache_set_usage_discard(write & ~(late|soon|opd[0].source)); rcache_clean_masked(rcache_dirty_mask() & ~(write|opd[0].dest)); break; } } rcache_set_usage_now(opd[0].source); // current insn - rcache_set_usage_soon(soon); // insns 1-3 - rcache_set_usage_late(late & ~soon); // insns 4-9 - rcache_set_usage_discard(write & ~(late|soon) & ~opd[0].source); + rcache_set_usage_soon(soon); // insns 1-4 + rcache_set_usage_late(late & ~soon); // insns 5-9 switch (opd->op) { @@ -4374,6 +4355,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) end_op: rcache_unlock_all(); + rcache_set_usage_now(0); #if DRC_DEBUG & 64 RCACHE_CHECK("after insn"); #endif @@ -4418,22 +4400,11 @@ end_op: // idle or delay loop emit_sync_t_to_sr(); emith_sh2_delay_loop(cycles, drcf.delay_reg); + rcache_unlock_all(); // may lock delay_reg drcf.polling = drcf.loop_type = 0; } - - if (target_pc < pc && pinned_loop_pc[pinned_loop_count] == target_pc) { - // backward jump at end of optimized loop - rcache_unpin_all(); - target = pinned_loop_ptr[pinned_loop_count]; - pinned_loop_count ++; - } #endif - sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); - FLUSH_CYCLES(sr); - rcache_unlock_all(); - rcache_clean(); - #if CALL_STACK void *rtsadd = NULL, *rtsret = NULL; if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) { @@ -4441,12 +4412,18 @@ end_op: tmp = rcache_get_tmp_arg(1); rtsadd = tcache_ptr; emith_move_r_imm_s8_patchable(tmp, 0); + rcache_clean_tmp(); rcache_invalidate_tmp(); 
emith_call(sh2_drc_dispatcher_call); rtsret = tcache_ptr; } #endif + // XXX move below cond test if not changing host cond (MIPS delay slot)? + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + FLUSH_CYCLES(sr); + rcache_clean(); + if (OP_ISBRACND(opd_b->op)) { // BT[S], BF[S] - emit condition test cond = (opd_b->op == OP_BRANCH_CF) ? DCOND_EQ : DCOND_NE; @@ -4466,7 +4443,6 @@ end_op: emith_sync_t(sr); // no modification of host status/flags between here and branching! -#if LINK_BRANCHES v = find_in_sorted_array(branch_target_pc, branch_target_count, target_pc); if (v >= 0) { @@ -4474,6 +4450,14 @@ end_op: if (branch_target_ptr[v]) { // local backward jump, link here now since host PC is already known target = branch_target_ptr[v]; +#if LOOP_OPTIMIZER + if (pinned_loop_pc[pinned_loop_count] == target_pc) { + // backward jump at end of optimized loop + rcache_unpin_all(); + target = pinned_loop_ptr[pinned_loop_count]; + pinned_loop_count ++; + } +#endif if (cond != -1) emith_jump_cond(cond, target); else { @@ -4495,7 +4479,6 @@ end_op: } else dbg(1, "warning: too many local branches"); } -#endif if (target == NULL) { @@ -4503,36 +4486,30 @@ end_op: bl = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id); if (cond != -1) { #if 1 - if (bl) { - if (blx_target_count < ARRAY_SIZE(blx_target_pc)) { - // conditional jumps get a blx stub for the far jump - blx_target_pc[blx_target_count] = target_pc; - blx_target_bl[blx_target_count] = bl; - blx_target_ptr[blx_target_count++] = tcache_ptr; - bl->type = BL_JCCBLX; - target = tcache_ptr; - } else { - // blx table full, patch jump only - tmp = rcache_get_tmp_arg(0); - emith_move_r_imm(tmp, target_pc); - rcache_free_tmp(tmp); - bl->jump = tcache_ptr; - bl->type = BL_JMP; - target = sh2_drc_dispatcher; - } + if (bl && blx_target_count < ARRAY_SIZE(blx_target_pc)) { + // conditional jumps get a blx stub for the far jump + blx_target_pc[blx_target_count] = target_pc; + blx_target_bl[blx_target_count] = bl; + 
blx_target_ptr[blx_target_count++] = tcache_ptr; + bl->type = BL_JCCBLX; + target = tcache_ptr; emith_jump_cond_patchable(cond, target); } else { - // cannot link, inline jump @dispatcher + // not linkable, or blx table full; inline jump @dispatcher EMITH_JMP_START(emith_invert_cond(cond)); + if (bl) { + bl->jump = tcache_ptr; + bl->type = BL_LDJMP; + } tmp = rcache_get_tmp_arg(0); emith_move_r_imm(tmp, target_pc); rcache_free_tmp(tmp); target = sh2_drc_dispatcher; - emith_jump(target); + emith_jump_patchable(target); EMITH_JMP_END(emith_invert_cond(cond)); } -#elif 1 +#else // jump @dispatcher - ARM 32bit version with conditional execution EMITH_SJMP_START(emith_invert_cond(cond)); tmp = rcache_get_tmp_arg(0); @@ -4546,25 +4523,13 @@ end_op: } emith_jump_cond_patchable(cond, target); EMITH_SJMP_END(emith_invert_cond(cond)); -#else - // jump @dispatcher - generic version (jump !cond @over, jump @trgt) - EMITH_JMP_START(emith_invert_cond(cond)); - if (bl) { - bl->jump = tcache_ptr; - bl->type = BL_LDJMP; - } - tmp = rcache_get_tmp_arg(0); - emith_move_r_imm(tmp, target_pc); - rcache_free_tmp(tmp); - target = sh2_drc_dispatcher; - - emith_jump_patchable(target); - EMITH_JMP_END(emith_invert_cond(cond)); #endif } else { // unconditional, has the far jump inlined - if (bl) + if (bl) { + emith_flush(); // flush to inhibit insn swapping bl->type = BL_LDJMP; + } tmp = rcache_get_tmp_arg(0); emith_move_r_imm(tmp, target_pc); @@ -4576,7 +4541,6 @@ end_op: } } - emith_flush(); if (bl) memcpy(bl->jdisp, bl->jump, emith_jump_at_size()); #if CALL_STACK @@ -4599,11 +4563,6 @@ end_op: u32 target_pc; struct block_link *bl = NULL; - sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); - FLUSH_CYCLES(sr); - emith_sync_t(sr); - rcache_clean(); - tmp = rcache_get_reg_arg(0, SHR_PC, NULL); #if CALL_STACK @@ -4615,12 +4574,18 @@ end_op: tmp = rcache_get_tmp_arg(1); rtsadd = tcache_ptr; emith_move_r_imm_s8_patchable(tmp, 0); + rcache_clean_tmp(); rcache_invalidate_tmp(); 
emith_call(sh2_drc_dispatcher_call); rtsret = tcache_ptr; } #endif + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + FLUSH_CYCLES(sr); + emith_sync_t(sr); + rcache_clean(); + #if CALL_STACK if (opd_b->rm == SHR_PR) { // RTS - restore rts data, else jump to dispatcher @@ -4630,10 +4595,8 @@ end_op: if (gconst_get(SHR_PC, &target_pc)) { // JMP, JSR, BRAF, BSRF const - treat like unconditional direct branch bl = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id); - if (bl) { // pc already loaded somewhere else, can patch jump only + if (bl) // pc already loaded somewhere else, can patch jump only bl->type = BL_JMP; - bl->jump = tcache_ptr; - } emith_jump_patchable(sh2_drc_dispatcher); } else { // JMP, JSR, BRAF, BSRF not const @@ -4641,7 +4604,6 @@ end_op: } rcache_invalidate(); - emith_flush(); #if CALL_STACK if (rtsadd) emith_move_r_imm_s8_patch(rtsadd, tcache_ptr - (u8 *)rtsret); @@ -4671,13 +4633,15 @@ end_op: rcache_clean(); bl = dr_prepare_ext_branch(block->entryp, pc, sh2->is_slave, tcache_id); - if (bl) + if (bl) { + emith_flush(); // flush to inhibit insn swapping bl->type = BL_LDJMP; + } tmp = rcache_get_tmp_arg(0); emith_move_r_imm(tmp, pc); emith_jump_patchable(sh2_drc_dispatcher); rcache_invalidate(); - emith_flush(); + if (bl) memcpy(bl->jdisp, bl->jump, emith_jump_at_size()); } else @@ -4696,7 +4660,7 @@ end_op: emith_move_r_imm(tmp, blx_target_pc[i] & ~1); emith_jump(target); rcache_invalidate(); - emith_flush(); + if (bl) memcpy(bl->jdisp, bl->blx, emith_jump_at_size()); } @@ -5554,6 +5518,12 @@ void sh2_drc_finish(SH2 *sh2) if (block_tables[0] == NULL) return; +#if (DRC_DEBUG & (256|512)) + if (trace[0]) fclose(trace[0]); + if (trace[1]) fclose(trace[1]); + trace[0] = trace[1] = NULL; +#endif + #if (DRC_DEBUG & 4) for (i = 0; i < TCACHE_BUFFERS; i++) { printf("~~~ tcache %d\n", i); From c3ebe082d3c26ab4f80b7aa2f0b3a020f0ce45af Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 5 Oct 2019 11:17:49 +0200 Subject: [PATCH 0222/1110] sh2 
drc: fix i386 regression --- cpu/sh2/compiler.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 449ae0e1..09546634 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -1682,6 +1682,7 @@ static void rcache_clean_vreg(int x) if (guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) { if (guest_regs[r].vreg != guest_regs[r].sreg && !cache_regs[guest_regs[r].sreg].locked && + (~rcache_regs_discard & (1 << r)) && !(rns & cache_regs[guest_regs[r].sreg].gregs)) { // statically mapped reg not in its sreg. move back to sreg rcache_evict_vreg(guest_regs[r].sreg); @@ -1820,6 +1821,8 @@ static int rcache_allocate_nontemp(void) static int rcache_allocate_temp(void) { int x = rcache_allocate(-1, 1); + if (x < 0) + x = rcache_allocate(0, 0); return x; } @@ -3404,16 +3407,16 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // regs needed in the next few instructions if (v <= 4) soon = late; - } else { - // upcoming rcache_flush, start writing back unused dirty stuff - rcache_set_usage_discard(write & ~(late|soon|opd[0].source)); - rcache_clean_masked(rcache_dirty_mask() & ~(write|opd[0].dest)); + } else break; - } } rcache_set_usage_now(opd[0].source); // current insn rcache_set_usage_soon(soon); // insns 1-4 rcache_set_usage_late(late & ~soon); // insns 5-9 + rcache_set_usage_discard(write & ~(late|soon|opd[0].source)); + if (v <= 9) + // upcoming rcache_flush, start writing back unused dirty stuff + rcache_clean_masked(rcache_dirty_mask() & ~(write|opd[0].dest)); switch (opd->op) { @@ -3826,6 +3829,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_tpop_carry(sr, 0); emith_adcf_r_r_r(tmp2, tmp, tmp); emith_tpush_carry(sr, 0); // keep Q1 in T for now + rcache_free(tmp); tmp4 = rcache_get_tmp(); emith_and_r_r_imm(tmp4, sr, M); emith_eor_r_r_lsr(sr, tmp4, M_SHIFT - Q_SHIFT); // Q ^= M From 7869213d35f3ec020083a1c2b3f35c107e0c52a7 Mon Sep 17 00:00:00 2001 From: kub 
Date: Thu, 10 Oct 2019 23:52:39 +0200 Subject: [PATCH 0223/1110] sh2 drc: speed optimization and bugfixing --- Makefile | 2 +- cpu/drc/emit_arm64.c | 18 ++++++++------ cpu/drc/emit_mips.c | 59 +++++++++++++++++++++++++------------------- cpu/drc/emit_x86.c | 5 ++++ cpu/sh2/compiler.c | 32 +++++++++--------------- 5 files changed, 61 insertions(+), 55 deletions(-) diff --git a/Makefile b/Makefile index 63e9c833..15549dca 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ ifeq "$(DEBUG)" "0" CFLAGS += -O3 -DNDEBUG endif -# This is actually needed, bevieve me. +# This is actually needed, believe me. # If you really have to disable this, set NO_ALIGN_FUNCTIONS elsewhere. ifndef NO_ALIGN_FUNCTIONS CFLAGS += -falign-functions=2 diff --git a/cpu/drc/emit_arm64.c b/cpu/drc/emit_arm64.c index 688649b5..3ef402b4 100644 --- a/cpu/drc/emit_arm64.c +++ b/cpu/drc/emit_arm64.c @@ -160,7 +160,7 @@ enum { XT_UXTW=0x4, XT_UXTX=0x6, XT_LSL=0x7, XT_SXTW=0xc, XT_SXTX=0xe }; #define A64_ROR_REG(rd, rn, rm) \ A64_INSN(0xd,0x0,0x3,_,rm,_,0xb,rn,rd) -// rd = REVERSE(n) rn +// rd = REVERSE(rn) #define A64_RBIT_REG(rd, rn) \ A64_INSN(0xd,0x2,0x3,_,_,_,_,rn,rd) @@ -327,9 +327,10 @@ enum { AM_IDX, AM_IDXPOST, AM_IDXREG, AM_IDXPRE }; // if-then-else conditional execution helpers -#define JMP_POS(ptr) \ +#define JMP_POS(ptr) { \ ptr = tcache_ptr; \ - EMIT(A64_B(0)); + EMIT(A64_B(0)); \ +} #define JMP_EMIT(cond, ptr) { \ u32 val_ = (u8 *)tcache_ptr - (u8 *)(ptr); \ @@ -1225,9 +1226,9 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) emith_tst_r_imm(sr, S); \ EMITH_SJMP_START(DCOND_EQ); \ /* overflow if top 17 bits of MACH aren't all 1 or 0 */ \ - /* to check: add MACH[15] to MACH[31:16]. this is 0 if no overflow */ \ - emith_asrf(rn, mh, 16); /* sum = (MACH>>16) + ((MACH>>15)&1) */ \ - emith_adcf_r_imm(rn, 0); /* (MACH>>15) is in carry after shift */ \ + /* to check: add MACH >> 31 to MACH >> 15. 
this is 0 if no overflow */ \ + emith_asr(rn, mh, 15); \ + emith_addf_r_r_r_lsr(rn, rn, mh, 31); \ EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> ov */ \ emith_move_r_imm_c(DCOND_NE, ml, 0x0000); /* -overflow */ \ emith_move_r_imm_c(DCOND_NE, mh, 0x8000); \ @@ -1280,11 +1281,12 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) #define emith_tpop_carry(sr, is_sub) do { \ if (is_sub) \ emith_eor_r_imm(sr, 1); \ - emith_lsrf(sr, sr, 1); \ + emith_ror(sr, sr, 1); \ + emith_addf_r_r(sr, sr); \ } while (0) #define emith_tpush_carry(sr, is_sub) do { \ - emith_adc_r_r(sr, sr); \ + emith_adc_r_r(sr, Z0); \ if (is_sub) \ emith_eor_r_imm(sr, 1); \ } while (0) diff --git a/cpu/drc/emit_mips.c b/cpu/drc/emit_mips.c index fadf5744..4a452a68 100644 --- a/cpu/drc/emit_mips.c +++ b/cpu/drc/emit_mips.c @@ -21,7 +21,7 @@ #define AT 1 // used to hold intermediate results #define FNZ 15 // emulated processor flags: N (bit 31) ,Z (all bits) #define FC 24 // emulated processor flags: C (bit 0), others 0 -#define FV 25 // emulated processor flags: Nt^Ns (bit 31). others ? +#define FV 25 // emulated processor flags: Nt^Ns (bit 31). others x // unified conditions; virtual, not corresponding to anything real on MIPS @@ -208,8 +208,8 @@ enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 }; } while (0) // FIFO for 2 instructions, for delay slot handling -u32 emith_last_insns[2] = { -1,-1 }; -int emith_last_idx, emith_last_cnt; +static u32 emith_last_insns[2] = { -1,-1 }; +static int emith_last_idx, emith_last_cnt; #define EMIT_PUSHOP() \ do { \ @@ -248,7 +248,7 @@ static int emith_is_b(u32 op) // B ((op>>26) == OP__RT && ((op>>16) & 036) == RT_BLTZ); } // register usage for dependency evaluation XXX better do this as in emit_arm? 
static uint64_t emith_has_rs[3] = // OP__FN, OP__RT, others - { 0x00fffffffffa0ff0ULL, 0x000fff0fUL, 0xffffffff0f007f30ULL }; + { 0x00fffffffffa0ff0ULL, 0x000fff0fUL, 0xffffffff0f007ff0ULL }; static uint64_t emith_has_rt[3] = // OP__FN, OP__RT, others { 0xff00fffffff00cffULL, 0x00000000UL, 0x8000ff0000000030ULL }; static uint64_t emith_has_rd[3] = // OP__FN, OP__RT, others (rt instead of rd) @@ -308,21 +308,23 @@ static void *emith_branch(u32 op) bop = emith_b_isswap(op, op2); } + // flush FIFO and branch + tcache_ptr = (void *)((u32 *)tcache_ptr - emith_last_cnt); + if (emith_last_insns[idx^1] != -1) + EMIT_PTR(tcache_ptr, emith_last_insns[idx^1]); if (bop) { // can swap - tcache_ptr = (void *)((u32 *)tcache_ptr - emith_last_cnt); - if (emith_last_insns[idx^1] != -1) - EMIT_PTR(tcache_ptr, emith_last_insns[idx^1]); bp = tcache_ptr; EMIT_PTR(tcache_ptr, bop); COUNT_OP; EMIT_PTR(tcache_ptr, emith_last_insns[idx]); - emith_last_insns[0] = emith_last_insns[1] = -1; - emith_last_cnt = 0; } else { // can't swap - emith_flush(); + if (emith_last_insns[idx] != -1) + EMIT_PTR(tcache_ptr, emith_last_insns[idx]); bp = tcache_ptr; EMIT_PTR(tcache_ptr, op); COUNT_OP; EMIT_PTR(tcache_ptr, MIPS_NOP); COUNT_OP; } + emith_last_insns[0] = emith_last_insns[1] = -1; + emith_last_cnt = 0; return bp; } @@ -392,8 +394,8 @@ static void *emith_branch(u32 op) // flag emulation creates 2 (ie cmp #0/beq) up to 9 (ie adcf/ble) extra insns. // flag handling shortcuts may reduce this by 1-4 insns, see emith_cond_check() -int emith_flg_rs, emith_flg_rt; // registers used in FNZ=rs-rt (aka cmp_r_r) -int emith_flg_noV; // V flag known not to be set +static int emith_flg_rs, emith_flg_rt; // registers used in FNZ=rs-rt (cmp_r_r) +static int emith_flg_noV; // V flag known not to be set // store minimal cc information: rd, rt^rs, carry // NB: the result *must* first go to FNZ, in case rd == rs or rd == rt. 
@@ -625,7 +627,11 @@ static void emith_set_arith_flags(int rd, int rt, int rs, s32 imm, int sub) // move immediate static void emith_move_imm(int r, uintptr_t imm) { - if ((s16)imm != imm) { + if ((s16)imm == imm) { + EMIT(MIPS_ADD_IMM(r, Z0, imm)); + } else if (!(imm >> 16)) { + EMIT(MIPS_OR_IMM(r, Z0, imm)); + } else { int s = Z0; if (imm >> 16) { EMIT(MIPS_MOVT_IMM(r, imm >> 16)); @@ -633,8 +639,7 @@ static void emith_move_imm(int r, uintptr_t imm) } if ((u16)imm) EMIT(MIPS_OR_IMM(r, s, (u16)imm)); - } else - EMIT(MIPS_ADD_IMM(r, Z0, imm)); + } } #define emith_move_r_ptr_imm(r, imm) \ @@ -1372,16 +1377,17 @@ static int emith_cond_check(int cond, int *r) emith_tst_r_imm(sr, S); \ EMITH_SJMP_START(DCOND_EQ); \ /* overflow if top 17 bits of MACH aren't all 1 or 0 */ \ - /* to check: add MACH[15] to MACH[31:16]. this is 0 if no overflow */ \ - emith_asrf(rn, mh, 16); /* sum = (MACH>>16) + ((MACH>>15)&1) */ \ - emith_adcf_r_imm(rn, 0); /* (MACH>>15) is in carry after shift */ \ + /* to check: add MACH >> 31 to MACH >> 15. 
this is 0 if no overflow */ \ + emith_asr(rn, mh, 15); \ + emith_add_r_r_r_lsr(rn, rn, mh, 31); /* sum = (MACH>>31)+(MACH>>15) */ \ + emith_teq_r_r(rn, Z0); /* (need only N and Z flags) */ \ EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> ov */ \ emith_move_r_imm_c(DCOND_NE, ml, 0x0000); /* -overflow */ \ emith_move_r_imm_c(DCOND_NE, mh, 0x8000); \ - EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> +ovl */ \ - emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0xffffffff */ \ - emith_sub_r_imm_c(DCOND_GT, mh, 1); /* 0x00007fff */ \ - EMITH_SJMP_END(DCOND_LE); \ + EMITH_SJMP_START(DCOND_PL); /* sum > 0 -> +ovl */ \ + emith_sub_r_imm_c(DCOND_MI, ml, 1); /* 0xffffffff */ \ + emith_sub_r_imm_c(DCOND_MI, mh, 1); /* 0x00007fff */ \ + EMITH_SJMP_END(DCOND_PL); \ EMITH_SJMP_END(DCOND_EQ); \ EMITH_SJMP_END(DCOND_EQ); \ } while (0) @@ -1399,14 +1405,15 @@ static int emith_cond_check(int cond, int *r) /* overflow if top 33 bits of MACH:MACL aren't all 1 or 0 */ \ /* to check: add MACL[31] to MACH. this is 0 if no overflow */ \ emith_lsr(rn, ml, 31); \ - emith_addf_r_r(rn, mh); /* sum = MACH + ((MACL>>31)&1) */ \ + emith_add_r_r(rn, mh); /* sum = MACH + ((MACL>>31)&1) */ \ + emith_teq_r_r(rn, Z0); /* (need only N and Z flags) */ \ EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \ /* XXX: LSB signalling only in SH1, or in SH2 too? 
*/ \ emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \ emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* negative ovrfl */ \ - EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> positive ovrfl */ \ - emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0x7fffffff */ \ - EMITH_SJMP_END(DCOND_LE); \ + EMITH_SJMP_START(DCOND_PL); /* sum > 0 -> positive ovrfl */ \ + emith_sub_r_imm_c(DCOND_MI, ml, 1); /* 0x7fffffff */ \ + EMITH_SJMP_END(DCOND_PL); \ EMITH_SJMP_END(DCOND_EQ); \ EMITH_SJMP_END(DCOND_EQ); \ } while (0) diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 451fa8d0..44e10ecf 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -1225,6 +1225,11 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common rcache_free_tmp(tmp_); \ } while (0) +#define emith_carry_to_t(sr, is_sub) do { \ + emith_rorc(sr); \ + emith_rol(sr, sr, 1); \ +} while (0) + #define emith_tpop_carry(sr, is_sub) \ emith_lsr(sr, sr, 1) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 09546634..2c1e8cff 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -69,7 +69,7 @@ // 800 - state dump on exit // { #ifndef DRC_DEBUG -#define DRC_DEBUG 0//x8c7 +#define DRC_DEBUG 0//x847 #endif #if DRC_DEBUG @@ -2999,6 +2999,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) void *block_entry_ptr; struct block_desc *block; struct block_entry *entry; + struct block_link *bl; u16 *dr_pc_base; struct op_data *opd; int blkid_main = 0; @@ -3245,6 +3246,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) if (pinned_loop_pc[pinned_loop_count] == pc) { // pin needed regs on loop entry FOR_ALL_BITS_SET_DO(pinned_loop_mask[pinned_loop_count], v, rcache_pin_reg(v)); + emith_flush(); pinned_loop_ptr[pinned_loop_count] = tcache_ptr; } else op_flags[i] &= ~OF_BASIC_LOOP; @@ -3920,9 +3922,8 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); 
emith_invalidate_t(); - emith_tpop_carry(sr, 0); // dummy emith_lslf(tmp, tmp2, 1); - emith_tpush_carry(sr, 0); + emith_carry_to_t(sr, 0); goto end_op; case 1: // DT Rn 0100nnnn00010000 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); @@ -3949,12 +3950,11 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_invalidate_t(); - emith_tpop_carry(sr, 0); // dummy if (op & 0x20) { emith_asrf(tmp, tmp2, 1); } else emith_lsrf(tmp, tmp2, 1); - emith_tpush_carry(sr, 0); + emith_carry_to_t(sr, 0); goto end_op; case 1: // CMP/PZ Rn 0100nnnn00010001 tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); @@ -4007,12 +4007,11 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_invalidate_t(); - emith_tpop_carry(sr, 0); // dummy if (op & 1) { emith_rorf(tmp, tmp2, 1); } else emith_rolf(tmp, tmp2, 1); - emith_tpush_carry(sr, 0); + emith_carry_to_t(sr, 0); goto end_op; case 0x24: // ROTCL Rn 0100nnnn00100100 case 0x25: // ROTCR Rn 0100nnnn00100101 @@ -4391,7 +4390,6 @@ end_op: int cond = -1; int ctaken = 0; void *target = NULL; - struct block_link *bl = NULL; if (OP_ISBRACND(opd_b->op)) ctaken = (op_flags[i] & OF_DELAY_OP) ? 1 : 2; @@ -4545,8 +4543,6 @@ end_op: } } - if (bl) - memcpy(bl->jdisp, bl->jump, emith_jump_at_size()); #if CALL_STACK if (rtsadd) emith_move_r_imm_s8_patch(rtsadd, tcache_ptr - (u8 *)rtsret); @@ -4565,7 +4561,6 @@ end_op: } else if (drcf.pending_branch_indirect) { u32 target_pc; - struct block_link *bl = NULL; tmp = rcache_get_reg_arg(0, SHR_PC, NULL); @@ -4629,8 +4624,6 @@ end_op: if (! 
OP_ISBRAUC(opd->op)) { - struct block_link *bl; - tmp = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); FLUSH_CYCLES(tmp); emith_sync_t(tmp); @@ -4645,18 +4638,15 @@ end_op: emith_move_r_imm(tmp, pc); emith_jump_patchable(sh2_drc_dispatcher); rcache_invalidate(); - - if (bl) - memcpy(bl->jdisp, bl->jump, emith_jump_at_size()); } else rcache_flush(); // emit blx area for (i = 0; i < blx_target_count; i++) { void *target = (blx_target_pc[i] & 1 ? sh2_drc_exit : sh2_drc_dispatcher); - struct block_link *bl = blx_target_bl[i]; emith_pool_check(); + bl = blx_target_bl[i]; if (bl) bl->blx = tcache_ptr; emith_jump_patch(blx_target_ptr[i], tcache_ptr, NULL); @@ -4664,9 +4654,6 @@ end_op: emith_move_r_imm(tmp, blx_target_pc[i] & ~1); emith_jump(target); rcache_invalidate(); - - if (bl) - memcpy(bl->jdisp, bl->blx, emith_jump_at_size()); } emith_flush(); @@ -4692,6 +4679,11 @@ end_op: emith_jump_patch(branch_patch_ptr[i], target, NULL); } + // fill blx backup; do this last to backup final patched code + for (i = 0; i < block->entry_count; i++) + for (bl = block->entryp[i].o_links; bl; bl = bl->o_next) + memcpy(bl->jdisp, bl->blx ?: bl->jump, emith_jump_at_size()); + tcache_ptrs[tcache_id] = tcache_ptr; host_instructions_updated(block_entry_ptr, tcache_ptr); From 86c16afd45de9e975445a46a96bdd43c80da3288 Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 11 Oct 2019 00:56:26 +0200 Subject: [PATCH 0224/1110] 32x, speed improvement --- pico/32x/32x.c | 28 ++++++++++++++++++---------- pico/32x/sh2soc.c | 17 ++++++++++------- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/pico/32x/32x.c b/pico/32x/32x.c index f6d1a153..9993bfa8 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -426,7 +426,7 @@ void p32x_sync_other_sh2(SH2 *sh2, unsigned int m68k_target) } #define STEP_LS 24 -#define STEP_N 488 // one line +#define STEP_N 528 // at least one line (488) #define sync_sh2s_normal p32x_sync_sh2s //#define sync_sh2s_lockstep p32x_sync_sh2s @@ -434,7 +434,7 @@ void 
p32x_sync_other_sh2(SH2 *sh2, unsigned int m68k_target) /* most timing is in 68k clock */ void sync_sh2s_normal(unsigned int m68k_target) { - unsigned int now, target, timer_cycles; + unsigned int now, target, next, timer_cycles; int cycles; elprintf(EL_32X, "sh2 sync to %u", m68k_target); @@ -458,40 +458,44 @@ void sync_sh2s_normal(unsigned int m68k_target) target = m68k_target; if (event_time_next && CYCLES_GT(target, event_time_next)) target = event_time_next; - if (CYCLES_GT(target, now + STEP_N)) - target = now + STEP_N; - while (CYCLES_GT(target, now)) { - elprintf(EL_32X, "sh2 exec to %u %d,%d/%d, flags %x", target, - target - msh2.m68krcycles_done, target - ssh2.m68krcycles_done, + next = target; + if (CYCLES_GT(target, now + STEP_N)) + next = now + STEP_N; + elprintf(EL_32X, "sh2 exec to %u %d,%d/%d, flags %x", next, + next - msh2.m68krcycles_done, next - ssh2.m68krcycles_done, m68k_target - now, Pico32x.emu_flags); pprof_start(ssh2); if (!(ssh2.state & SH2_IDLE_STATES)) { - cycles = target - ssh2.m68krcycles_done; + cycles = next - ssh2.m68krcycles_done; if (cycles > 0) { run_sh2(&ssh2, cycles > 20U ? cycles : 20U); if (event_time_next && CYCLES_GT(target, event_time_next)) target = event_time_next; + if (CYCLES_GT(next, target)) + next = target; } } pprof_end(ssh2); pprof_start(msh2); if (!(msh2.state & SH2_IDLE_STATES)) { - cycles = target - msh2.m68krcycles_done; + cycles = next - msh2.m68krcycles_done; if (cycles > 0) { run_sh2(&msh2, cycles > 20U ? 
cycles : 20U); if (event_time_next && CYCLES_GT(target, event_time_next)) target = event_time_next; + if (CYCLES_GT(next, target)) + next = target; } } pprof_end(msh2); - now = target; + now = next; if (!(msh2.state & SH2_IDLE_STATES)) { if (CYCLES_GT(now, msh2.m68krcycles_done)) now = msh2.m68krcycles_done; @@ -500,6 +504,10 @@ void sync_sh2s_normal(unsigned int m68k_target) if (CYCLES_GT(now, ssh2.m68krcycles_done)) now = ssh2.m68krcycles_done; } + if (now - timer_cycles >= STEP_N) { + p32x_timers_do(now - timer_cycles); + timer_cycles = now; + } } p32x_timers_do(now - timer_cycles); diff --git a/pico/32x/sh2soc.c b/pico/32x/sh2soc.c index 2b5a126c..dd834bfb 100644 --- a/pico/32x/sh2soc.c +++ b/pico/32x/sh2soc.c @@ -193,8 +193,9 @@ static void dmac_trigger(SH2 *sh2, struct dma_chan *chan) } // timer state - FIXME -static int timer_cycles[2]; -static int timer_tick_cycles[2]; +static u32 timer_cycles[2]; +static u32 timer_tick_cycles[2]; +static u32 timer_tick_factor[2]; // timers void p32x_timers_recalc(void) @@ -211,6 +212,7 @@ void p32x_timers_recalc(void) else cycles = 2; timer_tick_cycles[i] = cycles; + timer_tick_factor[i] = (1ULL << 32) / cycles; timer_cycles[i] = 0; elprintf(EL_32XP, "WDT cycles[%d] = %d", i, cycles); } @@ -226,11 +228,12 @@ void p32x_timers_do(unsigned int m68k_slice) void *pregs = sh2s[i].peri_regs; if (PREG8(pregs, 0x80) & 0x20) { // TME timer_cycles[i] += cycles; - cnt = PREG8(pregs, 0x81); - while (timer_cycles[i] >= timer_tick_cycles[i]) { - timer_cycles[i] -= timer_tick_cycles[i]; - cnt++; - } + // cnt = timer_cycles[i] / timer_tick_cycles[i]; + cnt = (1ULL * timer_cycles[i] * timer_tick_factor[i]) >> 32; + timer_cycles[i] -= timer_tick_cycles[i] * cnt; + if (timer_cycles[i] > timer_tick_cycles[i]) + timer_cycles[i] -= timer_tick_cycles[i], cnt++; + cnt += PREG8(pregs, 0x81); if (cnt >= 0x100) { int level = PREG8(pregs, 0xe3) >> 4; int vector = PREG8(pregs, 0xe4) & 0x7f; From 20d2358ab1f25608ee1ea067007e10a1e266b811 Mon Sep 17 
00:00:00 2001 From: kub Date: Fri, 11 Oct 2019 00:02:23 +0200 Subject: [PATCH 0225/1110] 32x, configurable pwm irq optimization to reduce pwm irq load --- pico/32x/pwm.c | 35 +++++++++++++++++++++++++++++------ pico/pico.h | 1 + platform/common/menu_pico.c | 1 + platform/common/menu_pico.h | 1 + 4 files changed, 32 insertions(+), 6 deletions(-) diff --git a/pico/32x/pwm.c b/pico/32x/pwm.c index 0aa2f586..3e5ce0ae 100644 --- a/pico/32x/pwm.c +++ b/pico/32x/pwm.c @@ -14,13 +14,18 @@ static struct { int irq_reload; int doing_fifo; int silent; + int irq_timer; + int irq_state; short current[2]; } pwm; +enum { PWM_IRQ_LOCKED, PWM_IRQ_STOPPED, PWM_IRQ_LOW, PWM_IRQ_HIGH }; + void p32x_pwm_ctl_changed(void) { int control = Pico32x.regs[0x30 / 2]; int cycles = Pico32x.regs[0x32 / 2]; + int pwm_irq_opt = PicoIn.opt & POPT_PWM_IRQ_OPT; cycles = (cycles - 1) & 0x0fff; pwm.cycles = cycles; @@ -31,8 +36,10 @@ void p32x_pwm_ctl_changed(void) if ((control & 0x0f) != 0) pwm.mult = 0x10000 / cycles; - pwm.irq_reload = (control & 0x0f00) >> 8; - pwm.irq_reload = ((pwm.irq_reload - 1) & 0x0f) + 1; + pwm.irq_timer = (control & 0x0f00) >> 8; + pwm.irq_timer = ((pwm.irq_timer - 1) & 0x0f) + 1; + pwm.irq_reload = pwm.irq_timer; + pwm.irq_state = pwm_irq_opt ? PWM_IRQ_STOPPED: PWM_IRQ_LOCKED; if (Pico32x.pwm_irq_cnt == 0) Pico32x.pwm_irq_cnt = pwm.irq_reload; @@ -104,6 +111,11 @@ static void consume_fifo_do(SH2 *sh2, unsigned int m68k_cycles, if (--Pico32x.pwm_irq_cnt == 0) { Pico32x.pwm_irq_cnt = pwm.irq_reload; do_pwm_irq(sh2, m68k_cycles); + } else if (Pico32x.pwm_p[1] == 0 && pwm.irq_state >= PWM_IRQ_LOW) { + // buffer underrun. Reduce reload rate if above programmed setting. 
+ if (pwm.irq_reload > pwm.irq_timer) + pwm.irq_reload--; + pwm.irq_state = PWM_IRQ_LOW; } } Pico32x.pwm_cycle_p = m68k_cycles * 3 - sh2_cycles_diff; @@ -221,10 +233,22 @@ void p32x_pwm_write16(unsigned int a, unsigned int d, case 6/2: // R ch fifo = Pico32xMem->pwm_fifo[1]; idx = Pico32xMem->pwm_index[1]; - if (Pico32x.pwm_p[1] < 3) + if (Pico32x.pwm_p[1] < 3) { + if (pwm.irq_state == PWM_IRQ_STOPPED) + pwm.irq_state = PWM_IRQ_LOW; + if (Pico32x.pwm_p[1] == 2 && pwm.irq_state >= PWM_IRQ_LOW) { + // buffer full. If there was no buffer underrun after last fill, + // try increasing reload rate to reduce IRQs + if (pwm.irq_reload < 3 && pwm.irq_state == PWM_IRQ_HIGH) + pwm.irq_reload ++; + pwm.irq_state = PWM_IRQ_HIGH; + } Pico32x.pwm_p[1]++; - else { -// fifo[(idx+1) % 4] = fifo[idx]; + } else { + // buffer overflow. Some roms always fill the complete buffer even if + // reload rate is set below max. Lock reload rate to programmed setting. + pwm.irq_reload = pwm.irq_timer; + pwm.irq_state = PWM_IRQ_LOCKED; idx = (idx+1) % 4; Pico32xMem->pwm_index[0] = idx; } @@ -236,7 +260,6 @@ void p32x_pwm_write16(unsigned int a, unsigned int d, if (Pico32x.pwm_p[0] < 3) Pico32x.pwm_p[0]++; else { -// fifo[(idx+1) % 4] = fifo[idx]; idx = (idx+1) % 4; Pico32xMem->pwm_index[0] = idx; } diff --git a/pico/pico.h b/pico/pico.h index ac1550d4..a9359a18 100644 --- a/pico/pico.h +++ b/pico/pico.h @@ -72,6 +72,7 @@ extern void *p32x_bios_g, *p32x_bios_m, *p32x_bios_s; #define POPT_DIS_IDLE_DET (1<<19) #define POPT_EN_32X (1<<20) #define POPT_EN_PWM (1<<21) +#define POPT_PWM_IRQ_OPT (1<<22) #define PAHW_MCD (1<<0) #define PAHW_32X (1<<1) diff --git a/platform/common/menu_pico.c b/platform/common/menu_pico.c index 7b0cd78c..9fb31426 100644 --- a/platform/common/menu_pico.c +++ b/platform/common/menu_pico.c @@ -506,6 +506,7 @@ static menu_entry e_menu_adv_options[] = mee_onoff ("Disable frame limiter", MA_OPT2_NO_FRAME_LIMIT,currentConfig.EmuOpt, EOPT_NO_FRMLIMIT), mee_onoff ("Enable 
dynarecs", MA_OPT2_DYNARECS, PicoIn.opt, POPT_EN_DRC), mee_onoff ("Status line in main menu", MA_OPT2_STATUS_LINE, currentConfig.EmuOpt, EOPT_SHOW_RTC), + mee_onoff ("PWM IRQ optimization", MA_OPT2_PWM_IRQ_OPT, PicoIn.opt, POPT_PWM_IRQ_OPT), MENU_OPTIONS_ADV mee_end, }; diff --git a/platform/common/menu_pico.h b/platform/common/menu_pico.h index 595989e8..c626c772 100644 --- a/platform/common/menu_pico.h +++ b/platform/common/menu_pico.h @@ -58,6 +58,7 @@ typedef enum MA_OPT2_NO_SPRITE_LIM, MA_OPT2_NO_IDLE_LOOPS, MA_OPT2_OVERCLOCK_M68K, + MA_OPT2_PWM_IRQ_OPT, MA_OPT2_DONE, MA_OPT3_SCALE, /* psp (all OPT3) */ MA_OPT3_HSCALE32, From e7ee7bc00afc6b8167f1c916be15430f2873f240 Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 11 Oct 2019 00:06:50 +0200 Subject: [PATCH 0226/1110] 32x, improved auto frame skip, plus new config option for max auto skip --- platform/common/config_file.c | 4 ++++ platform/common/emu.c | 13 ++++++++++--- platform/common/emu.h | 1 + platform/common/menu_pico.c | 1 + platform/common/menu_pico.h | 1 + 5 files changed, 17 insertions(+), 3 deletions(-) diff --git a/platform/common/config_file.c b/platform/common/config_file.c index 1b5c5172..7248d239 100644 --- a/platform/common/config_file.c +++ b/platform/common/config_file.c @@ -322,6 +322,10 @@ static int custom_read(menu_entry *me, const char *var, const char *val) currentConfig.gamma = atoi(val); return 1; + case MA_OPT2_MAX_FRAMESKIP: + currentConfig.max_skip = atoi(val); + return 1; + /* PSP */ case MA_OPT3_SCALE: if (strcasecmp(var, "Scale factor") != 0) return 0; diff --git a/platform/common/emu.c b/platform/common/emu.c index da03bff1..15aa5392 100644 --- a/platform/common/emu.c +++ b/platform/common/emu.c @@ -596,6 +596,7 @@ void emu_prep_defconfig(void) defaultConfig.turbo_rate = 15; defaultConfig.msh2_khz = PICO_MSH2_HZ / 1000; defaultConfig.ssh2_khz = PICO_SSH2_HZ / 1000; + defaultConfig.max_skip = 4; // platform specific overrides pemu_prep_defconfig(); @@ -1463,10 +1464,16 @@ void 
emu_loop(void) else if (diff < -target_frametime_x3) { /* no time left for this frame - skip */ - /* limit auto frameskip to 8 */ - if (frames_done / 8 <= frames_shown) + /* limit auto frameskip to max_skip */ + if (fskip_cnt < currentConfig.max_skip) { + fskip_cnt++; skip = 1; - } + } + else { + fskip_cnt = 0; + } + } else + fskip_cnt = 0; // don't go in debt too much while (diff < -target_frametime_x3 * 3) { diff --git a/platform/common/emu.h b/platform/common/emu.h index 1e751f89..26e2159b 100644 --- a/platform/common/emu.h +++ b/platform/common/emu.h @@ -76,6 +76,7 @@ typedef struct _currentConfig_t { int msh2_khz; int ssh2_khz; int overclock_68k; + int max_skip; } currentConfig_t; extern currentConfig_t currentConfig, defaultConfig; diff --git a/platform/common/menu_pico.c b/platform/common/menu_pico.c index 9fb31426..dc7ceda4 100644 --- a/platform/common/menu_pico.c +++ b/platform/common/menu_pico.c @@ -506,6 +506,7 @@ static menu_entry e_menu_adv_options[] = mee_onoff ("Disable frame limiter", MA_OPT2_NO_FRAME_LIMIT,currentConfig.EmuOpt, EOPT_NO_FRMLIMIT), mee_onoff ("Enable dynarecs", MA_OPT2_DYNARECS, PicoIn.opt, POPT_EN_DRC), mee_onoff ("Status line in main menu", MA_OPT2_STATUS_LINE, currentConfig.EmuOpt, EOPT_SHOW_RTC), + mee_range ("Max auto frameskip", MA_OPT2_MAX_FRAMESKIP, currentConfig.max_skip, 1, 10), mee_onoff ("PWM IRQ optimization", MA_OPT2_PWM_IRQ_OPT, PicoIn.opt, POPT_PWM_IRQ_OPT), MENU_OPTIONS_ADV mee_end, diff --git a/platform/common/menu_pico.h b/platform/common/menu_pico.h index c626c772..4c0bbdd1 100644 --- a/platform/common/menu_pico.h +++ b/platform/common/menu_pico.h @@ -58,6 +58,7 @@ typedef enum MA_OPT2_NO_SPRITE_LIM, MA_OPT2_NO_IDLE_LOOPS, MA_OPT2_OVERCLOCK_M68K, + MA_OPT2_MAX_FRAMESKIP, MA_OPT2_PWM_IRQ_OPT, MA_OPT2_DONE, MA_OPT3_SCALE, /* psp (all OPT3) */ From a6c0ab7d99b9d015b0d75a4455b1bf4acb9c9d6d Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 12 Oct 2019 00:26:11 +0200 Subject: [PATCH 0227/1110] sh2 drc bugfix for 
aarch64/mips --- Makefile | 2 +- cpu/drc/emit_arm64.c | 2 +- cpu/drc/emit_mips.c | 4 ++-- cpu/drc/emit_x86.c | 2 +- pico/32x/memory.c | 20 ++++++++++---------- tools/mkoffsets.sh | 9 +++++---- 6 files changed, 20 insertions(+), 19 deletions(-) diff --git a/Makefile b/Makefile index 15549dca..a79c054b 100644 --- a/Makefile +++ b/Makefile @@ -225,7 +225,7 @@ endif pprof: platform/linux/pprof.c $(CC) $(CFLAGS) -O2 -ggdb -DPPROF -DPPROF_TOOL -I../../ -I. $^ -o $@ $(LDFLAGS) $(LDLIBS) -pico/pico_int_offs.h:: tools/mkoffsets.sh +pico/pico_int_offs.h: tools/mkoffsets.sh make -C tools/ XCC="$(CC)" XCFLAGS="$(CFLAGS)" .s.o: diff --git a/cpu/drc/emit_arm64.c b/cpu/drc/emit_arm64.c index 3ef402b4..4bad6469 100644 --- a/cpu/drc/emit_arm64.c +++ b/cpu/drc/emit_arm64.c @@ -979,7 +979,7 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) #define emith_save_caller_regs(mask) do { \ int _c, _r1, _r2; u32 _m = mask & 0x3ffff; \ if (__builtin_parity(_m) == 1) _m |= 0x40000; /* hardware align */ \ - for (_c = HOST_REGS, _r1 = -1; _m && _c >= 0; _m &= ~(1 << _c), _c--) \ + for (_c = HOST_REGS-1, _r1 = -1; _m && _c >= 0; _m &= ~(1 << _c), _c--)\ if (_m & (1 << _c)) { \ _r2 = _r1, _r1 = _c; \ if (_r2 != -1) { \ diff --git a/cpu/drc/emit_mips.c b/cpu/drc/emit_mips.c index 4a452a68..38d68f40 100644 --- a/cpu/drc/emit_mips.c +++ b/cpu/drc/emit_mips.c @@ -1065,7 +1065,7 @@ static void emith_lohi_nops(void) if (__builtin_parity(_m) == 1) _m |= 0x1; /* ABI align */ \ int _s = count_bits(_m) * 4, _o = _s; \ if (_s) emith_sub_r_imm(SP, _s); \ - for (_c = HOST_REGS; _m && _c >= 0; _m &= ~(1 << _c), _c--) \ + for (_c = HOST_REGS-1; _m && _c >= 0; _m &= ~(1 << _c), _c--) \ if (_m & (1 << _c)) \ { _o -= 4; if (_c) emith_write_r_r_offs(_c, SP, _o); } \ } while (0) @@ -1279,7 +1279,7 @@ static int emith_cond_check(int cond, int *r) if (__builtin_parity(_m) == 1) _m |= 0x1; /* ABI align for SP is 8 */ \ int _s = count_bits(_m) * 4 + 16, _o = _s; /* 16 byte arg save area */ 
\ if (_s) emith_sub_r_imm(SP, _s); \ - for (_c = HOST_REGS; _m && _c >= 0; _m &= ~(1 << _c), _c--) \ + for (_c = HOST_REGS-1; _m && _c >= 0; _m &= ~(1 << _c), _c--) \ if (_m & (1 << _c)) \ { _o -= 4; if (_c) emith_write_r_r_offs(_c, SP, _o); } \ } while (0) diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 44e10ecf..212a12c5 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -1115,7 +1115,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common #define emith_save_caller_regs(mask) do { \ int _c; u32 _m = mask & 0xfc7; /* AX, CX, DX, SI, DI, 8, 9, 10, 11 */ \ if (__builtin_parity(_m) == 1) _m |= 0x8; /* BX for ABI align */ \ - for (_c = HOST_REGS; _m && _c >= 0; _m &= ~(1 << _c), _c--) \ + for (_c = HOST_REGS-1; _m && _c >= 0; _m &= ~(1 << _c), _c--) \ if (_m & (1 << _c)) emith_push(_c); \ } while (0) diff --git a/pico/32x/memory.c b/pico/32x/memory.c index e139910a..60820e1a 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -347,7 +347,7 @@ static u32 p32x_reg_read16(u32 a) if ((a & 0x30) == 0x20) { unsigned int cycles = SekCyclesDone(); - if (cycles - msh2.m68krcycles_done > 244) + if (CYCLES_GT(cycles - msh2.m68krcycles_done, 244)) p32x_sync_sh2s(cycles); if (m68k_poll_detect(a, cycles, P32XF_68KCPOLL)) { @@ -360,7 +360,7 @@ static u32 p32x_reg_read16(u32 a) if (a == 2) { // INTM, INTS unsigned int cycles = SekCyclesDone(); - if (cycles - msh2.m68krcycles_done > 64) + if (CYCLES_GT(cycles - msh2.m68krcycles_done, 64)) p32x_sync_sh2s(cycles); goto out; } @@ -420,7 +420,7 @@ static void p32x_reg_write8(u32 a, u32 d) return; case 0x03: // irq ctl if ((d ^ r[0x02 / 2]) & 3) { - int cycles = SekCyclesDone(); + unsigned int cycles = SekCyclesDone(); p32x_sync_sh2s(cycles); r[0x02 / 2] = d & 3; p32x_update_cmd_irq(NULL, cycles); @@ -610,9 +610,9 @@ static void p32x_reg_write16(u32 a, u32 d) case 0x2c/2: case 0x2e/2: if (r[a / 2] != d) { - int cycles = SekCyclesDone(); + unsigned int cycles = SekCyclesDone(); - if (cycles - 
(int)msh2.m68krcycles_done > 30) + if (CYCLES_GT(cycles - msh2.m68krcycles_done, 64)) p32x_sync_sh2s(cycles); r[a / 2] = d; @@ -712,7 +712,7 @@ static void p32x_vdp_write16(u32 a, u32 d, SH2 *sh2) } Pico32x.vdp_regs[0x06 / 2] = a; Pico32x.vdp_regs[0x08 / 2] = d; - if (sh2 != NULL && len > 4) { + if (sh2 != NULL && len > 8) { Pico32x.vdp_regs[0x0a / 2] |= P32XV_nFEN; // supposedly takes 3 bus/6 sh2 cycles? or 3 sh2 cycles? p32x_event_schedule_sh2(sh2, P32X_EVENT_FILLEND, 3 + len); @@ -824,8 +824,8 @@ static void p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2) if (Pico32x.sh2_regs[4 / 2] != d) { unsigned int cycles = sh2_cycles_done_m68k(sh2); Pico32x.sh2_regs[4 / 2] = d; - sh2_end_run(sh2, 4); p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles); + sh2_end_run(sh2, 4); sh2_poll_write(a & ~1, d, cycles, sh2); } return; @@ -849,9 +849,9 @@ static void p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2) unsigned int cycles = sh2_cycles_done_m68k(sh2); REG8IN16(r, a) = d; - sh2_end_run(sh2, 1); p32x_m68k_poll_event(P32XF_68KCPOLL); p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles); + sh2_end_run(sh2, 1); sh2_poll_write(a & ~1, r[a / 2], cycles, sh2); } return; @@ -941,9 +941,9 @@ static void p32x_sh2reg_write16(u32 a, u32 d, SH2 *sh2) unsigned int cycles = sh2_cycles_done_m68k(sh2); Pico32x.regs[a / 2] = d; - sh2_end_run(sh2, 1); p32x_m68k_poll_event(P32XF_68KCPOLL); p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles); + sh2_end_run(sh2, 1); sh2_poll_write(a, d, cycles, sh2); } return; @@ -1574,10 +1574,10 @@ static void NOINLINE sh2_sdram_poll(u32 a, u32 d, SH2 *sh2) unsigned cycles; DRC_SAVE_SR(sh2); - sh2_end_run(sh2, 1); cycles = sh2_cycles_done_m68k(sh2); sh2_poll_write(a, d, cycles, sh2); p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_RPOLL, cycles); + sh2_end_run(sh2, 1); DRC_RESTORE_SR(sh2); } diff --git a/tools/mkoffsets.sh b/tools/mkoffsets.sh index 8f2d888c..2223b804 100755 --- a/tools/mkoffsets.sh +++ b/tools/mkoffsets.sh @@ -11,10 +11,10 @@ 
ENDIAN= # compile with target C compiler and extract value from .rodata section compile_rodata () { - # $CC $CFLAGS -I .. -shared /tmp/getoffs.c -o /tmp/getoffs.o || exit 1 - echo 'void dummy(void) { asm(""::"r" (&val)); }' >> /tmp/getoffs.c - $CC $CFLAGS -I .. -nostdlib -Wl,-edummy /tmp/getoffs.c \ - -o /tmp/getoffs.o || exit 1 + $CC $CFLAGS -I .. -c /tmp/getoffs.c -o /tmp/getoffs.o || exit 1 + # echo 'void dummy(void) { asm(""::"r" (&val)); }' >> /tmp/getoffs.c + # $CC $CFLAGS -I .. -nostdlib -Wl,-edummy /tmp/getoffs.c \ + # -o /tmp/getoffs.o || exit 1 # find the name of the .rodata section (in case -fdata-sections is used) rosect=$(readelf -S /tmp/getoffs.o | grep '\.rodata' | sed 's/^[^.]*././;s/ .*//') @@ -48,6 +48,7 @@ get_define () # prefix struct member member... line=$(printf "#define %-20s 0x%04x" $prefix$name $rodata) } +CFLAGS="$CFLAGS -fno-lto" # determine endianess echo "const int val = 1;" >/tmp/getoffs.c compile_rodata From 6b9ded20a0d44d2b8b44ae59e06d3952d0a2c8dd Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 12 Oct 2019 11:10:28 +0200 Subject: [PATCH 0228/1110] sh2 drc: bugfix in block management --- cpu/sh2/compiler.c | 4 +--- pico/32x/memory.c | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 2c1e8cff..b7c57b3d 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -769,8 +769,7 @@ static void rm_block_list(struct block_list **blist) struct block_list *next, *current = *blist; while (current != NULL) { next = current->next; - current->next = blist_free; - blist_free = current; + rm_from_block_lists(current->block); current = next; } *blist = NULL; @@ -5441,7 +5440,6 @@ int sh2_drc_init(SH2 *sh2) block_tables[i] = calloc(BLOCK_MAX_COUNT(i), sizeof(*block_tables[0])); if (block_tables[i] == NULL) goto fail; - // max 2 block links (exits) per block block_link_pool[i] = calloc(BLOCK_LINK_MAX_COUNT(i), sizeof(*block_link_pool[0])); if (block_link_pool[i] == NULL) diff --git 
a/pico/32x/memory.c b/pico/32x/memory.c index 60820e1a..06215a7c 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -513,9 +513,9 @@ static void p32x_reg_write8(u32 a, u32 d) case 0x2e: case 0x2f: if (REG8IN16(r, a) != d) { - int cycles = SekCyclesDone(); + unsigned int cycles = SekCyclesDone(); - if (cycles - (int)msh2.m68krcycles_done > 30) + if (CYCLES_GT(cycles - msh2.m68krcycles_done, 64)) p32x_sync_sh2s(cycles); REG8IN16(r, a) = d; From b10a782a36e887603fc283874363a81bfb57e738 Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 12 Oct 2019 11:19:55 +0200 Subject: [PATCH 0229/1110] sh2 drc: bugfix in block management --- cpu/sh2/compiler.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index b7c57b3d..86d4b85a 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -766,13 +766,8 @@ static void rm_from_block_lists(struct block_desc *block) static void rm_block_list(struct block_list **blist) { - struct block_list *next, *current = *blist; - while (current != NULL) { - next = current->next; - rm_from_block_lists(current->block); - current = next; - } - *blist = NULL; + while (*blist != NULL) + rm_from_block_lists((*blist)->block); } static void REGPARM(1) flush_tcache(int tcid) From 52055c13b253cce969a24fa2b95eb9c39ac7ea79 Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 17 Oct 2019 21:54:37 +0200 Subject: [PATCH 0230/1110] sh2 drc: reorganised block mgmt code, plus some small scale optimisations --- cpu/sh2/compiler.c | 721 ++++++++++++++++++------------------ cpu/sh2/compiler.h | 4 +- cpu/sh2/sh2.h | 2 +- pico/32x/memory.c | 56 +-- pico/32x/memory_arm.S | 15 +- pico/pico_int.h | 4 + platform/gp2x/PicoDrive.gpe | 2 + 7 files changed, 410 insertions(+), 394 deletions(-) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 86d4b85a..1acc7215 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -764,58 +764,16 @@ static void rm_from_block_lists(struct block_desc *block) block->list 
= NULL; } -static void rm_block_list(struct block_list **blist) +static void discard_block_list(struct block_list **blist) { - while (*blist != NULL) - rm_from_block_lists((*blist)->block); -} - -static void REGPARM(1) flush_tcache(int tcid) -{ - int i; -#if (DRC_DEBUG & 1) - int tc_used, bl_used; - - tc_used = tcache_sizes[tcid] - (tcache_limit[tcid] - tcache_ptrs[tcid]); - bl_used = BLOCK_MAX_COUNT(tcid) - (block_limit[tcid] - block_counts[tcid]); - elprintf(EL_STATUS, "tcache #%d flush! (%d/%d, bds %d/%d)", tcid, tc_used, - tcache_sizes[tcid], bl_used, BLOCK_MAX_COUNT(tcid)); -#endif - - block_counts[tcid] = 0; - block_limit[tcid] = BLOCK_MAX_COUNT(tcid) - 1; - block_link_pool_counts[tcid] = 0; - blink_free[tcid] = NULL; - memset(unresolved_links[tcid], 0, sizeof(*unresolved_links[0]) * HASH_TABLE_SIZE(tcid)); - memset(hash_tables[tcid], 0, sizeof(*hash_tables[0]) * HASH_TABLE_SIZE(tcid)); - tcache_ptrs[tcid] = tcache_bases[tcid]; - tcache_limit[tcid] = tcache_bases[tcid] + tcache_sizes[tcid]; - if (Pico32xMem->sdram != NULL) { - if (tcid == 0) { // ROM, RAM - memset(Pico32xMem->drcblk_ram, 0, sizeof(Pico32xMem->drcblk_ram)); - memset(Pico32xMem->drclit_ram, 0, sizeof(Pico32xMem->drclit_ram)); - memset(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache)); - memset(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)); - memset(sh2s[0].rts_cache, -1, sizeof(sh2s[0].rts_cache)); - memset(sh2s[1].rts_cache, -1, sizeof(sh2s[1].rts_cache)); - sh2s[0].rts_cache_idx = sh2s[1].rts_cache_idx = 0; - } else { - memset(Pico32xMem->drcblk_ram, 0, sizeof(Pico32xMem->drcblk_ram)); - memset(Pico32xMem->drclit_ram, 0, sizeof(Pico32xMem->drclit_ram)); - memset(Pico32xMem->drcblk_da[tcid - 1], 0, sizeof(Pico32xMem->drcblk_da[tcid - 1])); - memset(Pico32xMem->drclit_da[tcid - 1], 0, sizeof(Pico32xMem->drclit_da[tcid - 1])); - memset(sh2s[tcid - 1].branch_cache, -1, sizeof(sh2s[0].branch_cache)); - memset(sh2s[tcid - 1].rts_cache, -1, sizeof(sh2s[0].rts_cache)); - sh2s[tcid - 
1].rts_cache_idx = 0; - } + struct block_list *next, *current = *blist; + while (current != NULL) { + next = current->next; + current->next = blist_free; + blist_free = current; + current = next; } -#if (DRC_DEBUG & 4) - tcache_dsm_ptrs[tcid] = tcache_bases[tcid]; -#endif - - for (i = 0; i < RAM_SIZE(tcid) / INVAL_PAGE_SIZE; i++) - rm_block_list(&inval_lookup[tcid][i]); - rm_block_list(&inactive_blocks[tcid]); + *blist = NULL; } static void add_to_hashlist(struct block_entry *be, int tcache_id) @@ -902,243 +860,6 @@ static void rm_from_hashlist_unresolved(struct block_link *bl, int tcache_id) bl->next->prev = bl->prev; } -static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nolit, int free); -static void dr_free_oldest_block(int tcache_id) -{ - struct block_desc *bd; - - if (block_limit[tcache_id] >= BLOCK_MAX_COUNT(tcache_id)) { - // block desc wrap around - block_limit[tcache_id] = 0; - } - bd = &block_tables[tcache_id][block_limit[tcache_id]]; - - if (bd->tcache_ptr && bd->tcache_ptr < tcache_ptrs[tcache_id]) { - // cache wrap around - tcache_ptrs[tcache_id] = bd->tcache_ptr; - } - - if (bd->addr && bd->entry_count) - sh2_smc_rm_block_entry(bd, tcache_id, 0, 1); - - block_limit[tcache_id]++; - if (block_limit[tcache_id] >= BLOCK_MAX_COUNT(tcache_id)) - block_limit[tcache_id] = 0; - bd = &block_tables[tcache_id][block_limit[tcache_id]]; - if (bd->tcache_ptr >= tcache_ptrs[tcache_id]) - tcache_limit[tcache_id] = bd->tcache_ptr; - else - tcache_limit[tcache_id] = tcache_bases[tcache_id] + tcache_sizes[tcache_id]; -} - -static u8 *dr_prepare_cache(int tcache_id, int insn_count) -{ - u8 *limit = tcache_limit[tcache_id]; - - // if no block desc available - if (block_counts[tcache_id] == block_limit[tcache_id]) - dr_free_oldest_block(tcache_id); - - // while not enough cache space left (limit - tcache_ptr < max space needed) - while (tcache_limit[tcache_id] - tcache_ptrs[tcache_id] < insn_count * 128) - dr_free_oldest_block(tcache_id); - - if 
(limit != tcache_limit[tcache_id]) { -#if BRANCH_CACHE - if (tcache_id) - memset32(sh2s[tcache_id-1].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4); - else { - memset32(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4); - memset32(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)/4); - } -#endif -#if CALL_STACK - if (tcache_id) { - memset32(sh2s[tcache_id-1].rts_cache, -1, sizeof(sh2s[0].rts_cache)/4); - sh2s[tcache_id-1].rts_cache_idx = 0; - } else { - memset32(sh2s[0].rts_cache, -1, sizeof(sh2s[0].rts_cache)/4); - memset32(sh2s[1].rts_cache, -1, sizeof(sh2s[1].rts_cache)/4); - sh2s[0].rts_cache_idx = sh2s[1].rts_cache_idx = 0; - } -#endif - } - return (u8 *)tcache_ptrs[tcache_id]; -} - -static void dr_mark_memory(int mark, struct block_desc *block, int tcache_id, u32 nolit) -{ - u8 *drc_ram_blk = NULL, *lit_ram_blk = NULL; - u32 addr, end, mask = 0, shift = 0, idx; - - // mark memory blocks as containing compiled code - if ((block->addr & 0xc7fc0000) == 0x06000000 - || (block->addr & 0xfffff000) == 0xc0000000) - { - if (tcache_id != 0) { - // data array - drc_ram_blk = Pico32xMem->drcblk_da[tcache_id-1]; - lit_ram_blk = Pico32xMem->drclit_da[tcache_id-1]; - shift = SH2_DRCBLK_DA_SHIFT; - } - else { - // SDRAM - drc_ram_blk = Pico32xMem->drcblk_ram; - lit_ram_blk = Pico32xMem->drclit_ram; - shift = SH2_DRCBLK_RAM_SHIFT; - } - mask = RAM_SIZE(tcache_id) - 1; - - // mark recompiled insns - addr = block->addr & ~((1 << shift) - 1); - end = block->addr + block->size; - for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift)) - drc_ram_blk[idx++] += mark; - - // mark literal pool - if (addr < (block->addr_lit & ~((1 << shift) - 1))) - addr = block->addr_lit & ~((1 << shift) - 1); - end = block->addr_lit + block->size_lit; - for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift)) - drc_ram_blk[idx++] += mark; - - // mark for literals disabled - if (nolit) { - addr = nolit & ~((1 << shift) - 1); - end = block->addr_lit + 
block->size_lit; - for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift)) - lit_ram_blk[idx++] = 1; - } - - if (mark < 0) - rm_from_block_lists(block); - else { - // add to invalidation lookup lists - addr = block->addr & ~(INVAL_PAGE_SIZE - 1); - end = block->addr + block->size; - for (idx = (addr & mask) / INVAL_PAGE_SIZE; addr < end; addr += INVAL_PAGE_SIZE) - add_to_block_list(&inval_lookup[tcache_id][idx++], block); - - if (addr < (block->addr_lit & ~(INVAL_PAGE_SIZE - 1))) - addr = block->addr_lit & ~(INVAL_PAGE_SIZE - 1); - end = block->addr_lit + block->size_lit; - for (idx = (addr & mask) / INVAL_PAGE_SIZE; addr < end; addr += INVAL_PAGE_SIZE) - add_to_block_list(&inval_lookup[tcache_id][idx++], block); - } - } -} - -static u32 dr_check_nolit(u32 start, u32 end, int tcache_id) -{ - u8 *lit_ram_blk = NULL; - u32 mask = 0, shift = 0, addr, idx; - - if ((start & 0xc7fc0000) == 0x06000000 - || (start & 0xfffff000) == 0xc0000000) - { - if (tcache_id != 0) { - // data array - lit_ram_blk = Pico32xMem->drclit_da[tcache_id-1]; - shift = SH2_DRCBLK_DA_SHIFT; - } - else { - // SDRAM - lit_ram_blk = Pico32xMem->drclit_ram; - shift = SH2_DRCBLK_RAM_SHIFT; - } - mask = RAM_SIZE(tcache_id) - 1; - - addr = start & ~((1 << shift) - 1); - for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift)) - if (lit_ram_blk[idx++]) - break; - - return (addr < start ? start : addr > end ? 
end : addr); - } - - return end; -} - -static struct block_desc *dr_find_inactive_block(int tcache_id, u16 crc, - u32 addr, int size, u32 addr_lit, int size_lit) -{ - struct block_list **head = &inactive_blocks[tcache_id]; - struct block_list *current; - - for (current = *head; current != NULL; current = current->next) { - struct block_desc *block = current->block; - if (block->crc == crc && block->addr == addr && block->size == size && - block->addr_lit == addr_lit && block->size_lit == size_lit) - { - rm_from_block_lists(block); - return block; - } - } - return NULL; -} - -static struct block_desc *dr_add_block(u32 addr, int size, - u32 addr_lit, int size_lit, u16 crc, int is_slave, int *blk_id) -{ - struct block_entry *be; - struct block_desc *bd; - int tcache_id; - int *bcount; - - // do a lookup to get tcache_id and override check - be = dr_get_entry(addr, is_slave, &tcache_id); - if (be != NULL) - dbg(1, "block override for %08x", addr); - - bcount = &block_counts[tcache_id]; - if (*bcount == block_limit[tcache_id]) { - dbg(1, "bd overflow for tcache %d", tcache_id); - return NULL; - } - - bd = &block_tables[tcache_id][*bcount]; - bd->addr = addr; - bd->size = size; - bd->addr_lit = addr_lit; - bd->size_lit = size_lit; - bd->tcache_ptr = tcache_ptr; - bd->crc = crc; - bd->active = 0; - bd->entry_count = 0; -#if (DRC_DEBUG & 2) - bd->refcount = 0; -#endif - - *blk_id = *bcount; - (*bcount)++; - if (*bcount >= BLOCK_MAX_COUNT(tcache_id)) - *bcount = 0; - - return bd; -} - -static void REGPARM(3) *dr_lookup_block(u32 pc, SH2 *sh2, int *tcache_id) -{ - struct block_entry *be = NULL; - void *block = NULL; - - be = dr_get_entry(pc, sh2->is_slave, tcache_id); - if (be != NULL) - block = be->tcache_ptr; - -#if (DRC_DEBUG & 2) - if (be != NULL) - be->block->refcount++; -#endif - return block; -} - -static void *dr_failure(void) -{ - lprintf("recompilation failed\n"); - exit(1); -} - #if LINK_BRANCHES static void dr_block_link(struct block_entry *be, struct block_link 
*bl, int emit_jump) { @@ -1262,6 +983,212 @@ static struct block_link *dr_prepare_ext_branch(struct block_entry *owner, u32 p #endif } +static void dr_mark_memory(int mark, struct block_desc *block, int tcache_id, u32 nolit) +{ + u8 *drc_ram_blk = NULL, *lit_ram_blk = NULL; + u32 addr, end, mask = 0, shift = 0, idx; + + // mark memory blocks as containing compiled code + if ((block->addr & 0xc7fc0000) == 0x06000000 + || (block->addr & 0xfffff000) == 0xc0000000) + { + if (tcache_id != 0) { + // data array + drc_ram_blk = Pico32xMem->drcblk_da[tcache_id-1]; + lit_ram_blk = Pico32xMem->drclit_da[tcache_id-1]; + shift = SH2_DRCBLK_DA_SHIFT; + } + else { + // SDRAM + drc_ram_blk = Pico32xMem->drcblk_ram; + lit_ram_blk = Pico32xMem->drclit_ram; + shift = SH2_DRCBLK_RAM_SHIFT; + } + mask = RAM_SIZE(tcache_id) - 1; + + // mark recompiled insns + addr = block->addr & ~((1 << shift) - 1); + end = block->addr + block->size; + for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift)) + drc_ram_blk[idx++] += mark; + + // mark literal pool + if (addr < (block->addr_lit & ~((1 << shift) - 1))) + addr = block->addr_lit & ~((1 << shift) - 1); + end = block->addr_lit + block->size_lit; + for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift)) + drc_ram_blk[idx++] += mark; + + // mark for literals disabled + if (nolit) { + addr = nolit & ~((1 << shift) - 1); + end = block->addr_lit + block->size_lit; + for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift)) + lit_ram_blk[idx++] = 1; + } + + if (mark < 0) + rm_from_block_lists(block); + else { + // add to invalidation lookup lists + addr = block->addr & ~(INVAL_PAGE_SIZE - 1); + end = block->addr + block->size; + for (idx = (addr & mask) / INVAL_PAGE_SIZE; addr < end; addr += INVAL_PAGE_SIZE) + add_to_block_list(&inval_lookup[tcache_id][idx++], block); + + if (addr < (block->addr_lit & ~(INVAL_PAGE_SIZE - 1))) + addr = block->addr_lit & ~(INVAL_PAGE_SIZE - 1); + end = block->addr_lit + 
block->size_lit; + for (idx = (addr & mask) / INVAL_PAGE_SIZE; addr < end; addr += INVAL_PAGE_SIZE) + add_to_block_list(&inval_lookup[tcache_id][idx++], block); + } + } +} + +static u32 dr_check_nolit(u32 start, u32 end, int tcache_id) +{ + u8 *lit_ram_blk = NULL; + u32 mask = 0, shift = 0, addr, idx; + + if ((start & 0xc7fc0000) == 0x06000000 + || (start & 0xfffff000) == 0xc0000000) + { + if (tcache_id != 0) { + // data array + lit_ram_blk = Pico32xMem->drclit_da[tcache_id-1]; + shift = SH2_DRCBLK_DA_SHIFT; + } + else { + // SDRAM + lit_ram_blk = Pico32xMem->drclit_ram; + shift = SH2_DRCBLK_RAM_SHIFT; + } + mask = RAM_SIZE(tcache_id) - 1; + + addr = start & ~((1 << shift) - 1); + for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift)) + if (lit_ram_blk[idx++]) + break; + + return (addr < start ? start : addr > end ? end : addr); + } + + return end; +} + +static void dr_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nolit, int free) +{ + struct block_link *bl; + u32 i; + + free = free || nolit; // block is invalid if literals are overwritten + dbg(2," %sing block %08x-%08x,%08x-%08x, blkid %d,%d", free?"delet":"disabl", + bd->addr, bd->addr + bd->size, bd->addr_lit, bd->addr_lit + bd->size_lit, + tcache_id, bd - block_tables[tcache_id]); + if (bd->addr == 0 || bd->entry_count == 0) { + dbg(1, " killing dead block!? 
%08x", bd->addr); + return; + } + +#if LINK_BRANCHES + // remove from hash table, make incoming links unresolved + if (bd->active) { + for (i = 0; i < bd->entry_count; i++) { + rm_from_hashlist(&bd->entryp[i], tcache_id); + + while ((bl = bd->entryp[i].links) != NULL) { + dr_block_unlink(bl, 1); + add_to_hashlist_unresolved(bl, tcache_id); + } + } + + dr_mark_memory(-1, bd, tcache_id, nolit); + add_to_block_list(&inactive_blocks[tcache_id], bd); + } + bd->active = 0; +#endif + + if (free) { +#if LINK_BRANCHES + // revoke outgoing links + for (bl = bd->entryp[0].o_links; bl != NULL; bl = bl->o_next) { + if (bl->target) + dr_block_unlink(bl, 0); + else + rm_from_hashlist_unresolved(bl, tcache_id); + bl->jump = NULL; + bl->next = blink_free[bl->tcache_id]; + blink_free[bl->tcache_id] = bl; + } + bd->entryp[0].o_links = NULL; +#endif + // invalidate block + rm_from_block_lists(bd); + bd->addr = bd->size = bd->addr_lit = bd->size_lit = 0; + bd->entry_count = 0; + } + emith_update_cache(); +} + +static struct block_desc *dr_find_inactive_block(int tcache_id, u16 crc, + u32 addr, int size, u32 addr_lit, int size_lit) +{ + struct block_list **head = &inactive_blocks[tcache_id]; + struct block_list *current; + + for (current = *head; current != NULL; current = current->next) { + struct block_desc *block = current->block; + if (block->crc == crc && block->addr == addr && block->size == size && + block->addr_lit == addr_lit && block->size_lit == size_lit) + { + rm_from_block_lists(block); + return block; + } + } + return NULL; +} + +static struct block_desc *dr_add_block(u32 addr, int size, + u32 addr_lit, int size_lit, u16 crc, int is_slave, int *blk_id) +{ + struct block_entry *be; + struct block_desc *bd; + int tcache_id; + int *bcount; + + // do a lookup to get tcache_id and override check + be = dr_get_entry(addr, is_slave, &tcache_id); + if (be != NULL) + dbg(1, "block override for %08x", addr); + + bcount = &block_counts[tcache_id]; + if (*bcount == 
block_limit[tcache_id]) { + dbg(1, "bd overflow for tcache %d", tcache_id); + return NULL; + } + + bd = &block_tables[tcache_id][*bcount]; + bd->addr = addr; + bd->size = size; + bd->addr_lit = addr_lit; + bd->size_lit = size_lit; + bd->tcache_ptr = tcache_ptr; + bd->crc = crc; + bd->active = 0; + bd->list = NULL; + bd->entry_count = 0; +#if (DRC_DEBUG & 2) + bd->refcount = 0; +#endif + + *blk_id = *bcount; + (*bcount)++; + if (*bcount >= BLOCK_MAX_COUNT(tcache_id)) + *bcount = 0; + + return bd; +} + static void dr_link_blocks(struct block_entry *be, int tcache_id) { #if LINK_BRANCHES @@ -1321,6 +1248,139 @@ static void dr_activate_block(struct block_desc *bd, int tcache_id, int is_slave bd->active = 1; } +static void REGPARM(3) ALIGNED(32) *dr_lookup_block(u32 pc, SH2 *sh2, int *tcache_id) +{ + struct block_entry *be = NULL; + void *block = NULL; + + be = dr_get_entry(pc, sh2->is_slave, tcache_id); + if (be != NULL) + block = be->tcache_ptr; + +#if (DRC_DEBUG & 2) + if (be != NULL) + be->block->refcount++; +#endif + return block; +} + +static void dr_free_oldest_block(int tcache_id) +{ + struct block_desc *bd; + + if (block_limit[tcache_id] >= BLOCK_MAX_COUNT(tcache_id)) { + // block desc wrap around + block_limit[tcache_id] = 0; + } + bd = &block_tables[tcache_id][block_limit[tcache_id]]; + + if (bd->tcache_ptr && bd->tcache_ptr < tcache_ptrs[tcache_id]) { + // cache wrap around + tcache_ptrs[tcache_id] = bd->tcache_ptr; + } + + if (bd->addr && bd->entry_count) + dr_rm_block_entry(bd, tcache_id, 0, 1); + + block_limit[tcache_id]++; + if (block_limit[tcache_id] >= BLOCK_MAX_COUNT(tcache_id)) + block_limit[tcache_id] = 0; + bd = &block_tables[tcache_id][block_limit[tcache_id]]; + if (bd->tcache_ptr >= tcache_ptrs[tcache_id]) + tcache_limit[tcache_id] = bd->tcache_ptr; + else + tcache_limit[tcache_id] = tcache_bases[tcache_id] + tcache_sizes[tcache_id]; +} + +static u8 *dr_prepare_cache(int tcache_id, int insn_count) +{ + u8 *limit = tcache_limit[tcache_id]; + + // 
if no block desc available + if (block_counts[tcache_id] == block_limit[tcache_id]) + dr_free_oldest_block(tcache_id); + + // while not enough cache space left (limit - tcache_ptr < max space needed) + while (tcache_limit[tcache_id] - tcache_ptrs[tcache_id] < insn_count * 128) + dr_free_oldest_block(tcache_id); + + if (limit != tcache_limit[tcache_id]) { +#if BRANCH_CACHE + if (tcache_id) + memset32(sh2s[tcache_id-1].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4); + else { + memset32(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4); + memset32(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)/4); + } +#endif +#if CALL_STACK + if (tcache_id) { + memset32(sh2s[tcache_id-1].rts_cache, -1, sizeof(sh2s[0].rts_cache)/4); + sh2s[tcache_id-1].rts_cache_idx = 0; + } else { + memset32(sh2s[0].rts_cache, -1, sizeof(sh2s[0].rts_cache)/4); + memset32(sh2s[1].rts_cache, -1, sizeof(sh2s[1].rts_cache)/4); + sh2s[0].rts_cache_idx = sh2s[1].rts_cache_idx = 0; + } +#endif + } + return (u8 *)tcache_ptrs[tcache_id]; +} + +static void dr_flush_tcache(int tcid) +{ + int i; +#if (DRC_DEBUG & 1) + int tc_used, bl_used; + + tc_used = tcache_sizes[tcid] - (tcache_limit[tcid] - tcache_ptrs[tcid]); + bl_used = BLOCK_MAX_COUNT(tcid) - (block_limit[tcid] - block_counts[tcid]); + elprintf(EL_STATUS, "tcache #%d flush! 
(%d/%d, bds %d/%d)", tcid, tc_used, + tcache_sizes[tcid], bl_used, BLOCK_MAX_COUNT(tcid)); +#endif + + block_counts[tcid] = 0; + block_limit[tcid] = BLOCK_MAX_COUNT(tcid) - 1; + block_link_pool_counts[tcid] = 0; + blink_free[tcid] = NULL; + memset(unresolved_links[tcid], 0, sizeof(*unresolved_links[0]) * HASH_TABLE_SIZE(tcid)); + memset(hash_tables[tcid], 0, sizeof(*hash_tables[0]) * HASH_TABLE_SIZE(tcid)); + tcache_ptrs[tcid] = tcache_bases[tcid]; + tcache_limit[tcid] = tcache_bases[tcid] + tcache_sizes[tcid]; + if (Pico32xMem->sdram != NULL) { + if (tcid == 0) { // ROM, RAM + memset(Pico32xMem->drcblk_ram, 0, sizeof(Pico32xMem->drcblk_ram)); + memset(Pico32xMem->drclit_ram, 0, sizeof(Pico32xMem->drclit_ram)); + memset(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache)); + memset(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)); + memset(sh2s[0].rts_cache, -1, sizeof(sh2s[0].rts_cache)); + memset(sh2s[1].rts_cache, -1, sizeof(sh2s[1].rts_cache)); + sh2s[0].rts_cache_idx = sh2s[1].rts_cache_idx = 0; + } else { + memset(Pico32xMem->drcblk_ram, 0, sizeof(Pico32xMem->drcblk_ram)); + memset(Pico32xMem->drclit_ram, 0, sizeof(Pico32xMem->drclit_ram)); + memset(Pico32xMem->drcblk_da[tcid - 1], 0, sizeof(Pico32xMem->drcblk_da[tcid - 1])); + memset(Pico32xMem->drclit_da[tcid - 1], 0, sizeof(Pico32xMem->drclit_da[tcid - 1])); + memset(sh2s[tcid - 1].branch_cache, -1, sizeof(sh2s[0].branch_cache)); + memset(sh2s[tcid - 1].rts_cache, -1, sizeof(sh2s[0].rts_cache)); + sh2s[tcid - 1].rts_cache_idx = 0; + } + } +#if (DRC_DEBUG & 4) + tcache_dsm_ptrs[tcid] = tcache_bases[tcid]; +#endif + + for (i = 0; i < RAM_SIZE(tcid) / INVAL_PAGE_SIZE; i++) + discard_block_list(&inval_lookup[tcid][i]); + discard_block_list(&inactive_blocks[tcid]); +} + +static void *dr_failure(void) +{ + lprintf("recompilation failed\n"); + exit(1); +} + #define ADD_TO_ARRAY(array, count, item, failcode) { \ if (count >= ARRAY_SIZE(array)) { \ dbg(1, "warning: " #array " overflow"); \ @@ -5066,61 
+5126,7 @@ static void sh2_generate_utils(void) #endif } -static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nolit, int free) -{ - struct block_link *bl; - u32 i; - - free = free || nolit; // block is invalid if literals are overwritten - dbg(2," %sing block %08x-%08x,%08x-%08x, blkid %d,%d", free?"delet":"disabl", - bd->addr, bd->addr + bd->size, bd->addr_lit, bd->addr_lit + bd->size_lit, - tcache_id, bd - block_tables[tcache_id]); - if (bd->addr == 0 || bd->entry_count == 0) { - dbg(1, " killing dead block!? %08x", bd->addr); - return; - } - -#if LINK_BRANCHES - // remove from hash table, make incoming links unresolved - if (bd->active) { - for (i = 0; i < bd->entry_count; i++) { - rm_from_hashlist(&bd->entryp[i], tcache_id); - - while ((bl = bd->entryp[i].links) != NULL) { - dr_block_unlink(bl, 1); - add_to_hashlist_unresolved(bl, tcache_id); - } - } - - dr_mark_memory(-1, bd, tcache_id, nolit); - add_to_block_list(&inactive_blocks[tcache_id], bd); - } - bd->active = 0; -#endif - - if (free) { -#if LINK_BRANCHES - // revoke outgoing links - for (bl = bd->entryp[0].o_links; bl != NULL; bl = bl->o_next) { - if (bl->target) - dr_block_unlink(bl, 0); - else - rm_from_hashlist_unresolved(bl, tcache_id); - bl->jump = NULL; - bl->next = blink_free[bl->tcache_id]; - blink_free[bl->tcache_id] = bl; - } - bd->entryp[0].o_links = NULL; -#endif - // invalidate block - rm_from_block_lists(bd); - bd->addr = bd->size = bd->addr_lit = bd->size_lit = 0; - bd->entry_count = 0; - } - emith_update_cache(); -} - -static void sh2_smc_rm_blocks(u32 a, int tcache_id, u32 shift) +static void sh2_smc_rm_blocks(u32 a, int len, int tcache_id, u32 shift) { struct block_list **blist, *entry, *next; u32 mask = RAM_SIZE(tcache_id) - 1; @@ -5146,12 +5152,12 @@ static void sh2_smc_rm_blocks(u32 a, int tcache_id, u32 shift) start_lit = block->addr_lit & wtmask; end_lit = start_lit + block->size_lit; // disable/delete block if it covers the modified address - if 
((start_addr <= a && a < end_addr) || - (start_lit <= a && a < end_lit)) + if ((start_addr <= a+len && a < end_addr) || + (start_lit <= a+len && a < end_lit)) { dbg(2, "smc remove @%08x", a); - end_addr = (start_lit <= a && block->size_lit ? a : 0); - sh2_smc_rm_block_entry(block, tcache_id, end_addr, 0); + end_addr = (start_lit <= a+len && block->size_lit ? a : 0); + dr_rm_block_entry(block, tcache_id, end_addr, 0); #if (DRC_DEBUG & 2) removed = 1; #endif @@ -5182,17 +5188,20 @@ static void sh2_smc_rm_blocks(u32 a, int tcache_id, u32 shift) #endif } -void sh2_drc_wcheck_ram(unsigned int a, int val, SH2 *sh2) +void sh2_drc_wcheck_ram(unsigned int a, unsigned t, SH2 *sh2) { - dbg(2, "%csh2 smc check @%08x v=%d", sh2->is_slave ? 's' : 'm', a, val); - sh2_smc_rm_blocks(a, 0, SH2_DRCBLK_RAM_SHIFT); + int off = ((u16) t ? 0 : 2); + int len = ((u16) t ? 2 : 0) + (t >> 16 ? 2 : 0); + + sh2_smc_rm_blocks(a + off, len, 0, SH2_DRCBLK_RAM_SHIFT); } -void sh2_drc_wcheck_da(unsigned int a, int val, SH2 *sh2) +void sh2_drc_wcheck_da(unsigned int a, unsigned t, SH2 *sh2) { - int cpuid = sh2->is_slave; - dbg(2, "%csh2 smc check @%08x v=%d", cpuid ? 's' : 'm', a, val); - sh2_smc_rm_blocks(a, 1 + cpuid, SH2_DRCBLK_DA_SHIFT); + int off = ((u16) t ? 0 : 2); + int len = ((u16) t ? 2 : 0) + (t >> 16 ? 
2 : 0); + + sh2_smc_rm_blocks(a + off, len, 1 + sh2->is_slave, SH2_DRCBLK_DA_SHIFT); } int sh2_execute_drc(SH2 *sh2c, int cycles) @@ -5408,9 +5417,9 @@ void sh2_drc_flush_all(void) block_stats(); entry_stats(); bcache_stats(); - flush_tcache(0); - flush_tcache(1); - flush_tcache(2); + dr_flush_tcache(0); + dr_flush_tcache(1); + dr_flush_tcache(2); Pico32x.emu_flags &= ~P32XF_DRC_ROM_C; } diff --git a/cpu/sh2/compiler.h b/cpu/sh2/compiler.h index 3565940d..94dff8c5 100644 --- a/cpu/sh2/compiler.h +++ b/cpu/sh2/compiler.h @@ -1,7 +1,7 @@ int sh2_drc_init(SH2 *sh2); void sh2_drc_finish(SH2 *sh2); -void sh2_drc_wcheck_ram(unsigned int a, int val, SH2 *sh2); -void sh2_drc_wcheck_da(unsigned int a, int val, SH2 *sh2); +void sh2_drc_wcheck_ram(unsigned int a, unsigned val, SH2 *sh2); +void sh2_drc_wcheck_da(unsigned int a, unsigned val, SH2 *sh2); #ifdef DRC_SH2 void sh2_drc_mem_setup(SH2 *sh2); diff --git a/cpu/sh2/sh2.h b/cpu/sh2/sh2.h index cf830dfc..57693ac1 100644 --- a/cpu/sh2/sh2.h +++ b/cpu/sh2/sh2.h @@ -80,7 +80,7 @@ typedef struct SH2_ unsigned char data_array[0x1000]; // cache (can be used as RAM) unsigned int peri_regs[0x200/4]; // periphereal regs -} SH2; +} SH2 ALIGNED(32); #define CYCLE_MULT_SHIFT 10 #define C_M68K_TO_SH2(xsh2, c) \ diff --git a/pico/32x/memory.c b/pico/32x/memory.c index 06215a7c..39504416 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -231,7 +231,7 @@ static NOINLINE void sh2_poll_write(u32 a, u32 d, unsigned int cycles, SH2 *sh2) for (idx = nrd = wr; idx != rd; ) { idx = (idx-1) % PFIFO_SZ; q = &fifo[idx]; - if (q->cpu != cpu && q->a == a) { q->a = -1; } + if (q->a == a && q->cpu != cpu) { q->a = -1; } if (q->a != -1) { nrd = idx; } } rd = nrd; @@ -825,7 +825,8 @@ static void p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2) unsigned int cycles = sh2_cycles_done_m68k(sh2); Pico32x.sh2_regs[4 / 2] = d; p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles); - sh2_end_run(sh2, 4); + if (p32x_sh2_ready(sh2->other_sh2, cycles+16)) 
+ sh2_end_run(sh2, 4); sh2_poll_write(a & ~1, d, cycles, sh2); } return; @@ -851,7 +852,8 @@ static void p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2) REG8IN16(r, a) = d; p32x_m68k_poll_event(P32XF_68KCPOLL); p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles); - sh2_end_run(sh2, 1); + if (p32x_sh2_ready(sh2->other_sh2, cycles+16)) + sh2_end_run(sh2, 1); sh2_poll_write(a & ~1, r[a / 2], cycles, sh2); } return; @@ -943,7 +945,8 @@ static void p32x_sh2reg_write16(u32 a, u32 d, SH2 *sh2) Pico32x.regs[a / 2] = d; p32x_m68k_poll_event(P32XF_68KCPOLL); p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles); - sh2_end_run(sh2, 1); + if (p32x_sh2_ready(sh2->other_sh2, cycles+16)) + sh2_end_run(sh2, 1); sh2_poll_write(a, d, cycles, sh2); } return; @@ -1569,7 +1572,7 @@ static u32 REGPARM(2) sh2_read32_rom(u32 a, SH2 *sh2) // writes #ifdef DRC_SH2 -static void NOINLINE sh2_sdram_poll(u32 a, u32 d, SH2 *sh2) +static void sh2_sdram_poll(u32 a, u32 d, SH2 *sh2) { unsigned cycles; @@ -1577,34 +1580,35 @@ static void NOINLINE sh2_sdram_poll(u32 a, u32 d, SH2 *sh2) cycles = sh2_cycles_done_m68k(sh2); sh2_poll_write(a, d, cycles, sh2); p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_RPOLL, cycles); - sh2_end_run(sh2, 1); + if (p32x_sh2_ready(sh2->other_sh2, cycles+16)) + sh2_end_run(sh2, 1); DRC_RESTORE_SR(sh2); } -void NOINLINE sh2_sdram_checks(u32 a, u32 d, SH2 *sh2, int t) +void sh2_sdram_checks(u32 a, u32 d, SH2 *sh2, u32 t) { - if (t & 0x80) - sh2_sdram_poll(a, d, sh2); - if (t & 0x7f) - sh2_drc_wcheck_ram(a, t & 0x7f, sh2); + if (t & 0x80) sh2_sdram_poll(a, d, sh2); + if (t & 0x7f) sh2_drc_wcheck_ram(a, t & 0x7f, sh2); } -void NOINLINE sh2_sdram_checks_l(u32 a, u32 d, SH2 *sh2, int t) +void sh2_sdram_checks_l(u32 a, u32 d, SH2 *sh2, u32 t) { - sh2_sdram_checks(a, d>>16, sh2, t); - sh2_sdram_checks(a+2, d, sh2, t>>16); + u32 m = 0x80 | 0x800000; + + if (t & 0x000080) sh2_sdram_poll(a, d>>16, sh2); + if (t & 0x800000) sh2_sdram_poll(a+2, d, sh2); + if (t & ~m) 
sh2_drc_wcheck_ram(a, t & ~m, sh2); } #ifndef _ASM_32X_MEMORY_C -static void sh2_da_checks(u32 a, int t, SH2 *sh2) +static void sh2_da_checks(u32 a, u32 t, SH2 *sh2) { sh2_drc_wcheck_da(a, t, sh2); } -static void NOINLINE sh2_da_checks_l(u32 a, int t, SH2 *sh2) +static void sh2_da_checks_l(u32 a, u32 t, SH2 *sh2) { - sh2_da_checks(a, t, sh2); - sh2_da_checks(a+2, t>>16, sh2); + sh2_drc_wcheck_da(a, t, sh2); } #endif #endif @@ -1667,7 +1671,7 @@ static void REGPARM(3) sh2_write8_sdram(u32 a, u32 d, SH2 *sh2) ((u8 *)sh2->p_sdram)[a1] = d; #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_ram; - int t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; + u32 t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; if (t) sh2_sdram_checks(a & ~1, ((u16 *)sh2->p_sdram)[a1 / 2], sh2, t); #endif @@ -1679,7 +1683,7 @@ static void REGPARM(3) sh2_write8_da(u32 a, u32 d, SH2 *sh2) sh2->data_array[a1] = d; #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_da; - int t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; + u32 t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; if (t) sh2_da_checks(a, t, sh2); #endif @@ -1741,7 +1745,7 @@ static void REGPARM(3) sh2_write16_sdram(u32 a, u32 d, SH2 *sh2) ((u16 *)sh2->p_sdram)[a1 / 2] = d; #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_ram; - int t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; + u32 t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; if (t) sh2_sdram_checks(a, d, sh2, t); #endif @@ -1753,7 +1757,7 @@ static void REGPARM(3) sh2_write16_da(u32 a, u32 d, SH2 *sh2) ((u16 *)sh2->data_array)[a1 / 2] = d; #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_da; - int t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; + u32 t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; if (t) sh2_da_checks(a, t, sh2); #endif @@ -1816,8 +1820,8 @@ static void REGPARM(3) sh2_write32_sdram(u32 a, u32 d, SH2 *sh2) *(u32 *)(sh2->p_sdram + a1) = (d << 16) | (d >> 16); #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_ram; - int t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; - int u = p[(a1+2) >> SH2_DRCBLK_RAM_SHIFT]; + u32 t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; + u32 u = p[(a1+2) >> SH2_DRCBLK_RAM_SHIFT]; if (t|(u<<16)) sh2_sdram_checks_l(a, d, sh2, 
t|(u<<16)); #endif @@ -1829,8 +1833,8 @@ static void REGPARM(3) sh2_write32_da(u32 a, u32 d, SH2 *sh2) *((u32 *)sh2->data_array + a1/4) = (d << 16) | (d >> 16); #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_da; - int t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; - int u = p[(a1+2) >> SH2_DRCBLK_DA_SHIFT]; + u32 t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; + u32 u = p[(a1+2) >> SH2_DRCBLK_DA_SHIFT]; if (t|(u<<16)) sh2_da_checks_l(a, t|(u<<16), sh2); #endif diff --git a/pico/32x/memory_arm.S b/pico/32x/memory_arm.S index ba83a6bf..b3a94b62 100644 --- a/pico/32x/memory_arm.S +++ b/pico/32x/memory_arm.S @@ -17,6 +17,7 @@ .equ SH2_DRAM_OW, 1<<(32-SH2_DRAM_SHIFT) @ DRAM overwrite mode bit .text +.align 5 #if 0 @ u32 a, SH2 *sh2 @@ -142,11 +143,12 @@ sh2_write8_sdram: ldrb r3, [ip, r3, lsr #SH2_RAM_SHIFT+1] cmp r3, #0 bxeq lr + @ need to load aligned 16 bit data for check ldr ip, [r2, #OFS_SH2_p_sdram] bic r0, r0, #1 - mov r3, r0, lsl #SH2_RAM_SHIFT - mov r3, r3, lsr #SH2_RAM_SHIFT - ldrh r1, [ip, r3] + mov r1, r0, lsl #SH2_RAM_SHIFT + mov r1, r1, lsr #SH2_RAM_SHIFT + ldrh r1, [ip, r1] b sh2_sdram_checks #else bx lr @@ -252,13 +254,8 @@ sh2_write32_da: ldr ip, [r2, #OFS_SH2_p_drcblk_da] ldrb r1, [ip, r3, lsr #SH2_DA_SHIFT+1]! 
ldrb ip, [ip, #1] - orrs r3, r1, ip, lsl #16 + orrs r1, r1, ip, lsl #16 bxeq lr - stmfd sp!, {r0, r2, ip, lr} - bl sh2_drc_wcheck_da - ldmfd sp!, {r0, r2, ip, lr} - add r0, r0, #2 - mov r1, ip b sh2_drc_wcheck_da #else bx lr diff --git a/pico/pico_int.h b/pico/pico_int.h index 89acc4fb..0fc458ef 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -921,6 +921,10 @@ void p32x_event_schedule(unsigned int now, enum p32x_event event, int after); void p32x_event_schedule_sh2(SH2 *sh2, enum p32x_event event, int after); void p32x_schedule_hint(SH2 *sh2, unsigned int m68k_cycles); +#define p32x_sh2_ready(sh2, cycles) \ + (CYCLES_GT(cycles,sh2->m68krcycles_done) && \ + !(sh2->state&(SH2_STATE_CPOLL|SH2_STATE_VPOLL|SH2_STATE_RPOLL))) + // 32x/memory.c extern struct Pico32xMem *Pico32xMem; unsigned int PicoRead8_32x(unsigned int a); diff --git a/platform/gp2x/PicoDrive.gpe b/platform/gp2x/PicoDrive.gpe index 1c065185..59416d93 100644 --- a/platform/gp2x/PicoDrive.gpe +++ b/platform/gp2x/PicoDrive.gpe @@ -7,6 +7,8 @@ if ! 
[ -e /dev/accel ]; then export POLLUX_RAM_TIMINGS='ram_timings=2,9,4,1,1,1,1' export POLLUX_LCD_TIMINGS_NTSC='lcd_timings=397,1,37,277,341,0,17,337;clkdiv0=9' export POLLUX_LCD_TIMINGS_PAL='lcd_timings=428,1,37,277,341,0,17,337;clkdiv0=10' +else + export POLLUX_RAM_TIMINGS='ram_timings=3,9,4,1,1,1,1' fi ./PicoDrive "$@" From 1fd8f98696080abb6a8463eb84a1a371d0389e68 Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 18 Oct 2019 00:16:54 +0200 Subject: [PATCH 0231/1110] fix gp2x regression --- cpu/sh2/compiler.c | 2 +- cpu/sh2/sh2.h | 4 ++-- tools/mkoffsets.sh | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 1acc7215..b2306cf2 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -1248,7 +1248,7 @@ static void dr_activate_block(struct block_desc *bd, int tcache_id, int is_slave bd->active = 1; } -static void REGPARM(3) ALIGNED(32) *dr_lookup_block(u32 pc, SH2 *sh2, int *tcache_id) +static void REGPARM(3) *dr_lookup_block(u32 pc, SH2 *sh2, int *tcache_id) { struct block_entry *be = NULL; void *block = NULL; diff --git a/cpu/sh2/sh2.h b/cpu/sh2/sh2.h index 57693ac1..05ae7052 100644 --- a/cpu/sh2/sh2.h +++ b/cpu/sh2/sh2.h @@ -13,7 +13,7 @@ typedef enum { typedef struct SH2_ { // registers. this MUST correlate with enum sh2_reg_e. - unsigned int r[16]; // 00 + unsigned int r[16] ALIGNED(32); unsigned int pc; // 40 unsigned int ppc; unsigned int pr; @@ -80,7 +80,7 @@ typedef struct SH2_ unsigned char data_array[0x1000]; // cache (can be used as RAM) unsigned int peri_regs[0x200/4]; // periphereal regs -} SH2 ALIGNED(32); +} SH2; #define CYCLE_MULT_SHIFT 10 #define C_M68K_TO_SH2(xsh2, c) \ diff --git a/tools/mkoffsets.sh b/tools/mkoffsets.sh index 2223b804..8a0557c7 100755 --- a/tools/mkoffsets.sh +++ b/tools/mkoffsets.sh @@ -48,7 +48,7 @@ get_define () # prefix struct member member... 
line=$(printf "#define %-20s 0x%04x" $prefix$name $rodata) } -CFLAGS="$CFLAGS -fno-lto" +if echo $CFLAGS | grep -qe -flto; then CFLAGS="$CFLAGS -fno-lto"; fi # determine endianess echo "const int val = 1;" >/tmp/getoffs.c compile_rodata From 7e940f142e4f9840e76a86f9c0c30ad90bb2684f Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 19 Oct 2019 08:53:28 +0200 Subject: [PATCH 0232/1110] 32x, finetuning --- cpu/sh2/compiler.c | 31 ++++++++++++++----------------- cpu/sh2/compiler.h | 4 ++-- pico/32x/memory.c | 41 ++++++++++++++++++++--------------------- pico/32x/memory_arm.S | 8 +++++--- 4 files changed, 41 insertions(+), 43 deletions(-) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index b2306cf2..e9173c4c 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -272,9 +272,9 @@ static void REGPARM(3) *sh2_drc_log_entry(void *block, SH2 *sh2, u32 sr) // and can be discarded early // XXX: need to tune sizes static const int tcache_sizes[TCACHE_BUFFERS] = { - DRC_TCACHE_SIZE * 14 / 16, // ROM (rarely used), DRAM - DRC_TCACHE_SIZE / 16, // BIOS, data array in master sh2 - DRC_TCACHE_SIZE / 16, // ... slave + DRC_TCACHE_SIZE * 30 / 32, // ROM (rarely used), DRAM + DRC_TCACHE_SIZE / 32, // BIOS, data array in master sh2 + DRC_TCACHE_SIZE / 32, // ... slave }; static u8 *tcache_bases[TCACHE_BUFFERS]; @@ -332,13 +332,13 @@ struct block_desc { struct block_entry entryp[MAX_BLOCK_ENTRIES]; }; -#define BLOCK_MAX_COUNT(tcid) ((tcid) ? 256 : 16*256) +#define BLOCK_MAX_COUNT(tcid) ((tcid) ? 256 : 32*256) static struct block_desc *block_tables[TCACHE_BUFFERS]; static int block_counts[TCACHE_BUFFERS]; static int block_limit[TCACHE_BUFFERS]; // we have block_link_pool to avoid using mallocs -#define BLOCK_LINK_MAX_COUNT(tcid) ((tcid) ? 1024 : 16*1024) +#define BLOCK_LINK_MAX_COUNT(tcid) ((tcid) ? 
512 : 32*512) static struct block_link *block_link_pool[TCACHE_BUFFERS]; static int block_link_pool_counts[TCACHE_BUFFERS]; static struct block_link **unresolved_links[TCACHE_BUFFERS]; @@ -363,7 +363,7 @@ static struct block_list *inactive_blocks[TCACHE_BUFFERS]; // each array has len: sizeof(mem) / INVAL_PAGE_SIZE static struct block_list **inval_lookup[TCACHE_BUFFERS]; -#define HASH_TABLE_SIZE(tcid) ((tcid) ? 256 : 64*256) +#define HASH_TABLE_SIZE(tcid) ((tcid) ? 512 : 64*512) static struct block_entry **hash_tables[TCACHE_BUFFERS]; #define HASH_FUNC(hash_tab, addr, mask) \ @@ -5188,20 +5188,14 @@ static void sh2_smc_rm_blocks(u32 a, int len, int tcache_id, u32 shift) #endif } -void sh2_drc_wcheck_ram(unsigned int a, unsigned t, SH2 *sh2) +void sh2_drc_wcheck_ram(unsigned int a, unsigned len, SH2 *sh2) { - int off = ((u16) t ? 0 : 2); - int len = ((u16) t ? 2 : 0) + (t >> 16 ? 2 : 0); - - sh2_smc_rm_blocks(a + off, len, 0, SH2_DRCBLK_RAM_SHIFT); + sh2_smc_rm_blocks(a, len, 0, SH2_DRCBLK_RAM_SHIFT); } -void sh2_drc_wcheck_da(unsigned int a, unsigned t, SH2 *sh2) +void sh2_drc_wcheck_da(unsigned int a, unsigned len, SH2 *sh2) { - int off = ((u16) t ? 0 : 2); - int len = ((u16) t ? 2 : 0) + (t >> 16 ? 
2 : 0); - - sh2_smc_rm_blocks(a + off, len, 1 + sh2->is_slave, SH2_DRCBLK_DA_SHIFT); + sh2_smc_rm_blocks(a, len, 1 + sh2->is_slave, SH2_DRCBLK_DA_SHIFT); } int sh2_execute_drc(SH2 *sh2c, int cycles) @@ -6403,6 +6397,9 @@ end: last_btarget = 0; op = 0; // delay/poll insns counter for (i = 0, pc = base_pc; i < i_end; i++, pc += 2) { + int null; + if ((op_flags[i] & OF_BTARGET) && dr_get_entry(pc, is_slave, &null)) + break; // branch target already compiled opd = &ops[i]; crc += FETCH_OP(pc); @@ -6483,7 +6480,7 @@ end: op ++; // condition 2 #endif } - end_pc = base_pc + i_end * 2; + end_pc = pc; // end_literals is used to decide to inline a literal or not // XXX: need better detection if this actually is used in write diff --git a/cpu/sh2/compiler.h b/cpu/sh2/compiler.h index 94dff8c5..5f374c8c 100644 --- a/cpu/sh2/compiler.h +++ b/cpu/sh2/compiler.h @@ -1,7 +1,7 @@ int sh2_drc_init(SH2 *sh2); void sh2_drc_finish(SH2 *sh2); -void sh2_drc_wcheck_ram(unsigned int a, unsigned val, SH2 *sh2); -void sh2_drc_wcheck_da(unsigned int a, unsigned val, SH2 *sh2); +void sh2_drc_wcheck_ram(unsigned int a, unsigned len, SH2 *sh2); +void sh2_drc_wcheck_da(unsigned int a, unsigned len, SH2 *sh2); #ifdef DRC_SH2 void sh2_drc_mem_setup(SH2 *sh2); diff --git a/pico/32x/memory.c b/pico/32x/memory.c index 39504416..44bc72d7 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -162,15 +162,13 @@ void NOINLINE p32x_sh2_poll_event(SH2 *sh2, u32 flags, u32 m68k_cycles) sh2->poll_addr = sh2->poll_cycles = sh2->poll_cnt = 0; } -static void sh2s_sync_on_read(SH2 *sh2) +static void sh2s_sync_on_read(SH2 *sh2, unsigned cycles) { - int cycles; if (sh2->poll_cnt != 0) return; - cycles = sh2_cycles_done(sh2); - if (cycles > 600) - p32x_sync_other_sh2(sh2, sh2->m68krcycles_done + C_SH2_TO_M68K(sh2, cycles)); + if (p32x_sh2_ready(sh2->other_sh2, cycles-250)) + p32x_sync_other_sh2(sh2, cycles); } // poll fifo, stores writes to potential addresses used for polling. 
@@ -271,8 +269,8 @@ u32 REGPARM(3) p32x_sh2_poll_memory16(unsigned int a, u32 d, SH2 *sh2) DRC_SAVE_SR(sh2); // is this a synchronisation address? if(p[(a & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT] & 0x80) { - sh2s_sync_on_read(sh2); cycles = sh2_cycles_done_m68k(sh2); + sh2s_sync_on_read(sh2, cycles); // check poll fifo and sign-extend the result correctly d = (s16)sh2_poll_read(a, d, cycles, sh2); } @@ -291,8 +289,8 @@ u32 REGPARM(3) p32x_sh2_poll_memory32(unsigned int a, u32 d, SH2 *sh2) DRC_SAVE_SR(sh2); // is this a synchronisation address? if(p[(a & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT] & 0x80) { - sh2s_sync_on_read(sh2); cycles = sh2_cycles_done_m68k(sh2); + sh2s_sync_on_read(sh2, cycles); // check poll fifo and sign-extend the result correctly d = (sh2_poll_read(a, d >> 16, cycles, sh2) << 16) | ((u16)sh2_poll_read(a+2, d, cycles, sh2)); @@ -729,6 +727,7 @@ static void p32x_vdp_write16(u32 a, u32 d, SH2 *sh2) static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2) { u16 *r = Pico32x.regs; + unsigned cycles; a &= 0x3e; switch (a/2) { @@ -737,8 +736,9 @@ static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2) | Pico32x.sh2irq_mask[sh2->is_slave]; case 0x04/2: // H count (often as comm too) sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9); - sh2s_sync_on_read(sh2); - return sh2_poll_read(a, Pico32x.sh2_regs[4 / 2], sh2_cycles_done_m68k(sh2), sh2); + cycles = sh2_cycles_done_m68k(sh2); + sh2s_sync_on_read(sh2, cycles); + return sh2_poll_read(a, Pico32x.sh2_regs[4 / 2], cycles, sh2); case 0x06/2: return (r[a / 2] & ~P32XS_FULL) | 0x4000; case 0x08/2: // DREQ src @@ -770,8 +770,9 @@ static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2) case 0x2c/2: case 0x2e/2: sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9); - sh2s_sync_on_read(sh2); - return sh2_poll_read(a, r[a / 2], sh2_cycles_done_m68k(sh2), sh2); + cycles = sh2_cycles_done_m68k(sh2); + sh2s_sync_on_read(sh2, cycles); + return sh2_poll_read(a, r[a / 2], cycles, sh2); case 0x30/2: // PWM case 0x32/2: case 0x34/2: @@ -825,7 +826,7 @@ static void 
p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2) unsigned int cycles = sh2_cycles_done_m68k(sh2); Pico32x.sh2_regs[4 / 2] = d; p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles); - if (p32x_sh2_ready(sh2->other_sh2, cycles+16)) + if (p32x_sh2_ready(sh2->other_sh2, cycles+8)) sh2_end_run(sh2, 4); sh2_poll_write(a & ~1, d, cycles, sh2); } @@ -852,7 +853,7 @@ static void p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2) REG8IN16(r, a) = d; p32x_m68k_poll_event(P32XF_68KCPOLL); p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles); - if (p32x_sh2_ready(sh2->other_sh2, cycles+16)) + if (p32x_sh2_ready(sh2->other_sh2, cycles+8)) sh2_end_run(sh2, 1); sh2_poll_write(a & ~1, r[a / 2], cycles, sh2); } @@ -945,7 +946,7 @@ static void p32x_sh2reg_write16(u32 a, u32 d, SH2 *sh2) Pico32x.regs[a / 2] = d; p32x_m68k_poll_event(P32XF_68KCPOLL); p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles); - if (p32x_sh2_ready(sh2->other_sh2, cycles+16)) + if (p32x_sh2_ready(sh2->other_sh2, cycles+8)) sh2_end_run(sh2, 1); sh2_poll_write(a, d, cycles, sh2); } @@ -1580,7 +1581,7 @@ static void sh2_sdram_poll(u32 a, u32 d, SH2 *sh2) cycles = sh2_cycles_done_m68k(sh2); sh2_poll_write(a, d, cycles, sh2); p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_RPOLL, cycles); - if (p32x_sh2_ready(sh2->other_sh2, cycles+16)) + if (p32x_sh2_ready(sh2->other_sh2, cycles+8)) sh2_end_run(sh2, 1); DRC_RESTORE_SR(sh2); } @@ -1588,27 +1589,25 @@ static void sh2_sdram_poll(u32 a, u32 d, SH2 *sh2) void sh2_sdram_checks(u32 a, u32 d, SH2 *sh2, u32 t) { if (t & 0x80) sh2_sdram_poll(a, d, sh2); - if (t & 0x7f) sh2_drc_wcheck_ram(a, t & 0x7f, sh2); + if (t & 0x7f) sh2_drc_wcheck_ram(a, 2, sh2); } void sh2_sdram_checks_l(u32 a, u32 d, SH2 *sh2, u32 t) { - u32 m = 0x80 | 0x800000; - if (t & 0x000080) sh2_sdram_poll(a, d>>16, sh2); if (t & 0x800000) sh2_sdram_poll(a+2, d, sh2); - if (t & ~m) sh2_drc_wcheck_ram(a, t & ~m, sh2); + if (t & ~0x800080) sh2_drc_wcheck_ram(a, 4, sh2); } #ifndef _ASM_32X_MEMORY_C 
static void sh2_da_checks(u32 a, u32 t, SH2 *sh2) { - sh2_drc_wcheck_da(a, t, sh2); + sh2_drc_wcheck_da(a, 2, sh2); } static void sh2_da_checks_l(u32 a, u32 t, SH2 *sh2) { - sh2_drc_wcheck_da(a, t, sh2); + sh2_drc_wcheck_da(a, 4, sh2); } #endif #endif diff --git a/pico/32x/memory_arm.S b/pico/32x/memory_arm.S index b3a94b62..40707fe7 100644 --- a/pico/32x/memory_arm.S +++ b/pico/32x/memory_arm.S @@ -139,12 +139,11 @@ sh2_write8_sdram: mov r3, r3, lsl #SH2_RAM_SHIFT strb r1, [ip, r3, lsr #SH2_RAM_SHIFT] #ifdef DRC_SH2 - ldr ip, [r2, #OFS_SH2_p_drcblk_ram] - ldrb r3, [ip, r3, lsr #SH2_RAM_SHIFT+1] + ldr r1, [r2, #OFS_SH2_p_drcblk_ram] + ldrb r3, [r1, r3, lsr #SH2_RAM_SHIFT+1] cmp r3, #0 bxeq lr @ need to load aligned 16 bit data for check - ldr ip, [r2, #OFS_SH2_p_sdram] bic r0, r0, #1 mov r1, r0, lsl #SH2_RAM_SHIFT mov r1, r1, lsr #SH2_RAM_SHIFT @@ -166,6 +165,7 @@ sh2_write8_da: bic r0, r0, #1 cmp r1, #0 bxeq lr + mov r1, #2 b sh2_drc_wcheck_da #else bx lr @@ -206,6 +206,7 @@ sh2_write16_da: ldrb r1, [ip, r3, lsr #1] cmp r1, #0 bxeq lr + mov r1, #2 b sh2_drc_wcheck_da #else bx lr @@ -256,6 +257,7 @@ sh2_write32_da: ldrb ip, [ip, #1] orrs r1, r1, ip, lsl #16 bxeq lr + mov r1, #4 b sh2_drc_wcheck_da #else bx lr From 95a46e3f961ec21561b7d79273d4ee3feae535b6 Mon Sep 17 00:00:00 2001 From: Cameron Cawley Date: Sat, 19 Oct 2019 17:06:52 +0100 Subject: [PATCH 0233/1110] Only build evdev code on GP2X and Pandora --- Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 2903a689..40c9f617 100644 --- a/Makefile +++ b/Makefile @@ -98,6 +98,7 @@ platform/libpicofe/linux/plat.o: CFLAGS += -DPANDORA OBJS += platform/pandora/plat.o OBJS += platform/pandora/asm_utils.o OBJS += platform/common/arm_utils.o +OBJS += platform/libpicofe/linux/in_evdev.o OBJS += platform/libpicofe/linux/fbdev.o OBJS += platform/libpicofe/linux/xenv.o OBJS += platform/libpicofe/pandora/plat.o @@ -105,6 +106,7 @@ USE_FRONTEND = 1 endif ifeq "$(PLATFORM)" 
"gp2x" OBJS += platform/common/arm_utils.o +OBJS += platform/libpicofe/linux/in_evdev.o OBJS += platform/libpicofe/gp2x/in_gp2x.o OBJS += platform/libpicofe/gp2x/soc.o OBJS += platform/libpicofe/gp2x/soc_mmsp2.o @@ -132,8 +134,7 @@ OBJS += platform/common/main.o platform/common/emu.o \ # libpicofe OBJS += platform/libpicofe/input.o platform/libpicofe/readpng.o \ - platform/libpicofe/fonts.o platform/libpicofe/linux/in_evdev.o \ - platform/libpicofe/linux/plat.o + platform/libpicofe/fonts.o platform/libpicofe/linux/plat.o # libpicofe - sound OBJS += platform/libpicofe/sndout.o From 572ab2edc4660438fc109684d228daadfd40d47e Mon Sep 17 00:00:00 2001 From: Cameron Cawley Date: Sat, 19 Oct 2019 17:07:12 +0100 Subject: [PATCH 0234/1110] Remove unused header --- platform/linux/in_evdev.h | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 platform/linux/in_evdev.h diff --git a/platform/linux/in_evdev.h b/platform/linux/in_evdev.h deleted file mode 100644 index c5aef9bb..00000000 --- a/platform/linux/in_evdev.h +++ /dev/null @@ -1,5 +0,0 @@ - -struct in_default_bind; -extern int in_evdev_allow_abs_only; - -void in_evdev_init(const struct in_default_bind *defbinds); From 9bd6706dca2fb8d15c9f3ae2035f4d670d07ee03 Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 9 Nov 2019 10:24:52 +0100 Subject: [PATCH 0235/1110] sh2 drc: moved host register assignment to code emitters, minor bugfixing --- cpu/drc/emit_arm.c | 18 ++- cpu/drc/emit_arm64.c | 16 +- cpu/drc/emit_mips.c | 32 ++-- cpu/drc/emit_x86.c | 29 +++- cpu/sh2/compiler.c | 293 ++++++++++-------------------------- cpu/sh2/compiler.h | 4 +- cpu/sh2/sh2.h | 1 + platform/common/disarm.c | 8 +- platform/common/disarm.h | 2 +- platform/common/dismips.c | 12 +- platform/common/dismips.h | 2 +- platform/common/host_dasm.c | 7 +- 12 files changed, 174 insertions(+), 250 deletions(-) diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index ec2958b1..e35d3471 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -6,9 +6,21 
@@ * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. */ -#define HOST_REGS 16 -#define CONTEXT_REG 11 -#define RET_REG 0 +#define HOST_REGS 16 + +// OABI/EABI: params: r0-r3, return: r0-r1, temp: r12,r14, saved: r4-r8,r10,r11 +// SP,PC: r13,r15 must not be used. saved: r9 (for platform use, e.g. on ios) +#define RET_REG 0 +#define PARAM_REGS { 0, 1, 2, 3 } +#ifndef __MACH__ +#define PRESERVED_REGS { 4, 5, 6, 7, 8, 9, 10, 11 } +#else +#define PRESERVED_REGS { 4, 5, 6, 7, 8, 10, 11 } // no r9.. +#endif +#define TEMPORARY_REGS { 12, 14 } + +#define CONTEXT_REG 11 +#define STATIC_SH2_REGS { SHR_SR,10 , SHR_R0,8 , SHR_R0+1,9 } // XXX: tcache_ptr type for SVP and SH2 compilers differs.. #define EMIT_PTR(ptr, x) \ diff --git a/cpu/drc/emit_arm64.c b/cpu/drc/emit_arm64.c index 4bad6469..0c36b2bc 100644 --- a/cpu/drc/emit_arm64.c +++ b/cpu/drc/emit_arm64.c @@ -6,8 +6,16 @@ * See COPYING file in the top-level directory. */ #define HOST_REGS 32 -#define CONTEXT_REG 19 + +// AAPCS64: params: r0-r7, return: r0-r1, temp: r8-r17, saved: r19-r29 +// reserved: r18 (for platform use) #define RET_REG 0 +#define PARAM_REGS { 0, 1, 2, 3, 4, 5, 6, 7 } +#define PRESERVED_REGS { 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29 } +#define TEMPORARY_REGS { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 } + +#define CONTEXT_REG 29 +#define STATIC_SH2_REGS { SHR_SR,28 , SHR_R0,27 , SHR_R0+1,26 } // R31 doesn't exist, it aliases either with zero or SP #define SP 31 // stack pointer @@ -100,9 +108,9 @@ enum { XT_UXTW=0x4, XT_UXTX=0x6, XT_LSL=0x7, XT_SXTW=0xc, XT_SXTX=0xe }; #define A64_NEGS_REG(rd, rm, stype, simm) \ A64_SUBS_REG(rd,Z0,rm,stype,simm) #define A64_NEGC_REG(rd, rm) \ - A64_SBC_REG(rd,Z0,rm,stype,simm) + A64_SBC_REG(rd,Z0,rm) #define A64_NEGCS_REG(rd, rm) \ - A64_SBCS_REG(rd,Z0,rm,stype,simm) + A64_SBCS_REG(rd,Z0,rm) #define A64_CMP_REG(rn, rm, stype, simm) \ A64_SUBS_REG(Z0, rn, rm, stype, simm) #define A64_CMN_REG(rn, rm, stype, simm) \ @@ 
-145,7 +153,7 @@ enum { XT_UXTW=0x4, XT_UXTX=0x6, XT_LSL=0x7, XT_SXTW=0xc, XT_SXTX=0xe }; A64_INSN(0xd,OP_ADD &3,0x0,_,rm,_,_,rn,rd) #define A64_ADCS_REG(rd, rn, rm) \ A64_INSN(0xd,OP_ADDS&3,0x0,_,rm,_,_,rn,rd) -#define A64_SBC_REG(rd, rn, rm, s) \ +#define A64_SBC_REG(rd, rn, rm) \ A64_INSN(0xd,OP_SUB &3,0x0,_,rm,_,_,rn,rd) #define A64_SBCS_REG(rd, rn, rm) \ A64_INSN(0xd,OP_SUBS&3,0x0,_,rm,_,_,rn,rd) diff --git a/cpu/drc/emit_mips.c b/cpu/drc/emit_mips.c index 38d68f40..832364e9 100644 --- a/cpu/drc/emit_mips.c +++ b/cpu/drc/emit_mips.c @@ -6,8 +6,17 @@ * See COPYING file in the top-level directory. */ #define HOST_REGS 32 + +// MIPS ABI: params: r4-r7, return: r2-r3, temp: r1(at),r8-r15,r24-r25,r31(ra), +// saved: r16-r23,r30, reserved: r0(zero), r26-r27(irq), r28(gp), r29(sp) +// r1,r15,r24,r25(at,t7-t9) are used internally by the code emitter +#define RET_REG 2 // v0 +#define PARAM_REGS { 4, 5, 6, 7 } // a0-a3 +#define PRESERVED_REGS { 16, 17, 18, 19, 20, 21, 22, 23 } // s0-s7 +#define TEMPORARY_REGS { 2, 3, 8, 9, 10, 11, 12, 13, 14 } // v0-v1,t0-t6 + #define CONTEXT_REG 23 // s7 -#define RET_REG 2 // v0 +#define STATIC_SH2_REGS { SHR_SR,22 , SHR_R0,21 , SHR_R0+1,20 } // NB: the ubiquitous JZ74[46]0 uses MIPS32 Release 1, a slight MIPS II superset @@ -73,7 +82,7 @@ enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 }; #define MIPS_OP_IMM(op, rt, rs, imm) \ MIPS_INSN(op, rs, rt, _, _, (u16)(imm)) // I-type -// rd = rt OP rs +// rd = rs OP rt #define MIPS_ADD_REG(rd, rs, rt) \ MIPS_OP_REG(FN_ADDU, rd, rs, rt) #define MIPS_SUB_REG(rd, rs, rt) \ @@ -334,7 +343,7 @@ static void *emith_branch(u32 op) #define JMP_EMIT(cond, ptr) { \ u32 val_ = (u8 *)tcache_ptr - (u8 *)(ptr) - 4; \ - emith_flush(); /* NO delay slot handling across jump targets */ \ + emith_flush(); /* prohibit delay slot switching across jump targets */ \ EMIT_PTR(ptr, MIPS_BCONDZ(cond_m, cond_r, val_ & 0x0003ffff)); \ } @@ -658,14 +667,19 @@ static void emith_move_imm(int r, uintptr_t 
imm) EMIT_PTR(ptr_, (*ptr_ & 0xffff0000) | (u16)(s8)(imm)); \ } while (0) -// arithmetic, immediate +// arithmetic, immediate - can only be ADDI[U], since SUBI[U] doesn't exist static void emith_arith_imm(int op, int rd, int rs, u32 imm) { - if ((s16)imm != imm) { + if ((s16)imm == imm) { + if (imm || rd != rs) + EMIT(MIPS_OP_IMM(op, rd, rs, imm)); + } else if ((s32)imm < 0) { + emith_move_r_imm(AT, -imm); + EMIT(MIPS_OP_REG(FN_SUB + (op-OP_ADDI), rd, rs, AT)); + } else { emith_move_r_imm(AT, imm); EMIT(MIPS_OP_REG(FN_ADD + (op-OP_ADDI), rd, rs, AT)); - } else if (imm || rd != rs) - EMIT(MIPS_OP_IMM(op, rd, rs, imm)); + } } #define emith_add_r_imm(r, imm) \ @@ -1137,7 +1151,7 @@ static int emith_cond_check(int cond, int *r) // conditions using CZ case DCOND_LS: // C || Z case DCOND_HI: // !C && !Z - EMIT(MIPS_ADD_IMM(AT, FC, (u16)-1)); // !C && !Z + EMIT(MIPS_ADD_IMM(AT, FC, -1)); // !C && !Z EMIT(MIPS_AND_REG(AT, FNZ, AT)); *r = AT, b = (cond == DCOND_HI ? MIPS_BNE : MIPS_BEQ); break; @@ -1161,7 +1175,7 @@ static int emith_cond_check(int cond, int *r) case DCOND_GT: // !(N^V) && !Z EMIT(MIPS_LSR_IMM(AT, FV, 31)); // Nd^V = Nt^Ns^C EMIT(MIPS_XOR_REG(AT, FC, AT)); - EMIT(MIPS_ADD_IMM(AT, AT, (u16)-1)); // !(Nd^V) && !Z + EMIT(MIPS_ADD_IMM(AT, AT, -1)); // !(Nd^V) && !Z EMIT(MIPS_AND_REG(AT, FNZ, AT)); *r = AT, b = (cond == DCOND_GT ? 
MIPS_BNE : MIPS_BEQ); break; diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 212a12c5..39f3a1d7 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -17,8 +17,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common xR8, xR9, xR10, xR11, xR12, xR13, xR14, xR15 }; // x86-64 only -#define CONTEXT_REG xBP -#define RET_REG xAX +#define CONTEXT_REG xBP +#define RET_REG xAX #define ICOND_JO 0x00 #define ICOND_JNO 0x01 @@ -935,6 +935,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common emith_ret(); \ } while (0) + #define EMITH_JMP_START(cond) { \ u8 *cond_ptr; \ JMP8_POS(cond_ptr) @@ -1006,6 +1007,14 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common #ifndef _WIN32 +// SystemV ABI conventions: +// rbx,rbp,r12-r15 are preserved, rax,rcx,rdx,rsi,rdi,r8-r11 are temporaries +// parameters in rdi,rsi,rdx,rcx,r8,r9, return values in rax,rdx +#define PARAM_REGS { xDI, xSI, xDX, xCX, xR8, xR9 } +#define PRESERVED_REGS { xR12, xR13, xR14, xR15, xBX, xBP } +#define TEMPORARY_REGS { xAX, xR10, xR11 } +#define STATIC_SH2_REGS { SHR_SR,xBX , SHR_R0,xR15 } + #define host_arg2reg(rd, arg) \ switch (arg) { \ case 0: rd = xDI; break; \ @@ -1037,6 +1046,14 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common #else // _WIN32 +// M$ ABI conventions: +// rbx,rbp,rsi,rdi,r12-r15 are preserved, rcx,rdx,rax,r8,r9,r10,r11 temporaries +// parameters in rcx,rdx,r8,r9, return values in rax,rdx +#define PARAM_REGS { xCX, xDX, xR8, xR9 } +#define PRESERVED_REGS { xSI, xDI, xR12, xR13, xR14, xR15, xBX, xBP } +#define TEMPORARY_REGS { xAX, xR10, xR11 } +#define STATIC_SH2_REGS { SHR_SR,xBX , SHR_R0,xR15 , SH2_R0+1,xR14 } + #define host_arg2reg(rd, arg) \ switch (arg) { \ case 0: rd = xCX; break; \ @@ -1087,6 +1104,14 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common assert((u32)(rm) < 8u); \ } while (0) +// MS/SystemV ABI: ebx,esi,edi,ebp are preserved, eax,ecx,edx 
are temporaries +// DRC uses REGPARM to pass upto 3 parameters in registers eax,ecx,edx. +// To avoid conflicts with param passing ebx must be declared temp here. +#define PARAM_REGS { xAX, xDX, xCX } +#define PRESERVED_REGS { xSI, xDI, xBP } +#define TEMPORARY_REGS { xBX } +#define STATIC_SH2_REGS { SHR_SR,xDI , SHR_R0,xSI } + #define host_arg2reg(rd, arg) \ switch (arg) { \ case 0: rd = xAX; break; \ diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index e9173c4c..3cf7a0d9 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -427,213 +427,42 @@ typedef struct { static int rcache_get_tmp(void); static void rcache_free_tmp(int hr); -// Note: cache_regs[] must have at least the amount of REG/TEMP registers used -// by handlers in worst case (currently 4). -// Register assignment goes by ABI convention. Caller save registers are TEMP, -// the others are either static or REG. SR must be static, R0 very recommended. +// Note: Register assignment goes by ABI convention. Caller save registers are +// TEMPORARY, the others are PRESERVED. Unusable regs are omitted. +// there must be at least the free (not context or statically mapped) amount of +// PRESERVED/TEMPORARY registers used by handlers in worst case (currently 4). +// there must be at least 3 PARAM, and PARAM+TEMPORARY must be at least 4. +// SR and R0 should by all means be statically mapped. // XXX the static definition of SR MUST match that in compiler.h -// VBR, PC, PR must not be static (read from context in utils). -// RET_REG/params should be first TEMPs to avoid allocation conflicts in calls. -// There MUST be at least 3 params and one non-RET_REG/param TEMP. -// XXX shouldn't this be somehow defined in the code emitters? +// PC and PR must not be statically mapped (accessed in context by utils). + #ifdef __arm__ #include "../drc/emit_arm.c" - -static guest_reg_t guest_regs[] = { - // SHR_R0 .. SHR_SP -#ifndef __MACH__ // no r9.. 
- { GRF_STATIC, 8 }, { GRF_STATIC, 9 }, { 0 } , { 0 } , -#else - { GRF_STATIC, 8 }, { 0 } , { 0 } , { 0 } , -#endif - { 0 } , { 0 } , { 0 } , { 0 } , - { 0 } , { 0 } , { 0 } , { 0 } , - { 0 } , { 0 } , { 0 } , { 0 } , - // SHR_PC, SHR_PPC, SHR_PR, SHR_SR, - // SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL, - { 0 } , { 0 } , { 0 } , { GRF_STATIC, 10 }, - { 0 } , { 0 } , { 0 } , { 0 } , -}; - -// OABI/EABI: params: r0-r3, return: r0-r1, temp: r12,r14, saved: r4-r8,r10,r11 -// SP,PC: r13,r15 must not be used. saved: r9 (for platform use, e.g. on ios) -static cache_reg_t cache_regs[] = { - { 0, HRT_TEMP }, // RET_REG, params - { 1, HRT_TEMP }, - { 2, HRT_TEMP }, // params - { 3, HRT_TEMP }, - { 12, HRT_TEMP }, // temps - { 14, HRT_TEMP }, - { 8, HRT_STATIC }, // statics -#ifndef __MACH__ // no r9.. - { 9, HRT_STATIC }, -#endif - { 10, HRT_STATIC }, - { 4, HRT_REG }, // other regs - { 5, HRT_REG }, - { 6, HRT_REG }, - { 7, HRT_REG }, -}; - #elif defined(__aarch64__) #include "../drc/emit_arm64.c" - -static guest_reg_t guest_regs[] = { - // SHR_R0 .. SHR_SP - { GRF_STATIC,20 }, { GRF_STATIC,21 }, { 0 } , { 0 } , - { 0 } , { 0 } , { 0 } , { 0 } , - { 0 } , { 0 } , { 0 } , { 0 } , - { 0 } , { 0 } , { 0 } , { 0 } , - // SHR_PC, SHR_PPC, SHR_PR, SHR_SR, - // SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL, - { 0 } , { 0 } , { 0 } , { GRF_STATIC, 22 }, - { 0 } , { 0 } , { 0 } , { 0 } , -}; - -// AAPCS64: params: r0-r7, return: r0-r1, temp: r8-r17, saved: r19-r29 -// saved: r18 (for platform use) -// since drc never needs more than 4 parameters, r4-r7 are treated as temp. 
-static cache_reg_t cache_regs[] = { - { 0, HRT_TEMP }, // RET_REG, params - { 1, HRT_TEMP }, - { 2, HRT_TEMP }, // params - { 3, HRT_TEMP }, - { 4, HRT_TEMP }, // temps - { 5, HRT_TEMP }, - { 6, HRT_TEMP }, - { 7, HRT_TEMP }, - { 8, HRT_TEMP }, - { 9, HRT_TEMP }, - { 10, HRT_TEMP }, - { 11, HRT_TEMP }, - { 12, HRT_TEMP }, - { 13, HRT_TEMP }, - { 14, HRT_TEMP }, - { 15, HRT_TEMP }, - { 16, HRT_TEMP }, - { 17, HRT_TEMP }, - { 20, HRT_STATIC }, // statics - { 21, HRT_STATIC }, - { 22, HRT_STATIC }, - { 23, HRT_REG }, // other regs - { 24, HRT_REG }, - { 25, HRT_REG }, - { 26, HRT_REG }, - { 27, HRT_REG }, - { 28, HRT_REG }, - { 29, HRT_REG }, -}; - #elif defined(__mips__) #include "../drc/emit_mips.c" - -static guest_reg_t guest_regs[] = { - // SHR_R0 .. SHR_SP - {GRF_STATIC, 16} , {GRF_STATIC, 17} , { 0 } , { 0 } , - { 0 } , { 0 } , { 0 } , { 0 } , - { 0 } , { 0 } , { 0 } , { 0 } , - { 0 } , { 0 } , { 0 } , { 0 } , - // SHR_PC, SHR_PPC, SHR_PR, SHR_SR, - // SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL, - { 0 } , { 0 } , { 0 } , {GRF_STATIC, 18} , - { 0 } , { 0 } , { 0 } , { 0 } , -}; - -// MIPS ABI: params: r4-r7, return: r2-r3, temp: r1(at),r8-r15,r24-r25,r31(ra), -// saved: r16-r23,r30, reserved: r0(zero), r26-r27(irq), r28(gp), r29(sp) -// r1,r15,r24,r25 are used internally by the code emitter -static cache_reg_t cache_regs[] = { - { 2, HRT_TEMP }, // RET_REG (v0-v1) - { 3, HRT_TEMP }, - { 4, HRT_TEMP }, // params (a0-a3) - { 5, HRT_TEMP }, - { 6, HRT_TEMP }, - { 7, HRT_TEMP }, - { 8, HRT_TEMP }, // temps (t0-t6) - { 9, HRT_TEMP }, - { 10, HRT_TEMP }, - { 11, HRT_TEMP }, - { 12, HRT_TEMP }, - { 13, HRT_TEMP }, - { 14, HRT_TEMP }, - { 16, HRT_STATIC }, // statics (s0-s2) - { 17, HRT_STATIC }, - { 18, HRT_STATIC }, - { 19, HRT_REG }, // other regs (s3-s6) - { 20, HRT_REG }, - { 21, HRT_REG }, - { 22, HRT_REG }, -}; - #elif defined(__i386__) #include "../drc/emit_x86.c" - -static guest_reg_t guest_regs[] = { - // SHR_R0 .. 
SHR_SP - {GRF_STATIC, xSI}, { 0 } , { 0 } , { 0 } , - { 0 } , { 0 } , { 0 } , { 0 } , - { 0 } , { 0 } , { 0 } , { 0 } , - { 0 } , { 0 } , { 0 } , { 0 } , - // SHR_PC, SHR_PPC, SHR_PR, SHR_SR, - // SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL, - { 0 } , { 0 } , { 0 } , {GRF_STATIC, xDI}, - { 0 } , { 0 } , { 0 } , { 0 } , -}; - -// MS/SystemV ABI: ebx,esi,edi,ebp are preserved, eax,ecx,edx are temporaries -// DRC uses REGPARM to pass upto 3 parameters in registers eax,ecx,edx. -// To avoid conflicts with param passing ebx must be declared temp here. -static cache_reg_t cache_regs[] = { - { xAX, HRT_TEMP }, // RET_REG, param - { xDX, HRT_TEMP }, // params - { xCX, HRT_TEMP }, - { xBX, HRT_TEMP }, // temp - { xSI, HRT_STATIC }, // statics - { xDI, HRT_STATIC }, -}; - #elif defined(__x86_64__) #include "../drc/emit_x86.c" - -static guest_reg_t guest_regs[] = { - // SHR_R0 .. SHR_SP - {GRF_STATIC,xR12}, { 0 } , { 0 } , { 0 } , - { 0 } , { 0 } , { 0 } , { 0 } , - { 0 } , { 0 } , { 0 } , { 0 } , - { 0 } , { 0 } , { 0 } , { 0 } , - // SHR_PC, SHR_PPC, SHR_PR, SHR_SR, - // SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL, - { 0 } , { 0 } , { 0 } , {GRF_STATIC, xBX}, - { 0 } , { 0 } , { 0 } , { 0 } , -}; - -// M$/SystemV ABI conventions: -// rbx,rbp,r12-r15 are preserved, rcx,rdx,rax,r8,r9,r10,r11 are temporaries -// rsi,rdi are preserved in M$ ABI, temporary in SystemV ABI -// parameters in rcx,rdx,r8,r9, SystemV ABI additionally uses rsi,rdi -static cache_reg_t cache_regs[] = { - { xAX, HRT_TEMP }, // RET_REG - { xDX, HRT_TEMP }, // params - { xCX, HRT_TEMP }, - { xDI, HRT_TEMP }, - { xSI, HRT_TEMP }, - { xR8, HRT_TEMP }, - { xR9, HRT_TEMP }, - { xR10,HRT_TEMP }, // temps - { xR11,HRT_TEMP }, - { xBX, HRT_STATIC }, // statics - { xR12,HRT_STATIC }, - { xR13,HRT_REG }, // other regs - { xR14,HRT_REG }, - { xR15,HRT_REG }, -}; - #else #error unsupported arch #endif +static const signed char hregs_param[] = PARAM_REGS; +static const signed char hregs_temp [] = TEMPORARY_REGS; +static const signed 
char hregs_saved[] = PRESERVED_REGS; +static const signed char regs_static[] = STATIC_SH2_REGS; + +#define CACHE_REGS \ + (ARRAY_SIZE(hregs_param)+ARRAY_SIZE(hregs_temp)+ARRAY_SIZE(hregs_saved)-1) +static cache_reg_t cache_regs[CACHE_REGS]; + static signed char reg_map_host[HOST_REGS]; +static guest_reg_t guest_regs[SH2_REGS]; + static void REGPARM(1) (*sh2_drc_entry)(SH2 *sh2); static void REGPARM(1) (*sh2_drc_dispatcher)(u32 pc); #if CALL_STACK @@ -884,15 +713,15 @@ static void dr_block_link(struct block_entry *be, struct block_link *bl, int emi // via blx: @jump near jumpcc to blx; @blx far jump emith_jump_patch(jump, bl->blx, &jump); emith_jump_at(bl->blx, be->tcache_ptr); - if ((((uintptr_t)bl->blx & 0xf) + emith_jump_at_size()-1) > 0xf) + if ((((uintptr_t)bl->blx & 0x1f) + emith_jump_at_size()-1) > 0x1f) host_instructions_updated(bl->blx, bl->blx + emith_jump_at_size()-1); } } else { printf("unknown BL type %d\n", bl->type); exit(1); } - // only needs sync if patch is possibly crossing cacheline (assume 16 byte) - if ((((uintptr_t)jump & 0xf) + jsz-1) > 0xf) + // only needs sync if patch is possibly crossing cacheline (assume 32 byte) + if ((((uintptr_t)jump & 0x1f) + jsz-1) > 0x1f) host_instructions_updated(jump, jump + jsz-1); } @@ -1653,7 +1482,7 @@ static void gconst_invalidate(void) static u16 rcache_counter; // SH2 register usage bitmasks -static u32 rcache_hregs_reg; // regs of type HRT_REG (for pinning) +static u32 rcache_vregs_reg; // regs of type HRT_REG (for pinning) static u32 rcache_regs_static; // statically allocated regs static u32 rcache_regs_pinned; // pinned regs static u32 rcache_regs_now; // regs used in current insn @@ -2548,29 +2377,59 @@ static void rcache_flush(void) rcache_invalidate(); } +static void rcache_create(void) +{ + int x = 0, i; + + // create cache_regs as host register representation + // RET_REG/params should be first TEMPs to avoid allocation conflicts in calls + cache_regs[x++] = (cache_reg_t) {.hreg = RET_REG, .htype 
= HRT_TEMP}; + for (i = 0; i < ARRAY_SIZE(hregs_param); i++) + if (hregs_param[i] != RET_REG) + cache_regs[x++] = (cache_reg_t){.hreg = hregs_param[i],.htype = HRT_TEMP}; + + for (i = 0; i < ARRAY_SIZE(hregs_temp); i++) + if (hregs_temp[i] != RET_REG) + cache_regs[x++] = (cache_reg_t){.hreg = hregs_temp[i], .htype = HRT_TEMP}; + + for (i = ARRAY_SIZE(hregs_saved)-1; i >= 0; i--) + if (hregs_saved[i] != CONTEXT_REG) + cache_regs[x++] = (cache_reg_t){.hreg = hregs_saved[i], .htype = HRT_REG}; + + if (x != ARRAY_SIZE(cache_regs)) { + printf("rcache_create failed (conflicting register count)\n"); + exit(1); + } + + // mapping from host_register to cache regs index + memset(reg_map_host, -1, sizeof(reg_map_host)); + for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { + if (cache_regs[i].htype) + reg_map_host[cache_regs[i].hreg] = i; + if (cache_regs[i].htype == HRT_REG) + rcache_vregs_reg |= (1 << i); + } + + // create static host register mapping for SH2 regs + for (i = 0; i < ARRAY_SIZE(regs_static); i += 2) { + for (x = ARRAY_SIZE(cache_regs)-1; x >= 0; x--) + if (cache_regs[x].hreg == regs_static[i+1]) break; + if (x >= 0) { + guest_regs[regs_static[i]] = (guest_reg_t){.flags = GRF_STATIC,.sreg = x}; + rcache_regs_static |= (1 << regs_static[i]); + rcache_vregs_reg &= ~(1 << x); + } else + guest_regs[regs_static[i]] = (guest_reg_t){.sreg = -1}; + } + + printf("DRC registers created, %ld host regs (%d REG, %d STATIC, 1 CTX)\n", + CACHE_REGS+1L, count_bits(rcache_vregs_reg),count_bits(rcache_regs_static)); +} + static void rcache_init(void) { - static int once = 1; - int i; - - // init is executed on every rom load, but this must only be executed once... 
- if (once) { - memset(reg_map_host, -1, sizeof(reg_map_host)); - for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { - reg_map_host[cache_regs[i].hreg] = i; - if (cache_regs[i].htype == HRT_REG) - rcache_hregs_reg |= (1 << i); - } - - for (i = 0; i < ARRAY_SIZE(guest_regs); i++) - if (guest_regs[i].flags & GRF_STATIC) { - rcache_regs_static |= (1 << i); - guest_regs[i].sreg = reg_map_host[guest_regs[i].sreg]; - rcache_hregs_reg &= ~(1 << guest_regs[i].sreg); - } else - guest_regs[i].sreg = -1; - once = 0; - } + // create DRC data structures + rcache_create(); rcache_invalidate(); #if DRC_DEBUG & 64 @@ -5038,8 +4897,8 @@ static void sh2_generate_utils(void) emith_move_r_r_ptr(arg0, CONTEXT_REG); emith_call_ctx(offsetof(SH2, irq_callback)); // vector = sh2->irq_callback(sh2, level); // obtain new PC - emith_ctx_read(arg1, SHR_VBR * 4); - emith_add_r_r_r_lsl(arg0, arg1, RET_REG, 2); + tmp = rcache_get_reg_arg(1, SHR_VBR, &tmp2); + emith_add_r_r_r_lsl(arg0, tmp2, RET_REG, 2); emith_call(sh2_drc_read32); if (arg0 != RET_REG) emith_move_r_r(arg0, RET_REG); diff --git a/cpu/sh2/compiler.h b/cpu/sh2/compiler.h index 5f374c8c..415f01ba 100644 --- a/cpu/sh2/compiler.h +++ b/cpu/sh2/compiler.h @@ -38,9 +38,9 @@ unsigned short scan_block(unsigned int base_pc, int is_slave, #if defined(__arm__) #define DRC_SR_REG "r10" #elif defined(__aarch64__) -#define DRC_SR_REG "r22" +#define DRC_SR_REG "r28" #elif defined(__mips__) -#define DRC_SR_REG "s2" +#define DRC_SR_REG "s6" #elif defined(__i386__) #define DRC_SR_REG "edi" #elif defined(__x86_64__) diff --git a/cpu/sh2/sh2.h b/cpu/sh2/sh2.h index 05ae7052..5f1a8841 100644 --- a/cpu/sh2/sh2.h +++ b/cpu/sh2/sh2.h @@ -8,6 +8,7 @@ typedef enum { SHR_R0 = 0, SHR_SP = 15, SHR_PC, SHR_PPC, SHR_PR, SHR_SR, SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL, + SH2_REGS // register set size } sh2_reg_e; typedef struct SH2_ diff --git a/platform/common/disarm.c b/platform/common/disarm.c index 80655877..37fd810e 100644 --- a/platform/common/disarm.c +++ 
b/platform/common/disarm.c @@ -435,8 +435,10 @@ static int software_interrupt(unsigned int pc, unsigned int insn, char *buf, siz return 1; } -int disarm(uintptr_t pc, uint32_t insn, char *buf, size_t buf_len) +int disarm(uintptr_t pc, uint32_t insn, char *buf, size_t buf_len, uintptr_t *addr) { + *addr = 0; + if ((insn & 0x0fffffd0) == 0x012fff10) return branch_and_exchange(pc, insn, buf, buf_len); @@ -464,8 +466,10 @@ int disarm(uintptr_t pc, uint32_t insn, char *buf, size_t buf_len) if ((insn & 0x0e000000) == 0x08000000) return block_data_transfer(pc, insn, buf, buf_len); - if ((insn & 0x0e000000) == 0x0a000000) + if ((insn & 0x0e000000) == 0x0a000000) { + *addr = (long)pc + 8 + ((long)(insn << 8) >> 6); return branch(pc, insn, buf, buf_len); + } if ((insn & 0x0e000000) == 0x0c000000) return coprocessor_data_transfer(pc, insn, buf, buf_len); diff --git a/platform/common/disarm.h b/platform/common/disarm.h index b8634f68..f1170894 100644 --- a/platform/common/disarm.h +++ b/platform/common/disarm.h @@ -23,6 +23,6 @@ #ifndef DISARM_H #define DISARM_H -int disarm(uintptr_t long pc, uint32_t, char *buf, unsigned int buf_len); +int disarm(uintptr_t pc, uint32_t insn, char *buf, size_t buf_len, uintptr_t *sym); #endif /* DISARM_H */ diff --git a/platform/common/dismips.c b/platform/common/dismips.c index af71b095..41c0f7a5 100644 --- a/platform/common/dismips.c +++ b/platform/common/dismips.c @@ -274,7 +274,7 @@ static unsigned long j_target(unsigned long pc, uint32_t insn) } // main disassembler function -int dismips(uintptr_t pc, uint32_t insn, char *buf, unsigned int buflen) +int dismips(uintptr_t pc, uint32_t insn, char *buf, size_t buflen, uintptr_t *sym) { const struct insn *pi = decode_insn(insn); char *rs = register_names[(insn >> 21) & 0x1f]; @@ -283,6 +283,7 @@ int dismips(uintptr_t pc, uint32_t insn, char *buf, unsigned int buflen) int sa = (insn >> 6) & 0x1f; int imm = (int16_t) insn; + *sym = 0; if (pi == NULL) { snprintf(buf, buflen, "0x%x", insn); return 
0; @@ -314,13 +315,16 @@ int dismips(uintptr_t pc, uint32_t insn, char *buf, unsigned int buflen) snprintf(buf, buflen, "%s %s, %s, %d", pi->name, rd, rt, sa); break; case B_IMM_S: - snprintf(buf, buflen, "%s %s, 0x%lx", pi->name, rs, b_target(pc, insn)); + *sym = b_target(pc, insn); + snprintf(buf, buflen, "%s %s, 0x%lx", pi->name, rs, *sym); break; case B_IMM_TS: - snprintf(buf, buflen, "%s %s, %s, 0x%lx", pi->name, rs, rt, b_target(pc, insn)); + *sym = b_target(pc, insn); + snprintf(buf, buflen, "%s %s, %s, 0x%lx", pi->name, rs, rt, *sym); break; case J_IMM: - snprintf(buf, buflen, "%s 0x%lx", pi->name, j_target(pc, insn)); + *sym = j_target(pc, insn); + snprintf(buf, buflen, "%s 0x%lx", pi->name, *sym); break; case A_IMM_TS: if (abs(imm) < 1000) diff --git a/platform/common/dismips.h b/platform/common/dismips.h index e6338def..b547003b 100644 --- a/platform/common/dismips.h +++ b/platform/common/dismips.h @@ -1,6 +1,6 @@ #ifndef DISMIPS_H #define DISMIPS_H -int dismips(uintptr_t pc, uint32_t insn, char *buf, unsigned int buf_len); +int dismips(uintptr_t pc, uint32_t insn, char *buf, size_t buf_len, uintptr_t *sym); #endif /* DISMIPS_H */ diff --git a/platform/common/host_dasm.c b/platform/common/host_dasm.c index d0537ef6..fc3cbe67 100644 --- a/platform/common/host_dasm.c +++ b/platform/common/host_dasm.c @@ -46,13 +46,10 @@ void host_dasm(void *addr, int len) insn = *(long *)addr; printf(" %08lx %08lx ", (long)addr, insn); - if(disasm((unsigned)addr, insn, buf, sizeof(buf))) + if(disasm((unsigned)addr, insn, buf, sizeof(buf), &symaddr)) { - symaddr = 0; - if ((insn & 0xe000000) == 0xa000000) { - symaddr = (long)addr + 8 + ((long)(insn << 8) >> 6); + if (symaddr) name = lookup_name((void *)symaddr); - } if (symaddr && name) printf("%s <%s>\n", buf, name); else if (symaddr && !name) From aaea8e3ecde060c3f042ef36bb68f10d186f6904 Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 9 Nov 2019 10:30:57 +0100 Subject: [PATCH 0236/1110] sh2 drc: optimizations for MIPS code 
emitting --- cpu/drc/emit_arm.c | 48 ++++- cpu/drc/emit_arm64.c | 52 +++++- cpu/drc/emit_mips.c | 416 ++++++++++++++++++++++++++++++++----------- cpu/drc/emit_x86.c | 25 +++ cpu/sh2/compiler.c | 411 ++++++++++++++++++++++++++---------------- pico/32x/32x.c | 14 +- 6 files changed, 698 insertions(+), 268 deletions(-) diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index e35d3471..25a2c72f 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -671,6 +671,8 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) literal_insn[pool_index] += move_offs; } +#define EMITH_HINT_COND(cond) /**/ + #define JMP_POS(ptr) { \ ptr = tcache_ptr; \ EMIT(0,M1(PC),0); \ @@ -721,9 +723,11 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) #define emith_add_r_r_r_lsl_ptr(d, s1, s2, lslimm) \ emith_add_r_r_r_lsl(d, s1, s2, lslimm) +#define emith_adc_r_r_r_lsl(d, s1, s2, lslimm) \ + EOP_ADC_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm) + #define emith_addf_r_r_r_lsl(d, s1, s2, lslimm) \ EOP_ADD_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSL,lslimm) - #define emith_addf_r_r_r_lsr(d, s1, s2, lslimm) \ EOP_ADD_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSR,lslimm) @@ -733,6 +737,9 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) #define emith_sub_r_r_r_lsl(d, s1, s2, lslimm) \ EOP_SUB_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm) +#define emith_sbc_r_r_r_lsl(d, s1, s2, lslimm) \ + EOP_SBC_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm) + #define emith_subf_r_r_r_lsl(d, s1, s2, lslimm) \ EOP_SUB_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSL,lslimm) @@ -741,10 +748,11 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) #define emith_or_r_r_r_lsl(d, s1, s2, lslimm) \ EOP_ORR_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm) +#define emith_or_r_r_r_lsr(d, s1, s2, lsrimm) \ + EOP_ORR_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSR,lsrimm) #define emith_eor_r_r_r_lsl(d, s1, s2, lslimm) \ EOP_EOR_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm) - #define emith_eor_r_r_r_lsr(d, s1, s2, lsrimm) 
\ EOP_EOR_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSR,lsrimm) @@ -753,13 +761,20 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) #define emith_or_r_r_lsl(d, s, lslimm) \ emith_or_r_r_r_lsl(d, d, s, lslimm) +#define emith_or_r_r_lsr(d, s, lsrimm) \ + emith_or_r_r_r_lsr(d, d, s, lsrimm) +#define emith_eor_r_r_lsl(d, s, lslimm) \ + emith_eor_r_r_r_lsl(d, d, s, lslimm) #define emith_eor_r_r_lsr(d, s, lsrimm) \ emith_eor_r_r_r_lsr(d, d, s, lsrimm) #define emith_add_r_r_r(d, s1, s2) \ emith_add_r_r_r_lsl(d, s1, s2, 0) +#define emith_adc_r_r_r(d, s1, s2) \ + emith_adc_r_r_r_lsl(d, s1, s2, 0) + #define emith_addf_r_r_r(d, s1, s2) \ emith_addf_r_r_r_lsl(d, s1, s2, 0) @@ -769,6 +784,9 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) #define emith_sub_r_r_r(d, s1, s2) \ emith_sub_r_r_r_lsl(d, s1, s2, 0) +#define emith_sbc_r_r_r(d, s1, s2) \ + emith_sbc_r_r_r_lsl(d, s1, s2, 0) + #define emith_subf_r_r_r(d, s1, s2) \ emith_subf_r_r_r_lsl(d, s1, s2, 0) @@ -790,11 +808,17 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) #define emith_add_r_r_ptr(d, s) \ emith_add_r_r_r(d, d, s) +#define emith_adc_r_r(d, s) \ + emith_adc_r_r_r(d, d, s) + #define emith_sub_r_r(d, s) \ emith_sub_r_r_r(d, d, s) -#define emith_adc_r_r(d, s) \ - EOP_ADC_REG(A_COND_AL,0,d,d,s,A_AM1_LSL,0) +#define emith_sbc_r_r(d, s) \ + emith_sbc_r_r_r(d, d, s) + +#define emith_negc_r_r(d, s) \ + EOP_C_DOP_IMM(A_COND_AL,A_OP_RSC,0,s,d,0,0) #define emith_and_r_r_c(cond, d, s) \ EOP_AND_REG(cond,0,d,d,s,A_AM1_LSL,0) @@ -987,9 +1011,13 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) #define emith_rolcf(d) \ emith_adcf_r_r(d, d) +#define emith_rolc(d) \ + emith_adc_r_r(d, d) #define emith_rorcf(d) \ EOP_MOV_REG(A_COND_AL,1,d,d,A_AM1_ROR,0) /* ROR #0 -> RRX */ +#define emith_rorc(d) \ + EOP_MOV_REG(A_COND_AL,0,d,d,A_AM1_ROR,0) /* ROR #0 -> RRX */ #define emith_negcf_r_r(d, s) \ EOP_C_DOP_IMM(A_COND_AL,A_OP_RSC,1,s,d,0,0) @@ -1329,6 +1357,18 @@ 
static inline void emith_pool_adjust(int pool_index, int move_offs) } \ } while (0) +#define emith_t_to_carry(srr, is_sub) do { \ + if (is_sub) { \ + int t_ = rcache_get_tmp(); \ + emith_eor_r_r_imm(t_, srr, 1); \ + emith_rorf(t_, t_, 1); \ + rcache_free_tmp(t_); \ + } else { \ + emith_rorf(srr, srr, 1); \ + emith_rol(srr, srr, 1); \ + } \ +} while (0) + #define emith_tpop_carry(sr, is_sub) do { \ if (is_sub) \ emith_eor_r_imm(sr, 1); \ diff --git a/cpu/drc/emit_arm64.c b/cpu/drc/emit_arm64.c index 0c36b2bc..dc0cf559 100644 --- a/cpu/drc/emit_arm64.c +++ b/cpu/drc/emit_arm64.c @@ -370,6 +370,8 @@ enum { AM_IDX, AM_IDXPOST, AM_IDXREG, AM_IDXPRE }; JMP_EMIT_NC(else_ptr); \ } +#define EMITH_HINT_COND(cond) /**/ + // "simple" jump (no more then a few insns) // ARM32 will use conditional instructions here #define EMITH_SJMP_START EMITH_JMP_START @@ -414,6 +416,24 @@ enum { AM_IDX, AM_IDXPOST, AM_IDXREG, AM_IDXPRE }; #define emith_addf_r_r_r_lsr(d, s1, s2, simm) \ EMIT(A64_ADDS_REG(d, s1, s2, ST_LSR, simm)) +#define emith_adc_r_r_r_lsl(d, s1, s2, simm) \ + if (simm) { int _t = rcache_get_tmp(); \ + emith_lsl(_t, s2, simm); \ + emith_adc_r_r_r(d, s1, _t); \ + rcache_free_tmp(_t); \ + } else \ + emith_adc_r_r_r(d, s1, s2); \ +} while (0) + +#define emith_sbc_r_r_r_lsl(d, s1, s2, simm) \ + if (simm) { int _t = rcache_get_tmp(); \ + emith_lsl(_t, s2, simm); \ + emith_sbc_r_r_r(d, s1, _t); \ + rcache_free_tmp(_t); \ + } else \ + emith_sbc_r_r_r(d, s1, s2); \ +} while (0) + #define emith_sub_r_r_r_lsl(d, s1, s2, simm) \ EMIT(A64_SUB_REG(d, s1, s2, ST_LSL, simm)) @@ -422,10 +442,11 @@ enum { AM_IDX, AM_IDXPOST, AM_IDXREG, AM_IDXPRE }; #define emith_or_r_r_r_lsl(d, s1, s2, simm) \ EMIT(A64_OR_REG(d, s1, s2, ST_LSL, simm)) +#define emith_or_r_r_r_lsr(d, s1, s2, simm) \ + EMIT(A64_OR_REG(d, s1, s2, ST_LSR, simm)) #define emith_eor_r_r_r_lsl(d, s1, s2, simm) \ EMIT(A64_EOR_REG(d, s1, s2, ST_LSL, simm)) - #define emith_eor_r_r_r_lsr(d, s1, s2, simm) \ EMIT(A64_EOR_REG(d, s1, s2, 
ST_LSR, simm)) @@ -434,7 +455,11 @@ enum { AM_IDX, AM_IDXPOST, AM_IDXREG, AM_IDXPRE }; #define emith_or_r_r_lsl(d, s, lslimm) \ emith_or_r_r_r_lsl(d, d, s, lslimm) +#define emith_or_r_r_lsr(d, s, lsrimm) \ + emith_or_r_r_r_lsr(d, d, s, lsrimm) +#define emith_eor_r_r_lsl(d, s, lslimm) \ + emith_eor_r_r_r_lsl(d, d, s, lslimm) #define emith_eor_r_r_lsr(d, s, lsrimm) \ emith_eor_r_r_r_lsr(d, d, s, lsrimm) @@ -472,6 +497,9 @@ enum { AM_IDX, AM_IDXPOST, AM_IDXREG, AM_IDXPRE }; #define emith_neg_r_r(d, s) \ EMIT(A64_NEG_REG(d, s, ST_LSL, 0)) +#define emith_negc_r_r(d, s) \ + EMIT(A64_NEGC_REG(d, s)) + #define emith_adc_r_r_r(d, s1, s2) \ EMIT(A64_ADC_REG(d, s1, s2)) @@ -481,6 +509,9 @@ enum { AM_IDX, AM_IDXPOST, AM_IDXREG, AM_IDXPRE }; #define emith_adcf_r_r_r(d, s1, s2) \ EMIT(A64_ADCS_REG(d, s1, s2)) +#define emith_sbc_r_r_r(d, s1, s2) \ + EMIT(A64_SBC_REG(d, s1, s2)) + #define emith_sbcf_r_r_r(d, s1, s2) \ EMIT(A64_SBCS_REG(d, s1, s2)) @@ -806,12 +837,19 @@ static void emith_log_imm(int op, int wx, int rd, int rn, u32 imm) #define emith_rolcf(d) \ emith_adcf_r_r(d, d) +#define emith_rolc(d) \ + emith_adc_r_r(d, d) #define emith_rorcf(d) do { \ EMIT(A64_RBIT_REG(d, d)); \ emith_adcf_r_r(d, d); \ EMIT(A64_RBIT_REG(d, d)); \ } while (0) +#define emith_rorc(d) do { \ + EMIT(A64_RBIT_REG(d, d)); \ + emith_adc_r_r(d, d); \ + EMIT(A64_RBIT_REG(d, d)); \ +} while (0) // signed/unsigned extend #define emith_clear_msb(d, s, count) /* bits to clear */ \ @@ -1286,6 +1324,18 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) emith_eor_r_imm(sr, 1); \ } while (0) +#define emith_t_to_carry(srr, is_sub) do { \ + if (is_sub) { \ + int t_ = rcache_get_tmp(); \ + emith_eor_r_r_imm(t_, srr, 1); \ + emith_rorf(t_, t_, 1); \ + rcache_free_tmp(t_); \ + } else { \ + emith_rorf(srr, srr, 1); \ + emith_rol(srr, srr, 1); \ + } \ +} while (0) + #define emith_tpop_carry(sr, is_sub) do { \ if (is_sub) \ emith_eor_r_imm(sr, 1); \ diff --git a/cpu/drc/emit_mips.c 
b/cpu/drc/emit_mips.c index 832364e9..82527474 100644 --- a/cpu/drc/emit_mips.c +++ b/cpu/drc/emit_mips.c @@ -173,15 +173,17 @@ enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 }; MIPS_OP_REG(FN_JALR,rd,rs,_) // conditional branches; no condition code, these compare rs against rt or Z0 -#define MIPS_BEQ (OP_BEQ << 5) -#define MIPS_BNE (OP_BNE << 5) -#define MIPS_BLE (OP_BLEZ << 5) -#define MIPS_BGT (OP_BGTZ << 5) -#define MIPS_BLT ((OP__RT << 5)|RT_BLTZ) -#define MIPS_BGE ((OP__RT << 5)|RT_BGEZ) -#define MIPS_BGTL ((OP__RT << 5)|RT_BLTZAL) -#define MIPS_BGEL ((OP__RT << 5)|RT_BGEZAL) +#define MIPS_BEQ (OP_BEQ << 5) // rs == rt (rt in lower 5 bits) +#define MIPS_BNE (OP_BNE << 5) // rs != rt (ditto) +#define MIPS_BLE (OP_BLEZ << 5) // rs <= 0 +#define MIPS_BGT (OP_BGTZ << 5) // rs > 0 +#define MIPS_BLT ((OP__RT << 5)|RT_BLTZ) // rs < 0 +#define MIPS_BGE ((OP__RT << 5)|RT_BGEZ) // rs >= 0 +#define MIPS_BGTL ((OP__RT << 5)|RT_BLTZAL) // rs > 0, link $ra if jumping +#define MIPS_BGEL ((OP__RT << 5)|RT_BGEZAL) // rs >= 0, link $ra if jumping +#define MIPS_BCOND(cond, rs, rt, offs16) \ + MIPS_OP_IMM((cond >> 5), rt, rs, (offs16) >> 2) #define MIPS_BCONDZ(cond, rs, offs16) \ MIPS_OP_IMM((cond >> 5), (cond & 0x1f), rs, (offs16) >> 2) #define MIPS_B(offs16) \ @@ -216,25 +218,26 @@ enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 }; ptr = (void *)((u8 *)(ptr) + sizeof(u32)); \ } while (0) -// FIFO for 2 instructions, for delay slot handling -static u32 emith_last_insns[2] = { -1,-1 }; -static int emith_last_idx, emith_last_cnt; +// FIFO for some instructions, for delay slot handling +#define FSZ 4 +static u32 emith_last_insns[FSZ]; +static unsigned emith_last_idx, emith_last_cnt; #define EMIT_PUSHOP() \ do { \ - emith_last_idx ^= 1; \ - if (emith_last_insns[emith_last_idx] != -1) { \ + if (emith_last_cnt > 0) { \ u32 *p = (u32 *)tcache_ptr - emith_last_cnt; \ - EMIT_PTR(p, emith_last_insns[emith_last_idx]);\ + int idx = (emith_last_idx - 
emith_last_cnt+1) %FSZ; \ + EMIT_PTR(p, emith_last_insns[idx]);\ emith_last_cnt --; \ } \ - emith_last_insns[emith_last_idx] = -1; \ } while (0) #define EMIT(op) \ do { \ - EMIT_PUSHOP(); \ + if (emith_last_cnt >= FSZ) EMIT_PUSHOP(); \ tcache_ptr = (void *)((u32 *)tcache_ptr + 1); \ + emith_last_idx = (emith_last_idx+1) %FSZ; \ emith_last_insns[emith_last_idx] = op; \ emith_last_cnt ++; \ COUNT_OP; \ @@ -242,7 +245,8 @@ static int emith_last_idx, emith_last_cnt; #define emith_flush() \ do { \ - int i; for (i = 0; i < 2; i++) EMIT_PUSHOP(); \ + while (emith_last_cnt) EMIT_PUSHOP(); \ + emith_flg_hint = _FHV|_FHC; \ } while (0) #define emith_insn_ptr() (u8 *)((u32 *)tcache_ptr - emith_last_cnt) @@ -279,11 +283,12 @@ static int emith_rt(u32 op) return emith_has_(rt,2,op,26,0x3f) ? (op>>16)&0x1f : 0; } static int emith_rd(u32 op) - { if ((op>>26) == OP__FN) - return emith_has_(rd,0,op, 0,0x3f) ? (op>>11)&0x1f :-1; + { int ret = emith_has_(rd,2,op,26,0x3f) ? (op>>16)&0x1f :-1; + if ((op>>26) == OP__FN) + ret = emith_has_(rd,0,op, 0,0x3f) ? (op>>11)&0x1f :-1; if ((op>>26) == OP__RT) - return -1; - return emith_has_(rd,2,op,26,0x3f) ? (op>>16)&0x1f :-1; + ret = -1; + return (ret ?: -1); // Z0 doesn't have dependencies } static int emith_b_isswap(u32 bop, u32 lop) @@ -292,48 +297,56 @@ static int emith_b_isswap(u32 bop, u32 lop) return bop; else if (emith_is_jr(bop) && emith_rd(lop) != emith_rs(bop)) return bop; - else if (emith_is_b(bop) && emith_rd(lop) != emith_rs(bop)) + else if (emith_is_b(bop) && emith_rd(lop) != emith_rs(bop) && + emith_rd(lop) != emith_rt(bop)) if ((bop & 0xffff) != 0x7fff) // displacement overflow? 
return (bop & 0xffff0000) | ((bop+1) & 0x0000ffff); return 0; } +static int emith_insn_swappable(u32 op1, u32 op2) +{ + if (emith_rd(op1) != emith_rd(op2) && + emith_rs(op1) != emith_rd(op2) && emith_rt(op1) != emith_rd(op2) && + emith_rs(op2) != emith_rd(op1) && emith_rt(op2) != emith_rd(op1)) + return 1; + return 0; +} + // emit branch, trying to fill the delay slot with one of the last insns static void *emith_branch(u32 op) { - int idx = emith_last_idx; - u32 op1 = emith_last_insns[idx], op2 = emith_last_insns[idx^1]; - u32 bop = 0; + unsigned idx = emith_last_idx, ds = idx; + u32 bop = 0, sop; void *bp; + int i, j, s; - // check last insn (op1) - if (op1 != -1 && op1) - bop = emith_b_isswap(op, op1); - // if not, check older insn (op2); mustn't interact with op1 to overtake - if (!bop && op2 != -1 && op2 && emith_rd(op1) != emith_rd(op2) && - emith_rs(op1) != emith_rd(op2) && emith_rt(op1) != emith_rd(op2) && - emith_rs(op2) != emith_rd(op1) && emith_rt(op2) != emith_rd(op1)) { - idx ^= 1; - bop = emith_b_isswap(op, op2); + // check for ds insn; older mustn't interact with newer ones to overtake + for (i = 0; i < emith_last_cnt && !bop; i++) { + ds = (idx-i)%FSZ; + sop = emith_last_insns[ds]; + for (j = i, s = 1; j > 0 && s; j--) + s = emith_insn_swappable(emith_last_insns[(ds+j)%FSZ], sop); + if (s) + bop = emith_b_isswap(op, sop); } - // flush FIFO and branch + // flush FIFO, but omit delay slot insn tcache_ptr = (void *)((u32 *)tcache_ptr - emith_last_cnt); - if (emith_last_insns[idx^1] != -1) - EMIT_PTR(tcache_ptr, emith_last_insns[idx^1]); - if (bop) { // can swap - bp = tcache_ptr; - EMIT_PTR(tcache_ptr, bop); COUNT_OP; - EMIT_PTR(tcache_ptr, emith_last_insns[idx]); - } else { // can't swap - if (emith_last_insns[idx] != -1) + idx = (idx-emith_last_cnt+1)%FSZ; + for (i = emith_last_cnt; i > 0; i--, idx = (idx+1)%FSZ) + if (!bop || idx != ds) EMIT_PTR(tcache_ptr, emith_last_insns[idx]); - bp = tcache_ptr; + emith_last_cnt = 0; + // emit branch and delay 
slot + bp = tcache_ptr; + if (bop) { // can swap + EMIT_PTR(tcache_ptr, bop); COUNT_OP; + EMIT_PTR(tcache_ptr, emith_last_insns[ds]); + } else { // can't swap EMIT_PTR(tcache_ptr, op); COUNT_OP; EMIT_PTR(tcache_ptr, MIPS_NOP); COUNT_OP; } - emith_last_insns[0] = emith_last_insns[1] = -1; - emith_last_cnt = 0; return bp; } @@ -403,34 +416,56 @@ static void *emith_branch(u32 op) // flag emulation creates 2 (ie cmp #0/beq) up to 9 (ie adcf/ble) extra insns. // flag handling shortcuts may reduce this by 1-4 insns, see emith_cond_check() -static int emith_flg_rs, emith_flg_rt; // registers used in FNZ=rs-rt (cmp_r_r) +static int emith_cmp_rs, emith_cmp_rt; // registers used in cmp_r_r/cmp_r_imm +static s32 emith_cmp_imm; // immediate value used in cmp_r_imm +enum { _FHC=1, _FHV=2 } emith_flg_hint; // C/V flag usage hinted by compiler static int emith_flg_noV; // V flag known not to be set +#define EMITH_HINT_COND(cond) do { \ + /* only need to check cond>>1 since the lowest bit inverts the cond */ \ + unsigned _mv = BITMASK3(DCOND_VS>>1,DCOND_GE>>1,DCOND_GT>>1); \ + unsigned _mc = _mv | BITMASK2(DCOND_HS>>1,DCOND_HI>>1); \ + emith_flg_hint = (_mv & BITMASK1(cond >> 1) ? _FHV : 0); \ + emith_flg_hint |= (_mc & BITMASK1(cond >> 1) ? _FHC : 0); \ +} while (0) + // store minimal cc information: rd, rt^rs, carry // NB: the result *must* first go to FNZ, in case rd == rs or rd == rt. // NB: for adcf and sbcf, carry-in must be dealt with separately (see there) -static void emith_set_arith_flags(int rd, int rt, int rs, s32 imm, int sub) +static void emith_set_arith_flags(int rd, int rs, int rt, s32 imm, int sub) { - if (sub && rd == FNZ && rt > AT && rs > AT) // is this cmp_r_r? 
- emith_flg_rs = rs, emith_flg_rt = rt; - else emith_flg_rs = emith_flg_rt = 0; + if (emith_flg_hint & _FHC) { + if (sub) // C = sub:rt 0) // Nt^Ns - EMIT(MIPS_XOR_REG(FV, rt, rs)); - else if (imm < 0) - EMIT(MIPS_NOR_REG(FV, rt, Z0)); - else if (imm > 0) - EMIT(MIPS_OR_REG(FV, rt, Z0)); // Nt^Ns in FV, bit 31 - else emith_flg_noV = 1; // imm #0, never overflows + if (emith_flg_hint & _FHV) { + emith_flg_noV = 0; + if (rt >= 0) // Nt^Ns in FV, bit 31 + EMIT(MIPS_XOR_REG(FV, rs, rt)); + else if (imm == 0) + emith_flg_noV = 1; // imm #0 can't overflow + else if ((imm < 0) == !sub) + EMIT(MIPS_NOR_REG(FV, rs, Z0)); + else if ((imm > 0) == !sub) + EMIT(MIPS_OR_REG(FV, rs, Z0)); + } // full V = Nd^Nt^Ns^C calculation is deferred until really needed - if (rd != FNZ) + if (rd && rd != FNZ) EMIT(MIPS_MOVE_REG(rd, FNZ)); // N,Z via result value in FNZ + emith_cmp_rs = emith_cmp_rt = -1; +} + +// since MIPS has less-than and compare-branch insns, handle cmp separately by +// storing the involved regs for later use in one of those MIPS insns. +// This works for all conditions but VC/VS, but this is fortunately never used. 
+static void emith_set_compare_flags(int rs, int rt, s32 imm) +{ + emith_cmp_rt = rt; + emith_cmp_rs = rs; + emith_cmp_imm = imm; } // data processing, register @@ -510,6 +545,13 @@ static void emith_set_arith_flags(int rd, int rt, int rs, s32 imm, int sub) } else EMIT(MIPS_OR_REG(d, s1, s2)); \ } while (0) +#define emith_or_r_r_r_lsr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSR_IMM(AT, s2, simm)); \ + EMIT(MIPS_OR_REG(d, s1, AT)); \ + } else EMIT(MIPS_OR_REG(d, s1, s2)); \ +} while (0) + #define emith_eor_r_r_r_lsl(d, s1, s2, simm) do { \ if (simm) { \ EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ @@ -533,7 +575,11 @@ static void emith_set_arith_flags(int rd, int rt, int rs, s32 imm, int sub) #define emith_or_r_r_lsl(d, s, lslimm) \ emith_or_r_r_r_lsl(d, d, s, lslimm) +#define emith_or_r_r_lsr(d, s, lsrimm) \ + emith_or_r_r_r_lsr(d, d, s, lsrimm) +#define emith_eor_r_r_lsl(d, s, lslimm) \ + emith_eor_r_r_r_lsl(d, d, s, lslimm) #define emith_eor_r_r_lsr(d, s, lsrimm) \ emith_eor_r_r_r_lsr(d, d, s, lsrimm) @@ -570,13 +616,21 @@ static void emith_set_arith_flags(int rd, int rt, int rs, s32 imm, int sub) EMIT(MIPS_NEG_REG(d, s)) #define emith_adc_r_r_r(d, s1, s2) do { \ - emith_add_r_r_r(AT, s1, FC); \ - emith_add_r_r_r(d, AT, s2); \ + emith_add_r_r_r(AT, s2, FC); \ + emith_add_r_r_r(d, s1, AT); \ +} while (0) + +#define emith_sbc_r_r_r(d, s1, s2) do { \ + emith_add_r_r_r(AT, s2, FC); \ + emith_sub_r_r_r(d, s1, AT); \ } while (0) #define emith_adc_r_r(d, s) \ emith_adc_r_r_r(d, d, s) +#define emith_negc_r_r(d, s) \ + emith_sbc_r_r_r(d, Z0, s) + // NB: the incoming carry Cin can cause Cout if s2+Cin=0 (or s1+Cin=0 FWIW) // moreover, if s2+Cin=0 caused Cout, s1+s2+Cin=s1+0 can't cause another Cout #define emith_adcf_r_r_r(d, s1, s2) do { \ @@ -606,16 +660,23 @@ static void emith_set_arith_flags(int rd, int rt, int rs, s32 imm, int sub) #define emith_eor_r_r(d, s) \ emith_eor_r_r_r(d, d, s) -#define emith_tst_r_r_ptr(d, s) \ - emith_and_r_r_r(FNZ, d, s) +#define 
emith_tst_r_r_ptr(d, s) do { \ + if (d != s) { \ + emith_and_r_r_r(FNZ, d, s); \ + emith_cmp_rs = emith_cmp_rt = -1; \ + } else emith_cmp_rs = s, emith_cmp_rt = Z0; \ +} while (0) #define emith_tst_r_r(d, s) \ emith_tst_r_r_ptr(d, s) -#define emith_teq_r_r(d, s) \ - emith_eor_r_r_r(FNZ, d, s) +#define emith_teq_r_r(d, s) do { \ + emith_eor_r_r_r(FNZ, d, s); \ + emith_cmp_rs = emith_cmp_rt = -1; \ +} while (0) #define emith_cmp_r_r(d, s) \ - emith_subf_r_r_r(FNZ, d, s) + emith_set_compare_flags(d, s, 0) +// emith_subf_r_r_r(FNZ, d, s) #define emith_addf_r_r(d, s) \ emith_addf_r_r_r(d, d, s) @@ -705,8 +766,8 @@ static void emith_arith_imm(int op, int rd, int rs, u32 imm) emith_adcf_r_r_imm(r, r, imm) #define emith_cmp_r_imm(r, imm) \ - emith_subf_r_r_imm(FNZ, r, (s16)imm) - + emith_set_compare_flags(r, -1, imm) +// emith_subf_r_r_imm(FNZ, r, (s16)imm) #define emith_add_r_r_ptr_imm(d, s, imm) \ emith_arith_imm(OP_ADDIU, d, s, imm) @@ -716,7 +777,7 @@ static void emith_arith_imm(int op, int rd, int rs, u32 imm) #define emith_addf_r_r_imm(d, s, imm) do { \ emith_add_r_r_imm(FNZ, s, imm); \ - emith_set_arith_flags(d, s, 0, imm, 0); \ + emith_set_arith_flags(d, s, -1, imm, 0); \ } while (0) #define emith_adc_r_r_imm(d, s, imm) do { \ @@ -725,11 +786,16 @@ static void emith_arith_imm(int op, int rd, int rs, u32 imm) } while (0) #define emith_adcf_r_r_imm(d, s, imm) do { \ - emith_add_r_r_r(FNZ, s, FC); \ - EMIT(MIPS_SLTU_REG(AT, FNZ, FC)); \ - emith_add_r_r_imm(FNZ, FNZ, imm); \ - emith_set_arith_flags(d, s, 0, imm, 0); \ - emith_or_r_r(FC, AT); \ + if (imm == 0) { \ + emith_add_r_r_r(FNZ, s, FC); \ + emith_set_arith_flags(d, s, -1, 1, 0); \ + } else { \ + emith_add_r_r_r(FNZ, s, FC); \ + EMIT(MIPS_SLTU_REG(AT, FNZ, FC)); \ + emith_add_r_r_imm(FNZ, FNZ, imm); \ + emith_set_arith_flags(d, s, -1, imm, 0); \ + emith_or_r_r(FC, AT); \ + } \ } while (0) // NB: no SUBI in MIPS II, since ADDI takes a signed imm @@ -740,7 +806,7 @@ static void emith_arith_imm(int op, int rd, int 
rs, u32 imm) #define emith_subf_r_r_imm(d, s, imm) do { \ emith_sub_r_r_imm(FNZ, s, imm); \ - emith_set_arith_flags(d, s, 0, imm, 1); \ + emith_set_arith_flags(d, s, -1, imm, 1); \ } while (0) // logical, immediate @@ -777,8 +843,10 @@ static void emith_log_imm(int op, int rd, int rs, u32 imm) #define emith_bic_r_imm_c(cond, r, imm) \ emith_bic_r_imm(r, imm) -#define emith_tst_r_imm(r, imm) \ - emith_log_imm(OP_ANDI, FNZ, r, imm) +#define emith_tst_r_imm(r, imm) do { \ + emith_log_imm(OP_ANDI, FNZ, r, imm); \ + emith_cmp_rs = emith_cmp_rt = -1; \ +} while (0) #define emith_tst_r_imm_c(cond, r, imm) \ emith_tst_r_imm(r, imm) @@ -816,6 +884,17 @@ static void emith_log_imm(int op, int rd, int rs, u32 imm) EMIT(MIPS_OR_REG(d, d, AT)); \ } while (0) +#define emith_rorc(d) do { \ + emith_lsr(d, d, 1); \ + emith_lsl(AT, FC, 31); \ + emith_or_r_r(d, AT); \ +} while (0) + +#define emith_rolc(d) do { \ + emith_lsl(d, d, 1); \ + emith_or_r_r(d, FC); \ +} while (0) + // NB: all flag setting shifts make V undefined // NB: mips32r2 has EXT (useful for extracting C) #define emith_lslf(d, s, cnt) do { \ @@ -829,6 +908,7 @@ static void emith_log_imm(int op, int rd, int rs, u32 imm) emith_lsl(d, _s, 1); \ } \ emith_move_r_r(FNZ, d); \ + emith_cmp_rs = emith_cmp_rt = -1; \ } while (0) #define emith_lsrf(d, s, cnt) do { \ @@ -842,6 +922,7 @@ static void emith_log_imm(int op, int rd, int rs, u32 imm) emith_lsr(d, _s, 1); \ } \ emith_move_r_r(FNZ, d); \ + emith_cmp_rs = emith_cmp_rt = -1; \ } while (0) #define emith_asrf(d, s, cnt) do { \ @@ -855,18 +936,21 @@ static void emith_log_imm(int op, int rd, int rs, u32 imm) emith_asr(d, _s, 1); \ } \ emith_move_r_r(FNZ, d); \ + emith_cmp_rs = emith_cmp_rt = -1; \ } while (0) #define emith_rolf(d, s, cnt) do { \ emith_rol(d, s, cnt); \ emith_and_r_r_imm(FC, d, 1); \ emith_move_r_r(FNZ, d); \ + emith_cmp_rs = emith_cmp_rt = -1; \ } while (0) #define emith_rorf(d, s, cnt) do { \ emith_ror(d, s, cnt); \ emith_lsr(FC, d, 31); \ emith_move_r_r(FNZ, 
d); \ + emith_cmp_rs = emith_cmp_rt = -1; \ } while (0) #define emith_rolcf(d) do { \ @@ -875,6 +959,7 @@ static void emith_log_imm(int op, int rd, int rs, u32 imm) emith_or_r_r(d, FC); \ emith_move_r_r(FC, AT); \ emith_move_r_r(FNZ, d); \ + emith_cmp_rs = emith_cmp_rt = -1; \ } while (0) #define emith_rorcf(d) do { \ @@ -884,6 +969,7 @@ static void emith_log_imm(int op, int rd, int rs, u32 imm) emith_or_r_r(d, FC); \ emith_move_r_r(FC, AT); \ emith_move_r_r(FNZ, d); \ + emith_cmp_rs = emith_cmp_rt = -1; \ } while (0) // signed/unsigned extend @@ -1108,24 +1194,82 @@ static void emith_lohi_nops(void) (((cond) >> 5) == OP__RT ? (cond) ^ 0x01 : (cond) ^ 0x20) // evaluate the emulated condition, returns a register/branch type pair +static int emith_cmpr_check(int rs, int rt, int cond, int *r) +{ + int b = 0; + + // condition check for comparing 2 registers + switch (cond) { + case DCOND_EQ: *r = rs; b = MIPS_BEQ|rt; break; + case DCOND_NE: *r = rs; b = MIPS_BNE|rt; break; + case DCOND_LO: EMIT(MIPS_SLTU_REG(AT, rs, rt)); + *r = AT, b = MIPS_BNE; break; // s < t unsigned + case DCOND_HS: EMIT(MIPS_SLTU_REG(AT, rs, rt)); + *r = AT, b = MIPS_BEQ; break; // s >= t unsigned + case DCOND_LS: EMIT(MIPS_SLTU_REG(AT, rt, rs)); + *r = AT, b = MIPS_BEQ; break; // s <= t unsigned + case DCOND_HI: EMIT(MIPS_SLTU_REG(AT, rt, rs)); + *r = AT, b = MIPS_BNE; break; // s > t unsigned + case DCOND_LT: if (rt == 0) { *r = rs, b = MIPS_BLT; break; } // s < 0 + EMIT(MIPS_SLT_REG(AT, rs, rt)); + *r = AT, b = MIPS_BNE; break; // s < t + case DCOND_GE: if (rt == 0) { *r = rs, b = MIPS_BGE; break; } // s >= 0 + EMIT(MIPS_SLT_REG(AT, rs, rt)); + *r = AT, b = MIPS_BEQ; break; // s >= t + case DCOND_LE: if (rt == 0) { *r = rs, b = MIPS_BLE; break; } // s <= 0 + EMIT(MIPS_SLT_REG(AT, rt, rs)); + *r = AT, b = MIPS_BEQ; break; // s <= t + case DCOND_GT: if (rt == 0) { *r = rs, b = MIPS_BGT; break; } // s > 0 + EMIT(MIPS_SLT_REG(AT, rt, rs)); + *r = AT, b = MIPS_BNE; break; // s > t + } + + return b; 
+} + +static int emith_cmpi_check(int rs, s32 imm, int cond, int *r) +{ + int b = 0; + + // condition check for comparing register with immediate + if (imm == 0) return emith_cmpr_check(rs, Z0, cond, r); + switch (cond) { + case DCOND_EQ: emith_move_r_imm(AT, imm); + *r = rs; b = MIPS_BEQ|AT; break; + case DCOND_NE: emith_move_r_imm(AT, imm); + *r = rs; b = MIPS_BNE|AT; break; + case DCOND_LO: EMIT(MIPS_SLTU_IMM(AT, rs, imm)); + *r = AT, b = MIPS_BNE; break; // s < imm unsigned + case DCOND_HS: EMIT(MIPS_SLTU_IMM(AT, rs, imm)); + *r = AT, b = MIPS_BEQ; break; // s >= imm unsigned + case DCOND_LS: emith_move_r_imm(AT, imm); + EMIT(MIPS_SLTU_REG(AT, AT, rs)); + *r = AT, b = MIPS_BEQ; break; // s <= imm unsigned + case DCOND_HI: emith_move_r_imm(AT, imm); + EMIT(MIPS_SLTU_REG(AT, AT, rs)); + *r = AT, b = MIPS_BNE; break; // s > imm unsigned + case DCOND_LT: EMIT(MIPS_SLT_IMM(AT, rs, imm)); + *r = AT, b = MIPS_BNE; break; // s < imm + case DCOND_GE: EMIT(MIPS_SLT_IMM(AT, rs, imm)); + *r = AT, b = MIPS_BEQ; break; // s >= imm + case DCOND_LE: emith_move_r_imm(AT, imm); + EMIT(MIPS_SLT_REG(AT, AT, rs)); + *r = AT, b = MIPS_BEQ; break; // s <= imm + case DCOND_GT: emith_move_r_imm(AT, imm); + EMIT(MIPS_SLT_REG(AT, AT, rs)); + *r = AT, b = MIPS_BNE; break; // s > imm + } + return b; +} + static int emith_cond_check(int cond, int *r) { int b = 0; - // shortcut for comparing 2 registers - if (emith_flg_rs || emith_flg_rt) switch (cond) { - case DCOND_LS: EMIT(MIPS_SLTU_REG(AT, emith_flg_rs, emith_flg_rt)); - *r = AT, b = MIPS_BEQ; break; // s <= t unsigned - case DCOND_HI: EMIT(MIPS_SLTU_REG(AT, emith_flg_rs, emith_flg_rt)); - *r = AT, b = MIPS_BNE; break; // s > t unsigned - case DCOND_LT: EMIT(MIPS_SLT_REG(AT, emith_flg_rt, emith_flg_rs)); - *r = AT, b = MIPS_BNE; break; // s < t - case DCOND_GE: EMIT(MIPS_SLT_REG(AT, emith_flg_rt, emith_flg_rs)); - *r = AT, b = MIPS_BEQ; break; // s >= t - case DCOND_LE: EMIT(MIPS_SLT_REG(AT, emith_flg_rs, emith_flg_rt)); - *r = AT, b = 
MIPS_BEQ; break; // s <= t - case DCOND_GT: EMIT(MIPS_SLT_REG(AT, emith_flg_rs, emith_flg_rt)); - *r = AT, b = MIPS_BNE; break; // s > t + if (emith_cmp_rs >= 0) { + if (emith_cmp_rt != -1) + b = emith_cmpr_check(emith_cmp_rs,emith_cmp_rt, cond,r); + else b = emith_cmpi_check(emith_cmp_rs,emith_cmp_imm,cond,r); } // shortcut for V known to be 0 @@ -1373,8 +1517,10 @@ static int emith_cond_check(int cond, int *r) #define emith_sh2_div1_step(rn, rm, sr) do { \ emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \ EMITH_JMP3_START(DCOND_EQ); \ + EMITH_HINT_COND(DCOND_CS); \ emith_addf_r_r(rn, rm); \ EMITH_JMP3_MID(DCOND_EQ); \ + EMITH_HINT_COND(DCOND_CS); \ emith_subf_r_r(rn, rm); \ EMITH_JMP3_END(); \ emith_eor_r_r(sr, FC); \ @@ -1433,23 +1579,27 @@ static int emith_cond_check(int cond, int *r) } while (0) #define emith_write_sr(sr, srcr) do { \ - emith_lsr(sr, sr, 10); \ - emith_or_r_r_r_lsl(sr, sr, srcr, 22); \ - emith_ror(sr, sr, 22); \ + emith_lsr(sr, sr , 10); emith_lsl(sr, sr, 10); \ + emith_lsl(AT, srcr, 22); emith_lsr(AT, AT, 22); \ + emith_or_r_r(sr, AT); \ } while (0) -#define emith_carry_to_t(srr, is_sub) do { \ - emith_lsr(sr, sr, 1); \ - emith_adc_r_r(sr, sr); \ +#define emith_carry_to_t(sr, is_sub) do { \ + emith_and_r_imm(sr, 0xfffffffe); \ + emith_or_r_r(sr, FC); \ +} while (0) + +#define emith_t_to_carry(sr, is_sub) do { \ + emith_and_r_r_imm(FC, sr, 1); \ } while (0) #define emith_tpop_carry(sr, is_sub) do { \ emith_and_r_r_imm(FC, sr, 1); \ - emith_lsr(sr, sr, 1); \ + emith_eor_r_r(sr, FC); \ } while (0) #define emith_tpush_carry(sr, is_sub) \ - emith_adc_r_r(sr, sr) + emith_or_r_r(sr, FC) #ifdef T // T bit handling @@ -1463,9 +1613,61 @@ static void emith_clr_t_cond(int sr) static void emith_set_t_cond(int sr, int cond) { - EMITH_SJMP_START(emith_invert_cond(cond)); - emith_or_r_imm_c(cond, sr, T); - EMITH_SJMP_END(emith_invert_cond(cond)); + int b, r; + u8 *ptr; + u32 val = 0, inv = 0; + + // try to avoid jumping around if possible + if (emith_cmp_rs >= 0) 
{ + if (emith_cmp_rt >= 0) + b = emith_cmpr_check(emith_cmp_rs, emith_cmp_rt, cond, &r); + else + b = emith_cmpi_check(emith_cmp_rs, emith_cmp_imm, cond, &r); + + // XXX this relies on the inner workings of cmp_check... + if (r == AT) + // result of slt check which returns either 0 or 1 in AT + val++, inv = (b == MIPS_BEQ); + } else { + b = emith_cond_check(cond, &r); + if (r == Z0) { + if (b == MIPS_BEQ || b == MIPS_BLE || b == MIPS_BGE) + emith_or_r_imm(sr, T); + return; + } else if (r == FC) + val++, inv = (b == MIPS_BEQ); + } + + if (!val) switch (b) { // cases: b..z r, aka cmp r,Z0 or cmp r,#0 + case MIPS_BEQ: EMIT(MIPS_SLTU_IMM(AT, r, 1)); r=AT; val++; break; + case MIPS_BNE: EMIT(MIPS_SLTU_REG(AT,Z0, r)); r=AT; val++; break; + case MIPS_BLT: EMIT(MIPS_SLT_REG(AT, r, Z0)); r=AT; val++; break; + case MIPS_BGE: EMIT(MIPS_SLT_REG(AT, r, Z0)); r=AT; val++; inv++; break; + case MIPS_BLE: EMIT(MIPS_SLT_REG(AT, Z0, r)); r=AT; val++; inv++; break; + case MIPS_BGT: EMIT(MIPS_SLT_REG(AT, Z0, r)); r=AT; val++; break; + default: // cases: beq/bne r,s, aka cmp r,s + if ((b>>5) == OP_BEQ) { + EMIT(MIPS_XOR_REG(AT, r, b&0x1f)); + EMIT(MIPS_SLTU_IMM(AT,AT, 1)); r=AT; val++; break; + } else if ((b>>5) == OP_BNE) { + EMIT(MIPS_XOR_REG(AT, r, b&0x1f)); + EMIT(MIPS_SLTU_IMM(AT,Z0,AT)); r=AT; val++; break; + } + } + if (val) { + emith_or_r_r(sr, r); + if (inv) + emith_eor_r_imm(sr, T); + return; + } + + // can't obtain result directly, use presumably slower jump !cond + or sr,T + b = emith_invert_branch(b); + ptr = emith_branch(MIPS_BCONDZ(b, r, 0)); + emith_or_r_imm(sr, T); + emith_flush(); // prohibit delay slot switching across jump targets + val = (u8 *)tcache_ptr - (u8 *)(ptr) - 4; + EMIT_PTR(ptr, MIPS_BCONDZ(b, r, val & 0x0003ffff)); } #define emith_get_t_cond() -1 diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 39f3a1d7..e7284499 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -340,11 +340,29 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // 
x86-64,i386 common rcache_free_tmp(tmp_); \ } else emith_or_r_r_r(d, s1, s2); \ } while (0) +#define emith_or_r_r_r_lsr(d, s1, s2, lsrimm) do { \ + if (lsrimm) { \ + int tmp_ = rcache_get_tmp(); \ + emith_lsr(tmp_, s2, lsrimm); \ + emith_or_r_r_r(d, s1, tmp_); \ + rcache_free_tmp(tmp_); \ + } else emith_or_r_r_r(d, s1, s2); \ +} while (0) // _r_r_shift #define emith_or_r_r_lsl(d, s, lslimm) \ emith_or_r_r_r_lsl(d, d, s, lslimm) +#define emith_or_r_r_lsr(d, s, lsrimm) \ + emith_or_r_r_r_lsr(d, d, s, lsrimm) +#define emith_eor_r_r_lsl(d, s, lslimm) do { \ + if (lslimm) { \ + int tmp_ = rcache_get_tmp(); \ + emith_lsl(tmp_, s, lslimm); \ + emith_eor_r_r(d, tmp_); \ + rcache_free_tmp(tmp_); \ + } else emith_eor_r_r(d, s); \ +} while (0) #define emith_eor_r_r_lsr(d, s, lsrimm) do { \ if (lsrimm) { \ int tmp_ = rcache_get_tmp(); \ @@ -972,6 +990,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common #define EMITH_SJMP2_END(cond) \ EMITH_SJMP3_END() +#define EMITH_HINT_COND(cond) /**/ + #define emith_pass_arg_r(arg, reg) do { \ int rd = 7; \ host_arg2reg(rd, arg); \ @@ -1255,6 +1275,11 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common emith_rol(sr, sr, 1); \ } while (0) +#define emith_t_to_carry(sr, is_sub) do { \ + emith_ror(sr, sr, 1); \ + emith_rol(sr, sr, 1); \ +} while (0) + #define emith_tpop_carry(sr, is_sub) \ emith_lsr(sr, sr, 1) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 3cf7a0d9..2320c501 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -45,6 +45,7 @@ #define REMAP_REGISTER 1 #define LOOP_DETECTION 1 #define LOOP_OPTIMIZER 1 +#define T_OPTIMIZER 1 // limits (per block) #define MAX_BLOCK_SIZE (BLOCK_INSN_LIMIT * 6 * 6) @@ -108,7 +109,7 @@ static int insns_compiled, hash_collisions, host_insn_count; #define GET_Rn() \ ((op >> 8) & 0x0f) -#define SHR_T SHR_SR // might make them separate someday +#define SHR_T 30 // separate T for not-used detection #define SHR_MEM 31 #define SHR_TMP -1 @@ 
-122,6 +123,7 @@ static int insns_compiled, hash_collisions, host_insn_count; #define I_SHIFT 4 #define Q_SHIFT 8 #define M_SHIFT 9 +#define T_SHIFT 11 static struct op_data { u8 op; @@ -263,7 +265,6 @@ static void REGPARM(3) *sh2_drc_log_entry(void *block, SH2 *sh2, u32 sr) return block; } #endif -// } debug #define TCACHE_BUFFERS 3 @@ -1527,7 +1528,7 @@ static void rcache_unmap_vreg(int x) FOR_ALL_BITS_SET_DO(cache_regs[x].gregs, i, if (guest_regs[i].flags & GRF_DIRTY) { // if a dirty reg is unmapped save its value to context - if (~rcache_regs_discard & (1 << i)) + if ((~rcache_regs_discard | rcache_regs_now) & (1 << i)) emith_ctx_write(cache_regs[x].hreg, i * 4); guest_regs[i].flags &= ~GRF_DIRTY; } @@ -1565,7 +1566,7 @@ static void rcache_clean_vreg(int x) if (guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) { if (guest_regs[r].vreg != guest_regs[r].sreg && !cache_regs[guest_regs[r].sreg].locked && - (~rcache_regs_discard & (1 << r)) && + ((~rcache_regs_discard | rcache_regs_now) & (1 << r)) && !(rns & cache_regs[guest_regs[r].sreg].gregs)) { // statically mapped reg not in its sreg. move back to sreg rcache_evict_vreg(guest_regs[r].sreg); @@ -1578,7 +1579,7 @@ static void rcache_clean_vreg(int x) // cannot remap. 
keep dirty for writeback in unmap cache_regs[x].flags |= HRF_DIRTY; } else { - if (~rcache_regs_discard & (1 << r)) + if ((~rcache_regs_discard | rcache_regs_now) & (1 << r)) emith_ctx_write(cache_regs[x].hreg, r * 4); guest_regs[r].flags &= ~GRF_DIRTY; } @@ -1875,9 +1876,22 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr if ((guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) && guest_regs[r].sreg == dst && !tr->locked) { // split aliases if r is STATIC in sreg and dst isn't already locked - rcache_lock_vreg(dst); // lock to avoid evicting dst - x = rcache_allocate_vreg(rsp_d & ali); - rcache_unlock_vreg(dst); + int t; + FOR_ALL_BITS_SET_DO(ali, t, + if ((guest_regs[t].flags & (GRF_STATIC|GRF_PINNED)) && + !(ali & ~(1 << t)) && + !cache_regs[guest_regs[t].sreg].locked && + !(rsp_d & cache_regs[guest_regs[t].sreg].gregs)) { + // alias is a single STATIC and its sreg is available + x = guest_regs[t].sreg; + rcache_evict_vreg(x); + } else { + rcache_lock_vreg(dst); // lock to avoid evicting dst + x = rcache_allocate_vreg(rsp_d & ali); + rcache_unlock_vreg(dst); + } + break; + ) if (x >= 0) { src = x; rcache_move_vreg(src, dst); @@ -2855,11 +2869,11 @@ static void emit_do_static_regs(int is_write, int tmpr) } #define DELAY_SAVE_T(sr) { \ + int t_ = rcache_get_tmp(); \ emith_bic_r_imm(sr, T_save); \ - emith_tst_r_imm(sr, T); \ - EMITH_SJMP_START(DCOND_EQ); \ - emith_or_r_imm_c(DCOND_NE, sr, T_save); \ - EMITH_SJMP_END(DCOND_EQ); \ + emith_and_r_r_imm(t_, sr, 1); \ + emith_or_r_r_lsl(sr, t_, T_SHIFT); \ + rcache_free_tmp(t_); \ } #define FLUSH_CYCLES(sr) \ @@ -2961,6 +2975,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) ADD_TO_ARRAY(branch_target_pc, branch_target_count, pc, ); if (ops[i].op == OP_LDC && (ops[i].dest & BITMASK1(SHR_SR)) && pc+2 < end_pc) op_flags[i+1] |= OF_BTARGET; // RTE entrypoint in case of SR.IMASK change + // unify T and SR since rcache doesn't know about "virtual" guest regs + if (ops[i].source 
& BITMASK1(SHR_T)) ops[i].source |= BITMASK1(SHR_SR); + if (ops[i].dest & BITMASK1(SHR_T)) ops[i].dest |= BITMASK1(SHR_SR); #if LOOP_DETECTION // loop types detected: // 1. target: ... BRA target -> idle loop @@ -3014,15 +3031,15 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) drcf.pending_branch_indirect = 1; // conditions g,h - cond.branch // poll/idle loops terminate with their backwards branch to the loop start if (drcf.pending_branch_direct && !(op_flags[i+1] & OF_DELAY_OP)) { - m2 &= ~(m1 | BITMASK2(SHR_PC, SHR_SR)); // conditions d,e + g,h + m2 &= ~(m1 | BITMASK3(SHR_PC, SHR_SR, SHR_T)); // conditions d,e + g,h if (m2 || ((op == OF_IDLE_LOOP) == (drcf.pending_branch_indirect))) op = 0; // conditions not met op_flags[v] = (op_flags[v] & ~OF_LOOP) | op; // set loop type drcf.loop_type = 0; #if LOOP_OPTIMIZER if (op_flags[v] & OF_BASIC_LOOP) { - m3 &= ~rcache_regs_static & ~BITMASK4(SHR_PC, SHR_PR, SHR_SR, SHR_MEM); - if (m3 && count_bits(m3) < count_bits(rcache_hregs_reg) && + m3 &= ~rcache_regs_static & ~BITMASK5(SHR_PC, SHR_PR, SHR_SR, SHR_T, SHR_MEM); + if (m3 && count_bits(m3) < count_bits(rcache_vregs_reg) && pinned_loop_count < ARRAY_SIZE(pinned_loop_pc)-1) { pinned_loop_mask[pinned_loop_count] = m3; pinned_loop_pc[pinned_loop_count++] = base_pc + 2*v; @@ -3154,48 +3171,63 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) rcache_free_tmp(tmp3); #endif + // check cycles + sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); + #if LOOP_OPTIMIZER if (op_flags[i] & OF_BASIC_LOOP) { if (pinned_loop_pc[pinned_loop_count] == pc) { // pin needed regs on loop entry FOR_ALL_BITS_SET_DO(pinned_loop_mask[pinned_loop_count], v, rcache_pin_reg(v)); emith_flush(); + // store current PC as loop target pinned_loop_ptr[pinned_loop_count] = tcache_ptr; } else op_flags[i] &= ~OF_BASIC_LOOP; } -#endif - // check cycles - sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); - emith_cmp_r_imm(sr, 0); - -#if LOOP_OPTIMIZER - void *jp = NULL; if 
(op_flags[i] & OF_BASIC_LOOP) { // if exiting a pinned loop pinned regs must be written back to ctx // since they are reloaded in the loop entry code - jp = tcache_ptr; - emith_jump_cond_patchable(DCOND_GT, jp); // XXX need API for JMP_POS + emith_cmp_r_imm(sr, 0); + EMITH_JMP_START(DCOND_GT); rcache_save_pinned(); - } + + if (blx_target_count < ARRAY_SIZE(blx_target_pc)) { + // exit via stub in blx table (saves some 1-3 insns in the main flow) + blx_target_ptr[blx_target_count] = tcache_ptr; + blx_target_pc[blx_target_count] = pc|1; + blx_target_bl[blx_target_count++] = NULL; + emith_jump_patchable(tcache_ptr); + } else { + // blx table full, must inline exit code + tmp = rcache_get_tmp_arg(0); + emith_move_r_imm(tmp, pc); + emith_jump(sh2_drc_exit); + rcache_free_tmp(tmp); + } + EMITH_JMP_END(DCOND_GT); + } else #endif - if (blx_target_count < ARRAY_SIZE(blx_target_pc)) { - // exit via stub in blx table (saves some 1-3 insns in the main flow) - blx_target_pc[blx_target_count] = pc|1; - blx_target_bl[blx_target_count] = NULL; - blx_target_ptr[blx_target_count++] = tcache_ptr; - } else { - // blx table full, must inline exit code - tmp = rcache_get_tmp_arg(0); - emith_move_r_imm_c(DCOND_LE, tmp, pc); - rcache_free_tmp(tmp); + { + if (blx_target_count < ARRAY_SIZE(blx_target_pc)) { + // exit via stub in blx table (saves some 1-3 insns in the main flow) + blx_target_pc[blx_target_count] = pc|1; + blx_target_bl[blx_target_count] = NULL; + emith_cmp_r_imm(sr, 0); + blx_target_ptr[blx_target_count++] = tcache_ptr; + emith_jump_cond_patchable(DCOND_LE, tcache_ptr); + } else { + // blx table full, must inline exit code + tmp = rcache_get_tmp_arg(0); + emith_cmp_r_imm(sr, 0); + EMITH_SJMP_START(DCOND_GT); + emith_move_r_imm_c(DCOND_LE, tmp, pc); + emith_jump_cond(DCOND_LE, sh2_drc_exit); + EMITH_SJMP_END(DCOND_GT); + rcache_free_tmp(tmp); + } } - emith_jump_cond_patchable(DCOND_LE, tcache_ptr); -#if LOOP_OPTIMIZER - if (op_flags[i] & OF_BASIC_LOOP) - emith_jump_patch(jp, 
tcache_ptr, NULL); -#endif #if (DRC_DEBUG & 32) // block hit counter @@ -3328,7 +3360,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) rcache_set_usage_now(opd[0].source); // current insn rcache_set_usage_soon(soon); // insns 1-4 rcache_set_usage_late(late & ~soon); // insns 5-9 - rcache_set_usage_discard(write & ~(late|soon|opd[0].source)); + rcache_set_usage_discard(write & ~(late|soon)); if (v <= 9) // upcoming rcache_flush, start writing back unused dirty stuff rcache_clean_masked(rcache_dirty_mask() & ~(write|opd[0].dest)); @@ -3512,11 +3544,17 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) { case 0: // CLRT 0000000000001000 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); - emith_set_t(sr, 0); +#if T_OPTIMIZER + if (~rcache_regs_discard & BITMASK1(SHR_T)) +#endif + emith_set_t(sr, 0); break; case 1: // SETT 0000000000011000 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); - emith_set_t(sr, 1); +#if T_OPTIMIZER + if (~rcache_regs_discard & BITMASK1(SHR_T)) +#endif + emith_set_t(sr, 1); break; case 2: // CLRMAC 0000000000101000 emit_move_r_imm32(SHR_MACL, 0); @@ -3602,20 +3640,16 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); + tmp = rcache_get_tmp(); emith_invalidate_t(); emith_bic_r_imm(sr, M|Q|T); - emith_tst_r_imm(tmp2, (1<<31)); - EMITH_SJMP_START(DCOND_EQ); - emith_or_r_imm_c(DCOND_NE, sr, Q); - EMITH_SJMP_END(DCOND_EQ); - emith_tst_r_imm(tmp3, (1<<31)); - EMITH_SJMP_START(DCOND_EQ); - emith_or_r_imm_c(DCOND_NE, sr, M); - EMITH_SJMP_END(DCOND_EQ); - emith_teq_r_r(tmp2, tmp3); - EMITH_SJMP_START(DCOND_PL); - emith_or_r_imm_c(DCOND_MI, sr, T); - EMITH_SJMP_END(DCOND_PL); + emith_lsr(tmp, tmp2, 31); // Q = Nn + emith_or_r_r_lsl(sr, tmp, Q_SHIFT); + emith_lsr(tmp, tmp3, 31); // M = Nm + emith_or_r_r_lsl(sr, tmp, M_SHIFT); + emith_eor_r_r_lsr(tmp, tmp2, 31); + 
emith_or_r_r(sr, tmp); // T = Q^M + rcache_free(tmp); goto end_op; case 0x08: // TST Rm,Rn 0010nnnnmmmm1000 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); @@ -3708,26 +3742,27 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); - emith_clr_t_cond(sr); - emith_cmp_r_r(tmp2, tmp3); switch (op & 0x07) { case 0x00: // CMP/EQ - emith_set_t_cond(sr, DCOND_EQ); + tmp = DCOND_EQ; break; case 0x02: // CMP/HS - emith_set_t_cond(sr, DCOND_HS); + tmp = DCOND_HS; break; case 0x03: // CMP/GE - emith_set_t_cond(sr, DCOND_GE); + tmp = DCOND_GE; break; case 0x06: // CMP/HI - emith_set_t_cond(sr, DCOND_HI); + tmp = DCOND_HI; break; case 0x07: // CMP/GT - emith_set_t_cond(sr, DCOND_GT); + tmp = DCOND_GT; break; } + emith_clr_t_cond(sr); + emith_cmp_r_r(tmp2, tmp3); + emith_set_t_cond(sr, tmp); goto end_op; case 0x04: // DIV1 Rm,Rn 0011nnnnmmmm0100 // Q1 = carry(Rn = (Rn << 1) | T) @@ -3738,29 +3773,27 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // Q = M ^ Q1 ^ Q2 // T = (Q == M) = !(Q ^ M) = !(Q1 ^ Q2) tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); - tmp2 = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp); + tmp2 = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp4); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_sync_t(sr); + EMITH_HINT_COND(DCOND_CS); emith_tpop_carry(sr, 0); - emith_adcf_r_r_r(tmp2, tmp, tmp); + emith_adcf_r_r_r(tmp2, tmp4, tmp4); emith_tpush_carry(sr, 0); // keep Q1 in T for now - rcache_free(tmp); - tmp4 = rcache_get_tmp(); - emith_and_r_r_imm(tmp4, sr, M); - emith_eor_r_r_lsr(sr, tmp4, M_SHIFT - Q_SHIFT); // Q ^= M - rcache_free_tmp(tmp4); + rcache_free(tmp4); + tmp = rcache_get_tmp(); + emith_and_r_r_imm(tmp, sr, M); + emith_eor_r_r_lsr(sr, tmp, M_SHIFT - Q_SHIFT); // Q ^= M + rcache_free_tmp(tmp); // add or sub, invert T if carry to get Q1 ^ Q2 // in: (Q ^ M) passed in Q, Q1 in T 
emith_sh2_div1_step(tmp2, tmp3, sr); - emith_bic_r_imm(sr, Q); - emith_tst_r_imm(sr, M); - EMITH_SJMP_START(DCOND_EQ); - emith_or_r_imm_c(DCOND_NE, sr, Q); // Q = M - EMITH_SJMP_END(DCOND_EQ); - emith_tst_r_imm(sr, T); - EMITH_SJMP_START(DCOND_EQ); - emith_eor_r_imm_c(DCOND_NE, sr, Q); // Q = M ^ Q1 ^ Q2 - EMITH_SJMP_END(DCOND_EQ); + tmp = rcache_get_tmp(); + emith_bic_r_imm(sr, Q); // Q = M + emith_and_r_r_imm(tmp, sr, M); + emith_or_r_r_lsr(sr, tmp, M_SHIFT - Q_SHIFT); + emith_and_r_r_imm(tmp, sr, T); // Q = M ^ Q1 ^ Q2 + emith_eor_r_r_lsl(sr, tmp, Q_SHIFT); emith_eor_r_imm(sr, T); // T = !(Q1 ^ Q2) goto end_op; case 0x05: // DMULU.L Rm,Rn 0011nnnnmmmm0101 @@ -3791,14 +3824,28 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_sync_t(sr); - if (op & 4) { // adc - emith_tpop_carry(sr, 0); - emith_adcf_r_r_r(tmp, tmp3, tmp2); - emith_tpush_carry(sr, 0); - } else { - emith_tpop_carry(sr, 1); - emith_sbcf_r_r_r(tmp, tmp3, tmp2); - emith_tpush_carry(sr, 1); +#if T_OPTIMIZER + if (rcache_regs_discard & BITMASK1(SHR_T)) { + if (op & 4) { + emith_t_to_carry(sr, 0); + emith_adc_r_r_r(tmp, tmp3, tmp2); + } else { + emith_t_to_carry(sr, 1); + emith_sbc_r_r_r(tmp, tmp3, tmp2); + } + } else +#endif + { + EMITH_HINT_COND(DCOND_CS); + if (op & 4) { // adc + emith_tpop_carry(sr, 0); + emith_adcf_r_r_r(tmp, tmp3, tmp2); + emith_tpush_carry(sr, 0); + } else { + emith_tpop_carry(sr, 1); + emith_sbcf_r_r_r(tmp, tmp3, tmp2); + emith_tpush_carry(sr, 1); + } } goto end_op; case 0x0b: // SUBV Rm,Rn 0011nnnnmmmm1011 @@ -3806,12 +3853,23 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); - emith_clr_t_cond(sr); - if (op & 4) { - emith_addf_r_r_r(tmp, tmp3, tmp2); +#if T_OPTIMIZER + if (rcache_regs_discard & 
BITMASK1(SHR_T)) { + if (op & 4) + emith_add_r_r_r(tmp,tmp3,tmp2); + else + emith_sub_r_r_r(tmp,tmp3,tmp2); } else - emith_subf_r_r_r(tmp, tmp3, tmp2); - emith_set_t_cond(sr, DCOND_VS); +#endif + { + emith_clr_t_cond(sr); + EMITH_HINT_COND(DCOND_VS); + if (op & 4) + emith_addf_r_r_r(tmp, tmp3, tmp2); + else + emith_subf_r_r_r(tmp, tmp3, tmp2); + emith_set_t_cond(sr, DCOND_VS); + } goto end_op; case 0x0d: // DMULS.L Rm,Rn 0011nnnnmmmm1101 tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); @@ -3834,9 +3892,16 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 2: // SHAL Rn 0100nnnn00100000 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); - emith_invalidate_t(); - emith_lslf(tmp, tmp2, 1); - emith_carry_to_t(sr, 0); +#if T_OPTIMIZER + if (rcache_regs_discard & BITMASK1(SHR_T)) + emith_lsl(tmp, tmp2, 1); + else +#endif + { + emith_invalidate_t(); + emith_lslf(tmp, tmp2, 1); + emith_carry_to_t(sr, 0); + } goto end_op; case 1: // DT Rn 0100nnnn00010000 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); @@ -3850,6 +3915,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) #endif tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); emith_clr_t_cond(sr); + EMITH_HINT_COND(DCOND_EQ); emith_subf_r_r_imm(tmp, tmp2, 1); emith_set_t_cond(sr, DCOND_EQ); goto end_op; @@ -3862,12 +3928,22 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 2: // SHAR Rn 0100nnnn00100001 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); - emith_invalidate_t(); - if (op & 0x20) { - emith_asrf(tmp, tmp2, 1); +#if T_OPTIMIZER + if (rcache_regs_discard & BITMASK1(SHR_T)) { + if (op & 0x20) + emith_asr(tmp,tmp2,1); + else + emith_lsr(tmp,tmp2,1); } else - emith_lsrf(tmp, tmp2, 1); - emith_carry_to_t(sr, 0); +#endif + { + emith_invalidate_t(); + if (op & 0x20) { + emith_asrf(tmp, tmp2, 1); + } else + emith_lsrf(tmp, tmp2, 1); + emith_carry_to_t(sr, 0); + } goto end_op; 
case 1: // CMP/PZ Rn 0100nnnn00010001 tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); @@ -3919,24 +3995,45 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x05: // ROTR Rn 0100nnnn00000101 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); - emith_invalidate_t(); - if (op & 1) { - emith_rorf(tmp, tmp2, 1); +#if T_OPTIMIZER + if (rcache_regs_discard & BITMASK1(SHR_T)) { + if (op & 1) + emith_ror(tmp, tmp2, 1); + else + emith_rol(tmp, tmp2, 1); } else - emith_rolf(tmp, tmp2, 1); - emith_carry_to_t(sr, 0); +#endif + { + emith_invalidate_t(); + if (op & 1) + emith_rorf(tmp, tmp2, 1); + else + emith_rolf(tmp, tmp2, 1); + emith_carry_to_t(sr, 0); + } goto end_op; case 0x24: // ROTCL Rn 0100nnnn00100100 case 0x25: // ROTCR Rn 0100nnnn00100101 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, NULL); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_sync_t(sr); - emith_tpop_carry(sr, 0); - if (op & 1) { - emith_rorcf(tmp); +#if T_OPTIMIZER + if (rcache_regs_discard & BITMASK1(SHR_T)) { + emith_t_to_carry(sr, 0); + if (op & 1) + emith_rorc(tmp); + else + emith_rolc(tmp); } else - emith_rolcf(tmp); - emith_tpush_carry(sr, 0); +#endif + { + emith_tpop_carry(sr, 0); + if (op & 1) + emith_rorcf(tmp); + else + emith_rolcf(tmp); + emith_tpush_carry(sr, 0); + } goto end_op; case 0x15: // CMP/PL Rn 0100nnnn00010101 tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); @@ -4131,9 +4228,18 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x0a: // NEGC Rm,Rn 0110nnnnmmmm1010 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_sync_t(sr); - emith_tpop_carry(sr, 1); - emith_negcf_r_r(tmp2, tmp); - emith_tpush_carry(sr, 1); +#if T_OPTIMIZER + if (rcache_regs_discard & BITMASK1(SHR_T)) { + emith_t_to_carry(sr, 1); + emith_negc_r_r(tmp2, tmp); + } else +#endif + { + EMITH_HINT_COND(DCOND_CS); + emith_tpop_carry(sr, 1); + emith_negcf_r_r(tmp2, tmp); + emith_tpush_carry(sr, 1); + } break; case 0x0b: // NEG Rm,Rn 
0110nnnnmmmm1011 emith_neg_r_r(tmp2, tmp); @@ -4639,9 +4745,6 @@ static void sh2_generate_utils(void) host_arg2reg(arg2, 2); host_arg2reg(arg3, 3); emith_move_r_r(arg0, arg0); // nop - emith_move_r_r(arg1, arg1); // nop - emith_move_r_r(arg2, arg2); // nop - emith_move_r_r(arg3, arg3); // nop emith_flush(); // sh2_drc_write8(u32 a, u32 d) @@ -4665,6 +4768,7 @@ static void sh2_generate_utils(void) // d = sh2_drc_read8(u32 a) sh2_drc_read8 = (void *)tcache_ptr; emith_ctx_read_ptr(arg1, offsetof(SH2, read8_map)); + EMITH_HINT_COND(DCOND_CS); emith_sh2_rcall(arg0, arg1, arg2, arg3); EMITH_SJMP_START(DCOND_CS); emith_and_r_r_c(DCOND_CC, arg0, arg3); @@ -4679,6 +4783,7 @@ static void sh2_generate_utils(void) // d = sh2_drc_read16(u32 a) sh2_drc_read16 = (void *)tcache_ptr; emith_ctx_read_ptr(arg1, offsetof(SH2, read16_map)); + EMITH_HINT_COND(DCOND_CS); emith_sh2_rcall(arg0, arg1, arg2, arg3); EMITH_SJMP_START(DCOND_CS); emith_and_r_r_c(DCOND_CC, arg0, arg3); @@ -4692,6 +4797,7 @@ static void sh2_generate_utils(void) // d = sh2_drc_read32(u32 a) sh2_drc_read32 = (void *)tcache_ptr; emith_ctx_read_ptr(arg1, offsetof(SH2, read32_map)); + EMITH_HINT_COND(DCOND_CS); emith_sh2_rcall(arg0, arg1, arg2, arg3); EMITH_SJMP_START(DCOND_CS); emith_and_r_r_c(DCOND_CC, arg0, arg3); @@ -4706,6 +4812,7 @@ static void sh2_generate_utils(void) // d = sh2_drc_read8_poll(u32 a) sh2_drc_read8_poll = (void *)tcache_ptr; emith_ctx_read_ptr(arg1, offsetof(SH2, read8_map)); + EMITH_HINT_COND(DCOND_CS); emith_sh2_rcall(arg0, arg1, arg2, arg3); EMITH_SJMP_START(DCOND_CC); emith_move_r_r_ptr_c(DCOND_CS, arg1, CONTEXT_REG); @@ -4723,6 +4830,7 @@ static void sh2_generate_utils(void) // d = sh2_drc_read16_poll(u32 a) sh2_drc_read16_poll = (void *)tcache_ptr; emith_ctx_read_ptr(arg1, offsetof(SH2, read16_map)); + EMITH_HINT_COND(DCOND_CS); emith_sh2_rcall(arg0, arg1, arg2, arg3); EMITH_SJMP_START(DCOND_CC); emith_move_r_r_ptr_c(DCOND_CS, arg1, CONTEXT_REG); @@ -4739,6 +4847,7 @@ static void 
sh2_generate_utils(void) // d = sh2_drc_read32_poll(u32 a) sh2_drc_read32_poll = (void *)tcache_ptr; emith_ctx_read_ptr(arg1, offsetof(SH2, read32_map)); + EMITH_HINT_COND(DCOND_CS); emith_sh2_rcall(arg0, arg1, arg2, arg3); EMITH_SJMP_START(DCOND_CC); emith_move_r_r_ptr_c(DCOND_CS, arg1, CONTEXT_REG); @@ -4834,16 +4943,19 @@ static void sh2_generate_utils(void) emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx)); emith_add_r_r_r_lsl_ptr(arg1, CONTEXT_REG, arg2, 0); emith_read_r_r_offs(arg3, arg1, offsetof(SH2, rts_cache)); - emith_cmp_r_r(arg0, arg3); #if (DRC_DEBUG & 128) + emith_cmp_r_r(arg0, arg3); EMITH_SJMP_START(DCOND_EQ); emith_move_r_ptr_imm(arg3, (uptr)&rcmiss); emith_read_r_r_offs_c(DCOND_NE, arg1, arg3, 0); emith_add_r_imm_c(DCOND_NE, arg1, 1); emith_write_r_r_offs_c(DCOND_NE, arg1, arg3, 0); - EMITH_SJMP_END(DCOND_EQ); -#endif emith_jump_cond(DCOND_NE, sh2_drc_dispatcher); + EMITH_SJMP_END(DCOND_EQ); +#else + emith_cmp_r_r(arg0, arg3); + emith_jump_cond(DCOND_NE, sh2_drc_dispatcher); +#endif emith_read_r_r_offs_ptr(arg0, arg1, offsetof(SH2, rts_cache) + sizeof(void *)); emith_sub_r_imm(arg2, 2*sizeof(void *)); emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *)); @@ -4874,7 +4986,7 @@ static void sh2_generate_utils(void) emith_sub_r_imm(tmp, 4*2); rcache_clean(); // push SR - tmp = rcache_get_reg_arg(0, SHR_SP,&tmp2); + tmp = rcache_get_reg_arg(0, SHR_SP, &tmp2); emith_add_r_r_imm(tmp, tmp2, 4); tmp = rcache_get_reg_arg(1, SHR_SR, NULL); emith_clear_msb(tmp, tmp, 22); @@ -5478,6 +5590,8 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, else if ((lowest_mova && lowest_mova <= pc) || (lowest_literal && lowest_literal <= pc)) break; // text area collides with data area + else if ((op_flags[i] & OF_BTARGET) && dr_get_entry(pc, is_slave, &i_end)) + break; // branch target already compiled op = FETCH_OP(pc); switch ((op & 0xf000) >> 12) @@ -5490,19 +5604,19 @@ u16 scan_block(u32 base_pc, int is_slave, u8 
*op_flags, u32 *end_pc_out, switch (GET_Fx()) { case 0: // STC SR,Rn 0000nnnn00000010 - tmp = SHR_SR; + tmp = BITMASK2(SHR_SR, SHR_T); break; case 1: // STC GBR,Rn 0000nnnn00010010 - tmp = SHR_GBR; + tmp = BITMASK1(SHR_GBR); break; case 2: // STC VBR,Rn 0000nnnn00100010 - tmp = SHR_VBR; + tmp = BITMASK1(SHR_VBR); break; default: goto undefined; } opd->op = OP_MOVE; - opd->source = BITMASK1(tmp); + opd->source = tmp; opd->dest = BITMASK1(GET_Rn()); break; case 0x03: @@ -5549,7 +5663,7 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, opd->imm = 1; break; case 2: // CLRMAC 0000000000101000 - opd->dest = BITMASK3(SHR_T, SHR_MACL, SHR_MACH); + opd->dest = BITMASK2(SHR_MACL, SHR_MACH); break; default: goto undefined; @@ -5612,7 +5726,7 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, case 2: // RTE 0000000000101011 opd->op = OP_RTE; opd->source = BITMASK1(SHR_SP); - opd->dest = BITMASK3(SHR_SP, SHR_SR, SHR_PC); + opd->dest = BITMASK4(SHR_SP, SHR_SR, SHR_T, SHR_PC); opd->cycles = 4; next_is_delay = 1; end_block = !(op_flags[i+1+next_is_delay] & OF_BTARGET); @@ -5664,7 +5778,7 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, break; case 0x07: // DIV0S Rm,Rn 0010nnnnmmmm0111 opd->source = BITMASK2(GET_Rm(), GET_Rn()); - opd->dest = BITMASK1(SHR_SR); + opd->dest = BITMASK2(SHR_SR, SHR_T); break; case 0x08: // TST Rm,Rn 0010nnnnmmmm1000 opd->source = BITMASK2(GET_Rm(), GET_Rn()); @@ -5707,8 +5821,8 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, opd->dest = BITMASK1(SHR_T); break; case 0x04: // DIV1 Rm,Rn 0011nnnnmmmm0100 - opd->source = BITMASK3(GET_Rm(), GET_Rn(), SHR_SR); - opd->dest = BITMASK2(GET_Rn(), SHR_SR); + opd->source = BITMASK4(GET_Rm(), GET_Rn(), SHR_SR, SHR_T); + opd->dest = BITMASK3(GET_Rn(), SHR_SR, SHR_T); break; case 0x05: // DMULU.L Rm,Rn 0011nnnnmmmm0101 case 0x0d: // DMULS.L Rm,Rn 0011nnnnmmmm1101 @@ -5778,30 +5892,30 @@ u16 scan_block(u32 
base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, switch (op & 0x3f) { case 0x02: // STS.L MACH,@-Rn 0100nnnn00000010 - tmp = SHR_MACH; + tmp = BITMASK1(SHR_MACH); break; case 0x12: // STS.L MACL,@-Rn 0100nnnn00010010 - tmp = SHR_MACL; + tmp = BITMASK1(SHR_MACL); break; case 0x22: // STS.L PR,@-Rn 0100nnnn00100010 - tmp = SHR_PR; + tmp = BITMASK1(SHR_PR); break; case 0x03: // STC.L SR,@-Rn 0100nnnn00000011 - tmp = SHR_SR; + tmp = BITMASK2(SHR_SR, SHR_T); opd->cycles = 2; break; case 0x13: // STC.L GBR,@-Rn 0100nnnn00010011 - tmp = SHR_GBR; + tmp = BITMASK1(SHR_GBR); opd->cycles = 2; break; case 0x23: // STC.L VBR,@-Rn 0100nnnn00100011 - tmp = SHR_VBR; + tmp = BITMASK1(SHR_VBR); opd->cycles = 2; break; default: goto undefined; } - opd->source = BITMASK2(GET_Rn(), tmp); + opd->source = BITMASK1(GET_Rn()) | tmp; opd->dest = BITMASK2(GET_Rn(), SHR_MEM); break; case 0x04: @@ -5831,26 +5945,26 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, switch (op & 0x3f) { case 0x06: // LDS.L @Rm+,MACH 0100mmmm00000110 - tmp = SHR_MACH; + tmp = BITMASK1(SHR_MACH); break; case 0x16: // LDS.L @Rm+,MACL 0100mmmm00010110 - tmp = SHR_MACL; + tmp = BITMASK1(SHR_MACL); break; case 0x26: // LDS.L @Rm+,PR 0100mmmm00100110 - tmp = SHR_PR; + tmp = BITMASK1(SHR_PR); break; case 0x07: // LDC.L @Rm+,SR 0100mmmm00000111 - tmp = SHR_SR; + tmp = BITMASK2(SHR_SR, SHR_T); opd->op = OP_LDC; opd->cycles = 3; break; case 0x17: // LDC.L @Rm+,GBR 0100mmmm00010111 - tmp = SHR_GBR; + tmp = BITMASK1(SHR_GBR); opd->op = OP_LDC; opd->cycles = 3; break; case 0x27: // LDC.L @Rm+,VBR 0100mmmm00100111 - tmp = SHR_VBR; + tmp = BITMASK1(SHR_VBR); opd->op = OP_LDC; opd->cycles = 3; break; @@ -5858,7 +5972,7 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, goto undefined; } opd->source = BITMASK2(GET_Rn(), SHR_MEM); - opd->dest = BITMASK2(GET_Rn(), tmp); + opd->dest = BITMASK1(GET_Rn()) | tmp; break; case 0x08: case 0x09: @@ -5931,20 +6045,20 @@ u16 
scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, switch (GET_Fx()) { case 0: // LDC Rm,SR 0100mmmm00001110 - tmp = SHR_SR; + tmp = BITMASK2(SHR_SR, SHR_T); break; case 1: // LDC Rm,GBR 0100mmmm00011110 - tmp = SHR_GBR; + tmp = BITMASK1(SHR_GBR); break; case 2: // LDC Rm,VBR 0100mmmm00101110 - tmp = SHR_VBR; + tmp = BITMASK1(SHR_VBR); break; default: goto undefined; } opd->op = OP_LDC; opd->source = BITMASK1(GET_Rn()); - opd->dest = BITMASK1(tmp); + opd->dest = tmp; break; case 0x0f: // MAC.W @Rm+,@Rn+ 0100nnnnmmmm1111 @@ -6130,7 +6244,7 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, break; case 0x0300: // TRAPA #imm 11000011iiiiiiii opd->op = OP_TRAPA; - opd->source = BITMASK3(SHR_SP, SHR_PC, SHR_SR); + opd->source = BITMASK4(SHR_SP, SHR_PC, SHR_SR, SHR_T); opd->dest = BITMASK2(SHR_SP, SHR_PC); opd->imm = (op & 0xff); opd->cycles = 8; @@ -6256,9 +6370,6 @@ end: last_btarget = 0; op = 0; // delay/poll insns counter for (i = 0, pc = base_pc; i < i_end; i++, pc += 2) { - int null; - if ((op_flags[i] & OF_BTARGET) && dr_get_entry(pc, is_slave, &null)) - break; // branch target already compiled opd = &ops[i]; crc += FETCH_OP(pc); diff --git a/pico/32x/32x.c b/pico/32x/32x.c index 9993bfa8..7e2e039e 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -38,17 +38,19 @@ void p32x_update_irls(SH2 *active_sh2, unsigned int m68k_cycles) if (active_sh2 != NULL) m68k_cycles = sh2_cycles_done_m68k(active_sh2); + // find top bit = highest irq number (0 <= irl <= 14/2) by binary search + // msh2 irqs = Pico32x.sh2irqs | Pico32x.sh2irqi[0]; - while ((irqs >>= 1)) - mlvl++; - mlvl *= 2; + if (irqs >= 0x10) mlvl += 8, irqs >>= 4; + if (irqs >= 0x04) mlvl += 4, irqs >>= 2; + if (irqs >= 0x02) mlvl += 2, irqs >>= 1; // ssh2 irqs = Pico32x.sh2irqs | Pico32x.sh2irqi[1]; - while ((irqs >>= 1)) - slvl++; - slvl *= 2; + if (irqs >= 0x10) slvl += 8, irqs >>= 4; + if (irqs >= 0x04) slvl += 4, irqs >>= 2; + if (irqs >= 0x02) slvl += 2, irqs >>= 
1; mrun = sh2_irl_irq(&msh2, mlvl, msh2.state & SH2_STATE_RUN); if (mrun) { From e7ee50107538e504f6ece2ccffdc63b98fbd2ba7 Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 13 Nov 2019 21:05:35 +0100 Subject: [PATCH 0237/1110] sh2 drc: RISC-V (RV64IM) code emitter, some work on MIPS64 --- Makefile | 6 +- config.aarch64 | 14 -- config.caanoo | 16 -- config.caanoo47 | 16 -- config.dingux | 15 -- config.dingux54 | 15 -- config.gcw0 | 15 -- config.gp2x | 16 -- config.gp2x47 | 16 -- config.i386 | 14 -- config.x86 | 14 -- cpu/drc/emit_arm.c | 145 ++++++++-------- cpu/drc/emit_arm64.c | 4 +- cpu/drc/emit_mips.c | 174 ++++++++++++++------ cpu/drc/emit_x86.c | 12 +- cpu/sh2/compiler.c | 337 ++++++++++++++++++++------------------ cpu/sh2/compiler.h | 2 + pico/32x/32x.c | 2 +- platform/common/dismips.c | 2 +- platform/linux/emu.c | 2 +- tools/mkoffsets.sh | 2 +- 21 files changed, 395 insertions(+), 444 deletions(-) delete mode 100644 config.aarch64 delete mode 100644 config.caanoo delete mode 100644 config.caanoo47 delete mode 100644 config.dingux delete mode 100644 config.dingux54 delete mode 100644 config.gcw0 delete mode 100644 config.gp2x delete mode 100644 config.gp2x47 delete mode 100644 config.i386 delete mode 100644 config.x86 diff --git a/Makefile b/Makefile index a79c054b..0a0ab127 100644 --- a/Makefile +++ b/Makefile @@ -68,6 +68,10 @@ else ifneq (,$(findstring aarch64,$(ARCH))) use_fame ?= 1 use_cz80 ?= 1 use_sh2drc ?= 1 +else ifneq (,$(findstring riscv,$(ARCH))) +use_fame ?= 1 +use_cz80 ?= 1 +use_sh2drc ?= 1 endif -include Makefile.local @@ -266,7 +270,7 @@ pico/carthw_cfg.c: pico/carthw.cfg # random deps pico/carthw/svp/compiler.o : cpu/drc/emit_arm.c cpu/sh2/compiler.o : cpu/drc/emit_arm.c cpu/drc/emit_arm64.c -cpu/sh2/compiler.o : cpu/drc/emit_x86.c cpu/drc/emit_mips.c +cpu/sh2/compiler.o : cpu/drc/emit_x86.c cpu/drc/emit_mips.c cpu/drc/emit_riscv.c cpu/sh2/mame/sh2pico.o : cpu/sh2/mame/sh2.c pico/pico.o pico/cd/mcd.o pico/32x/32x.o : pico/pico_cmn.c 
pico/pico_int.h pico/memory.o pico/cd/memory.o pico/32x/memory.o : pico/pico_int.h pico/memory.h diff --git a/config.aarch64 b/config.aarch64 deleted file mode 100644 index 9631d64e..00000000 --- a/config.aarch64 +++ /dev/null @@ -1,14 +0,0 @@ -# Automatically generated by configure -# Configured with: './configure' '--platform=generic' -CC = aarch64-linux-gnu-gcc -CXX = aarch64-linux-gnu-g++ -AS = aarch64-linux-gnu-as -STRIP = aarch64-linux-gnu-strip -CFLAGS += -I/usr/include/SDL -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -ASFLAGS += -LDFLAGS += # --sysroot ${HOME}/opt/aarch64/debian-arm64 -LDLIBS += -lSDL -lasound -lpng -lz -lm -ldl - -ARCH = aarch64 -PLATFORM = generic -SOUND_DRIVERS = oss alsa sdl diff --git a/config.caanoo b/config.caanoo deleted file mode 100644 index 1ffc54da..00000000 --- a/config.caanoo +++ /dev/null @@ -1,16 +0,0 @@ -# Automatically generated by configure -# Configured with: './configure' '--platform=gp2x' -CC = arm-gph-linux-gnueabi-gcc -CXX = arm-gph-linux-gnueabi-g++ -AS = arm-gph-linux-gnueabi-as -STRIP = arm-gph-linux-gnueabi-strip -CFLAGS += -I${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/include -I${HOME}/src/gp2x/armroot-eabi/include -D__GP2X__ -Wno-unused-result -CFLAGS += -mfloat-abi=soft -mcpu=arm920t -mtune=arm920t -mno-thumb-interwork -fno-stack-protector -fno-common -CFLAGS += -finline-limit=42 -fipa-pta -fno-ipa-pure-const -ASFLAGS += -mfloat-abi=soft -mcpu=arm920t -LDFLAGS += --sysroot ${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root -L${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L${HOME}/src/gp2x/armroot-eabi/lib -static -LDLIBS += -lpng -lm -ldl - -ARCH = arm -PLATFORM = gp2x -SOUND_DRIVERS = oss diff --git a/config.caanoo47 b/config.caanoo47 deleted file mode 100644 index 5bcf8608..00000000 --- a/config.caanoo47 +++ /dev/null @@ -1,16 +0,0 @@ -# Automatically generated by configure -# Configured 
with: './configure' '--platform=gp2x' -CC = arm-linux-gnueabi-gcc -CXX = arm-linux-gnueabi-g++ -AS = arm-linux-gnueabi-as -STRIP = arm-linux-gnueabi-strip -CFLAGS += -I${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/include -I${HOME}/src/gp2x/armroot-eabi/include -D__GP2X__ -Wno-unused-result -CFLAGS += -mfloat-abi=soft -mcpu=arm920t -mtune=arm920t -mno-thumb-interwork -fno-stack-protector -fno-common -CFLAGS += -finline-limit=42 -fipa-pta -fno-ipa-sra -fno-ipa-pure-const -ASFLAGS += -mfloat-abi=soft -mcpu=arm920t -LDFLAGS += -B${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/lib/gcc/arm-gph-linux-gnueabi/4.2.4 -B${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L${HOME}/src/gp2x/armroot-eabi/lib -static -LDLIBS += -lpng -lm -ldl - -ARCH = arm -PLATFORM = gp2x -SOUND_DRIVERS = oss diff --git a/config.dingux b/config.dingux deleted file mode 100644 index b981bd3f..00000000 --- a/config.dingux +++ /dev/null @@ -1,15 +0,0 @@ -# Automatically generated by configure -# Configured with: './configure' '--platform=opendingux' -CC = mipsel-linux-gcc -CXX = mipsel-linux-g++ -AS = mipsel-linux-as -STRIP = mipsel-linux-strip -CFLAGS += -I${HOME}/opt/opendingux-toolchain/usr/include/ -I${HOME}/opt/opendingux-toolchain/usr/include/SDL -CFLAGS += -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -fno-stack-protector -ASFLAGS += -LDFLAGS += --sysroot ${HOME}/opt/opendingux-toolchain -L${HOME}/opt/opendingux-toolchain/lib -LDLIBS += -lSDL -lasound -lpng -lz -lm -ldl - -ARCH = mipsel -PLATFORM = opendingux -SOUND_DRIVERS = sdl diff --git a/config.dingux54 b/config.dingux54 deleted file mode 100644 index a232d952..00000000 --- a/config.dingux54 +++ /dev/null @@ -1,15 +0,0 @@ -# Automatically generated by configure -# Configured with: './configure' '--platform=opendingux' -CC = mipsel-linux-gnu-gcc -CXX = 
mipsel-linux-gnu-g++ -AS = mipsel-linux-gnu-as -STRIP = mipsel-linux-gnu-strip -CFLAGS += -I${HOME}/opt/opendingux-toolchain/usr/include/ -I${HOME}/opt/opendingux-toolchain/usr/include/SDL -CFLAGS += -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -fno-stack-protector -ASFLAGS += -LDFLAGS += -B${HOME}/opt/opendingux-toolchain/usr/lib -B${HOME}/opt/opendingux-toolchain/lib -Wl,-rpath-link=${HOME}/opt/opendingux-toolchain/usr/lib -Wl,-rpath-link=${HOME}/opt/opendingux-toolchain/lib -LDLIBS += -lSDL -lasound -lpng -lz -lm -ldl - -ARCH = mipsel -PLATFORM = opendingux -SOUND_DRIVERS = sdl diff --git a/config.gcw0 b/config.gcw0 deleted file mode 100644 index cebe79a1..00000000 --- a/config.gcw0 +++ /dev/null @@ -1,15 +0,0 @@ -# Automatically generated by configure -# Configured with: './configure' '--platform=gcw0' -CC = mipsel-gcw0-linux-uclibc-gcc -CXX = mipsel-gcw0-linux-uclibc-g++ -AS = mipsel-gcw0-linux-uclibc-as -STRIP = mipsel-gcw0-linux-uclibc-strip -CFLAGS += -I${HOME}/opt/gcw0-toolchain/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include/ -I${HOME}/opt/gcw0-toolchain/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include/SDL -CFLAGS += -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -fno-stack-protector -ASFLAGS += -LDFLAGS += --sysroot ${HOME}/opt/gcw0-toolchain/usr/mipsel-gcw0-linux-uclibc/sysroot -LDLIBS += -lSDL -lasound -lpng -lz -lm -ldl - -ARCH = mipsel -PLATFORM = opendingux -SOUND_DRIVERS = sdl diff --git a/config.gp2x b/config.gp2x deleted file mode 100644 index cf99bd77..00000000 --- a/config.gp2x +++ /dev/null @@ -1,16 +0,0 @@ -# Automatically generated by configure -# Configured with: './configure' '--platform=gp2x' -CC = arm-open2x-linux-gcc -CXX = arm-open2x-linux-g++ -AS = arm-open2x-linux-as -STRIP = arm-open2x-linux-strip -CFLAGS += -I${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -I${HOME}/src/gp2x/armroot/include -D__GP2X__ -CFLAGS += -mfloat-abi=soft -mcpu=arm920t -mtune=arm920t -mno-thumb-interwork -fno-stack-protector 
-fno-common -CFLAGS += -finline-limit=42 -fipa-cp -fno-ipa-pure-const -ASFLAGS += -mcpu=arm920t -mfloat-abi=soft -LDFLAGS += --sysroot ${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux -L${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L${HOME}/src/gp2x/armroot/lib -static -LDLIBS += -lpng -lm -ldl - -ARCH = arm -PLATFORM = gp2x -SOUND_DRIVERS = oss diff --git a/config.gp2x47 b/config.gp2x47 deleted file mode 100644 index 8a86e850..00000000 --- a/config.gp2x47 +++ /dev/null @@ -1,16 +0,0 @@ -# Automatically generated by configure -# Configured with: './configure' '--platform=gp2x' -CC = arm-linux-gnueabi-gcc -CXX = arm-linux-gnueabi-g++ -AS = arm-linux-gnueabi-as -STRIP = arm-linux-gnueabi-strip -CFLAGS += -I${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -I${HOME}/src/gp2x/armroot/include -D__GP2X__ -Wno-unused-result -CFLAGS += -mabi=apcs-gnu -mfloat-abi=soft -mfpu=fpa -mcpu=arm920t -mtune=arm920t -mno-thumb-interwork -fno-stack-protector -fno-common -CFLAGS += -finline-limit=42 -fipa-pta -fno-ipa-sra -fno-ipa-pure-const -ASFLAGS += -mabi=apcs-gnu -mfloat-abi=soft -mfpu=fpa -mcpu=arm920t -LDFLAGS += -mabi=apcs-gnu -mfpu=fpa -B${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1 -B${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/usr/lib -L${HOME}/src/gp2x/armroot/lib -static -LDLIBS += -lpng -lm -ldl - -ARCH = arm -PLATFORM = gp2x -SOUND_DRIVERS = oss diff --git a/config.i386 b/config.i386 deleted file mode 100644 index 9c8c2e65..00000000 --- a/config.i386 +++ /dev/null @@ -1,14 +0,0 @@ -# Automatically generated by configure -# Configured with: './configure' '--platform=generic' -CC = gcc -CXX = g++ -AS = as -STRIP = strip -CFLAGS += -I/usr/include/SDL -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -m32 -ASFLAGS += -LDFLAGS += -m32 -L/usr/lib/i386-linux-gnu -L${HOME}/opt/lib32 -LDLIBS += -lSDL-1.2 -lasound -lpng -lz -lm -ldl - -ARCH 
= i386 -PLATFORM = generic -SOUND_DRIVERS = oss alsa sdl diff --git a/config.x86 b/config.x86 deleted file mode 100644 index 45440011..00000000 --- a/config.x86 +++ /dev/null @@ -1,14 +0,0 @@ -# Automatically generated by configure -# Configured with: './configure' '--platform=generic' -CC = gcc -CXX = g++ -AS = as -STRIP = strip -CFLAGS += -I/usr/include/SDL -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -ASFLAGS += -LDFLAGS += -L/usr/lib/x86_64-linux-gnu -LDLIBS += -lSDL-1.2 -lasound -lpng -lz -lm -ldl - -ARCH = x86_64 -PLATFORM = generic -SOUND_DRIVERS = oss alsa sdl diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index 25a2c72f..8f633fa3 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -365,7 +365,7 @@ static void emith_flush(void) #define EOP_LDR_REG_LSL(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,0,1,rn,rd,shift_imm,A_AM1_LSL,rm) #define EOP_LDR_REG_LSL_WB(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,0,3,rn,rd,shift_imm,A_AM1_LSL,rm) -#define EOP_LDRB_REG_LSL(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,1,1,rn,rd,shift_imm,A_AM1_LSL,rm); +#define EOP_LDRB_REG_LSL(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,1,1,rn,rd,shift_imm,A_AM1_LSL,rm) #define EOP_STR_REG_LSL_WB(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,0,2,rn,rd,shift_imm,A_AM1_LSL,rm) #define EOP_LDRH_IMM2(cond,rd,rn,offset_8) EOP_C_AM3_IMM(cond,(offset_8) >= 0,1,rn,rd,0,1,abs(offset_8)) @@ -470,84 +470,89 @@ static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int if (cond == A_COND_NV) return; - switch (op) { - case A_OP_MOV: - rn = 0; - // count bits in imm and use MVN if more bits 1 than 0 - if (count_bits(imm) > 16) { - imm = ~imm; - op = A_OP_MVN; - } - // count insns needed for mov/orr #imm + do { + u32 u; + // try to get the topmost byte empty to possibly save an insn for (v = imm, ror2 = 0; (v >> 24) && ror2 < 32/2; ror2++) v = (v << 2) | (v >> 30); -#ifdef HAVE_ARMV7 - for (i = 2; i > 0; i--, v >>= 8) - while (v > 0xff && !(v & 3)) - v >>= 2; - 
if (v) { // 3+ insns needed... - if (op == A_OP_MVN) + + switch (op) { + case A_OP_MOV: + rn = 0; + // use MVN if more bits 1 than 0 + if (count_bits(imm) > 16) { imm = ~imm; - // ...prefer movw/movt - EOP_MOVW(rd, imm); - if (imm & 0xffff0000) - EOP_MOVT(rd, imm); - return; - } -#else - for (i = 3; i > 0; i--, v >>= 8) - while (v > 0xff && !(v & 3)) - v >>= 2; - if (v) { // 4 insns needed... - if (op == A_OP_MVN) - imm = ~imm; - // ...emit literal load - int idx, o; - if (literal_iindex >= MAX_HOST_LITERALS) { - elprintf(EL_STATUS|EL_SVP|EL_ANOMALY, - "pool overflow"); - exit(1); + op = A_OP_MVN; + ror2 = -1; + break; } - idx = emith_pool_literal(imm, &o); - literal_insn[literal_iindex++] = (u32 *)tcache_ptr; - EOP_LDR_IMM2(cond, rd, PC, idx * sizeof(u32)); - if (o > 0) - EOP_C_DOP_IMM(cond, A_OP_ADD, 0, rd, rd, 0, o); - else if (o < 0) - EOP_C_DOP_IMM(cond, A_OP_SUB, 0, rd, rd, 0, -o); + // count insns needed for mov/orr #imm +#ifdef HAVE_ARMV7 + for (i = 2, u = v; i > 0; i--, u >>= 8) + while (u > 0xff && !(u & 3)) + u >>= 2; + if (u) { // 3+ insns needed... + if (op == A_OP_MVN) + imm = ~imm; + // ...prefer movw/movt + EOP_MOVW(rd, imm); + if (imm & 0xffff0000) + EOP_MOVT(rd, imm); + return; + } +#else + for (i = 2, u = v; i > 0; i--, u >>= 8) + while (u > 0xff && !(u & 3)) + u >>= 2; + if (u) { // 4 insns needed... + if (op == A_OP_MVN) + imm = ~imm; + // ...emit literal load + int idx, o; + if (literal_iindex >= MAX_HOST_LITERALS) { + elprintf(EL_STATUS|EL_SVP|EL_ANOMALY, + "pool overflow"); + exit(1); + } + idx = emith_pool_literal(imm, &o); + literal_insn[literal_iindex++] = (u32 *)tcache_ptr; + EOP_LDR_IMM2(cond, rd, PC, idx * sizeof(u32)); + if (o > 0) + EOP_C_DOP_IMM(cond, A_OP_ADD, 0,rd,rd,0,o); + else if (o < 0) + EOP_C_DOP_IMM(cond, A_OP_SUB, 0,rd,rd,0,-o); return; - } + } #endif - break; + break; - case A_OP_AND: - // AND must fit into 1 insn. 
if not, use BIC - for (v = imm, ror2 = 0; (v >> 8) && ror2 < 32/2; ror2++) - v = (v << 2) | (v >> 30); - if (v >> 8) { - imm = ~imm; - op = A_OP_BIC; + case A_OP_AND: + // AND must fit into 1 insn. if not, use BIC + for (u = v; u > 0xff && !(u & 3); u >>= 2) ; + if (u >> 8) { + imm = ~imm; + op = A_OP_BIC; + ror2 = -1; + } + break; + + case A_OP_SUB: + case A_OP_ADD: + // swap ADD and SUB if more bits 1 than 0 + if (s == 0 && count_bits(imm) > 16) { + imm = -imm; + op ^= (A_OP_ADD^A_OP_SUB); + ror2 = -1; + } + case A_OP_EOR: + case A_OP_ORR: + case A_OP_BIC: + if (s == 0 && imm == 0 && rd == rn) + return; + break; } - break; + } while (ror2 < 0); - case A_OP_SUB: - case A_OP_ADD: - // count bits in imm and swap ADD and SUB if more bits 1 than 0 - if (s == 0 && count_bits(imm) > 16) { - imm = -imm; - op ^= (A_OP_ADD^A_OP_SUB); - } - case A_OP_EOR: - case A_OP_ORR: - case A_OP_BIC: - if (s == 0 && imm == 0 && rd == rn) - return; - break; - } - - // try to get the topmost byte empty to possibly save an insn - for (v = imm, ror2 = 0; (v >> 24) && ror2 < 32/2; ror2++) - v = (v << 2) | (v >> 30); do { // shift down to get 'best' rot2 while (v > 0xff && !(v & 3)) diff --git a/cpu/drc/emit_arm64.c b/cpu/drc/emit_arm64.c index dc0cf559..3f40d4cd 100644 --- a/cpu/drc/emit_arm64.c +++ b/cpu/drc/emit_arm64.c @@ -134,9 +134,9 @@ enum { XT_UXTW=0x4, XT_UXTX=0x6, XT_LSL=0x7, XT_SXTW=0xc, XT_SXTX=0xe }; #define A64_TST_REG(rn, rm, stype, simm) \ A64_ANDS_REG(Z0, rn, rm, stype, simm) #define A64_MOV_REG(rd, rm, stype, simm) \ - A64_OR_REG(rd, Z0, rm, stype, simm); + A64_OR_REG(rd, Z0, rm, stype, simm) #define A64_MVN_REG(rd, rm, stype, simm) \ - A64_ORN_REG(rd, Z0, rm, stype, simm); + A64_ORN_REG(rd, Z0, rm, stype, simm) // rd = rn OP (rm EXTEND simm) #define A64_ADD_XREG(rd, rn, rm, xtopt, simm) \ diff --git a/cpu/drc/emit_mips.c b/cpu/drc/emit_mips.c index 82527474..6f07e509 100644 --- a/cpu/drc/emit_mips.c +++ b/cpu/drc/emit_mips.c @@ -62,14 +62,17 @@ // opcode field (encoded in 
op) enum { OP__FN=000, OP__RT, OP_J, OP_JAL, OP_BEQ, OP_BNE, OP_BLEZ, OP_BGTZ }; enum { OP_ADDI=010, OP_ADDIU, OP_SLTI, OP_SLTIU, OP_ANDI, OP_ORI, OP_XORI, OP_LUI }; -enum { OP_LB=040, OP_LH, OP_LWL, OP_LW, OP_LBU, OP_LHU, OP_LWR }; -enum { OP_SB=050, OP_SH, OP_SWL, OP_SW, __(54), __(55), OP_SWR }; +enum { OP_LB=040, OP_LH, OP_LWL, OP_LW, OP_LBU, OP_LHU, OP_LWR, OP_LWU }; +enum { OP_SB=050, OP_SH, OP_SWL, OP_SW, OP_SDL, OP_SDR, OP_SWR }; +enum { OP_DADDI=030, OP_DADDIU, OP_LDL, OP_LDR, OP_SD=067, OP_LD=077 }; // function field (encoded in fn if opcode = OP__FN) enum { FN_SLL=000, __(01), FN_SRL, FN_SRA, FN_SLLV, __(05), FN_SRLV, FN_SRAV }; -enum { FN_MFHI=020, FN_MTHI, FN_MFLO, FN_MTLO }; -enum { FN_MULT=030, FN_MULTU, FN_DIV, FN_DIVU }; +enum { FN_JR=010, FN_JALR, FN_MOVZ, FN_MOVN, FN_SYNC=017 }; +enum { FN_MFHI=020, FN_MTHI, FN_MFLO, FN_MTLO, FN_DSSLV, __(25), FN_DSLRV, FN_DSRAV }; +enum { FN_MULT=030, FN_MULTU, FN_DIV, FN_DIVU, FN_DMULT, FN_DMULTU, FN_DDIV, FN_DDIVU }; enum { FN_ADD=040, FN_ADDU, FN_SUB, FN_SUBU, FN_AND, FN_OR, FN_XOR, FN_NOR }; -enum { FN_JR=010, FN_JALR, FN_MOVZ, FN_MOVN, FN_SYNC=017, FN_SLT=052, FN_SLTU }; +enum { FN_SLT=052, FN_SLTU, FN_DADD, FN_DADDU, FN_DSUB, FN_DSUBU }; +enum { FN_DSLL=070, __(71), FN_DSRL, FN_DSRA, FN_DSLL32, __(75), FN_DSRL32, FN_DSRA32 }; // rt field (encoded in rt if opcode = OP__RT) enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 }; @@ -85,8 +88,12 @@ enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 }; // rd = rs OP rt #define MIPS_ADD_REG(rd, rs, rt) \ MIPS_OP_REG(FN_ADDU, rd, rs, rt) +#define MIPS_DADD_REG(rd, rs, rt) \ + MIPS_OP_REG(FN_DADDU, rd, rs, rt) #define MIPS_SUB_REG(rd, rs, rt) \ MIPS_OP_REG(FN_SUBU, rd, rs, rt) +#define MIPS_DSUB_REG(rd, rs, rt) \ + MIPS_OP_REG(FN_DSUBU, rd, rs, rt) #define MIPS_NEG_REG(rd, rt) \ MIPS_SUB_REG(rd, Z0, rt) @@ -122,6 +129,8 @@ enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 }; // rt = rs OP imm16 #define 
MIPS_ADD_IMM(rt, rs, imm16) \ MIPS_OP_IMM(OP_ADDIU, rt, rs, imm16) +#define MIPS_DADD_IMM(rt, rs, imm16) \ + MIPS_OP_IMM(OP_DADDIU, rt, rs, imm16) #define MIPS_XOR_IMM(rt, rs, imm16) \ MIPS_OP_IMM(OP_XORI, rt, rs, imm16) @@ -144,6 +153,11 @@ enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 }; #define MIPS_ASR_IMM(rd, rt, bits) \ MIPS_INSN(OP__FN, _, rt, rd, bits, FN_SRA) +#define MIPS_DLSL_IMM(rd, rt, bits) \ + MIPS_INSN(OP__FN, _, rt, rd, bits, FN_DSLL) +#define MIPS_DLSL32_IMM(rd, rt, bits) \ + MIPS_INSN(OP__FN, _, rt, rd, bits, FN_DSLL32) + // rt = (rs < imm16) #define MIPS_SLT_IMM(rt, rs, imm16) \ MIPS_OP_IMM(OP_SLTI, rt, rs, imm16) @@ -193,23 +207,45 @@ enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 }; // load/store indexed base +#define MIPS_LD(rt, rs, offs16) \ + MIPS_OP_IMM(OP_LD, rt, rs, (u16)(offs16)) #define MIPS_LW(rt, rs, offs16) \ - MIPS_INSN(OP_LW, rs, rt, _,_, (u16)(offs16)) + MIPS_OP_IMM(OP_LW, rt, rs, (u16)(offs16)) #define MIPS_LH(rt, rs, offs16) \ - MIPS_INSN(OP_LH, rs, rt, _,_, (u16)(offs16)) + MIPS_OP_IMM(OP_LH, rt, rs, (u16)(offs16)) #define MIPS_LB(rt, rs, offs16) \ - MIPS_INSN(OP_LB, rs, rt, _,_, (u16)(offs16)) + MIPS_OP_IMM(OP_LB, rt, rs, (u16)(offs16)) #define MIPS_LHU(rt, rs, offs16) \ - MIPS_INSN(OP_LHU, rs, rt, _,_, (u16)(offs16)) + MIPS_OP_IMM(OP_LHU, rt, rs, (u16)(offs16)) #define MIPS_LBU(rt, rs, offs16) \ - MIPS_INSN(OP_LBU, rs, rt, _,_, (u16)(offs16)) + MIPS_OP_IMM(OP_LBU, rt, rs, (u16)(offs16)) +#define MIPS_SD(rt, rs, offs16) \ + MIPS_OP_IMM(OP_SD, rt, rs, (u16)(offs16)) #define MIPS_SW(rt, rs, offs16) \ - MIPS_INSN(OP_SW, rs, rt, _,_, (u16)(offs16)) + MIPS_OP_IMM(OP_SW, rt, rs, (u16)(offs16)) #define MIPS_SH(rt, rs, offs16) \ - MIPS_INSN(OP_SH, rs, rt, _,_, (u16)(offs16)) + MIPS_OP_IMM(OP_SH, rt, rs, (u16)(offs16)) #define MIPS_SB(rt, rs, offs16) \ - MIPS_INSN(OP_SB, rs, rt, _,_, (u16)(offs16)) + MIPS_OP_IMM(OP_SB, rt, rs, (u16)(offs16)) + +// pointer operations + +#if __mips == 4 || 
__mips == 64 +#define OP_LP OP_LD +#define OP_SP OP_SD +#define OP_PADDIU OP_DADDIU +#define FN_PADDU FN_DADDU +#define FN_PSUBU FN_DSUBU +#define PTR_SCALE 3 +#else +#define OP_LP OP_LW +#define OP_SP OP_SW +#define OP_PADDIU OP_ADDIU +#define FN_PADDU FN_ADDU +#define FN_PSUBU FN_SUBU +#define PTR_SCALE 2 +#endif // XXX: tcache_ptr type for SVP and SH2 compilers differs.. #define EMIT_PTR(ptr, x) \ @@ -442,14 +478,14 @@ static void emith_set_arith_flags(int rd, int rs, int rt, s32 imm, int sub) if (emith_flg_hint & _FHV) { emith_flg_noV = 0; - if (rt >= 0) // Nt^Ns in FV, bit 31 + if (rt > Z0) // Nt^Ns in FV, bit 31 EMIT(MIPS_XOR_REG(FV, rs, rt)); - else if (imm == 0) + else if (rt == Z0 || imm == 0) emith_flg_noV = 1; // imm #0 can't overflow else if ((imm < 0) == !sub) EMIT(MIPS_NOR_REG(FV, rs, Z0)); else if ((imm > 0) == !sub) - EMIT(MIPS_OR_REG(FV, rs, Z0)); + EMIT(MIPS_XOR_REG(FV, rs, Z0)); } // full V = Nd^Nt^Ns^C calculation is deferred until really needed @@ -483,13 +519,17 @@ static void emith_set_compare_flags(int rs, int rt, s32 imm) EMIT(MIPS_MVN_REG(d, s)) #define emith_add_r_r_r_lsl_ptr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ + EMIT(MIPS_OP_REG(FN_PADDU, d, s1, AT)); \ + } else EMIT(MIPS_OP_REG(FN_PADDU, d, s1, s2)); \ +} while (0) +#define emith_add_r_r_r_lsl(d, s1, s2, simm) do { \ if (simm) { \ EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ EMIT(MIPS_ADD_REG(d, s1, AT)); \ } else EMIT(MIPS_ADD_REG(d, s1, s2)); \ } while (0) -#define emith_add_r_r_r_lsl(d, s1, s2, simm) \ - emith_add_r_r_r_lsl_ptr(d, s1, s2, simm) #define emith_add_r_r_r_lsr(d, s1, s2, simm) do { \ if (simm) { \ @@ -498,6 +538,16 @@ static void emith_set_compare_flags(int rs, int rt, s32 imm) } else EMIT(MIPS_ADD_REG(d, s1, s2)); \ } while (0) +#define emith_addf_r_r_r_lsl_ptr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ + EMIT(MIPS_OP_REG(FN_PADDU, FNZ, s1, AT)); \ + emith_set_arith_flags(d, s1, AT, 0, 0); \ + } else { \ 
+ EMIT(MIPS_OP_REG(FN_PADDU, FNZ, s1, s2)); \ + emith_set_arith_flags(d, s1, s2, 0, 0); \ + } \ +} while (0) #define emith_addf_r_r_r_lsl(d, s1, s2, simm) do { \ if (simm) { \ EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ @@ -586,6 +636,8 @@ static void emith_set_compare_flags(int rs, int rt, s32 imm) #define emith_add_r_r_r(d, s1, s2) \ emith_add_r_r_r_lsl(d, s1, s2, 0) +#define emith_addf_r_r_r_ptr(d, s1, s2) \ + emith_addf_r_r_r_lsl_ptr(d, s1, s2, 0) #define emith_addf_r_r_r(d, s1, s2) \ emith_addf_r_r_r_lsl(d, s1, s2, 0) @@ -697,14 +749,26 @@ static void emith_set_compare_flags(int rs, int rt, s32 imm) // move immediate static void emith_move_imm(int r, uintptr_t imm) { - if ((s16)imm == imm) { +#if __mips == 4 || __mips == 64 + if ((s32)imm != imm) { + emith_move_imm(r, imm >> 32); + if (imm & 0xffff0000) { + EMIT(MIPS_DLSL_IMM(r, r, 16)); + EMIT(MIPS_OR_IMM(r, r, (imm >> 16) & 0xffff)); + EMIT(MIPS_DLSL_IMM(r, r, 16)); + } else EMIT(MIPS_DLSL32_IMM(r, r, 0)); + if (imm & 0x0000ffff) + EMIT(MIPS_OR_IMM(r, r, imm & 0xffff)); + } else +#endif + if ((s16)imm == imm) { EMIT(MIPS_ADD_IMM(r, Z0, imm)); - } else if (!(imm >> 16)) { + } else if (!((u32)imm >> 16)) { EMIT(MIPS_OR_IMM(r, Z0, imm)); } else { int s = Z0; - if (imm >> 16) { - EMIT(MIPS_MOVT_IMM(r, imm >> 16)); + if ((u32)imm >> 16) { + EMIT(MIPS_MOVT_IMM(r, (u32)imm >> 16)); s = r; } if ((u16)imm) @@ -729,17 +793,17 @@ static void emith_move_imm(int r, uintptr_t imm) } while (0) // arithmetic, immediate - can only be ADDI[U], since SUBI[U] doesn't exist -static void emith_arith_imm(int op, int rd, int rs, u32 imm) +static void emith_add_imm(int ptr, int rd, int rs, u32 imm) { if ((s16)imm == imm) { if (imm || rd != rs) - EMIT(MIPS_OP_IMM(op, rd, rs, imm)); + EMIT(MIPS_OP_IMM(ptr ? OP_PADDIU:OP_ADDIU, rd,rs,imm)); } else if ((s32)imm < 0) { emith_move_r_imm(AT, -imm); - EMIT(MIPS_OP_REG(FN_SUB + (op-OP_ADDI), rd, rs, AT)); + EMIT(MIPS_OP_REG((ptr ? 
FN_PSUBU:FN_SUBU), rd,rs,AT)); } else { emith_move_r_imm(AT, imm); - EMIT(MIPS_OP_REG(FN_ADD + (op-OP_ADDI), rd, rs, AT)); + EMIT(MIPS_OP_REG((ptr ? FN_PADDU:FN_ADDU), rd,rs,AT)); } } @@ -760,7 +824,7 @@ static void emith_arith_imm(int op, int rd, int rs, u32 imm) emith_subf_r_r_imm(r, r, imm) #define emith_adc_r_imm(r, imm) \ - emith_adc_r_r_imm(r, r, imm); + emith_adc_r_r_imm(r, r, imm) #define emith_adcf_r_imm(r, imm) \ emith_adcf_r_r_imm(r, r, imm) @@ -770,10 +834,10 @@ static void emith_arith_imm(int op, int rd, int rs, u32 imm) // emith_subf_r_r_imm(FNZ, r, (s16)imm) #define emith_add_r_r_ptr_imm(d, s, imm) \ - emith_arith_imm(OP_ADDIU, d, s, imm) + emith_add_imm(1, d, s, imm) #define emith_add_r_r_imm(d, s, imm) \ - emith_add_r_r_ptr_imm(d, s, imm) + emith_add_imm(0, d, s, imm) #define emith_addf_r_r_imm(d, s, imm) do { \ emith_add_r_r_imm(FNZ, s, imm); \ @@ -1043,22 +1107,24 @@ static void emith_lohi_nops(void) // load/store. offs has 16 bits signed, which is currently sufficient #define emith_read_r_r_offs_ptr(r, rs, offs) \ - EMIT(MIPS_LW(r, rs, offs)) + EMIT(MIPS_OP_IMM(OP_LP, r, rs, offs)) #define emith_read_r_r_offs_ptr_c(cond, r, rs, offs) \ emith_read_r_r_offs_ptr(r, rs, offs) #define emith_read_r_r_offs(r, rs, offs) \ - emith_read_r_r_offs_ptr(r, rs, offs) + EMIT(MIPS_LW(r, rs, offs)) #define emith_read_r_r_offs_c(cond, r, rs, offs) \ emith_read_r_r_offs(r, rs, offs) #define emith_read_r_r_r_ptr(r, rs, rm) do { \ emith_add_r_r_r(AT, rs, rm); \ - EMIT(MIPS_LW(r, AT, 0)); \ + EMIT(MIPS_OP_IMM(OP_LP, r, AT, 0)); \ } while (0) -#define emith_read_r_r_r(r, rs, rm) \ - emith_read_r_r_r_ptr(r, rs, rm) +#define emith_read_r_r_r(r, rs, rm) do { \ + emith_add_r_r_r(AT, rs, rm); \ + EMIT(MIPS_LW(r, AT, 0)); \ +} while (0) #define emith_read_r_r_r_c(cond, r, rs, rm) \ emith_read_r_r_r(r, rs, rm) @@ -1112,24 +1178,26 @@ static void emith_lohi_nops(void) #define emith_write_r_r_offs_ptr(r, rs, offs) \ - EMIT(MIPS_SW(r, rs, offs)) + EMIT(MIPS_OP_IMM(OP_SP, r, rs, 
offs)) #define emith_write_r_r_offs_ptr_c(cond, r, rs, offs) \ emith_write_r_r_offs_ptr(r, rs, offs) #define emith_write_r_r_r_ptr(r, rs, rm) do { \ emith_add_r_r_r(AT, rs, rm); \ - EMIT(MIPS_SW(r, AT, 0)); \ + EMIT(MIPS_OP_IMM(OP_SP, r, AT, 0)); \ } while (0) #define emith_write_r_r_r_ptr_c(cond, r, rs, rm) \ emith_write_r_r_r_ptr(r, rs, rm) #define emith_write_r_r_offs(r, rs, offs) \ - emith_write_r_r_offs_ptr(r, rs, offs) + EMIT(MIPS_SW(r, rs, offs)) #define emith_write_r_r_offs_c(cond, r, rs, offs) \ emith_write_r_r_offs(r, rs, offs) -#define emith_write_r_r_r(r, rs, rm) \ - emith_write_r_r_r_ptr(r, rs, rm) +#define emith_write_r_r_r(r, rs, rm) do { \ + emith_add_r_r_r(AT, rs, rm); \ + EMIT(MIPS_SW(r, AT, 0)); \ +} while (0) #define emith_write_r_r_r_c(cond, r, rs, rm) \ emith_write_r_r_r(r, rs, rm) @@ -1164,7 +1232,7 @@ static void emith_lohi_nops(void) int _c; u32 _m = mask & 0x300fffc; /* r2-r15,r24-r25 */ \ if (__builtin_parity(_m) == 1) _m |= 0x1; /* ABI align */ \ int _s = count_bits(_m) * 4, _o = _s; \ - if (_s) emith_sub_r_imm(SP, _s); \ + if (_s) emith_add_r_r_ptr_imm(SP, SP, -_s); \ for (_c = HOST_REGS-1; _m && _c >= 0; _m &= ~(1 << _c), _c--) \ if (_m & (1 << _c)) \ { _o -= 4; if (_c) emith_write_r_r_offs(_c, SP, _o); } \ @@ -1177,7 +1245,7 @@ static void emith_lohi_nops(void) for (_c = 0; _m && _c < HOST_REGS; _m &= ~(1 << _c), _c++) \ if (_m & (1 << _c)) \ { if (_c) emith_read_r_r_offs(_c, SP, _o); _o += 4; } \ - if (_s) emith_add_r_imm(SP, _s); \ + if (_s) emith_add_r_r_ptr_imm(SP, SP, _s); \ } while (0) #define host_arg2reg(rd, arg) \ @@ -1343,8 +1411,8 @@ static int emith_cond_check(int cond, int *r) emith_jump_cond(cond, target) #define emith_jump_cond_inrange(target) \ - ((u8 *)target - (u8 *)tcache_ptr - 4 < 0x00020000U || \ - (u8 *)target - (u8 *)tcache_ptr - 4 >= 0xfffe0010U) // mind cond_check + ((u8 *)target - (u8 *)tcache_ptr - 4 < 0x20000 && \ + (u8 *)target - (u8 *)tcache_ptr - 4 >= -0x20000+0x10) //mind cond_check // NB: returns 
position of patch for cache maintenance #define emith_jump_patch(ptr, target, pos) do { \ @@ -1359,8 +1427,8 @@ static int emith_cond_check(int cond, int *r) } while (0) #define emith_jump_patch_inrange(ptr, target) \ - ((u8 *)target - (u8 *)ptr - 4 < 0x00020000U || \ - (u8 *)target - (u8 *)ptr - 4 >= 0xfffe0010U) // mind cond_check + ((u8 *)target - (u8 *)ptr - 4 < 0x20000 && \ + (u8 *)target - (u8 *)ptr - 4 >= -0x20000+0x10) // mind cond_check #define emith_jump_patch_size() 4 #define emith_jump_at(ptr, target) do { \ @@ -1410,7 +1478,7 @@ static int emith_cond_check(int cond, int *r) // NB: ABI SP alignment is 8 for compatibility with MIPS IV #define emith_push_ret(r) do { \ - emith_sub_r_imm(SP, 8+16); /* reserve new arg save area (16) */ \ + emith_add_r_r_ptr_imm(SP, SP, -8-16); /* ABI: 16 byte arg save area */ \ emith_write_r_r_offs(LR, SP, 4+16); \ if ((r) > 0) emith_write_r_r_offs(r, SP, 0+16); \ } while (0) @@ -1418,7 +1486,7 @@ static int emith_cond_check(int cond, int *r) #define emith_pop_and_ret(r) do { \ if ((r) > 0) emith_read_r_r_offs(r, SP, 0+16); \ emith_read_r_r_offs(LR, SP, 4+16); \ - emith_add_r_imm(SP, 8+16); \ + emith_add_r_r_ptr_imm(SP, SP, 8+16); \ emith_ret(); \ } while (0) @@ -1436,7 +1504,7 @@ static int emith_cond_check(int cond, int *r) int _c; u32 _m = 0xd0ff0000; \ if (__builtin_parity(_m) == 1) _m |= 0x1; /* ABI align for SP is 8 */ \ int _s = count_bits(_m) * 4 + 16, _o = _s; /* 16 byte arg save area */ \ - if (_s) emith_sub_r_imm(SP, _s); \ + if (_s) emith_add_r_r_ptr_imm(SP, SP, -_s); \ for (_c = HOST_REGS-1; _m && _c >= 0; _m &= ~(1 << _c), _c--) \ if (_m & (1 << _c)) \ { _o -= 4; if (_c) emith_write_r_r_offs(_c, SP, _o); } \ @@ -1448,23 +1516,23 @@ static int emith_cond_check(int cond, int *r) for (_c = 0; _m && _c < HOST_REGS; _m &= ~(1 << _c), _c++) \ if (_m & (1 << _c)) \ { if (_c) emith_read_r_r_offs(_c, SP, _o); _o += 4; } \ - if (_s) emith_add_r_imm(SP, _s); \ + if (_s) emith_add_r_r_ptr_imm(SP, SP, _s); \ emith_ret(); \ 
} while (0) // NB: assumes a is in arg0, tab, func and mask are temp #define emith_sh2_rcall(a, tab, func, mask) do { \ emith_lsr(mask, a, SH2_READ_SHIFT); \ - emith_add_r_r_r_lsl_ptr(tab, tab, mask, 3); \ + emith_add_r_r_r_lsl_ptr(tab, tab, mask, PTR_SCALE+1); \ emith_read_r_r_offs_ptr(func, tab, 0); \ - emith_read_r_r_offs(mask, tab, 4); \ - emith_addf_r_r_r/*_ptr*/(func, func, func); \ + emith_read_r_r_offs(mask, tab, (1 << PTR_SCALE)); \ + emith_addf_r_r_r_ptr(func, func, func); \ } while (0) // NB: assumes a, val are in arg0 and arg1, tab and func are temp #define emith_sh2_wcall(a, val, tab, func) do { \ emith_lsr(func, a, SH2_WRITE_SHIFT); \ - emith_lsl(func, func, 2); \ + emith_lsl(func, func, PTR_SCALE); \ emith_read_r_r_r_ptr(func, tab, func); \ emith_move_r_r_ptr(6, CONTEXT_REG); /* arg2 */ \ emith_jump_reg(func); \ diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index e7284499..9ed8b563 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -448,11 +448,11 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common // fake conditionals (using SJMP instead) #define emith_move_r_imm_c(cond, r, imm) \ - emith_move_r_imm(r, imm); + emith_move_r_imm(r, imm) #define emith_add_r_imm_c(cond, r, imm) \ - emith_add_r_imm(r, imm); + emith_add_r_imm(r, imm) #define emith_sub_r_imm_c(cond, r, imm) \ - emith_sub_r_imm(r, imm); + emith_sub_r_imm(r, imm) #define emith_or_r_imm_c(cond, r, imm) \ emith_or_r_imm(r, imm) #define emith_eor_r_imm_c(cond, r, imm) \ @@ -468,11 +468,11 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common #define emith_ror_c(cond, d, s, cnt) \ emith_ror(d, s, cnt) #define emith_and_r_r_c(cond, d, s) \ - emith_and_r_r(d, s); + emith_and_r_r(d, s) #define emith_add_r_r_imm_c(cond, d, s, imm) \ - emith_add_r_r_imm(d, s, imm); + emith_add_r_r_imm(d, s, imm) #define emith_sub_r_r_imm_c(cond, d, s, imm) \ - emith_sub_r_r_imm(d, s, imm); + emith_sub_r_r_imm(d, s, imm) #define emith_read8_r_r_r_c(cond, r, 
rs, rm) \ emith_read8_r_r_r(r, rs, rm) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 2320c501..d1cde69e 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -47,13 +47,9 @@ #define LOOP_OPTIMIZER 1 #define T_OPTIMIZER 1 -// limits (per block) -#define MAX_BLOCK_SIZE (BLOCK_INSN_LIMIT * 6 * 6) - -// max literal offset from the block end #define MAX_LITERAL_OFFSET 0x200 // max. MOVA, MOV @(PC) offset -#define MAX_LITERALS (BLOCK_INSN_LIMIT / 4) -#define MAX_LOCAL_BRANCHES (BLOCK_INSN_LIMIT / 4) +#define MAX_LOCAL_TARGETS (BLOCK_INSN_LIMIT / 4) +#define MAX_LOCAL_BRANCHES (BLOCK_INSN_LIMIT / 2) // debug stuff // 01 - warnings/errors @@ -294,7 +290,7 @@ struct block_link { u32 target_pc; void *jump; // insn address void *blx; // block link/exit area if any - u8 jdisp[8]; // jump backup buffer + u8 jdisp[12]; // jump backup buffer struct block_link *next; // either in block_entry->links or unresolved struct block_link *o_next; // ...in block_entry->o_links struct block_link *prev; @@ -443,6 +439,8 @@ static void rcache_free_tmp(int hr); #include "../drc/emit_arm64.c" #elif defined(__mips__) #include "../drc/emit_mips.c" +#elif defined(__riscv__) || defined(__riscv) +#include "../drc/emit_riscv.c" #elif defined(__i386__) #include "../drc/emit_x86.c" #elif defined(__x86_64__) @@ -1207,45 +1205,10 @@ static void dr_flush_tcache(int tcid) static void *dr_failure(void) { - lprintf("recompilation failed\n"); + printf("recompilation failed\n"); exit(1); } -#define ADD_TO_ARRAY(array, count, item, failcode) { \ - if (count >= ARRAY_SIZE(array)) { \ - dbg(1, "warning: " #array " overflow"); \ - failcode; \ - } else \ - array[count++] = item; \ -} - -static inline int find_in_array(u32 *array, size_t size, u32 what) -{ - size_t i; - for (i = 0; i < size; i++) - if (what == array[i]) - return i; - - return -1; -} - -static int find_in_sorted_array(u32 *array, size_t size, u32 what) -{ - // binary search in sorted array - int left = 0, right = size-1; - while 
(left <= right) - { - int middle = (left + right) / 2; - if (array[middle] == what) - return middle; - else if (array[middle] < what) - left = middle + 1; - else - right = middle - 1; - } - return -1; -} - // --------------------------------------------------------------- // NB rcache allocation dependencies: @@ -2868,6 +2831,88 @@ static void emit_do_static_regs(int is_write, int tmpr) } } +// block local link stuff +struct linkage { + u32 pc; + void *ptr; + struct block_link *bl; + u32 mask; +}; + +static inline int find_in_linkage(const struct linkage *array, int size, u32 pc) +{ + size_t i; + for (i = 0; i < size; i++) + if (pc == array[i].pc) + return i; + + return -1; +} + +static int find_in_sorted_linkage(const struct linkage *array, int size, u32 pc) +{ + // binary search in sorted array + int left = 0, right = size-1; + while (left <= right) + { + int middle = (left + right) / 2; + if (array[middle].pc == pc) + return middle; + else if (array[middle].pc < pc) + left = middle + 1; + else + right = middle - 1; + } + return -1; +} + +static void emit_branch_linkage_code(SH2 *sh2, struct block_desc *block, int tcache_id, + const struct linkage *targets, int target_count, + const struct linkage *links, int link_count) +{ + struct block_link *bl; + int u, v, tmp; + + for (u = 0; u < link_count; u++) { + emith_pool_check(); + // look up local branch targets + v = find_in_sorted_linkage(targets, target_count, links[u].pc); + if (v >= 0) { + if (! 
targets[v].ptr) { + // forward branch not yet resolved, prepare external linking + emith_jump_patch(links[u].ptr, tcache_ptr, NULL); + bl = dr_prepare_ext_branch(block->entryp, links[u].pc, sh2->is_slave, tcache_id); + if (bl) { + emith_flush(); // flush to inhibit insn swapping + bl->type = BL_LDJMP; + } + + tmp = rcache_get_tmp_arg(0); + emith_move_r_imm(tmp, links[u].pc); + rcache_free_tmp(tmp); + emith_jump_patchable(sh2_drc_dispatcher); + } else if (emith_jump_patch_inrange(links[u].ptr, targets[v].ptr)) { + // inrange local branch + emith_jump_patch(links[u].ptr, targets[v].ptr, NULL); + } else { + // far local branch + emith_jump_patch(links[u].ptr, tcache_ptr, NULL); + emith_jump(targets[v].ptr); + } + } else { + // external or exit, emit blx area entry + void *target = (links[u].pc & 1 ? sh2_drc_exit : sh2_drc_dispatcher); + if (links[u].bl) + links[u].bl->blx = tcache_ptr; + emith_jump_patch(links[u].ptr, tcache_ptr, NULL); + tmp = rcache_get_tmp_arg(0); + emith_move_r_imm(tmp, links[u].pc & ~1); + rcache_free_tmp(tmp); + emith_jump(target); + } + } +} + #define DELAY_SAVE_T(sr) { \ int t_ = rcache_get_tmp(); \ emith_bic_r_imm(sr, T_save); \ @@ -2887,17 +2932,10 @@ static void *dr_get_pc_base(u32 pc, SH2 *sh2); static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) { // branch targets in current block - u32 branch_target_pc[MAX_LOCAL_BRANCHES]; - void *branch_target_ptr[MAX_LOCAL_BRANCHES]; + struct linkage branch_targets[MAX_LOCAL_TARGETS]; int branch_target_count = 0; - // unresolved local forward branches, for fixup at block end - u32 branch_patch_pc[MAX_LOCAL_BRANCHES]; - void *branch_patch_ptr[MAX_LOCAL_BRANCHES]; - int branch_patch_count = 0; - // external branch targets with a block link/exit area - u32 blx_target_pc[MAX_LOCAL_BRANCHES]; - void *blx_target_ptr[MAX_LOCAL_BRANCHES]; - struct block_link *blx_target_bl[MAX_LOCAL_BRANCHES]; + // unresolved local or external targets with block link/exit area if needed + struct linkage 
blx_targets[MAX_LOCAL_BRANCHES]; int blx_target_count = 0; u8 op_flags[BLOCK_INSN_LIMIT]; @@ -2906,6 +2944,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) int delay_reg:8; u32 loop_type:8; u32 polling:8; + u32 pinning:1; u32 test_irq:1; u32 pending_branch_direct:1; u32 pending_branch_indirect:1; @@ -2914,23 +2953,20 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) #if LOOP_OPTIMIZER // loops with pinned registers for optimzation // pinned regs are like statics and don't need saving/restoring inside a loop - u32 pinned_loop_pc[MAX_LOCAL_BRANCHES/16]; - void *pinned_loop_ptr[MAX_LOCAL_BRANCHES/16]; - u32 pinned_loop_mask[MAX_LOCAL_BRANCHES/16]; + struct linkage pinned_loops[MAX_LOCAL_TARGETS/16]; int pinned_loop_count = 0; #endif // PC of current, first, last SH2 insn u32 pc, base_pc, end_pc; u32 base_literals, end_literals; - void *block_entry_ptr; + u8 *block_entry_ptr; struct block_desc *block; struct block_entry *entry; struct block_link *bl; u16 *dr_pc_base; struct op_data *opd; int blkid_main = 0; - int skip_op = 0; int tmp, tmp2; int cycles; int i, v; @@ -2971,8 +3007,15 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) for (pc = base_pc, i = 0; pc < end_pc; i++, pc += 2) { if (op_flags[i] & OF_DELAY_OP) op_flags[i] &= ~OF_BTARGET; - if (op_flags[i] & OF_BTARGET) - ADD_TO_ARRAY(branch_target_pc, branch_target_count, pc, ); + if (op_flags[i] & OF_BTARGET) { + if (branch_target_count < ARRAY_SIZE(branch_targets)) + branch_targets[branch_target_count++] = (struct linkage) { .pc = pc }; + else { + printf("warning: linkage overflow\n"); + end_pc = pc; + break; + } + } if (ops[i].op == OP_LDC && (ops[i].dest & BITMASK1(SHR_SR)) && pc+2 < end_pc) op_flags[i+1] |= OF_BTARGET; // RTE entrypoint in case of SR.IMASK change // unify T and SR since rcache doesn't know about "virtual" guest regs @@ -3040,9 +3083,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) if (op_flags[v] & OF_BASIC_LOOP) { m3 &= 
~rcache_regs_static & ~BITMASK5(SHR_PC, SHR_PR, SHR_SR, SHR_T, SHR_MEM); if (m3 && count_bits(m3) < count_bits(rcache_vregs_reg) && - pinned_loop_count < ARRAY_SIZE(pinned_loop_pc)-1) { - pinned_loop_mask[pinned_loop_count] = m3; - pinned_loop_pc[pinned_loop_count++] = base_pc + 2*v; + pinned_loop_count < ARRAY_SIZE(pinned_loops)-1) { + pinned_loops[pinned_loop_count++] = + (struct linkage) { .mask = m3, .pc = base_pc + 2*v }; } else op_flags[v] &= ~OF_BASIC_LOOP; } @@ -3052,10 +3095,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) #endif } - if (branch_target_count > 0) { - memset(branch_target_ptr, 0, sizeof(branch_target_ptr[0]) * branch_target_count); - } - tcache_ptr = dr_prepare_cache(tcache_id, (end_pc - base_pc) / 2); #if (DRC_DEBUG & 4) tcache_dsm_ptrs[tcache_id] = tcache_ptr; @@ -3076,7 +3115,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_invalidate_t(); drcf = (struct drcf) { 0 }; #if LOOP_OPTIMIZER - pinned_loop_pc[pinned_loop_count] = -1; + pinned_loops[pinned_loop_count].pc = -1; pinned_loop_count = 0; #endif @@ -3090,24 +3129,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) int tmp3, tmp4; int sr; - opd = &ops[i]; - op = FETCH_OP(pc); - -#if (DRC_DEBUG & 2) - insns_compiled++; -#endif -#if (DRC_DEBUG & 4) - DasmSH2(sh2dasm_buff, pc, op); - if (op_flags[i] & OF_BTARGET) { - if ((op_flags[i] & OF_LOOP) == OF_DELAY_LOOP) tmp3 = '+'; - else if ((op_flags[i] & OF_LOOP) == OF_POLL_LOOP) tmp3 = '='; - else if ((op_flags[i] & OF_LOOP) == OF_IDLE_LOOP) tmp3 = '~'; - else tmp3 = '*'; - } else if (drcf.loop_type) tmp3 = '.'; - else tmp3 = ' '; - printf("%c%08x %04x %s\n", tmp3, pc, op, sh2dasm_buff); -#endif - if (op_flags[i] & OF_BTARGET) { if (pc != base_pc) @@ -3143,9 +3164,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) break; } - v = find_in_sorted_array(branch_target_pc, branch_target_count, pc); + v = find_in_sorted_linkage(branch_targets, branch_target_count, pc); if (v 
>= 0) - branch_target_ptr[v] = tcache_ptr; + branch_targets[v].ptr = tcache_ptr; #if LOOP_DETECTION drcf.loop_type = op_flags[i] & OF_LOOP; drcf.delay_reg = -1; @@ -3176,12 +3197,13 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) #if LOOP_OPTIMIZER if (op_flags[i] & OF_BASIC_LOOP) { - if (pinned_loop_pc[pinned_loop_count] == pc) { + if (pinned_loops[pinned_loop_count].pc == pc) { // pin needed regs on loop entry - FOR_ALL_BITS_SET_DO(pinned_loop_mask[pinned_loop_count], v, rcache_pin_reg(v)); + FOR_ALL_BITS_SET_DO(pinned_loops[pinned_loop_count].mask, v, rcache_pin_reg(v)); emith_flush(); // store current PC as loop target - pinned_loop_ptr[pinned_loop_count] = tcache_ptr; + pinned_loops[pinned_loop_count].ptr = tcache_ptr; + drcf.pinning = 1; } else op_flags[i] &= ~OF_BASIC_LOOP; } @@ -3193,11 +3215,10 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) EMITH_JMP_START(DCOND_GT); rcache_save_pinned(); - if (blx_target_count < ARRAY_SIZE(blx_target_pc)) { + if (blx_target_count < ARRAY_SIZE(blx_targets)) { // exit via stub in blx table (saves some 1-3 insns in the main flow) - blx_target_ptr[blx_target_count] = tcache_ptr; - blx_target_pc[blx_target_count] = pc|1; - blx_target_bl[blx_target_count++] = NULL; + blx_targets[blx_target_count++] = + (struct linkage) { .ptr = tcache_ptr, .pc = pc|1, .bl = NULL }; emith_jump_patchable(tcache_ptr); } else { // blx table full, must inline exit code @@ -3210,12 +3231,11 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } else #endif { - if (blx_target_count < ARRAY_SIZE(blx_target_pc)) { + if (blx_target_count < ARRAY_SIZE(blx_targets)) { // exit via stub in blx table (saves some 1-3 insns in the main flow) - blx_target_pc[blx_target_count] = pc|1; - blx_target_bl[blx_target_count] = NULL; emith_cmp_r_imm(sr, 0); - blx_target_ptr[blx_target_count++] = tcache_ptr; + blx_targets[blx_target_count++] = + (struct linkage) { .ptr = tcache_ptr, .pc = pc|1, .bl = NULL }; 
emith_jump_cond_patchable(DCOND_LE, tcache_ptr); } else { // blx table full, must inline exit code @@ -3282,14 +3302,41 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } #endif - emith_pool_check(); - pc += 2; - - if (skip_op > 0) { - skip_op--; - continue; + // emit blx area if limits are approached + if (blx_target_count && (blx_target_count > ARRAY_SIZE(blx_targets)-4 || + !emith_jump_patch_inrange(blx_targets[0].ptr, tcache_ptr+0x100))) { + u8 *jp; + rcache_invalidate_tmp(); + jp = tcache_ptr; + emith_jump_patchable(tcache_ptr); + emit_branch_linkage_code(sh2, block, tcache_id, branch_targets, + branch_target_count, blx_targets, blx_target_count); + blx_target_count = 0; + do_host_disasm(tcache_id); + emith_jump_patch(jp, tcache_ptr, NULL); } + emith_pool_check(); + + opd = &ops[i]; + op = FETCH_OP(pc); +#if (DRC_DEBUG & 4) + DasmSH2(sh2dasm_buff, pc, op); + if (op_flags[i] & OF_BTARGET) { + if ((op_flags[i] & OF_LOOP) == OF_DELAY_LOOP) tmp3 = '+'; + else if ((op_flags[i] & OF_LOOP) == OF_POLL_LOOP) tmp3 = '='; + else if ((op_flags[i] & OF_LOOP) == OF_IDLE_LOOP) tmp3 = '~'; + else tmp3 = '*'; + } else if (drcf.loop_type) tmp3 = '.'; + else tmp3 = ' '; + printf("%c%08x %04x %s\n", tmp3, pc, op, sh2dasm_buff); +#endif + + pc += 2; +#if (DRC_DEBUG & 2) + insns_compiled++; +#endif + if (op_flags[i] & OF_DELAY_OP) { // handle delay slot dependencies @@ -4422,7 +4469,7 @@ end_op: emit_sync_t_to_sr(); emith_sh2_delay_loop(cycles, drcf.delay_reg); rcache_unlock_all(); // may lock delay_reg - drcf.polling = drcf.loop_type = 0; + drcf.polling = drcf.loop_type = drcf.pinning = 0; } #endif @@ -4464,33 +4511,39 @@ end_op: emith_sync_t(sr); // no modification of host status/flags between here and branching! 
- v = find_in_sorted_array(branch_target_pc, branch_target_count, target_pc); + v = find_in_sorted_linkage(branch_targets, branch_target_count, target_pc); if (v >= 0) { // local branch - if (branch_target_ptr[v]) { + if (branch_targets[v].ptr) { // local backward jump, link here now since host PC is already known - target = branch_target_ptr[v]; + target = branch_targets[v].ptr; #if LOOP_OPTIMIZER - if (pinned_loop_pc[pinned_loop_count] == target_pc) { + if (pinned_loops[pinned_loop_count].pc == target_pc) { // backward jump at end of optimized loop rcache_unpin_all(); - target = pinned_loop_ptr[pinned_loop_count]; + target = pinned_loops[pinned_loop_count].ptr; pinned_loop_count ++; } #endif - if (cond != -1) - emith_jump_cond(cond, target); - else { + if (cond != -1) { + if (emith_jump_patch_inrange(tcache_ptr, target)) { + emith_jump_cond(cond, target); + } else { + // not reachable directly, must use far branch + EMITH_JMP_START(emith_invert_cond(cond)); + emith_jump(target); + EMITH_JMP_END(emith_invert_cond(cond)); + } + } else { emith_jump(target); rcache_invalidate(); } - } else if (branch_patch_count < MAX_LOCAL_BRANCHES) { + } else if (blx_target_count < MAX_LOCAL_BRANCHES) { // local forward jump target = tcache_ptr; - branch_patch_pc[branch_patch_count] = target_pc; - branch_patch_ptr[branch_patch_count] = target; - branch_patch_count++; + blx_targets[blx_target_count++] = + (struct linkage) { .pc = target_pc, .ptr = target, .bl = NULL }; if (cond != -1) emith_jump_cond_patchable(cond, target); else { @@ -4498,7 +4551,7 @@ end_op: rcache_invalidate(); } } else - dbg(1, "warning: too many local branches"); + dbg(1, "warning: too many unresolved branches"); } if (target == NULL) @@ -4507,13 +4560,12 @@ end_op: bl = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id); if (cond != -1) { #if 1 - if (bl && blx_target_count < ARRAY_SIZE(blx_target_pc)) { + if (bl && blx_target_count < ARRAY_SIZE(blx_targets)) { // conditional jumps get a 
blx stub for the far jump - blx_target_pc[blx_target_count] = target_pc; - blx_target_bl[blx_target_count] = bl; - blx_target_ptr[blx_target_count++] = tcache_ptr; bl->type = BL_JCCBLX; target = tcache_ptr; + blx_targets[blx_target_count++] = + (struct linkage) { .pc = target_pc, .ptr = target, .bl = bl }; emith_jump_cond_patchable(cond, target); } else { // not linkable, or blx table full; inline jump @dispatcher @@ -4660,44 +4712,15 @@ end_op: } else rcache_flush(); - // emit blx area - for (i = 0; i < blx_target_count; i++) { - void *target = (blx_target_pc[i] & 1 ? sh2_drc_exit : sh2_drc_dispatcher); - - emith_pool_check(); - bl = blx_target_bl[i]; - if (bl) - bl->blx = tcache_ptr; - emith_jump_patch(blx_target_ptr[i], tcache_ptr, NULL); - tmp = rcache_get_tmp_arg(0); - emith_move_r_imm(tmp, blx_target_pc[i] & ~1); - emith_jump(target); - rcache_invalidate(); - } + // link unresolved branches, emitting blx area entries as needed + emit_branch_linkage_code(sh2, block, tcache_id, branch_targets, + branch_target_count, blx_targets, blx_target_count); emith_flush(); do_host_disasm(tcache_id); emith_pool_commit(0); - // link local branches - for (i = 0; i < branch_patch_count; i++) { - void *target; - int t; - t = find_in_sorted_array(branch_target_pc, branch_target_count, branch_patch_pc[i]); - target = branch_target_ptr[t]; - if (target == NULL) { - // flush pc and go back to dispatcher (this should no longer happen) - dbg(1, "stray branch to %08x %p", branch_patch_pc[i], tcache_ptr); - target = tcache_ptr; - tmp = rcache_get_tmp_arg(0); - emith_move_r_imm(tmp, branch_patch_pc[i]); - emith_jump(sh2_drc_dispatcher); - rcache_flush(); - } - emith_jump_patch(branch_patch_ptr[i], target, NULL); - } - // fill blx backup; do this last to backup final patched code for (i = 0; i < block->entry_count; i++) for (bl = block->entryp[i].o_links; bl; bl = bl->o_next) @@ -4927,7 +4950,7 @@ static void sh2_generate_utils(void) // pc = sh2_drc_dispatcher_call(u32 pc) 
sh2_drc_dispatcher_call = (void *)tcache_ptr; emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx)); - emith_add_r_imm(arg2, 2*sizeof(void *)); + emith_add_r_imm(arg2, (u32)(2*sizeof(void *))); emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *)); emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx)); emith_add_r_r_r_lsl_ptr(arg2, CONTEXT_REG, arg2, 0); @@ -4957,7 +4980,7 @@ static void sh2_generate_utils(void) emith_jump_cond(DCOND_NE, sh2_drc_dispatcher); #endif emith_read_r_r_offs_ptr(arg0, arg1, offsetof(SH2, rts_cache) + sizeof(void *)); - emith_sub_r_imm(arg2, 2*sizeof(void *)); + emith_sub_r_imm(arg2, (u32)(2*sizeof(void *))); emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *)); emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx)); #if (DRC_DEBUG & 128) diff --git a/cpu/sh2/compiler.h b/cpu/sh2/compiler.h index 415f01ba..44620f48 100644 --- a/cpu/sh2/compiler.h +++ b/cpu/sh2/compiler.h @@ -41,6 +41,8 @@ unsigned short scan_block(unsigned int base_pc, int is_slave, #define DRC_SR_REG "r28" #elif defined(__mips__) #define DRC_SR_REG "s6" +#elif defined(__riscv__) || defined(__riscv) +#define DRC_SR_REG "s11" #elif defined(__i386__) #define DRC_SR_REG "edi" #elif defined(__x86_64__) diff --git a/pico/32x/32x.c b/pico/32x/32x.c index 7e2e039e..896b5aa1 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -506,7 +506,7 @@ void sync_sh2s_normal(unsigned int m68k_target) if (CYCLES_GT(now, ssh2.m68krcycles_done)) now = ssh2.m68krcycles_done; } - if (now - timer_cycles >= STEP_N) { + if (CYCLES_GT(now, timer_cycles+STEP_N)) { p32x_timers_do(now - timer_cycles); timer_cycles = now; } diff --git a/platform/common/dismips.c b/platform/common/dismips.c index 41c0f7a5..f9888f2a 100644 --- a/platform/common/dismips.c +++ b/platform/common/dismips.c @@ -127,7 +127,7 @@ static const struct insn special_insns[] = { {0x38, S_IMM_DT, "dsll"}, {0x3A, S_IMM_DT, "dsrl"}, {0x3B, S_IMM_DT, "dsra"}, - {0x3D, S_IMM_DT, "dsll32"}, + {0x3C, S_IMM_DT, 
"dsll32"}, {0x3E, S_IMM_DT, "dsrl32"}, {0x3F, S_IMM_DT, "dsra32"}, }; diff --git a/platform/linux/emu.c b/platform/linux/emu.c index 887d7836..93665263 100644 --- a/platform/linux/emu.c +++ b/platform/linux/emu.c @@ -29,7 +29,7 @@ void pemu_prep_defconfig(void) void pemu_validate_config(void) { -#if !defined(__arm__) && !defined(__aarch64__) && !defined(__mips__) && !defined(__i386__) && !defined(__x86_64__) +#if !defined(__arm__) && !defined(__aarch64__) && !defined(__mips__) && !defined(__riscv__) && !defined(__riscv) && !defined(__i386__) && !defined(__x86_64__) PicoIn.opt &= ~POPT_EN_DRC; #endif } diff --git a/tools/mkoffsets.sh b/tools/mkoffsets.sh index 8a0557c7..349b8605 100755 --- a/tools/mkoffsets.sh +++ b/tools/mkoffsets.sh @@ -16,7 +16,7 @@ compile_rodata () # $CC $CFLAGS -I .. -nostdlib -Wl,-edummy /tmp/getoffs.c \ # -o /tmp/getoffs.o || exit 1 # find the name of the .rodata section (in case -fdata-sections is used) - rosect=$(readelf -S /tmp/getoffs.o | grep '\.rodata' | + rosect=$(readelf -S /tmp/getoffs.o | grep '\.rodata\|\.sdata' | sed 's/^[^.]*././;s/ .*//') # read out .rodata section as hex string (should be only 4 or 8 bytes) ro=$(readelf -x $rosect /tmp/getoffs.o | grep '0x' | cut -c14-48 | From f7a453816e061ab66cd5c48510fd9a4f29c4152f Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 13 Nov 2019 21:58:48 +0100 Subject: [PATCH 0238/1110] sh2 drc: RISC-V (RV64IM) code emitter, some work on MIPS64 --- cpu/drc/emit_riscv.c | 1579 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1579 insertions(+) create mode 100644 cpu/drc/emit_riscv.c diff --git a/cpu/drc/emit_riscv.c b/cpu/drc/emit_riscv.c new file mode 100644 index 00000000..84c3ccb2 --- /dev/null +++ b/cpu/drc/emit_riscv.c @@ -0,0 +1,1579 @@ +/* + * Basic macros to emit RISC-V RV64IM instructions and some utils + * Copyright (C) 2019 kub + * + * This work is licensed under the terms of MAME license. + * See COPYING file in the top-level directory. 
+ */ +#define HOST_REGS 32 + +// RISC-V ABI: params: x10-x17, return: x10-x11, temp: x1(ra),x5-x7,x28-x31 +// saved: x8(fp),x9,x18-x27, reserved: x0(zero), x4(tp), x3(gp), x2(sp) +// x28-x31(t3-t6) are used internally by the code emitter +#define RET_REG 10 // a0 +#define PARAM_REGS { 10, 11, 12, 13, 14, 15, 16, 17 } // a0-a7 +#define PRESERVED_REGS { 9, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27 } // s1-s11 +#define TEMPORARY_REGS { 5, 6, 7 } // t0-t2 + +#define CONTEXT_REG 9 // s1 +#define STATIC_SH2_REGS { SHR_SR,27 , SHR_R0,26 , SHR_R0+1,25 } + +// registers usable for user code: r1-r25, others reserved or special +#define Z0 0 // zero register +#define GP 3 // global pointer +#define SP 2 // stack pointer +#define FP 8 // frame pointer +#define LR 1 // link register +// internally used by code emitter: +#define AT 31 // used to hold intermediate results +#define FNZ 30 // emulated processor flags: N (bit 31) ,Z (all bits) +#define FC 29 // emulated processor flags: C (bit 0), others 0 +#define FV 28 // emulated processor flags: Nt^Ns (bit 31).
others x + + +// unified conditions; virtual, not corresponding to anything real on RISC-V +#define DCOND_EQ 0x0 +#define DCOND_NE 0x1 +#define DCOND_HS 0x2 +#define DCOND_LO 0x3 +#define DCOND_MI 0x4 +#define DCOND_PL 0x5 +#define DCOND_VS 0x6 +#define DCOND_VC 0x7 +#define DCOND_HI 0x8 +#define DCOND_LS 0x9 +#define DCOND_GE 0xa +#define DCOND_LT 0xb +#define DCOND_GT 0xc +#define DCOND_LE 0xd + +#define DCOND_CS DCOND_LO +#define DCOND_CC DCOND_HS + +// unified insn +#define R5_INSN(b25, b20, b15, b12, b7, op) \ + (((b25)<<25)|((b20)<<20)|((b15)<<15)|((b12)<<12)|((b7)<<7)|((op)<<0)) + +#define _ 0 //marker for "field unused" +#define _CB(v,l,s,d) ((((v)>>(s))&((1<<(l))-1))<<(d)) // copy l bits + +#define R5_R_INSN(op, f1, f2, rd, rs, rt) \ + R5_INSN(f2, rt, rs, f1, rd, op) +#define R5_I_INSN(op, f1, rd, rs, imm) \ + R5_INSN(_, _CB(imm,12,0,0), rs, f1, rd, op) +#define R5_S_INSN(op, f1, rt, rs, imm) \ + R5_INSN(_CB(imm,7,5,0), rt, rs, f1, _CB(imm,5,0,0), op) +#define R5_U_INSN(op, rd, imm) \ + R5_INSN(_,_,_, _CB(imm,20,12,0), rd, op) +// oy vey... 
R5 immediate encoding in branches is really unwieldy :-/ +#define R5_B_INSN(op, f1, rt, rs, imm) \ + R5_INSN(_CB(imm,1,12,6)|_CB(imm,6,5,0), rt, rs, f1, \ + _CB(imm,4,1,1)|_CB(imm,1,11,0), op) +#define R5_J_INSN(op, rd, imm) \ + R5_INSN(_CB(imm,1,20,6)|_CB(imm,6,5,0), _CB(imm,4,1,1)|_CB(imm,1,11,0),\ + _CB(imm,8,12,0), rd, op) + +// opcode +enum { OP_LUI=0x37, OP_JAL=0x6f, OP_JALR=0x67, OP_BCOND=0x63, OP_LD=0x03, + OP_ST=0x23, OP_IMM=0x13, OP_IMM32=0x1b, OP_REG=0x33, OP_REG32=0x3b }; +// func3 +enum { F1_ADD, F1_SL, F1_SLT, F1_SLTU, F1_XOR, F1_SR, F1_OR, F1_AND }; +enum { F1_BEQ, F1_BNE, F1_BLT=4, F1_BGE, F1_BLTU, F1_BGEU }; +enum { F1_B, F1_H, F1_W, F1_D, F1_BU, F1_HU, F1_WU }; +enum { F1_MUL, F1_MULH, F1_MULHSU, F1_MULHU, F1_DIV, F1_DIVU, F1_REM, F1_REMU }; +// func7 +enum { F2_ALT=0x20, F2_MULDIV=0x01 }; + +#define __(n) o##n // enum marker for "undefined" + +#define R5_NOP R5_I_INSN(OP_IMM, F1_ADD, Z0, Z0, 0) // nop: ADDI r0, r0, #0 + +// arithmetic/logical + +// rd = rs OP rt +#define R5_ADD_REG(rd, rs, rt) \ + R5_R_INSN(OP_REG, F1_ADD, _, rd, rs, rt) +#define R5_SUB_REG(rd, rs, rt) \ + R5_R_INSN(OP_REG, F1_ADD, F2_ALT, rd, rs, rt) + +#define R5_NEG_REG(rd, rt) \ + R5_SUB_REG(rd, Z0, rt) + +#define R5_XOR_REG(rd, rs, rt) \ + R5_R_INSN(OP_REG, F1_XOR, _, rd, rs, rt) +#define R5_OR_REG(rd, rs, rt) \ + R5_R_INSN(OP_REG, F1_OR , _, rd, rs, rt) +#define R5_AND_REG(rd, rs, rt) \ + R5_R_INSN(OP_REG, F1_AND, _, rd, rs, rt) + +// rd = rs SHIFT rt +#define R5_LSL_REG(rd, rs, rt) \ + R5_R_INSN(OP_REG, F1_SL , _, rd, rs, rt) +#define R5_LSR_REG(rd, rs, rt) \ + R5_R_INSN(OP_REG, F1_SR , _, rd, rs, rt) +#define R5_ASR_REG(rd, rs, rt) \ + R5_R_INSN(OP_REG, F1_SR , F2_ALT, rd, rs, rt) + +// rd = (rs < rt) +#define R5_SLT_REG(rd, rs, rt) \ + R5_R_INSN(OP_REG, F1_SLT, _, rd, rs, rt) +#define R5_SLTU_REG(rd, rs, rt) \ + R5_R_INSN(OP_REG, F1_SLTU,_, rd, rs, rt) + +// rd = rs OP imm12 +#define R5_ADD_IMM(rd, rs, imm12) \ + R5_I_INSN(OP_IMM, F1_ADD , rd, rs, imm12) + +#define 
R5_XOR_IMM(rd, rs, imm12) \ + R5_I_INSN(OP_IMM, F1_XOR , rd, rs, imm12) +#define R5_OR_IMM(rd, rs, imm12) \ + R5_I_INSN(OP_IMM, F1_OR , rd, rs, imm12) +#define R5_AND_IMM(rd, rs, imm12) \ + R5_I_INSN(OP_IMM, F1_AND , rd, rs, imm12) + +#define R5_MOV_REG(rd, rs) \ + R5_ADD_IMM(rd, rs, 0) +#define R5_MVN_REG(rd, rs) \ + R5_XOR_IMM(rd, rs, -1) + +// rd = (imm12 << (0|12)) +#define R5_MOV_IMM(rd, imm12) \ + R5_OR_IMM(rd, Z0, imm12) +#define R5_MOVT_IMM(rd, imm20) \ + R5_U_INSN(OP_LUI, rd, imm20) + +// rd = rs SHIFT imm5/imm6 +#define R5_LSL_IMM(rd, rs, bits) \ + R5_R_INSN(OP_IMM, F1_SL , _, rd, rs, bits) +#define R5_LSR_IMM(rd, rs, bits) \ + R5_R_INSN(OP_IMM, F1_SR , _, rd, rs, bits) +#define R5_ASR_IMM(rd, rs, bits) \ + R5_R_INSN(OP_IMM, F1_SR , F2_ALT, rd, rs, bits) + +// rd = (rs < imm12) +#define R5_SLT_IMM(rd, rs, imm12) \ + R5_I_INSN(OP_IMM, F1_SLT , rd, rs, imm12) +#define R5_SLTU_IMM(rd, rs, imm12) \ + R5_I_INSN(OP_IMM, F1_SLTU, rd, rs, imm12) + +// multiplication + +#define R5_MULHU(rd, rs, rt) \ + R5_R_INSN(OP_REG, F1_MULHU, F2_MULDIV, rd, rs, rt) +#define R5_MULHS(rd, rs, rt) \ + R5_R_INSN(OP_REG, F1_MULH, F2_MULDIV, rd, rs, rt) +#define R5_MUL(rd, rs, rt) \ + R5_R_INSN(OP_REG, F1_MUL, F2_MULDIV, rd, rs, rt) + +// branching + +#define R5_J(imm20) \ + R5_J_INSN(OP_JAL, Z0, imm20) +#define R5_JAL(rd, imm20) \ + R5_J_INSN(OP_JAL, rd, imm20) +#define R5_JR(rs, offs12) \ + R5_I_INSN(OP_JALR, _, Z0, rs, offs12) +#define R5_JALR(rd, rs, offs12) \ + R5_I_INSN(OP_JALR, _, rd, rs, offs12) + +// conditional branches; no condition code, these compare rs against rt +#define R5_BCOND(cond, rs, rt, offs13) \ + R5_B_INSN(OP_BCOND, cond, rt, rs, offs13) +#define R5_BCONDZ(cond, rs, offs13) \ + R5_B_INSN(OP_BCOND, cond, Z0, rs, offs13) +#define R5_B(offs13) \ + R5_BCOND(F1_BEQ, Z0, Z0, offs13) + +// load/store indexed base + +#define R5_LW(rd, rs, offs12) \ + R5_I_INSN(OP_LD, F1_W, rd, rs, offs12) +#define R5_LH(rd, rs, offs12) \ + R5_I_INSN(OP_LD, F1_H, rd, rs, offs12) 
+#define R5_LB(rd, rs, offs12) \ + R5_I_INSN(OP_LD, F1_B, rd, rs, offs12) +#define R5_LHU(rd, rs, offs12) \ + R5_I_INSN(OP_LD, F1_HU, rd, rs, offs12) +#define R5_LBU(rd, rs, offs12) \ + R5_I_INSN(OP_LD, F1_BU, rd, rs, offs12) + +#define R5_SW(rt, rs, offs12) \ + R5_S_INSN(OP_ST, F1_W, rt, rs, offs12) +#define R5_SH(rt, rs, offs12) \ + R5_S_INSN(OP_ST, F1_H, rt, rs, offs12) +#define R5_SB(rt, rs, offs12) \ + R5_S_INSN(OP_ST, F1_B, rt, rs, offs12) + +// pointer operations + +#if __riscv_xlen == 64 +#define R5_OP32 (OP_REG32 ^ OP_REG) +#define F1_P F1_D +#define PTR_SCALE 3 + +// NB: must split 64 bit result into 2 32 bit registers +// NB: this expects 32 bit values in s1+s2, correctly sign extended to 64 bits +#define EMIT_R5_MULLU_REG(dlo, dhi, s1, s2) do { \ + EMIT(R5_MUL(dlo, s1, s2)); \ + EMIT(R5_LSR_IMM(dhi, dlo, 32)); \ + EMIT(R5_LSL_IMM(dlo, dlo, 32)); \ + EMIT(R5_LSR_IMM(dlo, dlo, 32)); \ +} while (0) + +#define EMIT_R5_MULLS_REG(dlo, dhi, s1, s2) \ + EMIT_R5_MULLU_REG(dlo, dhi, s1, s2) +#else +#define R5_OP32 0 +#define F1_P F1_W +#define PTR_SCALE 2 + +#define EMIT_R5_MULLU_REG(dlo, dhi, s1, s2) do { \ + int at = (dhi == s1 || dhi == s2 ? AT : dhi); \ + EMIT(R5_MULHU(at, s1, s2)); \ + EMIT(R5_MUL(dlo, s1, s2)); \ + if (at != dhi) emith_move_r_r(dhi, at); \ +} while (0) + +#define EMIT_R5_MULLS_REG(dlo, dhi, s1, s2) do { \ + int at = (dhi == s1 || dhi == s2 ? 
AT : dhi); \ + EMIT(R5_MULHS(at, s1, s2)); \ + EMIT(R5_MUL(dlo, s1, s2)); \ + if (at != dhi) emith_move_r_r(dhi, at); \ +} while (0) +#endif + +#define R5_ADDW_REG(rd, rs, rt) (R5_ADD_REG(rd, rs, rt)^R5_OP32) +#define R5_SUBW_REG(rd, rs, rt) (R5_SUB_REG(rd, rs, rt)^R5_OP32) +#define R5_LSLW_REG(rd, rs, rt) (R5_LSL_REG(rd, rs, rt)^R5_OP32) +#define R5_LSRW_REG(rd, rs, rt) (R5_LSR_REG(rd, rs, rt)^R5_OP32) +#define R5_ASRW_REG(rd, rs, rt) (R5_ASR_REG(rd, rs, rt)^R5_OP32) + +#define R5_NEGW_REG(rd, rt) (R5_NEG_REG(rd, rt) ^R5_OP32) +#define R5_MULW(rd, rs, rt) (R5_MUL(rd, rs, rt) ^R5_OP32) + +#define R5_ADDW_IMM(rd, rs, imm) (R5_ADD_IMM(rd, rs, imm) ^R5_OP32) +#define R5_LSLW_IMM(rd, rs, bits) (R5_LSL_IMM(rd, rs, bits)^R5_OP32) +#define R5_LSRW_IMM(rd, rs, bits) (R5_LSR_IMM(rd, rs, bits)^R5_OP32) +#define R5_ASRW_IMM(rd, rs, bits) (R5_ASR_IMM(rd, rs, bits)^R5_OP32) + +// XXX: tcache_ptr type for SVP and SH2 compilers differs.. +#define EMIT_PTR(ptr, x) \ + do { \ + *(u32 *)(ptr) = x; \ + ptr = (void *)((u8 *)(ptr) + sizeof(u32)); \ + } while (0) + +#define EMIT(op) \ + do { \ + EMIT_PTR(tcache_ptr, op); \ + COUNT_OP; \ + } while (0) + +// if-then-else conditional execution helpers +#define JMP_POS(ptr) { \ + ptr = tcache_ptr; \ + EMIT(R5_B(0)); \ +} + +#define JMP_EMIT(cond, ptr) { \ + u32 val_ = (u8 *)tcache_ptr - (u8 *)(ptr); \ + EMIT_PTR(ptr, R5_BCOND(cond_m, cond_r, cond_s, val_ & 0x00001fff)); \ +} + +#define JMP_EMIT_NC(ptr) { \ + u32 val_ = (u8 *)tcache_ptr - (u8 *)(ptr); \ + EMIT_PTR(ptr, R5_B(val_ & 0x00001fff)); \ +} + +#define EMITH_JMP_START(cond) { \ + int cond_r, cond_s, cond_m = emith_cond_check(cond, &cond_r, &cond_s); \ + u8 *cond_ptr; \ + JMP_POS(cond_ptr) + +#define EMITH_JMP_END(cond) \ + JMP_EMIT(cond, cond_ptr); \ +} + +#define EMITH_JMP3_START(cond) { \ + int cond_r, cond_s, cond_m = emith_cond_check(cond, &cond_r, &cond_s); \ + u8 *cond_ptr, *else_ptr; \ + JMP_POS(cond_ptr) + +#define EMITH_JMP3_MID(cond) \ + JMP_POS(else_ptr); \ + 
JMP_EMIT(cond, cond_ptr); + +#define EMITH_JMP3_END() \ + JMP_EMIT_NC(else_ptr); \ +} + +// "simple" jump (no more than a few insns) +// ARM32 will use conditional instructions here +#define EMITH_SJMP_START EMITH_JMP_START +#define EMITH_SJMP_END EMITH_JMP_END + +#define EMITH_SJMP3_START EMITH_JMP3_START +#define EMITH_SJMP3_MID EMITH_JMP3_MID +#define EMITH_SJMP3_END EMITH_JMP3_END + +#define EMITH_SJMP2_START(cond) \ + EMITH_SJMP3_START(cond) +#define EMITH_SJMP2_MID(cond) \ + EMITH_SJMP3_MID(cond) +#define EMITH_SJMP2_END(cond) \ + EMITH_SJMP3_END() + + +// flag register emulation. this is modelled after arm/x86. +// the FNZ register stores the result of the last flag setting operation for +// N and Z flag, used for EQ,NE,MI,PL branches. +// the FC register stores the C flag (used for HI,HS,LO,LS,CC,CS). +// the FV register stores information for V flag calculation (used for +// GT,GE,LT,LE,VC,VS). V flag is costly and only fully calculated when needed. +// the core registers may be temp registers, since the condition after calls +// is undefined anyway. + +// flag emulation creates 2 (ie cmp #0/beq) up to 9 (ie adcf/ble) extra insns. +// flag handling shortcuts may reduce this by 1-4 insns, see emith_cond_check() +static int emith_cmp_rs, emith_cmp_rt; // registers used in cmp_r_r/cmp_r_imm +static s32 emith_cmp_imm; // immediate value used in cmp_r_imm +enum { _FHC=1, _FHV=2 } emith_flg_hint; // C/V flag usage hinted by compiler +static int emith_flg_noV; // V flag known not to be set + +#define EMITH_HINT_COND(cond) do { \ + /* only need to check cond>>1 since the lowest bit inverts the cond */ \ + unsigned _mv = BITMASK3(DCOND_VS>>1,DCOND_GE>>1,DCOND_GT>>1); \ + unsigned _mc = _mv | BITMASK2(DCOND_HS>>1,DCOND_HI>>1); \ + emith_flg_hint = (_mv & BITMASK1(cond >> 1) ? _FHV : 0); \ + emith_flg_hint |= (_mc & BITMASK1(cond >> 1) ?
_FHC : 0); \ +} while (0) + +// store minimal cc information: rd, rt^rs, carry +// NB: the result *must* first go to FNZ, in case rd == rs or rd == rt. +// NB: for adcf and sbcf, carry-in must be dealt with separately (see there) +static void emith_set_arith_flags(int rd, int rs, int rt, s32 imm, int sub) +{ + if (emith_flg_hint & _FHC) { + if (sub) // C = sub:rt Z0) // Nt^Ns in FV, bit 31 + EMIT(R5_XOR_REG(FV, rs, rt)); + else if (rt == Z0 || imm == 0) + emith_flg_noV = 1; // imm #0 can't overflow + else if ((imm < 0) == !sub) + EMIT(R5_XOR_IMM(FV, rs, -1)); + else if ((imm > 0) == !sub) + EMIT(R5_XOR_REG(FV, rs, Z0)); + } + // full V = Nd^Nt^Ns^C calculation is deferred until really needed + + if (rd && rd != FNZ) + EMIT(R5_MOV_REG(rd, FNZ)); // N,Z via result value in FNZ + emith_cmp_rs = emith_cmp_rt = -1; +} + +// since R5 has less-than and compare-branch insns, handle cmp separately by +// storing the involved regs for later use in one of those R5 insns. +// This works for all conditions but VC/VS, but this is fortunately never used. 
+static void emith_set_compare_flags(int rs, int rt, s32 imm) +{ + emith_cmp_rt = rt; + emith_cmp_rs = rs; + emith_cmp_imm = imm; +} + +// data processing, register +#define emith_move_r_r_ptr(d, s) \ + EMIT(R5_MOV_REG(d, s)) +#define emith_move_r_r_ptr_c(cond, d, s) \ + emith_move_r_r_ptr(d, s) + +#define emith_move_r_r(d, s) \ + emith_move_r_r_ptr(d, s) +#define emith_move_r_r_c(cond, d, s) \ + emith_move_r_r(d, s) + +#define emith_mvn_r_r(d, s) \ + EMIT(R5_MVN_REG(d, s)) + +#define emith_add_r_r_r_lsl_ptr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(R5_LSL_IMM(AT, s2, simm)); \ + EMIT(R5_ADD_REG(d, s1, AT)); \ + } else EMIT(R5_ADD_REG(d, s1, s2)); \ +} while (0) +#define emith_add_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(R5_LSLW_IMM(AT, s2, simm)); \ + EMIT(R5_ADDW_REG(d, s1, AT)); \ + } else EMIT(R5_ADDW_REG(d, s1, s2)); \ +} while (0) + +#define emith_add_r_r_r_lsr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(R5_LSRW_IMM(AT, s2, simm)); \ + EMIT(R5_ADDW_REG(d, s1, AT)); \ + } else EMIT(R5_ADDW_REG(d, s1, s2)); \ +} while (0) + +#define emith_addf_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(R5_LSLW_IMM(AT, s2, simm)); \ + EMIT(R5_ADDW_REG(FNZ, s1, AT)); \ + emith_set_arith_flags(d, s1, AT, 0, 0); \ + } else { \ + EMIT(R5_ADDW_REG(FNZ, s1, s2)); \ + emith_set_arith_flags(d, s1, s2, 0, 0); \ + } \ +} while (0) + +#define emith_addf_r_r_r_lsr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(R5_LSRW_IMM(AT, s2, simm)); \ + EMIT(R5_ADDW_REG(FNZ, s1, AT)); \ + emith_set_arith_flags(d, s1, AT, 0, 0); \ + } else { \ + EMIT(R5_ADDW_REG(FNZ, s1, s2)); \ + emith_set_arith_flags(d, s1, s2, 0, 0); \ + } \ +} while (0) + +#define emith_sub_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(R5_LSLW_IMM(AT, s2, simm)); \ + EMIT(R5_SUBW_REG(d, s1, AT)); \ + } else EMIT(R5_SUBW_REG(d, s1, s2)); \ +} while (0) + +#define emith_subf_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(R5_LSLW_IMM(AT, s2, simm)); \ + EMIT(R5_SUBW_REG(FNZ, 
s1, AT)); \ + emith_set_arith_flags(d, s1, AT, 0, 1); \ + } else { \ + EMIT(R5_SUBW_REG(FNZ, s1, s2)); \ + emith_set_arith_flags(d, s1, s2, 0, 1); \ + } \ +} while (0) + +#define emith_or_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(R5_LSLW_IMM(AT, s2, simm)); \ + EMIT(R5_OR_REG(d, s1, AT)); \ + } else EMIT(R5_OR_REG(d, s1, s2)); \ +} while (0) + +#define emith_or_r_r_r_lsr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(R5_LSRW_IMM(AT, s2, simm)); \ + EMIT(R5_OR_REG(d, s1, AT)); \ + } else EMIT(R5_OR_REG(d, s1, s2)); \ +} while (0) + +#define emith_eor_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(R5_LSLW_IMM(AT, s2, simm)); \ + EMIT(R5_XOR_REG(d, s1, AT)); \ + } else EMIT(R5_XOR_REG(d, s1, s2)); \ +} while (0) + +#define emith_eor_r_r_r_lsr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(R5_LSRW_IMM(AT, s2, simm)); \ + EMIT(R5_XOR_REG(d, s1, AT)); \ + } else EMIT(R5_XOR_REG(d, s1, s2)); \ +} while (0) + +#define emith_and_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(R5_LSLW_IMM(AT, s2, simm)); \ + EMIT(R5_AND_REG(d, s1, AT)); \ + } else EMIT(R5_AND_REG(d, s1, s2)); \ +} while (0) + +#define emith_or_r_r_lsl(d, s, lslimm) \ + emith_or_r_r_r_lsl(d, d, s, lslimm) +#define emith_or_r_r_lsr(d, s, lsrimm) \ + emith_or_r_r_r_lsr(d, d, s, lsrimm) + +#define emith_eor_r_r_lsl(d, s, lslimm) \ + emith_eor_r_r_r_lsl(d, d, s, lslimm) +#define emith_eor_r_r_lsr(d, s, lsrimm) \ + emith_eor_r_r_r_lsr(d, d, s, lsrimm) + +#define emith_add_r_r_r(d, s1, s2) \ + emith_add_r_r_r_lsl(d, s1, s2, 0) + +#define emith_addf_r_r_r_ptr(d, s1, s2) \ + emith_addf_r_r_r_lsl(d, s1, s2, 0) +#define emith_addf_r_r_r(d, s1, s2) \ + emith_addf_r_r_r_ptr(d, s1, s2) + +#define emith_sub_r_r_r(d, s1, s2) \ + emith_sub_r_r_r_lsl(d, s1, s2, 0) + +#define emith_subf_r_r_r(d, s1, s2) \ + emith_subf_r_r_r_lsl(d, s1, s2, 0) + +#define emith_or_r_r_r(d, s1, s2) \ + emith_or_r_r_r_lsl(d, s1, s2, 0) + +#define emith_eor_r_r_r(d, s1, s2) \ + emith_eor_r_r_r_lsl(d, s1, s2, 0) + 
+#define emith_and_r_r_r(d, s1, s2) \ + emith_and_r_r_r_lsl(d, s1, s2, 0) + +#define emith_add_r_r_ptr(d, s) \ + emith_add_r_r_r_lsl_ptr(d, d, s, 0) +#define emith_add_r_r(d, s) \ + emith_add_r_r_r(d, d, s) + +#define emith_sub_r_r(d, s) \ + emith_sub_r_r_r(d, d, s) + +#define emith_neg_r_r(d, s) \ + EMIT(R5_NEGW_REG(d, s)) + +#define emith_adc_r_r_r(d, s1, s2) do { \ + emith_add_r_r_r(AT, s2, FC); \ + emith_add_r_r_r(d, s1, AT); \ +} while (0) + +#define emith_sbc_r_r_r(d, s1, s2) do { \ + emith_add_r_r_r(AT, s2, FC); \ + emith_sub_r_r_r(d, s1, AT); \ +} while (0) + +#define emith_adc_r_r(d, s) \ + emith_adc_r_r_r(d, d, s) + +#define emith_negc_r_r(d, s) \ + emith_sbc_r_r_r(d, Z0, s) + +// NB: the incoming carry Cin can cause Cout if s2+Cin=0 (or s1+Cin=0 FWIW) +// moreover, if s2+Cin=0 caused Cout, s1+s2+Cin=s1+0 can't cause another Cout +#define emith_adcf_r_r_r(d, s1, s2) do { \ + emith_add_r_r_r(FNZ, s2, FC); \ + EMIT(R5_SLTU_REG(AT, FNZ, FC)); \ + emith_add_r_r_r(FNZ, s1, FNZ); \ + emith_set_arith_flags(d, s1, s2, 0, 0); \ + emith_or_r_r(FC, AT); \ +} while (0) + +#define emith_sbcf_r_r_r(d, s1, s2) do { \ + emith_add_r_r_r(FNZ, s2, FC); \ + EMIT(R5_SLTU_REG(AT, FNZ, FC)); \ + emith_sub_r_r_r(FNZ, s1, FNZ); \ + emith_set_arith_flags(d, s1, s2, 0, 1); \ + emith_or_r_r(FC, AT); \ +} while (0) + +#define emith_and_r_r(d, s) \ + emith_and_r_r_r(d, d, s) +#define emith_and_r_r_c(cond, d, s) \ + emith_and_r_r(d, s) + +#define emith_or_r_r(d, s) \ + emith_or_r_r_r(d, d, s) + +#define emith_eor_r_r(d, s) \ + emith_eor_r_r_r(d, d, s) + +#define emith_tst_r_r_ptr(d, s) do { \ + if (d != s) { \ + emith_and_r_r_r(FNZ, d, s); \ + emith_cmp_rs = emith_cmp_rt = -1; \ + } else emith_cmp_rs = s, emith_cmp_rt = Z0; \ +} while (0) +#define emith_tst_r_r(d, s) \ + emith_tst_r_r_ptr(d, s) + +#define emith_teq_r_r(d, s) do { \ + emith_eor_r_r_r(FNZ, d, s); \ + emith_cmp_rs = emith_cmp_rt = -1; \ +} while (0) + +#define emith_cmp_r_r(d, s) \ + emith_set_compare_flags(d, s, 0) +// 
emith_subf_r_r_r(FNZ, d, s) + +#define emith_addf_r_r(d, s) \ + emith_addf_r_r_r(d, d, s) + +#define emith_subf_r_r(d, s) \ + emith_subf_r_r_r(d, d, s) + +#define emith_adcf_r_r(d, s) \ + emith_adcf_r_r_r(d, d, s) + +#define emith_sbcf_r_r(d, s) \ + emith_sbcf_r_r_r(d, d, s) + +#define emith_negcf_r_r(d, s) \ + emith_sbcf_r_r_r(d, Z0, s) + + +// move immediate +static void emith_move_imm(int r, uintptr_t imm) +{ + u32 lui = imm + _CB(imm,1,11,12); + if (lui >> 12) { + // take out the effect of the sign extension of ADDI + EMIT(R5_MOVT_IMM(r, lui)); + if (imm & 0xfff) + EMIT(R5_ADD_IMM(r, r, imm)); + } else + EMIT(R5_ADD_IMM(r, Z0, imm)); +} + +#define emith_move_r_ptr_imm(r, imm) \ + emith_move_imm(r, (uintptr_t)(imm)) + +#define emith_move_r_imm(r, imm) \ + emith_move_imm(r, (u32)(imm)) +#define emith_move_r_imm_c(cond, r, imm) \ + emith_move_r_imm(r, imm) + +#define emith_move_r_imm_s8_patchable(r, imm) \ + EMIT(R5_ADD_IMM(r, Z0, (s8)(imm))) +#define emith_move_r_imm_s8_patch(ptr, imm) do { \ + u32 *ptr_ = (u32 *)ptr; \ + while ((*ptr_ & 0xff07f) != R5_ADD_IMM(Z0, Z0, 0)) ptr_++; \ + EMIT_PTR(ptr_, (*ptr_ & 0x000fffff) | ((u16)(s8)(imm)<<20)); \ +} while (0) + +// arithmetic/logical, immediate - R5 always takes a signed 12 bit immediate + +static void emith_op_imm(int f1, int rd, int rs, u32 imm) +{ + int op32 = (f1 == F1_ADD ? 
R5_OP32 : 0);
+	if ((imm + _CB(imm,1,11,12)) >> 12) {
+		emith_move_r_imm(AT, imm);
+		EMIT(R5_R_INSN(OP_REG^op32, f1&7,_, rd, rs, AT));
+	} else if (imm + (f1 == F1_AND) || rd != rs)
+		EMIT(R5_I_INSN(OP_IMM^op32, f1&7, rd, rs, imm));
+}
+
+// arithmetic, immediate - can only be ADDI, since SUBI doesn't exist
+#define emith_add_r_imm(r, imm) \
+	emith_add_r_r_imm(r, r, imm)
+#define emith_add_r_imm_c(cond, r, imm) \
+	emith_add_r_imm(r, imm)
+
+// in-place flag setting add: r is both destination and source
+#define emith_addf_r_imm(r, imm) \
+	emith_addf_r_r_imm(r, r, imm)
+
+#define emith_sub_r_imm(r, imm) \
+	emith_sub_r_r_imm(r, r, imm)
+#define emith_sub_r_imm_c(cond, r, imm) \
+	emith_sub_r_imm(r, imm)
+
+#define emith_subf_r_imm(r, imm) \
+	emith_subf_r_r_imm(r, r, imm)
+
+// NB: no trailing ';' in the macro body, the caller supplies it
+#define emith_adc_r_imm(r, imm) \
+	emith_adc_r_r_imm(r, r, imm)
+
+#define emith_adcf_r_imm(r, imm) \
+	emith_adcf_r_r_imm(r, r, imm)
+
+#define emith_cmp_r_imm(r, imm) \
+	emith_set_compare_flags(r, -1, imm)
+//	emith_subf_r_r_imm(FNZ, r, imm)
+
+#define emith_add_r_r_ptr_imm(d, s, imm) \
+	emith_op_imm(F1_ADD|F2_ALT, d, s, imm)
+
+#define emith_add_r_r_imm(d, s, imm) \
+	emith_op_imm(F1_ADD, d, s, imm)
+
+#define emith_addf_r_r_imm(d, s, imm) do { \
+	emith_add_r_r_imm(FNZ, s, imm); \
+	emith_set_arith_flags(d, s, -1, imm, 0); \
+} while (0)
+
+#define emith_adc_r_r_imm(d, s, imm) do { \
+	emith_add_r_r_r(AT, s, FC); \
+	emith_add_r_r_imm(d, AT, imm); \
+} while (0)
+
+#define emith_adcf_r_r_imm(d, s, imm) do { \
+	if (imm == 0) { \
+		emith_add_r_r_r(FNZ, s, FC); \
+		emith_set_arith_flags(d, s, -1, 1, 0); \
+	} else { \
+		emith_add_r_r_r(FNZ, s, FC); \
+		EMIT(R5_SLTU_REG(AT, FNZ, FC)); \
+		emith_add_r_r_imm(FNZ, FNZ, imm); \
+		emith_set_arith_flags(d, s, -1, imm, 0); \
+		emith_or_r_r(FC, AT); \
+	} \
+} while (0)
+
+// NB: no SUBI in R5, since ADDI takes a signed imm
+#define emith_sub_r_r_imm(d, s, imm) \
+	emith_add_r_r_imm(d, s, -(imm))
+#define emith_sub_r_r_imm_c(cond, d, s, imm) \
+	emith_sub_r_r_imm(d, s, imm)
+
+#define emith_subf_r_r_imm(d, s, 
imm) do { \ + emith_sub_r_r_imm(FNZ, s, imm); \ + emith_set_arith_flags(d, s, -1, imm, 1); \ +} while (0) + +// logical, immediate +#define emith_and_r_imm(r, imm) \ + emith_op_imm(F1_AND, r, r, imm) + +#define emith_or_r_imm(r, imm) \ + emith_op_imm(F1_OR, r, r, imm) +#define emith_or_r_imm_c(cond, r, imm) \ + emith_or_r_imm(r, imm) + +#define emith_eor_r_imm_ptr(r, imm) \ + emith_op_imm(F1_XOR, r, r, imm) +#define emith_eor_r_imm_ptr_c(cond, r, imm) \ + emith_eor_r_imm_ptr(r, imm) + +#define emith_eor_r_imm(r, imm) \ + emith_eor_r_imm_ptr(r, imm) +#define emith_eor_r_imm_c(cond, r, imm) \ + emith_eor_r_imm(r, imm) + +/* NB: BIC #imm not available in R5; use AND #~imm instead */ +#define emith_bic_r_imm(r, imm) \ + emith_op_imm(F1_AND, r, r, ~(imm)) +#define emith_bic_r_imm_c(cond, r, imm) \ + emith_bic_r_imm(r, imm) + +#define emith_tst_r_imm(r, imm) do { \ + emith_op_imm(F1_AND, FNZ, r, imm); \ + emith_cmp_rs = emith_cmp_rt = -1; \ +} while (0) +#define emith_tst_r_imm_c(cond, r, imm) \ + emith_tst_r_imm(r, imm) + +#define emith_and_r_r_imm(d, s, imm) \ + emith_op_imm(F1_AND, d, s, imm) + +#define emith_or_r_r_imm(d, s, imm) \ + emith_op_imm(F1_OR, d, s, imm) + +#define emith_eor_r_r_imm(d, s, imm) \ + emith_op_imm(F1_XOR, d, s, imm) + +// shift +#define emith_lsl(d, s, cnt) \ + EMIT(R5_LSLW_IMM(d, s, cnt)) + +#define emith_lsr(d, s, cnt) \ + EMIT(R5_LSRW_IMM(d, s, cnt)) + +#define emith_asr(d, s, cnt) \ + EMIT(R5_ASRW_IMM(d, s, cnt)) + +#define emith_ror(d, s, cnt) do { \ + EMIT(R5_LSLW_IMM(AT, s, 32-(cnt))); \ + EMIT(R5_LSRW_IMM(d, s, cnt)); \ + EMIT(R5_OR_REG(d, d, AT)); \ +} while (0) +#define emith_ror_c(cond, d, s, cnt) \ + emith_ror(d, s, cnt) + +#define emith_rol(d, s, cnt) do { \ + EMIT(R5_LSRW_IMM(AT, s, 32-(cnt))); \ + EMIT(R5_LSLW_IMM(d, s, cnt)); \ + EMIT(R5_OR_REG(d, d, AT)); \ +} while (0) + +#define emith_rorc(d) do { \ + emith_lsr(d, d, 1); \ + emith_lsl(AT, FC, 31); \ + emith_or_r_r(d, AT); \ +} while (0) + +#define emith_rolc(d) do { \ + 
emith_lsl(d, d, 1); \ + emith_or_r_r(d, FC); \ +} while (0) + +// NB: all flag setting shifts make V undefined +#define emith_lslf(d, s, cnt) do { \ + int _s = s; \ + if ((cnt) > 1) { \ + emith_lsl(d, s, cnt-1); \ + _s = d; \ + } \ + if ((cnt) > 0) { \ + emith_lsr(FC, _s, 31); \ + emith_lsl(d, _s, 1); \ + } \ + emith_move_r_r(FNZ, d); \ + emith_cmp_rs = emith_cmp_rt = -1; \ +} while (0) + +#define emith_lsrf(d, s, cnt) do { \ + int _s = s; \ + if ((cnt) > 1) { \ + emith_lsr(d, s, cnt-1); \ + _s = d; \ + } \ + if ((cnt) > 0) { \ + emith_and_r_r_imm(FC, _s, 1); \ + emith_lsr(d, _s, 1); \ + } \ + emith_move_r_r(FNZ, d); \ + emith_cmp_rs = emith_cmp_rt = -1; \ +} while (0) + +#define emith_asrf(d, s, cnt) do { \ + int _s = s; \ + if ((cnt) > 1) { \ + emith_asr(d, s, cnt-1); \ + _s = d; \ + } \ + if ((cnt) > 0) { \ + emith_and_r_r_imm(FC, _s, 1); \ + emith_asr(d, _s, 1); \ + } \ + emith_move_r_r(FNZ, d); \ + emith_cmp_rs = emith_cmp_rt = -1; \ +} while (0) + +#define emith_rolf(d, s, cnt) do { \ + emith_rol(d, s, cnt); \ + emith_and_r_r_imm(FC, d, 1); \ + emith_move_r_r(FNZ, d); \ + emith_cmp_rs = emith_cmp_rt = -1; \ +} while (0) + +#define emith_rorf(d, s, cnt) do { \ + emith_ror(d, s, cnt); \ + emith_lsr(FC, d, 31); \ + emith_move_r_r(FNZ, d); \ + emith_cmp_rs = emith_cmp_rt = -1; \ +} while (0) + +#define emith_rolcf(d) do { \ + emith_lsr(AT, d, 31); \ + emith_lsl(d, d, 1); \ + emith_or_r_r(d, FC); \ + emith_move_r_r(FC, AT); \ + emith_move_r_r(FNZ, d); \ + emith_cmp_rs = emith_cmp_rt = -1; \ +} while (0) + +#define emith_rorcf(d) do { \ + emith_and_r_r_imm(AT, d, 1); \ + emith_lsr(d, d, 1); \ + emith_lsl(FC, FC, 31); \ + emith_or_r_r(d, FC); \ + emith_move_r_r(FC, AT); \ + emith_move_r_r(FNZ, d); \ + emith_cmp_rs = emith_cmp_rt = -1; \ +} while (0) + +// signed/unsigned extend + +#define emith_clear_msb(d, s, count) /* bits to clear */ do { \ + u32 t; \ + if ((count) >= 21) { \ + t = (count) - 21; \ + t = 0x7ff >> t; \ + emith_and_r_r_imm(d, s, t); \ + } else { \ + 
emith_lsl(d, s, count); \ + emith_lsr(d, d, count); \ + } \ +} while (0) +#define emith_clear_msb_c(cond, d, s, count) \ + emith_clear_msb(d, s, count) + +#define emith_sext(d, s, count) /* bits to keep */ do { \ + emith_lsl(d, s, 32-(count)); \ + emith_asr(d, d, 32-(count)); \ +} while (0) + +// multiply Rd = Rn*Rm (+ Ra) + +#define emith_mul(d, s1, s2) \ + EMIT(R5_MULW(d, s1, s2)) \ + +#define emith_mul_u64(dlo, dhi, s1, s2) \ + EMIT_R5_MULLU_REG(dlo, dhi, s1, s2) + +#define emith_mul_s64(dlo, dhi, s1, s2) \ + EMIT_R5_MULLS_REG(dlo, dhi, s1, s2) + +#define emith_mula_s64(dlo, dhi, s1, s2) do { \ + int t_ = rcache_get_tmp(); \ + EMIT_R5_MULLS_REG(t_, AT, s1, s2); \ + emith_add_r_r(dhi, AT); \ + emith_add_r_r(dlo, t_); \ + EMIT(R5_SLTU_REG(AT, dlo, t_)); \ + emith_add_r_r(dhi, AT); \ + rcache_free_tmp(t_); \ +} while (0) +#define emith_mula_s64_c(cond, dlo, dhi, s1, s2) \ + emith_mula_s64(dlo, dhi, s1, s2) + +// load/store. offs has 12 bits signed, hence larger offs may use a temp +static void emith_ld_offs(int sz, int rd, int rs, int o12) +{ + if (o12 >= -0x800 && o12 < 0x800) { + EMIT(R5_I_INSN(OP_LD, sz, rd, rs, o12)); + } else { + EMIT(R5_MOVT_IMM(AT, o12 + _CB(o12,1,11,12))); \ + EMIT(R5_R_INSN(OP_REG, F1_ADD,_, AT, rs, AT)); \ + EMIT(R5_I_INSN(OP_LD, sz, rd, AT, o12)); + } +} + +#define emith_read_r_r_offs_ptr(r, rs, offs) \ + emith_ld_offs(F1_P, r, rs, offs) +#define emith_read_r_r_offs_ptr_c(cond, r, rs, offs) \ + emith_read_r_r_offs_ptr(r, rs, offs) + +#define emith_read_r_r_offs(r, rs, offs) \ + emith_ld_offs(F1_W, r, rs, offs) +#define emith_read_r_r_offs_c(cond, r, rs, offs) \ + emith_read_r_r_offs(r, rs, offs) + +#define emith_read_r_r_r_ptr(r, rs, rm) do { \ + emith_add_r_r_r(AT, rs, rm); \ + emith_ld_offs(F1_P, r, AT, 0); \ +} while (0) +#define emith_read_r_r_r(r, rs, rm) do { \ + emith_add_r_r_r(AT, rs, rm); \ + emith_ld_offs(F1_W, r, AT, 0); \ +} while (0) +#define emith_read_r_r_r_c(cond, r, rs, rm) \ + emith_read_r_r_r(r, rs, rm) + +#define 
emith_read8_r_r_offs(r, rs, offs) \
+	emith_ld_offs(F1_BU, r, rs, offs)
+#define emith_read8_r_r_offs_c(cond, r, rs, offs) \
+	emith_read8_r_r_offs(r, rs, offs)
+
+#define emith_read8_r_r_r(r, rs, rm) do { \
+	emith_add_r_r_r(AT, rs, rm); \
+	emith_ld_offs(F1_BU, r, AT, 0); \
+} while (0)
+#define emith_read8_r_r_r_c(cond, r, rs, rm) \
+	emith_read8_r_r_r(r, rs, rm)
+
+#define emith_read16_r_r_offs(r, rs, offs) \
+	emith_ld_offs(F1_HU, r, rs, offs)
+#define emith_read16_r_r_offs_c(cond, r, rs, offs) \
+	emith_read16_r_r_offs(r, rs, offs)
+
+#define emith_read16_r_r_r(r, rs, rm) do { \
+	emith_add_r_r_r(AT, rs, rm); \
+	emith_ld_offs(F1_HU, r, AT, 0); \
+} while (0)
+#define emith_read16_r_r_r_c(cond, r, rs, rm) \
+	emith_read16_r_r_r(r, rs, rm)
+
+#define emith_read8s_r_r_offs(r, rs, offs) \
+	emith_ld_offs(F1_B, r, rs, offs)
+#define emith_read8s_r_r_offs_c(cond, r, rs, offs) \
+	emith_read8s_r_r_offs(r, rs, offs)
+
+#define emith_read8s_r_r_r(r, rs, rm) do { \
+	emith_add_r_r_r(AT, rs, rm); \
+	emith_ld_offs(F1_B, r, AT, 0); \
+} while (0)
+#define emith_read8s_r_r_r_c(cond, r, rs, rm) \
+	emith_read8s_r_r_r(r, rs, rm)
+
+#define emith_read16s_r_r_offs(r, rs, offs) \
+	emith_ld_offs(F1_H, r, rs, offs)
+#define emith_read16s_r_r_offs_c(cond, r, rs, offs) \
+	emith_read16s_r_r_offs(r, rs, offs)
+
+#define emith_read16s_r_r_r(r, rs, rm) do { \
+	emith_add_r_r_r(AT, rs, rm); \
+	emith_ld_offs(F1_H, r, AT, 0); \
+} while (0)
+#define emith_read16s_r_r_r_c(cond, r, rs, rm) \
+	emith_read16s_r_r_r(r, rs, rm)
+
+// store rt (size selector sz) to rs+o12. an offset fitting the signed
+// 12 bit range is encoded directly, anything else is added up in AT first
+static void emith_st_offs(int sz, int rt, int rs, int o12)
+{
+	if (o12 >= -0x800 && o12 < 0x800) { // same range as in emith_ld_offs
+		EMIT(R5_S_INSN(OP_ST, sz, rt, rs, o12));
+	} else {
+		// _CB term: same compensation as in emith_move_imm
+		EMIT(R5_MOVT_IMM(AT, o12 + _CB(o12,1,11,12)));
+		EMIT(R5_R_INSN(OP_REG, F1_ADD,_, AT, rs, AT));
+		EMIT(R5_S_INSN(OP_ST, sz, rt, AT, o12));
+	}
+}
+
+#define emith_write_r_r_offs_ptr(r, rs, offs) \
+	emith_st_offs(F1_P, r, rs, offs)
+#define emith_write_r_r_offs_ptr_c(cond, r, rs, offs) \
+	
emith_write_r_r_offs_ptr(r, rs, offs) + +#define emith_write_r_r_r_ptr(r, rs, rm) do { \ + emith_add_r_r_r(AT, rs, rm); \ + emith_st_offs(F1_P, r, AT, 0); \ +} while (0) +#define emith_write_r_r_r_ptr_c(cond, r, rs, rm) \ + emith_write_r_r_r_ptr(r, rs, rm) + +#define emith_write_r_r_offs(r, rs, offs) \ + emith_st_offs(F1_W, r, rs, offs) +#define emith_write_r_r_offs_c(cond, r, rs, offs) \ + emith_write_r_r_offs(r, rs, offs) + +#define emith_write_r_r_r(r, rs, rm) do { \ + emith_add_r_r_r(AT, rs, rm); \ + emith_st_offs(F1_W, r, AT, 0); \ +} while (0) +#define emith_write_r_r_r_c(cond, r, rs, rm) \ + emith_write_r_r_r(r, rs, rm) + +#define emith_ctx_read_ptr(r, offs) \ + emith_read_r_r_offs_ptr(r, CONTEXT_REG, offs) + +#define emith_ctx_read(r, offs) \ + emith_read_r_r_offs(r, CONTEXT_REG, offs) +#define emith_ctx_read_c(cond, r, offs) \ + emith_ctx_read(r, offs) + +#define emith_ctx_write_ptr(r, offs) \ + emith_write_r_r_offs_ptr(r, CONTEXT_REG, offs) + +#define emith_ctx_write(r, offs) \ + emith_write_r_r_offs(r, CONTEXT_REG, offs) + +#define emith_ctx_read_multiple(r, offs, cnt, tmpr) do { \ + int r_ = r, offs_ = offs, cnt_ = cnt; \ + for (; cnt_ > 0; r_++, offs_ += 4, cnt_--) \ + emith_ctx_read(r_, offs_); \ +} while (0) + +#define emith_ctx_write_multiple(r, offs, cnt, tmpr) do { \ + int r_ = r, offs_ = offs, cnt_ = cnt; \ + for (; cnt_ > 0; r_++, offs_ += 4, cnt_--) \ + emith_ctx_write(r_, offs_); \ +} while (0) + +// function call handling +#define emith_save_caller_regs(mask) do { \ + int _c; u32 _m = mask & 0x3fce0; /* x5-x7,x10-x17 */ \ + _c = count_bits(_m)&3; _m |= (1<<((4-_c)&3))-1; /* ABI align */ \ + int _s = count_bits(_m) * 4, _o = _s; \ + if (_s) emith_add_r_r_ptr_imm(SP, SP, -_s); \ + for (_c = HOST_REGS-1; _m && _c >= 0; _m &= ~(1 << _c), _c--) \ + if (_m & (1 << _c)) \ + { _o -= 4; if (_c) emith_write_r_r_offs(_c, SP, _o); } \ +} while (0) + +#define emith_restore_caller_regs(mask) do { \ + int _c; u32 _m = mask & 0x3fce0; \ + _c = 
count_bits(_m)&3; _m |= (1<<((4-_c)&3))-1; /* ABI align */ \ + int _s = count_bits(_m) * 4, _o = 0; \ + for (_c = 0; _m && _c < HOST_REGS; _m &= ~(1 << _c), _c++) \ + if (_m & (1 << _c)) \ + { if (_c) emith_read_r_r_offs(_c, SP, _o); _o += 4; } \ + if (_s) emith_add_r_r_ptr_imm(SP, SP, _s); \ +} while (0) + +#define host_arg2reg(rd, arg) \ + rd = (arg+10) + +#define emith_pass_arg_r(arg, reg) \ + emith_move_r_r(arg, reg) + +#define emith_pass_arg_imm(arg, imm) \ + emith_move_r_imm(arg, imm) + +// branching +#define emith_invert_branch(cond) /* inverted conditional branch */ \ + ((cond) ^ 0x01) + +// evaluate the emulated condition, returns a register/branch type pair +static int emith_cmpr_check(int rs, int rt, int cond, int *r, int *s) +{ + int b = -1; + + // condition check for comparing 2 registers + switch (cond) { + case DCOND_EQ: *r = rs; *s = rt; b = F1_BEQ; break; + case DCOND_NE: *r = rs; *s = rt; b = F1_BNE; break; + case DCOND_LO: *r = rs, *s = rt, b = F1_BLTU; break; // s < t, u + case DCOND_HS: *r = rs, *s = rt, b = F1_BGEU; break; // s >= t, u + case DCOND_LS: *r = rt, *s = rs, b = F1_BGEU; break; // s <= t, u + case DCOND_HI: *r = rt, *s = rs, b = F1_BLTU; break; // s > t, u + case DCOND_LT: *r = rs, *s = rt, b = F1_BLT; break; // s < t + case DCOND_GE: *r = rs, *s = rt, b = F1_BGE; break; // s >= t + case DCOND_LE: *r = rt, *s = rs, b = F1_BGE; break; // s <= t + case DCOND_GT: *r = rt, *s = rs, b = F1_BLT; break; // s > t + } + + return b; +} + +static int emith_cmpi_check(int rs, s32 imm, int cond, int *r, int *s) +{ + int b = -1; + + // condition check for comparing register with immediate + if (imm == 0) return emith_cmpr_check(rs, Z0, cond, r, s); + + emith_move_r_imm(AT, imm); + switch (cond) { + case DCOND_EQ: *r = AT, *s = rs, b = F1_BEQ; break; + case DCOND_NE: *r = AT, *s = rs, b = F1_BNE; break; + case DCOND_LO: *r = rs, *s = AT, b = F1_BLTU; break; // s < imm, u + case DCOND_HS: *r = rs, *s = AT, b = F1_BGEU; break; // s >= imm, u + case 
DCOND_LS: *r = AT, *s = rs, b = F1_BGEU; break; // s <= imm, u + case DCOND_HI: *r = AT, *s = rs, b = F1_BLTU; break; // s > imm, u + case DCOND_LT: *r = rs, *s = AT, b = F1_BLT; break; // s < imm + case DCOND_GE: *r = rs, *s = AT, b = F1_BGE; break; // s >= imm + case DCOND_LE: *r = AT, *s = rs, b = F1_BGE; break; // s <= imm + case DCOND_GT: *r = AT, *s = rs, b = F1_BLT; break; // s > imm + } + return b; +} + +static int emith_cond_check(int cond, int *r, int *s) +{ + int b = -1; + + *s = Z0; + if (emith_cmp_rs >= 0) { + if (emith_cmp_rt != -1) + b = emith_cmpr_check(emith_cmp_rs,emith_cmp_rt, cond,r,s); + else b = emith_cmpi_check(emith_cmp_rs,emith_cmp_imm,cond,r,s); + } + + // shortcut for V known to be 0 + if (b < 0 && emith_flg_noV) switch (cond) { + case DCOND_VS: *r = Z0; b = F1_BNE; break; // never + case DCOND_VC: *r = Z0; b = F1_BEQ; break; // always + case DCOND_LT: *r = FNZ, b = F1_BLT; break; // N + case DCOND_GE: *r = FNZ, b = F1_BGE; break; // !N + case DCOND_LE: *r = Z0, *s = FNZ, b = F1_BGE; break; // N || Z + case DCOND_GT: *r = Z0, *s = FNZ, b = F1_BLT; break; // !N && !Z + } + + // the full monty if no shortcut + if (b < 0) switch (cond) { + // conditions using NZ + case DCOND_EQ: *r = FNZ; b = F1_BEQ; break; // Z + case DCOND_NE: *r = FNZ; b = F1_BNE; break; // !Z + case DCOND_MI: *r = FNZ; b = F1_BLT; break; // N + case DCOND_PL: *r = FNZ; b = F1_BGE; break; // !N + // conditions using C + case DCOND_LO: *r = FC; b = F1_BNE; break; // C + case DCOND_HS: *r = FC; b = F1_BEQ; break; // !C + // conditions using CZ + case DCOND_LS: // C || Z + case DCOND_HI: // !C && !Z + EMIT(R5_ADD_IMM(AT, FC, -1)); // !C && !Z + EMIT(R5_AND_REG(AT, FNZ, AT)); + *r = AT, b = (cond == DCOND_HI ? F1_BNE : F1_BEQ); + break; + + // conditions using V + case DCOND_VS: // V + case DCOND_VC: // !V + EMIT(R5_XOR_REG(AT, FV, FNZ)); // V = Nt^Ns^Nd^C + EMIT(R5_LSRW_IMM(AT, AT, 31)); + EMIT(R5_XOR_REG(AT, AT, FC)); + *r = AT, b = (cond == DCOND_VS ? 
F1_BNE : F1_BEQ); + break; + // conditions using VNZ + case DCOND_LT: // N^V + case DCOND_GE: // !(N^V) + EMIT(R5_LSRW_IMM(AT, FV, 31)); // Nd^V = Nt^Ns^C + EMIT(R5_XOR_REG(AT, FC, AT)); + *r = AT, b = (cond == DCOND_LT ? F1_BNE : F1_BEQ); + break; + case DCOND_LE: // (N^V) || Z + case DCOND_GT: // !(N^V) && !Z + EMIT(R5_LSRW_IMM(AT, FV, 31)); // Nd^V = Nt^Ns^C + EMIT(R5_XOR_REG(AT, FC, AT)); + EMIT(R5_ADD_IMM(AT, AT, -1)); // !(Nd^V) && !Z + EMIT(R5_AND_REG(AT, FNZ, AT)); + *r = AT, b = (cond == DCOND_GT ? F1_BNE : F1_BEQ); + break; + } + return b; +} + +// NB: R5 unconditional jumps have only +/- 1MB range, hence use reg jumps +#define emith_jump(target) do { \ + uintptr_t target_ = (uintptr_t)(target); \ + EMIT(R5_MOVT_IMM(AT, target_ + _CB(target_,1,11,12))); \ + EMIT(R5_JR(AT, target_)); \ +} while (0) +#define emith_jump_patchable(target) \ + emith_jump(target) + +// NB: R5 conditional branches have only +/- 4KB range +#define emith_jump_cond(cond, target) do { \ + int r_, s_, mcond_ = emith_cond_check(cond, &r_, &s_); \ + u32 disp_ = (u8 *)target - (u8 *)tcache_ptr; \ + EMIT(R5_BCOND(mcond_,r_,s_,disp_ & 0x00001fff)); \ +} while (0) +#define emith_jump_cond_patchable(cond, target) \ + emith_jump_cond(cond, target) + +#define emith_jump_cond_inrange(target) \ + ((u8 *)target - (u8 *)tcache_ptr < 0x1000 && \ + (u8 *)target - (u8 *)tcache_ptr >= -0x1000+0x10) // mind cond_check + +// NB: returns position of patch for cache maintenance +#define emith_jump_patch(ptr, target, pos) do { \ + u32 *ptr_ = (u32 *)ptr; /* must skip condition check code */ \ + while ((*ptr_&0x77) != OP_JALR && (*ptr_&0x77) != OP_BCOND) ptr_ ++; \ + if ((*ptr_&0x77) == OP_BCOND) { \ + u32 *p_ = ptr_, disp_ = (u8 *)target - (u8 *)ptr_; \ + u32 f1_ = _CB(*ptr_,3,12,0); \ + u32 r_ = _CB(*ptr_,5,15,0), s_ = _CB(*ptr_,5,20,0); \ + EMIT_PTR(p_, R5_BCOND(f1_, r_, s_, disp_ & 0x00001fff)); \ + } else { \ + u32 *p_ = -- ptr_; \ + uintptr_t target_ = (uintptr_t)(target); \ + EMIT_PTR(p_, 
R5_MOVT_IMM(AT, target_ + _CB(target_,1,11,12))); \ + EMIT_PTR(p_, R5_JR(AT, target_)); \ + } \ + if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)(ptr_); \ +} while (0) + +#define emith_jump_patch_inrange(ptr, target) \ + ((u8 *)target - (u8 *)ptr < 0x1000 && \ + (u8 *)target - (u8 *)ptr >= -0x1000+0x10) // mind cond_check +#define emith_jump_patch_size() 8 + +#define emith_jump_at(ptr, target) do { \ + uintptr_t target_ = (uintptr_t)(target); \ + u32 *ptr_ = (u32 *)ptr; \ + EMIT_PTR(ptr_, R5_MOVT_IMM(AT, target_ + _CB(target_,1,11,12))); \ + EMIT_PTR(ptr_, R5_JR(AT, target_)); \ +} while (0) +#define emith_jump_at_size() 8 + +#define emith_jump_reg(r) \ + EMIT(R5_JR(r, 0)) +#define emith_jump_reg_c(cond, r) \ + emith_jump_reg(r) + +#define emith_jump_ctx(offs) do { \ + emith_ctx_read_ptr(AT, offs); \ + emith_jump_reg(AT); \ +} while (0) +#define emith_jump_ctx_c(cond, offs) \ + emith_jump_ctx(offs) + +#define emith_call(target) do { \ + uintptr_t target_ = (uintptr_t)(target); \ + EMIT(R5_MOVT_IMM(AT, target_ + _CB(target_,1,11,12))); \ + EMIT(R5_JALR(LR, AT, target_)); \ +} while (0) +#define emith_call_cond(cond, target) \ + emith_call(target) + +#define emith_call_reg(r) \ + EMIT(R5_JALR(LR, r, 0)) + +#define emith_call_ctx(offs) do { \ + emith_ctx_read_ptr(AT, offs); \ + emith_call_reg(AT); \ +} while (0) + +#define emith_call_cleanup() /**/ + +#define emith_ret() \ + EMIT(R5_JR(LR, 0)) +#define emith_ret_c(cond) \ + emith_ret() + +#define emith_ret_to_ctx(offs) \ + emith_ctx_write_ptr(LR, offs) + +#define emith_add_r_ret(r) \ + emith_add_r_r_ptr(r, LR) + +#define emith_push_ret(r) do { \ + emith_add_r_r_ptr_imm(SP, SP, -16); /* ABI requires 16 byte aligment */\ + emith_write_r_r_offs(LR, SP, 4); \ + if ((r) > 0) emith_write_r_r_offs(r, SP, 0); \ +} while (0) + +#define emith_pop_and_ret(r) do { \ + if ((r) > 0) emith_read_r_r_offs(r, SP, 0); \ + emith_read_r_r_offs(LR, SP, 4); \ + emith_add_r_r_ptr_imm(SP, SP, 16); \ + emith_ret(); \ +} while (0) + + +// 
emitter ABI stuff +#define emith_pool_check() /**/ +#define emith_pool_commit(j) /**/ +#define emith_insn_ptr() ((u8 *)tcache_ptr) +#define emith_flush() /**/ +#define host_instructions_updated(base, end) __builtin___clear_cache(base, end) +#define emith_update_cache() /**/ +#define emith_rw_offs_max() 0x7ff + +// SH2 drc specific +#define emith_sh2_drc_entry() do { \ + int _c; u32 _m = 0x0ffc0202; /* x1,x9,x18-x27 */ \ + _c = count_bits(_m)&3; _m |= (1<<((4-_c)&3))-1; /* ABI align */ \ + int _s = count_bits(_m) * 4, _o = _s; \ + if (_s) emith_add_r_r_ptr_imm(SP, SP, -_s); \ + for (_c = HOST_REGS-1; _m && _c >= 0; _m &= ~(1 << _c), _c--) \ + if (_m & (1 << _c)) \ + { _o -= 4; if (_c) emith_write_r_r_offs(_c, SP, _o); } \ +} while (0) +#define emith_sh2_drc_exit() do { \ + int _c; u32 _m = 0x0ffc0202; \ + _c = count_bits(_m)&3; _m |= (1<<((4-_c)&3))-1; /* ABI align */ \ + int _s = count_bits(_m) * 4, _o = 0; \ + for (_c = 0; _m && _c < HOST_REGS; _m &= ~(1 << _c), _c++) \ + if (_m & (1 << _c)) \ + { if (_c) emith_read_r_r_offs(_c, SP, _o); _o += 4; } \ + if (_s) emith_add_r_r_ptr_imm(SP, SP, _s); \ + emith_ret(); \ +} while (0) + +// NB: assumes a is in arg0, tab, func and mask are temp +#define emith_sh2_rcall(a, tab, func, mask) do { \ + emith_lsr(mask, a, SH2_READ_SHIFT); \ + emith_add_r_r_r_lsl_ptr(tab, tab, mask, PTR_SCALE+1); \ + emith_read_r_r_offs_ptr(func, tab, 0); \ + emith_read_r_r_offs(mask, tab, 1 << PTR_SCALE); \ + emith_addf_r_r_r_ptr(func, func, func); \ +} while (0) + +// NB: assumes a, val are in arg0 and arg1, tab and func are temp +#define emith_sh2_wcall(a, val, tab, func) do { \ + emith_lsr(func, a, SH2_WRITE_SHIFT); \ + emith_lsl(func, func, PTR_SCALE); \ + emith_read_r_r_r_ptr(func, tab, func); \ + emith_move_r_r_ptr(12, CONTEXT_REG); /* arg2 */ \ + emith_jump_reg(func); \ +} while (0) + +#define emith_sh2_delay_loop(cycles, reg) do { \ + int sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); \ + int t1 = rcache_get_tmp(); \ + int t2 = 
rcache_get_tmp(); \ + int t3 = rcache_get_tmp(); \ + /* if (sr < 0) return */ \ + emith_cmp_r_imm(sr, 0); \ + EMITH_JMP_START(DCOND_LE); \ + /* turns = sr.cycles / cycles */ \ + emith_asr(t2, sr, 12); \ + emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)) + 1); \ + emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */ \ + rcache_free_tmp(t3); \ + if (reg >= 0) { \ + /* if (reg <= turns) turns = reg-1 */ \ + t3 = rcache_get_reg(reg, RC_GR_RMW, NULL); \ + emith_cmp_r_r(t3, t2); \ + EMITH_SJMP_START(DCOND_HI); \ + emith_sub_r_r_imm_c(DCOND_LS, t2, t3, 1); \ + EMITH_SJMP_END(DCOND_HI); \ + /* if (reg <= 1) turns = 0 */ \ + emith_cmp_r_imm(t3, 1); \ + EMITH_SJMP_START(DCOND_HI); \ + emith_move_r_imm_c(DCOND_LS, t2, 0); \ + EMITH_SJMP_END(DCOND_HI); \ + /* reg -= turns */ \ + emith_sub_r_r(t3, t2); \ + } \ + /* sr.cycles -= turns * cycles; */ \ + emith_move_r_imm(t1, cycles); \ + emith_mul(t1, t2, t1); \ + emith_sub_r_r_r_lsl(sr, sr, t1, 12); \ + EMITH_JMP_END(DCOND_LE); \ + rcache_free_tmp(t1); \ + rcache_free_tmp(t2); \ +} while (0) + +/* + * if Q + * t = carry(Rn += Rm) + * else + * t = carry(Rn -= Rm) + * T ^= t + */ +#define emith_sh2_div1_step(rn, rm, sr) do { \ + emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \ + EMITH_JMP3_START(DCOND_EQ); \ + EMITH_HINT_COND(DCOND_CS); \ + emith_addf_r_r(rn, rm); \ + EMITH_JMP3_MID(DCOND_EQ); \ + EMITH_HINT_COND(DCOND_CS); \ + emith_subf_r_r(rn, rm); \ + EMITH_JMP3_END(); \ + emith_eor_r_r(sr, FC); \ +} while (0) + +/* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */ +#define emith_sh2_macl(ml, mh, rn, rm, sr) do { \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* MACH top 16 bits unused if saturated. sign ext for overfl detect */ \ + emith_sext(mh, mh, 16); \ + EMITH_SJMP_END(DCOND_EQ); \ + emith_mula_s64(ml, mh, rn, rm); \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* overflow if top 17 bits of MACH aren't all 1 or 0 */ \ + /* to check: add MACH >> 31 to MACH >> 15. 
this is 0 if no overflow */ \ + emith_asr(rn, mh, 15); \ + emith_add_r_r_r_lsr(rn, rn, mh, 31); /* sum = (MACH>>31)+(MACH>>15) */ \ + emith_teq_r_r(rn, Z0); /* (need only N and Z flags) */ \ + EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> ov */ \ + emith_move_r_imm_c(DCOND_NE, ml, 0x0000); /* -overflow */ \ + emith_move_r_imm_c(DCOND_NE, mh, 0x8000); \ + EMITH_SJMP_START(DCOND_PL); /* sum > 0 -> +ovl */ \ + emith_sub_r_imm_c(DCOND_MI, ml, 1); /* 0xffffffff */ \ + emith_sub_r_imm_c(DCOND_MI, mh, 1); /* 0x00007fff */ \ + EMITH_SJMP_END(DCOND_PL); \ + EMITH_SJMP_END(DCOND_EQ); \ + EMITH_SJMP_END(DCOND_EQ); \ +} while (0) + +/* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */ +#define emith_sh2_macw(ml, mh, rn, rm, sr) do { \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* XXX: MACH should be untouched when S is set? */ \ + emith_asr(mh, ml, 31); /* sign ext MACL to MACH for ovrfl check */ \ + EMITH_SJMP_END(DCOND_EQ); \ + emith_mula_s64(ml, mh, rn, rm); \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* overflow if top 33 bits of MACH:MACL aren't all 1 or 0 */ \ + /* to check: add MACL[31] to MACH. this is 0 if no overflow */ \ + emith_lsr(rn, ml, 31); \ + emith_add_r_r(rn, mh); /* sum = MACH + ((MACL>>31)&1) */ \ + emith_teq_r_r(rn, Z0); /* (need only N and Z flags) */ \ + EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \ + /* XXX: LSB signalling only in SH1, or in SH2 too? 
*/ \
+	emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \
+	emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* negative ovrfl */ \
+	EMITH_SJMP_START(DCOND_PL); /* sum > 0 -> positive ovrfl */ \
+	emith_sub_r_imm_c(DCOND_MI, ml, 1); /* 0x7fffffff */ \
+	EMITH_SJMP_END(DCOND_PL); \
+	EMITH_SJMP_END(DCOND_EQ); \
+	EMITH_SJMP_END(DCOND_EQ); \
+} while (0)
+
+#define emith_write_sr(sr, srcr) do { \
+	emith_lsr(sr, sr , 10); emith_lsl(sr, sr, 10); \
+	emith_lsl(AT, srcr, 22); emith_lsr(AT, AT, 22); \
+	emith_or_r_r(sr, AT); \
+} while (0)
+
+#define emith_carry_to_t(sr, is_sub) do { \
+	emith_and_r_imm(sr, 0xfffffffe); \
+	emith_or_r_r(sr, FC); \
+} while (0)
+
+#define emith_t_to_carry(sr, is_sub) do { \
+	emith_and_r_r_imm(FC, sr, 1); \
+} while (0)
+
+#define emith_tpop_carry(sr, is_sub) do { \
+	emith_and_r_r_imm(FC, sr, 1); \
+	emith_eor_r_r(sr, FC); \
+} while (0)
+
+#define emith_tpush_carry(sr, is_sub) \
+	emith_or_r_r(sr, FC)
+
+#ifdef T
+// T bit handling
+#define emith_invert_cond(cond) \
+	((cond) ^ 1)
+
+static void emith_clr_t_cond(int sr)
+{
+	emith_bic_r_imm(sr, T);
+}
+
+// emit code setting T in sr if cond holds. the 0/1 result is computed
+// without a branch where possible and is ORed into sr.
+// NOTE(review): T is never cleared here, presumably the caller has
+// done that through emith_clr_t_cond - confirm
+static void emith_set_t_cond(int sr, int cond)
+{
+	int b, r, s;
+	u8 *ptr;
+	u32 val = 0, inv = 0;
+
+	// try to avoid jumping around if possible
+	if (emith_cmp_rs >= 0) {
+		if (emith_cmp_rt >= 0)
+			b = emith_cmpr_check(emith_cmp_rs, emith_cmp_rt, cond, &r, &s);
+		else
+			b = emith_cmpi_check(emith_cmp_rs, emith_cmp_imm, cond, &r, &s);
+	} else {
+		b = emith_cond_check(cond, &r, &s);
+		if (r == Z0) {
+			if (b == F1_BEQ || b == F1_BGE || b == F1_BGEU)
+				emith_or_r_imm(sr, T);
+			return;
+		} else if (r == FC)
+			val++, inv = (b == F1_BEQ);
+	}
+
+	if (!val) switch (b) {
+	case F1_BEQ: if (s == Z0) { EMIT(R5_SLTU_IMM(AT,r ,1)); r=AT; val++; break; }
+		EMIT(R5_XOR_REG(AT, r, s));
+		EMIT(R5_SLTU_IMM(AT,AT, 1)); r=AT; val++; break;
+	// x != 0 is "0 < x" unsigned on registers, hence the register form
+	// (the immediate form would compare Z0 against a register number)
+	case F1_BNE: if (s == Z0) { EMIT(R5_SLTU_REG(AT,Z0,r)); r=AT; val++; break; }
+		EMIT(R5_XOR_REG(AT, r, s));
+		EMIT(R5_SLTU_REG(AT,Z0,AT)); r=AT; 
val++; break; + case F1_BLTU: EMIT(R5_SLTU_REG(AT, r, s)); r=AT; val++; break; + case F1_BGEU: EMIT(R5_SLTU_REG(AT, r, s)); r=AT; val++; inv++; break; + case F1_BLT: EMIT(R5_SLT_REG(AT, r, s)); r=AT; val++; break; + case F1_BGE: EMIT(R5_SLT_REG(AT, r, s)); r=AT; val++; inv++; break; + } + if (val) { + emith_or_r_r(sr, r); + if (inv) + emith_eor_r_imm(sr, T); + return; + } + + // can't obtain result directly, use presumably slower jump !cond + or sr,T + b = emith_invert_branch(b); + ptr = tcache_ptr; + EMIT(R5_BCOND(b, r, s, 0)); + emith_or_r_imm(sr, T); + val = (u8 *)tcache_ptr - (u8 *)(ptr); + EMIT_PTR(ptr, R5_BCOND(b, r, s, val & 0x00001fff)); +} + +#define emith_get_t_cond() -1 + +#define emith_sync_t(sr) ((void)sr) + +#define emith_invalidate_t() + +static void emith_set_t(int sr, int val) +{ + if (val) + emith_or_r_imm(sr, T); + else + emith_bic_r_imm(sr, T); +} + +static int emith_tst_t(int sr, int tf) +{ + emith_tst_r_imm(sr, T); + return tf ? DCOND_NE: DCOND_EQ; +} +#endif From cf0dd6ae486908cb9b1ee6a1bcb48e163cf9bcee Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 19 Nov 2019 21:56:50 +0100 Subject: [PATCH 0239/1110] sh2 drc, improved memory management --- cpu/sh2/compiler.c | 376 +++++++++++++++++++++++++++------------------ 1 file changed, 223 insertions(+), 153 deletions(-) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index d1cde69e..58ddd86f 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -262,27 +262,21 @@ static void REGPARM(3) *sh2_drc_log_entry(void *block, SH2 *sh2, u32 sr) } #endif -#define TCACHE_BUFFERS 3 // we have 3 translation cache buffers, split from one drc/cmn buffer. // BIOS shares tcache with data array because it's only used for init // and can be discarded early -// XXX: need to tune sizes -static const int tcache_sizes[TCACHE_BUFFERS] = { - DRC_TCACHE_SIZE * 30 / 32, // ROM (rarely used), DRAM - DRC_TCACHE_SIZE / 32, // BIOS, data array in master sh2 - DRC_TCACHE_SIZE / 32, // ... 
slave +#define TCACHE_BUFFERS 3 + + +struct ring_buffer { + u8 *base; // ring buffer memory + unsigned item_sz; // size of one buffer item + unsigned size; // number of itmes in ring + int first, next; // read and write pointers + int used; // number of used items in ring }; -static u8 *tcache_bases[TCACHE_BUFFERS]; -static u8 *tcache_ptrs[TCACHE_BUFFERS]; -static u8 *tcache_limit[TCACHE_BUFFERS]; - -// ptr for code emiters -static u8 *tcache_ptr; - -#define MAX_BLOCK_ENTRIES (BLOCK_INSN_LIMIT / 6) - enum { BL_JMP=1, BL_LDJMP, BL_JCCBLX }; struct block_link { short tcache_id; @@ -326,13 +320,35 @@ struct block_desc { int refcount; #endif int entry_count; - struct block_entry entryp[MAX_BLOCK_ENTRIES]; + struct block_entry *entryp; +}; + +struct block_list { + struct block_desc *block; // block reference + struct block_list *next; // pointers for doubly linked list + struct block_list *prev; + struct block_list **head; // list head (for removing from list) + struct block_list *l_next; +}; + +static u8 *tcache_ptr; // ptr for code emitters + +// XXX: need to tune sizes + +static struct ring_buffer tcache_ring[TCACHE_BUFFERS]; +static const int tcache_sizes[TCACHE_BUFFERS] = { + DRC_TCACHE_SIZE * 30 / 32, // ROM (rarely used), DRAM + DRC_TCACHE_SIZE / 32, // BIOS, data array in master sh2 + DRC_TCACHE_SIZE / 32, // ... slave }; #define BLOCK_MAX_COUNT(tcid) ((tcid) ? 256 : 32*256) +static struct ring_buffer block_ring[TCACHE_BUFFERS]; static struct block_desc *block_tables[TCACHE_BUFFERS]; -static int block_counts[TCACHE_BUFFERS]; -static int block_limit[TCACHE_BUFFERS]; + +#define ENTRY_MAX_COUNT(tcid) ((tcid) ? 8*512 : 256*512) +static struct ring_buffer entry_ring[TCACHE_BUFFERS]; +static struct block_entry *entry_tables[TCACHE_BUFFERS]; // we have block_link_pool to avoid using mallocs #define BLOCK_LINK_MAX_COUNT(tcid) ((tcid) ? 512 : 32*512) @@ -345,15 +361,6 @@ static struct block_link *blink_free[TCACHE_BUFFERS]; #define RAM_SIZE(tcid) ((tcid) ? 
0x1000 : 0x40000) #define INVAL_PAGE_SIZE 0x100 -struct block_list { - struct block_desc *block; - struct block_list *next; - struct block_list *prev; - struct block_list **head; - struct block_list *l_next; -}; -struct block_list *blist_free; - static struct block_list *inactive_blocks[TCACHE_BUFFERS]; // array of pointers to block_lists for RAM and 2 data arrays @@ -366,6 +373,11 @@ static struct block_entry **hash_tables[TCACHE_BUFFERS]; #define HASH_FUNC(hash_tab, addr, mask) \ (hash_tab)[((addr) >> 1) & (mask)] +#define BLOCK_LIST_MAX_COUNT (64*1024) +static struct block_list *block_list_pool; +static int block_list_pool_count; +static struct block_list *blist_free; + #if (DRC_DEBUG & 128) #if BRANCH_CACHE int bchit, bcmiss; @@ -429,7 +441,7 @@ static void rcache_free_tmp(int hr); // there must be at least the free (not context or statically mapped) amount of // PRESERVED/TEMPORARY registers used by handlers in worst case (currently 4). // there must be at least 3 PARAM, and PARAM+TEMPORARY must be at least 4. -// SR and R0 should by all means be statically mapped. +// SR must and R0 should by all means be statically mapped. // XXX the static definition of SR MUST match that in compiler.h // PC and PR must not be statically mapped (accessed in context by utils). 
@@ -544,6 +556,72 @@ static struct block_entry *dr_get_entry(u32 pc, int is_slave, int *tcache_id) // --------------------------------------------------------------- +// ring buffer management +#define RING_INIT(r,m,n) *(r) = (struct ring_buffer) { .base = (u8 *)m, \ + .item_sz = sizeof(*(m)), .size = n }; + +static void *ring_alloc(struct ring_buffer *rb, int count) +{ + // allocate space in ring buffer + void *p; + + p = rb->base + rb->next * rb->item_sz; + if (rb->next+count > rb->size) { + rb->used += rb->size - rb->next; + p = rb->base; // wrap if overflow at end + rb->next = count; + } else { + rb->next += count; + if (rb->next == rb->size) rb->next = 0; + } + + rb->used += count; + return p; +} + +static void ring_wrap(struct ring_buffer *rb) +{ + // insufficient space at end of buffer memory, wrap around + rb->used += rb->size - rb->next; + rb->next = 0; +} + +static void ring_free(struct ring_buffer *rb, int count) +{ + // free oldest space in ring buffer + rb->first += count; + if (rb->first >= rb->size) rb->first -= rb->size; + + rb->used -= count; +} + +static void ring_free_p(struct ring_buffer *rb, void *p) +{ + // free ring buffer space upto given pointer + rb->first = ((u8 *)p - rb->base) / rb->item_sz; + + rb->used = rb->next - rb->first; + if (rb->used < 0) rb->used += rb->size; +} + +static void *ring_reset(struct ring_buffer *rb) +{ + // reset to initial state + rb->first = rb->next = rb->used = 0; + return rb->base + rb->next * rb->item_sz; +} + +static void *ring_first(struct ring_buffer *rb) +{ + return rb->base + rb->first * rb->item_sz; +} + +static void *ring_next(struct ring_buffer *rb) +{ + return rb->base + rb->next * rb->item_sz; +} + + // block management static void add_to_block_list(struct block_list **blist, struct block_desc *block) { @@ -552,13 +630,14 @@ static void add_to_block_list(struct block_list **blist, struct block_desc *bloc if (blist_free) { added = blist_free; blist_free = added->next; + } else if 
(block_list_pool_count >= BLOCK_LIST_MAX_COUNT) { + printf( "block list overflow\n"); + exit(1); } else { - added = malloc(sizeof(*added)); - } - if (!added) { - elprintf(EL_ANOMALY, "drc OOM (1)"); - return; + added = block_list_pool + block_list_pool_count; + block_list_pool_count ++; } + added->block = block; added->l_next = block->list; block->list = added; @@ -954,6 +1033,7 @@ static void dr_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nolit, i rm_from_block_lists(bd); bd->addr = bd->size = bd->addr_lit = bd->size_lit = 0; bd->entry_count = 0; + bd->entryp = NULL; } emith_update_cache(); } @@ -976,26 +1056,28 @@ static struct block_desc *dr_find_inactive_block(int tcache_id, u16 crc, return NULL; } -static struct block_desc *dr_add_block(u32 addr, int size, +static struct block_desc *dr_add_block(int entries, u32 addr, int size, u32 addr_lit, int size_lit, u16 crc, int is_slave, int *blk_id) { struct block_entry *be; struct block_desc *bd; int tcache_id; - int *bcount; // do a lookup to get tcache_id and override check be = dr_get_entry(addr, is_slave, &tcache_id); if (be != NULL) dbg(1, "block override for %08x", addr); - bcount = &block_counts[tcache_id]; - if (*bcount == block_limit[tcache_id]) { + if (block_ring[tcache_id].used + 1 > block_ring[tcache_id].size || + entry_ring[tcache_id].used + entries > entry_ring[tcache_id].size) { dbg(1, "bd overflow for tcache %d", tcache_id); return NULL; } - bd = &block_tables[tcache_id][*bcount]; + *blk_id = block_ring[tcache_id].next; + bd = ring_alloc(&block_ring[tcache_id], 1); + bd->entryp = ring_alloc(&entry_ring[tcache_id], entries); + bd->addr = addr; bd->size = size; bd->addr_lit = addr_lit; @@ -1009,11 +1091,6 @@ static struct block_desc *dr_add_block(u32 addr, int size, bd->refcount = 0; #endif - *blk_id = *bcount; - (*bcount)++; - if (*bcount >= BLOCK_MAX_COUNT(tcache_id)) - *bcount = 0; - return bd; } @@ -1094,45 +1171,54 @@ static void REGPARM(3) *dr_lookup_block(u32 pc, SH2 *sh2, int 
*tcache_id) static void dr_free_oldest_block(int tcache_id) { - struct block_desc *bd; + struct block_desc *bf; - if (block_limit[tcache_id] >= BLOCK_MAX_COUNT(tcache_id)) { - // block desc wrap around - block_limit[tcache_id] = 0; + bf = ring_first(&block_ring[tcache_id]); + if (bf->addr && bf->entry_count) + dr_rm_block_entry(bf, tcache_id, 0, 1); + ring_free(&block_ring[tcache_id], 1); + + if (block_ring[tcache_id].used) { + bf = ring_first(&block_ring[tcache_id]); + ring_free_p(&entry_ring[tcache_id], bf->entryp); + ring_free_p(&tcache_ring[tcache_id], bf->tcache_ptr); + } else { + // reset since size of code block isn't known if no successor block exists + ring_reset(&block_ring[tcache_id]); + ring_reset(&entry_ring[tcache_id]); + ring_reset(&tcache_ring[tcache_id]); } - bd = &block_tables[tcache_id][block_limit[tcache_id]]; - - if (bd->tcache_ptr && bd->tcache_ptr < tcache_ptrs[tcache_id]) { - // cache wrap around - tcache_ptrs[tcache_id] = bd->tcache_ptr; - } - - if (bd->addr && bd->entry_count) - dr_rm_block_entry(bd, tcache_id, 0, 1); - - block_limit[tcache_id]++; - if (block_limit[tcache_id] >= BLOCK_MAX_COUNT(tcache_id)) - block_limit[tcache_id] = 0; - bd = &block_tables[tcache_id][block_limit[tcache_id]]; - if (bd->tcache_ptr >= tcache_ptrs[tcache_id]) - tcache_limit[tcache_id] = bd->tcache_ptr; - else - tcache_limit[tcache_id] = tcache_bases[tcache_id] + tcache_sizes[tcache_id]; } -static u8 *dr_prepare_cache(int tcache_id, int insn_count) +static inline void dr_reserve_cache(int tcache_id, struct ring_buffer *rb, int count) { - u8 *limit = tcache_limit[tcache_id]; - - // if no block desc available - if (block_counts[tcache_id] == block_limit[tcache_id]) + // while not enough space available + if (rb->next + count >= rb->size){ + // not enough space in rest of buffer -> wrap around + while (rb->first >= rb->next && rb->used) + dr_free_oldest_block(tcache_id); + if (rb->first == 0 && rb->used) + dr_free_oldest_block(tcache_id); + ring_wrap(rb); + } + 
while (rb->first >= rb->next && rb->next + count > rb->first && rb->used) dr_free_oldest_block(tcache_id); +} - // while not enough cache space left (limit - tcache_ptr < max space needed) - while (tcache_limit[tcache_id] - tcache_ptrs[tcache_id] < insn_count * 128) +static u8 *dr_prepare_cache(int tcache_id, int insn_count, int entry_count) +{ + int bf = block_ring[tcache_id].first; + + // reserve one block desc + if (block_ring[tcache_id].used >= block_ring[tcache_id].size) dr_free_oldest_block(tcache_id); + // reserve block entries + dr_reserve_cache(tcache_id, &entry_ring[tcache_id], entry_count); + // reserve cache space + dr_reserve_cache(tcache_id, &tcache_ring[tcache_id], insn_count*128); - if (limit != tcache_limit[tcache_id]) { + if (bf != block_ring[tcache_id].first) { + // deleted some block(s), clear branch cache and return stack #if BRANCH_CACHE if (tcache_id) memset32(sh2s[tcache_id-1].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4); @@ -1152,29 +1238,27 @@ static u8 *dr_prepare_cache(int tcache_id, int insn_count) } #endif } - return (u8 *)tcache_ptrs[tcache_id]; + + return ring_next(&tcache_ring[tcache_id]); } static void dr_flush_tcache(int tcid) { int i; #if (DRC_DEBUG & 1) - int tc_used, bl_used; - - tc_used = tcache_sizes[tcid] - (tcache_limit[tcid] - tcache_ptrs[tcid]); - bl_used = BLOCK_MAX_COUNT(tcid) - (block_limit[tcid] - block_counts[tcid]); - elprintf(EL_STATUS, "tcache #%d flush! (%d/%d, bds %d/%d)", tcid, tc_used, - tcache_sizes[tcid], bl_used, BLOCK_MAX_COUNT(tcid)); + elprintf(EL_STATUS, "tcache #%d flush! 
(%d/%d, bds %d/%d bes %d/%d)", tcid, + tcache_ring[tcid].used, tcache_ring[tcid].size, block_ring[tcid].used, + block_ring[tcid].size, entry_ring[tcid].used, entry_ring[tcid].size); #endif - block_counts[tcid] = 0; - block_limit[tcid] = BLOCK_MAX_COUNT(tcid) - 1; + ring_reset(&tcache_ring[tcid]); + ring_reset(&block_ring[tcid]); + ring_reset(&entry_ring[tcid]); + block_link_pool_counts[tcid] = 0; blink_free[tcid] = NULL; memset(unresolved_links[tcid], 0, sizeof(*unresolved_links[0]) * HASH_TABLE_SIZE(tcid)); memset(hash_tables[tcid], 0, sizeof(*hash_tables[0]) * HASH_TABLE_SIZE(tcid)); - tcache_ptrs[tcid] = tcache_bases[tcid]; - tcache_limit[tcid] = tcache_bases[tcid] + tcache_sizes[tcid]; if (Pico32xMem->sdram != NULL) { if (tcid == 0) { // ROM, RAM memset(Pico32xMem->drcblk_ram, 0, sizeof(Pico32xMem->drcblk_ram)); @@ -1195,7 +1279,7 @@ static void dr_flush_tcache(int tcid) } } #if (DRC_DEBUG & 4) - tcache_dsm_ptrs[tcid] = tcache_bases[tcid]; + tcache_dsm_ptrs[tcid] = tcache_ring[tcid].base; #endif for (i = 0; i < RAM_SIZE(tcid) / INVAL_PAGE_SIZE; i++) @@ -3095,13 +3179,13 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) #endif } - tcache_ptr = dr_prepare_cache(tcache_id, (end_pc - base_pc) / 2); + tcache_ptr = dr_prepare_cache(tcache_id, (end_pc - base_pc) / 2, branch_target_count); #if (DRC_DEBUG & 4) tcache_dsm_ptrs[tcache_id] = tcache_ptr; #endif - block = dr_add_block(base_pc, end_pc - base_pc, base_literals, - end_literals - base_literals, crc, sh2->is_slave, &blkid_main); + block = dr_add_block(branch_target_count, base_pc, end_pc - base_pc, + base_literals, end_literals-base_literals, crc, sh2->is_slave, &blkid_main); if (block == NULL) return NULL; @@ -3143,7 +3227,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // make block entry v = block->entry_count; entry = &block->entryp[v]; - if (v < ARRAY_SIZE(block->entryp)) + if (v < branch_target_count) { entry = &block->entryp[v]; entry->pc = pc; @@ -4726,7 +4810,7 @@ 
end_op: for (bl = block->entryp[i].o_links; bl; bl = bl->o_next) memcpy(bl->jdisp, bl->blx ?: bl->jump, emith_jump_at_size()); - tcache_ptrs[tcache_id] = tcache_ptr; + ring_alloc(&tcache_ring[tcache_id], tcache_ptr - block_entry_ptr); host_instructions_updated(block_entry_ptr, tcache_ptr); dr_activate_block(block, tcache_id, sh2->is_slave); @@ -4736,10 +4820,10 @@ end_op: dbg(2, " block #%d,%d -> %p tcache %d/%d, insns %d -> %d %.3f", tcache_id, blkid_main, tcache_ptr, - tcache_ptr - tcache_bases[tcache_id], tcache_sizes[tcache_id], + tcache_ring[tcache_id].used, tcache_ring[tcache_id].size, insns_compiled, host_insn_count, (float)host_insn_count / insns_compiled); if ((sh2->pc & 0xc6000000) == 0x02000000) { // ROM - dbg(2, " hash collisions %d/%d", hash_collisions, block_counts[tcache_id]); + dbg(2, " hash collisions %d/%d", hash_collisions, block_ring[tcache_id].used); Pico32x.emu_flags |= P32XF_DRC_ROM_C; } /* @@ -5220,10 +5304,7 @@ static void block_stats(void) printf("block stats:\n"); for (b = 0; b < ARRAY_SIZE(block_tables); b++) { - for (i = 0; i < block_counts[b]; i++) - if (block_tables[b][i].addr != 0) - total += block_tables[b][i].refcount; - for (i = block_limit[b]; i < BLOCK_MAX_COUNT(b); i++) + for (i = block_ring[b].first; i != block_ring[b].next; i = (i+1)%block_ring[b].size) if (block_tables[b][i].addr != 0) total += block_tables[b][i].refcount; } @@ -5233,20 +5314,11 @@ static void block_stats(void) struct block_desc *blk, *maxb = NULL; int max = 0; for (b = 0; b < ARRAY_SIZE(block_tables); b++) { - for (i = 0; i < block_counts[b]; i++) { - blk = &block_tables[b][i]; - if (blk->addr != 0 && blk->refcount > max) { + for (i = block_ring[b].first; i != block_ring[b].next; i = (i+1)%block_ring[b].size) + if ((blk = &block_tables[b][i])->addr != 0 && blk->refcount > max) { max = blk->refcount; maxb = blk; } - } - for (i = block_limit[b]; i < BLOCK_MAX_COUNT(b); i++) { - blk = &block_tables[b][i]; - if (blk->addr != 0 && blk->refcount > max) { - max = 
blk->refcount; - maxb = blk; - } - } } if (maxb == NULL) break; @@ -5255,12 +5327,9 @@ static void block_stats(void) maxb->refcount = 0; } - for (b = 0; b < ARRAY_SIZE(block_tables); b++) { - for (i = 0; i < block_counts[b]; i++) + for (b = 0; b < ARRAY_SIZE(block_tables); b++) + for (i = block_ring[b].first; i != block_ring[b].next; i = (i+1)%block_ring[b].size) block_tables[b][i].refcount = 0; - for (i = block_limit[b]; i < BLOCK_MAX_COUNT(b); i++) - block_tables[b][i].refcount = 0; - } #endif } @@ -5272,10 +5341,7 @@ void entry_stats(void) printf("block entry stats:\n"); for (b = 0; b < ARRAY_SIZE(block_tables); b++) { - for (i = 0; i < block_counts[b]; i++) - for (j = 0; j < block_tables[b][i].entry_count; j++) - total += block_tables[b][i].entryp[j].entry_count; - for (i = block_limit[b]; i < BLOCK_MAX_COUNT(b); i++) + for (i = block_ring[b].first; i != block_ring[b].next; i = (i+1)%block_ring[b].size) for (j = 0; j < block_tables[b][i].entry_count; j++) total += block_tables[b][i].entryp[j].entry_count; } @@ -5286,15 +5352,7 @@ void entry_stats(void) struct block_entry *maxb = NULL; int max = 0; for (b = 0; b < ARRAY_SIZE(block_tables); b++) { - for (i = 0; i < block_counts[b]; i++) { - blk = &block_tables[b][i]; - for (j = 0; j < blk->entry_count; j++) - if (blk->entryp[j].entry_count > max) { - max = blk->entryp[j].entry_count; - maxb = &blk->entryp[j]; - } - } - for (i = block_limit[b]; i < BLOCK_MAX_COUNT(b); i++) { + for (i = block_ring[b].first; i != block_ring[b].next; i = (i+1)%block_ring[b].size) { blk = &block_tables[b][i]; for (j = 0; j < blk->entry_count; j++) if (blk->entryp[j].entry_count > max) { @@ -5311,10 +5369,7 @@ void entry_stats(void) } for (b = 0; b < ARRAY_SIZE(block_tables); b++) { - for (i = 0; i < block_counts[b]; i++) - for (j = 0; j < block_tables[b][i].entry_count; j++) - block_tables[b][i].entryp[j].entry_count = 0; - for (i = block_limit[b]; i < BLOCK_MAX_COUNT(b); i++) + for (i = block_ring[b].first; i != block_ring[b].next; i 
= (i+1)%block_ring[b].size) for (j = 0; j < block_tables[b][i].entry_count; j++) block_tables[b][i].entryp[j].entry_count = 0; } @@ -5432,6 +5487,9 @@ int sh2_drc_init(SH2 *sh2) block_tables[i] = calloc(BLOCK_MAX_COUNT(i), sizeof(*block_tables[0])); if (block_tables[i] == NULL) goto fail; + entry_tables[i] = calloc(ENTRY_MAX_COUNT(i), sizeof(*entry_tables[0])); + if (entry_tables[i] == NULL) + goto fail; block_link_pool[i] = calloc(BLOCK_LINK_MAX_COUNT(i), sizeof(*block_link_pool[0])); if (block_link_pool[i] == NULL) @@ -5449,33 +5507,39 @@ int sh2_drc_init(SH2 *sh2) unresolved_links[i] = calloc(HASH_TABLE_SIZE(i), sizeof(*unresolved_links[0])); if (unresolved_links[i] == NULL) goto fail; +//atexit(sh2_drc_finish); + + RING_INIT(&block_ring[i], block_tables[i], BLOCK_MAX_COUNT(i)); + RING_INIT(&entry_ring[i], entry_tables[i], ENTRY_MAX_COUNT(i)); } - memset(block_counts, 0, sizeof(block_counts)); - for (i = 0; i < ARRAY_SIZE(block_counts); i++) { - block_limit[i] = BLOCK_MAX_COUNT(i) - 1; - } + + block_list_pool = calloc(BLOCK_LIST_MAX_COUNT, sizeof(*block_list_pool)); + if (block_list_pool == NULL) + goto fail; + block_list_pool_count = 0; + blist_free = NULL; + memset(block_link_pool_counts, 0, sizeof(block_link_pool_counts)); - for (i = 0; i < ARRAY_SIZE(blink_free); i++) { - blink_free[i] = NULL; - } + memset(blink_free, 0, sizeof(blink_free)); drc_cmn_init(); rcache_init(); + tcache_ptr = tcache; sh2_generate_utils(); host_instructions_updated(tcache, tcache_ptr); emith_update_cache(); - tcache_bases[0] = tcache_ptrs[0] = tcache_ptr; - tcache_limit[0] = tcache_bases[0] + tcache_sizes[0] - (tcache_ptr-tcache); - for (i = 1; i < ARRAY_SIZE(tcache_bases); i++) { - tcache_bases[i] = tcache_ptrs[i] = tcache_bases[i - 1] + tcache_sizes[i - 1]; - tcache_limit[i] = tcache_bases[i] + tcache_sizes[i]; + i = tcache_ptr - tcache; + RING_INIT(&tcache_ring[0], tcache_ptr, tcache_sizes[0] - i); + for (i = 1; i < ARRAY_SIZE(tcache_ring); i++) { + RING_INIT(&tcache_ring[i], 
tcache_ring[i-1].base + tcache_sizes[i-1], + tcache_sizes[i]); } #if (DRC_DEBUG & 4) for (i = 0; i < ARRAY_SIZE(block_tables); i++) - tcache_dsm_ptrs[i] = tcache_bases[i]; + tcache_dsm_ptrs[i] = tcache_ring[i].base; // disasm the utils tcache_dsm_ptrs[0] = tcache; do_host_disasm(0); @@ -5498,7 +5562,6 @@ fail: void sh2_drc_finish(SH2 *sh2) { - struct block_list *bl, *bn; int i; if (block_tables[0] == NULL) @@ -5514,17 +5577,22 @@ void sh2_drc_finish(SH2 *sh2) for (i = 0; i < TCACHE_BUFFERS; i++) { printf("~~~ tcache %d\n", i); #if 0 - tcache_dsm_ptrs[i] = tcache_bases[i]; - tcache_ptr = tcache_ptrs[i]; - do_host_disasm(i); - if (tcache_limit[i] < tcache_bases[i] + tcache_sizes[i]) { - tcache_dsm_ptrs[i] = tcache_limit[i]; - tcache_ptr = tcache_bases[i] + tcache_sizes[i]; + if (tcache_ring[i].first < tcache_ring[i].next) { + tcache_dsm_ptrs[i] = tcache_ring[i].first; + tcache_ptr = tcache_ring[i].next; + do_host_disasm(i); + } else if (tcache_ring[i].used) { + tcache_dsm_ptrs[i] = tcache_ring[i].first; + tcache_ptr = tcache_ring[i].base + tcache_ring[i].size; + do_host_disasm(i); + tcache_dsm_ptrs[i] = tcache_ring[i].base; + tcache_ptr = tcache_ring[i].next; do_host_disasm(i); } #endif printf("max links: %d\n", block_link_pool_counts[i]); } + printf("max block list: %d\n", block_list_pool_count); #endif sh2_drc_flush_all(); @@ -5533,6 +5601,9 @@ void sh2_drc_finish(SH2 *sh2) if (block_tables[i] != NULL) free(block_tables[i]); block_tables[i] = NULL; + if (entry_tables[i] != NULL) + free(entry_tables[i]); + entry_tables[i] = NULL; if (block_link_pool[i] != NULL) free(block_link_pool[i]); block_link_pool[i] = NULL; @@ -5548,10 +5619,9 @@ void sh2_drc_finish(SH2 *sh2) } } - for (bl = blist_free; bl; bl = bn) { - bn = bl->next; - free(bl); - } + if (block_list_pool != NULL) + free(block_list_pool); + block_list_pool = NULL; blist_free = NULL; drc_cmn_cleanup(); From f2d19ddf2a4f2d8f3950d3d5dd90fdcd74cc7a82 Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 19 Nov 2019 
21:59:44 +0100 Subject: [PATCH 0240/1110] sh2 drc, small improvements and bug fixes for code emitters --- cpu/drc/emit_arm.c | 20 ++++--- cpu/drc/emit_arm64.c | 14 +++-- cpu/drc/emit_mips.c | 32 +++++++---- cpu/drc/emit_riscv.c | 129 ++++++++++++++++++++++++++++++++++++------- cpu/drc/emit_x86.c | 11 +++- cpu/sh2/compiler.c | 71 ++++++++++++------------ 6 files changed, 194 insertions(+), 83 deletions(-) diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index 8f633fa3..8ea148eb 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -478,6 +478,7 @@ static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int switch (op) { case A_OP_MOV: + case A_OP_MVN: rn = 0; // use MVN if more bits 1 than 0 if (count_bits(imm) > 16) { @@ -501,7 +502,7 @@ static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int return; } #else - for (i = 2, u = v; i > 0; i--, u >>= 8) + for (i = 3, u = v; i > 0; i--, u >>= 8) while (u > 0xff && !(u & 3)) u >>= 2; if (u) { // 4 insns needed... 
@@ -1387,22 +1388,25 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) } while (0) /* + * T = carry(Rn = (Rn << 1) | T) * if Q - * t = carry(Rn += Rm) + * T ^= !carry(Rn += Rm) * else - * t = carry(Rn -= Rm) - * T ^= t + * T ^= !carry(Rn -= Rm) */ #define emith_sh2_div1_step(rn, rm, sr) do { \ void *jmp0, *jmp1; \ + emith_tpop_carry(sr, 0); /* Rn = 2*Rn+T */\ + emith_adcf_r_r_r(rn, rn, rn); \ + emith_tpush_carry(sr, 0); \ emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \ JMP_POS(jmp0); /* beq do_sub */ \ - emith_addf_r_r(rn, rm); \ - emith_eor_r_imm_c(A_COND_CS, sr, T); \ + emith_addf_r_r(rn, rm); /* Rn += Rm */ \ + emith_eor_r_imm_c(A_COND_CC, sr, T); \ JMP_POS(jmp1); /* b done */ \ JMP_EMIT(A_COND_EQ, jmp0); /* do_sub: */ \ - emith_subf_r_r(rn, rm); \ - emith_eor_r_imm_c(A_COND_CC, sr, T); \ + emith_subf_r_r(rn, rm); /* Rn -= Rm */ \ + emith_eor_r_imm_c(A_COND_CS, sr, T); \ JMP_EMIT(A_COND_AL, jmp1); /* done: */ \ } while (0) diff --git a/cpu/drc/emit_arm64.c b/cpu/drc/emit_arm64.c index 3f40d4cd..8f4718ee 100644 --- a/cpu/drc/emit_arm64.c +++ b/cpu/drc/emit_arm64.c @@ -372,7 +372,7 @@ enum { AM_IDX, AM_IDXPOST, AM_IDXREG, AM_IDXPRE }; #define EMITH_HINT_COND(cond) /**/ -// "simple" jump (no more then a few insns) +// "simple" jump (no more than a few insns) // ARM32 will use conditional instructions here #define EMITH_SJMP_START EMITH_JMP_START #define EMITH_SJMP_END EMITH_JMP_END @@ -1240,22 +1240,26 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) } while (0) /* + * T = carry(Rn = (Rn << 1) | T) * if Q - * t = carry(Rn += Rm) + * t = !carry(Rn += Rm) * else - * t = carry(Rn -= Rm) + * t = !carry(Rn -= Rm) * T ^= t */ #define emith_sh2_div1_step(rn, rm, sr) do { \ int tmp_ = rcache_get_tmp(); \ - emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \ + emith_tpop_carry(sr, 0); \ + emith_adcf_r_r_r(rn, rn, rn); \ + emith_tpush_carry(sr, 0); \ + emith_tst_r_imm(sr, Q); \ EMITH_SJMP3_START(DCOND_EQ); \ emith_addf_r_r(rn, rm); \ 
emith_adc_r_r_r(tmp_, Z0, Z0); \ + emith_eor_r_imm(tmp_, 1); \ EMITH_SJMP3_MID(DCOND_EQ); \ emith_subf_r_r(rn, rm); \ emith_adc_r_r_r(tmp_, Z0, Z0); \ - emith_eor_r_imm(tmp_, 1); \ EMITH_SJMP3_END(); \ emith_eor_r_r(sr, tmp_); \ rcache_free_tmp(tmp_); \ diff --git a/cpu/drc/emit_mips.c b/cpu/drc/emit_mips.c index 6f07e509..c9c006c8 100644 --- a/cpu/drc/emit_mips.c +++ b/cpu/drc/emit_mips.c @@ -7,9 +7,10 @@ */ #define HOST_REGS 32 -// MIPS ABI: params: r4-r7, return: r2-r3, temp: r1(at),r8-r15,r24-r25,r31(ra), +// MIPS32 ABI: params: r4-r7, return: r2-r3, temp: r1(at),r8-r15,r24-r25,r31(ra) // saved: r16-r23,r30, reserved: r0(zero), r26-r27(irq), r28(gp), r29(sp) // r1,r15,r24,r25(at,t7-t9) are used internally by the code emitter +// MIPSN32/MIPS64 ABI: params: r4-r11, no caller-reserved save area on stack #define RET_REG 2 // v0 #define PARAM_REGS { 4, 5, 6, 7 } // a0-a3 #define PRESERVED_REGS { 16, 17, 18, 19, 20, 21, 22, 23 } // s0-s7 @@ -424,7 +425,7 @@ static void *emith_branch(u32 op) JMP_EMIT_NC(else_ptr); \ } -// "simple" jump (no more then a few insns) +// "simple" jump (no more than a few insns) // ARM32 will use conditional instructions here #define EMITH_SJMP_START EMITH_JMP_START #define EMITH_SJMP_END EMITH_JMP_END @@ -761,7 +762,7 @@ static void emith_move_imm(int r, uintptr_t imm) EMIT(MIPS_OR_IMM(r, r, imm & 0xffff)); } else #endif - if ((s16)imm == imm) { + if ((s16)imm == imm) { EMIT(MIPS_ADD_IMM(r, Z0, imm)); } else if (!((u32)imm >> 16)) { EMIT(MIPS_OR_IMM(r, Z0, imm)); @@ -1576,22 +1577,31 @@ static int emith_cond_check(int cond, int *r) } while (0) /* + * T = !carry(Rn = (Rn << 1) | T) * if Q - * t = carry(Rn += Rm) + * C = carry(Rn += Rm) * else - * t = carry(Rn -= Rm) - * T ^= t + * C = carry(Rn -= Rm) + * T ^= C */ #define emith_sh2_div1_step(rn, rm, sr) do { \ + int t_ = rcache_get_tmp(); \ + emith_and_r_r_imm(AT, sr, T); \ + emith_lsr(FC, rn, 31); /*Rn = (Rn<<1)+T*/ \ + emith_lsl(t_, rn, 1); \ + emith_or_r_r(t_, AT); \ + 
emith_or_r_imm(sr, T); /* T = !carry */ \ + emith_eor_r_r(sr, FC); \ emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \ EMITH_JMP3_START(DCOND_EQ); \ - EMITH_HINT_COND(DCOND_CS); \ - emith_addf_r_r(rn, rm); \ + emith_add_r_r_r(rn, t_, rm); \ + EMIT(MIPS_SLTU_REG(FC, rn, t_)); \ EMITH_JMP3_MID(DCOND_EQ); \ - EMITH_HINT_COND(DCOND_CS); \ - emith_subf_r_r(rn, rm); \ + emith_sub_r_r_r(rn, t_, rm); \ + EMIT(MIPS_SLTU_REG(FC, t_, rn)); \ EMITH_JMP3_END(); \ - emith_eor_r_r(sr, FC); \ + emith_eor_r_r(sr, FC); /* T ^= carry */ \ + rcache_free_tmp(t_); \ } while (0) /* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */ diff --git a/cpu/drc/emit_riscv.c b/cpu/drc/emit_riscv.c index 84c3ccb2..b66d6350 100644 --- a/cpu/drc/emit_riscv.c +++ b/cpu/drc/emit_riscv.c @@ -7,7 +7,7 @@ */ #define HOST_REGS 32 -// RISC-V ABI: params: x10-x17, return: r10-x11, temp: x1(ra),x5-x7,x28-x31 +// RISC-V ABI: params: x10-x17, return: x10-x11, temp: x1(ra),x5-x7,x28-x31 // saved: x8(fp),x9,x18-x27, reserved: x0(zero), x4(tp), x3(gp), x2(sp) // x28-x31(t3-t6) are used internally by the code emitter #define RET_REG 10 // a0 @@ -74,13 +74,14 @@ _CB(imm,8,12,0), rd, op) // opcode -enum { OP_LUI=0x37, OP_JAL=0x6f, OP_JALR=0x67, OP_BCOND=0x63, OP_LD=0x03, - OP_ST=0x23, OP_IMM=0x13, OP_IMM32=0x1b, OP_REG=0x33, OP_REG32=0x3b }; +enum { OP_LUI=0x37, OP_AUIPC=0x17, OP_JAL=0x6f, // 20-bit immediate + OP_JALR=0x67, OP_BCOND=0x63, OP_LD=0x03, OP_ST=0x23, // 12-bit immediate + OP_IMM=0x13, OP_REG=0x33, OP_IMM32=0x1b, OP_REG32=0x3b }; // func3 -enum { F1_ADD, F1_SL, F1_SLT, F1_SLTU, F1_XOR, F1_SR, F1_OR, F1_AND }; -enum { F1_BEQ, F1_BNE, F1_BLT=4, F1_BGE, F1_BLTU, F1_BGEU }; -enum { F1_B, F1_H, F1_W, F1_D, F1_BU, F1_HU, F1_WU }; +enum { F1_ADD, F1_SL, F1_SLT, F1_SLTU, F1_XOR, F1_SR, F1_OR, F1_AND };// IMM/REG enum { F1_MUL, F1_MULH, F1_MULHSU, F1_MULHU, F1_DIV, F1_DIVU, F1_REM, F1_REMU }; +enum { F1_BEQ, F1_BNE, F1_BLT=4, F1_BGE, F1_BLTU, F1_BGEU }; // BCOND +enum { F1_B, F1_H, F1_W, F1_D, 
F1_BU, F1_HU, F1_WU }; // LD/ST // func7 enum { F2_ALT=0x20, F2_MULDIV=0x01 }; @@ -141,6 +142,8 @@ enum { F2_ALT=0x20, F2_MULDIV=0x01 }; R5_OR_IMM(rd, Z0, imm12) #define R5_MOVT_IMM(rd, imm20) \ R5_U_INSN(OP_LUI, rd, imm20) +#define R5_MOVA_IMM(rd, imm20) \ + R5_U_INSN(OP_AUIPC, rd, imm20) // rd = rs SHIFT imm5/imm6 #define R5_LSL_IMM(rd, rs, bits) \ @@ -212,8 +215,10 @@ enum { F2_ALT=0x20, F2_MULDIV=0x01 }; #define PTR_SCALE 3 // NB: must split 64 bit result into 2 32 bit registers -// NB: this expects 32 bit values in s1+s2, correctly sign extended to 64 bits +// NB: expects 32 bit values in s1+s2, correctly sign extended to 64 bits #define EMIT_R5_MULLU_REG(dlo, dhi, s1, s2) do { \ + /*EMIT(R5_ADDW_IMM(s1, s1, 0));*/ \ + /*EMIT(R5_ADDW_IMM(s2, s2, 0));*/ \ EMIT(R5_MUL(dlo, s1, s2)); \ EMIT(R5_LSR_IMM(dhi, dlo, 32)); \ EMIT(R5_LSL_IMM(dlo, dlo, 32)); \ @@ -307,7 +312,7 @@ enum { F2_ALT=0x20, F2_MULDIV=0x01 }; JMP_EMIT_NC(else_ptr); \ } -// "simple" jump (no more then a few insns) +// "simple" jump (no more than a few insns) // ARM32 will use conditional instructions here #define EMITH_SJMP_START EMITH_JMP_START #define EMITH_SJMP_END EMITH_JMP_END @@ -620,6 +625,67 @@ static void emith_set_compare_flags(int rs, int rt, s32 imm) // move immediate +#define MAX_HOST_LITERALS 32 // pool must be smaller than 4 KB +static uintptr_t literal_pool[MAX_HOST_LITERALS]; +static u32 *literal_insn[MAX_HOST_LITERALS]; +static int literal_pindex, literal_iindex; + +static inline int emith_pool_literal(uintptr_t imm) +{ + int idx = literal_pindex - 8; // max look behind in pool + // see if one of the last literals was the same (or close enough) + for (idx = (idx < 0 ? 
0 : idx); idx < literal_pindex; idx++) + if (imm == literal_pool[idx]) + break; + if (idx == literal_pindex) // store new literal + literal_pool[literal_pindex++] = imm; + return idx; +} + +static void emith_pool_commit(int jumpover) +{ + int i, sz = literal_pindex * sizeof(uintptr_t); + u8 *pool = (u8 *)tcache_ptr; + + // nothing to commit if pool is empty + if (sz == 0) + return; + // align pool to pointer size + if (jumpover) + pool += sizeof(u32); + i = (uintptr_t)pool & (sizeof(void *)-1); + pool += (i ? sizeof(void *)-i : 0); + // need branch over pool if not at block end + if (jumpover) + EMIT(R5_B(sz + (pool-(u8 *)tcache_ptr))); + // safety check - pool must be after insns and reachable + if ((u32)(pool - (u8 *)literal_insn[0] + 8) > 0x7ff) { + elprintf(EL_STATUS|EL_SVP|EL_ANOMALY, + "pool offset out of range"); + exit(1); + } + // copy pool and adjust addresses in insns accessing the pool + memcpy(pool, literal_pool, sz); + for (i = 0; i < literal_iindex; i++) { + *literal_insn[i] += ((u8 *)pool - (u8 *)literal_insn[i]) << 20; + } + // count pool constants as insns for statistics + for (i = 0; i < literal_pindex * sizeof(uintptr_t)/sizeof(u32); i++) + COUNT_OP; + + tcache_ptr = (void *)((u8 *)pool + sz); + literal_pindex = literal_iindex = 0; +} + +static void emith_pool_check(void) +{ + // check if pool must be committed + if (literal_iindex > MAX_HOST_LITERALS-4 || (literal_pindex && + (u8 *)tcache_ptr - (u8 *)literal_insn[0] > 0x700)) + // pool full, or displacement is approaching the limit + emith_pool_commit(1); +} + static void emith_move_imm(int r, uintptr_t imm) { u32 lui = imm + _CB(imm,1,11,12); @@ -632,8 +698,24 @@ static void emith_move_imm(int r, uintptr_t imm) EMIT(R5_ADD_IMM(r, Z0, imm)); } +static void emith_move_ptr_imm(int r, uintptr_t imm) +{ +#if __riscv_xlen == 64 + if ((s32)imm != imm) { + int idx; + if (literal_iindex >= MAX_HOST_LITERALS) + emith_pool_commit(1); + idx = emith_pool_literal(imm); + EMIT(R5_MOVA_IMM(AT, 0)); // loads 
PC of MOVA insn... + 4 in LD + literal_insn[literal_iindex++] = (u32 *)tcache_ptr; + EMIT(R5_I_INSN(OP_LD, F1_P, r, AT, idx*sizeof(uintptr_t) + 4)); + } else +#endif + emith_move_imm(r, imm); +} + #define emith_move_r_ptr_imm(r, imm) \ - emith_move_imm(r, (uintptr_t)(imm)) + emith_move_ptr_imm(r, (uintptr_t)(imm)) #define emith_move_r_imm(r, imm) \ emith_move_imm(r, (u32)(imm)) @@ -644,7 +726,6 @@ static void emith_move_imm(int r, uintptr_t imm) EMIT(R5_ADD_IMM(r, Z0, (s8)(imm))) #define emith_move_r_imm_s8_patch(ptr, imm) do { \ u32 *ptr_ = (u32 *)ptr; \ - while ((*ptr_ & 0xff07f) != R5_ADD_IMM(Z0, Z0, 0)) ptr_++; \ EMIT_PTR(ptr_, (*ptr_ & 0x000fffff) | ((u16)(s8)(imm)<<20)); \ } while (0) @@ -1235,7 +1316,6 @@ static int emith_cond_check(int cond, int *r, int *s) // NB: returns position of patch for cache maintenance #define emith_jump_patch(ptr, target, pos) do { \ u32 *ptr_ = (u32 *)ptr; /* must skip condition check code */ \ - while ((*ptr_&0x77) != OP_JALR && (*ptr_&0x77) != OP_BCOND) ptr_ ++; \ if ((*ptr_&0x77) == OP_BCOND) { \ u32 *p_ = ptr_, disp_ = (u8 *)target - (u8 *)ptr_; \ u32 f1_ = _CB(*ptr_,3,12,0); \ @@ -1319,8 +1399,6 @@ static int emith_cond_check(int cond, int *r, int *s) // emitter ABI stuff -#define emith_pool_check() /**/ -#define emith_pool_commit(j) /**/ #define emith_insn_ptr() ((u8 *)tcache_ptr) #define emith_flush() /**/ #define host_instructions_updated(base, end) __builtin___clear_cache(base, end) @@ -1404,22 +1482,31 @@ static int emith_cond_check(int cond, int *r, int *s) } while (0) /* + * T = !carry(Rn = (Rn << 1) | T) * if Q - * t = carry(Rn += Rm) + * C = carry(Rn += Rm) * else - * t = carry(Rn -= Rm) - * T ^= t + * C = carry(Rn -= Rm) + * T ^= C */ #define emith_sh2_div1_step(rn, rm, sr) do { \ + int t_ = rcache_get_tmp(); \ + emith_and_r_r_imm(AT, sr, T); \ + emith_lsr(FC, rn, 31); /*Rn = (Rn<<1)+T*/ \ + emith_lsl(t_, rn, 1); \ + emith_or_r_r(t_, AT); \ + emith_or_r_imm(sr, T); /* T = !carry */ \ + emith_eor_r_r(sr, FC); \ 
emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \ EMITH_JMP3_START(DCOND_EQ); \ - EMITH_HINT_COND(DCOND_CS); \ - emith_addf_r_r(rn, rm); \ + emith_add_r_r_r(rn, t_, rm); \ + EMIT(R5_SLTU_REG(FC, rn, t_)); \ EMITH_JMP3_MID(DCOND_EQ); \ - EMITH_HINT_COND(DCOND_CS); \ - emith_subf_r_r(rn, rm); \ + emith_sub_r_r_r(rn, t_, rm); \ + EMIT(R5_SLTU_REG(FC, t_, rn)); \ EMITH_JMP3_END(); \ - emith_eor_r_r(sr, FC); \ + emith_eor_r_r(sr, FC); /* T ^= carry */ \ + rcache_free_tmp(t_); \ } while (0) /* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */ diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 9ed8b563..0b3f7697 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -974,7 +974,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common JMP8_EMIT_NC(else_ptr); \ } -// "simple" jump (no more then a few insns) +// "simple" jump (no more than a few insns) // ARM will use conditional instructions here #define EMITH_SJMP_START EMITH_JMP_START #define EMITH_SJMP_END EMITH_JMP_END @@ -1287,15 +1287,19 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common emith_adc_r_r(sr, sr) /* + * T = carry(Rn = (Rn << 1) | T) * if Q * t = carry(Rn += Rm) * else * t = carry(Rn -= Rm) - * T ^= t + * T = !(T ^ t) */ #define emith_sh2_div1_step(rn, rm, sr) do { \ u8 *jmp0, *jmp1; \ int tmp_ = rcache_get_tmp(); \ + emith_tpop_carry(sr, 0); /* Rn = 2*Rn+T */\ + emith_adcf_r_r_r(rn, rn, rn); \ + emith_tpush_carry(sr, 0); /* T = C1 */ \ emith_eor_r_r(tmp_, tmp_); \ emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \ JMP8_POS(jmp0); /* je do_sub */ \ @@ -1305,7 +1309,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common emith_sub_r_r(rn, rm); \ JMP8_EMIT_NC(jmp1); /* done: */ \ emith_adc_r_r(tmp_, tmp_); \ - emith_eor_r_r(sr, tmp_); \ + emith_eor_r_r(sr, tmp_);/* T = !(C1^C2) */\ + emith_eor_r_imm(sr, T); \ rcache_free_tmp(tmp_); \ } while (0) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 
58ddd86f..a12dfe96 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -2957,20 +2957,18 @@ static void emit_branch_linkage_code(SH2 *sh2, struct block_desc *block, int tca struct block_link *bl; int u, v, tmp; + emith_flush(); for (u = 0; u < link_count; u++) { emith_pool_check(); // look up local branch targets - v = find_in_sorted_linkage(targets, target_count, links[u].pc); - if (v >= 0) { - if (! targets[v].ptr) { + if (links[u].mask & 0x2) { + v = find_in_sorted_linkage(targets, target_count, links[u].pc); + if (v < 0 || ! targets[v].ptr) { // forward branch not yet resolved, prepare external linking emith_jump_patch(links[u].ptr, tcache_ptr, NULL); bl = dr_prepare_ext_branch(block->entryp, links[u].pc, sh2->is_slave, tcache_id); - if (bl) { - emith_flush(); // flush to inhibit insn swapping + if (bl) bl->type = BL_LDJMP; - } - tmp = rcache_get_tmp_arg(0); emith_move_r_imm(tmp, links[u].pc); rcache_free_tmp(tmp); @@ -2985,7 +2983,7 @@ static void emit_branch_linkage_code(SH2 *sh2, struct block_desc *block, int tca } } else { // external or exit, emit blx area entry - void *target = (links[u].pc & 1 ? sh2_drc_exit : sh2_drc_dispatcher); + void *target = (links[u].mask & 0x1 ? 
sh2_drc_exit : sh2_drc_dispatcher); if (links[u].bl) links[u].bl->blx = tcache_ptr; emith_jump_patch(links[u].ptr, tcache_ptr, NULL); @@ -3024,6 +3022,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) u8 op_flags[BLOCK_INSN_LIMIT]; + enum flg_states { FLG_UNKNOWN, FLG_UNUSED, FLG_0, FLG_1 }; struct drcf { int delay_reg:8; u32 loop_type:8; @@ -3032,6 +3031,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) u32 test_irq:1; u32 pending_branch_direct:1; u32 pending_branch_indirect:1; + u32 Tflag:2, Mflag:2; } drcf = { 0, }; #if LOOP_OPTIMIZER @@ -3169,7 +3169,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) if (m3 && count_bits(m3) < count_bits(rcache_vregs_reg) && pinned_loop_count < ARRAY_SIZE(pinned_loops)-1) { pinned_loops[pinned_loop_count++] = - (struct linkage) { .mask = m3, .pc = base_pc + 2*v }; + (struct linkage) { .pc = base_pc + 2*v, .mask = m3 }; } else op_flags[v] &= ~OF_BASIC_LOOP; } @@ -3220,6 +3220,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); FLUSH_CYCLES(sr); emith_sync_t(sr); + drcf.Mflag = FLG_UNKNOWN; rcache_flush(); emith_flush(); } @@ -3302,7 +3303,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) if (blx_target_count < ARRAY_SIZE(blx_targets)) { // exit via stub in blx table (saves some 1-3 insns in the main flow) blx_targets[blx_target_count++] = - (struct linkage) { .ptr = tcache_ptr, .pc = pc|1, .bl = NULL }; + (struct linkage) { .pc = pc, .ptr = tcache_ptr, .mask = 0x1 }; emith_jump_patchable(tcache_ptr); } else { // blx table full, must inline exit code @@ -3319,7 +3320,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // exit via stub in blx table (saves some 1-3 insns in the main flow) emith_cmp_r_imm(sr, 0); blx_targets[blx_target_count++] = - (struct linkage) { .ptr = tcache_ptr, .pc = pc|1, .bl = NULL }; + (struct linkage) { .pc = pc, .ptr = tcache_ptr, .mask = 0x1 }; 
emith_jump_cond_patchable(DCOND_LE, tcache_ptr); } else { // blx table full, must inline exit code @@ -3704,6 +3705,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_invalidate_t(); emith_bic_r_imm(sr, M|Q|T); + drcf.Mflag = FLG_0; break; case 2: // MOVT Rn 0000nnnn00101001 sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); @@ -3781,6 +3783,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_eor_r_r_lsr(tmp, tmp2, 31); emith_or_r_r(sr, tmp); // T = Q^M rcache_free(tmp); + drcf.Mflag = FLG_UNKNOWN; goto end_op; case 0x08: // TST Rm,Rn 0010nnnnmmmm1000 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); @@ -3846,17 +3849,16 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); tmp = rcache_get_reg(SHR_MACL, RC_GR_WRITE, NULL); + tmp4 = rcache_get_tmp(); if (op & 1) { emith_sext(tmp, tmp2, 16); - } else + emith_sext(tmp4, tmp3, 16); + } else { emith_clear_msb(tmp, tmp2, 16); - tmp2 = rcache_get_tmp(); - if (op & 1) { - emith_sext(tmp2, tmp3, 16); - } else - emith_clear_msb(tmp2, tmp3, 16); - emith_mul(tmp, tmp, tmp2); - rcache_free_tmp(tmp2); + emith_clear_msb(tmp4, tmp3, 16); + } + emith_mul(tmp, tmp, tmp4); + rcache_free_tmp(tmp4); goto end_op; } goto default_; @@ -3904,28 +3906,27 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // Q = M ^ Q1 ^ Q2 // T = (Q == M) = !(Q ^ M) = !(Q1 ^ Q2) tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); - tmp2 = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp4); + tmp2 = rcache_get_reg(GET_Rn(), RC_GR_RMW, NULL); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_sync_t(sr); - EMITH_HINT_COND(DCOND_CS); - emith_tpop_carry(sr, 0); - emith_adcf_r_r_r(tmp2, tmp4, tmp4); - emith_tpush_carry(sr, 0); // keep Q1 in T for now - rcache_free(tmp4); tmp = rcache_get_tmp(); - emith_and_r_r_imm(tmp, sr, M); - emith_eor_r_r_lsr(sr, tmp, 
M_SHIFT - Q_SHIFT); // Q ^= M + if (drcf.Mflag != FLG_0) { + emith_and_r_r_imm(tmp, sr, M); + emith_eor_r_r_lsr(sr, tmp, M_SHIFT - Q_SHIFT); // Q ^= M + } rcache_free_tmp(tmp); - // add or sub, invert T if carry to get Q1 ^ Q2 - // in: (Q ^ M) passed in Q, Q1 in T + // shift Rn, add T, add or sub Rm, set T = !(Q1 ^ Q2) + // in: (Q ^ M) passed in Q emith_sh2_div1_step(tmp2, tmp3, sr); tmp = rcache_get_tmp(); - emith_bic_r_imm(sr, Q); // Q = M - emith_and_r_r_imm(tmp, sr, M); - emith_or_r_r_lsr(sr, tmp, M_SHIFT - Q_SHIFT); - emith_and_r_r_imm(tmp, sr, T); // Q = M ^ Q1 ^ Q2 + emith_or_r_imm(sr, Q); // Q = !T + emith_and_r_r_imm(tmp, sr, T); emith_eor_r_r_lsl(sr, tmp, Q_SHIFT); - emith_eor_r_imm(sr, T); // T = !(Q1 ^ Q2) + if (drcf.Mflag != FLG_0) { // Q = M ^ !T = M ^ Q1 ^ Q2 + emith_and_r_r_imm(tmp, sr, M); + emith_eor_r_r_lsr(sr, tmp, M_SHIFT - Q_SHIFT); + } + rcache_free_tmp(tmp); goto end_op; case 0x05: // DMULU.L Rm,Rn 0011nnnnmmmm0101 tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); @@ -4627,7 +4628,7 @@ end_op: // local forward jump target = tcache_ptr; blx_targets[blx_target_count++] = - (struct linkage) { .pc = target_pc, .ptr = target, .bl = NULL }; + (struct linkage) { .pc = target_pc, .ptr = target, .mask = 0x2 }; if (cond != -1) emith_jump_cond_patchable(cond, target); else { From f1da0a362f536c8df30a739541e3665024b40544 Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 20 Nov 2019 01:01:33 +0100 Subject: [PATCH 0241/1110] sh2 drc: fixed some RISC-V bugs --- README.md | 8 ++++---- cpu/drc/emit_riscv.c | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 8154f7dc..67f60c2c 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,14 @@ This is my foray into dynamic recompilation using PicoDrive, a Megadrive / Genesis / Sega CD / Mega CD / 32X / SMS emulator. -I added support for MIPS (mips32r1) and ARM64 (aarch64) to the recompiler, as -well as spent much effort to optimize the code generated by the DRC. 
+I added support for MIPS (mips32r1), ARM64 (aarch64) and RISC-V (RV64IM) to the +SH2 recompiler, as well as spent much effort to optimize the DRC-generated code. I also optimized SH2 memory access inside the emulator, and did some work on M68K/SH2 CPU synchronization to fix some problems and speed up the emulator. -It got a bit out of hand. I ended up doing fixes and optimzations all over the +It got a bit out of hand. I ended up doing fixes and optimizations all over the place, mainly for 32X and CD, 32X graphics handling, and probably some more, -see the commit history. +see the commit history. As a result, 32X emulation speed has improved a lot. ### compiling diff --git a/cpu/drc/emit_riscv.c b/cpu/drc/emit_riscv.c index b66d6350..fe4da035 100644 --- a/cpu/drc/emit_riscv.c +++ b/cpu/drc/emit_riscv.c @@ -220,9 +220,9 @@ enum { F2_ALT=0x20, F2_MULDIV=0x01 }; /*EMIT(R5_ADDW_IMM(s1, s1, 0));*/ \ /*EMIT(R5_ADDW_IMM(s2, s2, 0));*/ \ EMIT(R5_MUL(dlo, s1, s2)); \ - EMIT(R5_LSR_IMM(dhi, dlo, 32)); \ + EMIT(R5_ASR_IMM(dhi, dlo, 32)); \ EMIT(R5_LSL_IMM(dlo, dlo, 32)); \ - EMIT(R5_LSR_IMM(dlo, dlo, 32)); \ + EMIT(R5_ASR_IMM(dlo, dlo, 32)); \ } while (0) #define EMIT_R5_MULLS_REG(dlo, dhi, s1, s2) \ @@ -1322,7 +1322,7 @@ static int emith_cond_check(int cond, int *r, int *s) u32 r_ = _CB(*ptr_,5,15,0), s_ = _CB(*ptr_,5,20,0); \ EMIT_PTR(p_, R5_BCOND(f1_, r_, s_, disp_ & 0x00001fff)); \ } else { \ - u32 *p_ = -- ptr_; \ + u32 *p_ = ptr_; \ uintptr_t target_ = (uintptr_t)(target); \ EMIT_PTR(p_, R5_MOVT_IMM(AT, target_ + _CB(target_,1,11,12))); \ EMIT_PTR(p_, R5_JR(AT, target_)); \ From 57d863cb876af1b19a9aaa83b72288fae3f40dcf Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 27 Nov 2019 21:02:53 +0100 Subject: [PATCH 0242/1110] sh2 drc: bug fixing --- cpu/drc/emit_arm64.c | 2 +- cpu/drc/emit_mips.c | 10 ++++++---- cpu/drc/emit_riscv.c | 9 ++++----- cpu/sh2/compiler.c | 28 +++++++++++++++++----------- 4 files changed, 28 insertions(+), 21 deletions(-) diff --git 
a/cpu/drc/emit_arm64.c b/cpu/drc/emit_arm64.c index 8f4718ee..7a832747 100644 --- a/cpu/drc/emit_arm64.c +++ b/cpu/drc/emit_arm64.c @@ -25,7 +25,7 @@ #define PR 18 // platform register // All operations but ptr ops are using the lower 32 bits of the A64 registers. -// The upper 32 bits are only used in ptr ops. +// The upper 32 bits are only used in ptr ops and are zeroed by A64 32 bit ops. #define A64_COND_EQ 0x0 diff --git a/cpu/drc/emit_mips.c b/cpu/drc/emit_mips.c index c9c006c8..062737f6 100644 --- a/cpu/drc/emit_mips.c +++ b/cpu/drc/emit_mips.c @@ -33,6 +33,8 @@ #define FC 24 // emulated processor flags: C (bit 0), others 0 #define FV 25 // emulated processor flags: Nt^Ns (bit 31). others x +// All operations but ptr ops are using the lower 32 bits of the registers. +// The upper 32 bits always contain the sign extension from the lower 32 bits. // unified conditions; virtual, not corresponding to anything real on MIPS #define DCOND_EQ 0x0 @@ -1095,10 +1097,10 @@ static void emith_lohi_nops(void) emith_lohi_nops(); \ EMIT(MIPS_MULT(s1, s2)); \ EMIT(MIPS_MFLO(AT)); \ - emith_add_r_r(dlo, AT); \ - EMIT(MIPS_SLTU_REG(t_, dlo, AT)); \ - EMIT(MIPS_MFHI(AT)); \ + EMIT(MIPS_MFHI(t_)); \ last_lohi = (u8 *)tcache_ptr; \ + emith_add_r_r(dlo, AT); \ + EMIT(MIPS_SLTU_REG(AT, dlo, AT)); \ emith_add_r_r(dhi, AT); \ emith_add_r_r(dhi, t_); \ rcache_free_tmp(t_); \ @@ -1479,7 +1481,7 @@ static int emith_cond_check(int cond, int *r) // NB: ABI SP alignment is 8 for compatibility with MIPS IV #define emith_push_ret(r) do { \ - emith_add_r_r_ptr_imm(SP, SP, -8-16); /* ABI: 16 byte arg save area */ \ + emith_add_r_r_ptr_imm(SP, SP, -8-16); /* O32: 16 byte arg save area */ \ emith_write_r_r_offs(LR, SP, 4+16); \ if ((r) > 0) emith_write_r_r_offs(r, SP, 0+16); \ } while (0) diff --git a/cpu/drc/emit_riscv.c b/cpu/drc/emit_riscv.c index fe4da035..0f614f18 100644 --- a/cpu/drc/emit_riscv.c +++ b/cpu/drc/emit_riscv.c @@ -30,6 +30,8 @@ #define FC 29 // emulated processor flags: C (bit 
0), others 0 #define FV 28 // emulated processor flags: Nt^Ns (bit 31). others x +// All operations but ptr ops are using the lower 32 bits of the registers. +// The upper 32 bits always contain the sign extension from the lower 32 bits. // unified conditions; virtual, not corresponding to anything real on RISC-V #define DCOND_EQ 0x0 @@ -217,12 +219,9 @@ enum { F2_ALT=0x20, F2_MULDIV=0x01 }; // NB: must split 64 bit result into 2 32 bit registers // NB: expects 32 bit values in s1+s2, correctly sign extended to 64 bits #define EMIT_R5_MULLU_REG(dlo, dhi, s1, s2) do { \ - /*EMIT(R5_ADDW_IMM(s1, s1, 0));*/ \ - /*EMIT(R5_ADDW_IMM(s2, s2, 0));*/ \ EMIT(R5_MUL(dlo, s1, s2)); \ EMIT(R5_ASR_IMM(dhi, dlo, 32)); \ - EMIT(R5_LSL_IMM(dlo, dlo, 32)); \ - EMIT(R5_ASR_IMM(dlo, dlo, 32)); \ + EMIT(R5_ADDW_IMM(dlo, dlo, 0)); \ } while (0) #define EMIT_R5_MULLS_REG(dlo, dhi, s1, s2) \ @@ -633,7 +632,7 @@ static int literal_pindex, literal_iindex; static inline int emith_pool_literal(uintptr_t imm) { int idx = literal_pindex - 8; // max look behind in pool - // see if one of the last literals was the same (or close enough) + // see if one of the last literals was the same for (idx = (idx < 0 ? 0 : idx); idx < literal_pindex; idx++) if (imm == literal_pool[idx]) break; diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index a12dfe96..57bfc212 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -7,21 +7,24 @@ * See COPYING file in the top-level directory. 
* * notes: - * - tcache, block descriptor, link buffer overflows result in sh2_translate() - * failure, followed by full tcache invalidation for that region + * - tcache, block descriptor, block entry buffer overflows result in oldest + * blocks being deleted until enough space is available + * - link and list element buffer overflows result in failure and exit * - jumps between blocks are tracked for SMC handling (in block_entry->links), - * except jumps between different tcaches + * except jumps from global to CPU-local tcaches * * implemented: * - static register allocation * - remaining register caching and tracking in temporaries * - block-local branch linking - * - block linking (except between tcaches) + * - block linking * - some constant propagation + * - call stack caching for host block entry address + * - delay, poll, and idle loop detection and handling + * - some T/M flag optimizations where the value is known or isn't used * * TODO: * - better constant propagation - * - stack caching? 
* - bug fixing */ #include @@ -1068,7 +1071,7 @@ static struct block_desc *dr_add_block(int entries, u32 addr, int size, if (be != NULL) dbg(1, "block override for %08x", addr); - if (block_ring[tcache_id].used + 1 > block_ring[tcache_id].size || + if (block_ring[tcache_id].used + 1 > block_ring[tcache_id].size || entry_ring[tcache_id].used + entries > entry_ring[tcache_id].size) { dbg(1, "bd overflow for tcache %d", tcache_id); return NULL; @@ -3014,13 +3017,13 @@ static void *dr_get_pc_base(u32 pc, SH2 *sh2); static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) { // branch targets in current block - struct linkage branch_targets[MAX_LOCAL_TARGETS]; + static struct linkage branch_targets[MAX_LOCAL_TARGETS]; int branch_target_count = 0; // unresolved local or external targets with block link/exit area if needed - struct linkage blx_targets[MAX_LOCAL_BRANCHES]; + static struct linkage blx_targets[MAX_LOCAL_BRANCHES]; int blx_target_count = 0; - u8 op_flags[BLOCK_INSN_LIMIT]; + static u8 op_flags[BLOCK_INSN_LIMIT]; enum flg_states { FLG_UNKNOWN, FLG_UNUSED, FLG_0, FLG_1 }; struct drcf { @@ -3037,7 +3040,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) #if LOOP_OPTIMIZER // loops with pinned registers for optimzation // pinned regs are like statics and don't need saving/restoring inside a loop - struct linkage pinned_loops[MAX_LOCAL_TARGETS/16]; + static struct linkage pinned_loops[MAX_LOCAL_TARGETS/16]; int pinned_loop_count = 0; #endif @@ -3479,6 +3482,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // no sense in looking any further than the next rcache flush tmp = ((op_flags[i+v] & OF_BTARGET) || (op_flags[i+v-1] & OF_DELAY_OP) || (OP_ISBRACND(opd[v-1].op) && !(op_flags[i+v] & OF_DELAY_OP))); + // XXX looking behind cond branch to avoid evicting regs used later? 
if (pc + 2*v <= end_pc && !tmp) { // (pc already incremented above) late |= opd[v].source & ~write; // ignore source regs after they have been written to @@ -4636,6 +4640,7 @@ end_op: rcache_invalidate(); } } else + // no space for resolving forward branch, handle it as external dbg(1, "warning: too many unresolved branches"); } @@ -4657,6 +4662,7 @@ end_op: EMITH_JMP_START(emith_invert_cond(cond)); if (bl) { bl->jump = tcache_ptr; + emith_flush(); // flush to inhibit insn swapping bl->type = BL_LDJMP; } tmp = rcache_get_tmp_arg(0); @@ -5534,7 +5540,7 @@ int sh2_drc_init(SH2 *sh2) i = tcache_ptr - tcache; RING_INIT(&tcache_ring[0], tcache_ptr, tcache_sizes[0] - i); for (i = 1; i < ARRAY_SIZE(tcache_ring); i++) { - RING_INIT(&tcache_ring[i], tcache_ring[i-1].base + tcache_sizes[i-1], + RING_INIT(&tcache_ring[i], tcache_ring[i-1].base + tcache_ring[i-1].size, tcache_sizes[i]); } From 3b0d7104188a963be9f5ee7732dd763e02a303df Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 27 Nov 2019 23:05:27 +0100 Subject: [PATCH 0243/1110] release 1.95 --- platform/common/menu_pico.c | 1 + platform/common/version.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/platform/common/menu_pico.c b/platform/common/menu_pico.c index dc7ceda4..327190a5 100644 --- a/platform/common/menu_pico.c +++ b/platform/common/menu_pico.c @@ -938,6 +938,7 @@ static const char credits[] = "MAME devs: SH2, YM2612 and SN76496 cores\n" "Eke, Stef: some Sega CD code\n" "Inder, ketchupgun: graphics\n" + "Irixxxx: SH2 drc improvements\n" #ifdef __GP2X__ "Squidge: mmuhack\n" "Dzz: ARM940 sample\n" diff --git a/platform/common/version.h b/platform/common/version.h index 8b3adbf8..a8c3034b 100644 --- a/platform/common/version.h +++ b/platform/common/version.h @@ -1 +1 @@ -#define VERSION "1.93+" +#define VERSION "1.95" From 4f992bf5416fd1e3840d6211712983efa92da4ef Mon Sep 17 00:00:00 2001 From: kub Date: Mon, 2 Dec 2019 22:31:14 +0100 Subject: [PATCH 0244/1110] sh2 drc, tentative MIPS32/64 
Release 2 support --- cpu/drc/emit_arm64.c | 23 +++--- cpu/drc/emit_mips.c | 144 ++++++++++++++++++++++++++++---------- cpu/drc/emit_riscv.c | 4 +- platform/common/dismips.c | 75 +++++++++++++++++--- 4 files changed, 186 insertions(+), 60 deletions(-) diff --git a/cpu/drc/emit_arm64.c b/cpu/drc/emit_arm64.c index 7a832747..c827fe2c 100644 --- a/cpu/drc/emit_arm64.c +++ b/cpu/drc/emit_arm64.c @@ -221,10 +221,15 @@ enum { XT_UXTW=0x4, XT_UXTX=0x6, XT_LSL=0x7, XT_SXTW=0xc, XT_SXTX=0xe }; #define A64_ROR_IMM(rd, rn, bits) /* EXTR */ \ A64_INSN(0x9,0x0,0x6,_,rn,_,bits,rn,rd) -#define A64_SXT_IMM(rd, rn, bits) \ - A64_INSN(0x9,0x0,0x4,0,0,_,bits-1,rn,rd) -#define A64_UXT_IMM(rd, rn, bits) \ - A64_INSN(0x9,0x2,0x4,0,0,_,bits-1,rn,rd) +#define A64_SXT_IMM(rd, rn, bits) /* SBFM */ \ + A64_INSN(0x9,0x0,0x4,_,0,_,bits-1,rn,rd) +#define A64_UXT_IMM(rd, rn, bits) /* UBFM */ \ + A64_INSN(0x9,0x2,0x4,_,0,_,bits-1,rn,rd) + +#define A64_BFX_IMM(rd, rn, lsb, bits) /* UBFM */ \ + A64_INSN(0x9,0x2,0x4,_,lsb,_,bits-1,rn,rd) +#define A64_BFI_IMM(rd, rn, lsb, bits) /* BFM */ \ + A64_INSN(0x9,0x1,0x4,_,(32-lsb)&31,_,bits-1,rn,rd) // multiplication @@ -1302,8 +1307,7 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) EMITH_SJMP_START(DCOND_EQ); \ /* overflow if top 33 bits of MACH:MACL aren't all 1 or 0 */ \ /* to check: add MACL[31] to MACH. this is 0 if no overflow */ \ - emith_lsr(rn, ml, 31); \ - emith_addf_r_r(rn, mh); /* sum = MACH + ((MACL>>31)&1) */ \ + emith_addf_r_r_r_lsr(rn, mh, ml, 31); /* sum = MACH + (MACL>>31) */ \ EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \ /* XXX: LSB signalling only in SH1, or in SH2 too? 
*/ \ emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \ @@ -1315,11 +1319,8 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) EMITH_SJMP_END(DCOND_EQ); \ } while (0) -#define emith_write_sr(sr, srcr) do { \ - emith_lsr(sr, sr, 10); \ - emith_or_r_r_r_lsl(sr, sr, srcr, 22); \ - emith_ror(sr, sr, 22); \ -} while (0) +#define emith_write_sr(sr, srcr) \ + EMIT(A64_BFI_IMM(sr, srcr, 0, 10)) #define emith_carry_to_t(srr, is_sub) do { \ emith_lsr(sr, sr, 1); \ diff --git a/cpu/drc/emit_mips.c b/cpu/drc/emit_mips.c index 062737f6..753c3122 100644 --- a/cpu/drc/emit_mips.c +++ b/cpu/drc/emit_mips.c @@ -1,5 +1,5 @@ /* - * Basic macros to emit MIPS II/MIPS32 Release 1 instructions and some utils + * Basic macros to emit MIPS32/MIPS64 Release 1 or 2 instructions and some utils * Copyright (C) 2019 kub * * This work is licensed under the terms of MAME license. @@ -65,9 +65,10 @@ // opcode field (encoded in op) enum { OP__FN=000, OP__RT, OP_J, OP_JAL, OP_BEQ, OP_BNE, OP_BLEZ, OP_BGTZ }; enum { OP_ADDI=010, OP_ADDIU, OP_SLTI, OP_SLTIU, OP_ANDI, OP_ORI, OP_XORI, OP_LUI }; +enum { OP_DADDI=030, OP_DADDIU, OP_LDL, OP_LDR, OP__FN2=034, OP__FN3=037 }; enum { OP_LB=040, OP_LH, OP_LWL, OP_LW, OP_LBU, OP_LHU, OP_LWR, OP_LWU }; enum { OP_SB=050, OP_SH, OP_SWL, OP_SW, OP_SDL, OP_SDR, OP_SWR }; -enum { OP_DADDI=030, OP_DADDIU, OP_LDL, OP_LDR, OP_SD=067, OP_LD=077 }; +enum { OP_SD=067, OP_LD=077 }; // function field (encoded in fn if opcode = OP__FN) enum { FN_SLL=000, __(01), FN_SRL, FN_SRA, FN_SLLV, __(05), FN_SRLV, FN_SRAV }; enum { FN_JR=010, FN_JALR, FN_MOVZ, FN_MOVN, FN_SYNC=017 }; @@ -76,39 +77,54 @@ enum { FN_MULT=030, FN_MULTU, FN_DIV, FN_DIVU, FN_DMULT, FN_DMULTU, FN_DDIV, FN_ enum { FN_ADD=040, FN_ADDU, FN_SUB, FN_SUBU, FN_AND, FN_OR, FN_XOR, FN_NOR }; enum { FN_SLT=052, FN_SLTU, FN_DADD, FN_DADDU, FN_DSUB, FN_DSUBU }; enum { FN_DSLL=070, __(71), FN_DSRL, FN_DSRA, FN_DSLL32, __(75), FN_DSRL32, FN_DSRA32 }; +// function field (encoded 
in fn if opcode = OP__FN2) +enum { FN2_MADD=000, FN2_MADDU, FN2_MUL, __(03), FN2_MSUB, FN2_MSUBU }; +enum { FN2_CLZ=040, FN2_CLO, FN2_DCLZ=044, FN2_DCLO }; +// function field (encoded in fn if opcode = OP__FN3) +enum { FN3_EXT=000, FN3_DEXTM, FN3_DEXTU, FN3_DEXT, FN3_INS, FN3_DINSM, FN3_DINSU, FN3_DINS }; +enum { FN3_BSHFL=040, FN3_DBSHFL=044 }; // rt field (encoded in rt if opcode = OP__RT) enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 }; +// bit shuffle function (encoded in sa if function = FN3_BSHFL) +enum { BS_SBH=002, BS_SHD=005, BS_SEB=020, BS_SEH=030 }; +// r (rotate) bit function (encoded in rs/sa if function = FN_SRL/FN_SRLV) +enum { RB_SRL=0, RB_ROTR=1 }; + #define MIPS_NOP 000 // null operation: SLL r0, r0, #0 // arithmetic/logical -#define MIPS_OP_REG(op, rd, rs, rt) \ - MIPS_INSN(OP__FN, rs, rt, rd, _, op) // R-type, SPECIAL +#define MIPS_OP_REG(op, sa, rd, rs, rt) \ + MIPS_INSN(OP__FN, rs, rt, rd, sa, op) // R-type, SPECIAL +#define MIPS_OP2_REG(op, sa, rd, rs, rt) \ + MIPS_INSN(OP__FN2, rs, rt, rd, sa, op) // R-type, SPECIAL2 +#define MIPS_OP3_REG(op, sa, rd, rs, rt) \ + MIPS_INSN(OP__FN3, rs, rt, rd, sa, op) // R-type, SPECIAL3 #define MIPS_OP_IMM(op, rt, rs, imm) \ MIPS_INSN(op, rs, rt, _, _, (u16)(imm)) // I-type // rd = rs OP rt #define MIPS_ADD_REG(rd, rs, rt) \ - MIPS_OP_REG(FN_ADDU, rd, rs, rt) + MIPS_OP_REG(FN_ADDU,_, rd, rs, rt) #define MIPS_DADD_REG(rd, rs, rt) \ - MIPS_OP_REG(FN_DADDU, rd, rs, rt) + MIPS_OP_REG(FN_DADDU,_, rd, rs, rt) #define MIPS_SUB_REG(rd, rs, rt) \ - MIPS_OP_REG(FN_SUBU, rd, rs, rt) + MIPS_OP_REG(FN_SUBU,_, rd, rs, rt) #define MIPS_DSUB_REG(rd, rs, rt) \ - MIPS_OP_REG(FN_DSUBU, rd, rs, rt) + MIPS_OP_REG(FN_DSUBU,_, rd, rs, rt) #define MIPS_NEG_REG(rd, rt) \ MIPS_SUB_REG(rd, Z0, rt) #define MIPS_XOR_REG(rd, rs, rt) \ - MIPS_OP_REG(FN_XOR, rd, rs, rt) + MIPS_OP_REG(FN_XOR,_, rd, rs, rt) #define MIPS_OR_REG(rd, rs, rt) \ - MIPS_OP_REG(FN_OR, rd, rs, rt) + MIPS_OP_REG(FN_OR,_, rd, rs, rt) #define 
MIPS_AND_REG(rd, rs, rt) \ - MIPS_OP_REG(FN_AND, rd, rs, rt) + MIPS_OP_REG(FN_AND,_, rd, rs, rt) #define MIPS_NOR_REG(rd, rs, rt) \ - MIPS_OP_REG(FN_NOR, rd, rs, rt) + MIPS_OP_REG(FN_NOR,_, rd, rs, rt) #define MIPS_MOVE_REG(rd, rs) \ MIPS_OR_REG(rd, rs, Z0) @@ -117,17 +133,29 @@ enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 }; // rd = rt SHIFT rs #define MIPS_LSL_REG(rd, rt, rs) \ - MIPS_OP_REG(FN_SLLV, rd, rs, rt) + MIPS_OP_REG(FN_SLLV,_, rd, rs, rt) #define MIPS_LSR_REG(rd, rt, rs) \ - MIPS_OP_REG(FN_SRLV, rd, rs, rt) + MIPS_OP_REG(FN_SRLV,RB_SRL, rd, rs, rt) #define MIPS_ASR_REG(rd, rt, rs) \ - MIPS_OP_REG(FN_SRAV, rd, rs, rt) + MIPS_OP_REG(FN_SRAV,_, rd, rs, rt) +#define MIPS_ROR_REG(rd, rt, rs) \ + MIPS_OP_REG(FN_SRLV,RB_ROTR, rd, rs, rt) + +#define MIPS_SEB_REG(rd, rt) \ + MIPS_OP3_REG(FN3_BSHFL, BS_SEB, rd, _, rt) +#define MIPS_SEH_REG(rd, rt) \ + MIPS_OP3_REG(FN3_BSHFL, BS_SEH, rd, _, rt) + +#define MIPS_EXT_IMM(rt, rs, lsb, sz) \ + MIPS_OP3_REG(FN3_EXT, lsb, (sz)-1, rs, rt) +#define MIPS_INS_IMM(rt, rs, lsb, sz) \ + MIPS_OP3_REG(FN3_INS, lsb, (lsb)+(sz)-1, rs, rt) // rd = (rs < rt) #define MIPS_SLT_REG(rd, rs, rt) \ - MIPS_OP_REG(FN_SLT, rd, rs, rt) + MIPS_OP_REG(FN_SLT,_, rd, rs, rt) #define MIPS_SLTU_REG(rd, rs, rt) \ - MIPS_OP_REG(FN_SLTU, rd, rs, rt) + MIPS_OP_REG(FN_SLTU,_, rd, rs, rt) // rt = rs OP imm16 #define MIPS_ADD_IMM(rt, rs, imm16) \ @@ -152,9 +180,11 @@ enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 }; #define MIPS_LSL_IMM(rd, rt, bits) \ MIPS_INSN(OP__FN, _, rt, rd, bits, FN_SLL) #define MIPS_LSR_IMM(rd, rt, bits) \ - MIPS_INSN(OP__FN, _, rt, rd, bits, FN_SRL) + MIPS_INSN(OP__FN, RB_SRL, rt, rd, bits, FN_SRL) #define MIPS_ASR_IMM(rd, rt, bits) \ MIPS_INSN(OP__FN, _, rt, rd, bits, FN_SRA) +#define MIPS_ROR_IMM(rd, rt, bits) \ + MIPS_INSN(OP__FN, RB_ROTR, rt, rd, bits, FN_SRL) #define MIPS_DLSL_IMM(rd, rt, bits) \ MIPS_INSN(OP__FN, _, rt, rd, bits, FN_DSLL) @@ -170,13 +200,17 @@ enum { RT_BLTZ=000, 
RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 }; // multiplication #define MIPS_MULT(rt, rs) \ - MIPS_OP_REG(FN_MULT, _, rs, rt) + MIPS_OP_REG(FN_MULT,_, _, rs, rt) #define MIPS_MULTU(rt, rs) \ - MIPS_OP_REG(FN_MULTU, _, rs, rt) + MIPS_OP_REG(FN_MULTU,_, _, rs, rt) +#define MIPS_MADD(rt, rs) \ + MIPS_OP2_REG(FN_MADD,_, _, rs, rt) +#define MIPS_MADDU(rt, rs) \ + MIPS_OP2_REG(FN_MADDU,_, _, rs, rt) #define MIPS_MFLO(rd) \ - MIPS_OP_REG(FN_MFLO, rd, _, _) + MIPS_OP_REG(FN_MFLO,_, rd, _, _) #define MIPS_MFHI(rd) \ - MIPS_OP_REG(FN_MFHI, rd, _, _) + MIPS_OP_REG(FN_MFHI,_, rd, _, _) // branching @@ -185,9 +219,9 @@ enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 }; #define MIPS_JAL(abs26) \ MIPS_INSN(OP_JAL, _,_,_,_, (abs26) >> 2) #define MIPS_JR(rs) \ - MIPS_OP_REG(FN_JR,_,rs,_) + MIPS_OP_REG(FN_JR,_, _,rs,_) #define MIPS_JALR(rd, rs) \ - MIPS_OP_REG(FN_JALR,rd,rs,_) + MIPS_OP_REG(FN_JALR,_, rd,rs,_) // conditional branches; no condition code, these compare rs against rt or Z0 #define MIPS_BEQ (OP_BEQ << 5) // rs == rt (rt in lower 5 bits) @@ -234,7 +268,7 @@ enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 }; // pointer operations -#if __mips == 4 || __mips == 64 +#if _MIPS_SZPTR == 64 #define OP_LP OP_LD #define OP_SP OP_SD #define OP_PADDIU OP_DADDIU @@ -524,8 +558,8 @@ static void emith_set_compare_flags(int rs, int rt, s32 imm) #define emith_add_r_r_r_lsl_ptr(d, s1, s2, simm) do { \ if (simm) { \ EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ - EMIT(MIPS_OP_REG(FN_PADDU, d, s1, AT)); \ - } else EMIT(MIPS_OP_REG(FN_PADDU, d, s1, s2)); \ + EMIT(MIPS_OP_REG(FN_PADDU,_, d, s1, AT)); \ + } else EMIT(MIPS_OP_REG(FN_PADDU,_, d, s1, s2)); \ } while (0) #define emith_add_r_r_r_lsl(d, s1, s2, simm) do { \ if (simm) { \ @@ -544,10 +578,10 @@ static void emith_set_compare_flags(int rs, int rt, s32 imm) #define emith_addf_r_r_r_lsl_ptr(d, s1, s2, simm) do { \ if (simm) { \ EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ - EMIT(MIPS_OP_REG(FN_PADDU, FNZ, s1, 
AT)); \ + EMIT(MIPS_OP_REG(FN_PADDU,_, FNZ, s1, AT)); \ emith_set_arith_flags(d, s1, AT, 0, 0); \ } else { \ - EMIT(MIPS_OP_REG(FN_PADDU, FNZ, s1, s2)); \ + EMIT(MIPS_OP_REG(FN_PADDU,_, FNZ, s1, s2)); \ emith_set_arith_flags(d, s1, s2, 0, 0); \ } \ } while (0) @@ -752,7 +786,7 @@ static void emith_set_compare_flags(int rs, int rt, s32 imm) // move immediate static void emith_move_imm(int r, uintptr_t imm) { -#if __mips == 4 || __mips == 64 +#if _MIPS_SZPTR == 64 if ((s32)imm != imm) { emith_move_imm(r, imm >> 32); if (imm & 0xffff0000) { @@ -803,10 +837,10 @@ static void emith_add_imm(int ptr, int rd, int rs, u32 imm) EMIT(MIPS_OP_IMM(ptr ? OP_PADDIU:OP_ADDIU, rd,rs,imm)); } else if ((s32)imm < 0) { emith_move_r_imm(AT, -imm); - EMIT(MIPS_OP_REG((ptr ? FN_PSUBU:FN_SUBU), rd,rs,AT)); + EMIT(MIPS_OP_REG((ptr ? FN_PSUBU:FN_SUBU),_, rd,rs,AT)); } else { emith_move_r_imm(AT, imm); - EMIT(MIPS_OP_REG((ptr ? FN_PADDU:FN_ADDU), rd,rs,AT)); + EMIT(MIPS_OP_REG((ptr ? FN_PADDU:FN_ADDU),_, rd,rs,AT)); } } @@ -881,7 +915,7 @@ static void emith_log_imm(int op, int rd, int rs, u32 imm) { if (imm >> 16) { emith_move_r_imm(AT, imm); - EMIT(MIPS_OP_REG(FN_AND + (op-OP_ANDI), rd, rs, AT)); + EMIT(MIPS_OP_REG(FN_AND + (op-OP_ANDI),_, rd, rs, AT)); } else if (op == OP_ANDI || imm || rd != rs) EMIT(MIPS_OP_IMM(op, rd, rs, imm)); } @@ -936,20 +970,31 @@ static void emith_log_imm(int op, int rd, int rs, u32 imm) #define emith_asr(d, s, cnt) \ EMIT(MIPS_ASR_IMM(d, s, cnt)) -// NB: mips32r2 has ROT (SLR with R bit set) +#if defined(__mips_isa_rev) && __mips_isa_rev >= 2 +#define emith_ror(d, s, cnt) do { \ + EMIT(MIPS_ROR_IMM(d, s, cnt)); \ +} while (0) +#else #define emith_ror(d, s, cnt) do { \ EMIT(MIPS_LSL_IMM(AT, s, 32-(cnt))); \ EMIT(MIPS_LSR_IMM(d, s, cnt)); \ EMIT(MIPS_OR_REG(d, d, AT)); \ } while (0) +#endif #define emith_ror_c(cond, d, s, cnt) \ emith_ror(d, s, cnt) +#if defined(__mips_isa_rev) && __mips_isa_rev >= 2 +#define emith_rol(d, s, cnt) do { \ + EMIT(MIPS_ROR_IMM(d, s, 
32-(cnt))); \ +} while (0) +#else #define emith_rol(d, s, cnt) do { \ EMIT(MIPS_LSR_IMM(AT, s, 32-(cnt))); \ EMIT(MIPS_LSL_IMM(d, s, cnt)); \ EMIT(MIPS_OR_REG(d, d, AT)); \ } while (0) +#endif #define emith_rorc(d) do { \ emith_lsr(d, d, 1); \ @@ -963,7 +1008,6 @@ static void emith_log_imm(int op, int rd, int rs, u32 imm) } while (0) // NB: all flag setting shifts make V undefined -// NB: mips32r2 has EXT (useful for extracting C) #define emith_lslf(d, s, cnt) do { \ int _s = s; \ if ((cnt) > 1) { \ @@ -1040,7 +1084,10 @@ static void emith_log_imm(int op, int rd, int rs, u32 imm) } while (0) // signed/unsigned extend -// NB: mips32r2 has EXT and INS +#if defined(__mips_isa_rev) && __mips_isa_rev >= 2 +#define emith_clear_msb(d, s, count) /* bits to clear */ \ + EMIT(MIPS_EXT_IMM(d, s, 0, 32-(count))) +#else #define emith_clear_msb(d, s, count) /* bits to clear */ do { \ u32 t; \ if ((count) >= 16) { \ @@ -1052,14 +1099,27 @@ static void emith_log_imm(int op, int rd, int rs, u32 imm) emith_lsr(d, d, count); \ } \ } while (0) +#endif #define emith_clear_msb_c(cond, d, s, count) \ emith_clear_msb(d, s, count) -// NB: mips32r2 has SE[BH]H +#if defined(__mips_isa_rev) && __mips_isa_rev >= 2 +#define emith_sext(d, s, count) /* bits to keep */ do { \ + if (count == 8) \ + EMIT(MIPS_SEB_REG(d, s)); \ + else if (count == 16) \ + EMIT(MIPS_SEH_REG(d, s)); \ + else { \ + emith_lsl(d, s, 32-(count)); \ + emith_asr(d, d, 32-(count)); \ + } \ +} while (0) +#else #define emith_sext(d, s, count) /* bits to keep */ do { \ emith_lsl(d, s, 32-(count)); \ emith_asr(d, d, 32-(count)); \ } while (0) +#endif // multiply Rd = Rn*Rm (+ Ra); NB: next 2 insns after MFLO/MFHI mustn't be MULT static u8 *last_lohi; @@ -1658,16 +1718,26 @@ static int emith_cond_check(int cond, int *r) EMITH_SJMP_END(DCOND_EQ); \ } while (0) +#if defined(__mips_isa_rev) && __mips_isa_rev >= 2 +#define emith_write_sr(sr, srcr) \ + EMIT(MIPS_INS_IMM(sr, srcr, 0, 10)) +#else #define emith_write_sr(sr, srcr) do { \ 
emith_lsr(sr, sr , 10); emith_lsl(sr, sr, 10); \ emith_lsl(AT, srcr, 22); emith_lsr(AT, AT, 22); \ emith_or_r_r(sr, AT); \ } while (0) +#endif +#if defined(__mips_isa_rev) && __mips_isa_rev >= 2 +#define emith_carry_to_t(sr, is_sub) \ + EMIT(MIPS_INS_IMM(sr, FC, 0, 1)) +#else #define emith_carry_to_t(sr, is_sub) do { \ emith_and_r_imm(sr, 0xfffffffe); \ emith_or_r_r(sr, FC); \ } while (0) +#endif #define emith_t_to_carry(sr, is_sub) do { \ emith_and_r_r_imm(FC, sr, 1); \ diff --git a/cpu/drc/emit_riscv.c b/cpu/drc/emit_riscv.c index 0f614f18..ed45e01c 100644 --- a/cpu/drc/emit_riscv.c +++ b/cpu/drc/emit_riscv.c @@ -1499,10 +1499,10 @@ static int emith_cond_check(int cond, int *r, int *s) emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \ EMITH_JMP3_START(DCOND_EQ); \ emith_add_r_r_r(rn, t_, rm); \ - EMIT(R5_SLTU_REG(FC, rn, t_)); \ + EMIT(R5_SLTU_REG(FC, rn, t_)); \ EMITH_JMP3_MID(DCOND_EQ); \ emith_sub_r_r_r(rn, t_, rm); \ - EMIT(R5_SLTU_REG(FC, t_, rn)); \ + EMIT(R5_SLTU_REG(FC, t_, rn)); \ EMITH_JMP3_END(); \ emith_eor_r_r(sr, FC); /* T ^= carry */ \ rcache_free_tmp(t_); \ diff --git a/platform/common/dismips.c b/platform/common/dismips.c index f9888f2a..61c70bfe 100644 --- a/platform/common/dismips.c +++ b/platform/common/dismips.c @@ -56,13 +56,15 @@ static char *const register_names[32] = { enum insn_type { REG_DTS, REG_TS, // 3, 2, or 1 regs - REG_DS, REG_D, REG_S, + REG_DS, REG_DT, REG_D, REG_S, S_IMM_DT, // 2 regs with shift amount + F_IMM_TS, // 2 regs with bitfield spec B_IMM_S, B_IMM_TS, // pc-relative branches with 1 or 2 regs J_IMM, // region-relative jump - A_IMM_TS, // arithmetic immediate with 1 or 2 regs - L_IMM_T, L_IMM_TS, // logical immediate with 2 regs + A_IMM_TS, // arithmetic immediate with 2 regs + L_IMM_T, L_IMM_TS, // logical immediate with 1 or 2 regs M_IMM_TS, // memory indexed with 2 regs + SR_BIT = 0x80 // shift right with R-bit }; struct insn { @@ -77,10 +79,10 @@ struct insn { #define OP_SPECIAL 0x00 static const struct insn 
special_insns[] = { {0x00, S_IMM_DT, "sll"}, - {0x02, S_IMM_DT, "srl"}, + {0x02, S_IMM_DT|SR_BIT, "srl\0rotr"}, {0x03, S_IMM_DT, "sra"}, {0x04, REG_DTS, "sllv"}, - {0x06, REG_DTS, "srlv"}, + {0x06, REG_DTS|SR_BIT, "srlv\0rotrv"}, {0x07, REG_DTS, "srav"}, {0x08, REG_S, "jr"}, {0x09, REG_DS, "jalr"}, @@ -94,7 +96,7 @@ static const struct insn special_insns[] = { {0x12, REG_D, "mflo"}, {0x13, REG_S, "mtlo"}, {0x14, REG_DTS, "dsllv"}, - {0x16, REG_DTS, "dslrv"}, + {0x16, REG_DTS|SR_BIT, "dsrlv\0drotrv"}, {0x17, REG_DTS, "dsrav"}, {0x18, REG_TS, "mult"}, {0x19, REG_TS, "multu"}, @@ -125,10 +127,10 @@ static const struct insn special_insns[] = { // {0x34, REG_TS, "teq" }, // {0x36, REG_TS, "tne" }, {0x38, S_IMM_DT, "dsll"}, - {0x3A, S_IMM_DT, "dsrl"}, + {0x3A, S_IMM_DT|SR_BIT, "dsrl\0drotrv"}, {0x3B, S_IMM_DT, "dsra"}, {0x3C, S_IMM_DT, "dsll32"}, - {0x3E, S_IMM_DT, "dsrl32"}, + {0x3E, S_IMM_DT|SR_BIT, "dsrl32\0drotr32"}, {0x3F, S_IMM_DT, "dsra32"}, }; @@ -146,6 +148,32 @@ static const struct insn special2_insns[] = { {0x25, REG_DS, "dclo" }, }; +// instructions with opcode SPECIAL3 (R-type) +#define OP_SPECIAL3 0x1F +static const struct insn special3_insns[] = { + {0x00, F_IMM_TS, "ext" }, + {0x01, F_IMM_TS, "dextm" }, + {0x02, F_IMM_TS, "dextu" }, + {0x03, F_IMM_TS, "dext" }, + {0x04, F_IMM_TS, "ins" }, + {0x05, F_IMM_TS, "dinsm" }, + {0x06, F_IMM_TS, "dinsu" }, + {0x07, F_IMM_TS, "dins" }, +}; + +// instruction with opcode SPECIAL3 and function *BSHFL +#define FN_BSHFL 0x20 +static const struct insn bshfl_insns[] = { + {0x02, REG_DT, "wsbh" }, + {0x10, REG_DT, "seb" }, + {0x18, REG_DT, "seh" }, +}; +#define FN_DBSHFL 0x24 +static const struct insn dbshfl_insns[] = { + {0x02, REG_DT, "dsbh" }, + {0x05, REG_DT, "dshd" }, +}; + // instructions with opcode REGIMM (I-type) #define OP_REGIMM 0x01 static const struct insn regimm_insns[] = { @@ -240,6 +268,20 @@ static const struct insn *decode_insn(uint32_t insn) op = insn & 0x3f; pi = special2_insns; r = 
ARRAY_SIZE(special2_insns)-1; + } else if (op == OP_SPECIAL3) { + op = insn & 0x3f; + if (op == FN_BSHFL) { + op = (insn >> 6) & 0x1f; + pi = bshfl_insns; + r = ARRAY_SIZE(bshfl_insns)-1; + } else if (op == FN_DBSHFL) { + op = (insn >> 6) & 0x1f; + pi = dbshfl_insns; + r = ARRAY_SIZE(dbshfl_insns)-1; + } else { + pi = special3_insns; + r = ARRAY_SIZE(special3_insns)-1; + } } else if (op == OP_REGIMM) { op = (insn>>16) & 0x1f; pi = regimm_insns; @@ -280,7 +322,7 @@ int dismips(uintptr_t pc, uint32_t insn, char *buf, size_t buflen, uintptr_t *sy char *rs = register_names[(insn >> 21) & 0x1f]; char *rt = register_names[(insn >> 16) & 0x1f]; char *rd = register_names[(insn >> 11) & 0x1f]; - int sa = (insn >> 6) & 0x1f; + int sa = (insn >> 6) & 0x1f, sb = (insn >> 11) & 0x1f; int imm = (int16_t) insn; *sym = 0; @@ -289,10 +331,12 @@ int dismips(uintptr_t pc, uint32_t insn, char *buf, size_t buflen, uintptr_t *sy return 0; } - switch (pi->type) { + switch (pi->type & ~SR_BIT) { case REG_DTS: if ((insn & 0x3f) == 0x25 /*OR*/ && (insn & 0x1f0000) == 0 /*zero*/) snprintf(buf, buflen, "move %s, %s", rd, rs); + else if ((pi->type & SR_BIT) && (insn & (1<<6))) + snprintf(buf, buflen, "%s %s, %s, %s", pi->name+strlen(pi->name)+1, rd, rs, rt); else snprintf(buf, buflen, "%s %s, %s, %s", pi->name, rd, rs, rt); break; @@ -302,6 +346,9 @@ int dismips(uintptr_t pc, uint32_t insn, char *buf, size_t buflen, uintptr_t *sy case REG_DS: snprintf(buf, buflen, "%s %s, %s", pi->name, rd, rs); break; + case REG_DT: + snprintf(buf, buflen, "%s %s, %s", pi->name, rd, rt); + break; case REG_D: snprintf(buf, buflen, "%s %s", pi->name, rd); break; @@ -311,9 +358,17 @@ int dismips(uintptr_t pc, uint32_t insn, char *buf, size_t buflen, uintptr_t *sy case S_IMM_DT: if (insn == 0x00000000) snprintf(buf, buflen, "nop"); + else if ((pi->type & SR_BIT) && (insn & (1<<21))) + snprintf(buf, buflen, "%s %s, %s, %d", pi->name+strlen(pi->name)+1, rd, rt, sa); else snprintf(buf, buflen, "%s %s, %s, %d", 
pi->name, rd, rt, sa); break; + case F_IMM_TS: + if (insn & 0x01) sb+=32; + if (insn & 0x02) sa+=32; + if (insn & 0x04) sb-=sa; + snprintf(buf, buflen, "%s %s, %s, %d, %d", pi->name, rt, rs, sa, sb+1); + break; case B_IMM_S: *sym = b_target(pc, insn); snprintf(buf, buflen, "%s %s, 0x%lx", pi->name, rs, *sym); From 9760505eafcaecdcbb19af56ef9e23b531c487e4 Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 3 Dec 2019 23:52:13 +0100 Subject: [PATCH 0245/1110] remove textrels with -fPIC/-fPIE (for android/ios) --- cpu/DrZ80/{drz80.s => drz80.S} | 293 +++++++++++----------- cpu/drc/emit_arm64.c | 4 +- cpu/drc/emit_mips.c | 10 +- pico/32x/draw_arm.S | 18 +- pico/arm_features.h | 32 +++ pico/cd/memory_arm.S | 110 ++++---- pico/memory_arm.S | 27 +- pico/sound/{ym2612_arm.s => ym2612_arm.S} | 59 ++--- platform/common/common.mak | 4 +- platform/common/dismips.c | 2 +- 10 files changed, 298 insertions(+), 261 deletions(-) rename cpu/DrZ80/{drz80.s => drz80.S} (90%) rename pico/sound/{ym2612_arm.s => ym2612_arm.S} (95%) diff --git a/cpu/DrZ80/drz80.s b/cpu/DrZ80/drz80.S similarity index 90% rename from cpu/DrZ80/drz80.s rename to cpu/DrZ80/drz80.S index c2a64df3..4d592b16 100644 --- a/cpu/DrZ80/drz80.s +++ b/cpu/DrZ80/drz80.S @@ -5,6 +5,8 @@ ;@ For commercial use, separate licencing terms must be obtained. 
+#include "../../pico/arm_features.h" + .data .align 4 @@ -102,6 +104,7 @@ DrZ80Ver: .long 0x0001 ;@--------------------------------------- .text + PIC_LDR_INIT() .if DRZ80_XMAP @@ -1370,7 +1373,7 @@ DrZ80Run: blne DoInterrupt .endif - ldr opcodes,MAIN_opcodes_POINTER2 + PIC_LDR(opcodes, r0, MAIN_opcodes) cmp z80_icount,#0 ;@ irq might have used all cycles ldrplb r0,[z80pc],#1 @@ -1382,11 +1385,7 @@ z80_execute_end: stmia cpucontext,{z80pc-z80sp} ;@ save Z80 registers mov r0,z80_icount ldmia sp!,{r4-r12,pc} ;@ restore registers from stack and return to C code - -MAIN_opcodes_POINTER2: .word MAIN_opcodes -.if INTERRUPT_MODE -Interrupt_local: .word Interrupt -.endif +.pool DoInterrupt: .if INTERRUPT_MODE @@ -1395,8 +1394,9 @@ DoInterrupt: ;@ save everything back into DrZ80 context stmia cpucontext,{z80pc-z80sp} ;@ save Z80 registers stmfd sp!,{r3,r4,r5,lr} ;@ save rest of regs on stack + PIC_LDR(r2, r3, Interrupt) mov lr,pc - ldr pc,Interrupt_local + bx r2 ldmfd sp!,{r3,r4,r5,lr} ;@ load regs from stack ;@ reload regs from DrZ80 context ldmia cpucontext,{z80pc-z80sp} ;@ load Z80 registers @@ -4469,7 +4469,6 @@ opcode_2_6: and z80hl,z80hl,#0xFF<<16 orr z80hl,z80hl,r1, lsl #24 fetch 7 -DAATABLE_LOCAL: .word DAATable ;@DAA opcode_2_7: mov r1,z80a, lsr #24 @@ -4479,13 +4478,14 @@ opcode_2_7: orrne r1,r1,#512 tst z80f,#1<= 2 -#define emith_ror(d, s, cnt) do { \ - EMIT(MIPS_ROR_IMM(d, s, cnt)); \ -} while (0) +#define emith_ror(d, s, cnt) \ + EMIT(MIPS_ROR_IMM(d, s, cnt)) #else #define emith_ror(d, s, cnt) do { \ EMIT(MIPS_LSL_IMM(AT, s, 32-(cnt))); \ @@ -985,9 +984,8 @@ static void emith_log_imm(int op, int rd, int rs, u32 imm) emith_ror(d, s, cnt) #if defined(__mips_isa_rev) && __mips_isa_rev >= 2 -#define emith_rol(d, s, cnt) do { \ - EMIT(MIPS_ROR_IMM(d, s, 32-(cnt))); \ -} while (0) +#define emith_rol(d, s, cnt) \ + EMIT(MIPS_ROR_IMM(d, s, 32-(cnt))) #else #define emith_rol(d, s, cnt) do { \ EMIT(MIPS_LSR_IMM(AT, s, 32-(cnt))); \ diff --git a/pico/32x/draw_arm.S 
b/pico/32x/draw_arm.S index f351d8e0..ad5d428b 100644 --- a/pico/32x/draw_arm.S +++ b/pico/32x/draw_arm.S @@ -7,6 +7,7 @@ @* See COPYING file in the top-level directory. @* +#include "pico/arm_features.h" #include "pico/pico_int_offs.h" .extern Pico32x @@ -17,11 +18,12 @@ .text .align 2 + PIC_LDR_INIT() .macro call_scan_prep cond est @ &Pico.est .if \cond - ldr r4, =PicoScan32xBegin - ldr r5, =PicoScan32xEnd + PIC_LDR(r4, r6, PicoScan32xBegin) + PIC_LDR(r5, r6, PicoScan32xEnd) ldr r6, [\est, #OFS_EST_DrawLineDest] ldr r4, [r4] ldr r5, [r5] @@ -66,8 +68,8 @@ \name: stmfd sp!, {r4-r11,lr} - ldr lr,=Pico - ldr r10,=Pico32x + PIC_LDR(lr, r9, Pico) + PIC_LDR(r10,r9, Pico32x) ldr r11, [lr, #OFS_Pico_est+OFS_EST_Draw2FB] ldrh r10,[r10, #0x40] @ Pico32x.vdp_regs[0] add r9, lr, #OFS_Pico_est+OFS_EST_HighPal @ palmd @@ -192,8 +194,8 @@ \name: stmfd sp!, {r4-r11,lr} - ldr lr,=Pico - ldr r10,=Pico32xMem + PIC_LDR(lr, r9, Pico) + PIC_LDR(r10,r9, Pico32xMem) ldr r9,=OFS_PMEM32x_pal_native ldr r10, [r10] ldr r11, [lr, #OFS_Pico_est+OFS_EST_Draw2FB] @@ -361,8 +363,8 @@ \name: stmfd sp!, {r4-r11,lr} - ldr lr,=Pico - ldr r10,=Pico32xMem + PIC_LDR(lr, r9, Pico) + PIC_LDR(r10,r9, Pico32xMem) ldr r9,=OFS_PMEM32x_pal_native ldr r10, [r10] ldr r11, [lr, #OFS_Pico_est+OFS_EST_Draw2FB] diff --git a/pico/arm_features.h b/pico/arm_features.h index fdec5229..4b456f45 100644 --- a/pico/arm_features.h +++ b/pico/arm_features.h @@ -49,4 +49,36 @@ #endif +// indexed branch (XB) via branch table (BT) +#ifdef __PIC__ +#define PIC_XB(c,r,s) add##c pc, r, s +#define PIC_BT(a) b a +#else +#define PIC_XB(c,r,s) ldr##c pc, [pc, r, s] +#define PIC_BT(a) .word a +#endif + +// load data address (LDR) either via literal pool or via GOT +#ifdef __PIC__ +// can't use pool loads since ldr= only allows symbol or constants, not expr :-( +#define PIC_LDR_INIT() \ + .ifndef PIC_LDR_DEF; PIC_LDR_DEF=1; \ + .macro pic_ldr r t a; \ + ldr \r, [pc, $.LD\@-.-8]; \ + ldr \t, [pc, $.LD\@-.-4]; \ + .LP\@:add \r, pc; \ + 
ldr \r, [\r, \t]; \ + add pc, $4; \ + .LD\@:.word _GLOBAL_OFFSET_TABLE_-.LP\@-8; \ + .word \a(GOT); \ + .endm; \ + .endif; +#define PIC_LDR(r,t,a) \ + pic_ldr r, t, a +#else +#define PIC_LDR_INIT() +#define PIC_LDR(r,t,a) \ + ldr r, =a +#endif + #endif /* __ARM_FEATURES_H__ */ diff --git a/pico/cd/memory_arm.S b/pico/cd/memory_arm.S index 04920b62..95ad09ff 100644 --- a/pico/cd/memory_arm.S +++ b/pico/cd/memory_arm.S @@ -6,6 +6,7 @@ @* See COPYING file in the top-level directory. @* +#include "../arm_features.h" #include "../pico_int_offs.h" .equiv PCM_STEP_SHIFT, 11 @@ -65,6 +66,7 @@ .extern PicoWrite16_io .extern m68k_comm_check + PIC_LDR_INIT() @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@ -73,16 +75,16 @@ @ r0=addr[in,out], r1,r2=tmp .macro cell_map ands r1, r0, #0x01c000 - ldrne pc, [pc, r1, lsr #12] - beq 0f @ most common? - .long 0f - .long 0f - .long 0f - .long 0f - .long 1f - .long 1f - .long 2f - .long 3f + PIC_XB(ne ,r1, lsr #12) + b 0f @ most common? + PIC_BT(0f) + PIC_BT(0f) + PIC_BT(0f) + PIC_BT(0f) + PIC_BT(1f) + PIC_BT(1f) + PIC_BT(2f) + PIC_BT(3f) 1: @ x16 cells and r1, r0, #0x7e00 @ col and r2, r0, #0x01fc @ row @@ -128,7 +130,7 @@ PicoReadM68k8_cell1: @ 0x220000 - 0x23ffff, cell arranged mov r3, #0x0e0000 0: cell_map - ldr r1, =Pico + PIC_LDR(r1, r2, Pico) add r0, r0, r3 ldr r1, [r1, #OFS_Pico_rom] @ Pico.mcd (used everywhere) eor r0, r0, #1 @@ -141,26 +143,26 @@ PicoRead8_mcd_io: cmp r1, #0x2000 @ a120xx? 
bne PicoRead8_io - ldr r1, =Pico + PIC_LDR(r1, r2, Pico) and r0, r0, #0x3f ldr r1, [r1, #OFS_Pico_rom] @ Pico.mcd cmp r0, #0x0e - ldrlt pc, [pc, r0, lsl #2] + PIC_XB(lt ,r0, lsl #2) b m_m68k_read8_hi - .long m_m68k_read8_r00 - .long m_m68k_read8_r01 - .long m_m68k_read8_r02 - .long m_m68k_read8_r03 - .long m_m68k_read8_r04 - .long m_read_null @ unused bits - .long m_m68k_read8_r06 - .long m_m68k_read8_r07 - .long m_m68k_read8_r08 - .long m_m68k_read8_r09 - .long m_read_null @ reserved - .long m_read_null - .long m_m68k_read8_r0c - .long m_m68k_read8_r0d + PIC_BT(m_m68k_read8_r00) + PIC_BT(m_m68k_read8_r01) + PIC_BT(m_m68k_read8_r02) + PIC_BT(m_m68k_read8_r03) + PIC_BT(m_m68k_read8_r04) + PIC_BT(m_read_null) @ unused bits + PIC_BT(m_m68k_read8_r06) + PIC_BT(m_m68k_read8_r07) + PIC_BT(m_m68k_read8_r08) + PIC_BT(m_m68k_read8_r09) + PIC_BT(m_read_null) @ reserved + PIC_BT(m_read_null) + PIC_BT(m_m68k_read8_r0c) + PIC_BT(m_m68k_read8_r0d) m_m68k_read8_r00: add r1, r1, #0x110000 ldr r0, [r1, #0x30] @@ -238,7 +240,7 @@ PicoReadM68k16_cell1: @ 0x220000 - 0x23ffff, cell arranged mov r3, #0x0e0000 0: cell_map - ldr r1, =Pico + PIC_LDR(r1, r2, Pico) add r0, r0, r3 ldr r1, [r1, #OFS_Pico_rom] @ Pico.mcd bic r0, r0, #1 @@ -252,19 +254,19 @@ PicoRead16_mcd_io: bne PicoRead16_io m_m68k_read16_m68k_regs: - ldr r1, =Pico + PIC_LDR(r1, r2, Pico) and r0, r0, #0x3e ldr r1, [r1, #OFS_Pico_rom] @ Pico.mcd cmp r0, #0x0e - ldrlt pc, [pc, r0, lsl #1] + PIC_XB(lt ,r0, lsl #1) b m_m68k_read16_hi - .long m_m68k_read16_r00 - .long m_m68k_read16_r02 - .long m_m68k_read16_r04 - .long m_m68k_read16_r06 - .long m_m68k_read16_r08 - .long m_read_null @ reserved - .long m_m68k_read16_r0c + PIC_BT(m_m68k_read16_r00) + PIC_BT(m_m68k_read16_r02) + PIC_BT(m_m68k_read16_r04) + PIC_BT(m_m68k_read16_r06) + PIC_BT(m_m68k_read16_r08) + PIC_BT(m_read_null) @ reserved + PIC_BT(m_m68k_read16_r0c) m_m68k_read16_r00: add r1, r1, #0x110000 ldr r0, [r1, #0x30] @@ -329,7 +331,7 @@ PicoWriteM68k8_cell1: @ 0x220000 - 
0x23ffff, cell arranged 0: mov r3, r1 cell_map - ldr r2, =Pico + PIC_LDR(r2, r1, Pico) add r0, r0, r12 ldr r2, [r2, #OFS_Pico_rom] @ Pico.mcd ldr r2, [r2] @@ -357,7 +359,7 @@ PicoWriteM68k16_cell1: @ 0x220000 - 0x23ffff, cell arranged 0: mov r3, r1 cell_map - ldr r1, =Pico + PIC_LDR(r1, r2, Pico) add r0, r0, r12 ldr r1, [r1, #OFS_Pico_rom] @ Pico.mcd bic r0, r0, #1 @@ -399,7 +401,7 @@ PicoReadS68k8_dec0: @ 0x080000 - 0x0bffff PicoReadS68k8_dec1: mov r3, #0x0a0000 @ + ^ / 2 0: - ldr r2, =Pico + PIC_LDR(r2, r1, Pico) eor r0, r0, #2 ldr r2, [r2, #OFS_Pico_rom] @ Pico.mcd movs r0, r0, lsr #1 @ +4-6 <<16 @@ -431,7 +433,7 @@ m_s68k_read8_regs: bx lr m_s68k_read8_comm: - ldr r1, =Pico + PIC_LDR(r1, r2, Pico) ldr r1, [r1, #OFS_Pico_rom] @ Pico.mcd add r1, r1, #0x110000 ldrb r1, [r1, r0] @@ -444,7 +446,7 @@ m_s68k_read8_pcm: bne m_read_null @ must not trash r3 and r12 - ldr r1, =Pico + PIC_LDR(r1, r2, Pico) bic r0, r0, #0xff0000 ldr r1, [r1, #OFS_Pico_rom] @ Pico.mcd mov r2, #0x110000 @@ -479,7 +481,7 @@ PicoReadS68k16_dec0: @ 0x080000 - 0x0bffff PicoReadS68k16_dec1: mov r3, #0x0a0000 @ + ^ / 2 0: - ldr r2, =Pico + PIC_LDR(r2, r1, Pico) eor r0, r0, #2 ldr r2, [r2, #OFS_Pico_rom] @ Pico.mcd mov r0, r0, lsr #1 @ +4-6 <<16 @@ -505,12 +507,11 @@ m_s68k_read16_regs: mov r0, #1 b cdc_host_r - @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ .macro m_s68k_write8_2M_decode - ldr r2, =Pico + PIC_LDR(r2, ip, Pico) eor r0, r0, #2 ldr r2, [r2, #OFS_Pico_rom] @ Pico.mcd movs r0, r0, lsr #1 @ +4-6 <<16 @@ -594,7 +595,7 @@ m_s68k_write8_pcm: bxlt lr m_s68k_write8_pcm_ram: - ldr r3, =Pico + PIC_LDR(r3, r2, Pico) bic r0, r0, #0x00e000 ldr r3, [r3, #OFS_Pico_rom] @ Pico.mcd mov r0, r0, lsr #1 @@ -608,12 +609,11 @@ m_s68k_write8_pcm_ram: strb r1, [r3, r0] bx lr - @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ .macro m_s68k_write16_2M_decode - ldr r2, =Pico + PIC_LDR(r2, ip, Pico) eor r0, r0, #2 ldr r2, [r2, #OFS_Pico_rom] @ 
Pico.mcd mov r0, r0, lsr #1 @ +4-6 <<16 @@ -694,7 +694,7 @@ m_s68k_write16_regs: bne s68k_reg_write16 m_s68k_write16_regs_spec: @ special case - ldr r2, =Pico + PIC_LDR(r2, r0, Pico) mov r0, #0x110000 ldr r2, [r2, #OFS_Pico_rom] @ Pico.mcd add r0, r0, #0x00000f @@ -707,7 +707,7 @@ m_s68k_write16_regs_spec: @ special case .global s68k_write16 s68k_read8: - ldr r3, =s68k_read8_map + PIC_LDR(r3, r2, s68k_read8_map) bic r0, r0, #0xff000000 mov r2, r0, lsr #16 ldr r3, [r3, r2, lsl #2] @@ -718,7 +718,7 @@ s68k_read8: bx r3 s68k_read16: - ldr r3, =s68k_read16_map + PIC_LDR(r3, r2, s68k_read16_map) bic r0, r0, #0xff000000 mov r2, r0, lsr #16 ldr r3, [r3, r2, lsl #2] @@ -729,7 +729,7 @@ s68k_read16: bx r3 s68k_read32: - ldr r3, =s68k_read16_map + PIC_LDR(r3, r2, s68k_read16_map) bic r0, r0, #0xff000000 mov r2, r0, lsr #16 ldr r3, [r3, r2, lsl #2] @@ -755,7 +755,7 @@ s68k_read32: bx lr s68k_write8: - ldr r3, =s68k_write8_map + PIC_LDR(r3, r2, s68k_write8_map) bic r0, r0, #0xff000000 mov r2, r0, lsr #16 ldr r3, [r3, r2, lsl #2] @@ -766,7 +766,7 @@ s68k_write8: bx r3 s68k_write16: - ldr r3, =s68k_write16_map + PIC_LDR(r3, r2, s68k_write16_map) bic r0, r0, #0xff000000 mov r2, r0, lsr #16 ldr r3, [r3, r2, lsl #2] @@ -777,7 +777,7 @@ s68k_write16: bx r3 s68k_write32: - ldr r3, =s68k_write16_map + PIC_LDR(r3, r2, s68k_write16_map) bic r0, r0, #0xff000000 mov r2, r0, lsr #16 ldr r3, [r3, r2, lsl #2] diff --git a/pico/memory_arm.S b/pico/memory_arm.S index 333780c1..ebeb346b 100644 --- a/pico/memory_arm.S +++ b/pico/memory_arm.S @@ -7,6 +7,7 @@ * See COPYING file in the top-level directory. 
*/ +#include "arm_features.h" #include "pico_int_offs.h" .equ SRR_MAPPED, (1 << 0) @@ -24,8 +25,10 @@ .global PicoWrite8_io .global PicoWrite16_io + PIC_LDR_INIT() + PicoRead8_sram: @ u32 a - ldr r3, =Pico + PIC_LDR(r3, r1, Pico) ldr r1, [r3, #OFS_Pico_sv_end] cmp r0, r1 bgt m_read8_nosram @@ -74,7 +77,7 @@ m_read8_not_io: cmp r2, #0x1000 bne PicoRead8_32x - ldr r3, =Pico + PIC_LDR(r3, r1, Pico) mov r1, r0 ldr r0, [r3, #OFS_Pico_m_rotate] add r0, r0, #1 @@ -97,7 +100,7 @@ m_read8_not_io: @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ PicoRead16_sram: @ u32 a, u32 d - ldr r3, =Pico + PIC_LDR(r3, r1, Pico) ldr r1, [r3, #OFS_Pico_sv_end] cmp r0, r1 bgt m_read16_nosram @@ -142,7 +145,7 @@ m_read16_not_io: cmp r2, #0x1000 bne PicoRead16_32x - ldr r3, =Pico + PIC_LDR(r3, r2, Pico) and r2, r0, #0xff00 ldr r0, [r3, #OFS_Pico_m_rotate] add r0, r0, #1 @@ -184,7 +187,7 @@ m_write8_not_z80ctl: eor r2, r2, #0x003000 eors r2, r2, #0x0000f1 bne PicoWrite8_32x - ldr r3, =Pico + PIC_LDR(r3, r2, Pico) ldrb r2, [r3, #OFS_Pico_m_sram_reg] and r1, r1, #(SRR_MAPPED|SRR_READONLY) bic r2, r2, #(SRR_MAPPED|SRR_READONLY) @@ -214,7 +217,7 @@ m_write16_not_z80ctl: eor r2, r2, #0x003000 eors r2, r2, #0x0000f0 bne PicoWrite16_32x - ldr r3, =Pico + PIC_LDR(r3, r2, Pico) ldrb r2, [r3, #OFS_Pico_m_sram_reg] and r1, r1, #(SRR_MAPPED|SRR_READONLY) bic r2, r2, #(SRR_MAPPED|SRR_READONLY) @@ -228,7 +231,7 @@ m_write16_not_z80ctl: .global m68k_write16 m68k_read8: - ldr r3, =m68k_read8_map + PIC_LDR(r3, r2, m68k_read8_map) bic r0, r0, #0xff000000 mov r2, r0, lsr #16 ldr r3, [r3, r2, lsl #2] @@ -239,7 +242,7 @@ m68k_read8: bx r3 m68k_read16: - ldr r3, =m68k_read16_map + PIC_LDR(r3, r2, m68k_read16_map) bic r0, r0, #0xff000000 mov r2, r0, lsr #16 ldr r3, [r3, r2, lsl #2] @@ -250,7 +253,7 @@ m68k_read16: bx r3 m68k_read32: - ldr r3, =m68k_read16_map + PIC_LDR(r3, r2, m68k_read16_map) bic r0, r0, #0xff000000 mov r2, r0, lsr #16 ldr r3, [r3, r2, lsl #2] @@ -276,7 +279,7 @@ 
m68k_read32: bx lr m68k_write8: - ldr r3, =m68k_write8_map + PIC_LDR(r3, r2, m68k_write8_map) bic r0, r0, #0xff000000 mov r2, r0, lsr #16 ldr r3, [r3, r2, lsl #2] @@ -287,7 +290,7 @@ m68k_write8: bx r3 m68k_write16: - ldr r3, =m68k_write16_map + PIC_LDR(r3, r2, m68k_write16_map) bic r0, r0, #0xff000000 mov r2, r0, lsr #16 ldr r3, [r3, r2, lsl #2] @@ -298,7 +301,7 @@ m68k_write16: bx r3 m68k_write32: - ldr r3, =m68k_write16_map + PIC_LDR(r3, r2, m68k_write16_map) bic r0, r0, #0xff000000 mov r2, r0, lsr #16 ldr r3, [r3, r2, lsl #2] diff --git a/pico/sound/ym2612_arm.s b/pico/sound/ym2612_arm.S similarity index 95% rename from pico/sound/ym2612_arm.s rename to pico/sound/ym2612_arm.S index 9c436d41..7d4c609a 100644 --- a/pico/sound/ym2612_arm.s +++ b/pico/sound/ym2612_arm.S @@ -12,6 +12,8 @@ @ vim:filetype=armasm +#include "../arm_features.h" + .equiv SLOT1, 0 .equiv SLOT2, 2 .equiv SLOT3, 1 @@ -34,6 +36,7 @@ .text .align 2 + PIC_LDR_INIT() @ r5=slot, r1=eg_cnt, trashes: r0,r2,r3 @ writes output to routp, but only if vol_out changes @@ -556,8 +559,8 @@ upd_algo0: stmfd sp!, {r4-r10,lr} mov lr, r0 - ldr r3, =ym_sin_tab - ldr r5, =ym_tl_tab + PIC_LDR(r3, ip, ym_sin_tab) + PIC_LDR(r5, ip, ym_tl_tab) ldmia lr, {r6-r7} ldr r10, [lr, #0x54] ldr r12, [lr, #0x4c] @@ -573,8 +576,8 @@ upd_algo1: stmfd sp!, {r4-r10,lr} mov lr, r0 - ldr r3, =ym_sin_tab - ldr r5, =ym_tl_tab + PIC_LDR(r3, ip, ym_sin_tab) + PIC_LDR(r5, ip, ym_tl_tab) ldmia lr, {r6-r7} ldr r10, [lr, #0x54] ldr r12, [lr, #0x4c] @@ -590,8 +593,8 @@ upd_algo2: stmfd sp!, {r4-r10,lr} mov lr, r0 - ldr r3, =ym_sin_tab - ldr r5, =ym_tl_tab + PIC_LDR(r3, ip, ym_sin_tab) + PIC_LDR(r5, ip, ym_tl_tab) ldmia lr, {r6-r7} ldr r10, [lr, #0x54] ldr r12, [lr, #0x4c] @@ -607,8 +610,8 @@ upd_algo3: stmfd sp!, {r4-r10,lr} mov lr, r0 - ldr r3, =ym_sin_tab - ldr r5, =ym_tl_tab + PIC_LDR(r3, ip, ym_sin_tab) + PIC_LDR(r5, ip, ym_tl_tab) ldmia lr, {r6-r7} ldr r10, [lr, #0x54] ldr r12, [lr, #0x4c] @@ -624,8 +627,8 @@ upd_algo4: stmfd sp!, 
{r4-r10,lr} mov lr, r0 - ldr r3, =ym_sin_tab - ldr r5, =ym_tl_tab + PIC_LDR(r3, ip, ym_sin_tab) + PIC_LDR(r5, ip, ym_tl_tab) ldmia lr, {r6-r7} ldr r10, [lr, #0x54] ldr r12, [lr, #0x4c] @@ -641,8 +644,8 @@ upd_algo5: stmfd sp!, {r4-r10,lr} mov lr, r0 - ldr r3, =ym_sin_tab - ldr r5, =ym_tl_tab + PIC_LDR(r3, ip, ym_sin_tab) + PIC_LDR(r5, ip, ym_tl_tab) ldmia lr, {r6-r7} ldr r10, [lr, #0x54] ldr r12, [lr, #0x4c] @@ -658,8 +661,8 @@ upd_algo6: stmfd sp!, {r4-r10,lr} mov lr, r0 - ldr r3, =ym_sin_tab - ldr r5, =ym_tl_tab + PIC_LDR(r3, ip, ym_sin_tab) + PIC_LDR(r5, ip, ym_tl_tab) ldmia lr, {r6-r7} ldr r10, [lr, #0x54] ldr r12, [lr, #0x4c] @@ -675,8 +678,8 @@ upd_algo7: stmfd sp!, {r4-r10,lr} mov lr, r0 - ldr r3, =ym_sin_tab - ldr r5, =ym_tl_tab + PIC_LDR(r3, ip, ym_sin_tab) + PIC_LDR(r5, ip, ym_tl_tab) ldmia lr, {r6-r7} ldr r10, [lr, #0x54] ldr r12, [lr, #0x4c] @@ -692,8 +695,8 @@ upd_slot1: stmfd sp!, {r4-r10,lr} mov lr, r0 - ldr r3, =ym_sin_tab - ldr r5, =ym_tl_tab + PIC_LDR(r3, ip, ym_sin_tab) + PIC_LDR(r5, ip, ym_tl_tab) ldmia lr, {r6-r7} ldr r10, [lr, #0x54] ldr r12, [lr, #0x4c] @@ -781,7 +784,7 @@ eg_done: beq crl_loop @ -- SLOT1 -- - ldr r3, =ym_tl_tab + PIC_LDR(r3, r2, ym_tl_tab) @ lr=context, r12=pack (stereo, lastchan, disabled, lfo_enabled | pan_r, pan_l, ams[2] | AMmasks[4] | FB[4] | lfo_ampm[16]) @ r0-r2=scratch, r3=tl_tab, r5=scratch, r6-r7=vol_out[4], r10=op1_out @@ -789,16 +792,16 @@ eg_done: @ -- SLOT2+ -- and r0, r4, #7 - ldr pc, [pc, r0, lsl #2] + PIC_XB(,r0, lsl #2) nop - .word crl_algo0 - .word crl_algo1 - .word crl_algo2 - .word crl_algo3 - .word crl_algo4 - .word crl_algo5 - .word crl_algo6 - .word crl_algo7 + PIC_BT(crl_algo0) + PIC_BT(crl_algo1) + PIC_BT(crl_algo2) + PIC_BT(crl_algo3) + PIC_BT(crl_algo4) + PIC_BT(crl_algo5) + PIC_BT(crl_algo6) + PIC_BT(crl_algo7) .pool crl_algo0: diff --git a/platform/common/common.mak b/platform/common/common.mak index 5afc0171..024ff75f 100644 --- a/platform/common/common.mak +++ b/platform/common/common.mak @@ 
-59,7 +59,7 @@ SRCS_COMMON += $(R)pico/memory_arm.S endif ifeq "$(asm_ym2612)" "1" DEFINES += _ASM_YM2612_C -SRCS_COMMON += $(R)pico/sound/ym2612_arm.s +SRCS_COMMON += $(R)pico/sound/ym2612_arm.S endif ifeq "$(asm_misc)" "1" DEFINES += _ASM_MISC_C @@ -148,7 +148,7 @@ endif # --- Z80 --- ifeq "$(use_drz80)" "1" DEFINES += _USE_DRZ80 -SRCS_COMMON += $(R)cpu/DrZ80/drz80.s +SRCS_COMMON += $(R)cpu/DrZ80/drz80.S endif # ifeq "$(use_cz80)" "1" diff --git a/platform/common/dismips.c b/platform/common/dismips.c index 61c70bfe..19c0b427 100644 --- a/platform/common/dismips.c +++ b/platform/common/dismips.c @@ -1,5 +1,5 @@ /* - * very basic mips disassembler for MIPS32/MIPS64 Release 1, only for picodrive + * very basic mips disassembler for MIPS32/MIPS64 Release 2, only for picodrive * Copyright (C) 2019 kub * * This work is licensed under the terms of MAME license. From d0eab7dae887dd75a33cfbb4c5a60f535d7fece3 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 7 Dec 2019 21:40:24 +0200 Subject: [PATCH 0246/1110] deal with some gcc7+ warnings --- cpu/cyclone | 2 +- pico/cd/cd_image.c | 4 ++++ platform/common/emu.c | 4 ++++ platform/libpicofe | 2 +- 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/cpu/cyclone b/cpu/cyclone index 5fc93bdd..94a9d9a8 160000 --- a/cpu/cyclone +++ b/cpu/cyclone @@ -1 +1 @@ -Subproject commit 5fc93bddb71461abb7619cf506d6f15ba8a675f3 +Subproject commit 94a9d9a888e72cbfa4db12113cd113cf5e154f7f diff --git a/pico/cd/cd_image.c b/pico/cd/cd_image.c index ae2ef0cd..5638c9be 100644 --- a/pico/cd/cd_image.c +++ b/pico/cd/cd_image.c @@ -11,6 +11,10 @@ #include "cdd.h" #include "cue.h" +#if defined(__GNUC__) && __GNUC__ >= 7 +#pragma GCC diagnostic ignored "-Wformat-truncation" +#endif + static int handle_mp3(const char *fname, int index) { track_t *track = &cdd.toc.tracks[index]; diff --git a/platform/common/emu.c b/platform/common/emu.c index 0a9f0890..7385e159 100644 --- a/platform/common/emu.c +++ b/platform/common/emu.c @@ -28,6 +28,10 @@ 
#include #include +#if defined(__GNUC__) && __GNUC__ >= 7 +#pragma GCC diagnostic ignored "-Wformat-truncation" +#endif + #ifndef _WIN32 #define PATH_SEP "/" #define PATH_SEP_C '/' diff --git a/platform/libpicofe b/platform/libpicofe index f8cd6a08..795b71c5 160000 --- a/platform/libpicofe +++ b/platform/libpicofe @@ -1 +1 @@ -Subproject commit f8cd6a082bb9c228397a0436f28818b74d8e9636 +Subproject commit 795b71c571518b310a22138141bb6d1cd08d85f6 From 90b1c9db91ccede5fa7a561548376edcf440e7dc Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 11 Dec 2019 20:16:14 +0100 Subject: [PATCH 0247/1110] sh2 drc: cleanup, fix for drc crash, for mips code emitter --- cpu/drc/emit_mips.c | 110 ++++++++++++++++++------------------ cpu/sh2/compiler.c | 50 ++++++++++++---- cpu/sh2/compiler.h | 31 +++++----- cpu/sh2/sh2.h | 28 ++++----- pico/arm_features.h | 12 ++-- pico/pico_port.h | 2 + platform/common/disarm.c | 4 +- platform/common/disarm.h | 2 +- platform/common/dismips.c | 11 +++- platform/common/dismips.h | 2 +- platform/common/host_dasm.c | 4 +- tools/mkoffsets.sh | 15 +++-- 12 files changed, 151 insertions(+), 120 deletions(-) diff --git a/cpu/drc/emit_mips.c b/cpu/drc/emit_mips.c index 453801f1..765986a6 100644 --- a/cpu/drc/emit_mips.c +++ b/cpu/drc/emit_mips.c @@ -20,6 +20,9 @@ #define STATIC_SH2_REGS { SHR_SR,22 , SHR_R0,21 , SHR_R0+1,20 } // NB: the ubiquitous JZ74[46]0 uses MIPS32 Release 1, a slight MIPS II superset +#ifndef __mips_isa_rev +#define __mips_isa_rev 1 // surprisingly not always defined +#endif // registers usable for user code: r1-r25, others reserved or special #define Z0 0 // zero register @@ -333,32 +336,49 @@ static int emith_is_b(u32 op) // B { return ((op>>26) & 074) == OP_BEQ || ((op>>26) == OP__RT && ((op>>16) & 036) == RT_BLTZ); } // register usage for dependency evaluation XXX better do this as in emit_arm? 
-static uint64_t emith_has_rs[3] = // OP__FN, OP__RT, others - { 0x00fffffffffa0ff0ULL, 0x000fff0fUL, 0xffffffff0f007ff0ULL }; -static uint64_t emith_has_rt[3] = // OP__FN, OP__RT, others - { 0xff00fffffff00cffULL, 0x00000000UL, 0x8000ff0000000030ULL }; -static uint64_t emith_has_rd[3] = // OP__FN, OP__RT, others (rt instead of rd) - { 0xff00fffffff50fffULL, 0x00000000UL, 0x119100ff0f00ff00ULL }; +static uint64_t emith_has_rs[5] = // OP__FN1-3, OP__RT, others + { 0x005ffcffffda0fd2ULL, 0x0000003300000037ULL, 0x00000000000000ffULL, + 0x800f5f0fUL, 0xf7ffffff0ff07ff0ULL }; +static uint64_t emith_has_rt[5] = // OP__FN1-3, OP__RT, others + { 0xdd5ffcffffd00cddULL, 0x0000000000000037ULL, 0x0000001100000000ULL, + 0x00000000UL, 0x80007f440c300030ULL }; +static uint64_t emith_has_rd[5] = // OP__FN1-3, OP__RT, others(rt instead of rd) + { 0xdd00fcff00d50edfULL, 0x0000003300000004ULL, 0x08000011000000ffULL, + 0x00000000UL, 0x119100ff0f00ff00ULL }; #define emith_has_(rx,ix,op,sa,m) \ (emith_has_##rx[ix] & (1ULL << (((op)>>(sa)) & (m)))) static int emith_rs(u32 op) { if ((op>>26) == OP__FN) return emith_has_(rs,0,op, 0,0x3f) ? (op>>21)&0x1f : 0; + if ((op>>26) == OP__FN2) + return emith_has_(rs,1,op, 0,0x3f) ? (op>>21)&0x1f : 0; + if ((op>>26) == OP__FN3) + return emith_has_(rs,2,op, 0,0x3f) ? (op>>21)&0x1f : 0; if ((op>>26) == OP__RT) - return emith_has_(rs,1,op,16,0x1f) ? (op>>21)&0x1f : 0; - return emith_has_(rs,2,op,26,0x3f) ? (op>>21)&0x1f : 0; + return emith_has_(rs,3,op,16,0x1f) ? (op>>21)&0x1f : 0; + return emith_has_(rs,4,op,26,0x3f) ? (op>>21)&0x1f : 0; } static int emith_rt(u32 op) { if ((op>>26) == OP__FN) return emith_has_(rt,0,op, 0,0x3f) ? (op>>16)&0x1f : 0; + if ((op>>26) == OP__FN2) + return emith_has_(rt,1,op, 0,0x3f) ? (op>>16)&0x1f : 0; + if ((op>>26) == OP__FN3) + return emith_has_(rt,2,op, 0,0x3f) ? (op>>16)&0x1f : 0; if ((op>>26) == OP__RT) return 0; - return emith_has_(rt,2,op,26,0x3f) ? (op>>16)&0x1f : 0; + return emith_has_(rt,4,op,26,0x3f) ? 
(op>>16)&0x1f : 0; } static int emith_rd(u32 op) - { int ret = emith_has_(rd,2,op,26,0x3f) ? (op>>16)&0x1f :-1; + { int ret = emith_has_(rd,4,op,26,0x3f) ? (op>>16)&0x1f :-1; if ((op>>26) == OP__FN) ret = emith_has_(rd,0,op, 0,0x3f) ? (op>>11)&0x1f :-1; + if ((op>>26) == OP__FN2) + ret = emith_has_(rd,1,op, 0,0x3f) ? (op>>11)&0x1f :-1; + if ((op>>26) == OP__FN3 && (op&0x3f) == FN3_BSHFL) + ret = emith_has_(rd,2,op, 0,0x3f) ? (op>>11)&0x1f :-1; + if ((op>>26) == OP__FN3 && (op&0x3f) != FN3_BSHFL) + ret = emith_has_(rd,2,op, 0,0x3f) ? (op>>16)&0x1f :-1; if ((op>>26) == OP__RT) ret = -1; return (ret ?: -1); // Z0 doesn't have dependencies @@ -970,29 +990,23 @@ static void emith_log_imm(int op, int rd, int rs, u32 imm) #define emith_asr(d, s, cnt) \ EMIT(MIPS_ASR_IMM(d, s, cnt)) -#if defined(__mips_isa_rev) && __mips_isa_rev >= 2 -#define emith_ror(d, s, cnt) \ - EMIT(MIPS_ROR_IMM(d, s, cnt)) -#else #define emith_ror(d, s, cnt) do { \ - EMIT(MIPS_LSL_IMM(AT, s, 32-(cnt))); \ - EMIT(MIPS_LSR_IMM(d, s, cnt)); \ - EMIT(MIPS_OR_REG(d, d, AT)); \ + if (__mips_isa_rev < 2) { \ + EMIT(MIPS_LSL_IMM(AT, s, 32-(cnt))); \ + EMIT(MIPS_LSR_IMM(d, s, cnt)); \ + EMIT(MIPS_OR_REG(d, d, AT)); \ + } else EMIT(MIPS_ROR_IMM(d, s, cnt)); \ } while (0) -#endif #define emith_ror_c(cond, d, s, cnt) \ emith_ror(d, s, cnt) -#if defined(__mips_isa_rev) && __mips_isa_rev >= 2 -#define emith_rol(d, s, cnt) \ - EMIT(MIPS_ROR_IMM(d, s, 32-(cnt))) -#else #define emith_rol(d, s, cnt) do { \ - EMIT(MIPS_LSR_IMM(AT, s, 32-(cnt))); \ - EMIT(MIPS_LSL_IMM(d, s, cnt)); \ - EMIT(MIPS_OR_REG(d, d, AT)); \ + if (__mips_isa_rev < 2) { \ + EMIT(MIPS_LSR_IMM(AT, s, 32-(cnt))); \ + EMIT(MIPS_LSL_IMM(d, s, cnt)); \ + EMIT(MIPS_OR_REG(d, d, AT)); \ + } else EMIT(MIPS_ROR_IMM(d, s, 32-(cnt))); \ } while (0) -#endif #define emith_rorc(d) do { \ emith_lsr(d, d, 1); \ @@ -1082,13 +1096,11 @@ static void emith_log_imm(int op, int rd, int rs, u32 imm) } while (0) // signed/unsigned extend -#if defined(__mips_isa_rev) && 
__mips_isa_rev >= 2 -#define emith_clear_msb(d, s, count) /* bits to clear */ \ - EMIT(MIPS_EXT_IMM(d, s, 0, 32-(count))) -#else #define emith_clear_msb(d, s, count) /* bits to clear */ do { \ u32 t; \ - if ((count) >= 16) { \ + if (__mips_isa_rev >= 2) \ + EMIT(MIPS_EXT_IMM(d, s, 0, 32-(count))); \ + else if ((count) >= 16) { \ t = (count) - 16; \ t = 0xffff >> t; \ emith_and_r_r_imm(d, s, t); \ @@ -1097,27 +1109,19 @@ static void emith_log_imm(int op, int rd, int rs, u32 imm) emith_lsr(d, d, count); \ } \ } while (0) -#endif #define emith_clear_msb_c(cond, d, s, count) \ emith_clear_msb(d, s, count) -#if defined(__mips_isa_rev) && __mips_isa_rev >= 2 #define emith_sext(d, s, count) /* bits to keep */ do { \ - if (count == 8) \ + if (__mips_isa_rev >= 2 && count == 8) \ EMIT(MIPS_SEB_REG(d, s)); \ - else if (count == 16) \ + else if (__mips_isa_rev >= 2 && count == 16) \ EMIT(MIPS_SEH_REG(d, s)); \ else { \ emith_lsl(d, s, 32-(count)); \ emith_asr(d, d, 32-(count)); \ } \ } while (0) -#else -#define emith_sext(d, s, count) /* bits to keep */ do { \ - emith_lsl(d, s, 32-(count)); \ - emith_asr(d, d, 32-(count)); \ -} while (0) -#endif // multiply Rd = Rn*Rm (+ Ra); NB: next 2 insns after MFLO/MFHI mustn't be MULT static u8 *last_lohi; @@ -1716,26 +1720,20 @@ static int emith_cond_check(int cond, int *r) EMITH_SJMP_END(DCOND_EQ); \ } while (0) -#if defined(__mips_isa_rev) && __mips_isa_rev >= 2 -#define emith_write_sr(sr, srcr) \ - EMIT(MIPS_INS_IMM(sr, srcr, 0, 10)) -#else #define emith_write_sr(sr, srcr) do { \ - emith_lsr(sr, sr , 10); emith_lsl(sr, sr, 10); \ - emith_lsl(AT, srcr, 22); emith_lsr(AT, AT, 22); \ - emith_or_r_r(sr, AT); \ + if (__mips_isa_rev < 2) { \ + emith_lsr(sr, sr , 10); emith_lsl(sr, sr, 10); \ + emith_lsl(AT, srcr, 22); emith_lsr(AT, AT, 22); \ + emith_or_r_r(sr, AT); \ + } else EMIT(MIPS_INS_IMM(sr, srcr, 0, 10)); \ } while (0) -#endif -#if defined(__mips_isa_rev) && __mips_isa_rev >= 2 -#define emith_carry_to_t(sr, is_sub) \ - 
EMIT(MIPS_INS_IMM(sr, FC, 0, 1)) -#else #define emith_carry_to_t(sr, is_sub) do { \ - emith_and_r_imm(sr, 0xfffffffe); \ - emith_or_r_r(sr, FC); \ + if (__mips_isa_rev < 2) { \ + emith_and_r_imm(sr, 0xfffffffe); \ + emith_or_r_r(sr, FC); \ + } else EMIT(MIPS_INS_IMM(sr, FC, 0, 1)); \ } while (0) -#endif #define emith_t_to_carry(sr, is_sub) do { \ emith_and_r_r_imm(FC, sr, 1); \ diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 57bfc212..ca9a0550 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -440,7 +440,7 @@ static int rcache_get_tmp(void); static void rcache_free_tmp(int hr); // Note: Register assignment goes by ABI convention. Caller save registers are -// TEMPORARY, the others are PRESERVED. Unusable regs are omitted. +// TEMPORARY, callee save registers are PRESERVED. Unusable regs are omitted. // there must be at least the free (not context or statically mapped) amount of // PRESERVED/TEMPORARY registers used by handlers in worst case (currently 4). // there must be at least 3 PARAM, and PARAM+TEMPORARY must be at least 4. 
@@ -496,6 +496,11 @@ static void REGPARM(2) (*sh2_drc_write8)(u32 a, u32 d); static void REGPARM(2) (*sh2_drc_write16)(u32 a, u32 d); static void REGPARM(2) (*sh2_drc_write32)(u32 a, u32 d); +#ifdef DRC_SR_REG +void REGPARM(1) (*sh2_drc_save_sr)(SH2 *sh2); +void REGPARM(1) (*sh2_drc_restore_sr)(SH2 *sh2); +#endif + // flags for memory access #define MF_SIZEMASK 0x03 // size of access #define MF_POSTINCR 0x10 // post increment (for read_rr) @@ -1578,7 +1583,7 @@ static void rcache_unmap_vreg(int x) FOR_ALL_BITS_SET_DO(cache_regs[x].gregs, i, if (guest_regs[i].flags & GRF_DIRTY) { // if a dirty reg is unmapped save its value to context - if ((~rcache_regs_discard | rcache_regs_now) & (1 << i)) + if (~rcache_regs_discard & (1 << i)) emith_ctx_write(cache_regs[x].hreg, i * 4); guest_regs[i].flags &= ~GRF_DIRTY; } @@ -3107,6 +3112,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) op_flags[i+1] |= OF_BTARGET; // RTE entrypoint in case of SR.IMASK change // unify T and SR since rcache doesn't know about "virtual" guest regs if (ops[i].source & BITMASK1(SHR_T)) ops[i].source |= BITMASK1(SHR_SR); + if (ops[i].dest & BITMASK1(SHR_T)) ops[i].source |= BITMASK1(SHR_SR); if (ops[i].dest & BITMASK1(SHR_T)) ops[i].dest |= BITMASK1(SHR_SR); #if LOOP_DETECTION // loop types detected: @@ -5028,7 +5034,6 @@ static void sh2_generate_utils(void) emith_move_r_r_ptr(arg0, CONTEXT_REG); emith_ctx_read(arg1, offsetof(SH2, drc_tmp)); // tcache_id emith_call(sh2_translate); -/* just after lookup function, jump to address returned */ emith_tst_r_r_ptr(RET_REG, RET_REG); EMITH_SJMP_START(DCOND_EQ); emith_jump_reg_c(DCOND_NE, RET_REG); @@ -5057,8 +5062,8 @@ static void sh2_generate_utils(void) emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx)); emith_add_r_r_r_lsl_ptr(arg1, CONTEXT_REG, arg2, 0); emith_read_r_r_offs(arg3, arg1, offsetof(SH2, rts_cache)); -#if (DRC_DEBUG & 128) emith_cmp_r_r(arg0, arg3); +#if (DRC_DEBUG & 128) EMITH_SJMP_START(DCOND_EQ); 
emith_move_r_ptr_imm(arg3, (uptr)&rcmiss); emith_read_r_r_offs_c(DCOND_NE, arg1, arg3, 0); @@ -5067,7 +5072,6 @@ static void sh2_generate_utils(void) emith_jump_cond(DCOND_NE, sh2_drc_dispatcher); EMITH_SJMP_END(DCOND_EQ); #else - emith_cmp_r_r(arg0, arg3); emith_jump_cond(DCOND_NE, sh2_drc_dispatcher); #endif emith_read_r_r_offs_ptr(arg0, arg1, offsetof(SH2, rts_cache) + sizeof(void *)); @@ -5109,7 +5113,7 @@ static void sh2_generate_utils(void) emith_call(p32x_sh2_write32); // XXX: use sh2_drc_write32? // push PC rcache_get_reg_arg(0, SHR_SP, NULL); - emith_ctx_read(arg1, SHR_PC * 4); + rcache_get_reg_arg(1, SHR_PC, NULL); emith_move_r_r_ptr(arg2, CONTEXT_REG); rcache_invalidate_tmp(); emith_call(p32x_sh2_write32); @@ -5143,6 +5147,24 @@ static void sh2_generate_utils(void) emith_jump(sh2_drc_dispatcher); emith_flush(); +#ifdef DRC_SR_REG + // sh2_drc_save_sr(SH2 *sh2) + sh2_drc_save_sr = (void *)tcache_ptr; + tmp = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); + emith_write_r_r_offs(tmp, arg0, SHR_SR * 4); + rcache_invalidate(); + emith_ret(); + emith_flush(); + + // sh2_drc_restore_sr(SH2 *sh2) + sh2_drc_restore_sr = (void *)tcache_ptr; + tmp = rcache_get_reg(SHR_SR, RC_GR_WRITE, NULL); + emith_read_r_r_offs(tmp, arg0, SHR_SR * 4); + rcache_flush(); + emith_ret(); + emith_flush(); +#endif + #ifdef PDB_NET // debug #define MAKE_READ_WRAPPER(func) { \ @@ -5204,6 +5226,10 @@ static void sh2_generate_utils(void) host_dasm_new_symbol(sh2_drc_read8_poll); host_dasm_new_symbol(sh2_drc_read16_poll); host_dasm_new_symbol(sh2_drc_read32_poll); +#ifdef DRC_SR_REG + host_dasm_new_symbol(sh2_drc_save_sr); + host_dasm_new_symbol(sh2_drc_restore_sr); +#endif #endif #if DRC_DEBUG @@ -5273,12 +5299,12 @@ static void sh2_smc_rm_blocks(u32 a, int len, int tcache_id, u32 shift) #endif } -void sh2_drc_wcheck_ram(unsigned int a, unsigned len, SH2 *sh2) +void sh2_drc_wcheck_ram(u32 a, unsigned len, SH2 *sh2) { sh2_smc_rm_blocks(a, len, 0, SH2_DRCBLK_RAM_SHIFT); } -void 
sh2_drc_wcheck_da(unsigned int a, unsigned len, SH2 *sh2) +void sh2_drc_wcheck_da(u32 a, unsigned len, SH2 *sh2) { sh2_smc_rm_blocks(a, len, 1 + sh2->is_slave, SH2_DRCBLK_DA_SHIFT); } @@ -5295,7 +5321,7 @@ int sh2_execute_drc(SH2 *sh2c, int cycles) sh2_drc_entry(sh2c); // TODO: irq cycles - ret_cycles = (signed int)sh2c->sr >> 12; + ret_cycles = (int32_t)sh2c->sr >> 12; if (ret_cycles > 0) dbg(1, "warning: drc returned with cycles: %d", ret_cycles); @@ -5777,6 +5803,7 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, break; case 1: // DIV0U 0000000000011001 CHECK_UNHANDLED_BITS(0xf00, undefined); + opd->source = BITMASK1(SHR_SR); opd->dest = BITMASK2(SHR_SR, SHR_T); break; case 2: // MOVT Rn 0000nnnn00101001 @@ -5877,7 +5904,7 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, opd->dest = BITMASK2(GET_Rn(), SHR_MEM); break; case 0x07: // DIV0S Rm,Rn 0010nnnnmmmm0111 - opd->source = BITMASK2(GET_Rm(), GET_Rn()); + opd->source = BITMASK3(SHR_SR, GET_Rm(), GET_Rn()); opd->dest = BITMASK2(SHR_SR, SHR_T); break; case 0x08: // TST Rm,Rn 0010nnnnmmmm1000 @@ -6470,6 +6497,9 @@ end: last_btarget = 0; op = 0; // delay/poll insns counter for (i = 0, pc = base_pc; i < i_end; i++, pc += 2) { + int null; + if ((op_flags[i] & OF_BTARGET) && dr_get_entry(pc, is_slave, &null)) + break; // branch target already compiled opd = &ops[i]; crc += FETCH_OP(pc); diff --git a/cpu/sh2/compiler.h b/cpu/sh2/compiler.h index 44620f48..804f2a70 100644 --- a/cpu/sh2/compiler.h +++ b/cpu/sh2/compiler.h @@ -1,7 +1,7 @@ int sh2_drc_init(SH2 *sh2); void sh2_drc_finish(SH2 *sh2); -void sh2_drc_wcheck_ram(unsigned int a, unsigned len, SH2 *sh2); -void sh2_drc_wcheck_da(unsigned int a, unsigned len, SH2 *sh2); +void sh2_drc_wcheck_ram(uint32_t a, unsigned len, SH2 *sh2); +void sh2_drc_wcheck_da(uint32_t a, unsigned len, SH2 *sh2); #ifdef DRC_SH2 void sh2_drc_mem_setup(SH2 *sh2); @@ -28,13 +28,13 @@ void sh2_drc_frame(void); #define OF_DELAY_LOOP (2 << 
2) #define OF_POLL_LOOP (3 << 2) -unsigned short scan_block(unsigned int base_pc, int is_slave, - unsigned char *op_flags, unsigned int *end_pc, - unsigned int *base_literals, unsigned int *end_literals); +unsigned short scan_block(uint32_t base_pc, int is_slave, + unsigned char *op_flags, uint32_t *end_pc, + uint32_t *base_literals, uint32_t *end_literals); -#if defined(DRC_SH2) -// direct access to some host CPU registers used by the DRC -// XXX MUST match definitions for SHR_SR in cpu/sh2/compiler.c +#if defined(DRC_SH2) && defined(__GNUC__) +// direct access to some host CPU registers used by the DRC +// XXX MUST match definitions for SHR_SR in cpu/drc/emit_*.c #if defined(__arm__) #define DRC_SR_REG "r10" #elif defined(__aarch64__) @@ -47,19 +47,20 @@ unsigned short scan_block(unsigned int base_pc, int is_slave, #define DRC_SR_REG "edi" #elif defined(__x86_64__) #define DRC_SR_REG "ebx" -#else -#warning "direct DRC register access not available for this host" #endif #endif #ifdef DRC_SR_REG -#define DRC_DECLARE_SR register int sh2_sr asm(DRC_SR_REG) +extern void REGPARM(1) (*sh2_drc_save_sr)(SH2 *sh2); +extern void REGPARM(1) (*sh2_drc_restore_sr)(SH2 *sh2); + +#define DRC_DECLARE_SR register int32_t sh2_sr asm(DRC_SR_REG) #define DRC_SAVE_SR(sh2) \ - if ((sh2->state & (SH2_STATE_RUN|SH2_STATE_SLEEP)) == SH2_STATE_RUN) \ - sh2->sr = sh2_sr; + if (likely((sh2->state & (SH2_STATE_RUN|SH2_STATE_SLEEP)) == SH2_STATE_RUN)) \ + sh2_drc_save_sr(sh2) #define DRC_RESTORE_SR(sh2) \ - if ((sh2->state & (SH2_STATE_RUN|SH2_STATE_SLEEP)) == SH2_STATE_RUN) \ - sh2_sr = sh2->sr; + if (likely((sh2->state & (SH2_STATE_RUN|SH2_STATE_SLEEP)) == SH2_STATE_RUN)) \ + sh2_drc_restore_sr(sh2) #else #define DRC_DECLARE_SR #define DRC_SAVE_SR(sh2) diff --git a/cpu/sh2/sh2.h b/cpu/sh2/sh2.h index 5f1a8841..2d73db59 100644 --- a/cpu/sh2/sh2.h +++ b/cpu/sh2/sh2.h @@ -14,13 +14,13 @@ typedef enum { typedef struct SH2_ { // registers. this MUST correlate with enum sh2_reg_e. 
- unsigned int r[16] ALIGNED(32); - unsigned int pc; // 40 - unsigned int ppc; - unsigned int pr; - unsigned int sr; - unsigned int gbr, vbr; // 50 - unsigned int mach, macl; // 58 + uint32_t r[16] ALIGNED(32); + uint32_t pc; // 40 + uint32_t ppc; + uint32_t pr; + uint32_t sr; + uint32_t gbr, vbr; // 50 + uint32_t mach, macl; // 58 // common const void *read8_map; @@ -48,14 +48,14 @@ typedef struct SH2_ #define SH2_STATE_VPOLL (1 << 3) // polling VDP #define SH2_STATE_RPOLL (1 << 4) // polling address in SDRAM unsigned int state; - unsigned int poll_addr; + uint32_t poll_addr; int poll_cycles; int poll_cnt; // DRC branch cache. size must be 2^n and <=128 int rts_cache_idx; - struct { unsigned int pc; void *code; } rts_cache[16]; - struct { unsigned int pc; void *code; } branch_cache[128]; + struct { uint32_t pc; void *code; } rts_cache[16]; + struct { uint32_t pc; void *code; } branch_cache[128]; // interpreter stuff int icount; // cycles left in current timeslice @@ -79,15 +79,15 @@ typedef struct SH2_ unsigned int mult_m68k_to_sh2; unsigned int mult_sh2_to_m68k; - unsigned char data_array[0x1000]; // cache (can be used as RAM) - unsigned int peri_regs[0x200/4]; // periphereal regs + uint8_t data_array[0x1000]; // cache (can be used as RAM) + uint32_t peri_regs[0x200/4]; // periphereal regs } SH2; #define CYCLE_MULT_SHIFT 10 #define C_M68K_TO_SH2(xsh2, c) \ - (int)(((unsigned long long)(c) * (xsh2)->mult_m68k_to_sh2) >> CYCLE_MULT_SHIFT) + (int)(((uint64_t)(c) * (xsh2)->mult_m68k_to_sh2) >> CYCLE_MULT_SHIFT) #define C_SH2_TO_M68K(xsh2, c) \ - (int)(((unsigned long long)(c+3U) * (xsh2)->mult_sh2_to_m68k) >> CYCLE_MULT_SHIFT) + (int)(((uint64_t)(c+3U) * (xsh2)->mult_sh2_to_m68k) >> CYCLE_MULT_SHIFT) int sh2_init(SH2 *sh2, int is_slave, SH2 *other_sh2); void sh2_finish(SH2 *sh2); diff --git a/pico/arm_features.h b/pico/arm_features.h index 4b456f45..b772b77c 100644 --- a/pico/arm_features.h +++ b/pico/arm_features.h @@ -60,9 +60,8 @@ // load data address (LDR) either 
via literal pool or via GOT #ifdef __PIC__ -// can't use pool loads since ldr= only allows symbol or constants, not expr :-( +// can't use pool loads since ldr= only allows a symbol or a constant expr :-( #define PIC_LDR_INIT() \ - .ifndef PIC_LDR_DEF; PIC_LDR_DEF=1; \ .macro pic_ldr r t a; \ ldr \r, [pc, $.LD\@-.-8]; \ ldr \t, [pc, $.LD\@-.-4]; \ @@ -71,14 +70,11 @@ add pc, $4; \ .LD\@:.word _GLOBAL_OFFSET_TABLE_-.LP\@-8; \ .word \a(GOT); \ - .endm; \ - .endif; -#define PIC_LDR(r,t,a) \ - pic_ldr r, t, a + .endm; +#define PIC_LDR(r,t,a) pic_ldr r, t, a #else #define PIC_LDR_INIT() -#define PIC_LDR(r,t,a) \ - ldr r, =a +#define PIC_LDR(r,t,a) ldr r, =a #endif #endif /* __ARM_FEATURES_H__ */ diff --git a/pico/pico_port.h b/pico/pico_port.h index e26e6ca2..af9ce853 100644 --- a/pico/pico_port.h +++ b/pico/pico_port.h @@ -17,10 +17,12 @@ #define NOINLINE __attribute__((noinline)) #define ALIGNED(n) __attribute__((aligned(n))) #define unlikely(x) __builtin_expect((x), 0) +#define likely(x) __builtin_expect(!!(x), 1) #else #define NOINLINE #define ALIGNED(n) #define unlikely(x) (x) +#define likely(x) (x) #endif #ifdef _MSC_VER diff --git a/platform/common/disarm.c b/platform/common/disarm.c index 37fd810e..24992206 100644 --- a/platform/common/disarm.c +++ b/platform/common/disarm.c @@ -435,7 +435,7 @@ static int software_interrupt(unsigned int pc, unsigned int insn, char *buf, siz return 1; } -int disarm(uintptr_t pc, uint32_t insn, char *buf, size_t buf_len, uintptr_t *addr) +int disarm(uintptr_t pc, uint32_t insn, char *buf, size_t buf_len, unsigned long *addr) { *addr = 0; @@ -467,7 +467,7 @@ int disarm(uintptr_t pc, uint32_t insn, char *buf, size_t buf_len, uintptr_t *ad return block_data_transfer(pc, insn, buf, buf_len); if ((insn & 0x0e000000) == 0x0a000000) { - *addr = (long)pc + 8 + ((long)(insn << 8) >> 6); + *addr = (unsigned long)pc+8 + ((unsigned long)(insn << 8) >> 6); return branch(pc, insn, buf, buf_len); } diff --git a/platform/common/disarm.h 
b/platform/common/disarm.h index f1170894..a07675fd 100644 --- a/platform/common/disarm.h +++ b/platform/common/disarm.h @@ -23,6 +23,6 @@ #ifndef DISARM_H #define DISARM_H -int disarm(uintptr_t pc, uint32_t insn, char *buf, size_t buf_len, uintptr_t *sym); +int disarm(uintptr_t pc, uint32_t insn, char *buf, size_t buf_len, unsigned long *sym); #endif /* DISARM_H */ diff --git a/platform/common/dismips.c b/platform/common/dismips.c index 19c0b427..dc06ce80 100644 --- a/platform/common/dismips.c +++ b/platform/common/dismips.c @@ -6,8 +6,9 @@ * See COPYING file in the top-level directory. */ -// XXX unimplemented: SYSCALL, BREAK, SYNC, SDBBP, T*, CACHE, PREF, -// MOVF/MOVT, LWC*/LDC*, SWC*/SDC*, COP*. +// unimplemented insns: MOV[FT], SYSCALL, BREAK, SYNC, SYNCI, T*, SDBBP, RDHWR, +// CACHE, PREF, LWC*/LDC*, SWC*/SDC*, and all of COP* (fpu, mmu, irq, exc, ...) +// unimplemented variants of insns: EHB, SSNOP (both SLL zero), JALR.HB, JR.HB // however, it's certainly good enough for anything picodrive DRC throws at it. 
#include @@ -79,6 +80,7 @@ struct insn { #define OP_SPECIAL 0x00 static const struct insn special_insns[] = { {0x00, S_IMM_DT, "sll"}, +// {0x01, , "movf\0movt"}, {0x02, S_IMM_DT|SR_BIT, "srl\0rotr"}, {0x03, S_IMM_DT, "sra"}, {0x04, REG_DTS, "sllv"}, @@ -146,6 +148,7 @@ static const struct insn special2_insns[] = { {0x21, REG_DS, "clo" }, {0x24, REG_DS, "dclz" }, {0x25, REG_DS, "dclo" }, +// {0x37, , "sdbbp" }, }; // instructions with opcode SPECIAL3 (R-type) @@ -159,6 +162,7 @@ static const struct insn special3_insns[] = { {0x05, F_IMM_TS, "dinsm" }, {0x06, F_IMM_TS, "dinsu" }, {0x07, F_IMM_TS, "dins" }, +// {0x3b, , "rdhwr" }, }; // instruction with opcode SPECIAL3 and function *BSHFL @@ -192,6 +196,7 @@ static const struct insn regimm_insns[] = { {0x12, B_IMM_S, "bltzall"}, {0x13, B_IMM_S, "bgezall"}, {0x13, B_IMM_S, "bgezall"}, +// {0x1f, , "synci" }, }; // instructions with other opcodes (I-type) @@ -316,7 +321,7 @@ static unsigned long j_target(unsigned long pc, uint32_t insn) } // main disassembler function -int dismips(uintptr_t pc, uint32_t insn, char *buf, size_t buflen, uintptr_t *sym) +int dismips(uintptr_t pc, uint32_t insn, char *buf, size_t buflen, unsigned long *sym) { const struct insn *pi = decode_insn(insn); char *rs = register_names[(insn >> 21) & 0x1f]; diff --git a/platform/common/dismips.h b/platform/common/dismips.h index b547003b..8d105925 100644 --- a/platform/common/dismips.h +++ b/platform/common/dismips.h @@ -1,6 +1,6 @@ #ifndef DISMIPS_H #define DISMIPS_H -int dismips(uintptr_t pc, uint32_t insn, char *buf, size_t buf_len, uintptr_t *sym); +int dismips(uintptr_t pc, uint32_t insn, char *buf, size_t buf_len, unsigned long *sym); #endif /* DISMIPS_H */ diff --git a/platform/common/host_dasm.c b/platform/common/host_dasm.c index fc3cbe67..2084aa91 100644 --- a/platform/common/host_dasm.c +++ b/platform/common/host_dasm.c @@ -37,14 +37,14 @@ void host_dasm(void *addr, int len) void *end = (char *)addr + len; const char *name; char buf[64]; 
- long insn, symaddr; + unsigned long insn, symaddr; while (addr < end) { name = lookup_name(addr); if (name != NULL) printf("%s:\n", name); - insn = *(long *)addr; + insn = *(unsigned long *)addr; printf(" %08lx %08lx ", (long)addr, insn); if(disasm((unsigned)addr, insn, buf, sizeof(buf), &symaddr)) { diff --git a/tools/mkoffsets.sh b/tools/mkoffsets.sh index 349b8605..8a1092e0 100755 --- a/tools/mkoffsets.sh +++ b/tools/mkoffsets.sh @@ -12,15 +12,12 @@ ENDIAN= compile_rodata () { $CC $CFLAGS -I .. -c /tmp/getoffs.c -o /tmp/getoffs.o || exit 1 - # echo 'void dummy(void) { asm(""::"r" (&val)); }' >> /tmp/getoffs.c - # $CC $CFLAGS -I .. -nostdlib -Wl,-edummy /tmp/getoffs.c \ - # -o /tmp/getoffs.o || exit 1 # find the name of the .rodata section (in case -fdata-sections is used) rosect=$(readelf -S /tmp/getoffs.o | grep '\.rodata\|\.sdata' | sed 's/^[^.]*././;s/ .*//') - # read out .rodata section as hex string (should be only 4 or 8 bytes) + # read out .rodata section as hex string (should be only 4 bytes) ro=$(readelf -x $rosect /tmp/getoffs.o | grep '0x' | cut -c14-48 | - tr -d ' \n') + tr -d ' \n' | cut -c1-8) if [ "$ENDIAN" = "le" ]; then # swap needed for le target hex="" @@ -41,16 +38,18 @@ get_define () # prefix struct member member... 
struct=$1; shift field=$(echo $* | sed 's/ /./g') name=$(echo $* | sed 's/ /_/g') - echo '#include "pico/pico_int.h"' > /tmp/getoffs.c + echo '#include ' > /tmp/getoffs.c + echo '#include "pico/pico_int.h"' >> /tmp/getoffs.c echo "static const struct $struct p;" >> /tmp/getoffs.c - echo "const int val = (char *)&p.$field - (char*)&p;" >>/tmp/getoffs.c + echo "const int32_t val = (char *)&p.$field - (char*)&p;" >>/tmp/getoffs.c compile_rodata line=$(printf "#define %-20s 0x%04x" $prefix$name $rodata) } if echo $CFLAGS | grep -qe -flto; then CFLAGS="$CFLAGS -fno-lto"; fi # determine endianess -echo "const int val = 1;" >/tmp/getoffs.c +echo '#include ' >/tmp/getoffs.c +echo "const int32_t val = 1;" >>/tmp/getoffs.c compile_rodata ENDIAN=$(if [ "$rodata" -eq 1 ]; then echo be; else echo le; fi) # output header From a5e51c16e6bf3d2e5bbc09e517a99c046fc2e111 Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 13 Dec 2019 18:23:03 +0100 Subject: [PATCH 0248/1110] sh2 drc: fix speed regression --- cpu/drc/emit_arm.c | 2 +- cpu/drc/emit_arm64.c | 2 +- cpu/drc/emit_mips.c | 2 +- cpu/drc/emit_riscv.c | 2 +- cpu/drc/emit_x86.c | 2 +- cpu/sh2/compiler.h | 32 ++++++++++++++++++++++++-------- cpu/sh2/sh2.h | 1 + pico/32x/memory.c | 10 +++++----- 8 files changed, 35 insertions(+), 18 deletions(-) diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index 8ea148eb..af9491f1 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -20,7 +20,7 @@ #define TEMPORARY_REGS { 12, 14 } #define CONTEXT_REG 11 -#define STATIC_SH2_REGS { SHR_SR,10 , SHR_R0,8 , SHR_R0+1,9 } +#define STATIC_SH2_REGS { SHR_SR,10 , SHR_R(0),8 , SHR_R(1),9 } // XXX: tcache_ptr type for SVP and SH2 compilers differs.. 
#define EMIT_PTR(ptr, x) \ diff --git a/cpu/drc/emit_arm64.c b/cpu/drc/emit_arm64.c index 26fede3a..8d1a7dd1 100644 --- a/cpu/drc/emit_arm64.c +++ b/cpu/drc/emit_arm64.c @@ -15,7 +15,7 @@ #define TEMPORARY_REGS { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 } #define CONTEXT_REG 29 -#define STATIC_SH2_REGS { SHR_SR,28 , SHR_R0,27 , SHR_R0+1,26 } +#define STATIC_SH2_REGS { SHR_SR,28 , SHR_R(0),27 , SHR_R(1),26 } // R31 doesn't exist, it aliases either with zero or SP #define SP 31 // stack pointer diff --git a/cpu/drc/emit_mips.c b/cpu/drc/emit_mips.c index 765986a6..8cb094de 100644 --- a/cpu/drc/emit_mips.c +++ b/cpu/drc/emit_mips.c @@ -17,7 +17,7 @@ #define TEMPORARY_REGS { 2, 3, 8, 9, 10, 11, 12, 13, 14 } // v0-v1,t0-t6 #define CONTEXT_REG 23 // s7 -#define STATIC_SH2_REGS { SHR_SR,22 , SHR_R0,21 , SHR_R0+1,20 } +#define STATIC_SH2_REGS { SHR_SR,22 , SHR_R(0),21 , SHR_R(1),20 } // NB: the ubiquitous JZ74[46]0 uses MIPS32 Release 1, a slight MIPS II superset #ifndef __mips_isa_rev diff --git a/cpu/drc/emit_riscv.c b/cpu/drc/emit_riscv.c index ed45e01c..90234b22 100644 --- a/cpu/drc/emit_riscv.c +++ b/cpu/drc/emit_riscv.c @@ -16,7 +16,7 @@ #define TEMPORARY_REGS { 5, 6, 7 } // t0-t2 #define CONTEXT_REG 9 // s1 -#define STATIC_SH2_REGS { SHR_SR,27 , SHR_R0,26 , SHR_R0+1,25 } +#define STATIC_SH2_REGS { SHR_SR,27 , SHR_R(0),26 , SHR_R(1),25 } // registers usable for user code: r1-r25, others reserved or special #define Z0 0 // zero register diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 0b3f7697..ec13551e 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -1072,7 +1072,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common #define PARAM_REGS { xCX, xDX, xR8, xR9 } #define PRESERVED_REGS { xSI, xDI, xR12, xR13, xR14, xR15, xBX, xBP } #define TEMPORARY_REGS { xAX, xR10, xR11 } -#define STATIC_SH2_REGS { SHR_SR,xBX , SHR_R0,xR15 , SH2_R0+1,xR14 } +#define STATIC_SH2_REGS { SHR_SR,xBX , SHR_R(0),xR15 , SH2_R(1),xR14 } #define 
host_arg2reg(rd, arg) \ switch (arg) { \ diff --git a/cpu/sh2/compiler.h b/cpu/sh2/compiler.h index 804f2a70..dd37d470 100644 --- a/cpu/sh2/compiler.h +++ b/cpu/sh2/compiler.h @@ -33,34 +33,50 @@ unsigned short scan_block(uint32_t base_pc, int is_slave, uint32_t *base_literals, uint32_t *end_literals); #if defined(DRC_SH2) && defined(__GNUC__) -// direct access to some host CPU registers used by the DRC -// XXX MUST match definitions for SHR_SR in cpu/drc/emit_*.c +// direct access to some host CPU registers used by the DRC if gcc is used. +// XXX MUST match SHR_SR definitions in cpu/drc/emit_*.c; should be moved there +// XXX yuck, there's no portable way to determine register size. Use long long +// if target is 64 bit and data model is ILP32 or LLP64(windows), else long #if defined(__arm__) #define DRC_SR_REG "r10" +#define DRC_REG_LL 0 // 32 bit #elif defined(__aarch64__) #define DRC_SR_REG "r28" +#define DRC_REG_LL (__ILP32__ || _WIN32) #elif defined(__mips__) #define DRC_SR_REG "s6" +#define DRC_REG_LL (_MIPS_SIM == _ABIN32) #elif defined(__riscv__) || defined(__riscv) #define DRC_SR_REG "s11" +#define DRC_REG_LL 0 // no ABI for (__ILP32__ && __riscv_xlen != 32) #elif defined(__i386__) #define DRC_SR_REG "edi" +#define DRC_REG_LL 0 // 32 bit #elif defined(__x86_64__) -#define DRC_SR_REG "ebx" +#define DRC_SR_REG "rbx" +#define DRC_REG_LL (__ILP32__ || _WIN32) #endif #endif #ifdef DRC_SR_REG +// XXX this is more clear but produces too much overhead for slow platforms extern void REGPARM(1) (*sh2_drc_save_sr)(SH2 *sh2); extern void REGPARM(1) (*sh2_drc_restore_sr)(SH2 *sh2); -#define DRC_DECLARE_SR register int32_t sh2_sr asm(DRC_SR_REG) +// NB: sh2_sr MUST have register size if optimizing with -O3 (-fif-conversion) +#if DRC_REG_LL +#define DRC_DECLARE_SR register long long _sh2_sr asm(DRC_SR_REG) +#else +#define DRC_DECLARE_SR register long _sh2_sr asm(DRC_SR_REG) +#endif #define DRC_SAVE_SR(sh2) \ - if (likely((sh2->state & (SH2_STATE_RUN|SH2_STATE_SLEEP)) == 
SH2_STATE_RUN)) \ - sh2_drc_save_sr(sh2) + if (likely((sh2->state&(SH2_STATE_RUN|SH2_STATE_SLEEP)) == SH2_STATE_RUN)) \ + sh2->sr = (s32)_sh2_sr +// sh2_drc_save_sr(sh2) #define DRC_RESTORE_SR(sh2) \ - if (likely((sh2->state & (SH2_STATE_RUN|SH2_STATE_SLEEP)) == SH2_STATE_RUN)) \ - sh2_drc_restore_sr(sh2) + if (likely((sh2->state&(SH2_STATE_RUN|SH2_STATE_SLEEP)) == SH2_STATE_RUN)) \ + _sh2_sr = (s32)sh2->sr +// sh2_drc_restore_sr(sh2) #else #define DRC_DECLARE_SR #define DRC_SAVE_SR(sh2) diff --git a/cpu/sh2/sh2.h b/cpu/sh2/sh2.h index 2d73db59..2f2dfd92 100644 --- a/cpu/sh2/sh2.h +++ b/cpu/sh2/sh2.h @@ -10,6 +10,7 @@ typedef enum { SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL, SH2_REGS // register set size } sh2_reg_e; +#define SHR_R(n) (SHR_R0+(n)) typedef struct SH2_ { diff --git a/pico/32x/memory.c b/pico/32x/memory.c index 44bc72d7..30d9b577 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -254,14 +254,14 @@ static NOINLINE void sh2_poll_write(u32 a, u32 d, unsigned int cycles, SH2 *sh2) sh2_poll_rd[hix] = rd; sh2_poll_wr[hix] = wr; } -u32 REGPARM(3) p32x_sh2_poll_memory8(unsigned int a, u32 d, SH2 *sh2) +u32 REGPARM(3) p32x_sh2_poll_memory8(u32 a, u32 d, SH2 *sh2) { int shift = (a & 1 ? 
0 : 8); d = (s8)(p32x_sh2_poll_memory16(a & ~1, d << shift, sh2) >> shift); return d; } -u32 REGPARM(3) p32x_sh2_poll_memory16(unsigned int a, u32 d, SH2 *sh2) +u32 REGPARM(3) p32x_sh2_poll_memory16(u32 a, u32 d, SH2 *sh2) { unsigned char *p = sh2->p_drcblk_ram; unsigned int cycles; @@ -281,7 +281,7 @@ u32 REGPARM(3) p32x_sh2_poll_memory16(unsigned int a, u32 d, SH2 *sh2) return d; } -u32 REGPARM(3) p32x_sh2_poll_memory32(unsigned int a, u32 d, SH2 *sh2) +u32 REGPARM(3) p32x_sh2_poll_memory32(u32 a, u32 d, SH2 *sh2) { unsigned char *p = sh2->p_drcblk_ram; unsigned int cycles; @@ -2017,9 +2017,9 @@ int p32x_sh2_memcpy(u32 dst, u32 src, int count, int size, SH2 *sh2) // ----------------------------------------------------------------- -static void z80_md_bank_write_32x(unsigned int a, unsigned char d) +static void z80_md_bank_write_32x(u32 a, unsigned char d) { - unsigned int addr68k; + u32 addr68k; addr68k = Pico.m.z80_bank68k << 15; addr68k += a & 0x7fff; From 0e12269073557d8e7bc6e917db0d362d8552237a Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 21 Dec 2019 16:33:52 +0100 Subject: [PATCH 0249/1110] sh2 drc: optimize T bit handling for A64 --- Makefile | 5 +-- cpu/drc/emit_arm64.c | 74 ++++++++++++++++++++++++++++---------------- cpu/drc/emit_riscv.c | 5 +-- cpu/sh2/compiler.c | 12 +++---- 4 files changed, 58 insertions(+), 38 deletions(-) diff --git a/Makefile b/Makefile index 0a0ab127..49116ce0 100644 --- a/Makefile +++ b/Makefile @@ -36,10 +36,11 @@ endif ifeq ("$(PLATFORM)",$(filter "$(PLATFORM)","gp2x" "opendingux" "rpi1")) # very small caches, avoid optimization options making the binary much bigger -CFLAGS += -finline-limit=42 -fno-unroll-loops -fno-ipa-cp +CFLAGS += -finline-limit=43 -fno-unroll-loops -fno-ipa-cp -ffast-math # this gets you about 20% better execution speed on 32bit arm/mips -CFLAGS += -fno-common -fno-stack-protector -fno-guess-branch-probability -fno-caller-saves -fno-tree-loop-if-convert -ffast-math +CFLAGS += -fno-common 
-fno-stack-protector -fno-guess-branch-probability -fno-caller-saves -fno-tree-loop-if-convert -fno-regmove endif +#OBJS += align.o # default settings ifeq "$(ARCH)" "arm" diff --git a/cpu/drc/emit_arm64.c b/cpu/drc/emit_arm64.c index 8d1a7dd1..2e873161 100644 --- a/cpu/drc/emit_arm64.c +++ b/cpu/drc/emit_arm64.c @@ -44,10 +44,11 @@ #define A64_COND_LE 0xd #define A64_COND_CS A64_COND_HS #define A64_COND_CC A64_COND_LO +// "fake" conditions for T bit handling #define A64_COND_AL 0xe #define A64_COND_NV 0xf -/* unified conditions */ +// DRC conditions #define DCOND_EQ A64_COND_EQ #define DCOND_NE A64_COND_NE #define DCOND_MI A64_COND_MI @@ -261,6 +262,13 @@ enum { XT_UXTW=0x4, XT_UXTX=0x6, XT_LSL=0x7, XT_SXTW=0xc, XT_SXTX=0xe }; #define A64_BCOND(cond, offs19) \ A64_INSN(0xa,0x2,_,_,_,_,_,(offs19) >> 2,(cond)) +// conditional select + +#define A64_CINC(cond, rn, rm) \ + A64_INSN(0xd,0x0,0x2,0,rm,(cond)^1,0x1,rm,rn) /* CSINC */ +#define A64_CSET(cond, rn) \ + A64_CINC(cond, rn, Z0) + // load pc-relative #define A64_LDRLIT_IMM(rd, offs19) \ @@ -1356,38 +1364,52 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) #ifdef T // T bit handling +static int tcond = -1; + #define emith_invert_cond(cond) \ ((cond) ^ 1) -static void emith_clr_t_cond(int sr) +#define emith_clr_t_cond(sr) \ + (void)sr + +#define emith_set_t_cond(sr, cond) \ + tcond = cond + +#define emith_get_t_cond() \ + tcond + +#define emith_invalidate_t() \ + tcond = -1 + +#define emith_set_t(sr, val) \ + tcond = ((val) ? 
A64_COND_AL: A64_COND_NV) + +static void emith_sync_t(int sr) { - emith_bic_r_imm(sr, T); -} - -static void emith_set_t_cond(int sr, int cond) -{ - EMITH_SJMP_START(emith_invert_cond(cond)); - emith_or_r_imm_c(cond, sr, T); - EMITH_SJMP_END(emith_invert_cond(cond)); -} - -#define emith_get_t_cond() -1 - -#define emith_sync_t(sr) ((void)sr) - -#define emith_invalidate_t() - -static void emith_set_t(int sr, int val) -{ - if (val) - emith_or_r_imm(sr, T); - else - emith_bic_r_imm(sr, T); + if (tcond == A64_COND_AL) + emith_or_r_imm(sr, T); + else if (tcond == A64_COND_NV) + emith_bic_r_imm(sr, T); + else if (tcond >= 0) { + int tmp = rcache_get_tmp(); + EMIT(A64_CSET(tcond, tmp)); + EMIT(A64_BFI_IMM(sr, tmp, 0, 1)); // assumes SR.T = bit 0 + rcache_free_tmp(tmp); + } + tcond = -1; } static int emith_tst_t(int sr, int tf) { - emith_tst_r_imm(sr, T); - return tf ? DCOND_NE: DCOND_EQ; + if (tcond < 0) { + emith_tst_r_imm(sr, T); + return tf ? DCOND_NE: DCOND_EQ; + } else if (tcond >= A64_COND_AL) { + // MUST sync because A64_COND_AL/NV isn't a real condition + emith_sync_t(sr); + emith_tst_r_imm(sr, T); + return tf ? DCOND_NE: DCOND_EQ; + } else + return tf ? 
tcond : emith_invert_cond(tcond); } #endif diff --git a/cpu/drc/emit_riscv.c b/cpu/drc/emit_riscv.c index 90234b22..69ed530e 100644 --- a/cpu/drc/emit_riscv.c +++ b/cpu/drc/emit_riscv.c @@ -87,8 +87,6 @@ enum { F1_B, F1_H, F1_W, F1_D, F1_BU, F1_HU, F1_WU }; // LD/ST // func7 enum { F2_ALT=0x20, F2_MULDIV=0x01 }; -#define __(n) o##n // enum marker for "undefined" - #define R5_NOP R5_I_INSN(OP_IMM, F1_ADD, Z0, Z0, 0) // nop: ADDI r0, r0, #0 // arithmetic/logical @@ -687,9 +685,8 @@ static void emith_pool_check(void) static void emith_move_imm(int r, uintptr_t imm) { - u32 lui = imm + _CB(imm,1,11,12); + u32 lui = imm + _CB(imm,1,11,12); // compensate for ADDI sign extension if (lui >> 12) { - // take out the effect of the sign extension of ADDI EMIT(R5_MOVT_IMM(r, lui)); if (imm & 0xfff) EMIT(R5_ADD_IMM(r, r, imm)); diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index ca9a0550..bd3e5b43 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -446,7 +446,6 @@ static void rcache_free_tmp(int hr); // there must be at least 3 PARAM, and PARAM+TEMPORARY must be at least 4. // SR must and R0 should by all means be statically mapped. // XXX the static definition of SR MUST match that in compiler.h -// PC and PR must not be statically mapped (accessed in context by utils). 
#ifdef __arm__ #include "../drc/emit_arm.c" @@ -3365,7 +3364,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) rcache_get_reg_arg(2, SHR_SR, NULL); tmp2 = rcache_get_tmp_arg(0); tmp3 = rcache_get_tmp_arg(1); - tmp4 = rcache_get_tmp_arg(3); + tmp4 = rcache_get_tmp(); emith_move_r_ptr_imm(tmp2, tcache_ptr); emith_move_r_r_ptr(tmp3, CONTEXT_REG); emith_move_r_imm(tmp4, pc); @@ -5049,11 +5048,12 @@ static void sh2_generate_utils(void) emith_add_r_imm(arg2, (u32)(2*sizeof(void *))); emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *)); emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx)); - emith_add_r_r_r_lsl_ptr(arg2, CONTEXT_REG, arg2, 0); - emith_ctx_read(arg3, SHR_PR * 4); + emith_add_r_r_r_lsl_ptr(arg3, CONTEXT_REG, arg2, 0); + rcache_get_reg_arg(2, SHR_PR, NULL); emith_add_r_ret(arg1); - emith_write_r_r_offs_ptr(arg1, arg2, offsetof(SH2, rts_cache)+sizeof(void *)); - emith_write_r_r_offs(arg3, arg2, offsetof(SH2, rts_cache)); + emith_write_r_r_offs_ptr(arg1, arg3, offsetof(SH2, rts_cache)+sizeof(void *)); + emith_write_r_r_offs(arg2, arg3, offsetof(SH2, rts_cache)); + rcache_flush(); emith_ret(); emith_flush(); From 9090dc0f22e209e22ad3d1531645875b6e2bde3c Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 21 Dec 2019 22:54:40 +0100 Subject: [PATCH 0250/1110] sh2 drc: updates from mame for ym2612 sound --- pico/sound/ym2612.c | 70 +++++++++++++++++++++++++++-------------- pico/sound/ym2612_arm.S | 14 ++++++--- platform/gp2x/emu.c | 2 +- 3 files changed, 56 insertions(+), 30 deletions(-) diff --git a/pico/sound/ym2612.c b/pico/sound/ym2612.c index 0867f558..56408524 100644 --- a/pico/sound/ym2612.c +++ b/pico/sound/ym2612.c @@ -5,6 +5,8 @@ ** ** SSG-EG was also removed, because it's rarely used, Sega2.doc even does not ** document it ("proprietary") and tells to write 0 to SSG-EG control register. 
+** +** updated with fixes from mame 0.216 (file version 1.5.1) (kub) */ /* @@ -148,7 +150,7 @@ void memset32(int *dest, int c, int count); #define FREQ_SH 16 /* 16.16 fixed point (frequency calculations) */ #define EG_SH 16 /* 16.16 fixed point (envelope generator timing) */ -#define LFO_SH 25 /* 7.25 fixed point (LFO calculations) */ +#define LFO_SH 24 /* 8.24 fixed point (LFO calculations) */ #define TIMER_SH 16 /* 16.16 fixed point (timers calculations) */ #define ENV_BITS 10 @@ -287,8 +289,8 @@ O(18),O(18),O(18),O(18),O(18),O(18),O(18),O(18), O(18),O(18),O(18),O(18),O(18),O(18),O(18),O(18), /* rates 00-11 */ -O( 0),O( 1),O( 2),O( 3), -O( 0),O( 1),O( 2),O( 3), +O(18),O(18),O( 0),O( 0), +O( 0),O( 0),O( 2),O( 2), O( 0),O( 1),O( 2),O( 3), O( 0),O( 1),O( 2),O( 3), O( 0),O( 1),O( 2),O( 3), @@ -328,10 +330,10 @@ O(16),O(16),O(16),O(16),O(16),O(16),O(16),O(16) #define O(a) (a*1) static const UINT8 eg_rate_shift[32+64+32]={ /* Envelope Generator counter shifts (32 + 64 rates + 32 RKS) */ /* 32 infinite time rates */ -O(0),O(0),O(0),O(0),O(0),O(0),O(0),O(0), -O(0),O(0),O(0),O(0),O(0),O(0),O(0),O(0), -O(0),O(0),O(0),O(0),O(0),O(0),O(0),O(0), -O(0),O(0),O(0),O(0),O(0),O(0),O(0),O(0), +O(11),O(11),O(11),O(11),O(11),O(11),O(11),O(11), +O(11),O(11),O(11),O(11),O(11),O(11),O(11),O(11), +O(11),O(11),O(11),O(11),O(11),O(11),O(11),O(11), +O(11),O(11),O(11),O(11),O(11),O(11),O(11),O(11), /* rates 00-11 */ O(11),O(11),O(11),O(11), @@ -560,7 +562,13 @@ INLINE void FM_KEYON(int c , int s ) { SLOT->key = 1; SLOT->phase = 0; /* restart Phase Generator */ - SLOT->state = EG_ATT; /* phase -> Attack */ + if (SLOT->ar + SLOT->ksr < 32+62) { + SLOT->state = (SLOT->volume > MIN_ATT_INDEX) ? EG_ATT : + ((SLOT->sl == MIN_ATT_INDEX) ? EG_SUS : EG_DEC); + } else { + SLOT->volume = MIN_ATT_INDEX; + SLOT->state = (SLOT->sl == MIN_ATT_INDEX) ? 
EG_SUS : EG_DEC; + } ym2612.slot_mask |= (1<eg_pack_ar = eg_inc_pack[eg_sel_ar] | (eg_sh_ar<<24); @@ -656,6 +664,9 @@ INLINE void set_sl_rr(FM_SLOT *SLOT, int v) SLOT->sl = sl_table[ v>>4 ]; + if (SLOT->state == EG_DEC && (SLOT->volume >= (INT32)(SLOT->sl))) + SLOT->state = EG_SUS; + SLOT->rr = 34 + ((v&0x0f)<<2); eg_sh_rr = eg_rate_shift [SLOT->rr + SLOT->ksr]; @@ -715,12 +726,12 @@ INLINE int advance_lfo(int lfo_ampm, UINT32 lfo_cnt_old, UINT32 lfo_cnt) if (prev_pos != pos) { lfo_ampm &= 0xff; - /* triangle */ + /* triangle (inverted) */ /* AM: 0 to 126 step +2, 126 to 0 step -2 */ if (pos<64) - lfo_ampm |= ((pos&63) * 2) << 8; /* 0 - 126 */ + lfo_ampm |= ((pos^63) * 2) << 8; /* 0 - 126 */ else - lfo_ampm |= (126 - (pos&63)*2) << 8; + lfo_ampm |= ((pos&63) * 2) << 8; } else { @@ -759,7 +770,7 @@ INLINE void update_eg_phase(UINT16 *vol_out, FM_SLOT *SLOT, UINT32 eg_cnt) if ( volume <= MIN_ATT_INDEX ) { volume = MIN_ATT_INDEX; - SLOT->state = EG_DEC; + SLOT->state = (SLOT->sl == MIN_ATT_INDEX) ? 
EG_SUS: EG_DEC; } break; @@ -1124,22 +1135,29 @@ static int chan_render(int *buffer, int length, int c, UINT32 flags) // flags: s { UINT8 blk; UINT32 fn; - int kc,fc; + int kc,fc,fdt; - blk = block_fnum >> 11; block_fnum = block_fnum*2 + lfo_fn_table_index_offset; - + blk = (block_fnum&0x7000) >> 12; fn = block_fnum & 0xfff; /* keyscale code */ - kc = (blk<<2) | opn_fktable[fn >> 8]; + kc = (blk<<2) | opn_fktable[(fn >> 7) & 0xf]; /* phase increment counter */ - fc = fn_table[fn]>>(7-blk); + fc = (fn_table[fn]>>(7-blk)); - crct.incr1 = ((fc+crct.CH->SLOT[SLOT1].DT[kc])*crct.CH->SLOT[SLOT1].mul) >> 1; - crct.incr2 = ((fc+crct.CH->SLOT[SLOT2].DT[kc])*crct.CH->SLOT[SLOT2].mul) >> 1; - crct.incr3 = ((fc+crct.CH->SLOT[SLOT3].DT[kc])*crct.CH->SLOT[SLOT3].mul) >> 1; - crct.incr4 = ((fc+crct.CH->SLOT[SLOT4].DT[kc])*crct.CH->SLOT[SLOT4].mul) >> 1; + fdt = fc + crct.CH->SLOT[SLOT1].DT[kc]; + if (fdt < 0) fdt += fn_table[0x7ff*2] >> 2; + crct.incr1 = (fdt*crct.CH->SLOT[SLOT1].mul) >> 1; + fdt = fc + crct.CH->SLOT[SLOT2].DT[kc]; + if (fdt < 0) fdt += fn_table[0x7ff*2] >> 2; + crct.incr2 = (fdt*crct.CH->SLOT[SLOT2].mul) >> 1; + fdt = fc + crct.CH->SLOT[SLOT3].DT[kc]; + if (fdt < 0) fdt += fn_table[0x7ff*2] >> 2; + crct.incr3 = (fdt*crct.CH->SLOT[SLOT3].mul) >> 1; + fdt = fc + crct.CH->SLOT[SLOT4].DT[kc]; + if (fdt < 0) fdt += fn_table[0x7ff*2] >> 2; + crct.incr4 = (fdt*crct.CH->SLOT[SLOT4].mul) >> 1; } else /* LFO phase modulation = zero */ { @@ -1201,7 +1219,7 @@ INLINE void refresh_fc_eg_slot(FM_SLOT *SLOT, int fc, int kc) else { eg_sh = 0; - eg_sel = 17; + eg_sel = 18; } SLOT->eg_pack_ar = eg_inc_pack[eg_sel] | (eg_sh<<24); @@ -1256,7 +1274,7 @@ static void init_timetables(const UINT8 *dttable) /* DeTune table */ for (d = 0;d <= 3;d++){ for (i = 0;i <= 31;i++){ - rate = ((double)dttable[d*32 + i]) * SIN_LEN * ym2612.OPN.ST.freqbase * (1< Date: Tue, 31 Dec 2019 10:55:40 +0100 Subject: [PATCH 0251/1110] add DC filter to sound mixer to remove potential PCM DC offset --- 
pico/sound/mix.c | 75 ++++++++++++++++++++++++++++++----------- pico/sound/mix.h | 1 + pico/sound/mix_arm.S | 79 +++++++++++++++++++++++++++++++++++--------- pico/sound/sound.c | 1 + 4 files changed, 121 insertions(+), 35 deletions(-) diff --git a/pico/sound/mix.c b/pico/sound/mix.c index 202ba355..242cb375 100644 --- a/pico/sound/mix.c +++ b/pico/sound/mix.c @@ -6,41 +6,72 @@ * See COPYING file in the top-level directory. */ +#include "string.h" + #define MAXOUT (+32767) #define MINOUT (-32768) /* limitter */ -#define Limit(val, max,min) { \ - if ( val > max ) val = max; \ - else if ( val < min ) val = min; \ +#define Limit16(val) { \ + val -= (val >> 2); \ + if ((short)val != val) val = (val < 0 ? MINOUT : MAXOUT); \ } int mix_32_to_16l_level; -void mix_32_to_16l_stereo_core(short *dest, int *src, int count, int level) -{ - int l, r; +static struct iir2 { // 2-pole IIR + int x[2]; // sample buffer + int y[2]; // filter intermediates +} lfi2, rfi2; - for (; count > 0; count--) - { - l = r = *dest; - l += *src++ >> level; - r += *src++ >> level; - Limit( l, MAXOUT, MINOUT ); - Limit( r, MAXOUT, MINOUT ); - *dest++ = l; - *dest++ = r; - } +// NB ">>" rounds to -infinity, "/" to 0. To compensate the effect possibly use +// "-(-y>>n)" (round to +infinity) instead of "y>>n" in places. + +// NB uses Q12 fixpoint; samples mustn't have more than 20 bits for this. +#define QB 12 + + +// exponential moving average filter for DC filtering +// y[n] = (x[n]-y[n-1])*(1/8192) (corner approx. 
20Hz, gain 1) +static inline int filter_exp(struct iir2 *fi2, int x) +{ + int xf = (x<y[0]; + fi2->y[0] += xf >> 13; + xf -= xf >> 2; // level reduction to avoid clipping from overshoot + return xf>>QB; +} + +// unfiltered (for testing) +static inline int filter_null(struct iir2 *fi2, int x) +{ + return x; +} + +#define mix_32_to_16l_stereo_core(dest, src, count, lv, fl) { \ + int l, r; \ + \ + for (; count > 0; count--) \ + { \ + l = r = *dest; \ + l += *src++ >> lv; \ + r += *src++ >> lv; \ + l = fl(&lfi2, l); \ + r = fl(&rfi2, r); \ + Limit16(l); \ + Limit16(r); \ + *dest++ = l; \ + *dest++ = r; \ + } \ } void mix_32_to_16l_stereo_lvl(short *dest, int *src, int count) { - mix_32_to_16l_stereo_core(dest, src, count, mix_32_to_16l_level); + mix_32_to_16l_stereo_core(dest, src, count, mix_32_to_16l_level, filter_exp); } void mix_32_to_16l_stereo(short *dest, int *src, int count) { - mix_32_to_16l_stereo_core(dest, src, count, 0); + mix_32_to_16l_stereo_core(dest, src, count, 0, filter_exp); } void mix_32_to_16_mono(short *dest, int *src, int count) @@ -51,7 +82,8 @@ void mix_32_to_16_mono(short *dest, int *src, int count) { l = *dest; l += *src++; - Limit( l, MAXOUT, MINOUT ); + l = filter_exp(&lfi2, l); + Limit16(l); *dest++ = l; } } @@ -87,3 +119,8 @@ void mix_16h_to_32_s2(int *dest_buf, short *mp3_buf, int count) } } +void mix_reset(void) +{ + memset(&lfi2, 0, sizeof(lfi2)); + memset(&rfi2, 0, sizeof(rfi2)); +} diff --git a/pico/sound/mix.h b/pico/sound/mix.h index b9315114..e128bad1 100644 --- a/pico/sound/mix.h +++ b/pico/sound/mix.h @@ -8,3 +8,4 @@ void mix_32_to_16_mono(short *dest, int *src, int count); extern int mix_32_to_16l_level; void mix_32_to_16l_stereo_lvl(short *dest, int *src, int count); +void mix_reset(void); diff --git a/pico/sound/mix_arm.S b/pico/sound/mix_arm.S index 5088e61b..bb7388d6 100644 --- a/pico/sound/mix_arm.S +++ b/pico/sound/mix_arm.S @@ -166,13 +166,6 @@ m16_32_s2_no_unal2: @ limit and shift up by 16 @ reg=int_sample, lr=1, 
r3=tmp, kills flags .macro Limitsh reg -@ movs r4, r3, asr #16 -@ cmnne r4, #1 -@ beq c32_16_no_overflow -@ tst r4, r4 -@ mov r3, #0x8000 -@ subpl r3, r3, #1 - add r3, lr, \reg, asr #15 bics r3, r3, #1 @ in non-overflow conditions r3 is 0 or 1 moveq \reg, \reg, lsl #16 @@ -180,20 +173,30 @@ m16_32_s2_no_unal2: subpl \reg, \reg, #0x00010000 .endm +@ filter out DC offset +@ in=int_sample (max 20 bit), y=filter memory, r3=tmp +.macro DCfilt in y + rsb r3, \y, \in, asl #12 @ fixpoint 20.12 + add \y, \y, r3, asr #13 + sub \in, \in, \y, asr #12 + sub \in, \in, \in, asr #2 @ reduce audio lvl some +.endm @ mix 32bit audio (with 16bits really used, upper bits indicate overflow) with normal 16 bit audio with left channel only @ warning: this function assumes dest is word aligned .global mix_32_to_16l_stereo @ short *dest, int *src, int count mix_32_to_16l_stereo: - stmfd sp!, {r4-r8,lr} - - mov lr, #1 + stmfd sp!, {r4-r8,r10-r11,lr} mov r2, r2, lsl #1 subs r2, r2, #4 bmi m32_16l_st_end + mov lr, #1 + ldr r12, =filter + ldmia r12, {r10-r11} + m32_16l_st_loop: ldmia r0, {r8,r12} ldmia r1!, {r4-r7} @@ -203,6 +206,10 @@ m32_16l_st_loop: add r5, r5, r8, asr #16 add r6, r6, r12,asr #16 add r7, r7, r12,asr #16 + DCfilt r4, r10 + DCfilt r5, r11 + DCfilt r6, r10 + DCfilt r7, r11 Limitsh r4 Limitsh r5 Limitsh r6 @@ -221,13 +228,17 @@ m32_16l_st_end: ldmia r1!,{r4,r5} add r4, r4, r6 add r5, r5, r6 + DCfilt r4, r10 + DCfilt r5, r11 Limitsh r4 Limitsh r5 orr r4, r5, r4, lsr #16 str r4, [r0], #4 m32_16l_st_no_unal2: - ldmfd sp!, {r4-r8,lr} + ldr r12, =filter + stmia r12, {r10-r11} + ldmfd sp!, {r4-r8,r10-r11,lr} bx lr @@ -235,9 +246,11 @@ m32_16l_st_no_unal2: .global mix_32_to_16_mono @ short *dest, int *src, int count mix_32_to_16_mono: - stmfd sp!, {r4-r8,lr} + stmfd sp!, {r4-r8,r10-r11,lr} mov lr, #1 + ldr r12, =filter + ldr r10, [r12] @ check if dest is word aligned tst r0, #2 @@ -262,6 +275,10 @@ m32_16_mo_loop: add r7, r7, r12,asr #16 mov r12,r12,lsl #16 add r6, r6, r12,asr #16 + 
DCfilt r4, r10 + DCfilt r5, r10 + DCfilt r6, r10 + DCfilt r7, r10 Limitsh r4 Limitsh r5 Limitsh r6 @@ -281,6 +298,8 @@ m32_16_mo_end: add r5, r5, r6, asr #16 mov r6, r6, lsl #16 add r4, r4, r6, asr #16 + DCfilt r4, r10 + DCfilt r5, r10 Limitsh r4 Limitsh r5 orr r4, r5, r4, lsr #16 @@ -288,14 +307,18 @@ m32_16_mo_end: m32_16_mo_no_unal2: tst r2, #1 - ldmeqfd sp!, {r4-r8,pc} + beq m32_16_mo_no_unal ldrsh r5, [r0] ldr r4, [r1], #4 add r4, r4, r5 + DCfilt r4, r10 Limit r4 strh r4, [r0], #2 - ldmfd sp!, {r4-r8,lr} +m32_16_mo_no_unal: + ldr r12, =filter + str r10, [r12] + ldmfd sp!, {r4-r8,r10-r11,lr} bx lr @@ -315,11 +338,13 @@ mix_32_to_16l_level: .global mix_32_to_16l_stereo_lvl @ short *dest, int *src, int count mix_32_to_16l_stereo_lvl: - stmfd sp!, {r4-r9,lr} + stmfd sp!, {r4-r11,lr} ldr r9, =mix_32_to_16l_level mov lr, #1 ldr r9, [r9] + ldr r12, =filter + ldm r12, {r10-r11} mov r2, r2, lsl #1 subs r2, r2, #4 @@ -338,6 +363,10 @@ m32_16l_st_l_loop: mov r5, r5, asr r9 mov r6, r6, asr r9 mov r7, r7, asr r9 + DCfilt r4, r10 + DCfilt r5, r11 + DCfilt r6, r10 + DCfilt r7, r11 Limitsh r4 Limitsh r5 Limitsh r6 @@ -358,15 +387,33 @@ m32_16l_st_l_end: add r5, r5, r6 mov r4, r4, asr r9 mov r5, r5, asr r9 + DCfilt r4, r10 + DCfilt r5, r11 Limitsh r4 Limitsh r5 orr r4, r5, r4, lsr #16 str r4, [r0], #4 m32_16l_st_l_no_unal2: - ldmfd sp!, {r4-r9,lr} + ldr r12, =filter + stmia r12, {r10-r11} + ldmfd sp!, {r4-r11,lr} bx lr +.global mix_reset @ void +mix_reset: + ldr r0, =filter + mov r1, #0 + str r1, [r0] + str r1, [r0, #4] + bx lr + +.data + DCfilt r4, r10 + DCfilt r5, r11 +filter: + .ds 8 + #endif /* __GP2X__ */ @ vim:filetype=armasm diff --git a/pico/sound/sound.c b/pico/sound/sound.c index 95aac128..30d4a072 100644 --- a/pico/sound/sound.c +++ b/pico/sound/sound.c @@ -86,6 +86,7 @@ PICO_INTERNAL void PsndReset(void) // PsndRerate calls YM2612Init, which also resets PsndRerate(0); timers_reset(); + mix_reset(); } From 8ac9ab7fcb43d33952f5293720b868e7acbb62b4 Mon Sep 17 00:00:00 
2001 From: kub Date: Wed, 8 Jan 2020 00:49:13 +0100 Subject: [PATCH 0252/1110] audio: added SSG-EG to YM2612, plus some timing changes for SN76496+YM2612 --- Makefile | 2 +- cpu/drc/emit_arm64.c | 2 +- pico/memory.c | 6 +- pico/pico.h | 2 +- pico/pico_cmn.c | 29 +-- pico/pico_int.h | 7 +- pico/sms.c | 8 +- pico/sound/mix.c | 7 +- pico/sound/mix_arm.S | 6 +- pico/sound/sound.c | 148 ++++++-------- pico/sound/ym2612.c | 367 ++++++++++++++++++++++++----------- pico/sound/ym2612.h | 22 ++- pico/sound/ym2612_arm.S | 420 +++++++++++++++++++++------------------- 13 files changed, 571 insertions(+), 455 deletions(-) diff --git a/Makefile b/Makefile index 49116ce0..053e1606 100644 --- a/Makefile +++ b/Makefile @@ -36,7 +36,7 @@ endif ifeq ("$(PLATFORM)",$(filter "$(PLATFORM)","gp2x" "opendingux" "rpi1")) # very small caches, avoid optimization options making the binary much bigger -CFLAGS += -finline-limit=43 -fno-unroll-loops -fno-ipa-cp -ffast-math +CFLAGS += -finline-limit=42 -fno-unroll-loops -fno-ipa-cp -ffast-math # this gets you about 20% better execution speed on 32bit arm/mips CFLAGS += -fno-common -fno-stack-protector -fno-guess-branch-probability -fno-caller-saves -fno-tree-loop-if-convert -fno-regmove endif diff --git a/cpu/drc/emit_arm64.c b/cpu/drc/emit_arm64.c index 2e873161..f4645bc1 100644 --- a/cpu/drc/emit_arm64.c +++ b/cpu/drc/emit_arm64.c @@ -1393,7 +1393,7 @@ static void emith_sync_t(int sr) else if (tcond >= 0) { int tmp = rcache_get_tmp(); EMIT(A64_CSET(tcond, tmp)); - EMIT(A64_BFI_IMM(sr, tmp, 0, 1)); // assumes SR.T = bit 0 + EMIT(A64_BFI_IMM(sr, tmp, __builtin_ffs(T)-1, 1)); rcache_free_tmp(tmp); } tcond = -1; diff --git a/pico/memory.c b/pico/memory.c index cc82f789..9fe3a085 100644 --- a/pico/memory.c +++ b/pico/memory.c @@ -546,7 +546,7 @@ static void PicoWrite8_z80(u32 a, u32 d) } if ((a & 0x6000) == 0x4000) { // FM Sound if (PicoIn.opt & POPT_EN_FM) - Pico.m.status |= ym2612_write_local(a & 3, d & 0xff, 0) & 1; + ym2612_write_local(a & 3, d & 
0xff, 0); return; } // TODO: probably other VDP access too? Maybe more mirrors? @@ -1059,6 +1059,8 @@ static int ym2612_write_local(u32 a, u32 d, int is_from_z80) break; } + int scanline = get_scanline(is_from_z80); + PsndDoFM(scanline); #ifdef __GP2X__ if (PicoIn.opt & POPT_EXT_FM) return YM2612Write_940(a, d, get_scanline(is_from_z80)); @@ -1224,7 +1226,7 @@ static unsigned char z80_md_bank_read(unsigned short a) static void z80_md_ym2612_write(unsigned int a, unsigned char data) { if (PicoIn.opt & POPT_EN_FM) - Pico.m.status |= ym2612_write_local(a, data, 1) & 1; + ym2612_write_local(a, data, 1); } static void z80_md_vdp_br_write(unsigned int a, unsigned char data) diff --git a/pico/pico.h b/pico/pico.h index a9359a18..daf5dfdf 100644 --- a/pico/pico.h +++ b/pico/pico.h @@ -70,7 +70,7 @@ extern void *p32x_bios_g, *p32x_bios_m, *p32x_bios_s; #define POPT_EN_DRC (1<<17) #define POPT_DIS_SPRITE_LIM (1<<18) #define POPT_DIS_IDLE_DET (1<<19) -#define POPT_EN_32X (1<<20) +#define POPT_EN_32X (1<<20) // x0 0000 #define POPT_EN_PWM (1<<21) #define POPT_PWM_IRQ_OPT (1<<22) diff --git a/pico/pico_cmn.c b/pico/pico_cmn.c index 1f89da90..5fa0b16f 100644 --- a/pico/pico_cmn.c +++ b/pico/pico_cmn.c @@ -88,7 +88,6 @@ static void do_timing_hacks_vb(void) static int PicoFrameHints(void) { struct PicoVideo *pv = &Pico.video; - int line_sample = Pico.m.pal ? 68 : 93; int vdp_slots = (Pico.video.reg[12] & 1) ? 
18 : 16; int lines, y, lines_vis, skip; int vcnt_wrap, vcnt_adj; @@ -150,23 +149,6 @@ static int PicoFrameHints(void) } } - // get samples from sound chips - if ((y == 224 || y == line_sample) && PicoIn.sndOut) - { - cycles = SekCyclesDone(); - - if (Pico.m.z80Run && !Pico.m.z80_reset && (PicoIn.opt&POPT_EN_Z80)) - PicoSyncZ80(cycles); -#ifdef PICO_CD - if (PicoIn.AHW & PAHW_MCD) - pcd_sync_s68k(cycles, 0); -#endif -#ifdef PICO_32X - p32x_sync_sh2s(cycles); -#endif - PsndGetSamples(y); - } - // Run scanline: Pico.t.m68c_line_start = Pico.t.m68c_aim; do_timing_hacks_as(pv, vdp_slots); @@ -238,10 +220,6 @@ static int PicoFrameHints(void) p32x_start_blank(); #endif - // get samples from sound chips - if (y == 224 && PicoIn.sndOut) - PsndGetSamples(y); - // Run scanline: CPUS_RUN(CYCLES_M68K_LINE - CYCLES_M68K_VINT_LAG); @@ -298,7 +276,7 @@ static int PicoFrameHints(void) pv->status |= ((pv->reg[1] >> 3) ^ SR_VB) & SR_VB; // forced blanking // last scanline - Pico.m.scanline = y; + Pico.m.scanline = y++; pv->v_counter = 0xff; pv->lwrite_cnt = 0; @@ -337,6 +315,11 @@ static int PicoFrameHints(void) #ifdef PICO_32X p32x_sync_sh2s(cycles); #endif + + // get samples from sound chips + if (PicoIn.sndOut) + PsndGetSamples(y); + timers_cycle(); pv->hint_cnt = hint; diff --git a/pico/pico_int.h b/pico/pico_int.h index 0fc458ef..d3da72ce 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -336,7 +336,7 @@ struct PicoMisc unsigned char eeprom_cycle; // EEPROM cycle number unsigned char eeprom_slave; // EEPROM slave word for X24C02 and better SRAMs unsigned char eeprom_status; - unsigned char status; // rapid_ym2612, multi_ym_updates + unsigned char pad1; // was ym2612 status unsigned short dma_xfers; // 18 unsigned char eeprom_wb[2]; // EEPROM latch/write buffer unsigned int frame_count; // 1c for movies and idle det @@ -433,6 +433,8 @@ struct PicoSound int len_e_cnt; short dac_line; short psg_line; + unsigned int fm_mult; // samples per line in Q16 + unsigned int fm_pos; // 
last FM position in Q16 }; // run tools/mkoffsets pico/pico_int_offs.h if you change these @@ -872,9 +874,10 @@ PICO_INTERNAL void PsndReset(void); PICO_INTERNAL void PsndStartFrame(void); PICO_INTERNAL void PsndDoDAC(int line_to); PICO_INTERNAL void PsndDoPSG(int line_to); +PICO_INTERNAL void PsndDoFM(int line_to); PICO_INTERNAL void PsndClear(void); PICO_INTERNAL void PsndGetSamples(int y); -PICO_INTERNAL void PsndGetSamplesMS(void); +PICO_INTERNAL void PsndGetSamplesMS(int y); // sms.c #ifndef NO_SMS diff --git a/pico/sms.c b/pico/sms.c index 2800e209..b016f197 100644 --- a/pico/sms.c +++ b/pico/sms.c @@ -320,16 +320,12 @@ void PicoFrameMS(void) } } - // 224 because of how it's done for MD... - if (y == 224 && PicoIn.sndOut) - PsndGetSamplesMS(); - cycles_aim += cycles_line; cycles_done += z80_run((cycles_aim - cycles_done) >> 8) << 8; } - if (PicoIn.sndOut && Pico.snd.psg_line < lines) - PsndDoPSG(lines - 1); + if (PicoIn.sndOut) + PsndGetSamplesMS(lines); } void PicoFrameDrawOnlyMS(void) diff --git a/pico/sound/mix.c b/pico/sound/mix.c index 242cb375..4b4bbdd8 100644 --- a/pico/sound/mix.c +++ b/pico/sound/mix.c @@ -12,16 +12,15 @@ #define MINOUT (-32768) /* limitter */ -#define Limit16(val) { \ - val -= (val >> 2); \ - if ((short)val != val) val = (val < 0 ? MINOUT : MAXOUT); \ -} +#define Limit16(val) \ + if ((short)val != val) val = (val < 0 ? MINOUT : MAXOUT) int mix_32_to_16l_level; static struct iir2 { // 2-pole IIR int x[2]; // sample buffer int y[2]; // filter intermediates + int i; } lfi2, rfi2; // NB ">>" rounds to -infinity, "/" to 0. 
To compensate the effect possibly use diff --git a/pico/sound/mix_arm.S b/pico/sound/mix_arm.S index bb7388d6..104b3065 100644 --- a/pico/sound/mix_arm.S +++ b/pico/sound/mix_arm.S @@ -400,6 +400,8 @@ m32_16l_st_l_no_unal2: ldmfd sp!, {r4-r11,lr} bx lr +#endif /* __GP2X__ */ + .global mix_reset @ void mix_reset: ldr r0, =filter @@ -409,11 +411,7 @@ mix_reset: bx lr .data - DCfilt r4, r10 - DCfilt r5, r11 filter: .ds 8 -#endif /* __GP2X__ */ - @ vim:filetype=armasm diff --git a/pico/sound/sound.c b/pico/sound/sound.c index 30d4a072..f4cd4241 100644 --- a/pico/sound/sound.c +++ b/pico/sound/sound.c @@ -32,52 +32,17 @@ extern int *sn76496_regs; static void dac_recalculate(void) { int lines = Pico.m.pal ? 313 : 262; - int mid = Pico.m.pal ? 68 : 93; - int i, dac_cnt, pos, len; + int i, pos; - if (Pico.snd.len <= lines) - { - // shrinking algo - dac_cnt = -Pico.snd.len; - len=1; pos=0; - dac_info[225] = 1; + pos = 0; // Q16 - for(i=226; i != 225; i++) - { - if (i >= lines) i = 0; - if(dac_cnt < 0) { - pos++; - dac_cnt += lines; - } - dac_cnt -= Pico.snd.len; - dac_info[i] = pos; - } - } - else + for(i = 0; i <= lines; i++) { - // stretching - dac_cnt = Pico.snd.len; - pos=0; - for(i = 225; i != 224; i++) - { - if (i >= lines) i = 0; - len=0; - while(dac_cnt >= 0) { - dac_cnt -= lines; - len++; - } - if (i == mid) // midpoint - while(pos+len < Pico.snd.len/2) { - dac_cnt -= lines; - len++; - } - dac_cnt += Pico.snd.len; - pos += len; - dac_info[i] = pos; - } + dac_info[i] = ((pos+(1<<15)) >> 16); // round to nearest + pos += Pico.snd.fm_mult; } - for (i = lines; i < sizeof(dac_info) / sizeof(dac_info[0]); i++) - dac_info[i] = dac_info[0]; + for (i = lines+1; i < sizeof(dac_info) / sizeof(dac_info[0]); i++) + dac_info[i] = dac_info[i-1]; } @@ -95,6 +60,7 @@ void PsndRerate(int preserve_state) { void *state = NULL; int target_fps = Pico.m.pal ? 50 : 60; + int target_lines = Pico.m.pal ? 
313 : 262; if (preserve_state) { state = malloc(0x204); @@ -121,6 +87,9 @@ void PsndRerate(int preserve_state) Pico.snd.len_e_add = ((PicoIn.sndRate - Pico.snd.len * target_fps) << 16) / target_fps; Pico.snd.len_e_cnt = 0; + // samples per line + Pico.snd.fm_mult = 65536.0 * PicoIn.sndRate / (target_fps*target_lines); + // recalculate dac info dac_recalculate(); @@ -149,8 +118,7 @@ PICO_INTERNAL void PsndStartFrame(void) } Pico.snd.dac_line = Pico.snd.psg_line = 0; - Pico.m.status &= ~1; - dac_info[224] = Pico.snd.len_use; + Pico.snd.fm_pos = 0; } PICO_INTERNAL void PsndDoDAC(int line_to) @@ -159,9 +127,6 @@ PICO_INTERNAL void PsndDoDAC(int line_to) int dout = ym2612.dacout; int line_from = Pico.snd.dac_line; - if (line_to >= 313) - line_to = 312; - pos = dac_info[line_from]; pos1 = dac_info[line_to + 1]; len = pos1 - pos; @@ -188,14 +153,9 @@ PICO_INTERNAL void PsndDoPSG(int line_to) int pos, pos1, len; int stereo = 0; - if (line_to >= 313) - line_to = 312; - pos = dac_info[line_from]; pos1 = dac_info[line_to + 1]; len = pos1 - pos; - //elprintf(EL_STATUS, "%3d %3d %3d %3d %3d", - // pos, pos1, len, line_from, line_to); if (len <= 0) return; @@ -211,6 +171,34 @@ PICO_INTERNAL void PsndDoPSG(int line_to) SN76496Update(PicoIn.sndOut + pos, len, stereo); } +PICO_INTERNAL void PsndDoFM(int line_to) +{ + int pos, len; + int stereo = 0; + + // Q16, number of samples to fill in buffer + len = ((line_to-1) * Pico.snd.fm_mult) - Pico.snd.fm_pos; + + // don't do this too often (no more than 256 per sec) + if (len >> 16 <= PicoIn.sndRate >> 9) + return; + + // update position and calculate buffer offset and length + pos = Pico.snd.fm_pos >> 16; + Pico.snd.fm_pos += len; + len = (Pico.snd.fm_pos >> 16) - pos; + + // fill buffer + if (PicoIn.opt & POPT_EN_STEREO) { + stereo = 1; + pos <<= 1; + } + if (PicoIn.opt & POPT_EN_FM) + YM2612UpdateOne(PsndBuffer + pos, len, stereo, 1); + else + memset32(PsndBuffer + pos, 0, len<> 3; + int fmlen = (Pico.snd.fm_pos >> 16) - offset; 
offset <<= stereo; + buf32 = PsndBuffer+offset; pprof_start(sound); @@ -288,14 +277,15 @@ static int PsndRender(int offset, int length) return length; } - // Add in the stereo FM buffer - if (PicoIn.opt & POPT_EN_FM) { - buf32_updated = YM2612UpdateOne(buf32, length, stereo, 1); - } else - memset32(buf32, 0, length< 0) { + int *fmbuf = buf32 + (fmlen << stereo); + if (PicoIn.opt & POPT_EN_FM) + YM2612UpdateOne(fmbuf, length-fmlen, stereo, 1); + else + memset32(fmbuf, 0, (length-fmlen)< max ) val = max; \ - else if ( val < min ) val = min; \ -} - - /* TL_TAB_LEN is calculated as: * 13 - sinus amplitude bits (Y axis) * 2 - sinus sign bit (Y axis) @@ -289,8 +281,8 @@ O(18),O(18),O(18),O(18),O(18),O(18),O(18),O(18), O(18),O(18),O(18),O(18),O(18),O(18),O(18),O(18), /* rates 00-11 */ -O(18),O(18),O( 0),O( 0), -O( 0),O( 0),O( 2),O( 2), +O(18),O(18),O( 2),O( 3), +O( 0),O( 1),O( 2),O( 3), O( 0),O( 1),O( 2),O( 3), O( 0),O( 1),O( 2),O( 3), O( 0),O( 1),O( 2),O( 3), @@ -554,6 +546,13 @@ INLINE void set_timers( int v ) ym2612.OPN.ST.status &= ~1; } +INLINE void recalc_volout(FM_SLOT *SLOT) +{ + INT16 vol_out = SLOT->volume; + if ((SLOT->ssg&0x0c) == 0x0c) + vol_out = (0x200 - SLOT->volume) & MAX_ATT_INDEX; + SLOT->vol_out = vol_out + SLOT->tl; +} INLINE void FM_KEYON(int c , int s ) { @@ -562,13 +561,15 @@ INLINE void FM_KEYON(int c , int s ) { SLOT->key = 1; SLOT->phase = 0; /* restart Phase Generator */ + SLOT->ssg ^= SLOT->ssgn; + SLOT->ssgn = 0; + SLOT->state = (SLOT->sl == MIN_ATT_INDEX) ? EG_SUS : EG_DEC; if (SLOT->ar + SLOT->ksr < 32+62) { - SLOT->state = (SLOT->volume > MIN_ATT_INDEX) ? EG_ATT : - ((SLOT->sl == MIN_ATT_INDEX) ? EG_SUS : EG_DEC); + if (SLOT->volume > MIN_ATT_INDEX) SLOT->state = EG_ATT; } else { SLOT->volume = MIN_ATT_INDEX; - SLOT->state = (SLOT->sl == MIN_ATT_INDEX) ? 
EG_SUS : EG_DEC; } + recalc_volout(SLOT); ym2612.slot_mask |= (1<key ) { SLOT->key = 0; - if (SLOT->state>EG_REL) + if (SLOT->state>EG_REL) { SLOT->state = EG_REL;/* phase -> Release */ + if (SLOT->ssg&0x08) { + if (SLOT->ssg&0x04) + SLOT->volume = (0x200 - SLOT->volume); + if (SLOT->volume >= 0x200) { + SLOT->volume = MAX_ATT_INDEX; + SLOT->state = EG_OFF; + } + } + } + SLOT->vol_out = SLOT->volume + SLOT->tl; } } @@ -597,12 +608,15 @@ INLINE void set_det_mul(FM_CH *CH, FM_SLOT *SLOT, int v) INLINE void set_tl(FM_SLOT *SLOT, int v) { SLOT->tl = (v&0x7f)<<(ENV_BITS-7); /* 7bit TL */ + if (SLOT->state > EG_REL) + recalc_volout(SLOT); } /* set attack rate & key scale */ INLINE void set_ar_ksr(FM_CH *CH, FM_SLOT *SLOT, int v) { UINT8 old_KSR = SLOT->KSR; + int eg_sh_ar, eg_sel_ar; SLOT->ar = (v&0x1f) ? 32 + ((v&0x1f)<<1) : 0; @@ -611,24 +625,20 @@ INLINE void set_ar_ksr(FM_CH *CH, FM_SLOT *SLOT, int v) { CH->SLOT[SLOT1].Incr=-1; } + + /* refresh Attack rate */ + if ((SLOT->ar + SLOT->ksr) < 32+62) + { + eg_sh_ar = eg_rate_shift [SLOT->ar + SLOT->ksr ]; + eg_sel_ar = eg_rate_select[SLOT->ar + SLOT->ksr ]; + } else { - int eg_sh_ar, eg_sel_ar; - - /* refresh Attack rate */ - if ((SLOT->ar + SLOT->ksr) < 32+62) - { - eg_sh_ar = eg_rate_shift [SLOT->ar + SLOT->ksr ]; - eg_sel_ar = eg_rate_select[SLOT->ar + SLOT->ksr ]; - } - else - { - eg_sh_ar = 0; - eg_sel_ar = 18; - } - - SLOT->eg_pack_ar = eg_inc_pack[eg_sel_ar] | (eg_sh_ar<<24); + eg_sh_ar = 0; + eg_sel_ar = 18; } + + SLOT->eg_pack_ar = eg_inc_pack[eg_sel_ar] | (eg_sh_ar<<24); } /* set decay rate */ @@ -750,7 +760,7 @@ INLINE int advance_lfo(int lfo_ampm, UINT32 lfo_cnt_old, UINT32 lfo_cnt) return lfo_ampm; } -INLINE void update_eg_phase(UINT16 *vol_out, FM_SLOT *SLOT, UINT32 eg_cnt) +INLINE void update_eg_phase(FM_SLOT *SLOT, UINT32 eg_cnt) { INT32 volume = SLOT->volume; UINT32 pack = SLOT->eg_pack[SLOT->state - 1]; @@ -763,44 +773,113 @@ INLINE void update_eg_phase(UINT16 *vol_out, FM_SLOT *SLOT, UINT32 eg_cnt) 
eg_inc_val = pack >> ((eg_cnt >> shift) & 7) * 3; eg_inc_val = (1 << (eg_inc_val & 7)) >> 1; - switch (SLOT->state) - { - case EG_ATT: /* attack phase */ - volume += ( ~volume * eg_inc_val ) >> 4; - if ( volume <= MIN_ATT_INDEX ) + if (SLOT->ssg&0x08) { + switch (SLOT->state) { - volume = MIN_ATT_INDEX; - SLOT->state = (SLOT->sl == MIN_ATT_INDEX) ? EG_SUS: EG_DEC; + case EG_ATT: /* attack phase */ + volume += ( ~volume * eg_inc_val ) >> 4; + if ( volume <= MIN_ATT_INDEX ) + { + volume = MIN_ATT_INDEX; + SLOT->state = (SLOT->sl == MIN_ATT_INDEX) ? EG_SUS: EG_DEC; + } + break; + + case EG_DEC: /* decay phase */ + if (volume < 0x200) + volume += 4*eg_inc_val; + if ( volume >= (INT32) SLOT->sl ) + SLOT->state = EG_SUS; + break; + + case EG_SUS: /* sustain phase */ + if (volume < 0x200) + volume += 4*eg_inc_val; + break; + + case EG_REL: /* release phase */ + if (volume < 0x200) + volume += 4*eg_inc_val; + if ( volume >= 0x200 ) + { + volume = MAX_ATT_INDEX; + SLOT->state = EG_OFF; + } + break; } - break; - case EG_DEC: /* decay phase */ - volume += eg_inc_val; - if ( volume >= (INT32) SLOT->sl ) - SLOT->state = EG_SUS; - break; - - case EG_SUS: /* sustain phase */ - volume += eg_inc_val; - if ( volume >= MAX_ATT_INDEX ) + SLOT->vol_out = volume + SLOT->tl; + if ((SLOT->ssg&0x04) && (SLOT->state > EG_REL)) + SLOT->vol_out = ((0x200 - volume) & MAX_ATT_INDEX) + SLOT->tl; + } else { + switch (SLOT->state) { - volume = MAX_ATT_INDEX; - /* do not change SLOT->state (verified on real chip) */ - } - break; + case EG_ATT: /* attack phase */ + volume += ( ~volume * eg_inc_val ) >> 4; + if ( volume <= MIN_ATT_INDEX ) + { + volume = MIN_ATT_INDEX; + SLOT->state = (SLOT->sl == MIN_ATT_INDEX) ? 
EG_SUS: EG_DEC; + } + break; - case EG_REL: /* release phase */ - volume += eg_inc_val; - if ( volume >= MAX_ATT_INDEX ) - { - volume = MAX_ATT_INDEX; - SLOT->state = EG_OFF; + case EG_DEC: /* decay phase */ + volume += eg_inc_val; + if ( volume >= (INT32) SLOT->sl ) + SLOT->state = EG_SUS; + break; + + case EG_SUS: /* sustain phase */ + volume += eg_inc_val; + if ( volume >= MAX_ATT_INDEX ) + { + volume = MAX_ATT_INDEX; + /* do not change SLOT->state (verified on real chip) */ + } + break; + + case EG_REL: /* release phase */ + volume += eg_inc_val; + if ( volume >= MAX_ATT_INDEX ) + { + volume = MAX_ATT_INDEX; + SLOT->state = EG_OFF; + } + break; } - break; + + SLOT->vol_out = volume + SLOT->tl; } - SLOT->volume = volume; - *vol_out = SLOT->tl + volume; /* tl is 7bit<<3, volume 0-1023 (0-2039 total) */ +} + +INLINE void update_ssg_eg_phase(FM_SLOT *SLOT) +{ + if (SLOT->ssg&0x01) { + if (SLOT->ssg&0x02) { + SLOT->ssg ^= SLOT->ssgn ^ 4; + SLOT->ssgn = 4; + } + + if (SLOT->state != EG_ATT && !(SLOT->ssg&0x04)) + SLOT->volume = MAX_ATT_INDEX; + } else { + if (SLOT->ssg&0x02) { + SLOT->ssg ^= 4; + SLOT->ssgn ^= 4; + } else + SLOT->phase = 0; + + if (SLOT->state != EG_ATT) { + SLOT->state = (SLOT->sl == MIN_ATT_INDEX) ? 
EG_SUS : EG_DEC; + if (SLOT->ar + SLOT->ksr < 32+62) { + if (SLOT->volume > MIN_ATT_INDEX) SLOT->state = EG_ATT; + } else { + SLOT->volume = MIN_ATT_INDEX; + } + } + } + recalc_volout(SLOT); } #endif @@ -846,6 +925,16 @@ static void chan_render_loop(chan_rend_context *ct, int *buffer, int length) { int smp = 0; /* produced sample */ unsigned int eg_out, eg_out2, eg_out4; + FM_SLOT *SLOT; + + SLOT = &ct->CH->SLOT[SLOT1]; + if ((SLOT->ssg&0x08) && SLOT->state > EG_REL && SLOT->volume >= 0x200) update_ssg_eg_phase(SLOT); + SLOT = &ct->CH->SLOT[SLOT2]; + if ((SLOT->ssg&0x08) && SLOT->state > EG_REL && SLOT->volume >= 0x200) update_ssg_eg_phase(SLOT); + SLOT = &ct->CH->SLOT[SLOT3]; + if ((SLOT->ssg&0x08) && SLOT->state > EG_REL && SLOT->volume >= 0x200) update_ssg_eg_phase(SLOT); + SLOT = &ct->CH->SLOT[SLOT4]; + if ((SLOT->ssg&0x08) && SLOT->state > EG_REL && SLOT->volume >= 0x200) update_ssg_eg_phase(SLOT); if (ct->pack & 8) { /* LFO enabled ? (test Earthworm Jim in between demo 1 and 2) */ ct->pack = (ct->pack&0xffff) | (advance_lfo(ct->pack >> 16, ct->lfo_cnt, ct->lfo_cnt + ct->lfo_inc) << 16); @@ -857,12 +946,58 @@ static void chan_render_loop(chan_rend_context *ct, int *buffer, int length) { ct->eg_timer -= EG_TIMER_OVERFLOW; ct->eg_cnt++; + if (ct->eg_cnt >= 4096) ct->eg_cnt = 1; - if (ct->CH->SLOT[SLOT1].state != EG_OFF) update_eg_phase(&ct->vol_out1, &ct->CH->SLOT[SLOT1], ct->eg_cnt); - if (ct->CH->SLOT[SLOT2].state != EG_OFF) update_eg_phase(&ct->vol_out2, &ct->CH->SLOT[SLOT2], ct->eg_cnt); - if (ct->CH->SLOT[SLOT3].state != EG_OFF) update_eg_phase(&ct->vol_out3, &ct->CH->SLOT[SLOT3], ct->eg_cnt); - if (ct->CH->SLOT[SLOT4].state != EG_OFF) update_eg_phase(&ct->vol_out4, &ct->CH->SLOT[SLOT4], ct->eg_cnt); + SLOT = &ct->CH->SLOT[SLOT1]; + SLOT->vol_ipol = SLOT->vol_out; + if (SLOT->state != EG_OFF) update_eg_phase(SLOT, ct->eg_cnt); + SLOT = &ct->CH->SLOT[SLOT2]; + SLOT->vol_ipol = SLOT->vol_out; + if (SLOT->state != EG_OFF) update_eg_phase(SLOT, ct->eg_cnt); + 
SLOT = &ct->CH->SLOT[SLOT3]; + SLOT->vol_ipol = SLOT->vol_out; + if (SLOT->state != EG_OFF) update_eg_phase(SLOT, ct->eg_cnt); + SLOT = &ct->CH->SLOT[SLOT4]; + SLOT->vol_ipol = SLOT->vol_out; + if (SLOT->state != EG_OFF) update_eg_phase(SLOT, ct->eg_cnt); } +#if 0 + UINT32 ifrac0 = ct->eg_timer / (EG_TIMER_OVERFLOW>>EG_SH); + UINT32 ifrac1 = (1<CH->SLOT[SLOT1]; + ct->vol_out1 = (SLOT->vol_ipol*ifrac1 + SLOT->vol_out*ifrac0) >> EG_SH; + SLOT = &ct->CH->SLOT[SLOT2]; + ct->vol_out2 = (SLOT->vol_ipol*ifrac1 + SLOT->vol_out*ifrac0) >> EG_SH; + SLOT = &ct->CH->SLOT[SLOT3]; + ct->vol_out3 = (SLOT->vol_ipol*ifrac1 + SLOT->vol_out*ifrac0) >> EG_SH; + SLOT = &ct->CH->SLOT[SLOT4]; + ct->vol_out4 = (SLOT->vol_ipol*ifrac1 + SLOT->vol_out*ifrac0) >> EG_SH; +#else + switch (ct->eg_timer >> EG_SH) + { + case 0: + ct->vol_out1 = ct->CH->SLOT[SLOT1].vol_ipol; + ct->vol_out2 = ct->CH->SLOT[SLOT2].vol_ipol; + ct->vol_out3 = ct->CH->SLOT[SLOT3].vol_ipol; + ct->vol_out4 = ct->CH->SLOT[SLOT4].vol_ipol; + break; + case (EG_TIMER_OVERFLOW>>EG_SH)-1: + ct->vol_out1 = ct->CH->SLOT[SLOT1].vol_out; + ct->vol_out2 = ct->CH->SLOT[SLOT2].vol_out; + ct->vol_out3 = ct->CH->SLOT[SLOT3].vol_out; + ct->vol_out4 = ct->CH->SLOT[SLOT4].vol_out; + break; + default: + ct->vol_out1 = (ct->CH->SLOT[SLOT1].vol_ipol + + ct->CH->SLOT[SLOT1].vol_out) >> 1; + ct->vol_out2 = (ct->CH->SLOT[SLOT2].vol_ipol + + ct->CH->SLOT[SLOT2].vol_out) >> 1; + ct->vol_out3 = (ct->CH->SLOT[SLOT3].vol_ipol + + ct->CH->SLOT[SLOT3].vol_out) >> 1; + ct->vol_out4 = (ct->CH->SLOT[SLOT4].vol_ipol + + ct->CH->SLOT[SLOT4].vol_out) >> 1; + } +#endif if (ct->pack & 4) continue; /* output disabled */ @@ -892,7 +1027,7 @@ static void chan_render_loop(chan_rend_context *ct, int *buffer, int length) if (ct->pack & (1<<(SLOT4+8))) eg_out4 += add; } - switch( ct->CH->ALGO ) + switch( ct->algo&0x7 ) { case 0: { @@ -1086,6 +1221,33 @@ static void chan_render_finish(void) ym2612.OPN.lfo_cnt = crct.lfo_cnt; } +static UINT32 update_lfo_phase(FM_SLOT 
*SLOT, UINT32 block_fnum) +{ + UINT32 fnum_lfo; + INT32 lfo_fn_table_index_offset; + UINT8 blk; + UINT32 fn; + int fc,fdt; + + fnum_lfo = ((block_fnum & 0x7f0) >> 4) * 32 * 8; + lfo_fn_table_index_offset = lfo_pm_table[ fnum_lfo + crct.CH->pms + ((crct.pack>>16)&0xff) ]; + if (lfo_fn_table_index_offset) /* LFO phase modulation active */ + { + block_fnum = block_fnum*2 + lfo_fn_table_index_offset; + blk = (block_fnum&0x7000) >> 12; + fn = block_fnum & 0xfff; + + /* phase increment counter */ + fc = (fn_table[fn]>>(7-blk)); + + fdt = fc + SLOT->DT[crct.CH->kcode]; + if (fdt < 0) fdt += fn_table[0x7ff*2] >> 2; + + return (fdt * SLOT->mul) >> 1; + } else + return SLOT->Incr; +} + static int chan_render(int *buffer, int length, int c, UINT32 flags) // flags: stereo, ?, disabled, ?, pan_r, pan_l { crct.CH = &ym2612.CH[c]; @@ -1114,58 +1276,22 @@ static int chan_render(int *buffer, int length, int c, UINT32 flags) // flags: s crct.phase3 = crct.CH->SLOT[SLOT3].phase; crct.phase4 = crct.CH->SLOT[SLOT4].phase; - /* current output from EG circuit (without AM from LFO) */ - crct.vol_out1 = crct.CH->SLOT[SLOT1].tl + ((UINT32)crct.CH->SLOT[SLOT1].volume); - crct.vol_out2 = crct.CH->SLOT[SLOT2].tl + ((UINT32)crct.CH->SLOT[SLOT2].volume); - crct.vol_out3 = crct.CH->SLOT[SLOT3].tl + ((UINT32)crct.CH->SLOT[SLOT3].volume); - crct.vol_out4 = crct.CH->SLOT[SLOT4].tl + ((UINT32)crct.CH->SLOT[SLOT4].volume); - crct.op1_out = crct.CH->op1_out; crct.algo = crct.CH->ALGO & 7; - if(crct.CH->pms) + if(crct.CH->pms && (ym2612.OPN.ST.mode & 0xC0) && c == 2) { + /* 3 slot mode */ + crct.incr1 = update_lfo_phase(&crct.CH->SLOT[SLOT1], ym2612.OPN.SL3.block_fnum[1]); + crct.incr2 = update_lfo_phase(&crct.CH->SLOT[SLOT2], ym2612.OPN.SL3.block_fnum[2]); + crct.incr3 = update_lfo_phase(&crct.CH->SLOT[SLOT3], ym2612.OPN.SL3.block_fnum[0]); + crct.incr4 = update_lfo_phase(&crct.CH->SLOT[SLOT4], crct.CH->block_fnum); + } + else if(crct.CH->pms) { - /* add support for 3 slot mode */ - UINT32 block_fnum = 
crct.CH->block_fnum; - - UINT32 fnum_lfo = ((block_fnum & 0x7f0) >> 4) * 32 * 8; - INT32 lfo_fn_table_index_offset = lfo_pm_table[ fnum_lfo + crct.CH->pms + ((crct.pack>>16)&0xff) ]; - - if (lfo_fn_table_index_offset) /* LFO phase modulation active */ - { - UINT8 blk; - UINT32 fn; - int kc,fc,fdt; - - block_fnum = block_fnum*2 + lfo_fn_table_index_offset; - blk = (block_fnum&0x7000) >> 12; - fn = block_fnum & 0xfff; - - /* keyscale code */ - kc = (blk<<2) | opn_fktable[(fn >> 7) & 0xf]; - /* phase increment counter */ - fc = (fn_table[fn]>>(7-blk)); - - fdt = fc + crct.CH->SLOT[SLOT1].DT[kc]; - if (fdt < 0) fdt += fn_table[0x7ff*2] >> 2; - crct.incr1 = (fdt*crct.CH->SLOT[SLOT1].mul) >> 1; - fdt = fc + crct.CH->SLOT[SLOT2].DT[kc]; - if (fdt < 0) fdt += fn_table[0x7ff*2] >> 2; - crct.incr2 = (fdt*crct.CH->SLOT[SLOT2].mul) >> 1; - fdt = fc + crct.CH->SLOT[SLOT3].DT[kc]; - if (fdt < 0) fdt += fn_table[0x7ff*2] >> 2; - crct.incr3 = (fdt*crct.CH->SLOT[SLOT3].mul) >> 1; - fdt = fc + crct.CH->SLOT[SLOT4].DT[kc]; - if (fdt < 0) fdt += fn_table[0x7ff*2] >> 2; - crct.incr4 = (fdt*crct.CH->SLOT[SLOT4].mul) >> 1; - } - else /* LFO phase modulation = zero */ - { - crct.incr1 = crct.CH->SLOT[SLOT1].Incr; - crct.incr2 = crct.CH->SLOT[SLOT2].Incr; - crct.incr3 = crct.CH->SLOT[SLOT3].Incr; - crct.incr4 = crct.CH->SLOT[SLOT4].Incr; - } + crct.incr1 = update_lfo_phase(&crct.CH->SLOT[SLOT1], crct.CH->block_fnum); + crct.incr2 = update_lfo_phase(&crct.CH->SLOT[SLOT2], crct.CH->block_fnum); + crct.incr3 = update_lfo_phase(&crct.CH->SLOT[SLOT3], crct.CH->block_fnum); + crct.incr4 = update_lfo_phase(&crct.CH->SLOT[SLOT4], crct.CH->block_fnum); } else /* no LFO phase modulation */ { @@ -1297,8 +1423,13 @@ static void reset_channels(FM_CH *CH) CH[c].fc = 0; for(s = 0 ; s < 4 ; s++ ) { + CH[c].SLOT[s].Incr = -1; + CH[c].SLOT[s].key = 0; + CH[c].SLOT[s].phase = 0; + CH[c].SLOT[s].ssg = CH[c].SLOT[s].ssgn = 0; CH[c].SLOT[s].state= EG_OFF; CH[c].SLOT[s].volume = MAX_ATT_INDEX; + 
CH[c].SLOT[s].vol_out = MAX_ATT_INDEX; } CH[c].mem_value = CH[c].op1_out = 0; } @@ -1503,8 +1634,10 @@ static int OPNWriteReg(int r, int v) break; case 0x90: /* SSG-EG */ - // removed. - ret = 0; + SLOT->ssg = v&0x0f; + SLOT->ssg ^= SLOT->ssgn; + if (SLOT->state > EG_REL) + recalc_volout(SLOT); break; case 0xa0: diff --git a/pico/sound/ym2612.h b/pico/sound/ym2612.h index bbe6b1a4..3a1ea7a9 100644 --- a/pico/sound/ym2612.h +++ b/pico/sound/ym2612.h @@ -53,6 +53,11 @@ typedef struct }; UINT32 eg_pack[4]; }; + + UINT8 ssg; /* 0x30 SSG-EG waveform */ + UINT8 ssgn; + UINT16 vol_out; /* 0x32 current output from EG (without LFO) */ + UINT16 vol_ipol; /* 0x34 interpolator memory */ } FM_SLOT; @@ -176,21 +181,22 @@ int YM2612PicoStateLoad2(int *tat, int *tbt); #else /* GP2X specific */ #include "../../platform/gp2x/940ctl.h" -#define YM2612Init(baseclock,rate) { \ +#define YM2612Init(baseclock,rate) do { \ if (PicoIn.opt&POPT_EXT_FM) YM2612Init_940(baseclock, rate); \ else YM2612Init_(baseclock, rate); \ -} -#define YM2612ResetChip() { \ +} while (0) +#define YM2612ResetChip() do { \ if (PicoIn.opt&POPT_EXT_FM) YM2612ResetChip_940(); \ else YM2612ResetChip_(); \ -} -#define YM2612UpdateOne(buffer,length,stereo,is_buf_empty) \ +} while (0) +#define YM2612UpdateOne(buffer,length,stereo,is_buf_empty) do { \ (PicoIn.opt&POPT_EXT_FM) ? 
YM2612UpdateOne_940(buffer, length, stereo, is_buf_empty) : \ - YM2612UpdateOne_(buffer, length, stereo, is_buf_empty); -#define YM2612PicoStateLoad() { \ + YM2612UpdateOne_(buffer, length, stereo, is_buf_empty); \ +} while (0) +#define YM2612PicoStateLoad() do { \ if (PicoIn.opt&POPT_EXT_FM) YM2612PicoStateLoad_940(); \ else YM2612PicoStateLoad_(); \ -} +} while (0) #endif /* __GP2X__ */ diff --git a/pico/sound/ym2612_arm.S b/pico/sound/ym2612_arm.S index 9b807928..86e5f1c0 100644 --- a/pico/sound/ym2612_arm.S +++ b/pico/sound/ym2612_arm.S @@ -1,6 +1,7 @@ /* * PicoDrive * (C) notaz, 2006 + * (C) kub, 2020 added SSG-EG and simple output rate interpolation * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. @@ -18,7 +19,7 @@ .equiv SLOT2, 2 .equiv SLOT3, 1 .equiv SLOT4, 3 -.equiv SLOT_STRUCT_SIZE, 0x30 +.equiv SLOT_STRUCT_SIZE, 0x38 .equiv TL_TAB_LEN, 0x1A00 @@ -28,11 +29,11 @@ .equiv EG_REL, 1 .equiv EG_OFF, 0 -.equiv EG_SH, 16 @ 16.16 fixed point (envelope generator timing) +.equiv EG_SH, 16 @ 16.16 fixed point (envelope generator timing) .equiv EG_TIMER_OVERFLOW, (3*(1<= (INT32) SLOT->sl ) + strgeb r3, [r5,#0x17] @ state + b 10f + +4: @ EG_ATT + subs r3, r3, #1 @ eg_inc_val_shift - 1 + mvnpl r2, r0 + movpl r2, r2, lsl r3 + addpl r0, r0, r2, asr #4 + cmp r0, #0 @ if (volume <= MIN_ATT_INDEX) + bgt 10f + ldr r2, [r5,#0x1c] + mov r0, #0 + cmp r2, #0 + movne r3, #EG_DEC + moveq r3, #EG_SUS + strb r3, [r5,#0x17] @ state + b 10f + +1: @ EG_REL + mov r2, #0x200 + cmp r0, r2 @ if ( volume >= 0x200 ) + movge r0, #1024 + subge r0, #1 + movge r3, #EG_OFF + strgeb r3, [r5,#0x17] @ state + +10: @ finish + strh r0, [r5,#0x1a] @ volume + ldrb r2, [r5,#0x30] @ ssg + ldrb r3, [r5,#0x17] @ state + cmp r2, #0x0c @ if ( ssg&0x04 && state > EG_REL ) + cmpge r3, #EG_REL+1 + rsbge r0, r0, #0x200 @ volume = (0x200-volume) & MAX_ATT + lslge r0, r0, #10 + lsrge r0, r0, #10 + +11: + ldrh r3, [r5,#0x18] @ tl + add r0, r0, r3 @ volume += 
tl + strh r0, [r5,#0x32] @ vol_out .if \slot == SLOT1 mov r6, r6, lsr #16 - add r0, r0, r3 orr r6, r0, r6, lsl #16 .elseif \slot == SLOT2 mov r6, r6, lsl #16 - add r0, r0, r3 mov r0, r0, lsl #16 orr r6, r0, r6, lsr #16 .elseif \slot == SLOT3 mov r7, r7, lsr #16 - add r0, r0, r3 orr r7, r0, r7, lsl #16 .elseif \slot == SLOT4 mov r7, r7, lsl #16 - add r0, r0, r3 mov r0, r0, lsl #16 orr r7, r0, r7, lsr #16 .endif @@ -137,6 +202,63 @@ 0: @ EG_OFF .endm +@ r5=slot, trashes: r0,r2,r3 +.macro update_ssg_eg + ldrh r0, [r5,#0x30] @ ssg+ssgn + ldrb r2, [r5,#0x17] @ state + ldrh r3, [r5,#0x1a] @ volume + tst r0, #0x08 @ ssg enabled? + beq 9f + cmp r2, #EG_REL @ state > EG_REL? + ble 9f + cmp r3, #0x200 @ volume >= 0x200? + blt 9f + + tst r0, #0x01 + beq 1f + + tst r0, #0x02 + eorne r0, r0, lsr #8 @ ssg ^= ssgn ^ 4 + eorne r0, r0, #0x4 + orrne r0, r0, #0x400 @ ssgn = 4 + strneh r0, [r5,#0x30] + + eor r0, r0, #0x4 @ if ( !(ssg&0x04 ) + tst r0, #0x4 + cmpne r2, #EG_ATT @ if ( state != EG_ATT ) + movne r0, #0x400 + subne r0, r0, #1 + strneh r0, [r5,#0x1a] @ volume = MAX_ATT + b 9f + +1: tst r0, #0x02 + eorne r0, r0, #0x4 @ ssg ^= 4 + eorne r0, r0, #0x400 @ ssgn ^= 4 + strneh r0, [r5,#0x30] + moveq r3, #0 + streq r3, [r5,#0x0c] @ phase = 0 + + cmp r2, #EG_ATT @ if ( state != EG_ATT ) + beq 9f + + ldr r3, [r5,#0x1c] @ sl + mov r2, #EG_SUS @ state = sl==MIN_ATT ? 
EG_SUS:EG_DEC + cmp r3, #0 + + ldr r0, [r5,#0x04] @ ar + ldr r3, [r5,#0x14] @ ksr + movne r2, #EG_DEC + add r0, r0, r3 + cmp r0, #32+62 @ if ( ar+ksr >= 32+62 ) + ldrlt r0, [r5,#0x1a] + movge r0, #0 + strgeh r0, [r5,#0x1a] @ volume = MIN_ATT + + cmp r0, #0 + movgt r2, #EG_ATT + strb r2, [r5,#0x17] @ state +9: +.endm @ r12=lfo_ampm[31:16], r1=lfo_cnt_old, r2=lfo_cnt, r3=scratch .macro advance_lfo_m @@ -532,187 +654,6 @@ .endm -/* -.global update_eg_phase @ FM_SLOT *SLOT, UINT32 eg_cnt - -update_eg_phase: - stmfd sp!, {r5,r6} - mov r5, r0 @ slot - ldrh r3, [r5,#0x18] @ tl - ldrh r6, [r5,#0x1a] @ volume - add r6, r6, r3 - update_eg_phase_slot SLOT1 - mov r0, r6 - ldmfd sp!, {r5,r6} - bx lr -.pool - - -.global advance_lfo @ int lfo_ampm, UINT32 lfo_cnt_old, UINT32 lfo_cnt - -advance_lfo: - mov r12, r0, lsl #16 - advance_lfo_m - mov r0, r12, lsr #16 - bx lr -.pool - - -.global upd_algo0 @ chan_rend_context *c -upd_algo0: - stmfd sp!, {r4-r10,lr} - mov lr, r0 - - PIC_LDR(r3, ip, ym_sin_tab) - PIC_LDR(r5, ip, ym_tl_tab) - ldmia lr, {r6-r7} - ldr r10, [lr, #0x54] - ldr r12, [lr, #0x4c] - - upd_algo0_m - - ldmfd sp!, {r4-r10,pc} -.pool - - -.global upd_algo1 @ chan_rend_context *c -upd_algo1: - stmfd sp!, {r4-r10,lr} - mov lr, r0 - - PIC_LDR(r3, ip, ym_sin_tab) - PIC_LDR(r5, ip, ym_tl_tab) - ldmia lr, {r6-r7} - ldr r10, [lr, #0x54] - ldr r12, [lr, #0x4c] - - upd_algo1_m - - ldmfd sp!, {r4-r10,pc} -.pool - - -.global upd_algo2 @ chan_rend_context *c -upd_algo2: - stmfd sp!, {r4-r10,lr} - mov lr, r0 - - PIC_LDR(r3, ip, ym_sin_tab) - PIC_LDR(r5, ip, ym_tl_tab) - ldmia lr, {r6-r7} - ldr r10, [lr, #0x54] - ldr r12, [lr, #0x4c] - - upd_algo2_m - - ldmfd sp!, {r4-r10,pc} -.pool - - -.global upd_algo3 @ chan_rend_context *c -upd_algo3: - stmfd sp!, {r4-r10,lr} - mov lr, r0 - - PIC_LDR(r3, ip, ym_sin_tab) - PIC_LDR(r5, ip, ym_tl_tab) - ldmia lr, {r6-r7} - ldr r10, [lr, #0x54] - ldr r12, [lr, #0x4c] - - upd_algo3_m - - ldmfd sp!, {r4-r10,pc} -.pool - - -.global upd_algo4 @ 
chan_rend_context *c -upd_algo4: - stmfd sp!, {r4-r10,lr} - mov lr, r0 - - PIC_LDR(r3, ip, ym_sin_tab) - PIC_LDR(r5, ip, ym_tl_tab) - ldmia lr, {r6-r7} - ldr r10, [lr, #0x54] - ldr r12, [lr, #0x4c] - - upd_algo4_m - - ldmfd sp!, {r4-r10,pc} -.pool - - -.global upd_algo5 @ chan_rend_context *c -upd_algo5: - stmfd sp!, {r4-r10,lr} - mov lr, r0 - - PIC_LDR(r3, ip, ym_sin_tab) - PIC_LDR(r5, ip, ym_tl_tab) - ldmia lr, {r6-r7} - ldr r10, [lr, #0x54] - ldr r12, [lr, #0x4c] - - upd_algo5_m - - ldmfd sp!, {r4-r10,pc} -.pool - - -.global upd_algo6 @ chan_rend_context *c -upd_algo6: - stmfd sp!, {r4-r10,lr} - mov lr, r0 - - PIC_LDR(r3, ip, ym_sin_tab) - PIC_LDR(r5, ip, ym_tl_tab) - ldmia lr, {r6-r7} - ldr r10, [lr, #0x54] - ldr r12, [lr, #0x4c] - - upd_algo6_m - - ldmfd sp!, {r4-r10,pc} -.pool - - -.global upd_algo7 @ chan_rend_context *c -upd_algo7: - stmfd sp!, {r4-r10,lr} - mov lr, r0 - - PIC_LDR(r3, ip, ym_sin_tab) - PIC_LDR(r5, ip, ym_tl_tab) - ldmia lr, {r6-r7} - ldr r10, [lr, #0x54] - ldr r12, [lr, #0x4c] - - upd_algo7_m - - ldmfd sp!, {r4-r10,pc} -.pool - - -.global upd_slot1 @ chan_rend_context *c -upd_slot1: - stmfd sp!, {r4-r10,lr} - mov lr, r0 - - PIC_LDR(r3, ip, ym_sin_tab) - PIC_LDR(r5, ip, ym_tl_tab) - ldmia lr, {r6-r7} - ldr r10, [lr, #0x54] - ldr r12, [lr, #0x4c] - - upd_slot1_m - str r10, [lr, #0x38] - - ldmfd sp!, {r4-r10,pc} -.pool -*/ - - @ lr=context, r12=pack (stereo, lastchan, disabled, lfo_enabled | pan_r, pan_l, ams[2] | AMmasks[4] | FB[4] | lfo_ampm[16]) @ r0-r2=scratch, r3=sin_tab/scratch, r4=(length<<8)|unused[4],was_update,algo[3], r5=tl_tab/slot, @ r6-r7=vol_out[4], r8=eg_timer, r9=eg_timer_add[31:16], r10=op1_out, r11=buffer @@ -730,14 +671,21 @@ chan_render_loop: add r0, lr, #0x44 ldmia r0, {r8,r9} @ eg_timer, eg_timer_add ldr r10, [lr, #0x54] @ op1_out - ldmia lr, {r6,r7} @ load volumes +@ ldmia lr, {r6,r7} @ load volumes + ldr r5, [lr, #0x40] @ CH + ldrh r6, [r5, #0x32] @ vol_out values for all slots + ldrh r2, [r5, #0x32+SLOT_STRUCT_SIZE*2] 
+ ldrh r7, [r5, #0x32+SLOT_STRUCT_SIZE] + ldrh r3, [r5, #0x32+SLOT_STRUCT_SIZE*3] + orr r6, r6, r2, lsl #16 + orr r7, r7, r3, lsl #16 tst r12, #8 @ lfo? beq crl_loop crl_loop_lfo: add r0, lr, #0x30 - ldmia r0, {r1,r2} + ldmia r0, {r1,r2} @ lfo_cnt, lfo_inc subs r4, r4, #0x100 bmi crl_loop_end @@ -754,15 +702,29 @@ crl_loop: subs r4, r4, #0x100 bmi crl_loop_end + @ -- SSG -- + add r0, lr, #0x3c + ldmia r0, {r1,r5} @ eg_cnt, CH + + @ r5=slot, trashes: r0,r2,r3 + update_ssg_eg + add r5, r5, #SLOT_STRUCT_SIZE*2 @ SLOT2 (2) + update_ssg_eg + sub r5, r5, #SLOT_STRUCT_SIZE @ SLOT3 (1) + update_ssg_eg + add r5, r5, #SLOT_STRUCT_SIZE*2 @ SLOT4 (3) + update_ssg_eg + sub r5, r5, #SLOT_STRUCT_SIZE*3 + @ -- EG -- add r8, r8, r9 cmp r8, #EG_TIMER_OVERFLOW bcc eg_done - add r0, lr, #0x3c - ldmia r0, {r1,r5} @ eg_cnt, CH eg_loop: sub r8, r8, #EG_TIMER_OVERFLOW add r1, r1, #1 + cmp r1, #4096 + movge r1, #1 @ SLOT1 (0) @ r5=slot, r1=eg_cnt, trashes: r0,r2,r3 update_eg_phase_slot SLOT1 @@ -774,8 +736,8 @@ eg_loop: update_eg_phase_slot SLOT4 cmp r8, #EG_TIMER_OVERFLOW - subcs r5, r5, #SLOT_STRUCT_SIZE*3 - bcs eg_loop + sub r5, r5, #SLOT_STRUCT_SIZE*3 + bhs eg_loop str r1, [lr, #0x3c] eg_done: @@ -787,6 +749,66 @@ eg_done: cmp r0, #0x4 beq crl_loop + @ output interpolation +#if 0 + @ basic interpolator, interpolate in middle region, else use closer value + mov r3, r8, lsr #EG_SH @ eg_timer, [0..3<>EG_SH)/2 + bgt 0f @ mix is vol_out + + ldrh r0, [r5,#0x34] @ SLOT1 vol_ipol + lsleq r2, r6, #16 + addeq r0, r0, r2, lsr #16 + lsreq r0, r0, #1 + mov r6, r6, lsr #16 + orr r6, r0, r6, lsl #16 + + ldrh r0, [r5,#0x34+SLOT_STRUCT_SIZE*2] @ SLOT2 vol_ipol + addeq r0, r0, r6, lsr #16 + lsreq r0, r0, #1 + mov r6, r6, lsl #16 + orr r6, r6, r0 + ror r6, r6, #16 + + ldrh r0, [r5,#0x34+SLOT_STRUCT_SIZE] @ SLOT3 vol_ipol + lsleq r2, r7, #16 + addeq r0, r0, r2, lsr #16 + lsreq r0, r0, #1 + mov r7, r7, lsr #16 + orr r7, r0, r7, lsl #16 + + ldrh r0, [r5,#0x34+SLOT_STRUCT_SIZE*3] @ SLOT4 vol_ipol + addeq r0, 
r0, r7, lsr #16 + lsreq r0, r0, #1 + mov r7, r7, lsl #16 + orr r7, r7, r0 + ror r7, r7, #16 +#elif 0 + @ super-basic... just take value closest to sample point + mov r3, r8, lsr #EG_SH-1 @ eg_timer, [0..3<>EG_SH) + bgt 0f @ mix is vol_out + + ldrh r0, [r5,#0x34] @ SLOT1 vol_ipol + mov r6, r6, lsr #16 + orr r6, r0, r6, lsl #16 + + ldrh r0, [r5,#0x34+SLOT_STRUCT_SIZE*2] @ SLOT2 vol_ipol + mov r6, r6, lsl #16 + orr r6, r6, r0 + ror r6, r6, #16 + + ldrh r0, [r5,#0x34+SLOT_STRUCT_SIZE] @ SLOT3 vol_ipol + mov r7, r7, lsr #16 + orr r7, r0, r7, lsl #16 + + ldrh r0, [r5,#0x34+SLOT_STRUCT_SIZE*3] @ SLOT4 vol_ipol + mov r7, r7, lsl #16 + orr r7, r7, r0 + ror r7, r7, #16 +#endif +0: + @ -- SLOT1 -- PIC_LDR(r3, r2, ym_tl_tab) From b9bc876c9cd6c3ccb27f5281a4b22dacdc13fa6d Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 14 Jan 2020 22:49:03 +0100 Subject: [PATCH 0253/1110] bug fixes in drc, audio, display --- cpu/sh2/compiler.c | 15 ++------ cpu/sh2/compiler.h | 1 - pico/32x/32x.c | 1 - pico/32x/sh2soc.c | 14 +++++--- pico/draw2.c | 13 +++++++ pico/draw2_arm.S | 21 +++++++++--- pico/pico.h | 1 + pico/sound/mix_arm.S | 6 ++-- pico/sound/sound.c | 24 ++++++------- pico/sound/ym2612.c | 37 +++++++++++++++----- pico/sound/ym2612.h | 5 +-- pico/sound/ym2612_arm.S | 72 ++++++++++++++++++--------------------- platform/common/dismips.c | 8 +++-- platform/linux/emu.c | 5 ++- 14 files changed, 130 insertions(+), 93 deletions(-) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index bd3e5b43..04320424 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -703,8 +703,8 @@ static void add_to_hashlist(struct block_entry *be, int tcache_id) #if (DRC_DEBUG & 2) if (be->next != NULL) { - printf(" %08x: entry hash collision with %08x\n", - be->pc, be->next->pc); + printf(" %08x@%p: entry hash collision with %08x@%p\n", + be->pc, be->tcache_ptr, be->next->pc, be->next->tcache_ptr); hash_collisions++; } #endif @@ -5323,7 +5323,7 @@ int sh2_execute_drc(SH2 *sh2c, int cycles) // TODO: irq 
cycles ret_cycles = (int32_t)sh2c->sr >> 12; if (ret_cycles > 0) - dbg(1, "warning: drc returned with cycles: %d", ret_cycles); + dbg(1, "warning: drc returned with cycles: %d, pc %08x", ret_cycles, sh2c->pc); sh2c->sr &= 0x3f3; return ret_cycles; @@ -5506,10 +5506,6 @@ void sh2_drc_mem_setup(SH2 *sh2) sh2->p_drcblk_ram = Pico32xMem->drcblk_ram; } -void sh2_drc_frame(void) -{ -} - int sh2_drc_init(SH2 *sh2) { int i; @@ -5716,8 +5712,6 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, else if ((lowest_mova && lowest_mova <= pc) || (lowest_literal && lowest_literal <= pc)) break; // text area collides with data area - else if ((op_flags[i] & OF_BTARGET) && dr_get_entry(pc, is_slave, &i_end)) - break; // branch target already compiled op = FETCH_OP(pc); switch ((op & 0xf000) >> 12) @@ -6497,9 +6491,6 @@ end: last_btarget = 0; op = 0; // delay/poll insns counter for (i = 0, pc = base_pc; i < i_end; i++, pc += 2) { - int null; - if ((op_flags[i] & OF_BTARGET) && dr_get_entry(pc, is_slave, &null)) - break; // branch target already compiled opd = &ops[i]; crc += FETCH_OP(pc); diff --git a/cpu/sh2/compiler.h b/cpu/sh2/compiler.h index dd37d470..00a8707b 100644 --- a/cpu/sh2/compiler.h +++ b/cpu/sh2/compiler.h @@ -6,7 +6,6 @@ void sh2_drc_wcheck_da(uint32_t a, unsigned len, SH2 *sh2); #ifdef DRC_SH2 void sh2_drc_mem_setup(SH2 *sh2); void sh2_drc_flush_all(void); -void sh2_drc_frame(void); #else #define sh2_drc_mem_setup(x) #define sh2_drc_flush_all() diff --git a/pico/32x/32x.c b/pico/32x/32x.c index 896b5aa1..aa45ba7b 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -580,7 +580,6 @@ void PicoFrame32x(void) PicoFrameStart(); PicoFrameHints(); - sh2_drc_frame(); elprintf(EL_32X, "poll: %02x %02x %02x", Pico32x.emu_flags & 3, msh2.state, ssh2.state); diff --git a/pico/32x/sh2soc.c b/pico/32x/sh2soc.c index dd834bfb..cf11666d 100644 --- a/pico/32x/sh2soc.c +++ b/pico/32x/sh2soc.c @@ -137,11 +137,15 @@ static void dmac_memcpy(struct dma_chan *chan, 
SH2 *sh2) if (!up || chan->tcr < 4) return; - // XXX Mars Check Program fills a 64K buffer, then copies 32K longwords from - // DRAM to SDRAM in 4-longword mode, which is 128K. This overwrites a comm - // area in SDRAM, which is why the check fails. - // Is this a buswidth mismatch problem? As a kludge, usw 16-bit width xfers - if (size == 3 && (chan->sar & 0xdf000000) == 0x04000000) size = 1; +#if MARS_CHECK_HACK + // XXX Mars Check Program copies 32K longwords (128KB) from a 64KB buffer in + // ROM or DRAM to SDRAM in 4-longword mode, overwriting an SDRAM comm area in + // turn, which crashes the test on emulators without CPU cache emulation. + // This may be a bug in Mars Check. As a kludge limit the transfer to 64KB, + // which is what the check program test uses for checking the result. + // A better way would clearly be to have a mechanism to patch the ROM... + if (size == 3 && chan->tcr == 32768 && chan->dar == 0x06020000) size = 1; +#endif if (size == 3) size = 2; // 4-word xfer mode still counts in words // XXX check TCR being a multiple of 4 in 4-word xfer mode? // XXX check alignment of sar/dar, generating a bus error if unaligned? 
diff --git a/pico/draw2.c b/pico/draw2.c index f0e0518e..38a90ef3 100644 --- a/pico/draw2.c +++ b/pico/draw2.c @@ -157,6 +157,8 @@ static void DrawWindowFull(int start, int end, int prio, struct PicoEState *est) { nametab=(pvid->reg[3]&0x3e)<<9; // 32-cell mode nametab_step = 1<<5; + if (!(PicoIn.opt&POPT_DIS_32C_BORDER)) + scrpos += 32; } nametab += nametab_step*start; @@ -240,6 +242,8 @@ static void DrawLayerFull(int plane, int *hcache, int planestart, int planeend, else nametab=(pvid->reg[4]&0x07)<<12; // B scrpos = est->Draw2FB; + if (!(pvid->reg[12]&1) && !(PicoIn.opt&POPT_DIS_32C_BORDER)) + scrpos += 32; scrpos+=8*LINE_WIDTH*(planestart-START_ROW); // Get vertical scroll value: @@ -315,6 +319,8 @@ static void DrawTilesFromCacheF(int *hc, struct PicoEState *est) short blank=-1; // The tile we know is blank unsigned char *scrpos = est->Draw2FB, *pd = 0; + if (!(Pico.video.reg[12]&1) && !(PicoIn.opt&POPT_DIS_32C_BORDER)) + scrpos += 32; // *hcache++ = code|(dx<<16)|(trow<<27); // cache it scrpos+=(*hc++)*LINE_WIDTH - START_ROW*LINE_WIDTH*8; @@ -377,6 +383,8 @@ static void DrawSpriteFull(unsigned int *sprite, struct PicoEState *est) while(sy <= START_ROW*8) { sy+=8; tile+=tdeltay; height--; } scrpos = est->Draw2FB; + if (!(Pico.video.reg[12]&1) && !(PicoIn.opt&POPT_DIS_32C_BORDER)) + scrpos += 32; scrpos+=(sy-START_ROW*8)*LINE_WIDTH; for (; height > 0; height--, sy+=8, tile+=tdeltay) @@ -502,6 +510,11 @@ static void DrawDisplayFull(void) maxw = 264; maxcolc = 32; } + // 32C border for centering? (for asm) + est->rendstatus &= ~PDRAW_BORDER_32; + if ((est->rendstatus&PDRAW_32_COLS) && !(PicoIn.opt&POPT_DIS_32C_BORDER)) + est->rendstatus |= PDRAW_BORDER_32; + // horizontal window? 
if ((win=pvid->reg[0x12])) { diff --git a/pico/draw2_arm.S b/pico/draw2_arm.S index 6b094495..ded0d5a5 100644 --- a/pico/draw2_arm.S +++ b/pico/draw2_arm.S @@ -414,7 +414,10 @@ DrawLayerFull: ldr r11,[sp, #9*4] @ est sub r4, r9, #(START_ROW<<24) + ldr r7, [r11, #OFS_EST_rendstatus] ldr r11, [r11, #OFS_EST_Draw2FB] + tst r7, #0x100 @ H32 border mode? + addne r11, r11, #32 mov r4, r4, asr #24 mov r7, #328*8 mla r11, r4, r7, r11 @ scrpos+=8*328*(planestart-START_ROW); @@ -590,8 +593,11 @@ DrawTilesFromCacheF: mov r9, #0xff000000 @ r9=prevcode=-1 mvn r6, #0 @ r6=prevy=-1 + ldr r7, [r1, #OFS_EST_rendstatus] ldr r4, [r1, #OFS_EST_Draw2FB] ldr r2, [r0], #4 @ read y offset + tst r7, #0x100 @ H32 border mode? + addne r4, r4, #32 mov r7, #328 mla r2, r7, r2, r4 sub r12, r2, #(328*8*START_ROW) @ r12=scrpos @@ -688,13 +694,18 @@ DrawWindowFull: ldr r4, [r11, #OFS_Pico_video_reg+12] mov r5, #1 @ nametab_step + ldr r11, [r3, #OFS_EST_Draw2FB] tst r4, #1 @ 40 cell mode? andne r12, r12, #0xf000 @ 0x3c<<10 - andeq r12, r12, #0xf800 movne r5, r5, lsl #7 - moveq r5, r5, lsl #6 @ nametab_step + bne 0f + ldr r7, [r3, #OFS_EST_rendstatus] + and r12, r12, #0xf800 + mov r5, r5, lsl #6 @ nametab_step + tst r7, #0x100 + addne r11, r11, #32 @ center screen in H32 mode - and r4, r0, #0xff +0: and r4, r0, #0xff mla r12, r5, r4, r12 @ nametab += nametab_step*start; ldr r10, [r3, #OFS_EST_PicoMem_vram] @@ -715,7 +726,6 @@ DrawWindowFull: mov r9, #0xff000000 @ r9=prevcode=-1 - ldr r11, [r3, #OFS_EST_Draw2FB] and r4, r0, #0xff add r11, r11, #328*8 sub r4, r4, #START_ROW @@ -915,8 +925,11 @@ DrawSpriteFull: and r3, lr, #0x6000 mov r3, r3, lsr #9 @ r3=pal=((code>>9)&0x30); + ldr r0, [r1, #OFS_EST_rendstatus] ldr r11, [r1, #OFS_EST_Draw2FB] ldr r10, [r1, #OFS_EST_PicoMem_vram] + tst r0, #0x100 @ H32 border mode? 
+ addne r11, r11, #32 sub r1, r12, #(START_ROW*8) mov r0, #328 mla r11, r1, r0, r11 @ scrpos+=(sy-START_ROW*8)*328; diff --git a/pico/pico.h b/pico/pico.h index daf5dfdf..1a60ce34 100644 --- a/pico/pico.h +++ b/pico/pico.h @@ -204,6 +204,7 @@ void PicoDoHighPal555(int sh, int line, struct PicoEState *est); #define PDRAW_PLANE_HI_PRIO (1<<6) // have layer with all hi prio tiles (mk3) #define PDRAW_SHHI_DONE (1<<7) // layer sh/hi already processed #define PDRAW_32_COLS (1<<8) // 32 column mode +#define PDRAW_BORDER_32 (1<<9) // center H32 in buffer (32 px border) extern int rendstatus_old; extern int rendlines; diff --git a/pico/sound/mix_arm.S b/pico/sound/mix_arm.S index 104b3065..a1558d74 100644 --- a/pico/sound/mix_arm.S +++ b/pico/sound/mix_arm.S @@ -176,10 +176,10 @@ m16_32_s2_no_unal2: @ filter out DC offset @ in=int_sample (max 20 bit), y=filter memory, r3=tmp .macro DCfilt in y - rsb r3, \y, \in, asl #12 @ fixpoint 20.12 + rsb r3, \y, \in, lsl #12 @ fixpoint 20.12 add \y, \y, r3, asr #13 - sub \in, \in, \y, asr #12 - sub \in, \in, \in, asr #2 @ reduce audio lvl some + sub r3, r3, r3, asr #2 @ reduce audio lvl some + asr \in, r3, #12 .endm @ mix 32bit audio (with 16bits really used, upper bits indicate overflow) with normal 16 bit audio with left channel only diff --git a/pico/sound/sound.c b/pico/sound/sound.c index f4cd4241..74fb6fcd 100644 --- a/pico/sound/sound.c +++ b/pico/sound/sound.c @@ -38,7 +38,7 @@ static void dac_recalculate(void) for(i = 0; i <= lines; i++) { - dac_info[i] = ((pos+(1<<15)) >> 16); // round to nearest + dac_info[i] = ((pos+0x8000) >> 16); // round to nearest pos += Pico.snd.fm_mult; } for (i = lines+1; i < sizeof(dac_info) / sizeof(dac_info[0]); i++) @@ -85,10 +85,10 @@ void PsndRerate(int preserve_state) // calculate Pico.snd.len Pico.snd.len = PicoIn.sndRate / target_fps; Pico.snd.len_e_add = ((PicoIn.sndRate - Pico.snd.len * target_fps) << 16) / target_fps; - Pico.snd.len_e_cnt = 0; + Pico.snd.len_e_cnt = 0; // Q16 - // samples 
per line - Pico.snd.fm_mult = 65536.0 * PicoIn.sndRate / (target_fps*target_lines); + // samples per line (Q16) + Pico.snd.fm_mult = 65536LL * PicoIn.sndRate / (target_fps*target_lines); // recalculate dac info dac_recalculate(); @@ -176,7 +176,7 @@ PICO_INTERNAL void PsndDoFM(int line_to) int pos, len; int stereo = 0; - // Q16, number of samples to fill in buffer + // Q16, number of samples since last call len = ((line_to-1) * Pico.snd.fm_mult) - Pico.snd.fm_pos; // don't do this too often (no more than 256 per sec) @@ -184,9 +184,9 @@ PICO_INTERNAL void PsndDoFM(int line_to) return; // update position and calculate buffer offset and length - pos = Pico.snd.fm_pos >> 16; + pos = (Pico.snd.fm_pos+0x8000) >> 16; Pico.snd.fm_pos += len; - len = (Pico.snd.fm_pos >> 16) - pos; + len = ((Pico.snd.fm_pos+0x8000) >> 16) - pos; // fill buffer if (PicoIn.opt & POPT_EN_STEREO) { @@ -195,8 +195,6 @@ PICO_INTERNAL void PsndDoFM(int line_to) } if (PicoIn.opt & POPT_EN_FM) YM2612UpdateOne(PsndBuffer + pos, len, stereo, 1); - else - memset32(PsndBuffer + pos, 0, len<> 3; - int fmlen = (Pico.snd.fm_pos >> 16) - offset; + int fmlen = ((Pico.snd.fm_pos+0x8000) >> 16) - offset; offset <<= stereo; buf32 = PsndBuffer+offset; @@ -282,15 +282,11 @@ static int PsndRender(int offset, int length) int *fmbuf = buf32 + (fmlen << stereo); if (PicoIn.opt & POPT_EN_FM) YM2612UpdateOne(fmbuf, length-fmlen, stereo, 1); - else - memset32(fmbuf, 0, (length-fmlen)<ssg ^= SLOT->ssgn; SLOT->ssgn = 0; SLOT->state = (SLOT->sl == MIN_ATT_INDEX) ? EG_SUS : EG_DEC; - if (SLOT->ar + SLOT->ksr < 32+62) { + if (SLOT->ar_ksr < 32+62) { if (SLOT->volume > MIN_ATT_INDEX) SLOT->state = EG_ATT; } else { SLOT->volume = MIN_ATT_INDEX; @@ -619,6 +619,7 @@ INLINE void set_ar_ksr(FM_CH *CH, FM_SLOT *SLOT, int v) int eg_sh_ar, eg_sel_ar; SLOT->ar = (v&0x1f) ? 
32 + ((v&0x1f)<<1) : 0; + SLOT->ar_ksr = SLOT->ar + SLOT->ksr; SLOT->KSR = 3-(v>>6); if (SLOT->KSR != old_KSR) @@ -627,10 +628,10 @@ INLINE void set_ar_ksr(FM_CH *CH, FM_SLOT *SLOT, int v) } /* refresh Attack rate */ - if ((SLOT->ar + SLOT->ksr) < 32+62) + if ((SLOT->ar_ksr) < 32+62) { - eg_sh_ar = eg_rate_shift [SLOT->ar + SLOT->ksr ]; - eg_sel_ar = eg_rate_select[SLOT->ar + SLOT->ksr ]; + eg_sh_ar = eg_rate_shift [SLOT->ar_ksr]; + eg_sel_ar = eg_rate_select[SLOT->ar_ksr]; } else { @@ -872,7 +873,7 @@ INLINE void update_ssg_eg_phase(FM_SLOT *SLOT) if (SLOT->state != EG_ATT) { SLOT->state = (SLOT->sl == MIN_ATT_INDEX) ? EG_SUS : EG_DEC; - if (SLOT->ar + SLOT->ksr < 32+62) { + if (SLOT->ar_ksr < 32+62) { if (SLOT->volume > MIN_ATT_INDEX) SLOT->state = EG_ATT; } else { SLOT->volume = MIN_ATT_INDEX; @@ -972,7 +973,7 @@ static void chan_render_loop(chan_rend_context *ct, int *buffer, int length) ct->vol_out3 = (SLOT->vol_ipol*ifrac1 + SLOT->vol_out*ifrac0) >> EG_SH; SLOT = &ct->CH->SLOT[SLOT4]; ct->vol_out4 = (SLOT->vol_ipol*ifrac1 + SLOT->vol_out*ifrac0) >> EG_SH; -#else +#elif 1 switch (ct->eg_timer >> EG_SH) { case 0: @@ -997,6 +998,23 @@ static void chan_render_loop(chan_rend_context *ct, int *buffer, int length) ct->vol_out4 = (ct->CH->SLOT[SLOT4].vol_ipol + ct->CH->SLOT[SLOT4].vol_out) >> 1; } +#elif 0 + if (ct->eg_timer >> (EG_SH-1) < EG_TIMER_OVERFLOW >> EG_SH) { + ct->vol_out1 = ct->CH->SLOT[SLOT1].vol_ipol; + ct->vol_out2 = ct->CH->SLOT[SLOT2].vol_ipol; + ct->vol_out3 = ct->CH->SLOT[SLOT3].vol_ipol; + ct->vol_out4 = ct->CH->SLOT[SLOT4].vol_ipol; + } else { + ct->vol_out1 = ct->CH->SLOT[SLOT1].vol_out; + ct->vol_out2 = ct->CH->SLOT[SLOT2].vol_out; + ct->vol_out3 = ct->CH->SLOT[SLOT3].vol_out; + ct->vol_out4 = ct->CH->SLOT[SLOT4].vol_out; + } +#else + ct->vol_out1 = ct->CH->SLOT[SLOT1].vol_out; + ct->vol_out2 = ct->CH->SLOT[SLOT2].vol_out; + ct->vol_out3 = ct->CH->SLOT[SLOT3].vol_out; + ct->vol_out4 = ct->CH->SLOT[SLOT4].vol_out; #endif if (ct->pack & 4) 
continue; /* output disabled */ @@ -1335,12 +1353,13 @@ INLINE void refresh_fc_eg_slot(FM_SLOT *SLOT, int fc, int kc) { int eg_sh, eg_sel; SLOT->ksr = ksr; + SLOT->ar_ksr = SLOT->ar + ksr; /* calculate envelope generator rates */ - if ((SLOT->ar + ksr) < 32+62) + if ((SLOT->ar_ksr) < 32+62) { - eg_sh = eg_rate_shift [SLOT->ar + ksr ]; - eg_sel = eg_rate_select[SLOT->ar + ksr ]; + eg_sh = eg_rate_shift [SLOT->ar_ksr]; + eg_sel = eg_rate_select[SLOT->ar_ksr]; } else { diff --git a/pico/sound/ym2612.h b/pico/sound/ym2612.h index 3a1ea7a9..73e693f9 100644 --- a/pico/sound/ym2612.h +++ b/pico/sound/ym2612.h @@ -56,8 +56,9 @@ typedef struct UINT8 ssg; /* 0x30 SSG-EG waveform */ UINT8 ssgn; - UINT16 vol_out; /* 0x32 current output from EG (without LFO) */ - UINT16 vol_ipol; /* 0x34 interpolator memory */ + UINT16 ar_ksr; /* 0x32 ar+ksr */ + UINT16 vol_out; /* 0x34 current output from EG (without LFO) */ + UINT16 vol_ipol; /* 0x36 interpolator memory */ } FM_SLOT; diff --git a/pico/sound/ym2612_arm.S b/pico/sound/ym2612_arm.S index 86e5f1c0..4cb92850 100644 --- a/pico/sound/ym2612_arm.S +++ b/pico/sound/ym2612_arm.S @@ -42,10 +42,10 @@ @ r5=slot, r1=eg_cnt, trashes: r0,r2,r3 @ writes output to routp, but only if vol_out changes .macro update_eg_phase_slot slot - ldrh r0, [r5,#0x32] @ vol_out + ldrh r0, [r5,#0x34] @ vol_out ldrb r2, [r5,#0x17] @ state add r3, r5, #0x1c - strh r0, [r5,#0x34] @ vol_ipol + strh r0, [r5,#0x36] @ vol_ipol tst r2, r2 beq 0f @ EG_OFF @@ -182,7 +182,7 @@ 11: ldrh r3, [r5,#0x18] @ tl add r0, r0, r3 @ volume += tl - strh r0, [r5,#0x32] @ vol_out + strh r0, [r5,#0x34] @ vol_out .if \slot == SLOT1 mov r6, r6, lsr #16 orr r6, r0, r6, lsl #16 @@ -207,11 +207,9 @@ ldrh r0, [r5,#0x30] @ ssg+ssgn ldrb r2, [r5,#0x17] @ state ldrh r3, [r5,#0x1a] @ volume - tst r0, #0x08 @ ssg enabled? - beq 9f - cmp r2, #EG_REL @ state > EG_REL? - ble 9f - cmp r3, #0x200 @ volume >= 0x200? 
+ cmp r0, #0x08 @ ssg enabled && + cmpge r2, #EG_REL+1 @ state > EG_REL && + cmpge r3, #0x200 @ volume >= 0x200? blt 9f tst r0, #0x01 @@ -226,35 +224,33 @@ eor r0, r0, #0x4 @ if ( !(ssg&0x04 ) tst r0, #0x4 cmpne r2, #EG_ATT @ if ( state != EG_ATT ) - movne r0, #0x400 - subne r0, r0, #1 - strneh r0, [r5,#0x1a] @ volume = MAX_ATT + movne r3, #0x400 + subne r3, r3, #1 + strneh r3, [r5,#0x1a] @ volume = MAX_ATT b 9f 1: tst r0, #0x02 eorne r0, r0, #0x4 @ ssg ^= 4 eorne r0, r0, #0x400 @ ssgn ^= 4 strneh r0, [r5,#0x30] - moveq r3, #0 - streq r3, [r5,#0x0c] @ phase = 0 + moveq r0, #0 + streq r0, [r5,#0x0c] @ phase = 0 cmp r2, #EG_ATT @ if ( state != EG_ATT ) beq 9f - ldr r3, [r5,#0x1c] @ sl + ldr r0, [r5,#0x1c] @ sl mov r2, #EG_SUS @ state = sl==MIN_ATT ? EG_SUS:EG_DEC - cmp r3, #0 - - ldr r0, [r5,#0x04] @ ar - ldr r3, [r5,#0x14] @ ksr - movne r2, #EG_DEC - add r0, r0, r3 - cmp r0, #32+62 @ if ( ar+ksr >= 32+62 ) - ldrlt r0, [r5,#0x1a] - movge r0, #0 - strgeh r0, [r5,#0x1a] @ volume = MIN_ATT - cmp r0, #0 + + ldrh r0, [r5,#0x32] @ ar+ksr + movne r2, #EG_DEC + cmp r0, #32+62 @ if ( ar+ksr >= 32+62 ) + movge r3, #0 + strgeh r3, [r5,#0x1a] @ volume = MIN_ATT + bge 9f + + cmp r3, #0 movgt r2, #EG_ATT strb r2, [r5,#0x17] @ state 9: @@ -673,10 +669,10 @@ chan_render_loop: ldr r10, [lr, #0x54] @ op1_out @ ldmia lr, {r6,r7} @ load volumes ldr r5, [lr, #0x40] @ CH - ldrh r6, [r5, #0x32] @ vol_out values for all slots - ldrh r2, [r5, #0x32+SLOT_STRUCT_SIZE*2] - ldrh r7, [r5, #0x32+SLOT_STRUCT_SIZE] - ldrh r3, [r5, #0x32+SLOT_STRUCT_SIZE*3] + ldrh r6, [r5, #0x34] @ vol_out values for all slots + ldrh r2, [r5, #0x34+SLOT_STRUCT_SIZE*2] + ldrh r7, [r5, #0x34+SLOT_STRUCT_SIZE] + ldrh r3, [r5, #0x34+SLOT_STRUCT_SIZE*3] orr r6, r6, r2, lsl #16 orr r7, r7, r3, lsl #16 @@ -756,28 +752,28 @@ eg_done: cmp r3, #(EG_TIMER_OVERFLOW>>EG_SH)/2 bgt 0f @ mix is vol_out - ldrh r0, [r5,#0x34] @ SLOT1 vol_ipol + ldrh r0, [r5,#0x36] @ SLOT1 vol_ipol lsleq r2, r6, #16 addeq r0, r0, r2, lsr #16 lsreq r0, 
r0, #1 mov r6, r6, lsr #16 orr r6, r0, r6, lsl #16 - ldrh r0, [r5,#0x34+SLOT_STRUCT_SIZE*2] @ SLOT2 vol_ipol + ldrh r0, [r5,#0x36+SLOT_STRUCT_SIZE*2] @ SLOT2 vol_ipol addeq r0, r0, r6, lsr #16 lsreq r0, r0, #1 mov r6, r6, lsl #16 orr r6, r6, r0 ror r6, r6, #16 - ldrh r0, [r5,#0x34+SLOT_STRUCT_SIZE] @ SLOT3 vol_ipol + ldrh r0, [r5,#0x36+SLOT_STRUCT_SIZE] @ SLOT3 vol_ipol lsleq r2, r7, #16 addeq r0, r0, r2, lsr #16 lsreq r0, r0, #1 mov r7, r7, lsr #16 orr r7, r0, r7, lsl #16 - ldrh r0, [r5,#0x34+SLOT_STRUCT_SIZE*3] @ SLOT4 vol_ipol + ldrh r0, [r5,#0x36+SLOT_STRUCT_SIZE*3] @ SLOT4 vol_ipol addeq r0, r0, r7, lsr #16 lsreq r0, r0, #1 mov r7, r7, lsl #16 @@ -787,22 +783,22 @@ eg_done: @ super-basic... just take value closest to sample point mov r3, r8, lsr #EG_SH-1 @ eg_timer, [0..3<>EG_SH) - bgt 0f @ mix is vol_out + bge 0f @ mix is vol_out - ldrh r0, [r5,#0x34] @ SLOT1 vol_ipol + ldrh r0, [r5,#0x36] @ SLOT1 vol_ipol mov r6, r6, lsr #16 orr r6, r0, r6, lsl #16 - ldrh r0, [r5,#0x34+SLOT_STRUCT_SIZE*2] @ SLOT2 vol_ipol + ldrh r0, [r5,#0x36+SLOT_STRUCT_SIZE*2] @ SLOT2 vol_ipol mov r6, r6, lsl #16 orr r6, r6, r0 ror r6, r6, #16 - ldrh r0, [r5,#0x34+SLOT_STRUCT_SIZE] @ SLOT3 vol_ipol + ldrh r0, [r5,#0x36+SLOT_STRUCT_SIZE] @ SLOT3 vol_ipol mov r7, r7, lsr #16 orr r7, r0, r7, lsl #16 - ldrh r0, [r5,#0x34+SLOT_STRUCT_SIZE*3] @ SLOT4 vol_ipol + ldrh r0, [r5,#0x36+SLOT_STRUCT_SIZE*3] @ SLOT4 vol_ipol mov r7, r7, lsl #16 orr r7, r7, r0 ror r7, r7, #16 diff --git a/platform/common/dismips.c b/platform/common/dismips.c index dc06ce80..d855ad6b 100644 --- a/platform/common/dismips.c +++ b/platform/common/dismips.c @@ -368,10 +368,12 @@ int dismips(uintptr_t pc, uint32_t insn, char *buf, size_t buflen, unsigned long else snprintf(buf, buflen, "%s %s, %s, %d", pi->name, rd, rt, sa); break; + //dext: pos,size-1 dextm: pos,size-33 dextu: pos-32,size-1 + //dins: pos,pos+size-1 dinsm: pos,pos+size-33 dinsu: pos-32,pos+size-33 case F_IMM_TS: - if (insn & 0x01) sb+=32; - if (insn & 0x02) 
sa+=32; - if (insn & 0x04) sb-=sa; + if (insn & 0x01) sb+=32; // ...m + if (insn & 0x02) sa+=32; // ...u + if (insn & 0x04) sb-=sa; // ins snprintf(buf, buflen, "%s %s, %s, %d, %d", pi->name, rt, rs, sa, sb+1); break; case B_IMM_S: diff --git a/platform/linux/emu.c b/platform/linux/emu.c index 93665263..5e4dd72a 100644 --- a/platform/linux/emu.c +++ b/platform/linux/emu.c @@ -176,7 +176,10 @@ void plat_debug_cat(char *str) void emu_video_mode_change(int start_line, int line_count, int is_32cols) { // clear whole screen in all buffers - memset32(g_screen_ptr, 0, g_screen_ppitch * g_screen_height * 2 / 4); + if (currentConfig.renderer != RT_16BIT && !(PicoIn.AHW & PAHW_32X)) + memset32(Pico.est.Draw2FB, 0, (320+8) * (8+240+8) / 4); + else + memset32(g_screen_ptr, 0, g_screen_ppitch * g_screen_height * 2 / 4); } void pemu_loop_prep(void) From 43e1401008bd6f981cdb45365e96674ef3ab2a10 Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 14 Jan 2020 23:00:44 +0100 Subject: [PATCH 0254/1110] emulator timing fixes, VDP DMA fixes, improved DAC audio --- cpu/cz80/cz80.c | 2 ++ cpu/cz80/cz80_op.c | 7 +++--- pico/32x/32x.c | 3 ++- pico/debug.c | 22 +++++------------ pico/memory.c | 17 +++++-------- pico/pico.c | 33 ++++++++++++++----------- pico/pico_cmn.c | 60 +++++++++++++++++++++++----------------------- pico/pico_int.h | 10 ++++---- pico/sound/sound.c | 55 ++++++++++++++++++++++++++++++------------ pico/videoport.c | 7 +++--- 10 files changed, 118 insertions(+), 98 deletions(-) diff --git a/cpu/cz80/cz80.c b/cpu/cz80/cz80.c index 0326b0b8..6b9afcde 100644 --- a/cpu/cz80/cz80.c +++ b/cpu/cz80/cz80.c @@ -288,6 +288,8 @@ Cz80_Exec_End: #if CZ80_ENCRYPTED_ROM CPU->OPBase = OPBase; #endif + if (CPU->HaltState) + CPU->ICount = 0; cycles -= CPU->ICount; #if !CZ80_EMULATE_R_EXACTLY zR = (zR + (cycles >> 2)) & 0x7f; diff --git a/cpu/cz80/cz80_op.c b/cpu/cz80/cz80_op.c index f84f8e75..5d623caf 100644 --- a/cpu/cz80/cz80_op.c +++ b/cpu/cz80/cz80_op.c @@ -687,13 +687,14 @@ OP_CCF: OP(0x76): 
// HALT OP_HALT: CPU->HaltState = 1; - CPU->ICount = 0; +// CPU->ICount = 0; goto Cz80_Check_Interrupt; OP(0xf3): // DI OP_DI: zIFF = 0; - RET(4) + USE_CYCLES(4) + goto Cz80_Exec_nocheck; OP(0xfb): // EI OP_EI: @@ -712,8 +713,6 @@ OP_EI: if (CPU->IRQState) { afterEI = 1; - CPU->ExtraCycles += 1 - CPU->ICount; - CPU->ICount = 1; } } else zIFF2 = (1 << 2); diff --git a/pico/32x/32x.c b/pico/32x/32x.c index aa45ba7b..0f0cc4f5 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -269,7 +269,8 @@ void p32x_schedule_hint(SH2 *sh2, unsigned int m68k_cycles) return; // nobody cares // note: when Pico.m.scanline is 224, SH2s might // still be at scanline 93 (or so) - if (!(Pico32x.sh2_regs[0] & 0x80) && Pico.m.scanline > 224) + if (!(Pico32x.sh2_regs[0] & 0x80) && + Pico.m.scanline > (Pico.video.reg[1] & 0x08 ? 240 : 224)) return; after = (Pico32x.sh2_regs[4 / 2] + 1) * 488; diff --git a/pico/debug.c b/pico/debug.c index 50cbaf38..e617d908 100644 --- a/pico/debug.c +++ b/pico/debug.c @@ -369,42 +369,32 @@ void PDebugDumpMem(void) void PDebugZ80Frame(void) { - int lines, line_sample; + int lines; if (PicoIn.AHW & PAHW_SMS) return; - if (Pico.m.pal) { + if (Pico.m.pal) lines = 313; - line_sample = 68; - } else { + else lines = 262; - line_sample = 93; - } z80_resetCycles(); PsndStartFrame(); - if (/*Pico.m.z80Run &&*/ !Pico.m.z80_reset && (PicoIn.opt&POPT_EN_Z80)) - PicoSyncZ80(Pico.t.m68c_cnt + line_sample * 488); - if (PicoIn.sndOut) - PsndGetSamples(line_sample); - if (/*Pico.m.z80Run &&*/ !Pico.m.z80_reset && (PicoIn.opt&POPT_EN_Z80)) { PicoSyncZ80(Pico.t.m68c_cnt + 224 * 488); z80_int(); } - if (PicoIn.sndOut) - PsndGetSamples(224); // sync z80 if (/*Pico.m.z80Run &&*/ !Pico.m.z80_reset && (PicoIn.opt&POPT_EN_Z80)) { Pico.t.m68c_cnt += Pico.m.pal ? 
151809 : 127671; // cycles adjusted for converter PicoSyncZ80(Pico.t.m68c_cnt); } - if (PicoIn.sndOut && ym2612.dacen && Pico.snd.dac_line < lines) - PsndDoDAC(lines - 1); - PsndDoPSG(lines - 1); + + if (PicoIn.sndOut) + PsndGetSamples(lines); timers_cycle(); Pico.t.m68c_aim = Pico.t.m68c_cnt; diff --git a/pico/memory.c b/pico/memory.c index 9fe3a085..1d9b9135 100644 --- a/pico/memory.c +++ b/pico/memory.c @@ -943,11 +943,11 @@ static int ym2612_write_local(u32 a, u32 d, int is_from_z80) a &= 3; if (a == 1 && ym2612.OPN.ST.address == 0x2a) /* DAC data */ { - int scanline = get_scanline(is_from_z80); - //elprintf(EL_STATUS, "%03i -> %03i dac w %08x z80 %i", Pico.snd.dac_line, scanline, d, is_from_z80); + int cycles = is_from_z80 ? z80_cyclesDone() : z80_cycles_from_68k(); + //elprintf(EL_STATUS, "%03i dac w %08x z80 %i", cycles, d, is_from_z80); ym2612.dacout = ((int)d - 0x80) << 6; if (ym2612.dacen) - PsndDoDAC(scanline); + PsndDoDAC(cycles); return 0; } @@ -1029,13 +1029,9 @@ static int ym2612_write_local(u32 a, u32 d, int is_from_z80) return 0; } case 0x2b: { /* DAC Sel (YM2612) */ - int scanline = get_scanline(is_from_z80); - if (ym2612.dacen != (d & 0x80)) { - ym2612.dacen = d & 0x80; - Pico.snd.dac_line = scanline; - } + ym2612.dacen = d & 0x80; #ifdef __GP2X__ - if (PicoIn.opt & POPT_EXT_FM) YM2612Write_940(a, d, scanline); + if (PicoIn.opt & POPT_EXT_FM) YM2612Write_940(a, d, get_scanline(is_from_z80)); #endif return 0; } @@ -1059,8 +1055,7 @@ static int ym2612_write_local(u32 a, u32 d, int is_from_z80) break; } - int scanline = get_scanline(is_from_z80); - PsndDoFM(scanline); + PsndDoFM(get_scanline(is_from_z80)); #ifdef __GP2X__ if (PicoIn.opt & POPT_EXT_FM) return YM2612Write_940(a, d, get_scanline(is_from_z80)); diff --git a/pico/pico.c b/pico/pico.c index f6b43cd6..2a16a0e2 100644 --- a/pico/pico.c +++ b/pico/pico.c @@ -224,40 +224,45 @@ void PicoLoopPrepare(void) // this table is wrong and should be removed // keeping it for now to compensate wrong 
timing elswhere, mainly for Outrunners -static const int dma_timings[] = { - 83, 166, 83, 83, // vblank: 32cell: dma2vram dma2[vs|c]ram vram_fill vram_copy - 102, 204, 102, 102, // vblank: 40cell: - 8, 16, 8, 8, // active: 32cell: - 17, 18, 9, 9 // ... +static const int dma_timings[] = { // Q16 + // dma2vram dma2[vs|c]ram vram_fill vram_copy + // VRAM has half the width of VSRAM/CRAM, thus half the performance + ( 83<<16)/488, (166<<16)/488, (165<<16)/488, ( 83<<16)/488, // vblank 32cell + (102<<16)/488, (204<<16)/488, (203<<16)/488, (102<<16)/488, // vblank 40cell + ( 8<<16)/488, ( 16<<16)/488, ( 15<<16)/488, ( 8<<16)/488, // active 32cell + ( 9<<16)/488, ( 18<<16)/488, ( 17<<16)/488, ( 9<<16)/488 // active 40cell }; -static const int dma_bsycles[] = { - (488<<8)/83, (488<<8)/166, (488<<8)/83, (488<<8)/83, - (488<<8)/102, (488<<8)/204, (488<<8)/102, (488<<8)/102, - (488<<8)/8, (488<<8)/16, (488<<8)/8, (488<<8)/8, - (488<<8)/9, (488<<8)/18, (488<<8)/9, (488<<8)/9 +static const int dma_bsycles[] = { // Q16 + (488<<16)/83, (488<<16)/166, (488<<16)/165, (488<<16)/83, + (488<<16)/102, (488<<16)/204, (488<<16)/203, (488<<16)/102, + (488<<16)/8, (488<<16)/16, (488<<16)/15, (488<<16)/8, + (488<<16)/9, (488<<16)/18, (488<<16)/17, (488<<16)/9 }; // grossly inaccurate.. FIXME FIXXXMEE -PICO_INTERNAL int CheckDMA(void) +PICO_INTERNAL int CheckDMA(int cycles) { int burn = 0, xfers_can, dma_op = Pico.video.reg[0x17]>>6; // see gens for 00 and 01 modes int xfers = Pico.m.dma_xfers; int dma_op1; + // safety pin + if (cycles <= 0) return 0; + if(!(dma_op&2)) dma_op = (Pico.video.type==1) ? 0 : 1; // setting dma_timings offset here according to Gens dma_op1 = dma_op; if(Pico.video.reg[12] & 1) dma_op |= 4; // 40 cell mode? if(!(Pico.video.status&8)&&(Pico.video.reg[1]&0x40)) dma_op|=8; // active display? 
- xfers_can = dma_timings[dma_op]; + xfers_can = (dma_timings[dma_op] * cycles + 0xff) >> 16; if(xfers <= xfers_can) { Pico.video.status &= ~SR_DMA; if (!(dma_op & 2)) - burn = xfers * dma_bsycles[dma_op] >> 8; // have to be approximate because can't afford division.. + burn = xfers * dma_bsycles[dma_op] >> 16; Pico.m.dma_xfers = 0; } else { - if(!(dma_op&2)) burn = 488; + if(!(dma_op&2)) burn = cycles; Pico.m.dma_xfers -= xfers_can; } diff --git a/pico/pico_cmn.c b/pico/pico_cmn.c index 5fa0b16f..8c22c977 100644 --- a/pico/pico_cmn.c +++ b/pico/pico_cmn.c @@ -22,25 +22,29 @@ #endif // sync m68k to Pico.t.m68c_aim +static void SekExecM68k(int cyc_do) +{ + Pico.t.m68c_cnt += cyc_do; + +#if defined(EMU_C68K) + PicoCpuCM68k.cycles = cyc_do; + CycloneRun(&PicoCpuCM68k); + Pico.t.m68c_cnt -= PicoCpuCM68k.cycles; +#elif defined(EMU_M68K) + Pico.t.m68c_cnt += m68k_execute(cyc_do) - cyc_do; +#elif defined(EMU_F68K) + Pico.t.m68c_cnt += fm68k_emulate(&PicoCpuFM68k, cyc_do, 0) - cyc_do; +#endif +} + static void SekSyncM68k(void) { int cyc_do; pprof_start(m68k); pevt_log_m68k_o(EVT_RUN_START); - while ((cyc_do = Pico.t.m68c_aim - Pico.t.m68c_cnt) > 0) { - Pico.t.m68c_cnt += cyc_do; - -#if defined(EMU_C68K) - PicoCpuCM68k.cycles = cyc_do; - CycloneRun(&PicoCpuCM68k); - Pico.t.m68c_cnt -= PicoCpuCM68k.cycles; -#elif defined(EMU_M68K) - Pico.t.m68c_cnt += m68k_execute(cyc_do) - cyc_do; -#elif defined(EMU_F68K) - Pico.t.m68c_cnt += fm68k_emulate(&PicoCpuFM68k, cyc_do, 0) - cyc_do; -#endif - } + while ((cyc_do = Pico.t.m68c_aim - Pico.t.m68c_cnt) > 0) + SekExecM68k(cyc_do); SekCyclesLeft = 0; @@ -68,7 +72,7 @@ static void do_hint(struct PicoVideo *pv) } } -static void do_timing_hacks_as(struct PicoVideo *pv, int vdp_slots) +static void do_timing_hacks_as(struct PicoVideo *pv, int vdp_slots, int cycles) { pv->lwrite_cnt += vdp_slots - Pico.m.dma_xfers * 2; // wrong *2 if (pv->lwrite_cnt > vdp_slots) @@ -76,13 +80,13 @@ static void do_timing_hacks_as(struct PicoVideo *pv, int 
vdp_slots) else if (pv->lwrite_cnt < 0) pv->lwrite_cnt = 0; if (Pico.m.dma_xfers) - SekCyclesBurn(CheckDMA()); + SekCyclesBurn(CheckDMA(cycles)); } -static void do_timing_hacks_vb(void) +static void do_timing_hacks_vb(int cycles) { if (unlikely(Pico.m.dma_xfers)) - SekCyclesBurn(CheckDMA()); + SekCyclesBurn(CheckDMA(cycles)); } static int PicoFrameHints(void) @@ -151,7 +155,7 @@ static int PicoFrameHints(void) // Run scanline: Pico.t.m68c_line_start = Pico.t.m68c_aim; - do_timing_hacks_as(pv, vdp_slots); + do_timing_hacks_as(pv, vdp_slots, CYCLES_M68K_LINE); CPUS_RUN(CYCLES_M68K_LINE); if (PicoLineHook) PicoLineHook(); @@ -192,19 +196,18 @@ static int PicoFrameHints(void) // also delay between F bit (bit 7) is set in SR and IRQ happens (Ex-Mutants) // also delay between last H-int and V-int (Golden Axe 3) Pico.t.m68c_line_start = Pico.t.m68c_aim; - do_timing_hacks_vb(); + do_timing_hacks_vb(CYCLES_M68K_VINT_LAG); CPUS_RUN(CYCLES_M68K_VINT_LAG); pv->status |= SR_F; pv->pending_ints |= 0x20; if (pv->reg[1] & 0x20) { - Pico.t.m68c_aim = Pico.t.m68c_cnt + 11; // HACK - SekSyncM68k(); + SekExecM68k(11); // HACK elprintf(EL_INTS, "vint: @ %06x [%u]", SekPc, SekCyclesDone()); SekInterrupt(6); } - cycles = SekCyclesDone(); + cycles = Pico.t.m68c_aim; if (Pico.m.z80Run && !Pico.m.z80_reset && (PicoIn.opt&POPT_EN_Z80)) { PicoSyncZ80(cycles); elprintf(EL_INTS, "zint"); @@ -221,6 +224,7 @@ static int PicoFrameHints(void) #endif // Run scanline: + do_timing_hacks_vb(CYCLES_M68K_LINE - CYCLES_M68K_VINT_LAG); CPUS_RUN(CYCLES_M68K_LINE - CYCLES_M68K_VINT_LAG); if (PicoLineHook) PicoLineHook(); @@ -256,7 +260,7 @@ static int PicoFrameHints(void) // Run scanline: Pico.t.m68c_line_start = Pico.t.m68c_aim; - do_timing_hacks_vb(); + do_timing_hacks_vb(CYCLES_M68K_LINE); CPUS_RUN(CYCLES_M68K_LINE); if (PicoLineHook) PicoLineHook(); @@ -267,7 +271,7 @@ static int PicoFrameHints(void) unsigned int l = PicoIn.overclockM68k * lines / 100; while (l-- > 0) { Pico.t.m68c_cnt -= 
CYCLES_M68K_LINE; - do_timing_hacks_vb(); + do_timing_hacks_vb(CYCLES_M68K_LINE); SekSyncM68k(); } } @@ -293,20 +297,16 @@ static int PicoFrameHints(void) // Run scanline: Pico.t.m68c_line_start = Pico.t.m68c_aim; - do_timing_hacks_as(pv, vdp_slots); + do_timing_hacks_as(pv, vdp_slots, CYCLES_M68K_LINE); CPUS_RUN(CYCLES_M68K_LINE); if (PicoLineHook) PicoLineHook(); pevt_log_m68k_o(EVT_NEXT_LINE); // sync cpus - cycles = SekCyclesDone(); + cycles = Pico.t.m68c_aim; if (Pico.m.z80Run && !Pico.m.z80_reset && (PicoIn.opt&POPT_EN_Z80)) PicoSyncZ80(cycles); - if (PicoIn.sndOut && ym2612.dacen && Pico.snd.dac_line < lines) - PsndDoDAC(lines - 1); - if (PicoIn.sndOut && Pico.snd.psg_line < lines) - PsndDoPSG(lines - 1); #ifdef PICO_CD if (PicoIn.AHW & PAHW_MCD) diff --git a/pico/pico_int.h b/pico/pico_int.h index d3da72ce..58d3da88 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -193,7 +193,7 @@ extern struct DrZ80 drZ80; #define z80_int_assert(a) Cz80_Set_IRQ(&CZ80, 0, (a) ? ASSERT_LINE : CLEAR_LINE) #define z80_nmi() Cz80_Set_IRQ(&CZ80, IRQ_LINE_NMI, 0) -#define z80_cyclesLeft (CZ80.ICount - CZ80.ExtraCycles) +#define z80_cyclesLeft CZ80.ICount #define z80_subCLeft(c) CZ80.ICount -= c #define z80_pc() Cz80_Get_Reg(&CZ80, CZ80_PC) @@ -431,7 +431,9 @@ struct PicoSound short len_use; // adjusted int len_e_add; // for non-int samples/frame int len_e_cnt; - short dac_line; + int dac_val, dac_val2; // last DAC sample + unsigned int dac_mult; // z80 clocks per line in Q16 + unsigned int dac_pos; // last DAC position in Q16 short psg_line; unsigned int fm_mult; // samples per line in Q16 unsigned int fm_pos; // last FM position in Q16 @@ -738,7 +740,7 @@ extern struct Pico Pico; extern struct PicoMem PicoMem; extern void (*PicoResetHook)(void); extern void (*PicoLineHook)(void); -PICO_INTERNAL int CheckDMA(void); +PICO_INTERNAL int CheckDMA(int cycles); PICO_INTERNAL void PicoDetectRegion(void); PICO_INTERNAL void PicoSyncZ80(unsigned int m68k_cycles_done); @@ -872,7 
+874,7 @@ PICO_INTERNAL_ASM void wram_1M_to_2M(unsigned char *m); // sound/sound.c PICO_INTERNAL void PsndReset(void); PICO_INTERNAL void PsndStartFrame(void); -PICO_INTERNAL void PsndDoDAC(int line_to); +PICO_INTERNAL void PsndDoDAC(int cycle_to); PICO_INTERNAL void PsndDoPSG(int line_to); PICO_INTERNAL void PsndDoFM(int line_to); PICO_INTERNAL void PsndClear(void); diff --git a/pico/sound/sound.c b/pico/sound/sound.c index 74fb6fcd..155aa452 100644 --- a/pico/sound/sound.c +++ b/pico/sound/sound.c @@ -89,6 +89,8 @@ void PsndRerate(int preserve_state) // samples per line (Q16) Pico.snd.fm_mult = 65536LL * PicoIn.sndRate / (target_fps*target_lines); + // samples per z80 clock (Q20) + Pico.snd.dac_mult = 16 * Pico.snd.fm_mult * 15/7 / 488; // recalculate dac info dac_recalculate(); @@ -117,34 +119,46 @@ PICO_INTERNAL void PsndStartFrame(void) Pico.snd.len_use++; } - Pico.snd.dac_line = Pico.snd.psg_line = 0; - Pico.snd.fm_pos = 0; + Pico.snd.psg_line = 0; } -PICO_INTERNAL void PsndDoDAC(int line_to) +PICO_INTERNAL void PsndDoDAC(int cyc_to) { - int pos, pos1, len; + int pos, len; int dout = ym2612.dacout; - int line_from = Pico.snd.dac_line; - pos = dac_info[line_from]; - pos1 = dac_info[line_to + 1]; - len = pos1 - pos; + // number of samples to fill in buffer (Q20) + len = (cyc_to * Pico.snd.dac_mult) - Pico.snd.dac_pos; + + // update position and calculate buffer offset and length + pos = (Pico.snd.dac_pos+0x80000) >> 20; + Pico.snd.dac_pos += len; + len = ((Pico.snd.dac_pos+0x80000) >> 20) - pos; + + // avoid loss of the 1st sample of a new block (Q rounding issues) + if (pos+len == 0) + len = 1, Pico.snd.dac_pos += 0x80000; if (len <= 0) return; - Pico.snd.dac_line = line_to + 1; - if (!PicoIn.sndOut) return; + // fill buffer, applying a rather weak order 1 bessel IIR on the way + // y[n] = (x[n] + x[n-1])*(1/2) (3dB cutoff at 11025 Hz, no gain) + // 1 sample delay for correct IIR filtering over audio frame boundaries if (PicoIn.opt & POPT_EN_STEREO) { short *d 
= PicoIn.sndOut + pos*2; - for (; len > 0; len--, d+=2) *d += dout; + // left channel only, mixed ro right channel in mixing phase + *d++ += Pico.snd.dac_val2; d++; + while (--len) *d++ += Pico.snd.dac_val, d++; } else { short *d = PicoIn.sndOut + pos; - for (; len > 0; len--, d++) *d += dout; + *d++ += Pico.snd.dac_val2; + while (--len) *d++ += Pico.snd.dac_val; } + Pico.snd.dac_val2 = (Pico.snd.dac_val + dout) >> 1; + Pico.snd.dac_val = dout; } PICO_INTERNAL void PsndDoPSG(int line_to) @@ -258,6 +272,8 @@ PICO_INTERNAL void PsndClear(void) } if (!(PicoIn.opt & POPT_EN_FM)) memset32(PsndBuffer, 0, PicoIn.opt & POPT_EN_STEREO ? len*2 : len); + // drop pos remainder to avoid rounding errors (not entirely correct though) + Pico.snd.dac_pos = Pico.snd.fm_pos = 0; } @@ -266,6 +282,7 @@ static int PsndRender(int offset, int length) int *buf32; int stereo = (PicoIn.opt & 8) >> 3; int fmlen = ((Pico.snd.fm_pos+0x8000) >> 16) - offset; + int daclen = ((Pico.snd.dac_pos+0x80000) >> 20) - offset; offset <<= stereo; buf32 = PsndBuffer+offset; @@ -277,6 +294,15 @@ static int PsndRender(int offset, int length) return length; } + // Fill up DAC output in case of missing samples (Q16 rounding errors) + if (length-daclen > 0) { + short *dacbuf = PicoIn.sndOut + (daclen << stereo); + for (; length-daclen > 0; daclen++) { + *dacbuf++ += Pico.snd.dac_val; + if (stereo) dacbuf++; + } + } + // Add in parts of the FM buffer not yet done if (length-fmlen > 0) { int *fmbuf = buf32 + (fmlen << stereo); @@ -317,8 +343,8 @@ PICO_INTERNAL void PsndGetSamples(int y) { static int curr_pos = 0; - if (ym2612.dacen && Pico.snd.dac_line < y) - PsndDoDAC(y - 1); + if (ym2612.dacen) + PsndDoDAC(cycles_68k_to_z80(Pico.t.m68c_aim - Pico.t.m68c_frame_start)); PsndDoPSG(y - 1); curr_pos = PsndRender(0, Pico.snd.len_use); @@ -327,7 +353,6 @@ PICO_INTERNAL void PsndGetSamples(int y) PicoIn.writeSound(curr_pos * ((PicoIn.opt & POPT_EN_STEREO) ? 
4 : 2)); // clear sound buffer PsndClear(); - Pico.snd.dac_line = y; } PICO_INTERNAL void PsndGetSamplesMS(int y) diff --git a/pico/videoport.c b/pico/videoport.c index d18c2cf9..d196ee4f 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -97,7 +97,7 @@ static void DmaSlow(int len, unsigned int source) Pico.m.dma_xfers = len; if (Pico.m.dma_xfers < len) // lame 16bit var Pico.m.dma_xfers = ~0; - SekCyclesBurnRun(CheckDMA()); + SekCyclesBurnRun(CheckDMA(488 - (SekCyclesDone()-Pico.t.m68c_line_start))); if ((source & 0xe00000) == 0xe00000) { // Ram base = (u16 *)PicoMem.ram; @@ -344,7 +344,8 @@ static NOINLINE void CommandChange(void) static void DrawSync(int blank_on) { - if (Pico.m.scanline < 224 && !(PicoIn.opt & POPT_ALT_RENDERER) && + int lines = Pico.video.reg[1]&0x08 ? 240 : 224; + if (Pico.m.scanline < lines && !(PicoIn.opt & POPT_ALT_RENDERER) && !PicoIn.skipFrame && Pico.est.DrawScanline <= Pico.m.scanline) { //elprintf(EL_ANOMALY, "sync"); PicoDrawSync(Pico.m.scanline, blank_on); @@ -363,7 +364,7 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) { case 0x00: // Data port 0 or 2 // try avoiding the sync.. - if (Pico.m.scanline < 224 && (pvid->reg[1]&0x40) && + if (Pico.m.scanline < (pvid->reg[1]&0x08 ? 
240 : 224) && (pvid->reg[1]&0x40) && !(!pvid->pending && ((pvid->command & 0xc00000f0) == 0x40000010 && PicoMem.vsram[pvid->addr>>1] == d)) ) From 7aab47685971b17e42719a7f876a6f469032406a Mon Sep 17 00:00:00 2001 From: kub Date: Sun, 26 Jan 2020 20:12:18 +0100 Subject: [PATCH 0255/1110] audio fixes for overdrive demo --- cpu/cz80/cz80.c | 3 ++- cpu/cz80/cz80_op.c | 1 - pico/memory.c | 8 +++++--- pico/pico_int.h | 11 ++++++----- pico/sound/ym2612_arm.S | 3 ++- 5 files changed, 15 insertions(+), 11 deletions(-) diff --git a/cpu/cz80/cz80.c b/cpu/cz80/cz80.c index 6b9afcde..51abc40f 100644 --- a/cpu/cz80/cz80.c +++ b/cpu/cz80/cz80.c @@ -278,7 +278,8 @@ Cz80_Check_Interrupt: CPU->ICount -= CPU->ExtraCycles; CPU->ExtraCycles = 0; } - goto Cz80_Exec; + if (!CPU->HaltState) + goto Cz80_Exec; } } else CPU->ICount = 0; diff --git a/cpu/cz80/cz80_op.c b/cpu/cz80/cz80_op.c index 5d623caf..317e9587 100644 --- a/cpu/cz80/cz80_op.c +++ b/cpu/cz80/cz80_op.c @@ -687,7 +687,6 @@ OP_CCF: OP(0x76): // HALT OP_HALT: CPU->HaltState = 1; -// CPU->ICount = 0; goto Cz80_Check_Interrupt; OP(0xf3): // DI diff --git a/pico/memory.c b/pico/memory.c index 1d9b9135..d61491c1 100644 --- a/pico/memory.c +++ b/pico/memory.c @@ -733,8 +733,10 @@ static void PicoWrite8_vdp(u32 a, u32 d) static void PicoWrite16_vdp(u32 a, u32 d) { - if ((a & 0x00f9) == 0x0010) // PSG Sound + if ((a & 0x00f9) == 0x0010) { // PSG Sound psg_write_68k(d); + return; + } if ((a & 0x00e0) == 0x0000) { PicoVideoWrite(a, d); return; @@ -898,10 +900,10 @@ void ym2612_sync_timers(int z80_cycles, int mode_old, int mode_new) int xcycles = z80_cycles << 8; /* check for overflows */ - if ((mode_old & 4) && xcycles > Pico.t.timer_a_next_oflow) + if ((mode_old & 4) && xcycles >= Pico.t.timer_a_next_oflow) ym2612.OPN.ST.status |= 1; - if ((mode_old & 8) && xcycles > Pico.t.timer_b_next_oflow) + if ((mode_old & 8) && xcycles >= Pico.t.timer_b_next_oflow) ym2612.OPN.ST.status |= 2; /* update timer a */ diff --git a/pico/pico_int.h 
b/pico/pico_int.h index 58d3da88..70bfa710 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -822,10 +822,10 @@ void ym2612_pack_state(void); void ym2612_unpack_state(void); #define TIMER_NO_OFLOW 0x70000000 -// tA = 72 * (1024 - NA) / M -#define TIMER_A_TICK_ZCYCLES 17203 -// tB = 1152 * (256 - NA) / M -#define TIMER_B_TICK_ZCYCLES 262800 // 275251 broken, see Dai Makaimura +// tA = 72 * (1024 - NA) / M, with M = mclock/2 -> tick = 72 * 2/mclock +#define TIMER_A_TICK_ZCYCLES 17203 // zcycles = Q8*tick*zclock = Q8*77*2*7/15 +// tB = 1152 * (256 - NA) / M, +#define TIMER_B_TICK_ZCYCLES 275251 // zcycles = Q8*1152*2*7/15 #define timers_cycle() \ if (Pico.t.timer_a_next_oflow > 0 && Pico.t.timer_a_next_oflow < TIMER_NO_OFLOW) \ @@ -837,7 +837,8 @@ void ym2612_unpack_state(void); #define timers_reset() \ Pico.t.timer_a_next_oflow = Pico.t.timer_b_next_oflow = TIMER_NO_OFLOW; \ Pico.t.timer_a_step = TIMER_A_TICK_ZCYCLES * 1024; \ - Pico.t.timer_b_step = TIMER_B_TICK_ZCYCLES * 256; + Pico.t.timer_b_step = TIMER_B_TICK_ZCYCLES * 256; \ + ym2612.OPN.ST.status &= ~3; // videoport.c diff --git a/pico/sound/ym2612_arm.S b/pico/sound/ym2612_arm.S index 4cb92850..e3ec370d 100644 --- a/pico/sound/ym2612_arm.S +++ b/pico/sound/ym2612_arm.S @@ -206,8 +206,9 @@ .macro update_ssg_eg ldrh r0, [r5,#0x30] @ ssg+ssgn ldrb r2, [r5,#0x17] @ state + and r3, r0, #0x08 + cmp r3, #0x08 @ ssg enabled && ldrh r3, [r5,#0x1a] @ volume - cmp r0, #0x08 @ ssg enabled && cmpge r2, #EG_REL+1 @ state > EG_REL && cmpge r3, #0x200 @ volume >= 0x200? 
blt 9f From 5f0d224e1853b9ff5872999c9689e1bef53f632e Mon Sep 17 00:00:00 2001 From: kub Date: Sun, 26 Jan 2020 20:40:07 +0100 Subject: [PATCH 0256/1110] sprite rendering improvements for masking and limit edge cases --- pico/draw.c | 109 +++++++++++++++++++++++++++++------------------ pico/draw2.c | 3 +- pico/draw_arm.S | 63 ++++++++++++++++++--------- pico/pico_int.h | 4 +- pico/videoport.c | 3 +- 5 files changed, 117 insertions(+), 65 deletions(-) diff --git a/pico/draw.c b/pico/draw.c index 7fd93f8e..652b9df7 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -53,7 +53,11 @@ static int HighPreSpr[80*2+1]; // slightly preprocessed sprites #define SPRL_HAVE_LO 0x40 // *lo* #define SPRL_MAY_HAVE_OP 0x20 // may have operator sprites on the line #define SPRL_LO_ABOVE_HI 0x10 // low priority sprites may be on top of hi -unsigned char HighLnSpr[240][3 + MAX_LINE_SPRITES]; // sprite_count, ^flags, tile_count, [spritep]... +#define SPRL_HAVE_X 0x08 // have sprites with x != 0 +#define SPRL_TILE_OVFL 0x04 // tile limit exceeded on previous line +#define SPRL_HAVE_MASK0 0x02 // have sprite with x == 0 in 1st slot +#define SPRL_MASKED 0x01 // lo prio masking by sprite with x == 0 active +unsigned char HighLnSpr[240][4+MAX_LINE_SPRITES+1]; // sprite_count, ^flags, tile_count, sprites_total, [spritep]..., last_width int rendstatus_old; int rendlines; @@ -706,7 +710,7 @@ last_cut_tile: // Index + 0 : hhhhvvvv ab--hhvv yyyyyyyy yyyyyyyy // a: offscreen h, b: offs. v, h: horiz. 
size // Index + 4 : xxxxxxxx xxxxxxxx pccvhnnn nnnnnnnn // x: x coord + 8 -static void DrawSprite(int *sprite, int sh) +static void DrawSprite(int *sprite, int sh, int w) { void (*fTileFunc)(unsigned char *pd, unsigned int pack, int pal); unsigned char *pd = Pico.est.HighCol; @@ -746,6 +750,7 @@ static void DrawSprite(int *sprite, int sh) else fTileFunc=TileNorm; } + if (w) width = w; // tile limit for (; width; width--,sx+=8,tile+=delta) { unsigned int pack; @@ -833,12 +838,13 @@ static NOINLINE void DrawAllSpritesInterlace(int pri, int sh) struct PicoVideo *pvid=&Pico.video; int i,u,table,link=0,sline=Pico.est.DrawScanline<<1; unsigned int *sprites[80]; // Sprite index + int max_sprites = Pico.video.reg[12]&1 ? 80 : 64; table=pvid->reg[5]&0x7f; if (pvid->reg[12]&1) table&=0x7e; // Lowest bit 0 in 40-cell mode table<<=8; // Get sprite table address/2 - for (i=u=0; u < 80 && i < 21; u++) + for (i = u = 0; u < max_sprites && link < max_sprites; u++) { unsigned int *sprite; int code, sx, sy, height; @@ -888,15 +894,18 @@ static void DrawSpritesSHi(unsigned char *sprited, const struct PicoEState *est) void (*fTileFunc)(unsigned char *pd, unsigned int pack, int pal); unsigned char *pd = Pico.est.HighCol; unsigned char *p; - int cnt; + int cnt, w; cnt = sprited[0] & 0x7f; if (cnt == 0) return; - p = &sprited[3]; + p = &sprited[4]; + if ((sprited[1] & (SPRL_TILE_OVFL|SPRL_HAVE_MASK0)) == (SPRL_TILE_OVFL|SPRL_HAVE_MASK0)) + return; // masking effective due to tile overflow // Go through sprites backwards: - for (cnt--; cnt >= 0; cnt--) + w = p[cnt]; // possibly clipped width of last sprite + for (cnt--; cnt >= 0; cnt--, w = 0) { int *sprite, code, pal, tile, sx, sy; int offs, delta, width, height, row; @@ -940,6 +949,7 @@ static void DrawSpritesSHi(unsigned char *sprited, const struct PicoEState *est) tile &= 0x7ff; tile<<=4; tile+=(row&7)<<1; // Tile address delta<<=4; // Delta of address + if (w) width = w; // tile limit for (; width; width--,sx+=8,tile+=delta) { 
unsigned int pack; @@ -967,7 +977,9 @@ static void DrawSpritesHiAS(unsigned char *sprited, int sh) if (cnt == 0) return; memset(mb, 0xff, sizeof(mb)); - p = &sprited[3]; + p = &sprited[4]; + if ((sprited[1] & (SPRL_TILE_OVFL|SPRL_HAVE_MASK0)) == (SPRL_TILE_OVFL|SPRL_HAVE_MASK0)) + return; // masking effective due to tile overflow // Go through sprites: for (entry = 0; entry < cnt; entry++) @@ -1019,6 +1031,7 @@ static void DrawSpritesHiAS(unsigned char *sprited, int sh) tile &= 0x7ff; tile<<=4; tile+=(row&7)<<1; // Tile address delta<<=4; // Delta of address + if (entry+1 == cnt) width = p[entry+1]; // last sprite width limited? for (; width; width--,sx+=8,tile+=delta) { unsigned int pack; @@ -1065,10 +1078,10 @@ static NOINLINE void PrepareSprites(int full) { int pack; // updates: tilecode, sx - for (u=0; u < max_sprites && (pack = *pd); u++, pd+=2) + for (u=0; u < max_sprites && link < max_sprites && (pack = *pd); u++, pd+=2) { unsigned int *sprite; - int code2, sx, sy, height; + int code2, sx, sy, height, width; sprite=(unsigned int *)(PicoMem.vram+((table+(link<<2))&0x7ffc)); // Find sprite @@ -1078,25 +1091,29 @@ static NOINLINE void PrepareSprites(int full) sx -= 0x78; // Get X coordinate + 8 sy = (pack << 16) >> 16; height = (pack >> 24) & 0xf; + width = (pack >> 28); if (sy < max_lines && - sy + (height<<3) > est->DrawScanline && // sprite onscreen (y)? - (sx > -24 || sx < max_width)) // onscreen x + sy + (height<<3) > est->DrawScanline) // sprite onscreen (y)? { int y = (sy >= est->DrawScanline) ? sy : est->DrawScanline; int entry = ((pd - HighPreSpr) / 2) | ((code2>>8)&0x80); for (; y < sy + (height<<3) && y < max_lines; y++) { int i, cnt; - cnt = HighLnSpr[y][0] & 0x7f; - if (cnt >= max_line_sprites) continue; // sprite limit? + cnt = HighLnSpr[y][0]; + if (HighLnSpr[y][3] >= max_line_sprites) continue; // sprite limit? 
for (i = 0; i < cnt; i++) - if (((HighLnSpr[y][3+i] ^ entry) & 0x7f) == 0) goto found; + if (((HighLnSpr[y][4+i] ^ entry) & 0x7f) == 0) goto found; // this sprite was previously missing - HighLnSpr[y][3+cnt] = entry; - HighLnSpr[y][0] = cnt + 1; + HighLnSpr[y][3] ++; + if (sx > -24 && sx < max_width) { // onscreen x + HighLnSpr[y][4+cnt] = entry; // XXX wrong sequence? + HighLnSpr[y][5+cnt] = width; // XXX should count tiles for limit + HighLnSpr[y][0] = cnt + 1; + } found:; if (entry & 0x80) HighLnSpr[y][1] |= SPRL_HAVE_HI; @@ -1118,7 +1135,7 @@ found:; for (u = 0; u < max_lines; u++) *((int *)&HighLnSpr[u][0]) = 0; - for (u = 0; u < max_sprites; u++) + for (u = 0; u < max_sprites && link < max_sprites; u++) { unsigned int *sprite; int code, code2, sx, sy, hv, height, width; @@ -1138,7 +1155,7 @@ found:; if (sy < max_lines && sy + (height<<3) > est->DrawScanline) // sprite onscreen (y)? { - int entry, y, sx_min, onscr_x, maybe_op = 0; + int entry, y, w, sx_min, onscr_x, maybe_op = 0; sx_min = 8-(width<<3); onscr_x = sx_min < sx && sx < max_width; @@ -1149,29 +1166,36 @@ found:; y = (sy >= est->DrawScanline) ? sy : est->DrawScanline; for (; y < sy + (height<<3) && y < max_lines; y++) { - unsigned char *p = &HighLnSpr[y][0]; + unsigned char *p = &HighLnSpr[y][0]; int cnt = p[0]; - if (cnt >= max_line_sprites) continue; // sprite limit? + if (p[3] >= max_line_sprites) continue; // sprite limit? + if ((p[1] & SPRL_MASKED) && !(entry & 0x80)) continue; // masked? - if (p[2] >= max_line_sprites*2) { // tile limit? - p[0] |= 0x80; - continue; + w = width; + if (p[2] + width > max_line_sprites*2) { // tile limit? 
+ if (y+1 < 240) HighLnSpr[y+1][1] |= SPRL_TILE_OVFL; + if (p[2] >= max_line_sprites*2) continue; + w = max_line_sprites*2 - p[2]; } - p[2] += width; + p[2] += w; + p[3] ++; if (sx == -0x78) { - if (cnt > 0) - p[0] |= 0x80; // masked, no more sprites for this line - continue; - } - // must keep the first sprite even if it's offscreen, for masking - if (cnt > 0 && !onscr_x) continue; // offscreen x + if (p[1] & (SPRL_HAVE_X|SPRL_TILE_OVFL)) + p[1] |= SPRL_MASKED; // masked, no more low sprites for this line + if (!(p[1] & SPRL_HAVE_X) && cnt == 0) + p[1] |= SPRL_HAVE_MASK0; // 1st sprite is masking + } else + p[1] |= SPRL_HAVE_X; - p[3+cnt] = entry; + if (!onscr_x) continue; // offscreen x + + p[4+cnt] = entry; + p[5+cnt] = w; // width clipped by tile limit for sprite renderer p[0] = cnt + 1; p[1] |= (entry & 0x80) ? SPRL_HAVE_HI : SPRL_HAVE_LO; p[1] |= maybe_op; // there might be op sprites on this line - if (cnt > 0 && (code2 & 0x8000) && !(p[3+cnt-1]&0x80)) + if (cnt > 0 && (code2 & 0x8000) && !(p[4+cnt-1]&0x80)) p[1] |= SPRL_LO_ABOVE_HI; } } @@ -1189,9 +1213,10 @@ found:; for (u = 0; u < max_lines; u++) { int y; - printf("c%03i: %2i, %2i: ", u, HighLnSpr[u][0] & 0x7f, HighLnSpr[u][2]); - for (y = 0; y < HighLnSpr[u][0] & 0x7f; y++) - printf(" %i", HighLnSpr[u][y+3]); + printf("c%03i: f %x c %2i/%2i w %2i: ", u, HighLnSpr[u][1], + HighLnSpr[u][0], HighLnSpr[u][3], HighLnSpr[u][2]); + for (y = 0; y < HighLnSpr[u][0]; y++) + printf(" %i", HighLnSpr[u][y+4]); printf("\n"); } #endif @@ -1203,20 +1228,22 @@ static void DrawAllSprites(unsigned char *sprited, int prio, int sh, struct PicoEState *est) { unsigned char *p; - int cnt; + int cnt, w = sprited[2]; cnt = sprited[0] & 0x7f; if (cnt == 0) return; - p = &sprited[3]; + p = &sprited[4]; + if ((sprited[1] & (SPRL_TILE_OVFL|SPRL_HAVE_MASK0)) == (SPRL_TILE_OVFL|SPRL_HAVE_MASK0)) + return; // masking effective due to tile overflow // Go through sprites backwards: - for (cnt--; cnt >= 0; cnt--) + w = p[cnt]; // possibly 
clipped width of last sprite + for (cnt--; cnt >= 0; cnt--, w = 0) { - int offs; + int *sp = HighPreSpr + (p[cnt]&0x7f) * 2; if ((p[cnt] >> 7) != prio) continue; - offs = (p[cnt]&0x7f) * 2; - DrawSprite(HighPreSpr + offs, sh); + DrawSprite(sp, sh, w); } } diff --git a/pico/draw2.c b/pico/draw2.c index 38a90ef3..85e2b275 100644 --- a/pico/draw2.c +++ b/pico/draw2.c @@ -420,12 +420,13 @@ static void DrawAllSpritesFull(int prio, int maxwidth) int i,u,link=0; unsigned int *sprites[80]; // Sprites int y_min=START_ROW*8, y_max=END_ROW*8; // for a simple sprite masking + int max_sprites = Pico.video.reg[12]&1 ? 80 : 64; table=pvid->reg[5]&0x7f; if (pvid->reg[12]&1) table&=0x7e; // Lowest bit 0 in 40-cell mode table<<=8; // Get sprite table address/2 - for (i=u=0; u < 80; u++) + for (i = u = 0; u < max_sprites && link < max_sprites; u++) { unsigned int *sprite=NULL; int code, code2, sx, sy, height; diff --git a/pico/draw_arm.S b/pico/draw_arm.S index 2efc804c..fb6d0950 100644 --- a/pico/draw_arm.S +++ b/pico/draw_arm.S @@ -942,17 +942,23 @@ DrawTilesFromCache: .global DrawSpritesSHi DrawSpritesSHi: - ldr r3, [r0] + ldrb r3, [r0] mov r12,#0xff ands r3, r3, #0x7f bxeq lr - stmfd sp!, {r1,r4-r11,lr} @ +est - strb r12,[r0,#2] @ set end marker - add r10,r0, #3 @ r10=HighLnSpr end + stmfd sp!, {r1,r3-r11,lr} @ +est + strb r12,[r0,#3] @ set end marker + ldrb r12,[r0,#1] + add r10,r0, #4 @ r10=HighLnSpr end + mvn r12,r12 + tst r12,#0x6 @ masking in slot 1 and tile ovfl? 
+ ldmeqfd sp!, {r1,r3-r11,pc} add r10,r10,r3 @ r10=HighLnSpr end + ldrb r12,[r10,#0] @ width of last sprite ldr r11,[r1, #OFS_EST_HighCol] + str r12,[sp, #4] mov r12,#0xf ldr lr, [r1, #OFS_EST_PicoMem_vram] @@ -963,7 +969,7 @@ DrawSpriteSHi: ldr r7, [sp] @ est ldr r1, [r7, #OFS_EST_HighPreSpr] cmp r0, #0xff - ldmeqfd sp!, {r1,r4-r11,pc} @ end of list + ldmeqfd sp!, {r1,r3-r11,pc} @ end of list and r0, r0, #0x7f add r0, r1, r0, lsl #3 @@ -1007,10 +1013,16 @@ DrawSpriteSHi: and r7, r7, #7 add r8, r8, r7, lsl #1 @ tile+=(row&7)<<1; // Tile address + ldr r0, [sp, #4] + add r6, r6, #1 @ inc now + cmp r0, #0 @ check width of last sprite + movne r6, r0 + movne r0, #0 + strne r0, [sp, #4] + mov r5, r5, lsl #4 @ delta<<=4; // Delta of address mov r3, r4, lsr #9 @ r3=pal=((code>>9)&0x30); - add r6, r6, #1 @ inc now adds r0, r2, #0 @ mov sx to r0 and set ZV flags b .dsprShi_loop_enter @@ -1126,11 +1138,18 @@ DrawAllSprites: @ time to do some real work stmfd sp!, {r1,r3-r11,lr} @ +sh|prio<<1 +est mov r12,#0xff - strb r12,[r0,#2] @ set end marker - add r10,r0, #3 + strb r12,[r0,#3] @ set end marker + ldrb r12,[r0,#1] + add r10,r0 ,#4 + mvn r12,r12 + tst r12,#0x6 @ masking in slot 1 and tile ovfl? + ldmeqfd sp!, {r1,r3-r11,pc} add r10,r10,r2 @ r10=HighLnSpr end + ldrb r12,[r10,#0] @ width of last sprite ldr r11,[r3, #OFS_EST_HighCol] + orr r1 ,r1 ,r12,lsl #24 + str r1, [sp] mov r12,#0xf ldr lr, [r3, #OFS_EST_PicoMem_vram] @@ -1140,13 +1159,13 @@ DrawAllSprites: DrawSprite: @ draw next sprite ldrb r0, [r10,#-1]! 
- ldr r8, [sp] @ sh|prio<<1 + ldr r4, [sp] @ sh|prio<<1|lastw<<24 ldr r7, [sp, #4] @ est - mov r2, r0, lsr #7 + mov r2, r0, lsl #24 cmp r0, #0xff ldmeqfd sp!, {r1,r3-r11,pc} @ end of list - cmp r2, r8, lsr #1 - bne DrawSprite @ wrong priority + eor r2, r2, r4, lsl #30 + bmi DrawSprite @ wrong priority ldr r1, [r7, #OFS_EST_HighPreSpr] and r0, r0, #0x7f add r0, r1, r0, lsl #3 @@ -1158,20 +1177,20 @@ DrawSprite: mov r5, r3, lsr #24 and r5, r5, #7 @ r5=height - mov r4, r3, lsl #16 @ r4=sy<<16 (tmp) + mov r8, r3, lsl #16 @ r8=sy<<16 (tmp) ldr r9, [r0, #4] - sub r7, r7, r4, asr #16 @ r7=row=DrawScanline-sy + sub r7, r7, r8, asr #16 @ r7=row=DrawScanline-sy mov r2, r9, asr #16 @ r2=sx mov r9, r9, lsl #16 mov r9, r9, lsr #16 - orr r9, r9, r8, lsl #31 @ r9=code|sh[31] + orr r9, r9, r4, lsl #31 @ r9=code|sh[31] tst r9, #0x1000 - movne r4, r5, lsl #3 - subne r4, r4, #1 - subne r7, r4, r7 @ if (code&0x1000) row=(height<<3)-1-row; // Flip Y + movne r8, r5, lsl #3 + subne r8, r8, #1 + subne r7, r8, r7 @ if (code&0x1000) row=(height<<3)-1-row; // Flip Y add r8, r9, r7, lsr #3 @ tile+=row>>3; // Tile number increases going down tst r9, #0x0800 @@ -1183,7 +1202,12 @@ DrawSprite: and r7, r7, #7 add r8, r8, r7, lsl #1 @ tile+=(row&7)<<1; // Tile address -.dspr_continue: + add r6, r6, #1 @ inc now + cmp r4, #0x1000000 @ check width of last sprite + movhs r6, r4, lsr #24 + bichs r4, r4, #0xff000000 + strhs r4, [sp] + @ cache some stuff to avoid mem access mov r5, r5, lsl #4 @ delta<<=4; // Delta of address and r4, r9, #0x6000 @@ -1193,7 +1217,6 @@ DrawSprite: mov r3, r4, lsr #9 @ r3=pal=((code>>9)&0x30); orrmi r3, r3, #0x40 @ for sh/hi - add r6, r6, #1 @ inc now adds r0, r2, #0 @ mov sx to r0 and set ZV flags b .dspr_loop_enter diff --git a/pico/pico_int.h b/pico/pico_int.h index 70bfa710..a24fc6f6 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -667,8 +667,8 @@ void FinalizeLine555(int sh, int line, struct PicoEState *est); void PicoDrawSetOutBufMD(void *dest, int increment); 
extern int (*PicoScanBegin)(unsigned int num); extern int (*PicoScanEnd)(unsigned int num); -#define MAX_LINE_SPRITES 29 -extern unsigned char HighLnSpr[240][3 + MAX_LINE_SPRITES]; +#define MAX_LINE_SPRITES 27 // +1 last sprite width, +4 hdr; total 32 +extern unsigned char HighLnSpr[240][4+MAX_LINE_SPRITES+1]; extern void *DrawLineDestBase; extern int DrawLineDestIncrement; diff --git a/pico/videoport.c b/pico/videoport.c index d196ee4f..c2fbd0ca 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -200,6 +200,7 @@ static void DmaSlow(int len, unsigned int source) a = (a + inc) & 0x1ffff; } Pico.video.addr_u = a >> 16; + Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; break; default: @@ -266,6 +267,7 @@ static NOINLINE void DmaFill(int data) // Increment address register a = (u16)(a + inc); } + Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; break; case 3: // cram case 5: { // vsram @@ -289,7 +291,6 @@ static NOINLINE void DmaFill(int data) Pico.video.reg[0x15] = source; Pico.video.reg[0x16] = source >> 8; - Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; } static NOINLINE void CommandDma(void) From 2d5b6a66c160ba97e3a932ff04da3f3a6fc3a6cb Mon Sep 17 00:00:00 2001 From: kub Date: Sun, 26 Jan 2020 20:43:05 +0100 Subject: [PATCH 0257/1110] added debug reg sprite plane support (fixes some issues in overdrive 2 demo) --- pico/draw.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) diff --git a/pico/draw.c b/pico/draw.c index 652b9df7..01153f3e 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -223,6 +223,14 @@ TileFlipMakerAS(TileFlipAS_onlymark, pix_sh_as_onlymark) TileNormMaker(TileNorm_and, pix_and) TileFlipMaker(TileFlip_and, pix_and) +// forced sprite draw (through debug reg) +#define pix_sh_and(x) /* XXX is there S/H with forced draw? 
*/ \ + if (t>=0xe) pd[x]=(pd[x]&0x3f)|(t<<6); /* c0 shadow, 80 hilight */ \ + else pd[x] = (pd[x] & 0xc0) | (pd[x] & (pal | t)) + +TileNormMaker(TileNormSH_and, pix_sh_and) +TileFlipMaker(TileFlipSH_and, pix_sh_and) + // -------------------------------------------- #ifndef _ASM_DRAW_C @@ -1045,6 +1053,66 @@ static void DrawSpritesHiAS(unsigned char *sprited, int sh) } } +static void DrawSpritesForced(unsigned char *sprited) +{ + void (*fTileFunc)(unsigned char *pd, unsigned int pack, int pal); + unsigned char *pd = Pico.est.HighCol; + unsigned char *p; + int entry, cnt; + + cnt = sprited[0] & 0x7f; + if (cnt == 0) return; + + p = &sprited[4]; + if ((sprited[1] & (SPRL_TILE_OVFL|SPRL_HAVE_MASK0)) == (SPRL_TILE_OVFL|SPRL_HAVE_MASK0)) + return; // masking effective due to tile overflow + + // Go through sprites: + for (entry = 0; entry < cnt; entry++) + { + int *sprite, code, pal, tile, sx, sy; + int offs, delta, width, height, row; + + offs = (p[entry] & 0x7f) * 2; + sprite = HighPreSpr + offs; + code = sprite[1]; + pal = (code>>9)&0x30; + + if (code&0x800) fTileFunc = TileFlipSH_and; + else fTileFunc = TileNormSH_and; + + // parse remaining sprite data + sy=sprite[0]; + sx=code>>16; // X + width=sy>>28; + height=(sy>>24)&7; // Width and height in tiles + sy=(sy<<16)>>16; // Y + + row=Pico.est.DrawScanline-sy; // Row of the sprite we are on + + if (code&0x1000) row=(height<<3)-1-row; // Flip Y + + tile=code + (row>>3); // Tile number increases going down + delta=height; // Delta to increase tile by going right + if (code&0x0800) { tile+=delta*(width-1); delta=-delta; } // Flip X + + tile &= 0x7ff; tile<<=4; tile+=(row&7)<<1; // Tile address + delta<<=4; // Delta of address + + if (entry+1 == cnt) width = p[entry+1]; // last sprite width limited? 
+ for (; width; width--,sx+=8,tile+=delta) + { + unsigned int pack; + + if(sx<=0) continue; + if(sx>=328) break; // Offscreen + + pack = *(unsigned int *)(PicoMem.vram + (tile & 0x7fff)); + fTileFunc(pd + sx, pack, pal); + } + } +} + // Index + 0 : ----hhvv -lllllll -------y yyyyyyyy // Index + 4 : -------x xxxxxxxx pccvhnnn nnnnnnnn @@ -1529,6 +1597,8 @@ static int DrawDisplay(int sh) DrawTilesFromCacheForced(HighCacheB); else if (pvid->debug_p & PVD_FORCE_A) DrawTilesFromCacheForced(HighCacheA); + else if (pvid->debug_p & PVD_FORCE_S) + DrawSpritesForced(sprited); #if 0 { @@ -1621,7 +1691,7 @@ static void PicoLine(int line, int offs, int sh, int bgc) return; } - if (Pico.video.debug_p & (PVD_FORCE_A | PVD_FORCE_B)) + if (Pico.video.debug_p & (PVD_FORCE_A | PVD_FORCE_B | PVD_FORCE_S)) bgc = 0x3f; // Draw screen: From 1259ac4f6085ec3663fe96290b0d027da618a746 Mon Sep 17 00:00:00 2001 From: kub Date: Sun, 26 Jan 2020 20:46:21 +0100 Subject: [PATCH 0258/1110] VDP timing improvements --- pico/pico.c | 3 ++- pico/pico_cmn.c | 2 +- pico/pico_int.h | 5 ++--- pico/videoport.c | 10 ++++++---- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/pico/pico.c b/pico/pico.c index 2a16a0e2..b65b7de8 100644 --- a/pico/pico.c +++ b/pico/pico.c @@ -254,7 +254,7 @@ PICO_INTERNAL int CheckDMA(int cycles) dma_op1 = dma_op; if(Pico.video.reg[12] & 1) dma_op |= 4; // 40 cell mode? if(!(Pico.video.status&8)&&(Pico.video.reg[1]&0x40)) dma_op|=8; // active display? 
- xfers_can = (dma_timings[dma_op] * cycles + 0xff) >> 16; + xfers_can = (dma_timings[dma_op] * cycles + 0x8000) >> 16; if(xfers <= xfers_can) { Pico.video.status &= ~SR_DMA; @@ -265,6 +265,7 @@ PICO_INTERNAL int CheckDMA(int cycles) if(!(dma_op&2)) burn = cycles; Pico.m.dma_xfers -= xfers_can; } + Pico.t.dma_end = SekCyclesDone() + burn; elprintf(EL_VDPDMA, "~Dma %i op=%i can=%i burn=%i [%u]", Pico.m.dma_xfers, dma_op1, xfers_can, burn, SekCyclesDone()); diff --git a/pico/pico_cmn.c b/pico/pico_cmn.c index 8c22c977..b7e7d835 100644 --- a/pico/pico_cmn.c +++ b/pico/pico_cmn.c @@ -56,10 +56,10 @@ static void SekSyncM68k(void) static __inline void SekRunM68k(int cyc) { Pico.t.m68c_aim += cyc; + Pico.t.m68c_cnt += cyc >> 6; // refresh slowdowns cyc = Pico.t.m68c_aim - Pico.t.m68c_cnt; if (cyc <= 0) return; - Pico.t.m68c_cnt += cyc >> 6; // refresh slowdowns SekSyncM68k(); } diff --git a/pico/pico_int.h b/pico/pico_int.h index a24fc6f6..357de4a9 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -137,9 +137,7 @@ extern m68ki_cpu_core PicoCpuMM68k, PicoCpuMS68k; // burn cycles while not in SekRun() and while in #define SekCyclesBurn(c) Pico.t.m68c_cnt += c -#define SekCyclesBurnRun(c) { \ - SekCyclesLeft -= c; \ -} +#define SekCyclesBurnRun(c) SekCyclesLeft -= c // note: sometimes may extend timeslice to delay an irq #define SekEndRun(after) { \ @@ -421,6 +419,7 @@ struct PicoTiming unsigned int z80c_aim; int z80_scanline; + unsigned int dma_end; // end of current DMA op (m68k cycles) int timer_a_next_oflow, timer_a_step; // in z80 cycles int timer_b_next_oflow, timer_b_step; }; diff --git a/pico/videoport.c b/pico/videoport.c index c2fbd0ca..16a73119 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -376,12 +376,12 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) pvid->pending=0; } - if (!(pvid->status & SR_VB) && !(PicoIn.opt&POPT_DIS_VDP_FIFO)) + if (!(pvid->status & SR_VB) && (pvid->reg[1]&0x40) && !(PicoIn.opt&POPT_DIS_VDP_FIFO)) { 
int use = pvid->type == 1 ? 2 : 1; pvid->lwrite_cnt -= use; if (pvid->lwrite_cnt < 0) - SekCyclesLeft = 0; + SekCyclesBurnRun(488 - (SekCyclesDone()-Pico.t.m68c_line_start)); elprintf(EL_ASVDP, "VDP data write: [%04x] %04x [%u] {%i} #%i @ %06x", Pico.video.addr, d, SekCyclesDone(), Pico.video.type, pvid->lwrite_cnt, SekPc); } @@ -509,9 +509,11 @@ static u32 SrLow(const struct PicoVideo *pv) { unsigned int c, d = pv->status; - c = SekCyclesDone() - Pico.t.m68c_line_start - 39; - if (c < 92) + c = SekCyclesDone(); + if (c - Pico.t.m68c_line_start - 39 < 92) d |= SR_HB; + if (CYCLES_GT(c, Pico.t.dma_end)) + d &= ~SR_DMA; return d; } From e1e7d1ed1cea6048063728ddf7a90bc1c8331102 Mon Sep 17 00:00:00 2001 From: kub Date: Sun, 26 Jan 2020 20:48:25 +0100 Subject: [PATCH 0259/1110] improved VRAM128K support (overdrive 2) --- pico/videoport.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/pico/videoport.c b/pico/videoport.c index 16a73119..cdc5796c 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -22,11 +22,12 @@ static __inline void AutoIncrement(void) Pico.video.addr=(unsigned short)(Pico.video.addr+Pico.video.reg[0xf]); } -static NOINLINE void VideoWrite128(u32 a, u16 d) +static NOINLINE unsigned int VideoWrite128(u32 a, u16 d) { // nasty a = ((a & 2) >> 1) | ((a & 0x400) >> 9) | (a & 0x3FC) | ((a & 0x1F800) >> 1); ((u8 *)PicoMem.vram)[a] = d; + return a; } static void VideoWrite(u16 d) @@ -38,16 +39,19 @@ static void VideoWrite(u16 d) case 1: if (a & 1) d = (u16)((d << 8) | (d >> 8)); PicoMem.vram [(a >> 1) & 0x7fff] = d; - if (a - ((unsigned)(Pico.video.reg[5]&0x7f) << 9) < 0x400) + if ((unsigned)(a - ((Pico.video.reg[5]&0x7f) << 9)) < 0x400) Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; break; case 3: if (PicoMem.cram [(a >> 1) & 0x3f] != d) Pico.m.dirtyPal = 1; PicoMem.cram [(a >> 1) & 0x3f] = d; break; case 5: PicoMem.vsram[(a >> 1) & 0x3f] = d; break; - case 0x81: - a |= Pico.video.addr_u << 16; - VideoWrite128(a, 
d); - break; + case 0x81: if (a & 1) + d = (u16)((d << 8) | (d >> 8)); + a |= Pico.video.addr_u << 16; + a = VideoWrite128(a, d); + if ((unsigned)(a - ((Pico.video.reg[5]&0x7f) << 9)) < 0x400) + Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; + break; //default:elprintf(EL_ANOMALY, "VDP write %04x with bad type %i", d, Pico.video.type); break; } @@ -276,6 +280,16 @@ static NOINLINE void DmaFill(int data) if (!once++) elprintf(EL_STATUS|EL_ANOMALY|EL_VDPDMA, "TODO: cram/vsram fill"); } + case 0x81: + for (l = len; l; l--) { + VideoWrite128(a, data); + + // Increment address register + a = (a + inc) & 0x1ffff; + } + Pico.video.addr_u = a >> 16; + Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; + break; default: a += len * inc; break; From 4bb8111a050ad97e24a9ddb41f65a85891a30048 Mon Sep 17 00:00:00 2001 From: kub Date: Sun, 26 Jan 2020 20:49:20 +0100 Subject: [PATCH 0260/1110] regression fix for gp2x 8bit fast mode --- platform/common/arm_utils.s | 1 + platform/linux/blit.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/platform/common/arm_utils.s b/platform/common/arm_utils.s index 9e8d9f25..6696e5af 100644 --- a/platform/common/arm_utils.s +++ b/platform/common/arm_utils.s @@ -141,6 +141,7 @@ vidcpy_m2: movne lr, #64 tstne r3, r3 addne r0, r0, #32 + addne r1, r1, #32 vidCpyM2_loop_out: mov r6, #10 diff --git a/platform/linux/blit.c b/platform/linux/blit.c index 96326fe1..82bc4ba5 100644 --- a/platform/linux/blit.c +++ b/platform/linux/blit.c @@ -61,10 +61,11 @@ void vidcpy_m2(void *dest, void *src, int m32col, int with_32c_border) for (i = 0; i < 224; i++) { ps += 8; + ps += 32; pd += 32; for (u = 0; u < 256; u++) *pd++ = *ps++; - ps += 64; + ps += 32; pd += 32; } } else { From 0f3703fd9809452251321364ef788ff7ab156392 Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 7 Feb 2020 19:55:05 +0100 Subject: [PATCH 0261/1110] new hvcounter tables as per spritesmind.net threads --- pico/misc.c | 138 +++++++++++++++++++++++++--------------------------- 1 file 
changed, 65 insertions(+), 73 deletions(-) diff --git a/pico/misc.c b/pico/misc.c index 47842e3f..ab282c24 100644 --- a/pico/misc.c +++ b/pico/misc.c @@ -8,83 +8,75 @@ #include "pico_int.h" -// H-counter table for hvcounter reads in 40col mode -// based on Gens code +// H-counter table for hvcounter reads in 40col mode, starting at HINT const unsigned char hcounts_40[] = { -0x07,0x07,0x08,0x08,0x08,0x09,0x09,0x0a,0x0a,0x0b,0x0b,0x0b,0x0c,0x0c,0x0d,0x0d, -0x0e,0x0e,0x0e,0x0f,0x0f,0x10,0x10,0x10,0x11,0x11,0x12,0x12,0x13,0x13,0x13,0x14, -0x14,0x15,0x15,0x15,0x16,0x16,0x17,0x17,0x18,0x18,0x18,0x19,0x19,0x1a,0x1a,0x1b, -0x1b,0x1b,0x1c,0x1c,0x1d,0x1d,0x1d,0x1e,0x1e,0x1f,0x1f,0x20,0x20,0x20,0x21,0x21, -0x22,0x22,0x23,0x23,0x23,0x24,0x24,0x25,0x25,0x25,0x26,0x26,0x27,0x27,0x28,0x28, -0x28,0x29,0x29,0x2a,0x2a,0x2a,0x2b,0x2b,0x2c,0x2c,0x2d,0x2d,0x2d,0x2e,0x2e,0x2f, -0x2f,0x30,0x30,0x30,0x31,0x31,0x32,0x32,0x32,0x33,0x33,0x34,0x34,0x35,0x35,0x35, -0x36,0x36,0x37,0x37,0x38,0x38,0x38,0x39,0x39,0x3a,0x3a,0x3a,0x3b,0x3b,0x3c,0x3c, -0x3d,0x3d,0x3d,0x3e,0x3e,0x3f,0x3f,0x3f,0x40,0x40,0x41,0x41,0x42,0x42,0x42,0x43, -0x43,0x44,0x44,0x45,0x45,0x45,0x46,0x46,0x47,0x47,0x47,0x48,0x48,0x49,0x49,0x4a, -0x4a,0x4a,0x4b,0x4b,0x4c,0x4c,0x4d,0x4d,0x4d,0x4e,0x4e,0x4f,0x4f,0x4f,0x50,0x50, -0x51,0x51,0x52,0x52,0x52,0x53,0x53,0x54,0x54,0x55,0x55,0x55,0x56,0x56,0x57,0x57, -0x57,0x58,0x58,0x59,0x59,0x5a,0x5a,0x5a,0x5b,0x5b,0x5c,0x5c,0x5c,0x5d,0x5d,0x5e, -0x5e,0x5f,0x5f,0x5f,0x60,0x60,0x61,0x61,0x62,0x62,0x62,0x63,0x63,0x64,0x64,0x64, -0x65,0x65,0x66,0x66,0x67,0x67,0x67,0x68,0x68,0x69,0x69,0x6a,0x6a,0x6a,0x6b,0x6b, -0x6c,0x6c,0x6c,0x6d,0x6d,0x6e,0x6e,0x6f,0x6f,0x6f,0x70,0x70,0x71,0x71,0x71,0x72, -0x72,0x73,0x73,0x74,0x74,0x74,0x75,0x75,0x76,0x76,0x77,0x77,0x77,0x78,0x78,0x79, -0x79,0x79,0x7a,0x7a,0x7b,0x7b,0x7c,0x7c,0x7c,0x7d,0x7d,0x7e,0x7e,0x7f,0x7f,0x7f, -0x80,0x80,0x81,0x81,0x81,0x82,0x82,0x83,0x83,0x84,0x84,0x84,0x85,0x85,0x86,0x86, 
-0x86,0x87,0x87,0x88,0x88,0x89,0x89,0x89,0x8a,0x8a,0x8b,0x8b,0x8c,0x8c,0x8c,0x8d, -0x8d,0x8e,0x8e,0x8e,0x8f,0x8f,0x90,0x90,0x91,0x91,0x91,0x92,0x92,0x93,0x93,0x94, -0x94,0x94,0x95,0x95,0x96,0x96,0x96,0x97,0x97,0x98,0x98,0x99,0x99,0x99,0x9a,0x9a, -0x9b,0x9b,0x9b,0x9c,0x9c,0x9d,0x9d,0x9e,0x9e,0x9e,0x9f,0x9f,0xa0,0xa0,0xa1,0xa1, -0xa1,0xa2,0xa2,0xa3,0xa3,0xa3,0xa4,0xa4,0xa5,0xa5,0xa6,0xa6,0xa6,0xa7,0xa7,0xa8, -0xa8,0xa9,0xa9,0xa9,0xaa,0xaa,0xab,0xab,0xab,0xac,0xac,0xad,0xad,0xae,0xae,0xae, -0xaf,0xaf,0xb0,0xb0, -0xe4,0xe4,0xe4,0xe5,0xe5,0xe6,0xe6,0xe6,0xe7,0xe7,0xe8,0xe8,0xe9,0xe9,0xe9,0xea, -0xea,0xeb,0xeb,0xeb,0xec,0xec,0xed,0xed,0xee,0xee,0xee,0xef,0xef,0xf0,0xf0,0xf1, -0xf1,0xf1,0xf2,0xf2,0xf3,0xf3,0xf3,0xf4,0xf4,0xf5,0xf5,0xf6,0xf6,0xf6,0xf7,0xf7, -0xf8,0xf8,0xf9,0xf9,0xf9,0xfa,0xfa,0xfb,0xfb,0xfb,0xfc,0xfc,0xfd,0xfd,0xfe,0xfe, -0xfe,0xff,0xff,0x00,0x00,0x00,0x01,0x01,0x02,0x02,0x03,0x03,0x03,0x04,0x04,0x05, -0x05,0x06,0x06,0x06, -0x07,0x07,0x08,0x08,0x08,0x09,0x09,0x0a,0x0a,0x0b,0x0b,0x0b,0x0c,0x0c,0x0d,0x0d, -0x0e,0x0e,0x0e,0x0f,0x0f,0x10,0x10,0x10, -}; - -// H-counter table for hvcounter reads in 32col mode -const unsigned char hcounts_32[] = { -0x05,0x05,0x05,0x06,0x06,0x07,0x07,0x07,0x08,0x08,0x08,0x09,0x09,0x09,0x0a,0x0a, -0x0a,0x0b,0x0b,0x0b,0x0c,0x0c,0x0c,0x0d,0x0d,0x0d,0x0e,0x0e,0x0f,0x0f,0x0f,0x10, -0x10,0x10,0x11,0x11,0x11,0x12,0x12,0x12,0x13,0x13,0x13,0x14,0x14,0x14,0x15,0x15, -0x15,0x16,0x16,0x17,0x17,0x17,0x18,0x18,0x18,0x19,0x19,0x19,0x1a,0x1a,0x1a,0x1b, -0x1b,0x1b,0x1c,0x1c,0x1c,0x1d,0x1d,0x1d,0x1e,0x1e,0x1f,0x1f,0x1f,0x20,0x20,0x20, -0x21,0x21,0x21,0x22,0x22,0x22,0x23,0x23,0x23,0x24,0x24,0x24,0x25,0x25,0x26,0x26, -0x26,0x27,0x27,0x27,0x28,0x28,0x28,0x29,0x29,0x29,0x2a,0x2a,0x2a,0x2b,0x2b,0x2b, -0x2c,0x2c,0x2c,0x2d,0x2d,0x2e,0x2e,0x2e,0x2f,0x2f,0x2f,0x30,0x30,0x30,0x31,0x31, -0x31,0x32,0x32,0x32,0x33,0x33,0x33,0x34,0x34,0x34,0x35,0x35,0x36,0x36,0x36,0x37, -0x37,0x37,0x38,0x38,0x38,0x39,0x39,0x39,0x3a,0x3a,0x3a,0x3b,0x3b,0x3b,0x3c,0x3c, 
-0x3d,0x3d,0x3d,0x3e,0x3e,0x3e,0x3f,0x3f,0x3f,0x40,0x40,0x40,0x41,0x41,0x41,0x42, -0x42,0x42,0x43,0x43,0x43,0x44,0x44,0x45,0x45,0x45,0x46,0x46,0x46,0x47,0x47,0x47, -0x48,0x48,0x48,0x49,0x49,0x49,0x4a,0x4a,0x4a,0x4b,0x4b,0x4b,0x4c,0x4c,0x4d,0x4d, -0x4d,0x4e,0x4e,0x4e,0x4f,0x4f,0x4f,0x50,0x50,0x50,0x51,0x51,0x51,0x52,0x52,0x52, -0x53,0x53,0x53,0x54,0x54,0x55,0x55,0x55,0x56,0x56,0x56,0x57,0x57,0x57,0x58,0x58, -0x58,0x59,0x59,0x59,0x5a,0x5a,0x5a,0x5b,0x5b,0x5c,0x5c,0x5c,0x5d,0x5d,0x5d,0x5e, -0x5e,0x5e,0x5f,0x5f,0x5f,0x60,0x60,0x60,0x61,0x61,0x61,0x62,0x62,0x62,0x63,0x63, -0x64,0x64,0x64,0x65,0x65,0x65,0x66,0x66,0x66,0x67,0x67,0x67,0x68,0x68,0x68,0x69, -0x69,0x69,0x6a,0x6a,0x6a,0x6b,0x6b,0x6c,0x6c,0x6c,0x6d,0x6d,0x6d,0x6e,0x6e,0x6e, -0x6f,0x6f,0x6f,0x70,0x70,0x70,0x71,0x71,0x71,0x72,0x72,0x72,0x73,0x73,0x74,0x74, -0x74,0x75,0x75,0x75,0x76,0x76,0x76,0x77,0x77,0x77,0x78,0x78,0x78,0x79,0x79,0x79, -0x7a,0x7a,0x7b,0x7b,0x7b,0x7c,0x7c,0x7c,0x7d,0x7d,0x7d,0x7e,0x7e,0x7e,0x7f,0x7f, -0x7f,0x80,0x80,0x80,0x81,0x81,0x81,0x82,0x82,0x83,0x83,0x83,0x84,0x84,0x84,0x85, -0x85,0x85,0x86,0x86,0x86,0x87,0x87,0x87,0x88,0x88,0x88,0x89,0x89,0x89,0x8a,0x8a, -0x8b,0x8b,0x8b,0x8c,0x8c,0x8c,0x8d,0x8d,0x8d,0x8e,0x8e,0x8e,0x8f,0x8f,0x8f,0x90, -0x90,0x90,0x91,0x91, -0xe8,0xe8,0xe8,0xe9,0xe9,0xe9,0xea,0xea,0xea,0xeb,0xeb,0xeb,0xec,0xec,0xec,0xed, +0xa5,0xa5,0xa5,0xa6,0xa6,0xa7,0xa7,0xa8,0xa8,0xa8,0xa9,0xa9,0xaa,0xaa,0xab,0xab, +0xac,0xac,0xac,0xad,0xad,0xae,0xae,0xaf,0xaf,0xaf,0xb0,0xb0,0xb1,0xb1,0xb2,0xb2, +0xb3,0xb3,0xb3,0xb4,0xb4,0xb5,0xb5,0xb6,0xe4,0xe4,0xe5,0xe5,0xe6,0xe6,0xe7,0xe7, +0xe7,0xe8,0xe8,0xe8,0xe9,0xe9,0xe9,0xea,0xea,0xeb,0xeb,0xeb,0xec,0xec,0xec,0xed, 0xed,0xed,0xee,0xee,0xee,0xef,0xef,0xf0,0xf0,0xf0,0xf1,0xf1,0xf1,0xf2,0xf2,0xf2, -0xf3,0xf3,0xf3,0xf4,0xf4,0xf4,0xf5,0xf5,0xf5,0xf6,0xf6,0xf6,0xf7,0xf7,0xf8,0xf8, -0xf8,0xf9,0xf9,0xf9,0xfa,0xfa,0xfa,0xfb,0xfb,0xfb,0xfc,0xfc,0xfc,0xfd,0xfd,0xfd, -0xfe,0xfe,0xfe,0xff,0xff,0x00,0x00,0x00,0x01,0x01,0x01,0x02,0x02,0x02,0x03,0x03, 
-0x03,0x04,0x04,0x04, -0x05,0x05,0x05,0x06,0x06,0x07,0x07,0x07,0x08,0x08,0x08,0x09,0x09,0x09,0x0a,0x0a, -0x0a,0x0b,0x0b,0x0b,0x0c,0x0c,0x0c,0x0d, +0xf3,0xf3,0xf3,0xf4,0xf4,0xf5,0xf5,0xf5,0xf6,0xf6,0xf6,0xf7,0xf7,0xf7,0xf8,0xf8, +0xf9,0xf9,0xfa,0xfa,0xfb,0xfb,0xfb,0xfc,0xfc,0xfd,0xfd,0xfe,0xfe,0xfe,0xff,0xff, +0x00,0x00,0x01,0x01,0x02,0x02,0x02,0x03,0x03,0x04,0x04,0x05,0x05,0x05,0x06,0x06, +0x07,0x07,0x08,0x08,0x09,0x09,0x09,0x0a,0x0a,0x0b,0x0b,0x0c,0x0c,0x0c,0x0d,0x0d, +0x0e,0x0e,0x0f,0x0f,0x10,0x10,0x10,0x11,0x11,0x12,0x12,0x13,0x13,0x13,0x14,0x14, +0x15,0x15,0x16,0x16,0x17,0x17,0x17,0x18,0x18,0x19,0x19,0x1a,0x1a,0x1a,0x1b,0x1b, +0x1c,0x1c,0x1d,0x1d,0x1e,0x1e,0x1e,0x1f,0x1f,0x20,0x20,0x21,0x21,0x21,0x22,0x22, +0x23,0x23,0x24,0x24,0x25,0x25,0x25,0x26,0x26,0x27,0x27,0x28,0x28,0x28,0x29,0x29, +0x2a,0x2a,0x2b,0x2b,0x2c,0x2c,0x2c,0x2d,0x2d,0x2e,0x2e,0x2f,0x2f,0x2f,0x30,0x30, +0x31,0x31,0x32,0x32,0x33,0x33,0x33,0x34,0x34,0x35,0x35,0x36,0x36,0x36,0x37,0x37, +0x38,0x38,0x39,0x39,0x3a,0x3a,0x3a,0x3b,0x3b,0x3c,0x3c,0x3d,0x3d,0x3d,0x3e,0x3e, +0x3f,0x3f,0x40,0x40,0x41,0x41,0x41,0x42,0x42,0x43,0x43,0x44,0x44,0x44,0x45,0x45, +0x46,0x46,0x47,0x47,0x48,0x48,0x48,0x49,0x49,0x4a,0x4a,0x4b,0x4b,0x4b,0x4c,0x4c, +0x4d,0x4d,0x4e,0x4e,0x4f,0x4f,0x4f,0x50,0x50,0x51,0x51,0x52,0x52,0x52,0x53,0x53, +0x54,0x54,0x55,0x55,0x56,0x56,0x56,0x57,0x57,0x58,0x58,0x59,0x59,0x59,0x5a,0x5a, +0x5b,0x5b,0x5c,0x5c,0x5d,0x5d,0x5d,0x5e,0x5e,0x5f,0x5f,0x60,0x60,0x60,0x61,0x61, +0x62,0x62,0x63,0x63,0x64,0x64,0x64,0x65,0x65,0x66,0x66,0x67,0x67,0x67,0x68,0x68, +0x69,0x69,0x6a,0x6a,0x6b,0x6b,0x6b,0x6c,0x6c,0x6d,0x6d,0x6e,0x6e,0x6e,0x6f,0x6f, +0x70,0x70,0x71,0x71,0x72,0x72,0x72,0x73,0x73,0x74,0x74,0x75,0x75,0x75,0x76,0x76, +0x77,0x77,0x78,0x78,0x79,0x79,0x79,0x7a,0x7a,0x7b,0x7b,0x7c,0x7c,0x7c,0x7d,0x7d, +0x7e,0x7e,0x7f,0x7f,0x80,0x80,0x80,0x81,0x81,0x82,0x82,0x83,0x83,0x83,0x84,0x84, +0x85,0x85,0x86,0x86,0x87,0x87,0x87,0x88,0x88,0x89,0x89,0x8a,0x8a,0x8a,0x8b,0x8b, 
+0x8c,0x8c,0x8d,0x8d,0x8e,0x8e,0x8e,0x8f,0x8f,0x90,0x90,0x91,0x91,0x91,0x92,0x92, +0x93,0x93,0x94,0x94,0x95,0x95,0x95,0x96,0x96,0x97,0x97,0x98,0x98,0x98,0x99,0x99, +0x9a,0x9a,0x9b,0x9b,0x9c,0x9c,0x9c,0x9d,0x9d,0x9e,0x9e,0x9f,0x9f,0x9f,0xa0,0xa0, +0xa1,0xa1,0xa2,0xa2,0xa3,0xa3,0xa3,0xa4,0xa5,0xa5,0xa5,0xa6,0xa6,0xa7,0xa7,0xa8, }; +// H-counter table for hvcounter reads in 32col mode, starting at HINT +const unsigned char hcounts_32[] = { +0x85,0x85,0x85,0x86,0x86,0x86,0x87,0x87,0x87,0x88,0x88,0x88,0x89,0x89,0x89,0x8a, +0x8a,0x8a,0x8b,0x8b,0x8c,0x8c,0x8c,0x8d,0x8d,0x8d,0x8e,0x8e,0x8e,0x8f,0x8f,0x8f, +0x90,0x90,0x90,0x91,0x91,0x91,0x92,0x92,0x93,0x93,0x93,0xe9,0xe9,0xe9,0xea,0xea, +0xea,0xeb,0xeb,0xeb,0xec,0xec,0xec,0xed,0xed,0xed,0xee,0xee,0xef,0xef,0xef,0xf0, +0xf0,0xf0,0xf1,0xf1,0xf1,0xf2,0xf2,0xf2,0xf3,0xf3,0xf3,0xf4,0xf4,0xf4,0xf5,0xf5, +0xf6,0xf6,0xf6,0xf7,0xf7,0xf7,0xf8,0xf8,0xf8,0xf9,0xf9,0xf9,0xfa,0xfa,0xfa,0xfb, +0xfb,0xfb,0xfc,0xfc,0xfd,0xfd,0xfd,0xfe,0xfe,0xfe,0xff,0xff,0xff,0x00,0x00,0x00, +0x01,0x01,0x01,0x02,0x02,0x02,0x03,0x03,0x04,0x04,0x04,0x05,0x05,0x05,0x06,0x06, +0x06,0x07,0x07,0x07,0x08,0x08,0x08,0x09,0x09,0x09,0x0a,0x0a,0x0b,0x0b,0x0b,0x0c, +0x0c,0x0c,0x0d,0x0d,0x0d,0x0e,0x0e,0x0e,0x0f,0x0f,0x0f,0x10,0x10,0x10,0x11,0x11, +0x12,0x12,0x12,0x13,0x13,0x13,0x14,0x14,0x14,0x15,0x15,0x15,0x16,0x16,0x16,0x17, +0x17,0x17,0x18,0x18,0x19,0x19,0x19,0x1a,0x1a,0x1a,0x1b,0x1b,0x1b,0x1c,0x1c,0x1c, +0x1d,0x1d,0x1d,0x1e,0x1e,0x1e,0x1f,0x1f,0x20,0x20,0x20,0x21,0x21,0x21,0x22,0x22, +0x22,0x23,0x23,0x23,0x24,0x24,0x24,0x25,0x25,0x25,0x26,0x26,0x27,0x27,0x27,0x28, +0x28,0x28,0x29,0x29,0x29,0x2a,0x2a,0x2a,0x2b,0x2b,0x2b,0x2c,0x2c,0x2c,0x2d,0x2d, +0x2e,0x2e,0x2e,0x2f,0x2f,0x2f,0x30,0x30,0x30,0x31,0x31,0x31,0x32,0x32,0x32,0x33, +0x33,0x33,0x34,0x34,0x35,0x35,0x35,0x36,0x36,0x36,0x37,0x37,0x37,0x38,0x38,0x38, +0x39,0x39,0x39,0x3a,0x3a,0x3a,0x3b,0x3b,0x3c,0x3c,0x3c,0x3d,0x3d,0x3d,0x3e,0x3e, +0x3e,0x3f,0x3f,0x3f,0x40,0x40,0x40,0x41,0x41,0x41,0x42,0x42,0x43,0x43,0x43,0x44, 
+0x44,0x44,0x45,0x45,0x45,0x46,0x46,0x46,0x47,0x47,0x47,0x48,0x48,0x48,0x49,0x49, +0x4a,0x4a,0x4a,0x4b,0x4b,0x4b,0x4c,0x4c,0x4c,0x4d,0x4d,0x4d,0x4e,0x4e,0x4e,0x4f, +0x4f,0x4f,0x50,0x50,0x51,0x51,0x51,0x52,0x52,0x52,0x53,0x53,0x53,0x54,0x54,0x54, +0x55,0x55,0x55,0x56,0x56,0x56,0x57,0x57,0x58,0x58,0x58,0x59,0x59,0x59,0x5a,0x5a, +0x5a,0x5b,0x5b,0x5b,0x5c,0x5c,0x5c,0x5d,0x5d,0x5d,0x5e,0x5e,0x5f,0x5f,0x5f,0x60, +0x60,0x60,0x61,0x61,0x61,0x62,0x62,0x62,0x63,0x63,0x63,0x64,0x64,0x64,0x65,0x65, +0x66,0x66,0x66,0x67,0x67,0x67,0x68,0x68,0x68,0x69,0x69,0x69,0x6a,0x6a,0x6a,0x6b, +0x6b,0x6b,0x6c,0x6c,0x6d,0x6d,0x6d,0x6e,0x6e,0x6e,0x6f,0x6f,0x6f,0x70,0x70,0x70, +0x71,0x71,0x71,0x72,0x72,0x72,0x73,0x73,0x74,0x74,0x74,0x75,0x75,0x75,0x76,0x76, +0x76,0x77,0x77,0x77,0x78,0x78,0x78,0x79,0x79,0x79,0x7a,0x7a,0x7b,0x7b,0x7b,0x7c, +0x7c,0x7c,0x7d,0x7d,0x7d,0x7e,0x7e,0x7e,0x7f,0x7f,0x7f,0x80,0x80,0x80,0x81,0x81, +0x82,0x82,0x82,0x83,0x83,0x83,0x84,0x84,0x85,0x85,0x85,0x86,0x86,0x86,0x87,0x87, +}; #ifndef _ASM_MISC_C PICO_INTERNAL_ASM void memcpy16bswap(unsigned short *dest, void *src, int count) From 17bd69adc6f0ae747dd7f3304099c7ef76b908cf Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 7 Feb 2020 22:10:18 +0100 Subject: [PATCH 0262/1110] revised VDP fifo implementation --- pico/debug.c | 6 + pico/pico.c | 57 +----- pico/pico_cmn.c | 37 ++-- pico/pico_int.h | 12 +- pico/videoport.c | 456 +++++++++++++++++++++++++++++++++++++++++------ 5 files changed, 430 insertions(+), 138 deletions(-) diff --git a/pico/debug.c b/pico/debug.c index e617d908..e4b5232e 100644 --- a/pico/debug.c +++ b/pico/debug.c @@ -43,6 +43,12 @@ char *PDebugMain(void) !!(Pico.sv.flags & SRF_ENABLED), !!(Pico.sv.flags & SRF_EEPROM), Pico.sv.eeprom_type); MVP; sprintf(dstrp, "sram range: %06x-%06x, reg: %02x\n", Pico.sv.start, Pico.sv.end, Pico.m.sram_reg); MVP; sprintf(dstrp, "pend int: v:%i, h:%i, vdp status: %04x\n", bit(pv->pending_ints,5), bit(pv->pending_ints,4), pv->status); MVP; + sprintf(dstrp, "VDP regs 00-07: 
%02x %02x %02x %02x %02x %02x %02x %02x\n",reg[0],reg[1],reg[2],reg[3],reg[4],reg[5],reg[6],reg[7]); MVP; + sprintf(dstrp, "VDP regs 08-0f: %02x %02x %02x %02x %02x %02x %02x %02x\n",reg[8],reg[9],reg[10],reg[11],reg[12],reg[13],reg[14],reg[15]); MVP; + sprintf(dstrp, "VDP regs 10-17: %02x %02x %02x %02x %02x %02x %02x %02x\n",reg[16],reg[17],reg[18],reg[19],reg[20],reg[21],reg[22],reg[23]); MVP; + sprintf(dstrp, "VDP regs 18-1f: %02x %02x %02x %02x %02x %02x %02x %02x\n",reg[24],reg[25],reg[26],reg[27],reg[28],reg[29],reg[30],reg[31]); MVP; + r = (reg[5]<<9)+(reg[6]<<11); + sprintf(dstrp, "sprite #0: %04x %04x %04x %04x\n",PicoMem.vram[r/2],PicoMem.vram[r/2+1],PicoMem.vram[r/2+2],PicoMem.vram[r/2+3]); MVP; sprintf(dstrp, "pal: %i, hw: %02x, frame#: %i, cycles: %u\n", Pico.m.pal, Pico.m.hardware, Pico.m.frame_count, SekCyclesDone()); MVP; sprintf(dstrp, "M68k: PC: %06x, SR: %04x, irql: %i\n", SekPc, SekSr, SekIrqLevel); MVP; for (r = 0; r < 8; r++) { diff --git a/pico/pico.c b/pico/pico.c index b65b7de8..9db2fc64 100644 --- a/pico/pico.c +++ b/pico/pico.c @@ -67,6 +67,7 @@ void PicoPower(void) memset(&Pico.video,0,sizeof(Pico.video)); memset(&Pico.m,0,sizeof(Pico.m)); + memset(&Pico.t,0,sizeof(Pico.t)); Pico.video.pending_ints=0; z80_reset(); @@ -182,8 +183,7 @@ int PicoReset(void) PsndReset(); // pal must be known here // create an empty "dma" to cause 68k exec start at random frame location - if (Pico.m.dma_xfers == 0 && !(PicoIn.opt & POPT_DIS_VDP_FIFO)) - Pico.m.dma_xfers = rand() & 0x1fff; + PicoVideoFIFOWrite(rand() & 0x1fff, 0, 0, PVS_CPURD); SekFinishIdleDet(); @@ -222,57 +222,6 @@ void PicoLoopPrepare(void) rendstatus_old = -1; } -// this table is wrong and should be removed -// keeping it for now to compensate wrong timing elswhere, mainly for Outrunners -static const int dma_timings[] = { // Q16 - // dma2vram dma2[vs|c]ram vram_fill vram_copy - // VRAM has half the width of VSRAM/CRAM, thus half the performance - ( 83<<16)/488, (166<<16)/488, 
(165<<16)/488, ( 83<<16)/488, // vblank 32cell - (102<<16)/488, (204<<16)/488, (203<<16)/488, (102<<16)/488, // vblank 40cell - ( 8<<16)/488, ( 16<<16)/488, ( 15<<16)/488, ( 8<<16)/488, // active 32cell - ( 9<<16)/488, ( 18<<16)/488, ( 17<<16)/488, ( 9<<16)/488 // active 40cell -}; - -static const int dma_bsycles[] = { // Q16 - (488<<16)/83, (488<<16)/166, (488<<16)/165, (488<<16)/83, - (488<<16)/102, (488<<16)/204, (488<<16)/203, (488<<16)/102, - (488<<16)/8, (488<<16)/16, (488<<16)/15, (488<<16)/8, - (488<<16)/9, (488<<16)/18, (488<<16)/17, (488<<16)/9 -}; - -// grossly inaccurate.. FIXME FIXXXMEE -PICO_INTERNAL int CheckDMA(int cycles) -{ - int burn = 0, xfers_can, dma_op = Pico.video.reg[0x17]>>6; // see gens for 00 and 01 modes - int xfers = Pico.m.dma_xfers; - int dma_op1; - - // safety pin - if (cycles <= 0) return 0; - - if(!(dma_op&2)) dma_op = (Pico.video.type==1) ? 0 : 1; // setting dma_timings offset here according to Gens - dma_op1 = dma_op; - if(Pico.video.reg[12] & 1) dma_op |= 4; // 40 cell mode? - if(!(Pico.video.status&8)&&(Pico.video.reg[1]&0x40)) dma_op|=8; // active display? 
- xfers_can = (dma_timings[dma_op] * cycles + 0x8000) >> 16; - if(xfers <= xfers_can) - { - Pico.video.status &= ~SR_DMA; - if (!(dma_op & 2)) - burn = xfers * dma_bsycles[dma_op] >> 16; - Pico.m.dma_xfers = 0; - } else { - if(!(dma_op&2)) burn = cycles; - Pico.m.dma_xfers -= xfers_can; - } - Pico.t.dma_end = SekCyclesDone() + burn; - - elprintf(EL_VDPDMA, "~Dma %i op=%i can=%i burn=%i [%u]", - Pico.m.dma_xfers, dma_op1, xfers_can, burn, SekCyclesDone()); - //dprintf("~aim: %i, cnt: %i", Pico.t.m68c_aim, Pico.t.m68c_cnt); - return burn; -} - #include "pico_cmn.c" /* sync z80 to 68k */ @@ -319,7 +268,7 @@ void PicoFrame(void) goto end; } - //if(Pico.video.reg[12]&0x2) Pico.video.status ^= 0x10; // change odd bit in interlace mode + //if(Pico.video.reg[12]&0x2) Pico.video.status ^= SR_ODD; // change odd bit in interlace mode PicoFrameStart(); PicoFrameHints(); diff --git a/pico/pico_cmn.c b/pico/pico_cmn.c index b7e7d835..75389840 100644 --- a/pico/pico_cmn.c +++ b/pico/pico_cmn.c @@ -72,27 +72,19 @@ static void do_hint(struct PicoVideo *pv) } } -static void do_timing_hacks_as(struct PicoVideo *pv, int vdp_slots, int cycles) +static void do_timing_hacks_end(struct PicoVideo *pv) { - pv->lwrite_cnt += vdp_slots - Pico.m.dma_xfers * 2; // wrong *2 - if (pv->lwrite_cnt > vdp_slots) - pv->lwrite_cnt = vdp_slots; - else if (pv->lwrite_cnt < 0) - pv->lwrite_cnt = 0; - if (Pico.m.dma_xfers) - SekCyclesBurn(CheckDMA(cycles)); + PicoVideoFIFOSync(488); } -static void do_timing_hacks_vb(int cycles) +static void do_timing_hacks_start(struct PicoVideo *pv) { - if (unlikely(Pico.m.dma_xfers)) - SekCyclesBurn(CheckDMA(cycles)); + SekCyclesBurn(PicoVideoFIFOHint()); // prolong cpu HOLD if necessary } static int PicoFrameHints(void) { struct PicoVideo *pv = &Pico.video; - int vdp_slots = (Pico.video.reg[12] & 1) ? 
18 : 16; int lines, y, lines_vis, skip; int vcnt_wrap, vcnt_adj; unsigned int cycles; @@ -155,8 +147,9 @@ static int PicoFrameHints(void) // Run scanline: Pico.t.m68c_line_start = Pico.t.m68c_aim; - do_timing_hacks_as(pv, vdp_slots, CYCLES_M68K_LINE); + do_timing_hacks_start(pv); CPUS_RUN(CYCLES_M68K_LINE); + do_timing_hacks_end(pv); if (PicoLineHook) PicoLineHook(); pevt_log_m68k_o(EVT_NEXT_LINE); @@ -175,10 +168,6 @@ static int PicoFrameHints(void) #endif } - // VDP FIFO - pv->lwrite_cnt = 0; - Pico.video.status |= SR_EMPT; - memcpy(PicoIn.padInt, PicoIn.pad, sizeof(PicoIn.padInt)); PAD_DELAY(); @@ -196,7 +185,7 @@ static int PicoFrameHints(void) // also delay between F bit (bit 7) is set in SR and IRQ happens (Ex-Mutants) // also delay between last H-int and V-int (Golden Axe 3) Pico.t.m68c_line_start = Pico.t.m68c_aim; - do_timing_hacks_vb(CYCLES_M68K_VINT_LAG); + do_timing_hacks_start(pv); CPUS_RUN(CYCLES_M68K_VINT_LAG); pv->status |= SR_F; @@ -224,8 +213,8 @@ static int PicoFrameHints(void) #endif // Run scanline: - do_timing_hacks_vb(CYCLES_M68K_LINE - CYCLES_M68K_VINT_LAG); CPUS_RUN(CYCLES_M68K_LINE - CYCLES_M68K_VINT_LAG); + do_timing_hacks_end(pv); if (PicoLineHook) PicoLineHook(); pevt_log_m68k_o(EVT_NEXT_LINE); @@ -260,8 +249,9 @@ static int PicoFrameHints(void) // Run scanline: Pico.t.m68c_line_start = Pico.t.m68c_aim; - do_timing_hacks_vb(CYCLES_M68K_LINE); + do_timing_hacks_start(pv); CPUS_RUN(CYCLES_M68K_LINE); + do_timing_hacks_end(pv); if (PicoLineHook) PicoLineHook(); pevt_log_m68k_o(EVT_NEXT_LINE); @@ -271,8 +261,9 @@ static int PicoFrameHints(void) unsigned int l = PicoIn.overclockM68k * lines / 100; while (l-- > 0) { Pico.t.m68c_cnt -= CYCLES_M68K_LINE; - do_timing_hacks_vb(CYCLES_M68K_LINE); + do_timing_hacks_start(pv); SekSyncM68k(); + do_timing_hacks_end(pv); } } @@ -282,7 +273,6 @@ static int PicoFrameHints(void) // last scanline Pico.m.scanline = y++; pv->v_counter = 0xff; - pv->lwrite_cnt = 0; PAD_DELAY(); @@ -297,8 +287,9 @@ static int 
PicoFrameHints(void) // Run scanline: Pico.t.m68c_line_start = Pico.t.m68c_aim; - do_timing_hacks_as(pv, vdp_slots, CYCLES_M68K_LINE); + do_timing_hacks_start(pv); CPUS_RUN(CYCLES_M68K_LINE); + do_timing_hacks_end(pv); if (PicoLineHook) PicoLineHook(); pevt_log_m68k_o(EVT_NEXT_LINE); diff --git a/pico/pico_int.h b/pico/pico_int.h index 357de4a9..b3ce8a72 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -296,6 +296,10 @@ extern SH2 sh2s[2]; // not part of real SR #define PVS_ACTIVE (1 << 16) #define PVS_VB2 (1 << 17) // ignores forced blanking +#define PVS_CPUWR (1 << 18) // CPU hold by FIFO full +#define PVS_CPURD (1 << 19) // CPU hold by FIFO full +#define PVS_DMAPEND (1 << 20) // DMA operation waiting for start +#define PVS_DMAFILL (1 << 21) // DMA fill is in progress struct PicoVideo { @@ -306,7 +310,7 @@ struct PicoVideo unsigned short addr; // Read/Write address unsigned int status; // Status bits (SR) and extra flags unsigned char pending_ints; // pending interrupts: ??VH???? - signed char lwrite_cnt; // VDP write count during active display line + signed char pad1; // was VDP write count unsigned short v_counter; // V-counter unsigned short debug; // raw debug register unsigned char debug_p; // ... 
parsed: PVD_* @@ -335,7 +339,7 @@ struct PicoMisc unsigned char eeprom_slave; // EEPROM slave word for X24C02 and better SRAMs unsigned char eeprom_status; unsigned char pad1; // was ym2612 status - unsigned short dma_xfers; // 18 + unsigned short pad2; // 18 was dma_xfers unsigned char eeprom_wb[2]; // EEPROM latch/write buffer unsigned int frame_count; // 1c for movies and idle det }; @@ -419,7 +423,6 @@ struct PicoTiming unsigned int z80c_aim; int z80_scanline; - unsigned int dma_end; // end of current DMA op (m68k cycles) int timer_a_next_oflow, timer_a_step; // in z80 cycles int timer_b_next_oflow, timer_b_step; }; @@ -850,6 +853,9 @@ unsigned char PicoVideoRead8CtlL(void); unsigned char PicoVideoRead8HV_H(void); unsigned char PicoVideoRead8HV_L(void); extern int (*PicoDmaHook)(unsigned int source, int len, unsigned short **base, unsigned int *mask); +void PicoVideoFIFOSync(int cycles); +int PicoVideoFIFOHint(void); +int PicoVideoFIFOWrite(int count, int byte_p, unsigned sr_mask, unsigned sr_flags); // misc.c PICO_INTERNAL_ASM void memcpy16bswap(unsigned short *dest, void *src, int count); diff --git a/pico/videoport.c b/pico/videoport.c index cdc5796c..881a74a3 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -14,9 +14,296 @@ extern const unsigned char hcounts_32[]; extern const unsigned char hcounts_40[]; +static unsigned hvlatch; // latched hvcounter value +static int blankline; // display disabled for this line int (*PicoDmaHook)(unsigned int source, int len, unsigned short **base, unsigned int *mask) = NULL; + +/* VDP FIFO implementation + * + * fifo_slot: last slot executed in this scanline + * fifo_cnt: #slots remaining for active FIFO write (#writes<<#bytep) + * fifo_total: #total FIFO entries pending + * fifo_data: last values transferred through fifo + * fifo_queue: fifo transfer queue (#writes, VRAM_byte_p) + * + * FIFO states: empty total=0 + * inuse total>0 && total<4 + * full total==4 + * wait total>4 + * Conditions: + * fifo_slot is 
always behind slot2cyc[cycles]. Advancing it beyond cycles + * implies blocking the 68k up to that slot. + * + * A FIFO write goes to the end of the fifo queue. There can be more pending + * writes than FIFO slots, but the 68k will be blocked in most of those cases. + * This is only about correct timing, data xfer must be handled by the caller. + * Blocking the CPU means burning cycles via SekCyclesBurn*(), which is to be + * executed by the caller. + * + * FIFOSync "executes" FIFO write slots up to the given cycle in the current + * scanline. A queue entry completely executed is removed from the queue. + * FIFOWrite pushes writes to the transfer queue. If it's a blocking write, 68k + * is blocked if more than 4 FIFO writes are pending. + * FIFORead executes a 68k read. 68k is blocked until the next transfer slot. + */ + +// FIFO transfer slots per line: H32 blank, H40 blank, H32 active, H40 active +static const short vdpslots[] = { 166, 204, 16, 18 }; +// mapping between slot# and 68k cycles in a blanked scanline +static const int vdpcyc2sl_bl[] = { (166<<16)/488, (204<<16)/488, (16<<16)/488, (18<<16)/488 }; +static const int vdpsl2cyc_bl[] = { (488<<16)/166, (488<<16)/204, (488<<16)/16, (488<<16)/18 }; + +// VDP transfer slots in active display 32col mode. 1 slot is 488/171 = 2.8538 +// 68k cycles. 
Only 16 of the 171 slots in a scanline can be used by CPU/DMA: +// (HINT=slot 0): 13,27,42,50,58,74,82,90,106,114,122,138,146,154,169,170 +const unsigned char vdpcyc2sl_32[] = { // 68k cycles/4 since HINT to slot # +// 4 8 12 16 20 24 28 32 36 40 44 48 52 56 60 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, + 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, + 9,10,10,10,10,10,10,11,11,11,11,11,11,11,11,11, +11,11,12,12,12,12,12,12,13,13,13,13,13,13,14,14, +14,14,14,14,14,14,14,14,15,16,16,16,16,16,16,16, +}; +const unsigned char vdpsl2cyc_32[] = { // slot # to 68k cycles/4 since HINT + 0, 9, 19, 30, 35, 41, 52, 58, 64, 75, 81, 87, 98,104,110,120,121,123,123 +}; + +// VDP transfer slots in active display 40col mode. 1 slot is 488/210 = 2.3238 +// 68k cycles. Only 18 of the 210 slots in a scanline can be used by CPU/DMA: +// (HINT=0): 23,49,57,65,81,89,97,113,121,129,145,153,161,177,185,193,208,209 +const unsigned char vdpcyc2sl_40[] = { // 68k cycles/4 since HINT to slot # +// 4 8 12 16 20 24 28 32 36 40 44 48 52 56 60 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, + 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, + 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 8, 8, 8, 8, 8, 9, 9, 9, 9,10,10,10,10,10,10, +10,10,10,10,11,11,11,11,12,12,12,12,12,13,13,13, +13,13,13,13,13,13,14,14,14,14,14,15,15,15,15,15, +16,16,16,16,16,16,16,16,17,18,18,18,18,18,18,18, +}; +const unsigned char vdpsl2cyc_40[] = { // slot # to 68k cycles/4 since HINT + 0, 13, 28, 33, 37, 47, 51, 56, 65, 70, 74, 84, 88, 93,102,107,112,120,121,123,123 +}; + +// NB code assumes fifo_* arrays have size 2^n +// last transferred FIFO data, ...x = index XXX currently only CPU +static short fifo_data[4], fifo_dx; +// queued FIFO transfers, ...x = index, ...l = queue length +// each entry has 2 values: 
[n]>>1=#writes, [n]&1=is VRAM byte access +static int fifo_queue[8], fifo_qx, fifo_ql; + +signed int fifo_cnt; // pending slots for current queue entry +unsigned short fifo_slot; // last executed slot in current scanline +unsigned int fifo_total; // total# of pending FIFO entries + +// sync FIFO to cycles +void PicoVideoFIFOSync(int cycles) +{ + struct PicoVideo *pv = &Pico.video; + int active = !(pv->status & SR_VB) && (pv->reg[1] & 0x40); + int h40 = pv->reg[12] & 1; + const unsigned char *cs = h40 ? vdpcyc2sl_40 : vdpcyc2sl_32; + int slots, done; + + // calculate #slots since last executed slot + if (active) slots = cs[cycles/4]; + else slots = (cycles * vdpcyc2sl_bl[h40] + cycles) >> 16; + slots -= fifo_slot; + + // advance FIFO queue by #done slots + done = slots; + while (done > 0 && fifo_ql) { + int l = done, b = fifo_queue[fifo_qx&7] & 1; + if (l > fifo_cnt) + l = fifo_cnt; + fifo_total -= ((fifo_cnt & b) + l) >> b; + fifo_slot += l; + fifo_cnt -= l; + done -= l; + + if (fifo_cnt == 0) { + fifo_qx ++, fifo_ql --; + fifo_cnt= (fifo_queue[fifo_qx&7] >> 1) << (fifo_queue[fifo_qx&7] & 1); + } + } + + // release CPU and terminate DMA if FIFO isn't blocking the 68k anymore + if (fifo_total <= 4) { + pv->status &= ~PVS_CPUWR; + pv->command &= ~0x80; + if (!(pv->status & PVS_DMAPEND)) + pv->status &= ~(SR_DMA|PVS_DMAFILL); + } + if (fifo_total == 0) + pv->status &= ~PVS_CPURD; +} + +// drain FIFO, blocking 68k on the way. FIFO must be synced prior to drain. +int PicoVideoFIFODrain(int level, int cycles) +{ + struct PicoVideo *pv = &Pico.video; + int active = !(pv->status & SR_VB) && (pv->reg[1] & 0x40); + int h40 = pv->reg[12] & 1; + const unsigned char *sc = h40 ? 
vdpsl2cyc_40 : vdpsl2cyc_32; + int maxsl = vdpslots[h40 + 2*active]; // max xfer slots in this scanline + int burn = 0; + + while (fifo_total > level && fifo_slot < maxsl) { + int b = fifo_queue[fifo_qx&7] & 1; + int cnt = (fifo_total-level) << b; + int last = fifo_slot; + int slot = (fifo_cnt<cnt ? fifo_cnt : cnt) + last; // target slot + unsigned ocyc = cycles; + + if (slot > maxsl) { + // target in later scanline, advance to eol + slot = maxsl; + fifo_slot = maxsl; + cycles = 488; + } else { + // advance FIFO to target slot and CPU to cycles at that slot + fifo_slot = slot; + if (active) cycles = sc[slot]*4; + else cycles = ((slot * vdpsl2cyc_bl[h40] + slot) >> 16); + } + burn += cycles - ocyc; + + slot -= last; + fifo_total -= ((fifo_cnt & b) + slot) >> b; + fifo_cnt -= slot; + + if (fifo_cnt == 0) { + fifo_qx ++, fifo_ql --; + fifo_cnt= (fifo_queue[fifo_qx&7] >> 1) << (fifo_queue[fifo_qx&7] & 1); + } + } + + // release CPU and terminate DMA if FIFO isn't blocking the bus anymore + if (fifo_total <= 4) { + pv->status &= ~PVS_CPUWR; + pv->command &= ~0x80; + if (!(pv->status & PVS_DMAPEND)) + pv->status &= ~(SR_DMA|PVS_DMAFILL); + } + if (fifo_total == 0) + pv->status &= ~PVS_CPURD; + + return burn; +} + +// read VDP data port +int PicoVideoFIFORead(void) +{ + struct PicoVideo *pv = &Pico.video; + int active = !(pv->status & SR_VB) && (pv->reg[1] & 0x40); + int h40 = pv->reg[12] & 1; + const unsigned char *cs = h40 ? vdpcyc2sl_40 : vdpcyc2sl_32; + const unsigned char *sc = h40 ? 
vdpsl2cyc_40 : vdpsl2cyc_32; + int lc = SekCyclesDone()-Pico.t.m68c_line_start+4; + int burn = 0; + + PicoVideoFIFOSync(lc); + + // advance FIFO and CPU until FIFO is empty + burn = PicoVideoFIFODrain(0, lc); + lc += burn; + if (fifo_total > 0) + pv->status |= PVS_CPURD; // target slot is in later scanline + else { + // use next VDP access slot for reading, block 68k until then + if (active) { + fifo_slot = cs[lc/4] + 1; + burn += sc[fifo_slot]*4; + } else { + fifo_slot = ((lc * vdpcyc2sl_bl[h40] + lc) >> 16) + 1; + burn += ((fifo_slot * vdpsl2cyc_bl[h40] + fifo_slot) >> 16); + } + burn -= lc; + } + + return burn; +} + +// write VDP data port +int PicoVideoFIFOWrite(int count, int byte_p, unsigned sr_mask,unsigned sr_flags) +{ + struct PicoVideo *pv = &Pico.video; + int active = !(pv->status & SR_VB) && (pv->reg[1] & 0x40); + int h40 = pv->reg[12] & 1; + const unsigned char *cs = h40 ? vdpcyc2sl_40 : vdpcyc2sl_32; + int lc = SekCyclesDone()-Pico.t.m68c_line_start+4; + int burn = 0; + + PicoVideoFIFOSync(lc); + pv->status = (pv->status & ~sr_mask) | sr_flags; + + if (count) { + // update FIFO state if it was empty + if (fifo_total == 0 && count) { + if (active) fifo_slot = cs[lc/4]; + else fifo_slot = (lc * vdpcyc2sl_bl[h40] + lc) >> 16; + fifo_cnt = count << byte_p; + } + + // create xfer queue entry + int x = (fifo_qx + fifo_ql) & 7; + fifo_queue[x] = (count << 1) | byte_p; + fifo_ql ++; + fifo_total += count; + } + + // if CPU is waiting for the bus, advance CPU and FIFO until bus is free + if ((pv->status & (PVS_CPUWR|PVS_DMAFILL)) == PVS_CPUWR) + burn = PicoVideoFIFODrain(4, lc); + + return burn; +} + +// at HINT, advance FIFO to new scanline +int PicoVideoFIFOHint(void) +{ + struct PicoVideo *pv = &Pico.video; + int burn = 0; + + // reset slot to start of scanline + fifo_slot = 0; + + // if CPU is waiting for the bus, advance CPU and FIFO until bus is free + if (pv->status & PVS_CPURD) + burn = PicoVideoFIFORead(); + if (pv->status & PVS_CPUWR) + burn = 
PicoVideoFIFOWrite(0, 0, 0, 0); + + return burn; +} + +// switch FIFO mode between active/inactive display +void PicoVideoFIFOMode(int active) +{ + struct PicoVideo *pv = &Pico.video; + const unsigned char *cs = pv->reg[12]&1 ? vdpcyc2sl_40 : vdpcyc2sl_32; + int h40 = pv->reg[12] & 1; + int lc = SekCyclesDone() - Pico.t.m68c_line_start; + + PicoVideoFIFOSync(lc); + + if (fifo_total) { + // recalculate FIFO slot for new mode + if (!(pv->status & SR_VB) && active) + fifo_slot = cs[lc/4]; + else fifo_slot = ((lc * vdpcyc2sl_bl[h40] + lc) >> 16); + } +} + + +// VDP memory rd/wr + static __inline void AutoIncrement(void) { Pico.video.addr=(unsigned short)(Pico.video.addr+Pico.video.reg[0xf]); @@ -60,15 +347,19 @@ static void VideoWrite(u16 d) static unsigned int VideoRead(void) { - unsigned int a=0,d=0; + unsigned int a, d = fifo_data[(fifo_dx+1)&3]; a=Pico.video.addr; a>>=1; + SekCyclesBurnRun(PicoVideoFIFORead()); switch (Pico.video.type) { case 0: d=PicoMem.vram [a & 0x7fff]; break; - case 8: d=PicoMem.cram [a & 0x003f]; break; - case 4: d=PicoMem.vsram[a & 0x003f]; break; + case 8: d=(PicoMem.cram [a & 0x003f] & 0x0eee) | (d & ~0x0eee); break; + case 4: if ((a & 0x3f) >= 0x28) a = 0; + d=(PicoMem.vsram [a & 0x003f] & 0x07ff) | (d & ~0x07ff); break; + case 12:a=PicoMem.vram [a & 0x7fff]; if (Pico.video.addr&1) a >>= 8; + d=(a & 0x00ff) | (d & ~0x00ff); break; default:elprintf(EL_ANOMALY, "VDP read with bad type %i", Pico.video.type); break; } @@ -76,6 +367,8 @@ static unsigned int VideoRead(void) return d; } +// VDP DMA + static int GetDmaLength(void) { struct PicoVideo *pvid=&Pico.video; @@ -95,13 +388,11 @@ static void DmaSlow(int len, unsigned int source) u32 mask = 0x1ffff; elprintf(EL_VDPDMA, "DmaSlow[%i] %06x->%04x len %i inc=%i blank %i [%u] @ %06x", - Pico.video.type, source, a, len, inc, (Pico.video.status&8)||!(Pico.video.reg[1]&0x40), + Pico.video.type, source, a, len, inc, (Pico.video.status&SR_VB)||!(Pico.video.reg[1]&0x40), SekCyclesDone(), SekPc); - 
Pico.m.dma_xfers = len; - if (Pico.m.dma_xfers < len) // lame 16bit var - Pico.m.dma_xfers = ~0; - SekCyclesBurnRun(CheckDMA(488 - (SekCyclesDone()-Pico.t.m68c_line_start))); + SekCyclesBurnRun(PicoVideoFIFOWrite(len, Pico.video.type == 1, PVS_DMAPEND, + SR_DMA | PVS_CPUWR) + 8); if ((source & 0xe00000) == 0xe00000) { // Ram base = (u16 *)PicoMem.ram; @@ -224,14 +515,12 @@ static void DmaCopy(int len) int source; elprintf(EL_VDPDMA, "DmaCopy len %i [%u]", len, SekCyclesDone()); - Pico.m.dma_xfers = len; - if (Pico.m.dma_xfers < len) - Pico.m.dma_xfers = ~0; - Pico.video.status |= SR_DMA; + SekCyclesBurnRun(PicoVideoFIFOWrite(len, 1, PVS_CPUWR|PVS_DMAPEND, SR_DMA)); source =Pico.video.reg[0x15]; source|=Pico.video.reg[0x16]<<8; + // XXX implement VRAM 128k? Is this even working? for (; len; len--) { vr[a] = vr[source++ & 0xffff]; @@ -255,10 +544,7 @@ static NOINLINE void DmaFill(int data) len = GetDmaLength(); elprintf(EL_VDPDMA, "DmaFill len %i inc %i [%u]", len, inc, SekCyclesDone()); - Pico.m.dma_xfers = len; - if (Pico.m.dma_xfers < len) // lame 16bit var - Pico.m.dma_xfers = ~0; - Pico.video.status |= SR_DMA; + SekCyclesBurnRun(PicoVideoFIFOWrite(len, Pico.video.type == 1, PVS_CPUWR|PVS_DMAPEND, SR_DMA)); switch (Pico.video.type) { @@ -274,13 +560,24 @@ static NOINLINE void DmaFill(int data) Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; break; case 3: // cram + Pico.m.dirtyPal = 1; + for (l = len; l; l--) { + PicoMem.cram[(a/2) & 0x3f] = data; + + // Increment address register + a += inc; + } + break; case 5: { // vsram - // TODO: needs fifo; anyone using these? 
- static int once; - if (!once++) - elprintf(EL_STATUS|EL_ANOMALY|EL_VDPDMA, "TODO: cram/vsram fill"); + for (l = len; l; l--) { + PicoMem.vsram[(a/2) & 0x3f] = data; + + // Increment address register + a += inc; + } + break; } - case 0x81: + case 0x81: // vram 128k for (l = len; l; l--) { VideoWrite128(a, data); @@ -307,17 +604,22 @@ static NOINLINE void DmaFill(int data) } +// VDP command handling + static NOINLINE void CommandDma(void) { struct PicoVideo *pvid=&Pico.video; u32 len, method; u32 source; - if ((pvid->reg[1]&0x10)==0) return; // DMA not enabled - - if (Pico.m.dma_xfers) + pvid->status |= PVS_DMAPEND; + PicoVideoFIFOSync(SekCyclesDone()-Pico.t.m68c_line_start); + if (pvid->status & SR_DMA) { elprintf(EL_VDPDMA, "Dma overlap, left=%d @ %06x", - Pico.m.dma_xfers, SekPc); + fifo_total, SekPc); + fifo_total = fifo_ql = 0; + } + pvid->status |= SR_DMA; len = GetDmaLength(); source =Pico.video.reg[0x15]; @@ -329,9 +631,10 @@ static NOINLINE void CommandDma(void) DmaSlow(len, source << 1); // 68000 to VDP else if (method == 3) DmaCopy(len); // VRAM Copy - else + else { + pvid->status |= PVS_DMAFILL; return; - + } source += len; Pico.video.reg[0x13] = Pico.video.reg[0x14] = 0; Pico.video.reg[0x15] = source; @@ -357,13 +660,21 @@ static NOINLINE void CommandChange(void) pvid->addr_u = (u8)((cmd >> 2) & 1); } -static void DrawSync(int blank_on) +// VDP interface + +static void DrawSync(int skip) { int lines = Pico.video.reg[1]&0x08 ? 
240 : 224; - if (Pico.m.scanline < lines && !(PicoIn.opt & POPT_ALT_RENDERER) && - !PicoIn.skipFrame && Pico.est.DrawScanline <= Pico.m.scanline) { + int last = Pico.m.scanline - (skip || blankline == Pico.m.scanline); + + if (last < lines && !(PicoIn.opt & POPT_ALT_RENDERER) && + !PicoIn.skipFrame && Pico.est.DrawScanline <= last) { //elprintf(EL_ANOMALY, "sync"); - PicoDrawSync(Pico.m.scanline, blank_on); + if (blankline >= 0 && blankline < last) { + PicoDrawSync(blankline, 1); + blankline = -1; + } + PicoDrawSync(last, 0); } } @@ -390,19 +701,19 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) pvid->pending=0; } - if (!(pvid->status & SR_VB) && (pvid->reg[1]&0x40) && !(PicoIn.opt&POPT_DIS_VDP_FIFO)) + if (!(PicoIn.opt&POPT_DIS_VDP_FIFO)) { - int use = pvid->type == 1 ? 2 : 1; - pvid->lwrite_cnt -= use; - if (pvid->lwrite_cnt < 0) - SekCyclesBurnRun(488 - (SekCyclesDone()-Pico.t.m68c_line_start)); - elprintf(EL_ASVDP, "VDP data write: [%04x] %04x [%u] {%i} #%i @ %06x", - Pico.video.addr, d, SekCyclesDone(), Pico.video.type, pvid->lwrite_cnt, SekPc); + fifo_data[++fifo_dx&3] = d; + SekCyclesBurnRun(PicoVideoFIFOWrite(1, pvid->type == 1, 0, PVS_CPUWR)); + + elprintf(EL_ASVDP, "VDP data write: [%04x] %04x [%u] {%i} @ %06x", + Pico.video.addr, d, SekCyclesDone(), Pico.video.type, SekPc); } VideoWrite(d); - if ((pvid->command&0x80) && (pvid->reg[1]&0x10) && (pvid->reg[0x17]>>6)==2) - DmaFill(d); + // start DMA fill on write. NB VSRAM and CRAM fills use wrong FIFO data. 
+ if ((pvid->status & (PVS_DMAPEND|PVS_DMAFILL)) == (PVS_DMAPEND|PVS_DMAFILL)) + DmaFill(fifo_data[(fifo_dx + !!(pvid->type&~0x81))&3]); break; @@ -410,6 +721,8 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) if (pvid->pending) { // Low word of command: + if (!(pvid->reg[1]&0x10)) + d = (d&~0x80)|(pvid->command&0x80); pvid->command &= 0xffff0000; pvid->command |= d; pvid->pending = 0; @@ -427,16 +740,24 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) // Register write: int num=(d>>8)&0x1f; int dold=pvid->reg[num]; - int blank_on = 0; + int skip=0; pvid->type=0; // register writes clear command (else no Sega logo in Golden Axe II) if (num > 0x0a && !(pvid->reg[1]&4)) { elprintf(EL_ANOMALY, "%02x written to reg %02x in SMS mode @ %06x", d, num, SekPc); return; } - if (num == 1 && !(d&0x40) && SekCyclesDone() - Pico.t.m68c_line_start <= 488-390) - blank_on = 1; - DrawSync(blank_on); + if (num == 0 && !(pvid->reg[0]&2) && (d&2)) + hvlatch = PicoVideoRead(0x08); + if (num == 1 && ((pvid->reg[1]^d)&0x40)) { + PicoVideoFIFOMode(d & 0x40); + // handle line blanking before line rendering + if (SekCyclesDone() - Pico.t.m68c_line_start <= 488-390) { + skip = 1; + blankline = d&0x40 ? -1 : Pico.m.scanline; + } + } + DrawSync(skip); pvid->reg[num]=(unsigned char)d; switch (num) { @@ -519,15 +840,23 @@ update_irq: } } -static u32 SrLow(const struct PicoVideo *pv) +static u32 VideoSr(const struct PicoVideo *pv) { unsigned int c, d = pv->status; + unsigned int hp = pv->reg[12]&1 ? 32:40; // HBLANK start + unsigned int hl = pv->reg[12]&1 ? 94:84; // HBLANK length c = SekCyclesDone(); - if (c - Pico.t.m68c_line_start - 39 < 92) + if (c - Pico.t.m68c_line_start - hp < hl) d |= SR_HB; - if (CYCLES_GT(c, Pico.t.dma_end)) - d &= ~SR_DMA; + + PicoVideoFIFOSync(c-Pico.t.m68c_line_start); + if (pv->status & SR_DMA) + d |= SR_EMPT; // unused by DMA, or rather flags not updated? 
+ else if (fifo_total >= 4) + d |= SR_FULL; + else if (!fifo_total) + d |= SR_EMPT; return d; } @@ -538,8 +867,11 @@ PICO_INTERNAL_ASM unsigned int PicoVideoRead(unsigned int a) if (a == 0x04) // control port { struct PicoVideo *pv = &Pico.video; - unsigned int d = SrLow(pv); - pv->pending = 0; + unsigned int d = VideoSr(pv); + if (pv->pending) { + CommandChange(); + pv->pending = 0; + } elprintf(EL_SR, "SR read: %04x [%u] @ %06x", d, SekCyclesDone(), SekPc); return d; } @@ -564,12 +896,14 @@ PICO_INTERNAL_ASM unsigned int PicoVideoRead(unsigned int a) unsigned int d; d = (SekCyclesDone() - Pico.t.m68c_line_start) & 0x1ff; // FIXME - if (Pico.video.reg[12]&1) - d = hcounts_40[d]; - else d = hcounts_32[d]; + if (Pico.video.reg[0]&2) + d = hvlatch; + else if (Pico.video.reg[12]&1) + d = hcounts_40[d] | (Pico.video.v_counter << 8); + else d = hcounts_32[d] | (Pico.video.v_counter << 8); elprintf(EL_HVCNT, "hv: %02x %02x [%u] @ %06x", d, Pico.video.v_counter, SekCyclesDone(), SekPc); - return d | (Pico.video.v_counter << 8); + return d; } if (a==0x00) // data port @@ -592,16 +926,22 @@ unsigned char PicoVideoRead8DataL(void) unsigned char PicoVideoRead8CtlH(void) { - u8 d = (u8)(Pico.video.status >> 8); - Pico.video.pending = 0; + u8 d = VideoSr(&Pico.video) >> 8; + if (Pico.video.pending) { + CommandChange(); + Pico.video.pending = 0; + } elprintf(EL_SR, "SR read (h): %02x @ %06x", d, SekPc); return d; } unsigned char PicoVideoRead8CtlL(void) { - u8 d = SrLow(&Pico.video); - Pico.video.pending = 0; + u8 d = VideoSr(&Pico.video); + if (Pico.video.pending) { + CommandChange(); + Pico.video.pending = 0; + } elprintf(EL_SR, "SR read (l): %02x @ %06x", d, SekPc); return d; } From f36709e65142736031062d553e07c91fc0601ec1 Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 8 Feb 2020 13:29:32 +0100 Subject: [PATCH 0263/1110] sh2 drc: fix for crash in generated code on x86_64 --- cpu/drc/emit_x86.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index ec13551e..80ec0444 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -1007,7 +1007,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common #define host_instructions_updated(base, end) (void)(base),(void)(end) #define emith_update_cache() /**/ -#define emith_rw_offs_max() 0xffffffff +// NB this MUST be <0x40000000 to avoid overflow in address calculations +#define emith_rw_offs_max() 0xfffffff // for better perfomance: <0x10000000 #ifdef __x86_64__ From 987f07974913f48ec019274ffb1f7ebca76cc8ff Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 8 Feb 2020 15:20:05 +0100 Subject: [PATCH 0264/1110] vdp fifo: kludge for DMA fill interrupted by CPU --- pico/videoport.c | 109 +++++++++++++++++++++++++---------------------- 1 file changed, 58 insertions(+), 51 deletions(-) diff --git a/pico/videoport.c b/pico/videoport.c index 881a74a3..0a6a103f 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -26,7 +26,7 @@ int (*PicoDmaHook)(unsigned int source, int len, unsigned short **base, unsigned * fifo_cnt: #slots remaining for active FIFO write (#writes<<#bytep) * fifo_total: #total FIFO entries pending * fifo_data: last values transferred through fifo - * fifo_queue: fifo transfer queue (#writes, VRAM_byte_p) + * fifo_queue: fifo transfer queue (#writes, flags) * * FIFO states: empty total=0 * inuse total>0 && total<4 @@ -95,42 +95,66 @@ const unsigned char vdpsl2cyc_40[] = { // slot # to 68k cycles/4 since HINT // last transferred FIFO data, ...x = index XXX currently only CPU static short fifo_data[4], fifo_dx; // queued FIFO transfers, ...x = index, ...l = queue length -// each entry has 2 values: [n]>>1=#writes, [n]&1=is VRAM byte access +// each entry has 2 values: [n]>>2=#writes, [n]&3=flags:2=DMA fill 1=byte access static int fifo_queue[8], fifo_qx, fifo_ql; signed int fifo_cnt; // pending slots for current queue entry unsigned short fifo_slot; // last executed slot in current scanline 
unsigned int fifo_total; // total# of pending FIFO entries +// do the FIFO math +static __inline int AdvanceFIFOEntry(int slots) +{ + int l = slots, b = fifo_queue[fifo_qx&7] & 1; + + if (l > fifo_cnt) + l = fifo_cnt; + fifo_total -= ((fifo_cnt & b) + l) >> b; + fifo_cnt -= l; + + if (fifo_cnt == 0) { + fifo_qx ++, fifo_ql --; + fifo_cnt= (fifo_queue[fifo_qx&7] >> 2) << (fifo_queue[fifo_qx&7] & 1); + } + return l; +} + +static __inline int GetFIFOSlot(struct PicoVideo *pv, int cycles) +{ + int active = !(pv->status & SR_VB) && (pv->reg[1] & 0x40); + int h40 = pv->reg[12] & 1; + const unsigned char *cs = h40 ? vdpcyc2sl_40 : vdpcyc2sl_32; + + if (active) return cs[cycles/4]; + else return (cycles * vdpcyc2sl_bl[h40] + cycles) >> 16; +} + +static inline int GetFIFOCycles(struct PicoVideo *pv, int slot) +{ + int active = !(pv->status & SR_VB) && (pv->reg[1] & 0x40); + int h40 = pv->reg[12] & 1; + const unsigned char *sc = h40 ? vdpsl2cyc_40 : vdpsl2cyc_32; + + if (active) return sc[slot]*4; + else return ((slot * vdpsl2cyc_bl[h40] + slot) >> 16); +} + // sync FIFO to cycles void PicoVideoFIFOSync(int cycles) { struct PicoVideo *pv = &Pico.video; - int active = !(pv->status & SR_VB) && (pv->reg[1] & 0x40); - int h40 = pv->reg[12] & 1; - const unsigned char *cs = h40 ? 
vdpcyc2sl_40 : vdpcyc2sl_32; int slots, done; // calculate #slots since last executed slot - if (active) slots = cs[cycles/4]; - else slots = (cycles * vdpcyc2sl_bl[h40] + cycles) >> 16; + slots = GetFIFOSlot(pv, cycles); slots -= fifo_slot; // advance FIFO queue by #done slots done = slots; while (done > 0 && fifo_ql) { - int l = done, b = fifo_queue[fifo_qx&7] & 1; - if (l > fifo_cnt) - l = fifo_cnt; - fifo_total -= ((fifo_cnt & b) + l) >> b; + int l = AdvanceFIFOEntry(done); fifo_slot += l; - fifo_cnt -= l; done -= l; - - if (fifo_cnt == 0) { - fifo_qx ++, fifo_ql --; - fifo_cnt= (fifo_queue[fifo_qx&7] >> 1) << (fifo_queue[fifo_qx&7] & 1); - } } // release CPU and terminate DMA if FIFO isn't blocking the 68k anymore @@ -150,7 +174,6 @@ int PicoVideoFIFODrain(int level, int cycles) struct PicoVideo *pv = &Pico.video; int active = !(pv->status & SR_VB) && (pv->reg[1] & 0x40); int h40 = pv->reg[12] & 1; - const unsigned char *sc = h40 ? vdpsl2cyc_40 : vdpsl2cyc_32; int maxsl = vdpslots[h40 + 2*active]; // max xfer slots in this scanline int burn = 0; @@ -169,19 +192,11 @@ int PicoVideoFIFODrain(int level, int cycles) } else { // advance FIFO to target slot and CPU to cycles at that slot fifo_slot = slot; - if (active) cycles = sc[slot]*4; - else cycles = ((slot * vdpsl2cyc_bl[h40] + slot) >> 16); + cycles = GetFIFOCycles(pv, slot); } burn += cycles - ocyc; - slot -= last; - fifo_total -= ((fifo_cnt & b) + slot) >> b; - fifo_cnt -= slot; - - if (fifo_cnt == 0) { - fifo_qx ++, fifo_ql --; - fifo_cnt= (fifo_queue[fifo_qx&7] >> 1) << (fifo_queue[fifo_qx&7] & 1); - } + AdvanceFIFOEntry(slot - last); } // release CPU and terminate DMA if FIFO isn't blocking the bus anymore @@ -201,10 +216,6 @@ int PicoVideoFIFODrain(int level, int cycles) int PicoVideoFIFORead(void) { struct PicoVideo *pv = &Pico.video; - int active = !(pv->status & SR_VB) && (pv->reg[1] & 0x40); - int h40 = pv->reg[12] & 1; - const unsigned char *cs = h40 ? 
vdpcyc2sl_40 : vdpcyc2sl_32; - const unsigned char *sc = h40 ? vdpsl2cyc_40 : vdpsl2cyc_32; int lc = SekCyclesDone()-Pico.t.m68c_line_start+4; int burn = 0; @@ -217,43 +228,33 @@ int PicoVideoFIFORead(void) pv->status |= PVS_CPURD; // target slot is in later scanline else { // use next VDP access slot for reading, block 68k until then - if (active) { - fifo_slot = cs[lc/4] + 1; - burn += sc[fifo_slot]*4; - } else { - fifo_slot = ((lc * vdpcyc2sl_bl[h40] + lc) >> 16) + 1; - burn += ((fifo_slot * vdpsl2cyc_bl[h40] + fifo_slot) >> 16); - } - burn -= lc; + fifo_slot = GetFIFOSlot(pv, lc) + 1; + burn += GetFIFOCycles(pv, fifo_slot) - lc; } return burn; } // write VDP data port -int PicoVideoFIFOWrite(int count, int byte_p, unsigned sr_mask,unsigned sr_flags) +int PicoVideoFIFOWrite(int count, int flags, unsigned sr_mask,unsigned sr_flags) { struct PicoVideo *pv = &Pico.video; - int active = !(pv->status & SR_VB) && (pv->reg[1] & 0x40); - int h40 = pv->reg[12] & 1; - const unsigned char *cs = h40 ? 
vdpcyc2sl_40 : vdpcyc2sl_32; int lc = SekCyclesDone()-Pico.t.m68c_line_start+4; int burn = 0; PicoVideoFIFOSync(lc); pv->status = (pv->status & ~sr_mask) | sr_flags; - if (count) { + if (count && fifo_ql < 8) { // update FIFO state if it was empty if (fifo_total == 0 && count) { - if (active) fifo_slot = cs[lc/4]; - else fifo_slot = (lc * vdpcyc2sl_bl[h40] + lc) >> 16; - fifo_cnt = count << byte_p; + fifo_slot = GetFIFOSlot(pv, lc); + fifo_cnt = count << (flags&1); } // create xfer queue entry int x = (fifo_qx + fifo_ql) & 7; - fifo_queue[x] = (count << 1) | byte_p; + fifo_queue[x] = (count << 2) | flags; fifo_ql ++; fifo_total += count; } @@ -261,6 +262,11 @@ int PicoVideoFIFOWrite(int count, int byte_p, unsigned sr_mask,unsigned sr_flags // if CPU is waiting for the bus, advance CPU and FIFO until bus is free if ((pv->status & (PVS_CPUWR|PVS_DMAFILL)) == PVS_CPUWR) burn = PicoVideoFIFODrain(4, lc); + else if (fifo_queue[fifo_qx&7]&2) { + // if interrupting a DMA fill terminate it + AdvanceFIFOEntry(fifo_cnt); + pv->status &= ~PVS_DMAFILL; + } return burn; } @@ -515,7 +521,7 @@ static void DmaCopy(int len) int source; elprintf(EL_VDPDMA, "DmaCopy len %i [%u]", len, SekCyclesDone()); - SekCyclesBurnRun(PicoVideoFIFOWrite(len, 1, PVS_CPUWR|PVS_DMAPEND, SR_DMA)); + SekCyclesBurnRun(PicoVideoFIFOWrite(len, 1, PVS_CPUWR | PVS_DMAPEND, SR_DMA)); source =Pico.video.reg[0x15]; source|=Pico.video.reg[0x16]<<8; @@ -544,7 +550,8 @@ static NOINLINE void DmaFill(int data) len = GetDmaLength(); elprintf(EL_VDPDMA, "DmaFill len %i inc %i [%u]", len, inc, SekCyclesDone()); - SekCyclesBurnRun(PicoVideoFIFOWrite(len, Pico.video.type == 1, PVS_CPUWR|PVS_DMAPEND, SR_DMA)); + SekCyclesBurnRun(PicoVideoFIFOWrite(len, 2|(Pico.video.type == 1), + PVS_CPUWR | PVS_DMAPEND, SR_DMA)); switch (Pico.video.type) { From c64370328ced70bab4e4e59d3f04eaad123d646c Mon Sep 17 00:00:00 2001 From: kub Date: Sun, 16 Feb 2020 08:32:29 +0100 Subject: [PATCH 0265/1110] fix compatibility with ancient gas 
--- README.md | 5 +---- cpu/cyclone | 2 +- cyclone_gp2x.patch | 41 ----------------------------------------- 3 files changed, 2 insertions(+), 46 deletions(-) delete mode 100644 cyclone_gp2x.patch diff --git a/README.md b/README.md index 67f60c2c..a5d0ad3a 100644 --- a/README.md +++ b/README.md @@ -35,10 +35,7 @@ opendingux|opendingux|CROSS_COMPILE=mipsel-linux- CFLAGS="-I$TC/usr/include -I$T opendingux|opendingux with ubuntu mips gcc 5.4|CROSS_COMPILE=mipsel-linux-gnu- CFLAGS="-I$TC/usr/include -I$TC/usr/include/SDL" LDFLAGS="-B$TC/usr/lib -B$TC/lib -Wl,-rpath-link=$TC/usr/lib -Wl,-rpath-link=$TC/lib" ./configure --platform=opendingux gcw0|gcw0|CROSS_COMPILE=mipsel-gcw0-linux-uclibc- CFLAGS="-I$TC/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include -I$TC/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include/SDL" LDFLAGS="--sysroot $TC/usr/mipsel-gcw0-linux-uclibc/sysroot" ./configure --platform=gcw0 -For gp2x, wiz, and caanoo you may need to compile libpng first, and additionally -cyclone_gp2x.patch may need to be applied to the cpu/cyclone submodule: - -> patch -d cpu/cyclone -p1 Date: Sun, 16 Feb 2020 08:42:45 +0100 Subject: [PATCH 0266/1110] 32X poll detection fix --- pico/32x/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pico/32x/memory.c b/pico/32x/memory.c index 30d9b577..f772d28d 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -239,7 +239,7 @@ static NOINLINE void sh2_poll_write(u32 a, u32 d, unsigned int cycles, SH2 *sh2) // NB this can take an eternity on m68k: mov.b , needs // 28 m68k-cycles (~80 sh2-cycles) to complete (observed in Metal Head) q = &fifo[(sh2_poll_wr[hix]-1) % PFIFO_SZ]; - if (rd != wr && q->a == a && !CYCLES_GT(cycles,q->cycles+30)) { + if (cpu < 0 && rd != wr && q->a == a && !CYCLES_GT(cycles,q->cycles+30)) { q->d = d; } else { // store write to poll address in fifo From 29d99d6bb852118b2d688705eb427e8e9adb797f Mon Sep 17 00:00:00 2001 From: kub Date: Sun, 16 Feb 2020 13:48:51 +0100 Subject: [PATCH 
0267/1110] vdp rendering fixes --- pico/draw.c | 20 ++++++++++++-------- pico/draw2.c | 2 +- pico/draw_arm.S | 15 ++++++++++----- 3 files changed, 23 insertions(+), 14 deletions(-) diff --git a/pico/draw.c b/pico/draw.c index 01153f3e..49d41521 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -339,12 +339,13 @@ static void DrawStripVSRam(struct TileStrip *ts, int plane_sh, int cellskip) oldcode = code; // Get tile address/2: addr=(code&0x7ff)<<4; - if (code&0x1000) addr+=14-ty; else addr+=ty; // Y-flip pal=((code>>9)&0x30)|((plane_sh<<5)&0x40); } - pack = *(unsigned int *)(PicoMem.vram + addr); + if (code & 0x1000) ty ^= 0xe; // Y-flip + pack = *(unsigned int *)(PicoMem.vram + addr+ty); + if (!pack) { blank = code; continue; @@ -394,7 +395,7 @@ void DrawStripInterlace(struct TileStrip *ts) if (code!=oldcode) { oldcode = code; // Get tile address/2: - addr=(code&0x7ff)<<5; + addr=(code&0x3ff)<<5; if (code&0x1000) addr+=30-ty; else addr+=ty; // Y-flip // pal=Pico.cram+((code>>9)&0x30); @@ -449,8 +450,11 @@ static void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells, else ts.nametab=(pvid->reg[2]&0x38)<< 9; // A htab=pvid->reg[13]<<9; // Horizontal scroll table address - if ( pvid->reg[11]&2) htab+=est->DrawScanline<<1; // Offset by line - if ((pvid->reg[11]&1)==0) htab&=~0xf; // Offset by tile + switch (pvid->reg[11]&3) { + case 1: htab += (est->DrawScanline<<1) & 0x0f; break; + case 2: htab += (est->DrawScanline<<1) & ~0x0f; break; // Offset by tile + case 3: htab += (est->DrawScanline<<1); break; // Offset by line + } htab+=plane_sh&1; // A or B // Get horizontal scroll value, will be masked later @@ -626,9 +630,9 @@ static void DrawTilesFromCache(int *hc, int sh, int rlim, struct PicoEState *est if (!sh) { - short blank=-1; // The tile we know is blank + int blank=-1; // The tile we know is blank while ((code=*hc++)) { - if (!(code & 0x8000) || (short)code == blank) + if (!(code & 0x8000) || (unsigned short)code == blank) continue; // Get tile 
address/2: addr = (code & 0x7ff) << 4; @@ -636,7 +640,7 @@ static void DrawTilesFromCache(int *hc, int sh, int rlim, struct PicoEState *est pack = *(unsigned int *)(PicoMem.vram + addr); if (!pack) { - blank = (short)code; + blank = (unsigned short)code; continue; } diff --git a/pico/draw2.c b/pico/draw2.c index 85e2b275..91069770 100644 --- a/pico/draw2.c +++ b/pico/draw2.c @@ -20,7 +20,7 @@ #define LINE_WIDTH 328 #endif -static unsigned char PicoDraw2FB_[(8+320) * (8+240+8)]; +static unsigned char PicoDraw2FB_[(8+320) * (8+240+8) + 8]; static int HighCache2A[41*(TILE_ROWS+1)+1+1]; // caches for high layers static int HighCache2B[41*(TILE_ROWS+1)+1+1]; diff --git a/pico/draw_arm.S b/pico/draw_arm.S index fb6d0950..967bf6aa 100644 --- a/pico/draw_arm.S +++ b/pico/draw_arm.S @@ -342,11 +342,15 @@ DrawLayer: mov r4, r8, lsr #8 @ pvid->reg[13] mov r4, r4, lsl #10 @ htab=pvid->reg[13]<<9; (halfwords) - tst r7, #2 - addne r4, r4, r2, lsl #2 @ htab+=DrawScanline<<1; // Offset by line - tst r7, #1 - biceq r4, r4, #0x1f @ htab&=~0xf; // Offset by tile - add r4, r4, r0, lsl #1 @ htab+=plane + + ands r3, r7, #0x03 + beq 0f + cmp r3, #2 + mov r3, r2, lsl #2 @ htab+=DrawScanline<<1; // Offset by line + biceq r3, #0x1f @ htab&=~0xf; // Offset by tile + andlt r3, #0x1f + add r4, r4, r3 +0: add r4, r4, r0, lsl #1 @ htab+=plane bic r4, r4, #0x00ff0000 @ just in case ldrh r3, [lr, r4] @ r3=hscroll @@ -599,6 +603,7 @@ DrawLayer: tst r7, #0x8000 bne .DrawStrip_vs_hiprio + orr r7, r7, r10, lsl #24 @ code | (ty << 24) cmp r7, r9 beq .DrawStrip_vs_samecode @ we know stuff about this tile already From daf29df963bf002280ff12e02f863740b1912a28 Mon Sep 17 00:00:00 2001 From: kub Date: Sun, 16 Feb 2020 14:08:48 +0100 Subject: [PATCH 0268/1110] vdp fifo, tentative fix for broken save/load --- pico/pico_int.h | 9 ++++- pico/state.c | 8 +++- pico/videoport.c | 100 ++++++++++++++++++++++++++++++----------------- 3 files changed, 78 insertions(+), 39 deletions(-) diff --git a/pico/pico_int.h 
b/pico/pico_int.h index b3ce8a72..70329224 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -316,7 +316,10 @@ struct PicoVideo unsigned char debug_p; // ... parsed: PVD_* unsigned char addr_u; // bit16 of .addr unsigned char hint_cnt; - unsigned char pad[0x0b]; + unsigned char pad2; + unsigned short hv_latch; // latched hvcounter value + signed int fifo_cnt; // pending xfers for current FIFO queue entry + unsigned char pad[0x04]; }; struct PicoMisc @@ -339,7 +342,7 @@ struct PicoMisc unsigned char eeprom_slave; // EEPROM slave word for X24C02 and better SRAMs unsigned char eeprom_status; unsigned char pad1; // was ym2612 status - unsigned short pad2; // 18 was dma_xfers + unsigned short dma_xfers; // 18 unused (was VDP DMA transfer count) unsigned char eeprom_wb[2]; // EEPROM latch/write buffer unsigned int frame_count; // 1c for movies and idle det }; @@ -856,6 +859,8 @@ extern int (*PicoDmaHook)(unsigned int source, int len, unsigned short **base, u void PicoVideoFIFOSync(int cycles); int PicoVideoFIFOHint(void); int PicoVideoFIFOWrite(int count, int byte_p, unsigned sr_mask, unsigned sr_flags); +void PicoVideoSave(void); +void PicoVideoLoad(void); // misc.c PICO_INTERNAL_ASM void memcpy16bswap(unsigned short *dest, void *src, int count); diff --git a/pico/state.c b/pico/state.c index 5092ddcb..6047adbd 100644 --- a/pico/state.c +++ b/pico/state.c @@ -250,6 +250,8 @@ static int state_save(void *file) CHECKED_WRITE_BUFF(CHUNK_ZRAM, PicoMem.zram); CHECKED_WRITE_BUFF(CHUNK_CRAM, PicoMem.cram); CHECKED_WRITE_BUFF(CHUNK_MISC, Pico.m); + + PicoVideoSave(); CHECKED_WRITE_BUFF(CHUNK_VIDEO, Pico.video); z80_pack(buff_z80); @@ -433,7 +435,11 @@ static int state_load(void *file) case CHUNK_CRAM: CHECKED_READ_BUFF(PicoMem.cram); break; case CHUNK_VSRAM: CHECKED_READ_BUFF(PicoMem.vsram); break; case CHUNK_MISC: CHECKED_READ_BUFF(Pico.m); break; - case CHUNK_VIDEO: CHECKED_READ_BUFF(Pico.video); break; + case CHUNK_VIDEO: + CHECKED_READ_BUFF(Pico.video); + 
PicoVideoLoad(); + break; + case CHUNK_IOPORTS: CHECKED_READ_BUFF(PicoMem.ioports); break; case CHUNK_PSG: CHECKED_READ2(28*4, sn76496_regs); break; case CHUNK_FM: diff --git a/pico/videoport.c b/pico/videoport.c index 0a6a103f..f64ac693 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -14,7 +14,6 @@ extern const unsigned char hcounts_32[]; extern const unsigned char hcounts_40[]; -static unsigned hvlatch; // latched hvcounter value static int blankline; // display disabled for this line int (*PicoDmaHook)(unsigned int source, int len, unsigned short **base, unsigned int *mask) = NULL; @@ -70,7 +69,7 @@ const unsigned char vdpcyc2sl_32[] = { // 68k cycles/4 since HINT to slot # 14,14,14,14,14,14,14,14,15,16,16,16,16,16,16,16, }; const unsigned char vdpsl2cyc_32[] = { // slot # to 68k cycles/4 since HINT - 0, 9, 19, 30, 35, 41, 52, 58, 64, 75, 81, 87, 98,104,110,120,121,123,123 + 0, 9, 19, 30, 35, 41, 52, 58, 64, 75, 81, 87, 98,104,110,120,121,123 }; // VDP transfer slots in active display 40col mode. 1 slot is 488/210 = 2.3238 @@ -88,33 +87,37 @@ const unsigned char vdpcyc2sl_40[] = { // 68k cycles/4 since HINT to slot # 16,16,16,16,16,16,16,16,17,18,18,18,18,18,18,18, }; const unsigned char vdpsl2cyc_40[] = { // slot # to 68k cycles/4 since HINT - 0, 13, 28, 33, 37, 47, 51, 56, 65, 70, 74, 84, 88, 93,102,107,112,120,121,123,123 + 0, 13, 28, 33, 37, 47, 51, 56, 65, 70, 74, 84, 88, 93,102,107,112,120,121,123 }; // NB code assumes fifo_* arrays have size 2^n // last transferred FIFO data, ...x = index XXX currently only CPU -static short fifo_data[4], fifo_dx; +static short fifo_data[4], fifo_dx; // XXX must go into save? 
+ // queued FIFO transfers, ...x = index, ...l = queue length // each entry has 2 values: [n]>>2=#writes, [n]&3=flags:2=DMA fill 1=byte access -static int fifo_queue[8], fifo_qx, fifo_ql; - -signed int fifo_cnt; // pending slots for current queue entry -unsigned short fifo_slot; // last executed slot in current scanline +static int fifo_queue[8], fifo_qx, fifo_ql; // XXX must go into save? unsigned int fifo_total; // total# of pending FIFO entries +unsigned short fifo_slot; // last executed slot in current scanline + // do the FIFO math -static __inline int AdvanceFIFOEntry(int slots) +static __inline int AdvanceFIFOEntry(struct PicoVideo *pv, int slots) { int l = slots, b = fifo_queue[fifo_qx&7] & 1; - if (l > fifo_cnt) - l = fifo_cnt; - fifo_total -= ((fifo_cnt & b) + l) >> b; - fifo_cnt -= l; + if (l > pv->fifo_cnt) + l = pv->fifo_cnt; + fifo_total -= ((pv->fifo_cnt & b) + l) >> b; + pv->fifo_cnt -= l; - if (fifo_cnt == 0) { - fifo_qx ++, fifo_ql --; - fifo_cnt= (fifo_queue[fifo_qx&7] >> 2) << (fifo_queue[fifo_qx&7] & 1); + if (pv->fifo_cnt == 0) { + if (fifo_ql) + fifo_qx ++, fifo_ql --; + if (fifo_ql) + pv->fifo_cnt= (fifo_queue[fifo_qx&7] >> 2) << (fifo_queue[fifo_qx&7] & 1); + else + fifo_total = 0; } return l; } @@ -129,7 +132,7 @@ static __inline int GetFIFOSlot(struct PicoVideo *pv, int cycles) else return (cycles * vdpcyc2sl_bl[h40] + cycles) >> 16; } -static inline int GetFIFOCycles(struct PicoVideo *pv, int slot) +static __inline int GetFIFOCycles(struct PicoVideo *pv, int slot) { int active = !(pv->status & SR_VB) && (pv->reg[1] & 0x40); int h40 = pv->reg[12] & 1; @@ -146,13 +149,12 @@ void PicoVideoFIFOSync(int cycles) int slots, done; // calculate #slots since last executed slot - slots = GetFIFOSlot(pv, cycles); - slots -= fifo_slot; + slots = GetFIFOSlot(pv, cycles) - fifo_slot; // advance FIFO queue by #done slots done = slots; - while (done > 0 && fifo_ql) { - int l = AdvanceFIFOEntry(done); + while (done > 0 && pv->fifo_cnt) { + int l = 
AdvanceFIFOEntry(pv, done); fifo_slot += l; done -= l; } @@ -181,7 +183,7 @@ int PicoVideoFIFODrain(int level, int cycles) int b = fifo_queue[fifo_qx&7] & 1; int cnt = (fifo_total-level) << b; int last = fifo_slot; - int slot = (fifo_cntfifo_cntfifo_cnt:cnt) + last; // target slot unsigned ocyc = cycles; if (slot > maxsl) { @@ -196,7 +198,7 @@ int PicoVideoFIFODrain(int level, int cycles) } burn += cycles - ocyc; - AdvanceFIFOEntry(slot - last); + AdvanceFIFOEntry(pv, slot - last); } // release CPU and terminate DMA if FIFO isn't blocking the bus anymore @@ -249,7 +251,7 @@ int PicoVideoFIFOWrite(int count, int flags, unsigned sr_mask,unsigned sr_flags) // update FIFO state if it was empty if (fifo_total == 0 && count) { fifo_slot = GetFIFOSlot(pv, lc); - fifo_cnt = count << (flags&1); + pv->fifo_cnt = count << (flags&1); } // create xfer queue entry @@ -263,8 +265,8 @@ int PicoVideoFIFOWrite(int count, int flags, unsigned sr_mask,unsigned sr_flags) if ((pv->status & (PVS_CPUWR|PVS_DMAFILL)) == PVS_CPUWR) burn = PicoVideoFIFODrain(4, lc); else if (fifo_queue[fifo_qx&7]&2) { - // if interrupting a DMA fill terminate it - AdvanceFIFOEntry(fifo_cnt); + // if interrupting a DMA fill terminate it XXX wrong, changes fill data + AdvanceFIFOEntry(pv, pv->fifo_cnt); pv->status &= ~PVS_DMAFILL; } @@ -699,9 +701,9 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) // try avoiding the sync.. if (Pico.m.scanline < (pvid->reg[1]&0x08 ? 
240 : 224) && (pvid->reg[1]&0x40) && !(!pvid->pending && - ((pvid->command & 0xc00000f0) == 0x40000010 && PicoMem.vsram[pvid->addr>>1] == d)) + ((pvid->command & 0xc00000f0) == 0x40000010 && PicoMem.vsram[pvid->addr>>1] == (d & 0x7ff))) ) - DrawSync(0); + DrawSync(SekCyclesDone() - Pico.t.m68c_line_start <= 488-440); if (pvid->pending) { CommandChange(); @@ -736,7 +738,7 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) CommandChange(); // Check for dma: if (d & 0x80) { - DrawSync(0); + DrawSync(SekCyclesDone() - Pico.t.m68c_line_start <= 488-390); CommandDma(); } } @@ -747,7 +749,6 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) // Register write: int num=(d>>8)&0x1f; int dold=pvid->reg[num]; - int skip=0; pvid->type=0; // register writes clear command (else no Sega logo in Golden Axe II) if (num > 0x0a && !(pvid->reg[1]&4)) { elprintf(EL_ANOMALY, "%02x written to reg %02x in SMS mode @ %06x", d, num, SekPc); @@ -755,16 +756,14 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) } if (num == 0 && !(pvid->reg[0]&2) && (d&2)) - hvlatch = PicoVideoRead(0x08); + pvid->hv_latch = PicoVideoRead(0x08); if (num == 1 && ((pvid->reg[1]^d)&0x40)) { PicoVideoFIFOMode(d & 0x40); // handle line blanking before line rendering - if (SekCyclesDone() - Pico.t.m68c_line_start <= 488-390) { - skip = 1; + if (SekCyclesDone() - Pico.t.m68c_line_start <= 488-390) blankline = d&0x40 ? 
-1 : Pico.m.scanline; - } } - DrawSync(skip); + DrawSync(SekCyclesDone() - Pico.t.m68c_line_start <= 488-390); pvid->reg[num]=(unsigned char)d; switch (num) { @@ -904,7 +903,7 @@ PICO_INTERNAL_ASM unsigned int PicoVideoRead(unsigned int a) d = (SekCyclesDone() - Pico.t.m68c_line_start) & 0x1ff; // FIXME if (Pico.video.reg[0]&2) - d = hvlatch; + d = Pico.video.hv_latch; else if (Pico.video.reg[12]&1) d = hcounts_40[d] | (Pico.video.v_counter << 8); else d = hcounts_32[d] | (Pico.video.v_counter << 8); @@ -970,4 +969,33 @@ unsigned char PicoVideoRead8HV_L(void) return d; } +void PicoVideoSave(void) +{ + struct PicoVideo *pv = &Pico.video; + int l, x; + + // account for all outstanding xfers XXX kludge, entry attr's not saved + for (l = fifo_ql, x = fifo_qx + l-1; l > 1; l--, x--) + pv->fifo_cnt += (fifo_queue[x&7] >> 2) << (fifo_queue[x&7] & 1); +} + +void PicoVideoLoad(void) +{ + struct PicoVideo *pv = &Pico.video; + int l; + + // convert former dma_xfers (why was this in PicoMisc anyway?) + if (Pico.m.dma_xfers) { + pv->fifo_cnt = Pico.m.dma_xfers * (pv->type == 1 ? 
2 : 1); + fifo_total = Pico.m.dma_xfers; + Pico.m.dma_xfers = 0; + } + + // rebuild SAT cache XXX wrong since cache and memory can differ + for (l = 0; l < 80; l++) { + *((u16 *)VdpSATCache + 2*l ) = PicoMem.vram[(sat>>1) + l*4 ]; + *((u16 *)VdpSATCache + 2*l+1) = PicoMem.vram[(sat>>1) + l*4 + 1]; + } +} + // vim:shiftwidth=2:ts=2:expandtab From 25be5c52b070b7dabc3d24c3b01eae26d5008bea Mon Sep 17 00:00:00 2001 From: kub Date: Sun, 16 Feb 2020 13:53:50 +0100 Subject: [PATCH 0269/1110] vdp sprite handling improvement (SAT cache) --- pico/draw.c | 228 +++++++++++++++++------------------------------ pico/pico_int.h | 1 + pico/videoport.c | 137 ++++++++++++++++++---------- 3 files changed, 173 insertions(+), 193 deletions(-) diff --git a/pico/draw.c b/pico/draw.c index 49d41521..da87ede2 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -45,6 +45,8 @@ static int HighCacheA[41+1]; // caches for high layers static int HighCacheB[41+1]; static int HighPreSpr[80*2+1]; // slightly preprocessed sprites +unsigned int VdpSATCache[128]; // VDP sprite cache (1st 32 sprite attr bits) + #define LF_PLANE_1 (1 << 0) #define LF_SH (1 << 1) // must be = 2 #define LF_FORCE (1 << 2) @@ -1124,14 +1126,14 @@ static void DrawSpritesForced(unsigned char *sprited) // Index + 0 : hhhhvvvv ----hhvv yyyyyyyy yyyyyyyy // v, h: vert./horiz. 
size // Index + 4 : xxxxxxxx xxxxxxxx pccvhnnn nnnnnnnn // x: x coord + 8 -static NOINLINE void PrepareSprites(int full) +static NOINLINE void PrepareSprites(int max_lines) { const struct PicoVideo *pvid=&Pico.video; const struct PicoEState *est=&Pico.est; int u,link=0,sh; int table=0; int *pd = HighPreSpr; - int max_lines = 224, max_sprites = 80, max_width = 328; + int max_sprites = 80, max_width = 328; int max_line_sprites = 20; // 20 sprites, 40 tiles if (!(Pico.video.reg[12]&1)) @@ -1139,160 +1141,101 @@ static NOINLINE void PrepareSprites(int full) if (PicoIn.opt & POPT_DIS_SPRITE_LIM) max_line_sprites = MAX_LINE_SPRITES; - if (pvid->reg[1]&8) max_lines = 240; sh = Pico.video.reg[0xC]&8; // shadow/hilight? table=pvid->reg[5]&0x7f; if (pvid->reg[12]&1) table&=0x7e; // Lowest bit 0 in 40-cell mode table<<=8; // Get sprite table address/2 - if (!full) + for (u = est->DrawScanline; u < max_lines; u++) + *((int *)&HighLnSpr[u][0]) = 0; + + for (u = 0; u < max_sprites && link < max_sprites; u++) { - int pack; - // updates: tilecode, sx - for (u=0; u < max_sprites && link < max_sprites && (pack = *pd); u++, pd+=2) + unsigned int *sprite; + int code, code2, sx, sy, hv, height, width; + + sprite=(unsigned int *)(PicoMem.vram+((table+(link<<2))&0x7ffc)); // Find sprite + + // parse sprite info. the 1st half comes from the VDPs internal cache, + // the 2nd half is read from VRAM + code = VdpSATCache[link]; // normally but not always equal to sprite[0] + sy = (code&0x1ff)-0x80; + hv = (code>>24)&0xf; + height = (hv&3)+1; + width = (hv>>2)+1; + + code2 = sprite[1]; + sx = (code2>>16)&0x1ff; + sx -= 0x78; // Get X coordinate + 8 + + if (sy < max_lines && sy + (height<<3) >= est->DrawScanline) // sprite onscreen (y)? 
{ - unsigned int *sprite; - int code2, sx, sy, height, width; + int entry, y, w, sx_min, onscr_x, maybe_op = 0; - sprite=(unsigned int *)(PicoMem.vram+((table+(link<<2))&0x7ffc)); // Find sprite + sx_min = 8-(width<<3); + onscr_x = sx_min < sx && sx < max_width; + if (sh && (code2 & 0x6000) == 0x6000) + maybe_op = SPRL_MAY_HAVE_OP; - // parse sprite info - code2 = sprite[1]; - sx = (code2>>16)&0x1ff; - sx -= 0x78; // Get X coordinate + 8 - sy = (pack << 16) >> 16; - height = (pack >> 24) & 0xf; - width = (pack >> 28); - - if (sy < max_lines && - sy + (height<<3) > est->DrawScanline) // sprite onscreen (y)? + entry = ((pd - HighPreSpr) / 2) | ((code2>>8)&0x80); + y = (sy >= est->DrawScanline) ? sy : est->DrawScanline; + for (; y < sy + (height<<3) && y < max_lines; y++) { - int y = (sy >= est->DrawScanline) ? sy : est->DrawScanline; - int entry = ((pd - HighPreSpr) / 2) | ((code2>>8)&0x80); - for (; y < sy + (height<<3) && y < max_lines; y++) - { - int i, cnt; - cnt = HighLnSpr[y][0]; - if (HighLnSpr[y][3] >= max_line_sprites) continue; // sprite limit? + unsigned char *p = &HighLnSpr[y][0]; + int cnt = p[0]; + if (p[3] >= max_line_sprites) continue; // sprite limit? + if ((p[1] & SPRL_MASKED) && !(entry & 0x80)) continue; // masked? - for (i = 0; i < cnt; i++) - if (((HighLnSpr[y][4+i] ^ entry) & 0x7f) == 0) goto found; - - // this sprite was previously missing - HighLnSpr[y][3] ++; - if (sx > -24 && sx < max_width) { // onscreen x - HighLnSpr[y][4+cnt] = entry; // XXX wrong sequence? - HighLnSpr[y][5+cnt] = width; // XXX should count tiles for limit - HighLnSpr[y][0] = cnt + 1; - } -found:; - if (entry & 0x80) - HighLnSpr[y][1] |= SPRL_HAVE_HI; - else HighLnSpr[y][1] |= SPRL_HAVE_LO; + w = width; + if (p[2] + width > max_line_sprites*2) { // tile limit? 
+ if (y+1 < 240) HighLnSpr[y+1][1] |= SPRL_TILE_OVFL; + if (p[2] >= max_line_sprites*2) continue; + w = max_line_sprites*2 - p[2]; } + p[2] += w; + p[3] ++; + + if (sx == -0x78) { + if (p[1] & (SPRL_HAVE_X|SPRL_TILE_OVFL)) + p[1] |= SPRL_MASKED; // masked, no more low sprites for this line + if (!(p[1] & SPRL_HAVE_X) && cnt == 0) + p[1] |= SPRL_HAVE_MASK0; // 1st sprite is masking + } else + p[1] |= SPRL_HAVE_X; + + if (!onscr_x) continue; // offscreen x + + p[4+cnt] = entry; + p[5+cnt] = w; // width clipped by tile limit for sprite renderer + p[0] = cnt + 1; + p[1] |= (entry & 0x80) ? SPRL_HAVE_HI : SPRL_HAVE_LO; + p[1] |= maybe_op; // there might be op sprites on this line + if (cnt > 0 && (code2 & 0x8000) && !(p[4+cnt-1]&0x80)) + p[1] |= SPRL_LO_ABOVE_HI; } - - code2 &= ~0xfe000000; - code2 -= 0x00780000; // Get X coordinate + 8 in upper 16 bits - pd[1] = code2; - - // Find next sprite - link=(sprite[0]>>16)&0x7f; - if (!link) break; // End of sprites } + + *pd++ = (width<<28)|(height<<24)|(hv<<16)|((unsigned short)sy); + *pd++ = (sx<<16)|((unsigned short)code2); + + // Find next sprite + link=(code>>16)&0x7f; + if (!link) break; // End of sprites } - else - { - for (u = 0; u < max_lines; u++) - *((int *)&HighLnSpr[u][0]) = 0; - - for (u = 0; u < max_sprites && link < max_sprites; u++) - { - unsigned int *sprite; - int code, code2, sx, sy, hv, height, width; - - sprite=(unsigned int *)(PicoMem.vram+((table+(link<<2))&0x7ffc)); // Find sprite - - // parse sprite info - code = sprite[0]; - sy = (code&0x1ff)-0x80; - hv = (code>>24)&0xf; - height = (hv&3)+1; - - width = (hv>>2)+1; - code2 = sprite[1]; - sx = (code2>>16)&0x1ff; - sx -= 0x78; // Get X coordinate + 8 - - if (sy < max_lines && sy + (height<<3) > est->DrawScanline) // sprite onscreen (y)? 
- { - int entry, y, w, sx_min, onscr_x, maybe_op = 0; - - sx_min = 8-(width<<3); - onscr_x = sx_min < sx && sx < max_width; - if (sh && (code2 & 0x6000) == 0x6000) - maybe_op = SPRL_MAY_HAVE_OP; - - entry = ((pd - HighPreSpr) / 2) | ((code2>>8)&0x80); - y = (sy >= est->DrawScanline) ? sy : est->DrawScanline; - for (; y < sy + (height<<3) && y < max_lines; y++) - { - unsigned char *p = &HighLnSpr[y][0]; - int cnt = p[0]; - if (p[3] >= max_line_sprites) continue; // sprite limit? - if ((p[1] & SPRL_MASKED) && !(entry & 0x80)) continue; // masked? - - w = width; - if (p[2] + width > max_line_sprites*2) { // tile limit? - if (y+1 < 240) HighLnSpr[y+1][1] |= SPRL_TILE_OVFL; - if (p[2] >= max_line_sprites*2) continue; - w = max_line_sprites*2 - p[2]; - } - p[2] += w; - p[3] ++; - - if (sx == -0x78) { - if (p[1] & (SPRL_HAVE_X|SPRL_TILE_OVFL)) - p[1] |= SPRL_MASKED; // masked, no more low sprites for this line - if (!(p[1] & SPRL_HAVE_X) && cnt == 0) - p[1] |= SPRL_HAVE_MASK0; // 1st sprite is masking - } else - p[1] |= SPRL_HAVE_X; - - if (!onscr_x) continue; // offscreen x - - p[4+cnt] = entry; - p[5+cnt] = w; // width clipped by tile limit for sprite renderer - p[0] = cnt + 1; - p[1] |= (entry & 0x80) ? 
SPRL_HAVE_HI : SPRL_HAVE_LO; - p[1] |= maybe_op; // there might be op sprites on this line - if (cnt > 0 && (code2 & 0x8000) && !(p[4+cnt-1]&0x80)) - p[1] |= SPRL_LO_ABOVE_HI; - } - } - - *pd++ = (width<<28)|(height<<24)|(hv<<16)|((unsigned short)sy); - *pd++ = (sx<<16)|((unsigned short)code2); - - // Find next sprite - link=(code>>16)&0x7f; - if (!link) break; // End of sprites - } - *pd = 0; + *pd = 0; #if 0 - for (u = 0; u < max_lines; u++) - { - int y; - printf("c%03i: f %x c %2i/%2i w %2i: ", u, HighLnSpr[u][1], - HighLnSpr[u][0], HighLnSpr[u][3], HighLnSpr[u][2]); - for (y = 0; y < HighLnSpr[u][0]; y++) - printf(" %i", HighLnSpr[u][y+4]); - printf("\n"); - } -#endif + for (u = 0; u < max_lines; u++) + { + int y; + printf("c%03i: f %x c %2i/%2i w %2i: ", u, HighLnSpr[u][1], + HighLnSpr[u][0], HighLnSpr[u][3], HighLnSpr[u][2]); + for (y = 0; y < HighLnSpr[u][0]; y++) + printf(" %i", HighLnSpr[u][y+4]); + printf("\n"); } +#endif } #ifndef _ASM_DRAW_C @@ -1505,12 +1448,11 @@ static int DrawDisplay(int sh) int win=0, edge=0, hvwind=0, lflags; int maxw, maxcells; - if (est->rendstatus & (PDRAW_SPRITES_MOVED|PDRAW_DIRTY_SPRITES)) { - // elprintf(EL_STATUS, "PrepareSprites(%i)", (est->rendstatus>>4)&1); - PrepareSprites(est->rendstatus & PDRAW_DIRTY_SPRITES); - est->rendstatus &= ~(PDRAW_SPRITES_MOVED|PDRAW_DIRTY_SPRITES); - } + if (!(est->DrawScanline & 15) || + (est->rendstatus & (PDRAW_SPRITES_MOVED|PDRAW_DIRTY_SPRITES))) + PrepareSprites((est->DrawScanline+16) & ~15); + est->rendstatus &= ~(PDRAW_SPRITES_MOVED|PDRAW_DIRTY_SPRITES); est->rendstatus &= ~(PDRAW_SHHI_DONE|PDRAW_PLANE_HI_PRIO); if (pvid->reg[12]&1) { @@ -1656,8 +1598,6 @@ PICO_INTERNAL void PicoFrameStart(void) if (PicoIn.opt & POPT_ALT_RENDERER) return; - - PrepareSprites(1); } static void DrawBlankedLine(int line, int offs, int sh, int bgc) diff --git a/pico/pico_int.h b/pico/pico_int.h index 70329224..12f35b56 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -676,6 +676,7 @@ extern int 
(*PicoScanEnd)(unsigned int num); extern unsigned char HighLnSpr[240][4+MAX_LINE_SPRITES+1]; extern void *DrawLineDestBase; extern int DrawLineDestIncrement; +extern unsigned int VdpSATCache[128]; // draw2.c void PicoDraw2Init(void); diff --git a/pico/videoport.c b/pico/videoport.c index f64ac693..b9e0401b 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -15,6 +15,8 @@ extern const unsigned char hcounts_32[]; extern const unsigned char hcounts_40[]; static int blankline; // display disabled for this line +static unsigned sat; // VRAM addr of sprite attribute table +static int satxbits; // index bits in SAT address int (*PicoDmaHook)(unsigned int source, int len, unsigned short **base, unsigned int *mask) = NULL; @@ -315,14 +317,37 @@ void PicoVideoFIFOMode(int active) static __inline void AutoIncrement(void) { Pico.video.addr=(unsigned short)(Pico.video.addr+Pico.video.reg[0xf]); + if (Pico.video.addr < Pico.video.reg[0xf]) Pico.video.addr_u ^= 1; } -static NOINLINE unsigned int VideoWrite128(u32 a, u16 d) +static __inline void UpdateSAT(u32 a, u32 d) +{ + Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; + if (!((a^sat) >> satxbits) && !(a & 4)) { + int num = (a >> 3) & 0x7f; + ((u16 *)&VdpSATCache[num])[(a&3) >> 1] = d; + } +} + +static NOINLINE void VideoWriteVRAM128(u32 a, u16 d) { // nasty - a = ((a & 2) >> 1) | ((a & 0x400) >> 9) | (a & 0x3FC) | ((a & 0x1F800) >> 1); - ((u8 *)PicoMem.vram)[a] = d; - return a; + u32 b = ((a & 2) >> 1) | ((a & 0x400) >> 9) | (a & 0x3FC) | ((a & 0x1F800) >> 1); + + ((u8 *)PicoMem.vram)[b] = d; + if (!((u16)(b^sat) >> satxbits)) + Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; + + if (!((u16)(a^sat) >> satxbits)) + UpdateSAT(a, d); +} + +static void VideoWriteVRAM(u32 a, u16 d) +{ + PicoMem.vram [(u16)a >> 1] = d; + + if (!((u16)(a^sat) >> satxbits)) + UpdateSAT(a, d); } static void VideoWrite(u16 d) @@ -333,19 +358,15 @@ static void VideoWrite(u16 d) { case 1: if (a & 1) d = (u16)((d << 8) | (d >> 8)); - PicoMem.vram [(a >> 1) & 
0x7fff] = d; - if ((unsigned)(a - ((Pico.video.reg[5]&0x7f) << 9)) < 0x400) - Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; + a |= Pico.video.addr_u << 16; + VideoWriteVRAM(a, d); break; case 3: if (PicoMem.cram [(a >> 1) & 0x3f] != d) Pico.m.dirtyPal = 1; - PicoMem.cram [(a >> 1) & 0x3f] = d; break; - case 5: PicoMem.vsram[(a >> 1) & 0x3f] = d; break; - case 0x81: if (a & 1) - d = (u16)((d << 8) | (d >> 8)); + PicoMem.cram [(a >> 1) & 0x3f] = d & 0xeee; break; + case 5: PicoMem.vsram[(a >> 1) & 0x3f] = d & 0x7ff; break; + case 0x81: a |= Pico.video.addr_u << 16; - a = VideoWrite128(a, d); - if ((unsigned)(a - ((Pico.video.reg[5]&0x7f) << 9)) < 0x400) - Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; + VideoWriteVRAM128(a, d); break; //default:elprintf(EL_ANOMALY, "VDP write %04x with bad type %i", d, Pico.video.type); break; } @@ -363,9 +384,10 @@ static unsigned int VideoRead(void) switch (Pico.video.type) { case 0: d=PicoMem.vram [a & 0x7fff]; break; - case 8: d=(PicoMem.cram [a & 0x003f] & 0x0eee) | (d & ~0x0eee); break; + case 8: d=PicoMem.cram [a & 0x003f] | (d & ~0x0eee); break; + case 4: if ((a & 0x3f) >= 0x28) a = 0; - d=(PicoMem.vsram [a & 0x003f] & 0x07ff) | (d & ~0x07ff); break; + d=PicoMem.vsram [a & 0x003f] | (d & ~0x07ff); break; case 12:a=PicoMem.vram [a & 0x7fff]; if (Pico.video.addr&1) a >>= 8; d=(a & 0x00ff) | (d & ~0x00ff); break; default:elprintf(EL_ANOMALY, "VDP read with bad type %i", Pico.video.type); break; @@ -391,7 +413,7 @@ static int GetDmaLength(void) static void DmaSlow(int len, unsigned int source) { u32 inc = Pico.video.reg[0xf]; - u32 a = Pico.video.addr; + u32 a = Pico.video.addr | (Pico.video.addr_u << 16); u16 *r, *base = NULL; u32 mask = 0x1ffff; @@ -451,26 +473,28 @@ static void DmaSlow(int len, unsigned int source) switch (Pico.video.type) { case 1: // vram +#if 0 r = PicoMem.vram; - if (inc == 2 && !(a & 1) && a + len * 2 < 0x10000 - && !(((source + len - 1) ^ source) & ~mask)) + if (inc == 2 && !(a & 1) && (a >> 16) == ((a + 
len*2) >> 16) && + (source & ~mask) == ((source + len-1) & ~mask) && + (a << 16 >= (sat+0x280) << 16 || (a + len*2) << 16 <= sat << 16)) { // most used DMA mode memcpy((char *)r + a, base + (source & mask), len * 2); a += len * 2; } else +#endif { for(; len; len--) { u16 d = base[source++ & mask]; if(a & 1) d=(d<<8)|(d>>8); - r[a >> 1] = d; + VideoWriteVRAM(a, d); // AutoIncrement - a = (u16)(a + inc); + a = (a+inc) & ~0x20000; } } - Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; break; case 3: // cram @@ -478,9 +502,9 @@ static void DmaSlow(int len, unsigned int source) r = PicoMem.cram; for (; len; len--) { - r[(a / 2) & 0x3f] = base[source++ & mask]; + r[(a / 2) & 0x3f] = base[source++ & mask] & 0xeee; // AutoIncrement - a += inc; + a = (a+inc) & ~0x20000; } break; @@ -488,22 +512,20 @@ static void DmaSlow(int len, unsigned int source) r = PicoMem.vsram; for (; len; len--) { - r[(a / 2) & 0x3f] = base[source++ & mask]; + r[(a / 2) & 0x3f] = base[source++ & mask] & 0x7ff; // AutoIncrement - a += inc; + a = (a+inc) & ~0x20000; } break; case 0x81: // vram 128k - a |= Pico.video.addr_u << 16; for(; len; len--) { - VideoWrite128(a, base[source++ & mask]); + u16 d = base[source++ & mask]; + VideoWriteVRAM128(a, d); // AutoIncrement - a = (a + inc) & 0x1ffff; + a = (a+inc) & ~0x20000; } - Pico.video.addr_u = a >> 16; - Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; break; default: @@ -512,12 +534,13 @@ static void DmaSlow(int len, unsigned int source) break; } // remember addr - Pico.video.addr=(u16)a; + Pico.video.addr = a; + Pico.video.addr_u = a >> 16; } static void DmaCopy(int len) { - u16 a = Pico.video.addr; + u32 a = Pico.video.addr | (Pico.video.addr_u << 16); u8 *vr = (u8 *)PicoMem.vram; u8 inc = Pico.video.reg[0xf]; int source; @@ -528,21 +551,23 @@ static void DmaCopy(int len) source =Pico.video.reg[0x15]; source|=Pico.video.reg[0x16]<<8; - // XXX implement VRAM 128k? Is this even working? + // XXX implement VRAM 128k? Is this even working? count still in bytes? 
for (; len; len--) { - vr[a] = vr[source++ & 0xffff]; + vr[(u16)a] = vr[(u16)(source++)]; + if (!((u16)(a^sat) >> satxbits)) + UpdateSAT(a, ((u16 *)vr)[(u16)a >> 1]); // AutoIncrement - a=(u16)(a+inc); + a = (a+inc) & ~0x20000; } // remember addr - Pico.video.addr=a; - Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; + Pico.video.addr = a; + Pico.video.addr_u = a >> 16; } static NOINLINE void DmaFill(int data) { - u16 a = Pico.video.addr; + u32 a = Pico.video.addr | (Pico.video.addr_u << 16); u8 *vr = (u8 *)PicoMem.vram; u8 high = (u8)(data >> 8); u8 inc = Pico.video.reg[0xf]; @@ -561,40 +586,41 @@ static NOINLINE void DmaFill(int data) for (l = len; l; l--) { // Write upper byte to adjacent address // (here we are byteswapped, so address is already 'adjacent') - vr[a] = high; + vr[(u16)a] = high; + if (!((u16)(a^sat) >> satxbits)) + UpdateSAT(a, ((u16 *)vr)[(u16)a >> 1]); // Increment address register - a = (u16)(a + inc); + a = (a+inc) & ~0x20000; } - Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; break; case 3: // cram Pico.m.dirtyPal = 1; + data &= 0xeee; for (l = len; l; l--) { PicoMem.cram[(a/2) & 0x3f] = data; // Increment address register - a += inc; + a = (a+inc) & ~0x20000; } break; case 5: { // vsram + data &= 0x7ff; for (l = len; l; l--) { PicoMem.vsram[(a/2) & 0x3f] = data; // Increment address register - a += inc; + a = (a+inc) & ~0x20000; } break; } case 0x81: // vram 128k for (l = len; l; l--) { - VideoWrite128(a, data); + VideoWriteVRAM128(a, data); // Increment address register - a = (a + inc) & 0x1ffff; + a = (a+inc) & ~0x20000; } - Pico.video.addr_u = a >> 16; - Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; break; default: a += len * inc; @@ -603,6 +629,7 @@ static NOINLINE void DmaFill(int data) // remember addr Pico.video.addr = a; + Pico.video.addr_u = a >> 16; // register update Pico.video.reg[0x13] = Pico.video.reg[0x14] = 0; source = Pico.video.reg[0x15]; @@ -779,14 +806,21 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) 
pvid->status |= ((d >> 3) ^ SR_VB) & SR_VB; // forced blanking goto update_irq; case 0x05: - //elprintf(EL_STATUS, "spritep moved to %04x", (unsigned)(Pico.video.reg[5]&0x7f) << 9); + case 0x06: if (d^dold) Pico.est.rendstatus |= PDRAW_SPRITES_MOVED; break; case 0x0c: // renderers should update their palettes if sh/hi mode is changed if ((d^dold)&8) Pico.m.dirtyPal = 1; break; + default: + return; } + sat = ((pvid->reg[5]&0x7f) << 9) | ((pvid->reg[6]&0x20) << 11); + satxbits = 9; + if (Pico.video.reg[12]&1) + sat &= ~0x200, satxbits = 10; // H40, zero lowest SAT bit + //elprintf(EL_STATUS, "spritep moved to %04x", sat); return; update_irq: @@ -991,6 +1025,11 @@ void PicoVideoLoad(void) Pico.m.dma_xfers = 0; } + sat = ((pv->reg[5]&0x7f) << 9) | ((pv->reg[6]&0x20) << 11); + satxbits = 9; + if (pv->reg[12]&1) + sat &= ~0x200, satxbits = 10; // H40, zero lowest SAT bit + // rebuild SAT cache XXX wrong since cache and memory can differ for (l = 0; l < 80; l++) { *((u16 *)VdpSATCache + 2*l ) = PicoMem.vram[(sat>>1) + l*4 ]; From 787a0af9dcf27430ee79dcffa68de59569b9d39d Mon Sep 17 00:00:00 2001 From: kub Date: Sun, 23 Feb 2020 11:33:02 +0100 Subject: [PATCH 0270/1110] vdp fifo, another revision --- pico/pico_int.h | 7 +- pico/videoport.c | 217 +++++++++++++++++++++++++---------------------- 2 files changed, 120 insertions(+), 104 deletions(-) diff --git a/pico/pico_int.h b/pico/pico_int.h index 12f35b56..65b56f1d 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -296,10 +296,9 @@ extern SH2 sh2s[2]; // not part of real SR #define PVS_ACTIVE (1 << 16) #define PVS_VB2 (1 << 17) // ignores forced blanking -#define PVS_CPUWR (1 << 18) // CPU hold by FIFO full -#define PVS_CPURD (1 << 19) // CPU hold by FIFO full -#define PVS_DMAPEND (1 << 20) // DMA operation waiting for start -#define PVS_DMAFILL (1 << 21) // DMA fill is in progress +#define PVS_CPUWR (1 << 18) // CPU write blocked by FIFO full +#define PVS_CPURD (1 << 19) // CPU read blocked by FIFO not empty +#define 
PVS_DMAFILL (1 << 20) // DMA fill is waiting for fill data struct PicoVideo { diff --git a/pico/videoport.c b/pico/videoport.c index b9e0401b..533c7880 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -37,8 +37,10 @@ int (*PicoDmaHook)(unsigned int source, int len, unsigned short **base, unsigned * fifo_slot is always behind slot2cyc[cycles]. Advancing it beyond cycles * implies blocking the 68k up to that slot. * - * A FIFO write goes to the end of the fifo queue. There can be more pending - * writes than FIFO slots, but the 68k will be blocked in most of those cases. + * A FIFO write goes to the end of the FIFO queue, but DMA running in background + * is always the last queue entry (transfers by CPU intervene and come 1st). + * There can be more pending writes than FIFO slots, but the CPU will be blocked + * until FIFO level (without background DMA) <= 4. * This is only about correct timing, data xfer must be handled by the caller. * Blocking the CPU means burning cycles via SekCyclesBurn*(), which is to be * executed by the caller. @@ -50,16 +52,14 @@ int (*PicoDmaHook)(unsigned int source, int len, unsigned short **base, unsigned * FIFORead executes a 68k read. 68k is blocked until the next transfer slot. */ -// FIFO transfer slots per line: H32 blank, H40 blank, H32 active, H40 active -static const short vdpslots[] = { 166, 204, 16, 18 }; -// mapping between slot# and 68k cycles in a blanked scanline -static const int vdpcyc2sl_bl[] = { (166<<16)/488, (204<<16)/488, (16<<16)/488, (18<<16)/488 }; -static const int vdpsl2cyc_bl[] = { (488<<16)/166, (488<<16)/204, (488<<16)/16, (488<<16)/18 }; +// mapping between slot# and 68k cycles in a blanked scanline [H32, H40] +static const int vdpcyc2sl_bl[] = { (166<<16)/488, (204<<16)/488 }; +static const int vdpsl2cyc_bl[] = { (488<<16)/166, (488<<16)/204 }; // VDP transfer slots in active display 32col mode. 1 slot is 488/171 = 2.8538 // 68k cycles. 
Only 16 of the 171 slots in a scanline can be used by CPU/DMA: // (HINT=slot 0): 13,27,42,50,58,74,82,90,106,114,122,138,146,154,169,170 -const unsigned char vdpcyc2sl_32[] = { // 68k cycles/4 since HINT to slot # +static const unsigned char vdpcyc2sl_32[] = { // 68k cycles/4 to slot # // 4 8 12 16 20 24 28 32 36 40 44 48 52 56 60 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, @@ -70,14 +70,14 @@ const unsigned char vdpcyc2sl_32[] = { // 68k cycles/4 since HINT to slot # 11,11,12,12,12,12,12,12,13,13,13,13,13,13,14,14, 14,14,14,14,14,14,14,14,15,16,16,16,16,16,16,16, }; -const unsigned char vdpsl2cyc_32[] = { // slot # to 68k cycles/4 since HINT - 0, 9, 19, 30, 35, 41, 52, 58, 64, 75, 81, 87, 98,104,110,120,121,123 +static const unsigned char vdpsl2cyc_32[] = { // slot # to 68k cycles/4 + 0, 9, 19, 30, 35, 41, 52, 58, 64, 75, 81, 87, 98,104,110,120,121,131 }; // VDP transfer slots in active display 40col mode. 1 slot is 488/210 = 2.3238 // 68k cycles. 
Only 18 of the 210 slots in a scanline can be used by CPU/DMA: // (HINT=0): 23,49,57,65,81,89,97,113,121,129,145,153,161,177,185,193,208,209 -const unsigned char vdpcyc2sl_40[] = { // 68k cycles/4 since HINT to slot # +static const unsigned char vdpcyc2sl_40[] = { // 68k cycles/4 to slot # // 4 8 12 16 20 24 28 32 36 40 44 48 52 56 60 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, @@ -88,8 +88,8 @@ const unsigned char vdpcyc2sl_40[] = { // 68k cycles/4 since HINT to slot # 13,13,13,13,13,13,14,14,14,14,14,15,15,15,15,15, 16,16,16,16,16,16,16,16,17,18,18,18,18,18,18,18, }; -const unsigned char vdpsl2cyc_40[] = { // slot # to 68k cycles/4 since HINT - 0, 13, 28, 33, 37, 47, 51, 56, 65, 70, 74, 84, 88, 93,102,107,112,120,121,123 +static const unsigned char vdpsl2cyc_40[] = { // slot # to 68k cycles/4 + 0, 13, 28, 33, 37, 47, 51, 56, 65, 70, 74, 84, 88, 93,102,107,112,120,121,135 }; // NB code assumes fifo_* arrays have size 2^n @@ -97,51 +97,79 @@ const unsigned char vdpsl2cyc_40[] = { // slot # to 68k cycles/4 since HINT static short fifo_data[4], fifo_dx; // XXX must go into save? // queued FIFO transfers, ...x = index, ...l = queue length -// each entry has 2 values: [n]>>2=#writes, [n]&3=flags:2=DMA fill 1=byte access +// each entry has 2 values: [n]>>3 = #writes, [n]&7 = flags static int fifo_queue[8], fifo_qx, fifo_ql; // XXX must go into save? -unsigned int fifo_total; // total# of pending FIFO entries +enum { FQ_BYTE = 1, FQ_BGDMA = 2, FQ_FGDMA = 4 }; // queue flags, NB: BYTE = 1! +unsigned int fifo_total; // total# of pending FIFO entries (w/o BGDMA) unsigned short fifo_slot; // last executed slot in current scanline +// map cycles to FIFO slot +static __inline int GetFIFOSlot(struct PicoVideo *pv, int cycles) +{ + int active = !(pv->status & SR_VB) && (pv->reg[1] & 0x40); + int h40 = pv->reg[12] & 1; + + if (active) return (h40 ? 
vdpcyc2sl_40 : vdpcyc2sl_32)[cycles/4]; + else return (cycles * vdpcyc2sl_bl[h40] + cycles) >> 16; +} + +// map FIFO slot to cycles +static __inline int GetFIFOCycles(struct PicoVideo *pv, int slot) +{ + int active = !(pv->status & SR_VB) && (pv->reg[1] & 0x40); + int h40 = pv->reg[12] & 1; + + if (active) return (h40 ? vdpsl2cyc_40 : vdpsl2cyc_32)[slot]*4; + else return ((slot * vdpsl2cyc_bl[h40] + slot) >> 16); +} + // do the FIFO math static __inline int AdvanceFIFOEntry(struct PicoVideo *pv, int slots) { - int l = slots, b = fifo_queue[fifo_qx&7] & 1; + int l = slots, b = fifo_queue[fifo_qx] & FQ_BYTE; + // advance currently active FIFO entry if (l > pv->fifo_cnt) l = pv->fifo_cnt; - fifo_total -= ((pv->fifo_cnt & b) + l) >> b; + if (!(fifo_queue[fifo_qx] & FQ_BGDMA)) + fifo_total -= ((pv->fifo_cnt & b) + l) >> b; pv->fifo_cnt -= l; + // if entry has been processed... if (pv->fifo_cnt == 0) { + if (fifo_ql) { + // terminate DMA if applicable + if ((pv->status & SR_DMA) && (fifo_queue[fifo_qx] & FQ_BGDMA)) { + pv->status &= ~SR_DMA; + pv->command &= ~0x80; + } + // remove entry from FIFO + fifo_qx ++, fifo_qx &= 7, fifo_ql --; + } + // start processing for next entry if there is one if (fifo_ql) - fifo_qx ++, fifo_ql --; - if (fifo_ql) - pv->fifo_cnt= (fifo_queue[fifo_qx&7] >> 2) << (fifo_queue[fifo_qx&7] & 1); + pv->fifo_cnt= (fifo_queue[fifo_qx] >> 3) << (fifo_queue[fifo_qx] & FQ_BYTE); else fifo_total = 0; } return l; } -static __inline int GetFIFOSlot(struct PicoVideo *pv, int cycles) +static __inline void SetFIFOState(struct PicoVideo *pv) { - int active = !(pv->status & SR_VB) && (pv->reg[1] & 0x40); - int h40 = pv->reg[12] & 1; - const unsigned char *cs = h40 ? 
vdpcyc2sl_40 : vdpcyc2sl_32; - - if (active) return cs[cycles/4]; - else return (cycles * vdpcyc2sl_bl[h40] + cycles) >> 16; -} - -static __inline int GetFIFOCycles(struct PicoVideo *pv, int slot) -{ - int active = !(pv->status & SR_VB) && (pv->reg[1] & 0x40); - int h40 = pv->reg[12] & 1; - const unsigned char *sc = h40 ? vdpsl2cyc_40 : vdpsl2cyc_32; - - if (active) return sc[slot]*4; - else return ((slot * vdpsl2cyc_bl[h40] + slot) >> 16); + // release CPU and terminate DMA if FIFO isn't blocking the 68k anymore + if (fifo_total == 0) + pv->status &= ~PVS_CPURD; + if (fifo_total <= 4) { + int x = (fifo_qx + fifo_ql - 1) & 7; + if ((pv->status & SR_DMA) && !(pv->status & PVS_DMAFILL) && + fifo_ql && !(fifo_queue[x] & FQ_BGDMA)) { + pv->status &= ~SR_DMA; + pv->command &= ~0x80; + } + pv->status &= ~PVS_CPUWR; + } } // sync FIFO to cycles @@ -161,57 +189,40 @@ void PicoVideoFIFOSync(int cycles) done -= l; } - // release CPU and terminate DMA if FIFO isn't blocking the 68k anymore - if (fifo_total <= 4) { - pv->status &= ~PVS_CPUWR; - pv->command &= ~0x80; - if (!(pv->status & PVS_DMAPEND)) - pv->status &= ~(SR_DMA|PVS_DMAFILL); - } - if (fifo_total == 0) - pv->status &= ~PVS_CPURD; + SetFIFOState(pv); } // drain FIFO, blocking 68k on the way. FIFO must be synced prior to drain. 
-int PicoVideoFIFODrain(int level, int cycles) +int PicoVideoFIFODrain(int level, int cycles, int bgdma) { struct PicoVideo *pv = &Pico.video; - int active = !(pv->status & SR_VB) && (pv->reg[1] & 0x40); - int h40 = pv->reg[12] & 1; - int maxsl = vdpslots[h40 + 2*active]; // max xfer slots in this scanline + int maxsl = GetFIFOSlot(pv, 488); // max xfer slots in this scanline int burn = 0; - while (fifo_total > level && fifo_slot < maxsl) { - int b = fifo_queue[fifo_qx&7] & 1; - int cnt = (fifo_total-level) << b; + // process FIFO entries until low level is reached + while (fifo_total > level && fifo_slot < maxsl && + (!(fifo_queue[fifo_qx] & FQ_BGDMA) || bgdma)) { + int b = fifo_queue[fifo_qx] & FQ_BYTE; + int cnt = ((fifo_total-level) << b) - (pv->fifo_cnt & b); int last = fifo_slot; - int slot = (pv->fifo_cnt<cnt?pv->fifo_cnt:cnt) + last; // target slot + int slot = (pv->fifo_cnt < cnt ? pv->fifo_cnt : cnt) + last; // target slot unsigned ocyc = cycles; if (slot > maxsl) { // target in later scanline, advance to eol slot = maxsl; - fifo_slot = maxsl; cycles = 488; } else { // advance FIFO to target slot and CPU to cycles at that slot - fifo_slot = slot; cycles = GetFIFOCycles(pv, slot); } + fifo_slot = slot; burn += cycles - ocyc; AdvanceFIFOEntry(pv, slot - last); } - // release CPU and terminate DMA if FIFO isn't blocking the bus anymore - if (fifo_total <= 4) { - pv->status &= ~PVS_CPUWR; - pv->command &= ~0x80; - if (!(pv->status & PVS_DMAPEND)) - pv->status &= ~(SR_DMA|PVS_DMAFILL); - } - if (fifo_total == 0) - pv->status &= ~PVS_CPURD; + SetFIFOState(pv); return burn; } @@ -220,13 +231,13 @@ int PicoVideoFIFODrain(int level, int cycles) int PicoVideoFIFORead(void) { struct PicoVideo *pv = &Pico.video; - int lc = SekCyclesDone()-Pico.t.m68c_line_start+4; + int lc = SekCyclesDone()-Pico.t.m68c_line_start; int burn = 0; PicoVideoFIFOSync(lc); // advance FIFO and CPU until FIFO is empty - burn = PicoVideoFIFODrain(0, lc); + burn = PicoVideoFIFODrain(0, lc, 1); lc +=
burn; if (fifo_total > 0) pv->status |= PVS_CPURD; // target slot is in later scanline @@ -243,34 +254,41 @@ int PicoVideoFIFORead(void) int PicoVideoFIFOWrite(int count, int flags, unsigned sr_mask,unsigned sr_flags) { struct PicoVideo *pv = &Pico.video; - int lc = SekCyclesDone()-Pico.t.m68c_line_start+4; - int burn = 0; + int lc = SekCyclesDone()-Pico.t.m68c_line_start; + int burn = 0, x; PicoVideoFIFOSync(lc); pv->status = (pv->status & ~sr_mask) | sr_flags; if (count && fifo_ql < 8) { // update FIFO state if it was empty - if (fifo_total == 0 && count) { - fifo_slot = GetFIFOSlot(pv, lc); - pv->fifo_cnt = count << (flags&1); + if (fifo_ql == 0) { + fifo_slot = GetFIFOSlot(pv, lc+10); // FIFO latency ~4 vdp slots + pv->fifo_cnt = count << (flags & FQ_BYTE); } // create xfer queue entry - int x = (fifo_qx + fifo_ql) & 7; - fifo_queue[x] = (count << 2) | flags; + x = (fifo_qx + fifo_ql - 1) & 7; + if (fifo_ql && (fifo_queue[x] & FQ_BGDMA)) { + // CPU FIFO writes have priority over a background DMA Fill/Copy + fifo_queue[(x+1) & 7] = fifo_queue[x]; + if (fifo_ql == 1) { + // XXX if interrupting a DMA fill, fill data changes + int f = fifo_queue[x] & 7; + fifo_queue[(x+1) & 7] = (pv->fifo_cnt >> (f & FQ_BYTE) << 3) | f; + pv->fifo_cnt = count << (flags & FQ_BYTE); + } + } else + x = (x+1) & 7; + fifo_queue[x] = (count << 3) | flags; fifo_ql ++; - fifo_total += count; + if (!(flags & FQ_BGDMA)) + fifo_total += count; } // if CPU is waiting for the bus, advance CPU and FIFO until bus is free - if ((pv->status & (PVS_CPUWR|PVS_DMAFILL)) == PVS_CPUWR) - burn = PicoVideoFIFODrain(4, lc); - else if (fifo_queue[fifo_qx&7]&2) { - // if interrupting a DMA fill terminate it XXX wrong, changes fill data - AdvanceFIFOEntry(pv, pv->fifo_cnt); - pv->status &= ~PVS_DMAFILL; - } + if (pv->status & PVS_CPUWR) + burn = PicoVideoFIFODrain(4, lc, 0); return burn; } @@ -287,7 +305,7 @@ int PicoVideoFIFOHint(void) // if CPU is waiting for the bus, advance CPU and FIFO until bus is free 
if (pv->status & PVS_CPURD) burn = PicoVideoFIFORead(); - if (pv->status & PVS_CPUWR) + else if (pv->status & PVS_CPUWR) burn = PicoVideoFIFOWrite(0, 0, 0, 0); return burn; @@ -297,16 +315,15 @@ int PicoVideoFIFOHint(void) void PicoVideoFIFOMode(int active) { struct PicoVideo *pv = &Pico.video; - const unsigned char *cs = pv->reg[12]&1 ? vdpcyc2sl_40 : vdpcyc2sl_32; int h40 = pv->reg[12] & 1; int lc = SekCyclesDone() - Pico.t.m68c_line_start; PicoVideoFIFOSync(lc); - if (fifo_total) { + if (fifo_ql) { // recalculate FIFO slot for new mode if (!(pv->status & SR_VB) && active) - fifo_slot = cs[lc/4]; + fifo_slot = (pv->reg[12]&1 ? vdpcyc2sl_40 : vdpcyc2sl_32)[lc/4]; else fifo_slot = ((lc * vdpcyc2sl_bl[h40] + lc) >> 16); } } @@ -421,8 +438,8 @@ static void DmaSlow(int len, unsigned int source) Pico.video.type, source, a, len, inc, (Pico.video.status&SR_VB)||!(Pico.video.reg[1]&0x40), SekCyclesDone(), SekPc); - SekCyclesBurnRun(PicoVideoFIFOWrite(len, Pico.video.type == 1, PVS_DMAPEND, - SR_DMA | PVS_CPUWR) + 8); + SekCyclesBurnRun(PicoVideoFIFOWrite(len, FQ_FGDMA | (Pico.video.type == 1), + 0, SR_DMA| PVS_CPUWR)); if ((source & 0xe00000) == 0xe00000) { // Ram base = (u16 *)PicoMem.ram; @@ -546,7 +563,8 @@ static void DmaCopy(int len) int source; elprintf(EL_VDPDMA, "DmaCopy len %i [%u]", len, SekCyclesDone()); - SekCyclesBurnRun(PicoVideoFIFOWrite(len, 1, PVS_CPUWR | PVS_DMAPEND, SR_DMA)); + SekCyclesBurnRun(PicoVideoFIFOWrite(len, FQ_BGDMA | FQ_BYTE, + PVS_CPUWR, SR_DMA)); source =Pico.video.reg[0x15]; source|=Pico.video.reg[0x16]<<8; @@ -577,8 +595,8 @@ static NOINLINE void DmaFill(int data) len = GetDmaLength(); elprintf(EL_VDPDMA, "DmaFill len %i inc %i [%u]", len, inc, SekCyclesDone()); - SekCyclesBurnRun(PicoVideoFIFOWrite(len, 2|(Pico.video.type == 1), - PVS_CPUWR | PVS_DMAPEND, SR_DMA)); + SekCyclesBurnRun(PicoVideoFIFOWrite(len, FQ_BGDMA | (Pico.video.type == 1), + PVS_CPUWR | PVS_DMAFILL, SR_DMA)); switch (Pico.video.type) { @@ -648,7 +666,6 @@ static 
NOINLINE void CommandDma(void) u32 len, method; u32 source; - pvid->status |= PVS_DMAPEND; PicoVideoFIFOSync(SekCyclesDone()-Pico.t.m68c_line_start); if (pvid->status & SR_DMA) { elprintf(EL_VDPDMA, "Dma overlap, left=%d @ %06x", @@ -748,12 +765,14 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) VideoWrite(d); // start DMA fill on write. NB VSRAM and CRAM fills use wrong FIFO data. - if ((pvid->status & (PVS_DMAPEND|PVS_DMAFILL)) == (PVS_DMAPEND|PVS_DMAFILL)) + if (pvid->status & PVS_DMAFILL) DmaFill(fifo_data[(fifo_dx + !!(pvid->type&~0x81))&3]); break; case 0x04: // Control (command) port 4 or 6 + if (pvid->status & SR_DMA) + SekCyclesBurnRun(PicoVideoFIFORead()); // kludge, flush out running DMA if (pvid->pending) { // Low word of command: @@ -886,14 +905,12 @@ static u32 VideoSr(const struct PicoVideo *pv) unsigned int hp = pv->reg[12]&1 ? 32:40; // HBLANK start unsigned int hl = pv->reg[12]&1 ? 94:84; // HBLANK length - c = SekCyclesDone(); - if (c - Pico.t.m68c_line_start - hp < hl) + c = SekCyclesDone() - Pico.t.m68c_line_start; + if (c - hp < hl) d |= SR_HB; - PicoVideoFIFOSync(c-Pico.t.m68c_line_start); - if (pv->status & SR_DMA) - d |= SR_EMPT; // unused by DMA, or rather flags not updated? 
- else if (fifo_total >= 4) + PicoVideoFIFOSync(c); + if (fifo_total >= 4) d |= SR_FULL; else if (!fifo_total) d |= SR_EMPT; @@ -1010,7 +1027,7 @@ void PicoVideoSave(void) // account for all outstanding xfers XXX kludge, entry attr's not saved for (l = fifo_ql, x = fifo_qx + l-1; l > 1; l--, x--) - pv->fifo_cnt += (fifo_queue[x&7] >> 2) << (fifo_queue[x&7] & 1); + pv->fifo_cnt += (fifo_queue[x&7] >> 2) << (fifo_queue[x&7] & FQ_BYTE); } void PicoVideoLoad(void) From e72bc9099c8e7c380e9779b4b1d110f1de8d5716 Mon Sep 17 00:00:00 2001 From: kub Date: Sun, 23 Feb 2020 20:15:07 +0100 Subject: [PATCH 0271/1110] vdp sprite rendering fix --- pico/videoport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pico/videoport.c b/pico/videoport.c index 533c7880..122908e4 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -341,7 +341,7 @@ static __inline void UpdateSAT(u32 a, u32 d) { Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; if (!((a^sat) >> satxbits) && !(a & 4)) { - int num = (a >> 3) & 0x7f; + int num = (a-sat) >> 3; ((u16 *)&VdpSATCache[num])[(a&3) >> 1] = d; } } From 3c6da92ba0b42b7b4eaacc7febbf0d77fbdef0af Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 25 Feb 2020 21:59:02 +0100 Subject: [PATCH 0272/1110] vdp fifo, refined timing --- pico/videoport.c | 60 +++++++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/pico/videoport.c b/pico/videoport.c index 122908e4..fb6bd69a 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -2,6 +2,7 @@ * PicoDrive * (c) Copyright Dave, 2004 * (C) notaz, 2006-2009 + * (C) kub, 2020 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. @@ -58,38 +59,38 @@ static const int vdpsl2cyc_bl[] = { (488<<16)/166, (488<<16)/204 }; // VDP transfer slots in active display 32col mode. 1 slot is 488/171 = 2.8538 // 68k cycles. 
Only 16 of the 171 slots in a scanline can be used by CPU/DMA: -// (HINT=slot 0): 13,27,42,50,58,74,82,90,106,114,122,138,146,154,169,170 +// (HINT=slot 0): 11,25,40,48,56,72,80,88,104,112,120,136,144,152,167,168 static const unsigned char vdpcyc2sl_32[] = { // 68k cycles/4 to slot # // 4 8 12 16 20 24 28 32 36 40 44 48 52 56 60 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, - 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, - 9,10,10,10,10,10,10,11,11,11,11,11,11,11,11,11, -11,11,12,12,12,12,12,12,13,13,13,13,13,13,14,14, -14,14,14,14,14,14,14,14,15,16,16,16,16,16,16,16, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, + 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9,10, +10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,11, +11,12,12,12,12,12,13,13,13,13,13,13,14,14,14,14, +14,14,14,14,14,14,14,15,16,16,16,16,16,16,16,16, }; static const unsigned char vdpsl2cyc_32[] = { // slot # to 68k cycles/4 - 0, 9, 19, 30, 35, 41, 52, 58, 64, 75, 81, 87, 98,104,110,120,121,131 + 0, 8, 18, 28, 33, 39, 51, 56, 62, 74, 79, 85, 97,102,108,119,120,130 }; // VDP transfer slots in active display 40col mode. 1 slot is 488/210 = 2.3238 // 68k cycles. 
Only 18 of the 210 slots in a scanline can be used by CPU/DMA: -// (HINT=0): 23,49,57,65,81,89,97,113,121,129,145,153,161,177,185,193,208,209 +// (HINT=0): 21,47,55,63,79,87,95,111,119,127,143,151,159,175,183,191,206,207, static const unsigned char vdpcyc2sl_40[] = { // 68k cycles/4 to slot # // 4 8 12 16 20 24 28 32 36 40 44 48 52 56 60 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, - 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, - 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 8, 8, 8, 8, 8, 9, 9, 9, 9,10,10,10,10,10,10, -10,10,10,10,11,11,11,11,12,12,12,12,12,13,13,13, -13,13,13,13,13,13,14,14,14,14,14,15,15,15,15,15, -16,16,16,16,16,16,16,16,17,18,18,18,18,18,18,18, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, + 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 9, 9, 9, 9,10,10,10,10,10,10,10, +10,10,10,11,11,11,11,12,12,12,12,12,13,13,13,13, +13,13,13,13,13,14,14,14,14,14,15,15,15,15,15,16, +16,16,16,16,16,16,16,17,18,18,18,18,18,18,18,18, }; static const unsigned char vdpsl2cyc_40[] = { // slot # to 68k cycles/4 - 0, 13, 28, 33, 37, 47, 51, 56, 65, 70, 74, 84, 88, 93,102,107,112,120,121,135 + 0, 12, 27, 32, 36, 46, 50, 55, 64, 69, 73, 83, 87, 92,101,106,111,119,120,134 }; // NB code assumes fifo_* arrays have size 2^n @@ -164,7 +165,7 @@ static __inline void SetFIFOState(struct PicoVideo *pv) if (fifo_total <= 4) { int x = (fifo_qx + fifo_ql - 1) & 7; if ((pv->status & SR_DMA) && !(pv->status & PVS_DMAFILL) && - fifo_ql && !(fifo_queue[x] & FQ_BGDMA)) { + (!fifo_ql || !(fifo_queue[x] & FQ_BGDMA))) { pv->status &= ~SR_DMA; pv->command &= ~0x80; } @@ -263,7 +264,7 @@ int PicoVideoFIFOWrite(int count, int flags, unsigned sr_mask,unsigned sr_flags) if (count && fifo_ql < 8) { // update FIFO state if it was empty if (fifo_ql == 0) { - fifo_slot = GetFIFOSlot(pv, lc+10); // FIFO latency 
~4 vdp slots + fifo_slot = GetFIFOSlot(pv, lc+9); // FIFO latency ~3 vdp slots pv->fifo_cnt = count << (flags & FQ_BYTE); } @@ -340,7 +341,7 @@ static __inline void AutoIncrement(void) static __inline void UpdateSAT(u32 a, u32 d) { Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; - if (!((a^sat) >> satxbits) && !(a & 4)) { + if (!(a & 4)) { int num = (a-sat) >> 3; ((u16 *)&VdpSATCache[num])[(a&3) >> 1] = d; } @@ -672,7 +673,6 @@ static NOINLINE void CommandDma(void) fifo_total, SekPc); fifo_total = fifo_ql = 0; } - pvid->status |= SR_DMA; len = GetDmaLength(); source =Pico.video.reg[0x15]; @@ -685,7 +685,7 @@ static NOINLINE void CommandDma(void) else if (method == 3) DmaCopy(len); // VRAM Copy else { - pvid->status |= PVS_DMAFILL; + pvid->status |= SR_DMA|PVS_DMAFILL; return; } source += len; @@ -747,7 +747,7 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) !(!pvid->pending && ((pvid->command & 0xc00000f0) == 0x40000010 && PicoMem.vsram[pvid->addr>>1] == (d & 0x7ff))) ) - DrawSync(SekCyclesDone() - Pico.t.m68c_line_start <= 488-440); + DrawSync(0); // XXX it's unclear when vscroll data is fetched from vsram? if (pvid->pending) { CommandChange(); @@ -902,8 +902,8 @@ update_irq: static u32 VideoSr(const struct PicoVideo *pv) { unsigned int c, d = pv->status; - unsigned int hp = pv->reg[12]&1 ? 32:40; // HBLANK start - unsigned int hl = pv->reg[12]&1 ? 94:84; // HBLANK length + unsigned int hp = pv->reg[12]&1 ? 15*488/210+1 : 15*488/171+1; // HBLANK start + unsigned int hl = pv->reg[12]&1 ? 
37*488/210+1 : 28*488/171+1; // HBLANK len c = SekCyclesDone() - Pico.t.m68c_line_start; if (c - hp < hl) @@ -1013,7 +1013,9 @@ unsigned char PicoVideoRead8HV_H(void) unsigned char PicoVideoRead8HV_L(void) { u32 d = (SekCyclesDone() - Pico.t.m68c_line_start) & 0x1ff; // FIXME - if (Pico.video.reg[12]&1) + if (Pico.video.reg[0]&2) + d = Pico.video.hv_latch; + else if (Pico.video.reg[12]&1) d = hcounts_40[d]; else d = hcounts_32[d]; elprintf(EL_HVCNT, "hcounter: %02x [%u] @ %06x", d, SekCyclesDone(), SekPc); From 672b29e658ee1ea1ece9a0697aa56811323cf759 Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 26 Feb 2020 20:31:40 +0100 Subject: [PATCH 0273/1110] bugfix for ARM asm sprite rendering --- pico/draw.c | 2 +- pico/draw_arm.S | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pico/draw.c b/pico/draw.c index da87ede2..e6545017 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -1243,7 +1243,7 @@ static void DrawAllSprites(unsigned char *sprited, int prio, int sh, struct PicoEState *est) { unsigned char *p; - int cnt, w = sprited[2]; + int cnt, w; cnt = sprited[0] & 0x7f; if (cnt == 0) return; diff --git a/pico/draw_arm.S b/pico/draw_arm.S index 967bf6aa..0eb161e3 100644 --- a/pico/draw_arm.S +++ b/pico/draw_arm.S @@ -1169,7 +1169,9 @@ DrawSprite: mov r2, r0, lsl #24 cmp r0, #0xff ldmeqfd sp!, {r1,r3-r11,pc} @ end of list - eor r2, r2, r4, lsl #30 + eors r2, r2, r4, lsl #30 + bic r2, r4, #0xff000000 + str r2, [sp] bmi DrawSprite @ wrong priority ldr r1, [r7, #OFS_EST_HighPreSpr] and r0, r0, #0x7f @@ -1210,8 +1212,6 @@ DrawSprite: add r6, r6, #1 @ inc now cmp r4, #0x1000000 @ check width of last sprite movhs r6, r4, lsr #24 - bichs r4, r4, #0xff000000 - strhs r4, [sp] @ cache some stuff to avoid mem access mov r5, r5, lsl #4 @ delta<<=4; // Delta of address From 91f5fbe89b3f52de91648b7eacf986d24ecc3ab0 Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 26 Feb 2020 20:36:46 +0100 Subject: [PATCH 0274/1110] fix for EI insn in cz80 (partial revert of 43e1401) 
--- cpu/cz80/cz80_op.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpu/cz80/cz80_op.c b/cpu/cz80/cz80_op.c index 317e9587..b1520088 100644 --- a/cpu/cz80/cz80_op.c +++ b/cpu/cz80/cz80_op.c @@ -712,6 +712,8 @@ OP_EI: if (CPU->IRQState) { afterEI = 1; + CPU->ExtraCycles += 1 - CPU->ICount; + CPU->ICount = 1; } } else zIFF2 = (1 << 2); From 478a1164fe286a036ba0b61ab64aeb225952465a Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 27 Feb 2020 21:19:37 +0100 Subject: [PATCH 0275/1110] fix for VINT while DMA is running --- pico/pico_cmn.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pico/pico_cmn.c b/pico/pico_cmn.c index 75389840..50a632ca 100644 --- a/pico/pico_cmn.c +++ b/pico/pico_cmn.c @@ -191,7 +191,8 @@ static int PicoFrameHints(void) pv->status |= SR_F; pv->pending_ints |= 0x20; if (pv->reg[1] & 0x20) { - SekExecM68k(11); // HACK + if (Pico.t.m68c_cnt - Pico.t.m68c_aim < 60) // CPU blocked? + SekExecM68k(11); // HACK elprintf(EL_INTS, "vint: @ %06x [%u]", SekPc, SekCyclesDone()); SekInterrupt(6); } From 49790e265a04e727908bcb37a76931e18d3c962e Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 27 Feb 2020 21:31:04 +0100 Subject: [PATCH 0276/1110] vdp, tentative fix for save/load compatibility --- pico/videoport.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pico/videoport.c b/pico/videoport.c index fb6bd69a..264bb0bc 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -101,9 +101,9 @@ static short fifo_data[4], fifo_dx; // XXX must go into save? // each entry has 2 values: [n]>>3 = #writes, [n]&7 = flags static int fifo_queue[8], fifo_qx, fifo_ql; // XXX must go into save? enum { FQ_BYTE = 1, FQ_BGDMA = 2, FQ_FGDMA = 4 }; // queue flags, NB: BYTE = 1! 
-unsigned int fifo_total; // total# of pending FIFO entries (w/o BGDMA) +static unsigned int fifo_total; // total# of pending FIFO entries (w/o BGDMA) -unsigned short fifo_slot; // last executed slot in current scanline +static unsigned short fifo_slot; // last executed slot in current scanline // map cycles to FIFO slot static __inline int GetFIFOSlot(struct PicoVideo *pv, int cycles) @@ -313,7 +313,7 @@ int PicoVideoFIFOHint(void) } // switch FIFO mode between active/inactive display -void PicoVideoFIFOMode(int active) +static void PicoVideoFIFOMode(int active) { struct PicoVideo *pv = &Pico.video; int h40 = pv->reg[12] & 1; @@ -671,7 +671,7 @@ static NOINLINE void CommandDma(void) if (pvid->status & SR_DMA) { elprintf(EL_VDPDMA, "Dma overlap, left=%d @ %06x", fifo_total, SekPc); - fifo_total = fifo_ql = 0; + pvid->fifo_cnt = fifo_total = fifo_ql = 0; } len = GetDmaLength(); @@ -1029,7 +1029,7 @@ void PicoVideoSave(void) // account for all outstanding xfers XXX kludge, entry attr's not saved for (l = fifo_ql, x = fifo_qx + l-1; l > 1; l--, x--) - pv->fifo_cnt += (fifo_queue[x&7] >> 2) << (fifo_queue[x&7] & FQ_BYTE); + pv->fifo_cnt += (fifo_queue[x&7] >> 3) << (fifo_queue[x&7] & FQ_BYTE); } void PicoVideoLoad(void) From d9e12ee75730ccee62e5b25ac47e5615b8213fe3 Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 29 Feb 2020 23:45:23 +0100 Subject: [PATCH 0277/1110] improved hi prio sprite rendering speed --- pico/draw.c | 97 ++++++++++++++++++++++------------------------------- 1 file changed, 41 insertions(+), 56 deletions(-) diff --git a/pico/draw.c b/pico/draw.c index e6545017..9ce727d0 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -100,7 +100,7 @@ void blockcpy_or(void *dst, void *src, size_t n, int pat) #define blockcpy memcpy #endif -#define TileNormMaker_(pix_func) \ +#define TileNormMaker_(pix_func,ret) \ { \ unsigned int t; \ \ @@ -112,9 +112,10 @@ void blockcpy_or(void *dst, void *src, size_t n, int pat) t = (pack&0x0f000000)>>24; pix_func(5); \ t = 
(pack&0x00f00000)>>20; pix_func(6); \ t = (pack&0x000f0000)>>16; pix_func(7); \ + return ret; \ } -#define TileFlipMaker_(pix_func) \ +#define TileFlipMaker_(pix_func,ret) \ { \ unsigned int t; \ \ @@ -126,23 +127,24 @@ void blockcpy_or(void *dst, void *src, size_t n, int pat) t = (pack&0x000000f0)>> 4; pix_func(5); \ t = (pack&0x00000f00)>> 8; pix_func(6); \ t = (pack&0x0000f000)>>12; pix_func(7); \ + return ret; \ } #define TileNormMaker(funcname, pix_func) \ static void funcname(unsigned char *pd, unsigned int pack, int pal) \ -TileNormMaker_(pix_func) +TileNormMaker_(pix_func,) #define TileFlipMaker(funcname, pix_func) \ static void funcname(unsigned char *pd, unsigned int pack, int pal) \ -TileFlipMaker_(pix_func) +TileFlipMaker_(pix_func,) #define TileNormMakerAS(funcname, pix_func) \ -static void funcname(unsigned char *pd, unsigned char *mb, unsigned int pack, int pal) \ -TileNormMaker_(pix_func) +static unsigned funcname(unsigned char *pd, unsigned m, unsigned int pack, int pal) \ +TileNormMaker_(pix_func,m) #define TileFlipMakerAS(funcname, pix_func) \ -static void funcname(unsigned char *pd, unsigned char *mb, unsigned int pack, int pal) \ -TileFlipMaker_(pix_func) +static unsigned funcname(unsigned char *pd, unsigned m, unsigned int pack, int pal) \ +TileFlipMaker_(pix_func,m) #define pix_just_write(x) \ if (t) pd[x]=pal|t @@ -184,17 +186,19 @@ TileFlipMaker(TileFlipSH_onlyop_lp, pix_sh_onlyop) #endif +// AS: sprite mask bits in m shifted to bits 8-15, see DrawSpritesHiAS + // draw a sprite pixel (AS) #define pix_as(x) \ - if (t & mb[x]) mb[x] = 0, pd[x] = pal | t + if (t && (m & (1<<(x+8)))) m &= ~(1<<(x+8)), pd[x] = pal | t TileNormMakerAS(TileNormAS, pix_as) TileFlipMakerAS(TileFlipAS, pix_as) // draw a sprite pixel, process operator colors (AS) #define pix_sh_as(x) \ - if (t & mb[x]) { \ - mb[x] = 0; \ + if (t && (m & (1<<(x+8)))) { \ + m &= ~(1<<(x+8)); \ if (t>=0xe) pd[x]=(pd[x]&0x3f)|(t<<6); /* c0 shadow, 80 hilight */ \ else pd[x] = pal | t; \ } 
@@ -203,8 +207,8 @@ TileNormMakerAS(TileNormSH_AS, pix_sh_as) TileFlipMakerAS(TileFlipSH_AS, pix_sh_as) #define pix_sh_as_onlyop(x) \ - if (t & mb[x]) { \ - mb[x] = 0; \ + if (t && (m & (1<<(x+8)))) { \ + m &= ~(1<<(x+8)); \ pix_sh_onlyop(x); \ } @@ -213,7 +217,7 @@ TileFlipMakerAS(TileFlipSH_AS_onlyop_lp, pix_sh_as_onlyop) // mark pixel as sprite pixel (AS) #define pix_sh_as_onlymark(x) \ - if (t) mb[x] = 0 + if (t) m &= ~(1<<(x+8)) TileNormMakerAS(TileNormAS_onlymark, pix_sh_as_onlymark) TileFlipMakerAS(TileFlipAS_onlymark, pix_sh_as_onlymark) @@ -905,6 +909,10 @@ static NOINLINE void DrawAllSpritesInterlace(int pri, int sh) */ static void DrawSpritesSHi(unsigned char *sprited, const struct PicoEState *est) { + static void (*tilefuncs[2][2][2])(unsigned char *, unsigned, int) = { + { {NULL, NULL}, {TileNorm, TileFlip} }, + { {TileNormSH_onlyop_lp, TileFlipSH_onlyop_lp}, {TileNormSH, TileFlipSH} } + }; // [sh?][hi?][flip?] void (*fTileFunc)(unsigned char *pd, unsigned int pack, int pal); unsigned char *pd = Pico.est.HighCol; unsigned char *p; @@ -929,21 +937,8 @@ static void DrawSpritesSHi(unsigned char *sprited, const struct PicoEState *est) code = sprite[1]; pal = (code>>9)&0x30; - if (pal == 0x30) - { - if (code & 0x8000) // hi priority - { - if (code&0x800) fTileFunc=TileFlipSH; - else fTileFunc=TileNormSH; - } else { - if (code&0x800) fTileFunc=TileFlipSH_onlyop_lp; - else fTileFunc=TileNormSH_onlyop_lp; - } - } else { - if (!(code & 0x8000)) continue; // non-operator low sprite, already drawn - if (code&0x800) fTileFunc=TileFlip; - else fTileFunc=TileNorm; - } + fTileFunc = tilefuncs[pal == 0x30][!!(code & 0x8000)][!!(code & 0x800)]; + if (fTileFunc == NULL) continue; // non-operator low sprite, already drawn // parse remaining sprite data sy=sprite[0]; @@ -980,11 +975,15 @@ static void DrawSpritesSHi(unsigned char *sprited, const struct PicoEState *est) static void DrawSpritesHiAS(unsigned char *sprited, int sh) { - void (*fTileFunc)(unsigned char *pd, 
unsigned char *mb, - unsigned int pack, int pal); + static unsigned (*tilefuncs[2][2][2])(unsigned char *, unsigned, unsigned, int) = { + { {TileNormAS_onlymark, TileFlipAS_onlymark}, {TileNormAS, TileFlipAS} }, + { {TileNormSH_AS_onlyop_lp, TileFlipSH_AS_onlyop_lp}, {TileNormSH_AS, TileFlipSH_AS} } + }; // [sh?][hi?][flip?] + unsigned (*fTileFunc)(unsigned char *pd, unsigned m, unsigned int pack, int pal); unsigned char *pd = Pico.est.HighCol; - unsigned char mb[8+320+8]; - unsigned char *p; + unsigned char mb[1+320/8+1]; + unsigned char *p, *mp; + unsigned m; int entry, cnt; cnt = sprited[0] & 0x7f; @@ -1006,26 +1005,7 @@ static void DrawSpritesHiAS(unsigned char *sprited, int sh) code = sprite[1]; pal = (code>>9)&0x30; - if (sh && pal == 0x30) - { - if (code & 0x8000) // hi priority - { - if (code&0x800) fTileFunc = TileFlipSH_AS; - else fTileFunc = TileNormSH_AS; - } else { - if (code&0x800) fTileFunc = TileFlipSH_AS_onlyop_lp; - else fTileFunc = TileNormSH_AS_onlyop_lp; - } - } else { - if (code & 0x8000) // hi priority - { - if (code&0x800) fTileFunc = TileFlipAS; - else fTileFunc = TileNormAS; - } else { - if (code&0x800) fTileFunc = TileFlipAS_onlymark; - else fTileFunc = TileNormAS_onlymark; - } - } + fTileFunc = tilefuncs[(sh && pal == 0x30)][!!(code&0x8000)][!!(code&0x800)]; // parse remaining sprite data sy=sprite[0]; @@ -1054,8 +1034,12 @@ static void DrawSpritesHiAS(unsigned char *sprited, int sh) if(sx>=328) break; // Offscreen pack = *(unsigned int *)(PicoMem.vram + (tile & 0x7fff)); - fTileFunc(pd + sx, mb + sx, pack, pal); - } + + m = (m >> 8) | mp[1] << 8; // next mask byte + // shift mask bits to bits 8-15 for easier load/store handling + m = fTileFunc(pd + sx, m << (8-(sx&0x7)), pack, pal) >> (8-(sx&0x7)); + } + *mp = m >> 8; // write last mask byte } } @@ -1106,7 +1090,8 @@ static void DrawSpritesForced(unsigned char *sprited) delta<<=4; // Delta of address if (entry+1 == cnt) width = p[entry+1]; // last sprite width limited? 
- for (; width; width--,sx+=8,tile+=delta) + mp = mb+(sx>>3); + for (m = *mp << 8; width; width--, sx+=8, *mp++ = m, tile+=delta) { unsigned int pack; From 93f41f8e1693bf0943d37ba0d3b9f1948b174fc4 Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 29 Feb 2020 23:47:14 +0100 Subject: [PATCH 0278/1110] more ARM asm sprite rendering bugfixes --- pico/draw_arm.S | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pico/draw_arm.S b/pico/draw_arm.S index 0eb161e3..de45f592 100644 --- a/pico/draw_arm.S +++ b/pico/draw_arm.S @@ -317,9 +317,10 @@ DrawLayer: moveq r1, #0x0007 movgt r1, #0x00ff @ r1=ymask=(height<<8)|0xff; ...; // Y Mask in pixels - add r10, r10, #5 - cmp r10, #7 - subge r10, r10, #1 @ r10=shift[width] (5,6,6,7) + cmp r10, #2 + addlt r10, r10, #5 + moveq r10, #5 + movgt r10, #7 @ r10=shift[width] (5,6,5,7) ldr r2, [r12, #OFS_EST_DrawScanline] ldr lr, [r12, #OFS_EST_PicoMem_vram] @@ -366,7 +367,8 @@ DrawLayer: bne .DrawStrip_interlace tst r0, r0 - movne r7, r7, lsr #16 + moveq r7, r7, lsl #16 + mov r7, r7, lsr #16 @ Find the line in the name table add r2, r2, r7 @@ -699,8 +701,8 @@ DrawLayer: @ interlace mode 2? Sonic 2? 
.DrawStrip_interlace: tst r0, r0 - moveq r7, r7, lsl #21 - movne r7, r7, lsl #5 + movne r7, r7, lsr #16 + mov r7, r7, lsl #21 @ Find the line in the name table add r2, r7, r2, lsl #22 @ r2=(vscroll+(DrawScanline<<1))<<21 (11 bits); From ea431e9ebbe652ed786bf8714c92820e67036dc3 Mon Sep 17 00:00:00 2001 From: kub Date: Sun, 1 Mar 2020 18:50:55 +0100 Subject: [PATCH 0279/1110] vdp sprite rendering fixes --- pico/draw.c | 14 +++++++------- pico/draw_arm.S | 2 -- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/pico/draw.c b/pico/draw.c index 9ce727d0..b797875d 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -333,7 +333,7 @@ static void DrawStripVSRam(struct TileStrip *ts, int plane_sh, int cellskip) } code=PicoMem.vram[ts->nametab+nametabadd+(tilex&ts->xmask)]; - if (code==blank) continue; + if ((code<<16|ty)==blank) continue; if (code>>15) { // high priority tile int cval = code | (dx<<16) | (ty<<25); if(code&0x1000) cval^=7<<26; @@ -353,7 +353,7 @@ static void DrawStripVSRam(struct TileStrip *ts, int plane_sh, int cellskip) pack = *(unsigned int *)(PicoMem.vram + addr+ty); if (!pack) { - blank = code; + blank = code<<16|ty; continue; } @@ -638,7 +638,7 @@ static void DrawTilesFromCache(int *hc, int sh, int rlim, struct PicoEState *est { int blank=-1; // The tile we know is blank while ((code=*hc++)) { - if (!(code & 0x8000) || (unsigned short)code == blank) + if ((code<<16|code>>25) == blank) continue; // Get tile address/2: addr = (code & 0x7ff) << 4; @@ -646,7 +646,7 @@ static void DrawTilesFromCache(int *hc, int sh, int rlim, struct PicoEState *est pack = *(unsigned int *)(PicoMem.vram + addr); if (!pack) { - blank = (unsigned short)code; + blank = code<<16|code>>25; continue; } @@ -1026,7 +1026,8 @@ static void DrawSpritesHiAS(unsigned char *sprited, int sh) delta<<=4; // Delta of address if (entry+1 == cnt) width = p[entry+1]; // last sprite width limited? 
- for (; width; width--,sx+=8,tile+=delta) + mp = mb+(sx>>3); + for (m = *mp << 8; width; width--, sx+=8, *mp++ = m, tile+=delta) { unsigned int pack; @@ -1090,8 +1091,7 @@ static void DrawSpritesForced(unsigned char *sprited) delta<<=4; // Delta of address if (entry+1 == cnt) width = p[entry+1]; // last sprite width limited? - mp = mb+(sx>>3); - for (m = *mp << 8; width; width--, sx+=8, *mp++ = m, tile+=delta) + for (; width; width--,sx+=8,tile+=delta) { unsigned int pack; diff --git a/pico/draw_arm.S b/pico/draw_arm.S index de45f592..860ab0f7 100644 --- a/pico/draw_arm.S +++ b/pico/draw_arm.S @@ -796,8 +796,6 @@ DrawTilesFromCache: bic r4, r1, #0xfe00 add r1, r11, r4 @ r1=pdest - movs r7, r6, lsl #16 - bpl .dtfc_loop @ !(code & 0x8000) cmp r5, r7, lsr #16 beq .dtfc_samecode @ if (code==prevcode) From d260165ad68c8e3be8c1e3c421ac26b88201efa1 Mon Sep 17 00:00:00 2001 From: kub Date: Mon, 2 Mar 2020 19:40:07 +0100 Subject: [PATCH 0280/1110] ARM SVP drc revived --- pico/carthw/svp/compiler.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/pico/carthw/svp/compiler.c b/pico/carthw/svp/compiler.c index 06aa1791..df051e47 100644 --- a/pico/carthw/svp/compiler.c +++ b/pico/carthw/svp/compiler.c @@ -1438,12 +1438,9 @@ static int translate_op(unsigned int op, int *pc, int imm, int *end_cond, int *j } tr_mov16(0, *pc); tr_r0_to_STACK(*pc); - if (tmpv != A_COND_AL) { - u32 *real_ptr = tcache_ptr; - tcache_ptr = jump_op; - EOP_C_B(tr_neg_cond(tmpv),0,real_ptr - jump_op - 2); - tcache_ptr = real_ptr; - } + if (tmpv != A_COND_AL) + EOP_C_B_PTR(jump_op, tr_neg_cond(tmpv), 0, + tcache_ptr - jump_op - 2); tr_mov16_cond(tmpv, 0, imm); if (tmpv != A_COND_AL) tr_mov16_cond(tr_neg_cond(tmpv), 0, *pc); @@ -1712,12 +1709,8 @@ static void *emit_block_epilogue(int cycles, int cond, int pc, int end_pc) ssp_block_table[pc]; if (target != NULL) emith_jump(target); - else { - int ops = emith_jump(ssp_drc_next); - end_ptr = tcache_ptr; - // cause the next 
block to be emitted over jump instruction - tcache_ptr -= ops; - } + else + emith_jump(ssp_drc_next); } else { u32 *target1 = (pc < 0x400) ? From d38906f5ac2cbc2acd99ec7cabc83ebb90519153 Mon Sep 17 00:00:00 2001 From: kub Date: Mon, 2 Mar 2020 23:48:55 +0100 Subject: [PATCH 0281/1110] arm asm sprite rendering: add line accidently deleted in ea431e9 --- pico/draw_arm.S | 1 + 1 file changed, 1 insertion(+) diff --git a/pico/draw_arm.S b/pico/draw_arm.S index 860ab0f7..8dc660c2 100644 --- a/pico/draw_arm.S +++ b/pico/draw_arm.S @@ -796,6 +796,7 @@ DrawTilesFromCache: bic r4, r1, #0xfe00 add r1, r11, r4 @ r1=pdest + movs r7, r6, lsl #16 cmp r5, r7, lsr #16 beq .dtfc_samecode @ if (code==prevcode) From 2d84b9254cacd5f1b098f67cbfaf60fb0f150fb0 Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 3 Mar 2020 20:29:23 +0100 Subject: [PATCH 0282/1110] fix config file parsing for long filenames --- platform/common/config_file.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/platform/common/config_file.c b/platform/common/config_file.c index 7248d239..09248fba 100644 --- a/platform/common/config_file.c +++ b/platform/common/config_file.c @@ -35,7 +35,7 @@ static char *mystrip(char *str); static int seek_sect(FILE *f, const char *section) { - char line[128], *tmp; + char line[640], *tmp; int len; len = strlen(section); @@ -96,7 +96,7 @@ int config_write(const char *fname) FILE *fn = NULL; menu_entry *me; int t; - char line[128]; + char line[640]; fn = fopen(fname, "w"); if (fn == NULL) @@ -165,7 +165,7 @@ write_line: int config_writelrom(const char *fname) { - char line[128], *tmp, *optr = NULL; + char line[640], *tmp, *optr = NULL; char *old_data = NULL; int size; FILE *f; @@ -212,7 +212,7 @@ int config_writelrom(const char *fname) int config_readlrom(const char *fname) { - char line[128], *tmp; + char line[640], *tmp; int i, len, ret = -1; FILE *f; @@ -503,7 +503,7 @@ bad_val: int config_readsect(const char *fname, const char *section) { - char 
line[128], *var, *val; + char line[640], *var, *val; int keys_encountered = 0; FILE *f; int ret; From b6bdccb7478a4342d9a83432f654d4ec8bf6f7bc Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 3 Mar 2020 20:32:38 +0100 Subject: [PATCH 0283/1110] vdp, some small improvements --- pico/videoport.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/pico/videoport.c b/pico/videoport.c index 264bb0bc..fd7a3a46 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -53,6 +53,8 @@ int (*PicoDmaHook)(unsigned int source, int len, unsigned short **base, unsigned * FIFORead executes a 68k read. 68k is blocked until the next transfer slot. */ +// FIFO transfer slots per line: [active][h40] +static const short vdpslots[2][2] = {{ 166, 204 },{ 16, 18 }}; // mapping between slot# and 68k cycles in a blanked scanline [H32, H40] static const int vdpcyc2sl_bl[] = { (166<<16)/488, (204<<16)/488 }; static const int vdpsl2cyc_bl[] = { (488<<16)/166, (488<<16)/204 }; @@ -115,6 +117,14 @@ static __inline int GetFIFOSlot(struct PicoVideo *pv, int cycles) else return (cycles * vdpcyc2sl_bl[h40] + cycles) >> 16; } +static __inline int GetMaxFIFOSlot(struct PicoVideo *pv) +{ + int active = !(pv->status & SR_VB) && (pv->reg[1] & 0x40); + int h40 = pv->reg[12] & 1; + + return vdpslots[active][h40]; +} + // map FIFO slot to cycles static __inline int GetFIFOCycles(struct PicoVideo *pv, int slot) { @@ -150,7 +160,7 @@ static __inline int AdvanceFIFOEntry(struct PicoVideo *pv, int slots) } // start processing for next entry if there is one if (fifo_ql) - pv->fifo_cnt= (fifo_queue[fifo_qx] >> 3) << (fifo_queue[fifo_qx] & FQ_BYTE); + pv->fifo_cnt = (fifo_queue[fifo_qx] >> 3) << (fifo_queue[fifo_qx] & FQ_BYTE); else fifo_total = 0; } @@ -190,14 +200,15 @@ void PicoVideoFIFOSync(int cycles) done -= l; } - SetFIFOState(pv); + if (done != slots) + SetFIFOState(pv); } // drain FIFO, blocking 68k on the way. FIFO must be synced prior to drain. 
int PicoVideoFIFODrain(int level, int cycles, int bgdma) { struct PicoVideo *pv = &Pico.video; - int maxsl = GetFIFOSlot(pv, 488); // max xfer slots in this scanline + int maxsl = GetMaxFIFOSlot(pv); // max xfer slots in this scanline int burn = 0; // process FIFO entries until low level is reached @@ -279,10 +290,17 @@ int PicoVideoFIFOWrite(int count, int flags, unsigned sr_mask,unsigned sr_flags) fifo_queue[(x+1) & 7] = (pv->fifo_cnt >> (f & FQ_BYTE) << 3) | f; pv->fifo_cnt = count << (flags & FQ_BYTE); } - } else + x = (x-1) & 7; + } + if (fifo_ql && (fifo_queue[x] & 7) == flags) { + // amalgamate entries if of same type + fifo_queue[x] += (count << 3); + if (fifo_ql == 1) pv->fifo_cnt += count << (flags & FQ_BYTE); + } else { + fifo_ql ++; x = (x+1) & 7; - fifo_queue[x] = (count << 3) | flags; - fifo_ql ++; + fifo_queue[x] = (count << 3) | flags; + } if (!(flags & FQ_BGDMA)) fifo_total += count; } @@ -340,9 +358,10 @@ static __inline void AutoIncrement(void) static __inline void UpdateSAT(u32 a, u32 d) { + unsigned num = (a-sat) >> 3; + Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; - if (!(a & 4)) { - int num = (a-sat) >> 3; + if (!(a & 4) && num < 128) { ((u16 *)&VdpSATCache[num])[(a&3) >> 1] = d; } } From c1d0377e480f3603b3e3c8c3a447ab5c369e55f4 Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 3 Mar 2020 20:34:11 +0100 Subject: [PATCH 0284/1110] 32x, small improvement for poll detector --- pico/32x/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pico/32x/memory.c b/pico/32x/memory.c index f772d28d..f4f0a18b 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -239,7 +239,7 @@ static NOINLINE void sh2_poll_write(u32 a, u32 d, unsigned int cycles, SH2 *sh2) // NB this can take an eternity on m68k: mov.b , needs // 28 m68k-cycles (~80 sh2-cycles) to complete (observed in Metal Head) q = &fifo[(sh2_poll_wr[hix]-1) % PFIFO_SZ]; - if (cpu < 0 && rd != wr && q->a == a && !CYCLES_GT(cycles,q->cycles+30)) { + if (rd != wr && q->a == a && 
!CYCLES_GT(cycles,q->cycles + (cpu<0 ? 30:4))) { q->d = d; } else { // store write to poll address in fifo From 2a2b4e7e882a7e81c96b966d1f19acf1c09929dd Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 3 Mar 2020 20:36:55 +0100 Subject: [PATCH 0285/1110] vdp rendering, tiny improvement --- pico/draw.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pico/draw.c b/pico/draw.c index b797875d..833d87cd 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -636,19 +636,14 @@ static void DrawTilesFromCache(int *hc, int sh, int rlim, struct PicoEState *est if (!sh) { - int blank=-1; // The tile we know is blank while ((code=*hc++)) { - if ((code<<16|code>>25) == blank) - continue; // Get tile address/2: addr = (code & 0x7ff) << 4; addr += code >> 25; // y offset into tile pack = *(unsigned int *)(PicoMem.vram + addr); - if (!pack) { - blank = code<<16|code>>25; + if (!pack) continue; - } dx = (code >> 16) & 0x1ff; pal = ((code >> 9) & 0x30); From ce32676eb890d4b4100e5d639514bfe7d6363bb4 Mon Sep 17 00:00:00 2001 From: dinkc64 Date: Fri, 13 Mar 2020 19:26:09 -0400 Subject: [PATCH 0286/1110] draw.c, DrawLayer(): impl. 
proper linescroll, fixes issue #56 --- pico/draw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pico/draw.c b/pico/draw.c index 680de3da..e5287b4e 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -411,6 +411,7 @@ static void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells, { struct PicoVideo *pvid=&Pico.video; const char shift[4]={5,6,5,7}; // 32,64 or 128 sized tilemaps (2 is invalid) + const unsigned char h_masks[4] = { 0x00, 0x07, 0xf8, 0xff }; struct TileStrip ts; int width, height, ymask; int vscroll, htab; @@ -437,8 +438,7 @@ static void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells, else ts.nametab=(pvid->reg[2]&0x38)<< 9; // A htab=pvid->reg[13]<<9; // Horizontal scroll table address - if ( pvid->reg[11]&2) htab+=est->DrawScanline<<1; // Offset by line - if ((pvid->reg[11]&1)==0) htab&=~0xf; // Offset by tile + htab+=(est->DrawScanline&h_masks[pvid->reg[11]&3])<<1; // Point to line (masked) htab+=plane_sh&1; // A or B // Get horizontal scroll value, will be masked later From 26643d27f2ac4562d0beb6ee653520aaf50a1014 Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 14 Mar 2020 19:14:04 +0100 Subject: [PATCH 0287/1110] vdp rendering improvements --- pico/draw.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/pico/draw.c b/pico/draw.c index 833d87cd..eeeb553f 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -324,7 +324,7 @@ static void DrawStripVSRam(struct TileStrip *ts, int plane_sh, int cellskip) //if((cell&1)==0) { int line,vscroll; - vscroll=PicoMem.vsram[(plane_sh&1)+(cell&~1)]; + vscroll=PicoMem.vsram[(plane_sh&1)+(cell&0x3e)]; // Find the line in the name table line=(vscroll+scan)&ts->line&0xffff; // ts->line is really ymask .. 
@@ -479,6 +479,7 @@ static void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells, // shit, we have 2-cell column based vscroll // luckily this doesn't happen too often ts.line=ymask|(shift[width]<<24); // save some stuff instead of line + PicoMem.vsram[(plane_sh & 1)+0x3e] = PicoMem.vsram[0x27]; // XXX really? DrawStripVSRam(&ts, plane_sh, cellskip); } else { vscroll = PicoMem.vsram[plane_sh & 1]; // Get vertical scroll value @@ -1022,7 +1023,7 @@ static void DrawSpritesHiAS(unsigned char *sprited, int sh) if (entry+1 == cnt) width = p[entry+1]; // last sprite width limited? mp = mb+(sx>>3); - for (m = *mp << 8; width; width--, sx+=8, *mp++ = m, tile+=delta) + for (m = *mp; width; width--, sx+=8, *mp++ = m, m >>= 8, tile+=delta) { unsigned int pack; @@ -1031,11 +1032,11 @@ static void DrawSpritesHiAS(unsigned char *sprited, int sh) pack = *(unsigned int *)(PicoMem.vram + (tile & 0x7fff)); - m = (m >> 8) | mp[1] << 8; // next mask byte + m |= mp[1] << 8; // next mask byte // shift mask bits to bits 8-15 for easier load/store handling m = fTileFunc(pd + sx, m << (8-(sx&0x7)), pack, pal) >> (8-(sx&0x7)); } - *mp = m >> 8; // write last mask byte + *mp = m; // write last mask byte } } @@ -1428,10 +1429,6 @@ static int DrawDisplay(int sh) int win=0, edge=0, hvwind=0, lflags; int maxw, maxcells; - if (!(est->DrawScanline & 15) || - (est->rendstatus & (PDRAW_SPRITES_MOVED|PDRAW_DIRTY_SPRITES))) - PrepareSprites((est->DrawScanline+16) & ~15); - est->rendstatus &= ~(PDRAW_SPRITES_MOVED|PDRAW_DIRTY_SPRITES); est->rendstatus &= ~(PDRAW_SHHI_DONE|PDRAW_PLANE_HI_PRIO); @@ -1646,6 +1643,8 @@ void PicoDrawSync(int to, int blank_last_line) if (to > 223) to = 223; } + if (Pico.est.DrawScanline <= to - blank_last_line) + PrepareSprites(to - blank_last_line + 1); for (line = Pico.est.DrawScanline; line < to; line++) PicoLine(line, offs, sh, bgc); From 20fafa71272ab9bd7959f956dea58f5b6cb78a4f Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 14 Mar 2020 19:30:28 +0100 
Subject: [PATCH 0288/1110] hvcounter table resolution reduced --- pico/misc.c | 94 +++++++++++++++++------------------------------- pico/videoport.c | 8 ++--- 2 files changed, 36 insertions(+), 66 deletions(-) diff --git a/pico/misc.c b/pico/misc.c index ab282c24..4837fd3e 100644 --- a/pico/misc.c +++ b/pico/misc.c @@ -10,72 +10,42 @@ // H-counter table for hvcounter reads in 40col mode, starting at HINT const unsigned char hcounts_40[] = { -0xa5,0xa5,0xa5,0xa6,0xa6,0xa7,0xa7,0xa8,0xa8,0xa8,0xa9,0xa9,0xaa,0xaa,0xab,0xab, -0xac,0xac,0xac,0xad,0xad,0xae,0xae,0xaf,0xaf,0xaf,0xb0,0xb0,0xb1,0xb1,0xb2,0xb2, -0xb3,0xb3,0xb3,0xb4,0xb4,0xb5,0xb5,0xb6,0xe4,0xe4,0xe5,0xe5,0xe6,0xe6,0xe7,0xe7, -0xe7,0xe8,0xe8,0xe8,0xe9,0xe9,0xe9,0xea,0xea,0xeb,0xeb,0xeb,0xec,0xec,0xec,0xed, -0xed,0xed,0xee,0xee,0xee,0xef,0xef,0xf0,0xf0,0xf0,0xf1,0xf1,0xf1,0xf2,0xf2,0xf2, -0xf3,0xf3,0xf3,0xf4,0xf4,0xf5,0xf5,0xf5,0xf6,0xf6,0xf6,0xf7,0xf7,0xf7,0xf8,0xf8, -0xf9,0xf9,0xfa,0xfa,0xfb,0xfb,0xfb,0xfc,0xfc,0xfd,0xfd,0xfe,0xfe,0xfe,0xff,0xff, -0x00,0x00,0x01,0x01,0x02,0x02,0x02,0x03,0x03,0x04,0x04,0x05,0x05,0x05,0x06,0x06, -0x07,0x07,0x08,0x08,0x09,0x09,0x09,0x0a,0x0a,0x0b,0x0b,0x0c,0x0c,0x0c,0x0d,0x0d, -0x0e,0x0e,0x0f,0x0f,0x10,0x10,0x10,0x11,0x11,0x12,0x12,0x13,0x13,0x13,0x14,0x14, -0x15,0x15,0x16,0x16,0x17,0x17,0x17,0x18,0x18,0x19,0x19,0x1a,0x1a,0x1a,0x1b,0x1b, -0x1c,0x1c,0x1d,0x1d,0x1e,0x1e,0x1e,0x1f,0x1f,0x20,0x20,0x21,0x21,0x21,0x22,0x22, -0x23,0x23,0x24,0x24,0x25,0x25,0x25,0x26,0x26,0x27,0x27,0x28,0x28,0x28,0x29,0x29, -0x2a,0x2a,0x2b,0x2b,0x2c,0x2c,0x2c,0x2d,0x2d,0x2e,0x2e,0x2f,0x2f,0x2f,0x30,0x30, -0x31,0x31,0x32,0x32,0x33,0x33,0x33,0x34,0x34,0x35,0x35,0x36,0x36,0x36,0x37,0x37, -0x38,0x38,0x39,0x39,0x3a,0x3a,0x3a,0x3b,0x3b,0x3c,0x3c,0x3d,0x3d,0x3d,0x3e,0x3e, -0x3f,0x3f,0x40,0x40,0x41,0x41,0x41,0x42,0x42,0x43,0x43,0x44,0x44,0x44,0x45,0x45, -0x46,0x46,0x47,0x47,0x48,0x48,0x48,0x49,0x49,0x4a,0x4a,0x4b,0x4b,0x4b,0x4c,0x4c, 
-0x4d,0x4d,0x4e,0x4e,0x4f,0x4f,0x4f,0x50,0x50,0x51,0x51,0x52,0x52,0x52,0x53,0x53, -0x54,0x54,0x55,0x55,0x56,0x56,0x56,0x57,0x57,0x58,0x58,0x59,0x59,0x59,0x5a,0x5a, -0x5b,0x5b,0x5c,0x5c,0x5d,0x5d,0x5d,0x5e,0x5e,0x5f,0x5f,0x60,0x60,0x60,0x61,0x61, -0x62,0x62,0x63,0x63,0x64,0x64,0x64,0x65,0x65,0x66,0x66,0x67,0x67,0x67,0x68,0x68, -0x69,0x69,0x6a,0x6a,0x6b,0x6b,0x6b,0x6c,0x6c,0x6d,0x6d,0x6e,0x6e,0x6e,0x6f,0x6f, -0x70,0x70,0x71,0x71,0x72,0x72,0x72,0x73,0x73,0x74,0x74,0x75,0x75,0x75,0x76,0x76, -0x77,0x77,0x78,0x78,0x79,0x79,0x79,0x7a,0x7a,0x7b,0x7b,0x7c,0x7c,0x7c,0x7d,0x7d, -0x7e,0x7e,0x7f,0x7f,0x80,0x80,0x80,0x81,0x81,0x82,0x82,0x83,0x83,0x83,0x84,0x84, -0x85,0x85,0x86,0x86,0x87,0x87,0x87,0x88,0x88,0x89,0x89,0x8a,0x8a,0x8a,0x8b,0x8b, -0x8c,0x8c,0x8d,0x8d,0x8e,0x8e,0x8e,0x8f,0x8f,0x90,0x90,0x91,0x91,0x91,0x92,0x92, -0x93,0x93,0x94,0x94,0x95,0x95,0x95,0x96,0x96,0x97,0x97,0x98,0x98,0x98,0x99,0x99, -0x9a,0x9a,0x9b,0x9b,0x9c,0x9c,0x9c,0x9d,0x9d,0x9e,0x9e,0x9f,0x9f,0x9f,0xa0,0xa0, -0xa1,0xa1,0xa2,0xa2,0xa3,0xa3,0xa3,0xa4,0xa5,0xa5,0xa5,0xa6,0xa6,0xa7,0xa7,0xa8, +0xa5,0xa6,0xa7,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xae,0xaf,0xb0,0xb1,0xb2, +0xb3,0xb4,0xb5,0xb5,0xe4,0xe5,0xe6,0xe7,0xe8,0xe8,0xe9,0xea,0xea,0xeb,0xec,0xed, +0xed,0xee,0xef,0xef,0xf0,0xf1,0xf2,0xf2,0xf3,0xf4,0xf4,0xf5,0xf6,0xf7,0xf7,0xf8, +0xf9,0xfa,0xfb,0xfc,0xfd,0xfd,0xfe,0xff,0x00,0x01,0x02,0x03,0x04,0x04,0x05,0x06, +0x07,0x08,0x09,0x0a,0x0b,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x12,0x13,0x14, +0x15,0x16,0x17,0x18,0x19,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x20,0x21,0x22, +0x23,0x24,0x25,0x26,0x27,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2e,0x2f,0x30, +0x31,0x32,0x33,0x34,0x35,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3c,0x3d,0x3e, +0x3f,0x40,0x41,0x42,0x43,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4a,0x4b,0x4c, +0x4d,0x4e,0x4f,0x50,0x51,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x58,0x59,0x5a, +0x5b,0x5c,0x5d,0x5e,0x5f,0x5f,0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x66,0x67,0x68, 
+0x69,0x6a,0x6b,0x6c,0x6d,0x6d,0x6e,0x6f,0x70,0x71,0x72,0x73,0x74,0x74,0x75,0x76, +0x77,0x78,0x79,0x7a,0x7b,0x7b,0x7c,0x7d,0x7e,0x7f,0x80,0x81,0x82,0x82,0x83,0x84, +0x85,0x86,0x87,0x88,0x89,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,0x90,0x90,0x91,0x92, +0x93,0x94,0x95,0x96,0x97,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9e,0x9f,0xa0, +0xa1,0xa2,0xa3,0xa4,0xa5,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xac,0xad,0xae, }; // H-counter table for hvcounter reads in 32col mode, starting at HINT const unsigned char hcounts_32[] = { -0x85,0x85,0x85,0x86,0x86,0x86,0x87,0x87,0x87,0x88,0x88,0x88,0x89,0x89,0x89,0x8a, -0x8a,0x8a,0x8b,0x8b,0x8c,0x8c,0x8c,0x8d,0x8d,0x8d,0x8e,0x8e,0x8e,0x8f,0x8f,0x8f, -0x90,0x90,0x90,0x91,0x91,0x91,0x92,0x92,0x93,0x93,0x93,0xe9,0xe9,0xe9,0xea,0xea, -0xea,0xeb,0xeb,0xeb,0xec,0xec,0xec,0xed,0xed,0xed,0xee,0xee,0xef,0xef,0xef,0xf0, -0xf0,0xf0,0xf1,0xf1,0xf1,0xf2,0xf2,0xf2,0xf3,0xf3,0xf3,0xf4,0xf4,0xf4,0xf5,0xf5, -0xf6,0xf6,0xf6,0xf7,0xf7,0xf7,0xf8,0xf8,0xf8,0xf9,0xf9,0xf9,0xfa,0xfa,0xfa,0xfb, -0xfb,0xfb,0xfc,0xfc,0xfd,0xfd,0xfd,0xfe,0xfe,0xfe,0xff,0xff,0xff,0x00,0x00,0x00, -0x01,0x01,0x01,0x02,0x02,0x02,0x03,0x03,0x04,0x04,0x04,0x05,0x05,0x05,0x06,0x06, -0x06,0x07,0x07,0x07,0x08,0x08,0x08,0x09,0x09,0x09,0x0a,0x0a,0x0b,0x0b,0x0b,0x0c, -0x0c,0x0c,0x0d,0x0d,0x0d,0x0e,0x0e,0x0e,0x0f,0x0f,0x0f,0x10,0x10,0x10,0x11,0x11, -0x12,0x12,0x12,0x13,0x13,0x13,0x14,0x14,0x14,0x15,0x15,0x15,0x16,0x16,0x16,0x17, -0x17,0x17,0x18,0x18,0x19,0x19,0x19,0x1a,0x1a,0x1a,0x1b,0x1b,0x1b,0x1c,0x1c,0x1c, -0x1d,0x1d,0x1d,0x1e,0x1e,0x1e,0x1f,0x1f,0x20,0x20,0x20,0x21,0x21,0x21,0x22,0x22, -0x22,0x23,0x23,0x23,0x24,0x24,0x24,0x25,0x25,0x25,0x26,0x26,0x27,0x27,0x27,0x28, -0x28,0x28,0x29,0x29,0x29,0x2a,0x2a,0x2a,0x2b,0x2b,0x2b,0x2c,0x2c,0x2c,0x2d,0x2d, -0x2e,0x2e,0x2e,0x2f,0x2f,0x2f,0x30,0x30,0x30,0x31,0x31,0x31,0x32,0x32,0x32,0x33, -0x33,0x33,0x34,0x34,0x35,0x35,0x35,0x36,0x36,0x36,0x37,0x37,0x37,0x38,0x38,0x38, -0x39,0x39,0x39,0x3a,0x3a,0x3a,0x3b,0x3b,0x3c,0x3c,0x3c,0x3d,0x3d,0x3d,0x3e,0x3e, 
-0x3e,0x3f,0x3f,0x3f,0x40,0x40,0x40,0x41,0x41,0x41,0x42,0x42,0x43,0x43,0x43,0x44, -0x44,0x44,0x45,0x45,0x45,0x46,0x46,0x46,0x47,0x47,0x47,0x48,0x48,0x48,0x49,0x49, -0x4a,0x4a,0x4a,0x4b,0x4b,0x4b,0x4c,0x4c,0x4c,0x4d,0x4d,0x4d,0x4e,0x4e,0x4e,0x4f, -0x4f,0x4f,0x50,0x50,0x51,0x51,0x51,0x52,0x52,0x52,0x53,0x53,0x53,0x54,0x54,0x54, -0x55,0x55,0x55,0x56,0x56,0x56,0x57,0x57,0x58,0x58,0x58,0x59,0x59,0x59,0x5a,0x5a, -0x5a,0x5b,0x5b,0x5b,0x5c,0x5c,0x5c,0x5d,0x5d,0x5d,0x5e,0x5e,0x5f,0x5f,0x5f,0x60, -0x60,0x60,0x61,0x61,0x61,0x62,0x62,0x62,0x63,0x63,0x63,0x64,0x64,0x64,0x65,0x65, -0x66,0x66,0x66,0x67,0x67,0x67,0x68,0x68,0x68,0x69,0x69,0x69,0x6a,0x6a,0x6a,0x6b, -0x6b,0x6b,0x6c,0x6c,0x6d,0x6d,0x6d,0x6e,0x6e,0x6e,0x6f,0x6f,0x6f,0x70,0x70,0x70, -0x71,0x71,0x71,0x72,0x72,0x72,0x73,0x73,0x74,0x74,0x74,0x75,0x75,0x75,0x76,0x76, -0x76,0x77,0x77,0x77,0x78,0x78,0x78,0x79,0x79,0x79,0x7a,0x7a,0x7b,0x7b,0x7b,0x7c, -0x7c,0x7c,0x7d,0x7d,0x7d,0x7e,0x7e,0x7e,0x7f,0x7f,0x7f,0x80,0x80,0x80,0x81,0x81, -0x82,0x82,0x82,0x83,0x83,0x83,0x84,0x84,0x85,0x85,0x85,0x86,0x86,0x86,0x87,0x87, +0x85,0x86,0x86,0x87,0x88,0x88,0x89,0x8a,0x8a,0x8b,0x8c,0x8d,0x8d,0x8e,0x8f,0x8f, +0x90,0x91,0x91,0x92,0x93,0xe9,0xe9,0xea,0xeb,0xeb,0xec,0xed,0xed,0xee,0xef,0xf0, +0xf0,0xf1,0xf2,0xf2,0xf3,0xf4,0xf4,0xf5,0xf6,0xf7,0xf7,0xf8,0xf9,0xf9,0xfa,0xfb, +0xfb,0xfc,0xfd,0xfe,0xfe,0xff,0x00,0x00,0x01,0x02,0x02,0x03,0x04,0x05,0x05,0x06, +0x07,0x07,0x08,0x09,0x09,0x0a,0x0b,0x0c,0x0c,0x0d,0x0e,0x0e,0x0f,0x10,0x10,0x11, +0x12,0x13,0x13,0x14,0x15,0x15,0x16,0x17,0x17,0x18,0x19,0x1a,0x1a,0x1b,0x1c,0x1c, +0x1d,0x1e,0x1e,0x1f,0x20,0x21,0x21,0x22,0x23,0x23,0x24,0x25,0x25,0x26,0x27,0x28, +0x28,0x29,0x2a,0x2a,0x2b,0x2c,0x2c,0x2d,0x2e,0x2f,0x2f,0x30,0x31,0x31,0x32,0x33, +0x33,0x34,0x35,0x36,0x36,0x37,0x38,0x38,0x39,0x3a,0x3a,0x3b,0x3c,0x3d,0x3d,0x3e, +0x3f,0x3f,0x40,0x41,0x41,0x42,0x43,0x44,0x44,0x45,0x46,0x46,0x47,0x48,0x48,0x49, +0x4a,0x4b,0x4b,0x4c,0x4d,0x4d,0x4e,0x4f,0x4f,0x50,0x51,0x52,0x52,0x53,0x54,0x54, 
+0x55,0x56,0x56,0x57,0x58,0x59,0x59,0x5a,0x5b,0x5b,0x5c,0x5d,0x5d,0x5e,0x5f,0x60, +0x60,0x61,0x62,0x62,0x63,0x64,0x64,0x65,0x66,0x67,0x67,0x68,0x69,0x69,0x6a,0x6b, +0x6b,0x6c,0x6d,0x6e,0x6e,0x6f,0x70,0x70,0x71,0x72,0x72,0x73,0x74,0x75,0x75,0x76, +0x77,0x77,0x78,0x79,0x79,0x7a,0x7b,0x7c,0x7c,0x7d,0x7e,0x7e,0x7f,0x80,0x80,0x81, +0x82,0x83,0x83,0x84,0x85,0x85,0x86,0x87,0x87,0x88,0x89,0x8a,0x8a,0x8b,0x8c,0x8c, }; #ifndef _ASM_MISC_C diff --git a/pico/videoport.c b/pico/videoport.c index fd7a3a46..cbcea796 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -975,8 +975,8 @@ PICO_INTERNAL_ASM unsigned int PicoVideoRead(unsigned int a) if (Pico.video.reg[0]&2) d = Pico.video.hv_latch; else if (Pico.video.reg[12]&1) - d = hcounts_40[d] | (Pico.video.v_counter << 8); - else d = hcounts_32[d] | (Pico.video.v_counter << 8); + d = hcounts_40[d/2] | (Pico.video.v_counter << 8); + else d = hcounts_32[d/2] | (Pico.video.v_counter << 8); elprintf(EL_HVCNT, "hv: %02x %02x [%u] @ %06x", d, Pico.video.v_counter, SekCyclesDone(), SekPc); return d; @@ -1035,8 +1035,8 @@ unsigned char PicoVideoRead8HV_L(void) if (Pico.video.reg[0]&2) d = Pico.video.hv_latch; else if (Pico.video.reg[12]&1) - d = hcounts_40[d]; - else d = hcounts_32[d]; + d = hcounts_40[d/2]; + else d = hcounts_32[d/2]; elprintf(EL_HVCNT, "hcounter: %02x [%u] @ %06x", d, SekCyclesDone(), SekPc); return d; } From c55a44a88c217900cd4f56f164f14cb680f7597a Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 14 Mar 2020 19:52:27 +0100 Subject: [PATCH 0289/1110] vdp fifo speed optimization --- pico/misc.c | 129 ++++++++++++++++++++++++++++++ pico/pico.c | 1 + pico/pico_cmn.c | 2 + pico/pico_int.h | 3 + pico/videoport.c | 200 ++++++++++++++++++----------------------------- 5 files changed, 210 insertions(+), 125 deletions(-) diff --git a/pico/misc.c b/pico/misc.c index 4837fd3e..74d4d8a8 100644 --- a/pico/misc.c +++ b/pico/misc.c @@ -48,6 +48,135 @@ const unsigned char hcounts_32[] = { 
0x82,0x83,0x83,0x84,0x85,0x85,0x86,0x87,0x87,0x88,0x89,0x8a,0x8a,0x8b,0x8c,0x8c, }; +// VDP transfer slots for blanked and active display in 32col and 40col mode. +// 1 slot is 488/171 = 2.8538 68k cycles in h32, and 488/210 = 2.3238 in h40 +// In blanked display, all slots but 5(h32) / 6(h40) are usable for transfers, +// in active display only 16(h32) / 18(h40) slots can be used. + +// XXX inactive tables by slot#=cycles*maxslot#/488. should be through hv tables +// VDP transfer slots in inactive (blanked) display 32col mode. +// refresh slots: 250, 26, 58, 90, 122 -> 32, 64, 96, 128, 160 +const unsigned char vdpcyc2sl_32_bl[] = { // 68k cycles/2 to slot # +// 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30 + 0, 0, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 10, + 10, 11, 12, 12, 13, 14, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, + 21, 22, 23, 23, 24, 25, 25, 26, 27, 27, 28, 29, 29, 30, 31, 31, + 32, 33, 34, 34, 35, 36, 36, 37, 38, 38, 39, 40, 40, 41, 42, 42, + 43, 44, 44, 45, 46, 46, 47, 48, 48, 49, 50, 51, 51, 52, 53, 53, + 54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 61, 61, 62, 63, 63, 64, + 65, 65, 66, 67, 68, 68, 69, 70, 70, 71, 72, 72, 73, 74, 74, 75, + 76, 76, 77, 78, 78, 79, 80, 80, 81, 82, 83, 83, 84, 85, 85, 86, + 87, 87, 88, 89, 89, 90, 91, 91, 92, 93, 93, 94, 95, 95, 96, 97, + 97, 98, 99,100,100,101,102,102,103,104,104,105,106,106,107,108, + 108,109,110,110,111,112,112,113,114,114,115,116,117,117,118,119, + 119,120,121,121,122,123,123,124,125,125,126,127,127,128,129,129, + 130,131,131,132,133,134,134,135,136,136,137,138,138,139,140,140, + 141,142,142,143,144,144,145,146,146,147,148,148,149,150,151,151, + 152,153,153,154,155,155,156,157,157,158,159,159,160,161,161,162, + 163,163,164,165,166,166,167,168,168,169,170,170,171,172,172,173, +}; +// VDP transfer slots in inactive (blanked) display 40col mode. 
+// refresh slots: 250, 26, 58, 90, 122, 154 -> 40, 72, 104, 136, 168, 200 +const unsigned char vdpcyc2sl_40_bl[] = { // 68k cycles/2 to slot # +// 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30 + 0, 0, 1, 2, 3, 4, 5, 5, 6, 7, 8, 9, 10, 10, 11, 12, + 13, 14, 15, 15, 16, 17, 18, 19, 20, 20, 21, 22, 23, 24, 25, 25, + 26, 27, 28, 29, 30, 30, 31, 32, 33, 34, 35, 35, 36, 37, 38, 39, + 40, 40, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 51, 52, + 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 61, 62, 63, 64, 65, 66, + 66, 67, 68, 69, 70, 71, 71, 72, 73, 74, 75, 76, 76, 77, 78, 79, + 80, 81, 81, 82, 83, 84, 85, 86, 86, 87, 88, 89, 90, 91, 91, 92, + 93, 94, 95, 96, 96, 97, 98, 99,100,101,102,102,103,104,105,106, + 107,107,108,109,110,111,112,112,113,114,115,116,117,117,118,119, + 120,121,122,122,123,124,125,126,127,127,128,129,130,131,132,132, + 133,134,135,136,137,137,138,139,140,141,142,142,143,144,145,146, + 147,147,148,149,150,151,152,153,153,154,155,156,157,158,158,159, + 160,161,162,163,163,164,165,166,167,168,168,169,170,171,172,173, + 173,174,175,176,177,178,178,179,180,181,182,183,183,184,185,186, + 187,188,188,189,190,191,192,193,193,194,195,196,197,198,198,199, + 200,201,202,203,204,204,205,206,207,208,209,209,210,211,212,213, +}; +// VDP transfer slots in active display 32col mode. 
Transfer slots (Hint=0): +// 11,25,40,48,56,72,80,88,104,112,120,136,144,152,167,168 +const unsigned char vdpcyc2sl_32[] = { // 68k cycles/2 to slot # +// 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, +}; +// VDP transfer slots in active display 40col mode. 
Transfer slots (Hint=0): +// 21,47,55,63,79,87,95,111,119,127,143,151,159,175,183,191,206,207 +const unsigned char vdpcyc2sl_40[] = { // 68k cycles/2 to slot # +// 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0 + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, // 32 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 64 + 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 96 + 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, // 128 + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, // 160 + 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, // 192 + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 224 + 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, // 256 + 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 288 + 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, // 320 + 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, // 352 + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, // 384 + 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, // 416 + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 17, // 448 + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, // 480 +}; + +// XXX inactive tables by cyc=slot#*488/maxslot#. 
should be through hv tables +const unsigned short vdpsl2cyc_32_bl[] = { // slot # to 68k cycles/2 + 0, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, 23, + 24, 25, 27, 28, 30, 31, 33, 34, 36, 37, 39, 40, 42, 43, 45, 46, + 48, 49, 50, 52, 53, 55, 56, 58, 59, 61, 62, 64, 65, 67, 68, 70, + 71, 73, 74, 75, 77, 78, 80, 81, 83, 84, 86, 87, 89, 90, 92, 93, + 95, 96, 98, 99,100,102,103,105,106,108,109,111,112,114,115,117, + 118,120,121,122,124,125,127,128,130,131,133,134,136,137,139,140, + 142,143,145,146,147,149,150,152,153,155,156,158,159,161,162,164, + 165,167,168,170,171,172,174,175,177,178,180,181,183,184,186,187, + 189,190,192,193,195,196,197,199,200,202,203,205,206,208,209,211, + 212,214,215,217,218,220,221,222,224,225,227,228,230,231,233,234, + 236,237,239,240,242,243,244,246, +}; +const unsigned short vdpsl2cyc_40_bl[] = { // slot # to 68k cycles/2 + 0, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, + 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 32, 33, 34, 35, 36, 38, + 39, 40, 41, 42, 44, 45, 46, 47, 48, 50, 51, 52, 53, 54, 56, 57, + 58, 59, 60, 61, 63, 64, 65, 66, 67, 69, 70, 71, 72, 73, 75, 76, + 77, 78, 79, 81, 82, 83, 84, 85, 87, 88, 89, 90, 91, 93, 94, 95, + 96, 97, 99,100,101,102,103,105,106,107,108,109,111,112,113,114, + 115,117,118,119,120,121,122,124,125,126,127,128,130,131,132,133, + 134,136,137,138,139,140,142,143,144,145,146,148,149,150,151,152, + 154,155,156,157,158,160,161,162,163,164,166,167,168,169,170,172, + 173,174,175,176,178,179,180,181,182,183,185,186,187,188,189,191, + 192,193,194,195,197,198,199,200,201,203,204,205,206,207,209,210, + 211,212,213,215,216,217,218,219,221,222,223,224,225,227,228,229, + 230,231,233,234,235,236,237,239,240,241,242,243,244,246, +}; +const unsigned short vdpsl2cyc_32[] = { // slot # to 68k cycles/2 + 0, 16, 36, 56, 67, 79,102,113,125,148,159,171,194,205,217,239, + 240,260 +}; +const unsigned short vdpsl2cyc_40[] = { // slot # to 68k cycles/2 + 0, 24, 55, 64, 73, 92,101,110,129,138,147,166,175,184,203,212, + 
221,239,240,268 +}; + #ifndef _ASM_MISC_C PICO_INTERNAL_ASM void memcpy16bswap(unsigned short *dest, void *src, int count) { diff --git a/pico/pico.c b/pico/pico.c index 9db2fc64..87e22e59 100644 --- a/pico/pico.c +++ b/pico/pico.c @@ -79,6 +79,7 @@ void PicoPower(void) Pico.video.reg[0] = Pico.video.reg[1] = 0x04; Pico.video.reg[0xc] = 0x81; Pico.video.reg[0xf] = 0x02; + PicoVideoFIFOMode(0, 1); if (PicoIn.AHW & PAHW_MCD) PicoPowerMCD(); diff --git a/pico/pico_cmn.c b/pico/pico_cmn.c index 50a632ca..017c404b 100644 --- a/pico/pico_cmn.c +++ b/pico/pico_cmn.c @@ -179,6 +179,7 @@ static int PicoFrameHints(void) } pv->status |= SR_VB | PVS_VB2; // go into vblank + PicoVideoFIFOMode(pv->reg[1]&0x40, pv->reg[12]&1); // the following SekRun is there for several reasons: // there must be a delay after vblank bit is set and irq is asserted (Mazin Saga) @@ -270,6 +271,7 @@ static int PicoFrameHints(void) pv->status &= ~(SR_VB | PVS_VB2); pv->status |= ((pv->reg[1] >> 3) ^ SR_VB) & SR_VB; // forced blanking + PicoVideoFIFOMode(pv->reg[1]&0x40, pv->reg[12]&1); // last scanline Pico.m.scanline = y++; diff --git a/pico/pico_int.h b/pico/pico_int.h index 65b56f1d..c0f2c343 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -299,6 +299,8 @@ extern SH2 sh2s[2]; #define PVS_CPUWR (1 << 18) // CPU write blocked by FIFO full #define PVS_CPURD (1 << 19) // CPU read blocked by FIFO not empty #define PVS_DMAFILL (1 << 20) // DMA fill is waiting for fill data +#define PVS_DMABG (1 << 21) // background DMA operation is running +#define PVS_FIFORUN (1 << 22) // FIFO is processing struct PicoVideo { @@ -858,6 +860,7 @@ unsigned char PicoVideoRead8HV_L(void); extern int (*PicoDmaHook)(unsigned int source, int len, unsigned short **base, unsigned int *mask); void PicoVideoFIFOSync(int cycles); int PicoVideoFIFOHint(void); +void PicoVideoFIFOMode(int active, int h40); int PicoVideoFIFOWrite(int count, int byte_p, unsigned sr_mask, unsigned sr_flags); void PicoVideoSave(void); void 
PicoVideoLoad(void); diff --git a/pico/videoport.c b/pico/videoport.c index cbcea796..3ed7f5b4 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -12,8 +12,11 @@ #define NEED_DMA_SOURCE #include "memory.h" -extern const unsigned char hcounts_32[]; -extern const unsigned char hcounts_40[]; +extern const unsigned char hcounts_32[], hcounts_40[]; +extern const unsigned char vdpcyc2sl_32_bl[], vdpcyc2sl_40_bl[]; +extern const unsigned char vdpcyc2sl_32[], vdpcyc2sl_40[]; +extern const unsigned short vdpsl2cyc_32_bl[], vdpsl2cyc_40_bl[]; +extern const unsigned short vdpsl2cyc_32[], vdpsl2cyc_40[]; static int blankline; // display disabled for this line static unsigned sat; // VRAM addr of sprite attribute table @@ -53,48 +56,6 @@ int (*PicoDmaHook)(unsigned int source, int len, unsigned short **base, unsigned * FIFORead executes a 68k read. 68k is blocked until the next transfer slot. */ -// FIFO transfer slots per line: [active][h40] -static const short vdpslots[2][2] = {{ 166, 204 },{ 16, 18 }}; -// mapping between slot# and 68k cycles in a blanked scanline [H32, H40] -static const int vdpcyc2sl_bl[] = { (166<<16)/488, (204<<16)/488 }; -static const int vdpsl2cyc_bl[] = { (488<<16)/166, (488<<16)/204 }; - -// VDP transfer slots in active display 32col mode. 1 slot is 488/171 = 2.8538 -// 68k cycles. 
Only 16 of the 171 slots in a scanline can be used by CPU/DMA: -// (HINT=slot 0): 11,25,40,48,56,72,80,88,104,112,120,136,144,152,167,168 -static const unsigned char vdpcyc2sl_32[] = { // 68k cycles/4 to slot # -// 4 8 12 16 20 24 28 32 36 40 44 48 52 56 60 - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, - 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9,10, -10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,11, -11,12,12,12,12,12,13,13,13,13,13,13,14,14,14,14, -14,14,14,14,14,14,14,15,16,16,16,16,16,16,16,16, -}; -static const unsigned char vdpsl2cyc_32[] = { // slot # to 68k cycles/4 - 0, 8, 18, 28, 33, 39, 51, 56, 62, 74, 79, 85, 97,102,108,119,120,130 -}; - -// VDP transfer slots in active display 40col mode. 1 slot is 488/210 = 2.3238 -// 68k cycles. Only 18 of the 210 slots in a scanline can be used by CPU/DMA: -// (HINT=0): 21,47,55,63,79,87,95,111,119,127,143,151,159,175,183,191,206,207, -static const unsigned char vdpcyc2sl_40[] = { // 68k cycles/4 to slot # -// 4 8 12 16 20 24 28 32 36 40 44 48 52 56 60 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, - 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, - 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 8, 8, 8, 8, 8, 9, 9, 9, 9,10,10,10,10,10,10,10, -10,10,10,11,11,11,11,12,12,12,12,12,13,13,13,13, -13,13,13,13,13,14,14,14,14,14,15,15,15,15,15,16, -16,16,16,16,16,16,16,17,18,18,18,18,18,18,18,18, -}; -static const unsigned char vdpsl2cyc_40[] = { // slot # to 68k cycles/4 - 0, 12, 27, 32, 36, 46, 50, 55, 64, 69, 73, 83, 87, 92,101,106,111,119,120,134 -}; - // NB code assumes fifo_* arrays have size 2^n // last transferred FIFO data, ...x = index XXX currently only CPU static short fifo_data[4], fifo_dx; // XXX must go into save? 
@@ -106,34 +67,10 @@ enum { FQ_BYTE = 1, FQ_BGDMA = 2, FQ_FGDMA = 4 }; // queue flags, NB: BYTE = 1! static unsigned int fifo_total; // total# of pending FIFO entries (w/o BGDMA) static unsigned short fifo_slot; // last executed slot in current scanline +static unsigned short fifo_maxslot;// #slots in scanline -// map cycles to FIFO slot -static __inline int GetFIFOSlot(struct PicoVideo *pv, int cycles) -{ - int active = !(pv->status & SR_VB) && (pv->reg[1] & 0x40); - int h40 = pv->reg[12] & 1; - - if (active) return (h40 ? vdpcyc2sl_40 : vdpcyc2sl_32)[cycles/4]; - else return (cycles * vdpcyc2sl_bl[h40] + cycles) >> 16; -} - -static __inline int GetMaxFIFOSlot(struct PicoVideo *pv) -{ - int active = !(pv->status & SR_VB) && (pv->reg[1] & 0x40); - int h40 = pv->reg[12] & 1; - - return vdpslots[active][h40]; -} - -// map FIFO slot to cycles -static __inline int GetFIFOCycles(struct PicoVideo *pv, int slot) -{ - int active = !(pv->status & SR_VB) && (pv->reg[1] & 0x40); - int h40 = pv->reg[12] & 1; - - if (active) return (h40 ? vdpsl2cyc_40 : vdpsl2cyc_32)[slot]*4; - else return ((slot * vdpsl2cyc_bl[h40] + slot) >> 16); -} +static const unsigned char *fifo_cyc2sl; +static const unsigned short *fifo_sl2cyc; // do the FIFO math static __inline int AdvanceFIFOEntry(struct PicoVideo *pv, int slots) @@ -149,20 +86,16 @@ static __inline int AdvanceFIFOEntry(struct PicoVideo *pv, int slots) // if entry has been processed... 
if (pv->fifo_cnt == 0) { - if (fifo_ql) { - // terminate DMA if applicable - if ((pv->status & SR_DMA) && (fifo_queue[fifo_qx] & FQ_BGDMA)) { - pv->status &= ~SR_DMA; - pv->command &= ~0x80; - } - // remove entry from FIFO + // remove entry from FIFO + if (fifo_ql) fifo_qx ++, fifo_qx &= 7, fifo_ql --; - } // start processing for next entry if there is one if (fifo_ql) pv->fifo_cnt = (fifo_queue[fifo_qx] >> 3) << (fifo_queue[fifo_qx] & FQ_BYTE); - else + else { // FIFO empty + pv->status &= ~PVS_FIFORUN; fifo_total = 0; + } } return l; } @@ -170,16 +103,20 @@ static __inline int AdvanceFIFOEntry(struct PicoVideo *pv, int slots) static __inline void SetFIFOState(struct PicoVideo *pv) { // release CPU and terminate DMA if FIFO isn't blocking the 68k anymore - if (fifo_total == 0) - pv->status &= ~PVS_CPURD; if (fifo_total <= 4) { - int x = (fifo_qx + fifo_ql - 1) & 7; - if ((pv->status & SR_DMA) && !(pv->status & PVS_DMAFILL) && - (!fifo_ql || !(fifo_queue[x] & FQ_BGDMA))) { + pv->status &= ~PVS_CPUWR; + if (!(pv->status & (PVS_DMABG|PVS_DMAFILL))) { pv->status &= ~SR_DMA; pv->command &= ~0x80; } - pv->status &= ~PVS_CPUWR; + } + if (fifo_total == 0) { + pv->status &= ~PVS_CPURD; + // terminate DMA if applicable + if (!(pv->status & (PVS_FIFORUN|PVS_DMAFILL))) { + pv->status &= ~(SR_DMA|PVS_DMABG); + pv->command &= ~0x80; + } } } @@ -190,7 +127,7 @@ void PicoVideoFIFOSync(int cycles) int slots, done; // calculate #slots since last executed slot - slots = GetFIFOSlot(pv, cycles) - fifo_slot; + slots = fifo_cyc2sl[cycles>>1] - fifo_slot; // advance FIFO queue by #done slots done = slots; @@ -208,31 +145,28 @@ void PicoVideoFIFOSync(int cycles) int PicoVideoFIFODrain(int level, int cycles, int bgdma) { struct PicoVideo *pv = &Pico.video; - int maxsl = GetMaxFIFOSlot(pv); // max xfer slots in this scanline + unsigned ocyc = cycles; int burn = 0; // process FIFO entries until low level is reached - while (fifo_total > level && fifo_slot < maxsl && + while (fifo_total > 
level && fifo_slot < fifo_maxslot && (!(fifo_queue[fifo_qx] & FQ_BGDMA) || bgdma)) { int b = fifo_queue[fifo_qx] & FQ_BYTE; int cnt = ((fifo_total-level) << b) - (pv->fifo_cnt & b); - int last = fifo_slot; - int slot = (pv->fifo_cnt < cnt ? pv->fifo_cnt : cnt) + last; // target slot - unsigned ocyc = cycles; + int slot = (pv->fifo_cnt<cnt ? pv->fifo_cnt:cnt) + fifo_slot; // target slot - if (slot > maxsl) { + if (slot > fifo_maxslot) { // target in later scanline, advance to eol - slot = maxsl; + slot = fifo_maxslot; cycles = 488; } else { // advance FIFO to target slot and CPU to cycles at that slot - cycles = GetFIFOCycles(pv, slot); + cycles = fifo_sl2cyc[slot]<<1; } + AdvanceFIFOEntry(pv, slot - fifo_slot); fifo_slot = slot; - burn += cycles - ocyc; - - AdvanceFIFOEntry(pv, slot - last); } + burn = cycles - ocyc; SetFIFOState(pv); @@ -246,17 +180,19 @@ int PicoVideoFIFORead(void) int lc = SekCyclesDone()-Pico.t.m68c_line_start; int burn = 0; - PicoVideoFIFOSync(lc); + if (pv->fifo_cnt) { + PicoVideoFIFOSync(lc); + // advance FIFO and CPU until FIFO is empty + burn = PicoVideoFIFODrain(0, lc, 1); + lc += burn; + } - // advance FIFO and CPU until FIFO is empty - burn = PicoVideoFIFODrain(0, lc, 1); - lc += burn; if (fifo_total > 0) pv->status |= PVS_CPURD; // target slot is in later scanline else { // use next VDP access slot for reading, block 68k until then - fifo_slot = GetFIFOSlot(pv, lc) + 1; - burn += GetFIFOCycles(pv, fifo_slot) - lc; + fifo_slot = fifo_cyc2sl[lc>>1] + 1; + burn += (fifo_sl2cyc[fifo_slot]<<1) - lc; } return burn; @@ -267,35 +203,41 @@ int PicoVideoFIFOWrite(int count, int flags, unsigned sr_mask,unsigned sr_flags) { struct PicoVideo *pv = &Pico.video; int lc = SekCyclesDone()-Pico.t.m68c_line_start; - int burn = 0, x; + int burn = 0, x, head = 0; - PicoVideoFIFOSync(lc); + if (pv->fifo_cnt) + PicoVideoFIFOSync(lc); pv->status = (pv->status & ~sr_mask) | sr_flags; if (count && fifo_ql < 8) { // update FIFO state if it was empty if (fifo_ql == 0) { -
fifo_slot = GetFIFOSlot(pv, lc+9); // FIFO latency ~3 vdp slots + fifo_slot = fifo_cyc2sl[(lc+8)>>1]; // FIFO latency ~3 vdp slots pv->fifo_cnt = count << (flags & FQ_BYTE); + pv->status |= PVS_FIFORUN; } - // create xfer queue entry + // determine queue position for entry x = (fifo_qx + fifo_ql - 1) & 7; if (fifo_ql && (fifo_queue[x] & FQ_BGDMA)) { // CPU FIFO writes have priority over a background DMA Fill/Copy fifo_queue[(x+1) & 7] = fifo_queue[x]; - if (fifo_ql == 1) { + if (x == fifo_qx) { // overtaking to queue head? // XXX if interrupting a DMA fill, fill data changes int f = fifo_queue[x] & 7; fifo_queue[(x+1) & 7] = (pv->fifo_cnt >> (f & FQ_BYTE) << 3) | f; pv->fifo_cnt = count << (flags & FQ_BYTE); + head = 1; } x = (x-1) & 7; } - if (fifo_ql && (fifo_queue[x] & 7) == flags) { + + // create xfer queue entry + if (fifo_ql && !head && (fifo_queue[x] & 7) == flags) { // amalgamate entries if of same type fifo_queue[x] += (count << 3); - if (fifo_ql == 1) pv->fifo_cnt += count << (flags & FQ_BYTE); + if (x == fifo_qx) // modifying fifo head, adjust count + pv->fifo_cnt += count << (flags & FQ_BYTE); } else { fifo_ql ++; x = (x+1) & 7; @@ -331,20 +273,25 @@ int PicoVideoFIFOHint(void) } // switch FIFO mode between active/inactive display -static void PicoVideoFIFOMode(int active) +void PicoVideoFIFOMode(int active, int h40) { + static const unsigned char *vdpcyc2sl[2][2] = + { {vdpcyc2sl_32_bl, vdpcyc2sl_40_bl} , {vdpcyc2sl_32, vdpcyc2sl_40} }; + static const unsigned short *vdpsl2cyc[2][2] = + { {vdpsl2cyc_32_bl, vdpsl2cyc_40_bl} , {vdpsl2cyc_32, vdpsl2cyc_40} }; + struct PicoVideo *pv = &Pico.video; - int h40 = pv->reg[12] & 1; int lc = SekCyclesDone() - Pico.t.m68c_line_start; + active = active && !(pv->status & PVS_VB2); - PicoVideoFIFOSync(lc); + if (fifo_maxslot) + PicoVideoFIFOSync(lc); - if (fifo_ql) { - // recalculate FIFO slot for new mode - if (!(pv->status & SR_VB) && active) - fifo_slot = (pv->reg[12]&1 ?
vdpcyc2sl_40 : vdpcyc2sl_32)[lc/4]; - else fifo_slot = ((lc * vdpcyc2sl_bl[h40] + lc) >> 16); - } + fifo_cyc2sl = vdpcyc2sl[active][h40]; + fifo_sl2cyc = vdpsl2cyc[active][h40]; + // recalculate FIFO slot for new mode + fifo_slot = fifo_cyc2sl[lc>>1]-1; + fifo_maxslot = fifo_cyc2sl[488>>1]; } @@ -459,7 +406,7 @@ static void DmaSlow(int len, unsigned int source) SekCyclesDone(), SekPc); SekCyclesBurnRun(PicoVideoFIFOWrite(len, FQ_FGDMA | (Pico.video.type == 1), - 0, SR_DMA| PVS_CPUWR)); + PVS_DMABG, SR_DMA | PVS_CPUWR)); if ((source & 0xe00000) == 0xe00000) { // Ram base = (u16 *)PicoMem.ram; @@ -583,13 +530,13 @@ static void DmaCopy(int len) int source; elprintf(EL_VDPDMA, "DmaCopy len %i [%u]", len, SekCyclesDone()); + // XXX implement VRAM 128k? Is this even working? xfer/count still FQ_BYTE? SekCyclesBurnRun(PicoVideoFIFOWrite(len, FQ_BGDMA | FQ_BYTE, - PVS_CPUWR, SR_DMA)); + PVS_CPUWR, SR_DMA | PVS_DMABG)); source =Pico.video.reg[0x15]; source|=Pico.video.reg[0x16]<<8; - // XXX implement VRAM 128k? Is this even working? count still in bytes? for (; len; len--) { vr[(u16)a] = vr[(u16)(source++)]; @@ -616,7 +563,7 @@ static NOINLINE void DmaFill(int data) elprintf(EL_VDPDMA, "DmaFill len %i inc %i [%u]", len, inc, SekCyclesDone()); SekCyclesBurnRun(PicoVideoFIFOWrite(len, FQ_BGDMA | (Pico.video.type == 1), - PVS_CPUWR | PVS_DMAFILL, SR_DMA)); + PVS_CPUWR | PVS_DMAFILL, SR_DMA | PVS_DMABG)); switch (Pico.video.type) { @@ -823,11 +770,13 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) if (num == 0 && !(pvid->reg[0]&2) && (d&2)) pvid->hv_latch = PicoVideoRead(0x08); if (num == 1 && ((pvid->reg[1]^d)&0x40)) { - PicoVideoFIFOMode(d & 0x40); + PicoVideoFIFOMode(d & 0x40, pvid->reg[12]&1); // handle line blanking before line rendering if (SekCyclesDone() - Pico.t.m68c_line_start <= 488-390) blankline = d&0x40 ? 
-1 : Pico.m.scanline; } + if (num == 12 && ((pvid->reg[12]^d)&0x01)) + PicoVideoFIFOMode(pvid->reg[1]&0x40, d & 1); DrawSync(SekCyclesDone() - Pico.t.m68c_line_start <= 488-390); pvid->reg[num]=(unsigned char)d; switch (num) @@ -1058,6 +1007,7 @@ void PicoVideoLoad(void) // convert former dma_xfers (why was this in PicoMisc anyway?) if (Pico.m.dma_xfers) { + pv->status = SR_DMA|PVS_FIFORUN; pv->fifo_cnt = Pico.m.dma_xfers * (pv->type == 1 ? 2 : 1); fifo_total = Pico.m.dma_xfers; Pico.m.dma_xfers = 0; From 6dd553c7a81f7c89f76204bee119c5d848579804 Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 19 Mar 2020 22:45:06 +0100 Subject: [PATCH 0290/1110] vdp rendering fixes (debug register, vscroll) for overdrive 2 --- pico/draw.c | 258 +++++++++++++++++++++++++++++++++++++++--------- pico/draw_arm.S | 8 +- 2 files changed, 218 insertions(+), 48 deletions(-) diff --git a/pico/draw.c b/pico/draw.c index eeeb553f..dff5e075 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -29,6 +29,7 @@ */ #include "pico_int.h" +#define FORCE // layer forcing via debug register? int (*PicoScanBegin)(unsigned int num) = NULL; int (*PicoScanEnd) (unsigned int num) = NULL; @@ -222,6 +223,7 @@ TileFlipMakerAS(TileFlipSH_AS_onlyop_lp, pix_sh_as_onlyop) TileNormMakerAS(TileNormAS_onlymark, pix_sh_as_onlymark) TileFlipMakerAS(TileFlipAS_onlymark, pix_sh_as_onlymark) +#ifdef FORCE // forced both layer draw (through debug reg) #define pix_and(x) \ pd[x] = (pd[x] & 0xc0) | (pd[x] & (pal | t)) @@ -230,12 +232,16 @@ TileNormMaker(TileNorm_and, pix_and) TileFlipMaker(TileFlip_and, pix_and) // forced sprite draw (through debug reg) -#define pix_sh_and(x) /* XXX is there S/H with forced draw? */ \ - if (t>=0xe) pd[x]=(pd[x]&0x3f)|(t<<6); /* c0 shadow, 80 hilight */ \ - else pd[x] = (pd[x] & 0xc0) | (pd[x] & (pal | t)) +#define pix_sh_as_and(x) /* XXX is there S/H with forced draw? 
*/ \ + if (m & (1<<(x+8))) { \ + m &= ~(1<<(x+8)); \ + if (t>=0xe) pd[x]=(pd[x]&0x3f)|(t<<6); /* c0 shadow, 80 hilight */ \ + else pd[x] = (pd[x] & 0xc0) | (pd[x] & (pal | t)); \ + } -TileNormMaker(TileNormSH_and, pix_sh_and) -TileFlipMaker(TileFlipSH_and, pix_sh_and) +TileNormMakerAS(TileNormSH_AS_and, pix_sh_as_and) +TileFlipMakerAS(TileFlipSH_AS_and, pix_sh_as_and) +#endif // -------------------------------------------- @@ -311,6 +317,7 @@ static void DrawStripVSRam(struct TileStrip *ts, int plane_sh, int cellskip) int adj = ((ts->hscroll ^ dx) >> 3) & 1; cell -= adj + 1; ts->cells -= adj; + PicoMem.vsram[0x3e] = PicoMem.vsram[0x3f] = plane_sh >> 16; } cell+=cellskip; tilex+=cellskip; @@ -479,7 +486,7 @@ static void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells, // shit, we have 2-cell column based vscroll // luckily this doesn't happen too often ts.line=ymask|(shift[width]<<24); // save some stuff instead of line - PicoMem.vsram[(plane_sh & 1)+0x3e] = PicoMem.vsram[0x27]; // XXX really? 
+ plane_sh |= PicoMem.vsram[0x26+(~plane_sh&1)] << 16; DrawStripVSRam(&ts, plane_sh, cellskip); } else { vscroll = PicoMem.vsram[plane_sh & 1]; // Get vertical scroll value @@ -778,28 +785,6 @@ static void DrawSprite(int *sprite, int sh, int w) } #endif -static NOINLINE void DrawTilesFromCacheForced(const int *hc) -{ - unsigned char *pd = Pico.est.HighCol; - int code, addr, dx; - unsigned int pack; - int pal; - - // *ts->hc++ = code | (dx<<16) | (ty<<25); - while ((code = *hc++)) { - // Get tile address/2: - addr = (code & 0x7ff) << 4; - addr += (code >> 25) & 0x0e; // y offset into tile - - dx = (code >> 16) & 0x1ff; - pal = ((code >> 9) & 0x30); - pack = *(unsigned int *)(PicoMem.vram + addr); - - if (code & 0x0800) TileFlip_and(pd + dx, pack, pal); - else TileNorm_and(pd + dx, pack, pal); - } -} - static void DrawSpriteInterlace(unsigned int *sprite) { unsigned char *pd = Pico.est.HighCol; @@ -1040,16 +1025,181 @@ static void DrawSpritesHiAS(unsigned char *sprited, int sh) } } +#ifdef FORCE +static void DrawStripForced(struct TileStrip *ts, int lflags, int cellskip) +{ + unsigned char *pd = Pico.est.HighCol; + int tilex,dx,ty,code=0,addr=0,cells; + int oldcode=-1; + int pal=0,sh; + + // Draw tiles across screen: + sh = (lflags & LF_SH) << 5; // 0x40 + tilex=((-ts->hscroll)>>3)+cellskip; + ty=(ts->line&7)<<1; // Y-Offset into tile + dx=((ts->hscroll-1)&7)+1; + cells = ts->cells - cellskip; + if(dx != 8) cells++; // have hscroll, need to draw 1 cell more + dx+=cellskip<<3; + + for (; cells > 0; dx+=8, tilex++, cells--) + { + unsigned int pack; + + code = PicoMem.vram[ts->nametab + (tilex & ts->xmask)]; + + if (code!=oldcode) { + oldcode = code; + // Get tile address/2: + addr=(code&0x7ff)<<4; + addr+=ty; + if (code&0x1000) addr^=0xe; // Y-flip + + pal=((code>>9)&0x30)|sh; + } + + pack = *(unsigned int *)(PicoMem.vram + addr); + + if (code & 0x0800) TileFlip_and(pd + dx, pack, pal); + else TileNorm_and(pd + dx, pack, pal); + } +} + +// this is messy +static void 
DrawStripVSRamForced(struct TileStrip *ts, int plane_sh, int cellskip) +{ + unsigned char *pd = Pico.est.HighCol; + int tilex,dx,code=0,addr=0,cell=0; + int oldcode=-1; + int pal=0,scan=Pico.est.DrawScanline; + + // Draw tiles across screen: + tilex=(-ts->hscroll)>>3; + dx=((ts->hscroll-1)&7)+1; + if (ts->hscroll & 0x0f) { + int adj = ((ts->hscroll ^ dx) >> 3) & 1; + cell -= adj + 1; + ts->cells -= adj; + PicoMem.vsram[0x3e] = PicoMem.vsram[0x3f] = plane_sh >> 16; + } + cell+=cellskip; + tilex+=cellskip; + dx+=cellskip<<3; + + for (; cell < ts->cells; dx+=8,tilex++,cell++) + { + int nametabadd, ty; + unsigned int pack; + + //if((cell&1)==0) + { + int line,vscroll; + vscroll=PicoMem.vsram[(plane_sh&1)+(cell&0x3e)]; + + // Find the line in the name table + line=(vscroll+scan)&ts->line&0xffff; // ts->line is really ymask .. + nametabadd=(line>>3)<<(ts->line>>24); // .. and shift[width] + ty=(line&7)<<1; // Y-Offset into tile + } + + code=PicoMem.vram[ts->nametab+nametabadd+(tilex&ts->xmask)]; + + if (code!=oldcode) { + oldcode = code; + // Get tile address/2: + addr=(code&0x7ff)<<4; + + pal=((code>>9)&0x30)|((plane_sh<<5)&0x40); + } + + if (code & 0x1000) ty ^= 0xe; // Y-flip + pack = *(unsigned int *)(PicoMem.vram + addr+ty); + + if (code & 0x0800) TileFlip_and(pd + dx, pack, pal); + else TileNorm_and(pd + dx, pack, pal); + } +} + +static void DrawLayerForced(int plane_sh, int cellskip, int maxcells, + struct PicoEState *est) +{ + struct PicoVideo *pvid=&Pico.video; + const char shift[4]={5,6,5,7}; // 32,64 or 128 sized tilemaps (2 is invalid) + struct TileStrip ts; + int width, height, ymask; + int vscroll, htab; + + ts.cells=maxcells; + + // Work out the TileStrip to draw + + // Work out the name table size: 32 64 or 128 tiles (0-3) + width=pvid->reg[16]; + height=(width>>4)&3; width&=3; + + ts.xmask=(1<reg[4]&0x07)<<12; // B + else ts.nametab=(pvid->reg[2]&0x38)<< 9; // A + + htab=pvid->reg[13]<<9; // Horizontal scroll table address + switch (pvid->reg[11]&3) { + 
case 1: htab += (est->DrawScanline<<1) & 0x0f; break; + case 2: htab += (est->DrawScanline<<1) & ~0x0f; break; // Offset by tile + case 3: htab += (est->DrawScanline<<1); break; // Offset by line + } + htab+=plane_sh&1; // A or B + + // Get horizontal scroll value, will be masked later + ts.hscroll = PicoMem.vram[htab & 0x7fff]; + + if((pvid->reg[12]&6) == 6) { + // interlace mode 2 + vscroll = PicoMem.vsram[plane_sh & 1]; // Get vertical scroll value + + // Find the line in the name table + ts.line=(vscroll+(est->DrawScanline<<1))&((ymask<<1)|1); + ts.nametab+=(ts.line>>4)<reg[11]&4) { + // shit, we have 2-cell column based vscroll + // luckily this doesn't happen too often + ts.line=ymask|(shift[width]<<24); // save some stuff instead of line + plane_sh |= PicoMem.vsram[0x26+(~plane_sh&1)] << 16; + DrawStripVSRamForced(&ts, plane_sh, cellskip); + } else { + vscroll = PicoMem.vsram[plane_sh & 1]; // Get vertical scroll value + + // Find the line in the name table + ts.line=(vscroll+est->DrawScanline)&ymask; + ts.nametab+=(ts.line>>3)<>9)&0x30; - if (code&0x800) fTileFunc = TileFlipSH_and; - else fTileFunc = TileNormSH_and; + if (code&0x800) fTileFunc = TileFlipSH_AS_and; + else fTileFunc = TileNormSH_AS_and; // parse remaining sprite data sy=sprite[0]; @@ -1087,7 +1237,8 @@ static void DrawSpritesForced(unsigned char *sprited) delta<<=4; // Delta of address if (entry+1 == cnt) width = p[entry+1]; // last sprite width limited? 
- for (; width; width--,sx+=8,tile+=delta) + mp = mb+(sx>>3); + for (m = *mp; width; width--, sx+=8, *mp++ = m, m >>= 8, tile+=delta) { unsigned int pack; @@ -1095,10 +1246,25 @@ static void DrawSpritesForced(unsigned char *sprited) if(sx>=328) break; // Offscreen pack = *(unsigned int *)(PicoMem.vram + (tile & 0x7fff)); - fTileFunc(pd + sx, pack, pal); - } + + m |= mp[1] << 8; // next mask byte + // shift mask bits to bits 8-15 for easier load/store handling + m = fTileFunc(pd + sx, m << (8-(sx&0x7)), pack, pal) >> (8-(sx&0x7)); + } + *mp = m; // write last mask byte } + + // anything not covered by a sprite is off (XXX or bg?) + for (cnt = 1; cnt < sizeof(mb)-1; cnt++) + if (mb[cnt] == 0xff) + for (m = 0; m < 8; m++) + pd[8*cnt+m] = 0; + else if (mb[cnt]) + for (m = 0; m < 8; m++) + if (mb[cnt] & (1<debug_p & PVD_KILL_B)) { lflags = LF_PLANE_1 | (sh << 1); - if (pvid->debug_p & PVD_FORCE_B) - lflags |= LF_FORCE; DrawLayer(lflags, HighCacheB, 0, maxcells, est); } /* - layer A low - */ lflags = 0 | (sh << 1); - if (pvid->debug_p & PVD_FORCE_A) - lflags |= LF_FORCE; if (pvid->debug_p & PVD_KILL_A) ; else if (hvwind == 1) @@ -1516,12 +1678,16 @@ static int DrawDisplay(int sh) else if (sprited[1] & SPRL_HAVE_HI) DrawAllSprites(sprited, 1, 0, est); - if (pvid->debug_p & PVD_FORCE_B) - DrawTilesFromCacheForced(HighCacheB); - else if (pvid->debug_p & PVD_FORCE_A) - DrawTilesFromCacheForced(HighCacheA); - else if (pvid->debug_p & PVD_FORCE_S) +#ifdef FORCE + if (pvid->debug_p & PVD_FORCE_B) { + lflags = LF_PLANE_1 | (sh << 1); + DrawLayerForced(lflags, 0, maxcells, est); + } else if (pvid->debug_p & PVD_FORCE_A) { + lflags = (sh << 1); + DrawLayerForced(lflags, 0, maxcells, est); + } else if (pvid->debug_p & PVD_FORCE_S) DrawSpritesForced(sprited); +#endif #if 0 { diff --git a/pico/draw_arm.S b/pico/draw_arm.S index 8dc660c2..1a0f3513 100644 --- a/pico/draw_arm.S +++ b/pico/draw_arm.S @@ -545,8 +545,12 @@ DrawLayer: eor r3, r3, r7 sub r10,r10, #1<<24 @ cell-- // start 
from negative for hscroll tst r3, #0x08 + add_c24 r1, lr, (OFS_PMEM_vsram-OFS_PMEM_vram) + ldr r3, [r1, #0x4c] @ r3=vsram[0x26..0x27] subne r10,r10, #1<<16 @ cells-- subne r10,r10, #1<<24 @ cell-- // even more negative + ror r3, r3, #16 + str r3, [r1, #0x7c] @ vsram[0x3e..0x3f]=r3 0: tst r9, #1<<31 mov r3, #0 @@ -577,8 +581,8 @@ DrawLayer: @ calc offset and read tileline code to r7, also calc ty add_c24 r7, lr, (OFS_PMEM_vsram-OFS_PMEM_vram) - add r7, r7, r10,asr #23 @ vsram + ((cell&~1)<<1) - bic r7, r7, #3 + and r4, r10, #0x3e000000 + add r7, r7, r4, asr #23 @ vsram + ((cell&0x3e)<<1) tst r10,#0x8000 @ plane1? addne r7, r7, #2 ldrh r7, [r7] @ r7=vscroll From 82b3e6cf3cddb55dbf54ce24cfc47a4cf12437e5 Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 27 Mar 2020 19:09:05 +0100 Subject: [PATCH 0291/1110] ARM asm, symbol visibility fix --- pico/cd/memory_arm.S | 2 ++ pico/memory.h | 9 +++++++++ pico/memory_arm.S | 2 ++ 3 files changed, 13 insertions(+) diff --git a/pico/cd/memory_arm.S b/pico/cd/memory_arm.S index 95ad09ff..0d1369ee 100644 --- a/pico/cd/memory_arm.S +++ b/pico/cd/memory_arm.S @@ -703,8 +703,10 @@ m_s68k_write16_regs_spec: @ special case .global s68k_read8 .global s68k_read16 +.global s68k_read32 .global s68k_write8 .global s68k_write16 +.global s68k_write32 s68k_read8: PIC_LDR(r3, r2, s68k_read8_map) diff --git a/pico/memory.h b/pico/memory.h index d55267ba..eba23471 100644 --- a/pico/memory.h +++ b/pico/memory.h @@ -25,8 +25,17 @@ typedef void (cpu68k_write_f)(u32 a, u32 d); extern u32 m68k_read8(u32 a); extern u32 m68k_read16(u32 a); +extern u32 m68k_read32(u32 a); extern void m68k_write8(u32 a, u8 d); extern void m68k_write16(u32 a, u16 d); +extern void m68k_write32(u32 a, u32 d); + +extern u32 s68k_read8(u32 a); +extern u32 s68k_read16(u32 a); +extern u32 s68k_read32(u32 a); +extern void s68k_write8(u32 a, u8 d); +extern void s68k_write16(u32 a, u16 d); +extern void s68k_write32(u32 a, u32 d); // z80 #define Z80_MEM_SHIFT 13 diff --git 
a/pico/memory_arm.S b/pico/memory_arm.S index ebeb346b..607006ce 100644 --- a/pico/memory_arm.S +++ b/pico/memory_arm.S @@ -227,8 +227,10 @@ m_write16_not_z80ctl: .global m68k_read8 .global m68k_read16 +.global m68k_read32 .global m68k_write8 .global m68k_write16 +.global m68k_write32 m68k_read8: PIC_LDR(r3, r2, m68k_read8_map) From bd73e6eec0116a921f092586502e2f408cab16d4 Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 27 Mar 2020 19:22:19 +0100 Subject: [PATCH 0292/1110] vdp rendering, fix for CD (sprites from WORD RAM) --- pico/cd/gfx_dma.c | 2 +- pico/pico_int.h | 18 +++++++++++++++++ pico/videoport.c | 50 +++++++++++++++-------------------------------- 3 files changed, 35 insertions(+), 35 deletions(-) diff --git a/pico/cd/gfx_dma.c b/pico/cd/gfx_dma.c index ff93a2dc..354fc213 100644 --- a/pico/cd/gfx_dma.c +++ b/pico/cd/gfx_dma.c @@ -28,7 +28,7 @@ PICO_INTERNAL void DmaSlowCell(unsigned int source, unsigned int a, int len, uns asrc = cell_map(source >> 2) << 2; asrc |= source & 2; // if(a&1) d=(d<<8)|(d>>8); // ?? 
- r[a>>1] = *(u16 *)(base + asrc); + VideoWriteVRAM(a, *(u16 *)(base + asrc)); source += 2; // AutoIncrement a=(u16)(a+inc); diff --git a/pico/pico_int.h b/pico/pico_int.h index c0f2c343..5fed483d 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -849,6 +849,24 @@ void ym2612_unpack_state(void); // videoport.c +extern unsigned SATaddr, SATmask; +static __inline void UpdateSAT(u32 a, u32 d) +{ + unsigned num = (a-SATaddr) >> 3; + + Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; + if (!(a & 4) && num < 128) { + ((u16 *)&VdpSATCache[num])[(a&3) >> 1] = d; + } +} +static __inline void VideoWriteVRAM(u32 a, u16 d) +{ + PicoMem.vram [(u16)a >> 1] = d; + + if (!((u16)(a^SATaddr) & SATmask)) + UpdateSAT(a, d); +} + PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d); PICO_INTERNAL_ASM unsigned int PicoVideoRead(unsigned int a); unsigned char PicoVideoRead8DataH(void); diff --git a/pico/videoport.c b/pico/videoport.c index 3ed7f5b4..bb79c09f 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -19,8 +19,8 @@ extern const unsigned short vdpsl2cyc_32_bl[], vdpsl2cyc_40_bl[]; extern const unsigned short vdpsl2cyc_32[], vdpsl2cyc_40[]; static int blankline; // display disabled for this line -static unsigned sat; // VRAM addr of sprite attribute table -static int satxbits; // index bits in SAT address + +unsigned SATaddr, SATmask; // VRAM addr of sprite attribute table int (*PicoDmaHook)(unsigned int source, int len, unsigned short **base, unsigned int *mask) = NULL; @@ -303,34 +303,16 @@ static __inline void AutoIncrement(void) if (Pico.video.addr < Pico.video.reg[0xf]) Pico.video.addr_u ^= 1; } -static __inline void UpdateSAT(u32 a, u32 d) -{ - unsigned num = (a-sat) >> 3; - - Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; - if (!(a & 4) && num < 128) { - ((u16 *)&VdpSATCache[num])[(a&3) >> 1] = d; - } -} - static NOINLINE void VideoWriteVRAM128(u32 a, u16 d) { // nasty u32 b = ((a & 2) >> 1) | ((a & 0x400) >> 9) | (a & 0x3FC) | ((a & 0x1F800) >> 1); ((u8 
*)PicoMem.vram)[b] = d; - if (!((u16)(b^sat) >> satxbits)) + if (!((u16)(b^SATaddr) & SATmask)) Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; - if (!((u16)(a^sat) >> satxbits)) - UpdateSAT(a, d); -} - -static void VideoWriteVRAM(u32 a, u16 d) -{ - PicoMem.vram [(u16)a >> 1] = d; - - if (!((u16)(a^sat) >> satxbits)) + if (!((u16)(a^SATaddr) & SATmask)) UpdateSAT(a, d); } @@ -461,7 +443,7 @@ static void DmaSlow(int len, unsigned int source) r = PicoMem.vram; if (inc == 2 && !(a & 1) && (a >> 16) == ((a + len*2) >> 16) && (source & ~mask) == ((source + len-1) & ~mask) && - (a << 16 >= (sat+0x280) << 16 || (a + len*2) << 16 <= sat << 16)) + (a << 16 >= (SATaddr+0x280)<<16 || (a + len*2) << 16 <= SATaddr<<16)) { // most used DMA mode memcpy((char *)r + a, base + (source & mask), len * 2); @@ -540,7 +522,7 @@ static void DmaCopy(int len) for (; len; len--) { vr[(u16)a] = vr[(u16)(source++)]; - if (!((u16)(a^sat) >> satxbits)) + if (!((u16)(a^SATaddr) & SATmask)) UpdateSAT(a, ((u16 *)vr)[(u16)a >> 1]); // AutoIncrement a = (a+inc) & ~0x20000; @@ -572,7 +554,7 @@ static NOINLINE void DmaFill(int data) // Write upper byte to adjacent address // (here we are byteswapped, so address is already 'adjacent') vr[(u16)a] = high; - if (!((u16)(a^sat) >> satxbits)) + if (!((u16)(a^SATaddr) & SATmask)) UpdateSAT(a, ((u16 *)vr)[(u16)a >> 1]); // Increment address register @@ -803,11 +785,11 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) default: return; } - sat = ((pvid->reg[5]&0x7f) << 9) | ((pvid->reg[6]&0x20) << 11); - satxbits = 9; + SATaddr = ((pvid->reg[5]&0x7f) << 9) | ((pvid->reg[6]&0x20) << 11); + SATmask = ~0x1ff; if (Pico.video.reg[12]&1) - sat &= ~0x200, satxbits = 10; // H40, zero lowest SAT bit - //elprintf(EL_STATUS, "spritep moved to %04x", sat); + SATaddr &= ~0x200, SATmask &= ~0x200; // H40, zero lowest SAT bit + //elprintf(EL_STATUS, "spritep moved to %04x", SATaddr); return; update_irq: @@ -1013,15 +995,15 @@ void PicoVideoLoad(void) 
Pico.m.dma_xfers = 0; } - sat = ((pv->reg[5]&0x7f) << 9) | ((pv->reg[6]&0x20) << 11); - satxbits = 9; + SATaddr = ((pv->reg[5]&0x7f) << 9) | ((pv->reg[6]&0x20) << 11); + SATmask = ~0x1ff; if (pv->reg[12]&1) - sat &= ~0x200, satxbits = 10; // H40, zero lowest SAT bit + SATaddr &= ~0x200, SATmask &= ~0x200; // H40, zero lowest SAT bit // rebuild SAT cache XXX wrong since cache and memory can differ for (l = 0; l < 80; l++) { - *((u16 *)VdpSATCache + 2*l ) = PicoMem.vram[(sat>>1) + l*4 ]; - *((u16 *)VdpSATCache + 2*l+1) = PicoMem.vram[(sat>>1) + l*4 + 1]; + *((u16 *)VdpSATCache + 2*l ) = PicoMem.vram[(SATaddr>>1) + l*4 ]; + *((u16 *)VdpSATCache + 2*l+1) = PicoMem.vram[(SATaddr>>1) + l*4 + 1]; } } From 61114cd8b4c44f606e6030d44142534884fca35b Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 27 Mar 2020 19:25:20 +0100 Subject: [PATCH 0293/1110] vdp rendering fixes --- pico/draw.c | 12 +++++++++--- pico/draw_arm.S | 11 ++++++++--- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/pico/draw.c b/pico/draw.c index dff5e075..68af73e2 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -486,7 +486,10 @@ static void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells, // shit, we have 2-cell column based vscroll // luckily this doesn't happen too often ts.line=ymask|(shift[width]<<24); // save some stuff instead of line - plane_sh |= PicoMem.vsram[0x26+(~plane_sh&1)] << 16; + // vscroll value for leftmost cells in case of hscroll not on 16px boundary + // XXX it's unclear what exactly the hw is doing. Continue reading where it + // stopped last seems to work best (H40: 0x50 (wrap->0x00), H32 0x40). 
+ plane_sh |= PicoMem.vsram[(pvid->reg[12]&1?0x00:0x20) + (plane_sh&1)] << 16; DrawStripVSRam(&ts, plane_sh, cellskip); } else { vscroll = PicoMem.vsram[plane_sh & 1]; // Get vertical scroll value @@ -1173,7 +1176,10 @@ static void DrawLayerForced(int plane_sh, int cellskip, int maxcells, // shit, we have 2-cell column based vscroll // luckily this doesn't happen too often ts.line=ymask|(shift[width]<<24); // save some stuff instead of line - plane_sh |= PicoMem.vsram[0x26+(~plane_sh&1)] << 16; + // vscroll value for leftmost cells in case of hscroll not on 16px boundary + // XXX it's unclear what exactly the hw is doing. Continue reading where it + // stopped last seems to work best (H40: 0x50 (wrap->0x00), H32 0x40). + plane_sh |= PicoMem.vsram[(pvid->reg[12]&1?0x00:0x20) + (plane_sh&1)] << 16; DrawStripVSRamForced(&ts, plane_sh, cellskip); } else { vscroll = PicoMem.vsram[plane_sh & 1]; // Get vertical scroll value @@ -1191,7 +1197,7 @@ static void DrawSpritesForced(unsigned char *sprited) { unsigned (*fTileFunc)(unsigned char *pd, unsigned m, unsigned int pack, int pal); unsigned char *pd = Pico.est.HighCol; - unsigned char mb[1+320+1]; + unsigned char mb[1+320/8+1]; unsigned char *p, *mp; unsigned m; int entry, cnt; diff --git a/pico/draw_arm.S b/pico/draw_arm.S index 1a0f3513..2ae6dba6 100644 --- a/pico/draw_arm.S +++ b/pico/draw_arm.S @@ -523,6 +523,9 @@ DrawLayer: @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ .DrawStrip_vsscroll: + tst r8, #1 @ if h40: lflags |= 0x10000 + orrne r0, r0, #0x10000 + rsb r8, r3, #0 mov r8, r8, lsr #3 @ r8=tilex=(-ts->hscroll)>>3 bic r8, r8, #0x3fc00000 @@ -545,11 +548,13 @@ DrawLayer: eor r3, r3, r7 sub r10,r10, #1<<24 @ cell-- // start from negative for hscroll tst r3, #0x08 - add_c24 r1, lr, (OFS_PMEM_vsram-OFS_PMEM_vram) - ldr r3, [r1, #0x4c] @ r3=vsram[0x26..0x27] subne r10,r10, #1<<16 @ cells-- subne r10,r10, #1<<24 @ cell-- // even more negative - ror r3, r3, #16 + + add_c24 r1, lr, (OFS_PMEM_vsram-OFS_PMEM_vram) + tst r0, 
#0x10000 @ h40? + ldrne r3, [r1, #0x00] @ r3=vsram[0x00..0x01] + ldreq r3, [r1, #0x40] @ r3=vsram[0x20..0x21] str r3, [r1, #0x7c] @ vsram[0x3e..0x3f]=r3 0: tst r9, #1<<31 From 8d67848ddfff10484f454499f70ffa9935749fcb Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 27 Mar 2020 19:27:05 +0100 Subject: [PATCH 0294/1110] fix for 68K cycle accounting --- pico/pico_cmn.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pico/pico_cmn.c b/pico/pico_cmn.c index 017c404b..8863bb39 100644 --- a/pico/pico_cmn.c +++ b/pico/pico_cmn.c @@ -35,6 +35,7 @@ static void SekExecM68k(int cyc_do) #elif defined(EMU_F68K) Pico.t.m68c_cnt += fm68k_emulate(&PicoCpuFM68k, cyc_do, 0) - cyc_do; #endif + SekCyclesLeft = 0; } static void SekSyncM68k(void) @@ -46,8 +47,6 @@ static void SekSyncM68k(void) while ((cyc_do = Pico.t.m68c_aim - Pico.t.m68c_cnt) > 0) SekExecM68k(cyc_do); - SekCyclesLeft = 0; - SekTrace(0); pevt_log_m68k_o(EVT_RUN_END); pprof_end(m68k); From 02138162c42d00ab260177583b037981ca1f6f30 Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 27 Mar 2020 19:32:45 +0100 Subject: [PATCH 0295/1110] vdp fifo speed optimization --- pico/videoport.c | 228 ++++++++++++++++++++++++++--------------------- 1 file changed, 125 insertions(+), 103 deletions(-) diff --git a/pico/videoport.c b/pico/videoport.c index bb79c09f..401190e0 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -57,125 +57,142 @@ int (*PicoDmaHook)(unsigned int source, int len, unsigned short **base, unsigned */ // NB code assumes fifo_* arrays have size 2^n -// last transferred FIFO data, ...x = index XXX currently only CPU -static short fifo_data[4], fifo_dx; // XXX must go into save? +static struct VdpFIFO { // XXX this must go into save file! 
+ // last transferred FIFO data, ...x = index XXX currently only CPU + unsigned short fifo_data[4], fifo_dx; + + // queued FIFO transfers, ...x = index, ...l = queue length + // each entry has 2 values: [n]>>3 = #writes, [n]&7 = flags (FQ_*) + unsigned int fifo_queue[8], fifo_qx, fifo_ql; + unsigned int fifo_total; // total# of pending FIFO entries (w/o BGDMA) + + unsigned short fifo_slot; // last executed slot in current scanline + unsigned short fifo_maxslot;// #slots in scanline + + const unsigned char *fifo_cyc2sl; + const unsigned short *fifo_sl2cyc; +} VdpFIFO; -// queued FIFO transfers, ...x = index, ...l = queue length -// each entry has 2 values: [n]>>3 = #writes, [n]&7 = flags -static int fifo_queue[8], fifo_qx, fifo_ql; // XXX must go into save? enum { FQ_BYTE = 1, FQ_BGDMA = 2, FQ_FGDMA = 4 }; // queue flags, NB: BYTE = 1! -static unsigned int fifo_total; // total# of pending FIFO entries (w/o BGDMA) - -static unsigned short fifo_slot; // last executed slot in current scanline -static unsigned short fifo_maxslot;// #slots in scanline - -static const unsigned char *fifo_cyc2sl; -static const unsigned short *fifo_sl2cyc; // do the FIFO math -static __inline int AdvanceFIFOEntry(struct PicoVideo *pv, int slots) +static __inline int AdvanceFIFOEntry(struct VdpFIFO *vf, struct PicoVideo *pv, int slots) { - int l = slots, b = fifo_queue[fifo_qx] & FQ_BYTE; + int l = slots, b = vf->fifo_queue[vf->fifo_qx] & FQ_BYTE; + int cnt = pv->fifo_cnt; // advance currently active FIFO entry - if (l > pv->fifo_cnt) - l = pv->fifo_cnt; - if (!(fifo_queue[fifo_qx] & FQ_BGDMA)) - fifo_total -= ((pv->fifo_cnt & b) + l) >> b; - pv->fifo_cnt -= l; + if (l > cnt) + l = cnt; + if (!(vf->fifo_queue[vf->fifo_qx] & FQ_BGDMA)) + vf->fifo_total -= ((cnt & b) + l) >> b; + cnt -= l; // if entry has been processed... 
- if (pv->fifo_cnt == 0) { + if (cnt == 0) { // remove entry from FIFO - if (fifo_ql) - fifo_qx ++, fifo_qx &= 7, fifo_ql --; + if (vf->fifo_ql) + vf->fifo_qx = (vf->fifo_qx+1) & 7, vf->fifo_ql --; // start processing for next entry if there is one - if (fifo_ql) - pv->fifo_cnt = (fifo_queue[fifo_qx] >> 3) << (fifo_queue[fifo_qx] & FQ_BYTE); - else { // FIFO empty + if (vf->fifo_ql) { + b = vf->fifo_queue[vf->fifo_qx] & FQ_BYTE; + cnt = (vf->fifo_queue[vf->fifo_qx] >> 3) << b; + } else { // FIFO empty pv->status &= ~PVS_FIFORUN; - fifo_total = 0; + vf->fifo_total = 0; } } + + pv->fifo_cnt = cnt; return l; } -static __inline void SetFIFOState(struct PicoVideo *pv) +static __inline void SetFIFOState(struct VdpFIFO *vf, struct PicoVideo *pv) { + unsigned int st = pv->status, cmd = pv->command; // release CPU and terminate DMA if FIFO isn't blocking the 68k anymore - if (fifo_total <= 4) { - pv->status &= ~PVS_CPUWR; - if (!(pv->status & (PVS_DMABG|PVS_DMAFILL))) { - pv->status &= ~SR_DMA; - pv->command &= ~0x80; + if (vf->fifo_total <= 4) { + st &= ~PVS_CPUWR; + if (!(st & (PVS_DMABG|PVS_DMAFILL))) { + st &= ~SR_DMA; + cmd &= ~0x80; } } - if (fifo_total == 0) { - pv->status &= ~PVS_CPURD; + if (pv->fifo_cnt == 0) { + st &= ~PVS_CPURD; // terminate DMA if applicable - if (!(pv->status & (PVS_FIFORUN|PVS_DMAFILL))) { - pv->status &= ~(SR_DMA|PVS_DMABG); - pv->command &= ~0x80; + if (!(st & (PVS_FIFORUN|PVS_DMAFILL))) { + st &= ~(SR_DMA|PVS_DMABG); + cmd &= ~0x80; } } + pv->status = st; + pv->command = cmd; } // sync FIFO to cycles void PicoVideoFIFOSync(int cycles) { + struct VdpFIFO *vf = &VdpFIFO; struct PicoVideo *pv = &Pico.video; int slots, done; // calculate #slots since last executed slot - slots = fifo_cyc2sl[cycles>>1] - fifo_slot; + slots = vf->fifo_cyc2sl[cycles>>1] - vf->fifo_slot; // advance FIFO queue by #done slots done = slots; while (done > 0 && pv->fifo_cnt) { - int l = AdvanceFIFOEntry(pv, done); - fifo_slot += l; + int l = AdvanceFIFOEntry(vf, pv, 
done); + vf->fifo_slot += l; done -= l; } if (done != slots) - SetFIFOState(pv); + SetFIFOState(vf, pv); } // drain FIFO, blocking 68k on the way. FIFO must be synced prior to drain. -int PicoVideoFIFODrain(int level, int cycles, int bgdma) +static int PicoVideoFIFODrain(int level, int cycles, int bgdma) { + struct VdpFIFO *vf = &VdpFIFO; struct PicoVideo *pv = &Pico.video; unsigned ocyc = cycles; int burn = 0; +//int osl = fifo_slot; // process FIFO entries until low level is reached - while (fifo_total > level && fifo_slot < fifo_maxslot && - (!(fifo_queue[fifo_qx] & FQ_BGDMA) || bgdma)) { - int b = fifo_queue[fifo_qx] & FQ_BYTE; - int cnt = ((fifo_total-level) << b) - (pv->fifo_cnt & b); - int slot = (pv->fifo_cntfifo_cnt:cnt) + fifo_slot; // target slot + while (vf->fifo_slot < vf->fifo_maxslot && cycles < 488 && + (vf->fifo_total > level || (vf->fifo_queue[vf->fifo_qx] & bgdma))) { + int b = vf->fifo_queue[vf->fifo_qx] & FQ_BYTE; + int cnt = bgdma ? pv->fifo_cnt : ((vf->fifo_total-level)<fifo_cnt&b); + int slot = (pv->fifo_cntfifo_cnt:cnt) + vf->fifo_slot; - if (slot > fifo_maxslot) { - // target in later scanline, advance to eol - slot = fifo_maxslot; + if (slot > vf->fifo_maxslot) { + // target slot in later scanline, advance to eol + slot = vf->fifo_maxslot; cycles = 488; } else { // advance FIFO to target slot and CPU to cycles at that slot - cycles = fifo_sl2cyc[slot]<<1; + cycles = vf->fifo_sl2cyc[slot]<<1; + } + if (slot > vf->fifo_slot) { + AdvanceFIFOEntry(vf, pv, slot - vf->fifo_slot); + vf->fifo_slot = slot; } - AdvanceFIFOEntry(pv, slot - fifo_slot); - fifo_slot = slot; } - burn = cycles - ocyc; + if (cycles > ocyc) + burn = cycles - ocyc; - SetFIFOState(pv); + SetFIFOState(vf, pv); return burn; } // read VDP data port -int PicoVideoFIFORead(void) +static int PicoVideoFIFORead(void) { + struct VdpFIFO *vf = &VdpFIFO; struct PicoVideo *pv = &Pico.video; int lc = SekCyclesDone()-Pico.t.m68c_line_start; int burn = 0; @@ -183,16 +200,16 @@ int 
PicoVideoFIFORead(void) if (pv->fifo_cnt) { PicoVideoFIFOSync(lc); // advance FIFO and CPU until FIFO is empty - burn = PicoVideoFIFODrain(0, lc, 1); + burn = PicoVideoFIFODrain(0, lc, FQ_BGDMA); lc += burn; } - if (fifo_total > 0) + if (pv->fifo_cnt) pv->status |= PVS_CPURD; // target slot is in later scanline else { // use next VDP access slot for reading, block 68k until then - fifo_slot = fifo_cyc2sl[lc>>1] + 1; - burn += (fifo_sl2cyc[fifo_slot]<<1) - lc; + vf->fifo_slot = vf->fifo_cyc2sl[lc>>1] + 1; + burn += (vf->fifo_sl2cyc[vf->fifo_slot]<<1) - lc; } return burn; @@ -201,50 +218,51 @@ int PicoVideoFIFORead(void) // write VDP data port int PicoVideoFIFOWrite(int count, int flags, unsigned sr_mask,unsigned sr_flags) { + struct VdpFIFO *vf = &VdpFIFO; struct PicoVideo *pv = &Pico.video; int lc = SekCyclesDone()-Pico.t.m68c_line_start; - int burn = 0, x, head = 0; + int burn = 0; if (pv->fifo_cnt) PicoVideoFIFOSync(lc); pv->status = (pv->status & ~sr_mask) | sr_flags; - if (count && fifo_ql < 8) { - // update FIFO state if it was empty - if (fifo_ql == 0) { - fifo_slot = fifo_cyc2sl[(lc+8)>>1]; // FIFO latency ~3 vdp slots - pv->fifo_cnt = count << (flags & FQ_BYTE); - pv->status |= PVS_FIFORUN; - } - + if (count && vf->fifo_ql < 8) { // determine queue position for entry - x = (fifo_qx + fifo_ql - 1) & 7; - if (fifo_ql && (fifo_queue[x] & FQ_BGDMA)) { + int x = (vf->fifo_qx + vf->fifo_ql - 1) & 7; + if (unlikely(vf->fifo_ql && (vf->fifo_queue[x] & FQ_BGDMA))) { // CPU FIFO writes have priority over a background DMA Fill/Copy - fifo_queue[(x+1) & 7] = fifo_queue[x]; - if (x == fifo_qx) { // overtaking to queue head? - // XXX if interrupting a DMA fill, fill data changes - int f = fifo_queue[x] & 7; - fifo_queue[(x+1) & 7] = (pv->fifo_cnt >> (f & FQ_BYTE) << 3) | f; - pv->fifo_cnt = count << (flags & FQ_BYTE); - head = 1; - } + // XXX if interrupting a DMA fill, fill data changes + if (x == vf->fifo_qx) { // overtaking to queue head? 
+ int f = vf->fifo_queue[x] & 7; + vf->fifo_queue[(x+1) & 7] = (pv->fifo_cnt >> (f & FQ_BYTE) << 3) | f; + pv->status &= ~PVS_FIFORUN; + } else + // push background DMA back + vf->fifo_queue[(x+1) & 7] = vf->fifo_queue[x]; x = (x-1) & 7; } - // create xfer queue entry - if (fifo_ql && !head && (fifo_queue[x] & 7) == flags) { + if ((pv->status & PVS_FIFORUN) && (vf->fifo_queue[x] & 7) == flags) { // amalgamate entries if of same type - fifo_queue[x] += (count << 3); - if (x == fifo_qx) // modifiying fifo head, adjust count + vf->fifo_queue[x] += (count << 3); + if (x == vf->fifo_qx) pv->fifo_cnt += count << (flags & FQ_BYTE); } else { - fifo_ql ++; + // create new xfer queue entry + vf->fifo_ql ++; x = (x+1) & 7; - fifo_queue[x] = (count << 3) | flags; + vf->fifo_queue[x] = (count << 3) | flags; + } + + // update FIFO state if it was empty + if (!(pv->status & PVS_FIFORUN)) { + vf->fifo_slot = vf->fifo_cyc2sl[(lc+8)>>1]; // FIFO latency ~3 vdp slots + pv->status |= PVS_FIFORUN; + pv->fifo_cnt = count << (flags & FQ_BYTE); } if (!(flags & FQ_BGDMA)) - fifo_total += count; + vf->fifo_total += count; } // if CPU is waiting for the bus, advance CPU and FIFO until bus is free @@ -257,11 +275,12 @@ int PicoVideoFIFOWrite(int count, int flags, unsigned sr_mask,unsigned sr_flags) // at HINT, advance FIFO to new scanline int PicoVideoFIFOHint(void) { + struct VdpFIFO *vf = &VdpFIFO; struct PicoVideo *pv = &Pico.video; int burn = 0; // reset slot to start of scanline - fifo_slot = 0; + vf->fifo_slot = 0; // if CPU is waiting for the bus, advance CPU and FIFO until bus is free if (pv->status & PVS_CPURD) @@ -280,18 +299,19 @@ void PicoVideoFIFOMode(int active, int h40) static const unsigned short *vdpsl2cyc[2][2] = { {vdpsl2cyc_32_bl, vdpsl2cyc_40_bl} , {vdpsl2cyc_32, vdpsl2cyc_40} }; + struct VdpFIFO *vf = &VdpFIFO; struct PicoVideo *pv = &Pico.video; int lc = SekCyclesDone() - Pico.t.m68c_line_start; active = active && !(pv->status & PVS_VB2); - if (fifo_maxslot) + if 
(vf->fifo_maxslot) PicoVideoFIFOSync(lc); - fifo_cyc2sl = vdpcyc2sl[active][h40]; - fifo_sl2cyc = vdpsl2cyc[active][h40]; + vf->fifo_cyc2sl = vdpcyc2sl[active][h40]; + vf->fifo_sl2cyc = vdpsl2cyc[active][h40]; // recalculate FIFO slot for new mode - fifo_slot = fifo_cyc2sl[lc>>1]-1; - fifo_maxslot = fifo_cyc2sl[488>>1]; + vf->fifo_slot = vf->fifo_cyc2sl[lc>>1]-1; + vf->fifo_maxslot = vf->fifo_cyc2sl[488>>1]; } @@ -342,7 +362,7 @@ static void VideoWrite(u16 d) static unsigned int VideoRead(void) { - unsigned int a, d = fifo_data[(fifo_dx+1)&3]; + unsigned int a, d = VdpFIFO.fifo_data[(VdpFIFO.fifo_dx+1)&3]; a=Pico.video.addr; a>>=1; @@ -351,7 +371,6 @@ static unsigned int VideoRead(void) { case 0: d=PicoMem.vram [a & 0x7fff]; break; case 8: d=PicoMem.cram [a & 0x003f] | (d & ~0x0eee); break; - case 4: if ((a & 0x3f) >= 0x28) a = 0; d=PicoMem.vsram [a & 0x003f] | (d & ~0x07ff); break; case 12:a=PicoMem.vram [a & 0x7fff]; if (Pico.video.addr&1) a >>= 8; @@ -618,8 +637,9 @@ static NOINLINE void CommandDma(void) PicoVideoFIFOSync(SekCyclesDone()-Pico.t.m68c_line_start); if (pvid->status & SR_DMA) { elprintf(EL_VDPDMA, "Dma overlap, left=%d @ %06x", - fifo_total, SekPc); - pvid->fifo_cnt = fifo_total = fifo_ql = 0; + VdpFIFO.fifo_total, SekPc); + pvid->fifo_cnt = VdpFIFO.fifo_total = VdpFIFO.fifo_ql = 0; + pvid->status &= ~(PVS_FIFORUN|PVS_DMAFILL); } len = GetDmaLength(); @@ -704,7 +724,7 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) if (!(PicoIn.opt&POPT_DIS_VDP_FIFO)) { - fifo_data[++fifo_dx&3] = d; + VdpFIFO.fifo_data[++VdpFIFO.fifo_dx&3] = d; SekCyclesBurnRun(PicoVideoFIFOWrite(1, pvid->type == 1, 0, PVS_CPUWR)); elprintf(EL_ASVDP, "VDP data write: [%04x] %04x [%u] {%i} @ %06x", @@ -714,7 +734,7 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) // start DMA fill on write. NB VSRAM and CRAM fills use wrong FIFO data. 
if (pvid->status & PVS_DMAFILL) - DmaFill(fifo_data[(fifo_dx + !!(pvid->type&~0x81))&3]); + DmaFill(VdpFIFO.fifo_data[(VdpFIFO.fifo_dx + !!(pvid->type&~0x81))&3]); break; @@ -860,9 +880,9 @@ static u32 VideoSr(const struct PicoVideo *pv) d |= SR_HB; PicoVideoFIFOSync(c); - if (fifo_total >= 4) + if (VdpFIFO.fifo_total >= 4) d |= SR_FULL; - else if (!fifo_total) + else if (!VdpFIFO.fifo_total) d |= SR_EMPT; return d; } @@ -974,16 +994,18 @@ unsigned char PicoVideoRead8HV_L(void) void PicoVideoSave(void) { + struct VdpFIFO *vf = &VdpFIFO; struct PicoVideo *pv = &Pico.video; int l, x; // account for all outstanding xfers XXX kludge, entry attr's not saved - for (l = fifo_ql, x = fifo_qx + l-1; l > 1; l--, x--) - pv->fifo_cnt += (fifo_queue[x&7] >> 3) << (fifo_queue[x&7] & FQ_BYTE); + for (l = vf->fifo_ql, x = vf->fifo_qx + l-1; l > 1; l--, x--) + pv->fifo_cnt += (vf->fifo_queue[x&7] >> 3) << (vf->fifo_queue[x&7] & FQ_BYTE); } void PicoVideoLoad(void) { + struct VdpFIFO *vf = &VdpFIFO; struct PicoVideo *pv = &Pico.video; int l; @@ -991,7 +1013,7 @@ void PicoVideoLoad(void) if (Pico.m.dma_xfers) { pv->status = SR_DMA|PVS_FIFORUN; pv->fifo_cnt = Pico.m.dma_xfers * (pv->type == 1 ? 
2 : 1); - fifo_total = Pico.m.dma_xfers; + vf->fifo_total = Pico.m.dma_xfers; Pico.m.dma_xfers = 0; } From 84e18560bba7ada8ccb2f3201c36ae68b01ec1fc Mon Sep 17 00:00:00 2001 From: kub Date: Mon, 30 Mar 2020 23:54:11 +0200 Subject: [PATCH 0296/1110] fix for gp2x audio regression --- pico/memory.c | 2 +- pico/sound/sound.c | 27 ++++++++++++++------------- platform/gp2x/940ctl.c | 6 +++--- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/pico/memory.c b/pico/memory.c index d61491c1..0fa7b8de 100644 --- a/pico/memory.c +++ b/pico/memory.c @@ -1057,11 +1057,11 @@ static int ym2612_write_local(u32 a, u32 d, int is_from_z80) break; } - PsndDoFM(get_scanline(is_from_z80)); #ifdef __GP2X__ if (PicoIn.opt & POPT_EXT_FM) return YM2612Write_940(a, d, get_scanline(is_from_z80)); #endif + PsndDoFM(get_scanline(is_from_z80)); return YM2612Write_(a, d); } diff --git a/pico/sound/sound.c b/pico/sound/sound.c index 155aa452..57d9c2e5 100644 --- a/pico/sound/sound.c +++ b/pico/sound/sound.c @@ -193,8 +193,8 @@ PICO_INTERNAL void PsndDoFM(int line_to) // Q16, number of samples since last call len = ((line_to-1) * Pico.snd.fm_mult) - Pico.snd.fm_pos; - // don't do this too often (no more than 256 per sec) - if (len >> 16 <= PicoIn.sndRate >> 9) + // don't do this too often (about every 4th scanline) + if (len >> 16 <= PicoIn.sndRate >> 12) return; // update position and calculate buffer offset and length @@ -281,22 +281,22 @@ static int PsndRender(int offset, int length) { int *buf32; int stereo = (PicoIn.opt & 8) >> 3; - int fmlen = ((Pico.snd.fm_pos+0x8000) >> 16) - offset; - int daclen = ((Pico.snd.dac_pos+0x80000) >> 20) - offset; + int fmlen = ((Pico.snd.fm_pos+0x8000) >> 16); + int daclen = ((Pico.snd.dac_pos+0x80000) >> 20); - offset <<= stereo; - buf32 = PsndBuffer+offset; + buf32 = PsndBuffer+(offset< 0) { short *dacbuf = PicoIn.sndOut + (daclen << stereo); + Pico.snd.dac_pos += (length-daclen) << 20; for (; length-daclen > 0; daclen++) { *dacbuf++ += 
Pico.snd.dac_val; if (stereo) dacbuf++; @@ -305,14 +305,15 @@ static int PsndRender(int offset, int length) // Add in parts of the FM buffer not yet done if (length-fmlen > 0) { - int *fmbuf = buf32 + (fmlen << stereo); + int *fmbuf = buf32 + ((fmlen-offset) << stereo); + Pico.snd.fm_pos += (length-fmlen) << 16; if (PicoIn.opt & POPT_EN_FM) YM2612UpdateOne(fmbuf, length-fmlen, stereo, 1); } // CD: PCM sound if (PicoIn.AHW & PAHW_MCD) { - pcd_pcm_update(buf32, length, stereo); + pcd_pcm_update(buf32, length-offset, stereo); } // CD: CDDA audio @@ -323,16 +324,16 @@ static int PsndRender(int offset, int length) { // note: only 44, 22 and 11 kHz supported, with forced stereo if (Pico_mcd->cdda_type == CT_MP3) - mp3_update(buf32, length, stereo); + mp3_update(buf32, length-offset, stereo); else - cdda_raw_update(buf32, length); + cdda_raw_update(buf32, length-offset); } if ((PicoIn.AHW & PAHW_32X) && (PicoIn.opt & POPT_EN_PWM)) - p32x_pwm_update(buf32, length, stereo); + p32x_pwm_update(buf32, length-offset, stereo); // convert + limit to normal 16bit output - PsndMix_32_to_16l(PicoIn.sndOut+offset, buf32, length); + PsndMix_32_to_16l(PicoIn.sndOut+(offset<writebuffsel ? shared_ctl->writebuff0 : shared_ctl->writebuff1; /* detect rapid ym updates */ - if (upd && !(writebuff_ptr & 0x80000000) && scanline < 224) + if (upd && !(writebuff_ptr & 0x80000000)) { - int mid = Pico.m.pal ? 68 : 93; - if (scanline > mid) { + int mid = (Pico.m.pal ? 
313 : 262) / 2; + if (scanline >= mid) { //printf("%05i:%03i: rapid ym\n", Pico.m.frame_count, scanline); writebuff[writebuff_ptr++ & 0xffff] = 0xfffe; writebuff_ptr |= 0x80000000; From 86198e034bcd25d2db460657f1f95336628f334c Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 2 Apr 2020 20:18:39 +0200 Subject: [PATCH 0297/1110] vdp DMA optimizations --- pico/videoport.c | 67 ++++++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 31 deletions(-) diff --git a/pico/videoport.c b/pico/videoport.c index 401190e0..dac74dc3 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -163,7 +163,7 @@ static int PicoVideoFIFODrain(int level, int cycles, int bgdma) // process FIFO entries until low level is reached while (vf->fifo_slot < vf->fifo_maxslot && cycles < 488 && - (vf->fifo_total > level || (vf->fifo_queue[vf->fifo_qx] & bgdma))) { + ((vf->fifo_total > level) | (vf->fifo_queue[vf->fifo_qx] & bgdma))) { int b = vf->fifo_queue[vf->fifo_qx] & FQ_BYTE; int cnt = bgdma ? 
pv->fifo_cnt : ((vf->fifo_total-level)<fifo_cnt&b); int slot = (pv->fifo_cntfifo_cnt:cnt) + vf->fifo_slot; @@ -283,10 +283,10 @@ int PicoVideoFIFOHint(void) vf->fifo_slot = 0; // if CPU is waiting for the bus, advance CPU and FIFO until bus is free - if (pv->status & PVS_CPURD) - burn = PicoVideoFIFORead(); - else if (pv->status & PVS_CPUWR) + if (pv->status & PVS_CPUWR) burn = PicoVideoFIFOWrite(0, 0, 0, 0); + else if (pv->status & PVS_CPURD) + burn = PicoVideoFIFORead(); return burn; } @@ -458,27 +458,23 @@ static void DmaSlow(int len, unsigned int source) switch (Pico.video.type) { case 1: // vram -#if 0 r = PicoMem.vram; - if (inc == 2 && !(a & 1) && (a >> 16) == ((a + len*2) >> 16) && - (source & ~mask) == ((source + len-1) & ~mask) && - (a << 16 >= (SATaddr+0x280)<<16 || (a + len*2) << 16 <= SATaddr<<16)) + if (inc == 2 && !(a & 1) && (a & ~0xffff) == ((a + len*2-1) & ~0xffff) && + ((a >= SATaddr+0x280) | ((a + len*2-1) < SATaddr)) && + (source & ~mask) == ((source + len-1) & ~mask)) { // most used DMA mode memcpy((char *)r + a, base + (source & mask), len * 2); a += len * 2; + break; } - else -#endif + for(; len; len--) { - for(; len; len--) - { - u16 d = base[source++ & mask]; - if(a & 1) d=(d<<8)|(d>>8); - VideoWriteVRAM(a, d); - // AutoIncrement - a = (a+inc) & ~0x20000; - } + u16 d = base[source++ & mask]; + if(a & 1) d=(d<<8)|(d>>8); + VideoWriteVRAM(a, d); + // AutoIncrement + a = (a+inc) & ~0x20000; } break; @@ -569,6 +565,14 @@ static NOINLINE void DmaFill(int data) switch (Pico.video.type) { case 1: // vram + if (inc == 1 && (a & ~0xffff) == ((a + len-1) & ~0xffff) && + ((a >= SATaddr+0x280) | ((a + len-1) < SATaddr))) + { + // most used DMA mode + memset(vr + (u16)a, high, len); + a += len; + break; + } for (l = len; l; l--) { // Write upper byte to adjacent address // (here we are byteswapped, so address is already 'adjacent') @@ -662,9 +666,8 @@ static NOINLINE void CommandDma(void) Pico.video.reg[0x16] = source >> 8; } -static NOINLINE void 
CommandChange(void) +static NOINLINE void CommandChange(struct PicoVideo *pvid) { - struct PicoVideo *pvid = &Pico.video; unsigned int cmd, addr; cmd = pvid->command; @@ -718,7 +721,7 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) DrawSync(0); // XXX it's unclear when vscroll data is fetched from vsram? if (pvid->pending) { - CommandChange(); + CommandChange(pvid); pvid->pending=0; } @@ -749,7 +752,7 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) pvid->command &= 0xffff0000; pvid->command |= d; pvid->pending = 0; - CommandChange(); + CommandChange(pvid); // Check for dma: if (d & 0x80) { DrawSync(SekCyclesDone() - Pico.t.m68c_line_start <= 488-390); @@ -896,7 +899,7 @@ PICO_INTERNAL_ASM unsigned int PicoVideoRead(unsigned int a) struct PicoVideo *pv = &Pico.video; unsigned int d = VideoSr(pv); if (pv->pending) { - CommandChange(); + CommandChange(pv); pv->pending = 0; } elprintf(EL_SR, "SR read: %04x [%u] @ %06x", d, SekCyclesDone(), SekPc); @@ -953,10 +956,11 @@ unsigned char PicoVideoRead8DataL(void) unsigned char PicoVideoRead8CtlH(void) { - u8 d = VideoSr(&Pico.video) >> 8; - if (Pico.video.pending) { - CommandChange(); - Pico.video.pending = 0; + struct PicoVideo *pv = &Pico.video; + u8 d = VideoSr(pv) >> 8; + if (pv->pending) { + CommandChange(pv); + pv->pending = 0; } elprintf(EL_SR, "SR read (h): %02x @ %06x", d, SekPc); return d; @@ -964,10 +968,11 @@ unsigned char PicoVideoRead8CtlH(void) unsigned char PicoVideoRead8CtlL(void) { - u8 d = VideoSr(&Pico.video); - if (Pico.video.pending) { - CommandChange(); - Pico.video.pending = 0; + struct PicoVideo *pv = &Pico.video; + u8 d = VideoSr(pv); + if (pv->pending) { + CommandChange(pv); + pv->pending = 0; } elprintf(EL_SR, "SR read (l): %02x @ %06x", d, SekPc); return d; From e8204ab27b91330d3d0f276b0c70df2fbad7845a Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 2 Apr 2020 20:33:56 +0200 Subject: [PATCH 0298/1110] ym2612 ARM, bug fixing and small 
optimizations --- pico/sound/ym2612_arm.S | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/pico/sound/ym2612_arm.S b/pico/sound/ym2612_arm.S index e3ec370d..59abb74e 100644 --- a/pico/sound/ym2612_arm.S +++ b/pico/sound/ym2612_arm.S @@ -15,6 +15,9 @@ #include "../arm_features.h" +@ very simple adaption YM2612 output rate to sample rate (~1M cycles @44100) +//#define INTERPOL + .equiv SLOT1, 0 .equiv SLOT2, 2 .equiv SLOT3, 1 @@ -42,10 +45,14 @@ @ r5=slot, r1=eg_cnt, trashes: r0,r2,r3 @ writes output to routp, but only if vol_out changes .macro update_eg_phase_slot slot +#if defined(INTERPOL) ldrh r0, [r5,#0x34] @ vol_out +#endif ldrb r2, [r5,#0x17] @ state add r3, r5, #0x1c +#if defined(INTERPOL) strh r0, [r5,#0x36] @ vol_ipol +#endif tst r2, r2 beq 0f @ EG_OFF @@ -59,11 +66,11 @@ bne 0f @ no volume change mov r3, r1, lsr r0 + ldrb r0, [r5,#0x30] @ ssg and r3, r3, #7 add r3, r3, r3, lsl #1 mov r3, r2, lsr r3 and r3, r3, #7 @ eg_inc_val shift, may be 0 - ldrb r0, [r5,#0x30] @ ssg ldrb r2, [r5,#0x17] @ state tst r0, #0x08 @ ssg enabled? @@ -124,8 +131,8 @@ b 11f 9: @ SSG-EG mode - cmp r2, #4 @ EG_ATT ldrh r0, [r5,#0x1a] @ volume, unsigned (0-1023) + cmp r2, #4 @ EG_ATT beq 4f cmp r0, #0x200 @ if ( volume < 0x200 ) @@ -170,9 +177,9 @@ strgeb r3, [r5,#0x17] @ state 10: @ finish - strh r0, [r5,#0x1a] @ volume ldrb r2, [r5,#0x30] @ ssg ldrb r3, [r5,#0x17] @ state + strh r0, [r5,#0x1a] @ volume cmp r2, #0x0c @ if ( ssg&0x04 && state > EG_REL ) cmpge r3, #EG_REL+1 rsbge r0, r0, #0x200 @ volume = (0x200-volume) & MAX_ATT @@ -206,10 +213,10 @@ .macro update_ssg_eg ldrh r0, [r5,#0x30] @ ssg+ssgn ldrb r2, [r5,#0x17] @ state - and r3, r0, #0x08 - cmp r3, #0x08 @ ssg enabled && ldrh r3, [r5,#0x1a] @ volume - cmpge r2, #EG_REL+1 @ state > EG_REL && + tst r0, #0x08 @ ssg enabled && + beq 9f + cmp r2, #EG_REL+1 @ state > EG_REL && cmpge r3, #0x200 @ volume >= 0x200? 
blt 9f @@ -222,7 +229,7 @@ orrne r0, r0, #0x400 @ ssgn = 4 strneh r0, [r5,#0x30] - eor r0, r0, #0x4 @ if ( !(ssg&0x04 ) + eor r0, r0, #0x4 @ if ( !(ssg&0x04) ) tst r0, #0x4 cmpne r2, #EG_ATT @ if ( state != EG_ATT ) movne r3, #0x400 @@ -747,7 +754,7 @@ eg_done: beq crl_loop @ output interpolation -#if 0 +#if 0 // too expensive on slow platforms @ basic interpolator, interpolate in middle region, else use closer value mov r3, r8, lsr #EG_SH @ eg_timer, [0..3<>EG_SH)/2 @@ -780,7 +787,7 @@ eg_done: mov r7, r7, lsl #16 orr r7, r7, r0 ror r7, r7, #16 -#elif 0 +#elif defined(INTERPOL) @ super-basic... just take value closest to sample point mov r3, r8, lsr #EG_SH-1 @ eg_timer, [0..3<>EG_SH) From b061bc166c023265a484e243eeff58735eab8bbb Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 7 Apr 2020 20:47:38 +0200 Subject: [PATCH 0299/1110] vdp rendering, sprite caching optimization --- pico/draw.c | 12 ++++++++---- pico/draw_arm.S | 2 +- pico/pico.h | 5 +++-- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/pico/draw.c b/pico/draw.c index 68af73e2..82bb3462 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -1601,7 +1601,6 @@ static int DrawDisplay(int sh) int win=0, edge=0, hvwind=0, lflags; int maxw, maxcells; - est->rendstatus &= ~(PDRAW_SPRITES_MOVED|PDRAW_DIRTY_SPRITES); est->rendstatus &= ~(PDRAW_SHHI_DONE|PDRAW_PLANE_HI_PRIO); if (pvid->reg[12]&1) { @@ -1713,6 +1712,7 @@ PICO_INTERNAL void PicoFrameStart(void) { int offs = 8, lines = 224; int dirty = ((Pico.est.rendstatus & PDRAW_SONIC_MODE) || Pico.m.dirtyPal); + int sprep = Pico.est.rendstatus & (PDRAW_SPRITES_MOVED|PDRAW_DIRTY_SPRITES); // prepare to do this frame Pico.est.rendstatus = 0; @@ -1732,6 +1732,8 @@ PICO_INTERNAL void PicoFrameStart(void) lines, (Pico.video.reg[12] & 1) ? 
0 : 1); rendstatus_old = Pico.est.rendstatus; } + if (sprep) + Pico.est.rendstatus |= PDRAW_PARSE_SPRITES; Pico.est.HighCol = HighColBase + offs * HighColIncrement; Pico.est.DrawLineDest = (char *)DrawLineDestBase + offs * DrawLineDestIncrement; @@ -1804,6 +1806,7 @@ static void PicoLine(int line, int offs, int sh, int bgc) void PicoDrawSync(int to, int blank_last_line) { + struct PicoEState *est = &Pico.est; int line, offs = 0; int sh = (Pico.video.reg[0xC] & 8) >> 3; // shadow/hilight? int bgc = Pico.video.reg[7]; @@ -1815,10 +1818,11 @@ void PicoDrawSync(int to, int blank_last_line) if (to > 223) to = 223; } - if (Pico.est.DrawScanline <= to - blank_last_line) + if (est->DrawScanline <= to - blank_last_line && (est->rendstatus & + (PDRAW_SPRITES_MOVED|PDRAW_DIRTY_SPRITES|PDRAW_PARSE_SPRITES))) PrepareSprites(to - blank_last_line + 1); - for (line = Pico.est.DrawScanline; line < to; line++) + for (line = est->DrawScanline; line < to; line++) PicoLine(line, offs, sh, bgc); // last line @@ -1829,7 +1833,7 @@ void PicoDrawSync(int to, int blank_last_line) else PicoLine(line, offs, sh, bgc); line++; } - Pico.est.DrawScanline = line; + est->DrawScanline = line; pprof_end(draw); } diff --git a/pico/draw_arm.S b/pico/draw_arm.S index 2ae6dba6..9b5a4e32 100644 --- a/pico/draw_arm.S +++ b/pico/draw_arm.S @@ -14,7 +14,7 @@ .equ PDRAW_SPRITES_MOVED, (1<<0) .equ PDRAW_WND_DIFF_PRIO, (1<<1) -.equ PDRAW_ACC_SPRITES, (1<<2) +.equ PDRAW_PARSE_SPRITES, (1<<2) .equ PDRAW_DIRTY_SPRITES, (1<<4) .equ PDRAW_PLANE_HI_PRIO, (1<<6) .equ PDRAW_SHHI_DONE, (1<<7) diff --git a/pico/pico.h b/pico/pico.h index 1a60ce34..efc30e5f 100644 --- a/pico/pico.h +++ b/pico/pico.h @@ -196,10 +196,11 @@ void vidConvCpyRGB565(void *to, void *from, int pixels); #endif void PicoDoHighPal555(int sh, int line, struct PicoEState *est); // internals -#define PDRAW_SPRITES_MOVED (1<<0) // (asm) +#define PDRAW_SPRITES_MOVED (1<<0) // SAT address modified #define PDRAW_WND_DIFF_PRIO (1<<1) // not all window tiles 
use same priority +#define PDRAW_PARSE_SPRITES (1<<2) // SAT needs parsing #define PDRAW_INTERLACE (1<<3) -#define PDRAW_DIRTY_SPRITES (1<<4) // (asm) +#define PDRAW_DIRTY_SPRITES (1<<4) // SAT modified #define PDRAW_SONIC_MODE (1<<5) // mid-frame palette changes for 8bit renderer #define PDRAW_PLANE_HI_PRIO (1<<6) // have layer with all hi prio tiles (mk3) #define PDRAW_SHHI_DONE (1<<7) // layer sh/hi already processed From c9183791372cea2c39d0233186496c26f5c7cf3e Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 7 Apr 2020 22:07:38 +0200 Subject: [PATCH 0300/1110] ym2612 ARM optimisations --- pico/sound/ym2612_arm.S | 200 +++++++++++++++++----------------------- 1 file changed, 83 insertions(+), 117 deletions(-) diff --git a/pico/sound/ym2612_arm.S b/pico/sound/ym2612_arm.S index 59abb74e..1370e6cf 100644 --- a/pico/sound/ym2612_arm.S +++ b/pico/sound/ym2612_arm.S @@ -15,8 +15,8 @@ #include "../arm_features.h" -@ very simple adaption YM2612 output rate to sample rate (~1M cycles @44100) -//#define INTERPOL +@ very simple YM2612 output rate to sample rate adaption (~500k cycles @44100) +#define INTERPOL .equiv SLOT1, 0 .equiv SLOT2, 2 @@ -44,7 +44,7 @@ @ r5=slot, r1=eg_cnt, trashes: r0,r2,r3 @ writes output to routp, but only if vol_out changes -.macro update_eg_phase_slot slot +.macro update_eg_phase_slot #if defined(INTERPOL) ldrh r0, [r5,#0x34] @ vol_out #endif @@ -190,21 +190,6 @@ ldrh r3, [r5,#0x18] @ tl add r0, r0, r3 @ volume += tl strh r0, [r5,#0x34] @ vol_out -.if \slot == SLOT1 - mov r6, r6, lsr #16 - orr r6, r0, r6, lsl #16 -.elseif \slot == SLOT2 - mov r6, r6, lsl #16 - mov r0, r0, lsl #16 - orr r6, r0, r6, lsr #16 -.elseif \slot == SLOT3 - mov r7, r7, lsr #16 - orr r7, r0, r7, lsl #16 -.elseif \slot == SLOT4 - mov r7, r7, lsl #16 - mov r0, r0, lsl #16 - orr r7, r0, r7, lsr #16 -.endif 0: @ EG_OFF .endm @@ -672,24 +657,16 @@ chan_render_loop: mov r11, r1 and r0, r0, #7 orr r4, r4, r0 @ (length<<8)|algo - add r0, lr, #0x44 - ldmia r0, {r8,r9} @ eg_timer, 
eg_timer_add + ldr r8, [lr, #0x44] @ eg_timer + ldr r9, [lr, #0x48] @ eg_timer_add ldr r10, [lr, #0x54] @ op1_out -@ ldmia lr, {r6,r7} @ load volumes - ldr r5, [lr, #0x40] @ CH - ldrh r6, [r5, #0x34] @ vol_out values for all slots - ldrh r2, [r5, #0x34+SLOT_STRUCT_SIZE*2] - ldrh r7, [r5, #0x34+SLOT_STRUCT_SIZE] - ldrh r3, [r5, #0x34+SLOT_STRUCT_SIZE*3] - orr r6, r6, r2, lsl #16 - orr r7, r7, r3, lsl #16 tst r12, #8 @ lfo? beq crl_loop crl_loop_lfo: - add r0, lr, #0x30 - ldmia r0, {r1,r2} @ lfo_cnt, lfo_inc + ldr r1, [lr, #0x30] @ lfo_cnt + ldr r2, [lr, #0x34] @ lfo_inc subs r4, r4, #0x100 bmi crl_loop_end @@ -707,37 +684,48 @@ crl_loop: bmi crl_loop_end @ -- SSG -- - add r0, lr, #0x3c - ldmia r0, {r1,r5} @ eg_cnt, CH + ldr r5, [lr, #0x40] @ CH @ r5=slot, trashes: r0,r2,r3 + mov r6, #4 +ssg_upd_loop: update_ssg_eg - add r5, r5, #SLOT_STRUCT_SIZE*2 @ SLOT2 (2) - update_ssg_eg - sub r5, r5, #SLOT_STRUCT_SIZE @ SLOT3 (1) - update_ssg_eg - add r5, r5, #SLOT_STRUCT_SIZE*2 @ SLOT4 (3) +#if 0 + subs r6, r6, #1 + addne r5, r5, #SLOT_STRUCT_SIZE +#else + add r5, r5, #SLOT_STRUCT_SIZE*2 update_ssg_eg + subs r6, r6, #2 + subne r5, r5, #SLOT_STRUCT_SIZE +#endif + bne ssg_upd_loop sub r5, r5, #SLOT_STRUCT_SIZE*3 @ -- EG -- add r8, r8, r9 cmp r8, #EG_TIMER_OVERFLOW bcc eg_done + ldr r1, [lr, #0x3c] @ eg_cnt eg_loop: sub r8, r8, #EG_TIMER_OVERFLOW add r1, r1, #1 cmp r1, #4096 movge r1, #1 - @ SLOT1 (0) - @ r5=slot, r1=eg_cnt, trashes: r0,r2,r3 - update_eg_phase_slot SLOT1 - add r5, r5, #SLOT_STRUCT_SIZE*2 @ SLOT2 (2) - update_eg_phase_slot SLOT2 - sub r5, r5, #SLOT_STRUCT_SIZE @ SLOT3 (1) - update_eg_phase_slot SLOT3 - add r5, r5, #SLOT_STRUCT_SIZE*2 @ SLOT4 (3) - update_eg_phase_slot SLOT4 + + mov r6, #4 +eg_upd_loop: + update_eg_phase_slot +#if 1 + subs r6, r6, #1 + addne r5, r5, #SLOT_STRUCT_SIZE +#else + add r5, r5, #SLOT_STRUCT_SIZE*2 + update_eg_phase_slot + subs r6, r6, #2 + subne r5, r5, #SLOT_STRUCT_SIZE +#endif + bne eg_upd_loop cmp r8, #EG_TIMER_OVERFLOW sub r5, r5, 
#SLOT_STRUCT_SIZE*3 @@ -754,64 +742,49 @@ eg_done: beq crl_loop @ output interpolation -#if 0 // too expensive on slow platforms +#if defined(INTERPOL) +#if 1 // possibly too expensive for slow platforms? @ basic interpolator, interpolate in middle region, else use closer value mov r3, r8, lsr #EG_SH @ eg_timer, [0..3<>EG_SH)/2 - bgt 0f @ mix is vol_out + bne 0f @ mix is vol_out - ldrh r0, [r5,#0x36] @ SLOT1 vol_ipol - lsleq r2, r6, #16 - addeq r0, r0, r2, lsr #16 - lsreq r0, r0, #1 - mov r6, r6, lsr #16 - orr r6, r0, r6, lsl #16 - - ldrh r0, [r5,#0x36+SLOT_STRUCT_SIZE*2] @ SLOT2 vol_ipol - addeq r0, r0, r6, lsr #16 - lsreq r0, r0, #1 - mov r6, r6, lsl #16 - orr r6, r6, r0 - ror r6, r6, #16 - - ldrh r0, [r5,#0x36+SLOT_STRUCT_SIZE] @ SLOT3 vol_ipol - lsleq r2, r7, #16 - addeq r0, r0, r2, lsr #16 - lsreq r0, r0, #1 - mov r7, r7, lsr #16 - orr r7, r0, r7, lsl #16 - - ldrh r0, [r5,#0x36+SLOT_STRUCT_SIZE*3] @ SLOT4 vol_ipol - addeq r0, r0, r7, lsr #16 - lsreq r0, r0, #1 - mov r7, r7, lsl #16 - orr r7, r7, r0 - ror r7, r7, #16 -#elif defined(INTERPOL) + ldr r6, [r5, #0x34] @ vol_out, vol_ipol for all slots + ldr r2, [r5, #0x34+SLOT_STRUCT_SIZE*2] + ldr r7, [r5, #0x34+SLOT_STRUCT_SIZE] + ldr r3, [r5, #0x34+SLOT_STRUCT_SIZE*3] + add r6, r6, r6, lsl #16 + lsr r6, r6, #17 + add r2, r2, r2, lsl #16 + lsr r2, r2, #17 + add r7, r7, r7, lsl #16 + lsr r7, r7, #17 + add r3, r3, r3, lsl #16 + lsr r3, r3, #17 + b 1f +#else @ super-basic... 
just take value closest to sample point mov r3, r8, lsr #EG_SH-1 @ eg_timer, [0..3<>EG_SH) - bge 0f @ mix is vol_out - - ldrh r0, [r5,#0x36] @ SLOT1 vol_ipol - mov r6, r6, lsr #16 - orr r6, r0, r6, lsl #16 - - ldrh r0, [r5,#0x36+SLOT_STRUCT_SIZE*2] @ SLOT2 vol_ipol - mov r6, r6, lsl #16 - orr r6, r6, r0 - ror r6, r6, #16 - - ldrh r0, [r5,#0x36+SLOT_STRUCT_SIZE] @ SLOT3 vol_ipol - mov r7, r7, lsr #16 - orr r7, r0, r7, lsl #16 - - ldrh r0, [r5,#0x36+SLOT_STRUCT_SIZE*3] @ SLOT4 vol_ipol - mov r7, r7, lsl #16 - orr r7, r7, r0 - ror r7, r7, #16 #endif -0: + +0: ldrgeh r6, [r5, #0x34] @ vol_out values for all slots + ldrlth r6, [r5, #0x36] @ vol_ipol values for all slots + ldrgeh r2, [r5, #0x34+SLOT_STRUCT_SIZE*2] + ldrlth r2, [r5, #0x36+SLOT_STRUCT_SIZE*2] + ldrgeh r7, [r5, #0x34+SLOT_STRUCT_SIZE] + ldrlth r7, [r5, #0x36+SLOT_STRUCT_SIZE] + ldrgeh r3, [r5, #0x34+SLOT_STRUCT_SIZE*3] + ldrlth r3, [r5, #0x36+SLOT_STRUCT_SIZE*3] + +#else + ldrh r6, [r5, #0x34] @ vol_out values for all slots + ldrh r2, [r5, #0x34+SLOT_STRUCT_SIZE*2] + ldrh r7, [r5, #0x34+SLOT_STRUCT_SIZE] + ldrh r3, [r5, #0x34+SLOT_STRUCT_SIZE*3] +#endif +1: orr r6, r6, r2, lsl #16 + orr r7, r7, r3, lsl #16 @ -- SLOT1 -- PIC_LDR(r3, r2, ym_tl_tab) @@ -893,34 +866,28 @@ crl_algo_done: strne r1, [r11], #4 b crl_do_phase -ctl_sample_skip: - and r1, r12, #1 - add r1, r1, #1 - add r11,r11, r1, lsl #2 - b crl_do_phase - ctl_sample_mono: ldr r1, [r11] add r1, r0, r1 str r1, [r11], #4 + b crl_do_phase + +ctl_sample_skip: + and r1, r12, #1 + add r1, r1, #1 + add r11,r11, r1, lsl #2 crl_do_phase: @ -- PHASE UPDATE -- add r5, lr, #0x10 - ldmia r5, {r0-r1} - add r5, lr, #0x20 - ldmia r5, {r2-r3} - add r5, lr, #0x10 - add r0, r0, r2 - add r1, r1, r3 - stmia r5!,{r0-r1} - ldmia r5, {r0-r1} - add r5, lr, #0x28 - ldmia r5, {r2-r3} - add r5, lr, #0x18 - add r0, r0, r2 - add r1, r1, r3 - stmia r5, {r0-r1} + ldmia r5, {r0-r3,r6-r7} + add r0, r0, r6 + add r1, r1, r7 + ldr r6, [r5, #0x18] + ldr r7, [r5, #0x1c] + add r2, r2, r6 + 
add r3, r3, r7 + stmia r5, {r0-r3} tst r12, #8 bne crl_loop_lfo @@ -928,7 +895,6 @@ crl_do_phase: crl_loop_end: -@ stmia lr, {r6,r7} @ save volumes (for debug) str r8, [lr, #0x44] @ eg_timer str r12, [lr, #0x4c] @ pack (for lfo_ampm) str r4, [lr, #0x50] @ was_update From 7c1c9c7742a09ba1a9a22b023d97c52af0e1e237 Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 7 Apr 2020 22:23:52 +0200 Subject: [PATCH 0301/1110] menu background fix for pal mode --- pico/pico.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pico/pico.c b/pico/pico.c index 87e22e59..579cdd0d 100644 --- a/pico/pico.c +++ b/pico/pico.c @@ -282,7 +282,7 @@ void PicoFrameDrawOnly(void) { if (!(PicoIn.AHW & PAHW_SMS)) { PicoFrameStart(); - PicoDrawSync(223, 0); + PicoDrawSync(Pico.m.pal?239:223, 0); } else { PicoFrameDrawOnlyMS(); } From 74cc7aebf6a63af0506d311353585329c00f616f Mon Sep 17 00:00:00 2001 From: kub Date: Mon, 13 Apr 2020 22:20:13 +0200 Subject: [PATCH 0302/1110] sh2 timer optimization --- cpu/sh2/sh2.h | 1 + pico/32x/32x.c | 10 ++++++++-- pico/32x/memory.c | 14 +++++++------- pico/32x/sh2soc.c | 44 ++++++++++++++++++++++---------------------- pico/pico_int.h | 3 ++- 5 files changed, 40 insertions(+), 32 deletions(-) diff --git a/cpu/sh2/sh2.h b/cpu/sh2/sh2.h index 2f2dfd92..aabe45be 100644 --- a/cpu/sh2/sh2.h +++ b/cpu/sh2/sh2.h @@ -48,6 +48,7 @@ typedef struct SH2_ #define SH2_STATE_CPOLL (1 << 2) // polling comm regs #define SH2_STATE_VPOLL (1 << 3) // polling VDP #define SH2_STATE_RPOLL (1 << 4) // polling address in SDRAM +#define SH2_TIMER_RUN (1 << 8) // SOC WDT timer is running unsigned int state; uint32_t poll_addr; int poll_cycles; diff --git a/pico/32x/32x.c b/pico/32x/32x.c index 0f0cc4f5..ddd03fa8 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -508,12 +508,18 @@ void sync_sh2s_normal(unsigned int m68k_target) now = ssh2.m68krcycles_done; } if (CYCLES_GT(now, timer_cycles+STEP_N)) { - p32x_timers_do(now - timer_cycles); + if (msh2.state & SH2_TIMER_RUN) + 
p32x_timer_do(&msh2, now - timer_cycles); + if (ssh2.state & SH2_TIMER_RUN) + p32x_timer_do(&ssh2, now - timer_cycles); timer_cycles = now; } } - p32x_timers_do(now - timer_cycles); + if (msh2.state & SH2_TIMER_RUN) + p32x_timer_do(&msh2, now - timer_cycles); + if (ssh2.state & SH2_TIMER_RUN) + p32x_timer_do(&ssh2, now - timer_cycles); timer_cycles = now; } pprof_end_sub(m68k); diff --git a/pico/32x/memory.c b/pico/32x/memory.c index f4f0a18b..3f597288 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -111,7 +111,7 @@ void p32x_m68k_poll_event(u32 flags) m68k_poll.addr1 = m68k_poll.addr2 = m68k_poll.cnt = 0; } -static void NOINLINE sh2_poll_detect(u32 a, SH2 *sh2, u32 flags, int maxcnt) +void NOINLINE p32x_sh2_poll_detect(u32 a, SH2 *sh2, u32 flags, int maxcnt) { u32 cycles_done = sh2_cycles_done_t(sh2); @@ -275,7 +275,7 @@ u32 REGPARM(3) p32x_sh2_poll_memory16(u32 a, u32 d, SH2 *sh2) d = (s16)sh2_poll_read(a, d, cycles, sh2); } - sh2_poll_detect(a, sh2, SH2_STATE_RPOLL, 5); + p32x_sh2_poll_detect(a, sh2, SH2_STATE_RPOLL, 5); DRC_RESTORE_SR(sh2); return d; @@ -296,7 +296,7 @@ u32 REGPARM(3) p32x_sh2_poll_memory32(u32 a, u32 d, SH2 *sh2) ((u16)sh2_poll_read(a+2, d, cycles, sh2)); } - sh2_poll_detect(a, sh2, SH2_STATE_RPOLL, 5); + p32x_sh2_poll_detect(a, sh2, SH2_STATE_RPOLL, 5); DRC_RESTORE_SR(sh2); return d; @@ -735,7 +735,7 @@ static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2) return (r[0] & P32XS_FM) | Pico32x.sh2_regs[0] | Pico32x.sh2irq_mask[sh2->is_slave]; case 0x04/2: // H count (often as comm too) - sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9); + p32x_sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9); cycles = sh2_cycles_done_m68k(sh2); sh2s_sync_on_read(sh2, cycles); return sh2_poll_read(a, Pico32x.sh2_regs[4 / 2], cycles, sh2); @@ -769,7 +769,7 @@ static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2) case 0x2a/2: case 0x2c/2: case 0x2e/2: - sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9); + p32x_sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9); cycles = 
sh2_cycles_done_m68k(sh2); sh2s_sync_on_read(sh2, cycles); return sh2_poll_read(a, r[a / 2], cycles, sh2); @@ -1456,7 +1456,7 @@ static u32 REGPARM(2) sh2_read8_cs0(u32 a, SH2 *sh2) if ((a & 0x3fff0) == 0x4100) { d = p32x_vdp_read16(a); - sh2_poll_detect(a, sh2, SH2_STATE_VPOLL, 9); + p32x_sh2_poll_detect(a, sh2, SH2_STATE_VPOLL, 9); goto out_16to8; } @@ -1519,7 +1519,7 @@ static u32 REGPARM(2) sh2_read16_cs0(u32 a, SH2 *sh2) if ((a & 0x3fff0) == 0x4100) { d = p32x_vdp_read16(a); - sh2_poll_detect(a, sh2, SH2_STATE_VPOLL, 9); + p32x_sh2_poll_detect(a, sh2, SH2_STATE_VPOLL, 9); goto out; } diff --git a/pico/32x/sh2soc.c b/pico/32x/sh2soc.c index cf11666d..8895d49b 100644 --- a/pico/32x/sh2soc.c +++ b/pico/32x/sh2soc.c @@ -209,6 +209,9 @@ void p32x_timers_recalc(void) // SH2 timer step for (i = 0; i < 2; i++) { + sh2s[i].state &= ~SH2_TIMER_RUN; + if (PREG8(sh2s[i].peri_regs, 0x80) & 0x20) // TME + sh2s[i].state |= SH2_TIMER_RUN; tmp = PREG8(sh2s[i].peri_regs, 0x80) & 7; // Sclk cycles per timer tick if (tmp) @@ -222,32 +225,29 @@ void p32x_timers_recalc(void) } } -void p32x_timers_do(unsigned int m68k_slice) +NOINLINE void p32x_timer_do(SH2 *sh2, unsigned int m68k_slice) { unsigned int cycles = m68k_slice * 3; - int cnt, i; + void *pregs = sh2->peri_regs; + int cnt; int i = sh2->is_slave; - // WDT timers - for (i = 0; i < 2; i++) { - void *pregs = sh2s[i].peri_regs; - if (PREG8(pregs, 0x80) & 0x20) { // TME - timer_cycles[i] += cycles; - // cnt = timer_cycles[i] / timer_tick_cycles[i]; - cnt = (1ULL * timer_cycles[i] * timer_tick_factor[i]) >> 32; - timer_cycles[i] -= timer_tick_cycles[i] * cnt; - if (timer_cycles[i] > timer_tick_cycles[i]) - timer_cycles[i] -= timer_tick_cycles[i], cnt++; - cnt += PREG8(pregs, 0x81); - if (cnt >= 0x100) { - int level = PREG8(pregs, 0xe3) >> 4; - int vector = PREG8(pregs, 0xe4) & 0x7f; - elprintf(EL_32XP, "%csh2 WDT irq (%d, %d)", - i ? 
's' : 'm', level, vector); - sh2_internal_irq(&sh2s[i], level, vector); - cnt &= 0xff; - } - PREG8(pregs, 0x81) = cnt; + // WDT timer + timer_cycles[i] += cycles; + if (timer_cycles[i] > timer_tick_cycles[i]) { + // cnt = timer_cycles[i] / timer_tick_cycles[i]; + cnt = (1ULL * timer_cycles[i] * timer_tick_factor[i]) >> 32; + timer_cycles[i] -= timer_tick_cycles[i] * cnt; + + cnt += PREG8(pregs, 0x81); + if (cnt >= 0x100) { + int level = PREG8(pregs, 0xe3) >> 4; + int vector = PREG8(pregs, 0xe4) & 0x7f; + elprintf(EL_32XP, "%csh2 WDT irq (%d, %d)", + i ? 's' : 'm', level, vector); + sh2_internal_irq(sh2, level, vector); + cnt &= 0xff; } + PREG8(pregs, 0x81) = cnt; } } diff --git a/pico/pico_int.h b/pico/pico_int.h index 5fed483d..e4bd4c1e 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -977,6 +977,7 @@ unsigned int REGPARM(3) p32x_sh2_poll_memory8(unsigned int a, unsigned int d, SH unsigned int REGPARM(3) p32x_sh2_poll_memory16(unsigned int a, unsigned int d, SH2 *sh2); unsigned int REGPARM(3) p32x_sh2_poll_memory32(unsigned int a, unsigned int d, SH2 *sh2); void *p32x_sh2_get_mem_ptr(unsigned int a, unsigned int *mask, SH2 *sh2); +void p32x_sh2_poll_detect(unsigned int a, SH2 *sh2, unsigned int flags, int maxcnt); void p32x_sh2_poll_event(SH2 *sh2, unsigned int flags, unsigned int m68k_cycles); int p32x_sh2_memcpy(unsigned int dst, unsigned int src, int count, int size, SH2 *sh2); @@ -1012,7 +1013,7 @@ void p32x_pwm_state_loaded(void); void p32x_dreq0_trigger(void); void p32x_dreq1_trigger(void); void p32x_timers_recalc(void); -void p32x_timers_do(unsigned int m68k_slice); +void p32x_timer_do(SH2 *sh2, unsigned int m68k_slice); void sh2_peripheral_reset(SH2 *sh2); unsigned int REGPARM(2) sh2_peripheral_read8(unsigned int a, SH2 *sh2); unsigned int REGPARM(2) sh2_peripheral_read16(unsigned int a, SH2 *sh2); From bb0488a6ba014bb23eed311176aca4f019c567fa Mon Sep 17 00:00:00 2001 From: kub Date: Mon, 13 Apr 2020 22:22:33 +0200 Subject: [PATCH 0303/1110] 32x pwm, 
tiny optimization --- pico/32x/pwm.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/pico/32x/pwm.c b/pico/32x/pwm.c index 3e5ce0ae..ec4bdb3e 100644 --- a/pico/32x/pwm.c +++ b/pico/32x/pwm.c @@ -9,7 +9,7 @@ static struct { int cycles; - int mult; + unsigned mult; int ptr; int irq_reload; int doing_fifo; @@ -58,11 +58,11 @@ static void do_pwm_irq(SH2 *sh2, unsigned int m68k_cycles) static int convert_sample(unsigned int v) { - if (v == 0) - return 0; if (v > pwm.cycles) v = pwm.cycles; - return (v * 2 - pwm.cycles) / 2 * pwm.mult; + if (v == 0) + return 0; + return v * pwm.mult - 0x10000/2; } #define consume_fifo(sh2, m68k_cycles) { \ @@ -89,19 +89,21 @@ static void consume_fifo_do(SH2 *sh2, unsigned int m68k_cycles, // this is for recursion from dreq1 writes pwm.doing_fifo = 1; - for (; sh2_cycles_diff >= pwm.cycles; sh2_cycles_diff -= pwm.cycles) + while (sh2_cycles_diff >= pwm.cycles) { + sh2_cycles_diff -= pwm.cycles; + if (Pico32x.pwm_p[0] > 0) { mem->pwm_index[0] = (mem->pwm_index[0]+1) % 4; Pico32x.pwm_p[0]--; pwm.current[0] = convert_sample(fifo_l[mem->pwm_index[0]]); - sum |= pwm.current[0]; + sum |= (u16)pwm.current[0]; } if (Pico32x.pwm_p[1] > 0) { mem->pwm_index[1] = (mem->pwm_index[1]+1) % 4; Pico32x.pwm_p[1]--; pwm.current[1] = convert_sample(fifo_r[mem->pwm_index[1]]); - sum |= pwm.current[1]; + sum |= (u16)pwm.current[1]; } mem->pwm[pwm.ptr * 2 ] = pwm.current[0]; @@ -234,9 +236,7 @@ void p32x_pwm_write16(unsigned int a, unsigned int d, fifo = Pico32xMem->pwm_fifo[1]; idx = Pico32xMem->pwm_index[1]; if (Pico32x.pwm_p[1] < 3) { - if (pwm.irq_state == PWM_IRQ_STOPPED) - pwm.irq_state = PWM_IRQ_LOW; - if (Pico32x.pwm_p[1] == 2 && pwm.irq_state >= PWM_IRQ_LOW) { + if (Pico32x.pwm_p[1] == 2 && pwm.irq_state >= PWM_IRQ_STOPPED) { // buffer full. 
If there was no buffer underrun after last fill, // try increasing reload rate to reduce IRQs if (pwm.irq_reload < 3 && pwm.irq_state == PWM_IRQ_HIGH) @@ -250,7 +250,7 @@ void p32x_pwm_write16(unsigned int a, unsigned int d, pwm.irq_reload = pwm.irq_timer; pwm.irq_state = PWM_IRQ_LOCKED; idx = (idx+1) % 4; - Pico32xMem->pwm_index[0] = idx; + Pico32xMem->pwm_index[1] = idx; } fifo[(idx+Pico32x.pwm_p[1]) % 4] = (d - 1) & 0x0fff; if (a != 8) break; // fallthrough if MONO From 9a02334f3af6dcd479c471b70a1dc2cf909b4652 Mon Sep 17 00:00:00 2001 From: kub Date: Mon, 13 Apr 2020 22:26:15 +0200 Subject: [PATCH 0304/1110] add sh2 ubc area to poll detection --- pico/32x/sh2soc.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/pico/32x/sh2soc.c b/pico/32x/sh2soc.c index 8895d49b..369fc0de 100644 --- a/pico/32x/sh2soc.c +++ b/pico/32x/sh2soc.c @@ -273,9 +273,14 @@ u32 REGPARM(2) sh2_peripheral_read8(u32 a, SH2 *sh2) a &= 0x1ff; d = PREG8(r, a); - sh2->poll_cnt = 0; elprintf_sh2(sh2, EL_32XP, "peri r8 [%08x] %02x @%06x", a | ~0x1ff, d, sh2_pc(sh2)); + if ((a & 0x1c0) == 0x140) { + // abused as comm area + DRC_SAVE_SR(sh2); + p32x_sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 3); + DRC_RESTORE_SR(sh2); + } return d; } @@ -287,9 +292,14 @@ u32 REGPARM(2) sh2_peripheral_read16(u32 a, SH2 *sh2) a &= 0x1fe; d = r[(a / 2) ^ 1]; - sh2->poll_cnt = 0; elprintf_sh2(sh2, EL_32XP, "peri r16 [%08x] %04x @%06x", a | ~0x1ff, d, sh2_pc(sh2)); + if ((a & 0x1c0) == 0x140) { + // abused as comm area + DRC_SAVE_SR(sh2); + p32x_sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 3); + DRC_RESTORE_SR(sh2); + } return d; } @@ -300,9 +310,14 @@ u32 REGPARM(2) sh2_peripheral_read32(u32 a, SH2 *sh2) a &= 0x1fc; d = sh2->peri_regs[a / 4]; - sh2->poll_cnt = 0; elprintf_sh2(sh2, EL_32XP, "peri r32 [%08x] %08x @%06x", a | ~0x1ff, d, sh2_pc(sh2)); + if ((a & 0x1c0) == 0x140) { + // abused as comm area + DRC_SAVE_SR(sh2); + p32x_sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 3); + 
DRC_RESTORE_SR(sh2); + } return d; } @@ -378,6 +393,9 @@ void REGPARM(3) sh2_peripheral_write8(u32 a, u32 d, SH2 *sh2) break; } PREG8(r, a) = d; + + if ((a & 0x1c0) == 0x140) + p32x_sh2_poll_event(sh2, SH2_STATE_CPOLL, SekCyclesDone()); } void REGPARM(3) sh2_peripheral_write16(u32 a, u32 d, SH2 *sh2) @@ -400,6 +418,8 @@ void REGPARM(3) sh2_peripheral_write16(u32 a, u32 d, SH2 *sh2) } r[(a / 2) ^ 1] = d; + if ((a & 0x1c0) == 0x140) + p32x_sh2_poll_event(sh2, SH2_STATE_CPOLL, SekCyclesDone()); } void REGPARM(3) sh2_peripheral_write32(u32 a, u32 d, SH2 *sh2) @@ -457,14 +477,15 @@ void REGPARM(3) sh2_peripheral_write32(u32 a, u32 d, SH2 *sh2) if (!(dmac->dmaor & DMA_DME)) return; - DRC_SAVE_SR(sh2); if ((dmac->chan[0].chcr & (DMA_TE|DMA_DE)) == DMA_DE) dmac_trigger(sh2, &dmac->chan[0]); if ((dmac->chan[1].chcr & (DMA_TE|DMA_DE)) == DMA_DE) dmac_trigger(sh2, &dmac->chan[1]); - DRC_RESTORE_SR(sh2); break; } + + if ((a & 0x1c0) == 0x140) + p32x_sh2_poll_event(sh2, SH2_STATE_CPOLL, SekCyclesDone()); } /* 32X specific */ From 2eb213314aee4d6865cc0b171ebbee31ac66a4c8 Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 22 Apr 2020 20:29:53 +0200 Subject: [PATCH 0305/1110] sh2, optimizations to innermost run loop --- cpu/sh2/sh2.h | 17 +++++++++-------- pico/32x/32x.c | 13 ++++++++----- pico/pico_int.h | 9 ++++----- 3 files changed, 21 insertions(+), 18 deletions(-) diff --git a/cpu/sh2/sh2.h b/cpu/sh2/sh2.h index aabe45be..b0054c05 100644 --- a/cpu/sh2/sh2.h +++ b/cpu/sh2/sh2.h @@ -75,6 +75,7 @@ typedef struct SH2_ unsigned int cycles_timeslice; struct SH2_ *other_sh2; + int (*run)(struct SH2_ *, int); // we use 68k reference cycles for easier sync unsigned int m68krcycles_done; @@ -82,7 +83,7 @@ typedef struct SH2_ unsigned int mult_sh2_to_m68k; uint8_t data_array[0x1000]; // cache (can be used as RAM) - uint32_t peri_regs[0x200/4]; // periphereal regs + uint32_t peri_regs[0x200/4]; // peripheral regs } SH2; #define CYCLE_MULT_SHIFT 10 @@ -103,17 +104,17 @@ void sh2_unpack(SH2 
*sh2, const unsigned char *buff); int sh2_execute_drc(SH2 *sh2c, int cycles); int sh2_execute_interpreter(SH2 *sh2c, int cycles); -static __inline int sh2_execute(SH2 *sh2, int cycles, int use_drc) +static __inline void sh2_execute_prepare(SH2 *sh2, int use_drc) +{ + sh2->run = use_drc ? sh2_execute_drc : sh2_execute_interpreter; +} + +static __inline int sh2_execute(SH2 *sh2, int cycles) { int ret; sh2->cycles_timeslice = cycles; -#ifdef DRC_SH2 - if (use_drc) - ret = sh2_execute_drc(sh2, cycles); - else -#endif - ret = sh2_execute_interpreter(sh2, cycles); + ret = sh2->run(sh2, cycles); return sh2->cycles_timeslice - ret; } diff --git a/pico/32x/32x.c b/pico/32x/32x.c index ddd03fa8..3b889648 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -383,7 +383,7 @@ static void run_sh2(SH2 *sh2, unsigned int m68k_cycles) elprintf_sh2(sh2, EL_32X, "+run %u %d @%08x", sh2->m68krcycles_done, cycles, sh2->pc); - done = sh2_execute(sh2, cycles, PicoIn.opt & POPT_EN_DRC); + done = sh2_execute(sh2, cycles); sh2->m68krcycles_done += C_SH2_TO_M68K(sh2, done); sh2->state &= ~SH2_STATE_RUN; @@ -499,12 +499,12 @@ void sync_sh2s_normal(unsigned int m68k_target) pprof_end(msh2); now = next; - if (!(msh2.state & SH2_IDLE_STATES)) { - if (CYCLES_GT(now, msh2.m68krcycles_done)) + if (CYCLES_GT(now, msh2.m68krcycles_done)) { + if (!(msh2.state & SH2_IDLE_STATES)) now = msh2.m68krcycles_done; } - if (!(ssh2.state & SH2_IDLE_STATES)) { - if (CYCLES_GT(now, ssh2.m68krcycles_done)) + if (CYCLES_GT(now, ssh2.m68krcycles_done)) { + if (!(ssh2.state & SH2_IDLE_STATES)) now = ssh2.m68krcycles_done; } if (CYCLES_GT(now, timer_cycles+STEP_N)) { @@ -571,6 +571,9 @@ void sync_sh2s_lockstep(unsigned int m68k_target) void PicoFrame32x(void) { + sh2_execute_prepare(&msh2, PicoIn.opt & POPT_EN_DRC); + sh2_execute_prepare(&ssh2, PicoIn.opt & POPT_EN_DRC); + Pico.m.scanline = 0; Pico32x.vdp_regs[0x0a/2] &= ~P32XV_VBLK; // get out of vblank diff --git a/pico/pico_int.h b/pico/pico_int.h index 
e4bd4c1e..8a4aa309 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -235,11 +235,10 @@ extern SH2 sh2s[2]; # define sh2_pc(sh2) (sh2)->ppc #else # define sh2_end_run(sh2, after_) do { \ - int left_ = (signed int)(sh2)->sr >> 12; \ - if (left_ > (after_)) { \ - (sh2)->cycles_timeslice -= left_ - (after_); \ - (sh2)->sr &= 0xfff; \ - (sh2)->sr |= (after_) << 12; \ + int left_ = ((signed int)(sh2)->sr >> 12) - (after_); \ + if (left_ > 0) { \ + (sh2)->cycles_timeslice -= left_; \ + (sh2)->sr -= (left_ << 12); \ } \ } while (0) # define sh2_cycles_left(sh2) ((signed int)(sh2)->sr >> 12) From 6432fb18baa0f9b6f41a68301c841dd58dc56cd4 Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 22 Apr 2020 20:34:20 +0200 Subject: [PATCH 0306/1110] 32x, small improvement for poll detection --- pico/32x/memory.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pico/32x/memory.c b/pico/32x/memory.c index 3f597288..69f70318 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -114,12 +114,13 @@ void p32x_m68k_poll_event(u32 flags) void NOINLINE p32x_sh2_poll_detect(u32 a, SH2 *sh2, u32 flags, int maxcnt) { u32 cycles_done = sh2_cycles_done_t(sh2); + u32 cycles_diff = cycles_done - sh2->poll_cycles; // reading 2 consecutive 16bit values is probably a 32bit access. detect this // by checking address (max 2 bytes away) and cycles (max 2 cycles later). // no polling if more than 20 cycles have passed since last detect call. 
- if (a - sh2->poll_addr <= 2 && CYCLES_GE(sh2->poll_cycles+20, cycles_done)) { - if (CYCLES_GT(cycles_done,sh2->poll_cycles+2) && ++sh2->poll_cnt >= maxcnt) { + if (a - sh2->poll_addr <= 2 && CYCLES_GE(20, cycles_diff)) { + if (CYCLES_GT(cycles_diff, 2) && ++sh2->poll_cnt >= maxcnt) { if (!(sh2->state & flags)) elprintf_sh2(sh2, EL_32X, "state: %02x->%02x", sh2->state, sh2->state | flags); From 09b96f9940533dc94d04f415332626ba3b72ebb5 Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 22 Apr 2020 20:41:51 +0200 Subject: [PATCH 0307/1110] audio: improve cycle accuracy of SN76496+YM2612 --- pico/memory.c | 7 +++-- pico/pico_int.h | 12 ++++----- pico/sms.c | 2 +- pico/sound/sound.c | 67 +++++++++++++++------------------------------- 4 files changed, 32 insertions(+), 56 deletions(-) diff --git a/pico/memory.c b/pico/memory.c index 0fa7b8de..e1afb4db 100644 --- a/pico/memory.c +++ b/pico/memory.c @@ -391,7 +391,7 @@ static int get_scanline(int is_from_z80); static void psg_write_68k(u32 d) { // look for volume write and update if needed - if ((d & 0x90) == 0x90 && Pico.snd.psg_line < Pico.m.scanline) + if ((d & 0x90) == 0x90) PsndDoPSG(Pico.m.scanline); SN76496Write(d); @@ -401,8 +401,7 @@ static void psg_write_z80(u32 d) { if ((d & 0x90) == 0x90) { int scanline = get_scanline(1); - if (Pico.snd.psg_line < scanline) - PsndDoPSG(scanline); + PsndDoPSG(scanline); } SN76496Write(d); @@ -1061,7 +1060,7 @@ static int ym2612_write_local(u32 a, u32 d, int is_from_z80) if (PicoIn.opt & POPT_EXT_FM) return YM2612Write_940(a, d, get_scanline(is_from_z80)); #endif - PsndDoFM(get_scanline(is_from_z80)); + PsndDoFM(is_from_z80 ? 
z80_cyclesDone() : z80_cycles_from_68k()); return YM2612Write_(a, d); } diff --git a/pico/pico_int.h b/pico/pico_int.h index 8a4aa309..7539379a 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -436,12 +436,12 @@ struct PicoSound short len_use; // adjusted int len_e_add; // for non-int samples/frame int len_e_cnt; - int dac_val, dac_val2; // last DAC sample - unsigned int dac_mult; // z80 clocks per line in Q16 - unsigned int dac_pos; // last DAC position in Q16 - short psg_line; - unsigned int fm_mult; // samples per line in Q16 - unsigned int fm_pos; // last FM position in Q16 + unsigned int clkl_mult; // z80 clocks per line in Q20 + unsigned int smpl_mult; // samples per line in Q16 + short dac_val, dac_val2; // last DAC sample + unsigned int dac_pos; // last DAC position in Q20 + unsigned int fm_pos; // last FM position in Q20 + unsigned int psg_pos; // last PSG position in Q16 }; // run tools/mkoffsets pico/pico_int_offs.h if you change these diff --git a/pico/sms.c b/pico/sms.c index b016f197..901f2f55 100644 --- a/pico/sms.c +++ b/pico/sms.c @@ -152,7 +152,7 @@ static void z80_sms_out(unsigned short a, unsigned char d) case 0x40: case 0x41: - if ((d & 0x90) == 0x90 && Pico.snd.psg_line < Pico.m.scanline) + if ((d & 0x90) == 0x90); PsndDoPSG(Pico.m.scanline); SN76496Write(d); break; diff --git a/pico/sound/sound.c b/pico/sound/sound.c index 57d9c2e5..eb10f36b 100644 --- a/pico/sound/sound.c +++ b/pico/sound/sound.c @@ -19,9 +19,6 @@ void (*PsndMix_32_to_16l)(short *dest, int *src, int count) = mix_32_to_16l_ster // master int buffer to mix to static int PsndBuffer[2*(44100+100)/50]; -// dac, psg -static unsigned short dac_info[312+4]; // pos in sample buffer - // cdda output buffer short cdda_out_buffer[2*1152]; @@ -29,23 +26,6 @@ short cdda_out_buffer[2*1152]; extern int *sn76496_regs; -static void dac_recalculate(void) -{ - int lines = Pico.m.pal ? 
313 : 262; - int i, pos; - - pos = 0; // Q16 - - for(i = 0; i <= lines; i++) - { - dac_info[i] = ((pos+0x8000) >> 16); // round to nearest - pos += Pico.snd.fm_mult; - } - for (i = lines+1; i < sizeof(dac_info) / sizeof(dac_info[0]); i++) - dac_info[i] = dac_info[i-1]; -} - - PICO_INTERNAL void PsndReset(void) { // PsndRerate calls YM2612Init, which also resets @@ -88,12 +68,9 @@ void PsndRerate(int preserve_state) Pico.snd.len_e_cnt = 0; // Q16 // samples per line (Q16) - Pico.snd.fm_mult = 65536LL * PicoIn.sndRate / (target_fps*target_lines); + Pico.snd.smpl_mult = 65536LL * PicoIn.sndRate / (target_fps*target_lines); // samples per z80 clock (Q20) - Pico.snd.dac_mult = 16 * Pico.snd.fm_mult * 15/7 / 488; - - // recalculate dac info - dac_recalculate(); + Pico.snd.clkl_mult = 16 * Pico.snd.smpl_mult * 15/7 / 488; // clear all buffers memset32(PsndBuffer, 0, sizeof(PsndBuffer)/4); @@ -118,8 +95,6 @@ PICO_INTERNAL void PsndStartFrame(void) Pico.snd.len_e_cnt -= 0x10000; Pico.snd.len_use++; } - - Pico.snd.psg_line = 0; } PICO_INTERNAL void PsndDoDAC(int cyc_to) @@ -128,7 +103,7 @@ PICO_INTERNAL void PsndDoDAC(int cyc_to) int dout = ym2612.dacout; // number of samples to fill in buffer (Q20) - len = (cyc_to * Pico.snd.dac_mult) - Pico.snd.dac_pos; + len = (cyc_to * Pico.snd.clkl_mult) - Pico.snd.dac_pos; // update position and calculate buffer offset and length pos = (Pico.snd.dac_pos+0x80000) >> 20; @@ -163,17 +138,18 @@ PICO_INTERNAL void PsndDoDAC(int cyc_to) PICO_INTERNAL void PsndDoPSG(int line_to) { - int line_from = Pico.snd.psg_line; - int pos, pos1, len; + int pos, len; int stereo = 0; - pos = dac_info[line_from]; - pos1 = dac_info[line_to + 1]; - len = pos1 - pos; + // Q16, number of samples since last call + len = ((line_to+1) * Pico.snd.smpl_mult) - Pico.snd.psg_pos; if (len <= 0) return; - Pico.snd.psg_line = line_to + 1; + // update position and calculate buffer offset and length + pos = (Pico.snd.psg_pos+0x8000) >> 16; + Pico.snd.psg_pos += len; + len 
= ((Pico.snd.psg_pos+0x8000) >> 16) - pos; if (!PicoIn.sndOut || !(PicoIn.opt & POPT_EN_PSG)) return; @@ -185,22 +161,22 @@ PICO_INTERNAL void PsndDoPSG(int line_to) SN76496Update(PicoIn.sndOut + pos, len, stereo); } -PICO_INTERNAL void PsndDoFM(int line_to) +PICO_INTERNAL void PsndDoFM(int cyc_to) { int pos, len; int stereo = 0; // Q16, number of samples since last call - len = ((line_to-1) * Pico.snd.fm_mult) - Pico.snd.fm_pos; + len = (cyc_to * Pico.snd.clkl_mult) - Pico.snd.fm_pos; // don't do this too often (about every 4th scanline) - if (len >> 16 <= PicoIn.sndRate >> 12) + if (len >> 20 <= PicoIn.sndRate >> 12) return; // update position and calculate buffer offset and length - pos = (Pico.snd.fm_pos+0x8000) >> 16; + pos = (Pico.snd.fm_pos+0x80000) >> 20; Pico.snd.fm_pos += len; - len = ((Pico.snd.fm_pos+0x8000) >> 16) - pos; + len = ((Pico.snd.fm_pos+0x80000) >> 20) - pos; // fill buffer if (PicoIn.opt & POPT_EN_STEREO) { @@ -273,7 +249,7 @@ PICO_INTERNAL void PsndClear(void) if (!(PicoIn.opt & POPT_EN_FM)) memset32(PsndBuffer, 0, PicoIn.opt & POPT_EN_STEREO ? 
len*2 : len); // drop pos remainder to avoid rounding errors (not entirely correct though) - Pico.snd.dac_pos = Pico.snd.fm_pos = 0; + Pico.snd.dac_pos = Pico.snd.fm_pos = Pico.snd.psg_pos = 0; } @@ -281,7 +257,7 @@ static int PsndRender(int offset, int length) { int *buf32; int stereo = (PicoIn.opt & 8) >> 3; - int fmlen = ((Pico.snd.fm_pos+0x8000) >> 16); + int fmlen = ((Pico.snd.fm_pos+0x80000) >> 20); int daclen = ((Pico.snd.dac_pos+0x80000) >> 20); buf32 = PsndBuffer+(offset< 0) { short *dacbuf = PicoIn.sndOut + (daclen << stereo); Pico.snd.dac_pos += (length-daclen) << 20; - for (; length-daclen > 0; daclen++) { + *dacbuf++ += Pico.snd.dac_val2; + if (stereo) dacbuf++; + for (daclen++; length-daclen > 0; daclen++) { *dacbuf++ += Pico.snd.dac_val; if (stereo) dacbuf++; } + Pico.snd.dac_val2 = Pico.snd.dac_val; } // Add in parts of the FM buffer not yet done if (length-fmlen > 0) { int *fmbuf = buf32 + ((fmlen-offset) << stereo); - Pico.snd.fm_pos += (length-fmlen) << 16; + Pico.snd.fm_pos += (length-fmlen) << 20; if (PicoIn.opt & POPT_EN_FM) YM2612UpdateOne(fmbuf, length-fmlen, stereo, 1); } @@ -344,8 +323,6 @@ PICO_INTERNAL void PsndGetSamples(int y) { static int curr_pos = 0; - if (ym2612.dacen) - PsndDoDAC(cycles_68k_to_z80(Pico.t.m68c_aim - Pico.t.m68c_frame_start)); PsndDoPSG(y - 1); curr_pos = PsndRender(0, Pico.snd.len_use); From 1dbda5f894f61369355d0720ebc86ddda5c5463f Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 22 Apr 2020 20:48:03 +0200 Subject: [PATCH 0308/1110] audio: fixes and optimizations for SSG-EG --- pico/sound/ym2612.c | 104 +++++++++++++++++++++++++++------------- pico/sound/ym2612.h | 1 + pico/sound/ym2612_arm.S | 94 +++++++++++++++++++++++++++++++----- 3 files changed, 153 insertions(+), 46 deletions(-) diff --git a/pico/sound/ym2612.c b/pico/sound/ym2612.c index af381fb0..cb4f8c7d 100644 --- a/pico/sound/ym2612.c +++ b/pico/sound/ym2612.c @@ -128,7 +128,7 @@ extern YM2612 *ym2612_940; #endif -void memset32(int *dest, int c, int count); 
+void memset32(void *dest, int c, int count); #ifndef __GNUC__ @@ -511,7 +511,7 @@ static INT32 lfo_pm_table[128*8*32]; /* 128 combinations of 7 bits meaningful (o but LFO works with one more bit of a precision so we really need 4096 elements */ static UINT32 fn_table[4096]; /* fnumber->increment counter */ -static int g_lfo_ampm = 0; +static int g_lfo_ampm; /* register number to channel number , slot offset */ #define OPN_CHAN(N) (N&3) @@ -569,7 +569,7 @@ INLINE void FM_KEYON(int c , int s ) } else { SLOT->volume = MIN_ATT_INDEX; } - recalc_volout(SLOT); +// recalc_volout(SLOT); ym2612.slot_mask |= (1<tl = (v&0x7f)<<(ENV_BITS-7); /* 7bit TL */ - if (SLOT->state > EG_REL) - recalc_volout(SLOT); +// if (SLOT->state > EG_REL) +// recalc_volout(SLOT); } /* set attack rate & key scale */ @@ -761,7 +761,7 @@ INLINE int advance_lfo(int lfo_ampm, UINT32 lfo_cnt_old, UINT32 lfo_cnt) return lfo_ampm; } -INLINE void update_eg_phase(FM_SLOT *SLOT, UINT32 eg_cnt) +INLINE void update_eg_phase(FM_SLOT *SLOT, UINT32 eg_cnt, UINT32 ssg_en) { INT32 volume = SLOT->volume; UINT32 pack = SLOT->eg_pack[SLOT->state - 1]; @@ -774,7 +774,7 @@ INLINE void update_eg_phase(FM_SLOT *SLOT, UINT32 eg_cnt) eg_inc_val = pack >> ((eg_cnt >> shift) & 7) * 3; eg_inc_val = (1 << (eg_inc_val & 7)) >> 1; - if (SLOT->ssg&0x08) { + if ((SLOT->ssg&0x08) && ssg_en) { switch (SLOT->state) { case EG_ATT: /* attack phase */ @@ -854,7 +854,7 @@ INLINE void update_eg_phase(FM_SLOT *SLOT, UINT32 eg_cnt) SLOT->volume = volume; } -INLINE void update_ssg_eg_phase(FM_SLOT *SLOT) +INLINE UINT32 update_ssg_eg_phase(FM_SLOT *SLOT, UINT32 phase) { if (SLOT->ssg&0x01) { if (SLOT->ssg&0x02) { @@ -869,7 +869,7 @@ INLINE void update_ssg_eg_phase(FM_SLOT *SLOT) SLOT->ssg ^= 4; SLOT->ssgn ^= 4; } else - SLOT->phase = 0; + phase = 0; if (SLOT->state != EG_ATT) { SLOT->state = (SLOT->sl == MIN_ATT_INDEX) ? 
EG_SUS : EG_DEC; @@ -880,7 +880,8 @@ INLINE void update_ssg_eg_phase(FM_SLOT *SLOT) } } } - recalc_volout(SLOT); +// recalc_volout(SLOT); + return phase; } #endif @@ -927,15 +928,23 @@ static void chan_render_loop(chan_rend_context *ct, int *buffer, int length) int smp = 0; /* produced sample */ unsigned int eg_out, eg_out2, eg_out4; FM_SLOT *SLOT; + UINT32 cnt = ct->eg_timer_add+(ct->eg_timer & ((1<CH->SLOT[SLOT1]; - if ((SLOT->ssg&0x08) && SLOT->state > EG_REL && SLOT->volume >= 0x200) update_ssg_eg_phase(SLOT); - SLOT = &ct->CH->SLOT[SLOT2]; - if ((SLOT->ssg&0x08) && SLOT->state > EG_REL && SLOT->volume >= 0x200) update_ssg_eg_phase(SLOT); - SLOT = &ct->CH->SLOT[SLOT3]; - if ((SLOT->ssg&0x08) && SLOT->state > EG_REL && SLOT->volume >= 0x200) update_ssg_eg_phase(SLOT); - SLOT = &ct->CH->SLOT[SLOT4]; - if ((SLOT->ssg&0x08) && SLOT->state > EG_REL && SLOT->volume >= 0x200) update_ssg_eg_phase(SLOT); + if (ct->pack & 2) while (cnt >= 1<CH->SLOT[SLOT1]; + if ((SLOT->ssg&0x08) && SLOT->state > EG_REL && SLOT->volume >= 0x200) + ct->phase1 = update_ssg_eg_phase(SLOT, ct->phase1); + SLOT = &ct->CH->SLOT[SLOT2]; + if ((SLOT->ssg&0x08) && SLOT->state > EG_REL && SLOT->volume >= 0x200) + ct->phase2 = update_ssg_eg_phase(SLOT, ct->phase2); + SLOT = &ct->CH->SLOT[SLOT3]; + if ((SLOT->ssg&0x08) && SLOT->state > EG_REL && SLOT->volume >= 0x200) + ct->phase3 = update_ssg_eg_phase(SLOT, ct->phase3); + SLOT = &ct->CH->SLOT[SLOT4]; + if ((SLOT->ssg&0x08) && SLOT->state > EG_REL && SLOT->volume >= 0x200) + ct->phase4 = update_ssg_eg_phase(SLOT, ct->phase4); + } if (ct->pack & 8) { /* LFO enabled ? 
(test Earthworm Jim in between demo 1 and 2) */ ct->pack = (ct->pack&0xffff) | (advance_lfo(ct->pack >> 16, ct->lfo_cnt, ct->lfo_cnt + ct->lfo_inc) << 16); @@ -943,7 +952,21 @@ static void chan_render_loop(chan_rend_context *ct, int *buffer, int length) } ct->eg_timer += ct->eg_timer_add; - while (ct->eg_timer >= EG_TIMER_OVERFLOW) + if (ct->eg_timer < EG_TIMER_OVERFLOW) { + SLOT = &ct->CH->SLOT[SLOT1]; + SLOT->vol_ipol = SLOT->vol_out; + if (SLOT->state > EG_REL) recalc_volout(SLOT); + SLOT = &ct->CH->SLOT[SLOT2]; + SLOT->vol_ipol = SLOT->vol_out; + if (SLOT->state > EG_REL) recalc_volout(SLOT); + SLOT = &ct->CH->SLOT[SLOT3]; + SLOT->vol_ipol = SLOT->vol_out; + if (SLOT->state > EG_REL) recalc_volout(SLOT); + SLOT = &ct->CH->SLOT[SLOT4]; + SLOT->vol_ipol = SLOT->vol_out; + if (SLOT->state > EG_REL) recalc_volout(SLOT); + } + else while (ct->eg_timer >= EG_TIMER_OVERFLOW) { ct->eg_timer -= EG_TIMER_OVERFLOW; ct->eg_cnt++; @@ -951,17 +974,18 @@ static void chan_render_loop(chan_rend_context *ct, int *buffer, int length) SLOT = &ct->CH->SLOT[SLOT1]; SLOT->vol_ipol = SLOT->vol_out; - if (SLOT->state != EG_OFF) update_eg_phase(SLOT, ct->eg_cnt); + if (SLOT->state != EG_OFF) update_eg_phase(SLOT, ct->eg_cnt, ct->pack & 2); SLOT = &ct->CH->SLOT[SLOT2]; SLOT->vol_ipol = SLOT->vol_out; - if (SLOT->state != EG_OFF) update_eg_phase(SLOT, ct->eg_cnt); + if (SLOT->state != EG_OFF) update_eg_phase(SLOT, ct->eg_cnt, ct->pack & 2); SLOT = &ct->CH->SLOT[SLOT3]; SLOT->vol_ipol = SLOT->vol_out; - if (SLOT->state != EG_OFF) update_eg_phase(SLOT, ct->eg_cnt); + if (SLOT->state != EG_OFF) update_eg_phase(SLOT, ct->eg_cnt, ct->pack & 2); SLOT = &ct->CH->SLOT[SLOT4]; SLOT->vol_ipol = SLOT->vol_out; - if (SLOT->state != EG_OFF) update_eg_phase(SLOT, ct->eg_cnt); + if (SLOT->state != EG_OFF) update_eg_phase(SLOT, ct->eg_cnt, ct->pack & 2); } + #if 0 UINT32 ifrac0 = ct->eg_timer / (EG_TIMER_OVERFLOW>>EG_SH); UINT32 ifrac1 = (1<CH->SLOT[SLOT3].vol_out) >> 1; ct->vol_out4 = 
(ct->CH->SLOT[SLOT4].vol_ipol + ct->CH->SLOT[SLOT4].vol_out) >> 1; + break; } #elif 0 if (ct->eg_timer >> (EG_SH-1) < EG_TIMER_OVERFLOW >> EG_SH) { @@ -1272,7 +1297,7 @@ static int chan_render(int *buffer, int length, int c, UINT32 flags) // flags: s crct.mem = crct.CH->mem_value; /* one sample delay memory */ crct.lfo_cnt = ym2612.OPN.lfo_cnt; - flags &= 0x35; + flags &= 0x37; if (crct.lfo_inc) { flags |= 8; @@ -1453,6 +1478,7 @@ static void reset_channels(FM_CH *CH) CH[c].mem_value = CH[c].op1_out = 0; } ym2612.slot_mask = 0; + ym2612.ssg_mask = 0; } /* initialize generic tables */ @@ -1655,8 +1681,10 @@ static int OPNWriteReg(int r, int v) case 0x90: /* SSG-EG */ SLOT->ssg = v&0x0f; SLOT->ssg ^= SLOT->ssgn; - if (SLOT->state > EG_REL) - recalc_volout(SLOT); + if (v&0x08) ym2612.ssg_mask |= 1<<(OPN_SLOT(r) + c*4); + else ym2612.ssg_mask &= ~(1<<(OPN_SLOT(r) + c*4)); +// if (SLOT->state > EG_REL) +// recalc_volout(SLOT); break; case 0xa0: @@ -1751,6 +1779,7 @@ int YM2612UpdateOne_(int *buffer, int length, int stereo, int is_buf_empty) { int pan; int active_chs = 0; + int flags = stereo ? 
1:0; // if !is_buf_empty, it means it has valid samples to mix with, else it may contain trash if (is_buf_empty) memset32(buffer, 0, length<>2)) << 3; - if (ym2612.slot_mask & 0x0f0000) active_chs |= chan_render(buffer, length, 4, stereo|((pan&0x300)>>4)) << 4; - if (ym2612.slot_mask & 0xf00000) active_chs |= chan_render(buffer, length, 5, stereo|((pan&0xc00)>>6)|(ym2612.dacen<<2)) << 5; +#define BIT_IF(v,b,c) { v &= ~(1<<(b)); if (c) v |= 1<<(b); } + BIT_IF(flags, 1, (ym2612.ssg_mask & 0x00000f)); + if (ym2612.slot_mask & 0x00000f) active_chs |= chan_render(buffer, length, 0, flags|((pan&0x003)<<4)) << 0; + BIT_IF(flags, 1, (ym2612.ssg_mask & 0x0000f0)); + if (ym2612.slot_mask & 0x0000f0) active_chs |= chan_render(buffer, length, 1, flags|((pan&0x00c)<<2)) << 1; + BIT_IF(flags, 1, (ym2612.ssg_mask & 0x000f00)); + if (ym2612.slot_mask & 0x000f00) active_chs |= chan_render(buffer, length, 2, flags|((pan&0x030) )) << 2; + BIT_IF(flags, 1, (ym2612.ssg_mask & 0x00f000)); + if (ym2612.slot_mask & 0x00f000) active_chs |= chan_render(buffer, length, 3, flags|((pan&0x0c0)>>2)) << 3; + BIT_IF(flags, 1, (ym2612.ssg_mask & 0x0f0000)); + if (ym2612.slot_mask & 0x0f0000) active_chs |= chan_render(buffer, length, 4, flags|((pan&0x300)>>4)) << 4; + BIT_IF(flags, 1, (ym2612.ssg_mask & 0xf00000)); + if (ym2612.slot_mask & 0xf00000) active_chs |= chan_render(buffer, length, 5, flags|((pan&0xc00)>>6)|(!!ym2612.dacen<<2)) << 5; +#undef BIT_IF chan_render_finish(); return active_chs; // 1 if buffer updated diff --git a/pico/sound/ym2612.h b/pico/sound/ym2612.h index 73e693f9..b614790c 100644 --- a/pico/sound/ym2612.h +++ b/pico/sound/ym2612.h @@ -153,6 +153,7 @@ typedef struct FM_OPN OPN; /* OPN state */ UINT32 slot_mask; /* active slot mask (performance hack) */ + UINT32 ssg_mask; /* active ssg mask (performance hack) */ } YM2612; #endif diff --git a/pico/sound/ym2612_arm.S b/pico/sound/ym2612_arm.S index 1370e6cf..0334d1cf 100644 --- a/pico/sound/ym2612_arm.S +++ 
b/pico/sound/ym2612_arm.S @@ -17,6 +17,7 @@ @ very simple YM2612 output rate to sample rate adaption (~500k cycles @44100) #define INTERPOL +#define SSG_EG .equiv SLOT1, 0 .equiv SLOT2, 2 @@ -73,8 +74,11 @@ and r3, r3, #7 @ eg_inc_val shift, may be 0 ldrb r2, [r5,#0x17] @ state +#if defined(SSG_EG) tst r0, #0x08 @ ssg enabled? + tstne r12, #0x02 bne 9f +#endif @ non-SSG-EG mode cmp r2, #4 @ EG_ATT @@ -127,7 +131,9 @@ strgeb r3, [r5,#0x17] @ state 10: @ finish + ldrh r3, [r5,#0x18] @ tl strh r0, [r5,#0x1a] @ volume +#if defined(SSG_EG) b 11f 9: @ SSG-EG mode @@ -140,7 +146,7 @@ movlt r3, r0, lsl r3 ldrlth r0, [r5,#0x1a] @ volume, unsigned (0-1023) movlt r3, r3, lsr #1 @ eg_inc_val - addlt r0, r0, r3, lsr #2 + addlt r0, r0, r3, lsl #2 cmp r2, #2 blt 1f @ EG_REL @@ -182,18 +188,20 @@ strh r0, [r5,#0x1a] @ volume cmp r2, #0x0c @ if ( ssg&0x04 && state > EG_REL ) cmpge r3, #EG_REL+1 + ldrh r3, [r5,#0x18] @ tl rsbge r0, r0, #0x200 @ volume = (0x200-volume) & MAX_ATT - lslge r0, r0, #10 - lsrge r0, r0, #10 + lslge r0, r0, #22 + lsrge r0, r0, #22 11: - ldrh r3, [r5,#0x18] @ tl +#endif add r0, r0, r3 @ volume += tl strh r0, [r5,#0x34] @ vol_out 0: @ EG_OFF .endm +#if defined(SSG_EG) @ r5=slot, trashes: r0,r2,r3 .macro update_ssg_eg ldrh r0, [r5,#0x30] @ ssg+ssgn @@ -204,6 +212,7 @@ cmp r2, #EG_REL+1 @ state > EG_REL && cmpge r3, #0x200 @ volume >= 0x200? 
blt 9f + orr r4, r4, #0x10 @ ssg_update tst r0, #0x01 beq 1f @@ -249,6 +258,33 @@ 9: .endm +@ r5=slot, trashes: r0,r2,r3 +.macro recalc_volout +#if defined(INTERPOL) + ldrh r0, [r5,#0x34] @ vol_out +#endif + ldrb r2, [r5,#0x30] @ ssg + ldrb r3, [r5,#0x17] @ state +#if defined(INTERPOL) + strh r0, [r5,#0x36] @ vol_ipol +#endif + ldrh r0, [r5,#0x1a] @ volume + +@ and r2, r2, #0x0c + cmp r2, #0x0c @ if ( ~ssg&0x0c && state > EG_REL ) + cmpge r3, #EG_REL+1 + ldrh r3, [r5,#0x18] @ tl + rsbge r0, r0, #0x200 @ volume = (0x200-volume) & MAX_ATT + lslge r0, r0, #22 + lsrge r0, r0, #22 + ldrh r0, [r5,#0x1a] @ volume + ldrh r3, [r5,#0x18] @ tl + + add r0, r0, r3 @ volume += tl + strh r0, [r5,#0x34] @ vol_out +.endm +#endif + @ r12=lfo_ampm[31:16], r1=lfo_cnt_old, r2=lfo_cnt, r3=scratch .macro advance_lfo_m mov r2, r2, lsr #LFO_SH @@ -305,7 +341,7 @@ .endm -@ lr=context, r12=pack (stereo, lastchan, disabled, lfo_enabled | pan_r, pan_l, ams[2] | AMmasks[4] | FB[4] | lfo_ampm[16]) +@ lr=context, r12=pack (stereo, ssg_enabled, disabled, lfo_enabled | pan_r, pan_l, ams[2] | AMmasks[4] | FB[4] | lfo_ampm[16]) @ r0-r2=scratch, r3=sin_tab, r5=scratch, r6-r7=vol_out[4], r10=op1_out .macro upd_algo0_m @@ -643,8 +679,8 @@ .endm -@ lr=context, r12=pack (stereo, lastchan, disabled, lfo_enabled | pan_r, pan_l, ams[2] | AMmasks[4] | FB[4] | lfo_ampm[16]) -@ r0-r2=scratch, r3=sin_tab/scratch, r4=(length<<8)|unused[4],was_update,algo[3], r5=tl_tab/slot, +@ lr=context, r12=pack (stereo, ssg_enabled, disabled, lfo_enabled | pan_r, pan_l, ams[2] | AMmasks[4] | FB[4] | lfo_ampm[16]) +@ r0-r2=scratch, r3=sin_tab/scratch, r4=(length<<8)|unused[3],ssg_update,was_update,algo[3], r5=tl_tab/slot, @ r6-r7=vol_out[4], r8=eg_timer, r9=eg_timer_add[31:16], r10=op1_out, r11=buffer .global chan_render_loop @ chan_rend_context *ct, int *buffer, int length @@ -683,10 +719,17 @@ crl_loop: subs r4, r4, #0x100 bmi crl_loop_end - @ -- SSG -- ldr r5, [lr, #0x40] @ CH +#if defined(SSG_EG) + tst r12, #0x02 @ 
ssg_enabled? + beq ssg_done + @ -- SSG -- + lsl r7, r8, #EG_SH + add r7, r9, r7, lsr #EG_SH + subs r7, r7, #1< Date: Wed, 22 Apr 2020 21:40:05 +0200 Subject: [PATCH 0309/1110] audio: add option to switch off SSG-EG --- pico/pico.h | 1 + pico/sound/sound.c | 2 +- pico/sound/ym2612.c | 15 ++++++++------- pico/sound/ym2612.h | 10 +++++----- platform/common/menu_pico.c | 1 + platform/common/menu_pico.h | 1 + platform/gp2x/940ctl.c | 4 ++-- platform/gp2x/940ctl.h | 2 +- platform/gp2x/code940/940.c | 2 +- 9 files changed, 21 insertions(+), 17 deletions(-) diff --git a/pico/pico.h b/pico/pico.h index efc30e5f..d8c5959c 100644 --- a/pico/pico.h +++ b/pico/pico.h @@ -73,6 +73,7 @@ extern void *p32x_bios_g, *p32x_bios_m, *p32x_bios_s; #define POPT_EN_32X (1<<20) // x0 0000 #define POPT_EN_PWM (1<<21) #define POPT_PWM_IRQ_OPT (1<<22) +#define POPT_DIS_FM_SSGEG (1<<23) #define PAHW_MCD (1<<0) #define PAHW_32X (1<<1) diff --git a/pico/sound/sound.c b/pico/sound/sound.c index eb10f36b..54521601 100644 --- a/pico/sound/sound.c +++ b/pico/sound/sound.c @@ -48,7 +48,7 @@ void PsndRerate(int preserve_state) ym2612_pack_state(); memcpy(state, YM2612GetRegs(), 0x204); } - YM2612Init(Pico.m.pal ? OSC_PAL/7 : OSC_NTSC/7, PicoIn.sndRate); + YM2612Init(Pico.m.pal ? 
OSC_PAL/7 : OSC_NTSC/7, PicoIn.sndRate, !(PicoIn.opt&POPT_DIS_FM_SSGEG)); if (preserve_state) { // feed it back it's own registers, just like after loading state memcpy(YM2612GetRegs(), state, 0x204); diff --git a/pico/sound/ym2612.c b/pico/sound/ym2612.c index cb4f8c7d..622fff0b 100644 --- a/pico/sound/ym2612.c +++ b/pico/sound/ym2612.c @@ -1820,17 +1820,17 @@ int YM2612UpdateOne_(int *buffer, int length, int stereo, int is_buf_empty) // flags: stereo, ssg_enabled, disabled, _, pan_r, pan_l chan_render_prep(); #define BIT_IF(v,b,c) { v &= ~(1<<(b)); if (c) v |= 1<<(b); } - BIT_IF(flags, 1, (ym2612.ssg_mask & 0x00000f)); + BIT_IF(flags, 1, (ym2612.ssg_mask & 0x00000f) && (ym2612.OPN.ST.flags & 1)); if (ym2612.slot_mask & 0x00000f) active_chs |= chan_render(buffer, length, 0, flags|((pan&0x003)<<4)) << 0; - BIT_IF(flags, 1, (ym2612.ssg_mask & 0x0000f0)); + BIT_IF(flags, 1, (ym2612.ssg_mask & 0x0000f0) && (ym2612.OPN.ST.flags & 1)); if (ym2612.slot_mask & 0x0000f0) active_chs |= chan_render(buffer, length, 1, flags|((pan&0x00c)<<2)) << 1; - BIT_IF(flags, 1, (ym2612.ssg_mask & 0x000f00)); + BIT_IF(flags, 1, (ym2612.ssg_mask & 0x000f00) && (ym2612.OPN.ST.flags & 1)); if (ym2612.slot_mask & 0x000f00) active_chs |= chan_render(buffer, length, 2, flags|((pan&0x030) )) << 2; - BIT_IF(flags, 1, (ym2612.ssg_mask & 0x00f000)); + BIT_IF(flags, 1, (ym2612.ssg_mask & 0x00f000) && (ym2612.OPN.ST.flags & 1)); if (ym2612.slot_mask & 0x00f000) active_chs |= chan_render(buffer, length, 3, flags|((pan&0x0c0)>>2)) << 3; - BIT_IF(flags, 1, (ym2612.ssg_mask & 0x0f0000)); + BIT_IF(flags, 1, (ym2612.ssg_mask & 0x0f0000) && (ym2612.OPN.ST.flags & 1)); if (ym2612.slot_mask & 0x0f0000) active_chs |= chan_render(buffer, length, 4, flags|((pan&0x300)>>4)) << 4; - BIT_IF(flags, 1, (ym2612.ssg_mask & 0xf00000)); + BIT_IF(flags, 1, (ym2612.ssg_mask & 0xf00000) && (ym2612.OPN.ST.flags & 1)); if (ym2612.slot_mask & 0xf00000) active_chs |= chan_render(buffer, length, 5, 
flags|((pan&0xc00)>>6)|(!!ym2612.dacen<<2)) << 5; #undef BIT_IF chan_render_finish(); @@ -1840,13 +1840,14 @@ int YM2612UpdateOne_(int *buffer, int length, int stereo, int is_buf_empty) /* initialize YM2612 emulator */ -void YM2612Init_(int clock, int rate) +void YM2612Init_(int clock, int rate, int ssg) { memset(&ym2612, 0, sizeof(ym2612)); init_tables(); ym2612.OPN.ST.clock = clock; ym2612.OPN.ST.rate = rate; + ym2612.OPN.ST.flags = (ssg ? 1:0); OPNSetPres( 6*24 ); diff --git a/pico/sound/ym2612.h b/pico/sound/ym2612.h index b614790c..e73c9732 100644 --- a/pico/sound/ym2612.h +++ b/pico/sound/ym2612.h @@ -95,7 +95,7 @@ typedef struct UINT8 address; /* 10 address register | need_save */ UINT8 status; /* 11 status flag | need_save */ UINT8 mode; /* mode CSM / 3SLOT */ - UINT8 pad; + UINT8 flags; /* operational flags */ int TA; /* timer a */ int TAC; /* timer a maxval */ int TAT; /* timer a ticker | need_save */ @@ -161,7 +161,7 @@ typedef struct extern YM2612 ym2612; #endif -void YM2612Init_(int baseclock, int rate); +void YM2612Init_(int baseclock, int rate, int ssg); void YM2612ResetChip_(void); int YM2612UpdateOne_(int *buffer, int length, int stereo, int is_buf_empty); @@ -183,9 +183,9 @@ int YM2612PicoStateLoad2(int *tat, int *tbt); #else /* GP2X specific */ #include "../../platform/gp2x/940ctl.h" -#define YM2612Init(baseclock,rate) do { \ - if (PicoIn.opt&POPT_EXT_FM) YM2612Init_940(baseclock, rate); \ - else YM2612Init_(baseclock, rate); \ +#define YM2612Init(baseclock,rate,ssg) do { \ + if (PicoIn.opt&POPT_EXT_FM) YM2612Init_940(baseclock, rate, ssg); \ + else YM2612Init_(baseclock, rate, ssg); \ } while (0) #define YM2612ResetChip() do { \ if (PicoIn.opt&POPT_EXT_FM) YM2612ResetChip_940(); \ diff --git a/platform/common/menu_pico.c b/platform/common/menu_pico.c index 327190a5..882aef92 100644 --- a/platform/common/menu_pico.c +++ b/platform/common/menu_pico.c @@ -499,6 +499,7 @@ static menu_entry e_menu_adv_options[] = mee_range_h ("Overclock M68k (%)", 
MA_OPT2_OVERCLOCK_M68K,currentConfig.overclock_68k, 0, 1000, h_ovrclk), mee_onoff ("Emulate Z80", MA_OPT2_ENABLE_Z80, PicoIn.opt, POPT_EN_Z80), mee_onoff ("Emulate YM2612 (FM)", MA_OPT2_ENABLE_YM2612, PicoIn.opt, POPT_EN_FM), + mee_onoff ("Disable YM2612 SSG-EG", MA_OPT2_DISABLE_YM_SSG,PicoIn.opt, POPT_DIS_FM_SSGEG), mee_onoff ("Emulate SN76496 (PSG)", MA_OPT2_ENABLE_SN76496,PicoIn.opt, POPT_EN_PSG), mee_onoff ("gzip savestates", MA_OPT2_GZIP_STATES, currentConfig.EmuOpt, EOPT_GZIP_SAVES), mee_onoff ("Don't save last used ROM", MA_OPT2_NO_LAST_ROM, currentConfig.EmuOpt, EOPT_NO_AUTOSVCFG), diff --git a/platform/common/menu_pico.h b/platform/common/menu_pico.h index 4c0bbdd1..d15113fc 100644 --- a/platform/common/menu_pico.h +++ b/platform/common/menu_pico.h @@ -48,6 +48,7 @@ typedef enum MA_OPT2_VSYNC, MA_OPT2_ENABLE_Z80, MA_OPT2_ENABLE_YM2612, + MA_OPT2_DISABLE_YM_SSG, MA_OPT2_ENABLE_SN76496, MA_OPT2_GZIP_STATES, MA_OPT2_NO_LAST_ROM, diff --git a/platform/gp2x/940ctl.c b/platform/gp2x/940ctl.c index 2afba0d9..cd3fcdc3 100644 --- a/platform/gp2x/940ctl.c +++ b/platform/gp2x/940ctl.c @@ -282,7 +282,7 @@ void sharedmem940_finish(void) } -void YM2612Init_940(int baseclock, int rate) +void YM2612Init_940(int baseclock, int rate, int ssg) { static int oldrate; @@ -339,7 +339,7 @@ void YM2612Init_940(int baseclock, int rate) memset(shared_ctl, 0, sizeof(*shared_ctl)); /* cause local ym2612 to init REGS */ - YM2612Init_(baseclock, rate); + YM2612Init_(baseclock, rate, ssg); internal_reset(); diff --git a/platform/gp2x/940ctl.h b/platform/gp2x/940ctl.h index 5b789dad..dba6cc70 100644 --- a/platform/gp2x/940ctl.h +++ b/platform/gp2x/940ctl.h @@ -1,7 +1,7 @@ void sharedmem940_init(void); void sharedmem940_finish(void); -void YM2612Init_940(int baseclock, int rate); +void YM2612Init_940(int baseclock, int rate, int ssg); void YM2612ResetChip_940(void); int YM2612UpdateOne_940(int *buffer, int length, int stereo, int is_buf_empty); diff --git a/platform/gp2x/code940/940.c 
b/platform/gp2x/code940/940.c index f79db1e5..db51fdc9 100644 --- a/platform/gp2x/code940/940.c +++ b/platform/gp2x/code940/940.c @@ -167,7 +167,7 @@ void Main940(void) case JOB940_INITALL: /* ym2612 */ shared_ctl->writebuff0[0] = shared_ctl->writebuff1[0] = 0xffff; - YM2612Init_(shared_ctl->baseclock, shared_ctl->rate); + YM2612Init_(shared_ctl->baseclock, shared_ctl->rate, 0); /* Helix mp3 decoder */ __malloc_init(); shared_data->mp3dec = MP3InitDecoder(); From 2a2e0f890a004c3d572b68f2f0aff3787b2c955e Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 22 Apr 2020 21:51:35 +0200 Subject: [PATCH 0310/1110] vdp fifo, bugfix --- pico/videoport.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pico/videoport.c b/pico/videoport.c index dac74dc3..f9fd6ece 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -91,8 +91,10 @@ static __inline int AdvanceFIFOEntry(struct VdpFIFO *vf, struct PicoVideo *pv, i // if entry has been processed... if (cnt == 0) { // remove entry from FIFO - if (vf->fifo_ql) + if (vf->fifo_ql) { + vf->fifo_queue[vf->fifo_qx] = 0; vf->fifo_qx = (vf->fifo_qx+1) & 7, vf->fifo_ql --; + } // start processing for next entry if there is one if (vf->fifo_ql) { b = vf->fifo_queue[vf->fifo_qx] & FQ_BYTE; @@ -230,7 +232,7 @@ int PicoVideoFIFOWrite(int count, int flags, unsigned sr_mask,unsigned sr_flags) if (count && vf->fifo_ql < 8) { // determine queue position for entry int x = (vf->fifo_qx + vf->fifo_ql - 1) & 7; - if (unlikely(vf->fifo_ql && (vf->fifo_queue[x] & FQ_BGDMA))) { + if (unlikely(vf->fifo_queue[x] & FQ_BGDMA)) { // CPU FIFO writes have priority over a background DMA Fill/Copy // XXX if interrupting a DMA fill, fill data changes if (x == vf->fifo_qx) { // overtaking to queue head? 
From fe43bdc3346450deb1baed2c1f268c92efc6e9cf Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 24 Apr 2020 19:00:41 +0200 Subject: [PATCH 0311/1110] 32x poll detection fix --- pico/32x/sh2soc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pico/32x/sh2soc.c b/pico/32x/sh2soc.c index 369fc0de..9da3f296 100644 --- a/pico/32x/sh2soc.c +++ b/pico/32x/sh2soc.c @@ -312,7 +312,10 @@ u32 REGPARM(2) sh2_peripheral_read32(u32 a, SH2 *sh2) elprintf_sh2(sh2, EL_32XP, "peri r32 [%08x] %08x @%06x", a | ~0x1ff, d, sh2_pc(sh2)); - if ((a & 0x1c0) == 0x140) { + if (a == 0x18c) + // kludge for polling COMM while polling for end of DMA + sh2->poll_cnt = 0; + else if ((a & 0x1c0) == 0x140) { // abused as comm area DRC_SAVE_SR(sh2); p32x_sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 3); From 70aecd15b023843ed010b5853028f2beeaa34e0f Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 24 Apr 2020 19:05:27 +0200 Subject: [PATCH 0312/1110] audio: SN76496 fixes --- pico/memory.c | 2 +- pico/pico_int.h | 2 +- pico/sms.c | 2 +- pico/sound/sn76496.c | 8 +++++++- pico/sound/sound.c | 39 +++++++++++++++++++++++++++++++++------ 5 files changed, 43 insertions(+), 10 deletions(-) diff --git a/pico/memory.c b/pico/memory.c index e1afb4db..c0ba9ffe 100644 --- a/pico/memory.c +++ b/pico/memory.c @@ -883,7 +883,7 @@ static void m68k_mem_setup(void) static int get_scanline(int is_from_z80) { if (is_from_z80) { - int mclk_z80 = z80_cyclesDone() * 15; + int mclk_z80 = (z80_cyclesLeft<0 ? 
Pico.t.z80c_aim : z80_cyclesDone()) * 15; int mclk_line = Pico.t.z80_scanline * 488 * 7; while (mclk_z80 - mclk_line >= 488 * 7) Pico.t.z80_scanline++, mclk_line += 488 * 7; diff --git a/pico/pico_int.h b/pico/pico_int.h index 7539379a..088c7aa5 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -213,7 +213,7 @@ extern struct DrZ80 drZ80; #define z80_cyclesDone() \ (Pico.t.z80c_aim - z80_cyclesLeft) -#define cycles_68k_to_z80(x) ((x) * 3823 >> 13) +#define cycles_68k_to_z80(x) ((x) * 3822 >> 13) // ----------------------- SH2 CPU ----------------------- diff --git a/pico/sms.c b/pico/sms.c index 901f2f55..0f4a48ad 100644 --- a/pico/sms.c +++ b/pico/sms.c @@ -152,7 +152,7 @@ static void z80_sms_out(unsigned short a, unsigned char d) case 0x40: case 0x41: - if ((d & 0x90) == 0x90); + if ((d & 0x90) == 0x90) PsndDoPSG(Pico.m.scanline); SN76496Write(d); break; diff --git a/pico/sound/sn76496.c b/pico/sound/sn76496.c index b2127594..4507507c 100644 --- a/pico/sound/sn76496.c +++ b/pico/sound/sn76496.c @@ -173,9 +173,12 @@ void SN76496Update(short *buffer, int length, int stereo) /* If we exit the loop in the middle, Output[i] has to be inverted */ /* and vol[i] incremented only if the exit status of the square */ /* wave is 1. */ + left = 0; while (R->Count[i] <= 0) { - R->Count[i] += R->Period[i]; + if (R->Count[i] + R->Period[i]*4 < R->Period[i]) + left+= 4, R->Count[i] += R->Period[i]*4; + else left++, R->Count[i] += R->Period[i]; if (R->Count[i] > 0) { R->Output[i] ^= 1; @@ -186,6 +189,9 @@ void SN76496Update(short *buffer, int length, int stereo) vol[i] += R->Period[i]; } if (R->Output[i]) vol[i] -= R->Count[i]; + /* Cut of anything above the sample freqency. It will only create */ + /* aliasing and hearable distortions anyway. 
*/ + if (left > 1) vol[i] = STEP/2; } left = STEP; diff --git a/pico/sound/sound.c b/pico/sound/sound.c index 54521601..a6d55df2 100644 --- a/pico/sound/sound.c +++ b/pico/sound/sound.c @@ -259,6 +259,7 @@ static int PsndRender(int offset, int length) int stereo = (PicoIn.opt & 8) >> 3; int fmlen = ((Pico.snd.fm_pos+0x80000) >> 20); int daclen = ((Pico.snd.dac_pos+0x80000) >> 20); + int psglen = ((Pico.snd.psg_pos+0x8000) >> 16); buf32 = PsndBuffer+(offset< 0) { + short *psgbuf = PicoIn.sndOut + (psglen << stereo); + Pico.snd.psg_pos += (length-psglen) << 16; + if (PicoIn.opt & POPT_EN_PSG) + SN76496Update(psgbuf, length-psglen, stereo); + } + // Add in parts of the FM buffer not yet done if (length-fmlen > 0) { int *fmbuf = buf32 + ((fmlen-offset) << stereo); @@ -323,8 +332,6 @@ PICO_INTERNAL void PsndGetSamples(int y) { static int curr_pos = 0; - PsndDoPSG(y - 1); - curr_pos = PsndRender(0, Pico.snd.len_use); if (PicoIn.writeSound) @@ -333,11 +340,20 @@ PICO_INTERNAL void PsndGetSamples(int y) PsndClear(); } -PICO_INTERNAL void PsndGetSamplesMS(int y) +static int PsndRenderMS(int offset, int length) { - int length = Pico.snd.len_use; + int stereo = (PicoIn.opt & 8) >> 3; + int psglen = ((Pico.snd.psg_pos+0x8000) >> 16); - PsndDoPSG(y - 1); + pprof_start(sound); + + // Add in parts of the PSG output not yet done + if (length-psglen > 0) { + short *psgbuf = PicoIn.sndOut + (psglen << stereo); + Pico.snd.psg_pos += (length-psglen) << 16; + if (PicoIn.opt & POPT_EN_PSG) + SN76496Update(psgbuf, length-psglen, stereo); + } // upmix to "stereo" if needed if (PicoIn.opt & POPT_EN_STEREO) { @@ -346,8 +362,19 @@ PICO_INTERNAL void PsndGetSamplesMS(int y) *p |= *p << 16; } + pprof_end(sound); + + return length; +} + +PICO_INTERNAL void PsndGetSamplesMS(int y) +{ + static int curr_pos = 0; + + curr_pos = PsndRenderMS(0, Pico.snd.len_use); + if (PicoIn.writeSound != NULL) - PicoIn.writeSound(length * ((PicoIn.opt & POPT_EN_STEREO) ? 
4 : 2)); + PicoIn.writeSound(curr_pos * ((PicoIn.opt & POPT_EN_STEREO) ? 4 : 2)); PsndClear(); } From 4321a689a5eb7159dfbc7c572c2f93998cf90784 Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 25 Apr 2020 21:51:47 +0200 Subject: [PATCH 0313/1110] sh2: bugfix in drc --- cpu/sh2/compiler.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 04320424..ad1983bf 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -160,7 +160,7 @@ enum op_types { & BITMASK1(op)) #define OP_ISBRAUC(op) (BITMASK4(OP_BRANCH, OP_BRANCH_R, OP_BRANCH_RF, OP_RTE) \ & BITMASK1(op)) -#define OP_ISBRACND(op) (BITMASK3(OP_BRANCH_CT, OP_BRANCH_CF, OP_BRANCH_N) \ +#define OP_ISBRACND(op) (BITMASK2(OP_BRANCH_CT, OP_BRANCH_CF) \ & BITMASK1(op)) #define OP_ISBRAIMM(op) (BITMASK3(OP_BRANCH, OP_BRANCH_CT, OP_BRANCH_CF) \ & BITMASK1(op)) @@ -3501,7 +3501,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) rcache_set_usage_now(opd[0].source); // current insn rcache_set_usage_soon(soon); // insns 1-4 rcache_set_usage_late(late & ~soon); // insns 5-9 - rcache_set_usage_discard(write & ~(late|soon)); + rcache_set_usage_discard(write & ~(late|soon|opd[0].source)); if (v <= 9) // upcoming rcache_flush, start writing back unused dirty stuff rcache_clean_masked(rcache_dirty_mask() & ~(write|opd[0].dest)); From 39c5ec3f4c26e3c1615fe0c1ddb08696486ee519 Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 6 May 2020 22:58:39 +0200 Subject: [PATCH 0314/1110] audio: fix for save/load --- pico/state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pico/state.c b/pico/state.c index 6047adbd..b0b6a334 100644 --- a/pico/state.c +++ b/pico/state.c @@ -565,7 +565,7 @@ readend: z80_unpack(buff_z80); // due to dep from 68k cycles.. 
- Pico.t.m68c_aim = Pico.t.m68c_cnt; + Pico.t.m68c_frame_start = Pico.t.m68c_aim = Pico.t.m68c_cnt; if (PicoIn.AHW & PAHW_32X) Pico32xStateLoaded(0); if (PicoIn.AHW & PAHW_MCD) From 904fb98e6ca213ca4cb31a092e1bbf8dc38b1ea6 Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 6 May 2020 23:06:10 +0200 Subject: [PATCH 0315/1110] sh2: optimisations in drc --- cpu/drc/cmn.h | 24 +++++------ cpu/sh2/compiler.c | 99 ++++++++++++++++++++++++++++------------------ 2 files changed, 72 insertions(+), 51 deletions(-) diff --git a/cpu/drc/cmn.h b/cpu/drc/cmn.h index 2eb52aad..9c041e70 100644 --- a/cpu/drc/cmn.h +++ b/cpu/drc/cmn.h @@ -17,18 +17,18 @@ void drc_cmn_cleanup(void); // binary search approach, since we don't have CLZ on ARM920T #define FOR_ALL_BITS_SET_DO(mask, bit, code) { \ u32 __mask = mask; \ - for (bit = 31; bit >= 0 && mask; bit--, __mask <<= 1) { \ - if (!(__mask & (0xffff << 16))) \ - bit -= 16, __mask <<= 16; \ - if (!(__mask & (0xff << 24))) \ - bit -= 8, __mask <<= 8; \ - if (!(__mask & (0xf << 28))) \ - bit -= 4, __mask <<= 4; \ - if (!(__mask & (0x3 << 30))) \ - bit -= 2, __mask <<= 2; \ - if (!(__mask & (0x1 << 31))) \ - bit -= 1, __mask <<= 1; \ - if (__mask & (0x1 << 31)) { \ + for (bit = 0; bit < 32 && mask; bit++, __mask >>= 1) { \ + if (!(__mask & 0xffff)) \ + bit += 16,__mask >>= 16; \ + if (!(__mask & 0xff)) \ + bit += 8, __mask >>= 8; \ + if (!(__mask & 0xf)) \ + bit += 4, __mask >>= 4; \ + if (!(__mask & 0x3)) \ + bit += 2, __mask >>= 2; \ + if (!(__mask & 0x1)) \ + bit += 1, __mask >>= 1; \ + if (__mask & 0x1) { \ code; \ } \ } \ diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index ad1983bf..bfd9ec06 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -1549,22 +1549,31 @@ static u32 rcache_regs_clean; // regs needing cleaning static void rcache_lock_vreg(int x) { if (x >= 0) { + cache_regs[x].locked ++; +#if DRC_DEBUG & 64 if (cache_regs[x].type == HR_FREE) { printf("locking free vreg %x, aborting\n", x); exit(1); } - 
cache_regs[x].locked ++; + if (!cache_regs[x].locked) { + printf("locking overflow vreg %x, aborting\n", x); + exit(1); + } +#endif } } static void rcache_unlock_vreg(int x) { if (x >= 0) { +#if DRC_DEBUG & 64 if (cache_regs[x].type == HR_FREE) { printf("unlocking free vreg %x, aborting\n", x); exit(1); } - cache_regs[x].locked --; +#endif + if (cache_regs[x].locked) + cache_regs[x].locked --; } } @@ -1582,7 +1591,7 @@ static void rcache_unmap_vreg(int x) FOR_ALL_BITS_SET_DO(cache_regs[x].gregs, i, if (guest_regs[i].flags & GRF_DIRTY) { // if a dirty reg is unmapped save its value to context - if (~rcache_regs_discard & (1 << i)) + if ((~rcache_regs_discard | rcache_regs_now) & (1 << i)) emith_ctx_write(cache_regs[x].hreg, i * 4); guest_regs[i].flags &= ~GRF_DIRTY; } @@ -1700,26 +1709,28 @@ static int rcache_allocate(int what, int minprio) continue; if (cache_regs[i].type == HR_FREE || cache_regs[i].type == HR_TEMP) { // REG is free - prio = 6; + prio = 10; oldest = i; break; } if (cache_regs[i].type == HR_CACHED) { if (rcache_regs_now & cache_regs[i].gregs) // REGs needed for the current insn - i_prio = 1; + i_prio = 0; else if (rcache_regs_soon & cache_regs[i].gregs) // REGs needed in the next insns i_prio = 2; else if (rcache_regs_late & cache_regs[i].gregs) // REGs needed in some future insn - i_prio = 3; - else if (!(~rcache_regs_discard & cache_regs[i].gregs)) - // REGs not needed in the foreseeable future i_prio = 4; + else if (~rcache_regs_discard & cache_regs[i].gregs) + // REGs not needed in the foreseeable future + i_prio = 6; else // REGs soon overwritten anyway - i_prio = 5; + i_prio = 8; + if (!(cache_regs[i].flags & HRF_DIRTY)) i_prio ++; + if (prio < i_prio || (prio == i_prio && cache_regs[i].stamp < min_stamp)) { min_stamp = cache_regs[i].stamp; oldest = i; @@ -1744,21 +1755,21 @@ static int rcache_allocate_vreg(int needed) { int x; - x = rcache_allocate(1, needed ? 0 : 3); + x = rcache_allocate(1, needed ? 
0 : 4); if (x < 0) - x = rcache_allocate(-1, 1); + x = rcache_allocate(-1, 0); return x; } static int rcache_allocate_nontemp(void) { - int x = rcache_allocate(0, 3); + int x = rcache_allocate(0, 4); return x; } static int rcache_allocate_temp(void) { - int x = rcache_allocate(-1, 1); + int x = rcache_allocate(-1, 0); if (x < 0) x = rcache_allocate(0, 0); return x; @@ -1821,20 +1832,25 @@ static void rcache_remap_vreg(int x) int d; // x must be a cached vreg - if (cache_regs[x].type != HR_CACHED) + if (cache_regs[x].type != HR_CACHED || cache_regs[x].locked) return; - // don't do it if x is already a REG or isn't used or to be cleaned anyway - if ((cache_regs[x].htype & HRT_REG) || - !(rsl_d & cache_regs[x].gregs)) { + // don't do it if x isn't used + if (!(rsl_d & cache_regs[x].gregs)) { // clean here to avoid data loss on invalidation rcache_clean_vreg(x); return; } - if (cache_regs[x].locked) { - printf("remap vreg %d is locked\n", x); - exit(1); - } + FOR_ALL_BITS_SET_DO(cache_regs[x].gregs, d, + if ((guest_regs[d].flags & (GRF_STATIC|GRF_PINNED)) && + !cache_regs[guest_regs[d].sreg].locked && + !((rsl_d|rcache_regs_now) & cache_regs[guest_regs[d].sreg].gregs)) { + // STATIC not in its sreg and sreg is available + rcache_evict_vreg(guest_regs[d].sreg); + rcache_move_vreg(guest_regs[d].sreg, x); + return; + } + ) // allocate a non-TEMP vreg rcache_lock_vreg(x); // lock to avoid evicting x @@ -1891,8 +1907,8 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr { int src, dst, ali; cache_reg_t *tr; - u32 rsp_d = (rcache_regs_now | rcache_regs_soon | - rcache_regs_static | rcache_regs_pinned) & ~rcache_regs_discard; + u32 rsp_d = (rcache_regs_soon | rcache_regs_static | rcache_regs_pinned) & + ~rcache_regs_discard; dst = src = guest_regs[r].vreg; @@ -1901,7 +1917,7 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr if ((guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) && src != guest_regs[r].sreg && (src 
< 0 || mode != RC_GR_READ) && !cache_regs[guest_regs[r].sreg].locked && - !(rsp_d & cache_regs[guest_regs[r].sreg].gregs)) { + !((rsp_d|rcache_regs_now) & cache_regs[guest_regs[r].sreg].gregs)) { dst = guest_regs[r].sreg; rcache_evict_vreg(dst); } else if (dst < 0) { @@ -1926,7 +1942,7 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr ali = tr->gregs & ~(1 << r); if (mode != RC_GR_READ && src == dst && ali) { int x = -1; - if (rsp_d & ali) { + if ((rsp_d|rcache_regs_now) & ali) { if ((guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) && guest_regs[r].sreg == dst && !tr->locked) { // split aliases if r is STATIC in sreg and dst isn't already locked @@ -1935,7 +1951,7 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr if ((guest_regs[t].flags & (GRF_STATIC|GRF_PINNED)) && !(ali & ~(1 << t)) && !cache_regs[guest_regs[t].sreg].locked && - !(rsp_d & cache_regs[guest_regs[t].sreg].gregs)) { + !((rsp_d|rcache_regs_now) & cache_regs[guest_regs[t].sreg].gregs)) { // alias is a single STATIC and its sreg is available x = guest_regs[t].sreg; rcache_evict_vreg(x); @@ -1947,8 +1963,9 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr break; ) if (x >= 0) { - src = x; - rcache_move_vreg(src, dst); + rcache_remove_vreg_alias(src, r); + src = dst; + rcache_move_vreg(x, dst); } } else { // split r @@ -1956,6 +1973,7 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr x = rcache_allocate_vreg(rsp_d & (1 << r)); rcache_unlock_vreg(src); if (x >= 0) { + rcache_remove_vreg_alias(src, r); dst = x; tr = &cache_regs[dst]; tr->stamp = rcache_counter; @@ -1965,8 +1983,6 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr if (x < 0) // aliases not needed or no vreg available, remove them rcache_evict_vreg_aliases(dst, r); - else if (src != dst) - rcache_remove_vreg_alias(src, r); } // assign r to dst @@ -2342,13 +2358,16 @@ static 
void rcache_clean_tmp(void) static void rcache_clean_masked(u32 mask) { int i, r, hr; + u32 m; rcache_regs_clean |= mask; mask = rcache_regs_clean; - // clean constants where all aliases are covered by the mask + // clean constants where all aliases are covered by the mask, exempt statics + // to avoid flushing them to context if sreg isn't available + m = mask & ~(rcache_regs_static | rcache_regs_pinned); for (i = 0; i < ARRAY_SIZE(gconsts); i++) - if ((gconsts[i].gregs & mask) && !(gconsts[i].gregs & ~mask)) { + if ((gconsts[i].gregs & m) && !(gconsts[i].gregs & ~mask)) { FOR_ALL_BITS_SET_DO(gconsts[i].gregs, r, if (guest_regs[r].flags & GRF_CDIRTY) { hr = rcache_get_reg_(r, RC_GR_READ, 0, NULL); @@ -2479,6 +2498,9 @@ static void rcache_create(void) } // create static host register mapping for SH2 regs + for (i = 0; i < ARRAY_SIZE(guest_regs); i++) { + guest_regs[i] = (guest_reg_t){.sreg = -1}; + } for (i = 0; i < ARRAY_SIZE(regs_static); i += 2) { for (x = ARRAY_SIZE(cache_regs)-1; x >= 0; x--) if (cache_regs[x].hreg == regs_static[i+1]) break; @@ -2486,8 +2508,7 @@ static void rcache_create(void) guest_regs[regs_static[i]] = (guest_reg_t){.flags = GRF_STATIC,.sreg = x}; rcache_regs_static |= (1 << regs_static[i]); rcache_vregs_reg &= ~(1 << x); - } else - guest_regs[regs_static[i]] = (guest_reg_t){.sreg = -1}; + } } printf("DRC registers created, %ld host regs (%d REG, %d STATIC, 1 CTX)\n", @@ -3501,7 +3522,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) rcache_set_usage_now(opd[0].source); // current insn rcache_set_usage_soon(soon); // insns 1-4 rcache_set_usage_late(late & ~soon); // insns 5-9 - rcache_set_usage_discard(write & ~(late|soon|opd[0].source)); + rcache_set_usage_discard(write & ~(late|soon)); if (v <= 9) // upcoming rcache_flush, start writing back unused dirty stuff rcache_clean_masked(rcache_dirty_mask() & ~(write|opd[0].dest)); @@ -4717,7 +4738,7 @@ end_op: // branch not taken, correct cycle count if (ctaken) - 
emith_add_r_imm(sr, ctaken << 12); + cycles -= ctaken; // set T bit to reflect branch not taken for OP_BRANCH_CT/CF if (emith_get_t_cond() >= 0) // T is synced for all other cases emith_set_t(sr, opd_b->op == OP_BRANCH_CF); @@ -5263,11 +5284,11 @@ static void sh2_smc_rm_blocks(u32 a, int len, int tcache_id, u32 shift) start_lit = block->addr_lit & wtmask; end_lit = start_lit + block->size_lit; // disable/delete block if it covers the modified address - if ((start_addr <= a+len && a < end_addr) || - (start_lit <= a+len && a < end_lit)) + if ((start_addr < a+len && a < end_addr) || + (start_lit < a+len && a < end_lit)) { dbg(2, "smc remove @%08x", a); - end_addr = (start_lit <= a+len && block->size_lit ? a : 0); + end_addr = (start_lit < a+len && block->size_lit ? a : 0); dr_rm_block_entry(block, tcache_id, end_addr, 0); #if (DRC_DEBUG & 2) removed = 1; From a002255e355adc0f9eddec124f15070ff13a9f98 Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 9 May 2020 10:45:56 +0200 Subject: [PATCH 0316/1110] 32x: libretro bugfix --- platform/libretro/libretro.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/platform/libretro/libretro.c b/platform/libretro/libretro.c index 9111048f..0794f555 100644 --- a/platform/libretro/libretro.c +++ b/platform/libretro/libretro.c @@ -83,6 +83,8 @@ static short ALIGNED(4) sndBuffer[2*44100/50]; static void snd_write(int len); +char **g_argv; + #ifdef _WIN32 #define SLASH '\\' #else @@ -500,6 +502,8 @@ void emu_video_mode_change(int start_line, int line_count, int is_32cols) void emu_32x_startup(void) { + PicoDrawSetOutFormat(PDF_RGB555, 0); + PicoDrawSetOutBuf(vout_buf, vout_width * 2); } void lprintf(const char *fmt, ...) 
From d39eb595bbbb0bbf994af4a2181604337f12e20b Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 15 May 2020 21:46:28 +0200 Subject: [PATCH 0317/1110] sh2 drc: revised ARM A32 backend optimizer --- cpu/drc/emit_arm.c | 141 ++++++++++++++++++++------------------------- 1 file changed, 61 insertions(+), 80 deletions(-) diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index af9491f1..e27054a3 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -36,8 +36,7 @@ #define SR 16 // CPSR, status register #define MEM 17 // memory access (src=LDR, dst=STR) #define CYC1 20 // 1 cycle interlock (LDR, reg-cntrld shift) -#define CYC2 21 // 2+ cycles interlock (LDR[BH], MUL/MLA etc) -#define SWAP 31 // swapped +#define CYC2 (CYC1+1)// 2+ cycles interlock (LDR[BH], MUL/MLA etc) #define NO 32 // token for "no register" // bitmask builders @@ -46,6 +45,7 @@ #define M3(x,y,z) (M2(x,y)|M1(z)) #define M4(x,y,z,a) (M3(x,y,z)|M1(a)) #define M5(x,y,z,a,b) (M4(x,y,z,a)|M1(b)) +#define M6(x,y,z,a,b,c) (M5(x,y,z,a,b)|M1(c)) #define M10(a,b,c,d,e,f,g,h,i,j) (M5(a,b,c,d,e)|M5(f,g,h,i,j)) // sys_cacheflush always flushes whole pages, and it's rather expensive on ARMs @@ -90,94 +90,81 @@ static inline void emith_update_add(void *base, void *end) } // peephole optimizer. 
ATM only tries to reduce interlock -#define EMIT_CACHE_SIZE 3 +#define EMIT_CACHE_SIZE 6 struct emit_op { u32 op; u32 src, dst; }; -// peephole cache, last commited insn + cache + next insn + empty insn = size+3 -static struct emit_op emit_cache[EMIT_CACHE_SIZE+3]; +// peephole cache, last commited insn + cache + next insn = size+2 +static struct emit_op emit_cache[EMIT_CACHE_SIZE+2]; static int emit_index; #define emith_insn_ptr() (u8 *)((u32 *)tcache_ptr-emit_index) -static inline int emith_pool_index(int tcache_offs); -static inline void emith_pool_adjust(int pool_index, int move_offs); +static inline void emith_pool_adjust(int tcache_offs, int move_offs); static NOINLINE void EMIT(u32 op, u32 dst, u32 src) { - void *emit_ptr = (u32 *)tcache_ptr - emit_index; - int i; + void * emit_ptr = (u32 *)tcache_ptr - emit_index; + struct emit_op *const ptr = emit_cache; + const int n = emit_index+1; + int i, bi, bd = 0; - EMIT_PTR(tcache_ptr, op); // emit to keep tcache_ptr current + // account for new insn in tcache + tcache_ptr = (void *)((u32 *)tcache_ptr + 1); COUNT_OP; // for conditional execution SR is always source if (op < 0xe0000000 /*A_COND_AL << 28*/) src |= M1(SR); - // put insn on back of queue - emit_cache[emit_index+1].op = op; - emit_cache[emit_index+1].src = src & ~M1(NO); // mask away the NO token - emit_cache[emit_index+1].dst = dst & ~M1(NO); - // move insn down in the queue as long as permitted by dependencies - for (i = emit_index-1; i > 0; i--) { - struct emit_op *ptr = &emit_cache[i]; + // put insn on back of queue // mask away the NO token + emit_cache[n] = (struct emit_op) + { .op=op, .src=src & ~M1(NO), .dst=dst & ~M1(NO) }; + // check insns down the queue as long as permitted by dependencies + for (bd = bi = 0, i = emit_index; i > 1 && !(dst & M1(PC)); i--) { int deps = 0; - // never swap branch insns (changes semantics) - if ((ptr[0].dst | ptr[1].dst) & M1(PC)) - continue; - // dst deps between 0 and 1 must not be swapped, since any deps - // 
but [0].src & [1].src lead to changed semantics if swapped. - if ((ptr[0].dst & ptr[1].src) || (ptr[1].dst & ptr[0].src) || - (ptr[0].dst & ptr[1].dst)) - continue; -#if 1 - // just move loads as far up as possible - deps -= !!(ptr[1].src & M1(MEM)); - deps += !!(ptr[0].src & M1(MEM)); -#elif 0 - // treat all dest->src deps as a potential interlock -#define DEP_INSN(x,y) !!(ptr[x].dst & ptr[y].src) - // insn sequence: -1, 0, 1, 2 - deps -= DEP_INSN(1,2) + DEP_INSN(-1,0); - deps -= !!(ptr[1].src & M1(MEM)); // favour moving LDR's down - // insn sequence: -1, 1, 0, 2 - deps += DEP_INSN(0,2) + DEP_INSN(-1,1); - deps += !!(ptr[0].src & M1(SWAP)); // penalise if swapped -#else - // calculate ARM920T interlock cycles -#define DEP_CYC1(x,y) ((ptr[x].dst & ptr[y].src)&&(ptr[x].src & M1(CYC1))) -#define DEP_CYC2(x,y) ((ptr[x].dst & ptr[y].src)&&(ptr[x].src & M1(CYC2))) -#define DEP_INSN(x,y,z) DEP_CYC1(x,y)+DEP_CYC1(y,z)+2*DEP_CYC2(x,y)+DEP_CYC2(x,z) - // insn sequence: -1, 0, 1, 2 - deps -= DEP_INSN(0,1,2) + DEP_INSN(-1,0,1); - deps -= !!(ptr[1].src & M1(MEM)); // favour moving LDR's down - // insn sequence: -1, 1, 0, 2 - deps += DEP_INSN(0,2,1) + DEP_INSN(-1,1,0); - deps += !!(ptr[0].src & M1(SWAP)); // penalise multiple swaps -#endif - // swap if fewer depencies - if (deps < 0) { - // swap insn reading PC only if uncomitted pool load - struct emit_op tmp; - int i0 = -1, i1 = -1; - if ((!(ptr[0].src & M1(PC)) || - (i0 = emith_pool_index(emit_index+2 - i)) >= 0) && - (!(ptr[1].src & M1(PC)) || - (i1 = emith_pool_index(emit_index+1 - i)) >= 0)) { - // not using PC, or pool load - emith_pool_adjust(i0, 1); - emith_pool_adjust(i1, -1); - tmp = ptr[0], ptr[0] = ptr[1], ptr[1] = tmp; - ptr[0].src |= M1(SWAP); - } + // dst deps between i and n must not be swapped, since any deps + // but [i].src & [n].src lead to changed semantics if swapped. 
+ if ((ptr[i].dst & ptr[n].src) || (ptr[n].dst & ptr[i].src) || + (ptr[i].dst & ptr[n].dst)) + break; + // don't swap insns reading PC if it's not a word pool load + // (ptr[i].op&0xf700000) != EOP_C_AM2_IMM(0,0,0,1,0,0,0)) + if ((ptr[i].src & M1(PC)) && (ptr[i].op&0xf700000) != 0x5100000) + break; + + // calculate ARM920T interlock cycles (differences only) +#define D2(x,y) ((ptr[x].dst & ptr[y].src)?((ptr[x].src >> CYC2) & 1):0) +#define D1(x,y) ((ptr[x].dst & ptr[y].src)?((ptr[x].src >> CYC1) & 3):0) + // insn sequence: [..., i-2, i-1, i, i+1, ..., n-2, n-1, n] + deps -= D2(i-2,i)+D2(i-1,i+1)+D2(n-2,n ) + D1(i-1,i)+D1(n-1,n); + deps -= !!(ptr[n].src & M2(CYC1,CYC2));// favour moving LDR down + // insn sequence: [..., i-2, i-1, n, i, i+1, ..., n-2, n-1] + deps += D2(i-2,n)+D2(i-1,i )+D2(n ,i+1) + D1(i-1,n)+D1(n ,i); + deps += !!(ptr[i].src & M2(CYC1,CYC2));// penalize moving LDR up + // remember best match found + if (bd > deps) + bd = deps, bi = i; + } + // swap if fewer depencies + if (bd < 0) { + // make room for new insn at bi + struct emit_op tmp = ptr[n]; + for (i = n-1; i >= bi; i--) { + ptr[i+1] = ptr[i]; + if (ptr[i].src & M1(PC)) + emith_pool_adjust(n-i+1, 1); } + // insert new insn at bi + ptr[bi] = tmp; + if (ptr[bi].src & M1(PC)) + emith_pool_adjust(1, bi-n); } if (dst & M1(PC)) { // commit everything if a branch insn is emitted for (i = 1; i <= emit_index+1; i++) EMIT_PTR(emit_ptr, emit_cache[i].op); emit_index = 0; - } else if (emit_index <= EMIT_CACHE_SIZE) { + } else if (emit_index < EMIT_CACHE_SIZE) { // queue not yet full emit_index++; } else { @@ -412,13 +399,13 @@ static void emith_flush(void) EMIT(((cond)<<28) | ((s)<<20) | ((rd)<<16) | ((rs)<<8) | 0x90 | (rm), M2(rd,s?SR:NO), M3(rs,rm,CYC2)) #define EOP_C_UMULL(cond,s,rdhi,rdlo,rs,rm) \ - EMIT(((cond)<<28) | 0x00800000 | ((s)<<20) | ((rdhi)<<16) | ((rdlo)<<12) | ((rs)<<8) | 0x90 | (rm), M3(rdhi,rdlo,s?SR:NO), M3(rs,rm,CYC2)) + EMIT(((cond)<<28) | 0x00800000 | ((s)<<20) | ((rdhi)<<16) | 
((rdlo)<<12) | ((rs)<<8) | 0x90 | (rm), M3(rdhi,rdlo,s?SR:NO), M4(rs,rm,CYC1,CYC2)) #define EOP_C_SMULL(cond,s,rdhi,rdlo,rs,rm) \ - EMIT(((cond)<<28) | 0x00c00000 | ((s)<<20) | ((rdhi)<<16) | ((rdlo)<<12) | ((rs)<<8) | 0x90 | (rm), M3(rdhi,rdlo,s?SR:NO), M3(rs,rm,CYC2)) + EMIT(((cond)<<28) | 0x00c00000 | ((s)<<20) | ((rdhi)<<16) | ((rdlo)<<12) | ((rs)<<8) | 0x90 | (rm), M3(rdhi,rdlo,s?SR:NO), M4(rs,rm,CYC1,CYC2)) #define EOP_C_SMLAL(cond,s,rdhi,rdlo,rs,rm) \ - EMIT(((cond)<<28) | 0x00e00000 | ((s)<<20) | ((rdhi)<<16) | ((rdlo)<<12) | ((rs)<<8) | 0x90 | (rm), M3(rdhi,rdlo,s?SR:NO), M5(rs,rm,rdlo,rdhi,CYC2)) + EMIT(((cond)<<28) | 0x00e00000 | ((s)<<20) | ((rdhi)<<16) | ((rdlo)<<12) | ((rs)<<8) | 0x90 | (rm), M3(rdhi,rdlo,s?SR:NO), M6(rs,rm,rdlo,rdhi,CYC1,CYC2)) #define EOP_MUL(rd,rm,rs) EOP_C_MUL(A_COND_AL,0,rd,rs,rm) // note: rd != rm @@ -502,10 +489,10 @@ static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int return; } #else - for (i = 3, u = v; i > 0; i--, u >>= 8) + for (i = 2, u = v; i > 0; i--, u >>= 8) while (u > 0xff && !(u & 3)) u >>= 2; - if (u) { // 4 insns needed... + if (u) { // 3+ insns needed... 
if (op == A_OP_MVN) imm = ~imm; // ...emit literal load @@ -660,21 +647,14 @@ static inline void emith_pool_check(void) emith_pool_commit(1); } -static inline int emith_pool_index(int tcache_offs) +static inline void emith_pool_adjust(int tcache_offs, int move_offs) { u32 *ptr = (u32 *)tcache_ptr - tcache_offs; int i; for (i = literal_iindex-1; i >= 0 && literal_insn[i] >= ptr; i--) if (literal_insn[i] == ptr) - return i; - return -1; -} - -static inline void emith_pool_adjust(int pool_index, int move_offs) -{ - if (pool_index >= 0) - literal_insn[pool_index] += move_offs; + literal_insn[i] += move_offs; } #define EMITH_HINT_COND(cond) /**/ @@ -938,6 +918,7 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) emith_top_imm(cond, A_OP_TST, r, imm) #define emith_move_r_imm_s8_patchable(r, imm) do { \ + emith_flush(); /* pin insn at current tcache_ptr for patching */ \ if ((s8)(imm) < 0) \ EOP_MVN_IMM(r, 0, (u8)~(imm)); \ else \ From b718aa2d93145ffbdba7cd1b46a5cdb5d668c5ed Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 16 May 2020 21:16:27 +0200 Subject: [PATCH 0318/1110] add copyright stuff to substantially changed files --- pico/32x/draw.c | 1 + pico/draw.c | 1 + pico/draw_arm.S | 1 + pico/misc.c | 1 + platform/common/host_dasm.c | 4 ++++ platform/common/menu_pico.c | 3 ++- 6 files changed, 10 insertions(+), 1 deletion(-) diff --git a/pico/32x/draw.c b/pico/32x/draw.c index 4119f09d..45c27260 100644 --- a/pico/32x/draw.c +++ b/pico/32x/draw.c @@ -1,6 +1,7 @@ /* * PicoDrive * (C) notaz, 2009,2010 + * (C) kub, 2019 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. diff --git a/pico/draw.c b/pico/draw.c index 82bb3462..8bf73e79 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -2,6 +2,7 @@ * line renderer * (c) Copyright Dave, 2004 * (C) notaz, 2006-2010 + * (C) kub, 2019-2020 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. 
diff --git a/pico/draw_arm.S b/pico/draw_arm.S index 9b5a4e32..0579006c 100644 --- a/pico/draw_arm.S +++ b/pico/draw_arm.S @@ -1,6 +1,7 @@ /* * assembly optimized versions of most funtions from draw.c * (C) notaz, 2006-2010,2017 + * (C) kub, 2020 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. diff --git a/pico/misc.c b/pico/misc.c index 74d4d8a8..cf09688e 100644 --- a/pico/misc.c +++ b/pico/misc.c @@ -1,6 +1,7 @@ /* * rarely used EEPROM code * (C) notaz, 2006-2008 + * (C) kub, 2020 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. diff --git a/platform/common/host_dasm.c b/platform/common/host_dasm.c index 2084aa91..b3b504e8 100644 --- a/platform/common/host_dasm.c +++ b/platform/common/host_dasm.c @@ -1,3 +1,7 @@ +/* + * DRC host disassembler interface for MIPS/ARM32 for use without binutils + * (C) kub, 2018,2019 + */ #include #include #include diff --git a/platform/common/menu_pico.c b/platform/common/menu_pico.c index 882aef92..1d46e634 100644 --- a/platform/common/menu_pico.c +++ b/platform/common/menu_pico.c @@ -923,7 +923,8 @@ static void draw_frame_credits(void) } static const char credits[] = - "PicoDrive v" VERSION " (c) notaz, 2006-2013\n\n\n" + "PicoDrive v" VERSION "\n" + "(c) notaz, 2006-2013; irixxxx, 2018-2020\n\n" "Credits:\n" "fDave: initial code\n" #ifdef EMU_C68K From dfff48c24a8c5beb5fbc34a0f1440342ac61eab4 Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 16 May 2020 21:17:28 +0200 Subject: [PATCH 0319/1110] release 1.96 --- platform/common/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/platform/common/version.h b/platform/common/version.h index a8c3034b..cd811a66 100644 --- a/platform/common/version.h +++ b/platform/common/version.h @@ -1 +1 @@ -#define VERSION "1.95" +#define VERSION "1.96" From 6badfabe35d626a2191db893542a3613101123d0 Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 22 May 2020 23:14:52 
+0200 Subject: [PATCH 0320/1110] vdp rendering, bugfix for overlapping high prio sprites --- pico/draw.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pico/draw.c b/pico/draw.c index 8bf73e79..668a1246 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -1011,12 +1011,12 @@ static void DrawSpritesHiAS(unsigned char *sprited, int sh) delta<<=4; // Delta of address if (entry+1 == cnt) width = p[entry+1]; // last sprite width limited? + while (sx <= 0 && width) width--, sx+=8, tile+=delta; // Offscreen mp = mb+(sx>>3); - for (m = *mp; width; width--, sx+=8, *mp++ = m, m >>= 8, tile+=delta) + for (m = *mp; width; width--, sx+=8, tile+=delta, *mp++ = m, m >>= 8) { unsigned int pack; - if(sx<=0) continue; if(sx>=328) break; // Offscreen pack = *(unsigned int *)(PicoMem.vram + (tile & 0x7fff)); @@ -1244,12 +1244,12 @@ static void DrawSpritesForced(unsigned char *sprited) delta<<=4; // Delta of address if (entry+1 == cnt) width = p[entry+1]; // last sprite width limited? + while (sx <= 0 && width) width--, sx+=8, tile+=delta; // Offscreen mp = mb+(sx>>3); - for (m = *mp; width; width--, sx+=8, *mp++ = m, m >>= 8, tile+=delta) + for (m = *mp; width; width--, sx+=8, tile+=delta, *mp++ = m, m >>= 8) { unsigned int pack; - if(sx<=0) continue; if(sx>=328) break; // Offscreen pack = *(unsigned int *)(PicoMem.vram + (tile & 0x7fff)); From dae0d04dbf64f58d9a26d853cf2bae1b3678eee4 Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 16 Jun 2020 18:43:45 +0200 Subject: [PATCH 0321/1110] sh2 drc, preparations for powerpc support --- cpu/drc/emit_ppc.c | 1797 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1797 insertions(+) create mode 100644 cpu/drc/emit_ppc.c diff --git a/cpu/drc/emit_ppc.c b/cpu/drc/emit_ppc.c new file mode 100644 index 00000000..fb2ca44b --- /dev/null +++ b/cpu/drc/emit_ppc.c @@ -0,0 +1,1797 @@ +/* + * Basic macros to emit PowerISA 2.03 64 bit instructions and some utils + * Copyright (C) 2020 kub + * + * This work is licensed under 
the terms of MAME license. + * See COPYING file in the top-level directory. + */ + +// WARNING: unfinished, neither thoroughly tested nor optimized. little endian only! + +// NB bit numbers are reversed in PPC (MSB is bit 0). The emith_* functions and +// macros must take this into account. + +// NB PPC was a 64 bit architecture from the onset, so basically all operations +// are operating on 64 bits. 32 bit arch was only added later on, and there are +// very few 32 bit operations (cmp*, shift/rotate, extract/insert, load/store). +// For most operations the upper bits don't spill into the lower word, for the +// others there is an appropriate 32 bit operation available. + +// NB PowerPC isn't a clean RISC design. Several insns use microcode, which is +// AFAIK notably slower than using some 2-3 non-microcode insns. So, using +// such insns should by avoided if possible. Listed in Cell handbook, App. A: +// - shift/rotate having the amount in a register +// - arithmetic/logical having the RC flag set (except cmp*) +// - load/store algebraic (l?a*), multiple (lmw/stmw), string (ls*/sts*) +// - mtcrf (and some more SPR related, not used here) +// moreover, misaligned load/store crossing a cacheline boundary are microcoded. +// Note also that load/store string isn't available in little endian mode. + +// NB flag handling in PPC differs grossly from the ARM/X86 model. There are 8 +// fields in the condition register, each having 4 condition bits. However, only +// the EQ bit is similar to the Z flag. The CA and OV bits in the XER register +// are similar to the C and V bits, but shifts don't use CA, and cmp* doesn't +// use CA and OV. +// Moreover, there's no easy possibility to get CA and OV for 32 bit arithmetic +// since all arithmetic/logical insns use 64 bit. +// For now, use the "no flags" code from the RISCV backend. 
+ +#define HOST_REGS 32 + +// PPC64: params: r3-r10, return: r3, temp: r0,r11-r12, saved: r14-r31 +// reserved: r0(zero), r1(stack), r2(TOC), r13(TID) +#define RET_REG 3 +#define PARAM_REGS { 3, 4, 5, 6, 7, 8, 9, 10 } +#define PRESERVED_REGS { 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,31 } +#define TEMPORARY_REGS { 11, 12 } + +#define CONTEXT_REG 31 +#define STATIC_SH2_REGS { SHR_SR,30 , SHR_R(0),29 , SHR_R(1),28 } + +// if RA is 0 in non-update memory insns, ADDI/ADDIS, ISEL, it aliases with zero +#define Z0 0 // zero register +#define SP 1 // stack pointer +// SPR registers +#define XER -1 // exception register +#define LR -8 // link register +#define CTR -9 // counter register +// internally used by code emitter: +#define AT 0 // emitter temporary (can't be fully used anyway) +#define FNZ 14 // emulated processor flags: N (bit 31) ,Z (all bits) +#define FC 15 // emulated processor flags: C (bit 0), others 0 +#define FV 16 // emulated processor flags: Nt^Ns (bit 31). others x + + +// PPC conditions, BO0-BO4:BI2-BI4 since we only need CR0 +#define PPC_LT 0x60 +#define PPC_GE 0x20 +#define PPC_GT 0x61 +#define PPC_LE 0x21 +#define PPC_EQ 0x62 +#define PPC_NE 0x22 +#define PPC_AL 0xa0 + +// unified conditions; virtual, not corresponding to anything real on PPC +#define DCOND_EQ 0x0 +#define DCOND_NE 0x1 +#define DCOND_HS 0x2 +#define DCOND_LO 0x3 +#define DCOND_MI 0x4 +#define DCOND_PL 0x5 +#define DCOND_VS 0x6 +#define DCOND_VC 0x7 +#define DCOND_HI 0x8 +#define DCOND_LS 0x9 +#define DCOND_GE 0xa +#define DCOND_LT 0xb +#define DCOND_GT 0xc +#define DCOND_LE 0xd + +#define DCOND_CS DCOND_LO +#define DCOND_CC DCOND_HS + +// unified insn; use right-aligned bit offsets for the bitfields +#define PPC_INSN(op, b10, b15, b20, b31) \ + (((op)<<26)|((b10)<<21)|((b15)<<16)|((b20)<<11)|((b31)<<0)) + +#define _ 0 // marker for "field unused" +#define __(n) o##n // enum marker for "undefined" +#define _CB(v,l,s,d) ((((v)>>(s))&((1<<(l))-1))<<(d)) // copy l bits + 
+// NB everything privileged or unneeded at 1st sight is left out +// opcode field (encoded in OPCD, bits 0-5) +enum { OP__LMA=004, OP_MULLI=007, + OP_SUBFIC, __(11), OP_CMPLI, OP_CMPI, OP_ADDIC, OP_ADDICF, OP_ADDI, OP_ADDIS, + OP_BC, __(21), OP_B, OP__CR, OP_RLWIMI, OP_RLWINM, __(26), OP_RLWNM, + OP_ORI, OP_ORIS, OP_XORI, OP_XORIS, OP_ANDI, OP_ANDIS, OP__RLD, OP__EXT, + OP_LWZ, OP_LWZU, OP_LBZ, OP_LBZU, OP_STW, OP_STWU, OP_STB, OP_STBU, + OP_LHZ, OP_LHZU, OP_LHA, OP_LHAU, OP_STH, OP_STHU, OP_LMW, OP_STMW, + /*OP_LQ=070,*/ OP__LD=072, OP__ST=076 }; +// CR subops (encoded in bits 21-31) +enum { OPC_MCRF=0, OPC_BCLR=32, OPC_BCCTR=1056 }; +// RLD subops (encoded in XO bits 27-31) +enum { OPR_RLDICL=0, OPR_RLDICR=4, OPR_RLDIC=8, OPR_RLDIMI=12, OPR_RLDCL=16, OPR_RLDCR=18 }; +// EXT subops (encoded in XO bits 21-31) +enum { + // arith/logical + OPE_CMP=0, OPE_SUBFC=16, OPE_ADDC=20, OPE_AND=56, + OPE_CMPL=64, OPE_SUBF=80, OPE_ANDC=120, OPE_NEG=208, OPE_NOR=248, + OPE_SUBFE=272, OPE_ADDE=276, OPE_SUBFZE=400, OPE_ADDZE=404, OPE_SUBFME=464, OPE_ADDME=468, + OPE_ADD=532, OPE_EQV=568, OPE_XOR=632, OPE_ORC=824, OPE_OR=888, OPE_NAND=952, + // shift + OPE_SLW=48, OPE_SLD=54, OPE_SRW=1072, OPE_SRD=1078, OPE_SRAW=1584, OPE_SRAD=1588, OPE_SRAWI=1648, OPE_SRADI=1652, + // extend, bitcount + OPE_CNTLZW=52, OPE_CNTLZD=116, OPE_EXTSH=1844, OPE_EXTSB=1908, OPE_EXTSW=1972, + // mult/div + OPE_MULHDU=18, OPE_MULHWU=22, OPE_MULHD=146, OPE_MULHW=150, OPE_MULLD=466, OPE_MULLW=470, + OPE_DIVDU=914, OPE_DIVWU=918, OPE_DIVD=978, OPE_DIVW=982, + // load/store indexed + OPE_LDX=42, OPE_LDUX=106, OPE_STDX=298, OPE_STDUX=362, + OPE_LWZX=46, OPE_LWZUX=110, OPE_LWAX=682, OPE_LWAUX=746, OPE_STWX=302, OPE_STWUX=366, + OPE_LBZX=174, OPE_LBZUX=238, /* no LBAX/LBAUX... 
*/ OPE_STBX=430, OPE_STBUX=494, + OPE_LHZX=558, OPE_LHZUX=622, OPE_LHAX=686, OPE_LHAUX=750, OPE_STHX=814, OPE_STHUX=878, + // SPR, CR related + OPE_ISEL=15, OPE_MFCR=38, OPE_MTCRF=288, OPE_MFSPR=678, OPE_MTSPR=934, OPE_MCRXR=1024, +}; +// LD subops (encoded in XO bits 30-31) +enum { OPL_LD, OPL_LDU, OPL_LWA }; +// ST subops (encoded in XO bits 30-31) +enum { OPS_STD, OPS_STDU /*,OPS_STQ*/ }; + +// X*,M*-forms insns often have overflow detect in b21 and CR0 update in b31 +#define XOE (1<<10) // (31-21) +#define XRC (1<<0) // (31-31) +#define XF (XOE|XRC) +// MB and ME in M*-forms rotate left +#define MM(b,e) (((b)<<6)|((e)<<1)) +#define MD(b,s) (_CB(b,5,0,6)|_CB(b,1,5,5)|_CB(s,5,0,11)|_CB(s,1,5,1)) +// AA and LK in I,B-forms branches +#define BAA (1<<1) +#define BLK (1<<0) + +#define PPC_NOP \ + PPC_INSN(OP_ORI, 0, 0, _, 0) // ori r0, r0, 0 + +// arithmetic/logical + +#define PPC_OP_REG(op, xop, rt, ra, rb) /* X*,M*-form */ \ + PPC_INSN((unsigned)op, rt, ra, rb, xop) +#define PPC_OP_IMM(op, rt, ra, imm) /* D,B,I-form */ \ + PPC_INSN((unsigned)op, rt, ra, _, imm) + +// rt = ra OP rb +#define PPC_ADD_REG(rt, ra, rb) \ + PPC_OP_REG(OP__EXT,OPE_ADD,rt,ra,rb) +#define PPC_ADDC_REG(rt, ra, rb) \ + PPC_OP_REG(OP__EXT,OPE_ADD|XOE,rt,ra,rb) +#define PPC_SUB_REG(rt, rb, ra) /* NB reversed args (rb-ra) */ \ + PPC_OP_REG(OP__EXT,OPE_SUBF,rt,ra,rb) +#define PPC_SUBC_REG(rt, rb, ra) \ + PPC_OP_REG(OP__EXT,OPE_SUBF|XOE,rt,ra,rb) +#define PPC_NEG_REG(rt, ra) \ + PPC_OP_REG(OP__EXT,OPE_NEG,rt,ra,_) +#define PPC_NEGC_REG(rt, ra) \ + PPC_OP_REG(OP__EXT,OPE_NEG|XOE,rt,ra,_) + +#define PPC_CMP_REG(ra, rb) \ + PPC_OP_REG(OP__EXT,OPE_CMP,1,ra,rb) +#define PPC_CMPL_REG(ra, rb) \ + PPC_OP_REG(OP__EXT,OPE_CMPL,1,ra,rb) + +#define PPC_CMPW_REG(ra, rb) \ + PPC_OP_REG(OP__EXT,OPE_CMP,0,ra,rb) +#define PPC_CMPLW_REG(ra, rb) \ + PPC_OP_REG(OP__EXT,OPE_CMPL,0,ra,rb) + +#define PPC_XOR_REG(ra, rt, rb) \ + PPC_OP_REG(OP__EXT,OPE_XOR,rt,ra,rb) +#define PPC_OR_REG(ra, rt, rb) \ + 
PPC_OP_REG(OP__EXT,OPE_OR,rt,ra,rb)
+#define PPC_ORN_REG(ra, rt, rb) \
+	PPC_OP_REG(OP__EXT,OPE_ORC,rt,ra,rb)
+#define PPC_NOR_REG(ra, rt, rb) \
+	PPC_OP_REG(OP__EXT,OPE_NOR,rt,ra,rb)
+#define PPC_AND_REG(ra, rt, rb) \
+	PPC_OP_REG(OP__EXT,OPE_AND,rt,ra,rb)
+#define PPC_BIC_REG(ra, rt, rb) \
+	PPC_OP_REG(OP__EXT,OPE_ANDC,rt,ra,rb)
+
+// register copy and complement, built from OR resp. NOR with both sources equal
+#define PPC_MOV_REG(rt, ra) \
+	PPC_OR_REG(rt, ra, ra)
+#define PPC_MVN_REG(rt, ra) \
+	PPC_NOR_REG(rt, ra, ra)
+
+// rt = ra OP rb OP carry
+#define PPC_ADC_REG(rt, ra, rb) \
+	PPC_OP_REG(OP__EXT,OPE_ADDE,rt,ra,rb)
+#define PPC_SBC_REG(rt, rb, ra) \
+	PPC_OP_REG(OP__EXT,OPE_SUBFE,rt,ra,rb)
+#define PPC_NGC_REG(rt, ra) \
+	PPC_OP_REG(OP__EXT,OPE_SUBFZE,rt,ra,_)
+
+// rt = ra SHIFT rb
+#define PPC_LSL_REG(ra, rt, rb) \
+	PPC_OP_REG(OP__EXT,OPE_SLD,rt,ra,rb)
+#define PPC_LSR_REG(ra, rt, rb) \
+	PPC_OP_REG(OP__EXT,OPE_SRD,rt,ra,rb)
+#define PPC_ASR_REG(ra, rt, rb) \
+	PPC_OP_REG(OP__EXT,OPE_SRAD,rt,ra,rb)
+// NB: PPC_OP_REG takes exactly 5 arguments. No mask bits are or'ed into the
+// subop here, i.e. the mask begin is left at 0.
+#define PPC_ROL_REG(ra, rt, rb) \
+	PPC_OP_REG(OP__RLD,OPR_RLDCL,rt,ra,rb)
+
+#define PPC_LSLW_REG(ra, rt, rb) \
+	PPC_OP_REG(OP__EXT,OPE_SLW,rt,ra,rb)
+#define PPC_LSRW_REG(ra, rt, rb) \
+	PPC_OP_REG(OP__EXT,OPE_SRW,rt,ra,rb)
+#define PPC_ASRW_REG(ra, rt, rb) \
+	PPC_OP_REG(OP__EXT,OPE_SRAW,rt,ra,rb)
+#define PPC_ROLW_REG(ra, rt, rb) \
+	PPC_OP_REG(OP_RLWNM,MM(0,31),rt,ra,rb)
+
+// rt = ra OP (imm16 << (0|16))
+#define PPC_ADD_IMM(rt, ra, imm16) \
+	PPC_OP_IMM(OP_ADDI, rt, ra, imm16)
+#define PPC_ADDT_IMM(rt, ra, imm16) \
+	PPC_OP_IMM(OP_ADDIS, rt, ra, imm16)
+
+#define PPC_XOR_IMM(ra, rt, imm16) \
+	PPC_OP_IMM(OP_XORI, rt, ra, imm16)
+#define PPC_XORT_IMM(ra, rt, imm16) \
+	PPC_OP_IMM(OP_XORIS, rt, ra, imm16)
+#define PPC_OR_IMM(ra, rt, imm16) \
+	PPC_OP_IMM(OP_ORI, rt, ra, imm16)
+#define PPC_ORT_IMM(ra, rt, imm16) \
+	PPC_OP_IMM(OP_ORIS, rt, ra, imm16)
+
+// NOTE(review): unlike the XOR/OR macros above, the 1st argument is passed
+// through to the rt slot of PPC_OP_IMM here - confirm the intended operand order
+#define PPC_ANDS_IMM(rt, ra, imm16) \
+	PPC_OP_IMM(OP_ANDI, rt, ra, imm16)
+#define PPC_ANDTS_IMM(rt, ra, imm16) \
+	PPC_OP_IMM(OP_ANDIS, rt, ra, imm16)
+#define PPC_CMP_IMM(ra, 
imm16) \
+	PPC_OP_IMM(OP_CMPI, 1, ra, imm16)
+#define PPC_CMPL_IMM(ra, imm16) \
+	PPC_OP_IMM(OP_CMPLI, 1, ra, imm16)
+
+#define PPC_CMPW_IMM(ra, imm16) \
+	PPC_OP_IMM(OP_CMPI, 0, ra, imm16)
+#define PPC_CMPLW_IMM(ra, imm16) \
+	PPC_OP_IMM(OP_CMPLI, 0, ra, imm16)
+
+// NB: the tested register is the macro's own 1st parameter; it must carry the
+// name used in the expansion, else the expansion picks up some unrelated "ra"
+#define PPC_TST_IMM(ra, imm16) \
+	PPC_ANDS_IMM(Z0,ra,imm16)
+
+#define PPC_MOV_IMM(rt, ra, imm16) \
+	PPC_ADD_IMM(rt,ra,imm16)
+#define PPC_MOVT_IMM(rt, ra, imm16) \
+	PPC_ADDT_IMM(rt,ra,imm16)
+
+// rt = EXTEND ra
+#define PPC_EXTSW_REG(ra, rt) \
+	PPC_OP_REG(OP__EXT,OPE_EXTSW,rt,ra,_)
+#define PPC_EXTSH_REG(ra, rt) \
+	PPC_OP_REG(OP__EXT,OPE_EXTSH,rt,ra,_)
+#define PPC_EXTSB_REG(ra, rt) \
+	PPC_OP_REG(OP__EXT,OPE_EXTSB,rt,ra,_)
+// unsigned extends are rotates by 0 with a mask keeping the low 32/16/8 bits
+#define PPC_EXTUW_REG(ra, rt) \
+	PPC_OP_REG(OP__RLD,OPR_RLDICL|MD(32,0),rt,ra,_)
+#define PPC_EXTUH_REG(ra, rt) \
+	PPC_OP_REG(OP__RLD,OPR_RLDICL|MD(48,0),rt,ra,_)
+#define PPC_EXTUB_REG(ra, rt) \
+	PPC_OP_REG(OP__RLD,OPR_RLDICL|MD(56,0),rt,ra,_)
+
+// rt = ra SHIFT imm5/imm6
+#define PPC_LSL_IMM(ra, rt, bits) \
+	PPC_OP_REG(OP__RLD,OPR_RLDICR|MD(63-(bits),bits),rt,ra,_)
+#define PPC_LSR_IMM(ra, rt, bits) \
+	PPC_OP_REG(OP__RLD,OPR_RLDICL|MD(bits,64-(bits)),rt,ra,_)
+#define PPC_ASR_IMM(ra, rt, bits) \
+	PPC_OP_REG(OP__EXT,OPE_SRADI|MD(_,bits),rt,ra,_)
+#define PPC_ROL_IMM(ra, rt, bits) \
+	PPC_OP_REG(OP__RLD,OPR_RLDICL|MD(0,bits),rt,ra,_)
+
+#define PPC_LSLW_IMM(ra, rt, bits) \
+	PPC_OP_REG(OP_RLWINM,MM(0,31-(bits)),rt,ra,bits)
+#define PPC_LSRW_IMM(ra, rt, bits) \
+	PPC_OP_REG(OP_RLWINM,MM(bits,31),rt,ra,32-(bits))
+#define PPC_ASRW_IMM(ra, rt, bits) \
+	PPC_OP_REG(OP__EXT,OPE_SRAWI,rt,ra,bits)
+#define PPC_ROLW_IMM(ra, rt, bits) \
+	PPC_OP_REG(OP_RLWINM,MM(0,31),rt,ra,bits)
+
+// rt = EXTRACT/INSERT ra
+#define PPC_BFX_IMM(ra, rt, lsb, bits) \
+	PPC_OP_REG(OP__RLD,OPR_RLDICL|MD(64-(bits),63&(lsb+bits)),rt,ra,_)
+#define PPC_BFXD_IMM(ra, rt, lsb, bits) /* extract to high bits, 64 bit */ \
+	PPC_OP_REG(OP__RLD,OPR_RLDICR|MD(bits-1,lsb),rt,ra,_)
+#define PPC_BFI_IMM(ra, rt, 
lsb, bits) \ + PPC_OP_REG(OP__RLD,OPR_RLDIMI|MD(lsb,64-(lsb+bits)),rt,ra,_) + +#define PPC_BFXW_IMM(ra, rt, lsb, bits) \ + PPC_OP_REG(OP_RLWINM,MM(32-(bits),31),rt,ra,31&(lsb+bits)) +#define PPC_BFXT_IMM(ra, rt, lsb, bits) /* extract to high bits, 32 bit */ \ + PPC_OP_REG(OP_RLWINM,MM(0,bits-1),rt,ra,lsb) +#define PPC_BFIW_IMM(ra, rt, lsb, bits) \ + PPC_OP_REG(OP_RLWIMI,MM(lsb,lsb+bits-1),rt,ra,32-(lsb+bits)) + +// multiplication; NB in 32 bit results the topmost 32 bits are undefined +#define PPC_MULL(rt, ra, rb) /* 64 bit */ \ + PPC_OP_REG(OP__EXT,OPE_MULLD,rt,ra,rb) +#define PPC_MUL(rt, ra, rb) /* low 32 bit */ \ + PPC_OP_REG(OP__EXT,OPE_MULLW,rt,ra,rb) +#define PPC_MULHS(rt, ra, rb) /* high 32 bit, signed */ \ + PPC_OP_REG(OP__EXT,OPE_MULHW,rt,ra,rb) +#define PPC_MULHU(rt, ra, rb) /* high 32 bit, unsigned */ \ + PPC_OP_REG(OP__EXT,OPE_MULHWU,rt,ra,rb) +// XXX use MAC* insns from the LMA group? + +// branching (only PC-relative) + +#define PPC_B(offs26) \ + PPC_OP_IMM(OP_B,_,_,(offs26)&~3) +#define PPC_BL(offs26) \ + PPC_OP_IMM(OP_B,_,_,((offs26)&~3)|BLK) +#define PPC_RET() \ + PPC_OP_REG(OP__CR,OPC_BCLR,PPC_AL>>3,_,_) +#define PPC_RETCOND(cond) \ + PPC_OP_REG(OP__CR,OPC_BCLR,(cond)>>3,(cond)&0x7,_) +#define PPC_BCTRCOND(cond) \ + PPC_OP_REG(OP__CR,OPC_BCCTR,(cond)>>3,(cond)&0x7,_) +#define PPC_BLCTRCOND(cond) \ + PPC_OP_REG(OP__CR,OPC_BCCTR|BLK,(cond)>>3,(cond)&0x7,_) +#define PPC_BCOND(cond, offs19) \ + PPC_OP_IMM(OP_BC,(cond)>>3,(cond)&0x7,(offs19)&~3) + +// load/store, offset + +#define PPC_LDX_IMM(rt, ra, offs16) \ + PPC_OP_IMM(OP__LD,rt,ra,((u16)(offs16)&~3)|OPL_LD) +#define PPC_LDW_IMM(rt, ra, offs16) \ + PPC_OP_IMM(OP_LWZ,rt,ra,(u16)(offs16)) +#define PPC_LDH_IMM(rt, ra, offs16) \ + PPC_OP_IMM(OP_LHZ,rt,ra,(u16)(offs16)) +#define PPC_LDB_IMM(rt, ra, offs16) \ + PPC_OP_IMM(OP_LBZ,rt,ra,(u16)(offs16)) + +#define PPC_LDSH_IMM(rt, ra, offs16) \ + PPC_OP_IMM(OP_LHA,rt,ra,(u16)(offs16)) + +#define PPC_STX_IMM(rt, ra, offs16) \ + 
PPC_OP_IMM(OP__ST,rt,ra,((u16)(offs16)&~3)|OPS_STD)
+#define PPC_STW_IMM(rt, ra, offs16) \
+	PPC_OP_IMM(OP_STW,rt,ra,(u16)(offs16))
+#define PPC_STH_IMM(rt, ra, offs16) \
+	PPC_OP_IMM(OP_STH,rt,ra,(u16)(offs16))
+#define PPC_STB_IMM(rt, ra, offs16) \
+	PPC_OP_IMM(OP_STB,rt,ra,(u16)(offs16))
+
+// load/store, indexed
+
+#define PPC_LDX_REG(rt, ra, rb) \
+	PPC_OP_REG(OP__EXT,OPE_LDX,rt,ra,rb)
+#define PPC_LDW_REG(rt, ra, rb) \
+	PPC_OP_REG(OP__EXT,OPE_LWZX,rt,ra,rb)
+#define PPC_LDH_REG(rt, ra, rb) \
+	PPC_OP_REG(OP__EXT,OPE_LHZX,rt,ra,rb)
+#define PPC_LDB_REG(rt, ra, rb) \
+	PPC_OP_REG(OP__EXT,OPE_LBZX,rt,ra,rb)
+
+#define PPC_LDSH_REG(rt, ra, rb) \
+	PPC_OP_REG(OP__EXT,OPE_LHAX,rt,ra,rb)
+
+// NB: the 64 bit indexed store subop is named OPE_STDX in the EXT subop enum
+#define PPC_STX_REG(rt, ra, rb) \
+	PPC_OP_REG(OP__EXT,OPE_STDX,rt,ra,rb)
+#define PPC_STW_REG(rt, ra, rb) \
+	PPC_OP_REG(OP__EXT,OPE_STWX,rt,ra,rb)
+#define PPC_STH_REG(rt, ra, rb) \
+	PPC_OP_REG(OP__EXT,OPE_STHX,rt,ra,rb)
+#define PPC_STB_REG(rt, ra, rb) \
+	PPC_OP_REG(OP__EXT,OPE_STBX,rt,ra,rb)
+
+// special regs: LR, CTR, XER, CR
+
+// the SPR number is split into two 5 bit halves which are stored swapped
+#define PPC_MFSP_REG(rt, spr) \
+	PPC_OP_REG(OP__EXT,OPE_MFSPR,rt,_,_CB(-(spr),5,0,5)|_CB(-(spr),5,5,0))
+#define PPC_MTSP_REG(rs, spr) \
+	PPC_OP_REG(OP__EXT,OPE_MTSPR,rs,_,_CB(-(spr),5,0,5)|_CB(-(spr),5,5,0))
+
+#define PPC_MFCR_REG(rt) \
+	PPC_OP_REG(OP__EXT,OPE_MFCR,rt,_,_)
+#define PPC_MTCRF_REG(rs, fm) \
+	PPC_OP_REG(OP__EXT,OPE_MTCRF,rs,_,(fm)<<1)
+// NB: a 3 bit CR field number sits in the upper bits of the 5 bit register
+// slot, hence <<2 for both the target and the source field
+#define PPC_MCRXR_REG(crt) \
+	PPC_OP_REG(OP__EXT,OPE_MCRXR,(crt)<<2,_,_)
+#define PPC_MCRCR_REG(crt, crf) \
+	PPC_OP_REG(OP__CR,OPC_MCRF,(crt)<<2,(crf)<<2,_)
+
+#ifdef __powerpc64__
+#define PTR_SCALE	3
+#define PPC_LDP_IMM	PPC_LDX_IMM
+#define PPC_LDP_REG	PPC_LDX_REG
+#define PPC_STP_IMM	PPC_STX_IMM
+#define PPC_STP_REG	PPC_STX_REG
+#define PPC_BFXP_IMM	PPC_BFX_IMM
+
+// "long" multiplication, 32x32 bit = 64 bit
+#define EMIT_PPC_MULLU_REG(dlo, dhi, s1, s2) do { \
+	EMIT(PPC_EXTUW_REG(s1, s1)); \
+	EMIT(PPC_EXTUW_REG(s2, s2)); \
+	EMIT(PPC_MULL(dlo, s1, s2)); \
+	EMIT(PPC_ASR_IMM(dhi, dlo, 
32)); \ +} while (0) + +#define EMIT_PPC_MULLS_REG(dlo, dhi, s1, s2) do { \ + EMIT(PPC_EXTSW_REG(s1, s1)); \ + EMIT(PPC_EXTSW_REG(s2, s2)); \ + EMIT(PPC_MULL(dlo, s1, s2)); \ + EMIT(PPC_ASR_IMM(dhi, dlo, 32)); \ +} while (0) + +#define EMIT_PPC_MACLS_REG(dlo, dhi, s1, s2) do { \ + EMIT(PPC_EXTSW_REG(s1, s1)); \ + EMIT(PPC_EXTSW_REG(s2, s2)); \ + EMIT(PPC_MULL(AT, s1, s2)); \ + EMIT(PPC_BFI_IMM(dlo, dhi, 0, 32)); \ + emith_add_r_r(dlo, AT); \ + EMIT(PPC_ASR_IMM(dhi, dlo, 32)); \ +} while (0) +#else +#define PTR_SCALE 2 +#define PPC_LDP_IMM PPC_LDW_IMM +#define PPC_LDP_REG PPC_LDW_REG +#define PPC_STP_IMM PPC_STW_IMM +#define PPC_STP_REG PPC_STW_REG +#define PPC_BFXP_IMM PPC_BFXW_IMM + +// "long" multiplication, 32x32 bit = 64 bit +#define EMIT_PPC_MULLU_REG(dlo, dhi, s1, s2) do { \ + int at = (dlo == s1 || dlo == s2 ? AT : dlo); \ + EMIT(PPC_MUL(at, s1, s2)); \ + EMIT(PPC_MULHU(dhi, s1, s2)); \ + if (at != dlo) emith_move_r_r(dlo, at); \ +} while (0) + +#define EMIT_PPC_MULLS_REG(dlo, dhi, s1, s2) do { \ + int at = (dlo == s1 || dlo == s2 ? AT : dlo); \ + EMIT(PPC_MUL(at, s1, s2)); \ + EMIT(PPC_MULHS(dhi, s1, s2)); \ + if (at != dlo) emith_move_r_r(dlo, at); \ +} while (0) + +#define EMIT_PPC_MACLS_REG(dlo, dhi, s1, s2) do { \ + int t_ = rcache_get_tmp(); \ + EMIT_PPC_MULLS_REG(t_, AT, s1, s2); \ + EMIT(PPC_ADDC_REG(dlo, dlo, t_)); \ + EMIT(PPC_ADC_REG(dhi, dhi, AT)); \ + rcache_free_tmp(t_); \ +} while (0) +#endif +#define PTR_SIZE (1<>1 since the lowest bit inverts the cond */ \ + unsigned _mv = BITMASK3(DCOND_VS>>1,DCOND_GE>>1,DCOND_GT>>1); \ + unsigned _mc = _mv | BITMASK2(DCOND_HS>>1,DCOND_HI>>1); \ + emith_flg_hint = (_mv & BITMASK1(cond >> 1) ? _FHV : 0); \ + emith_flg_hint |= (_mc & BITMASK1(cond >> 1) ? _FHC : 0); \ +} while (0) + +// store minimal cc information: rt, rb^ra, carry +// NB: the result *must* first go to FNZ, in case rt == ra or rt == rb. 
+// NB: for adcf and sbcf, carry-in must be dealt with separately (see there) +static void emith_set_arith_flags(int rt, int ra, int rb, s32 imm, int sub) +{ + if (emith_flg_hint & _FHC) { + if (sub) // C = sub:rb= 0) // Nt^Ns in FV, bit 31 + EMIT(PPC_XOR_REG(FV, ra, rb)); + else if (imm == 0) + emith_flg_noV = 1; // imm #0 can't overflow + else if ((imm < 0) == !sub) + EMIT(PPC_MVN_REG(FV, ra)); + else if ((imm > 0) == !sub) + EMIT(PPC_MOV_REG(FV, ra)); + } + // full V = Nd^Nt^Ns^C calculation is deferred until really needed + + if (rt && rt != FNZ) + EMIT(PPC_MOV_REG(rt, FNZ)); // N,Z via result value in FNZ + emith_cmp_ra = emith_cmp_rb = -1; +} + +// since R5 has less-than and compare-branch insns, handle cmp separately by +// storing the involved regs for later use in one of those R5 insns. +// This works for all conditions but VC/VS, but this is fortunately never used. +static void emith_set_compare_flags(int ra, int rb, s32 imm) +{ + emith_cmp_rb = rb; + emith_cmp_ra = ra; + emith_cmp_imm = imm; +} + + +// data processing, register + +#define emith_move_r_r_ptr(d, s) \ + EMIT(PPC_MOV_REG(d, s)) +#define emith_move_r_r_ptr_c(cond, d, s) \ + emith_move_r_r_ptr(d, s) + +#define emith_move_r_r(d, s) \ + emith_move_r_r_ptr(d, s) +#define emith_move_r_r_c(cond, d, s) \ + emith_move_r_r(d, s) + +#define emith_mvn_r_r(d, s) \ + EMIT(PPC_MVN_REG(d, s)) + +#define emith_add_r_r_r_lsl_ptr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(PPC_LSLW_IMM(AT, s2, simm)); \ + EMIT(PPC_ADD_REG(d, s1, AT)); \ + } else EMIT(PPC_ADD_REG(d, s1, s2)); \ +} while (0) +#define emith_add_r_r_r_lsl(d, s1, s2, simm) \ + emith_add_r_r_r_lsl_ptr(d, s1, s2, simm) + +#define emith_add_r_r_r_lsr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(PPC_LSRW_IMM(AT, s2, simm)); \ + EMIT(PPC_ADD_REG(d, s1, AT)); \ + } else EMIT(PPC_ADD_REG(d, s1, s2)); \ +} while (0) + +#define emith_addf_r_r_r_lsl_ptr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(PPC_LSLW_IMM(AT, s2, simm)); \ + EMIT(PPC_ADD_REG(FNZ, 
s1, AT)); \ + emith_set_arith_flags(d, s1, AT, 0, 0); \ + } else { \ + EMIT(PPC_ADD_REG(FNZ, s1, s2)); \ + emith_set_arith_flags(d, s1, s2, 0, 0); \ + } \ +} while (0) +#define emith_addf_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(PPC_LSLW_IMM(AT, s2, simm)); \ + EMIT(PPC_ADD_REG(FNZ, s1, AT)); \ + emith_set_arith_flags(d, s1, AT, 0, 0); \ + } else { \ + EMIT(PPC_ADD_REG(FNZ, s1, s2)); \ + emith_set_arith_flags(d, s1, s2, 0, 0); \ + } \ +} while (0) + +#define emith_addf_r_r_r_lsr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(PPC_LSRW_IMM(AT, s2, simm)); \ + EMIT(PPC_ADD_REG(FNZ, s1, AT)); \ + emith_set_arith_flags(d, s1, AT, 0, 0); \ + } else { \ + EMIT(PPC_ADD_REG(FNZ, s1, s2)); \ + emith_set_arith_flags(d, s1, s2, 0, 0); \ + } \ +} while (0) + +#define emith_sub_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(PPC_LSLW_IMM(AT, s2, simm)); \ + EMIT(PPC_SUB_REG(d, s1, AT)); \ + } else EMIT(PPC_SUB_REG(d, s1, s2)); \ +} while (0) + +#define emith_subf_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(PPC_LSLW_IMM(AT, s2, simm)); \ + EMIT(PPC_SUB_REG(FNZ, s1, AT)); \ + emith_set_arith_flags(d, s1, AT, 0, 1); \ + } else { \ + EMIT(PPC_SUB_REG(FNZ, s1, s2)); \ + emith_set_arith_flags(d, s1, s2, 0, 1); \ + } \ +} while (0) + +#define emith_or_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(PPC_LSLW_IMM(AT, s2, simm)); \ + EMIT(PPC_OR_REG(d, s1, AT)); \ + } else EMIT(PPC_OR_REG(d, s1, s2)); \ +} while (0) + +#define emith_or_r_r_r_lsr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(PPC_LSRW_IMM(AT, s2, simm)); \ + EMIT(PPC_OR_REG(d, s1, AT)); \ + } else EMIT(PPC_OR_REG(d, s1, s2)); \ +} while (0) + +#define emith_eor_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(PPC_LSLW_IMM(AT, s2, simm)); \ + EMIT(PPC_XOR_REG(d, s1, AT)); \ + } else EMIT(PPC_XOR_REG(d, s1, s2)); \ +} while (0) + +#define emith_eor_r_r_r_lsr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(PPC_LSRW_IMM(AT, s2, simm)); \ + EMIT(PPC_XOR_REG(d, s1, AT)); \ + 
} else EMIT(PPC_XOR_REG(d, s1, s2)); \ +} while (0) + +#define emith_and_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(PPC_LSLW_IMM(AT, s2, simm)); \ + EMIT(PPC_AND_REG(d, s1, AT)); \ + } else EMIT(PPC_AND_REG(d, s1, s2)); \ +} while (0) + +#define emith_or_r_r_lsl(d, s, lslimm) \ + emith_or_r_r_r_lsl(d, d, s, lslimm) +#define emith_or_r_r_lsr(d, s, lsrimm) \ + emith_or_r_r_r_lsr(d, d, s, lsrimm) + +#define emith_eor_r_r_lsl(d, s, lslimm) \ + emith_eor_r_r_r_lsl(d, d, s, lslimm) +#define emith_eor_r_r_lsr(d, s, lsrimm) \ + emith_eor_r_r_r_lsr(d, d, s, lsrimm) + +#define emith_add_r_r_r(d, s1, s2) \ + emith_add_r_r_r_lsl(d, s1, s2, 0) + +#define emith_addf_r_r_r_ptr(d, s1, s2) \ + emith_addf_r_r_r_lsl_ptr(d, s1, s2, 0) +#define emith_addf_r_r_r(d, s1, s2) \ + emith_addf_r_r_r_lsl(d, s1, s2, 0) + +#define emith_sub_r_r_r(d, s1, s2) \ + emith_sub_r_r_r_lsl(d, s1, s2, 0) + +#define emith_subf_r_r_r(d, s1, s2) \ + emith_subf_r_r_r_lsl(d, s1, s2, 0) + +#define emith_or_r_r_r(d, s1, s2) \ + emith_or_r_r_r_lsl(d, s1, s2, 0) + +#define emith_eor_r_r_r(d, s1, s2) \ + emith_eor_r_r_r_lsl(d, s1, s2, 0) + +#define emith_and_r_r_r(d, s1, s2) \ + emith_and_r_r_r_lsl(d, s1, s2, 0) + +#define emith_add_r_r_ptr(d, s) \ + emith_add_r_r_r_lsl_ptr(d, d, s, 0) +#define emith_add_r_r(d, s) \ + emith_add_r_r_r(d, d, s) + +#define emith_sub_r_r(d, s) \ + emith_sub_r_r_r(d, d, s) + +#define emith_neg_r_r(d, s) \ + EMIT(PPC_NEG_REG(d, s)) + +#define emith_adc_r_r_r(d, s1, s2) do { \ + emith_add_r_r_r(AT, s2, FC); \ + emith_add_r_r_r(d, s1, AT); \ +} while (0) + +#define emith_sbc_r_r_r(d, s1, s2) do { \ + emith_add_r_r_r(AT, s2, FC); \ + emith_sub_r_r_r(d, s1, AT); \ +} while (0) + +#define emith_adc_r_r(d, s) \ + emith_adc_r_r_r(d, d, s) + +#define emith_negc_r_r(d, s) do { \ + emith_neg_r_r(d, s); \ + emith_sub_r_r(d, FC); \ +} while (0) + +// NB: the incoming carry Cin can cause Cout if s2+Cin=0 (or s1+Cin=0 FWIW) +// moreover, if s2+Cin=0 caused Cout, s1+s2+Cin=s1+0 can't 
cause another Cout +#define emith_adcf_r_r_r(d, s1, s2) do { \ + emith_add_r_r_r(FNZ, s2, FC); \ + EMIT_PPC_SLTWU_REG(AT, FNZ, FC); \ + emith_add_r_r_r(FNZ, s1, FNZ); \ + emith_set_arith_flags(d, s1, s2, 0, 0); \ + emith_or_r_r(FC, AT); \ +} while (0) + +#define emith_sbcf_r_r_r(d, s1, s2) do { \ + emith_add_r_r_r(FNZ, s2, FC); \ + EMIT_PPC_SLTWU_REG(AT, FNZ, FC); \ + emith_sub_r_r_r(FNZ, s1, FNZ); \ + emith_set_arith_flags(d, s1, s2, 0, 1); \ + emith_or_r_r(FC, AT); \ +} while (0) + +#define emith_and_r_r(d, s) \ + emith_and_r_r_r(d, d, s) +#define emith_and_r_r_c(cond, d, s) \ + emith_and_r_r(d, s) + +#define emith_or_r_r(d, s) \ + emith_or_r_r_r(d, d, s) + +#define emith_eor_r_r(d, s) \ + emith_eor_r_r_r(d, d, s) + +#define emith_tst_r_r_ptr(d, s) do { \ + if (d != s) { \ + emith_and_r_r_r(FNZ, d, s); \ + emith_cmp_ra = emith_cmp_rb = -1; \ + } else emith_cmp_ra = s, emith_cmp_rb = -1, emith_cmp_imm = 0; \ +} while (0) +#define emith_tst_r_r(d, s) \ + emith_tst_r_r_ptr(d, s) + +#define emith_teq_r_r(d, s) do { \ + emith_eor_r_r_r(FNZ, d, s); \ + emith_cmp_ra = emith_cmp_rb = -1; \ +} while (0) + +#define emith_cmp_r_r(d, s) \ + emith_set_compare_flags(d, s, 0) +// emith_subf_r_r_r(FNZ, d, s) + +#define emith_addf_r_r(d, s) \ + emith_addf_r_r_r(d, d, s) + +#define emith_subf_r_r(d, s) \ + emith_subf_r_r_r(d, d, s) + +#define emith_adcf_r_r(d, s) \ + emith_adcf_r_r_r(d, d, s) + +#define emith_sbcf_r_r(d, s) \ + emith_sbcf_r_r_r(d, d, s) + +#define emith_negcf_r_r(d, s) do { \ + emith_add_r_r_r(FNZ, s, FC); \ + EMIT_PPC_SLTWU_REG(AT, FNZ, FC); \ + emith_neg_r_r(FNZ, FNZ); \ + emith_set_arith_flags(d, Z0, s, 0, 1); \ + emith_or_r_r(FC, AT); \ +} while (0) + +// move immediate + +static void emith_move_imm(int r, int ptr, uintptr_t imm) +{ +#ifdef __powerpc64__ + if ((u32)imm != imm && ptr) { + emith_move_imm(r, 0, imm >> 32); + if (imm >> 32) + EMIT(PPC_LSL_IMM(r, r, 32)); + if (imm & 0x0000ffff) + EMIT(PPC_OR_IMM(r, r, imm & 0x0000ffff)); + if (imm & 0xffff0000) + 
EMIT(PPC_ORT_IMM(r, r, (imm & 0xffff0000) >> 16));
+	} else
+#endif
+	{
+		int s = Z0, d = 0, c = 0;
+		if ((u16)imm) {
+			EMIT(PPC_ADD_IMM(r, s, (u16)imm));
+			s = r, d = 1, c = (s16)imm < 0;
+		}
+		// adjust for sign extension in ADDI
+		if (!d)					// low part == 0
+			EMIT(PPC_ADDT_IMM(r, s, (u16)(imm>>16)));
+		else if (c && (u16)(~imm>>16))		// low part < 0
+			EMIT(PPC_XORT_IMM(r, s, (u16)(~imm>>16)));
+		else if (!c && (u16)(imm>>16))		// low part > 0
+			EMIT(PPC_ORT_IMM(r, s, (u16)(imm>>16)));
+		// make sure to clear upper half if this is a ptr
+#ifdef __powerpc64__
+		// 64 bit hosts only: with a 32 bit uintptr_t the shift by 32 is
+		// undefined, and EXTUW is built from a 64 bit rotate insn (RLDICL)
+		if (ptr && !(imm >> 32) && c)
+			EMIT(PPC_EXTUW_REG(r, r));
+#endif
+	}
+}
+
+#define emith_move_r_ptr_imm(r, imm) \
+	emith_move_imm(r, 1, (uintptr_t)(imm))
+
+#define emith_move_r_imm(r, imm) \
+	emith_move_imm(r, 0, (u32)(imm))
+#define emith_move_r_imm_c(cond, r, imm) \
+	emith_move_r_imm(r, imm)
+
+// single insn load of an s8 constant; the patch macro rewrites the low 16 bits
+// of the insn word at ptr
+#define emith_move_r_imm_s8_patchable(r, imm) \
+	EMIT(PPC_ADD_IMM(r, Z0, (s8)(imm)))
+#define emith_move_r_imm_s8_patch(ptr, imm) do { \
+	u32 *ptr_ = (u32 *)ptr; \
+	EMIT_PTR(ptr_, (*ptr_ & 0xffff0000) | (u16)(s8)(imm)); \
+} while (0)
+
+// arithmetic, immediate - can only be ADDI, since SUBI doesn't exist
+
+// rt = ra + imm, using ADDI for the low and ADDIS for the high 16 bits
+static void emith_add_imm(int rt, int ra, u32 imm)
+{
+	int s = ra;
+	if ((u16)imm) {
+		EMIT(PPC_ADD_IMM(rt, s, (u16)imm));
+		s = rt;
+	}
+	// adjust for sign extension in ADDI
+	imm = (imm >> 16) + ((s16)imm < 0);
+	// NB: also emitted for a zero high part if nothing was emitted so far,
+	// so that rt is always written
+	if ((u16)imm || rt != s)
+		EMIT(PPC_ADDT_IMM(rt, s, (u16)imm));
+}
+
+#define emith_add_r_imm(r, imm) \
+	emith_add_r_r_imm(r, r, imm)
+#define emith_add_r_imm_c(cond, r, imm) \
+	emith_add_r_imm(r, imm)
+
+// NB: emith_addf_r_r_imm takes (d, s, imm), so r must be given twice
+#define emith_addf_r_imm(r, imm) \
+	emith_addf_r_r_imm(r, r, imm)
+
+#define emith_sub_r_imm(r, imm) \
+	emith_sub_r_r_imm(r, r, imm)
+#define emith_sub_r_imm_c(cond, r, imm) \
+	emith_sub_r_imm(r, imm)
+
+#define emith_subf_r_imm(r, imm) \
+	emith_subf_r_r_imm(r, r, imm)
+
+#define emith_adc_r_imm(r, imm) \
+	emith_adc_r_r_imm(r, r, imm)
+
+#define emith_adcf_r_imm(r, imm) \
+	emith_adcf_r_r_imm(r, r, imm)
+
+#define 
emith_cmp_r_imm(r, imm) \ + emith_set_compare_flags(r, -1, imm) +// emith_subf_r_r_imm(FNZ, r, (s16)imm) + +#define emith_add_r_r_ptr_imm(d, s, imm) \ + emith_add_imm(d, s, imm) + +#define emith_add_r_r_imm(d, s, imm) \ + emith_add_r_r_ptr_imm(d, s, imm) + +#define emith_addf_r_r_imm(d, s, imm) do { \ + emith_add_r_r_imm(FNZ, s, imm); \ + emith_set_arith_flags(d, s, -1, imm, 0); \ +} while (0) + +#define emith_adc_r_r_imm(d, s, imm) do { \ + emith_add_r_r_r(AT, s, FC); \ + emith_add_r_r_imm(d, AT, imm); \ +} while (0) + + +#define emith_adcf_r_r_imm(d, s, imm) do { \ + if (imm == 0) { \ + emith_add_r_r_r(FNZ, s, FC); \ + emith_set_arith_flags(d, s, -1, 1, 0); \ + } else { \ + emith_add_r_r_r(FNZ, s, FC); \ + EMIT_PPC_SLTWU_REG(AT, FNZ, FC); \ + emith_add_r_r_imm(FNZ, FNZ, imm); \ + emith_set_arith_flags(d, s, -1, imm, 0); \ + emith_or_r_r(FC, AT); \ + } \ +} while (0) + +// NB: no SUBI, since ADDI takes a signed imm +#define emith_sub_r_r_imm(d, s, imm) \ + emith_add_r_r_imm(d, s, -(imm)) +#define emith_sub_r_r_imm_c(cond, d, s, imm) \ + emith_sub_r_r_imm(d, s, imm) + +#define emith_subf_r_r_imm(d, s, imm) do { \ + emith_sub_r_r_imm(FNZ, s, imm); \ + emith_set_arith_flags(d, s, -1, imm, 1); \ +} while (0) + +// logical, immediate + +#define emith_log_imm2(opi, opr, rt, ra, imm) do { \ + if ((imm) >> 16 || opi == OP_ANDI) { /* too big, or microcoded ANDI */ \ + emith_move_r_imm(AT, imm); \ + EMIT(PPC_OP_REG(OP__EXT, opr, ra, rt, AT)); \ + } else if (/*opi == OP_ANDI ||*/ imm || rt != ra) \ + EMIT(PPC_OP_IMM(opi, ra, rt, imm)); \ +} while (0) +#define emith_log_imm(op, rt, ra, imm) \ + emith_log_imm2(OP_##op##I, OPE_##op, rt, ra, imm) + +#define emith_and_r_imm(r, imm) \ + emith_log_imm(AND, r, r, imm) + +#define emith_or_r_imm(r, imm) \ + emith_log_imm(OR, r, r, imm) +#define emith_or_r_imm_c(cond, r, imm) \ + emith_or_r_imm(r, imm) + +#define emith_eor_r_imm_ptr(r, imm) \ + emith_log_imm(XOR, r, r, imm) +#define emith_eor_r_imm_ptr_c(cond, r, imm) \ + 
emith_eor_r_imm_ptr(r, imm)
+
+#define emith_eor_r_imm(r, imm) \
+	emith_eor_r_imm_ptr(r, imm)
+#define emith_eor_r_imm_c(cond, r, imm) \
+	emith_eor_r_imm(r, imm)
+
+/* NB: BIC #imm not available; use AND #~imm instead */
+#define emith_bic_r_imm(r, imm) \
+	emith_log_imm(AND, r, r, ~(imm))
+#define emith_bic_r_imm_c(cond, r, imm) \
+	emith_bic_r_imm(r, imm)
+
+// the AND result goes to FNZ only; a pending compare is invalidated
+#define emith_tst_r_imm(r, imm) do { \
+	emith_log_imm(AND, FNZ, r, imm); \
+	emith_cmp_ra = emith_cmp_rb = -1; \
+} while (0)
+#define emith_tst_r_imm_c(cond, r, imm) \
+	emith_tst_r_imm(r, imm)
+
+#define emith_and_r_r_imm(d, s, imm) \
+	emith_log_imm(AND, d, s, imm)
+
+#define emith_or_r_r_imm(d, s, imm) \
+	emith_log_imm(OR, d, s, imm)
+
+#define emith_eor_r_r_imm(d, s, imm) \
+	emith_log_imm(XOR, d, s, imm)
+
+// shift
+
+#define emith_lsl(d, s, cnt) \
+	EMIT(PPC_LSLW_IMM(d, s, cnt))
+
+#define emith_lsr(d, s, cnt) \
+	EMIT(PPC_LSRW_IMM(d, s, cnt))
+
+#define emith_asr(d, s, cnt) \
+	EMIT(PPC_ASRW_IMM(d, s, cnt))
+
+// NB: rotate right is done as rotate left by 32-cnt
+#define emith_ror(d, s, cnt) \
+	EMIT(PPC_ROLW_IMM(d, s, 32-(cnt)))
+#define emith_ror_c(cond, d, s, cnt) \
+	emith_ror(d, s, cnt)
+
+// NB: no trailing semicolon or line continuation, so that this expands to a
+// single expression like its siblings above
+#define emith_rol(d, s, cnt) \
+	EMIT(PPC_ROLW_IMM(d, s, cnt))
+
+// rotate right through carry: FC is shifted into bit 31
+#define emith_rorc(d) do { \
+	emith_lsr(d, d, 1); \
+	emith_lsl(AT, FC, 31); \
+	emith_or_r_r(d, AT); \
+} while (0)
+
+// rotate left through carry: FC is or'ed into bit 0
+#define emith_rolc(d) do { \
+	emith_lsl(d, d, 1); \
+	emith_or_r_r(d, FC); \
+} while (0)
+
+// NB: all flag setting shifts make V undefined
+// The shift is split in cnt-1 and 1, to catch the last bit shifted out in FC
+#define emith_lslf(d, s, cnt) do { \
+	int _s = s; \
+	if ((cnt) > 1) { \
+		emith_lsl(d, s, cnt-1); \
+		_s = d; \
+	} \
+	if ((cnt) > 0) { \
+		emith_lsr(FC, _s, 31); \
+		emith_lsl(d, _s, 1); \
+	} \
+	emith_move_r_r(FNZ, d); \
+	emith_cmp_ra = emith_cmp_rb = -1; \
+} while (0)
+
+#define emith_lsrf(d, s, cnt) do { \
+	int _s = s; \
+	if ((cnt) > 1) { \
+		emith_lsr(d, s, cnt-1); \
+		_s = d; \
+	} \
+	if ((cnt) > 0) { \
+		emith_and_r_r_imm(FC, _s, 1); \
+		emith_lsr(d, _s, 1); \
+	} \
+	emith_move_r_r(FNZ, d); \
+	emith_cmp_ra = 
emith_cmp_rb = -1; \ +} while (0) + +#define emith_asrf(d, s, cnt) do { \ + int _s = s; \ + if ((cnt) > 1) { \ + emith_asr(d, s, cnt-1); \ + _s = d; \ + } \ + if ((cnt) > 0) { \ + emith_and_r_r_imm(FC, _s, 1); \ + emith_asr(d, _s, 1); \ + } \ + emith_move_r_r(FNZ, d); \ + emith_cmp_ra = emith_cmp_rb = -1; \ +} while (0) + +#define emith_rolf(d, s, cnt) do { \ + emith_rol(d, s, cnt); \ + emith_and_r_r_imm(FC, d, 1); \ + emith_move_r_r(FNZ, d); \ + emith_cmp_ra = emith_cmp_rb = -1; \ +} while (0) + +#define emith_rorf(d, s, cnt) do { \ + emith_ror(d, s, cnt); \ + emith_lsr(FC, d, 31); \ + emith_move_r_r(FNZ, d); \ + emith_cmp_ra = emith_cmp_rb = -1; \ +} while (0) + +#define emith_rolcf(d) do { \ + emith_lsr(AT, d, 31); \ + emith_lsl(d, d, 1); \ + emith_or_r_r(d, FC); \ + emith_move_r_r(FC, AT); \ + emith_move_r_r(FNZ, d); \ + emith_cmp_ra = emith_cmp_rb = -1; \ +} while (0) + +#define emith_rorcf(d) do { \ + emith_and_r_r_imm(AT, d, 1); \ + emith_lsr(d, d, 1); \ + emith_lsl(FC, FC, 31); \ + emith_or_r_r(d, FC); \ + emith_move_r_r(FC, AT); \ + emith_move_r_r(FNZ, d); \ + emith_cmp_ra = emith_cmp_rb = -1; \ +} while (0) + +// signed/unsigned extend + +#define emith_clear_msb(d, s, count) /* bits to clear */ \ + EMIT(PPC_BFXW_IMM(d, s, count, 32-(count))) + +#define emith_clear_msb_c(cond, d, s, count) \ + emith_clear_msb(d, s, count) + +#define emith_sext(d, s, count) /* bits to keep */ do { \ + if (count == 8) \ + EMIT(PPC_EXTSB_REG(d, s)); \ + else if (count == 16) \ + EMIT(PPC_EXTSH_REG(d, s)); \ + else { \ + emith_lsl(d, s, 32-(count)); \ + emith_asr(d, d, 32-(count)); \ + } \ +} while (0) + +#define emith_uext_ptr(r) \ + EMIT(PPC_EXTUW_REG(r, r)) + +// multiply Rd = Rn*Rm (+ Ra) + +#define emith_mul(d, s1, s2) \ + EMIT(PPC_MUL(d, s1, s2)) + +#define emith_mul_u64(dlo, dhi, s1, s2) \ + EMIT_PPC_MULLU_REG(dlo, dhi, s1, s2) + +#define emith_mul_s64(dlo, dhi, s1, s2) \ + EMIT_PPC_MULLS_REG(dlo, dhi, s1, s2) + +#define emith_mula_s64(dlo, dhi, s1, s2) \ + 
EMIT_PPC_MACLS_REG(dlo, dhi, s1, s2) +#define emith_mula_s64_c(cond, dlo, dhi, s1, s2) \ + emith_mula_s64(dlo, dhi, s1, s2) + +// load/store. offs has 16 bits signed, which is currently sufficient +#define emith_read_r_r_offs_ptr(r, ra, offs) \ + EMIT(PPC_LDP_IMM(r, ra, offs)) +#define emith_read_r_r_offs_ptr_c(cond, r, ra, offs) \ + emith_read_r_r_offs_ptr(r, ra, offs) + +#define emith_read_r_r_offs(r, ra, offs) \ + EMIT(PPC_LDW_IMM(r, ra, offs)) +#define emith_read_r_r_offs_c(cond, r, ra, offs) \ + emith_read_r_r_offs(r, ra, offs) + +#define emith_read_r_r_r_ptr(r, ra, rm) \ + EMIT(PPC_LDP_REG(r, ra, rm)) + +#define emith_read_r_r_r(r, ra, rm) \ + EMIT(PPC_LDW_REG(r, ra, rm)) +#define emith_read_r_r_r_c(cond, r, ra, rm) \ + emith_read_r_r_r(r, ra, rm) + +#define emith_read8_r_r_offs(r, ra, offs) \ + EMIT(PPC_LDB_IMM(r, ra, offs)) +#define emith_read8_r_r_offs_c(cond, r, ra, offs) \ + emith_read8_r_r_offs(r, ra, offs) + +#define emith_read8_r_r_r(r, ra, rm) \ + EMIT(PPC_LDB_REG(r, ra, rm)) +#define emith_read8_r_r_r_c(cond, r, ra, rm) \ + emith_read8_r_r_r(r, ra, rm) + +#define emith_read16_r_r_offs(r, ra, offs) \ + EMIT(PPC_LDH_IMM(r, ra, offs)) +#define emith_read16_r_r_offs_c(cond, r, ra, offs) \ + emith_read16_r_r_offs(r, ra, offs) + +#define emith_read16_r_r_r(r, ra, rm) \ + EMIT(PPC_LDH_REG(r, ra, rm)) +#define emith_read16_r_r_r_c(cond, r, ra, rm) \ + emith_read16_r_r_r(r, ra, rm) + +#define emith_read8s_r_r_offs(r, ra, offs) do { \ + EMIT(PPC_LDB_IMM(r, ra, offs)); \ + EMIT(PPC_EXTSB_REG(r, r)); \ +} while (0) +#define emith_read8s_r_r_offs_c(cond, r, ra, offs) \ + emith_read8s_r_r_offs(r, ra, offs) + +#define emith_read8s_r_r_r(r, ra, rm) do { \ + EMIT(PPC_LDB_REG(r, ra, rm)); \ + EMIT(PPC_EXTSB_REG(r, r)); \ +} while (0) +#define emith_read8s_r_r_r_c(cond, r, ra, rm) \ + emith_read8s_r_r_r(r, ra, rm) + +#define emith_read16s_r_r_offs(r, ra, offs) \ + EMIT(PPC_LDSH_IMM(r, ra, offs)) +#define emith_read16s_r_r_offs_c(cond, r, ra, offs) \ + 
emith_read16s_r_r_offs(r, ra, offs) + +#define emith_read16s_r_r_r(r, ra, rm) \ + EMIT(PPC_LDSH_REG(r, ra, rm)) +#define emith_read16s_r_r_r_c(cond, r, ra, rm) \ + emith_read16s_r_r_r(r, ra, rm) + + +#define emith_write_r_r_offs_ptr(r, ra, offs) \ + EMIT(PPC_STP_IMM(r, ra, offs)) +#define emith_write_r_r_offs_ptr_c(cond, r, ra, offs) \ + emith_write_r_r_offs_ptr(r, ra, offs) + +#define emith_write_r_r_r_ptr(r, ra, rm) \ + EMIT(PPC_STP_REG(r, ra, rm)) +#define emith_write_r_r_r_ptr_c(cond, r, ra, rm) \ + emith_write_r_r_r_ptr(r, ra, rm) + +#define emith_write_r_r_offs(r, ra, offs) \ + EMIT(PPC_STW_IMM(r, ra, offs)) +#define emith_write_r_r_offs_c(cond, r, ra, offs) \ + emith_write_r_r_offs(r, ra, offs) + +#define emith_write_r_r_r(r, ra, rm) \ + EMIT(PPC_STW_REG(r, ra, rm)) +#define emith_write_r_r_r_c(cond, r, ra, rm) \ + emith_write_r_r_r(r, ra, rm) + +#define emith_ctx_read_ptr(r, offs) \ + emith_read_r_r_offs_ptr(r, CONTEXT_REG, offs) + +#define emith_ctx_read(r, offs) \ + emith_read_r_r_offs(r, CONTEXT_REG, offs) +#define emith_ctx_read_c(cond, r, offs) \ + emith_ctx_read(r, offs) + +#define emith_ctx_write_ptr(r, offs) \ + emith_write_r_r_offs_ptr(r, CONTEXT_REG, offs) + +#define emith_ctx_write(r, offs) \ + emith_write_r_r_offs(r, CONTEXT_REG, offs) + +#define emith_ctx_read_multiple(r, offs, cnt, tmpr) do { \ + int r_ = r, offs_ = offs, cnt_ = cnt; \ + for (; cnt_ > 0; r_++, offs_ += 4, cnt_--) \ + emith_ctx_read(r_, offs_); \ +} while (0) + +#define emith_ctx_write_multiple(r, offs, cnt, tmpr) do { \ + int r_ = r, offs_ = offs, cnt_ = cnt; \ + for (; cnt_ > 0; r_++, offs_ += 4, cnt_--) \ + emith_ctx_write(r_, offs_); \ +} while (0) + +// function call handling +#define emith_save_caller_regs(mask) do { \ + int _c, _z = PTR_SIZE; u32 _m = mask & 0x1ff8; /* r3-r12 */ \ + if (__builtin_parity(_m) == 1) _m |= 0x1; /* ABI align */ \ + int _s = count_bits(_m) * _z, _o = _s; \ + if (_s) emith_add_r_r_ptr_imm(SP, SP, -_s); \ + for (_c = HOST_REGS-1; _m && _c >= 0; 
_m &= ~(1 << _c), _c--) \ + if (_m & (1 << _c)) \ + { _o -= _z; if (_c) emith_write_r_r_offs_ptr(_c, SP, _o); } \ +} while (0) + +#define emith_restore_caller_regs(mask) do { \ + int _c, _z = PTR_SIZE; u32 _m = mask & 0x1ff8; \ + if (__builtin_parity(_m) == 1) _m |= 0x1; \ + int _s = count_bits(_m) * _z, _o = 0; \ + for (_c = 0; _m && _c < HOST_REGS; _m &= ~(1 << _c), _c++) \ + if (_m & (1 << _c)) \ + { if (_c) emith_read_r_r_offs_ptr(_c, SP, _o); _o += _z; } \ + if (_s) emith_add_r_r_ptr_imm(SP, SP, _s); \ +} while (0) + +#define host_arg2reg(rt, arg) \ + rt = (arg+3) + +#define emith_pass_arg_r(arg, reg) \ + emith_move_r_r(arg, reg) + +#define emith_pass_arg_imm(arg, imm) \ + emith_move_r_imm(arg, imm) + +// branching +#define emith_invert_branch(cond) /* inverted conditional branch */ \ + ((cond) ^ 0x40) + +// evaluate the emulated condition, returns a register/branch type pair +static int emith_cmpr_check(int rs, int rt, int cond, u32 *op) +{ + int b = -1; + + // condition check for comparing 2 registers + switch (cond) { + case DCOND_EQ: *op = PPC_CMPW_REG(rs, rt); b = PPC_EQ; break; + case DCOND_NE: *op = PPC_CMPW_REG(rs, rt); b = PPC_NE; break; + case DCOND_LO: *op = PPC_CMPLW_REG(rs, rt); b = PPC_LT; break; + case DCOND_HS: *op = PPC_CMPLW_REG(rs, rt); b = PPC_GE; break; + case DCOND_LS: *op = PPC_CMPLW_REG(rs, rt); b = PPC_LE; break; + case DCOND_HI: *op = PPC_CMPLW_REG(rs, rt); b = PPC_GT; break; + case DCOND_LT: *op = PPC_CMPW_REG(rs, rt); b = PPC_LT; break; + case DCOND_GE: *op = PPC_CMPW_REG(rs, rt); b = PPC_GE; break; + case DCOND_LE: *op = PPC_CMPW_REG(rs, rt); b = PPC_LE; break; + case DCOND_GT: *op = PPC_CMPW_REG(rs, rt); b = PPC_GT; break; + } + + return b; +} + +static int emith_cmpi_check(int rs, s32 imm, int cond, u32 *op) +{ + int b = -1; + + // condition check for comparing register with immediate + switch (cond) { + case DCOND_EQ: *op = PPC_CMPW_IMM(rs, (u16)imm), b = PPC_EQ; break; + case DCOND_NE: *op = PPC_CMPW_IMM(rs, (u16)imm), b = 
PPC_NE; break; + case DCOND_LO: *op = PPC_CMPLW_IMM(rs, (u16)imm), b = PPC_LT; break; + case DCOND_HS: *op = PPC_CMPLW_IMM(rs, (u16)imm), b = PPC_GE; break; + case DCOND_LS: *op = PPC_CMPLW_IMM(rs, (u16)imm), b = PPC_LE; break; + case DCOND_HI: *op = PPC_CMPLW_IMM(rs, (u16)imm), b = PPC_GT; break; + case DCOND_LT: *op = PPC_CMPW_IMM(rs, (u16)imm), b = PPC_LT; break; + case DCOND_GE: *op = PPC_CMPW_IMM(rs, (u16)imm), b = PPC_GE; break; + case DCOND_LE: *op = PPC_CMPW_IMM(rs, (u16)imm), b = PPC_LE; break; + case DCOND_GT: *op = PPC_CMPW_IMM(rs, (u16)imm), b = PPC_GT; break; + } + + return b; +} + +static int emith_cond_check(int cond) +{ + int b = -1; + u32 op = 0; + + if (emith_cmp_ra >= 0) { + if (emith_cmp_rb != -1) + b = emith_cmpr_check(emith_cmp_ra,emith_cmp_rb, cond,&op); + else b = emith_cmpi_check(emith_cmp_ra,emith_cmp_imm,cond,&op); + } + + // shortcut for V known to be 0 + if (b < 0 && emith_flg_noV) switch (cond) { + case DCOND_VS: /* no branch */ break; // never + case DCOND_VC: b = PPC_AL; break; // always + case DCOND_LT: op = PPC_CMPW_IMM(FNZ, 0); b = PPC_LT; break; // N + case DCOND_GE: op = PPC_CMPW_IMM(FNZ, 0); b = PPC_GE; break; // !N + case DCOND_LE: op = PPC_CMPW_IMM(FNZ, 0); b = PPC_LE; break; // N || Z + case DCOND_GT: op = PPC_CMPW_IMM(FNZ, 0); b = PPC_GT; break; // !N && !Z + } + + // the full monty if no shortcut + if (b < 0) switch (cond) { + // conditions using NZ + case DCOND_EQ: op = PPC_CMPW_IMM(FNZ, 0); b = PPC_EQ; break; // Z + case DCOND_NE: op = PPC_CMPW_IMM(FNZ, 0); b = PPC_NE; break; // !Z + case DCOND_MI: op = PPC_CMPW_IMM(FNZ, 0); b = PPC_LT; break; // N + case DCOND_PL: op = PPC_CMPW_IMM(FNZ, 0); b = PPC_GE; break; // !N + // conditions using C + case DCOND_LO: op = PPC_CMPW_IMM(FC , 0); b = PPC_NE; break; // C + case DCOND_HS: op = PPC_CMPW_IMM(FC , 0); b = PPC_EQ; break; // !C + // conditions using CZ + case DCOND_LS: // C || Z + case DCOND_HI: // !C && !Z + EMIT(PPC_ADD_IMM(AT, FC, -1)); // !C && !Z + EMIT(PPC_AND_REG(AT, 
FNZ, AT)); + op = PPC_CMPW_IMM(AT , 0); b = (cond == DCOND_HI ? PPC_NE : PPC_EQ); + break; + + // conditions using V + case DCOND_VS: // V + case DCOND_VC: // !V + EMIT(PPC_XOR_REG(AT, FV, FNZ)); // V = Nt^Ns^Nd^C + EMIT(PPC_LSRW_IMM(AT, AT, 31)); + EMIT(PPC_XOR_REG(AT, AT, FC)); + op = PPC_CMPW_IMM(AT , 0); b = (cond == DCOND_VS ? PPC_NE : PPC_EQ); + break; + // conditions using VNZ + case DCOND_LT: // N^V + case DCOND_GE: // !(N^V) + EMIT(PPC_LSRW_IMM(AT, FV, 31)); // Nd^V = Nt^Ns^C + EMIT(PPC_XOR_REG(AT, FC, AT)); + op = PPC_CMPW_IMM(AT , 0); b = (cond == DCOND_LT ? PPC_NE : PPC_EQ); + break; + case DCOND_LE: // (N^V) || Z + case DCOND_GT: // !(N^V) && !Z + EMIT(PPC_LSRW_IMM(AT, FV, 31)); // Nd^V = Nt^Ns^C + EMIT(PPC_XOR_REG(AT, FC, AT)); + EMIT(PPC_ADD_IMM(AT, AT, -1)); // !(Nd^V) && !Z + EMIT(PPC_AND_REG(AT, FNZ, AT)); + op = PPC_CMPW_IMM(AT , 0); b = (cond == DCOND_GT ? PPC_NE : PPC_EQ); + break; + } + + if (op) EMIT(op); + return b; +} + +#define emith_jump(target) do { \ + u32 disp_ = (u8 *)target - (u8 *)tcache_ptr; \ + EMIT(PPC_B((uintptr_t)disp_ & 0x03ffffff)); \ +} while (0) +#define emith_jump_patchable(target) \ + emith_jump(target) + +// NB: PPC conditional branches have only +/- 64KB range +#define emith_jump_cond(cond, target) do { \ + int mcond_ = emith_cond_check(cond); \ + u32 disp_ = (u8 *)target - (u8 *)tcache_ptr; \ + EMIT(PPC_BCOND(mcond_,disp_ & 0x0000ffff)); \ +} while (0) +#define emith_jump_cond_patchable(cond, target) \ + emith_jump_cond(cond, target) + +#define emith_jump_cond_inrange(target) \ + ((u8 *)target - (u8 *)tcache_ptr < 0x8000 && \ + (u8 *)target - (u8 *)tcache_ptr >= -0x8000+0x10) //mind cond_check + +// NB: returns position of patch for cache maintenance +#define emith_jump_patch(ptr, target, pos) do { \ + u32 *ptr_ = (u32 *)ptr; /* must skip condition check code */ \ + u32 disp_, mask_; \ + while (*ptr_>>26 != OP_BC && *ptr_>>26 != OP_B) ptr_ ++; \ + disp_ = (u8 *)target - (u8 *)ptr_; \ + mask_ = (*ptr_>>26 == OP_BC ? 
0xffff0003 : 0xfc000003); \ + EMIT_PTR(ptr_, (*ptr_ & mask_) | (disp_ & ~mask_)); \ + if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)(ptr_-1); \ +} while (0) + +#define emith_jump_patch_inrange(ptr, target) \ + ((u8 *)target - (u8 *)ptr < 0x8000 && \ + (u8 *)target - (u8 *)ptr >= -0x8000+0x10) // mind cond_check +#define emith_jump_patch_size() 4 + +#define emith_jump_at(ptr, target) do { \ + u32 disp_ = (u8 *)target - (u8 *)ptr; \ + u32 *ptr_ = (u32 *)ptr; \ + EMIT_PTR(ptr_, PPC_B((uintptr_t)disp_ & 0x03ffffff)); \ +} while (0) +#define emith_jump_at_size() 4 + +#define emith_jump_reg(r) do { \ + EMIT(PPC_MTSP_REG(r, CTR)); \ + EMIT(PPC_BCTRCOND(PPC_AL)); \ +} while(0) +#define emith_jump_reg_c(cond, r) \ + emith_jump_reg(r) + +#define emith_jump_ctx(offs) do { \ + emith_ctx_read_ptr(AT, offs); \ + emith_jump_reg(AT); \ +} while (0) +#define emith_jump_ctx_c(cond, offs) \ + emith_jump_ctx(offs) + +#define emith_call(target) do { \ + u32 disp_ = (u8 *)target - (u8 *)tcache_ptr; \ + EMIT(PPC_BL((uintptr_t)disp_ & 0x03ffffff)); \ +} while(0) +#define emith_call_cond(cond, target) \ + emith_call(target) + +#define emith_call_reg(r) do { \ + EMIT(PPC_MTSP_REG(r, CTR)); \ + EMIT(PPC_BLCTRCOND(PPC_AL)); \ +} while(0) + +#define emith_call_ctx(offs) do { \ + emith_ctx_read_ptr(AT, offs); \ + emith_call_reg(AT); \ +} while (0) + +#define emith_call_cleanup() /**/ + +#define emith_ret() \ + EMIT(PPC_RET()) +#define emith_ret_c(cond) \ + emith_ret() + +#define emith_ret_to_ctx(offs) do { \ + EMIT(PPC_MFSP_REG(AT, LR)); \ + emith_ctx_write_ptr(AT, offs); \ +} while (0) + +#define emith_add_r_ret(r) do { \ + EMIT(PPC_MFSP_REG(AT, LR)); \ + emith_add_r_r_ptr(r, AT); \ +} while (0) + +// NB: ABI SP alignment is 16 in 64 bit mode +#define emith_push_ret(r) do { \ + int offs_ = 16 - 2*PTR_SIZE; \ + emith_add_r_r_ptr_imm(SP, SP, -16); \ + EMIT(PPC_MFSP_REG(AT, LR)); \ + emith_write_r_r_offs_ptr(AT, SP, offs_ + PTR_SIZE); \ + if ((r) > 0) emith_write_r_r_offs(r, SP, offs_); \ +} 
while (0) + +#define emith_pop_and_ret(r) do { \ + int offs_ = 16 - 2*PTR_SIZE; \ + if ((r) > 0) emith_read_r_r_offs(r, SP, offs_); \ + emith_read_r_r_offs_ptr(AT, SP, offs_ + PTR_SIZE); \ + EMIT(PPC_MTSP_REG(AT, LR)); \ + emith_add_r_r_ptr_imm(SP, SP, 16); \ + emith_ret(); \ +} while (0) + + +// emitter ABI stuff +#define emith_pool_check() /**/ +#define emith_pool_commit(j) /**/ +#define emith_insn_ptr() ((u8 *)tcache_ptr) +#define emith_flush() /**/ +#define host_instructions_updated(base, end) __builtin___clear_cache(base, end) +#define emith_update_cache() /**/ +#define emith_rw_offs_max() 0x7fff + +// SH2 drc specific +#define STACK_EXTRA (64+48) // Param, ABI (LR,CR,FP etc) save areas +#define emith_sh2_drc_entry() do { \ + int _c, _z = PTR_SIZE; u32 _m = 0xffffc000; /* r14-r30 */ \ + if (__builtin_parity(_m) == 1) _m |= 0x1; /* ABI align for SP is 16 */ \ + int _s = count_bits(_m) * _z, _o = 0; \ + for (_c = HOST_REGS-1; _m && _c >= 0; _m &= ~(1 << _c), _c--) \ + if (_m & (1 << _c)) \ + { _o -= _z; if (_c) emith_write_r_r_offs_ptr(_c, SP, _o); } \ + EMIT(PPC_MFSP_REG(10, LR)); \ + emith_write_r_r_offs_ptr(10, SP, 16); \ + emith_write_r_r_offs_ptr(SP, SP, -_s-STACK_EXTRA); /* XXX stdu */ \ + emith_add_r_r_ptr_imm(SP, SP, -_s-STACK_EXTRA); \ +} while (0) +#define emith_sh2_drc_exit() do { \ + int _c, _z = PTR_SIZE; u32 _m = 0xffffc000; \ + if (__builtin_parity(_m) == 1) _m |= 0x1; \ + int _s = count_bits(_m) * _z, _o = STACK_EXTRA; \ + for (_c = 0; _m && _c < HOST_REGS; _m &= ~(1 << _c), _c++) \ + if (_m & (1 << _c)) \ + { if (_c) emith_read_r_r_offs_ptr(_c, SP, _o); _o += _z; } \ + emith_add_r_r_ptr_imm(SP, SP, _s+STACK_EXTRA); \ + emith_read_r_r_offs_ptr(10, SP, 16); \ + EMIT(PPC_MTSP_REG(10, LR)); \ + emith_ret(); \ +} while (0) + +// NB: assumes a is in arg0, tab, func and mask are temp +#define emith_sh2_rcall(a, tab, func, mask) do { \ + emith_lsr(mask, a, SH2_READ_SHIFT); \ + emith_add_r_r_r_lsl_ptr(tab, tab, mask, PTR_SCALE+1); \ + 
emith_read_r_r_offs_ptr(func, tab, 0); \ + emith_read_r_r_offs(mask, tab, PTR_SIZE); \ + EMIT(PPC_BFXP_IMM(FC, func, 0, 1)); \ + emith_add_r_r_ptr(func, func); \ + emith_cmp_ra = emith_cmp_rb = -1; \ +} while (0) + +// NB: assumes a, val are in arg0 and arg1, tab and func are temp +#define emith_sh2_wcall(a, val, tab, func) do { \ + emith_lsr(func, a, SH2_WRITE_SHIFT); \ + emith_lsl(func, func, PTR_SCALE); \ + emith_read_r_r_r_ptr(func, tab, func); \ + emith_move_r_r_ptr(5, CONTEXT_REG); /* arg2 */ \ + emith_jump_reg(func); \ +} while (0) + +#define emith_sh2_delay_loop(cycles, reg) do { \ + int sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); \ + int t1 = rcache_get_tmp(); \ + int t2 = rcache_get_tmp(); \ + int t3 = rcache_get_tmp(); \ + /* if (sr < 0) return */ \ + emith_cmp_r_imm(sr, 0); \ + EMITH_JMP_START(DCOND_LE); \ + /* turns = sr.cycles / cycles */ \ + emith_asr(t2, sr, 12); \ + emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)) + 1); \ + emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */ \ + rcache_free_tmp(t3); \ + if (reg >= 0) { \ + /* if (reg <= turns) turns = reg-1 */ \ + t3 = rcache_get_reg(reg, RC_GR_RMW, NULL); \ + emith_cmp_r_r(t3, t2); \ + EMITH_SJMP_START(DCOND_HI); \ + emith_sub_r_r_imm_c(DCOND_LS, t2, t3, 1); \ + EMITH_SJMP_END(DCOND_HI); \ + /* if (reg <= 1) turns = 0 */ \ + emith_cmp_r_imm(t3, 1); \ + EMITH_SJMP_START(DCOND_HI); \ + emith_move_r_imm_c(DCOND_LS, t2, 0); \ + EMITH_SJMP_END(DCOND_HI); \ + /* reg -= turns */ \ + emith_sub_r_r(t3, t2); \ + } \ + /* sr.cycles -= turns * cycles; */ \ + emith_move_r_imm(t1, cycles); \ + emith_mul(t1, t2, t1); \ + emith_sub_r_r_r_lsl(sr, sr, t1, 12); \ + EMITH_JMP_END(DCOND_LE); \ + rcache_free_tmp(t1); \ + rcache_free_tmp(t2); \ +} while (0) + +/* + * T = !carry(Rn = (Rn << 1) | T) + * if Q + * C = carry(Rn += Rm) + * else + * C = carry(Rn -= Rm) + * T ^= C + */ +#define emith_sh2_div1_step(rn, rm, sr) do { \ + int t_ = rcache_get_tmp(); \ + emith_and_r_r_imm(AT, sr, T); \ + emith_lsr(FC, rn, 31); 
/*Rn = (Rn<<1)+T*/ \ + emith_lsl(t_, rn, 1); \ + emith_or_r_r(t_, AT); \ + emith_or_r_imm(sr, T); /* T = !carry */ \ + emith_eor_r_r(sr, FC); \ + emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \ + EMITH_JMP3_START(DCOND_EQ); \ + emith_add_r_r_r(rn, t_, rm); \ + EMIT_PPC_SLTWU_REG(FC, rn, t_); \ + EMITH_JMP3_MID(DCOND_EQ); \ + emith_sub_r_r_r(rn, t_, rm); \ + EMIT_PPC_SLTWU_REG(FC, t_, rn); \ + EMITH_JMP3_END(); \ + emith_eor_r_r(sr, FC); /* T ^= carry */ \ + rcache_free_tmp(t_); \ +} while (0) + +/* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */ +#define emith_sh2_macl(ml, mh, rn, rm, sr) do { \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* MACH top 16 bits unused if saturated. sign ext for overfl detect */ \ + emith_sext(mh, mh, 16); \ + EMITH_SJMP_END(DCOND_EQ); \ + emith_mula_s64(ml, mh, rn, rm); \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* overflow if top 17 bits of MACH aren't all 1 or 0 */ \ + /* to check: add MACH >> 31 to MACH >> 15. this is 0 if no overflow */ \ + emith_asr(rn, mh, 15); \ + emith_add_r_r_r_lsr(rn, rn, mh, 31); /* sum = (MACH>>31)+(MACH>>15) */ \ + emith_tst_r_r(rn, rn); /* (need only N and Z flags) */ \ + EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> ov */ \ + emith_move_r_imm_c(DCOND_NE, ml, 0x0000); /* -overflow */ \ + emith_move_r_imm_c(DCOND_NE, mh, 0x8000); \ + EMITH_SJMP_START(DCOND_PL); /* sum > 0 -> +ovl */ \ + emith_sub_r_imm_c(DCOND_MI, ml, 1); /* 0xffffffff */ \ + emith_sub_r_imm_c(DCOND_MI, mh, 1); /* 0x00007fff */ \ + EMITH_SJMP_END(DCOND_PL); \ + EMITH_SJMP_END(DCOND_EQ); \ + EMITH_SJMP_END(DCOND_EQ); \ +} while (0) + +/* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */ +#define emith_sh2_macw(ml, mh, rn, rm, sr) do { \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* XXX: MACH should be untouched when S is set? 
*/ \ + emith_asr(mh, ml, 31); /* sign ext MACL to MACH for ovrfl check */ \ + EMITH_SJMP_END(DCOND_EQ); \ + emith_mula_s64(ml, mh, rn, rm); \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* overflow if top 33 bits of MACH:MACL aren't all 1 or 0 */ \ + /* to check: add MACL[31] to MACH. this is 0 if no overflow */ \ + emith_lsr(rn, ml, 31); \ + emith_add_r_r(rn, mh); /* sum = MACH + ((MACL>>31)&1) */ \ + emith_tst_r_r(rn, rn); /* (need only N and Z flags) */ \ + EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \ + /* XXX: LSB signalling only in SH1, or in SH2 too? */ \ + emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \ + emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* negative ovrfl */ \ + EMITH_SJMP_START(DCOND_PL); /* sum > 0 -> positive ovrfl */ \ + emith_sub_r_imm_c(DCOND_MI, ml, 1); /* 0x7fffffff */ \ + EMITH_SJMP_END(DCOND_PL); \ + EMITH_SJMP_END(DCOND_EQ); \ + EMITH_SJMP_END(DCOND_EQ); \ +} while (0) + +#define emith_write_sr(sr, srcr) \ + EMIT(PPC_BFIW_IMM(sr, srcr, 22, 10)) + +#define emith_carry_to_t(sr, is_sub) \ + EMIT(PPC_BFIW_IMM(sr, FC, 31, 1)) + +#define emith_t_to_carry(sr, is_sub) \ + emith_and_r_r_imm(FC, sr, 1) + +#define emith_tpop_carry(sr, is_sub) do { \ + emith_and_r_r_imm(FC, sr, 1); \ + emith_eor_r_r(sr, FC); \ +} while (0) + +#define emith_tpush_carry(sr, is_sub) \ + emith_or_r_r(sr, FC) + +#ifdef T +#define emith_invert_cond(cond) \ + ((cond) ^ 1) + +// T bit handling +static void emith_clr_t_cond(int sr) +{ + emith_bic_r_imm(sr, T); +} + +static void emith_set_t_cond(int sr, int cond) +{ + int b; + u8 *ptr; + u32 val = 0; + + // XXX optimization + b = emith_invert_branch(emith_cond_check(cond)); + ptr = tcache_ptr; + EMIT(PPC_BCOND(b, 0)); + emith_or_r_imm(sr, T); + val = (u8 *)tcache_ptr - (u8 *)(ptr); + EMIT_PTR(ptr, PPC_BCOND(b, val & 0x00001fff)); +} + +#define emith_get_t_cond() -1 + +#define emith_sync_t(sr) ((void)sr) + +#define emith_invalidate_t() + +static void emith_set_t(int sr, int 
val) +{ + if (val) + emith_or_r_imm(sr, T); + else + emith_bic_r_imm(sr, T); +} + +static int emith_tst_t(int sr, int tf) +{ + emith_tst_r_imm(sr, T); + return tf ? DCOND_NE: DCOND_EQ; +} +#endif From 8bb489470a3daf6841500cd4be8044ea6611393e Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 19 Jun 2020 00:14:28 +0200 Subject: [PATCH 0322/1110] sh2 drc, add powerpc64le backend --- Makefile | 6 +- cpu/drc/emit_arm.c | 2 + cpu/drc/emit_arm64.c | 1 + cpu/drc/emit_mips.c | 1 + cpu/drc/emit_ppc.c | 230 +++++++++++++++++++------------------------ cpu/drc/emit_riscv.c | 1 + cpu/drc/emit_x86.c | 2 + cpu/sh2/compiler.c | 9 +- cpu/sh2/compiler.h | 3 + platform/linux/emu.c | 2 +- 10 files changed, 127 insertions(+), 130 deletions(-) diff --git a/Makefile b/Makefile index 053e1606..c23841d7 100644 --- a/Makefile +++ b/Makefile @@ -73,6 +73,10 @@ else ifneq (,$(findstring riscv,$(ARCH))) use_fame ?= 1 use_cz80 ?= 1 use_sh2drc ?= 1 +else ifneq (,$(findstring powerpc,$(ARCH))) +use_fame ?= 1 +use_cz80 ?= 1 +use_sh2drc ?= 1 endif -include Makefile.local @@ -270,7 +274,7 @@ pico/carthw_cfg.c: pico/carthw.cfg # random deps pico/carthw/svp/compiler.o : cpu/drc/emit_arm.c -cpu/sh2/compiler.o : cpu/drc/emit_arm.c cpu/drc/emit_arm64.c +cpu/sh2/compiler.o : cpu/drc/emit_arm.c cpu/drc/emit_arm64.c cpu/drc/emit_ppc.c cpu/sh2/compiler.o : cpu/drc/emit_x86.c cpu/drc/emit_mips.c cpu/drc/emit_riscv.c cpu/sh2/mame/sh2pico.o : cpu/sh2/mame/sh2.c pico/pico.o pico/cd/mcd.o pico/32x/32x.o : pico/pico_cmn.c pico/pico_int.h diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index e27054a3..3f373435 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -1138,6 +1138,8 @@ static inline void emith_pool_adjust(int tcache_offs, int move_offs) EOP_MOV_REG_ASR(d,d,32 - (bits)); \ } while (0) +#define emith_uext_ptr(r) /**/ + #define emith_do_caller_regs(mask, func) do { \ u32 _reg_mask = (mask) & 0x500f; \ if (_reg_mask) { \ diff --git a/cpu/drc/emit_arm64.c b/cpu/drc/emit_arm64.c index 
f4645bc1..ae7077a0 100644 --- a/cpu/drc/emit_arm64.c +++ b/cpu/drc/emit_arm64.c @@ -1176,6 +1176,7 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) #define host_instructions_updated(base, end) __builtin___clear_cache(base, end) #define emith_update_cache() /**/ #define emith_rw_offs_max() 0xff +#define emith_uext_ptr(r) /**/ // SH2 drc specific diff --git a/cpu/drc/emit_mips.c b/cpu/drc/emit_mips.c index 8cb094de..8eddd219 100644 --- a/cpu/drc/emit_mips.c +++ b/cpu/drc/emit_mips.c @@ -1563,6 +1563,7 @@ static int emith_cond_check(int cond, int *r) #define host_instructions_updated(base, end) __builtin___clear_cache(base, end) #define emith_update_cache() /**/ #define emith_rw_offs_max() 0x7fff +#define emith_uext_ptr(r) /**/ // SH2 drc specific #define emith_sh2_drc_entry() do { \ diff --git a/cpu/drc/emit_ppc.c b/cpu/drc/emit_ppc.c index fb2ca44b..286d4166 100644 --- a/cpu/drc/emit_ppc.c +++ b/cpu/drc/emit_ppc.c @@ -6,8 +6,6 @@ * See COPYING file in the top-level directory. */ -// WARNING: unfinished, neither thoroughly tested nor optimized. little endian only! - // NB bit numbers are reversed in PPC (MSB is bit 0). The emith_* functions and // macros must take this into account. @@ -34,7 +32,7 @@ // use CA and OV. // Moreover, there's no easy possibility to get CA and OV for 32 bit arithmetic // since all arithmetic/logical insns use 64 bit. -// For now, use the "no flags" code from the RISCV backend. +// For now, use the "no flags" code from the RISC-V backend. 
#define HOST_REGS 32 @@ -42,7 +40,7 @@ // reserved: r0(zero), r1(stack), r2(TOC), r13(TID) #define RET_REG 3 #define PARAM_REGS { 3, 4, 5, 6, 7, 8, 9, 10 } -#define PRESERVED_REGS { 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,31 } +#define PRESERVED_REGS { 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 } #define TEMPORARY_REGS { 11, 12 } #define CONTEXT_REG 31 @@ -50,27 +48,18 @@ // if RA is 0 in non-update memory insns, ADDI/ADDIS, ISEL, it aliases with zero #define Z0 0 // zero register -#define SP 1 // stack pointer +#define SP 1 // stack pointer // SPR registers -#define XER -1 // exception register -#define LR -8 // link register -#define CTR -9 // counter register +#define XER -1 // exception register +#define LR -8 // link register +#define CTR -9 // counter register // internally used by code emitter: -#define AT 0 // emitter temporary (can't be fully used anyway) +#define AT 0 // emitter temporary (can't be fully used anyway) #define FNZ 14 // emulated processor flags: N (bit 31) ,Z (all bits) #define FC 15 // emulated processor flags: C (bit 0), others 0 #define FV 16 // emulated processor flags: Nt^Ns (bit 31). 
others x -// PPC conditions, BO0-BO4:BI2-BI4 since we only need CR0 -#define PPC_LT 0x60 -#define PPC_GE 0x20 -#define PPC_GT 0x61 -#define PPC_LE 0x21 -#define PPC_EQ 0x62 -#define PPC_NE 0x22 -#define PPC_AL 0xa0 - // unified conditions; virtual, not corresponding to anything real on PPC #define DCOND_EQ 0x0 #define DCOND_NE 0x1 @@ -94,8 +83,8 @@ #define PPC_INSN(op, b10, b15, b20, b31) \ (((op)<<26)|((b10)<<21)|((b15)<<16)|((b20)<<11)|((b31)<<0)) -#define _ 0 // marker for "field unused" -#define __(n) o##n // enum marker for "undefined" +#define _ 0 // marker for "field unused" +#define __(n) o##n // enum marker for "undefined" #define _CB(v,l,s,d) ((((v)>>(s))&((1<<(l))-1))<<(d)) // copy l bits // NB everything privileged or unneeded at 1st sight is left out @@ -148,8 +137,16 @@ enum { OPS_STD, OPS_STDU /*,OPS_STQ*/ }; // AA and LK in I,B-forms branches #define BAA (1<<1) #define BLK (1<<0) +// BO and BI condition codes in B-form, BO0-BO4:BI2-BI4 since we only need CR0 +#define BLT 0x60 +#define BGE 0x20 +#define BGT 0x61 +#define BLE 0x21 +#define BEQ 0x62 +#define BNE 0x22 +#define BXX 0xa0 // unconditional, aka always -#define PPC_NOP \ +#define PPC_NOP \ PPC_INSN(OP_ORI, 0, 0, _, 0) // ori r0, r0, 0 // arithmetic/logical @@ -331,7 +328,7 @@ enum { OPS_STD, OPS_STDU /*,OPS_STQ*/ }; #define PPC_BL(offs26) \ PPC_OP_IMM(OP_B,_,_,((offs26)&~3)|BLK) #define PPC_RET() \ - PPC_OP_REG(OP__CR,OPC_BCLR,PPC_AL>>3,_,_) + PPC_OP_REG(OP__CR,OPC_BCLR,BXX>>3,_,_) #define PPC_RETCOND(cond) \ PPC_OP_REG(OP__CR,OPC_BCLR,(cond)>>3,(cond)&0x7,_) #define PPC_BCTRCOND(cond) \ @@ -411,6 +408,8 @@ enum { OPS_STD, OPS_STDU /*,OPS_STQ*/ }; #define PPC_STP_REG PPC_STX_REG #define PPC_BFXP_IMM PPC_BFX_IMM +#define emith_uext_ptr(r) EMIT(PPC_EXTUW_REG(r, r)) + // "long" multiplication, 32x32 bit = 64 bit #define EMIT_PPC_MULLU_REG(dlo, dhi, s1, s2) do { \ EMIT(PPC_EXTUW_REG(s1, s1)); \ @@ -442,6 +441,8 @@ enum { OPS_STD, OPS_STDU /*,OPS_STQ*/ }; #define PPC_STP_REG PPC_STW_REG #define 
PPC_BFXP_IMM PPC_BFXW_IMM +#define emith_uext_ptr(r) /**/ + // "long" multiplication, 32x32 bit = 64 bit #define EMIT_PPC_MULLU_REG(dlo, dhi, s1, s2) do { \ int at = (dlo == s1 || dlo == s2 ? AT : dlo); \ @@ -467,23 +468,7 @@ enum { OPS_STD, OPS_STDU /*,OPS_STQ*/ }; #endif #define PTR_SIZE (1<> 32); if (imm >> 32) EMIT(PPC_LSL_IMM(r, r, 32)); @@ -883,23 +867,11 @@ static void emith_move_imm(int r, int ptr, uintptr_t imm) EMIT(PPC_ORT_IMM(r, r, (imm & 0xffff0000) >> 16)); } else #endif - { - int s = Z0, d = 0, c = 0; - if ((u16)imm) { - EMIT(PPC_ADD_IMM(r, s, (u16)imm)); - s = r, d = 1, c = (s16)imm < 0; - } - // adjust for sign extension in ADDI - if (!d) // low part == 0 - EMIT(PPC_ADDT_IMM(r, s, (u16)(imm>>16))); - else if (c && (u16)(~imm>>16)) // low part < 0 - EMIT(PPC_XORT_IMM(r, s, (u16)(~imm>>16))); - else if (!c && (u16)(imm>>16)) // low part > 0 - EMIT(PPC_ORT_IMM(r, s, (u16)(imm>>16))); - // make sure to clear upper half if this is a ptr - if (ptr && !(imm >> 32) && c) - EMIT(PPC_EXTUW_REG(r, r)); - } + if ((s16)imm != (s32)imm) { + EMIT(PPC_ADDT_IMM(r, Z0, (u16)(imm>>16))); + if ((s16)imm) + EMIT(PPC_OR_IMM(r, r, (u16)(imm))); + } else EMIT(PPC_ADD_IMM(r, Z0, (u16)imm)); } #define emith_move_r_ptr_imm(r, imm) \ @@ -1176,9 +1148,6 @@ static void emith_add_imm(int rt, int ra, u32 imm) } \ } while (0) -#define emith_uext_ptr(r) \ - EMIT(PPC_EXTUW_REG(r, r)) - // multiply Rd = Rn*Rm (+ Ra) #define emith_mul(d, s1, s2) \ @@ -1248,13 +1217,17 @@ static void emith_add_imm(int rt, int ra, u32 imm) #define emith_read8s_r_r_r_c(cond, r, ra, rm) \ emith_read8s_r_r_r(r, ra, rm) -#define emith_read16s_r_r_offs(r, ra, offs) \ - EMIT(PPC_LDSH_IMM(r, ra, offs)) +#define emith_read16s_r_r_offs(r, ra, offs) do { \ + EMIT(PPC_LDH_IMM(r, ra, offs)); \ + EMIT(PPC_EXTSH_REG(r, r)); \ +} while (0) #define emith_read16s_r_r_offs_c(cond, r, ra, offs) \ emith_read16s_r_r_offs(r, ra, offs) -#define emith_read16s_r_r_r(r, ra, rm) \ - EMIT(PPC_LDSH_REG(r, ra, rm)) +#define 
emith_read16s_r_r_r(r, ra, rm) do { \ + EMIT(PPC_LDH_REG(r, ra, rm)); \ + EMIT(PPC_EXTSH_REG(r, r)); \ +} while (0) #define emith_read16s_r_r_r_c(cond, r, ra, rm) \ emith_read16s_r_r_r(r, ra, rm) @@ -1346,16 +1319,16 @@ static int emith_cmpr_check(int rs, int rt, int cond, u32 *op) // condition check for comparing 2 registers switch (cond) { - case DCOND_EQ: *op = PPC_CMPW_REG(rs, rt); b = PPC_EQ; break; - case DCOND_NE: *op = PPC_CMPW_REG(rs, rt); b = PPC_NE; break; - case DCOND_LO: *op = PPC_CMPLW_REG(rs, rt); b = PPC_LT; break; - case DCOND_HS: *op = PPC_CMPLW_REG(rs, rt); b = PPC_GE; break; - case DCOND_LS: *op = PPC_CMPLW_REG(rs, rt); b = PPC_LE; break; - case DCOND_HI: *op = PPC_CMPLW_REG(rs, rt); b = PPC_GT; break; - case DCOND_LT: *op = PPC_CMPW_REG(rs, rt); b = PPC_LT; break; - case DCOND_GE: *op = PPC_CMPW_REG(rs, rt); b = PPC_GE; break; - case DCOND_LE: *op = PPC_CMPW_REG(rs, rt); b = PPC_LE; break; - case DCOND_GT: *op = PPC_CMPW_REG(rs, rt); b = PPC_GT; break; + case DCOND_EQ: *op = PPC_CMPW_REG(rs, rt); b = BEQ; break; + case DCOND_NE: *op = PPC_CMPW_REG(rs, rt); b = BNE; break; + case DCOND_LO: *op = PPC_CMPLW_REG(rs, rt); b = BLT; break; + case DCOND_HS: *op = PPC_CMPLW_REG(rs, rt); b = BGE; break; + case DCOND_LS: *op = PPC_CMPLW_REG(rs, rt); b = BLE; break; + case DCOND_HI: *op = PPC_CMPLW_REG(rs, rt); b = BGT; break; + case DCOND_LT: *op = PPC_CMPW_REG(rs, rt); b = BLT; break; + case DCOND_GE: *op = PPC_CMPW_REG(rs, rt); b = BGE; break; + case DCOND_LE: *op = PPC_CMPW_REG(rs, rt); b = BLE; break; + case DCOND_GT: *op = PPC_CMPW_REG(rs, rt); b = BGT; break; } return b; @@ -1367,16 +1340,16 @@ static int emith_cmpi_check(int rs, s32 imm, int cond, u32 *op) // condition check for comparing register with immediate switch (cond) { - case DCOND_EQ: *op = PPC_CMPW_IMM(rs, (u16)imm), b = PPC_EQ; break; - case DCOND_NE: *op = PPC_CMPW_IMM(rs, (u16)imm), b = PPC_NE; break; - case DCOND_LO: *op = PPC_CMPLW_IMM(rs, (u16)imm), b = PPC_LT; break; - case 
DCOND_HS: *op = PPC_CMPLW_IMM(rs, (u16)imm), b = PPC_GE; break; - case DCOND_LS: *op = PPC_CMPLW_IMM(rs, (u16)imm), b = PPC_LE; break; - case DCOND_HI: *op = PPC_CMPLW_IMM(rs, (u16)imm), b = PPC_GT; break; - case DCOND_LT: *op = PPC_CMPW_IMM(rs, (u16)imm), b = PPC_LT; break; - case DCOND_GE: *op = PPC_CMPW_IMM(rs, (u16)imm), b = PPC_GE; break; - case DCOND_LE: *op = PPC_CMPW_IMM(rs, (u16)imm), b = PPC_LE; break; - case DCOND_GT: *op = PPC_CMPW_IMM(rs, (u16)imm), b = PPC_GT; break; + case DCOND_EQ: *op = PPC_CMPW_IMM(rs, (u16)imm), b = BEQ; break; + case DCOND_NE: *op = PPC_CMPW_IMM(rs, (u16)imm), b = BNE; break; + case DCOND_LO: *op = PPC_CMPLW_IMM(rs, (u16)imm), b = BLT; break; + case DCOND_HS: *op = PPC_CMPLW_IMM(rs, (u16)imm), b = BGE; break; + case DCOND_LS: *op = PPC_CMPLW_IMM(rs, (u16)imm), b = BLE; break; + case DCOND_HI: *op = PPC_CMPLW_IMM(rs, (u16)imm), b = BGT; break; + case DCOND_LT: *op = PPC_CMPW_IMM(rs, (u16)imm), b = BLT; break; + case DCOND_GE: *op = PPC_CMPW_IMM(rs, (u16)imm), b = BGE; break; + case DCOND_LE: *op = PPC_CMPW_IMM(rs, (u16)imm), b = BLE; break; + case DCOND_GT: *op = PPC_CMPW_IMM(rs, (u16)imm), b = BGT; break; } return b; @@ -1396,29 +1369,29 @@ static int emith_cond_check(int cond) // shortcut for V known to be 0 if (b < 0 && emith_flg_noV) switch (cond) { case DCOND_VS: /* no branch */ break; // never - case DCOND_VC: b = PPC_AL; break; // always - case DCOND_LT: op = PPC_CMPW_IMM(FNZ, 0); b = PPC_LT; break; // N - case DCOND_GE: op = PPC_CMPW_IMM(FNZ, 0); b = PPC_GE; break; // !N - case DCOND_LE: op = PPC_CMPW_IMM(FNZ, 0); b = PPC_LE; break; // N || Z - case DCOND_GT: op = PPC_CMPW_IMM(FNZ, 0); b = PPC_GT; break; // !N && !Z + case DCOND_VC: b = BXX; break; // always + case DCOND_LT: op = PPC_CMPW_IMM(FNZ, 0); b = BLT; break; // N + case DCOND_GE: op = PPC_CMPW_IMM(FNZ, 0); b = BGE; break; // !N + case DCOND_LE: op = PPC_CMPW_IMM(FNZ, 0); b = BLE; break; // N || Z + case DCOND_GT: op = PPC_CMPW_IMM(FNZ, 0); b = BGT; break; // !N 
&& !Z } // the full monty if no shortcut if (b < 0) switch (cond) { // conditions using NZ - case DCOND_EQ: op = PPC_CMPW_IMM(FNZ, 0); b = PPC_EQ; break; // Z - case DCOND_NE: op = PPC_CMPW_IMM(FNZ, 0); b = PPC_NE; break; // !Z - case DCOND_MI: op = PPC_CMPW_IMM(FNZ, 0); b = PPC_LT; break; // N - case DCOND_PL: op = PPC_CMPW_IMM(FNZ, 0); b = PPC_GE; break; // !N + case DCOND_EQ: op = PPC_CMPW_IMM(FNZ, 0); b = BEQ; break; // Z + case DCOND_NE: op = PPC_CMPW_IMM(FNZ, 0); b = BNE; break; // !Z + case DCOND_MI: op = PPC_CMPW_IMM(FNZ, 0); b = BLT; break; // N + case DCOND_PL: op = PPC_CMPW_IMM(FNZ, 0); b = BGE; break; // !N // conditions using C - case DCOND_LO: op = PPC_CMPW_IMM(FC , 0); b = PPC_NE; break; // C - case DCOND_HS: op = PPC_CMPW_IMM(FC , 0); b = PPC_EQ; break; // !C + case DCOND_LO: op = PPC_CMPW_IMM(FC , 0); b = BNE; break; // C + case DCOND_HS: op = PPC_CMPW_IMM(FC , 0); b = BEQ; break; // !C // conditions using CZ case DCOND_LS: // C || Z case DCOND_HI: // !C && !Z EMIT(PPC_ADD_IMM(AT, FC, -1)); // !C && !Z EMIT(PPC_AND_REG(AT, FNZ, AT)); - op = PPC_CMPW_IMM(AT , 0); b = (cond == DCOND_HI ? PPC_NE : PPC_EQ); + op = PPC_CMPW_IMM(AT , 0); b = (cond == DCOND_HI ? BNE : BEQ); break; // conditions using V @@ -1427,14 +1400,14 @@ static int emith_cond_check(int cond) EMIT(PPC_XOR_REG(AT, FV, FNZ)); // V = Nt^Ns^Nd^C EMIT(PPC_LSRW_IMM(AT, AT, 31)); EMIT(PPC_XOR_REG(AT, AT, FC)); - op = PPC_CMPW_IMM(AT , 0); b = (cond == DCOND_VS ? PPC_NE : PPC_EQ); + op = PPC_CMPW_IMM(AT , 0); b = (cond == DCOND_VS ? BNE : BEQ); break; // conditions using VNZ case DCOND_LT: // N^V case DCOND_GE: // !(N^V) EMIT(PPC_LSRW_IMM(AT, FV, 31)); // Nd^V = Nt^Ns^C EMIT(PPC_XOR_REG(AT, FC, AT)); - op = PPC_CMPW_IMM(AT , 0); b = (cond == DCOND_LT ? PPC_NE : PPC_EQ); + op = PPC_CMPW_IMM(AT , 0); b = (cond == DCOND_LT ? 
BNE : BEQ); break; case DCOND_LE: // (N^V) || Z case DCOND_GT: // !(N^V) && !Z @@ -1442,7 +1415,7 @@ static int emith_cond_check(int cond) EMIT(PPC_XOR_REG(AT, FC, AT)); EMIT(PPC_ADD_IMM(AT, AT, -1)); // !(Nd^V) && !Z EMIT(PPC_AND_REG(AT, FNZ, AT)); - op = PPC_CMPW_IMM(AT , 0); b = (cond == DCOND_GT ? PPC_NE : PPC_EQ); + op = PPC_CMPW_IMM(AT , 0); b = (cond == DCOND_GT ? BNE : BEQ); break; } @@ -1461,7 +1434,7 @@ static int emith_cond_check(int cond) #define emith_jump_cond(cond, target) do { \ int mcond_ = emith_cond_check(cond); \ u32 disp_ = (u8 *)target - (u8 *)tcache_ptr; \ - EMIT(PPC_BCOND(mcond_,disp_ & 0x0000ffff)); \ + if (mcond_ >= 0) EMIT(PPC_BCOND(mcond_,disp_ & 0x0000ffff)); \ } while (0) #define emith_jump_cond_patchable(cond, target) \ emith_jump_cond(cond, target) @@ -1495,7 +1468,7 @@ static int emith_cond_check(int cond) #define emith_jump_reg(r) do { \ EMIT(PPC_MTSP_REG(r, CTR)); \ - EMIT(PPC_BCTRCOND(PPC_AL)); \ + EMIT(PPC_BCTRCOND(BXX)); \ } while(0) #define emith_jump_reg_c(cond, r) \ emith_jump_reg(r) @@ -1516,7 +1489,7 @@ static int emith_cond_check(int cond) #define emith_call_reg(r) do { \ EMIT(PPC_MTSP_REG(r, CTR)); \ - EMIT(PPC_BLCTRCOND(PPC_AL)); \ + EMIT(PPC_BLCTRCOND(BXX)); \ } while(0) #define emith_call_ctx(offs) do { \ @@ -1564,13 +1537,13 @@ static int emith_cond_check(int cond) #define emith_pool_check() /**/ #define emith_pool_commit(j) /**/ #define emith_insn_ptr() ((u8 *)tcache_ptr) -#define emith_flush() /**/ +#define emith_flush() /**/ #define host_instructions_updated(base, end) __builtin___clear_cache(base, end) -#define emith_update_cache() /**/ +#define emith_update_cache() /**/ #define emith_rw_offs_max() 0x7fff // SH2 drc specific -#define STACK_EXTRA (64+48) // Param, ABI (LR,CR,FP etc) save areas +#define STACK_EXTRA ((8+6)*PTR_SIZE) // Param, ABI (LR,CR,FP etc) save areas #define emith_sh2_drc_entry() do { \ int _c, _z = PTR_SIZE; u32 _m = 0xffffc000; /* r14-r30 */ \ if (__builtin_parity(_m) == 1) _m |= 0x1; /* ABI 
align for SP is 16 */ \ @@ -1579,7 +1552,7 @@ static int emith_cond_check(int cond) if (_m & (1 << _c)) \ { _o -= _z; if (_c) emith_write_r_r_offs_ptr(_c, SP, _o); } \ EMIT(PPC_MFSP_REG(10, LR)); \ - emith_write_r_r_offs_ptr(10, SP, 16); \ + emith_write_r_r_offs_ptr(10, SP, 2*PTR_SIZE); \ emith_write_r_r_offs_ptr(SP, SP, -_s-STACK_EXTRA); /* XXX stdu */ \ emith_add_r_r_ptr_imm(SP, SP, -_s-STACK_EXTRA); \ } while (0) @@ -1591,7 +1564,7 @@ static int emith_cond_check(int cond) if (_m & (1 << _c)) \ { if (_c) emith_read_r_r_offs_ptr(_c, SP, _o); _o += _z; } \ emith_add_r_r_ptr_imm(SP, SP, _s+STACK_EXTRA); \ - emith_read_r_r_offs_ptr(10, SP, 16); \ + emith_read_r_r_offs_ptr(10, SP, 2*PTR_SIZE); \ EMIT(PPC_MTSP_REG(10, LR)); \ emith_ret(); \ } while (0) @@ -1672,11 +1645,13 @@ static int emith_cond_check(int cond) emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \ EMITH_JMP3_START(DCOND_EQ); \ emith_add_r_r_r(rn, t_, rm); \ - EMIT_PPC_SLTWU_REG(FC, rn, t_); \ + EMIT(PPC_CMPLW_REG(rn, t_)); \ EMITH_JMP3_MID(DCOND_EQ); \ emith_sub_r_r_r(rn, t_, rm); \ - EMIT_PPC_SLTWU_REG(FC, t_, rn); \ + EMIT(PPC_CMPLW_REG(t_, rn)); \ EMITH_JMP3_END(); \ + EMIT(PPC_MFCR_REG(FC)); \ + EMIT(PPC_BFXW_IMM(FC, FC, 0, 1)); \ emith_eor_r_r(sr, FC); /* T ^= carry */ \ rcache_free_tmp(t_); \ } while (0) @@ -1737,7 +1712,7 @@ static int emith_cond_check(int cond) EMIT(PPC_BFIW_IMM(sr, srcr, 22, 10)) #define emith_carry_to_t(sr, is_sub) \ - EMIT(PPC_BFIW_IMM(sr, FC, 31, 1)) + EMIT(PPC_BFIW_IMM(sr, FC, 32-__builtin_ffs(T), 1)) #define emith_t_to_carry(sr, is_sub) \ emith_and_r_r_imm(FC, sr, 1) @@ -1755,26 +1730,27 @@ static int emith_cond_check(int cond) ((cond) ^ 1) // T bit handling -static void emith_clr_t_cond(int sr) -{ - emith_bic_r_imm(sr, T); -} - static void emith_set_t_cond(int sr, int cond) { int b; - u8 *ptr; - u32 val = 0; - // XXX optimization - b = emith_invert_branch(emith_cond_check(cond)); - ptr = tcache_ptr; - EMIT(PPC_BCOND(b, 0)); - emith_or_r_imm(sr, T); - val = (u8 *)tcache_ptr - (u8 
*)(ptr); - EMIT_PTR(ptr, PPC_BCOND(b, val & 0x00001fff)); + // catch never and always cases + if ((b = emith_cond_check(cond)) < 0) + return; + else if (b == BXX) { + emith_or_r_imm(sr, T); + return; + } + + // extract bit from CR and insert into T + EMIT(PPC_MFCR_REG(AT)); + EMIT(PPC_BFXW_IMM(AT, AT, (b&7), 1)); + if (!(b & 0x40)) EMIT(PPC_XOR_IMM(AT, AT, 1)); + EMIT(PPC_BFIW_IMM(sr, AT, 32-__builtin_ffs(T), 1)); } +#define emith_clr_t_cond(sr) ((void)sr) + #define emith_get_t_cond() -1 #define emith_sync_t(sr) ((void)sr) diff --git a/cpu/drc/emit_riscv.c b/cpu/drc/emit_riscv.c index 69ed530e..954d14a7 100644 --- a/cpu/drc/emit_riscv.c +++ b/cpu/drc/emit_riscv.c @@ -1400,6 +1400,7 @@ static int emith_cond_check(int cond, int *r, int *s) #define host_instructions_updated(base, end) __builtin___clear_cache(base, end) #define emith_update_cache() /**/ #define emith_rw_offs_max() 0x7ff +#define emith_uext_ptr(r) /**/ // SH2 drc specific #define emith_sh2_drc_entry() do { \ diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 80ec0444..c836e159 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -622,6 +622,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common emith_asr(d, d, 32 - (bits)); \ } while (0) +#define emith_uext_ptr(r) /**/ + #define emith_setc(r) do { \ assert(is_abcdx(r)); \ EMIT_REX_IF(0, 0, r); \ diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index bfd9ec06..d1a971c6 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -189,7 +189,7 @@ static char sh2dasm_buff[64]; (sh2)->r[8], (sh2)->r[9], (sh2)->r[10], (sh2)->r[11], \ (sh2)->r[12], (sh2)->r[13], (sh2)->r[14], (sh2)->r[15]); \ printf("%csh2 pc-ml %08x %08x %08x %08x %08x %08x %08x %08x\n", ms, \ - (sh2)->pc, (sh2)->ppc, (sh2)->pr, (sh2)->sr&0x3ff, \ + (sh2)->pc, (sh2)->ppc, (sh2)->pr, (sh2)->sr&0xfff, \ (sh2)->gbr, (sh2)->vbr, (sh2)->mach, (sh2)->macl); \ printf("%csh2 tmp-p %08x %08x %08x %08x %08x %08x %08x %08x\n", ms, \ (sh2)->drc_tmp, 
(sh2)->irq_cycles, \ @@ -246,6 +246,10 @@ static void REGPARM(3) *sh2_drc_log_entry(void *block, SH2 *sh2, u32 sr) SH2_DUMP(&fsh2, "file"); SH2_DUMP(sh2, "current"); SH2_DUMP(&csh2[idx][0], "previous"); + char *ps = (char *)sh2, *pf = (char *)&fsh2; + for (idx = 0; idx < offsetof(SH2, read8_map); idx += sizeof(u32)) + if (*(u32 *)(ps+idx) != *(u32 *)(pf+idx)) + printf("diff reg %ld\n",idx/sizeof(u32)); exit(1); } csh2[idx][0] = fsh2; @@ -455,6 +459,8 @@ static void rcache_free_tmp(int hr); #include "../drc/emit_mips.c" #elif defined(__riscv__) || defined(__riscv) #include "../drc/emit_riscv.c" +#elif defined(__powerpc__) +#include "../drc/emit_ppc.c" #elif defined(__i386__) #include "../drc/emit_x86.c" #elif defined(__x86_64__) @@ -2572,6 +2578,7 @@ static int emit_get_rbase_and_offs(SH2 *sh2, sh2_reg_e r, int rmode, u32 *offs) u32 odd = a & 1; // need to fix odd address for correct byte addressing la -= (s32)((a & ~mask) - *offs - odd); // diff between reg and memory hr = hr2 = rcache_get_reg(r, rmode, NULL); + if ((s32)a < 0) emith_uext_ptr(hr2); if ((la & ~omask) - odd) { hr = rcache_get_tmp(); emith_add_r_r_ptr_imm(hr, hr2, (la & ~omask) - odd); diff --git a/cpu/sh2/compiler.h b/cpu/sh2/compiler.h index 00a8707b..b1b7487f 100644 --- a/cpu/sh2/compiler.h +++ b/cpu/sh2/compiler.h @@ -48,6 +48,9 @@ unsigned short scan_block(uint32_t base_pc, int is_slave, #elif defined(__riscv__) || defined(__riscv) #define DRC_SR_REG "s11" #define DRC_REG_LL 0 // no ABI for (__ILP32__ && __riscv_xlen != 32) +#elif defined(__powerpc__) +#define DRC_SR_REG "r30" +#define DRC_REG_LL 0 // no ABI for __ILP32__ #elif defined(__i386__) #define DRC_SR_REG "edi" #define DRC_REG_LL 0 // 32 bit diff --git a/platform/linux/emu.c b/platform/linux/emu.c index 5e4dd72a..005f82a3 100644 --- a/platform/linux/emu.c +++ b/platform/linux/emu.c @@ -29,7 +29,7 @@ void pemu_prep_defconfig(void) void pemu_validate_config(void) { -#if !defined(__arm__) && !defined(__aarch64__) && !defined(__mips__) && 
!defined(__riscv__) && !defined(__riscv) && !defined(__i386__) && !defined(__x86_64__) +#if !defined(__arm__) && !defined(__aarch64__) && !defined(__mips__) && !defined(__riscv__) && !defined(__riscv) && !defined(__powerpc__) && !defined(__i386__) && !defined(__x86_64__) PicoIn.opt &= ~POPT_EN_DRC; #endif } From bb83412c51929e6bf83bc1aceec2a2d0ed9fb037 Mon Sep 17 00:00:00 2001 From: kub Date: Sun, 21 Jun 2020 22:32:37 +0200 Subject: [PATCH 0323/1110] vdp fifo, DMA bugfix --- pico/videoport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pico/videoport.c b/pico/videoport.c index f9fd6ece..f324f704 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -164,7 +164,7 @@ static int PicoVideoFIFODrain(int level, int cycles, int bgdma) //int osl = fifo_slot; // process FIFO entries until low level is reached - while (vf->fifo_slot < vf->fifo_maxslot && cycles < 488 && + while (vf->fifo_slot <= vf->fifo_maxslot && cycles < 488 && ((vf->fifo_total > level) | (vf->fifo_queue[vf->fifo_qx] & bgdma))) { int b = vf->fifo_queue[vf->fifo_qx] & FQ_BYTE; int cnt = bgdma ? 
pv->fifo_cnt : ((vf->fifo_total-level)<fifo_cnt&b); From c815b1bc594e759d51cd8a71266f61efe303729e Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 23 Jun 2020 23:34:07 +0200 Subject: [PATCH 0324/1110] sh2 drc, backend 32/64 bit compatibility fixes for Mips/RiscV --- cpu/drc/emit_mips.c | 27 +++++++++++++++------------ cpu/drc/emit_ppc.c | 2 +- cpu/drc/emit_riscv.c | 33 ++++++++++++++++++--------------- 3 files changed, 34 insertions(+), 28 deletions(-) diff --git a/cpu/drc/emit_mips.c b/cpu/drc/emit_mips.c index 8eddd219..1c98ac04 100644 --- a/cpu/drc/emit_mips.c +++ b/cpu/drc/emit_mips.c @@ -286,6 +286,7 @@ enum { RB_SRL=0, RB_ROTR=1 }; #define FN_PSUBU FN_SUBU #define PTR_SCALE 2 #endif +#define PTR_SIZE (1< 0) emith_write_r_r_offs(r, SP, 0+16); \ + int offs_ = 8+16 - 2*PTR_SIZE; \ + emith_add_r_r_ptr_imm(SP, SP, -8-16); \ + emith_write_r_r_offs_ptr(LR, SP, offs_ + PTR_SIZE); \ + if ((r) > 0) emith_write_r_r_offs(r, SP, offs_); \ } while (0) #define emith_pop_and_ret(r) do { \ - if ((r) > 0) emith_read_r_r_offs(r, SP, 0+16); \ - emith_read_r_r_offs(LR, SP, 4+16); \ + int offs_ = 8+16 - 2*PTR_SIZE; \ + if ((r) > 0) emith_read_r_r_offs(r, SP, offs_); \ + emith_read_r_r_offs_ptr(LR, SP, offs_ + PTR_SIZE); \ emith_add_r_r_ptr_imm(SP, SP, 8+16); \ emith_ret(); \ } while (0) @@ -1567,21 +1570,21 @@ static int emith_cond_check(int cond, int *r) // SH2 drc specific #define emith_sh2_drc_entry() do { \ - int _c; u32 _m = 0xd0ff0000; \ + int _c, _z = PTR_SIZE; u32 _m = 0xd0ff0000; \ if (__builtin_parity(_m) == 1) _m |= 0x1; /* ABI align for SP is 8 */ \ - int _s = count_bits(_m) * 4 + 16, _o = _s; /* 16 byte arg save area */ \ + int _s = count_bits(_m) * _z + 16, _o = _s; /* 16 O32 arg save area */ \ if (_s) emith_add_r_r_ptr_imm(SP, SP, -_s); \ for (_c = HOST_REGS-1; _m && _c >= 0; _m &= ~(1 << _c), _c--) \ if (_m & (1 << _c)) \ - { _o -= 4; if (_c) emith_write_r_r_offs(_c, SP, _o); } \ + { _o -= _z; if (_c) emith_write_r_r_offs_ptr(_c, SP, _o); } \ } while (0) #define 
emith_sh2_drc_exit() do { \ - int _c; u32 _m = 0xd0ff0000; \ + int _c, _z = PTR_SIZE; u32 _m = 0xd0ff0000; \ if (__builtin_parity(_m) == 1) _m |= 0x1; \ - int _s = count_bits(_m) * 4 + 16, _o = 16; \ + int _s = count_bits(_m) * _z + 16, _o = 16; \ for (_c = 0; _m && _c < HOST_REGS; _m &= ~(1 << _c), _c++) \ if (_m & (1 << _c)) \ - { if (_c) emith_read_r_r_offs(_c, SP, _o); _o += 4; } \ + { if (_c) emith_read_r_r_offs_ptr(_c, SP, _o); _o += _z; } \ if (_s) emith_add_r_r_ptr_imm(SP, SP, _s); \ emith_ret(); \ } while (0) diff --git a/cpu/drc/emit_ppc.c b/cpu/drc/emit_ppc.c index 286d4166..54050bad 100644 --- a/cpu/drc/emit_ppc.c +++ b/cpu/drc/emit_ppc.c @@ -1545,7 +1545,7 @@ static int emith_cond_check(int cond) // SH2 drc specific #define STACK_EXTRA ((8+6)*PTR_SIZE) // Param, ABI (LR,CR,FP etc) save areas #define emith_sh2_drc_entry() do { \ - int _c, _z = PTR_SIZE; u32 _m = 0xffffc000; /* r14-r30 */ \ + int _c, _z = PTR_SIZE; u32 _m = 0xffffc000; /* r14-r31 */ \ if (__builtin_parity(_m) == 1) _m |= 0x1; /* ABI align for SP is 16 */ \ int _s = count_bits(_m) * _z, _o = 0; \ for (_c = HOST_REGS-1; _m && _c >= 0; _m &= ~(1 << _c), _c--) \ diff --git a/cpu/drc/emit_riscv.c b/cpu/drc/emit_riscv.c index 954d14a7..ab6c4fd0 100644 --- a/cpu/drc/emit_riscv.c +++ b/cpu/drc/emit_riscv.c @@ -244,6 +244,8 @@ enum { F2_ALT=0x20, F2_MULDIV=0x01 }; } while (0) #endif +#define PTR_SIZE (1<= 0; _m &= ~(1 << _c), _c--) \ if (_m & (1 << _c)) \ - { _o -= 4; if (_c) emith_write_r_r_offs(_c, SP, _o); } \ + { _o -= _z; if (_c) emith_write_r_r_offs_ptr(_c, SP, _o); } \ } while (0) #define emith_restore_caller_regs(mask) do { \ - int _c; u32 _m = mask & 0x3fce0; \ + int _c, _z = PTR_SIZE; u32 _m = mask & 0x3fce0; \ _c = count_bits(_m)&3; _m |= (1<<((4-_c)&3))-1; /* ABI align */ \ - int _s = count_bits(_m) * 4, _o = 0; \ + int _s = count_bits(_m) * _z, _o = 0; \ for (_c = 0; _m && _c < HOST_REGS; _m &= ~(1 << _c), _c++) \ if (_m & (1 << _c)) \ - { if (_c) emith_read_r_r_offs(_c, SP, _o); _o 
+= 4; } \ + { if (_c) emith_read_r_r_offs_ptr(_c, SP, _o); _o += _z; } \ if (_s) emith_add_r_r_ptr_imm(SP, SP, _s); \ } while (0) @@ -1312,6 +1314,7 @@ static int emith_cond_check(int cond, int *r, int *s) // NB: returns position of patch for cache maintenance #define emith_jump_patch(ptr, target, pos) do { \ u32 *ptr_ = (u32 *)ptr; /* must skip condition check code */ \ + while ((*ptr_&0x77) != OP_BCOND && (*ptr_&0x77) != OP_LUI) ptr_ ++; \ if ((*ptr_&0x77) == OP_BCOND) { \ u32 *p_ = ptr_, disp_ = (u8 *)target - (u8 *)ptr_; \ u32 f1_ = _CB(*ptr_,3,12,0); \ @@ -1382,13 +1385,13 @@ static int emith_cond_check(int cond, int *r, int *s) #define emith_push_ret(r) do { \ emith_add_r_r_ptr_imm(SP, SP, -16); /* ABI requires 16 byte aligment */\ - emith_write_r_r_offs(LR, SP, 4); \ + emith_write_r_r_offs_ptr(LR, SP, 8); \ if ((r) > 0) emith_write_r_r_offs(r, SP, 0); \ } while (0) #define emith_pop_and_ret(r) do { \ if ((r) > 0) emith_read_r_r_offs(r, SP, 0); \ - emith_read_r_r_offs(LR, SP, 4); \ + emith_read_r_r_offs_ptr(LR, SP, 8); \ emith_add_r_r_ptr_imm(SP, SP, 16); \ emith_ret(); \ } while (0) @@ -1404,21 +1407,21 @@ static int emith_cond_check(int cond, int *r, int *s) // SH2 drc specific #define emith_sh2_drc_entry() do { \ - int _c; u32 _m = 0x0ffc0202; /* x1,x9,x18-x27 */ \ + int _c, _z = PTR_SIZE; u32 _m = 0x0ffc0202; /* x1,x9,x18-x27 */ \ _c = count_bits(_m)&3; _m |= (1<<((4-_c)&3))-1; /* ABI align */ \ - int _s = count_bits(_m) * 4, _o = _s; \ + int _s = count_bits(_m) * _z, _o = _s; \ if (_s) emith_add_r_r_ptr_imm(SP, SP, -_s); \ for (_c = HOST_REGS-1; _m && _c >= 0; _m &= ~(1 << _c), _c--) \ if (_m & (1 << _c)) \ - { _o -= 4; if (_c) emith_write_r_r_offs(_c, SP, _o); } \ + { _o -= _z; if (_c) emith_write_r_r_offs_ptr(_c, SP, _o); } \ } while (0) #define emith_sh2_drc_exit() do { \ - int _c; u32 _m = 0x0ffc0202; \ + int _c, _z = PTR_SIZE; u32 _m = 0x0ffc0202; \ _c = count_bits(_m)&3; _m |= (1<<((4-_c)&3))-1; /* ABI align */ \ - int _s = count_bits(_m) * 4, _o = 
0; \ + int _s = count_bits(_m) * _z, _o = 0; \ for (_c = 0; _m && _c < HOST_REGS; _m &= ~(1 << _c), _c++) \ if (_m & (1 << _c)) \ - { if (_c) emith_read_r_r_offs(_c, SP, _o); _o += 4; } \ + { if (_c) emith_read_r_r_offs_ptr(_c, SP, _o); _o += _z; } \ if (_s) emith_add_r_r_ptr_imm(SP, SP, _s); \ emith_ret(); \ } while (0) @@ -1428,7 +1431,7 @@ static int emith_cond_check(int cond, int *r, int *s) emith_lsr(mask, a, SH2_READ_SHIFT); \ emith_add_r_r_r_lsl_ptr(tab, tab, mask, PTR_SCALE+1); \ emith_read_r_r_offs_ptr(func, tab, 0); \ - emith_read_r_r_offs(mask, tab, 1 << PTR_SCALE); \ + emith_read_r_r_offs(mask, tab, PTR_SIZE); \ emith_addf_r_r_r_ptr(func, func, func); \ } while (0) From 7a7265eea0ec266e4ca77226587d566a43bfbdb6 Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 23 Jun 2020 23:36:38 +0200 Subject: [PATCH 0325/1110] SDL UI, fix for CD LED display --- platform/common/plat_sdl.c | 2 +- platform/linux/emu.c | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/platform/common/plat_sdl.c b/platform/common/plat_sdl.c index bce4b084..bb1ce612 100644 --- a/platform/common/plat_sdl.c +++ b/platform/common/plat_sdl.c @@ -270,7 +270,7 @@ void plat_init(void) if (shadow_size < 320 * 480 * 2) shadow_size = 320 * 480 * 2; - shadow_fb = malloc(shadow_size); + shadow_fb = calloc(1, shadow_size); g_menubg_ptr = calloc(1, shadow_size); if (shadow_fb == NULL || g_menubg_ptr == NULL) { fprintf(stderr, "OOM\n"); diff --git a/platform/linux/emu.c b/platform/linux/emu.c index 005f82a3..597c1308 100644 --- a/platform/linux/emu.c +++ b/platform/linux/emu.c @@ -39,10 +39,11 @@ static void draw_cd_leds(void) int led_reg, pitch, scr_offs, led_offs; led_reg = Pico_mcd->s68k_regs[0]; - pitch = 320; + pitch = g_screen_ppitch; led_offs = 4; scr_offs = pitch * 2 + 4; +#if 0 if (currentConfig.renderer != RT_16BIT) { #define p(x) px[(x) >> 2] // 8-bit modes @@ -52,7 +53,9 @@ static void draw_cd_leds(void) p(pitch*0) = p(pitch*1) = p(pitch*2) = col_g; p(pitch*0 + led_offs) = 
p(pitch*1 + led_offs) = p(pitch*2 + led_offs) = col_r; #undef p - } else { + } else +#endif + { #define p(x) px[(x)*2 >> 2] = px[((x)*2 >> 2) + 1] // 16-bit modes unsigned int *px = (unsigned int *)((short *)g_screen_ptr + scr_offs); From dd67441606e0c131530dd43348dd37a47fff6bcb Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 23 Jun 2020 23:43:53 +0200 Subject: [PATCH 0326/1110] sh2 drc, optimisation for SH2 16x16 multiplication --- cpu/sh2/compiler.c | 117 +++++++++++++++++++++++++++++++-------------- 1 file changed, 82 insertions(+), 35 deletions(-) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index d1a971c6..cba97e2b 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -1,7 +1,7 @@ /* * SH2 recompiler * (C) notaz, 2009,2010,2013 - * (C) kub, 2018,2019 + * (C) kub, 2018,2019,2020 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. @@ -398,12 +398,13 @@ int rchit, rcmiss; enum cache_reg_htype { HRT_TEMP = 1, // is for temps and args HRT_REG = 2, // is for sh2 regs - HRT_STATIC = 2, // is for static mappings (same as HRT_REG) }; enum cache_reg_flags { HRF_DIRTY = 1 << 0, // has "dirty" value to be written to ctx HRF_PINNED = 1 << 1, // has a pinned mapping + HRF_S16 = 1 << 2, // has a sign extended 16 bit value + HRF_U16 = 1 << 3, // has a zero extended 16 bit value }; enum cache_reg_type { @@ -413,9 +414,9 @@ enum cache_reg_type { }; typedef struct { - u8 hreg; // "host" reg + u8 hreg:6; // "host" reg u8 htype:2; // TEMP or REG? - u8 flags:2; // DIRTY, PINNED? + u8 flags:4; // DIRTY, PINNED? u8 type:2; // CACHED or TEMP? 
u8 locked:2; // LOCKED reference counter u16 stamp; // kind of a timestamp @@ -1334,6 +1335,37 @@ static void rcache_remove_vreg_alias(int x, sh2_reg_e r); static void rcache_evict_vreg(int x); static void rcache_remap_vreg(int x); +static void rcache_set_x16(int hr, int s16_, int u16_) +{ + int x = reg_map_host[hr]; + if (x >= 0) { + cache_regs[x].flags &= ~(HRF_S16|HRF_U16); + if (s16_) cache_regs[x].flags |= HRF_S16; + if (u16_) cache_regs[x].flags |= HRF_U16; + } +} + +static void rcache_copy_x16(int hr, int hr2) +{ + int x = reg_map_host[hr], y = reg_map_host[hr2]; + if (x >= 0 && y >= 0) { + cache_regs[x].flags = (cache_regs[x].flags & ~(HRF_S16|HRF_U16)) | + (cache_regs[y].flags & (HRF_S16|HRF_U16)); + } +} + +static int rcache_is_s16(int hr) +{ + int x = reg_map_host[hr]; + return (x >= 0 ? cache_regs[x].flags & HRF_S16 : 0); +} + +static int rcache_is_u16(int hr) +{ + int x = reg_map_host[hr]; + return (x >= 0 ? cache_regs[x].flags & HRF_U16 : 0); +} + #define RCACHE_DUMP(msg) { \ cache_reg_t *cp; \ guest_reg_t *gp; \ @@ -1467,10 +1499,13 @@ static int gconst_check(sh2_reg_e r) static int gconst_try_read(int vreg, sh2_reg_e r) { int i, x; + u32 v; if (guest_regs[r].flags & GRF_CDIRTY) { x = guest_regs[r].cnst; - emith_move_r_imm(cache_regs[vreg].hreg, gconsts[x].val); + v = gconsts[x].val; + emith_move_r_imm(cache_regs[vreg].hreg, v); + rcache_set_x16(cache_regs[vreg].hreg, v == (s16)v, v == (u16)v); FOR_ALL_BITS_SET_DO(gconsts[x].gregs, i, { if (guest_regs[i].vreg >= 0 && guest_regs[i].vreg != vreg) @@ -1641,6 +1676,8 @@ static void rcache_clean_vreg(int x) rcache_evict_vreg(guest_regs[r].sreg); emith_move_r_r(cache_regs[guest_regs[r].sreg].hreg, cache_regs[guest_regs[r].vreg].hreg); + rcache_copy_x16(cache_regs[guest_regs[r].sreg].hreg, + cache_regs[guest_regs[r].vreg].hreg); rcache_remove_vreg_alias(x, r); rcache_add_vreg_alias(guest_regs[r].sreg, r); cache_regs[guest_regs[r].sreg].flags |= HRF_DIRTY; @@ -1783,9 +1820,9 @@ static int 
rcache_allocate_temp(void) #if REMAP_REGISTER // maps a host register to a REG -static int rcache_map_reg(sh2_reg_e r, int hr, int mode) +static int rcache_map_reg(sh2_reg_e r, int hr) { - int x, i; + int i; gconst_kill(r); @@ -1797,19 +1834,6 @@ static int rcache_map_reg(sh2_reg_e r, int hr, int mode) exit(1); } - // deal with statically mapped regs - if (mode == RC_GR_RMW && (guest_regs[r].flags & (GRF_STATIC|GRF_PINNED))) { - x = guest_regs[r].sreg; - if (guest_regs[r].vreg == x) { - // STATIC in its sreg with no aliases, and some processing pending - if (cache_regs[x].gregs == 1 << r) - return cache_regs[x].hreg; - } else if (cache_regs[x].type == HR_FREE || - (cache_regs[x].type == HR_TEMP && !cache_regs[x].locked)) - // STATIC not in its sreg, with sreg available -> move it - i = guest_regs[r].sreg; - } - // remove old mappings of r and i if one exists if (guest_regs[r].vreg >= 0) rcache_remove_vreg_alias(guest_regs[r].vreg, r); @@ -1818,7 +1842,6 @@ static int rcache_map_reg(sh2_reg_e r, int hr, int mode) // set new mappping cache_regs[i].type = HR_CACHED; cache_regs[i].gregs = 1 << r; - cache_regs[i].flags &= HRF_PINNED; cache_regs[i].locked = 0; cache_regs[i].stamp = ++rcache_counter; cache_regs[i].flags |= HRF_DIRTY; @@ -2010,7 +2033,9 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr tr->flags |= HRF_DIRTY; guest_regs[r].flags |= GRF_DIRTY; gconst_kill(r); - } + rcache_set_x16(tr->hreg, 0, 0); + } else if (src >= 0 && cache_regs[src].hreg != tr->hreg) + rcache_copy_x16(tr->hreg, cache_regs[src].hreg); #if DRC_DEBUG & 64 RCACHE_CHECK("after getreg"); #endif @@ -2410,6 +2435,8 @@ static void rcache_clean(void) else { emith_move_r_r(cache_regs[guest_regs[i].sreg].hreg, cache_regs[guest_regs[i].vreg].hreg); + rcache_copy_x16(cache_regs[guest_regs[i].sreg].hreg, + cache_regs[guest_regs[i].vreg].hreg); rcache_remove_vreg_alias(guest_regs[i].vreg, i); } cache_regs[guest_regs[i].sreg].gregs = 1 << i; @@ -2689,6 +2716,8 @@ static 
void emit_sync_t_to_sr(void) // rd = @(arg0) static int emit_memhandler_read(int size) { + int hr; + emit_sync_t_to_sr(); rcache_clean_tmp(); #ifndef DRC_SR_REG @@ -2711,7 +2740,9 @@ static int emit_memhandler_read(int size) case 2: emith_call(sh2_drc_read32); break; // 32 } - return rcache_get_tmp_ret(); + hr = rcache_get_tmp_ret(); + rcache_set_x16(hr, (size & MF_SIZEMASK) < 2, 0); + return hr; } // @(arg0) = arg1 @@ -2747,6 +2778,7 @@ static int emit_memhandler_read_rr(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rs, u32 off emit_move_r_imm32(rd, val); hr2 = rcache_get_reg(rd, RC_GR_RMW, NULL); } + rcache_set_x16(hr2, val == (s16)val, val == (u16)val); if (size & MF_POSTINCR) emit_add_r_imm(rs, 1 << (size & MF_SIZEMASK)); return hr2; @@ -2790,12 +2822,11 @@ static int emit_memhandler_read_rr(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rs, u32 off } hr = emit_memhandler_read(size); - size &= MF_SIZEMASK; if (rd == SHR_TMP) hr2 = hr; else #if REMAP_REGISTER - hr2 = rcache_map_reg(rd, hr, RC_GR_WRITE); + hr2 = rcache_map_reg(rd, hr); #else hr2 = rcache_get_reg(rd, RC_GR_WRITE, NULL); #endif @@ -2865,12 +2896,11 @@ static int emit_indirect_indexed_read(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rx, sh2_ emith_add_r_r_r(hr, tx, ty); hr = emit_memhandler_read(size); - size &= MF_SIZEMASK; if (rd == SHR_TMP) hr2 = hr; else #if REMAP_REGISTER - hr2 = rcache_map_reg(rd, hr, RC_GR_WRITE); + hr2 = rcache_map_reg(rd, hr); #else hr2 = rcache_get_reg(rd, RC_GR_WRITE, NULL); #endif @@ -3644,7 +3674,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } tmp2 = emit_memhandler_read(opd->size); #if REMAP_REGISTER - tmp3 = rcache_map_reg(GET_Rn(), tmp2, RC_GR_WRITE); + tmp3 = rcache_map_reg(GET_Rn(), tmp2); #else tmp3 = rcache_get_reg(GET_Rn(), RC_GR_WRITE, NULL); #endif @@ -3886,16 +3916,29 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); tmp = rcache_get_reg(SHR_MACL, 
RC_GR_WRITE, NULL); - tmp4 = rcache_get_tmp(); + tmp4 = tmp3; if (op & 1) { - emith_sext(tmp, tmp2, 16); - emith_sext(tmp4, tmp3, 16); + if (! rcache_is_s16(tmp2)) { + emith_sext(tmp, tmp2, 16); + tmp2 = tmp; + } + if (! rcache_is_s16(tmp3)) { + tmp4 = rcache_get_tmp(); + emith_sext(tmp4, tmp3, 16); + } } else { - emith_clear_msb(tmp, tmp2, 16); - emith_clear_msb(tmp4, tmp3, 16); + if (! rcache_is_u16(tmp2)) { + emith_clear_msb(tmp, tmp2, 16); + tmp2 = tmp; + } + if (! rcache_is_u16(tmp3)) { + tmp4 = rcache_get_tmp(); + emith_clear_msb(tmp4, tmp3, 16); + } } - emith_mul(tmp, tmp, tmp4); - rcache_free_tmp(tmp4); + emith_mul(tmp, tmp2, tmp4); + if (tmp4 != tmp3) + rcache_free_tmp(tmp4); goto end_op; } goto default_; @@ -4415,15 +4458,19 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) break; case 0x0c: // EXTU.B Rm,Rn 0110nnnnmmmm1100 emith_clear_msb(tmp2, tmp, 24); + rcache_set_x16(tmp2, 1, 1); break; case 0x0d: // EXTU.W Rm,Rn 0110nnnnmmmm1101 emith_clear_msb(tmp2, tmp, 16); + rcache_set_x16(tmp2, 0, 1); break; case 0x0e: // EXTS.B Rm,Rn 0110nnnnmmmm1110 emith_sext(tmp2, tmp, 8); + rcache_set_x16(tmp2, 1, 0); break; case 0x0f: // EXTS.W Rm,Rn 0110nnnnmmmm1111 emith_sext(tmp2, tmp, 16); + rcache_set_x16(tmp2, 1, 0); break; } goto end_op; From 09cab6d27a8079522abd4eddeaa0561f581e8409 Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 25 Jun 2020 16:46:35 +0200 Subject: [PATCH 0327/1110] SDL UI, preparation for 2x mode, for improved color resolution --- platform/common/plat_sdl.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/platform/common/plat_sdl.c b/platform/common/plat_sdl.c index bb1ce612..276a0c61 100644 --- a/platform/common/plat_sdl.c +++ b/platform/common/plat_sdl.c @@ -136,9 +136,19 @@ void bgr_to_uyvy_init(void) void rgb565_to_uyvy(void *d, const void *s, int pixels) { - unsigned int *dst = d; - const unsigned short *src = s; + uint32_t *dst = d; + const uint16_t *src = s; + if (plat_sdl_overlay->w > 
2*plat_sdl_overlay->h) + for (; pixels > 0; src += 4, dst += 4, pixels -= 4) + { + struct uyvy *uyvy0 = yuv_uyvy + src[0], *uyvy1 = yuv_uyvy + src[1]; + struct uyvy *uyvy2 = yuv_uyvy + src[2], *uyvy3 = yuv_uyvy + src[3]; + dst[0] = (uyvy0->y << 24) | uyvy0->vyu; + dst[1] = (uyvy1->y << 24) | uyvy1->vyu; + dst[2] = (uyvy2->y << 24) | uyvy2->vyu; + dst[3] = (uyvy3->y << 24) | uyvy3->vyu; + } else for (; pixels > 0; src += 4, dst += 2, pixels -= 4) { struct uyvy *uyvy0 = yuv_uyvy + src[0], *uyvy1 = yuv_uyvy + src[1]; From 18c95d9f57eaac320c3d6725333d7f11d590ac70 Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 25 Jun 2020 16:49:17 +0200 Subject: [PATCH 0328/1110] sh2 drc, fix for SH2 T handling in Mips/RiscV --- cpu/drc/emit_mips.c | 2 +- cpu/drc/emit_riscv.c | 25 +++++++++---------------- 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/cpu/drc/emit_mips.c b/cpu/drc/emit_mips.c index 1c98ac04..fb7de366 100644 --- a/cpu/drc/emit_mips.c +++ b/cpu/drc/emit_mips.c @@ -1801,7 +1801,7 @@ static void emith_set_t_cond(int sr, int cond) EMIT(MIPS_SLTU_IMM(AT,AT, 1)); r=AT; val++; break; } else if ((b>>5) == OP_BNE) { EMIT(MIPS_XOR_REG(AT, r, b&0x1f)); - EMIT(MIPS_SLTU_IMM(AT,Z0,AT)); r=AT; val++; break; + EMIT(MIPS_SLTU_REG(AT,Z0,AT)); r=AT; val++; break; } } if (val) { diff --git a/cpu/drc/emit_riscv.c b/cpu/drc/emit_riscv.c index ab6c4fd0..de99d4fd 100644 --- a/cpu/drc/emit_riscv.c +++ b/cpu/drc/emit_riscv.c @@ -1601,28 +1601,21 @@ static void emith_set_t_cond(int sr, int cond) u32 val = 0, inv = 0; // try to avoid jumping around if possible - if (emith_cmp_rs >= 0) { - if (emith_cmp_rt >= 0) - b = emith_cmpr_check(emith_cmp_rs, emith_cmp_rt, cond, &r, &s); - else - b = emith_cmpi_check(emith_cmp_rs, emith_cmp_imm, cond, &r, &s); - } else { - b = emith_cond_check(cond, &r, &s); - if (r == Z0) { - if (b == F1_BEQ || b == F1_BGE || b == F1_BGEU) - emith_or_r_imm(sr, T); - return; - } else if (r == FC) - val++, inv = (b == F1_BEQ); - } + b = emith_cond_check(cond, 
&r, &s); + if (r == Z0) { + if (b == F1_BEQ || b == F1_BGE || b == F1_BGEU) + emith_or_r_imm(sr, T); + return; + } else if (r == FC) + val++, inv = (b == F1_BEQ); if (!val) switch (b) { case F1_BEQ: if (s == Z0) { EMIT(R5_SLTU_IMM(AT,r ,1)); r=AT; val++; break; } EMIT(R5_XOR_REG(AT, r, s)); EMIT(R5_SLTU_IMM(AT,AT, 1)); r=AT; val++; break; - case F1_BNE: if (s == Z0) { EMIT(R5_SLTU_IMM(AT,Z0,r)); r=AT; val++; break; } + case F1_BNE: if (s == Z0) { EMIT(R5_SLTU_REG(AT,Z0,r)); r=AT; val++; break; } EMIT(R5_XOR_REG(AT, r, s)); - EMIT(R5_SLTU_IMM(AT,Z0,AT)); r=AT; val++; break; + EMIT(R5_SLTU_REG(AT,Z0,AT)); r=AT; val++; break; case F1_BLTU: EMIT(R5_SLTU_REG(AT, r, s)); r=AT; val++; break; case F1_BGEU: EMIT(R5_SLTU_REG(AT, r, s)); r=AT; val++; inv++; break; case F1_BLT: EMIT(R5_SLT_REG(AT, r, s)); r=AT; val++; break; From b29bf88c2261042bb8143394fe868a291fe8baff Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 2 Jul 2020 16:14:16 +0200 Subject: [PATCH 0329/1110] libretro make fix for non-arm architectures --- Makefile.libretro | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Makefile.libretro b/Makefile.libretro index 51da9828..179223e4 100644 --- a/Makefile.libretro +++ b/Makefile.libretro @@ -27,7 +27,7 @@ STATIC_LINKING:= 0 TARGET_NAME := picodrive LIBM := -lm GIT_VERSION ?= $(shell git rev-parse --short HEAD || echo unknown) -ifneq ($(GIT_VERSION)," unknown") +ifneq ($(GIT_VERSION),"unknown") CFLAGS += -DGIT_VERSION=\"$(GIT_VERSION)\" endif @@ -463,6 +463,9 @@ endif SHARED ?= -shared LDFLAGS += $(SHARED) $(fpic) +ifneq ($(ARCH), arm) +ARCH = $(shell $(CC) -dumpmachine | awk -F '-' '{print $$1}') +endif PLATFORM = libretro NO_CONFIG_MAK = yes From 9279264a4979ae72097117ec771a41572e765253 Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 3 Jul 2020 00:45:13 +0200 Subject: [PATCH 0330/1110] switch submodules to github and update to current version --- .gitmodules | 4 ++-- platform/libpicofe | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff 
--git a/.gitmodules b/.gitmodules index b27d2a16..36091a2d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,6 @@ [submodule "platform/libpicofe"] path = platform/libpicofe - url = git://notaz.gp2x.de/~notaz/libpicofe.git + url = https://github.com/notaz/libpicofe.git [submodule "cpu/cyclone"] path = cpu/cyclone - url = git://notaz.gp2x.de/~notaz/cyclone68000.git + url = https://github.com/notaz/cyclone68000.git diff --git a/platform/libpicofe b/platform/libpicofe index f8cd6a08..811cef4d 160000 --- a/platform/libpicofe +++ b/platform/libpicofe @@ -1 +1 @@ -Subproject commit f8cd6a082bb9c228397a0436f28818b74d8e9636 +Subproject commit 811cef4d9f3772d0bbf6c1f0434e5860c9550abc From 62c7479bb154d1a6ce824054ea80fbf245785810 Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 3 Jul 2020 00:46:40 +0200 Subject: [PATCH 0331/1110] SDL UI, 2x overlay mode, for improved color resolution --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index c23841d7..d88e51f6 100644 --- a/Makefile +++ b/Makefile @@ -115,6 +115,7 @@ OBJS += platform/libpicofe/gl_platform.o USE_FRONTEND = 1 endif ifeq "$(PLATFORM)" "generic" +CFLAGS += -DSDL_OVERLAY_2X OBJS += platform/linux/emu.o platform/linux/blit.o # FIXME OBJS += platform/common/plat_sdl.o OBJS += platform/libpicofe/plat_sdl.o platform/libpicofe/in_sdl.o From e2b573c0b047e2971ead29b3c1e6abf76ad595e4 Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 7 Jul 2020 10:17:57 +0200 Subject: [PATCH 0332/1110] libretro, fix for windows and osx --- tools/mkoffsets.sh | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/mkoffsets.sh b/tools/mkoffsets.sh index 8a1092e0..d890fc0f 100755 --- a/tools/mkoffsets.sh +++ b/tools/mkoffsets.sh @@ -46,14 +46,22 @@ get_define () # prefix struct member member... 
line=$(printf "#define %-20s 0x%04x" $prefix$name $rodata) } +fn="${1:-.}/pico_int_offs.h" if echo $CFLAGS | grep -qe -flto; then CFLAGS="$CFLAGS -fno-lto"; fi + +# don't do this if readelf isn't available. it doesn't matter since offsets are +# only needed for the asm parts (currently mips/arm32) and those have readelf +if ! command -v readelf >/dev/null; then + echo "/* mkoffset.sh: readelf not found, offset table not created */" >$fn + echo "WARNING: readelf not found, offset table not created" + exit +fi # determine endianess echo '#include ' >/tmp/getoffs.c echo "const int32_t val = 1;" >>/tmp/getoffs.c compile_rodata ENDIAN=$(if [ "$rodata" -eq 1 ]; then echo be; else echo le; fi) # output header -fn="${1:-.}/pico_int_offs.h" echo "/* autogenerated by mkoffset.sh, do not edit */" >$fn echo "/* target endianess: $ENDIAN, compiled with: $CC $CFLAGS */" >>$fn # output offsets From 0198149a7284d9f5369f84b818e86a2bc77ebb8f Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 8 Jul 2020 20:14:12 +0200 Subject: [PATCH 0333/1110] libretro, changes to allow for both standalone and libretro build --- Makefile | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/Makefile b/Makefile index d88e51f6..f80a9ec7 100644 --- a/Makefile +++ b/Makefile @@ -1,21 +1,6 @@ TARGET ?= PicoDrive DEBUG ?= 0 -CFLAGS += -Wall -ggdb -ffunction-sections -fdata-sections CFLAGS += -I. -ifeq "$(DEBUG)" "0" -CFLAGS += -O3 -DNDEBUG -endif - -# This is actually needed, believe me. -# If you really have to disable this, set NO_ALIGN_FUNCTIONS elsewhere. -ifndef NO_ALIGN_FUNCTIONS -CFLAGS += -falign-functions=2 -endif -LDFLAGS += -Wl,--gc-sections - -# profiling -pprof ?= 0 -gperf ?= 0 all: config.mak target_ @@ -34,6 +19,28 @@ else # NO_CONFIG_MAK config.mak: endif +# This is actually needed, believe me. +# If you really have to disable this, set NO_ALIGN_FUNCTIONS elsewhere. 
+ifndef NO_ALIGN_FUNCTIONS +CFLAGS += -falign-functions=2 +endif +LDFLAGS += -Wl,--gc-sections + +# profiling +pprof ?= 0 +gperf ?= 0 + +ifneq ("$(PLATFORM)", "libretro") + CFLAGS += -Wall -g +ifneq ($(findstring gcc,$(CC)),) + CFLAGS += -ffunction-sections -fdata-sections + LDFLAGS += -Wl,--gc-sections +endif +ifeq "$(DEBUG)" "0" + CFLAGS += -O3 -DNDEBUG +endif +endif + ifeq ("$(PLATFORM)",$(filter "$(PLATFORM)","gp2x" "opendingux" "rpi1")) # very small caches, avoid optimization options making the binary much bigger CFLAGS += -finline-limit=42 -fno-unroll-loops -fno-ipa-cp -ffast-math From 6b67b6aa13d4b8db12d7824582662f3976803ead Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 8 Jul 2020 20:46:46 +0200 Subject: [PATCH 0334/1110] libretro, more fixes and cleanups for windows and osx --- Makefile.libretro | 90 ++++++++++++---------------------------------- cpu/drc/emit_x86.c | 4 +-- cpu/sh2/compiler.c | 35 +++++++++--------- cpu/sh2/compiler.h | 2 +- cpu/sh2/sh2.h | 4 +++ tools/mkoffsets.sh | 22 ++++++++---- 6 files changed, 61 insertions(+), 96 deletions(-) diff --git a/Makefile.libretro b/Makefile.libretro index 179223e4..309d3f72 100644 --- a/Makefile.libretro +++ b/Makefile.libretro @@ -37,6 +37,8 @@ asm_ym2612 = 0 asm_misc = 0 asm_cdmemory = 0 asm_mix = 0 +asm_32xdraw = 0 +asm_32xmemory = 0 fpic := @@ -48,7 +50,7 @@ endif ifeq ($(platform), unix) EXT ?= so TARGET := $(TARGET_NAME)_libretro.$(EXT) - fpic := -fPIC + fpic := -fPIC SHARED := -shared DONT_COMPILE_IN_ZLIB = 1 CFLAGS += -DFAMEC_NO_GOTOS @@ -59,7 +61,7 @@ else ifeq ($(platform), linux-portable) EXT ?= so TARGET := $(TARGET_NAME)_libretro.$(EXT) SHARED := -shared -nostdlib - fpic := -fPIC + fpic := -fPIC LIBM := DONT_COMPILE_IN_ZLIB = 1 CFLAGS += -DFAMEC_NO_GOTOS @@ -70,7 +72,7 @@ else ifeq ($(platform), osx) EXT ?= dylib TARGET := $(TARGET_NAME)_libretro.$(EXT) SHARED := -dynamiclib - fpic := -fPIC + fpic := -fPIC APPLE := 1 arch = intel ifeq ($(shell uname -p),powerpc) @@ -78,6 +80,8 @@ else ifeq 
($(platform), osx) endif ifeq ($(arch),ppc) CFLAGS += -DBLARGG_BIG_ENDIAN=1 -D__ppc__ -DFAMEC_NO_GOTOS + else + use_sh2drc = 1 endif OSXVER = `sw_vers -productVersion | cut -d. -f 2` OSX_LT_MAVERICKS = `(( $(OSXVER) <= 9)) && echo "YES"` @@ -99,21 +103,21 @@ else ifeq ($(platform), staticios) CXX += -miphoneos-version-min=8.0 CC_AS += -miphoneos-version-min=8.0 CFLAGS += -miphoneos-version-min=8.0 - ARCH := arm + ARCH := aarch64 STATIC_LINKING = 1 use_cyclone = 0 use_fame = 1 use_drz80 = 0 use_cz80 = 1 - use_sh2drc = 0 + use_sh2drc = 1 use_svpdrc = 0 # iOS else ifneq (,$(findstring ios,$(platform))) TARGET := $(TARGET_NAME)_libretro_ios.dylib SHARED := -dynamiclib - fpic := -fPIC + fpic := -fPIC APPLE := 1 ifeq ($(IOSSDK),) IOSSDK := $(shell xcodebuild -version -sdk iphoneos Path) @@ -155,17 +159,11 @@ else ifeq ($(platform), ps3) NO_MMAP = 1 DONT_COMPILE_IN_ZLIB = 1 - asm_memory = 0 - asm_render = 0 - asm_ym2612 = 0 - asm_misc = 0 - asm_cdpico = 0 - asm_cdmemory = 0 - asm_mix = 0 use_cyclone = 0 use_fame = 1 use_drz80 = 0 use_cz80 = 1 + use_sh2drc = 1 # sncps3 else ifeq ($(platform), sncps3) @@ -177,17 +175,11 @@ else ifeq ($(platform), sncps3) NO_MMAP = 1 DONT_COMPILE_IN_ZLIB = 1 - asm_memory = 0 - asm_render = 0 - asm_ym2612 = 0 - asm_misc = 0 - asm_cdpico = 0 - asm_cdmemory = 0 - asm_mix = 0 use_cyclone = 0 use_fame = 1 use_drz80 = 0 use_cz80 = 1 + use_sh2drc = 1 # Lightweight PS3 Homebrew SDK else ifeq ($(platform), psl1ght) @@ -199,17 +191,11 @@ else ifeq ($(platform), psl1ght) NO_MMAP = 1 DONT_COMPILE_IN_ZLIB = 1 - asm_memory = 0 - asm_render = 0 - asm_ym2612 = 0 - asm_misc = 0 - asm_cdpico = 0 - asm_cdmemory = 0 - asm_mix = 0 use_cyclone = 0 use_fame = 1 use_drz80 = 0 use_cz80 = 1 + use_sh2drc = 1 # PSP else ifeq ($(platform), psp1) @@ -222,17 +208,12 @@ else ifeq ($(platform), psp1) NO_MMAP = 1 DONT_COMPILE_IN_ZLIB = 1 - asm_memory = 0 asm_render = 1 - asm_ym2612 = 0 - asm_misc = 0 - asm_cdpico = 0 - asm_cdmemory = 0 - asm_mix = 0 use_cyclone = 0 use_fame 
= 1 use_drz80 = 0 use_cz80 = 1 + use_sh2drc = 1 # CTR (3DS) else ifeq ($(platform), ctr) @@ -250,14 +231,6 @@ else ifeq ($(platform), ctr) ARCH = arm ARM_ASM = 1 - asm_memory = 1 - asm_render = 1 - asm_ym2612 = 1 - asm_misc = 1 - asm_cdpico = 1 - asm_cdmemory = 1 - asm_mix = 1 - use_cyclone = 1 use_fame = 0 use_drz80 = 1 @@ -280,14 +253,6 @@ else ifeq ($(platform), raspberrypi) fpic := -fPIC DONT_COMPILE_IN_ZLIB = 1 - asm_memory = 1 - asm_render = 1 - asm_ym2612 = 1 - asm_misc = 1 - asm_cdpico = 1 - asm_cdmemory = 1 - asm_mix = 1 - use_cyclone = 1 use_fame = 0 use_drz80 = 1 @@ -309,14 +274,8 @@ else ifeq ($(platform), vita) NO_MMAP = 1 DONT_COMPILE_IN_ZLIB = 1 ARCH = arm + ARM_ASM = 1 - asm_memory = 1 - asm_render = 1 - asm_ym2612 = 1 - asm_misc = 1 - asm_cdpico = 1 - asm_cdmemory = 1 - asm_mix = 1 use_cyclone = 1 use_fame = 0 use_drz80 = 1 @@ -348,7 +307,7 @@ else ifeq ($(platform), wii) # QNX else ifeq ($(platform), qnx) TARGET := $(TARGET_NAME)_libretro_$(platform).so - fpic := -fPIC + fpic := -fPIC CC = qcc -Vgcc_ntoarmv7le CC_AS = $(CC) CFLAGS += -DBASE_ADDR_FIXED=0 -D__BLACKBERRY_QNX__ -marm -mcpu=cortex-a9 -mtune=cortex-a9 -mfpu=neon -mfloat-abi=softfp @@ -367,7 +326,7 @@ else ifeq ($(platform), qnx) else ifneq (,$(findstring armv,$(platform))) TARGET := $(TARGET_NAME)_libretro.so SHARED := -shared -Wl,--no-undefined,-Bsymbolic - fpic := -fPIC + fpic := -fPIC ifneq (,$(findstring cortexa5,$(platform))) CFLAGS += -marm -mcpu=cortex-a5 ASFLAGS += -mcpu=cortex-a5 @@ -408,21 +367,14 @@ else ifeq ($(platform), emscripten) # GCW0 else ifeq ($(platform), gcw0) TARGET := $(TARGET_NAME)_libretro.so - CC = /opt/gcw0-toolchain/usr/bin/mipsel-linux-gcc - AR = /opt/gcw0-toolchain/usr/bin/mipsel-linux-ar + CC = mipsel-linux-gcc + AR = mipsel-linux-ar SHARED := -shared -nostdlib - fpic := -fPIC + fpic := -fPIC LIBM := DONT_COMPILE_IN_ZLIB = 1 CFLAGS += -ffast-math -march=mips32 -mtune=mips32r2 -mhard-float - asm_memory = 0 - asm_render = 0 - asm_ym2612 = 0 - asm_misc = 0 - 
asm_cdpico = 0 - asm_cdmemory = 0 - asm_mix = 0 use_cyclone = 0 use_fame = 1 use_drz80 = 0 @@ -452,6 +404,8 @@ asm_ym2612 = 1 asm_misc = 1 asm_cdmemory = 1 asm_mix = 1 +asm_32xdraw = 1 +asm_32xmemory = 1 endif CFLAGS += $(fpic) diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index c836e159..7006beff 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -733,7 +733,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common /* mov r <-> [ebp+#offs] */ \ if ((offs) == 0) { \ emith_deref_modrm(op, 0, r, rs); \ - } else if (abs(offs) >= 0x80) { \ + } else if ((s32)(offs) < -0x80 || (s32)(offs) >= 0x80) { \ emith_deref_modrm(op, 2, r, rs); \ EMIT(offs, u32); \ } else { \ @@ -1075,7 +1075,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common #define PARAM_REGS { xCX, xDX, xR8, xR9 } #define PRESERVED_REGS { xSI, xDI, xR12, xR13, xR14, xR15, xBX, xBP } #define TEMPORARY_REGS { xAX, xR10, xR11 } -#define STATIC_SH2_REGS { SHR_SR,xBX , SHR_R(0),xR15 , SH2_R(1),xR14 } +#define STATIC_SH2_REGS { SHR_SR,xBX , SHR_R(0),xR15 , SHR_R(1),xR14 } #define host_arg2reg(rd, arg) \ switch (arg) { \ diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index cba97e2b..c8940432 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -1273,24 +1273,23 @@ static void dr_flush_tcache(int tcid) blink_free[tcid] = NULL; memset(unresolved_links[tcid], 0, sizeof(*unresolved_links[0]) * HASH_TABLE_SIZE(tcid)); memset(hash_tables[tcid], 0, sizeof(*hash_tables[0]) * HASH_TABLE_SIZE(tcid)); - if (Pico32xMem->sdram != NULL) { - if (tcid == 0) { // ROM, RAM - memset(Pico32xMem->drcblk_ram, 0, sizeof(Pico32xMem->drcblk_ram)); - memset(Pico32xMem->drclit_ram, 0, sizeof(Pico32xMem->drclit_ram)); - memset(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache)); - memset(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)); - memset(sh2s[0].rts_cache, -1, sizeof(sh2s[0].rts_cache)); - memset(sh2s[1].rts_cache, -1, sizeof(sh2s[1].rts_cache)); - 
sh2s[0].rts_cache_idx = sh2s[1].rts_cache_idx = 0; - } else { - memset(Pico32xMem->drcblk_ram, 0, sizeof(Pico32xMem->drcblk_ram)); - memset(Pico32xMem->drclit_ram, 0, sizeof(Pico32xMem->drclit_ram)); - memset(Pico32xMem->drcblk_da[tcid - 1], 0, sizeof(Pico32xMem->drcblk_da[tcid - 1])); - memset(Pico32xMem->drclit_da[tcid - 1], 0, sizeof(Pico32xMem->drclit_da[tcid - 1])); - memset(sh2s[tcid - 1].branch_cache, -1, sizeof(sh2s[0].branch_cache)); - memset(sh2s[tcid - 1].rts_cache, -1, sizeof(sh2s[0].rts_cache)); - sh2s[tcid - 1].rts_cache_idx = 0; - } + + if (tcid == 0) { // ROM, RAM + memset(Pico32xMem->drcblk_ram, 0, sizeof(Pico32xMem->drcblk_ram)); + memset(Pico32xMem->drclit_ram, 0, sizeof(Pico32xMem->drclit_ram)); + memset(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache)); + memset(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)); + memset(sh2s[0].rts_cache, -1, sizeof(sh2s[0].rts_cache)); + memset(sh2s[1].rts_cache, -1, sizeof(sh2s[1].rts_cache)); + sh2s[0].rts_cache_idx = sh2s[1].rts_cache_idx = 0; + } else { + memset(Pico32xMem->drcblk_ram, 0, sizeof(Pico32xMem->drcblk_ram)); + memset(Pico32xMem->drclit_ram, 0, sizeof(Pico32xMem->drclit_ram)); + memset(Pico32xMem->drcblk_da[tcid - 1], 0, sizeof(Pico32xMem->drcblk_da[tcid - 1])); + memset(Pico32xMem->drclit_da[tcid - 1], 0, sizeof(Pico32xMem->drclit_da[tcid - 1])); + memset(sh2s[tcid - 1].branch_cache, -1, sizeof(sh2s[0].branch_cache)); + memset(sh2s[tcid - 1].rts_cache, -1, sizeof(sh2s[0].rts_cache)); + sh2s[tcid - 1].rts_cache_idx = 0; } #if (DRC_DEBUG & 4) tcache_dsm_ptrs[tcid] = tcache_ring[tcid].base; diff --git a/cpu/sh2/compiler.h b/cpu/sh2/compiler.h index b1b7487f..76b44552 100644 --- a/cpu/sh2/compiler.h +++ b/cpu/sh2/compiler.h @@ -31,7 +31,7 @@ unsigned short scan_block(uint32_t base_pc, int is_slave, unsigned char *op_flags, uint32_t *end_pc, uint32_t *base_literals, uint32_t *end_literals); -#if defined(DRC_SH2) && defined(__GNUC__) +#if defined(DRC_SH2) && defined(__GNUC__) && 
!defined(__clang__) // direct access to some host CPU registers used by the DRC if gcc is used. // XXX MUST match SHR_SR definitions in cpu/drc/emit_*.c; should be moved there // XXX yuck, there's no portable way to determine register size. Use long long diff --git a/cpu/sh2/sh2.h b/cpu/sh2/sh2.h index b0054c05..4b0b3384 100644 --- a/cpu/sh2/sh2.h +++ b/cpu/sh2/sh2.h @@ -106,7 +106,11 @@ int sh2_execute_interpreter(SH2 *sh2c, int cycles); static __inline void sh2_execute_prepare(SH2 *sh2, int use_drc) { +#ifdef DRC_SH2 sh2->run = use_drc ? sh2_execute_drc : sh2_execute_interpreter; +#else + sh2->run = sh2_execute_interpreter; +#endif } static __inline int sh2_execute(SH2 *sh2, int cycles) diff --git a/tools/mkoffsets.sh b/tools/mkoffsets.sh index d890fc0f..6b086a93 100755 --- a/tools/mkoffsets.sh +++ b/tools/mkoffsets.sh @@ -8,6 +8,20 @@ CC=${CC:-gcc} # endianess of target (automagically determined below) ENDIAN= +# don't do this if ELF format isn't used. it doesn't matter since offsets are +# only needed for the asm parts (currently mips/arm32) and those have ELF +check_elf () +{ + echo '#include ' >/tmp/getoffs.c + echo "const int32_t val = 1;" >>/tmp/getoffs.c + $CC $CFLAGS -I .. -c /tmp/getoffs.c -o /tmp/getoffs.o || exit 1 + if ! command -v readelf >/dev/null || ! file /tmp/getoffs.o | grep -q ELF; then + echo "/* mkoffset.sh: no readelf or not ELF, offset table not created */" >$fn + echo "WARNING: no readelf or not ELF, offset table not created" + exit + fi +} + # compile with target C compiler and extract value from .rodata section compile_rodata () { @@ -49,13 +63,7 @@ get_define () # prefix struct member member... fn="${1:-.}/pico_int_offs.h" if echo $CFLAGS | grep -qe -flto; then CFLAGS="$CFLAGS -fno-lto"; fi -# don't do this if readelf isn't available. it doesn't matter since offsets are -# only needed for the asm parts (currently mips/arm32) and those have readelf -if ! 
command -v readelf >/dev/null; then - echo "/* mkoffset.sh: readelf not found, offset table not created */" >$fn - echo "WARNING: readelf not found, offset table not created" - exit -fi +check_elf # determine endianess echo '#include ' >/tmp/getoffs.c echo "const int32_t val = 1;" >>/tmp/getoffs.c From 1426b7569edccd4f5f7512a9ce4a9c2c7066fac7 Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 8 Jul 2020 20:48:16 +0200 Subject: [PATCH 0335/1110] sh2 drc, fix for x86_64 backend --- cpu/drc/emit_x86.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 7006beff..60b2b6a2 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -915,8 +915,10 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common #define emith_call_cond(cond, ptr) \ emith_call(ptr) -#define emith_call_reg(r) \ - EMIT_OP_MODRM(0xff, 3, 2, r) +#define emith_call_reg(r) do { \ + EMIT_REX_IF(0, 0, r); \ + EMIT_OP_MODRM(0xff, 3, 2, (r)&7); \ +} while (0) #define emith_call_ctx(offs) do { \ EMIT_OP_MODRM(0xff, 2, 2, CONTEXT_REG); \ @@ -934,8 +936,10 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common emith_deref_modrm(0x03, 0, r, xSP); /* add r, [xsp] */ \ } while (0) -#define emith_jump_reg(r) \ - EMIT_OP_MODRM(0xff, 3, 4, r) +#define emith_jump_reg(r) do { \ + EMIT_REX_IF(0, 0, r); \ + EMIT_OP_MODRM(0xff, 3, 4, (r)&7); \ +} while (0) #define emith_jump_ctx(offs) do { \ EMIT_OP_MODRM(0xff, 2, 4, CONTEXT_REG); \ From b1ccc271095484c0356dde19b30bccdc103dd7f0 Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 9 Jul 2020 08:40:35 +0200 Subject: [PATCH 0336/1110] sh2, fix for interpreter crash if drc is compiled in too --- cpu/sh2/compiler.c | 3 +++ cpu/sh2/compiler.h | 4 ++-- cpu/sh2/sh2.h | 3 ++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index c8940432..085a6179 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -5392,7 +5392,10 @@ 
int sh2_execute_drc(SH2 *sh2c, int cycles) // others are usual SH2 flags sh2c->sr &= 0x3f3; sh2c->sr |= cycles << 12; + + sh2c->state |= SH2_IN_DRC; sh2_drc_entry(sh2c); + sh2c->state &= ~SH2_IN_DRC; // TODO: irq cycles ret_cycles = (int32_t)sh2c->sr >> 12; diff --git a/cpu/sh2/compiler.h b/cpu/sh2/compiler.h index 76b44552..9642492d 100644 --- a/cpu/sh2/compiler.h +++ b/cpu/sh2/compiler.h @@ -72,11 +72,11 @@ extern void REGPARM(1) (*sh2_drc_restore_sr)(SH2 *sh2); #define DRC_DECLARE_SR register long _sh2_sr asm(DRC_SR_REG) #endif #define DRC_SAVE_SR(sh2) \ - if (likely((sh2->state&(SH2_STATE_RUN|SH2_STATE_SLEEP)) == SH2_STATE_RUN)) \ + if (likely(sh2->state & SH2_IN_DRC)) \ sh2->sr = (s32)_sh2_sr // sh2_drc_save_sr(sh2) #define DRC_RESTORE_SR(sh2) \ - if (likely((sh2->state&(SH2_STATE_RUN|SH2_STATE_SLEEP)) == SH2_STATE_RUN)) \ + if (likely(sh2->state & SH2_IN_DRC)) \ _sh2_sr = (s32)sh2->sr // sh2_drc_restore_sr(sh2) #else diff --git a/cpu/sh2/sh2.h b/cpu/sh2/sh2.h index 4b0b3384..b9267d74 100644 --- a/cpu/sh2/sh2.h +++ b/cpu/sh2/sh2.h @@ -48,7 +48,8 @@ typedef struct SH2_ #define SH2_STATE_CPOLL (1 << 2) // polling comm regs #define SH2_STATE_VPOLL (1 << 3) // polling VDP #define SH2_STATE_RPOLL (1 << 4) // polling address in SDRAM -#define SH2_TIMER_RUN (1 << 8) // SOC WDT timer is running +#define SH2_TIMER_RUN (1 << 7) // SOC WDT timer is running +#define SH2_IN_DRC (1 << 8) // DRC in use unsigned int state; uint32_t poll_addr; int poll_cycles; From 03718e6276272f3508b2c76ece1901744434b7db Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 9 Jul 2020 23:51:39 +0200 Subject: [PATCH 0337/1110] libretro, build fixes --- Makefile | 3 ++ Makefile.libretro | 4 ++ platform/common/common.mak | 2 +- tools/Makefile | 2 +- tools/mkoffsets.sh | 79 ++++++++++++++++++++++++++++++-------- 5 files changed, 71 insertions(+), 19 deletions(-) diff --git a/Makefile b/Makefile index f80a9ec7..a06b8189 100644 --- a/Makefile +++ b/Makefile @@ -280,6 +280,9 @@ endif pico/carthw_cfg.c: 
pico/carthw.cfg tools/make_carthw_c $< $@ +# preprocessed asm files most probably include the offsets file +$(filter %.S,$(SRCS_COMMON)): pico/pico_int_offs.h + # random deps pico/carthw/svp/compiler.o : cpu/drc/emit_arm.c cpu/sh2/compiler.o : cpu/drc/emit_arm.c cpu/drc/emit_arm64.c cpu/drc/emit_ppc.c diff --git a/Makefile.libretro b/Makefile.libretro index 309d3f72..b227f17f 100644 --- a/Makefile.libretro +++ b/Makefile.libretro @@ -364,6 +364,10 @@ else ifeq ($(platform), emscripten) STATIC_LINKING = 1 DONT_COMPILE_IN_ZLIB = 1 + use_cyclone = 0 + use_fame = 1 + use_drz80 = 0 + use_cz80 = 1 # GCW0 else ifeq ($(platform), gcw0) TARGET := $(TARGET_NAME)_libretro.so diff --git a/platform/common/common.mak b/platform/common/common.mak index 024ff75f..599f246f 100644 --- a/platform/common/common.mak +++ b/platform/common/common.mak @@ -76,7 +76,7 @@ SRCS_COMMON += $(R)pico/32x/draw_arm.S endif ifeq "$(asm_32xmemory)" "1" DEFINES += _ASM_32X_MEMORY_C -SRCS_COMMON += $(R)pico/32x/memory_arm.s +SRCS_COMMON += $(R)pico/32x/memory_arm.S endif ifeq "$(asm_mix)" "1" SRCS_COMMON += $(R)pico/sound/mix_arm.S diff --git a/tools/Makefile b/tools/Makefile index 752cd6b2..41c4d3d6 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -2,7 +2,7 @@ TARGETS = amalgamate textfilter OBJS = $(addsuffix .o,$(TARGETS)) all: $(TARGETS) - CC="$(XCC)" CFLAGS="$(XCFLAGS)" ./mkoffsets.sh ../pico + CC="$(XCC)" CFLAGS="$(XCFLAGS)" sh ./mkoffsets.sh ../pico clean: $(RM) $(TARGETS) $(OBJS) diff --git a/tools/mkoffsets.sh b/tools/mkoffsets.sh index 6b086a93..0aa8e96e 100755 --- a/tools/mkoffsets.sh +++ b/tools/mkoffsets.sh @@ -8,30 +8,80 @@ CC=${CC:-gcc} # endianess of target (automagically determined below) ENDIAN= -# don't do this if ELF format isn't used. 
it doesn't matter since offsets are -# only needed for the asm parts (currently mips/arm32) and those have ELF -check_elf () +# check which object format to dissect +READELF= +OBJDUMP= +check_obj () { + # prepare an object file; as side effect dtermine the endianess + CROSS=$(echo $CC | sed 's/gcc.*//') echo '#include ' >/tmp/getoffs.c echo "const int32_t val = 1;" >>/tmp/getoffs.c $CC $CFLAGS -I .. -c /tmp/getoffs.c -o /tmp/getoffs.o || exit 1 - if ! command -v readelf >/dev/null || ! file /tmp/getoffs.o | grep -q ELF; then - echo "/* mkoffset.sh: no readelf or not ELF, offset table not created */" >$fn - echo "WARNING: no readelf or not ELF, offset table not created" - exit + + # check for readelf; readelf is the only toolchain tool not using bfd, + # hence it works with ELF files for every target + if file /tmp/getoffs.o | grep -q ELF; then + if command -v readelf >/dev/null; then + READELF=readelf + elif command -v ${CROSS}readelf >/dev/null; then + READELF=${CROSS}readelf + fi fi + if [ -n "$READELF" ]; then + # find the the .rodata section (in case -fdata-sections is used) + rosect=$($READELF -S /tmp/getoffs.o | grep '\.rodata\|\.sdata' | + sed 's/^[^.]*././;s/ .*//') + # read .rodata section as hex string (should be only 4 bytes) + ro=$($READELF -x $rosect /tmp/getoffs.o | grep '0x' | cut -c14-48 | + tr -d ' \n' | cut -c1-8) + # if no output could be read readelf isn't working + if [ -z "$ro" ]; then + READELF= + fi + fi + # if there is no working readelf try using objdump + if [ -z "$READELF" ]; then + # objdump is using bfd; try using the toolchain objdump first + # since this is likely working with the toolchain objects + if command -v ${CROSS}objdump >/dev/null; then + OBJDUMP=${CROSS}objdump + elif command -v objdump >/dev/null; then + OBJDUMP=objdump + fi + # find the start line of the .rodata section; read the next line + ro=$($OBJDUMP -s /tmp/getoffs.o | awk '\ + /Contents of section.*(__const|.rodata|.sdata)/ {o=1; next} \ + {if(o) { gsub(/ .*/,""); 
$1=""; gsub(/ /,""); print; o=0}}') + # no working tool for extracting the ro data; stop here + if [ -z "$ro" ]; then + echo "/* mkoffset.sh: no readelf or not ELF, offset table not created */" >$fn + echo "WARNING: no readelf or not ELF, offset table not created" + exit + fi + fi + # extract decimal value from ro + rodata=$(printf "%d" 0x$ro) + ENDIAN=$(if [ "$rodata" -eq 1 ]; then echo be; else echo le; fi) } # compile with target C compiler and extract value from .rodata section compile_rodata () { $CC $CFLAGS -I .. -c /tmp/getoffs.c -o /tmp/getoffs.o || exit 1 - # find the name of the .rodata section (in case -fdata-sections is used) - rosect=$(readelf -S /tmp/getoffs.o | grep '\.rodata\|\.sdata' | + if [ -n "$READELF" ]; then + # find the .rodata section (in case -fdata-sections is used) + rosect=$(readelf -S /tmp/getoffs.o | grep '\.rodata\|\.sdata' | sed 's/^[^.]*././;s/ .*//') - # read out .rodata section as hex string (should be only 4 bytes) - ro=$(readelf -x $rosect /tmp/getoffs.o | grep '0x' | cut -c14-48 | + # read .rodata section as hex string (should be only 4 bytes) + ro=$(readelf -x $rosect /tmp/getoffs.o | grep '0x' | cut -c14-48 | tr -d ' \n' | cut -c1-8) + elif [ -n "$OBJDUMP" ]; then + # find the start line of the .rodata section; read the next line + ro=$($OBJDUMP -s /tmp/getoffs.o | awk '\ + /Contents of section.*(__const|.rodata|.sdata)/ {o=1; next} \ + {if(o) { gsub(/ .*/,""); $1=""; gsub(/ /,""); print; o=0}}') + fi if [ "$ENDIAN" = "le" ]; then # swap needed for le target hex="" @@ -63,12 +113,7 @@ get_define () # prefix struct member member... 
fn="${1:-.}/pico_int_offs.h" if echo $CFLAGS | grep -qe -flto; then CFLAGS="$CFLAGS -fno-lto"; fi -check_elf -# determine endianess -echo '#include ' >/tmp/getoffs.c -echo "const int32_t val = 1;" >>/tmp/getoffs.c -compile_rodata -ENDIAN=$(if [ "$rodata" -eq 1 ]; then echo be; else echo le; fi) +check_obj # output header echo "/* autogenerated by mkoffset.sh, do not edit */" >$fn echo "/* target endianess: $ENDIAN, compiled with: $CC $CFLAGS */" >>$fn From 55c3c2b02f0c469bbea04fc66e9826c57b2cdf2e Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 10 Jul 2020 09:09:52 +0200 Subject: [PATCH 0338/1110] core, fix type issues by using stdint types --- pico/pico_int.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pico/pico_int.h b/pico/pico_int.h index 088c7aa5..09cefdfd 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -33,12 +33,12 @@ extern "C" { #endif -typedef unsigned char u8; -typedef signed char s8; -typedef unsigned short u16; -typedef signed short s16; -typedef unsigned int u32; -typedef signed int s32; +typedef uint8_t u8; +typedef int8_t s8; +typedef uint16_t u16; +typedef int16_t s16; +typedef uint32_t u32; +typedef int32_t s32; typedef uintptr_t uptr; // unsigned pointer-sized int // ----------------------- 68000 CPU ----------------------- From 03d5f5105c19b1fae96ebcf803ef3b2f27575aa1 Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 10 Jul 2020 17:53:32 +0200 Subject: [PATCH 0339/1110] libretro, build fixes --- Makefile | 21 +--- Makefile.libretro | 194 +++++++++---------------------------- pico/pico_int.h | 2 + platform/gizmondo/Makefile | 3 + platform/gp2x/Makefile | 2 +- platform/pandora/Makefile | 2 +- tools/Makefile | 7 +- 7 files changed, 63 insertions(+), 168 deletions(-) diff --git a/Makefile b/Makefile index a06b8189..52382d4f 100644 --- a/Makefile +++ b/Makefile @@ -24,7 +24,6 @@ endif ifndef NO_ALIGN_FUNCTIONS CFLAGS += -falign-functions=2 endif -LDFLAGS += -Wl,--gc-sections # profiling pprof ?= 0 @@ -64,27 +63,13 @@ 
asm_cdmemory ?= 1 asm_mix ?= 1 asm_32xdraw ?= 1 asm_32xmemory ?= 1 -else ifneq (,$(findstring 86,$(ARCH))) -use_fame ?= 1 -use_cz80 ?= 1 -use_sh2drc ?= 1 -else ifneq (,$(findstring mips,$(ARCH))) -use_fame ?= 1 -use_cz80 ?= 1 -use_sh2drc ?= 1 -else ifneq (,$(findstring aarch64,$(ARCH))) -use_fame ?= 1 -use_cz80 ?= 1 -use_sh2drc ?= 1 -else ifneq (,$(findstring riscv,$(ARCH))) -use_fame ?= 1 -use_cz80 ?= 1 -use_sh2drc ?= 1 -else ifneq (,$(findstring powerpc,$(ARCH))) +else use_fame ?= 1 use_cz80 ?= 1 +ifneq (,$(filter "$(ARCH)","x86" "i386" "mips" "aarch64" "riscv" "powerpc")) use_sh2drc ?= 1 endif +endif -include Makefile.local diff --git a/Makefile.libretro b/Makefile.libretro index b227f17f..d288087d 100644 --- a/Makefile.libretro +++ b/Makefile.libretro @@ -8,10 +8,6 @@ ifeq ($(platform),) platform = win else ifneq ($(findstring Darwin,$(shell uname -a)),) platform = osx - arch = intel - ifeq ($(shell uname -p),powerpc) - arch = ppc - endif else ifneq ($(findstring win,$(shell uname -a)),) platform = win endif @@ -31,15 +27,6 @@ ifneq ($(GIT_VERSION),"unknown") CFLAGS += -DGIT_VERSION=\"$(GIT_VERSION)\" endif -asm_memory = 0 -asm_render = 0 -asm_ym2612 = 0 -asm_misc = 0 -asm_cdmemory = 0 -asm_mix = 0 -asm_32xdraw = 0 -asm_32xmemory = 0 - fpic := ifeq ($(STATIC_LINKING),1) @@ -54,7 +41,6 @@ ifeq ($(platform), unix) SHARED := -shared DONT_COMPILE_IN_ZLIB = 1 CFLAGS += -DFAMEC_NO_GOTOS - use_sh2drc = 1 # Portable Linux else ifeq ($(platform), linux-portable) @@ -65,7 +51,6 @@ else ifeq ($(platform), linux-portable) LIBM := DONT_COMPILE_IN_ZLIB = 1 CFLAGS += -DFAMEC_NO_GOTOS - use_sh2drc = 1 # OS X else ifeq ($(platform), osx) @@ -74,14 +59,8 @@ else ifeq ($(platform), osx) SHARED := -dynamiclib fpic := -fPIC APPLE := 1 - arch = intel ifeq ($(shell uname -p),powerpc) - arch = ppc - endif - ifeq ($(arch),ppc) CFLAGS += -DBLARGG_BIG_ENDIAN=1 -D__ppc__ -DFAMEC_NO_GOTOS - else - use_sh2drc = 1 endif OSXVER = `sw_vers -productVersion | cut -d. 
-f 2` OSX_LT_MAVERICKS = `(( $(OSXVER) <= 9)) && echo "YES"` @@ -103,15 +82,8 @@ else ifeq ($(platform), staticios) CXX += -miphoneos-version-min=8.0 CC_AS += -miphoneos-version-min=8.0 CFLAGS += -miphoneos-version-min=8.0 - ARCH := aarch64 STATIC_LINKING = 1 - use_cyclone = 0 - use_fame = 1 - use_drz80 = 0 - use_cz80 = 1 - use_sh2drc = 1 - use_svpdrc = 0 # iOS else ifneq (,$(findstring ios,$(platform))) @@ -139,15 +111,8 @@ else CXX += -miphoneos-version-min=5.0 CC_AS += -miphoneos-version-min=5.0 CFLAGS += -miphoneos-version-min=5.0 + use_svpdrc = 0 endif - ARCH := arm - - use_cyclone = 0 - use_fame = 1 - use_drz80 = 0 - use_cz80 = 1 - use_sh2drc = 1 - use_svpdrc = 1 # PS3 else ifeq ($(platform), ps3) @@ -159,14 +124,9 @@ else ifeq ($(platform), ps3) NO_MMAP = 1 DONT_COMPILE_IN_ZLIB = 1 - use_cyclone = 0 - use_fame = 1 - use_drz80 = 0 - use_cz80 = 1 - use_sh2drc = 1 - # sncps3 else ifeq ($(platform), sncps3) + ARCH = powerpc TARGET := $(TARGET_NAME)_libretro_ps3.a CC = $(CELL_SDK)/host-win32/sn/bin/ps3ppusnc.exe AR = $(CELL_SDK)/host-win32/sn/bin/ps3snarl.exe @@ -175,12 +135,6 @@ else ifeq ($(platform), sncps3) NO_MMAP = 1 DONT_COMPILE_IN_ZLIB = 1 - use_cyclone = 0 - use_fame = 1 - use_drz80 = 0 - use_cz80 = 1 - use_sh2drc = 1 - # Lightweight PS3 Homebrew SDK else ifeq ($(platform), psl1ght) TARGET := $(TARGET_NAME)_libretro_$(platform).a @@ -191,74 +145,45 @@ else ifeq ($(platform), psl1ght) NO_MMAP = 1 DONT_COMPILE_IN_ZLIB = 1 - use_cyclone = 0 - use_fame = 1 - use_drz80 = 0 - use_cz80 = 1 - use_sh2drc = 1 - # PSP else ifeq ($(platform), psp1) - TARGET := $(TARGET_NAME)_libretro_$(platform).a - CC = psp-gcc$(EXE_EXT) - AR = psp-ar$(EXE_EXT) - CFLAGS += -G0 -ftracer - CFLAGS += -DPSP - STATIC_LINKING = 1 - NO_MMAP = 1 - DONT_COMPILE_IN_ZLIB = 1 + TARGET := $(TARGET_NAME)_libretro_$(platform).a + CC = psp-gcc$(EXE_EXT) + AR = psp-ar$(EXE_EXT) + CFLAGS += -G0 -ftracer + CFLAGS += -DPSP + STATIC_LINKING = 1 + NO_MMAP = 1 + DONT_COMPILE_IN_ZLIB = 1 - asm_render = 1 
- use_cyclone = 0 - use_fame = 1 - use_drz80 = 0 - use_cz80 = 1 - use_sh2drc = 1 + asm_render = 1 # CTR (3DS) else ifeq ($(platform), ctr) - TARGET := $(TARGET_NAME)_libretro_$(platform).a - CC = $(DEVKITARM)/bin/arm-none-eabi-gcc$(EXE_EXT) - CXX = $(DEVKITARM)/bin/arm-none-eabi-g++$(EXE_EXT) - AR = $(DEVKITARM)/bin/arm-none-eabi-ar$(EXE_EXT) - CFLAGS += -DARM11 -D_3DS - CFLAGS += -march=armv6k -mtune=mpcore -mfloat-abi=hard -marm -mfpu=vfp - CFLAGS += -Wall -mword-relocations - CFLAGS += -fomit-frame-pointer -ffast-math - STATIC_LINKING = 1 - NO_MMAP = 1 - DONT_COMPILE_IN_ZLIB = 1 - ARCH = arm - ARM_ASM = 1 + TARGET := $(TARGET_NAME)_libretro_$(platform).a + CC = $(DEVKITARM)/bin/arm-none-eabi-gcc$(EXE_EXT) + CXX = $(DEVKITARM)/bin/arm-none-eabi-g++$(EXE_EXT) + AR = $(DEVKITARM)/bin/arm-none-eabi-ar$(EXE_EXT) + CFLAGS += -DARM11 -D_3DS + CFLAGS += -march=armv6k -mtune=mpcore -mfloat-abi=hard -marm -mfpu=vfp + CFLAGS += -Wall -mword-relocations + CFLAGS += -fomit-frame-pointer -ffast-math + STATIC_LINKING = 1 + NO_MMAP = 1 + DONT_COMPILE_IN_ZLIB = 1 - use_cyclone = 1 - use_fame = 0 - use_drz80 = 1 - use_cz80 = 0 - use_sh2drc = 1 - use_svpdrc = 1 - - OBJS +=platform/libretro/3ds/3ds_utils.o + OBJS +=platform/libretro/3ds/3ds_utils.o # Raspberry Pi (original model) Raspbian else ifeq ($(platform), raspberrypi) - CFLAGS += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6j - CFLAGS += -Wall -mword-relocations - CFLAGS += -fomit-frame-pointer -ffast-math - ARCH = arm - ARM_ASM = 1 + CFLAGS += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6j + CFLAGS += -Wall -mword-relocations + CFLAGS += -fomit-frame-pointer -ffast-math - TARGET := $(TARGET_NAME)_libretro.so - SHARED := -shared - fpic := -fPIC - DONT_COMPILE_IN_ZLIB = 1 - - use_cyclone = 1 - use_fame = 0 - use_drz80 = 1 - use_cz80 = 0 - use_sh2drc = 1 - use_svpdrc = 1 + TARGET := $(TARGET_NAME)_libretro.so + SHARED := -shared + fpic := -fPIC + DONT_COMPILE_IN_ZLIB = 1 # Vita else ifeq ($(platform), vita) @@ -273,15 +198,6 
@@ else ifeq ($(platform), vita) STATIC_LINKING = 1 NO_MMAP = 1 DONT_COMPILE_IN_ZLIB = 1 - ARCH = arm - ARM_ASM = 1 - - use_cyclone = 1 - use_fame = 0 - use_drz80 = 1 - use_cz80 = 0 - use_sh2drc = 1 - use_svpdrc = 1 # Xbox 360 else ifeq ($(platform), xenon) @@ -313,14 +229,6 @@ else ifeq ($(platform), qnx) CFLAGS += -DBASE_ADDR_FIXED=0 -D__BLACKBERRY_QNX__ -marm -mcpu=cortex-a9 -mtune=cortex-a9 -mfpu=neon -mfloat-abi=softfp ASFLAGS += -mcpu=cortex-a9 -mfpu=neon -mfloat-abi=softfp ARCH = arm - ARM_ASM = 1 - - use_cyclone = 0 - use_fame = 1 - use_drz80 = 0 - use_cz80 = 1 - use_sh2drc = 1 - use_svpdrc = 1 # ARM else ifneq (,$(findstring armv,$(platform))) @@ -353,21 +261,16 @@ else ifneq (,$(findstring armv,$(platform))) CFLAGS += -mfloat-abi=hard ASFLAGS += -mfloat-abi=hard endif - ifneq (,$(findstring armasm,$(platform))) - ARM_ASM = 1 + ifeq (,$(findstring armasm,$(platform))) + NO_ARM_ASM = 1 endif - ARCH = arm # Emscripten else ifeq ($(platform), emscripten) TARGET := $(TARGET_NAME)_libretro_$(platform).bc - STATIC_LINKING = 1 + ARCH = unknown DONT_COMPILE_IN_ZLIB = 1 - use_cyclone = 0 - use_fame = 1 - use_drz80 = 0 - use_cz80 = 1 # GCW0 else ifeq ($(platform), gcw0) TARGET := $(TARGET_NAME)_libretro.so @@ -379,12 +282,6 @@ else ifeq ($(platform), gcw0) DONT_COMPILE_IN_ZLIB = 1 CFLAGS += -ffast-math -march=mips32 -mtune=mips32r2 -mhard-float - use_cyclone = 0 - use_fame = 1 - use_drz80 = 0 - use_cz80 = 1 - use_sh2drc = 1 - # Windows else TARGET := $(TARGET_NAME)_libretro.dll @@ -401,15 +298,20 @@ ifeq ($(NO_MMAP),1) CFLAGS += -DNO_MMAP endif -ifeq ($(ARM_ASM),1) -asm_memory = 1 -asm_render = 1 -asm_ym2612 = 1 -asm_misc = 1 -asm_cdmemory = 1 -asm_mix = 1 -asm_32xdraw = 1 -asm_32xmemory = 1 +ifeq ($(NO_ARM_ASM),1) +use_cyclone = 0 +use_fame ?= 1 +use_drz80 = 0 +use_cz80 ?= 1 + +asm_memory = 0 +asm_render = 0 +asm_ym2612 = 0 +asm_misc = 0 +asm_cdmemory = 0 +asm_mix = 0 +asm_32xdraw = 0 +asm_32xmemory = 0 endif CFLAGS += $(fpic) @@ -421,7 +323,7 @@ endif SHARED ?= 
-shared LDFLAGS += $(SHARED) $(fpic) -ifneq ($(ARCH), arm) +ifeq ($(ARCH),) ARCH = $(shell $(CC) -dumpmachine | awk -F '-' '{print $$1}') endif PLATFORM = libretro diff --git a/pico/pico_int.h b/pico/pico_int.h index 09cefdfd..78f32d0e 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -33,12 +33,14 @@ extern "C" { #endif +#ifdef UTYPES_DEFINED typedef uint8_t u8; typedef int8_t s8; typedef uint16_t u16; typedef int16_t s16; typedef uint32_t u32; typedef int32_t s32; +#endif typedef uintptr_t uptr; // unsigned pointer-sized int // ----------------------- 68000 CPU ----------------------- diff --git a/platform/gizmondo/Makefile b/platform/gizmondo/Makefile index 7df468d6..7228be56 100644 --- a/platform/gizmondo/Makefile +++ b/platform/gizmondo/Makefile @@ -98,6 +98,9 @@ ifeq "$(profile)" "1" endif +../../tools/textfilter: ../../tools/textfilter.c + make -C ../../tools/ textfilter + readme.txt: ../../tools/textfilter ../base_readme.txt ../../tools/textfilter ../base_readme.txt $@ GIZ diff --git a/platform/gp2x/Makefile b/platform/gp2x/Makefile index e7b4326b..ced50a4f 100644 --- a/platform/gp2x/Makefile +++ b/platform/gp2x/Makefile @@ -11,7 +11,7 @@ endif all: rel ../../tools/textfilter: ../../tools/textfilter.c - make -C ../../tools/ + make -C ../../tools/ textfilter readme.txt: ../../tools/textfilter ../base_readme.txt ../../ChangeLog ../../tools/textfilter ../base_readme.txt $@ GP2X diff --git a/platform/pandora/Makefile b/platform/pandora/Makefile index a181e098..31c10408 100644 --- a/platform/pandora/Makefile +++ b/platform/pandora/Makefile @@ -13,7 +13,7 @@ PND_MAKE ?= $(HOME)/dev/pnd/src/pandora-libraries/testdata/scripts/pnd_make.sh all: rel ../../tools/textfilter: ../../tools/textfilter.c - make -C ../../tools/ + make -C ../../tools/ textfilter #readme.txt: ../../tools/textfilter ../base_readme.txt ../../ChangeLog # ../../tools/textfilter ../base_readme.txt $@ PANDORA diff --git a/tools/Makefile b/tools/Makefile index 41c4d3d6..f8e93881 100644 --- 
a/tools/Makefile +++ b/tools/Makefile @@ -1,9 +1,12 @@ TARGETS = amalgamate textfilter -OBJS = $(addsuffix .o,$(TARGETS)) +HOSTCC ?= cc -all: $(TARGETS) +all: CC="$(XCC)" CFLAGS="$(XCFLAGS)" sh ./mkoffsets.sh ../pico +$(TARGETS): $(addsuffix .c,$(TARGETS)) + $(HOSTCC) -o $@ -O $@.c + clean: $(RM) $(TARGETS) $(OBJS) From 93c08696d74a7fbc8609f24574dbd57472e27c5b Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 10 Jul 2020 23:40:35 +0200 Subject: [PATCH 0340/1110] libretro, build fixes --- Makefile | 2 +- pico/pico_int.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 52382d4f..497a58a0 100644 --- a/Makefile +++ b/Makefile @@ -66,7 +66,7 @@ asm_32xmemory ?= 1 else use_fame ?= 1 use_cz80 ?= 1 -ifneq (,$(filter "$(ARCH)","x86" "i386" "mips" "aarch64" "riscv" "powerpc")) +ifneq (,$(filter x86% i386% mips% aarch% riscv% powerpc% ppc%, $(ARCH))) use_sh2drc ?= 1 endif endif diff --git a/pico/pico_int.h b/pico/pico_int.h index 78f32d0e..da063bc9 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -33,7 +33,7 @@ extern "C" { #endif -#ifdef UTYPES_DEFINED +#ifndef UTYPES_DEFINED typedef uint8_t u8; typedef int8_t s8; typedef uint16_t u16; From 18538b2ce893eb77d31ad16b4a6b458dffbc73f4 Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 11 Jul 2020 23:54:53 +0200 Subject: [PATCH 0341/1110] core, keep offsets header from being build if no preprocessed asm files --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 497a58a0..466e5d3d 100644 --- a/Makefile +++ b/Makefile @@ -211,7 +211,7 @@ LDFLAGS += -Wl,-Map=$(TARGET).map endif -target_: pico/pico_int_offs.h $(TARGET) +target_: $(TARGET) clean: $(RM) $(TARGET) $(OBJS) pico/pico_int_offs.h From 35984c2198084b4e6566f3f0a732928d47962a6e Mon Sep 17 00:00:00 2001 From: kub Date: Mon, 13 Jul 2020 07:20:04 +0200 Subject: [PATCH 0342/1110] libretro, build fixes for ios --- Makefile | 2 +- Makefile.libretro | 2 +- pico/carthw/svp/compiler.c | 4 +-- 
tools/Makefile | 6 ++++- tools/offsets/generic32-offsets.h | 39 ++++++++++++++++++++++++++++++ tools/offsets/generic64-offsets.h | 39 ++++++++++++++++++++++++++++++ tools/offsets/genericn32-offsets.h | 39 ++++++++++++++++++++++++++++++ tools/offsets/ios9-offsets.h | 1 + 8 files changed, 127 insertions(+), 5 deletions(-) create mode 100644 tools/offsets/generic32-offsets.h create mode 100644 tools/offsets/generic64-offsets.h create mode 100644 tools/offsets/genericn32-offsets.h create mode 120000 tools/offsets/ios9-offsets.h diff --git a/Makefile b/Makefile index 466e5d3d..d5953bef 100644 --- a/Makefile +++ b/Makefile @@ -228,7 +228,7 @@ pprof: platform/linux/pprof.c $(CC) $(CFLAGS) -O2 -ggdb -DPPROF -DPPROF_TOOL -I../../ -I. $^ -o $@ $(LDFLAGS) $(LDLIBS) pico/pico_int_offs.h: tools/mkoffsets.sh - make -C tools/ XCC="$(CC)" XCFLAGS="$(CFLAGS)" + make -C tools/ XCC="$(CC)" XCFLAGS="$(CFLAGS)" XPLATFORM="$(platform)" .s.o: $(CC) $(CFLAGS) -c $< -o $@ diff --git a/Makefile.libretro b/Makefile.libretro index d288087d..6c63dba2 100644 --- a/Makefile.libretro +++ b/Makefile.libretro @@ -111,7 +111,6 @@ else CXX += -miphoneos-version-min=5.0 CC_AS += -miphoneos-version-min=5.0 CFLAGS += -miphoneos-version-min=5.0 - use_svpdrc = 0 endif # PS3 @@ -303,6 +302,7 @@ use_cyclone = 0 use_fame ?= 1 use_drz80 = 0 use_cz80 ?= 1 +use_svpdrc = 0 asm_memory = 0 asm_render = 0 diff --git a/pico/carthw/svp/compiler.c b/pico/carthw/svp/compiler.c index df051e47..65d1a194 100644 --- a/pico/carthw/svp/compiler.c +++ b/pico/carthw/svp/compiler.c @@ -693,9 +693,9 @@ static int tr_aop_ssp2arm(int op) /* spacial version of call for calling C needed on ios, since we use r9.. 
*/ static void emith_call_c_func(void *target) { - EOP_STMFD_SP(A_R7M|A_R9M); + EOP_STMFD_SP(M2(7,9)); emith_call(target); - EOP_LDMFD_SP(A_R7M|A_R9M); + EOP_LDMFD_SP(M2(7,9)); } #else #define emith_call_c_func emith_call diff --git a/tools/Makefile b/tools/Makefile index f8e93881..b20bc20f 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -2,7 +2,11 @@ TARGETS = amalgamate textfilter HOSTCC ?= cc all: - CC="$(XCC)" CFLAGS="$(XCFLAGS)" sh ./mkoffsets.sh ../pico + if [ -f "offsets/$(XPLATFORM)-offsets.h" ]; then \ + ln -sf "../tools/offsets/$(XPLATFORM)-offsets.h" ../pico/pico_int_offs.h; \ + else \ + CC="$(XCC)" CFLAGS="$(XCFLAGS)" sh ./mkoffsets.sh ../pico; \ + fi $(TARGETS): $(addsuffix .c,$(TARGETS)) $(HOSTCC) -o $@ -O $@.c diff --git a/tools/offsets/generic32-offsets.h b/tools/offsets/generic32-offsets.h new file mode 100644 index 00000000..d0a33a66 --- /dev/null +++ b/tools/offsets/generic32-offsets.h @@ -0,0 +1,39 @@ +/* autogenerated by mkoffset.sh, do not edit */ +/* target endianess: le, compiled with: mipsel-linux-gnu-gcc -mabi=32 */ +#define OFS_Pico_video_reg 0x0000 +#define OFS_Pico_m_rotate 0x0040 +#define OFS_Pico_m_z80Run 0x0041 +#define OFS_Pico_m_dirtyPal 0x0046 +#define OFS_Pico_m_hardware 0x0047 +#define OFS_Pico_m_z80_reset 0x004f +#define OFS_Pico_m_sram_reg 0x0049 +#define OFS_Pico_sv 0x008c +#define OFS_Pico_sv_data 0x008c +#define OFS_Pico_sv_start 0x0090 +#define OFS_Pico_sv_end 0x0094 +#define OFS_Pico_sv_flags 0x0098 +#define OFS_Pico_rom 0x0554 +#define OFS_Pico_romsize 0x0558 +#define OFS_Pico_est 0x00c8 +#define OFS_EST_DrawScanline 0x0000 +#define OFS_EST_rendstatus 0x0004 +#define OFS_EST_DrawLineDest 0x0008 +#define OFS_EST_HighCol 0x000c +#define OFS_EST_HighPreSpr 0x0010 +#define OFS_EST_Pico 0x0014 +#define OFS_EST_PicoMem_vram 0x0018 +#define OFS_EST_PicoMem_cram 0x001c +#define OFS_EST_PicoOpt 0x0020 +#define OFS_EST_Draw2FB 0x0024 +#define OFS_EST_HighPal 0x0028 +#define OFS_PMEM_vram 0x10000 +#define OFS_PMEM_vsram 0x22100 
+#define OFS_PMEM32x_pal_native 0x90e00 +#define OFS_SH2_is_slave 0x055c +#define OFS_SH2_p_bios 0x0080 +#define OFS_SH2_p_da 0x0084 +#define OFS_SH2_p_sdram 0x0088 +#define OFS_SH2_p_rom 0x008c +#define OFS_SH2_p_dram 0x0090 +#define OFS_SH2_p_drcblk_da 0x0094 +#define OFS_SH2_p_drcblk_ram 0x0098 diff --git a/tools/offsets/generic64-offsets.h b/tools/offsets/generic64-offsets.h new file mode 100644 index 00000000..8f56be03 --- /dev/null +++ b/tools/offsets/generic64-offsets.h @@ -0,0 +1,39 @@ +/* autogenerated by mkoffset.sh, do not edit */ +/* target endianess: le, compiled with: mipsel-linux-gnu-gcc -mabi=64 */ +#define OFS_Pico_video_reg 0x0000 +#define OFS_Pico_m_rotate 0x0040 +#define OFS_Pico_m_z80Run 0x0041 +#define OFS_Pico_m_dirtyPal 0x0046 +#define OFS_Pico_m_hardware 0x0047 +#define OFS_Pico_m_z80_reset 0x004f +#define OFS_Pico_m_sram_reg 0x0049 +#define OFS_Pico_sv 0x0090 +#define OFS_Pico_sv_data 0x0090 +#define OFS_Pico_sv_start 0x0098 +#define OFS_Pico_sv_end 0x009c +#define OFS_Pico_sv_flags 0x00a0 +#define OFS_Pico_rom 0x0588 +#define OFS_Pico_romsize 0x0590 +#define OFS_Pico_est 0x00d8 +#define OFS_EST_DrawScanline 0x0000 +#define OFS_EST_rendstatus 0x0004 +#define OFS_EST_DrawLineDest 0x0008 +#define OFS_EST_HighCol 0x0010 +#define OFS_EST_HighPreSpr 0x0018 +#define OFS_EST_Pico 0x0020 +#define OFS_EST_PicoMem_vram 0x0028 +#define OFS_EST_PicoMem_cram 0x0030 +#define OFS_EST_PicoOpt 0x0038 +#define OFS_EST_Draw2FB 0x0040 +#define OFS_EST_HighPal 0x0048 +#define OFS_PMEM_vram 0x10000 +#define OFS_PMEM_vsram 0x22100 +#define OFS_PMEM32x_pal_native 0x90e00 +#define OFS_SH2_is_slave 0x0a18 +#define OFS_SH2_p_bios 0x0098 +#define OFS_SH2_p_da 0x00a0 +#define OFS_SH2_p_sdram 0x00a8 +#define OFS_SH2_p_rom 0x00b0 +#define OFS_SH2_p_dram 0x00b8 +#define OFS_SH2_p_drcblk_da 0x00c0 +#define OFS_SH2_p_drcblk_ram 0x00c8 diff --git a/tools/offsets/genericn32-offsets.h b/tools/offsets/genericn32-offsets.h new file mode 100644 index 00000000..2ea64190 --- 
/dev/null +++ b/tools/offsets/genericn32-offsets.h @@ -0,0 +1,39 @@ +/* autogenerated by mkoffset.sh, do not edit */ +/* target endianess: le, compiled with: mipsel-linux-gnu-gcc -mabi=n32 */ +#define OFS_Pico_video_reg 0x0000 +#define OFS_Pico_m_rotate 0x0040 +#define OFS_Pico_m_z80Run 0x0041 +#define OFS_Pico_m_dirtyPal 0x0046 +#define OFS_Pico_m_hardware 0x0047 +#define OFS_Pico_m_z80_reset 0x004f +#define OFS_Pico_m_sram_reg 0x0049 +#define OFS_Pico_sv 0x008c +#define OFS_Pico_sv_data 0x008c +#define OFS_Pico_sv_start 0x0090 +#define OFS_Pico_sv_end 0x0094 +#define OFS_Pico_sv_flags 0x0098 +#define OFS_Pico_rom 0x0554 +#define OFS_Pico_romsize 0x0558 +#define OFS_Pico_est 0x00c8 +#define OFS_EST_DrawScanline 0x0000 +#define OFS_EST_rendstatus 0x0004 +#define OFS_EST_DrawLineDest 0x0008 +#define OFS_EST_HighCol 0x000c +#define OFS_EST_HighPreSpr 0x0010 +#define OFS_EST_Pico 0x0014 +#define OFS_EST_PicoMem_vram 0x0018 +#define OFS_EST_PicoMem_cram 0x001c +#define OFS_EST_PicoOpt 0x0020 +#define OFS_EST_Draw2FB 0x0024 +#define OFS_EST_HighPal 0x0028 +#define OFS_PMEM_vram 0x10000 +#define OFS_PMEM_vsram 0x22100 +#define OFS_PMEM32x_pal_native 0x90e00 +#define OFS_SH2_is_slave 0x055c +#define OFS_SH2_p_bios 0x0080 +#define OFS_SH2_p_da 0x0084 +#define OFS_SH2_p_sdram 0x0088 +#define OFS_SH2_p_rom 0x008c +#define OFS_SH2_p_dram 0x0090 +#define OFS_SH2_p_drcblk_da 0x0094 +#define OFS_SH2_p_drcblk_ram 0x0098 diff --git a/tools/offsets/ios9-offsets.h b/tools/offsets/ios9-offsets.h new file mode 120000 index 00000000..5ac5765f --- /dev/null +++ b/tools/offsets/ios9-offsets.h @@ -0,0 +1 @@ +generic32-offsets.h \ No newline at end of file From 182b8d01f9c7e304d303348a76c926f0d7ab43d0 Mon Sep 17 00:00:00 2001 From: twinaphex Date: Sun, 12 Jul 2020 13:58:17 +0200 Subject: [PATCH 0343/1110] Make sure function prototype signatures match, and put typedefs into separate header file --- cpu/sh2/sh2.h | 13 +++++++------ pico/pico_int.h | 46 
++++++++++++++++++---------------------------- pico/pico_types.h | 16 ++++++++++++++++ 3 files changed, 41 insertions(+), 34 deletions(-) create mode 100644 pico/pico_types.h diff --git a/cpu/sh2/sh2.h b/cpu/sh2/sh2.h index b9267d74..7177b754 100644 --- a/cpu/sh2/sh2.h +++ b/cpu/sh2/sh2.h @@ -1,6 +1,7 @@ #ifndef __SH2_H__ #define __SH2_H__ +#include "../../pico/pico_types.h" #include "../../pico/pico_port.h" // registers - matches structure order @@ -129,12 +130,12 @@ static __inline int sh2_execute(SH2 *sh2, int cycles) // pico memhandlers // XXX: move somewhere else -unsigned int REGPARM(2) p32x_sh2_read8(unsigned int a, SH2 *sh2); -unsigned int REGPARM(2) p32x_sh2_read16(unsigned int a, SH2 *sh2); -unsigned int REGPARM(2) p32x_sh2_read32(unsigned int a, SH2 *sh2); -void REGPARM(3) p32x_sh2_write8 (unsigned int a, unsigned int d, SH2 *sh2); -void REGPARM(3) p32x_sh2_write16(unsigned int a, unsigned int d, SH2 *sh2); -void REGPARM(3) p32x_sh2_write32(unsigned int a, unsigned int d, SH2 *sh2); +unsigned int REGPARM(2) p32x_sh2_read8(u32 a, SH2 *sh2); +unsigned int REGPARM(2) p32x_sh2_read16(u32 a, SH2 *sh2); +unsigned int REGPARM(2) p32x_sh2_read32(u32 a, SH2 *sh2); +void REGPARM(3) p32x_sh2_write8 (u32 a, u32 d, SH2 *sh2); +void REGPARM(3) p32x_sh2_write16(u32 a, u32 d, SH2 *sh2); +void REGPARM(3) p32x_sh2_write32(u32 a, u32 d, SH2 *sh2); // debug #ifdef DRC_CMP diff --git a/pico/pico_int.h b/pico/pico_int.h index da063bc9..8757f7ce 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -9,7 +9,6 @@ #ifndef PICO_INTERNAL_INCLUDED #define PICO_INTERNAL_INCLUDED - #include #include #include "pico_port.h" @@ -32,16 +31,7 @@ extern "C" { #endif - -#ifndef UTYPES_DEFINED -typedef uint8_t u8; -typedef int8_t s8; -typedef uint16_t u16; -typedef int16_t s16; -typedef uint32_t u32; -typedef int32_t s32; -#endif -typedef uintptr_t uptr; // unsigned pointer-sized int +#include "pico_types.h" // ----------------------- 68000 CPU ----------------------- #ifdef EMU_C68K @@ 
-965,22 +955,22 @@ void p32x_schedule_hint(SH2 *sh2, unsigned int m68k_cycles); // 32x/memory.c extern struct Pico32xMem *Pico32xMem; -unsigned int PicoRead8_32x(unsigned int a); -unsigned int PicoRead16_32x(unsigned int a); -void PicoWrite8_32x(unsigned int a, unsigned int d); -void PicoWrite16_32x(unsigned int a, unsigned int d); +unsigned int PicoRead8_32x(u32 a); +unsigned int PicoRead16_32x(u32 a); +void PicoWrite8_32x(u32 a, u32 d); +void PicoWrite16_32x(u32 a, u32 d); void PicoMemSetup32x(void); void Pico32xSwapDRAM(int b); void Pico32xMemStateLoaded(void); void p32x_update_banks(void); -void p32x_m68k_poll_event(unsigned int flags); -unsigned int REGPARM(3) p32x_sh2_poll_memory8(unsigned int a, unsigned int d, SH2 *sh2); -unsigned int REGPARM(3) p32x_sh2_poll_memory16(unsigned int a, unsigned int d, SH2 *sh2); -unsigned int REGPARM(3) p32x_sh2_poll_memory32(unsigned int a, unsigned int d, SH2 *sh2); +void p32x_m68k_poll_event(u32 flags); +unsigned int REGPARM(3) p32x_sh2_poll_memory8(u32 a, u32 d, SH2 *sh2); +unsigned int REGPARM(3) p32x_sh2_poll_memory16(u32 a, u32 d, SH2 *sh2); +unsigned int REGPARM(3) p32x_sh2_poll_memory32(u32 a, u32 d, SH2 *sh2); void *p32x_sh2_get_mem_ptr(unsigned int a, unsigned int *mask, SH2 *sh2); -void p32x_sh2_poll_detect(unsigned int a, SH2 *sh2, unsigned int flags, int maxcnt); -void p32x_sh2_poll_event(SH2 *sh2, unsigned int flags, unsigned int m68k_cycles); -int p32x_sh2_memcpy(unsigned int dst, unsigned int src, int count, int size, SH2 *sh2); +void p32x_sh2_poll_detect(u32 a, SH2 *sh2, u32 flags, int maxcnt); +void p32x_sh2_poll_event(SH2 *sh2, u32 flags, u32 m68k_cycles); +int p32x_sh2_memcpy(u32 dst, u32 src, int count, int size, SH2 *sh2); // 32x/draw.c void PicoDrawSetOutFormat32x(pdso_t which, int use_32x_line_mode); @@ -1016,12 +1006,12 @@ void p32x_dreq1_trigger(void); void p32x_timers_recalc(void); void p32x_timer_do(SH2 *sh2, unsigned int m68k_slice); void sh2_peripheral_reset(SH2 *sh2); -unsigned int REGPARM(2) 
sh2_peripheral_read8(unsigned int a, SH2 *sh2); -unsigned int REGPARM(2) sh2_peripheral_read16(unsigned int a, SH2 *sh2); -unsigned int REGPARM(2) sh2_peripheral_read32(unsigned int a, SH2 *sh2); -void REGPARM(3) sh2_peripheral_write8(unsigned int a, unsigned int d, SH2 *sh2); -void REGPARM(3) sh2_peripheral_write16(unsigned int a, unsigned int d, SH2 *sh2); -void REGPARM(3) sh2_peripheral_write32(unsigned int a, unsigned int d, SH2 *sh2); +u32 REGPARM(2) sh2_peripheral_read8(u32 a, SH2 *sh2); +u32 REGPARM(2) sh2_peripheral_read16(u32 a, SH2 *sh2); +u32 REGPARM(2) sh2_peripheral_read32(u32 a, SH2 *sh2); +void REGPARM(3) sh2_peripheral_write8(u32 a, u32 d, SH2 *sh2); +void REGPARM(3) sh2_peripheral_write16(u32 a, u32 d, SH2 *sh2); +void REGPARM(3) sh2_peripheral_write32(u32 a, u32 d, SH2 *sh2); #else #define Pico32xInit() diff --git a/pico/pico_types.h b/pico/pico_types.h new file mode 100644 index 00000000..c1a7db8a --- /dev/null +++ b/pico/pico_types.h @@ -0,0 +1,16 @@ +#ifndef PICO_TYPES +#define PICO_TYPES + +#include + +#ifndef UTYPES_DEFINED +typedef uint8_t u8; +typedef int8_t s8; +typedef uint16_t u16; +typedef int16_t s16; +typedef uint32_t u32; +typedef int32_t s32; +#endif +typedef uintptr_t uptr; /* unsigned pointer-sized int */ + +#endif From e6a52e1940bb0402b85d1b72a0835c7ea83ff1e0 Mon Sep 17 00:00:00 2001 From: twinaphex Date: Sun, 12 Jul 2020 19:10:14 +0200 Subject: [PATCH 0344/1110] Prevent collission with PS2 SDK --- pico/pico_types.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pico/pico_types.h b/pico/pico_types.h index c1a7db8a..c5ea8098 100644 --- a/pico/pico_types.h +++ b/pico/pico_types.h @@ -3,6 +3,7 @@ #include +#ifndef __TAMTYPES_H__ #ifndef UTYPES_DEFINED typedef uint8_t u8; typedef int8_t s8; @@ -11,6 +12,8 @@ typedef int16_t s16; typedef uint32_t u32; typedef int32_t s32; #endif +#endif + typedef uintptr_t uptr; /* unsigned pointer-sized int */ #endif From 1bee714816d7676194b8d6b2a9d04f75fbc3c637 Mon Sep 17 00:00:00 2001 From: 
twinaphex Date: Mon, 13 Jul 2020 01:33:41 +0200 Subject: [PATCH 0345/1110] Fix more conflicting types for prototypes --- pico/pico_int.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pico/pico_int.h b/pico/pico_int.h index 8757f7ce..7d69bcab 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -955,8 +955,8 @@ void p32x_schedule_hint(SH2 *sh2, unsigned int m68k_cycles); // 32x/memory.c extern struct Pico32xMem *Pico32xMem; -unsigned int PicoRead8_32x(u32 a); -unsigned int PicoRead16_32x(u32 a); +u32 PicoRead8_32x(u32 a); +u32 PicoRead16_32x(u32 a); void PicoWrite8_32x(u32 a, u32 d); void PicoWrite16_32x(u32 a, u32 d); void PicoMemSetup32x(void); @@ -964,10 +964,10 @@ void Pico32xSwapDRAM(int b); void Pico32xMemStateLoaded(void); void p32x_update_banks(void); void p32x_m68k_poll_event(u32 flags); -unsigned int REGPARM(3) p32x_sh2_poll_memory8(u32 a, u32 d, SH2 *sh2); -unsigned int REGPARM(3) p32x_sh2_poll_memory16(u32 a, u32 d, SH2 *sh2); -unsigned int REGPARM(3) p32x_sh2_poll_memory32(u32 a, u32 d, SH2 *sh2); -void *p32x_sh2_get_mem_ptr(unsigned int a, unsigned int *mask, SH2 *sh2); +u32 REGPARM(3) p32x_sh2_poll_memory8(u32 a, u32 d, SH2 *sh2); +u32 REGPARM(3) p32x_sh2_poll_memory16(u32 a, u32 d, SH2 *sh2); +u32 REGPARM(3) p32x_sh2_poll_memory32(u32 a, u32 d, SH2 *sh2); +void *p32x_sh2_get_mem_ptr(u32 a, u32 *mask, SH2 *sh2); void p32x_sh2_poll_detect(u32 a, SH2 *sh2, u32 flags, int maxcnt); void p32x_sh2_poll_event(SH2 *sh2, u32 flags, u32 m68k_cycles); int p32x_sh2_memcpy(u32 dst, u32 src, int count, int size, SH2 *sh2); From 9257c0c5c8899da74c0e3d100325efb6c3d0f135 Mon Sep 17 00:00:00 2001 From: twinaphex Date: Mon, 13 Jul 2020 11:59:10 +0200 Subject: [PATCH 0346/1110] Buildfix --- cpu/sh2/sh2.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpu/sh2/sh2.h b/cpu/sh2/sh2.h index 7177b754..eab52686 100644 --- a/cpu/sh2/sh2.h +++ b/cpu/sh2/sh2.h @@ -130,9 +130,9 @@ static __inline int sh2_execute(SH2 *sh2, 
int cycles) // pico memhandlers // XXX: move somewhere else -unsigned int REGPARM(2) p32x_sh2_read8(u32 a, SH2 *sh2); -unsigned int REGPARM(2) p32x_sh2_read16(u32 a, SH2 *sh2); -unsigned int REGPARM(2) p32x_sh2_read32(u32 a, SH2 *sh2); +u32 REGPARM(2) p32x_sh2_read8(u32 a, SH2 *sh2); +u32 REGPARM(2) p32x_sh2_read16(u32 a, SH2 *sh2); +u32 REGPARM(2) p32x_sh2_read32(u32 a, SH2 *sh2); void REGPARM(3) p32x_sh2_write8 (u32 a, u32 d, SH2 *sh2); void REGPARM(3) p32x_sh2_write16(u32 a, u32 d, SH2 *sh2); void REGPARM(3) p32x_sh2_write32(u32 a, u32 d, SH2 *sh2); From 48302a8a51414b4d28df8ec28a6a2bb3a12e74b2 Mon Sep 17 00:00:00 2001 From: twinaphex Date: Mon, 13 Jul 2020 21:29:19 +0200 Subject: [PATCH 0347/1110] Buildfix --- pico/carthw/carthw.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pico/carthw/carthw.h b/pico/carthw/carthw.h index 7303f60a..a1875b6b 100644 --- a/pico/carthw/carthw.h +++ b/pico/carthw/carthw.h @@ -1,5 +1,6 @@ /* svp */ +#include "../pico_types.h" #include "svp/ssp16.h" typedef struct { @@ -18,7 +19,7 @@ void PicoSVPMemSetup(void); extern int carthw_ssf2_active; extern unsigned char carthw_ssf2_banks[8]; void carthw_ssf2_startup(void); -void carthw_ssf2_write8(unsigned int a, unsigned int d); +void carthw_ssf2_write8(u32 a, u32 d); /* misc */ void carthw_Xin1_startup(void); From 368c9180500707088d62a591d30e106e05555c05 Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 14 Jul 2020 00:07:15 +0200 Subject: [PATCH 0348/1110] sh2 drc, optimize standard division insns (default off, needs more scrutiny) --- cpu/drc/emit_x86.c | 9 ++ cpu/sh2/compiler.c | 334 +++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 331 insertions(+), 12 deletions(-) diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 60b2b6a2..bb514849 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -349,6 +349,15 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common } else emith_or_r_r_r(d, s1, s2); \ } while (0) +#define 
emith_eor_r_r_r_lsr(d, s1, s2, lsrimm) do { \ + if (lsrimm) { \ + int tmp_ = rcache_get_tmp(); \ + emith_lsr(tmp_, s2, lsrimm); \ + emith_eor_r_r_r(d, s1, tmp_); \ + rcache_free_tmp(tmp_); \ + } else emith_eor_r_r_r(d, s1, s2); \ +} while (0) + // _r_r_shift #define emith_or_r_r_lsl(d, s, lslimm) \ emith_or_r_r_r_lsl(d, d, s, lslimm) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 085a6179..3c62f13a 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -49,6 +49,7 @@ #define LOOP_DETECTION 1 #define LOOP_OPTIMIZER 1 #define T_OPTIMIZER 1 +#define DIV_OPTIMIZER 0 #define MAX_LITERAL_OFFSET 0x200 // max. MOVA, MOV @(PC) offset #define MAX_LOCAL_TARGETS (BLOCK_INSN_LIMIT / 4) @@ -152,9 +153,18 @@ enum op_types { OP_RTE, // RTE instruction OP_TRAPA, // TRAPA instruction OP_LDC, // LDC instruction + OP_DIV0, // DIV0[US] instruction OP_UNDEFINED, }; +struct div { + u32 state:1; // 0: expect DIV1/ROTCL, 1: expect DIV1 + u32 rn:5, rm:5, ro:5; // rn and rm for DIV1, ro for ROTCL + u32 div1:8, rotcl:8; // DIV1 count, ROTCL count +}; +union _div { u32 imm; struct div div; }; // XXX tut-tut type punning... +#define div(opd) ((union _div *)&((opd)->imm))->div + // XXX consider trap insns: OP_TRAPA, OP_UNDEFINED? #define OP_ISBRANCH(op) ((BITRANGE(OP_BRANCH, OP_BRANCH_RF)| BITMASK1(OP_RTE)) \ & BITMASK1(op)) @@ -2979,6 +2989,120 @@ static void emit_do_static_regs(int is_write, int tmpr) } } +#if DIV_OPTIMIZER +// divide operation replacement functions, called by compiled code. Only the +// 32:16 cases and the 64:32 cases described in the SH2 prog man are replaced. 
+ +static uint32_t REGPARM(2) sh2_drc_divu32(uint32_t dv, uint32_t ds) +{ + if (ds && ds >= dv) { + // good case: no divide by 0, and no result overflow + uint32_t quot = dv / (ds>>16), rem = dv - (quot * (ds>>16)); + if (~quot&1) rem -= ds>>16; + return (uint16_t)quot | ((2*rem + (quot>>31)) << 16); + } else { + // bad case: use the sh2 algo to get the right result + int q = 0, t = 0, s = 16; + while (s--) { + uint32_t _ = dv>>31; + dv = (dv<<1) | t; + t = _; + _ = dv; + if (q) dv += ds, q = dv < _; + else dv -= ds, q = !(dv < _); + q ^= t, t = !q; + } + return (dv<<1) | t; + } +} + +static uint32_t REGPARM(3) sh2_drc_divu64(uint32_t dh, uint32_t *dl, uint32_t ds) +{ + if (ds > 1 && ds >= dh) { + // good case: no divide by 0, and no result overflow + uint64_t dv = *dl | ((uint64_t)dh << 32); + uint32_t quot = dv / ds, rem = dv - (quot * ds); + if (~quot&1) rem -= ds; + *dl = quot; + return rem; + } else { + // bad case: use the sh2 algo to get the right result + uint64_t dv = *dl | ((uint64_t)dh << 32); + int q = 0, t = 0, s = 32; + while (s--) { + uint64_t _ = dv>>63; + dv = (dv<<1) | t; + t = _; + _ = dv; + if (q) dv += ((uint64_t)ds << 32), q = dv < _; + else dv -= ((uint64_t)ds << 32), q = !(dv < _); + q ^= t, t = !q; + } + *dl = (dv<<1) | t; + return (dv>>32); + } +} + +static uint32_t REGPARM(2) sh2_drc_divs32(int32_t dv, int32_t ds) +{ + uint32_t adv = abs(dv), ads = abs(ds)>>16; + if (ads > 1 && ads > adv>>16 && (int32_t)ads > 0 && !(uint16_t)ds) { + // good case: no divide by 0, and no result overflow + uint32_t quot = adv / ads, rem = adv - (quot * ads); + int m1 = (rem ? dv^ds : ds) < 0; + if (rem && dv < 0) rem = (quot&1 ? -rem : +ads-rem); + else rem = (quot&1 ? +rem : -ads+rem); + quot = ((dv^ds)<0 ? 
-quot : +quot) - m1; + return (uint16_t)quot | ((2*rem + (quot>>31)) << 16); + } else { + // bad case: use the sh2 algo to get the right result + int m = (uint32_t)ds>>31, q = (uint32_t)dv>>31, t = m^q, s = 16; + while (s--) { + uint32_t _ = (uint32_t)dv>>31; + dv = (dv<<1) | t; + t = _; + _ = dv; + if (m^q) dv += ds, q = (uint32_t)dv < _; + else dv -= ds, q = !((uint32_t)dv < _); + q ^= m^t, t = !(m^q); + } + return (dv<<1) | t; + } +} + +static uint32_t REGPARM(3) sh2_drc_divs64(int32_t dh, uint32_t *dl, int32_t ds) +{ + int64_t _dv = *dl | ((int64_t)dh << 32); + uint64_t adv = (_dv < 0 ? -_dv : _dv); // llabs isn't in older toolchains + uint32_t ads = abs(ds); + if (ads > 1 && ads > adv>>32 && (int64_t)adv > 0) { + // good case: no divide by 0, and no result overflow + uint32_t quot = adv / ads, rem = adv - ((uint64_t)quot * ads); + int m1 = (rem ? dh^ds : ds) < 0; + if (rem && dh < 0) rem = (quot&1 ? -rem : +ads-rem); + else rem = (quot&1 ? +rem : -ads+rem); + quot = ((dh^ds)<0 ? -quot : +quot) - m1; + *dl = quot; + return rem; + } else { + // bad case: use the sh2 algo to get the right result + uint64_t dv = *dl | ((uint64_t)dh << 32); + int m = (uint32_t)ds>>31, q = (uint64_t)dv>>63, t = m^q, s = 32; + while (s--) { + int64_t _ = (uint64_t)dv>>63; + dv = (dv<<1) | t; + t = _; + _ = dv; + if (m^q) dv += ((uint64_t)ds << 32), q = dv < _; + else dv -= ((uint64_t)ds << 32), q = !(dv < _); + q ^= m^t, t = !(m^q); + } + *dl = (dv<<1) | t; + return (dv>>32); + } +} +#endif + // block local link stuff struct linkage { u32 pc; @@ -3115,6 +3239,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) u16 *dr_pc_base; struct op_data *opd; int blkid_main = 0; + int skip_op = 0; int tmp, tmp2; int cycles; int i, v; @@ -3486,6 +3611,10 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) #if (DRC_DEBUG & 2) insns_compiled++; #endif + if (skip_op > 0) { + skip_op--; + continue; + } if (op_flags[i] & OF_DELAY_OP) { @@ -3772,6 +3901,60 @@ static void 
REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_invalidate_t(); emith_bic_r_imm(sr, M|Q|T); drcf.Mflag = FLG_0; +#if DIV_OPTIMIZER + if (div(opd).div1 == 16 && div(opd).ro == div(opd).rn) { + // divide 32/16 + rcache_get_reg_arg(0, div(opd).rn, NULL); + rcache_get_reg_arg(1, div(opd).rm, NULL); + rcache_invalidate_tmp(); + emith_call(sh2_drc_divu32); + tmp = rcache_get_tmp_ret(); +#if REMAP_REGISTER + tmp2 = rcache_map_reg(div(opd).rn, tmp); +#else + tmp2 = rcache_get_reg(div(opd).rn, RC_GR_WRITE, NULL); +#endif + if (tmp != tmp2) + emith_move_r_r(tmp2, tmp); + + tmp3 = rcache_get_tmp(); + emith_and_r_r_imm(tmp3, tmp2, 1); // Q = !Rn[0] + emith_eor_r_r_imm(tmp3, tmp3, 1); + emith_or_r_r_lsl(sr, tmp3, Q_SHIFT); + rcache_free_tmp(tmp3); + emith_or_r_r_r_lsr(sr, sr, tmp2, 31); // T = Rn[31] + skip_op = div(opd).div1 + div(opd).rotcl; + } + else if (div(opd).div1 == 32 && div(opd).ro != div(opd).rn) { + // divide 64/32 + tmp4 = rcache_get_reg(div(opd).ro, RC_GR_READ, NULL); + emith_ctx_write(tmp4, offsetof(SH2, drc_tmp)); + tmp = rcache_get_tmp_arg(1); + emith_add_r_r_ptr_imm(tmp, CONTEXT_REG, offsetof(SH2, drc_tmp)); + rcache_get_reg_arg(0, div(opd).rn, NULL); + rcache_get_reg_arg(2, div(opd).rm, NULL); + rcache_invalidate_tmp(); + emith_call(sh2_drc_divu64); + tmp = rcache_get_tmp_ret(); +#if REMAP_REGISTER + tmp2 = rcache_map_reg(div(opd).rn, tmp); +#else + tmp2 = rcache_get_reg(div(opd).rn, RC_GR_WRITE, NULL); +#endif + tmp4 = rcache_get_reg(div(opd).ro, RC_GR_WRITE, NULL); + if (tmp != tmp2) + emith_move_r_r(tmp2, tmp); + emith_ctx_read(tmp4, offsetof(SH2, drc_tmp)); + + tmp3 = rcache_get_tmp(); + emith_and_r_r_imm(tmp3, tmp4, 1); // Q = !Ro[0] + emith_eor_r_r_imm(tmp3, tmp3, 1); + emith_or_r_r_lsl(sr, tmp3, Q_SHIFT); + rcache_free_tmp(tmp3); + emith_or_r_r_r_lsr(sr, sr, tmp4, 31); // T = Ro[31] + skip_op = div(opd).div1 + div(opd).rotcl; + } +#endif break; case 2: // MOVT Rn 0000nnnn00101001 sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); @@ -3837,19 
+4020,82 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) goto end_op; case 0x07: // DIV0S Rm,Rn 0010nnnnmmmm0111 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); - tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); - tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); - tmp = rcache_get_tmp(); emith_invalidate_t(); emith_bic_r_imm(sr, M|Q|T); - emith_lsr(tmp, tmp2, 31); // Q = Nn - emith_or_r_r_lsl(sr, tmp, Q_SHIFT); - emith_lsr(tmp, tmp3, 31); // M = Nm - emith_or_r_r_lsl(sr, tmp, M_SHIFT); - emith_eor_r_r_lsr(tmp, tmp2, 31); - emith_or_r_r(sr, tmp); // T = Q^M - rcache_free(tmp); drcf.Mflag = FLG_UNKNOWN; +#if DIV_OPTIMIZER + if (div(opd).div1 == 16 && div(opd).ro == div(opd).rn) { + // divide 32/16 + rcache_get_reg_arg(0, div(opd).rn, NULL); + tmp2 = rcache_get_reg_arg(1, div(opd).rm, NULL); + tmp3 = rcache_get_tmp(); + emith_lsr(tmp3, tmp2, 31); + emith_or_r_r_lsl(sr, tmp3, M_SHIFT); // M = Rm[31] + rcache_invalidate_tmp(); + emith_call(sh2_drc_divs32); + tmp = rcache_get_tmp_ret(); +#if REMAP_REGISTER + tmp2 = rcache_map_reg(div(opd).rn, tmp); +#else + tmp2 = rcache_get_reg(div(opd).rn, RC_GR_WRITE, NULL); +#endif + if (tmp != tmp2) + emith_move_r_r(tmp2, tmp); + tmp3 = rcache_get_tmp(); + + emith_eor_r_r_r_lsr(tmp3, tmp2, sr, M_SHIFT); + emith_and_r_r_imm(tmp3, tmp3, 1); + emith_eor_r_r_imm(tmp3, tmp3, 1); + emith_or_r_r_lsl(sr, tmp3, Q_SHIFT); // Q = !Rn[0]^M + rcache_free_tmp(tmp3); + emith_or_r_r_r_lsr(sr, sr, tmp2, 31); // T = Rn[31] + skip_op = div(opd).div1 + div(opd).rotcl; + } + else if (div(opd).div1 == 32 && div(opd).ro != div(opd).rn) { + // divide 64/32 + tmp4 = rcache_get_reg(div(opd).ro, RC_GR_READ, NULL); + emith_ctx_write(tmp4, offsetof(SH2, drc_tmp)); + rcache_get_reg_arg(0, div(opd).rn, NULL); + tmp2 = rcache_get_reg_arg(2, div(opd).rm, NULL); + tmp3 = rcache_get_tmp_arg(1); + emith_lsr(tmp3, tmp2, 31); + emith_or_r_r_lsl(sr, tmp3, M_SHIFT); // M = Rm[31] + emith_add_r_r_ptr_imm(tmp3, CONTEXT_REG, offsetof(SH2, drc_tmp)); + 
rcache_invalidate_tmp(); + emith_call(sh2_drc_divs64); + tmp = rcache_get_tmp_ret(); +#if REMAP_REGISTER + tmp2 = rcache_map_reg(div(opd).rn, tmp); +#else + tmp2 = rcache_get_reg(div(opd).rn, RC_GR_WRITE, NULL); +#endif + tmp4 = rcache_get_reg(div(opd).ro, RC_GR_WRITE, NULL); + if (tmp != tmp2) + emith_move_r_r(tmp2, tmp); + emith_ctx_read(tmp4, offsetof(SH2, drc_tmp)); + + tmp3 = rcache_get_tmp(); + emith_eor_r_r_r_lsr(tmp3, tmp4, sr, M_SHIFT); + emith_and_r_r_imm(tmp3, tmp3, 1); + emith_eor_r_r_imm(tmp3, tmp3, 1); + emith_or_r_r_lsl(sr, tmp3, Q_SHIFT); // Q = !Ro[0]^M + rcache_free_tmp(tmp3); + emith_or_r_r_r_lsr(sr, sr, tmp4, 31); // T = Ro[31] + skip_op = div(opd).div1 + div(opd).rotcl; + } else +#endif + { + tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); + tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); + tmp = rcache_get_tmp(); + emith_lsr(tmp, tmp2, 31); // Q = Nn + emith_or_r_r_lsl(sr, tmp, Q_SHIFT); + emith_lsr(tmp, tmp3, 31); // M = Nm + emith_or_r_r_lsl(sr, tmp, M_SHIFT); + emith_eor_r_r_lsr(tmp, tmp2, 31); + emith_or_r_r(sr, tmp); // T = Q^M + rcache_free(tmp); + } goto end_op; case 0x08: // TST Rm,Rn 0010nnnnmmmm1000 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); @@ -5758,7 +6004,8 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, struct op_data *opd; int next_is_delay = 0; int end_block = 0; - int i, i_end; + int is_divop; + int i, i_end, i_div = -1; u32 crc = 0; // 2nd pass stuff int last_btarget; // loop detector @@ -5790,6 +6037,7 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, (lowest_literal && lowest_literal <= pc)) break; // text area collides with data area + is_divop = 0; op = FETCH_OP(pc); switch ((op & 0xf000) >> 12) { @@ -5874,8 +6122,12 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, break; case 1: // DIV0U 0000000000011001 CHECK_UNHANDLED_BITS(0xf00, undefined); + opd->op = OP_DIV0; opd->source = BITMASK1(SHR_SR); opd->dest = BITMASK2(SHR_SR, SHR_T); 
+ div(opd) = (struct div){ .rn=SHR_MEM, .rm=SHR_MEM, .ro=SHR_MEM }; + i_div = i; + is_divop = 1; break; case 2: // MOVT Rn 0000nnnn00101001 opd->source = BITMASK1(SHR_T); @@ -5975,8 +6227,12 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, opd->dest = BITMASK2(GET_Rn(), SHR_MEM); break; case 0x07: // DIV0S Rm,Rn 0010nnnnmmmm0111 + opd->op = OP_DIV0; opd->source = BITMASK3(SHR_SR, GET_Rm(), GET_Rn()); opd->dest = BITMASK2(SHR_SR, SHR_T); + div(opd) = (struct div){ .rn=GET_Rn(), .rm=GET_Rm(), .ro=SHR_MEM }; + i_div = i; + is_divop = 1; break; case 0x08: // TST Rm,Rn 0010nnnnmmmm1000 opd->source = BITMASK2(GET_Rm(), GET_Rn()); @@ -6021,6 +6277,19 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, case 0x04: // DIV1 Rm,Rn 0011nnnnmmmm0100 opd->source = BITMASK4(GET_Rm(), GET_Rn(), SHR_SR, SHR_T); opd->dest = BITMASK3(GET_Rn(), SHR_SR, SHR_T); + if (i_div >= 0) { + // divide operation: all DIV1 operations must use the same reg pair + if (div(&ops[i_div]).rn == SHR_MEM) + div(&ops[i_div]).rn=GET_Rn(), div(&ops[i_div]).rm=GET_Rm(); + if (div(&ops[i_div]).rn == GET_Rn() && div(&ops[i_div]).rm == GET_Rm()) { + div(&ops[i_div]).div1 += 1; + div(&ops[i_div]).state = 0; + is_divop = 1; + } else { + ops[i_div].imm = 0; + i_div = -1; + } + } break; case 0x05: // DMULU.L Rm,Rn 0011nnnnmmmm0101 case 0x0d: // DMULS.L Rm,Rn 0011nnnnmmmm1101 @@ -6126,6 +6395,19 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, opd->dest = BITMASK2(GET_Rn(), SHR_T); break; case 0x24: // ROTCL Rn 0100nnnn00100100 + if (i_div >= 0) { + // divide operation: all ROTCL operations must use the same register + if (div(&ops[i_div]).ro == SHR_MEM) + div(&ops[i_div]).ro = GET_Rn(); + if (div(&ops[i_div]).ro == GET_Rn() && !div(&ops[i_div]).state) { + div(&ops[i_div]).rotcl += 1; + div(&ops[i_div]).state = 1; + is_divop = 1; + } else { + ops[i_div].imm = 0; + i_div = -1; + } + } case 0x25: // ROTCR Rn 0100nnnn00100101 opd->source = 
BITMASK2(GET_Rn(), SHR_T); opd->dest = BITMASK2(GET_Rn(), SHR_T); @@ -6556,7 +6838,8 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, next_is_delay = 0; break; } - } + } else if (!is_divop && i_div >= 0) + i_div = -1; // divide parser stop } end: i_end = i; @@ -6567,6 +6850,8 @@ end: t = T_UNKNOWN; last_btarget = 0; op = 0; // delay/poll insns counter + is_divop = 0; // divide op insns counter + i_div = -1; // index of current divide op for (i = 0, pc = base_pc; i < i_end; i++, pc += 2) { opd = &ops[i]; crc += FETCH_OP(pc); @@ -6599,6 +6884,31 @@ end: } } + // divide operation verification: + // 1. there must not be a branch target inside + // 2. nothing is in a delay slot (could only be DIV0) + // 2. DIV0/n*(ROTCL+DIV1)/ROTCL: + // div.div1 > 0 && div.rotcl == div.div1+1 && div.rn =! div.ro + // 3. DIV0/n*DIV1/ROTCL: + // div.div1 > 0 && div.rotcl == 1 && div.ro == div.rn + if (i_div >= 0) { + if (op_flags[i] & OF_BTARGET) { // condition 1 + ops[i_div].imm = 0; + i_div = -1; + } else if (--is_divop == 0) + i_div = -1; + } else if (opd->op == OP_DIV0) { + struct div *div = &div(opd); + is_divop = div->div1 + div->rotcl; + if (op_flags[i] & OF_DELAY_OP) // condition 2 + opd->imm = 0; + else if (! div->div1 || ! 
((div->ro == div->rn && div->rotcl == 1) || + (div->ro != div->rn && div->rotcl == div->div1+1))) + opd->imm = 0; // condition 3+4 + else if (is_divop) + i_div = i; + } + // literal pool size detection if (opd->op == OP_MOVA && opd->imm >= base_pc) if (lowest_mova == 0 || opd->imm < lowest_mova) From 713e3a1c5bac0bc472cb91768730268ed1ab99b2 Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 16 Jul 2020 19:29:34 +0200 Subject: [PATCH 0349/1110] libretro, build fixes for android --- Makefile.libretro | 4 +- cpu/drc/emit_arm.c | 23 ++++++----- jni/Android.mk | 37 +++++++++++------- tools/mkoffsets.sh | 10 ++--- ...ic32-offsets.h => generic-ilp32-offsets.h} | 0 tools/offsets/generic-llp64-offsets.h | 39 +++++++++++++++++++ ...ric64-offsets.h => generic-lp64-offsets.h} | 0 tools/offsets/genericn32-offsets.h | 39 ------------------- tools/offsets/ios9-offsets.h | 1 - 9 files changed, 81 insertions(+), 72 deletions(-) rename tools/offsets/{generic32-offsets.h => generic-ilp32-offsets.h} (100%) create mode 100644 tools/offsets/generic-llp64-offsets.h rename tools/offsets/{generic64-offsets.h => generic-lp64-offsets.h} (100%) delete mode 100644 tools/offsets/genericn32-offsets.h delete mode 120000 tools/offsets/ios9-offsets.h diff --git a/Makefile.libretro b/Makefile.libretro index 6c63dba2..bf7b3870 100644 --- a/Makefile.libretro +++ b/Makefile.libretro @@ -299,9 +299,9 @@ endif ifeq ($(NO_ARM_ASM),1) use_cyclone = 0 -use_fame ?= 1 +use_fame = 1 use_drz80 = 0 -use_cz80 ?= 1 +use_cz80 = 1 use_svpdrc = 0 asm_memory = 0 diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index 3f373435..1a6ffc30 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -48,6 +48,9 @@ #define M6(x,y,z,a,b,c) (M5(x,y,z,a,b)|M1(c)) #define M10(a,b,c,d,e,f,g,h,i,j) (M5(a,b,c,d,e)|M5(f,g,h,i,j)) +// avoid a warning with clang +static inline uintptr_t pabs(intptr_t v) { return labs(v); } + // sys_cacheflush always flushes whole pages, and it's rather expensive on ARMs // hold a list of pending cache 
updates and merge requests to reduce cacheflush static struct { void *base, *end; } pageflush[4]; @@ -341,13 +344,13 @@ static void emith_flush(void) #define EOP_C_AM3_REG(cond,u,l,rn,rd,s,h,rm) EOP_C_AM3(cond,u,0,l,rn,rd,s,h,rm) /* ldr and str */ -#define EOP_LDR_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,(offset_12) >= 0,0,1,rn,rd,abs(offset_12)) -#define EOP_LDRB_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,(offset_12) >= 0,1,1,rn,rd,abs(offset_12)) -#define EOP_STR_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,(offset_12) >= 0,0,0,rn,rd,abs(offset_12)) +#define EOP_LDR_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,(offset_12) >= 0,0,1,rn,rd,pabs(offset_12)) +#define EOP_LDRB_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,(offset_12) >= 0,1,1,rn,rd,pabs(offset_12)) +#define EOP_STR_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,(offset_12) >= 0,0,0,rn,rd,pabs(offset_12)) -#define EOP_LDR_IMM( rd,rn,offset_12) EOP_C_AM2_IMM(A_COND_AL,(offset_12) >= 0,0,1,rn,rd,abs(offset_12)) +#define EOP_LDR_IMM( rd,rn,offset_12) EOP_C_AM2_IMM(A_COND_AL,(offset_12) >= 0,0,1,rn,rd,pabs(offset_12)) #define EOP_LDR_SIMPLE(rd,rn) EOP_C_AM2_IMM(A_COND_AL,1,0,1,rn,rd,0) -#define EOP_STR_IMM( rd,rn,offset_12) EOP_C_AM2_IMM(A_COND_AL,(offset_12) >= 0,0,0,rn,rd,abs(offset_12)) +#define EOP_STR_IMM( rd,rn,offset_12) EOP_C_AM2_IMM(A_COND_AL,(offset_12) >= 0,0,0,rn,rd,pabs(offset_12)) #define EOP_STR_SIMPLE(rd,rn) EOP_C_AM2_IMM(A_COND_AL,1,0,0,rn,rd,0) #define EOP_LDR_REG_LSL(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,0,1,rn,rd,shift_imm,A_AM1_LSL,rm) @@ -355,19 +358,19 @@ static void emith_flush(void) #define EOP_LDRB_REG_LSL(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,1,1,rn,rd,shift_imm,A_AM1_LSL,rm) #define EOP_STR_REG_LSL_WB(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,0,2,rn,rd,shift_imm,A_AM1_LSL,rm) -#define EOP_LDRH_IMM2(cond,rd,rn,offset_8) EOP_C_AM3_IMM(cond,(offset_8) >= 0,1,rn,rd,0,1,abs(offset_8)) +#define EOP_LDRH_IMM2(cond,rd,rn,offset_8) 
EOP_C_AM3_IMM(cond,(offset_8) >= 0,1,rn,rd,0,1,pabs(offset_8)) #define EOP_LDRH_REG2(cond,rd,rn,rm) EOP_C_AM3_REG(cond,1,1,rn,rd,0,1,rm) -#define EOP_LDRH_IMM( rd,rn,offset_8) EOP_C_AM3_IMM(A_COND_AL,(offset_8) >= 0,1,rn,rd,0,1,abs(offset_8)) +#define EOP_LDRH_IMM( rd,rn,offset_8) EOP_C_AM3_IMM(A_COND_AL,(offset_8) >= 0,1,rn,rd,0,1,pabs(offset_8)) #define EOP_LDRH_SIMPLE(rd,rn) EOP_C_AM3_IMM(A_COND_AL,1,1,rn,rd,0,1,0) #define EOP_LDRH_REG( rd,rn,rm) EOP_C_AM3_REG(A_COND_AL,1,1,rn,rd,0,1,rm) -#define EOP_STRH_IMM( rd,rn,offset_8) EOP_C_AM3_IMM(A_COND_AL,(offset_8) >= 0,0,rn,rd,0,1,abs(offset_8)) +#define EOP_STRH_IMM( rd,rn,offset_8) EOP_C_AM3_IMM(A_COND_AL,(offset_8) >= 0,0,rn,rd,0,1,pabs(offset_8)) #define EOP_STRH_SIMPLE(rd,rn) EOP_C_AM3_IMM(A_COND_AL,1,0,rn,rd,0,1,0) #define EOP_STRH_REG( rd,rn,rm) EOP_C_AM3_REG(A_COND_AL,1,0,rn,rd,0,1,rm) -#define EOP_LDRSB_IMM2(cond,rd,rn,offset_8) EOP_C_AM3_IMM(cond,(offset_8) >= 0,1,rn,rd,1,0,abs(offset_8)) +#define EOP_LDRSB_IMM2(cond,rd,rn,offset_8) EOP_C_AM3_IMM(cond,(offset_8) >= 0,1,rn,rd,1,0,pabs(offset_8)) #define EOP_LDRSB_REG2(cond,rd,rn,rm) EOP_C_AM3_REG(cond,1,1,rn,rd,1,0,rm) -#define EOP_LDRSH_IMM2(cond,rd,rn,offset_8) EOP_C_AM3_IMM(cond,(offset_8) >= 0,1,rn,rd,1,1,abs(offset_8)) +#define EOP_LDRSH_IMM2(cond,rd,rn,offset_8) EOP_C_AM3_IMM(cond,(offset_8) >= 0,1,rn,rd,1,1,pabs(offset_8)) #define EOP_LDRSH_REG2(cond,rd,rn,rm) EOP_C_AM3_REG(cond,1,1,rn,rd,1,1,rm) /* ldm and stm */ diff --git a/jni/Android.mk b/jni/Android.mk index 9252d9f9..8ed53ae0 100644 --- a/jni/Android.mk +++ b/jni/Android.mk @@ -18,7 +18,6 @@ use_musashi = 0 use_drz80 = 0 use_cz80 = 0 use_sh2drc = 0 -use_sh2mame = 0 use_svpdrc = 0 asm_memory = 0 @@ -27,6 +26,8 @@ asm_ym2612 = 0 asm_misc = 0 asm_cdmemory = 0 asm_mix = 0 +asm_32xdraw = 0 +asm_32xmemory = 0 ifeq ($(TARGET_ARCH),arm) LOCAL_ARM_MODE := arm @@ -34,21 +35,19 @@ ifeq ($(TARGET_ARCH),arm) LOCAL_ARM_NEON := true endif - use_cyclone = 1 - - # texrels, -perf ~~8% - use_drz80 = 0 - use_cz80 
= 1 - +# use_cyclone = 1 +# use_drz80 = 1 use_sh2drc = 1 - use_svpdrc = 1 +# use_svpdrc = 1 -# asm_memory = 1 # texrels, -perf negligible - asm_render = 1 -# asm_ym2612 = 1 # texrels, -perf ~~4% - asm_misc = 1 -# asm_cdmemory = 1 # texrels - asm_mix = 1 +# asm_memory = 1 +# asm_render = 1 +# asm_ym2612 = 1 +# asm_misc = 1 +# asm_cdmemory = 1 +# asm_mix = 1 +# asm_32xdraw = 1 +# asm_32xmemory = 1 # for armeabi to build... CYCLONE_CONFIG = cyclone_config_armv4.h @@ -59,7 +58,6 @@ $(cleantarget):: else use_fame = 1 use_cz80 = 1 - use_sh2mame = 1 endif # PD is currently not strict aliasing safe @@ -73,6 +71,7 @@ include $(R)platform/common/common.mak LOCAL_SRC_FILES += $(SRCS_COMMON) LOCAL_SRC_FILES += $(R)platform/libretro/libretro.c +LOCAL_SRC_FILES += $(R)platform/common/mp3_sync.c LOCAL_SRC_FILES += $(R)platform/common/mp3.c LOCAL_SRC_FILES += $(R)platform/common/mp3_dummy.c @@ -91,4 +90,12 @@ LOCAL_CFLAGS += -Wall -O2 -ffast-math -DNDEBUG LOCAL_CFLAGS += $(addprefix -D,$(DEFINES)) LOCAL_LDLIBS := -llog +ifneq ($(filter armeabi%, $(TARGET_ARCH_ABI)),) +$(CORE_DIR)/pico/pico_int_offs.h: + cp $(CORE_DIR)/tools/offsets/generic-ilp32-offsets.h $@ +.PHONY: $(CORE_DIR)/pico/pico_int_offs.h + +$(filter %.S,$(SRCS_COMMON)): $(CORE_DIR)/pico/pico_int_offs.h +endif + include $(BUILD_SHARED_LIBRARY) diff --git a/tools/mkoffsets.sh b/tools/mkoffsets.sh index 0aa8e96e..207ffa28 100755 --- a/tools/mkoffsets.sh +++ b/tools/mkoffsets.sh @@ -51,8 +51,8 @@ check_obj () fi # find the start line of the .rodata section; read the next line ro=$($OBJDUMP -s /tmp/getoffs.o | awk '\ - /Contents of section.*(__const|.rodata|.sdata)/ {o=1; next} \ - {if(o) { gsub(/ .*/,""); $1=""; gsub(/ /,""); print; o=0}}') + /Contents of section.*(__const|.ro?data|.sdata)/ {o=1; next} \ + {if(o) { gsub(/ .*/,""); $1=""; gsub(/ /,""); print; exit}}') # no working tool for extracting the ro data; stop here if [ -z "$ro" ]; then echo "/* mkoffset.sh: no readelf or not ELF, offset table not created */" >$fn 
@@ -79,8 +79,8 @@ compile_rodata () elif [ -n "$OBJDUMP" ]; then # find the start line of the .rodata section; read the next line ro=$($OBJDUMP -s /tmp/getoffs.o | awk '\ - /Contents of section.*(__const|.rodata|.sdata)/ {o=1; next} \ - {if(o) { gsub(/ .*/,""); $1=""; gsub(/ /,""); print; o=0}}') + /Contents of section.*(__const|.ro?data|.sdata)/ {o=1; next} \ + {if(o) { gsub(/ .*/,""); $1=""; gsub(/ /,""); print; exit}}') fi if [ "$ENDIAN" = "le" ]; then # swap needed for le target @@ -104,7 +104,7 @@ get_define () # prefix struct member member... name=$(echo $* | sed 's/ /_/g') echo '#include ' > /tmp/getoffs.c echo '#include "pico/pico_int.h"' >> /tmp/getoffs.c - echo "static const struct $struct p;" >> /tmp/getoffs.c + echo "static struct $struct p;" >> /tmp/getoffs.c echo "const int32_t val = (char *)&p.$field - (char*)&p;" >>/tmp/getoffs.c compile_rodata line=$(printf "#define %-20s 0x%04x" $prefix$name $rodata) diff --git a/tools/offsets/generic32-offsets.h b/tools/offsets/generic-ilp32-offsets.h similarity index 100% rename from tools/offsets/generic32-offsets.h rename to tools/offsets/generic-ilp32-offsets.h diff --git a/tools/offsets/generic-llp64-offsets.h b/tools/offsets/generic-llp64-offsets.h new file mode 100644 index 00000000..71c776b9 --- /dev/null +++ b/tools/offsets/generic-llp64-offsets.h @@ -0,0 +1,39 @@ +/* autogenerated by mkoffset.sh, do not edit */ +/* target endianess: le, compiled with: x86_64-w64-mingw32-gcc */ +#define OFS_Pico_video_reg 0x0000 +#define OFS_Pico_m_rotate 0x0040 +#define OFS_Pico_m_z80Run 0x0041 +#define OFS_Pico_m_dirtyPal 0x0046 +#define OFS_Pico_m_hardware 0x0047 +#define OFS_Pico_m_z80_reset 0x004f +#define OFS_Pico_m_sram_reg 0x0049 +#define OFS_Pico_sv 0x0090 +#define OFS_Pico_sv_data 0x0090 +#define OFS_Pico_sv_start 0x0098 +#define OFS_Pico_sv_end 0x009c +#define OFS_Pico_sv_flags 0x00a0 +#define OFS_Pico_rom 0x0588 +#define OFS_Pico_romsize 0x0590 +#define OFS_Pico_est 0x00d8 +#define OFS_EST_DrawScanline 0x0000 
+#define OFS_EST_rendstatus 0x0004 +#define OFS_EST_DrawLineDest 0x0008 +#define OFS_EST_HighCol 0x0010 +#define OFS_EST_HighPreSpr 0x0018 +#define OFS_EST_Pico 0x0020 +#define OFS_EST_PicoMem_vram 0x0028 +#define OFS_EST_PicoMem_cram 0x0030 +#define OFS_EST_PicoOpt 0x0038 +#define OFS_EST_Draw2FB 0x0040 +#define OFS_EST_HighPal 0x0048 +#define OFS_PMEM_vram 0x10000 +#define OFS_PMEM_vsram 0x22100 +#define OFS_PMEM32x_pal_native 0x90e00 +#define OFS_SH2_is_slave 0x0a18 +#define OFS_SH2_p_bios 0x0098 +#define OFS_SH2_p_da 0x00a0 +#define OFS_SH2_p_sdram 0x00a8 +#define OFS_SH2_p_rom 0x00b0 +#define OFS_SH2_p_dram 0x00b8 +#define OFS_SH2_p_drcblk_da 0x00c0 +#define OFS_SH2_p_drcblk_ram 0x00c8 diff --git a/tools/offsets/generic64-offsets.h b/tools/offsets/generic-lp64-offsets.h similarity index 100% rename from tools/offsets/generic64-offsets.h rename to tools/offsets/generic-lp64-offsets.h diff --git a/tools/offsets/genericn32-offsets.h b/tools/offsets/genericn32-offsets.h deleted file mode 100644 index 2ea64190..00000000 --- a/tools/offsets/genericn32-offsets.h +++ /dev/null @@ -1,39 +0,0 @@ -/* autogenerated by mkoffset.sh, do not edit */ -/* target endianess: le, compiled with: mipsel-linux-gnu-gcc -mabi=n32 */ -#define OFS_Pico_video_reg 0x0000 -#define OFS_Pico_m_rotate 0x0040 -#define OFS_Pico_m_z80Run 0x0041 -#define OFS_Pico_m_dirtyPal 0x0046 -#define OFS_Pico_m_hardware 0x0047 -#define OFS_Pico_m_z80_reset 0x004f -#define OFS_Pico_m_sram_reg 0x0049 -#define OFS_Pico_sv 0x008c -#define OFS_Pico_sv_data 0x008c -#define OFS_Pico_sv_start 0x0090 -#define OFS_Pico_sv_end 0x0094 -#define OFS_Pico_sv_flags 0x0098 -#define OFS_Pico_rom 0x0554 -#define OFS_Pico_romsize 0x0558 -#define OFS_Pico_est 0x00c8 -#define OFS_EST_DrawScanline 0x0000 -#define OFS_EST_rendstatus 0x0004 -#define OFS_EST_DrawLineDest 0x0008 -#define OFS_EST_HighCol 0x000c -#define OFS_EST_HighPreSpr 0x0010 -#define OFS_EST_Pico 0x0014 -#define OFS_EST_PicoMem_vram 0x0018 -#define 
OFS_EST_PicoMem_cram 0x001c -#define OFS_EST_PicoOpt 0x0020 -#define OFS_EST_Draw2FB 0x0024 -#define OFS_EST_HighPal 0x0028 -#define OFS_PMEM_vram 0x10000 -#define OFS_PMEM_vsram 0x22100 -#define OFS_PMEM32x_pal_native 0x90e00 -#define OFS_SH2_is_slave 0x055c -#define OFS_SH2_p_bios 0x0080 -#define OFS_SH2_p_da 0x0084 -#define OFS_SH2_p_sdram 0x0088 -#define OFS_SH2_p_rom 0x008c -#define OFS_SH2_p_dram 0x0090 -#define OFS_SH2_p_drcblk_da 0x0094 -#define OFS_SH2_p_drcblk_ram 0x0098 diff --git a/tools/offsets/ios9-offsets.h b/tools/offsets/ios9-offsets.h deleted file mode 120000 index 5ac5765f..00000000 --- a/tools/offsets/ios9-offsets.h +++ /dev/null @@ -1 +0,0 @@ -generic32-offsets.h \ No newline at end of file From 6f7beab435cf3224780701f7c79b199440665718 Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 16 Jul 2020 19:05:46 +0200 Subject: [PATCH 0350/1110] audio, fix sound issues in some intros --- pico/memory.c | 6 +++++- pico/sound/sound.c | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/pico/memory.c b/pico/memory.c index c0ba9ffe..ff41ac96 100644 --- a/pico/memory.c +++ b/pico/memory.c @@ -946,9 +946,9 @@ static int ym2612_write_local(u32 a, u32 d, int is_from_z80) { int cycles = is_from_z80 ? z80_cyclesDone() : z80_cycles_from_68k(); //elprintf(EL_STATUS, "%03i dac w %08x z80 %i", cycles, d, is_from_z80); - ym2612.dacout = ((int)d - 0x80) << 6; if (ym2612.dacen) PsndDoDAC(cycles); + ym2612.dacout = ((int)d - 0x80) << 6; return 0; } @@ -1008,6 +1008,9 @@ static int ym2612_write_local(u32 a, u32 d, int is_from_z80) case 0x27: { /* mode, timer control */ int old_mode = ym2612.OPN.ST.mode; int cycles = is_from_z80 ? 
z80_cyclesDone() : z80_cycles_from_68k(); + + if (ym2612.OPN.ST.mode != d) + PsndDoFM(cycles); ym2612.OPN.ST.mode = d; elprintf(EL_YMTIMER, "st mode %02x", d); @@ -1066,6 +1069,7 @@ static int ym2612_write_local(u32 a, u32 d, int is_from_z80) #define ym2612_read_local() \ + PsndDoFM(xcycles>>8); \ if (xcycles >= Pico.t.timer_a_next_oflow) \ ym2612.OPN.ST.status |= (ym2612.OPN.ST.mode >> 2) & 1; \ if (xcycles >= Pico.t.timer_b_next_oflow) \ diff --git a/pico/sound/sound.c b/pico/sound/sound.c index a6d55df2..0b371f25 100644 --- a/pico/sound/sound.c +++ b/pico/sound/sound.c @@ -169,8 +169,8 @@ PICO_INTERNAL void PsndDoFM(int cyc_to) // Q16, number of samples since last call len = (cyc_to * Pico.snd.clkl_mult) - Pico.snd.fm_pos; - // don't do this too often (about every 4th scanline) - if (len >> 20 <= PicoIn.sndRate >> 12) + // don't do this too often (about once every canline) + if (len >> 16 <= PicoIn.sndRate >> 10) return; // update position and calculate buffer offset and length From 2e5cbf5b6a24a39366c4ead8e67fe23aef98271c Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 17 Jul 2020 19:25:51 +0200 Subject: [PATCH 0351/1110] audio, fix for speed regression after last commit --- pico/memory.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pico/memory.c b/pico/memory.c index ff41ac96..aef3ee8e 100644 --- a/pico/memory.c +++ b/pico/memory.c @@ -1009,8 +1009,6 @@ static int ym2612_write_local(u32 a, u32 d, int is_from_z80) int old_mode = ym2612.OPN.ST.mode; int cycles = is_from_z80 ? 
z80_cyclesDone() : z80_cycles_from_68k(); - if (ym2612.OPN.ST.mode != d) - PsndDoFM(cycles); ym2612.OPN.ST.mode = d; elprintf(EL_YMTIMER, "st mode %02x", d); @@ -1028,6 +1026,7 @@ static int ym2612_write_local(u32 a, u32 d, int is_from_z80) #ifdef __GP2X__ if (PicoIn.opt & POPT_EXT_FM) return YM2612Write_940(a, d, get_scanline(is_from_z80)); #endif + PsndDoFM(cycles); return 1; } return 0; @@ -1069,7 +1068,6 @@ static int ym2612_write_local(u32 a, u32 d, int is_from_z80) #define ym2612_read_local() \ - PsndDoFM(xcycles>>8); \ if (xcycles >= Pico.t.timer_a_next_oflow) \ ym2612.OPN.ST.status |= (ym2612.OPN.ST.mode >> 2) & 1; \ if (xcycles >= Pico.t.timer_b_next_oflow) \ From 7980d47767fca1244743bd3a4c520c4dc406ef6b Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 25 Jul 2020 23:58:57 +0200 Subject: [PATCH 0352/1110] sms mode 4, fix 8 bit renderer code --- pico/draw.c | 10 +++++++--- pico/mode4.c | 17 +++++++++++++---- pico/pico_int.h | 2 ++ platform/linux/emu.c | 20 +++++++++++++------- 4 files changed, 35 insertions(+), 14 deletions(-) diff --git a/pico/draw.c b/pico/draw.c index 668a1246..43bbea76 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -36,8 +36,8 @@ int (*PicoScanBegin)(unsigned int num) = NULL; int (*PicoScanEnd) (unsigned int num) = NULL; static unsigned char DefHighCol[8+320+8]; -static unsigned char *HighColBase = DefHighCol; -static int HighColIncrement; +unsigned char *HighColBase = DefHighCol; +int HighColIncrement; static unsigned int DefOutBuff[320*2/2]; void *DrawLineDestBase = DefOutBuff; @@ -1848,7 +1848,9 @@ void PicoDrawUpdateHighPal(void) if ((PicoIn.opt & POPT_ALT_RENDERER) | (est->rendstatus & PDRAW_SONIC_MODE)) sh = 0; // no s/h support - if (FinalizeLine == FinalizeLine8bit) + if (PicoIn.AHW & PAHW_SMS) + PicoDoHighPal555M4(); + else if (FinalizeLine == FinalizeLine8bit) PicoDoHighPal555_8bit(sh, 0, est); else PicoDoHighPal555(sh, 0, est); @@ -1863,6 +1865,7 @@ void PicoDrawUpdateHighPal(void) void PicoDrawSetOutFormat(pdso_t which, int 
use_32x_line_mode) { + PicoDrawSetInternalBuf(NULL, 0); switch (which) { case PDF_8BIT: @@ -1878,6 +1881,7 @@ void PicoDrawSetOutFormat(pdso_t which, int use_32x_line_mode) default: FinalizeLine = NULL; + PicoDrawSetOutBufMD(Pico.est.Draw2FB+8, 328); break; } if (PicoIn.AHW & PAHW_32X) diff --git a/pico/mode4.c b/pico/mode4.c index 8c063857..ca219a0a 100644 --- a/pico/mode4.c +++ b/pico/mode4.c @@ -69,6 +69,8 @@ static void draw_sprites(int scanline) if (pv->reg[0] & 8) xoff = 0; + if (!(PicoIn.opt & POPT_DIS_32C_BORDER)) + xoff += 32; sat = (unsigned char *)PicoMem.vram + ((pv->reg[5] & 0x7e) << 7); if (pv->reg[1] & 2) { @@ -177,6 +179,8 @@ static void DrawDisplayM4(int scanline) if (dx != 8) cells++; // have hscroll, need to draw 1 cell more dx += cellskip << 3; + if (!FinalizeLineM4 && !(PicoIn.opt & POPT_DIS_32C_BORDER)) + dx += 32; // low priority tiles if (!(pv->debug_p & PVD_KILL_B)) @@ -190,9 +194,11 @@ static void DrawDisplayM4(int scanline) if (!(pv->debug_p & PVD_KILL_A)) draw_strip(nametab, dx, cells, tilex | 0x1000 | (ty << 16)); - if (pv->reg[0] & 0x20) - // first column masked - ((int *)Pico.est.HighCol)[2] = ((int *)Pico.est.HighCol)[3] = 0xe0e0e0e0; + if (pv->reg[0] & 0x20) { + // first column masked, caculate offset to start of line + dx = (dx&~0x1f) / 4; + ((u32 *)Pico.est.HighCol)[dx+2] = ((u32 *)Pico.est.HighCol)[dx+3] = 0xe0e0e0e0; + } } void PicoFrameStartMode4(void) @@ -219,6 +225,7 @@ void PicoFrameStartMode4(void) rendlines = lines; } + Pico.est.HighCol = HighColBase + screen_offset * HighColIncrement; Pico.est.DrawLineDest = (char *)DrawLineDestBase + screen_offset * DrawLineDestIncrement; } @@ -243,6 +250,7 @@ void PicoLineMode4(int line) if (PicoScanEnd != NULL) skip_next_line = PicoScanEnd(line + screen_offset); + Pico.est.HighCol += HighColIncrement; Pico.est.DrawLineDest = (char *)Pico.est.DrawLineDest + DrawLineDestIncrement; } @@ -296,7 +304,8 @@ void PicoDrawSetOutputMode4(pdso_t which) { case PDF_8BIT: FinalizeLineM4 = 
FinalizeLine8bitM4; break; case PDF_RGB555: FinalizeLineM4 = FinalizeLineRGB555M4; break; - default: FinalizeLineM4 = NULL; break; + default: FinalizeLineM4 = NULL; + PicoDrawSetInternalBuf(Pico.est.Draw2FB, 328); break; } } diff --git a/pico/pico_int.h b/pico/pico_int.h index 7d69bcab..2d688a9f 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -666,6 +666,8 @@ extern int (*PicoScanBegin)(unsigned int num); extern int (*PicoScanEnd)(unsigned int num); #define MAX_LINE_SPRITES 27 // +1 last sprite width, +4 hdr; total 32 extern unsigned char HighLnSpr[240][4+MAX_LINE_SPRITES+1]; +extern unsigned char *HighColBase; +extern int HighColIncrement; extern void *DrawLineDestBase; extern int DrawLineDestIncrement; extern unsigned int VdpSATCache[128]; diff --git a/platform/linux/emu.c b/platform/linux/emu.c index 597c1308..4ef08a79 100644 --- a/platform/linux/emu.c +++ b/platform/linux/emu.c @@ -22,6 +22,8 @@ const char *renderer_names[] = { "16bit accurate", " 8bit accurate", " 8bit fast const char *renderer_names32x[] = { "accurate", "faster", "fastest", NULL }; enum renderer_types { RT_16BIT, RT_8BIT_ACC, RT_8BIT_FAST, RT_COUNT }; +static int out_x, out_y; +static int out_w, out_h; void pemu_prep_defconfig(void) { @@ -70,15 +72,18 @@ static void draw_cd_leds(void) void pemu_finalize_frame(const char *fps, const char *notice) { if (currentConfig.renderer != RT_16BIT && !(PicoIn.AHW & PAHW_32X)) { - unsigned short *pd = (unsigned short *)g_screen_ptr + 8 * g_screen_ppitch; - unsigned char *ps = Pico.est.Draw2FB + 328*8 + 8; + unsigned short *pd = (unsigned short *)g_screen_ptr + out_y * g_screen_ppitch + out_x; + unsigned char *ps = Pico.est.Draw2FB + 328*out_y + out_x + 8; unsigned short *pal = Pico.est.HighPal; int i, x; PicoDrawUpdateHighPal(); - for (i = 0; i < 224; i++, ps += 8) - for (x = 0; x < 320; x++) + for (i = 0; i < out_h; i++, ps += 8) { + for (x = 0; x < out_w; x++) *pd++ = pal[*ps++]; + pd += 320 - out_w; + ps += 320 - out_w; + } } if (notice || 
(currentConfig.EmuOpt & EOPT_SHOW_FPS)) { @@ -180,9 +185,10 @@ void emu_video_mode_change(int start_line, int line_count, int is_32cols) { // clear whole screen in all buffers if (currentConfig.renderer != RT_16BIT && !(PicoIn.AHW & PAHW_32X)) - memset32(Pico.est.Draw2FB, 0, (320+8) * (8+240+8) / 4); - else - memset32(g_screen_ptr, 0, g_screen_ppitch * g_screen_height * 2 / 4); + memset32(Pico.est.Draw2FB, 0xe0e0e0e0, (320+8) * (8+240+8) / 4); + memset32(g_screen_ptr, 0, g_screen_ppitch * g_screen_height * 2 / 4); + out_y = start_line; out_x = (is_32cols ? 32 : 0); + out_h = line_count; out_w = (is_32cols ? 256:320); } void pemu_loop_prep(void) From a2f24bfa7bcd42b19fd2887591a5d81b4964d376 Mon Sep 17 00:00:00 2001 From: hiroshica Date: Mon, 24 Feb 2020 13:42:53 +0900 Subject: [PATCH 0353/1110] adding ym2413 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit squashed commits: YM2413追加中 一通り実装したけどポートへの書き込み方が不明でまだ音が出ない 細かい修正(未テスト) resetで初期化されるのをなんとかしたい sound 初期化と終了を追加してみた SN76496を参考にYM2413のアップデート方法を変更してみた stereoフラグをアップデートサイズに変更 処理順番を整理したら音が出た stateセーブに対応してみた addition: Support for the Japanese Mark-III extended FM sound source unit --- .gitignore | 4 + .gitmodules | 3 + AUTHORS | 1 + pico/pico.c | 2 + pico/pico.h | 2 +- pico/pico_int.h | 4 + pico/sms.c | 150 +++++++++++++++++++++++------------ pico/sound/emu2413 | 1 + pico/sound/sound.c | 91 ++++++++++++++++++++- pico/state.c | 11 ++- platform/common/common.mak | 1 + platform/common/menu_pico.c | 1 + platform/common/menu_pico.h | 1 + platform/gizmondo/Makefile | 1 + platform/gizmondo/menu.c | 1 + platform/libretro/libretro.c | 2 +- platform/psp/menu.c | 1 + 17 files changed, 221 insertions(+), 56 deletions(-) create mode 160000 pico/sound/emu2413 diff --git a/.gitignore b/.gitignore index b7ef852d..022f987e 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,7 @@ obj/ .opk_data PicoDrive PicoDrive.opk +pico_int_offs.h +amalgamate +textfilter + diff --git a/.gitmodules 
b/.gitmodules index 36091a2d..b778188f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,3 +4,6 @@ [submodule "cpu/cyclone"] path = cpu/cyclone url = https://github.com/notaz/cyclone68000.git +[submodule "pico/sound/emu2413"] + path = pico/sound/emu2413 + url = https://github.com/digital-sound-antiques/emu2413.git diff --git a/AUTHORS b/AUTHORS index d4791101..36dd861b 100644 --- a/AUTHORS +++ b/AUTHORS @@ -53,4 +53,5 @@ Additional thanks * Paul Cercueil for OpenDingux port. * Inder for some graphics. * squarepusher for some libretro fixes +* Hiroshica for support of japanese Mark-III extended YM2413 sound * Anyone else I forgot. Let me know if it's you. diff --git a/pico/pico.c b/pico/pico.c index 579cdd0d..577701ba 100644 --- a/pico/pico.c +++ b/pico/pico.c @@ -38,6 +38,7 @@ void PicoInit(void) PicoInitMCD(); PicoSVPInit(); Pico32xInit(); + PsndInit(); PicoDrawInit(); PicoDraw2Init(); @@ -50,6 +51,7 @@ void PicoExit(void) PicoExitMCD(); PicoCartUnload(); z80_exit(); + PsndExit(); free(Pico.sv.data); Pico.sv.data = NULL; diff --git a/pico/pico.h b/pico/pico.h index d8c5959c..5dac5c29 100644 --- a/pico/pico.h +++ b/pico/pico.h @@ -55,7 +55,7 @@ extern void *p32x_bios_g, *p32x_bios_m, *p32x_bios_s; #define POPT_EN_Z80 (1<< 2) #define POPT_EN_STEREO (1<< 3) #define POPT_ALT_RENDERER (1<< 4) // 00 00x0 -// unused (1<< 5) +#define POPT_EN_YM2413 (1<< 5) // unused (1<< 6) #define POPT_ACC_SPRITES (1<< 7) #define POPT_DIS_32C_BORDER (1<< 8) // 00 0x00 diff --git a/pico/pico_int.h b/pico/pico_int.h index 2d688a9f..1a1205f8 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -434,6 +434,7 @@ struct PicoSound unsigned int dac_pos; // last DAC position in Q20 unsigned int fm_pos; // last FM position in Q20 unsigned int psg_pos; // last PSG position in Q16 + unsigned int ym2413_pos; // last YM2413 position }; // run tools/mkoffsets pico/pico_int_offs.h if you change these @@ -897,10 +898,13 @@ PICO_INTERNAL_ASM void wram_2M_to_1M(unsigned char *m); PICO_INTERNAL_ASM void 
wram_1M_to_2M(unsigned char *m); // sound/sound.c +PICO_INTERNAL void PsndInit(void); +PICO_INTERNAL void PsndExit(void); PICO_INTERNAL void PsndReset(void); PICO_INTERNAL void PsndStartFrame(void); PICO_INTERNAL void PsndDoDAC(int cycle_to); PICO_INTERNAL void PsndDoPSG(int line_to); +PICO_INTERNAL void PsndDoYM2413(int line_to); PICO_INTERNAL void PsndDoFM(int line_to); PICO_INTERNAL void PsndClear(void); PICO_INTERNAL void PsndGetSamples(int y); diff --git a/pico/sms.c b/pico/sms.c index 0f4a48ad..5ddbebd2 100644 --- a/pico/sms.c +++ b/pico/sms.c @@ -15,6 +15,13 @@ #include "pico_int.h" #include "memory.h" #include "sound/sn76496.h" +#include "sound/emu2413/emu2413.h" + +extern void YM2413_regWrite(unsigned reg); +extern void YM2413_dataWrite(unsigned data); + + +static unsigned short ymflag = 0xffff; static unsigned char vdp_data_read(void) { @@ -100,42 +107,61 @@ static unsigned char z80_sms_in(unsigned short a) unsigned char d = 0; elprintf(EL_IO, "z80 port %04x read", a); - a &= 0xc1; - switch (a) - { - case 0x00: - case 0x01: - d = 0xff; + if((a&0xff)>= 0xf0){ + switch((a&0xff)) + { + case 0xf0: + // FM reg port break; - - case 0x40: /* V counter */ - d = Pico.video.v_counter; - elprintf(EL_HVCNT, "V counter read: %02x", d); + case 0xf1: + // FM data port break; - - case 0x41: /* H counter */ - d = Pico.m.rotate++; - elprintf(EL_HVCNT, "H counter read: %02x", d); - break; - - case 0x80: - d = vdp_data_read(); - break; - - case 0x81: - d = vdp_ctl_read(); - break; - - case 0xc0: /* I/O port A and B */ - d = ~((PicoIn.pad[0] & 0x3f) | (PicoIn.pad[1] << 6)); - break; - - case 0xc1: /* I/O port B and miscellaneous */ - d = (Pico.ms.io_ctl & 0x80) | ((Pico.ms.io_ctl << 1) & 0x40) | 0x30; - d |= ~(PicoIn.pad[1] >> 2) & 0x0f; + case 0xf2: + // bit 0 = 1 active FM Pac + if (PicoIn.opt & POPT_EN_YM2413){ + d = ymflag; + //printf("read FM Check = %02x\n", d); + } break; + } } + else{ + a &= 0xc1; + switch (a) + { + case 0x00: + case 0x01: + d = 0xff; + break; + case 
0x40: /* V counter */ + d = Pico.video.v_counter; + elprintf(EL_HVCNT, "V counter read: %02x", d); + break; + + case 0x41: /* H counter */ + d = Pico.m.rotate++; + elprintf(EL_HVCNT, "H counter read: %02x", d); + break; + + case 0x80: + d = vdp_data_read(); + break; + + case 0x81: + d = vdp_ctl_read(); + break; + + case 0xc0: /* I/O port A and B */ + d = ~((PicoIn.pad[0] & 0x3f) | (PicoIn.pad[1] << 6)); + break; + + case 0xc1: /* I/O port B and miscellaneous */ + d = (Pico.ms.io_ctl & 0x80) | ((Pico.ms.io_ctl << 1) & 0x40) | 0x30; + d |= ~(PicoIn.pad[1] >> 2) & 0x0f; + break; + } + } elprintf(EL_IO, "ret = %02x", d); return d; } @@ -143,27 +169,52 @@ static unsigned char z80_sms_in(unsigned short a) static void z80_sms_out(unsigned short a, unsigned char d) { elprintf(EL_IO, "z80 port %04x write %02x", a, d); - a &= 0xc1; - switch (a) - { - case 0x01: - Pico.ms.io_ctl = d; - break; - case 0x40: - case 0x41: - if ((d & 0x90) == 0x90) - PsndDoPSG(Pico.m.scanline); - SN76496Write(d); - break; + if((a&0xff)>= 0xf0){ + switch((a&0xff)) + { + case 0xf0: + // FM reg port + YM2413_regWrite(d); + //printf("write FM register = %02x\n", d); + break; + case 0xf1: + // FM data port + YM2413_dataWrite(d); + //printf("write FM data = %02x\n", d); + break; + case 0xf2: + // bit 0 = 1 active FM Pac + if (PicoIn.opt & POPT_EN_YM2413){ + ymflag = d; + //printf("write FM Check = %02x\n", d); + } + break; + } + } + else{ + a &= 0xc1; + switch (a) + { + case 0x01: + Pico.ms.io_ctl = d; + break; - case 0x80: - vdp_data_write(d); - break; + case 0x40: + case 0x41: + if ((d & 0x90) == 0x90) + PsndDoPSG(Pico.m.scanline); + SN76496Write(d); + break; - case 0x81: - vdp_ctl_write(d); - break; + case 0x80: + vdp_data_write(d); + break; + + case 0x81: + vdp_ctl_write(d); + break; + } } } @@ -212,6 +263,7 @@ void PicoResetMS(void) { z80_reset(); PsndReset(); // pal must be known here + ymflag = 0xffff; } void PicoPowerMS(void) diff --git a/pico/sound/emu2413 b/pico/sound/emu2413 new file mode 
160000 index 00000000..9f1dcf84 --- /dev/null +++ b/pico/sound/emu2413 @@ -0,0 +1 @@ +Subproject commit 9f1dcf848d0e33e775e49352f7bc83a9c0e87a81 diff --git a/pico/sound/sound.c b/pico/sound/sound.c index 0b371f25..6204a66a 100644 --- a/pico/sound/sound.c +++ b/pico/sound/sound.c @@ -13,6 +13,7 @@ #include "../pico_int.h" #include "../cd/cue.h" #include "mix.h" +#include "emu2413/emu2413.h" void (*PsndMix_32_to_16l)(short *dest, int *src, int count) = mix_32_to_16l_stereo; @@ -25,6 +26,25 @@ short cdda_out_buffer[2*1152]; // sn76496 extern int *sn76496_regs; +// ym2413 +#define YM2413_CLK 3579545 +OPLL old_opll; +static OPLL *opll = NULL; +unsigned YM2413_reg; + + +PICO_INTERNAL void PsndInit(void) +{ + opll = OPLL_new(YM2413_CLK, PicoIn.sndRate); + OPLL_setChipType(opll,0); + OPLL_reset(opll); +} + +PICO_INTERNAL void PsndExit(void) +{ + OPLL_delete(opll); + opll = NULL; +} PICO_INTERNAL void PsndReset(void) { @@ -59,6 +79,12 @@ void PsndRerate(int preserve_state) SN76496_init(Pico.m.pal ? 
OSC_PAL/15 : OSC_NTSC/15, PicoIn.sndRate); if (preserve_state) memcpy(sn76496_regs, state, 28*4); // restore old state + if(opll != NULL){ + if (preserve_state) memcpy(&old_opll, opll, sizeof(OPLL)); // remember old state + OPLL_setRate(opll, PicoIn.sndRate); + OPLL_reset(opll); + } + if (state) free(state); @@ -161,6 +187,48 @@ PICO_INTERNAL void PsndDoPSG(int line_to) SN76496Update(PicoIn.sndOut + pos, len, stereo); } +#if 0 +PICO_INTERNAL void PsndDoYM2413(int line_to) +{ + int pos, len; + int stereo = 0; + short *buf; + + // Q16, number of samples since last call + len = ((line_to+1) * Pico.snd.smpl_mult) - Pico.snd.ym2413_pos; + if (len <= 0) + return; + + // update position and calculate buffer offset and length + pos = (Pico.snd.ym2413_pos+0x8000) >> 16; + Pico.snd.ym2413_pos += len; + len = ((Pico.snd.ym2413_pos+0x8000) >> 16) - pos; + + if (!PicoIn.sndOut || !(PicoIn.opt & POPT_EN_YM2413)) + return; + + if (PicoIn.opt & POPT_EN_STEREO) { + stereo = 1; + pos <<= 1; + } + + buf = PicoIn.sndOut + pos; + while (len-- > 0) { + int16_t getdata = OPLL_calc(opll) * 3; + *buf++ += getdata; + buf += stereo; // only left for stereo, to be mixed to right later + } +} +#endif + +void YM2413_regWrite(unsigned data){ + OPLL_writeIO(opll,0,data); +} +void YM2413_dataWrite(unsigned data){ + OPLL_writeIO(opll,1,data); +} + + PICO_INTERNAL void PsndDoFM(int cyc_to) { int pos, len; @@ -249,7 +317,7 @@ PICO_INTERNAL void PsndClear(void) if (!(PicoIn.opt & POPT_EN_FM)) memset32(PsndBuffer, 0, PicoIn.opt & POPT_EN_STEREO ? 
len*2 : len); // drop pos remainder to avoid rounding errors (not entirely correct though) - Pico.snd.dac_pos = Pico.snd.fm_pos = Pico.snd.psg_pos = 0; + Pico.snd.dac_pos = Pico.snd.fm_pos = Pico.snd.psg_pos = Pico.snd.ym2413_pos = 0; } @@ -344,6 +412,7 @@ static int PsndRenderMS(int offset, int length) { int stereo = (PicoIn.opt & 8) >> 3; int psglen = ((Pico.snd.psg_pos+0x8000) >> 16); + int ym2413len = ((Pico.snd.ym2413_pos+0x8000) >> 16); pprof_start(sound); @@ -355,11 +424,25 @@ static int PsndRenderMS(int offset, int length) SN76496Update(psgbuf, length-psglen, stereo); } + if (length-ym2413len > 0) { + short *ym2413buf = PicoIn.sndOut + (ym2413len << stereo); + Pico.snd.ym2413_pos += (length-ym2413len) << 16; + int len = (length-ym2413len); + if (PicoIn.opt & POPT_EN_YM2413){ + while (len-- > 0) { + int16_t getdata = OPLL_calc(opll) * 3; + *ym2413buf += getdata; + ym2413buf += 1< 0; i--, p++) - *p |= *p << 16; + int i; + short *p; + for (i = length, p = (short *)PicoIn.sndOut; i > 0; i--, p+=2) + *(p + 1) = *p; } pprof_end(sound); diff --git a/pico/state.c b/pico/state.c index b0b6a334..da6b6fd8 100644 --- a/pico/state.c +++ b/pico/state.c @@ -11,10 +11,12 @@ #include "../cpu/sh2/sh2.h" #include "sound/ym2612.h" +#include "sound/emu2413/emu2413.h" #include "state.h" -// sn76496 +// sn76496 & ym2413 extern int *sn76496_regs; +extern OPLL old_opll; static arearw *areaRead; static arearw *areaWrite; @@ -123,6 +125,8 @@ typedef enum { CHUNK_DRAM, CHUNK_32XPAL, CHUNK_32X_EVT, + CHUNK_YM2413, //40 + //rename CHUNK_32X_FIRST = CHUNK_MSH2, CHUNK_32X_LAST = CHUNK_32X_EVT, // add new stuff here @@ -133,6 +137,7 @@ typedef enum { // CHUNK_DEFAULT_COUNT, CHUNK_CARTHW_ = CHUNK_CARTHW, // 64 (defined in PicoInt) + } chunk_name_e; static const char * const chunk_names[CHUNK_DEFAULT_COUNT] = { @@ -179,6 +184,7 @@ static const char * const chunk_names[CHUNK_DEFAULT_COUNT] = { "DRAM", "PAL", "events", + "YM2413", //40 }; static int write_chunk(chunk_name_e name, int len, void 
*data, void *file) @@ -283,6 +289,8 @@ static int state_save(void *file) memcpy(buff, pcd_event_times, sizeof(pcd_event_times)); CHECKED_WRITE(CHUNK_CD_EVT, 0x40, buff); + CHECKED_WRITE(CHUNK_YM2413, sizeof(OPLL), &old_opll); + len = gfx_context_save(buf2); CHECKED_WRITE(CHUNK_CD_GFX, len, buf2); len = cdc_context_save(buf2); @@ -442,6 +450,7 @@ static int state_load(void *file) case CHUNK_IOPORTS: CHECKED_READ_BUFF(PicoMem.ioports); break; case CHUNK_PSG: CHECKED_READ2(28*4, sn76496_regs); break; + case CHUNK_YM2413: CHECKED_READ2(sizeof(OPLL), &old_opll); break; case CHUNK_FM: ym2612_regs = YM2612GetRegs(); CHECKED_READ2(0x200+4, ym2612_regs); diff --git a/platform/common/common.mak b/platform/common/common.mak index 599f246f..8afe5d3f 100644 --- a/platform/common/common.mak +++ b/platform/common/common.mak @@ -124,6 +124,7 @@ endif # sound SRCS_COMMON += $(R)pico/sound/sound.c SRCS_COMMON += $(R)pico/sound/sn76496.c $(R)pico/sound/ym2612.c +SRCS_COMMON += $(R)pico/sound/emu2413/emu2413.c ifneq "$(ARCH)$(asm_mix)" "arm1" SRCS_COMMON += $(R)pico/sound/mix.c endif diff --git a/platform/common/menu_pico.c b/platform/common/menu_pico.c index 1d46e634..2e0e1279 100644 --- a/platform/common/menu_pico.c +++ b/platform/common/menu_pico.c @@ -501,6 +501,7 @@ static menu_entry e_menu_adv_options[] = mee_onoff ("Emulate YM2612 (FM)", MA_OPT2_ENABLE_YM2612, PicoIn.opt, POPT_EN_FM), mee_onoff ("Disable YM2612 SSG-EG", MA_OPT2_DISABLE_YM_SSG,PicoIn.opt, POPT_DIS_FM_SSGEG), mee_onoff ("Emulate SN76496 (PSG)", MA_OPT2_ENABLE_SN76496,PicoIn.opt, POPT_EN_PSG), + mee_onoff ("Emulate YM2413 (FM)", MA_OPT2_ENABLE_YM2413 ,PicoIn.opt, POPT_EN_YM2413), mee_onoff ("gzip savestates", MA_OPT2_GZIP_STATES, currentConfig.EmuOpt, EOPT_GZIP_SAVES), mee_onoff ("Don't save last used ROM", MA_OPT2_NO_LAST_ROM, currentConfig.EmuOpt, EOPT_NO_AUTOSVCFG), mee_onoff ("Disable idle loop patching",MA_OPT2_NO_IDLE_LOOPS,PicoIn.opt, POPT_DIS_IDLE_DET), diff --git a/platform/common/menu_pico.h 
b/platform/common/menu_pico.h index d15113fc..0abbfb03 100644 --- a/platform/common/menu_pico.h +++ b/platform/common/menu_pico.h @@ -50,6 +50,7 @@ typedef enum MA_OPT2_ENABLE_YM2612, MA_OPT2_DISABLE_YM_SSG, MA_OPT2_ENABLE_SN76496, + MA_OPT2_ENABLE_YM2413, MA_OPT2_GZIP_STATES, MA_OPT2_NO_LAST_ROM, MA_OPT2_RAMTIMINGS, /* gp2x */ diff --git a/platform/gizmondo/Makefile b/platform/gizmondo/Makefile index 7228be56..31530d79 100644 --- a/platform/gizmondo/Makefile +++ b/platform/gizmondo/Makefile @@ -64,6 +64,7 @@ OBJS += pico/sound/sound.o endif OBJS += pico/sound/mix_asm.o OBJS += pico/sound/sn76496.o pico/sound/ym2612.o +OBJS += pico/sound/emu2413/emu2413.o # zlib OBJS += zlib/gzio.o zlib/inffast.o zlib/inflate.o zlib/inftrees.o zlib/trees.o \ zlib/deflate.o zlib/crc32.o zlib/adler32.o zlib/zutil.o zlib/compress.o diff --git a/platform/gizmondo/menu.c b/platform/gizmondo/menu.c index 1045f47b..47778be2 100644 --- a/platform/gizmondo/menu.c +++ b/platform/gizmondo/menu.c @@ -931,6 +931,7 @@ menu_entry opt2_entries[] = { "Emulate Z80", MB_ONOFF, MA_OPT2_ENABLE_Z80, &PicoIn.opt, 0x00004, 0, 0, 1, 1 }, { "Emulate YM2612 (FM)", MB_ONOFF, MA_OPT2_ENABLE_YM2612, &PicoIn.opt, 0x00001, 0, 0, 1, 1 }, { "Emulate SN76496 (PSG)", MB_ONOFF, MA_OPT2_ENABLE_SN76496,&PicoIn.opt, 0x00002, 0, 0, 1, 1 }, + { "Emulate YM2413 (FM)", MB_ONOFF, MA_OPT2_ENABLE_YM2413, &PicoIn.opt, 0x00020, 0, 0, 1, 1 }, { "Double buffering", MB_ONOFF, MA_OPT2_DBLBUFF, &currentConfig.EmuOpt, 0x8000, 0, 0, 1, 1 }, { "Wait for V-sync (slow)", MB_ONOFF, MA_OPT2_VSYNC, &currentConfig.EmuOpt, 0x2000, 0, 0, 1, 1 }, { "gzip savestates", MB_ONOFF, MA_OPT2_GZIP_STATES, &currentConfig.EmuOpt, 0x0008, 0, 0, 1, 1 }, diff --git a/platform/libretro/libretro.c b/platform/libretro/libretro.c index 0794f555..23cd3df2 100644 --- a/platform/libretro/libretro.c +++ b/platform/libretro/libretro.c @@ -1366,7 +1366,7 @@ void retro_init(void) sceBlock = getVMBlock(); #endif - PicoIn.opt = POPT_EN_STEREO|POPT_EN_FM|POPT_EN_PSG|POPT_EN_Z80 + PicoIn.opt
= POPT_EN_STEREO|POPT_EN_FM|POPT_EN_PSG|POPT_EN_Z80|POPT_EN_YM2413 | POPT_EN_MCD_PCM|POPT_EN_MCD_CDDA|POPT_EN_MCD_GFX | POPT_EN_32X|POPT_EN_PWM | POPT_ACC_SPRITES|POPT_DIS_32C_BORDER; diff --git a/platform/psp/menu.c b/platform/psp/menu.c index fc31b8e7..1b714238 100644 --- a/platform/psp/menu.c +++ b/platform/psp/menu.c @@ -1119,6 +1119,7 @@ menu_entry opt2_entries[] = { "Emulate Z80", MB_ONOFF, MA_OPT2_ENABLE_Z80, &PicoIn.opt, 0x00004, 0, 0, 1, 1 }, { "Emulate YM2612 (FM)", MB_ONOFF, MA_OPT2_ENABLE_YM2612, &PicoIn.opt, 0x00001, 0, 0, 1, 1 }, { "Emulate SN76496 (PSG)", MB_ONOFF, MA_OPT2_ENABLE_SN76496, &PicoIn.opt, 0x00002, 0, 0, 1, 1 }, + { "Emulate YM2413 (FM)", MB_ONOFF, MA_OPT2_ENABLE_YM2413, &PicoIn.opt, 0x00020, 0, 0, 1, 1 }, { "gzip savestates", MB_ONOFF, MA_OPT2_GZIP_STATES, &currentConfig.EmuOpt, 0x00008, 0, 0, 1, 1 }, { "Don't save last used ROM", MB_ONOFF, MA_OPT2_NO_LAST_ROM, &currentConfig.EmuOpt, 0x00020, 0, 0, 1, 1 }, { "Status line in main menu", MB_ONOFF, MA_OPT2_STATUS_LINE, &currentConfig.EmuOpt, 0x20000, 0, 0, 1, 1 }, From 2e66d031fe8c5921f3cb15e24ebc704dc343e3de Mon Sep 17 00:00:00 2001 From: hiroshica Date: Wed, 26 Feb 2020 15:42:14 +0900 Subject: [PATCH 0354/1110] correcting the treatment of color number 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit squashed commits: BGのLow/Highの描画を分離した プライオリティを覗いて正しく描画された状態になった --- pico/mode4.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 80 insertions(+), 3 deletions(-) diff --git a/pico/mode4.c b/pico/mode4.c index ca219a0a..df962049 100644 --- a/pico/mode4.c +++ b/pico/mode4.c @@ -19,6 +19,41 @@ static void (*FinalizeLineM4)(int line); static int skip_next_line; static int screen_offset; +#define PLANAR_PIXELL(x,p) \ + t = pack & (0x80808080 >> p); \ + t = ((t >> (7-p)) | (t >> (14-p)) | (t >> (21-p)) | (t >> (28-p))) & 0x0f; \ + pd[x] = pal|t; + +static void TileNormM4Low(int sx, unsigned int pack, int pal) +{ + unsigned char *pd =
Pico.est.HighCol + sx; + unsigned int t; + + PLANAR_PIXELL(0, 0) + PLANAR_PIXELL(1, 1) + PLANAR_PIXELL(2, 2) + PLANAR_PIXELL(3, 3) + PLANAR_PIXELL(4, 4) + PLANAR_PIXELL(5, 5) + PLANAR_PIXELL(6, 6) + PLANAR_PIXELL(7, 7) +} + +static void TileFlipM4Low(int sx, unsigned int pack, int pal) +{ + unsigned char *pd = Pico.est.HighCol + sx; + unsigned int t; + + PLANAR_PIXELL(0, 7) + PLANAR_PIXELL(1, 6) + PLANAR_PIXELL(2, 5) + PLANAR_PIXELL(3, 4) + PLANAR_PIXELL(4, 3) + PLANAR_PIXELL(5, 2) + PLANAR_PIXELL(6, 1) + PLANAR_PIXELL(7, 0) +} + #define PLANAR_PIXEL(x,p) \ t = pack & (0x80808080 >> p); \ if (t) { \ @@ -111,7 +146,49 @@ static void draw_sprites(int scanline) } // tilex_ty_prio merged to reduce register pressure -static void draw_strip(const unsigned short *nametab, int dx, int cells, int tilex_ty_prio) +static void draw_strip_low(const unsigned short *nametab, int dx, int cells, int tilex_ty_prio) +{ + int oldcode = -1, blank = -1; // The tile we know is blank + int addr = 0, pal = 0; + + // Draw tiles across screen: + for (; cells > 0; dx += 8, tilex_ty_prio++, cells--) + { + unsigned int pack; + int code; + + code = nametab[tilex_ty_prio & 0x1f]; + if (code == blank) + continue; + /* + if ((code ^ tilex_ty_prio) & 0x1000) // priority differs? 
+ continue; + */ + + if (code != oldcode) { + oldcode = code; + // Get tile address/2: + addr = (code & 0x1ff) << 4; + addr += tilex_ty_prio >> 16; + if (code & 0x0400) + addr ^= 0xe; // Y-flip + + pal = (code>>7) & 0x10; + } + + pack = *(unsigned int *)(PicoMem.vram + addr); /* Get 4 bitplanes / 8 pixels */ + /* + if (pack == 0) { + blank = code; + continue; + } + */ + if (code & 0x0200) TileFlipM4Low(dx, pack, pal); + else TileNormM4Low(dx, pack, pal); + } +} +// tilex_ty_prio merged to reduce register pressure +static void draw_strip_high(const unsigned short *nametab, int dx, int cells, int tilex_ty_prio) { int oldcode = -1, blank = -1; // The tile we know is blank int addr = 0, pal = 0; @@ -184,7 +261,7 @@ static void DrawDisplayM4(int scanline) // low priority tiles if (!(pv->debug_p & PVD_KILL_B)) - draw_strip(nametab, dx, cells, tilex | 0x0000 | (ty << 16)); + draw_strip_low(nametab, dx, cells, tilex | 0x0000 | (ty << 16)); // sprites if (!(pv->debug_p & PVD_KILL_S_LO)) @@ -192,7 +269,7 @@ static void DrawDisplayM4(int scanline) // high priority tiles (use virtual layer switch just for fun) if (!(pv->debug_p & PVD_KILL_A)) - draw_strip(nametab, dx, cells, tilex | 0x1000 | (ty << 16)); + draw_strip_high(nametab, dx, cells, tilex | 0x1000 | (ty << 16)); if (pv->reg[0] & 0x20) { // first column masked, caculate offset to start of line From 95cb712a52125b89daf4f3d7eef69d01b124975c Mon Sep 17 00:00:00 2001 From: hiroshica Date: Wed, 26 Feb 2020 22:13:15 +0900 Subject: [PATCH 0355/1110] adding RG350 platform MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit squashed commits: RG350用のキーマップを作った opkのコマンドライン起動のエラー修正 mingw挑戦途中 --- README.md | 1 + configure | 15 ++++++++-- platform/opendingux/data/default.gcw0.desktop | 8 ++--- platform/opendingux/inputmap.c | 30 +++++++++++++++++++ 4 files changed, 47 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index a5d0ad3a..86da98e1 100644 --- a/README.md +++ b/README.md 
@@ -34,6 +34,7 @@ gp2x,wiz,caanoo|open2x with ubuntu arm gcc 4.7|CROSS_COMPILE=arm-linux-gnueabi- opendingux|opendingux|CROSS_COMPILE=mipsel-linux- CFLAGS="-I$TC/usr/include -I$TC/usr/include/SDL" LDFLAGS="--sysroot $TC -L$TC/lib" ./configure --platform=opendingux opendingux|opendingux with ubuntu mips gcc 5.4|CROSS_COMPILE=mipsel-linux-gnu- CFLAGS="-I$TC/usr/include -I$TC/usr/include/SDL" LDFLAGS="-B$TC/usr/lib -B$TC/lib -Wl,-rpath-link=$TC/usr/lib -Wl,-rpath-link=$TC/lib" ./configure --platform=opendingux gcw0|gcw0|CROSS_COMPILE=mipsel-gcw0-linux-uclibc- CFLAGS="-I$TC/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include -I$TC/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include/SDL" LDFLAGS="--sysroot $TC/usr/mipsel-gcw0-linux-uclibc/sysroot" ./configure --platform=gcw0 +rg350|rg350|CROSS_COMPILE=mipsel-linux- CFLAGS="-I$TC/usr/include -I$TC/usr/include/SDL" LDFLAGS="--sysroot $TC -L$TC/lib" ./configure --platform=rg350 For gp2x, wiz, and caanoo you may need to compile libpng first. diff --git a/configure b/configure index c82fe205..6860eb70 100755 --- a/configure +++ b/configure @@ -38,7 +38,7 @@ check_define() # setting options to "yes" or "no" will make that choice default, # "" means "autodetect". 
-platform_list="generic pandora gp2x wiz caanoo opendingux gcw0 rpi1 rpi2" +platform_list="generic pandora gp2x wiz caanoo opendingux gcw0 rg350 rpi1 rpi2" platform="generic" sound_driver_list="oss alsa sdl" sound_drivers="" @@ -62,7 +62,12 @@ CC="${CC-${CROSS_COMPILE}gcc}" CXX="${CXX-${CROSS_COMPILE}g++}" AS="${AS-${CROSS_COMPILE}as}" STRIP="${STRIP-${CROSS_COMPILE}strip}" -test -n "$SDL_CONFIG" || SDL_CONFIG="`$CC $CFLAGS $LDFLAGS --print-sysroot 2> /dev/null || true`/usr/bin/sdl-config" +SYSROOT=`$CC $CFLAGS $LDFLAGS --print-sysroot 2> /dev/null || true` +test -n "$SDL_CONFIG" || SDL_CONFIG="$(ls $SYSROOT/*bin*/sdl-config 2>/dev/null | grep /bin/sdl-config | head -n 1)" +test -n "$SDL_CONFIG" || SDL_CONFIG="$(ls $SYSROOT/*/*bin*/sdl-config 2>/dev/null | grep /bin/sdl-config | head -n 1)" +#test -n "$SDL_CONFIG" || SDL_CONFIG="$(ls $SYSROOT/*bin*/sdl2-config 2>/dev/null | grep /bin/sdl2-config | head -n 1)" +#test -n "$SDL_CONFIG" || SDL_CONFIG="$(ls $SYSROOT/*/*bin*/sdl2-config 2>/dev/null | grep /bin/sdl2-config | head -n 1)" +SDLVERSION=sdl && echo $SDL_CONFIG | grep -q sdl2 && SDLVERSION=sdl2 MAIN_LDLIBS="$LDLIBS -lm" config_mak="config.mak" @@ -86,9 +91,10 @@ set_platform() ;; generic) ;; - opendingux | gcw0) + opendingux | gcw0 | rg350) sound_drivers="sdl" # both are really an opendingux + CFLAGS="$CFLAGS -D__`echo $platform | tr '[a-z]' '[A-Z]'`__" platform="opendingux" ;; pandora) @@ -376,6 +382,9 @@ if [ "$need_sdl" = "yes" ]; then CFLAGS="$CFLAGS `$SDL_CONFIG --cflags`" MAIN_LDLIBS="`$SDL_CONFIG --libs` $MAIN_LDLIBS" check_sdl `$SDL_CONFIG --libs` || fail "please install libsdl (libsdl1.2-dev)" + if [ "$SDLVERSION" = "sdl2" ]; then + CFLAGS="$CFLAGS -D__USE_SDL2__" + fi fi if check_option -Wno-unused_result; then diff --git a/platform/opendingux/data/default.gcw0.desktop b/platform/opendingux/data/default.gcw0.desktop index 80458bd8..3e17a75e 100644 --- a/platform/opendingux/data/default.gcw0.desktop +++ b/platform/opendingux/data/default.gcw0.desktop 
@@ -1,9 +1,9 @@ [Desktop Entry] +Type=Application Name=Picodrive Comment=A megadrive/genesis emulator -Exec=PicoDrive -Terminal=false -Type=Application -StartupNotify=true +Exec=PicoDrive %f Icon=megadrive +Terminal=false Categories=emulators; +MimeType=.md;.smd;.bin;.sms;.cue;.32x;.zip;.7z diff --git a/platform/opendingux/inputmap.c b/platform/opendingux/inputmap.c index 0398fc68..8c4ba6a0 100644 --- a/platform/opendingux/inputmap.c +++ b/platform/opendingux/inputmap.c @@ -37,6 +37,7 @@ const struct menu_keymap in_sdl_key_map[] = { SDLK_BACKSPACE, PBTN_R }, }; +#if !defined(__RG350__) const char * const in_sdl_key_names[SDLK_LAST] = { [SDLK_UP] = "UP", [SDLK_DOWN] = "DOWN", @@ -52,4 +53,33 @@ const char * const in_sdl_key_names[SDLK_LAST] = { [SDLK_ESCAPE] = "SELECT", [SDLK_POWER] = "POWER", [SDLK_PAUSE] = "LOCK", + + [SDLK_PAGEUP] = "L2", + [SDLK_PAGEDOWN] = "R2", + [SDLK_KP_DIVIDE] = "L3", + [SDLK_KP_PERIOD] = "R3", }; +#else +/* RG 350 */ +const char * const in_sdl_key_names[SDLK_LAST] = { + [SDLK_UP] = "UP", + [SDLK_DOWN] = "DOWN", + [SDLK_LEFT] = "LEFT", + [SDLK_RIGHT] = "RIGHT", + [SDLK_LCTRL] = "A", + [SDLK_LALT] = "B", + [SDLK_SPACE] = "X", + [SDLK_LSHIFT] = "Y", + [SDLK_TAB] = "L", + [SDLK_BACKSPACE] = "R", + [SDLK_RETURN] = "START", + [SDLK_ESCAPE] = "SELECT", + [SDLK_HOME] = "POWER", + [SDLK_PAUSE] = "LOCK", + + [SDLK_PAGEUP] = "L2", + [SDLK_PAGEDOWN] = "R2", + [SDLK_KP_DIVIDE] = "L3", + [SDLK_KP_PERIOD] = "R3", +}; +#endif From a97dd5cded0d9e4b0391054e7c839e368eabe2c3 Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 29 Jul 2020 20:41:07 +0200 Subject: [PATCH 0356/1110] configure, fix for newer gcc --- configure | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/configure b/configure index 6860eb70..786d249a 100755 --- a/configure +++ b/configure @@ -278,7 +278,7 @@ esac # basic compiler test cat > $TMPC < $TMPC < - int main(void) { uncompress(0, 0, 0, 0); } + int main (int argc, char *argv[]) { uncompress(0, 0, 0, 0); } EOF 
compile_binary "$@" } @@ -298,7 +298,7 @@ check_libpng() { cat > $TMPC < - void main() { png_init_io(0, 0); } + int main (int argc, char *argv[]) { png_init_io(0, 0); } EOF # compile_binary compile_object @@ -309,7 +309,7 @@ check_oss() cat > $TMPC < #include - void main() { int a=0; ioctl(0, SNDCTL_DSP_SETFMT, &a); } + int main (int argc, char *argv[]) { int a=0; ioctl(0, SNDCTL_DSP_SETFMT, &a); } EOF compile_binary } @@ -318,7 +318,7 @@ check_alsa() { cat > $TMPC < - void main() { snd_pcm_open(0, 0, 0, 0); } + int main (int argc, char *argv[]) { snd_pcm_open(0, 0, 0, 0); } EOF compile_binary "$@" } @@ -327,7 +327,7 @@ check_sdl() { cat > $TMPC < - void main() { SDL_OpenAudio(0, 0); } + int main (int argc, char *argv[]) { SDL_OpenAudio(0, 0); } EOF compile_binary "$@" } @@ -336,7 +336,7 @@ check_libavcodec() { cat > $TMPC < - void main() { avcodec_decode_audio3(0, 0, 0, 0); } + int main (int argc, char *argv[]) { avcodec_decode_audio3(0, 0, 0, 0); } EOF compile_object "$@" } From b74303b1a74839f5de239ae41c69d6b271ab73bd Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 29 Jul 2020 20:47:16 +0200 Subject: [PATCH 0357/1110] vdp mode 4, optimisation --- pico/mode4.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/pico/mode4.c b/pico/mode4.c index df962049..cb1e8e0c 100644 --- a/pico/mode4.c +++ b/pico/mode4.c @@ -19,6 +19,12 @@ static void (*FinalizeLineM4)(int line); static int skip_next_line; static int screen_offset; +static void TileBGM4(int sx, int pal) +{ + u32 *pd = (u32 *)(Pico.est.HighCol + sx); + pd[0] = pd[1] = pal ? 
0x10101010 : 0; +} + #define PLANAR_PIXELL(x,p) \ t = pack & (0x80808080 >> p); \ t = ((t >> (7-p)) | (t >> (14-p)) | (t >> (21-p)) | (t >> (28-p))) & 0x0f; \ @@ -104,7 +110,7 @@ static void draw_sprites(int scanline) if (pv->reg[0] & 8) xoff = 0; - if (!(PicoIn.opt & POPT_DIS_32C_BORDER)) + if (!FinalizeLineM4 && !(PicoIn.opt & POPT_DIS_32C_BORDER)) xoff += 32; sat = (unsigned char *)PicoMem.vram + ((pv->reg[5] & 0x7e) << 7); @@ -145,10 +151,11 @@ static void draw_sprites(int scanline) } } + // tilex_ty_prio merged to reduce register pressure static void draw_strip_low(const unsigned short *nametab, int dx, int cells, int tilex_ty_prio) { - int oldcode = -1, blank = -1; // The tile we know is blank + int oldcode = -1; int addr = 0, pal = 0; // Draw tiles across screen: @@ -158,12 +165,6 @@ static void draw_strip_low(const unsigned short *nametab, int dx, int cells, int int code; code = nametab[tilex_ty_prio & 0x1f]; - if (code == blank) - continue; - /* - if ((code ^ tilex_ty_prio) & 0x1000) // priority differs? 
- continue; - */ if (code != oldcode) { oldcode = code; @@ -177,14 +178,9 @@ static void draw_strip_low(const unsigned short *nametab, int dx, int cells, int } pack = *(unsigned int *)(PicoMem.vram + addr); /* Get 4 bitplanes / 8 pixels */ - /* - if (pack == 0) { - blank = code; - continue; - } - */ - if (code & 0x0200) TileFlipM4Low(dx, pack, pal); - else TileNormM4Low(dx, pack, pal); + if (pack == 0) TileBGM4(dx, pal); + else if (code & 0x0200) TileFlipM4Low(dx, pack, pal); + else TileNormM4Low(dx, pack, pal); } } // tilex_ty_prio merged to reduce register pressure From 1f49b7503216a370fed12ff3128ea76eba47ae25 Mon Sep 17 00:00:00 2001 From: kub Date: Sun, 2 Aug 2020 23:17:57 +0200 Subject: [PATCH 0358/1110] SDL UI, fix SDL input and SDL window output mode for osx compile NB for osx >= 10.14 you need SDL >= rev 13688 (ATM only available from SDL repo) --- Makefile | 4 ++++ pico/pico.h | 1 + platform/common/main.c | 5 ++++- platform/common/plat_sdl.c | 15 +++++++++------ platform/opendingux/inputmap.c | 1 + 5 files changed, 19 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index d5953bef..31a23ab8 100644 --- a/Makefile +++ b/Makefile @@ -206,6 +206,10 @@ include platform/common/common.mak OBJS += $(OBJS_COMMON) CFLAGS += $(addprefix -D,$(DEFINES)) +ifneq (,$(findstring sdl,$(OBJS))) +CFLAGS += -DUSE_SDL +endif + ifneq ($(findstring gcc,$(CC)),) LDFLAGS += -Wl,-Map=$(TARGET).map endif diff --git a/pico/pico.h b/pico/pico.h index 5dac5c29..4cc29433 100644 --- a/pico/pico.h +++ b/pico/pico.h @@ -10,6 +10,7 @@ #ifndef PICO_H #define PICO_H +#include // [u]int_t #include // size_t #ifdef __cplusplus diff --git a/platform/common/main.c b/platform/common/main.c index e7b04466..6acdf531 100644 --- a/platform/common/main.c +++ b/platform/common/main.c @@ -10,6 +10,9 @@ #include #include #include +#ifdef USE_SDL +#include +#endif #include "../libpicofe/input.h" #include "../libpicofe/plat.h" @@ -83,12 +86,12 @@ int main(int argc, char *argv[]) 
plat_target_init(); plat_init(); + menu_init(); emu_prep_defconfig(); // depends on input emu_read_config(NULL, 0); emu_init(); - menu_init(); #ifdef GPERF ProfilerStart("gperf.out"); diff --git a/platform/common/plat_sdl.c b/platform/common/plat_sdl.c index 276a0c61..92da9f40 100644 --- a/platform/common/plat_sdl.c +++ b/platform/common/plat_sdl.c @@ -75,15 +75,15 @@ const struct menu_keymap in_sdl_joy_map[] __attribute__((weak)) = { SDLK_WORLD_3, PBTN_MA3 }, }; -extern const char * const in_sdl_key_names[] __attribute__((weak)); +const char *const *in_sdl_key_names_p __attribute__((weak)) = NULL; -static const struct in_pdata in_sdl_platform_data = { + +static struct in_pdata in_sdl_platform_data = { .defbinds = in_sdl_defbinds, .key_map = in_sdl_key_map, .kmap_size = sizeof(in_sdl_key_map) / sizeof(in_sdl_key_map[0]), .joy_map = in_sdl_joy_map, .jmap_size = sizeof(in_sdl_joy_map) / sizeof(in_sdl_joy_map[0]), - .key_names = in_sdl_key_names, }; /* YUV stuff */ @@ -177,8 +177,10 @@ void plat_video_flip(void) if (SDL_MUSTLOCK(plat_sdl_screen)) SDL_UnlockSurface(plat_sdl_screen); SDL_Flip(plat_sdl_screen); - g_screen_ptr = plat_sdl_screen->pixels; - PicoDrawSetOutBuf(g_screen_ptr, g_screen_ppitch * 2); + if (g_screen_ptr != shadow_fb) { + g_screen_ptr = plat_sdl_screen->pixels; + plat_video_toggle_renderer(0, 0); + } } } @@ -244,8 +246,8 @@ void plat_video_loop_prepare(void) if (SDL_MUSTLOCK(plat_sdl_screen)) SDL_LockSurface(plat_sdl_screen); g_screen_ptr = plat_sdl_screen->pixels; + plat_video_toggle_renderer(0, 0); } - PicoDrawSetOutBuf(g_screen_ptr, g_screen_ppitch * 2); } void plat_early_init(void) @@ -292,6 +294,7 @@ void plat_init(void) g_screen_ppitch = 320; g_screen_ptr = shadow_fb; + in_sdl_platform_data.key_names = in_sdl_key_names_p; in_sdl_init(&in_sdl_platform_data, plat_sdl_event_handler); in_probe(); diff --git a/platform/opendingux/inputmap.c b/platform/opendingux/inputmap.c index 8c4ba6a0..388d5e01 100644 --- a/platform/opendingux/inputmap.c +++ 
b/platform/opendingux/inputmap.c @@ -83,3 +83,4 @@ const char * const in_sdl_key_names[SDLK_LAST] = { [SDLK_KP_PERIOD] = "R3", }; #endif +const char *const *in_sdl_key_names_p = in_sdl_key_names; From 352479001c82fc5c9abbcc1501c6e78ddc691a14 Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 4 Aug 2020 22:24:56 +0200 Subject: [PATCH 0359/1110] vdp, optimisation for 8bit renderers --- pico/draw.c | 9 ++++++++- pico/mode4.c | 18 ++++++++---------- platform/gp2x/emu.c | 2 +- platform/linux/emu.c | 2 +- 4 files changed, 18 insertions(+), 13 deletions(-) diff --git a/pico/draw.c b/pico/draw.c index 43bbea76..ed818546 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -1582,7 +1582,10 @@ static void FinalizeLine8bit(int sh, int line, struct PicoEState *est) len = 256; } - if (!sh && (est->rendstatus & PDRAW_SONIC_MODE)) { + if (DrawLineDestBase == HighColBase) { + if (!sh && (est->rendstatus & PDRAW_SONIC_MODE)) + blockcpy_or(pd+8, est->HighCol+8, len, est->SonicPalCount*0x40); + } else if (!sh && (est->rendstatus & PDRAW_SONIC_MODE)) { // select active backup palette blockcpy_or(pd, est->HighCol+8, len, est->SonicPalCount*0x40); } else { @@ -1892,6 +1895,10 @@ void PicoDrawSetOutFormat(pdso_t which, int use_32x_line_mode) void PicoDrawSetOutBufMD(void *dest, int increment) { + if (FinalizeLine == FinalizeLine8bit && increment == 328) { + // kludge for no-copy mode + PicoDrawSetInternalBuf(dest, increment); + } if (dest != NULL) { DrawLineDestBase = dest; DrawLineDestIncrement = increment; diff --git a/pico/mode4.c b/pico/mode4.c index cb1e8e0c..fa4407b5 100644 --- a/pico/mode4.c +++ b/pico/mode4.c @@ -17,7 +17,7 @@ static void (*FinalizeLineM4)(int line); static int skip_next_line; -static int screen_offset; +static int screen_offset, line_offset; static void TileBGM4(int sx, int pal) { @@ -110,8 +110,7 @@ static void draw_sprites(int scanline) if (pv->reg[0] & 8) xoff = 0; - if (!FinalizeLineM4 && !(PicoIn.opt & POPT_DIS_32C_BORDER)) - xoff += 32; + xoff += line_offset; sat = 
(unsigned char *)PicoMem.vram + ((pv->reg[5] & 0x7e) << 7); if (pv->reg[1] & 2) { @@ -252,8 +251,7 @@ static void DrawDisplayM4(int scanline) if (dx != 8) cells++; // have hscroll, need to draw 1 cell more dx += cellskip << 3; - if (!FinalizeLineM4 && !(PicoIn.opt & POPT_DIS_32C_BORDER)) - dx += 32; + dx += line_offset; // low priority tiles if (!(pv->debug_p & PVD_KILL_B)) @@ -365,18 +363,18 @@ static void FinalizeLine8bitM4(int line) { unsigned char *pd = Pico.est.DrawLineDest; - if (!(PicoIn.opt & POPT_DIS_32C_BORDER)) - pd += 32; - - memcpy(pd, Pico.est.HighCol + 8, 256); + if (HighColBase != DrawLineDestBase) + memcpy(pd + line_offset, Pico.est.HighCol + line_offset + 8, 256); } void PicoDrawSetOutputMode4(pdso_t which) { + line_offset = PicoIn.opt & POPT_DIS_32C_BORDER ? 0 : 32; switch (which) { case PDF_8BIT: FinalizeLineM4 = FinalizeLine8bitM4; break; - case PDF_RGB555: FinalizeLineM4 = FinalizeLineRGB555M4; break; + case PDF_RGB555: FinalizeLineM4 = FinalizeLineRGB555M4; + line_offset = 0 /* done in FinalizeLine */; break; default: FinalizeLineM4 = NULL; PicoDrawSetInternalBuf(Pico.est.Draw2FB, 328); break; } diff --git a/platform/gp2x/emu.c b/platform/gp2x/emu.c index 1deb84da..42e34ee3 100644 --- a/platform/gp2x/emu.c +++ b/platform/gp2x/emu.c @@ -364,7 +364,7 @@ void pemu_finalize_frame(const char *fps, const char *notice) localPalSize = make_local_pal(1); // a hack for VR if (PicoIn.AHW & PAHW_SVP) - memset32((int *)(Pico.est.Draw2FB+328*8+328*223), 0xe0e0e0e0, 328); + memset32((int *)(Pico.est.Draw2FB+328*8+328*223), 0xe0e0e0e0, 328/4); // do actual copy vidcpyM2(g_screen_ptr, Pico.est.Draw2FB+328*8, !(Pico.video.reg[12] & 1), !(PicoIn.opt & POPT_DIS_32C_BORDER)); diff --git a/platform/linux/emu.c b/platform/linux/emu.c index 4ef08a79..8c86471d 100644 --- a/platform/linux/emu.c +++ b/platform/linux/emu.c @@ -107,7 +107,7 @@ static void apply_renderer(void) case RT_8BIT_ACC: PicoIn.opt &= ~POPT_ALT_RENDERER; PicoDrawSetOutFormat(PDF_8BIT, 0); - 
PicoDrawSetOutBuf(Pico.est.Draw2FB + 8, 328); + PicoDrawSetOutBuf(Pico.est.Draw2FB, 328); break; case RT_8BIT_FAST: PicoIn.opt |= POPT_ALT_RENDERER; From dc56ca2edef3beb46932ca9aba83d99a67c5bcd2 Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 17 Sep 2020 23:47:34 +0200 Subject: [PATCH 0360/1110] vdp fifo, tentative fix for hanging DMA --- pico/videoport.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pico/videoport.c b/pico/videoport.c index f324f704..cbcc60d3 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -64,7 +64,7 @@ static struct VdpFIFO { // XXX this must go into save file! // queued FIFO transfers, ...x = index, ...l = queue length // each entry has 2 values: [n]>>3 = #writes, [n]&7 = flags (FQ_*) unsigned int fifo_queue[8], fifo_qx, fifo_ql; - unsigned int fifo_total; // total# of pending FIFO entries (w/o BGDMA) + int fifo_total; // total# of pending FIFO entries (w/o BGDMA) unsigned short fifo_slot; // last executed slot in current scanline unsigned short fifo_maxslot;// #slots in scanline @@ -85,7 +85,7 @@ static __inline int AdvanceFIFOEntry(struct VdpFIFO *vf, struct PicoVideo *pv, i if (l > cnt) l = cnt; if (!(vf->fifo_queue[vf->fifo_qx] & FQ_BGDMA)) - vf->fifo_total -= ((cnt & b) + l) >> b; + if ((vf->fifo_total -= ((cnt & b) + l) >> b) < 0) vf->fifo_total = 0; cnt -= l; // if entry has been processed... 
From 48b648070b8180abcde608bd3247642f58ea5c4c Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 17 Sep 2020 23:50:18 +0200 Subject: [PATCH 0361/1110] sh2 drc, fix symbol clash --- cpu/sh2/compiler.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 3c62f13a..4ffe8e58 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -3004,12 +3004,12 @@ static uint32_t REGPARM(2) sh2_drc_divu32(uint32_t dv, uint32_t ds) // bad case: use the sh2 algo to get the right result int q = 0, t = 0, s = 16; while (s--) { - uint32_t _ = dv>>31; + uint32_t v = dv>>31; dv = (dv<<1) | t; - t = _; - _ = dv; - if (q) dv += ds, q = dv < _; - else dv -= ds, q = !(dv < _); + t = v; + v = dv; + if (q) dv += ds, q = dv < v; + else dv -= ds, q = !(dv < v); q ^= t, t = !q; } return (dv<<1) | t; @@ -3030,12 +3030,12 @@ static uint32_t REGPARM(3) sh2_drc_divu64(uint32_t dh, uint32_t *dl, uint32_t ds uint64_t dv = *dl | ((uint64_t)dh << 32); int q = 0, t = 0, s = 32; while (s--) { - uint64_t _ = dv>>63; + uint64_t v = dv>>63; dv = (dv<<1) | t; - t = _; - _ = dv; - if (q) dv += ((uint64_t)ds << 32), q = dv < _; - else dv -= ((uint64_t)ds << 32), q = !(dv < _); + t = v; + v = dv; + if (q) dv += ((uint64_t)ds << 32), q = dv < v; + else dv -= ((uint64_t)ds << 32), q = !(dv < v); q ^= t, t = !q; } *dl = (dv<<1) | t; @@ -3058,12 +3058,12 @@ static uint32_t REGPARM(2) sh2_drc_divs32(int32_t dv, int32_t ds) // bad case: use the sh2 algo to get the right result int m = (uint32_t)ds>>31, q = (uint32_t)dv>>31, t = m^q, s = 16; while (s--) { - uint32_t _ = (uint32_t)dv>>31; + uint32_t v = (uint32_t)dv>>31; dv = (dv<<1) | t; - t = _; - _ = dv; - if (m^q) dv += ds, q = (uint32_t)dv < _; - else dv -= ds, q = !((uint32_t)dv < _); + t = v; + v = dv; + if (m^q) dv += ds, q = (uint32_t)dv < v; + else dv -= ds, q = !((uint32_t)dv < v); q ^= m^t, t = !(m^q); } return (dv<<1) | t; @@ -3089,12 +3089,12 @@ static 
uint32_t REGPARM(3) sh2_drc_divs64(int32_t dh, uint32_t *dl, int32_t ds) uint64_t dv = *dl | ((uint64_t)dh << 32); int m = (uint32_t)ds>>31, q = (uint64_t)dv>>63, t = m^q, s = 32; while (s--) { - int64_t _ = (uint64_t)dv>>63; + int64_t v = (uint64_t)dv>>63; dv = (dv<<1) | t; - t = _; - _ = dv; - if (m^q) dv += ((uint64_t)ds << 32), q = dv < _; - else dv -= ((uint64_t)ds << 32), q = !(dv < _); + t = v; + v = dv; + if (m^q) dv += ((uint64_t)ds << 32), q = dv < v; + else dv -= ((uint64_t)ds << 32), q = !(dv < v); q ^= m^t, t = !(m^q); } *dl = (dv<<1) | t; From 056f101ff8fb8123c71ca3e37991ed64d9a65371 Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 17 Sep 2020 23:55:55 +0200 Subject: [PATCH 0362/1110] sh2 drc, standalone testing tool --- tools/drctest.c | 183 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 183 insertions(+) create mode 100644 tools/drctest.c diff --git a/tools/drctest.c b/tools/drctest.c new file mode 100644 index 00000000..2eca4222 --- /dev/null +++ b/tools/drctest.c @@ -0,0 +1,183 @@ +// gcc drctest.c cpu/drc/cmn.c cpu/sh2/mame/sh2dasm.c platform/libpicofe/linux/host_dasm.c platform/libpicofe/linux/plat.c -I. 
-DDRC_SH2 -g -O -o drctest -lbfd--multiarch -lopcodes--multiarch -liberty -D<__platform__> + +#include +#include + +#include "cpu/sh2/compiler.c" + +struct Pico Pico; +SH2 sh2s[2]; +struct Pico32xMem _Pico32xMem, *Pico32xMem = &_Pico32xMem; +struct Pico32x Pico32x; +char **g_argv; + +void memset32(void *dest_in, int c, int count) { memset(dest_in, c, 4*count); } + +void cache_flush_d_inval_i(void *start_addr, void *end_addr) { } +void *plat_mem_get_for_drc(size_t size) { return NULL; } +void *p32x_sh2_get_mem_ptr(u32 a, u32 *mask, SH2 *sh2) { return NULL; } + +void REGPARM(3) p32x_sh2_write8 (u32 a, u32 d, SH2 *s) { } +void REGPARM(3) p32x_sh2_write16(u32 a, u32 d, SH2 *s) { } +void REGPARM(3) p32x_sh2_write32(u32 a, u32 d, SH2 *s) { } + +u32 REGPARM(2) p32x_sh2_read8 (u32 a, SH2 *s) { } +u32 REGPARM(2) p32x_sh2_read16(u32 a, SH2 *s) { } +u32 REGPARM(2) p32x_sh2_read32(u32 a, SH2 *s) { } + +u32 REGPARM(3) p32x_sh2_poll_memory8 (u32 a, u32 d, SH2 *s) { } +u32 REGPARM(3) p32x_sh2_poll_memory16(u32 a, u32 d, SH2 *s) { } +u32 REGPARM(3) p32x_sh2_poll_memory32(u32 a, u32 d, SH2 *s) { } + +int main(int argc, char *argv[]) +{ + FILE *f; + u32 (*testfunc)(u32), ret; + int arg0, arg1, arg2, arg3, sr; + host_arg2reg(arg0, 0); + host_arg2reg(arg1, 1); + host_arg2reg(arg2, 2); + host_arg2reg(arg3, 3); + + g_argv = argv; + sh2_drc_init(sh2s); + f = fopen("utils.bin", "w"); + fwrite(tcache, 1, 4096, f); + fclose(f); + + tcache_ptr = tcache_ring[0].base; + u8 *p1 = tcache_ptr; + emith_jump_patchable(0); + u8 *p2 = tcache_ptr; + emith_jump_cond_patchable(DCOND_GE, 0); + emith_move_r_r(0, 1); + emith_move_r_r(0, 2); + u8 *p3 = tcache_ptr; + emith_move_r_r(0, 3); + emith_move_r_r(0, 4); + emith_move_r_r(0, 5); + + u8 *p4 = tcache_ptr; + emith_move_r_imm_s8_patchable(arg0, 0); + emith_move_r_r(0, 6); + emith_flush(); + + emith_jump_patch(p1, tcache_ptr, NULL); + emith_jump_patch(p2, tcache_ptr, NULL); + emith_jump_at(p3, tcache_ptr); + + emith_move_r_imm_s8_patch(p4, 42); + + 
emith_read8_r_r_offs(arg0, arg1, 100); + emith_read8_r_r_offs(arg0, arg1, 1000); + emith_read8_r_r_offs(arg0, arg1, 10000); + emith_read8_r_r_offs(arg0, arg1, -100); + emith_read8_r_r_offs(arg0, arg1, -1000); + emith_read8_r_r_offs(arg0, arg1, -10000); + + emith_read16_r_r_offs(arg0, arg1, 4); + emith_read_r_r_offs(arg0, arg1, 4); + emith_read8s_r_r_offs(arg0, arg1, 4); + emith_read16s_r_r_offs(arg0, arg1, 4); + + emith_write_r_r_offs(arg0, arg1, 4); + + emith_add_r_r_r_lsl(arg0, arg1, arg2, 2); + emith_move_r_r(0, 0); + + emith_mula_s64(arg0, arg1, arg2, arg3); + emith_move_r_r(0, 0); + + emith_clear_msb(arg0, arg1, 8); + emith_clear_msb(arg0, arg1, 16); + emith_clear_msb(arg0, arg1, 24); + + emith_sext(arg0, arg1, 8); + emith_sext(arg0, arg1, 16); + emith_sext(arg0, arg1, 24); + emith_move_r_r(0, 0); + + emith_lsl(arg0, arg1, 24); + emith_lsr(arg0, arg1, 24); + emith_asr(arg0, arg1, 24); + emith_rol(arg0, arg1, 24); + emith_move_r_r(0, 0); + + emith_lslf(arg0, arg1, 24); + emith_lsrf(arg0, arg1, 24); + emith_asrf(arg0, arg1, 24); + emith_rolf(arg0, arg1, 24); + emith_rorf(arg0, arg1, 24); + emith_move_r_r(0, 0); + emith_rolcf(arg0); + emith_rorcf(arg0); + + emith_negcf_r_r(arg0, arg1); + emith_move_r_r(0, 0); + + emith_eor_r_r_imm(arg0, arg1, 100); + emith_eor_r_r_imm(arg0, arg1, 10000); + emith_eor_r_r_imm(arg0, arg1, -100); + emith_eor_r_r_imm(arg0, arg1, -10000); + emith_move_r_r(0, 0); + + emith_move_r_imm(arg0, 100); + emith_move_r_imm(arg0, 1000); + emith_move_r_imm(arg0, 10000); + emith_move_r_imm(arg0, -100); + emith_move_r_imm(arg0, -1000); + emith_move_r_imm(arg0, -10000); + emith_move_r_r(0, 0); + + emith_move_r_ptr_imm(arg0, 0x1234567887654321ULL); + emith_move_r_ptr_imm(arg1, 0x8765432112345678ULL); + emith_move_r_ptr_imm(arg2, 0x0011223344556677ULL); + emith_move_r_ptr_imm(arg3, 0x7766554433221100ULL); + emith_move_r_r(0, 0); + + emith_tpop_carry(29, 0); + emith_tpush_carry(29, 0); + emith_move_r_r(0, 0); + + emith_carry_to_t(29, 0); + 
emith_t_to_carry(29, 0); + emith_move_r_r(0, 0); + + emith_write_sr(29, arg0); + emith_move_r_r(0, 0); + + emith_sh2_delay_loop(11, arg0); + emith_move_r_r(0, 0); + emith_sh2_delay_loop(11, -1); + emith_move_r_r(0, 0); + + emith_sh2_div1_step(arg0, arg1, 29); + emith_move_r_r(0, 0); + + emith_sh2_macl(arg0, arg1, arg2, arg3, 29); + emith_move_r_r(0, 0); + emith_sh2_macw(arg0, arg1, arg2, arg3, 29); + emith_move_r_r(0, 0); + + emith_flush(); + emith_pool_commit(1); + + emith_ret(); + + f = fopen("test.bin", "w"); + fwrite(tcache_ring[0].base, 1, tcache_ptr - tcache_ring[0].base, f); + fclose(f); + + do_host_disasm(0); + +#if 0 + testfunc = (void *)tcache_next[0]; + tcache_ptr = tcache_next[0]; + emith_move_r_r(RET_REG, arg0); + emith_ret(); + host_instructions_updated(tcache_next[0], tcache_ptr); + ret = testfunc(0x00000001); + printf("ret %x\n",ret); +#endif +} + From bebe75ddc8dcfac2a2550ef279bec61fbdc61d87 Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 17 Sep 2020 23:57:55 +0200 Subject: [PATCH 0363/1110] update author info --- AUTHORS | 2 ++ platform/common/menu_pico.c | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index 36dd861b..df62245c 100644 --- a/AUTHORS +++ b/AUTHORS @@ -29,6 +29,8 @@ Homepage: http://www.mame.net/ Eke-Eke CD graphics processor and CD controller implementation (from Genesis Plus GX) +Irixxxx +Improvements to dynamic recompilers, 32X emulation, ARM asm, sound, VDP Additional thanks ----------------- diff --git a/platform/common/menu_pico.c b/platform/common/menu_pico.c index 2e0e1279..b96eb823 100644 --- a/platform/common/menu_pico.c +++ b/platform/common/menu_pico.c @@ -941,7 +941,6 @@ static const char credits[] = "MAME devs: SH2, YM2612 and SN76496 cores\n" "Eke, Stef: some Sega CD code\n" "Inder, ketchupgun: graphics\n" - "Irixxxx: SH2 drc improvements\n" #ifdef __GP2X__ "Squidge: mmuhack\n" "Dzz: ARM940 sample\n" From 627648e40848457b9ce172a8ca54f92e3fa9e2ec Mon Sep 17 00:00:00 2001 From: kub Date: 
Fri, 18 Sep 2020 00:02:45 +0200 Subject: [PATCH 0364/1110] vdp, test code for 8 bit fast renderer --- pico/32x/draw.c | 2 +- pico/draw2.c | 157 +++++++++++++++++++++++++++++-------------- platform/linux/emu.c | 6 +- 3 files changed, 112 insertions(+), 53 deletions(-) diff --git a/pico/32x/draw.c b/pico/32x/draw.c index 45c27260..991abaec 100644 --- a/pico/32x/draw.c +++ b/pico/32x/draw.c @@ -330,7 +330,7 @@ void PicoDrawSetOutFormat32x(pdso_t which, int use_32x_line_mode) } else { // use the same layout as alt renderer PicoDrawSetInternalBuf(NULL, 0); - PicoDrawSetOutBufMD(Pico.est.Draw2FB + 8, 328); + PicoDrawSetOutBufMD(Pico.est.Draw2FB, 328); } if (use_32x_line_mode) diff --git a/pico/draw2.c b/pico/draw2.c index 91069770..e2549526 100644 --- a/pico/draw2.c +++ b/pico/draw2.c @@ -11,6 +11,9 @@ #define START_ROW 0 // which row of tiles to start rendering at? #define END_ROW 28 // ..end +#define VSRAM 0 // 2-cell vscroll (broken for line based hscroll) +#define INTERLACE 0 // interlace mode 2 + #define TILE_ROWS END_ROW-START_ROW // note: this is not implemented in ARM asm @@ -22,8 +25,8 @@ static unsigned char PicoDraw2FB_[(8+320) * (8+240+8) + 8]; -static int HighCache2A[41*(TILE_ROWS+1)+1+1]; // caches for high layers -static int HighCache2B[41*(TILE_ROWS+1)+1+1]; +static int HighCache2A[2*41*(TILE_ROWS+1)+1+1]; // caches for high layers +static int HighCache2B[2*41*(TILE_ROWS+1)+1+1]; unsigned short *PicoCramHigh=PicoMem.cram; // pointer to CRAM buff (0x40 shorts), converted to native device color (works only with 16bit for now) void (*PicoPrepareCram)()=0; // prepares PicoCramHigh for renderer to use @@ -40,12 +43,15 @@ void DrawSpriteFull(unsigned int *sprite, struct PicoEState *est); #else -static int TileXnormYnorm(unsigned char *pd,int addr,unsigned char pal) +static int TileXnormYnorm(unsigned char *pd,int addr,unsigned char pal, struct PicoVideo *pvid) { unsigned int pack=0; unsigned int t=0, blank = 1; - int i; + int i, inc=2; - for(i=8; i; i--, 
addr+=2, pd += LINE_WIDTH) { +#if INTERLACE + if ((pvid->reg[12]&6) == 6) inc = 4; +#endif + for(i=8; i; i--, addr+=inc, pd += LINE_WIDTH) { pack=*(unsigned int *)(PicoMem.vram+addr); // Get 8 pixels if(!pack) continue; @@ -63,12 +69,15 @@ static int TileXnormYnorm(unsigned char *pd,int addr,unsigned char pal) return blank; // Tile blank? } -static int TileXflipYnorm(unsigned char *pd,int addr,unsigned char pal) +static int TileXflipYnorm(unsigned char *pd,int addr,unsigned char pal, struct PicoVideo *pvid) { unsigned int pack=0; unsigned int t=0, blank = 1; - int i; + int i, inc=2; - for(i=8; i; i--, addr+=2, pd += LINE_WIDTH) { +#if INTERLACE + if ((pvid->reg[12]&6) == 6) inc = 4; +#endif + for(i=8; i; i--, addr+=inc, pd += LINE_WIDTH) { pack=*(unsigned int *)(PicoMem.vram+addr); // Get 8 pixels if(!pack) continue; @@ -85,13 +94,16 @@ static int TileXflipYnorm(unsigned char *pd,int addr,unsigned char pal) return blank; // Tile blank? } -static int TileXnormYflip(unsigned char *pd,int addr,unsigned char pal) +static int TileXnormYflip(unsigned char *pd,int addr,unsigned char pal, struct PicoVideo *pvid) { unsigned int pack=0; unsigned int t=0, blank = 1; - int i; + int i, inc=2; +#if INTERLACE + if ((pvid->reg[12]&6) == 6) inc = 4, addr += 16; +#endif addr+=14; - for(i=8; i; i--, addr-=2, pd += LINE_WIDTH) { + for(i=8; i; i--, addr-=inc, pd += LINE_WIDTH) { pack=*(unsigned int *)(PicoMem.vram+addr); // Get 8 pixels if(!pack) continue; @@ -109,13 +121,16 @@ static int TileXnormYflip(unsigned char *pd,int addr,unsigned char pal) return blank; // Tile blank? 
} -static int TileXflipYflip(unsigned char *pd,int addr,unsigned char pal) +static int TileXflipYflip(unsigned char *pd,int addr,unsigned char pal, struct PicoVideo *pvid) { unsigned int pack=0; unsigned int t=0, blank = 1; - int i; + int i, inc=2; +#if INTERLACE + if ((pvid->reg[12]&6) == 6) inc = 4, addr += 16; +#endif addr+=14; - for(i=8; i; i--, addr-=2, pd += LINE_WIDTH) { + for(i=8; i; i--, addr-=inc, pd += LINE_WIDTH) { pack=*(unsigned int *)(PicoMem.vram+addr); // Get 8 pixels if(!pack) continue; @@ -187,10 +202,10 @@ static void DrawWindowFull(int start, int end, int prio, struct PicoEState *est) pal=(unsigned char)((code>>9)&0x30); switch((code>>11)&3) { - case 0: zero=TileXnormYnorm(scrpos+(tilex<<3),addr,pal); break; - case 1: zero=TileXflipYnorm(scrpos+(tilex<<3),addr,pal); break; - case 2: zero=TileXnormYflip(scrpos+(tilex<<3),addr,pal); break; - case 3: zero=TileXflipYflip(scrpos+(tilex<<3),addr,pal); break; + case 0: zero=TileXnormYnorm(scrpos+(tilex<<3),addr,pal,pvid); break; + case 1: zero=TileXflipYnorm(scrpos+(tilex<<3),addr,pal,pvid); break; + case 2: zero=TileXnormYflip(scrpos+(tilex<<3),addr,pal,pvid); break; + case 3: zero=TileXflipYflip(scrpos+(tilex<<3),addr,pal,pvid); break; } if(zero) blank=code; // We know this tile is blank now } @@ -204,7 +219,7 @@ static void DrawLayerFull(int plane, int *hcache, int planestart, int planeend, struct PicoEState *est) { struct PicoVideo *pvid=&Pico.video; - static char shift[4]={5,6,6,7}; // 32,64 or 128 sized tilemaps + static char shift[4]={5,6,5,7}; // 32,64 or 128 sized tilemaps int width, height, ymask, htab; int nametab, hscroll=0, vscroll, cells; unsigned char *scrpos; @@ -246,21 +261,25 @@ static void DrawLayerFull(int plane, int *hcache, int planestart, int planeend, scrpos += 32; scrpos+=8*LINE_WIDTH*(planestart-START_ROW); - // Get vertical scroll value: - vscroll=PicoMem.vsram[plane]&0x1ff; - scrpos+=(8-(vscroll&7))*LINE_WIDTH; - if(vscroll&7) planeend++; // we have vertically clipped tiles 
due to vscroll, so we need 1 more row - - *hcache++ = 8-(vscroll&7); // push y-offset to tilecache - - + if((pvid->reg[11]&4)||(PicoMem.vsram[plane]&7)) + planeend++; // we (may) have vertically clipped tiles due to vscroll, so we need 1 more row for(trow = planestart; trow < planeend; trow++) { // current tile row - int cellc=cells,tilex,dx; + int cellc=cells,tilex,dx,vsidx=0; + + // Get vertical scroll value: + vscroll=PicoMem.vsram[plane];//&0x1ff; +#if VSRAM + if (!(pvid->reg[12]&1) && (pvid->reg[11]&4)) // H32 + 2-cell mode + vscroll=PicoMem.vsram[plane+0x20];//&0x1ff; +#endif +#if INTERLACE + if ((pvid->reg[12]&6) == 6) vscroll >>= 1; +#endif + nametab_row = nametab + (((trow+(vscroll>>3))&ymask)<>3)<>3))&ymask)<>3; dx=((hscroll-1)&7)+1; - if(dx != 8) cellc++; // have hscroll, do more cells + if(dx != 8) cellc++, vsidx--; // have hscroll, do more cells for (; cellc; dx+=8,tilex++,cellc--) { - int code=0,addr=0,zero=0; + int code=0,addr=0,zero=0,scroff; // unsigned short *pal=NULL; unsigned char pal; +#if VSRAM + if ((pvid->reg[11]&4) && !(vsidx&1)) { // 2-cell mode + vscroll=PicoMem.vsram[vsidx+plane];//&0x1ff; +#if INTERLACE + if ((pvid->reg[12]&6) == 6) vscroll >>= 1; +#endif + nametab_row = nametab + (((trow+(vscroll>>3))&ymask)<>15) { // high priority tile *hcache++ = code|(dx<<16)|(trow<<27); // cache it + *hcache++ = 8-(vscroll&7); // push y-offset to tilecache continue; } // Get tile address/2: +#if INTERLACE + if ((pvid->reg[12]&6) == 6) + addr=(code&0x3ff)<<5; + else +#endif addr=(code&0x7ff)<<4; // pal=PicoCramHigh+((code>>9)&0x30); pal=(unsigned char)((code>>9)&0x30); + scroff=(8-(vscroll&7))*LINE_WIDTH; switch((code>>11)&3) { - case 0: zero=TileXnormYnorm(scrpos+dx,addr,pal); break; - case 1: zero=TileXflipYnorm(scrpos+dx,addr,pal); break; - case 2: zero=TileXnormYflip(scrpos+dx,addr,pal); break; - case 3: zero=TileXflipYflip(scrpos+dx,addr,pal); break; + case 0: zero=TileXnormYnorm(scrpos+scroff+dx,addr,pal,pvid); break; + case 1: 
zero=TileXflipYnorm(scrpos+scroff+dx,addr,pal,pvid); break; + case 2: zero=TileXnormYflip(scrpos+scroff+dx,addr,pal,pvid); break; + case 3: zero=TileXflipYflip(scrpos+scroff+dx,addr,pal,pvid); break; } if(zero) blank=code; // We know this tile is blank now } @@ -312,7 +349,7 @@ static void DrawLayerFull(int plane, int *hcache, int planestart, int planeend, static void DrawTilesFromCacheF(int *hc, struct PicoEState *est) { - int code, addr, zero = 0; + int code, addr, zero = 0, vscroll; unsigned int prevy=0xFFFFFFFF; // unsigned short *pal; unsigned char pal; @@ -321,10 +358,9 @@ static void DrawTilesFromCacheF(int *hc, struct PicoEState *est) if (!(Pico.video.reg[12]&1) && !(PicoIn.opt&POPT_DIS_32C_BORDER)) scrpos += 32; - // *hcache++ = code|(dx<<16)|(trow<<27); // cache it - scrpos+=(*hc++)*LINE_WIDTH - START_ROW*LINE_WIDTH*8; while((code=*hc++)) { + vscroll=(*hc++)*LINE_WIDTH - START_ROW*LINE_WIDTH*8; if((short)code == blank) continue; // y pos @@ -334,15 +370,20 @@ static void DrawTilesFromCacheF(int *hc, struct PicoEState *est) } // Get tile address/2: +#if INTERLACE + if ((Pico.video.reg[12]&6) == 6) + addr=(code&0x3ff)<<5; + else +#endif addr=(code&0x7ff)<<4; // pal=PicoCramHigh+((code>>9)&0x30); pal=(unsigned char)((code>>9)&0x30); switch((code>>11)&3) { - case 0: zero=TileXnormYnorm(pd+((code>>16)&0x1ff),addr,pal); break; - case 1: zero=TileXflipYnorm(pd+((code>>16)&0x1ff),addr,pal); break; - case 2: zero=TileXnormYflip(pd+((code>>16)&0x1ff),addr,pal); break; - case 3: zero=TileXflipYflip(pd+((code>>16)&0x1ff),addr,pal); break; + case 0: zero=TileXnormYnorm(pd+vscroll+((code>>16)&0x1ff),addr,pal,&Pico.video); break; + case 1: zero=TileXflipYnorm(pd+vscroll+((code>>16)&0x1ff),addr,pal,&Pico.video); break; + case 2: zero=TileXnormYflip(pd+vscroll+((code>>16)&0x1ff),addr,pal,&Pico.video); break; + case 3: zero=TileXflipYflip(pd+vscroll+((code>>16)&0x1ff),addr,pal,&Pico.video); break; } if(zero) blank=(short)code; @@ -362,6 +403,11 @@ static void 
DrawSpriteFull(unsigned int *sprite, struct PicoEState *est) sy=sprite[0]; height=sy>>24; +#if INTERLACE + if ((Pico.video.reg[12]&6) == 6) + sy = ((sy>>1)&0x1ff)-0x78; + else +#endif sy=(sy&0x1ff)-0x78; // Y width=(height>>2)&3; height&=3; width++; height++; // Width and height in tiles @@ -372,8 +418,8 @@ static void DrawSpriteFull(unsigned int *sprite, struct PicoEState *est) tile=code&0x7ff; // Tile number tdeltax=height; // Delta to increase tile by going right tdeltay=1; // Delta to increase tile by going down - if (code&0x0800) { tdeltax=-tdeltax; tile+=height*(width-1); } // Flip X - if (code&0x1000) { tdeltay=-tdeltay; tile+=height-1; } // Flip Y + if (code&0x1000) { tile+=tdeltax-1; tdeltay=-tdeltay; } // Flip Y + if (code&0x0800) { tile+=tdeltax*(width-1); tdeltax=-tdeltax; } // Flip X //delta<<=4; // Delta of address // pal=PicoCramHigh+((code>>9)&0x30); // Get palette pointer @@ -390,6 +436,7 @@ static void DrawSpriteFull(unsigned int *sprite, struct PicoEState *est) for (; height > 0; height--, sy+=8, tile+=tdeltay) { int w = width, x=sx, t=tile; + int s=4; if(sy >= END_ROW*8+8) return; // offscreen @@ -399,11 +446,14 @@ static void DrawSpriteFull(unsigned int *sprite, struct PicoEState *est) if(x>=328) break; // Offscreen t&=0x7fff; // Clip tile address +#if INTERLACE + if ((Pico.video.reg[12]&6) == 6) s=5; +#endif switch((code>>11)&3) { - case 0: TileXnormYnorm(scrpos+x,t<<4,pal); break; - case 1: TileXflipYnorm(scrpos+x,t<<4,pal); break; - case 2: TileXnormYflip(scrpos+x,t<<4,pal); break; - case 3: TileXflipYflip(scrpos+x,t<<4,pal); break; + case 0: TileXnormYnorm(scrpos+x,t<


+
F.A.M.E.
+ Fast and Accurate Motorola 68000 Emulation Library

+ Copyright (c) 2002-2005 Oscar Orallo Peláez / Daniel Lancha García. All rights + reserved.

+ March 14th, 2006
+
+
+

Table of Contents

+
+
+
+

0. Introduction
+ 1. Terms of Use
+ 2. Version History
+
3. What is emulated
+ 4. Using the emulation library
+     4.1. Data structure
+         4.1.1. + CPU context
+         4.1.2. + Memory map definition example
+     4.2. Memory handling
+     4.3. Running the CPU
+ 5. Interrupts and exceptions
+
    5.1. Interrupt + acknowledge
+     5.2. Customizing + processing (HLE)
+     5.3. IRQ lowering
+ 6. Function Reference
+     6.1. General Purpose + Functions
+     6.2. Hardware interrupt + handling functions
+     6.3. CPU context handling + functions
+     6.4. Timing functions
+ 7. Multi-CPU systems
+ 8. Helpful tips
+ 9. Troubleshooting
+ 10. Known bugs
+ 11. Special thanks

+
+
+
+ + + + +
+ 0. Introduction
+

This is the documentation for the FAME library, please read it.

+

FAME is an extremely fast and accurate Motorola 68000 Emulation Library. + It is currently available for Intel x86-based systems (80386 or better processor) + and SH-4 based systems.

+

The x86 version was designed to work under any win32 development environment + such as Microsoft Visual Basic, Microsoft Visual C++, Borland Delphi or Borland + C++ Builder.

+

The SH-4 version was specially designed for the Dreamcast videogame console + but it can be used in any SH-4 based system.

+

This manual tries to be a guide to get the emulation library working in your + development environment. I hope you find it useful. If you use FAME in your + project I would like to hear your opinion about it.

+

The package contains one example (C++ program) to show how the library should + be called and used. It was compiled successfully in Microsoft Visual C++ 6.0 + SP5, Borland C++ Builder 5/6 and Borland C++ Compiler 5.5.

+

If you have any questions about how it works in your favorite compiler send + me an email. I'd like to help you with FAME.
+ If you find any bug in FAME, it would be nice if you informed me about it via + email. Any feedback, comments + and suggestions will also be appreciated.

+

How to contact Oscar Orallo:

+
+

E-mail:      oscar@m68k.com
+ Web site:  http://www.m68k.com/fame

+
+

FAME Distribution: http://www.m68k.com/fame/fame.zip + (latest)
+FAME Development Package: http://www.m68k.com/fame/famedev.zip (latest)

+

Here we go folks, have fun :)

+

 

+ + + + +
 1. + Terms of use
+

FAME is a development package that contains the following files:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FileDescription
/x86/win32/fame.dllMicrosoft win32 dynamic link library
/x86/linux/libfame.ax86 static ELF library
/x86/cygwin/libfame.ax86 static win32 library
/sh4/libfame.aSH-4 static ELF library
/delphi/fame.pasDelphi unit
/doc/fame.htmlDocumentation file
/example/main.cC source code example
/example/makefile.cygwinCygwin example makefile
/example/makefile.dcDreamcast example makefile
/example/makefile.linuxLinux example makefile
/example/romdisk/bubble.binMotorola 68000 binary code file
/lib/bc/fame.libBorland C++ 32-bit import library
/lib/vc/fame.expMicrosoft Visual C++ 32-bit export file
/include/fame.hC/C++ header file
+


+ FAME may be distributed freely in unmodified form, as long as this document +file is included.

+

Nothing may be charged for this library. If you want to use it in a shareware + or commercial application, contact me.

+

The author will not be held liable for damages. FAME comes with absolutely + NO WARRANTY. Anyway, I will try to help you with any problem you have using FAME.

+

If you do not agree with all of these terms, please remove FAME from your computer.

+

You are encouraged to contact the author if you wish to use FAME in a commercial + product (to negotiate licensing).

+

Any program which uses FAME must include in its documentation or in the program + itself the following credit text:

+

FAME Motorola 68000 Emulation Library by Oscar Orallo (oscar@m68k.com)

+

 

+ + + + +
 2. + Version History
+
+ + + + + + + + + +
Intel® + 80386 CISC engine
Super H® SH-4 RISC + engine

Version 2.0a (14th March, 2006)

+

- Stupid bug fixed in IRQ management (thanks Martin Kresse).
+ - Accurate DIV timing implemented (thanks Jorge Cwik).
+ - Overflow detection fixed in signed DIV instruction.
+ - Added makefiles for linux and cygwin environments.

+

Version 2.0 (11th January, 2006)

+

- set_irq_type API function removed. IRQs will be automatically lowered once they have been attended.
+ - Many flag calculations fixed.
+ - Some minor tweaks.

+

Version 1.23 (5th April, + 2005)

+

- Timing fixed in MOVEM instructions.

+

Version 1.22 (7th March, 2005)

+

- Fixed a stupid bug in fetch function.

+

Version 1.21 (19th February, 2005)

+

- set_irq_type function changed for flexible use.
+ - Pointer to data structure parameter removed from memory handlers to + increase throughput.

+

Version 1.2 (17th December, 2004)

+

- Fixed the PC base calculation for fetch memory regions beyond the first + allocated area.
+ - Fixed a tiny error in the status register masking. Several instructions + could generate an invalid value.
+ - Fixed JSR instruction when jumping to a fetch bank different to the + current one.
+ - Tiny error fixed in interrupt acknowledge function parameter.
+ - Fixed the set_context function when setting status register.
+ - Fixed the PC restoring in HLE feature.
+ - Fixed DIVS instruction operation.

+

Version 1.1 (7th October, 2004)

+

- New static ELF library available.
+ - Interrupt acknowledge calling bug fixed.
+ - Custom exception processing (HLE) feature added.
+ - Some little changes in CPU context (register ordering).
+ - Flag N calculation in CHK instruction fixed.
+ - Some little code tweaks.
+ - New sections added to this document.
+ - Some defines added to header file.
+ - Some return values have been changed.
+ - Set/get context functions simplified. Some changes have been applied. +

+

Version 1.0g (2nd August, 2004)

+

- Speed emulation increased once more. The fetch/decode/execute loop + has been inlined.
+ - Some API functions added: add_cycles and release_cycles.
+ - Memory handling section added to this document (thanks Richard Hollstein + for requesting it).
+ - Faster memory access (both program code and data). Memory regions must + be 4 KB aligned now.
+ - Overhead reduced in emulate function calls (entry/exit code + optimized).
+ - Interrupt acknowledge function added.
+ - Some code tweaks here and there.
+ - Static library for Borland C++ compilers added to the package.
+ - DLL file size reduced: internal compression (UPX).
+ - Fixed a bug in STOP instruction: the processor started up after an interrupt + request even if its interrupt level was not higher than current PPL.
+ - Very little optimization in branch instructions.
+

+

Version 1.0f (23rd February, 2003)

+

- API functions added: get_register and set_register + to retrieve and set register values.
+ - Small optimizations for improved speed.
+ - CPU context modified: execinfo added for more complete CPU state handling + support.
+ - LIB file added to package to support implicit linking :).
+ - Changes in documentation.
+ - C header file (fame.h) and Delphi unit (fame.pas) modified.
+

+

Version 1.0e (18th February, 2003)

+

- Emulation core speed increased lightly (faster entry/exit code). Now + the library is pretty fast.
+ - Some changes in function and variable naming (odometer changed to cycles_counter).
+

+

Version 1.0d (20th December, 2002)

+

- Fetch function speed increased a bit.
+ - Fixed memory boundary for byte data accesses.
+

+

Version 1.0c (27th August, 2002)

+

- Fixed a stupid bug in group 0 exceptions management.
+

+

Version 1.0b (16th August, 2002)

+

- Many errors corrected in documentation about memory mapping.
+ - The fetch function now has the capability to access the data + address space.

+


+ Version 1.0a (24th July, 2002)

+

- First public release.

Version 2.0a (14th March, 2006)

+

- Timing fixed for DIV and signed MUL instructions.
+ - Improved overflow detection in signed DIV instruction.
+ - Added makefile for Dreamcast system (requires KOS).

+

Version 2.0 (11th January, 2006)

+

- Tons of bugs fixed (thanks Chui).
+ - Accurate DIV timing implemented (thanks Jorge Cwik).
+ - set_irq_type API function removed. IRQs will be automatically lowered once they have been attended.
+ - Great speed improvements.

+

Version 0.04 (5th April, 2005)

+

- Lightweighted entry/exit code.
+ - Fixed sign/zero flag calculation when moving long data from memory to + memory.
+ - Privilege violation exception fixed.
+ - Faster interrupt/exception management.
+ - Timing fixed in MOVEM instructions.

+

Version 0.03 (7th March, 2005)

+

- Sign flag calculation in immediate logical instruction fixed.
+ - Carry flag calculation in NEG instruction fixed.
+ - Overflow flag in operations with X flag fixed.
+ - Fixed CPU state stop bit.
+ - Speed up by about 20%.
+ - Tiny tweaks here and there and everywhere.

+

Version 0.02 (19th February, 2005)

+

- First beta release.
+ - set_irq_type function changed for flexible use.
+ - Greatly improved internal memory management.
+ - Pointer to data structure parameter removed from memory handlers to + increase throughput.
+ - DIV/DIVS instructions fixed.
+ - ABCD/SBCD adjusted result fixed.
+ - MOVEM (control addressing mode) instruction fixed.
+ - BTST with memory addressing mode fixed.
+ - Fixed Z flag calculation in NEGX instruction.
+ - Fixed TAS instruction.
+ - Fixed RESET instruction (external handler calling).
+ - Fixed ILLEGAL instruction (exception generation).
+ - Quick ADD to address register fixed.
+ - EXG instruction fixed.
+ - V flag calculation fixed in ASL instruction.
+ - Some tiny tweaks & improvements.

+

Version 0.01b (17th December, 2004)

+

- Memory map cache generation fixed.
+ - Fixed the PC restoring in HLE feature.
+ - Many opcodes fixed.
+ - Lots of bugs fixed.

+

Version 0.01a (7th October, 2004)

+

- First public release. Alpha development state!

+

 

+ + + + +
 3. + What is emulated
+

This library emulates the Motorola 68000 microprocessor. The main emulation + features are the following:

+
    +
  • +
     Written in 100% 32-bit assembly language.
    +
  • +
  • +
     Support for all opcodes.
    +
  • +
  • +
     Calculates 100% of flags correctly, + even undocumented ones.
    +
  • +
  • +
     Excellent accurate timing emulation for all opcodes. All instructions + have perfect timing emulation according to Motorola references. Take a look at Motorola manuals for more information about this + fact.
    +
  • +
  • +
     Complete hardware interrupt support.
    +
  • +
  • +
     Accurate exception support allowing an appropriate emulation of home + computer systems.
    +
  • +
  • +
     Priorities between interrupts and exceptions are fully emulated.
    +
  • +
+

 

+ + + +
 4. + Using the emulation library
+

4.1. Data structure

+

The data structures used in the emulation core are defined in the C header file + fame.h. In this file you will get all the data structures needed to use the + library.
+
+ If you cannot use this file because you are not using a C/C++ compliant compiler + you have to define these structures yourself in your code.

+

Here I describe these data structures.

+
+

struct M68K_PROGRAM
+ {
+     unsigned low_addr;
+     unsigned high_addr;
+     unsigned offset;
+ }

+
+

This structure defines the memory regions for 68000 program code. The fields + low_addr and high_addr are 32-bit values used to determine + the low and high address of the memory block in the 68000 memory map.

+

The last field is a 32-bit pointer to the data of the memory region. The data + pointed by it must be allocated in native (Motorola) format. If not, the data + will be fetched incorrectly. Make sure of this fact.

+
+

struct M68K_DATA
+ {
+     unsigned low_addr;
+     unsigned high_addr;
+     void *mem_handler;
+     void *data;
+ }

+
+

This one is used for 68000 data code. This structure has an appearance very + similar to the last one but has a difference in the way you can give the control + of the memory to FAME. The pointer called mem_handler is a function + pointer. This pointer is used for memory management, so when you want to take + control of the reading/writing of a memory region, you have to set this pointer + to the appropriate value. If you do not want to use this functionality you have + to set this pointer to NULL and set data to point to the data itself. + The different ways to perform memory handling will be described in more detail + in the memory handling section.

+

4.1.1. CPU + context

+
+

struct M68K_CONTEXT
+ {
+     struct M68K_PROGRAM *fetch;
+     struct M68K_DATA *read_byte;
+     struct M68K_DATA *read_word;
+     struct M68K_DATA *write_byte;
+     struct M68K_DATA *write_word;
+     struct M68K_PROGRAM *sv_fetch;
+     struct M68K_DATA *sv_read_byte;
+     struct M68K_DATA *sv_read_word;
+     struct M68K_DATA *sv_write_byte;
+     struct M68K_DATA *sv_write_word;
+     struct M68K_PROGRAM *user_fetch;
+     struct M68K_DATA *user_read_byte;
+     struct M68K_DATA *user_read_word;
+     struct M68K_DATA *user_write_byte;
+     struct M68K_DATA *user_write_word;
+     void (*reset_handler)(void);

+     void (*iack_handler)(unsigned level);
+     unsigned *icust_handler;
+     unsigned dreg[8];
+     unsigned areg[8];
+     unsigned asp;
+     unsigned pc;
+     unsigned cycles_counter;
+     unsigned char interrupts[8];
+     unsigned short sr;
+     unsigned short execinfo;
+ }

+
+

This structure defines a CPU context. You have to declare a variable of this + type. It contains all information related with the context of the CPU.

+

You have to set pointer values of sv* + which defines the supervisor memory map. In order to get the CPU into user mode, + set the user* + pointers.

+

The pointer reset_handler is called when the RESET instruction is + executed. In this way, you can reset all external devices in the calling to + this function. If you do not want to use this feature remember to set this pointer + to NULL.

+

The pointer iack_handler is called whenever a hardware interrupt is + handled by the CPU. This feature will be covered later in Interrupts + and exceptions section.

+

The pointer icust_handler is intended to point to an array of function + pointers to handle customized interrupt/exception processing (known as High + Level Emulation or HLE for short). See the Interrupts and + exceptions section to set up this feature.

+

The rest of the structure is managed by FAME so you can read it in execution + time to retrieve information about the CPU.

+

Here I describe some interesting fields for the 68000 programmer:

+
+

- dreg[8] + holds the eight data registers in order (d0 - d7).
+ - areg[8] + holds the eight address registers in order (a0 - a7).
+ - pc + is the current PC address.
+ - asp + stands for Alternative Stack Pointer. It is used to store the not + currently used stack pointer. In supervisor mode, asp is the user stack pointer, + in user mode it is the supervisor stack pointer.
+ - cycles_counter + holds the number of cycles executed so far.
+ - interrupts + is an array that contains information about interrupts.
+ - sr + is the status register.

+
+

4.1.2 Memory map definition + example

+

As an example of an address space definition, consider the following simple + memory map:

+
    +
  •  ROM: 000000-01FFFF
  • +
  •  RAM-1: 300000-407FFF
  • +
  •  RAM-2: 500000-50FFFF
  • +
  •  RAM-3: 600000-601FFF
  • +
  •  RAM-4: 800000-80AFFF
  • +
+

This is the structure for the program address space. I will suppose that ROM, + RAM-1 and RAM-2 contains program code.

+
+

struct M68K_PROGRAM prg_fetch[] + = {
+     {0x000000, 0x01FFFF, (unsigned)rom},
+     {0x300000, 0x407FFF, (unsigned)ram1 - 0x300000},
+     {0x500000, 0x500FFF, (unsigned)ram2 - 0x500000},
+     {-1, -1, NULL}
+ }

+
+

Note that the last entry must be {-1, + -1, NULL}.

+

Now, I will set up the data address space. In this case, I will suppose that + all memory areas will be accessed and that RAM-3 is accessed by the routine mem_access. + To do this, you will have to set up the following:

+

- One structure for read byte operations:

+
+

struct M68K_DATA data_rb[] + = {
+     {0x000000, 0x01FFFF, NULL, rom},
+     {0x300000, 0x407FFF, NULL, ram1 - 0x300000},
+     {0x500000, 0x507FFF, NULL, ram2 - 0x500000},
+     {0x600000, 0x601FFF, mem_access, NULL},
+     {0x800000, 0x80AFFF, NULL, ram4 - 0x800000},
+     {-1, -1, NULL, NULL}
+ }

+
+

- One structure for write byte operations:

+
+

struct M68K_DATA data_wb[] + = {
+     {0x000000, 0x01FFFF, NULL, rom},
+     {0x300000, 0x407FFF, NULL, ram1 - 0x300000},
+     {0x500000, 0x507FFF, NULL, ram2 - 0x500000},
+     {0x600000, 0x601FFF, mem_access, NULL},
+     {0x800000, 0x80AFFF, NULL, ram4 - 0x800000},
+     {-1, -1, NULL, NULL}
+ }

+
+

- One structure for read word operations:

+
+

struct M68K_DATA data_rw[] + = {
+     {0x000000, 0x01FFFF, NULL, rom},
+     {0x300000, 0x407FFF, NULL, ram1 - 0x300000},
+     {0x500000, 0x507FFF, NULL, ram2 - 0x500000},
+     {0x600000, 0x601FFF, mem_access, NULL},
+     {0x800000, 0x80AFFF, NULL, ram4 - 0x800000},
+     {-1, -1, NULL, NULL}
+ }

+
+

- One structure for write word operations:

+
+

struct M68K_DATA data_ww[] + = {
+     {0x000000, 0x01FFFF, NULL, rom},
+     {0x300000, 0x407FFF, NULL, ram1 - 0x300000},
+     {0x500000, 0x507FFF, NULL, ram2 - 0x500000},
+     {0x600000, 0x601FFF, mem_access, NULL},
+     {0x800000, 0x80AFFF, NULL, ram4 - 0x800000},
+     {-1, -1, NULL, NULL}
+ }

+
+

In the example, the routine used for access to ram3 area is the same in all + the structures defined but it could be different.

+

And now the last step is to fill the CPU context with the defined address spaces. + This is accomplished in the following way:

+
+

struct M68K_CONTEXT cpu_contxt;

+

cpu_contxt.sv_fetch = prg_fetch;
+ cpu_contxt.user_fetch = prg_fetch;

+

cpu_contxt.sv_read_byte + = data_rb;
+ cpu_contxt.user_read_byte = data_rb;
+ cpu_contxt.sv_read_word = data_rw;
+ cpu_contxt.user_read_word = data_rw;
+ cpu_contxt.sv_write_byte = data_wb;
+ cpu_contxt.user_write_byte = data_wb;
+ cpu_contxt.sv_write_word = data_ww;
+ cpu_contxt.user_write_word = data_ww;

+
+

Note that the memory address spaces for supervisor and user are the same. This + is very common but remember they could be different.

+

And that is all.
+

+

4.2. Memory handling

+

The emulation library provides two ways to perform the access to the memory + map: built-in and custom.

+

The built-in memory handling is ideal to get the maximum speed to the memory + map but at the cost of less control. To use it you have to set data + pointing to the beginning of the native memory region and set mem_handler + to NULL.

+
+

struct M68K_DATA
+ {
+     unsigned low_addr;
+     unsigned high_addr;
+     void *mem_handler;
+     void *data;
+ }

+
+

The custom memory handling gives you total control over memory accesses but + its use could create a bottleneck in the emulated system if it is used inappropriately. + To use this feature you have to set up mem_handler pointer to the handling + function. That function will be called whenever a memory access is done.
+ There is a restriction in the definition of a memory region: it must be 4 + KB aligned. So it must start on 0XXX000h and end on 0YYYFFFh.

+

Memory handling functions have the following structure:

+

int  read_xxxx (int address);
+ void write_xxxx(int address, int data);

+

where xxxx stands for byte, word or long depending on data size, + address is the memory address accessed and data is the data itself.

+

Using memory handling functions might be a good way to customize emulated memory + space. You have to read/write data in the way FAME expects. This process could + become confusing. To avoid undesired problems at this point, I have written + some simple routines to make your life easier:

+
+

int readbyte(int address)
+ {
+     return ram[address^1];
+ }

+
+ int readword(int address)
+ {
+     return ((unsigned short *)ram)[address>>1]; +
+ }
+

+ void writebyte(int address, + int data)
+ {
+     ram[address^1] = data & 0xFF;
+ }

+
+ void writeword(int address, + int data)
+ {
+     ((unsigned short *)ram)[address>>1] = data & + 0xFFFF;
+ }

+
+

I am assuming you have your emulated memory region (pointed to by ram here) + in native endian format (that is, big endian for the 68000 processor). Note + the required endianness switch in the byte accesses, since we are reading on + a little endian machine (x86 and SH4 processors).

+


+ 4.3. Running the CPU

+

In order to get the 68000 CPU running, you have to do the following steps:

+
+

1. Initialize the emulation library. Call m68k_init() + to perform this task.
+ 2. Set up the memory map (see section 4.1.2).
+ 3. Reset the processor calling the m68k_reset() + function.
+ 4. Execute code calling m68k_emulate(n) + function where the parameter n means the number of clock cycles to execute.

+
+

Note: See Function Reference section + for more information about how API functions work.

+

 

+ + + + +
 5. + Interrupts and exceptions
+

The library currently emulates the group 0 exceptions (address + error and bus error), group 1 exceptions (trace mode, external + interrupts, illegal opcode and privilege violation) and group 2 exceptions.

+

The reset exception is not emulated. This is due to performance + reasons. If this exception was emulated, the performance of the library would + fall notably. If you need this exception to be emulated, contact me.

+

Hardware interrupts can be raised at any time, but they will be attended only in the entry code of the emulate function. To manage interrupts, please refer to the section Function Reference below.

+

If you have any doubt about how these events work, I recommend you to take a look at M68000 Microprocessors User's Manual (english) or at the book Sistemas Digitales (spanish).

+

5.1. Interrupt + acknowledge

+

Sometimes it could be useful to be warned when a hardware interrupt is being attended. + This feature is frequently called interrupt acknowledging and allows + you to take specific actions when an interrupt is handled, signaling a device + to lower the interrupt request, for example.
+ This function accepts one parameter, the interrupt level, and returns no value.

+
+

void iackhandler(unsigned + int_level);

+
+

Once you have defined your function, set up the CPU context:

+
+

struct M68K_CONTEXT cpu_contxt;

+

cpu_contxt.iack_handler + = iackhandler;
+ m68k_set_context(&cpu_contxt);

+
+

If you do not need this feature, set this pointer to NULL to avoid undesired + results.

+

5.2. + Customizing processing (HLE)

+

Sometimes it is needed to trap an exception to perform some native tasks overriding + target system tasks (system BIOS calls, for example).

+

To customize interrupt and exception processing use icusthandler table + pointer. This pointer must point to a table of a total of 256 function pointers, + each one handling each vector exception presented in the 68000 system starting + from address $000000. The index of the table is the vector number.

+

The handling function accepts one parameter, the vector exception number, and + returns no value.

+
+

void icusthandler(unsigned + vector);

+
+

The array of pointers could be used in this fashion:

+
+

/* Function to customize + CHK exception */
+ void chk_handler(unsigned vector)
+ {
+     . . .
+     (some actions)
+     . . .
+ }

+

unsigned fpa[256];               /* + Function Pointer Array declaration */
+ struct M68K_CONTEXT
+ cpu_context;
+
+ fpa[6] = chk_handler;            /* + Customizing CHK exception */
+ cpu_context.icust_handler = fpa;  /* Setting up function pointers */

+
+

Take in account the + following when you use this feature:

+
    +
  •  Remember to set to NULL those exceptions you do not want to be customized + in the array of function pointers.
  • +
  • +
     Group 0 exceptions are a special type of exception. Since they are + raised when something has gone seriously wrong with the system, they can not + be customized.
    +
  • +
+

If you do not need this feature, set this pointer to NULL to avoid undesired + results.

+

5.3. IRQ lowering

+

Every IRQ will be automatically lowered once it has been attended. User selectable IRQ lowering type has been removed.

+

 

+ + + + +
 6. + Function Reference
+

This is a brief description of the library functions.

+
    +
  •  For C/C++ programmers: +

    - They are declared in fame.h to include in your C/C++ application. You + can take a look at the sample program included.

    +
  • +
  •  For Delphi programmers: +

    - Copy fame.pas and fame.dll into your project's directory. Add fame.pas + to your project.

    +
  • +
+


+ Remember that this is a brief overview. If you do not find answers to your questions, + contact me.

+


+ 6.1. General Purpose Functions
+

+- void m68k_init (void) +
+

This function initializes the emulation library. It must be called before any other + library function.
+

+- unsigned m68k_reset (void) +
+

Resets the CPU. You must set up the memory map before calling this function.

+

Return values:

+
+

M68K_OK (0): Success.
+ M68K_RUNNING (1): The function fails because the CPU is + running. Stop the CPU first.
+ M68K_NO_SUP_ADDR_SPACE (2): The CPU could not be reset + because there is no supervisor memory map for opcode fetching.
+

+
+- void m68k_emulate (int n) +
+

Starts the emulation and executes n clock cycles. This is the function you + have to call to execute 68000's code. The number of elapsed CPU cycles is the + lowest number equal or greater than n.

+


+- unsigned m68k_get_pc (void)

+
+

Returns the current PC address. The value returned by this function does not + have to be equal to the beginning of an instruction.
+

+- unsigned m68k_get_cpu_state (void) +
+

Returns information about the CPU current state. It could be called at any + time to retrieve interesting and useful information about the CPU state.

+

The data returned has the following format:

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
BitsMeaning
0Internal use. Should be zero.
1Processing a group 0 exception (address error or bus error).
2Double bus fault has happened.
3Trace mode is being processed.
4Processing trace mode exception.
5Processing bus error exception.
6Processing address error exception.
7CPU stopped by the STOP instruction.
8-31Reserved for future use. Should be zero.
+
+

 

+- int m68k_fetch(unsigned address, unsigned memory_space) +
+

Fetches the word pointed by the specified address using the given memory space + from the fetch memory array. The memory space means the following:

+
    +
  •  Supervisor address space (M68K_SUP_ADDR_SPACE)
  • +
  •  User address space (M68K_USER_ADDR_SPACE)
  • +
  •  Data address space (M68K_DATA_ADDR_SPACE)
  • +
  •  Program address space (M68K_PROG_ADDR_SPACE)
  • +
+

Generally, you will want to fetch a word from a memory map joining two of those + primitives types. For example:

+
+

Supervisor Data Address Space (Supervisor & Data)

+
+

To accomplish this, you have to use a bitwise OR operation:

+
+

M68K_SUP_ADDR_SPACE | M68K_DATA_ADDR_SPACE

+
+

Return value:

+
+

FFFFFFFFh: The address specified is out of bounds in the + given
+ memory space.
+ 0000xxxxh: The fetched word.

+
+


+ 6.2 + Hardware interrupt handling functions
+

+- int m68k_raise_irq (int level, int vector) +
+

This function allows you to generate a hardware interrupt. This event is external + to the CPU and generally activated by an external device. The possible values + for the parameter level are between 1 and 7, both inclusive.

+

For vector the values are the following:

+
+

M68K_AUTOVECTORED_IRQ (-1): Autovectored interrupt.
+ M68K_SPURIOUS_IRQ (-2): Spurious interrupt.
+ 0-255: Vector number.

+
+

Return value:

+
+

M68K_OK (0): Success.
+ M68K_INT_LEVEL_ERROR (-1): The function fails because + there is another interrupt activated at the given level.
+ M68K_INT_INV_PARAMS (-2): Invalid parameter values. The vector + value is not valid or the level is equal to zero.

+
+


+int m68k_lower_irq (int level)

+
+

This function is used to deactivate an interrupt.

+

Return value:

+
+

M68K_OK (0): The interrupt has been deactivated successfully.
+ M68K_IRQ_LEVEL_ERROR (-1): The function fails because + the interrupt is not activated.
+ M68K_IRQ_INV_PARAMS (-2): Invalid interrupt level value.

+
+


+- int m68k_get_irq_vector (int level)

+
+

Calling this function you will get the vector of a generated interrupt at the + given interrupt level.

+

Return value:

+
+

> -1: Requested interrupt vector.
+ M68K_IRQ_LEVEL_ERROR (-1): The function fails because + the interrupt is not activated.
+ M68K_IRQ_INV_PARAMS (-2): Invalid interrupt level.
+

+
+- int m68k_change_irq_vector (int level, int vector) +
+

It allows you to change the vector of a generated interrupt. Remember that + the interrupt must be already activated when you call this function.
+ The possible values for vector are between 0 and 255, both inclusive.

+

Return value:

+
+

M68K_OK (0): Success.
+ M68K_IRQ_LEVEL_ERROR (-1): The interrupt at the given level + was not activated.
+ M68K_IRQ_INV_PARAMS (-2): Invalid interrupt vector value.

+
+


+ 6.3. + CPU context handling functions
+

+

These functions are intended for handling the CPU context.
+

+- int m68k_get_context_size (void) +
+

Returns the size in bytes of the CPU context.
+

+- void m68k_get_context (void *context) +
+

Fills the context pointed to by the pointer with the current CPU context. You + must reserve memory space in order to allocate the CPU context.
+

+- void m68k_set_context (void *context) +
+

Allows you to set up the CPU context. The parameter is a pointer to the context + structure.
+

+- int m68k_get_register (m68k_register reg) +
+

Returns the value of the specified register. If the value of the reg parameter + is not valid, the function will return -1.

+

Note that the value returned by the function when the register specified is + not valid (-1) is a valid 32-bit register value. This may be cause for concern.
+

+- int m68k_set_register (m68k_register reg, unsigned value) +
+

Sets the value of the specified register.

+

Return values:

+
+

M68K_OK (0): Success.
+ M68K_INV_REG (-1): The register specified is not valid.

+
+


+ 6.4. Timing functions

+

These functions allow you to control the CPU cycles executed in the emulation. + This way, you can adjust the emulation speed. The cycles_counter + is the variable used in the library to count the CPU cycles. For each call + to the emulate function, the executed CPU cycles are added to cycles_counter.
+

+- unsigned m68k_get_cycles_counter (void) +
+

Returns the current value of the cycles_counter.
+

+- unsigned m68k_trip_cycles_counter (void) +
+

Returns the current value of the cycles_counter variable and resets + it to zero.

+
+- unsigned m68k_control_cycles_counter (int n) +
+

If the parameter n is equal to zero, the function returns the cycles_counter.
+ Otherwise, it returns the cycles_counter resetting it to zero.
+

+- void m68k_release_timeslice (void) +
+

Calling this function you will request the CPU to finish its execution as soon + as possible. The premature exit will be reflected in the cycles_counter.
+

+

- void m68k_add_cycles (int cycles)

+
+

Call this function when you want to increase the clock cycles counting (cycles_counter + variable).
+ This function could be useful when emulating systems equipped with DMA capabilities, + keeping track of how many clock cycles the CPU was frozen by any device doing + a DMA operation.
+

+

- void m68k_release_cycles (int cycles)

+
+

Call this function when you want to decrease the clock cycles counting (cycles_counter + variable).

+

 

+ + + + +
 7. + Multi-CPU systems
+

Emulating multiple 68000 processors is fairly simple. If you want to emulate + more than one 68000 processor, you have to set up a CPU context and a memory + map for each one (see memory map example).

+

For example, you would do this:

+
+

struct M68K_CONTEXT my_contexts[NUMBER_OF_PROCESSORS];

+

for (int i = 0; i < NUMBER_OF_PROCESSORS; + i++)
+ {
+     m68k_set_context(&my_contexts[i]);
+     m68k_emulate(100);
+     m68k_get_context(&my_contexts[i]);
+ }

+
+

Try to compensate the overhead due to the copying of the contexts emulating + the CPUs in large timeslices.

+

FAME is non-reentrant so you cannot multi-thread several processors. If you + need FAME running in this way, contact me.

+

 

+ + + + +
 8. + Helpful tips
+

- It is recommended to use built-in memory handlers as much as possible because + they should be much faster than others coded into high level languages.

+

- Use timeslices as large as possible because this way you will reduce the overhead + produced by the entry and exit code of the library.

+

- Try to avoid context swapping. It will reduce performance notably.

+

- It is a good idea to call the emulate function with a variable + number of cycles instead of a fixed one. Keep track of how many cycles overflowed + from the last call to emulate and subtract them in the next calling:

+
+

#define CPU_TIMESLICE 100

+

cpu_context.cycles_counter = 0;
+ while(!done)
+ {
+     if (cpu_context.cycles_counter < CPU_TIMESLICE)
+     {
+         m68k_emulate(CPU_TIMESLICE + - cpu_context.cycles_counter);
+     }
+     cpu_context.cycles_counter -= CPU_TIMESLICE;
+ }

+
+

- Library routines were designed with accuracy and speed in mind. Use them + as much as possible in order to reach a fast and accurate emulated system.

+

- The object code contains many symbols for program relocation. Strip your executable when you are done.

+

 

+ + + + +
 9. + Troubleshooting
+

This section tries to help you to get the library working correctly. I hope + you find this section useful.

+

- Remember to call the init function before any other library function. + It initializes the library, setting up the emulator.

+

- You must call reset function before starting the emulation in order + to get the library working appropriately.

+

- Set up your memory map before resetting the CPU. The reset function looks up + the vector table.

+

- Ensure that the CPU context has been set correctly after the calling to + set_context.

+

- Check if memory maps are well-constructed. Every memory region must be 4 + KB aligned. This is a common pitfall.

+

- Check if your emulated processor is accessing memory correctly specially + when you have to use memory handling functions. Take a look at the Memory + handling section if you are having problems in this point.

+

- Make sure to set reset_handler, iack_handler and icust_handler + to NULL if you are not using these features. It would be a good idea to set + every byte of a new context to zero to avoid any problem.

+

- Remember to set every handler not used in the array of function pointers + (icust_handler) to NULL to avoid undesired results.

+

- Remember to include fame.h in any C module that uses FAME. This header + file is subject to change in future versions.

+

- Make sure to instruct your compiler configuration to treat enum types as + 32-bit ints when using m68k_get_register + and m68k_set_register functions.

+

 

+ + + + +
 10. + Known bugs
+

- The bit I/N (specific information about the processor activity) saved on + the supervisor stack when an address or bus error happens is not calculated + and its value is fixed to one (instruction). This tiny detail will be implemented + in future versions if needed.

+

 

+ + + + +
 11. + Special thanks
+

Many thanks go out to those who helped me out with this library or contributed + to the project in any form in no special order.

+

- Chui for his invaluable work to get this thing up into his NeoGeo + emulator (Neo4All) and for helping me to fix loads of errors.
+ - Bart Trzynadlowski (trzynadl@unr.nevada.edu) + for his notes about 68000 undocumented behavior.
+ - Julio César Álvarez Acosta (julio_a_a@yahoo.es) + for his help to build the import library.
+ - Richard Hollstein for letting me know that memory handling functions + were not documented in previous releases.
+ - Jorge Cwik for figuring out the algorithm to calculate the exact number of cycles in DIV instructions.
+ - Neill Corlett for his excellent Starscream 680x0 emulation library + which gave me a lot of insight and ideas on CPU emulation.
+ - Stéphane Dallongeville for Gens (probably the best Genesis/Mega + Drive emulator ever programmed) and for giving me his opinion about several + aspects of 68000 emulation.
+ - BlackAura and Ian Micheal for telling me about the high + level emulation (HLE) feature.
+ - Juan Carlos Hernández Martín (jmartin@uax.es) + for his interest in this project.
+ - Antonio García Guerra for his great book Sistemas Digitales.
+ - The creators of the 68000 microprocessor, because without their work nothing + of this might be a reality.
+
+ Thank you too! for your interest in the library. If you have any suggestions, +comments or contributions do not hesitate to get in contact with me.

+

Have a nice day!

+ + From a833e78c989f8b6bbafcfde8110436a63ac81fc5 Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 22 Jan 2022 15:07:38 +0000 Subject: [PATCH 0690/1110] svp drc, bugfixes --- pico/carthw/svp/compiler.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/pico/carthw/svp/compiler.c b/pico/carthw/svp/compiler.c index f83c29ce..14e9ebfe 100644 --- a/pico/carthw/svp/compiler.c +++ b/pico/carthw/svp/compiler.c @@ -476,6 +476,7 @@ static void tr_ptrr_mod(int r, int mod, int need_modulo, int count) if (mod == 2) known_regs.r[r] = (known_regs.r[r] & ~modulo) | ((known_regs.r[r] - count) & modulo); else known_regs.r[r] = (known_regs.r[r] & ~modulo) | ((known_regs.r[r] + count) & modulo); + dirty_regb |= (1 << (r + 8)); } else { @@ -842,6 +843,7 @@ static void tr_PMX_to_r0(int reg) return; } + tr_flush_dirty_pmcrs(); known_regb &= ~KRREG_PMC; dirty_regb &= ~KRREG_PMC; known_regb &= ~(1 << (20+reg)); @@ -849,7 +851,6 @@ static void tr_PMX_to_r0(int reg) // call the C code to handle this tr_flush_dirty_ST(); - //tr_flush_dirty_pmcrs(); tr_mov16(0, reg); emith_call_c_func(ssp_pm_read); hostreg_clear(); @@ -1021,9 +1022,9 @@ static void tr_r0_to_AL(int const_val) hostreg_sspreg_changed(SSP_AL); if (const_val != -1) { known_regs.gr[SSP_A].l = const_val; - known_regb |= 1 << SSP_AL; + known_regb |= KRREG_AL; } else - known_regb &= ~(1 << SSP_AL); + known_regb &= ~KRREG_AL; } static void tr_r0_to_PMX(int reg) @@ -1083,6 +1084,7 @@ static void tr_r0_to_PMX(int reg) return; } + tr_flush_dirty_pmcrs(); known_regb &= ~KRREG_PMC; dirty_regb &= ~KRREG_PMC; known_regb &= ~(1 << (25+reg)); @@ -1090,7 +1092,6 @@ static void tr_r0_to_PMX(int reg) // call the C code to handle this tr_flush_dirty_ST(); - //tr_flush_dirty_pmcrs(); tr_mov16(1, reg); emith_call_c_func(ssp_pm_write); hostreg_clear(); @@ -1128,16 +1129,17 @@ static void tr_r0_to_PMC(int const_val) known_regs.emu_status |= SSP_PMC_HAVE_ADDR; known_regs.pmc.l = const_val; } + dirty_regb |= 
KRREG_PMC; } else { tr_flush_dirty_ST(); - if (known_regb & KRREG_PMC) { + if (dirty_regb & KRREG_PMC) { emith_move_r_imm(1, known_regs.pmc.v); EOP_STR_IMM(1,7,0x400+SSP_PMC*4); - known_regb &= ~KRREG_PMC; dirty_regb &= ~KRREG_PMC; } + known_regb &= ~KRREG_PMC; EOP_LDR_IMM(1,7,0x484); // ldr r1, [r7, #0x484] // emu_status EOP_ADD_IMM(2,7,24/2,4); // add r2, r7, #0x400 EOP_TST_IMM(1, 0, SSP_PMC_HAVE_ADDR); @@ -1245,7 +1247,7 @@ static int tr_detect_pm0_block(unsigned int op, int *pc, int imm) EOP_ORR_IMM(6, 6, 24/2, 6); // orr r6, r6, 0x600 hostreg_sspreg_changed(SSP_ST); known_regs.gr[SSP_ST].h = 0x60; - known_regb |= 1 << SSP_ST; + known_regb |= KRREG_ST; dirty_regb &= ~KRREG_ST; (*pc) += 3*2; n_in_ops += 3; @@ -1514,8 +1516,8 @@ static int translate_op(unsigned int op, int *pc, int imm, int *end_cond, int *j tr_make_dirty_ST(); EOP_C_DOP_REG_XIMM(A_COND_AL,A_OP_SUB,1,5,5,0,A_AM1_LSL,10); // subs r5, r5, r10 hostreg_sspreg_changed(SSP_A); - known_regb &= ~(KRREG_A|KRREG_AL); dirty_regb |= KRREG_ST; + known_regb &= ~(KRREG_A|KRREG_AL|KRREG_ST); ret++; break; // mpya (rj), (ri), b @@ -1525,8 +1527,8 @@ static int translate_op(unsigned int op, int *pc, int imm, int *end_cond, int *j tr_make_dirty_ST(); EOP_C_DOP_REG_XIMM(A_COND_AL,A_OP_ADD,1,5,5,0,A_AM1_LSL,10); // adds r5, r5, r10 hostreg_sspreg_changed(SSP_A); - known_regb &= ~(KRREG_A|KRREG_AL); dirty_regb |= KRREG_ST; + known_regb &= ~(KRREG_A|KRREG_AL|KRREG_ST); ret++; break; // mld (rj), (ri), b @@ -1534,8 +1536,9 @@ static int translate_op(unsigned int op, int *pc, int imm, int *end_cond, int *j EOP_C_DOP_IMM(A_COND_AL,A_OP_MOV,1,0,5,0,0); // movs r5, #0 hostreg_sspreg_changed(SSP_A); known_regs.gr[SSP_A].v = 0; - known_regb |= (KRREG_A|KRREG_AL); dirty_regb |= KRREG_ST; + known_regb &= ~KRREG_ST; + known_regb |= (KRREG_A|KRREG_AL); tr_mac_load_XY(op); ret++; break; From 1d5885dd84764db52407be67aee2aa0516ff66ed Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 26 Jan 2022 19:34:13 +0000 Subject: [PATCH 0691/1110] 
core, linux+libretro, multiplayer adaptor support --- pico/memory.c | 59 +++++++++-------------- pico/pico.h | 4 +- platform/common/config_file.c | 19 ++++++-- platform/common/emu.c | 10 +++- platform/common/menu_pico.c | 15 ++++-- platform/common/menu_pico.h | 2 + platform/libpicofe | 2 +- platform/libretro/libretro.c | 36 +++++++++++++- platform/libretro/libretro_core_options.h | 6 ++- 9 files changed, 104 insertions(+), 49 deletions(-) diff --git a/pico/memory.c b/pico/memory.c index f98375f4..89093932 100644 --- a/pico/memory.c +++ b/pico/memory.c @@ -262,43 +262,29 @@ static u32 read_pad_team(int i, u32 out_bits) int phase = Pico.m.padTHPhase[i]; u32 value; - if (phase == 0) { + switch (phase) { + case 0: value = 0x03; - goto out; - } - if (phase == 1) { + break; + case 1: value = 0x0f; - goto out; - } - - pad = ~PicoIn.padInt[0]; // Get inverse of pad MXYZ SACB RLDU - if (phase == 8) { + break; + case 4: case 5: case 6: case 7: // controller IDs, all 3 btn for now + value = 0x00; + break; + case 8: case 10: case 12: case 14: + pad = ~PicoIn.padInt[(phase-8) >> 1]; value = pad & 0x0f; // ?x?x RLDU - goto out; - } - else if(phase == 9) { + break; + case 9: case 11: case 13: case 15: + pad = ~PicoIn.padInt[(phase-8) >> 1]; value = (pad & 0xf0) >> 4; // ?x?x SACB - goto out; + break; + default: + value = 0; + break; } - pad = ~PicoIn.padInt[1]; // Get inverse of pad MXYZ SACB RLDU - if (phase == 12) { - value = pad & 0x0f; // ?x?x RLDU - goto out; - } - else if(phase == 13) { - value = (pad & 0xf0) >> 4; // ?x?x SACB - goto out; - } - - if (phase >= 8 && pad < 16) { - value = 0x0f; - goto out; - } - - value = 0; - -out: value |= (out_bits & 0x40) | ((out_bits & 0x20)>>1); return value; } @@ -308,8 +294,8 @@ static u32 read_pad_4way(int i, u32 out_bits) u32 pad = (PicoMem.ioports[2] & 0x70) >> 4; u32 value = 0; - if (i == 0 && !(pad & 1)) - value = read_pad_3btn(pad >> 1, out_bits); + if (i == 0 && pad <= 3) + value = read_pad_3btn(pad, out_bits); value |= 
(out_bits & 0x40); return value; @@ -357,7 +343,10 @@ void PicoSetInputDevice(int port, enum input_device device) if (port < 0 || port > 2) return; - switch (device) { + if (port == 1 && port_readers[0] == read_pad_team) + func = read_nothing; + + else switch (device) { case PICO_INPUT_PAD_3BTN: func = read_pad_3btn; break; @@ -409,7 +398,7 @@ NOINLINE void io_ports_write(u32 a, u32 d) Pico.m.padTHPhase[a - 1] = 0; else if ((d^PicoMem.ioports[a]) & 0x60) Pico.m.padTHPhase[a - 1]++; - } else if (port_readers[a - 1] == read_pad_4way) { + } else if (port_readers[0] == read_pad_4way) { if (a == 2 && ((PicoMem.ioports[a] ^ d) & 0x70)) Pico.m.padTHPhase[0] = 0; if (a == 1 && !(PicoMem.ioports[a] & 0x40) && (d & 0x40)) diff --git a/pico/pico.h b/pico/pico.h index 39fd838a..8746b9ba 100644 --- a/pico/pico.h +++ b/pico/pico.h @@ -95,8 +95,8 @@ typedef struct PicoInterface { unsigned int opt; // POPT_* bitfield - unsigned short pad[2]; // Joypads, format is MXYZ SACB RLDU - unsigned short padInt[2]; // internal copy + unsigned short pad[4]; // Joypads, format is MXYZ SACB RLDU + unsigned short padInt[4]; // internal copy unsigned short AHW; // active addon hardware: PAHW_* bitfield unsigned short skipFrame; // skip rendering frame, but still do sound (if enabled) and emulation stuff diff --git a/platform/common/config_file.c b/platform/common/config_file.c index 18331d71..c4f611d0 100644 --- a/platform/common/config_file.c +++ b/platform/common/config_file.c @@ -85,6 +85,19 @@ static void keys_write(FILE *fn, int dev_id, const int *binds) } } + for (i = 0; me_ctrl_actions[i].name != NULL; i++) { + mask = me_ctrl_actions[i].mask; + if (mask & binds[IN_BIND_OFFS(k, IN_BINDTYPE_PLAYER34)]) { + strncpy(act, me_ctrl_actions[i].name, 31); + fprintf(fn, "bind %s = player3 %s" NL, name, mystrip(act)); + } + mask = me_ctrl_actions[i].mask << 16; + if (mask & binds[IN_BIND_OFFS(k, IN_BINDTYPE_PLAYER34)]) { + strncpy(act, me_ctrl_actions[i].name, 31); + fprintf(fn, "bind %s = player4 
%s" NL, name, mystrip(act)); + } + } + for (i = 0; emuctrl_actions[i].name != NULL; i++) { mask = emuctrl_actions[i].mask; if (mask & binds[IN_BIND_OFFS(k, IN_BINDTYPE_EMU)]) { @@ -371,12 +384,12 @@ static int parse_bind_val(const char *val, int *type) int player, shift = 0; player = atoi(val + 6) - 1; - if (player > 1) + if (player > 3) return -1; - if (player == 1) + if (player & 1) shift = 16; - *type = IN_BINDTYPE_PLAYER12; + *type = IN_BINDTYPE_PLAYER12 + (player >> 1); for (i = 0; me_ctrl_actions[i].name != NULL; i++) { if (strncasecmp(me_ctrl_actions[i].name, val + 8, strlen(val + 8)) == 0) return me_ctrl_actions[i].mask << shift; diff --git a/platform/common/emu.c b/platform/common/emu.c index 1b3e7082..5d4f8ffb 100644 --- a/platform/common/emu.c +++ b/platform/common/emu.c @@ -1180,21 +1180,29 @@ void emu_update_input(void) { static int prev_events = 0; int actions[IN_BINDTYPE_COUNT] = { 0, }; - int pl_actions[2]; + int pl_actions[4]; int events; in_update(actions); pl_actions[0] = actions[IN_BINDTYPE_PLAYER12]; pl_actions[1] = actions[IN_BINDTYPE_PLAYER12] >> 16; + pl_actions[2] = actions[IN_BINDTYPE_PLAYER34]; + pl_actions[3] = actions[IN_BINDTYPE_PLAYER34] >> 16; PicoIn.pad[0] = pl_actions[0] & 0xfff; PicoIn.pad[1] = pl_actions[1] & 0xfff; + PicoIn.pad[2] = pl_actions[2] & 0xfff; + PicoIn.pad[3] = pl_actions[3] & 0xfff; if (pl_actions[0] & 0x7000) do_turbo(&PicoIn.pad[0], pl_actions[0]); if (pl_actions[1] & 0x7000) do_turbo(&PicoIn.pad[1], pl_actions[1]); + if (pl_actions[2] & 0x7000) + do_turbo(&PicoIn.pad[2], pl_actions[2]); + if (pl_actions[3] & 0x7000) + do_turbo(&PicoIn.pad[3], pl_actions[3]); events = actions[IN_BINDTYPE_EMU] & PEV_MASK; diff --git a/platform/common/menu_pico.c b/platform/common/menu_pico.c index 1151ad48..812ea842 100644 --- a/platform/common/menu_pico.c +++ b/platform/common/menu_pico.c @@ -366,6 +366,12 @@ static int key_config_loop_wrap(int id, int keys) case MA_CTRL_PLAYER2: key_config_loop(me_ctrl_actions, 
array_size(me_ctrl_actions) - 1, 1); break; + case MA_CTRL_PLAYER3: + key_config_loop(me_ctrl_actions, array_size(me_ctrl_actions) - 1, 2); + break; + case MA_CTRL_PLAYER4: + key_config_loop(me_ctrl_actions, array_size(me_ctrl_actions) - 1, 3); + break; case MA_CTRL_EMU: key_config_loop(emuctrl_actions, array_size(emuctrl_actions) - 1, -1); break; @@ -396,15 +402,18 @@ static const char *mgn_dev_name(int id, int *offs) static int mh_saveloadcfg(int id, int keys); static const char *mgn_saveloadcfg(int id, int *offs); -const char *indev_names[] = { "none", "3 button pad", "6 button pad", NULL }; +const char *indev0_names[] = { "none", "3 button pad", "6 button pad", "Team player", "4 way play", NULL }; +const char *indev1_names[] = { "none", "3 button pad", "6 button pad", NULL }; static menu_entry e_menu_keyconfig[] = { mee_handler_id("Player 1", MA_CTRL_PLAYER1, key_config_loop_wrap), mee_handler_id("Player 2", MA_CTRL_PLAYER2, key_config_loop_wrap), + mee_handler_id("Player 3", MA_CTRL_PLAYER3, key_config_loop_wrap), + mee_handler_id("Player 4", MA_CTRL_PLAYER4, key_config_loop_wrap), mee_handler_id("Emulator controls", MA_CTRL_EMU, key_config_loop_wrap), - mee_enum ("Input device 1", MA_OPT_INPUT_DEV0, currentConfig.input_dev0, indev_names), - mee_enum ("Input device 2", MA_OPT_INPUT_DEV1, currentConfig.input_dev1, indev_names), + mee_enum ("Input device 1", MA_OPT_INPUT_DEV0, currentConfig.input_dev0, indev0_names), + mee_enum ("Input device 2", MA_OPT_INPUT_DEV1, currentConfig.input_dev1, indev1_names), mee_range ("Turbo rate", MA_CTRL_TURBO_RATE, currentConfig.turbo_rate, 1, 30), mee_range ("Analog deadzone", MA_CTRL_DEADZONE, currentConfig.analog_deadzone, 1, 99), mee_cust_nosave("Save global config", MA_OPT_SAVECFG, mh_saveloadcfg, mgn_saveloadcfg), diff --git a/platform/common/menu_pico.h b/platform/common/menu_pico.h index 0c81563b..fb1c67d9 100644 --- a/platform/common/menu_pico.h +++ b/platform/common/menu_pico.h @@ -93,6 +93,8 @@ typedef enum 
MA_SMSOPT_GHOSTING, MA_CTRL_PLAYER1, MA_CTRL_PLAYER2, + MA_CTRL_PLAYER3, + MA_CTRL_PLAYER4, MA_CTRL_EMU, MA_CTRL_TURBO_RATE, MA_CTRL_DEADZONE, diff --git a/platform/libpicofe b/platform/libpicofe index d57c9992..25cfdf0a 160000 --- a/platform/libpicofe +++ b/platform/libpicofe @@ -1 +1 @@ -Subproject commit d57c9992201e065f8caf6ce68247195ff98e8420 +Subproject commit 25cfdf0a342a64a01710c1b6fbe3b1b04f28975e diff --git a/platform/libretro/libretro.c b/platform/libretro/libretro.c index 8aadedf7..8646865a 100644 --- a/platform/libretro/libretro.c +++ b/platform/libretro/libretro.c @@ -1235,6 +1235,34 @@ bool retro_load_game(const struct retro_game_info *info) { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_SELECT,"Mode" }, { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_START, "Start" }, + + { 2, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_LEFT, "D-Pad Left" }, + { 2, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_UP, "D-Pad Up" }, + { 2, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_DOWN, "D-Pad Down" }, + { 2, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_RIGHT, "D-Pad Right" }, + { 2, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_B, "B" }, + { 2, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_A, "C" }, + { 2, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_X, "Y" }, + { 2, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_Y, "A" }, + { 2, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L, "X" }, + { 2, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R, "Z" }, + { 2, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_SELECT,"Mode" }, + { 2, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_START, "Start" }, + + + { 3, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_LEFT, "D-Pad Left" }, + { 3, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_UP, "D-Pad Up" }, + { 3, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_DOWN, "D-Pad Down" }, + { 3, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_RIGHT, "D-Pad Right" }, + { 3, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_B, 
"B" }, + { 3, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_A, "C" }, + { 3, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_X, "Y" }, + { 3, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_Y, "A" }, + { 3, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L, "X" }, + { 3, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R, "Z" }, + { 3, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_SELECT,"Mode" }, + { 3, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_START, "Start" }, + { 0 }, }; @@ -1442,6 +1470,10 @@ static enum input_device input_name_to_val(const char *name) return PICO_INPUT_PAD_3BTN; if (strcmp(name, "6 button pad") == 0) return PICO_INPUT_PAD_6BTN; + if (strcmp(name, "team player") == 0) + return PICO_INPUT_PAD_TEAM; + if (strcmp(name, "4way play") == 0) + return PICO_INPUT_PAD_4WAY; if (strcmp(name, "None") == 0) return PICO_INPUT_NOTHING; @@ -1719,8 +1751,8 @@ void retro_run(void) input_poll_cb(); - PicoIn.pad[0] = PicoIn.pad[1] = 0; - for (pad = 0; pad < 2; pad++) { + PicoIn.pad[0] = PicoIn.pad[1] = PicoIn.pad[2] = PicoIn.pad[3] = 0; + for (pad = 0; pad < 4; pad++) { if (libretro_supports_bitmasks) { input = input_state_cb(pad, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_MASK); for (i = 0; i < RETRO_PICO_MAP_LEN; i++) diff --git a/platform/libretro/libretro_core_options.h b/platform/libretro/libretro_core_options.h index 07e8b723..88b48d42 100644 --- a/platform/libretro/libretro_core_options.h +++ b/platform/libretro/libretro_core_options.h @@ -294,12 +294,14 @@ struct retro_core_option_v2_definition option_defs_us[] = { "picodrive_input1", "Input Device 1", NULL, - "Choose which type of controller is plugged into slot 1.", + "Choose which type of controller is plugged into slot 1. 
Note that a multiplayer adaptor uses both slots.", NULL, "input", { { "3 button pad", "3 Button Pad" }, { "6 button pad", "6 Button Pad" }, + { "team player", "Sega 4 Player Adaptor" }, + { "4way play", "EA 4way Play Adaptor" }, { "None", NULL }, { NULL, NULL }, }, @@ -309,7 +311,7 @@ struct retro_core_option_v2_definition option_defs_us[] = { "picodrive_input2", "Input Device 2", NULL, - "Choose which type of controller is plugged into slot 2.", + "Choose which type of controller is plugged into slot 2. This setting is ignored when a multiplayer adaptor is plugged into slot 1.", NULL, "input", { From 5daf702140df7718dca77e4ca43e953e4963f068 Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 26 Jan 2022 19:45:19 +0000 Subject: [PATCH 0692/1110] svp drc, another bugfix --- pico/carthw/svp/compiler.c | 1 + 1 file changed, 1 insertion(+) diff --git a/pico/carthw/svp/compiler.c b/pico/carthw/svp/compiler.c index 14e9ebfe..c48c665c 100644 --- a/pico/carthw/svp/compiler.c +++ b/pico/carthw/svp/compiler.c @@ -990,6 +990,7 @@ static void tr_r0_to_ST(int const_val) EOP_ORR_REG_LSL(6, 6, 1, 4); // orr r6, r6, r1, lsl #4 TR_WRITE_R0_TO_REG(SSP_ST); hostreg_r[1] = -1; + known_regb &= ~KRREG_ST; dirty_regb &= ~KRREG_ST; } From ed7c6238b7528f1d919798e452570108c186ec48 Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 26 Jan 2022 19:46:51 +0000 Subject: [PATCH 0693/1110] sh2 drc, optimisation for mips,riscv --- cpu/drc/emit_mips.c | 107 ++++++++++++++++++++++++++++++++++++------- cpu/drc/emit_riscv.c | 9 +++- 2 files changed, 98 insertions(+), 18 deletions(-) diff --git a/cpu/drc/emit_mips.c b/cpu/drc/emit_mips.c index 35d928e3..a65f1204 100644 --- a/cpu/drc/emit_mips.c +++ b/cpu/drc/emit_mips.c @@ -235,8 +235,8 @@ enum { RB_SRL=0, RB_ROTR=1 }; #define MIPS_BGT (OP_BGTZ << 5) // rs > 0 #define MIPS_BLT ((OP__RT << 5)|RT_BLTZ) // rs < 0 #define MIPS_BGE ((OP__RT << 5)|RT_BGEZ) // rs >= 0 -#define MIPS_BGTL ((OP__RT << 5)|RT_BLTZAL) // rs > 0, link $ra if jumping -#define MIPS_BGEL ((OP__RT 
<< 5)|RT_BGEZAL) // rs >= 0, link $ra if jumping +#define MIPS_BLTL ((OP__RT << 5)|RT_BLTZAL) // rs > 0, always link $ra +#define MIPS_BGEL ((OP__RT << 5)|RT_BGEZAL) // rs >= 0, always link $ra #define MIPS_BCOND(cond, rs, rt, offs16) \ MIPS_OP_IMM((cond >> 5), rt, rs, (offs16) >> 2) @@ -809,20 +809,71 @@ static void emith_set_compare_flags(int rs, int rt, s32 imm) // move immediate +#define MAX_HOST_LITERALS 32 // pool must be smaller than 32 KB +static uintptr_t literal_pool[MAX_HOST_LITERALS]; +static u32 *literal_insn[MAX_HOST_LITERALS]; +static int literal_pindex, literal_iindex; + +static inline int emith_pool_literal(uintptr_t imm) +{ + int idx = literal_pindex - 8; // max look behind in pool + // see if one of the last literals was the same + for (idx = (idx < 0 ? 0 : idx); idx < literal_pindex; idx++) + if (imm == literal_pool[idx]) + break; + if (idx == literal_pindex) // store new literal + literal_pool[literal_pindex++] = imm; + return idx; +} + +static void emith_pool_commit(int jumpover) +{ + int i, sz = literal_pindex * sizeof(uintptr_t); + u8 *pool = (u8 *)tcache_ptr; + + // nothing to commit if pool is empty + if (sz == 0) + return; + // align pool to pointer size + if (jumpover) + pool += sizeof(u32); + i = (uintptr_t)pool & (sizeof(void *)-1); + pool += (i ? 
sizeof(void *)-i : 0); + // need branch over pool if not at block end + if (jumpover) + emith_branch(MIPS_B(sz + (pool-(u8 *)tcache_ptr))); + emith_flush(); + // safety check - pool must be after insns and reachable + if ((u32)(pool - (u8 *)literal_insn[0] + 8) > 0x7fff) { + elprintf(EL_STATUS|EL_SVP|EL_ANOMALY, + "pool offset out of range"); + exit(1); + } + // copy pool and adjust addresses in insns accessing the pool + memcpy(pool, literal_pool, sz); + for (i = 0; i < literal_iindex; i++) { + u32 *pi = literal_insn[i]; + *pi = (*pi & 0xffff0000) | (u16)(*pi + ((u8 *)pool - (u8 *)pi)); + } + // count pool constants as insns for statistics + for (i = 0; i < literal_pindex * sizeof(uintptr_t)/sizeof(u32); i++) + COUNT_OP; + + tcache_ptr = (void *)((u8 *)pool + sz); + literal_pindex = literal_iindex = 0; +} + +static void emith_pool_check(void) +{ + // check if pool must be committed + if (literal_iindex > MAX_HOST_LITERALS-4 || (literal_pindex && + (u8 *)tcache_ptr - (u8 *)literal_insn[0] > 0x7000)) + // pool full, or displacement is approaching the limit + emith_pool_commit(1); +} + static void emith_move_imm(int r, uintptr_t imm) { -#if _MIPS_SZPTR == 64 - if ((s32)imm != imm) { - emith_move_imm(r, imm >> 32); - if (imm & 0xffff0000) { - EMIT(MIPS_DLSL_IMM(r, r, 16)); - EMIT(MIPS_OR_IMM(r, r, (imm >> 16) & 0xffff)); - EMIT(MIPS_DLSL_IMM(r, r, 16)); - } else EMIT(MIPS_DLSL32_IMM(r, r, 0)); - if (imm & 0x0000ffff) - EMIT(MIPS_OR_IMM(r, r, imm & 0xffff)); - } else -#endif if ((s16)imm == imm) { EMIT(MIPS_ADD_IMM(r, Z0, imm)); } else if (!((u32)imm >> 16)) { @@ -837,9 +888,33 @@ static void emith_move_imm(int r, uintptr_t imm) EMIT(MIPS_OR_IMM(r, s, (u16)imm)); } } +static void emith_move_ptr_imm(int r, uintptr_t imm) +{ +#if _MIPS_SZPTR == 64 + uintptr_t offs = (u8 *)imm - (u8 *)tcache_ptr - 8; + if ((s32)imm != imm && (s32)offs == offs) { + // PC relative + emith_flush(); // next insn must not change its position at all + EMIT_PTR(tcache_ptr, MIPS_BCONDZ(MIPS_BLTL, 
Z0, 0)); // loads PC+8 into LR + emith_move_imm(r, offs); + emith_add_r_r_r_ptr(r, LR, r); + } else if ((s32)imm != imm) { + // via literal pool + int idx; + if (literal_iindex >= MAX_HOST_LITERALS) + emith_pool_commit(1); + idx = emith_pool_literal(imm); + emith_flush(); // next 2 must not change their position at all + EMIT_PTR(tcache_ptr, MIPS_BCONDZ(MIPS_BLTL, Z0, 0)); // loads PC+8 into LR + literal_insn[literal_iindex++] = (u32 *)tcache_ptr; + EMIT_PTR(tcache_ptr, MIPS_OP_IMM(OP_LP, r, LR, idx*sizeof(uintptr_t) - 4)); + } else +#endif + emith_move_imm(r, imm); +} #define emith_move_r_ptr_imm(r, imm) \ - emith_move_imm(r, (uintptr_t)(imm)) + emith_move_ptr_imm(r, (uintptr_t)(imm)) #define emith_move_r_imm(r, imm) \ emith_move_imm(r, (s32)(imm)) @@ -1580,8 +1655,6 @@ static int emith_cond_check(int cond, int *r) // emitter ABI stuff -#define emith_pool_check() /**/ -#define emith_pool_commit(j) /**/ #define emith_update_cache() /**/ #define emith_rw_offs_max() 0x7fff #define emith_uext_ptr(r) /**/ diff --git a/cpu/drc/emit_riscv.c b/cpu/drc/emit_riscv.c index 04121772..35ee0168 100644 --- a/cpu/drc/emit_riscv.c +++ b/cpu/drc/emit_riscv.c @@ -713,7 +713,14 @@ static void emith_move_imm(int r, uintptr_t imm) static void emith_move_ptr_imm(int r, uintptr_t imm) { #if __riscv_xlen == 64 - if ((s32)imm != imm) { + uintptr_t offs = (u8 *)imm - (u8 *)tcache_ptr; + if ((s32)imm != imm && (s32)offs == offs) { + // PC relative + EMIT(R5_MOVA_IMM(r, offs + _CB(offs,1,11,12))); + if (offs & 0xfff) + EMIT(R5_ADD_IMM(r, r, offs)); + } else if ((s32)imm != imm) { + // via literal pool int idx; if (literal_iindex >= MAX_HOST_LITERALS) emith_pool_commit(1); From 73bda1add93add955c4420da03a1c26ba3d96acc Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 27 Jan 2022 22:31:09 +0000 Subject: [PATCH 0694/1110] core, libretro vfs support --- Makefile.libretro | 4 ++-- pico/cart.c | 4 ---- pico/cd/cd_image.c | 4 ---- pico/cd/cd_parse.c | 4 ---- pico/patch.c | 4 ---- pico/pico_port.h | 4 
++++ pico/sek.c | 4 ---- pico/state.c | 4 ---- platform/common/config_file.c | 4 ---- platform/common/emu.c | 4 ---- platform/common/main.c | 4 ---- platform/libretro/libretro.c | 4 ---- unzip/unzip.h | 4 ++++ zlib/gzio.c | 4 ++++ 14 files changed, 14 insertions(+), 42 deletions(-) diff --git a/Makefile.libretro b/Makefile.libretro index aa20d456..3c7c67a2 100644 --- a/Makefile.libretro +++ b/Makefile.libretro @@ -34,7 +34,8 @@ CFLAGS += -I platform/libretro/libretro-common/include/streams CFLAGS += -I platform/libretro/libretro-common/include/string CFLAGS += -I platform/libretro/libretro-common/include/vfs -STATIC_LINKING:= 0 +USE_LIBRETRO_VFS := 1 +STATIC_LINKING := 0 TARGET_NAME := picodrive LIBM := -lm GIT_VERSION ?= $(shell git rev-parse --short HEAD || echo unknown) @@ -457,7 +458,6 @@ else ifneq (,$(findstring windows_msvc2017,$(platform))) ARCH = x86_64 SHARED := LIBM := - USE_LIBRETRO_VFS = 1 NO_ALIGN_FUNCTIONS = 1 CFLAGS += -DHAVE_VSNPRINTF diff --git a/pico/cart.c b/pico/cart.c index 1b15a454..39d697c8 100644 --- a/pico/cart.c +++ b/pico/cart.c @@ -10,10 +10,6 @@ #include "pico_int.h" #include -#ifdef USE_LIBRETRO_VFS -#include "file_stream_transforms.h" -#endif - #if defined(USE_LIBCHDR) #include "libchdr/chd.h" #include "libchdr/cdrom.h" diff --git a/pico/cd/cd_image.c b/pico/cd/cd_image.c index 530071a6..3f220f88 100644 --- a/pico/cd/cd_image.c +++ b/pico/cd/cd_image.c @@ -11,10 +11,6 @@ #include "cdd.h" #include "cd_parse.h" -#ifdef USE_LIBRETRO_VFS -#include "file_stream_transforms.h" -#endif - #if defined(__GNUC__) && __GNUC__ >= 7 #pragma GCC diagnostic ignored "-Wformat-truncation" #endif diff --git a/pico/cd/cd_parse.c b/pico/cd/cd_parse.c index 974e9930..574070dd 100644 --- a/pico/cd/cd_parse.c +++ b/pico/cd/cd_parse.c @@ -13,10 +13,6 @@ #include "cd_parse.h" // #define elprintf(w,f,...) 
printf(f "\n",##__VA_ARGS__); -#ifdef USE_LIBRETRO_VFS -#include "file_stream_transforms.h" -#endif - #if defined(USE_LIBCHDR) #include "libchdr/chd.h" #include "libchdr/cdrom.h" diff --git a/pico/patch.c b/pico/patch.c index b4c2d898..cd4620d7 100644 --- a/pico/patch.c +++ b/pico/patch.c @@ -25,10 +25,6 @@ #include "memory.h" #include "patch.h" -#ifdef USE_LIBRETRO_VFS -#include "file_stream_transforms.h" -#endif - struct patch { unsigned int addr; diff --git a/pico/pico_port.h b/pico/pico_port.h index dc9cdffa..41a4ce2f 100644 --- a/pico/pico_port.h +++ b/pico/pico_port.h @@ -8,6 +8,10 @@ #endif #include "pico_types.h" +#ifdef USE_LIBRETRO_VFS +#include "file_stream_transforms.h" +#endif + #if defined(__GNUC__) && defined(__i386__) #define REGPARM(x) __attribute__((regparm(x))) #else diff --git a/pico/sek.c b/pico/sek.c index b5649dcf..99808373 100644 --- a/pico/sek.c +++ b/pico/sek.c @@ -10,10 +10,6 @@ #include "pico_int.h" #include "memory.h" -#ifdef USE_LIBRETRO_VFS -#include "file_stream_transforms.h" -#endif - /* context */ // Cyclone 68000 #ifdef EMU_C68K diff --git a/pico/state.c b/pico/state.c index af2d4cc0..123b1acd 100644 --- a/pico/state.c +++ b/pico/state.c @@ -14,10 +14,6 @@ #include "sound/emu2413/emu2413.h" #include "state.h" -#ifdef USE_LIBRETRO_VFS -#include "file_stream_transforms.h" -#endif - // sn76496 & ym2413 extern int *sn76496_regs; extern OPLL old_opll; diff --git a/platform/common/config_file.c b/platform/common/config_file.c index c4f611d0..6f72f75c 100644 --- a/platform/common/config_file.c +++ b/platform/common/config_file.c @@ -18,10 +18,6 @@ #include "../libpicofe/lprintf.h" #include "config_file.h" -#ifdef USE_LIBRETRO_VFS -#include "file_stream_transforms.h" -#endif - static char *mystrip(char *str); #ifndef _MSC_VER diff --git a/platform/common/emu.c b/platform/common/emu.c index 5d4f8ffb..2352bf53 100644 --- a/platform/common/emu.c +++ b/platform/common/emu.c @@ -28,10 +28,6 @@ #include #include -#ifdef USE_LIBRETRO_VFS 
-#include "file_stream_transforms.h" -#endif - #if defined(__GNUC__) && __GNUC__ >= 7 #pragma GCC diagnostic ignored "-Wformat-truncation" #endif diff --git a/platform/common/main.c b/platform/common/main.c index a7a8312c..3c1998cd 100644 --- a/platform/common/main.c +++ b/platform/common/main.c @@ -21,10 +21,6 @@ #include "version.h" #include -#ifdef USE_LIBRETRO_VFS -#include "file_stream_transforms.h" -#endif - static int load_state_slot = -1; char **g_argv; diff --git a/platform/libretro/libretro.c b/platform/libretro/libretro.c index 8646865a..9a4e9c03 100644 --- a/platform/libretro/libretro.c +++ b/platform/libretro/libretro.c @@ -32,10 +32,6 @@ #include #endif -#ifdef USE_LIBRETRO_VFS -#include "file_stream_transforms.h" -#endif - #if defined(RENDER_GSKIT_PS2) #include #include "libretro-common/include/libretro_gskit_ps2.h" diff --git a/unzip/unzip.h b/unzip/unzip.h index 8d15d05e..f92b8a31 100644 --- a/unzip/unzip.h +++ b/unzip/unzip.h @@ -3,6 +3,10 @@ #include +#ifdef USE_LIBRETRO_VFS +#include "file_stream_transforms.h" +#endif + #ifdef __cplusplus extern "C" { #endif diff --git a/zlib/gzio.c b/zlib/gzio.c index 02a0a014..9f1e0434 100644 --- a/zlib/gzio.c +++ b/zlib/gzio.c @@ -9,6 +9,10 @@ #include +#ifdef USE_LIBRETRO_VFS +#include "file_stream_transforms.h" +#endif + #include "zutil.h" #ifdef NO_DEFLATE /* for compatibility with old definition */ From 3244eb63ff65416c45651fdb792a5ebec12969d4 Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 28 Jan 2022 17:58:50 +0000 Subject: [PATCH 0695/1110] core, fix memory leak --- pico/sek.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/pico/sek.c b/pico/sek.c index 99808373..77cf73e9 100644 --- a/pico/sek.c +++ b/pico/sek.c @@ -188,8 +188,6 @@ PICO_INTERNAL void SekSetRealTAS(int use_real) // XXX: rename PICO_INTERNAL void SekPackCpu(unsigned char *cpu, int is_sub) { - u32 pc=0; - #if defined(EMU_C68K) struct Cyclone *context = is_sub ? 
&PicoCpuCS68k : &PicoCpuCM68k; memcpy(cpu,context->d,0x40); @@ -301,14 +299,6 @@ void SekRegisterIdleHit(unsigned int pc) void SekInitIdleDet(void) { - unsigned short **tmp; - tmp = realloc(idledet_ptrs, 0x200 * sizeof(tmp[0])); - if (tmp == NULL) { - free(idledet_ptrs); - idledet_ptrs = NULL; - } - else - idledet_ptrs = tmp; idledet_count = idledet_bads = 0; idledet_start_frame = Pico.m.frame_count + 360; #ifdef IDLE_STATS @@ -331,7 +321,7 @@ int SekIsIdleReady(void) int SekIsIdleCode(unsigned short *dst, int bytes) { // printf("SekIsIdleCode %04x %i\n", *dst, bytes); - switch (bytes) + if (idledet_count >= 0) switch (bytes) { case 2: if ((*dst & 0xf000) != 0x6000) // not another branch @@ -412,7 +402,7 @@ int SekRegisterIdlePatch(unsigned int pc, int oldop, int newop, void *ctx) return 1; // don't patch } - if (idledet_count >= 0x200 && (idledet_count & 0x1ff) == 0) { + if (!idledet_ptrs || (idledet_count & 0x1ff) == 0) { unsigned short **tmp; tmp = realloc(idledet_ptrs, (idledet_count+0x200) * sizeof(tmp[0])); if (tmp == NULL) @@ -447,7 +437,11 @@ void SekFinishIdleDet(void) else elprintf(EL_STATUS|EL_IDLE, "idle: don't know how to restore %04x", *op); } + idledet_count = -1; + if (idledet_ptrs) + free(idledet_ptrs); + idledet_ptrs = NULL; } From 6fa5a7498b50807429a23074d6ffa9581f0409ac Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 28 Jan 2022 18:00:08 +0000 Subject: [PATCH 0696/1110] build, fix arm64 generic build (osx) --- configure | 2 ++ 1 file changed, 2 insertions(+) diff --git a/configure b/configure index 8fee576e..4019c2ed 100755 --- a/configure +++ b/configure @@ -191,6 +191,8 @@ fi # CPU/ABI stuff first, else compile test may fail case "$ARCH" in +arm64*) + ;; arm*) # ARM stuff ARCH="arm" From c88b729bdf3c218590b5b2d842b5fe2a221b925f Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 28 Jan 2022 18:02:26 +0000 Subject: [PATCH 0697/1110] libretro, improve multiplayer support --- platform/libretro/libretro.c | 14 ++++++++++---- 1 file changed, 10 
insertions(+), 4 deletions(-) diff --git a/platform/libretro/libretro.c b/platform/libretro/libretro.c index 9a4e9c03..f03d4d68 100644 --- a/platform/libretro/libretro.c +++ b/platform/libretro/libretro.c @@ -1455,6 +1455,8 @@ static const unsigned short retro_pico_map[] = { }; #define RETRO_PICO_MAP_LEN (sizeof(retro_pico_map) / sizeof(retro_pico_map[0])) +static int has_4_pads; + static void snd_write(int len) { audio_batch_cb(PicoIn.sndOut, len / 4); @@ -1491,8 +1493,11 @@ static void update_variables(bool first_run) var.value = NULL; var.key = "picodrive_input1"; - if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) - PicoSetInputDevice(0, input_name_to_val(var.value)); + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { + int input = input_name_to_val(var.value); + PicoSetInputDevice(0, input); + has_4_pads = input == PICO_INPUT_PAD_TEAM || input == PICO_INPUT_PAD_4WAY; + } var.value = NULL; var.key = "picodrive_input2"; @@ -1736,7 +1741,7 @@ static void update_variables(bool first_run) void retro_run(void) { bool updated = false; - int pad, i; + int pad, i, padcount; static void *buff; int16_t input; @@ -1748,7 +1753,8 @@ void retro_run(void) input_poll_cb(); PicoIn.pad[0] = PicoIn.pad[1] = PicoIn.pad[2] = PicoIn.pad[3] = 0; - for (pad = 0; pad < 4; pad++) { + padcount = has_4_pads && !(PicoIn.AHW & (PAHW_SMS|PAHW_PICO)) ? 
4 : 2; + for (pad = 0; pad < padcount; pad++) { if (libretro_supports_bitmasks) { input = input_state_cb(pad, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_MASK); for (i = 0; i < RETRO_PICO_MAP_LEN; i++) From 5fce11a3024bd3e294c0a7223dd82493f6dca35d Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 5 Feb 2022 21:16:16 +0000 Subject: [PATCH 0698/1110] build, fix incomplete clean in cyclone,musashi --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index 56dba332..3821ad85 100644 --- a/Makefile +++ b/Makefile @@ -305,6 +305,8 @@ target_: $(TARGET) clean: $(RM) $(TARGET) $(OBJS) pico/pico_int_offs.h + $(MAKE) -C cpu/cyclone clean + $(MAKE) -C cpu/musashi clean $(RM) -r .od_data $(TARGET): $(OBJS) From 3e1b6a774675623ff3a6e158ee0ccda673cdd767 Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 5 Feb 2022 21:25:17 +0000 Subject: [PATCH 0699/1110] build, fix gph build with none-eabi toolchain --- tools/release.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/release.sh b/tools/release.sh index 52834dec..c17a0eb7 100755 --- a/tools/release.sh +++ b/tools/release.sh @@ -35,7 +35,9 @@ export LD_LIBRARY_PATH # NB: -msoft-float uses the fpu setting for determining the parameter passing; # default upto gcc 4.7 was -mfpu=fpa, which has been removed in gcc 4.8, so # nothing newer than gcc 4.7 can be used here :-/ -TC=$HOME/opt/open2x/gcc-4.1.1-glibc-2.3.6 PATH=$HOME/opt/gcc-arm-none-eabi-4_7-2014q2/bin:$PATH CROSS_COMPILE=arm-none-eabi- CFLAGS="-I$TC/arm-open2x-linux/include -I$HOME/src/gp2x/armroot/include -U_FORTIFY_SOURCE -D__linux__" LDFLAGS="-B$TC/lib/gcc/arm-open2x-linux/4.1.1 -B$TC/arm-open2x-linux/lib -L$TC/arm-open2x-linux/lib -L$HOME/src/gp2x/armroot/lib" ./configure --platform=gp2x +# NB: the arm-none-eabi toolchain is available for gcc 4.7, but it creates bad +# ELF files for linux. The -Wl,-Ttext-segment=... 
below seems to fix this +TC=$HOME/opt/open2x/gcc-4.1.1-glibc-2.3.6 PATH=$HOME/opt/gcc-arm-none-eabi-4_7-2014q2/bin:$PATH CROSS_COMPILE=arm-none-eabi- CFLAGS="-I$TC/arm-open2x-linux/include -I$HOME/src/gp2x/armroot/include -U_FORTIFY_SOURCE -D__linux__" LDFLAGS="-B$TC/lib/gcc/arm-open2x-linux/4.1.1 -B$TC/arm-open2x-linux/lib -L$TC/arm-open2x-linux/lib -L$HOME/src/gp2x/armroot/lib -Wl,-Ttext-segment=0x10100" ./configure --platform=gp2x PATH=$HOME/opt/gcc-arm-none-eabi-4_7-2014q2/bin:$PATH make clean all PATH=$HOME/opt/gcc-arm-none-eabi-4_7-2014q2/bin:$PATH make -C platform/gp2x rel VER=$rel mv PicoDrive_$rel.zip release-$rel/PicoDrive-gph_$rel.zip From 8eada9d64c26e4755a1d3ac9b8bc048db52e69b5 Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 5 Feb 2022 21:37:03 +0000 Subject: [PATCH 0700/1110] core vdp, optimisation --- pico/videoport.c | 119 +++++++++++++++++++++-------------------------- 1 file changed, 52 insertions(+), 67 deletions(-) diff --git a/pico/videoport.c b/pico/videoport.c index 1ecb3117..7f6ca9a7 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -148,7 +148,6 @@ int (*PicoDmaHook)(u32 source, int len, unsigned short **base, u32 *mask) = NULL /* VDP FIFO implementation * * fifo_slot: last slot executed in this scanline - * fifo_cnt: #slots remaining for active FIFO write (#writes<<#bytep) * fifo_total: #total FIFO entries pending * fifo_data: last values transferred through fifo * fifo_queue: fifo transfer queue (#writes, flags) @@ -186,7 +185,6 @@ static struct VdpFIFO { // XXX this must go into save file! u32 fifo_queue[8], fifo_qx, fifo_ql; int fifo_total; // total# of pending FIFO entries (w/o BGDMA) - int fifo_cnt; // remaining entries in currently active transfer unsigned short fifo_slot; // last executed slot in current scanline unsigned short fifo_maxslot;// #slots in scanline @@ -203,38 +201,31 @@ enum { FQ_BYTE = 1, FQ_BGDMA = 2, FQ_FGDMA = 4 }; // queue flags, NB: BYTE = 1! 
#define Sl2Cyc(vf,sl) (vf->fifo_sl2cyc[sl]*clkdiv) // do the FIFO math -static __inline int AdvanceFIFOEntry(struct VdpFIFO *vf, struct PicoVideo *pv, int slots) +static NOINLINE int AdvanceFIFOEntry(struct VdpFIFO *vf, struct PicoVideo *pv, int slots) { - int l = slots, b = vf->fifo_queue[vf->fifo_qx] & FQ_BYTE; - int cnt = vf->fifo_cnt; + u32 *qx = &vf->fifo_queue[vf->fifo_qx]; + int l = slots, b = *qx & FQ_BYTE; + int cnt = *qx >> 3; // advance currently active FIFO entry if (l > cnt) l = cnt; - if (!(vf->fifo_queue[vf->fifo_qx] & FQ_BGDMA)) + if (!(*qx & FQ_BGDMA)) vf->fifo_total -= ((cnt & b) + l) >> b; - cnt -= l; - vf->fifo_cnt = cnt; + *qx -= l << 3; // if entry has been processed... - if (cnt == 0) { + if (cnt == l) { // remove entry from FIFO - vf->fifo_queue[vf->fifo_qx] = 0; - vf->fifo_qx = (vf->fifo_qx+1) & 7, vf->fifo_ql --; - // start processing for next entry if there is one - if (vf->fifo_ql) { - b = vf->fifo_queue[vf->fifo_qx] & FQ_BYTE; - vf->fifo_cnt = (vf->fifo_queue[vf->fifo_qx] >> 3) << b; - } else { // FIFO empty - pv->status &= ~PVS_FIFORUN; - vf->fifo_total = 0; - } + *qx = 0; + vf->fifo_qx = (vf->fifo_qx+1) & 7; + vf->fifo_ql --; } return l; } -static __inline void SetFIFOState(struct VdpFIFO *vf, struct PicoVideo *pv) +static void SetFIFOState(struct VdpFIFO *vf, struct PicoVideo *pv) { u32 st = pv->status, cmd = pv->command; // release CPU and terminate DMA if FIFO isn't blocking the 68k anymore @@ -245,10 +236,10 @@ static __inline void SetFIFOState(struct VdpFIFO *vf, struct PicoVideo *pv) cmd &= ~0x80; } } - if (vf->fifo_cnt == 0) { - st &= ~PVS_CPURD; + if (vf->fifo_ql == 0) { + st &= ~(PVS_CPURD|PVS_FIFORUN); // terminate DMA if applicable - if (!(st & (PVS_FIFORUN|PVS_DMAFILL))) { + if (!(st & PVS_DMAFILL)) { st &= ~(SR_DMA|PVS_DMABG); cmd &= ~0x80; } @@ -266,10 +257,11 @@ void PicoVideoFIFOSync(int cycles) // calculate #slots since last executed slot slots = Cyc2Sl(vf, cycles) - vf->fifo_slot; + if (!slots || !vf->fifo_ql) 
return; // advance FIFO queue by #done slots done = slots; - while (done > 0 && vf->fifo_cnt) { + while (done > 0 && vf->fifo_ql) { int l = AdvanceFIFOEntry(vf, pv, done); vf->fifo_slot += l; done -= l; @@ -288,12 +280,15 @@ static int PicoVideoFIFODrain(int level, int cycles, int bgdma) int bd = vf->fifo_queue[vf->fifo_qx] & bgdma; int burn = 0; + if (!(vf->fifo_ql && ((vf->fifo_total > level) | bd))) return 0; + // process FIFO entries until low level is reached while (vf->fifo_slot < vf->fifo_maxslot && vf->fifo_ql && ((vf->fifo_total > level) | bd)) { int b = vf->fifo_queue[vf->fifo_qx] & FQ_BYTE; - int cnt = bd ? vf->fifo_cnt : ((vf->fifo_total-level)<fifo_cnt&b); - int slot = (vf->fifo_cntfifo_cnt:cnt) + vf->fifo_slot; + int c = vf->fifo_queue[vf->fifo_qx] >> 3; + int cnt = bd ? c : ((vf->fifo_total-level)<fifo_slot; if (slot > vf->fifo_maxslot) { // target slot in later scanline, advance to eol @@ -324,14 +319,14 @@ static int PicoVideoFIFORead(void) int lc = SekCyclesDone()-Pico.t.m68c_line_start; int burn = 0; - if (vf->fifo_cnt) { + if (vf->fifo_ql) { PicoVideoFIFOSync(lc); // advance FIFO and CPU until FIFO is empty burn = PicoVideoFIFODrain(0, lc, FQ_BGDMA); lc += burn; } - if (vf->fifo_cnt) + if (vf->fifo_ql) pv->status |= PVS_CPURD; // target slot is in later scanline else { // use next VDP access slot for reading, block 68k until then @@ -350,31 +345,30 @@ int PicoVideoFIFOWrite(int count, int flags, unsigned sr_mask,unsigned sr_flags) int lc = SekCyclesDone()-Pico.t.m68c_line_start; int burn = 0; - if (vf->fifo_cnt) + if (vf->fifo_total >= 4 || (pv->status & SR_DMA)) PicoVideoFIFOSync(lc); pv->status = (pv->status & ~sr_mask) | sr_flags; - if (count && vf->fifo_ql < 8) { + if (count && vf->fifo_ql < 7) { // determine queue position for entry int x = (vf->fifo_qx + vf->fifo_ql - 1) & 7; if (unlikely(vf->fifo_queue[x] & FQ_BGDMA)) { // CPU FIFO writes have priority over a background DMA Fill/Copy - // XXX if interrupting a DMA fill, fill data changes 
- if (x == vf->fifo_qx) { // overtaking to queue head? - int f = vf->fifo_queue[x] & 7; - vf->fifo_queue[x] = (vf->fifo_cnt >> (f & FQ_BYTE) << 3) | f; + vf->fifo_queue[(x+1) & 7] = vf->fifo_queue[x]; // push bg DMA back + x = (x-1) & 7; + if (vf->fifo_ql == 1) { + // XXX if interrupting a DMA fill, fill data changes pv->status &= ~PVS_FIFORUN; } - // push background DMA back - vf->fifo_queue[(x+1) & 7] = vf->fifo_queue[x]; - x = (x-1) & 7; } + if (!(flags & FQ_BGDMA)) + vf->fifo_total += count; + + count <<= (flags & FQ_BYTE); if ((pv->status & PVS_FIFORUN) && (vf->fifo_queue[x] & 7) == flags) { // amalgamate entries if of same type vf->fifo_queue[x] += (count << 3); - if (x == vf->fifo_qx) - vf->fifo_cnt += count << (flags & FQ_BYTE); } else { // create new xfer queue entry vf->fifo_ql ++; @@ -383,17 +377,13 @@ int PicoVideoFIFOWrite(int count, int flags, unsigned sr_mask,unsigned sr_flags) } // update FIFO state if it was empty - if (!(pv->status & PVS_FIFORUN)) { + if (!(pv->status & PVS_FIFORUN)) vf->fifo_slot = Cyc2Sl(vf, lc+7); // FIFO latency ~3 vdp slots - pv->status |= PVS_FIFORUN; - vf->fifo_cnt = count << (flags & FQ_BYTE); - } - if (!(flags & FQ_BGDMA)) - vf->fifo_total += count; + pv->status |= PVS_FIFORUN; } // if CPU is waiting for the bus, advance CPU and FIFO until bus is free - if (pv->status & PVS_CPUWR) + if (vf->fifo_total > 4 && (pv->status & PVS_CPUWR)) burn = PicoVideoFIFODrain(4, lc, 0); return burn; @@ -404,15 +394,17 @@ int PicoVideoFIFOHint(void) { struct VdpFIFO *vf = &VdpFIFO; struct PicoVideo *pv = &Pico.video; + int lc = SekCyclesDone()-Pico.t.m68c_line_start; int burn = 0; // reset slot to start of scanline vf->fifo_slot = 0; // if CPU is waiting for the bus, advance CPU and FIFO until bus is free - if (pv->status & PVS_CPUWR) - burn = PicoVideoFIFOWrite(0, 0, 0, 0); - else if (pv->status & PVS_CPURD) + if (pv->status & PVS_CPUWR) { + PicoVideoFIFOSync(lc); + burn = PicoVideoFIFODrain(4, lc, 0); + } else if (pv->status & PVS_CPURD) 
burn = PicoVideoFIFORead(); return burn; @@ -654,8 +646,8 @@ static void DmaCopy(int len) int source; elprintf(EL_VDPDMA, "DmaCopy len %i [%u]", len, SekCyclesDone()); - // XXX implement VRAM 128k? Is this even working? xfer/count still FQ_BYTE? - SekCyclesBurnRun(PicoVideoFIFOWrite(len, FQ_BGDMA | FQ_BYTE, + // XXX implement VRAM 128k? Is this even working? xfer/count still in bytes? + SekCyclesBurnRun(PicoVideoFIFOWrite(2*len, FQ_BGDMA, // 2 slots each (rd+wr) PVS_CPUWR, SR_DMA | PVS_DMABG)); source =Pico.video.reg[0x15]; @@ -686,7 +678,7 @@ static NOINLINE void DmaFill(int data) len = GetDmaLength(); elprintf(EL_VDPDMA, "DmaFill len %i inc %i [%u]", len, inc, SekCyclesDone()); - SekCyclesBurnRun(PicoVideoFIFOWrite(len, FQ_BGDMA | (Pico.video.type == 1), + SekCyclesBurnRun(PicoVideoFIFOWrite(len, FQ_BGDMA, // 1 slot each (wr) PVS_CPUWR | PVS_DMAFILL, SR_DMA | PVS_DMABG)); switch (Pico.video.type) @@ -769,7 +761,7 @@ static NOINLINE void CommandDma(void) if (pvid->status & SR_DMA) { elprintf(EL_VDPDMA, "Dma overlap, left=%d @ %06x", VdpFIFO.fifo_total, SekPc); - VdpFIFO.fifo_cnt = VdpFIFO.fifo_total = VdpFIFO.fifo_ql = 0; + VdpFIFO.fifo_total = VdpFIFO.fifo_ql = 0; pvid->status &= ~(PVS_FIFORUN|PVS_DMAFILL); } @@ -1139,17 +1131,13 @@ void PicoVideoSave(void) // account for all outstanding xfers XXX kludge, entry attr's not saved pv->fifo_cnt = pv->fifo_bgcnt = 0; - for (l = vf->fifo_ql, x = vf->fifo_qx + l-1; l > 1; l--, x--) { - int cnt = (vf->fifo_queue[x&7] >> 3) << (vf->fifo_queue[x&7] & FQ_BYTE); + for (l = vf->fifo_ql, x = vf->fifo_qx + l-1; l > 0; l--, x--) { + int cnt = (vf->fifo_queue[x&7] >> 3); if (vf->fifo_queue[x&7] & FQ_BGDMA) pv->fifo_bgcnt += cnt; else pv->fifo_cnt += cnt; } - if (vf->fifo_ql && (vf->fifo_queue[vf->fifo_qx] & FQ_BGDMA)) - pv->fifo_bgcnt += vf->fifo_cnt; - else - pv->fifo_cnt += vf->fifo_cnt; } void PicoVideoLoad(void) @@ -1165,21 +1153,18 @@ void PicoVideoLoad(void) } // fake entries in the FIFO if there are outstanding transfers - 
vf->fifo_ql = vf->fifo_qx = vf->fifo_cnt = vf->fifo_total = 0; + vf->fifo_ql = vf->fifo_qx = vf->fifo_total = 0; if (pv->fifo_cnt) { - int wc = (pv->fifo_cnt + b) >> b; + int wc = pv->fifo_cnt; pv->status |= PVS_FIFORUN|PVS_CPUWR; - vf->fifo_total = wc; + vf->fifo_total = (wc+b) >> b; vf->fifo_queue[vf->fifo_qx + vf->fifo_ql] = (wc << 3) | b | FQ_FGDMA; vf->fifo_ql ++; - vf->fifo_cnt = pv->fifo_cnt; } if (pv->fifo_bgcnt) { int wc = pv->fifo_bgcnt; - if (!vf->fifo_ql) { - pv->status |= PVS_DMABG; - vf->fifo_cnt = pv->fifo_bgcnt; - } + if (!vf->fifo_ql) + pv->status |= PVS_FIFORUN|PVS_DMABG; vf->fifo_queue[vf->fifo_qx + vf->fifo_ql] = (wc << 3) | FQ_BGDMA; vf->fifo_ql ++; } From 9f1d5acdb43a3805405d369c36d7e04dc03163b7 Mon Sep 17 00:00:00 2001 From: kub Date: Mon, 7 Feb 2022 21:08:34 +0000 Subject: [PATCH 0701/1110] sound, fix pcm/pwm handling wrt fast forward (mcd, 32x, pico) --- pico/cd/pcm.c | 2 +- pico/pico_cmn.c | 3 +-- pico/sms.c | 3 +-- pico/sound/sound.c | 41 +++++++++++++++++++++++++++++++---------- platform/common/emu.c | 5 +---- 5 files changed, 35 insertions(+), 19 deletions(-) diff --git a/pico/cd/pcm.c b/pico/cd/pcm.c index 9d84ee89..b38ce2d6 100644 --- a/pico/cd/pcm.c +++ b/pico/cd/pcm.c @@ -128,7 +128,7 @@ void pcd_pcm_update(s32 *buf32, int length, int stereo) pcd_pcm_sync(SekCyclesDoneS68k()); - if (!Pico_mcd->pcm_mixbuf_dirty || !(PicoIn.opt & POPT_EN_MCD_PCM)) + if (!Pico_mcd->pcm_mixbuf_dirty || !(PicoIn.opt & POPT_EN_MCD_PCM) || !buf32) goto out; step = (Pico_mcd->pcm_mixpos << 16) / length; diff --git a/pico/pico_cmn.c b/pico/pico_cmn.c index d1ebe22b..6866fdac 100644 --- a/pico/pico_cmn.c +++ b/pico/pico_cmn.c @@ -311,8 +311,7 @@ static int PicoFrameHints(void) #endif // get samples from sound chips - if (PicoIn.sndOut) - PsndGetSamples(y); + PsndGetSamples(y); timers_cycle(); diff --git a/pico/sms.c b/pico/sms.c index 2410d52e..39081363 100644 --- a/pico/sms.c +++ b/pico/sms.c @@ -755,8 +755,7 @@ void PicoFrameMS(void) 
z80_exec(Pico.t.z80c_line_start + cycles_line); } - if (PicoIn.sndOut) - PsndGetSamplesMS(lines); + PsndGetSamplesMS(lines); } void PicoFrameDrawOnlyMS(void) diff --git a/pico/sound/sound.c b/pico/sound/sound.c index 083c642d..a7c25dc9 100644 --- a/pico/sound/sound.c +++ b/pico/sound/sound.c @@ -130,6 +130,9 @@ PICO_INTERNAL void PsndDoDAC(int cyc_to) int pos, len; int dout = ym2612.dacout; + // nothing to do if sound is off + if (!PicoIn.sndOut) return; + // number of samples to fill in buffer (Q20) len = (cyc_to * Pico.snd.clkl_mult) - Pico.snd.dac_pos; @@ -169,6 +172,9 @@ PICO_INTERNAL void PsndDoPSG(int cyc_to) int pos, len; int stereo = 0; + // nothing to do if sound is off + if (!PicoIn.sndOut) return; + // number of samples to fill in buffer (Q20) len = (cyc_to * Pico.snd.clkl_mult) - Pico.snd.psg_pos; @@ -196,6 +202,9 @@ PICO_INTERNAL void PsndDoYM2413(int cyc_to) int stereo = 0; short *buf; + // nothing to do if sound is off + if (!PicoIn.sndOut) return; + // number of samples to fill in buffer (Q20) len = (cyc_to * Pico.snd.clkl_mult) - Pico.snd.ym2413_pos; @@ -236,6 +245,9 @@ PICO_INTERNAL void PsndDoFM(int cyc_to) int pos, len; int stereo = 0; + // nothing to do if sound is off + if (!PicoIn.sndOut) return; + // Q20, number of samples since last call len = (cyc_to * Pico.snd.clkl_mult) - Pico.snd.fm_pos; @@ -306,6 +318,11 @@ PICO_INTERNAL void PsndClear(void) { int len = Pico.snd.len; if (Pico.snd.len_e_add) len++; + + // drop pos remainder to avoid rounding errors (not entirely correct though) + Pico.snd.dac_pos = Pico.snd.fm_pos = Pico.snd.psg_pos = Pico.snd.ym2413_pos = 0; + if (!PicoIn.sndOut) return; + if (PicoIn.opt & POPT_EN_STEREO) memset32((int *) PicoIn.sndOut, 0, len); // assume PicoIn.sndOut to be aligned else { @@ -316,14 +333,12 @@ PICO_INTERNAL void PsndClear(void) } if (!(PicoIn.opt & POPT_EN_FM)) memset32(PsndBuffer, 0, PicoIn.opt & POPT_EN_STEREO ? 
len*2 : len); - // drop pos remainder to avoid rounding errors (not entirely correct though) - Pico.snd.dac_pos = Pico.snd.fm_pos = Pico.snd.psg_pos = Pico.snd.ym2413_pos = 0; } static int PsndRender(int offset, int length) { - int *buf32; + s32 *buf32; int stereo = (PicoIn.opt & 8) >> 3; int fmlen = ((Pico.snd.fm_pos+0x80000) >> 20); int daclen = ((Pico.snd.dac_pos+0x80000) >> 20); @@ -334,12 +349,14 @@ static int PsndRender(int offset, int length) pprof_start(sound); if (PicoIn.AHW & PAHW_PICO) { - PicoPicoPCMUpdate(PicoIn.sndOut+(offset< 0) { + if (length-daclen > 0 && PicoIn.sndOut) { short *dacbuf = PicoIn.sndOut + (daclen << stereo); Pico.snd.dac_pos += (length-daclen) << 20; *dacbuf++ += Pico.snd.dac_val2; @@ -352,7 +369,7 @@ static int PsndRender(int offset, int length) } // Add in parts of the PSG output not yet done - if (length-psglen > 0) { + if (length-psglen > 0 && PicoIn.sndOut) { short *psgbuf = PicoIn.sndOut + (psglen << stereo); Pico.snd.psg_pos += (length-psglen) << 20; if (PicoIn.opt & POPT_EN_PSG) @@ -360,7 +377,7 @@ static int PsndRender(int offset, int length) } // Add in parts of the FM buffer not yet done - if (length-fmlen > 0) { + if (length-fmlen > 0 && PicoIn.sndOut) { int *fmbuf = buf32 + ((fmlen-offset) << stereo); Pico.snd.fm_pos += (length-fmlen) << 20; if (PicoIn.opt & POPT_EN_FM) @@ -389,7 +406,8 @@ static int PsndRender(int offset, int length) p32x_pwm_update(buf32, length-offset, stereo); // convert + limit to normal 16bit output - PsndMix_32_to_16l(PicoIn.sndOut+(offset<> 20); int ym2413len = ((Pico.snd.ym2413_pos+0x80000) >> 20); + if (!PicoIn.sndOut) + return length; + pprof_start(sound); // Add in parts of the PSG output not yet done @@ -456,7 +477,7 @@ PICO_INTERNAL void PsndGetSamplesMS(int y) curr_pos = PsndRenderMS(0, Pico.snd.len_use); - if (PicoIn.writeSound != NULL) + if (PicoIn.writeSound != NULL && PicoIn.sndOut) PicoIn.writeSound(curr_pos * ((PicoIn.opt & POPT_EN_STEREO) ? 
4 : 2)); PsndClear(); } diff --git a/platform/common/emu.c b/platform/common/emu.c index 2352bf53..845951ea 100644 --- a/platform/common/emu.c +++ b/platform/common/emu.c @@ -986,7 +986,7 @@ void emu_set_fastforward(int set_on) set_EmuOpt = currentConfig.EmuOpt; PicoIn.sndOut = NULL; currentConfig.Frameskip = 8; - currentConfig.EmuOpt &= ~4; + currentConfig.EmuOpt &= ~EOPT_EN_SOUND; currentConfig.EmuOpt |= EOPT_NO_FRMLIMIT; is_on = 1; emu_status_msg("FAST FORWARD"); @@ -997,9 +997,6 @@ void emu_set_fastforward(int set_on) currentConfig.EmuOpt = set_EmuOpt; PsndRerate(1); is_on = 0; - // mainly to unbreak pcm - if (PicoIn.AHW & PAHW_MCD) - pcd_state_loaded(); } } From f7741cac9198a1aa212361e59517efe11b02f27d Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 8 Feb 2022 20:49:43 +0000 Subject: [PATCH 0702/1110] sound, fix mcd cdda (mono, resampling), type cleanup, remove minimp3 --- .gitmodules | 3 -- pico/32x/pwm.c | 2 +- pico/pico.h | 4 +- pico/pico_int.h | 3 +- pico/sound/mix.c | 39 +++++++++++++--- pico/sound/mix.h | 15 +++--- pico/sound/mix_arm.S | 86 +++++++++++++++++++++++++++++++++++ pico/sound/sound.c | 53 ++++++++++----------- pico/sound/ym2612.c | 10 ++-- pico/sound/ym2612.h | 2 +- platform/common/config_file.c | 2 +- platform/common/minimp3 | 1 - platform/common/mp3.c | 32 ++++++------- platform/psp/mp3.c | 30 ++++++------ 14 files changed, 193 insertions(+), 89 deletions(-) delete mode 160000 platform/common/minimp3 diff --git a/.gitmodules b/.gitmodules index 2a521d70..66e19f15 100644 --- a/.gitmodules +++ b/.gitmodules @@ -10,9 +10,6 @@ [submodule "pico/cd/libchdr"] path = pico/cd/libchdr url = https://github.com/rtissera/libchdr.git -[submodule "platform/common/minimp3"] - path = platform/common/minimp3 - url = https://github.com/lieff/minimp3 [submodule "platform/common/dr_libs"] path = platform/common/dr_libs url = https://github.com/mackron/dr_libs diff --git a/pico/32x/pwm.c b/pico/32x/pwm.c index 8fccc98e..ed8d761e 100644 --- a/pico/32x/pwm.c +++ 
b/pico/32x/pwm.c @@ -266,7 +266,7 @@ void p32x_pwm_write16(u32 a, unsigned int d, SH2 *sh2, unsigned int m68k_cycles) } } -void p32x_pwm_update(int *buf32, int length, int stereo) +void p32x_pwm_update(s32 *buf32, int length, int stereo) { short *pwmb; int step; diff --git a/pico/pico.h b/pico/pico.h index 8746b9ba..807ec492 100644 --- a/pico/pico.h +++ b/pico/pico.h @@ -23,7 +23,7 @@ extern void lprintf(const char *fmt, ...); // external funcs for Sega/Mega CD extern int mp3_get_bitrate(void *f, int size); extern void mp3_start_play(void *f, int pos); -extern void mp3_update(int *buffer, int length, int stereo); +extern void mp3_update(s32 *buffer, int length, int stereo); // this function should write-back d-cache and invalidate i-cache // on a mem region [start_addr, end_addr) @@ -253,7 +253,7 @@ void Pico32xSetClocks(int msh2_hz, int ssh2_hz); #define PICO_SSH2_HZ ((int)(7670442.0 * 2.4)) // sound.c -extern void (*PsndMix_32_to_16l)(short *dest, int *src, int count); +extern void (*PsndMix_32_to_16l)(s16 *dest, s32 *src, int count); void PsndRerate(int preserve_state); // media.c diff --git a/pico/pico_int.h b/pico/pico_int.h index a14a5636..49180fb5 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -452,6 +452,7 @@ struct PicoSound int len_e_cnt; unsigned int clkl_mult; // z80 clocks per line in Q20 unsigned int smpl_mult; // samples per line in Q16 + unsigned int cdda_mult, cdda_div; // 44.1 KHz resampling factor in Q16 short dac_val, dac_val2; // last DAC sample unsigned int dac_pos; // last DAC position in Q20 unsigned int fm_pos; // last FM position in Q20 @@ -1025,7 +1026,7 @@ extern int Pico32xDrawMode; // 32x/pwm.c unsigned int p32x_pwm_read16(u32 a, SH2 *sh2, unsigned int m68k_cycles); void p32x_pwm_write16(u32 a, unsigned int d, SH2 *sh2, unsigned int m68k_cycles); -void p32x_pwm_update(int *buf32, int length, int stereo); +void p32x_pwm_update(s32 *buf32, int length, int stereo); void p32x_pwm_ctl_changed(void); void p32x_pwm_schedule(unsigned 
int m68k_now); void p32x_pwm_schedule_sh2(SH2 *sh2); diff --git a/pico/sound/mix.c b/pico/sound/mix.c index 51882f70..8f75ef0a 100644 --- a/pico/sound/mix.c +++ b/pico/sound/mix.c @@ -8,6 +8,7 @@ */ #include +#include "../pico_int.h" #define MAXOUT (+32767) #define MINOUT (-32768) @@ -15,7 +16,7 @@ /* limitter */ #define Limit16(val) \ val -= val >> 3; /* reduce level to avoid clipping */ \ - if ((short)val != val) val = (val < 0 ? MINOUT : MAXOUT) + if ((s16)val != val) val = (val < 0 ? MINOUT : MAXOUT) int mix_32_to_16l_level; @@ -81,17 +82,17 @@ static inline int filter_null(struct iir *fi2, int x) lfi2 = lf, rfi2 = rf; \ } -void mix_32_to_16l_stereo_lvl(short *dest, int *src, int count) +void mix_32_to_16l_stereo_lvl(s16 *dest, s32 *src, int count) { mix_32_to_16l_stereo_core(dest, src, count, mix_32_to_16l_level, filter); } -void mix_32_to_16l_stereo(short *dest, int *src, int count) +void mix_32_to_16l_stereo(s16 *dest, s32 *src, int count) { mix_32_to_16l_stereo_core(dest, src, count, 0, filter); } -void mix_32_to_16_mono(short *dest, int *src, int count) +void mix_32_to_16_mono(s16 *dest, s32 *src, int count) { int l; struct iir lf = lfi2; @@ -108,7 +109,7 @@ void mix_32_to_16_mono(short *dest, int *src, int count) } -void mix_16h_to_32(int *dest_buf, short *mp3_buf, int count) +void mix_16h_to_32(s32 *dest_buf, s16 *mp3_buf, int count) { while (count--) { @@ -116,7 +117,7 @@ void mix_16h_to_32(int *dest_buf, short *mp3_buf, int count) } } -void mix_16h_to_32_s1(int *dest_buf, short *mp3_buf, int count) +void mix_16h_to_32_s1(s32 *dest_buf, s16 *mp3_buf, int count) { count >>= 1; while (count--) @@ -127,7 +128,7 @@ void mix_16h_to_32_s1(int *dest_buf, short *mp3_buf, int count) } } -void mix_16h_to_32_s2(int *dest_buf, short *mp3_buf, int count) +void mix_16h_to_32_s2(s32 *dest_buf, s16 *mp3_buf, int count) { count >>= 1; while (count--) @@ -138,6 +139,30 @@ void mix_16h_to_32_s2(int *dest_buf, short *mp3_buf, int count) } } +// mixes cdda audio @44.1 KHz 
into dest_buf, resampling with nearest neighbour +void mix_16h_to_32_resample_stereo(s32 *dest_buf, s16 *cdda_buf, int count, int fac16) +{ + int pos16 = 0; + while (count--) { + int pos = 2 * (pos16>>16); + *dest_buf++ += cdda_buf[pos ] >> 1; + *dest_buf++ += cdda_buf[pos+1] >> 1; + pos16 += fac16; + } +} + +// mixes cdda audio @44.1 KHz into dest_buf, resampling with nearest neighbour +void mix_16h_to_32_resample_mono(s32 *dest_buf, s16 *cdda_buf, int count, int fac16) +{ + int pos16 = 0; + while (count--) { + int pos = 2 * (pos16>>16); + *dest_buf += cdda_buf[pos ] >> 2; + *dest_buf++ += cdda_buf[pos+1] >> 2; + pos16 += fac16; + } +} + void mix_reset(int alpha_q16) { memset(&lfi2, 0, sizeof(lfi2)); diff --git a/pico/sound/mix.h b/pico/sound/mix.h index a0dfcac7..3aa3ee87 100644 --- a/pico/sound/mix.h +++ b/pico/sound/mix.h @@ -1,11 +1,14 @@ //void mix_32_to_32(int *dest, int *src, int count); -void mix_16h_to_32(int *dest, short *src, int count); -void mix_16h_to_32_s1(int *dest, short *src, int count); -void mix_16h_to_32_s2(int *dest, short *src, int count); -void mix_32_to_16l_stereo(short *dest, int *src, int count); -void mix_32_to_16_mono(short *dest, int *src, int count); +void mix_16h_to_32(s32 *dest, s16 *src, int count); +void mix_16h_to_32_s1(s32 *dest, s16 *src, int count); +void mix_16h_to_32_s2(s32 *dest, s16 *src, int count); + +void mix_16h_to_32_resample_stereo(s32 *dest, s16 *src, int count, int fac16); +void mix_16h_to_32_resample_mono(s32 *dest, s16 *src, int count, int fac16); +void mix_32_to_16l_stereo(s16 *dest, s32 *src, int count); +void mix_32_to_16_mono(s16 *dest, s32 *src, int count); extern int mix_32_to_16l_level; -void mix_32_to_16l_stereo_lvl(short *dest, int *src, int count); +void mix_32_to_16l_stereo_lvl(s16 *dest, s32 *src, int count); void mix_reset(int alpha_q16); diff --git a/pico/sound/mix_arm.S b/pico/sound/mix_arm.S index 20a19118..bc8f7b3f 100644 --- a/pico/sound/mix_arm.S +++ b/pico/sound/mix_arm.S @@ -153,6 +153,92 @@ 
m16_32_s2_no_unal2: +.global mix_16h_to_32_resample_stereo @ int *dest, short *src, int count, int fac16 + +mix_16h_to_32_resample_stereo: + stmfd sp!, {r4-r9,lr} + + subs r2, r2, #2 + mov r4, #0 + bmi m16_32_rss_end + +m16_32_rss_loop: + ldmia r0, {r5-r8} + lsr r9, r4, #16 + ldr r12,[r1, r9, lsl #2] + add r4, r4, r3 + lsr r9, r4, #16 + ldr lr ,[r1, r9, lsl #2] + add r4, r4, r3 + subs r2, r2, #2 + add r6, r6, r12,asr #17 + mov r12,r12,lsl #16 + add r5, r5, r12,asr #17 @ we use half volume + add r8, r8, lr, asr #17 + mov lr, lr, lsl #16 + add r7, r7, lr, asr #17 + stmia r0!,{r5-r8} + bpl m16_32_rss_loop + +m16_32_rss_end: + tst r2, #1 + ldmeqfd sp!, {r4-r9,pc} + lsr r9, r4, #16 + ldr lr ,[r1, r9, lsl #2] + ldmia r0, {r5,r6} + mov r12,lr, lsl #16 + add r5, r5, r12,asr #17 + add r6, r6, lr, asr #17 + stmia r0!,{r5,r6} + + ldmfd sp!, {r4-r9,lr} + bx lr + + + +.global mix_16h_to_32_resample_mono @ int *dest, short *src, int count, int fac16 + +mix_16h_to_32_resample_mono: + stmfd sp!, {r4-r6,r9,lr} + + subs r2, r2, #2 + mov r4, #0 + bmi m16_32_rsm_end + +m16_32_rsm_loop: + ldmia r0, {r5-r6} + lsr r9, r4, #16 + ldr r12,[r1, r9, lsl #2] + add r4, r4, r3 + lsr r9, r4, #16 + ldr lr ,[r1, r9, lsl #2] + add r4, r4, r3 + subs r2, r2, #2 + add r5, r5, r12,asr #18 + mov r12,r12,lsl #16 + add r5, r5, r12,asr #18 @ we use half volume (= quarter vol per channel) + add r6, r6, lr, asr #18 + mov lr, lr, lsl #16 + add r6, r6, lr, asr #18 + stmia r0!,{r5-r6} + bpl m16_32_rsm_loop + +m16_32_rsm_end: + tst r2, #1 + ldmeqfd sp!, {r4-r6,r9,pc} + lsr r9, r4, #16 + ldr lr ,[r1, r9, lsl #2] + ldr r5, [r0] + mov r12,lr, lsl #16 + add r5, r5, r12,asr #18 + add r5, r5, lr, asr #18 + str r5, [r0] + + ldmfd sp!, {r4-r6,r9,lr} + bx lr + + + @ limit @ reg=int_sample, r12=1, r8=tmp, kills flags .macro Limit reg diff --git a/pico/sound/sound.c b/pico/sound/sound.c index a7c25dc9..06d3625a 100644 --- a/pico/sound/sound.c +++ b/pico/sound/sound.c @@ -14,7 +14,7 @@ #include "mix.h" #include 
"emu2413/emu2413.h" -void (*PsndMix_32_to_16l)(short *dest, int *src, int count) = mix_32_to_16l_stereo; +void (*PsndMix_32_to_16l)(s16 *dest, s32 *src, int count) = mix_32_to_16l_stereo; // master int buffer to mix to // +1 for a fill triggered by an instruction overhanging into the next scanline @@ -98,6 +98,9 @@ void PsndRerate(int preserve_state) Pico.snd.smpl_mult = 65536LL * PicoIn.sndRate / (target_fps*target_lines); // samples per z80 clock (Q20) Pico.snd.clkl_mult = 16 * Pico.snd.smpl_mult * 15/7 / 488; + // samples per 44.1 KHz sample + Pico.snd.cdda_mult = 65536LL * 44100 / PicoIn.sndRate; + Pico.snd.cdda_div = 65536LL * PicoIn.sndRate / 44100; // clear all buffers memset32(PsndBuffer, 0, sizeof(PsndBuffer)/4); @@ -154,12 +157,12 @@ PICO_INTERNAL void PsndDoDAC(int cyc_to) // y[n] = (x[n] + x[n-1])*(1/2) (3dB cutoff at 11025 Hz, no gain) // 1 sample delay for correct IIR filtering over audio frame boundaries if (PicoIn.opt & POPT_EN_STEREO) { - short *d = PicoIn.sndOut + pos*2; + s16 *d = PicoIn.sndOut + pos*2; // left channel only, mixed ro right channel in mixing phase *d++ += Pico.snd.dac_val2; d++; while (--len) *d++ += Pico.snd.dac_val, d++; } else { - short *d = PicoIn.sndOut + pos; + s16 *d = PicoIn.sndOut + pos; *d++ += Pico.snd.dac_val2; while (--len) *d++ += Pico.snd.dac_val; } @@ -200,7 +203,7 @@ PICO_INTERNAL void PsndDoYM2413(int cyc_to) { int pos, len; int stereo = 0; - short *buf; + s16 *buf; // nothing to do if sound is off if (!PicoIn.sndOut) return; @@ -268,14 +271,11 @@ PICO_INTERNAL void PsndDoFM(int cyc_to) } // cdda -static void cdda_raw_update(int *buffer, int length) +static void cdda_raw_update(s32 *buffer, int length, int stereo) { - int ret, cdda_bytes, mult = 1; + int ret, cdda_bytes; - cdda_bytes = length*4; - if (PicoIn.sndRate <= 22050 + 100) mult = 2; - if (PicoIn.sndRate < 22050 - 100) mult = 4; - cdda_bytes *= mult; + cdda_bytes = (length * Pico.snd.cdda_mult >> 16) * 4; ret = pm_read_audio(cdda_out_buffer, cdda_bytes, 
Pico_mcd->cdda_stream); if (ret < cdda_bytes) { @@ -285,11 +285,13 @@ static void cdda_raw_update(int *buffer, int length) } // now mix - switch (mult) { - case 1: mix_16h_to_32(buffer, cdda_out_buffer, length*2); break; - case 2: mix_16h_to_32_s1(buffer, cdda_out_buffer, length*2); break; - case 4: mix_16h_to_32_s2(buffer, cdda_out_buffer, length*2); break; - } + if (stereo) switch (Pico.snd.cdda_mult) { + case 0x10000: mix_16h_to_32(buffer, cdda_out_buffer, length*2); break; + case 0x20000: mix_16h_to_32_s1(buffer, cdda_out_buffer, length*2); break; + case 0x40000: mix_16h_to_32_s2(buffer, cdda_out_buffer, length*2); break; + default: mix_16h_to_32_resample_stereo(buffer, cdda_out_buffer, length, Pico.snd.cdda_mult); + } else + mix_16h_to_32_resample_mono(buffer, cdda_out_buffer, length, Pico.snd.cdda_mult); } void cdda_start_play(int lba_base, int lba_offset, int lb_len) @@ -326,7 +328,7 @@ PICO_INTERNAL void PsndClear(void) if (PicoIn.opt & POPT_EN_STEREO) memset32((int *) PicoIn.sndOut, 0, len); // assume PicoIn.sndOut to be aligned else { - short *out = PicoIn.sndOut; + s16 *out = PicoIn.sndOut; if ((uintptr_t)out & 2) { *out++ = 0; len--; } memset32((int *) out, 0, len/2); if (len & 1) out[len-1] = 0; @@ -350,14 +352,14 @@ static int PsndRender(int offset, int length) if (PicoIn.AHW & PAHW_PICO) { // XXX ugly hack, need to render sound for interrupts - s16 *buf16 = PicoIn.sndOut ? PicoIn.sndOut : (short *)PsndBuffer; + s16 *buf16 = PicoIn.sndOut ? 
PicoIn.sndOut : (s16 *)PsndBuffer; PicoPicoPCMUpdate(buf16+(offset< 0 && PicoIn.sndOut) { - short *dacbuf = PicoIn.sndOut + (daclen << stereo); + s16 *dacbuf = PicoIn.sndOut + (daclen << stereo); Pico.snd.dac_pos += (length-daclen) << 20; *dacbuf++ += Pico.snd.dac_val2; if (stereo) dacbuf++; @@ -370,7 +372,7 @@ static int PsndRender(int offset, int length) // Add in parts of the PSG output not yet done if (length-psglen > 0 && PicoIn.sndOut) { - short *psgbuf = PicoIn.sndOut + (psglen << stereo); + s16 *psgbuf = PicoIn.sndOut + (psglen << stereo); Pico.snd.psg_pos += (length-psglen) << 20; if (PicoIn.opt & POPT_EN_PSG) SN76496Update(psgbuf, length-psglen, stereo); @@ -378,7 +380,7 @@ static int PsndRender(int offset, int length) // Add in parts of the FM buffer not yet done if (length-fmlen > 0 && PicoIn.sndOut) { - int *fmbuf = buf32 + ((fmlen-offset) << stereo); + s32 *fmbuf = buf32 + ((fmlen-offset) << stereo); Pico.snd.fm_pos += (length-fmlen) << 20; if (PicoIn.opt & POPT_EN_FM) YM2612UpdateOne(fmbuf, length-fmlen, stereo, 1); @@ -395,11 +397,10 @@ static int PsndRender(int offset, int length) && Pico_mcd->cdda_stream != NULL && !(Pico_mcd->s68k_regs[0x36] & 1)) { - // note: only 44, 22 and 11 kHz supported, with forced stereo if (Pico_mcd->cdda_type == CT_MP3) mp3_update(buf32, length-offset, stereo); else - cdda_raw_update(buf32, length-offset); + cdda_raw_update(buf32, length-offset, stereo); } if ((PicoIn.AHW & PAHW_32X) && (PicoIn.opt & POPT_EN_PWM)) @@ -439,14 +440,14 @@ static int PsndRenderMS(int offset, int length) // Add in parts of the PSG output not yet done if (length-psglen > 0) { - short *psgbuf = PicoIn.sndOut + (psglen << stereo); + s16 *psgbuf = PicoIn.sndOut + (psglen << stereo); Pico.snd.psg_pos += (length-psglen) << 20; if (PicoIn.opt & POPT_EN_PSG) SN76496Update(psgbuf, length-psglen, stereo); } if (length-ym2413len > 0) { - short *ym2413buf = PicoIn.sndOut + (ym2413len << stereo); + s16 *ym2413buf = PicoIn.sndOut + (ym2413len << stereo); 
Pico.snd.ym2413_pos += (length-ym2413len) << 20; int len = (length-ym2413len); if (PicoIn.opt & POPT_EN_YM2413){ @@ -461,8 +462,8 @@ static int PsndRenderMS(int offset, int length) // upmix to "stereo" if needed if (PicoIn.opt & POPT_EN_STEREO) { int i; - short *p; - for (i = length, p = (short *)PicoIn.sndOut; i > 0; i--, p+=2) + s16 *p; + for (i = length, p = (s16 *)PicoIn.sndOut; i > 0; i--, p+=2) *(p + 1) = *p; } diff --git a/pico/sound/ym2612.c b/pico/sound/ym2612.c index 418c1b36..95dca6a9 100644 --- a/pico/sound/ym2612.c +++ b/pico/sound/ym2612.c @@ -1127,7 +1127,7 @@ static int update_algo_channel(chan_rend_context *ct, unsigned int eg_out, unsig return smp; } -static void chan_render_loop(chan_rend_context *ct, int *buffer, int length) +static void chan_render_loop(chan_rend_context *ct, s32 *buffer, int length) { int scounter; /* sample counter */ @@ -1244,7 +1244,7 @@ disabled: } } #else -void chan_render_loop(chan_rend_context *ct, int *buffer, unsigned short length); +void chan_render_loop(chan_rend_context *ct, s32 *buffer, unsigned short length); #endif static chan_rend_context crct; @@ -1255,7 +1255,7 @@ static void chan_render_prep(void) crct.lfo_inc = ym2612.OPN.lfo_inc; } -static void chan_render_finish(int *buffer, unsigned short length, int active_chans) +static void chan_render_finish(s32 *buffer, unsigned short length, int active_chans) { ym2612.OPN.eg_cnt = crct.eg_cnt; ym2612.OPN.eg_timer = crct.eg_timer; @@ -1290,7 +1290,7 @@ static UINT32 update_lfo_phase(FM_SLOT *SLOT, UINT32 block_fnum) return SLOT->Incr; } -static int chan_render(int *buffer, int length, int c, UINT32 flags) // flags: stereo, ?, disabled, ?, pan_r, pan_l +static int chan_render(s32 *buffer, int length, int c, UINT32 flags) // flags: stereo, ?, disabled, ?, pan_r, pan_l { crct.CH = &ym2612.CH[c]; crct.mem = crct.CH->mem_value; /* one sample delay memory */ @@ -1781,7 +1781,7 @@ static int OPNWriteReg(int r, int v) 
/*******************************************************************************/ /* Generate samples for YM2612 */ -int YM2612UpdateOne_(int *buffer, int length, int stereo, int is_buf_empty) +int YM2612UpdateOne_(s32 *buffer, int length, int stereo, int is_buf_empty) { int pan; int active_chs = 0; diff --git a/pico/sound/ym2612.h b/pico/sound/ym2612.h index 8c7e8017..04079899 100644 --- a/pico/sound/ym2612.h +++ b/pico/sound/ym2612.h @@ -167,7 +167,7 @@ extern YM2612 ym2612; void YM2612Init_(int baseclock, int rate, int flags); void YM2612ResetChip_(void); -int YM2612UpdateOne_(int *buffer, int length, int stereo, int is_buf_empty); +int YM2612UpdateOne_(s32 *buffer, int length, int stereo, int is_buf_empty); int YM2612Write_(unsigned int a, unsigned int v); //unsigned char YM2612Read_(void); diff --git a/platform/common/config_file.c b/platform/common/config_file.c index 6f72f75c..e15d9180 100644 --- a/platform/common/config_file.c +++ b/platform/common/config_file.c @@ -24,7 +24,7 @@ static char *mystrip(char *str); #include "menu_pico.h" #include "emu.h" -#include +#include // always output DOS endlines #ifdef _WIN32 diff --git a/platform/common/minimp3 b/platform/common/minimp3 deleted file mode 160000 index 95864e8e..00000000 --- a/platform/common/minimp3 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 95864e8e0d3b34402a49ae9af6c66f7e98c13c35 diff --git a/platform/common/mp3.c b/platform/common/mp3.c index 346e0195..7c50652e 100644 --- a/platform/common/mp3.c +++ b/platform/common/mp3.c @@ -128,10 +128,10 @@ void mp3_start_play(void *f_, int pos1024) mp3dec_decode(mp3_current_file, &mp3_file_pos, mp3_file_len); } -void mp3_update(int *buffer, int length, int stereo) +void mp3_update(s32 *buffer, int length, int stereo) { - int length_mp3, shr = 0; - void (*mix_samples)(int *dest_buf, short *mp3_buf, int count) = mix_16h_to_32; + int length_mp3; + void (*mix_samples)(int *dest_buf, short *mp3_buf, int count, int fac16) = mix_16h_to_32_resample_stereo; if 
(mp3_current_file == NULL || mp3_file_pos >= mp3_file_len) return; /* no file / EOF */ @@ -139,35 +139,29 @@ void mp3_update(int *buffer, int length, int stereo) if (!decoder_active) return; - length_mp3 = length; - if (PicoIn.sndRate <= 11025 + 100) { - mix_samples = mix_16h_to_32_s2; - length_mp3 <<= 2; shr = 2; - } - else if (PicoIn.sndRate <= 22050 + 100) { - mix_samples = mix_16h_to_32_s1; - length_mp3 <<= 1; shr = 1; - } + length_mp3 = length * Pico.snd.cdda_mult >> 16; + if (!stereo) + mix_samples = mix_16h_to_32_resample_mono; if (1152 - cdda_out_pos >= length_mp3) { mix_samples(buffer, cdda_out_buffer + cdda_out_pos * 2, - length * 2); + length, Pico.snd.cdda_mult); cdda_out_pos += length_mp3; } else { - int ret, left = 1152 - cdda_out_pos; + int left = (1152 - cdda_out_pos) * Pico.snd.cdda_div >> 16; + int ret, sm = stereo ? 2 : 1; if (left > 0) mix_samples(buffer, cdda_out_buffer + cdda_out_pos * 2, - (left >> shr) * 2); + left, Pico.snd.cdda_mult); ret = mp3dec_decode(mp3_current_file, &mp3_file_pos, mp3_file_len); if (ret == 0) { - cdda_out_pos = length_mp3 - left; - mix_samples(buffer + (left >> shr) * 2, - cdda_out_buffer, - (cdda_out_pos >> shr) * 2); + mix_samples(buffer + left * sm, cdda_out_buffer, + length-left, Pico.snd.cdda_mult); + cdda_out_pos = (length-left) * Pico.snd.cdda_mult >> 16; } else cdda_out_pos = 0; } diff --git a/platform/psp/mp3.c b/platform/psp/mp3.c index dc948ed6..2d37a9b8 100644 --- a/platform/psp/mp3.c +++ b/platform/psp/mp3.c @@ -386,16 +386,14 @@ void mp3_start_play(void *f, int pos) } -void mp3_update(int *buffer, int length, int stereo) +void mp3_update(s32 *buffer, int length, int stereo) { int length_mp3; // playback was started, track not ended if (mp3_handle < 0 || mp3_src_pos >= mp3_src_size) return; - length_mp3 = length; - if (PicoIn.sndRate == 22050) length_mp3 <<= 1; // mp3s are locked to 44100Hz stereo - else if (PicoIn.sndRate == 11025) length_mp3 <<= 2; // so make length 44100ish + length_mp3 = length * 
Pico.snd.cdda_mult >> 16; /* do we have to wait? */ if (mp3_job_started && mp3_samples_ready < length_mp3) @@ -409,30 +407,30 @@ void mp3_update(int *buffer, int length, int stereo) /* mix mp3 data, only stereo */ if (mp3_samples_ready >= length_mp3) { - int shr = 0; - void (*mix_samples)(int *dest_buf, short *mp3_buf, int count) = mix_16h_to_32; - if (PicoIn.sndRate == 22050) { mix_samples = mix_16h_to_32_s1; shr = 1; } - else if (PicoIn.sndRate == 11025) { mix_samples = mix_16h_to_32_s2; shr = 2; } + void (*mix_samples)(s32 *dest_buf, s16 *mp3_buf, int count, int fac16) = mix_16h_to_32_resample_stereo; + if (!stereo) + mix_samples = mix_16h_to_32_resample_mono; if (1152 - mp3_buffer_offs >= length_mp3) { - mix_samples(buffer, mp3_mix_buffer[mp3_play_bufsel] + mp3_buffer_offs*2, length<<1); + mix_samples(buffer, mp3_mix_buffer[mp3_play_bufsel] + mp3_buffer_offs*2, length, Pico.snd.cdda_mult); mp3_buffer_offs += length_mp3; } else { // collect samples from both buffers.. - int left = 1152 - mp3_buffer_offs; + int left = (1152 - mp3_buffer_offs) * Pico.snd.cdda_div >> 16; + int sm = stereo ? 
2 : 1; + if (mp3_play_bufsel == 0) { - mix_samples(buffer, mp3_mix_buffer[0] + mp3_buffer_offs*2, length<<1); - mp3_buffer_offs = length_mp3 - left; + mix_samples(buffer, mp3_mix_buffer[0] + mp3_buffer_offs*2, length, Pico.snd.cdda_mult); mp3_play_bufsel = 1; } else { - mix_samples(buffer, mp3_mix_buffer[1] + mp3_buffer_offs*2, (left>>shr)<<1); - mp3_buffer_offs = length_mp3 - left; - mix_samples(buffer + ((left>>shr)<<1), - mp3_mix_buffer[0], (mp3_buffer_offs>>shr)<<1); + mix_samples(buffer, mp3_mix_buffer[1] + mp3_buffer_offs*2, left, Pico.snd.cdda_mult); + mix_samples(buffer + left * sm, + mp3_mix_buffer[0], (length-left), Pico.snd.cdda_mult); mp3_play_bufsel = 0; } + mp3_buffer_offs = (length-left) * Pico.snd.cdda_mult >> 16; } mp3_samples_ready -= length_mp3; } From 0004aa7cb0acd82ffe703be91a6a6bdd5db32f7c Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 8 Feb 2022 20:56:01 +0000 Subject: [PATCH 0703/1110] build, add CFLAGS to linking for -flto --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3821ad85..94446a95 100644 --- a/Makefile +++ b/Makefile @@ -314,7 +314,7 @@ $(TARGET): $(OBJS) ifeq ($(STATIC_LINKING), 1) $(AR) rcs $@ $^ else - $(LD) $(LINKOUT)$@ $^ $(LDFLAGS) $(LDLIBS) + $(LD) $(LINKOUT)$@ $^ $(CFLAGS) $(LDFLAGS) $(LDLIBS) endif ifeq "$(PLATFORM)" "psp" From f8395445e35ccb56c580a79975e7ca5c21f17bd2 Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 8 Feb 2022 21:15:08 +0000 Subject: [PATCH 0704/1110] submodule update --- pico/cd/libchdr | 2 +- pico/sound/emu2413 | 2 +- platform/common/dr_libs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pico/cd/libchdr b/pico/cd/libchdr index 00319cf3..e8ec3507 160000 --- a/pico/cd/libchdr +++ b/pico/cd/libchdr @@ -1 +1 @@ -Subproject commit 00319cf31f034e4d468a49a60265c7c5b8305b70 +Subproject commit e8ec3507e876b4a5b71af0dce705e13732d843c4 diff --git a/pico/sound/emu2413 b/pico/sound/emu2413 index 9f1dcf84..4062e93e 160000 --- 
a/pico/sound/emu2413 +++ b/pico/sound/emu2413 @@ -1 +1 @@ -Subproject commit 9f1dcf848d0e33e775e49352f7bc83a9c0e87a81 +Subproject commit 4062e93e9316ee5c488167ade5a83753e5f07ae3 diff --git a/platform/common/dr_libs b/platform/common/dr_libs index 343aa923..1e42667b 160000 --- a/platform/common/dr_libs +++ b/platform/common/dr_libs @@ -1 +1 @@ -Subproject commit 343aa923439e59e7a9f7726f70edc77a4500bdec +Subproject commit 1e42667b0912437d1c101a59ee9d0225ba0dffb5 From 4496577e405d4efd9aabc17a0a3f67ff1c781683 Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 8 Feb 2022 22:05:00 +0000 Subject: [PATCH 0705/1110] core gfx, fix sprite problem in savestate load screen --- pico/pico_int.h | 2 +- pico/state.c | 9 +++++++-- pico/videoport.c | 6 +++--- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/pico/pico_int.h b/pico/pico_int.h index 49180fb5..550cb5b5 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -905,7 +905,7 @@ int PicoVideoFIFOWrite(int count, int byte_p, unsigned sr_mask, unsigned sr_flag void PicoVideoInit(void); void PicoVideoSave(void); void PicoVideoLoad(void); -void PicoVideoCacheSAT(void); +void PicoVideoCacheSAT(int load); // misc.c PICO_INTERNAL_ASM void memcpy16bswap(unsigned short *dest, void *src, int count); diff --git a/pico/state.c b/pico/state.c index 123b1acd..cd6d4de2 100644 --- a/pico/state.c +++ b/pico/state.c @@ -602,7 +602,7 @@ static int state_load_gfx(void *file) char buff[8]; if (PicoIn.AHW & PAHW_32X) - to_find += 2; + to_find += 3; g_read_offs = 0; CHECKED_READ(8, buff); @@ -629,16 +629,19 @@ static int state_load_gfx(void *file) case CHUNK_DRAM: if (Pico32xMem != NULL) CHECKED_READ_BUFF(Pico32xMem->dram); + found++; break; case CHUNK_32XPAL: if (Pico32xMem != NULL) CHECKED_READ_BUFF(Pico32xMem->pal); + found++; Pico32x.dirty_pal = 1; break; case CHUNK_32XSYS: CHECKED_READ_BUFF(Pico32x); + found++; break; #endif default: @@ -712,7 +715,8 @@ int PicoStateLoadGfx(const char *fname) } areaClose(afile); - PicoVideoCacheSAT(); + 
PicoVideoCacheSAT(1); + Pico.est.rendstatus = -1; return 0; } @@ -771,6 +775,7 @@ void PicoTmpStateRestore(void *data) memcpy(VdpSATCache, t->satcache, sizeof(VdpSATCache)); memcpy(&Pico.video, &t->video, sizeof(Pico.video)); Pico.m.dirtyPal = 1; + PicoVideoCacheSAT(0); #ifndef NO_32X if (PicoIn.AHW & PAHW_32X) { diff --git a/pico/videoport.c b/pico/videoport.c index 7f6ca9a7..4f9cbf10 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -1104,7 +1104,7 @@ unsigned char PicoVideoRead8HV_L(int is_from_z80) return d; } -void PicoVideoCacheSAT(void) +void PicoVideoCacheSAT(int load) { struct PicoVideo *pv = &Pico.video; int l; @@ -1115,7 +1115,7 @@ void PicoVideoCacheSAT(void) SATaddr &= ~0x200, SATmask &= ~0x200; // H40, zero lowest SAT bit // rebuild SAT cache XXX wrong since cache and memory can differ - for (l = 0; l < 80; l++) { + for (l = 0; load && l < 80; l++) { ((u16 *)VdpSATCache)[l*2 ] = PicoMem.vram[(SATaddr>>1) + l*4 ]; ((u16 *)VdpSATCache)[l*2 + 1] = PicoMem.vram[(SATaddr>>1) + l*4 + 1]; } @@ -1170,6 +1170,6 @@ void PicoVideoLoad(void) } if (vf->fifo_ql) pv->status |= SR_DMA; - PicoVideoCacheSAT(); + PicoVideoCacheSAT(1); } // vim:shiftwidth=2:ts=2:expandtab From 8717984c125f9dc77bee7177d99f3dd45a72fbdb Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 8 Feb 2022 23:49:23 +0000 Subject: [PATCH 0706/1110] update libchdr (libretro VFS fix) --- .gitmodules | 4 ++-- pico/cd/libchdr | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitmodules b/.gitmodules index 66e19f15..e0e639b8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -9,7 +9,7 @@ url = https://github.com/digital-sound-antiques/emu2413.git [submodule "pico/cd/libchdr"] path = pico/cd/libchdr - url = https://github.com/rtissera/libchdr.git + url = https://github.com/irixxxx/libchdr-picodrive.git [submodule "platform/common/dr_libs"] path = platform/common/dr_libs - url = https://github.com/mackron/dr_libs + url = https://github.com/mackron/dr_libs.git diff --git a/pico/cd/libchdr 
b/pico/cd/libchdr index e8ec3507..470c476a 160000 --- a/pico/cd/libchdr +++ b/pico/cd/libchdr @@ -1 +1 @@ -Subproject commit e8ec3507e876b4a5b71af0dce705e13732d843c4 +Subproject commit 470c476a9728788a87af20526f5d84e5226b8bd2 From b4e7cd1b51aa08b9f832d46c1f98527810075c76 Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 9 Feb 2022 18:42:04 +0000 Subject: [PATCH 0707/1110] libretro, update libretro-common --- Makefile | 3 + Makefile.libretro | 16 - .../compat/compat_posix_string.c | 2 +- .../compat/compat_strcasestr.c | 58 ++ .../libretro-common/compat/compat_strl.c | 2 +- .../libretro-common/compat/fopen_utf8.c | 13 +- .../libretro-common/encodings/encoding_utf.c | 146 ++--- .../libretro-common/include/boolean.h | 2 +- .../include/compat/apple_compat.h | 2 +- .../libretro-common/include/compat/fnmatch.h | 2 +- .../include/compat/fopen_utf8.h | 2 +- .../libretro-common/include/compat/getopt.h | 2 +- .../include/compat/intrinsics.h | 42 +- .../libretro-common/include/compat/msvc.h | 21 +- .../include/compat/posix_string.h | 6 +- .../include/compat/strcasestr.h | 6 +- .../libretro-common/include/compat/strl.h | 2 +- .../libretro-common/include/encodings/utf.h | 4 +- .../libretro-common/include/file/file_path.h | 47 +- .../libretro-common/include/libretro.h | 7 + .../libretro/libretro-common/include/memmap.h | 4 +- .../libretro-common/include/retro_common.h | 2 +- .../include/retro_common_api.h | 2 +- .../libretro-common/include/retro_dirent.h | 3 +- .../include/retro_endianness.h | 521 ++++++++++++++---- .../include/retro_environment.h | 2 +- .../libretro-common/include/retro_inline.h | 2 +- .../include/retro_miscellaneous.h | 53 +- .../include/streams/file_stream.h | 4 +- .../include/streams/file_stream_transforms.h | 2 +- .../include/string/stdstring.h | 170 +++++- .../libretro-common/include/vfs/vfs.h | 28 +- .../include/vfs/vfs_implementation.h | 8 +- .../libretro/libretro-common/memmap/memmap.c | 163 ++++++ .../libretro-common/streams/file_stream.c | 230 +++++--- 
.../streams/file_stream_transforms.c | 46 +- .../libretro-common/string/stdstring.c | 398 ++++++++++--- .../libretro-common/vfs/vfs_implementation.c | 456 ++++++++------- platform/libretro/libretro.c | 26 - 39 files changed, 1767 insertions(+), 738 deletions(-) create mode 100644 platform/libretro/libretro-common/compat/compat_strcasestr.c create mode 100644 platform/libretro/libretro-common/memmap/memmap.c diff --git a/Makefile b/Makefile index 94446a95..ce1938e4 100644 --- a/Makefile +++ b/Makefile @@ -196,10 +196,13 @@ endif ifeq "$(PLATFORM)" "libretro" OBJS += platform/libretro/libretro.o ifeq "$(USE_LIBRETRO_VFS)" "1" +OBJS += platform/libretro/libretro-common/compat/compat_strcasestr.o OBJS += platform/libretro/libretro-common/compat/compat_posix_string.o OBJS += platform/libretro/libretro-common/compat/compat_strl.o OBJS += platform/libretro/libretro-common/compat/fopen_utf8.o +OBJS += platform/libretro/libretro-common/memmap/memmap.o OBJS += platform/libretro/libretro-common/encodings/encoding_utf.o +OBJS += platform/libretro/libretro-common/string/stdstring.o OBJS += platform/libretro/libretro-common/streams/file_stream.o OBJS += platform/libretro/libretro-common/streams/file_stream_transforms.o OBJS += platform/libretro/libretro-common/vfs/vfs_implementation.o diff --git a/Makefile.libretro b/Makefile.libretro index 3c7c67a2..cf9ac1d1 100644 --- a/Makefile.libretro +++ b/Makefile.libretro @@ -160,7 +160,6 @@ else ifeq ($(platform), ps3) AR = $(CELL_SDK)/host-win32/ppu/bin/ppu-lv2-ar.exe CFLAGS += -DBLARGG_BIG_ENDIAN=1 -D__ppc__ -DFAMEC_NO_GOTOS STATIC_LINKING = 1 - NO_MMAP = 1 # PS3 has memory mapped in a way not suitable for DRC use_sh2drc = 0 use_svpdrc = 0 @@ -173,7 +172,6 @@ else ifeq ($(platform), sncps3) AR = $(CELL_SDK)/host-win32/sn/bin/ps3snarl.exe CFLAGS += -DBLARGG_BIG_ENDIAN=1 -D__ppc__ -DFAMEC_NO_GOTOS STATIC_LINKING = 1 - NO_MMAP = 1 # PS3 has memory mapped in a way not suitable for DRC use_sh2drc = 0 use_svpdrc = 0 @@ -185,7 +183,6 @@ else 
ifeq ($(platform), psl1ght) AR = $(PS3DEV)/ppu/bin/ppu-ar$(EXE_EXT) CFLAGS += -DBLARGG_BIG_ENDIAN=1 -D__ppc__ -DFAMEC_NO_GOTOS STATIC_LINKING = 1 - NO_MMAP = 1 # PS3 has memory mapped in a way not suitable for DRC use_sh2drc = 0 use_svpdrc = 0 @@ -199,7 +196,6 @@ else ifeq ($(platform), psp1) CFLAGS += -G0 -ftracer CFLAGS += -DPSP STATIC_LINKING = 1 - NO_MMAP = 1 # PS2 else ifeq ($(platform), ps2) @@ -211,7 +207,6 @@ else ifeq ($(platform), ps2) CFLAGS += -I$(PS2DEV)/gsKit/include -I$(PS2SDK)/ee/include -I$(PS2SDK)/common/include CFLAGS += -DHAVE_NO_LANGEXTRA STATIC_LINKING = 1 - NO_MMAP = 1 # CTR (3DS) else ifeq ($(platform), ctr) @@ -224,7 +219,6 @@ else ifeq ($(platform), ctr) CFLAGS += -Wall -mword-relocations CFLAGS += -fomit-frame-pointer -ffast-math STATIC_LINKING = 1 - NO_MMAP = 1 OBJS += platform/libretro/3ds/3ds_utils.o platform/libretro/3ds/utils.o @@ -249,7 +243,6 @@ else ifeq ($(platform), vita) CFLAGS += -mword-relocations -fno-unwind-tables CFLAGS += -fno-optimize-sibling-calls STATIC_LINKING = 1 - NO_MMAP = 1 # Xbox 360 else ifeq ($(platform), xenon) @@ -265,7 +258,6 @@ else ifeq ($(platform), ngc) AR = $(DEVKITPPC)/bin/powerpc-eabi-ar$(EXE_EXT) CFLAGS += -DGEKKO -DHW_DOL -mrvl -mcpu=750 -meabi -mhard-float -D__ppc__ -DMSB_FIRST STATIC_LINKING = 1 - NO_MMAP = 1 # Nintendo Wii else ifeq ($(platform), wii) @@ -274,7 +266,6 @@ else ifeq ($(platform), wii) AR = $(DEVKITPPC)/bin/powerpc-eabi-ar$(EXE_EXT) CFLAGS += -DGEKKO -DHW_RVL -mrvl -mcpu=750 -meabi -mhard-float -D__ppc__ -DMSB_FIRST STATIC_LINKING = 1 - NO_MMAP = 1 # Nintendo Wii U else ifeq ($(platform), wiiu) @@ -284,14 +275,12 @@ else ifeq ($(platform), wiiu) AR = $(DEVKITPPC)/bin/powerpc-eabi-ar$(EXE_EXT) CFLAGS += -DGEKKO -DWIIU -DHW_RVL -DHW_WUP -mwup -mcpu=750 -meabi -mhard-float -D__ppc__ -DMSB_FIRST STATIC_LINKING = 1 - NO_MMAP = 1 # Nintendo Switch (libtransistor) else ifeq ($(platform), switch) TARGET := $(TARGET_NAME)_libretro_$(platform).a include $(LIBTRANSISTOR_HOME)/libtransistor.mk 
STATIC_LINKING=1 - NO_MMAP = 1 # Nintendo Switch (libnx) else ifeq ($(platform), libnx) @@ -548,14 +537,9 @@ endif CFLAGS += -DNO_ZLIB -D__LIBRETRO__ ifeq ($(USE_LIBRETRO_VFS),1) - NO_MMAP = 1 CFLAGS += -DUSE_LIBRETRO_VFS endif -ifeq ($(NO_MMAP),1) - CFLAGS += -DNO_MMAP -endif - ifeq ($(NO_ARM_ASM),1) use_cyclone = 0 use_fame = 1 diff --git a/platform/libretro/libretro-common/compat/compat_posix_string.c b/platform/libretro/libretro-common/compat/compat_posix_string.c index 33a30e57..6a2f07ee 100644 --- a/platform/libretro/libretro-common/compat/compat_posix_string.c +++ b/platform/libretro/libretro-common/compat/compat_posix_string.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2018 The RetroArch team +/* Copyright (C) 2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (compat_posix_string.c). diff --git a/platform/libretro/libretro-common/compat/compat_strcasestr.c b/platform/libretro/libretro-common/compat/compat_strcasestr.c new file mode 100644 index 00000000..4129dab2 --- /dev/null +++ b/platform/libretro/libretro-common/compat/compat_strcasestr.c @@ -0,0 +1,58 @@ +/* Copyright (C) 2010-2020 The RetroArch team + * + * --------------------------------------------------------------------------------------- + * The following license statement only applies to this file (compat_strcasestr.c). 
+ * --------------------------------------------------------------------------------------- + * + * Permission is hereby granted, free of charge, + * to any person obtaining a copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include + +#include + +/* Pretty much strncasecmp. 
*/ +static int casencmp(const char *a, const char *b, size_t n) +{ + size_t i; + + for (i = 0; i < n; i++) + { + int a_lower = tolower(a[i]); + int b_lower = tolower(b[i]); + if (a_lower != b_lower) + return a_lower - b_lower; + } + + return 0; +} + +char *strcasestr_retro__(const char *haystack, const char *needle) +{ + size_t i, search_off; + size_t hay_len = strlen(haystack); + size_t needle_len = strlen(needle); + + if (needle_len > hay_len) + return NULL; + + search_off = hay_len - needle_len; + for (i = 0; i <= search_off; i++) + if (!casencmp(haystack + i, needle, needle_len)) + return (char*)haystack + i; + + return NULL; +} diff --git a/platform/libretro/libretro-common/compat/compat_strl.c b/platform/libretro/libretro-common/compat/compat_strl.c index 94cb39b6..31723107 100644 --- a/platform/libretro/libretro-common/compat/compat_strl.c +++ b/platform/libretro/libretro-common/compat/compat_strl.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2018 The RetroArch team +/* Copyright (C) 2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (compat_strl.c). diff --git a/platform/libretro/libretro-common/compat/fopen_utf8.c b/platform/libretro/libretro-common/compat/fopen_utf8.c index 893afb83..85abb59e 100644 --- a/platform/libretro/libretro-common/compat/fopen_utf8.c +++ b/platform/libretro/libretro-common/compat/fopen_utf8.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2018 The RetroArch team +/* Copyright (C) 2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (fopen_utf8.c). 
@@ -49,9 +49,14 @@ void *fopen_utf8(const char * filename, const char * mode) #else wchar_t * filename_w = utf8_to_utf16_string_alloc(filename); wchar_t * mode_w = utf8_to_utf16_string_alloc(mode); - FILE* ret = _wfopen(filename_w, mode_w); - free(filename_w); - free(mode_w); + FILE* ret = NULL; + + if (filename_w && mode_w) + ret = _wfopen(filename_w, mode_w); + if (filename_w) + free(filename_w); + if (mode_w) + free(mode_w); return ret; #endif } diff --git a/platform/libretro/libretro-common/encodings/encoding_utf.c b/platform/libretro/libretro-common/encodings/encoding_utf.c index b6ad2f96..2760824d 100644 --- a/platform/libretro/libretro-common/encodings/encoding_utf.c +++ b/platform/libretro/libretro-common/encodings/encoding_utf.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2018 The RetroArch team +/* Copyright (C) 2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (encoding_utf.c). 
@@ -37,6 +37,8 @@ #include #endif +#define UTF8_WALKBYTE(string) (*((*(string))++)) + static unsigned leading_ones(uint8_t c) { unsigned ones = 0; @@ -89,13 +91,14 @@ size_t utf8_conv_utf32(uint32_t *out, size_t out_chars, bool utf16_conv_utf8(uint8_t *out, size_t *out_chars, const uint16_t *in, size_t in_size) { - static uint8_t kUtf8Limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; - size_t out_pos = 0; - size_t in_pos = 0; + size_t out_pos = 0; + size_t in_pos = 0; + static const + uint8_t utf8_limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; for (;;) { - unsigned numAdds; + unsigned num_adds; uint32_t value; if (in_pos == in_size) @@ -124,21 +127,21 @@ bool utf16_conv_utf8(uint8_t *out, size_t *out_chars, value = (((value - 0xD800) << 10) | (c2 - 0xDC00)) + 0x10000; } - for (numAdds = 1; numAdds < 5; numAdds++) - if (value < (((uint32_t)1) << (numAdds * 5 + 6))) + for (num_adds = 1; num_adds < 5; num_adds++) + if (value < (((uint32_t)1) << (num_adds * 5 + 6))) break; if (out) - out[out_pos] = (char)(kUtf8Limits[numAdds - 1] - + (value >> (6 * numAdds))); + out[out_pos] = (char)(utf8_limits[num_adds - 1] + + (value >> (6 * num_adds))); out_pos++; do { - numAdds--; + num_adds--; if (out) out[out_pos] = (char)(0x80 - + ((value >> (6 * numAdds)) & 0x3F)); + + ((value >> (6 * num_adds)) & 0x3F)); out_pos++; - }while (numAdds != 0); + }while (num_adds != 0); } *out_chars = out_pos; @@ -166,13 +169,15 @@ size_t utf8cpy(char *d, size_t d_len, const char *s, size_t chars) while (*sb && chars-- > 0) { sb++; - while ((*sb & 0xC0) == 0x80) sb++; + while ((*sb & 0xC0) == 0x80) + sb++; } if ((size_t)(sb - sb_org) > d_len-1 /* NUL */) { sb = sb_org + d_len-1; - while ((*sb & 0xC0) == 0x80) sb--; + while ((*sb & 0xC0) == 0x80) + sb--; } memcpy(d, sb_org, sb-sb_org); @@ -184,14 +189,18 @@ size_t utf8cpy(char *d, size_t d_len, const char *s, size_t chars) const char *utf8skip(const char *str, size_t chars) { const uint8_t *strb = (const uint8_t*)str; + if (!chars) return str; + do { 
strb++; - while ((*strb & 0xC0)==0x80) strb++; + while ((*strb & 0xC0)==0x80) + strb++; chars--; - } while(chars); + }while (chars); + return (const char*)strb; } @@ -211,24 +220,22 @@ size_t utf8len(const char *string) return ret; } -#define utf8_walkbyte(string) (*((*(string))++)) - /* Does not validate the input, returns garbage if it's not UTF-8. */ uint32_t utf8_walk(const char **string) { - uint8_t first = utf8_walkbyte(string); + uint8_t first = UTF8_WALKBYTE(string); uint32_t ret = 0; if (first < 128) return first; - ret = (ret << 6) | (utf8_walkbyte(string) & 0x3F); + ret = (ret << 6) | (UTF8_WALKBYTE(string) & 0x3F); if (first >= 0xE0) { - ret = (ret << 6) | (utf8_walkbyte(string) & 0x3F); + ret = (ret << 6) | (UTF8_WALKBYTE(string) & 0x3F); if (first >= 0xF0) { - ret = (ret << 6) | (utf8_walkbyte(string) & 0x3F); + ret = (ret << 6) | (UTF8_WALKBYTE(string) & 0x3F); return ret | (first & 7) << 18; } return ret | (first & 15) << 12; @@ -277,9 +284,7 @@ bool utf16_to_char_string(const uint16_t *in, char *s, size_t len) static char *mb_to_mb_string_alloc(const char *str, enum CodePage cp_in, enum CodePage cp_out) { - char *path_buf = NULL; wchar_t *path_buf_wide = NULL; - int path_buf_len = 0; int path_buf_wide_len = MultiByteToWideChar(cp_in, 0, str, -1, NULL, 0); /* Windows 95 will return 0 from these functions with @@ -292,54 +297,51 @@ static char *mb_to_mb_string_alloc(const char *str, * MultiByteToWideChar also supports CP_UTF7 and CP_UTF8. 
*/ - if (path_buf_wide_len) - { - path_buf_wide = (wchar_t*) - calloc(path_buf_wide_len + sizeof(wchar_t), sizeof(wchar_t)); - - if (path_buf_wide) - { - MultiByteToWideChar(cp_in, 0, - str, -1, path_buf_wide, path_buf_wide_len); - - if (*path_buf_wide) - { - path_buf_len = WideCharToMultiByte(cp_out, 0, - path_buf_wide, -1, NULL, 0, NULL, NULL); - - if (path_buf_len) - { - path_buf = (char*) - calloc(path_buf_len + sizeof(char), sizeof(char)); - - if (path_buf) - { - WideCharToMultiByte(cp_out, 0, - path_buf_wide, -1, path_buf, - path_buf_len, NULL, NULL); - - free(path_buf_wide); - - if (*path_buf) - return path_buf; - - free(path_buf); - return NULL; - } - } - else - { - free(path_buf_wide); - return strdup(str); - } - } - } - } - else + if (!path_buf_wide_len) return strdup(str); + path_buf_wide = (wchar_t*) + calloc(path_buf_wide_len + sizeof(wchar_t), sizeof(wchar_t)); + if (path_buf_wide) + { + MultiByteToWideChar(cp_in, 0, + str, -1, path_buf_wide, path_buf_wide_len); + + if (*path_buf_wide) + { + int path_buf_len = WideCharToMultiByte(cp_out, 0, + path_buf_wide, -1, NULL, 0, NULL, NULL); + + if (path_buf_len) + { + char *path_buf = (char*) + calloc(path_buf_len + sizeof(char), sizeof(char)); + + if (path_buf) + { + WideCharToMultiByte(cp_out, 0, + path_buf_wide, -1, path_buf, + path_buf_len, NULL, NULL); + + free(path_buf_wide); + + if (*path_buf) + return path_buf; + + free(path_buf); + return NULL; + } + } + else + { + free(path_buf_wide); + return strdup(str); + } + } + free(path_buf_wide); + } return NULL; } @@ -379,13 +381,13 @@ char* local_to_utf8_string_alloc(const char *str) wchar_t* utf8_to_utf16_string_alloc(const char *str) { #ifdef _WIN32 - int len = 0; - int out_len = 0; + int len = 0; + int out_len = 0; #else - size_t len = 0; + size_t len = 0; size_t out_len = 0; #endif - wchar_t *buf = NULL; + wchar_t *buf = NULL; if (!str || !*str) return NULL; diff --git a/platform/libretro/libretro-common/include/boolean.h 
b/platform/libretro/libretro-common/include/boolean.h index f06ac5a7..9d0d7c12 100644 --- a/platform/libretro/libretro-common/include/boolean.h +++ b/platform/libretro/libretro-common/include/boolean.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2018 The RetroArch team +/* Copyright (C) 2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (boolean.h). diff --git a/platform/libretro/libretro-common/include/compat/apple_compat.h b/platform/libretro/libretro-common/include/compat/apple_compat.h index 819b39ec..bf98a591 100644 --- a/platform/libretro/libretro-common/include/compat/apple_compat.h +++ b/platform/libretro/libretro-common/include/compat/apple_compat.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2018 The RetroArch team +/* Copyright (C) 2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (apple_compat.h). diff --git a/platform/libretro/libretro-common/include/compat/fnmatch.h b/platform/libretro/libretro-common/include/compat/fnmatch.h index cede1ca6..97878784 100644 --- a/platform/libretro/libretro-common/include/compat/fnmatch.h +++ b/platform/libretro/libretro-common/include/compat/fnmatch.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2018 The RetroArch team +/* Copyright (C) 2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (fnmatch.h). 
diff --git a/platform/libretro/libretro-common/include/compat/fopen_utf8.h b/platform/libretro/libretro-common/include/compat/fopen_utf8.h index f59822a5..97d4404e 100644 --- a/platform/libretro/libretro-common/include/compat/fopen_utf8.h +++ b/platform/libretro/libretro-common/include/compat/fopen_utf8.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2018 The RetroArch team +/* Copyright (C) 2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (fopen_utf8.h). diff --git a/platform/libretro/libretro-common/include/compat/getopt.h b/platform/libretro/libretro-common/include/compat/getopt.h index 2e606a68..48603f0d 100644 --- a/platform/libretro/libretro-common/include/compat/getopt.h +++ b/platform/libretro/libretro-common/include/compat/getopt.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2018 The RetroArch team +/* Copyright (C) 2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (getopt.h). diff --git a/platform/libretro/libretro-common/include/compat/intrinsics.h b/platform/libretro/libretro-common/include/compat/intrinsics.h index cb1f540d..ac490274 100644 --- a/platform/libretro/libretro-common/include/compat/intrinsics.h +++ b/platform/libretro/libretro-common/include/compat/intrinsics.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2018 The RetroArch team +/* Copyright (C) 2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (intrinsics.h). 
@@ -41,7 +41,7 @@ RETRO_BEGIN_DECLS /* Count Leading Zero, unsigned 16bit input value */ static INLINE unsigned compat_clz_u16(uint16_t val) { -#if defined(__GNUC__) && !defined(PS2) +#if defined(__GNUC__) return __builtin_clz(val << 16 | 0x8000); #else unsigned ret = 0; @@ -63,20 +63,34 @@ static INLINE int compat_ctz(unsigned x) return __builtin_ctz(x); #elif _MSC_VER >= 1400 && !defined(_XBOX) && !defined(__WINRT__) unsigned long r = 0; - _BitScanReverse((unsigned long*)&r, x); + _BitScanForward((unsigned long*)&r, x); return (int)r; #else -/* Only checks at nibble granularity, - * because that's what we need. */ - if (x & 0x000f) - return 0; - if (x & 0x00f0) - return 4; - if (x & 0x0f00) - return 8; - if (x & 0xf000) - return 12; - return 16; + int count = 0; + if (!(x & 0xffff)) + { + x >>= 16; + count |= 16; + } + if (!(x & 0xff)) + { + x >>= 8; + count |= 8; + } + if (!(x & 0xf)) + { + x >>= 4; + count |= 4; + } + if (!(x & 0x3)) + { + x >>= 2; + count |= 2; + } + if (!(x & 0x1)) + count |= 1; + + return count; #endif } diff --git a/platform/libretro/libretro-common/include/compat/msvc.h b/platform/libretro/libretro-common/include/compat/msvc.h index 1c242630..a4c93a59 100644 --- a/platform/libretro/libretro-common/include/compat/msvc.h +++ b/platform/libretro/libretro-common/include/compat/msvc.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2018 The RetroArch team +/* Copyright (C) 2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (msvc.h). @@ -29,22 +29,17 @@ extern "C" { #endif -/* Pre-MSVC 2015 compilers don't implement snprintf in a cross-platform manner. */ +/* Pre-MSVC 2015 compilers don't implement snprintf, vsnprintf in a cross-platform manner. 
*/ #if _MSC_VER < 1900 - #include - #include - #ifndef snprintf - #define snprintf c99_snprintf_retro__ - #endif - - int c99_snprintf_retro__(char *outBuf, size_t size, const char *format, ...); -#endif - -/* Pre-MSVC 2008 compilers don't implement vsnprintf in a cross-platform manner? Not sure about this one. */ -#if _MSC_VER < 1500 #include #include #include + + #ifndef snprintf + #define snprintf c99_snprintf_retro__ + #endif + int c99_snprintf_retro__(char *outBuf, size_t size, const char *format, ...); + #ifndef vsnprintf #define vsnprintf c99_vsnprintf_retro__ #endif diff --git a/platform/libretro/libretro-common/include/compat/posix_string.h b/platform/libretro/libretro-common/include/compat/posix_string.h index f4380c3a..47964b2a 100644 --- a/platform/libretro/libretro-common/include/compat/posix_string.h +++ b/platform/libretro/libretro-common/include/compat/posix_string.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2018 The RetroArch team +/* Copyright (C) 2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (posix_string.h). @@ -29,10 +29,6 @@ #include #endif -#if defined(PS2) -#include -#endif - RETRO_BEGIN_DECLS #ifdef _WIN32 diff --git a/platform/libretro/libretro-common/include/compat/strcasestr.h b/platform/libretro/libretro-common/include/compat/strcasestr.h index c26de9e0..227e253e 100644 --- a/platform/libretro/libretro-common/include/compat/strcasestr.h +++ b/platform/libretro/libretro-common/include/compat/strcasestr.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2018 The RetroArch team +/* Copyright (C) 2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (strcasestr.h). 
@@ -25,10 +25,6 @@ #include -#if defined(PS2) -#include -#endif - #if defined(RARCH_INTERNAL) && defined(HAVE_CONFIG_H) #include "../../../config.h" #endif diff --git a/platform/libretro/libretro-common/include/compat/strl.h b/platform/libretro/libretro-common/include/compat/strl.h index c70f1195..5e7a892f 100644 --- a/platform/libretro/libretro-common/include/compat/strl.h +++ b/platform/libretro/libretro-common/include/compat/strl.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2018 The RetroArch team +/* Copyright (C) 2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (strl.h). diff --git a/platform/libretro/libretro-common/include/encodings/utf.h b/platform/libretro/libretro-common/include/encodings/utf.h index b513f28a..bea4e145 100644 --- a/platform/libretro/libretro-common/include/encodings/utf.h +++ b/platform/libretro/libretro-common/include/encodings/utf.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2018 The RetroArch team +/* Copyright (C) 2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (utf.h). 
@@ -35,7 +35,7 @@ RETRO_BEGIN_DECLS enum CodePage { CODEPAGE_LOCAL = 0, /* CP_ACP */ - CODEPAGE_UTF8 = 65001 /* CP_UTF8 */ + CODEPAGE_UTF8 = 65001 /* CP_UTF8 */ }; size_t utf8_conv_utf32(uint32_t *out, size_t out_chars, diff --git a/platform/libretro/libretro-common/include/file/file_path.h b/platform/libretro/libretro-common/include/file/file_path.h index 8d365a9c..452763fe 100644 --- a/platform/libretro/libretro-common/include/file/file_path.h +++ b/platform/libretro/libretro-common/include/file/file_path.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2019 The RetroArch team +/* Copyright (C) 2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (file_path.h). @@ -125,6 +125,7 @@ char *path_remove_extension(char *path); * Returns: basename from path. **/ const char *path_basename(const char *path); +const char *path_basename_nocompression(const char *path); /** * path_basedir: @@ -178,7 +179,7 @@ char *path_resolve_realpath(char *buf, size_t size, bool resolve_symlinks); * * E.g. path /a/b/e/f.cgp with base /a/b/c/d/ turns into ../../e/f.cgp **/ -void path_relative_to(char *out, const char *path, const char *base, size_t size); +size_t path_relative_to(char *out, const char *path, const char *base, size_t size); /** * path_is_absolute: @@ -226,7 +227,7 @@ void fill_pathname(char *out_path, const char *in_path, * E.g.: * out_filename = "RetroArch-{month}{day}-{Hours}{Minutes}.{@ext}" **/ -void fill_dated_filename(char *out_filename, +size_t fill_dated_filename(char *out_filename, const char *ext, size_t size); /** @@ -259,7 +260,7 @@ void fill_str_dated_filename(char *out_filename, * present in 'in_path', it will be ignored. 
* */ -void fill_pathname_noext(char *out_path, const char *in_path, +size_t fill_pathname_noext(char *out_path, const char *in_path, const char *replace, size_t size); /** @@ -289,7 +290,7 @@ char *find_last_slash(const char *str); * E.g..: in_dir = "/tmp/some_dir", in_basename = "/some_content/foo.c", * replace = ".asm" => in_dir = "/tmp/some_dir/foo.c.asm" **/ -void fill_pathname_dir(char *in_dir, const char *in_basename, +size_t fill_pathname_dir(char *in_dir, const char *in_basename, const char *replace, size_t size); /** @@ -300,12 +301,12 @@ void fill_pathname_dir(char *in_dir, const char *in_basename, * * Copies basename of @in_path into @out_path. **/ -void fill_pathname_base(char *out_path, const char *in_path, size_t size); +size_t fill_pathname_base(char *out_path, const char *in_path, size_t size); void fill_pathname_base_noext(char *out_dir, const char *in_path, size_t size); -void fill_pathname_base_ext(char *out, +size_t fill_pathname_base_ext(char *out, const char *in_path, const char *ext, size_t size); @@ -376,20 +377,20 @@ void fill_pathname_resolve_relative(char *out_path, const char *in_refpath, * Makes sure not to get two consecutive slashes * between directory and path. 
**/ -void fill_pathname_join(char *out_path, const char *dir, +size_t fill_pathname_join(char *out_path, const char *dir, const char *path, size_t size); -void fill_pathname_join_special_ext(char *out_path, +size_t fill_pathname_join_special_ext(char *out_path, const char *dir, const char *path, const char *last, const char *ext, size_t size); -void fill_pathname_join_concat_noext(char *out_path, +size_t fill_pathname_join_concat_noext(char *out_path, const char *dir, const char *path, const char *concat, size_t size); -void fill_pathname_join_concat(char *out_path, +size_t fill_pathname_join_concat(char *out_path, const char *dir, const char *path, const char *concat, size_t size); @@ -408,10 +409,10 @@ void fill_pathname_join_noext(char *out_path, * Joins a directory (@dir) and path (@path) together * using the given delimiter (@delim). **/ -void fill_pathname_join_delim(char *out_path, const char *dir, +size_t fill_pathname_join_delim(char *out_path, const char *dir, const char *path, const char delim, size_t size); -void fill_pathname_join_delim_concat(char *out_path, const char *dir, +size_t fill_pathname_join_delim_concat(char *out_path, const char *dir, const char *path, const char delim, const char *concat, size_t size); @@ -430,7 +431,7 @@ void fill_pathname_join_delim_concat(char *out_path, const char *dir, * E.g.: "/path/to/game.img" -> game.img * "/path/to/myarchive.7z#folder/to/game.img" -> game.img */ -void fill_short_pathname_representation(char* out_rep, +size_t fill_short_pathname_representation(char* out_rep, const char *in_path, size_t size); void fill_short_pathname_representation_noext(char* out_rep, @@ -442,6 +443,12 @@ void fill_pathname_expand_special(char *out_path, void fill_pathname_abbreviate_special(char *out_path, const char *in_path, size_t size); +void fill_pathname_abbreviated_or_relative(char *out_path, const char *in_refpath, const char *in_path, size_t size); + +void pathname_conform_slashes_to_os(char *path); + +void 
pathname_make_slashes_portable(char *path); + /** * path_basedir: * @path : path @@ -460,9 +467,9 @@ void path_basedir_wrapper(char *path); * Returns: true (1) if character is a slash, otherwise false (0). */ #ifdef _WIN32 -#define path_char_is_slash(c) (((c) == '/') || ((c) == '\\')) +#define PATH_CHAR_IS_SLASH(c) (((c) == '/') || ((c) == '\\')) #else -#define path_char_is_slash(c) ((c) == '/') +#define PATH_CHAR_IS_SLASH(c) ((c) == '/') #endif /** @@ -473,11 +480,11 @@ void path_basedir_wrapper(char *path); * Returns: default slash separator. */ #ifdef _WIN32 -#define path_default_slash() "\\" -#define path_default_slash_c() '\\' +#define PATH_DEFAULT_SLASH() "\\" +#define PATH_DEFAULT_SLASH_C() '\\' #else -#define path_default_slash() "/" -#define path_default_slash_c() '/' +#define PATH_DEFAULT_SLASH() "/" +#define PATH_DEFAULT_SLASH_C() '/' #endif /** diff --git a/platform/libretro/libretro-common/include/libretro.h b/platform/libretro/libretro-common/include/libretro.h index 2f174837..2887e5e8 100644 --- a/platform/libretro/libretro-common/include/libretro.h +++ b/platform/libretro/libretro-common/include/libretro.h @@ -283,6 +283,9 @@ enum retro_language RETRO_LANGUAGE_HEBREW = 21, RETRO_LANGUAGE_ASTURIAN = 22, RETRO_LANGUAGE_FINNISH = 23, + RETRO_LANGUAGE_INDONESIAN = 24, + RETRO_LANGUAGE_SWEDISH = 25, + RETRO_LANGUAGE_UKRAINIAN = 26, RETRO_LANGUAGE_LAST, /* Ensure sizeof(enum) == sizeof(int) */ @@ -3461,6 +3464,10 @@ struct retro_core_option_definition const char *default_value; }; +#ifdef __PS3__ +#undef local +#endif + struct retro_core_options_intl { /* Pointer to an array of retro_core_option_definition structs diff --git a/platform/libretro/libretro-common/include/memmap.h b/platform/libretro/libretro-common/include/memmap.h index 277f9cb3..88978777 100644 --- a/platform/libretro/libretro-common/include/memmap.h +++ b/platform/libretro/libretro-common/include/memmap.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2018 The RetroArch team +/* Copyright (C) 
2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (memmap.h). @@ -26,7 +26,7 @@ #include #include -#if defined(__CELLOS_LV2__) || defined(PSP) || defined(PS2) || defined(GEKKO) || defined(VITA) || defined(_XBOX) || defined(_3DS) || defined(WIIU) || defined(SWITCH) || defined(HAVE_LIBNX) +#if defined(PSP) || defined(PS2) || defined(GEKKO) || defined(VITA) || defined(_XBOX) || defined(_3DS) || defined(WIIU) || defined(SWITCH) || defined(HAVE_LIBNX) || defined(__PS3__) || defined(__PSL1GHT__) /* No mman available */ #elif defined(_WIN32) && !defined(_XBOX) #include diff --git a/platform/libretro/libretro-common/include/retro_common.h b/platform/libretro/libretro-common/include/retro_common.h index 9a1fd5fd..a715a28c 100644 --- a/platform/libretro/libretro-common/include/retro_common.h +++ b/platform/libretro/libretro-common/include/retro_common.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2018 The RetroArch team +/* Copyright (C) 2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (retro_common.h). diff --git a/platform/libretro/libretro-common/include/retro_common_api.h b/platform/libretro/libretro-common/include/retro_common_api.h index d784842e..0f68b7d9 100644 --- a/platform/libretro/libretro-common/include/retro_common_api.h +++ b/platform/libretro/libretro-common/include/retro_common_api.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2018 The RetroArch team +/* Copyright (C) 2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (retro_common_api.h). 
diff --git a/platform/libretro/libretro-common/include/retro_dirent.h b/platform/libretro/libretro-common/include/retro_dirent.h index 8a2591bd..3b041679 100644 --- a/platform/libretro/libretro-common/include/retro_dirent.h +++ b/platform/libretro/libretro-common/include/retro_dirent.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2019 The RetroArch team +/* Copyright (C) 2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (retro_dirent.h). @@ -25,7 +25,6 @@ #include #include -#include #include diff --git a/platform/libretro/libretro-common/include/retro_endianness.h b/platform/libretro/libretro-common/include/retro_endianness.h index e721ec9d..aefffef8 100644 --- a/platform/libretro/libretro-common/include/retro_endianness.h +++ b/platform/libretro/libretro-common/include/retro_endianness.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2018 The RetroArch team +/* Copyright (C) 2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (retro_endianness.h). 
@@ -31,40 +31,97 @@ #define SWAP16 _byteswap_ushort #define SWAP32 _byteswap_ulong #else -#define SWAP16(x) ((uint16_t)( \ - (((uint16_t)(x) & 0x00ff) << 8) | \ - (((uint16_t)(x) & 0xff00) >> 8) \ - )) -#define SWAP32(x) ((uint32_t)( \ - (((uint32_t)(x) & 0x000000ff) << 24) | \ - (((uint32_t)(x) & 0x0000ff00) << 8) | \ - (((uint32_t)(x) & 0x00ff0000) >> 8) | \ - (((uint32_t)(x) & 0xff000000) >> 24) \ - )) +static INLINE uint16_t SWAP16(uint16_t x) +{ + return ((x & 0x00ff) << 8) | + ((x & 0xff00) >> 8); +} + +static INLINE uint32_t SWAP32(uint32_t x) +{ + return ((x & 0x000000ff) << 24) | + ((x & 0x0000ff00) << 8) | + ((x & 0x00ff0000) >> 8) | + ((x & 0xff000000) >> 24); +} + #endif #if defined(_MSC_VER) && _MSC_VER <= 1200 -#define SWAP64(val) \ - ((((uint64_t)(val) & 0x00000000000000ff) << 56) \ - | (((uint64_t)(val) & 0x000000000000ff00) << 40) \ - | (((uint64_t)(val) & 0x0000000000ff0000) << 24) \ - | (((uint64_t)(val) & 0x00000000ff000000) << 8) \ - | (((uint64_t)(val) & 0x000000ff00000000) >> 8) \ - | (((uint64_t)(val) & 0x0000ff0000000000) >> 24) \ - | (((uint64_t)(val) & 0x00ff000000000000) >> 40) \ - | (((uint64_t)(val) & 0xff00000000000000) >> 56)) +static INLINE uint64_t SWAP64(uint64_t val) +{ + return + ((val & 0x00000000000000ff) << 56) + | ((val & 0x000000000000ff00) << 40) + | ((val & 0x0000000000ff0000) << 24) + | ((val & 0x00000000ff000000) << 8) + | ((val & 0x000000ff00000000) >> 8) + | ((val & 0x0000ff0000000000) >> 24) + | ((val & 0x00ff000000000000) >> 40) + | ((val & 0xff00000000000000) >> 56); +} #else -#define SWAP64(val) \ - ((((uint64_t)(val) & 0x00000000000000ffULL) << 56) \ - | (((uint64_t)(val) & 0x000000000000ff00ULL) << 40) \ - | (((uint64_t)(val) & 0x0000000000ff0000ULL) << 24) \ - | (((uint64_t)(val) & 0x00000000ff000000ULL) << 8) \ - | (((uint64_t)(val) & 0x000000ff00000000ULL) >> 8) \ - | (((uint64_t)(val) & 0x0000ff0000000000ULL) >> 24) \ - | (((uint64_t)(val) & 0x00ff000000000000ULL) >> 40) \ - | (((uint64_t)(val) & 
0xff00000000000000ULL) >> 56)) +static INLINE uint64_t SWAP64(uint64_t val) +{ + return ((val & 0x00000000000000ffULL) << 56) + | ((val & 0x000000000000ff00ULL) << 40) + | ((val & 0x0000000000ff0000ULL) << 24) + | ((val & 0x00000000ff000000ULL) << 8) + | ((val & 0x000000ff00000000ULL) >> 8) + | ((val & 0x0000ff0000000000ULL) >> 24) + | ((val & 0x00ff000000000000ULL) >> 40) + | ((val & 0xff00000000000000ULL) >> 56); +} #endif + +#if defined (LSB_FIRST) || defined (MSB_FIRST) +# warning Defining MSB_FIRST and LSB_FIRST in compile options is deprecated +# undef LSB_FIRST +# undef MSB_FIRST +#endif + +#ifdef _MSC_VER +/* MSVC pre-defines macros depending on target arch */ +#if defined (_M_IX86) || defined (_M_AMD64) || defined (_M_ARM) || defined (_M_ARM64) +#define LSB_FIRST 1 +#elif _M_PPC +#define MSB_FIRST 1 +#else +/* MSVC can run on _M_ALPHA and _M_IA64 too, but they're both bi-endian; need to find what mode MSVC runs them at */ +#error "unknown platform, can't determine endianness" +#endif +#else +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define MSB_FIRST 1 +#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define LSB_FIRST 1 +#else +#error "Invalid endianness macros" +#endif +#endif + +#if defined(MSB_FIRST) && defined(LSB_FIRST) +# error "Bug in LSB_FIRST/MSB_FIRST definition" +#endif + +#if !defined(MSB_FIRST) && !defined(LSB_FIRST) +# error "Bug in LSB_FIRST/MSB_FIRST definition" +#endif + +#ifdef MSB_FIRST +# define RETRO_IS_BIG_ENDIAN 1 +# define RETRO_IS_LITTLE_ENDIAN 0 +/* For compatibility */ +# define WORDS_BIGENDIAN 1 +#else +# define RETRO_IS_BIG_ENDIAN 0 +# define RETRO_IS_LITTLE_ENDIAN 1 +/* For compatibility */ +# undef WORDS_BIGENDIAN +#endif + + /** * is_little_endian: * @@ -73,23 +130,7 @@ * Returns: greater than 0 if little-endian, * otherwise big-endian. 
**/ -#if defined(MSB_FIRST) -#define is_little_endian() (0) -#elif defined(__x86_64) || defined(__i386) || defined(_M_IX86) || defined(_M_X64) -#define is_little_endian() (1) -#else -static INLINE uint8_t is_little_endian(void) -{ - union - { - uint16_t x; - uint8_t y[2]; - } u; - - u.x = 1; - return u.y[0]; -} -#endif +#define is_little_endian() RETRO_IS_LITTLE_ENDIAN /** * swap_if_big64: @@ -101,17 +142,10 @@ static INLINE uint8_t is_little_endian(void) * otherwise returns same value. **/ -#if defined(MSB_FIRST) +#if RETRO_IS_BIG_ENDIAN #define swap_if_big64(val) (SWAP64(val)) -#elif defined(__x86_64) || defined(__i386) || defined(_M_IX86) || defined(_M_X64) +#elif RETRO_IS_LITTLE_ENDIAN #define swap_if_big64(val) (val) -#else -static INLINE uint64_t swap_if_big64(uint64_t val) -{ - if (is_little_endian()) - return val; - return SWAP64(val); -} #endif /** @@ -124,17 +158,10 @@ static INLINE uint64_t swap_if_big64(uint64_t val) * otherwise returns same value. **/ -#if defined(MSB_FIRST) +#if RETRO_IS_BIG_ENDIAN #define swap_if_big32(val) (SWAP32(val)) -#elif defined(__x86_64) || defined(__i386) || defined(_M_IX86) || defined(_M_X64) +#elif RETRO_IS_LITTLE_ENDIAN #define swap_if_big32(val) (val) -#else -static INLINE uint32_t swap_if_big32(uint32_t val) -{ - if (is_little_endian()) - return val; - return SWAP32(val); -} #endif /** @@ -147,17 +174,10 @@ static INLINE uint32_t swap_if_big32(uint32_t val) * otherwise returns same value. **/ -#if defined(MSB_FIRST) +#if RETRO_IS_BIG_ENDIAN #define swap_if_little64(val) (val) -#elif defined(__x86_64) || defined(__i386) || defined(_M_IX86) || defined(_M_X64) +#elif RETRO_IS_LITTLE_ENDIAN #define swap_if_little64(val) (SWAP64(val)) -#else -static INLINE uint64_t swap_if_little64(uint64_t val) -{ - if (is_little_endian()) - return SWAP64(val); - return val; -} #endif /** @@ -170,17 +190,10 @@ static INLINE uint64_t swap_if_little64(uint64_t val) * otherwise returns same value. 
**/ -#if defined(MSB_FIRST) +#if RETRO_IS_BIG_ENDIAN #define swap_if_little32(val) (val) -#elif defined(__x86_64) || defined(__i386) || defined(_M_IX86) || defined(_M_X64) +#elif RETRO_IS_LITTLE_ENDIAN #define swap_if_little32(val) (SWAP32(val)) -#else -static INLINE uint32_t swap_if_little32(uint32_t val) -{ - if (is_little_endian()) - return SWAP32(val); - return val; -} #endif /** @@ -193,17 +206,10 @@ static INLINE uint32_t swap_if_little32(uint32_t val) * otherwise returns same value. **/ -#if defined(MSB_FIRST) +#if RETRO_IS_BIG_ENDIAN #define swap_if_big16(val) (SWAP16(val)) -#elif defined(__x86_64) || defined(__i386) || defined(_M_IX86) || defined(_M_X64) +#elif RETRO_IS_LITTLE_ENDIAN #define swap_if_big16(val) (val) -#else -static INLINE uint16_t swap_if_big16(uint16_t val) -{ - if (is_little_endian()) - return val; - return SWAP16(val); -} #endif /** @@ -216,17 +222,10 @@ static INLINE uint16_t swap_if_big16(uint16_t val) * otherwise returns same value. **/ -#if defined(MSB_FIRST) +#if RETRO_IS_BIG_ENDIAN #define swap_if_little16(val) (val) -#elif defined(__x86_64) || defined(__i386) || defined(_M_IX86) || defined(_M_X64) +#elif RETRO_IS_LITTLE_ENDIAN #define swap_if_little16(val) (SWAP16(val)) -#else -static INLINE uint16_t swap_if_little16(uint16_t val) -{ - if (is_little_endian()) - return SWAP16(val); - return val; -} #endif /** @@ -255,4 +254,326 @@ static INLINE uint32_t load32be(const uint32_t *addr) return swap_if_little32(*addr); } +/** + * retro_cpu_to_le16: + * @val : unsigned 16-bit value + * + * Convert unsigned 16-bit value from system to little-endian. + * + * Returns: Little-endian representation of val. + **/ + +#define retro_cpu_to_le16(val) swap_if_big16(val) + +/** + * retro_cpu_to_le32: + * @val : unsigned 32-bit value + * + * Convert unsigned 32-bit value from system to little-endian. + * + * Returns: Little-endian representation of val. 
+ **/ + +#define retro_cpu_to_le32(val) swap_if_big32(val) + +/** + * retro_cpu_to_le64: + * @val : unsigned 64-bit value + * + * Convert unsigned 64-bit value from system to little-endian. + * + * Returns: Little-endian representation of val. + **/ + +#define retro_cpu_to_le64(val) swap_if_big64(val) + +/** + * retro_le_to_cpu16: + * @val : unsigned 16-bit value + * + * Convert unsigned 16-bit value from little-endian to native. + * + * Returns: Native representation of little-endian val. + **/ + +#define retro_le_to_cpu16(val) swap_if_big16(val) + +/** + * retro_le_to_cpu32: + * @val : unsigned 32-bit value + * + * Convert unsigned 32-bit value from little-endian to native. + * + * Returns: Native representation of little-endian val. + **/ + +#define retro_le_to_cpu32(val) swap_if_big32(val) + +/** + * retro_le_to_cpu16: + * @val : unsigned 64-bit value + * + * Convert unsigned 64-bit value from little-endian to native. + * + * Returns: Native representation of little-endian val. + **/ + +#define retro_le_to_cpu64(val) swap_if_big64(val) + +/** + * retro_cpu_to_be16: + * @val : unsigned 16-bit value + * + * Convert unsigned 16-bit value from system to big-endian. + * + * Returns: Big-endian representation of val. + **/ + +#define retro_cpu_to_be16(val) swap_if_little16(val) + +/** + * retro_cpu_to_be32: + * @val : unsigned 32-bit value + * + * Convert unsigned 32-bit value from system to big-endian. + * + * Returns: Big-endian representation of val. + **/ + +#define retro_cpu_to_be32(val) swap_if_little32(val) + +/** + * retro_cpu_to_be64: + * @val : unsigned 64-bit value + * + * Convert unsigned 64-bit value from system to big-endian. + * + * Returns: Big-endian representation of val. + **/ + +#define retro_cpu_to_be64(val) swap_if_little64(val) + +/** + * retro_be_to_cpu16: + * @val : unsigned 16-bit value + * + * Convert unsigned 16-bit value from big-endian to native. + * + * Returns: Native representation of big-endian val. 
+ **/ + +#define retro_be_to_cpu16(val) swap_if_little16(val) + +/** + * retro_be_to_cpu32: + * @val : unsigned 32-bit value + * + * Convert unsigned 32-bit value from big-endian to native. + * + * Returns: Native representation of big-endian val. + **/ + +#define retro_be_to_cpu32(val) swap_if_little32(val) + +/** + * retro_be_to_cpu64: + * @val : unsigned 64-bit value + * + * Convert unsigned 64-bit value from big-endian to native. + * + * Returns: Native representation of big-endian val. + **/ + +#define retro_be_to_cpu64(val) swap_if_little64(val) + +#ifdef __GNUC__ +/* This attribute means that the same memory may be referred through + pointers to different size of the object (aliasing). E.g. that u8 * + and u32 * may actually be pointing to the same object. */ +#define MAY_ALIAS __attribute__((__may_alias__)) +#else +#define MAY_ALIAS +#endif + +#pragma pack(push, 1) +struct retro_unaligned_uint16_s +{ + uint16_t val; +} MAY_ALIAS; +struct retro_unaligned_uint32_s +{ + uint32_t val; +} MAY_ALIAS; +struct retro_unaligned_uint64_s +{ + uint64_t val; +} MAY_ALIAS; +#pragma pack(pop) + +typedef struct retro_unaligned_uint16_s retro_unaligned_uint16_t; +typedef struct retro_unaligned_uint32_s retro_unaligned_uint32_t; +typedef struct retro_unaligned_uint64_s retro_unaligned_uint64_t; + +/* L-value references to unaligned pointers. */ +#define retro_unaligned16(p) (((retro_unaligned_uint16_t *)p)->val) +#define retro_unaligned32(p) (((retro_unaligned_uint32_t *)p)->val) +#define retro_unaligned64(p) (((retro_unaligned_uint64_t *)p)->val) + +/** + * retro_get_unaligned_16be: + * @addr : pointer to unsigned 16-bit value + * + * Convert unsigned unaligned 16-bit value from big-endian to native. + * + * Returns: Native representation of big-endian val. 
+ **/ + +static INLINE uint16_t retro_get_unaligned_16be(void *addr) { + return retro_be_to_cpu16(retro_unaligned16(addr)); +} + +/** + * retro_get_unaligned_32be: + * @addr : pointer to unsigned 32-bit value + * + * Convert unsigned unaligned 32-bit value from big-endian to native. + * + * Returns: Native representation of big-endian val. + **/ + +static INLINE uint32_t retro_get_unaligned_32be(void *addr) { + return retro_be_to_cpu32(retro_unaligned32(addr)); +} + +/** + * retro_get_unaligned_64be: + * @addr : pointer to unsigned 64-bit value + * + * Convert unsigned unaligned 64-bit value from big-endian to native. + * + * Returns: Native representation of big-endian val. + **/ + +static INLINE uint64_t retro_get_unaligned_64be(void *addr) { + return retro_be_to_cpu64(retro_unaligned64(addr)); +} + +/** + * retro_get_unaligned_16le: + * @addr : pointer to unsigned 16-bit value + * + * Convert unsigned unaligned 16-bit value from little-endian to native. + * + * Returns: Native representation of little-endian val. + **/ + +static INLINE uint16_t retro_get_unaligned_16le(void *addr) { + return retro_le_to_cpu16(retro_unaligned16(addr)); +} + +/** + * retro_get_unaligned_32le: + * @addr : pointer to unsigned 32-bit value + * + * Convert unsigned unaligned 32-bit value from little-endian to native. + * + * Returns: Native representation of little-endian val. + **/ + +static INLINE uint32_t retro_get_unaligned_32le(void *addr) { + return retro_le_to_cpu32(retro_unaligned32(addr)); +} + +/** + * retro_get_unaligned_64le: + * @addr : pointer to unsigned 64-bit value + * + * Convert unsigned unaligned 64-bit value from little-endian to native. + * + * Returns: Native representation of little-endian val. 
+ **/ + +static INLINE uint64_t retro_get_unaligned_64le(void *addr) { + return retro_le_to_cpu64(retro_unaligned64(addr)); +} + +/** + * retro_set_unaligned_16le: + * @addr : pointer to unsigned 16-bit value + * @val : value to store + * + * Convert native value to unsigned unaligned 16-bit little-endian value + * + **/ + +static INLINE void retro_set_unaligned_16le(void *addr, uint16_t v) { + retro_unaligned16(addr) = retro_cpu_to_le16(v); +} + +/** + * retro_set_unaligned_32le: + * @addr : pointer to unsigned 32-bit value + * @val : value to store + * + * Convert native value to unsigned unaligned 32-bit little-endian value + * + **/ + +static INLINE void retro_set_unaligned_32le(void *addr, uint32_t v) { + retro_unaligned32(addr) = retro_cpu_to_le32(v); +} + +/** + * retro_set_unaligned_32le: + * @addr : pointer to unsigned 32-bit value + * @val : value to store + * + * Convert native value to unsigned unaligned 32-bit little-endian value + * + **/ + +static INLINE void retro_set_unaligned_64le(void *addr, uint64_t v) { + retro_unaligned64(addr) = retro_cpu_to_le64(v); +} + +/** + * retro_set_unaligned_16be: + * @addr : pointer to unsigned 16-bit value + * @val : value to store + * + * Convert native value to unsigned unaligned 16-bit big-endian value + * + **/ + +static INLINE void retro_set_unaligned_16be(void *addr, uint16_t v) { + retro_unaligned16(addr) = retro_cpu_to_be16(v); +} + +/** + * retro_set_unaligned_32be: + * @addr : pointer to unsigned 32-bit value + * @val : value to store + * + * Convert native value to unsigned unaligned 32-bit big-endian value + * + **/ + +static INLINE void retro_set_unaligned_32be(void *addr, uint32_t v) { + retro_unaligned32(addr) = retro_cpu_to_be32(v); +} + +/** + * retro_set_unaligned_32be: + * @addr : pointer to unsigned 32-bit value + * @val : value to store + * + * Convert native value to unsigned unaligned 32-bit big-endian value + * + **/ + +static INLINE void retro_set_unaligned_64be(void *addr, uint64_t v) { + 
retro_unaligned64(addr) = retro_cpu_to_be64(v); +} + + #endif diff --git a/platform/libretro/libretro-common/include/retro_environment.h b/platform/libretro/libretro-common/include/retro_environment.h index 4a68046b..1389eb5c 100644 --- a/platform/libretro/libretro-common/include/retro_environment.h +++ b/platform/libretro/libretro-common/include/retro_environment.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2018 The RetroArch team +/* Copyright (C) 2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (retro_environment.h). diff --git a/platform/libretro/libretro-common/include/retro_inline.h b/platform/libretro/libretro-common/include/retro_inline.h index e4a21f6c..b27d6dd6 100644 --- a/platform/libretro/libretro-common/include/retro_inline.h +++ b/platform/libretro/libretro-common/include/retro_inline.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2018 The RetroArch team +/* Copyright (C) 2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (retro_inline.h). diff --git a/platform/libretro/libretro-common/include/retro_miscellaneous.h b/platform/libretro/libretro-common/include/retro_miscellaneous.h index 3893416e..bd71c916 100644 --- a/platform/libretro/libretro-common/include/retro_miscellaneous.h +++ b/platform/libretro/libretro-common/include/retro_miscellaneous.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2018 The RetroArch team +/* Copyright (C) 2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (retro_miscellaneous.h). 
@@ -30,17 +30,17 @@ #include #include -#if defined(_WIN32) && !defined(_XBOX) +#if defined(_WIN32) + +#if defined(_XBOX) +#include +#else #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif #include -#elif defined(_WIN32) && defined(_XBOX) -#include #endif -#if defined(__CELLOS_LV2__) -#include #endif #include @@ -75,15 +75,17 @@ static INLINE bool bits_any_set(uint32_t* ptr, uint32_t count) } #ifndef PATH_MAX_LENGTH -#if defined(__CELLOS_LV2__) -#define PATH_MAX_LENGTH CELL_FS_MAX_FS_PATH_LENGTH -#elif defined(_XBOX1) || defined(_3DS) || defined(PSP) || defined(PS2) || defined(GEKKO)|| defined(WIIU) || defined(ORBIS) +#if defined(_XBOX1) || defined(_3DS) || defined(PSP) || defined(PS2) || defined(GEKKO)|| defined(WIIU) || defined(ORBIS) || defined(__PSL1GHT__) || defined(__PS3__) #define PATH_MAX_LENGTH 512 #else #define PATH_MAX_LENGTH 4096 #endif #endif +#ifndef NAME_MAX_LENGTH +#define NAME_MAX_LENGTH 256 +#endif + #ifndef MAX #define MAX(a, b) ((a) > (b) ? (a) : (b)) #endif @@ -106,8 +108,8 @@ static INLINE bool bits_any_set(uint32_t* ptr, uint32_t count) #define BIT16_GET(a, bit) (((a) >> ((bit) & 15)) & 1) #define BIT16_CLEAR_ALL(a) ((a) = 0) -#define BIT32_SET(a, bit) ((a) |= (1 << ((bit) & 31))) -#define BIT32_CLEAR(a, bit) ((a) &= ~(1 << ((bit) & 31))) +#define BIT32_SET(a, bit) ((a) |= (UINT32_C(1) << ((bit) & 31))) +#define BIT32_CLEAR(a, bit) ((a) &= ~(UINT32_C(1) << ((bit) & 31))) #define BIT32_GET(a, bit) (((a) >> ((bit) & 31)) & 1) #define BIT32_CLEAR_ALL(a) ((a) = 0) @@ -116,8 +118,8 @@ static INLINE bool bits_any_set(uint32_t* ptr, uint32_t count) #define BIT64_GET(a, bit) (((a) >> ((bit) & 63)) & 1) #define BIT64_CLEAR_ALL(a) ((a) = 0) -#define BIT128_SET(a, bit) ((a).data[(bit) >> 5] |= (1 << ((bit) & 31))) -#define BIT128_CLEAR(a, bit) ((a).data[(bit) >> 5] &= ~(1 << ((bit) & 31))) +#define BIT128_SET(a, bit) ((a).data[(bit) >> 5] |= (UINT32_C(1) << ((bit) & 31))) +#define BIT128_CLEAR(a, bit) ((a).data[(bit) >> 5] &= ~(UINT32_C(1) 
<< ((bit) & 31))) #define BIT128_GET(a, bit) (((a).data[(bit) >> 5] >> ((bit) & 31)) & 1) #define BIT128_CLEAR_ALL(a) memset(&(a), 0, sizeof(a)) @@ -136,6 +138,16 @@ static INLINE bool bits_any_set(uint32_t* ptr, uint32_t count) #define BIT256_GET_PTR(a, bit) BIT256_GET(*a, bit) #define BIT256_CLEAR_ALL_PTR(a) BIT256_CLEAR_ALL(*a) +#define BIT512_SET(a, bit) BIT256_SET(a, bit) +#define BIT512_CLEAR(a, bit) BIT256_CLEAR(a, bit) +#define BIT512_GET(a, bit) BIT256_GET(a, bit) +#define BIT512_CLEAR_ALL(a) BIT256_CLEAR_ALL(a) + +#define BIT512_SET_PTR(a, bit) BIT512_SET(*a, bit) +#define BIT512_CLEAR_PTR(a, bit) BIT512_CLEAR(*a, bit) +#define BIT512_GET_PTR(a, bit) BIT512_GET(*a, bit) +#define BIT512_CLEAR_ALL_PTR(a) BIT512_CLEAR_ALL(*a) + #define BITS_COPY16_PTR(a,bits) \ { \ BIT128_CLEAR_ALL_PTR(a); \ @@ -148,6 +160,13 @@ static INLINE bool bits_any_set(uint32_t* ptr, uint32_t count) BITS_GET_ELEM_PTR(a, 0) = (bits); \ } +#define BITS_COPY64_PTR(a,bits) \ +{ \ + BIT128_CLEAR_ALL_PTR(a); \ + BITS_GET_ELEM_PTR(a, 0) = (bits); \ + BITS_GET_ELEM_PTR(a, 1) = (bits >> 32); \ +} + /* Helper macros and struct to keep track of many booleans. */ /* This struct has 256 bits. */ typedef struct @@ -155,6 +174,12 @@ typedef struct uint32_t data[8]; } retro_bits_t; +/* This struct has 512 bits. 
*/ +typedef struct +{ + uint32_t data[16]; +} retro_bits_512_t; + #ifdef _WIN32 # ifdef _WIN64 # define PRI_SIZET PRIu64 @@ -165,7 +190,7 @@ typedef struct # define PRI_SIZET "u" # endif # endif -#elif PS2 +#elif defined(PS2) # define PRI_SIZET "u" #else # if (SIZE_MAX == 0xFFFF) diff --git a/platform/libretro/libretro-common/include/streams/file_stream.h b/platform/libretro/libretro-common/include/streams/file_stream.h index 0cfadad8..5276f87a 100644 --- a/platform/libretro/libretro-common/include/streams/file_stream.h +++ b/platform/libretro/libretro-common/include/streams/file_stream.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2018 The RetroArch team +/* Copyright (C) 2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (file_stream.h). @@ -81,6 +81,8 @@ char* filestream_gets(RFILE *stream, char *s, size_t len); int filestream_getc(RFILE *stream); +int filestream_vscanf(RFILE *stream, const char* format, va_list *args); + int filestream_scanf(RFILE *stream, const char* format, ...); int filestream_eof(RFILE *stream); diff --git a/platform/libretro/libretro-common/include/streams/file_stream_transforms.h b/platform/libretro/libretro-common/include/streams/file_stream_transforms.h index 9cf15c59..327e2184 100644 --- a/platform/libretro/libretro-common/include/streams/file_stream_transforms.h +++ b/platform/libretro/libretro-common/include/streams/file_stream_transforms.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2018 The RetroArch team +/* Copyright (C) 2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (file_stream_transforms.h). 
diff --git a/platform/libretro/libretro-common/include/string/stdstring.h b/platform/libretro/libretro-common/include/string/stdstring.h index d57256a7..2dc00e33 100644 --- a/platform/libretro/libretro-common/include/string/stdstring.h +++ b/platform/libretro/libretro-common/include/string/stdstring.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2019 The RetroArch team +/* Copyright (C) 2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (stdstring.h). @@ -35,6 +35,33 @@ RETRO_BEGIN_DECLS +#define STRLEN_CONST(x) ((sizeof((x))-1)) + +#define strcpy_literal(a, b) strcpy(a, b) + +#define string_is_not_equal(a, b) !string_is_equal((a), (b)) + +#define string_is_not_equal_fast(a, b, size) (memcmp(a, b, size) != 0) +#define string_is_equal_fast(a, b, size) (memcmp(a, b, size) == 0) + +#define TOLOWER(c) ((c) | (lr_char_props[(unsigned char)(c)] & 0x20)) +#define TOUPPER(c) ((c) & ~(lr_char_props[(unsigned char)(c)] & 0x20)) + +/* C standard says \f \v are space, but this one disagrees */ +#define ISSPACE(c) (lr_char_props[(unsigned char)(c)] & 0x80) + +#define ISDIGIT(c) (lr_char_props[(unsigned char)(c)] & 0x40) +#define ISALPHA(c) (lr_char_props[(unsigned char)(c)] & 0x20) +#define ISLOWER(c) (lr_char_props[(unsigned char)(c)] & 0x04) +#define ISUPPER(c) (lr_char_props[(unsigned char)(c)] & 0x02) +#define ISALNUM(c) (lr_char_props[(unsigned char)(c)] & 0x60) +#define ISUALPHA(c) (lr_char_props[(unsigned char)(c)] & 0x28) +#define ISUALNUM(c) (lr_char_props[(unsigned char)(c)] & 0x68) +#define IS_XDIGIT(c) (lr_char_props[(unsigned char)(c)] & 0x01) + +/* Deprecated alias, all callers should use string_is_equal_case_insensitive instead */ +#define string_is_equal_noncase string_is_equal_case_insensitive + static INLINE bool string_is_empty(const char *data) { return !data || (*data == '\0'); @@ -45,12 +72,44 @@ static INLINE bool 
string_is_equal(const char *a, const char *b) return (a && b) ? !strcmp(a, b) : false; } -#define STRLEN_CONST(x) ((sizeof((x))-1)) +static INLINE bool string_starts_with_size(const char *str, const char *prefix, + size_t size) +{ + return (str && prefix) ? !strncmp(prefix, str, size) : false; +} -#define string_is_not_equal(a, b) !string_is_equal((a), (b)) +static INLINE bool string_starts_with(const char *str, const char *prefix) +{ + return (str && prefix) ? !strncmp(prefix, str, strlen(prefix)) : false; +} + +static INLINE bool string_ends_with_size(const char *str, const char *suffix, + size_t str_len, size_t suffix_len) +{ + return (str_len < suffix_len) ? false : + !memcmp(suffix, str + (str_len - suffix_len), suffix_len); +} + +static INLINE bool string_ends_with(const char *str, const char *suffix) +{ + if (!str || !suffix) + return false; + return string_ends_with_size(str, suffix, strlen(str), strlen(suffix)); +} + +/* Returns the length of 'str' (c.f. strlen()), but only + * checks the first 'size' characters + * - If 'str' is NULL, returns 0 + * - If 'str' is not NULL and no '\0' character is found + * in the first 'size' characters, returns 'size' */ +static INLINE size_t strlen_size(const char *str, size_t size) +{ + size_t i = 0; + if (str) + while (i < size && str[i]) i++; + return i; +} -#define string_is_not_equal_fast(a, b, size) (memcmp(a, b, size) != 0) -#define string_is_equal_fast(a, b, size) (memcmp(a, b, size) == 0) static INLINE bool string_is_equal_case_insensitive(const char *a, const char *b) @@ -71,22 +130,23 @@ static INLINE bool string_is_equal_case_insensitive(const char *a, return (result == 0); } -static INLINE bool string_is_equal_noncase(const char *a, const char *b) +static INLINE bool string_starts_with_case_insensitive(const char *str, + const char *prefix) { int result = 0; - const unsigned char *p1 = (const unsigned char*)a; - const unsigned char *p2 = (const unsigned char*)b; + const unsigned char *p1 = (const unsigned 
char*)str; + const unsigned char *p2 = (const unsigned char*)prefix; - if (!a || !b) + if (!str || !prefix) return false; if (p1 == p2) - return false; + return true; - while ((result = tolower (*p1) - tolower (*p2++)) == 0) - if (*p1++ == '\0') + while ((result = tolower (*p1++) - tolower (*p2)) == 0) + if (*p2++ == '\0') break; - return (result == 0); + return (result == 0 || *p2 == '\0'); } char *string_to_upper(char *s); @@ -107,9 +167,61 @@ char *string_trim_whitespace_right(char *const s); /* Remove leading and trailing whitespaces */ char *string_trim_whitespace(char *const s); -/* max_lines == 0 means no limit */ -char *word_wrap(char *buffer, const char *string, - int line_width, bool unicode, unsigned max_lines); +/* + * Wraps string specified by 'src' to destination buffer + * specified by 'dst' and 'dst_size'. + * This function assumes that all glyphs in the string + * have an on-screen pixel width similar to that of + * regular Latin characters - i.e. it will not wrap + * correctly any text containing so-called 'wide' Unicode + * characters (e.g. CJK languages, emojis, etc.). + * + * @param dst pointer to destination buffer. + * @param dst_size size of destination buffer. + * @param src pointer to input string. + * @param line_width max number of characters per line. + * @param wideglyph_width not used, but is necessary to keep + * compatibility with word_wrap_wideglyph(). + * @param max_lines max lines of destination string. + * 0 means no limit. + */ +void word_wrap(char *dst, size_t dst_size, const char *src, + int line_width, int wideglyph_width, unsigned max_lines); + +/* + * Wraps string specified by 'src' to destination buffer + * specified by 'dst' and 'dst_size'. + * This function assumes that all glyphs in the string + * are: + * - EITHER 'non-wide' Unicode glyphs, with an on-screen + * pixel width similar to that of regular Latin characters + * - OR 'wide' Unicode glyphs (e.g. CJK languages, emojis, etc.) 
+ * with an on-screen pixel width defined by 'wideglyph_width' + * Note that wrapping may occur in inappropriate locations + * if 'src' string contains 'wide' Unicode characters whose + * on-screen pixel width deviates greatly from the set + * 'wideglyph_width' value. + * + * @param dst pointer to destination buffer. + * @param dst_size size of destination buffer. + * @param src pointer to input string. + * @param line_width max number of characters per line. + * @param wideglyph_width effective width of 'wide' Unicode glyphs. + * the value here is normalised relative to the + * typical on-screen pixel width of a regular + * Latin character: + * - a regular Latin character is defined to + * have an effective width of 100 + * - wideglyph_width = 100 * (wide_character_pixel_width / latin_character_pixel_width) + * - e.g. if 'wide' Unicode characters in 'src' + * have an on-screen pixel width twice that of + * regular Latin characters, wideglyph_width + * would be 200 + * @param max_lines max lines of destination string. + * 0 means no limit. + */ +void word_wrap_wideglyph(char *dst, size_t dst_size, const char *src, + int line_width, int wideglyph_width, unsigned max_lines); /* Splits string into tokens seperated by 'delim' * > Returned token string must be free()'d @@ -121,7 +233,7 @@ char *word_wrap(char *buffer, const char *string, * char *str = "1,2,3,4,5,6,7,,,10,"; * char **str_ptr = &str; * char *token = NULL; - * while((token = string_tokenize(str_ptr, ","))) + * while ((token = string_tokenize(str_ptr, ","))) * { * printf("%s\n", token); * free(token); @@ -146,6 +258,30 @@ unsigned string_to_unsigned(const char *str); * Returns 0 if string is invalid */ unsigned string_hex_to_unsigned(const char *str); +char *string_init(const char *src); + +void string_set(char **string, const char *src); + +extern const unsigned char lr_char_props[256]; + +/* Get the total number of occurrences of a character in the given string. 
*/ +int string_count_occurrences_single_character(char *str, char t); + +/* Replaces all spaces with the given character. */ +void string_replace_whitespace_with_single_character(char *str, char t); + +/* Replaces multiple spaces with a single space in a string. */ +void string_replace_multi_space_with_single_space(char *str); + +/* Remove all spaces from the given string. */ +void string_remove_all_whitespace(char* str_trimmed, const char* str_untrimmed); + +/* Retrieve the last occurance of the given character in a string. */ +int string_index_last_occurance(char str[], char t); + +/* Find the position of a substring in a string. */ +int string_find_index_substring_string(const char* str1, const char* str2); + RETRO_END_DECLS #endif diff --git a/platform/libretro/libretro-common/include/vfs/vfs.h b/platform/libretro/libretro-common/include/vfs/vfs.h index b876f438..bd61c69a 100644 --- a/platform/libretro/libretro-common/include/vfs/vfs.h +++ b/platform/libretro/libretro-common/include/vfs/vfs.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2019 The RetroArch team +/* Copyright (C) 2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (vfs_implementation.h). 
@@ -41,17 +41,17 @@ typedef void* HANDLE; #ifdef HAVE_CDROM typedef struct { + int64_t byte_pos; char *cue_buf; size_t cue_len; - int64_t byte_pos; - char drive; + unsigned cur_lba; + unsigned last_frame_lba; unsigned char cur_min; unsigned char cur_sec; unsigned char cur_frame; unsigned char cur_track; - unsigned cur_lba; - unsigned last_frame_lba; unsigned char last_frame[2352]; + char drive; bool last_frame_valid; } vfs_cdrom_t; #endif @@ -62,29 +62,29 @@ enum vfs_scheme VFS_SCHEME_CDROM }; -#ifndef __WINRT__ +#if !(defined(__WINRT__) && defined(__cplusplus_winrt)) #ifdef VFS_FRONTEND struct retro_vfs_file_handle #else struct libretro_vfs_implementation_file #endif { - int fd; - unsigned hints; +#ifdef HAVE_CDROM + vfs_cdrom_t cdrom; /* int64_t alignment */ +#endif int64_t size; - char *buf; + uint64_t mappos; + uint64_t mapsize; FILE *fp; #ifdef _WIN32 HANDLE fh; #endif + char *buf; char* orig_path; - uint64_t mappos; - uint64_t mapsize; uint8_t *mapped; + int fd; + unsigned hints; enum vfs_scheme scheme; -#ifdef HAVE_CDROM - vfs_cdrom_t cdrom; -#endif }; #endif diff --git a/platform/libretro/libretro-common/include/vfs/vfs_implementation.h b/platform/libretro/libretro-common/include/vfs/vfs_implementation.h index c981cf72..b88d2f3d 100644 --- a/platform/libretro/libretro-common/include/vfs/vfs_implementation.h +++ b/platform/libretro/libretro-common/include/vfs/vfs_implementation.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2019 The RetroArch team +/* Copyright (C) 2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (vfs_implementation.h). 
@@ -71,6 +71,12 @@ bool retro_vfs_dirent_is_dir_impl(libretro_vfs_implementation_dir *dirstream); int retro_vfs_closedir_impl(libretro_vfs_implementation_dir *dirstream); +#ifdef __WINRT__ + +void uwp_set_acl(const wchar_t* path, const wchar_t* AccessString); + +#endif + RETRO_END_DECLS #endif diff --git a/platform/libretro/libretro-common/memmap/memmap.c b/platform/libretro/libretro-common/memmap/memmap.c new file mode 100644 index 00000000..e84e0875 --- /dev/null +++ b/platform/libretro/libretro-common/memmap/memmap.c @@ -0,0 +1,163 @@ +/* Copyright (C) 2010-2020 The RetroArch team + * + * --------------------------------------------------------------------------------------- + * The following license statement only applies to this file (memmap.c). + * --------------------------------------------------------------------------------------- + * + * Permission is hereby granted, free of charge, + * to any person obtaining a copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include + +#ifndef PROT_READ +#define PROT_READ 0x1 /* Page can be read */ +#endif + +#ifndef PROT_WRITE +#define PROT_WRITE 0x2 /* Page can be written. */ +#endif + +#ifndef PROT_READWRITE +#define PROT_READWRITE 0x3 /* Page can be written to and read from. */ +#endif + +#ifndef PROT_EXEC +#define PROT_EXEC 0x4 /* Page can be executed. */ +#endif + +#ifndef PROT_NONE +#define PROT_NONE 0x0 /* Page can not be accessed. */ +#endif + +#ifndef MAP_FAILED +#define MAP_FAILED ((void *) -1) +#endif + +#ifdef _WIN32 +void* mmap(void *addr, size_t len, int prot, int flags, + int fildes, size_t offset) +{ + void *map = (void*)NULL; + HANDLE handle = INVALID_HANDLE_VALUE; + + switch (prot) + { + case PROT_READ: + default: + handle = CreateFileMapping((HANDLE) + _get_osfhandle(fildes), 0, PAGE_READONLY, 0, + len, 0); + if (!handle) + break; + map = (void*)MapViewOfFile(handle, FILE_MAP_READ, 0, 0, len); + CloseHandle(handle); + break; + case PROT_WRITE: + handle = CreateFileMapping((HANDLE) + _get_osfhandle(fildes),0,PAGE_READWRITE,0, + len, 0); + if (!handle) + break; + map = (void*)MapViewOfFile(handle, FILE_MAP_WRITE, 0, 0, len); + CloseHandle(handle); + break; + case PROT_READWRITE: + handle = CreateFileMapping((HANDLE) + _get_osfhandle(fildes),0,PAGE_READWRITE,0, + len, 0); + if (!handle) + break; + map = (void*)MapViewOfFile(handle, FILE_MAP_ALL_ACCESS, 0, 0, len); + CloseHandle(handle); + break; + } + + if (map == (void*)NULL) + return((void*)MAP_FAILED); + return((void*) ((int8_t*)map + offset)); +} + +int munmap(void *addr, size_t length) +{ + if (!UnmapViewOfFile(addr)) + return -1; + return 0; +} + +int mprotect(void *addr, size_t len, int prot) +{ + /* Incomplete, just assumes PAGE_EXECUTE_READWRITE right now + * instead of correctly handling prot */ + prot = 0; + if (prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) + prot = PAGE_EXECUTE_READWRITE; + return VirtualProtect(addr, len, prot, 0); +} + +#elif !defined(HAVE_MMAN) +void* mmap(void *addr, 
size_t len, int prot, int flags, + int fildes, size_t offset) +{ + return malloc(len); +} + +int munmap(void *addr, size_t len) +{ + free(addr); + return 0; +} + +int mprotect(void *addr, size_t len, int prot) +{ + /* stub - not really needed at this point + * since this codepath has no dynarecs. */ + return 0; +} + +#endif + +#if defined(__MACH__) && defined(__arm__) +#include +#endif + +int memsync(void *start, void *end) +{ + size_t len = (char*)end - (char*)start; +#if defined(__MACH__) && defined(__arm__) + sys_dcache_flush(start ,len); + sys_icache_invalidate(start, len); + return 0; +#elif defined(__arm__) && !defined(__QNX__) + (void)len; + __clear_cache(start, end); + return 0; +#elif defined(HAVE_MMAN) + return msync(start, len, MS_SYNC | MS_INVALIDATE +#ifdef __QNX__ + MS_CACHE_ONLY +#endif + ); +#else + (void)len; + return 0; +#endif +} + +int memprotect(void *addr, size_t len) +{ + return mprotect(addr, len, PROT_READ | PROT_WRITE | PROT_EXEC); +} diff --git a/platform/libretro/libretro-common/streams/file_stream.c b/platform/libretro/libretro-common/streams/file_stream.c index e3e45907..2ac5dbb8 100644 --- a/platform/libretro/libretro-common/streams/file_stream.c +++ b/platform/libretro/libretro-common/streams/file_stream.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2018 The RetroArch team +/* Copyright (C) 2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (file_stream.c). 
@@ -31,11 +31,23 @@ #include "config.h" #endif +#ifdef _MSC_VER +#include +#endif + +#include #include #define VFS_FRONTEND #include -static const int64_t vfs_error_return_value = -1; +#define VFS_ERROR_RETURN_VALUE -1 + +struct RFILE +{ + struct retro_vfs_file_handle *hfile; + bool error_flag; + bool eof_flag; +}; static retro_vfs_get_path_t filestream_get_path_cb = NULL; static retro_vfs_open_t filestream_open_cb = NULL; @@ -50,18 +62,12 @@ static retro_vfs_flush_t filestream_flush_cb = NULL; static retro_vfs_remove_t filestream_remove_cb = NULL; static retro_vfs_rename_t filestream_rename_cb = NULL; -struct RFILE -{ - struct retro_vfs_file_handle *hfile; - bool error_flag; - bool eof_flag; -}; - /* VFS Initialization */ void filestream_vfs_init(const struct retro_vfs_interface_info* vfs_info) { - const struct retro_vfs_interface* vfs_iface; + const struct retro_vfs_interface * + vfs_iface = vfs_info->iface; filestream_get_path_cb = NULL; filestream_open_cb = NULL; @@ -76,9 +82,9 @@ void filestream_vfs_init(const struct retro_vfs_interface_info* vfs_info) filestream_remove_cb = NULL; filestream_rename_cb = NULL; - vfs_iface = vfs_info->iface; - - if (vfs_info->required_interface_version < FILESTREAM_REQUIRED_VFS_VERSION + if ( + (vfs_info->required_interface_version < + FILESTREAM_REQUIRED_VFS_VERSION) || !vfs_iface) return; @@ -99,19 +105,24 @@ void filestream_vfs_init(const struct retro_vfs_interface_info* vfs_info) /* Callback wrappers */ bool filestream_exists(const char *path) { - RFILE *dummy = NULL; + RFILE *dummy = NULL; if (!path || !*path) return false; - dummy = filestream_open(path, + dummy = filestream_open( + path, RETRO_VFS_FILE_ACCESS_READ, RETRO_VFS_FILE_ACCESS_HINT_NONE); if (!dummy) return false; - filestream_close(dummy); + if (filestream_close(dummy) != 0) + if (dummy) + free(dummy); + + dummy = NULL; return true; } @@ -119,12 +130,13 @@ int64_t filestream_get_size(RFILE *stream) { int64_t output; - if (filestream_size_cb != NULL) + if 
(filestream_size_cb) output = filestream_size_cb(stream->hfile); else - output = retro_vfs_file_size_impl((libretro_vfs_implementation_file*)stream->hfile); + output = retro_vfs_file_size_impl( + (libretro_vfs_implementation_file*)stream->hfile); - if (output == vfs_error_return_value) + if (output == VFS_ERROR_RETURN_VALUE) stream->error_flag = true; return output; @@ -134,12 +146,13 @@ int64_t filestream_truncate(RFILE *stream, int64_t length) { int64_t output; - if (filestream_truncate_cb != NULL) + if (filestream_truncate_cb) output = filestream_truncate_cb(stream->hfile, length); else - output = retro_vfs_file_truncate_impl((libretro_vfs_implementation_file*)stream->hfile, length); + output = retro_vfs_file_truncate_impl( + (libretro_vfs_implementation_file*)stream->hfile, length); - if (output == vfs_error_return_value) + if (output == VFS_ERROR_RETURN_VALUE) stream->error_flag = true; return output; @@ -159,7 +172,7 @@ RFILE* filestream_open(const char *path, unsigned mode, unsigned hints) struct retro_vfs_file_handle *fp = NULL; RFILE* output = NULL; - if (filestream_open_cb != NULL) + if (filestream_open_cb) fp = (struct retro_vfs_file_handle*) filestream_open_cb(path, mode, hints); else @@ -203,22 +216,19 @@ char* filestream_gets(RFILE *stream, char *s, size_t len) int filestream_getc(RFILE *stream) { char c = 0; - if (!stream) - return EOF; - if (filestream_read(stream, &c, 1) == 1) + if (stream && filestream_read(stream, &c, 1) == 1) return (int)(unsigned char)c; return EOF; } -int filestream_scanf(RFILE *stream, const char* format, ...) 
+int filestream_vscanf(RFILE *stream, const char* format, va_list *args) { char buf[4096]; char subfmt[64]; - va_list args; - + va_list args_copy; const char * bufiter = buf; - int64_t startpos = filestream_tell(stream); int ret = 0; + int64_t startpos = filestream_tell(stream); int64_t maxlen = filestream_read(stream, buf, sizeof(buf)-1); if (maxlen <= 0) @@ -226,14 +236,22 @@ int filestream_scanf(RFILE *stream, const char* format, ...) buf[maxlen] = '\0'; - va_start(args, format); + /* Have to copy the input va_list here + * > Calling va_arg() on 'args' directly would + * cause the va_list to have an indeterminate value + * in the function calling filestream_vscanf(), + * leading to unexpected behaviour */ +#ifdef __va_copy + __va_copy(args_copy, *args); +#else + va_copy(args_copy, *args); +#endif while (*format) { if (*format == '%') { int sublen; - char* subfmtiter = subfmt; bool asterisk = false; @@ -243,19 +261,25 @@ int filestream_scanf(RFILE *stream, const char* format, ...) if (*format == '*') { - asterisk = true; + asterisk = true; *subfmtiter++ = *format++; } - while (isdigit(*format)) *subfmtiter++ = *format++; /* width */ + while (ISDIGIT((unsigned char)*format)) + *subfmtiter++ = *format++; /* width */ /* length */ if (*format == 'h' || *format == 'l') { - if (format[1] == format[0]) *subfmtiter++ = *format++; - *subfmtiter++ = *format++; + if (format[1] == format[0]) + *subfmtiter++ = *format++; + *subfmtiter++ = *format++; } - else if (*format == 'j' || *format == 'z' || *format == 't' || *format == 'L') + else if ( + *format == 'j' || + *format == 'z' || + *format == 't' || + *format == 'L') { *subfmtiter++ = *format++; } @@ -263,31 +287,44 @@ int filestream_scanf(RFILE *stream, const char* format, ...) 
/* specifier - always a single character (except ]) */ if (*format == '[') { - while (*format != ']') *subfmtiter++ = *format++; - *subfmtiter++ = *format++; + while (*format != ']') + *subfmtiter++ = *format++; + *subfmtiter++ = *format++; } - else *subfmtiter++ = *format++; + else + *subfmtiter++ = *format++; - *subfmtiter++ = '%'; - *subfmtiter++ = 'n'; - *subfmtiter++ = '\0'; + *subfmtiter++ = '%'; + *subfmtiter++ = 'n'; + *subfmtiter++ = '\0'; + + if (sizeof(void*) != sizeof(long*)) + abort(); /* all pointers must have the same size */ - if (sizeof(void*) != sizeof(long*)) abort(); /* all pointers must have the same size */ if (asterisk) { - if (sscanf(bufiter, subfmt, &sublen) != 0) break; + int v = sscanf(bufiter, subfmt, &sublen); + if (v == EOF) + return EOF; + if (v != 0) + break; } else { - if (sscanf(bufiter, subfmt, va_arg(args, void*), &sublen) != 1) break; + int v = sscanf(bufiter, subfmt, va_arg(args_copy, void*), &sublen); + if (v == EOF) + return EOF; + if (v != 1) + break; } ret++; bufiter += sublen; } - else if (isspace(*format)) + else if (isspace((unsigned char)*format)) { - while (isspace(*bufiter)) bufiter++; + while (isspace((unsigned char)*bufiter)) + bufiter++; format++; } else @@ -299,24 +336,38 @@ int filestream_scanf(RFILE *stream, const char* format, ...) } } - va_end(args); - filestream_seek(stream, startpos+(bufiter-buf), RETRO_VFS_SEEK_POSITION_START); + va_end(args_copy); + filestream_seek(stream, startpos+(bufiter-buf), + RETRO_VFS_SEEK_POSITION_START); return ret; } +int filestream_scanf(RFILE *stream, const char* format, ...) 
+{ + int result; + va_list vl; + va_start(vl, format); + result = filestream_vscanf(stream, format, &vl); + va_end(vl); + return result; +} + int64_t filestream_seek(RFILE *stream, int64_t offset, int seek_position) { int64_t output; - if (filestream_seek_cb != NULL) + if (filestream_seek_cb) output = filestream_seek_cb(stream->hfile, offset, seek_position); else - output = retro_vfs_file_seek_impl((libretro_vfs_implementation_file*)stream->hfile, offset, seek_position); + output = retro_vfs_file_seek_impl( + (libretro_vfs_implementation_file*)stream->hfile, + offset, seek_position); - if (output == vfs_error_return_value) + if (output == VFS_ERROR_RETURN_VALUE) stream->error_flag = true; - stream->eof_flag = false; + + stream->eof_flag = false; return output; } @@ -330,12 +381,13 @@ int64_t filestream_tell(RFILE *stream) { int64_t output; - if (filestream_size_cb != NULL) + if (filestream_size_cb) output = filestream_tell_cb(stream->hfile); else - output = retro_vfs_file_tell_impl((libretro_vfs_implementation_file*)stream->hfile); + output = retro_vfs_file_tell_impl( + (libretro_vfs_implementation_file*)stream->hfile); - if (output == vfs_error_return_value) + if (output == VFS_ERROR_RETURN_VALUE) stream->error_flag = true; return output; @@ -347,23 +399,23 @@ void filestream_rewind(RFILE *stream) return; filestream_seek(stream, 0L, RETRO_VFS_SEEK_POSITION_START); stream->error_flag = false; - stream->eof_flag = false; + stream->eof_flag = false; } int64_t filestream_read(RFILE *stream, void *s, int64_t len) { int64_t output; - if (filestream_read_cb != NULL) + if (filestream_read_cb) output = filestream_read_cb(stream->hfile, s, len); else output = retro_vfs_file_read_impl( (libretro_vfs_implementation_file*)stream->hfile, s, len); - if (output == vfs_error_return_value) + if (output == VFS_ERROR_RETURN_VALUE) stream->error_flag = true; if (output < len) - stream->eof_flag = true; + stream->eof_flag = true; return output; } @@ -372,12 +424,13 @@ int 
filestream_flush(RFILE *stream) { int output; - if (filestream_flush_cb != NULL) + if (filestream_flush_cb) output = filestream_flush_cb(stream->hfile); else - output = retro_vfs_file_flush_impl((libretro_vfs_implementation_file*)stream->hfile); + output = retro_vfs_file_flush_impl( + (libretro_vfs_implementation_file*)stream->hfile); - if (output == vfs_error_return_value) + if (output == VFS_ERROR_RETURN_VALUE) stream->error_flag = true; return output; @@ -385,7 +438,7 @@ int filestream_flush(RFILE *stream) int filestream_delete(const char *path) { - if (filestream_remove_cb != NULL) + if (filestream_remove_cb) return filestream_remove_cb(path); return retro_vfs_file_remove_impl(path); @@ -393,7 +446,7 @@ int filestream_delete(const char *path) int filestream_rename(const char *old_path, const char *new_path) { - if (filestream_rename_cb != NULL) + if (filestream_rename_cb) return filestream_rename_cb(old_path, new_path); return retro_vfs_file_rename_impl(old_path, new_path); @@ -401,22 +454,24 @@ int filestream_rename(const char *old_path, const char *new_path) const char* filestream_get_path(RFILE *stream) { - if (filestream_get_path_cb != NULL) + if (filestream_get_path_cb) return filestream_get_path_cb(stream->hfile); - return retro_vfs_file_get_path_impl((libretro_vfs_implementation_file*)stream->hfile); + return retro_vfs_file_get_path_impl( + (libretro_vfs_implementation_file*)stream->hfile); } int64_t filestream_write(RFILE *stream, const void *s, int64_t len) { int64_t output; - if (filestream_write_cb != NULL) + if (filestream_write_cb) output = filestream_write_cb(stream->hfile, s, len); else - output = retro_vfs_file_write_impl((libretro_vfs_implementation_file*)stream->hfile, s, len); + output = retro_vfs_file_write_impl( + (libretro_vfs_implementation_file*)stream->hfile, s, len); - if (output == vfs_error_return_value) + if (output == VFS_ERROR_RETURN_VALUE) stream->error_flag = true; return output; @@ -427,13 +482,16 @@ int filestream_putc(RFILE 
*stream, int c) char c_char = (char)c; if (!stream) return EOF; - return filestream_write(stream, &c_char, 1)==1 ? (int)(unsigned char)c : EOF; + return filestream_write(stream, &c_char, 1) == 1 + ? (int)(unsigned char)c + : EOF; } int filestream_vprintf(RFILE *stream, const char* format, va_list args) { static char buffer[8 * 1024]; - int64_t num_chars = vsprintf(buffer, format, args); + int64_t num_chars = vsnprintf(buffer, sizeof(buffer), + format, args); if (num_chars < 0) return -1; @@ -465,10 +523,11 @@ int filestream_close(RFILE *stream) int output; struct retro_vfs_file_handle* fp = stream->hfile; - if (filestream_close_cb != NULL) + if (filestream_close_cb) output = filestream_close_cb(fp); else - output = retro_vfs_file_close_impl((libretro_vfs_implementation_file*)fp); + output = retro_vfs_file_close_impl( + (libretro_vfs_implementation_file*)fp); if (output == 0) free(stream); @@ -481,10 +540,11 @@ int filestream_close(RFILE *stream) * @path : path to file. * @buf : buffer to allocate and read the contents of the * file into. Needs to be freed manually. + * @len : optional output integer containing bytes read. * * Read the contents of a file into @buf. * - * Returns: number of items read, -1 on error. + * Returns: non zero on success. 
*/ int64_t filestream_read_file(const char *path, void **buf, int64_t *len) { @@ -497,8 +557,8 @@ int64_t filestream_read_file(const char *path, void **buf, int64_t *len) if (!file) { - fprintf(stderr, "Failed to open %s: %s\n", path, strerror(errno)); - goto error; + *buf = NULL; + return 0; } content_buf_size = filestream_get_size(file); @@ -515,12 +575,11 @@ int64_t filestream_read_file(const char *path, void **buf, int64_t *len) ret = filestream_read(file, content_buf, (int64_t)content_buf_size); if (ret < 0) - { - fprintf(stderr, "Failed to read %s: %s\n", path, strerror(errno)); goto error; - } - filestream_close(file); + if (filestream_close(file) != 0) + if (file) + free(file); *buf = content_buf; @@ -535,7 +594,8 @@ int64_t filestream_read_file(const char *path, void **buf, int64_t *len) error: if (file) - filestream_close(file); + if (filestream_close(file) != 0) + free(file); if (content_buf) free(content_buf); if (len) @@ -564,7 +624,9 @@ bool filestream_write_file(const char *path, const void *data, int64_t size) return false; ret = filestream_write(file, data, size); - filestream_close(file); + if (filestream_close(file) != 0) + if (file) + free(file); if (ret != size) return false; diff --git a/platform/libretro/libretro-common/streams/file_stream_transforms.c b/platform/libretro/libretro-common/streams/file_stream_transforms.c index 0a0cd540..e94a3112 100644 --- a/platform/libretro/libretro-common/streams/file_stream_transforms.c +++ b/platform/libretro/libretro-common/streams/file_stream_transforms.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2018 The RetroArch team +/* Copyright (C) 2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (file_stream_transforms.c). 
@@ -69,17 +69,27 @@ RFILE* rfopen(const char *path, const char *mode) int rfclose(RFILE* stream) { + if (!stream) + return EOF; + return filestream_close(stream); } int64_t rftell(RFILE* stream) { + if (!stream) + return -1; + return filestream_tell(stream); } int64_t rfseek(RFILE* stream, int64_t offset, int origin) { int seek_position = -1; + + if (!stream) + return -1; + switch (origin) { case SEEK_SET: @@ -99,39 +109,61 @@ int64_t rfseek(RFILE* stream, int64_t offset, int origin) int64_t rfread(void* buffer, size_t elem_size, size_t elem_count, RFILE* stream) { + if (!stream || (elem_size == 0) || (elem_count == 0)) + return 0; + return (filestream_read(stream, buffer, elem_size * elem_count) / elem_size); } char *rfgets(char *buffer, int maxCount, RFILE* stream) { + if (!stream) + return NULL; + return filestream_gets(stream, buffer, maxCount); } int rfgetc(RFILE* stream) { + if (!stream) + return EOF; + return filestream_getc(stream); } int64_t rfwrite(void const* buffer, size_t elem_size, size_t elem_count, RFILE* stream) { - return filestream_write(stream, buffer, elem_size * elem_count); + if (!stream || (elem_size == 0) || (elem_count == 0)) + return 0; + + return (filestream_write(stream, buffer, elem_size * elem_count) / elem_size); } int rfputc(int character, RFILE * stream) { - return filestream_putc(stream, character); + if (!stream) + return EOF; + + return filestream_putc(stream, character); } int64_t rfflush(RFILE * stream) { - return filestream_flush(stream); + if (!stream) + return EOF; + + return filestream_flush(stream); } int rfprintf(RFILE * stream, const char * format, ...) { int result; va_list vl; + + if (!stream) + return -1; + va_start(vl, format); result = filestream_vprintf(stream, format, vl); va_end(vl); @@ -152,8 +184,12 @@ int rfscanf(RFILE * stream, const char * format, ...) 
{ int result; va_list vl; + + if (!stream) + return 0; + va_start(vl, format); - result = filestream_scanf(stream, format, vl); + result = filestream_vscanf(stream, format, &vl); va_end(vl); return result; } diff --git a/platform/libretro/libretro-common/string/stdstring.c b/platform/libretro/libretro-common/string/stdstring.c index e2afef14..45446ca7 100644 --- a/platform/libretro/libretro-common/string/stdstring.c +++ b/platform/libretro/libretro-common/string/stdstring.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2018 The RetroArch team +/* Copyright (C) 2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (stdstring.c). @@ -22,10 +22,43 @@ #include #include +#include #include #include +const uint8_t lr_char_props[256] = { + /*x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x80,0x00,0x00,0x80,0x00,0x00, /* 0x */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 1x */ + 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 2x !"#$%&'()*+,-./ */ + 0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x00,0x00,0x00,0x00,0x00,0x00, /* 3x 0123456789:;<=>? 
*/ + 0x00,0x23,0x23,0x23,0x23,0x23,0x23,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22, /* 4x @ABCDEFGHIJKLMNO */ + 0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x00,0x00,0x00,0x00,0x08, /* 5x PQRSTUVWXYZ[\]^_ */ + 0x00,0x25,0x25,0x25,0x25,0x25,0x25,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24, /* 6x `abcdefghijklmno */ + 0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x00,0x00,0x00,0x00,0x00, /* 7x pqrstuvwxyz{|}~ */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8x */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 9x */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Ax */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Bx */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Cx */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Dx */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Ex */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Fx */ +}; + +char *string_init(const char *src) +{ + return src ? 
strdup(src) : NULL; +} + +void string_set(char **string, const char *src) +{ + free(*string); + *string = string_init(src); +} + + char *string_to_upper(char *s) { char *cs = (char *)s; @@ -107,18 +140,18 @@ char *string_replace_substring(const char *in, /* Remove leading whitespaces */ char *string_trim_whitespace_left(char *const s) { - if(s && *s) + if (s && *s) { size_t len = strlen(s); char *current = s; - while(*current && isspace((unsigned char)*current)) + while (*current && ISSPACE((unsigned char)*current)) { ++current; --len; } - if(s != current) + if (s != current) memmove(s, current, len + 1); } @@ -128,18 +161,18 @@ char *string_trim_whitespace_left(char *const s) /* Remove trailing whitespaces */ char *string_trim_whitespace_right(char *const s) { - if(s && *s) + if (s && *s) { size_t len = strlen(s); char *current = s + len - 1; - while(current != s && isspace((unsigned char)*current)) + while (current != s && ISSPACE((unsigned char)*current)) { --current; --len; } - current[isspace((unsigned char)*current) ? 0 : 1] = '\0'; + current[ISSPACE((unsigned char)*current) ? 
0 : 1] = '\0'; } return s; @@ -154,88 +187,207 @@ char *string_trim_whitespace(char *const s) return s; } -char *word_wrap(char* buffer, const char *string, int line_width, bool unicode, unsigned max_lines) +void word_wrap(char *dst, size_t dst_size, const char *src, int line_width, int wideglyph_width, unsigned max_lines) { - unsigned i = 0; - unsigned len = (unsigned)strlen(string); - unsigned lines = 1; + char *lastspace = NULL; + unsigned counter = 0; + unsigned lines = 1; + size_t src_len = strlen(src); + const char *src_end = src + src_len; - while (i < len) + /* Prevent buffer overflow */ + if (dst_size < src_len + 1) + return; + + /* Early return if src string length is less + * than line width */ + if (src_len < line_width) { - unsigned counter; - int pos = (int)(&buffer[i] - buffer); + strcpy(dst, src); + return; + } - /* copy string until the end of the line is reached */ - for (counter = 1; counter <= (unsigned)line_width; counter++) + while (*src != '\0') + { + unsigned char_len; + + char_len = (unsigned)(utf8skip(src, 1) - src); + counter++; + + if (*src == ' ') + lastspace = dst; /* Remember the location of the whitespace */ + else if (*src == '\n') { - const char *character; - unsigned char_len; - unsigned j = i; + /* If newlines embedded in the input, + * reset the index */ + lines++; + counter = 0; - /* check if end of string reached */ - if (i == len) + /* Early return if remaining src string + * length is less than line width */ + if (src_end - src <= line_width) { - buffer[i] = 0; - return buffer; + strcpy(dst, src); + return; } + } - character = utf8skip(&string[i], 1); - char_len = (unsigned)(character - &string[i]); + while (char_len--) + *dst++ = *src++; - if (!unicode) - counter += char_len - 1; - - do - { - buffer[i] = string[i]; - char_len--; - i++; - } while(char_len); - - /* check for newlines embedded in the original input - * and reset the index */ - if (buffer[j] == '\n') - { - lines++; - counter = 1; - } - } - - /* check for 
whitespace */ - if (string[i] == ' ') + if (counter >= (unsigned)line_width) { - if ((max_lines == 0 || lines < max_lines)) + counter = 0; + + if (lastspace && (max_lines == 0 || lines < max_lines)) { - buffer[i] = '\n'; - i++; + /* Replace nearest (previous) whitespace + * with newline character */ + *lastspace = '\n'; lines++; + + src -= dst - lastspace - 1; + dst = lastspace + 1; + lastspace = NULL; + + /* Early return if remaining src string + * length is less than line width */ + if (src_end - src < line_width) + { + strcpy(dst, src); + return; + } } } - else - { - int k; - - /* check for nearest whitespace back in string */ - for (k = i; k > 0; k--) - { - if (string[k] != ' ' || (max_lines != 0 && lines >= max_lines)) - continue; - - buffer[k] = '\n'; - /* set string index back to character after this one */ - i = k + 1; - lines++; - break; - } - - if (&buffer[i] - buffer == pos) - return buffer; - } } - buffer[i] = 0; + *dst = '\0'; +} - return buffer; +void word_wrap_wideglyph(char *dst, size_t dst_size, const char *src, int line_width, int wideglyph_width, unsigned max_lines) +{ + char *lastspace = NULL; + char *lastwideglyph = NULL; + const char *src_end = src + strlen(src); + unsigned lines = 1; + /* 'line_width' means max numbers of characters per line, + * but this metric is only meaningful when dealing with + * 'regular' glyphs that have an on-screen pixel width + * similar to that of regular Latin characters. + * When handing so-called 'wide' Unicode glyphs, it is + * necessary to consider the actual on-screen pixel width + * of each character. 
+ * In order to do this, we create a distinction between + * regular Latin 'non-wide' glyphs and 'wide' glyphs, and + * normalise all values relative to the on-screen pixel + * width of regular Latin characters: + * - Regular 'non-wide' glyphs have a normalised width of 100 + * - 'line_width' is therefore normalised to 100 * (width_in_characters) + * - 'wide' glyphs have a normalised width of + * 100 * (wide_character_pixel_width / latin_character_pixel_width) + * - When a character is detected, the position in the current + * line is incremented by the regular normalised width of 100 + * - If that character is then determined to be a 'wide' + * glyph, the position in the current line is further incremented + * by the difference between the normalised 'wide' and 'non-wide' + * width values */ + unsigned counter_normalized = 0; + int line_width_normalized = line_width * 100; + int additional_counter_normalized = wideglyph_width - 100; + + /* Early return if src string length is less + * than line width */ + if (src_end - src < line_width) + { + strlcpy(dst, src, dst_size); + return; + } + + while (*src != '\0') + { + unsigned char_len; + + char_len = (unsigned)(utf8skip(src, 1) - src); + counter_normalized += 100; + + /* Prevent buffer overflow */ + if (char_len >= dst_size) + break; + + if (*src == ' ') + lastspace = dst; /* Remember the location of the whitespace */ + else if (*src == '\n') + { + /* If newlines embedded in the input, + * reset the index */ + lines++; + counter_normalized = 0; + + /* Early return if remaining src string + * length is less than line width */ + if (src_end - src <= line_width) + { + strlcpy(dst, src, dst_size); + return; + } + } + else if (char_len >= 3) + { + /* Remember the location of the first byte + * whose length as UTF-8 >= 3*/ + lastwideglyph = dst; + counter_normalized += additional_counter_normalized; + } + + dst_size -= char_len; + while (char_len--) + *dst++ = *src++; + + if (counter_normalized >= 
(unsigned)line_width_normalized) + { + counter_normalized = 0; + + if (max_lines != 0 && lines >= max_lines) + continue; + else if (lastwideglyph && (!lastspace || lastwideglyph > lastspace)) + { + /* Insert newline character */ + *lastwideglyph = '\n'; + lines++; + src -= dst - lastwideglyph; + dst = lastwideglyph + 1; + lastwideglyph = NULL; + + /* Early return if remaining src string + * length is less than line width */ + if (src_end - src <= line_width) + { + strlcpy(dst, src, dst_size); + return; + } + } + else if (lastspace) + { + /* Replace nearest (previous) whitespace + * with newline character */ + *lastspace = '\n'; + lines++; + src -= dst - lastspace - 1; + dst = lastspace + 1; + lastspace = NULL; + + /* Early return if remaining src string + * length is less than line width */ + if (src_end - src < line_width) + { + strlcpy(dst, src, dst_size); + return; + } + } + } + } + + *dst = '\0'; } /* Splits string into tokens seperated by 'delim' @@ -248,7 +400,7 @@ char *word_wrap(char* buffer, const char *string, int line_width, bool unicode, * char *str = "1,2,3,4,5,6,7,,,10,"; * char **str_ptr = &str; * char *token = NULL; - * while((token = string_tokenize(str_ptr, ","))) + * while ((token = string_tokenize(str_ptr, ","))) * { * printf("%s\n", token); * free(token); @@ -328,7 +480,7 @@ void string_replace_all_chars(char *str, char find, char replace) if (string_is_empty(str)) return; - while((str_ptr = strchr(str_ptr, find)) != NULL) + while ((str_ptr = strchr(str_ptr, find))) *str_ptr++ = replace; } @@ -343,7 +495,7 @@ unsigned string_to_unsigned(const char *str) for (ptr = str; *ptr != '\0'; ptr++) { - if (!isdigit(*ptr)) + if (!ISDIGIT((unsigned char)*ptr)) return 0; } @@ -376,9 +528,105 @@ unsigned string_hex_to_unsigned(const char *str) /* Check for valid characters */ for (ptr = hex_str; *ptr != '\0'; ptr++) { - if (!isxdigit(*ptr)) + if (!isxdigit((unsigned char)*ptr)) return 0; } return (unsigned)strtoul(hex_str, NULL, 16); } + +/** + * Get the 
total number of occurrences of a character in the given string. + */ +int string_count_occurrences_single_character(char *str, char t) +{ + int ctr = 0; + int i; + + for (i = 0; str[i] != '\0'; ++i) { + if (t == str[i]) + ++ctr; + } + + return ctr; +} + +/** + * Replaces all spaces with the given character. + */ +void string_replace_whitespace_with_single_character(char *str, char t) +{ + + while (*str) { + if (isspace(*str)) + *str = t; + str++; + } +} + +/** + * Replaces multiple spaces with a single space in a string. + */ +void string_replace_multi_space_with_single_space(char *str) +{ + char *dest = str; + + while (*str != '\0') + { + while (*str == ' ' && *(str + 1) == ' ') + str++; + + *dest++ = *str++; + } + + *dest = '\0'; +} + +/** + * Remove all spaces from the given string. + */ +void string_remove_all_whitespace(char* str_trimmed, const char* str_untrimmed) +{ + while (*str_untrimmed != '\0') + { + if(!isspace(*str_untrimmed)) + { + *str_trimmed = *str_untrimmed; + str_trimmed++; + } + str_untrimmed++; + } + *str_trimmed = '\0'; +} + +/** + * Retrieve the last occurance of the given character in a string. + */ +int string_index_last_occurance(char *str, char t) +{ + const char * ret = strrchr(str, t); + if (ret) + return ret-str; + + return -1; +} + +/** + * Find the position of a substring in a string. 
+ */ +int string_find_index_substring_string(const char* str1, const char* str2) +{ + int index; + + if (str1[0] != '\0') + { + const char *pfound = strstr(str1, str2); + if (pfound != NULL) + { + index = (pfound - str1); + return index; + } + } + + return -1; +} diff --git a/platform/libretro/libretro-common/vfs/vfs_implementation.c b/platform/libretro/libretro-common/vfs/vfs_implementation.c index 71529482..afd4f0f6 100644 --- a/platform/libretro/libretro-common/vfs/vfs_implementation.c +++ b/platform/libretro/libretro-common/vfs/vfs_implementation.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2019 The RetroArch team +/* Copyright (C) 2010-2020 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (vfs_implementation.c). @@ -51,10 +51,6 @@ # if defined(PSP) # include # endif -# if defined(PS2) -# include -# include -# endif # include # include # if !defined(VITA) @@ -66,18 +62,12 @@ # include # include # endif +# if defined(WIIU) +# include +# endif #endif -#ifdef __CELLOS_LV2__ -#include -#define O_RDONLY CELL_FS_O_RDONLY -#define O_WRONLY CELL_FS_O_WRONLY -#define O_CREAT CELL_FS_O_CREAT -#define O_TRUNC CELL_FS_O_TRUNC -#define O_RDWR CELL_FS_O_RDWR -#else #include -#endif /* TODO: Some things are duplicated but I'm really afraid of breaking other platforms by touching this */ #if defined(VITA) @@ -93,16 +83,13 @@ # if defined(PSP) # include # endif -# if defined(PS2) -# include -# endif # include # include # include # include #endif -#if (defined(__CELLOS_LV2__) && !defined(__PSL1GHT__)) || defined(__QNX__) || defined(PSP) || defined(PS2) +#if defined(__QNX__) || defined(PSP) #include /* stat() is defined here */ #endif @@ -146,20 +133,18 @@ #include #endif -#if defined(PS2) -#include -#include +#if defined(__PS3__) || defined(__PSL1GHT__) +#include +#if defined(__PSL1GHT__) +#include #endif - -#if defined(__CELLOS_LV2__) -#include #endif #if 
defined(VITA) #define FIO_S_ISDIR SCE_S_ISDIR #endif -#if (defined(__CELLOS_LV2__) && !defined(__PSL1GHT__)) || defined(__QNX__) || defined(PSP) +#if defined(__QNX__) || defined(PSP) #include /* stat() is defined here */ #endif @@ -173,14 +158,16 @@ #endif #if defined(_WIN32) -#if !defined(_MSC_VER) || (defined(_MSC_VER) && _MSC_VER >= 1400) +#if defined(_MSC_VER) && _MSC_VER >= 1400 #define ATLEAST_VC2005 #endif #endif #include #include +#if defined(HAVE_MMAP) #include +#endif #include #include #include @@ -189,9 +176,17 @@ #include #endif +#if (defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE - 0) >= 200112) || (defined(__POSIX_VISIBLE) && __POSIX_VISIBLE >= 200112) || (defined(_POSIX_VERSION) && _POSIX_VERSION >= 200112) || __USE_LARGEFILE || (defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS == 64) +#ifndef HAVE_64BIT_OFFSETS +#define HAVE_64BIT_OFFSETS +#endif +#endif + #define RFILE_HINT_UNBUFFERED (1 << 8) -int64_t retro_vfs_file_seek_internal(libretro_vfs_implementation_file *stream, int64_t offset, int whence) +int64_t retro_vfs_file_seek_internal( + libretro_vfs_implementation_file *stream, + int64_t offset, int whence) { if (!stream) return -1; @@ -202,19 +197,9 @@ int64_t retro_vfs_file_seek_internal(libretro_vfs_implementation_file *stream, i if (stream->scheme == VFS_SCHEME_CDROM) return retro_vfs_file_seek_cdrom(stream, offset, whence); #endif -/* VC2005 and up have a special 64-bit fseek */ #ifdef ATLEAST_VC2005 + /* VC2005 and up have a special 64-bit fseek */ return _fseeki64(stream->fp, offset, whence); -#elif defined(__CELLOS_LV2__) || defined(_MSC_VER) && _MSC_VER <= 1310 - return fseek(stream->fp, (long)offset, whence); -#elif defined(PS2) - { - int64_t ret = fileXioLseek(fileno(stream->fp), (off_t)offset, whence); - /* fileXioLseek could return positive numbers */ - if (ret > 0) - return 0; - return ret; - } #elif defined(ORBIS) { int ret = orbisLseek(stream->fd, offset, whence); @@ -222,8 +207,10 @@ int64_t 
retro_vfs_file_seek_internal(libretro_vfs_implementation_file *stream, i return -1; return 0; } -#else +#elif defined(HAVE_64BIT_OFFSETS) return fseeko(stream->fp, (off_t)offset, whence); +#else + return fseek(stream->fp, (long)offset, whence); #endif } #ifdef HAVE_MMAP @@ -263,7 +250,7 @@ int64_t retro_vfs_file_seek_internal(libretro_vfs_implementation_file *stream, i } #endif - if (lseek(stream->fd, offset, whence) < 0) + if (lseek(stream->fd, (off_t)offset, whence) < 0) return -1; return 0; @@ -282,49 +269,72 @@ int64_t retro_vfs_file_seek_internal(libretro_vfs_implementation_file *stream, i libretro_vfs_implementation_file *retro_vfs_file_open_impl( const char *path, unsigned mode, unsigned hints) { - int flags = 0; - const char *mode_str = NULL; - libretro_vfs_implementation_file *stream = (libretro_vfs_implementation_file*) - calloc(1, sizeof(*stream)); #if defined(VFS_FRONTEND) || defined(HAVE_CDROM) int path_len = (int)strlen(path); #endif - #ifdef VFS_FRONTEND const char *dumb_prefix = "vfsonly://"; - size_t dumb_prefix_siz = strlen(dumb_prefix); + size_t dumb_prefix_siz = STRLEN_CONST("vfsonly://"); int dumb_prefix_len = (int)dumb_prefix_siz; - - if (path_len >= dumb_prefix_len) - { - if (!memcmp(path, dumb_prefix, dumb_prefix_len)) - path += dumb_prefix_siz; - } #endif - #ifdef HAVE_CDROM - { - const char *cdrom_prefix = "cdrom://"; - size_t cdrom_prefix_siz = strlen(cdrom_prefix); - int cdrom_prefix_len = (int)cdrom_prefix_siz; - - if (path_len > cdrom_prefix_len) - { - if (!memcmp(path, cdrom_prefix, cdrom_prefix_len)) - { - path += cdrom_prefix_siz; - stream->scheme = VFS_SCHEME_CDROM; - } - } - } + const char *cdrom_prefix = "cdrom://"; + size_t cdrom_prefix_siz = STRLEN_CONST("cdrom://"); + int cdrom_prefix_len = (int)cdrom_prefix_siz; #endif + int flags = 0; + const char *mode_str = NULL; + libretro_vfs_implementation_file *stream = + (libretro_vfs_implementation_file*) + malloc(sizeof(*stream)); if (!stream) return NULL; - (void)flags; + 
stream->fd = 0; + stream->hints = hints; + stream->size = 0; + stream->buf = NULL; + stream->fp = NULL; +#ifdef _WIN32 + stream->fh = 0; +#endif + stream->orig_path = NULL; + stream->mappos = 0; + stream->mapsize = 0; + stream->mapped = NULL; + stream->scheme = VFS_SCHEME_NONE; + +#ifdef VFS_FRONTEND + if (path_len >= dumb_prefix_len) + if (!memcmp(path, dumb_prefix, dumb_prefix_len)) + path += dumb_prefix_siz; +#endif + +#ifdef HAVE_CDROM + stream->cdrom.cue_buf = NULL; + stream->cdrom.cue_len = 0; + stream->cdrom.byte_pos = 0; + stream->cdrom.drive = 0; + stream->cdrom.cur_min = 0; + stream->cdrom.cur_sec = 0; + stream->cdrom.cur_frame = 0; + stream->cdrom.cur_track = 0; + stream->cdrom.cur_lba = 0; + stream->cdrom.last_frame_lba = 0; + stream->cdrom.last_frame[0] = '\0'; + stream->cdrom.last_frame_valid = false; + + if (path_len > cdrom_prefix_len) + { + if (!memcmp(path, cdrom_prefix, cdrom_prefix_len)) + { + path += cdrom_prefix_siz; + stream->scheme = VFS_SCHEME_CDROM; + } + } +#endif - stream->hints = hints; stream->orig_path = strdup(path); #ifdef HAVE_MMAP @@ -350,9 +360,7 @@ libretro_vfs_implementation_file *retro_vfs_file_open_impl( flags = O_WRONLY | O_CREAT | O_TRUNC; #if !defined(ORBIS) -#if defined(PS2) - flags |= FIO_S_IRUSR | FIO_S_IWUSR; -#elif !defined(_WIN32) +#if !defined(_WIN32) flags |= S_IRUSR | S_IWUSR; #else flags |= O_BINARY; @@ -364,9 +372,7 @@ libretro_vfs_implementation_file *retro_vfs_file_open_impl( mode_str = "w+b"; flags = O_RDWR | O_CREAT | O_TRUNC; #if !defined(ORBIS) -#if defined(PS2) - flags |= FIO_S_IRUSR | FIO_S_IWUSR; -#elif !defined(_WIN32) +#if !defined(_WIN32) flags |= S_IRUSR | S_IWUSR; #else flags |= O_BINARY; @@ -380,9 +386,7 @@ libretro_vfs_implementation_file *retro_vfs_file_open_impl( flags = O_RDWR; #if !defined(ORBIS) -#if defined(PS2) - flags |= FIO_S_IRUSR | FIO_S_IWUSR; -#elif !defined(_WIN32) +#if !defined(_WIN32) flags |= S_IRUSR | S_IWUSR; #else flags |= O_BINARY; @@ -403,7 +407,7 @@ 
libretro_vfs_implementation_file *retro_vfs_file_open_impl( stream->fd = -1; goto error; } - stream->fd = fd; + stream->fd = fd; #else FILE *fp; #ifdef HAVE_CDROM @@ -432,13 +436,30 @@ libretro_vfs_implementation_file *retro_vfs_file_open_impl( * * https://www.freebsd.org/cgi/man.cgi?query=setvbuf&apropos=0&sektion=0&manpath=FreeBSD+11.1-RELEASE&arch=default&format=html * - * If the size argument is not zero but buf is NULL, a buffer of the given size will be allocated immediately, and + * If the size argument is not zero but buf is NULL, + * a buffer of the given size will be allocated immediately, and * released on close. This is an extension to ANSI C. * - * Since C89 does not support specifying a null buffer with a non-zero size, we create and track our own buffer for it. + * Since C89 does not support specifying a NULL buffer + * with a non-zero size, we create and track our own buffer for it. */ - /* TODO: this is only useful for a few platforms, find which and add ifdef */ -#if !defined(PS2) && !defined(PSP) + /* TODO: this is only useful for a few platforms, + * find which and add ifdef */ +#if defined(_3DS) + if (stream->scheme != VFS_SCHEME_CDROM) + { + stream->buf = (char*)calloc(1, 0x10000); + if (stream->fp) + setvbuf(stream->fp, stream->buf, _IOFBF, 0x10000); + } +#elif defined(WIIU) + if (stream->scheme != VFS_SCHEME_CDROM) + { + const int bufsize = 128*1024; + stream->buf = (char*)memalign(0x40, bufsize); + if (stream->fp) + setvbuf(stream->fp, stream->buf, _IOFBF, bufsize); + } if (stream->scheme != VFS_SCHEME_CDROM) { stream->buf = (char*)calloc(1, 0x4000); @@ -537,9 +558,7 @@ int retro_vfs_file_close_impl(libretro_vfs_implementation_file *stream) if ((stream->hints & RFILE_HINT_UNBUFFERED) == 0) { if (stream->fp) - { fclose(stream->fp); - } } else { @@ -604,7 +623,7 @@ int64_t retro_vfs_file_truncate_impl(libretro_vfs_implementation_file *stream, i if (_chsize(_fileno(stream->fp), length) != 0) return -1; #elif !defined(VITA) && !defined(PSP) && 
!defined(PS2) && !defined(ORBIS) && (!defined(SWITCH) || defined(HAVE_LIBNX)) - if (ftruncate(fileno(stream->fp), length) != 0) + if (ftruncate(fileno(stream->fp), (off_t)length) != 0) return -1; #endif @@ -630,9 +649,11 @@ int64_t retro_vfs_file_tell_impl(libretro_vfs_implementation_file *stream) return ret; } #else - /* VC2005 and up have a special 64-bit ftell */ #ifdef ATLEAST_VC2005 + /* VC2005 and up have a special 64-bit ftell */ return _ftelli64(stream->fp); +#elif defined(HAVE_64BIT_OFFSETS) + return ftello(stream->fp); #else return ftell(stream->fp); #endif @@ -641,7 +662,8 @@ int64_t retro_vfs_file_tell_impl(libretro_vfs_implementation_file *stream) #ifdef HAVE_MMAP /* Need to check stream->mapped because this function * is called in filestream_open() */ - if (stream->mapped && stream->hints & RETRO_VFS_FILE_ACCESS_HINT_FREQUENT_ACCESS) + if (stream->mapped && stream->hints & + RETRO_VFS_FILE_ACCESS_HINT_FREQUENT_ACCESS) return stream->mappos; #endif if (lseek(stream->fd, 0, SEEK_CUR) < 0) @@ -865,12 +887,12 @@ const char *retro_vfs_file_get_path_impl( int retro_vfs_stat_impl(const char *path, int32_t *size) { -#if defined(VITA) || defined(PSP) - /* Vita / PSP */ - SceIoStat buf; - int stat_ret; bool is_dir = false; bool is_character_special = false; +#if defined(VITA) + /* Vita / PSP */ + SceIoStat buf; + int dir_ret; char *tmp = NULL; size_t len = 0; @@ -882,97 +904,47 @@ int retro_vfs_stat_impl(const char *path, int32_t *size) if (tmp[len-1] == '/') tmp[len-1] = '\0'; - stat_ret = sceIoGetstat(tmp, &buf); + dir_ret = sceIoGetstat(tmp, &buf); free(tmp); - if (stat_ret < 0) + if (dir_ret < 0) return 0; if (size) - *size = (int32_t)buf.st_size; - - is_dir = FIO_S_ISDIR(buf.st_mode); - - return RETRO_VFS_STAT_IS_VALID | (is_dir ? RETRO_VFS_STAT_IS_DIRECTORY : 0) | (is_character_special ? 
RETRO_VFS_STAT_IS_CHARACTER_SPECIAL : 0); + *size = (int32_t)buf.st_size; + is_dir = FIO_S_ISDIR(buf.st_mode); #elif defined(ORBIS) /* Orbis */ - bool is_dir, is_character_special; - int dir_ret; + int dir_ret = 0; if (!path || !*path) return 0; if (size) - *size = (int32_t)buf.st_size; + *size = (int32_t)buf.st_size; - dir_ret = orbisDopen(path); - is_dir = dir_ret > 0; + dir_ret = orbisDopen(path); + is_dir = dir_ret > 0; orbisDclose(dir_ret); - is_character_special = S_ISCHR(buf.st_mode); - - return RETRO_VFS_STAT_IS_VALID | (is_dir ? RETRO_VFS_STAT_IS_DIRECTORY : 0) | (is_character_special ? RETRO_VFS_STAT_IS_CHARACTER_SPECIAL : 0); - -#elif defined(PS2) - /* PS2 */ - iox_stat_t buf; - bool is_dir; - bool is_character_special = false; - char *tmp = NULL; - size_t len = 0; + is_character_special = S_ISCHR(buf.st_mode); +#elif defined(__PSL1GHT__) || defined(__PS3__) + /* Lowlevel Lv2 */ + sysFSStat buf; if (!path || !*path) return 0; - - tmp = strdup(path); - len = strlen(tmp); - if (tmp[len-1] == '/') - tmp[len-1] = '\0'; - - fileXioGetStat(tmp, &buf); - free(tmp); - - if (size) - *size = (int32_t)buf.size; - - if (!buf.mode) - { - /* if fileXioGetStat fails */ - int dir_ret = fileXioDopen(path); - is_dir = dir_ret > 0; - if (is_dir) { - fileXioDclose(dir_ret); - } - } - else - is_dir = FIO_S_ISDIR(buf.mode); - - return RETRO_VFS_STAT_IS_VALID | (is_dir ? RETRO_VFS_STAT_IS_DIRECTORY : 0) | (is_character_special ? RETRO_VFS_STAT_IS_CHARACTER_SPECIAL : 0); - -#elif defined(__CELLOS_LV2__) - /* CellOS Lv2 */ - bool is_dir; - bool is_character_special = false; - CellFsStat buf; - - if (!path || !*path) - return 0; - if (cellFsStat(path, &buf) < 0) + if (sysFsStat(path, &buf) < 0) return 0; if (size) - *size = (int32_t)buf.st_size; - - is_dir = ((buf.st_mode & S_IFMT) == S_IFDIR); - - return RETRO_VFS_STAT_IS_VALID | (is_dir ? RETRO_VFS_STAT_IS_DIRECTORY : 0) | (is_character_special ? 
RETRO_VFS_STAT_IS_CHARACTER_SPECIAL : 0); + *size = (int32_t)buf.st_size; + is_dir = ((buf.st_mode & S_IFMT) == S_IFDIR); #elif defined(_WIN32) /* Windows */ - bool is_dir; DWORD file_info; struct _stat buf; - bool is_character_special = false; #if defined(LEGACY_WIN32) char *path_local = NULL; #else @@ -983,8 +955,8 @@ int retro_vfs_stat_impl(const char *path, int32_t *size) return 0; #if defined(LEGACY_WIN32) - path_local = utf8_to_local_string_alloc(path); - file_info = GetFileAttributes(path_local); + path_local = utf8_to_local_string_alloc(path); + file_info = GetFileAttributes(path_local); if (!string_is_empty(path_local)) _stat(path_local, &buf); @@ -992,8 +964,8 @@ int retro_vfs_stat_impl(const char *path, int32_t *size) if (path_local) free(path_local); #else - path_wide = utf8_to_utf16_string_alloc(path); - file_info = GetFileAttributesW(path_wide); + path_wide = utf8_to_utf16_string_alloc(path); + file_info = GetFileAttributesW(path_wide); _wstat(path_wide, &buf); @@ -1009,11 +981,41 @@ int retro_vfs_stat_impl(const char *path, int32_t *size) is_dir = (file_info & FILE_ATTRIBUTE_DIRECTORY); - return RETRO_VFS_STAT_IS_VALID | (is_dir ? RETRO_VFS_STAT_IS_DIRECTORY : 0) | (is_character_special ? 
RETRO_VFS_STAT_IS_CHARACTER_SPECIAL : 0); +#elif defined(GEKKO) + /* On GEKKO platforms, paths cannot have + * trailing slashes - we must therefore + * remove them */ + char *path_buf = NULL; + int stat_ret = -1; + struct stat stat_buf; + size_t len; + + if (string_is_empty(path)) + return 0; + + path_buf = strdup(path); + if (!path_buf) + return 0; + + len = strlen(path_buf); + if (len > 0) + if (path_buf[len - 1] == '/') + path_buf[len - 1] = '\0'; + + stat_ret = stat(path_buf, &stat_buf); + free(path_buf); + + if (stat_ret < 0) + return 0; + + if (size) + *size = (int32_t)stat_buf.st_size; + + is_dir = S_ISDIR(stat_buf.st_mode); + is_character_special = S_ISCHR(stat_buf.st_mode); #else /* Every other platform */ - bool is_dir, is_character_special; struct stat buf; if (!path || !*path) @@ -1026,9 +1028,8 @@ int retro_vfs_stat_impl(const char *path, int32_t *size) is_dir = S_ISDIR(buf.st_mode); is_character_special = S_ISCHR(buf.st_mode); - - return RETRO_VFS_STAT_IS_VALID | (is_dir ? RETRO_VFS_STAT_IS_DIRECTORY : 0) | (is_character_special ? RETRO_VFS_STAT_IS_CHARACTER_SPECIAL : 0); #endif + return RETRO_VFS_STAT_IS_VALID | (is_dir ? RETRO_VFS_STAT_IS_DIRECTORY : 0) | (is_character_special ? 
RETRO_VFS_STAT_IS_CHARACTER_SPECIAL : 0); } #if defined(VITA) @@ -1043,27 +1044,47 @@ int retro_vfs_mkdir_impl(const char *dir) { #if defined(_WIN32) #ifdef LEGACY_WIN32 - int ret = _mkdir(dir); + int ret = _mkdir(dir); #else - wchar_t *dirW = utf8_to_utf16_string_alloc(dir); - int ret = -1; + wchar_t *dir_w = utf8_to_utf16_string_alloc(dir); + int ret = -1; - if (dirW) + if (dir_w) { - ret = _wmkdir(dirW); - free(dirW); + ret = _wmkdir(dir_w); + free(dir_w); } #endif #elif defined(IOS) int ret = mkdir(dir, 0755); -#elif defined(VITA) || defined(PSP) +#elif defined(VITA) int ret = sceIoMkdir(dir, 0777); -#elif defined(PS2) - int ret = fileXioMkdir(dir, 0777); #elif defined(ORBIS) int ret = orbisMkdir(dir, 0755); #elif defined(__QNX__) int ret = mkdir(dir, 0777); +#elif defined(GEKKO) + /* On GEKKO platforms, mkdir() fails if + * the path has a trailing slash. We must + * therefore remove it. */ + int ret = -1; + if (!string_is_empty(dir)) + { + char *dir_buf = strdup(dir); + + if (dir_buf) + { + size_t len = strlen(dir_buf); + + if (len > 0) + if (dir_buf[len - 1] == '/') + dir_buf[len - 1] = '\0'; + + ret = mkdir(dir_buf, 0750); + + free(dir_buf); + } + } #else int ret = mkdir(dir, 0750); #endif @@ -1089,16 +1110,13 @@ struct libretro_vfs_implementation_dir HANDLE directory; bool next; char path[PATH_MAX_LENGTH]; -#elif defined(VITA) || defined(PSP) +#elif defined(VITA) SceUID directory; SceIoDirent entry; -#elif defined(PS2) +#elif defined(__PSL1GHT__) || defined(__PS3__) + int error; int directory; - iox_dirent_t entry; -#elif defined(__CELLOS_LV2__) - CellFsErrno error; - int directory; - CellFsDirent entry; + sysFSDirent entry; #elif defined(ORBIS) int directory; struct dirent entry; @@ -1112,10 +1130,10 @@ static bool dirent_check_error(libretro_vfs_implementation_dir *rdir) { #if defined(_WIN32) return (rdir->directory == INVALID_HANDLE_VALUE); -#elif defined(VITA) || defined(PSP) || defined(PS2) || defined(ORBIS) +#elif defined(VITA) || defined(ORBIS) 
return (rdir->directory < 0); -#elif defined(__CELLOS_LV2__) - return (rdir->error != CELL_FS_SUCCEEDED); +#elif defined(__PSL1GHT__) || defined(__PS3__) + return (rdir->error != FS_SUCCEEDED); #else return !(rdir->directory); #endif @@ -1174,15 +1192,13 @@ libretro_vfs_implementation_dir *retro_vfs_opendir_impl( free(path_wide); #endif -#elif defined(VITA) || defined(PSP) +#elif defined(VITA) rdir->directory = sceIoDopen(name); -#elif defined(PS2) - rdir->directory = ps2fileXioDopen(name); #elif defined(_3DS) rdir->directory = !string_is_empty(name) ? opendir(name) : NULL; rdir->entry = NULL; -#elif defined(__CELLOS_LV2__) - rdir->error = cellFsOpendir(name, &rdir->directory); +#elif defined(__PSL1GHT__) || defined(__PS3__) + rdir->error = sysFsOpendir(name, &rdir->directory); #elif defined(ORBIS) rdir->directory = orbisDopen(name); #else @@ -1216,16 +1232,11 @@ bool retro_vfs_readdir_impl(libretro_vfs_implementation_dir *rdir) rdir->next = true; return (rdir->directory != INVALID_HANDLE_VALUE); -#elif defined(VITA) || defined(PSP) +#elif defined(VITA) return (sceIoDread(rdir->directory, &rdir->entry) > 0); -#elif defined(PS2) - iox_dirent_t record; - int ret = ps2fileXioDread(rdir->directory, &record); - rdir->entry = record; - return ( ret > 0); -#elif defined(__CELLOS_LV2__) +#elif defined(__PSL1GHT__) || defined(__PS3__) uint64_t nread; - rdir->error = cellFsReaddir(rdir->directory, &rdir->entry, &nread); + rdir->error = sysFsReaddir(rdir->directory, &rdir->entry, &nread); return (nread != 0); #elif defined(ORBIS) return (orbisDread(rdir->directory, &rdir->entry) > 0); @@ -1238,29 +1249,17 @@ const char *retro_vfs_dirent_get_name_impl(libretro_vfs_implementation_dir *rdir { #if defined(_WIN32) #if defined(LEGACY_WIN32) - { - char *name_local = local_to_utf8_string_alloc(rdir->entry.cFileName); - memset(rdir->entry.cFileName, 0, sizeof(rdir->entry.cFileName)); - strlcpy(rdir->entry.cFileName, name_local, sizeof(rdir->entry.cFileName)); - - if (name_local) - 
free(name_local); - } + char *name = local_to_utf8_string_alloc(rdir->entry.cFileName); #else - { - char *name = utf16_to_utf8_string_alloc(rdir->entry.cFileName); - memset(rdir->entry.cFileName, 0, sizeof(rdir->entry.cFileName)); - strlcpy((char*)rdir->entry.cFileName, name, sizeof(rdir->entry.cFileName)); - - if (name) - free(name); - } + char *name = utf16_to_utf8_string_alloc(rdir->entry.cFileName); #endif + memset(rdir->entry.cFileName, 0, sizeof(rdir->entry.cFileName)); + strlcpy((char*)rdir->entry.cFileName, name, sizeof(rdir->entry.cFileName)); + if (name) + free(name); return (char*)rdir->entry.cFileName; -#elif defined(VITA) || defined(PSP) || defined(__CELLOS_LV2__) || defined(ORBIS) +#elif defined(VITA) || defined(ORBIS) || defined(__PSL1GHT__) || defined(__PS3__) return rdir->entry.d_name; -#elif defined(PS2) - return rdir->entry.name; #else if (!rdir || !rdir->entry) return NULL; @@ -1273,21 +1272,14 @@ bool retro_vfs_dirent_is_dir_impl(libretro_vfs_implementation_dir *rdir) #if defined(_WIN32) const WIN32_FIND_DATA *entry = (const WIN32_FIND_DATA*)&rdir->entry; return entry->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY; -#elif defined(PSP) || defined(VITA) - const SceIoDirent *entry = (const SceIoDirent*)&rdir->entry; -#if defined(PSP) - return (entry->d_stat.st_attr & FIO_SO_IFDIR) == FIO_SO_IFDIR; #elif defined(VITA) + const SceIoDirent *entry = (const SceIoDirent*)&rdir->entry; return SCE_S_ISDIR(entry->d_stat.st_mode); -#endif -#elif defined(PS2) - const iox_dirent_t *entry = (const iox_dirent_t*)&rdir->entry; - return FIO_S_ISDIR(entry->stat.mode); -#elif defined(__CELLOS_LV2__) - CellFsDirent *entry = (CellFsDirent*)&rdir->entry; - return (entry->d_type == CELL_FS_TYPE_DIRECTORY); +#elif defined(__PSL1GHT__) || defined(__PS3__) + sysFSDirent *entry = (sysFSDirent*)&rdir->entry; + return (entry->d_type == FS_TYPE_DIR); #elif defined(ORBIS) - const struct dirent *entry = &rdir->entry; + const struct dirent *entry = &rdir->entry; if (entry->d_type 
== DT_DIR) return true; if (!(entry->d_type == DT_UNKNOWN || entry->d_type == DT_LNK)) @@ -1320,12 +1312,10 @@ int retro_vfs_closedir_impl(libretro_vfs_implementation_dir *rdir) #if defined(_WIN32) if (rdir->directory != INVALID_HANDLE_VALUE) FindClose(rdir->directory); -#elif defined(VITA) || defined(PSP) +#elif defined(VITA) sceIoDclose(rdir->directory); -#elif defined(PS2) - ps2fileXioDclose(rdir->directory); -#elif defined(__CELLOS_LV2__) - rdir->error = cellFsClosedir(rdir->directory); +#elif defined(__PSL1GHT__) || defined(__PS3__) + rdir->error = sysFsClosedir(rdir->directory); #elif defined(ORBIS) orbisDclose(rdir->directory); #else diff --git a/platform/libretro/libretro.c b/platform/libretro/libretro.c index f03d4d68..2399a3c8 100644 --- a/platform/libretro/libretro.c +++ b/platform/libretro/libretro.c @@ -15,13 +15,11 @@ #include #include #ifndef _WIN32 -#ifndef NO_MMAP #ifdef __SWITCH__ #include "switch/mman.h" #else #include #endif -#endif #else #include #include @@ -340,30 +338,6 @@ static void munmap(void *addr, size_t length) UnmapViewOfFile(addr); /* ruh-ro, we leaked handle from CreateFileMapping() ... 
*/ } -#elif defined(NO_MMAP) -#define PROT_EXEC 0x04 -#define MAP_FAILED 0 -#define PROT_READ 0 -#define PROT_WRITE 0 -#define MAP_PRIVATE 0 -#define MAP_ANONYMOUS 0 - -void* mmap(void *desired_addr, size_t len, int mmap_prot, int mmap_flags, int fildes, size_t off) -{ - return calloc(1, len); -} - -void munmap(void *base_addr, size_t len) -{ - free(base_addr); -} - -int mprotect(void *addr, size_t len, int prot) -{ - /* stub - not really needed at this point since this codepath has no dynarecs */ - return 0; -} - #endif #ifndef MAP_ANONYMOUS From 852f27d864abdb58ffd22fd797c4792e89fd51ec Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 9 Feb 2022 22:09:23 +0000 Subject: [PATCH 0708/1110] pico, improve detection --- pico/carthw.cfg | 12 ++++++++++++ pico/carthw_cfg.c | 11 ++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/pico/carthw.cfg b/pico/carthw.cfg index f7cfcb1f..650f9404 100644 --- a/pico/carthw.cfg +++ b/pico/carthw.cfg @@ -46,6 +46,18 @@ hw = svp check_str = 0x100, "SEGA PICO" hw = pico +[Pico] +check_str = 0x100, "SEGATOYS PICO" +hw = pico + +[Pico] +check_str = 0x100, "SEGA TOYS PICO" +hw = pico + +[Pico] +check_str = 0x100, "SAMSUNG PICO" +hw = pico + [Pico] check_str = 0x100, "IMA IKUNOUJYUKU" hw = pico diff --git a/pico/carthw_cfg.c b/pico/carthw_cfg.c index 89d5e0b0..55c779e5 100644 --- a/pico/carthw_cfg.c +++ b/pico/carthw_cfg.c @@ -1,4 +1,4 @@ -/* generated by tools/make_carthw_c, do not modify */ +/* generated by ./tools/make_carthw_c, do not modify */ static const char builtin_carthw_cfg[] = "[]\n" "check_str=0x150,\"Virtua Racing\"\n" @@ -12,6 +12,15 @@ static const char builtin_carthw_cfg[] = "check_str=0x100,\"SEGA PICO\"\n" "hw=pico\n" "[]\n" + "check_str=0x100,\"SEGATOYS PICO\"\n" + "hw=pico\n" + "[]\n" + "check_str=0x100,\"SEGA TOYS PICO\"\n" + "hw=pico\n" + "[]\n" + "check_str=0x100,\"SAMSUNG PICO\"\n" + "hw=pico\n" + "[]\n" "check_str=0x100,\"IMA IKUNOUJYUKU\"\n" "hw=pico\n" "[]\n" From 
c1d34b8adf63f0693fb531af6fa013d6d6bcbd2c Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 9 Feb 2022 22:44:06 +0000 Subject: [PATCH 0709/1110] libchdr, switch to main repo --- .gitmodules | 2 +- pico/cd/libchdr | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index e0e639b8..53449e32 100644 --- a/.gitmodules +++ b/.gitmodules @@ -9,7 +9,7 @@ url = https://github.com/digital-sound-antiques/emu2413.git [submodule "pico/cd/libchdr"] path = pico/cd/libchdr - url = https://github.com/irixxxx/libchdr-picodrive.git + url = https://github.com/rtissera/libchdr.git [submodule "platform/common/dr_libs"] path = platform/common/dr_libs url = https://github.com/mackron/dr_libs.git diff --git a/pico/cd/libchdr b/pico/cd/libchdr index 470c476a..a03e6931 160000 --- a/pico/cd/libchdr +++ b/pico/cd/libchdr @@ -1 +1 @@ -Subproject commit 470c476a9728788a87af20526f5d84e5226b8bd2 +Subproject commit a03e69319164f69d781ab8e453f8cf407387bd13 From d4a08748fa14445f4f77f64439a0540969254bc9 Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 9 Feb 2022 23:06:10 +0000 Subject: [PATCH 0710/1110] mcd, improve cd status reporting --- pico/cd/cd_image.c | 1 + pico/cd/cdd.c | 26 ++++++++++++++++++++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/pico/cd/cd_image.c b/pico/cd/cd_image.c index 3f220f88..65d22ebd 100644 --- a/pico/cd/cd_image.c +++ b/pico/cd/cd_image.c @@ -263,6 +263,7 @@ int load_cd_image(const char *cd_img_name, int *type) finish: cdd.toc.last = n - 1; cdd.toc.end = lba; + tracks[n].start = cdd.toc.end; sprintf_lba(tmp_ext, sizeof(tmp_ext), cdd.toc.end); elprintf(EL_STATUS, "End CD - %s\n", tmp_ext); diff --git a/pico/cd/cdd.c b/pico/cd/cdd.c index 00e12bad..571c5a77 100644 --- a/pico/cd/cdd.c +++ b/pico/cd/cdd.c @@ -902,8 +902,20 @@ void cdd_process(void) Pico_mcd->s68k_regs[0x38+0] = cdd.status; /* unless RS1 indicated invalid track infos */ - if (Pico_mcd->s68k_regs[0x38+1] == 0x0f) + if (Pico_mcd->s68k_regs[0x38+1] == 0x0f 
|| + Pico_mcd->s68k_regs[0x38+1] == 0x00 || + Pico_mcd->s68k_regs[0x38+1] == 0x01) { + int lba = cdd.lba + 150; + if (Pico_mcd->s68k_regs[0x38+1] == 0x01) + lba = abs(cdd.lba - cdd.toc.tracks[cdd.index].start); + if (Pico_mcd->s68k_regs[0x38+1] == 0x0f) + Pico_mcd->s68k_regs[0x38+1] = 0x00; + set_reg16(0x3a, lut_BCD_16[(lba/75)/60]); + set_reg16(0x3c, lut_BCD_16[(lba/75)%60]); + set_reg16(0x3e, lut_BCD_16[(lba%75)]); + Pico_mcd->s68k_regs[0x40+0] = cdd.index ? 0x00 : 0x04; + } else if (Pico_mcd->s68k_regs[0x38+1] == 0x02) { /* then return valid track infos, e.g current track number in RS2-RS3 (fixes Lunar - The Silver Star) */ Pico_mcd->s68k_regs[0x38+1] = 0x02; set_reg16(0x3a, (cdd.index < cdd.toc.last) ? lut_BCD_16[cdd.index + 1] : 0x0A0A); @@ -1009,6 +1021,16 @@ void cdd_process(void) break; } + case 0x06: /* Latest Error Information */ + { + set_reg16(0x38, (cdd.status << 8) | 0x06); + set_reg16(0x3a, 0x0000); + set_reg16(0x3c, 0x0000); + set_reg16(0x3e, 0x0000); + Pico_mcd->s68k_regs[0x40+0] = 0x00; + break; + } + default: { #ifdef LOG_ERROR @@ -1038,7 +1060,7 @@ void cdd_process(void) /* Wolf Team games (Anet Futatabi, Cobra Command, Road Avenger & Time Gal) need at least 6 interrupts delay */ /* Space Adventure Cobra (2nd morgue scene) needs at least 13 interrupts delay (incl. 
seek time, so 6 is OK) */ /* Jeopardy & ESPN Sunday Night NFL are picky about this as well: 10 interrupts delay (+ seek time) seems OK */ - cdd.latency = 10; + cdd.latency = 11; } /* CD drive seek time */ From 4fc85c80af2009eb07f0fe4ec62e474f8fecff4e Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 10 Feb 2022 22:06:47 +0000 Subject: [PATCH 0711/1110] pico, added detection by extension --- pico/cart.c | 6 +++--- pico/carthw.cfg | 21 +++++---------------- pico/carthw_cfg.c | 13 ++----------- pico/media.c | 18 +++++++++++++++++- pico/pico.h | 1 + 5 files changed, 28 insertions(+), 31 deletions(-) diff --git a/pico/cart.c b/pico/cart.c index 39d697c8..2e4f500c 100644 --- a/pico/cart.c +++ b/pico/cart.c @@ -837,7 +837,7 @@ int PicoCartInsert(unsigned char *rom, unsigned int romsize, const char *carthw_ } pdb_cleanup(); - PicoIn.AHW &= PAHW_MCD|PAHW_SMS; + PicoIn.AHW &= PAHW_MCD|PAHW_SMS|PAHW_PICO; PicoCartMemSetup = NULL; PicoDmaHook = NULL; @@ -846,9 +846,9 @@ int PicoCartInsert(unsigned char *rom, unsigned int romsize, const char *carthw_ PicoLoadStateHook = NULL; carthw_chunks = NULL; - if (!(PicoIn.AHW & (PAHW_MCD|PAHW_SMS))) + if (!(PicoIn.AHW & (PAHW_MCD|PAHW_SMS|PAHW_PICO))) PicoCartDetect(carthw_cfg); - else if (PicoIn.AHW & PAHW_SMS) + if (PicoIn.AHW & PAHW_SMS) PicoCartDetectMS(); // setup correct memory map for loaded ROM diff --git a/pico/carthw.cfg b/pico/carthw.cfg index 650f9404..b9c8db9c 100644 --- a/pico/carthw.cfg +++ b/pico/carthw.cfg @@ -42,24 +42,13 @@ check_str = 0x150, "VIRTUA RACING" check_str = 0x810, "OHMP" hw = svp -[Pico] -check_str = 0x100, "SEGA PICO" +[Soreike! 
Anpanman no Game de Asobou Anpanman - Pico] +check_str = 0x100, "SEGA IAC " hw = pico -[Pico] -check_str = 0x100, "SEGATOYS PICO" -hw = pico - -[Pico] -check_str = 0x100, "SEGA TOYS PICO" -hw = pico - -[Pico] -check_str = 0x100, "SAMSUNG PICO" -hw = pico - -[Pico] -check_str = 0x100, "IMA IKUNOUJYUKU" +# Unou Kaihatsu Series: IMA IKUNO[U]JYUKU +[Unou Kaihatsu Series - Pico] +check_str = 0x100, "IMA IKUNO" hw = pico # sram emulation triggers some protection for this one diff --git a/pico/carthw_cfg.c b/pico/carthw_cfg.c index 55c779e5..76de4a00 100644 --- a/pico/carthw_cfg.c +++ b/pico/carthw_cfg.c @@ -9,19 +9,10 @@ static const char builtin_carthw_cfg[] = "check_str=0x810,\"OHMP\"\n" "hw=svp\n" "[]\n" - "check_str=0x100,\"SEGA PICO\"\n" + "check_str=0x100,\"SEGA IAC \"\n" "hw=pico\n" "[]\n" - "check_str=0x100,\"SEGATOYS PICO\"\n" - "hw=pico\n" - "[]\n" - "check_str=0x100,\"SEGA TOYS PICO\"\n" - "hw=pico\n" - "[]\n" - "check_str=0x100,\"SAMSUNG PICO\"\n" - "hw=pico\n" - "[]\n" - "check_str=0x100,\"IMA IKUNOUJYUKU\"\n" + "check_str=0x100,\"IMA IKUNO\"\n" "hw=pico\n" "[]\n" "check_str=0x120,\"PUGGSY\"\n" diff --git a/pico/media.c b/pico/media.c index c944f989..ae878f92 100644 --- a/pico/media.c +++ b/pico/media.c @@ -36,6 +36,7 @@ static int detect_media(const char *fname) static const short sms_offsets[] = { 0x7ff0, 0x3ff0, 0x1ff0 }; static const char *sms_exts[] = { "sms", "gg", "sg" }; static const char *md_exts[] = { "gen", "smd" }; + static const char *pico_exts[] = { "pco" }; char buff0[512], buff[32]; unsigned short *d16; pm_file *pmf; @@ -78,8 +79,12 @@ static int detect_media(const char *fname) goto extension_check; } - /* MD header? Act as TMSS BIOS here */ if (pm_seek(pmf, 0x100, SEEK_SET) == 0x100 && pm_read(buff, 16, pmf) == 16) { + /* PICO header? Almost always appropriately marked */ + buff[16] = '\0'; + if (strstr(buff, " PICO ")) + goto looks_like_pico; + /* MD header? 
Act as TMSS BIOS here */ if (strncmp(buff, "SEGA", 4) == 0 || strncmp(buff, " SEG", 4) == 0) goto looks_like_md; } @@ -105,6 +110,10 @@ extension_check: if (strcasecmp(pmf->ext, sms_exts[i]) == 0) goto looks_like_sms; + for (i = 0; i < ARRAY_SIZE(pico_exts); i++) + if (strcasecmp(pmf->ext, pico_exts[i]) == 0) + goto looks_like_pico; + /* If everything else fails, make a guess on the reset vector */ d16 = (unsigned short *)(buff0 + 4); if ((((d16[0] << 16) | d16[1]) & 0xffffff) >= pmf->size) { @@ -124,6 +133,10 @@ looks_like_md: looks_like_sms: pm_close(pmf); return PM_MARK3; + +looks_like_pico: + pm_close(pmf); + return PM_PICO; } /* checks if fname points to valid MegaCD image */ @@ -250,6 +263,9 @@ enum media_type_e PicoLoadMedia(const char *filename, else if (media_type == PM_MARK3) { PicoIn.AHW = PAHW_SMS; } + else if (media_type == PM_PICO) { + PicoIn.AHW = PAHW_PICO; + } rom = pm_open(rom_fname); if (rom == NULL) { diff --git a/pico/pico.h b/pico/pico.h index 807ec492..c7e4aa2a 100644 --- a/pico/pico.h +++ b/pico/pico.h @@ -264,6 +264,7 @@ enum media_type_e { PM_BAD_CD_NO_BIOS = -4, PM_MD_CART = 1, /* also 32x */ PM_MARK3, + PM_PICO, PM_CD, }; From 3b68e5107d24d6b492db57a3976567297928ca3f Mon Sep 17 00:00:00 2001 From: kub Date: Mon, 14 Feb 2022 20:55:35 +0000 Subject: [PATCH 0712/1110] core vdp, fix obscure VInt bug, some more optimisation --- pico/pico_cmn.c | 4 +--- pico/videoport.c | 19 ++++++++++--------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/pico/pico_cmn.c b/pico/pico_cmn.c index 6866fdac..ab581ed4 100644 --- a/pico/pico_cmn.c +++ b/pico/pico_cmn.c @@ -74,7 +74,7 @@ static void do_hint(struct PicoVideo *pv) static void do_timing_hacks_end(struct PicoVideo *pv) { - PicoVideoFIFOSync(488); + PicoVideoFIFOSync(CYCLES_M68K_LINE); } static void do_timing_hacks_start(struct PicoVideo *pv) @@ -185,7 +185,6 @@ static int PicoFrameHints(void) // also delay between F bit (bit 7) is set in SR and IRQ happens (Ex-Mutants) // also delay 
between last H-int and V-int (Golden Axe 3) Pico.t.m68c_line_start = Pico.t.m68c_aim; - PicoVideoFIFOMode(pv->reg[1]&0x40, pv->reg[12]&1); do_timing_hacks_start(pv); CPUS_RUN(CYCLES_M68K_VINT_LAG); @@ -289,7 +288,6 @@ static int PicoFrameHints(void) // Run scanline: Pico.t.m68c_line_start = Pico.t.m68c_aim; - PicoVideoFIFOMode(pv->reg[1]&0x40, pv->reg[12]&1); do_timing_hacks_start(pv); CPUS_RUN(CYCLES_M68K_LINE); do_timing_hacks_end(pv); diff --git a/pico/videoport.c b/pico/videoport.c index 4f9cbf10..e9a68097 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -201,7 +201,7 @@ enum { FQ_BYTE = 1, FQ_BGDMA = 2, FQ_FGDMA = 4 }; // queue flags, NB: BYTE = 1! #define Sl2Cyc(vf,sl) (vf->fifo_sl2cyc[sl]*clkdiv) // do the FIFO math -static NOINLINE int AdvanceFIFOEntry(struct VdpFIFO *vf, struct PicoVideo *pv, int slots) +static int AdvanceFIFOEntry(struct VdpFIFO *vf, struct PicoVideo *pv, int slots) { u32 *qx = &vf->fifo_queue[vf->fifo_qx]; int l = slots, b = *qx & FQ_BYTE; @@ -320,7 +320,6 @@ static int PicoVideoFIFORead(void) int burn = 0; if (vf->fifo_ql) { - PicoVideoFIFOSync(lc); // advance FIFO and CPU until FIFO is empty burn = PicoVideoFIFODrain(0, lc, FQ_BGDMA); lc += burn; @@ -343,15 +342,16 @@ int PicoVideoFIFOWrite(int count, int flags, unsigned sr_mask,unsigned sr_flags) struct VdpFIFO *vf = &VdpFIFO; struct PicoVideo *pv = &Pico.video; int lc = SekCyclesDone()-Pico.t.m68c_line_start; - int burn = 0; + int burn = 0, x; - if (vf->fifo_total >= 4 || (pv->status & SR_DMA)) + // sync only needed if queue is too full or background dma might be deferred + if (vf->fifo_ql >= 6 || (pv->status & SR_DMA)) PicoVideoFIFOSync(lc); pv->status = (pv->status & ~sr_mask) | sr_flags; if (count && vf->fifo_ql < 7) { // determine queue position for entry - int x = (vf->fifo_qx + vf->fifo_ql - 1) & 7; + x = (vf->fifo_qx + vf->fifo_ql - 1) & 7; if (unlikely(vf->fifo_queue[x] & FQ_BGDMA)) { // CPU FIFO writes have priority over a background DMA Fill/Copy vf->fifo_queue[(x+1) 
& 7] = vf->fifo_queue[x]; // push bg DMA back @@ -383,7 +383,9 @@ int PicoVideoFIFOWrite(int count, int flags, unsigned sr_mask,unsigned sr_flags) } // if CPU is waiting for the bus, advance CPU and FIFO until bus is free - if (vf->fifo_total > 4 && (pv->status & PVS_CPUWR)) + // do this only if it would exhaust the available slots since last sync + x = (Cyc2Sl(vf,lc) - vf->fifo_slot) / 2; // lower bound of FIFO ents + if (vf->fifo_total > 4 + x && (pv->status & PVS_CPUWR)) burn = PicoVideoFIFODrain(4, lc, 0); return burn; @@ -401,10 +403,9 @@ int PicoVideoFIFOHint(void) vf->fifo_slot = 0; // if CPU is waiting for the bus, advance CPU and FIFO until bus is free - if (pv->status & PVS_CPUWR) { - PicoVideoFIFOSync(lc); + if (pv->status & PVS_CPUWR) burn = PicoVideoFIFODrain(4, lc, 0); - } else if (pv->status & PVS_CPURD) + else if (pv->status & PVS_CPURD) burn = PicoVideoFIFORead(); return burn; From 099f68c22ad58c9c3a60ba16c4065053273ae337 Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 15 Feb 2022 22:15:12 +0000 Subject: [PATCH 0713/1110] core vdp, fix regression after last commit --- pico/pico_cmn.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pico/pico_cmn.c b/pico/pico_cmn.c index ab581ed4..2bfd1a99 100644 --- a/pico/pico_cmn.c +++ b/pico/pico_cmn.c @@ -185,6 +185,7 @@ static int PicoFrameHints(void) // also delay between F bit (bit 7) is set in SR and IRQ happens (Ex-Mutants) // also delay between last H-int and V-int (Golden Axe 3) Pico.t.m68c_line_start = Pico.t.m68c_aim; + PicoVideoFIFOMode(pv->reg[1]&0x40, pv->reg[12]&1); do_timing_hacks_start(pv); CPUS_RUN(CYCLES_M68K_VINT_LAG); @@ -288,6 +289,7 @@ static int PicoFrameHints(void) // Run scanline: Pico.t.m68c_line_start = Pico.t.m68c_aim; + PicoVideoFIFOMode(pv->reg[1]&0x40, pv->reg[12]&1); do_timing_hacks_start(pv); CPUS_RUN(CYCLES_M68K_LINE); do_timing_hacks_end(pv); From b633247f35f9ad30d64f51f6ea4e2ba8ab1f1d52 Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 16 Feb 2022 22:04:10 +0000 Subject: [PATCH 
0714/1110] mcd, accelerate scale/rot rendering code --- pico/cd/gfx.c | 285 ++++++++++++++++++++++++++++---------------------- 1 file changed, 158 insertions(+), 127 deletions(-) diff --git a/pico/cd/gfx.c b/pico/cd/gfx.c index 0a231b7a..979e3adb 100644 --- a/pico/cd/gfx.c +++ b/pico/cd/gfx.c @@ -52,7 +52,8 @@ typedef struct uint32 y_step; /* pico: render line step */ uint8 lut_prio[4][0x10][0x10]; /* WORD-RAM data writes priority lookup table */ uint8 lut_pixel[0x200]; /* Graphics operation dot offset lookup table */ - uint8 lut_cell[0x100]; /* Graphics operation stamp offset lookup table */ + uint16 lut_cell2[0x80]; /* Graphics operation stamp offset lookup table */ + uint16 lut_cell4[0x80]; /* Graphics operation stamp offset lookup table */ } gfx_t; static gfx_t gfx; @@ -66,7 +67,7 @@ static void gfx_schedule(void); void gfx_init(void) { int i, j; - uint8 mask, row, col, temp; + uint8 row, col, temp; memset(&gfx, 0, sizeof(gfx)); @@ -87,24 +88,23 @@ void gfx_init(void) } /* Initialize cell lookup table */ - /* table entry = yyxxshrr (8 bits) */ + /* table entry = yyxxhrr (7 bits) */ /* with: yy = cell row (0-3) */ /* xx = cell column (0-3) */ - /* s = stamp size (0=16x16, 1=32x32) */ /* hrr = HFLIP & ROTATION bits */ - for (i=0; i<0x100; i++) + for (i=0; i<0x80; i++) { /* one stamp = 2x2 cells (16x16) or 4x4 cells (32x32) */ - mask = (i & 8) ? 
3 : 1; - row = (i >> 6) & mask; - col = (i >> 4) & mask; + row = (i >> 5) & 3; + col = (i >> 3) & 3; - if (i & 4) { col = col ^ mask; } /* HFLIP (always first) */ - if (i & 2) { col = col ^ mask; row = row ^ mask; } /* ROLL1 */ - if (i & 1) { temp = col; col = row ^ mask; row = temp; } /* ROLL0 */ + if (i & 4) { col = col ^ 3; } /* HFLIP (always first) */ + if (i & 2) { col = col ^ 3; row = row ^ 3; } /* ROLL1 */ + if (i & 1) { temp = col; col = row ^ 3; row = temp; } /* ROLL0 */ /* cell offset (0-3 or 0-15) */ - gfx.lut_cell[i] = row + col * (mask + 1); + gfx.lut_cell2[i] = ((row&1) + (col&1) * 2) << 6; + gfx.lut_cell4[i] = ((row&3) + (col&3) * 4) << 6; } /* Initialize pixel lookup table */ @@ -175,12 +175,139 @@ int gfx_context_load(const uint8 *state) return bufferptr; } +static inline int gfx_pixel(uint32 xpos, uint32 ypos, uint16 *lut_cell) +{ + uint16 stamp_data; + uint32 stamp_index; + uint8 pixel_out = 0x00; + + /* check if pixel is outside stamp map */ + if (((xpos | ypos) & ~gfx.dotMask) == 0) + { + /* read stamp map table data */ + stamp_data = gfx.mapPtr[(xpos >> gfx.stampShift) | ((ypos >> gfx.stampShift) << gfx.mapShift)]; + + /* stamp generator base index */ + /* sss ssssssss ccyyyxxx (16x16) or sss sssssscc ccyyyxxx (32x32) */ + /* with: s = stamp number (1 stamp = 16x16 or 32x32 pixels) */ + /* c = cell offset (0-3 for 16x16, 0-15 for 32x32) */ + /* yyy = line offset (0-7) */ + /* xxx = pixel offset (0-7) */ + stamp_index = (stamp_data & 0x7ff) << 8; + + if (stamp_index) + { + /* extract HFLIP & ROTATION bits */ + stamp_data = (stamp_data >> 13) & 7; + + /* cell offset (0-3 or 0-15) */ + /* table entry = yyxxhrr (7 bits) */ + /* with: yy = cell row (0-3) = (ypos >> (11 + 3)) & 3 */ + /* xx = cell column (0-3) = (xpos >> (11 + 3)) & 3 */ + /* hrr = HFLIP & ROTATION bits */ + stamp_index |= lut_cell[stamp_data | ((ypos >> 9) & 0x60) | ((xpos >> 11) & 0x18)]; + + /* pixel offset (0-63) */ + /* table entry = yyyxxxhrr (9 bits) */ + /* with: yyy = pixel 
row (0-7) = (ypos >> 11) & 7 */ + /* xxx = pixel column (0-7) = (xpos >> 11) & 7 */ + /* hrr = HFLIP & ROTATION bits */ + stamp_index |= gfx.lut_pixel[stamp_data | ((xpos >> 8) & 0x38) | ((ypos >> 5) & 0x1c0)]; + + /* read pixel pair (2 pixels/byte) */ + pixel_out = READ_BYTE(Pico_mcd->word_ram2M, stamp_index >> 1); + + /* extract left or rigth pixel */ + if (!(stamp_index & 1)) + { + pixel_out >>= 4; + } + pixel_out &= 0x0f; + } + } + + return pixel_out; +} + +#define RENDER_LOOP(N, UPDP, COND1, COND2) do { \ + if (bufferIndex & 1) { \ + bufferIndex &= ~1; \ + goto right##N; /* no initial left pixel */ \ + } \ + /* process all dots */ \ + while (width--) \ + { \ + /* left pixel */ \ + xpos &= mask; \ + ypos &= mask; \ + \ + if (COND1) { \ + pixel_out = gfx_pixel(xpos, ypos, lut_cell); \ + UPDP; \ + } \ + \ + if (COND2) { \ + /* read out paired pixel data */ \ + pixel_in = READ_BYTE(Pico_mcd->word_ram2M, bufferIndex >> 1); \ + \ + /* priority mode write */ \ + pixel_in = (lut_prio[(pixel_in & 0xf0) >> 4][pixel_out] << 4) | \ + (pixel_in & 0x0f); \ + \ + /* write data to image buffer */ \ + WRITE_BYTE(Pico_mcd->word_ram2M, bufferIndex >> 1, pixel_in); \ + } \ + \ + /* increment pixel position */ \ + xpos += xoffset; \ + ypos += yoffset; \ + \ +right##N: \ + if (width-- == 0) break; \ + \ + /* right pixel */ \ + xpos &= mask; \ + ypos &= mask; \ + \ + if (COND1) { \ + pixel_out = gfx_pixel(xpos, ypos, lut_cell); \ + UPDP; \ + } \ + \ + if (COND2) { \ + /* read out paired pixel data */ \ + pixel_in = READ_BYTE(Pico_mcd->word_ram2M, bufferIndex >> 1); \ + \ + /* priority mode write */ \ + pixel_in = (lut_prio[pixel_in & 0x0f][pixel_out]) | \ + (pixel_in & 0xf0); \ + \ + /* write data to image buffer */ \ + WRITE_BYTE(Pico_mcd->word_ram2M, bufferIndex >> 1, pixel_in); \ + } \ + \ + /* increment pixel position */ \ + xpos += xoffset; \ + ypos += yoffset; \ + \ + /* next pixel */ \ + bufferIndex += 2; \ + /* check current pixel position */ \ + if ((bufferIndex & 7) == 0) 
\ + { \ + /* next cell: increment buffer offset by one column (minus 8 pixels) */ \ + bufferIndex += gfx.bufferOffset-1; \ + } \ + } \ +} while (0) + static void gfx_render(uint32 bufferIndex, uint32 width) { uint8 pixel_in, pixel_out; - uint16 stamp_data; - uint32 stamp_index; uint32 priority; + uint8 (*lut_prio)[0x10]; + uint16 *lut_cell; + uint32 mask; /* pixel map start position for current line (13.3 format converted to 13.11) */ uint32 xpos = *gfx.tracePtr++ << 8; @@ -192,122 +319,26 @@ static void gfx_render(uint32 bufferIndex, uint32 width) priority = (Pico_mcd->s68k_regs[2] << 8) | Pico_mcd->s68k_regs[3]; priority = (priority >> 3) & 0x03; + lut_prio = gfx.lut_prio[priority]; - /* process all dots */ - while (width--) + lut_cell = (Pico_mcd->s68k_regs[0x58+1] & 0x02) ? gfx.lut_cell4 : gfx.lut_cell2; + + /* check if stamp map is repeated */ + mask = 0xffffff; /* 24-bit range */ + if (Pico_mcd->s68k_regs[0x58+1] & 0x01) { - /* check if stamp map is repeated */ - if (Pico_mcd->s68k_regs[0x58+1] & 0x01) - { - /* stamp map range */ - xpos &= gfx.dotMask; - ypos &= gfx.dotMask; - } - else - { - /* 24-bit range */ - xpos &= 0xffffff; - ypos &= 0xffffff; - } + /* stamp map range */ + mask = gfx.dotMask; + } - /* check if pixel is outside stamp map */ - if ((xpos | ypos) & ~gfx.dotMask) - { - /* force pixel output to 0 */ - pixel_out = 0x00; - } - else - { - /* read stamp map table data */ - stamp_data = gfx.mapPtr[(xpos >> gfx.stampShift) | ((ypos >> gfx.stampShift) << gfx.mapShift)]; - - /* stamp generator base index */ - /* sss ssssssss ccyyyxxx (16x16) or sss sssssscc ccyyyxxx (32x32) */ - /* with: s = stamp number (1 stamp = 16x16 or 32x32 pixels) */ - /* c = cell offset (0-3 for 16x16, 0-15 for 32x32) */ - /* yyy = line offset (0-7) */ - /* xxx = pixel offset (0-7) */ - stamp_index = (stamp_data & 0x7ff) << 8; - - if (stamp_index) - { - /* extract HFLIP & ROTATION bits */ - stamp_data = (stamp_data >> 13) & 7; - - /* cell offset (0-3 or 0-15) */ - /* table 
entry = yyxxshrr (8 bits) */ - /* with: yy = cell row (0-3) = (ypos >> (11 + 3)) & 3 */ - /* xx = cell column (0-3) = (xpos >> (11 + 3)) & 3 */ - /* s = stamp size (0=16x16, 1=32x32) */ - /* hrr = HFLIP & ROTATION bits */ - stamp_index |= gfx.lut_cell[ - stamp_data | ((Pico_mcd->s68k_regs[0x58+1] & 0x02) << 2 ) - | ((ypos >> 8) & 0xc0) | ((xpos >> 10) & 0x30)] << 6; - - /* pixel offset (0-63) */ - /* table entry = yyyxxxhrr (9 bits) */ - /* with: yyy = pixel row (0-7) = (ypos >> 11) & 7 */ - /* xxx = pixel column (0-7) = (xpos >> 11) & 7 */ - /* hrr = HFLIP & ROTATION bits */ - stamp_index |= gfx.lut_pixel[stamp_data | ((xpos >> 8) & 0x38) | ((ypos >> 5) & 0x1c0)]; - - /* read pixel pair (2 pixels/byte) */ - pixel_out = READ_BYTE(Pico_mcd->word_ram2M, stamp_index >> 1); - - /* extract left or rigth pixel */ - if (stamp_index & 1) - { - pixel_out &= 0x0f; - } - else - { - pixel_out >>= 4; - } - } - else - { - /* stamp 0 is not used: force pixel output to 0 */ - pixel_out = 0x00; - } - } - - /* read out paired pixel data */ - pixel_in = READ_BYTE(Pico_mcd->word_ram2M, bufferIndex >> 1); - - /* update left or rigth pixel */ - if (bufferIndex & 1) - { - /* priority mode write */ - pixel_out = gfx.lut_prio[priority][pixel_in & 0x0f][pixel_out]; - - pixel_out |= (pixel_in & 0xf0); - } - else - { - /* priority mode write */ - pixel_out = gfx.lut_prio[priority][pixel_in >> 4][pixel_out]; - - pixel_out = (pixel_out << 4) | (pixel_in & 0x0f); - } - - /* write data to image buffer */ - WRITE_BYTE(Pico_mcd->word_ram2M, bufferIndex >> 1, pixel_out); - - /* check current pixel position */ - if ((bufferIndex & 7) != 7) - { - /* next pixel */ - bufferIndex++; - } - else - { - /* next cell: increment image buffer offset by one column (minus 7 pixels) */ - bufferIndex += gfx.bufferOffset; - } - - /* increment pixel position */ - xpos += xoffset; - ypos += yoffset; + pixel_out = 0; + if (xoffset+(1U<<10) <= 1U<<11 && yoffset+(1U<<10) <= 1U<<11) { + /* upscaling >= 2x, test for 
duplicate pixels to avoid recalculation */ + uint32 oldx, oldy; + oldx = oldy = ~xpos; + RENDER_LOOP(1, oldx = xpos;oldy = ypos, (oldx^xpos ^ oldy^ypos) >> 11, (!priority) | pixel_out); + } else { + RENDER_LOOP(3, , 1, (!priority) | pixel_out); } } From 7ce6a6d18ed56439bac005434c56586aa7182714 Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 17 Feb 2022 19:57:37 +0000 Subject: [PATCH 0715/1110] mcd, accelerate scale/rot rendering code --- pico/cd/gfx.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/pico/cd/gfx.c b/pico/cd/gfx.c index 979e3adb..2d30c15f 100644 --- a/pico/cd/gfx.c +++ b/pico/cd/gfx.c @@ -88,7 +88,7 @@ void gfx_init(void) } /* Initialize cell lookup table */ - /* table entry = yyxxhrr (7 bits) */ + /* table entry = yyxxhrr (7 bits) */ /* with: yy = cell row (0-3) */ /* xx = cell column (0-3) */ /* hrr = HFLIP & ROTATION bits */ @@ -201,7 +201,7 @@ static inline int gfx_pixel(uint32 xpos, uint32 ypos, uint16 *lut_cell) stamp_data = (stamp_data >> 13) & 7; /* cell offset (0-3 or 0-15) */ - /* table entry = yyxxhrr (7 bits) */ + /* table entry = yyxxhrr (7 bits) */ /* with: yy = cell row (0-3) = (ypos >> (11 + 3)) & 3 */ /* xx = cell column (0-3) = (xpos >> (11 + 3)) & 3 */ /* hrr = HFLIP & ROTATION bits */ @@ -212,16 +212,13 @@ static inline int gfx_pixel(uint32 xpos, uint32 ypos, uint16 *lut_cell) /* with: yyy = pixel row (0-7) = (ypos >> 11) & 7 */ /* xxx = pixel column (0-7) = (xpos >> 11) & 7 */ /* hrr = HFLIP & ROTATION bits */ - stamp_index |= gfx.lut_pixel[stamp_data | ((xpos >> 8) & 0x38) | ((ypos >> 5) & 0x1c0)]; + stamp_index |= gfx.lut_pixel[stamp_data | ((ypos >> 5) & 0x1c0) | ((xpos >> 8) & 0x38)]; /* read pixel pair (2 pixels/byte) */ pixel_out = READ_BYTE(Pico_mcd->word_ram2M, stamp_index >> 1); - /* extract left or rigth pixel */ - if (!(stamp_index & 1)) - { - pixel_out >>= 4; - } + /* extract left or right pixel */ + pixel_out >>= 4 * !(stamp_index & 1); pixel_out &= 0x0f; } } @@ -231,7 +228,7 @@ 
static inline int gfx_pixel(uint32 xpos, uint32 ypos, uint16 *lut_cell) #define RENDER_LOOP(N, UPDP, COND1, COND2) do { \ if (bufferIndex & 1) { \ - bufferIndex &= ~1; \ + bufferIndex ^= 1; \ goto right##N; /* no initial left pixel */ \ } \ /* process all dots */ \ @@ -424,10 +421,9 @@ static void gfx_schedule(void) h = (Pico_mcd->s68k_regs[0x64] << 8) | Pico_mcd->s68k_regs[0x65]; cycles = 5 * w * h; + y_step = h; if (cycles > UPDATE_CYCLES) y_step = (UPDATE_CYCLES + 5 * w - 1) / (5 * w); - else - y_step = h; gfx.y_step = y_step; pcd_event_schedule_s68k(PCD_EVENT_GFX, 5 * w * y_step); From 4f6d3b2847650d689dd727ae2f1d3d3a5f5f1546 Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 19 Feb 2022 21:23:54 +0000 Subject: [PATCH 0716/1110] core vdp, optimize fifo writes --- pico/videoport.c | 59 ++++++++++++++++++++---------------------------- 1 file changed, 24 insertions(+), 35 deletions(-) diff --git a/pico/videoport.c b/pico/videoport.c index e9a68097..6c99fcf8 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -237,7 +237,7 @@ static void SetFIFOState(struct VdpFIFO *vf, struct PicoVideo *pv) } } if (vf->fifo_ql == 0) { - st &= ~(PVS_CPURD|PVS_FIFORUN); + st &= ~PVS_CPURD; // terminate DMA if applicable if (!(st & PVS_DMAFILL)) { st &= ~(SR_DMA|PVS_DMABG); @@ -345,47 +345,36 @@ int PicoVideoFIFOWrite(int count, int flags, unsigned sr_mask,unsigned sr_flags) int burn = 0, x; // sync only needed if queue is too full or background dma might be deferred - if (vf->fifo_ql >= 6 || (pv->status & SR_DMA)) + if ((vf->fifo_ql >= 6) | (pv->status & SR_DMA)) PicoVideoFIFOSync(lc); pv->status = (pv->status & ~sr_mask) | sr_flags; - if (count && vf->fifo_ql < 7) { - // determine queue position for entry - x = (vf->fifo_qx + vf->fifo_ql - 1) & 7; - if (unlikely(vf->fifo_queue[x] & FQ_BGDMA)) { - // CPU FIFO writes have priority over a background DMA Fill/Copy - vf->fifo_queue[(x+1) & 7] = vf->fifo_queue[x]; // push bg DMA back - x = (x-1) & 7; - if (vf->fifo_ql == 1) { - // XXX if 
interrupting a DMA fill, fill data changes - pv->status &= ~PVS_FIFORUN; - } - } + x = (vf->fifo_qx + vf->fifo_ql - 1) & 7; + if (unlikely(vf->fifo_queue[x] & FQ_BGDMA)) + x = (x-1) & 7; // ignore bg dma ent (pushed back below if new ent created) - if (!(flags & FQ_BGDMA)) - vf->fifo_total += count; + // determine queue position for entry + if (!(flags & FQ_BGDMA)) + vf->fifo_total += count; + if (!vf->fifo_ql) + vf->fifo_slot = Cyc2Sl(vf, lc+7); // FIFO latency ~3 vdp slots - count <<= (flags & FQ_BYTE); - if ((pv->status & PVS_FIFORUN) && (vf->fifo_queue[x] & 7) == flags) { - // amalgamate entries if of same type - vf->fifo_queue[x] += (count << 3); - } else { - // create new xfer queue entry - vf->fifo_ql ++; - x = (x+1) & 7; - vf->fifo_queue[x] = (count << 3) | flags; - } - - // update FIFO state if it was empty - if (!(pv->status & PVS_FIFORUN)) - vf->fifo_slot = Cyc2Sl(vf, lc+7); // FIFO latency ~3 vdp slots - pv->status |= PVS_FIFORUN; + count <<= (flags & FQ_BYTE)+3; + if (vf->fifo_queue[x] && (vf->fifo_queue[x] & 7) == flags) { + // amalgamate entries if of same type and not empty (in case of bgdma) + vf->fifo_queue[x] += count; + } else { + // create new xfer queue entry + vf->fifo_ql ++; + x = (x+1) & 7; + vf->fifo_queue[(x+1)&7] = vf->fifo_queue[x]; // push back bg dma if exists + vf->fifo_queue[x] = count | flags; } // if CPU is waiting for the bus, advance CPU and FIFO until bus is free // do this only if it would exhaust the available slots since last sync x = (Cyc2Sl(vf,lc) - vf->fifo_slot) / 2; // lower bound of FIFO ents - if (vf->fifo_total > 4 + x && (pv->status & PVS_CPUWR)) + if ((pv->status & PVS_CPUWR) && vf->fifo_total > 4 + x) burn = PicoVideoFIFODrain(4, lc, 0); return burn; @@ -763,7 +752,7 @@ static NOINLINE void CommandDma(void) elprintf(EL_VDPDMA, "Dma overlap, left=%d @ %06x", VdpFIFO.fifo_total, SekPc); VdpFIFO.fifo_total = VdpFIFO.fifo_ql = 0; - pvid->status &= ~(PVS_FIFORUN|PVS_DMAFILL); + pvid->status &= ~PVS_DMAFILL; } len = 
GetDmaLength(); @@ -1157,7 +1146,7 @@ void PicoVideoLoad(void) vf->fifo_ql = vf->fifo_qx = vf->fifo_total = 0; if (pv->fifo_cnt) { int wc = pv->fifo_cnt; - pv->status |= PVS_FIFORUN|PVS_CPUWR; + pv->status |= PVS_CPUWR; vf->fifo_total = (wc+b) >> b; vf->fifo_queue[vf->fifo_qx + vf->fifo_ql] = (wc << 3) | b | FQ_FGDMA; vf->fifo_ql ++; @@ -1165,7 +1154,7 @@ void PicoVideoLoad(void) if (pv->fifo_bgcnt) { int wc = pv->fifo_bgcnt; if (!vf->fifo_ql) - pv->status |= PVS_FIFORUN|PVS_DMABG; + pv->status |= PVS_DMABG; vf->fifo_queue[vf->fifo_qx + vf->fifo_ql] = (wc << 3) | FQ_BGDMA; vf->fifo_ql ++; } From bd9cbae4116e9b17a9dcc290da8721863631f2fc Mon Sep 17 00:00:00 2001 From: kub Date: Sun, 20 Feb 2022 22:38:12 +0000 Subject: [PATCH 0717/1110] build, fix cyclone build (replace CFLAGS in environment) --- platform/common/common.mak | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/platform/common/common.mak b/platform/common/common.mak index 0aabeab6..183a7159 100644 --- a/platform/common/common.mak +++ b/platform/common/common.mak @@ -200,7 +200,8 @@ $(FR)cpu/cyclone/Cyclone.h: $(FR)cpu/cyclone/Cyclone.s: $(FR)cpu/$(CYCLONE_CONFIG) @echo building Cyclone... 
- @make CC=$(CYCLONE_CC) CXX=$(CYCLONE_CXX) -C $(R)cpu/cyclone/ CONFIG_FILE=../$(CYCLONE_CONFIG) HAVE_ARMv6=$(HAVE_ARMv6) + @export CC=$(CYCLONE_CC) CXX=$(CYCLONE_CXX) CFLAGS=-O2; \ + make -C $(R)cpu/cyclone/ CONFIG_FILE=../$(CYCLONE_CONFIG) HAVE_ARMv6=$(HAVE_ARMv6) $(FR)cpu/cyclone/Cyclone.s: $(FR)cpu/cyclone/*.cpp $(FR)cpu/cyclone/*.h From 74bd70403b7da8c35edf70113f3de06bcb38d72c Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 22 Feb 2022 20:40:34 +0000 Subject: [PATCH 0718/1110] platform sdl, add config file option for fullscreen --- platform/common/main.c | 4 ++-- platform/common/menu_pico.c | 1 + platform/common/menu_pico.h | 1 + platform/common/plat_sdl.c | 5 ++--- platform/gp2x/plat.c | 2 +- 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/platform/common/main.c b/platform/common/main.c index 3c1998cd..571b8884 100644 --- a/platform/common/main.c +++ b/platform/common/main.c @@ -84,12 +84,12 @@ int main(int argc, char *argv[]) //in_probe(); plat_target_init(); - plat_init(); - menu_init(); emu_prep_defconfig(); // depends on input emu_read_config(NULL, 0); + plat_init(); + menu_init(); emu_init(); engineState = PGS_Menu; diff --git a/platform/common/menu_pico.c b/platform/common/menu_pico.c index 812ea842..8ea84328 100644 --- a/platform/common/menu_pico.c +++ b/platform/common/menu_pico.c @@ -1316,6 +1316,7 @@ static menu_entry e_menu_hidden[] = { mee_onoff("Accurate sprites", MA_OPT_ACC_SPRITES, PicoIn.opt, POPT_ACC_SPRITES), mee_onoff("autoload savestates", MA_OPT_AUTOLOAD_SAVE, g_autostateld_opt, 1), + mee_onoff("SDL fullscreen mode", MA_OPT_VOUT_FULL, plat_target.vout_fullscreen, 1), mee_end, }; diff --git a/platform/common/menu_pico.h b/platform/common/menu_pico.h index fb1c67d9..dc9d6e99 100644 --- a/platform/common/menu_pico.h +++ b/platform/common/menu_pico.h @@ -40,6 +40,7 @@ typedef enum MA_OPT_INTERLACED, /* giz */ MA_OPT_TEARING_FIX, /* wiz */ MA_OPT_VOUT_MODE, + MA_OPT_VOUT_FULL, MA_OPT_AUTOLOAD_SAVE, MA_OPT_SOUND_FILTER, 
MA_OPT_SOUND_ALPHA, diff --git a/platform/common/plat_sdl.c b/platform/common/plat_sdl.c index 7198136c..d66a2ace 100644 --- a/platform/common/plat_sdl.c +++ b/platform/common/plat_sdl.c @@ -128,8 +128,6 @@ static int clear_buf_cnt, clear_stat_cnt; void plat_video_set_size(int w, int h) { if (area.w != w || area.h != h) { - area = (struct area) { w, h }; - if (plat_sdl_change_video_mode(w, h, 0) < 0) { // failed, revert to original resolution plat_sdl_change_video_mode(g_screen_width, g_screen_height, 0); @@ -141,6 +139,7 @@ void plat_video_set_size(int w, int h) g_screen_ppitch = w; g_screen_ptr = plat_sdl_screen->pixels; } + area = (struct area) { w, h }; } } @@ -193,7 +192,7 @@ void plat_video_clear_status(void) void plat_video_clear_buffers(void) { if (plat_sdl_overlay != NULL || plat_sdl_gl_active) - memset(shadow_fb, 0, plat_sdl_screen->w*plat_sdl_screen->h * 2); + memset(shadow_fb, 0, g_menuscreen_w * g_menuscreen_h * 2); else { memset(g_screen_ptr, 0, plat_sdl_screen->w*plat_sdl_screen->h * 2); clear_buf_cnt = 3; // do it thrice in case of triple buffering diff --git a/platform/gp2x/plat.c b/platform/gp2x/plat.c index 4fcf76f4..b25ce850 100644 --- a/platform/gp2x/plat.c +++ b/platform/gp2x/plat.c @@ -16,7 +16,7 @@ #include "warm.h" #include "plat.h" -#include +#include /* GP2X local */ int gp2x_current_bpp; From 8df4ddba1e4d25995ee493df92d4d3092dfdad2e Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 22 Feb 2022 22:26:32 +0000 Subject: [PATCH 0719/1110] platform sdl, improved menu keymap for international keyboards --- platform/common/inputmap_kbd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/platform/common/inputmap_kbd.c b/platform/common/inputmap_kbd.c index d1f8f03b..bf099036 100644 --- a/platform/common/inputmap_kbd.c +++ b/platform/common/inputmap_kbd.c @@ -40,10 +40,10 @@ const struct menu_keymap in_sdl_key_map[] = { { SDLK_RIGHT, PBTN_RIGHT }, { SDLK_RETURN, PBTN_MOK }, { SDLK_ESCAPE, PBTN_MBACK }, - { SDLK_SEMICOLON, 
PBTN_MA2 }, - { SDLK_QUOTE, PBTN_MA3 }, - { SDLK_LEFTBRACKET, PBTN_L }, - { SDLK_RIGHTBRACKET, PBTN_R }, + { SDLK_COMMA, PBTN_MA2 }, + { SDLK_PERIOD, PBTN_MA3 }, + { SDLK_o, PBTN_L }, + { SDLK_p, PBTN_R }, }; const int in_sdl_key_map_sz = sizeof(in_sdl_key_map) / sizeof(in_sdl_key_map[0]); From da64996b0ddde25852c7f95a04ed79f49cf59d01 Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 22 Feb 2022 22:31:01 +0000 Subject: [PATCH 0720/1110] update libpicofe --- platform/libpicofe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/platform/libpicofe b/platform/libpicofe index 25cfdf0a..81b1aa54 160000 --- a/platform/libpicofe +++ b/platform/libpicofe @@ -1 +1 @@ -Subproject commit 25cfdf0a342a64a01710c1b6fbe3b1b04f28975e +Subproject commit 81b1aa54a4d31f43f8f6d72c95a7898b2a871791 From 22917adcff6c24fcf7ecf4fd435fe59240d4a4e9 Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 26 Feb 2022 09:41:38 +0000 Subject: [PATCH 0721/1110] sms, add missing TMS VDP modes --- pico/mode4.c | 174 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 150 insertions(+), 24 deletions(-) diff --git a/pico/mode4.c b/pico/mode4.c index 55127e36..49541c8f 100644 --- a/pico/mode4.c +++ b/pico/mode4.c @@ -3,15 +3,12 @@ * (C) notaz, 2009-2010 * (C) kub, 2021 * - * currently supports VDP mode 4 (SMS and GG) and mode 2+0 (TMS) + * currently supports VDP mode 4 (SMS and GG) and mode 3-0 (TMS) + * modes numbered after the bit numbers used in Sega and TI documentation * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. */ -/* - * TODO: - * - other TMS9918 modes? 
- */ #include "pico_int.h" #include @@ -55,8 +52,8 @@ static int CollisionDetect(u8 *mb, u16 sx, unsigned int pack, int zoomed) return col; } -/* Mode 4 */ -/*========*/ +/* Mode 4 - SMS Graphics */ +/*=======================*/ static void TileBGM4(u16 sx, int pal) { @@ -333,14 +330,44 @@ static void DrawDisplayM4(int scanline) /* TMS Modes */ /*===========*/ -/* Background, Graphics modes */ +/* Background */ #define TMS_PIXELBG(x,p) \ t = (pack>>(7-p)) & 0x01; \ t = (pal >> (t << 2)) & 0x0f; \ - pd[x] = t; + if (t) \ + pd[x] = t; -static void TileNormBgGr(u16 sx, unsigned int pack, int pal) +static void TileNormBgM1(u16 sx, unsigned int pack, int pal) /* Text */ +{ + u8 *pd = Pico.est.HighCol + sx; + unsigned int t; + + TMS_PIXELBG(0, 0) + TMS_PIXELBG(1, 1) + TMS_PIXELBG(2, 2) + TMS_PIXELBG(3, 3) + TMS_PIXELBG(4, 4) + TMS_PIXELBG(5, 5) +} + +static void TileNormBgM2(u16 sx, int pal) /* Multicolor */ +{ + u8 *pd = Pico.est.HighCol + sx; + unsigned int pack = 0xf0; + unsigned int t; + + TMS_PIXELBG(0, 0) + TMS_PIXELBG(1, 1) + TMS_PIXELBG(2, 2) + TMS_PIXELBG(3, 3) + TMS_PIXELBG(4, 4) + TMS_PIXELBG(5, 5) + TMS_PIXELBG(6, 6) + TMS_PIXELBG(7, 7) +} + +static void TileNormBgMg(u16 sx, unsigned int pack, int pal) /* Graphics */ { u8 *pd = Pico.est.HighCol + sx; unsigned int t; @@ -494,11 +521,105 @@ static void DrawSpritesTMS(void) } } -/* Mode 2 */ -/*========*/ + +/* Mode 1 - Text */ +/*===============*/ /* Draw the background into a scanline; cells, dx, tilex, ty merged to reduce registers */ -static void DrawStripM2(const u8 *nametab, const u8 *coltab, const u8 *pattab, int cells_dx, int tilex_ty) +static void DrawStripM1(const u8 *nametab, const u8 *pattab, int cells_dx, int tilex_ty) +{ + // Draw tiles across screen: + for (; cells_dx > 0; cells_dx += 6, tilex_ty++, cells_dx -= 0x10000) + { + unsigned int pack, pal; + unsigned code; + + code = nametab[tilex_ty & 0x3f]; + pal = Pico.video.reg[7]; + pack = pattab[code << 3]; + TileNormBgM1(cells_dx, pack, pal); + } 
+} + +/* Draw a scanline */ +static void DrawDisplayM1(int scanline) +{ + struct PicoVideo *pv = &Pico.video; + u8 *nametab, *pattab; + int tilex, dx, cells; + int cellskip = 0; // XXX + int maxcells = 40; + + // name, color, pattern table: + nametab = PicoMem.vramb + ((pv->reg[2]<<10) & 0x3c00); + pattab = PicoMem.vramb + ((pv->reg[4]<<11) & 0x3800); + + nametab += ((scanline>>3) * maxcells); + pattab += (scanline & 0x7); + + tilex = cellskip & 0x1f; + cells = maxcells - cellskip; + dx = (cellskip << 3) + line_offset + 8; + + // tiles + if (!(pv->debug_p & PVD_KILL_B)) + DrawStripM1(nametab, pattab, dx | (cells << 16), tilex | (scanline << 16)); +} + + +/* Mode 2 - Multicolor */ +/*=====================*/ + +/* Draw the background into a scanline; cells, dx, tilex, ty merged to reduce registers */ +static void DrawStripM2(const u8 *nametab, const u8 *pattab, int cells_dx, int tilex_ty) +{ + // Draw tiles across screen: + for (; cells_dx > 0; cells_dx += 8, tilex_ty++, cells_dx -= 0x10000) + { + unsigned int pal; + unsigned code; + + code = nametab[tilex_ty & 0x1f]; + pal = pattab[code << 3]; + TileNormBgM2(cells_dx, pal); + } +} + +/* Draw a scanline */ +static void DrawDisplayM2(int scanline) +{ + struct PicoVideo *pv = &Pico.video; + u8 *nametab, *pattab; + int tilex, dx, cells; + int cellskip = 0; // XXX + int maxcells = 32; + + // name, color, pattern table: + nametab = PicoMem.vramb + ((pv->reg[2]<<10) & 0x3c00); + pattab = PicoMem.vramb + ((pv->reg[4]<<11) & 0x3800); + + nametab += (scanline>>5) << 5; + pattab += (scanline>>2) & 0x7; + + tilex = cellskip & 0x1f; + cells = maxcells - cellskip; + dx = (cellskip << 3) + line_offset + 8; + + // tiles + if (!(pv->debug_p & PVD_KILL_B)) + DrawStripM2(nametab, pattab, dx | (cells << 16), tilex | (scanline << 16)); + + // sprites + if (!(pv->debug_p & PVD_KILL_S_LO)) + DrawSpritesTMS(); +} + + +/* Mode 3 - Graphics II */ +/*======================*/ + +/* Draw the background into a scanline; cells, dx, tilex, ty 
merged to reduce registers */ +static void DrawStripM3(const u8 *nametab, const u8 *coltab, const u8 *pattab, int cells_dx, int tilex_ty) { // Draw tiles across screen: for (; cells_dx > 0; cells_dx += 8, tilex_ty++, cells_dx -= 0x10000) @@ -509,12 +630,12 @@ static void DrawStripM2(const u8 *nametab, const u8 *coltab, const u8 *pattab, i code = nametab[tilex_ty & 0x1f] << 3; pal = coltab[code]; pack = pattab[code]; - TileNormBgGr(cells_dx, pack, pal); + TileNormBgMg(cells_dx, pack, pal); } } /* Draw a scanline */ -static void DrawDisplayM2(int scanline) +static void DrawDisplayM3(int scanline) { struct PicoVideo *pv = &Pico.video; u8 *nametab, *coltab, *pattab; @@ -537,15 +658,16 @@ static void DrawDisplayM2(int scanline) // tiles if (!(pv->debug_p & PVD_KILL_B)) - DrawStripM2(nametab, coltab, pattab, dx | (cells << 16), tilex | (scanline << 16)); + DrawStripM3(nametab, coltab, pattab, dx | (cells << 16), tilex | (scanline << 16)); // sprites if (!(pv->debug_p & PVD_KILL_S_LO)) DrawSpritesTMS(); } -/* Mode 0 */ -/*========*/ + +/* Mode 0 - Graphics I */ +/*=====================*/ /* Draw the background into a scanline; cells, dx, tilex, ty merged to reduce registers */ static void DrawStripM0(const u8 *nametab, const u8 *coltab, const u8 *pattab, int cells_dx, int tilex_ty) @@ -559,7 +681,7 @@ static void DrawStripM0(const u8 *nametab, const u8 *coltab, const u8 *pattab, i code = nametab[tilex_ty & 0x1f]; pal = coltab[code >> 3]; pack = pattab[code << 3]; - TileNormBgGr(cells_dx, pack, pal); + TileNormBgMg(cells_dx, pack, pal); } } @@ -690,7 +812,9 @@ void PicoLineSMS(int line) BackFill(bgcolor, 0, &Pico.est); // bgcolor is from 2nd palette in mode 4 if (Pico.video.reg[1] & 0x40) { if (Pico.video.reg[0] & 0x04) DrawDisplayM4(line); - else if (Pico.video.reg[0] & 0x02) DrawDisplayM2(line); + else if (Pico.video.reg[0] & 0x02) DrawDisplayM3(line); + else if (Pico.video.reg[1] & 0x08) DrawDisplayM2(line); + else if (Pico.video.reg[1] & 0x10) DrawDisplayM1(line); else 
DrawDisplayM0(line); } @@ -705,12 +829,14 @@ norender: Pico.est.DrawLineDest = (char *)Pico.est.DrawLineDest + DrawLineDestIncrement; } -/* Fixed palette for TMS9918 modes */ +/* Palette for TMS9918 mode, see https://www.smspower.org/Development/Palette */ +// RGB values: #000000 #000000 #21c842 #5edc78 #5455ed #7d76fc #d4524d #42ebf5 +// #fc5554 #ff7978 #d4c154 #e6ce80 #21b03b #c95b5a #cccccc #ffffff static u16 tmspal[32] = { // SMS palette - 0x0000, 0x0000, 0x00a0, 0x00f0, 0x0500, 0x0f00, 0x0005, 0x0ff0, - 0x000a, 0x000f, 0x0055, 0x00ff, 0x0050, 0x0f0f, 0x0555, 0x0fff, - // TMS palette +// 0x0000, 0x0000, 0x00a0, 0x00f0, 0x0a00, 0x0f00, 0x0005, 0x0ff0, +// 0x000a, 0x000f, 0x0055, 0x00ff, 0x0050, 0x0f0f, 0x0555, 0x0fff, + // GG palette 0x0000, 0x0000, 0x04c2, 0x07d5, 0x0e55, 0x0f77, 0x045d, 0x0fe4, 0x055f, 0x077f, 0x05cd, 0x08ce, 0x03b2, 0x0b5c, 0x0ccc, 0x0fff, }; From aaef4b945edd614a4825231c4b31a0e377f9d0f3 Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 26 Feb 2022 10:19:07 +0000 Subject: [PATCH 0722/1110] sms, add basic sg-1000 support --- pico/sms.c | 11 ++++++++++- platform/common/menu_pico.c | 2 +- platform/libretro/libretro.c | 2 +- platform/opendingux/data/default.gcw0.desktop | 2 +- platform/opendingux/data/default.retrofw.desktop | 2 +- 5 files changed, 14 insertions(+), 5 deletions(-) diff --git a/pico/sms.c b/pico/sms.c index 39081363..dd301b08 100644 --- a/pico/sms.c +++ b/pico/sms.c @@ -454,6 +454,14 @@ static void write_bank_jang(unsigned short a, unsigned char d) } } +static void write_bank_x8k(unsigned short a, unsigned char d) +{ + if ((a&0xe000) != 0x2000) return; + ((unsigned char *)PicoMem.vram)[a+0x6000] = d; + z80_map_set(z80_read_map, 0x2000, 0x3fff, PicoMem.vram+0x4000, 0); + z80_map_set(z80_write_map, 0x2000, 0x3fff, PicoMem.vram+0x4000, 0); +} + // TODO auto-selecting is not really reliable. // Before adding more mappers this should be revised. 
static void xwrite(unsigned int a, unsigned char d) @@ -480,11 +488,12 @@ static void xwrite(unsigned int a, unsigned char d) write_bank_codem(a, d); write_bank_korea(a, d); write_bank_n16k(a, d); + write_bank_x8k(a, d); break; } } -// TMR product codes and hardware type for know 50Hz-only games */ +// TMR product codes and hardware type for know 50Hz-only games static u32 region_pal[] = { // cf. GX+, core/cart_hw/sms_cartc.c 0x40207067 /* Addams Family */, 0x40207020 /* Back.Future 3 */, 0x40207058 /* Battlemaniacs */, 0x40007105 /* Cal.Games 2 */, diff --git a/platform/common/menu_pico.c b/platform/common/menu_pico.c index 8ea84328..3ebaa4e3 100644 --- a/platform/common/menu_pico.c +++ b/platform/common/menu_pico.c @@ -42,7 +42,7 @@ static const char *rom_exts[] = { "bin", "smd", "gen", "md", "iso", "cso", "cue", "chd", "32x", - "sms", "gg", + "sms", "gg", "sg", NULL }; diff --git a/platform/libretro/libretro.c b/platform/libretro/libretro.c index 2399a3c8..b24028e8 100644 --- a/platform/libretro/libretro.c +++ b/platform/libretro/libretro.c @@ -733,7 +733,7 @@ void retro_get_system_info(struct retro_system_info *info) #define _GIT_VERSION "-" GIT_VERSION #endif info->library_version = VERSION _GIT_VERSION; - info->valid_extensions = "bin|gen|smd|md|32x|cue|iso|chd|sms|gg"; + info->valid_extensions = "bin|gen|smd|md|32x|cue|iso|chd|sms|gg|sg"; info->need_fullpath = true; } diff --git a/platform/opendingux/data/default.gcw0.desktop b/platform/opendingux/data/default.gcw0.desktop index 73094f03..49668c32 100644 --- a/platform/opendingux/data/default.gcw0.desktop +++ b/platform/opendingux/data/default.gcw0.desktop @@ -6,4 +6,4 @@ Exec=PicoDrive %f Icon=megadrive Terminal=false Categories=emulators; -MimeType=.md;.smd;.bin;.sms;.gg;.cue;.chd;.32x;.zip;.7z +MimeType=.md;.smd;.bin;.sms;.gg;.sg;.cue;.chd;.32x;.zip;.7z diff --git a/platform/opendingux/data/default.retrofw.desktop b/platform/opendingux/data/default.retrofw.desktop index 2ea9e044..7fcce634 100644 --- 
a/platform/opendingux/data/default.retrofw.desktop +++ b/platform/opendingux/data/default.retrofw.desktop @@ -6,7 +6,7 @@ Icon=megadrive Categories=emulators; X-OD-Manual= X-OD-Selector= -X-OD-Filter=.md,.smd,.bin,.sms,.gg,.cue,.chd,.32x,.zip,.7z +X-OD-Filter=.md,.smd,.bin,.sms,.gg,.sg,.cue,.chd,.32x,.zip,.7z Terminal=true Type=Application StartupNotify=true From 171fb8cc14d919354254e902b591d4d3d6a15bd3 Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 26 Feb 2022 21:48:18 +0000 Subject: [PATCH 0723/1110] sms, mapper for taiwanese sg-1000 ram extension --- pico/pico_int.h | 1 + pico/sms.c | 7 ++++++- platform/common/menu_pico.c | 2 +- platform/libretro/libretro.c | 2 ++ platform/libretro/libretro_core_options.h | 1 + 5 files changed, 11 insertions(+), 2 deletions(-) diff --git a/pico/pico_int.h b/pico/pico_int.h index 550cb5b5..f56ee553 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -351,6 +351,7 @@ struct PicoMisc #define PMS_MAP_N16K 6 #define PMS_MAP_JANGGUN 7 #define PMS_MAP_NEMESIS 8 +#define PMS_MAP_8KBRAM 9 struct PicoMS { diff --git a/pico/sms.c b/pico/sms.c index dd301b08..dd301f9c 100644 --- a/pico/sms.c +++ b/pico/sms.c @@ -456,7 +456,12 @@ static void write_bank_jang(unsigned short a, unsigned char d) static void write_bank_x8k(unsigned short a, unsigned char d) { + // 8KB address range @ 0x2000 if ((a&0xe000) != 0x2000) return; + // never autodetected, selectable only via config + if (Pico.ms.mapper != PMS_MAP_8KBRAM) return; + elprintf(EL_Z80BNK, "bank x8k %04x %02x @ %04x", a, d, z80_pc()); + ((unsigned char *)PicoMem.vram)[a+0x6000] = d; z80_map_set(z80_read_map, 0x2000, 0x3fff, PicoMem.vram+0x4000, 0); z80_map_set(z80_write_map, 0x2000, 0x3fff, PicoMem.vram+0x4000, 0); @@ -479,6 +484,7 @@ static void xwrite(unsigned int a, unsigned char d) case PMS_MAP_N16K: write_bank_n16k(a, d); break; case PMS_MAP_JANGGUN: write_bank_jang(a, d); break; case PMS_MAP_NEMESIS: write_bank_msxn(a, d); break; + case PMS_MAP_8KBRAM: write_bank_x8k(a, d); break; case 
PMS_MAP_AUTO: // NB the sequence of mappers is crucial for the auto detection @@ -488,7 +494,6 @@ static void xwrite(unsigned int a, unsigned char d) write_bank_codem(a, d); write_bank_korea(a, d); write_bank_n16k(a, d); - write_bank_x8k(a, d); break; } } diff --git a/platform/common/menu_pico.c b/platform/common/menu_pico.c index 3ebaa4e3..00da408c 100644 --- a/platform/common/menu_pico.c +++ b/platform/common/menu_pico.c @@ -537,7 +537,7 @@ static int menu_loop_32x_options(int id, int keys) #ifndef NO_SMS static const char *sms_hardwares[] = { "auto", "Game Gear", "Master System", NULL }; -static const char *sms_mappers[] = { "auto", "Sega", "Codemasters", "Korea", "Korea MSX", "Korea X-in-1", "Korea 4-Pak", "Korea Janggun", "Korea Nemesis", NULL }; +static const char *sms_mappers[] = { "auto", "Sega", "Codemasters", "Korea", "Korea MSX", "Korea X-in-1", "Korea 4-Pak", "Korea Janggun", "Korea Nemesis", "Taiwan 8K RAM", NULL }; static const char h_smsfm[] = "FM sound is only supported by few games\nOther games may crash with FM enabled"; static menu_entry e_menu_sms_options[] = diff --git a/platform/libretro/libretro.c b/platform/libretro/libretro.c index b24028e8..4bbcb9d7 100644 --- a/platform/libretro/libretro.c +++ b/platform/libretro/libretro.c @@ -1526,6 +1526,8 @@ static void update_variables(bool first_run) PicoIn.mapper = PMS_MAP_JANGGUN; else if (strcmp(var.value, "Korea Nemesis") == 0) PicoIn.mapper = PMS_MAP_NEMESIS; + else if (strcmp(var.value, "Taiwan 8K RAM") == 0) + PicoIn.mapper = PMS_MAP_8KRAM; else PicoIn.mapper = PMS_MAP_SEGA; } diff --git a/platform/libretro/libretro_core_options.h b/platform/libretro/libretro_core_options.h index 88b48d42..45edb1c1 100644 --- a/platform/libretro/libretro_core_options.h +++ b/platform/libretro/libretro_core_options.h @@ -138,6 +138,7 @@ struct retro_core_option_v2_definition option_defs_us[] = { { "Korea 4-Pak", NULL }, { "Korea Janggun", NULL }, { "Korea Nemesis", NULL }, + { "Taiwan 8K RAM", NULL }, { NULL, 
NULL }, }, "Auto" From 3611781e65b71daa5b638ece2530d4ada65ce381 Mon Sep 17 00:00:00 2001 From: kub Date: Sun, 27 Feb 2022 09:40:04 +0000 Subject: [PATCH 0724/1110] sms, autodetection for sg-1000 ram extension --- pico/mode4.c | 8 +++++--- pico/pico.h | 17 +++++++++-------- pico/pico_int.h | 5 +++++ pico/sms.c | 36 +++++++++++++++++++++--------------- 4 files changed, 40 insertions(+), 26 deletions(-) diff --git a/pico/mode4.c b/pico/mode4.c index 49541c8f..6cabd38e 100644 --- a/pico/mode4.c +++ b/pico/mode4.c @@ -831,14 +831,16 @@ norender: /* Palette for TMS9918 mode, see https://www.smspower.org/Development/Palette */ // RGB values: #000000 #000000 #21c842 #5edc78 #5455ed #7d76fc #d4524d #42ebf5 -// #fc5554 #ff7978 #d4c154 #e6ce80 #21b03b #c95b5a #cccccc #ffffff +// #fc5554 #ff7978 #d4c154 #e6ce80 #21b03b #c95bba #cccccc #ffffff +// 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff +// 0007 0818 1929 2a3a 3b4b 4c5c 5d6d 6e7e 7f8f 90a0 a1b1 b2c2 c3d3 d4e4 e5f5 f6 static u16 tmspal[32] = { // SMS palette // 0x0000, 0x0000, 0x00a0, 0x00f0, 0x0a00, 0x0f00, 0x0005, 0x0ff0, // 0x000a, 0x000f, 0x0055, 0x00ff, 0x0050, 0x0f0f, 0x0555, 0x0fff, // GG palette - 0x0000, 0x0000, 0x04c2, 0x07d5, 0x0e55, 0x0f77, 0x045d, 0x0fe4, - 0x055f, 0x077f, 0x05cd, 0x08ce, 0x03b2, 0x0b5c, 0x0ccc, 0x0fff, + 0x0000, 0x0000, 0x04c2, 0x07d6, 0x0e55, 0x0f77, 0x055d, 0x0ee4, + 0x055f, 0x077f, 0x05bd, 0x08ce, 0x04a2, 0x0b5c, 0x0ccc, 0x0fff, }; void PicoDoHighPal555SMS(void) diff --git a/pico/pico.h b/pico/pico.h index c7e4aa2a..162a9bf6 100644 --- a/pico/pico.h +++ b/pico/pico.h @@ -77,15 +77,16 @@ extern void *p32x_bios_g, *p32x_bios_m, *p32x_bios_s; #define POPT_DIS_FM_SSGEG (1<<23) #define POPT_EN_FM_DAC (1<<24) //x00 0000 -#define PAHW_MCD (1<<0) -#define PAHW_32X (1<<1) -#define PAHW_SVP (1<<2) -#define PAHW_PICO (1<<3) -#define PAHW_SMS (1<<4) +#define PAHW_MCD (1<<0) +#define PAHW_32X (1<<1) +#define PAHW_SVP (1<<2) +#define PAHW_PICO (1<<3) +#define PAHW_SMS (1<<4) -#define PHWS_AUTO 0 
-#define PHWS_GG 1 -#define PHWS_SMS 2 +#define PHWS_AUTO 0 +#define PHWS_GG 1 +#define PHWS_SMS 2 +#define PHWS_SG1000 3 #define PQUIRK_FORCE_6BTN (1<<0) diff --git a/pico/pico_int.h b/pico/pico_int.h index f56ee553..15d35161 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -342,6 +342,11 @@ struct PicoMisc unsigned int frame_count; // 1c for movies and idle det }; +#define PMS_HW_GG 0x1 // Game Gear +#define PMS_HW_LCD 0x2 // GG LCD +#define PMS_HW_JAP 0x4 // japanese system +#define PMS_HW_SG 0x8 // SG-1000 + #define PMS_MAP_AUTO 0 #define PMS_MAP_SEGA 1 #define PMS_MAP_CODEM 2 diff --git a/pico/sms.c b/pico/sms.c index dd301f9c..1829276c 100644 --- a/pico/sms.c +++ b/pico/sms.c @@ -57,7 +57,7 @@ static void vdp_data_write(unsigned char d) if (pv->type == 3) { // cram. 32 on SMS, but 64 on MD. Fill 2nd half of cram for prio bit mirror - if (Pico.m.hardware & 0x1) { // GG, same layout as MD + if (Pico.m.hardware & PMS_HW_GG) { // GG, same layout as MD unsigned a = pv->addr & 0x3f; if (a & 0x1) { // write complete color on high byte write u16 c = ((d&0x0f) << 8) | Pico.ms.vdp_buffer; @@ -160,7 +160,7 @@ static unsigned char z80_sms_in(unsigned short a) { case 0x00: case 0x01: - if ((Pico.m.hardware & 0x1) && a < 0x8) { // GG I/O area + if ((Pico.m.hardware & PMS_HW_GG) && a < 0x8) { // GG I/O area switch (a) { case 0: d = 0xff & ~(PicoIn.pad[0] & 0x80); break; case 1: d = Pico.ms.io_gg[1] | (Pico.ms.io_gg[2] & 0x7f); break; @@ -232,11 +232,11 @@ static void z80_sms_out(unsigned short a, unsigned char d) switch (a & 0xc1) { case 0x00: - if ((Pico.m.hardware & 0x1) && a < 0x8) // GG I/O area + if ((Pico.m.hardware & PMS_HW_GG) && a < 0x8) // GG I/O area Pico.ms.io_gg[a] = d; break; case 0x01: - if ((Pico.m.hardware & 0x1) && a < 0x8) { // GG I/O area + if ((Pico.m.hardware & PMS_HW_GG) && a < 0x8) { // GG I/O area Pico.ms.io_gg[a] = d; } else { // pad. 
latch hcounter if one of the TH lines is switched to 1 @@ -454,13 +454,15 @@ static void write_bank_jang(unsigned short a, unsigned char d) } } +// SG-1000 8KB RAM Adaptor mapper. 8KB RAM at address 0x2000 static void write_bank_x8k(unsigned short a, unsigned char d) { // 8KB address range @ 0x2000 if ((a&0xe000) != 0x2000) return; - // never autodetected, selectable only via config - if (Pico.ms.mapper != PMS_MAP_8KBRAM) return; + // this is only available on SG-1000 + if (Pico.ms.mapper != PMS_MAP_8KBRAM && (Pico.ms.mapper || !(Pico.m.hardware & PMS_HW_SG))) return; elprintf(EL_Z80BNK, "bank x8k %04x %02x @ %04x", a, d, z80_pc()); + Pico.ms.mapper = PMS_MAP_8KBRAM; ((unsigned char *)PicoMem.vram)[a+0x6000] = d; z80_map_set(z80_read_map, 0x2000, 0x3fff, PicoMem.vram+0x4000, 0); @@ -488,6 +490,7 @@ static void xwrite(unsigned int a, unsigned char d) case PMS_MAP_AUTO: // NB the sequence of mappers is crucial for the auto detection + write_bank_x8k(a, d); write_bank_n32k(a, d); write_bank_sega(a, d); write_bank_msx(a, d); @@ -517,27 +520,29 @@ void PicoResetMS(void) // set preselected hw/mapper from config if (PicoIn.hwSelect) { switch (PicoIn.hwSelect) { - case PHWS_GG: Pico.m.hardware |= 0x1; break; - default: Pico.m.hardware &= ~0x1; break; + case PHWS_GG: Pico.m.hardware |= PMS_HW_GG; break; + default: Pico.m.hardware &= ~PMS_HW_GG; break; } } if (PicoIn.mapper) Pico.ms.mapper = PicoIn.mapper; - Pico.m.hardware |= 0x4; // default region Japan if no TMR header + Pico.m.hardware |= PMS_HW_JAP; // default region Japan if no TMR header + Pico.m.hardware |= PMS_HW_SG; // default to SG-1000 if no TMR header // check if the ROM header contains more system information for (tmr = 0x2000; tmr < 0xbfff && tmr <= Pico.romsize; tmr *= 2) { if (!memcmp(Pico.rom + tmr-16, "TMR SEGA", 8)) { + Pico.m.hardware &= ~PMS_HW_SG; // not SG-1000 hw = Pico.rom[tmr-1] >> 4; if (!PicoIn.hwSelect) { - Pico.m.hardware &= ~0x1; + Pico.m.hardware &= ~PMS_HW_GG; if (hw >= 0x5 && hw < 0x8) - 
Pico.m.hardware |= 0x1; // GG cartridge detected + Pico.m.hardware |= PMS_HW_GG; // GG cartridge detected } if (!PicoIn.regionOverride) { - Pico.m.hardware &= ~0x4; + Pico.m.hardware &= ~PMS_HW_JAP; if (hw == 0x5 || hw == 0x3) - Pico.m.hardware |= 0x4; // region Japan + Pico.m.hardware |= PMS_HW_JAP; // region Japan } id = CPU_LE4(*(u32 *)&Pico.rom[tmr-4]) & 0xf0f0ffff; for (i = 0; i < sizeof(region_pal)/sizeof(*region_pal); i++) @@ -571,7 +576,8 @@ void PicoResetMS(void) Pico.video.reg[10] = 0xff; // BIOS, clear zram (unitialized on Mark-III, cf src/mame/drivers/sms.cpp) - memset(PicoMem.zram, (Pico.m.hardware&5) == 4 ? 0xf0:0, sizeof(PicoMem.zram)); + i = (Pico.m.hardware & (PMS_HW_JAP|PMS_HW_GG)) == PMS_HW_JAP ? 0xf0 : 0x00; + memset(PicoMem.zram, i, sizeof(PicoMem.zram)); } void PicoPowerMS(void) @@ -698,7 +704,7 @@ void PicoFrameMS(void) // for SMS the pause button generates an NMI, for GG ths is not the case nmi = (PicoIn.pad[0] >> 7) & 1; - if (!(Pico.m.hardware & 0x1) && !Pico.ms.nmi_state && nmi) + if (!(Pico.m.hardware & PMS_HW_GG) && !Pico.ms.nmi_state && nmi) z80_nmi(); Pico.ms.nmi_state = nmi; From 216c9f17fa4a23b0a8ff738d158a0e2990559155 Mon Sep 17 00:00:00 2001 From: kub Date: Sun, 27 Feb 2022 11:12:15 +0000 Subject: [PATCH 0725/1110] sms, add sg-1000 on-cart RAM mapping --- pico/sms.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pico/sms.c b/pico/sms.c index 1829276c..c045d610 100644 --- a/pico/sms.c +++ b/pico/sms.c @@ -458,15 +458,16 @@ static void write_bank_jang(unsigned short a, unsigned char d) static void write_bank_x8k(unsigned short a, unsigned char d) { // 8KB address range @ 0x2000 - if ((a&0xe000) != 0x2000) return; + if ((a&0xe000) != 0x2000 && (a&0xe000) != 0x8000) return; // this is only available on SG-1000 if (Pico.ms.mapper != PMS_MAP_8KBRAM && (Pico.ms.mapper || !(Pico.m.hardware & PMS_HW_SG))) return; elprintf(EL_Z80BNK, "bank x8k %04x %02x @ %04x", a, d, z80_pc()); Pico.ms.mapper = PMS_MAP_8KBRAM; - 
((unsigned char *)PicoMem.vram)[a+0x6000] = d; - z80_map_set(z80_read_map, 0x2000, 0x3fff, PicoMem.vram+0x4000, 0); - z80_map_set(z80_write_map, 0x2000, 0x3fff, PicoMem.vram+0x4000, 0); + ((unsigned char *)PicoMem.vram)[(a&0x1fff)+0x8000] = d; + a &= 0xe000; + z80_map_set(z80_read_map, a, a+0x1fff, PicoMem.vram+0x4000, 0); + z80_map_set(z80_write_map, a, a+0x1fff, PicoMem.vram+0x4000, 0); } // TODO auto-selecting is not really reliable. From df6c895c5c7a24a26af967341fef3a8e7370187b Mon Sep 17 00:00:00 2001 From: kub Date: Mon, 28 Feb 2022 21:45:50 +0000 Subject: [PATCH 0726/1110] sms, add sg-1000 support --- pico/media.c | 9 ++-- pico/mode4.c | 24 ++++----- pico/pico.h | 2 +- pico/pico_int.h | 3 +- pico/sms.c | 62 ++++++++++++++++------- platform/common/emu.c | 6 ++- platform/common/menu_pico.c | 2 +- platform/libretro/libretro.c | 4 +- platform/libretro/libretro_core_options.h | 1 + 9 files changed, 72 insertions(+), 41 deletions(-) diff --git a/pico/media.c b/pico/media.c index ae878f92..9bdb6370 100644 --- a/pico/media.c +++ b/pico/media.c @@ -313,10 +313,13 @@ enum media_type_e PicoLoadMedia(const char *filename, rom_data = NULL; // now belongs to PicoCart // simple test for GG. 
Do this here since m.hardware is nulled in Insert - if (PicoIn.AHW & PAHW_SMS) { - if (!strcmp(rom->ext,"gg") && !PicoIn.hwSelect) { - Pico.m.hardware |= 0x1; + if ((PicoIn.AHW & PAHW_SMS) && !PicoIn.hwSelect) { + if (!strcmp(rom->ext,"gg")) { + Pico.m.hardware |= PMS_HW_GG; lprintf("detected GG ROM\n"); + } else if (!strcmp(rom->ext,"sg")) { + Pico.m.hardware |= PMS_HW_SG; + lprintf("detected SG-1000 ROM\n"); } else lprintf("detected SMS ROM\n"); } diff --git a/pico/mode4.c b/pico/mode4.c index 6cabd38e..21bd95b1 100644 --- a/pico/mode4.c +++ b/pico/mode4.c @@ -163,7 +163,7 @@ static void ParseSpritesM4(int scanline) if (pv->reg[0] & 8) xoff = 0; xoff += line_offset; - if ((Pico.m.hardware & 0x3) == 0x3) + if ((Pico.m.hardware & (PMS_HW_GG|PMS_HW_LCD)) == (PMS_HW_GG|PMS_HW_LCD)) xoff -= 48; // GG LCD, adjust to center 160 px sat = (u8 *)PicoMem.vram + ((pv->reg[5] & 0x7e) << 7); @@ -302,7 +302,7 @@ static void DrawDisplayM4(int scanline) // tiles if (!(pv->debug_p & PVD_KILL_B)) { - if ((Pico.m.hardware & 0x3) == 0x3) { + if ((Pico.m.hardware & (PMS_HW_GG|PMS_HW_LCD)) == (PMS_HW_GG|PMS_HW_LCD)) { // on GG render only the center 160 px DrawStripM4(nametab , dx | ((cells-12)<< 16),(tilex+6) | (ty << 16)); } else if (pv->reg[0] & 0x80) { @@ -318,7 +318,7 @@ static void DrawDisplayM4(int scanline) if (!(pv->debug_p & PVD_KILL_S_LO)) DrawSpritesM4(); - if ((pv->reg[0] & 0x20) && (Pico.m.hardware & 0x3) != 0x3) { + if ((pv->reg[0] & 0x20) && (Pico.m.hardware & (PMS_HW_GG|PMS_HW_LCD)) != (PMS_HW_GG|PMS_HW_LCD)) { // first column masked with background, caculate offset to start of line dx = (dx&~0x1f) / 4; ty = ((pv->reg[7]&0x0f)|0x10) * 0x01010101; @@ -736,11 +736,11 @@ void PicoFrameStartSMS(void) } // Copy LCD enable flag for easier handling - Pico.m.hardware &= ~0x2; + Pico.m.hardware &= ~PMS_HW_LCD; if (PicoIn.opt & POPT_EN_GG_LCD) - Pico.m.hardware |= 0x2; + Pico.m.hardware |= PMS_HW_LCD; - if ((Pico.m.hardware & 0x3) == 0x3) { + if ((Pico.m.hardware & 
(PMS_HW_GG|PMS_HW_LCD)) == (PMS_HW_GG|PMS_HW_LCD)) { // GG LCD always has 160x144 regardless of settings screen_offset = 24; // nonetheless the vdp timing has 224 lines loffs = 48; @@ -796,7 +796,7 @@ void PicoLineSMS(int line) unsigned bgcolor; // GG LCD, render only visible part of screen - if ((Pico.m.hardware & 0x3) == 0x3 && (line < 24 || line >= 24+144)) + if ((Pico.m.hardware & (PMS_HW_GG|PMS_HW_LCD)) == (PMS_HW_GG|PMS_HW_LCD) && (line < 24 || line >= 24+144)) goto norender; if (PicoScanBegin != NULL && skip == 0) @@ -832,12 +832,10 @@ norender: /* Palette for TMS9918 mode, see https://www.smspower.org/Development/Palette */ // RGB values: #000000 #000000 #21c842 #5edc78 #5455ed #7d76fc #d4524d #42ebf5 // #fc5554 #ff7978 #d4c154 #e6ce80 #21b03b #c95bba #cccccc #ffffff -// 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff -// 0007 0818 1929 2a3a 3b4b 4c5c 5d6d 6e7e 7f8f 90a0 a1b1 b2c2 c3d3 d4e4 e5f5 f6 static u16 tmspal[32] = { // SMS palette -// 0x0000, 0x0000, 0x00a0, 0x00f0, 0x0a00, 0x0f00, 0x0005, 0x0ff0, -// 0x000a, 0x000f, 0x0055, 0x00ff, 0x0050, 0x0f0f, 0x0555, 0x0fff, + 0x0000, 0x0000, 0x00a0, 0x00f0, 0x0a00, 0x0f00, 0x0005, 0x0ff0, + 0x000a, 0x000f, 0x0055, 0x00ff, 0x0050, 0x0f0f, 0x0555, 0x0fff, // GG palette 0x0000, 0x0000, 0x04c2, 0x07d6, 0x0e55, 0x0f77, 0x055d, 0x0ee4, 0x055f, 0x077f, 0x05bd, 0x08ce, 0x04a2, 0x0b5c, 0x0ccc, 0x0fff, @@ -861,8 +859,8 @@ void PicoDoHighPal555SMS(void) /* SMS 6 bit cram data was already converted to MD/GG format by vdp write, * hence GG/SMS/TMS can all be handled the same here */ for (j = cnt; j > 0; j--) { - if (!(Pico.video.reg[0] & 0x4)) - spal = (u32 *)tmspal; // fixed palette in TMS modes + if (!(Pico.video.reg[0] & 0x4)) // fixed palette in TMS modes + spal = (u32 *)tmspal + (Pico.m.hardware & PMS_HW_SG ? 
16/2 : 0); for (i = 0x20/2; i > 0; i--, spal++, dpal++) { t = *spal; #if defined(USE_BGR555) diff --git a/pico/pico.h b/pico/pico.h index 162a9bf6..dc596796 100644 --- a/pico/pico.h +++ b/pico/pico.h @@ -86,7 +86,7 @@ extern void *p32x_bios_g, *p32x_bios_m, *p32x_bios_s; #define PHWS_AUTO 0 #define PHWS_GG 1 #define PHWS_SMS 2 -#define PHWS_SG1000 3 +#define PHWS_SG 3 #define PQUIRK_FORCE_6BTN (1<<0) diff --git a/pico/pico_int.h b/pico/pico_int.h index 15d35161..85df6c5b 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -368,7 +368,8 @@ struct PicoMS unsigned char vdp_buffer; unsigned char vdp_hlatch; unsigned char io_gg[0x08]; - unsigned char pad[0x42]; + unsigned char mapcnt; + unsigned char pad[0x41]; }; // emu state and data for the asm code diff --git a/pico/sms.c b/pico/sms.c index c045d610..6f1cfc39 100644 --- a/pico/sms.c +++ b/pico/sms.c @@ -346,7 +346,7 @@ static void write_bank_msx(unsigned short a, unsigned char d) { if (a > 0x0003) return; // don't detect linear mapping to avoid confusing with Codemasters - if (Pico.ms.mapper != PMS_MAP_MSX && (Pico.ms.mapper || (a|d) == 0)) return; + if (Pico.ms.mapper != PMS_MAP_MSX && (Pico.ms.mapper || (a|d) == 0 || d >= 0x80)) return; elprintf(EL_Z80BNK, "bank msx %04x %02x @ %04x", a, d, z80_pc()); Pico.ms.mapper = PMS_MAP_MSX; Pico.ms.carthw[a] = d; @@ -457,19 +457,32 @@ static void write_bank_jang(unsigned short a, unsigned char d) // SG-1000 8KB RAM Adaptor mapper. 
8KB RAM at address 0x2000 static void write_bank_x8k(unsigned short a, unsigned char d) { - // 8KB address range @ 0x2000 + // 8KB address range @ 0x2000 (adaptor) or @ 0x8000 (cartridge) if ((a&0xe000) != 0x2000 && (a&0xe000) != 0x8000) return; - // this is only available on SG-1000 - if (Pico.ms.mapper != PMS_MAP_8KBRAM && (Pico.ms.mapper || !(Pico.m.hardware & PMS_HW_SG))) return; + if (Pico.ms.mapper != PMS_MAP_8KBRAM && Pico.ms.mapper) return; + elprintf(EL_Z80BNK, "bank x8k %04x %02x @ %04x", a, d, z80_pc()); + ((unsigned char *)PicoMem.vram)[(a&0x1fff)+0x8000] = d; Pico.ms.mapper = PMS_MAP_8KBRAM; - ((unsigned char *)PicoMem.vram)[(a&0x1fff)+0x8000] = d; a &= 0xe000; + Pico.ms.carthw[0] = a >> 12; z80_map_set(z80_read_map, a, a+0x1fff, PicoMem.vram+0x4000, 0); z80_map_set(z80_write_map, a, a+0x1fff, PicoMem.vram+0x4000, 0); } +char *mappers[] = { + [PMS_MAP_SEGA] = "Sega", + [PMS_MAP_CODEM] = "Codemasters", + [PMS_MAP_KOREA] = "Korea", + [PMS_MAP_MSX] = "Korea MSX", + [PMS_MAP_N32K] = "Korea X-in-1", + [PMS_MAP_N16K] = "Korea 4-Pak", + [PMS_MAP_JANGGUN] = "Korea Janggun", + [PMS_MAP_NEMESIS] = "Korea Nemesis", + [PMS_MAP_8KBRAM] = "Taiwan 8K RAM", +}; + // TODO auto-selecting is not really reliable. // Before adding more mappers this should be revised. 
static void xwrite(unsigned int a, unsigned char d) @@ -490,14 +503,23 @@ static void xwrite(unsigned int a, unsigned char d) case PMS_MAP_8KBRAM: write_bank_x8k(a, d); break; case PMS_MAP_AUTO: + // disable autodetection after some time + if ((a >= 0xc000 && a < 0xfff8) || Pico.ms.mapcnt > 20) break; // NB the sequence of mappers is crucial for the auto detection - write_bank_x8k(a, d); - write_bank_n32k(a, d); - write_bank_sega(a, d); - write_bank_msx(a, d); - write_bank_codem(a, d); - write_bank_korea(a, d); - write_bank_n16k(a, d); + if (Pico.m.hardware & PMS_HW_SG) + write_bank_x8k(a, d); + else { + write_bank_n32k(a, d); + write_bank_sega(a, d); + write_bank_msx(a, d); + write_bank_codem(a, d); + write_bank_korea(a, d); + write_bank_n16k(a, d); + } + + Pico.ms.mapcnt ++; + if (Pico.ms.mapper) + elprintf(EL_STATUS, "autodetected %s mapper",mappers[Pico.ms.mapper]); break; } } @@ -520,23 +542,23 @@ void PicoResetMS(void) // set preselected hw/mapper from config if (PicoIn.hwSelect) { + Pico.m.hardware &= ~(PMS_HW_GG|PMS_HW_SG); switch (PicoIn.hwSelect) { case PHWS_GG: Pico.m.hardware |= PMS_HW_GG; break; - default: Pico.m.hardware &= ~PMS_HW_GG; break; + case PHWS_SG: Pico.m.hardware |= PMS_HW_SG; break; } } + Pico.ms.mapcnt = Pico.ms.mapper = 0; if (PicoIn.mapper) Pico.ms.mapper = PicoIn.mapper; Pico.m.hardware |= PMS_HW_JAP; // default region Japan if no TMR header - Pico.m.hardware |= PMS_HW_SG; // default to SG-1000 if no TMR header // check if the ROM header contains more system information for (tmr = 0x2000; tmr < 0xbfff && tmr <= Pico.romsize; tmr *= 2) { if (!memcmp(Pico.rom + tmr-16, "TMR SEGA", 8)) { - Pico.m.hardware &= ~PMS_HW_SG; // not SG-1000 hw = Pico.rom[tmr-1] >> 4; if (!PicoIn.hwSelect) { - Pico.m.hardware &= ~PMS_HW_GG; + Pico.m.hardware &= ~(PMS_HW_GG|PMS_HW_SG); if (hw >= 0x5 && hw < 0x8) Pico.m.hardware |= PMS_HW_GG; // GG cartridge detected } @@ -647,19 +669,21 @@ void PicoMemSetupMS(void) xwrite(0x0000, 0); xwrite(0x4000, 1); 
xwrite(0x8000, 2); - } else { + } else if (mapper) { xwrite(0xfffc, 0); xwrite(0xfffd, 0); xwrite(0xfffe, 1); xwrite(0xffff, 2); } - Pico.ms.mapper = mapper; } void PicoStateLoadedMS(void) { u8 mapper = Pico.ms.mapper; - if (Pico.ms.mapper == PMS_MAP_MSX || Pico.ms.mapper == PMS_MAP_NEMESIS) { + if (Pico.ms.mapper == PMS_MAP_8KBRAM) { + u16 a = Pico.ms.carthw[0] << 12; + xwrite(a+0x888, *((unsigned char *)PicoMem.vram+0x8888)); + } else if (Pico.ms.mapper == PMS_MAP_MSX || Pico.ms.mapper == PMS_MAP_NEMESIS) { xwrite(0x0000, Pico.ms.carthw[0]); xwrite(0x0001, Pico.ms.carthw[1]); xwrite(0x0002, Pico.ms.carthw[2]); diff --git a/platform/common/emu.c b/platform/common/emu.c index 845951ea..b233050a 100644 --- a/platform/common/emu.c +++ b/platform/common/emu.c @@ -332,9 +332,11 @@ static void system_announce(void) if (PicoIn.AHW & PAHW_SMS) { sys_name = "Master System"; - if (Pico.m.hardware & 0x1) + if (Pico.m.hardware & PMS_HW_GG) sys_name = "Game Gear"; - else if (Pico.m.hardware & 0x4) + else if (Pico.m.hardware & PMS_HW_SG) + sys_name = "SG-1000"; + else if (Pico.m.hardware & PMS_HW_JAP) sys_name = "Mark III"; #ifdef NO_SMS extra = " [no support]"; diff --git a/platform/common/menu_pico.c b/platform/common/menu_pico.c index 00da408c..6a72bc26 100644 --- a/platform/common/menu_pico.c +++ b/platform/common/menu_pico.c @@ -536,7 +536,7 @@ static int menu_loop_32x_options(int id, int keys) #ifndef NO_SMS -static const char *sms_hardwares[] = { "auto", "Game Gear", "Master System", NULL }; +static const char *sms_hardwares[] = { "auto", "Game Gear", "Master System", "SG-1000", NULL }; static const char *sms_mappers[] = { "auto", "Sega", "Codemasters", "Korea", "Korea MSX", "Korea X-in-1", "Korea 4-Pak", "Korea Janggun", "Korea Nemesis", "Taiwan 8K RAM", NULL }; static const char h_smsfm[] = "FM sound is only supported by few games\nOther games may crash with FM enabled"; diff --git a/platform/libretro/libretro.c b/platform/libretro/libretro.c index 4bbcb9d7..99fc826a 
100644 --- a/platform/libretro/libretro.c +++ b/platform/libretro/libretro.c @@ -1494,6 +1494,8 @@ static void update_variables(bool first_run) PicoIn.hwSelect = PHWS_AUTO; else if (strcmp(var.value, "Game Gear") == 0) PicoIn.hwSelect = PHWS_GG; + else if (strcmp(var.value, "SG-1000") == 0) + PicoIn.hwSelect = PHWS_SG; else PicoIn.hwSelect = PHWS_SMS; } @@ -1527,7 +1529,7 @@ static void update_variables(bool first_run) else if (strcmp(var.value, "Korea Nemesis") == 0) PicoIn.mapper = PMS_MAP_NEMESIS; else if (strcmp(var.value, "Taiwan 8K RAM") == 0) - PicoIn.mapper = PMS_MAP_8KRAM; + PicoIn.mapper = PMS_MAP_8KBRAM; else PicoIn.mapper = PMS_MAP_SEGA; } diff --git a/platform/libretro/libretro_core_options.h b/platform/libretro/libretro_core_options.h index 45edb1c1..6e627d49 100644 --- a/platform/libretro/libretro_core_options.h +++ b/platform/libretro/libretro_core_options.h @@ -117,6 +117,7 @@ struct retro_core_option_v2_definition option_defs_us[] = { { "Auto", NULL }, { "Game Gear", NULL }, { "Master System", NULL }, + { "SG-1000" , NULL }, { NULL, NULL }, }, "Auto" From e48f3f27959732ca7bdbecef8a9812c4e0c8a988 Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 1 Mar 2022 21:07:21 +0000 Subject: [PATCH 0727/1110] sms, some minor fixes and additions for sg-1000 --- pico/mode4.c | 22 +++++++++++++--------- pico/sms.c | 6 ++++-- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/pico/mode4.c b/pico/mode4.c index 21bd95b1..451bddfb 100644 --- a/pico/mode4.c +++ b/pico/mode4.c @@ -549,10 +549,12 @@ static void DrawDisplayM1(int scanline) int tilex, dx, cells; int cellskip = 0; // XXX int maxcells = 40; + unsigned mask = pv->reg[0] & 0x2 ? 
0x2000 : 0x3800; // M3: 2 bits table select // name, color, pattern table: nametab = PicoMem.vramb + ((pv->reg[2]<<10) & 0x3c00); - pattab = PicoMem.vramb + ((pv->reg[4]<<11) & 0x3800); + pattab = PicoMem.vramb + ((pv->reg[4]<<11) & mask); + pattab += ((scanline>>6) << 11) & ~mask; // table select bits for M3 nametab += ((scanline>>3) * maxcells); pattab += (scanline & 0x7); @@ -593,12 +595,14 @@ static void DrawDisplayM2(int scanline) int tilex, dx, cells; int cellskip = 0; // XXX int maxcells = 32; + unsigned mask = pv->reg[0] & 0x2 ? 0x2000 : 0x3800; // M3: 2 bits table select // name, color, pattern table: nametab = PicoMem.vramb + ((pv->reg[2]<<10) & 0x3c00); - pattab = PicoMem.vramb + ((pv->reg[4]<<11) & 0x3800); + pattab = PicoMem.vramb + ((pv->reg[4]<<11) & mask); + pattab += ((scanline>>6) << 11) & ~mask; // table select bits for M3 - nametab += (scanline>>5) << 5; + nametab += (scanline>>3) << 5; pattab += (scanline>>2) & 0x7; tilex = cellskip & 0x1f; @@ -811,10 +815,10 @@ void PicoLineSMS(int line) bgcolor = (Pico.video.reg[7] & 0x0f) | ((Pico.video.reg[0] & 0x04) << 2); BackFill(bgcolor, 0, &Pico.est); // bgcolor is from 2nd palette in mode 4 if (Pico.video.reg[1] & 0x40) { - if (Pico.video.reg[0] & 0x04) DrawDisplayM4(line); + if (Pico.video.reg[0] & 0x04) DrawDisplayM4(line); // also M4+M3 + else if (Pico.video.reg[1] & 0x08) DrawDisplayM2(line); // also M2+M3 + else if (Pico.video.reg[1] & 0x10) DrawDisplayM1(line); // also M1+M3 else if (Pico.video.reg[0] & 0x02) DrawDisplayM3(line); - else if (Pico.video.reg[1] & 0x08) DrawDisplayM2(line); - else if (Pico.video.reg[1] & 0x10) DrawDisplayM1(line); else DrawDisplayM0(line); } @@ -835,10 +839,10 @@ norender: static u16 tmspal[32] = { // SMS palette 0x0000, 0x0000, 0x00a0, 0x00f0, 0x0a00, 0x0f00, 0x0005, 0x0ff0, - 0x000a, 0x000f, 0x0055, 0x00ff, 0x0050, 0x0f0f, 0x0555, 0x0fff, + 0x000a, 0x000f, 0x00aa, 0x00ff, 0x0050, 0x0f0f, 0x0aaa, 0x0fff, // GG palette - 0x0000, 0x0000, 0x04c2, 0x07d6, 0x0e55, 
0x0f77, 0x055d, 0x0ee4, - 0x055f, 0x077f, 0x05bd, 0x08ce, 0x04a2, 0x0b5c, 0x0ccc, 0x0fff, + 0x0000, 0x0000, 0x04c2, 0x07d6, 0x0e55, 0x0f77, 0x055c, 0x0ee4, + 0x055f, 0x077f, 0x05bc, 0x08ce, 0x03a2, 0x0b5c, 0x0ccc, 0x0fff, }; void PicoDoHighPal555SMS(void) diff --git a/pico/sms.c b/pico/sms.c index 6f1cfc39..81aa823e 100644 --- a/pico/sms.c +++ b/pico/sms.c @@ -508,14 +508,14 @@ static void xwrite(unsigned int a, unsigned char d) // NB the sequence of mappers is crucial for the auto detection if (Pico.m.hardware & PMS_HW_SG) write_bank_x8k(a, d); - else { + else { write_bank_n32k(a, d); write_bank_sega(a, d); write_bank_msx(a, d); write_bank_codem(a, d); write_bank_korea(a, d); write_bank_n16k(a, d); - } + } Pico.ms.mapcnt ++; if (Pico.ms.mapper) @@ -552,6 +552,8 @@ void PicoResetMS(void) if (PicoIn.mapper) Pico.ms.mapper = PicoIn.mapper; Pico.m.hardware |= PMS_HW_JAP; // default region Japan if no TMR header + if (PicoIn.regionOverride > 2) + Pico.m.hardware &= ~PMS_HW_JAP; // check if the ROM header contains more system information for (tmr = 0x2000; tmr < 0xbfff && tmr <= Pico.romsize; tmr *= 2) { From f8aaa200cfba515c34a589f290f9ba9d6186eab6 Mon Sep 17 00:00:00 2001 From: kub Date: Sun, 6 Mar 2022 18:39:46 +0000 Subject: [PATCH 0728/1110] platform sdl, preliminary window resizing fixes picodrive doesn't handle resize events, so it's not really working :-/ it however uncovered some bugs and strange behaviour, though --- platform/common/main.c | 8 ++- platform/common/menu_pico.c | 5 +- platform/common/plat_sdl.c | 98 +++++++++++++++++++++++++++---------- platform/libpicofe | 2 +- platform/linux/emu.c | 17 +++++-- 5 files changed, 95 insertions(+), 35 deletions(-) diff --git a/platform/common/main.c b/platform/common/main.c index 571b8884..6fce5d58 100644 --- a/platform/common/main.c +++ b/platform/common/main.c @@ -84,21 +84,23 @@ int main(int argc, char *argv[]) //in_probe(); plat_target_init(); + plat_init(); + menu_init(); emu_prep_defconfig(); // depends on 
input emu_read_config(NULL, 0); - plat_init(); - menu_init(); emu_init(); engineState = PGS_Menu; + plat_video_menu_enter(0); if (argc > 1) parse_cmd_line(argc, argv); if (engineState == PGS_ReloadRom) { + plat_video_menu_begin(); if (emu_reload_rom(rom_fname_reload)) { engineState = PGS_Running; if (load_state_slot >= 0) { @@ -106,7 +108,9 @@ int main(int argc, char *argv[]) emu_save_load_game(1, 0); } } + plat_video_menu_end(); } + plat_video_menu_leave(); for (;;) { diff --git a/platform/common/menu_pico.c b/platform/common/menu_pico.c index 6a72bc26..65e08a01 100644 --- a/platform/common/menu_pico.c +++ b/platform/common/menu_pico.c @@ -97,10 +97,9 @@ static void make_bg(int no_scale, int from_screen) pp = g_screen_ppitch; } - if (src == NULL) { - memset(g_menubg_ptr, 0, g_menuscreen_w * g_menuscreen_h * 2); + memset(g_menubg_ptr, 0, g_menuscreen_w * g_menuscreen_h * 2); + if (src == NULL) return; - } if (!no_scale && g_menuscreen_w / w >= 2 && g_menuscreen_h / h >= 2) { diff --git a/platform/common/plat_sdl.c b/platform/common/plat_sdl.c index d66a2ace..98c0a55f 100644 --- a/platform/common/plat_sdl.c +++ b/platform/common/plat_sdl.c @@ -1,6 +1,7 @@ /* * PicoDrive * (C) notaz, 2013 + * (C) kub, 2020-2022 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. 
@@ -22,6 +23,7 @@ #include static void *shadow_fb; +static int shadow_size; static struct area { int w, h; } area; static struct in_pdata in_sdl_platform_data = { @@ -82,14 +84,14 @@ void bgr_to_uyvy_init(void) } } -void rgb565_to_uyvy(void *d, const void *s, int w, int h, int pitch, int x2) +void rgb565_to_uyvy(void *d, const void *s, int w, int h, int pitch, int dpitch, int x2) { uint32_t *dst = d; const uint16_t *src = s; int i; if (x2) while (h--) { - for (i = w; i > 0; src += 4, dst += 4, i -= 4) + for (i = w; i >= 4; src += 4, dst += 4, i -= 4) { struct uyvy *uyvy0 = yuv_uyvy + src[0], *uyvy1 = yuv_uyvy + src[1]; struct uyvy *uyvy2 = yuv_uyvy + src[2], *uyvy3 = yuv_uyvy + src[3]; @@ -105,9 +107,10 @@ void rgb565_to_uyvy(void *d, const void *s, int w, int h, int pitch, int x2) dst[3] = uyvy3->y | (uyvy3->vyu << 8); #endif } - src += pitch - w; + src += pitch - (w-i); + dst += (dpitch - 2*(w-i))/2; } else while (h--) { - for (i = w; i > 0; src += 4, dst += 2, i -= 4) + for (i = w; i >= 4; src += 4, dst += 2, i -= 4) { struct uyvy *uyvy0 = yuv_uyvy + src[0], *uyvy1 = yuv_uyvy + src[1]; struct uyvy *uyvy2 = yuv_uyvy + src[2], *uyvy3 = yuv_uyvy + src[3]; @@ -119,12 +122,23 @@ void rgb565_to_uyvy(void *d, const void *s, int w, int h, int pitch, int x2) dst[1] = uyvy3->y | (uyvy2->vyu << 8); #endif } - src += pitch - w; + src += pitch - (w-i); + dst += (dpitch - (w-i))/2; } } static int clear_buf_cnt, clear_stat_cnt; +static void resize_buffers(void) +{ + // make sure the shadow buffers are big enough in case of resize + if (shadow_size < g_menuscreen_w * g_menuscreen_h * 2) { + shadow_size = g_menuscreen_w * g_menuscreen_h * 2; + shadow_fb = realloc(shadow_fb, shadow_size); + g_menubg_ptr = realloc(g_menubg_ptr, shadow_size); + } +} + void plat_video_set_size(int w, int h) { if (area.w != w || area.h != h) { @@ -134,10 +148,14 @@ void plat_video_set_size(int w, int h) w = g_screen_width, h = g_screen_height; } if (!plat_sdl_overlay && !plat_sdl_gl_active) { + 
g_screen_width = plat_sdl_screen->w; + g_screen_height = plat_sdl_screen->h; + g_screen_ppitch = plat_sdl_screen->pitch/2; + g_screen_ptr = plat_sdl_screen->pixels; + } else { g_screen_width = w; g_screen_height = h; g_screen_ppitch = w; - g_screen_ptr = plat_sdl_screen->pixels; } area = (struct area) { w, h }; } @@ -145,13 +163,17 @@ void plat_video_set_size(int w, int h) void plat_video_flip(void) { + resize_buffers(); + if (plat_sdl_overlay != NULL) { SDL_Rect dstrect = { 0, 0, plat_sdl_screen->w, plat_sdl_screen->h }; SDL_LockYUVOverlay(plat_sdl_overlay); - rgb565_to_uyvy(plat_sdl_overlay->pixels[0], shadow_fb, - area.w, area.h, g_screen_ppitch, - plat_sdl_overlay->w >= 2*area.w); + if (area.w <= plat_sdl_overlay->w && area.h <= plat_sdl_overlay->h) + rgb565_to_uyvy(plat_sdl_overlay->pixels[0], shadow_fb, + area.w, area.h, g_screen_ppitch, + plat_sdl_overlay->pitches[0]/2, + plat_sdl_overlay->w >= 2*area.w); SDL_UnlockYUVOverlay(plat_sdl_overlay); SDL_DisplayYUVOverlay(plat_sdl_overlay, &dstrect); } @@ -165,6 +187,7 @@ void plat_video_flip(void) SDL_LockSurface(plat_sdl_screen); } else SDL_Flip(plat_sdl_screen); + g_screen_ppitch = plat_sdl_screen->pitch/2; g_screen_ptr = plat_sdl_screen->pixels; plat_video_set_buffer(g_screen_ptr); if (clear_buf_cnt) { @@ -172,6 +195,14 @@ void plat_video_flip(void) clear_buf_cnt--; } } + + // for overlay/gl modes buffer ptr may change on resize + if ((plat_sdl_overlay || plat_sdl_gl_active) && + (g_screen_ptr != shadow_fb || g_screen_ppitch != g_screen_width)) { + g_screen_ppitch = g_screen_width; + g_screen_ptr = shadow_fb; + plat_video_set_buffer(g_screen_ptr); + } if (clear_stat_cnt) { unsigned short *d = (unsigned short *)g_screen_ptr + g_screen_ppitch * g_screen_height; int l = g_screen_ppitch * 8; @@ -191,7 +222,7 @@ void plat_video_clear_status(void) void plat_video_clear_buffers(void) { - if (plat_sdl_overlay != NULL || plat_sdl_gl_active) + if (plat_sdl_overlay || plat_sdl_gl_active) memset(shadow_fb, 0, 
g_menuscreen_w * g_menuscreen_h * 2); else { memset(g_screen_ptr, 0, plat_sdl_screen->w*plat_sdl_screen->h * 2); @@ -204,18 +235,20 @@ void plat_video_menu_enter(int is_rom_loaded) if (SDL_MUSTLOCK(plat_sdl_screen)) SDL_UnlockSurface(plat_sdl_screen); plat_sdl_change_video_mode(g_menuscreen_w, g_menuscreen_h, 1); - g_screen_ptr = shadow_fb; - plat_video_set_buffer(g_screen_ptr); + resize_buffers(); } void plat_video_menu_begin(void) { - if (plat_sdl_overlay != NULL || plat_sdl_gl_active) { + resize_buffers(); + if (plat_sdl_overlay || plat_sdl_gl_active) { + g_menuscreen_pp = g_menuscreen_w; g_menuscreen_ptr = shadow_fb; } else { if (SDL_MUSTLOCK(plat_sdl_screen)) SDL_LockSurface(plat_sdl_screen); + g_menuscreen_pp = plat_sdl_screen->pitch / 2; g_menuscreen_ptr = plat_sdl_screen->pixels; } } @@ -227,8 +260,11 @@ void plat_video_menu_end(void) { 0, 0, plat_sdl_screen->w, plat_sdl_screen->h }; SDL_LockYUVOverlay(plat_sdl_overlay); - rgb565_to_uyvy(plat_sdl_overlay->pixels[0], shadow_fb, - g_menuscreen_w, g_menuscreen_h, g_menuscreen_pp, 0); + if (g_menuscreen_w <= plat_sdl_overlay->w && g_menuscreen_h <= plat_sdl_overlay->h) + rgb565_to_uyvy(plat_sdl_overlay->pixels[0], shadow_fb, + g_menuscreen_w, g_menuscreen_h, g_menuscreen_pp, + plat_sdl_overlay->pitches[0]/2, + plat_sdl_overlay->w >= 2 * g_menuscreen_w); SDL_UnlockYUVOverlay(plat_sdl_overlay); SDL_DisplayYUVOverlay(plat_sdl_overlay, &dstrect); @@ -251,31 +287,43 @@ void plat_video_menu_leave(void) void plat_video_loop_prepare(void) { // take over any new vout settings - plat_sdl_change_video_mode(g_menuscreen_w, g_menuscreen_h, 0); + plat_sdl_change_video_mode(0, 0, 0); + area.w = g_menuscreen_w, area.h = g_menuscreen_h; + resize_buffers(); + // switch over to scaled output if available, but keep the aspect ratio - if (plat_sdl_overlay != NULL || plat_sdl_gl_active) { + if (plat_sdl_overlay || plat_sdl_gl_active) { g_screen_width = (240 * g_menuscreen_w / g_menuscreen_h) & ~1; g_screen_height = 240; 
g_screen_ppitch = g_screen_width; - plat_sdl_change_video_mode(g_screen_width, g_screen_height, 0); g_screen_ptr = shadow_fb; } else { - g_screen_width = g_menuscreen_w; - g_screen_height = g_menuscreen_h; - g_screen_ppitch = g_menuscreen_pp; + g_screen_width = plat_sdl_screen->w; + g_screen_height = plat_sdl_screen->h; + g_screen_ppitch = plat_sdl_screen->pitch/2; if (SDL_MUSTLOCK(plat_sdl_screen)) SDL_LockSurface(plat_sdl_screen); g_screen_ptr = plat_sdl_screen->pixels; } - plat_video_set_buffer(g_screen_ptr); + plat_video_set_size(g_screen_width, g_screen_height); + plat_video_set_buffer(g_screen_ptr); } void plat_early_init(void) { } +static void plat_sdl_resize(int w, int h) +{ + // take over new settings + g_menuscreen_h = (plat_sdl_screen->h < 480 ? plat_sdl_screen->h : 480); + if (!plat_sdl_overlay && !plat_sdl_gl_active) + g_menuscreen_h = plat_sdl_screen->h; + g_menuscreen_w = g_menuscreen_h * plat_sdl_screen->w/plat_sdl_screen->h; +} + static void plat_sdl_quit(void) { // for now.. 
@@ -284,24 +332,22 @@ static void plat_sdl_quit(void) void plat_init(void) { - int shadow_size; int ret; ret = plat_sdl_init(); if (ret != 0) exit(1); - SDL_ShowCursor(0); #if defined(__RG350__) || defined(__GCW0__) || defined(__OPENDINGUX__) // opendingux on JZ47x0 may falsely report a HW overlay, fix to window plat_target.vout_method = 0; #endif plat_sdl_quit_cb = plat_sdl_quit; + plat_sdl_resize_cb = plat_sdl_resize; + SDL_ShowCursor(0); SDL_WM_SetCaption("PicoDrive " VERSION, NULL); - g_menuscreen_w = plat_sdl_screen->w; - g_menuscreen_h = plat_sdl_screen->h; g_menuscreen_pp = g_menuscreen_w; g_menuscreen_ptr = NULL; diff --git a/platform/libpicofe b/platform/libpicofe index 81b1aa54..e3ea3015 160000 --- a/platform/libpicofe +++ b/platform/libpicofe @@ -1 +1 @@ -Subproject commit 81b1aa54a4d31f43f8f6d72c95a7898b2a871791 +Subproject commit e3ea3015f042cb3a1615dc3038b524ce0a063722 diff --git a/platform/linux/emu.c b/platform/linux/emu.c index 0d90ae52..7d2c6ba4 100644 --- a/platform/linux/emu.c +++ b/platform/linux/emu.c @@ -297,7 +297,6 @@ void pemu_forced_frame(int no_scale, int do_emu) Pico.m.dirtyPal = 1; if (currentConfig.scaling) currentConfig.scaling = EOPT_SCALE_SW; if (currentConfig.vscaling) currentConfig.vscaling = EOPT_SCALE_SW; - plat_video_set_size(g_menuscreen_w, g_menuscreen_h); // render a frame in 16 bit mode render_bg = 1; @@ -402,10 +401,18 @@ void emu_video_mode_change(int start_line, int line_count, int start_col, int co break; } - if (screen_w != g_screen_width || screen_h != g_screen_height) - plat_video_set_size(screen_w, screen_h); + plat_video_set_size(screen_w, screen_h); plat_video_set_buffer(g_screen_ptr); + if (screen_w < g_screen_width) + screen_x = (g_screen_width - screen_w)/2; + if (screen_h < g_screen_height) { + screen_y = (g_screen_height - screen_h)/2; + // NTSC always has 224 visible lines, anything smaller has bars + if (out_h < 224 && out_h > 144) + screen_y += (224 - out_h)/2; + } + // create a backing buffer for 
emulating the bad GG lcd display if (currentConfig.ghosting && out_h == 144) { int h = currentConfig.vscaling == EOPT_SCALE_SW ? 240:out_h; @@ -429,7 +436,11 @@ void pemu_loop_prep(void) void pemu_loop_end(void) { /* do one more frame for menu bg */ + plat_video_set_size(320, 240); pemu_forced_frame(0, 1); + g_menubg_src_w = g_screen_width; + g_menubg_src_h = g_screen_height; + g_menubg_src_pp = g_screen_ppitch; if (ghost_buf) { free(ghost_buf); ghost_buf = NULL; From f507a70379d01dc591f238525f151a396bb16e99 Mon Sep 17 00:00:00 2001 From: jSTE0 <98854293+jSTE0@users.noreply.github.com> Date: Fri, 18 Feb 2022 22:33:17 +0000 Subject: [PATCH 0729/1110] platform: Add miyoo support Add support for the miyoo platform, another SDL-based platform with a small cache CPU and its own input mappings. --- Makefile | 7 ++++++- README.md | 1 + configure | 11 +++++++++-- platform/opendingux/inputmap.c | 18 +++++++++++++++++- 4 files changed, 33 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index ce1938e4..c6174aea 100644 --- a/Makefile +++ b/Makefile @@ -49,7 +49,7 @@ endif LINKOUT ?= -o endif -ifeq ("$(PLATFORM)",$(filter "$(PLATFORM)","gp2x" "opendingux" "rpi1")) +ifeq ("$(PLATFORM)",$(filter "$(PLATFORM)","gp2x" "opendingux" "miyoo" "rpi1")) # very small caches, avoid optimization options making the binary much bigger CFLAGS += -finline-limit=42 -fno-unroll-loops -fno-ipa-cp -ffast-math # this gets you about 20% better execution speed on 32bit arm/mips @@ -117,6 +117,11 @@ use_inputmap ?= 1 # OpenDingux is a generic platform, really. 
PLATFORM := generic endif +ifeq "$(PLATFORM)" "miyoo" +OBJS += platform/opendingux/inputmap.o +use_inputmap ?= 1 +PLATFORM := generic +endif ifeq ("$(PLATFORM)",$(filter "$(PLATFORM)","rpi1" "rpi2")) CFLAGS += -DHAVE_GLES -DRASPBERRY CFLAGS += -I/opt/vc/include/ -I/opt/vc/include/interface/vcos/pthreads/ -I/opt/vc/include/interface/vmcs_host/linux/ diff --git a/README.md b/README.md index 3a5b3fe0..154ff510 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,7 @@ dingux|dingux with ubuntu mips gcc 5.4|CROSS_COMPILE=mipsel-linux-gnu- CFLAGS="- retrofw|retrofw|CROSS_COMPILE=mipsel-linux- CFLAGS="-I $TC/include -I $TC/include/SDL -Wno-unused-result" LDFLAGS="--sysroot $TC/mipsel-buildroot-linux-uclibc/sysroot" ./configure --platform=retrofw gcw0|gcw0|CROSS_COMPILE=mipsel-gcw0-linux-uclibc- CFLAGS="-I$TC/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include -I$TC/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include/SDL" LDFLAGS="--sysroot $TC/usr/mipsel-gcw0-linux-uclibc/sysroot" ./configure --platform=gcw0 rg350|rg350|CROSS_COMPILE=mipsel-linux- CFLAGS="-I$TC/usr/include -I$TC/usr/include/SDL" LDFLAGS="--sysroot $TC -L$TC/lib" ./configure --platform=rg350 +miyoo|miyoo|CROSS_COMPILE=arm-miyoo-linux-uclibcgnueabi- CFLAGS="-I$TC/arm-miyoo-linux-uclibcgnueabi/sysroot/usr/include -I$TC/arm-miyoo-linux-uclibcgnueabi/sysroot/usr/include/SDL" LDFLAGS="--sysroot $TC/arm-miyoo-linux-uclibcgnueabi/sysroot" ./configure --platform=miyoo For gp2x, wiz, and caanoo you may need to compile libpng first. diff --git a/configure b/configure index 4019c2ed..c67b9a0c 100755 --- a/configure +++ b/configure @@ -39,7 +39,7 @@ check_define() # "" means "autodetect". # TODO this is annoyingly messy. 
should have platform and device -platform_list="generic pandora gp2x wiz caanoo dingux retrofw gcw0 rg350 opendingux rpi1 rpi2 psp" +platform_list="generic pandora gp2x wiz caanoo dingux retrofw gcw0 rg350 opendingux miyoo rpi1 rpi2 psp" platform="generic" sound_driver_list="oss alsa sdl" sound_drivers="" @@ -106,6 +106,13 @@ set_platform() MFLAGS="-march=mips32r2" platform="opendingux" ;; + miyoo) + # Miyoo BittBoy, PocketGO 1, PowKiddy V90/Q90 with Allwinner F1C100s + sound_drivers="sdl" + CFLAGS="$CFLAGS -D__MIYOO__" + MFLAGS="-mcpu=arm926ej-s -marm" + platform="miyoo" + ;; pandora) sound_drivers="oss alsa" have_libavcodec="yes" @@ -251,7 +258,7 @@ arm*) esac case "$platform" in -rpi1 | rpi2 | generic | opendingux) +rpi1 | rpi2 | generic | opendingux | miyoo) need_sdl="yes" ;; esac diff --git a/platform/opendingux/inputmap.c b/platform/opendingux/inputmap.c index 7c8c348f..b8851715 100644 --- a/platform/opendingux/inputmap.c +++ b/platform/opendingux/inputmap.c @@ -29,8 +29,13 @@ const struct menu_keymap in_sdl_key_map[] = { { SDLK_DOWN, PBTN_DOWN }, { SDLK_LEFT, PBTN_LEFT }, { SDLK_RIGHT, PBTN_RIGHT }, +#if defined(__MIYOO__) + { SDLK_LALT, PBTN_MOK }, + { SDLK_LCTRL, PBTN_MBACK }, +#else { SDLK_LCTRL, PBTN_MOK }, { SDLK_LALT, PBTN_MBACK }, +#endif { SDLK_SPACE, PBTN_MA2 }, { SDLK_LSHIFT, PBTN_MA3 }, { SDLK_TAB, PBTN_L }, @@ -56,9 +61,14 @@ const char * const _in_sdl_key_names[SDLK_LAST] = { [SDLK_DOWN] = "DOWN", [SDLK_LEFT] = "LEFT", [SDLK_RIGHT] = "RIGHT", +#if defined(__MIYOO__) + [SDLK_LALT] = "A", + [SDLK_LCTRL] = "B", +#else [SDLK_LCTRL] = "A", [SDLK_LALT] = "B", -#if defined(__GCW0__) +#endif +#if defined(__GCW0__) || defined(__MIYOO__) [SDLK_LSHIFT] = "X", [SDLK_SPACE] = "Y", #else @@ -77,6 +87,12 @@ const char * const _in_sdl_key_names[SDLK_LAST] = { [SDLK_PAGEDOWN] = "R2", [SDLK_KP_DIVIDE] = "L3", [SDLK_KP_PERIOD] = "R3", +#elif defined(__MIYOO__) + [SDLK_TAB] = "L1", + [SDLK_BACKSPACE] = "R1", + [SDLK_RALT] = "L2", + [SDLK_RSHIFT] = "R2", + 
[SDLK_RCTRL] = "R", #else [SDLK_TAB] = "L", [SDLK_BACKSPACE] = "R", From f591b8378506e12bf184b4637e4f4112f089c09a Mon Sep 17 00:00:00 2001 From: kub Date: Sun, 6 Mar 2022 22:18:52 +0000 Subject: [PATCH 0730/1110] core vdp, minor fifo write optimisation --- pico/videoport.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/pico/videoport.c b/pico/videoport.c index 6c99fcf8..279347b9 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -345,20 +345,18 @@ int PicoVideoFIFOWrite(int count, int flags, unsigned sr_mask,unsigned sr_flags) int burn = 0, x; // sync only needed if queue is too full or background dma might be deferred - if ((vf->fifo_ql >= 6) | (pv->status & SR_DMA)) + if ((vf->fifo_ql >= 6) | (pv->status & PVS_DMABG)) PicoVideoFIFOSync(lc); + + // determine last ent, ignoring bg dma (pushed back below if new ent created) + x = (vf->fifo_qx + vf->fifo_ql - 1 - !!(pv->status & PVS_DMABG)) & 7; + pv->status = (pv->status & ~sr_mask) | sr_flags; - - x = (vf->fifo_qx + vf->fifo_ql - 1) & 7; - if (unlikely(vf->fifo_queue[x] & FQ_BGDMA)) - x = (x-1) & 7; // ignore bg dma ent (pushed back below if new ent created) - - // determine queue position for entry - if (!(flags & FQ_BGDMA)) - vf->fifo_total += count; + vf->fifo_total += count * !(flags & FQ_BGDMA); if (!vf->fifo_ql) vf->fifo_slot = Cyc2Sl(vf, lc+7); // FIFO latency ~3 vdp slots + // determine queue position for entry count <<= (flags & FQ_BYTE)+3; if (vf->fifo_queue[x] && (vf->fifo_queue[x] & 7) == flags) { // amalgamate entries if of same type and not empty (in case of bgdma) From d26d4c29652003d39a139775c122217ac95000a8 Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 11 Mar 2022 21:06:40 +0000 Subject: [PATCH 0731/1110] build, add miyoo to release script --- Makefile | 10 ++++++++++ tools/release.sh | 8 ++++++++ 2 files changed, 18 insertions(+) diff --git a/Makefile b/Makefile index c6174aea..c3f5b728 100644 --- a/Makefile +++ b/Makefile @@ -118,6 +118,16 @@ use_inputmap 
?= 1 PLATFORM := generic endif ifeq "$(PLATFORM)" "miyoo" +$(TARGET).zip: $(TARGET) + $(RM) -rf .od_data + cp -r platform/opendingux/data/. .od_data + cp platform/game_def.cfg .od_data + cp $< .od_data/PicoDrive + $(STRIP) .od_data/PicoDrive + rm -f .od_data/default.*.desktop .od_data/PicoDrive.dge + cd .od_data && zip -9 -r ../$@ * +all: $(TARGET).zip + OBJS += platform/opendingux/inputmap.o use_inputmap ?= 1 PLATFORM := generic diff --git a/tools/release.sh b/tools/release.sh index c17a0eb7..3731ef9f 100755 --- a/tools/release.sh +++ b/tools/release.sh @@ -14,6 +14,7 @@ # retrofw: mipsel-linux-uclibc (the retrofw toolchain) # gcw0: gcw0-toolchain; mips-toolchain (buildroot, newer gcc) # rg350: rg350-toolchain; mips-toolchain (buildroot, newer gcc) +# miyoo: miyoo # psp: pspdev # additionally needed libs for toolchains in $HOME/opt/lib and $HOME/opt/lib64 @@ -82,6 +83,13 @@ PATH=$HOME/opt/mips-toolchain/usr/bin:$PATH make clean all mv PicoDrive.opk release-$rel/PicoDrive-opendingux_$rel.opk } +[ -z "${plat##* miyoo *}" ] && { +# miyoo: BittBoy >=v1, PocketGo, Powkiddy [QV]90/Q20 (Allwinner F1C100s, ARM926) +TC=$HOME/opt/miyoo PATH=$TC/bin:$PATH CROSS_COMPILE=arm-miyoo-linux-uclibcgnueabi- CFLAGS="-I $TC/arm-miyoo-linux-uclibcgnueabi/sysroot/usr/include -I $TC/arm-miyoo-linux-uclibcgnueabi/sysroot/usr/include/SDL -Wno-unused-result" LDFLAGS="--sysroot $TC/arm-miyoo-linux-uclibcgnueabi/sysroot" ./configure --platform=miyoo +TC=$HOME/opt/miyoo PATH=$TC/bin:$PATH make clean all +mv PicoDrive.zip release-$rel/PicoDrive-miyoo_$rel.zip +} + [ -z "${plat##* psp *}" ] && { # psp (experimental), pspdev SDK toolchain TC=$HOME/opt/pspdev PATH=$TC/bin:$PATH CROSS_COMPILE=psp- CFLAGS="-I$TC/psp/sdk/include -D_POSIX_C_SOURCE=199506L" LDFLAGS="-L$TC/psp/sdk/lib" ./configure --platform=psp From 882f697ad4fce9005b5bd25a01bfc8497f4d533c Mon Sep 17 00:00:00 2001 From: kub Date: Sun, 6 Mar 2022 20:40:50 +0000 Subject: [PATCH 0732/1110] sound, add native rate mode, change resampling 
--- pico/pico_int.h | 2 + pico/sound/sound.c | 2 +- pico/sound/ym2612.c | 126 +++++++++------------- pico/sound/ym2612_arm.S | 83 ++------------ platform/common/config_file.c | 2 +- platform/common/emu.c | 5 +- platform/common/menu_pico.c | 20 ++-- platform/gp2x/emu.c | 2 +- platform/libretro/libretro.c | 10 +- platform/libretro/libretro_core_options.h | 3 +- platform/psp/emu.c | 2 + 11 files changed, 90 insertions(+), 167 deletions(-) diff --git a/pico/pico_int.h b/pico/pico_int.h index 85df6c5b..e95060b0 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -853,6 +853,8 @@ extern short cdda_out_buffer[2*1152]; void cdda_start_play(int lba_base, int lba_offset, int lb_len); +#define YM2612_NATIVE_RATE() (((Pico.m.pal?OSC_PAL:OSC_NTSC)/7 + 3*24) / (6*24)) + void ym2612_sync_timers(int z80_cycles, int mode_old, int mode_new); void ym2612_pack_state(void); void ym2612_unpack_state(void); diff --git a/pico/sound/sound.c b/pico/sound/sound.c index 06d3625a..591a0299 100644 --- a/pico/sound/sound.c +++ b/pico/sound/sound.c @@ -18,7 +18,7 @@ void (*PsndMix_32_to_16l)(s16 *dest, s32 *src, int count) = mix_32_to_16l_stereo // master int buffer to mix to // +1 for a fill triggered by an instruction overhanging into the next scanline -static s32 PsndBuffer[2*(44100+100)/50+2]; +static s32 PsndBuffer[2*(53267+100)/50+2]; // cdda output buffer s16 cdda_out_buffer[2*1152]; diff --git a/pico/sound/ym2612.c b/pico/sound/ym2612.c index 95dca6a9..e7e3a8a8 100644 --- a/pico/sound/ym2612.c +++ b/pico/sound/ym2612.c @@ -974,7 +974,6 @@ static int update_algo_channel(chan_rend_context *ct, unsigned int eg_out, unsig ct->mem = op_calc(ct->phase2, eg_out2, c1); } else ct->mem = 0; - if (ct->eg_timer >= (1<phase3, eg_out, m2); @@ -993,7 +992,6 @@ static int update_algo_channel(chan_rend_context *ct, unsigned int eg_out, unsig if( eg_out2 < ENV_QUIET ) { /* SLOT 2 */ ct->mem+= op_calc(ct->phase2, eg_out2, 0); } - if (ct->eg_timer >= (1<phase3, eg_out, m2); @@ -1013,7 +1011,6 @@ static int 
update_algo_channel(chan_rend_context *ct, unsigned int eg_out, unsig ct->mem = op_calc(ct->phase2, eg_out2, 0); } else ct->mem = 0; - if (ct->eg_timer >= (1<phase3, eg_out, m2); @@ -1033,7 +1030,6 @@ static int update_algo_channel(chan_rend_context *ct, unsigned int eg_out, unsig ct->mem = op_calc(ct->phase2, eg_out2, c1); } else ct->mem = 0; - if (ct->eg_timer >= (1<phase3, eg_out, 0); @@ -1048,7 +1044,6 @@ static int update_algo_channel(chan_rend_context *ct, unsigned int eg_out, unsig /* M1---C1-+-OUT */ /* M2---C2-+ */ /* MEM: not used */ - if (ct->eg_timer >= (1<op1_out>>16; if( eg_out < ENV_QUIET ) { /* SLOT 3 */ @@ -1069,7 +1064,6 @@ static int update_algo_channel(chan_rend_context *ct, unsigned int eg_out, unsig /* +----C2----+ */ m2 = ct->mem; ct->mem = c1 = c2 = ct->op1_out>>16; - if (ct->eg_timer >= (1<phase3, eg_out, m2); @@ -1088,7 +1082,6 @@ static int update_algo_channel(chan_rend_context *ct, unsigned int eg_out, unsig /* M2-+-OUT */ /* C2-+ */ /* MEM: not used */ - if (ct->eg_timer >= (1<op1_out>>16; if( eg_out < ENV_QUIET ) { /* SLOT 3 */ @@ -1109,7 +1102,6 @@ static int update_algo_channel(chan_rend_context *ct, unsigned int eg_out, unsig /* M2-+ */ /* C2-+ */ /* MEM: not used*/ - if (ct->eg_timer >= (1<op1_out>>16; if( eg_out < ENV_QUIET ) { /* SLOT 3 */ @@ -1139,20 +1131,6 @@ static void chan_render_loop(chan_rend_context *ct, s32 *buffer, int length) ct->eg_timer += ct->eg_timer_add; - if (ct->eg_timer >= 3<pack&0xf000)) { - int cnt = (ct->eg_timer>>EG_SH)-2; - if (ct->pack & 8) { /* LFO enabled ? 
(test Earthworm Jim in between demo 1 and 2) */ - int inc = cnt*ct->lfo_inc; - ct->pack = (ct->pack&0xffff) | (advance_lfo(ct->pack >> 16, ct->lfo_cnt, ct->lfo_cnt + inc) << 16); - ct->lfo_cnt += inc; - } - - ct->phase1 += cnt*ct->incr1; - ct->phase2 += cnt*ct->incr2; - ct->phase3 += cnt*ct->incr3; - ct->phase4 += cnt*ct->incr4; - } - while (ct->eg_timer >= 1<eg_timer -= 1<vol_out1 = ct->CH->SLOT[SLOT1].vol_out; - ct->vol_out2 = ct->CH->SLOT[SLOT2].vol_out; - ct->vol_out3 = ct->CH->SLOT[SLOT3].vol_out; - ct->vol_out4 = ct->CH->SLOT[SLOT4].vol_out; - - if (ct->eg_timer < (2<pack&0xf000)) { - if (ct->pack & 4) goto disabled; /* output disabled */ - - if (ct->pack & 8) { /* LFO enabled ? (test Earthworm Jim in between demo 1 and 2) */ - ct->pack = (ct->pack&0xffff) | (advance_lfo(ct->pack >> 16, ct->lfo_cnt, ct->lfo_cnt + ct->lfo_inc) << 16); - ct->lfo_cnt += ct->lfo_inc; - } - - /* calculate channel sample */ - eg_out = ct->vol_out1; - if ( (ct->pack & 8) && (ct->pack&(1<<(SLOT1+8))) ) - eg_out += ct->pack >> (((ct->pack&0xc0)>>6)+24); - - if( eg_out < ENV_QUIET ) /* SLOT 1 */ - { - int out = 0; - - if (ct->pack&0xf000) out = ((ct->op1_out + (ct->op1_out<<16))>>16) << ((ct->pack&0xf000)>>12); /* op1_out0 + op1_out1 */ - ct->op1_out <<= 16; - ct->op1_out |= (unsigned short)op_calc1(ct->phase1, eg_out, out); - } else { - ct->op1_out <<= 16; /* op1_out0 = op1_out1; op1_out1 = 0; */ - } - - if (ct->eg_timer < (2<vol_out3; // volume_calc(&CH->SLOT[SLOT3]); - eg_out2 = ct->vol_out2; // volume_calc(&CH->SLOT[SLOT2]); - eg_out4 = ct->vol_out4; // volume_calc(&CH->SLOT[SLOT4]); - - if (ct->pack & 8) { - unsigned int add = ct->pack >> (((ct->pack&0xc0)>>6)+24); - if (ct->pack & (1<<(SLOT3+8))) eg_out += add; - if (ct->pack & (1<<(SLOT2+8))) eg_out2 += add; - if (ct->pack & (1<<(SLOT4+8))) eg_out4 += add; - } - - smp = update_algo_channel(ct, eg_out, eg_out2, eg_out4); - } - /* done calculating channel sample */ - -disabled: - /* update phase counters AFTER output calculations 
*/ - ct->phase1 += ct->incr1; - ct->phase2 += ct->incr2; - ct->phase3 += ct->incr3; - ct->phase4 += ct->incr4; - } - } + ct->vol_out1 = ct->CH->SLOT[SLOT1].vol_out; + ct->vol_out2 = ct->CH->SLOT[SLOT2].vol_out; + ct->vol_out3 = ct->CH->SLOT[SLOT3].vol_out; + ct->vol_out4 = ct->CH->SLOT[SLOT4].vol_out; + + if (ct->pack & 4) goto disabled; /* output disabled */ + + if (ct->pack & 8) { /* LFO enabled ? (test Earthworm Jim in between demo 1 and 2) */ + ct->pack = (ct->pack&0xffff) | (advance_lfo(ct->pack >> 16, ct->lfo_cnt, ct->lfo_cnt + ct->lfo_inc) << 16); + ct->lfo_cnt += ct->lfo_inc; + } + + /* calculate channel sample */ + eg_out = ct->vol_out1; + if ( (ct->pack & 8) && (ct->pack&(1<<(SLOT1+8))) ) + eg_out += ct->pack >> (((ct->pack&0xc0)>>6)+24); + + if( eg_out < ENV_QUIET ) /* SLOT 1 */ + { + int out = 0; + + if (ct->pack&0xf000) out = ((ct->op1_out + (ct->op1_out<<16))>>16) << ((ct->pack&0xf000)>>12); /* op1_out0 + op1_out1 */ + ct->op1_out <<= 16; + ct->op1_out |= (unsigned short)op_calc1(ct->phase1, eg_out, out); + } else { + ct->op1_out <<= 16; /* op1_out0 = op1_out1; op1_out1 = 0; */ + } + + eg_out = ct->vol_out3; // volume_calc(&CH->SLOT[SLOT3]); + eg_out2 = ct->vol_out2; // volume_calc(&CH->SLOT[SLOT2]); + eg_out4 = ct->vol_out4; // volume_calc(&CH->SLOT[SLOT4]); + + if (ct->pack & 8) { + unsigned int add = ct->pack >> (((ct->pack&0xc0)>>6)+24); + if (ct->pack & (1<<(SLOT3+8))) eg_out += add; + if (ct->pack & (1<<(SLOT2+8))) eg_out2 += add; + if (ct->pack & (1<<(SLOT4+8))) eg_out4 += add; + } + + smp = update_algo_channel(ct, eg_out, eg_out2, eg_out4); + /* done calculating channel sample */ +disabled: + /* update phase counters AFTER output calculations */ + ct->phase1 += ct->incr1; + ct->phase2 += ct->incr2; + ct->phase3 += ct->incr3; + ct->phase4 += ct->incr4; + /* mix sample to output buffer */ if (smp) { smp = clip(smp); /* saturate to 14 bit */ @@ -1615,7 +1587,7 @@ static void OPNSetPres(int pres) double freqbase = (ym2612.OPN.ST.rate) ? 
((double)ym2612.OPN.ST.clock / ym2612.OPN.ST.rate) / pres : 0; ym2612.OPN.eg_timer_add = (1< 44100) + if (PicoIn.sndRate < 8000 || PicoIn.sndRate > 53267) PicoIn.sndRate = 22050; if (*tmp == 'H' || *tmp == 'h') tmp++; if (*tmp == 'Z' || *tmp == 'z') tmp++; diff --git a/platform/common/emu.c b/platform/common/emu.c index b233050a..63a10a38 100644 --- a/platform/common/emu.c +++ b/platform/common/emu.c @@ -57,7 +57,7 @@ int pico_inp_mode; int flip_after_sync; int engineState = PGS_Menu; -static short __attribute__((aligned(4))) sndBuffer[2*44100/50]; +static short __attribute__((aligned(4))) sndBuffer[2*53267/50]; /* tmp buff to reduce stack usage for plats with small stack */ static char static_buff[512]; @@ -1328,6 +1328,9 @@ void emu_sound_start(void) { PicoIn.sndOut = NULL; + // auto-select rate? + if (PicoIn.sndRate > 52000) + PicoIn.sndRate = YM2612_NATIVE_RATE(); if (currentConfig.EmuOpt & EOPT_EN_SOUND) { int is_stereo = (PicoIn.opt & POPT_EN_STEREO) ? 1 : 0; diff --git a/platform/common/menu_pico.c b/platform/common/menu_pico.c index 65e08a01..fc9e769e 100644 --- a/platform/common/menu_pico.c +++ b/platform/common/menu_pico.c @@ -595,24 +595,24 @@ static int menu_loop_adv_options(int id, int keys) static int sndrate_prevnext(int rate, int dir) { - static const int rates[] = { 8000, 11025, 16000, 22050, 44100 }; + static const int rates[] = { 8000, 11025, 16000, 22050, 44100, 53000 }; int i; - for (i = 0; i < 5; i++) + for (i = 0; i < 6; i++) if (rates[i] == rate) break; i += dir ? 1 : -1; - if (i > 4) { + if (i > 5) { if (!(PicoIn.opt & POPT_EN_STEREO)) { PicoIn.opt |= POPT_EN_STEREO; return rates[0]; } - return rates[4]; + return rates[5]; } if (i < 0) { if (PicoIn.opt & POPT_EN_STEREO) { PicoIn.opt &= ~POPT_EN_STEREO; - return rates[4]; + return rates[5]; } return rates[0]; } @@ -630,7 +630,9 @@ static const char *mgn_opt_sound(int id, int *offs) const char *str2; *offs = -8; str2 = (PicoIn.opt & POPT_EN_STEREO) ? 
"stereo" : "mono"; - sprintf(static_buff, "%5iHz %s", PicoIn.sndRate, str2); + if (PicoIn.sndRate > 52000) + sprintf(static_buff, "native %s\n", str2); + else sprintf(static_buff, "%5iHz %s", PicoIn.sndRate, str2); return static_buff; } @@ -652,12 +654,14 @@ static const char *mgn_opt_alpha(int id, int *offs) return static_buff; } +static const char h_quality[] = "native is the FM sound chip rate (53267/52781 Hz),\n" + "select this for the best FM sound quality"; static const char h_lowpass[] = "Low pass filter for sound closer to real hardware"; static menu_entry e_menu_snd_options[] = { mee_onoff ("Enable sound", MA_OPT_ENABLE_SOUND, currentConfig.EmuOpt, EOPT_EN_SOUND), - mee_cust ("Sound Quality", MA_OPT_SOUND_QUALITY, mh_opt_snd, mgn_opt_sound), + mee_cust_h ("Sound Quality", MA_OPT_SOUND_QUALITY, mh_opt_snd, mgn_opt_sound, h_quality), mee_onoff_h ("Sound filter", MA_OPT_SOUND_FILTER, PicoIn.opt, POPT_EN_SNDFILTER, h_lowpass), mee_cust ("Filter strength", MA_OPT_SOUND_ALPHA, mh_opt_alpha, mgn_opt_alpha), mee_end, @@ -667,6 +671,8 @@ static int menu_loop_snd_options(int id, int keys) { static int sel = 0; + if (PicoIn.sndRate > 52000) + PicoIn.sndRate = 53000; me_loop(e_menu_snd_options, &sel); return 0; diff --git a/platform/gp2x/emu.c b/platform/gp2x/emu.c index 37ee82cf..92ea2ec6 100644 --- a/platform/gp2x/emu.c +++ b/platform/gp2x/emu.c @@ -731,7 +731,7 @@ void pemu_sound_start(void) } } -static const int sound_rates[] = { 44100, 32000, 22050, 16000, 11025, 8000 }; +static const int sound_rates[] = { 53000, 44100, 32000, 22050, 16000, 11025, 8000 }; void pemu_sound_stop(void) { diff --git a/platform/libretro/libretro.c b/platform/libretro/libretro.c index 99fc826a..00cf4da5 100644 --- a/platform/libretro/libretro.c +++ b/platform/libretro/libretro.c @@ -1316,6 +1316,8 @@ bool retro_load_game(const struct retro_game_info *info) PicoIn.writeSound = snd_write; memset(sndBuffer, 0, sizeof(sndBuffer)); PicoIn.sndOut = sndBuffer; + if (PicoIn.sndRate > 52000) + 
PicoIn.sndRate = YM2612_NATIVE_RATE(); PsndRerate(0); apply_renderer(); @@ -1566,7 +1568,9 @@ static void update_variables(bool first_run) { PicoDetectRegion(); PicoLoopPrepare(); - PsndRerate(1); + if (PicoIn.sndRate > 52000) + PicoIn.sndRate = YM2612_NATIVE_RATE(); + PsndRerate(!first_run); } old_vout_aspect = vout_aspect; @@ -1687,10 +1691,12 @@ static void update_variables(bool first_run) var.key = "picodrive_sound_rate"; if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { new_sound_rate = atoi(var.value); + if (!strcmp(var.value, "native")) + new_sound_rate = YM2612_NATIVE_RATE(); if (new_sound_rate != PicoIn.sndRate) { /* Update the sound rate */ PicoIn.sndRate = new_sound_rate; - PsndRerate(1); + PsndRerate(!first_run); struct retro_system_av_info av_info; retro_get_system_av_info(&av_info); environ_cb(RETRO_ENVIRONMENT_SET_SYSTEM_AV_INFO, &av_info); diff --git a/platform/libretro/libretro_core_options.h b/platform/libretro/libretro_core_options.h index 6e627d49..4a8a9d47 100644 --- a/platform/libretro/libretro_core_options.h +++ b/platform/libretro/libretro_core_options.h @@ -207,7 +207,7 @@ struct retro_core_option_v2_definition option_defs_us[] = { "picodrive_sound_rate", "Audio Sample Rate (Hz)", "Sample Rate (Hz)", - "Higher values increase sound quality. Lower values may increase performance.", + "Higher values increase sound quality. Lower values may increase performance. Native is the FM sound chip rate, either 53267 Hz for NTSC or 52781 Hz for PAL. 
Select this if you want the most accurate audio.", NULL, "audio", { @@ -215,6 +215,7 @@ struct retro_core_option_v2_definition option_defs_us[] = { { "22050", NULL }, { "32000", NULL }, { "44100", NULL }, + { "native", NULL }, { NULL, NULL }, }, "44100" diff --git a/platform/psp/emu.c b/platform/psp/emu.c index b6d1a346..9c86203f 100644 --- a/platform/psp/emu.c +++ b/platform/psp/emu.c @@ -487,6 +487,8 @@ void pemu_sound_start(void) } } + if (PicoIn.sndRate > 52000) + PicoIn.sndRate = YM2612_NATIVE_RATE(); ret = POPT_EN_FM|POPT_EN_PSG|POPT_EN_STEREO; if (PicoIn.sndRate != PsndRate_old || (PicoIn.opt&ret) != (PicoOpt_old&ret) || Pico.m.pal != pal_old) { PsndRerate(Pico.m.frame_count ? 1 : 0); From e2e2b6ad1bc26ae05a10b5ae7c9acaeecfc0d899 Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 31 Mar 2022 17:27:49 +0000 Subject: [PATCH 0733/1110] sound, prepare FM filtering --- pico/pico.h | 1 + pico/pico_int.h | 1 + pico/sound/blipper.c | 540 +++++++++++++++++++++++++++++++++++++ pico/sound/blipper.h | 195 ++++++++++++++ pico/sound/resampler.c | 261 ++++++++++++++++++ pico/sound/resampler.h | 44 +++ pico/sound/sound.c | 140 +++++++++- platform/common/common.mak | 1 + 8 files changed, 1180 insertions(+), 3 deletions(-) create mode 100644 pico/sound/blipper.c create mode 100644 pico/sound/blipper.h create mode 100644 pico/sound/resampler.c create mode 100644 pico/sound/resampler.h diff --git a/pico/pico.h b/pico/pico.h index dc596796..1baf4438 100644 --- a/pico/pico.h +++ b/pico/pico.h @@ -76,6 +76,7 @@ extern void *p32x_bios_g, *p32x_bios_m, *p32x_bios_s; #define POPT_PWM_IRQ_OPT (1<<22) #define POPT_DIS_FM_SSGEG (1<<23) #define POPT_EN_FM_DAC (1<<24) //x00 0000 +#define POPT_EN_FM_FILTER (1<<25) #define PAHW_MCD (1<<0) #define PAHW_32X (1<<1) diff --git a/pico/pico_int.h b/pico/pico_int.h index e95060b0..837953cf 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -465,6 +465,7 @@ struct PicoSound unsigned int fm_pos; // last FM position in Q20 unsigned int psg_pos; // last 
PSG position in Q16 unsigned int ym2413_pos; // last YM2413 position + unsigned int fm_fir_mul, fm_fir_div; // ratio for FM resampling FIR }; // run tools/mkoffsets pico/pico_int_offs.h if you change these diff --git a/pico/sound/blipper.c b/pico/sound/blipper.c new file mode 100644 index 00000000..72744718 --- /dev/null +++ b/pico/sound/blipper.c @@ -0,0 +1,540 @@ +/* + * Copyright (C) 2013 - Hans-Kristian Arntzen + * + * Permission is hereby granted, free of charge, + * to any person obtaining a copy of this software and + * associated documentation files (the "Software"), + * to deal in the Software without restriction, + * including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * + * 03-2022 kub: modified for arbitrary decimation rates + * 03-2022 kub: modified for 32 bit sample size + */ + +#include "blipper.h" + +#include +#include +#include +#include + +#define BLIPPER_FILTER_AMP 0.75 + +#if BLIPPER_LOG_PERFORMANCE +#include +static double get_time(void) +{ + struct timespec tv; + clock_gettime(CLOCK_MONOTONIC, &tv); + return tv.tv_sec + tv.tv_nsec / 1000000000.0; +} +#endif + +struct blipper +{ + blipper_long_sample_t *output_buffer; + unsigned output_avail; + unsigned output_buffer_samples; + + blipper_sample_t *filter_bank; + + unsigned phase; + unsigned phases; + unsigned phases_div; + unsigned taps; + + blipper_long_sample_t integrator; + blipper_long_sample_t ramp; + blipper_long_sample_t last_sample; + +#if BLIPPER_LOG_PERFORMANCE + double total_time; + double integrator_time; + unsigned long total_samples; +#endif + + int owns_filter; +}; + +void blipper_free(blipper_t *blip) +{ + if (blip) + { +#if BLIPPER_LOG_PERFORMANCE + fprintf(stderr, "[blipper]: Processed %lu samples, using %.6f seconds blipping and %.6f seconds integrating.\n", blip->total_samples, blip->total_time, blip->integrator_time); +#endif + + if (blip->owns_filter) + free(blip->filter_bank); + free(blip->output_buffer); + free(blip); + } +} + +static double besseli0(double x) +{ + unsigned i; + double sum = 0.0; + + double factorial = 1.0; + double factorial_mult = 0.0; + double x_pow = 1.0; + double two_div_pow = 1.0; + double x_sqr = x * x; + + /* Approximate. This is an infinite sum. + * Luckily, it converges rather fast. 
*/ + for (i = 0; i < 18; i++) + { + sum += x_pow * two_div_pow / (factorial * factorial); + + factorial_mult += 1.0; + x_pow *= x_sqr; + two_div_pow *= 0.25; + factorial *= factorial_mult; + } + + return sum; +} + +static double sinc(double v) +{ + if (fabs(v) < 0.00001) + return 1.0; + else + return sin(v) / v; +} + +/* index range = [-1, 1) */ +static double kaiser_window(double index, double beta) +{ + return besseli0(beta * sqrt(1.0 - index * index)); +} + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +static blipper_real_t *blipper_create_sinc(unsigned phases, unsigned taps, + double cutoff, double beta) +{ + unsigned i, filter_len; + double sidelobes, window_mod, window_phase, sinc_phase; + blipper_real_t *filter; + + filter = (blipper_real_t*)malloc(phases * taps * sizeof(*filter)); + if (!filter) + return NULL; + + sidelobes = taps / 2.0; + window_mod = 1.0 / kaiser_window(0.0, beta); + filter_len = phases * taps; + for (i = 0; i < filter_len; i++) + { + window_phase = (double)i / filter_len; /* [0, 1) */ + window_phase = 2.0 * window_phase - 1.0; /* [-1, 1) */ + sinc_phase = window_phase * sidelobes; /* [-taps / 2, taps / 2) */ + + filter[i] = cutoff * sinc(M_PI * sinc_phase * cutoff) * + kaiser_window(window_phase, beta) * window_mod; + } + + return filter; +} + +void blipper_set_ramp(blipper_t *blip, blipper_long_sample_t delta, + unsigned clocks) +{ + blipper_real_t ramp = BLIPPER_FILTER_AMP * delta * blip->phases / clocks; +#if BLIPPER_FIXED_POINT + blip->ramp = (blipper_long_sample_t)floor(ramp * 0x8000 + 0.5); +#else + blip->ramp = ramp; +#endif +} + +/* We differentiate and integrate at different sample rates. + * Differentiation is D(z) = 1 - z^-1 and happens when delta impulses + * are convolved. Integration step after decimation by D is 1 / (1 - z^-D). + * + * If our sinc filter is S(z) we'd have a response of + * S(z) * (1 - z^-1) / (1 - z^-D) after blipping. 
+ * + * Compensate by prefiltering S(z) with the inverse (1 - z^-D) / (1 - z^-1). + * This filtering creates a finite length filter, albeit slightly longer. + * + * phases is the same as decimation rate. */ +static blipper_real_t *blipper_prefilter_sinc(blipper_real_t *filter, unsigned phases, + unsigned taps) +{ + unsigned i; + float filter_amp = BLIPPER_FILTER_AMP / phases; + blipper_real_t *tmp_filter; + blipper_real_t *new_filter = (blipper_real_t*)malloc((phases * taps + phases) * sizeof(*filter)); + if (!new_filter) + goto error; + + tmp_filter = (blipper_real_t*)realloc(filter, (phases * taps + phases) * sizeof(*filter)); + if (!tmp_filter) + goto error; + filter = tmp_filter; + + /* Integrate. */ + new_filter[0] = filter[0]; + for (i = 1; i < phases * taps; i++) + new_filter[i] = new_filter[i - 1] + filter[i]; + for (i = phases * taps; i < phases * taps + phases; i++) + new_filter[i] = new_filter[phases * taps - 1]; + + taps++; + + /* Differentiate with offset of D. */ + memcpy(filter, new_filter, phases * sizeof(*filter)); + for (i = phases; i < phases * taps; i++) + filter[i] = new_filter[i] - new_filter[i - phases]; + + /* blipper_prefilter_sinc() boosts the gain of the sinc. + * Have to compensate for this. Attenuate a bit more to ensure + * we don't clip, especially in fixed point. */ + for (i = 0; i < phases * taps; i++) + filter[i] *= filter_amp; + + free(new_filter); + return filter; + +error: + free(new_filter); + free(filter); + return NULL; +} + +/* Creates a polyphase filter bank. + * Interleaves the filter for cache coherency and possibilities + * for SIMD processing. 
*/ +static blipper_real_t *blipper_interleave_sinc(blipper_real_t *filter, unsigned phases, + unsigned taps) +{ + unsigned t, p; + blipper_real_t *new_filter = (blipper_real_t*)malloc(phases * taps * sizeof(*filter)); + if (!new_filter) + goto error; + + for (t = 0; t < taps; t++) + for (p = 0; p < phases; p++) + new_filter[p * taps + t] = filter[t * phases + p]; + + free(filter); + return new_filter; + +error: + free(new_filter); + free(filter); + return NULL; +} + +#if BLIPPER_FIXED_POINT +static blipper_sample_t *blipper_quantize_sinc(blipper_real_t *filter, unsigned taps) +{ + unsigned t; + blipper_sample_t *filt = (blipper_sample_t*)malloc(taps * sizeof(*filt)); + if (!filt) + goto error; + + for (t = 0; t < taps; t++) + filt[t] = (blipper_sample_t)floor(filter[t] * 0x7fff + 0.5); + + free(filter); + return filt; + +error: + free(filter); + free(filt); + return NULL; +} +#endif + +blipper_sample_t *blipper_create_filter_bank(unsigned phases, unsigned taps, + double cutoff, double beta) +{ + blipper_real_t *sinc_filter; + + /* blipper_prefilter_sinc() will add one tap. + * To keep number of taps as expected, compensate for it here + * to keep the interface more obvious. 
*/ + if (taps <= 1) + return 0; + taps--; + + sinc_filter = blipper_create_sinc(phases, taps, cutoff, beta); + if (!sinc_filter) + return 0; + + sinc_filter = blipper_prefilter_sinc(sinc_filter, phases, taps); + if (!sinc_filter) + return 0; + taps++; + + sinc_filter = blipper_interleave_sinc(sinc_filter, phases, taps); + if (!sinc_filter) + return 0; + +#if BLIPPER_FIXED_POINT + return blipper_quantize_sinc(sinc_filter, phases * taps); +#else + return sinc_filter; +#endif +} + +void blipper_reset(blipper_t *blip) +{ + blip->phase = 0; + memset(blip->output_buffer, 0, + (blip->output_avail + blip->taps) * sizeof(*blip->output_buffer)); + blip->output_avail = 0; + blip->last_sample = 0; + blip->integrator = 0; + blip->ramp = 0; +} + +blipper_t *blipper_new(unsigned taps, double cutoff, double beta, + unsigned decimation, unsigned buffer_samples, + const blipper_sample_t *filter_bank) +{ + blipper_t *blip = NULL; + + /* Sanity check. Not strictly required to be supported in C. */ + if ((-3 >> 2) != -1) + { + fprintf(stderr, "Integer right shift not supported.\n"); + return NULL; + } + + blip = (blipper_t*)calloc(1, sizeof(*blip)); + if (!blip) + return NULL; + + blip->phases = decimation; + blip->phases_div = 0x100000000ULL/decimation; + + blip->taps = taps; + + if (!filter_bank) + { + blip->filter_bank = blipper_create_filter_bank(blip->phases, taps, cutoff, beta); + if (!blip->filter_bank) + goto error; + blip->owns_filter = 1; + } + else + blip->filter_bank = (blipper_sample_t*)filter_bank; + + blip->output_buffer = (blipper_long_sample_t*)calloc(buffer_samples + blip->taps, + sizeof(*blip->output_buffer)); + if (!blip->output_buffer) + goto error; + blip->output_buffer_samples = buffer_samples + blip->taps; + + return blip; + +error: + blipper_free(blip); + return NULL; +} + +inline void blipper_push_delta(blipper_t *blip, blipper_long_sample_t delta, unsigned clocks_step) +{ + unsigned target_output, filter_phase, taps, i; + const blipper_sample_t *response; + 
blipper_long_sample_t *target; + + blip->phase += clocks_step; + + target_output = ((unsigned long long)blip->phase * blip->phases_div) >> 32; + + filter_phase = (target_output * blip->phases) - blip->phase; + if (filter_phase >= blip->phases) // rounding error for *(1/phases) + filter_phase += blip->phases, target_output ++; + response = blip->filter_bank + blip->taps * filter_phase; + + target = blip->output_buffer + target_output; + taps = blip->taps; + + blip->output_avail = target_output; + + for (i = 1; i < taps; i += 2) { + target[i-1] += delta * response[i-1]; + target[i ] += delta * response[i ]; + } + if (taps & 1) + target[i-1] += delta * response[i-1]; +} + +static inline void _blipper_push_samples(blipper_t *blip, + const char *data, blipper_long_sample_t (*get)(const char *), + unsigned samples, unsigned stride, unsigned clocks_step) +{ + unsigned s; + unsigned clocks_skip = 0; + blipper_long_sample_t last = blip->last_sample; + +#if BLIPPER_LOG_PERFORMANCE + double t0 = get_time(); +#endif + + for (s = 0; s < samples; s++, data += stride) + { + blipper_long_sample_t val = get(data); + clocks_skip += clocks_step; + if (val != last) + { + blipper_push_delta(blip, val - last, clocks_skip); + clocks_skip = 0; + last = val; + } + } + + blip->phase += clocks_skip; + blip->output_avail = ((unsigned long long)blip->phase * blip->phases_div) >> 32; + if ((blip->output_avail+1) * blip->phases <= blip->phase) + blip->output_avail++; // rounding error for *(1/phases) + blip->last_sample = last; + +#if BLIPPER_LOG_PERFORMANCE + blip->total_time += get_time() - t0; + blip->total_samples += samples; +#endif +} + +static inline blipper_long_sample_t _blipper_get_short(const char *data) +{ + return *(blipper_sample_t *)data; +} + +static inline blipper_long_sample_t _blipper_get_long(const char *data) +{ + return *(blipper_long_sample_t *)data; +} + +void blipper_push_samples(blipper_t *blip, const blipper_sample_t *data, + unsigned samples, unsigned stride, unsigned 
clocks_step) +{ + _blipper_push_samples(blip, (const char *)data, _blipper_get_short, samples, + stride * sizeof(*data), clocks_step); +} + +void blipper_push_long_samples(blipper_t *blip, const blipper_long_sample_t *data, + unsigned samples, unsigned stride, unsigned clocks_step) +{ + _blipper_push_samples(blip, (const char *)data, _blipper_get_long, samples, + stride * sizeof(*data), clocks_step); +} + +unsigned blipper_read_phase(blipper_t *blip) +{ + return blip->phase; +} + +unsigned blipper_read_avail(blipper_t *blip) +{ + return blip->output_avail; +} + +static inline void _blipper_put_short(char *data, blipper_long_sample_t val) +{ + *(blipper_sample_t *)data = val; +} + +static inline void _blipper_put_long(char *data, blipper_long_sample_t val) +{ + *(blipper_long_sample_t *)data = val; +} + +static inline void _blipper_read(blipper_t *blip, int clamp, char *output, + void (*put)(char *, blipper_long_sample_t), unsigned samples, unsigned stride) +{ + unsigned s; + blipper_long_sample_t sum = blip->integrator; + const blipper_long_sample_t *out = blip->output_buffer; + blipper_long_sample_t ramp = blip->ramp; + +#if BLIPPER_LOG_PERFORMANCE + double t0 = get_time(); +#endif + +#if BLIPPER_FIXED_POINT + for (s = 0; s < samples; s++, output += stride) + { + blipper_long_sample_t quant; + + /* Cannot overflow. Also add a leaky integrator. + Mitigates DC shift numerical instability which is + inherent for integrators. */ + sum += ((out[s] + ramp) >> 1) - (sum >> 9); + + /* Rounded. With leaky integrator, this cannot overflow. */ + quant = (sum + 0x4000) >> 15; + + /* Clamp. quant can potentially have range [-0x10000, 0xffff] here. + * In both cases, top 16-bits will have a uniform bit pattern which can be exploited. 
*/ + if (clamp && (blipper_sample_t)quant != quant) + { + quant = (quant >> 16) ^ 0x7fff; + sum = quant << 15; + } + + put(output, quant); + } +#else + for (s = 0; s < samples; s++, output += stride) + { + /* Leaky integrator, same as fixed point (1.0f / 512.0f) */ + sum += out[s] + ramp - sum * 0.00195f; + put(output, sum); + } +#endif + + /* Don't bother with ring buffering. + * The entire buffer should be read out ideally anyways. */ + memmove(blip->output_buffer, blip->output_buffer + samples, + (blip->output_avail + blip->taps - samples) * sizeof(*out)); + memset(blip->output_buffer + blip->output_avail + blip->taps - samples, 0, samples * sizeof(*out)); + blip->output_avail -= samples; + blip->phase -= samples * blip->phases; + + blip->integrator = sum; + +#if BLIPPER_LOG_PERFORMANCE + blip->integrator_time += get_time() - t0; +#endif +} + +void blipper_read(blipper_t *blip, blipper_sample_t *output, unsigned samples, + unsigned stride) +{ + _blipper_read(blip, 1, (char *)output, _blipper_put_short, samples, + stride * sizeof(*output)); +} + +void blipper_read_long(blipper_t *blip, blipper_long_sample_t *output, unsigned samples, + unsigned stride) +{ + _blipper_read(blip, 0, (char *)output, _blipper_put_long, samples, + stride * sizeof(*output)); +} diff --git a/pico/sound/blipper.h b/pico/sound/blipper.h new file mode 100644 index 00000000..20b75975 --- /dev/null +++ b/pico/sound/blipper.h @@ -0,0 +1,195 @@ +/* + * Copyright (C) 2013 - Hans-Kristian Arntzen + * + * Permission is hereby granted, free of charge, + * to any person obtaining a copy of this software and + * associated documentation files (the "Software"), + * to deal in the Software without restriction, + * including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice 
and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef BLIPPER_H__ +#define BLIPPER_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/* Compile time configurables. */ +#ifndef BLIPPER_LOG_PERFORMANCE +#define BLIPPER_LOG_PERFORMANCE 0 +#endif + +#ifndef BLIPPER_FIXED_POINT +#define BLIPPER_FIXED_POINT 1 +#endif + +/* Set to float or double. + * long double is unlikely to provide any improved precision. */ +#ifndef BLIPPER_REAL_T +#define BLIPPER_REAL_T float +#endif + +/* Allows including several implementations in one lib. */ +#if BLIPPER_FIXED_POINT +#define BLIPPER_MANGLE(x) x##_fixed +#else +#define BLIPPER_CONCAT2(a, b) a ## b +#define BLIPPER_CONCAT(a, b) BLIPPER_CONCAT2(a, b) +#define BLIPPER_MANGLE(x) BLIPPER_CONCAT(x##_, BLIPPER_REAL_T) +#endif + +#include + +typedef struct blipper blipper_t; +typedef BLIPPER_REAL_T blipper_real_t; + +#if BLIPPER_FIXED_POINT +#ifdef HAVE_STDINT_H +#include +typedef int16_t blipper_sample_t; +typedef int32_t blipper_long_sample_t; +#else +#if SHRT_MAX == 0x7fff +typedef short blipper_sample_t; +#elif INT_MAX == 0x7fff +typedef int blipper_sample_t; +#else +#error "Cannot find suitable type for blipper_sampler_t." +#endif + +#if INT_MAX == 0x7fffffffl +typedef int blipper_long_sample_t; +#elif LONG_MAX == 0x7fffffffl +typedef long blipper_long_sample_t; +#else +#error "Cannot find suitable type for blipper_long_sample_t." 
+#endif +#endif +#else +typedef BLIPPER_REAL_T blipper_sample_t; +typedef BLIPPER_REAL_T blipper_long_sample_t; /* Meaningless for float version. */ +#endif + +/* Create a new blipper. + * taps: Number of filter taps per impulse. + * + * cutoff: Cutoff frequency in the passband. Has a range of [0, 1]. + * + * beta: Beta used for Kaiser window. + * + * decimation: Sets decimation rate. + * The input sampling rate is then output_rate * decimation. + * buffer_samples: The maximum number of processed output samples that can be + * buffered up by blipper. + * + * filter_bank: An optional filter which has already been created by + * blipper_create_filter_bank(). blipper_new() does not take ownership + * of the buffer; it must be freed by the caller. + * If non-NULL, cutoff and beta will be ignored. + * + * Some sane values: + * taps = 64, cutoff = 0.85, beta = 8.0 + */ +#define blipper_new BLIPPER_MANGLE(blipper_new) +blipper_t *blipper_new(unsigned taps, double cutoff, double beta, + unsigned decimation, unsigned buffer_samples, const blipper_sample_t *filter_bank); + +/* Reset the blipper to its initial state. */ +#define blipper_reset BLIPPER_MANGLE(blipper_reset) +void blipper_reset(blipper_t *blip); + +/* Create a filter which can be passed to blipper_new() in filter_bank. + * Arguments to decimation and taps must match. */ +#define blipper_create_filter_bank BLIPPER_MANGLE(blipper_create_filter_bank) +blipper_sample_t *blipper_create_filter_bank(unsigned decimation, + unsigned taps, double cutoff, double beta); + +/* Frees the blipper. blip can be NULL (no-op). */ +#define blipper_free BLIPPER_MANGLE(blipper_free) +void blipper_free(blipper_t *blip); + +/* Add a ramp to the synthesized wave. The ramp is added to the integrator + * on every input sample. + * The amount added is delta / clocks per input sample. + * The interface is fractional to have better accuracy with fixed point. + * This can be combined with a delta train to synthesize e.g. sawtooth waves. 
+ * When using a ramp, care must be taken to ensure that the integrator does not saturate. + * It is recommended to use the floating point implementation when using the ramp. */ +#define blipper_set_ramp BLIPPER_MANGLE(blipper_set_ramp) +void blipper_set_ramp(blipper_t *blip, blipper_long_sample_t delta, + unsigned clocks); + +/* Data pushing interfaces. One of these should be used exclusively. */ + +/* Push a single delta, which occurs clocks_step input samples after the + * last time a delta was pushed. The delta value is the difference signal + * between the new sample and the previous. + * It is unnecessary to pass a delta of 0. + * If the deltas are known beforehand (e.g. when synthesizing a waveform), + * this is a more efficient interface than blipper_push_samples(). + * + * The caller must ensure not to push deltas in a way that can destabilize + * the final integration. + */ +#define blipper_push_delta BLIPPER_MANGLE(blipper_push_delta) +void blipper_push_delta(blipper_t *blip, blipper_long_sample_t delta, unsigned clocks_step); + +/* Push raw samples. blipper will find the deltas itself and push them. + * stride is the number of samples between each sample to be used. + * This can be used to push interleaved stereo data to two independent + * blippers. + */ +#define blipper_push_samples BLIPPER_MANGLE(blipper_push_samples) +void blipper_push_samples(blipper_t *blip, const blipper_sample_t *delta, + unsigned samples, unsigned stride, unsigned clocks_step); +#define blipper_push_long_samples BLIPPER_MANGLE(blipper_push_long_samples) +void blipper_push_long_samples(blipper_t *blip, const blipper_long_sample_t *delta, + unsigned samples, unsigned stride, unsigned clocks_step); + +/* Returns the number of samples available for reading using + * blipper_read(). 
+ */ +#define blipper_read_avail BLIPPER_MANGLE(blipper_read_avail) +unsigned blipper_read_avail(blipper_t *blip); + +/* Returns the current filter phase + */ +#define blipper_read_phase BLIPPER_MANGLE(blipper_read_phase) +unsigned blipper_read_phase(blipper_t *blip); + +/* Reads processed samples. The caller must ensure to not read + * more than what is returned from blipper_read_avail(). + * As in blipper_push_samples(), stride is the number of samples + * between each output sample in output. + * Can be used to write to an interleaved stereo buffer. + */ +#define blipper_read BLIPPER_MANGLE(blipper_read) +void blipper_read(blipper_t *blip, blipper_sample_t *output, unsigned samples, + unsigned stride); +#define blipper_read_long BLIPPER_MANGLE(blipper_long_read) +void blipper_read_long(blipper_t *blip, blipper_long_sample_t *output, unsigned samples, + unsigned stride); + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/pico/sound/resampler.c b/pico/sound/resampler.c new file mode 100644 index 00000000..5b68d0b4 --- /dev/null +++ b/pico/sound/resampler.c @@ -0,0 +1,261 @@ +/* Configurable fixed point resampling SINC filter for mono and stereo audio. + * + * (C) 2022 kub + * + * This work is licensed under the terms of any of these licenses + * (at your option): + * - GNU GPL, version 2 or later. + * - MAME license. + * See COPYING file in the top-level directory. 
+ */ + + +/* SINC filter generation taken from the blipper library, its license is: + * + * Copyright (C) 2013 - Hans-Kristian Arntzen + * + * Permission is hereby granted, free of charge, + * to any person obtaining a copy of this software and + * associated documentation files (the "Software"), + * to deal in the Software without restriction, + * including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + +#include +#include +#include +#include + +#include "../pico_types.h" +#include "resampler.h" + +static double besseli0(double x) +{ + unsigned i; + double sum = 0.0; + + double factorial = 1.0; + double factorial_mult = 0.0; + double x_pow = 1.0; + double two_div_pow = 1.0; + double x_sqr = x * x; + + /* Approximate. This is an infinite sum. + * Luckily, it converges rather fast. 
*/ + for (i = 0; i < 18; i++) + { + sum += x_pow * two_div_pow / (factorial * factorial); + + factorial_mult += 1.0; + x_pow *= x_sqr; + two_div_pow *= 0.25; + factorial *= factorial_mult; + } + + return sum; +} + +static double sinc(double v) +{ + if (fabs(v) < 0.00001) + return 1.0; + else + return sin(v) / v; +} + +/* index range = [-1, 1) */ +static double kaiser_window(double index, double beta) +{ + return besseli0(beta * sqrt(1.0 - index * index)); +} + +/* Creates a polyphase SINC filter (:phases banks with :taps each) + * Interleaves the filter for cache coherency and possibilities for SIMD */ +static s16 *create_sinc(unsigned phases, unsigned taps, double cutoff, double beta) +{ + unsigned i, filter_len; + double sidelobes, window_mod, window_phase, sinc_phase; + s16 *filter; + double tap; + + filter = (s16*)malloc(phases * taps * sizeof(*filter)); + if (!filter) + return NULL; + + sidelobes = taps / 2.0; + window_mod = 1.0 / kaiser_window(0.0, beta); + filter_len = phases * taps; + + for (i = 0; i < filter_len; i++) + { + window_phase = (double)i / filter_len; /* [0, 1) */ + window_phase = 2.0 * window_phase - 1.0; /* [-1, 1) */ + sinc_phase = window_phase * sidelobes; /* [-taps / 2, taps / 2) */ + + tap = (cutoff * sinc(M_PI * sinc_phase * cutoff) * + kaiser_window(window_phase, beta) * window_mod); + /* assign taking filter bank interleaving into account: + * :phases banks of length :taps */ + filter[(i%phases)*taps + (i/phases)] = tap * 0x7fff + 0.5; + } + + return filter; +} + +/* Public interface */ + +/* Release a resampler */ +void resampler_free(resampler_t *rs) +{ + if (rs) + { + free(rs->buffer); + free(rs->filter); + free(rs); + } +} + +/* Create a resampler with upsampling factor :interpolation and downsampling + * factor :decimation, Kaiser windowed SINC polyphase FIR with bank size :taps. + * The created filter has a size of :taps*:interpolation for upsampling and + * :taps*:decimation for downsampling. 
:taps is limiting the cost per sample and + * should be big enough to avoid inaccuracy (>= 8, higher is more accurate). + * :cutoff is in [0..1] with 1 representing the Nyquist rate after decimation. + * :beta is the Kaiser window beta. + * :max_input is the maximum length in a resampler_update call */ +resampler_t *resampler_new(unsigned taps, unsigned interpolation, unsigned decimation, + double cutoff, double beta, unsigned max_input, int stereo) +{ + resampler_t *rs = NULL; + + if (taps == 0 || interpolation == 0 || decimation == 0 || max_input == 0) + return NULL; /* invalid parameters */ + + rs = (resampler_t*)calloc(1, sizeof(*rs)); + if (!rs) + return NULL; /* out of memory */ + + /* :cutoff is relative to the decimated frequency, but filtering is taking + * place at the interpolated frequency. It needs to be adapted if resampled + * rate is lower. Also needs more taps to keep the transistion band width */ + if (decimation > interpolation) { + cutoff = cutoff * interpolation/decimation; + taps = taps * decimation/interpolation; + } + + rs->interpolation = interpolation; + rs->decimation = decimation; + rs->taps = taps; + /* optimizers for resampler_update: */ + rs->interp_inv = 0x100000000ULL / interpolation; + rs->ratio_int = decimation / interpolation; + + rs->filter = create_sinc(interpolation, taps, cutoff, beta); + if (!rs->filter) + goto error; + + rs->stereo = !!stereo; + rs->buffer_sz = (max_input * decimation/interpolation) + decimation + 1; + rs->buffer = calloc(1, rs->buffer_sz * (stereo ? 2:1) * sizeof(*rs->buffer)); + if (!rs->buffer) + goto error; + + return rs; + +error: + if (rs->filter) + free(rs->filter); + if (rs->buffer) + free(rs->buffer); + free(rs); + return NULL; +} + +/* Obtain :length resampled audio frames in :buffer. 
Use :get_samples to obtain + * the needed amount of input samples */ +void resampler_update(resampler_t *rs, s32 *buffer, int length, + void (*get_samples)(s32 *buffer, int length, int stereo)) +{ + s16 *u; + s32 *p, *q = buffer; + int spf = (rs->stereo?2:1); + s32 inlen; + s32 l, r; + int n, i; + + if (length <= 0) return; + + /* compute samples needed on input side: + * inlen = (length*decimation + interpolation-phase) / interpolation */ + n = length*rs->decimation + rs->interpolation-rs->phase; + inlen = ((u64)n * rs->interp_inv) >> 32; /* input samples, n/interpolation */ + if (inlen * rs->interpolation < n - rs->interpolation) inlen++; /* rounding */ + + /* reset buffer to start if the input doesn't fit into the buffer (history source and destination may overlap, hence memmove) */ + if (rs->buffer_idx + inlen+rs->taps >= rs->buffer_sz) { + memmove(rs->buffer, rs->buffer + rs->buffer_idx*spf, rs->taps*spf*sizeof(*rs->buffer)); + rs->buffer_idx = 0; + } + p = rs->buffer + rs->buffer_idx*spf; + + /* generate input samples */ + if (inlen > 0) + get_samples(p + rs->taps*spf, inlen, rs->stereo); + + if (rs->stereo) { + while (--length >= 0) { + /* compute filter output */ + u = rs->filter + (rs->phase * rs->taps); + for (i = 0, l = r = 0; i < rs->taps-1; i += 2) + { n = *u++; l += n * p[2*i ]; r += n * p[2*i+1]; + n = *u++; l += n * p[2*i+2]; r += n * p[2*i+3]; } + if (i < rs->taps) + { n = *u++; l += n * p[2*i ]; r += n * p[2*i+1]; } + *q++ = l >> 16, *q++ = r >> 16; + /* advance position to next sample */ + rs->phase -= rs->decimation; +// if (rs->ratio_int) { + rs->phase += rs->ratio_int*rs->interpolation, + p += 2*rs->ratio_int, rs->buffer_idx += rs->ratio_int; +// } + if (rs->phase < 0) + { rs->phase += rs->interpolation, p += 2, rs->buffer_idx ++; } + } + } else { + while (--length >= 0) { + /* compute filter output */ + u = rs->filter + (rs->phase * rs->taps); + for (i = 0, l = r = 0; i < rs->taps-1; i += 2) + { n = *u++; l += n * p[ i ]; + n = *u++; l += n * p[ i+1]; } + if (i < rs->taps) + { n = *u++; l += n * p[ i 
]; } + *q++ = l >> 16; + /* advance position to next sample */ + rs->phase -= rs->decimation; +// if (rs->ratio_int) { + rs->phase += rs->ratio_int*rs->interpolation, + p += rs->ratio_int, rs->buffer_idx += rs->ratio_int; +// } + if (rs->phase < 0) + { rs->phase += rs->interpolation, p += 1, rs->buffer_idx ++; } + } + } +} diff --git a/pico/sound/resampler.h b/pico/sound/resampler.h new file mode 100644 index 00000000..eef60c03 --- /dev/null +++ b/pico/sound/resampler.h @@ -0,0 +1,44 @@ +/* Configurable fixed point resampling SINC filter for mono and stereo audio. + * + * (C) 2022 kub + * + * This work is licensed under the terms of any of these licenses + * (at your option): + * - GNU GPL, version 2 or later. + * - MAME license. + * See COPYING file in the top-level directory. + */ + +struct resampler { + int stereo; // mono or stereo? + int taps; // taps to compute per output sample + int interpolation; // upsampling factor (numerator) + int decimation; // downsampling factor (denominator) + int ratio_int; // floor(decimation/interpolation) + u32 interp_inv; // Q32, 1.0/interpolation + s16 *filter; // filter taps + s32 *buffer; // filter history and input buffer (w/o zero stuffing) + int buffer_sz; // buffer size in frames + int buffer_idx; // buffer offset + int phase; // filter phase for last output sample +}; +typedef struct resampler resampler_t; + + +/* Release a resampler */ +void resampler_free(resampler_t *r); +/* Create a resampler with upsampling factor :interpolation and downsampling + * factor :decimation, Kaiser windowed SINC polyphase FIR with bank size :taps. + * The created filter has a size of :taps*:interpolation for upsampling and + * :taps*:decimation for downsampling. :taps is limiting the cost per sample and + * should be big enough to avoid inaccuracy (>= 8, higher is more accurate). + * :cutoff is in [0..1] with 1 representing the Nyquist rate after decimation. + * :beta is the Kaiser window beta. 
+ * :max_input is the maximum length in a resampler_update call */ +resampler_t *resampler_new(unsigned taps, unsigned interpolation, unsigned decimation, + double cutoff, double beta, unsigned max_input, int stereo); +/* Obtain :length resampled audio frames in :buffer. Use :get_samples to obtain + * the needed amount of input samples */ +void resampler_update(resampler_t *r, s32 *buffer, int length, + void (*generate_samples)(s32 *buffer, int length, int stereo)); + diff --git a/pico/sound/sound.c b/pico/sound/sound.c index 591a0299..7b7d8de4 100644 --- a/pico/sound/sound.c +++ b/pico/sound/sound.c @@ -14,6 +14,12 @@ #include "mix.h" #include "emu2413/emu2413.h" +#ifdef USE_BLIPPER +#include "blipper.h" +#else +#include "resampler.h" +#endif + void (*PsndMix_32_to_16l)(s16 *dest, s32 *src, int count) = mix_32_to_16l_stereo; // master int buffer to mix to @@ -32,6 +38,11 @@ OPLL old_opll; static OPLL *opll = NULL; unsigned YM2413_reg; +#ifdef USE_BLIPPER +static blipper_t *fmlblip, *fmrblip; +#else +static resampler_t *fmresampler; +#endif PICO_INTERNAL void PsndInit(void) { @@ -44,6 +55,13 @@ PICO_INTERNAL void PsndExit(void) { OPLL_delete(opll); opll = NULL; + +#ifdef USE_BLIPPER + blipper_free(fmlblip); fmlblip = NULL; + blipper_free(fmrblip); fmrblip = NULL; +#else + resampler_free(fmresampler); fmresampler = NULL; +#endif } PICO_INTERNAL void PsndReset(void) @@ -53,6 +71,111 @@ PICO_INTERNAL void PsndReset(void) timers_reset(); } +int (*PsndFMUpdate)(s32 *buffer, int length, int stereo, int is_buf_empty); + +// FM polyphase FIR resampling + +#ifdef USE_BLIPPER +#define FMFIR_TAPS 11 + +// resample FM from its native 53267Hz/52781Hz with the blipper library +static u32 ymmulinv; + +int YM2612UpdateFIR(s32 *buffer, int length, int stereo, int is_buf_empty) +{ + int mul = Pico.snd.fm_fir_mul, div = Pico.snd.fm_fir_div; + s32 *p = buffer, *q = buffer; + int ymlen; + int ret = 0; + + if (length <= 0) return ret; + + // FM samples needed: (length*div + 
div-blipper_read_phase(fmlblip)) / mul + ymlen = ((unsigned long long)(length*div + div-blipper_read_phase(fmlblip)) * ymmulinv) >> 32; + if (ymlen > 0) + ret = YM2612UpdateOne(p, ymlen, stereo, is_buf_empty); + + if (stereo) { + blipper_push_long_samples(fmlblip, p , ymlen, 2, mul); + blipper_push_long_samples(fmrblip, p+1, ymlen, 2, mul); + blipper_read_long(fmlblip, q , blipper_read_avail(fmlblip), 2); + blipper_read_long(fmrblip, q+1, blipper_read_avail(fmrblip), 2); + } else { + blipper_push_long_samples(fmlblip, p , ymlen, 1, mul); + blipper_read_long(fmlblip, q , blipper_read_avail(fmlblip), 1); + } + + return ret; +} + +static void YM2612_setup_FIR(int inrate, int outrate, int stereo) +{ + int mindiff = 999; + int diff, mul, div; + int maxdecim = 1500/FMFIR_TAPS; + + // compute filter ratio with smallest error for a decent number of taps + for (div = maxdecim/2; div <= maxdecim; div++) { + mul = (outrate*div + inrate/2) / inrate; + diff = outrate*div/mul - inrate; + if (abs(diff) < abs(mindiff)) { + mindiff = diff; + Pico.snd.fm_fir_mul = mul; + Pico.snd.fm_fir_div = div; + } + } + ymmulinv = 0x100000000ULL / Pico.snd.fm_fir_mul; /* 1/mul in Q32, for the selected ratio (not the last one tried) */ + printf("FM polyphase FIR ratio=%d/%d error=%.3f%%\n", + Pico.snd.fm_fir_mul, Pico.snd.fm_fir_div, 100.0*mindiff/inrate); + + // create blipper (modified for polyphase resampling). Not really perfect for + // FM, but has SINC generator, a good window, and computes the filter in Q16. 
+ blipper_free(fmlblip); + blipper_free(fmrblip); + fmlblip = blipper_new(FMFIR_TAPS, 0.85, 8.5, Pico.snd.fm_fir_div, 1000, NULL); + if (!stereo) return; + fmrblip = blipper_new(FMFIR_TAPS, 0.85, 8.5, Pico.snd.fm_fir_div, 1000, NULL); +} +#else +#define FMFIR_TAPS 8 + +// resample FM from its native 53267Hz/52781Hz with polyphase FIR filter +static int ymchans; +static void YM2612Update(s32 *buffer, int length, int stereo) +{ + ymchans = YM2612UpdateOne(buffer, length, stereo, 1); +} + +int YM2612UpdateFIR(s32 *buffer, int length, int stereo, int is_buf_empty) +{ + resampler_update(fmresampler, buffer, length, YM2612Update); + return ymchans; +} + +static void YM2612_setup_FIR(int inrate, int outrate, int stereo) +{ + int mindiff = 999; + int diff, mul, div; + int maxmult = 30; // max interpolation factor + + // compute filter ratio with largest multiplier for smallest error + for (mul = maxmult/2; mul <= maxmult; mul++) { + div = (inrate*mul + outrate/2) / outrate; + diff = outrate*div/mul - inrate; + if (abs(diff) <= abs(mindiff)) { + mindiff = diff; + Pico.snd.fm_fir_mul = mul; + Pico.snd.fm_fir_div = div; + } + } + printf("FM polyphase FIR ratio=%d/%d error=%.3f%%\n", + Pico.snd.fm_fir_mul, Pico.snd.fm_fir_div, 100.0*mindiff/inrate); + + resampler_free(fmresampler); + fmresampler = resampler_new(FMFIR_TAPS, Pico.snd.fm_fir_mul, Pico.snd.fm_fir_div, + 0.85, 2.35, 2*inrate/50, stereo); +} +#endif // to be called after changing sound rate or chips void PsndRerate(int preserve_state) @@ -60,6 +183,7 @@ void PsndRerate(int preserve_state) void *state = NULL; int target_fps = Pico.m.pal ? 50 : 60; int target_lines = Pico.m.pal ? 313 : 262; + int ym2612_clock = Pico.m.pal ? OSC_PAL/7 : OSC_NTSC/7; if (preserve_state) { state = malloc(0x204); @@ -67,9 +191,19 @@ void PsndRerate(int preserve_state) ym2612_pack_state(); memcpy(state, YM2612GetRegs(), 0x204); } - YM2612Init(Pico.m.pal ? 
OSC_PAL/7 : OSC_NTSC/7, PicoIn.sndRate, + if (PicoIn.opt & POPT_EN_FM_FILTER) { + int ym2612_rate = (ym2612_clock+(6*24)/2) / (6*24); + YM2612Init(ym2612_clock, ym2612_rate, ((PicoIn.opt&POPT_DIS_FM_SSGEG) ? 0 : ST_SSG) | ((PicoIn.opt&POPT_EN_FM_DAC) ? ST_DAC : 0)); + YM2612_setup_FIR(ym2612_rate, PicoIn.sndRate, PicoIn.opt & POPT_EN_STEREO); + PsndFMUpdate = YM2612UpdateFIR; + } else { + YM2612Init(ym2612_clock, PicoIn.sndRate, + ((PicoIn.opt&POPT_DIS_FM_SSGEG) ? 0 : ST_SSG) | + ((PicoIn.opt&POPT_EN_FM_DAC) ? ST_DAC : 0)); + PsndFMUpdate = YM2612UpdateOne; + } if (preserve_state) { // feed it back it's own registers, just like after loading state memcpy(YM2612GetRegs(), state, 0x204); @@ -267,7 +401,7 @@ PICO_INTERNAL void PsndDoFM(int cyc_to) pos <<= 1; } if (PicoIn.opt & POPT_EN_FM) - YM2612UpdateOne(PsndBuffer + pos, len, stereo, 1); + PsndFMUpdate(PsndBuffer + pos, len, stereo, 1); } // cdda @@ -383,7 +517,7 @@ static int PsndRender(int offset, int length) s32 *fmbuf = buf32 + ((fmlen-offset) << stereo); Pico.snd.fm_pos += (length-fmlen) << 20; if (PicoIn.opt & POPT_EN_FM) - YM2612UpdateOne(fmbuf, length-fmlen, stereo, 1); + PsndFMUpdate(fmbuf, length-fmlen, stereo, 1); } // CD: PCM sound diff --git a/platform/common/common.mak b/platform/common/common.mak index 183a7159..9a0ad6b3 100644 --- a/platform/common/common.mak +++ b/platform/common/common.mak @@ -123,6 +123,7 @@ SRCS_COMMON += $(R)pico/carthw/svp/compiler.c endif # sound SRCS_COMMON += $(R)pico/sound/sound.c +SRCS_COMMON += $(R)pico/sound/resampler.c # $(R)pico/sound/blipper.c SRCS_COMMON += $(R)pico/sound/sn76496.c $(R)pico/sound/ym2612.c SRCS_COMMON += $(R)pico/sound/emu2413/emu2413.c ifneq "$(ARCH)$(asm_mix)" "arm1" From a987b67801e39b3ad5d349253ef53070b7d3272b Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 31 Mar 2022 22:02:48 +0000 Subject: [PATCH 0734/1110] sound, fix config save/load for native quality mode --- platform/common/config_file.c | 9 +++++++-- platform/common/menu_pico.c | 2 +- 2 
files changed, 8 insertions(+), 3 deletions(-) diff --git a/platform/common/config_file.c b/platform/common/config_file.c index 1c4b00cf..5c2311eb 100644 --- a/platform/common/config_file.c +++ b/platform/common/config_file.c @@ -275,8 +275,13 @@ static int custom_read(menu_entry *me, const char *var, const char *val) case MA_OPT_SOUND_QUALITY: if (strcasecmp(var, "Sound Quality") != 0) return 0; PicoIn.sndRate = strtoul(val, &tmp, 10); - if (PicoIn.sndRate < 8000 || PicoIn.sndRate > 53267) - PicoIn.sndRate = 22050; + if (PicoIn.sndRate < 8000 || PicoIn.sndRate > 53267) { + if (strncasecmp(tmp, "native", 6) == 0) { + tmp += 6; + PicoIn.sndRate = 53000; + } else + PicoIn.sndRate = 22050; + } if (*tmp == 'H' || *tmp == 'h') tmp++; if (*tmp == 'Z' || *tmp == 'z') tmp++; while (*tmp == ' ') tmp++; diff --git a/platform/common/menu_pico.c b/platform/common/menu_pico.c index fc9e769e..ea0fbdcf 100644 --- a/platform/common/menu_pico.c +++ b/platform/common/menu_pico.c @@ -631,7 +631,7 @@ static const char *mgn_opt_sound(int id, int *offs) *offs = -8; str2 = (PicoIn.opt & POPT_EN_STEREO) ? 
"stereo" : "mono"; if (PicoIn.sndRate > 52000) - sprintf(static_buff, "native %s\n", str2); + sprintf(static_buff, "native %s", str2); else sprintf(static_buff, "%5iHz %s", PicoIn.sndRate, str2); return static_buff; } From 906cc854561d442e0d629294f76919d7e13d724b Mon Sep 17 00:00:00 2001 From: jdgleaver Date: Tue, 5 Apr 2022 10:53:16 +0100 Subject: [PATCH 0735/1110] (libretro) Prevent illegal usage of RETRO_ENVIRONMENT_SET_SYSTEM_AV_INFO/RETRO_ENVIRONMENT_SET_GEOMETRY environment callbacks --- platform/libretro/libretro.c | 49 ++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/platform/libretro/libretro.c b/platform/libretro/libretro.c index 00cf4da5..fb10bd91 100644 --- a/platform/libretro/libretro.c +++ b/platform/libretro/libretro.c @@ -102,6 +102,9 @@ static int vout_width, vout_height, vout_offset; static float vout_aspect = 0.0; static int vout_ghosting = 0; +static bool libretro_update_av_info = false; +static bool libretro_update_geometry = false; + #if defined(RENDER_GSKIT_PS2) #define VOUT_8BIT_WIDTH 328 #define VOUT_8BIT_HEIGHT 256 @@ -603,8 +606,6 @@ static void apply_renderer() void emu_video_mode_change(int start_line, int line_count, int start_col, int col_count) { - struct retro_system_av_info av_info; - vm_current_start_line = start_line; vm_current_line_count = line_count; vm_current_start_col = start_col; @@ -653,9 +654,8 @@ void emu_video_mode_change(int start_line, int line_count, int start_col, int co #endif Pico.m.dirtyPal = 1; - // Update the geometry - retro_get_system_av_info(&av_info); - environ_cb(RETRO_ENVIRONMENT_SET_GEOMETRY, &av_info); + /* Notify frontend of geometry update */ + libretro_update_geometry = true; } void emu_32x_startup(void) @@ -1327,6 +1327,15 @@ bool retro_load_game(const struct retro_game_info *info) init_frameskip(); + /* Initialisation routines may have 'triggered' + * a libretro AV info or geometry update; this + * happens automatically after retro_load_game(), + 
* so disable the relevant flags here to avoid + * redundant updates on the first call of + * retro_run() */ + libretro_update_av_info = false; + libretro_update_geometry = false; + return true; } @@ -1585,13 +1594,9 @@ static void update_variables(bool first_run) vout_aspect = VOUT_PAR; } + /* Notify frontend of geometry update */ if (vout_aspect != old_vout_aspect) - { - // Update the geometry - struct retro_system_av_info av_info; - retro_get_system_av_info(&av_info); - environ_cb(RETRO_ENVIRONMENT_SET_GEOMETRY, &av_info); - } + libretro_update_geometry = true; var.value = NULL; var.key = "picodrive_sprlim"; @@ -1690,16 +1695,15 @@ static void update_variables(bool first_run) var.value = NULL; var.key = "picodrive_sound_rate"; if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { - new_sound_rate = atoi(var.value); if (!strcmp(var.value, "native")) - new_sound_rate = YM2612_NATIVE_RATE(); + new_sound_rate = YM2612_NATIVE_RATE(); + else + new_sound_rate = atoi(var.value); if (new_sound_rate != PicoIn.sndRate) { /* Update the sound rate */ PicoIn.sndRate = new_sound_rate; PsndRerate(!first_run); - struct retro_system_av_info av_info; - retro_get_system_av_info(&av_info); - environ_cb(RETRO_ENVIRONMENT_SET_SYSTEM_AV_INFO, &av_info); + libretro_update_av_info = true; } } @@ -1787,6 +1791,19 @@ void retro_run(void) PicoFrame(); + /* Check whether frontend needs to be notified + * of timing/geometry changes */ + if (libretro_update_av_info || libretro_update_geometry) { + struct retro_system_av_info av_info; + retro_get_system_av_info(&av_info); + environ_cb(libretro_update_av_info ? 
+ RETRO_ENVIRONMENT_SET_SYSTEM_AV_INFO : + RETRO_ENVIRONMENT_SET_GEOMETRY, + &av_info); + libretro_update_av_info = false; + libretro_update_geometry = false; + } + /* If frame was skipped, call video_cb() with * a NULL buffer and return immediately */ if (PicoIn.skipFrame) { From 47e73a93a4633f88f0dc15c84d730084c2cf56a7 Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 9 Apr 2022 19:53:10 +0000 Subject: [PATCH 0736/1110] libretro, fix compilation for unix armv7 --- Makefile | 1 + .../libretro/libretro-common/file/file_path.c | 1381 +++++++++++++++++ 2 files changed, 1382 insertions(+) create mode 100644 platform/libretro/libretro-common/file/file_path.c diff --git a/Makefile b/Makefile index c3f5b728..2bad1743 100644 --- a/Makefile +++ b/Makefile @@ -218,6 +218,7 @@ OBJS += platform/libretro/libretro-common/compat/fopen_utf8.o OBJS += platform/libretro/libretro-common/memmap/memmap.o OBJS += platform/libretro/libretro-common/encodings/encoding_utf.o OBJS += platform/libretro/libretro-common/string/stdstring.o +OBJS += platform/libretro/libretro-common/file/file_path.o OBJS += platform/libretro/libretro-common/streams/file_stream.o OBJS += platform/libretro/libretro-common/streams/file_stream_transforms.o OBJS += platform/libretro/libretro-common/vfs/vfs_implementation.o diff --git a/platform/libretro/libretro-common/file/file_path.c b/platform/libretro/libretro-common/file/file_path.c new file mode 100644 index 00000000..320c5a2d --- /dev/null +++ b/platform/libretro/libretro-common/file/file_path.c @@ -0,0 +1,1381 @@ +/* Copyright (C) 2010-2020 The RetroArch team + * + * --------------------------------------------------------------------------------------- + * The following license statement only applies to this file (file_path.c). 
+ * --------------------------------------------------------------------------------------- + * + * Permission is hereby granted, free of charge, + * to any person obtaining a copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include