various small improvements and fixes

kub 2019-04-16 20:37:52 +02:00
parent f133766faa
commit d40a5af495
32 changed files with 372 additions and 241 deletions

@@ -9,6 +9,7 @@ asm_render = 0
 asm_ym2612 = 0
 asm_misc = 0
 asm_cdmemory = 0
+asm_32xdraw = 0
 asm_mix = 0
 endif

@@ -9,7 +9,7 @@
  * to avoid under/overstepping the src region).
  *
  * ATTN does dirty aliasing tricks with undefined behaviour by standard.
- * (however, this was needed to improve the generated code).
+ * (however, this improved the generated code).
  * ATTN uses struct assignment, which only works if the compiler is inlining
  * this (else it would probably call memcpy :-)).
  */
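
For orientation, the two ATTN notes refer to idioms used throughout the hunks below: the same memory is walked through a union of differently typed pointers, and 16-byte chunks are moved with a plain struct assignment. A minimal, self-contained sketch of that pattern follows; blk16 and copy_blocks are made-up names, not the project's code.

#include <stddef.h>
#include <stdint.h>

/* Hypothetical illustration of the "dirty aliasing" plus struct-assignment
 * copy mentioned in the comment above.  Reading the same bytes through the
 * u8/struct members of a union is undefined behaviour by the letter of the
 * standard, but compilers typically turn the struct assignment into wide
 * loads/stores once this is inlined. */
struct blk16 { uint32_t a[4]; };

static void copy_blocks(void *dest, const void *src, size_t n)
{
	union { const void *v; uint8_t *c; struct blk16 *s; }
		ss = { src }, ds = { dest };

	while (n >= sizeof(struct blk16)) {	/* one 16-byte struct move per step */
		*ds.s++ = *ss.s++;
		n -= sizeof(struct blk16);
	}
	while (n--)				/* byte-sized tail */
		*ds.c++ = *ss.c++;
}
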
@@ -33,22 +33,24 @@ void *memcpy(void *dest, const void *src, size_t n)
 	const int lm = sizeof(uint32_t)-1;
 	/* align src to word */
-	while (((unsigned)ss.c & lm) && n > 0)
+	while (((uintptr_t)ss.c & lm) && n > 0)
 		*ds.c++ = *ss.c++, n--;
-	if (((unsigned)ds.c & lm) == 0) {
+	if (((uintptr_t)ds.c & lm) == 0) {
 		/* fast copy if pointers have the same aligment */
-		while (n >= sizeof(struct _16)) /* copy 16 bytes blocks */
+		while (n >= sizeof(struct _16)) /* copy 16 byte blocks */
 			*ds.s++ = *ss.s++, n -= sizeof(struct _16);
 		if (n >= sizeof(uint64_t)) /* copy leftover 8 byte block */
 			*ds.l++ = *ss.l++, n -= sizeof(uint64_t);
+//		if (n >= sizeof(uint32_t)) /* copy leftover 4 byte block */
+//			*ds.i++ = *ss.i++, n -= sizeof(uint32_t);
 	} else if (n >= 2*sizeof(uint32_t)) {
 		/* unaligned data big enough to avoid overstepping src */
 		uint32_t v1, v2, b, s;
 		/* align dest to word */
-		while (((unsigned)ds.c & lm) && n > 0)
+		while (((uintptr_t)ds.c & lm) && n > 0)
 			*ds.c++ = *ss.c++, n--;
 		/* copy loop: load aligned words and store shifted words */
-		b = (unsigned)ss.c & lm, s = b*8; ss.c -= b;
+		b = (uintptr_t)ss.c & lm, s = b*8; ss.c -= b;
 		v1 = *ss.i++, v2 = *ss.i++;
 		while (n >= 3*sizeof(uint32_t)) {
 			*ds.i++ = (v1 _L_ s) | (v2 _U_ (32-s)); v1 = *ss.i++;
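
The copy loop above is the densest part of this file: once dest is word-aligned, src is rounded down to a word boundary, whole 32-bit words are loaded from there, and every output word is spliced together from two consecutive loads. Below is a small standalone sketch of that idea, assuming a little-endian target where _L_ and _U_ expand to >> and <<; copy_shifted and the test buffers are invented for illustration.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Toy forward copy for the misaligned case: dst is 4-byte aligned, src is
 * not, and n is a multiple of 4.  Mirrors "*ds.i++ = (v1 _L_ s) | (v2 _U_
 * (32-s))" under the little-endian assumption _L_ = >> and _U_ = <<.
 * Unlike the real code it does not guard the final load, so src needs a few
 * bytes of slack after it. */
static void copy_shifted(void *dst, const void *src, size_t n)
{
	uint32_t b = (uintptr_t)src & 3, s = b * 8;
	const uint32_t *sw = (const uint32_t *)((const uint8_t *)src - b);
	uint32_t *dw = dst;
	uint32_t v1, v2;

	assert(b != 0 && ((uintptr_t)dst & 3) == 0 && n % 4 == 0);
	v1 = *sw++;			/* aligned word holding the first src bytes */
	while (n >= 4) {
		v2 = *sw++;		/* next aligned word */
		*dw++ = (v1 >> s) | (v2 << (32 - s));
		v1 = v2;
		n -= 4;
	}
}

int main(void)
{
	static const union { char c[24]; uint32_t align; } in =
		{ "0123456789abcdefghij" };
	union { char c[20]; uint32_t align; } out = { "" };

	copy_shifted(out.c, in.c + 1, 16);	/* src misaligned by one byte */
	out.c[16] = '\0';
	puts(out.c);				/* prints 123456789abcdefg */
	return 0;
}
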
@@ -78,28 +80,35 @@ void *memmove (void *dest, const void *src, size_t n)
 	struct _16 { uint32_t a[4]; };
 	union { const void *v; uint8_t *c; uint32_t *i; uint64_t *l; struct _16 *s; }
 		ss = { src+n }, ds = { dest+n };
+	size_t pd = dest > src ? dest - src : src - dest;
 	const int lm = sizeof(uint32_t)-1;
 	if (dest <= src || dest >= src+n)
 		return memcpy(dest, src, n);
 	/* align src to word */
-	while (((unsigned)ss.c & lm) && n > 0)
+	while (((uintptr_t)ss.c & lm) && n > 0)
 		*--ds.c = *--ss.c, n--;
-	if (((unsigned)ds.c & lm) == 0) {
+	/* take care not to copy multi-byte data if it overlaps */
+	if (((uintptr_t)ds.c & lm) == 0) {
 		/* fast copy if pointers have the same aligment */
-		while (n >= sizeof(struct _16)) /* copy 16 byte blocks */
+		while (n >= sizeof(struct _16) && pd >= sizeof(struct _16))
+			/* copy 16 bytes blocks if no overlap */
 			*--ds.s = *--ss.s, n -= sizeof(struct _16);
-		if (n >= sizeof(uint64_t)) /* copy leftover 8 byte block */
+		while (n >= sizeof(uint64_t) && pd >= sizeof(uint64_t))
+			/* copy leftover 8 byte blocks if no overlap */
 			*--ds.l = *--ss.l, n -= sizeof(uint64_t);
-	} else if (n >= 2*sizeof(uint32_t)) {
+		while (n >= sizeof(uint32_t) && pd >= sizeof(uint32_t))
+			/* copy leftover 4 byte blocks if no overlap */
+			*--ds.i = *--ss.i, n -= sizeof(uint32_t);
+	} else if (n >= 2*sizeof(uint32_t) && pd >= 2*sizeof(uint32_t)) {
 		/* unaligned data big enough to avoid understepping src */
 		uint32_t v1, v2, b, s;
 		/* align dest to word */
-		while (((unsigned)ds.c & lm) && n > 0)
+		while (((uintptr_t)ds.c & lm) && n > 0)
 			*--ds.c = *--ss.c, n--;
 		/* copy loop: load aligned words and store shifted words */
-		b = (unsigned)ss.c & lm, s = b*8; ss.c += b;
+		b = (uintptr_t)ss.c & lm, s = b*8; ss.c += b;
 		v1 = *--ss.i, v2 = *--ss.i;
 		while (n >= 3*sizeof(uint32_t)) {
 			*--ds.i = (v1 _U_ s) | (v2 _L_ (32-s)); v1 = *--ss.i;
@@ -114,7 +123,7 @@ void *memmove (void *dest, const void *src, size_t n)
 		}
 		ss.c -= b - 2*sizeof(uint32_t);
 	}
-	/* copy 0-7 leftover bytes */
+	/* copy 0-7 leftover bytes (or upto everything if ptrs are too close) */
 	while (n >= 4) {
 		*--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--;
 		*--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--;
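
On the pd checks added to memmove above: pd is the distance |dest - src|, and the wide copies are only taken while a whole block fits inside that distance, since a 16- or 8-byte struct/integer move may be split by the compiler into smaller pieces whose later loads would then see bytes an earlier store already overwrote. The following standalone demo (not the project's code) emulates one 16-byte block move as two 8-byte halves at a 2-byte distance and compares it with an overlap-safe byte copy.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* goal: move the 16 bytes a[0..15] up to a[2..17], i.e. dest - src == 2 */
	char a[] = "0123456789abcdefXX";	/* copied as two 8-byte halves */
	char b[] = "0123456789abcdefXX";	/* copied byte-by-byte, backward */
	uint64_t lo, hi;

	/* emulated "wide" block copy: low half first, then high half */
	memcpy(&lo, a, 8);      memcpy(a + 2, &lo, 8);	/* store clobbers a[8..9] */
	memcpy(&hi, a + 8, 8);  memcpy(a + 10, &hi, 8);	/* load sees clobbered bytes */

	for (int i = 15; i >= 0; i--)	/* what an overlap-safe copy produces */
		b[2 + i] = b[i];

	printf("split wide copy : %s\n", a);	/* 010123456767abcdef - corrupted */
	printf("byte-wise copy  : %s\n", b);	/* 010123456789abcdef - correct */
	return 0;
}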