Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

be-codegen-6800.c be-code-6800.c Ideas for making signed/unsigned char comparisons more efficient #162

Open
zu2 opened this issue Nov 13, 2024 · 5 comments
Labels
enhancement New feature or request

Comments

@zu2
Copy link

zu2 commented Nov 13, 2024

If both can be compared as char, gen_rewrite rewrites the AST to reduce T_CAST. This allows cmp_direct to generate efficient code.

The problem with this method is that the code becomes more complicated as the number of patterns that can be made more efficient increases (global variables can also be made more efficient).

I'm also concerned that gen_rewrite and cmp_direct perform similar comparisons. However, cmp_direct alone was unable to remove unnecessary casts, so the code looks like this.

Now, if we make it more efficient like this, the jsr boolXX/jeq after cmpb can become jXX. To do this, I need to know that this comparison does not require a result (0/1). How can I know that?

diff -u ../Fuzix-Compiler-Kit/be-codegen-6800.c be-codegen-6800.c
--- ../Fuzix-Compiler-Kit/be-codegen-6800.c	2024-11-13 00:32:04
+++ be-codegen-6800.c	2024-11-13 14:07:11
@@ -374,6 +375,46 @@
 			n->left = r;
 		}
 	}
+	if (op == T_EQEQ || op == T_BANGEQ || op == T_LT || op == T_LTEQ || op == T_GT || op == T_GTEQ) {
+		if (l->op == T_CAST && l->type == CSHORT
+		&&  l->right->op == T_LREF && l->right->type == CCHAR
+		&&  r->op == T_CONSTANT && -128<=(long)r->value && r->value<=127) {
+                squash_right(l, T_LREF);
+				l->type = CCHAR;
+				r->type = CCHAR;
+		}
+		if (l->op == T_CAST && l->type == CSHORT
+		&&  l->right->op == T_LREF && l->right->type == CCHAR
+		&&  r->op == T_CAST && r->type == CSHORT
+		&&  r->right->op == T_LREF && r->right->type == CCHAR ){
+                squash_right(l, T_LREF);
+                squash_right(r, T_LREF);
+				l->type = CCHAR;
+				r->type = CCHAR;
+		}
+		if (l->op == T_CAST && l->type == CSHORT 
+		&&  l->right->op == T_LREF && l->right->type == UCHAR
+		&&  r->op == T_CONSTANT && 0<=r->value && r->value<=255) {
+                squash_right(l, T_LREF);
+				l->type = UCHAR;
+				r->type = UCHAR;
+		}
+		if (l->op == T_CAST && l->type == USHORT 
+		&&  l->right->op == T_LREF && l->right->type == UCHAR
+		&&  r->op == T_CAST && r->type == USHORT
+		&&  r->right->op == T_LREF && r->right->type == UCHAR ){
+                squash_right(l, T_LREF);
+                squash_right(r, T_LREF);
+				l->type = UCHAR;
+				r->type = UCHAR;
+		}
+	}
 	return n;
 }
diff -u ../Fuzix-Compiler-Kit/be-code-6800.c be-code-6800.c
--- ../Fuzix-Compiler-Kit/be-code-6800.c	2024-11-13 00:32:04
+++ be-code-6800.c	2024-11-13 13:57:31
@@ -990,32 +986,36 @@
 unsigned cmp_direct(struct node *n, const char *uop, const char *op)
 {
 	register struct node *r = n->right;
+	register struct node *l = n->left;
 	register unsigned v = r->value;
 	register unsigned s = get_size(r->type);
 
-	if (r->op != T_CONSTANT)
-		return 0;
 	if (r->type & UNSIGNED)
 		op = uop;
 
-	/* If we knoww the upper half matches - eg a byte var that has been expanded then
-	   shortcut it. Need to think about this for signed math. I think we can do signed
-	   math if we use uop in the size 2 upper match case : TODO*/
-	if (s == 1 || (op == uop && a_valid && (v >> 8) == a_val)) {
-		printf("\tcmpb #%u\n\t%s %s\n", v & 0xFF, jsr_op, op);
-		n->flags |= ISBOOL;
-		invalidate_b();
-		return 1;
+	if (s == 1) {
+		if ((r->op == T_CONSTANT && l->type == UCHAR && r->value>=0 && r->value<=255)
+		||  (r->op == T_CONSTANT && l->type == CCHAR && (int)r->value>=-128 && (int)r->value<=127)
+		||  (r->op == T_LREF &&  l->type == r->type)) {
+			op8_on_node(r, "cmp", 0);
+			printf("\t%s %s\n",jsr_op,(l->type==UCHAR)?uop:op);
+			n->flags |= ISBOOL;
+			invalidate_b();
+			return 1;
+		}
 	}
-	if (s == 2) {
+	if (s == 2 && r->op == T_CONSTANT) {
 		/* If we know the value of A (eg from a cast) */
 		if (cpu_has_d) {
+			if (r->type & UNSIGNED)
+				op = uop;
 			printf("\tsubd #%u\n\t%s %s\n", v & 0xFFFF, jsr_op, op);
 			n->flags |= ISBOOL;
 			invalidate_work();
 			return 1;
 		}
 	}
+
 	return 0;
 }

sample program:

int
main(int argc, char **argv)
{
	signed char	i=0;
	signed char	k=0;
	signed char	j=10;

	while (i<j) {
		i++;
	}
	if (i!=10)
		return 1;

	return 0;
}

compiled asm at while loop.

;make local ptr off 0, rlim 255 noff 0
	tsx
	ldb 0,x
;make local ptr off 2, rlim 255 noff 2
	cmpb 2,x
	jsr boollt
;
	jeq L1_b

if statement.

;make local ptr off 0, rlim 255 noff 0
	tsx
	ldb 0,x
	cmpb #<10
	jsr boolne
;
	jeq L2_e
@EtchedPixels
Copy link
Owner

For the first part instead of rewriting you can now add backend_byte.c to that target and call

/* Chance to rewrite the tree from the top rather than none by node
   upwards. We will use this for 8bit ops at some point and for cconly
   propagation */
struct node *gen_rewrite(struct node *n)
{
        byte_label_tree(n, BTF_RELABEL);
        return n;
}

That will go through the tree and work out what it knows can be done 8bit. It will change the size on most of them to byte but will retain the size and set the BYTEABLE flag on any where the real size matters but they can be done 8bit - eg condition codes, T_BANG, and T_BOOL. Search for BYTEABLE in the 6502 tree for examples.

For conditions the compiler sets CCONLY on the top node before a branch. Z80 uses this and knowledge of what operators can be done on condition codes to do subtrees that way (see be-codegen-z80:propogate_cconly() ). The only nasty case to handle is that T_BANG or T_BOOL of a condition code only can need to be converted back into a boolean in a few corner cases. Z80 just has a little helper __cctonot that deals with it.

@zu2
Copy link
Author

zu2 commented Nov 14, 2024

I tried BYTETABLE. When processing with cmp_direct, the terms on the right side can be omitted, but the terms on the left side have already been processed at the time of cmp_direct, so the unnecessary CAST could not be omitted.

Below is the output result of the code used for testing.

unsigned cmp_direct(struct node *n, const char *uop, const char *op)
{           
    register struct node *r = n->right;
    register struct node *l = n->left;
    register unsigned v = r->value;
    register unsigned s = get_size(r->type);
  
    printf("; cmp_direct\n");
    if (r->type & UNSIGNED)
        op = uop;
        
    if (s == 2 && (n->flags&BYTETAIL)){
        printf("; cmp_direct s==2 && BYTETAIL\n");
        if (r->op == T_CAST && l->op == T_CAST)
            if(r->type != l->type
            || r->type != CSHORT)
                return 0;
        if (r->op == T_CAST)
            r = r->right;
        if (l->op == T_CAST)
            l = l->right;
        if ((r->op == T_CONSTANT && l->type == UCHAR && r->value>=0 && r->value<=255)
        ||  (r->op == T_CONSTANT && l->type == CCHAR && (int)r->value>=-128 && (int)r->value<=127)
        ||  (r->op == T_LREF &&  l->type == r->type)) {
            op8_on_node(r, "cmp", 0);
            printf("\t%s %s\n",jsr_op,(l->type==UCHAR)?uop:op);
            n->flags |= ISBOOL;
            invalidate_b();
            return 1;
        }
    }

compiled result.

;T_BOOL v0 t10 f820 CCONLY,BYTETAIL,
;    T_BANGEQ v0 t10 f800 BYTETAIL,
;        T_CAST v0 t10 f100 
;            2003 v0 t0 f800 BYTETAIL,  i
;        T_CONSTANT va t10 f900 BYTETAIL,
;make local ptr off 0, rlim 255 noff 0
        tsx
        ldb 0,x
        jsr __castc_
; cmp_direct
; cmp_direct s==2 && BYTETAIL
        cmpb #<10
        jsr boolne
;
        jeq L2_e

I'll think about it a bit more.

@EtchedPixels
Copy link
Owner

The byte code knows about some char/char comparisons but not about most char - constant (and especially unsigned char v constant) ones. That is the right place to add those tricks so that they are shared between targets I think.

@zu2
Copy link
Author

zu2 commented Nov 16, 2024

I tried backend-byte.c. It looks good. I need to consider the scope of BYTEOP further.

test program:

int
main(int argc, char **argv)
{
	signed char	i=0;
	signed char	k=0;
	signed char	j=10;

	while (i<j) {
		i++;
	}
	if (i!=10)
		return 1;

	return 0;
}

object code (while loop).

;T_BOOL v0 t10 f820 CCONLY,BYTETAIL, CSHORT
;    T_LT v0 t10 fb00 BYTEOP,BYTETAIL, CSHORT
;        2003 v0 t0 f800 BYTETAIL, CCHAR    i
;        2003 v2 t0 f800 BYTETAIL, CCHAR    j
;make local ptr off 0, rlim 255 noff 0
    tsx
    ldb 0,x
    cmpb 2,x
;   optimized by rule
    jge L1_b
    inc 0,x
;
    jmp L1_c

object code (if statement).

;T_BOOL v0 t10 f820 CCONLY,BYTETAIL, CSHORT
;    T_BANGEQ v0 t10 fb00 BYTEOP,BYTETAIL, CSHORT
;        2003 v0 t0 f800 BYTETAIL, CCHAR    i
;        T_CONSTANT va t0 fb00 BYTEOP,BYTETAIL, CCHAR
;make local ptr off 0, rlim 255 noff 0
    tsx
    ldb 0,x
    cmpb #<10
;   optimized by rule
    jeq L2_e
    clra
    ldab #1
;
    jmp L0_r

When T_PLUS/T_MINUS/T_STAR are enabled, subscripting of CCHAR/UCHAR arrays fails. Subscript must be CSHORT.

diff -u ../Fuzix-Compiler-Kit/backend-byte.c backend-byte.c
--- ../Fuzix-Compiler-Kit/backend-byte.c	2024-11-11 09:45:00
+++ backend-byte.c	2024-11-16 11:37:23
@@ -76,6 +78,8 @@
 {
 	register unsigned op = n->op;
 	register unsigned b = 0;
+	struct node *l = n->left;
+	struct node *r = n->right;
 
 	if (n->type == CCHAR || n->type == UCHAR)
 		b = 1;
@@ -94,8 +98,8 @@
 		return BYTEABLE;
 	}
 	/* Mathematical operations */
-	if (op == T_PLUS || op == T_MINUS || op == T_STAR)
-		return BYTEABLE;
+//	if (op == T_PLUS || op == T_MINUS || op == T_STAR)
+//		return BYTEABLE;
 	/* Logic operations */
 	if (op == T_AND || op == T_OR || op == T_HAT)
 		return BYTEABLE;
@@ -117,8 +121,41 @@
 	/* TODO: in theory for some cases we can also treat these as BYTEABLE
 	   providing the left and right sides are genuinely casts from byte types
 	   or byte types, but not if they are trimmed ones */
-
-	if (op == T_LT || op == T_GT || op == T_LTEQ || op == T_GTEQ) {
+	if (op == T_LT || op == T_GT || op == T_LTEQ || op == T_GTEQ
+	|| op == T_EQEQ || op == T_BANGEQ) {
+		if (r->op == T_CONSTANT && l->op == T_CAST) {
+			if (l->right->type == CCHAR
+			&&  (int)r->value>=-128 && r->value<=127){
+				r->flags |= BYTEABLE|BYTEOP;
+				l->flags |= BYTEABLE|BYTEOP;
+				r->type = CCHAR;
+				l->type = CCHAR;
+				return BYTEABLE|BYTEOP|BYTETAIL;
+			}
+			if (l->right->type == UCHAR &&  r->value>=0 && r->value<=255){
+				r->flags |= BYTEABLE|BYTEOP;
+				l->flags |= BYTEABLE|BYTEOP;
+				r->type = UCHAR;
+				l->type = UCHAR;
+				return BYTEABLE|BYTEOP|BYTETAIL;
+			}
+		}else if (r->op == T_CAST && l->op == T_CAST
+			  &&  r->type == CSHORT && l->type == CSHORT) {
+			if(l->right->type == CCHAR && r->right->type == CCHAR){
+				r->flags |= BYTEABLE|BYTEOP;
+				l->flags |= BYTEABLE|BYTEOP;
+				r->type = CCHAR;
+				l->type = CCHAR;
+				return BYTEABLE|BYTEOP|BYTETAIL;
+			}
+			if(l->right->type == UCHAR && r->right->type == UCHAR){
+				r->flags |= BYTEABLE|BYTEOP;
+				l->flags |= BYTEABLE|BYTEOP;
+				r->type = UCHAR;
+				l->type = UCHAR;
+				return BYTEABLE|BYTEOP|BYTETAIL;
+			}
+		}
 		if (cast_lr(n))
 			return BYTEABLE | BYTETAIL;
 		return BYTETAIL;
--- ../Fuzix-Compiler-Kit/be-codegen-6800.c	2024-11-13 00:32:04
+++ be-codegen-6800.c	2024-11-16 11:29:00
@@ -31,6 +31,7 @@
 #include "compiler.h"
 #include "backend.h"
 #include "backend-6800.h"
+#include "backend-byte.h"
 
 
 /*
@@ -74,6 +75,7 @@
    propagation */
 struct node *gen_rewrite(struct node *n)
 {
+	byte_label_tree(n, BTF_RELABEL);
 	return n;
 }
--- ../Fuzix-Compiler-Kit/be-code-6800.c	2024-11-13 00:32:04
+++ be-code-6800.c	2024-11-16 11:26:02
@@ -5,6 +5,7 @@
 #include "compiler.h"
 #include "backend.h"
 #include "backend-6800.h"
+#include "backend-byte.h"
 
 unsigned label;		/* Used to hand out local labels of the form X%u */
 
@@ -987,22 +988,23 @@
 	return load_r_with('u', r, off);
 }
 
 unsigned cmp_direct(struct node *n, const char *uop, const char *op)
 {
 	register struct node *r = n->right;
 	register unsigned v = r->value;
 	register unsigned s = get_size(r->type);
 
-	if (r->op != T_CONSTANT)
-		return 0;
-	if (r->type & UNSIGNED)
-		op = uop;

 	/* If we knoww the upper half matches - eg a byte var that has been expanded then
 	   shortcut it. Need to think about this for signed math. I think we can do signed
 	   math if we use uop in the size 2 upper match case : TODO*/
-	if (s == 1 || (op == uop && a_valid && (v >> 8) == a_val)) {
-		printf("\tcmpb #%u\n\t%s %s\n", v & 0xFF, jsr_op, op);
+	if (s==1 && (n->flags&BYTEOP)){
+		op8_on_node(r, "cmp", 0);
+		printf("\t%s %s\n",jsr_op,(r->type==UCHAR)?uop:op);
 		n->flags |= ISBOOL;
 		invalidate_b();
 		return 1;
@@ -1011,6 +1013,7 @@
 		/* If we know the value of A (eg from a cast) */
 		if (cpu_has_d) {
 			printf("\tsubd #%u\n\t%s %s\n", v & 0xFFFF, jsr_op, op);
+			printf("\t%s %s\n",jsr_op,(r->type&UNSIGNED)?uop:op);
 			n->flags |= ISBOOL;
 			invalidate_work();
 			return 1;

Simple branches that do not require values ​​are optimized using a peep hole by taking advantage of the fact that they have the shape Lxx_y.
Similar operations should be possible with && and ||, but I have not tried them yet.

--- ../Fuzix-Compiler-Kit/rules.6800	2024-11-01 00:40:37
+++ rules.6800	2024-11-16 11:35:39
@@ -62,3 +62,53 @@
 =
 	jsr __lplus2
 
+
+	jsr boollt
+;
+	jeq %2_%3
+=
+;	optimized by rule
+	jge %2_%3
+
+	jsr boolult
+;
+	jeq %2_%3
+=
+;	optimized by rule
+	jcc %2_%3
+
+	jsr boolule
+;
+	jeq %2_%3
+=
+;	optimized by rule
+	jhi %2_%3
+
+
+	jsr booleq
+;
+	jeq %2_%3
+=
+;	optimized by rule
+	jne	%2_%3
+
+	jsr boolne
+;
+	jeq %2_%3
+=
+;	optimized by rule
+	jeq %2_%3
+
+	ldb %1
+	cmpb #<0
+=
+	ldb %1
+
+	pshb
+	psha
+	ldb %1
+	clra
+	jsr __plus
+=
+	addb %1
+	adca #0

@zu2
Copy link
Author

zu2 commented Nov 16, 2024

one more.

--- ../Fuzix-Compiler-Kit/be-codegen-6800.c	2024-11-13 00:32:04
+++ be-codegen-6800.c	2024-11-16 13:57:20
@@ -503,7 +552,7 @@
 				return 1;
 			}
 		}
-		if (s == 2 && r->op == T_LOCAL) {
+		if (s == 2 && (r->op == T_LOCAL || r->op == T_ARGUMENT)) {
 			v = r->value;
 			if (!cpu_is_09)
 				v++;
@@ -515,6 +564,11 @@
 			add_d_const(v);
 			return 1;
 		}
+		if (s == 2 && r->op == T_CAST && (r->type&~UNSIGNED) == CSHORT && r->right->type == UCHAR) {
+			op8_on_node(r->right, "add", 0);
+			puts("\tadca #0");
+			return 1;
+		}
 		return write_opd(r, "add", "adc", 0);
 	case T_MINUS:
 		if (r->op == T_CONSTANT && r->type != FLOAT) {
@@ -529,6 +583,11 @@
 				b_val -= r->value;
 				return 1;
 			}
+		}
+		if (s == 2 && r->op == T_CAST && (r->type&~UNSIGNED) == CSHORT && r->right->type == UCHAR) {
+			op8_on_node(r->right, "sub", 0);
+			puts("\tsbca #0");
+			return 1;
 		}
 		return write_opd(r, "sub", "sbc", 0);
 	case T_AND:

sample:

int
sub(signed char a1, unsigned char a2, signed char a3, unsigned char a4)
{
	return	a1+a2-a3-a4;
}

int main(int argc, char *argv[])
{

  return sub(1,2,3,4)!=-4;
}
;:rewritten:
;T_MINUS v0 t18 f0  USHORT
;    T_MINUS v0 t18 f0  USHORT
;        T_PLUS v0 t18 f0  USHORT
;            T_CAST v0 t18 f100  USHORT
;                2003 v2 t0 f800 BYTETAIL, CCHAR        a1
;            T_CAST v0 t18 f100  USHORT
;                2003 v3 t8 f800 BYTETAIL, UCHAR        a2
;        T_CAST v0 t18 f100  USHORT
;            2003 v4 t0 f800 BYTETAIL, CCHAR    a3
;    T_CAST v0 t18 f100  USHORT
;        2003 v5 t8 f800 BYTETAIL, UCHAR        a4
;make local ptr off 2, rlim 255 noff 2
        tsx
        ldb 2,x
        jsr __castc_u
;make local ptr off 3, rlim 255 noff 3
        tsx
        addb 3,x
        adca #0
        pshb
        psha
;make local ptr off 4, rlim 255 noff 4
        ldb 4,x
        jsr __castc_u
        jsr __minus
;make local ptr off 5, rlim 255 noff 5
        tsx
        subb 5,x
        sbca #0

@EtchedPixels EtchedPixels added the enhancement New feature or request label Nov 16, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
enhancement New feature or request
Projects
None yet
Development

No branches or pull requests

2 participants