Add long long literals like 123LL to ACK C.

For now, a long long literal must have the 'LL' or 'll' suffix. A literal without 'LL' or 'll' acts as before: it may become unsigned long but not long long. (For targets where int and long have the same size, some literals change from unsigned int to unsigned long.)

Type `arith` may be too narrow for long long values. Add a second type `writh` for wide arithmetic, and change some variables from arith to writh. This may cause bugs if I forget to use writh, or if a conversion from writh to arith overflows. I mark some conversions with (arith) or (writh) casts.

- BigPars, SmallPars: Remove SPECIAL_ARITHMETICS. This feature would change arith to a different type, but can't work, because it would conflict with definitions of arith in both <em_arith.h> and <flt_arith.h>.
- LLlex.c: Understand 'LL' or 'll' suffix. Cut size of constant when it overflows writh, not only when it overflows the target machine's types. (This cut might not be necessary, because we might cut it again later.) When picking signed long or unsigned long, check the target's long type, not the compiler's arith type; the old check for `val >= 0` was broken where sizeof(arith) > 4.
- LLlex.h: Change struct token's tok_ival to writh, so it can hold a long long literal.
- arith.c: Adjust to VL_VALUE being writh. Don't convert between float and integer at compile-time if the integer might be too wide for <flt_arith.h>. Add writh2str(), because writh might be too wide for long2str().
- arith.h: Remove SPECIAL_ARITHMETICS. Declare full_mask[] here, not in several *.c files. Declare writh2str().
- ch3.c, ch3bin.c, ch3mon.c, declarator.c, statement.g: Remove obsolete casts. Adjust to VL_VALUE being writh.
- conversion.c, stab.c: Don't declare full_mask[].
- cstoper.c: Use writh for constant operations on VL_VALUE, and for full_mask[].
- declar.g, field.c, ival.g: Add casts.
- dumpidf.c: Need to #include "parameters.h" before checking DEBUG. Use writh2str, because "%ld" might not work.
- eval.c, eval.h: Add casts. Use writh when writing a wide constant in EM.
- expr.c: Add and remove casts. In fill_int_expr(), make expression from long long literal. In chk_cst_expr(), allow long long as constant expression, so the compiler may accept `case 123LL:` in a switch statement.
- expr.str: Change struct value's vl_value and struct expr's VL_VALUE to writh, so an expression may have a long long value at compile time.
- statement.g: Remove obsolete casts.
- switch.c, switch.str: Use writh in case entries for switch statements, so `switch (ll) {...}` with long long ll works.
- tokenname.c: Add ULNGLNG so LLlex.c can use it for literals.
davidgiven · Sep 5, 2019 · 15950f9 · 15950f9
1 parent 007a63d
commit 15950f9
Show file tree

Hide file tree

Showing 26 changed files with 254 additions and 176 deletions.
diff --git a/lang/cem/cemcom.ansi/BigPars b/lang/cem/cemcom.ansi/BigPars
@@ -117,11 +117,6 @@
 /*#define NOBITFIELD	1	*//* if NOT defined, implement bitfields	*/
 
 
-!File: spec_arith.h
-/* describes internal compiler arithmetics */
-#undef	SPECIAL_ARITHMETICS	/* something different from native long */
-
-
 !File: static.h
 #define GSTATIC			/* for large global "static" arrays */
 

diff --git a/lang/cem/cemcom.ansi/LLlex.c b/lang/cem/cemcom.ansi/LLlex.c
@@ -17,6 +17,7 @@
 #include "Lpars.h"
 #include "class.h"
 #include "sizes.h"
+#include "type.h"     /* no_long_long() */
 #include "error.h"
 #include "domacro.h"
 #include "specials.h" /* registration of special identifiers */
@@ -37,7 +38,6 @@ int LexSave = 0; /* last character read by GetChar	*/
 
 #define FLG_ESEEN 0x01 /* possibly a floating point number */
 #define FLG_DOTSEEN 0x02 /* certainly a floating point number */
-extern arith full_mask[];
 
 #ifdef LINT
 extern int lint_skip_comment;
@@ -594,10 +594,12 @@ static void strflt2tok(char fltbuf[], struct token* ptok)
 static void strint2tok(char intbuf[], struct token* ptok)
 {
 	register char* cp = intbuf;
-	int base = 10;
-	arith val = 0, dig, ubound;
-	int uns_flg = 0, lng_flg = 0, malformed = 0, ovfl = 0;
-	int fund;
+	int base = 10, dig;
+	unsigned writh val = 0, ubound;
+	int uns_flg = 0, lng_flg = 0, lnglng_flg = 0;
+	int malformed = 0, ovfl = 0;
+	unsigned writh uint_mask, ulng_mask, ulnglng_mask;
+	int cut, fund;
 
 	assert(*cp != '-');
 	if (*cp == '0')
@@ -611,11 +613,8 @@ static void strint2tok(char intbuf[], struct token* ptok)
 		else
 			base = 8;
 	}
-	/* The upperbound will be the same as when computed with
-	 * max_unsigned_arith / base (since base is even). The problem here
-	 * is that unsigned arith is not accepted by all compilers.
-	 */
-	ubound = max_arith / (base / 2);
+	/* The upperbound checks if val * base would overflow. */
+	ubound = ~(unsigned writh)0 / base;
 
 	while (is_hex(*cp))
 	{
@@ -626,10 +625,10 @@ static void strint2tok(char intbuf[], struct token* ptok)
 		}
 		else
 		{
-			if (val < 0 || val > ubound)
+			if (val > ubound)
 				ovfl++;
 			val *= base;
-			if (val < 0 && val + dig >= 0)
+			if (val > val + dig)
 				ovfl++;
 			val += dig;
 		}
@@ -639,7 +638,16 @@ static void strint2tok(char intbuf[], struct token* ptok)
 	while (*cp)
 	{
 		if (*cp == 'l' || *cp == 'L')
-			lng_flg++;
+		{
+			if (*cp == *(cp + 1))
+			{
+				/* 'll' or 'LL' */
+				lnglng_flg++;
+				cp++;
+			}
+			else
+				lng_flg++;
+		}
 		else if (*cp == 'u' || *cp == 'U')
 			uns_flg++;
 		else
@@ -658,59 +666,93 @@ static void strint2tok(char intbuf[], struct token* ptok)
 	}
 	else
 	{
-		if (lng_flg > 1)
+		if (lng_flg + lnglng_flg > 1)
 			lexerror("only one long suffix allowed");
 		if (uns_flg > 1)
 			lexerror("only one unsigned suffix allowed");
 	}
+
+	/* Get masks like 0XFFFF, 0XFFFFFFFF as unsigned values. */
+	uint_mask = (unsigned writh)full_mask[(int)int_size];
+	ulng_mask = (unsigned writh)full_mask[(int)long_size];
+	if (lnglng_size < 0)
+		ulnglng_mask = 0;
+	else
+		ulnglng_mask = (unsigned writh)full_mask[(int)lnglng_size];
+
+	/*	If a decimal literal with no suffix is too big for int
+	    and long, then C89 tries unsigned long, but C99 tries
+	    long long (WG14, Rationale for C99, C99RationaleV5.10.pdf,
+	    6.4.4.1 Integer constants).
+		This compiler follows C89 when the literal has no
+	    long long suffix.
+	*/
+	cut = 0;
 	if (ovfl)
 	{
 		lexwarning("overflow in constant");
-		fund = ULONG;
+		cut = 1; /* cut the size of the constant */
 	}
-	else if (!lng_flg && (val & full_mask[(int)int_size]) == val)
+	else if (!lng_flg && !lnglng_flg && (val & uint_mask) == val)
 	{
-		if (val >= 0 && val <= max_int)
-		{
+		if ((val & (uint_mask >> 1)) == val)
 			fund = INT;
-		}
-		else if (int_size == long_size)
+		else if (base == 10 && !uns_flg)
 		{
-			fund = UNSIGNED;
+			if ((val & (ulng_mask >> 1)) == val)
+				fund = LONG;
+			else
+				fund = ULONG;
 		}
-		else if (base == 10 && !uns_flg)
-			fund = LONG;
 		else
 			fund = UNSIGNED;
 	}
-	else if ((val & full_mask[(int)long_size]) == val)
+	else if (!lnglng_flg && (val & ulng_mask) == val)
 	{
-		if (val >= 0)
+		if ((val & (ulng_mask >> 1)) == val)
 			fund = LONG;
 		else
 			fund = ULONG;
 	}
+	else if (lnglng_flg && (val & ulnglng_mask) == val)
+	{
+		if ((val & (ulnglng_mask >> 1)) == val)
+			fund = LNGLNG;
+		else
+			fund = ULNGLNG;
+	}
+	else if (lnglng_flg && no_long_long())
+		fund = ERRONEOUS;
 	else
-	{ /* sizeof(arith) is greater than long_size */
-		assert(arith_size > long_size);
+	{
+		assert(sizeof(val) > long_size ||
+		       (lnglng_size >= 0 && sizeof(val) > lnglng_size));
 		lexwarning("constant too large for target machine");
-		/* cut the size to prevent further complaints */
-		val &= full_mask[(int)long_size];
-		fund = ULONG;
+		cut = 1;
 	}
-	if (lng_flg)
+	if (cut)
 	{
-		/* fund can't be INT */
-		if (fund == UNSIGNED)
+		/* cut the size to prevent further complaints */
+		if (lnglng_flg)
+		{
+			fund = ULNGLNG;
+			val &= ulnglng_mask;
+		}
+		else
+		{
 			fund = ULONG;
+			val &= ulng_mask;
+		}
 	}
 	if (uns_flg)
 	{
 		if (fund == INT)
 			fund = UNSIGNED;
 		else if (fund == LONG)
 			fund = ULONG;
+		else if (fund == LNGLNG)
+			fund = ULNGLNG;
 	}
 	ptok->tk_fund = fund;
-	ptok->tk_ival = val;
+	ptok->tk_ival = (writh)val;
 }
diff --git a/lang/cem/cemcom.ansi/LLlex.h b/lang/cem/cemcom.ansi/LLlex.h
@@ -26,7 +26,7 @@ struct token	{
 			char *tok_bts;	/* row of bytes */
 			int tok_len;	/* length of row of bytes */
 		} tok_string;
-		arith tok_ival;		/* for INTEGER */
+		writh tok_ival;		/* for INTEGER */
 		char *tok_fval;		/* for FLOATING */
 	} tok_data;
 };

diff --git a/lang/cem/cemcom.ansi/SmallPars b/lang/cem/cemcom.ansi/SmallPars
@@ -117,11 +117,6 @@
 /*#define NOBITFIELD	1	/* if NOT defined, implement bitfields	*/
 
 
-!File: spec_arith.h
-/* describes internal compiler arithmetics */
-#undef	SPECIAL_ARITHMETICS	/* something different from native long */
-
-
 !File: static.h
 #define GSTATIC			/* for large global "static" arrays */
 

diff --git a/lang/cem/cemcom.ansi/arith.c b/lang/cem/cemcom.ansi/arith.c
@@ -371,8 +371,7 @@ int int2int(struct expr **expp, register struct type *tp)
 					unsigned int x = ~0;
 					unsigned int y = -1;
 			*/
-			extern long full_mask[];
-			long remainder = exp->VL_VALUE &
+			writh remainder = exp->VL_VALUE &
 						~full_mask[(int)(tp->tp_size)];
 
 			if (remainder == 0 ||
@@ -389,6 +388,16 @@ int int2int(struct expr **expp, register struct type *tp)
 	return exp->ex_type->tp_fund;
 }
 
+static int fit4(writh val, int uns)
+{
+	/* Does val fit in 4 bytes, as unsigned if uns is nonzero, else as signed? */
+	unsigned writh u = (unsigned writh)val;
+
+	if (!uns)
+		u += 0x80000000UL;	/* bias signed range [-2^31, 2^31) onto [0, 2^32) */
+	return (u & full_mask[4]) == u;	/* presumably full_mask[4] is 0xFFFFFFFF */
+}
+
 /* With compile-time constants, we don't set fp_used, since this is done
  * only when necessary in eval.c.
  */
@@ -400,10 +409,10 @@ void int2float(register struct expr **expp, struct type *tp)
 	register struct expr *exp = *expp;
 	int uns = exp->ex_type->tp_unsigned;
 
-	if (is_cp_cst(exp)) {
+	if (is_cp_cst(exp) && fit4(exp->VL_VALUE, uns)) {
 		exp->ex_type = tp;
 		exp->ex_class = Float;
-		flt_arith2flt(exp->VL_VALUE, &(exp->FL_ARITH), uns);
+		flt_arith2flt((arith)exp->VL_VALUE, &(exp->FL_ARITH), uns);
 	}
 	else	{
 		fp_used = 1;
@@ -417,24 +426,35 @@ void float2int(struct expr **expp, struct type *tp)
 		converted to the integral type tp.
 	*/
 	register struct expr *ex = *expp;
-	
+
 	if (is_fp_cst(ex)) {
 		arith ar = flt_flt2arith(&ex->FL_ARITH, tp->tp_unsigned);
-
+#ifdef NOTDEF
+		/*	Historically, we always did the conversion at
+		    compile time.   This is now wrong if type arith is
+		    too narrow for an 8-byte integer.
+		*/
 		if (flt_status == FLT_OVFL)
 			expr_warning(ex,"overflow in float to int conversion");
 		else if (flt_status == FLT_UNFL)
 			expr_warning(ex,"underflow in float to unsigned conversion");
-		ex->ex_type = tp;
-		/* The following lines are copied from fill_int_expr */
-		ex->ex_class = Value;
-		ex->VL_CLASS = Const;
-		ex->VL_VALUE = ar;
-		cut_size(ex);
-	} else {
-		fp_used = 1;
-		*expp = arith2arith(tp, FLOAT2INT, ex);
+#endif /* NOTDEF */
+		/*	Now, we defer the conversion until run time
+		    unless it fits in 4 bytes.
+		*/
+		if (flt_status != FLT_OVFL && flt_status != FLT_UNFL &&
+		    fit4((writh)ar, tp->tp_unsigned)) {
+			ex->ex_type = tp;
+			/* The following lines are copied from fill_int_expr */
+			ex->ex_class = Value;
+			ex->VL_CLASS = Const;
+			ex->VL_VALUE = (writh)ar;
+			cut_size(ex);
+			return;
+		}
 	}
+	fp_used = 1;
+	*expp = arith2arith(tp, FLOAT2INT, ex);
 }
 
 void float2float(register struct expr **expp, struct type *tp)
@@ -640,3 +660,25 @@ void switch_sign_fp(register struct expr *expr)
 {
 	flt_umin(&(expr->FL_ARITH));
 }
+
+char *writh2str(writh val, int uns)
+{
+	/*	Converts val to a decimal string, like long2str(val, 10), but allows wider values; val is read as unsigned if uns is nonzero.
+		Returns a pointer into a static buffer, which the next call overwrites.
+	*/
+	static char buf[NUMSIZE + 1];	/* NOTE(review): assumes NUMSIZE holds the longest writh plus sign; confirm */
+	char *cp = &buf[NUMSIZE + 1];	/* one past the end; the string is built backwards */
+	int negative = (!uns && val < 0);
+	unsigned writh u = (unsigned writh)val;
+
+	if (negative)
+		u = -u;	/* unsigned negation yields the magnitude, even for the most negative val */
+	*--cp = '\0';
+	do {
+		*--cp = '0' + (u % 10);
+		u /= 10;
+	} while (u != 0);
+	if (negative)
+		*--cp = '-';
+	return cp;
+}
diff --git a/lang/cem/cemcom.ansi/arith.h b/lang/cem/cemcom.ansi/arith.h
@@ -5,30 +5,25 @@
 /* $Id$ */
 /* COMPILER ARITHMETIC */
 
-/*	Normally the compiler does its internal arithmetics in longs
-	native to the source machine, which is always good for local
-	compilations, and generally OK too for cross compilations
-	downwards and sidewards.  For upwards cross compilation and
-	to save storage on small machines, SPECIAL_ARITHMETICS will
-	be handy.
+/*	The compiler uses 2 types, arith and writh, for its internal
+	arithmetic.  Type arith is normally long, and may be too
+	narrow for long long values.  We can't change arith to a wider
+	type, because both <em_arith.h> (pulled by <em.h>) and
+	<flt_arith.h> define arith.
+
+	Type writh (wide arithmetic) is for values that might not fit
+	in arith.  Normally writh is the long long native to the
+	source machine, which is always good for local compilations,
+	and generally OK too for cross compilations downwards and
+	sidewards.
 */
 #ifndef ARITH_H_
 #define ARITH_H_
 
-#include	"parameters.h"
-
-#ifndef	SPECIAL_ARITHMETICS
-
 #include    <em_arith.h>		/* obtain definition of "arith"	*/
-#include	<flt_arith.h>
-
-#else	/* SPECIAL_ARITHMETICS */
 
-/*	All preprocessor arithmetic should be done in longs.
-*/
-#define	arith	long				/* dummy */
-
-#endif	/* SPECIAL_ARITHMETICS */
+#define	writh		long long
+/* The compiler also uses "unsigned writh". */
 
 struct expr;
 struct type;
@@ -37,6 +32,8 @@ struct type;
 #define	arith_sign	((arith) 1 << (arith_size * 8 - 1))
 #define	max_arith	(~arith_sign)
 
+extern writh full_mask[];		/* cstoper.c */
+
 void arithbalance(register struct expr **e1p, int oper, register struct expr **e2p);
 void relbalance(register struct expr **e1p, int oper, register struct expr **e2p);
 void ch3pointer(struct expr **expp, int oper, register struct type *tp);
@@ -57,5 +54,6 @@ void any2opnd(register struct expr **expp, int oper);
 void any2parameter(register struct expr **expp);
 void field2arith(register struct expr **expp);
 void switch_sign_fp(register struct expr *expr);
+char *writh2str(writh val, int uns);
 
 #endif /* ARITH_H_ */