diff --git a/jsrc/ar.c b/jsrc/ar.c index 61718723..e590d437 100644 --- a/jsrc/ar.c +++ b/jsrc/ar.c @@ -435,13 +435,13 @@ static DF1(jtreducesp){A a,g,z;B b;I f,n,r,*v,wn,wr,*ws,wt,zt;P*wp; else DQ(m, x=*u++; y=*u++; *zv++=F(x,y); ); \ } #define BTABIFX(F) {btab[0 ]=0 F 0; \ - btab[C_LE?256: 1]=0 F 1; \ - btab[C_LE? 1:256]=1 F 0; \ + btab[256]=0 F 1; \ + btab[ 1]=1 F 0; \ btab[257 ]=1 F 1; \ } #define BTABPFX(F) {btab[0 ]=F(0,0); \ - btab[C_LE?256: 1]=F(0,1); \ - btab[C_LE? 1:256]=F(1,0); \ + btab[256]=F(0,1); \ + btab[ 1]=F(1,0); \ btab[257 ]=F(1,1); \ } #define BR2CASE(t,id) ((id)+256*(t)) diff --git a/jsrc/dtoa.c b/jsrc/dtoa.c index 4f772f93..04941fec 100644 --- a/jsrc/dtoa.c +++ b/jsrc/dtoa.c @@ -169,11 +169,7 @@ /* Options for use with J */ #include "js.h" #define Long int -#if C_LE #define IEEE_8087 -#else -#define IEEE_MC68k -#endif #define MULTIPLE_THREADS #define ACQUIRE_DTOA_LOCK(n) /* handled by using jt */ #define FREE_DTOA_LOCK(n) /* handled by using jt */ diff --git a/jsrc/j.h b/jsrc/j.h index e08868be..2ac25521 100644 --- a/jsrc/j.h +++ b/jsrc/j.h @@ -271,12 +271,10 @@ static inline omp_int_t omp_get_max_threads() { return 1;} #define XNAN "\000\000\000\000\000\000\370\177" #endif -#if C_LE #ifndef XINF #define XINF "\000\000\000\000\000\000\360\177" #define XNAN "\000\000\000\000\000\000\370\377" #endif -#endif #ifndef XINF #define XINF "\177\360\000\000\000\000\000\000" @@ -921,15 +919,10 @@ static inline __attribute__((inline)) float64x2_t vec_and_pd(float64x2_t a, floa #define NUMMAX 9 // largest number represented in num[] #define NUMMIN (~NUMMAX) // smallest number represented in num[] // Given SZI B01s read into p, pack the bits into the MSBs of p and clear the lower bits of p -#if C_LE // if anybody makes a bigendian CPU we'll have to recode #if BW==64 // this is what it should be #define PACKBITS(p) {p|=p>>7LL;p|=p>>14LL;p|=p>>28LL;p<<=56LL;} #define PACKBITS(p) {p|=p>>7LL;p|=p>>14LL;p|=p<<28LL;p&=0xff0000000; p<<=28LL;} // this generates one extra instruction, rather than the 3 for the correct version #define PACKBITSINTO(p,out) {p|=p>>7LL;p|=p>>14LL;out=((p|(p>>28LL))<<56)|(out>>SZI);} // pack and shift into out -#else -#define PACKBITS(p) {p|=p>>7LL;p|=p>>14LL;p<<=28LL;} -#define PACKBITSINTO(p,out) {p|=p>>7LL;p|=p>>14LL;out=(p<<28)|(out>>SZI);} // pack and shift into out -#endif #endif #define PRISTCOMSET(w,flg) awback=(w); if(unlikely((flg&AFVIRTUAL)!=0)){awback=ABACK(awback); flg=AFLAG(awback);} AFLAG(awback)=flg&~AFPRISTINE; #define PRISTCOMSETF(w,flg) if(unlikely((flg&AFVIRTUAL)!=0)){w=ABACK(w); flg=AFLAG(w);} AFLAG(w)=flg&~AFPRISTINE; // used only at end, when w can be destroyed @@ -1057,10 +1050,8 @@ if(likely(z<3)){_zzt+=z; z=(I)&oneone; _zzt=_i&3?_zzt:(I*)z; z=_i&2?(I)_zzt:z; z #else #define REPLBYTETOW(in,out) (out=(UC)(in),out|=out<<8,out|=out<<16) #endif -#if C_LE // Output is pointer, Input is I/UI, count is # bytes to NOT store to output pointer (0-7). #define STOREBYTES(out,in,n) {*(UI*)(out) = (*(UI*)(out)&~((UI)~(I)0 >> ((n)<<3))) | ((in)&((UI)~(I)0 >> ((n)<<3)));} -#endif // Input is the name of word of bytes. Result is modified name, 1 bit per input byte, spaced like B01s, with the bit 0 iff the corresponding input byte was all 0. Non-boolean bits of result are garbage. #define ZBYTESTOZBITS(b) (b=b|((b|(~b+VALIDBOOLEAN))>>7)) // for each byte: zero if b0 off, b7 off, and b7 turns on when you subtract 1 or 2 // to verify gah conversion #define RETF(exp) { A retfff=(exp); if ((retfff) && ((AT(retfff)&SPARSE && AN(retfff)!=1) || (AT(retfff)&DENSE && AN(retfff)!=prod(AR(retfff),AS(retfff)))))SEGFAULT;; R retfff; } // scaf @@ -1099,7 +1090,6 @@ if(likely(z<3)){_zzt+=z; z=(I)&oneone; _zzt=_i&3?_zzt:(I*)z; z=_i&2?(I)_zzt:z; z #define VAL2 '\002' #define WITHDEBUGOFF(stmt) {UC d=jt->uflags.us.cx.cx_c.db; jt->uflags.us.cx.cx_c.db=0; stmt jt->uflags.us.cx.cx_c.db=d;} // execute stmt with debug turned off -#if C_LE #if BW==64 #define IHALF0 0x00000000ffffffffLL #else @@ -1125,39 +1115,9 @@ if(likely(z<3)){_zzt+=z; z=(I)&oneone; _zzt=_i&3?_zzt:(I*)z; z=_i&2?(I)_zzt:z; z #define BS01 0x0100 #define BS10 0x0001 #define BS11 0x0101 -#else -#if BW==64 -#define IHALF0 0xffffffff00000000LL -#else -#define IHALF0 0xffff0000 -#endif -#define B0000 0x00000000 -#define B0001 0x00000001 -#define B0010 0x00000100 -#define B0011 0x00000101 -#define B0100 0x00010000 -#define B0101 0x00010001 -#define B0110 0x00010100 -#define B0111 0x00010101 -#define B1000 0x01000000 -#define B1001 0x01000001 -#define B1010 0x01000100 -#define B1011 0x01000101 -#define B1100 0x01010000 -#define B1101 0x01010001 -#define B1110 0x01010100 -#define B1111 0x01010101 -#define BS00 0x0000 -#define BS01 0x0001 -#define BS10 0x0100 -#define BS11 0x0101 -#endif - - #define CACHELINESIZE 64 // size of processor cache line, in case we align to it - // flags in call to cachedmmult and blockedmmult #define FLGCMPX 0 #define FLGCMP ((I)1<>(INTX-1)))) // the first (presumably only) value in w, when w is an INT or B01 type -#endif /* Types for AT(x) field of type A */ /* Note: BOOL name conflict with ???; SCHAR name conflict with sqltypes.h */ diff --git a/jsrc/k.c b/jsrc/k.c index bd042988..5d0ac09d 100644 --- a/jsrc/k.c +++ b/jsrc/k.c @@ -158,7 +158,7 @@ static KF1(jtQfromX){X*v=XAV(w),*x=(X*)yv; DQ(AN(w), *x++=*v++; *x++=iv1;); R 1; static KF2(jtQfromD){B neg,recip;D c,d,t,*wv;I e,i,n,*v;Q q,*x;S*tv; if(!(w))R 0; - n=AN(w); wv=DAV(w); x=(Q*)yv; tv=3*C_LE+(S*)&t; + n=AN(w); wv=DAV(w); x=(Q*)yv; tv=3+(S*)&t; for(i=0;i>LGSZI; GATV0(t,INT,tn,1); tc=UAV(t); ti=(UI*)tc; // Run g in batches of up to 255, accumulating the result bytewise. NOTE: there may be garbage at the end of yv, but because // we are supporting littleendian only, it will not affect the result - for(j=nn;0>LGSZI; r1=n&(SZI-1); \ - if (!AR(a)){ \ - ASSIGNX(av); \ +#define SUMB(f,T0,T1,F) \ + static F2(f){I an,*av,n,p,r1,wn,*wv,z=0;UI t,x; \ + an=AN(a); av=AV(a); \ + wn=AN(w); wv=AV(w); n=1; n=AR(a)?an:n; n=AR(w)?wn:n; \ + p=n>>LGSZI; r1=n&(SZI-1); \ + if (!AR(a)){ \ + ASSIGNX(av); \ while((p-=255)>0){t=0; DQ(255, t+=F(x, *wv++);); ADDBYTESINI(t); z+=t;} \ - t=0; DQ(p+255, t+=F(x, *wv++);); ADDBYTESINI(t); z+=t; x=F(x, *wv); \ - }else if(!AR(w)){ \ - ASSIGNX(wv); \ + t=0; DQ(p+255, t+=F(x, *wv++);); ADDBYTESINI(t); z+=t; x=F(x, *wv); \ + }else if(!AR(w)){ \ + ASSIGNX(wv); \ while((p-=255)>0){t=0; DQ(255, t+=F(*av++,x );); ADDBYTESINI(t); z+=t;} \ - t=0; DQ(p+255, t+=F(*av++,x );); ADDBYTESINI(t); z+=t; x=F(*av,x ); \ - }else{ \ + t=0; DQ(p+255, t+=F(*av++,x );); ADDBYTESINI(t); z+=t; x=F(*av,x ); \ + }else{ \ while((p-=255)>0){t=0; DQ(255, t+=F(*av++,*wv++);); ADDBYTESINI(t); z+=t;} \ - t=0; DQ(p+255, t+=F(*av++,*wv++);); ADDBYTESINI(t); z+=t; x=F(*av,*wv); \ - } \ - x &= ((I)1<<(r1<>(LGSZI/C_LE); r=((n-1)&(SZI-1))+1; // there is always a remnant + u=AV(z); q=(n-1)>>(LGSZI); r=((n-1)&(SZI-1))+1; // there is always a remnant I mask=mod==2?VALIDBOOLEAN:0; // if mod is 1, all results will be 0; otherwise boolean result DQ(q, DQ(SZI, m=(m>>8)+((UI)*v<<((SZI-1)*8)); v+=SZI;); *u++=m&mask;) DQ(r, m=(m>>8)+((UI)*v<<((SZI-1)*8)); v+=SZI;); // 1-8 bytes diff --git a/jsrc/vg.c b/jsrc/vg.c index fb650be8..9fe88d3e 100644 --- a/jsrc/vg.c +++ b/jsrc/vg.c @@ -662,7 +662,7 @@ static GF(jtgrc){A x;B b,q,up;I e,i,p,ps,*xv,yv[256];UC*vv,*wv; if((UI)ai>lgn)R grx(m,ai,n,w,zv); // TUNE ai<<=((AT(w)>>C2TX)&1); p=B01&AT(w)?2:256; ps=p*SZI; wv=UAV(w); up=SGNTO0(jt->workareas.compare.complt); - q=C2T&AT(w) && C_LE; + q=C2T&AT(w) && 1; if(1>8;} -#else -#define IND2(x) {US xx = (x); ii = 0x3&((xx>>7)|xx);} -#endif // Convert 4 Booleans to a code 0-15 -#if C_LE #define IND4(x) {UINT xx = (x); xx|=xx<<9; xx|=xx<<18; ii = xx>>24;} // first byte (bit 0) is the MSB when a word is loaded -#else -#define IND4(x) {UINT xx = (x); ii = 0xf&((xx>>21)|(xx>>14)|(xx>>7)|xx);} -#endif // endian constants for 16-bit radix sorts -#define FPLSBWDX (C_LE?0:3) -#define FPMSBWDX (C_LE?3:0) -#define INTLSBWDX (C_LE?0:(SZI/2-1)) -#define INTMSBWDX (C_LE?(SZI/2-1):0) -#define WDINC (C_LE?1:-1) +#define FPLSBWDX (0) +#define FPMSBWDX (3) +#define INTLSBWDX (0) +#define INTMSBWDX (SZI/2-1) +#define WDINC (1) diff --git a/jsrc/vgranking.c b/jsrc/vgranking.c index b846f95a..fe5f0da7 100644 --- a/jsrc/vgranking.c +++ b/jsrc/vgranking.c @@ -118,16 +118,12 @@ F1(jtranking){A y,z;C*wv;I icn,i,k,m,n,t,wcr,wf,wn,wr,*ws,wt,*zv;CR rng;TTYPE *y case sizeof(C4): RANKINGLOOP(C4); break; case sizeof(C): RANKINGLOOP(UC); break; -#if C_LE case sizeof(S): if(wt&IS1BYTE){I c,d,s,t;US*v;TTYPE *u; v=(US*)wv; DQ(n, ++yu[*v++];); s=0; DO(256, c=0; d=i; DQ(256, u=yv+(c+d); c+=256; if(*u){t=*u; *u=(TTYPE)s; s+=t;});); v=(US*)wv; DQ(n, *zv++=yu[*v++]++;); }else RANKINGLOOP(US); -#else - case sizeof(S): RANKINGLOOP(US); -#endif } wv+=n*k; } diff --git a/jsrc/vgsort.c b/jsrc/vgsort.c index c42c2ee3..9a447173 100644 --- a/jsrc/vgsort.c +++ b/jsrc/vgsort.c @@ -214,7 +214,7 @@ static SF(jtsortc2){A y,z;B up;I i,p,*yv;US j,k,*wv,*v; DO(p, yv[i]=0;); for(i=0;ihiv; DQ(jt->hin, HASHSTEP(z,v[*u++]); ); R z;} -#if C_LE UI hic2( I k,UC*v){UI HASHINIT(z); DQ(k>>1, HASHSTEP(z,v[0]); if(*(v+1)){HASHSTEP(z,v[1]);} v+=2;); R z;} -#else - UI hic2( I k,UC*v){UI HASHINIT(z); ++v; DQ(k>>1, HASHSTEP(z,v[0]); - if(*(v-1)){HASHSTEP(z,v[-1]);} v+=2;); R z;} -#endif -#if C_LE UI hic4( I k,UC*v){UI HASHINIT(z); DQ(k>>2, HASHSTEP(z,v[0]); if(*(v+2)||*(v+3)){HASHSTEP(z,v[1]); HASHSTEP(z,v[2]); HASHSTEP(z,v[3]);} else if(*(v+1)){HASHSTEP(z,v[1]);} v+=4;); R z;} -#else - UI hic4( I k,UC*v){UI HASHINIT(z); v+=3; DQ(k>>2, HASHSTEP(z,v[0]); - if(*(v-2)||*(v-3)){HASHSTEP(z,v[-1]); - HASHSTEP(z,v[-2]); - HASHSTEP(z,v[-3]);} - else if(*(v-1)){HASHSTEP(z,v[-1]);} v+=4;); R z;} -#endif - // Hash a single unsigned INT #define hicw(v) (10495464745870458733U**(UI*)(v)) diff --git a/jsrc/viix.c b/jsrc/viix.c index 6c854e10..3728c878 100644 --- a/jsrc/viix.c +++ b/jsrc/viix.c @@ -142,11 +142,7 @@ F2(jticap2){A*av,*wv,z;C*uu,*vv;I ar,*as,at,b,c,ck,cm,ge,gt,j,k,m,n,p,q,r,t,wr,* case TT(B01X, FLX ): BSLOOP(C, D ); break; case TT(LITX, C2TX ): BSLOOP(UC,US); break; case TT(LITX, C4TX ): BSLOOP(UC,C4); break; -#if C_LE case TT(LITX, LITX ): BSLOOP(UC,UC); break; -#else - case TT(LITX, LITX ): if(1&c){BSLOOP(UC,UC); break;}else c>>=1; /* fall thru */ -#endif case TT(C2TX, C2TX ): BSLOOP(US,US); break; case TT(C2TX, C4TX ): BSLOOP(US,C4); break; case TT(C2TX, LITX ): BSLOOP(US,UC); break; diff --git a/jsrc/vu.c b/jsrc/vu.c index eac5ca86..ade2ae21 100644 --- a/jsrc/vu.c +++ b/jsrc/vu.c @@ -18,7 +18,6 @@ A jttoc1(J jt,B h,A w){A z;C*wv,*zv;I n;C4*w4; w4=C4AV(w); ASSERT(!n||(C2T+C4T)&AT(w),EVDOMAIN); // must be empty or unicode GATV(z,LIT,n,AR(w),AS(w)); zv=CAV(z); // allocate ASCII area with same data shape -#if C_LE if(C2T&AT(w)) { if(h)DQ(n, *zv++=*wv++; wv++;) else DQ(n, *zv++=*wv++; ASSERT(!*wv++,EVDOMAIN);) @@ -29,16 +28,6 @@ A jttoc1(J jt,B h,A w){A z;C*wv,*zv;I n;C4*w4; } // copy the low byte of the data (if there is any). if b==0, verify high byte is 0 // where low and high are depends on endianness -#else - if(C2T&AT(w)) - { - if(h)DQ(n, wv++; *zv++=*wv++;) else DQ(n, ASSERT(!*wv++,EVDOMAIN); *zv++=*wv++;) - } - else - { - if(h)DQ(n, *zv++=(UC)*w4++; ) else DQ(n, *zv++=(UC)*w4++; ASSERT(*(w4-1)<256UL,EVDOMAIN);) - } -#endif RETF(z); } /* convert 2-byte or 4-byte chars to 1-byte chars; 0==h iff high order byte(s) must be 0 */ @@ -48,7 +37,6 @@ static F1(jttoc2){A z;C*wv,*zv;I n;C4*w4;US*z2; n=AN(w); wv=CAV(w); w4=C4AV(w); ASSERT(!n||(LIT+C4T)&AT(w),EVDOMAIN); GATV(z,C2T,n,AR(w),AS(w)); zv=CAV(z); z2=USAV(z); -#if C_LE if(LIT&AT(w)) { DQ(n, *zv++=*wv++; *zv++=0;); @@ -57,16 +45,6 @@ static F1(jttoc2){A z;C*wv,*zv;I n;C4*w4;US*z2; { DQ(n, *z2++=(US)*w4++;); } -#else - if(LIT&AT(w)) - { - DQ(n, *zv++=0; *zv++=*wv++;); - } - else - { - DQ(n, *z2++=(US)*w4++;); - } -#endif R z; } /* convert 1-byte chars or 4-byte chars(discard high order half) to 2-byte chars */ diff --git a/jsrc/x15.c b/jsrc/x15.c index f1740e6e..163da94f 100644 --- a/jsrc/x15.c +++ b/jsrc/x15.c @@ -909,11 +909,7 @@ static B jtcdexec1(J jt,CCT*cc,C*zv0,C*wu,I wk,I wt,I wd){A*wv=(A*)wu,x,y,*zv;B #endif #if defined(__PPC64__) /* +1 put the float in low bits in dv, but dd has to be D */ -#if C_LE *dv=0; *(((float*)dv++))=(float)(dd[dcnt++]=*(D*)xv); -#else - *dv=0; *(((float*)dv++)+1)=(float)(dd[dcnt++]=*(D*)xv); -#endif /* *dv=0; *(((float*)dv++)+1)=dd[dcnt++]=(float)*(D*)xv; */ #elif defined(__aarch64__) {f=(float)*(D*)xv; dd[dcnt]=0; *(float*)(dd+dcnt++)=f; diff --git a/jsrc/xb.c b/jsrc/xb.c index bf4036b9..2221f482 100644 --- a/jsrc/xb.c +++ b/jsrc/xb.c @@ -56,7 +56,7 @@ F2(jtnouninfo2){A z; #define BR(d,a) ((C*)(a)+(3LL<