Skip to content

Commit f3f5210

Browse files
committed
Latest additions from BWK.
1 parent 66a543e commit f3f5210

File tree

6 files changed

+71
-8
lines changed

6 files changed

+71
-8
lines changed

.gitignore

+2
Original file line number | Diff line number | Diff line change
@@ -5,3 +5,5 @@ proctab.c
55
ytab*
66
testdir/foo*
77
testdir/temp*
8+
*.pdf
9+
*.mail

awk.h

+2 -1
Original file line number | Diff line number | Diff line change
@@ -225,7 +225,8 @@ extern int pairstack[], paircnt;
225225

226226
/* structures used by regular expression matching machinery, mostly b.c: */
227227

228-
#define NCHARS (256+3) /* 256 handles 8-bit chars; 128 does 7-bit */
228+
#define NCHARS (1256+3) /* 256 handles 8-bit chars; 128 does 7-bit */
229+
/* BUG: some overflows (caught) if we use 256 */
229230
/* watch out in match(), etc. */
230231
#define HAT (NCHARS+2) /* matches ^ in regular expr */
231232
#define NSTATES 32

b.c

File mode changed: 100644 → 100755
+7 -2
Original file line number | Diff line number | Diff line change
@@ -441,8 +441,13 @@ int *cclenter(const char *argp) /* add a character class */
441441
continue;
442442
}
443443
}
444-
if (!adjbuf((char **) &buf, &bufsz, bp-buf+8, 100, (char **) &bp, "cclenter2"))
445-
FATAL("out of space for character class [%.10s...] 3", p);
444+
if (i >= bufsz) {
445+
bufsz *= 2;
446+
buf = (int *) realloc(buf, bufsz * sizeof(int));
447+
if (buf == NULL)
448+
FATAL("out of space for character class [%.10s...] 2", p);
449+
bp = buf + i;
450+
}
446451
*bp++ = c;
447452
i++;
448453
}

lib.c

File mode changed: 100644 → 100755
+58 -4
Original file line number | Diff line number | Diff line change
@@ -301,6 +301,9 @@ void setclvar(char *s) /* set var=value from s */
301301
Cell *q;
302302
double result;
303303

304+
/* commit f3d9187d4e0f02294fb1b0e31152070506314e67 broke T.argv test */
305+
/* I don't understand why it was changed. */
306+
304307
for (p=s; *p != '='; p++)
305308
;
306309
e = p;
@@ -324,7 +327,7 @@ void fldbld(void) /* create fields from current record */
324327
/* possibly with a final trailing \0 not associated with any field */
325328
char *r, *fr, sep;
326329
Cell *p;
327-
int i, j, n;
330+
int i, j, n, quote;
328331

329332
if (donefld)
330333
return;
@@ -363,6 +366,57 @@ void fldbld(void) /* create fields from current record */
363366
*fr++ = 0;
364367
}
365368
*fr = 0;
369+
} else if ((sep = *inputFS) == ',') { /* CSV: handle quotes, \x, etc. */
370+
for (i = 0; *r != '\0'; ) {
371+
i++;
372+
if (i > nfields)
373+
growfldtab(i);
374+
if (freeable(fldtab[i]))
375+
xfree(fldtab[i]->sval);
376+
fldtab[i]->sval = fr;
377+
fldtab[i]->tval = FLD | STR | DONTFREE;
378+
379+
/* printf("fldbld 1 [%s] [%d:] [%s]\n", r, i, fr); */
380+
381+
if (*r == '"' /* || *r == '\'' */ ) { /* "..."; do not include '...' */
382+
quote = *r++;
383+
for ( ; *r != '\0'; ) {
384+
/* printf("fldbld 2 [%s]\n", r); */
385+
if (*r == quote && r[1] != '\0' && r[1] == quote) {
386+
r += 2; /* doubled quote */
387+
*fr++ = quote;
388+
} else if (*r == '\\') { /* BUG: off end? */
389+
r++; /* backslashes inside "..." ??? */
390+
*fr++ = *r++;
391+
} else if (*r == quote && (r[1] == '\0' || r[1] == ',')) {
392+
r++;
393+
if (*r == ',')
394+
r++;
395+
break;
396+
} else {
397+
*fr++ = *r++;
398+
}
399+
}
400+
*fr++ = 0;
401+
continue;
402+
}
403+
404+
/* unquoted field */
405+
for ( ; *r != '\0'; ) {
406+
if (*r == ',') { /* bare comma ends field */
407+
r++;
408+
*fr++ = 0;
409+
break;
410+
} else if (*r == '\\') { /* BUG: could walk off end */
411+
r++;
412+
*fr++ = *r++;
413+
} else {
414+
*fr++ = *r++;
415+
}
416+
}
417+
*fr++ = 0;
418+
}
419+
*fr = 0;
366420
} else if ((sep = *inputFS) == 0) { /* new: FS="" => 1 char/field */
367421
for (i = 0; *r != '\0'; r += n) {
368422
char buf[MB_LEN_MAX + 1];
@@ -797,11 +851,11 @@ bool is_valid_number(const char *s, bool trailing_stuff_ok,
797851
while (isspace(*s))
798852
s++;
799853

800-
// no hex floating point, sorry
854+
/* no hex floating point, sorry */
801855
if (s[0] == '0' && tolower(s[1]) == 'x')
802856
return false;
803857

804-
// allow +nan, -nan, +inf, -inf, any other letter, no
858+
/* allow +nan, -nan, +inf, -inf, any other letter, no */
805859
if (s[0] == '+' || s[0] == '-') {
806860
is_nan = (strncasecmp(s+1, "nan", 3) == 0);
807861
is_inf = (strncasecmp(s+1, "inf", 3) == 0);
@@ -835,7 +889,7 @@ bool is_valid_number(const char *s, bool trailing_stuff_ok,
835889
if (no_trailing != NULL)
836890
*no_trailing = (*ep == '\0');
837891

838-
// return true if found the end, or trailing stuff is allowed
892+
/* return true if found the end, or trailing stuff is allowed */
839893
retval = *ep == '\0' || trailing_stuff_ok;
840894

841895
return retval;

main.c

+1 -1
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
2222
THIS SOFTWARE.
2323
****************************************************************/
2424

25-
const char *version = "version 20220530";
25+
const char *version = "version 20220818";
2626

2727
#define DEBUG
2828
#include <stdio.h>

run.c

File mode changed: 100644 → 100755
+1
Original file line number | Diff line number | Diff line change
@@ -1454,6 +1454,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
14541454
FATAL("illegal type of split");
14551455
sep = *fs;
14561456
ap = execute(a[1]); /* array name */
1457+
/* BUG 7/26/22: this appears not to reset array: see C1/asplit */
14571458
freesymtab(ap);
14581459
DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs);
14591460
ap->tval &= ~STR;

0 commit comments

Comments
 (0)