diff --git a/awk.h b/awk.h index 49b5dfc..afcd7c6 100644 --- a/awk.h +++ b/awk.h @@ -163,6 +163,12 @@ typedef struct Node { struct Node *narg[1]; /* variable: actual size set by calling malloc */ } Node; +typedef struct Function { /* user-defined function */ + size_t nargs; /* number of defined arguments */ + char **argnames; /* the names of those arguments */ + Node *body; /* the function's parse tree */ +} Function; + #define NIL ((Node *) 0) extern Node *winner; diff --git a/parse.c b/parse.c index 14608be..85392a5 100644 --- a/parse.c +++ b/parse.c @@ -191,9 +191,7 @@ Node *makearr(Node *p) if (isfcn(cp)) SYNTAX( "%s is a function, not an array", cp->nval ); else if (!isarr(cp)) { - xfree(cp->sval); - cp->sval = (char *) makesymtab(NSYMTAB); - cp->tval = ARR; + toarray(cp); } } return p; @@ -232,8 +230,9 @@ Node *linkum(Node *a, Node *b) void defn(Cell *v, Node *vl, Node *st) /* turn on FCN bit in definition, */ { /* body of function, arglist */ - Node *p; - int n; + Node *p, *temp; + Function *f; + size_t n; if (isarr(v)) { SYNTAX( "`%s' is an array name and a function name", v->nval ); @@ -244,13 +243,45 @@ void defn(Cell *v, Node *vl, Node *st) /* turn on FCN bit in definition, */ return; } - v->tval = FCN; - v->sval = (char *) st; + f = (Function *) malloc(sizeof(Function)); + if (f == NULL) + FATAL("out of memory in defn for Function"); + f->body = st; + n = 0; /* count arguments */ for (p = vl; p; p = p->nnext) n++; - v->fval = n; - DPRINTF("defining func %s (%d args)\n", v->nval, n); + f->nargs = n; + + DPRINTF("defining func %s (%zu args)\n", v->nval, f->nargs); + + if (f->nargs > 0) { + /* overflow can't happen, the arglist is bigger */ + f->argnames = (char **) malloc(n * sizeof(char *)); + if (f->argnames == NULL) + FATAL("out of memory in defn for argnames"); + + /* + * Keep a private copy of each argument's name. + * + * Free the arglist nodes, but not their cells. The + * cells belong to the global symbol table and might + * be needed for a variable of the same name. + */ + n = 0; + for (p = vl; p; p = temp) { + DPRINTF("arg %zu is \"%s\"\n", n, ((Cell *)p->narg[0])->nval); + f->argnames[n++] = tostring(((Cell *)p->narg[0])->nval); + temp = p->nnext; + free(p); + } + } else + f->argnames = NULL; + + if (freeable(v)) + free(v->sval); + v->sval = (char *) f; + v->tval = FCN; } int isarg(const char *s) /* is s in argument list for current function? */ diff --git a/proto.h b/proto.h index cb4988e..7911635 100644 --- a/proto.h +++ b/proto.h @@ -96,7 +96,9 @@ extern Node *itonp(int); extern void syminit(void); extern void arginit(int, char **); extern void envinit(char **); +extern void toarray(Cell *); extern Array *makesymtab(int); +extern void clearsymtab(Cell *); extern void freesymtab(Cell *); extern void freeelem(Cell *, const char *); extern Cell *setsymtab(const char *, const char *, double, unsigned int, Array *); @@ -154,7 +156,8 @@ extern void run(Node *); extern Cell *execute(Node *); extern Cell *program(Node **, int); extern Cell *call(Node **, int); -extern Cell *copycell(Cell *); +extern Cell *copycell(Cell *, char *); +extern void updateargs(Cell *); extern Cell *arg(Node **, int); extern Cell *jump(Node **, int); extern Cell *awkgetline(Node **, int); diff --git a/run.c b/run.c index 3eef774..6834025 100644 --- a/run.c +++ b/run.c @@ -228,11 +228,10 @@ struct Frame *frp = NULL; /* frame pointer. bottom level unused */ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ { - static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; int i, ncall, ndef; - int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */ + Function *f; Node *x; - Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */ + Cell *args[NARGS]; /* BUG: fixed size arrays */ Cell *y, *z, *fcn; char *s; @@ -240,6 +239,7 @@ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ s = fcn->nval; if (!isfcn(fcn)) FATAL("calling undefined function %s", s); + f = (Function *) fcn->sval; if (frame == NULL) { frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame)); if (frame == NULL) @@ -247,30 +247,29 @@ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ } for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */ ncall++; - ndef = (int) fcn->fval; /* args in defn */ + if (f->nargs > INT_MAX) + FATAL("function %s nargs out of range", s); + ndef = f->nargs; /* args in defn */ DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame)); if (ncall > ndef) WARNING("function %s called with %d args, uses only %d", s, ncall, ndef); - if (ncall + ndef > NARGS) - FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS); + if (ndef > NARGS) + FATAL("function %s has %d defined arguments, limit %d", s, ndef, NARGS); for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */ DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame)); y = execute(x); - oargs[i] = y; DPRINTF("args[%d]: %s %f <%s>, t=%o\n", i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval); if (isfcn(y)) FATAL("can't use function %s as argument in %s", y->nval, s); - if (isarr(y)) - args[i] = y; /* arrays by ref */ - else - args[i] = copycell(y); + if (i < ndef) { + args[i] = copycell(y, f->argnames[i]); + } tempfree(y); } for ( ; i < ndef; i++) { /* add null args for ones not provided */ - args[i] = gettemp(); - *args[i] = newcopycell; + args[i] = copycell(NULL, f->argnames[i]); } frp++; /* now ok to up frame */ if (frp >= frame + nframe) { @@ -286,64 +285,151 @@ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ frp->retval = gettemp(); DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame)); - y = execute((Node *)(fcn->sval)); /* execute body */ + y = execute(f->body); /* execute body */ DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame)); + tempfree(fcn); + /* + * If y is ... + * + * ret -> jump() has already set the return value. + * exit -> Set return value. + * next -> Set return value. + * copy -> To be visited while freeing args. + * temp -> Useless. Return it to the templist. + * + * Ignore everything else. + */ + + if (isexit(y) || isnext(y)) { + tempfree(frp->retval); + frp->retval = y; + } else { + tempfree(y); + } + + /* free args */ for (i = 0; i < ndef; i++) { - Cell *t = frp->args[i]; - if (isarr(t)) { - if (t->csub == CCOPY) { - if (i >= ncall) { - freesymtab(t); - t->csub = CTEMP; - tempfree(t); - } else { - oargs[i]->tval = t->tval; - oargs[i]->tval &= ~(STR|NUM|DONTFREE); - oargs[i]->sval = t->sval; - tempfree(t); - } - } - } else if (t != y) { /* kludge to prevent freeing twice */ - t->csub = CTEMP; - tempfree(t); - } else if (t == y && t->csub == CCOPY) { - t->csub = CTEMP; - tempfree(t); - freed = 1; + y = frp->args[i]; + if (isarr(y)) { + if (y->cnext == NULL) + freesymtab(y); + else if (y->cnext->sval != y->sval) + FATAL("can't happen: symbol table moved %s", y->cnext->nval); } + y->csub = CTEMP; + tempfree(y); } - tempfree(fcn); - if (isexit(y) || isnext(y)) - return y; - if (freed == 0) { - tempfree(y); /* don't free twice! */ - } - z = frp->retval; /* return value */ + + z = frp->retval; /* return value */ DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval); frp--; return(z); } -Cell *copycell(Cell *x) /* make a copy of a cell in a temp */ +Cell *copycell(Cell *x, char *name) /* make a copy of a cell in a temp */ { Cell *y; - /* copy is not constant or field */ - y = gettemp(); + y->nval = name; + y->csub = CCOPY; + + /* default to the uninitialized value */ + if (x == NULL) + return y; + + /* copy is not constant or field */ y->tval = x->tval & ~(CON|FLD|REC); - y->csub = CCOPY; /* prevents freeing until call is over */ - y->nval = x->nval; /* BUG? */ - if (isstr(x) /* || x->ctype == OCELL */) { + if (isstr(x)) { y->sval = tostring(x->sval); y->tval &= ~DONTFREE; - } else + } else { + y->sval = x->sval; /* by ref */ y->tval |= DONTFREE; + } y->fval = x->fval; + y->fmt = x->fmt; + + /* + * When a tempcell leaves the templist, cnext can be repurposed. + * Arguments use it to maintain a direct link to their origin. + * If an argument that should have been passed by reference was + * passed by value, updateargs() uses these links to determine + * which cells need updating. + * + * An origin is either a global variable, an array element, or a + * function argument without an origin. + * + * A missing value or a tempcell produces an argument without an + * origin. If passed along, it becomes an origin. + * + * If an argument has an origin, it can't become an origin. When + * passed along, its origin becomes the receiver's origin. + * + * Intermediaries between an argument and its origin all point to + * the same origin. + */ + if (x == NULL || istemp(x)) { + y->cnext = NULL; + } else if (x->csub == CCOPY && x->cnext != NULL) { + y->cnext = x->cnext; + } else { + y->cnext = x; + } return y; } +void updateargs(Cell *a) /* convert by-value to by-reference */ +{ + struct Frame *f; + Cell *o; + int i; + + /* + * The argument, a, was an uninitialized function argument that + * has become an array. It was passed by-value but should have + * been passed by-reference. To compensate for the mistake, we + * must update a's origin, o, and every other descendant of o, + * so that they share a symbol table. + * + * We start at the top of the stack and work our way down. + * If we encounter the origin on the stack, the scan concludes + * because all of its descendants are in higher frames that + * have already been visited. Otherwise, we continue until + * we've inspected every argument. + */ + + #define UPDATE_CELL(d) \ + if (freeable((d))) \ + xfree((d)->sval) \ + (d)->sval = a->sval; \ + (d)->tval = a->tval; + + o = a->cnext; + DPRINTF("updateargs triggered by %p for %p\n", (void *) a, (void *) o); + if (o == NULL) /* nothing to update */ + return; + + UPDATE_CELL(o); /* may not be on the stack */ + for (f = frp; f > frame; --f) { + DPRINTF("frame %td\n", f-frame); + for (i = 0; i < f->nargs; i++) { + if (f->args[i] == o) + return; + if (f->args[i]->cnext == o) { + if (f->args[i] == a) { + DPRINTF("\ttrigger arg %d\n", i); + continue; + } + UPDATE_CELL(f->args[i]); + DPRINTF("\tupdating arg %d\n", i); + } + } + } + #undef UPDATE_CELL +} + Cell *arg(Node **a, int n) /* nth argument of a function */ { @@ -516,12 +602,7 @@ Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ x = execute(a[0]); /* Cell* for symbol table */ buf = makearraystring(a[1], __func__); if (!isarr(x)) { - DPRINTF("making %s into an array\n", NN(x->nval)); - if (freeable(x)) - xfree(x->sval); - x->tval &= ~(STR|NUM|DONTFREE); - x->tval |= ARR; - x->sval = (char *) makesymtab(NSYMTAB); + toarray(x); } z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval); z->ctype = OCELL; @@ -542,10 +623,7 @@ Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts * if (!isarr(x)) return True; if (a[1] == NULL) { /* delete the elements, not the table */ - freesymtab(x); - x->tval &= ~STR; - x->tval |= ARR; - x->sval = (char *) makesymtab(NSYMTAB); + clearsymtab(x); } else { char *buf = makearraystring(a[1], __func__); freeelem(x, buf); @@ -1690,12 +1768,11 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ } sep = *fs; ap = execute(a[1]); /* array name */ -/* BUG 7/26/22: this appears not to reset array: see C1/asplit */ - freesymtab(ap); + if (isarr(ap)) + clearsymtab(ap); + else + toarray(ap); DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs); - ap->tval &= ~STR; - ap->tval |= ARR; - ap->sval = (char *) makesymtab(NSYMTAB); n = 0; if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) { diff --git a/testdir/T.func b/testdir/T.func index deb0b40..d1ad85a 100755 --- a/testdir/T.func +++ b/testdir/T.func @@ -194,3 +194,64 @@ $awk ' function foo() { i = 0 } BEGIN { x = foo(); printf "<%s> %d\n", x, x }' >foo2 diff foo1 foo2 || echo 'BAD: T.func (fall off end)' + +$awk ' +function f(x, y) { + x = "x" + y[0] = "y" + print x, y[0] +} +function g(z) { + print z[0] +} +BEGIN { + f(a, a) + g(a) +}' 2>foo1 >/dev/null +ret=$? +if [ $ret -eq 0 ] || ! grep -q 'can.t read value of .* an array' foo1; then + echo "BAD: T.func (simultaneously scalar and array: $ret)" +fi + +$awk ' +function f(f1, f2, f3) { + f1[0] = "*" + print "f: " f1[0], f2[0], f3[0] +} +function g(g1, g2, g3) { + f(g1, g1, g1) + print "g: " g1[0], g2[0], g3[0] +} +function h(h1, h2, h3) { + g(h1, h2, h3) + print "h: " h1[0], h2[0], h3[0] +} +function i(i1, i2, i3) { + print "i: " i1[0], i2[0], i3[0] +} +BEGIN { + OFS = "-" + f(a, a, a) + g(b, b, b) + h(c, c, c) + i(a, b, c) +}' >foo2 +cat <foo1 +f: *-*-* +f: *-*-* +g: *-*-* +f: *-*-* +g: *-*-* +h: *-*-* +i: *-*-* +! +diff foo1 foo2 || echo 'BAD: T.func (cousin coherency)' + +$awk ' +function f(x) { x[0]="updated"; exit } +function g(y) { print y[0] } +BEGIN { f(a) } +END { g(a) } +' >foo2 +echo 'updated' >foo1 +diff foo1 foo2 || echo 'BAD: T.func (exit skips arg conversion updates)' diff --git a/testdir/T.misc b/testdir/T.misc index 1e5c3c5..949883f 100755 --- a/testdir/T.misc +++ b/testdir/T.misc @@ -133,7 +133,7 @@ function bug2(arg) { bot[arg]=arg; } ' 2>foo -grep "can.t assign to foo" foo >/dev/null || echo 1>&2 "BAD: T.misc foo bug" +grep "can.t assign to .* array" foo >/dev/null || echo 1>&2 "BAD: T.misc foo bug" # This should be a syntax error diff --git a/tran.c b/tran.c index 482eede..7abd31f 100644 --- a/tran.c +++ b/tran.c @@ -153,6 +153,29 @@ void envinit(char **envp) /* set up ENVIRON variable */ } } +void toarray(Cell *ap) /* convert from an uninitialized scalar */ +{ + if (isarr(ap)) + return; + + DPRINTF("making %s into an array\n", NN(ap->nval)); + if (ap->sval[0] != '\0' || ap->fval != 0) + FATAL("can't happen: toarray scalar must be uninitialized"); + + if (freeable(ap)) + xfree(ap->sval); + ap->tval &= ~(STR|NUM|DONTFREE); + ap->tval |= ARR; + ap->sval = (char *) makesymtab(NSYMTAB); + + /* + * If this is a function argument, it was passed by value but + * should've been passed by reference. Correction required. + */ + if (ap->csub == CCOPY) + updateargs(ap); +} + Array *makesymtab(int n) /* make a new symbol table */ { Array *ap; @@ -168,17 +191,18 @@ Array *makesymtab(int n) /* make a new symbol table */ return(ap); } -void freesymtab(Cell *ap) /* free a symbol table */ +void clearsymtab(Cell *ap) { Cell *cp, *temp; Array *tp; int i; if (!isarr(ap)) - return; + FATAL("can't happen: clearsymtab for non-array"); + tp = (Array *) ap->sval; if (tp == NULL) - return; + FATAL("can't happen: array has no symbol table %s", ap->nval); for (i = 0; i < tp->size; i++) { for (cp = tp->tab[i]; cp != NULL; cp = temp) { xfree(cp->nval); @@ -191,9 +215,14 @@ void freesymtab(Cell *ap) /* free a symbol table */ tp->tab[i] = NULL; } if (tp->nelem != 0) - WARNING("can't happen: inconsistent element count freeing %s", ap->nval); - free(tp->tab); - free(tp); + FATAL("can't happen: inconsistent element count freeing %s", ap->nval); +} + +void freesymtab(Cell *ap) /* free a symbol table */ +{ + clearsymtab(ap); + free(((Array *)ap->sval)->tab); + free(ap->sval); } void freeelem(Cell *ap, const char *s) /* free elem s from ap (i.e., ap["s"] */