From 29c0668eaabc486250435530f7dc9a959a5f7d32 Mon Sep 17 00:00:00 2001 From: Miguel Pineiro Jr Date: Fri, 22 Sep 2023 21:34:32 -0400 Subject: [PATCH 1/4] Discard excess function arguments It's impossible for a node in a user-defined function to reference arguments beyond those which are defined, so don't store them in the arg lists. Evaluate them for side-effects and discard. This change also plugs a tempcell leak in args[i] for i >= ndef. ===============> cat leak.sh #/bin/sh # Monitor awk's memory consumption while repeatedly calling a # user-defined function with a couple of extra arguments. ${1:-./a.out} ' function f(x) { x } BEGIN { mem = "ps -p $PPID -o rsz=" system(mem) while (++i <= 100000) { f(i, i, i) if (i % 10000 == 0) system(mem) } } ' 2>/dev/null ===============> paste <(./leak.sh ./master.out) <(./leak.sh ./a.out) 2432 2304 3456 2304 4608 2304 5632 2304 6656 2304 7680 2304 8832 2304 9856 2304 11008 2304 12160 2304 13312 2304 --- run.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/run.c b/run.c index 3eef774..a1cc488 100644 --- a/run.c +++ b/run.c @@ -257,15 +257,17 @@ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */ DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame)); y = execute(x); - oargs[i] = y; DPRINTF("args[%d]: %s %f <%s>, t=%o\n", i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval); if (isfcn(y)) FATAL("can't use function %s as argument in %s", y->nval, s); - if (isarr(y)) - args[i] = y; /* arrays by ref */ - else - args[i] = copycell(y); + if (i < ndef) { + oargs[i] = y; + if (isarr(y)) + args[i] = y; /* arrays by ref */ + else + args[i] = copycell(y); + } tempfree(y); } for ( ; i < ndef; i++) { /* add null args for ones not provided */ From ab16a769cbad31756d33a345f7f7430de35a56f1 Mon Sep 17 00:00:00 2001 From: Miguel Pineiro Jr Date: Fri, 22 Sep 2023 23:54:56 -0400 Subject: [PATCH 2/4] Enable functions to actually use NARGS arguments When checking if a function call exceeds the NARGS maximum argument count, arguments for defined parameters were counted twice, reducing the call capacity in half. ===============> cat nargs.awk # NARGS is currently 50 function args25(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, p22, p23, p24, p25) { print "25 arguments" } function args26(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, p22, p23, p24, p25, p26) { print "26 arguments" } function args50(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, p22, p23, p24, p25, p26, p27, p28, p29, p30, p31, p32, p33, p34, p35, p36, p37, p38, p39, p40, p41, p42, p43, p44, p45, p46, p47, p48, p49, p50) { print "50 arguments" } function args51(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, p22, p23, p24, p25, p26, p27, p28, p29, p30, p31, p32, p33, p34, p35, p36, p37, p38, p39, p40, p41, p42, p43, p44, p45, p46, p47, p48, p49, p50, p51) { print "51 arguments" } BEGIN { args25(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, a24, a25) args26(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, a24, a25, a26) args50(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, a28, a29, a30, a31, a32, a33, a34, a35, a36, a37, a38, a39, a40, a41, a42, a43, a44, a45, a46, a47, a48, a49, a50) # Call args50 with 51 arguments. The 51st does not correspond # to a defined parameter, so it doesn't count against NARGS # (since excess args are not stored in the argument lists). args50(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, a28, a29, a30, a31, a32, a33, a34, a35, a36, a37, a38, a39, a40, a41, a42, a43, a44, a45, a46, a47, a48, a49, a50, a51) args51(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, a28, a29, a30, a31, a32, a33, a34, a35, a36, a37, a38, a39, a40, a41, a42, a43, a44, a45, a46, a47, a48, a49, a50, a51) } ===============>$ ./master.out -f nargs.awk 25 arguments ./master.out: function args26 has 52 arguments, limit 50 source line number 38 ===============>$ ./a.out -f nargs.awk 25 arguments 26 arguments 50 arguments ./a.out: function args50 called with 51 args, uses only 50 source line number 53 50 arguments ./a.out: function args51 has 51 defined arguments, limit 50 source line number 59 --- run.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/run.c b/run.c index a1cc488..72bf61d 100644 --- a/run.c +++ b/run.c @@ -252,8 +252,8 @@ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ if (ncall > ndef) WARNING("function %s called with %d args, uses only %d", s, ncall, ndef); - if (ncall + ndef > NARGS) - FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS); + if (ndef > NARGS) + FATAL("function %s has %d defined arguments, limit %d", s, ndef, NARGS); for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */ DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame)); y = execute(x); From 13cc85e85f04713a73f5940711aaecea3e6950bf Mon Sep 17 00:00:00 2001 From: Miguel Pineiro Jr Date: Mon, 25 Sep 2023 09:22:30 -0400 Subject: [PATCH 3/4] Immediately propagate the conversion of an arg There are at least two reasons for why the propagation of the conversion of an uninitialized argument into an array cannot wait until the function has finished. The first is that otherwise any arguments in the active frame that share the origin of the converted argument will persist for the life of the function as independent copies of the uninitialized value instead of referring to the new array. The second is that the exit statement's longjmp entirely skips the function epilogue (where the conversion had been handled), opening the door to incorrect values in END blocks. Instead of waiting for the epilogue, toarray immediately initiates a scan of the stack frames to update those arguments that have the same origin as the converted argument. The argument list is now strictly of a single cell subtype, CCOPY. It had been allowed to contain a mix of subtypes (CCOPY and CVAR). This restriction simplifies the logic of the epilogue significantly. The oargs list is no more. CCOPY cells use cnext to record their origin (oargs recorded the parent, but the origin may be a more distant ancestor). Passing by reference had been done by setting the argument to the address of the array's cell. This is not allowed under the new regime. Instead it uses a pointer to the array's symbol table. Which means that the table must be immobilzed or the stack frames must be scanned upon relocation. Enter clearsymtab to empty arrays without moving their symtabs. This commit also fixes several other defects, among them a symbol table leak (in the epilogue, unaware of each other, undetected siblings would assign their private symbol tables to the same parent), the use-after-free discovered by @millert in #157 (apologies for the delay in getting this out), and the following use-after-free from a dangling name pointer (manifests only when debugging is enabled): ----->$ cat dangling-nval.sh awk=${1:-nawk} valgrind --leak-check=full $awk -d ' function f(x) { g() print x } function g() { delete a["hi"] } BEGIN { a["hi"] = "world" f(a["hi"]) }' ----->$ For the moment, function arguments are anonymous. This change forced a small modification to a test in T.misc. --- parse.c | 4 +- proto.h | 3 + run.c | 201 ++++++++++++++++++++++++++++++++++--------------- testdir/T.func | 61 +++++++++++++++ testdir/T.misc | 2 +- tran.c | 41 ++++++++-- 6 files changed, 243 insertions(+), 69 deletions(-) diff --git a/parse.c b/parse.c index 14608be..0944e89 100644 --- a/parse.c +++ b/parse.c @@ -191,9 +191,7 @@ Node *makearr(Node *p) if (isfcn(cp)) SYNTAX( "%s is a function, not an array", cp->nval ); else if (!isarr(cp)) { - xfree(cp->sval); - cp->sval = (char *) makesymtab(NSYMTAB); - cp->tval = ARR; + toarray(cp); } } return p; diff --git a/proto.h b/proto.h index cb4988e..97698e8 100644 --- a/proto.h +++ b/proto.h @@ -96,7 +96,9 @@ extern Node *itonp(int); extern void syminit(void); extern void arginit(int, char **); extern void envinit(char **); +extern void toarray(Cell *); extern Array *makesymtab(int); +extern void clearsymtab(Cell *); extern void freesymtab(Cell *); extern void freeelem(Cell *, const char *); extern Cell *setsymtab(const char *, const char *, double, unsigned int, Array *); @@ -155,6 +157,7 @@ extern Cell *execute(Node *); extern Cell *program(Node **, int); extern Cell *call(Node **, int); extern Cell *copycell(Cell *); +extern void updateargs(Cell *); extern Cell *arg(Node **, int); extern Cell *jump(Node **, int); extern Cell *awkgetline(Node **, int); diff --git a/run.c b/run.c index 72bf61d..efe7bad 100644 --- a/run.c +++ b/run.c @@ -228,11 +228,9 @@ struct Frame *frp = NULL; /* frame pointer. bottom level unused */ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ { - static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; int i, ncall, ndef; - int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */ Node *x; - Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */ + Cell *args[NARGS]; /* BUG: fixed size arrays */ Cell *y, *z, *fcn; char *s; @@ -262,17 +260,12 @@ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ if (isfcn(y)) FATAL("can't use function %s as argument in %s", y->nval, s); if (i < ndef) { - oargs[i] = y; - if (isarr(y)) - args[i] = y; /* arrays by ref */ - else - args[i] = copycell(y); + args[i] = copycell(y); } tempfree(y); } for ( ; i < ndef; i++) { /* add null args for ones not provided */ - args[i] = gettemp(); - *args[i] = newcopycell; + args[i] = copycell(NULL); } frp++; /* now ok to up frame */ if (frp >= frame + nframe) { @@ -290,38 +283,41 @@ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame)); y = execute((Node *)(fcn->sval)); /* execute body */ DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame)); + tempfree(fcn); + /* + * If y is ... + * + * ret -> jump() has already set the return value. + * exit -> Set return value. + * next -> Set return value. + * copy -> To be visited while freeing args. + * temp -> Useless. Return it to the templist. + * + * Ignore everything else. + */ + + if (isexit(y) || isnext(y)) { + tempfree(frp->retval); + frp->retval = y; + } else { + tempfree(y); + } + + /* free args */ for (i = 0; i < ndef; i++) { - Cell *t = frp->args[i]; - if (isarr(t)) { - if (t->csub == CCOPY) { - if (i >= ncall) { - freesymtab(t); - t->csub = CTEMP; - tempfree(t); - } else { - oargs[i]->tval = t->tval; - oargs[i]->tval &= ~(STR|NUM|DONTFREE); - oargs[i]->sval = t->sval; - tempfree(t); - } - } - } else if (t != y) { /* kludge to prevent freeing twice */ - t->csub = CTEMP; - tempfree(t); - } else if (t == y && t->csub == CCOPY) { - t->csub = CTEMP; - tempfree(t); - freed = 1; + y = frp->args[i]; + if (isarr(y)) { + if (y->cnext == NULL) + freesymtab(y); + else if (y->cnext->sval != y->sval) + FATAL("can't happen: symbol table moved %s", y->cnext->nval); } + y->csub = CTEMP; + tempfree(y); } - tempfree(fcn); - if (isexit(y) || isnext(y)) - return y; - if (freed == 0) { - tempfree(y); /* don't free twice! */ - } - z = frp->retval; /* return value */ + + z = frp->retval; /* return value */ DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval); frp--; return(z); @@ -331,21 +327,117 @@ Cell *copycell(Cell *x) /* make a copy of a cell in a temp */ { Cell *y; - /* copy is not constant or field */ - y = gettemp(); + y->csub = CCOPY; + + /* default to the uninitialized value */ + if (x == NULL) + return y; + + /* copy is not constant or field */ y->tval = x->tval & ~(CON|FLD|REC); - y->csub = CCOPY; /* prevents freeing until call is over */ - y->nval = x->nval; /* BUG? */ - if (isstr(x) /* || x->ctype == OCELL */) { + + /* + * What should we name this argument? The parser does not preserve + * the names of parameters, so we don't know what the source calls + * it. Is it worth the effort to change that? + * + * Note: Simply pointing to the incoming cell's name (as had been + * done for a long time) creates a dangling pointer if that cell + * is deleted (think array element). + * + * For now, args are anonymous. Debugging can use frame/arg indices. + */ + y->nval = NULL; + if (isstr(x)) { y->sval = tostring(x->sval); y->tval &= ~DONTFREE; - } else + } else { + y->sval = x->sval; /* by ref */ y->tval |= DONTFREE; + } y->fval = x->fval; + y->fmt = x->fmt; + + /* + * When a tempcell leaves the templist, cnext can be repurposed. + * Arguments use it to maintain a direct link to their origin. + * If an argument that should have been passed by reference was + * passed by value, updateargs() uses these links to determine + * which cells need updating. + * + * An origin is either a global variable, an array element, or a + * function argument without an origin. + * + * A missing value or a tempcell produces an argument without an + * origin. If passed along, it becomes an origin. + * + * If an argument has an origin, it can't become an origin. When + * passed along, its origin becomes the receiver's origin. + * + * Intermediaries between an argument and its origin all point to + * the same origin. + */ + if (x == NULL || istemp(x)) { + y->cnext = NULL; + } else if (x->csub == CCOPY && x->cnext != NULL) { + y->cnext = x->cnext; + } else { + y->cnext = x; + } return y; } +void updateargs(Cell *a) /* convert by-value to by-reference */ +{ + struct Frame *f; + Cell *o; + int i; + + /* + * The argument, a, was an uninitialized function argument that + * has become an array. It was passed by-value but should have + * been passed by-reference. To compensate for the mistake, we + * must update a's origin, o, and every other descendant of o, + * so that they share a symbol table. + * + * We start at the top of the stack and work our way down. + * If we encounter the origin on the stack, the scan concludes + * because all of its descendants are in higher frames that + * have already been visited. Otherwise, we continue until + * we've inspected every argument. + */ + + #define UPDATE_CELL(d) \ + if (freeable((d))) \ + xfree((d)->sval) \ + (d)->sval = a->sval; \ + (d)->tval = a->tval; + + o = a->cnext; + DPRINTF("updateargs triggered by %p for %p\n", (void *) a, (void *) o); + if (o == NULL) /* nothing to update */ + return; + + UPDATE_CELL(o); /* may not be on the stack */ + for (f = frp; f > frame; --f) { + DPRINTF("frame %td\n", f-frame); + for (i = 0; i < f->nargs; i++) { + if (f->args[i] == o) + return; + if (f->args[i]->cnext == o) { + if (f->args[i] == a) { + DPRINTF("\ttrigger arg %d\n", i); + continue; + } + UPDATE_CELL(f->args[i]); + DPRINTF("\tupdating arg %d\n", i); + } + } + } + #undef UPDATE_CELL +} + Cell *arg(Node **a, int n) /* nth argument of a function */ { @@ -518,12 +610,7 @@ Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ x = execute(a[0]); /* Cell* for symbol table */ buf = makearraystring(a[1], __func__); if (!isarr(x)) { - DPRINTF("making %s into an array\n", NN(x->nval)); - if (freeable(x)) - xfree(x->sval); - x->tval &= ~(STR|NUM|DONTFREE); - x->tval |= ARR; - x->sval = (char *) makesymtab(NSYMTAB); + toarray(x); } z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval); z->ctype = OCELL; @@ -544,10 +631,7 @@ Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts * if (!isarr(x)) return True; if (a[1] == NULL) { /* delete the elements, not the table */ - freesymtab(x); - x->tval &= ~STR; - x->tval |= ARR; - x->sval = (char *) makesymtab(NSYMTAB); + clearsymtab(x); } else { char *buf = makearraystring(a[1], __func__); freeelem(x, buf); @@ -1692,12 +1776,11 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ } sep = *fs; ap = execute(a[1]); /* array name */ -/* BUG 7/26/22: this appears not to reset array: see C1/asplit */ - freesymtab(ap); + if (isarr(ap)) + clearsymtab(ap); + else + toarray(ap); DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs); - ap->tval &= ~STR; - ap->tval |= ARR; - ap->sval = (char *) makesymtab(NSYMTAB); n = 0; if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) { diff --git a/testdir/T.func b/testdir/T.func index deb0b40..d1ad85a 100755 --- a/testdir/T.func +++ b/testdir/T.func @@ -194,3 +194,64 @@ $awk ' function foo() { i = 0 } BEGIN { x = foo(); printf "<%s> %d\n", x, x }' >foo2 diff foo1 foo2 || echo 'BAD: T.func (fall off end)' + +$awk ' +function f(x, y) { + x = "x" + y[0] = "y" + print x, y[0] +} +function g(z) { + print z[0] +} +BEGIN { + f(a, a) + g(a) +}' 2>foo1 >/dev/null +ret=$? +if [ $ret -eq 0 ] || ! grep -q 'can.t read value of .* an array' foo1; then + echo "BAD: T.func (simultaneously scalar and array: $ret)" +fi + +$awk ' +function f(f1, f2, f3) { + f1[0] = "*" + print "f: " f1[0], f2[0], f3[0] +} +function g(g1, g2, g3) { + f(g1, g1, g1) + print "g: " g1[0], g2[0], g3[0] +} +function h(h1, h2, h3) { + g(h1, h2, h3) + print "h: " h1[0], h2[0], h3[0] +} +function i(i1, i2, i3) { + print "i: " i1[0], i2[0], i3[0] +} +BEGIN { + OFS = "-" + f(a, a, a) + g(b, b, b) + h(c, c, c) + i(a, b, c) +}' >foo2 +cat <foo1 +f: *-*-* +f: *-*-* +g: *-*-* +f: *-*-* +g: *-*-* +h: *-*-* +i: *-*-* +! +diff foo1 foo2 || echo 'BAD: T.func (cousin coherency)' + +$awk ' +function f(x) { x[0]="updated"; exit } +function g(y) { print y[0] } +BEGIN { f(a) } +END { g(a) } +' >foo2 +echo 'updated' >foo1 +diff foo1 foo2 || echo 'BAD: T.func (exit skips arg conversion updates)' diff --git a/testdir/T.misc b/testdir/T.misc index 1e5c3c5..949883f 100755 --- a/testdir/T.misc +++ b/testdir/T.misc @@ -133,7 +133,7 @@ function bug2(arg) { bot[arg]=arg; } ' 2>foo -grep "can.t assign to foo" foo >/dev/null || echo 1>&2 "BAD: T.misc foo bug" +grep "can.t assign to .* array" foo >/dev/null || echo 1>&2 "BAD: T.misc foo bug" # This should be a syntax error diff --git a/tran.c b/tran.c index 482eede..7abd31f 100644 --- a/tran.c +++ b/tran.c @@ -153,6 +153,29 @@ void envinit(char **envp) /* set up ENVIRON variable */ } } +void toarray(Cell *ap) /* convert from an uninitialized scalar */ +{ + if (isarr(ap)) + return; + + DPRINTF("making %s into an array\n", NN(ap->nval)); + if (ap->sval[0] != '\0' || ap->fval != 0) + FATAL("can't happen: toarray scalar must be uninitialized"); + + if (freeable(ap)) + xfree(ap->sval); + ap->tval &= ~(STR|NUM|DONTFREE); + ap->tval |= ARR; + ap->sval = (char *) makesymtab(NSYMTAB); + + /* + * If this is a function argument, it was passed by value but + * should've been passed by reference. Correction required. + */ + if (ap->csub == CCOPY) + updateargs(ap); +} + Array *makesymtab(int n) /* make a new symbol table */ { Array *ap; @@ -168,17 +191,18 @@ Array *makesymtab(int n) /* make a new symbol table */ return(ap); } -void freesymtab(Cell *ap) /* free a symbol table */ +void clearsymtab(Cell *ap) { Cell *cp, *temp; Array *tp; int i; if (!isarr(ap)) - return; + FATAL("can't happen: clearsymtab for non-array"); + tp = (Array *) ap->sval; if (tp == NULL) - return; + FATAL("can't happen: array has no symbol table %s", ap->nval); for (i = 0; i < tp->size; i++) { for (cp = tp->tab[i]; cp != NULL; cp = temp) { xfree(cp->nval); @@ -191,9 +215,14 @@ void freesymtab(Cell *ap) /* free a symbol table */ tp->tab[i] = NULL; } if (tp->nelem != 0) - WARNING("can't happen: inconsistent element count freeing %s", ap->nval); - free(tp->tab); - free(tp); + FATAL("can't happen: inconsistent element count freeing %s", ap->nval); +} + +void freesymtab(Cell *ap) /* free a symbol table */ +{ + clearsymtab(ap); + free(((Array *)ap->sval)->tab); + free(ap->sval); } void freeelem(Cell *ap, const char *s) /* free elem s from ap (i.e., ap["s"] */ From b5e23fde3d7543e6e70ff8acb0ad469a8b261974 Mon Sep 17 00:00:00 2001 From: Miguel Pineiro Jr Date: Mon, 2 Oct 2023 12:47:54 -0400 Subject: [PATCH 4/4] Preserve user-defined function argument names Use the new struct Function in much the same way as struct Array (to make an object's internals reachable via Cell->sval). While recording argument names, plug the leak of arglist nodes. In the following program, f's arglist is leaked at the time of g's definition: function f(a,b,c) {} function g() {} The leak happens when varlist assigns to arglist in awkgram.y. Note that though the parsing and defining of a function are now limited only by available memory, invocations are still limited to NARGS (an int). --- awk.h | 6 ++++++ parse.c | 45 +++++++++++++++++++++++++++++++++++++++------ proto.h | 2 +- run.c | 28 ++++++++++------------------ 4 files changed, 56 insertions(+), 25 deletions(-) diff --git a/awk.h b/awk.h index 49b5dfc..afcd7c6 100644 --- a/awk.h +++ b/awk.h @@ -163,6 +163,12 @@ typedef struct Node { struct Node *narg[1]; /* variable: actual size set by calling malloc */ } Node; +typedef struct Function { /* user-defined function */ + size_t nargs; /* number of defined arguments */ + char **argnames; /* the names of those arguments */ + Node *body; /* the function's parse tree */ +} Function; + #define NIL ((Node *) 0) extern Node *winner; diff --git a/parse.c b/parse.c index 0944e89..85392a5 100644 --- a/parse.c +++ b/parse.c @@ -230,8 +230,9 @@ Node *linkum(Node *a, Node *b) void defn(Cell *v, Node *vl, Node *st) /* turn on FCN bit in definition, */ { /* body of function, arglist */ - Node *p; - int n; + Node *p, *temp; + Function *f; + size_t n; if (isarr(v)) { SYNTAX( "`%s' is an array name and a function name", v->nval ); @@ -242,13 +243,45 @@ void defn(Cell *v, Node *vl, Node *st) /* turn on FCN bit in definition, */ return; } - v->tval = FCN; - v->sval = (char *) st; + f = (Function *) malloc(sizeof(Function)); + if (f == NULL) + FATAL("out of memory in defn for Function"); + f->body = st; + n = 0; /* count arguments */ for (p = vl; p; p = p->nnext) n++; - v->fval = n; - DPRINTF("defining func %s (%d args)\n", v->nval, n); + f->nargs = n; + + DPRINTF("defining func %s (%zu args)\n", v->nval, f->nargs); + + if (f->nargs > 0) { + /* overflow can't happen, the arglist is bigger */ + f->argnames = (char **) malloc(n * sizeof(char *)); + if (f->argnames == NULL) + FATAL("out of memory in defn for argnames"); + + /* + * Keep a private copy of each argument's name. + * + * Free the arglist nodes, but not their cells. The + * cells belong to the global symbol table and might + * be needed for a variable of the same name. + */ + n = 0; + for (p = vl; p; p = temp) { + DPRINTF("arg %zu is \"%s\"\n", n, ((Cell *)p->narg[0])->nval); + f->argnames[n++] = tostring(((Cell *)p->narg[0])->nval); + temp = p->nnext; + free(p); + } + } else + f->argnames = NULL; + + if (freeable(v)) + free(v->sval); + v->sval = (char *) f; + v->tval = FCN; } int isarg(const char *s) /* is s in argument list for current function? */ diff --git a/proto.h b/proto.h index 97698e8..7911635 100644 --- a/proto.h +++ b/proto.h @@ -156,7 +156,7 @@ extern void run(Node *); extern Cell *execute(Node *); extern Cell *program(Node **, int); extern Cell *call(Node **, int); -extern Cell *copycell(Cell *); +extern Cell *copycell(Cell *, char *); extern void updateargs(Cell *); extern Cell *arg(Node **, int); extern Cell *jump(Node **, int); diff --git a/run.c b/run.c index efe7bad..6834025 100644 --- a/run.c +++ b/run.c @@ -229,6 +229,7 @@ struct Frame *frp = NULL; /* frame pointer. bottom level unused */ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ { int i, ncall, ndef; + Function *f; Node *x; Cell *args[NARGS]; /* BUG: fixed size arrays */ Cell *y, *z, *fcn; @@ -238,6 +239,7 @@ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ s = fcn->nval; if (!isfcn(fcn)) FATAL("calling undefined function %s", s); + f = (Function *) fcn->sval; if (frame == NULL) { frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame)); if (frame == NULL) @@ -245,7 +247,9 @@ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ } for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */ ncall++; - ndef = (int) fcn->fval; /* args in defn */ + if (f->nargs > INT_MAX) + FATAL("function %s nargs out of range", s); + ndef = f->nargs; /* args in defn */ DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame)); if (ncall > ndef) WARNING("function %s called with %d args, uses only %d", @@ -260,12 +264,12 @@ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ if (isfcn(y)) FATAL("can't use function %s as argument in %s", y->nval, s); if (i < ndef) { - args[i] = copycell(y); + args[i] = copycell(y, f->argnames[i]); } tempfree(y); } for ( ; i < ndef; i++) { /* add null args for ones not provided */ - args[i] = copycell(NULL); + args[i] = copycell(NULL, f->argnames[i]); } frp++; /* now ok to up frame */ if (frp >= frame + nframe) { @@ -281,7 +285,7 @@ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ frp->retval = gettemp(); DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame)); - y = execute((Node *)(fcn->sval)); /* execute body */ + y = execute(f->body); /* execute body */ DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame)); tempfree(fcn); @@ -323,11 +327,12 @@ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ return(z); } -Cell *copycell(Cell *x) /* make a copy of a cell in a temp */ +Cell *copycell(Cell *x, char *name) /* make a copy of a cell in a temp */ { Cell *y; y = gettemp(); + y->nval = name; y->csub = CCOPY; /* default to the uninitialized value */ @@ -336,19 +341,6 @@ Cell *copycell(Cell *x) /* make a copy of a cell in a temp */ /* copy is not constant or field */ y->tval = x->tval & ~(CON|FLD|REC); - - /* - * What should we name this argument? The parser does not preserve - * the names of parameters, so we don't know what the source calls - * it. Is it worth the effort to change that? - * - * Note: Simply pointing to the incoming cell's name (as had been - * done for a long time) creates a dangling pointer if that cell - * is deleted (think array element). - * - * For now, args are anonymous. Debugging can use frame/arg indices. - */ - y->nval = NULL; if (isstr(x)) { y->sval = tostring(x->sval); y->tval &= ~DONTFREE;