From d8e61a829c031d887c203515d0ceb5d54dd7318e Mon Sep 17 00:00:00 2001 From: rswier Date: Wed, 27 Jan 2016 02:11:00 -0500 Subject: [PATCH 1/3] AST + Code Generator Extends c4 by adding Abstract Syntax Tree creation and back-end code generation --- README.md | 24 ++- c5.c | 581 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ c5x86.c | 578 +++++++++++++++++++++++++++++++++++++++++++++++++++++ w32.h | 32 +++ 4 files changed, 1206 insertions(+), 9 deletions(-) create mode 100644 c5.c create mode 100644 c5x86.c create mode 100644 w32.h diff --git a/README.md b/README.md index ba670b7..dba667b 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,20 @@ -c4 - C in four functions -======================== - +c5 - C in four functions + AST + back-end code generator +======================================================== An exercise in minimalism. +This branch extends **c4.c** by adding: + + * Abstract Syntax Tree creation + * Back-end code generator function: **gen()** + * Standard ordering of function parameters on stack + * Native x86 version: **c5x86.c** + * Various optimizations + Try the following: - gcc -o c4 c4.c (you may need the -m32 option on 64bit machines) - ./c4 hello.c - ./c4 -s hello.c + gcc -o c5 c5.c (you may need the -m32 option on 64bit machines) + ./c5 hello.c + ./c5 -s hello.c - ./c4 c4.c hello.c - ./c4 c4.c c4.c hello.c - + ./c5 c5.c hello.c + ./c5 c5.c c5.c hello.c diff --git a/c5.c b/c5.c new file mode 100644 index 0000000..7144fae --- /dev/null +++ b/c5.c @@ -0,0 +1,581 @@ +// c5.c - C in five functions + +// c4.c plus +// abstract syntax tree creation +// back-end code generator +// parameters passed in correct order +// various optimizations + +// Written by Robert Swierczek + +#include +#include +#include +#include +#ifdef _WIN32 +#include "w32.h" +#endif + +char *p, *lp, // current position in source code + *data; // data/bss pointer + +int *e, *le, // current position in emitted code + *id, // currently parsed identifier + *n, // current 
node in abstract syntax tree + *sym, // symbol table (simple list of identifiers) + tk, // current token + ival, // current token value + ty, // current expression type + line, // current line number + src, // print source and assembly flag + debug; // print executed instructions + +// tokens and classes (operators last and in precedence order) +enum { + Num = 128, Fun, Sys, Glo, Loc, Id, Load, Enter, + Char, Else, Enum, If, Int, Return, Sizeof, While, + Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Brak +}; + +// opcodes +enum { LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH , + OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD , + OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,MCPY,MMAP,DSYM,QSRT,EXIT }; + +// types +enum { CHAR, INT, PTR }; + +// identifier offsets (since we can't create an ident struct) +enum { Tk, Hash, Name, Class, Type, Val, HClass, HType, HVal, Idsz }; + +void next() +{ + char *pp; + + while (tk = *p) { + ++p; + if (tk == '\n') { + if (src) { + printf("%d: %.*s", line, p - lp, lp); + lp = p; + while (le < e) { + printf("%8.4s", &"LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ," + "OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ," + "OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,MCPY,MMAP,DSYM,QSRT,EXIT,"[*++le * 5]); + if (*le <= ADJ) printf(" %d\n", *++le); else printf("\n"); + } + } + ++line; + } + else if (tk == '#') { + while (*p != 0 && *p != '\n') ++p; + } + else if ((tk >= 'a' && tk <= 'z') || (tk >= 'A' && tk <= 'Z') || tk == '_') { + pp = p - 1; + while ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || (*p >= '0' && *p <= '9') || *p == '_') + tk = tk * 147 + *p++; + tk = (tk << 6) + (p - pp); + id = sym; + while (id[Tk]) { + if (tk == id[Hash] && !memcmp((char *)id[Name], pp, p - pp)) { tk = id[Tk]; return; } + id = id + Idsz; + } + id[Name] = (int)pp; + id[Hash] = tk; + tk = id[Tk] = Id; + return; + } + 
else if (tk >= '0' && tk <= '9') { + if (ival = tk - '0') { while (*p >= '0' && *p <= '9') ival = ival * 10 + *p++ - '0'; } + else if (*p == 'x' || *p == 'X') { + while ((tk = *++p) && ((tk >= '0' && tk <= '9') || (tk >= 'a' && tk <= 'f') || (tk >= 'A' && tk <= 'F'))) + ival = ival * 16 + (tk & 15) + (tk >= 'A' ? 9 : 0); + } + else { while (*p >= '0' && *p <= '7') ival = ival * 8 + *p++ - '0'; } + tk = Num; + return; + } + else if (tk == '/') { + if (*p == '/') { + ++p; + while (*p != 0 && *p != '\n') ++p; + } + else { + tk = Div; + return; + } + } + else if (tk == '\'' || tk == '"') { + pp = data; + while (*p != 0 && *p != tk) { + if ((ival = *p++) == '\\') { + if ((ival = *p++) == 'n') ival = '\n'; + } + if (tk == '"') *data++ = ival; + } + ++p; + if (tk == '"') ival = (int)pp; else tk = Num; + return; + } + else if (tk == '=') { if (*p == '=') { ++p; tk = Eq; } else tk = Assign; return; } + else if (tk == '+') { if (*p == '+') { ++p; tk = Inc; } else tk = Add; return; } + else if (tk == '-') { if (*p == '-') { ++p; tk = Dec; } else tk = Sub; return; } + else if (tk == '!') { if (*p == '=') { ++p; tk = Ne; } return; } + else if (tk == '<') { if (*p == '=') { ++p; tk = Le; } else if (*p == '<') { ++p; tk = Shl; } else tk = Lt; return; } + else if (tk == '>') { if (*p == '=') { ++p; tk = Ge; } else if (*p == '>') { ++p; tk = Shr; } else tk = Gt; return; } + else if (tk == '|') { if (*p == '|') { ++p; tk = Lor; } else tk = Or; return; } + else if (tk == '&') { if (*p == '&') { ++p; tk = Lan; } else tk = And; return; } + else if (tk == '^') { tk = Xor; return; } + else if (tk == '%') { tk = Mod; return; } + else if (tk == '*') { tk = Mul; return; } + else if (tk == '[') { tk = Brak; return; } + else if (tk == '?') { tk = Cond; return; } + else if (tk == '~' || tk == ';' || tk == '{' || tk == '}' || tk == '(' || tk == ')' || tk == ']' || tk == ',' || tk == ':') return; + } +} + +void expr(int lev) +{ + int t, *d, *b; + + if (!tk) { printf("%d: unexpected eof in 
expression\n", line); exit(-1); } + else if (tk == Num) { *--n = ival; *--n = Num; next(); ty = INT; } + else if (tk == '"') { + *--n = ival; *--n = Num; next(); + while (tk == '"') next(); + data = (char *)((int)data + sizeof(int) & -sizeof(int)); ty = PTR; + } + else if (tk == Sizeof) { + next(); if (tk == '(') next(); else { printf("%d: open paren expected in sizeof\n", line); exit(-1); } + ty = INT; if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; } + while (tk == Mul) { next(); ty = ty + PTR; } + if (tk == ')') next(); else { printf("%d: close paren expected in sizeof\n", line); exit(-1); } + *--n = (ty == CHAR) ? sizeof(char) : sizeof(int); *--n = Num; + ty = INT; + } + else if (tk == Id) { + d = id; next(); + if (tk == '(') { + if (d[Class] != Sys && d[Class] != Fun) { printf("%d: bad function call\n", line); exit(-1); } + next(); + t = 0; b = 0; + while (tk != ')') { expr(Assign); *--n = (int)b; b = n; ++t; if (tk == ',') next(); } + next(); + *--n = t; *--n = d[Val]; *--n = (int)b; *--n = d[Class]; + ty = d[Type]; + } + else if (d[Class] == Num) { *--n = d[Val]; *--n = Num; ty = INT; } + else { + if (d[Class] == Loc) { *--n = d[Val]; *--n = Loc; } + else if (d[Class] == Glo) { *--n = d[Val]; *--n = Num; } + else { printf("%d: undefined variable\n", line); exit(-1); } + *--n = ty = d[Type]; *--n = Load; + } + } + else if (tk == '(') { + next(); + if (tk == Int || tk == Char) { + t = (tk == Int) ? 
INT : CHAR; next(); + while (tk == Mul) { next(); t = t + PTR; } + if (tk == ')') next(); else { printf("%d: bad cast\n", line); exit(-1); } + expr(Inc); + ty = t; + } + else { + expr(Assign); + if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } + } + } + else if (tk == Mul) { + next(); expr(Inc); + if (ty > INT) ty = ty - PTR; else { printf("%d: bad dereference\n", line); exit(-1); } + *--n = ty; *--n = Load; + } + else if (tk == And) { + next(); expr(Inc); + if (*n == Load) n = n+2; else { printf("%d: bad address-of\n", line); exit(-1); } + ty = ty + PTR; + } + else if (tk == '!') { + next(); expr(Inc); + if (*n == Num) n[1] = !n[1]; else { *--n = 0; *--n = Num; --n; *n = (int)(n+3); *--n = Eq; } + ty = INT; + } + else if (tk == '~') { + next(); expr(Inc); + if (*n == Num) n[1] = ~n[1]; else { *--n = -1; *--n = Num; --n; *n = (int)(n+3); *--n = Xor; } + ty = INT; + } + else if (tk == Add) { next(); expr(Inc); ty = INT; } + else if (tk == Sub) { + next(); expr(Inc); + if (*n == Num) n[1] = -n[1]; else { *--n = -1; *--n = Num; --n; *n = (int)(n+3); *--n = Mul; } + ty = INT; + } + else if (tk == Inc || tk == Dec) { + t = tk; next(); expr(Inc); + if (*n == Load) *n = t; else { printf("%d: bad lvalue in pre-increment\n", line); exit(-1); } + } + else { printf("%d: bad expression\n", line); exit(-1); } + + while (tk >= lev) { // "precedence climbing" or "Top Down Operator Precedence" method + t = ty; b = n; + if (tk == Assign) { + next(); + if (*n != Load) { printf("%d: bad lvalue in assignment\n", line); exit(-1); } + expr(Assign); *--n = (int)(b+2); *--n = ty = t; *--n = Assign; + } + else if (tk == Cond) { + next(); + expr(Assign); + if (tk == ':') next(); else { printf("%d: conditional missing colon\n", line); exit(-1); } + d = n; + expr(Cond); + --n; *n = (int)(n+1); *--n = (int)d; *--n = (int)b; *--n = Cond; + } + else if (tk == Lor) { next(); expr(Lan); if (*n==Num && *b==Num) n[1] = b[1] || n[1]; else { *--n = (int)b; *--n = 
Lor; } ty = INT; } + else if (tk == Lan) { next(); expr(Or); if (*n==Num && *b==Num) n[1] = b[1] && n[1]; else { *--n = (int)b; *--n = Lan; } ty = INT; } + else if (tk == Or) { next(); expr(Xor); if (*n==Num && *b==Num) n[1] = b[1] | n[1]; else { *--n = (int)b; *--n = Or; } ty = INT; } + else if (tk == Xor) { next(); expr(And); if (*n==Num && *b==Num) n[1] = b[1] ^ n[1]; else { *--n = (int)b; *--n = Xor; } ty = INT; } + else if (tk == And) { next(); expr(Eq); if (*n==Num && *b==Num) n[1] = b[1] & n[1]; else { *--n = (int)b; *--n = And; } ty = INT; } + else if (tk == Eq) { next(); expr(Lt); if (*n==Num && *b==Num) n[1] = b[1] == n[1]; else { *--n = (int)b; *--n = Eq; } ty = INT; } + else if (tk == Ne) { next(); expr(Lt); if (*n==Num && *b==Num) n[1] = b[1] != n[1]; else { *--n = (int)b; *--n = Ne; } ty = INT; } + else if (tk == Lt) { next(); expr(Shl); if (*n==Num && *b==Num) n[1] = b[1] < n[1]; else { *--n = (int)b; *--n = Lt; } ty = INT; } + else if (tk == Gt) { next(); expr(Shl); if (*n==Num && *b==Num) n[1] = b[1] > n[1]; else { *--n = (int)b; *--n = Gt; } ty = INT; } + else if (tk == Le) { next(); expr(Shl); if (*n==Num && *b==Num) n[1] = b[1] <= n[1]; else { *--n = (int)b; *--n = Le; } ty = INT; } + else if (tk == Ge) { next(); expr(Shl); if (*n==Num && *b==Num) n[1] = b[1] >= n[1]; else { *--n = (int)b; *--n = Ge; } ty = INT; } + else if (tk == Shl) { next(); expr(Add); if (*n==Num && *b==Num) n[1] = b[1] << n[1]; else { *--n = (int)b; *--n = Shl; } ty = INT; } + else if (tk == Shr) { next(); expr(Add); if (*n==Num && *b==Num) n[1] = b[1] >> n[1]; else { *--n = (int)b; *--n = Shr; } ty = INT; } + else if (tk == Add) { + next(); expr(Mul); + if ((ty = t) > PTR) { if (*n == Num) n[1] = n[1] * sizeof(int); else { *--n = sizeof(int); *--n = Num; --n; *n = (int)(n+3); *--n = Mul; } } + if (*n == Num && *b == Num) n[1] = b[1] + n[1]; else { *--n = (int)b; *--n = Add; } + } + else if (tk == Sub) { + next(); expr(Mul); + if ((ty = t) > PTR) { if (*n == Num) n[1] = 
n[1] * sizeof(int); else { *--n = sizeof(int); *--n = Num; --n; *n = (int)(n+3); *--n = Mul; } } + if (*n == Num && *b == Num) n[1] = b[1] - n[1]; else { *--n = (int)b; *--n = Sub; } + } + else if (tk == Mul) { next(); expr(Inc); if (*n==Num && *b==Num) n[1] = b[1] * n[1]; else { *--n = (int)b; *--n = Mul; } ty = INT; } + else if (tk == Div) { next(); expr(Inc); if (*n==Num && *b==Num) n[1] = b[1] / n[1]; else { *--n = (int)b; *--n = Div; } ty = INT; } + else if (tk == Mod) { next(); expr(Inc); if (*n==Num && *b==Num) n[1] = b[1] % n[1]; else { *--n = (int)b; *--n = Mod; } ty = INT; } + else if (tk == Inc || tk == Dec) { + if (*n == Load) *n = tk; else { printf("%d: bad lvalue in post-increment\n", line); exit(-1); } + *--n = (ty > PTR) ? sizeof(int) : sizeof(char); *--n = Num; + *--n = (int)b; *--n = (tk == Inc) ? Sub : Add; + next(); + } + else if (tk == Brak) { + next(); expr(Assign); + if (tk == ']') next(); else { printf("%d: close bracket expected\n", line); exit(-1); } + if (t > PTR) { if (*n == Num) n[1] = n[1] * sizeof(int); else { *--n = sizeof(int); *--n = Num; --n; *n = (int)(n+3); *--n = Mul; } } + else if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); } + if (*n == Num && *b == Num) n[1] = b[1] + n[1]; else { *--n = (int)b; *--n = Add; } + *--n = ty = t - PTR; *--n = Load; + } + else { printf("%d: compiler error tk=%d\n", line, tk); exit(-1); } + } +} + +void stmt() +{ + int *a, *b, *c; + + if (tk == If) { + next(); + if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); } + expr(Assign); a = n; + if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } + stmt(); b = n; + if (tk == Else) { next(); stmt(); c = n; } else c = 0; + *--n = (int)c; *--n = (int)b; *--n = (int)a; *--n = Cond; + } + else if (tk == While) { + next(); + if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); } + expr(Assign); a = n; + if (tk == ')') next(); else { printf("%d: 
close paren expected\n", line); exit(-1); } + stmt(); + *--n = (int)a; *--n = While; + } + else if (tk == Return) { + next(); + if (tk != ';') { expr(Assign); a = n; } else a = 0; + if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); } + *--n = (int)a; *--n = Return; + } + else if (tk == '{') { + next(); + *--n = ';'; + while (tk != '}') { a = n; stmt(); *--n = (int)a; *--n = '{'; } + next(); + } + else if (tk == ';') { + next(); *--n = ';'; + } + else { + expr(Assign); + if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); } + } +} + +void gen(int *n) +{ + int i, *a, *b; + + i = *n; + if (i == Num) { *++e = IMM; *++e = n[1]; } + else if (i == Loc) { *++e = LEA; *++e = n[1]; } + else if (i == Load) { gen(n+2); *++e = (n[1] == CHAR) ? LC : LI; } + else if (i == Assign) { gen((int *)n[2]); *++e = PSH; gen(n+3); *++e = (n[1] == CHAR) ? SC : SI; } + else if (i == Inc || i == Dec) { + gen(n+2); + *++e = PSH; *++e = (n[1] == CHAR) ? LC : LI; *++e = PSH; + *++e = IMM; *++e = (n[1] > PTR) ? sizeof(int) : sizeof(char); + *++e = (i == Inc) ? ADD : SUB; + *++e = (n[1] == CHAR) ? 
SC : SI; + } + else if (i == Cond) { + gen((int *)n[1]); + *++e = BZ; b = ++e; + gen((int *)n[2]); + if (n[3]) { *b = (int)(e + 3); *++e = JMP; b = ++e; gen((int *)n[3]); } + *b = (int)(e + 1); + } + else if (i == Lor) { gen((int *)n[1]); *++e = BNZ; b = ++e; gen(n+2); *b = (int)(e + 1); } + else if (i == Lan) { gen((int *)n[1]); *++e = BZ; b = ++e; gen(n+2); *b = (int)(e + 1); } + else if (i == Or) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = OR; } + else if (i == Xor) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = XOR; } + else if (i == And) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = AND; } + else if (i == Eq) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = EQ; } + else if (i == Ne) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = NE; } + else if (i == Lt) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = LT; } + else if (i == Gt) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = GT; } + else if (i == Le) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = LE; } + else if (i == Ge) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = GE; } + else if (i == Shl) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = SHL; } + else if (i == Shr) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = SHR; } + else if (i == Add) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = ADD; } + else if (i == Sub) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = SUB; } + else if (i == Mul) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = MUL; } + else if (i == Div) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = DIV; } + else if (i == Mod) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = MOD; } + else if (i == Sys || i == Fun) { + b = (int *)n[1]; + while (b) { gen(b+1); *++e = PSH; b = (int *)*b; } + if (i == Fun) *++e = JSR; *++e = n[2]; + if (n[3]) { *++e = ADJ; *++e = n[3]; } + } + else if (i == While) { + *++e = JMP; b = ++e; gen(n+2); *b = (int)(e + 1); + gen((int *)n[1]); + *++e = BNZ; *++e = (int)(b + 1); + } + else if (i == Return) { if (n[1]) gen((int *)n[1]); *++e = LEV; } + 
else if (i == '{') { gen((int *)n[1]); gen(n+2); } + else if (i == Enter) { *++e = ENT; *++e = n[1]; gen(n+2); *++e = LEV; } + else if (i != ';') { printf("%d: compiler error gen=%d\n", line, i); exit(-1); } +} + +int main(int argc, char **argv) +{ + int fd, bt, ty, poolsz, *idmain, *ast; + int *pc, *sp, *bp, a, cycle; // vm registers + int i, *t; // temps + + --argc; ++argv; + if (argc > 0 && **argv == '-' && (*argv)[1] == 's') { src = 1; --argc; ++argv; } + if (argc > 0 && **argv == '-' && (*argv)[1] == 'd') { debug = 1; --argc; ++argv; } + if (argc < 1) { printf("usage: c5 [-s] [-d] file ...\n"); return -1; } + + if ((fd = open(*argv, 0)) < 0) { printf("could not open(%s)\n", *argv); return -1; } + + poolsz = 256*1024; // arbitrary size + if (!(sym = malloc(poolsz))) { printf("could not malloc(%d) symbol area\n", poolsz); return -1; } + if (!(le = e = malloc(poolsz))) { printf("could not malloc(%d) text area\n", poolsz); return -1; } + if (!(data = malloc(poolsz))) { printf("could not malloc(%d) data area\n", poolsz); return -1; } + if (!(sp = malloc(poolsz))) { printf("could not malloc(%d) stack area\n", poolsz); return -1; } + if (!(ast = malloc(poolsz))) { printf("could not malloc(%d) abstract syntax tree area\n", poolsz); return -1; } + ast = (int *)((int)ast + poolsz); // abstract syntax tree is most efficiently built as a stack + + memset(sym, 0, poolsz); + memset(e, 0, poolsz); + memset(data, 0, poolsz); + + p = "char else enum if int return sizeof while " + "open read close printf malloc memset memcmp memcpy mmap dlsym qsort exit void main"; + i = Char; while (i <= While) { next(); id[Tk] = i++; } // add keywords to symbol table + i = OPEN; while (i <= EXIT) { next(); id[Class] = Sys; id[Type] = INT; id[Val] = i++; } // add library to symbol table + next(); id[Tk] = Char; // handle void type + next(); idmain = id; // keep track of main + + if (!(lp = p = malloc(poolsz))) { printf("could not malloc(%d) source area\n", poolsz); return -1; } + if ((i = 
read(fd, p, poolsz-1)) <= 0) { printf("read() returned %d\n", i); return -1; } + p[i] = 0; + close(fd); + + // parse declarations + line = 1; + next(); + while (tk) { + bt = INT; // basetype + if (tk == Int) next(); + else if (tk == Char) { next(); bt = CHAR; } + else if (tk == Enum) { + next(); + if (tk != '{') next(); + if (tk == '{') { + next(); + i = 0; + while (tk != '}') { + if (tk != Id) { printf("%d: bad enum identifier %d\n", line, tk); return -1; } + next(); + if (tk == Assign) { + next(); + n = ast; expr(Cond); + if (*n != Num) { printf("%d: bad enum initializer\n", line); return -1; } + i = n[1]; + } + id[Class] = Num; id[Type] = INT; id[Val] = i++; + if (tk == ',') next(); + } + next(); + } + } + while (tk != ';' && tk != '}') { + ty = bt; + while (tk == Mul) { next(); ty = ty + PTR; } + if (tk != Id) { printf("%d: bad global declaration\n", line); return -1; } + if (id[Class]) { printf("%d: duplicate global definition\n", line); return -1; } + next(); + id[Type] = ty; + if (tk == '(') { // function + id[Class] = Fun; + id[Val] = (int)(e + 1); + next(); i = 2; + while (tk != ')') { + ty = INT; + if (tk == Int) next(); + else if (tk == Char) { next(); ty = CHAR; } + while (tk == Mul) { next(); ty = ty + PTR; } + if (tk != Id) { printf("%d: bad parameter declaration\n", line); return -1; } + if (id[Class] == Loc) { printf("%d: duplicate parameter definition\n", line); return -1; } + id[HClass] = id[Class]; id[Class] = Loc; + id[HType] = id[Type]; id[Type] = ty; + id[HVal] = id[Val]; id[Val] = i++; + next(); + if (tk == ',') next(); + } + next(); + if (tk != '{') { printf("%d: bad function definition\n", line); return -1; } + i = 0; + next(); + while (tk == Int || tk == Char) { + bt = (tk == Int) ? 
INT : CHAR; + next(); + while (tk != ';') { + ty = bt; + while (tk == Mul) { next(); ty = ty + PTR; } + if (tk != Id) { printf("%d: bad local declaration\n", line); return -1; } + if (id[Class] == Loc) { printf("%d: duplicate local definition\n", line); return -1; } + id[HClass] = id[Class]; id[Class] = Loc; + id[HType] = id[Type]; id[Type] = ty; + id[HVal] = id[Val]; id[Val] = --i; + next(); + if (tk == ',') next(); + } + next(); + } + n = ast; + *--n = ';'; while (tk != '}') { t = n; stmt(); *--n = (int)t; *--n = '{'; } + *--n = -i; *--n = Enter; + gen(n); + id = sym; // unwind symbol table locals + while (id[Tk]) { + if (id[Class] == Loc) { + id[Class] = id[HClass]; + id[Type] = id[HType]; + id[Val] = id[HVal]; + } + id = id + Idsz; + } + } + else { + id[Class] = Glo; + id[Val] = (int)data; + data = data + sizeof(int); + } + if (tk == ',') next(); + } + next(); + } + + if (!(pc = (int *)idmain[Val])) { printf("main() not defined\n"); return -1; } + if (src) return 0; + + // setup stack + sp = (int *)((int)sp + poolsz); + *--sp = EXIT; // call exit if main returns + *--sp = PSH; t = sp; + *--sp = (int)argv; + *--sp = argc; + *--sp = (int)t; + + // run... + cycle = 0; + while (1) { + i = *pc++; ++cycle; + if (debug) { + printf("%d> %.4s", cycle, + &"LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ," + "OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ," + "OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,MCPY,MMAP,DSYM,QSRT,EXIT,"[i * 5]); + if (i <= ADJ) printf(" %d\n", *pc); else printf("\n"); + } + if (i == LEA) a = (int)(bp + *pc++); // load local address + else if (i == IMM) a = *pc++; // load global address or immediate + else if (i == JMP) pc = (int *)*pc; // jump + else if (i == JSR) { *--sp = (int)(pc + 1); pc = (int *)*pc; } // jump to subroutine + else if (i == BZ) pc = a ? pc + 1 : (int *)*pc; // branch if zero + else if (i == BNZ) pc = a ? 
(int *)*pc : pc + 1; // branch if not zero + else if (i == ENT) { *--sp = (int)bp; bp = sp; sp = sp - *pc++; } // enter subroutine + else if (i == ADJ) sp = sp + *pc++; // stack adjust + else if (i == LEV) { sp = bp; bp = (int *)*sp++; pc = (int *)*sp++; } // leave subroutine + else if (i == LI) a = *(int *)a; // load int + else if (i == LC) a = *(char *)a; // load char + else if (i == SI) *(int *)*sp++ = a; // store int + else if (i == SC) a = *(char *)*sp++ = a; // store char + else if (i == PSH) *--sp = a; // push + + else if (i == OR) a = *sp++ | a; + else if (i == XOR) a = *sp++ ^ a; + else if (i == AND) a = *sp++ & a; + else if (i == EQ) a = *sp++ == a; + else if (i == NE) a = *sp++ != a; + else if (i == LT) a = *sp++ < a; + else if (i == GT) a = *sp++ > a; + else if (i == LE) a = *sp++ <= a; + else if (i == GE) a = *sp++ >= a; + else if (i == SHL) a = *sp++ << a; + else if (i == SHR) a = *sp++ >> a; + else if (i == ADD) a = *sp++ + a; + else if (i == SUB) a = *sp++ - a; + else if (i == MUL) a = *sp++ * a; + else if (i == DIV) a = *sp++ / a; + else if (i == MOD) a = *sp++ % a; + + else if (i == OPEN) a = open((char *)*sp, sp[1]); + else if (i == READ) a = read(*sp, (char *)sp[1], sp[2]); + else if (i == CLOS) a = close(*sp); + else if (i == PRTF) a = printf((char *)*sp, sp[1], sp[2], sp[3], sp[4], sp[5]); + else if (i == MALC) a = (int)malloc(*sp); + else if (i == MSET) a = (int)memset((char *)*sp, sp[1], sp[2]); + else if (i == MCMP) a = memcmp((char *)*sp, (char *)sp[1], sp[2]); + else if (i == MCPY) a = (int)memcpy((char *)*sp, (char *)sp[1], sp[2]); + else if (i == MMAP) a = (int)mmap((char *)*sp, sp[1], sp[2], sp[3], sp[4], sp[5]); + else if (i == DSYM) a = (int)dlsym((char *)*sp, (char *)sp[1]); + else if (i == QSRT) qsort((char *)*sp, sp[1], sp[2], (void *)sp[3]); // base is the first argument (*sp), not the VM stack itself + else if (i == EXIT) { printf("exit(%d) cycle = %d\n", *sp, cycle); return *sp; } + else { printf("unknown instruction = %d!
cycle = %d\n", i, cycle); return -1; } + } +} diff --git a/c5x86.c b/c5x86.c new file mode 100644 index 0000000..d238f22 --- /dev/null +++ b/c5x86.c @@ -0,0 +1,578 @@ +// c5x86.c - C in five functions (native x86 version) + +// c4.c plus +// abstract syntax tree creation +// back-end code generator +// parameters passed in correct order +// various optimizations + +// Written by Robert Swierczek + +#include +#include +#include +#include +#ifdef _WIN32 +#include "w32.h" +#else +#include +#endif + +char *p, *lp, // current position in source code + *e, // current position in emitted code + *data, // data/bss pointer + *dsym; // external function lookup name + +int *id, // currently parsed identifier + *n, // current node in abstract syntax tree + *sym, // symbol table (simple list of identifiers) + tk, // current token + ival, // current token value + ty, // current expression type + line, // current line number + src; // print source and assembly flag + +// tokens and classes (operators last and in precedence order) +enum { + Num = 128, Fun, Glo, Loc, Id, Load, Enter, + Char, Else, Enum, If, Int, Return, Sizeof, While, + Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Brak +}; + +// types +enum { CHAR, INT, PTR }; + +// identifier offsets (since we can't create an ident struct) +enum { Tk, Hash, Name, Class, Type, Val, HClass, HType, HVal, Idsz }; + +void next() +{ + char *pp; + + while (tk = *p) { + ++p; + if (tk == '\n') { + if (src) { + printf("%d: %.*s", line, p - lp, lp); + lp = p; + } + ++line; + } + else if (tk == '#') { + while (*p != 0 && *p != '\n') ++p; + } + else if ((tk >= 'a' && tk <= 'z') || (tk >= 'A' && tk <= 'Z') || tk == '_') { + pp = p - 1; + while ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || (*p >= '0' && *p <= '9') || *p == '_') + tk = tk * 147 + *p++; + tk = (tk << 6) + (p - pp); + id = sym; + while (id[Tk]) { + if (tk == id[Hash] && !memcmp((char *)id[Name], pp, p - pp)) { 
tk = id[Tk]; return; } + id = id + Idsz; + } + id[Name] = (int)pp; + id[Hash] = tk; + tk = id[Tk] = Id; + return; + } + else if (tk >= '0' && tk <= '9') { + if (ival = tk - '0') { while (*p >= '0' && *p <= '9') ival = ival * 10 + *p++ - '0'; } + else if (*p == 'x' || *p == 'X') { + while ((tk = *++p) && ((tk >= '0' && tk <= '9') || (tk >= 'a' && tk <= 'f') || (tk >= 'A' && tk <= 'F'))) + ival = ival * 16 + (tk & 15) + (tk >= 'A' ? 9 : 0); + } + else { while (*p >= '0' && *p <= '7') ival = ival * 8 + *p++ - '0'; } + tk = Num; + return; + } + else if (tk == '/') { + if (*p == '/') { + ++p; + while (*p != 0 && *p != '\n') ++p; + } + else { + tk = Div; + return; + } + } + else if (tk == '\'' || tk == '"') { + pp = data; + while (*p != 0 && *p != tk) { + if ((ival = *p++) == '\\') { + if ((ival = *p++) == 'n') ival = '\n'; + } + if (tk == '"') *data++ = ival; + } + ++p; + if (tk == '"') ival = (int)pp; else tk = Num; + return; + } + else if (tk == '=') { if (*p == '=') { ++p; tk = Eq; } else tk = Assign; return; } + else if (tk == '+') { if (*p == '+') { ++p; tk = Inc; } else tk = Add; return; } + else if (tk == '-') { if (*p == '-') { ++p; tk = Dec; } else tk = Sub; return; } + else if (tk == '!') { if (*p == '=') { ++p; tk = Ne; } return; } + else if (tk == '<') { if (*p == '=') { ++p; tk = Le; } else if (*p == '<') { ++p; tk = Shl; } else tk = Lt; return; } + else if (tk == '>') { if (*p == '=') { ++p; tk = Ge; } else if (*p == '>') { ++p; tk = Shr; } else tk = Gt; return; } + else if (tk == '|') { if (*p == '|') { ++p; tk = Lor; } else tk = Or; return; } + else if (tk == '&') { if (*p == '&') { ++p; tk = Lan; } else tk = And; return; } + else if (tk == '^') { tk = Xor; return; } + else if (tk == '%') { tk = Mod; return; } + else if (tk == '*') { tk = Mul; return; } + else if (tk == '[') { tk = Brak; return; } + else if (tk == '?') { tk = Cond; return; } + else if (tk == '~' || tk == ';' || tk == '{' || tk == '}' || tk == '(' || tk == ')' || tk == ']' || tk == ',' || 
tk == ':') return; + } +} + +void expr(int lev) +{ + int t, *d, *b; + + if (!tk) { printf("%d: unexpected eof in expression\n", line); exit(-1); } + else if (tk == Num) { *--n = ival; *--n = Num; next(); ty = INT; } + else if (tk == '"') { + *--n = ival; *--n = Num; next(); + while (tk == '"') next(); + data = (char *)((int)data + sizeof(int) & -sizeof(int)); ty = PTR; + } + else if (tk == Sizeof) { + next(); if (tk == '(') next(); else { printf("%d: open paren expected in sizeof\n", line); exit(-1); } + ty = INT; if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; } + while (tk == Mul) { next(); ty = ty + PTR; } + if (tk == ')') next(); else { printf("%d: close paren expected in sizeof\n", line); exit(-1); } + *--n = (ty == CHAR) ? sizeof(char) : sizeof(int); *--n = Num; + ty = INT; + } + else if (tk == Id) { + d = id; next(); + if (tk == '(') { + if (!d[Class]) { + memcpy(dsym, (char *)d[Name], d[Hash] & 63); dsym[d[Hash] & 63] = 0; + if (d[Val] = (int)dlsym(0, dsym)) d[Class] = Fun; + } + if (d[Class] != Fun) { printf("%d: bad function call\n", line); exit(-1); } + next(); + t = 0; b = 0; + while (tk != ')') { expr(Assign); *--n = (int)b; b = n; ++t; if (tk == ',') next(); } + next(); + *--n = t; *--n = d[Val]; *--n = (int)b; *--n = d[Class]; + ty = d[Type]; + } + else if (d[Class] == Num) { *--n = d[Val]; *--n = Num; ty = INT; } + else { + if (d[Class] == Loc) { *--n = d[Val]; *--n = Loc; } + else if (d[Class] == Glo) { *--n = d[Val]; *--n = Num; } + else { printf("%d: undefined variable\n", line); exit(-1); } + *--n = ty = d[Type]; *--n = Load; + } + } + else if (tk == '(') { + next(); + if (tk == Int || tk == Char) { + t = (tk == Int) ? 
INT : CHAR; next(); + while (tk == Mul) { next(); t = t + PTR; } + if (tk == ')') next(); else { printf("%d: bad cast\n", line); exit(-1); } + expr(Inc); + ty = t; + } + else { + expr(Assign); + if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } + } + } + else if (tk == Mul) { + next(); expr(Inc); + if (ty > INT) ty = ty - PTR; else { printf("%d: bad dereference\n", line); exit(-1); } + *--n = ty; *--n = Load; + } + else if (tk == And) { + next(); expr(Inc); + if (*n == Load) n = n+2; else { printf("%d: bad address-of\n", line); exit(-1); } + ty = ty + PTR; + } + else if (tk == '!') { + next(); expr(Inc); + if (*n == Num) n[1] = !n[1]; else { *--n = 0; *--n = Num; --n; *n = (int)(n+3); *--n = Eq; } + ty = INT; + } + else if (tk == '~') { + next(); expr(Inc); + if (*n == Num) n[1] = ~n[1]; else { *--n = -1; *--n = Num; --n; *n = (int)(n+3); *--n = Xor; } + ty = INT; + } + else if (tk == Add) { next(); expr(Inc); ty = INT; } + else if (tk == Sub) { + next(); expr(Inc); + if (*n == Num) n[1] = -n[1]; else { *--n = -1; *--n = Num; --n; *n = (int)(n+3); *--n = Mul; } + ty = INT; + } + else if (tk == Inc || tk == Dec) { + t = tk; next(); expr(Inc); + if (*n == Load) *n = t; else { printf("%d: bad lvalue in pre-increment\n", line); exit(-1); } + } + else { printf("%d: bad expression\n", line); exit(-1); } + + while (tk >= lev) { // "precedence climbing" or "Top Down Operator Precedence" method + t = ty; b = n; + if (tk == Assign) { + next(); + if (*n != Load) { printf("%d: bad lvalue in assignment\n", line); exit(-1); } + expr(Assign); *--n = (int)(b+2); *--n = ty = t; *--n = Assign; + } + else if (tk == Cond) { + next(); + expr(Assign); + if (tk == ':') next(); else { printf("%d: conditional missing colon\n", line); exit(-1); } + d = n; + expr(Cond); + --n; *n = (int)(n+1); *--n = (int)d; *--n = (int)b; *--n = Cond; + } + else if (tk == Lor) { next(); expr(Lan); if (*n==Num && *b==Num) n[1] = b[1] || n[1]; else { *--n = (int)b; *--n = 
Lor; } ty = INT; } + else if (tk == Lan) { next(); expr(Or); if (*n==Num && *b==Num) n[1] = b[1] && n[1]; else { *--n = (int)b; *--n = Lan; } ty = INT; } + else if (tk == Or) { next(); expr(Xor); if (*n==Num && *b==Num) n[1] = b[1] | n[1]; else { *--n = (int)b; *--n = Or; } ty = INT; } + else if (tk == Xor) { next(); expr(And); if (*n==Num && *b==Num) n[1] = b[1] ^ n[1]; else { *--n = (int)b; *--n = Xor; } ty = INT; } + else if (tk == And) { next(); expr(Eq); if (*n==Num && *b==Num) n[1] = b[1] & n[1]; else { *--n = (int)b; *--n = And; } ty = INT; } + else if (tk == Eq) { next(); expr(Lt); if (*n==Num && *b==Num) n[1] = b[1] == n[1]; else { *--n = (int)b; *--n = Eq; } ty = INT; } + else if (tk == Ne) { next(); expr(Lt); if (*n==Num && *b==Num) n[1] = b[1] != n[1]; else { *--n = (int)b; *--n = Ne; } ty = INT; } + else if (tk == Lt) { next(); expr(Shl); if (*n==Num && *b==Num) n[1] = b[1] < n[1]; else { *--n = (int)b; *--n = Lt; } ty = INT; } + else if (tk == Gt) { next(); expr(Shl); if (*n==Num && *b==Num) n[1] = b[1] > n[1]; else { *--n = (int)b; *--n = Gt; } ty = INT; } + else if (tk == Le) { next(); expr(Shl); if (*n==Num && *b==Num) n[1] = b[1] <= n[1]; else { *--n = (int)b; *--n = Le; } ty = INT; } + else if (tk == Ge) { next(); expr(Shl); if (*n==Num && *b==Num) n[1] = b[1] >= n[1]; else { *--n = (int)b; *--n = Ge; } ty = INT; } + else if (tk == Shl) { next(); expr(Add); if (*n==Num && *b==Num) n[1] = b[1] << n[1]; else { *--n = (int)b; *--n = Shl; } ty = INT; } + else if (tk == Shr) { next(); expr(Add); if (*n==Num && *b==Num) n[1] = b[1] >> n[1]; else { *--n = (int)b; *--n = Shr; } ty = INT; } + else if (tk == Add) { + next(); expr(Mul); + if ((ty = t) > PTR) { if (*n == Num) n[1] = n[1] * sizeof(int); else { *--n = sizeof(int); *--n = Num; --n; *n = (int)(n+3); *--n = Mul; } } + if (*n == Num && *b == Num) n[1] = b[1] + n[1]; else { *--n = (int)b; *--n = Add; } + } + else if (tk == Sub) { + next(); expr(Mul); + if ((ty = t) > PTR) { if (*n == Num) n[1] = 
n[1] * sizeof(int); else { *--n = sizeof(int); *--n = Num; --n; *n = (int)(n+3); *--n = Mul; } } + if (*n == Num && *b == Num) n[1] = b[1] - n[1]; else { *--n = (int)b; *--n = Sub; } + } + else if (tk == Mul) { next(); expr(Inc); if (*n==Num && *b==Num) n[1] = b[1] * n[1]; else { *--n = (int)b; *--n = Mul; } ty = INT; } + else if (tk == Div) { next(); expr(Inc); if (*n==Num && *b==Num) n[1] = b[1] / n[1]; else { *--n = (int)b; *--n = Div; } ty = INT; } + else if (tk == Mod) { next(); expr(Inc); if (*n==Num && *b==Num) n[1] = b[1] % n[1]; else { *--n = (int)b; *--n = Mod; } ty = INT; } + else if (tk == Inc || tk == Dec) { + if (*n == Load) *n = tk; else { printf("%d: bad lvalue in post-increment\n", line); exit(-1); } + *--n = (ty > PTR) ? sizeof(int) : sizeof(char); *--n = Num; + *--n = (int)b; *--n = (tk == Inc) ? Sub : Add; + next(); + } + else if (tk == Brak) { + next(); expr(Assign); + if (tk == ']') next(); else { printf("%d: close bracket expected\n", line); exit(-1); } + if (t > PTR) { if (*n == Num) n[1] = n[1] * sizeof(int); else { *--n = sizeof(int); *--n = Num; --n; *n = (int)(n+3); *--n = Mul; } } + else if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); } + if (*n == Num && *b == Num) n[1] = b[1] + n[1]; else { *--n = (int)b; *--n = Add; } + *--n = ty = t - PTR; *--n = Load; + } + else { printf("%d: compiler error tk=%d\n", line, tk); exit(-1); } + } +} + +void stmt() +{ + int *a, *b, *c; + + if (tk == If) { + next(); + if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); } + expr(Assign); a = n; + if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } + stmt(); b = n; + if (tk == Else) { next(); stmt(); c = n; } else c = 0; + *--n = (int)c; *--n = (int)b; *--n = (int)a; *--n = Cond; + } + else if (tk == While) { + next(); + if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); } + expr(Assign); a = n; + if (tk == ')') next(); else { printf("%d: 
close paren expected\n", line); exit(-1); } + stmt(); + *--n = (int)a; *--n = While; + } + else if (tk == Return) { + next(); + if (tk != ';') { expr(Assign); a = n; } else a = 0; + if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); } + *--n = (int)a; *--n = Return; + } + else if (tk == '{') { + next(); + *--n = ';'; + while (tk != '}') { a = n; stmt(); *--n = (int)a; *--n = '{'; } + next(); + } + else if (tk == ';') { + next(); *--n = ';'; + } + else { + expr(Assign); + if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); } + } +} + +void gen(int *n) +{ + int i; char *b; + + i = *n; + if (i == Num) { + *e++ = 0xb8; *(int *)e = n[1]; e = e+4; if (src) printf(" movl $%d, %%eax\n",n[1]); + } + else if (i == Loc) { + if (n[1] < -32 || n[1] > 32) { printf("%d: gen(lea) out of bounds\n", line); exit(-1); } + *(int *)e = 0x458d + (n[1] << 18); e = e+3; if (src) printf(" leal $%d(%%ebp), %%eax\n", n[1]*4); + } + else if (i == Load) { + gen(n+2); + if (n[1] == CHAR) { *(int *)e = 0x00be0f; e = e+3; if (src) printf(" movsbl (%%eax), %%eax\n"); } + else { *(int *)e = 0x008b; e = e+2; if (src) printf(" movl (%%eax), %%eax\n"); } + } + else if (i == Assign) { + gen((int *)n[2]); *e++ = 0x50; if (src) printf(" push %%eax\n"); + gen(n+3); *e++ = 0x59; if (src) printf(" pop %%ecx\n"); + if (n[1] == CHAR) { *(int *)e = 0x0188; e = e+2; if (src) printf(" movb %%al, (%%ecx)\n"); } + else { *(int *)e = 0x0189; e = e+2; if (src) printf(" movl %%eax, (%%ecx)\n"); } + } + else if (i == Inc || i == Dec) { + gen(n+2); *e++ = 0x50; if (src) printf(" push %%eax\n"); + if (n[1] == CHAR) { *(int *)e = 0x00be0f; e = e+3; if (src) printf(" movsbl (%%eax), %%eax\n"); } + else { *(int *)e = 0x008b; e = e+2; if (src) printf(" movl (%%eax), %%eax\n"); } + i = ((i == Inc) ? 1 : -1) * ((n[1] > PTR) ? 
sizeof(int) : sizeof(char)); + *e++ = 0xb9; *(int *)e = i; e = e+4; if (src) printf(" movl $%d, %%ecx\n", i); + *(int *)e = 0xc801; e = e+2; if (src) printf(" addl %%ecx, %%eax\n"); + *e++ = 0x59; if (src) printf(" pop %%ecx\n"); + if (n[1] == CHAR) { *(int *)e = 0x0188; e = e+2; if (src) printf(" movb %%al, (%%ecx)\n"); } + else { *(int *)e = 0x0189; e = e+2; if (src) printf(" movl %%eax, (%%ecx)\n"); } + } + else if (i == Cond) { + gen((int *)n[1]); + *(int *)e = 0x840fc085; e = e+4; b = e; e = e+4; if (src) printf(" test %%eax, %%eax\n jeq \n"); + gen((int *)n[2]); + if (n[3]) { + *(int *)b = e+5 - b - 4; + *e++ = 0xe9; b = e; e = e + 4; if (src) printf(" jmp \n"); + gen((int *)n[3]); + } + *(int *)b = e - b - 4; + } + else if (i == Lor) { + gen((int *)n[1]); + *(int *)e = 0x850fc085; e = e+4; b = e; e = e+4; if (src) printf(" test %%eax, %%eax\n jne \n"); + gen(n+2); + *(int *)b = e - b - 4; + } + else if (i == Lan) { + gen((int *)n[1]); + *(int *)e = 0x840fc085; e = e+4; b = e; e = e+4; if (src) printf(" test %%eax, %%eax\n jeq \n"); + gen(n+2); + *(int *)b = e - b - 4; + } + else if (i >= Or && i <= Mod) { + gen(n+2); *e++ = 0x50; if (src) printf(" push %%eax\n"); + gen((int *)n[1]); *e++ = 0x59; if (src) printf(" pop %%ecx\n"); + if (i == Or) { *(int *)e = 0xc809; e = e+2; if (src) printf(" orl %%ecx, %%eax\n"); } + else if (i == Xor) { *(int *)e = 0xc831; e = e+2; if (src) printf(" xorl %%ecx, %%eax\n"); } + else if (i == And) { *(int *)e = 0xc821; e = e+2; if (src) printf(" andl %%ecx, %%eax\n"); } + else if (i >= Eq && i <= Ge) { + *(int *)e = 0xc839; e = e+2; if (src) printf(" cmp %%eax, %%ecx\n"); + *e++ = 0xb8; *(int *)e = 0; e = e+4; if (src) printf(" mov $0, %%eax\n"); + if (i == Eq) { *(int *)e = 0xc0940f; if (src) printf(" sete %%al\n"); } + else if (i == Ne) { *(int *)e = 0xc0950f; if (src) printf(" setne %%al\n"); } + else if (i == Lt) { *(int *)e = 0xc09c0f; if (src) printf(" setl %%al\n"); } + else if (i == Gt) { *(int *)e = 0xc09f0f; if (src) 
printf(" setg %%al\n"); } + else if (i == Le) { *(int *)e = 0xc09e0f; if (src) printf(" setle %%al\n"); } + else { *(int *)e = 0xc09d0f; if (src) printf(" setge %%al\n"); } + e = e+3; + } + else if (i == Shl) { *(int *)e = 0xe0d3; e = e+2; if (src) printf(" shl %%cl, %%eax\n"); } + else if (i == Shr) { *(int *)e = 0xf8d3; e = e+2; if (src) printf(" sar %%cl, %%eax\n"); } + else if (i == Add) { *(int *)e = 0xc801; e = e+2; if (src) printf(" addl %%ecx, %%eax\n"); } + else if (i == Sub) { *(int *)e = 0xc829; e = e+2; if (src) printf(" subl %%ecx, %%eax\n"); } + else if (i == Mul) { *(int *)e = 0xc1af0f; e = e+3; if (src) printf(" imul %%ecx, %%eax\n"); } + else if (i == Div) { *(int *)e = 0xf9f799; e = e+3; if (src) printf(" cltd\n idiv %%ecx, %%eax\n"); } + else if (i == Mod) { *(int *)e = 0x92f9f799; e=e+4; if (src) printf(" cltd\n idiv %%ecx, %%eax\n xchg %%edx, %%eax\n"); } + } + else if (i == Fun) { + i = n[1]; + while (i) { + gen(((int *)i)+1); *e++ = 0x50; i = *(int *)i; if (src) printf(" push %%eax\n"); + } + *e++ = 0xe8; *(int *)e = n[2]-(int)e-4; e = e+4; if (src) printf(" call \n"); + if (n[3]) { + *(int *)e = 0xc481; e = e+2; + *(int *)e = n[3]*4; e = e+4; if (src) printf(" add $%d, %%esp\n", n[3]*4); + } + } + else if (i == While) { + *e++ = 0xe9; b = e; e = e+4; if (src) printf(" jmp \n"); + gen(n+2); + *(int *)b = e - b - 4; + gen((int *)n[1]); + *(int *)e = 0x850fc085; e = e+4; if (src) printf(" test %%eax, %%eax\n"); + *(int *)e = b - e; e = e+4; if (src) printf(" jne $%d\n", b - e); + } + else if (i == Return) { + if (n[1]) gen((int *)n[1]); if (src) printf(" mov %%ebp, %%esp\n"); + *(int *)e = 0xc35dec89; e = e+4; if (src) printf(" pop %%ebp\n ret\n"); + } + else if (i == '{') { + gen((int *)n[1]); gen(n+2); + } + else if (i == Enter) { + *(int *)e = 0xe58955; e = e+3; if (src) printf(" push %%ebp;\n mov %%esp, %%ebp\n"); + if (n[1]) { + *(int *)e = 0xec81; e = e+2; + *(int *)e = n[1]*4; e = e+4; if (src) printf(" subl $%d, %%esp\n", n[1]*4); + } + 
gen(n+2); if (src) printf(" mov %%ebp, %%esp\n"); + *(int *)e = 0xc35dec89; e = e+4; if (src) printf(" pop %%ebp\n ret\n"); + } + else if (i != ';') { printf("%d: compiler error gen=%d\n", line, i); exit(-1); } +} + +int main(int argc, char **argv) +{ + int fd, bt, ty, poolsz, *idmain, *ast; + int i, *t; // temps + + --argc; ++argv; + if (argc > 0 && **argv == '-' && (*argv)[1] == 's') { src = 1; --argc; ++argv; } + if (argc < 1) { printf("usage: c5x86 [-s] file ...\n"); return -1; } + + if ((fd = open(*argv, 0)) < 0) { printf("could not open(%s)\n", *argv); return -1; } + + poolsz = 256*1024; // arbitrary size + if (!(sym = malloc(poolsz))) { printf("could not malloc(%d) symbol area\n", poolsz); return -1; } + if (!(data = malloc(poolsz))) { printf("could not malloc(%d) data area\n", poolsz); return -1; } + if (!(dsym = malloc(64))) { printf("could not malloc(64) dsym\n"); return -1; } + if (!(ast = malloc(poolsz))) { printf("could not malloc(%d) abstract syntax tree area\n", poolsz); return -1; } + ast = (int *)((int)ast + poolsz); // abstract syntax tree is most efficiently built as a stack + + memset(sym, 0, poolsz); + memset(data, 0, poolsz); + + if (!(e = mmap(0, poolsz, 7, 0x22, -1, 0))) { printf("could not mmap() executable memory\n"); return -1; } + + p = "char else enum if int return sizeof while void main"; + i = Char; while (i <= While) { next(); id[Tk] = i++; } // add keywords to symbol table + next(); id[Tk] = Char; // handle void type + next(); idmain = id; // keep track of main + + if (!(lp = p = malloc(poolsz))) { printf("could not malloc(%d) source area\n", poolsz); return -1; } + if ((i = read(fd, p, poolsz-1)) <= 0) { printf("read() returned %d\n", i); return -1; } + p[i] = 0; + close(fd); + + // parse declarations + line = 1; + next(); + while (tk) { + bt = INT; // basetype + if (tk == Int) next(); + else if (tk == Char) { next(); bt = CHAR; } + else if (tk == Enum) { + next(); + if (tk != '{') next(); + if (tk == '{') { + next(); + i = 0; + 
while (tk != '}') { + if (tk != Id) { printf("%d: bad enum identifier %d\n", line, tk); return -1; } + next(); + if (tk == Assign) { + next(); + n = ast; expr(Cond); + if (*n != Num) { printf("%d: bad enum initializer\n", line); return -1; } + i = n[1]; + } + id[Class] = Num; id[Type] = INT; id[Val] = i++; + if (tk == ',') next(); + } + next(); + } + } + while (tk != ';' && tk != '}') { + ty = bt; + while (tk == Mul) { next(); ty = ty + PTR; } + if (tk != Id) { printf("%d: bad global declaration\n", line); return -1; } + if (id[Class]) { printf("%d: duplicate global definition\n", line); return -1; } + next(); + id[Type] = ty; + if (tk == '(') { // function + id[Class] = Fun; + id[Val] = (int)e; + next(); i = 2; + while (tk != ')') { + ty = INT; + if (tk == Int) next(); + else if (tk == Char) { next(); ty = CHAR; } + while (tk == Mul) { next(); ty = ty + PTR; } + if (tk != Id) { printf("%d: bad parameter declaration\n", line); return -1; } + if (id[Class] == Loc) { printf("%d: duplicate parameter definition\n", line); return -1; } + id[HClass] = id[Class]; id[Class] = Loc; + id[HType] = id[Type]; id[Type] = ty; + id[HVal] = id[Val]; id[Val] = i++; + next(); + if (tk == ',') next(); + } + next(); + if (tk != '{') { printf("%d: bad function definition\n", line); return -1; } + i = 0; + next(); + while (tk == Int || tk == Char) { + bt = (tk == Int) ? 
INT : CHAR; + next(); + while (tk != ';') { + ty = bt; + while (tk == Mul) { next(); ty = ty + PTR; } + if (tk != Id) { printf("%d: bad local declaration\n", line); return -1; } + if (id[Class] == Loc) { printf("%d: duplicate local definition\n", line); return -1; } + id[HClass] = id[Class]; id[Class] = Loc; + id[HType] = id[Type]; id[Type] = ty; + id[HVal] = id[Val]; id[Val] = --i; + next(); + if (tk == ',') next(); + } + next(); + } + n = ast; + *--n = ';'; while (tk != '}') { t = n; stmt(); *--n = (int)t; *--n = '{'; } + *--n = -i; *--n = Enter; + gen(n); + id = sym; // unwind symbol table locals + while (id[Tk]) { + if (id[Class] == Loc) { + id[Class] = id[HClass]; + id[Type] = id[HType]; + id[Val] = id[HVal]; + } + id = id + Idsz; + } + } + else { + id[Class] = Glo; + id[Val] = (int)data; + data = data + sizeof(int); + } + if (tk == ',') next(); + } + next(); + } + + if (!idmain[Val]) { printf("main() not defined\n"); return -1; } + if (!src) { + t = (int *)e; + *e++ = 0xb8; *(char ***)e = argv; e = e+4; *e++ = 0x50; // movl $argv, %eax; push %eax + *e++ = 0xb8; *(int *) e = argc; e = e+4; *e++ = 0x50; // movl $argc, %eax; push %eax + *e++ = 0xe8; *(int *)e = idmain[Val] - (int)e - 4; e = e+4; // call main + *e++ = 0x81; *e++ = 0xc4; *(int *)e = 8; e = e+4; // add $8, %esp + *e++ = 0xc3; // ret + qsort(dsym, 2, 1, (void *)t); // hack to call a function pointer + printf("exit(0) from c5x86\n"); + } + return 0; +} diff --git a/w32.h b/w32.h new file mode 100644 index 0000000..548a0ce --- /dev/null +++ b/w32.h @@ -0,0 +1,32 @@ +#include +void *mmap(void *addr, size_t len, int prot, int flags, int fildes, off_t off) +{ + HANDLE fm, h; + void *map; + const off_t maxSize = off + (off_t)len; + + h = (HANDLE)_get_osfhandle(fildes); + fm = CreateFileMapping(h, NULL, PAGE_EXECUTE_READWRITE, 0, maxSize, NULL); + map = MapViewOfFile(fm, FILE_MAP_READ | FILE_MAP_WRITE | FILE_MAP_EXECUTE, 0, off, len); + CloseHandle(fm); + return map; +} + +void *dlsym(void *handle, char 
*name) +{ + if (!strcmp(name, "open" )) return &open; + if (!strcmp(name, "read" )) return &read; + if (!strcmp(name, "close" )) return &close; + if (!strcmp(name, "printf")) return &printf; + if (!strcmp(name, "malloc")) return &malloc; + if (!strcmp(name, "memset")) return &memset; + if (!strcmp(name, "memcmp")) return &memcmp; + if (!strcmp(name, "memcpy")) return &memcpy; + if (!strcmp(name, "mmap" )) return &mmap; + if (!strcmp(name, "dlsym" )) return &dlsym; + if (!strcmp(name, "qsort" )) return &qsort; + if (!strcmp(name, "exit" )) return &exit; + return 0; +} +#define CHAR TYCHAR +#define INT TYINT From 13835a4b41f5a233f90b60b6fe30edd42d4b0094 Mon Sep 17 00:00:00 2001 From: rswier Date: Fri, 26 Feb 2016 01:40:06 -0500 Subject: [PATCH 2/3] Include headers --- c4.c | 525 -------------------------------------------------------- c5.c | 3 +- c5x86.c | 3 +- 3 files changed, 4 insertions(+), 527 deletions(-) delete mode 100644 c4.c diff --git a/c4.c b/c4.c deleted file mode 100644 index bebeec1..0000000 --- a/c4.c +++ /dev/null @@ -1,525 +0,0 @@ -// c4.c - C in four functions - -// char, int, and pointer types -// if, while, return, and expression statements -// just enough features to allow self-compilation and a bit more - -// Written by Robert Swierczek - -#include -#include -#include -#include - -char *p, *lp, // current position in source code - *data; // data/bss pointer - -int *e, *le, // current position in emitted code - *id, // currently parsed identifier - *sym, // symbol table (simple list of identifiers) - tk, // current token - ival, // current token value - ty, // current expression type - loc, // local variable offset - line, // current line number - src, // print source and assembly flag - debug; // print executed instructions - -// tokens and classes (operators last and in precedence order) -enum { - Num = 128, Fun, Sys, Glo, Loc, Id, - Char, Else, Enum, If, Int, Return, Sizeof, While, - Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, 
Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Brak -}; - -// opcodes -enum { LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH , - OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD , - OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,EXIT }; - -// types -enum { CHAR, INT, PTR }; - -// identifier offsets (since we can't create an ident struct) -enum { Tk, Hash, Name, Class, Type, Val, HClass, HType, HVal, Idsz }; - -void next() -{ - char *pp; - - while (tk = *p) { - ++p; - if (tk == '\n') { - if (src) { - printf("%d: %.*s", line, p - lp, lp); - lp = p; - while (le < e) { - printf("%8.4s", &"LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ," - "OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ," - "OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,EXIT,"[*++le * 5]); - if (*le <= ADJ) printf(" %d\n", *++le); else printf("\n"); - } - } - ++line; - } - else if (tk == '#') { - while (*p != 0 && *p != '\n') ++p; - } - else if ((tk >= 'a' && tk <= 'z') || (tk >= 'A' && tk <= 'Z') || tk == '_') { - pp = p - 1; - while ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || (*p >= '0' && *p <= '9') || *p == '_') - tk = tk * 147 + *p++; - tk = (tk << 6) + (p - pp); - id = sym; - while (id[Tk]) { - if (tk == id[Hash] && !memcmp((char *)id[Name], pp, p - pp)) { tk = id[Tk]; return; } - id = id + Idsz; - } - id[Name] = (int)pp; - id[Hash] = tk; - tk = id[Tk] = Id; - return; - } - else if (tk >= '0' && tk <= '9') { - if (ival = tk - '0') { while (*p >= '0' && *p <= '9') ival = ival * 10 + *p++ - '0'; } - else if (*p == 'x' || *p == 'X') { - while ((tk = *++p) && ((tk >= '0' && tk <= '9') || (tk >= 'a' && tk <= 'f') || (tk >= 'A' && tk <= 'F'))) - ival = ival * 16 + (tk & 15) + (tk >= 'A' ? 
9 : 0); - } - else { while (*p >= '0' && *p <= '7') ival = ival * 8 + *p++ - '0'; } - tk = Num; - return; - } - else if (tk == '/') { - if (*p == '/') { - ++p; - while (*p != 0 && *p != '\n') ++p; - } - else { - tk = Div; - return; - } - } - else if (tk == '\'' || tk == '"') { - pp = data; - while (*p != 0 && *p != tk) { - if ((ival = *p++) == '\\') { - if ((ival = *p++) == 'n') ival = '\n'; - } - if (tk == '"') *data++ = ival; - } - ++p; - if (tk == '"') ival = (int)pp; else tk = Num; - return; - } - else if (tk == '=') { if (*p == '=') { ++p; tk = Eq; } else tk = Assign; return; } - else if (tk == '+') { if (*p == '+') { ++p; tk = Inc; } else tk = Add; return; } - else if (tk == '-') { if (*p == '-') { ++p; tk = Dec; } else tk = Sub; return; } - else if (tk == '!') { if (*p == '=') { ++p; tk = Ne; } return; } - else if (tk == '<') { if (*p == '=') { ++p; tk = Le; } else if (*p == '<') { ++p; tk = Shl; } else tk = Lt; return; } - else if (tk == '>') { if (*p == '=') { ++p; tk = Ge; } else if (*p == '>') { ++p; tk = Shr; } else tk = Gt; return; } - else if (tk == '|') { if (*p == '|') { ++p; tk = Lor; } else tk = Or; return; } - else if (tk == '&') { if (*p == '&') { ++p; tk = Lan; } else tk = And; return; } - else if (tk == '^') { tk = Xor; return; } - else if (tk == '%') { tk = Mod; return; } - else if (tk == '*') { tk = Mul; return; } - else if (tk == '[') { tk = Brak; return; } - else if (tk == '?') { tk = Cond; return; } - else if (tk == '~' || tk == ';' || tk == '{' || tk == '}' || tk == '(' || tk == ')' || tk == ']' || tk == ',' || tk == ':') return; - } -} - -void expr(int lev) -{ - int t, *d; - - if (!tk) { printf("%d: unexpected eof in expression\n", line); exit(-1); } - else if (tk == Num) { *++e = IMM; *++e = ival; next(); ty = INT; } - else if (tk == '"') { - *++e = IMM; *++e = ival; next(); - while (tk == '"') next(); - data = (char *)((int)data + sizeof(int) & -sizeof(int)); ty = PTR; - } - else if (tk == Sizeof) { - next(); if (tk == '(') next(); 
else { printf("%d: open paren expected in sizeof\n", line); exit(-1); } - ty = INT; if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; } - while (tk == Mul) { next(); ty = ty + PTR; } - if (tk == ')') next(); else { printf("%d: close paren expected in sizeof\n", line); exit(-1); } - *++e = IMM; *++e = (ty == CHAR) ? sizeof(char) : sizeof(int); - ty = INT; - } - else if (tk == Id) { - d = id; next(); - if (tk == '(') { - next(); - t = 0; - while (tk != ')') { expr(Assign); *++e = PSH; ++t; if (tk == ',') next(); } - next(); - if (d[Class] == Sys) *++e = d[Val]; - else if (d[Class] == Fun) { *++e = JSR; *++e = d[Val]; } - else { printf("%d: bad function call\n", line); exit(-1); } - if (t) { *++e = ADJ; *++e = t; } - ty = d[Type]; - } - else if (d[Class] == Num) { *++e = IMM; *++e = d[Val]; ty = INT; } - else { - if (d[Class] == Loc) { *++e = LEA; *++e = loc - d[Val]; } - else if (d[Class] == Glo) { *++e = IMM; *++e = d[Val]; } - else { printf("%d: undefined variable\n", line); exit(-1); } - *++e = ((ty = d[Type]) == CHAR) ? LC : LI; - } - } - else if (tk == '(') { - next(); - if (tk == Int || tk == Char) { - t = (tk == Int) ? INT : CHAR; next(); - while (tk == Mul) { next(); t = t + PTR; } - if (tk == ')') next(); else { printf("%d: bad cast\n", line); exit(-1); } - expr(Inc); - ty = t; - } - else { - expr(Assign); - if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } - } - } - else if (tk == Mul) { - next(); expr(Inc); - if (ty > INT) ty = ty - PTR; else { printf("%d: bad dereference\n", line); exit(-1); } - *++e = (ty == CHAR) ? 
LC : LI; - } - else if (tk == And) { - next(); expr(Inc); - if (*e == LC || *e == LI) --e; else { printf("%d: bad address-of\n", line); exit(-1); } - ty = ty + PTR; - } - else if (tk == '!') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = 0; *++e = EQ; ty = INT; } - else if (tk == '~') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = -1; *++e = XOR; ty = INT; } - else if (tk == Add) { next(); expr(Inc); ty = INT; } - else if (tk == Sub) { - next(); *++e = IMM; - if (tk == Num) { *++e = -ival; next(); } else { *++e = -1; *++e = PSH; expr(Inc); *++e = MUL; } - ty = INT; - } - else if (tk == Inc || tk == Dec) { - t = tk; next(); expr(Inc); - if (*e == LC) { *e = PSH; *++e = LC; } - else if (*e == LI) { *e = PSH; *++e = LI; } - else { printf("%d: bad lvalue in pre-increment\n", line); exit(-1); } - *++e = PSH; - *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); - *++e = (t == Inc) ? ADD : SUB; - *++e = (ty == CHAR) ? SC : SI; - } - else { printf("%d: bad expression\n", line); exit(-1); } - - while (tk >= lev) { // "precedence climbing" or "Top Down Operator Precedence" method - t = ty; - if (tk == Assign) { - next(); - if (*e == LC || *e == LI) *e = PSH; else { printf("%d: bad lvalue in assignment\n", line); exit(-1); } - expr(Assign); *++e = ((ty = t) == CHAR) ? 
SC : SI; - } - else if (tk == Cond) { - next(); - *++e = BZ; d = ++e; - expr(Assign); - if (tk == ':') next(); else { printf("%d: conditional missing colon\n", line); exit(-1); } - *d = (int)(e + 3); *++e = JMP; d = ++e; - expr(Cond); - *d = (int)(e + 1); - } - else if (tk == Lor) { next(); *++e = BNZ; d = ++e; expr(Lan); *d = (int)(e + 1); ty = INT; } - else if (tk == Lan) { next(); *++e = BZ; d = ++e; expr(Or); *d = (int)(e + 1); ty = INT; } - else if (tk == Or) { next(); *++e = PSH; expr(Xor); *++e = OR; ty = INT; } - else if (tk == Xor) { next(); *++e = PSH; expr(And); *++e = XOR; ty = INT; } - else if (tk == And) { next(); *++e = PSH; expr(Eq); *++e = AND; ty = INT; } - else if (tk == Eq) { next(); *++e = PSH; expr(Lt); *++e = EQ; ty = INT; } - else if (tk == Ne) { next(); *++e = PSH; expr(Lt); *++e = NE; ty = INT; } - else if (tk == Lt) { next(); *++e = PSH; expr(Shl); *++e = LT; ty = INT; } - else if (tk == Gt) { next(); *++e = PSH; expr(Shl); *++e = GT; ty = INT; } - else if (tk == Le) { next(); *++e = PSH; expr(Shl); *++e = LE; ty = INT; } - else if (tk == Ge) { next(); *++e = PSH; expr(Shl); *++e = GE; ty = INT; } - else if (tk == Shl) { next(); *++e = PSH; expr(Add); *++e = SHL; ty = INT; } - else if (tk == Shr) { next(); *++e = PSH; expr(Add); *++e = SHR; ty = INT; } - else if (tk == Add) { - next(); *++e = PSH; expr(Mul); - if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; } - *++e = ADD; - } - else if (tk == Sub) { - next(); *++e = PSH; expr(Mul); - if (t > PTR && t == ty) { *++e = SUB; *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = DIV; ty = INT; } - else if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; *++e = SUB; } - else *++e = SUB; - } - else if (tk == Mul) { next(); *++e = PSH; expr(Inc); *++e = MUL; ty = INT; } - else if (tk == Div) { next(); *++e = PSH; expr(Inc); *++e = DIV; ty = INT; } - else if (tk == Mod) { next(); *++e = PSH; expr(Inc); *++e = MOD; ty = INT; } - else if (tk == 
Inc || tk == Dec) { - if (*e == LC) { *e = PSH; *++e = LC; } - else if (*e == LI) { *e = PSH; *++e = LI; } - else { printf("%d: bad lvalue in post-increment\n", line); exit(-1); } - *++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); - *++e = (tk == Inc) ? ADD : SUB; - *++e = (ty == CHAR) ? SC : SI; - *++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); - *++e = (tk == Inc) ? SUB : ADD; - next(); - } - else if (tk == Brak) { - next(); *++e = PSH; expr(Assign); - if (tk == ']') next(); else { printf("%d: close bracket expected\n", line); exit(-1); } - if (t > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; } - else if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); } - *++e = ADD; - *++e = ((ty = t - PTR) == CHAR) ? LC : LI; - } - else { printf("%d: compiler error tk=%d\n", line, tk); exit(-1); } - } -} - -void stmt() -{ - int *a, *b; - - if (tk == If) { - next(); - if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); } - expr(Assign); - if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } - *++e = BZ; b = ++e; - stmt(); - if (tk == Else) { - *b = (int)(e + 3); *++e = JMP; b = ++e; - next(); - stmt(); - } - *b = (int)(e + 1); - } - else if (tk == While) { - next(); - a = e + 1; - if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); } - expr(Assign); - if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } - *++e = BZ; b = ++e; - stmt(); - *++e = JMP; *++e = (int)a; - *b = (int)(e + 1); - } - else if (tk == Return) { - next(); - if (tk != ';') expr(Assign); - *++e = LEV; - if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); } - } - else if (tk == '{') { - next(); - while (tk != '}') stmt(); - next(); - } - else if (tk == ';') { - next(); - } - else { - expr(Assign); - if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); 
exit(-1); } - } -} - -int main(int argc, char **argv) -{ - int fd, bt, ty, poolsz, *idmain; - int *pc, *sp, *bp, a, cycle; // vm registers - int i, *t; // temps - - --argc; ++argv; - if (argc > 0 && **argv == '-' && (*argv)[1] == 's') { src = 1; --argc; ++argv; } - if (argc > 0 && **argv == '-' && (*argv)[1] == 'd') { debug = 1; --argc; ++argv; } - if (argc < 1) { printf("usage: c4 [-s] [-d] file ...\n"); return -1; } - - if ((fd = open(*argv, 0)) < 0) { printf("could not open(%s)\n", *argv); return -1; } - - poolsz = 256*1024; // arbitrary size - if (!(sym = malloc(poolsz))) { printf("could not malloc(%d) symbol area\n", poolsz); return -1; } - if (!(le = e = malloc(poolsz))) { printf("could not malloc(%d) text area\n", poolsz); return -1; } - if (!(data = malloc(poolsz))) { printf("could not malloc(%d) data area\n", poolsz); return -1; } - if (!(sp = malloc(poolsz))) { printf("could not malloc(%d) stack area\n", poolsz); return -1; } - - memset(sym, 0, poolsz); - memset(e, 0, poolsz); - memset(data, 0, poolsz); - - p = "char else enum if int return sizeof while " - "open read close printf malloc memset memcmp exit void main"; - i = Char; while (i <= While) { next(); id[Tk] = i++; } // add keywords to symbol table - i = OPEN; while (i <= EXIT) { next(); id[Class] = Sys; id[Type] = INT; id[Val] = i++; } // add library to symbol table - next(); id[Tk] = Char; // handle void type - next(); idmain = id; // keep track of main - - if (!(lp = p = malloc(poolsz))) { printf("could not malloc(%d) source area\n", poolsz); return -1; } - if ((i = read(fd, p, poolsz-1)) <= 0) { printf("read() returned %d\n", i); return -1; } - p[i] = 0; - close(fd); - - // parse declarations - line = 1; - next(); - while (tk) { - bt = INT; // basetype - if (tk == Int) next(); - else if (tk == Char) { next(); bt = CHAR; } - else if (tk == Enum) { - next(); - if (tk != '{') next(); - if (tk == '{') { - next(); - i = 0; - while (tk != '}') { - if (tk != Id) { printf("%d: bad enum identifier 
%d\n", line, tk); return -1; } - next(); - if (tk == Assign) { - next(); - if (tk != Num) { printf("%d: bad enum initializer\n", line); return -1; } - i = ival; - next(); - } - id[Class] = Num; id[Type] = INT; id[Val] = i++; - if (tk == ',') next(); - } - next(); - } - } - while (tk != ';' && tk != '}') { - ty = bt; - while (tk == Mul) { next(); ty = ty + PTR; } - if (tk != Id) { printf("%d: bad global declaration\n", line); return -1; } - if (id[Class]) { printf("%d: duplicate global definition\n", line); return -1; } - next(); - id[Type] = ty; - if (tk == '(') { // function - id[Class] = Fun; - id[Val] = (int)(e + 1); - next(); i = 0; - while (tk != ')') { - ty = INT; - if (tk == Int) next(); - else if (tk == Char) { next(); ty = CHAR; } - while (tk == Mul) { next(); ty = ty + PTR; } - if (tk != Id) { printf("%d: bad parameter declaration\n", line); return -1; } - if (id[Class] == Loc) { printf("%d: duplicate parameter definition\n", line); return -1; } - id[HClass] = id[Class]; id[Class] = Loc; - id[HType] = id[Type]; id[Type] = ty; - id[HVal] = id[Val]; id[Val] = i++; - next(); - if (tk == ',') next(); - } - next(); - if (tk != '{') { printf("%d: bad function definition\n", line); return -1; } - loc = ++i; - next(); - while (tk == Int || tk == Char) { - bt = (tk == Int) ? 
INT : CHAR; - next(); - while (tk != ';') { - ty = bt; - while (tk == Mul) { next(); ty = ty + PTR; } - if (tk != Id) { printf("%d: bad local declaration\n", line); return -1; } - if (id[Class] == Loc) { printf("%d: duplicate local definition\n", line); return -1; } - id[HClass] = id[Class]; id[Class] = Loc; - id[HType] = id[Type]; id[Type] = ty; - id[HVal] = id[Val]; id[Val] = ++i; - next(); - if (tk == ',') next(); - } - next(); - } - *++e = ENT; *++e = i - loc; - while (tk != '}') stmt(); - *++e = LEV; - id = sym; // unwind symbol table locals - while (id[Tk]) { - if (id[Class] == Loc) { - id[Class] = id[HClass]; - id[Type] = id[HType]; - id[Val] = id[HVal]; - } - id = id + Idsz; - } - } - else { - id[Class] = Glo; - id[Val] = (int)data; - data = data + sizeof(int); - } - if (tk == ',') next(); - } - next(); - } - - if (!(pc = (int *)idmain[Val])) { printf("main() not defined\n"); return -1; } - if (src) return 0; - - // setup stack - sp = (int *)((int)sp + poolsz); - *--sp = EXIT; // call exit if main returns - *--sp = PSH; t = sp; - *--sp = argc; - *--sp = (int)argv; - *--sp = (int)t; - - // run... - cycle = 0; - while (1) { - i = *pc++; ++cycle; - if (debug) { - printf("%d> %.4s", cycle, - &"LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ," - "OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ," - "OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,EXIT,"[i * 5]); - if (i <= ADJ) printf(" %d\n", *pc); else printf("\n"); - } - if (i == LEA) a = (int)(bp + *pc++); // load local address - else if (i == IMM) a = *pc++; // load global address or immediate - else if (i == JMP) pc = (int *)*pc; // jump - else if (i == JSR) { *--sp = (int)(pc + 1); pc = (int *)*pc; } // jump to subroutine - else if (i == BZ) pc = a ? pc + 1 : (int *)*pc; // branch if zero - else if (i == BNZ) pc = a ? 
(int *)*pc : pc + 1; // branch if not zero - else if (i == ENT) { *--sp = (int)bp; bp = sp; sp = sp - *pc++; } // enter subroutine - else if (i == ADJ) sp = sp + *pc++; // stack adjust - else if (i == LEV) { sp = bp; bp = (int *)*sp++; pc = (int *)*sp++; } // leave subroutine - else if (i == LI) a = *(int *)a; // load int - else if (i == LC) a = *(char *)a; // load char - else if (i == SI) *(int *)*sp++ = a; // store int - else if (i == SC) a = *(char *)*sp++ = a; // store char - else if (i == PSH) *--sp = a; // push - - else if (i == OR) a = *sp++ | a; - else if (i == XOR) a = *sp++ ^ a; - else if (i == AND) a = *sp++ & a; - else if (i == EQ) a = *sp++ == a; - else if (i == NE) a = *sp++ != a; - else if (i == LT) a = *sp++ < a; - else if (i == GT) a = *sp++ > a; - else if (i == LE) a = *sp++ <= a; - else if (i == GE) a = *sp++ >= a; - else if (i == SHL) a = *sp++ << a; - else if (i == SHR) a = *sp++ >> a; - else if (i == ADD) a = *sp++ + a; - else if (i == SUB) a = *sp++ - a; - else if (i == MUL) a = *sp++ * a; - else if (i == DIV) a = *sp++ / a; - else if (i == MOD) a = *sp++ % a; - - else if (i == OPEN) a = open((char *)sp[1], *sp); - else if (i == READ) a = read(sp[2], (char *)sp[1], *sp); - else if (i == CLOS) a = close(*sp); - else if (i == PRTF) { t = sp + pc[1]; a = printf((char *)t[-1], t[-2], t[-3], t[-4], t[-5], t[-6]); } - else if (i == MALC) a = (int)malloc(*sp); - else if (i == MSET) a = (int)memset((char *)sp[2], sp[1], *sp); - else if (i == MCMP) a = memcmp((char *)sp[2], (char *)sp[1], *sp); - else if (i == EXIT) { printf("exit(%d) cycle = %d\n", *sp, cycle); return *sp; } - else { printf("unknown instruction = %d! 
cycle = %d\n", i, cycle); return -1; } - } -} diff --git a/c5.c b/c5.c index 7144fae..75e0b4c 100644 --- a/c5.c +++ b/c5.c @@ -8,10 +8,11 @@ // Written by Robert Swierczek +#include #include #include #include -#include +#include #ifdef _WIN32 #include "w32.h" #endif diff --git a/c5x86.c b/c5x86.c index d238f22..b298146 100644 --- a/c5x86.c +++ b/c5x86.c @@ -8,10 +8,11 @@ // Written by Robert Swierczek +#include #include #include #include -#include +#include #ifdef _WIN32 #include "w32.h" #else From 2a46cf103ff418fa6abc9f37fc37e56303ddcf80 Mon Sep 17 00:00:00 2001 From: Tommy Thorn Date: Sat, 10 Sep 2022 12:24:41 -0700 Subject: [PATCH 3/3] Enabling build on 64-bit + remove mmap, dsym, qsort The usual problem with the assumption that int == intptr_t. The proper fix is a bit intrusive so we use the simpler but very hackish macro trick. Lacked header inclusions for mmap, dsym, and qsort but these aren't needed for self-hosting. --- c5.c | 109 ++++++++++++++++++++++++++++++----------------------------- 1 file changed, 55 insertions(+), 54 deletions(-) diff --git a/c5.c b/c5.c index 75e0b4c..e364689 100644 --- a/c5.c +++ b/c5.c @@ -17,6 +17,8 @@ #include "w32.h" #endif +#define int long + char *p, *lp, // current position in source code *data; // data/bss pointer @@ -41,7 +43,7 @@ enum { // opcodes enum { LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH , OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD , - OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,MCPY,MMAP,DSYM,QSRT,EXIT }; + OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,MCPY,EXIT }; // types enum { CHAR, INT, PTR }; @@ -57,13 +59,13 @@ void next() ++p; if (tk == '\n') { if (src) { - printf("%d: %.*s", line, p - lp, lp); + printf("%ld: %.*s", line, p - lp, lp); lp = p; while (le < e) { printf("%8.4s", &"LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ," "OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ," - 
"OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,MCPY,MMAP,DSYM,QSRT,EXIT,"[*++le * 5]); - if (*le <= ADJ) printf(" %d\n", *++le); else printf("\n"); + "OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,MCPY,EXIT,"[*++le * 5]); + if (*le <= ADJ) printf(" %ld\n", *++le); else printf("\n"); } } ++line; @@ -139,7 +141,7 @@ void expr(int lev) { int t, *d, *b; - if (!tk) { printf("%d: unexpected eof in expression\n", line); exit(-1); } + if (!tk) { printf("%ld: unexpected eof in expression\n", line); exit(-1); } else if (tk == Num) { *--n = ival; *--n = Num; next(); ty = INT; } else if (tk == '"') { *--n = ival; *--n = Num; next(); @@ -147,17 +149,17 @@ void expr(int lev) data = (char *)((int)data + sizeof(int) & -sizeof(int)); ty = PTR; } else if (tk == Sizeof) { - next(); if (tk == '(') next(); else { printf("%d: open paren expected in sizeof\n", line); exit(-1); } + next(); if (tk == '(') next(); else { printf("%ld: open paren expected in sizeof\n", line); exit(-1); } ty = INT; if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; } while (tk == Mul) { next(); ty = ty + PTR; } - if (tk == ')') next(); else { printf("%d: close paren expected in sizeof\n", line); exit(-1); } + if (tk == ')') next(); else { printf("%ld: close paren expected in sizeof\n", line); exit(-1); } *--n = (ty == CHAR) ? 
sizeof(char) : sizeof(int); *--n = Num; ty = INT; } else if (tk == Id) { d = id; next(); if (tk == '(') { - if (d[Class] != Sys && d[Class] != Fun) { printf("%d: bad function call\n", line); exit(-1); } + if (d[Class] != Sys && d[Class] != Fun) { printf("%ld: bad function call\n", line); exit(-1); } next(); t = 0; b = 0; while (tk != ')') { expr(Assign); *--n = (int)b; b = n; ++t; if (tk == ',') next(); } @@ -169,7 +171,7 @@ void expr(int lev) else { if (d[Class] == Loc) { *--n = d[Val]; *--n = Loc; } else if (d[Class] == Glo) { *--n = d[Val]; *--n = Num; } - else { printf("%d: undefined variable\n", line); exit(-1); } + else { printf("%ld: undefined variable\n", line); exit(-1); } *--n = ty = d[Type]; *--n = Load; } } @@ -178,23 +180,23 @@ void expr(int lev) if (tk == Int || tk == Char) { t = (tk == Int) ? INT : CHAR; next(); while (tk == Mul) { next(); t = t + PTR; } - if (tk == ')') next(); else { printf("%d: bad cast\n", line); exit(-1); } + if (tk == ')') next(); else { printf("%ld: bad cast\n", line); exit(-1); } expr(Inc); ty = t; } else { expr(Assign); - if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } + if (tk == ')') next(); else { printf("%ld: close paren expected\n", line); exit(-1); } } } else if (tk == Mul) { next(); expr(Inc); - if (ty > INT) ty = ty - PTR; else { printf("%d: bad dereference\n", line); exit(-1); } + if (ty > INT) ty = ty - PTR; else { printf("%ld: bad dereference\n", line); exit(-1); } *--n = ty; *--n = Load; } else if (tk == And) { next(); expr(Inc); - if (*n == Load) n = n+2; else { printf("%d: bad address-of\n", line); exit(-1); } + if (*n == Load) n = n+2; else { printf("%ld: bad address-of\n", line); exit(-1); } ty = ty + PTR; } else if (tk == '!') { @@ -215,21 +217,21 @@ void expr(int lev) } else if (tk == Inc || tk == Dec) { t = tk; next(); expr(Inc); - if (*n == Load) *n = t; else { printf("%d: bad lvalue in pre-increment\n", line); exit(-1); } + if (*n == Load) *n = t; else { printf("%ld: 
bad lvalue in pre-increment\n", line); exit(-1); } } - else { printf("%d: bad expression\n", line); exit(-1); } + else { printf("%ld: bad expression\n", line); exit(-1); } while (tk >= lev) { // "precedence climbing" or "Top Down Operator Precedence" method t = ty; b = n; if (tk == Assign) { next(); - if (*n != Load) { printf("%d: bad lvalue in assignment\n", line); exit(-1); } + if (*n != Load) { printf("%ld: bad lvalue in assignment\n", line); exit(-1); } expr(Assign); *--n = (int)(b+2); *--n = ty = t; *--n = Assign; } else if (tk == Cond) { next(); expr(Assign); - if (tk == ':') next(); else { printf("%d: conditional missing colon\n", line); exit(-1); } + if (tk == ':') next(); else { printf("%ld: conditional missing colon\n", line); exit(-1); } d = n; expr(Cond); --n; *n = (int)(n+1); *--n = (int)d; *--n = (int)b; *--n = Cond; @@ -261,20 +263,20 @@ void expr(int lev) else if (tk == Div) { next(); expr(Inc); if (*n==Num && *b==Num) n[1] = b[1] / n[1]; else { *--n = (int)b; *--n = Div; } ty = INT; } else if (tk == Mod) { next(); expr(Inc); if (*n==Num && *b==Num) n[1] = b[1] % n[1]; else { *--n = (int)b; *--n = Mod; } ty = INT; } else if (tk == Inc || tk == Dec) { - if (*n == Load) *n = tk; else { printf("%d: bad lvalue in post-increment\n", line); exit(-1); } + if (*n == Load) *n = tk; else { printf("%ld: bad lvalue in post-increment\n", line); exit(-1); } *--n = (ty > PTR) ? sizeof(int) : sizeof(char); *--n = Num; *--n = (int)b; *--n = (tk == Inc) ? 
Sub : Add; next(); } else if (tk == Brak) { next(); expr(Assign); - if (tk == ']') next(); else { printf("%d: close bracket expected\n", line); exit(-1); } + if (tk == ']') next(); else { printf("%ld: close bracket expected\n", line); exit(-1); } if (t > PTR) { if (*n == Num) n[1] = n[1] * sizeof(int); else { *--n = sizeof(int); *--n = Num; --n; *n = (int)(n+3); *--n = Mul; } } - else if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); } + else if (t < PTR) { printf("%ld: pointer type expected\n", line); exit(-1); } if (*n == Num && *b == Num) n[1] = b[1] + n[1]; else { *--n = (int)b; *--n = Add; } *--n = ty = t - PTR; *--n = Load; } - else { printf("%d: compiler error tk=%d\n", line, tk); exit(-1); } + else { printf("%ld: compiler error tk=%ld\n", line, tk); exit(-1); } } } @@ -284,25 +286,25 @@ void stmt() if (tk == If) { next(); - if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); } + if (tk == '(') next(); else { printf("%ld: open paren expected\n", line); exit(-1); } expr(Assign); a = n; - if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } + if (tk == ')') next(); else { printf("%ld: close paren expected\n", line); exit(-1); } stmt(); b = n; if (tk == Else) { next(); stmt(); c = n; } else c = 0; *--n = (int)c; *--n = (int)b; *--n = (int)a; *--n = Cond; } else if (tk == While) { next(); - if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); } + if (tk == '(') next(); else { printf("%ld: open paren expected\n", line); exit(-1); } expr(Assign); a = n; - if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } + if (tk == ')') next(); else { printf("%ld: close paren expected\n", line); exit(-1); } stmt(); *--n = (int)a; *--n = While; } else if (tk == Return) { next(); if (tk != ';') { expr(Assign); a = n; } else a = 0; - if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); } + if (tk == ';') 
next(); else { printf("%ld: semicolon expected\n", line); exit(-1); } *--n = (int)a; *--n = Return; } else if (tk == '{') { @@ -316,13 +318,13 @@ void stmt() } else { expr(Assign); - if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); } + if (tk == ';') next(); else { printf("%ld: semicolon expected\n", line); exit(-1); } } } void gen(int *n) { - int i, *a, *b; + int i, *b; i = *n; if (i == Num) { *++e = IMM; *++e = n[1]; } @@ -375,10 +377,12 @@ void gen(int *n) else if (i == Return) { if (n[1]) gen((int *)n[1]); *++e = LEV; } else if (i == '{') { gen((int *)n[1]); gen(n+2); } else if (i == Enter) { *++e = ENT; *++e = n[1]; gen(n+2); *++e = LEV; } - else if (i != ';') { printf("%d: compiler error gen=%d\n", line, i); exit(-1); } + else if (i != ';') { printf("%ld: compiler error gen=%ld\n", line, i); exit(-1); } } +#undef int int main(int argc, char **argv) +#define int long { int fd, bt, ty, poolsz, *idmain, *ast; int *pc, *sp, *bp, a, cycle; // vm registers @@ -392,11 +396,11 @@ int main(int argc, char **argv) if ((fd = open(*argv, 0)) < 0) { printf("could not open(%s)\n", *argv); return -1; } poolsz = 256*1024; // arbitrary size - if (!(sym = malloc(poolsz))) { printf("could not malloc(%d) symbol area\n", poolsz); return -1; } - if (!(le = e = malloc(poolsz))) { printf("could not malloc(%d) text area\n", poolsz); return -1; } - if (!(data = malloc(poolsz))) { printf("could not malloc(%d) data area\n", poolsz); return -1; } - if (!(sp = malloc(poolsz))) { printf("could not malloc(%d) stack area\n", poolsz); return -1; } - if (!(ast = malloc(poolsz))) { printf("could not malloc(%d) abstract syntax tree area\n", poolsz); return -1; } + if (!(sym = malloc(poolsz))) { printf("could not malloc(%ld) symbol area\n", poolsz); return -1; } + if (!(le = e = malloc(poolsz))) { printf("could not malloc(%ld) text area\n", poolsz); return -1; } + if (!(data = malloc(poolsz))) { printf("could not malloc(%ld) data area\n", poolsz); return -1; } + if 
(!(sp = malloc(poolsz))) { printf("could not malloc(%ld) stack area\n", poolsz); return -1; } + if (!(ast = malloc(poolsz))) { printf("could not malloc(%ld) abstract syntax tree area\n", poolsz); return -1; } ast = (int *)((int)ast + poolsz); // abstract syntax tree is most efficiently built as a stack memset(sym, 0, poolsz); @@ -404,14 +408,14 @@ int main(int argc, char **argv) memset(data, 0, poolsz); p = "char else enum if int return sizeof while " - "open read close printf malloc memset memcmp memcpy mmap dlsym qsort exit void main"; + "open read close printf malloc memset memcmp memcpy exit void main"; i = Char; while (i <= While) { next(); id[Tk] = i++; } // add keywords to symbol table i = OPEN; while (i <= EXIT) { next(); id[Class] = Sys; id[Type] = INT; id[Val] = i++; } // add library to symbol table next(); id[Tk] = Char; // handle void type next(); idmain = id; // keep track of main - if (!(lp = p = malloc(poolsz))) { printf("could not malloc(%d) source area\n", poolsz); return -1; } - if ((i = read(fd, p, poolsz-1)) <= 0) { printf("read() returned %d\n", i); return -1; } + if (!(lp = p = malloc(poolsz))) { printf("could not malloc(%ld) source area\n", poolsz); return -1; } + if ((i = read(fd, p, poolsz-1)) <= 0) { printf("read() returned %ld\n", i); return -1; } p[i] = 0; close(fd); @@ -429,12 +433,12 @@ int main(int argc, char **argv) next(); i = 0; while (tk != '}') { - if (tk != Id) { printf("%d: bad enum identifier %d\n", line, tk); return -1; } + if (tk != Id) { printf("%ld: bad enum identifier %ld\n", line, tk); return -1; } next(); if (tk == Assign) { next(); n = ast; expr(Cond); - if (*n != Num) { printf("%d: bad enum initializer\n", line); return -1; } + if (*n != Num) { printf("%ld: bad enum initializer\n", line); return -1; } i = n[1]; } id[Class] = Num; id[Type] = INT; id[Val] = i++; @@ -446,8 +450,8 @@ int main(int argc, char **argv) while (tk != ';' && tk != '}') { ty = bt; while (tk == Mul) { next(); ty = ty + PTR; } - if (tk != Id) { 
printf("%d: bad global declaration\n", line); return -1; } - if (id[Class]) { printf("%d: duplicate global definition\n", line); return -1; } + if (tk != Id) { printf("%ld: bad global declaration\n", line); return -1; } + if (id[Class]) { printf("%ld: duplicate global definition\n", line); return -1; } next(); id[Type] = ty; if (tk == '(') { // function @@ -459,8 +463,8 @@ int main(int argc, char **argv) if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; } while (tk == Mul) { next(); ty = ty + PTR; } - if (tk != Id) { printf("%d: bad parameter declaration\n", line); return -1; } - if (id[Class] == Loc) { printf("%d: duplicate parameter definition\n", line); return -1; } + if (tk != Id) { printf("%ld: bad parameter declaration\n", line); return -1; } + if (id[Class] == Loc) { printf("%ld: duplicate parameter definition\n", line); return -1; } id[HClass] = id[Class]; id[Class] = Loc; id[HType] = id[Type]; id[Type] = ty; id[HVal] = id[Val]; id[Val] = i++; @@ -468,7 +472,7 @@ int main(int argc, char **argv) if (tk == ',') next(); } next(); - if (tk != '{') { printf("%d: bad function definition\n", line); return -1; } + if (tk != '{') { printf("%ld: bad function definition\n", line); return -1; } i = 0; next(); while (tk == Int || tk == Char) { @@ -477,8 +481,8 @@ int main(int argc, char **argv) while (tk != ';') { ty = bt; while (tk == Mul) { next(); ty = ty + PTR; } - if (tk != Id) { printf("%d: bad local declaration\n", line); return -1; } - if (id[Class] == Loc) { printf("%d: duplicate local definition\n", line); return -1; } + if (tk != Id) { printf("%ld: bad local declaration\n", line); return -1; } + if (id[Class] == Loc) { printf("%ld: duplicate local definition\n", line); return -1; } id[HClass] = id[Class]; id[Class] = Loc; id[HType] = id[Type]; id[Type] = ty; id[HVal] = id[Val]; id[Val] = --i; @@ -527,11 +531,11 @@ int main(int argc, char **argv) while (1) { i = *pc++; ++cycle; if (debug) { - printf("%d> %.4s", cycle, + printf("%ld> %.4s", 
cycle, &"LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ," "OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ," - "OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,MCPY,MMAP,DSYM,QSRT,EXIT,"[i * 5]); - if (i <= ADJ) printf(" %d\n", *pc); else printf("\n"); + "OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,MCPY,EXIT,"[i * 5]); + if (i <= ADJ) printf(" %ld\n", *pc); else printf("\n"); } if (i == LEA) a = (int)(bp + *pc++); // load local address else if (i == IMM) a = *pc++; // load global address or immediate @@ -573,10 +577,7 @@ int main(int argc, char **argv) else if (i == MSET) a = (int)memset((char *)*sp, sp[1], sp[2]); else if (i == MCMP) a = memcmp((char *)*sp, (char *)sp[1], sp[2]); else if (i == MCPY) a = (int)memcpy((char *)*sp, (char *)sp[1], sp[2]); - else if (i == MMAP) a = (int)mmap((char *)*sp, sp[1], sp[2], sp[3], sp[4], sp[5]); - else if (i == DSYM) a = (int)dlsym((char *)*sp, (char *)sp[1]); - else if (i == QSRT) qsort((char *)sp, sp[1], sp[2], (void *)sp[3]); - else if (i == EXIT) { printf("exit(%d) cycle = %d\n", *sp, cycle); return *sp; } - else { printf("unknown instruction = %d! cycle = %d\n", i, cycle); return -1; } + else if (i == EXIT) { printf("exit(%ld) cycle = %ld\n", *sp, cycle); return *sp; } + else { printf("unknown instruction = %ld! cycle = %ld\n", i, cycle); return -1; } } }