cat << \EOF_makefile.user | sed 's/^>//' > makefile.user
># makefile.user -- link "proto" from the objects in OBJ.  decl.c, func.c,
># expr.c and err.c come from antlr; scan.c comes from dlg.
>SYM=../../support/sym
>H=../../h
>SRC=scan.c decl.c func.c expr.c err.c main.c $(SYM)/sym.c engl.c
>OBJ=scan.o decl.o func.o expr.o err.o main.o sym.o engl.o
>CFLAGS=-I. -I$(H)
>AFLAGS=-k $(K) -gt -gh
># where are dlg and antlr binaries
>DLG=../../bin/dlg
>ANTLR=../../bin/antlr
># tokens of lookahead; the comment sits on its own line so that the value
># of K does not pick up the blank that would precede a trailing comment
>K=2
>
># explicit rules use $(CC) so they follow the same compiler as the
># implicit .c -> .o rules
>proto: $(OBJ) $(SRC)
>	$(CC) -o proto $(OBJ)
>
># antlr generates the parser sources, stdpccts.h and the dlg input
>decl.c func.c expr.c err.c stdpccts.h parser.dlg : decl.g func.g expr.g
>	$(ANTLR) $(AFLAGS) decl.g func.g expr.g
>
>main.o engl.o : stdpccts.h
>
># dlg turns the scanner description into scan.c
>scan.c : parser.dlg
>	$(DLG) -C2 parser.dlg scan.c
>
># sym.c lives in $(SYM), so it needs an explicit rule
>sym.o : $(SYM)/sym.c
>	$(CC) -c -o sym.o $(CFLAGS) $(SYM)/sym.c
EOF_makefile.user
cat << \EOF_README | sed 's/^>//' > README
>
>Quick note:
>
>You must run any code through the C preprocessor (e.g. "cc -E $(CFLAGS)
>file.c") before sending it to the prototype generator.
EOF_README
cat << \EOF_sym.h | sed 's/^>//' > sym.h
>
>/* sym.h -- symbol record and prototypes for the zzs_* routines */
>#ifndef SYM_H
>#define SYM_H
>
>/*
> * HASH(p, h): fold the NUL-terminated string at p into h, one character
> * at a time (h = (h<<1) + ch).  p is left pointing at the terminating
> * NUL and h is updated in place, so both arguments must be lvalues.
> * The expansion still ends in ';' so that existing uses written with or
> * without a trailing semicolon keep compiling.  Define HASH before
> * including this file to substitute a different hash.
> */
>#ifndef HASH
>#define HASH(p, h) while ( *(p) != '\0' ) (h) = ((h)<<1) + *(p)++;
>#endif
>
>/*
> * One symbol record.  The grammar actions in decl.g set "level" on
> * records returned by zzs_get(); how the remaining fields are maintained
> * is not visible in this header (presumably by sym.c -- TODO confirm).
> */
>typedef struct _sym {
>			char *symbol;
>			struct _sym *next, *prev, **head, *scope;
>			unsigned int hash;
>			int token;
>			int level;
>			char defined;
>			struct _ast *type, *init;
>		} Sym, *SymPtr;
>
>/* ANSI prototypes when available, old-style declarations otherwise */
>#ifdef __STDC__
>void zzs_init(int, int);
>void zzs_done(void);
>void zzs_add(char *, Sym *);
>Sym *zzs_get(char *);
>void zzs_del(Sym *);
>void zzs_keydel(char *);
>Sym **zzs_scope(Sym **);
>Sym *zzs_rmscope(Sym **);
>void zzs_stat(void);
>Sym *zzs_new(char *);
>Sym *zzs_newadd(char *);
>char *zzs_strdup(char *);
>#else
>void zzs_init();
>void zzs_done();
>void zzs_add();
>Sym *zzs_get();
>void zzs_del();
>void zzs_keydel();
>Sym **zzs_scope();
>Sym *zzs_rmscope();
>void zzs_stat();
>Sym *zzs_new();
>Sym *zzs_newadd();
>char *zzs_strdup();
>#endif
>
>#endif /* SYM_H */
EOF_sym.h
cat << \EOF_type.h | sed 's/^>//' > type.h
>/* S y m b o l   l e v e l s */
>#define GLOBAL		1
>#define PARAMETER	2
>#define LOCAL		3
>
>/* T y p e s  (single-bit flags; rule t1 ORs them, e.g. tUnsigned|tChar) */
>#define tNone		0x0000
>#define tInt		0x0001
>#define tFloat		0x0002
>#define tChar		0x0004
>#define tShort		0x0008
>#define tLong		0x0010
>#define tDouble		0x0020
>#define tVoid		0x0040
>#define tUnsigned	0x0080
>#define tEllipsis	0x0100
>#define tUnion		0x0200
>#define tStruct		0x0400
>#define tEnum		0x0800
>#define tTypeName	0x1000
>#define tSigned		0x2000
>
>/* T y p e   q u a l i f i e r s  (bit flags; rule typeq ORs them into *cv) */
>#define cvNone		0x0000
>#define cvConst		0x0001
>#define cvVolatile	0x0002
>
>/* S t o r a g e   c l a s s e s  (bit flags; rule sclass ORs them into *sc) */
>#define scNone		0x0000
>#define scAuto		0x0001
>#define scRegister	0x0002
>#define scStatic	0x0004
>#define scExtern	0x0008
>#define scTypedef	0x0010
>
>
>/* A S T   n o d e s */
>
>/* type-tree nodes: values held in an AST node's nodeType field */
>#define BaseTypeQ	1
>#define PointerQ	2
>#define ArrayQ		3
>#define FunctionQ	4
>#define FieldQ		5
>#define SymQ		6
>
>/* expr-tree nodes: zzcr_ast() below tags default nodes with ENode */
>#define ENode		10
>
>
>#define AST_FIELDS	int nodeType;					\
>					union {							\
>						qPointer p;					\
>						qArray a;					\
>						qFunction f;				\
>						qField fi;					\
>						qSym s;						\
>						ExprNode e;					\
>						qBaseType t;				\
>					} data;	/* one payload per node; presumably selected by nodeType -- TODO confirm */
>
>typedef struct _eNode {
>			int token;
>		} ExprNode;	/* payload written by zzcr_ast() (data.e) */
>
>typedef struct _eOperator {
>			int token;
>		} eOperator;	/* not a member of the AST_FIELDS union */
>
>typedef struct _qSym {
>			char *name;
>			void *init;
>		} qSym;
>
>typedef struct _qField {
>			char *name;
>		} qField;
>
>typedef struct _qPointer {
>			int cv;	/* presumably cvConst/cvVolatile flags -- TODO confirm */
>		} qPointer;
>
>typedef struct _qArray {
>			struct _ast *dim;
>		} qArray;
>
>/* don't need this right now */
>typedef struct _qFunction {
>			void *code;
>		} qFunction;
>
>typedef struct _qBaseType {
>			char *name;		/* type/struct/union name */
>			int cv;			/* type qualifier flags (cv*) -- presumably; TODO confirm */
>			int sc;			/* storage class flags; decl.g tests and sets sc* bits here */
>			int type;		/* decl.g compares this with tStruct/tUnion/tEnum */
>		} qBaseType;
>
>/* how to create a default node: store tok in data.e.token and tag the
> * node ENode; attr and text are not used.  NOTE(review): this expands to
> * two statements, so it is unsafe as the sole body of an unbraced
> * if/else -- check how the generated code invokes it before changing. */
>#define zzcr_ast(ast, attr, tok, text)	\
>	(ast)->data.e.token = tok;			\
>	(ast)->nodeType = ENode;
EOF_type.h
cat << \EOF_proto.h | sed 's/^>//' > proto.h
>#ifdef __STDC__
>char *strdup(char *);	/* NOTE(review): parameter is not const, unlike the usual library declaration -- may clash if a system header also declares strdup */
>struct _ast *zzmk_ast(struct _ast *, int, ...);
>struct _ast *bottom(struct _ast *);
>struct _ast *defineArgs(struct _ast *, Sym **);
>void error(char *);
>void error1(char *, char *);
>void warn(char *);
>void warn1(char *, char *);
>Sym
*addsym(int, char *,int , struct _ast *, struct _ast *);
>long strtol(char *, char**, int);
>#else
>char *strdup();
>struct _ast *zzmk_ast();
>struct _ast *bottom();
>struct _ast *defineArgs();
>void error();
>void error1();
>void warn();
>void warn1();
>Sym *addsym();
>long strtol();
>#endif
>
>extern Sym *Globals;
>extern Sym *Params;
EOF_proto.h
cat << \EOF_decl.g | sed 's/^>//' > decl.g
>/*
> * ANSI C recognizer
> *
> * Gives some error messages for semantics, but this grammar
> * checks mostly syntax.  We make no claim that it rigorously follows
> * the ANSI C standard, but it's a good start.
> *
> * Type trees are constructed and maintained in the symbol table.
> * Expression trees are constructed and then thrown away.  The
> * user can presumably do something more useful with them.
> *
> * Requires PCCTS Version 1.00
> *
> * Terence Parr
> * July 1991
> */
>
>#header <<
>	#define D_TextSize 20
>	#include "charbuf.h"
>	#include "type.h"
>	#include "sym.h"
>	#include "proto.h"
>>>
>
>/* blanks and newlines are skipped; a newline also advances zzline */
>#token "[\ \t]+"	<< zzskip(); >>
>#token "\n"			<< zzline++; zzskip(); >>
>
>/* preprocessor line markers: reset zzline from the number that follows
> * "#line" (5 characters skipped) or "#" (1 character skipped) */
>#token "#line [\ \t]+ [0-9]+ ~[\n]*\n"
>					<< zzline = atoi(zzlextext+5); zzskip(); >>
>#token "# [\ \t]+ [0-9]+ ~[\n]*\n"
>					<< zzline = atoi(zzlextext+1); zzskip(); >>
>
>/* an opening quote switches lexical class; zzmore() keeps accumulating
> * text until the closing quote returns STRING or CHARACTER */
>#token "\""			<< zzmode(STRINGS); zzmore(); >>
>#token "'"			<< zzmode(CHARACTERS); zzmore(); >>
>
>/* these tokens are used as node types, but not referenced in grammar */
>#token Var
>#token Func
>#token FuncCall
>#token Label
>#token PostInc
>#token PostDec
>#token StructPtrRef
>#token StructRef
>#token AggrTag
>
>/*
> * Inside a string literal.
> *
> * Numeric escapes: zzbegexpr points at the backslash that starts the
> * matched escape, so the digits are parsed from zzbegexpr+1.  Parsing
> * from zzbegexpr itself makes strtol() stop at the backslash and return
> * 0 for every escape.  The "\0x" form also accepts the hex letters
> * A-F/a-f; strtol() with base 16 skips the leading "0x" itself.
> */
>#lexclass STRINGS
>#token STRING "\""	<< zzmode(START); >>
>#token "\\\""		<< zzmore(); >>
>#token "\\n"		<< zzreplchar('\n'); zzmore(); >>
>#token "\\r"		<< zzreplchar('\r'); zzmore(); >>
>#token "\\t"		<< zzreplchar('\t'); zzmore(); >>
>#token "\\[1-9][0-9]*"
>					<< zzreplchar((char)strtol(zzbegexpr+1,NULL,10)); zzmore(); >>
>#token "\\0[0-7]*"	<< zzreplchar((char)strtol(zzbegexpr+1,NULL,8)); zzmore(); >>
>#token "\\0x[0-9A-Fa-f]+"
>					<< zzreplchar((char)strtol(zzbegexpr+1,NULL,16)); zzmore(); >>
>#token "\\~[\n\r]"	<< zzmore(); >>
>#token "[\n\r]"		<< zzline++; zzmore(); /* print warning about \n in str */>>
>#token "~[\"\n\r\\]+"<< zzmore(); >>
>
>/* Inside a character literal; escapes are handled as in STRINGS */
>#lexclass CHARACTERS
>#token CHARACTER "'"	<< zzmode(START); >>
>#token "\\'"		<< zzmore(); >>
>#token "\\n"		<< zzreplchar('\n'); zzmore(); >>
>#token "\\r"		<< zzreplchar('\r'); zzmore(); >>
>#token "\\t"		<< zzreplchar('\t'); zzmore(); >>
>#token "\\[1-9][0-9]*"
>					<< zzreplchar((char)strtol(zzbegexpr+1,NULL,10)); zzmore(); >>
>#token "\\0[0-7]*"	<< zzreplchar((char)strtol(zzbegexpr+1,NULL,8)); zzmore(); >>
>#token "\\0x[0-9A-Fa-f]+"
>					<< zzreplchar((char)strtol(zzbegexpr+1,NULL,16)); zzmore(); >>
>#token "\\~[\n\r]"	<< zzmore(); >>
>#token "[\n\r]"		<< zzline++; zzmore(); /* print warning about \n in str */>>
>#token "~[\'\n\r\\]"<< zzmore(); >>
>
>#lexclass START
>
>#token OCT_NUM		"[0][0-7]*"
>#token L_OCT_NUM	"[0][0-7]*[Ll]"
>#token INT_NUM		"[1-9][0-9]*"
>#token L_INT_NUM	"[1-9][0-9]*[Ll]"
>#token HEX_NUM		"[0][Xx][0-9A-Fa-f]+"
>#token L_HEX_NUM	"[0][Xx][0-9A-Fa-f]+[Ll]"
>#token FNUM			"([1-9][0-9]*{.[0-9]*} | {[0]}.[0-9]+) {[Ee]{[\+\-]}[0-9]+}"
>#token PreInc		"\+\+"
>#token PreDec		"\-\-"
>#token LPAREN		"\("
>#token LBRACK		"\["
>#token SizeOf		"sizeof"
>
>/* NOTE(review): several actions in the rule below appear as "<>" or begin
> * with a single "<"; their text looks truncated -- restore them from the
> * original distribution before running antlr on this file. */
>globals!: < zzs_scope(&Globals);>>
>
>		(	<<;>>
>			<> decl[GLOBAL]
>		|	<>
>			<>
>			declarator[base]
>			< t = defineArgs(#1, &Params);
>			>>
>			func_def[t]
>			<>
>			< p = zzs_rmscope(&Params);
>			  pScope(p, "parameters\n");
>			>>
>		)*
>
>		<

pScope(p, "globals\n");
>		  ProtoVars(p);>>
>		"@"
>	;
>
>/* d e c l -- recognize a declaration or definition.
> *
> * We handle typedefs in a bizarre way.  WORDs are converted
> * to TypeNames inside the lexical action for token WORD.  So,
> * because of the lookahead, we need to get a TypeName into
> * the symbol table before the lookahead can get a reference
> * to it, e.g. "typedef int I; I i;".  We actually add the typedef
> * name to the symbol table when we see its definition in
> * rule declarator and friends (dcltor2 calls addsym).  Aggregate
> * tags are handled in a similar fashion by adding them to the
> * symbol table as they are declared.
> *
> * Function definitions always have a FunctionQ node at the root
> * of the declarator since anything in front would make a pointer to
> * a function or whatever.  e.g. int *f(); --> () * int --> "function
> * returning pointer to integer."  Or, int (*f)() --> * () int -->
> * "pointer to function returning integer."  The first is a function
> * symbol, the 2nd is a variable.
> */
>decl![int level]	/* level: GLOBAL, PARAMETER, or the block level passed by rule block.  NOTE(review): several actions below appear as "<>" or begin with a single "<"; their text looks truncated -- restore before running antlr */
>	:	< AST *base, *d, *init=NULL, *tr;
>		  char *w;
>		  Sym *n=NULL, *p;>>
>		(	(sclass[&sc] | typeq[&cv])+
>			(	type[&t]				<>
>			|	aggr[sc,cv]				<>
>			|	enum_def				<>
>			|							<>
>			)
>		|	type[&t]					<>
>		|	aggr[scNone,cvNone]			<>
>		|	enum_def					<>
>		)
>		(	declarator[base]			<>
>			(	{ <> "=" initialize <> }
>				<nodeType == FunctionQ ) {	/* a declared (not defined) function is marked extern */
>					sc |= scExtern;
>					bottom(d)->data.t.sc |= scExtern;
>				  }
>				  handleSymbol(sc, w, d, init, $level);>>
>				(	<>
>					","
>					declarator[base]
>					{ <> "=" initialize <> }
>					<>
>					<nodeType == FunctionQ ) {
>						sc |= scExtern;
>						bottom(#2)->data.t.sc |= scExtern;
>					  }
>					  handleSymbol(sc, $2.text, #2, init, $level);>>
>				)*
>				<<
>				if ( base->data.t.type==tStruct ||
>					 base->data.t.type==tUnion ||
>					 base->data.t.type==tEnum )
>				{
>					if ( base->data.t.name != NULL )
>					{
>						p = zzs_get(base->data.t.name);
>						if ( p!=NULL ) p->level = $level;	/* record the level on the tag's symbol */
>					}
>				}
>				>>
>				";"
>			|	<<
>				handleSymbol(sc, w, d, init, $level);
>				tr = defineArgs(d, &Params);
>				>>
>				func_def[tr]
>				<>
>				< p = zzs_rmscope(&Params);
>				  pScope(p, "block\n");	/* NOTE(review): p is the Params scope, yet the label says "block" -- confirm intended */
>				>>
>			)
>		|	";"
>			<>
>			<data.t.type==tStruct ||
>				 base->data.t.type==tUnion ||
>				 base->data.t.type==tEnum )
>			{
>				p = zzs_get(base->data.t.name);	/* NOTE(review): unlike the branch above, name is not checked for NULL here -- confirm zzs_get() tolerates it */
>				if ( p!=NULL ) p->level = $level;
>			}
>			else
>				error("missing declarator");
>			>>
>		)
>	;
>
>sclass![int *sc]	/* OR the matched storage-class flag into *sc */
>	:	"auto"		<<*$sc |= scAuto;>>
>	|	"static"	<<*$sc |= scStatic;>>
>	|	"register"	<<*$sc |= scRegister;>>
>	|	"extern"	<<*$sc |= scExtern;>>
>	|	"typedef"	<<*$sc |= scTypedef;>>
>	;
>
>typeq![int *cv]	/* OR the matched qualifier flag into *cv */
>	:	"const"		<<*$cv |= cvConst;>>
>	|	"volatile"	<<*$cv |= cvVolatile;>>
>	;
>
>type![int *t]	/* thin wrapper: forwards t to t1 and passes t1's attribute up */
>	:	t1[t]		<<$type = $1;>>
>	;
>
>t1![int *type]	/* store the t* flag combination for the matched base type in *type */
>	:	(	"unsigned"	<<*$type = tUnsigned;>>
>		|	"signed"	<<*$type = tSigned;>>
>		)
>		(	"char"		<<*$type |= tChar;>>
>		|	{	"short"	<<*$type |= tShort;>>
>			|	"long"	<<*$type |= tLong;>>
>			}
>			{	"int"	<<*$type |= tInt;>>
>			}
>		)
>	|	(	"short"		<<*$type = tShort;>>
>			{	"int"	<<*$type |= tInt;>>
>			}
>		|	"long"		<<*$type = tLong;>>
>			{	"int"	<<*$type |= tInt;>>
>			|	"float"
<<*$type |= tFloat;>>
>			|	"double" <<*$type |= tDouble;>>
>			}
>		)
>	|	"void"		<<*$type = tVoid;>>
>	|	"char"		<<*$type = tChar;>>
>	|	"int"		<<*$type = tInt;>>
>	|	"float"		<<*$type = tFloat;>>
>	|	"double"	<<*$type = tDouble;>>
>	|	TypeName	<<*$type = tTypeName; $t1 = $1;>>
>	;
>
>/* D e c l a r a t o r */
>
>/*
> * Build a declarator by appending the base to the bottom of the type-tree
> * matched in dcltor1.  We pass the bottom node of the base (whose
> * data.t.sc field is tested for scTypedef in dcltor2) down to dcltor1 in
> * case we have a typedef on our hands which needs to be added to the
> * symbol table ASAP.
> */
>declarator![AST *base]
>	:	dcltor1[bottom($base)]	<<#(bottom(#1), $base);
>								  #0 = (#1==NULL)?$base:#1;	/* no modifiers matched: the base alone is the type */
>								  $declarator = $1;>>
>	;
>
>/*
> * Match *D1 or D2.  Build type-trees for PointerQ (pointer qualifier)
> * via:
> *
> *		#0 = D1
> *			 |
> *			 v
> *			 *
> *
> * where D? is dcltor? in this grammar.
> */
>dcltor1![AST *base]	/* NOTE(review): the "<>" actions below look truncated; "t" is used later but its setup is not visible */
>	:	<>
>		"\*"
>		{	"const"		<>
>		|	"volatile"	<>
>		}				<>
>		dcltor1[$base]	<<#(bottom(#3), t); #0=(#3==NULL)?t:#3;
>						  $dcltor1 = $3;>>
>	|	dcltor2[$base]	<<#0 = #1; $dcltor1 = $1;>>
>	;
>
>/*
> * For WORD D3 we return the following
> *
> *		$$ = WORD recognized.
> *		#0 = D3 (array or func modifier)
> *
> * For ( D1 ) we return
> *
> *		$$ = WORD recognized in D1.
> *		#0 = D1			(put stuff in (..) above [] or ())
> *			 |
> *			 v
> *			 D3
> *
> * For instance, (*f)() yields
> *
> *		$$ = f
> *		#0 = *			(pointer to)
> *			 |
> *			 v
> *			( )			(a function)
> *
> * If storage class is scTypedef, we need to add it to the symbol table.
> */
>dcltor2![AST *base]		/* pass in storage class for typedefs */
>	:	<>
>		WORD			<data.t.sc&scTypedef )
>							addsym(TypeName,$1.text,0,NULL,NULL);
>						>>
>		dcltor3			<<#0 = #2; $dcltor2 = $1;>>
>	|	"\(" dcltor1[$base] "\)"	<<$dcltor2 = $2;>>
>		dcltor3			<<#(bottom(#2), #4); #0=(#2==NULL)?#4:#2;>>
>	;
>
>/*
> * return #0 = [expr] or = [nodimension]
> * or ( ) --> arg1 --> ...
--> argn for a function
> *
> * multiple [1][2][3] yields
> *
> *		#0 = [1]		(a 1-element array of)
> *			  |
> *			  v
> *			 [2]		(2-element arrays of)
> *			  |
> *			  v
> *			 [3]		(3-element arrays)
> *
> */
>dcltor3!:	"\[" expr1 "\]" dcltor3	<<#0 = #( #[ArrayQ,#2], #4 );>>
>	|	"\[" "\]" dcltor3			<<#0 = #( #[ArrayQ,NULL], #3 );>>
>	|	"\(" args "\)"				<<#0 = #(NULL, #[FunctionQ], #2);>>
>	|								<<#0 = NULL;>>
>	;
>
>/*
> * match a list of arguments.
> *
> * The arguments are siblings of the FunctionQ node in the type
> * tree.  e.g.
> *
> *		[FunctionQ]-->[arg1]--> ... -->[argn]
> *						|				 |
> *						v				 v
> *					 [type1]		  [typen]
> */
>args!	:	<>	/* NOTE(review): "<>" actions in this rule look truncated; "t" is returned but its construction is not visible */
>		arg						<>
>		(	"," arg				<>
>		)*
>		{	"," "..."			<>
>		}
>		<<#0 = t;>>
>	|
>	;
>
>arg!	:	typename	<<#0 = #1;>>
>	|	WORD			<<#0 = #[SymQ,$1.text,NULL];>>
>	;
>
>/*
> * match a typename -- (used in type-casting and function prototypes).
> * Type-trees look the same as those for decl.  But, a symbol is
> * optional here because typenames can be used in argument lists.
> */
>typename!:	<>
>		(	(typeq[&cv])+
>			{	type[&t]			<>
>			|	aggr[scNone,cv]		<>
>			}
>		|	type[&t]				<>
>		|	aggr[scNone,cvNone]		<>
>		)
>		tdecl[base]	< #0=#(tr, #2);>>
>	;
>
>/* A g g r e g a t e s */
>
>/*
> * match an enum definition; yield following tree:
> *
> *		[BaseTypeQ] --> [elem1] --> ... --> [elemn]
> */
>enum_def!:	<>
>		"enum" WORD		<>
>		enum_lst		<<#0 = #(NULL, base, #3);>>
>	;
>
>/*
> * match a list of enumeration elements.
> *
> * The symbols are siblings of each other:
> *
> *		[elem1] --> ... --> [elemn]
> *
> * If an element has an initialization, store a pointer to it in the
> * AST node.
> */
>enum_lst!:	<>
>		"\{"
>		WORD
>		{	"=" expr1			<>
>		}						<>
>		(	","
>			WORD
>			{	<>
>				"=" expr1		<>
>			}					<>
>		)*
>		"\}"
>		<<#0 = list;>>
>	|	<<#0 = NULL;>>	/* no "{...}" list present */
>	;
>
>/*
> * Match a struct/union def.
> * Return a tree like this:
> *
> *		[BaseTypeQ]-->[fld1]--> ...
-->[fldn]
> *						|				 |
> *						v				 v
> *					 [type1]		  [typen]
> *
> * BUG: Allows two structs to have same name
> */
>aggr![int sc, int cv]	/* NOTE(review): "<>" actions in this rule look truncated; the creation of "base" is not visible */
>	:	<>
>		(	"struct"	<>
>		|	"union"		<>
>		)				<>
>		(	(	WORD		<data.t.name = strdup($1.text);>>
>			|	TypeName	<data.t.name = strdup($1.text);>>
>			)
>			(	ag[base]	<<#0 = #(NULL, base, #1);
>							  addsym(AggrTag, base->data.t.name,
>									 0, base, NULL);	/* a tag with a body is entered as an AggrTag symbol */
>							>>
>			|				<<#0 = base;>>	/* tag only, no field list */
>			)
>		|	ag[base]		<<#0 = #(NULL, base, #1);>>	/* untagged aggregate: no addsym call here */
>		)
>	;
>
>/*
> * match a field list for a struct/union
> *
> * The fields are siblings of each other:
> *
> *		[fld1] --> ... --> [fldn]
> *		  |					 |
> *		  v					 v
> *		[type1]			  [typen]
> *
> */
>ag![AST *base]
>	:	<>
>		"\{" fdef[$base]	<<#0=t=#2;>>
>		(	fdef[$base]		<<#(NULL, t, #1); t = #1;>>	/* chain each later fdef after the previous one */
>		)*
>		"\}"
>	;
>
>/*
> * Match one field definition; make the following tree
> *
> *		[FieldQ]
> *			|
> *			v
> *		[type1]
> */
>fdef![AST *base]	/* NOTE(review): "<>" actions in this rule look truncated; "f" and "g" are used but their setup is not visible */
>	:	<>
>		(	type[&t]				<>
>		|	aggr[scNone,cvNone]		<>
>		)
>		field[$base]				<>
>		(	"," field[$base]< f = #(NULL, f, g);>>
>		)*
>		";"
>		<<#0 = f;>>
>	;
>
>/* bitfields are recognized, but not handled 'cause not too many people
> * use them
> */
>field![AST *base]
>	:	declarator[$base] { ":" expr1 }	<<#0=#1; $field = $1;>>
>	|	":" expr1
>	;
>
>/* T y p e N a m e */
>
>tdecl![AST *base]	/* like declarator, built on tdecl1: append $base below whatever tdecl1 matched */
>	:	tdecl1		<<#(bottom(#1), $base);
>					  #0 = (#1==NULL)?$base:#1; $tdecl=$1;>>
>	;
>
>tdecl1!	:	<>
>		"\*"
>		{	"const"		<>
>		|	"volatile"	<>
>		}				<>
>		tdecl1			<<#(bottom(#3), t); #0=(#3==NULL)?t:#3;
>						  $tdecl1 = $3;>>
>	|	tdecl2			<<#0 = #1; $tdecl1 = $1;>>
>	;
>
>tdecl2!	:	<>
>		"\(" tdecl1 "\)"	<<$tdecl2 = $2;>>
>		tdecl3				<<#(bottom(#2), #4); #0=(#2==NULL)?#4:#2;>>
>	|	WORD tdecl3			<<$tdecl2 = $1; #0 = #2;>>
>	|	tdecl3				<<#0 = #1;>>
>	;
>
>tdecl3!
:	"\[" expr1 "\]" tdecl3<<#0 = #( #[ArrayQ,#2], #4 );>>
>	|	"\[" "\]" tdecl3<<#0 = #( #[ArrayQ,NULL], #3 );>>
>	|	"\(" args "\)"	<<#0 = #( NULL, #[FunctionQ], #2 );>>
>	|	/* empty; NOTE(review): dcltor3 sets #0 = NULL explicitly in its empty alternative, this rule does not -- confirm #0 defaults to NULL */
>	;
>
>
>/* I n i t   e x p r e s s i o n s */
>
>
>initialize
>	:	init2
>	|	expr0
>	;
>
>/* Build an initialization expression-tree of the form:
> *
> * Single-dimensioned array or structure:
> *
> *		"{"
> *		 |
> *		 v
> *		[exp1] --> ... --> [expn]
> *
> * Nested structure or multi-dim array:
> *
> *		"{"
> *		 |
> *		 v
> *		"{" --> ... --> "{"
> *		 |				...
> *		 v				...
> *		[exp1] --> ... --> [expn]
> */
>init2	:	"\{"^ init3 ( ","! init3 )* {","!} "\}"!	/* "{" becomes the subtree root; commas and "}" are left out of the tree */
>	;
>
>init3	:	init2
>	|	expr1
>	;
EOF_decl.g
cat << \EOF_func.g | sed 's/^>//' > func.g
>/* F u n c t i o n s */
>
>/* recognize a function definition (after the "func_name(...)").
> * If old-style arguments are found, a pointer to the list of
> * arguments is passed in.  If there are no parameters or if new-style
> * definitions are used, $args is NULL.
> */
>func_def[AST *args]	/* NOTE(review): actions here appear as "<>" or begin with a single "<"; their text looks truncated -- restore before running antlr */
>	:	< save = zzs_scope(NULL); zzs_scope(&Params);>>
>		(	decl[PARAMETER]
>			<>
>		)*
>		< error("we've already got the parameters, thanks");
>		  else checkArgs( args );
>		  zzs_scope( save );	/* switch back to the scope that was current on entry */
>		>>
>		block
>	;
>
>block	:	< static int level=LOCAL-1;>>
>		<>
>
>		"\{"^
>			( decl[level] )*
>			( stat[level] )*
>		"\}"!
>
>		<<--level;
>		  p = zzs_rmscope(&locals);		/* unlink from sym table */
>		  pScope(p, "parameters\n");	/* NOTE(review): p is the scope removed from locals, yet the label says "parameters" -- confirm intended */
>		  zzs_scope(saveScope);			/* return to old scope */
>		>>
>	;
>
>/*
> * match a statement and yield an expr-tree
> *
> *	Label		":"
> *				 |
> *				 v
> *				WORD
> *
> *	expr		expr
> *
> *	block		block
> *
> *	if			"if"
> *				 |
> *				 v
> *				expr --> stat --> stat		(2nd stat only if else-clause)
> *
> *	while		"while"
> *				 |
> *				 v
> *				expr --> stat
> *
> *	do			"do"
> *				 |
> *				 v
> *				stat --> expr
> *
> *	for			"for"
> *				 |
> *				 v
> *				expr --> expr --> expr --> stat
> *
> *	switch		"switch"
> *				 |
> *				 v
> *				expr --> stat
> *
> *	case		"case"
> *				 |
> *				 v
> *				expr
> *
> *	continue	"continue"
> *
> *	break		"break"
> *
> *	return		"return"
> *				 |
> *				 v
> *				expr
> *
> *	goto		"goto"
> *				 |
> *				 v
> *				WORD
> *
> *	";"
> */
>stat[int level]
>	:	<>
>		WORD ":"^
>		<