lexex.dvi - PDF 無料ダウンロード

(2018, c ) http://istksckwanseiacjp/ ishiura/cpl/ 4 41 1 mini-c lexc,, 2 testlexc, lexc mini-c 1 ( ) mini-c ( ) (int, char, if, else, while, return 6 ) ( ) (+, -, *, /, %, &, =, ==,!=, >, >=, <, <= 13 ) (,, ;, (, ), {, }, [, ] 8 ) 42 1 (http://istksckwanseiacjp/~ishiura/cpl/) 2 lexh testlexc testlextxt testlex outtxt ( ) lexc * lexc UTF-8 lexc, 3 "-c" "-o" gcc -c lexc gcc -c testlexc gcc -o testlex testlexo lexo, testlex (Unix ) testlexexe (Cygwin) (, ) 4 1

Segmentation fault ( ), 1, int a[10], a[-100] a[9200], i, a[i], 2, p *p, \0, 43 testlexc 1 testlexc lexc, lexc,, int main() { } int year; year = year + 1900; return 0; testmc /testlex testmc, type = KW INT token = "int" token = "main" type = LPAR token = "(" type = RPAR token = ")" type = LBRACE token = "{" type = KW INT token = "int" token = "year" type = SEMICOLON token = ";" token = "year" type = ASSIGN token = "=" token = "year" type = PLUS token = "+" type = INT token = "1900" val = 1900 type = SEMICOLON token = ";" type = KW RETURN token = "return" type = INT token = "0" val = 0 type = SEMICOLON token = ";" type = RBRACE token = "}" 4 2

, 2 testlex.c lex.c [testlex.c] 1: #include <stdio.h> 2: #include <stdlib.h> 3: #include <string.h> 4: #include "lex.h" 5: 6: int main(int argc, char **argv) 7: { 8: char filename[FILENAME_MAX]; 9: lex_t *x; 10: 11: if (argc<2) { 12: fprintf(stderr, "SYNTAX: %s filename\n", argv[0]); 13: exit(EXIT_FAILURE); 14: } 15: strcpy(filename,argv[1]); 16: 17: x = lex_new(filename); 18: lex_get(x); 19: while(x->type!= token_EOF) { 20: printf("type = %-10s", lex_typename(x->type)); 21: printf(" token = \"%s\"", x->token); 22: if (x->type==token_INT || x->type==token_CHAR) { 23: printf(" val = %d", x->val); 24: } 25: printf("\n"); 26: lex_get(x); 27: } 28: lex_delete(x); 29: 30: return 0; 31: } 4 lex.h lex.c include, lex.c 6 main 2, Unix Cygwin ./testlex (argc) (argv), i argv[i] (argv[i-1! argv[0] ) 8 FILENAME_MAX, ( char ), stdio.h 9 lex_t, x 4 3

11 15 0 (argc 1 ) argv[1] /testlex testmc "testmc" filename 17, filename lex new lex new,,, 18 27 while, lex get, ( ) x->type EOF (end of file) lex typename, x->token ( ) x->val, 28 lex delete,, x 44 441, L1 L8, 1 2 3 4 ( ),, 1, ( ) lexc, ( ) lexh 442 L1 lex err() lex err(lex *x, char *message),,,, message lexc /* [ L1] lex err() */,, 1 4 4 4

1: assert(x!=NULL); 2: assert(message!=NULL); 3: fprintf(stderr, "%s:%d: %s\n", x->filename, x->linenum, message); 4: exit(EXIT_FAILURE); 1 2, x message NULL, (?), 3 x,, x->filename, x->linenum 4 exit EXIT_FAILURE (gcc 1 ),, EXIT_SUCCESS (gcc 0) (main return 0; ) 2 gcc -c lex.c L2 lex_new lex_new(char *filename),,, filename lex.c, lex_new,, /* [ L2] lex_new() */, 1, lex_t malloc() 1: lex_t* x = (lex_t*) malloc(sizeof(lex_t)); 2: assert(x!=NULL); malloc NULL, assert malloc 2, lex_t lex_t lex.h typedef struct { char *filename; FILE *fp; int c; int linenum; lex_type_t type; char token[lex_TOKEN_MAXLEN+1]; int token_len; int val; lex_trace_t trace; } lex_t; 4 5

filename lex new filename fp c 1 linenum 1 type, token UNDEF lex type t (enum ), lexh token ( ) "" lex TOKEN MAXLEN, lexh int token len 0 val 0 trace ( ) lex 3 (lexh ), lex TRACE NO lex TRACE NO lex TRACE BY CHAR lex TRACE BY TOKEN, 1 (??? ) 1: assert(filename!=null); 2: x->filename = (char*) malloc(sizeof(char)*(strlen(filename)+1)); 3: assert(x->filename!=null); 4: strcpy(x->filename,filename); 5: if ((x->fp = fopen(x->filename,"r")) == NULL) { 6: fprintf(stderr, "lex: cannot open file %s\n", x->filename); 7: exit(exit FAILURE); 8: } 9: x->c =???; 10: x->linenum =???; 11: x->type =???; 12: token reset(x); 13: x->val =???; 14: x->trace = lex TRACE NO; 2 : filename 1 ( \0 ) 5 8 : fopen, x->fp 12 : token token len, token reset ( ) 3 lex typename L5 lex typename, define lex typename 1: define lex typename(); 4 6

4, return x; 5 L3 lex delete lex delete(lex *x), lex x, 1 1: assert(x!=null); 2: fclose(x->fp); 3: free(x->filename); 4: x->filename = NULL; 5: free(x); 6: x = NULL; 2 : 3 6 : malloc x->filename x free, NULL, x->token, x malloc, x free 2 L4 lex trace set lex trace set(lex *x, lex trace t trace), x->trace, 1 1: assert(x!=null); 2: assert(trace==lex TRACE NO 3: trace==lex TRACE BY CHAR 4: trace==lex TRACE BY TOKEN); 5: x->trace = trace; 2 L5 lex typename lex typename(lex type type), ( ),, else, token KW ELSE, 6, "KW ELSE" lex typename 1, define lex typename lex typename define lex typename, (+, -, *, /, %, &), [ L5], 4 7

2 lex typename 3 1: assert(token BEGIN <=type && type<token END ); 2: assert(lex typename [type][0]!=0); 3: return lex typename [type]; 1 : type token BEGIN, token END 1, lexh 2 : lex typename 3 : lex typename 3 L6 c get() c get(lex *x), x->fp 1, x->c 1 1, 1 fgetc(file*) 1: assert(x!=null); 2: x->c = fgetc(x->fp); 2, 3: if (x->c== \n ) { x->linenum++; } 3 lex TRACE BY CHAR, 1 4: if (x->trace==lex TRACE BY CHAR) { 5: if (x->c==eof) { 6: fprintf(stderr, " EOF\n"); 7: } 8: else { 9: fprintf(stderr, "%c", (char)(x->c)); 10: } 11: } 4 L7 token reset token reset(lex *x), x->token ( 0 ) 1 x->token len 0, x->token "" x->token = ""; 1, 1, x->token[0] = \0 ; strcpy(x->token, ""); 4 8

2 L8 token_c_append token_c_append(lex *x), x->token x->c, lex_TOKEN_MAXLEN, 1, x->token ( '\0' ) x->c x->token_len 1, ( ) '\0' assert(x!=NULL); if (x->token_len < lex_TOKEN_MAXLEN - 1) { x->token[x->token_len] =???; x->token_len++; x->token[x->token_len] = '\0'; } 2 443, lex_get lex_get(lex *x) 1, x->token, ( ) x->type, x->val lex_get 1: void lex_get(lex_t *x) 2: { 3: assert(x!=NULL); 4: token_reset(x); 5: x->type = token_UNDEF; 6: x->val = 0; 7: 8: lex_skip_space(x); 9: 10: if (isalpha(x->c)) { lex_get_kw_id(x); } 11: else if (isdigit(x->c)) { lex_get_int(x); } 12: else if (x->c=='\'') { lex_get_char(x); } 13: else { lex_get_others(x); } 14: 15: if (x->trace==lex_TRACE_BY_TOKEN) { 16: fprintf(stderr, "(linenum(%d) type(%s) token(\"%s\"))\n", 17: x->linenum, lex_typename(x->type), x->token); 18: } 19: } 4 6 :, 0 4 9

8 : 10 13 : x->c, ID (lex get kw id ), (lex get int ) ( ), (lex get char ), lex get others 15 18 : lex TRACE BY TOKEN,, L9 (lex skip space ) 1,, 2 lex skip space x->c c get(x) while (x->c ) { }, isspace(char) isspace, 0 0 isspace, lexc <ctypeh> include 3 lexc testlexc gcc -c lexc gcc -c testlexc gcc -o testlex testlexo lexo testlexc 2, gcc -o testlex testlexo lexc testlex testlextxt /testlex testlextxt, testlextxt:4: invalid token ( ), testlextxt 4, testlextxt, 3,, 4 a ( ) 4 10

1: 2: ( 3 ) 3: ( 3 ) 4: a b c abc ABC main x123 p000ax3 5: char else if int return while 6: program u p0x A0x 774 7: 1 2 12 123 0123456 00123456789 8: a b 0 \t \n \ \\ 9: ;, () {} [] 10: -+*/%&= ==!= > >= < <= L10 (lex get kw id ) 1,, x->token x->token 2 lex get kw id, x->c x->token x->token "", token c append(x) x->c 1 (, 1 ), x->token while(x->c ) { } isalpha, isdigit, (x->token) x->type (token ID) x->type if (strcmp(x->token,"char" )==0) { x->type = token KW CHAR; } else if (strcmp(x->token,"else" )==0) { x->type = token KW ELSE; } else if (strcmp(x->token,"if" )==0) { x->type = token KW IF; } else { x->type = token ID; } /* */ 3 testlextxt OK 4 11

type = KW CHAR type = KW ELSE type = KW IF type = KW INT token = "a" token = "b" token = "c" token = "abc" token = "ABC" token = "main" token = "x123" token = "p000ax3" token = "char" token = "else" token = "if" token = "int" type = KW RETURN token = "return" type = KW WHILE token = "while" testlextxt:6: invalid character, testlextxt 5, 6 (6 ) char, else, if, int, return, while (, token = "char" type = KW CHAR ),, L11 ( ) (lex get kw id ) 1 isalpha(c), c isalpha (c) 2 /* */, isalpha (c) #define isalpha (c) (isalpha(c) (c)== ) isalpha 3, 6 token = " program" token = "u p0x A0x 774" testlextxt:7: invalid token L12 (lex get int ) 1, (token INT),, x->val, "123" ( ), x->val = 123 ( ) 4 12

2 lex get int, (x->type) x->type = token INT; x->token, 1 x->val x->c - 0 x->val = x->c - 0 ;, x->token x->val 10 (??? ) while(??? ) { x->val =???; } 3, 7 type = INT token = "1" val = 1 type = INT token = "2" val = 2 type = INT token = "12" val = 12 type = INT token = "123" val = 123 type = INT token = "0123456" val = 123456 type = INT token = "00123456789" val = 123456789 testlextxt:8: invalid token, val L13 (lex get char ) 1 ( ), (token CHAR) x->val, (\) ( \n ) 2 lex get char, 1 ( ) 4 13

x->type = token CHAR; x->val ( a ) 1, backslash (\), 2 if (x->c== \\ ) { if (x->c== n ) { x->val = \n ; } else if (x->c== \ ) { x->val = \ ; } \t \\ else { lex err(x, "invalid character"); } } else { x->val = x->c; },, abc, if (x->c!= \ ) { lex err(x, "character too long"); } 3, 8 val type = CHAR token = " 0 " val = 48 type = CHAR token = " \t " val = 9 type = CHAR token = " \n " val = 10 type = CHAR token = " \ " val = 39 type = CHAR token = " \\ " val = 92 type = SEMICOLON token = ";" type = COMMA token = "," testlextxt:9: invalid character L14 (lex get others ) 1 1, 2 lex get others 4 14

(;), ((, )), ({, }), ([,]) if (x->c==, ) { x->type = token COMMA;} else if (x->c== ; ) { x->type = token SEMICOLON;} 3, 9 type = SEMICOLON token = ";" type = COMMA token = "," type = LPAR token = "(" type = RPAR token = ")" type = LBRACE token = "{" type = RBRACE token = "}" type = LBRACK token = "[" type = RBRACK token = "]" testlextxt:10: invalid character L15 (lex get others ) 1, 2 1 (== ) 2 lex get others, 1 -, *, /, %, & else if (x->c== + ) { x->type = token PLUS;} =, == = 2, 1 (<, <=, >, >=,!=) else if (x->c== = ) { if (x->c== = ) { x->type = token EQ; } else { x->type = token ASSIGN; } } lex get others 2, EOF (end of file), 4 15

else if (x->c==EOF) { x->type = token_EOF; } else { lex_err(x, "invalid character"); } 3, type = MINUS token = "-" type = PLUS token = "+" type = STAR token = "*" type = SLASH token = "/" type = PERC token = "%" type = AMP token = "&" type = ASSIGN token = "=" type = EQ token = "==" type = NE token = "!=" type = GT token = ">" type = GE token = ">=" type = LT token = "<" type = LE token = "<=" >=!=, 2, tmp.txt, ./testlex testlex.txt > tmp.txt diff tmp.txt testlex_out.txt, Nagisa ISHIURA 4 16

lexh/lexc 1 lex TOKEN MAXLEN ( ) token BEGIN token END lex type t lex type t +1 2 lex type t token UNDEF token EOF (end of file) token ID (, ) token INT (123 ) token CHAR ( c, \n ) token KW CHAR char token KW ELSE else token KW IF if token KW INT int token KW RETURN return token KW WHILE while token PLUS + token MINUS - token STAR * token SLASH / token PERC % token AMP & token ASSIGN = token EQ == token NE!= token GT > token GE >= token LT < token LE <= token COMMA, token SEMICOLON ; token LPAREN ( token RPAREN ) token LBRACE { token RBRACE } token LBRACK [ token RBRACK ] lex trace t lex TRACE NO lex TRACE BY CHAR lex TRACE BY TOKEN lex t char* filename FILE *fp int c 1 int linenum lex type type char token[lex TOKEN MAXLEN] int token len int val lex trace t trace 4 17

3 lex t* lex new(char *filename) void lex get(lex *x) void lex trace set (lex *x, lex trace t trace) char* lex typename (lex type t type) (lex t ) filename, 1 x->token, x->type,, x->val ( ) ( ) void lex err(lex *x, char *msg) msg, void lex delete(lex *x) 4 lexc,,, lex new lex TYPENAME MAXLEN char lex typename [token MAX ][lex TYPENAME MAXLEN+1] isalpha (char c) void define lex typename() void c get(lex *x) void token reset(lex *x) void token c append(lex *x) c 0, 0 lex typanane, x->c x->token x->token x->c 4 18