3 lex yacc http://www.cs.info.mie-u.ac.jp/~toshi/lectures/compiler/ 2018 6 1
() () (parse tree) ( ((10 + 30) * 50) ) ( ( NUM 10 + NUM 30 ) * NUM 50 ) ( * ) ( + ) NUM 50 NUM NUM 10 30 (abstract syntax tree, AST) ( ((10 + 30) * 50) ) * + 50 10 30 (postfix notation) () e e (1) e e 1, e 2 op (e 1 op e 2 ) e e 1 e 2 op (2) e e e ( ((10 + 30) * 50) ) ((10 + 30) * 50) = (10 + 30) 50 * ((1) ) = 10 30 + 50 * ((1) ) = 10 30 + 50 * ((2) ) 2
() () () (intermediate representation, IR) (abstract syntax tree, AST) x = (10 + 30) * 50 = x * + 50 10 30 2 (postfix code) () x = (10 + 30) * 50 loadc 10 loadc 30 add loadc 50 mul store x 3 (3-address code) x = y op z () num = 62; ones = 0; while num > 0 do if num % 2 == 1 then ones = ones + 1; fi num = num / 2; done num = 62 ones = 0 L1: _t1 = num > 0 ifnot _t1 goto L2 _t2 = num % 2 _t3 = _t2 == 1 ifnot _t3 goto L3 _t4 = ones + 1 ones = _t4 L3: _t5 = num / 2 num = _t5 goto L1 L2: ( _ti) 3
() prog prog com ε com ; { /*(1)*/ print(.val); ( + ) { /*(2)*/.val = 1.val + 2.val; ( * ) { /*(3)*/.val = 1.val * 2.val; NUM { /*(4)*/.val = NUM.val; {. NUM.val NUM.val com.val.val 1, 2.val = 1.val + 2.val ( e 1 + e 2 ).val e 1 e 2 1.val 2.val NUM.val prog prog com.val =.val =.val =.val =.val = ε ( ( NUM + NUM.val =.val = ) *.val NUM = ) ; ( ( 10 + 30 ) * 50 ) ; { 4
prog com prog ε com ; { print(.val); ( op ) {.val = eval(1.val, op.type, 2.val); NUM {.val = NUM.val; op + { op.type = + ; * { op.type = * ; { eval(v1, ty, v2) v1, v2 ty prog ; ( + * ) NUM { i prog {1 ; ( + {2 * {3 ) {4 NUM {5 { /*1*/ print(.val); { /*2*/.op = + ; { /*3*/.op = * ; { /*4*/.val = eval(1.val,.op, 2.val); { /*5*/.val = NUM.val; () 2 A. V. M. S. R. J. D. 20095 19805.2 5
() lex yacc lex yacc /* calc.l -- */ %{ #include "calc.tab.h" void yyerror(char *); % // // %% // yylval [+*();] // { return *yytext; [0-9]+ { yylval = atoi(yytext); return NUM; [ \t\n]+ //. { yyerror("illegal character"); %% // void yyerror(char *msg) { printf("%s at %c \n", msg, *yytext); exit(1); /* calc.y -- */ %{ // extern int yylex(void); extern void yyerror(char *); #define YYSVAL int #include <stdio.h> % // %token NUM %% // () // () // // printf() // // prog : prog com /* */ ; com : ; { printf("%d\n", $1); ; // $n n : ( + ) { $$ = $2 + $4; ( * ) { $$ = $2 * $4; NUM { $$ = $1; ; // $$ %% int main(void) { return yyparse(); $ flex calc.l; bison -d calc.y; gcc lex.yy.c calc.tab.c -lfl # -d flex $ echo 0; ((10 + 30) * 50); a.out 0 2000 6
( calc.l ) /* calc.h -- */ enum { END = 0, NUM = 128 ; int yylval; /* calc.c -- */ #include "calc.tab.h" extern int yylex(void); extern void yyerror(char *); #include <stdio.h> int token; int lexval; // // () // () // printf() // // // void error(void) { yyerror("syntax error"); void read_token(void) { token = yylex(); lexval = yylval; void match_token(int tok) { if (token == tok) read_token(); else error(); int parse_(void); // // // void parse_prog(void) { /* prog -> ; */ while (token!= END) { int val = // parse_(); printf("%d\n", val); match_token( ; ); int parse_(void) { int val; // if (token == ( ) { /* -> ( + * ) */ match_token( ( ); int val1 = // parse_(); int op = token; if (token!= + && token!= * ) error(); read_token(); int val2 = // parse_(); match_token( ) ); val = (op == + )? val1+val2 : val1*val2; else { /* -> NUM */ val = lexval; // NUM match_token(num); return val; int main(void) { read_token(); parse_prog(); return 0; $ cp calc.h calc.tab.h; flex calc.l; gcc lex.yy.c calc.c -lfl $ echo 0; ((10 + 30) * 50); a.out 0 2000 7
() ( op ) { /*(1)*/ gen(op.s); NUM { /*(2)*/ gen(num.s); op + { /*(3)*/ op.s = "+"; * { /*(4)*/ op.s = "*"; op.s, NUM.s gen() () op.s = op.s = ( ( NUM.s = + NUM.s = ) * NUM.s = ) ( ( 10 + 30 ) * 50 ) ( op ) {.tr = make_op(op.ty, 1.tr, 2.tr); NUM {.tr = make_num(num.val); op + { op.ty = + ; * { op.ty = * ;.tr op.ty NUM.val () make_op(), make_() 8
3 stm ID { stm.var = use_var(id.val); = ; { gen_asmt(stm.var,.res); + {.res = gen_opr( +, 1.res, 2.res); * {.res = gen_opr( *, 1.res, 2.res); ( ) {.res = 1.res; ID {.res = use_var(id.val); NUM {.res = use_con(num.val); (: stm ) * + * + () stm.var.res ID.val, NUM.val gen_asmt(), gen_opr() use_var(), use_con() 3 if then 1 else 2 fi while do done L1 : L2 : ( tmp) ifnot tmp goto L1 1 goto L2 2 L1 : L2 : ( tmp) ifnot tmp goto L2 goto L1 seq seq stm ε stm { stm.lab1 = new_lab(); stm.lab2 = new_lab(); if { gen_ifnot_goto(.res, stm.lab1); then seq { gen_goto(stm.lab2); gen_lab(stm.lab1); else seq fi { gen_lab(stm.lab2); { stm.lab1 = new_lab(); stm.lab2 = new_lab(); gen_label(stm.lab1); while { gen_ifnot_goto(.res, stm.lab2); do seq done { gen_goto(stm.lab1); gen_lab(stm.lab2); stm.lab1, stm.lab2.res new_lab() gen_ifnot_goto(), gen_goto() gen_label() 9
() 1 : - NUM {.val = 1.val - NUM.val; 2 : NUM {.val = NUM.val; 9-3 - 2.val = 4.val = 6.val = 9 NUM.val = 9 - NUM.val = 3 - NUM.val = 2 1 : NUM 2 : - NUM 3 : ε NUM.val = 9 - NUM.val = 3 - NUM.val = 2 ε 10
() 1 : NUM 2 : - NUM 3 : ε 2 i (input) o (output).o =.i =.o =.i =.o =.i =.o = NUM.o = 9 - NUM.o = 3 - NUM.o = 2 ε 1 : NUM {1a {1b 2 : - {2a NUM {2b 3 : ε {3 { /*1a*/.i = NUM.o; { /*1b*/.o =.o; { /*2a*/ 1.i =.i - NUM.o; { /*2b*/.o = 1.o; { /*3*/.o =.i; NUM {1 - NUM {2 { /*1*/.val = NUM.val; { /*2*/.val =.val - NUM.val; val 11
() () ( ). ( ) SP BP BP PC () 2 A. V. M. S. R. J. D. 20097 19806 12
() 2 C 2 2 void binary(int n) { int b = n%2; if (n >= 2) binary(n/2); putchar( 0 + b); int main(void) { binary(6); return 0; 2 binary() 2 putchar() main(), binary(), putchar() (call tree) main() binary(6) binary(3) putchar( 0 ) binary(1) putchar( 1 ) putchar( 1 ) 1 13
() gcc $ gcc -S ones.c -o ones.s # -S $ gcc -S -O3 ones.c -o ones-opt.s # -O3 $ wc -l ones.s; wc -l ones-opt.s # -l 62 ones.s 38 ones-opt.s $ cat ones.c ones.s ones-opt.s # (x86-64) int num_ones(int num) { num_ones: num_ones: int ones = 0; pushq %rbp xorl %eax, %eax while (num > 0) { movq %rsp, %rbp testl %edi, %edi if (num % 2 == 1) movl %edi, -20(%rbp) jle.l3 ones++; movl $0, -4(%rbp).L8: num /= 2; jmp.l2 leal 1(%rax), %edx.l4: testb $1, %dil return ones; movl -20(%rbp), %eax cmovne %edx, %eax movl %eax, %edx sarl %edi sarl $31, %edx jne.l8 int main(void) { shrl $31, %edx.l3: num_ones(62); addl %edx, %eax rep return 0; andl $1, %eax ret subl %edx, %eax main: cmpl $1, %eax xorl %eax, %eax jne.l3 ret addl $1, -4(%rbp).L3: movl -20(%rbp), %eax movl %eax, %edx shrl $31, %edx leal (%rdx,%rax), %eax sarl %eax movl %eax, -20(%rbp).L2: cmpl $0, -20(%rbp) jg.l4 movl -4(%rbp), %eax leave ret main: pushq %rbp movq %rsp, %rbp movl $62, %edi call num_ones movl $0, %eax leave ret gcc man gcc 14
() num = 62; ones = 0; while num > 0 do if num % 2 == 1 then ones = ones + 1; fi num = num / 2; done 2 num = 62 ones = 0 L1: _t1 = num > 0 ifnot _t1 goto L2 _t2 = num % 2 _t3 = _t2 == 1 ifnot _t3 goto L3 _t4 = ones + 1 ones = _t4 L3: _t5 = num / 2 num = _t5 goto L1 L2: num = 62 ones = 0 L1: _t1 = num > 0 ifnot _t1 goto L2 _t2 = num % 2 _t3 = _t2 == 1 ifnot _t3 goto L3 _t4 = ones + 1 ones = _t4 ( 1) ( 2) L3: _t5 = num / 2 num = _t5 goto L1 L2: 15
() () low = 0; high = 10; mid = (low + high) / 2; low = 0; high = 10; mid = ( + ) / 2; if (size > 10 * 5)... if (size > )... (1 + n) - 1 n * 8 x & x *&p s1 = size * n;.. s2 = size * n; s2 = dst = src dst src dst src old = n;... new = old + 1; new = old old = n; 16
DAG DAG (directed acyclic graph) 1: j = i + 1 2: i = k 3: x = i + 1 4: y = k + 1 DAG DAG (1) v v 0 (2) i () n n i v = a op b n op n a, b () n i v = a n a () DAG 1: j = i + 1 2: i = k 3: x = i + 1 4: y = k + 1 j = i + 1 i = k x = y = 2 A. V. M. S. R. J. D. 20098.5 9 19809 17
() do { c = a + b;.. while (... ) do { while (... ) x = 0 for (i = 1;... ; i++) { x = 5*i; x = 0 for (i = 1;... ; i++) { x for ( ;... ; ) { for (i = 3; i > 0; i--) val *= 5; val *= 5; for (... ; i++) { for (... ; i += 2) { 18
int power(int m, int n) { int i, val = 1; for (i = n; i > 0; i--) val *= m; return val;. y = power(x, 3);. int i, val = 1; for (i = n; i > 0; i--) val *= m; goto int pwr(int m, int n, int val) { if (n > 0) return pwr(m, n-1, val*m); else return val; int pwr(int m, int n, int val) { if (n > 0) { else { return val; 2 A. V. M. S. R. J. D. 20098.5 9 19809 19
() 3 n y m = 3 val = 1 i = n L1: ifnot i > 0 goto L2 i = i - 1 goto L1 L2: y = val i In i, Out i 8 i Gen(i) Kill(i) In 1 = Out 1 = (In 1 \ ) In 2 = Out 2 = (In 2 \ ) In 3 = Out 3 = (In 3 \ ) In 4 = Out 4 = (In 4 \ ) 20
In 1 = Out 1 = {1m, 1v, 1i (In 1 \ {3v, 3i) In 2 = Out 1 Out 3 Out 2 = In 2 In 3 = Out 2 Out 3 = {3v, 3i (In 3 \ {1v, 1i) In 4 = Out 2 Out 4 = {4y In 4 () +1m +1v 3v +1i 3i +3v 1v +3i 1i +4y 0 1 2 3 In 1 Out 1 In 2 Out 2 In 3 Out 3 In 4 Out 4 1v, 3v In 4 1 3 val 4 ( val ) In 3 m 1m 1 m 3 ( m ) 2 A. V. M. S. R. J. D. 20099.2 9.3 19809.5 21
() 2 op reg loc op +, -, *, / reg loc reg reg op loc LD reg loc reg loc reg loc e numreg(e) e numreg(e) = 0 e e 1 op e 2 numreg(e 1 ) = n 1, numreg(e 2 ) = n 2 numreg(e) n 2 = 0 > 0 n 1 op : op : = 0 1 n 2 max(2, n 2 ) > 0 n 2 + 1 max(n 1, n 2 ) n 1 n 2 = n 1 n 1 22
numreg n 2 = 0 > 0 n 1 op : op : = 0 1 n 2 max(2, n 2 ) > 0 n 2 + 1 max(n 1, n 2 ) n 1 n 2 = n 1 n 1 1 * x + w * v 5 2 / y - x / w * v 5 LD R1 v * R1 5 + R1 w * R1 x LD R1 v * R1 5 LD R2 w / R2 R1 LD R1 x - R1 R2 LD R2 y / R2 R1 3 + + / v 5 w 3 4 + + / x y w * v 5 LD R1 v * R1 5 LD R2 w / R2 3 + R1 R2 LD R1 v * R1 5 LD R2 w / R2 R1 LD R1 x * R1 y + R1 R2 23
() diff = m - n sq = n * n val = m * sq () diff = m - n sq = n * n val = m * sq m n d s v 2 (2 ) m v n s d k k ( k ) k () k 24
3 3 () v m n n 2 v m m 1 v d 3 v s 2 v v 1 () s d s d s d s () 19808 2 A. V. M. S. R. J. D. 20098 25