I am very new to Bison/Flex. I wrote a program that reads a C file and identifies the functions defined in it, and it works. Now I am wondering: how can I take a predefined lex file from another open-source project and use its tokens in my own lex file to produce the output?
To put it more simply: is it possible to combine two or more lex files and use them together as the input from which the Bison (.y) file reads its tokens?
Please advise. Thanks.
To make it clear, here is a sample:
c.l (source --> http://www.lysator.liu.se/c/ANSI-C-grammar-l.html)
D [0-9]
L [a-zA-Z_]
H [a-fA-F0-9]
E [Ee][+-]?{D}+
FS (f|F|l|L)
IS (u|U|l|L)*
%{
/* this scanner sourced from: http://www.lysator.liu.se/c/ANSI-C-grammar-l.html */
/*
 * Name definitions above, as used by the rules below:
 *   D   decimal digit
 *   L   character that may start an identifier (letter or underscore)
 *   H   hexadecimal digit
 *   E   exponent part of a floating constant
 *   FS  suffix of a floating constant
 *   IS  suffix of an integer constant (zero or more letters)
 */
#include <stdio.h>
#include <string.h>
#define YYSTYPE void *
/*
 * NOTE(review): a parser that declares %union provides its own YYSTYPE;
 * this define looks like it would clash if both end up in the same
 * translation unit -- confirm before combining with such a parser.
 */

/*
 * Helpers defined in the user-code section at the end of this file.
 * They are called from rule actions inside yylex(), which is generated
 * before their definitions, so every one of them must be declared here;
 * otherwise the calls are implicit function declarations.
 */
void count(void);
int comment(void);
int check_type(void);
%}
%%
    /*
     * Token rules for C source text. Every action that returns a token
     * first calls count() to advance the column counter. Keyword rules are
     * listed before the generic identifier rule so that, for a match of the
     * same length, the keyword rule is the one selected.
     */
"/*" { comment(); }
"auto" { count(); return(AUTO); }
"break" { count(); return(BREAK); }
"case" { count(); return(CASE); }
"char" { count(); return(CHAR); }
"const" { count(); return(CONST); }
"continue" { count(); return(CONTINUE); }
"default" { count(); return(DEFAULT); }
"do" { count(); return(DO); }
"double" { count(); return(DOUBLE); }
"else" { count(); return(ELSE); }
"enum" { count(); return(ENUM); }
"extern" { count(); return(EXTERN); }
"float" { count(); return(FLOAT); }
"for" { count(); return(FOR); }
"goto" { count(); return(GOTO); }
"if" { count(); return(IF); }
"int" { count(); return(INT); }
"long" { count(); return(LONG); }
"register" { count(); return(REGISTER); }
"return" { count(); return(RETURN); }
"short" { count(); return(SHORT); }
"signed" { count(); return(SIGNED); }
"sizeof" { count(); return(SIZEOF); }
"static" { count(); return(STATIC); }
"struct" { count(); return(STRUCT); }
"switch" { count(); return(SWITCH); }
"typedef" { count(); return(TYPEDEF); }
"union" { count(); return(UNION); }
"unsigned" { count(); return(UNSIGNED); }
"void" { count(); return(VOID); }
"volatile" { count(); return(VOLATILE); }
"while" { count(); return(WHILE); }
    /* Identifiers: check_type() decides which token code is returned. */
{L}({L}|{D})* { count(); return(check_type()); }
    /* Integer, character and floating constants all map to CONSTANT. */
0[xX]{H}+{IS}? { count(); return(CONSTANT); }
0{D}+{IS}? { count(); return(CONSTANT); }
{D}+{IS}? { count(); return(CONSTANT); }
L?'(\\.|[^\\'])+' { count(); return(CONSTANT); }
{D}+{E}{FS}? { count(); return(CONSTANT); }
{D}*"."{D}+({E})?{FS}? { count(); return(CONSTANT); }
{D}+"."{D}*({E})?{FS}? { count(); return(CONSTANT); }
L?\"(\\.|[^\\"])*\" { count(); return(STRING_LITERAL); }
    /* Multi-character operators come before their single-character prefixes. */
"..." { count(); return(ELLIPSIS); }
">>=" { count(); return(RIGHT_ASSIGN); }
"<<=" { count(); return(LEFT_ASSIGN); }
"+=" { count(); return(ADD_ASSIGN); }
"-=" { count(); return(SUB_ASSIGN); }
"*=" { count(); return(MUL_ASSIGN); }
"/=" { count(); return(DIV_ASSIGN); }
"%=" { count(); return(MOD_ASSIGN); }
"&=" { count(); return(AND_ASSIGN); }
"^=" { count(); return(XOR_ASSIGN); }
"|=" { count(); return(OR_ASSIGN); }
">>" { count(); return(RIGHT_OP); }
"<<" { count(); return(LEFT_OP); }
"++" { count(); return(INC_OP); }
"--" { count(); return(DEC_OP); }
"->" { count(); return(PTR_OP); }
"&&" { count(); return(BOOL_AND_OP); }
"||" { count(); return(BOOL_OR_OP); }
"<=" { count(); return(LE_OP); }
">=" { count(); return(GE_OP); }
"==" { count(); return(EQ_OP); }
"!=" { count(); return(NE_OP); }
    /*
     * Punctuation. The two-character spellings <% %> <: :> return the same
     * token as the brace or bracket they are paired with in the pattern.
     */
";" { count(); return(SEMICOLON); }
("{"|"<%") { count(); return(OCB); }
("}"|"%>") { count(); return(CCB); }
"," { count(); return(COMMA); }
":" { count(); return(COLON); }
"=" { count(); return(EQU); }
"(" { count(); return(OP); }
")" { count(); return(CP); }
("["|"<:") { count(); return(LBRACKET); }
("]"|":>") { count(); return(RBRACKET); }
"." { count(); return(PERIOD); }
"&" { count(); return(AND_OP); }
"!" { count(); return(BANG); }
"~" { count(); return(TILDE); }
"-" { count(); return(MINUS); }
"+" { count(); return(ADD); }
"*" { count(); return(STAR); }
"/" { count(); return(SLASH); }
"%" { count(); return(PERCENT); }
"<" { count(); return(LT_OP); }
">" { count(); return(GT_OP); }
"^" { count(); return(CIRCUMFLEX); }
"|" { count(); return(OR_OP); }
"?" { count(); return(QUESTIONMARK); }
    /* Whitespace only advances the column counter; no token is returned. */
[ \t\v\n\f] { count(); }
    /* Any other character is dropped; note that count() is not called here. */
. { /* ignore bad characters */ }
%%
/*
 * Skip the remainder of a C block comment.
 *
 * Called from the "/*" rule once the opening delimiter has been matched.
 * Consumes characters from the scanner input up to and including the
 * closing delimiter (a '*' immediately followed by '/').
 *
 * End of input inside the comment terminates the scan instead of looping:
 * depending on the lex/flex version, input() signals end of input with 0
 * or with EOF, so both are treated as "no more characters". The result of
 * input() is kept in an int so that EOF is not truncated.
 *
 * Returns 0; the value carries no information and the rule ignores it.
 * The return type stays int so the function remains compatible with
 * callers that reach it through an implicit declaration.
 */
int comment(void)
{
    int prev = 0; /* previously consumed character; 0 means "none yet" */
    int c;

    while ((c = input()) != 0 && c != EOF) {
        if (prev == '*' && c == '/') {
            break; /* closing delimiter consumed */
        }
        prev = c;
    }
    return 0;
}
/* Zero-based column reached after the most recently counted token. */
int column = 0;

/*
 * Advance `column` over the text of the current match (yytext):
 * a newline resets it to 0, a tab moves it to the next multiple of 8,
 * and every other character moves it forward by one.
 */
void count()
{
    const char *p;

    for (p = yytext; *p != '\0'; ++p) {
        switch (*p) {
        case '\n':
            column = 0;
            break;
        case '\t':
            column += 8 - (column % 8);
            break;
        default:
            column++;
            break;
        }
    }
}
/*
 * Classify the identifier that was just matched.
 *
 * Intended behaviour (not implemented): return TYPE_NAME when yytext
 * names a type, and IDENTIFIER otherwise.
 *
 * Actual behaviour: every identifier is reported as IDENTIFIER.
 */
int check_type()
{
    const int token = IDENTIFIER;

    return token;
}
I want to combine it with (or use it from) the file below, i.e.,
lexer.l
%{
/*
 * Prologue of the second scanner specification.
 *
 * NOTE(review): the file named in the first #include below is itself a
 * flex specification (it contains name definitions, %% separators and
 * pattern rules), not C. Text inside this prologue is handed to the C
 * compiler, so that include will presumably not make the other file's
 * rules part of this scanner -- confirm; merging the rules into a single
 * specification is the usual alternative.
 */
#include "c.l"
#include <stdio.h>
/* Token codes and the yylval declaration produced by bison from parser_test.y. */
#include "parser_test.tab.h"
%}
%%
"{" { yylval.str = strdup(yytext); return OCB; }
"}" { yylval.str = strdup(yytext); return CCB; }
/* MANY MORE TOKENS TO ADD */
%%
Finally, here is the only Bison file (.y), which uses tokens from these two:
parser_test.y
%{
/* C prologue of the parser. */
#include <stdio.h>
/* Textually includes the generated scanner, so scanner and parser are built as one translation unit. */
#include "lex.yy.c"
int yyerror ();
int yylineno;
/*
 * a: set by the `function` rule to the function's name.
 * b: set by the call-statement alternative of `statement` to the callee's name.
 * Both are printed by main() after a successful parse.
 */
char* a;
char* b;
%}
/* Semantic values are C strings. */
%union {
char *str;
}
%define parse.error verbose
/*
 * NOTE(review): the names typed below are terminals and are listed again
 * under %token; NUMBER is declared but not used by the rules shown, and
 * PTR differs from the PTR_OP name returned by c.l -- confirm which token
 * names the combined scanner is meant to return.
 */
%type <str> INT CONST CONSTANT RETURN IDENTIFIER ADD EQU PTR SEMICOLON OP CP OCB CCB COMMA
%token INT CONST CONSTANT RETURN IDENTIFIER ADD EQU PTR SEMICOLON NUMBER OP CP OCB CCB COMMA
%start Program
%%
/* A program is one or more top-level declarations followed by one or more functions. */
Program: outStatements
functions
;
functions: function
| functions function
;
/*
 * A function definition: `int name ( params ) { statements }`.
 * The action records the name in `a` and prints it.
 * NOTE(review): printing $2 with %s relies on the scanner storing a string
 * in yylval.str for IDENTIFIER; the scanner rules shown only do that for
 * the braces -- confirm.
 */
function: INT IDENTIFIER OP parametersList CP OCB statementList CCB { a=$2; printf("\nFunction Defined : %s\n", $2); }
;
parametersList: /*empty*/
| parameters
;
parameters: parameter
| parameters COMMA parameter
;
/* A single `int name` parameter; the action prints both semantic values. */
parameter: INT IDENTIFIER { printf("\nPARAMETER NAME: %s\nPARAMETER INT :%s\n", $2, $1); }
;
statementList: /*empty*/
| statements
;
statements: statement
| statements statement
;
/*
 * Three fixed statement shapes are accepted inside a function body:
 *   return x + y ;
 *   int x , y = CONSTANT , z = CONSTANT ;
 *   x = f ( y , z ) ;      -- the callee name ($3) is stored in `b`
 */
statement: RETURN IDENTIFIER ADD IDENTIFIER SEMICOLON
| INT IDENTIFIER COMMA IDENTIFIER EQU CONSTANT COMMA IDENTIFIER EQU CONSTANT SEMICOLON
| IDENTIFIER EQU IDENTIFIER OP IDENTIFIER COMMA IDENTIFIER CP SEMICOLON { b = $3; }
;
outStatements: outStatement
| outStatements outStatement
;
/* Top-level declarations: `int PTR name ;` or `const int name = CONSTANT ;`. */
outStatement: INT PTR IDENTIFIER SEMICOLON
| CONST INT IDENTIFIER EQU CONSTANT SEMICOLON
;
%%
/*
 * Program entry point: parse the file named by the first argument.
 *
 * argv[1] is opened for reading and installed as the scanner input (yyin)
 * before yyparse() runs. On a successful parse the names captured by the
 * grammar actions in `a` and `b` are printed.
 *
 * Returns 0 when the file was parsed successfully, 1 when the argument is
 * missing, the file cannot be opened, or parsing fails.
 */
int main (int argc, char * argv[])
{
    if (argc < 2)
    {
        fprintf(stderr, "usage: %s <source-file>\n",
                (argc > 0 && argv[0] != NULL) ? argv[0] : "parser_test");
        return 1;
    }

    yyin = fopen(argv[1], "r");
    if (yyin == NULL)
    {
        /* Without this check a NULL stream would reach the scanner and fclose(). */
        perror(argv[1]);
        return 1;
    }

    int err_code = yyparse();
    if (err_code == 0)
    {
        /* a/b stay NULL when the matching rule never fired; "%s" with NULL is undefined. */
        printf("\nFunction called : '%s' from '%s'\n",
               b != NULL ? b : "(none)", a != NULL ? a : "(none)");
        printf("\nParsing Done !!!\n");
    }
    else
    {
        printf("\nUNSUCCESSFUL ....\n");
    }

    fclose(yyin);
    return err_code == 0 ? 0 : 1;
}
/*
 * Error callback invoked by the generated parser.
 *
 * Writes the message `s` to stderr together with the current value of
 * yylineno. The parameter is const-qualified because the parser passes
 * message strings that must not be modified.
 *
 * Returns 0 (the function is declared int, so it must return a value).
 */
int yyerror (const char* s)
{
    fprintf(stderr, "\nError on Line: %d :: %s\n" , yylineno, s);
    return 0;
}
/*
 * End-of-input hook called by the scanner.
 * Always returns 1, i.e. there is no further input to switch to.
 */
int yywrap()
{
    enum { NO_MORE_INPUT = 1 };

    return NO_MORE_INPUT;
}
How can I achieve this?