0
votes

I am very new to Bison/Flex. I wrote a program that reads a C file and identifies the functions defined in it, and it works. Now I am wondering: how can I take a predefined lex file from another open-source project and use its tokens in my own lex file to produce the output?

To put it more simply: is it possible to combine two or more lex files and use them together as the token source that the Bison (.y) file reads from?

Any suggestions would be appreciated. Thanks.

To make it clear, here is a sample:

c.l (source --> http://www.lysator.liu.se/c/ANSI-C-grammar-l.html)

/*
 * Named patterns used by the rules of this scanner:
 *   D   a decimal digit
 *   L   a letter or underscore
 *   H   a hexadecimal digit
 *   E   an exponent: e or E, an optional sign, then one or more digits
 *   FS  a single floating suffix character (f, F, l or L)
 *   IS  zero or more integer suffix characters (u, U, l or L)
 */
D           [0-9]
L           [a-zA-Z_]
H           [a-fA-F0-9]
E           [Ee][+-]?{D}+
FS          (f|F|l|L)
IS          (u|U|l|L)*


%{

/* this scanner sourced from: http://www.lysator.liu.se/c/ANSI-C-grammar-l.html */

/* Forward declaration: count() is defined in the user-code section but is
 * called from the rule actions, which come first. */
void count();
#include <stdio.h>
#include <string.h>
/* NOTE(review): parser_test.y declares its semantic value type with %union;
 * this YYSTYPE override may conflict with it when the two are built
 * together -- confirm before combining. */
#define YYSTYPE void *

%}


%%
    /* Opening of a block comment: the remainder is consumed by comment(). */
"/*"            { comment(); }

    /* Keywords: each action calls count() and returns the keyword's own token. */
"auto"          { count(); return(AUTO); }
"break"         { count(); return(BREAK); }
"case"          { count(); return(CASE); }
"char"          { count(); return(CHAR); }
"const"         { count(); return(CONST); }
"continue"      { count(); return(CONTINUE); }
"default"       { count(); return(DEFAULT); }
"do"            { count(); return(DO); }
"double"        { count(); return(DOUBLE); }
"else"          { count(); return(ELSE); }
"enum"          { count(); return(ENUM); }
"extern"        { count(); return(EXTERN); }
"float"         { count(); return(FLOAT); }
"for"           { count(); return(FOR); }
"goto"          { count(); return(GOTO); }
"if"            { count(); return(IF); }
"int"           { count(); return(INT); }
"long"          { count(); return(LONG); }
"register"      { count(); return(REGISTER); }
"return"        { count(); return(RETURN); }
"short"         { count(); return(SHORT); }
"signed"        { count(); return(SIGNED); }
"sizeof"        { count(); return(SIZEOF); }
"static"        { count(); return(STATIC); }
"struct"        { count(); return(STRUCT); }
"switch"        { count(); return(SWITCH); }
"typedef"       { count(); return(TYPEDEF); }
"union"         { count(); return(UNION); }
"unsigned"      { count(); return(UNSIGNED); }
"void"          { count(); return(VOID); }
"volatile"      { count(); return(VOLATILE); }
"while"         { count(); return(WHILE); }

    /* Any other name: the token is whatever check_type() returns. */
{L}({L}|{D})*       { count(); return(check_type()); }

    /* Hexadecimal, zero-prefixed and decimal integers, and character
     * constants, all reported as CONSTANT. */
0[xX]{H}+{IS}?      { count(); return(CONSTANT); }
0{D}+{IS}?      { count(); return(CONSTANT); }
{D}+{IS}?       { count(); return(CONSTANT); }
L?'(\\.|[^\\'])+'   { count(); return(CONSTANT); }

    /* Floating constants (exponent form and the two decimal-point forms),
     * also reported as CONSTANT. */
{D}+{E}{FS}?        { count(); return(CONSTANT); }
{D}*"."{D}+({E})?{FS}?  { count(); return(CONSTANT); }
{D}+"."{D}*({E})?{FS}?  { count(); return(CONSTANT); }

    /* String literals, optionally prefixed with L. */
L?\"(\\.|[^\\"])*\" { count(); return(STRING_LITERAL); }

    /* Operators and punctuation. The two-character spellings <% %> <: :>
     * return the same tokens as the brace and bracket characters. */
"..."           { count(); return(ELLIPSIS); }
">>="           { count(); return(RIGHT_ASSIGN); }
"<<="           { count(); return(LEFT_ASSIGN); }
"+="            { count(); return(ADD_ASSIGN); }
"-="            { count(); return(SUB_ASSIGN); }
"*="            { count(); return(MUL_ASSIGN); }
"/="            { count(); return(DIV_ASSIGN); }
"%="            { count(); return(MOD_ASSIGN); }
"&="            { count(); return(AND_ASSIGN); }
"^="            { count(); return(XOR_ASSIGN); }
"|="            { count(); return(OR_ASSIGN); }
">>"            { count(); return(RIGHT_OP); }
"<<"            { count(); return(LEFT_OP); }
"++"            { count(); return(INC_OP); }
"--"            { count(); return(DEC_OP); }
"->"            { count(); return(PTR_OP); }
"&&"            { count(); return(BOOL_AND_OP); }
"||"            { count(); return(BOOL_OR_OP); }
"<="            { count(); return(LE_OP); }
">="            { count(); return(GE_OP); }
"=="            { count(); return(EQ_OP); }
"!="            { count(); return(NE_OP); }
";"         { count(); return(SEMICOLON); }
("{"|"<%")      { count(); return(OCB); }
("}"|"%>")      { count(); return(CCB); }
","         { count(); return(COMMA); }
":"         { count(); return(COLON); }
"="         { count(); return(EQU); }
"("         { count(); return(OP); }
")"         { count(); return(CP); }
("["|"<:")      { count(); return(LBRACKET); }
("]"|":>")      { count(); return(RBRACKET); }
"."         { count(); return(PERIOD); }
"&"         { count(); return(AND_OP); }
"!"         { count(); return(BANG); }
"~"         { count(); return(TILDE); }
"-"         { count(); return(MINUS); }
"+"         { count(); return(ADD); }
"*"         { count(); return(STAR); }
"/"         { count(); return(SLASH); }
"%"         { count(); return(PERCENT); }
"<"         { count(); return(LT_OP); }
">"         { count(); return(GT_OP); }
"^"         { count(); return(CIRCUMFLEX); }
"|"         { count(); return(OR_OP); }
"?"         { count(); return(QUESTIONMARK); }

    /* Whitespace updates the column but produces no token; any other
     * character is dropped without calling count(). */
[ \t\v\n\f]     { count(); }
.           { /* ignore bad characters */ }

%%


/*
 * comment - consume the remainder of a C block comment.
 *
 * Called from the scanner rule that matches the opening comment delimiter.
 * Reads characters with input() until the closing star-slash pair has been
 * consumed, or until the input runs out.
 *
 * Characters are held in an int rather than a char because input() returns
 * an int. Depending on the scanner generator and its version, end of input
 * is reported as 0 or as EOF; the original stored the result in a char and
 * only tested for 0, so an EOF return could go unnoticed and an unterminated
 * comment could loop forever. Both values are treated as end of input here.
 *
 * The return type is spelled out as int (the original relied on implicit
 * int) so the definition still agrees with the undeclared call in the rules
 * section. The value is always 0 and carries no meaning.
 */
int comment(void)
{
    int c;

    for (;;) {
        /* Skip ahead to the next star, or to the end of the input. */
        do {
            c = input();
        } while (c != '*' && c != 0 && c != EOF);

        if (c != '*') {
            /* Input ended inside the comment. */
            return 0;
        }

        /* Swallow a run of stars so that several stars followed by a
         * slash still close the comment. */
        do {
            c = input();
        } while (c == '*');

        if (c == '/' || c == 0 || c == EOF) {
            return 0;
        }
        /* Anything else is ordinary comment text: keep scanning. */
    }
}


/* Running column position; maintained by count(). */
int column = 0;

/*
 * Walk over the text of the current match (yytext) and update `column`:
 * a newline resets it to 0, a tab moves it to the next multiple of 8,
 * and every other character advances it by one.
 */
void count()
{
    const char *cursor;

    for (cursor = yytext; *cursor != '\0'; ++cursor) {
        switch (*cursor) {
        case '\n':
            column = 0;
            break;
        case '\t':
            /* Same as adding 8 - (column % 8). */
            column = column - (column % 8) + 8;
            break;
        default:
            column += 1;
            break;
        }
    }
}


/*
 * Decide which token a matched name should be reported as.
 *
 * Intended behaviour (not implemented): return TYPE_NAME when the matched
 * text is a type name, and IDENTIFIER otherwise. As written, every name is
 * reported as IDENTIFIER.
 */
int check_type()
{
    return IDENTIFIER;
}

I want to combine it (or use it) with the file below:

lexer.l


%{

/*
 * NOTE(review): c.l is itself a lex specification (definitions, rules and
 * user code), not C source, so including it here is unlikely to merge its
 * rules into this scanner -- confirm.
 */
#include "c.l"
#include <stdio.h>
#include "parser_test.tab.h"

%}


%%

    /* Braces: hand a strdup() copy of the matched text to the parser
     * through yylval.str and return the brace token. */

"{"                     { yylval.str = strdup(yytext); return OCB; }
"}"                     { yylval.str = strdup(yytext); return CCB; }

/* MANY MORE TOKENS TO ADD */

%%

Finally, here is the single Bison (.y) file that uses the tokens from both of these:

parser_test.y


%{

#include    <stdio.h>
/* The generated scanner source is compiled as part of this file. */
#include "lex.yy.c"
int yyerror ();
int yylineno;
/* Name taken from the most recently reduced `function` rule. */
char* a;
/* Third symbol of the most recently reduced call-style `statement`
 * (the name that appears right after the EQU token). */
char* b;

%}

/* Semantic values are strings. */
%union {
    char *str;
}

%define parse.error verbose

%type <str> INT CONST CONSTANT RETURN IDENTIFIER ADD EQU PTR SEMICOLON OP CP OCB CCB COMMA

/* NUMBER is declared here but has no <str> type and is not used by the grammar. */
%token INT CONST CONSTANT RETURN IDENTIFIER ADD EQU PTR SEMICOLON NUMBER OP CP OCB CCB COMMA

%start Program

%%

/* A program is one or more file-scope statements followed by one or more
 * function definitions. */
Program:                outStatements
                        functions
                        ;

functions:              function
                        | functions function
                        ;

/* INT name OP parameters CP OCB statements CCB. The name is stored in `a`
 * and printed. */
function:               INT IDENTIFIER OP parametersList CP OCB statementList CCB  { a=$2; printf("\nFunction Defined : %s\n", $2); }
                        ;

parametersList:         /*empty*/
                        | parameters
                        ;

/* Comma-separated list of one or more parameters. */
parameters:             parameter
                        | parameters COMMA parameter
                        ;

/* A single INT parameter; prints the values of both symbols. */
parameter:              INT IDENTIFIER       { printf("\nPARAMETER NAME: %s\nPARAMETER INT :%s\n", $2, $1); }
                        ;

statementList:          /*empty*/
                        | statements 
                        ;

statements:             statement
                        | statements statement
                        ;

/* Exactly three fixed statement shapes are accepted:
 *   RETURN of the sum of two names;
 *   an INT declaration of three names, the last two initialised with constants;
 *   a name assigned the result of a two-argument call, whose callee is
 *   stored in `b`. */
statement:              RETURN IDENTIFIER ADD IDENTIFIER SEMICOLON
                        | INT IDENTIFIER COMMA IDENTIFIER EQU CONSTANT COMMA IDENTIFIER EQU CONSTANT SEMICOLON
                        | IDENTIFIER EQU IDENTIFIER OP IDENTIFIER COMMA IDENTIFIER CP SEMICOLON     { b = $3; }
                        ;

outStatements:          outStatement
                        | outStatements outStatement
                        ;

/* File-scope statements: INT PTR name, or CONST INT name initialised with a
 * constant.
 * NOTE(review): PTR presumably stands for the star character, but c.l
 * returns STAR for it and never returns PTR -- confirm. */
outStatement:           INT PTR IDENTIFIER SEMICOLON
                        | CONST INT IDENTIFIER EQU CONSTANT SEMICOLON
                        ;

%%

/*
 * Entry point: parse the source file named by argv[1].
 *
 * Fails with a usage message when no file name is given, and reports the
 * reason when the file cannot be opened. Previously a missing argument was
 * dereferenced and a failed fopen() was passed on to the parser and then
 * to fclose().
 *
 * Returns 0 when parsing succeeds and 1 on a usage, open or parse error,
 * so the exit status reflects the outcome.
 */
int main (int argc, char * argv[])
{
    if (argc < 2)
    {
        fprintf(stderr, "Usage: %s <source-file>\n",
                (argc > 0 && argv[0] != NULL) ? argv[0] : "parser_test");
        return 1;
    }

    yyin = fopen(argv[1], "r");
    if (yyin == NULL)
    {
        perror(argv[1]);
        return 1;
    }

    int err_code = yyparse();
    if (err_code == 0)
    {
        /* a and b are only assigned by grammar actions and may still be
         * NULL; passing NULL for %s is undefined, so substitute a marker. */
        printf("\nFunction called : '%s' from '%s'\n",
               b != NULL ? b : "(none)", a != NULL ? a : "(none)");
        printf("\nParsing Done !!!\n");
    }
    else
    {
        printf("\nUNSUCCESSFUL ....\n");
    }
    fclose(yyin);
    return err_code == 0 ? 0 : 1;
}

/*
 * Error reporting hook for the parser: writes the current value of yylineno
 * and the message `s` to stderr.
 *
 * The message is only read, so it is taken as const char *. Always returns
 * 0; the function is declared int, and the original fell off the end
 * without returning a value.
 *
 * NOTE(review): yylineno is presumably maintained by the scanner only when
 * line counting is enabled -- confirm, otherwise the reported line is
 * meaningless.
 */
int yyerror (const char* s)
{
    fprintf(stderr, "\nError on Line: %d :: %s\n" , yylineno, s);
    return 0;
}

/*
 * End-of-input hook for the scanner. Always returns 1, meaning there is no
 * further input to switch to.
 */
int yywrap()
{
    enum { NO_MORE_INPUT = 1 };

    return NO_MORE_INPUT;
}

How can I achieve this?

1

1 Answer

2
votes

To put it more simply: is it possible to combine two or more lex files and use them together as the token source that the Bison (.y) file reads from?

The lex (or flex) utility generates C source code for (among other things) a scanner function, based on one input file. If you run two separate inputs through it then you will get two separate functions. With some versions of lex and some extra work you could get those functions to have different names, but you cannot successfully make them scan the same input stream because they maintain their own input buffers and scanning state information, so they will interfere with each other.

You also cannot concatenate lex input files to combine them into one, at least because each consists of two or three sections, whose relative order is significant. Concatenating two lex input files does not yield a valid lex input file.

You may be able to merge two lex input files into one on a section-by-section basis, but this exercise is at best difficult if the files involved are of any complexity. Merely combining the contents of each pair of corresponding sections may produce a valid lex input file, but it is unlikely to be one that does the job you want.

If you have a third-party lex input that describes scanning rules similar to what you want, and you would like somehow to reuse that code, then your best bet is probably to take that and modify it to suit you. This may be tricky, because you will first need to achieve a good understanding of the existing input before you can modify it for your needs. But you would anyway need that and more to merge two lex inputs.

Alternatively, you might simply take the existing file as inspiration for writing your own. Study it, gather ideas from it for how you can achieve similar goals, etc. This is the option that I would recommend myself. You will probably learn more this way, and you will likely understand the resulting code better.