0
votes

Please, I need help fixing two simple logic errors that I am facing in my example.

Here are the details:

The Input File: (input.txt)


FirstName:James
LastName:Smith
normal text


The output File: (output.txt) - [with two logic errors]


The Name is: James
The Name is: LastName:Smith
The Name is: normal text


What I am expecting as output (instead of the above lines) - [without logical errors]


The Name is: James
The Name is: Smith
normal text


In other words, I don't want the "LastName:" label to be sent to the output, and I also want ordinary text that appears after the "FirstName:" or "LastName:" lines to be matched as normal text.

Here is my lex File (example.l):

%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "y.tab.h"

/* prototypes */ 
void yyerror(const char*); 

/* Variables: */
char *tempString;

%}

%START sBody

%%

"FirstName:"                {        BEGIN sBody;        /* label consumed; enter sBody so the rest of the line is scanned as a name */ }
"LastName:"                 {        BEGIN sBody;        /* same handling as the FirstName label */ }

.?                          {        /* NOTE(review): yylval is not assigned here, so the parser gets whatever value an earlier token left behind */ return sNormalText; } 

\n                        /* Ignore end of line */;
[ \t]+                   /* Ignore whitespace */;

<sBody>.+   {
                /*
                 * Hand the parser its own heap copy of the matched text.
                 * The calloc() result is not checked, and nothing visible
                 * in this file frees the copy.
                 *
                 * NOTE(review): no rule in this file ever leaves sBody, so
                 * once a label has been seen this rule also matches every
                 * later line in full.
                 */
                tempString = (char *)calloc(strlen(yytext)+1, sizeof(char));
                strcpy(tempString, yytext);
                yylval.sValue = tempString;
                return sText;
             }
%%

/*
 * Program entry point.
 *
 * argv[1] is opened for reading and installed as the scanner input (yyin);
 * argv[2] is opened for writing and installed as the output stream (yyout).
 * yyparse() then runs the scanner and the grammar actions.
 *
 * Returns EXIT_SUCCESS when both files open, yyparse() reports success and
 * the output file closes cleanly; EXIT_FAILURE otherwise.
 */
int main(int argc, char *argv[])
{
    int parse_status;

    if (argc < 3)
    {
        /* Usage errors are diagnostics: stderr, newline-terminated, non-zero exit. */
        fprintf(stderr, "Please you need two args: inputFileName and outputFileName\n");
        return EXIT_FAILURE;
    }

    yyin = fopen(argv[1], "r");
    if (yyin == NULL)
    {
        fprintf(stderr, "failed to open %s for reading\n", argv[1]);
        return EXIT_FAILURE;
    }

    yyout = fopen(argv[2], "w");
    if (yyout == NULL)
    {
        fprintf(stderr, "failed to open %s for writing\n", argv[2]);
        fclose(yyin);   /* do not leave the input stream open on this path */
        return EXIT_FAILURE;
    }

    parse_status = yyparse();

    fclose(yyin);
    /* fclose() flushes buffered output, so a failure here means lost data. */
    if (fclose(yyout) != 0)
    {
        fprintf(stderr, "failed to write %s\n", argv[2]);
        return EXIT_FAILURE;
    }

    return parse_status == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}

Here is my yacc file: (example.y):

%{
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <ctype.h>
    #include "y.tab.h"

    void yyerror(const char*); 
    int yywrap(); 

    extern FILE *yyout;

    %}

    %union 
    { 
        int iValue;     
        char* sValue;       
    }; 

    %token <sValue> sText
    %token <sValue> sNormalText

    %%

    /* StartName: a possibly empty sequence of sName items, written in
       right-recursive form (the item comes before the recursive reference). */
    StartName: /* for empty */
              | sName StartName
          ;

    /* sName: a single scanner token, written to yyout as one line. */
    sName:
         sText  
         { 
                /* Print the sValue string carried by the sText token with a "The Name is: " prefix. */
                fprintf(yyout, "The Name is: %s\n", $1);
         }
         |
         sNormalText
         {
               /* Print the sValue string carried by the sNormalText token on a line of its own, without a prefix. */
               fprintf(yyout, "%s\n", $1);
         }
         ;    
    %%

    /* Parser error hook: writes "error: <str>" followed by a newline to stderr. */
    void yyerror(const char *str)
    {
        static const char report_format[] = "error: %s\n";

        fprintf(stderr, report_format, str);
    }

    /*
     * Scanner end-of-input hook. Always returns 1; by lex convention a
     * non-zero result means there is no further input to switch to.
     */
    int yywrap()
    {
        enum { NO_MORE_INPUT = 1 };

        return NO_MORE_INPUT;
    }

Please, if you can help me correct these simple logic errors, I will be grateful.

Thanks in advance for your help and for reading my post.

1

1 Answers

2
votes

Part of the trouble is that you move into state 'sBody' but you never move back to the initial state 0.

Another problem - not yet a major one - is that you use a right-recursive grammar rule instead of the (natural for Yacc) left-recursive rule:

StartName: /* empty */
      |    sName StartName
      ;

vs

StartName: /* empty */
      |    StartName sName
      ;

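Adding BEGIN 0; to the <sBody> Lex rule improves things a lot; the remaining trouble is that you get one more line 'Smith' in the output file for each single letter in the normal text. That happens because the rule that returns sNormalText never sets yylval, so the grammar sees the value left over from the last sText token. You need to review how the value is returned to your grammar.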

By adding yylval.sValue = yytext; before the return in the rule that returns sNormalText, I got the 'expected' output.

example.l

%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "y.tab.h"

/* prototypes */
void yyerror(const char*);

/* Variables: */
char *tempString;

%}

%START sBody

%%

"FirstName:"                { puts("FN");      BEGIN sBody;        /* debug trace, then enter sBody to scan the name */ }
"LastName:"                 { puts("LN");      BEGIN sBody;        /* debug trace, then enter sBody to scan the name */ }

.?                          { printf("NT: %s\n", yytext); yylval.sValue = yytext; return sNormalText; /* NOTE(review): passes the scanner buffer itself, not a copy as the sBody rule does - confirm it is used before the next token is read */ }

\n                        /* Ignore end of line */;
[ \t]+                   /* Ignore whitespace */;

<sBody>.+   {
                /*
                 * Hand the parser its own heap copy of the matched text.
                 * The calloc() result is not checked, and nothing visible
                 * in this file frees the copy.
                 */
                tempString = (char *)calloc(strlen(yytext)+1, sizeof(char));
                strcpy(tempString, yytext);
                yylval.sValue = tempString;
                puts("SB");
                /* Leave sBody so that only the text right after a label is treated as a name. */
                BEGIN 0;
                return sText;
             }

%%

/*
 * Program entry point.
 *
 * argv[1] is opened for reading and installed as the scanner input (yyin);
 * argv[2] is opened for writing and installed as the output stream (yyout).
 * yyparse() then runs the scanner and the grammar actions.
 *
 * Returns EXIT_SUCCESS when both files open, yyparse() reports success and
 * the output file closes cleanly; EXIT_FAILURE otherwise.
 */
int main(int argc, char *argv[])
{
    int parse_status;

    if (argc < 3)
    {
        /* Usage errors are diagnostics: stderr, newline-terminated, non-zero exit. */
        fprintf(stderr, "Please you need two args: inputFileName and outputFileName\n");
        return EXIT_FAILURE;
    }

    yyin = fopen(argv[1], "r");
    if (yyin == NULL)
    {
        fprintf(stderr, "failed to open %s for reading\n", argv[1]);
        return EXIT_FAILURE;
    }

    yyout = fopen(argv[2], "w");
    if (yyout == NULL)
    {
        fprintf(stderr, "failed to open %s for writing\n", argv[2]);
        fclose(yyin);   /* do not leave the input stream open on this path */
        return EXIT_FAILURE;
    }

    parse_status = yyparse();

    fclose(yyin);
    /* fclose() flushes buffered output, so a failure here means lost data. */
    if (fclose(yyout) != 0)
    {
        fprintf(stderr, "failed to write %s\n", argv[2]);
        return EXIT_FAILURE;
    }

    return parse_status == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}

example.y

%{
#include <stdio.h>
#include "y.tab.h"

void yyerror(const char*);
int yywrap();

extern FILE *yyout;

%}

%union
{
    char* sValue;
};

%token <sValue> sText
%token <sValue> sNormalText

%%

/* StartName: a possibly empty sequence of sName items, written in
   left-recursive form (the recursive reference comes before the item). */
StartName: /* for empty */
          | StartName sName
      ;

/* sName: a single scanner token, written to yyout as one labelled line. */
sName:
     sText
     {
            /* Print the sValue string carried by the sText token with a "The Name is: " prefix. */
            fprintf(yyout, "The Name is: %s\n", $1);
     }
     |
     sNormalText
     {
           /* Print the sValue string carried by the sNormalText token with a "The Text is: " prefix. */
           fprintf(yyout, "The Text is: %s\n", $1);
     }
     ;
%%

/* Parser error hook: writes "error: <str>" followed by a newline to stderr. */
void yyerror(const char *str)
{
    static const char report_format[] = "error: %s\n";

    fprintf(stderr, report_format, str);
}

/*
 * Scanner end-of-input hook. Always returns 1; by lex convention a
 * non-zero result means there is no further input to switch to.
 */
int yywrap()
{
    enum { NO_MORE_INPUT = 1 };

    return NO_MORE_INPUT;
}

output.txt

The Name is: James
The Name is: Smith
The Text is: n
The Text is: o
The Text is: r
The Text is: m
The Text is: a
The Text is: l
The Text is:  
The Text is: t
The Text is: e
The Text is: x
The Text is: t

It might make more sense to put yywrap() in with the lexical analyzer rather than with the grammar. I've left the terse debugging prints in the code - they helped me see what was going wrong.

FN
SB
LN
SB
NT: n
NT: o
NT: r
NT: m
NT: a
NT: l
NT:  
NT: t
NT: e
NT: x
NT: t

You'll need to play with the '.?' rule to get normal text returned in its entirety. You may also have to move it around the file - start states are slightly peculiar critters. When I changed the rule to '.+', Flex gave me the warning:

example.l:25: warning, rule cannot be matched
example.l:27: warning, rule cannot be matched

These lines referred to the blank/tab and sBody rules. Moving the unqualified '.+' after the sBody rule removed the warnings, but didn't seem to do what was wanted. Have fun...