1
votes

I have been stuck on this problem for a couple of days now, and so has this parser. The point of the parser is to parse an HTTP request, which it does fine, but when the end of the request is reached the parser enters an infinite loop. I have located the spot in the C file generated from the lex file, but I have no idea how to solve the problem.

I have tried the following approaches suggested in other similar questions without success.

bison-end-of-file

lex-flex-scanning-for-the-eof-character

This is my lex file:

/* Override YYLMAX (the size of the yytext array in lex implementations that
 * keep yytext as a fixed array) with a 4096-character limit. */
#undef YYLMAX
#define YYLMAX  4096

#include "ssoyacc.h"

/* Let the actions below use the conventional name yylval for the semantic
 * value that is actually declared as ssolval. */
#define yylval  ssolval
extern YYSTYPE  yylval;

#ifdef FLEX_SCANNER
/* flex: refill the scanner buffer one character at a time from sso_read().
 * A return value of -1 is reported to flex as end of input (YY_NULL); any
 * other value is stored as a single input byte and result is set to 1.
 * NOTE(review): end of input is recognised only when sso_read() returns
 * exactly -1.  If sso_read() signals "no more data" with another value
 * (for example 0), that value is handed to the scanner as an input byte and
 * the scanner never sees EOF -- confirm sso_read()'s contract. */
#define YY_INPUT(buf, result, max_size) { int cc = sso_read(); result = (cc == -1) ? YY_NULL : (buf[0] = cc, 1);}
#else /* NO FLEX */
/* Classic lex: route the input/unput/yyless hooks to the sso_* functions. */
#undef input
#define input()         sso_read()
#define unput(cc)         sso_unput(cc)
#define yyless(cc)         sso_yyless(cc)
#endif /* FLEX */

%}

%p 30000
%n 4000
%e 2000
%a 30000
%k 2500
%o 50000
        /* The %p/%n/%e/%a/%k/%o lines above are lex table-size declarations
         * (positions, states, parse-tree nodes, transitions, packed
         * character classes, output array size).  flex accepts them for
         * compatibility but does not need them.
         * Comments in this section are indented so that lex copies them to
         * the generated file instead of parsing them as definitions. */

        /* Named character classes; a rule refers to one as {name}. */
nondigit                [_a-zA-Z]
alfanum                 [_a-zA-Z0-9]
digit                   [0-9]
nonzero_digit           [1-9]
octal_digit             [0-7]
hexadecimal_digit       [0-9a-fA-F]

        /* Start conditions; rules prefixed with <NAME> are active only while
         * the scanner is in that condition (selected with BEGIN). */
%start HTTP QUERY ARG XML TAG CDAT FORM_PARAM FORM_VALUE

%%

<INITIAL,HTTP>[ ]   {
            /* A single space, in either the INITIAL or the HTTP state. */
            return SP;
        }
<INITIAL,HTTP>\r\n      {
            /* Carriage return + line feed line terminator. */
            return CRLF;
        }
<HTTP>HTTP\/{digit}\.{digit}    {
            /* Protocol version of the form HTTP/<digit>.<digit>. */
            return HTTP_VERSION;
        }
<HTTP>OPTIONS       {
            /* Literal method name OPTIONS (HTTP state only). */
            return OPTIONS;
        }
<HTTP>GET   {
            /* Literal method name GET (HTTP state only). */
            return GET;
        }
.
.
.
other tags
.
.
.
<FORM_PARAM>\=  {
            /* '=' while reading a parameter: switch to FORM_VALUE so the
             * value rules apply to the text that follows. */
            BEGIN(FORM_VALUE);
            return IS;
        }
<FORM_VALUE>\&  {
            /* '&' while reading a value: switch back to FORM_PARAM for the
             * next name=value pair. */
            BEGIN(FORM_PARAM);
            return AMPERSAND;
        }
<FORM_VALUE>[0-9a-zA-Z\%\+\.\/]*    {
            /*
             * URL-decode one form value into yylval.sval and return STRING.
             *   '+'   -> ' '
             *   %XY   -> the byte whose hexadecimal value is XY
             *   other -> copied unchanged
             * Returns ERROR when the raw token is MAX_ARG_LEN characters or
             * longer, or when a '%' is not followed by two hexadecimal
             * digits.  (Such malformed escapes used to be decoded silently
             * into a wrong byte, often a NUL that truncated the value.)
             * NOTE(review): assumes yylval.sval can hold MAX_ARG_LEN bytes;
             * confirm against the YYSTYPE declaration.
             * NOTE(review): a well-formed "%00" still decodes to NUL and so
             * truncates the resulting C string; decide whether to reject it.
             */
            int ii;
            int jj = 0;

            if (yyleng >= MAX_ARG_LEN)
            {
                return ERROR;
            }
            for (ii = 0; ii < yyleng; ii++)
            {
                if (yytext[ii] == '+')
                {
                    yylval.sval[jj++] = ' ';
                }
                else if (yytext[ii] != '%')
                {
                    yylval.sval[jj++] = yytext[ii];
                }
                else
                {
                    int value = 0;
                    int kk;

                    /* Both hex digits must lie inside the matched text. */
                    if (ii + 2 >= yyleng)
                    {
                        return ERROR;
                    }
                    for (kk = 1; kk <= 2; kk++)
                    {
                        int ch = yytext[ii + kk];
                        int digit;

                        if (ch >= '0' && ch <= '9')
                        {
                            digit = ch - '0';
                        }
                        else if (ch >= 'a' && ch <= 'f')
                        {
                            digit = ch - 'a' + 10;
                        }
                        else if (ch >= 'A' && ch <= 'F')
                        {
                            digit = ch - 'A' + 10;
                        }
                        else
                        {
                            return ERROR;
                        }
                        value = value * 16 + digit;
                    }
                    yylval.sval[jj++] = (char) value;
                    ii += 2; /* skip the two digits just consumed */
                }
            }
            yylval.sval[jj] = 0;
            return STRING;
        }
%%
/*
 * Scanner wrap hook (the "sso"-prefixed counterpart of lex's yywrap).
 * Always returns 1, which by the yywrap convention tells the scanner that
 * no further input follows the current one.
 */
int ssowrap(void)
{
    const int wrap_result = 1;

    return wrap_result;
}

/*
 * Prepare the scanner for an HTTP request: reset the content counter via
 * init_content() and switch to the HTTP start condition.
 * Declared with (void) so the definition is a real prototype.
 */
void start_http(void)
{
    init_content(); /* initialize content count */
    BEGIN(HTTP);
}

/*
 * Switch the scanner to the FORM_PARAM start condition, where the rules for
 * name=value form data apply.
 * Declared with (void) so the definition is a real prototype.
 */
void start_urlencoded(void)
{
    BEGIN(FORM_PARAM);
}

/*
 * Switch the scanner to the XML start condition.
 * Declared with (void) so the definition is a real prototype.
 */
void start_xml(void)
{
    BEGIN(XML);
}


/*
 * Keep the first `count` characters of the current token and push the rest
 * back onto the input with unput(), overwriting each pushed-back position
 * in yytext with '\0'.
 *
 * count: number of leading characters of yytext to keep.  A value outside
 *        0..yyleng is ignored.
 * Returns 0 in every case.
 *
 * NOTE(review): yyleng itself is not reduced here; confirm whether callers
 * expect it to equal `count` afterwards.
 */
int sso_yyless(int count)
{
    int i;

    /* A negative count used to pass the range check and make the loop below
     * index before the start of yytext. */
    if (count < 0 || count > yyleng)
    {
        return 0;
    }
    for (i = 0; i < yyleng - count; i++)
    {
        /* Walk backwards from the last character so the characters are
         * pushed back in reverse order. */
        unput(yytext[yyleng - 1 - i]);
        yytext[yyleng - 1 - i] = '\0';
    }
    return 0;
}

/*
 * Write one character to stdout in debug form: a printable character is
 * shown quoted followed by its hexadecimal code, anything else as its
 * hexadecimal code only.
 *
 * cc: the character to print.
 */
void allprint(wchar_t cc)
{
    /* isprint() is only defined for values representable as unsigned char
     * (or EOF), so any wider value is treated as non-printable. */
    unsigned long code = (unsigned long) cc;

    if (code <= 0xFFul && isprint((int) code))
    {
        /* Casts make the arguments match %c (int) and %x (unsigned int). */
        fprintf(stdout, "'%c' 0x%x", (int) code, (unsigned int) code);
    }
    else
    {
        fprintf(stdout, "%x", (unsigned int) code);
    }
}

/*
 * Write a wide-character string to stdout.
 *
 * pc: NUL-terminated wchar_t string.
 *
 * Uses %ls because the argument is a wchar_t pointer; %s requires a char
 * pointer, so the previous format did not match the argument type.
 */
void sprint(wchar_t *pc)
{
    fprintf(stdout, "%ls", pc);
}

Execution gets stuck in the while ( /*CONSTCOND*/1 ) loop in the ssolex.c file: it keeps entering case YY_END_OF_BUFFER: twice and then case 126: once. The source line referenced by case 126 is the %% line in the lex file.

/** The main scanner function which does all the work.
 */
YY_DECL
{
    yy_state_type yy_current_state;
    char *yy_cp, *yy_bp;
    int yy_act;

    if ( !(yy_init) )
        {
        (yy_init) = 1;

#ifdef YY_USER_INIT
        YY_USER_INIT;
#endif

        if ( ! (yy_start) )
            (yy_start) = 1; /* first start state */

        if ( ! ssoin )
            ssoin = stdin;

        if ( ! ssoout )
            ssoout = stdout;

        if ( ! YY_CURRENT_BUFFER ) {
            ssoensure_buffer_stack ();
            YY_CURRENT_BUFFER_LVALUE =
                sso_create_buffer(ssoin,YY_BUF_SIZE );
        }

        sso_load_buffer_state( );
        }

    {
#line 44 "ssolex.l"


#line 1265 "<stdout>"

    while ( /*CONSTCOND*/1 )        /* loops until end-of-file is reached */
        {
        yy_cp = (yy_c_buf_p);

        /* Support of ssotext. */
        *yy_cp = (yy_hold_char);

        /* yy_bp points to the position in yy_ch_buf of the start of
         * the current run.
         */
        yy_bp = yy_cp;

        yy_current_state = (yy_start);
        yy_current_state += YY_AT_BOL();
yy_match:
        do
            {
            YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)] ;
            if ( yy_accept[yy_current_state] )
                {
                (yy_last_accepting_state) = yy_current_state;
                (yy_last_accepting_cpos) = yy_cp;
                }
            while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
                {
                yy_current_state = (int) yy_def[yy_current_state];
                if ( yy_current_state >= 802 )
                    yy_c = yy_meta[(unsigned int) yy_c];
                }
            yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
            ++yy_cp;
            }
        while ( yy_base[yy_current_state] != 1067 );

yy_find_action:
        yy_act = yy_accept[yy_current_state];
        if ( yy_act == 0 )
            { /* have to back up */
            yy_cp = (yy_last_accepting_cpos);
            yy_current_state = (yy_last_accepting_state);
            yy_act = yy_accept[yy_current_state];
            }

        YY_DO_BEFORE_ACTION;

do_action:  /* This label is used only to access EOF actions. */

The last thing printed is "Reading a token: ", which comes from the yacc-generated C file, so I think the problem must be in the EOF handling in lex.

/* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol.  */
if (yychar == YYEMPTY)
{
  YYDPRINTF ((stderr, "Reading a token: "));
  yychar = yylex ();
  printf("TOKEN %c, %d\n", yychar, yychar);
}
1
Do you happen to be trying to parse data directly from an open network connection? EOF is not the same thing as "no more input is available right now". You will not see EOF on a network connection until the connection is closed. – John Bollinger
Yes, how should I determine when to stop parsing data? – Henri Koski
Do you actually use yyless in your lexer code? – rici
Also, I think you need to provide more information. Have you verified whether sso_read returns -1 at the end of the request? – rici
@rici Yes, yyless is being used. The sso_read function is never called. – Henri Koski

1 Answer

0
votes

As rici mentioned, the sso_read function was not returning -1; it returned 0 instead. Also, EOF was never reached because the TCP socket was still open, as it should be. Thanks to rici and John Bollinger for helping to solve this!