I have been stuck on this problem for a couple of days now, and so has this parser. The point of the parser is to parse an HTTP request, which it does fine, but when the end of the request is reached the parser enters an infinite loop. I have located the place in the C file generated from the lex file where this happens, but I have no idea how to solve the problem.
I have tried the approaches suggested in other similar questions, such as the following one, without success:
lex-flex-scanning-for-the-eof-character
This is my lex file:
/* Replace any earlier YYLMAX definition with 4096 (presumably the scanner's
 * token buffer limit -- TODO confirm for the lex implementation in use). */
#undef YYLMAX
#define YYLMAX 4096
#include "ssoyacc.h"
/* Every use of yylval in this file refers to the variable ssolval. */
#define yylval ssolval
extern YYSTYPE yylval;
#ifdef FLEX_SCANNER
/* flex: feed the scanner one character per call, taken from sso_read().
 * Only a return value of exactly -1 is reported as end of input (YY_NULL);
 * any other value, including 0, is stored in buf[0] and counted as one
 * character read. */
#define YY_INPUT(buf, result, max_size) { int cc = sso_read(); result = (cc == -1) ? YY_NULL : (buf[0] = cc, 1);}
#else /* NO FLEX */
/* Non-flex lex: route the input/unput/yyless hooks to the sso_* functions. */
#undef input
#define input() sso_read()
#define unput(cc) sso_unput(cc)
#define yyless(cc) sso_yyless(cc)
#endif /* FLEX */
%}
%p 30000
%n 4000
%e 2000
%a 30000
%k 2500
%o 50000
nondigit [_a-zA-Z]
alfanum [_a-zA-Z0-9]
digit [0-9]
nonzero_digit [1-9]
octal_digit [0-7]
hexadecimal_digit [0-9a-fA-F]
%start HTTP QUERY ARG XML TAG CDAT FORM_PARAM FORM_VALUE
%%
<INITIAL,HTTP>[ ] {
/* A single space, recognised in the INITIAL and HTTP start conditions. */
return SP;
}
<INITIAL,HTTP>\r\n {
/* Carriage return + line feed, recognised in INITIAL and HTTP. */
return CRLF;
}
<HTTP>HTTP\/{digit}\.{digit} {
/* Protocol version of the form HTTP/<digit>.<digit>. */
return HTTP_VERSION;
}
<HTTP>OPTIONS {
/* The literal method name OPTIONS. */
return OPTIONS;
}
<HTTP>GET {
/* The literal method name GET. */
return GET;
}
.
.
.
other tags
.
.
.
<FORM_PARAM>\= {
/* '=' ends a parameter name: switch to FORM_VALUE to scan its value. */
BEGIN(FORM_VALUE);
return IS;
}
<FORM_VALUE>\& {
/* '&' ends a value: switch back to FORM_PARAM for the next parameter. */
BEGIN(FORM_PARAM);
return AMPERSAND;
}
<FORM_VALUE>[0-9a-zA-Z\%\+\.\/]* {
    /*
     * URL-decode a form value into yylval.sval:
     *   '+'   becomes a space,
     *   "%XX" (exactly two hex digits) becomes the byte it encodes,
     *   every other character is copied unchanged.
     *
     * Returns STRING on success.  Returns ERROR when the token is
     * MAX_ARG_LEN characters or longer, or when a '%' is not followed by
     * two hexadecimal digits (previously such input silently decoded to a
     * wrong byte, typically NUL, which truncated the value).
     *
     * Note that "%00" is still accepted and decodes to a NUL byte, which
     * ends the C string early.
     * NOTE(review): the pattern uses '*', so it can also match an empty
     * value -- confirm that the grammar relies on this.
     */
    if (yyleng < MAX_ARG_LEN)
    {
        int ii;
        int jj = 0;

        for (ii = 0; ii < yyleng; ii++)
        {
            if (yytext[ii] == '+')
            {
                yylval.sval[jj++] = ' ';
            }
            else if (yytext[ii] != '%')
            {
                yylval.sval[jj++] = yytext[ii];
            }
            else
            {
                int value = 0;
                int kk;

                /* Both hex digits must lie inside the matched token. */
                if (ii + 2 >= yyleng)
                {
                    return ERROR;
                }
                for (kk = 1; kk <= 2; kk++)
                {
                    char ch = yytext[ii + kk];

                    value *= 16;
                    if (ch >= '0' && ch <= '9')
                    {
                        value += ch - '0';
                    }
                    else if (ch >= 'a' && ch <= 'f')
                    {
                        value += ch - 'a' + 10;
                    }
                    else if (ch >= 'A' && ch <= 'F')
                    {
                        value += ch - 'A' + 10;
                    }
                    else
                    {
                        /* e.g. "%zz" or "%+1": not a valid escape. */
                        return ERROR;
                    }
                }
                yylval.sval[jj++] = value;
                ii += 2; /* skip the two digits just consumed */
            }
        }
        yylval.sval[jj] = 0;
        return STRING;
    }
    else
    {
        return ERROR;
    }
}
%%
/*
 * End-of-input hook (presumably the prefixed yywrap used by the generated
 * scanner -- confirm against the build options).  Always returns 1.
 */
int ssowrap(void)
{
    const int no_more_input = 1;

    return no_more_input;
}
/*
 * Prepare the scanner for an HTTP request: reset the content state via
 * init_content() and switch to the HTTP start condition.
 * Takes no arguments; declared with (void) so it is a proper prototype,
 * matching ssowrap(void).
 */
void start_http(void)
{
    init_content(); /* initialize content count */
    BEGIN(HTTP);
}
/*
 * Switch the scanner to the FORM_PARAM start condition, where the
 * url-encoded form rules (parameter name, '=', value, '&') apply.
 * Takes no arguments; declared with (void) so it is a proper prototype.
 */
void start_urlencoded(void)
{
    BEGIN(FORM_PARAM);
}
/*
 * Switch the scanner to the XML start condition.
 * Takes no arguments; declared with (void) so it is a proper prototype.
 */
void start_xml(void)
{
    BEGIN(XML);
}
/*
 * Push back everything after the first `count` characters of the current
 * token: each trailing character is handed to unput(), last character
 * first, and its slot in yytext is overwritten with NUL.
 *
 * A `count` that is negative or larger than yyleng is ignored; a negative
 * value used to make the loop index run below 0 and touch memory before
 * yytext.
 *
 * Always returns 0.
 * NOTE(review): yyleng itself is not adjusted here -- confirm whether
 * callers expect it to become `count`.
 */
int sso_yyless(int count)
{
    int pos;

    if (count < 0 || count > yyleng)
    {
        return 0;
    }
    for (pos = yyleng - 1; pos >= count; pos--)
    {
        unput(yytext[pos]);
        yytext[pos] = '\0';
    }
    return 0;
}
/*
 * Write a debug rendering of one character to stdout.
 * A printable single-byte character is shown quoted followed by its hex
 * code, e.g. 'A' 0x41; anything else is shown as bare hex.
 */
void allprint(wchar_t cc)
{
    /*
     * isprint() is only defined for values representable as unsigned char
     * (or EOF), so wide or negative values must take the hex-only path
     * instead of being passed to it.
     */
    const unsigned long code = (unsigned long) cc;

    if (code <= 0xFFUL && isprint((int) code))
    {
        /* Casts make the arguments match %c (int) and %x (unsigned int). */
        fprintf(stdout, "'%c' 0x%x", (int) code, (unsigned int) code);
    }
    else
    {
        fprintf(stdout, "%x", (unsigned int) cc);
    }
}
void sprint(wchar_t *pc)
{
fprintf(stdout, "%s", pc);
}
Execution gets stuck in the loop `while ( /*CONSTCOND*/1 )` in the ssolex.c file: it keeps entering `case YY_END_OF_BUFFER:` twice and then `case 126:` once. The line that case 126 refers to is the `%%` line in the lex file.
/** The main scanner function which does all the work.
*/
YY_DECL
{
yy_state_type yy_current_state;
char *yy_cp, *yy_bp;
int yy_act;
if ( !(yy_init) )
{
(yy_init) = 1;
#ifdef YY_USER_INIT
YY_USER_INIT;
#endif
if ( ! (yy_start) )
(yy_start) = 1; /* first start state */
if ( ! ssoin )
ssoin = stdin;
if ( ! ssoout )
ssoout = stdout;
if ( ! YY_CURRENT_BUFFER ) {
ssoensure_buffer_stack ();
YY_CURRENT_BUFFER_LVALUE =
sso_create_buffer(ssoin,YY_BUF_SIZE );
}
sso_load_buffer_state( );
}
{
#line 44 "ssolex.l"
#line 1265 "<stdout>"
while ( /*CONSTCOND*/1 ) /* loops until end-of-file is reached */
{
yy_cp = (yy_c_buf_p);
/* Support of ssotext. */
*yy_cp = (yy_hold_char);
/* yy_bp points to the position in yy_ch_buf of the start of
* the current run.
*/
yy_bp = yy_cp;
yy_current_state = (yy_start);
yy_current_state += YY_AT_BOL();
yy_match:
do
{
YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)] ;
if ( yy_accept[yy_current_state] )
{
(yy_last_accepting_state) = yy_current_state;
(yy_last_accepting_cpos) = yy_cp;
}
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
if ( yy_current_state >= 802 )
yy_c = yy_meta[(unsigned int) yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
++yy_cp;
}
while ( yy_base[yy_current_state] != 1067 );
yy_find_action:
yy_act = yy_accept[yy_current_state];
if ( yy_act == 0 )
{ /* have to back up */
yy_cp = (yy_last_accepting_cpos);
yy_current_state = (yy_last_accepting_state);
yy_act = yy_accept[yy_current_state];
}
YY_DO_BEFORE_ACTION;
do_action: /* This label is used only to access EOF actions. */
The last thing printed is `Reading a token:`, which comes from the yacc-generated C file, so I think the problem must be the EOF handling in lex.
/* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol. */
if (yychar == YYEMPTY)
{
YYDPRINTF ((stderr, "Reading a token: "));
yychar = yylex ();
printf("TOKEN %c, %d\n", yychar, yychar);
}
Does `sso_read` return -1 at the end of the request? – rici