I try to write a simple parser by PLY, but the parser below will lose the first string after every NEWLINE.
The input is "a b c\nb d e\nc f".
My parser parsed first line statement as state (0, ((('a', 'b'), 'c'), 0)), but next token 'b' is lost. The second line statement is state (0, (('d', 'e'), 0)). How do I fix this?
import ply.lex as lex
import ply.yacc as yacc
tokens = ('STRING', 'NEWLINE')
t_STRING = r'[^ \n]+'
t_ignore = r' '
def t_NEWLINE(t):
r'\n'
t.lexer.lineno += 1
return t
def t_error(t):
print("Illegal character %s" % t.value[0])
t.lexer.skip(1)
def p_statement_interactive(p):
'''statement : plist
| plist NEWLINE'''
p[0] = (0, (p[1],0))
print "state", p[0]
def p_item_string_expr(p):
'''plist : plist pitem
| pitem'''
if len(p) > 2:
p[0] = (p[1],p[2])
else:
p[0] = p[1]
print "str2", p[0]
def p_item_string(p):
'''pitem : STRING'''
p[0] = p[1]
print "str1", p[0]
def p_error(p):
if not p:
print("SYNTAX ERROR AT EOF")
def main():
data = """a b c
b d e
c f"""
lexer = lex.lex(debug=0)
lexer.input(data)
while True:
tok = lexer.token()
if not tok:
break # No more input
print(tok)
parser = yacc.yacc()
parser.parse(data)
if __name__ == '__main__':
main()
Result is:
LexToken(STRING,'a',1,0)
LexToken(STRING,'b',1,2)
LexToken(STRING,'c',1,4)
LexToken(NEWLINE,'\n',1,5)
LexToken(STRING,'b',2,10)
LexToken(STRING,'d',2,12)
LexToken(STRING,'e',2,14)
LexToken(NEWLINE,'\n',2,15)
LexToken(STRING,'c',3,20)
LexToken(STRING,'f',3,22)
str1 a
str2 a
str1 b
str2 ('a', 'b')
str1 c
str2 (('a', 'b'), 'c')
state (0, ((('a', 'b'), 'c'), 0))
str1 d
str2 d
str1 e
str2 ('d', 'e')
state (0, (('d', 'e'), 0))
str1 f
str2 f
state (0, ('f', 0))