I have inherited some c# code based on ANTLR 3. We have some grammar files that uses the AST (abstract syntax tree) option and we use those grammar to parse text files with a very odd "language" to objects. we are using the AST as intermediate objects and than convert them to the real objects that we need (with some more processing).
I have no knowledge in ANTLR but currently we have a bottleneck in the application performance from ANTLR processing of the files. Since we are using ANTLR 3 we thought that we might get a performance boost if we migrate to ANTLR (and also get the latest and greatest version of ANTLR which is always a good practice). I have read that AST no longer exist in ANTLR 4, what is the best (and simplest) way to replace it and what will it mean to my current code.
What is the best approach to upgrade ? and will it really give us a performance boost.
An example of one of the grammar file ( there are 6 and this is the simplest one):
grammar Rules;
options
{
language=CSharp2;
output=AST;
ASTLabelType=CommonTree;
superClass = OOPLParserBase;
}
tokens
{
OOPL_MODEL;
}
@lexer::namespace { TestParser.Common.RulesParser }
@parser::namespace { TestParser.Common.RulesParser }
@header
{
using System.Collections.Generic;
using TestParser.OOPLModel;
}
@members
{
public RulesParser() : base(null)
{
}
protected override CommonTree GetAst()
{
return root().Tree as CommonTree;
}
protected override Lexer GetLexer()
{
return new RulesLexer();
}
}
//semantic analysis
root : header (rule_line COMMENT?)+ -> ^(header rule_line+);
header : header_comment+ -> ^(OOPL_MODEL<OOPLModel>[new CommonToken(OOPL_MODEL), "1.0"] header_comment+);
header_comment : COMMENT -> ^(COMMENT<OOPLComment>[$COMMENT, $COMMENT.Text]);
rule_line : parameter RULE_TYPE COMMA PARAMETER_NAME COLON condition -> ^(RULE_TYPE<OOPLBlock>[$RULE_TYPE, $RULE_TYPE.Text] parameter PARAMETER_NAME<OOPLValue>[$PARAMETER_NAME, $PARAMETER_NAME.Text] condition);
parameter : PARAMETER_NAME EQUALS (integer_value = INTEGER | real_value = REAL |string_value = STRING) COMMA -> ^(PARAMETER_NAME<OOPLKeyedValue>[$PARAMETER_NAME, $PARAMETER_NAME.Text, SingleWhereNotNull<IToken>($integer_value, $string_value, $real_value).Text]);
condition : condition_value COMMA condition_value COMMA condition_value COMMA condition_value COMMA condition_value COMMA condition_value COMMA condition_value COMMA condition_value COMMA condition_value COMMA condition_value COMMA condition_value COMMA condition_value COMMA condition_value;
condition_value : (asterisk| parameter_name | positive_integer);
asterisk : ASTERISK -> ^(ASTERISK<OOPLValue>[$ASTERISK, $ASTERISK.Text]);
parameter_name : PARAMETER_NAME -> ^(PARAMETER_NAME<OOPLValue>[$PARAMETER_NAME, $PARAMETER_NAME.Text]);
positive_integer : INTEGER -> ^(INTEGER<OOPLValue>[$INTEGER, $INTEGER.Text]);
//lexical analysis
EQUALS : '=';
NEW_LINE_R : '\r' { $channel = HIDDEN; };
NEW_LINE_N : '\n' { $channel = HIDDEN; };
RULE_TYPE : ('Time'|'TIME'|'Lol'|'LOL'|'World'|'WORLD'|'Template'|'TEMPLATE');
DOUBLE_COLON : COLON COLON;
INTEGER : MINUS? DIGIT+;
REAL : INTEGER '.' INTEGER;
PARAMETER_NAME : ASTERISK? (LETTER|DIGIT|UNDERSCORE|FORWARDSLASH|DOUBLE_COLON|MINUS)+ ASTERISK?;
WS : ( ' '
| '\t'
| NEW_LINE_R
| NEW_LINE_N
) { $channel = HIDDEN; } ;
COMMENT : '#' ( options {greedy=false;} : . )* NEW_LINE_R? NEW_LINE_N;
STRING : '"'~('"')* '"';
fragment
MINUS : '-';
COMMA : ',';
COLON : ':';
fragment
DOT : '.';
ASTERISK : '*';
fragment
FORWARDSLASH : '/';
fragment
UNDERSCORE : '_';
fragment
DIGIT : '0'..'9';
fragment
LETTER : 'A'..'Z' | 'a'..'z';