0
votes

all I have develop ANTR4 grammar. During parse the string

Time;25 10 * * *;'faccalc_minus1_cron.out.'yyyyMMdd.HHmm;America/New_York

I have following errors

Invalid chars in expression! Expression: ;' Invalid chars: ;' extraneous input ';' expecting {'', INTEGER, '-', '/', ','} missing ';' at '_' Incorrect timezone format :faccalc_minus1

I don't undestand why, as regex rule contain '_'.

How to fix it?

Regards, Vladimir

lexer grammar FileTriggerLexer;

CRON
:
    'cron'
;

MARKET_CRON
:
    'marketCron'
;

COMBINED
:
    'combined'
;

FILE_FEED
:
    'FileFeed'
;

MANUAL_NOTICE
:
    'ManualNotice'
;

TIME
:
    'Time'
;

MARKET_TIME
:
    'MarketTime'
;


SCHEDULE
:
    'Schedule'
;

PRODUCT
:
    'Product'
;

UCA_CLIENT
:
    'UCAClient'
;

APEX_GSM
:
    'ApexGSM'
;

DELAY
:
    'Delay'
;

CATEGORY
:
    'Category'
;

EXCHANGE
:
    'Exchange'
;

CALENDAR_EXCHANGE
:
    'CalendarExchange'
;

FEED
:
    'Feed'
;

RANGE
:
    'Range'
;

SYNTH
:
    'Synth'
;

TRIGGER
:
    'Trigger'
;

DELAYED_TRIGGER
:
    'DelayedTrigger'
;

INTRA_TRIGGER
:
    'IntraTrigger'
;

CURRENT_TRIGGER
:
    'CurrentTrigger'
;

CALENDAR_FILE_FEED
:
    'CalendarFileFeed'
;

PREVIOUS
:
    'Previous'
;

LATE_DELAY
:
    'LateDelay'
;

BUILD_ARCHIVE
:
    'BuildArchive'
;

COMPRESS
:
    'Compress'
;


LATE_TIME
:
    'LateTime'
;

CALENDAR_CATEGORY
:
    'CalendarCategory'
;

APEX_GPM
:
    'ApexGPM'
;

PORTFOLIO_NOTICE
:
    'PortfolioNotice'
;

FixedTimeOfDay: 'FixedTimeOfDay';

SEMICOLON
:
    ';'
;

ASTERISK
:
    '*'
;

LBRACKET
:
    '('
;

RBRACKET
:
    ')'
;

PERCENT
:
    '%'
;

INTEGER
:
    [0-9]+
;

DASH
:
    '-'
;

DOUBLE_QUOTE
:
    '"'
;

QUOTE
:
    '\''
;

SLASH
:
    '/'
;

DOT
:
    '.'
;

COMMA
:
    ','
;

UNDERSCORE
:
    '_'
;

EQUAL
:
    '='
;

MORE_THAN
:
    '>'
;

LESS
:
    '<'
;

ID
:
    [a-zA-Z] [a-zA-Z0-9]*
;

WS
:
    [ \t\r\n]+ -> skip
; 


/**
 * Define Fied Trigger valdiator grammar
 */
grammar FileTriggerValidator;

options
   {
    tokenVocab = FileTriggerLexer;
}

r
:
    (
        schedule
        | file_feed
        | time_feed
        | market_time_feed
        | manual_notice
        | portfolio_notice
        | not_checked
    )+
;

not_checked
:
    (
        PRODUCT
        | UCA_CLIENT
        | APEX_GSM
        | APEX_GPM
        | DELAY
        | CATEGORY
        | CALENDAR_CATEGORY
        | EXCHANGE
        | CALENDAR_EXCHANGE
        | FEED
        | RANGE
        | SYNTH
        | TRIGGER
        | DELAYED_TRIGGER
        | INTRA_TRIGGER
        | CURRENT_TRIGGER
        | CALENDAR_FILE_FEED
        | PREVIOUS
        | LATE_DELAY
        | LATE_TIME
        | COMPRESS
        | BUILD_ARCHIVE
    )
    (
        SEMICOLON anyList
    )?
;

anyList
:
    anyElement
    (
        SEMICOLON anyElement
    )*
;

anyElement
:
    cron
    | file_name
    | with_step_value
    | source_file
    | timezone
    | regEx
;

portfolio_notice
:
    PORTFOLIO_NOTICE SEMICOLON regEx
;

manual_notice
:
    MANUAL_NOTICE SEMICOLON file_name SEMICOLON timezone
;

time_feed
:
    TIME SEMICOLON cron_part
    (
        timezone?
    ) SEMICOLON file_name SEMICOLON timezone
;

market_time_feed
:
    MARKET_TIME SEMICOLON cron_part timezone SEMICOLON file_name SEMICOLON
    timezone
    (
        SEMICOLON UNDERSCORE? INTEGER
    )*
;

file_feed
:
    file_feed_name SEMICOLON source_file SEMICOLON source_host SEMICOLON
    source_host SEMICOLON regEx SEMICOLON regEx
    (
        SEMICOLON source_host
    )*
;

regEx
:
    (
        ID
        | DOT
        | ASTERISK
        | INTEGER
        | PERCENT
        | UNDERSCORE
        | DASH
        | LESS
        | MORE_THAN
        | EQUAL
        | SLASH
        | LBRACKET
        | RBRACKET
        | DOUBLE_QUOTE
        | QUOTE
        | COMMA
    )+
;

source_host
:
    ID
    (
        DASH ID
    )*
;

file_feed_name
:
    FILE_FEED
;

source_file
:
    (
        ID
        | DASH
        | UNDERSCORE
    )+
;

schedule
:
    SCHEDULE SEMICOLON schedule_defining SEMICOLON file_name SEMICOLON timezone
    (
        SEMICOLON DASH? INTEGER
    )*
;

schedule_defining
:
    cron
    | market_cron
    | combined_cron
;

cron
:
    CRON LBRACKET DOUBLE_QUOTE cron_part timezone DOUBLE_QUOTE RBRACKET
;

market_cron
:
    MARKET_CRON LBRACKET DOUBLE_QUOTE cron_part timezone DOUBLE_QUOTE COMMA
    DOUBLE_QUOTE ID DOUBLE_QUOTE RBRACKET
;

combined_cron
:
    COMBINED LBRACKET cron_list_element
    (
        COMMA cron_list_element
    )* RBRACKET
;

mic_defining
:
    ID
;

file_name
:
    regEx
;

cron_list_element
:
    cron
    | market_cron
;
//

schedule_defined_string
:
    cron
;
// 

cron_part
:
    minutes hours days_of_month month week_days
;
//

minutes
:
    with_step_value
;

hours
:
    with_step_value
;
//

int_list
:
    INTEGER
    | interval
    (
        COMMA INTEGER
        | interval
    )*
;

interval
:
    INTEGER DASH INTEGER
;
//

days_of_month
:
    with_step_value
;
//

month
:
    with_step_value
;
//

week_days
:
    with_step_value
;
//

timezone
:
    timezone_part
    (
        SLASH timezone_part
    )?
;
//

timezone_part
:
    ID
    (
        UNDERSCORE ID
    )?
;
//

with_step_value
:
    (
        INTEGER
        | COMMA
        | SLASH
        | ASTERISK
        | DASH
    )+
;

step
:
    SLASH int_list
;
1

1 Answers

1
votes

To analyze this kind of problem, dump the token stream to see what the lexer is actually doing. To directly dump the tokens, see this answer. AntlrDT, for example, also provides a graphical analysis of the corresponding parse-tree (I am the author of AntlrDT).

FileTriggerParser - ParseTree

From this, easy to see that the first error occurs in the with_step_value rule: does not allow for a trailing semicolon.

Second error is in the timezone_part rule: does not allow for repeated ID UNDERSCORE occurrences.