I am trying to get the ANTLR C# g4 grammar working with the ANTLR runtime in Python 3.
I automated a part of the boring stuff:
- Downloading the antlr jar in a `./tmp` folder
- Downloading the lexer and parser grammar also in a `./tmp` folder
- Generating the lexer and parser classes with a java call on the antlr jar
- Getting the related classes, which is the step that fails for the lexer part; I'll get back to this error below
The dirty code
import importlib
import inspect
import os
import shutil
import subprocess
import requests
from antlr4 import *
def download_antlr(url, destination_folder):
    """Download the file at ``url`` into a freshly recreated folder.

    The target path is built by joining the current working directory,
    ``destination_folder`` and the last path component of ``url``. If the
    folder that will hold the file already exists it is deleted first, so
    anything previously stored there is lost.

    Args:
        url: Address of the file to download; its last path component is
            used as the local file name.
        destination_folder: Folder that receives the download, joined onto
            the current working directory.

    Returns:
        A ``(antlr_file, tmp_folder)`` tuple: the path of the downloaded
        file and the path of the folder that contains it.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    _, antlr_base_file = os.path.split(url)
    antlr_file = os.path.join(os.getcwd(), destination_folder, antlr_base_file)
    tmp_folder = os.path.dirname(antlr_file)

    # Always start from an empty folder so stale downloads and previously
    # generated files cannot leak into this run.
    if os.path.exists(tmp_folder):
        shutil.rmtree(tmp_folder)
    os.makedirs(tmp_folder, exist_ok=True)

    response = requests.get(url, allow_redirects=True)
    # Fail loudly instead of saving an HTTP error page under the jar's name.
    response.raise_for_status()

    # The context manager guarantees the file is flushed and closed before
    # the caller hands the path to another process.
    with open(antlr_file, 'wb') as jar:
        jar.write(response.content)
    return antlr_file, tmp_folder
antlr_url = 'https://www.antlr.org/download/antlr-4.7.2-complete.jar'
# download_antlr() deletes and recreates the ./tmp folder, so it has to run
# before the grammar files are written into that folder below.
antlr_file, tmp_folder = download_antlr(antlr_url, r'./tmp')

# grammar_url = 'https://raw.github.com/antlr/grammars-v4/master/json/JSON.g4'

# --- Parser grammar -------------------------------------------------------
parser_grammar_url = 'https://raw.github.com/antlr/grammars-v4/master/csharp/CSharpParser.g4'
_, parser_grammar_base_file = os.path.split(parser_grammar_url)
parser_grammar_file = os.path.join(os.getcwd(), r'tmp', parser_grammar_base_file)
# NOTE(review): placeholder credentials are sent as HTTP basic auth; confirm
# whether authentication is needed at all for this URL, and never commit
# real credentials here.
response = requests.get(parser_grammar_url, auth=('my username', 'my password'))
# Stop here on an HTTP error rather than saving the error page as a grammar.
response.raise_for_status()
with open(parser_grammar_file, 'wb') as grammar_out:
    grammar_out.write(response.content)

# --- Lexer grammar --------------------------------------------------------
lexer_grammar_url = 'https://raw.github.com/antlr/grammars-v4/master/csharp/CSharpLexer.g4'
_, lexer_grammar_base_file = os.path.split(lexer_grammar_url)
lexer_grammar_file = os.path.join(os.getcwd(), r'tmp', lexer_grammar_base_file)
response = requests.get(lexer_grammar_url, auth=('my username', 'my password'))
response.raise_for_status()
with open(lexer_grammar_file, 'wb') as grammar_out:
    grammar_out.write(response.content)

# Folder that receives the files generated by the ANTLR tool.
generation_folder = os.path.join(tmp_folder, "generation")

# Grammar file name without its extension, e.g. "CSharpParser".
parser_grammar_name = os.path.splitext(parser_grammar_base_file)[0]
# NOTE(review): the three names below are not used anywhere in the visible
# script. They follow the "<Grammar>Lexer.py" / "<Grammar>Parser.py" pattern,
# which for "CSharpParser" yields "CSharpParserLexer.py" and
# "CSharpParserParser.py" -- presumably a leftover from a single-file
# grammar; verify before relying on them.
lexer_base_file = parser_grammar_name + "Lexer" + ".py"
listener_base_file = parser_grammar_name + "Listener" + ".py"
parser_base_file = parser_grammar_name + "Parser" + ".py"
def call_antlr(jar_file, g4_file, language):
    """Run the ANTLR tool on one grammar file and return its exit status.

    Generated files are written to the module-level ``generation_folder``.

    Args:
        jar_file: Path to the ANTLR "complete" jar, used as the Java
            classpath.
        g4_file: Path to the ``.g4`` grammar to process.
        language: Target language passed to the tool as ``-Dlanguage=...``
            (for example ``"Python3"``).

    Returns:
        The exit status of the ``java`` process as an ``int``.
    """
    command = [
        'java',
        '-Xmx500M',
        '-cp',
        jar_file,
        'org.antlr.v4.Tool',
        '-visitor',
        '-o',
        generation_folder,
        '-Dlanguage=' + language,
        g4_file,
    ]
    # NOTE(review): the status is returned instead of raised on purpose --
    # the tool has been seen to print "error(...)" lines while the rest of
    # the script still runs; confirm before switching to check_call().
    return subprocess.call(command)
def get_module_class(module_name, class_name):
    """Import ``module_name`` and return its attribute named ``class_name``.

    Args:
        module_name: Absolute dotted name of the module to import.
        class_name: Name of the attribute to look up on that module.

    Returns:
        Whatever object the module exposes under ``class_name``.
    """
    return getattr(importlib.import_module(module_name), class_name)
# Generate the Python 3 lexer first, then the parser (plus listener/visitor).
call_antlr(antlr_file, lexer_grammar_file, "Python3")
call_antlr(antlr_file, parser_grammar_file, "Python3")

# The generated modules did not exist when the interpreter started, so the
# import system's cached directory listings must be dropped before they can
# be reliably found.
importlib.invalidate_caches()

# Dotted package path of the generation folder, relative to the working
# directory the script is run from.
generated_package = "tmp.generation"

lexer_class_name = "CSharp" + "Lexer"
parser_class_name = parser_grammar_name
listener_class_name = parser_grammar_name + "Listener"
visitor_class_name = parser_grammar_name + "Visitor"

# Each generated module contains a class with the same name as the module.
lexer_class = get_module_class(generated_package + "." + lexer_class_name, lexer_class_name)
parser_class = get_module_class(generated_package + "." + parser_class_name, parser_class_name)
listener_class = get_module_class(generated_package + "." + listener_class_name, listener_class_name)
visitor_class = get_module_class(generated_package + "." + visitor_class_name, visitor_class_name)
The code works except for two things:
First, I got errors which actually seem to be warnings (see this issue on GitHub):
error(134): /home/perret/Coding/Playground/Python/Playground/tmp/CSharpParser.g4:24:0: symbol type conflicts with generated code in target language or runtime
error(134): /home/perret/Coding/Playground/Python/Playground/tmp/CSharpParser.g4:71:7: symbol type conflicts with generated code in target language or runtime
error(134): /home/perret/Coding/Playground/Python/Playground/tmp/CSharpParser.g4:159:15: symbol type conflicts with generated code in target language or runtime
error(134): /home/perret/Coding/Playground/Python/Playground/tmp/CSharpParser.g4:188:23: symbol type conflicts with generated code in target language or runtime
error(134): /home/perret/Coding/Playground/Python/Playground/tmp/CSharpParser.g4:190:22: symbol type conflicts with generated code in target language or runtime
error(134): /home/perret/Coding/Playground/Python/Playground/tmp/CSharpParser.g4:289:23: symbol type conflicts with generated code in target language or runtime
error(134): /home/perret/Coding/Playground/Python/Playground/tmp/CSharpParser.g4:408:3: symbol type conflicts with generated code in target language or runtime
error(134): /home/perret/Coding/Playground/Python/Playground/tmp/CSharpParser.g4:422:9: symbol type conflicts with generated code in target language or runtime
error(134): /home/perret/Coding/Playground/Python/Playground/tmp/CSharpParser.g4:624:3: symbol type conflicts with generated code in target language or runtime
error(134): /home/perret/Coding/Playground/Python/Playground/tmp/CSharpParser.g4:656:3: symbol type conflicts with generated code in target language or runtime
error(134): /home/perret/Coding/Playground/Python/Playground/tmp/CSharpParser.g4:757:34: symbol type conflicts with generated code in target language or runtime
error(134): /home/perret/Coding/Playground/Python/Playground/tmp/CSharpParser.g4:833:7: symbol type conflicts with generated code in target language or runtime
error(134): /home/perret/Coding/Playground/Python/Playground/tmp/CSharpParser.g4:905:14: symbol type conflicts with generated code in target language or runtime
error(134): /home/perret/Coding/Playground/Python/Playground/tmp/CSharpParser.g4:1077:9: symbol type conflicts with generated code in target language or runtime
error(134): /home/perret/Coding/Playground/Python/Playground/tmp/CSharpParser.g4:1089:9: symbol type conflicts with generated code in target language or runtime
error(134): /home/perret/Coding/Playground/Python/Playground/tmp/CSharpParser.g4:1119:3: symbol type conflicts with generated code in target language or runtime
error(134): /home/perret/Coding/Playground/Python/Playground/tmp/CSharpParser.g4:179:8: symbol type conflicts with generated code in target language or runtime
error(134): /home/perret/Coding/Playground/Python/Playground/tmp/CSharpParser.g4:185:43: symbol type conflicts with generated code in target language or runtime
error(134): /home/perret/Coding/Playground/Python/Playground/tmp/CSharpParser.g4:780:38: symbol type conflicts with generated code in target language or runtime
error(134): /home/perret/Coding/Playground/Python/Playground/tmp/CSharpParser.g4:819:12: symbol type conflicts with generated code in target language or runtime
error(134): /home/perret/Coding/Playground/Python/Playground/tmp/CSharpParser.g4:824:11: symbol type conflicts with generated code in target language or runtime
error(134): /home/perret/Coding/Playground/Python/Playground/tmp/CSharpParser.g4:71:18: symbol type conflicts with generated code in target language or runtime
error(134): /home/perret/Coding/Playground/Python/Playground/tmp/CSharpParser.g4:80:48: symbol type conflicts with generated code in target language or runtime
error(134): /home/perret/Coding/Playground/Python/Playground/tmp/CSharpParser.g4:135:81: symbol type conflicts with generated code in target language or runtime
error(134): /home/perret/Coding/Playground/Python/Playground/tmp/CSharpParser.g4:306:8: symbol type conflicts with generated code in target language or runtime
error(134): /home/perret/Coding/Playground/Python/Playground/tmp/CSharpParser.g4:330:8: symbol type conflicts with generated code in target language or runtime
error(134): /home/perret/Coding/Playground/Python/Playground/tmp/CSharpParser.g4:338:14: symbol dir conflicts with generated code in target language or runtime
Second, the python module for the lexer cannot be fetched at line 75:
lexer_class = get_module_class("tmp.generation." + lexer_class_name, lexer_class_name)
The error shows:
Traceback (most recent call last):
File "/home/perret/Coding/Playground/Python/Playground/main.py", line 75, in <module>
lexer_class = get_module_class("tmp.generation." + lexer_class_name, lexer_class_name)
File "/home/perret/Coding/Playground/Python/Playground/main.py", line 62, in get_module_class
module = importlib.import_module(module_name)
File "/usr/lib/python3.7/importlib/__init__.py", line 127, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 1006, in _gcd_import
File "<frozen importlib._bootstrap>", line 983, in _find_and_load
File "<frozen importlib._bootstrap>", line 967, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 677, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 724, in exec_module
File "<frozen importlib._bootstrap_external>", line 860, in get_code
File "<frozen importlib._bootstrap_external>", line 791, in source_to_code
File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
File "/home/perret/Coding/Playground/Python/Playground/tmp/generation/CSharpLexer.py", line 1412
private int interpolatedStringLevel;
^
When I checked CSharpLexer.py I was surprised to find that the lexer contains C# code in the middle of the Python class definition...:
# Rest of the code
def __init__(self, input=None, output:TextIO = sys.stdout):
super().__init__(input, output)
self.checkVersion("4.7.2")
self._interp = LexerATNSimulator(self, self.atn, self.decisionsToDFA, PredictionContextCache())
self._actions = None
self._predicates = None
private int interpolatedStringLevel;
private Stack<Boolean> interpolatedVerbatiums = new Stack<Boolean>();
private Stack<Integer> curlyLevels = new Stack<Integer>();
private boolean verbatium;
def action(self, localctx:RuleContext, ruleIndex:int, actionIndex:int):
if self._actions is None:
# Rest of code
I am wondering whether the g4 grammars provided at:
- https://raw.githubusercontent.com/antlr/grammars-v4/master/csharp/CSharpParser.g4
- https://raw.githubusercontent.com/antlr/grammars-v4/master/csharp/CSharpLexer.g4

are correct, or whether I am making a mistake in my ANTLR automation with Python 3?
I got my automation code working with the JSON grammar (which consists of only one file), so I am wondering whether there is anything wrong with the C# grammar.