Commit c13f63ea authored by Eckhart Arnold

Merge branch 'development' of https://gitlab.lrz.de/badw-it/DHParser into development

parents 4b75b24e 7f412297
......@@ -39,7 +39,7 @@ DHParser Version 0.9.4 (6.10.2020)
- language servers can now also be connected via streams, not only tcp
- Rudimentary Language Server example (for Visual Studio Code etc.) in
examples/EBNF added
- faster tree serialiazation with Node.as_sxpr or Node.as_xml
- faster tree serialization with Node.as_sxpr or Node.as_xml
DHParser Version 0.9.3 (23.6.2020)
......
......@@ -38,6 +38,7 @@ __all__ = ('ALLOWED_PRESET_VALUES',
'finalize_presets',
'get_preset_value',
'set_preset_value',
'NO_DEFAULT',
'THREAD_LOCALS',
'access_thread_locals',
'get_config_value',
......@@ -204,10 +205,12 @@ def access_thread_locals() -> Any:
return THREAD_LOCALS
def get_config_value(key: str) -> Any:
def get_config_value(key: str, default: Any = NO_DEFAULT) -> Any:
"""
Retrieves a configuration value thread-safely.
:param key: the key (an immutable, usually a string)
:param default: a default value that is returned if no config-value
exists for the key.
:return: the value
"""
with access_lock:
......@@ -221,7 +224,7 @@ def get_config_value(key: str) -> Any:
return cfg[key]
except KeyError:
access_presets()
value = get_preset_value(key)
value = get_preset_value(key, default)
finalize_presets()
THREAD_LOCALS.config[key] = value
return value
......
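Annotation: the hunks above give `get_config_value()` an optional `default` that is passed through to `get_preset_value()`. A sentinel object rather than `None` marks "no default supplied", so callers can still use `None` as a legitimate default. A minimal, self-contained sketch of that pattern (the flat `_config` dict is a stand-in for DHParser's thread-local, lock-protected store):

```python
from typing import Any

NO_DEFAULT = object()  # unique sentinel: distinguishes "no default" from None

_config = {'resume_notices': False}  # stand-in for the thread-local store

def get_config_value(key: str, default: Any = NO_DEFAULT) -> Any:
    try:
        return _config[key]
    except KeyError:
        if default is NO_DEFAULT:
            raise          # no fallback supplied: surface the missing key
        return default

assert get_config_value('resume_notices') is False
assert get_config_value('unknown_key', 42) == 42
```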
......@@ -49,7 +49,7 @@ from DHParser.log import CallItem, HistoryRecord
from DHParser.preprocess import BEGIN_TOKEN, END_TOKEN, RX_TOKEN_NAME
from DHParser.stringview import StringView, EMPTY_STRING_VIEW
from DHParser.syntaxtree import ChildrenType, Node, RootNode, WHITESPACE_PTYPE, \
TOKEN_PTYPE, ZOMBIE_TAG, EMPTY_NODE, ResultType
TOKEN_PTYPE, ZOMBIE_TAG, EMPTY_NODE, EMPTY_PTYPE, ResultType
from DHParser.toolkit import sane_parser_name, escape_ctrl_chars, re, cython, \
abbreviate_middle, RX_NEVER_MATCH, RxPatternType, linebreaks, line_col, identity
......@@ -499,10 +499,7 @@ class Parser:
`reset()`-method of the derived class."""
global _GRAMMAR_PLACEHOLDER
grammar = self._grammar
if is_grammar_placeholder(grammar):
self.visited: MemoizationDict = dict()
else:
self.visited = grammar.get_memoization_dict__(self)
self.visited: MemoizationDict = grammar.get_memoization_dict__(self)
@cython.locals(location=cython.int, gap=cython.int, i=cython.int)
def __call__(self: 'Parser', text: StringView) -> ParsingResult:
......@@ -664,10 +661,11 @@ class Parser:
@property
def grammar(self) -> 'Grammar':
try:
if not is_grammar_placeholder(self._grammar):
return self._grammar
else:
raise ValueError('Grammar has not yet been set!')
# if not is_grammar_placeholder(self._grammar):
# return self._grammar
# else:
# raise ValueError('Grammar has not yet been set!')
return self._grammar
except (AttributeError, NameError):
raise AttributeError('Parser placeholder does not have a grammar!')
......@@ -1489,10 +1487,13 @@ class Grammar:
parser.grammar = self
def get_memoization_dict__(self, parser: Parser):
def get_memoization_dict__(self, parser: Parser) -> MemoizationDict:
"""Returns the memoization dictionary for the parser's equivalence class.
"""
return self.memoization__.setdefault(parser.eq_class, {})
try:
return self.memoization__.setdefault(parser.eq_class, {})
except AttributeError: # happens when grammar object is the placeholder
return dict()
def __call__(self,
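Annotation: two related simplifications above. `Parser.reset()` now unconditionally asks the grammar for the memoization dictionary, and `get_memoization_dict__()` absorbs the placeholder case by catching the `AttributeError` that the placeholder raises, handing back a throwaway dict instead. A sketch of that shape (toy classes, not DHParser's):

```python
from typing import Any, Dict

MemoizationDict = Dict[int, Any]  # location -> cached parsing result

class GrammarPlaceholder:
    """Has no memoization__ attribute, like DHParser's placeholder object."""

class Grammar:
    def __init__(self) -> None:
        self.memoization__: Dict[str, MemoizationDict] = {}

    def get_memoization_dict__(self, eq_class: str) -> MemoizationDict:
        try:
            # one shared cache per parser equivalence class
            return self.memoization__.setdefault(eq_class, {})
        except AttributeError:  # grammar object is still the placeholder
            return dict()       # throwaway dict: nothing gets memoized

g = Grammar()
assert g.get_memoization_dict__('Series') is g.get_memoization_dict__('Series')
assert Grammar.get_memoization_dict__(GrammarPlaceholder(), 'Series') == {}
```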
......@@ -1670,6 +1671,8 @@ class Grammar:
result.result = result.children + (error_node,)
else:
self.tree__.new_error(result, error_msg, error_code)
if result is EMPTY_NODE: # don't ever deal out the EMPTY_NODE singleton!
result = Node(EMPTY_PTYPE, '').with_pos(0)
self.tree__.swallow(result, document, source_mapping)
if not self.tree__.source: self.tree__.source = document
self.start_parser__ = None
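Annotation: the two added lines above guard against handing the shared `EMPTY_NODE` singleton to `tree__.swallow()`; anything that later attaches a position or an error to the result would otherwise mutate a global object. A toy reconstruction of the hazard (simplified `Node`, illustrative names):

```python
class Node:
    def __init__(self, name: str, content: str) -> None:
        self.name, self.content, self.pos = name, content, -1

    def with_pos(self, pos: int) -> 'Node':
        self.pos = pos
        return self

EMPTY_PTYPE = ':EMPTY'
EMPTY_NODE = Node(EMPTY_PTYPE, '')  # shared singleton

def finalize(result: Node) -> Node:
    if result is EMPTY_NODE:        # don't ever deal out the singleton!
        result = Node(EMPTY_PTYPE, '').with_pos(0)
    return result

r = finalize(EMPTY_NODE)
r.pos = 99                      # mutating the fresh copy...
assert EMPTY_NODE.pos == -1     # ...leaves the singleton untouched
```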
......@@ -3930,7 +3933,7 @@ class Forward(UnaryParser):
result = self.parser(text)
self.recursion_counter[location] = depth # allow moving back and forth
else:
recursion_state = grammar.suspend_memoization__
memoization_state = grammar.suspend_memoization__
self.recursion_counter[location] = 0 # fail on the first recursion
grammar.suspend_memoization__ = False
result = self.parser(text)
......@@ -3967,9 +3970,9 @@ class Forward(UnaryParser):
break
result = next_result
depth += 1
grammar.suspend_memoization__ = recursion_state \
or location <= (grammar.last_rb__loc__ + int(text._len == result[1]._len))
# grammar.suspend_memoization__ = recursion_state
# grammar.suspend_memoization__ = recursion_state \
# or location <= (grammar.last_rb__loc__ + int(text._len == result[1]._len))
grammar.suspend_memoization__ = memoization_state
if not grammar.suspend_memoization__:
visited[location] = result
return result
......
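Annotation: the `Forward` hunks rename `recursion_state` to the more accurate `memoization_state` and, instead of recomputing the suspension condition after the seed-growing loop, simply restore the flag that was saved before the first recursive descent (the old recomputation survives only as a comment). A sketch of that save/restore discipline, with the left-recursion machinery itself elided and a toy context object in place of the grammar:

```python
class GrammarCtx:
    suspend_memoization__ = False

def grow_seed(grammar: GrammarCtx, parse_once):
    memoization_state = grammar.suspend_memoization__  # save caller's flag
    grammar.suspend_memoization__ = False              # probe without caching
    result = parse_once()
    # ... seed-growing iterations would run here ...
    grammar.suspend_memoization__ = memoization_state  # restore, don't recompute
    return result

g = GrammarCtx()
g.suspend_memoization__ = True
grow_seed(g, lambda: 'result')
assert g.suspend_memoization__ is True  # caller's state survives the call
```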
......@@ -52,8 +52,31 @@ def run_grammar_tests(glob_pattern, get_grammar, get_transformer):
return error_report
def cpu_profile(func):
import cProfile as profile
import pstats
pr = profile.Profile()
pr.enable()
result = func()
pr.disable()
st = pstats.Stats(pr)
st.strip_dirs()
st.sort_stats('time').print_stats(40)
return result
if __name__ == '__main__':
argv = sys.argv[:]
try:
i = argv.index('--profile')
del argv[i]
access_presets()
set_preset_value('test_parallelization', False)
finalize_presets()
print("Profiling test run...")
profile = True
except ValueError:
profile = False
if len(argv) > 1 and sys.argv[1] == "--debug":
DEBUG = True
del argv[1]
......@@ -78,7 +101,11 @@ if __name__ == '__main__':
force=False)
sys.path.append('.')
from FixedEBNFParser import get_grammar, get_transformer
error_report = run_grammar_tests(arg, get_grammar, get_transformer)
if profile:
error_report = cpu_profile(
lambda : run_grammar_tests(arg, get_grammar, get_transformer))
else:
error_report = run_grammar_tests(arg, get_grammar, get_transformer)
if error_report:
print('\n')
print(error_report)
......
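Annotation: the new `cpu_profile()` helper wraps a zero-argument callable in `cProfile` and prints the hottest functions. For reference, a self-contained version of the same idiom with a trivial workload; the `pstats` calls all return the `Stats` object, so they chain:

```python
import cProfile
import pstats

def cpu_profile(func):
    pr = cProfile.Profile()
    pr.enable()
    result = func()            # run the workload under the profiler
    pr.disable()
    st = pstats.Stats(pr)
    st.strip_dirs().sort_stats('time').print_stats(10)  # top 10 by own time
    return result

total = cpu_profile(lambda: sum(i * i for i in range(10**5)))
print(total)
```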
......@@ -88,13 +88,13 @@ def preprocess_new(source):
#
#######################################################################
class EBNFGrammar(Grammar):
r"""Parser for an EBNF source file.
class FlexibleEBNFGrammar(Grammar):
r"""Parser for a FlexibleEBNF source file.
"""
countable = Forward()
element = Forward()
expression = Forward()
source_hash__ = "039bffeb637f4cf2eca83dd83477b83a"
source_hash__ = "431992357f565327257002ab0af2018a"
disposable__ = re.compile('component$|pure_elem$|countable$|FOLLOW_UP$|SYM_REGEX$|ANY_SUFFIX$|EOF$')
static_analysis_pending__ = [] # type: List[bool]
parser_initialization__ = ["upon instantiation"]
......@@ -153,7 +153,7 @@ class EBNFGrammar(Grammar):
sequence = Series(Option(Series(Text("§"), dwsp__)), Alternative(interleave, lookaround), ZeroOrMore(Series(Retrieve(AND), dwsp__, Option(Series(Text("§"), dwsp__)), Alternative(interleave, lookaround))))
FOLLOW_UP = Alternative(Text("@"), symbol, EOF)
definition = Series(symbol, Retrieve(DEF), dwsp__, Option(Series(Retrieve(OR), dwsp__)), expression, Retrieve(ENDL), dwsp__, Lookahead(FOLLOW_UP), mandatory=1)
component = Alternative(regexp, literals, procedure, Series(symbol, NegativeLookahead(DEF)))
component = Alternative(literals, procedure, expression)
directive = Series(Series(Text("@"), dwsp__), symbol, Series(Text("="), dwsp__), component, ZeroOrMore(Series(Series(Text(","), dwsp__), component)), Lookahead(FOLLOW_UP), mandatory=1)
element.set(Alternative(Series(Option(retrieveop), symbol, NegativeLookahead(Retrieve(DEF))), literal, plaintext, regexp, char_range, Series(character, dwsp__), any_char, whitespace, group))
countable.set(Alternative(option, oneormore, element))
......@@ -164,9 +164,9 @@ class EBNFGrammar(Grammar):
root__ = syntax
_raw_grammar = ThreadLocalSingletonFactory(EBNFGrammar, ident=1)
_raw_grammar = ThreadLocalSingletonFactory(FlexibleEBNFGrammar, ident=1)
def get_grammar() -> EBNFGrammar:
def get_grammar() -> FlexibleEBNFGrammar:
grammar = _raw_grammar()
if get_config_value('resume_notices'):
resume_notices_on(grammar)
......@@ -179,7 +179,7 @@ def get_grammar() -> EBNFGrammar:
pass
return grammar
def parse_EBNF(document, start_parser = "root_parser__", *, complete_match=True):
def parse_FlexibleEBNF(document, start_parser = "root_parser__", *, complete_match=True):
return get_grammar()(document, start_parser, complete_match)
......
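Annotation: alongside the `EBNFGrammar` to `FlexibleEBNFGrammar` rename, the module keeps using `ThreadLocalSingletonFactory`, so each thread gets exactly one grammar instance. A hedged sketch of what such a factory boils down to (simplified; DHParser's class takes extra arguments such as `ident` and does more bookkeeping):

```python
import threading

def thread_local_singleton_factory(cls):
    local = threading.local()          # separate storage per thread
    def factory():
        try:
            return local.instance      # reuse this thread's instance
        except AttributeError:
            local.instance = cls()     # first call in this thread
            return local.instance
    return factory

class FlexibleEBNFGrammar:             # stub standing in for the generated grammar
    pass

get = thread_local_singleton_factory(FlexibleEBNFGrammar)
assert get() is get()                  # same object within one thread
```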
......@@ -16,7 +16,8 @@ if dhparserdir not in sys.path:
sys.path.append(dhparserdir)
try:
from DHParser.configuration import get_config_value, set_config_value
from DHParser.configuration import get_config_value, set_config_value, \
access_presets, set_preset_value, finalize_presets
from DHParser import dsl
import DHParser.log
from DHParser import testing
......@@ -57,9 +58,32 @@ def run_grammar_tests(glob_pattern, get_grammar, get_transformer):
return error_report
def cpu_profile(func):
import cProfile as profile
import pstats
pr = profile.Profile()
pr.enable()
result = func()
pr.disable()
st = pstats.Stats(pr)
st.strip_dirs()
st.sort_stats('time').print_stats(80)
return result
if __name__ == '__main__':
argv = sys.argv[:]
if len(argv) > 1 and sys.argv[1] == "--debug":
try:
i = argv.index('--profile')
del argv[i]
access_presets()
set_preset_value('test_parallelization', False)
finalize_presets()
print("Profiling test run...")
profile = True
except ValueError:
profile = False
if len(argv) > 1 and argv[1] == "--debug":
LOGGING = True
del argv[1]
if (len(argv) >= 2 and (argv[1].endswith('.ebnf') or
......@@ -77,7 +101,11 @@ if __name__ == '__main__':
force=False)
sys.path.append('.')
from FlexibleEBNFParser import get_grammar, get_transformer
error_report = run_grammar_tests(arg, get_grammar, get_transformer)
if profile:
error_report = cpu_profile(
lambda : run_grammar_tests(arg, get_grammar, get_transformer))
else:
error_report = run_grammar_tests(arg, get_grammar, get_transformer)
if error_report:
print('\n')
print(error_report)
......
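Annotation: both test scripts gain the same `--profile` switch. The idiom: copy `sys.argv`, try to delete the flag, and let the `ValueError` from `list.index()` mean "flag absent". The scripts also disable `test_parallelization` first, plausibly because `cProfile` only sees the current process. The flag handling in isolation:

```python
import sys

argv = sys.argv[:]                       # work on a copy, leave sys.argv intact
try:
    del argv[argv.index('--profile')]    # raises ValueError if flag is absent
    profile = True
except ValueError:
    profile = False

print('profiling' if profile else 'normal run', argv)
```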
[match:_array_ellipsis]
M1: '''{ line: 2, startChar: 5, length: 3, tokenType: "property",
M1*: '''{ line: 2, startChar: 5, length: 3, tokenType: "property",
tokenModifiers: ["private", "static"]
},
{ line: 2, startChar: 10, length: 4, tokenType: "type", tokenModifiers: [] },
......
......@@ -10,4 +10,10 @@ M4: """export const EOL: string[] = ['\n', '\r\n', '\r'];"""
[fail:const]
[match:assignment]
M1: """textDocument.codeAction.resolveSupport = { properties: ['edit'] };"""
\ No newline at end of file
M1: """textDocument.codeAction.resolveSupport = { properties: ['edit'] };"""
[match:declaration]
M1: "zahl: integer"
[ast:declaration]
M1: (declaration (identifier "zahl") (basic_type "integer"))
......@@ -50,8 +50,9 @@ declaration = [qualifier] identifier [optional] [":" types]
optional = "?"
index_signature = "[" identifier (":" | "in" "keyof") _type "]"
types = _type { "|" _type }
_type = array_of | basic_type | identifier | "(" types ")"
_type = array_of | basic_type | type_name | "(" types ")"
| mapped_type | declarations_block | type_tuple | _literal
type_name = identifier
array_of = (basic_type | "(" types ")" | identifier) "[]"
type_tuple = "[" _type {"," _type} "]"
mapped_type = "{" map_signature [";"] "}"
......@@ -111,7 +112,7 @@ _name = identifier | '"' identifier '"'
#######################################################################
basic_type = (`object` | `array` | `string` | `number` | `boolean` | `null`
`integer` | `uinteger` )~
| `integer` | `uinteger` ) ~
#######################################################################
......
......@@ -99,7 +99,7 @@ class ts2dataclassGrammar(Grammar):
declarations_block = Forward()
index_signature = Forward()
types = Forward()
source_hash__ = "b6b4d2e92e13a9911485fd79e02d8cac"
source_hash__ = "13dbdf4a7375250d71c167c8e49c5dc6"
disposable__ = re.compile('INT$|NEG$|FRAC$|DOT$|EXP$|EOF$|_type$|_literal$|_name$|_array_ellipsis$|_top_level_assignment$|_top_level_literal$')
static_analysis_pending__ = [] # type: List[bool]
parser_initialization__ = ["upon instantiation"]
......@@ -117,7 +117,7 @@ class ts2dataclassGrammar(Grammar):
INT = Series(Option(NEG), Alternative(RegExp('[1-9][0-9]+'), RegExp('[0-9]')))
identifier = Series(RegExp('(?!\\d)\\w+'), dwsp__)
variable = Series(identifier, ZeroOrMore(Series(Text("."), identifier)))
basic_type = Series(Alternative(Text("object"), Text("array"), Text("string"), Text("number"), Text("boolean"), Series(Text("null"), Text("integer")), Text("uinteger")), dwsp__)
basic_type = Series(Alternative(Text("object"), Text("array"), Text("string"), Text("number"), Text("boolean"), Text("null"), Text("integer"), Text("uinteger")), dwsp__)
_name = Alternative(identifier, Series(Series(Drop(Text('"')), dwsp__), identifier, Series(Drop(Text('"')), dwsp__)))
association = Series(_name, Series(Drop(Text(":")), dwsp__), _literal)
object = Series(Series(Drop(Text("{")), dwsp__), Option(Series(association, ZeroOrMore(Series(Series(Drop(Text(",")), dwsp__), association)))), Series(Drop(Text("}")), dwsp__))
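Annotation: the `basic_type` fix above is the payoff of the one-character EBNF change (adding the missing `|` before `` `integer` ``): the old grammar parsed `` `null` `integer` `` as a sequence, so the generated parser contained `Series(Text("null"), Text("integer"))` and recognized neither literal on its own. A micro-combinator demonstration of the difference (hypothetical `text`/`series`/`alternative` helpers, not DHParser's classes):

```python
def text(t):
    return lambda s: (t, s[len(t):]) if s.startswith(t) else None

def series(*ps):
    def parse(s):
        matched = []
        for p in ps:
            r = p(s)
            if r is None:
                return None
            matched.append(r[0])
            s = r[1]
        return ''.join(matched), s
    return parse

def alternative(*ps):
    def parse(s):
        for p in ps:
            r = p(s)
            if r is not None:
                return r
        return None
    return parse

old = alternative(text("boolean"), series(text("null"), text("integer")))
new = alternative(text("boolean"), text("null"), text("integer"))

assert old("integer") is None                     # bug: 'integer' unrecognized
assert old("nullinteger") == ("nullinteger", "")  # only the fused form matched
assert new("integer") == ("integer", "")          # fixed
```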
......@@ -137,13 +137,14 @@ class ts2dataclassGrammar(Grammar):
mapped_type = Series(Series(Drop(Text("{")), dwsp__), map_signature, Option(Series(Drop(Text(";")), dwsp__)), Series(Drop(Text("}")), dwsp__))
type_tuple = Series(Series(Drop(Text("[")), dwsp__), _type, ZeroOrMore(Series(Series(Drop(Text(",")), dwsp__), _type)), Series(Drop(Text("]")), dwsp__))
array_of = Series(Alternative(basic_type, Series(Series(Drop(Text("(")), dwsp__), types, Series(Drop(Text(")")), dwsp__)), identifier), Series(Drop(Text("[]")), dwsp__))
type_name = Synonym(identifier)
extends = Series(Series(Drop(Text("extends")), dwsp__), identifier, ZeroOrMore(Series(Series(Drop(Text(",")), dwsp__), identifier)))
type_alias = Series(Option(Series(Drop(Text("export")), dwsp__)), Series(Drop(Text("type")), dwsp__), identifier, Series(Drop(Text("=")), dwsp__), types, Series(Drop(Text(";")), dwsp__), mandatory=2)
interface = Series(Option(Series(Drop(Text("export")), dwsp__)), Series(Drop(Text("interface")), dwsp__), identifier, Option(type_parameter), Option(extends), declarations_block, mandatory=2)
optional = Series(Text("?"), dwsp__)
qualifier = Series(Text("readonly"), dwsp__)
_literal.set(Alternative(number, string, array, object))
_type.set(Alternative(array_of, basic_type, identifier, Series(Series(Drop(Text("(")), dwsp__), types, Series(Drop(Text(")")), dwsp__)), mapped_type, declarations_block, type_tuple, _literal))
_type.set(Alternative(array_of, basic_type, type_name, Series(Series(Drop(Text("(")), dwsp__), types, Series(Drop(Text(")")), dwsp__)), mapped_type, declarations_block, type_tuple, _literal))
types.set(Series(_type, ZeroOrMore(Series(Series(Drop(Text("|")), dwsp__), _type))))
index_signature.set(Series(Series(Drop(Text("[")), dwsp__), identifier, Alternative(Series(Drop(Text(":")), dwsp__), Series(Series(Drop(Text("in")), dwsp__), Series(Drop(Text("keyof")), dwsp__))), _type, Series(Drop(Text("]")), dwsp__)))
declaration.set(Series(Option(qualifier), identifier, Option(optional), Option(Series(Series(Drop(Text(":")), dwsp__), types))))
......@@ -181,6 +182,7 @@ ts2dataclass_AST_transformation_table = {
# AST Transformations for the ts2dataclass-grammar
# "<": flatten,
"types": [replace_by_single_child],
"type_name": [reduce_single_child],
":Text": change_tag_name('TEXT')
# "*": replace_by_single_child
}
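Annotation: the new `"type_name": [reduce_single_child]` rule flattens the `type_name = identifier` synonym introduced in the grammar, so type references keep their distinguishing tag without an extra nesting level. A sketch of the effect (toy `Node`, not DHParser's): `(type_name (identifier "Position"))` becomes `(type_name "Position")`.

```python
class Node:
    def __init__(self, tag, result):
        self.tag, self.result = tag, result

def reduce_single_child(node):
    if isinstance(node.result, Node):     # exactly one child node
        node.result = node.result.result  # pull its content up a level

n = Node('type_name', Node('identifier', 'Position'))
reduce_single_child(n)
assert n.tag == 'type_name' and n.result == 'Position'
```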
......@@ -212,19 +214,43 @@ class ts2dataclassCompiler(Compiler):
def reset(self):
super().reset()
self.PythonEnums = get_config_value('ts2dataclass.PythonEnums', False)
# initialize your variables here, not in the constructor!
def on_document(self, node):
return node
return self.compile(node)
def on_interface(self, node):
return node
name = self.compile(node['identifier'])
try:
tp = self.compile(node['type_parameter'])
preface = f"{tp} = TypeVar('{tp}')"
except KeyError:
tp = ''
preface = ''
try:
base_classes = self.compile(node['extends'])
if tp:
base_classes += f", Generic[{tp}]"
except KeyError:
base_classes = f"Generic[{tp}]" if tp else ''
if base_classes:
interface = f"class {name}({base_classes}):"
else:
interface = f"class {name}:"
decls = self.compile(node['declarations_block'])
return interface + '\n ' + decls.replace('\n', '\n ')
def on_type_parameter(self, node):
return self.compile(node['identifier'])
# def on_type_parameter(self, node):
# return node
def on_extends(self, node):
return ', '.join(self.compile(nd) for nd in node.children)
# def on_type_alias(self, node):
# return node
def on_type_alias(self, node):
alias = self.compile(node['identifier'])
types = self.compile(node[-1])
return f"{alias} = {types}"
def on_declarations_block(self, node):
declarations = '\n'.join(self.compile(nd) for nd in node
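Annotation: the fleshed-out `on_interface()` assembles a Python class header from the interface name, an optional `TypeVar` preface, and the `extends` clause, then indents the compiled declarations block under it. A hedged sketch of the intended output for a parameterized interface (names are illustrative, shape inferred from the f-strings above):

```python
from typing import Generic, TypeVar

T = TypeVar('T')              # preface: f"{tp} = TypeVar('{tp}')"

class Response(Generic[T]):   # header: f"class {name}({base_classes}):"
    result: T                 # body: compiled declarations, indented by four
```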
......@@ -241,13 +267,16 @@ class ts2dataclassCompiler(Compiler):
if 'optional' in node:
T = f"Optional[{T}]"
identifier = self.compile(node['identifier'])
return f"{identifier}: {T}"
if T == 'Any':
return identifier
else:
return f"{identifier}: {T}"
# def on_optional(self, node):
# return node
def on_optional(self, node):
assert False, "This method should never have been called!"
def on_index_signature(self, node) -> str:
return node['type'].content
return node[-1].content
def on_types(self, node):
if sys.version_info >= (3, 10) and USE_PYTHON_3_10_TYPE_UNION:
......@@ -262,8 +291,8 @@ class ts2dataclassCompiler(Compiler):
# 'number', 'string', 'array', 'object') ?
return self.compile(node)
# def on_type_tuple(self, node):
# return node
def on_type_tuple(self, node):
assert False, "Not yet implemented"
def on_mapped_type(self, node) -> str:
return cast(str, self.compile(node['map_signature']))
......@@ -272,25 +301,41 @@ class ts2dataclassCompiler(Compiler):
return "Dict[%s, %s]" % (self.compile(node['index_signature']),
self.compile(node['types']))
# def on_namespace(self, node):
# return node
# def on_enum(self, node):
# return node
# def on_item(self, node):
# return node
def on_namespace(self, node):
name = self.compile(node['identifier'])
namespace = [f'class {name}:']
for i in node.indices('const'):
namespace.append(self.compile(node[i]))
return '\n '.join(namespace)
def on_enum(self, node):
i = node.index('identifier')
base_class = '(enum.Enum)' if self.PythonEnums else ''
enum = ['class ' + self.compile(node[i]) + base_class + ':']
for item in node[i + 1:]:
enum.append(self.compile(item))
return '\n '.join(enum)
def on_item(self, node):
if len(node.children) == 1:
identifier = self.compile(node[0])
if self.PythonEnums:
return identifier + ' = enum.auto()'
else:
return identifier + ' = ' + repr(identifier)
else:
return self.compile(node[0]) + ' = ' + self.any_literal(node[1])
# def on_const(self, node):
# return node
def on_const(self, node):
self.compile(node['declaration'])
return self.compile(node['declaration']) + ' = ' + self.compile(node[-1])
def on_assignment(self, node) -> str:
return node[0].content + ' = ' + self.any_literal(node[1])
def any_literal(self, node) -> str:
nd = node[0] if node.children else node
assert nd.tag_name in ('string', 'number', 'array', 'object')
return self.compile(nd)
assert node.tag_name in ('string', 'number', 'array', 'object')
return self.compile(node)
def on_number(self, node) -> str:
return node.content
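Annotation: `on_enum()` and `on_item()` support two output styles, switched by the `ts2dataclass.PythonEnums` config value read in `reset()`. What the two styles look like side by side (hypothetical enum name, shape inferred from the string building above):

```python
import enum

# PythonEnums = True: members created with enum.auto()
class TraceLevel(enum.Enum):
    off = enum.auto()
    messages = enum.auto()

# PythonEnums = False: a plain class with string constants (identifier = repr(identifier))
class TraceLevelPlain:
    off = 'off'
    messages = 'messages'

assert TraceLevel.off.name == TraceLevelPlain.off == 'off'
```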
......@@ -304,8 +349,8 @@ class ts2dataclassCompiler(Compiler):
']'
def on_object(self, node) -> str:
return '{\n' + \
',\n'.join(self.compile(nd) for nd in node.children) + \
return '{\n ' + \
',\n '.join(self.compile(nd) for nd in node.children) + \
'\n}'
def on_association(self, node) -> str:
......@@ -322,36 +367,26 @@ class ts2dataclassCompiler(Compiler):
'null': 'None'}
return python_basic_types[node.content]
# def on_array_marker(self, node):
# return node
# def on_qualifier(self, node):
# return node
def on_identifier(self, node) -> str:
def on_type_name(self, node) -> str:
return node.content
# def on_INT(self, node):
# return node
def on_array_of(self, node) -> str:
assert len(node.children) == 1
return 'List[' + self.compile(node[0]) + ']'
# def on_NEG(self, node):
# return node
def on_qualifier(self, node):
assert False, "Qualifiers should be ignored and this method never be called!"
# def on_FRAC(self, node):
# return node
# def on_DOT(self, node):
# return node
# def on_EXP(self, node):
# return node
def on_variable(self, node) -> str:
return node.content
# def on_EOF(self, node):
# return node
def on_identifier(self, node) -> str:
return node.content
get_compiler = ThreadLocalSingletonFactory(ts2dataclassCompiler, ident=1)
def compile_ts2dataclass(ast):
return get_compiler()(ast)
......@@ -446,14 +481,21 @@ def batch_process(file_names: List[str], out_dir: str,
INSPECT_TEMPLATE = """<h2>{testname}</h2>
<h3>Test source</h3>
<div style="background-color: cornsilk;">
<code style="white-space: pre-wrap;">{test_source}
</code>
</div>
<h3>AST</h3>
<code style="background-color: lightgrey;">
{ast_str}
<div style="background-color: antiquewhite;">
<code style="white-space: pre-wrap;">{ast_str}
</code>
</div>
<h3>Program code</h3>
<code style="background-color: yellow;">
{code}
<div style="background-color: yellow;">
<code style="white-space: pre-wrap;">{code}
</code>
</div>
"""
......@@ -466,20 +508,20 @@ def inspect(test_file_path: str):
compiler = get_compiler()
results = []
for parser in test_unit:
for testname, test_code in test_unit[parser].get('match', dict()).items():
ast = grammar(test_code, parser)
ast_str = ast.as_tree()
for testname, test_source in test_unit[parser].get('match', dict()).items():
ast = grammar(test_source, parser)
transformer(ast)
ast_str = ast.as_tree()
code = compiler(ast)
results.append(INSPECT_TEMPLATE.format(testname=testname, ast_str=ast_str, code=code))
results.append(INSPECT_TEMPLATE.format(
testname=testname, test_source=test_source, ast_str=ast_str, code=code))
test_file_name = os.path.basename(test_file_path)
results_str = '\n '.join(results)
html = f'''<html>
html = f'''<!DOCTYPE html>\n<html>
<head><meta charset="utf-8"><title>{test_file_name}</title></head>
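Annotation: besides threading the original `test_source` into the HTML report, the `inspect()` hunk fixes an ordering bug: `ast_str` was computed before the transformer ran, so the report showed the raw parse tree rather than the AST. The corrected pipeline, with trivial stubs in place of the real grammar, transformer, and compiler:

```python
# Trivial stand-ins (not DHParser's objects) showing the corrected order in
# inspect(): serialize the tree only *after* the transformer has run.
def grammar(source, parser_name):
    return {'tag': 'parse_tree', 'source': source}

def transformer(ast):
    ast['tag'] = 'ast'     # in-place transformation, like DHParser's

def compiler(ast):
    return f"# compiled from a {ast['tag']}"

ast = grammar("zahl: integer", "declaration")
transformer(ast)           # 1) transform first
ast_str = repr(ast)        # 2) then serialize for the report
code = compiler(ast)
assert "'ast'" in ast_str  # the report now shows the transformed tree
```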