In January 2021 we will introduce a 10 GB quota for project repositories. Higher limits for individual projects will be available on request. Please see https://doku.lrz.de/display/PUBLIC/GitLab for more information.

Commit b00c2671 authored by eckhart's avatar eckhart

ebnf.py: added a fast-path ebnf-Parser if less flexibility regarding the variants

of EBNF-syntax is acceptable.
parent 157bd90e
......@@ -336,33 +336,59 @@ CONFIG_PRESET['default_literalws'] = "none"
# Default value for the brand of EBNF that DHParser accepts
# 'fixed' - Allows to use suffix syntax (?, +, *) as well as classic
# EBNF-syntax ([], {}). The delimiters are fixed before first use to
# the DHParser-standard and will not be read from configuration-value
# "delimiter_set".
# 'classic' - relatively closest to the ISO-standard, i.e. uses [] and {}
# for optional and zero or more elements, respectively. Does not allow
# the ?, +, * suffixes. Allows the specification of character-ranges
# within square brackets only with the ordinal unicode numbers,
# not with the characters themselves, i.e. [0x41-0x5A]
# 'regex-like' - similar to regular expression syntax, allows ?, +, *
# suffixes for optional, one or more repetitions, zero or more
# repetitions, but not {} or []. Allows character-ranges within
# square bracket in any form.
# 'peg-like' - like regex-like, but uses / instead of | for the
# alternative-parser. Does not allow regular expressions enclosed
# between / ... / within the EBNF-code!
# not with the characters themselves, i.e. [0x41-0x5A]. Delimiters will
# be configured on first use.
# 'strict' - allows both classic and regex-like syntax to be mixed, but
# allows character ranges within square brackets with ordinal values,
# only. Uses | as delimiter for alternatives.
# 'configurable' - like fixed, but the delimiter constants will be configured
# from the configuration-value 'delimiter_set' (see below).
# 'heuristic' - the most liberal mode, allows about everything. However,
# because it employs heuristics to distinguish ambiguous cases, it
# may lead to unexpected errors and require the user to resolve the
# ambiguities
# 'regex-like' - similar to regular expression syntax, allows ?, +, *
# suffixes for optional, one or more repetitions, zero or more
# repetitions, but not {} or []. Allows character-ranges within
# square bracket in any form.
# 'peg-like' - like regex-like, but uses / instead of | for the
# alternative-parser. Does not allow regular expressions enclosed
# between / ... / within the EBNF-code!
# Default value: "fixed"
EBNF_FIXED_SYNTAX = "fixed"
EBNF_CLASSIC_SYNTAX = "classic"
EBNF_ANY_SYNTAX_STRICT = "strict"
EBNF_CONFIGURABLE_SYNTAX = "configurable"
EBNF_ANY_SYNTAX_HEURISTICAL = "heuristic"
EBNF_REGULAR_EXPRESSION_SYNTAX = "regex-like"
EBNF_PARSING_EXPRESSION_GRAMMAR_SYNTAX = "peg-like"
CONFIG_PRESET['syntax_variant'] = EBNF_ANY_SYNTAX_STRICT
CONFIG_PRESET['syntax_variant'] = EBNF_FIXED_SYNTAX
# Set of delimiters when using the 'configurable'-Grammar
CONFIG_PRESET['delimiter_set'] = {
'DEF': '=',
'OR': '|',
'AND': '',
'ENDL': '',
'RNG_OPEN': '{',
'RNG_CLOSE': '}',
'RNG_DELIM': ',',
'TIMES': '*',
'RE_LEADIN': '/',
'RE_LEADOUT': '/',
'CH_LEADIN': '0x'
}
########################################################################
......
This diff is collapsed.
......@@ -57,7 +57,6 @@ __all__ = ('ParserError',
'FlagFunc',
'ParseFunc',
'Parser',
'UnknownParserError',
'AnalysisError',
'GrammarError',
'Grammar',
......@@ -68,6 +67,7 @@ __all__ = ('ParserError',
'Text',
'DropText',
'RegExp',
'update_scanner',
'RE',
'TKN',
'Whitespace',
......@@ -777,12 +777,6 @@ def mixin_nonempty(whitespace: str) -> str:
return whitespace
class UnknownParserError(KeyError):
    """Error that is raised whenever a :class:`Grammar`-object is invoked
    with the name of a parser that does not exist, or whenever, during
    parsing, a reference to a non-existing parser is encountered."""
AnalysisError = Tuple[str, Parser, Error] # pname, parser, error
# TODO: replace with a named tuple?
......@@ -1125,7 +1119,14 @@ class Grammar:
return duplicate
def __init__(self, root: Parser = None) -> None:
def __init__(self, root: Parser = None, static_analysis: Optional[bool] = None) -> None:
"""Constructor of class Grammar.
:param root: Overrides default root parser. By default the root parser
is the parser assigned to the class field `root__`. This is useful for
executing or testing certain parts of a complex parser ensemble.
:param static_analysis: If not None, this overrides the config value
"static_analysis".
"""
self.all_parsers__ = set() # type: Set[Parser]
# add compiled regular expression for comments, if it does not already exist
if not hasattr(self, 'comment_rx__') or self.comment_rx__ is None:
......@@ -1171,9 +1172,10 @@ class Grammar:
assert 'root_parser__' in self.__dict__
assert self.root_parser__ == self.__dict__['root_parser__']
if self.static_analysis_pending__ \
and get_config_value('static_analysis') in {'early', 'late'}:
# try:
if (self.static_analysis_pending__
and (static_analysis
or (static_analysis is None
and get_config_value('static_analysis') in {'early', 'late'}))):
result = self.static_analysis()
# clears any stored errors without overwriting the pointer
while self.static_analysis_errors__:
......@@ -1183,8 +1185,6 @@ class Grammar:
if has_errors:
raise GrammarError(result)
self.static_analysis_pending__.pop()
# except (NameError, AttributeError) as e:
# pass # don't fail the initialization of PLACEHOLDER
def __str__(self):
    """Returns the name of the grammar's class as its string-representation."""
    return type(self).__name__
......@@ -1202,7 +1202,7 @@ class Grammar:
parser.apply(self._add_parser__)
assert self[key] == parser
return self[key]
raise UnknownParserError('Unknown parser "%s" !' % key)
raise AttributeError('Unknown parser "%s" !' % key)
def __contains__(self, key):
......@@ -1815,6 +1815,35 @@ class Whitespace(RegExp):
return '~'
def update_scanner(grammar: Grammar, leaf_parsers: Dict[str, str]):
    """Updates the "scanner" of a grammar by overwriting the `text`- or
    `regexp`-fields of some of or all of its leaf parsers with new values.
    This works, of course, only for those parsers that are assigned
    to a symbol in the Grammar class.

    :param grammar: The grammar-object for which the leaf parsers
        shall be updated.
    :param leaf_parsers: A mapping of parser names to strings that
        are interpreted as plain text (if the parser name refers to
        a `Text`-parser) or as regular expressions (if the parser name
        refers to a `RegExp`-parser).
    :raises AttributeError: in case a leaf parser name in the
        dictionary does not exist or does not refer to a `Text`-
        or `RegExp`-parser.
    """
    for pname, value in leaf_parsers.items():
        # Grammar.__getattr__/__getitem__ raises AttributeError for unknown names
        parser = grammar[pname]
        if isinstance(parser, Text):
            assert isinstance(value, str)
            parser.text = value
        elif isinstance(parser, RegExp):
            # allow either a pattern-string or an already compiled regular expression
            parser.regexp = re.compile(value) if isinstance(value, str) else value
        else:
            raise AttributeError('Parser %s is not a Text- or RegExp-Parser, but %s'
                                 % (pname, type(parser)))
########################################################################
#
# Meta parser classes, i.e. parsers that contain other parsers
......
......@@ -38,7 +38,7 @@ templatedir = os.path.join(os.path.dirname(scriptdir.rstrip('/')), 'templates')
from DHParser.compile import compile_source
from DHParser.configuration import access_presets, finalize_presets, \
EBNF_ANY_SYNTAX_HEURISTICAL
EBNF_ANY_SYNTAX_HEURISTICAL, EBNF_ANY_SYNTAX_STRICT, EBNF_FIXED_SYNTAX
from DHParser.dsl import compileDSL, compile_on_disk
from DHParser.error import is_error
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
......
......@@ -41,7 +41,7 @@ from DHParser.configuration import get_config_value
from DHParser.error import Error, is_error, adjust_error_locations, PARSER_LOOKAHEAD_MATCH_ONLY, \
PARSER_LOOKAHEAD_FAILURE_ONLY, MANDATORY_CONTINUATION_AT_EOF, AUTORETRIEVED_SYMBOL_NOT_CLEARED
from DHParser.log import is_logging, clear_logs, local_log_dir, log_parsing_history
from DHParser.parse import UnknownParserError, Lookahead
from DHParser.parse import Lookahead
from DHParser.syntaxtree import Node, RootNode, parse_tree, flatten_sxpr, ZOMBIE_TAG
from DHParser.trace import set_tracer, all_descendants, trace_history
from DHParser.transform import traverse, remove_children
......@@ -383,7 +383,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report='REPORT'
if has_lookahead(parser_name):
set_tracer(all_descendants(parser[parser_name]), trace_history)
track_history = True
except UnknownParserError:
except AttributeError:
pass
assert parser_name, "Missing parser name in test %s!" % unit_name
......@@ -416,7 +416,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report='REPORT'
errflag = len(errata)
try:
cst = parser(test_code, parser_name)
except UnknownParserError as upe:
except AttributeError as upe:
cst = RootNode()
cst = cst.new_error(Node(ZOMBIE_TAG, "").with_pos(0), str(upe))
clean_test_name = str(test_name).replace('*', '')
......@@ -497,7 +497,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report='REPORT'
errflag = len(errata)
try:
cst = parser(test_code, parser_name)
except UnknownParserError as upe:
except AttributeError as upe:
node = Node(ZOMBIE_TAG, "").with_pos(0)
cst = RootNode(node).new_error(node, str(upe))
errata.append('Unknown parser "{}" in fail test "{}"!'.format(
......
......@@ -17,7 +17,7 @@
# - replace the regex_heuristics by an always matching parser
#
# Ambiguities can also be avoided by NOT using all the syntactic variants
# made possible by this EBNF-grammar within one and the same EBNF-docum
# made possible by this EBNF-grammar within one and the same EBNF-document
@ comment = /(?!#x[A-Fa-f0-9])#.*(?:\n|$)|\/\*(?:.|\n)*?\*\/|\(\*(?:.|\n)*?\*\)/
# comments can be either C-Style: /* ... */
......
......@@ -76,7 +76,7 @@ class EBNFGrammar(Grammar):
countable = Forward()
element = Forward()
expression = Forward()
source_hash__ = "09b88d557e08f59db56613dadff966e3"
source_hash__ = "94480ce7a73ec2c5f878ecb207b43073"
anonymous__ = re.compile('pure_elem$|countable$|FOLLOW_UP$|SYM_REGEX$|ANY_SUFFIX$|EOF$')
static_analysis_pending__ = [] # type: List[bool]
parser_initialization__ = ["upon instantiation"]
......
# EBNF-Grammar in EBNF
# This is a faster version of EBNF, relying on fixed constants for delimiters,
# rather than variables that are captured on first use as in "EBNF.ebnf".
# Different syntactical variants are not detected by the grammar itself,
# but need to be configured by adjusting the definitions of DEF, OR,
# AND, ENDL, RNG_OPEN, RNG_CLOSE, RNG_DELIM, CH_LEADIN, TIMES, RE_LEADIN,
# RE_LEADOUT, either within this grammar definition or in the Grammar-object
# by changing the `text`-field of the respective parser objects.
@ comment = /(?!#x[A-Fa-f0-9])#.*(?:\n|$)|\/\*(?:.|\n)*?\*\/|\(\*(?:.|\n)*?\*\)/
# comments can be either C-Style: /* ... */
# or pascal/modula/oberon-style: (* ... *)
# or python-style: # ... \n, excluding, however, character markers: #x20
@ whitespace = /\s*/ # whitespace includes linefeed
@ literalws = right # trailing whitespace of literals will be ignored tacitly
@ anonymous = pure_elem, countable, FOLLOW_UP, SYM_REGEX, ANY_SUFFIX, EOF
@ drop = whitespace, EOF # do not include these even in the concrete syntax tree
@ RNG_BRACE_filter = matching_bracket() # filter or transform content of RNG_BRACE on retrieve
# re-entry-rules for resuming after parsing-error
@ definition_resume = /\n\s*(?=@|\w+\w*\s*=)/
@ directive_resume = /\n\s*(?=@|\w+\w*\s*=)/
# specialized error messages for certain cases
@ definition_error = /,/, 'Delimiter "," not expected in definition!\nEither this was meant to '
'be a directive and the directive symbol @ is missing\nor the error is '
'due to inconsistent use of the comma as a delimiter\nfor the elements '
'of a sequence.'
#: top-level
syntax = ~ { definition | directive } EOF
definition = symbol §DEF~ [ OR~ ] expression ENDL~ & FOLLOW_UP # [OR~] to support v. Rossum's syntax
directive = "@" §symbol "=" (regexp | literals | procedure | symbol !DEF)
{ "," (regexp | literals | procedure | symbol !DEF) } & FOLLOW_UP
literals = { literal }+ # string chaining, only allowed in directives!
procedure = SYM_REGEX "()" # procedure name, only allowed in directives!
FOLLOW_UP = `@` | symbol | EOF
#: components
expression = sequence { OR~ sequence }
sequence = ["§"] ( interleave | lookaround ) # "§" means all following terms mandatory
{ AND~ ["§"] ( interleave | lookaround ) }
interleave = difference { "°" ["§"] difference }
lookaround = flowmarker § (oneormore | pure_elem)
difference = term ["-" § (oneormore | pure_elem)]
term = oneormore | counted | repetition | option | pure_elem
#: elements
countable = option | oneormore | element
pure_elem = element § !ANY_SUFFIX # element strictly without a suffix
element = [retrieveop] symbol !DEF # negative lookahead to be sure it's not a definition
| literal
| plaintext
| regexp
# | char_range
| character ~
| any_char
| whitespace
| group
ANY_SUFFIX = /[?*+]/
#: flow-operators
flowmarker = "!" | "&" # '!' negative lookahead, '&' positive lookahead
| "<-!" | "<-&" # '<-!' negative lookbehind, '<-&' positive lookbehind
retrieveop = "::" | ":?" | ":" # '::' pop, ':?' optional pop, ':' retrieve
#: groups
group = "(" no_range §expression ")"
oneormore = "{" no_range expression "}+" | element "+"
repetition = "{" no_range §expression "}" | element "*" no_range
option = # !char_range
"[" §expression "]" | element "?"
counted = countable range | countable TIMES~ multiplier | multiplier TIMES~ §countable
range = RNG_OPEN~ multiplier [ RNG_DELIM~ multiplier ] RNG_CLOSE~
no_range = !multiplier | &multiplier TIMES
multiplier = /[1-9]\d*/~
#: leaf-elements
symbol = SYM_REGEX ~ # e.g. expression, term, parameter_list
literal = /"(?:(?<!\\)\\"|[^"])*?"/~ # e.g. "(", '+', 'while'
| /'(?:(?<!\\)\\'|[^'])*?'/~ # whitespace following literals will be ignored tacitly.
plaintext = /`(?:(?<!\\)\\`|[^`])*?`/~ # like literal but does not eat whitespace
| /´(?:(?<!\\)\\´|[^´])*?´/~
regexp = RE_LEADIN RE_CORE RE_LEADOUT ~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
# regexp = /\/(?:(?<!\\)\\(?:\/)|[^\/])*?\//~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
char_range = `[` &char_range_heuristics
[`^`] (character | free_char) { [`-`] character | free_char } "]"
character = CH_LEADIN HEXCODE
free_char = /[^\n\[\]\\]/ | /\\[nrt`´'"(){}\[\]\/\\]/
any_char = "."
whitespace = /~/~ # insignificant whitespace
#: delimiters
EOF = !/./
DEF = `=`
OR = `|`
AND = ``
ENDL = ``
RNG_OPEN = `{`
RNG_CLOSE = `}`
RNG_DELIM = `,`
TIMES = `*`
RE_LEADIN = `/`
RE_LEADOUT = `/`
CH_LEADIN = `0x`
#: heuristics
char_range_heuristics = ! ( /[\n\t ]/
| ~ literal_heuristics
| [`::`|`:?`|`:`] SYM_REGEX /\s*\]/ )
literal_heuristics = /~?\s*"(?:[\\]\]|[^\]]|[^\\]\[[^"]*)*"/
| /~?\s*'(?:[\\]\]|[^\]]|[^\\]\[[^']*)*'/
| /~?\s*`(?:[\\]\]|[^\]]|[^\\]\[[^`]*)*`/
| /~?\s*´(?:[\\]\]|[^\]]|[^\\]\[[^´]*)*´/
| /~?\s*\/(?:[\\]\]|[^\]]|[^\\]\[[^\/]*)*\//
regex_heuristics = /[^ ]/ | /[^\/\n*?+\\]*[*?+\\][^\/\n]\//
#: basic-regexes
RE_CORE = /(?:(?<!\\)\\(?:\/)|[^\/])*/ # core of a regular expression, i.e. the dots in /.../
SYM_REGEX = /(?!\d)\w+/ # regular expression for symbols
HEXCODE = /[A-Fa-f0-9]{1,8}/
......@@ -23,7 +23,7 @@ json = ~ _element _EOF
_element = object | array | string | number | bool | null
object = "{" member { "," §member } §"}"
member = string §":" _element
array = "[" [_element { "," _element }] "]"
array = "[" [ _element { "," _element } ] "]"
string = `"` §_CHARACTERS `"` ~
number = INT FRAC EXP ~
bool = /true/~ | /false/~ # use regexes so values are not dropped as tokens
......
This diff is collapsed.
......@@ -47,7 +47,7 @@ json = ~ _element _EOF
_element = object | array | string | number | bool | null
object = "{" member { _OBJECT_SEPARATOR §member } §"}"
member = string §":" _element
array = "[" [_element { _ARRAY_SEPARATOR §_element }] "]"
array = "[" [ _element { _ARRAY_SEPARATOR §_element } ] "]"
string = `"` §_CHARACTERS `"` ~
number = INT FRAC EXP ~
bool = /true/~ | /false/~ # use regexes so values are not dropped as tokens
......@@ -66,7 +66,7 @@ ESCAPE = /\\[\/bnrt\\]/ | UNICODE
UNICODE = "\u" HEX HEX
HEX = /[0-9a-fA-F][0-9a-fA-F]/
INT = [NEG] /[0-9]/ | /[1-9][0-9]+/
INT = [ NEG ] /[0-9]/ | /[1-9][0-9]+/
NEG = `-`
FRAC = [ DOT /[0-9]+/ ]
DOT = `.`
......@@ -86,7 +86,7 @@ _EOF = !/./ # no more characters ahead, end of file reached
# leave sequence, if '}' ahead, otherwise raise error on missing comma,
# but pass a comma if it exists
_OBJECT_SEPARATOR = !`}` §&`,` [","]
_OBJECT_SEPARATOR = !`}` §&`,` [ "," ]
@ _ARRAY_SEPARATOR_error = /(?!,)/, 'Missing separator ","'
......@@ -94,4 +94,4 @@ _OBJECT_SEPARATOR = !`}` §&`,` [","]
# leave sequence, if ']' ahead, otherwise raise error on missing comma,
# but pass a comma if it exists
_ARRAY_SEPARATOR = !`]` §&`,` [","]
_ARRAY_SEPARATOR = !`]` §&`,` [ "," ]
......@@ -12,32 +12,42 @@ from functools import partial
import os
import sys
if r'/home/eckhart/Entwicklung/DHParser' not in sys.path:
sys.path.append(r'/home/eckhart/Entwicklung/DHParser')
try:
scriptpath = os.path.dirname(__file__)
except NameError:
scriptpath = ''
dhparser_parentdir = os.path.abspath(os.path.join(scriptpath, r'../..'))
if scriptpath not in sys.path:
sys.path.append(scriptpath)
if dhparser_parentdir not in sys.path:
sys.path.append(dhparser_parentdir)
try:
import regex as re
except ImportError:
import re
from DHParser import start_logging, suspend_logging, resume_logging, is_filename, load_if_file, \
Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, \
Lookbehind, Lookahead, Alternative, Pop, Text, DropText, Synonym, AllOf, SomeOf, \
Unordered, Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \
Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, Drop, AnyChar, \
Lookbehind, Lookahead, Alternative, Pop, Text, Synonym, Counted, Interleave, INFINITE, \
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \
grammar_changed, last_value, matching_bracket, PreprocessorFunc, is_empty, remove_if, \
Node, TransformationFunc, TransformationDict, transformation_factory, traverse, \
remove_children_if, move_adjacent, normalize_whitespace, is_anonymous, matches_re, \
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
replace_by_children, remove_empty, remove_tokens, flatten, \
merge_adjacent, collapse, collapse_children_if, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
remove_children, remove_content, remove_brackets, change_tag_name, remove_anonymous_tokens, \
keep_children, is_one_of, not_one_of, has_content, apply_if, \
replace_by_children, remove_empty, remove_tokens, flatten, all_of, any_of, \
merge_adjacent, collapse, collapse_children_if, transform_content, WHITESPACE_PTYPE, \
TOKEN_PTYPE, remove_children, remove_content, remove_brackets, change_tag_name, \
remove_anonymous_tokens, keep_children, is_one_of, not_one_of, has_content, apply_if, peek, \
remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip, \
replace_content, replace_content_by, forbid, assert_content, remove_infix_operator, \
error_on, recompile_grammar, left_associative, lean_left, set_config_value, \
get_config_value, XML_SERIALIZATION, SXPRESSION_SERIALIZATION, COMPACT_SERIALIZATION, \
JSON_SERIALIZATION, access_thread_locals, access_presets, finalize_presets, ErrorCode, \
RX_NEVER_MATCH
transform_content, replace_content_with, forbid, assert_content, remove_infix_operator, \
add_error, error_on, recompile_grammar, left_associative, lean_left, set_config_value, \
get_config_value, XML_SERIALIZATION, SXPRESSION_SERIALIZATION, node_maker, \
INDENTED_SERIALIZATION, JSON_SERIALIZATION, access_thread_locals, access_presets, \
finalize_presets, ErrorCode, RX_NEVER_MATCH, set_tracer, resume_notices_on, \
trace_history, has_descendant, neg, has_ancestor, optional_last_value, insert, \
positions_of, replace_tag_names, add_attributes, delimit_children, merge_connected, \
has_attr, has_parent
#######################################################################
......@@ -49,6 +59,7 @@ from DHParser import start_logging, suspend_logging, resume_logging, is_filename
def json_fail_tolerantPreprocessor(text):
    """Identity-preprocessor: returns the source text unchanged together
    with a trivial source-mapping function that maps every position
    onto itself."""
    def identity_mapping(pos):
        # no preprocessing took place, so positions are unchanged
        return pos
    return text, identity_mapping
def get_preprocessor() -> PreprocessorFunc:
    """Returns the preprocessor-function for json_fail_tolerant-sources."""
    preprocessor = json_fail_tolerantPreprocessor
    return preprocessor
......@@ -63,7 +74,7 @@ class json_fail_tolerantGrammar(Grammar):
r"""Parser for a json_fail_tolerant source file.
"""
_element = Forward()
source_hash__ = "17f211513c84da0085bc9412da14c0a6"
source_hash__ = "8cfbcd6885f3e395ff1859ca9fc8f244"
anonymous__ = re.compile('..(?<=^)')
static_analysis_pending__ = [] # type: List[bool]
parser_initialization__ = ["upon instantiation"]
......@@ -161,11 +172,13 @@ json_fail_tolerant_AST_transformation_table = {
}
def Createjson_fail_tolerantTransformer() -> TransformationFunc:
    """Creates a transformation function that does not share state with other
    threads or processes."""
    # copy the table so that concurrent transformers never share mutable state
    table_copy = json_fail_tolerant_AST_transformation_table.copy()
    return partial(traverse, processing_table=table_copy)
def get_transformer() -> TransformationFunc:
"""Returns a thread/process-exclusive transformation function."""
THREAD_LOCALS = access_thread_locals()
......@@ -261,6 +274,7 @@ class json_fail_tolerantCompiler(Compiler):
# return node
def get_compiler() -> json_fail_tolerantCompiler:
"""Returns a thread/process-exclusive json_fail_tolerantCompiler-singleton."""
THREAD_LOCALS = access_thread_locals()
......@@ -288,7 +302,10 @@ def compile_src(source):
if __name__ == "__main__":
# recompile grammar if needed
grammar_path = os.path.abspath(__file__).replace('Parser.py', '.ebnf')
if __file__.endswith('Parser.py'):
grammar_path = os.path.abspath(__file__).replace('Parser.py', '.ebnf')
else:
grammar_path = os.path.splitext(__file__)[0] + '.ebnf'
parser_update = False
def notify():
......@@ -296,7 +313,7 @@ if __name__ == "__main__":
parser_update = True
print('recompiling ' + grammar_path)
if os.path.exists(grammar_path):
if os.path.exists(grammar_path) and os.path.isfile(grammar_path):
if not recompile_grammar(grammar_path, force=False, notify=notify):
error_file = os.path.basename(__file__).replace('Parser.py', '_ebnf_ERRORS.txt')
with open(error_file, encoding="utf-8") as f:
......@@ -304,26 +321,43 @@ if __name__ == "__main__":
sys.exit(1)
elif parser_update:
print(os.path.basename(__file__) + ' has changed. '
'Please run again in order to apply updated compiler')
'Please run again in order to apply updated compiler')
sys.exit(0)
else:
print('Could not check whether grammar requires recompiling, '
'because grammar was not found at: ' + grammar_path)
if len(sys.argv) > 1:
# compile file
file_name, log_dir = sys.argv[1], ''
if file_name in ['-d', '--debug'] and len(sys.argv) > 2:
file_name, log_dir = sys.argv[2], 'LOGS'
start_logging(log_dir)
result, errors, _ = compile_src(file_name)
if errors:
cwd = os.getcwd()
rel_path = file_name[len(cwd):] if file_name.startswith(cwd) else file_name
for error in errors:
print(rel_path + ':' + str(error))
sys.exit(1)
else:
print(result.as_xml() if isinstance(result, Node) else result)
from argparse import ArgumentParser
parser = ArgumentParser(description="Parses a json_fail_tolerant-file and shows its syntax-tree.")
parser.add_argument('files', nargs=1)
parser.add_argument('-d', '--debug', action='store_const', const='debug')
parser.add_argument('-x', '--xml', action='store_const', const='xml')
args = parser.parse_args()
file_name, log_dir = args.files[0], ''
if not os.path.exists(file_name):
print('File "%s" not found!' % file_name)
sys.exit(1)
if not os.path.isfile(file_name):
print('"%s" is not a file!' % file_name)
sys.exit(1)
if args.debug is not None:
log_dir = 'LOGS'
set_config_value('history_tracking', True)
set_config_value('resume_notices', True)
set_config_value('log_syntax_trees', set(['cst', 'ast'])) # don't use a set literal, here
start_logging(log_dir)
result, errors, _ = compile_src(file_name)
if errors:
cwd = os.getcwd()
rel_path = file_name[len(cwd):] if file_name.startswith(cwd) else file_name
for error in errors:
print(rel_path + ':' + str(error))
sys.exit(1)
else:
print("Usage: json_fail_tolerantParser.py [FILENAME]")
print(result.serialize(how='default' if args.xml is None else 'xml')
if isinstance(result, Node) else result)
......@@ -410,9 +410,9 @@ class TestBoundaryCases:
assert False, "Grammar objects should be able to cope with unconnected parsers!"
try:
nonexistant = grammar['nonexistant']
assert False, "Grammar object shoul raise a KeyError if subscripted by " \
assert False, "Grammar object should raise an AttributeError if subscripted by " \
"a non-existant parser name!"
except KeyError:
except AttributeError:
pass
......@@ -986,6 +986,13 @@ VARIABLE ::= /[A-Za-z]/, ~;
class TestAlternativeEBNFSyntax:
def setup(self):
    # Remember the syntax variant that is currently configured, so that
    # teardown() can restore it after the test has run.
    self.save = get_config_value('syntax_variant')
    # The tests in this class require the most liberal, heuristics-based
    # detection of EBNF-syntax-variants.
    set_config_value('syntax_variant', 'heuristic')
def teardown(self):
    # Restore the syntax variant that was in effect before setup() changed it,
    # so that other tests are not affected by this test class.
    set_config_value('syntax_variant', self.save)
def test_alt_syntax(self):
code, errors, ast = compile_ebnf(ArithmeticEBNF, preserve_AST=True)
assert not ast.error_flag, str(ast.errors)
......@@ -997,6 +1004,13 @@ class TestAlternativeEBNFSyntax: