Commit 3a8fefd2 authored by di68kap's avatar di68kap
Browse files

- added ignorecase flag; Example BibTeX added

parent 56211924
......@@ -356,6 +356,9 @@ class EBNFCompiler(Compiler):
directives: A dictionary of all directives and their default
values.
re_flags: A set of regular expression flags to be added to all
regular expressions found in the current parsing process
"""
COMMENT_KEYWORD = "COMMENT__"
WHITESPACE_KEYWORD = "WSP__"
......@@ -379,6 +382,7 @@ class EBNFCompiler(Compiler):
def _reset(self):
super(EBNFCompiler, self)._reset()
self._result = '' # type: str
self.re_flags = set() # type: Set[str]
self.rules = OrderedDict() # type: OrderedDict[str, List[Node]]
self.current_symbols = [] # type: List[Node]
self.symbols = {} # type: Dict[str, Node]
......@@ -392,6 +396,7 @@ class EBNFCompiler(Compiler):
'literalws': ['right'],
'tokens': set(), # alt. 'preprocessor_tokens'
'filter': dict(), # alt. 'filter'
'ignorecase': False,
'testing': False}
@property
......@@ -624,14 +629,14 @@ class EBNFCompiler(Compiler):
return rule, defn
@staticmethod
def _check_rx(node: Node, rx: str) -> str:
def _check_rx(self, node: Node, rx: str) -> str:
"""
Checks whether the string `rx` represents a valid regular
expression. Makes sure that multiline regular expressions are
prepended by the multiline-flag. Returns the regular expression string.
"""
rx = rx if rx.find('\n') < 0 or rx[0:4] == '(?x)' else '(?x)' + rx
# Inline flags for this regular expression: the compiler-wide flags from
# `self.re_flags`, plus the verbose flag 'x' if the expression spans
# several lines.
flags = self.re_flags | {'x'} if '\n' in rx else self.re_flags
# Only prepend a flag group if there is at least one flag. An empty group
# "(?)" is not valid regular expression syntax and would make the
# `re.compile` check below fail for every plain single-line expression.
# The flags are sorted because set iteration order is not stable, and the
# emitted prefix should be reproducible between runs.
if flags:
    rx = "(?%s)%s" % ("".join(sorted(flags)), rx)
try:
re.compile(rx)
except Exception as re_error:
......@@ -668,6 +673,12 @@ class EBNFCompiler(Compiler):
"/%s/ does not." % value)
self.directives[key] = value
elif key == 'ignorecase':
# Any value other than "off", "false" or "no" switches the directive on.
value = str(node.children[1]).lower() not in {"off", "false", "no"}
# Store the parsed setting. This must be an assignment; a comparison
# (`==`) would silently discard the value and leave the default in place.
self.directives['ignorecase'] = value
if value:
    # 'i' is the inline flag for case-insensitive matching; flags in
    # `re_flags` are added to the regular expressions of the grammar.
    self.re_flags.add('i')
elif key == 'testing':
value = str(node.children[1])
self.directives['testing'] = value.lower() not in {"off", "false", "no"}
......
# BibTeX-Grammar

# Directives
@ testing = True
@ whitespace = /\s*/
# match the regular expressions of this grammar case-insensitively
@ ignorecase = True
# a comment starts with "%" and runs to the end of the line (or of the input)
@ comment = /%.*(?:\n|$)/

# Structure of a bibliography file
bibliography = { preamble | comment | entry }
preamble = "@Preamble{" /"/ PREAMBLE /"/~ §"}"
comment = "@Comment{" COMMENT §"}"
entry = /@/ entry_type "{" KEY { "," NAME §"=" field_content } §"}"
# NOTE(review): `entry_type` is referenced by `entry` but had no definition
# in this grammar. It is defined here like NAME, as a single word (e.g. the
# "Article" in "@Article{") -- confirm that this is the intended syntax.
entry_type = /\w+/~
field_content = /\{/ content §"}" | PLAIN_CONTENT
content = { /(?:\\.|[^\\{}])*/ ( /\{/ content /\}/ ) }

# Terminals
PREAMBLE = /[^"]*/
COMMENT = /[^}]*/
KEY = /[^,}]*/~
NAME = /\w+/~
PLAIN_CONTENT = /[^,}]*/
\ No newline at end of file
#!/usr/bin/python3
"""tst_BibTeX_grammar.py - runs the unit tests for the BibTeX grammar
Author: Eckhart Arnold <arnold@badw.de>
Copyright 2017 Bavarian Academy of Sciences and Humanities
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import sys
sys.path.extend(['../../', '../', './'])
import DHParser.dsl
from DHParser import testing
from DHParser import toolkit
# Recompile the grammar only if 'BibTeX.ebnf' has changed. On failure, show
# the error report and stop with a non-zero exit code.
if not DHParser.dsl.recompile_grammar('BibTeX.ebnf', force=False):
    print('\nErrors while recompiling "BibTeX.ebnf":\n--------------------------------------\n\n')
    # Explicit encoding so that reading does not depend on the platform's
    # default encoding. Undecodable bytes are replaced rather than raising,
    # because this is only a best-effort diagnostic print.
    # NOTE(review): assumes the report is written as UTF-8 -- confirm.
    with open('BibTeX_ebnf_ERRORS.txt', encoding="utf-8", errors="replace") as f:
        print(f.read())
    sys.exit(1)

# Deliberately imported after the recompilation step above.
# NOTE(review): presumably 'BibTeXCompiler' is (re-)generated by
# `recompile_grammar` -- verify before moving this import to the top.
from BibTeXCompiler import get_grammar, get_transformer

# Run all grammar tests found in 'grammar_tests' with logging switched on.
with toolkit.logging(True):
    error_report = testing.grammar_suite('grammar_tests', get_grammar,
                                         get_transformer, report=True, verbose=True)

# A non-empty report means that at least one test has failed.
if error_report:
    print('\n')
    print(error_report)
    sys.exit(1)
else:
    print('\nSUCCESS! All tests passed :-)')
......@@ -56,7 +56,7 @@ with toolkit.logging(False):
pr.enable()
for file in files:
if file.lower().endswith('.tex') and file.lower().find('error') < 0:
with open(os.path.join('testdata', file), 'r') as f:
with open(os.path.join('testdata', file), 'r', encoding="utf-8") as f:
doc = f.read()
print('\n\nParsing document: "%s"\n' % file)
result = parser(doc)
......
......@@ -150,13 +150,43 @@ class TestRegex:
result, messages, syntax_tree = compile_source(mlregex, None, get_ebnf_grammar(),
get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
assert result
assert not messages
assert not messages, str(messages)
parser = compile_python_object(DHPARSER_IMPORTS + result, '\w+Grammar$')()
node, rest = parser.regex('abc+def')
assert rest == ''
assert node.parser.name == "regex"
assert str(node) == 'abc+def'
def test_ignore_case(self):
    """Check that the ``@ ignorecase`` directive controls case-sensitivity.

    With ``@ ignorecase = True`` the regular expression ``/alpha/`` must
    match ``'Alpha'`` completely. With ``@ ignorecase = False`` parsing
    ``'Alpha'`` must yield a node with its error flag set.

    The method name starts with ``test_`` (like the sibling test methods
    of this class) so that test discovery actually runs it.
    """
    # Case-insensitive variant: /alpha/ has to accept 'Alpha' as well.
    grammar_src = r"""
@ ignorecase = True
regex = /alpha/
"""
    result, messages, syntax_tree = compile_source(
        grammar_src, None, get_ebnf_grammar(), get_ebnf_transformer(),
        get_ebnf_compiler('MultilineRegexTest'))
    assert result
    # Show the compiler messages if the grammar did not compile cleanly.
    assert not messages, str(messages)
    parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
    node, rest = parser.regex('Alpha')
    assert node
    assert not node.error_flag
    assert rest == ''
    assert node.parser.name == "regex"
    assert str(node) == 'Alpha'

    # Case-sensitive variant: /alpha/ must not accept 'Alpha'.
    grammar_src = r"""
@ ignorecase = False
regex = /alpha/
"""
    result, messages, syntax_tree = compile_source(
        grammar_src, None, get_ebnf_grammar(), get_ebnf_transformer(),
        get_ebnf_compiler('MultilineRegexTest'))
    assert result
    assert not messages, str(messages)
    parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
    node, rest = parser.regex('Alpha')
    assert node.error_flag
def test_token(self):
tokenlang = r"""
@whitespace = linefeed
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment