Commit 3a8fefd2 authored by di68kap's avatar di68kap
Browse files

- added 'ignorecase' directive; example BibTeX grammar added

parent 56211924
...@@ -356,6 +356,9 @@ class EBNFCompiler(Compiler): ...@@ -356,6 +356,9 @@ class EBNFCompiler(Compiler):
directives: A dictionary of all directives and their default directives: A dictionary of all directives and their default
values. values.
re_flags: A set of regular expression flags to be added to all
regular expressions found in the current parsing process
""" """
COMMENT_KEYWORD = "COMMENT__" COMMENT_KEYWORD = "COMMENT__"
WHITESPACE_KEYWORD = "WSP__" WHITESPACE_KEYWORD = "WSP__"
...@@ -379,6 +382,7 @@ class EBNFCompiler(Compiler): ...@@ -379,6 +382,7 @@ class EBNFCompiler(Compiler):
def _reset(self): def _reset(self):
super(EBNFCompiler, self)._reset() super(EBNFCompiler, self)._reset()
self._result = '' # type: str self._result = '' # type: str
self.re_flags = set() # type: Set[str]
self.rules = OrderedDict() # type: OrderedDict[str, List[Node]] self.rules = OrderedDict() # type: OrderedDict[str, List[Node]]
self.current_symbols = [] # type: List[Node] self.current_symbols = [] # type: List[Node]
self.symbols = {} # type: Dict[str, Node] self.symbols = {} # type: Dict[str, Node]
...@@ -392,6 +396,7 @@ class EBNFCompiler(Compiler): ...@@ -392,6 +396,7 @@ class EBNFCompiler(Compiler):
'literalws': ['right'], 'literalws': ['right'],
'tokens': set(), # alt. 'preprocessor_tokens' 'tokens': set(), # alt. 'preprocessor_tokens'
'filter': dict(), # alt. 'filter' 'filter': dict(), # alt. 'filter'
'ignorecase': False,
'testing': False} 'testing': False}
@property @property
...@@ -624,14 +629,14 @@ class EBNFCompiler(Compiler): ...@@ -624,14 +629,14 @@ class EBNFCompiler(Compiler):
return rule, defn return rule, defn
@staticmethod def _check_rx(self, node: Node, rx: str) -> str:
def _check_rx(node: Node, rx: str) -> str:
""" """
Checks whether the string `rx` represents a valid regular Checks whether the string `rx` represents a valid regular
expression. Makes sure that multiline regular expressions are expression. Makes sure that multiline regular expressions are
prepended by the multiline-flag. Returns the regular expression string. prepended by the multiline-flag. Returns the regular expression string.
""" """
rx = rx if rx.find('\n') < 0 or rx[0:4] == '(?x)' else '(?x)' + rx flags = self.re_flags | {'x'} if rx.find('\n') >= 0 else self.re_flags
rx = "(?%s)%s" % ("".join(flags), rx)
try: try:
re.compile(rx) re.compile(rx)
except Exception as re_error: except Exception as re_error:
...@@ -668,6 +673,12 @@ class EBNFCompiler(Compiler): ...@@ -668,6 +673,12 @@ class EBNFCompiler(Compiler):
"/%s/ does not." % value) "/%s/ does not." % value)
self.directives[key] = value self.directives[key] = value
elif key == 'ignorecase':
value = str(node.children[1]).lower() not in {"off", "false", "no"}
self.directives['ignorecase'] = value
if value:
self.re_flags.add('i')
elif key == 'testing': elif key == 'testing':
value = str(node.children[1]) value = str(node.children[1])
self.directives['testing'] = value.lower() not in {"off", "false", "no"} self.directives['testing'] = value.lower() not in {"off", "false", "no"}
......
# BibTeX-Grammar
@ testing = True
@ whitespace = /\s*/
@ ignorecase = True
@ comment = /%.*(?:\n|$)/
bibliography = { preamble | comment | entry }
preamble = "@Preamble{" /"/ PREAMBLE /"/~ §"}"
comment = "@Comment{" COMMENT §"}"
entry = /@/ entry_type "{" KEY { "," NAME §"=" field_content } §"}"
field_content = /\{/ content §"}" | PLAIN_CONTENT
content = { /(?:\\.|[^\\{}])*/ ( /\{/ content /\}/ ) }
PREAMBLE = /[^"]*/
COMMENT = /[^}]*/
KEY = /[^,}]*/~
NAME = /\w+/~
PLAIN_CONTENT = /[^,}]*/
\ No newline at end of file
#!/usr/bin/python3
"""tst_BibTeX_grammar.py - runs the unit tests for the BibTeX grammar
Author: Eckhart Arnold <arnold@badw.de>
Copyright 2017 Bavarian Academy of Sciences and Humanities
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import sys
sys.path.extend(['../../', '../', './'])
import DHParser.dsl
from DHParser import testing
from DHParser import toolkit
if not DHParser.dsl.recompile_grammar('BibTeX.ebnf', force=False): # recompiles Grammar only if it has changed
print('\nErrors while recompiling "BibTeX.ebnf":\n--------------------------------------\n\n')
with open('BibTeX_ebnf_ERRORS.txt') as f:
print(f.read())
sys.exit(1)
from BibTeXCompiler import get_grammar, get_transformer
with toolkit.logging(True):
error_report = testing.grammar_suite('grammar_tests', get_grammar,
get_transformer, report=True, verbose=True)
if error_report:
print('\n')
print(error_report)
sys.exit(1)
else:
print('\nSUCCESS! All tests passed :-)')
...@@ -56,7 +56,7 @@ with toolkit.logging(False): ...@@ -56,7 +56,7 @@ with toolkit.logging(False):
pr.enable() pr.enable()
for file in files: for file in files:
if file.lower().endswith('.tex') and file.lower().find('error') < 0: if file.lower().endswith('.tex') and file.lower().find('error') < 0:
with open(os.path.join('testdata', file), 'r') as f: with open(os.path.join('testdata', file), 'r', encoding="utf-8") as f:
doc = f.read() doc = f.read()
print('\n\nParsing document: "%s"\n' % file) print('\n\nParsing document: "%s"\n' % file)
result = parser(doc) result = parser(doc)
......
...@@ -150,13 +150,43 @@ class TestRegex: ...@@ -150,13 +150,43 @@ class TestRegex:
result, messages, syntax_tree = compile_source(mlregex, None, get_ebnf_grammar(), result, messages, syntax_tree = compile_source(mlregex, None, get_ebnf_grammar(),
get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest')) get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
assert result assert result
assert not messages assert not messages, str(messages)
parser = compile_python_object(DHPARSER_IMPORTS + result, '\w+Grammar$')() parser = compile_python_object(DHPARSER_IMPORTS + result, '\w+Grammar$')()
node, rest = parser.regex('abc+def') node, rest = parser.regex('abc+def')
assert rest == '' assert rest == ''
assert node.parser.name == "regex" assert node.parser.name == "regex"
assert str(node) == 'abc+def' assert str(node) == 'abc+def'
def text_ignore_case(self):
mlregex = r"""
@ ignorecase = True
regex = /alpha/
"""
result, messages, syntax_tree = compile_source(mlregex, None, get_ebnf_grammar(),
get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
assert result
assert not messages
parser = compile_python_object(DHPARSER_IMPORTS + result, '\w+Grammar$')()
node, rest = parser.regex('Alpha')
assert node
assert not node.error_flag
assert rest == ''
assert node.parser.name == "regex"
assert str(node) == 'Alpha'
mlregex = r"""
@ ignorecase = False
regex = /alpha/
"""
result, messages, syntax_tree = compile_source(mlregex, None, get_ebnf_grammar(),
get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
assert result
assert not messages
parser = compile_python_object(DHPARSER_IMPORTS + result, '\w+Grammar$')()
node, rest = parser.regex('Alpha')
assert node.error_flag
def test_token(self): def test_token(self):
tokenlang = r""" tokenlang = r"""
@whitespace = linefeed @whitespace = linefeed
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment