Commit cbb9c8aa authored by di68kap

- name property added to Node class - now independent of the Parser.__str__() method

- Required()-parser now sets a "fatal_error" flag - otherwise the error would be overlooked. (Parser error reporting in general needs to be reconsidered...!)
- testbed for MLW example optimized
parent 40a41e35
......@@ -68,6 +68,9 @@ import types
from functools import reduce, partial
__version__ = '0.5.1' + '_dev' + str(os.stat(__file__).st_mtime)
DEBUG = True
DEBUG_DUMP_AST = ""
......@@ -148,6 +151,10 @@ class Node:
return "".join([str(child) for child in self.result])
return str(self.result)
@property
def name(self):
return self.parser.name or self.parser.__class__.__name__
@property
def result(self):
return self._result
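
A minimal usage sketch of the new `name` property (assumptions: the module is importable as ParserCombinators, `Node(parser, result)` and `Parser(name=...)` behave as in the snippets elsewhere in this diff, and an unnamed Parser carries an empty `name` attribute):

from ParserCombinators import Node, Parser

named = Parser(name="identifier")
anonymous = Parser()                       # no explicit name given

print(Node(named, "x").name)               # -> "identifier" (the parser's name)
print(Node(anonymous, "x").name)           # -> "Parser" (fallback to the class name)
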
......@@ -212,6 +219,7 @@ class Node:
return head + '\n'.join([tab + dataF(s)
for s in str(self.result).split('\n')]) + tail
def as_sexpr(self, src=None):
"""Returns content as S-expression, i.e. in lisp-like form.
......@@ -221,7 +229,7 @@ class Node:
line and column.
"""
def opening(node):
s = '(' + str(node.parser) # (node.parser.name or node.parser.__class__.__name__)
s = '(' + node.name
# s += " '(pos %i)" % node.pos
if src:
s += " '(pos %i %i %i)" % (node.pos, *line_col(src, node.pos))
......@@ -241,7 +249,7 @@ class Node:
"""
def opening(node):
s = '<' + (node.parser.name or node.parser.__class__.__name__)
s = '<' + node.name
# s += ' pos="%i"' % node.pos
if src:
s += ' line="%i" col="%i"' % line_col(src, node.pos)
......@@ -252,8 +260,7 @@ class Node:
return s
def closing(node):
s = '</' + \
(node.parser.name or node.parser.__class__.__name__) + '>'
s = '</' + node.name + '>'
return s
return self.as_tree(' ', opening, closing)
......@@ -280,6 +287,29 @@ class Node:
offset += child.len_before_AST
return errors
def navigate(self, path):
"""Returns the first descendant element matched by `path`, e.g.
'd/s' returns 'l' from (d (s l)(e (r x1) (r x2)))
'e/r' returns 'x2'
'e' returns (r x1)(r x2)
:param path: the path of the object, e.g. 'a/b/c'
:return: the object at the path, either a string or a Node or
`None`, if the path did not match.
"""
pl = path.split('/')
assert pl[0] != '', 'Path must not start with "/"!'
nd = self
for p in pl:
if isinstance(nd.result, str):
return p if (p == nd.result) and (p == pl[-1]) else None
for child in nd.result:
if str(child) == p:
nd = child
break
else:
return None
return child
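
A hedged usage sketch for navigate() (hypothetical tree; the Node/Parser constructors and the module name are assumed from the surrounding code, and matching follows the implementation above, which compares each path segment against a child's string content):

from ParserCombinators import Node, Parser

leaf = Node(Parser(name="s"), "l")
root = Node(Parser(name="d"), (leaf,))

print(root.navigate("l"))    # the child whose string content is "l"; prints "l"
print(root.navigate("x"))    # no child with content "x" -> None
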
def error_messages(text, errors):
"""Converts the list of `errors` collected from the root node of the
......@@ -292,6 +322,9 @@ def error_messages(text, errors):
for entry in errors)
# compact_sexpr = lambda s: re.sub(r'\s(?=\))', '', re.sub(r'\s+', ' ', s)).strip()
##############################################################################
#
# Parser base classes
......@@ -329,14 +362,16 @@ def wrap_parser(parser_func):
if parser.headquarter.moving_forward: # and result[0] == None
parser.headquarter.moving_forward = False
st = "->".join((str(p) for p in parser.headquarter.call_stack))
if result[0]:
# print("HIT!", st, '\t"%s"' % str(result[0]).replace('\n', ' '))
pass
else:
# t = text[:20].replace('\n',' ')
# print("FAIL", st, '\t"%s"' % (t + ("..." if t else "")))
pass
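# debugging aid: when DEBUG is set, print the current parser call stack together
# with the matched result ("HIT") or the start of the remaining text ("fail")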
global DEBUG
if DEBUG:
st = "->".join((str(p) for p in parser.headquarter.call_stack))
if result[0]:
print(st, '\t"%s"' % str(result[0]).replace('\n', ' '), "\tHIT")
pass
else:
t = text[:20].replace('\n',' ')
print(st, '\t"%s"' % (t + ("..." if t else "")), "\tfail")
pass
parser.headquarter.call_stack.pop()
if result[0] is not None:
......@@ -471,6 +506,8 @@ class ParserHeadquarter:
"""
assert self.unused, ("Parser has been used up. Please create a new "
"instance of the ParserHeadquarter class!")
if self.root__ is None:
raise NotImplementedError()
self.unused = False
parser = self.root__
result = ""
......@@ -499,8 +536,8 @@ class ParserHeadquarter:
return result if not stitches else Node(None, tuple(stitches))
ZOMBIE_PARSER = Parser() # zombie object to avoid distinction of cases
# for the Node.parser variable
ZOMBIE_PARSER = Parser(name="Zombie") # zombie object to avoid distinction of cases
# for the Node.parser variable
RE_WSPC = Parser(WHITESPACE_KEYWORD) # Dummy Parser for comments that were captured
# by an RE Parser via the `comment`-parameter
......@@ -587,7 +624,7 @@ class RegExp(Parser):
def __str__(self):
pattern = self.orig_re or self.regexp.pattern # for readability of error messages !
return Parser.__str__(self) # + "/" + pattern + "/"
return Parser.__str__(self) + "/" + pattern + "/"
def escape_re(s):
......@@ -768,6 +805,7 @@ class Required(FlowOperator):
# assert False, "*"+text[:i]+"*"
node.add_error('%s expected; "%s..." found!' %
(str(self.parser), text[:10]))
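# flag the node so that this error cannot be overlooked during error reporting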
node.fatal_error = True
return node, text_
......@@ -1292,12 +1330,13 @@ Scanner = collections.namedtuple('Scanner',
'symbol instantiation_call cls_name cls')
def md5(txt):
def md5(*txt):
"""Returns the md5-checksum for `txt`. This can be used to test if
some piece of text, for example a grammar source file, has changed.
"""
md5_hash = hashlib.md5()
md5_hash.update(txt.encode('utf8'))
for t in txt:
md5_hash.update(t.encode('utf8'))
return md5_hash.hexdigest()
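
A hedged sketch of how the now variadic md5() is used further down to bind the checksum to both the grammar source and the library version (module name assumed; the grammar text is made up for illustration):

from ParserCombinators import md5, __version__

grammar_src = "expr = term { ('+' | '-') term }"    # hypothetical grammar text
checksum = md5(grammar_src, __version__)
print(checksum)    # changes whenever the grammar text or the library version changes
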
......@@ -1305,7 +1344,7 @@ class EBNFCompiler(CompilerBase):
"""Generates a Parser from an abstract syntax tree of a grammar specified
in EBNF-Notation.
"""
# RX_DIRECTIVE = re.compile('(?:#|@)\s*(?P<key>\w*)\s*=\s*(?P<value>.*)') # this can be removed, soon
# RX_DIRECTIVE = re.compile('(?:#|@)\s*(?P<key>\w*)\s*=\s*(?P<value>.*)') # old, can be removed!
RESERVED_SYMBOLS = {TOKEN_KEYWORD, WHITESPACE_KEYWORD}
KNOWN_DIRECTIVES = {'comment', 'whitespace', 'tokens', 'literalws'}
VOWELS = {'A', 'E', 'I', 'O', 'U'} # what about cases like 'hour', 'universe' etc. ?
......@@ -1400,7 +1439,7 @@ class EBNFCompiler(CompilerBase):
definitions.append(('parser_initialization__', '"upon instantiation"'))
if self.source_text:
definitions.append(('source_hash__',
'"%s"' % md5(self.source_text)))
'"%s"' % md5(self.source_text, __version__)))
declarations.append('')
declarations += [line for line in self.source_text.split('\n')]
while declarations[-1].strip() == '':
......@@ -1846,10 +1885,20 @@ def has_source_changed(grammar_source, grammar_class):
from the source from which the grammar class was generated
"""
grammar = load_if_file(grammar_source)
chksum = md5(grammar)
chksum = md5(grammar, __version__)
if isinstance(grammar_class, str):
grammar_class = load_compiler_suite(grammar_class)[1]
return chksum != grammar_class.source_hash__
# grammar_class = load_compiler_suite(grammar_class)[1]
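# if a file path is passed instead of a class, read the generated parser module and
# extract the stored source_hash__ with a regex rather than importing the module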
with open(grammar_class, 'r', encoding='utf8') as f:
pycode = f.read()
m = re.search(r'class \w*\(ParserHeadquarter\)', pycode)
if m:
m = re.search(' source_hash__ *= *"([a-z0-9]*)"',
pycode[m.span()[1]:])
return not (m and m.groups() and m.groups()[-1] == chksum)
else:
return True
else:
return chksum != grammar_class.source_hash__
##############################################################################
......@@ -1882,8 +1931,8 @@ def test(file_name):
# a source of sometimes obscure errors! Therefore, we will check this.
if (os.path.exists('examples/EBNF/EBNF.ebnf')
and has_source_changed('examples/EBNF/EBNF.ebnf', EBNFGrammar)):
assert False, "WARNING: Grammar source has changed. The parser may not " \
"represent the actual grammar any more!!!"
# assert False, "WARNING: Grammar source has changed. The parser may not " \
# "represent the actual grammar any more!!!"
pass
if __name__ == "__main__":
......
......@@ -25,7 +25,7 @@ LVZusatz = "sim."
#### GRAMMATIK-POSITION ######################################################
GrammatikPosition = "GRAMMATIK" §_wortart §";" §Flexionen [_genus] [GrammatikVarianten] ["."]
GrammatikPosition = "GRAMMATIK" §_wortart §";" §Flexionen [_genus] {GrammatikVarianten} [";" | "."]
_wortart = "nomen" | "n." |
"verb" | "v." |
......@@ -33,7 +33,7 @@ _wortart = "nomen" | "n." |
"adjektiv" | "adj."
GrammatikVarianten = "(" §GVariante { ";" §GVariante } §")"
GrammatikVarianten = ";" §GVariante
GVariante = Flexionen [_genus] ":" Beleg
Flexionen = Flexion { "," §Flexion }
......@@ -60,7 +60,7 @@ VerweisZiel = ~/<\w+>/~
#### BEDEUTUNGS-POSITION #####################################################
BedeutungsPosition = < "BEDEUTUNG" Bedeutung >
BedeutungsPosition = { "BEDEUTUNG" Bedeutung }+
Bedeutung = Interpretamente | Bedeutungskategorie
Bedeutungskategorie = /(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+/~
......@@ -79,8 +79,10 @@ Name = WORT { WORT | /[A-ZÄÖÜÁÀ]\./ }
#### MISZELLANEEN ############################################################
WORT = /[A-ZÄÖÜ]*[a-zäöüÄÜÖß]+/~
WORT_KLEIN = /[a-z]+/~
WORT = /[A-ZÄÖÜ]?[a-zäöüß]+/~
WORT_GROSS = /[A-ZÄÖÜ][a-zäöüß]+/~
WORT_KLEIN = /[a-zäöüß]+/~
LAT_WORT = /[a-z]+/~
GROSSSCHRIFT = /[A-ZÄÖÜ]+/~
LEER = /\s*/
......
......@@ -22,8 +22,27 @@ limitations under the License.
import os
import sys
sys.path.append(os.path.abspath('../'))
from ParserCombinators import run_compiler
errors = run_compiler("fascitergula.mlw", os.path.join('..', 'MLW_compiler.py'), ".xml")
import ParserCombinators
from ParserCombinators import run_compiler, has_source_changed
MLW_ebnf = os.path.join('..', 'MLW.ebnf')
MLW_compiler = os.path.join('..', 'MLW_compiler.py')
# print(has_source_changed(MLW_ebnf, MLW_compiler))
ParserCombinators.DEBUG = False
if (not os.path.exists(MLW_compiler) or
has_source_changed(MLW_ebnf, MLW_compiler)):
print("recompiling parser")
errors = run_compiler(MLW_ebnf)
if errors:
print(errors)
sys.exit(1)
ParserCombinators.DEBUG = True
errors = run_compiler("fascitergula.mlw", MLW_compiler, ".xml")
if errors:
print(errors)
sys.exit(1)
......@@ -17,7 +17,7 @@ GRAMMATIK
SCHREIBWEISE
script:
script.:
vizreg-: >>beleg_id_3,
festregel(a): >>beleg_id_4,
fezdregl(a): >>beleg_id5
......
# EBNF syntax of MLW entries (experimental!!!)
lemma = "LEMMA" wort ["," flexion] erklaerung
lemma = "LEMMA" WORT ["," flexion] erklaerung
flexion = endung { "," endung | " " genus "." }
endung = [MINUS] KLEINBUCHSTABEN
genus = KLEINBUCHSTABE
......
......@@ -26,6 +26,7 @@ from ParserCombinators import run_compiler, has_source_changed
if (not os.path.exists('PopRetrieve_compiler.py') or
has_source_changed('PopRetrieve.ebnf', 'PopRetrieve_compiler.py')):
print("recompiling parser")
errors = run_compiler("PopRetrieve.ebnf")
if errors:
print(errors)
......@@ -36,7 +37,6 @@ if errors:
print(errors)
sys.exit(1)
errors = run_compiler("PopRetrieveTest2.txt", 'PopRetrieve_compiler.py')
if errors:
print(errors)
......
import os
os.system("nosetests")