16.12.2021, 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit 781cc1a0 authored by Eckhart Arnold
Browse files

some small code cleanups

parent df506660
......@@ -585,51 +585,6 @@ class ScannerToken(Parser):
return None, text
# class RegExp(Parser):
# def __init__(self, regexp, orig_re = '', name=None):
# super(RegExp, self).__init__(name)
# # self.name = name
# self.regexp = re.compile(regexp) if isinstance(regexp, str) else regexp
# self.orig_re = orig_re
#
# def __deepcopy__(self, memo):
# # this method is obsolete with the new `regex` module!
# try:
# regexp = copy.deepcopy(self.regexp)
# except TypeError:
# regexp = self.regexp.pattern
# duplicate = RegExp(self.name, regexp, self.orig_re)
# duplicate.name = self.name # this ist needed!!!!
# duplicate.regexp = self.regexp
# duplicate.orig_re = self.orig_re
# duplicate.headquarter = self.headquarter
# duplicate.visited = copy.deepcopy(self.visited, memo)
# duplicate.recursion_counter = copy.deepcopy(self.recursion_counter,
# memo)
# return duplicate
#
# def __call__(self, text):
# match = text[0:1] != BEGIN_SCANNER_TOKEN and self.regexp.match(text) # ESC starts a scanner token.
# if match:
# end = match.end()
# groups = set(match.groups())
# if len(groups) >= 1:
# split = sorted([i for i in reduce(lambda s, r: s | set(r),
# match.regs, set()) if i >= 0])
# parts = (text[i:j] for i, j in zip(split[:-1], split[1:]))
# result = tuple(Node(None if part in groups else RE_WS, part)
# for part in parts)
# if all(r.parser == RE_WS for r in result):
# return Node(RE_WS, text[:end]), text[end:]
# return Node(self, result), text[end:]
# return Node(self, match.group()), text[end:]
# return None, text
#
# def __str__(self):
# pattern = self.orig_re or self.regexp.pattern # for readability of error messages !
# return Parser.__str__(self) + "/" + pattern + "/"
class RegExp(Parser):
def __init__(self, regexp, name=None):
super(RegExp, self).__init__(name)
......@@ -662,7 +617,9 @@ class RegExp(Parser):
class RE(Parser):
def __init__(self, regexp, wL=None, wR=None, name=None):
"""Regular Expressions with optional leading or trailing whitespace.
"""
def __init__(self, regexp, wL='', wR='', name=None):
super(RE, self).__init__(name)
self.wL = RegExp(wL, WHITESPACE_KEYWORD) if wL else ''
self.wR = RegExp(wR, WHITESPACE_KEYWORD) if wR else ''
......@@ -708,14 +665,6 @@ def mixin_comment(whitespace, comment):
wspc = '(?:' + whitespace + '(?:' + comment + whitespace + ')*)'
return wspc
#
# def RE(regexp, wL='', wR='', name=None):
# rA = '('
# rB = '\n)' if regexp.find('(?x)') >= 0 else ')' # otherwise the closing bracket might erroneously
# # be append to the end of a line comment!
# return RegExp(wL + rA + regexp + rB + wR, regexp,
# name or TOKEN_KEYWORD)
def Token(token, wL='', wR='', name=None):
    """Return an ``RE`` parser that matches the literal string *token*,
    optionally surrounded by leading (*wL*) / trailing (*wR*) whitespace.

    If no *name* is given the parser is tagged with ``TOKEN_KEYWORD``.
    """
    pattern = escape_re(token)
    parser_name = name if name else TOKEN_KEYWORD
    return RE(pattern, wL, wR, parser_name)
......@@ -754,12 +703,6 @@ class NaryOperator(Parser):
for parser in self.parsers:
parser.apply(func)
# def __str__(self):
# return Parser.__str__(self) + \
# ("" if self.name else str([str(p) for p in self.parsers]))
# # return "(" + ",\n".join(["\n ".join(str(parser).split("\n"))
# # for parser in self.parsers]) + ")"
class Optional(UnaryOperator):
def __init__(self, parser, name=None):
......@@ -1359,10 +1302,6 @@ class EBNFGrammar(ParserHeadquarter):
root__ = syntax
def TTTest(node):
    """Debugging hook for the AST transformation table.

    Currently a pure pass-through: the node is returned unchanged.  A
    sanity assertion used to live here and was disabled.
    """
    return node
EBNFTransTable = {
# AST Transformations for EBNF-grammar
"syntax":
......@@ -1385,7 +1324,7 @@ EBNFTransTable = {
(TOKEN_KEYWORD, WHITESPACE_KEYWORD):
[remove_expendables, reduce_single_child],
"":
[TTTest, remove_expendables, replace_by_single_child]
[remove_expendables, replace_by_single_child]
}
......@@ -2001,11 +1940,6 @@ def test(file_name):
compiler = EBNFCompiler(compiler_name, grammar)
result, errors, syntax_tree = full_compilation(grammar,
EBNFGrammar(), EBNFTransTable, compiler)
# print(syntax_tree.as_xml())
# print(result)
# print(syntax_tree.as_sexpr(grammar))
# print(compiler.gen_AST_Skeleton())
# print(compiler.gen_Compiler_Skeleton())
if errors:
print(errors)
sys.exit(1)
......@@ -2015,13 +1949,13 @@ def test(file_name):
return result
# Changes in the EBNF source that are not reflected in this file usually are
# a source of sometimes obscure errors! Therefore, we will check this.
if (os.path.exists('examples/EBNF/EBNF.ebnf')
and has_source_changed('examples/EBNF/EBNF.ebnf', EBNFGrammar)):
# assert False, "WARNING: Grammar source has changed. The parser may not " \
# "represent the actual grammar any more!!!"
pass
# # Changes in the EBNF source that are not reflected in this file could be
# # a source of sometimes obscure errors! Therefore, we will check this.
# if (os.path.exists('examples/EBNF/EBNF.ebnf')
# and has_source_changed('examples/EBNF/EBNF.ebnf', EBNFGrammar)):
# assert False, "WARNING: Grammar source has changed. The parser may not " \
# "represent the actual grammar any more!!!"
# pass
if __name__ == "__main__":
print(sys.argv)
......
<Artikel>
<LemmaPosition>
<token__>
LEMMA
</token__>
<Lemma>
<_tll>
*
</_tll>
<WORT_KLEIN>
facitergula
</WORT_KLEIN>
</Lemma>
<LemmaVarianten>
<LVariante>
<RegExp>
fasc-itergula
</RegExp>
</LVariante>
<LVariante>
<RegExp>
fac-iet-ergula
</RegExp>
</LVariante>
<LVariante>
<RegExp>
fac-ist-ergula
</RegExp>
</LVariante>
<LVariante>
<RegExp>
fa-rcu-tergula
</RegExp>
</LVariante>
<LVZusatz>
<RegExp>
sim.
</RegExp>
</LVZusatz>
</LemmaVarianten>
<GrammatikPosition>
<token__>
GRAMMATIK
</token__>
<_wortart>
<token__>
nomen
</token__>
</_wortart>
<token__>
;
</token__>
<Flexionen>
<Flexion>
<RegExp>
-ar
</RegExp>
</Flexion>
</Flexionen>
<_genus>
<token__>
f.
</token__>
</_genus>
<ZeroOrMore>
<GrammatikVarianten>
<token__>
;
</token__>
<GVariante>
<Flexionen>
<Flexion>
<RegExp>
-us
</RegExp>
</Flexion>
<Sequence>
<token__>
,
</token__>
<Flexion>
<RegExp>
-i
</RegExp>
</Flexion>
</Sequence>
</Flexionen>
<_genus>
<token__>
m.
</token__>
</_genus>
<token__>
:
</token__>
<Beleg>
<RegExp>
>>beleg_id_1
</RegExp>
</Beleg>
</GVariante>
</GrammatikVarianten>
<GrammatikVarianten>
<token__>
;
</token__>
<GVariante>
<Flexionen>
<Flexion>
<RegExp>
-um
</RegExp>
</Flexion>
<Sequence>
<token__>
,
</token__>
<Flexion>
<RegExp>
-i
</RegExp>
</Flexion>
</Sequence>
</Flexionen>
<_genus>
<token__>
n.
</token__>
</_genus>
<token__>
:
</token__>
<Beleg>
<RegExp>
>>beleg_id_2
</RegExp>
</Beleg>
</GVariante>
</GrammatikVarianten>
</ZeroOrMore>
</GrammatikPosition>
</LemmaPosition>
<ArtikelKopf>
<token__>
SCHREIBWEISE
</token__>
<SWTyp>
<token__>
script.
</token__>
</SWTyp>
<token__>
:
</token__>
<SWVariante>
<Schreibweise>
<token__>
vizreg-
</token__>
</Schreibweise>
<token__>
:
</token__>
<Beleg>
<RegExp>
>>beleg_id_3
</RegExp>
</Beleg>
</SWVariante>
<ZeroOrMore>
<Sequence>
<token__>
,
</token__>
<SWVariante>
<Schreibweise>
<token__>
festregel(a)
</token__>
</Schreibweise>
<token__>
:
</token__>
<Beleg>
<RegExp>
>>beleg_id_4
</RegExp>
</Beleg>
</SWVariante>
</Sequence>
<Sequence>
<token__>
,
</token__>
<SWVariante>
<Schreibweise>
<token__>
fezdregl(a)
</token__>
</Schreibweise>
<token__>
:
</token__>
<Beleg>
<RegExp>
>>beleg_id5
</RegExp>
</Beleg>
</SWVariante>
</Sequence>
</ZeroOrMore>
</ArtikelKopf>
<BedeutungsPosition>
<Sequence>
<token__>
BEDEUTUNG
</token__>
<Bedeutung>
<Interpretamente>
<LateinischeBedeutung>
<token__>
LAT
</token__>
<RegExp>
pannus, faciale, sudarium
</RegExp>
</LateinischeBedeutung>
<DeutscheBedeutung>
<token__>
DEU
</token__>
<RegExp>
Gesichts-, Schweißtuch [usu liturg.; de re v. p. 32, 63]
</RegExp>
</DeutscheBedeutung>
<Belege>
<token__>
BELEGE
</token__>
<ZeroOrMore>
<Sequence>
<token__>
*
</token__>
<EinBeleg>
<RegExp>
Catal. thes. Germ. 28,11 (post 851) -um III.
</RegExp>
</EinBeleg>
</Sequence>
<Sequence>
<token__>
*
</token__>
<EinBeleg>
<OneOrMore>
<RegExp>
Form. Sangall. 39 p. 421,16 "munuscula ... direximus, hoc est palliolum ... ,
</RegExp>
<RegExp>
-as duas."
</RegExp>
</OneOrMore>
</EinBeleg>
</Sequence>
<Sequence>
<token__>
*
</token__>
<EinBeleg>
<RegExp>
Catal. thes. Germ. 18,7 "-eterculi viginti quatuor".
</RegExp>
</EinBeleg>
</Sequence>
<Sequence>
<token__>
*
</token__>
<EinBeleg>
<OneOrMore>
<RegExp>
Libri confrat. I app. A 6 p. 137,30 "pulpitum ... -a cocco imaginata
</RegExp>
<RegExp>
circumdari iussit pontifex."
</RegExp>
</OneOrMore>
</EinBeleg>
</Sequence>
<Sequence>
<token__>
*
</token__>
<EinBeleg>
<OneOrMore>
<RegExp>
Catal. thes. Germ. 76,15 -rulae II. 40,5 VI vizregule. 129a,5 -sterculas
</RegExp>
<RegExp>
II. 24,8 -itella X. 114,8 VIII fezdregle. 6,24 fasciutercule
</RegExp>
<RegExp>
VII. 92,6 fascercule tres. 21,20 IIII festregele.
</RegExp>
</OneOrMore>
<Zusatz>
<token__>
ZUSATZ
</token__>
<RegExp>
saepe.
</RegExp>
</Zusatz>
</EinBeleg>
</Sequence>
</ZeroOrMore>
</Belege>
</Interpretamente>
</Bedeutung>
</Sequence>
<Sequence>
<token__>
BEDEUTUNG
</token__>
<Bedeutung>
<Interpretamente>
<LateinischeBedeutung>
<token__>
LAT
</token__>
<RegExp>
capital, rica
</RegExp>
</LateinischeBedeutung>
<DeutscheBedeutung>
<token__>
DEU
</token__>
<RegExp>
Kopftuch
</RegExp>
</DeutscheBedeutung>
<Belege>
<token__>
BELEGE
</token__>
<ZeroOrMore>
<Sequence>
<token__>
*
</token__>
<EinBeleg>
<RegExp>
Transl. Libor. I 32 raptis feminarum -is (fa[s]citergiis var. l.).
</RegExp>
</EinBeleg>
</Sequence>
<Sequence>
<token__>
*
</token__>
<EinBeleg>
<OneOrMore>
<RegExp>
II 20 nuditatem membrorum illius (puellae) tegere festinarunt fideles
</RegExp>
<RegExp>
clerici et laici inprimis cum eorum -cula, dein vestibus solitis.
</RegExp>
</OneOrMore>
</EinBeleg>
</Sequence>
</ZeroOrMore>
</Belege>
</Interpretamente>
</Bedeutung>
</Sequence>
</BedeutungsPosition>
<Autorinfo>
<token__>
AUTORIN
</token__>
<Name>
<WORT>
Weber
</WORT>
</Name>
</Autorinfo>
</Artikel>
\ No newline at end of file
def PopRetrieveScanner(text):
    """Identity scanner for PopRetrieve sources.

    No preprocessing step is needed for this grammar, so the source
    *text* is handed back untouched.
    """
    return text
### DON'T EDIT OR REMOVE THIS LINE ###
class PopRetrieveGrammar(ParserHeadquarter):
    r"""Parser for a PopRetrieve source file, with this grammar:

    document = { text | codeblock }
    codeblock = delimiter { text | (!:delimiter delimiter_sign) } ::delimiter
    delimiter = delimiter_sign
    delimiter_sign = /`+/
    text = /[^`]+/
    """
    # Auto-generated section (see surrounding "DON'T EDIT" markers):
    # regenerated from the EBNF source whose hash is recorded below.
    source_hash__ = "bb49baf9598a85673181fafce17e2503"
    parser_initialization__ = "upon instatiation"  # NOTE(review): typo in value kept as-is; may be compared elsewhere
    wsp__ = mixin_comment(whitespace=r'\s*', comment=r'')
    text = RE('[^`]+')
    delimiter_sign = RE('`+')
    # Capture stores the exact backtick run that opened a code block ...
    delimiter = Capture(delimiter_sign, "delimiter")
    # ... so that Retrieve/Pop below require the closing delimiter to be the
    # very same string (matching-length backtick fences).
    codeblock = Sequence(delimiter, ZeroOrMore(Alternative(text, Sequence(NegativeLookahead(Retrieve(delimiter)), delimiter_sign))), Pop(delimiter))
    document = ZeroOrMore(Alternative(text, codeblock))
    root__ = document
### DON'T EDIT OR REMOVE THIS LINE ###
# AST transformation table for the PopRetrieve grammar.  Every rule is
# currently mapped to `no_transformation`, i.e. the concrete syntax tree
# is kept as-is; entries are placeholders to be filled in as needed.
PopRetrieveTransTable = {
    # AST Transformations for the PopRetrieve-grammar
    "document": no_transformation,
    "codeblock": no_transformation,
    "delimiter": no_transformation,
    "delimiter_sign": no_transformation,
    "text": no_transformation,
    "": no_transformation  # fallback entry for unnamed nodes
}
### DON'T EDIT OR REMOVE THIS LINE ###
class PopRetrieveCompiler(CompilerBase):
    """Compiler for the abstract-syntax-tree of a PopRetrieve source file.

    Generated skeleton: ``document`` passes its node through unchanged;
    the remaining handlers are empty stubs (return ``None``) awaiting an
    implementation.
    """

    def __init__(self, grammar_name="PopRetrieve"):
        super(PopRetrieveCompiler, self).__init__()
        # Raw string: '\w' and '\Z' are invalid escape sequences in a
        # plain string literal (DeprecationWarning, future SyntaxError).
        assert re.match(r'\w+\Z', grammar_name)

    def document(self, node):
        # Root handler: currently the identity transformation.
        return node

    def codeblock(self, node):
        pass

    def delimiter(self, node):
        pass

    def delimiter_sign(self, node):
        pass

    def text(self, node):
        pass
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment