Commit 1937f410 authored by di68kap's avatar di68kap
Browse files

- added a couple of docstrings and some vertical whitespace fascism for better readability

parent 8caf886d
......@@ -495,10 +495,10 @@ class EBNFCompiler(Compiler):
# compile definitions and directives and collect definitions
for nd in node.children:
if nd.parser.name == "definition":
definitions.append(self._compile(nd))
definitions.append(self.compile(nd))
else:
assert nd.parser.name == "directive", nd.as_sxpr()
self._compile(nd)
self.compile(nd)
node.error_flag = node.error_flag or nd.error_flag
return self.assemble_parser(definitions, node)
......@@ -522,7 +522,7 @@ class EBNFCompiler(Compiler):
try:
self.current_symbols = [node]
self.rules[rule] = self.current_symbols
defn = self._compile(node.children[1])
defn = self.compile(node.children[1])
if rule in self.variables:
defn = 'Capture(%s)' % defn
self.variables.remove(rule)
......@@ -561,7 +561,7 @@ class EBNFCompiler(Compiler):
if len(node.children[1].result) != 1:
node.add_error('Directive "%s" must have one, but not %i values.' %
(key, len(node.children[1].result)))
value = self._compile(node.children[1]).pop()
value = self.compile(node.children[1]).pop()
if key == 'whitespace' and value in EBNFCompiler.WHITESPACE:
value = EBNFCompiler.WHITESPACE[value] # replace whitespace-name by regex
else:
......@@ -585,7 +585,7 @@ class EBNFCompiler(Compiler):
self.directives['testing'] = value.lower() not in {"off", "false", "no"}
elif key == 'literalws':
value = {item.lower() for item in self._compile(node.children[1])}
value = {item.lower() for item in self.compile(node.children[1])}
if (len(value - {'left', 'right', 'both', 'none'}) > 0
or ('none' in value and len(value) > 1)):
node.add_error('Directive "literalws" allows the values '
......@@ -596,10 +596,10 @@ class EBNFCompiler(Compiler):
self.directives[key] = list(ws)
elif key in {'tokens', 'scanner_tokens'}:
self.directives['tokens'] |= self._compile(node.children[1])
self.directives['tokens'] |= self.compile(node.children[1])
elif key.endswith('_filter'):
filter_set = self._compile(node.children[1])
filter_set = self.compile(node.children[1])
if not isinstance(filter_set, set) or len(filter_set) != 1:
node.add_error('Directive "%s" accepts exactly on symbol, not %s'
% (key, str(filter_set)))
......@@ -617,7 +617,7 @@ class EBNFCompiler(Compiler):
Compiles any non-terminal, where `parser_class` indicates the Parser class
name for the particular non-terminal.
"""
arguments = [self._compile(r) for r in node.children] + custom_args
arguments = [self.compile(r) for r in node.children] + custom_args
return parser_class + '(' + ', '.join(arguments) + ')'
......
......@@ -715,7 +715,8 @@ class Whitespace(RegExp):
class RE(Parser):
"""Regular Expressions with optional leading or trailing whitespace.
"""
Regular Expressions with optional leading or trailing whitespace.
The RE-parser parses pieces of text that match a given regular
expression. Unlike the ``RegExp``-Parser, it can also skip
......@@ -1289,6 +1290,25 @@ class Forward(Parser):
class Compiler:
"""
Class Compiler is the abstract base class for compilers. Compiler
objects are callable and take the root node of the abstract
syntax tree (AST) as argument and return the compiled code in a
format chosen by the compiler itself.
Subclasses implementing a compiler must define `on_XXX()`-methods
for each node name that can occur in the AST where 'XXX' is the
node's name (for unnamed nodes it is the node's ptype without the
leading colon ':').
These compiler methods take the node on which they are run as
argument. Unlike in the AST transformation, which runs depth-first,
compiler methods are called forward moving starting with the root
node, and they are responsible for compiling the child nodes
themselves. This should be done by invoking the `compile(node)`-
method which will pick the right `on_XXX`-method. It is not
recommended to call the `on_XXX`-methods directly.
"""
def __init__(self, grammar_name="", grammar_source=""):
self.dirty_flag = False
......@@ -1298,19 +1318,28 @@ class Compiler:
pass
def __call__(self, node: Node) -> Any:
"""Compiles the abstract syntax tree with the root ``node``.
It's called `compile_ast`` to avoid confusion with the
``_compile`` that is called from within the local node
compiler methods.
"""
Compiles the abstract syntax tree with the root node `node` and
returns the compiled code. It is up to subclasses implementing
the compiler to determine the format of the returned data.
(This very much depends on the kind and purpose of the
implemented compiler.)
"""
if self.dirty_flag:
self._reset()
else:
self.dirty_flag = True
return self._compile(node)
return self.compile(node)
def set_grammar_name(self, grammar_name, grammar_source):
def set_grammar_name(self, grammar_name="", grammar_source=""):
"""
Changes the grammar's name and the grammar's source.
The grammar name and the source text of the grammar are
metadata about the grammar that do not affect the compilation
process. Classes inheriting from `Compiler` can use this
information to name and annotate their output.
"""
assert grammar_name == "" or re.match('\w+\Z', grammar_name)
if not grammar_name and re.fullmatch(r'[\w/:\\]+', grammar_source):
grammar_name = os.path.splitext(os.path.basename(grammar_source))[0]
......@@ -1319,21 +1348,22 @@ class Compiler:
@staticmethod
def method_name(node_name: str) -> str:
"""Returns the method name for ``node_name``, e.g.
"""Returns the method name for `node_name`, e.g.
>>> Compiler.method_name('expression')
'on_expression'
"""
return 'on_' + node_name
def _compile(self, node: Node) -> Any:
"""Calls the compilation method for the given node and returns
the result of the compilation.
def compile(self, node: Node) -> Any:
"""
Calls the compilation method for the given node and returns the
result of the compilation.
The method's name is derived from either the node's parser
name or, if the parser is anonymous, the node's parser's class
name by adding the prefix 'on_'.
Note that ``_compile`` does not call any compilation functions
Note that ``compile`` does not call any compilation functions
for the parsers of the sub nodes by itself. Rather, this should
be done within the compilation methods.
"""
......@@ -1355,7 +1385,8 @@ def compile_source(source: str,
parser: Grammar, # str -> Node (concrete syntax tree (CST))
transformer: TransformationFunc, # Node -> Node (abstract syntax tree (AST))
compiler: Compiler): # Node (AST) -> Any
"""Compiles a source in four stages:
"""
Compiles a source in four stages:
1. Scanning (if needed)
2. Parsing
3. AST-transformation
......
......@@ -5,6 +5,7 @@ Introduction to [DHParser](https://gitlab.lrz.de/badw-it/DHParser)
Motto: **Computers enjoy XML, humans don't.**
Why use domain specific languages in the humanities?
----------------------------------------------------
......@@ -51,7 +52,7 @@ a few drawbacks to this approach:
Editing and revising XML-encoded text is a pain. Just ask the
literary scientists who have to work with it.
- The XML encoding, especially TEI-XML, is often unintuitive. Only
experts understand it. Now, if you had the idea that you humanist
experts understand it. Now, if you had the idea that your humanist
friend, who is not into digital technologies, might help you with
proof-reading, you had better think again.
- There is an awful lot of typing to do: All those lengthy opening
......@@ -187,7 +188,7 @@ for the full EBNF):
LEERZEILE = /\n[ \t]*(?=\n)/~
ENDE = !/./
Now, without going into too much detail here, let me just explain a few basics of
Without going into too much detail here, let me just explain a few basics of
this formal description: The slashes `/` enclose ordinary regular expressions.
Thus, `NZ` (for "Neue Zeile", German for "new line") is defined as `/\n/~` which
is the newline-token `\n` in a regular expression, plus further horizontal
......
......@@ -11,15 +11,16 @@ LEMMA facitergul|a
GRAMMATIK
nomen; -ae f.
-us, -i m.: verweis_ziel_001
-um, -i n.: verweis_ziel_002
-us, -i m.: ->ziel_001
-um, -i n.: ->ziel_002
SCHREIBWEISE
script.:
vizreg-: verweis_ziel_003
festregel(a): verweis_ziel_004
fezdregl(a): verweis_ziel_005
vizreg-: ->ziel_003
festregel(a): ->ziel_004
fezdregl(a): ->ziel_005
BEDEUTUNG
......@@ -38,6 +39,7 @@ BELEGE
VII."; 92,6 "fascercule tres." 21,20 IIII "festregele."
ZUSATZ saepe.
BEDEUTUNG
LAT capital, rica
......
......@@ -96,7 +96,7 @@ Name = { NAME | NAMENS_ABKÜRZUNG }+
Beleg = Verweis
Verweis = ZielName
Verweis = "->" ZielName
VerweisZiel = "[" ZielName "]"
ZielName = BUCHSTABENFOLGE
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment