Commit f1df67a1 authored by eckhart

- Started a VS Code language server for MLW

parent e4994af3
......@@ -30,3 +30,10 @@ DHParser/cstringview.c
.vscode/
DHParser.egg-info
.noseids/
*.build
VERALTET
DHParser/stringview.c
*_ERRORS.txt
*_WARNINGS.txt
imperium.html
fascitergula.html
......@@ -54,7 +54,7 @@ SECTION_MARKER = """\n
\n"""
RX_SECTION_MARKER = re.compile(SECTION_MARKER.format(marker=r'.*?SECTION.*?'))
RX_WHITESPACE = re.compile('\s*')
RX_WHITESPACE = re.compile(r'\s*')
SYMBOLS_SECTION = "SYMBOLS SECTION - Can be edited. Changes will be preserved."
PREPROCESSOR_SECTION = "PREPROCESSOR SECTION - Can be edited. Changes will be preserved."
......@@ -97,8 +97,8 @@ def compile_src(source, log_dir=''):
compiler = get_compiler()
cname = compiler.__class__.__name__
log_file_name = os.path.basename(os.path.splitext(source)[0]) \
if is_filename(source) < 0 else cname[:cname.find('.')] + '_out'
result = compile_source(source, get_preprocessor(),
if is_filename(source) < 0 else cname[:cname.find('.')] + '_out'
result = compile_source(source, get_preprocessor(),
get_grammar(),
get_transformer(), compiler)
return result
......@@ -320,7 +320,8 @@ def load_compiler_suite(compiler_suite: str) -> \
parser = compile_python_object(imports + parser_py, r'get_(?:\w+_)?grammar$')
ast = compile_python_object(imports + ast_py, r'get_(?:\w+_)?transformer$')
else:
# assume source is an ebnf grammar. Is there really any reasonable application case for this?
# Assume source is an ebnf grammar.
# Is there really any reasonable application case for this?
with logging(False):
compiler_py, messages, n = compile_source(source, None, get_ebnf_grammar(),
get_ebnf_transformer(), get_ebnf_compiler())
......@@ -397,7 +398,7 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml") -> It
need to be delimited by section marker blocks.). `compile_on_disk()`
returns a list of error messages or an empty list if no errors
occurred.
Parameters:
source_file(str): The file name of the source text to be
compiled.
......@@ -432,7 +433,9 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml") -> It
if has_errors(messages):
return messages
elif cfactory == get_ebnf_compiler: # trans == get_ebnf_transformer or trans == EBNFTransformer: # either an EBNF- or no compiler suite given
elif cfactory == get_ebnf_compiler:
# trans == get_ebnf_transformer or trans == EBNFTransformer:
# either an EBNF- or no compiler suite given
ebnf_compiler = cast(EBNFCompiler, compiler1)
global SECTION_MARKER, RX_SECTION_MARKER, PREPROCESSOR_SECTION, PARSER_SECTION, \
AST_SECTION, COMPILER_SECTION, END_SECTIONS_MARKER, RX_WHITESPACE, \
......@@ -491,7 +494,8 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml") -> It
+ "\n# ".join(str(error).split('\n)')))
print(result)
finally:
if f: f.close()
if f:
f.close()
else:
f = None
......
......@@ -71,17 +71,20 @@ class EBNFGrammar(Grammar):
# EBNF-Grammar in EBNF
@ comment = /#.*(?:\n|$)/ # comments start with '#' and eat all chars up to and including '\n'
@ whitespace = /\s*/ # whitespace includes linefeed
@ literalws = right # trailing whitespace of literals will be ignored tacitly
@ comment = /#.*(?:\n|$)/ # comments start with '#' and
# eat all chars up to and including '\n'
@ whitespace = /\s*/ # whitespace includes linefeed
@ literalws = right # trailing whitespace of literals will be
# ignored tacitly
syntax = [~//] { definition | directive } §EOF
definition = symbol §"=" expression
directive = "@" §symbol "=" ( regexp | literal | list_ )
expression = term { "|" term }
term = { ["§"] factor }+ # "§" means all following factors mandatory
factor = [flowmarker] [retrieveop] symbol !"=" # negative lookahead to be sure it's not a definition
term = { ["§"] factor }+ # "§" means all following factors mandatory
factor = [flowmarker] [retrieveop] symbol !"=" # negative lookahead to be sure
# it's not a definition
| [flowmarker] literal
| [flowmarker] regexp
| [flowmarker] oneormore
......@@ -90,24 +93,27 @@ class EBNFGrammar(Grammar):
| repetition
| option
flowmarker = "!" | "&" # '!' negative lookahead, '&' positive lookahead
| "-!" | "-&" # '-' negative lookbehind, '-&' positive lookbehind
retrieveop = "::" | ":" # '::' pop, ':' retrieve
flowmarker = "!" | "&" # '!' negative lookahead, '&' positive lookahead
| "-!" | "-&" # '-' negative lookbehind, '-&' positive lookbehind
retrieveop = "::" | ":" # '::' pop, ':' retrieve
group = "(" §expression ")"
unordered = "<" §expression ">" # elements of expression in arbitrary order
unordered = "<" §expression ">" # elements of expression in arbitrary order
oneormore = "{" expression "}+"
repetition = "{" §expression "}"
option = "[" §expression "]"
symbol = /(?!\d)\w+/~ # e.g. expression, factor, parameter_list
literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while'
| /'(?:[^']|\\')*?'/~ # whitespace following literals will be ignored tacitly.
regexp = /~?\/(?:\\\/|[^\/])*?\/~?/~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
# '~' is a whitespace-marker, if present leading or trailing
# whitespace of a regular expression will be ignored tacitly.
list_ = /\w+/~ { "," /\w+/~ } # comma separated list of symbols, e.g. BEGIN_LIST, END_LIST,
# BEGIN_QUOTE, END_QUOTE ; see CommonMark/markdown.py for an exmaple
symbol = /(?!\d)\w+/~ # e.g. expression, factor, parameter_list
literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while'
| /'(?:[^']|\\')*?'/~ # whitespace following literals will be ignored
regexp = /~?\/(?:\\\/|[^\/])*?\/~?/~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
# '~' is a whitespace-marker, if present leading
# or trailing whitespace of a regular expression
# will be ignored tacitly.
list_ = /\w+/~ { "," /\w+/~ } # comma separated list of symbols,
# e.g. BEGIN_LIST, END_LIST,
# BEGIN_QUOTE, END_QUOTE
# see CommonMark/markdown.py for an example
EOF = !/./
"""
expression = Forward()
......@@ -205,7 +211,8 @@ EBNF_AST_transformation_table = {
"expression":
[replace_by_single_child, flatten, remove_tokens('|')], # remove_infix_operator],
"term":
[replace_by_single_child, flatten], # supports both idioms: "{ factor }+" and "factor { factor }"
[replace_by_single_child, flatten], # supports both idioms:
# "{ factor }+" and "factor { factor }"
"factor, flowmarker, retrieveop":
replace_by_single_child,
"group":
......@@ -574,7 +581,7 @@ class EBNFCompiler(Compiler):
remove_connections(self.root_symbol)
for leftover in defined_symbols:
self.rules[leftover][0].add_error(
('Rule "%s" is not connected to parser root "%s" !') %
('Rule "%s" is not connected to parser root "%s" !') %
(leftover, self.root_symbol), Error.WARNING)
# set root_symbol parser and assemble python grammar definition
......@@ -882,7 +889,8 @@ class EBNFCompiler(Compiler):
self.symbols[symbol] = node # remember first use of symbol
if symbol in self.rules:
self.recursive.add(symbol)
if symbol in EBNFCompiler.RESERVED_SYMBOLS: # (EBNFCompiler.WHITESPACE_KEYWORD, EBNFCompiler.COMMENT_KEYWORD):
if symbol in EBNFCompiler.RESERVED_SYMBOLS:
# (EBNFCompiler.WHITESPACE_KEYWORD, EBNFCompiler.COMMENT_KEYWORD):
return "RegExp(%s)" % symbol
return symbol
......
......@@ -186,7 +186,7 @@ class HistoryRecord:
COLGROUP = '<colgroup>\n<col style="width:2%"/><col style="width:2%"/><col style="width:75"/>' \
'<col style="width:6%"/><col style="width:15%"/>\n</colgroup>\n'
HTML_LEAD_IN = (
HTML_LEAD_IN = (
'<html>\n<head>\n<meta charset="utf-8"/>\n<style>\n'
'td.line, td.column {font-family:monospace;color:darkgrey}\n'
'td.stack{font-family:monospace}\n'
......@@ -221,6 +221,7 @@ class HistoryRecord:
"""
return self.Snapshot(self.line_col[0], self.line_col[1],
self.stack, self.status, self.excerpt)
def as_csv_line(self) -> str:
"""
Returns history record formatted as a csv table row.
......@@ -249,7 +250,7 @@ class HistoryRecord:
stack = stack[:i] + '<span class="matchstack">' + stack[i:]
else:
stack = stack[:i] + '<span class="matchstack">' + stack[i:k] \
+ '</span>' + stack[k:]
+ '</span>' + stack[k:]
elif status == self.FAIL:
status = '<span class="fail">' + status + '</span>'
else:
......@@ -273,7 +274,8 @@ class HistoryRecord:
@property
def status(self) -> str:
return self.FAIL if self.node is None else \
('"%s"' % self.err_msg()) if self.node.error_flag else self.MATCH # has_errors(self.node._errors)
('"%s"' % self.err_msg()) if self.node.error_flag else self.MATCH
# has_errors(self.node._errors)
@property
def excerpt(self):
......
......@@ -185,7 +185,7 @@ def add_parser_guard(parser_func):
# Mind that memoized parser calls will not appear in the history record!
if grammar.history_tracking__:
# don't track returning parsers except in case an error has occurred
remaining = len(rest)
# remaining = len(rest)
if grammar.moving_forward__ or (node and node.error_flag): # node._errors
record = HistoryRecord(grammar.call_stack__, node, text)
grammar.history__.append(record)
......@@ -685,7 +685,7 @@ class Grammar:
if parser.name:
# prevent overwriting instance variables or parsers of a different class
assert parser.name not in self.__dict__ or \
isinstance(self.__dict__[parser.name], parser.__class__), \
isinstance(self.__dict__[parser.name], parser.__class__), \
('Cannot add parser "%s" because a field with the same name '
'already exists in grammar object!' % parser.name)
setattr(self, parser.name, parser)
......@@ -773,8 +773,8 @@ class Grammar:
stitches.append(Node(None, rest))
result = Node(None, tuple(stitches)).init_pos(0)
if any(self.variables__.values()):
error_str = "Capture-retrieve-stack not empty after end of parsing: " + \
str(self.variables__)
error_str = "Capture-retrieve-stack not empty after end of parsing: " \
+ str(self.variables__)
if result:
if result.children:
# add another child node at the end to ensure that the position
......@@ -1324,7 +1324,8 @@ class Series(NaryOperator):
# Provide useful error messages
match = text.search(Series.RX_ARGUMENT)
i = max(1, text.index(match.regs[1][0])) if match else 1
node = Node(self, text_[:i]).init_pos(self.grammar.document_length__ - len(text_))
node = Node(self, text_[:i]).init_pos(self.grammar.document_length__
- len(text_))
node.add_error('%s expected; "%s"... found!'
% (parser.repr, text_[:10].replace('\n', '\\n ')),
code=Error.MANDATORY_CONTINUATION)
......@@ -1361,19 +1362,19 @@ class Series(NaryOperator):
def __add__(self, other: Parser) -> 'Series':
other_parsers = cast('Series', other).parsers if isinstance(other, Series) \
else cast(Tuple[Parser, ...], (other,)) # type: Tuple[Parser, ...]
else cast(Tuple[Parser, ...], (other,)) # type: Tuple[Parser, ...]
return Series(*(self.parsers + other_parsers),
mandatory=self.combined_mandatory(self, other))
def __radd__(self, other: Parser) -> 'Series':
other_parsers = cast('Series', other).parsers if isinstance(other, Series) \
else cast(Tuple[Parser, ...], (other,)) # type: Tuple[Parser, ...]
else cast(Tuple[Parser, ...], (other,)) # type: Tuple[Parser, ...]
return Series(*(other_parsers + self.parsers),
mandatory=self.combined_mandatory(other, self))
def __iadd__(self, other: Parser) -> 'Series':
other_parsers = cast('Series', other).parsers if isinstance(other, Series) \
else cast(Tuple[Parser, ...], (other,)) # type: Tuple[Parser, ...]
else cast(Tuple[Parser, ...], (other,)) # type: Tuple[Parser, ...]
self.parsers += other_parsers
self.mandatory = self.combined_mandatory(self, other)
return self
......@@ -1429,17 +1430,17 @@ class Alternative(NaryOperator):
def __or__(self, other: Parser) -> 'Alternative':
other_parsers = cast('Alternative', other).parsers if isinstance(other, Alternative) \
else cast(Tuple[Parser, ...], (other,)) # type: Tuple[Parser, ...]
else cast(Tuple[Parser, ...], (other,)) # type: Tuple[Parser, ...]
return Alternative(*(self.parsers + other_parsers))
def __ror__(self, other: Parser) -> 'Alternative':
other_parsers = cast('Alternative', other).parsers if isinstance(other, Alternative) \
else cast(Tuple[Parser, ...], (other,)) # type: Tuple[Parser, ...]
else cast(Tuple[Parser, ...], (other,)) # type: Tuple[Parser, ...]
return Alternative(*(other_parsers + self.parsers))
def __ior__(self, other: Parser) -> 'Alternative':
other_parsers = cast('Alternative', other).parsers if isinstance(other, Alternative) \
else cast(Tuple[Parser, ...], (other,)) # type: Tuple[Parser, ...]
else cast(Tuple[Parser, ...], (other,)) # type: Tuple[Parser, ...]
self.parsers += other_parsers
return self
......
......@@ -255,10 +255,10 @@ class StringView(collections.abc.Sized):
k = 0
i = self.find(sep, k)
while i >= 0:
pieces.append(self.text[self.begin + k : self.begin + i])
pieces.append(self.text[self.begin + k: self.begin + i])
k = i + l
i = self.find(sep, k)
pieces.append(self.text[self.begin + k : self.end])
pieces.append(self.text[self.begin + k: self.end])
return pieces
def replace(self, old, new):
......
......@@ -155,7 +155,8 @@ def unit_from_file(filename):
for parser_name, tests in test_unit.items():
m_names = set(tests.get('match', dict()).keys())
f_names = set(tests.get('fail', dict()).keys())
intersection = list(m_names & f_names); intersection.sort()
intersection = list(m_names & f_names)
intersection.sort()
if intersection:
errors.append("Same names %s assigned to match and fail test "
"of parser %s." % (str(intersection), parser_name))
......@@ -212,10 +213,9 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
Unit tests for a grammar-parser and ast transformations.
"""
if isinstance(test_unit, str):
unit_dir, unit_name = os.path.split(os.path.splitext(test_unit)[0])
_, unit_name = os.path.split(os.path.splitext(test_unit)[0])
test_unit = unit_from_file(test_unit)
else:
unit_dir = ""
unit_name = str(id(test_unit))
if verbose:
print("\nUnit: " + unit_name)
......
......@@ -17,7 +17,7 @@ permissions and limitations under the License.
Module ``toolkit`` contains utility functions and cross-sectional
functionality like logging support that is needed across several
functionality like logging support that is needed across several
of the other DHParser-Modules.
For logging functionality, the global variable LOGGING is defined which
......@@ -104,8 +104,8 @@ def lstrip_docstring(docstring: str) -> str:
def is_filename(strg: str) -> bool:
"""Tries to guess whether string ``s`` is a file name."""
return strg.find('\n') < 0 and strg[:1] != " " and strg[-1:] != " " \
and all(strg.find(ch) < 0 for ch in '*?"<>|')
# and strg.find('*') < 0 and strg.find('?') < 0
and all(strg.find(ch) < 0 for ch in '*?"<>|')
# and strg.find('*') < 0 and strg.find('?') < 0
#######################################################################
......@@ -275,7 +275,7 @@ def expand_table(compact_table: Dict) -> Dict:
>>> expand_table({"a, b": 1, ('d','e','f'):5, "c":3})
{'a': 1, 'b': 1, 'd': 5, 'e': 5, 'f': 5, 'c': 3}
"""
expanded_table = {}
expanded_table = {} # type: Dict
keys = list(compact_table.keys())
for key in keys:
value = compact_table[key]
......
......@@ -251,9 +251,9 @@ def traverse(root_node: Node,
try:
sequence = cache[key]
except KeyError:
sequence = table.get('+', []) + \
table.get(key, table.get('*', [])) + \
table.get('~', [])
sequence = table.get('+', []) \
+ table.get(key, table.get('*', [])) \
+ table.get('~', [])
# '+' always called (before any other processing function)
# '*' called for those nodes for which no (other) processing function
# appears in the table
......@@ -359,7 +359,7 @@ def is_token(context: List[Node], tokens: AbstractSet[str] = frozenset()) -> boo
i, k = 0, len(nd.children)
while i < len(nd.children) and nd.children[i].parser.ptype == WHITESPACE_PTYPE:
i += 1
while k > 0 and nd.children[k-1].parser.ptype == WHITESPACE_PTYPE:
while k > 0 and nd.children[k - 1].parser.ptype == WHITESPACE_PTYPE:
k -= 1
return "".join(child.content for child in node.children[i:k])
return nd.content
......@@ -727,11 +727,14 @@ def remove_children_if(context: List[Node], condition: Callable):
# # node.result = tuple(selection)
remove_whitespace = remove_children_if(is_whitespace) # partial(remove_children_if, condition=is_whitespace)
remove_whitespace = remove_children_if(is_whitespace)
# partial(remove_children_if, condition=is_whitespace)
remove_empty = remove_children_if(is_empty)
remove_anonymous_empty = remove_children_if(lambda ctx: is_empty(ctx) and is_anonymous(ctx))
remove_expendables = remove_children_if(is_expendable) # partial(remove_children_if, condition=is_expendable)
remove_anonymous_expendables = remove_children_if(lambda ctx: is_anonymous(ctx) and is_expendable(ctx))
remove_expendables = remove_children_if(is_expendable)
# partial(remove_children_if, condition=is_expendable)
remove_anonymous_expendables = remove_children_if(lambda ctx: is_anonymous(ctx)
and is_expendable(ctx))
remove_first = apply_if(keep_children(slice(1, None)), lambda ctx: len(ctx[-1].children) > 1)
remove_last = apply_if(keep_children(slice(None, -1)), lambda ctx: len(ctx[-1].children) > 1)
remove_brackets = apply_if(keep_children(slice(1, -1)), lambda ctx: len(ctx[-1].children) >= 2)
......
DHParser Reference Manual
=========================
This reference manual explains the technology used by DHParser. It is intended for people who
would like to extend or contribute to DHParser. The reference manual does not explain how a
Domain Specific Language (DSL) is developed (see the User's Manual for that); rather, it explains
the technical approach that DHParser employs for parsing, abstract syntax tree transformation and
compilation of a given DSL, and it describes the module and class structure of the DHParser
software. The programming guide requires a working knowledge of Python programming and a basic
understanding of common parser technology from the reader. It is also recommended to
read the introduction and the user's guide first.
Fundamentals
------------
DHParser is a parser generator aimed at, but not restricted to, the creation of domain specific
languages in the Digital Humanities (DH), hence the name "DHParser". In the Digital Humanities,
DSLs allow annotated texts or data to be entered in a human-friendly and readable form with a
text editor. In contrast to the prevailing XML-approach, the DSL-approach distinguishes between
a human-friendly *editing data format* and a machine-friendly *working data format*, which can
be XML but does not need to be. The DSL-approach therefore requires an additional step to reach
the *working data format*: the compilation of the annotated text or data written in the DSL
(the editing data format) into the working data format. In the following, a text or data file
written in a DSL will simply be called *document*. The editing data format will also be called
*source format* and the working data format will be denoted as *target format*.
Compiling a document specified in a domain specific language involves the following steps:
1. **Parsing** the document, which results in a representation of the document as a concrete
syntax tree.
2. **Transforming** the concrete syntax tree (CST) into an abstract syntax tree (AST), i.e. a
streamlined and simplified syntax tree ready for compilation.
3. **Compiling** the abstract syntax tree into the working data format.
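In DHParser, these three steps are chained by the `compile_source()` function, as the
`compile_src()` wrapper in the diff above shows. A minimal sketch of the pipeline call,
assuming an autogenerated `...Compiler.py` module that provides the usual `get_...()`
factory functions:

```python
# Sketch of the parse -> transform -> compile pipeline, mirroring compile_src()
# above. The get_...() factories are provided by an autogenerated ...Compiler.py
# module; compile_source() returns the compilation result plus error messages.
result = compile_source(source,              # document written in the DSL
                        get_preprocessor(),  # optional preprocessing of the source
                        get_grammar(),       # step 1: parsing -> concrete syntax tree
                        get_transformer(),   # step 2: CST -> abstract syntax tree
                        get_compiler())      # step 3: AST -> working data format
```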
All of these steps are carried out by the computer without any user intervention, i.e. without the
need for humans to rewrite or enrich the data during any of these steps. A DSL-compiler therefore
consists of three components which are applied in sequence: a *parser*, a *transformer* and a
*compiler*. Creating, i.e. programming, these components is the task of compiler construction.
The creation of all of these components is supported by DHParser, albeit to a different degree:
1. *Creating a parser*: DHParser fully automates parser generation. Once the syntax of the DSL
is formally specified, it can be compiled into a Python class that is able to parse any
document written in the DSL. DHParser uses Parsing-Expression-Grammars in a variant of the
Extended-Backus-Naur-Form (EBNF) for the specification of the syntax. (See
`examples/EBNF/EBNF.ebnf` for an example, and the sketch following this list.)
2. *Specifying the AST-transformations*: DHParser supports the AST-transformation with a
depth-first tree traversal algorithm (see `DHParser.transform.traverse`) and a number of
stock transformation functions which can also be combined. Most of the AST-transformation is
specified in a declarative manner by filling in a transformation-dictionary which associates
the node-types of the concrete syntax tree with such combinations of transformations. See
`DHParser.ebnf.EBNF_AST_transformation_table` as an example.
3. *Filling in the compiler class skeleton*: Compiler generation cannot be automated like parser
generation. It is supported by DHParser merely by generating a skeleton of a compiler class
with a method stub for each definition (or "production", as the definitions are sometimes
called) of the EBNF-specification. (See `examples/EBNF/EBNFCompiler.py`.) If the target format
is XML, there is a chance that the XML can simply be generated by serializing the abstract
syntax tree as XML, without the need for a dedicated compilation step.
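As a concrete illustration of point 1, the following sketch compiles an EBNF specification
into the Python source code of a parser class, using the same functions that
`load_compiler_suite()` calls in the diff above (the import paths are assumptions):

```python
# Sketch: EBNF specification -> Python source code of a parser class.
# The compile_source() call mirrors load_compiler_suite() above;
# the import paths are assumptions.
from DHParser.dsl import compile_source
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler

ebnf_spec = r'''
    @ whitespace = /\s*/
    document = { word }
    word     = /\w+/~
'''
parser_py, messages, ast = compile_source(ebnf_spec, None, get_ebnf_grammar(),
                                          get_ebnf_transformer(), get_ebnf_compiler())
if messages:
    for msg in messages:
        print(msg)
else:
    print(parser_py)   # a ready-to-use Python module defining the parser class
```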
Compiler Creation Workflow
--------------------------
TODO: Describe:
- setting up a new project
- invoking the DSL compiler
- conventions and data types
- the flat namespace of DHParser
Component Guide
---------------
### Parser
Parser creation is supported by DHParser through an EBNF-to-Python compiler, which yields a
working Python class that parses any document of the EBNF-specified DSL into a tree of
Node-objects, which are instances of the class `Node` defined in `DHParser/syntaxtree.py`.
The EBNF-to-Python compiler is actually a DSL-compiler that has been crafted with DHParser
itself. It is located in `DHParser/ebnf.py`. The formal specification of the EBNF variant
used by DHParser can be found in `examples/EBNF/EBNF.ebnf`. Comparing the automatically
generated `examples/EBNF/EBNFCompiler.py` with `DHParser/ebnf.py` can give you an idea of what
additional work is needed to create a DSL-compiler from an autogenerated DSL-parser. In most
DH-projects this task will be less complex, however, as the target format is XML, which
can usually be derived from the abstract syntax tree in fewer steps than the Python code in
the case of DHParser's EBNF-to-Python compiler.
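An autogenerated parser module exposes a factory function matching `get_(?:\w+_)?grammar`
(this is what `compile_python_object()` looks for in the diff above). A sketch of how the
resulting grammar object is used; the `as_sxpr()` serialization call is an assumption:

```python
# Sketch: parsing a document with a generated grammar class.
# get_grammar() is the factory that an autogenerated parser module provides;
# Grammar objects are callable on the document to be parsed.
grammar = get_grammar()
syntax_tree = grammar('some document written in the DSL')
if syntax_tree.error_flag:
    print('parsing produced errors')
else:
    print(syntax_tree.as_sxpr())   # S-expression serialization (assumed API)
```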
### AST-Transformation
Unlike compiler generation (see the next point below), a functional rather than
object-oriented approach has been employed, because it allows for a more concise
specification of the AST-transformation: typically the same combination of
transformations can be used for several node types of the AST, and it would be tedious
to fill in a method for each of them. In a sense, the specification of the AST-transformation
constitutes an "internal DSL", realized with the means of the Python language itself.
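A sketch of such a transformation dictionary, modeled on the
`EBNF_AST_transformation_table` and on the `traverse()` function shown in the diff above
(the import path is an assumption):

```python
# Sketch of a declarative AST-transformation in the style of
# EBNF_AST_transformation_table above; the import path is an assumption.
from DHParser.transform import (traverse, replace_by_single_child, flatten,
                                remove_tokens, remove_whitespace)

my_AST_transformation_table = {
    '+': remove_whitespace,        # '+'-entries are always called first (cf. traverse())
    'expression': [replace_by_single_child, flatten, remove_tokens('|')],
    '*': replace_by_single_child,  # fallback for node types without their own entry
}

def transform_ast(cst):
    """Transforms the concrete syntax tree `cst` in place into an AST."""
    traverse(cst, my_AST_transformation_table)
```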
### Compiler
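The compiler skeleton that DHParser generates (see point 3 under "Fundamentals" above)
contains one method stub per grammar symbol, which is then filled in by hand. A minimal
sketch, assuming the `on_<symbol>` stub naming and the base-class import path (cf.
`class EBNFCompiler(Compiler)` in the diff above):

```python
# Sketch only: a compiler class with one method stub per grammar symbol.
# The base-class import path and the on_<symbol> naming are assumptions.
from DHParser.parser import Compiler

class MyDSLCompiler(Compiler):
    """Compiles an AST of the DSL into the working data format."""

    def on_document(self, node):   # 'document' is a hypothetical grammar symbol
        # a full compiler would recursively dispatch to its on_...() methods here
        return ' '.join(child.content for child in node.children)

    def on_word(self, node):       # 'word' is a hypothetical grammar symbol
        return node.content
```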
Module Structure of DHParser
----------------------------
Class Hierarchy of DHParser
---------------------------
\ No newline at end of file
DHParser User's Guide
=====================
This user's guide explains how to create, test and employ a domain
specific language with DHParser for encoding text or data in a Digital
Humanities project.
Introduction
------------
Most Digital Humanities projects, or at least most text-centered DH projects,
involve in some way or other the entering and encoding of annotated text or
data into a computer. The systems that scientists use for this purpose
consist of an input surface (or "redaction system") for entering the data, a
storage system to keep the data, and a presentation system for providing the
data, and possibly also functionality for working with the data, to human or
machine recipients. A typical example of this type of system is Berlin's
Ediarum system, which consists of an XML editor for entering data, an
XML database for storing the data, and a web application for providing the data
to human readers or other web services via an application programming
interface (API). Ediarum is also typical in that, like many DH-projects, it
assumes an XML-based workflow.
......@@ -214,11 +214,13 @@ DEU_KLEIN = /(?!--)[a-zäöüßęõ_\-.]+/~
LAT_WORT = /(?!--)[a-z|\-_.]+/~
GROSSSCHRIFT = /(?!--)[A-ZÄÖÜ_\-]+/~
ZAHL = /[\d]+/~
SEITENZAHL = /[\d]+(?:\^(?:(?:\{[\d\w.,!? ]+\})|[\d\w.]+))?/~ # Zahl mit optionale folgendem hochgestelltem Buchstaben oder Text
SEITENZAHL = /[\d]+(?:\^(?:(?:\{[\d\w.,!? ]+\})|[\d\w.]+))?/~
# Zahl mit optionale folgendem hochgestelltem Buchstaben oder Text
ROEMISCHE_ZAHL = /(?=[MDCLXVI])M*(C[MD]|D?C*)(X[CL]|L?X*)(I[XV]|V?I*)(?=[^\w])/~
SCHLUESSELWORT = { //~ /\n/ }+ !ROEMISCHE_ZAHL /[A-ZÄÖÜ]{3,}\s+/
SATZZEICHEN = /(?!->)(?:(?:,(?!,))|(?:;(?!;))|(?::(?!:))|(?:-(?!-))|[.()\[\]]+)|[`''‘’?]/~ # div. Satzzeichen, aber keine doppelten ,, ;; oder ::
SATZZEICHEN = /(?!->)(?:(?:,(?!,))|(?:;(?!;))|(?::(?!:))|(?:-(?!-))|[.()\[\]]+)|[`''‘’?]/~
# div. Satzzeichen, aber keine doppelten ,, ;; oder ::
TEIL_SATZZEICHEN = /(?!->)(?:(?:,(?!,))|(?:-(?!-))|[.()]+)|[`''‘’?]/~ # Satzeichen bis auf Doppelpunkt ":", Semikolon ";" und eckige Klammern
BUCHSTABENFOLGE = /\w+/~
......@@ -243,7 +245,8 @@ RZS = /\s*?\n|$/ # Rückwärtiger Zeilensprung oder T
ZEILENSPRUNG = /[ \t]*\n/~
KOMMENTARZEILEN = { /[ \t]*\n?[ \t]*/ COMMENT__ } # echte Kommentarzeilen
KATEGORIENZEILE = /[^:\n]+[:][ \t]*\n/ # Kategorienzeilen enthalten genau einen Doppelpunkt am Ende der Zeile
KATEGORIENZEILE = /[^:\n]+[:][ \t]*\n/
# Kategorienzeilen enthalten genau einen Doppelpunkt am Ende der Zeile
FORTSETZUNG = !(ZWW /[^:\n]+[:]/)
DATEI_ENDE = !/./
......
......@@ -268,11 +268,13 @@ class MLWGrammar(Grammar):
LAT_WORT = /(?!--)[a-z|\-_.]+/~
GROSSSCHRIFT = /(?!--)[A-ZÄÖÜ_\-]+/~
ZAHL = /[\d]+/~
SEITENZAHL = /[\d]+(?:\^(?:(?:\{[\d\w.,!? ]+\})|[\d\w.]+))?/~ # Zahl mit optionale folgendem hochgestelltem Buchstaben oder Text
SEITENZAHL = /[\d]+(?:\^(?:(?:\{[\d\w.,!? ]+\})|[\d\w.]+))?/~
# Zahl mit optionale folgendem hochgestelltem Buchstaben oder Text
ROEMISCHE_ZAHL = /(?=[MDCLXVI])M*(C[MD]|D?C*)(X[CL]|L?X*)(I[XV]|V?I*)(?=[^\w])/~
SCHLUESSELWORT = { //~ /\n/ }+ !ROEMISCHE_ZAHL /[A-ZÄÖÜ]{3,}\s+/
SATZZEICHEN = /(?!->)(?:(?:,(?!,))|(?:;(?!;))|(?::(?!:))|(?:-(?!-))|[.()\[\]]+)|[`''‘’?]/~ # div. Satzzeichen, aber keine doppelten ,, ;; oder ::
SATZZEICHEN = /(?!->)(?:(?:,(?!,))|(?:;(?!;))|(?::(?!:))|(?:-(?!-))|[.()\[\]]+)|[`''‘’?]/~
# div. Satzzeichen, aber keine doppelten ,, ;; oder ::
TEIL_SATZZEICHEN = /(?!->)(?:(?:,(?!,))|(?:-(?!-))|[.()]+)|[`''‘’?]/~ # Satzeichen bis auf Doppelpunkt ":", Semikolon ";" und eckige Klammern
BUCHSTABENFOLGE = /\w+/~
......@@ -297,7 +299,8 @@ class MLWGrammar(Grammar):
ZEILENSPRUNG = /[ \t]*\n/~
KOMMENTARZEILEN = { /[ \t]*\n?[ \t]*/ COMMENT__ } # echte Kommentarzeilen
KATEGORIENZEILE = /[^:\n]+[:][ \t]*\n/ # Kategorienzeilen enthalten genau einen Doppelpunkt am Ende der Zeile
KATEGORIENZEILE = /[^:\n]+[:][ \t]*\n/
# Kategorienzeilen enthalten genau einen Doppelpunkt am Ende der Zeile
FORTSETZUNG = !(ZWW /[^:\n]+[:]/)
DATEI_ENDE = !/./
......@@ -318,7 +321,7 @@ class MLWGrammar(Grammar):
flexion = Forward()
genus = Forward()
wortart = Forward()
source_hash__ = "17e7d9c6b771eb2fa259912b687f8677"
source_hash__ = "96bc2c3d1c350e563d9cb484394fc5e2"
parser_initialization__ = "upon instantiation"
COMMENT__ = r'(?:\/\/.*)|(?:\/\*(?:.|\n)*?\*\/)'
WHITESPACE__ = r'[\t ]*'
......
......@@ -9,6 +9,4 @@ This directory contains the components for two kinds of DSLs for the
for the "Mittellateinisches Wörterbuch". The DSL texts will be converted
to an XML data model.
2. MLW_RETRO: A grammar for the retro-digitization of existing dictionary
entries in ASCII format. This is very experimental...
Visual Studio Code Support
==========================

To install the VS Code support, the subdirectories `mlwcolors` and
`mlwquelle` must be copied into the VS Code configuration directory
("~/.vscode") or linked from there. The `tasks.json` file belongs in
the `.vscode` directory below the project's root directory, e.g.
`MLW/.vscode`. When the root directory (`MLW` in this example) is
opened in VS Code, this file is evaluated and the tasks defined in it
are made available in Visual Studio Code.
MLW Language Server
===================
This will eventually become a language server for the MLW dialect.
Compared to mere syntax highlighting and external tool invocation,
this should enable deeper integration and better support in Visual
Studio Code.

The basic scaffolding has been adopted from
https://github.com/sourcegraph/python-langserver/

Start with:

    python python-langserver.py --mode=tcp --addr=2087