Commit 756e677e authored by Eckhart Arnold's avatar Eckhart Arnold
Browse files

ebnf - full documentation started

parent 878cf515
......@@ -40,6 +40,69 @@ However, here we will show how to compile an EBNF-specified grammar
from within Python-code and how to execute the parser that was
generated by compiling the grammar.
As an example, we will realize a json-parser (https://www.json.org/).
Let's start with creating some test-data::
>>> testobj = {'list': [1,2,"string"], 'int': 3, 'bool': False}
>>> import json
>>> testdata = json.dumps(testobj)
>>> testdata
'{"list": [1, 2, "string"], "int": 3, "bool": false}'
We define the JSON grammar (see https://www.json.org/) in a
top-down manner in EBNF. We'll use a regular-expression look-alike
syntax. EBNF, as you may recall, consists of a sequence of symbol
definitions. The definiens of each definition is either a string
literal, a regular expression, another symbol, or a combination
of these built with four different operators: 1. sequences
2. alternatives 3. options and 4. repetitions. Here is how these
elements are denoted in classical and regex-like EBNF-syntax:
.. table::

    ======================== ============== ==============
    element                  classical EBNF regex-like
    ======================== ============== ==============
    insignificant whitespace ~              ~
    string literal           "..." or `...` "..." or `...`
    regular expr.            /.../          /.../
    sequences                A B C          A B C
    alternatives             A | B | C      A | B | C
    options                  [ ... ]        ...?
    repetitions              { ... }        ...*
    one or more                             ...+
    grouping                 (...)          (...)
    ======================== ============== ==============

"Insignificant whitespace" is a speciality of DHParser. Denoting
insignificant whitespace with a particular sign `~` makes it possible to
eliminate it already during the parsing process, without burdening later
syntax-tree-processing stages with this common task. DHParser offers
several more facilities to restrain the verbosity of the concrete
syntax tree, so that the outcome of the parsing stage already comes close
(or at least closer) to the intended abstract syntax tree.
JSON consists of two complex data types, 1) associative arrays,
called "object", and 2) sequences of heterogeneous data, called "array";
and of four simple data types, 1) string, 2) number, 3) bool and 4) null.
The structure of a JSON file can easily be described in EBNF::
>>> grammar = 'json = ~ _element _EOF \\n'\
' _EOF = /$/ \\n'\
'_element = object | array | string | number | bool | null \\n'\
'object = "{" ~ member ( "," ~ §member )* §"}" ~ \\n'\
'member = string §":" ~ _element \\n'\
'array = "[" ~ ( _element ( "," ~ _element )* )? §"]" ~ \\n'\
'string = `"` §_CHARS `"` ~ \\n'\
' _CHARS = /[^"\\]+/ | /\\[\/bnrt\\]/ \\n'\
'number = _INT _FRAC? _EXP? ~ \\n'\
' _INT = `-`? ( /[1-9][0-9]+/ | /[0-9]/ ) \\n'\
' _FRAC = `.` /[0-9]+/ \\n'\
' _EXP = (`E`|`e`) [`+`|`-`] /[0-9]+/ \\n'\
'bool = "true" ~ | "false" ~ \\n'\
'null = "null" ~ \\n'
Let's try this on our test-string
"""
......@@ -63,7 +126,7 @@ from DHParser.parse import Parser, Grammar, mixin_comment, mixin_nonempty, Forwa
INFINITE, matching_bracket, ParseFunc, update_scanner
from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node, RootNode, WHITESPACE_PTYPE, TOKEN_PTYPE
from DHParser.toolkit import load_if_file, escape_re, escape_control_characters, md5, \
from DHParser.toolkit import load_if_file, escape_re, escape_ctrl_chars, md5, \
sane_parser_name, re, expand_table, unrepr, compile_python_object, DHPARSER_PARENTDIR, \
RX_NEVER_MATCH, cython
from DHParser.transform import TransformationFunc, traverse, remove_brackets, \
......@@ -1981,10 +2044,10 @@ class EBNFCompiler(Compiler):
def on_directive(self, node: Node) -> str:
for child in node.children:
if child.tag_name == "literal":
child.result = escape_control_characters(child.content)
child.result = escape_ctrl_chars(child.content)
elif child.tag_name == "literals":
self.join_literals(child)
child.result = escape_control_characters(child.content)
child.result = escape_ctrl_chars(child.content)
key = node.children[0].content
assert key not in self.directives.tokens
......@@ -2504,16 +2567,16 @@ class EBNFCompiler(Compiler):
return symbol
# Pre-change, one-argument form of TEXT_PARSER; a two-argument form
# TEXT_PARSER(self, text, drop) appears further down in the same hunk.
# Returns the source text of a Text(...) parser for `text`, wrapped in
# Drop(...) when DROP_STRINGS is among the drop directives and the
# second-to-last entry of self.context is not a "definition" node.
def TEXT_PARSER(self, text):
if DROP_STRINGS in self.directives.drop and self.context[-2].tag_name != "definition":
return 'Drop(Text(' + text + '))'
return 'Text(' + text + ')'
def drop_on(self, category):
    """Tell whether parsers of the given drop-category should be dropped.

    Returns True only if `category` is listed in ``self.directives.drop``
    and the second-to-last entry of ``self.context`` is not a
    "definition" node; otherwise returns False.
    """
    if category not in self.directives.drop:
        # Category is not subject to dropping; the context is not consulted.
        return False
    return self.context[-2].tag_name != "definition"
# Pre-change, one-argument form of REGEXP_PARSER; a two-argument form
# REGEXP_PARSER(self, regexp, drop) appears further down in the same hunk.
# Returns the source text of a RegExp(...) parser for `regexp`, wrapped in
# Drop(...) when DROP_REGEXP is among the drop directives and the
# second-to-last entry of self.context is not a "definition" node.
def REGEXP_PARSER(self, regexp):
if DROP_REGEXP in self.directives.drop and self.context[-2].tag_name != "definition":
return 'Drop(RegExp(' + regexp + '))'
return 'RegExp(' + regexp + ')'
def TEXT_PARSER(self, text, drop):
    """Return the source text of a ``Text`` parser for `text`.

    :param text: the (already quoted) argument to place inside ``Text(...)``.
    :param drop: if truthy, the parser expression is wrapped in ``Drop(...)``.
    :returns: the parser expression as a string.
    """
    if drop:
        return 'Drop(Text(' + text + '))'
    return 'Text(' + text + ')'
def REGEXP_PARSER(self, regexp, drop):
    """Return the source text of a ``RegExp`` parser for `regexp`.

    :param regexp: the (already quoted) argument to place inside
        ``RegExp(...)``.
    :param drop: if truthy, the parser expression is wrapped in ``Drop(...)``.
    :returns: the parser expression as a string.
    """
    if drop:
        return 'Drop(RegExp(' + regexp + '))'
    return 'RegExp(' + regexp + ')'
def WSPC_PARSER(self, force_drop=False):
......@@ -2530,7 +2593,7 @@ class EBNFCompiler(Compiler):
def on_literal(self, node: Node) -> str:
center = self.TEXT_PARSER(escape_control_characters(node.content))
center = self.TEXT_PARSER(escape_ctrl_chars(node.content), self.drop_on(DROP_STRINGS))
force = DROP_STRINGS in self.directives.drop
left = self.WSPC_PARSER(force) if 'left' in self.directives.literalws else ''
right = self.WSPC_PARSER(force) if 'right' in self.directives.literalws else ''
......@@ -2540,13 +2603,13 @@ class EBNFCompiler(Compiler):
def on_plaintext(self, node: Node) -> str:
tk = escape_control_characters(node.content)
tk = escape_ctrl_chars(node.content)
rpl = '"' if tk.find('"') < 0 else "'" if tk.find("'") < 0 else ''
if rpl:
tk = rpl + tk[1:-1] + rpl
else:
tk = rpl + tk.replace('"', '\\"')[1:-1] + rpl
return self.TEXT_PARSER(tk)
return self.TEXT_PARSER(tk, self.drop_on(DROP_STRINGS))
def on_regexp(self, node: Node) -> str:
......@@ -2560,7 +2623,7 @@ class EBNFCompiler(Compiler):
% (EBNFCompiler.AST_ERROR, str(error), trace, node.as_sxpr())
self.tree.new_error(node, errmsg)
return '"' + errmsg + '"'
return self.REGEXP_PARSER(arg)
return self.REGEXP_PARSER(arg, self.drop_on(DROP_REGEXP))
def on_char_range(self, node) -> str:
......
......@@ -62,7 +62,7 @@ from DHParser.configuration import access_presets, finalize_presets, get_config_
from DHParser.error import Error
from DHParser.stringview import StringView
from DHParser.syntaxtree import Node, FrozenNode, ZOMBIE_TAG, EMPTY_PTYPE
from DHParser.toolkit import escape_control_characters, abbreviate_middle, cython
from DHParser.toolkit import escape_ctrl_chars, abbreviate_middle, cython
__all__ = ('CallItem',
'start_logging',
......@@ -443,7 +443,7 @@ class HistoryRecord:
else:
s = self.text
excerpt = s[:36] + ' ...' if len(s) > 36 else s
excerpt = escape_control_characters(str(excerpt))
excerpt = escape_ctrl_chars(str(excerpt))
return excerpt
# @property
......
......@@ -49,7 +49,7 @@ from DHParser.preprocess import BEGIN_TOKEN, END_TOKEN, RX_TOKEN_NAME
from DHParser.stringview import StringView, EMPTY_STRING_VIEW
from DHParser.syntaxtree import ChildrenType, Node, RootNode, WHITESPACE_PTYPE, \
TOKEN_PTYPE, ZOMBIE_TAG, EMPTY_NODE, ResultType
from DHParser.toolkit import sane_parser_name, escape_control_characters, re, cython, \
from DHParser.toolkit import sane_parser_name, escape_ctrl_chars, re, cython, \
abbreviate_middle, RX_NEVER_MATCH, RxPatternType, linebreaks, line_col, identity
......@@ -1841,7 +1841,7 @@ class RegExp(Parser):
return 'whitespace__'
except (AttributeError, NameError):
pass
return '/' + escape_control_characters('%s' % abbreviate_middle(pattern, 118))\
return '/' + escape_ctrl_chars('%s' % abbreviate_middle(pattern, 118))\
.replace('/', '\\/') + '/'
......
......@@ -81,7 +81,7 @@ __all__ = ('typing',
'RxPatternType',
're_find',
'escape_re',
'escape_control_characters',
'escape_ctrl_chars',
'is_filename',
'relative_path',
'concurrent_ident',
......@@ -391,7 +391,7 @@ def escape_re(strg: str) -> str:
return re.escape(strg)
def escape_control_characters(strg: str) -> str:
def escape_ctrl_chars(strg: str) -> str:
r"""
Replace all control characters (e.g. `\n` `\t`) in a string
by their back-slashed representation and replaces backslash by
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment