10.12., 9:00–11:00: Due to updates, GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit b1656cf6 authored by Eckhart Arnold's avatar Eckhart Arnold

- Token nodes and Whitespace nodes are now identified by Parser.ptype rather than by special names

parent cf262cfd
......@@ -79,7 +79,7 @@ from DHParser.parsers import GrammarBase, CompilerBase, nil_scanner, \\
from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, \\
remove_children_if, reduce_single_child, replace_by_single_child, remove_whitespace, \\
no_operation, remove_expendables, remove_tokens, flatten, is_whitespace, is_expendable, \\
WHITESPACE_KEYWORD, TOKEN_KEYWORD
WHITESPACE_PTYPE, TOKEN_PTYPE
'''
......
......@@ -29,8 +29,8 @@ from .parsers import GrammarBase, mixin_comment, nil_scanner, Forward, RE, Negat
Alternative, Sequence, Optional, Required, OneOrMore, ZeroOrMore, Token, CompilerBase, \
Capture, Retrieve
from .syntaxtree import Node, traverse, remove_enclosing_delimiters, reduce_single_child, \
replace_by_single_child, TOKEN_KEYWORD, remove_expendables, remove_tokens, flatten, \
forbid, assert_content, WHITESPACE_KEYWORD, key_parser_name, key_tag_name
replace_by_single_child, TOKEN_PTYPE, remove_expendables, remove_tokens, flatten, \
forbid, assert_content, WHITESPACE_PTYPE, key_parser_name, key_tag_name
from .versionnumber import __version__
......@@ -207,7 +207,7 @@ EBNF_transformation_table = {
[reduce_single_child, remove_enclosing_delimiters],
"symbol, literal, regexp":
[remove_expendables, reduce_single_child],
(TOKEN_KEYWORD, WHITESPACE_KEYWORD):
(TOKEN_PTYPE, WHITESPACE_PTYPE):
[remove_expendables, reduce_single_child],
"list_":
[flatten, partial(remove_tokens, tokens={','})],
......@@ -225,7 +225,7 @@ EBNF_validation_table = {
def EBNFTransformer(syntax_tree):
for processing_table, key_func in [(EBNF_transformation_table, key_parser_name),
for processing_table, key_func in [(EBNF_transformation_table, key_tag_name),
(EBNF_validation_table, key_tag_name)]:
traverse(syntax_tree, processing_table, key_func)
......@@ -290,7 +290,8 @@ class EBNFCompiler(CompilerBase):
in EBNF-Notation.
"""
COMMENT_KEYWORD = "COMMENT__"
RESERVED_SYMBOLS = {TOKEN_KEYWORD, WHITESPACE_KEYWORD, COMMENT_KEYWORD}
WHITESPACE_KEYWORD = "WSP__"
RESERVED_SYMBOLS = {WHITESPACE_KEYWORD, COMMENT_KEYWORD}
AST_ERROR = "Badly structured syntax tree. " \
"Potentially due to erroneous AST transformation."
PREFIX_TABLE = {'§': 'Required',
......@@ -377,11 +378,11 @@ class EBNFCompiler(CompilerBase):
definitions[i] = (definitions[i][0], 'Capture(%s)' % definitions[1])
self.definition_names = [defn[0] for defn in definitions]
definitions.append(('wspR__', WHITESPACE_KEYWORD
definitions.append(('wspR__', self.WHITESPACE_KEYWORD
if 'right' in self.directives['literalws'] else "''"))
definitions.append(('wspL__', WHITESPACE_KEYWORD
definitions.append(('wspL__', self.WHITESPACE_KEYWORD
if 'left' in self.directives['literalws'] else "''"))
definitions.append((WHITESPACE_KEYWORD,
definitions.append((self.WHITESPACE_KEYWORD,
("mixin_comment(whitespace="
"r'{whitespace}', comment=r'{comment}')").
format(**self.directives)))
......@@ -623,13 +624,13 @@ class EBNFCompiler(CompilerBase):
name = []
if rx[:2] == '~/':
if not 'left' in self.directives['literalws']:
name = ['wL=' + WHITESPACE_KEYWORD] + name
name = ['wL=' + self.WHITESPACE_KEYWORD] + name
rx = rx[1:]
elif 'left' in self.directives['literalws']:
name = ["wL=''"] + name
if rx[-2:] == '/~':
if 'right' not in self.directives['literalws']:
name = ['wR=' + WHITESPACE_KEYWORD] + name
name = ['wR=' + self.WHITESPACE_KEYWORD] + name
rx = rx[:-1]
elif 'right' in self.directives['literalws']:
name = ["wR=''"] + name
......
This diff is collapsed.
......@@ -30,8 +30,8 @@ from typing import NamedTuple
from .toolkit import is_logging, log_dir, expand_table, line_col, smart_list
__all__ = ['WHITESPACE_KEYWORD',
'TOKEN_KEYWORD',
__all__ = ['WHITESPACE_PTYPE',
'TOKEN_PTYPE',
'ZOMBIE_PARSER',
'Error',
'Node',
......@@ -70,8 +70,9 @@ class MockParser:
object substitute is needed, choose the singleton ZOMBIE_PARSER.
"""
def __init__(self, name='', ptype='', pbases=frozenset()):
assert not ptype or ptype[0] == ':'
self.name = name
self.ptype = ptype or self.__class__.__name__
self.ptype = ptype or ':' + self.__class__.__name__
# self.pbases = pbases or {cls.__name__ for cls in inspect.getmro(self.__class__)}
def __str__(self):
......@@ -469,7 +470,7 @@ def mock_syntax_tree(sexpr):
lines.append(sexpr[:m.end()])
sexpr = sexpr[m.end():]
result = "\n".join(lines)
return Node(MockParser(name, class_name), result)
return Node(MockParser(name, ':' + class_name), result)
########################################################################
......@@ -479,8 +480,8 @@ def mock_syntax_tree(sexpr):
########################################################################
WHITESPACE_KEYWORD = 'WSP__'
TOKEN_KEYWORD = 'TOKEN__'
WHITESPACE_PTYPE = ':Whitespace'
TOKEN_PTYPE = ':Token'
def key_parser_name(node):
......@@ -491,7 +492,7 @@ def key_tag_name(node):
return node.tag_name
def traverse(root_node, processing_table, key_func=key_parser_name):
def traverse(root_node, processing_table, key_func=key_tag_name):
"""Traverses the syntax tree starting with the given ``node`` depth
first and applies the sequences of callback functions registered
in the ``calltable``-dictionary.
......@@ -524,7 +525,7 @@ def traverse(root_node, processing_table, key_func=key_parser_name):
traverse_recursive(child)
node.error_flag |= child.error_flag # propagate error flag
sequence = table.get('*', []) + \
table.get(key_func(node), table.get('?', [])) + \
table.get(key_func(node), table.get('', [])) + \
table.get('~', [])
# '*' always called (before any other processing function)
# '?' called for those nodes for which no (other) processing functions is in the table
......@@ -593,7 +594,7 @@ def replace_parser(node, parser_name, parser_type=''):
def is_whitespace(node):
"""Removes whitespace and comments defined with the
``@comment``-directive."""
return node.parser.name == WHITESPACE_KEYWORD
return node.parser.ptype == WHITESPACE_PTYPE
def is_empty(node):
......@@ -605,7 +606,7 @@ def is_expendable(node):
def is_token(node, token_set=frozenset()):
return node.parser.name == TOKEN_KEYWORD and (not token_set or node.result in token_set)
return node.parser.ptype == TOKEN_PTYPE and (not token_set or node.result in token_set)
def remove_children_if(node, condition):
......
......@@ -43,6 +43,7 @@ except ImportError:
__all__ = ['logging',
'is_logging',
'log_dir',
'logfile_basename',
'line_col',
'error_messages',
'escape_re',
......
......@@ -26,7 +26,7 @@ import sys
sys.path.extend(['../', './'])
from DHParser.toolkit import is_logging
from DHParser.parsers import compile_source, Retrieve, WHITESPACE_KEYWORD, nil_scanner
from DHParser.parsers import compile_source, Retrieve, WHITESPACE_PTYPE, nil_scanner
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, EBNFTransformer, get_ebnf_compiler
from DHParser.dsl import compileEBNF, compileDSL, parser_factory
......@@ -104,7 +104,7 @@ class TestEBNFParser:
result = self.EBNF(snippet, 'literal')
assert not result.error_flag
assert str(result) == snippet
assert result.find(lambda node: str(node) == WHITESPACE_KEYWORD)
assert result.find(lambda node: node.parser.ptype == WHITESPACE_PTYPE)
result = self.EBNF(' "literal"', 'literal')
assert result.error_flag # literals catch following, but not leading whitespace
......
......@@ -26,7 +26,7 @@ sys.path.extend(['../', './'])
from DHParser import parsers
from DHParser.toolkit import is_logging, compile_python_object
from DHParser.syntaxtree import no_operation, traverse, remove_expendables, \
replace_by_single_child, reduce_single_child, flatten, TOKEN_KEYWORD
replace_by_single_child, reduce_single_child, flatten, TOKEN_PTYPE
from DHParser.parsers import compile_source
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
from DHParser.dsl import parser_factory, DHPARSER_IMPORTS
......@@ -47,7 +47,7 @@ ARITHMETIC_EBNF_transformation_table = {
"formula": [remove_expendables],
"term, expr": [replace_by_single_child, flatten],
"factor": [remove_expendables, reduce_single_child],
(TOKEN_KEYWORD): [remove_expendables, reduce_single_child],
(TOKEN_PTYPE): [remove_expendables, reduce_single_child],
"": [remove_expendables, replace_by_single_child]
}
......@@ -74,9 +74,9 @@ class TestGrammarTest:
3: "20 / 4 * 3"
},
"ast": {
1: "(term (factor 4) (TOKEN__ *) (factor 5))",
2: "(term (factor 20) (TOKEN__ /) (factor 4))",
3: "(term (term (factor 20) (TOKEN__ /) (factor 4)) (TOKEN__ *) (factor 3))"
1: "(term (factor 4) (:Token *) (factor 5))",
2: "(term (factor 20) (:Token /) (factor 4))",
3: "(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))"
},
"fail": {
4: "4 + 5",
......@@ -93,9 +93,9 @@ class TestGrammarTest:
3: "20 / 4 * 3"
},
"ast": {
1: "(term (factor 4) (TOKEN__ *) (factor 5))",
2: "(term (factor 20) (TOKEN__ /) (factor 4))",
3: "(term (term (factor 19) (TOKEN__ /) (factor 4)) (TOKEN__ *) (factor 3))" # error 19 != 20
1: "(term (factor 4) (:Token *) (factor 5))",
2: "(term (factor 20) (:Token /) (factor 4))",
3: "(term (term (factor 19) (:Token /) (factor 4)) (:Token *) (factor 3))" # error 19 != 20
},
"fail": {
4: "4 * 5", # error: this should match
......@@ -108,7 +108,7 @@ class TestGrammarTest:
parser_fac = parser_factory(ARITHMETIC_EBNF)
trans_fac = lambda : ARITHMETIC_EBNFTransform
errata = parsers.test_grammar(self.cases, parser_fac, trans_fac)
assert not errata
assert not errata, str(errata)
errata = parsers.test_grammar(self.failure_cases, parser_fac, trans_fac)
# for e in errata:
# print(e)
......
......@@ -23,9 +23,9 @@ import copy
import sys
sys.path.extend(['../', './'])
from DHParser.toolkit import compact_sexpr
from DHParser.toolkit import compact_sexpr, logging
from DHParser.syntaxtree import traverse, mock_syntax_tree, reduce_single_child, \
replace_by_single_child, flatten, remove_expendables, TOKEN_KEYWORD
replace_by_single_child, flatten, remove_expendables, TOKEN_PTYPE
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
from DHParser.dsl import parser_factory
......@@ -71,7 +71,7 @@ class TestSExpr:
tree = mock_syntax_tree(sexpr)
assert tree.tag_name == 'a'
assert tree.result[0].tag_name == 'b'
assert tree.result[1].tag_name == 'class3'
assert tree.result[1].tag_name == ':class3'
assert tree.result[2].tag_name == 'c'
class TestNode:
......@@ -106,12 +106,12 @@ class TestNode:
ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
att = {"term": [replace_by_single_child, flatten],
"factor": [remove_expendables, reduce_single_child],
(TOKEN_KEYWORD): [remove_expendables, reduce_single_child],
"": [remove_expendables, replace_by_single_child]}
(TOKEN_PTYPE): [remove_expendables, reduce_single_child],
"?": [remove_expendables, replace_by_single_child]}
parser = parser_factory(ebnf)()
tree = parser("20 / 4 * 3")
traverse(tree, att)
compare_tree = mock_syntax_tree("(term (term (factor 20) (TOKEN__ /) (factor 4)) (TOKEN__ *) (factor 3))")
compare_tree = mock_syntax_tree("(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))")
assert tree == compare_tree
def test_copy(self):
......@@ -128,7 +128,9 @@ class TestNode:
parser = get_ebnf_grammar()
transform = get_ebnf_transformer()
compiler = get_ebnf_compiler()
tree = parser(ebnf)
with logging():
tree = parser(ebnf)
parser.log_parsing_history()
tree_copy = copy.deepcopy(tree)
transform(tree_copy)
res1 = compiler(tree_copy)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment