Commit acfce849 authored by eckhart's avatar eckhart

- DHParser/parse.py: bugfix MetaParser.return_values() empty nodes will now be...

- DHParser/parse.py: bugfix MetaParser.return_values() empty nodes will now be properly deleted (unit test added)
parent ac44f00c
......@@ -1272,7 +1272,7 @@ class MetaParser(Parser):
it will be dropped and only its result will be kept.
In all other cases or if the optimization is turned off by
setting `grammar.flatten_tree__` to False, a new node will be
generated and the descendant node will be its gingle child.
generated and the descendant node will be its single child.
"""
assert node is None or isinstance(node, Node)
if self.grammar.flatten_tree__:
......@@ -1298,7 +1298,7 @@ class MetaParser(Parser):
for child in results:
if child.children and child.tag_name[0] == ':': # faster than c.is_anonymous():
nr.extend(child.children)
else:
elif child._result or child.tag_name[0] != ':':
nr.append(child)
return Node(self.tag_name, tuple(nr))
return Node(self.tag_name, results) # unoptimized code
......
......@@ -879,12 +879,11 @@ class RootNode(Node):
"""
Adds an Error object to the tree, locating it at a specific node.
"""
assert not isinstance(node, FrozenNode)
assert node.pos == error.pos
assert node.pos == error.pos or isinstance(node, FrozenNode)
self.errors.append(error)
self.error_flag = max(self.error_flag, error.code)
self.error_nodes.setdefault(id(node), []).append(error)
self.error_positions.setdefault(node.pos, set()).add(id(node))
self.error_positions.setdefault(error.pos, set()).add(id(node))
return self
def new_error(self,
......@@ -904,13 +903,13 @@ class RootNode(Node):
def get_errors(self, node: Node) -> List[Error]:
"""
Returns the List of errors that occured on the node or any child node
Returns the List of errors that occurred on the node or any child node
at the same position that has already been removed from the tree,
for example, because it was an anonymous empty child node.
"""
node_id = id(node) # type: int
errors = [] # type: List[Error]
for nid in self.error_positions[node.pos]:
for nid in self.error_positions.get(node.pos, frozenset()):
if nid == node_id:
errors.extend(self.error_nodes[nid])
else:
......
......@@ -152,7 +152,7 @@ def unit_from_config(config_str):
section_match = RX_SECTION.match(cfg, pos)
if pos != len(cfg) and not re.match(r'\s+$', cfg[pos:]):
raise SyntaxError('in line %i' % (cfg[:pos].count('\n') + 1))
raise SyntaxError('in line %i' % (cfg[:pos].count('\n') + 2))
return unit
......
......@@ -9,7 +9,7 @@
@ whitespace = /\s*/
@ ignorecase = True
@ comment = /%.*(?:\n|$)/
@ comment = //
#######################################################################
......@@ -25,10 +25,10 @@ pre_code = { /[^"%]+/ | /%.*\n/ }
comment = "@Comment{" text §"}"
entry = /@/ type "{" key { "," field §"=" content } §"}"
entry = /@/ type "{" key { "," field §"=" content } [","] §"}"
type = WORD
key = NO_BLANK_STRING
field = WORD_
field = WORD
content = "{" text "}" | plain_content
plain_content = COMMA_TERMINATED_STRING
......@@ -41,8 +41,9 @@ text = { CONTENT_STRING | "{" text "}" }
#
#######################################################################
WORD = /\w+/
WORD_ = /\w+/~
WORD = /\w+/~
NO_BLANK_STRING = /[^ \t\n,%]+/~
COMMA_TERMINATED_STRING = { /[^,%]+/ | /(?=%)/~ }
CONTENT_STRING = { /[^{}%]+/ | /(?=%)/~ }+
EOF = !/./
\ No newline at end of file
......@@ -57,25 +57,25 @@ class BibTeXGrammar(Grammar):
r"""Parser for a BibTeX source file.
"""
text = Forward()
source_hash__ = "f0e945d8b504317cdfb6e08fd2fcf596"
source_hash__ = "d9a1a1b431a3185dab127be165a37719"
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r'(?i)%.*(?:\n|$)'
COMMENT__ = r'(?i)'
WHITESPACE__ = r'\s*'
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wsp__ = Whitespace(WSP_RE__)
EOF = NegativeLookahead(RegExp('(?i).'))
CONTENT_STRING = OneOrMore(Alternative(RegExp('(?i)[^{}%]+'), Series(RegExp('(?i)(?=%)'), wsp__)))
COMMA_TERMINATED_STRING = ZeroOrMore(Alternative(RegExp('(?i)[^,%]+'), Series(RegExp('(?i)(?=%)'), wsp__)))
NO_BLANK_STRING = Series(RegExp('(?i)[^ \\t\\n,%]+'), wsp__)
WORD_ = Series(RegExp('(?i)\\w+'), wsp__)
WORD = RegExp('(?i)\\w+')
WORD = Series(RegExp('(?i)\\w+'), wsp__)
text.set(ZeroOrMore(Alternative(CONTENT_STRING, Series(Series(Token("{"), wsp__), text, Series(Token("}"), wsp__)))))
plain_content = Synonym(COMMA_TERMINATED_STRING)
content = Alternative(Series(Series(Token("{"), wsp__), text, Series(Token("}"), wsp__)), plain_content)
field = Synonym(WORD_)
field = Synonym(WORD)
key = Synonym(NO_BLANK_STRING)
type = Synonym(WORD)
entry = Series(RegExp('(?i)@'), type, Series(Token("{"), wsp__), key, ZeroOrMore(Series(Series(Token(","), wsp__), field, Series(Token("="), wsp__), content, mandatory=2)), Series(Token("}"), wsp__), mandatory=5)
entry = Series(RegExp('(?i)@'), type, Series(Token("{"), wsp__), key, ZeroOrMore(Series(Series(Token(","), wsp__), field, Series(Token("="), wsp__), content, mandatory=2)), Option(Series(Token(","), wsp__)), Series(Token("}"), wsp__), mandatory=6)
comment = Series(Series(Token("@Comment{"), wsp__), text, Series(Token("}"), wsp__), mandatory=2)
pre_code = ZeroOrMore(Alternative(RegExp('(?i)[^"%]+'), RegExp('(?i)%.*\\n')))
preamble = Series(Series(Token("@Preamble{"), wsp__), RegExp('(?i)"'), pre_code, RegExp('(?i)"'), wsp__, Series(Token("}"), wsp__), mandatory=5)
......
......@@ -3,3 +3,14 @@
simple : {Edward N. Zalta}
nested_braces : {\url{https://plato.stanford.edu/archives/fall2013/entries/thomas-kuhn/}}
[match:entry]
entry: """@Online{wikipedia-duhem-quine,
editor = {Wikipedia},
title = {Duhem-Quine thesis},
year = {2017},
date = {2017-08-19},
url = {https://en.wikipedia.org/w/index.php?title=Duhem\%E2\%80\%93Quine\_thesis\&oldid=772834991},
organization = {Wikipedia}
}"""
......@@ -37,4 +37,169 @@ Match-test "nested_braces"
<:Token>}</:Token>
</text>
<:Token>}</:Token>
</content>
\ No newline at end of file
</content>
Test of parser: "entry"
=======================
Match-test "entry"
------------------
### Test-code:
@Online{wikipedia-duhem-quine,
editor = {Wikipedia},
title = {Duhem-Quine thesis},
year = {2017},
date = {2017-08-19},
url = {https://en.wikipedia.org/w/index.php?title=Duhem\%E2\%80\%93Quine\_thesis\&oldid=772834991},
organization = {Wikipedia}
}
### Error:
Match test "entry" for parser "entry" failed:
Expr.: @Online{wikipedia-duhem-quine,
editor = {Wikipedia},
title = {Duhem-Quine thesis},
year = {2017},
date = {2017-08-19},
url = {https://en.wikipedia.org/w/index.php?title=Duhem\%E2\%80\%93Quine\_thesis\&oldid=772834991},
organization = {Wikipedia}
}
6:68: Error (1000): DSL parser specification error: Infinite Loop encountered. Caught by parser "CONTENT_STRING = {/(?i)[^{}%]+/ | /(?i)(?=%)/ ~}+".
Call stack: entry->:ZeroOrMore->:Series->content->:Series->text->:Alternative->CONTENT_STRING->:Alternative->:Series->:Whitespace
6:68: Error (1000): DSL parser specification error: Infinite Loop encountered. Caught by parser "CONTENT_STRING = {/(?i)[^{}%]+/ | /(?i)(?=%)/ ~}+".
Call stack: entry->:ZeroOrMore->:Series->content->:Series->text->:Alternative->CONTENT_STRING->:Alternative->:Series->:Whitespace
6:68: Error (1000): DSL parser specification error: Infinite Loop encountered. Caught by parser "text = {CONTENT_STRING | '{' ~ text '}' ~}".
Call stack: entry->:ZeroOrMore->:Series->content->:Series->text->:Alternative->CONTENT_STRING
6:68: Error (1000): DSL parser specification error: Infinite Loop encountered. Caught by parser "COMMA_TERMINATED_STRING = {/(?i)[^,%]+/ | /(?i)(?=%)/ ~}".
Call stack: entry->:ZeroOrMore->:Series->content->plain_content->COMMA_TERMINATED_STRING->:Alternative
6:68: Error (1010): '}' ~ expected, "%E2\%80\%9" found!
6:69: Error (1040): Parser stopped before end! trying to recover but stopping history recording at this point.
7:1: Error (1020): Parser did not match!
Most advanced: 7, 1: ; MATCH; "E2\%80\%93Quine\_the..."
Last match: 7, 1: ; MATCH; "E2\%80\%93Quine\_the...";
### AST
<__ZOMBIE__>
<entry>
<:RegExp>@</:RegExp>
<type>
<WORD>Online</WORD>
</type>
<:Token>{</:Token>
<key>
<NO_BLANK_STRING>wikipedia-duhem-quine</NO_BLANK_STRING>
</key>
<:Token>,</:Token>
<:Whitespace>
</:Whitespace>
<field>
<WORD>
<:RegExp>editor</:RegExp>
<:Whitespace> </:Whitespace>
</WORD>
</field>
<:Token>=</:Token>
<:Whitespace> </:Whitespace>
<content>
<:Token>{</:Token>
<text>
<CONTENT_STRING>Wikipedia</CONTENT_STRING>
</text>
<:Token>}</:Token>
</content>
<:Token>,</:Token>
<:Whitespace>
</:Whitespace>
<field>
<WORD>
<:RegExp>title</:RegExp>
<:Whitespace> </:Whitespace>
</WORD>
</field>
<:Token>=</:Token>
<:Whitespace> </:Whitespace>
<content>
<:Token>{</:Token>
<text>
<CONTENT_STRING>Duhem-Quine thesis</CONTENT_STRING>
</text>
<:Token>}</:Token>
</content>
<:Token>,</:Token>
<:Whitespace>
</:Whitespace>
<field>
<WORD>
<:RegExp>year</:RegExp>
<:Whitespace> </:Whitespace>
</WORD>
</field>
<:Token>=</:Token>
<:Whitespace> </:Whitespace>
<content>
<:Token>{</:Token>
<text>
<CONTENT_STRING>2017</CONTENT_STRING>
</text>
<:Token>}</:Token>
</content>
<:Token>,</:Token>
<:Whitespace>
</:Whitespace>
<field>
<WORD>
<:RegExp>date</:RegExp>
<:Whitespace> </:Whitespace>
</WORD>
</field>
<:Token>=</:Token>
<:Whitespace> </:Whitespace>
<content>
<:Token>{</:Token>
<text>
<CONTENT_STRING>2017-08-19</CONTENT_STRING>
</text>
<:Token>}</:Token>
</content>
<:Token>,</:Token>
<:Whitespace>
</:Whitespace>
<field>
<WORD>
<:RegExp>url</:RegExp>
<:Whitespace> </:Whitespace>
</WORD>
</field>
<:Token>=</:Token>
<:Whitespace> </:Whitespace>
<plain_content>
<COMMA_TERMINATED_STRING>{https://en.wikipedia.org/w/index.php?title=Duhem\</COMMA_TERMINATED_STRING>
</plain_content>
<__ZOMBIE__>%</__ZOMBIE__>
</entry>
<__ZOMBIE__>
E2\%80\%93Quine\_thesis\&amp;oldid=772834991},
</__ZOMBIE__>
<__ZOMBIE__>
organization = {Wikipedia}
</__ZOMBIE__>
<__ZOMBIE__>}</__ZOMBIE__>
</__ZOMBIE__>
\ No newline at end of file
......@@ -145,7 +145,7 @@ Experiment? A Case-Study from Chemisty},
editor = {Edward N. Zalta},
edition = {Fall 2013},
publisher = {Metaphysics Research Lab, Stanford University},
howpublished = {\url{https://plato.stanford.edu/archives/fall2013/entries/thomas-kuhn/}},
howpublished = {\url{https://plato.stanford.edu/archives/fall2013/entries/thomas-kuhn/}}
}
@InBook{carusi-et-al:2013,
......@@ -154,7 +154,7 @@ Experiment? A Case-Study from Chemisty},
booktitle = {Computer Simulations and the Changing Face of Scientific Experimentation},
year = {2013},
editor = {Eckhart Arnold and Juan Duran},
chapter = {6},
chapter = {6}
}
@Article{cownden-et-al:2017,
......@@ -168,7 +168,7 @@ Experiment? A Case-Study from Chemisty},
issn = {1090-5138},
doi = {http://dx.doi.org/10.1016/j.evolhumbehav.2017.01.004},
url = {http://www.sciencedirect.com/science/article/pii/S1090513816301398},
keywords = {Altruism, Cultural evolution, Evolutionary game theory, Genetic evolution, Human cooperation, Phenotypic gambit},
keywords = {Altruism, Cultural evolution, Evolutionary game theory, Genetic evolution, Human cooperation, Phenotypic gambit}
}
@Book{dugatkin:1997,
......@@ -187,7 +187,7 @@ Experiment? A Case-Study from Chemisty},
year = {1994},
number = {2},
pages = {374-381},
url = {http://cogprints.org/342/1/IMPERIA.htm},
url = {http://cogprints.org/342/1/IMPERIA.htm}
}
@Book{feyerabend:1975,
......@@ -471,7 +471,7 @@ Experiment? A Case-Study from Chemisty},
year = {2017},
date = {2017-08-19},
url = {https://en.wikipedia.org/w/index.php?title=Duhem\%E2\%80\%93Quine\_thesis\&oldid=772834991},
organization = {Wikipedia},
organization = {Wikipedia}
}
@Article{winsberg:2003,
......
......@@ -773,52 +773,63 @@ class TestEarlyTokenWhitespaceDrop:
class TestMetaParser:
def test_meta_parser(self):
def setup(self):
self.mp = MetaParser()
self.mp.grammar = Grammar() # override placeholder warning
self.mp.pname = "named"
self.mp.tag_name = self.mp.pname
def test_return_value(self):
save = get_config_value('flatten_tree_while_parsing')
set_config_value('flatten_tree_while_parsing', True)
mp = MetaParser()
mp.grammar = Grammar() # override placeholder warning
mp.pname = "named"
mp.tag_name = mp.pname
nd = mp._return_value(Node('tagged', 'non-empty'))
nd = self.mp._return_value(Node('tagged', 'non-empty'))
assert nd.tag_name == 'named', nd.as_sxpr()
assert len(nd.children) == 1
assert nd.children[0].tag_name == 'tagged'
assert nd.children[0].result == "non-empty"
nd = mp._return_value(Node('tagged', ''))
nd = self.mp._return_value(Node('tagged', ''))
assert nd.tag_name == 'named', nd.as_sxpr()
assert len(nd.children) == 1
assert nd.children[0].tag_name == 'tagged'
assert not nd.children[0].result
nd = mp._return_value(Node(':anonymous', 'content'))
nd = self.mp._return_value(Node(':anonymous', 'content'))
assert nd.tag_name == 'named', nd.as_sxpr()
assert not nd.children
assert nd.result == 'content'
nd = mp._return_value(Node(':anonymous', ''))
nd = self.mp._return_value(Node(':anonymous', ''))
assert nd.tag_name == 'named', nd.as_sxpr()
assert not nd.children
assert not nd.content
mp.pname = ''
mp.tag_name = ':unnamed'
nd = mp._return_value(Node('tagged', 'non-empty'))
nd = self.mp._return_value(EMPTY_NODE)
assert nd.tag_name == 'named' and not nd.children, nd.as_sxpr()
self.mp.pname = ''
self.mp.tag_name = ':unnamed'
nd = self.mp._return_value(Node('tagged', 'non-empty'))
assert nd.tag_name == 'tagged', nd.as_sxpr()
assert len(nd.children) == 0
assert nd.content == 'non-empty'
nd = mp._return_value(Node('tagged', ''))
nd = self.mp._return_value(Node('tagged', ''))
assert nd.tag_name == 'tagged', nd.as_sxpr()
assert len(nd.children) == 0
assert not nd.content
nd = mp._return_value(Node(':anonymous', 'content'))
nd = self.mp._return_value(Node(':anonymous', 'content'))
assert nd.tag_name == ':anonymous', nd.as_sxpr()
assert not nd.children
assert nd.result == 'content'
nd = mp._return_value(Node('', ''))
nd = self.mp._return_value(Node('', ''))
assert nd.tag_name == '', nd.as_sxpr()
assert not nd.children
assert not nd.content
assert mp._return_value(None) == EMPTY_NODE
assert self.mp._return_value(None) == EMPTY_NODE
assert self.mp._return_value(EMPTY_NODE) == EMPTY_NODE
set_config_value('flatten_tree_while_parsing', save)
def test_return_values(self):
self.mp.pname = "named"
self.mp.tag_name = self.mp.pname
rv = self.mp._return_values((Node('tag', 'content'), EMPTY_NODE))
assert rv[-1].tag_name != EMPTY_NODE.tag_name, rv[-1].tag_name
if __name__ == "__main__":
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment