Commit 92bd6575 authored by eckhart
Browse files

slight adjustments

parent cc08caee
...@@ -15,7 +15,8 @@ testdata/*.pdf ...@@ -15,7 +15,8 @@ testdata/*.pdf
*.old *.old
DEBUG* DEBUG*
LOGS LOGS
**/REPORT **/REPORT/*
REPORT
external_resources/ external_resources/
tmp/* tmp/*
test/tmp* test/tmp*
......
...@@ -53,15 +53,7 @@ def get_preprocessor() -> PreprocessorFunc: ...@@ -53,15 +53,7 @@ def get_preprocessor() -> PreprocessorFunc:
####################################################################### #######################################################################
class ArithmeticGrammar(Grammar): class ArithmeticGrammar(Grammar):
r"""Parser for an Arithmetic source file, with this grammar: r"""Parser for an Arithmetic source file.
expression = term { ("+" | "-") term}
term = factor { ("*"|"/") factor}
factor = constant | variable | "(" expression ")"
variable = "x" | "y" | "z"
constant = digit {digit}
digit = "0" | "1" | "..." | "9"
test = digit constant variable
""" """
constant = Forward() constant = Forward()
digit = Forward() digit = Forward()
...@@ -69,6 +61,7 @@ class ArithmeticGrammar(Grammar): ...@@ -69,6 +61,7 @@ class ArithmeticGrammar(Grammar):
variable = Forward() variable = Forward()
source_hash__ = "c4e6e090ef9673b972ba18ef39fe7c8e" source_hash__ = "c4e6e090ef9673b972ba18ef39fe7c8e"
parser_initialization__ = "upon instantiation" parser_initialization__ = "upon instantiation"
resume_rules__ = {}
COMMENT__ = r'' COMMENT__ = r''
WHITESPACE__ = r'\s*' WHITESPACE__ = r'\s*'
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__) WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
......
...@@ -29,7 +29,7 @@ from DHParser import is_filename, load_if_file, \ ...@@ -29,7 +29,7 @@ from DHParser import is_filename, load_if_file, \
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \ remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \
is_empty, is_expendable, collapse, replace_content, remove_nodes, remove_content, remove_brackets, replace_parser, \ is_empty, is_expendable, collapse, replace_content, remove_nodes, remove_content, remove_brackets, replace_parser, \
keep_children, is_one_of, has_content, apply_if, remove_first, remove_last, \ keep_children, is_one_of, has_content, apply_if, remove_first, remove_last, \
WHITESPACE_PTYPE, TOKEN_PTYPE WHITESPACE_PTYPE, TOKEN_PTYPE, GLOBALS
from DHParser.transform import TransformationFunc from DHParser.transform import TransformationFunc
from DHParser.log import logging from DHParser.log import logging
...@@ -54,65 +54,15 @@ def get_preprocessor() -> PreprocessorFunc: ...@@ -54,65 +54,15 @@ def get_preprocessor() -> PreprocessorFunc:
####################################################################### #######################################################################
class BibTeXGrammar(Grammar): class BibTeXGrammar(Grammar):
r"""Parser for a BibTeX source file, with this grammar: r"""Parser for a BibTeX source file.
# BibTeX-Grammar
#######################################################################
#
# EBNF-Directives
#
######################################################################
@ whitespace = /\s*/
@ ignorecase = True
@ comment = /%.*(?:\n|$)/
#######################################################################
#
# Bib-file Structure and Components
#
#######################################################################
bibliography = { preamble | comment | entry }
preamble = "@Preamble{" /"/ pre_code /"/~ §"}"
pre_code = { /[^"%]+/ | /%.*\n/ }
comment = "@Comment{" text §"}"
entry = /@/ type "{" key { "," field §"=" content } §"}"
type = WORD
key = NO_BLANK_STRING
field = WORD_
content = "{" text "}" | plain_content
plain_content = COMMA_TERMINATED_STRING
text = { CONTENT_STRING | "{" text "}" }
#######################################################################
#
# Regular Expressions
#
#######################################################################
WORD = /\w+/
WORD_ = /\w+/~
NO_BLANK_STRING = /[^ \t\n,%]+/~
COMMA_TERMINATED_STRING = { /[^,%]+/ | /(?=%)/~ }
CONTENT_STRING = { /[^{}%]+/ | /(?=%)/~ }+
""" """
text = Forward() text = Forward()
source_hash__ = "534895885bfdddb19785f5d943b356a7" source_hash__ = "569bee4a051ea4d9f625ad9bbd46a7a2"
parser_initialization__ = "upon instantiation" parser_initialization__ = "upon instantiation"
resume_rules__ = {}
COMMENT__ = r'(?i)%.*(?:\n|$)' COMMENT__ = r'(?i)%.*(?:\n|$)'
WHITESPACE__ = r'\s*' WHITESPACE__ = r'\s*'
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__) WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = ''
wspR__ = WSP_RE__
wsp__ = Whitespace(WSP_RE__) wsp__ = Whitespace(WSP_RE__)
CONTENT_STRING = OneOrMore(Alternative(RegExp('(?i)[^{}%]+'), Series(RegExp('(?i)(?=%)'), wsp__))) CONTENT_STRING = OneOrMore(Alternative(RegExp('(?i)[^{}%]+'), Series(RegExp('(?i)(?=%)'), wsp__)))
COMMA_TERMINATED_STRING = ZeroOrMore(Alternative(RegExp('(?i)[^,%]+'), Series(RegExp('(?i)(?=%)'), wsp__))) COMMA_TERMINATED_STRING = ZeroOrMore(Alternative(RegExp('(?i)[^,%]+'), Series(RegExp('(?i)(?=%)'), wsp__)))
...@@ -133,12 +83,11 @@ class BibTeXGrammar(Grammar): ...@@ -133,12 +83,11 @@ class BibTeXGrammar(Grammar):
root__ = bibliography root__ = bibliography
def get_grammar() -> BibTeXGrammar: def get_grammar() -> BibTeXGrammar:
global thread_local_BibTeX_grammar_singleton
try: try:
grammar = thread_local_BibTeX_grammar_singleton grammar = GLOBALS.BibTeX_1_grammar_singleton
except NameError: except AttributeError:
thread_local_BibTeX_grammar_singleton = BibTeXGrammar() GLOBALS.BibTeX_1_grammar_singleton = BibTeXGrammar()
grammar = thread_local_BibTeX_grammar_singleton grammar = GLOBALS.BibTeX_1_grammar_singleton
return grammar return grammar
......
...@@ -11,19 +11,13 @@ Match-test "simple" ...@@ -11,19 +11,13 @@ Match-test "simple"
{Edward N. Zalta} {Edward N. Zalta}
### AST ### AST
(content <content>
(:Token <:Token>{</:Token>
"{" <text>
) <CONTENT_STRING>Edward N. Zalta</CONTENT_STRING>
(text </text>
(CONTENT_STRING <:Token>}</:Token>
"Edward N. Zalta" </content>
)
)
(:Token
"}"
)
)
Match-test "nested_braces" Match-test "nested_braces"
-------------------------- --------------------------
...@@ -32,29 +26,17 @@ Match-test "nested_braces" ...@@ -32,29 +26,17 @@ Match-test "nested_braces"
{\url{https://plato.stanford.edu/archives/fall2013/entries/thomas-kuhn/}} {\url{https://plato.stanford.edu/archives/fall2013/entries/thomas-kuhn/}}
### AST ### AST
(content <content>
(:Token <:Token>{</:Token>
"{" <text>
) <CONTENT_STRING>\url</CONTENT_STRING>
(text <:Series>
(CONTENT_STRING <:Token>{</:Token>
"\url" <text>
) <CONTENT_STRING>https://plato.stanford.edu/archives/fall2013/entries/thomas-kuhn/</CONTENT_STRING>
(:Series </text>
(:Token <:Token>}</:Token>
"{" </:Series>
) </text>
(text <:Token>}</:Token>
(CONTENT_STRING </content>
"https://plato.stanford.edu/archives/fall2013/entries/thomas-kuhn/" \ No newline at end of file
)
)
(:Token
"}"
)
)
)
(:Token
"}"
)
)
\ No newline at end of file
...@@ -57,9 +57,9 @@ class EBNFGrammar(Grammar): ...@@ -57,9 +57,9 @@ class EBNFGrammar(Grammar):
r"""Parser for an EBNF source file. r"""Parser for an EBNF source file.
""" """
expression = Forward() expression = Forward()
list_ = Forward() source_hash__ = "82a7c668f86b83f86515078e6c9093ed"
source_hash__ = "8a91723fddb6b9ab6dbdb69ac5263492"
parser_initialization__ = "upon instantiation" parser_initialization__ = "upon instantiation"
resume_rules__ = {}
COMMENT__ = r'#.*(?:\n|$)' COMMENT__ = r'#.*(?:\n|$)'
WHITESPACE__ = r'\s*' WHITESPACE__ = r'\s*'
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__) WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
...@@ -68,8 +68,7 @@ class EBNFGrammar(Grammar): ...@@ -68,8 +68,7 @@ class EBNFGrammar(Grammar):
whitespace = Series(RegExp('~'), wsp__) whitespace = Series(RegExp('~'), wsp__)
regexp = Series(RegExp('/(?:\\\\/|[^/])*?/'), wsp__) regexp = Series(RegExp('/(?:\\\\/|[^/])*?/'), wsp__)
plaintext = Series(RegExp('`(?:[^"]|\\\\")*?`'), wsp__) plaintext = Series(RegExp('`(?:[^"]|\\\\")*?`'), wsp__)
literal = Alternative(Series(RegExp('"(?:[^"]|\\\\")*?"'), wsp__), literal = Alternative(Series(RegExp('"(?:[^"]|\\\\")*?"'), wsp__), Series(RegExp("'(?:[^']|\\\\')*?'"), wsp__))
Series(RegExp("'(?:[^']|\\\\')*?'"), wsp__))
symbol = Series(RegExp('(?!\\d)\\w+'), wsp__) symbol = Series(RegExp('(?!\\d)\\w+'), wsp__)
option = Series(Series(Token("["), wsp__), expression, Series(Token("]"), wsp__), mandatory=1) option = Series(Series(Token("["), wsp__), expression, Series(Token("]"), wsp__), mandatory=1)
repetition = Series(Series(Token("{"), wsp__), expression, Series(Token("}"), wsp__), mandatory=1) repetition = Series(Series(Token("{"), wsp__), expression, Series(Token("}"), wsp__), mandatory=1)
...@@ -77,19 +76,12 @@ class EBNFGrammar(Grammar): ...@@ -77,19 +76,12 @@ class EBNFGrammar(Grammar):
unordered = Series(Series(Token("<"), wsp__), expression, Series(Token(">"), wsp__), mandatory=1) unordered = Series(Series(Token("<"), wsp__), expression, Series(Token(">"), wsp__), mandatory=1)
group = Series(Series(Token("("), wsp__), expression, Series(Token(")"), wsp__), mandatory=1) group = Series(Series(Token("("), wsp__), expression, Series(Token(")"), wsp__), mandatory=1)
retrieveop = Alternative(Series(Token("::"), wsp__), Series(Token(":"), wsp__)) retrieveop = Alternative(Series(Token("::"), wsp__), Series(Token(":"), wsp__))
flowmarker = Alternative(Series(Token("!"), wsp__), Series(Token("&"), wsp__), flowmarker = Alternative(Series(Token("!"), wsp__), Series(Token("&"), wsp__), Series(Token("-!"), wsp__), Series(Token("-&"), wsp__))
Series(Token("-!"), wsp__), Series(Token("-&"), wsp__)) factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol, NegativeLookahead(Series(Token("="), wsp__))), Series(Option(flowmarker), literal), Series(Option(flowmarker), plaintext), Series(Option(flowmarker), regexp), Series(Option(flowmarker), whitespace), Series(Option(flowmarker), oneormore), Series(Option(flowmarker), group), Series(Option(flowmarker), unordered), repetition, option)
factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol,
NegativeLookahead(Series(Token("="), wsp__))), Series(Option(flowmarker), literal),
Series(Option(flowmarker), plaintext), Series(Option(flowmarker), regexp),
Series(Option(flowmarker), whitespace), Series(Option(flowmarker), oneormore),
Series(Option(flowmarker), group), Series(Option(flowmarker), unordered), repetition, option)
term = OneOrMore(Series(Option(Series(Token("§"), wsp__)), factor)) term = OneOrMore(Series(Option(Series(Token("§"), wsp__)), factor))
expression.set(Series(term, ZeroOrMore(Series(Series(Token("|"), wsp__), term)))) expression.set(Series(term, ZeroOrMore(Series(Series(Token("|"), wsp__), term))))
directive = Series(Series(Token("@"), wsp__), symbol, Series(Token("="), wsp__), Alternative(regexp, literal, symbol), ZeroOrMore(Series(Series(Token(","), wsp__), Alternative(regexp, literal, symbol))), mandatory=1)
definition = Series(symbol, Series(Token("="), wsp__), expression, mandatory=1) definition = Series(symbol, Series(Token("="), wsp__), expression, mandatory=1)
directive = Series(Series(Token("@"), wsp__), symbol, Series(Token("="), wsp__), list_, mandatory=1)
list_.set(Series(Alternative(regexp, literal, symbol),
ZeroOrMore(Series(Series(Token(","), wsp__), Alternative(regexp, literal, symbol)))))
syntax = Series(Option(Series(wsp__, RegExp(''))), ZeroOrMore(Alternative(definition, directive)), EOF, mandatory=2) syntax = Series(Option(Series(wsp__, RegExp(''))), ZeroOrMore(Alternative(definition, directive)), EOF, mandatory=2)
root__ = syntax root__ = syntax
...@@ -115,7 +107,7 @@ EBNF_AST_transformation_table = { ...@@ -115,7 +107,7 @@ EBNF_AST_transformation_table = {
"syntax": "syntax":
[], # otherwise '"*": replace_by_single_child' would be applied [], # otherwise '"*": replace_by_single_child' would be applied
"directive, definition": "directive, definition":
remove_tokens('@', '='), remove_tokens('@', '=', ','),
"expression": "expression":
[replace_by_single_child, flatten, remove_tokens('|')], # remove_infix_operator], [replace_by_single_child, flatten, remove_tokens('|')], # remove_infix_operator],
"term": "term":
...@@ -134,8 +126,6 @@ EBNF_AST_transformation_table = { ...@@ -134,8 +126,6 @@ EBNF_AST_transformation_table = {
reduce_single_child, reduce_single_child,
(TOKEN_PTYPE, WHITESPACE_PTYPE): (TOKEN_PTYPE, WHITESPACE_PTYPE):
reduce_single_child, reduce_single_child,
"list_":
[flatten, remove_infix_operator],
"*": "*":
replace_by_single_child replace_by_single_child
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment