
Commit 92bd6575 authored by eckhart

slight adjustments

parent cc08caee
@@ -15,7 +15,8 @@ testdata/*.pdf
*.old
DEBUG*
LOGS
**/REPORT
**/REPORT/*
REPORT
external_resources/
tmp/*
test/tmp*
......
@@ -53,15 +53,7 @@ def get_preprocessor() -> PreprocessorFunc:
#######################################################################
class ArithmeticGrammar(Grammar):
r"""Parser for an Arithmetic source file, with this grammar:
expression = term { ("+" | "-") term}
term = factor { ("*"|"/") factor}
factor = constant | variable | "(" expression ")"
variable = "x" | "y" | "z"
constant = digit {digit}
digit = "0" | "1" | "..." | "9"
test = digit constant variable
r"""Parser for an Arithmetic source file.
"""
constant = Forward()
digit = Forward()
@@ -69,6 +61,7 @@ class ArithmeticGrammar(Grammar):
variable = Forward()
source_hash__ = "c4e6e090ef9673b972ba18ef39fe7c8e"
parser_initialization__ = "upon instantiation"
resume_rules__ = {}
COMMENT__ = r''
WHITESPACE__ = r'\s*'
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
......
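The docstring removed from ArithmeticGrammar spelled out the grammar it implements (expression = term { ("+" | "-") term }, term = factor { ("*" | "/") factor }, factor = constant | variable | "(" expression ")", variable = "x" | "y" | "z", constant = digit { digit }). For orientation only, a minimal hand-written recursive-descent parser for that same grammar could look like the sketch below; every name in it is invented for the example, and it is not part of the commit or of DHParser's generated code.

# Illustrative sketch only: a hand-written recursive-descent parser for the
# arithmetic grammar that the removed docstring described.
def parse_arithmetic(source: str):
    tokens = source.replace(" ", "")
    pos = 0

    def peek() -> str:
        return tokens[pos] if pos < len(tokens) else ""

    def expect(ch: str) -> str:
        nonlocal pos
        if peek() != ch:
            raise SyntaxError(f"expected {ch!r} at position {pos}")
        pos += 1
        return ch

    def expression():              # expression = term { ("+" | "-") term }
        node = ["expression", term()]
        while peek() in ("+", "-"):
            node.append(expect(peek()))
            node.append(term())
        return node

    def term():                    # term = factor { ("*" | "/") factor }
        node = ["term", factor()]
        while peek() in ("*", "/"):
            node.append(expect(peek()))
            node.append(factor())
        return node

    def factor():                  # factor = constant | variable | "(" expression ")"
        if peek().isdigit():
            return constant()
        if peek() in ("x", "y", "z"):   # variable = "x" | "y" | "z"
            return ["variable", expect(peek())]
        expect("(")
        node = expression()
        expect(")")
        return node

    def constant():                # constant = digit { digit }
        digits = expect(peek())
        while peek().isdigit():
            digits += expect(peek())
        return ["constant", digits]

    tree = expression()
    if pos != len(tokens):
        raise SyntaxError("unparsed trailing input")
    return tree

print(parse_arithmetic("2*x + (3+y)"))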
@@ -29,7 +29,7 @@ from DHParser import is_filename, load_if_file, \
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \
is_empty, is_expendable, collapse, replace_content, remove_nodes, remove_content, remove_brackets, replace_parser, \
keep_children, is_one_of, has_content, apply_if, remove_first, remove_last, \
WHITESPACE_PTYPE, TOKEN_PTYPE
WHITESPACE_PTYPE, TOKEN_PTYPE, GLOBALS
from DHParser.transform import TransformationFunc
from DHParser.log import logging
@@ -54,65 +54,15 @@ def get_preprocessor() -> PreprocessorFunc:
#######################################################################
class BibTeXGrammar(Grammar):
r"""Parser for a BibTeX source file, with this grammar:
# BibTeX-Grammar
#######################################################################
#
# EBNF-Directives
#
######################################################################
@ whitespace = /\s*/
@ ignorecase = True
@ comment = /%.*(?:\n|$)/
#######################################################################
#
# Bib-file Structure and Components
#
#######################################################################
bibliography = { preamble | comment | entry }
preamble = "@Preamble{" /"/ pre_code /"/~ §"}"
pre_code = { /[^"%]+/ | /%.*\n/ }
comment = "@Comment{" text §"}"
entry = /@/ type "{" key { "," field §"=" content } §"}"
type = WORD
key = NO_BLANK_STRING
field = WORD_
content = "{" text "}" | plain_content
plain_content = COMMA_TERMINATED_STRING
text = { CONTENT_STRING | "{" text "}" }
#######################################################################
#
# Regular Expressions
#
#######################################################################
WORD = /\w+/
WORD_ = /\w+/~
NO_BLANK_STRING = /[^ \t\n,%]+/~
COMMA_TERMINATED_STRING = { /[^,%]+/ | /(?=%)/~ }
CONTENT_STRING = { /[^{}%]+/ | /(?=%)/~ }+
r"""Parser for a BibTeX source file.
"""
text = Forward()
source_hash__ = "534895885bfdddb19785f5d943b356a7"
source_hash__ = "569bee4a051ea4d9f625ad9bbd46a7a2"
parser_initialization__ = "upon instantiation"
resume_rules__ = {}
COMMENT__ = r'(?i)%.*(?:\n|$)'
WHITESPACE__ = r'\s*'
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = ''
wspR__ = WSP_RE__
wsp__ = Whitespace(WSP_RE__)
CONTENT_STRING = OneOrMore(Alternative(RegExp('(?i)[^{}%]+'), Series(RegExp('(?i)(?=%)'), wsp__)))
COMMA_TERMINATED_STRING = ZeroOrMore(Alternative(RegExp('(?i)[^,%]+'), Series(RegExp('(?i)(?=%)'), wsp__)))
@@ -133,12 +83,11 @@ class BibTeXGrammar(Grammar):
root__ = bibliography
def get_grammar() -> BibTeXGrammar:
global thread_local_BibTeX_grammar_singleton
try:
grammar = thread_local_BibTeX_grammar_singleton
except NameError:
thread_local_BibTeX_grammar_singleton = BibTeXGrammar()
grammar = thread_local_BibTeX_grammar_singleton
grammar = GLOBALS.BibTeX_1_grammar_singleton
except AttributeError:
GLOBALS.BibTeX_1_grammar_singleton = BibTeXGrammar()
grammar = GLOBALS.BibTeX_1_grammar_singleton
return grammar
......
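The change to get_grammar() drops the module-level thread_local_BibTeX_grammar_singleton (whose absence raised NameError and required a global statement) in favour of an attribute on the GLOBALS object now imported from DHParser (whose absence raises AttributeError). A minimal sketch of the same caching pattern, assuming GLOBALS behaves like a threading.local() namespace, which is an assumption made for this example rather than something stated in the diff:

# Sketch of the caching pattern used by the new get_grammar(); GLOBALS is
# assumed here to act like a threading.local() namespace (an assumption
# for this example, not taken from the diff).
import threading

GLOBALS = threading.local()

class BibTeXGrammar:
    """Stand-in for the generated grammar class."""

def get_grammar() -> BibTeXGrammar:
    try:
        # Reuse the grammar object cached for the current thread, if any.
        grammar = GLOBALS.BibTeX_1_grammar_singleton
    except AttributeError:
        # First call in this thread: create and cache the instance.
        GLOBALS.BibTeX_1_grammar_singleton = BibTeXGrammar()
        grammar = GLOBALS.BibTeX_1_grammar_singleton
    return grammar

assert get_grammar() is get_grammar()   # same object within one thread

Keeping the cache as an attribute of one namespace object avoids the global statement, and a missing cache now surfaces uniformly as AttributeError instead of NameError.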
@@ -11,19 +11,13 @@ Match-test "simple"
{Edward N. Zalta}
### AST
(content
(:Token
"{"
)
(text
(CONTENT_STRING
"Edward N. Zalta"
)
)
(:Token
"}"
)
)
<content>
<:Token>{</:Token>
<text>
<CONTENT_STRING>Edward N. Zalta</CONTENT_STRING>
</text>
<:Token>}</:Token>
</content>
Match-test "nested_braces"
--------------------------
@@ -32,29 +26,17 @@ Match-test "nested_braces"
{\url{https://plato.stanford.edu/archives/fall2013/entries/thomas-kuhn/}}
### AST
(content
(:Token
"{"
)
(text
(CONTENT_STRING
"\url"
)
(:Series
(:Token
"{"
)
(text
(CONTENT_STRING
"https://plato.stanford.edu/archives/fall2013/entries/thomas-kuhn/"
)
)
(:Token
"}"
)
)
)
(:Token
"}"
)
)
\ No newline at end of file
<content>
<:Token>{</:Token>
<text>
<CONTENT_STRING>\url</CONTENT_STRING>
<:Series>
<:Token>{</:Token>
<text>
<CONTENT_STRING>https://plato.stanford.edu/archives/fall2013/entries/thomas-kuhn/</CONTENT_STRING>
</text>
<:Token>}</:Token>
</:Series>
</text>
<:Token>}</:Token>
</content>
\ No newline at end of file
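The expected ASTs in this test file switch from the S-expression notation to an XML notation. The sketch below renders the "simple" test case from the diff in both styles with a throwaway helper; it is illustrative only and does not use DHParser's own serialization API.

# Illustrative sketch (not DHParser code): the same AST rendered in the old
# S-expression style and in the new XML style used by the updated test file.

def as_sxpr(node, indent=0):
    tag, children = node
    pad = "  " * indent
    if isinstance(children, str):                       # leaf node
        return f'{pad}({tag}\n{pad}  "{children}"\n{pad})'
    inner = "\n".join(as_sxpr(child, indent + 1) for child in children)
    return f"{pad}({tag}\n{inner}\n{pad})"

def as_xml(node, indent=0):
    tag, children = node
    pad = "  " * indent
    if isinstance(children, str):                       # leaf node
        return f"{pad}<{tag}>{children}</{tag}>"
    inner = "\n".join(as_xml(child, indent + 1) for child in children)
    return f"{pad}<{tag}>\n{inner}\n{pad}</{tag}>"

# The "simple" match test from the diff: {Edward N. Zalta}
ast = ("content", [
    (":Token", "{"),
    ("text", [("CONTENT_STRING", "Edward N. Zalta")]),
    (":Token", "}"),
])

print(as_sxpr(ast))   # old expected-output style
print(as_xml(ast))    # new expected-output style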
@@ -57,9 +57,9 @@ class EBNFGrammar(Grammar):
r"""Parser for an EBNF source file.
"""
expression = Forward()
list_ = Forward()
source_hash__ = "8a91723fddb6b9ab6dbdb69ac5263492"
source_hash__ = "82a7c668f86b83f86515078e6c9093ed"
parser_initialization__ = "upon instantiation"
resume_rules__ = {}
COMMENT__ = r'#.*(?:\n|$)'
WHITESPACE__ = r'\s*'
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
@@ -68,8 +68,7 @@ class EBNFGrammar(Grammar):
whitespace = Series(RegExp('~'), wsp__)
regexp = Series(RegExp('/(?:\\\\/|[^/])*?/'), wsp__)
plaintext = Series(RegExp('`(?:[^"]|\\\\")*?`'), wsp__)
literal = Alternative(Series(RegExp('"(?:[^"]|\\\\")*?"'), wsp__),
Series(RegExp("'(?:[^']|\\\\')*?'"), wsp__))
literal = Alternative(Series(RegExp('"(?:[^"]|\\\\")*?"'), wsp__), Series(RegExp("'(?:[^']|\\\\')*?'"), wsp__))
symbol = Series(RegExp('(?!\\d)\\w+'), wsp__)
option = Series(Series(Token("["), wsp__), expression, Series(Token("]"), wsp__), mandatory=1)
repetition = Series(Series(Token("{"), wsp__), expression, Series(Token("}"), wsp__), mandatory=1)
@@ -77,19 +76,12 @@
unordered = Series(Series(Token("<"), wsp__), expression, Series(Token(">"), wsp__), mandatory=1)
group = Series(Series(Token("("), wsp__), expression, Series(Token(")"), wsp__), mandatory=1)
retrieveop = Alternative(Series(Token("::"), wsp__), Series(Token(":"), wsp__))
flowmarker = Alternative(Series(Token("!"), wsp__), Series(Token("&"), wsp__),
Series(Token("-!"), wsp__), Series(Token("-&"), wsp__))
factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol,
NegativeLookahead(Series(Token("="), wsp__))), Series(Option(flowmarker), literal),
Series(Option(flowmarker), plaintext), Series(Option(flowmarker), regexp),
Series(Option(flowmarker), whitespace), Series(Option(flowmarker), oneormore),
Series(Option(flowmarker), group), Series(Option(flowmarker), unordered), repetition, option)
flowmarker = Alternative(Series(Token("!"), wsp__), Series(Token("&"), wsp__), Series(Token("-!"), wsp__), Series(Token("-&"), wsp__))
factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol, NegativeLookahead(Series(Token("="), wsp__))), Series(Option(flowmarker), literal), Series(Option(flowmarker), plaintext), Series(Option(flowmarker), regexp), Series(Option(flowmarker), whitespace), Series(Option(flowmarker), oneormore), Series(Option(flowmarker), group), Series(Option(flowmarker), unordered), repetition, option)
term = OneOrMore(Series(Option(Series(Token("§"), wsp__)), factor))
expression.set(Series(term, ZeroOrMore(Series(Series(Token("|"), wsp__), term))))
directive = Series(Series(Token("@"), wsp__), symbol, Series(Token("="), wsp__), Alternative(regexp, literal, symbol), ZeroOrMore(Series(Series(Token(","), wsp__), Alternative(regexp, literal, symbol))), mandatory=1)
definition = Series(symbol, Series(Token("="), wsp__), expression, mandatory=1)
directive = Series(Series(Token("@"), wsp__), symbol, Series(Token("="), wsp__), list_, mandatory=1)
list_.set(Series(Alternative(regexp, literal, symbol),
ZeroOrMore(Series(Series(Token(","), wsp__), Alternative(regexp, literal, symbol)))))
syntax = Series(Option(Series(wsp__, RegExp(''))), ZeroOrMore(Alternative(definition, directive)), EOF, mandatory=2)
root__ = syntax
@@ -115,7 +107,7 @@ EBNF_AST_transformation_table = {
"syntax":
[], # otherwise '"*": replace_by_single_child' would be applied
"directive, definition":
remove_tokens('@', '='),
remove_tokens('@', '=', ','),
"expression":
[replace_by_single_child, flatten, remove_tokens('|')], # remove_infix_operator],
"term":
@@ -134,8 +126,6 @@ EBNF_AST_transformation_table = {
reduce_single_child,
(TOKEN_PTYPE, WHITESPACE_PTYPE):
reduce_single_child,
"list_":
[flatten, remove_infix_operator],
"*":
replace_by_single_child
}
......
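In the EBNF grammar, directive now consumes its comma-separated argument list directly instead of delegating to the dropped list_ symbol, so the ',' tokens end up among a directive node's children; accordingly the AST table strips them there (remove_tokens('@', '=', ',')) and the separate "list_" entry disappears. A rough sketch of the idea, not DHParser's actual transformation code, using a hypothetical directive and a hand-rolled token filter:

# Rough sketch, not DHParser's implementation: with `list_` gone, the comma
# tokens sit directly in the directive's child list and are filtered out
# there, just like '@' and '='.

def strip_tokens(children, tokens):
    """Drop anonymous token children whose text is in `tokens`."""
    return [c for c in children if not (c[0] == ":Token" and c[1] in tokens)]

# Hypothetical flattened AST of a directive such as `@ literalws = right, left`:
directive_children = [
    (":Token", "@"), ("symbol", "literalws"), (":Token", "="),
    ("symbol", "right"), (":Token", ","), ("symbol", "left"),
]

print(strip_tokens(directive_children, {"@", "=", ","}))
# [('symbol', 'literalws'), ('symbol', 'right'), ('symbol', 'left')]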