
"""
problog.parser - Parser for Prolog programs
-------------------------------------------

Efficient low-level parser for Prolog programs.

..
    Part of the ProbLog distribution.

    Copyright 2015 KU Leuven, DTAI Research Group

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
"""
import re

from .errors import ParseError as CoreParseError

LINE_COMMENT = "%"
BLOCK_COMMENT_START = "/*"
BLOCK_COMMENT_END = "*/"
NEWLINE = "\n"

WHITESPACE = frozenset("\n\t ")


class ParseError(CoreParseError):
    def __init__(self, string, message, location):
        line, col, text = self._convert_pos(string, location)
        CoreParseError.__init__(self, message, location=(None, line, col), line=text)

    def _convert_pos(self, string, location):
        """Find line number, column number and text of offending line."""
        lineno = 1
        col = 0
        stop = False
        for i, x in enumerate(string):
            if x == "\n":
                if stop:
                    break
                lineno += 1
                col = 0
            if i == location:
                stop = True
            if not stop:
                col += 1
        return lineno, col, string[location - col : i + 1]
class UnexpectedCharacter(ParseError):
    def __init__(self, string, position):
        char = string[position]
        ParseError.__init__(self, string, "Unexpected character '%s'" % char, position)
class UnmatchedCharacter(ParseError):
    def __init__(self, string, position, length=1):
        char = string[position : position + length]
        ParseError.__init__(self, string, "Unmatched character '%s'" % char, position)
class Token(object):
    def __init__(self, string, pos, types=None, end=None, atom=True, functor=False,
                 binop=None, unop=None, special=None, atom_action=None):
        # if end == None : end = pos+len(string)
        self.string = string
        self.location = pos
        self.atom = atom
        self.binop = binop
        self.unop = unop
        self.special = special
        self.arglist = False
        self.aggregate = False
        if atom:
            self.functor = functor
        else:
            self.functor = False
        self.is_comma_list = False
        self.atom_action = atom_action

    def is_special(self, special):
        return self.special == special

    def is_atom(self):
        return self.atom

    @property
    def priority(self):
        prior = 0
        if self.binop:
            prior = self.binop[0]
        elif self.unop:
            prior = self.unop[0]
        return prior

    def count_options(self):
        o = 0
        if self.atom:
            o += 1
        if self.binop:
            o += 1
        if self.unop:
            o += 1
        if self.functor:
            o += 1
        return o

    def list_options(self):  # pragma: no cover
        o = ""
        if self.atom:
            o += "a"
        if self.binop:
            o += "b"
        if self.unop:
            o += "u"
        if self.functor:
            o += "f"
        if self.arglist:
            o += "l"
        return o

    def __repr__(self):  # pragma: no cover
        return "'%s' {%s}" % (self.string, self.list_options())


SPECIAL_PAREN_OPEN = 0
SPECIAL_PAREN_CLOSE = 1
SPECIAL_END = 2
SPECIAL_COMMA = 3
SPECIAL_BRACK_OPEN = 4
SPECIAL_BRACK_CLOSE = 5
SPECIAL_VARIABLE = 6
SPECIAL_FLOAT = 7
SPECIAL_INTEGER = 8
SPECIAL_PIPE = 9
SPECIAL_STRING = 10
SPECIAL_ARGLIST = 11
SPECIAL_SHARP_OPEN = 12
SPECIAL_SHARP_CLOSE = 13
SPECIAL_HEX_INTEGER = 14

RE_FLOAT = re.compile(r"(0x[0-9a-fA-F]+)|([-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)")


def skip_to(s, pos, char):
    end = s.find(char, pos)
    if end == -1:
        return len(s)
    else:
        return end + 1


def skip_comment_c(s, pos):
    end = s.find(BLOCK_COMMENT_END, pos)
    if end == -1:
        raise UnmatchedCharacter(s, pos, 2)
    return end + 2


def skip_comment_line(s, pos):
    return skip_to(s, pos, NEWLINE)


def is_lower(c):
    return c.islower()  # 'a' <= c <= 'z'


def is_upper(c):
    return c.isupper()  # 'A' <= c <= 'Z'


def is_digit(c):
    return "0" <= c <= "9"


def is_whitespace(c):
    return c <= " "


def is_comment_start(c):
    return c == "%" or c == "/"


class PrologParser(object):
    def __init__(self, factory):
        self.factory = factory
        self.prepare()

    def _skip(self, s, pos):
        return None, pos + 1

    def _next_paren_open(self, s, pos):
        try:
            return s[pos + 1] in ("(", "[")
        except IndexError:
            return False

    def _token_notsupported(self, s, pos):
        raise UnexpectedCharacter(s, pos)

    def _token_dquot(self, s, pos):
        end = s.find('"', pos + 1)
        while end != -1 and s[end - 1] == "\\":
            end = s.find('"', end + 1)
        if end == -1:
            raise UnmatchedCharacter(s, pos)
        else:
            return Token(s[pos : end + 1], pos, special=SPECIAL_STRING), end + 1

    def _token_pound(self, s, pos):
        return Token(s[pos], pos, binop=(500, "yfx", self.factory.build_binop),
                     functor=self._next_paren_open(s, pos)), pos + 1

    def _token_percent(self, s, pos):
        return None, skip_comment_line(s, pos)

    def _token_squot(self, s, pos):
        end = s.find("'", pos + 1)
        while end != -1 and s[end - 1] == "\\":
            end = s.find("'", end + 1)
        if end == -1:
            raise UnmatchedCharacter(s, pos)
        else:
            return Token(s[pos : end + 1], pos,
                         functor=self._next_paren_open(s, end)), end + 1

    def _token_paren_open(self, s, pos):
        return Token(s[pos], pos, atom=False, special=SPECIAL_PAREN_OPEN), pos + 1

    def _token_paren_close(self, s, pos):
        return Token(s[pos], pos, atom=False, special=SPECIAL_PAREN_CLOSE), pos + 1

    # def _token_sharp_open(self, s, pos):
    #     return Token(s[pos], pos, atom=False, special=SPECIAL_SHARP_OPEN), pos + 1
    #
    # def _token_sharp_close(self, s, pos):
    #     return Token(s[pos], pos, atom=False, special=SPECIAL_SHARP_CLOSE), pos + 1

    def _token_asterisk(self, s, pos):
        if s[pos : pos + 2] == "**":
            return Token("**", pos, binop=(200, "xfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 2
        elif s[pos : pos + 3] == "*->":
            return Token("*->", pos, binop=(200, "xfy", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 3
        else:
            return Token("*", pos, binop=(400, "yfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 1

    def _token_plus(self, s, pos):
        return Token("+", pos, binop=(500, "yfx", self.factory.build_binop),
                     unop=(200, "fy", self.factory.build_unop),
                     functor=self._next_paren_open(s, pos)), pos + 1

    def _token_comma(self, s, pos):
        return Token(",", pos, binop=(1000, "xfy", self.factory.build_conjunction),
                     atom=False, special=SPECIAL_COMMA), pos + 1

    def _token_min(self, s, pos):
        if s[pos : pos + 3] == "-->":
            return Token("-->", pos, binop=(1200, "xfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 3
        elif s[pos : pos + 2] == "->":
            return Token("->", pos, binop=(1050, "xfy", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 2
        else:
            return Token("-", pos, binop=(500, "yfx", self.factory.build_binop),
                         unop=(200, "fy", self.factory.build_unop),
                         functor=self._next_paren_open(s, pos)), pos + 1

    def _token_dot(self, s, pos):
        if (pos + 1 == len(s) or is_whitespace(s[pos + 1])
                or is_comment_start(s[pos + 1])):
            return Token(".", pos, special=SPECIAL_END), pos + 1
        elif is_digit(s[pos + 1]):
            return self._token_number(s, pos)
        elif s[pos + 1] == "(":
            return Token(".", pos, functor=self._next_paren_open(s, pos)), pos + 1
        else:
            raise UnexpectedCharacter(s, pos)

    def _token_slash(self, s, pos):
        if s[pos : pos + 2] == "/\\":
            return Token("/\\", pos, binop=(500, "yfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 2
        elif s[pos : pos + 2] == "//":
            return Token("//", pos, binop=(400, "yfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 2
        elif s[pos : pos + 2] == "/*":
            return None, skip_comment_c(s, pos)
        else:
            return Token("/", pos, binop=(400, "yfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 1

    def _token_colon(self, s, pos):
        if s[pos : pos + 2] == ":-":
            return Token(":-", pos, binop=(1200, "xfx", self._build_clause),
                         unop=(1200, "fx", self.factory.build_directive),
                         functor=self._next_paren_open(s, pos)), pos + 2
        elif s[pos : pos + 2] == "::":
            return Token("::", pos,
                         binop=(1000, "xfx", self.factory.build_probabilistic),
                         functor=self._next_paren_open(s, pos)), pos + 2
        else:
            return Token(":", pos, binop=(600, "xfy", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 1

    def _token_semicolon(self, s, pos):
        return Token(";", pos, binop=(1100, "xfy", self.factory.build_disjunction),
                     functor=self._next_paren_open(s, pos)), pos + 1

    def _token_exclamation(self, s, pos):
        return Token("!", pos, atom=True), pos + 1

    def _token_less(self, s, pos):
        if s[pos : pos + 2] == "<-":
            return Token("<-", pos, binop=(1200, "xfx", self._build_clause),
                         functor=self._next_paren_open(s, pos)), pos + 2
        elif s[pos : pos + 2] == "<<":
            return Token("<<", pos, binop=(400, "yfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 2
        else:
            # '<' also acts as the opening bracket of an aggregate expression.
            return Token("<", pos, binop=(700, "xfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos),
                         special=SPECIAL_SHARP_OPEN), pos + 1

    def _token_equal(self, s, pos):
        if s[pos : pos + 2] == "=<":
            return Token("=<", pos, binop=(700, "xfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 2
        elif s[pos : pos + 3] == "=:=":
            return Token("=:=", pos, binop=(700, "xfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 3
        elif s[pos : pos + 3] == "=\\=":
            return Token("=\\=", pos, binop=(700, "xfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 3
        elif s[pos : pos + 3] == "=@=":
            return Token("=@=", pos, binop=(700, "xfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 3
        elif s[pos : pos + 3] == "=..":
            return Token("=..", pos, binop=(700, "xfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 3
        elif s[pos : pos + 2] == "==":
            from warnings import warn

            warn("The use of '==' might give unexpected results. "
                 "Consider using '=' instead.")
            return Token("==", pos, binop=(700, "xfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 2
        elif s[pos : pos + 2] == "=>":
            return Token("=>", pos, binop=(700, "yfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 2
        else:
            return Token("=", pos, binop=(700, "xfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 1

    def _token_greater(self, s, pos):
        if s[pos : pos + 2] == ">>":
            return Token(">>", pos, binop=(400, "yfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 2
        elif s[pos : pos + 2] == "><":
            return Token("><", pos, binop=(500, "yfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 2
        elif s[pos : pos + 2] == ">=":
            return Token(">=", pos, binop=(700, "xfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 2
        else:
            # '>' also acts as the closing bracket of an aggregate expression.
            return Token(">", pos, binop=(700, "xfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos),
                         special=SPECIAL_SHARP_CLOSE), pos + 1

    def _token_question(self, s, pos):
        return Token("?", pos, atom=True), pos + 1
        # raise UnexpectedCharacter(s, pos)

    def _token_at(self, s, pos):
        if s[pos : pos + 2] == "@<":
            return Token("@<", pos, binop=(700, "xfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 2
        elif s[pos : pos + 3] == "@=<":
            return Token("@=<", pos, binop=(700, "xfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 3
        elif s[pos : pos + 3] == "@>=":
            return Token("@>=", pos, binop=(700, "xfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 3
        elif s[pos : pos + 2] == "@>":
            return Token("@>", pos, binop=(700, "xfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 2
        else:
            raise UnexpectedCharacter(s, pos)

    def _token_bracket_open(self, s, pos):
        return Token("[", pos, atom=False, special=SPECIAL_BRACK_OPEN), pos + 1

    def _token_backslash(self, s, pos):
        if s[pos : pos + 2] == "\\\\":
            return Token("\\\\", pos, unop=(200, "fy", self.factory.build_unop),
                         functor=self._next_paren_open(s, pos)), pos + 2
        elif s[pos : pos + 2] == "\\+":
            return Token("\\+", pos, unop=(900, "fy", self.factory.build_not),
                         functor=self._next_paren_open(s, pos)), pos + 2
        elif s[pos : pos + 4] == "\\=@=":
            # Note: the token string here used to read "\\+" (copy-paste error).
            return Token("\\=@=", pos, binop=(700, "xfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 4
        elif s[pos : pos + 3] == "\\==":
            from warnings import warn

            warn("The use of '\\==' might give unexpected results. "
                 "Consider using '\\=' instead.")
            return Token("\\==", pos, binop=(700, "xfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 3
        elif s[pos : pos + 2] == "\\=":
            return Token("\\=", pos, binop=(700, "xfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 2
        elif s[pos : pos + 2] == "\\/":
            return Token("\\/", pos, binop=(500, "yfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 2
        else:
            return Token("\\", pos, unop=(200, "fy", self.factory.build_unop),
                         functor=self._next_paren_open(s, pos)), pos + 1

    def _token_bracket_close(self, s, pos):
        return Token("]", pos, atom=False, special=SPECIAL_BRACK_CLOSE), pos + 1

    def _token_caret(self, s, pos):
        return Token("^", pos, binop=(400, "xfy", self.factory.build_binop),
                     functor=self._next_paren_open(s, pos)), pos + 1

    def _token_underscore(self, s, pos):
        return self._token_upper(s, pos)  # Variable

    def _token_pipe(self, s, pos):
        return Token("|", pos, atom=False,
                     binop=(1100, "xfy", self.factory.build_binop),
                     special=SPECIAL_PIPE), pos + 1

    def _token_ampersand(self, s, pos):
        return Token("&", pos, atom=False,
                     binop=(1000, "xfy", self.factory.build_binop)), pos + 1

    def _token_tilde(self, s, pos):
        if s[pos : pos + 3] == "~==":
            return Token("~==", pos, binop=(700, "xfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 3
        elif s[pos : pos + 4] == "~=/=":
            return Token("~=/=", pos, binop=(700, "xfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 4
        elif s[pos : pos + 2] == "~<":
            return Token("~<", pos, binop=(700, "xfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 2
        elif s[pos : pos + 3] == "~=<":
            return Token("~=<", pos, binop=(700, "xfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 3
        elif s[pos : pos + 3] == "~>=":
            return Token("~>=", pos, binop=(700, "xfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 3
        elif s[pos : pos + 2] == "~>":
            return Token("~>", pos, binop=(700, "xfx", self.factory.build_binop),
                         functor=self._next_paren_open(s, pos)), pos + 2
        elif s[pos : pos + 2] == "~=":
            return Token("~=", pos, binop=(700, "xfx", self.factory.build_binop),
                         unop=(200, "fy", self.factory.build_unop),
                         functor=self._next_paren_open(s, pos)), pos + 2
        else:
            return Token("~", pos, unop=(900, "fx", self.factory.build_unop),
                         binop=(1000, "xfx", self.factory.build_probabilistic)), pos + 1

    def _token_lower(self, s, pos):
        end = pos + 1
        s_len = len(s)
        if end < s_len:
            c = s[end]
            while c == "_" or is_lower(c) or is_upper(c) or is_digit(c):
                end += 1
                if end >= s_len:
                    break
                c = s[end]
        token = s[pos:end]
        kwd = self.string_operators.get(token, {})
        return Token(token, pos, functor=self._next_paren_open(s, end - 1), **kwd), end

    def _token_upper(self, s, pos):
        end = pos + 1
        s_len = len(s)
        if end < s_len:
            c = s[end]
            while c == "_" or is_lower(c) or is_upper(c) or is_digit(c):
                end += 1
                if end >= s_len:
                    break
                c = s[end]
            return Token(s[pos:end], pos, special=SPECIAL_VARIABLE), end
        else:
            return Token(s[pos], pos, special=SPECIAL_VARIABLE), end

    def _token_number(self, s, pos):
        token = RE_FLOAT.match(s, pos).group(0)
        if token.startswith("0x"):
            return Token(token, pos, special=SPECIAL_HEX_INTEGER), pos + len(token)
        elif token.find(".") >= 0 or token.find("e") >= 0 or token.find("E") >= 0:
            return Token(token, pos, special=SPECIAL_FLOAT), pos + len(token)
        else:
            return Token(token, pos, special=SPECIAL_INTEGER), pos + len(token)

    def _token_action(self, char):
        # Dispatch on the character code through the tables built in prepare().
        c = ord(char)
        if c < 33:
            return self._skip  # whitespace
        elif c < 48:
            return self._token_act1[c - 33]
        elif c < 58:
            return self._token_number
        elif c < 65:
            return self._token_act2[c - 58]
        elif c < 91:
            return self._token_upper
        elif c < 97:
            return self._token_act3[c - 91]
        elif c < 123:
            return self._token_lower
        elif c < 127:
            return self._token_act4[c - 123]
        elif char.isalpha() and char.islower():
            return self._token_lower
        elif char.isalpha() and char.isupper():
            return self._token_upper
        else:
            return None

    def _build_clause(self, functor, operand1, operand2, location, **extra):
        heads = []
        current = operand1
        while current.functor == ";":
            heads.append(current.args[0])
            current = current.args[1]
        heads.append(current)
        return self.factory.build_clause(functor=functor, operand1=heads,
                                         operand2=operand2, location=location,
                                         **extra)

    def next_token(self, s, pos):
        action = self._token_action(s[pos])
        if action is None:
            raise UnexpectedCharacter(s, pos)
        result = action(s, pos)
        if result is None:  # pragma: no cover
            raise RuntimeError("Undefined action: '%s'" % action)
        else:
            return result

    def prepare(self):
        self._token_act1 = [
            self._token_exclamation,  # 33 !
            self._token_dquot,  # 34 "
            self._token_pound,  # 35 #
            self._token_notsupported,  # 36 $
            self._token_percent,  # 37 %
            self._token_ampersand,  # 38 &
            self._token_squot,  # 39 '
            self._token_paren_open,  # 40 (
            self._token_paren_close,  # 41 )
            self._token_asterisk,  # 42 *
            self._token_plus,  # 43 +
            self._token_comma,  # 44 ,
            self._token_min,  # 45 -
            self._token_dot,  # 46 .
            self._token_slash,  # 47 /
        ]
        self._token_act2 = [
            self._token_colon,  # 58 :
            self._token_semicolon,  # 59 ;
            self._token_less,  # 60 <
            self._token_equal,  # 61 =
            self._token_greater,  # 62 >
            self._token_question,  # 63 ?
            self._token_at,  # 64 @
        ]
        self._token_act3 = [
            self._token_bracket_open,  # 91 [
            self._token_backslash,  # 92 backslash
            self._token_bracket_close,  # 93 ]
            self._token_caret,  # 94 ^
            self._token_underscore,  # 95 _
            self._token_notsupported,  # 96 `
        ]
        self._token_act4 = [
            self._token_notsupported,  # 123 {
            self._token_pipe,  # 124 |
            self._token_notsupported,  # 125 }
            self._token_tilde,  # 126 ~
        ]
        self.string_operators = {
            "is": {"binop": (700, "xfx", self.factory.build_binop)},
            "as": {"binop": (700, "xfx", self.factory.build_binop)},
            "not": {"unop": (900, "fy", self.factory.build_not), "atom": True},
            "xor": {"binop": (500, "yfx", self.factory.build_binop)},
            "rdiv": {"binop": (400, "yfx", self.factory.build_binop)},
            "mod": {"binop": (400, "yfx", self.factory.build_binop)},
            "rem": {"binop": (400, "yfx", self.factory.build_binop)},
            "div": {"binop": (400, "yfx", self.factory.build_binop)},
        }

    def _tokenize(self, s):
        s_len = len(s)
        p = 0
        if s[:2] == "#!":
            p = skip_comment_line(s, p)
        while p < s_len:
            t, p = self.next_token(s, p)
            if t is not None:
                yield t

    def _extract_statements(self, string, s):
        statement = []
        for token in s:
            if token.is_special(SPECIAL_END):
                if not statement:
                    raise ParseError(string, "Empty statement found", token.location)
                yield statement
                statement = []
            else:
                statement.append(token)
        if statement:
            raise ParseError(string, "Incomplete statement", len(string))

    def _build_operator_free(self, string, tokens):
        if len(tokens) == 1:
            token = tokens[0]
            if isinstance(token, SubExpression):
                if isinstance(token.tokens, list):
                    curr = token.tokens[-1]
                    for t in reversed(token.tokens[:-1]):
                        curr = self.factory.build_conjunction(",", t, curr)
                    return curr
                else:
                    return token.tokens
            elif token.is_special(SPECIAL_VARIABLE):
                return self.factory.build_variable(token.string,
                                                   location=token.location)
            elif token.is_special(SPECIAL_INTEGER):
                return self.factory.build_constant(int(token.string),
                                                   location=token.location)
            elif token.is_special(SPECIAL_HEX_INTEGER):
                return self.factory.build_constant(int(token.string, 16),
                                                   location=token.location)
            elif token.is_special(SPECIAL_FLOAT):
                return self.factory.build_constant(float(token.string),
                                                   location=token.location)
            elif token.is_special(SPECIAL_STRING):
                return self.factory.build_string(token.string[1:-1],
                                                 location=token.location)
            else:
                if token.aggregate:
                    return self.factory.build_aggregate(token.string, (),
                                                        location=token.location)
                else:
                    return self.factory.build_function(token.string, (),
                                                       location=token.location)
        elif len(tokens) == 2:
            args = [tok for tok in tokens[1].enum_tokens()]
            if tokens[0].aggregate:
                # print (type(args[0]))
                return self.factory.build_aggregate(tokens[0].string, args,
                                                    location=tokens[0].location)
            else:
                return self.factory.build_function(tokens[0].string, args,
                                                   location=tokens[0].location)
        elif len(tokens) != 0:
            raise ParseError(string, "Unexpected token", tokens[0].location)
        else:
            return None

    def fold(self, string, operators, lo, hi, pprior=None, porder=None, level=0):
        if lo >= hi:
            return self._build_operator_free(string, operators[lo:hi])
        else:
            # Find the operator with the highest priority in the range.
            max_op = None
            max_i = None
            for i in range(lo, hi):
                op_n = operators[i]
                op = None
                if op_n.binop:
                    op = op_n.binop
                elif op_n.unop:
                    op = op_n.unop
                if op is not None and (max_op is None or op[0] > max_op[0]
                                       or (op[0] == max_op[0]
                                           and max_op[1] == "yfx")):
                    max_i = i
                    max_op = op
            if max_op is None:
                return self._build_operator_free(string, operators[lo:hi])
            else:
                if pprior == max_op[0] and porder == "x":
                    raise ParseError(string, "Operator priority clash",
                                     operators[max_i].location)
                else:
                    max_order = max_op[1]
                    if len(max_order) == 3:  # binop
                        lf = self.fold(string, operators, lo, max_i,
                                       max_op[0], max_order[0], level + 1)
                        rf = self.fold(string, operators, max_i + 1, hi,
                                       max_op[0], max_order[2], level + 1)
                        return max_op[2](functor=operators[max_i].string,
                                         operand1=lf, operand2=rf,
                                         location=operators[max_i].location,
                                         priority=max_op[0], opspec=max_op[1])
                    else:  # unop
                        if max_i != lo:
                            raise ParseError(string, "Operator priority clash",
                                             operators[max_i].location)
                        lf = self.fold(string, operators, lo + 1, hi,
                                       max_op[0], max_order[1], level + 1)
                        return max_op[2](functor=operators[max_i].string,
                                         operand=lf,
                                         location=operators[max_i].location,
                                         priority=max_op[0], opspec=max_op[1])

    def label_tokens(self, string, tokens):
        l = len(tokens) - 1
        p = None
        for i, t in enumerate(tokens):
            if i == l:
                # Last token can not be an operator or functor.
                t.unop = None
                t.binop = None
                t.functor = False
            elif t.functor and tokens[i + 1].is_comma_list:
                t.atom = False
            elif t.unop and tokens[i + 1].priority > t.priority:
                t.unop = False
            if i == 0:
                t.binop = None  # First token can not be a binop
                t.arglist = False
            elif p.aggregate:
                p.atom = False
                p.functor = True
            elif p.functor:
                t.atom = False
                t.arglist = t.is_comma_list
            elif p.arglist:
                t.unop = False
                t.atom = False
                t.functor = False
            elif p.atom:
                if not t.binop:
                    raise ParseError(string, "Expected binary operator", t.location)
                t.unop = None
                t.atom = False
                t.functor = False
                t.arglist = False
            elif p.binop:
                t.binop = None
                t.arglist = False
            else:
                t.arglist = False
            if t.unop and t.functor:
                t.unop = None
            if t.unop and t.atom:
                n = tokens[i + 1]
                if not n.binop:
                    t.atom = False
            p = t
            if t.count_options() != 1:
                raise ParseError(string, "Ambiguous token role", t.location)
        return tokens

    def _parse_statement(self, string, tokens):
        return self.collapse(string, tokens)

    def parseString(self, string):
        return self.factory.build_program(
            mapl(lambda x: self._parse_statement(string, x),
                 self._extract_statements(string, self._tokenize(string)))
        )

    def parseFile(self, filename):
        with open(filename) as f:
            return self.parseString(f.read())

    def collapse(self, string, tokens):
        """Combine tokens into subexpressions."""
        root_tokens = []
        expr_stack = []
        for token_i, token in enumerate(tokens):
            if (token.is_special(SPECIAL_SHARP_OPEN)
                    and len(tokens) > token_i + 2
                    and tokens[token_i + 1].is_special(SPECIAL_VARIABLE)
                    and tokens[token_i + 2].is_special(SPECIAL_SHARP_CLOSE)):
                # Open an aggregate expression '<...>'.
                expr_stack.append(
                    self._create_paren_expression(string, token,
                                                  SPECIAL_SHARP_CLOSE))
                tokens[token_i - 1].aggregate = True
            elif token.is_special(SPECIAL_PAREN_OPEN):
                # Open a parenthesis expression
                expr_stack.append(self._create_paren_expression(string, token))
            elif token.is_special(SPECIAL_BRACK_OPEN):
                # Open a list expression
                expr_stack.append(self._create_list_expression(string, token))
            elif (token.is_special(SPECIAL_PAREN_CLOSE)
                  or token.is_special(SPECIAL_BRACK_CLOSE)):
                try:
                    current_expr = expr_stack.pop(-1)
                    # Close a parenthesis expression
                    if not current_expr.accepts(token):
                        raise UnexpectedCharacter(string, token.location)
                    else:
                        current_expr.append(token)
                        current_expr.parse(self)
                        if not expr_stack:
                            root_tokens.append(current_expr)
                        else:
                            expr_stack[-1].append(current_expr)
                except IndexError:
                    raise UnmatchedCharacter(string, token.location)
            elif (token.is_special(SPECIAL_SHARP_CLOSE) and expr_stack
                  and expr_stack[-1].close_char == SPECIAL_SHARP_CLOSE
                  and expr_stack[-1].accepts(token)):
                current_expr = expr_stack.pop(-1)
                current_expr.append(token)
                current_expr.parse(self)
                if not expr_stack:
                    root_tokens.append(current_expr)
                else:
                    expr_stack[-1].append(current_expr)
            elif expr_stack:
                expr_stack[-1].append(token)
            else:
                root_tokens.append(token)
        if expr_stack:
            raise UnmatchedCharacter(string, expr_stack[-1].start.location)
        toks = self.label_tokens(string, root_tokens)
        return self.fold(string, toks, 0, len(toks))

    def _create_paren_expression(self, string, token,
                                 close_char=SPECIAL_PAREN_CLOSE):
        return ParenExpression(string, token, close_char)

    def _create_list_expression(self, string, token,
                                close_char=SPECIAL_BRACK_CLOSE):
        return ListExpression(string, token, close_char)


def mapl(f, l):
    return list(map(f, l))


def filterl(f, l):
    return list(filter(f, l))


class SubExpression(object):
    def __init__(self, string, start):
        self.string = string
        self.tokens = []
        self.start = start
        self.end = None
        self.binop = None
        self.unop = False
        self.functor = False
        self.atom = True
        self._arglist = True
        self.max_operators = []
        self.aggregate = False
        self.priority = 0

    @property
    def arglist(self):
        return self._arglist and self.is_comma_list

    @arglist.setter
    def arglist(self, value):
        self._arglist = value

    @property
    def location(self):
        return self.start.location

    def parse(self, parser):
        self.label_tokens(parser)
        if self.arglist:
            current = []
            tokens = []
            for token in self.tokens:
                if token.is_special(SPECIAL_COMMA):
                    tokens.append(parser.fold(self.string, current, 0,
                                              len(current)))
                    current = []
                else:
                    current.append(token)
            new_token = parser.fold(self.string, current, 0, len(current))
            tokens.append(new_token)
            self.tokens = tokens
        else:
            self.tokens = parser.fold(self.string, self.tokens, 0,
                                      len(self.tokens))

    def label_tokens(self, parser):
        parser.label_tokens(self.string, self.tokens)

    def count_options(self):
        return 1

    def is_special(self, special):
        return False

    def append(self, token):
        if token.is_special(self.close_char):
            self.end = token
        elif token.binop:
            # Track the highest-priority binary operators seen so far.
            if (not self.max_operators
                    or token.binop[0] > self.max_operators[0].binop[0]):
                self.max_operators = [token]
            elif (self.max_operators
                  and token.binop[0] == self.max_operators[0].binop[0]):
                self.max_operators.append(token)
            self.tokens.append(token)
        else:
            self.tokens.append(token)

    def list_options(self):  # pragma: no cover
        o = ""
        if self.atom:
            o += "a"
        if self.binop:
            o += "b"
        if self.unop:
            o += "u"
        if self.functor:
            o += "f"
        if self.arglist:
            o += "l"
        return o


class ListExpression(SubExpression):
    def __init__(self, string, start, close_char=SPECIAL_BRACK_CLOSE):
        SubExpression.__init__(self, string, start)
        self.close_char = close_char
        # self.is_comma_list = False
        self.arglist = True
        self._tokens = None

    @property
    def is_comma_list(self):
        return (not self.max_operators
                or self.max_operators[0].string == ","
                or self.max_operators[0].priority < 1000)

    def accepts(self, token):
        return not token.is_special(SPECIAL_PAREN_CLOSE)

    def __repr__(self):
        return "LE %s {%s}" % (self.tokens, self.list_options())

    def parse(self, parser):
        self.label_tokens(parser)
        prefix = []
        tail = None
        current = []
        for token_i, token in enumerate(self.tokens):
            if token.is_special(SPECIAL_PIPE):
                prefix.append(parser.fold(self.string, current, 0, len(current)))
                current = []
                tail = parser.fold(self.string, self.tokens[token_i + 1 :], 0,
                                   len(self.tokens[token_i + 1 :]))
                break
            elif token.is_special(SPECIAL_COMMA):
                prefix.append(parser.fold(self.string, current, 0, len(current)))
                current = []
            else:
                current.append(token)
        if current:
            prefix.append(parser.fold(self.string, current, 0, len(current)))
        self._tokens = [parser.factory.build_index(prefix)]
        self.tokens = parser.factory.build_list(prefix, tail)
        # else :
        #     self.tokens = parser.fold(self.string, self.tokens, 0,
        #                               len(self.tokens))

    def enum_tokens(self):
        return self._tokens


class ParenExpression(SubExpression):
    def __init__(self, string, tokens, close_char=SPECIAL_PAREN_CLOSE):
        SubExpression.__init__(self, string, tokens)
        self.close_char = close_char

    @property
    def is_comma_list(self):
        return (not self.max_operators
                or self.max_operators[0].string == ","
                or self.max_operators[0].priority < 1000)

    def accepts(self, token):
        return not token.is_special(SPECIAL_BRACK_CLOSE)

    def __repr__(self):
        return "PE %s {%s}" % (self.tokens, self.list_options())

    def enum_tokens(self):
        return self.tokens
class Factory(object):
    """Factory object for creating suitable objects from the parse tree."""

    def build_program(self, clauses):
        return "\n".join(map(str, clauses))

    def build_function(self, functor, arguments, location=None):
        return "%s(%s)" % (functor, ", ".join(map(str, arguments)))

    def build_variable(self, name, location=None):
        return str(name)

    def build_constant(self, value, location=None):
        return str(value)

    def build_binop(self, functor, operand1, operand2, function=None,
                    location=None, **extra):
        return self.build_function("'" + functor + "'", (operand1, operand2),
                                   location=location)

    def build_unop(self, functor, operand, location=None, **extra):
        return self.build_function("'" + functor + "'", (operand,),
                                   location=location)

    def build_list(self, values, tail=None, location=None, **extra):
        if tail is None:
            return "[%s]" % (", ".join(map(str, values)))
        else:
            return "[%s | %s]" % (", ".join(map(str, values)), tail)

    def build_string(self, value, location=None):
        return self.build_constant('"' + value + '"', location=location)

    def build_cut(self, location=None):
        raise NotImplementedError("Not supported!")

    def build_index(self, arguments, **kwargs):
        return self.build_function("i", arguments, **kwargs)

    build_clause = build_binop
    build_probabilistic = build_binop
    build_disjunction = build_binop
    build_conjunction = build_binop
    build_compare_arithmetic = build_binop
    build_compare_struct = build_binop
    build_compare_eq = build_binop
    build_mathop2 = build_binop
    build_ifthen = build_binop
    build_list_op = build_binop
    build_not = build_unop
    build_mathop1 = build_unop
    build_directive = build_unop
    build_aggregate = build_function
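# A minimal usage sketch (added for illustration; not part of the original
# module). The Factory above renders every parsed construct back to a string,
# so parsing a probabilistic fact yields its functional notation. The helper
# name and the example program text are hypothetical.
def _example_usage():
    parser = PrologParser(Factory())
    # Expected output is roughly: '::'(0.5, heads(c1()))
    print(parser.parseString("0.5::heads(c1)."))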
def main(filenames):
    for filename in filenames:
        print(filename)
        print("------------------------------------")
        from problog.program import ExtendedPrologFactory

        try:
            parsed = PrologParser(ExtendedPrologFactory()).parseFile(filename)
            for s in parsed:
                print(s)
        except ParseError as e:
            print("ParseError:", e)
        print("====================================")


DefaultPrologParser = PrologParser

# from .parser_pyparsing import PrologParser as DefaultPrologParser
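# Entry-point sketch (an assumption; the distribution may invoke main()
# differently): run the parser on files given on the command line.
if __name__ == "__main__":
    import sys

    main(sys.argv[1:])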