Source code for glossary

"""
    Copyright 2020 Simon Vandevelde, Bram Aerts, Joost Vennekens
    This code is licensed under GNU GPLv3 license (see LICENSE) for more
    information.
    This file is part of the cDMN solver.
"""

import re
from typing import List
from cdmn.idpname import idp_name

"""
The glossary object contains the entire cDMN glossary table.
It interprets each line, and creates a Type or Predicate/Function object.
"""



[docs]
class Glossary:
    """
    The Glossary object contains all types, functions and predicates.
    During initialisation, it reads and interprets all the types, functions,
    constants, relations and booleans it can find and it reports any errors.
    Once the Glossary is created and initialized without errors, it's
    possible to print out the predicates in their IDP version.
    """
    def __init__(self, glossary_dict: dict):
        """
        Build the glossary from the parsed glossary tables.

        Registers the four built-in types (String, Int, Float, Real), then
        reads the Type table followed by the Function, Constant, Relation and
        Boolean tables, in that order.

        :arg dict: glossary_dict, maps each glossary kind ("Type",
            "Function", "Constant", "Relation", "Boolean") to its table.
        """
        builtin_names = ('String', 'Int', 'Float', 'Real')
        self.types = [Type(builtin, None) for builtin in builtin_names]
        self.predicates: List[Predicate] = []

        self.__read_types(glossary_dict["Type"], 0, 1, 2)

        # (table name, is it a 0-arity symbol?) -- the order is significant.
        predicate_tables = (("Function", False),
                            ("Constant", True),
                            ("Relation", False),
                            ("Boolean", True))
        for table_name, nullary in predicate_tables:
            self.__read_predicates(glossary_dict[table_name], table_name,
                                   zero_arity=nullary)

    def __str__(self):
        """
        Magic method to convert the Glossary to string.
        Prints out all the types, predicates and functions it contains.
        """
        retstr = "Glossary containing:\n"
        for typ in self.types:
            retstr += f"\t{str(typ)}\n"
        for pred in self.predicates:
            retstr += f"\t{str(pred)}\n"
        return retstr


[docs]
    def contains(self, typestr):
        """
        Checks whether or not a type was already added to the glossary.

        :returns bool: True if the type has been added already.
        """
        for typ in self.types:
            if typestr == typ.name:
                return True
        return False



[docs]
    def find_type(self, t):
        """
        Looks for types in the glossary.

        :returns List<Type>: the types found.
        """
        types = []
        for typ in self.types:
            if typ.match(t):
                types.append(typ)
        return next(filter(lambda x: x.match(t), self.types))


    def __read_types(self, array, ix_name=0, ix_type=1, ix_posvals=2):
        """
        Read and interpret all the types listed in the Type glossary.

        Every row after the two header rows yields one Type, which is
        appended to `self.types`. The possible values cell is normalised:
        an integer range such as "[1, 10]", "(1..10)" or "[1, 10)" becomes
        an inclusive "low..high" string (open bounds are moved inwards by
        one), anything else is split on commas and passed through
        `idp_name`.

        :arg array: the Type glossary table; rows are indexable by column.
        :arg int: ix_name, the index for the name column.
        :arg int: ix_type, the index for the type column.
        :arg int: ix_posvals, the index for the possible values column.
        :returns None:
        :throws ValueError: if a row names an unknown datatype, or (raised by
            Type) if the possible values cell is empty.
        """
        # It's possible that there's no Type glossary defined.
        if array is None:
            return

        # Matches an int range, for instance [1, 10], (1..10) or [1, 10).
        int_reg = r'(\[|\()(-?\d+)\s*(?:\.\.|,)\s*(-?\d+)\s*(\]|\))'

        # Skip the first 2 rows, as these are headers.
        for row in array[2:]:
            name = row[ix_name].strip()

            # Get, and try to decipher the type.
            # If we're not able to find the type, raise error.
            typ = row[ix_type]
            try:
                typ = self.find_type(typ)
            except StopIteration:
                error_message = (f"DataType \"{typ}\" should be either a"
                                 f" (String, Int, Float) or a"
                                 f" user-defined type")
                raise ValueError(error_message)

            # Check for possible values. A non-string cell (e.g. an empty
            # cell read as None) cannot be a range.
            posvals = row[ix_posvals]
            match = None
            if isinstance(posvals, str):
                match = re.match(int_reg, posvals)

            if match:
                opening, low, high, closing = match.groups()
                # The bounds are captured as text; convert before adjusting.
                low, high = int(low), int(high)
                if opening == '(':
                    low += 1
                if closing == ')':
                    high -= 1
                posvals = f'{low}..{high}'
            elif posvals is not None:
                posvals = ', '.join(idp_name(x) for x in
                                    re.split(r'\s*,\s*', posvals))

            # Create the type and append it to the list.
            self.types.append(Type(name, typ, posvals))

    def __read_predicates(self, array, glosname, ix_name=0,
                          ix_type=1, zero_arity=False):
        """
        Read one predicate/function glossary table.

        Every row after the two header rows yields one Predicate, which is
        appended to `self.predicates`. For the Function and Constant
        glossaries the type column is resolved to a Type; for the Relation
        and Boolean glossaries the symbol has no type.

        A function is partial when its name starts with the word "partial"
        (in any letter case); that prefix is removed from the name.

        :arg array: a glossary table, or None when that glossary is absent.
        :arg glosname: the name of the glossary, i.e. Function, Relation,
            Constant or Boolean
        :arg ix_name: the column index of the name column. By default this is
            always the first column.
        :arg ix_type: the column index of the type column. By default this is
            always the second column.
        :arg zero_arity: bool which should be True when the predicate is a
            0-arity predicate (constants and booleans).

        :returns None:
        :throws ValueError: if the datatype of a function or constant is
            empty or is not an existing type.
        """
        # It's possible that there's no glossary defined.
        if array is None:
            return

        # The glossary kind is the same for every row of the table.
        typed = bool(re.match('(partial )?Function|Constant', glosname))

        for row in array[2:]:
            full_name = row[ix_name].strip()
            partial = False
            typ = None

            if typed:
                # A (partial) function or a constant.
                predicate = False
                typ = row[ix_type]
                if typ:
                    typ = typ.strip()

                # Detect and strip the "partial" marker with the same
                # case-insensitive, prefix-only rule, so that text elsewhere
                # in the name is never touched.
                marker = re.match(r'(?i)partial\s+', full_name)
                if marker:
                    partial = True
                    full_name = full_name[marker.end():]

                try:
                    typ = self.find_type(typ)
                except TypeError:
                    raise ValueError(f'DataType of Function "{full_name}" is'
                                     f' empty')
                except StopIteration:
                    raise ValueError(f'DataType "{typ}" of "{full_name}" is'
                                     f' not an existing Type')
            else:
                # The predicate is a relation or a boolean.
                predicate = True

            # Create the predicate and append it to the list.
            p = Predicate.from_string(full_name, predicate, typ, self,
                                      partial, zero_arity)
            self.predicates.append(p)


[docs]
    def lookup(self, string: str):
        """
        TODO
        REWORK ENTIRE METHOD.
        """
        return list(filter(lambda x: x,
                           map(lambda x: x.lookup(string), self.predicates)))



[docs]
    def read_datatables(self, datatables, parser):
        """
        Reads and interprets the datatables.
        Also checks if the values in the datatables appear in the possible
        values column of the glossary.

        Firstly it checks which columns are input, and which are output.
        A column is an inputcolumn if it contains the table title in the first
        cell, and an output if it contains `None` in the first cell.

        Iterates over every outputcolumn, deciphers which predicate the
        outputcolumn represents, and sets its "struct_args" to a combination
        of the input arguments and the output column's arguments.
        The predicate uses this struct_args to format its struct string.

        :arg List<np.array>: datatables, containing all the datatables.
        :arg parser:
        :returns None:
        """
        # return
        if len(datatables) == 0:
            return
        for table in datatables:
            inputs = []
            outputs = []
            tablename = table[0][0]
            # First, we find the input and outputcolumns.
            for column in table.T[1:]:
                # If a column contains the table title, it's an inputcolumn.
                if column[0] is not None:
                    # We also want to find out the input variables.
                    inputs.append(column[1:])
                else:
                    outputs.append(column[1:])
            # Second, we check if the inputcolumns contain the right values.
            # This also adds those values to the "constructed from" if needed.
            for i, inputarr in enumerate(inputs):
                header = inputarr[0]
                # The header can be just the Type name, or "Type called ..".
                typename = header.split(" ")[0]

                # Look for the type name in the glossary.
                for typ in self.types:
                    if typename == typ.name:
                        typ.check_values(inputarr[1:], tablename)

            # Third, we check if the outputcolumns contain the right values.
            # This also adds those values to the "constructed from" if needed.
            for i, outputarr in enumerate(outputs):
                header = outputarr[0]
                # The header can be a function like "Department of Person".
                # Or it can be a relation. If it's a relation, we skip.
                typename = header.split(" ")[0]
                pred = parser.interpreter.interpret_value(header).pred
                if pred.is_relation():
                    continue
                typename = pred.super_type.name

                # Look for the type name in the glossary, and check its values.
                for typ in self.types:
                    if typename == typ.name:
                        typ.check_values(outputarr[1:], tablename)

            # Then we iterate over the outputcolumns.
            for i, output in enumerate(outputs):
                header = output[0]
                # Format the args.
                args = {}
                # Iterate over each row of the outputcolumn.
                # Skip the first cell because it's always None.
                for j, value in enumerate(output[1:]):
                    # Find the inputvalues for the same row, in order to create
                    # the `args` dictionary. This contains the output values
                    # for every predicate in a data table.
                    inputvals = []
                    for inputcol in inputs:
                        inputval = str(inputcol[j+1])
                        inputvals.append(inputval)
                    inputval = "|".join(inputvals)

                    args[inputval] = value

                header = parser.interpreter.interpret_value(header).pred.name

                # Look for the predicate name.
                success = False
                for pred in self.predicates:
                    if pred.full_name == header or pred.name == header:
                        pred.struct_args = args
                        success = True
                        break
                if not success:
                    raise ValueError(f"Predicate \"{header}\" in datatable but"
                                     f" not in glossary")



[docs]
    def to_idp_voc(self):
        """
        Function which turns every object in a glossary into their vocabulary
        definitions.

        :returns voc: string
        """
        voc = ''.join(map(lambda x: x.to_idp_voc(),
                          self.types+self.predicates))
        # Add error specific concepts.
        return voc



[docs]
    def to_json_dicts(self):
        """
        Creates a dict entry for every predicate and function, which is later
        turned into json.

        :returns dict:
        """
        json_dicts = []
        for pred in self.predicates:
            json_dicts.append(pred.to_json_dict())
        return json_dicts



[docs]
    def add_aux_var(self, aux):
        """
        Some variables need to use auxiliary variables, for instance those
        found in the outputcolumns of C# tables.
        This method allows the creation of those variables.
        No aux var are created when making an IDP file for the autoconfig
        interfaces.

        For every entry, the text before the first "(" (with underscores
        turned back into spaces) is taken as a predicate name. Each predicate
        with that name gets a copy named "_<name>" appended to the glossary.

        :arg List<str>: aux, a list containing strings of the variables.
        :returns None:
        """
        for var in aux:
            # Split off the predicate name.
            p_name = var.split('(')[0].replace('_', ' ')

            # Iterate over a snapshot: the list grows inside the loop.
            for p in list(self.predicates):
                if p_name != p.name:
                    continue
                # Carry over zero_arity, so that the copy of a constant or
                # boolean is not reported as a malformed function/relation.
                new_p = Predicate(f"_{p.name}", p.args, p.super_type,
                                  partial=p.partial,
                                  zero_arity=p.zero_arity)
                self.predicates.append(new_p)





[docs]
class Type:
    """
    TODO
    """
    def __init__(self, name: str, super_type, posvals="-"):
        """
        :arg str: the name of the type.
        :arg Type: the super type of the type.
        :arg str: posvals, the possible values of the type.
        """
        self.name = name
        if name != "Int" and name != "Float" and name != "Real" \
                and name != "String":
            self.display_name = self.name + "_t"
        else:
            self.display_name = self.name
        self.super_type = super_type
        self.possible_values = posvals

        self.struct_args = []
        self.knows_values = True
        self.source_datatable = ""

        # Check the input.
        if posvals is None:
            raise ValueError(f"Values column for type {self.name} is empty."
                             f" Did you forget a '-'?")

        # Toggle knows_values if the values are known.
        if posvals == "_" or posvals == "-" or posvals == "−":
            self.knows_values = False
            self.possible_values = ""

        if re.search("(?i)see_Data_Table|see_DataTable", posvals):
            self.knows_values = False
            self.possible_values = ""
            m = re.search(r"(?i)(?<=see_Data_Table_)(.*?)(?=\Z)"
                          r"|(?<=see_DataTable_)(.*?)(?=\Z)",
                          posvals)
            self.source_datatable = m[0]

    def __str__(self):
        """
        Magic method to turn the type into a string.

        :returns str: the typename.
        """
        return f"Type: {self.name}"


[docs]
    def to_theory(self):
        """
        Give the name under which this type appears in the theory.

        :returns str: the display name of the type.
        """
        return self.display_name


    def match(self, value):
        if self.basetype == self:  # When comparing with string, int, float,...
            return re.match(f'^{self.name}$', value, re.IGNORECASE)
        else:
            return re.match(f'^{self.name}$', value)

    @property
    def basetype(self):
        """
        The basetype represents one of the ancestor types, such as int or str.

        :returns type: the basetype.
        """
        try:
            return self.super_type.basetype
        except AttributeError:
            return self


[docs]
    def check_values(self, values, tablename):
        """
        Method to check if the values listed in a datatable match with the
        values listed in the possible values column(if a datatable was used).

        If the possible values column is left empty, then it assumes all the
        values are correct and it fills the possible values automatically.
        This is needed so that the type can input these values into constructed
        from.

        If the possible values column contains values, then every value used in
        a datatable needs to match a value in the possible values.

        :returns boolean: True if all the values match.
        :throws ValueError: if a value appears in the datatable but not in
            posvals.
        """

        # We only check the data if the tablename (see datatable ...) is
        # explicitly given or a wildcard (-) was used in the glossary.
        if self.source_datatable not in tablename:
            return

        if self.basetype.name == "String":
            # If no possible values were listed, read the datatable values and
            # add them to the possible values.
            if not self.knows_values:
                if self.possible_values is None:
                    self.possible_values = ""

                # Check for each value if it exists already, add it if not.
                for value in values:
                    subvalues = str(value).split(',')
                    for subvalue in subvalues:
                        subvalue = subvalue.strip()
                        subvalue = idp_name(subvalue)
                        regex = r"(?<!\w){}(?!\w)".format(idp_name(subvalue))
                        if not re.search(regex, self.possible_values):
                            if not self.possible_values == "":
                                self.possible_values += ","
                            self.possible_values += f" {subvalue}"
                return

            # If possible values were listed in the glossary, we check for
            # typos in the data table.
            for value in values:
                subvalues = str(value).split(',')
                for subvalue in subvalues:
                    subvalue = subvalue.strip()
                    subvalue = idp_name(subvalue)
                    regex = r"(?<!\w){}(?!\w)".format(subvalue)
                    if not re.search(regex, self.possible_values):
                        raise ValueError(f"Error: value {subvalue} in data"
                                         f"table but not in possible values.")

        elif self.basetype.name == "Int":
            # For integers we only check if they're in the right range.
            # We don't add them if the possible values is empty, because there
            # is no clear way of defining a range.
            if not self.knows_values:
                return

            # A range should be declared in the possible values. We need to
            # check if our value is within that range.
            leftbound, rightbound = None, None
            possible_values = None
            if ".." in self.possible_values:
                # E.g. "[0..30]"
                leftbound, rightbound = self.possible_values.split("..")
            elif '[' in self.possible_values and \
                    self.possible_values.count(",") == 1:
                # E.g. "[0, 30]"
                leftbound, rightbound = self.possible_values.split(",")
            else:
                # E.g. "0, 1, 2, 3, 4"
                possible_values = [int(x) for x in
                                   self.possible_values.split(",")]

            for value in values:
                subvalues = str(value).split(',')
                for subvalue in subvalues:
                    subvalue = int(subvalue.strip())
                    # If we know boundaries, the value should be within them.
                    if rightbound and (int(rightbound) < subvalue or
                                       subvalue < int(leftbound)):
                        raise ValueError(f"Error: value \"{subvalue}\" for"
                                         f" type {self.name} in"
                                         f" datatable but not in"
                                         f" range of possible values")
                    # If we know a list of values, the value should be in it.
                    if possible_values and subvalue not in possible_values:
                        raise ValueError(f"Error: value \"{subvalue}\" for"
                                         f" type {self.name} in"
                                         f" datatable but not in"
                                         f" list of possible values")



[docs]
    def to_idp_voc(self):
        """
        Converts all the information of the Type into a string for the IDP
        vocabulary.

        :returns str: the vocabulary form of the type.
        """
        # Check for 'string', 'int', and other default types which don't need
        # to explicitly be declared.
        if self.name == self.basetype.name:
            return ''

        typename = idp_name(self.display_name)
        # If it's a string, use :=.
        if self.basetype.name == 'String':
            constr_from = ':='
        # Else, we use "=" and semicolons instead of commas.
        else:
            constr_from = ':='
        if self.possible_values is None:
            return f'type {typename}\n'

        vals = self.possible_values
        voc = (f'\ttype {typename} {constr_from} {{ {vals} }}\n')

        return voc



[docs]
    def to_idp_struct(self):
        """
        Converts all the information of the Type into a string for the IDP
        structure.
        Normal types don't need a structure, as their possible values are
        listed as "constructed from" in the voc, so this always yields an
        empty string.
        This is here for future's sake.

        :returns str: the string for the structure (always empty).
        """
        return ""





[docs]
class Predicate:
    """
    Class which represents both predicates and functions.
    This double meaning is a relic of the past, and is to be fixed.
    In the future, a separate Function class should be created.
    """
    def __init__(self, name: str, args: List[Type], super_type: Type,
                 partial=False, full_name=None, zero_arity=False):
        """
        Initialises a predicate.
        :arg zero_arity: bool which should be True when the predicate is a
            0-arity predicate (constants and booleans).
        """
        self.name = name
        self.args = args
        self.super_type = super_type
        self.partial = partial
        self.repr = self.interpret_name()
        self.full_name = full_name
        self.struct_args = {}
        self.zero_arity = zero_arity

        if not self.args and self.is_function and not zero_arity:
            print(f'WARNING: "{self.name}" has been interpreted as single'
                  f' value instead of a function. Functions should be defined'
                  f' as FunctionName of Type and Type ...')
        elif not self.args and self.is_relation and not zero_arity:
            print(f'WARNING: "{self.name}" has been interpreted as a boolean'
                  f' value instead of a relation. Relations should be defined'
                  f' as Type and Type ... is RelationName')

    def __str__(self):
        """
        TODO
        """
        retstr = f"Predicate: {self.name}"
        return retstr


[docs]
    @staticmethod
    def from_string(full_name: str, predicate: bool, super_type: Type,
                    glossary: Glossary, partial=False,
                    zero_arity=False):
        """
        Static method to create a predicate from string.

        :arg str: full_name, the full name.
        :arg bool: predicate, true if predicate, false if function.
        :arg Type: super_type, the super type of the predicate.
        :arg Glossary: glossary, the glossary.
        :arg bool: partial, whether or not it's a partial function.
        :arg zero_arity: bool which should be True when the predicate is a
            0-arity predicate (constants and booleans).
        :returns Predicate:
        """
        if not predicate:  # Check if it's a function.
            regex = (r'^(?P<name>.*)$')
        else:
            regex = (r'^(?P<name>.*)$')
        try:
            name = re.match(regex, full_name).group('name')
        except AttributeError:
            name = full_name
        if zero_arity:
            return Predicate(full_name, [], super_type, partial,
                             zero_arity=zero_arity)
        else:  # We need to find the relation's types.
            args = []
            # TODO: sort the matches so the order is preserved.
            for typ in glossary.types:
                for x in re.finditer(r'\b' + f'{typ.name}' + r'\b',
                                     full_name):
                    args.append([x.start(), typ])
            # Sort the args based on their position in the string.
            args.sort(key=lambda x: x[0])
            args = [x[1] for x in args]
            return Predicate(name, args,
                             super_type, partial,
                             full_name, zero_arity)

        return Predicate(name, [glossary.find_type(t) for t in args],
                         super_type, partial, full_name, zero_arity)


    @property
    def arity(self):
        """
        Returns arity of the symbol's domain, i.e. its number of arguments.

        :returns int: the length of `args`.
        """
        return len(self.args)


[docs]
    def is_function(self):
        """
        Method to check whether the predicate is a function.
        Since only functions have super types, we use that as a check.
        Note that constants are a special case of functions.

        :returns boolean:
        """
        if self.super_type is None:
            return False
        else:
            return True



[docs]
    def is_relation(self):
        """
        Method to check whether the predicate is a relation.
        A predicate is either a relation or a function, so this is the
        negation of `is_function`. Note that booleans are a special case of
        relations.

        :returns boolean: True if the predicate is not a function.
        """
        return not self.is_function()



[docs]
    def interpret_name(self):
        """
        Method to interpret the name.
        This method forms a generic name representation, by replacing the
        arguments by dummies.
        In this way, it creates a skeleton structure for the name.

        Thus, it returns the name, without the arguments.
        For instance, `Country borders Country` becomes
        `(?P<arg0>.+) borders (?P<arg1>.+)`.
        This way, arg0 and arg1 can be found easily later on.
        """
        if not self.args:
            return self.name
        elif self.args:
            arglist = [arg.name for arg in self.args]
            new_name = self.name
            for i, arg in enumerate(arglist):
                # Replace the first occurrence of each type name by a dummy.
                new_name = re.sub(r'\b' + arg + r'\b', f'(?P<arg{i}>.+)',
                                  new_name, 1)
            return new_name
        else:
            raise ValueError("No idea what went wrong.")



[docs]
    def lookup(self, string: str):
        """
        Method to compare a string to this predicate, to see if the predicate
        appears in the string in any form.
        TODO: make this more clear.
        """
        d = re.match(self.repr, string)
        if d:
            d = d.groupdict()
            return self, [v for k, v in sorted(d.items(),
                                               key=(lambda x: int(x[0][3:])))]



[docs]
    def to_idp_voc(self):
        """
        Build the IDP vocabulary declaration of this predicate/function:
        the name, the argument types joined by " * ", and the result type
        (the function's type, or Bool for a relation).

        :returns str: the predicate/function in vocabulary format.
        """
        pieces = [f'\t{idp_name(self.name)}: ']
        if self.args:
            pieces.append(' * '.join(idp_name(arg.display_name)
                                     for arg in self.args))
        if self.is_function():
            pieces.append(f' -> {idp_name(self.super_type.display_name)}')
        else:
            pieces.append(' -> Bool')
        pieces.append("\n")
        return ''.join(pieces)



[docs]
    def to_idp_struct(self):
        """
        If a function or predicate receives a value in a datatable, we need to
        set it's values in the structure.
        When parsing the datatable in "read_datatables", we set the
        "struct_args" of the predicates/functions that get a value.
        struct_args could look like: {key1|key2:value}. However, it's possible
        to input multiple keys per cell to save space.

        For instance:
        "Jim|Skydiving, Soccer" needs to be formatted as
        "Jim, Skydiving; Jim, Soccer".
        The same goes for functions.

        To achieve this, we split the keys on their seperator, and then we
        split each key on a comma. This way, we have an array of keys in which
        each item is an array of subkeys. We need to form every possible
        combination of these keys, and to do this we use itertools.product.

        :returns: str
        """
        import itertools
        if len(self.struct_args) == 0:
            return None
        assign = ":="
        struct = f'\t{idp_name(self.name)} {assign} {{'
        # If the pred is a function, the IDP format is "arg,.. -> arg;".
        # The IDP-Z3 format is "(arg, ...) -> arg,"
        if self.is_function():
            default_val = None
            for key, arg in self.struct_args.items():
                # Here be dragons.
                default_val = arg  # TODO: actual default arg!
                keys = key.split('|')
                keys = [x.split(',') for x in keys]
                keys_product = itertools.product(*keys)
                for combination in list(keys_product):
                    idp_combination = [idp_name(x.strip())
                                       for x in combination]
                    struct += (f"({','.join(idp_combination)})"
                               f" -> {idp_name(arg)}, ")
            # Remove the final ", ".
            struct = struct[:-2]
            struct += "}.\n"  # } else {} \n".format(default_val)

        else:
            for key, arg in self.struct_args.items():
                # Check if the relation is a boolean (booleans have no keys).
                if key == "":
                    if re.match("(?i)yes", arg):
                        struct = f"\t{idp_name(self.name)} {assign} true.\n"
                        return struct
                    if re.match("(?i)no", arg):
                        struct = f"\t{idp_name(self.name)} {assign} false.\n"
                        return struct
                # Only add a relation if the value of the argument is yes.
                if not re.match("(?i)yes", arg):
                    continue
                # The key can consist of multiple values.
                keys = key.split('|')
                keys = [x.split(',') for x in keys]
                keys_product = itertools.product(*keys)
                for combination in list(keys_product):
                    idp_combination = [idp_name(x.strip())
                                       for x in combination]
                    struct += f"({','.join(idp_combination)}), "
            if struct[-2] == ',':
                # Remove the ', ' at the end.
                struct = struct[:-2]
            struct += '}.\n'

        return struct


    def to_json_dict(self):
        """
        Build the json entry of this symbol.

        The entry holds the IDP name and a type: "function" for functions,
        "proposition" for 0-arity relations and "predicate" for other
        relations. A 0-arity function whose basetype is Int or Float also
        gets 'showOptimize' set to "true".

        :returns dict:
        """
        entry = {'idpname': idp_name(self.name)}
        if self.is_function():
            entry['type'] = "function"
            if self.zero_arity and \
                    self.super_type.basetype.name in ("Int", "Float"):
                entry['showOptimize'] = "true"
        elif self.zero_arity:
            entry['type'] = "proposition"
        else:
            entry['type'] = "predicate"
        return entry