# aux.py

import collections
import collections.abc
import importlib
import os
import re
import shutil
from copy import deepcopy
from functools import reduce

import numpy as np

# Key-path descriptor: addresses element `i` of the array-like object stored under
# `key_path_to_array` (see recursive_get / recursive_set for how it is resolved).
ArrayItemGetter = collections.namedtuple('ArrayItemGetter', 'key_path_to_array i')

class ProxyDict(object):
    '''
    Class allowing to access a dict via a proxy mapping using the same interface as dict does.
    It supports two types of proxy mappings:
    1) relative_keys
    2) keys_mappings
    and also extends a simple key to key_path. For example, a sequence of keys leading to d['a']['b']['c']
    corresponds to a key_path ('a', 'b', 'c').

    Proxy mapping relative_keys is a sequence of key_paths leading to subdicts. The content of these subdicts
    is treated as located in the root of the proxy dict. For example, suppose we have
    d = {'a': 1, 'b': {'c': 2, 'd': 3}}.
    A proxy dict with relative_key ('b',) shall be pd = {'a': 1, 'c': 2, 'd': 3, 'b': {'c': 2, 'd': 3}}.

    Proxy mapping keys_mappings is a dict linking a (new) key in the root of the proxy dict to a key_path in
    the original dict. For example, for dict d, a proxy dict with keys_mappings {'d': ('b', 'd')} shall be
    pd = {'a': 1, 'd': 3, 'b': {'c': 2, 'd': 3}}.

    Finally, default_relative_key is a key_path leading to the subdict to which new elements are added.
    For example, for dict d, proxy dict pd and default_relative_key ('b',), operation pd['z'] = 0 leads to
    the following change in d:
    d = {'a': 1, 'b': {'c': 2, 'd': 3, 'z': 0}}

    The order of the proxy mappings (the higher mapping overwrites the lower):
    1) keys_mappings
    2) relative_keys
    3) original dict (root)

    The proxy never copies data: reads, writes and deletions operate on the wrapped dict itself.
    '''
    def __init__(self, data,
                 relative_keys=(),
                 keys_mappings=None,
                 default_relative_key=(),
                 ):
        self._data = data
        self._default_relative_key = list(default_relative_key)
        # Lowest priority: every root key of the wrapped dict maps to itself.
        self._keys_mappings = {key: key for key in self._data.keys()}
        # Middle priority: keys of the subdicts referenced by relative_keys are exposed at the root.
        for rel_key in relative_keys:
            for inner_key in recursive_get(data, rel_key).keys():
                self._keys_mappings[inner_key] = list(rel_key) + [inner_key]
        # Highest priority: explicit mappings (None means "no explicit mappings").
        if keys_mappings is not None:
            self._keys_mappings.update(keys_mappings)

    def __repr__(self):
        items = ''.join('{}: {}, '.format(key, self[key]) for key in self._keys_mappings)
        return '{' + items + '}'

    def __contains__(self, key):
        return key in self._keys_mappings

    def __getitem__(self, key):
        # x[key] => x.__getitem__(key)
        return recursive_get(self._data, self._keys_mappings[key])

    def __setitem__(self, key, value):
        # x[key] = value => x.__setitem__(key, value)
        if key in self._keys_mappings:
            recursive_set(self._data, self._keys_mappings[key], value)
        else:
            # Unknown keys are created under default_relative_key and registered in the proxy.
            new_path = self._default_relative_key + [key]
            recursive_set(self._data, new_path, value)
            self._keys_mappings[key] = new_path

    def __delitem__(self, key):
        # del x[key] => x.__delitem__(key)
        # Removes the entry from the wrapped dict and forgets its proxy mapping.
        # Raises KeyError for unknown keys, as dict does.
        path = self._keys_mappings[key]
        if not isinstance(path, (list, tuple)):
            # Plain root-level key.
            del self._data[path]
        elif isinstance(path, ArrayItemGetter):
            # Deletion is delegated to the container; containers which do not support
            # item deletion will raise their own error here.
            del recursive_get(self._data, path.key_path_to_array)[path.i]
        else:
            parent = recursive_get(self._data, path[:-1])
            del parent[path[-1]]
        del self._keys_mappings[key]

    def update(self, mapping):
        # Goes through __setitem__, so the proxy mappings are respected for every key.
        for key in mapping.keys():
            self[key] = mapping[key]

def recursive_get(d, keys):
    '''
    Returns the value stored in (nested) dict d under keys, where keys is one of:
    - ArrayItemGetter: the object under keys.key_path_to_array is fetched and indexed with keys.i;
    - a sequence (key_path): the keys are followed one by one via dict.get, so a missing key
      yields an empty dict instead of raising;
    - anything else: treated as a plain key, i.e. d[keys].
    '''
    if isinstance(keys, ArrayItemGetter):
        return recursive_get(d, keys.key_path_to_array)[keys.i]
    if not is_sequence(keys):
        return d[keys]
    node = d
    for key in keys:
        node = node.get(key, {})
    return node

def recursive_set(d, keys, val):
    '''
    Stores val in (nested) dict d under keys, where keys is one of:
    - ArrayItemGetter: element keys.i of the object under keys.key_path_to_array is assigned;
    - a sequence (key_path): intermediate dicts are created on demand and the last key is assigned;
    - anything else: treated as a plain key, i.e. d[keys] = val.
    '''
    if isinstance(keys, ArrayItemGetter):
        recursive_get(d, keys.key_path_to_array)[keys.i] = val
        return
    if not is_sequence(keys):
        d[keys] = val
        return
    target = d
    for key in keys[:-1]:
        # setdefault creates the missing intermediate level and descends into it
        target = target.setdefault(key, {})
    target[keys[-1]] = val

def is_sequence(obj):
    '''
    Checks whether obj is a sequence (string does not count as a sequence).

    Objects exposing a `strip` attribute (str, bytes, bytearray) are rejected so that
    a single string key is never mistaken for a key_path.
    '''
    # The ABC lives in collections.abc; the alias collections.Sequence was removed in Python 3.10.
    return isinstance(obj, collections.abc.Sequence) and (not hasattr(obj, 'strip'))

def cp(from_, to_):
    '''
    Copies from_ to to_ where from_ may be file or dir and to_ is a dir.
    Returns new path.

    A file is copied with shutil.copy and the path reported by shutil.copy is returned
    (to_/<basename of from_> when to_ is an existing directory).
    A directory is copied with shutil.copytree(from_, to_) and
    os.path.join(to_, os.path.basename(from_)) is returned.

    NOTE(review): shutil.copytree makes to_ itself the copy of from_, so for directories
    the returned path does not point at the copied tree. Destination and return value are
    kept as they were to avoid breaking callers -- confirm which behaviour is intended.
    '''
    if os.path.isfile(from_):
        # Previously this branch fell through and returned None.
        return shutil.copy(from_, to_)
    shutil.copytree(from_, to_)
    return os.path.join(to_, os.path.basename(from_))

def rm(target):
    '''
    Removes target which may be file or dir.

    Symbolic links are removed as links (the link target is left untouched), because
    shutil.rmtree refuses to operate on a symbolic link. Anything else that is not a
    regular file is removed recursively with shutil.rmtree.
    '''
    if os.path.isfile(target) or os.path.islink(target):
        os.remove(target)
    else:
        shutil.rmtree(target)

def remove_if_exists(path):
    '''
    Removes the file at path. Returns True if it was removed and False if it did not exist.
    '''
    try:
        os.remove(path)
    except FileNotFoundError:
        return False
    return True

def create_file_mkdir(filepath):
    """Opens a filepath in a write mode (i.e., creates/overwrites it). If the directory part
    of the path does not exist, the missing directories will be created.

    Returns the open file object; the caller is responsible for closing it.
    """
    dirpath = os.path.dirname(filepath)
    # A bare file name has an empty directory part and os.makedirs('') would raise.
    if dirpath:
        # exist_ok avoids a race between checking for the directory and creating it.
        os.makedirs(dirpath, exist_ok=True)
    return open(filepath, 'w')

def get_templates_path():
    '''
    Returns the path to the 'templates' directory located in the parent directory of the
    directory containing this module. It is useful when the module is imported from elsewhere.
    '''
    module_dir = os.path.dirname(__file__)
    parent_dir = os.path.dirname(module_dir)
    return os.path.join(parent_dir, 'templates')

def find_dir_by_named_regexp(regexp, where):
    """Search for dir in where which satisfies regexp. If successful, parses the dir according to named regexp.
    Only the immediate subdirectories of where are considered.
    Returns a tuple (found_dir, params_from_named_regexp) or None if not found
    (including the case when where does not exist or cannot be listed).
    """
    # os.walk yields nothing for a missing/unreadable directory; the default keeps
    # next() from leaking StopIteration to the caller.
    dirnames = next(os.walk(where), (where, [], []))[1]
    for dir_ in dirnames:
        parsing_params = parse_by_named_regexp(regexp, dir_)
        if parsing_params is not None:
            return dir_, parsing_params
    return None

def find_all_dirs_by_named_regexp(regexp, where):
    """Search for dirs in where which satisfy regexp. If successful, parses them according to named regexp.
    Only the immediate subdirectories of where are considered.
    Returns a list of tuples (found_dir, params_from_named_regexp); the list is empty if nothing
    matches or if where does not exist or cannot be listed.
    """
    # os.walk yields nothing for a missing/unreadable directory; the default keeps
    # next() from leaking StopIteration to the caller.
    dirnames = next(os.walk(where), (where, [], []))[1]
    datas = []
    for dir_ in dirnames:
        parsing_params = parse_by_named_regexp(regexp, dir_)
        if parsing_params is not None:
            datas.append((dir_, parsing_params))
    return datas

def parse_by_named_regexp(regexp, val):
    """Searches val for regexp (re.search) and returns a dictionary of its named groups,
    or None if there is no match.
    """
    matching = re.search(regexp, val)
    return matching.groupdict() if matching is not None else None

def parse_datafile(path, data_names, transform_funcs, cols_to_parse=None):
    """Parses a data file given by path and structured as a table where rows are separated by \\n
    and columns are separated by any of whitespaces. The first line in the file will be ignored.
    Processed columns are given by cols_to_parse (the first len(data_names) columns will be
    processed if it is None or empty).
    Corresponding names and transformation functions for columns in cols_to_parse are given by
    data_names and transform_funcs. Transformation function must be a mapping string -> type.

    Returns an ordered dictionary where a key corresponds to a column name (i.e., taken from data_names)
    and a value corresponds to a list of the columns values taken from all rows.

    Raises ValueError if the lengths of data_names, transform_funcs and cols_to_parse disagree
    or if a row has fewer columns than there are data names.
    """
    if cols_to_parse is None or len(cols_to_parse) == 0:
        cols_to_parse = range(len(data_names))
    if len(data_names) != len(transform_funcs) or len(data_names) != len(cols_to_parse):
        raise ValueError('Number of data names, transform functions and columns to be parsed is inconsistent')
    data = collections.OrderedDict((data_name, []) for data_name in data_names)

    # The context manager guarantees the file is closed even if parsing fails.
    with open(path, 'r') as f:  # if not found, exception will be raised anyway
        next(f, None)  # skip the first line
        for line in f:
            tmp = line.split()
            if len(tmp) < len(data_names):
                raise ValueError('Number of given data names is larger than number of columns we have in the data file.')
            for data_name, transform, col in zip(data_names, transform_funcs, cols_to_parse):
                data[data_name].append(transform(tmp[col]))
    return data

def parse_timed_numdatafile(path):
    """Parses a data file given by path and structured as a table where rows are separated by \\n
    and columns are separated by any of whitespaces. The table here has an interpretation of a matrix whose
    rows axis corresponds to time axis and columns axis corresponds to data axis. Moreover, the first column
    contains the time values so the data is contained in columns starting from the second one.
    The first line in the file is ignored.

    Returns time_list (a list of times from the first column) and data_matrix (a list of 1D numpy arrays
    of data where list's index corresponds to the time index).
    """
    time = []
    data = []
    # The context manager guarantees the file is closed even if parsing fails.
    with open(path, 'r') as f:  # if not found, exception will be raised anyway
        next(f, None)  # skip the first line
        for line in f:
            tmp = line.split()
            time.append(float(tmp[0]))
            data.append(np.array([float(val) for val in tmp[1:]], dtype=float))
    return time, data

def write_datafile(path, data):
    """Writes data (a mapping column name -> sequence of column values) to path as a table.

    The first line is a header of the form '% name1<TAB>name2...'; every following line holds
    one row with tab-separated values. The number of rows is taken from the first column.
    An empty mapping produces a file containing only the header line.
    """
    keys = [str(key) for key in data.keys()]  # str() lets non-string column names be joined
    values = list(data.values())
    n_rows = len(values[0]) if values else 0
    with open(path, 'w') as f:
        f.write(r'% ' + '\t'.join(keys) + '\n')
        for t_i in range(n_rows):
            line = '\t'.join([str(array[t_i]) for array in values]) + '\n'
            f.write(line)

def write_timed_numdatafile(path, time, data):
    """Writes time-indexed numeric data to path: one line per time value, consisting of the time
    followed by the tab-separated values of data[i].

    data may be a 2D numpy array or any sequence of rows (e.g. the list of arrays returned by
    parse_timed_numdatafile).

    NOTE: no header line is written, whereas parse_timed_numdatafile skips the first line of
    the file it reads.
    """
    with open(path, 'w') as f:
        for i, t in enumerate(time):
            # Iterating over the row (rather than using data.shape) also supports lists of rows.
            line = '{}\t'.format(t) + '\t'.join([str(val) for val in data[i]]) + '\n'
            f.write(line)

def load_function_from_module(full_function_name):
    '''
    Imports the module named by everything before the last dot of full_function_name and
    returns the attribute of that module named by the part after the last dot.
    '''
    name_parts = full_function_name.rsplit('.', 1)
    module_name, function_name = name_parts
    return getattr(importlib.import_module(module_name), function_name)