Commit 1b6fe950 authored by Anton Pershin's avatar Anton Pershin

Added working version files

parent c9658e69
# ignore custom config file
config_research.json
from functools import reduce
import os
import re
import shutil
import collections
from copy import deepcopy
import importlib
import numpy as np
ArrayItemGetter = collections.namedtuple('ArrayItemGetter', ['key_path_to_array', 'i'])
class ProxyDict(object):
    '''
    Class allowing to access a dict via a proxy mapping using the same interface as dict does.
    It supports two types of proxy mappings:
    1) relative_keys
    2) keys_mappings
    and also extends a simple key to a key_path. For example, the sequence of keys leading to d['a']['b']['c']
    corresponds to the key_path ('a', 'b', 'c').
    Proxy mapping relative_keys is a sequence of key_paths leading to subdicts. The content of these subdicts
    is treated as if it were located at the root of the proxy dict. For example, suppose we have
    d = {'a': 1, 'b': {'c': 2, 'd': 3}}.
    A proxy dict with relative_keys (('b',),) shall be pd = {'a': 1, 'c': 2, 'd': 3, 'b': {'c': 2, 'd': 3}}.
    Proxy mapping keys_mappings is a dict linking a (new) key at the root of the proxy dict to a key_path
    in the original dict. For example, for dict d, a proxy dict with keys_mappings {'d': ('b', 'd')} shall be
    pd = {'a': 1, 'd': 3, 'b': {'c': 2, 'd': 3}}.
    Finally, we have default_relative_key which is a key_path leading to the subdict to which new elements
    must be added. For example, for dict d, proxy dict pd and default_relative_key ('b',), the operation
    pd['z'] = 0 leads to the following change in d: d = {'a': 1, 'b': {'c': 2, 'd': 3, 'z': 0}}.
    The order of precedence of the proxy mappings (a higher mapping overwrites a lower one):
    1) keys_mappings
    2) relative_keys
    3) original dict (root)
    '''
    def __init__(self, data,
                 relative_keys=(),
                 keys_mappings={},
                 default_relative_key=(),
                 ):
        self._data = data
        self._default_relative_key = list(default_relative_key)
        self._keys_mappings = {key: key for key in self._data.keys()}
        for rel_key in relative_keys:
            for inner_key in recursive_get(data, rel_key).keys():
                self._keys_mappings[inner_key] = list(rel_key) + [inner_key]
        self._keys_mappings.update(keys_mappings)

    def __repr__(self):
        res = '{'
        for key in self._keys_mappings.keys():
            res += '{}: {}, '.format(key, self.__getitem__(key))
        return res + '}'

    def __contains__(self, key):
        return key in self._keys_mappings.keys()

    def __getitem__(self, key):
        # x[key] => x.__getitem__(key)
        return recursive_get(self._data, self._keys_mappings[key])

    def __setitem__(self, key, value):
        # x[key] = value => x.__setitem__(key, value)
        if key in self._keys_mappings:
            recursive_set(self._data, self._keys_mappings[key], value)
        else:
            recursive_set(self._data, self._default_relative_key + [key], value)
            self._keys_mappings[key] = self._default_relative_key + [key]

    def __delitem__(self, key):
        # del x[key] => x.__delitem__(key)
        # remove the entry from the underlying dict, not just a local reference to it
        key_path = self._keys_mappings[key]
        if is_sequence(key_path):
            del recursive_get(self._data, key_path[:-1])[key_path[-1]]
        else:
            del self._data[key_path]
        del self._keys_mappings[key]

    def update(self, mapping):
        for key in mapping.keys():
            self.__setitem__(key, mapping[key])
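A minimal usage sketch of ProxyDict, reusing the dict d from the docstring above (all values are illustrative):

d = {'a': 1, 'b': {'c': 2, 'd': 3}}
pd = ProxyDict(d,
               relative_keys=(('b',),),
               keys_mappings={'e': ('b', 'd')},
               default_relative_key=('b',))
print(pd['c'])      # 2, reached through the relative key ('b',)
print(pd['e'])      # 3, reached through the explicit mapping to ('b', 'd')
pd['z'] = 0         # unknown keys go into the default relative subdict
print(d['b']['z'])  # 0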
def recursive_get(d, keys):
    if isinstance(keys, ArrayItemGetter):
        array_ = recursive_get(d, keys.key_path_to_array)
        return array_[keys.i]
    elif is_sequence(keys):
        return reduce(lambda d_, key_: d_.get(key_, {}), keys, d)
    else:
        return d[keys]

def recursive_set(d, keys, val):
    if isinstance(keys, ArrayItemGetter):
        array_ = recursive_get(d, keys.key_path_to_array)
        array_[keys.i] = val
    elif is_sequence(keys):
        last_dict = reduce(lambda d_, key_: d_.setdefault(key_, {}), keys[:-1], d)
        last_dict[keys[-1]] = val
    else:
        d[keys] = val

def is_sequence(obj):
    '''
    Checks whether obj is a sequence (a string does not count as a sequence).
    '''
    return isinstance(obj, collections.abc.Sequence) and (not hasattr(obj, 'strip'))
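A short sketch of how recursive_get/recursive_set compose with ArrayItemGetter for indexing into a list stored inside a nested dict (the dict is illustrative):

d = {'grid': {'points': [10, 20, 30]}}
print(recursive_get(d, ('grid', 'points')))  # [10, 20, 30]
recursive_set(d, ('grid', 'spacing'), 0.5)   # creates d['grid']['spacing']
getter = ArrayItemGetter(key_path_to_array=('grid', 'points'), i=1)
print(recursive_get(d, getter))              # 20
recursive_set(d, getter, 25)                 # sets d['grid']['points'][1] = 25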
def cp(from_, to_):
    '''
    Copies from_ to to_ where from_ may be a file or a dir and to_ is a dir.
    Returns the new path.
    '''
    if os.path.isfile(from_):
        shutil.copy(from_, to_)
    else:
        # copytree requires the destination itself to not exist, so copy into a subdir of to_
        shutil.copytree(from_, os.path.join(to_, os.path.basename(from_)))
    return os.path.join(to_, os.path.basename(from_))

def rm(target):
    '''
    Removes target which may be a file or a dir.
    '''
    if os.path.isfile(target):
        os.remove(target)
    else:
        shutil.rmtree(target)

def remove_if_exists(path):
    try:
        os.remove(path)
        return True
    except FileNotFoundError:
        return False

def create_file_mkdir(filepath):
    """Opens filepath in write mode (i.e., creates/overwrites it). If the path does not exist,
    the intermediate directories will be created.
    """
    dirpath = os.path.dirname(filepath)
    if not os.path.exists(dirpath):
        os.makedirs(dirpath)
    return open(filepath, 'w')
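A brief usage sketch of create_file_mkdir (the path is illustrative): the intermediate directories are created if needed and a writable file object is returned.

with create_file_mkdir('results/run_1/log.txt') as f:
    f.write('started\n')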
def get_templates_path():
    '''
    Returns the absolute path to the templates directory. It is useful when the module is imported from elsewhere.
    '''
    return os.path.join(os.path.dirname(os.path.dirname(__file__)), 'templates')

def find_dir_by_named_regexp(regexp, where):
    """Searches for a dir in where which satisfies regexp. If successful, parses the dir according to the named regexp.
    Returns a tuple (found_dir, params_from_named_regexp) or None if nothing is found.
    """
    dirnames = next(os.walk(where))[1]
    for dir_ in dirnames:
        parsing_params = parse_by_named_regexp(regexp, dir_)
        if parsing_params is not None:
            return dir_, parsing_params
    return None

def find_all_dirs_by_named_regexp(regexp, where):
    """Searches for all dirs in where which satisfy regexp. If successful, parses them according to the named regexp.
    Returns a list of tuples (found_dir, params_from_named_regexp).
    """
    dirnames = next(os.walk(where))[1]
    datas = []
    for dir_ in dirnames:
        parsing_params = parse_by_named_regexp(regexp, dir_)
        if parsing_params is not None:
            datas.append((dir_, parsing_params))
    return datas

def parse_by_named_regexp(regexp, val):
    """Parses val according to the named regexp. Returns a dictionary of params or None if val does not match.
    """
    matching = re.search(regexp, val)
    if matching is None:
        return None
    return matching.groupdict()
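A quick sketch of the named-regexp convention these helpers rely on (the pattern and directory name are illustrative):

params = parse_by_named_regexp(r'run_R_(?P<R>\d+\.\d+)', 'run_R_170.320')
print(params)  # {'R': '170.320'}
# find_dir_by_named_regexp(r'run_R_(?P<R>\d+\.\d+)', '/path/to/research')
# would return the first matching dir name together with such a dict.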
def parse_datafile(path, data_names, transform_funcs, cols_to_parse=[]):
    """Parses a data file given by path and structured as a table where rows are separated by \n
    and columns are separated by any whitespace. The first line in the file will be ignored.
    The columns to be processed are given by cols_to_parse (all columns will be processed if it is empty).
    The corresponding names and transformation functions for the columns in cols_to_parse are given by
    data_names and transform_funcs. A transformation function must be a mapping string -> type.
    Returns a dictionary where a key corresponds to a column name (i.e., taken from data_names)
    and a value corresponds to a list of the column's values taken from all rows.
    """
    if cols_to_parse == []:
        cols_to_parse = range(len(data_names))
    if len(data_names) != len(transform_funcs) or len(data_names) != len(cols_to_parse):
        raise Exception('Number of data names, transform functions and columns to be parsed is inconsistent')
    data = collections.OrderedDict()
    for data_name in data_names:
        data[data_name] = []
    with open(path, 'r') as f:  # if the file is not found, an exception will be raised anyway
        lines = f.readlines()
    for line in lines[1:]:  # skip the first line
        tmp = line.split()
        if len(tmp) < len(data_names):
            raise Exception('Number of given data names is larger than the number of columns in the data file.')
        for i, data_name in enumerate(data_names):
            val = tmp[cols_to_parse[i]]
            data[data_name].append(transform_funcs[i](val))
    return data
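A usage sketch for parse_datafile, assuming a whitespace-separated file a.dat whose header line is skipped (the file name and columns are illustrative):

# a.dat:
# t    E
# 0.0  1.32
# 0.1  1.28
data = parse_datafile('a.dat', ['t', 'E'], [float, float])
print(data['t'])  # [0.0, 0.1]
print(data['E'])  # [1.32, 1.28]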
def parse_timed_numdatafile(path):
    """Parses a data file given by path and structured as a table where rows are separated by \n
    and columns are separated by any whitespace. The table here is interpreted as a matrix whose
    row axis corresponds to the time axis and whose column axis corresponds to the data axis. Moreover,
    the first column contains the time values, so the data is contained in the columns starting from the second one.
    Returns time_list (a list of times from the first column) and data_matrix (a list of numpy arrays of data where
    the list's index corresponds to the time index).
    """
    time = []
    data = []
    with open(path, 'r') as f:  # if the file is not found, an exception will be raised anyway
        lines = f.readlines()
    for line in lines[1:]:  # skip the first line
        tmp = line.split()
        time.append(float(tmp[0]))
        timed_data = np.zeros((len(tmp) - 1,))
        for i, val in enumerate(tmp[1:]):
            timed_data[i] = float(val)
        data.append(timed_data)
    return time, data
def write_datafile(path, data):
    """Writes a dictionary of columns (as produced by parse_datafile) to path,
    prefixing the header line with '% '.
    """
    keys = list(data.keys())
    values = list(data.values())
    with open(path, 'w') as f:
        f.write(r'% ' + '\t'.join(keys) + '\n')
        for t_i in range(len(values[0])):
            line = '\t'.join([str(array[t_i]) for array in values]) + '\n'
            f.write(line)

def write_timed_numdatafile(path, time, data):
    """Writes time values and a 2D numpy array data to path, one row per time instant:
    the time value in the first column followed by the corresponding row of data.
    """
    with open(path, 'w') as f:
        for i in range(len(time)):
            line = '{}\t'.format(time[i]) + '\t'.join([str(data[i][j]) for j in range(data.shape[1])]) + '\n'
            f.write(line)
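A round-trip sketch for the timed-data functions (the file name and values are illustrative). Note that write_timed_numdatafile writes no header line while parse_timed_numdatafile skips the first line, so the first written row is treated as a header when parsed back:

t = [0.0, 0.1]
u = np.array([[1.0, 2.0],
              [3.0, 4.0]])
write_timed_numdatafile('u.dat', t, u)
time, data = parse_timed_numdatafile('u.dat')
print(time)     # [0.1]
print(data[0])  # array([3., 4.])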
def load_function_from_module(full_function_name):
    """Imports a function given its fully qualified name, e.g. 'package.module.func'.
    """
    module_name, function_name = full_function_name.rsplit('.', 1)
    module_ = importlib.import_module(module_name)
    return getattr(module_, function_name)
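A usage sketch of load_function_from_module with a standard-library function, so it runs as-is:

join = load_function_from_module('os.path.join')
print(join('a', 'b'))  # 'a/b' ('a\\b' on Windows)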
from comsdk.aux import find_dir_by_named_regexp
from functools import partial
import os
class DistributedStorage:
    """
    Distributed storage is a set of sources containing the data. The sources must be accessible via the OS API.
    It is assumed that the data somewhat overlaps; namely, it should overlap in terms of the catalog hierarchy.
    However, this implementation does not guarantee the uniqueness of the data: instead, it uses a priority to prefer
    one source over another while looking up. Even though duplicates are acceptable, the ones found will be printed
    out for the sake of the user's attention.
    """
    def __init__(self, abs_storage_paths, prior_storage_index=0):
        self.storage_paths = abs_storage_paths
        self.prior_storage_index = prior_storage_index
    def get_dir_path(self, dir_):
        """
        Returns the full path to dir_ or None if dir_ is absent.
        """
        dir_path_tuple = self.lookup_through_dir(dir_,
                lambda dir_path: (dir_path, dir_path) if os.path.exists(dir_path) else None)
        return dir_path_tuple[0] if dir_path_tuple is not None else None

    def make_dir(self, dir_):
        """
        Creates dir_ in the prior storage. Returns the full path to it.
        """
        path_ = os.path.join(self.storage_paths[self.prior_storage_index], dir_)
        os.makedirs(path_)
        return path_

    def find_dir_by_named_regexp(self, parent_dir, regexp):
        """
        Finds a directory in parent_dir fulfilling regexp. Returns a tuple (full_path_to_found_dir, named_params_from_regexp).
        """
        return self.lookup_through_dir(parent_dir, partial(find_dir_by_named_regexp, regexp))

    def lookup_through_dir(self, dir_, lookup_func):
        """
        Looks up the data in dir_ by executing lookup_func on dir_. Returns a tuple (full_path_to_dir, some_data_regarding_dir)
        which must, in turn, be returned by lookup_func. lookup_func must take a single argument -- the full path to the dir.
        """
        possible_paths = [os.path.join(source, dir_) if dir_ != '' else source for source in self.storage_paths]
        found_data = None
        prior_found = False
        for path_i, path_ in enumerate(possible_paths):
            if os.path.exists(path_):
                tmp_found_data = lookup_func(path_)
                if tmp_found_data is not None:
                    tmp_found_path = os.path.join(path_, tmp_found_data[0])
                    if found_data is not None:
                        print("Duplicate distributed dir found: '{}' and '{}'".format(tmp_found_path, found_data[0]))
                    if not prior_found:
                        found_data = (tmp_found_path, tmp_found_data[1])
                    if path_i == self.prior_storage_index:
                        prior_found = True
        return found_data

    def listdir(self, dir_):
        """
        Lists the content of dir_. Returns a tuple (dirnames, filenames) which is obtained as a simple union of the contents
        of the sources. Therefore, there might be duplicates whose detection must be performed elsewhere.
        """
        dirnames = []
        filenames = []
        for storage_path in self.storage_paths:
            if os.path.exists(os.path.join(storage_path, dir_)):
                _, dirnames_, filenames_ = next(os.walk(os.path.join(storage_path, dir_)))
                dirnames += dirnames_
                filenames += filenames_
        return dirnames, filenames
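A minimal usage sketch of DistributedStorage (the storage paths and dir name are illustrative):

storage = DistributedStorage(['/ssd/research', '/hdd/research'], prior_storage_index=0)
path = storage.get_dir_path('RUNS-2019')            # full path from the preferred source, or None
dirnames, filenames = storage.listdir('RUNS-2019')  # union of the sources' contents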
import os
import subprocess

from comsdk.aux import cp, rm
class BaseEnvironment(object):
    def __init__(self):
        self._programs = {}

    def preprocess(self, working_dir, input_copies_list):
        raise NotImplementedError()

    def execute(self, working_dir, prog_name, command_line):
        raise NotImplementedError()

    def postprocess(self, working_dir, output_copies_list):
        raise NotImplementedError()

    def add_program(self, prog_name, path_to_prog):
        self._programs[prog_name] = path_to_prog

    def _print_copy_msg(self, from_, to_):
        print('\tCopying %s to %s' % (from_, to_))

    def _print_exec_msg(self, cmd, is_remote):
        # _machine_name is expected to be set by remote environments
        where = '@' + self._machine_name if is_remote else ''
        print('\tExecuting %s: %s' % (where, cmd))
class LocalEnvironment(BaseEnvironment):
    def __init__(self):
        super().__init__()

    def preprocess(self, working_dir, input_copies_list):
        for copy_target in input_copies_list:
            self._copy(copy_target, working_dir)

    def execute(self, working_dir, prog_name, args_str):
        prog_path = os.path.join(self._programs[prog_name], prog_name)
        command_line = 'cd {}; {} {}'.format(working_dir, prog_path, args_str)
        # Use PIPEs to avoid breaking the child process when the parent process finishes
        # (works on Linux; a solution for Windows is to add creationflags=0x00000010 instead of stdout, stderr, stdin):
        #pid = subprocess.Popen([command_line], shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
        #print(pid)
        subprocess.call([command_line], shell=True)

    def postprocess(self, working_dir, output_copies_list):
        pass

    def _copy(self, from_, to_, mode='from_local'):
        """Any mode is ignored since the copying happens within the local machine anyway.
        """
        cp(from_, to_)
        self._print_copy_msg(from_, to_)

    def rm(self, target):
        rm(target)
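A sketch of how LocalEnvironment is meant to be driven (the program name and paths are illustrative):

env = LocalEnvironment()
env.add_program('square', '/home/user/tests/square')        # dir containing the 'square' binary
env.preprocess('/tmp/workdir', ['/home/user/tests/a.dat'])  # copy inputs into the working dir
env.execute('/tmp/workdir', 'square', 'a.dat')              # runs 'cd /tmp/workdir; .../square a.dat'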
{
    "LOCAL_HOST": {
        "research_path": "...",
        "custom_programs": {
            "@path_to_binaries@": ["@bin1@", "@bin2@", ...],
            ...
        }
    },
    "REMOTE_HOSTS": {
        "@remote_host_sid@": {
            "ssh_host": "...",
            "max_cores": ...,
            "username": "...",
            "password": "...",
            "research_path": "...",
            "env_programs": ["@bin1@", "@bin2@", ...],
            "custom_programs": {
                "@path_to_binaries@": ["@bin1@", "@bin2@", ...],
                ...
            },
            "sge_template_name": "...",
            "job_setter": "...",
            "job_finished_checker": "..."
        },
        ...
    },
    "RESEARCH": {
        "@research_sid@": "@research_full_name@",
        ...
    },
    "RESEARCH_PROPS": {
        ...
    }
}
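The JSON above is a template with @...@ placeholders; a user-specific copy (config_research.json, ignored by git above) would be read with the standard json module. A minimal loading sketch, assuming the placeholders have been filled in:

import json

with open('config_research.json', 'r') as f:
    conf = json.load(f)
local_research_path = conf['LOCAL_HOST']['research_path']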
#include <fstream>
#include <string>

using namespace std;

// Reads an integer from the input file given as the first command-line argument
// and writes its square to b.dat.
int main(int argc, char* argv[])
{
    string input_file_path(argv[1]);
    string output_file_path("b.dat");
    ifstream f_in(input_file_path);
    int x;
    f_in >> x;
    ofstream f_out(output_file_path);
    f_out << x*x;
    return 0;
}
#$ -cwd -V
#$ -l h_rt=12:00:00
#$ -pe smp 12
/home/home01/mmap/tests/square/square /home/home01/mmap/tests/square_test_dir/a.dat
#$ -cwd -V
#$ -l h_rt=12:00:00
#$ -pe smp 12
./findsoln -symms reflect_symmetry.asc -R 170.320 -o find-170.320 -es 1e-15 -eqb find-170.330/ubest.h5
qsub fe_170.315.sh