Commit 1b6fe950 authored by Anton Pershin's avatar Anton Pershin

Added working version files

parent c9658e69
# ignore custom config file
config_research.json
from functools import reduce
import os
import re
import shutil
import collections
from copy import deepcopy
import importlib
import numpy as np
ArrayItemGetter = collections.namedtuple('ArrayItemGetter', ['key_path_to_array', 'i'])
class ProxyDict(object):
    '''
    Class allowing to access a dict via a proxy mapping using the same interface as dict does.
    It supports two types of proxy mappings:
    1) relative_keys
    2) keys_mappings
    and also extends a simple key to a key_path. For example, the sequence of keys leading to d['a']['b']['c']
    corresponds to the key_path ('a', 'b', 'c').
    Proxy mapping relative_keys is a sequence of key_paths leading to subdicts. The content of these subdicts
    is treated as if it were located at the root of the proxy dict. For example, suppose we have
    d = {'a': 1, 'b': {'c': 2, 'd': 3}}.
    A proxy dict with relative_keys (('b',),) shall be pd = {'a': 1, 'c': 2, 'd': 3, 'b': {'c': 2, 'd': 3}}.
    Proxy mapping keys_mappings is a dict linking a (new) key at the root of the proxy dict to a key_path
    in the original dict. For example, for dict d, a proxy dict with keys_mappings {'d': ('b', 'd')} shall be
    pd = {'a': 1, 'd': 3, 'b': {'c': 2, 'd': 3}}.
    Finally, we have default_relative_key which is a key_path leading to the subdict to which new elements
    must be added. For example, for dict d, proxy dict pd and default_relative_key ('b',), the operation
    pd['z'] = 0 leads to the following change in d: d = {'a': 1, 'b': {'c': 2, 'd': 3, 'z': 0}}.
    The order of precedence of the proxy mappings (a higher mapping overwrites a lower one):
    1) keys_mappings
    2) relative_keys
    3) original dict (root)
    '''
    def __init__(self, data,
                 relative_keys=(),
                 keys_mappings={},
                 default_relative_key=(),
                 ):
        self._data = data
        self._default_relative_key = list(default_relative_key)
        self._keys_mappings = {key: key for key in self._data.keys()}
        for rel_key in relative_keys:
            for inner_key in recursive_get(data, rel_key).keys():
                self._keys_mappings[inner_key] = list(rel_key) + [inner_key]
        self._keys_mappings.update(keys_mappings)

    def __repr__(self):
        res = '{'
        for key in self._keys_mappings.keys():
            res += '{}: {}, '.format(key, self.__getitem__(key))
        return res + '}'

    def __contains__(self, key):
        return key in self._keys_mappings.keys()

    def __getitem__(self, key):
        # x[key] => x.__getitem__(key)
        return recursive_get(self._data, self._keys_mappings[key])

    def __setitem__(self, key, value):
        # x[key] = value => x.__setitem__(key, value)
        if key in self._keys_mappings:
            recursive_set(self._data, self._keys_mappings[key], value)
        else:
            recursive_set(self._data, self._default_relative_key + [key], value)
            self._keys_mappings[key] = self._default_relative_key + [key]

    def __delitem__(self, key):
        # del x[key] => x.__delitem__(key)
        # remove the entry from the underlying dict, not just a local reference to it
        key_path = self._keys_mappings[key]
        if is_sequence(key_path):
            del recursive_get(self._data, key_path[:-1])[key_path[-1]]
        else:
            del self._data[key_path]
        del self._keys_mappings[key]

    def update(self, mapping):
        for key in mapping.keys():
            self.__setitem__(key, mapping[key])
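A minimal usage sketch of ProxyDict, reusing the dict d from the docstring above (all values are illustrative):

d = {'a': 1, 'b': {'c': 2, 'd': 3}}
pd = ProxyDict(d,
               relative_keys=(('b',),),
               keys_mappings={'e': ('b', 'd')},
               default_relative_key=('b',))
print(pd['c'])      # 2, reached through the relative key ('b',)
print(pd['e'])      # 3, reached through the explicit mapping to ('b', 'd')
pd['z'] = 0         # unknown keys go into the default relative subdict
print(d['b']['z'])  # 0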
def recursive_get(d, keys):
    if isinstance(keys, ArrayItemGetter):
        array_ = recursive_get(d, keys.key_path_to_array)
        return array_[keys.i]
    elif is_sequence(keys):
        return reduce(lambda d_, key_: d_.get(key_, {}), keys, d)
    else:
        return d[keys]

def recursive_set(d, keys, val):
    if isinstance(keys, ArrayItemGetter):
        array_ = recursive_get(d, keys.key_path_to_array)
        array_[keys.i] = val
    elif is_sequence(keys):
        last_dict = reduce(lambda d_, key_: d_.setdefault(key_, {}), keys[:-1], d)
        last_dict[keys[-1]] = val
    else:
        d[keys] = val

def is_sequence(obj):
    '''
    Checks whether obj is a sequence (a string does not count as a sequence).
    '''
    return isinstance(obj, collections.abc.Sequence) and (not hasattr(obj, 'strip'))
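A short sketch of how recursive_get/recursive_set compose with ArrayItemGetter for indexing into a list stored inside a nested dict (the dict is illustrative):

d = {'grid': {'points': [10, 20, 30]}}
print(recursive_get(d, ('grid', 'points')))  # [10, 20, 30]
recursive_set(d, ('grid', 'spacing'), 0.5)   # creates d['grid']['spacing']
getter = ArrayItemGetter(key_path_to_array=('grid', 'points'), i=1)
print(recursive_get(d, getter))              # 20
recursive_set(d, getter, 25)                 # sets d['grid']['points'][1] = 25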
def cp(from_, to_):
    '''
    Copies from_ to to_ where from_ may be a file or a dir and to_ is a dir.
    Returns the new path.
    '''
    if os.path.isfile(from_):
        shutil.copy(from_, to_)
    else:
        # copytree requires the destination itself to not exist, so copy into a subdir of to_
        shutil.copytree(from_, os.path.join(to_, os.path.basename(from_)))
    return os.path.join(to_, os.path.basename(from_))

def rm(target):
    '''
    Removes target which may be a file or a dir.
    '''
    if os.path.isfile(target):
        os.remove(target)
    else:
        shutil.rmtree(target)

def remove_if_exists(path):
    try:
        os.remove(path)
        return True
    except FileNotFoundError:
        return False

def create_file_mkdir(filepath):
    """Opens filepath in write mode (i.e., creates/overwrites it). If the path does not exist,
    the intermediate directories will be created.
    """
    dirpath = os.path.dirname(filepath)
    if not os.path.exists(dirpath):
        os.makedirs(dirpath)
    return open(filepath, 'w')
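A brief usage sketch of create_file_mkdir (the path is illustrative): the intermediate directories are created if needed and a writable file object is returned.

with create_file_mkdir('results/run_1/log.txt') as f:
    f.write('started\n')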
def get_templates_path():
    '''
    Returns the absolute path to the templates directory. It is useful when the module is imported from elsewhere.
    '''
    return os.path.join(os.path.dirname(os.path.dirname(__file__)), 'templates')

def find_dir_by_named_regexp(regexp, where):
    """Searches for a dir in where which satisfies regexp. If successful, parses the dir according to the named regexp.
    Returns a tuple (found_dir, params_from_named_regexp) or None if nothing is found.
    """
    dirnames = next(os.walk(where))[1]
    for dir_ in dirnames:
        parsing_params = parse_by_named_regexp(regexp, dir_)
        if parsing_params is not None:
            return dir_, parsing_params
    return None

def find_all_dirs_by_named_regexp(regexp, where):
    """Searches for all dirs in where which satisfy regexp. If successful, parses them according to the named regexp.
    Returns a list of tuples (found_dir, params_from_named_regexp).
    """
    dirnames = next(os.walk(where))[1]
    datas = []
    for dir_ in dirnames:
        parsing_params = parse_by_named_regexp(regexp, dir_)
        if parsing_params is not None:
            datas.append((dir_, parsing_params))
    return datas

def parse_by_named_regexp(regexp, val):
    """Parses val according to the named regexp. Returns a dictionary of params or None if val does not match.
    """
    matching = re.search(regexp, val)
    if matching is None:
        return None
    return matching.groupdict()
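A quick sketch of the named-regexp convention these helpers rely on (the pattern and directory name are illustrative):

params = parse_by_named_regexp(r'run_R_(?P<R>\d+\.\d+)', 'run_R_170.320')
print(params)  # {'R': '170.320'}
# find_dir_by_named_regexp(r'run_R_(?P<R>\d+\.\d+)', '/path/to/research')
# would return the first matching dir name together with such a dict.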
def parse_datafile(path, data_names, transform_funcs, cols_to_parse=[]):
    """Parses a data file given by path and structured as a table where rows are separated by \n
    and columns are separated by any whitespace. The first line in the file will be ignored.
    The columns to be processed are given by cols_to_parse (all columns will be processed if it is empty).
    The corresponding names and transformation functions for the columns in cols_to_parse are given by
    data_names and transform_funcs. A transformation function must be a mapping string -> type.
    Returns a dictionary where a key corresponds to a column name (i.e., taken from data_names)
    and a value corresponds to a list of the column's values taken from all rows.
    """
    if cols_to_parse == []:
        cols_to_parse = range(len(data_names))
    if len(data_names) != len(transform_funcs) or len(data_names) != len(cols_to_parse):
        raise Exception('Number of data names, transform functions and columns to be parsed is inconsistent')
    data = collections.OrderedDict()
    for data_name in data_names:
        data[data_name] = []
    with open(path, 'r') as f:  # if the file is not found, an exception will be raised anyway
        lines = f.readlines()
    for line in lines[1:]:  # skip the first line
        tmp = line.split()
        if len(tmp) < len(data_names):
            raise Exception('Number of given data names is larger than the number of columns in the data file.')
        for i, data_name in enumerate(data_names):
            val = tmp[cols_to_parse[i]]
            data[data_name].append(transform_funcs[i](val))
    return data
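A usage sketch for parse_datafile, assuming a whitespace-separated file a.dat whose header line is skipped (the file name and columns are illustrative):

# a.dat:
# t    E
# 0.0  1.32
# 0.1  1.28
data = parse_datafile('a.dat', ['t', 'E'], [float, float])
print(data['t'])  # [0.0, 0.1]
print(data['E'])  # [1.32, 1.28]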
def parse_timed_numdatafile(path):
    """Parses a data file given by path and structured as a table where rows are separated by \n
    and columns are separated by any whitespace. The table here is interpreted as a matrix whose
    row axis corresponds to the time axis and whose column axis corresponds to the data axis. Moreover,
    the first column contains the time values, so the data is contained in the columns starting from the second one.
    Returns time_list (a list of times from the first column) and data_matrix (a list of numpy arrays of data where
    the list's index corresponds to the time index).
    """
    time = []
    data = []
    with open(path, 'r') as f:  # if the file is not found, an exception will be raised anyway
        lines = f.readlines()
    for line in lines[1:]:  # skip the first line
        tmp = line.split()
        time.append(float(tmp[0]))
        timed_data = np.zeros((len(tmp) - 1,))
        for i, val in enumerate(tmp[1:]):
            timed_data[i] = float(val)
        data.append(timed_data)
    return time, data
def write_datafile(path, data):
    """Writes a dictionary of columns (as produced by parse_datafile) to path,
    prefixing the header line with '% '.
    """
    keys = list(data.keys())
    values = list(data.values())
    with open(path, 'w') as f:
        f.write(r'% ' + '\t'.join(keys) + '\n')
        for t_i in range(len(values[0])):
            line = '\t'.join([str(array[t_i]) for array in values]) + '\n'
            f.write(line)

def write_timed_numdatafile(path, time, data):
    """Writes time values and a 2D numpy array data to path, one row per time instant:
    the time value in the first column followed by the corresponding row of data.
    """
    with open(path, 'w') as f:
        for i in range(len(time)):
            line = '{}\t'.format(time[i]) + '\t'.join([str(data[i][j]) for j in range(data.shape[1])]) + '\n'
            f.write(line)
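A round-trip sketch for the timed-data functions (the file name and values are illustrative). Note that write_timed_numdatafile writes no header line while parse_timed_numdatafile skips the first line, so the first written row is treated as a header when parsed back:

t = [0.0, 0.1]
u = np.array([[1.0, 2.0],
              [3.0, 4.0]])
write_timed_numdatafile('u.dat', t, u)
time, data = parse_timed_numdatafile('u.dat')
print(time)     # [0.1]
print(data[0])  # array([3., 4.])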
def load_function_from_module(full_function_name):
    """Imports a function given its fully qualified name, e.g. 'package.module.func'.
    """
    module_name, function_name = full_function_name.rsplit('.', 1)
    module_ = importlib.import_module(module_name)
    return getattr(module_, function_name)
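A usage sketch of load_function_from_module with a standard-library function, so it runs as-is:

join = load_function_from_module('os.path.join')
print(join('a', 'b'))  # 'a/b' ('a\\b' on Windows)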
from comsdk.aux import find_dir_by_named_regexp
from functools import partial
import os
class DistributedStorage:
    """
    Distributed storage is a set of sources containing the data. The sources must be accessible via the OS API.
    It is assumed that the data somewhat overlaps; namely, it should overlap in terms of the catalog hierarchy.
    However, this implementation does not guarantee the uniqueness of the data: instead, it uses a priority to prefer
    one source over another while looking up. Even though duplicates are acceptable, the ones found will be printed
    out for the sake of the user's attention.
    """
    def __init__(self, abs_storage_paths, prior_storage_index=0):
        self.storage_paths = abs_storage_paths
        self.prior_storage_index = prior_storage_index
    def get_dir_path(self, dir_):
        """
        Returns the full path to dir_ or None if dir_ is absent.
        """
        dir_path_tuple = self.lookup_through_dir(dir_,
                lambda dir_path: (dir_path, dir_path) if os.path.exists(dir_path) else None)
        return dir_path_tuple[0] if dir_path_tuple is not None else None

    def make_dir(self, dir_):
        """
        Creates dir_ in the prior storage. Returns the full path to it.
        """
        path_ = os.path.join(self.storage_paths[self.prior_storage_index], dir_)
        os.makedirs(path_)
        return path_

    def find_dir_by_named_regexp(self, parent_dir, regexp):
        """
        Finds a directory in parent_dir fulfilling regexp. Returns a tuple (full_path_to_found_dir, named_params_from_regexp).
        """
        return self.lookup_through_dir(parent_dir, partial(find_dir_by_named_regexp, regexp))

    def lookup_through_dir(self, dir_, lookup_func):
        """
        Looks up the data in dir_ by executing lookup_func on dir_. Returns a tuple (full_path_to_dir, some_data_regarding_dir)
        which must, in turn, be returned by lookup_func. lookup_func must take a single argument -- the full path to the dir.
        """
        possible_paths = [os.path.join(source, dir_) if dir_ != '' else source for source in self.storage_paths]
        found_data = None
        prior_found = False
        for path_i, path_ in enumerate(possible_paths):
            if os.path.exists(path_):
                tmp_found_data = lookup_func(path_)
                if tmp_found_data is not None:
                    tmp_found_path = os.path.join(path_, tmp_found_data[0])
                    if found_data is not None:
                        print("Duplicate distributed dir found: '{}' and '{}'".format(tmp_found_path, found_data[0]))
                    if not prior_found:
                        found_data = (tmp_found_path, tmp_found_data[1])
                    if path_i == self.prior_storage_index:
                        prior_found = True
        return found_data

    def listdir(self, dir_):
        """
        Lists the content of dir_. Returns a tuple (dirnames, filenames) which is obtained as a simple union of the contents
        of the sources. Therefore, there might be duplicates whose detection must be performed elsewhere.
        """
        dirnames = []
        filenames = []
        for storage_path in self.storage_paths:
            if os.path.exists(os.path.join(storage_path, dir_)):
                _, dirnames_, filenames_ = next(os.walk(os.path.join(storage_path, dir_)))
                dirnames += dirnames_
                filenames += filenames_
        return dirnames, filenames
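A minimal usage sketch of DistributedStorage (the storage paths and dir name are illustrative):

storage = DistributedStorage(['/ssd/research', '/hdd/research'], prior_storage_index=0)
path = storage.get_dir_path('RUNS-2019')            # full path from the preferred source, or None
dirnames, filenames = storage.listdir('RUNS-2019')  # union of the sources' contents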
import os
import subprocess

from comsdk.aux import cp, rm
class BaseEnvironment(object):
    def __init__(self):
        self._programs = {}

    def preprocess(self, working_dir, input_copies_list):
        raise NotImplementedError()

    def execute(self, working_dir, prog_name, command_line):
        raise NotImplementedError()

    def postprocess(self, working_dir, output_copies_list):
        raise NotImplementedError()

    def add_program(self, prog_name, path_to_prog):
        self._programs[prog_name] = path_to_prog

    def _print_copy_msg(self, from_, to_):
        print('\tCopying %s to %s' % (from_, to_))

    def _print_exec_msg(self, cmd, is_remote):
        # _machine_name is expected to be set by remote environments
        where = '@' + self._machine_name if is_remote else ''
        print('\tExecuting %s: %s' % (where, cmd))
class LocalEnvironment(BaseEnvironment):
    def __init__(self):
        super().__init__()

    def preprocess(self, working_dir, input_copies_list):
        for copy_target in input_copies_list:
            self._copy(copy_target, working_dir)

    def execute(self, working_dir, prog_name, args_str):
        prog_path = os.path.join(self._programs[prog_name], prog_name)
        command_line = 'cd {}; {} {}'.format(working_dir, prog_path, args_str)
        # Use PIPEs to avoid breaking the child process when the parent process finishes
        # (works on Linux; a solution for Windows is to add creationflags=0x00000010 instead of stdout, stderr, stdin):
        #pid = subprocess.Popen([command_line], shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
        #print(pid)
        subprocess.call([command_line], shell=True)

    def postprocess(self, working_dir, output_copies_list):
        pass

    def _copy(self, from_, to_, mode='from_local'):
        """Any mode is ignored since the copying happens within the local machine anyway.
        """
        cp(from_, to_)
        self._print_copy_msg(from_, to_)

    def rm(self, target):
        rm(target)
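A sketch of how LocalEnvironment is meant to be driven (the program name and paths are illustrative):

env = LocalEnvironment()
env.add_program('square', '/home/user/tests/square')        # dir containing the 'square' binary
env.preprocess('/tmp/workdir', ['/home/user/tests/a.dat'])  # copy inputs into the working dir
env.execute('/tmp/workdir', 'square', 'a.dat')              # runs 'cd /tmp/workdir; .../square a.dat'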
{
    "LOCAL_HOST": {
        "research_path": "...",
        "custom_programs": {
            "@path_to_binaries@": ["@bin1@", "@bin2@", ...],
            ...
        }
    },
    "REMOTE_HOSTS": {
        "@remote_host_sid@": {
            "ssh_host": "...",
            "max_cores": ...,
            "username": "...",
            "password": "...",
            "research_path": "...",
            "env_programs": ["@bin1@", "@bin2@", ...],
            "custom_programs": {
                "@path_to_binaries@": ["@bin1@", "@bin2@", ...],
                ...
            },
            "sge_template_name": "...",
            "job_setter": "...",
            "job_finished_checker": "..."
        },
        ...
    },
    "RESEARCH": {
        "@research_sid@": "@research_full_name@",
        ...
    },
    "RESEARCH_PROPS": {
        ...
    }
}
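The JSON above is a template with @...@ placeholders; a user-specific copy (config_research.json, ignored by git above) would be read with the standard json module. A minimal loading sketch, assuming the placeholders have been filled in:

import json

with open('config_research.json', 'r') as f:
    conf = json.load(f)
local_research_path = conf['LOCAL_HOST']['research_path']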
#include <fstream>
#include <string>

using namespace std;

// Reads an integer from the input file given as the first command-line argument
// and writes its square to b.dat.
int main(int argc, char* argv[])
{
    string input_file_path(argv[1]);
    string output_file_path("b.dat");
    ifstream f_in(input_file_path);
    int x;
    f_in >> x;
    ofstream f_out(output_file_path);
    f_out << x*x;
    return 0;
}
#$ -cwd -V
#$ -l h_rt=12:00:00
#$ -pe smp 12
/home/home01/mmap/tests/square/square /home/home01/mmap/tests/square_test_dir/a.dat
#$ -cwd -V
#$ -l h_rt=12:00:00
#$ -pe smp 12
./findsoln -symms reflect_symmetry.asc -R 170.320 -o find-170.320 -es 1e-15 -eqb find-170.330/ubest.h5
qsub fe_170.315.sh