Commit 281982ca authored by Alexandr Sokolov

Merge branch 'dev'

# Conflicts:
#   README.md
parents bfd8ffb4 55ad2207
# ignore custom config file
config_research.json
*.log
__pycache__
A library for graph-oriented development.
# pycomsdk
A set of tools simplifying computational research based on distributed computing.
# Getting started
## Installation
This package can be installed directly from GitLab via
```bash
$ pip install git+https://sa2systems.ru:88/com/pycomsdk.git
```
All the requirements will be installed automatically.
To update the package to the latest version, first uninstall it and then install it again.
## Configuration
To start working with `comsdk`, create your own `config_research.json` based on [this example](/config_research.json.example) and put it into `~/.comsdk`:
```bash
$ cp config_research.json.example ~/.comsdk/config_research.json
```
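Once the config is in place, communication objects can be created directly from it. A minimal sketch (the host id `my_cluster` and the paths are placeholders for whatever you define under `REMOTE_HOSTS` in your own config):
```python
from comsdk.communication import LocalCommunication, SshCommunication

local_comm = LocalCommunication.create_from_config()             # uses the LOCAL_HOST section
remote_comm = SshCommunication.create_from_config('my_cluster')  # a key under REMOTE_HOSTS
remote_comm.copy('input.dat', '/home/user/run', mode='from_local')  # hypothetical paths
remote_comm.execute('ls', working_dir='/home/user/run')
```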
import unittest
from copy import deepcopy
import subprocess
import os
import random
import sys
from test_funcs.simplest import *
import comsdk.parser as pars
from comsdk.graph import *
from comsdk.edge import Edge
prsr = pars.Parser(tocpp=True)
data = {"a":10, "b":0}
gr = prsr.parse_file(sys.argv[1])
if len(sys.argv) > 2 and sys.argv[2] != "":
prsr.generate_cpp(sys.argv[2])
else:
prsr.generate_cpp()
import os
import os.path
import shutil
import subprocess
import shlex
import json
import socket
from stat import S_ISDIR
from abc import ABCMeta, abstractmethod
import logging
import paramiko
import comsdk.misc as aux
class Host(object):
'''
Class storing all necessary information about the host of execution.
'''
def __init__(self):
self.programs = {}
self.commands = {}
def add_program(self, prog_name,
path_to_prog=None,
):
self.programs[prog_name] = path_to_prog
def add_command(self, cmd_name, cmd):
self.commands[cmd_name] = cmd
def get_program_launch_path(self, prog_name):
if prog_name not in self.programs:
raise ValueError(f'Program "{prog_name}" is not recognized. '
'Please add this program to "custom_programs" '
'in the corresponding host in the config file '
'if you want to use it.')
path_to_prog = self.programs[prog_name]
if path_to_prog is not None:
print(self.programs[prog_name], prog_name)
return self.join_path(self.programs[prog_name], prog_name)
else:
return prog_name
def join_path(self, *path_list):
return os.path.join(*path_list)
class RemoteHost(Host):
'''
RemoteHost extends Host including information about ssh host and the number of cores.
'''
def __init__(self, ssh_host, cores, sge_template_name, job_setter, job_finished_checker):
self.ssh_host = ssh_host
self.cores = cores
self.sge_template_name = sge_template_name
self.set_job_id = aux.load_function_from_module(job_setter)
self.check_task_finished = aux.load_function_from_module(job_finished_checker)
self._job_setter = job_setter
self._job_finished_checker = job_finished_checker
super().__init__()
def __getstate__(self):
return {
'ssh_host': self.ssh_host,
'cores': self.cores,
'programs': self.programs,
'sge_template_name': self.sge_template_name,
'job_setter': self._job_setter,
'job_finished_checker': self._job_finished_checker,
}
def __setstate__(self, state):
self.ssh_host = state['ssh_host']
self.cores = state['cores']
self.programs = state['programs']
self.sge_template_name = state['sge_template_name']
self.set_job_id = aux.load_function_from_module(state['job_setter'])
self.check_task_finished = aux.load_function_from_module(state['job_finished_checker'])
def join_path(self, *path_list):
# For RemoteHost, we assume that it is posix-based
return '/'.join(path_list)
# Decorator
def enable_sftp(func):
def wrapped_func(self, *args, **kwds):
self._init_sftp()
return func(self, *args, **kwds)
return wrapped_func
class BaseCommunication(metaclass=ABCMeta):
'''
BaseCommunication is an abstract class which can be used to implement the simplest access to a machine.
    A concrete class ought to use a concrete method of communication (e.g., the OS API or ssh) that allows it to access
    the filesystem (copy and remove files) and execute command lines on the machine.
Since a machine can be, in particular, the local machine, and at the same time we must always establish the communication between
the local machine and a machine being communicated, we have to sort the terminology out. We shall call the latter a communicated
    machine whilst the former remains the local machine.
Generally, two types of files exchange are possible:
(1) between the local machine and a communicated machine,
(2) within a communicated machine.
    Since for now only copying implies this division, we introduce so-called 'modes of copying': from_local, from_remote
    and all_remote (see the copy method below).
'''
def __init__(self, host, machine_name):
self.host = host
self.machine_name = machine_name
@abstractmethod
def execute(self, command, working_dir=None):
pass
@abstractmethod
def copy(self, from_, to_, mode='from_local', show_msg=False):
'''
Copies from_ to to_ which are interpreted according to mode:
(1) from_local (default) -> from_ is local path, to_ is a path on a communicated machine
        (2) from_remote -> from_ is a path on a communicated machine, to_ is a local path
(3) all_remote -> from_ and to_ are paths on a communicated machine
from_ and to_ can be dirs or files according to the following combinations:
(1) from_ is dir, to_ is dir
(2) from_ is file, to_ is dir
(3) from_ is file, to_ is file
'''
pass
@abstractmethod
def rm(self, target):
'''
Removes target which can be a dir or file
'''
pass
def execute_program(self, prog_name, args_str, working_dir=None, chaining_command_at_start='',
chaining_command_at_end=''):
prog_path = self.host.get_program_launch_path(prog_name)
command = f'{chaining_command_at_start} {prog_path} {args_str} {chaining_command_at_end}'
return self.execute(command, working_dir)
def _print_copy_msg(self, from_, to_):
print('\tCopying %s to %s' % (from_, to_))
def _print_exec_msg(self, cmd, is_remote):
where = '@' + self.machine_name if is_remote else ''
print('\tExecuting %s: %s' % (where, cmd))
class LocalCommunication(BaseCommunication):
def __init__(self, local_host, machine_name='laptop'):
super(LocalCommunication, self).__init__(local_host, machine_name)
@classmethod
def create_from_config(cls):
with open(os.path.expanduser('~/.comsdk/config_research.json'), 'r') as f:
conf = json.load(f)
local_host = Host()
_add_programs_and_commands_from_config(local_host, conf['LOCAL_HOST'])
return LocalCommunication(local_host)
def execute(self, command, working_dir=None):
if working_dir is None:
command_line = command
else:
if os.name == 'posix':
command_line = 'cd {}; {}'.format(working_dir, command)
elif os.name == 'nt':
command_line = ''
if working_dir[0] != 'C':
command_line += f'{working_dir[0]}: && '
command_line += 'cd {} && {}'.format(working_dir, command)
#self._print_exec_msg(command_line, is_remote=False)
#res = subprocess.call([command_line], shell=True)
# print(command_line)
res = subprocess.run(command_line, shell=True)
return [], []
def copy(self, from_, to_, mode='from_local', show_msg=False):
'''
Any mode is ignored since the copying shall be within a local machine anyway
'''
if show_msg:
self._print_copy_msg(from_, to_)
        return aux.cp(from_, to_)
def rm(self, target):
aux.rm(target)
class SshCommunication(BaseCommunication):
def __init__(self, remote_host, username, password, machine_name='', pkey=None, execute_after_connection=None):
if not isinstance(remote_host, RemoteHost):
            raise Exception('Only RemoteHost can be used to build SshCommunication')
self.host = remote_host
self.username = username
self.password = password
self.pkey = pkey
self.execute_after_connection = execute_after_connection
self.ssh_client = paramiko.SSHClient()
self.sftp_client = None
#self.main_dir = '/nobackup/mmap/research'
super().__init__(self.host, machine_name)
self.connect()
paramiko.util.log_to_file('paramiko.log')
@classmethod
def create_from_config(cls, host_sid):
with open(os.path.expanduser('~/.comsdk/config_research.json'), 'r') as f:
conf = json.load(f)
hostconf = conf['REMOTE_HOSTS'][host_sid]
remote_host = RemoteHost(ssh_host=hostconf['ssh_host'],
cores=hostconf['max_cores'],
sge_template_name=hostconf['sge_template_name'],
job_setter=hostconf['job_setter'],
job_finished_checker=hostconf['job_finished_checker'])
_add_programs_and_commands_from_config(remote_host, hostconf)
return SshCommunication(remote_host, username=hostconf['username'],
password=hostconf['password'] if 'password' in hostconf else None,
machine_name=host_sid,
pkey=hostconf['pkey'] if 'pkey' in hostconf else None,
execute_after_connection=hostconf['execute_after_connection'] if 'execute_after_connection' in hostconf else None)
def __getstate__(self):
return {
'host': self.host.__getstate__(),
'username': self.username,
'password': self.password,
'pkey': self.pkey,
'execute_after_connection': self.execute_after_connection,
}
def __setstate__(self, state):
remote_host = RemoteHost.__new__(RemoteHost)
remote_host.__setstate__(state['host'])
self.__init__(remote_host, state['username'], state['password'], pkey=state['pkey'],
execute_after_connection=state['execute_after_connection'])
def execute(self, command, working_dir=None):
if self.ssh_client is None:
raise Exception('Remote host is not set')
self._print_exec_msg(command, is_remote=True)
command_line = command if working_dir is None else 'cd {}; {}'.format(working_dir, command)
command_line = command_line if self.execute_after_connection is None else f'{self.execute_after_connection}; {command_line}'
print(command_line)
def _cleanup():
print('\t\tMSG: Reboot SSH client')
self.reboot()
cleanup = _cleanup
received = False
while not received:
try:
stdin, stdout, stderr = self.ssh_client.exec_command(command_line)
received = True
except (OSError, socket.timeout, socket.error, paramiko.sftp.SFTPError) as e:
                print('\t\tMSG: Caught {} exception while executing "{}"'.format(type(e).__name__, command_line))
print('\t\tMSG: It says: {}'.format(e))
else:
cleanup = lambda: None
cleanup()
        stdout_lines = stdout.readlines()
        stderr_lines = stderr.readlines()
        for line in stdout_lines + stderr_lines:
            print('\t\t' + line.strip('\n'))
        return stdout_lines, stderr_lines
def copy(self, from_, to_, mode='from_local', show_msg=False):
if self.ssh_client is None:
raise Exception('Remote host is not set')
self._init_sftp()
new_path = None
if mode == 'from_local':
new_path = self._copy_from_local(from_, to_, show_msg)
elif mode == 'from_remote':
new_path = self._copy_from_remote(from_, to_, show_msg)
elif mode == 'all_remote':
if show_msg:
self._print_copy_msg(self.machine_name + ':' + from_, self.machine_name + ':' + to_)
self._mkdirp(to_)
self.execute('cp -r %s %s' % (from_, to_))
else:
raise Exception("Incorrect mode '%s'" % mode)
return new_path
def rm(self, target):
if self.ssh_client is None:
raise Exception('Remote host is not set')
self._init_sftp()
self.execute('rm -r %s' % target)
@enable_sftp
def mkdir(self, path):
self.sftp_client.mkdir(path)
@enable_sftp
def listdir(self, path_on_remote):
return self.sftp_client.listdir(path_on_remote)
@enable_sftp
def _chdir(self, path=None):
self.sftp_client.chdir(path)
@enable_sftp
def _mkdirp(self, path):
path_list = path.split('/')
cur_dir = ''
        if (path_list[0] == '') or (path_list[0] == '~'):  # path is absolute or relative to the user's home dir => no need to check the obvious
cur_dir = path_list.pop(0) + '/'
start_creating = False # just to exclude unnecessary stat() calls when we catch non-existing dir
for dir_ in path_list:
if dir_ == '': # trailing slash or double slash, can skip
continue
cur_dir += dir_
if start_creating or (not self._is_remote_dir(cur_dir)):
self.mkdir(cur_dir)
if not start_creating:
start_creating = True
cur_dir += '/'
@enable_sftp
def _open(self, filename, mode='r'):
return self.sftp_client.open(filename, mode)
@enable_sftp
def _get(self, remote_path, local_path):
def _cleanup():
print('\t\tMSG: Reboot SSH client')
self.reboot()
if os.path.exists(local_path):
aux.rm(local_path)
cleanup = _cleanup
received = False
while not received:
try:
res = self.sftp_client.get(remote_path, local_path)
received = True
except FileNotFoundError as e:
logging.error('Cannot find file or directory "{}" => interrupt downloading'.format(remote_path))
if os.path.exists(local_path):
aux.rm(local_path)
raise
except (socket.timeout, socket.error, paramiko.sftp.SFTPError) as e:
                print('\t\tMSG: Caught {} exception while getting "{}"'.format(type(e).__name__, remote_path))
print('\t\tMSG: It says: {}'.format(e))
else:
cleanup = lambda: None
cleanup()
return res
@enable_sftp
def _put(self, local_path, remote_path):
def _cleanup():
print('\t\tMSG: Reboot SSH client')
self.reboot()
self.rm(remote_path)
cleanup = _cleanup
received = False
while not received:
try:
res = self.sftp_client.put(local_path, remote_path)
received = True
except FileNotFoundError as e:
logging.error('Cannot find file or directory "{}" => interrupt uploading'.format(local_path))
self.rm(remote_path)
raise
except (socket.timeout, socket.error, paramiko.sftp.SFTPError) as e:
                print('\t\tMSG: Caught {} exception while putting "{}"'.format(type(e).__name__, remote_path))
print('\t\tMSG: It says: {}'.format(e))
else:
cleanup = lambda: None
cleanup()
return res
def _is_remote_dir(self, path):
try:
return S_ISDIR(self.sftp_client.stat(path).st_mode)
except IOError:
return False
def _copy_from_local(self, from_, to_, show_msg=False):
new_path_on_remote = to_ + '/' + os.path.basename(from_)
if os.path.isfile(from_):
self._mkdirp(to_)
if show_msg:
self._print_copy_msg(from_, self.machine_name + ':' + to_)
self._put(from_, new_path_on_remote)
elif os.path.isdir(from_):
self.mkdir(new_path_on_remote)
for dir_or_file in os.listdir(from_):
self._copy_from_local(os.path.join(from_, dir_or_file), new_path_on_remote, show_msg)
else:
raise CommunicationError("Path %s does not exist" % from_)
return new_path_on_remote
def _copy_from_remote(self, from_, to_, show_msg=False):
new_path_on_local = os.path.join(to_, os.path.basename(from_))
if not self._is_remote_dir(from_):
if show_msg:
self._print_copy_msg(self.machine_name + ':' + from_, to_)
self._get(from_, new_path_on_local)
else:
os.mkdir(new_path_on_local)
for dir_or_file in self.sftp_client.listdir(from_):
self._copy_from_remote(from_ + '/' + dir_or_file, new_path_on_local, show_msg)
return new_path_on_local
def disconnect(self):
if self.sftp_client is not None:
self.sftp_client.close()
self.sftp_client = None
self.ssh_client.close()
def connect(self):
self.ssh_client.load_system_host_keys()
self.ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
connected = False
        # read ssh config. We assume that all necessary re-routing is done there via ProxyCommand
# only ProxyCommand is read; password should be passed explicitly to SshCommunication
ssh_config = paramiko.SSHConfig()
user_config_file = os.path.expanduser("~/.ssh/config")
if os.path.exists(user_config_file):
with open(user_config_file) as f:
ssh_config.parse(f)
user_config = ssh_config.lookup(self.host.ssh_host)
sock = None
if 'proxycommand' in user_config:
sock = paramiko.ProxyCommand(user_config['proxycommand'])
while not connected:
try:
if self.pkey is not None: # if a private key is given, first attempt to connect using it
self.ssh_client.connect(self.host.ssh_host, username=self.username, key_filename=self.pkey, timeout=10, sock=sock)
                else:  # otherwise try to connect via password if it is given
print(self.host.ssh_host, self.username)
self.ssh_client.connect(self.host.ssh_host, username=self.username, password=self.password, look_for_keys=False, allow_agent=False, timeout=10, sock=sock)
connected = True
except socket.timeout as e:
                print('\t\tMSG: Caught {} exception while connecting'.format(type(e).__name__))
print('\t\tMSG: It says: {}'.format(e))
transport = self.ssh_client.get_transport()
transport.packetizer.REKEY_BYTES = pow(2, 40) # 1TB max, this is a security degradation (otherwise we get "paramiko.ssh_exception.SSHException: Key-exchange timed out waiting for key negotiation")
transport.packetizer.REKEY_PACKETS = pow(2, 40) # 1TB max, this is a security degradation (otherwise we get "paramiko.ssh_exception.SSHException: Key-exchange timed out waiting for key negotiation")
if self.execute_after_connection is not None:
self.execute(self.execute_after_connection)
def reboot(self):
self.disconnect()
self.connect()
self._init_sftp()
def _init_sftp(self):
if self.sftp_client is None:
self.sftp_client = self.ssh_client.open_sftp()
self.sftp_client.get_channel().settimeout(10)
class CommunicationError(Exception):
pass
def _add_programs_and_commands_from_config(host, hostconf):
if 'custom_programs' in hostconf:
paths = hostconf['custom_programs']
for path, programs in paths.items():
for program in programs:
host.add_program(program, path)
if 'env_programs' in hostconf:
for program in hostconf['env_programs']:
host.add_program(program)
if 'custom_commands' in hostconf:
for cmd_name, cmd in hostconf['custom_commands'].items():
host.add_command(cmd_name, cmd)
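# Usage sketch (not part of the module; the paths and the 'my_cluster' host id below are
# hypothetical). The three copy modes described in BaseCommunication.copy map to the
# following calls on an SshCommunication instance:
#     comm = SshCommunication.create_from_config('my_cluster')
#     comm.copy('/local/run/in.dat', '/remote/run', mode='from_local')    # local -> remote machine
#     comm.copy('/remote/run/out.dat', '/local/run', mode='from_remote')  # remote machine -> local
#     comm.copy('/remote/run/a', '/remote/backup', mode='all_remote')     # within the remote machine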
from comsdk.misc import find_dir_by_named_regexp
from functools import partial
import os
class DistributedStorage:
"""
    Distributed storage is a set of sources containing the data. The sources must be accessible via the OS API.
    It is assumed that the data may overlap between sources, namely, it should overlap in terms of the directory hierarchy.
However, this implementation does not guarantee the uniqueness of data: instead, it uses a priority to prefer
one source over another while looking up. Even though duplicates are acceptable, the found ones will be printed
out for the sake of user's attention.
"""
def __init__(self, abs_storage_paths, prior_storage_index=0):
self.storage_paths = abs_storage_paths
self.prior_storage_index = prior_storage_index
def get_dir_path(self, dir_):
"""
Returns the full path to dir_ or None if dir_ is absent.
"""
dir_path_tuple = self.lookup_through_dir(dir_, lambda dir_path: (dir_path, dir_path)
if os.path.exists(dir_path) else None)
return dir_path_tuple[0] if dir_path_tuple is not None else None
def make_dir(self, dir_):
"""
Creates dir_ in prior storage. Returns the full path to it.
"""
path_ = os.path.join(self.storage_paths[self.prior_storage_index], dir_)
os.makedirs(path_)
return path_
def find_dir_by_named_regexp(self, parent_dir, regexp):
"""
        Finds a directory in parent_dir matching regexp. Returns a tuple (full_path_to_found_dir, named_params_from_regexp).
"""
return self.lookup_through_dir(parent_dir, partial(find_dir_by_named_regexp, regexp))
def lookup_through_dir(self, dir_, lookup_func):
"""
Looks up the data in dir_ by executing lookup_func on dir_. Returns a tuple (full_path_to_dir, some_data_regarding_dir)
which must, in turn, be returned by lookup_func. lookup_func must take a single argument -- full path to the dir.
"""
possible_paths = [os.path.join(source, dir_) if dir_ != '' else source for source in self.storage_paths]
found_data = None
prior_found = False
for path_i in range(len(possible_paths)):
path_ = possible_paths[path_i]
if os.path.exists(possible_paths[path_i]):
tmp_found_data = lookup_func(possible_paths[path_i])
if tmp_found_data is not None:
tmp_found_path = os.path.join(possible_paths[path_i], tmp_found_data[0])
if found_data is not None:
print("Duplicate distributed dir is found: '{}' and '{}'".format(tmp_found_path, found_data[0]))
if not prior_found:
found_data = (tmp_found_path, tmp_found_data[1])
if path_i == self.prior_storage_index:
prior_found = True
return found_data
def listdir(self, dir_):
"""
Lists the content of dir_. Returns a tuple (dirnames, filenames) which are obtained by simple union of the content of sources.
Therefore, there might be copies whose detection must be performed elsewhere.
"""
dirnames = []
filenames = []
for storage_path in self.storage_paths:
if os.path.exists(os.path.join(storage_path, dir_)):
_, dirnames_, filenames_ = next(os.walk(os.path.join(storage_path, dir_)))
dirnames += dirnames_
filenames += filenames_
return dirnames, filenames
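# Usage sketch (the storage roots and directory names are hypothetical): the storage
# resolves relative directory names against several roots, preferring the one at
# prior_storage_index when duplicates exist.
#     storage = DistributedStorage(['/mnt/data1/research', '/mnt/data2/research'], prior_storage_index=0)
#     full_path = storage.get_dir_path('experiments/run-1')  # absolute path in one of the roots, or None
#     dirnames, filenames = storage.listdir('experiments')   # union of the contents of all roots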
import os
import posixpath
import pickle
from typing import Sequence, Tuple, Optional
import logging
import json
from mako.template import Template
import comsdk.misc as aux
from comsdk.communication import CommunicationError
from comsdk.graph import Func, State
dummy_predicate = Func(func=lambda d: True)
dummy_morphism = Func()
job_finished_predicate = Func(func=lambda d: d['job_finished'])
job_unfinished_predicate = Func(func=lambda d: not d['job_finished'])
class InOutMapping(object):
def __init__(self,
keys_mapping={},
relative_keys=(),
default_relative_key=(),
):
self._default_relative_key = default_relative_key if aux.is_sequence(default_relative_key) else (default_relative_key,)
self._relative_keys = relative_keys if aux.is_sequence(relative_keys) else (relative_keys,)
self._keys_mapping = keys_mapping
def __str__(self):
return 'Default relative key: {}\n' \
'Relative keys:\n{}\n' \
'Keys mapping:\n\tLocal -> Global\n\t----------------\n' \
'{}'.format('.'.join(self._default_relative_key),
'\n'.join(['\t' + '.'.join(k) for k in self._relative_keys]),
                           '\n'.join(['\t' + loc + ' -> ' + '.'.join(glo) for loc, glo in self._keys_mapping.items()]))
def build_proxy_data(self, data, dynamic_keys_mapping={}):
if self._default_relative_key == () and self._relative_keys == () and self._keys_mapping == {} and dynamic_keys_mapping == {}:
return data
else:
#print('\t{}\n\t{}\n\t{}'.format(self._relative_keys, self._keys_mapping, dynamic_keys_mapping))
return aux.ProxyDict(data, self._relative_keys, dict(self._keys_mapping, **dynamic_keys_mapping), self._default_relative_key)
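# Illustration of the keys mapping machinery (hypothetical data): with
# data = {'a': 1, 'run': {'n': 4}},
#     InOutMapping(keys_mapping={'cores': ('run', 'n')}).build_proxy_data(data)
# yields a proxy dict whose 'cores' entry reads data['run']['n'] (i.e. 4), whereas
# relative_keys=(('run',),) would instead expose 'n' itself at the top level.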
class Edge:
__slots__ = [
'pred_f',
'morph_f',
'_io_mapping',
'preprocess',
'postprocess',
'order',
'comment',
'mandatory_keys',
'use_proxy_data_for_pre_post_processing'
]
def __init__(self, predicate, morphism,
io_mapping=InOutMapping(),
order=0,
comment="",
mandatory_keys=(),
):
self.pred_f = predicate
self.morph_f = morphism
self._io_mapping = io_mapping
self.preprocess = lambda pd: None
self.postprocess = lambda pd: None
self.order = int(0 if order is None else order)
self.comment = comment
self.mandatory_keys = mandatory_keys
self.use_proxy_data_for_pre_post_processing=False
def predicate(self, data, dynamic_keys_mapping={}):
proxy_data = self._io_mapping.build_proxy_data(data, dynamic_keys_mapping)
return self.pred_f.func(proxy_data)
def morph(self, data, dynamic_keys_mapping={}):
#print(self.pred_name, self.morph_name, self.order)
proxy_data = self._io_mapping.build_proxy_data(data, dynamic_keys_mapping)
# print(proxy_data)
if (self.use_proxy_data_for_pre_post_processing):
self.preprocess(proxy_data)
else:
self.preprocess(data)
self._throw_if_not_set(proxy_data, self.mandatory_keys)
self.morph_f.func(proxy_data)
if (self.use_proxy_data_for_pre_post_processing):
self.postprocess(proxy_data)
else:
self.postprocess(data)
def _throw_if_not_set(self, data, mandatory_keys: Sequence[str]):
for k in mandatory_keys:
if k not in data:
logging.exception('EDGE {}: key "{}" is not set whilst being mandatory.\nIOMapping:\n'
'{}'.format(type(self).__name__, k, str(self._io_mapping)))
raise KeyError()
# raise KeyError('EDGE {}: key "{}" is not set whilst being mandatory.\nIOMapping:\n'
# '{}'.format(type(self).__name__, k, str(self._io_mapping)))
class ExecutableProgramEdge(Edge):
'''
Class implementing the edge which uses an external program to morph data.
    The program is launched via a so-called communication which, among other things, defines where the program is located and how it can be launched.
    The communication can also be used to launch the program on remote resources.
# DESCRIPTION OF KEYS MAPPINGS #
Since data structure is hierarchical, we introduced keys mappings. The edge needs to use some variables
from data which may be located in different (nested) keys of data (we will call these keys "global").
However, it is very convenient to implement the edge imagining that there is no nested structures
and all keys are available in the top-level of data (we will call these keys "local").
To link global and local keys, we introduce keys mapping, which are either dictionaries (local key string -> sequence) or sequences.
If the keys mapping is sequence, we treat it as a relative "path" to all needed keys.
Therefore, we have keys mappings for input and output keys.
# END OF DESCRIPTION OF KEYS MAPPINGS #
We expect that necessary input files are already on remote.
Programs may require three types of arguments:
1) keyword arguments (-somearg something)
2) flags (-someflag)
3) trailing arguments
Local keys determining the corresponding values are located in keyword_names, flag_names and trailing_args_keys.
Finally, data must be somehow updated after finishing. This will be done by updating data according to output_dict (it is just added)
'''
def __init__(self, program_name, comm,
predicate=dummy_predicate,
io_mapping=InOutMapping(),
output_dict={}, # output dict which will be added to the main dictionary (w.r.t. output_keys_mapping)
keyword_names=(), # "local keys" where keyword args are stored
flag_names=(), # "local keys" where flags are stored
trailing_args_keys=(), # "local keys" where trailing args are stored
remote=False,
stdout_processor=None,
chaining_command_at_start=lambda d: '',
chaining_command_at_end=lambda d: '',
):
#predicate = predicate if predicate is not None else dummy_predicate
self._output_dict = output_dict
self._comm = comm
self._program_name = program_name
self._keyword_names = keyword_names
self._flag_names = flag_names
self._trailing_args_keys = trailing_args_keys
self._working_dir_key = '__REMOTE_WORKING_DIR__' if remote else '__WORKING_DIR__'
mandatory_keys = [self._working_dir_key]
self._stdout_processor = stdout_processor
self.chaining_command_at_start = chaining_command_at_start
self.chaining_command_at_end = chaining_command_at_end
super().__init__(predicate, Func(func=self.execute), io_mapping, mandatory_keys=mandatory_keys)
def execute(self, data):
args_str = build_args_line(data, self._keyword_names, self._flag_names, self._trailing_args_keys)
working_dir = data[self._working_dir_key]
stdout_lines, stderr_lines = self._comm.execute_program(self._program_name, args_str, working_dir,
self.chaining_command_at_start(data),
self.chaining_command_at_end(data))
output_data = self._output_dict
if self._stdout_processor:
stdout_data = self._stdout_processor(data, stdout_lines)
data.update(stdout_data)
data.update(output_data)
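# Construction sketch (the program name 'my_solver' and the data keys below are hypothetical;
# the program must be registered for the host, e.g. under 'custom_programs' in the config):
#     edge = ExecutableProgramEdge('my_solver', comm,
#                                  keyword_names=('n',),               # rendered as '-n <data["n"]>'
#                                  trailing_args_keys=('input_file',),
#                                  output_dict={'job_finished': True})
# On edge.morph(data) the argument line is built by build_args_line, the program is executed
# in data['__WORKING_DIR__'] via comm, and data is then updated with output_dict.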
class QsubScriptEdge(Edge):
'''
Class implementing the edge which builds up the sh-script for qsub.
The script is created via communication.
# DESCRIPTION OF KEYS MAPPINGS #
Since data structure is hierarchical, we introduced keys mappings. The edge needs to use some variables
from data which may be located in different (nested) keys of data (we will call these keys "global").
However, it is very convenient to implement the edge imagining that there is no nested structures
and all keys are available in the top-level of data (we will call these keys "local").
To link global and local keys, we introduce keys mapping, which are either dictionaries (local key string -> sequence) or sequences.
If the keys mapping is sequence, we treat it as a relative "path" to all needed keys.
Therefore, we have keys mappings for input and output keys.
# END OF DESCRIPTION OF KEYS MAPPINGS #
Data will be augmented by 'qsub_script' pointing to the local file.
'''
def __init__(self, program_name, local_comm, remote_comm,
predicate=dummy_predicate,
io_mapping=InOutMapping(),
keyword_names=(), # "local keys" where keyword args are stored
flag_names=(), # "local keys" where flags are stored
trailing_args_keys=(), # "local keys" where trailing args are stored
):
# predicate = predicate if predicate is not None else dummy_predicate
self._local_comm = local_comm
self._remote_comm = remote_comm
self._program_name = program_name
self._keyword_names = keyword_names
self._flag_names = flag_names
self._trailing_args_keys = trailing_args_keys
mandatory_keys = ['__WORKING_DIR__', 'qsub_script_name', 'time_required', 'cores_required']
super().__init__(predicate, Func(func=self.execute), io_mapping, mandatory_keys=mandatory_keys)
def execute(self, data):
if isinstance(data, aux.ProxyDict):
print('QsubScriptEdge -> {}: {}'.format('qsub_script_name', data._keys_mappings['qsub_script_name']))
qsub_script_path = os.path.join(data['__WORKING_DIR__'], data['qsub_script_name'])
args_str = build_args_line(data, self._keyword_names, self._flag_names, self._trailing_args_keys)
program_launch_path = self._remote_comm.host.get_program_launch_path(self._program_name)
command_line = '{} {}'.format(program_launch_path, args_str)
render_sge_template(self._remote_comm.host.sge_template_name, qsub_script_path,
data['cores_required'], data['time_required'], (command_line,))
data.update({'qsub_script': qsub_script_path})
class UploadOnRemoteEdge(Edge):
'''
Class implementing the edge which uploads the data to the remote computer.
It is done via environment which must provide the interface for that.
# DESCRIPTION OF KEYS MAPPINGS #
Since data structure is hierarchical, we introduced keys mappings. The edge needs to use some variables
from data which may be located in different (nested) keys of data (we will call these keys "global").
However, it is very convenient to implement the edge imagining that there is no nested structures
and all keys are available in the top-level of data (we will call these keys "local").
To link global and local keys, we introduce keys mapping, which are either dictionaries (local key string -> sequence) or sequences.
If the keys mapping is sequence, we treat it as a relative "path" to all needed keys.
Therefore, we have keys mappings for input and output keys.
# END OF DESCRIPTION OF KEYS MAPPINGS #
    Files for uploading must be listed in local_paths_keys which is a list of local data keys corresponding to these files.
They will be uploaded in remote working dir which must be in data['__REMOTE_WORKING_DIR__'].
After edge execution, data is going to be updated such that local paths will be replaced by remote ones.
'''
def __init__(self, comm,
predicate=dummy_predicate,
io_mapping=InOutMapping(),
local_paths_keys=(), # "local keys", needed to build a copy list
update_paths=True,
already_remote_path_key=None,
):
# predicate = predicate if predicate is not None else dummy_predicate
self._local_paths_keys = local_paths_keys
self._comm = comm
self._update_paths = update_paths
self._already_remote_path_key = already_remote_path_key
mandatory_keys = list(self._local_paths_keys) + ['__WORKING_DIR__', '__REMOTE_WORKING_DIR__']
if self._already_remote_path_key is not None:
mandatory_keys.append(self._already_remote_path_key)
super().__init__(predicate, Func(func=self.execute), io_mapping, mandatory_keys=mandatory_keys)
def execute(self, data):
if self._already_remote_path_key is not None:
if data[self._already_remote_path_key]:
return
remote_working_dir = data['__REMOTE_WORKING_DIR__']
for key in self._local_paths_keys:
try:
# try data[key] as an absolute path
data[key] = self._comm.copy(data[key], remote_working_dir, mode='from_local')
except CommunicationError as e:
# try data[key] as a relative path
working_dir = data['__WORKING_DIR__']
if isinstance(data, aux.ProxyDict):
print('UploadOnRemoteEdge -> {}: {}'.format(key, data._keys_mappings[key]))
remote_path = self._comm.copy(os.path.join(working_dir, data[key]), remote_working_dir,
mode='from_local')
if self._update_paths:
data[key] = remote_path
class DownloadFromRemoteEdge(Edge):
'''
Class implementing the edge which downloads the data from the remote computer.
It is done via environment which must provide the interface for that.
# DESCRIPTION OF KEYS MAPPINGS #
Since data structure is hierarchical, we introduced keys mappings. The edge needs to use some variables
from data which may be located in different (nested) keys of data (we will call these keys "global").
However, it is very convenient to implement the edge imagining that there is no nested structures
and all keys are available in the top-level of data (we will call these keys "local").
To link global and local keys, we introduce keys mapping, which are either dictionaries (local key string -> sequence) or sequences.
If the keys mapping is sequence, we treat it as a relative "path" to all needed keys.
Therefore, we have keys mappings for input and output keys.
# END OF DESCRIPTION OF KEYS MAPPINGS #
    Files for downloading must be listed in remote_paths_keys which is a list of local data keys corresponding to these files.
    All these files are relative to the remote working dir and will be downloaded into the local working dir.
    Local working dir must be in data['__WORKING_DIR__'].
    Remote working dir must be in data['__REMOTE_WORKING_DIR__'].
After edge execution, data is going to be updated such that remote/relative paths will be replaced by local ones.
'''
def __init__(self, comm,
predicate=dummy_predicate,
io_mapping=InOutMapping(),
remote_paths_keys=(), # "local keys", needed to build a list for downloading
update_paths=True,
show_msg=False,
):
# predicate = predicate if predicate is not None else dummy_predicate
self._remote_paths_keys = remote_paths_keys
self._comm = comm
self._update_paths = update_paths
self._show_msg = show_msg
mandatory_keys = list(self._remote_paths_keys) + ['__WORKING_DIR__', '__REMOTE_WORKING_DIR__']
super().__init__(predicate, Func(func=self.execute), io_mapping, mandatory_keys=mandatory_keys)
def execute(self, data):
working_dir = data['__WORKING_DIR__']
remote_working_dir = data['__REMOTE_WORKING_DIR__']
for key in self._remote_paths_keys:
output_file_or_dir = data[key]
if output_file_or_dir is None:
continue
local_path = None
if output_file_or_dir == '*':
aux.print_msg_if_allowed('\tAll possible output files will be downloaded', allow=self._show_msg)
paths = self._comm.listdir(remote_working_dir)
local_full_paths = ['/'.join([working_dir, file_or_dir]) for file_or_dir in paths]
remote_full_paths = ['/'.join([remote_working_dir, file_or_dir]) for file_or_dir in paths]
for file_or_dir in remote_full_paths:
aux.print_msg_if_allowed('\tAm going to download "{}" to "{}"'.format(file_or_dir, working_dir),
allow=self._show_msg)
self._comm.copy(file_or_dir, working_dir, mode='from_remote', show_msg=self._show_msg)
local_path = local_full_paths
else:
output_file_or_dir_as_list = []
if isinstance(output_file_or_dir, list):
output_file_or_dir_as_list = output_file_or_dir
else:
output_file_or_dir_as_list = [output_file_or_dir]
for f in output_file_or_dir_as_list:
file_or_dir = '/'.join([remote_working_dir, f])
aux.print_msg_if_allowed('\tAm going to download "{}" to "{}"'.format(file_or_dir, working_dir),
allow=self._show_msg)
local_path = self._comm.copy(file_or_dir, working_dir,
mode='from_remote', show_msg=self._show_msg)
if self._update_paths:
data[key] = local_path
def make_cd(key_path):
def _cd(d):
if key_path == '..':
d['__WORKING_DIR__'] = os.path.dirname(d['__WORKING_DIR__'])
if '__REMOTE_WORKING_DIR__' in d:
d['__REMOTE_WORKING_DIR__'] = posixpath.dirname(d['__REMOTE_WORKING_DIR__'])
else:
subdir = aux.recursive_get(d, key_path)
d['__WORKING_DIR__'] = os.path.join(d['__WORKING_DIR__'], subdir)
if '__REMOTE_WORKING_DIR__' in d:
d['__REMOTE_WORKING_DIR__'] = posixpath.join(d['__REMOTE_WORKING_DIR__'], subdir)
return _cd
def make_mkdir(key_path, remote_comm=None):
def _mkdir(d):
remote = '__REMOTE_WORKING_DIR__' in d
dir = os.path.join(d['__WORKING_DIR__'],
d[key_path])
os.mkdir(dir)
if '__REMOTE_WORKING_DIR__' in d:
dir = os.path.join(d['__REMOTE_WORKING_DIR__'],
d[key_path])
remote_comm._mkdirp(dir)
return _mkdir
def make_dump(dump_name_format, format_keys=(), omit=None, method='pickle'):
def _dump(d):
format_params = [aux.recursive_get(d, key) for key in format_keys]
dump_path = os.path.join(d['__WORKING_DIR__'], dump_name_format.format(*format_params))
if omit is None:
dumped_d = d
else:
if (isinstance(d, aux.ProxyDict)):
dumped_d = {key: val for key, val in d._data.items() if not key in omit}
else:
dumped_d = {key: val for key, val in d.items() if not key in omit}
if method == 'pickle':
with open(dump_path, 'wb') as f:
pickle.dump(dumped_d, f)
elif method == 'json':
with open(dump_path, 'w') as f:
json.dump(dumped_d, f)
else:
raise ValueError(f'Method "{method}" is not supported in dumping')
return _dump
def make_composite_func(*funcs):
def _composite(d):
res = None
for func in funcs:
f_res = func(d)
# this trick allows us to combine returning
# and non-returning functions
if f_res is not None:
res = f_res
return res
return _composite
def make_composite_predicate(*preds):
def _composite(d):
for pred in preds:
if not pred(d):
return False
return True
return _composite
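# Sketch of how these helpers compose into a single morphism (the data key 'run_dir' is
# hypothetical): the resulting function creates a subdirectory, steps into it and dumps
# the current data there.
#     morph = make_composite_func(make_mkdir('run_dir'),
#                                 make_cd('run_dir'),
#                                 make_dump('data_dump.pkl'))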
def create_local_data_from_global_data(global_data, keys_mapping):
if keys_mapping is None:
return global_data
elif aux.is_sequence(keys_mapping):
return aux.recursive_get(global_data, keys_mapping)
else:
return {local_key: aux.recursive_get(global_data, global_key) for local_key, global_key in keys_mapping.items()}
def update_global_data_according_to_local_data(local_data, global_data, keys_mapping):
if keys_mapping is None:
global_data.update(local_data)
elif aux.is_sequence(keys_mapping):
relative_data = aux.recursive_get(global_data, keys_mapping)
relative_data.update(local_data)
else:
for local_key, global_key in keys_mapping.items():
            aux.recursive_set(global_data, global_key, local_data[local_key])
def build_args_line(data, keyword_names, flag_names, trailing_args_keys):
args_str = ''
for keyword in keyword_names:
if keyword in data:
args_str += '-{} {} '.format(keyword, data[keyword])
for flag in flag_names:
if flag in data and data[flag]:
args_str += '-{} '.format(flag)
for place_i, trailing_arg_key in enumerate(trailing_args_keys):
# if we have a sequence under the key, we expand it
if trailing_arg_key in data:
trailing_arg = data[trailing_arg_key]
args_str += ' '.join(map(str, trailing_arg)) if aux.is_sequence(trailing_arg) else trailing_arg
args_str += ' '
return args_str
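# Example of the argument line produced by build_args_line (hypothetical data): with
# keyword_names=('n',), flag_names=('verbose',), trailing_args_keys=('inputs',) and
# data = {'n': 4, 'verbose': True, 'inputs': ['a.txt', 'b.txt']},
# the returned string is '-n 4 -verbose a.txt b.txt '.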
def render_sge_template(sge_template_name, sge_script_path, cores, time, commands):
with open(os.path.expanduser('~/.comsdk/config_research.json'), 'r') as f:
conf = json.load(f)
sge_templ_path = os.path.join(conf['TEMPLATES_PATH'], sge_template_name)
if not os.path.exists(sge_templ_path): # by default, templates are in templates/, but here we let the user put any path
sge_templ_path = sge_template_name
    with open(sge_templ_path, 'r') as f:
        rendered_data = Template(f.read()).render(cores=cores, time=time, commands=commands)
    sge_script_file = aux.create_file_mkdir(sge_script_path)
    sge_script_file.write(rendered_data)
    sge_script_file.close()
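# Sketch of an SGE template consumable by render_sge_template (a hypothetical mako template
# placed under TEMPLATES_PATH and referenced by 'sge_template_name' in the config; only
# cores, time and commands are substituted):
#     #$ -pe smp ${cores}
#     #$ -l h_rt=${time}
#     % for cmd in commands:
#     ${cmd}
#     % endfor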
def connect_branches(branches: Sequence[Tuple[State, State]], edges: Optional[Sequence[Edge]] = None):
if edges is None:
edges = [dummy_edge for _ in range(len(branches) - 1)]
for i, edge in zip(range(1, len(branches)), edges):
_, prev_branch_end = branches[i - 1]
next_branch_start, _ = branches[i]
prev_branch_end.connect_to(next_branch_start, edge=edge)
dummy_edge = Edge(dummy_predicate, Func())
import os
import subprocess
import comsdk.misc as aux
class BaseEnvironment(object):
def __init__(self):
self._programs = {}
def preprocess(self, working_dir, input_copies_list):
raise NotImplementedError()
def execute(self, working_dir, prog_name, command_line):
raise NotImplementedError()
def postprocess(self, working_dir, output_copies_list):
raise NotImplementedError()
def add_program(self, prog_name, path_to_prog):
self._programs[prog_name] = path_to_prog
# def _print_copy_msg(self, from_, to_):
# print('\tCopying %s to %s' % (from_, to_))
#
# def _print_exec_msg(self, cmd, is_remote):
# where = '@' + self._machine_name if is_remote else ''
# print('\tExecuting %s: %s' % (where, cmd))
class LocalEnvironment(BaseEnvironment):
def __init__(self):
super().__init__()
def preprocess(self, working_dir, input_copies_list):
for copy_target in input_copies_list:
            self._copy(copy_target, working_dir)
def execute(self, working_dir, prog_name, args_str):
prog_path = os.path.join(self._programs[prog_name], prog_name)
command_line = 'cd {}; {} {}'.format(working_dir, prog_path, args_str)
# use PIPEs to avoid breaking the child process when the parent process finishes
# (works on Linux, solution for Windows is to add creationflags=0x00000010 instead of stdout, stderr, stdin)
# self._print_exec_msg(command_line, is_remote=False)
#pid = subprocess.Popen(args, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
#print(pid)
subprocess.call([command_line], shell=True)
def postprocess(self, working_dir, output_copies_list):
pass
    def _copy(self, from_, to_, mode='from_local'):
        """Any mode is ignored since the copying shall be within a local machine anyway
        """
        aux.cp(from_, to_)
        # self._print_copy_msg(from_, to_)
    def rm(self, target):
        aux.rm(target)
import collections
import os
from enum import Enum, auto
from functools import partial
import importlib as imp
import comsdk.misc as aux
ImplicitParallelizationInfo = collections.namedtuple('ImplicitParallelizationInfo', ['array_keys_mapping', 'branches_number', 'branch_i'])
class Func:
__slots__ = (
'module',
'func',
'comment',
'name'
)
def __init__(self, module="", name="", dummy=False, func=None, comment=''):
self.module = module
self.name = name
self.comment = comment.replace("\0", " ") if comment is not None else ""
if module == "" or name == "" or module is None or name is None:
dummy = True
if func is not None:
self.func = func
elif dummy:
self.func = lambda data: data
else:
print("LOADING function {} from {} module".format(name, module) )
try:
self.func = getattr(imp.import_module(module), name)
            except Exception as e:
                raise Exception("Could not load function {} from module {}".format(name, module)) from e
def __str__(self):
if self.module == "" or self.name == "":
return self.func.__name__
return "{}_{}".format(self.module, self.name)
class Selector(Func):
def __init__(self, ntransf, module="", name="", dummy=False):
if module == "" and name == "":
dummy = True
self.dummy = dummy
super().__init__(module, name, func=(lambda x: [True for i in range(ntransf)]) if dummy else None)
def __str__(self):
if self.module == "" or self.name == "":
return ''
return "{}_{}".format(self.module, self.name)
class Transfer:
def __init__(self, edge, output_state, order=0):
self.edge = edge
self.output_state = output_state
self.order = order
def transfer(self, data, dynamic_keys_mapping={}):
self.edge.morph(data, dynamic_keys_mapping)
return self.output_state
class IdleRunType(Enum):
INIT = auto()
CLEANUP = auto()
class PluralState:
def __init__(self, states):
self.states = states
pass
def connect_to(self, term_states, edge):
for init_state, term_state in zip(self.states, term_states):
init_state.transfers.append(Transfer(edge, term_state))
class Graph:
'''
Class describing a graph-based computational method. Graph execution must start from this object.
'''
def __init__(self, init_state,
term_state=None,
):
self.init_state = init_state
self.term_state = term_state
if self.term_state is not None:
self.term_state.is_term_state = True
self._initialized = False
def run(self, data):
'''
Goes through the graph and returns boolean denoting whether the graph has finished successfully.
It runs twice -- the first run is idle (needed for initialization) and the second run is real.
The input data will be augmented by metadata:
1) '__CURRENT_WORKING_DIR__' -- absolute path to the current working directory as defined by the OS
2) '__WORKING_DIR__' -- absolute path to the directory from which external binaries or resources will be launched.
It will be set only if it is not yet set in data
3) '__EXCEPTION__' if any error occurs
'''
self.init_graph(data)
cur_state = self.init_state
implicit_parallelization_info = None
while cur_state is not None:
# print('1) In main loop', implicit_parallelization_info)
# morph = _run_state(cur_state, data, implicit_parallelization_info)
transfer_f, implicit_parallelization_info = _run_state(cur_state, data, implicit_parallelization_info)
# print('2) In main loop', implicit_parallelization_info)
if '__EXCEPTION__' in data:
return False
# cur_state, implicit_parallelization_info = morph(data)
cur_state = transfer_f(data)
# print(morph)
if '__EXCEPTION__' in data:
return False
return True
def init_graph(self, data={}):
if not self._initialized:
self.init_state.idle_run(IdleRunType.INIT, [self.init_state.name])
self._initialized = True
else:
self.init_state.idle_run(IdleRunType.CLEANUP, [self.init_state.name])
data['__CURRENT_WORKING_DIR__'] = os.getcwd()
if not '__WORKING_DIR__' in data:
data['__WORKING_DIR__'] = data['__CURRENT_WORKING_DIR__']
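# Minimal end-to-end sketch of building and running a graph (the incrementing edge below is
# hypothetical; real graphs are normally built from the Edge subclasses in comsdk.edge or
# parsed from a graph description file):
#     from comsdk.edge import Edge
#     st_start, st_end = State('START'), State('END')
#     increment = Func(func=lambda d: d.update(a=d['a'] + 1))
#     st_start.connect_to(st_end, edge=Edge(Func(func=lambda d: True), increment))
#     data = {'a': 0}
#     ok = Graph(st_start, term_state=st_end).run(data)  # ok is True, data['a'] == 1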
class State:
__slots__ = [
'name',
'input_edges_number', #output_edges_number == len(transfers)
'looped_edges_number',
'activated_input_edges_number',
'transfers',
'parallelization_policy',
'selector',
'is_term_state',
'array_keys_mapping',
'_branching_states_history',
'_proxy_state',
'possible_branches',
'comment'
]
def __init__(self, name,
parallelization_policy=None,
selector=None,
array_keys_mapping=None, # if array_keys_mapping is not None, we have implicit parallelization in this state
):
self.name = name
self.parallelization_policy = SerialParallelizationPolicy() if parallelization_policy is None else parallelization_policy
self.selector = Selector(1) if selector is None else selector
self.array_keys_mapping = array_keys_mapping
self.input_edges_number = 0
self.looped_edges_number = 0
self.activated_input_edges_number = 0
self.transfers = []
self.possible_branches = []
self.is_term_state = False
self._branching_states_history = None
self._proxy_state = None
self.comment = None
def idle_run(self, idle_run_type, branching_states_history):
def __sort_by_order(tr):
return tr.edge.order
self.transfers.sort(key = __sort_by_order)
# print(self.name)
# for t in self.transfers:
# print("\t", t.edge.order, t.edge.pred_name, t.edge.morph_name)
if self._proxy_state is not None:
return self._proxy_state.idle_run(idle_run_type, branching_states_history)
if idle_run_type == IdleRunType.INIT:
self.input_edges_number += 1
if self.input_edges_number != 1:
if self._is_looped_branch(branching_states_history):
self.looped_edges_number += 1
return # no need to go further if we already were there
if self._branching_states_history is None:
self._branching_states_history = branching_states_history
elif idle_run_type == IdleRunType.CLEANUP:
self.activated_input_edges_number = 0
if self._branching_states_history is not None and self._is_looped_branch(branching_states_history):
self._branching_states_history = None
return
if self._branching_states_history is None:
self._branching_states_history = branching_states_history
else:
self.activated_input_edges_number += 1 # BUG: here we need to choose somehow whether we proceed or not
# if len(self.transfers) == 0:
# print('Terminate state found')
if len(self.transfers) == 1:
self.transfers[0].output_state.idle_run(idle_run_type, branching_states_history)
else:
for i, transfer in enumerate(self.transfers):
next_state = transfer.output_state
next_state.idle_run(idle_run_type, branching_states_history + [next_state.name])
def connect_to(self, term_state, edge=None, comment=None):
        if comment is not None and comment != "":
self.comment = comment
self.transfers.append(Transfer(edge, term_state))
self.selector = Selector(len(self.transfers))
# edge.set_output_state(term_state)
# self.output_edges.append(edge)
def replace_with_graph(self, graph):
self._proxy_state = graph.init_state
graph.term_state.transfers = self.transfers
graph.term_state.selector = self.selector
def run(self, data, implicit_parallelization_info=None):
print('STATE {}\n\tjust entered, implicit_parallelization_info: {}'.format(self.name, implicit_parallelization_info))
# print('\t{}'.format(data))
if self._proxy_state is not None:
return self._proxy_state.run(data, implicit_parallelization_info)
self._activate_input_edge(implicit_parallelization_info)
#self.activated_input_edges_number += 1
print('\trequired input: {}, active: {}, looped: {}'.format(self.input_edges_number, self.activated_input_edges_number, self.looped_edges_number))
# print('qwer')
if not self._ready_to_transfer(implicit_parallelization_info):
return None, None # it means that this state waits for some incoming edges (it is a point of collision of several edges)
self._reset_activity(implicit_parallelization_info)
if self.is_term_state:
implicit_parallelization_info = None
if len(self.transfers) == 0:
return transfer_to_termination, None
dynamic_keys_mapping = build_dynamic_keys_mapping(implicit_parallelization_info)
selected_edges = self.selector.func(data)
if not selected_edges:
raise GraphUnexpectedTermination(
"STATE {}: error in selector: {} ".format(self.name, selected_edges))
# selected_transfers = [self.transfers[i] for i, _ in enumerate(selected_edges) if selected_edges[i]]
# for transf in selected_transfers:
# if not transf.edge.predicate(data, dynamic_keys_mapping):
# raise Exception("\tERROR: predicate {} returns {} running from state {}\n data{}".format(transf.edge.pred_f.name,transf.edge.predicate(data, dynamic_keys_mapping), self.name, data))
selected_transfers = [self.transfers[i] for i, _ in enumerate(selected_edges)
if selected_edges[i] and self.transfers[i].edge.predicate(data, dynamic_keys_mapping)]
if not selected_transfers:
raise GraphUnexpectedTermination('\tERROR: no transfer function has been '
'selected out of {} ones. Predicate values are {}. '
'Selector values are {}.'.format(len(self.transfers),
[t.edge.predicate(data, dynamic_keys_mapping) for t in self.transfers],
selected_edges))
return self.parallelization_policy.make_transfer_func(selected_transfers,
array_keys_mapping=self.array_keys_mapping,
implicit_parallelization_info=implicit_parallelization_info,
state=self), \
implicit_parallelization_info
def _activate_input_edge(self, implicit_parallelization_info=None):
if implicit_parallelization_info is None or self.is_term_state:
self.activated_input_edges_number += 1
else:
if isinstance(self.activated_input_edges_number, int):
self.activated_input_edges_number = [0 for i in range(implicit_parallelization_info.branches_number)]
self.activated_input_edges_number[implicit_parallelization_info.branch_i] += 1
def _ready_to_transfer(self, implicit_parallelization_info=None):
required_activated_input_edges_number = self.input_edges_number - self.looped_edges_number
if implicit_parallelization_info is not None:
if self.is_term_state:
required_activated_input_edges_number = implicit_parallelization_info.branches_number
return self.activated_input_edges_number == required_activated_input_edges_number
return self.activated_input_edges_number[implicit_parallelization_info.branch_i] == required_activated_input_edges_number
else:
return self.activated_input_edges_number == required_activated_input_edges_number
# if implicit_parallelization_info is None or self.is_term_state:
# if self.is_term_state:
# required_activated_input_edges_number = implicit_parallelization_info.branches_number
# return self.activated_input_edges_number == required_activated_input_edges_number
# else:
# return self.activated_input_edges_number[implicit_parallelization_info.branch_i] == required_activated_input_edges_number
def _reset_activity(self, implicit_parallelization_info=None):
self._branching_states_history = None
if self._ready_to_transfer(implicit_parallelization_info) and self._has_loop():
if implicit_parallelization_info is None or self.is_term_state:
self.activated_input_edges_number -= 1
else:
self.activated_input_edges_number[implicit_parallelization_info.branch_i] -= 1
else:
# self.activated_input_edges_number = 0
if implicit_parallelization_info is None or self.is_term_state:
self.activated_input_edges_number = 0
else:
self.activated_input_edges_number[implicit_parallelization_info.branch_i] = 0
def _is_looped_branch(self, branching_states_history):
return set(self._branching_states_history).issubset(branching_states_history)
def _has_loop(self):
return self.looped_edges_number != 0
def transfer_to_termination(data):
return None
class SerialParallelizationPolicy:
# def __init__(self, data):
# self.data = data
def __init__(self):
pass
def make_transfer_func(self, transfers, array_keys_mapping=None, implicit_parallelization_info=None, state=None):
def _morph(data):
# print("MORPHING FROM {}".format(state.name))
if array_keys_mapping is None:
dynamic_keys_mapping = build_dynamic_keys_mapping(implicit_parallelization_info)
next_transfers = [partial(t.transfer, dynamic_keys_mapping=dynamic_keys_mapping) for t in transfers]
next_impl_para_infos = [implicit_parallelization_info for _ in transfers]
# print('\t\t {}'.format(implicit_parallelization_infos))
else:
if len(transfers) != 1:
                    raise BadGraphStructure('Impossible to create implicit parallelization in the state '
                                            'with {} output edges'.format(len(transfers)))
dynamic_keys_mapping = build_dynamic_keys_mapping(implicit_parallelization_info)
proxy_data = aux.ProxyDict(data, keys_mappings=array_keys_mapping)
anykey = next(iter(array_keys_mapping.keys()))
implicit_branches_number = len(proxy_data[anykey])
next_transfers = []
next_impl_para_infos = []
for branch_i in range(implicit_branches_number):
implicit_parallelization_info_ = ImplicitParallelizationInfo(array_keys_mapping, implicit_branches_number, branch_i)
dynamic_keys_mapping = build_dynamic_keys_mapping(implicit_parallelization_info_)
# print(dynamic_keys_mapping)
#next_transfers.append(partial(transfers[0].edge.morph, dynamic_keys_mapping=dynamic_keys_mapping))
next_transfers.append(partial(transfers[0].transfer, dynamic_keys_mapping=dynamic_keys_mapping))
next_impl_para_infos.append(implicit_parallelization_info_)
cur_transfers = []
cur_impl_para_infos = []
#while len(next_transfers) != 1 or _is_implicitly_parallelized(next_impl_para_infos):
while len(next_transfers) != 1 or _requires_joint_of_implicit_parallelization(array_keys_mapping, next_impl_para_infos):
if next_impl_para_infos == []:
raise Exception("Morphs count on state {} is {}".format(state.name, str(len(next_transfers))))
# print(array_keys_mapping, next_impl_para_infos)
cur_transfers[:] = next_transfers[:]
cur_impl_para_infos[:] = next_impl_para_infos[:]
del next_transfers[:]
del next_impl_para_infos[:]
for t, impl_para_info in zip(cur_transfers, cur_impl_para_infos):
next_state = t(data)
# print('\t next_state: {}, with impl para info: {}'.format(next_state.name, impl_para_info))
if next_state is None:
return None
next_t, next_impl_para_info = _run_state(next_state, data, impl_para_info)
# print('\t next_morph: {}'.format(next_morph))
if '__EXCEPTION__' in data:
return None
if next_t is not None:
next_transfers.append(next_t)
next_impl_para_infos.append(next_impl_para_info)
# print(array_keys_mapping, next_impl_para_infos)
#print(len(next_transfers))
# print('\t last morph: {}'.format(next_transfers[0]))
next_state = next_transfers[0](data)
# print(next_state.name, next_impl_para_infos[0])
return next_state
return _morph
class BadGraphStructure(Exception):
pass
class GraphUnexpectedTermination(Exception):
pass
def _requires_joint_of_implicit_parallelization(array_keys_mapping, impl_para_infos):
if array_keys_mapping is None:
return False
for obj in impl_para_infos:
if obj is not None:
return True
return False
def _get_trues(boolean_list):
return [i for i, val in enumerate(boolean_list) if val == True]
def _run_state(state, data, implicit_parallelization_info=None):
try:
next_morphism, next_impl_para_info = state.run(data, implicit_parallelization_info)
except GraphUnexpectedTermination as e:
data['__EXCEPTION__'] = str(e)
return None, None
return next_morphism, next_impl_para_info
def build_dynamic_keys_mapping(implicit_parallelization_info=None):
if implicit_parallelization_info is None:
return {}
dynamic_keys_mapping = {}
for key, keys_path in implicit_parallelization_info.array_keys_mapping.items():
dynamic_keys_mapping[key] = aux.ArrayItemGetter(keys_path, implicit_parallelization_info.branch_i)
return dynamic_keys_mapping
from functools import reduce, partial
import os
import re
import collections
from copy import deepcopy
import importlib
from abc import ABC, abstractmethod
from typing import Optional, List, Tuple, Type, Any
import json
import numpy as np
from jsons import JsonSerializable
ArrayItemGetter = collections.namedtuple('ArrayItemGetter', ['key_path_to_array', 'i'])
class StandardisedNaming(ABC):
"""
Class StandardisedNaming is an abstract class used to represent standardised names of files and directories in a
general sense. To make use of its features, one needs to derive one's own class and implement methods
regexp_with_substitutions and make_name. The former should return a group-named regular expression (with or
without substitution) which can be used to recognise whether a concrete name corresponds to the standardised name
or not. The latter should create a concrete standardised name based on its attributes.
"""
@classmethod
def regexp(cls) -> str:
"""
Returns a full group-named regular expression which can be used to determine whether a certain name follows the
standardised naming.
:return: regular expression as a string
"""
return cls.regexp_with_substitutions()
@classmethod
def parse(cls, name: str) -> Optional[dict]:
"""
Checks whether a given name follows the standardised naming and, if yes, parses the name and returns a
dictionary of its attributes.
:param name: name to be parsed
:return: either dictionary of the name attributes or None if a given name does not follow the standardised
naming
"""
return parse_by_named_regexp(cls.regexp(), name)
@classmethod
@abstractmethod
def regexp_with_substitutions(cls, **kwargs) -> str:
"""
Returns a group-named regular expression (if kwargs are given, they will be substituted into the regular
expression according to their names) which can be used to recognise whether a concrete name follows the
standardised naming or not.
:param kwargs: name attributes
:return: regular expression as a string
"""
raise NotImplementedError('Must be implemented. It must return the regular expression with substitutions based '
'on kwargs arguments. Being invoked with no arguments, it must return the full '
'regular expression')
@classmethod
@abstractmethod
def make_name(cls, **kwargs) -> str:
"""
Returns name based on the standardised naming and attributes passed via kwargs.
TODO: must be implemented (or joint with regexp_with_substitutions) such that regexp_with_substitutions is
used inside it
:param kwargs: name attributes
:return: name as a string
"""
raise NotImplementedError('Must be implemented. It must return the name using kwargs arguments as '
'substitutions')
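# Illustrative sketch (hypothetical class, not part of the library): a concrete naming scheme
# derived from StandardisedNaming for directories like 'task-12_alpha-0.5'. The class name and
# attributes are made up for the example.
class ExampleTaskDirNaming(StandardisedNaming):
    @classmethod
    def regexp_with_substitutions(cls, **kwargs) -> str:
        # attributes passed via kwargs are substituted directly; the rest stay as named groups
        # so that parse() can recognise and extract them
        number = kwargs.get('number', r'(?P<number>\d+)')
        alpha = kwargs.get('alpha', r'(?P<alpha>\d+\.\d+)')
        return r'^task-{}_alpha-{}$'.format(number, alpha)

    @classmethod
    def make_name(cls, **kwargs) -> str:
        return 'task-{number}_alpha-{alpha}'.format(**kwargs)

# ExampleTaskDirNaming.parse('task-12_alpha-0.5') returns {'number': '12', 'alpha': '0.5'},
# and ExampleTaskDirNaming.make_name(number=12, alpha=0.5) returns 'task-12_alpha-0.5'.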
class ProxyDict(object):
'''
Class allowing access to a dict via proxy mappings, using the same interface as a dict.
It supports two types of proxy mappings:
1) relative_keys
2) keys_mappings
and also extends a simple key to key_path. For example, a sequence of keys leading to d['a']['b']['c']
corresponds to a key_path ('a', 'b', 'c').
Proxy mapping relative_keys is a sequence of key_path leading to subdicts. The content of these subdicts
is treated as located in the root of the proxy dict. For example, suppose we have d = {'a': 1, 'b':{'c': 2, 'd': 3}}.
A proxy dict with relative_key ('b',) shall be pd = {'a': 1, 'c': 2, 'd': 3, 'b':{'c': 2, 'd': 3}}.
Proxy mapping keys_mappings is a dict linking a (new) key in the root of proxy dict to key_path in original dict.
For example, for dict d, a proxy dict with keys_mappings {'d': ('b', 'd')} shall be pd = {'a': 1, 'd': 3, 'b':{'c': 2, 'd': 3}}.
Finally, we have default_relative_key which is a key_path leading to a subdict to which new elements must be added.
For example, for dict d, proxy dict pd and default_relative_key ('b',), operation pd['z'] = 0 leads to the following change in d:
d = {'a': 1, 'b':{'c': 2, 'd': 3, 'z': 0}}
The order of the proxy mappings (the higher mapping overwrites the lower):
1) keys_mappings
2) relative_keys
3) original dict (root)
'''
def __init__(self, data,
relative_keys=(),
keys_mappings={},
default_relative_key=(),
):
self._data = data
self._default_relative_key = list(default_relative_key)
self._keys_mappings = {key: key for key in self._data.keys()}
for rel_key in relative_keys:
for inner_key in recursive_get(data, rel_key).keys():
self._keys_mappings[inner_key] = list(rel_key) + [inner_key]
self._keys_mappings.update(keys_mappings)
def __repr__(self):
res = '{'
for key in self._keys_mappings.keys():
res += '{}: {}, '.format(key, self.__getitem__(key))
return res + '}'
def __contains__(self, key):
return key in self._keys_mappings.keys()
def __getitem__(self, key):
# x[key] => x.__getitem__(key)
return recursive_get(self._data, self._keys_mappings[key])
def __setitem__(self, key, value):
# x[key] = value => x.__setitem__(key, value)
if key in self._keys_mappings:
recursive_set(self._data, self._keys_mappings[key], value)
else:
recursive_set(self._data, self._default_relative_key + [key], value)
self._keys_mappings[key] = self._default_relative_key + [key]
def __delitem__(self, key):
# del x[key] => x.__delitem__(key)
key_path = self._keys_mappings[key]
if is_sequence(key_path):
parent = recursive_get(self._data, key_path[:-1])
del parent[key_path[-1]]
else:
del self._data[key_path]
del self._keys_mappings[key]
def update(self, mapping):
for key in mapping.keys():
self.__setitem__(key, mapping[key])
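# Illustrative sketch of how ProxyDict resolves keys, using the same example dictionary as
# in the class docstring above.
def _example_proxy_dict():
    d = {'a': 1, 'b': {'c': 2, 'd': 3}}
    pd = ProxyDict(d, relative_keys=(('b',),), keys_mappings={'d': ('b', 'd')},
                   default_relative_key=('b',))
    # pd behaves like {'a': 1, 'c': 2, 'd': 3, 'b': {'c': 2, 'd': 3}}
    assert pd['a'] == 1 and pd['c'] == 2 and pd['d'] == 3
    pd['z'] = 0               # new keys go under the default_relative_key ('b',)
    assert d['b']['z'] == 0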
def recursive_get(d, keys):
if isinstance(keys, ArrayItemGetter):
array_ = recursive_get(d, keys.key_path_to_array)
return array_[keys.i]
elif is_sequence(keys):
return reduce(lambda d_, key_: d_.get(key_, {}), keys, d)
else:
return d[keys]
def recursive_set(d, keys, val):
if isinstance(keys, ArrayItemGetter):
array_ = recursive_get(d, keys.key_path_to_array)
array_[keys.i] = val
elif is_sequence(keys):
last_dict = reduce(lambda d_, key_: d_.setdefault(key_, {}), keys[:-1], d)
last_dict[keys[-1]] = val
else:
d[keys] = val
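# Brief illustration (sketch): key paths address nested dictionaries, and ArrayItemGetter
# addresses an element inside a list stored in the dict.
def _example_recursive_access():
    d = {}
    recursive_set(d, ('a', 'b', 'c'), 1)        # intermediate dicts are created on the fly
    assert d == {'a': {'b': {'c': 1}}}
    assert recursive_get(d, ('a', 'b', 'c')) == 1
    d['arr'] = [10, 20, 30]
    getter = ArrayItemGetter(key_path_to_array='arr', i=1)
    assert recursive_get(d, getter) == 20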
def is_sequence(obj):
'''
Checks whether obj is a sequence (string does not count as a sequence)
'''
return isinstance(obj, collections.abc.Sequence) and (not hasattr(obj, 'strip'))
def cp(from_, to_):
'''
Copies from_ to to_ where from_ may be file or dir and to_ is a dir.
Returns new path.
'''
if os.path.isfile(from_):
shutil.copy(from_, to_)
else:
shutil.copytree(from_, to_)
return os.path.join(to_, os.path.basename(from_))
def rm(target):
'''
Removes target which may be file or dir.
'''
if os.path.isfile(target):
os.remove(target)
else:
shutil.rmtree(target)
def remove_if_exists(path):
try:
os.remove(path)
return True
except FileNotFoundError as e:
return False
def create_file_mkdir(filepath):
'''
Opens a filepath in write mode (i.e., creates/overwrites it). If the path does not exist,
the intermediate directories will be created.
'''
dirpath = os.path.dirname(filepath)
if not os.path.exists(dirpath):
os.makedirs(dirpath)
return open(filepath, 'w')
def merge_dicts(*dict_args):
'''
Given any number of dicts, shallow copy and merge into a new dict,
precedence goes to key value pairs in latter dicts.
Source: Aaron Hall, https://stackoverflow.com/questions/38987/how-to-merge-two-dictionaries-in-a-single-expression
'''
result = {}
for dictionary in dict_args:
result.update(dictionary)
return result
def append_code(obj, obj_funcs, code_appendix):
'''
Appends the code defined by the function code_appendix to the end of each method of the object obj listed in obj_funcs.
'''
def extended_func(func, *args, **kwds):
func(*args, **kwds)
code_appendix(*args, **kwds)
for func_name in obj_funcs:
func = getattr(obj, func_name)
if not func:
raise Exception('Function {} not found'.format(func_name))
setattr(obj, func_name, partial(extended_func, func))
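# Illustrative sketch (the Greeter class below is hypothetical): log every call to an
# object's method by appending extra code to it.
def _example_append_code():
    class Greeter:
        def greet(self, name):
            print('Hello, {}!'.format(name))
    g = Greeter()
    append_code(g, ['greet'], lambda name: print('greet() was called with', name))
    g.greet('world')   # prints the greeting first, then the log line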
def do_atomic(proc_func, cleanup_func):
'''
Executes the function proc_func such that if an exception is raised, the function cleanup_func
is executed and only after that the exception is re-raised. It is useful when proc_func
creates something which should be removed in the case of emergency.
'''
try:
proc_func()
except Exception as err:
cleanup_func()
raise err
def make_atomic(proc_func, cleanup_func):
'''
Returns a function corresponding to do_atomic() to which proc_func and cleanup_func are passed.
'''
return partial(do_atomic, proc_func, cleanup_func)
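# Illustrative sketch: create a directory and remove it again if the subsequent processing
# fails. The directory name and the failing function are arbitrary.
def _example_do_atomic():
    os.makedirs('tmp_task_dir', exist_ok=True)
    def process():
        raise RuntimeError('processing failed')
    def cleanup():
        shutil.rmtree('tmp_task_dir')
    try:
        do_atomic(process, cleanup)
    except RuntimeError:
        pass   # 'tmp_task_dir' has already been removed by cleanup()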
def find_dir_by_named_regexp(regexp, where):
'''
Searches for a dir in where which satisfies regexp. If successful, parses the dir according to the named regexp.
Returns a tuple (found_dir, params_from_named_regexp) or None if not found.
TODO: deprecated (see find_dir_by_standardised_naming)
'''
dirnames = next(os.walk(where))[1]
for dir_ in dirnames:
parsing_params = parse_by_named_regexp(regexp, dir_)
if parsing_params is not None:
return dir_, parsing_params
return None
def find_all_dirs_by_named_regexp(regexp, where):
'''
Searches for dirs in where which satisfy regexp. If successful, parses them according to the named regexp.
Returns a list of tuples (found_dir, params_from_named_regexp).
TODO: deprecated (see find_all_dirs_by_standardised_naming)
'''
dirnames = next(os.walk(where))[1]
datas = []
for dir_ in dirnames:
parsing_params = parse_by_named_regexp(regexp, dir_)
if parsing_params is not None:
datas.append((dir_, parsing_params))
return datas
def find_all_files_by_named_regexp(regexp, where):
'''
Searches for files in where which satisfy regexp. If successful, parses them according to the named regexp.
Returns a list of tuples (found_file, params_from_named_regexp).
TODO: deprecated (see find_all_files_by_standardised_naming)
'''
filenames = next(os.walk(where))[2]
datas = []
for file_ in filenames:
parsing_params = parse_by_named_regexp(regexp, file_)
if parsing_params is not None:
datas.append((file_, parsing_params))
return datas
def find_dir_by_standardised_naming(naming: Type[StandardisedNaming], where: str) -> Optional[Tuple[str, dict]]:
'''
Searches for a dir in where which follows the given standardised naming. If successful, parses the dir accordingly.
Returns a tuple (found_dir, params_from_named_regexp) or None if not found.
'''
dirnames = next(os.walk(where))[1]
for dir_ in dirnames:
parsing_params = naming.parse(dir_)
if parsing_params is not None:
return dir_, parsing_params
return None
def find_all_dirs_by_standardised_naming(naming: Type[StandardisedNaming], where: str) -> List[Tuple[str, dict]]:
'''
Searches for dirs in where which follow the given standardised naming. If successful, parses them accordingly.
Returns a list of tuples (found_dir, params_from_named_regexp).
'''
dirnames = next(os.walk(where))[1]
datas = []
for dir_ in dirnames:
parsing_params = naming.parse(dir_)
if parsing_params is not None:
datas.append((dir_, parsing_params))
return datas
def find_all_files_by_standardised_naming(naming: Type[StandardisedNaming], where: str) -> List[Tuple[str, dict]]:
'''
Searches for files in where which follow the given standardised naming. If successful, parses them accordingly.
Returns a list of tuples (found_file, params_from_named_regexp).
'''
filenames = next(os.walk(where))[2]
datas = []
for file_ in filenames:
parsing_params = naming.parse(file_)
if parsing_params is not None:
datas.append((file_, parsing_params))
return datas
def parse_by_named_regexp(regexp, val):
'''
Parses val according to the named regexp. Returns a dictionary of params or None if val does not match.
'''
matching = re.search(regexp, val)
if matching is None:
return None
return matching.groupdict()
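# Brief illustration (sketch): named groups of the regexp become keys of the returned dict.
def _example_parse_by_named_regexp():
    regexp = r'^(?P<task_number>\d+)-(?P<task_name>\S+)$'
    assert parse_by_named_regexp(regexp, '12-my_task') == {'task_number': '12', 'task_name': 'my_task'}
    assert parse_by_named_regexp(regexp, 'report') is None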
def parse_datafile(path, data_names, transform_funcs, cols_to_parse=[]):
'''
Parses a data file given by path and structured as a table where rows are separated by \n
and columns are separated by any of whitespaces. The first line in the file will be ignored.
Processed columns are given by cols_to_parse (all columns will be processed if it is empty).
Corresponding names and transformation functions for columns in cols_to_parse are given by
data_names and transform_funcs. Transformation function must be a mapping string -> type.
Returns a dictionary where a key corresponds to a column name (i.e., taken from data_names)
and a value corresponds to a list of the columns values taken from all rows.
'''
if cols_to_parse == []:
cols_to_parse = range(len(data_names))
if len(data_names) != len(transform_funcs) or len(data_names) != len(cols_to_parse):
raise Exception('Number of data names, transform functions and columns to be parsed is inconsistent')
data = collections.OrderedDict()
for data_name in data_names:
data[data_name] = []
f = open(path, 'r') # if not found, an exception will be raised anyway
lines = f.readlines()
for line in lines[1:]: # skip the first line
tmp = line.split()
if len(tmp) < len(data_names):
raise Exception('Number of given data names is larger than number of columns we have in the data file.')
for i, data_name in enumerate(data_names):
val = tmp[cols_to_parse[i]]
data[data_name].append(transform_funcs[i](val))
return {name: np.array(array_) for name, array_ in data.items()}
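# Illustrative sketch: write a small whitespace-separated table and parse two of its columns.
# The file name and column names below are arbitrary.
def _example_parse_datafile():
    with open('example_data.txt', 'w') as f:
        f.write('t a b\n')
        f.write('0.0 1 10\n')
        f.write('0.1 2 20\n')
    parsed = parse_datafile('example_data.txt', ['t', 'b'], [float, int], cols_to_parse=[0, 2])
    # parsed['t'] -> array([0. , 0.1]), parsed['b'] -> array([10, 20])
    return parsed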
def parse_timed_numdatafile(path):
'''
Parses a data file given by path and structured as a table where rows are separated by \n
and columns are separated by any of whitespaces. The table here has an interpretation of a matrix whose
rows axis corresponds to time axis and columns axis corresponds to data axis. Moreover, the first column
contains the time values so the data is contained in columns starting from the second one.
Returns time_list (a list of times from the first column) and data_matrix (a list of numpy arrays of data where
list's index corresponds to the time index).
'''
time = []
data = []
f = open(path, 'r') # if not found, an exception will be raised anyway
lines = f.readlines()
for line in lines[1:]: # skip the first line
tmp = line.split()
time.append(float(tmp[0]))
timed_data = np.zeros((len(tmp) - 1, ))
for i, val in enumerate(tmp[1:]):
timed_data[i] = float(val)
data.append(timed_data)
return time, np.array(data)
def write_datafile(path, data):
keys = list(data.keys())
# print(keys)
values = list(data.values())
with open(path, 'w') as f:
f.write(r'% ' + '\t'.join(keys) + '\n')
for t_i in range(len(values[0])):
line = '\t'.join([str(array[t_i]) for array in values]) + '\n'
f.write(line)
def write_timed_numdatafile(path, time, data):
with open(path, 'w') as f:
for i in range(len(time)):
line = '{}\t'.format(time[i]) + '\t'.join([str(data[i][j]) for j in range(data.shape[1])]) + '\n'
f.write(line)
def load_function_from_module(full_function_name):
module_name, function_name = full_function_name.rsplit('.', 1)
module_ = importlib.import_module(module_name)
return getattr(module_, function_name)
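# Brief illustration (sketch): the full dotted name is split into a module and an attribute,
# so standard library functions can be loaded as well.
def _example_load_function_from_module():
    join = load_function_from_module('os.path.join')
    assert join('a', 'b') == os.path.join('a', 'b')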
def print_pretty_dict(d):
for k, v in d.items():
print('{}: {}'.format(k, v))
def raise_exception_if_arguments_not_in_keywords_or_none(argument_names, kwargs) -> None:
for arg in argument_names:
if arg not in kwargs:
raise ValueError('Keywords "{} = ..." must be set'.format(arg))
else:
if kwargs[arg] is None:
raise ValueError('Keywords "{}" must not be None'.format(arg))
def take_value_if_not_none(value, default=None, transform=str) -> Any:
if value is None:
if default is None:
raise ValueError('Value must not be None or default must be set')
else:
return default
else:
return transform(value)
def take_value_by_index(seq, i, default=None) -> Any:
return seq[i] if seq is not None else default
def dump_to_json(obj: JsonSerializable, path_to_jsons: str = 'jsons') -> None:
filename = '{}.{}.json'.format(type(obj).__module__, type(obj).__name__)
filename = os.path.join(path_to_jsons, filename)
obj_as_dict = obj.json
with open(filename, 'w') as f:
json.dump(obj_as_dict, f, indent=4)
def load_from_json(cls: Type[JsonSerializable], path_to_jsons: str = 'jsons') -> JsonSerializable:
filename = '{}.{}.json'.format(cls.__module__, cls.__name__)
filename = os.path.join(path_to_jsons, filename)
with open(filename, 'r') as f:
obj_as_dict = json.load(f)
return cls.from_json(obj_as_dict)
def print_msg_if_allowed(msg, allow=False):
if allow:
print(msg)
import re
import copy
import importlib as imp
from comsdk.graph import Graph, Func, State, Selector
from comsdk.edge import Edge
class Params():
__slots__=(
'module',
'entry_func',
'predicate',
'selector',
'function',
'morphism',
'parallelism',
'comment',
'order',
'subgraph'
)
def __init__(self):
for slot in self.__slots__:
setattr(self, slot, None)
def __str__(self):
stri = ""
for s in self.__slots__:
stri += ((s+": {}, ".format(getattr(self, s))) if getattr(self, s) is not None else "")
return stri
# entities = {}
class GraphFactory():
__slots__ = (
'name',
'states',
'graph',
'issub',
'tocpp',
'entities'
)
def __init__(self, tocpp=False):
self.states = {}
self.entities = {}
self.tocpp = tocpp
self.name = None
self.issub = False
def add_state(self, statename):
if statename not in self.states:
self.states[statename] = State(statename)
if statename in self.entities:
self.states[statename].comment = self.entities[statename].comment
def _create_morphism(self, morphname=None):
comment = ""
if morphname is None:
return Func(), Func(), comment
pred_f, func_f = Func(), Func()
morph = self.entities[morphname]
for m in morph.__slots__:
if getattr(morph,m) is not None:
if m!="predicate" and m!="function" and m!="comment":
raise Exception("ERROR: Morphisms could not have any params exept comment, predicate and function!\n{}".format(morphname))
if m=="comment":
comment=getattr(morph, m).replace("\0", " ")
if m=="predicate":
if getattr(morph,m) not in self.entities:
raise Exception("\tERROR: Predicate {} is not defined!".format(getattr(morph, m)))
pred = self.entities[getattr(morph, m)]
if self.tocpp:
pred_f = Func(pred.module, pred.entry_func, dummy=True, comment=pred.comment)
else:
pred_f = Func(pred.module, pred.entry_func, comment=pred.comment)
if m=="function":
if getattr(morph,m) not in self.entities:
raise Exception("\tERROR: Function: {} is not defined!".format(getattr(morph, m)))
fu = self.entities[getattr(morph, m)]
if self.tocpp:
func_f = Func(fu.module, fu.entry_func, dummy=True, comment=fu.comment)
else:
func_f = Func(fu.module, fu.entry_func,comment=fu.comment)
return pred_f, func_f, comment
def add_connection(self, st1, st2, morphism=None, ordr=0):
pred, entr, comm = self._create_morphism(morphism)
self.states[st1].connect_to(self.states[st2], edge=Edge(pred, entr, order=ordr, comment=comm))
print("{} -> {}".format(st1, st2))
def build(self, nsub):
print("BUILDING {}\nStates:".format(self.name))
for s in self.states:
print("\t"+ s)
if self.issub:
self.graph = Graph(self.states[self.name+str(nsub)+"_"+"__BEGIN__"], self.states[self.name+str(nsub)+"_"+"__END__"])
else:
self.graph = Graph(self.states["__BEGIN__"], self.states["__END__"])
self.graph.init_graph()
if self.issub:
oldkeys = []
for e in self.entities:
oldkeys.append(e)
for old in oldkeys:
if self.entities[old].selector is not None or self.entities[old].subgraph is not None:
self.entities[self.name + str(Parser.subgr_count)+"_"+old] = self.entities[old]
del self.entities[old]
for s in self.states:
if s in self.entities and self.entities[s].selector is not None:
selname = self.entities[s].selector
if self.tocpp:
self.states[s].selector = Selector(len(self.states[s].transfers), self.entities[selname].module, self.entities[selname].entry_func, dummy=True)
else:
self.states[s].selector = Selector(len(self.states[s].transfers), self.entities[selname].module, self.entities[selname].entry_func)
else:
self.states[s].selector = Selector(len(self.states[s].transfers))
if s in self.entities and self.entities[s].subgraph is not None:
print("Replacing state {} with subgraph {}".format(s,self.entities[s].subgraph))
parsr = Parser(subgraph=True, tocpp= self.tocpp)
subgr = parsr.parse_file(self.entities[s].subgraph)
self.states[s].replace_with_graph(subgr)
self.graph = Graph(self.graph.init_state, self.graph.term_state)
return self.graph
class Parser():
__slots__ = (
'fact',
'issub'
)
subgr_count = 0
def __init__(self, tocpp=False, subgraph=False):
self.fact = GraphFactory(tocpp=tocpp)
self.fact.issub = subgraph
self.issub = subgraph
if subgraph:
Parser.subgr_count+=1
def _check_brackets(self, rawfile):
br = { "[":{"line":0, "count":0}, "(":{"line":0, "count":0}, "{":{"line":0, "count":0}, "\"":{"line":0, "count":0}}
line = 1
qu = 0
for char in rawfile:
if char == "[":
br["["]["line"] = line
br["["]["count"] +=1
elif char == "{":
br["{"]["line"] = line
br["{"]["count"] +=1
elif char == "(":
br["("]["line"] = line
br["("]["count"] +=1
elif char == "]":
br["["]["count"] -=1
elif char == "}":
br["{"]["count"] -=1
elif char == ")":
br["("]["count"] -=1
elif char =="\"":
br["\""]["line"] = line
br["\""]["count"] += 1 if br["\""]["count"]==0 else -1
elif char == "\n":
line+=1
expstr= "Brackets or quotes do not match! Missing closing brackets on lines: "
fl = False
for c in br:
if br[c]["count"] != 0:
fl= True
expstr+=str(br[c]["line"])+" "
if fl:
raise Exception(expstr)
def _split_multiple(self,param):
vals = {}
first=True
for s in param.__slots__:
attr = getattr(param,s)
if attr is not None and '\0' in attr:
vals[s] = attr.split('\0')
l=0
for sl in vals:
if l==0:
l=len(vals[sl])
elif l!=len(vals[sl]):
raise Exception("\tERROR: Number of multiple params do not match", l)
res = [copy.copy(param) for i in range(l)]
for sl in vals:
for i, _ in enumerate(res):
setattr(res[i], sl, vals[sl][i])
return res
# Props is a line of the form "[prop=smth, ...]"
def _param_from_props(self,props):
parm = Params()
comment = ""
if props =="":
return parm
props = props.replace("]", '')
if '\"' in props:
m = [m for m in re.finditer(r'\".*\"', props)][0]
comment = props[m.span()[0]+1:m.span()[1]-1]
props=props[:m.span()[0]]+props[m.span()[1]:]
if '(' in props:
mchs = [m for m in re.finditer(r'\((\w+,)*\w+\)', props)]
for m in mchs:
props=props[:m.span()[0]]+(props[m.span()[0]:m.span()[1]]).replace(',','\0')+props[m.span()[1]:]
props = props.replace("(","")
props = props.replace(")","")
rs =props.split(r",") #.split(r", ")
for r in rs:
r=r.split(r"=", 1)
if r[0] in parm.__slots__:
setattr(parm, r[0], r[1])
else:
raise Exception("\tERROR:Unknown parameter: "+ r[0])
if comment != "":
setattr(parm, "comment", comment.replace("\0", " "))
return parm
def _param_from_entln(self, raw):
res = re.split(r"\[", raw, 1)
return res[0], self._param_from_props(res[1])
def _multiple_morphs(self,props, n):
p = self._param_from_props(props)
if p.morphism is None:
return [copy.copy(p) for i in range(n)]
else:
return self._split_multiple(p)
def _topology(self,raw):
spl = re.split(r"\s*(=>|->|\[|\])\s*", raw)
spl = list(filter(lambda x: x!="[" and x!="]" and x!="", spl))
left = spl[0].split(",")
right = spl[2].split(",")
if self.issub:
for i in range(len(left)):
left[i] = self.fact.name + str(Parser.subgr_count) + "_" + left[i]
for i in range(len(right)):
right[i] = self.fact.name + str(Parser.subgr_count) + "_" + right[i]
if (len(left)>1) and (len(right)>1):
raise Exception("ERROR: Ambigious multiple connection in line:\n\t{}".format(raw))
# many-to-one connection
elif len(left)>1:
if len(spl) < 4:
spl.append("")
morphs = self._multiple_morphs(spl[3], len(left))
if len(morphs)!=len(left):
raise Exception("\tERROR: Count of edges do not match to count of states in many to one connection!\n\t\t{}".format(raw))
self.fact.add_state(right[0])
for i, st in enumerate(left):
self.fact.add_state(st)
self.fact.add_connection(st, right[0], morphs[i].morphism)
# one-to-many connection; a selector may be used here
elif len(right)>1:
if len(spl) < 4:
spl.append("")
morphs = self._multiple_morphs(spl[3], len(right))
self.fact.add_state(left[0])
if len(morphs)!=len(right):
raise Exception("\tERROR: Count of edges do not match to count of states in one to many connection!\n\t\t{}".format(raw))
for i, st in enumerate(right):
self.fact.add_state(st)
self.fact.add_connection(left[0], st, morphs[i].morphism, morphs[i].order)
# one-to-one connection
else:
self.fact.add_state(left[0])
self.fact.add_state(right[0])
if len(spl)==4:
pr =self._param_from_props(spl[3])
self.fact.add_connection(left[0], right[0], pr.morphism, ordr=pr.order if pr.order is not None else 0)
elif len(spl)==3:
self.fact.add_connection(left[0], right[0], None)
def parse_file(self, filename):
# @todo The program will not work if the input file is encoded in anything other than UTF-8
file = open(filename, encoding='utf-8')
dot = file.read()
self._check_brackets(dot)
comments = [m for m in re.finditer(r'\".*\"', dot)]
for m in comments:
dot=dot[:m.span()[0]]+(dot[m.span()[0]:m.span()[1]]).replace(' ','\0')+dot[m.span()[1]:]
dot = re.sub(r"[ \t\r]", "", dot) #deleting all spaces
dot = re.sub(r"((digraph)|}|{)", "", dot)
dot = re.sub(r"\/\/.*", "", dot)
dot = re.sub(r"^\n$", "", dot)
dotlines = dot.splitlines()
dotlines = list(filter(None, dotlines))
self.fact.name = dotlines[0]
dotlines = dotlines[1:]
# ent_re - regular expr for edges, states, functions properties
ent_re = re.compile(r"^\w+\[.*\]$")
# top_re - regular expr for topology properties, most time consuming one
top_re = re.compile(r"^(\w+,?)+(->|=>)(\w+,?)+(\[(\w+=(\(?\w+,?\)?)+,?)+\])?")
# (r"^\w[\w\s,]*(->|=>)\s*\w[\w\s,=\[\]()]*$")
for i, ln in enumerate(dotlines):
if ent_re.match(ln):
name, parm = self._param_from_entln(ln)
self.fact.entities[name] = parm
elif top_re.match(ln):
self._topology(ln)
return self.fact.build(Parser.subgr_count)
checked=[]
bushes = {}
selectorends = {}
def generate_cpp(self, filename=None):
self.fact.graph.init_state.input_edges_number =0
states_to_check = [self.fact.graph.init_state]
while len(states_to_check)!=0:
for st in states_to_check:
self.checked.append(st)
states_to_check.remove(st)
bush = _Bush(st)
bush.grow_bush()
self.bushes[st] = bush
for outs in bush.outstates:
if outs not in states_to_check and outs not in self.checked:
states_to_check.append(outs)
send_token(self.fact.graph.init_state, self.bushes, [])
preds, morphs, sels, st, body = print_graph(self.fact.graph.init_state, self.fact.entities, self.bushes)
from mako.template import Template
if filename is not None:
f = open(filename, "w")
else:
f= open(self.fact.name + ".cpp", "w")
print(Template(filename="./cpp/template.cpp").render(preds=preds, morphs = morphs, sels = sels, states=st, body=body), file=f)
def print_graph(cur_state, entities, bushes):
checked = []
toloadpred = []
toloadmorph = []
toloadsel =[]
tocheck = [cur_state]
body = ""
while len(tocheck) !=0:
cur_state=tocheck[0]
cur_b = bushes[cur_state]
cur_b.token+=1
if cur_b.token < cur_b.state.input_edges_number - cur_b.state.looped_edges_number:
tocheck.remove(cur_state)
tocheck.append(cur_state)
continue
if cur_state in checked:
tocheck.remove(cur_state)
continue
if len(cur_b.branches)>1 or len(cur_b.incomes)>1:
body+="{}:\n".format(cur_state.name)
if len(cur_b.incomes)!=0:
if cur_b.state.comment!="" and cur_b.state.comment is not None:
print("STcomm:", cur_b.state.comment)
body+="//"+cur_b.state.comment+"\n"
stri = "false "
for inc in cur_b.incomes:
stri += "|| SEL_{}[{}] ".format(inc["st"].name, inc["i"])
body+="if (!({}))".format(stri)
body+="{\n\tfor (int seli = 0;"+" seli < {};".format(len(cur_state.transfers))+" seli++)\n"+ "\t\tSEL_{}[seli]=false;".format(cur_state.name)+"\n}"
if cur_state.selector.name != "":
# print(cur_state.name, cur_state.selector)
if cur_state.selector not in toloadsel:
toloadsel.append(cur_state.selector)
body+="else {\n"+ "\tSEL_{} = {}(&data);//{}\n".format(cur_state.name, cur_state.selector, cur_state.selector.comment )+"}\n"
else:
body+="else {\n\tfor (int seli = 0;"+" seli < {};".format(len(cur_state.transfers))+" seli++)\n"+"\t\tSEL_{}[seli]=true;".format(cur_state.name)+"\n}\n"
for i, br in enumerate(cur_b.branches):
body+="if (SEL_{}[{}])".format(cur_state.name, i)+"{\n"
if br[len(br)-1].output_state not in tocheck:
tocheck.append(br[len(br)-1].output_state)
if br[len(br)-1].output_state in checked or br[len(br)-1].output_state is cur_state:
stri, toloadpred, toloadmorph = cur_b.cpp_branch(i, toloadpred, toloadmorph)
body+=stri+"\tgoto {};\n".format(br[len(br)-1].output_state.name)+"}\n"
else:
stri, toloadpred, toloadmorph = cur_b.cpp_branch(i, toloadpred, toloadmorph)
body+=stri+"}\n"
tocheck.remove(cur_state)
checked.append(cur_state)
return _unique(toloadpred), _unique(toloadmorph), _unique(toloadsel), checked, body
def _unique(lst):
for i, el in enumerate(lst):
for el2 in lst[i+1:]:
if el2.module == el.module and el2.name == el.name:
lst.remove(el2)
return lst
def send_token(cur_state, bushes, checked):
cur_b = bushes[cur_state]
if cur_state in checked:
return
if len(cur_b.outstates)==0:
return
if len(cur_b.incomes) == cur_b.state.input_edges_number - cur_b.state.looped_edges_number:
checked.append(cur_state)
for i,br in enumerate(cur_b.branches):
bushes[br[len(br)-1].output_state].incomes.append({"st":cur_state, "i":i})
send_token(br[len(br)-1].output_state,bushes, checked)
class _Bush():
__slots__=(
'state',
'selector',
'branches',
'outstates',
'token',
'incomes',
'selectorfin'
)
def __init__(self, state):
self.state = state
self.selector = state.selector
self.branches = []
self.outstates = []
self.token = 0
self.incomes = []
def grow_bush(self):
for t in self.state.transfers:
branch = [t]
self._gen_branch(t.output_state, branch)
def _gen_branch(self, cur_state, branch):
while len(cur_state.transfers)==1 and cur_state.input_edges_number==1:
if cur_state._proxy_state is not None:
cur_state=cur_state._proxy_state
tr = cur_state.transfers[0]
branch.append(tr)
cur_state = tr.output_state
self.branches.append(branch)
if cur_state not in self.outstates:
self.outstates.append(cur_state)
def cpp_branch(self, i, toloadpred, toloadmorph):
res = ""
for tr in self.branches[i]:
edge = tr.edge
if edge.comment!="":
res+="\t//{}\n".format(edge.comment)
if edge.pred_f.name != "":
if edge.pred_f not in toloadpred:
toloadpred.append(edge.pred_f)
res+="\tcheck_pred({}(&data), \"{}\");".format(edge.pred_f, edge.pred_f)
res+="//{}\n".format(edge.pred_f.comment) if edge.pred_f.comment != "" else "\n"
if edge.morph_f.name != "":
if edge.morph_f not in toloadmorph:
toloadmorph.append(edge.morph_f)
res+="\t{}(&data);".format(edge.morph_f)
res+="//{}\n".format(edge.morph_f.comment) if edge.morph_f.comment != "" else "\n"
return res, toloadpred, toloadmorph
import pickle
from datetime import date
from typing import Sequence, Mapping, TypedDict
from comsdk.misc import *
from comsdk.communication import BaseCommunication, LocalCommunication, SshCommunication, Host
from comsdk.distributed_storage import *
from comsdk.edge import Func, Edge, dummy_predicate
from comsdk.graph import Graph, State
CopiesList = TypedDict('CopiesList', {'path': str, 'new_name': str})
class Research:
"""
Class Research is a representation of a group of different calculations collected into what we call a Research.
Each ''calculation'' corresponds to the launch of a graph-based scenario which produces a set of files which we
treat as the results of the calculation. We thus call such a calculation a task. Therefore, a Research is a
collection of tasks. Each task is associated with a single directory (within the code, it may be denoted as
task_dir, if only directory name is of interest, or task_path, if the absolute path is of interest) whose name has
a very simple structure, @number@-@long_name@, so that each task is associated with its own unique number (also
called task_number within the code). Normally, one should use the task number to get any task-related information.
All the tasks are located in the research directory whose local (remote) absolute path is set by the class
property local_research_path (remote_research_path). The research directory has the following pattern:
@date@-@long_name@. Finally, we associate a short Research ID with each Research. The described structure is
independent of where these directories are located. It is assumed that there is a local root for research and
its remote analog. The latter should be available via any protocol supported by communication module. Class Research
can thus be set up in two regimes: local (remote_comm is None) and local-remote (remote_comm is not None).
Typically, one should construct an instance of Research based on the configuration file called config_research.json.
There are two static functions for this purpose: Research.open() and Research.create(). The former creates an
instance of Research based on the existing Research (one should pass its Research ID to open()) described in the
configuration file and the latter creates a new Research (thus, making a new directory in the local filesystem) and
adds all the necessary information about it in the configuration file. Also, any Research instance is automatically
augmented by the properties listed in 'RESEARCH_PROPS' dictionary in the configuration file.
For the Research constructor to understand where all the research directories are located, one must supply (either
directly in the constructor or in the configuration file) the potential root paths for the search (both for the
local and remote machines if the latter is specified). The first path in the list of the potential root paths is
called the default root path. A new Research will be created in the default path.
Note that different tasks belonging to the same research (i.e., they are associated with the same Research ID) may
be located at different root paths. When creating a new task, it will be located in the default root path.
.. todo::
Some way for saving auxiliary information about research and tasks (task date and description, for example)
should be implemented. Possibly, the same should be done for launcher scripts.
"""
def __init__(self, name: str,
continuing=False,
local_research_roots: Optional[Sequence[str]] = None,
remote_comm: Optional[BaseCommunication] = None,
remote_research_root: Optional[str] = None,
personal_task_shift=0):
"""
:param name: research description (if continuing == False) or research directory (if continuing == True)
:param continuing: if False, a new Research will be created. Otherwise, an existing Research will be read from the root path
:param local_research_roots: a list of local paths where research directories are searched for
:param remote_comm: BaseCommunication instance used for communication with remote machine
:param remote_research_root: path on the remote machine where research directories are searched for
"""
self._local_research_root = local_research_roots[0]
self._local_root = os.path.dirname(self._local_research_root)
self._remote_research_root = remote_research_root
self._personal_task_shift = personal_task_shift
self._tasks_number = personal_task_shift
self._local_comm = LocalCommunication(Host()) # local communication created automatically, no need to pass it
self._remote_comm = remote_comm
self._distr_storage = DistributedStorage(local_research_roots, prior_storage_index=0)
self._local_research_path = None
if not continuing:
# interpret name as name without date
self._research_dir = make_suitable_research_dir(name)
if self._distr_storage.get_dir_path(self._research_dir) is not None:
raise ResearchAlreadyExists("Research with name '{}' already exists, "
"choose another name".format(self._research_dir))
self._local_research_path = self._distr_storage.make_dir(self._research_dir)
print('Started new research at {}'.format(self._local_research_path))
else:
# interpret name as the full research id
self._research_dir = name
self._local_research_path = self._load_research_data()
@classmethod
def open(cls, research_id: str,
remote_comm: Optional[BaseCommunication] = None):
"""
:param research_id: Research ID used to find a relevant research
:param remote_comm: BaseCommunication instance used for communication with remote machine
:return: new Research instance
"""
with open(os.path.expanduser('~/.comsdk/config_research.json'), 'r') as f:
conf = json.load(f)
res = Research(conf['RESEARCH'][research_id],
continuing=True,
local_research_roots=conf['LOCAL_HOST']['research_roots'],
remote_comm=remote_comm,
remote_research_root=conf['REMOTE_HOSTS'][remote_comm.machine_name]['research_root']
if remote_comm is not None else None,
personal_task_shift=conf['PERSONAL_TASK_SHIFT'])
res._add_properties(conf['RESEARCH_PROPS'])
return res
@classmethod
def create(cls, new_research_id: str, new_research_descr: str,
remote_comm: Optional[BaseCommunication] = None):
"""
:param new_research_id: Research ID (short name for this research)
:param new_research_descr: relatively long research name
:param remote_comm: BaseCommunication instance used for communication with remote machine
:return: new Research instance
"""
with open(os.path.expanduser('~/.comsdk/config_research.json'), 'r+') as f:
conf = json.load(f)
conf['RESEARCH'][new_research_id] = make_suitable_research_dir(new_research_descr)
f.seek(0)
json.dump(conf, f, indent=4)
f.truncate()
res = Research(new_research_descr,
continuing=False,
local_research_roots=conf['LOCAL_HOST']['research_roots'],
remote_comm=remote_comm,
remote_research_root=conf['REMOTE_HOSTS'][remote_comm.machine_name]['research_root']
if remote_comm is not None else None,
personal_task_shift=conf['PERSONAL_TASK_SHIFT'])
res._add_properties(conf['RESEARCH_PROPS'])
return res
@property
def local_research_path(self) -> str:
return self._local_research_path
@property
def remote_research_path(self) -> str:
return os.path.join(self._remote_research_root, self._research_dir)
@property
def local_root(self) -> str:
return self._local_root
@property
def research_dir(self) -> str:
return self._research_dir
def __getstate__(self) -> dict:
return {
'research_dir': self._research_dir,
'local_research_path': self._local_research_root,
'remote_research_path': self._remote_research_root,
'personal_task_shift': self._personal_task_shift,
'remote_comm': self._remote_comm.__getstate__(),
}
def __setstate__(self, state):
self._personal_task_shift = state['personal_task_shift']
self._tasks_number = self._personal_task_shift
self._local_comm = LocalCommunication(Host())
self._local_research_root = state['local_research_path']
self._remote_research_root = state['remote_research_path']
self._remote_comm = None
if state['remote_comm'] is not None:
self._remote_comm = SshCommunication.__new__(SshCommunication)
self._remote_comm.__setstate__(state['remote_comm'])
self._distr_storage = DistributedStorage((self._local_research_root,), prior_storage_index=0)
self._research_dir = state['research_dir']
self._local_research_path = self._load_research_data()
def _add_properties(self, props: Mapping[str, Any]) -> None:
for prop_name, prop_value in props.items():
self.__setattr__(prop_name, prop_value)
def _load_research_data(self) -> str:
# find corresponding date/name
# construct object from all data inside
research_path = self._distr_storage.get_dir_path(self._research_dir)
if research_path is None:
raise ResearchDoesNotExist("Research '{}' does not exist".format(self._research_dir))
print('Loaded research at {}'.format(research_path))
# determine maximum task number to set the number for the next possible task
dirnames, _ = self._distr_storage.listdir(self._research_dir)
for dir_ in dirnames:
if dir_ != 'report':
task_number, _ = split_task_dir(dir_)
if task_number > self._tasks_number:
self._tasks_number = task_number
self._tasks_number += 1
print('Next created task in the current research will hold the following number: {}'.format(self._tasks_number))
return research_path
def create_task(self, name: str) -> int:
"""
Creates a new task in the current research making a new local directory
:param name: task name
:return: task number
"""
task_number = self._get_next_task_number()
local_task_dir = self._make_task_path(task_number, name)
os.mkdir(local_task_dir)
return task_number
def grab_task_results(self, task_number: int,
copies_list: Optional[Sequence[CopiesList]] = None):
"""
Moves task content from the remote machine to the local one. Locally, the task content will appear in the task
directory located in the research directory.
:param task_number: task number
:param copies_list: a list defining which objects we wish to copy from the remote machine. It consists of
dictionaries each having keys 'path' (path of object we wish to copy relative to the task directory) and
'new_name' (path of this object on the local machine relative to the task directory)
"""
task_results_local_path = self.get_task_path(task_number)
task_results_remote_path = self.get_task_path(task_number, at_remote_host=True)
if copies_list is None: # copy all data
paths = self._remote_comm.listdir(task_results_remote_path)
for file_or_dir in paths:
self._remote_comm.copy('/'.join((task_results_remote_path, file_or_dir)), task_results_local_path,
'from_remote', show_msg=True)
else:
for copy_target in copies_list:
# we consider copy targets as relative to task's dir
remote_copy_target_path = '/'.join((task_results_remote_path, copy_target['path']))
self._remote_comm.copy(remote_copy_target_path, task_results_local_path, 'from_remote', show_msg=True)
if 'new_name' in copy_target:
os.rename(os.path.join(task_results_local_path, os.path.basename(copy_target['path'])),
os.path.join(task_results_local_path, copy_target['new_name']))
def _make_task_path(self, task_number: int, task_name: str, at_remote_host=False) -> str:
task_path = None
task_dir = get_task_full_name(task_number, task_name)
if at_remote_host:
task_path = os.path.join(self._remote_research_root, self._research_dir, task_dir)
else:
task_path = os.path.join(self._local_research_path, task_dir)
return task_path
def get_task_path(self, task_number: int, at_remote_host=False) -> str:
"""
Return absolute task path based on its number
:param task_number: task number
:param at_remote_host: return the path on the remote machine (if True) or on the local one (if False)
:return: absolute task path
"""
task_path = None
task_name = self._get_task_name_by_number(task_number)
rel_task_dir = os.path.join(self._research_dir, get_task_full_name(task_number, task_name))
if at_remote_host:
if self._remote_comm is None:
raise ValueError('Cannot get a task path on the remote: remote communication is not set up')
task_path = '{}/{}'.format(self._remote_research_root, rel_task_dir)
else:
task_path = self._distr_storage.get_dir_path(rel_task_dir)
return task_path
def dump_object(self, task_number: int, obj: object, obj_name: str) -> None:
"""
Dumps any python object (using pickle) to the binary file, named obj_name + '.pyo', in the task directory
associated with the task number
:param task_number: task number
:param obj: any python object
:param obj_name: file name to which obj will be saved (without extension)
"""
print('Dumping ' + obj_name)
f = open(os.path.join(self.get_task_path(task_number), obj_name + '.pyo'), 'wb')
pickle.dump(obj, f)
f.close()
def load_object(self, task_number: int, obj_name: str):
"""
Load any python object dumped using pickle from the binary file, named obj_name + '.pyo' and located in the task
directory associated with the task number
:param task_number: task number
:param obj_name: file name from which obj will be loaded (without extension)
:return: python object
"""
print('Loading ' + obj_name)
f = open(os.path.join(self.get_task_path(task_number), obj_name + '.pyo'), 'rb')
obj = pickle.load(f)
f.close()
return obj
def _get_next_task_number(self) -> int:
self._tasks_number += 1
return self._tasks_number - 1
def _get_task_name_by_number(self, task_number: int) -> str:
find_data = self._distr_storage.find_dir_by_named_regexp(self._research_dir,
r'^{}-(?P<task_name>\S+)'.format(task_number))
if find_data is None:
raise Exception("No task with number '{}' is found".format(task_number))
return find_data[1]['task_name']
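# Illustrative sketch (not executed anywhere): a typical local workflow with the Research class.
# It assumes that ~/.comsdk/config_research.json exists; the research id 'MYRES', the description
# and the task name below are placeholders.
def _example_research_workflow():
    res = Research.create('MYRES', 'My first research')   # makes a new research dir and registers it
    task_number = res.create_task('first_task')           # makes directory @number@-first_task
    print(res.get_task_path(task_number))                 # absolute local path of the task
    res.dump_object(task_number, {'a': 1}, 'input_data')  # pickled to input_data.pyo in the task dir
    same_res = Research.open('MYRES')                      # the research can later be reopened by its id
    return same_res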
class ResearchAlreadyExists(Exception):
pass
class ResearchDoesNotExist(Exception):
pass
def make_suitable_name(name: str) -> str:
return '-'.join(name.split())
def make_suitable_task_name(name: str) -> str:
return '_'.join(name.split())
def make_suitable_research_dir(descr: str) -> str:
return '-'.join([str(date.today()), make_suitable_name(descr)])
def get_task_full_name(task_number: int, task_name: str) -> str:
return str(task_number) + '-' + make_suitable_task_name(task_name)
def split_task_dir(task_dir: str) -> (int, str):
parsing_params = parse_by_named_regexp(r'^(?P<task_number>\d+)-(?P<task_name>\S+)', task_dir)
if parsing_params is None:
raise Exception("No task directory '{}' is found".format(task_dir))
return int(parsing_params['task_number']), parsing_params['task_name']
def retrieve_trailing_float_from_task_dir(task_dir: str) -> float:
matching = re.search(r'^(?P<task_number>\d+)-(?P<task_name>\S+)_(?P<float_left>\d+)\.(?P<float_right>\d+)',
task_dir)
if matching is None:
raise Exception('Incorrect task directory is given')
return float('{}.{}'.format(matching.group('float_left'), matching.group('float_right')))
class CreateTaskEdge(Edge):
def __init__(self, res, task_name_maker, predicate=dummy_predicate, remote=False):
self._res = res
self._task_name_maker = task_name_maker
self._remote = remote
super().__init__(predicate, Func(func=self.execute))
def execute(self, data):
task_name = self._task_name_maker(data)
task_number = self._res.create_task(task_name)
data['__WORKING_DIR__'] = self._res.get_task_path(task_number)
if self._remote:
data['__REMOTE_WORKING_DIR__'] = self._res.get_task_path(task_number, at_remote_host=True)
class CreateTaskGraph(Graph):
def __init__(self, res, task_name_maker, array_keys_mapping=None, remote=False):
s_init, s_term = self.create_branch(res, task_name_maker, array_keys_mapping=array_keys_mapping, remote=remote)
super().__init__(s_init, s_term)
@staticmethod
def create_branch(res, task_name_maker, array_keys_mapping=None, remote=False):
s_init = State('READY_FOR_TASK_CREATION', array_keys_mapping=array_keys_mapping)
s_term = State('TASK_CREATED')
s_init.connect_to(s_term, edge=CreateTaskEdge(res, task_name_maker=task_name_maker, remote=remote))
return s_init, s_term
digraph CODEOBJECT_GENERATOR
{
// Definition of handler functions
FUNC_1 [module=case_gen_funcs, entry_func=function_1]
FUNC_2 [module=case_gen_funcs, entry_func=function_2]
FUNC_3 [module=case_gen_funcs, entry_func=function_3]
SAVE_TO_DB [module=case_gen_funcs, entry_func=save_to_db]
SAVE_TO_FILE [module=case_gen_funcs, entry_func=save_to_file]
REPEAT [module=case_gen_funcs, entry_func=repeat]
EXIT [module=case_gen_funcs, entry_func=exit]
CREATE_DUMP [module=case_gen_funcs, entry_func=create_dump]
// Definition of predicate functions
PREDICATE_X [module=predicate_funcs, entry_func=predicate_x]
PREDICATE_Y [module=predicate_funcs, entry_func=predicate_y]
SELECTOR [module=predicate_funcs, entry_func=selector]
// Definition of graph edges (morphisms)
EDGE_1 [predicate=PREDICATE_X, function=FUNC_1]
EDGE_2 [predicate=PREDICATE_Y, function=FUNC_2]
EDGE_3 [predicate=PREDICATE_X, function=FUNC_3]
EDGE_4 [predicate=PREDICATE_Y, function=SAVE_TO_DB]
EDGE_5 [predicate=PREDICATE_X, function=SAVE_TO_FILE]
EDGE_6 [predicate=PREDICATE_Y, function=REPEAT]
EDGE_7 [predicate=PREDICATE_X, function=EXIT]
EDGE_8 [function=EXIT]
EDGE_9 [predicate=CHECK_DUMP, function=EXIT]
EDGE_10 [function=CREATE_DUMP]
// Parallelization is configured for this state
CONTENT_SUBSTITUTED [parallelism=threading]
// Definition of the graph topology
__BEGIN__ -> INPUT_READY
INPUT_READY -> TEPMLATE_COPIED [morphism=EDGE_1]
TEPMLATE_COPIED -> NAMES_SUBSTITUTED [morphism=EDGE_2]
NAMES_SUBSTITUTED -> CONTENT_SUBSTITUTED [morphism=EDGE_3]
CONTENT_SUBSTITUTED => DUMP_CREATED [morphism=EDGE_10]
CONTENT_SUBSTITUTED -> RESULT_SAVED [morphism=EDGE_4]
CONTENT_SUBSTITUTED -> RESULT_SAVED [morphism=EDGE_5]
// Depending on the value returned by SELECTOR, the graph either goes back for another pass or terminates
RESULT_SAVED -> INPUT_READY, __END__ [selector=SELECTOR, morphism=(EDGE_6, EDGE_7)]
RESULT_SAVED, DUMP_CREATED -> __END__ [morphism=(EDGE_8, EDGE_9)]
}
\ No newline at end of file
{
"LOCAL_HOST": {
"research_roots": "...",
"custom_programs": {
"@path_to_binaries@": ["@bin1@", "@bin2@", ...],
...
},
"custom_commands": {
"@command_name@": "@command itself@"
}
},
"REMOTE_HOSTS": {
"@remote_host_sid@": {
"ssh_host": "...",
"max_cores": ...,
"username": "...",
"password": "...",
"pkey": "...",
"research_path": "...",
"env_programs": ["@bin1@", "@bin1@", ...],
"custom_programs": {
"@path_to_binaries@": ["@bin1@", "@bin2@", ...],
...
},
"custom_commands": {
"@command_name@": "@command itself@"
},
"sge_template_name": "...",
"job_setter": "...",
"job_finished_checker": "..."
},
...
},
"RESEARCH": {
"@research_sid@": "@research_full_name@",
...
},
"RESEARCH_PROPS": {
...
},
"PERSONAL_TASK_SHIFT": 0,
"TEMPLATES_PATH": "...",
"MEETINGS_PATH": "..."
}
\ No newline at end of file
#include <iostream>
#include <anymap.h>
#include <list>
extern "C" {
int PrintHello(com::Anymap) {
std::cout<<"Hello!" << std::endl;
return 0;
}
int PrintBye(com::Anymap) {
std::cout<<"Bye!" << std::endl;
return 0;
}
int PrintA(com::Anymap) {
std::cout<<"A" << std::endl;
return 0;
}
int PrintB(com::Anymap) {
std::cout<<"B" << std::endl;
return 0;
}
bool ReturnTrue(){
return true;
}
bool ReturnFalse(){
return false;
}
std::list<bool> ThreeTrue(){
return {true, true, true};
}
}
\ No newline at end of file
g++ -c -fPIC ./dev/core/anymap.cpp -o anymap.o -I./dev;
g++ -c -fPIC tests.cpp -o tests.o -I./dev;
# g++ -c -fPIC ./dev/iniparser/iniparser.cpp -o iniparser.o -I./dev;
g++ tests.o anymap.o -shared -o libtest.so; rm tests.o anymap.o;
if g++ $1 -o graph.out -I./dev ./dev/core/anymap.cpp -ldl; then
./graph.out;
else
echo "Not Compiled!";
fi;
\ No newline at end of file
#include <libtools.h>
#include <anymap.h>
#include <iniparser.h>
typedef std::function<int(com::Anymap*)> IntFunc;
typedef std::function<bool(com::Anymap*)> BoolFunc;
typedef std::function<bool*(com::Anymap*)> BoolArrFunc;
IntFunc LoadEntry(std::string lib, std::string func) {
DllHandle handler;
return com::lib::loadFunction<int (com::Anymap*), DllHandle>(lib.c_str(), func.c_str(), handler);
}
BoolFunc LoadPred(std::string lib, std::string func) {
DllHandle handler;
return com::lib::loadFunction<bool (com::Anymap*), DllHandle>(lib.c_str(), func.c_str(), handler);
}
BoolArrFunc LoadSelector(std::string lib, std::string func){
DllHandle handler;
return com::lib::loadFunction<bool* (com::Anymap*), DllHandle>(lib.c_str(), func.c_str(), handler);
}
void check_pred(bool predval, std::string predname) {
if (!predval) {
std::cout<<"Predicate "<<predname<<" returned FALSE!"<<std::endl;
exit(-1);
}
}
int main(int argc, char const *argv[])
{
auto data = com::Anymap();
//Predicates
% for pred in preds:
auto ${pred} = LoadPred("${pred.module}", "${pred.name}");
% endfor
//Entry functions
% for morph in morphs:
auto ${str(morph)} = LoadEntry("${morph.module}", "${morph.name}");
% endfor
//Selectors
% for sel in sels:
auto ${str(sel)} = LoadSelector("${sel.module}", "${sel.name}");
% endfor
//Branch tokens
bool* SEL_${states[0].name} = new bool[${len(states[0].transfers)}];
std::fill_n(SEL_${states[0].name}, ${len(states[0].transfers)}, true);
% for st in states[1:]:
bool* SEL_${st.name} = new bool[${len(st.transfers)}];
std::fill_n(SEL_${st.name}, ${len(st.transfers)}, false);
% endfor
${body}
TERM:
std::cout<<"Termination!\n";
return 0;
}
\ No newline at end of file
[build-system]
# These are the assumed default build requirements from pip:
# https://pip.pypa.io/en/stable/reference/pip/#pep-517-and-518-support
requires = ["setuptools>=43.0.0", "wheel"]
build-backend = "setuptools.build_meta"
\ No newline at end of file
[metadata]
# This includes the license file(s) in the wheel.
# https://wheel.readthedocs.io/en/stable/user_guide.html#including-license-files-in-the-generated-wheel-file
license_files = LICENSE
\ No newline at end of file
"""A setuptools based setup module.
See:
https://packaging.python.org/guides/distributing-packages-using-setuptools/
https://github.com/pypa/sampleproject
"""
# Always prefer setuptools over distutils
from setuptools import setup, find_packages
import pathlib
here = pathlib.Path(__file__).parent.resolve()
# Get the long description from the README file
long_description = (here / "README.md").read_text(encoding="utf-8")
# Arguments marked as "Required" below must be included for upload to PyPI.
# Fields marked as "Optional" may be commented out.
setup(
# This is the name of your project. The first time you publish this
# package, this name will be registered for you. It will determine how
# users can install this project, e.g.:
#
# $ pip install sampleproject
#
# And where it will live on PyPI: https://pypi.org/project/sampleproject/
#
# There are some restrictions on what makes a valid project name
# specification here:
# https://packaging.python.org/specifications/core-metadata/#name
name="comsdk", # Required
# Versions should comply with PEP 440:
# https://www.python.org/dev/peps/pep-0440/
version="0.1.0", # Required
# This is a one-line description or tagline of what your project does. This
# corresponds to the "Summary" metadata field:
# https://packaging.python.org/specifications/core-metadata/#summary
description="Tools for computational research relying on distributed computing and member interaction", # Optional
long_description=long_description, # Optional
long_description_content_type="text/markdown", # Optional (see note above)
url="https://sa2systems.ru:88/com/pycomsdk", # Optional
author="Anton Pershin", # Optional
author_email="tony.pershin@gmail.com", # Optional
# Classifiers help users find your project by categorizing it.
#
# For a list of valid classifiers, see https://pypi.org/classifiers/
classifiers=[ # Optional
# How mature is this project? Common values are
# 3 - Alpha
# 4 - Beta
# 5 - Production/Stable
"Development Status :: 3 - Alpha",
"Intended Audience :: Science/Research",
"Topic :: Software Development",
# Pick your license as you wish
"License :: OSI Approved :: MIT License",
# Specify the Python versions you support here. In particular, ensure
# that you indicate you support Python 3. These classifiers are *not*
# checked by 'pip install'. See instead 'python_requires' below.
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3 :: Only",
],
# This field adds keywords for your project which will appear on the
# project page. What does your project relate to?
#
# Note that this is a list of additional keywords, separated
# by commas, to be used to assist searching for the distribution in a
# larger catalog.
keywords="graph-based software engineering, distributed computing, SciOps", # Optional
# When your source code is in a subdirectory under the project root, e.g.
# `src/`, it is necessary to specify the `package_dir` argument.
#package_dir={"": "src"}, # Optional
# You can just specify package directories manually here if your project is
# simple. Or you can use find_packages().
#
# Alternatively, if you just want to distribute a single Python file, use
# the `py_modules` argument instead as follows, which will expect a file
# called `my_module.py` to exist:
#
# py_modules=["my_module"],
#
#packages=find_packages(where="src"), # Required
packages=find_packages(), # Required
# Specify which Python versions you support. In contrast to the
# 'Programming Language' classifiers above, 'pip install' will check this
# and refuse to install the project if the version does not match. See
# https://packaging.python.org/guides/distributing-packages-using-setuptools/#python-requires
python_requires=">=3.9.5, <4",
# This field lists other packages that your project depends on to run.
# Any package you put here will be installed by pip when your project is
# installed, so they must be valid existing projects.
#
# For an analysis of "install_requires" vs pip's requirements files see:
# https://packaging.python.org/discussions/install-requires-vs-requirements/
# Here is how to keep both install_requires and requirements.txt
# without duplication: https://stackoverflow.com/questions/14399534/reference-requirements-txt-for-the-install-requires-kwarg-in-setuptools-setup-py/16624700
install_requires=[
"numpy",
"jsons",
"mako",
"paramiko",
], # Optional
# If there are data files included in your packages that need to be
# installed, specify them here.
#package_data={ # Optional
# "sample": ["package_data.dat"],
#},
# Although 'package_data' is the preferred approach, in some cases you may
# need to place data files outside of your packages. See:
# http://docs.python.org/distutils/setupscript.html#installing-additional-files
#
# In this case, 'data_file' will be installed into '<sys.prefix>/my_data'
#data_files=[("my_data", ["data/data_file"])], # Optional
# To provide executable scripts, use entry points in preference to the
# "scripts" keyword. Entry points provide cross-platform support and allow
# `pip` to create the appropriate form of executable for the target
# platform.
#
# For example, the following would provide a command called `sample` which
# executes the function `main` from this package when invoked:
#entry_points={ # Optional
# "console_scripts": [
# "sample=sample:main",
# ],
#},
# List additional URLs that are relevant to your project as a dict.
#
# This field corresponds to the "Project-URL" metadata fields:
# https://packaging.python.org/specifications/core-metadata/#project-url-multiple-use
#
# Examples listed include a pattern for specifying where the package tracks
# issues, where the source is hosted, where to say thanks to the package
# maintainers, and where to support the project financially. The key is
# what's used to render the link text on PyPI.
project_urls={ # Optional
"Bug Reports": "https://sa2systems.ru:88/com/pycomsdk/issues",
"Source": "https://sa2systems.ru:88/com/pycomsdk",
},
)
\ No newline at end of file
def dummy_edge(data):
pass
def increment_a_edge(data):
data['a'] += 1
def increment_a_array_edge(data):
for i in range(len(data['a'])):
data['a'][i] += 1
def increment_b_edge(data):
data['b'] += 1
def decrement_a_edge(data):
data['a'] -= 1
def nonzero_predicate(data):
return data['a'] != 0
def positiveness_predicate(data):
return data['a'] > 0
def nonpositiveness_predicate(data):
return data['a'] <= 0
def copy_to_c(data):
data['c'] = data['a']
def selector_a_nonpositive(data):
res = data['a'] <= 0
return [res, not res]
def true_predicate(data):
return True
digraph ADD {
FUNC [module=test_funcs.simplest, entry_func=increment_b_edge]
PRED [module=test_funcs.simplest, entry_func=positiveness_predicate]
MORPH [predicate=PRED, function=FUNC, comment="ADD"]
__BEGIN__ -> ST [morphism = MORPH]
ST -> __END__
}
digraph SIMPLEST {
FUNCA [module=test_funcs.simplest, entry_func=increment_a_edge]
FUNCB [module=test_funcs.simplest, entry_func=increment_b_edge]
PRED [module=test_funcs.simplest, entry_func=true_predicate]
INCR_A [predicate=PRED, function=FUNCA]
INCR_B [predicate=PRED, function=FUNCB]
__BEGIN__ -> ROOT
ROOT -> BR1, BR2 [morphism=(INCR_A, INCR_B)]
BR1 -> BR1_ST [morphism=INCR_A]
BR2 -> BR2_ST [morphism=INCR_B]
BR1_ST, BR2_ST -> MERGE [morphism=(INCR_A, INCR_B)]
MERGE -> __END__
}
digraph SIMPLEST {
FUNCA [module=test_funcs.simplest, entry_func=increment_a_edge]
PRED [module=test_funcs.simplest, entry_func=true_predicate]
INCR_A [predicate=PRED, function=FUNCA]
ST1 [subgraph=tests/adot/trivial.adot]
ST2 [subgraph=tests/adot/cycled.adot]
ST3 [subgraph=tests/adot/branching.adot]
__BEGIN__ -> ST1
ST1 -> ST2 [morphism=INCR_A]
ST2 -> ST3 [morphism=INCR_A]
ST3 -> __END__
}
digraph SIMPLEST {
FUNCA [module=libtest, entry_func=IncA]
FUNCB [module=libtest, entry_func=IncB]
CHECKA [module=libtest, entry_func=CheckAEq4]
CHECKB [module=libtest, entry_func=CheckBEq4]
SETA [module=libtest, entry_func=SetAEq1]
SETB [module=libtest, entry_func=SetBEq1]
PASS [module=libtest, entry_func=PassFunc]
PRED [module=libtest, entry_func=PassPred]
INCR_A [predicate=PRED, function=FUNCA]
INCR_B [predicate=PRED, function=FUNCB]
CH_A [predicate=CHECKA, function = PASS]
SET_A [predicate=PRED, function=SETA]
SET_B [predicate=PRED, function=SETB]
CH_B [predicate=CHECKB, function = PASS]
__BEGIN__ -> ROT [morphism=SET_A]
ROT -> ROOT [morphism=SET_B]
ROOT -> BR1, BR2 [morphism=(INCR_A, INCR_B)]
BR1 -> BR1_ST [morphism=INCR_A]
BR2 -> BR2_ST [morphism=INCR_B]
BR1_ST, BR2_ST -> MERGE [morphism=(INCR_A, INCR_B)]
MERGE -> __END__, __END__ [morphism=(CH_A, CH_B)]
}
digraph SIMPLEST {
FUNCA [module=test_funcs.simplest, entry_func=increment_a_edge]
PRED [module=test_funcs.simplest, entry_func=true_predicate]
INCR_A [predicate=PRED, function=FUNCA]
ST1 [subgraph=tests/adot/cpptrivial.adot]
ST2 [subgraph=tests/adot/cppcycled.adot]
ST3 [subgraph=tests/adot/cppbranching.adot]
__BEGIN__ -> ST1
ST1 -> ST2
ST2 -> ST3
ST3 -> __END__
}
digraph CYCLED {
SETA [module=libtest, entry_func=SetAEq10]
FUNC [module=libtest, entry_func=DecA]
PRED [module=libtest, entry_func=PassPred]
SET [predicate=PRED, function=SETA]
MORPH [predicate=PRED, function=FUNC]
SEL [module = libtest, entry_func=SelectorA]
ST2 [selector = SEL]
__BEGIN__ -> ST1 [morphism=SET]
ST1 -> ST2 [morphism=MORPH]
ST2 -> ST1 [order=1]
ST2 -> __END__ [order = 2]
}
digraph SIMPLE {
FUNC [module=libtest, entry_func=IncA]
PRED [module=libtest, entry_func=PassPred]
MORPH [predicate=PRED, function=FUNC]
__BEGIN__ -> ST1 [morphism = MORPH]
ST1 -> ST2 [morphism = MORPH]
ST2 -> __END__ [morphism = MORPH]
}
digraph CYCLED {
FUNC [module=test_funcs.simplest, entry_func=decrement_a_edge]
PRED [module=test_funcs.simplest, entry_func=true_predicate]
MORPH [predicate=PRED, function=FUNC]
SEL [module = test_funcs.simplest, entry_func = selector_a_nonpositive]
ST2 [selector = SEL]
__BEGIN__ -> ST1
ST1 -> ST2 [morphism=MORPH]
ST2 -> ST1 [order=2]
ST2 -> __END__ [order = 1]
}
digraph gcdhom_inverted_model_pso
{
// Definition of processor functions
PASS_PROCESSOR [module=libcomsdk, entry_func=pass_processor]
CHECK_PSO_AGENT_REINIT [module=libgcdfes, entry_func=check_pso_agent_reinit, comment="Check whether an individual particle (particle displacement) in the swarm needs to be reinitialized within the particle swarm method."]
CHECK_PSO_SWARM_REINIT [module=libgcdfes, entry_func=check_pso_swarm_reinit, comment="Check whether the entire particle swarm needs to be reinitialized within the particle swarm method."]
PSO_AGENT_REINIT [module=libgcdfes, entry_func=pso_agent_reinit, comment="Reinitialization of an individual particle (particle displacement) in the swarm within the particle swarm method."]
PSO_SWARM_REINIT [module=libgcdfes, entry_func=pso_swarm_reinit, comment="Reinitialization of the entire particle swarm within the particle swarm method."]
PSO_SWARM_ANALYSING [module=libgcdfes, entry_func=pso_swarm_analysing, comment="Analysis of the entire particle swarm within the particle swarm method."]
PSO_HOM_AGENT_POSTPROC [module=libgcdfes, entry_func=pso_hom_agent_postproc, comment="Post-processing after solving an individual problem by the asymptotic homogenization method."]
PSO_TASK_DATA_REINIT [module=libgcdfes, entry_func=pso_task_data_reinit, comment="Reinitialization of the problem statement for analyzing the effective characteristics of the composite material by the asymptotic homogenization method."]
PSO_AGENT_INIT [module=libgcdfes, entry_func=pso_agent_init, comment="Initialization of an individual particle within the particle swarm method."]
PSO_SWARM_INIT [module=libgcdfes, entry_func=pso_swarm_init, comment="Initialization of the particle swarm."]
PSO_INIT [module=libgcdfes, entry_func=pso_swarm_init, comment="Initialization of the particle swarm method."]
// Definition of predicate functions
PASS_PREDICATE [module=libcomsdk, entry_func=pass_predicate]
// Definition of morphisms
PASS_MORPHISM [predicate=PASS_PREDICATE, function=PASS_PROCESSOR, comment="Pass morphism."]
PSO_AGENT_REINIT_MORPHISM [predicate=PASS_PREDICATE, function=PSO_AGENT_REINIT]
PSO_SWARM_REINIT_MORPHISM [predicate=PASS_PREDICATE, function=PSO_SWARM_REINIT]
PSO_SWARM_ANALYSING_MORPHISM [predicate=PASS_PREDICATE, function=PSO_SWARM_ANALYSING]
PSO_HOM_AGENT_POSTPROC_MORPHISM [predicate=PASS_PREDICATE, function=PSO_HOM_AGENT_POSTPROC]
PSO_TASK_DATA_REINIT_MORPHISM [predicate=PASS_PREDICATE, function=PSO_TASK_DATA_REINIT]
PSO_AGENT_INIT_MORPHISM [predicate=PASS_PREDICATE, function=PSO_AGENT_INIT]
PSO_SWARM_INIT_MORPHISM [predicate=PASS_PREDICATE, function=PSO_SWARM_INIT]
PSO_INIT_MORPHISM [predicate=PASS_PREDICATE, function=PSO_INIT]
// Definition of node attributes
S_1 [subgraph=gcdhom_preprocessor.adot]
S_5 [subgraph=gcdhom_processor.adot]
S_6 [selector=CHECK_PSO_AGENT_REINIT]
S_7 [selector=CHECK_PSO_SWARM_REINIT]
// Definition of the topology of the graph model of the finite element method
__BEGIN__ -> S_1
S_1 -> S_2 [morphism=PSO_INIT_MORPHISM]
S_2 -> S_3 [morphism=PSO_SWARM_INIT_MORPHISM]
S_3 -> S_4 [morphism=PSO_AGENT_INIT_MORPHISM]
S_4 -> S_5 [morphism=PSO_TASK_DATA_REINIT_MORPHISM]
S_5 -> S_6 [morphism=PSO_HOM_AGENT_POSTPROC_MORPHISM]
S_6 -> S_4, S_7 [morphism=(PSO_AGENT_REINIT_MORPHISM, PSO_SWARM_ANALYSING_MORPHISM), order=(10,20)]
S_7 -> S_4, S_8 [morphism=(PSO_SWARM_REINIT_MORPHISM, PASS_MORPHISM), order=(30,40)]
S_8 -> __END__ [comment = "Computation finished."]
}
digraph SIMPLEST {
FUNCA [module=test_funcs.simplest, entry_func=increment_a_edge]
FUNCB [module=test_funcs.simplest, entry_func=increment_b_edge]
PRED [module=test_funcs.simplest, entry_func=positiveness_predicate]
INCR_A [predicate=PRED, function=FUNCA]
INCR_B [predicate=PRED, function=FUNCB]
__BEGIN__ -> ROOT
ROOT -> BR1, BR2, BR3 [morphism=(INCR_A, INCR_A, INCR_A)]
//BR3 -> SIBL3_BR1, SIBL3_BR2 [morphism=(INCR_A, INCR_A)]
//BR2 -> SIBL2_BR1, SIBL2_BR2 [morphism=(INCR_A, INCR_A)]
//SIBL3_BR1 -> SIBL3_BR1_1, SIBL3_BR1_2 [morphism=(INCR_A, INCR_A)]
//SIBL3_BR1_1, SIBL3_BR1_2 -> TERM [morphism=(INCR_A, INCR_A)]
//BR1, SIBL2_BR1, SIBL2_BR2, TERM, SIBL3_BR2 -> __END__ [morphism=(INCR_A, INCR_A, INCR_A, INCR_A, INCR_A)]
BR1, BR2, BR3 -> __END__ [morphism=(INCR_A, INCR_A, INCR_A)]
}
digraph TEST_SUB {
FUNC [module=test_funcs.simplest, entry_func=increment_a_edge]
PRED [module=test_funcs.simplest, entry_func=positiveness_predicate]
MORPH [predicate=PRED, function=FUNC]
SEL [module=test_funcs.simplest, entry_func=selector_a_nonpositive]
ST2 [subgraph = tests/adot/file.adot]
ST3 [selector = SEL]
__BEGIN__ -> ST1 [morphism = MORPH]
ST1 -> ST2
ST2 -> ST3
ST3 -> __END__
}
digraph TRIVIAL {
FUNC [module=test_funcs.simplest, entry_func=increment_a_edge]
PRED [module=test_funcs.simplest, entry_func=true_predicate]
MORPH [predicate=PRED, function=FUNC, comment="ADD"]
__BEGIN__ -> ST1 [morphism = MORPH]
ST1 -> ST2 [morphism = MORPH]
ST2 -> __END__ [morphism = MORPH]
}
#include <fstream>
#include <string>
using namespace std;
int main(int argc, char* argv[])
{
string input_file_path(argv[1]);
string output_file_path("b.dat");
ifstream f_in(input_file_path);
int x;
f_in >> x;
ofstream f_out(output_file_path);
f_out << x*x;
return 0;
}
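The program above is the external executable used by the graph tests below: it reads an integer from the file given as its first argument and writes its square to `b.dat` in the current directory. A hedged sketch of the manual build-and-run steps (the tests perform the equivalent via `subprocess` and `ExecutableProgramEdge`; the paths here are illustrative):

```python
# Sketch only: build and invoke the square helper by hand.
import subprocess
subprocess.call('g++ square.cpp -o square', shell=True)  # compilation, as done in the test setup
subprocess.call('./square a.dat', shell=True)            # reads x from a.dat, writes x*x to ./b.dat
```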
#$ -cwd -V
#$ -l h_rt=12:00:00
#$ -pe smp 12
/home/home01/mmap/tests/square/square /home/home01/mmap/tests/square_test_dir/a.dat
#$ -cwd -V
#$ -l h_rt=12:00:00
#$ -pe smp 12
./findsoln -symms reflect_symmetry.asc -R 170.320 -o find-170.320 -es 1e-15 -eqb find-170.330/ubest.h5
qsub fe_170.315.sh
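The snippets above are SGE job scripts and a `qsub` submission line. The tests below recover the numeric job ID from the `qsub` confirmation message and then poll `qstat` until the job disappears; a minimal sketch of the ID extraction (the sample line is the one quoted in `set_job_id` below):

```python
# Sketch: parsing the qsub confirmation line to obtain the job ID (cf. set_job_id below).
qsub_stdout = ['Your job 664989 ("fe_170.310.sh") has been submitted']
job_id = int(qsub_stdout[0].split()[2])
print(job_id)  # 664989
```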
import unittest
from copy import deepcopy
import subprocess
import os
import random
from comsdk.graph import *
from comsdk.edge import *
from comsdk.communication import *
def dummy_edge(data):
pass
def increment_a_edge(data):
data['a'] += 1
def increment_a_array_edge(data):
for i in range(len(data['a'])):
data['a'][i] += 1
def increment_b_edge(data):
data['b'] += 1
def decrement_a_edge(data):
data['a'] -= 1
def write_a_edge(data):
a_filename = os.path.join(data['__CURRENT_WORKING_DIR__'], 'tests/square_test_dir/input/a.dat')
with open(a_filename, 'w') as f:
f.write(str(data['a']))
data['a_file'] = a_filename
def load_b_edge(data):
b_filename = os.path.join(data['__WORKING_DIR__'], data['b_file'])
with open(b_filename, 'r') as f:
data['b'] = int(f.read())
def nonzero_predicate(data):
return data['a'] != 0
def positiveness_predicate(data):
return data['a'] > 0
def nonpositiveness_predicate(data):
return data['a'] <= 0
def print_exception(exc_data, data):
print('exception data: {}'.format(exc_data))
print('current state of data: {}'.format(data))
def print_stdout(data, stdout_lines):
# print(stdout)
return {}
def check_task_finished(data, stdout_lines):
'''
Example:
job-ID prior name user state submit/start at queue slots ja-task-ID
-----------------------------------------------------------------------------------------------------------------
663565 0.00053 RT700-tran scegr r 09/19/2018 23:51:22 24core-128G.q@dc2s2b1a.arc3.le 24
663566 0.00053 RT800-tran scegr r 09/19/2018 23:51:22 24core-128G.q@dc3s5b1a.arc3.le 24
663567 0.00053 RT900-tran scegr r 09/20/2018 00:00:22 24core-128G.q@dc4s2b1b.arc3.le 24
663569 0.00053 RT1000-tra scegr r 09/20/2018 00:05:07 24core-128G.q@dc1s1b3d.arc3.le 24
'''
job_finished = True
for line in stdout_lines[2:]:
items = line.split()
if int(items[0]) == data['job_ID']:
job_finished = False
return {'job_finished': job_finished}
def set_job_id(data, stdout_lines):
return {'job_ID': int(stdout_lines[0].split()[2])} # example: 'Your job 664989 ("fe_170.310.sh") has been submitted'
def _create_data_from_dict(d):
data = deepcopy(d)
data['__CURRENT_WORKING_DIR__'] = os.getcwd()
if '__WORKING_DIR__' not in data:
data['__WORKING_DIR__'] = data['__CURRENT_WORKING_DIR__']
return data
class GraphGoodCheck(unittest.TestCase):
initial_conditions = range(-10, 10)
@classmethod
def setUpClass(cls):
command_line = 'cd tests/square; g++ square.cpp -o square'
subprocess.call([command_line], shell=True)
local_host = Host()
local_host.add_program('square', os.path.join(os.getcwd(), 'tests', 'square'))
cls.local_comm = LocalCommunication(local_host)
cls.ssh_host = 'arc3.leeds.ac.uk'
cls.ssh_cores = 24
cls.ssh_user = 'mmap'
cls.ssh_pswd = '1bdwbzsc'
cls.ssh_path_to_tests = '/home/home01/mmap/tests'
remote_host = RemoteHost(ssh_host='arc3.leeds.ac.uk',
cores=24,
)
remote_host.add_program('square', '{}/square'.format(cls.ssh_path_to_tests))
remote_host.add_program('qsub')
remote_host.add_program('qstat')
cls.ssh_comm = SshCommunication(remote_host,
username=cls.ssh_user,
password=cls.ssh_pswd,
)
cls.ssh_comm.mkdir('{}/square_test_dir'.format(cls.ssh_path_to_tests))
@classmethod
def tearDownClass(cls):
aux.remove_if_exists('tests/square_test_dir/input/a.dat')
aux.remove_if_exists('tests/square_test_dir/output/b.dat')
cls.ssh_comm.rm('{}/square_test_dir'.format(cls.ssh_path_to_tests))
def test_trivial_serial_graph(self):
initial_datas = [{'a': ic} for ic in self.initial_conditions]
invalid_initial_datas = [{'a': ic} for ic in (-1, 0)]
initial_state, term_state, correct_outputs = self._get_trivial_serial_graph(initial_datas)
self._run_graph(initial_state, term_state, initial_datas, invalid_initial_datas, correct_outputs)
def test_trivial_parallel_graph(self):
initial_datas = [{'a': ic, 'b': ic} for ic in self.initial_conditions]
invalid_initial_datas = [{'a': ic, 'b': ic} for ic in (-2, -1, 0)]
initial_state, term_state, correct_outputs = self._get_trivial_parallel_graph(initial_datas)
self._run_graph(initial_state, term_state, initial_datas, invalid_initial_datas, correct_outputs)
def test_trivial_cycled_graph(self):
initial_datas = [{'a': ic} for ic in self.initial_conditions]
initial_state, term_state, correct_outputs = self._get_trivial_cycled_graph(initial_datas)
self._run_graph(initial_state, term_state, initial_datas, (), correct_outputs)
def test_complex_graph_made_from_trivial_ones_using_dummy_edges(self):
'''
serial graph + parallel graph + cycled graph
'''
initial_datas = [{'a': ic, 'b': ic} for ic in self.initial_conditions]
invalid_initial_datas = [{'a': ic, 'b': ic} for ic in (-4, -3, -2, -1, 0)]
s_1, s_2, correct_outputs = self._get_trivial_serial_graph(initial_datas)
s_3, s_4, correct_outputs = self._get_trivial_parallel_graph(correct_outputs)
s_5, s_6, correct_outputs = self._get_trivial_cycled_graph(correct_outputs)
s_2.connect_to(s_3, edge=Edge(dummy_predicate, dummy_edge))
s_4.connect_to(s_5, edge=Edge(dummy_predicate, dummy_edge))
self._run_graph(s_1, s_6, initial_datas, invalid_initial_datas, correct_outputs)
def test_trivial_serial_graph_with_subgraph(self):
initial_datas = [{'a': ic} for ic in self.initial_conditions]
initial_state, term_state, correct_outputs = self._get_trivial_serial_graph_with_subgraph(initial_datas)
self._run_graph(initial_state, term_state, initial_datas, (), correct_outputs)
def test_trivial_parallel_graph_with_subgraph(self):
initial_datas = [{'a': ic, 'b': ic} for ic in self.initial_conditions]
initial_state, term_state, correct_outputs = self._get_trivial_parallel_graph_with_subgraph(initial_datas)
self._run_graph(initial_state, term_state, initial_datas, (), correct_outputs)
def test_complex_graph_made_from_trivial_ones_using_subgraphs(self):
'''
serial graph + parallel graph + cycled graph
'''
initial_datas = [{'a': ic, 'b': ic} for ic in self.initial_conditions]
invalid_initial_datas = [{'a': ic, 'b': ic} for ic in (-4, -3, -2, -1, 0)]
s_1, s_2, correct_outputs = self._get_trivial_serial_graph(initial_datas)
s_3, s_4, correct_outputs = self._get_trivial_parallel_graph(correct_outputs)
s_5, s_6, correct_outputs = self._get_trivial_cycled_graph(correct_outputs)
s_2.replace_with_graph(Graph(s_3, s_4))
s_4.replace_with_graph(Graph(s_5, s_6))
print(correct_outputs)
self._run_graph(s_1, s_6, initial_datas, invalid_initial_datas, correct_outputs)
def test_trivial_graph_with_implicit_parallelization(self):
'''
s_1 -> s_2 -> s_3, with dummy edges
s_2 = s_11 -> s_12 -> s_13, with +1 edges for a
three implicitly parallel branches appear instead of s_2
'''
initial_datas = [{'a': [ic**i for i in range(1, 4)]} for ic in self.initial_conditions]
initial_state, term_state, correct_outputs = self._get_trivial_graph_with_implicit_parallelization(initial_datas)
self._run_graph(initial_state, term_state, initial_datas, (), correct_outputs)
def test_cycled_graph_with_implicit_parallelization(self):
random_neg_ics = [[random.randrange(-20, -3) for _ in range(3)] for _ in range(10)]
initial_datas = [{'a': random_neg_ic} for random_neg_ic in random_neg_ics]
#initial_datas = [{'a': [-4, -12]},]
#initial_datas = [{'a': [-3, -3]},]
initial_state, term_state, correct_outputs = self._get_cycled_graph_with_implicit_parallelization(initial_datas)
self._run_graph(initial_state, term_state, initial_datas, (), correct_outputs)
def test_trivial_graph_with_external_local_program(self):
initial_datas = [{'a': ic, '__WORKING_DIR__': os.path.join(os.getcwd(), 'tests', 'square_test_dir', 'output')} for ic in self.initial_conditions]
initial_state, term_state, correct_outputs = self._get_trivial_graph_with_external_local_program(initial_datas)
self._run_graph(initial_state, term_state, initial_datas, (), correct_outputs)
def test_trivial_graph_with_external_remote_program(self):
initial_datas = [{'a': ic,
'__WORKING_DIR__': os.path.join(os.getcwd(), 'tests', 'square_test_dir', 'output'),
'__REMOTE_WORKING_DIR__': '{}/square_test_dir'.format(self.ssh_path_to_tests)}
for ic in self.initial_conditions]
initial_state, term_state, correct_outputs = self._get_trivial_graph_with_external_remote_program(initial_datas)
self._run_graph(initial_state, term_state, initial_datas, (), correct_outputs)
def test_trivial_graph_with_external_remote_program_using_grid_engine(self):
initial_datas = [{'a': ic,
'user': self.ssh_user,
'cores_required': 12,
'time_required': '12:00:00',
'qsub_script_name': 'square.sh',
'__WORKING_DIR__': os.path.join(os.getcwd(), 'tests', 'square_test_dir', 'output'),
'__REMOTE_WORKING_DIR__': '{}/square_test_dir'.format(self.ssh_path_to_tests)}
for ic in self.initial_conditions[0:2]]
initial_state, term_state, correct_outputs = self._get_trivial_graph_with_external_remote_program_using_grid_engine(initial_datas)
self._run_graph(initial_state, term_state, initial_datas, (), correct_outputs)
def _get_trivial_serial_graph(self, initial_conditions):
'''
s_1 -> s_2 -> s_3,
p_12 = p_23 := a not 0
f_12 = f_23 := a + 1
'''
s_1 = State('serial_s_1')
s_2 = State('serial_s_2')
s_3 = State('serial_s_3')
s_1.connect_to(s_2, edge=Edge(nonzero_predicate, increment_a_edge))
s_2.connect_to(s_3, edge=Edge(nonzero_predicate, increment_a_edge))
correct_outputs = []
for ic in initial_conditions:
output = _create_data_from_dict(ic)
output['a'] += 2
correct_outputs.append(output)
return s_1, s_3, correct_outputs
def _get_trivial_parallel_graph(self, initial_conditions):
'''
s_1 -> s_2 -> s_3 ---------> s_6
-> s_4 -> s_4_1 -> s_5
-> s_4_2
p_12 = p_24 = p_13 = p_34 := a not 0
f_12 = f_24 := a + 1
f_13 = f_34 := b + 1
'''
s_1 = State('nonparallel_s_1')
s_2 = State('parallel_s_2')
s_3 = State('parallel_s_3')
s_4 = State('parallel_s_4')
s_4_1 = State('parallel_s_4_1')
s_4_2 = State('parallel_s_4_2')
s_5 = State('parallel_s_5')
s_6 = State('nonparallel_s_6')
s_1.connect_to(s_2, edge=Edge(nonzero_predicate, increment_a_edge))
s_2.connect_to(s_3, edge=Edge(nonzero_predicate, increment_a_edge))
s_3.connect_to(s_6, edge=Edge(nonzero_predicate, increment_a_edge))
s_1.connect_to(s_4, edge=Edge(nonzero_predicate, increment_b_edge))
s_4.connect_to(s_4_1, edge=Edge(nonzero_predicate, increment_b_edge))
s_4.connect_to(s_4_2, edge=Edge(nonzero_predicate, increment_b_edge))
s_4_1.connect_to(s_5, edge=Edge(nonzero_predicate, increment_b_edge))
s_4_2.connect_to(s_6, edge=Edge(nonzero_predicate, increment_b_edge))
s_5.connect_to(s_6, edge=Edge(nonzero_predicate, increment_b_edge))
correct_outputs = []
for ic in initial_conditions:
output = _create_data_from_dict(ic)
output['a'] += 3
output['b'] += 3
correct_outputs.append(output)
return s_1, s_6, correct_outputs
def _get_trivial_cycled_graph(self, initial_conditions):
'''
s_1 -> s_2 -> s_3
<-
p_12 := True
p_23 := a > 0
p_21 := a <= 0
f_12 = f_23 = f_21 := a + 1
'''
s_1 = State('cycled_s_1')
s_2 = State('cycled_s_2')
s_3 = State('cycled_s_3')
s_1.connect_to(s_2, edge=Edge(dummy_predicate, increment_a_edge))
s_2.connect_to(s_3, edge=Edge(positiveness_predicate, increment_a_edge))
s_2.connect_to(s_1, edge=Edge(nonpositiveness_predicate, increment_a_edge))
correct_outputs = []
for ic in initial_conditions:
output = _create_data_from_dict(ic)
if output['a'] >= 0:
output['a'] += 2
else:
output['a'] = output['a']%2 + 2
correct_outputs.append(output)
return s_1, s_3, correct_outputs
def _get_trivial_graph_with_external_local_program(self, initial_conditions):
'''
s_1 -> s_2 -> s_3 -> s_4,
p_12 = p_23 = dummy
f_12 = write(a) into a.dat
f_23 = a**2
f_34 = read b.dat from the working dir into b
'''
square_edge = ExecutableProgramEdge('square', self.local_comm,
output_dict={'b_file': 'b.dat'},
trailing_args_keys=('a_file',),
)
s_1 = State('external_s_1')
s_2 = State('external_s_2')
s_3 = State('external_s_3')
s_4 = State('external_s_4')
s_1.connect_to(s_2, edge=Edge(dummy_predicate, write_a_edge))
s_2.connect_to(s_3, edge=square_edge)
s_3.connect_to(s_4, edge=Edge(dummy_predicate, load_b_edge))
correct_outputs = []
for ic in initial_conditions:
output = _create_data_from_dict(ic)
output['a_file'] = os.path.join(os.getcwd(), 'tests/square_test_dir/input/a.dat')
output['b'] = output['a']**2
output['b_file'] = 'b.dat'
correct_outputs.append(output)
return s_1, s_4, correct_outputs
def _get_trivial_graph_with_external_remote_program(self, initial_conditions):
'''
s_1 -> s_2 -> s_3 -> s_4 -> s_5 -> s_6,
all predicates are dummy
f_12 = write(a) into a.dat
f_23 = upload a.dat into the working dir on remote
f_34 = a**2
f_45 = download b.dat from the working dir on remote to the working dir on local
f_56 = read the downloaded b.dat from the working dir on local into b
'''
upload_edge = UploadOnRemoteEdge(self.ssh_comm,
local_paths_keys=('a_file',),
)
square_edge = ExecutableProgramEdge('square', self.ssh_comm,
output_dict={'b_file': 'b.dat'},
trailing_args_keys=('a_file',),
remote=True,
)
download_edge = DownloadFromRemoteEdge(self.ssh_comm,
remote_paths_keys=('b_file',),
)
s_1 = State('remote_s_1')
s_2 = State('remote_s_2')
s_3 = State('remote_s_3')
s_4 = State('remote_s_4')
s_5 = State('remote_s_5')
s_6 = State('remote_s_6')
s_1.connect_to(s_2, edge=Edge(dummy_predicate, write_a_edge))
s_2.connect_to(s_3, edge=upload_edge)
s_3.connect_to(s_4, edge=square_edge)
s_4.connect_to(s_5, edge=download_edge)
s_5.connect_to(s_6, edge=Edge(dummy_predicate, load_b_edge))
correct_outputs = []
for ic in initial_conditions:
output = _create_data_from_dict(ic)
output['a_file'] = os.path.join(ic['__REMOTE_WORKING_DIR__'], 'a.dat')
output['b'] = output['a']**2
output['b_file'] = os.path.join(ic['__WORKING_DIR__'], 'b.dat')
correct_outputs.append(output)
return s_1, s_6, correct_outputs
def _get_trivial_graph_with_external_remote_program_using_grid_engine(self, initial_conditions):
'''
s_1 -> s_2 -> s_3 -> s_4 -> s_5 -> s_6 -> s_7 -> s_8 -> s_9,
<->
all predicates, except p_66 and p_67, are dummy
p_66 = job unfinished
p_67 = job finished
f_12 = write(a) into a.dat
f_23 = upload a.dat into the working dir on remote
f_34 = make up qsub script launching square
f_45 = upload qsub script
f_56 = send job (a**2) via qsub
f_66 = check job finished via qstat
f_67 = download b.dat from the working dir on remote to the working dir on local
f_78 = read the downloaded b.dat from the working dir on local into b
f_89 = filter out a_file, b_file, job_ID, qsub_script
'''
make_up_qsub_script_edge = QsubScriptEdge('square', self.local_comm, self.ssh_comm,
trailing_args_keys=('a_file',),
)
upload_a_edge = UploadOnRemoteEdge(self.ssh_comm,
local_paths_keys=('a_file',),
)
upload_qsub_script_edge = UploadOnRemoteEdge(self.ssh_comm,
local_paths_keys=('qsub_script',),
)
qsub_edge = ExecutableProgramEdge('qsub', self.ssh_comm,
trailing_args_keys=('qsub_script',),
output_dict={'job_finished': False, 'b_file': 'b.dat'},
stdout_processor=set_job_id,
remote=True,
)
qstat_edge = ExecutableProgramEdge('qstat', self.ssh_comm,
predicate=job_unfinished_predicate,
io_mapping=InOutMapping(keys_mapping={'u': 'user', 'job_ID': 'job_ID'}),
keyword_names=('u',),
remote=True,
stdout_processor=check_task_finished,
)
download_edge = DownloadFromRemoteEdge(self.ssh_comm,
predicate=job_finished_predicate,
remote_paths_keys=('b_file',),
)
s_1 = State('remote_s_1')
s_2 = State('remote_s_2')
s_3 = State('remote_s_3')
s_4 = State('remote_s_4')
s_5 = State('remote_s_5')
s_6 = State('remote_s_6')
s_7 = State('remote_s_7')
s_8 = State('remote_s_8')
s_9 = State('remote_s_9')
s_1.connect_to(s_2, edge=Edge(dummy_predicate, write_a_edge))
s_2.connect_to(s_3, edge=upload_a_edge)
s_3.connect_to(s_4, edge=make_up_qsub_script_edge)
s_4.connect_to(s_5, edge=upload_qsub_script_edge)
s_5.connect_to(s_6, edge=qsub_edge)
s_6.connect_to(s_6, edge=qstat_edge)
s_6.connect_to(s_7, edge=download_edge)
s_7.connect_to(s_8, edge=Edge(dummy_predicate, load_b_edge))
def filter_data(data):
del data['a_file']
del data['b_file']
del data['job_ID']
del data['qsub_script']
s_8.connect_to(s_9, edge=Edge(dummy_predicate, filter_data))
correct_outputs = []
for ic in initial_conditions:
output = _create_data_from_dict(ic)
output['b'] = output['a']**2
output['job_finished'] = True
correct_outputs.append(output)
return s_1, s_9, correct_outputs
def _get_trivial_serial_graph_with_subgraph(self, initial_conditions):
'''
s_1 -> s_2 -> s_3,
where s_2 is replaced by the subgraph sub_s_1 -> sub_s_2
p_12 = p_23 := dummy
f_12 = f_23 := a + 1
'''
pred = Func(func=dummy_predicate)
morph = Func(func=increment_a_edge)
s_1 = State('s_1')
s_2 = State('s_2')
s_3 = State('s_3')
s_1.connect_to(s_2, edge=Edge(pred, morph))
s_2.connect_to(s_3, edge=Edge(pred, morph))
sub_s_1 = State('sub_s_1')
sub_s_2 = State('sub_s_2')
sub_s_1.connect_to(sub_s_2, edge=Edge(pred, morph))
s_2.replace_with_graph(Graph(sub_s_1, sub_s_2))
correct_outputs = []
for ic in initial_conditions:
output = _create_data_from_dict(ic)
output['a'] += 3
correct_outputs.append(output)
return s_1, s_3, correct_outputs
def _get_trivial_parallel_graph_with_subgraph(self, initial_conditions):
'''
s_1 -> s_2 -> s_4
-> s_3 ->
where s_2 and s_3 are each replaced by s_5 -> s_6 -> s_7
all predicates are dummy
f_12 = f_24 := a + 1
f_13 = f_34 := b + 1
f_56 = f_67 := a + 1
'''
asp = AllSelectionPolicy()
s_1 = State('s_1', selection_policy=AllSelectionPolicy())
s_2 = State('s_2')
s_3 = State('s_3')
s_4 = State('s_4')
s_1.connect_to(s_2, edge=Edge(dummy_predicate, increment_a_edge))
s_1.connect_to(s_3, edge=Edge(dummy_predicate, increment_b_edge))
s_2.connect_to(s_4, edge=Edge(dummy_predicate, increment_a_edge))
s_3.connect_to(s_4, edge=Edge(dummy_predicate, increment_b_edge))
sub1_s_5 = State('s_2_sub_s_5')
sub1_s_6 = State('s_2_sub_s_6')
sub1_s_7 = State('s_2_sub_s_7')
sub2_s_5 = State('s_3_sub_s_5')
sub2_s_6 = State('s_3_sub_s_6')
sub2_s_7 = State('s_3_sub_s_7')
sub1_s_5.connect_to(sub1_s_6, edge=Edge(dummy_predicate, increment_a_edge))
sub1_s_6.connect_to(sub1_s_7, edge=Edge(dummy_predicate, increment_a_edge))
sub2_s_5.connect_to(sub2_s_6, edge=Edge(dummy_predicate, increment_a_edge))
sub2_s_6.connect_to(sub2_s_7, edge=Edge(dummy_predicate, increment_a_edge))
s_2.replace_with_graph(Graph(sub1_s_5, sub1_s_7))
s_3.replace_with_graph(Graph(sub2_s_5, sub2_s_7))
correct_outputs = []
for ic in initial_conditions:
output = _create_data_from_dict(ic)
output['a'] += 6
output['b'] += 2
correct_outputs.append(output)
return s_1, s_4, correct_outputs
def _get_trivial_graph_with_implicit_parallelization(self, initial_conditions):
'''
s_1 -> s_2 -> s_3, with dummy edges,
where s_2 is replaced by the subgraph sub_s_1 -> sub_s_2 -> sub_s_3 with +1 edges for a;
sub_s_1 maps the array key 'a', so implicitly parallel branches appear instead of s_2 (one per element of a)
'''
#asp = AllSelectionPolicy()
sub_s_1 = State('sub_s_1', array_keys_mapping={'a': 'a'})
sub_s_2 = State('sub_s_2')
sub_s_3 = State('sub_s_3')
subgraph = Graph(sub_s_1, sub_s_3)
s_1 = State('s_1')
s_2 = State('s_2')
s_3 = State('s_3')
s_1.connect_to(s_2, edge=Edge(dummy_predicate, dummy_edge))
s_2.connect_to(s_3, edge=Edge(dummy_predicate, dummy_edge))
sub_s_1.connect_to(sub_s_2, edge=Edge(dummy_predicate, increment_a_edge))
sub_s_2.connect_to(sub_s_3, edge=Edge(dummy_predicate, increment_a_edge))
s_2.replace_with_graph(subgraph)
correct_outputs = []
for ic in initial_conditions:
output = _create_data_from_dict(ic)
output['a'] = [output['a'][i] + 2 for i in range(len(output['a']))]
correct_outputs.append(output)
return s_1, s_3, correct_outputs
def _get_cycled_graph_with_implicit_parallelization(self, initial_conditions):
'''
s_1 -> s_2 -> s_3 -> s_4
<-
p_23 := a > 0
p_22 := a <= 0
all other predicates are dummy
f_11 = f_22 = f_23 = f_34 := a + 1
'''
s_sub_1 = State('s_sub_1', array_keys_mapping={'a': 'a'})
s_sub_2 = State('s_sub_2')
s_sub_3 = State('s_sub_3')
s_1 = State('s_1')
s_2 = State('s_2')
s_sub_1.connect_to(s_sub_2, edge=Edge(dummy_predicate, increment_a_edge))
s_sub_2.connect_to(s_sub_2, edge=Edge(lambda d: d['a'] <= 0, increment_a_edge))
s_sub_2.connect_to(s_sub_3, edge=Edge(lambda d: d['a'] > 0, increment_a_edge))
subgraph = Graph(s_sub_1, s_sub_3)
s_1.connect_to(s_2, edge=Edge(dummy_predicate, increment_a_array_edge))
s_1.replace_with_graph(subgraph)
correct_outputs = []
for ic in initial_conditions:
output = _create_data_from_dict(ic)
output['a'] = [3 for i in range(len(output['a']))]
correct_outputs.append(output)
return s_1, s_2, correct_outputs
def _run_graph(self, initial_state, term_state, initial_datas, invalid_initial_datas, correct_outputs):
graph = Graph(initial_state, term_state)
for initial_data, correct_output in zip(initial_datas, correct_outputs):
print('Doing ic = {}'.format(initial_data))
data = deepcopy(initial_data)
okay = graph.run(data)
#print(data['__EXCEPTION__'])
if initial_data in invalid_initial_datas:
self.assertEqual('__EXCEPTION__' in data, True)
self.assertEqual(okay, False)
else:
self.assertEqual(okay, True)
self.assertEqual(data, correct_output)
if __name__ == '__main__':
unittest.main()
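The graph tests above are standard `unittest` cases; note that `setUpClass` compiles the square helper and opens an SSH connection to the remote host, so the suite needs those credentials to run. A hedged sketch for discovering and running the test modules, assuming the usual `tests/` layout:

```python
# Sketch: discover and run the test modules under tests/ (assumed layout).
import unittest

suite = unittest.defaultTestLoader.discover('tests')
unittest.TextTestRunner(verbosity=2).run(suite)
```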
import unittest
import subprocess
from comsdk.graph import *
from comsdk.parser import Parser
path_to_comsdk = "/home/lbstr/bmstu/comsdk"
path_to_pycomsdk = "/home/lbstr/bmstu/pycomsdk"
class ParserGoodCheck(unittest.TestCase):
def test_trivial_graph(self):
parsr = Parser()
gr = parsr.parse_file("./tests/adot/trivial.adot")
data = {"a": 1}
gr.run(data)
self.assertEqual(data["a"], 4)
def test_branching_graph(self):
parsr = Parser()
gr = parsr.parse_file("./tests/adot/branching.adot")
data = {"a": 1, "b": 1}
gr.run(data)
self.assertEqual(data["a"], 4)
self.assertEqual(data["b"], 4)
def test_cycled_graph(self):
parsr = Parser()
gr = parsr.parse_file("./tests/adot/cycled.adot")
data = {"a": 10}
gr.run(data)
self.assertEqual(data["a"], 0)
def test_complex_graph(self):
parsr = Parser()
gr = parsr.parse_file("./tests/adot/complex.adot")
data = {"a": 1, "b": 1}
gr.run(data)
self.assertEqual(data["a"], 4)
self.assertEqual(data["b"], 4)
def test_cpp_trivial_graph(self):
parsr = Parser(tocpp=True)
gr = parsr.parse_file("./tests/adot/cpptrivial.adot")
parsr.generate_cpp(path_to_comsdk+"res.cpp")
command = "cd "+path_to_comsdk+"; "+path_to_pycomsdk+"/cpp/run.sh "+path_to_comsdk+"res.cpp"
subprocess.check_output(["bash", "-c", command])
def test_cpp_branching_graph(self):
parsr = Parser(tocpp=True)
gr = parsr.parse_file("./tests/adot/cppbranching.adot")
parsr.generate_cpp(path_to_comsdk+"res.cpp")
command = "cd "+path_to_comsdk+"; "+path_to_pycomsdk+"/cpp/run.sh "+path_to_comsdk+"res.cpp"
subprocess.check_output(["bash", "-c", command])
def test_cpp_cycled_graph(self):
parsr = Parser(tocpp=True)
gr = parsr.parse_file("./tests/adot/cppcycled.adot")
parsr.generate_cpp(path_to_comsdk+"res.cpp")
command = "cd "+path_to_comsdk+"; "+path_to_pycomsdk+"/cpp/run.sh "+path_to_comsdk+"res.cpp"
subprocess.check_output(["bash", "-c", command])
def test_cpp_complex_graph(self):
parsr = Parser(tocpp=True)
gr = parsr.parse_file("./tests/adot/cppcomplex.adot")
parsr.generate_cpp(path_to_comsdk+"res.cpp")
command = "cd "+path_to_comsdk+"; "+path_to_pycomsdk+"/cpp/run.sh "+path_to_comsdk+"res.cpp"
subprocess.check_output(["bash", "-c", command])
if __name__ == '__main__':
unittest.main()
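The C++ generation tests above drive the parser through `parse_file` followed by `generate_cpp`; a stripped-down sketch of the same calls (the output path is arbitrary here):

```python
# Sketch: generating C++ from an .adot graph description, mirroring the tests above.
from comsdk.parser import Parser

parser = Parser(tocpp=True)
parser.parse_file('./tests/adot/cpptrivial.adot')
parser.generate_cpp('res.cpp')  # the result can then be built with cpp/run.sh
```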