Source code for pypet.parameter

"""This module contains implementations of result and parameter containers.

Results and parameters are the leaf nodes of the :class:`~pypet.trajectory.Trajectory` tree.
Instances of results can only be found under the subtree `traj.results`, whereas
parameters are used to handle data kept under `traj.config`, `traj.parameters`, and
`traj.derived_parameters`.

Result objects can handle more than one data item and heterogeneous data.
In contrast, parameters handle only single data items. However, they can contain
ranges - arrays of homogeneous data items - to allow parameter exploration.

The module contains the following parameters:

    * :class:`~pypet.parameter.BaseParameter`

        Abstract base class to define the parameter interface

    * :class:`~pypet.parameter.Parameter`

        Standard parameter that handles a variety of different data types.

    * :class:`~pypet.parameter.ArrayParameter`

        Parameter class for larger numpy arrays and python tuples

    * :class:`~pypet.parameter.SparseParameter`

        Parameter for Scipy sparse matrices

    * :class:`~pypet.parameter.PickleParameter`

        Parameter that can handle all objects that can be pickled


The module contains the following results:

    * :class:`~pypet.parameter.BaseResult`

        Abstract base class to define the result interface

    * :class:`~pypet.parameter.Result`

        Standard result that handles a variety of different data types

    * :class:`~pypet.parameter.SparseResult`

        Result that can handle Scipy sparse matrices

    * :class:`~pypet.parameter.PickleResult`

        Result that can handle all objects that can be pickled

Moreover, part of this module is also the :class:`~pypet.parameter.ObjectTable`.
This is a specialization of pandas_ DataFrames that maintains data types.
It prevents the auto-conversion of data to numpy data types, like python integers
to numpy 64 bit integers.

.. _pandas: http://pandas.pydata.org/

"""

__author__ = 'Robert Meyer'


import pickle
import pickletools

import numpy as np
import scipy.sparse as spsp
from pandas import DataFrame, Series


import pypet.pypetconstants as pypetconstants
from pypet.naturalnaming import NNLeafNode
import pypet.utils.comparisons as comparisons
from pypet.utils.decorators import deprecated, copydoc
from pypet.utils.helpful_classes import HashArray
import pypet.pypetexceptions as pex


class ObjectTable(DataFrame):
    """Wrapper class for pandas_ DataFrames.

    It creates data frames with `dtype=object`.

    Data stored into an object table preserves its original type when stored to disk.
    For instance, a python int is not automatically converted to a numpy 64 bit
    integer (np.int64).

    The object table serves as a data structure to hand data to a storage service.

    Example usage:

    >>> ObjectTable(data={'characters': ['Luke', 'Han', 'Spock'], 'Random_Values': [42, 43, 44]})

    Creates the following table:

    ======  =============  ==========
    Index   Random_Values  characters
    ======  =============  ==========
    0       42             Luke
    1       43             Han
    2       44             Spock
    ======  =============  ==========

    .. _pandas: http://pandas.pydata.org/

    """
    def __init__(self, data=None, index=None, columns=None, copy=False):
        super(ObjectTable, self).__init__(data=data, index=index, columns=columns,
                                          dtype=object, copy=copy)
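
# Illustrative sketch (not part of the original module): why `ObjectTable`
# exists. A plain DataFrame upcasts python ints to numpy integers, whereas the
# object table keeps the original python types.
def _example_object_table():
    plain = DataFrame(data={'Random_Values': [42, 43, 44]})
    obj = ObjectTable(data={'Random_Values': [42, 43, 44]})
    assert plain['Random_Values'].dtype != object  # auto-converted (e.g. int64)
    assert obj['Random_Values'].dtype == object    # original python ints kept
    assert type(obj['Random_Values'][0]) is int
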
class BaseParameter(NNLeafNode):
    """Abstract class that specifies the methods for a trajectory parameter.

    Parameters are simple container objects for data values. They handle single values
    as well as ranges of potential values. These range arrays contain multiple values
    which are accessed one after the other in individual simulation runs.

    Parameter exploration is usually initiated through the trajectory, see
    :func:`~pypet.trajectory.Trajectory.f_explore` and
    :func:`~pypet.trajectory.Trajectory.f_expand`.

    To access the parameter's data value one can call the
    :func:`~pypet.parameter.BaseParameter.f_get` method.

    Parameters support the concept of locking. Once a value of the parameter has been
    accessed, the parameter cannot be changed anymore unless it is explicitly unlocked
    using :func:`~pypet.parameter.BaseParameter.f_unlock`. This prevents parameters
    from being changed during the runtime of a simulation.

    If multiprocessing is desired the parameter must be picklable!

    :param full_name: The full name of the parameter in the trajectory tree, groupings
        are separated by dots: `fullname = 'supergroup.subgroup.paramname'`

    :param comment: A useful comment describing the parameter:
        `comment = 'Some useful text, dude!'`

    """

    __slots__ = ('_locked', '_full_copy', '_explored')
    def __init__(self, full_name, comment=''):
        super(BaseParameter, self).__init__(full_name, comment, is_parameter=True)

        self._locked = False
        # Whether to keep the full range array when pickled or not
        self._full_copy = False
        self._explored = False  # If explored or not
    def f_supports(self, data):
        """Checks whether the data is supported by the parameter."""
        return type(data) in pypetconstants.PARAMETER_SUPPORTED_DATA
    @property
    def v_locked(self):
        """Whether or not the parameter is locked and prevents further modification"""
        return self._locked
    def f_supports_fast_access(self):
        """Checks if parameter supports fast access.

        A parameter supports fast access if it is NOT empty!

        """
        return not self.f_is_empty()
    @property
    def v_explored(self):
        """Whether parameter is explored.

        Does not necessarily have to be similar to
        :func:`~pypet.parameter.BaseParameter.f_has_range` since the range can be
        deleted on pickling and the parameter remains explored.

        """
        return self._explored

    @property
    def v_full_copy(self):
        """Whether or not the full parameter including the range or only the current
        data is copied during pickling.

        If you run your simulations in multiprocessing mode, the whole trajectory and
        all parameters need to be pickled and are sent to the individual processes.
        Each process then runs an individual point in the parameter space. As a
        consequence, you do not need the full ranges during these calculations. Thus,
        if the full copy mode is set to `False`, the parameter is pickled without the
        range array and you can save memory.

        If you want to access the full range during individual runs, you need to set
        `v_full_copy` to `True`.

        It is recommended NOT to do that in order to save memory and also to obey the
        philosophy that individual simulation runs are independent.

        Example usage:

        >>> import pickle
        >>> param = Parameter('examples.fullcopy', data=333, comment='I show you how the copy mode works!')
        >>> param._explore([1, 2, 3, 4])
        >>> dump = pickle.dumps(param)
        >>> newparam = pickle.loads(dump)
        >>> newparam.f_get_range()
        TypeError

        >>> param.v_full_copy = True
        >>> dump = pickle.dumps(param)
        >>> newparam = pickle.loads(dump)
        >>> newparam.f_get_range()
        [1, 2, 3, 4]

        """
        return self._full_copy

    @v_full_copy.setter
    def v_full_copy(self, val):
        """Sets the full copy mode"""
        val = bool(val)
        self._full_copy = val
    def f_has_range(self):
        """Returns true if the parameter contains a range array.

        Not necessarily equal to `v_explored` if the range is removed on pickling
        due to `v_full_copy=False`.

        ABSTRACT: Needs to be defined in subclass

        """
        raise NotImplementedError("Should have implemented this.")
    def _restore_default(self):
        """Restores original data if changed due to exploration.

        If a parameter is explored, the actual data is changed over the course of
        different simulations. This method restores the original data assigned before
        exploration.

        ABSTRACT: Needs to be defined in subclass

        """
        raise NotImplementedError("Should have implemented this.")
    def f_get_range_length(self):
        """Returns the length of the parameter range.

        Raises TypeError if the parameter has no range.

        Does not need to be implemented if the parameter supports ``__len__``
        appropriately.

        """
        if not self.f_has_range():
            raise TypeError('Not applicable, parameter does not have a range')
        elif hasattr(self, '__len__'):
            return len(self)
        else:
            raise NotImplementedError("Should have implemented this.")
    def f_val_to_str(self):
        """String summary of the value handled by the parameter.

        Note that representing the parameter as a string accesses its value, but for
        simpler debugging this neither locks the parameter nor counts as usage!

        Calls `__repr__` of the contained value.

        """
        old_locked = self._locked
        try:
            return repr(self.f_get())
        except Exception:
            return 'No Evaluation possible (yet)!'
        finally:
            self._locked = old_locked
    def _equal_values(self, val1, val2):
        """Checks if the parameter considers two values as equal.

        This is important for the trajectory in case of merging. In case you want to
        delete duplicate parameter points, the trajectory needs to know when two
        parameters are equal. Since equality is not always implemented by values
        handled by parameters in the same way, the parameters need to judge whether
        their values are equal.

        The straightforward example here is a numpy array. Checking for equality of
        two numpy arrays yields a third numpy array containing the truth values of an
        element-wise comparison. Accordingly, the parameter could judge two numpy
        arrays equal if ALL of the numpy array elements are equal.

        In this BaseParameter class values are considered to be equal if they obey
        the function :func:`~pypet.utils.comparisons.nested_equal`. You might consider
        implementing a different equality comparison in your subclass.

        :raises: TypeError: If both values are not supported by the parameter.

        """
        if self.f_supports(val1) != self.f_supports(val2):
            return False

        if not self.f_supports(val1) and not self.f_supports(val2):
            raise TypeError('I do not support the types of both inputs (`%s` and `%s`), '
                            'therefore I cannot judge whether '
                            'the two are equal.' % (str(type(val1)), str(type(val2))))

        if not self._values_of_same_type(val1, val2):
            return False

        return comparisons.nested_equal(val1, val2)
    def _values_of_same_type(self, val1, val2):
        """Checks if two values agree in type.

        For example, two 32 bit integers would be of same type, but not a string and
        an integer, nor a 64 bit and a 32 bit integer.

        This is important for exploration. You are only allowed to explore data that
        is of the same type as the default value.

        One could always come up with the trivial solution of
        `type(val1) is type(val2)`. But sometimes your parameter requires a stricter
        or a looser sense of type equality.

        For example, the :class:`~pypet.parameter.Parameter` has a stricter sense of
        type equality regarding numpy arrays. In order to be of the same type, two
        numpy arrays must also agree in shape. However, the
        :class:`~pypet.parameter.ArrayParameter` considers all numpy arrays as being
        of the same type regardless of their shape. Moreover, the
        :class:`~pypet.parameter.SparseParameter` considers all supported sparse
        matrices (csc, csr, bsr, dia) as being of the same type. You can make
        explorations using all these four types at once.

        The difference in how strict types are treated arises from the way parameter
        data is stored to disk and how the parameters hand over their data to the
        storage service (see :func:`~pypet.parameter.BaseParameter._store`).

        The :class:`~pypet.parameter.Parameter` puts all its data in an
        :class:`~pypet.parameter.ObjectTable` which has strict constraints on the
        column sizes. This means that numpy array columns only accept numpy arrays
        with a particular size. In contrast, the array and sparse parameters hand
        over their data as individual items which yield individual entries in the
        hdf5 node. In order to see what I mean, simply run an experiment with all 3
        parameters, explore all of them, and take a look at the resulting hdf5 file!

        However, this BaseParameter class implements the straightforward version of
        `type(val1) is type(val2)` to consider data to be of the same type.

        :raises: TypeError: if both values are not supported by the parameter.

        """
        if self.f_supports(val1) != self.f_supports(val2):
            return False

        if not self.f_supports(val1) and not self.f_supports(val2):
            raise TypeError('I do not support the types of both inputs (`%s` and `%s`),'
                            ' therefore I cannot judge whether the two are of same type.'
                            % (str(type(val1)), str(type(val2))))

        return type(val1) is type(val2)
    def __repr__(self):
        return '<%s>' % self.__str__()
    def __str__(self):
        """String representation of the parameter.

        Output format is ``<class_name> full_name (len:X, `comment`): value``.

        If the comment is the empty string, the comment is omitted. If the parameter
        is not explored, the length is omitted.

        """
        if self.f_has_range():
            lenstr = 'len:%d' % self.f_get_range_length()
        else:
            lenstr = ''

        if self.v_comment:
            commentstr = '`%s`' % self.v_comment
        else:
            commentstr = ''

        if commentstr or lenstr:
            if commentstr and lenstr:
                combined_str = '%s, %s' % (lenstr, commentstr)
            elif commentstr:
                combined_str = commentstr
            elif lenstr:
                combined_str = lenstr
            else:
                raise RuntimeError('You shall not pass!')

            infostr = ' (%s)' % combined_str
        else:
            infostr = ''

        return_string = '%s %s%s' % (self.f_get_class_name(), self.v_full_name, infostr)

        if not self.f_is_empty():
            return_string += ': ' + self.f_val_to_str()

        return return_string
    def f_unlock(self):
        """Unlocks the locked parameter.

        Please use it very carefully, or best do not use this function at all. There
        should better be no reason to unlock a locked parameter! The only exception I
        can think of is to unlock a large derived parameter after usage to
        subsequently call :func:`~pypet.parameter.BaseParameter.f_empty` to clear
        memory.

        """
        self._locked = False
    def f_lock(self):
        """Locks the parameter and forbids further manipulation.

        Changing the data value or the exploration range of the parameter is no
        longer allowed.

        """
        self._locked = True
    def f_set(self, data):
        """Sets a data value for a parameter.

        Example usage:

        >>> param = Parameter('groupA.groupB.myparam', comment='I am a neat example')
        >>> param.f_set(44.0)
        >>> param.f_get()
        44.0

        :raises:

            ParameterLockedException: If the parameter is locked

            TypeError: If the type of the data value is not supported by the parameter

        ABSTRACT: Needs to be defined in subclass

        """
        raise NotImplementedError("Should have implemented this.")
    def __getitem__(self, item):
        """Equivalent to `f_get_range()[item]`

        :raises: TypeError if the parameter has no range

        """
        return self.f_get_range().__getitem__(item)
    def f_get(self):
        """Returns the current data value of the parameter and locks the parameter.

        :raises: TypeError if the parameter is empty

        Example usage:

        >>> param = Parameter('groupA.groupB.myparam', comment='I am a neat example')
        >>> param.f_set(44.0)
        >>> param.f_get()
        44.0

        ABSTRACT: Needs to be defined in subclass

        """
        raise NotImplementedError("Should have implemented this.")
    def f_get_default(self):
        """Returns the default value of the parameter and locks it.

        ABSTRACT: Needs to be defined in subclass

        """
        raise NotImplementedError("Should have implemented this.")
    def f_get_range(self, copy=True):
        """Returns an iterable to iterate over the values of the exploration range.

        Note that the returned values are a copy of the exploration range unless
        explicitly requested otherwise.

        :param copy: If the range should be copied to avoid tampering with the data.

        :return: Iterable

        :raises: TypeError if the parameter is not explored

        Example usage:

        >>> param = Parameter('groupA.groupB.myparam', data=22, comment='I am a neat example')
        >>> param._explore([42, 43, 44])
        >>> param.f_get_range()
        [42, 43, 44]

        ABSTRACT: Needs to be defined in subclass

        """
        raise NotImplementedError("Should have implemented this.")
    def _explore(self, iterable):
        """The method to explore a parameter and create a range of entries.

        :param iterable: An iterable specifying the exploration range

            For example:

            >>> param = Parameter('groupA.groupB.myparam', data=22.33, comment='I am a neat example')
            >>> param._explore([3.0, 2.0, 1.0])

        :raises:

            ParameterLockedException: If the parameter is locked

            TypeError: If the parameter is already explored

        ABSTRACT: Needs to be defined in subclass

        """
        raise NotImplementedError("Should have implemented this.")
    def _expand(self, iterable):
        """Similar to :func:`~pypet.parameter.BaseParameter._explore` but appends to
        the exploration range.

        :param iterable: An iterable specifying the exploration range.

        :raises:

            ParameterLockedException: If the parameter is locked

            TypeError: If the parameter did not have a range before

        Example usage:

        >>> param = Parameter('groupA.groupB.myparam', data=3.13, comment='I am a neat example')
        >>> param._explore([3.0, 2.0, 1.0])
        >>> param._expand([42.0, 43.0])
        >>> param.f_get_range()
        [3.0, 2.0, 1.0, 42.0, 43.0]

        ABSTRACT: Needs to be defined in subclass

        """
        raise NotImplementedError("Should have implemented this.")
    def _set_parameter_access(self, idx=0):
        """Sets the current value according to the `idx` in the exploration range.

        Prepares the parameter for further usage, and tells it which point in the
        parameter space should be accessed by calls to
        :func:`~pypet.parameter.BaseParameter.f_get`.

        :param idx: The index within the exploration range.

            If the parameter has no range, the single data value is considered
            regardless of the value of `idx`.

        :raises:

            ValueError: If the parameter has a range and `idx` is larger than or
            equal to the length of the parameter.

        Example usage:

        >>> param = Parameter('groupA.groupB.myparam', data=22.33, comment='I am a neat example')
        >>> param._explore([42.0, 43.0, 44.0])
        >>> param._set_parameter_access(idx=1)
        >>> param.f_get()
        43.0

        ABSTRACT: Needs to be defined in subclass

        """
        raise NotImplementedError("Should have implemented this.")
    def f_get_class_name(self):
        """Returns the name of the class, i.e. `return self.__class__.__name__`"""
        return self.__class__.__name__
    def f_is_empty(self):
        """True if no data has been assigned to the parameter.

        Example usage:

        >>> param = Parameter('myname.is.example', comment='I am empty!')
        >>> param.f_is_empty()
        True
        >>> param.f_set(444)
        >>> param.f_is_empty()
        False

        """
        raise NotImplementedError('Implement this!')
    def _shrink(self):
        """If a parameter is explored, i.e. it has a range, the whole exploration
        range is deleted.

        Note that this function does not erase data from disk. So if the parameter
        has been stored with a service to disk and is shrunk, it can be restored by
        loading from disk.

        :raises:

            ParameterLockedException: If the parameter is locked

            TypeError: If the parameter has no range

        ABSTRACT: Needs to be defined in subclass

        """
        raise NotImplementedError("Should have implemented this.")
    def f_empty(self):
        """Erases all data in the parameter.

        Does not erase data from disk. So if the parameter has been stored with a
        service to disk and is emptied, it can be restored by loading from disk.

        :raises: ParameterLockedException: If the parameter is locked.

        ABSTRACT: Needs to be defined in subclass

        """
        raise NotImplementedError("Should have implemented this.")
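
# Illustrative sketch (not part of the original module): how strict type
# equality differs between parameter classes, as discussed in
# `BaseParameter._values_of_same_type`. `Parameter` requires numpy arrays to
# agree in dtype and shape, while `ArrayParameter` treats any two numpy arrays
# as being of the same type. The parameter names are made up for the example.
def _example_type_equality():
    strict = Parameter('examples.strict', data=np.array([1, 2, 3]))
    loose = ArrayParameter('examples.loose', data=np.array([1, 2, 3]))
    a, b = np.array([1, 2, 3]), np.array([1, 2, 3, 4])
    assert not strict._values_of_same_type(a, b)  # shapes differ
    assert loose._values_of_same_type(a, b)       # any ndarray pair is fine
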
class Parameter(BaseParameter):
    """The standard container that handles access to simulation parameters.

    Parameters are simple container objects for data values. They handle single
    values as well as the so-called exploration range, an array containing multiple
    values which are accessed one after the other in individual simulation runs.

    Parameter exploration is usually initiated through the trajectory, see
    :func:`~pypet.trajectory.Trajectory.f_explore` and
    :func:`~pypet.trajectory.Trajectory.f_expand`.

    To access the parameter's data value one can call the
    :func:`~pypet.parameter.Parameter.f_get` method.

    Parameters support the concept of locking. Once a value of the parameter has been
    accessed, the parameter cannot be changed anymore unless it is explicitly
    unlocked using :func:`~pypet.parameter.Parameter.f_unlock`. Locking prevents
    parameters from being changed during the runtime of a simulation.

    Supported data values for the parameter are

    * python natives (int, long, str, bool, float, complex),

    * numpy natives, arrays and matrices of type np.int8-64, np.uint8-64,
      np.float32-64, np.complex, np.str

    * python homogeneous non-nested tuples and lists

    Note that for larger numpy arrays it is recommended to use the
    :class:`~pypet.parameter.ArrayParameter`.

    In case you create a new parameter you can pass the following arguments:

    :param full_name: The full name of the parameter. Grouping can be achieved by
        using dots.

    :param data: A data value that is handled by the parameter. It is checked whether
        the parameter :func:`~pypet.parameter.Parameter.f_supports` the data. If not,
        a TypeError is thrown. If the parameter becomes explored, the data value is
        kept as a default. After the simulation the default value will be restored.

        The data can be accessed as follows:

        >>> param.f_get()
        42

        Or using

        >>> param.data
        42

        [It is not `v_data` because the data is supposed to be part of the trajectory
        tree or an extension of the natural naming scheme and not considered as an
        attribute/variable of the parameter container.]

        To change the data after parameter creation one can call
        :func:`~pypet.parameter.Parameter.f_set`:

        >>> param.f_set(43)
        >>> param.f_get()
        43

    :param comment: A useful comment describing the parameter. The comment can be
        changed later on using the `v_comment` variable.

        >>> param.v_comment = 'Example comment'
        >>> print(param.v_comment)
        Example comment

    :raises: TypeError: If `data` is not supported by the parameter.

    Example usage:

    >>> param = Parameter('traffic.mobiles.ncars', data=42, comment='I am a neat example')

    """

    __slots__ = ('_data', '_default', '_explored_range')

    def __init__(self, full_name, data=None, comment=''):
        super(Parameter, self).__init__(full_name, comment)
        self._data = None

        # The default value, which is the same as _data in the beginning; it is
        # necessary to keep a reference to it to restore the original value
        # after exploration
        self._default = None

        # List that will be changed later on if the parameter is explored
        self._explored_range = []

        self._set_logger()

        if data is not None:
            self.f_set(data)

    def _restore_default(self):
        """Restores the default data that was set with the
        :func:`~pypet.parameter.Parameter.f_set` method (or at initialisation).

        If the parameter is explored during the runtime of a simulation, the actual
        value of the parameter is changed and taken from the exploration range.
        Calling :func:`~pypet.parameter.Parameter._restore_default` sets the
        parameter's value back to its original value.

        Example usage:

        >>> param = Parameter('supergroup1.subgroup2.myparam', data=44, comment='Im a comment!')
        >>> param._explore([1, 2, 3, 4])
        >>> param._set_parameter_access(2)
        >>> param.f_get()
        3
        >>> param._restore_default()
        >>> param.f_get()
        44

        """
        self._data = self._default
    @copydoc(BaseParameter.f_is_empty)
    def f_is_empty(self):
        """True if no data has been assigned to the parameter.

        Example usage:

        >>> param = Parameter('myname.is.example', comment='I am empty!')
        >>> param.f_is_empty()
        True
        >>> param.f_set(444)
        >>> param.f_is_empty()
        False

        """
        return self._data is None
    def __len__(self):
        """Returns the length of the parameter.

        :return:

            0 if empty

            1 if not explored

            length of the range if explored and it has a range

        """
        if self._data is None:
            return 0
        elif len(self._explored_range) > 0:
            return len(self._explored_range)
        else:
            return 1
    def f_has_range(self):
        """Whether the parameter has a range.

        Does not have to be `True` if the parameter is explored. The range might be
        removed during pickling to save memory. Accordingly, `v_explored` remains
        `True` whereas `f_has_range` is `False`.

        """
        return len(self._explored_range) > 0
    def __getstate__(self):
        """Returns the actual state of the parameter for pickling.

        If `v_full_copy` is true, the exploration range is also pickled, otherwise it
        is omitted.

        """
        result = super(Parameter, self).__getstate__()

        # If we don't need a full copy of the Parameter (because a single process
        # needs only access to a single point in the parameter space)
        # we can delete the rest
        if not self._full_copy:
            result['_explored_range'] = []

        return result

    # @no_prefix_getattr
    def __getattr__(self, item):
        """Allows to query for `.data` as an attribute"""
        if item == 'data':
            return self.f_get()
        elif item == 'default':
            return self.f_get_default()
        else:
            raise AttributeError('`%s` object has no attribute `%s`.' %
                                 (self.f_get_class_name(), item))

    def __getitem__(self, item):
        """Equivalent to `f_get_range()[item]` if `item` is an integer.

        Also allows `['data']` access, which is equivalent to `f_get`.

        :raises: TypeError if the parameter has no range

        """
        if item == 'data':
            return self.f_get()
        elif item == 'default' or item == -1:
            return self.f_get_default()
        else:
            return super(Parameter, self).__getitem__(item)

    @copydoc(BaseParameter._set_parameter_access)
    def _set_parameter_access(self, idx=0):
        if idx >= len(self) and self.f_has_range():
            raise ValueError('You try to access data item No. %d in the parameter range, '
                             'yet there are only %d potential items.' % (idx, len(self)))
        elif self.f_has_range():
            self._data = self._explored_range[idx]
        else:
            self._logger.warning('You try to change the access to a parameter range of parameter'
                                 ' `%s`. The parameter has no range, your setting has no'
                                 ' effect.' % self.v_full_name)
    def f_supports(self, data):
        """Checks if input data is supported by the parameter."""
        dtype = type(data)
        if dtype is tuple or dtype is list:
            # Parameters cannot handle empty tuples or lists
            if len(data) == 0:
                return False

            old_type = None

            # Check if the data in the tuple or list is homogeneous
            for item in data:
                if not type(item) in pypetconstants.PARAMETER_SUPPORTED_DATA:
                    return False
                if old_type is not None and old_type != type(item):
                    return False
                old_type = type(item)
            return True

        elif dtype is np.ndarray or dtype is np.matrix:
            if data.size == 0:
                return False  # Parameters cannot handle empty arrays and matrices

            # Numpy has many string types that depend on the length of the string,
            # we allow all of them
            dtype = data.dtype
            if np.issubdtype(dtype, np.str):
                dtype = np.str

        return dtype in pypetconstants.PARAMETER_SUPPORTED_DATA
    def _values_of_same_type(self, val1, val2):
        """Checks if two values agree in type.

        Raises a TypeError if both values are not supported by the parameter.
        Returns False if only one of the two values is supported by the parameter.

        Example usage:

        >>> param._values_of_same_type(42, 43)
        True
        >>> param._values_of_same_type(42, '43')
        False

        :raises: TypeError

        """
        if self.f_supports(val1) != self.f_supports(val2):
            return False

        if not self.f_supports(val1) and not self.f_supports(val2):
            raise TypeError('I do not support the types of both inputs (`%s` and `%s`),'
                            ' therefore I cannot judge whether the two are of same type.'
                            % (str(type(val1)), str(type(val2))))

        if not type(val1) is type(val2):
            return False

        # Numpy arrays must agree in data type and shape
        if type(val1) is np.ndarray:
            if val1.dtype != val2.dtype:
                return False

            if not np.shape(val1) == np.shape(val2):
                return False

        # For tuples we know from earlier checks that the data is homogeneous.
        # Thus, only the type of the first item and the length must agree.
        if type(val1) is tuple:
            return (type(val1[0]) is type(val2[0])) and (len(val1) == len(val2))

        return True
    @copydoc(BaseParameter.f_set)
    def f_set(self, data):
        if self.v_locked:
            raise pex.ParameterLockedException('Parameter `%s` is locked!' % self._full_name)

        if self.f_has_range():
            raise AttributeError('Your parameter is explored and can no longer '
                                 'change its values!')

        if self.v_stored:
            self._logger.debug('You are changing an already stored parameter. If you do '
                               'not explicitly overwrite the data on disk, this change '
                               'might be lost and not propagated to disk.')

        if not self.f_supports(data):
            raise TypeError('Unsupported data `%s` of type `%s`. '
                            'If you passed a tuple or list, this error might also be caused '
                            'by heterogeneous data within the '
                            'tuple or list.' % (str(data), str(type(data))))

        self._data = data
        self._default = self._data
    @copydoc(BaseParameter.f_get_default)
    def f_get_default(self):
        if self.f_is_empty():
            raise TypeError('Parameter `%s` is empty, cannot access data.' %
                            self.v_full_name)

        self.f_lock()  # As soon as someone accesses an entry the parameter gets locked
        return self._default
    def f_get_range(self, copy=True):
        """Returns a python iterable containing the exploration range.

        :param copy: If the range should be copied before it is handed over, to
            avoid tampering with the data.

        Example usage:

        >>> param = Parameter('groupA.groupB.myparam', data=22, comment='I am a neat example')
        >>> param._explore([42, 43, 44])
        >>> param.f_get_range()
        [42, 43, 44]

        :raises: TypeError: If the parameter is not explored.

        """
        if not self.f_has_range():
            raise TypeError('Your parameter `%s` has no range, so I cannot return one.' %
                            self.v_full_name)
        elif copy:
            return self._explored_range[:]
        else:
            return self._explored_range
    def _explore(self, explore_iterable):
        """Explores the parameter according to the iterable.

        Raises ParameterLockedException if the parameter is locked.
        Raises TypeError if the parameter does not support the data, the types of
        the data in the iterable are not the same as the type of the default value,
        or the parameter already has an exploration range.

        Note that the parameter will iterate over the whole iterable once and store
        the individual data values into a list. Thus, the whole exploration range is
        explicitly stored in memory.

        :param explore_iterable: An iterable specifying the exploration range

            For example:

            >>> param._explore([3.0, 2.0, 1.0])
            >>> param.f_get_range()
            [3.0, 2.0, 1.0]

        :raises: TypeError, ParameterLockedException

        """
        if self.v_locked:
            raise pex.ParameterLockedException('Parameter `%s` is locked!' % self.v_full_name)

        if self.f_has_range():
            raise TypeError('Your parameter `%s` is already explored, '
                            'cannot _explore it further!' % self._name)

        if self._data is None:
            raise TypeError('Your parameter `%s` has no default value, please specify one '
                            'via `f_set` before exploration.' % self.v_full_name)

        data_list = self._data_sanity_checks(explore_iterable)

        self._explored_range = data_list
        self._explored = True
        self.f_lock()

    def _expand(self, explore_iterable):
        """Explores the parameter according to the iterable and appends to the
        exploration range.

        Raises ParameterLockedException if the parameter is locked.
        Raises TypeError if the parameter does not support the data, the types of
        the data in the iterable are not the same as the type of the default value,
        or the parameter did not have a range before.

        Note that the parameter will iterate over the whole iterable once and store
        the individual data values into a list. Thus, the whole exploration range is
        explicitly stored in memory.

        :param explore_iterable: An iterable specifying the exploration range

            For example:

            >>> param = Parameter('Im.an.example', data=33.33, comment='Wooohoo!')
            >>> param._explore([3.0, 2.0, 1.0])
            >>> param._expand([42.0, 43.42])
            >>> param.f_get_range()
            [3.0, 2.0, 1.0, 42.0, 43.42]

        :raises: TypeError, ParameterLockedException

        """
        if self.v_locked:
            raise pex.ParameterLockedException('Parameter `%s` is locked!' % self.v_full_name)

        if not self.f_has_range():
            raise TypeError('Your parameter `%s` has no range and can therefore '
                            'not be expanded.' % self._name)

        data_list = self._data_sanity_checks(explore_iterable)

        self._explored_range.extend(data_list)
        self.f_lock()

    def _data_sanity_checks(self, explore_iterable):
        """Checks if data values are valid.

        Checks if the data values are supported by the parameter and if the values
        are of the same type as the default value.

        """
        data_list = []

        for val in explore_iterable:

            if not self.f_supports(val):
                raise TypeError('%s is not of a supported type: %s.' %
                                (repr(val), str(type(val))))

            if not self._values_of_same_type(val, self._default):
                raise TypeError(
                    'Data of `%s` is not of the same type as the original entry value, '
                    'new type is %s vs old type %s.' %
                    (self.v_full_name, str(type(val)), str(type(self._default))))

            data_list.append(val)

        if len(data_list) == 0:
            raise ValueError('Cannot explore an empty list!')

        return data_list

    def _store(self):
        """Returns a dictionary of formatted data understood by the storage service.

        The data is put into an :class:`~pypet.parameter.ObjectTable` named 'data'.
        If the parameter is explored, the exploration range is also put into another
        table named 'explored_data'.

        :return: Dictionary containing the data and optionally the exploration range.

        """
        if self._data is not None:
            store_dict = {'data': ObjectTable(data={'data': [self._data]})}

            if self.f_has_range():
                store_dict['explored_data'] = ObjectTable(data={'data': self._explored_range})

            self._locked = True

            return store_dict

    def _load(self, load_dict):
        """Loads the data and exploration range from the `load_dict`.

        The `load_dict` needs to be in the same format as the result of the
        :func:`~pypet.parameter.Parameter._store` method.

        """
        if self.v_locked:
            raise pex.ParameterLockedException('Parameter `%s` is locked!' % self.v_full_name)

        if 'data' in load_dict:
            self._data = load_dict['data']['data'][0]
            self._default = self._data
        else:
            self._logger.warning('Your parameter `%s` is empty, '
                                 'I did not find any data on disk.' % self.v_full_name)

        if 'explored_data' in load_dict:
            self._explored_range = [x for x in load_dict['explored_data']['data'].tolist()]
            self._explored = True

        self._locked = True
    @copydoc(BaseParameter.f_get)
    def f_get(self):
        if self.f_is_empty():
            raise TypeError('Parameter `%s` is empty, cannot access data.' %
                            self.v_full_name)

        self.f_lock()  # As soon as someone accesses an entry the parameter gets locked
        return self._data
    @copydoc(BaseParameter._shrink)
    def _shrink(self):
        if self.v_locked:
            raise pex.ParameterLockedException('Parameter %s is locked!' % self.v_full_name)

        if not self.f_has_range():
            raise TypeError('Cannot shrink Parameter without a range.')

        if self.f_is_empty():
            raise TypeError('Cannot shrink empty Parameter.')

        del self._explored_range
        self._explored_range = []
        self._explored = False
    @copydoc(BaseParameter.f_empty)
    def f_empty(self):
        if self.v_locked:
            raise pex.ParameterLockedException('Parameter %s is locked!' % self.v_full_name)

        if self.f_has_range():
            self._shrink()

        del self._data
        del self._default
        self._data = None
        self._default = None
        self._explored = False
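
# Illustrative sketch (not part of the original module): the dictionary format
# that `Parameter._store` hands to a storage service, and how `_load` restores
# it. The parameter name is made up for the example.
def _example_parameter_store_load():
    param = Parameter('examples.store', data=42)
    param._explore([1, 2, 3])
    store_dict = param._store()
    assert sorted(store_dict.keys()) == ['data', 'explored_data']
    assert store_dict['data']['data'][0] == 42  # ObjectTable with a single row

    restored = Parameter('examples.store')
    restored._load(store_dict)
    assert restored.f_get() == 42
    assert restored.f_get_range() == [1, 2, 3]
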
class ArrayParameter(Parameter):
    """Similar to the :class:`~pypet.parameter.Parameter`, but recommended for larger
    numpy arrays and python tuples.

    The array parameter is a bit smarter in memory management than the parameter.
    If a numpy array is used several times within an exploration, only one numpy
    array is stored by the default HDF5 storage service. For each individual run,
    references to the corresponding numpy array are stored.

    Since the ArrayParameter inherits from :class:`~pypet.parameter.Parameter` it
    also supports all other native python types.

    """

    __slots__ = ()

    IDENTIFIER = '__rr__'
    """Identifier to mark stored data as an array"""

    def _store(self):
        """Creates a storage dictionary for the storage service.

        If the data is not a numpy array, a numpy matrix, or a tuple, the
        :func:`~pypet.parameter.Parameter._store` method of the parent class is
        called. Otherwise the array is put into the dictionary with the key
        'data__rr__'.

        Each array of the exploration range is stored as a separate entry whose name
        is generated by :func:`~pypet.parameter.ArrayParameter._build_name` from the
        index of the array. Note that if an array is used more than once in an
        exploration range (for example, due to cartesian product exploration), the
        array is stored only once. Moreover, an
        :class:`~pypet.parameter.ObjectTable` containing the references is stored
        under the name 'explored_data__rr__' in order to recall the order of the
        arrays later on.

        """
        if type(self._data) not in (np.ndarray, tuple, np.matrix, list):
            return super(ArrayParameter, self)._store()
        else:
            store_dict = {'data' + ArrayParameter.IDENTIFIER: self._data}

            if self.f_has_range():
                # Supports smart storage by hashable arrays.
                # Keys are the hashable arrays or tuples and values are the indices
                smart_dict = {}

                store_dict['explored_data' + ArrayParameter.IDENTIFIER] = \
                    ObjectTable(columns=['idx'], index=list(range(len(self))))

                count = 0
                for idx, elem in enumerate(self._explored_range):

                    # First we need to distinguish between tuples and arrays and
                    # extract a hashable part of the array
                    if isinstance(elem, np.ndarray):
                        # You cannot hash numpy arrays themselves, but if they are
                        # read-only you can hash `array.data`
                        hash_elem = HashArray(elem)
                    elif isinstance(elem, list):
                        hash_elem = tuple(elem)
                    else:
                        hash_elem = elem

                    # Check if we have used the array before,
                    # i.e. the element can be found in the dictionary
                    if hash_elem in smart_dict:
                        name_idx = smart_dict[hash_elem]
                        add = False
                    else:
                        name_idx = count
                        add = True

                    name = self._build_name(name_idx)
                    # Store the reference to the array
                    store_dict['explored_data' + ArrayParameter.IDENTIFIER]['idx'][idx] = \
                        name_idx

                    # Only if the array was not encountered before,
                    # store the array and remember the index
                    if add:
                        store_dict[name] = elem
                        smart_dict[hash_elem] = name_idx
                        count += 1

            self._locked = True

            return store_dict

    @staticmethod
    def _build_name(name_idx):
        """Formats a name for storage.

        :return:

            'explored__rr__.set_XXXXX.xa_XXXXXXXX' where 'XXXXXXXX' is the index of
            the array

        """
        return 'explored%s.set_%05d.xa_%08d' % (ArrayParameter.IDENTIFIER,
                                                name_idx // 1000, name_idx)

    def _load(self, load_dict):
        """Reconstructs the data and exploration array.

        Checks if it can find the array identifier in the `load_dict`, i.e. '__rr__'.
        If not, calls :func:`~pypet.parameter.Parameter._load` of the parent class.

        If the parameter is explored, the exploration range of arrays is
        reconstructed as it was stored in
        :func:`~pypet.parameter.ArrayParameter._store`.

        """
        if self.v_locked:
            raise pex.ParameterLockedException('Parameter `%s` is locked!' % self.v_full_name)

        try:
            self._data = load_dict['data' + ArrayParameter.IDENTIFIER]

            if 'explored_data' + ArrayParameter.IDENTIFIER in load_dict:
                explore_table = load_dict['explored_data' + ArrayParameter.IDENTIFIER]

                idx = explore_table['idx']

                explore_list = []

                # Recall the arrays in the order given by the ObjectTable
                # 'explored_data__rr__'
                for name_idx in idx:
                    arrayname = self._build_name(name_idx)
                    explore_list.append(load_dict[arrayname])

                self._explored_range = [x for x in explore_list]
                self._explored = True

        except KeyError:
            super(ArrayParameter, self)._load(load_dict)

        self._default = self._data
        self._locked = True

    def _values_of_same_type(self, val1, val2):
        """Checks if two values agree in type.

        The array parameter is less restrictive than the parameter. If both values
        are arrays, matrices or tuples, they are considered to be of same type
        regardless of their size and the values they contain.

        """
        if (type(val1) in (np.ndarray, tuple, np.matrix)) and (type(val2) is type(val1)):
            return True
        else:
            return super(ArrayParameter, self)._values_of_same_type(val1, val2)
    def f_supports(self, data):
        """Checks if input data is supported by the parameter."""
        dtype = type(data)
        if (dtype is tuple or dtype is list) and len(data) == 0:
            return True  # ArrayParameter does support empty tuples and lists
        elif dtype is np.ndarray and data.size == 0 and data.ndim == 1:
            return True  # ArrayParameter supports empty numpy arrays
        else:
            return super(ArrayParameter, self).f_supports(data)
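
# Illustrative sketch (not part of the original module): the smart storage of
# `ArrayParameter`. A range that reuses the same array twice stores the array
# data only once; the 'explored_data__rr__' table keeps the per-run indices.
# This relies on `HashArray` hashing arrays by content, which is exactly what
# `_store` assumes as well.
def _example_array_dedup():
    arr_a = np.array([1, 2, 3])
    arr_b = np.array([4, 5, 6])
    param = ArrayParameter('examples.arrays', data=arr_a)
    param._explore([arr_a, arr_b, arr_a])  # arr_a appears twice
    store_dict = param._store()
    idx_col = store_dict['explored_data' + ArrayParameter.IDENTIFIER]['idx']
    assert list(idx_col) == [0, 1, 0]  # third run references the first array
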
class SparseParameter(ArrayParameter):
    """Parameter that handles Scipy csr, csc, bsr and dia sparse matrices.

    The sparse parameter inherits from :class:`~pypet.parameter.ArrayParameter` and
    supports arrays and native python data as well. It uses a similar memory
    management as its parent class.

    """

    IDENTIFIER = '__spsp__'
    """Identifier to mark stored data as a sparse matrix"""

    DIA_NAME_LIST = ['format', 'data', 'offsets', 'shape']
    """Data names for serialization of dia matrices"""

    OTHER_NAME_LIST = ['format', 'data', 'indices', 'indptr', 'shape']
    """Data names for serialization of csr, csc, and bsr matrices"""

    __slots__ = ()

    def _values_of_same_type(self, val1, val2):
        """Checks if two values agree in type.

        The sparse parameter is less restrictive than the parameter. If both values
        are sparse matrices, they are considered to be of same type regardless of
        their size and the values they contain.

        """
        if self._is_supported_matrix(val1) and self._is_supported_matrix(val2):
            return True
        else:
            return super(SparseParameter, self)._values_of_same_type(val1, val2)

    def _equal_values(self, val1, val2):
        """Matrices are equal if they hash to the same value."""
        if self._is_supported_matrix(val1):
            if self._is_supported_matrix(val2):

                _, _, hash_tuple_1 = self._serialize_matrix(val1)
                _, _, hash_tuple_2 = self._serialize_matrix(val2)

                return hash(hash_tuple_1) == hash(hash_tuple_2)
            else:
                return False
        else:
            return super(SparseParameter, self)._equal_values(val1, val2)

    @staticmethod
    def _is_supported_matrix(data):
        """Checks if `data` is a csr, csc, bsr, or dia Scipy sparse matrix"""
        return (spsp.isspmatrix_csc(data) or
                spsp.isspmatrix_csr(data) or
                spsp.isspmatrix_bsr(data) or
                spsp.isspmatrix_dia(data))
    def f_supports(self, data):
        """The sparse parameter supports Scipy csr, csc, bsr and dia matrices and
        everything that its parent class, the
        :class:`~pypet.parameter.ArrayParameter`, supports.

        """
        if self._is_supported_matrix(data):
            return True
        else:
            return super(SparseParameter, self).f_supports(data)
    @staticmethod
    def _serialize_matrix(matrix):
        """Extracts data from a sparse matrix to make it serializable in a human
        readable format.

        :return: Tuple with the following elements:

            1. A list containing the data that is necessary to reconstruct the matrix.

               For csr, csc, and bsr matrices the following attributes are extracted:
               `format`, `data`, `indices`, `indptr`, `shape`, where `format` is
               simply one of the strings 'csr', 'csc', or 'bsr'.

               For dia matrices the following attributes are extracted:
               `format`, `data`, `offsets`, `shape`, where `format` is simply the
               string 'dia'.

            2. A list containing the names of the extracted attributes.

               For csr, csc, and bsr: ['format', 'data', 'indices', 'indptr', 'shape']

               For dia: ['format', 'data', 'offsets', 'shape']

            3. A tuple containing the hashable parts of (1) in order to use the
               tuple as a key for a dictionary. Accordingly, the numpy arrays of (1)
               are wrapped to be read-only and hashable.

        """
        if (spsp.isspmatrix_csc(matrix) or
                spsp.isspmatrix_csr(matrix) or
                spsp.isspmatrix_bsr(matrix)):

            if matrix.size > 0:
                return_list = [matrix.data, matrix.indices, matrix.indptr, matrix.shape]
            else:
                # For empty matrices we only need the shape
                return_list = ['__empty__', (), (), matrix.shape]

            return_names = SparseParameter.OTHER_NAME_LIST

            if spsp.isspmatrix_csc(matrix):
                return_list = ['csc'] + return_list
            elif spsp.isspmatrix_csr(matrix):
                return_list = ['csr'] + return_list
            elif spsp.isspmatrix_bsr(matrix):
                return_list = ['bsr'] + return_list
            else:
                raise RuntimeError('You shall not pass!')

        elif spsp.isspmatrix_dia(matrix):
            if matrix.size > 0:
                return_list = ['dia', matrix.data, matrix.offsets, matrix.shape]
            else:
                # For empty matrices we only need the shape
                return_list = ['dia', '__empty__', (), matrix.shape]

            return_names = SparseParameter.DIA_NAME_LIST
        else:
            raise RuntimeError('You shall not pass!')

        hash_list = []
        # Wrap the numpy arrays into read-only `HashArray`s in order to have
        # something hashable
        for item in return_list:
            if type(item) is np.ndarray:
                hash_list.append(HashArray(item))
            else:
                hash_list.append(item)

        return return_list, return_names, tuple(hash_list)

    @staticmethod
    def _get_name_list(is_dia):
        if is_dia:
            return SparseParameter.DIA_NAME_LIST
        else:
            return SparseParameter.OTHER_NAME_LIST

    def _store(self):
        """Creates a storage dictionary for the storage service.

        If the data is not a supported sparse matrix, the
        :func:`~pypet.parameter.ArrayParameter._store` method of the parent class is
        called. Otherwise the matrix is split into parts with
        :func:`~pypet.parameter.SparseParameter._serialize_matrix` and these are
        named 'data__spsp__XXXX' where 'XXXX' is a particular property of the matrix.

        The exploration range is handled similarly as in the parent class. Yet, the
        matrices are split into the relevant parts and each part is stored under a
        name generated by :func:`~pypet.parameter.SparseParameter._build_names` from
        the property name and the index of the sparse matrix.

        The :class:`~pypet.parameter.ObjectTable` 'explored_data__spsp__' stores the
        order of the matrices and whether the corresponding matrix is dia or not.

        """
        if not self._is_supported_matrix(self._data):
            return super(SparseParameter, self)._store()
        else:
            store_dict = {}
            data_list, name_list, hash_tuple = self._serialize_matrix(self._data)
            rename_list = ['data%s%s' % (SparseParameter.IDENTIFIER, name)
                           for name in name_list]

            is_dia = int(len(rename_list) == 4)
            store_dict['data%sis_dia' % SparseParameter.IDENTIFIER] = is_dia

            for idx, name in enumerate(rename_list):
                store_dict[name] = data_list[idx]

            if self.f_has_range():
                # Supports smart storage by hashing
                smart_dict = {}

                store_dict['explored_data' + SparseParameter.IDENTIFIER] = \
                    ObjectTable(columns=['idx', 'is_dia'], index=list(range(len(self))))

                count = 0
                for idx, elem in enumerate(self._explored_range):

                    data_list, name_list, hash_tuple = self._serialize_matrix(elem)

                    # Use the hash_tuple as a key for the smart_dict
                    if hash_tuple in smart_dict:
                        name_idx = smart_dict[hash_tuple]
                        add = False
                    else:
                        name_idx = count
                        add = True

                    is_dia = int(len(name_list) == 4)
                    rename_list = self._build_names(name_idx, is_dia)

                    store_dict['explored_data' +
                               SparseParameter.IDENTIFIER]['idx'][idx] = name_idx
                    store_dict['explored_data' +
                               SparseParameter.IDENTIFIER]['is_dia'][idx] = is_dia

                    if add:
                        for irun, name in enumerate(rename_list):
                            store_dict[name] = data_list[irun]

                        smart_dict[hash_tuple] = name_idx
                        count += 1

            self._locked = True

            return store_dict

    def _build_names(self, name_idx, is_dia):
        """Formats names for storage.

        :return:

            A tuple of names with the format
            'explored__spsp__.set_XXXXX.xspm_XXXX_XXXXXXXX' where 'XXXX' refers to
            the property and 'XXXXXXXX' to the index of the sparse matrix.

        """
        name_list = self._get_name_list(is_dia)
        return tuple(['explored%s.set_%05d.xspm_%s_%08d' %
                      (SparseParameter.IDENTIFIER, name_idx // 200, name, name_idx)
                      for name in name_list])

    def _build_names_old(self, name_idx, is_dia):
        """ONLY for backwards compatibility"""
        name_list = self._get_name_list(is_dia)
        return tuple(['xspm%s%s%s%08d' %
                      (SparseParameter.IDENTIFIER, name,
                       SparseParameter.IDENTIFIER, name_idx)
                      for name in name_list])

    @staticmethod
    def _reconstruct_matrix(data_list):
        """Reconstructs a matrix from a list of extracted sparse matrix properties.

        `data_list` needs to be formatted as the first result of
        :func:`~pypet.parameter.SparseParameter._serialize_matrix`.

        """
        matrix_format = data_list[0]
        data = data_list[1]
        is_empty = isinstance(data, str) and data == '__empty__'

        if matrix_format == 'csc':
            if is_empty:
                return spsp.csc_matrix(data_list[4])
            else:
                return spsp.csc_matrix(tuple(data_list[1:4]), shape=data_list[4])
        elif matrix_format == 'csr':
            if is_empty:
                return spsp.csr_matrix(data_list[4])
            else:
                return spsp.csr_matrix(tuple(data_list[1:4]), shape=data_list[4])
        elif matrix_format == 'bsr':
            if is_empty:
                # We have an empty matrix that cannot be built as in the else case
                return spsp.bsr_matrix(data_list[4])
            else:
                return spsp.bsr_matrix(tuple(data_list[1:4]), shape=data_list[4])
        elif matrix_format == 'dia':
            if is_empty:
                return spsp.dia_matrix(data_list[3])
            else:
                return spsp.dia_matrix(tuple(data_list[1:3]), shape=data_list[3])
        else:
            raise RuntimeError('You shall not pass!')

    def _load(self, load_dict):
        """Reconstructs the data and exploration array.

        Checks if it can find the sparse matrix identifier in the `load_dict`, i.e.
        '__spsp__'. If not, calls :func:`~pypet.parameter.ArrayParameter._load` of
        the parent class.

        If the parameter is explored, the exploration range of matrices is
        reconstructed as it was stored in
        :func:`~pypet.parameter.SparseParameter._store`.

        """
        if self.v_locked:
            raise pex.ParameterLockedException('Parameter `%s` is locked!' % self.v_full_name)

        try:
            is_dia = load_dict['data%sis_dia' % SparseParameter.IDENTIFIER]

            name_list = self._get_name_list(is_dia)
            rename_list = ['data%s%s' % (SparseParameter.IDENTIFIER, name)
                           for name in name_list]

            data_list = [load_dict[name] for name in rename_list]
            self._data = self._reconstruct_matrix(data_list)

            if 'explored_data' + SparseParameter.IDENTIFIER in load_dict:
                explore_table = load_dict['explored_data' + SparseParameter.IDENTIFIER]

                idx_col = explore_table['idx']
                dia_col = explore_table['is_dia']

                explore_list = []
                for irun, name_id in enumerate(idx_col):
                    is_dia = dia_col[irun]

                    # To make everything work with the old format
                    # we have the try-except block
                    try:
                        name_list = self._build_names(name_id, is_dia)
                        data_list = [load_dict[name] for name in name_list]
                    except KeyError:
                        name_list = self._build_names_old(name_id, is_dia)
                        data_list = [load_dict[name] for name in name_list]

                    matrix = self._reconstruct_matrix(data_list)
                    explore_list.append(matrix)

                self._explored_range = explore_list
                self._explored = True

        except KeyError:
            super(SparseParameter, self)._load(load_dict)

        self._default = self._data
        self._locked = True
class PickleParameter(Parameter):
    """A parameter class that supports all picklable objects, and pickles everything!

    If you use the default HDF5 storage service, the pickle dumps are stored to disk.
    Works similarly to the array parameter regarding memory management (equality of
    objects is based on the object id).

    There is no straightforward check to guarantee that data is picklable, so you
    have to take care that all data handled by the PickleParameter supports pickling.

    You can pass the pickle protocol via `protocol=2` to the constructor or change it
    with the `v_protocol` property. The default protocol is 2.

    Note that after storage to disk, changing the protocol has no effect. If the
    parameter is loaded, `v_protocol` is set to the protocol used to store the data.

    """

    PROTOCOL = '__pckl_prtcl__'

    __slots__ = ('_protocol',)

    def __init__(self, full_name, data=None, comment='', protocol=2):
        super(PickleParameter, self).__init__(full_name, data, comment)
        self._protocol = None
        self.v_protocol = protocol

    @property
    def v_protocol(self):
        """The protocol used to pickle data, default is 2.

        See the pickle_ documentation for the available protocols.

        .. _pickle: http://docs.python.org/2/library/pickle.html

        """
        return self._protocol

    @v_protocol.setter
    def v_protocol(self, value):
        """Sets the protocol"""
        self._protocol = value
    def f_supports(self, data):
        """There is no straightforward check if an object can be pickled, so this
        function always returns `True`.

        You have to take care in advance that the item can be pickled.

        """
        return True
    @staticmethod
    def _build_name(name_id):
        """Formats names for storage.

        Explored data is stored as 'xp_XXXXXXXX' where 'XXXXXXXX' is the index of
        the object.

        """
        return 'xp_%08d' % name_id

    def _store(self):
        """Returns a dictionary for storage.

        Every element in the dictionary except for 'explored_data' is a pickle dump.
        Reuse of objects is identified via the object id, i.e. python's built-in
        `id` function. 'explored_data' contains the references to the objects to be
        able to recall the order of objects later on.

        """
        store_dict = {}

        if self._data is not None:
            dump = pickle.dumps(self._data, protocol=self.v_protocol)
            store_dict['data'] = dump
            store_dict[PickleParameter.PROTOCOL] = self.v_protocol

        if self.f_has_range():
            store_dict['explored_data'] = \
                ObjectTable(columns=['idx'], index=list(range(len(self))))

            smart_dict = {}
            count = 0

            for idx, val in enumerate(self._explored_range):

                obj_id = id(val)

                if obj_id in smart_dict:
                    name_id = smart_dict[obj_id]
                    add = False
                else:
                    name_id = count
                    add = True

                name = self._build_name(name_id)
                store_dict['explored_data']['idx'][idx] = name_id

                if add:
                    store_dict[name] = pickle.dumps(val, protocol=self.v_protocol)
                    smart_dict[obj_id] = name_id
                    count += 1

        self._locked = True

        return store_dict

    @staticmethod
    def _get_protocol(dump):
        """Guesses the protocol of a pickle `dump` from its opcodes."""
        pops = pickletools.genops(dump)
        proto = 2 if next(pops)[0].proto == 2 \
            else int(any(op.proto for op, fst, snd in pops))
        return proto

    def _load(self, load_dict):
        """Reconstructs objects from the pickle dumps in `load_dict`.

        The 'explored_data' entry in `load_dict` is used to reconstruct the
        exploration range in the correct order.

        Sets the `v_protocol` property to the protocol used to store 'data'.

        """
        if self.v_locked:
            raise pex.ParameterLockedException('Parameter `%s` is locked!' % self.v_full_name)

        if 'data' in load_dict:
            dump = load_dict['data']
            self._data = pickle.loads(dump)
            try:
                self.v_protocol = load_dict[PickleParameter.PROTOCOL]
            except KeyError:
                # For backwards compatibility, guess the protocol from the dump
                self.v_protocol = PickleParameter._get_protocol(dump)
        else:
            self._logger.warning('Your parameter `%s` is empty, '
                                 'I did not find any data on disk.' % self.v_full_name)

        if 'explored_data' in load_dict:
            explore_table = load_dict['explored_data']

            name_col = explore_table['idx']

            explore_list = []
            for name_id in name_col:
                arrayname = self._build_name(name_id)
                loaded = pickle.loads(load_dict[arrayname])
                explore_list.append(loaded)

            self._explored_range = explore_list
            self._explored = True

        self._default = self._data
        self._locked = True
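
# Illustrative sketch (not part of the original module): a PickleParameter can
# handle arbitrary picklable objects like dicts, which the plain Parameter
# rejects; everything is stored as a pickle dump.
def _example_pickle_roundtrip():
    param = PickleParameter('examples.pickled', data={'nested': [1, 2, 3]},
                            comment='Dicts are not supported by Parameter itself')
    store_dict = param._store()
    assert isinstance(store_dict['data'], bytes)  # the pickle dump

    restored = PickleParameter('examples.pickled')
    restored._load(store_dict)
    assert restored.f_get() == {'nested': [1, 2, 3]}
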
class BaseResult(NNLeafNode):
    """Abstract base API for results.

    Compared to parameters (see :class:`~pypet.parameter.BaseParameter`), results are
    also initialised with a full name and a comment. Yet, results can contain more
    than a single value and heterogeneous data.

    """

    __slots__ = ()
    def __init__(self, full_name, comment=''):
        super(BaseResult, self).__init__(full_name, comment, is_parameter=False)
class Result(BaseResult):
    """Light container that stores basic python and numpy data.

    Note that no sanity checks on individual data are made (only on the outer data
    structure) and you have to take care that your data is understood by the storage
    service. It is assumed that results tend to be large and therefore sanity checks
    would be too expensive.

    Data that can safely be stored into a Result are:

    * python natives (int, long, str, bool, float, complex),

    * numpy natives, arrays and matrices of type np.int8-64, np.uint8-64,
      np.float32-64, np.complex, np.str

    * python lists and tuples of the previous types
      (python natives + numpy natives and arrays)

      Lists and tuples are not allowed to be nested and must be homogeneous, i.e.
      only contain data of one particular type. Only integers, or only floats, etc.

    * python dictionaries of the previous types (not nested!), data can be
      heterogeneous, keys must be strings. For example, one key-value pair of string
      and int and one key-value pair of string and float, and so on.

    * pandas DataFrames_

    * :class:`~pypet.parameter.ObjectTable`

    .. _DataFrames: http://pandas.pydata.org/pandas-docs/dev/dsintro.html#dataframe

    Note that containers should NOT be empty (like empty dicts or lists) at the time
    they are saved to disk. The standard HDF5 storage service cannot store empty
    containers! The Result emits a warning if you hand over an empty container.

    Data is set on initialisation or with :func:`~pypet.parameter.Result.f_set`.

    Example usage:

    >>> res = Result('supergroup.subgroup.myresult', [1000, 2000], {'a': 'b', 'c': 333},
    ...              hitchhiker='Arthur Dent', comment='I am a neat example!')

    In case you create a new result you can pass the following arguments:

    :param full_name: The full name of the result, grouping can be achieved by dots.

    :param comment: A useful comment describing the result. The comment can later on
        be changed using the `v_comment` variable.

        >>> res.v_comment
        'I am a neat example!'

    :param args: Data that is handled by the result. The first positional argument
        is stored with the name of the result. Following arguments are stored with
        `name_X` where `X` is the position of the argument.

    :param kwargs: Data that is handled by the result, it is kept by the result
        under the names specified by the keys of kwargs.

        >>> res.f_get(0)
        [1000, 2000]
        >>> res.f_get(1)
        {'a': 'b', 'c': 333}
        >>> res.f_get('myresult')
        [1000, 2000]
        >>> res.f_get('hitchhiker')
        'Arthur Dent'
        >>> res.f_get('myresult', 'hitchhiker')
        [[1000, 2000], 'Arthur Dent']

        Data can be changed or more can be added via
        :func:`~pypet.parameter.Result.f_set`:

        >>> res.f_set('Uno', x='y')
        >>> res.f_get(0)
        'Uno'
        >>> res.f_get('x')
        'y'

        An alternative method to put and retrieve data from the result container is
        via ``__getattr__`` and ``__setattr__``:

        >>> res.ford = 'prefect'
        >>> res.ford
        'prefect'

    :raises: TypeError:

        If the data format in args or kwargs is not known to the result. Checks the
        type of the outer data structure, i.e. checks if you have a list or
        dictionary. But it does not check on individual values within dicts or
        lists.

    """

    __slots__ = ('_data_',)

    SUPPORTED_DATA = set((np.ndarray, ObjectTable, DataFrame, Series,
                          dict, tuple, list, np.matrix) +
                         pypetconstants.PARAMETER_SUPPORTED_DATA)

    def __init__(self, full_name, *args, **kwargs):
        comment = kwargs.pop('comment', '')
        super(Result, self).__init__(full_name, comment)
        self._data_ = None
        self._set_logger()
        self.f_set(*args, **kwargs)

    @property
    def _data(self):
        """To avoid the overhead of producing an empty dictionary"""
        if self._data_ is None:
            self._data_ = {}
        return self._data_

    def __dir__(self):
        """Adds all data to auto-completion"""
        result = super(Result, self).__dir__()
        if self._data_ is not None:
            result.extend(self._data.keys())
        return result
[docs]    def f_translate_key(self, key):
        """Translates integer indices into the appropriate names"""
        if isinstance(key, int):
            if key == 0:
                key = self.v_name
            else:
                key = self.v_name + '_%d' % key
        return key
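    # Illustrative sketch (not part of the original module) of how integer
    # indices translate for a result named `myresult`:
    #
    #     >>> res = Result('supergroup.subgroup.myresult')
    #     >>> res.f_translate_key(0)
    #     'myresult'
    #     >>> res.f_translate_key(2)
    #     'myresult_2'
    #     >>> res.f_translate_key('custom')  # strings pass through unchanged
    #     'custom'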
    def __contains__(self, key):
        key = self.f_translate_key(key)
        return key in self._data
[docs]    def f_val_to_str(self):
        """Summarizes data handled by the result as a string.

        Calls `__repr__` on all handled data. Data is NOT ordered.

        Truncates the string if it is longer than
        :const:`pypetconstants.HDF5_STRCOL_MAX_VALUE_LENGTH`.

        :return: string

        """
        resstrlist = []
        strlen = 0

        for key in self._data:
            val = self._data[key]
            resstr = '%s=%s, ' % (key, repr(val))
            resstrlist.append(resstr)

            strlen += len(resstr)
            if strlen > pypetconstants.HDF5_STRCOL_MAX_VALUE_LENGTH:
                break

        return_string = "".join(resstrlist)
        if len(return_string) > pypetconstants.HDF5_STRCOL_MAX_VALUE_LENGTH:
            return_string = \
                return_string[0:pypetconstants.HDF5_STRCOL_MAX_VALUE_LENGTH - 3] + '...'
        else:
            return_string = return_string[0:-2]  # Delete the last `, `

        return return_string
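    # Illustrative sketch (not part of the original module): the summary
    # string joins `key=repr(value)` pairs and truncates at
    # HDF5_STRCOL_MAX_VALUE_LENGTH characters:
    #
    #     >>> res = Result('group.res', 42, hitchhiker='Arthur Dent')
    #     >>> res.f_val_to_str()  # order of entries is not guaranteed
    #     "res=42, hitchhiker='Arthur Dent'"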
    def __repr__(self):
        return '<%s>' % self.__str__()

    def __str__(self):
        """String representation of the result.

        Output format is '<class_name> name (`comment`): value_string'.

        The `value_string` is obtained from
        :func:`~pypet.parameter.Result.f_val_to_str`.

        If the comment is the empty string, the comment is omitted.

        """
        datastr = self.f_val_to_str()
        return_string = '%s %s' % (self.f_get_class_name(), self.v_full_name)
        if self.v_comment:
            return_string += ' (`%s`)' % self.v_comment
        if datastr:
            return_string += ': ' + datastr

        return return_string
[docs]    def f_to_dict(self, copy=True):
        """Returns all handled data as a dictionary.

        :param copy:

            Whether the original dictionary or a shallow copy is returned.

        :return: Data dictionary

        """
        if copy:
            return self._data.copy()
        else:
            return self._data
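    # Illustrative sketch (not part of the original module): with `copy=True`
    # (the default) changes to the returned dictionary leave the result
    # intact, whereas `copy=False` exposes the internal dictionary directly:
    #
    #     >>> res = Result('group.res', fortytwo=42)
    #     >>> d = res.f_to_dict()
    #     >>> del d['fortytwo']
    #     >>> 'fortytwo' in res
    #     True
    #     >>> d = res.f_to_dict(copy=False)
    #     >>> del d['fortytwo']
    #     >>> 'fortytwo' in res
    #     False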
[docs]    def f_is_empty(self):
        """True if no data has been put into the result.

        Also True if all data has been erased via
        :func:`~pypet.parameter.Result.f_empty`.

        """
        return len(self._data) == 0
[docs]    @copydoc(BaseResult.f_empty)
    def f_empty(self):
        self._data_ = None
[docs]    def f_set(self, *args, **kwargs):
        """Method to put data into the result.

        :param args:

            The first positional argument is stored with the name of the
            result. Following arguments are stored with `name_X` where `X` is
            the position of the argument.

        :param kwargs:

            Arguments are stored with the key as name.

        :raises: TypeError if outer data structure is not understood.

        Example usage:

        >>> res = Result('supergroup.subgroup.myresult', comment='I am a neat example!')
        >>> res.f_set(333, 42.0, mystring='String!')
        >>> res.f_get('myresult')
        333
        >>> res.f_get('myresult_1')
        42.0
        >>> res.f_get(1)
        42.0
        >>> res.f_get('mystring')
        'String!'

        """
        if args and self.v_name is None:
            raise AttributeError('Cannot set positional value because I do not have a name!')

        for idx, arg in enumerate(args):
            valstr = self.f_translate_key(idx)
            self.f_set_single(valstr, arg)

        for key, arg in kwargs.items():
            self.f_set_single(key, arg)
    def __getitem__(self, name):
        """Equivalent to calling `f_get`"""
        return self.f_get(name)

    def __setitem__(self, key, value):
        """Almost equivalent to calling ``__setattr__``.

        Integer keys are translated into item names via
        :func:`~pypet.parameter.Result.f_translate_key`, as in `f_get`.

        """
        key = self.f_translate_key(key)
        setattr(self, key, value)

    def __iter__(self):
        """Equivalent to iterating over the keys of the data dictionary."""
        return self._data.__iter__()
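    # Illustrative sketch (not part of the original module): item access and
    # iteration on a result:
    #
    #     >>> res = Result('group.res', 'Uno', dos=2)
    #     >>> res['dos']
    #     2
    #     >>> res[0]  # translated to the result's own name `res`
    #     'Uno'
    #     >>> sorted(key for key in res)
    #     ['dos', 'res']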
[docs]    def f_get(self, *args):
        """Returns items handled by the result.

        If only a single name is given, a single data item is returned. If
        several names are given, a list is returned. For integer inputs the
        result returns the data item stored under `resultname_X`.

        If the result contains only a single entry you can call `f_get()`
        without arguments. If you call `f_get()` and the result contains more
        than one element a ValueError is thrown.

        If the requested item(s) cannot be found an AttributeError is thrown.

        :param args: string names or integers

        :return: Single data item or list of data items

        Example:

        >>> res = Result('supergroup.subgroup.myresult', [1000,2000], {'a':'b','c':333}, \
hitchhiker='Arthur Dent', comment='I am a neat example!')
        >>> res.f_get('hitchhiker')
        'Arthur Dent'
        >>> res.f_get(0)
        [1000, 2000]
        >>> res.f_get('hitchhiker', 'myresult')
        ['Arthur Dent', [1000, 2000]]

        """
        if len(args) == 0:
            if len(self._data) == 1:
                return list(self._data.values())[0]
            elif len(self._data) > 1:
                raise ValueError('Your result `%s` contains more than one entry: '
                                 '`%s` Please use >>f_get<< with one of these.' %
                                 (self.v_full_name, str(list(self._data.keys()))))
            else:
                raise AttributeError('Your result `%s` is empty, cannot access data.' %
                                     self.v_full_name)

        result_list = []
        for name in args:
            name = self.f_translate_key(name)
            if name not in self._data:
                if name == 'data' and len(self._data) == 1:
                    return self._data[list(self._data.keys())[0]]
                else:
                    raise AttributeError('`%s` is not part of your result `%s`.' %
                                         (name, self.v_full_name))

            result_list.append(self._data[name])

        if len(args) == 1:
            return result_list[0]
        else:
            return result_list
[docs]    def f_set_single(self, name, item):
        """Sets a single data item of the result.

        Raises TypeError if the type of the outer data structure is not
        understood. Note that the type check is shallow. For example, if the
        data item is a list, the individual list elements are NOT checked
        whether their types are appropriate.

        :param name: The name of the data item

        :param item: The data item

        :raises: TypeError

        Example usage:

        >>> res.f_set_single('answer', 42)
        >>> res.f_get('answer')
        42

        """
        if self.v_stored:
            self._logger.debug('You are changing an already stored result. If you do not '
                               'explicitly overwrite the data on disk, this change '
                               'might be lost and not propagated to disk.')

        if self._supports(item):
            if name in self._data:
                self._logger.debug('Replacing `%s` in result `%s`.' %
                                   (name, self.v_full_name))

            self._data[name] = item
        else:
            raise TypeError('Your result `%s` of type `%s` is not supported.' %
                            (name, str(type(item))))
    def _supports(self, item):
        """Checks if outer data structure is supported."""
        return type(item) in Result.SUPPORTED_DATA
[docs]    def f_supports_fast_access(self):
        """Whether or not the result supports fast access.

        A result supports fast access if it contains exactly one item
        with the name of the result.

        """
        return len(self._data) == 1 and self.v_name in self._data
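    # Illustrative sketch (not part of the original module): a result holding
    # exactly one item named like itself supports fast access:
    #
    #     >>> res = Result('group.res', 42)
    #     >>> res.f_supports_fast_access()
    #     True
    #     >>> res.f_set(extra='data')
    #     >>> res.f_supports_fast_access()
    #     False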
    def _store(self):
        """Returns a storage dictionary understood by the storage service.

        Simply returns a shallow copy of its own data dictionary.

        """
        store_dict = {}
        store_dict.update(self._data)
        return store_dict

    def _load(self, load_dict):
        """Loads data from `load_dict`"""
        self._data_ = load_dict
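    # Illustrative sketch (not part of the original module): `_store` and
    # `_load` round-trip the data dictionary, which is all a storage service
    # needs for a plain Result:
    #
    #     >>> res = Result('group.res', fortytwo=42)
    #     >>> store_dict = res._store()
    #     >>> fresh = Result('group.res')
    #     >>> fresh._load(store_dict)
    #     >>> fresh.f_get('fortytwo')
    #     42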
[docs]    def f_remove(self, *args):
        """Removes `*args` from the result."""
        for arg in args:
            arg = self.f_translate_key(arg)
            if arg in self._data:
                del self._data[arg]
            else:
                raise AttributeError('Your result `%s` does not contain `%s`.' %
                                     (self.v_full_name, arg))
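    # Illustrative sketch (not part of the original module):
    #
    #     >>> res = Result('group.res', uno=1, dos=2)
    #     >>> res.f_remove('uno')
    #     >>> 'uno' in res
    #     False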
    def __delitem__(self, key):
        """Deletes an item, see also ``__delattr__``."""
        self.f_remove(key)

    def __delattr__(self, item):
        """Deletes an item from the result.

        If the item has been stored to disk before with a storage service,
        this storage is not deleted!

        :param item: Name of item to delete

        :raises: AttributeError if the item does not exist

        Example usage:

        >>> res = Result('Iam.an.example', comment='And a neat one, indeed!', fortytwo=42)
        >>> 'fortytwo' in res
        True
        >>> del res.fortytwo
        >>> 'fortytwo' in res
        False

        """
        if item[0] == '_':
            super(Result, self).__delattr__(item)
        else:
            self.f_remove(item)

    def __setattr__(self, key, value):
        if key[0] == '_':
            # We set a private attribute
            super(Result, self).__setattr__(key, value)
        elif hasattr(self.__class__, key):
            # Workaround for python properties
            python_property = getattr(self.__class__, key)
            if python_property.fset is None:
                raise AttributeError('%s is read only!' % key)
            else:
                python_property.fset(self, value)
        else:
            self.f_set_single(key, value)

    def __getattr__(self, name):
        return self.f_get(name)
[docs]class SparseResult(Result):
    """Handles Scipy sparse matrices.

    Supported formats are csr, csc, bsr, and dia.

    Subclasses the standard result and can also handle all data supported
    by :class:`~pypet.parameter.Result`.

    """

    IDENTIFIER = SparseParameter.IDENTIFIER
    """Identifier string to label sparse matrix data"""

    __slots__ = ()
[docs]    @copydoc(Result.f_set_single)
    def f_set_single(self, name, item):
        if SparseResult.IDENTIFIER in name:
            raise AttributeError('Your result name contains the identifier for sparse matrices,'
                                 ' please do not use %s in your result names.' %
                                 SparseResult.IDENTIFIER)
        else:
            super(SparseResult, self).f_set_single(name, item)
    def _supports(self, item):
        """Supports everything of the parent class plus csr, csc, bsr, and dia sparse matrices."""
        if SparseParameter._is_supported_matrix(item):
            return True
        else:
            return super(SparseResult, self)._supports(item)

    def _store(self):
        """Returns a storage dictionary understood by the storage service.

        Sparse matrices are extracted similarly to the
        :class:`~pypet.parameter.SparseParameter` and marked with the
        identifier `__spsp__`.

        """
        store_dict = {}
        for key in self._data:
            val = self._data[key]
            if SparseParameter._is_supported_matrix(val):
                data_list, name_list, hash_tuple = SparseParameter._serialize_matrix(val)
                rename_list = ['%s%s%s' % (key, SparseParameter.IDENTIFIER, name)
                               for name in name_list]

                is_dia = int(len(rename_list) == 4)
                store_dict[key + SparseResult.IDENTIFIER + 'is_dia'] = is_dia

                for idx, name in enumerate(rename_list):
                    store_dict[name] = data_list[idx]
            else:
                store_dict[key] = val

        return store_dict

    def _load(self, load_dict):
        """Loads data from `load_dict`.

        Reconstruction of sparse matrices is similar to the
        :class:`~pypet.parameter.SparseParameter`.

        """
        for key in list(load_dict.keys()):
            # We delete keys over time:
            if key in load_dict:
                if SparseResult.IDENTIFIER in key:
                    new_key = key.split(SparseResult.IDENTIFIER)[0]

                    is_dia = load_dict.pop(new_key + SparseResult.IDENTIFIER + 'is_dia')

                    name_list = SparseParameter._get_name_list(is_dia)
                    rename_list = ['%s%s%s' % (new_key, SparseResult.IDENTIFIER, name)
                                   for name in name_list]

                    data_list = [load_dict.pop(name) for name in rename_list]
                    matrix = SparseParameter._reconstruct_matrix(data_list)
                    self._data[new_key] = matrix
                else:
                    self._data[key] = load_dict[key]
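# Illustrative sketch (not part of the original module): storing a csr matrix
# in a SparseResult splits it into several entries in the storage dictionary.
# Every key is tagged with the sparse identifier (the exact component names
# depend on SparseParameter._serialize_matrix):
#
#     >>> res = SparseResult('group.res')
#     >>> res.f_set(matrix=spsp.csr_matrix((10, 10)))
#     >>> all(SparseResult.IDENTIFIER in key for key in res._store())
#     True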
[docs]class PickleResult(Result):
    """Result that digests everything and simply pickles it!

    Note that it is NOT checked whether data can be pickled, so take care
    that it works!

    You can pass the pickle protocol via `protocol=2` to the constructor
    or change it with the `v_protocol` property. Default protocol is 0.

    Note that after storage to disk changing the protocol has no effect.
    If the result is loaded, `v_protocol` is set to the protocol used to
    store an item. Note that items are reconstructed from a dictionary and
    the protocol is taken from the first one found in the dictionary. This
    is a rather arbitrary choice. Yet, the underlying assumption is that
    all items were pickled with the same protocol, which is usually the case.

    """

    PROTOCOL = PickleParameter.PROTOCOL

    __slots__ = ('_protocol',)

    def __init__(self, full_name, *args, **kwargs):
        self._protocol = None
        protocol = kwargs.pop('protocol', 0)
        self.v_protocol = protocol

        super(PickleResult, self).__init__(full_name, *args, **kwargs)

    @property
    def v_protocol(self):
        """The protocol used to pickle data, default is 0.

        See pickle_ documentation for the protocols.

        .. _pickle: http://docs.python.org/2/library/pickle.html

        """
        return self._protocol

    @v_protocol.setter
    def v_protocol(self, value):
        """Sets the protocol"""
        self._protocol = value
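    # Illustrative sketch (not part of the original module): choosing the
    # pickle protocol at construction time or afterwards:
    #
    #     >>> res = PickleResult('group.res', myobject={'nested': [1, 2, 3]}, protocol=2)
    #     >>> res.v_protocol
    #     2
    #     >>> res.v_protocol = 0  # only effective as long as nothing is stored yet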
[docs]    def f_set_single(self, name, item):
        """Adds a single data item to the pickle result.

        Note that it is NOT checked if the item can be pickled!

        """
        if self.v_stored:
            self._logger.debug('You are changing an already stored result. If you do not '
                               'explicitly overwrite the data on disk, this change '
                               'might be lost and not propagated to disk.')

        if name == PickleResult.PROTOCOL:
            raise AttributeError('You cannot name an entry `%s`.' % PickleResult.PROTOCOL)

        self._data[name] = item
    def _store(self):
        """Returns a dictionary containing pickle dumps."""
        store_dict = {}
        for key, val in self._data.items():
            store_dict[key] = pickle.dumps(val, protocol=self.v_protocol)
        store_dict[PickleResult.PROTOCOL] = self.v_protocol
        return store_dict

    def _load(self, load_dict):
        """Reconstructs all items from the pickle dumps in `load_dict`.

        Sets the `v_protocol` property to the protocol of the first
        reconstructed item.

        """
        try:
            self.v_protocol = load_dict.pop(PickleResult.PROTOCOL)
        except KeyError:
            # For backwards compatibility we infer the protocol from one of the dumps:
            dump = next(iter(load_dict.values()))
            self.v_protocol = PickleParameter._get_protocol(dump)
        for key in load_dict:
            val = load_dict[key]
            self._data[key] = pickle.loads(val)
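# Illustrative sketch (not part of the original module): the pickle round trip
# performed by `_store` and `_load`:
#
#     >>> res = PickleResult('group.res', answer=42, protocol=2)
#     >>> store_dict = res._store()  # values are now pickle byte strings
#     >>> fresh = PickleResult('group.res')
#     >>> fresh._load(store_dict)
#     >>> fresh.f_get('answer')
#     42
#     >>> fresh.v_protocol  # recovered from the stored dictionary
#     2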