"""Handle information of an experiment of the doce module."""
import types
import inspect
import os
import time
import datetime
import ast
import glob
import copy
import numpy as np
import doce.util as eu
import doce
[docs]class Experiment():
"""Stores high level information about the experiment and tools
to control the processing and storage of data.
The experiment class displays high level information about the experiment
such as its name, description, author, author's email address, and run identification.
Information about storage of data is specified using the experiment.path namespace.
It also stores one or several Plan objects and a Metric object to respectively specify
the experimental plans and the metrics considered in the experiment.
See Also
--------
doce.Plan, doce.metric.Metric
Examples
--------
>>> import doce
>>> e=doce.Experiment()
>>> e.name='my_experiment'
>>> e.author='John Doe'
>>> e.address='john.doe@no-log.org'
>>> e.path.processing='/tmp'
>>> print(e)
name: my_experiment
description
author: John Doe
address: john.doe@no-log.org
version: 0.1
status:
run_id: ...
verbose: 0
selector: []
parameter
metric
path:
code_raw: ...
code: ...
archive_raw:
archive:
export_raw: export
export: export
processing_raw: /tmp
processing: /tmp
host: []
Each level can be complemented with new members to store specific information:
>>> e.specific_info = 'stuff'
>>> import types
>>> e.my_data = types.SimpleNamespace()
>>> e.my_data.info1= 1
>>> e.my_data.info2= 2
>>> print(e)
name: my_experiment
description
author: John Doe
address: john.doe@no-log.org
version: 0.1
status:
run_id: ...
verbose: 0
selector: []
parameter
metric
path:
code_raw: ...
code: ...
archive_raw:
archive:
export_raw: export
export: export
processing_raw: /tmp
processing: /tmp
host: []
specific_info: stuff
my_data:
info1: 1
info2: 2
"""
def __init__(self, **description):
    """Build an experiment populated with default values.

    Defaults are assigned first; every keyword of ``description`` is then
    assigned as an attribute of the experiment. The ``metric`` attribute is
    reset to a new ``doce.Metric()`` once the keywords have been applied.

    Parameters
    ----------
    **description
      attribute names and the values to assign to them.
    """
    # names of the public attributes, in assignment order (see __setattr__)
    self._atrs = []
    self._plan = doce.Plan('test')
    self._plans = []

    # general information
    self.name = ''
    self.description = ''
    self.author = 'no name'
    self.address = 'noname@noorg.org'
    self.version = '0.1'

    # run identifier: number of minutes elapsed since 2020-01-01 00:00
    elapsed_minutes = (time.time() - datetime.datetime(2020, 1, 1, 0, 0).timestamp()) / 60
    self.status = types.SimpleNamespace(run_id=str(int(elapsed_minutes)), verbose=0)

    self.selector = []
    self.parameter = types.SimpleNamespace()
    self.metric = doce.Metric()

    # storage locations
    self.path = Path()
    for path_name, location in (('code', os.getcwd()), ('archive', ''), ('export', 'export')):
        setattr(self.path, path_name, location)
    self._doce_paths = ['export', 'export_raw', 'archive', 'archive_raw', 'code', 'code_raw']
    self.host = []
    self._archive_path = ''

    # mailer account used by send_mail
    # NOTE(review): this credential is committed in the source; consider loading it
    # from the environment or a configuration file instead.
    self._gmail_id = 'expcode.mailer'
    self._gmail_app_password = 'tagsqtlirkznoxro'

    self._default_server_run_argument = {}
    self._resume = False
    self._check_setting_length = True

    # display options
    self._display = types.SimpleNamespace(
        export_png='wkhtmltoimage',  # could be 'chrome' or 'matplotlib'
        export_pdf='wkhtmltopdf',  # could be 'chrome' or 'latex'
        factor_format_in_reduce='long',
        metric_format_in_reduce='long',
        metric_precision=2,
        factor_format_in_reduce_length=2,
        metric_format_in_reduce_length=2,
        show_row_index=True,
        highlight=True,
        bar=False,
        pValue=0.05,
    )

    # user supplied values override the defaults
    for field, value in description.items():
        setattr(self, field, value)
    self.metric = doce.Metric()
def __setattr__(
self,
name,
value
):
if not hasattr(self, name) and name[0] != '_':
self._atrs.append(name)
return object.__setattr__(self, name, value)
def set_path(self, name, path, force=False):
    """Register a path in experiment.path and create its directory if missing.

    The path is stored as member ``name`` of experiment.path. A path ending
    with '.h5' is stored as given and only its parent directory is checked.
    Any other path is considered to be a directory: it is stored in absolute
    form with a trailing separator. If the directory is not reachable, it is
    created, possibly after prompting the user.

    Parameters
    ----------
    name : str
      name of the member of experiment.path to set.
    path : str
      path to a directory, or to an .h5 file.
    force : bool
      If True, do not prompt the user before creating the missing directories.
      If False, prompt the user before creation of each missing directory (default).

    Examples
    --------
    >>> import doce
    >>> import os
    >>> e=doce.Experiment()
    >>> e.name = 'experiment'
    >>> e.set_path('processing', f'/tmp/{e.name}/processing', force=True)
    >>> e.set_path('output', f'/tmp/{e.name}/output', force=True)
    >>> os.listdir(f'/tmp/{e.name}')
    ['processing', 'output']
    """
    # record the path as given: kept as is for .h5 files, replaced below for directories
    setattr(self.path, name, path)
    # os.path.abspath never returns an empty string, so no emptiness check is needed
    path = os.path.abspath(os.path.expanduser(path))
    if path.endswith('.h5'):
        # only the directory holding the .h5 file has to exist
        path = os.path.dirname(path)
    elif not path.endswith(('/', '\\')):
        # directories are stored with a trailing separator
        path += '\\' if '\\' in path else '/'
        setattr(self.path, name, path)
    if os.path.exists(path):
        return
    message = f'The {name} path: {path} does not exist. Do you want to create it ?'
    if force or doce.util.query_yes_no(message):
        # exist_ok: the directory may have been created by another process meanwhile
        os.makedirs(path, exist_ok=True)
        if not force:
            print('Path succesfully created.')
def __str__(self, style='str'):
    """Provide a textual description of the experiment.

    Enumerate the attributes listed in ``self._atrs`` together with their
    values. Members of namespace-like attributes are listed one per line.

    Parameters
    ----------
    style : str
      If 'str', return the description as a string.
      If 'html', return the description with an html format.

    Returns
    -------
    description : str
      If style == 'str' : a carriage return separated enumeration
      of the members of the class experiment.
      If style == 'html' : an html version of the description

    Examples
    --------
    >>> import doce
    >>> print(doce.Experiment())
    name
    description
    author: no name
    address: noname@noorg.org
    version: 0.1
    status:
      run_id: ...
      verbose: 0
    selector: []
    parameter
    metric
    path:
      code_raw: ...
      code: ...
      archive_raw:
      archive:
      export_raw: export
      export: export
    host: []
    >>> import doce
    >>> doce.Experiment().__str__(style='html')
    '<div>name</div><div>description</div><div>author: no name</div><div>address: noname@noorg.org</div><div>version: 0.1</div><div>status:</div><div> run_id: ...</div><div> verbose: 0</div><div>selector: []</div><div>parameter</div><div>metric</div><div>path:</div><div> code_raw: ...</div><div> code: ...</div><div> archive_raw: </div><div> archive: </div><div> export_raw: export</div><div> export: export</div><div>host: []</div><div></div>'
    """
    chunks = []
    for atr in self._atrs:
        # attributes that are plain functions are not part of the description
        if isinstance(inspect.getattr_static(self, atr), types.FunctionType):
            continue
        value = getattr(self, atr)
        if isinstance(value, (types.SimpleNamespace, Path)):
            members = vars(value)
            chunks.append(f'{atr}:\r\n' if members else f'{atr}\r\n')
            chunks.extend(f' {key}: {str(getattr(value, key))}\r\n' for key in members)
            continue
        text = str(value)
        # strings and lists stay on the attribute line, other values start on a new line
        separator = ': ' if isinstance(value, (str, list)) else ': \r\n'
        chunks.append(f'{atr}{separator}{text}\r\n' if text else f'{atr}\r\n')
    description = ''.join(chunks)
    if style == 'html':
        desc = description.replace('\r\n', '</div><div>').replace('\t', ' ')
        description = f'<div>{desc}</div>'
    return description
def send_mail(self, title='', body=''):
    """Send an email to the email address given in experiment.address.

    Send an email to the experiment.address email address using the smtp service from gmail.
    For privacy, please consider using a dedicated gmail account
    by setting experiment._gmail_id and experiment._gmail_app_password.
    For this, you will need to create a gmail account, set two-step validation
    and allow connection with app password.
    See https://support.google.com/accounts/answer/185833?hl=en for reference.

    The html description of the experiment is appended to the body.

    Parameters
    ----------
    title : str
      the title of the email in plain text format
    body : str
      the body of the email in html format

    Examples
    --------
    >>> import doce
    >>> e=doce.Experiment()
    >>> e.address = 'john.doe@no-log.org'
    >>> e.send_mail('hello', '<div> good day </div>')
    Sent message entitled: [doce] id ... hello ...
    """
    import smtplib
    sender = f'{self._gmail_id}@gmail.com'
    subject = f'[doce] {self.name} id {self.status.run_id} {title}'
    # the empty line closing the header block separates it from the body
    header = (
        f'From: doce mailer <{sender}>\r\n'
        f'To: {self.author} <{self.address}>\r\n'
        'MIME-Version: 1.0\r\n'
        'Content-type: text/html\r\n'
        f'Subject: {subject}\r\n'
        '\r\n'
    )
    exp_desc = self.__str__(style='html')
    # the context manager closes the connection even if the login or the sending fails
    with smtplib.SMTP('smtp.gmail.com', 587) as server:
        server.starttls()
        server.login(sender, self._gmail_app_password)
        server.sendmail(sender, self.address, f'{header}{body}<h3> {exp_desc}</h3>')
    print(f'Sent message entitled: {subject} at {time.ctime(time.time())}')
def select(self, selector, show=False, plan_order_factor=None):
    """Set the current plan and return the settings matching the selector.

    When the selector is a string of the form '<plan>/<selector>', the part
    before the first '/' designates the plan, either by name or by index in
    the list of plans, and the part after it is parsed as a Python literal
    when possible. Without a plan designation, the only plan of the
    experiment is used, or the merge of all its plans if there are several.

    Parameters
    ----------
    selector : str, list or None
      selector of settings, optionally prefixed with a plan designation.
    show : bool
      If True, print the plans considered.
    plan_order_factor : optional
      If set, given to the ``order_factor`` method of the current plan.

    Returns
    -------
    The result of the ``select`` method of the current plan for the selector.
    """
    experiment_id = 'all'
    if isinstance(selector, str) and '/' in selector:
        selector_split = selector.split('/')
        experiment_id = selector_split[0]
        selector = selector_split[1]
        try:
            selector = ast.literal_eval(selector)
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            # not a Python literal: the selector is kept as plain text
            pass
    self.selector = selector

    plans = self.plans()
    if len(plans) == 1:
        self._plan = getattr(self, plans[0])
    elif experiment_id == 'all':
        o_plans = []
        for plan in plans:
            if show:
                print(f'Plan {plan}:')
                print(getattr(self, plan).as_panda_frame())
            o_plans.append(getattr(self, plan))
        self._plan = self._plan.merge(o_plans)
        if show and len(plans) > 1:
            print('Those plans can be selected using the selector parameter.')
            print('Otherwise the merged plan is considered: ')
    else:
        if experiment_id.isnumeric():
            # the plan is designated by its index in the list of plans
            experiment_id = plans[int(experiment_id)]
        print(f'Plan {experiment_id} is selected')
        self._plan = getattr(self, experiment_id)

    self._plan.check()
    if plan_order_factor:
        self._plan = self._plan.order_factor(plan_order_factor)
    if show:
        print(self._plan.as_panda_frame())
    if self._check_setting_length:
        self._plan.check_length()
    return self._plan.select(selector)
def clean_data_sink(
        self,
        path,
        selector=None,
        reverse=False,
        force=False,
        keep=False,
        wildcard='*',
        setting_encoding=None,
        archive_path=None,
        verbose=0
):
    r"""Clean a data sink (directory or h5 file).

    The selector is applied to the current plan and the cleaning is delegated
    to the ``clean_data_sink`` method of the resulting selection. Nothing is
    done if the resolved path is empty.

    Parameters
    ----------
    path : str
      If it contains a / or a \\, a valid path to a directory or .h5 file.
      Otherwise, the name of a member of self.path.
    selector : a list of literals or a list of lists of literals (optional)
      :term:`selector` used to specify the :term:`settings<setting>` set
    reverse : bool (optional)
      If False, remove any entry corresponding to the setting set (default).
      If True, remove all entries except the ones corresponding to the setting set.
    force: bool (optional)
      If False, prompt the user before modifying the data sink (default).
      If True, do not prompt the user before modifying the data sink.
    keep : bool (optional)
      forwarded unchanged to the cleaning method of the plan (default: False).
    wildcard : str (optional)
      end of the wildcard used to select the entries to remove or to keep (default: '*').
    setting_encoding : dict (optional)
      format of the identifier describing the :term:`setting`.
      Please refer to :meth:`doce.Plan.identifier` for further information.
    archive_path : str (optional)
      If not None, specify an existing directory where the specified data will be moved.
      The value is forwarded unchanged to the cleaning method of the plan.
    verbose : int (optional)
      forwarded unchanged to the cleaning method of the plan (default: 0).

    See Also
    --------
    doce._plan.clean_data_sink, doce.Plan.id

    Examples
    --------
    >>> import doce
    >>> import numpy as np
    >>> import os
    >>> e=doce.Experiment()
    >>> e.set_path('output', '/tmp/test', force=True)
    >>> e.add_plan('plan', factor1=[1, 3], factor2=[2, 4])
    >>> def my_function(setting, experiment):
    ...     np.save(f'{experiment.path.output}{setting.identifier()}_sum.npy', setting.factor1+setting.factor2)
    ...     np.save(f'{experiment.path.output}{setting.identifier()}_mult.npy', setting.factor1*setting.factor2)
    >>> nb_failed = e.perform([], my_function, progress='')
    >>> os.listdir(e.path.output)
    ['factor1=1+factor2=4_mult.npy', 'factor1=1+factor2=4_sum.npy', 'factor1=3+factor2=4_sum.npy', 'factor1=1+factor2=2_mult.npy', 'factor1=1+factor2=2_sum.npy', 'factor1=3+factor2=2_mult.npy', 'factor1=3+factor2=4_mult.npy', 'factor1=3+factor2=2_sum.npy']
    >>> e.clean_data_sink('output', [0], force=True)
    >>> os.listdir(e.path.output)
    ['factor1=3+factor2=4_sum.npy', 'factor1=3+factor2=2_mult.npy', 'factor1=3+factor2=4_mult.npy', 'factor1=3+factor2=2_sum.npy']
    >>> e.clean_data_sink('output', [1, 1], force=True, reverse=True, wildcard='*mult*')
    >>> os.listdir(e.path.output)
    ['factor1=3+factor2=4_sum.npy', 'factor1=3+factor2=4_mult.npy', 'factor1=3+factor2=2_sum.npy']

    Here, we remove all the files that match the wildcard *mult*
    in the directory /tmp/test that do not correspond to the settings
    that have the first factor set to the second modality and the second factor
    set to the second modality.

    >>> import doce
    >>> import tables as tb
    >>> e=doce.Experiment()
    >>> e.set_path('output', '/tmp/test.h5')
    >>> e.add_plan('plan', factor1=[1, 3], factor2=[2, 4])
    >>> e.set_metric(name = 'sum')
    >>> e.set_metric(name = 'mult')
    >>> def my_function(setting, experiment):
    ...     h5 = tb.open_file(experiment.path.output, mode='a')
    ...     sg = experiment.add_setting_group(
    ...         h5, setting,
    ...         output_dimension={'sum': 1, 'mult': 1})
    ...     sg.sum[0] = setting.factor1+setting.factor2
    ...     sg.mult[0] = setting.factor1*setting.factor2
    ...     h5.close()
    >>> nb_failed = e.perform([], my_function, progress='')
    >>> h5 = tb.open_file(e.path.output, mode='r')
    >>> print(h5)
    /tmp/test.h5 (File) ''
    Last modif.: '...'
    Object Tree:
    / (RootGroup) ''
    /factor1=1+factor2=2 (Group) 'factor1=1+factor2=2'
    /factor1=1+factor2=2/mult (Array(1,)) 'mult'
    /factor1=1+factor2=2/sum (Array(1,)) 'sum'
    /factor1=1+factor2=4 (Group) 'factor1=1+factor2=4'
    /factor1=1+factor2=4/mult (Array(1,)) 'mult'
    /factor1=1+factor2=4/sum (Array(1,)) 'sum'
    /factor1=3+factor2=2 (Group) 'factor1=3+factor2=2'
    /factor1=3+factor2=2/mult (Array(1,)) 'mult'
    /factor1=3+factor2=2/sum (Array(1,)) 'sum'
    /factor1=3+factor2=4 (Group) 'factor1=3+factor2=4'
    /factor1=3+factor2=4/mult (Array(1,)) 'mult'
    /factor1=3+factor2=4/sum (Array(1,)) 'sum'
    >>> h5.close()
    >>> e.clean_data_sink('output', [0], force=True)
    >>> h5 = tb.open_file(e.path.output, mode='r')
    >>> print(h5)
    /tmp/test.h5 (File) ''
    Last modif.: '...'
    Object Tree:
    / (RootGroup) ''
    /factor1=3+factor2=2 (Group) 'factor1=3+factor2=2'
    /factor1=3+factor2=2/mult (Array(1,)) 'mult'
    /factor1=3+factor2=2/sum (Array(1,)) 'sum'
    /factor1=3+factor2=4 (Group) 'factor1=3+factor2=4'
    /factor1=3+factor2=4/mult (Array(1,)) 'mult'
    /factor1=3+factor2=4/sum (Array(1,)) 'sum'
    >>> h5.close()
    >>> e.clean_data_sink('output', [1, 1], force=True, reverse=True, wildcard='*mult*')
    >>> h5 = tb.open_file(e.path.output, mode='r')
    >>> print(h5)
    /tmp/test.h5 (File) ''
    Last modif.: '...'
    Object Tree:
    / (RootGroup) ''
    /factor1=3+factor2=4 (Group) 'factor1=3+factor2=4'
    /factor1=3+factor2=4/mult (Array(1,)) 'mult'
    /factor1=3+factor2=4/sum (Array(1,)) 'sum'
    >>> h5.close()

    Here, the same operations are conducted on a h5 file.
    """
    # a value without any path separator is the name of a member of self.path
    is_member_name = '/' not in path and '\\' not in path
    if is_member_name:
        path = getattr(self.path, path)
    if not path:
        return
    selected_settings = self._plan.select(selector)
    selected_settings.clean_data_sink(
        path,
        reverse=reverse,
        force=force,
        keep=keep,
        wildcard=wildcard,
        setting_encoding=setting_encoding,
        archive_path=archive_path,
        verbose=verbose
    )
def plans(self):
    """Return the list of the names of the plans registered with add_plan."""
    registered_names = self._plans
    return registered_names
def add_plan(self, name, **kwargs):
    """Create a plan, store it as an attribute and make it the current plan.

    Parameters
    ----------
    name : str
      name of the plan, also used as the name of the attribute storing it.
    **kwargs
      given to the constructor of doce.Plan.
    """
    plan = doce.Plan(name, **kwargs)
    setattr(self, name, plan)
    self._plan = plan
    # redefining a plan must not register its name twice,
    # otherwise the plan would be merged with itself
    if name not in self._plans:
        self._plans.append(name)
def get_current_plan(self):
    """Return the plan currently stored in ``self._plan``."""
    current_plan = self._plan
    return current_plan
def set_metric(self,
        name=None,
        output=None,
        func=np.mean,
        path='output',
        percent=False,
        higher_the_better=False,
        lower_the_better=False,
        significance=False,
        precision=None,
        description='',
        unit=''
        ):
    """Register a metric as a member of experiment.metric.

    The metric is stored as a dict whose keys are the names of the parameters
    of this method.

    Parameters
    ----------
    name : str
      name of the metric (required).
    output : str
      name of the output the metric is computed from (default: the name of the metric).
    func : callable
      stored under the key 'func' (default: np.mean).
    path : str
      stored under the key 'path' (default: 'output').
    percent : bool
      stored under the key 'percent'.
    higher_the_better : bool
      stored under the key 'higher_the_better'.
    lower_the_better : bool
      stored under the key 'lower_the_better'.
    significance : bool
      stored under the key 'significance'. If True, higher_the_better or
      lower_the_better must be True as well.
    precision : int
      stored under the key 'precision'
      (default: experiment._display.metric_precision).
    description : str
      stored under the key 'description'.
    unit : str
      stored under the key 'unit'.

    Raises
    ------
    ValueError
      if no name is given, or if significance is requested without
      higher_the_better or lower_the_better.
    TypeError
      if the name is not a string.
    """
    if name is None:
        raise ValueError('A metric must have a name.')
    if not isinstance(name, str):
        raise TypeError('A metric name must be a string.')
    if significance and not (lower_the_better or higher_the_better):
        raise ValueError(
            'Significance analysis requires either lower_the_better '
            'or higher_the_better to be set to True.'
        )
    if precision is None:
        precision = self._display.metric_precision
    if output is None:
        output = name
    setattr(self.metric, name, {
        'name': name,
        'output': output,
        'path': path,
        'func': func,
        'percent': percent,
        'higher_the_better': higher_the_better,
        'lower_the_better': lower_the_better,
        'significance': significance,
        'precision': precision,
        'description': description,
        'unit': unit,
    })
def default(self, plan='', factor='', modality=''):
    """Call the ``default`` method of a plan of the experiment.

    Parameters
    ----------
    plan : str
      name of the attribute of the experiment storing the plan.
    factor, modality
      given to the ``default`` method of the plan.
    """
    target_plan = getattr(self, plan)
    target_plan.default(factor, modality)
def skip_setting(self, setting):
    """Tell whether a setting already has stored outputs and can be skipped.

    Only effective if ``self._resume`` is set. Each member of experiment.path
    that is not listed in ``self._doce_paths`` is searched for a file named
    '<identifier of the setting>_*.npy'.

    Parameters
    ----------
    setting
      object whose ``identifier`` method returns the identifier of the setting.

    Returns
    -------
    bool
      True if at least one matching .npy file is found, False otherwise.
    """
    if not self._resume:
        return False
    identifier = setting.identifier()
    for name, location in vars(self.path).items():
        if name in self._doce_paths:
            continue
        location = str(location)
        if location.endswith('.h5'):
            # TODO: resuming from .h5 storage is not implemented, such sinks never trigger a skip
            continue
        # escape the fixed part so that characters such as '[' are matched literally
        if glob.glob(f'{glob.escape(location + identifier)}_*.npy'):
            return True
    return False
def get_output(self, output='', selector=None, path='', tag='', plan=None):
    """Get the output vector from an .npy or a group of a .h5 file.

    Get the output vector as a numpy array from an .npy or a group of a .h5 file.
    If no path is given, every member of experiment.path is searched, except
    the members whose name ends with '_raw'.

    Parameters
    ----------
    output: str
      The name of the output.
    selector: list
      Settings selector.
    path: str
      Name of path as defined in the experiment,
      or a valid path to a directory in the case of .npy storage,
      or a valid path to an .h5 file in the case of hdf5 storage.
      A value containing a / or a backslash is considered to be a valid path.
    tag: str
      given unchanged to get_from_path.
    plan: str
      Name of plan to be considered. If not set, the current plan is used,
      or the merge of all the plans if the experiment has several plans.

    Returns
    -------
    setting_metric: list of np.Array
      stores for each valid setting an np.Array with the values of the metric selected.
    setting_description: list of list of str
      stores for each valid setting, a compact description of the modalities of each factors.
      The factors with the same modality accross all the set of settings is stored
      in constant_setting_description.
    constant_setting_description: str
      compact description of the factors with the same modality accross all the set of settings.

    Examples
    --------
    >>> import doce
    >>> import numpy as np
    >>> import pandas as pd
    >>> experiment = doce.experiment.Experiment()
    >>> experiment.name = 'example'
    >>> experiment.set_path('output', f'/tmp/{experiment.name}/', force=True)
    >>> experiment.add_plan('plan', f1 = [1, 2], f2 = [1, 2, 3])
    >>> experiment.set_metric(name = 'm1_mean', output = 'm1', func = np.mean)
    >>> experiment.set_metric(name = 'm1_std', output = 'm1', func = np.std)
    >>> experiment.set_metric(name = 'm2_min', output = 'm2', func = np.min)
    >>> experiment.set_metric(name = 'm2_argmin', output = 'm2', func = np.argmin)
    >>> def process(setting, experiment):
    ...     output1 = setting.f1+setting.f2+np.random.randn(100)
    ...     output2 = setting.f1*setting.f2*np.random.randn(100)
    ...     np.save(f'{experiment.path.output+setting.identifier()}_m1.npy', output1)
    ...     np.save(f'{experiment.path.output+setting.identifier()}_m2.npy', output2)
    >>> nb_failed = experiment.perform([], process, progress='')
    >>> (setting_output,
    ...  setting_description,
    ...  constant_setting_description
    ... ) = experiment.get_output(output = 'm1', selector = [1], path='output')
    >>> print(constant_setting_description)
    f1=2
    >>> print(setting_description)
    ['f2=1', 'f2=2', 'f2=3']
    >>> print(len(setting_output))
    3
    >>> print(setting_output[0].shape)
    (100,)
    """
    if plan:
        plan = getattr(self, plan)
    else:
        plan_names = self.plans()
        if len(plan_names) > 1:
            self._plan = self._plan.merge([getattr(self, name) for name in plan_names])
        plan = self._plan

    if path:
        # a value without any path separator is the name of a member of self.path
        if '/' not in path and '\\' not in path:
            path = getattr(self.path, path)
        return get_from_path(
            output,
            settings=plan.select(selector),
            path=path,
            tag=tag
        )

    data = []
    settings = []
    header = ''
    for path_name, location in vars(self.path).items():
        # each location is stored twice (see Path): ignore the raw version
        if path_name.endswith('_raw'):
            continue
        (data_path, setting_path, header_path) = get_from_path(
            output,
            settings=plan.select(selector),
            path=location,
            tag=tag
        )
        # keep the header of the last location that provided data
        if data_path or not data:
            header = header_path
        if data_path:
            data.extend(data_path)
            settings.extend(setting_path)
    return (data, settings, header)
def add_setting_group(
        self, file_id,
        setting,
        output_dimension=None,
        setting_encoding=None
):
    """Add a group to the root of a valid py_tables Object in order to
    store the metrics corresponding to the specified setting.

    The encoding of the setting is used to set the name of the group.
    If the group already exists, it is reused.
    For each metric, a Pytable array named after the output of the metric is created.
    For any metric, if no dimension is provided in the output_dimension dict,
    an expandable array of floating point values is instantiated, replacing
    any existing one. If a dimension is available, a static size array filled
    with NaN is instantiated unless it already exists.

    Parameters
    ----------
    file_id: py_tables file Object
      a valid py_tables file Object, leading to an .h5 file opened with writing permission.
    setting: :class:`doce.Plan`
      an instantiated Factor object describing a setting.
    output_dimension: dict
      for metrics for which the dimensionality of the storage vector is known,
      each key of the dict is a valid metric name and each corresponding value
      is the size of the storage vector.
    setting_encoding : dict
      Encoding of the setting. See doce.Plan.id for references.

    Returns
    -------
    setting_group: a Pytables Group
      where metrics corresponding to the specified setting are stored.

    Examples
    --------
    >>> import doce
    >>> import numpy as np
    >>> import tables as tb
    >>> experiment = doce.experiment.Experiment()
    >>> experiment.name = 'example'
    >>> experiment.set_path('output', '/tmp/'+experiment.name+'.h5')
    >>> experiment.add_plan('plan', f1 = [1, 2], f2 = [1, 2, 3])
    >>> experiment.set_metric(name = 'm1_mean', output = 'm1', func = np.mean)
    >>> experiment.set_metric(name = 'm1_std', output = 'm1', func = np.std)
    >>> experiment.set_metric(name = 'm2_min', output = 'm2', func = np.min)
    >>> experiment.set_metric(name = 'm2_argmin', output = 'm2', func = np.argmin)
    >>> def process(setting, experiment):
    ...     h5 = tb.open_file(experiment.path.output, mode='a')
    ...     sg = experiment.add_setting_group(h5, setting, output_dimension = {'m1':100})
    ...     sg.m1[:] = setting.f1+setting.f2+np.random.randn(100)
    ...     sg.m2.append(setting.f1*setting.f2*np.random.randn(100))
    ...     h5.close()
    >>> nb_failed = experiment.perform([], process, progress='')
    >>> h5 = tb.open_file(experiment.path.output, mode='r')
    >>> print(h5)
    /tmp/example.h5 (File) ''
    Last modif.: '...'
    Object Tree:
    / (RootGroup) ''
    /f1=1+f2=1 (Group) 'f1=1+f2=1'
    /f1=1+f2=1/m1 (Array(100,)) 'm1'
    /f1=1+f2=1/m2 (EArray(100,)) 'm2'
    /f1=1+f2=2 (Group) 'f1=1+f2=2'
    /f1=1+f2=2/m1 (Array(100,)) 'm1'
    /f1=1+f2=2/m2 (EArray(100,)) 'm2'
    /f1=1+f2=3 (Group) 'f1=1+f2=3'
    /f1=1+f2=3/m1 (Array(100,)) 'm1'
    /f1=1+f2=3/m2 (EArray(100,)) 'm2'
    /f1=2+f2=1 (Group) 'f1=2+f2=1'
    /f1=2+f2=1/m1 (Array(100,)) 'm1'
    /f1=2+f2=1/m2 (EArray(100,)) 'm2'
    /f1=2+f2=2 (Group) 'f1=2+f2=2'
    /f1=2+f2=2/m1 (Array(100,)) 'm1'
    /f1=2+f2=2/m2 (EArray(100,)) 'm2'
    /f1=2+f2=3 (Group) 'f1=2+f2=3'
    /f1=2+f2=3/m1 (Array(100,)) 'm1'
    /f1=2+f2=3/m2 (EArray(100,)) 'm2'
    >>> h5.close()
    """
    import tables as tb
    import warnings
    from tables import NaturalNameWarning
    warnings.filterwarnings('ignore', category=NaturalNameWarning)

    encoding = setting_encoding if setting_encoding else {}
    group_name = setting.identifier(**encoding)
    if f'/{group_name}' in file_id:
        setting_group = file_id.root._f_get_child(group_name)
    else:
        setting_group = file_id.create_group('/', group_name, str(setting))

    for metric_name in self.metric.name():
        metric = getattr(self.metric, metric_name)
        output = metric['output']
        # title of the array: description of the metric (or name of its output) and unit
        description = metric['description'] or output
        if metric['unit']:
            description += ' in ' + metric['unit']

        has_known_dimension = output_dimension and output in output_dimension
        if has_known_dimension:
            # static size array, created once and filled with NaN
            if output not in setting_group:
                file_id.create_array(
                    setting_group,
                    output,
                    np.full(output_dimension[output], np.nan),
                    description)
        else:
            # expandable array, any previous content is discarded
            if output in setting_group:
                setting_group._f_get_child(output)._f_remove()
            file_id.create_earray(setting_group, output, tb.Float64Atom(), (0,), description)
    return setting_group
def get_from_path(
        metric,
        settings=None,
        path='',
        tag='',
        setting_encoding=None,
        verbose=False
):
    """Get the metric vector from an .npy or a group of a .h5 file.

    Get the metric vector as a numpy array from an .npy or a group of a .h5 file.
    PyTables is only required for .h5 storage.

    Parameters
    ----------
    metric: str
      The name of the metric, as used for storage: the file
      '<identifier of the setting>_<metric>.npy' in the case of .npy storage,
      or the child named <metric> of the group of the setting in the case of .h5 storage.
    settings: doce.Plan
      Iterable settings. If None, no setting is considered.
    path: str
      In the case of .npy storage, a valid path to the main directory.
      In the case of .h5 storage, a valid path to an .h5 file.
    tag: str
      If set, in the case of .npy storage, the sub directory <tag>/ of path is searched,
      and in the case of .h5 storage, '_<tag>' is inserted before the .h5 extension.
    setting_encoding : dict
      Encoding of the setting. See doce.Plan.id for references.
    verbose : bool
      In the case of .npy metric storage, if verbose is set to True,
      print the file_name seeked for the metric.
      In the case of .h5 metric storage, if verbose is set to True,
      print the group seeked for the metric.

    Returns
    -------
    setting_metric: list of np.Array
      stores for each valid setting an np.Array with the values of the metric selected.
    setting_description: list of list of str
      stores for each valid setting, a compact description of the modalities of each factors.
      The factors with the same modality accross all the set of settings is stored in constant_setting_description.
    constant_setting_description: str
      compact description of the factors with the same modality accross all the set of settings.

    Examples
    --------
    >>> import doce
    >>> import numpy as np
    >>> import pandas as pd
    >>> experiment = doce.experiment.Experiment()
    >>> experiment.name = 'example'
    >>> experiment.set_path('output', f'/tmp/{experiment.name}/', force=True)
    >>> experiment.add_plan('plan', f1 = [1, 2], f2 = [1, 2, 3])
    >>> experiment.set_metric(name = 'm1_mean', output = 'm1', func = np.mean)
    >>> experiment.set_metric(name = 'm1_std', output = 'm1', func = np.std)
    >>> experiment.set_metric(name = 'm2_min', output = 'm2', func = np.min)
    >>> experiment.set_metric(name = 'm2_argmin', output = 'm2', func = np.argmin)
    >>> def process(setting, experiment):
    ...     metric1 = setting.f1+setting.f2+np.random.randn(100)
    ...     metric2 = setting.f1*setting.f2*np.random.randn(100)
    ...     np.save(f'{experiment.path.output}{setting.identifier()}_m1.npy', metric1)
    ...     np.save(f'{experiment.path.output}{setting.identifier()}_m2.npy', metric2)
    >>> nb_failed = experiment.perform([], process, progress='')
    >>> (setting_metric,
    ...  setting_description,
    ...  constant_setting_description) = get_from_path(
    ...     'm1',
    ...     experiment._plan.select([1]),
    ...     experiment.path.output)
    >>> print(constant_setting_description)
    f1=2
    >>> print(setting_description)
    ['f2=1', 'f2=2', 'f2=3']
    >>> print(len(setting_metric))
    3
    >>> print(setting_metric[0].shape)
    (100,)
    """
    if settings is None:
        settings = []
    if not setting_encoding:
        setting_encoding = {}
    # the descriptions of the settings are requested as lists, whatever the encoding
    setting_description_format = copy.deepcopy(setting_encoding)
    setting_description_format.update(style='list', default=True, sort=False)

    setting_metric = []
    setting_descriptions = []
    if isinstance(path, str):
        if path.endswith('.h5'):
            # imported here so that .npy storage does not require PyTables
            import tables as tb
            import warnings
            from tables import NaturalNameWarning
            warnings.filterwarnings('ignore', category=NaturalNameWarning)
            if tag:
                path = path[:-3]+'_'+tag+'.h5'
            h5_fid = tb.open_file(path, mode='r')
            try:
                for setting in settings:
                    group_name = setting.identifier(**setting_encoding)
                    if group_name in h5_fid.root:
                        if verbose:
                            print(f'Found group {group_name}')
                        setting_group = h5_fid.root._f_get_child(group_name)
                        if metric in setting_group:
                            setting_metric.append(np.array(setting_group._f_get_child(metric)))
                            setting_descriptions.append(
                                setting.identifier(**setting_description_format))
                    elif verbose:
                        print(f'** Unable to find group {group_name}')
            finally:
                # release the file even if reading fails
                h5_fid.close()
        else:
            if tag:
                path += tag+'/'
            for setting in settings:
                file_name = f'{path}{setting.identifier(**setting_encoding)}_{metric}.npy'
                if os.path.exists(file_name):
                    if verbose:
                        print(f'Found {file_name}')
                    setting_metric.append(np.load(file_name))
                    setting_descriptions.append(setting.identifier(**setting_description_format))
                elif verbose:
                    print(f'** Unable to find {file_name}')

    (setting_descriptions, _,
     constant_setting_description,
     _) = eu.prune_setting_description(setting_descriptions, show_unique_setting=True)
    setting_descriptions = [
        ', '.join(setting_description) for setting_description in setting_descriptions
    ]
    return (setting_metric, setting_descriptions, constant_setting_description)
class Path:
    """Handle the storage of paths to disk.

    Each assigned path is stored twice: as given under ``<name>_raw``,
    and with the home directory of the user expanded under ``<name>``.
    """

    def __setattr__(self, name, value):
        # keep the path as given by the user
        super().__setattr__(f'{name}_raw', value)
        # store the path with a leading ~ expanded
        super().__setattr__(name, os.path.expanduser(value))
if __name__ == '__main__':
    # run the examples of the docstrings of this module as tests
    import doctest
    doctest_flags = doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE
    doctest.testmod(optionflags=doctest_flags)