Source code for doce.experiment

"""Handle information of an experiment of the doce module."""

import types
import inspect
import os
import time
import datetime
import ast
import glob
import copy
import numpy as np
import doce.util as eu
import doce

[docs]class Experiment():
  """Stores high level information about the experiment and tools
  to control the processing and storage of data.

  The experiment class displays high level information about the experiment
  such as its name, description, author, author's email address, and run identification.

  Information about storage of data is specified using the experiment.path name_space.
  It also stores one or several Plan objects and a Metric object to respectively specify
  the experimental plans and the metrics considered in the experiment.

  See Also
  --------

  doce.Plan, doce.metric.Metric

  Examples
  --------

  >>> import doce
  >>> e=doce.Experiment()
  >>> e.name='my_experiment'
  >>> e.author='John Doe'
  >>> e.address='john.doe@no-log.org'
  >>> e.path.processing='/tmp'
  >>> print(e)
    name: my_experiment
    description
    author: John Doe
    address: john.doe@no-log.org
    version: 0.1
    status:
      run_id: ...
      verbose: 0
    selector: []
    parameter
    metric
    path:
      code_raw: ...
      code: ...
      archive_raw:
      archive:
      export_raw: export
      export: export
      processing_raw: /tmp
      processing: /tmp
    host: []


  Each level can be complemented with new members to store specific information:

  >>> e.specific_info = 'stuff'
  >>> import types
  >>> e.my_data = types.SimpleNamespace()
  >>> e.my_data.info1= 1
  >>> e.my_data.info2= 2
  >>> print(e)
    name: my_experiment
    description
    author: John Doe
    address: john.doe@no-log.org
    version: 0.1
    status:
      run_id: ...
      verbose: 0
    selector: []
    parameter
    metric
    path:
      code_raw: ...
      code: ...
      archive_raw:
      archive:
      export_raw: export
      export: export
      processing_raw: /tmp
      processing: /tmp
    host: []
    specific_info: stuff
    my_data:
      info1: 1
      info2: 2
  """

  def __init__(
    self, **description
    ):
    # list of attributes
    self._atrs = []
    self._plan = doce.Plan('test')
    self._plans = []
    self.name = ''
    self.description = ''
    self.author = 'no name'
    self.address = 'noname@noorg.org'
    self.version = '0.1'

    self.status = types.SimpleNamespace()
    self.status.run_id = str(
      int((time.time()-datetime.datetime(2020,1,1,0,0).timestamp())/60)
      )
    self.status.verbose = 0
    self.selector = []

    self.parameter = types.SimpleNamespace()
    self.metric = doce.Metric()
    self.path = Path()
    self.path.code = os.getcwd()
    self.path.archive = ''
    self.path.export = 'export'
    self._doce_paths = ['export', 'export_raw', 'archive', 'archive_raw', 'code', 'code_raw']
    self.host = []
    self._archive_path = ''
    self._gmail_id = 'expcode.mailer'
    self._gmail_app_password = 'tagsqtlirkznoxro'
    self._default_server_run_argument =  {}
    self._resume = False
    self._check_setting_length = True

    self._display = types.SimpleNamespace()
    self._display.export_png = 'wkhtmltoimage' # could be 'chrome' or 'matplotlib'
    self._display.export_pdf = 'wkhtmltopdf' # could be 'chrome' or 'latex'
    self._display.factor_format_in_reduce = 'long'
    self._display.metric_format_in_reduce = 'long'
    self._display.metric_precision = 2
    self._display.factor_format_in_reduce_length = 2
    self._display.metric_format_in_reduce_length = 2
    self._display.show_row_index = True
    self._display.highlight = True
    self._display.bar = False
    self._display.pValue = 0.05

    for field, value in description.items():
      self.__setattr__(field, value)

    self.__setattr__('metric', doce.Metric())

  def __setattr__(
    self,
    name,
    value
    ):
    if not hasattr(self, name) and name[0] != '_':
      self._atrs.append(name)
    return object.__setattr__(self, name, value)

[docs]  def set_path(
    self,
    name,
    path,
    force=False
    ):
    """Create directories whose path described in experiment.path are not reachable.

    For each path set in experiment.path, create the directory if not reachable.
    The user may be prompted before creation.

  	Parameters
  	----------

    force : bool
      If True, do not prompt the user before creating the missing directories.

      If False, prompt the user before creation of each missing directory (default).

    Examples
    --------

    >>> import doce
    >>> import os
    >>> e=doce.Experiment()
    >>> e.name = 'experiment'
    >>> e.set_path('processing', f'/tmp/{e.name}/processing', force=True)
    >>> e.set_path('output', f'/tmp/{e.name}/output', force=True)
    >>> os.listdir(f'/tmp/{e.name}')
    ['processing', 'output']
    """
    # for sns in self.__getattribute__('path').__dict__.keys():
    self.path.__setattr__(name, path)

    path = os.path.abspath(os.path.expanduser(path))
    if path:
      if path.endswith('.h5'):
        path = os.path.dirname(os.path.abspath(path))
      else:
        if not path.endswith('/'):
          if not path.endswith('\\'):
            if '\\' in path:
              path = f'{path}\\'
              self.path.__setattr__(name, path)
            else:
              path = f'{path}/'
              self.path.__setattr__(name, path)

      if not os.path.exists(path):
        message = f'''The {name} path: {path} does not exist. \
        Do you want to create it ?'''
        if force or doce.util.query_yes_no(message):
          os.makedirs(path)
          if not force:
            print('Path succesfully created.')

[docs]  def __str__(
    self,
    style='str'
    ):
    """Provide a textual description of the experiment

    List all members of the class and theirs values

    parameters
    ----------
    style : str
      If 'str', return the description as a string.

      If 'html', return the description with an html format.

  	Returns
  	-------
    description : str
        If style == 'str' : a carriage return separated enumeration
        of the members of the class experiment.

        If style == 'html' : an html version of the description

  	Examples
  	--------

    >>> import doce
    >>> print(doce.Experiment())
    name
    description
    author: no name
    address: noname@noorg.org
    version: 0.1
    status:
      run_id: ...
      verbose: 0
    selector: []
    parameter
    metric
    path:
      code_raw: ...
      code: ...
      archive_raw:
      archive:
      export_raw: export
      export: export
    host: []

    >>> import doce
    >>> doce.Experiment().__str__(style='html')
        '<div>name</div><div>description</div><div>author: no name</div><div>address: noname@noorg.org</div><div>version: 0.1</div><div>status:</div><div>  run_id: ...</div><div>  verbose: 0</div><div>selector: []</div><div>parameter</div><div>metric</div><div>path:</div><div>  code_raw: ...</div><div>  code: ...</div><div>  archive_raw: </div><div>  archive: </div><div>  export_raw: export</div><div>  export: export</div><div>host: []</div><div></div>'
    """
    description = ''
    for atr in self._atrs:
      if not isinstance(inspect.getattr_static(self, atr), types.FunctionType):
        if isinstance(self.__getattribute__(atr), (types.SimpleNamespace, Path)):
          description += atr
          if len(self.__getattribute__(atr).__dict__.keys()):
            description+=':'
          description+='\r\n'
          for sns in self.__getattribute__(atr).__dict__.keys():
            description+=f'  {sns}: {str(self.__getattribute__(atr).__getattribute__(sns))}\r\n'
        elif isinstance(self.__getattribute__(atr), (str, list)):
          description+=atr
          if str(self.__getattribute__(atr)):
            description += f': {str(self.__getattribute__(atr))}'
          description += '\r\n'
        else:
          description+=atr
          if str(self.__getattribute__(atr)):
            description += f': \r\n{str(self.__getattribute__(atr))}'
          description += '\r\n'
    if style == 'html':
      desc = description.replace('\r\n', '</div><div>').replace('\t', '&emsp;')
      description = f'<div>{desc}</div>'
    return description

[docs]  def send_mail(
    self,
    title='',
    body=''):
    """Send an email to the email address given in experiment.address.

    Send an email to the experiment.address email address using the smtp service from gmail.
    For privacy, please consider using a dedicated gmail account
    by setting experiment._gmail_id and experiment._gmail_app_password.
    For this, you will need to create a gmail account, set two-step validation
    and allow connection with app password.

    See https://support.google.com/accounts/answer/185833?hl=en for reference.

    Parameters
    ----------

    title : str
      the title of the email in plain text format

    body : str
      the body of the email in html format

    Examples
    --------
    >>> import doce
    >>> e=doce.Experiment()
    >>> e.address = 'john.doe@no-log.org'
    >>> e.send_mail('hello', '<div> good day </div>')
    Sent message entitled: [doce]  id ... hello ...

    """

    import smtplib

    header = f'''From: doce mailer <{self._gmail_id}@gmail.com> \r\nTo: {self.author} {self.address}\r\nMIME-Version: 1.0 \r\nContent-type: text/html \r\nSubject: [doce] {self.name} id {self.status.run_id} {title}\r\n'''

    server = smtplib.SMTP('smtp.gmail.com', 587)
    server.starttls()
    server.login(f'{self._gmail_id}@gmail.com', self._gmail_app_password)
    exp_desc = self.__str__(style = 'html')
    server.sendmail(self._gmail_id, self.address, f'{header}{body}<h3> {exp_desc}</h3>')
    server.quit()
    print(f'''Sent message entitled: [doce] {self.name} id {self.status.run_id} \
     {title} at {time.ctime(time.time())}''')

[docs]  def perform(
    self,
    selector,
    function=None,
    *parameters,
    nb_jobs=1,
    progress='d',
    log_file_name='',
    mail_interval=0,
    tag=''
    ):
    r"""Operate the function with parameters on the :term:`settings<setting>` set
    generated using :term:`selector`.

    Operate a given function on the setting set generated using selector.
    The setting set can be browsed in parallel by setting nb_jobs>1.
    If log_file_name is not empty, a faulty setting do not stop the execution,
    the error is stored and another setting is executed. If progress is set to True,
    a graphical display of the progress through the setting set is displayed.

    This function is essentially a wrapper to the function :meth:`doce.Plan.do`.

    Parameters
    ----------

    selector : a list of literals or a list of lists of literals
      :term:`selector` used to specify the :term:`settings<setting>` set

    function : function(:class:`~doce.Plan`, :class:`~doce.Experiment`, \*parameters) (optional)
      A function that operates on a given setting within the experiment environnment
      with optional parameters.

      If None, a description of the given setting is shown.

    *parameters : any type (optional)
      parameters to be given to the function.

    nb_jobs : int > 0 (optional)
      number of jobs.

      If nb_jobs = 1, the setting set is browsed sequentially in a depth first
      traversal of the settings tree (default).

      If nb_jobs > 1, the settings set is browsed randomly,
      and settings are distributed over the different processes.

    progress : str (optional)
      display progress of scheduling the setting set.

      If str has an m, show the selector of the current setting.
      If str has an d, show a textual description of the current setting (default).

    log_file_name : str (optional)
      path to a file where potential errors will be logged.

      If empty, the execution is stopped on the first faulty setting (default).

      If not empty, the execution is not stopped on a faulty setting,
      and the error is logged in the log_file_name file.

    mail_interval : float (optional)
      interval for sending email about the status of the run.

      If 0, no email is sent (default).

      It >0, an email is sent as soon as an setting is done and the difference
      between the current time and the time the last mail was sent is larger than mail_interval.
    
    tag : string (optional)
      specify a tag to be added to the output names

    See Also
    --------

    doce.Plan.do

    Examples
    --------

    >>> import time
    >>> import random
    >>> import doce

    >>> e=doce.Experiment()
    >>> e.add_plan('plan', factor1=[1, 3], factor2=[2, 5])

    >>> # this function displays the sum of the two modalities of the current setting
    >>> def my_function(setting, experiment):
    ...  print(f'{setting.factor1}+{setting.factor2}={setting.factor1+setting.factor2}')

    >>> # sequential execution of settings
    >>> nb_failed = e.perform([], my_function, nb_jobs=1, progress='')
    1+2=3
    1+5=6
    3+2=5
    3+5=8
    >>> # arbitrary order execution of settings due to the parallelization
    >>> nb_failed = e.perform([], my_function, nb_jobs=3, progress='') # doctest: +SKIP
    3+2=5
    1+5=6
    1+2=3
    3+5=8
    """

    return self._plan.select(selector).perform(
      function,
      self,
      *parameters,
      nb_jobs=nb_jobs,
      progress=progress,
      log_file_name=log_file_name,
      mail_interval=mail_interval
      )

  def select(self, selector, show=False, plan_order_factor=None):
    experiment_id = 'all'
    if '/' in selector:
      selector_split = selector.split('/')
      experiment_id = selector_split[0]
      if len(selector_split)>1:
        selector = selector_split[1]
        try:
          selector = ast.literal_eval(selector)
        except:
          pass
      else:
        selector = ''
    self.selector = selector

    plans = self.plans()
    if len(plans)==1:
      self._plan = getattr(self, plans[0])
    else:
      if experiment_id == 'all':
        o_plans = []
        for plan in plans:
          if show:
            print(f'Plan {plan}:')
            print(getattr(self, plan).as_panda_frame())
          o_plans.append(getattr(self, plan))
        self._plan = self._plan.merge(o_plans)
        if show and len(plans)>1:
          print('Those plans can be selected using the selector parameter.')
          print('Otherwise the merged plan is considered: ')
      else:
        if experiment_id.isnumeric():
          experiment_id = plans[int(experiment_id)]
        print(f'Plan {experiment_id} is selected')
        self._plan = getattr(self, experiment_id)
    self._plan.check()

    if plan_order_factor:
      self._plan = self._plan.order_factor(plan_order_factor)
    if show:
      print(self._plan.as_panda_frame())
    if self._check_setting_length:
      self._plan.check_length()
    return self._plan.select(selector)

[docs]  def clean_data_sink(
    self,
    path,
    selector=None,
    reverse=False,
    force=False,
    keep=False,
    wildcard='*',
    setting_encoding=None,
    archive_path = None,
    verbose=0
    ):
    r""" Perform a cleaning of a data sink (directory or h5 file).

    This method is essentially a wrapper to :meth:`doce._plan.clean_data_sink`.

    Parameters
    ----------

    path : str
      If has a / or \\\, a valid path to a directory or .h5 file.

      If has no / or \\\, a member of the name_space self.path.

    selector : a list of literals or a list of lists of literals (optional)
      :term:`selector` used to specify the :term:`settings<setting>` set

    reverse : bool (optional)
      If False, remove any entry corresponding to the setting set (default).

      If True, remove all entries except the ones corresponding to the setting set.

    force: bool (optional)
      If False, prompt the user before modifying the data sink (default).

      If True, do not prompt the user before modifying the data sink.

    wildcard : str (optional)
      end of the wildcard used to select the entries to remove or to keep (default: '*').

    setting_encoding : dict (optional)
      format of the identifier describing the :term:`setting`.
      Please refer to :meth:`doce.Plan.identifier` for further information.

    archive_path : str (optional)
      If not None, specify an existing directory where the specified data will be moved.

      If None, the path doce.Experiment._archive_path is used (default).

    See Also
    --------

    doce._plan.clean_data_sink, doce.Plan.id

    Examples
    --------

    >>> import doce
    >>> import numpy as np
    >>> import os
    >>> e=doce.Experiment()
    >>> e.set_path('output', '/tmp/test', force=True)
    >>> e.add_plan('plan', factor1=[1, 3], factor2=[2, 4])
    >>> def my_function(setting, experiment):
    ...   np.save(f'{experiment.path.output}{setting.identifier()}_sum.npy', setting.factor1+setting.factor2)
    ...   np.save(f'{experiment.path.output}{setting.identifier()}_mult.npy', setting.factor1*setting.factor2)
    >>> nb_failed = e.perform([], my_function, progress='')
    >>> os.listdir(e.path.output)
    ['factor1=1+factor2=4_mult.npy', 'factor1=1+factor2=4_sum.npy', 'factor1=3+factor2=4_sum.npy', 'factor1=1+factor2=2_mult.npy', 'factor1=1+factor2=2_sum.npy', 'factor1=3+factor2=2_mult.npy', 'factor1=3+factor2=4_mult.npy', 'factor1=3+factor2=2_sum.npy']

    >>> e.clean_data_sink('output', [0], force=True)
    >>> os.listdir(e.path.output)
    ['factor1=3+factor2=4_sum.npy', 'factor1=3+factor2=2_mult.npy', 'factor1=3+factor2=4_mult.npy', 'factor1=3+factor2=2_sum.npy']

    >>> e.clean_data_sink('output', [1, 1], force=True, reverse=True, wildcard='*mult*')
    >>> os.listdir(e.path.output)
    ['factor1=3+factor2=4_sum.npy', 'factor1=3+factor2=4_mult.npy', 'factor1=3+factor2=2_sum.npy']

    Here, we remove all the files that match the wildcard *mult*
    in the directory /tmp/test that do not correspond to the settings
    that have the first factor set to the second modality and the second factor
    set to the second modality.

    >>> import doce
    >>> import tables as tb
    >>> e=doce.Experiment()
    >>> e.set_path('output', '/tmp/test.h5')
    >>> e.add_plan('plan', factor1=[1, 3], factor2=[2, 4])
    >>> e.set_metric(name = 'sum')
    >>> e.set_metric(name = 'mult')
    >>> def my_function(setting, experiment):
    ...   h5 = tb.open_file(experiment.path.output, mode='a')
    ...   sg = experiment.add_setting_group(
    ...     h5, setting,
    ...     output_dimension={'sum': 1, 'mult': 1})
    ...   sg.sum[0] = setting.factor1+setting.factor2
    ...   sg.mult[0] = setting.factor1*setting.factor2
    ...   h5.close()
    >>> nb_failed = e.perform([], my_function, progress='')
    >>> h5 = tb.open_file(e.path.output, mode='r')
    >>> print(h5)
    /tmp/test.h5 (File) ''
    Last modif.: '...'
    Object Tree:
    / (RootGroup) ''
    /factor1=1+factor2=2 (Group) 'factor1=1+factor2=2'
    /factor1=1+factor2=2/mult (Array(1,)) 'mult'
    /factor1=1+factor2=2/sum (Array(1,)) 'sum'
    /factor1=1+factor2=4 (Group) 'factor1=1+factor2=4'
    /factor1=1+factor2=4/mult (Array(1,)) 'mult'
    /factor1=1+factor2=4/sum (Array(1,)) 'sum'
    /factor1=3+factor2=2 (Group) 'factor1=3+factor2=2'
    /factor1=3+factor2=2/mult (Array(1,)) 'mult'
    /factor1=3+factor2=2/sum (Array(1,)) 'sum'
    /factor1=3+factor2=4 (Group) 'factor1=3+factor2=4'
    /factor1=3+factor2=4/mult (Array(1,)) 'mult'
    /factor1=3+factor2=4/sum (Array(1,)) 'sum'
    >>> h5.close()

    >>> e.clean_data_sink('output', [0], force=True)
    >>> h5 = tb.open_file(e.path.output, mode='r')
    >>> print(h5)
    /tmp/test.h5 (File) ''
    Last modif.: '...'
    Object Tree:
    / (RootGroup) ''
    /factor1=3+factor2=2 (Group) 'factor1=3+factor2=2'
    /factor1=3+factor2=2/mult (Array(1,)) 'mult'
    /factor1=3+factor2=2/sum (Array(1,)) 'sum'
    /factor1=3+factor2=4 (Group) 'factor1=3+factor2=4'
    /factor1=3+factor2=4/mult (Array(1,)) 'mult'
    /factor1=3+factor2=4/sum (Array(1,)) 'sum'
    >>> h5.close()

    >>> e.clean_data_sink('output', [1, 1], force=True, reverse=True, wildcard='*mult*')
    >>> h5 = tb.open_file(e.path.output, mode='r')
    >>> print(h5)
    /tmp/test.h5 (File) ''
    Last modif.: '...'
    Object Tree:
    / (RootGroup) ''
    /factor1=3+factor2=4 (Group) 'factor1=3+factor2=4'
    /factor1=3+factor2=4/mult (Array(1,)) 'mult'
    /factor1=3+factor2=4/sum (Array(1,)) 'sum'
    >>> h5.close()

    Here, the same operations are conducted on a h5 file.
    """

    if '/' not in path and '\\' not in path:
      path = self.__getattribute__('path').__getattribute__(path)
    if path:
      self._plan.select(selector).clean_data_sink(
        path,
        reverse=reverse,
        force=force,
        keep=keep,
        wildcard=wildcard,
        setting_encoding=setting_encoding,
        archive_path=archive_path,
        verbose=verbose
        )

  def plans(self):
    # names = []
    # for attribute in dir(self):
    #   if attribute[0] != '_' and isinstance(getattr(self, attribute), doce.Plan):
    #     names.append(attribute)
    return self._plans

  def add_plan(self, name, **kwargs):
    self.__setattr__(name, doce.Plan(name, **kwargs))
    self._plan = getattr(self, name)
    self._plans.append(name)
  
  def get_current_plan(self):
    return self._plan

  def set_metric(self,
    name = None,
    output = None,
    func = np.mean,
    path = 'output',
    percent=False,
    higher_the_better=False,
    lower_the_better=False,
    significance=False,
    precision=None,
    description = '',
    unit = ''
    ):

    if name is None:
      raise Exception('A metric must of a name.')
    if not isinstance(name, str):
      raise Exception('A metric name must be a string.')
    if significance and not lower_the_better and not higher_the_better:
      raise Exception('Significance analysis requires either lower_the_better or higher_the_better to set be to True.')
    if precision is None:
      precision = self._display.metric_precision
    if output is None:
      output = name

    self.metric.__setattr__(name, {
      'name':name,
      'output':output,
      'path':path,
      'func':func,
      'percent':percent,
      'higher_the_better':higher_the_better,
      'lower_the_better':lower_the_better,
      'significance': significance,
      'precision':precision,
      'description':description,
      'unit':unit
      })

  
  def default(self, plan='', factor='', modality=''):
    getattr(self, plan).default(factor, modality)

  def skip_setting(self, setting):
    if self._resume:
      for path in self.__getattribute__('path').__dict__.keys():
        if path.endswith('.h5'):
          print('todo')
        else:
          if path not in self._doce_paths:
            check = glob.glob(f'{self.path.__getattribute__(path)}{setting.identifier()}_*.npy')
            if check:
              return True
    return False

[docs]  def get_output(self, output='', selector=None, path='', tag='', plan=None):
    """ Get the output vector from an .npy or a group of a .h5 file.

    Get the output vector as a numpy array from an .npy or a group of a .h5 file.

    Parameters
    ----------

    output: str
      The name of the output. 

    selector: list
      Settings selector.

    path: str
      Name of path as defined in the experiment,
      or a valid path to a directory in the case of .npy storage,
      or a valid path to an .h5 file in the case of hdf5 storage.

    plan: str
      Name of plan to be considered.

    Returns
    -------

    setting_metric: list of np.Array
      stores for each valid setting an np.Array with the values of the metric selected.

    setting_description: list of list of str
      stores for each valid setting, a compact description of the modalities of each factors.
      The factors with the same modality accross all the set of settings is stored
      in constant_setting_description.

    constant_setting_description: str
      compact description of the factors with the same modality accross all the set of settings.

    Examples
    --------

    >>> import doce
    >>> import numpy as np
    >>> import pandas as pd

    >>> experiment = doce.experiment.Experiment()
    >>> experiment.name = 'example'
    >>> experiment.set_path('output', '/tmp/{experiment.name}/', force=True)
    >>> experiment.add_plan('plan', f1 = [1, 2], f2 = [1, 2, 3])
    >>> experiment.set_metric(name = 'm1_mean', output = 'm1', func = np.mean)
    >>> experiment.set_metric(name = 'm1_std', output = 'm1', func = np.std)
    >>> experiment.set_metric(name = 'm2_min', output = 'm2', func = np.min)
    >>> experiment.set_metric(name = 'm2_argmin', output = 'm2', func = np.argmin)

    >>> def process(setting, experiment):
    ...  output1 = setting.f1+setting.f2+np.random.randn(100)
    ...  output2 = setting.f1*setting.f2*np.random.randn(100)
    ...  np.save(f'{experiment.path.output+setting.identifier()}_m1.npy', output1)
    ...  np.save(f'{experiment.path.output+setting.identifier()}_m2.npy', output2)
    >>> nb_failed = experiment.perform([], process, progress='')

    >>> (setting_output,
    ...  setting_description,
    ...  constant_setting_description
    ... ) = experiment.get_output(output = 'm1', selector = [1], path='output')
    >>> print(constant_setting_description)
    f1=2
    >>> print(setting_description)
    ['f2=1', 'f2=2', 'f2=3']
    >>> print(len(setting_output))
    3
    >>> print(setting_output[0].shape)
    (100,)
    """

    if plan:
      plan = getattr(self, plan)
    else:
      if len(self.plans()) > 1:
        o_plans = []
        for plan in self.plans():
          o_plans.append(getattr(self, plan))
        self._plan = self._plan.merge(o_plans)
      plan = self._plan

    if path:
      if not (r'\/' in path or r'\\' in path):
        path = getattr(self.path, path)
      return get_from_path(
        output,
        settings=plan.select(selector),
        path=path,
        tag=tag
        )
    data = []
    settings = []
    for path_iterator in self.path.__dict__:
      if not path.endswith('_raw'):
        path_iterator = getattr(self.path, path_iterator)
        (data_path, setting_path, header_path) = get_from_path(
          output,
          settings=plan.select(selector),
          path=path_iterator,
          tag=tag
          )
        if data_path:
          for data_setting in data_path:
            data.append(data_setting)
          for setting_description in setting_path:
            settings.append(setting_description)

    return (data, settings, header_path)

[docs]  def add_setting_group(
    self,   file_id,
    setting,
    output_dimension=None,
    setting_encoding=None
    ):
    """adds a group to the root of a valid py_tables Object in order to
    store the metrics corresponding to the specified setting.

    adds a group to the root of a valid py_tables Object in order to
    store the metrics corresponding to the specified setting.
    The encoding of the setting is used to set the name of the group.
    For each metric, a Floating point Pytable Array is created.
    For any metric, if no dimension is provided in the output_dimension dict,
    an expandable array is instantiated. If a dimension is available, 
    a static size array is instantiated.

    Parameters
    ----------

    file_id: py_tables file Object
    a valid py_tables file Object, leading to an .h5 file opened with writing permission.

    setting: :class:`doce.Plan`
    an instantiated Factor object describing a setting.

    output_dimension: dict
    for metrics for which the dimensionality of the storage vector is known,
    each key of the dict is a valid metric name and each corresponding value
    is the size of the storage vector.

    setting_encoding : dict
    Encoding of the setting. See doce.Plan.id for references.

    Returns
    -------

    setting_group: a Pytables Group
      where metrics corresponding to the specified setting are stored.

    Examples
    --------

    >>> import doce
    >>> import numpy as np
    >>> import tables as tb

    >>> experiment = doce.experiment.Experiment()
    >>> experiment.name = 'example'
    >>> experiment.set_path('output', '/tmp/'+experiment.name+'.h5')
    >>> experiment.add_plan('plan', f1 = [1, 2], f2 = [1, 2, 3])
    >>> experiment.set_metric(name = 'm1_mean', output = 'm1', func = np.mean)
    >>> experiment.set_metric(name = 'm1_std', output = 'm1', func = np.std)
    >>> experiment.set_metric(name = 'm2_min', output = 'm2', func = np.min)
    >>> experiment.set_metric(name = 'm2_argmin', output = 'm2', func = np.argmin)

    >>> def process(setting, experiment):
    ...  h5 = tb.open_file(experiment.path.output, mode='a')
    ...  sg = experiment.add_setting_group(h5, setting, output_dimension = {'m1':100})
    ...  sg.m1[:] = setting.f1+setting.f2+np.random.randn(100)
    ...  sg.m2.append(setting.f1*setting.f2*np.random.randn(100))
    ...  h5.close()
    >>> nb_failed = experiment.perform([], process, progress='')

    >>> h5 = tb.open_file(experiment.path.output, mode='r')
    >>> print(h5)
    /tmp/example.h5 (File) ''
    Last modif.: '...'
    Object Tree:
    / (RootGroup) ''
    /f1=1+f2=1 (Group) 'f1=1+f2=1'
    /f1=1+f2=1/m1 (Array(100,)) 'm1'
    /f1=1+f2=1/m2 (EArray(100,)) 'm2'
    /f1=1+f2=2 (Group) 'f1=1+f2=2'
    /f1=1+f2=2/m1 (Array(100,)) 'm1'
    /f1=1+f2=2/m2 (EArray(100,)) 'm2'
    /f1=1+f2=3 (Group) 'f1=1+f2=3'
    /f1=1+f2=3/m1 (Array(100,)) 'm1'
    /f1=1+f2=3/m2 (EArray(100,)) 'm2'
    /f1=2+f2=1 (Group) 'f1=2+f2=1'
    /f1=2+f2=1/m1 (Array(100,)) 'm1'
    /f1=2+f2=1/m2 (EArray(100,)) 'm2'
    /f1=2+f2=2 (Group) 'f1=2+f2=2'
    /f1=2+f2=2/m1 (Array(100,)) 'm1'
    /f1=2+f2=2/m2 (EArray(100,)) 'm2'
    /f1=2+f2=3 (Group) 'f1=2+f2=3'
    /f1=2+f2=3/m1 (Array(100,)) 'm1'
    /f1=2+f2=3/m2 (EArray(100,)) 'm2'

    >>> h5.close()
    """
    import tables as tb
    import warnings
    from tables import NaturalNameWarning
    warnings.filterwarnings('ignore', category=NaturalNameWarning)

    if not setting_encoding:
      setting_encoding={}
    #   setting_encoding={'factor_separator':'_', 'modality_separator':'_'}
    group_name = setting.identifier(**setting_encoding)
    # print(group_name)
    if not file_id.__contains__('/'+group_name):
      setting_group = file_id.create_group('/', group_name, str(setting))
    else:
      setting_group = file_id.root._f_get_child(group_name)
    for metric in self.metric.name():
      output = getattr(self.metric, metric)['output']
      if getattr(self.metric, metric)['description']:
        description = getattr(self.metric, metric)['description']
      else:
        description = output

      if getattr(self.metric, metric)['unit']:
        description += ' in ' + getattr(self.metric, metric)['unit']

      if output_dimension and output in output_dimension:
        if not setting_group.__contains__(output):
          file_id.create_array(
            setting_group,
            output,
            np.zeros((output_dimension[output]))*np.nan,
            description)
      else:
        if setting_group.__contains__(output):
          setting_group._f_get_child(output)._f_remove()
        file_id.create_earray(setting_group, output, tb.Float64Atom(), (0,), description)

    return setting_group


[docs]def get_from_path(
  metric,
  settings = None,
  path = '',
  tag='',
  setting_encoding=None,
  verbose=False
  ):
  """ Get the metric vector from an .npy or a group of a .h5 file.

  Get the metric vector as a numpy array from an .npy or a group of a .h5 file.

  Parameters
  ----------

  metric: str
    The name of the metric. Must be a member of the doce.metric.Metric object.

  settings: doce.Plan
    Iterable settings.

  path: str
    In the case of .npy storage, a valid path to the main directory.
    In the case of .h5 storage, a valid path to an .h5 file.

  setting_encoding : dict
    Encoding of the setting. See doce.Plan.id for references.

  verbose : bool
    In the case of .npy metric storage, if verbose is set to True,
    print the file_name seeked for the metric.

    In the case of .h5 metric storage, if verbose is set to True,
    print the group seeked for the metric.

  Returns
  -------

  setting_metric: list of np.Array
    stores for each valid setting an np.Array with the values of the metric selected.

  setting_description: list of list of str
    stores for each valid setting, a compact description of the modalities of each factors.
    The factors with the same modality accross all the set of settings is stored in constant_setting_description.

  constant_setting_description: str
    compact description of the factors with the same modality accross all the set of settings.

  Examples
  --------

  >>> import doce
  >>> import numpy as np
  >>> import pandas as pd

  >>> experiment = doce.experiment.Experiment()
  >>> experiment.name = 'example'
  >>> experiment.set_path('output', f'/tmp/{experiment.name}/', force=True)
  >>> experiment.add_plan('plan', f1 = [1, 2], f2 = [1, 2, 3])
  >>> experiment.set_metric(name = 'm1_mean', output = 'm1', func = np.mean)
  >>> experiment.set_metric(name = 'm1_std', output = 'm1', func = np.std)
  >>> experiment.set_metric(name = 'm2_min', output = 'm2', func = np.min)
  >>> experiment.set_metric(name = 'm2_argmin', output = 'm2', func = np.argmin)
  >>> def process(setting, experiment):
  ...  metric1 = setting.f1+setting.f2+np.random.randn(100)
  ...  metric2 = setting.f1*setting.f2*np.random.randn(100)
  ...  np.save(f'{experiment.path.output}{setting.identifier()}_m1.npy', metric1)
  ...  np.save(f'{experiment.path.output}{setting.identifier()}_m2.npy', metric2)
  >>> nb_failed = experiment.perform([], process, progress='')

  >>> (setting_metric,
  ...  setting_description,
  ...  constant_setting_description) = get_from_path(
  ...      'm1',
  ...      experiment._plan.select([1]),
  ...      experiment.path.output)
  >>> print(constant_setting_description)
  f1=2
  >>> print(setting_description)
  ['f2=1', 'f2=2', 'f2=3']
  >>> print(len(setting_metric))
  3
  >>> print(setting_metric[0].shape)
  (100,)
  """

  import tables as tb
  import warnings
  from tables import NaturalNameWarning
  warnings.filterwarnings('ignore', category=NaturalNameWarning)

  setting_metric = []
  setting_descriptions = []
  if not setting_encoding:
    setting_encoding = {}
  setting_description_format = copy.deepcopy(setting_encoding)
  setting_description_format['style'] = 'list'
  setting_description_format['default'] = True
  setting_description_format['sort'] = False

  if isinstance(path, str):
    if path.endswith('.h5'):
      if tag:
        path = path[:-3]+'_'+tag+'.h5'
      h5_fid = tb.open_file(path, mode='r')
      for setting in settings:
        if h5_fid.root.__contains__(setting.identifier(**setting_encoding)):
          if verbose:
            print(f'Found group {setting.identifier(**setting_encoding)}')
          setting_group = h5_fid.root._f_get_child(setting.identifier(**setting_encoding))
          if setting_group.__contains__(metric):
            setting_metric.append(np.array(setting_group._f_get_child(metric)))
            setting_descriptions.append(setting.identifier(**setting_description_format))
        elif verbose:
          print(f'** Unable to find group {setting.identifier(**setting_encoding)}')
      h5_fid.close()
    else:
      if tag:
        path += tag+'/'
      for setting in settings:
        file_name = f'{path}{setting.identifier(**setting_encoding)}_{metric}.npy'
        if os.path.exists(file_name):
          if verbose:
            print(f'Found {file_name}')
          setting_metric.append(np.load(file_name))
          setting_descriptions.append(setting.identifier(**setting_description_format))
        elif verbose:
          print(f'** Unable to find {file_name}')

  (setting_descriptions, _,
  constant_setting_description,
  _) = eu.prune_setting_description(setting_descriptions, show_unique_setting = True)

  for setting_description_index, setting_description in enumerate(setting_descriptions):
    setting_descriptions[setting_description_index] = ', '.join(setting_description)

  return (setting_metric, setting_descriptions, constant_setting_description)
  
[docs]class Path:
  """handle storage of path to disk """
  def __setattr__(
    self,
    name,
    value
    ):
    object.__setattr__(self, f'{name}_raw', value)
    object.__setattr__(
      self,
      name,
      os.path.expanduser(value)
      )


if __name__ == '__main__':
  import doctest
  doctest.testmod(optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE)