"""
The io.random module holds a number of functions returning random `pandas.Series` or `numpy.ndarray` objects
that can be used for development purposes, or whenever randomized data is needed.
"""
import numpy as np
import pandas as pd
from datetime import datetime
from hydrobox.utils.decorators import accept
def _to_datetime_string(value):
    """Normalise a `start`/`end` argument for ``pandas.date_range``.

    The string ``'now'`` (any letter case) is replaced by the current local
    time, and ``datetime`` objects are formatted as ``YYYYMMDDHHMMSS``.
    Every other value, including `None` and other strings, is returned
    unchanged.
    """
    if isinstance(value, str) and value.lower() == 'now':
        value = datetime.now()
    if isinstance(value, datetime):
        # The format has whole-second resolution and no time zone directive,
        # so anything beyond that is dropped here.
        value = value.strftime('%Y%m%d%H%M%S')
    return value


@accept(distribution=str,
        size=(int, 'None'),
        seed=(int, 'None'),
        start=(str, datetime, 'None'),
        end=(str, datetime, 'None'),
        freq=(str, 'None'))
def timeseries_from_distribution(distribution='gamma',
                                 distribution_args=[10, 2], size=10, seed=None,
                                 start='now', end=None, freq='D'):
    """Generate a random time series.

    Return a ``pandas.Series`` indexed by a ``pandas.DatetimeIndex`` and
    filled with random data drawn from the given distribution. The
    distribution is looked up by name on ``numpy.random`` and called with
    ``*distribution_args`` plus a ``size`` keyword equal to the length of
    the index.

    Parameters
    ----------
    distribution : string, default='gamma'
        Name of a sampling function available as an attribute of
        ``numpy.random``. Its properties (like shape or scale) are passed
        with the parameter `distribution_args`.
    distribution_args : list, None, default=[10, 2]
        Positional arguments passed as ``*distribution_args`` into the
        sampling function. If no arguments shall be passed, set it to
        `None` (or an empty list).
    size : int, None, default=10
        Length of the produced time series; passed to
        ``pandas.date_range`` as `periods`. If `None`, the length follows
        from `start`, `end` and `freq`.
    seed : int, None, default=None
        If not `None`, it is passed to ``numpy.random.seed`` before
        sampling in order to return reproducible pseudo-random results.
        Note that this seeds NumPy's global random state.
    start : string, datetime, None, default='now'
        Starting point of the ``pandas.DatetimeIndex``. Either a
        ``datetime``, the string `'now'` for the current time, or a
        datetime string such as `YYYYMMDDHHmmss`, where the time
        (`HHmmss`) can be omitted. ``datetime`` objects (and `'now'`) are
        reduced to whole seconds.
    end : string, datetime, None, default=None
        End point of the ``pandas.DatetimeIndex``; see `start`.
    freq : string, None, default='D'
        Temporal resolution of the time series; passed to
        ``pandas.date_range`` as `freq`.

    Returns
    -------
    pandas.Series
        The random values, indexed by the generated
        ``pandas.DatetimeIndex``.

    Raises
    ------
    ValueError
        If `distribution` is not an attribute of ``numpy.random``. Errors
        raised by ``pandas.date_range`` for an invalid combination of
        `start`, `end`, `size` and `freq` are not caught here.

    See Also
    --------
    pandas.date_range : builds the index; documents valid `freq` strings

    Notes
    -----
    The index is controlled by `size`, `start`, `end` and `freq`, which
    are all forwarded to ``pandas.date_range``.

    .. warning::
        You cannot set all 4 time series parameters at the same time. In
        case you do not want to use one that holds a default value (like
        `start`, `size` or `freq`), you will have to set it to `None`.
    """
    # Resolve the sampling function by name.
    try:
        distribution_function = getattr(np.random, distribution)
    except AttributeError:
        raise ValueError('The distribution %s is not known. It has to be importable from numpy.random' % distribution)

    # `None` is documented as "pass no arguments"; unpacking `None` directly
    # would raise a TypeError. The list default is only unpacked, never
    # mutated, so sharing it between calls is harmless.
    if distribution_args is None:
        distribution_args = ()

    # Build the DatetimeIndex; its length determines the sample size.
    index = pd.date_range(start=_to_datetime_string(start),
                          end=_to_datetime_string(end),
                          periods=size, freq=freq)
    inferred_size = len(index)

    # Seed only on request, so unseeded calls stay random.
    if seed is not None:
        np.random.seed(seed=seed)

    values = distribution_function(*distribution_args, size=inferred_size)

    return pd.Series(index=index, data=values)