Source code for hydrobox.discharge.catchment

"""
Common tools for diagnosic tools frequently used in catchment hydrology.

"""
from hydrobox.utils.decorators import accept
from matplotlib.axes import SubplotBase
from matplotlib.pylab import get_cmap
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from datetime import datetime
from scipy.stats import rankdata

MONTHS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']


[docs]@accept(x=(np.ndarray, pd.Series),log=bool, plot=bool, non_exceeding=bool, ax=('None', SubplotBase)) def flow_duration_curve(x, log=True, plot=True, non_exceeding=True, ax=None, **kwargs): """Calculate a flow duration curve Calculate flow duration curve from the discharge measurements. The function can either return a ``matplotlib`` plot or return the ordered ( non)-exceeding probabilities of the observations. These values can then be used in any external plotting environment. In case x.ndim > 1, the function will be called iteratively along axis 0. Parameters ---------- x : numpy.ndarray, pandas.Series Series of prefereably discharge measurements log : bool, default=True if `True` plot on loglog axis, ignored when plot is `False` plot : bool, default=True if `False` plotting will be suppressed and the resulting array will be returned non_exceeding : bool, default=True if `True` use non-exceeding probabilities ax : matplotlib.AxesSubplot, default=None if not None, will plot into that AxesSubplot instance kwargs : kwargs, will be passed to the ``matplotlib.pyplot.plot`` function Returns ------- matplotlib.AxesSubplot : if `plot` was `True` numpy.ndarray : if `plot was `False` Notes ----- The probabilities are calculated using the Weibull empirical probability. Following [1]_, this probability can be calculated as: .. math:: p =m/(n + 1) where `m` is the rank of an observation in the ordered time series and `n` are the total observations. The increasion by one will prevent 0% and 100% probabilities. References ---------- .. [1] Sloto, R. a., & Crouse, M. Y. (1996). Hysep: a computer program for streamflow hydrograph separation and analysis. U.S. Geological Survey Water-Resources Investigations Report, 96(4040), 54. """ # omit the Series index if isinstance(x, pd.Series): x = x.values # if x has more than one dimension call this func recursive along axis=0 if x.ndim > 1: # check if plot was None, then iterate along axis=0 if not plot: return np.apply_along_axis(flow_duration_curve, 0, x, non_exceeding=non_exceeding, plot=False) else: # plot, if ax is None, create if ax is None: fig, ax = plt.subplots(1,1) last_ax = list(map(lambda x: flow_duration_curve(x, log=log, non_exceeding=non_exceeding, ax=ax), x.T))[-1] return last_ax # calculate the ranks ranks = rankdata(x, method='average') # calculate weibull pdf N = x.size # calculate probabilities p = np.fromiter(map(lambda r: r / (N + 1), ranks), dtype=np.float) # create sorting index if non_exceeding: index = np.argsort(p) else: index = np.argsort(p)[::-1] if not plot: return p[index] # generate an Axes, if ax is None if ax is None: fig, ax = plt.subplots(1, 1) # plot # set some defaults kwargs.setdefault('linestyle', '-') kwargs.setdefault('color', 'b') ax.plot(x[index], p[index], **kwargs) # label ax.set_xlabel('discharge [m3/s]') ax.set_ylabel('%sexceeding prob.' % ('non-' if non_exceeding else '')) # handle loglog if log: ax.loglog() else: ax.set_ylim((-0.05, 1.1)) ax.set_xlim(np.nanmin(x) * 0.98, np.nanmax(x) * 1.02) ax.set_title('%sFDC' % ('loglog ' if log else '')) ax.grid(which='both' if log else 'major') return ax
[docs]@accept(x=pd.Series, percentiles=('None', int, list, np.ndarray), normalize=bool, agg=(str, 'callable'), plot=bool, ax=('None', SubplotBase)) def regime(x, percentiles=None, normalize=False, agg='nanmedian', plot=True, ax=None, **kwargs): r"""Calculate hydrological regime Calculate a hydrological regime from discharge measurements. A regime is a annual overview, where all observations are aggregated across the month. Therefore it does only make sense to calculate a regime over more than one year with a temporal resolution higher than monthly. The regime can either be plotted or the calculated monthly aggreates can be returned (along with the quantiles, if any were calculated). Parameters ---------- x : pandas.Series The ``Series`` has to be indexed by a ``pandas.DatetimeIndex`` and hold the preferably discharge measurements. However, the methods does also work for other observables, if `agg` is adjusted. percentiles : int, list, numpy.ndarray, default=None percentiles can be used to calculate percentiles along with the main aggregate. The percentiles can either be set by an integer or a list. If an integer is passed, that many percentiles will be evenly spreaded between the 0th and 100th percentiles. A list can set the desired percentiles directly. normalize : bool, default=False If `True`, the regime will be normalized by the aggregate over all months. Then the numbers do not give the discharge itself, but the ratio of the monthly discharge to the overall discharge. agg : string, default='nanmedian' Define the function used for aggregation. Usually this will be 'mean' or 'median'. If there might be `NaN` values in the observations, the 'nan' prefixed functions can be used. In general, any aggregating function, which can be imported from ``numpy`` can be used. plot : bool, default=True if `False` plotting will be suppressed and the resulting ``pandas.DataFrame`` will be returned. In case `quantiles` was None, only the regime values will be returned as `numpy.ndarray` ax : matplotlib.AxesSubplot, default=None if not None, will plot into that AxesSubplot instance cmap : string, optional Specify a colormap for generating the Percentile areas is a smooth color gradient. This has to be a valid colormap reference, see https://matplotlib.org/examples/color/colormaps_reference.html. Defaults to ``'Blue'``. color : string, optional Define the color of the main aggregate. If ``None``, the first color of the specified cmap will be used. lw : int, optinal linewidth parameter in pixel. Defaults to 3. linestyle : string, optional Any valid matplotlib linestyle definition is accepted. ``':'`` - dotted ``'-.'`` - dash-dotted ``'--'`` - dashed ``'-'`` - solid Returns ------- matplotlib.AxesSubplot : if `plot` was `True` pandas.DataFrame : if `plot` was `False` and `quantiles` are not None numpy.ndarray : if `plot` was `False` and `quantiles` is None Notes ----- In case the color argument is not passed it will default to the first color in the the specified colormap (cmap). You might want to overwrite this in case no percentiles are produced, as many colormaps range from light to dark colors and the first color might just default to while. """ if not isinstance(x.index, pd.DatetimeIndex): raise ValueError('Data has to be indexed by a pandas.DatetimeIndex.') # create the percentiles if isinstance(percentiles, int): percentiles = np.linspace(0, 100, percentiles + 1, endpoint=False)[1:] if callable(agg): f = agg else: try: f = getattr(np, agg) except AttributeError: raise ValueError('The function %s cannot be imported from numpy') # create month index idx = [int(datetime.strftime(_, '%m')) for _ in x.index] # aggregate the regime and set the index if isinstance(x, pd.Series): x = pd.DataFrame(index=x.index, data=x.values) df = x.groupby(idx).aggregate(f) df.set_index(np.unique(idx), inplace=True) # build percentiles if percentiles is not None: for q in percentiles: df['q%d' % q] = x.groupby(idx).aggregate( lambda v: np.nanpercentile(v, q)) # handle normalize if normalize: for col in df.columns: df[col] = df[col] / f(df[col]) if not plot: if len(df.columns) == 1: return df.values else: return df # create the plot if ax is None: fig, ax = plt.subplots(1, 1) # some defaults kwargs.setdefault('cmap', 'Blues') kwargs.setdefault('color', None) kwargs.setdefault('lw', 3) kwargs.setdefault('linestyle', '-') cm = get_cmap(kwargs['cmap']) # check if there are quantiles if len(df.columns) > 1: # build the colormap n = int((len(df.columns) - 1) / 2) cmap = [cm(1. * _ / n) for _ in range(n)] cmap = np.concatenate((cmap, cmap[::-1])) # plot for i in range(len(df.columns) - 2, 1, -1): ax.fill_between(df.index, df.iloc[:, i], df.iloc[:, i - 1], interpolate=True, color=cmap[i - 1]) # plot the main aggregate c = kwargs.get('color') if c is None: c = cm(0.0) ax.plot(df.index, df.iloc[:, 0], linestyle=kwargs['linestyle'], color=c, lw=kwargs['lw']) ax.set_xlim(0, 12) plt.xticks(df.index, MONTHS, rotation=45) return ax