Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
b9797db
Refactor hk102.py module to improve performance and readability
taruma Apr 15, 2024
0ad1c1a
Refactor hk102.py module to improve performance and readability
taruma Apr 15, 2024
eb15ee7
add docstring to the module
taruma Apr 15, 2024
938dc4a
Refactor hk127.py module to improve performance and readability
taruma Apr 15, 2024
9919e55
Refactor calc_x_gumbel function in hk127.py for improved parameter ha…
taruma Apr 15, 2024
35fd121
Refactor hk127.py modules for improved performance, readability, and …
taruma Apr 15, 2024
4b8f6d9
Refactor hk127.py module for improved readability and documentation
taruma Apr 15, 2024
f99c227
Refactor hk127.py module for improved readability and documentation
taruma Apr 15, 2024
2a617b9
Refactor hk140.py module for improved readability and consistency
taruma Apr 16, 2024
522709d
Refactor hk140.py module to use descriptive function and parameter names
taruma Apr 16, 2024
1237aa8
Refactor hk140.py module to improve readability and documentation
taruma Apr 16, 2024
a0afbd7
Refactor hk141.py module to improve readability and documentation
taruma Apr 16, 2024
6ec673a
Refactor hk141.py module to improve readability and documentation
taruma Apr 16, 2024
8f5fa36
Update ujidist.py to use chi_square_test function from hk141 module
taruma Apr 16, 2024
8cc47bd
Refactor hk151.py module to improve readability and documentation
taruma Apr 16, 2024
d2854a4
Refactor hk151.py module to improve readability and documentation
taruma Apr 16, 2024
8f7b9c7
Refactor hk158.py module to improve readability and documentation
taruma Apr 16, 2024
5c28a36
Update check_distribution function in hk158.py to display detailed in…
taruma Apr 16, 2024
860f125
Deprecate ujidist.py module and update to use kolmogorov_smirnov and …
taruma Apr 16, 2024
218570b
Deprecate anfrek.py module and update to use specific distribution mo…
taruma Apr 16, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions hidrokit/contrib/taruma/anfrek.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,17 @@
Modul rekap analisis frekuensi. Untuk manual lihat modul terpisah.
"""

import warnings
from hidrokit.contrib.taruma import hk172, hk124, hk126, hk127

# Import-time notice: importing this recap module emits a FutureWarning that
# points users at the per-distribution modules instead.
warnings.warn(
    "This module will be deprecated in the future. "
    "Please use specific distribution modules such as "
    "normal, lognormal, logpearson3, and gumbel instead.",
    FutureWarning,
)


freq_normal = hk172.freq_normal
freq_lognormal = hk124.freq_lognormal
freq_logpearson3 = hk126.freq_logpearson3
Expand Down
190 changes: 157 additions & 33 deletions hidrokit/contrib/taruma/hk124.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,30 @@
"""manual:
https://gist.github.com/taruma/5d3ab88893e56f895dc3f36ea19c3e60"""
"""
hk124: freq_lognormal.py

This module provides functions for calculating values related to hydrological analysis.

For more information, refer to the manual:
https://gist.github.com/taruma/5d3ab88893e56f895dc3f36ea19c3e60

Functions:
- find_K: Calculates the K value for a given return period.
- calc_x_lognormal: Calculates the x value for a given return period
using the lognormal distribution.
- freq_lognormal: Calculates the lognormal frequency distribution for a given dataset.
- calc_prob: Calculates the probability for a given K value.
"""

import numpy as np
import pandas as pd
from scipy import stats
from hidrokit.contrib.taruma.utils import handle_deprecated_params

# pylint: disable=invalid-name

# Table of Gauss reduction variable values
# From the hydrology book: Aplikasi Metode Statistik untuk Analisa Data, p. 119

# CODE: SW (Source: Soewarno)

_DATA_SW = [
[1.001, 0.999, -3.050],
Expand All @@ -34,69 +50,177 @@
[1000.000, 0.001, 3.090],
]

# Column labels for the Soewarno table rows: return period, probability
# (1 / return period in the visible rows), and frequency factor K.
_COL_SW = ["periode_ulang", "peluang", "k"]

# Lookup table consulted by the "soewarno" source of find_K and calc_prob.
# Built once; the duplicate definition left over from the refactor is dropped.
t_normal_sw = pd.DataFrame(data=_DATA_SW, columns=_COL_SW)

def _find_k_in_table(return_period, table):
    """
    Interpolate the K value for a return period from a lookup table.

    Parameters:
        return_period (float or array-like): Return period(s) to look up.
        table (pandas.DataFrame): Table with ``periode_ulang`` and ``k``
            columns. ``periode_ulang`` must be sorted ascending, as
            ``numpy.interp`` requires of its x-coordinates.

    Returns:
        float or numpy.ndarray: Linearly interpolated K value(s). Requests
        outside the tabulated range are clamped to the first/last ``k``
        entry (``numpy.interp`` default behaviour).
    """
    return np.interp(return_period, table["periode_ulang"], table["k"])

def find_K(return_period, source="scipy"):
    """
    Calculate the K values for a given return period.

    Parameters:
        return_period (float or array-like): The return period(s) for
            which to calculate the K values.
        source (str, optional): The source of the K values
            (case-insensitive). Can be "soewarno" (interpolate in the
            ``t_normal_sw`` table) or "scipy" (standard normal quantile).
            Defaults to "scipy".

    Returns:
        float or numpy.ndarray: The calculated K values.

    Raises:
        ValueError: If an unknown source is provided.

    """
    method = source.lower()

    if method == "soewarno":
        return _find_k_in_table(return_period, t_normal_sw)

    if method == "scipy":
        # K is the standard normal quantile at probability 1 - 1/T.
        periods = np.asarray(return_period, dtype=float)
        return stats.norm.ppf(1 - 1 / periods)

    # Fail loudly instead of silently returning None for a mistyped source.
    raise ValueError(f"Unknown source: {source}")


def calc_x_lognormal(x, return_period=None, source="scipy", show_stat=False):
    """
    Calculate the value of x for a given return period using the lognormal distribution.

    The data are transformed with log10, the sample mean and sample standard
    deviation (ddof=1) of the logs are combined with the K factor as
    ``mean + K * std``, and the result is transformed back with ``10 ** y``.

    Parameters:
        x (array-like): Input data array.
        return_period (array-like, optional):
            Return period(s) for which to calculate the value of x.
            Default is [5].
        source (str, optional): Source of the K factor, passed to find_K.
            Default is "scipy".
        show_stat (bool, optional): Whether to print the calculated statistics
            (mean and standard deviation of the logs, and K).
            Default is False.

    Returns:
        array-like: The calculated value(s) of x for the given return period(s).
    """
    # None sentinel avoids sharing a mutable default list between calls.
    periods = np.array([5] if return_period is None else return_period)

    log_x = np.log10(x)
    log_mean = np.mean(log_x)
    log_std = np.std(log_x, ddof=1)

    k = find_K(periods, source=source)

    if show_stat:
        print(f"y_mean = {log_mean:.5f}")
        print(f"y_std = {log_std:.5f}")
        print(f"k = {k}")

    return np.power(10, log_mean + k * log_std)


def freq_lognormal(
    dataframe=None,
    target_column=None,
    return_periods=None,
    display_stat=False,
    source="scipy",
    out_column_name="Log Normal",
    out_index_name="Kala Ulang",
    **kwargs,
):
    """
    Calculate the frequency analysis using the lognormal distribution.

    Parameters:
    - dataframe (pandas.DataFrame): The input dataframe containing the data.
    - target_column (str):
        The name of the column in the dataframe that contains the data to be analyzed.
        Defaults to the first column of the dataframe.
    - return_periods (list): A list of return periods for which the analysis will be performed.
        Default is [2, 5, 10, 20, 25, 50, 100].
    - display_stat (bool): Whether to display the statistical information. Default is False.
    - source (str): The source of the K factor, passed on to calc_x_lognormal.
        Default is "scipy".
    - out_column_name (str): The name of the output column in the result dataframe.
        Default is "Log Normal".
    - out_index_name (str): The name of the index column in the result dataframe.
        Default is "Kala Ulang".
    - **kwargs: Deprecated parameter names (df, col, return_period, show_stat,
        col_name, index_name); a supplied deprecated value takes precedence
        over its replacement.

    Returns:
    - result (pandas.DataFrame): The result dataframe containing the frequency analysis results.

    Raises:
    - TypeError: If no dataframe is supplied under either name.

    """

    def _resolve(old_name, new_name, current):
        # Compare against None rather than using `or`: truth-testing a
        # DataFrame or ndarray raises ValueError, and `or` would also discard
        # legitimate falsy legacy values.
        # NOTE(review): assumes handle_deprecated_params returns None when
        # `old_name` is absent from kwargs -- confirm against utils.
        legacy = handle_deprecated_params(kwargs, old_name, new_name)
        return current if legacy is None else legacy

    # deprecated parameters
    dataframe = _resolve("df", "dataframe", dataframe)
    target_column = _resolve("col", "target_column", target_column)
    return_periods = _resolve("return_period", "return_periods", return_periods)
    display_stat = _resolve("show_stat", "display_stat", display_stat)
    out_column_name = _resolve("col_name", "out_column_name", out_column_name)
    out_index_name = _resolve("index_name", "out_index_name", out_index_name)

    if dataframe is None:
        # `dataframe` only defaults to None so the deprecated `df=` keyword
        # keeps working; it is still required.
        raise TypeError("freq_lognormal() missing required argument: 'dataframe'")

    if return_periods is None:
        return_periods = [2, 5, 10, 20, 25, 50, 100]
    if target_column is None:
        target_column = dataframe.columns[0]

    values = calc_x_lognormal(
        dataframe[target_column],
        return_period=return_periods,
        show_stat=display_stat,
        source=source,
    )

    result = pd.DataFrame(data=values, index=return_periods, columns=[out_column_name])
    result.index.name = out_index_name
    return result


def _calc_prob_in_table(k, table):
    """
    Interpolate the tabulated probability for a K value.

    Parameters:
        k (float or array-like): K value(s) to look up.
        table (pandas.DataFrame): Table with ``k`` and ``peluang`` columns.
            ``k`` must be sorted ascending, as ``numpy.interp`` requires of
            its x-coordinates.

    Returns:
        float or numpy.ndarray: Linearly interpolated ``peluang`` value(s).
        K values outside the tabulated range are clamped to the first/last
        entry (``numpy.interp`` default behaviour).
    """
    return np.interp(k, table["k"], table["peluang"])

def calc_prob(k, source="scipy"):
    """
    Calculate the probability value for a given value of k.

    Parameters:
    - k (float or array-like): The value(s) for which the probability is calculated.
    - source (str, optional): The source of probability calculation (case-insensitive).
        Valid options are "soewarno" and "scipy".
        Default is "scipy".

    Returns:
    - prob_value (float or array-like): The calculated probability value(s).
        For "scipy" this is the standard normal CDF at k; for "soewarno" it is
        one minus the value interpolated from the ``peluang`` column.

    Raises:
    - ValueError: If an unknown source is provided.

    """
    method = source.lower()

    if method == "soewarno":
        # Complement the tabulated value so both sources return the same kind
        # of probability (the visible table rows list peluang = 1 / T).
        return 1 - _calc_prob_in_table(np.array(k), t_normal_sw)

    if method == "scipy":
        return stats.norm.cdf(k)

    raise ValueError(f"Unknown source: {source}")
Loading