/**
* Main class used for statistical analyses and data cleaning.
* @class
* @name stats
*/
export default class stats {
/**
* Makes a deep copy of original data for further manipulation.
* @method copydata
* @memberof stats
* @param {Object} data - Contains: 1d-JS array or object with original data.
* @returns {Object} Deep copy of original data.
* @example
* hydro.analyze.stats.copydata({data: [someData]})
*/
static copydata({ params, args, data } = {}) {
var arr, values, keys;
if (typeof data !== "object" || data === null) {
return data;
}
arr = Array.isArray(data) ? [] : {};
for (keys in data) {
values = data[keys];
arr[keys] = this.copydata({ data: values });
}
return arr;
}
/**
* Retrieves a 1D array with the data.
* @method onearray
* @memberof stats
* @param {Object} data - Contains: 1d-JS array as [data]
* @returns {Object[]} Array object.
* @example
* hydro.analyze.stats.onearray({data: [someData]})
*/
static onearray({ params, args, data } = {}) {
var arr = [];
arr.push(data[1]);
return arr;
}
/**
* Gives the range of a dataset
* @method range
* @memberof stats
* @param {Object} data - Contains: 1d-JS array with data as [data].
* @returns {Array} Range of the data.
* @example
* hydro.analyze.stats.range({data: [someData]})
*/
static range({ params, args, data } = {}) {
const min = this.min({ data }),
max = this.max({ data });
const N = params.N || data.length;
const step = (max - min) / N;
const range = [];
for (let i = 0; i <= N; i++) {
range.push(min + i * step);
}
return range;
}
/**
* Identifies gaps in data.
* @method datagaps
* @memberof stats
* @param {Object} data - Contains: 1d-JS array with data as [data].
* @returns {Number} Number of gaps in data.
* @example
* hydro.analyze.stats.datagaps({data: [someData]})
*/
static datagaps({ params, args, data } = {}) {
var arr = data,
or,
gap = 0;
if (typeof arr[0] != "object") {
or = this.copydata({ data: arr });
} else {
or = this.copydata({ data: arr[1] });
}
for (var i = 0; i < or.length; i++) {
if (or[i] === undefined || Number.isNaN(or[i]) || or[i] === false) {
gap++;
}
}
return console.log(`Total amount of gaps in data: ${gap}.`);
}
/**
* Remove gaps in data with an option to fill the gap.
* @method gapremoval
* @memberof stats
* @param {Object} data - Contains: 1d-JS array object with data as [data].
* @returns {Number} Number of gaps found in the data.
* @example
* hydro.analyze.stats.gapremoval({data: [someData]})
*/
static gapremoval({ params, args, data } = {}) {
var arr = data,
or = this.copydata({ data: arr }),
val;
if (typeof or[0] != "object") {
val = this.cleaner({ data: or });
} else {
var time = or[0],
ds = or[1];
for (var i = 0; i < ds.length; i++) {
if (ds[i] === undefined || Number.isNaN(ds[i]) || ds[i] === false) {
delete time[i];
}
}
val = this.cleaner({ data: ds });
time = this.cleaner({ data: time });
return [time, val];
}
}
/**
* Identifies gaps in time. Used for filling gaps if required by the
* user. Time in minutes and timestep must be divisible by the total time of the event.
* @method timegaps
* @memberof stats
* @param {Object} params - Contains: timestep (in min)
* @param {Object} data - Contains: 1d-JS array with timedata in minutes as [timeData].
* @returns {Object[]} Array with gaps.
* @example
* hydro.analyze.stats.timegaps({params: {timestep: 'someNum'} data: [timeData]})
*/
static timegaps({ params, args, data } = {}) {
var timestep = params.timestep,
arr = data,
or = this.copydata({ data: arr });
if (typeof arr[0] === "object") {
or = this.copydata({ data: arr[0] });
}
var datetr = [];
for (var i = 0; i < or.length; i++) {
if (typeof or[0] == "string") {
datetr.push(Math.abs(Date.parse(or[i]) / (60 * 1000)));
} else {
datetr.push(or[i]);
}
}
var gaps = 0,
loc = [],
//timestep and total duration in minutes.
time = timestep;
for (var i = 1; i < or.length - 1; i++) {
if (
Math.abs(datetr[i - 1] - datetr[i]) != time ||
Math.abs(datetr[i] - datetr[i + 1]) != time
) {
gaps++;
loc.push(or[i]);
}
}
if (loc.length === 0) {
console.log("No gaps in times!");
return;
} else {
console.log(`Number of time gaps: ${gaps}`);
return loc;
}
}
/**
* Fills data gaps (either time missig or data missing). Unfinished.
* @method gapfiller
* @memberof stats
* @param {Object} params - Contains: type (time or data)
* @param {Object} data - Contains: 2d-JS array with data or time gaps to be filled as [[time],[data]].
* @returns {Object[]} Array with gaps filled.
* @example
* hydro.analyze.stats.gapfiller({params: {type: 'someType'}, data: [[time1,time2...], [data1, data2,...]]})
*/
static gapfiller({ params, args, data } = {}) {
var or = this.copydata({ data: data }),
datetr = [];
if (typeof data[0] === "object") {
or = this.copydata({ data: data[0] });
}
for (var i = 0; i < or.length; i++) {
if (typeof or[0] == "string") {
datetr.push(Math.abs(Date.parse(or[i]) / (60 * 1000)));
} else {
datetr.push(or[i]);
}
}
if (params.type === "time") {
var xo = [];
}
}
/**
* Sums all data in a 1-d array.
* @method sum
* @memberof stats
* @param {Object} data - Contains: 1d-JS array object with data as [data].
* @returns {Number} Sum of all data in an array.
* @example
* hydro.analyze.stats.sum({data: [data]})
*/
static sum({ params, args, data } = {}) {
return data.reduce((acc, curr) => acc + curr, 0);
}
/**
* Calculates the mean of a 1d array.
* @method mean
* @memberof stats
* @param {Object} data - Contains: 1d-JS array object with data as [data].
* @returns {Number} Mean of the data.
* @example
* hydro.analyze.stats.mean({data: [data]})
*/
static mean({ params, args, data } = {}) {
const sum = this.sum({ data });
const mean = sum / data.length;
return mean;
}
/**
* Calculates the median values for a 1d array.
* @method median
* @memberof stats
* @param {Object} data - Contains: 1d-JS array object with data as [data].
* @returns {Number} Median of the data.
* @example
* hydro.analyze.stats.median({data: [data]})
*/
static median({ params, args, data } = {}) {
const sortedArray = data.slice().sort((a, b) => a - b);
const middleIndex = Math.floor(sortedArray.length / 2);
if (sortedArray.length % 2 === 0) {
const left = sortedArray[middleIndex - 1];
const right = sortedArray[middleIndex];
return (left + right) / 2;
} else {
return sortedArray[middleIndex];
}
}
/**
* Calculates standard deviation of a 1d array.
* @method stddev
* @memberof stats
* @param {Object} data - Contains: 1d-JS array object with data as [data].
* @returns {Number} Standard deviation.
* @example
* hydro.analyze.stats.stddev({data: [data]})
*/
static stddev({ params, args, data } = {}) {
var mean = this.mean({ data }),
SD = 0,
nex = [];
for (var i = 0; i < data.length; i += 1) {
nex.push((data[i] - mean) * (data[i] - mean));
}
return (SD = Math.sqrt(this.sum({ data: nex }) / nex.length));
}
/**
* Calculate variance for an 1-d array of data.
* @method variance
* @memberof stats
* @param {Object} data - Contains 1d-JS array object with data as [data].
* @returns {Number} Variance of the data.
* @example
* hydro.analyze.stats.variance({data: [data]})
*/
static variance({ params, args, data } = {}) {
const mean = this.mean({ data });
const squareDiffs = data.map((num) => (num - mean) ** 2);
const sumSquareDiffs = squareDiffs.reduce((acc, curr) => acc + curr, 0);
const variance = sumSquareDiffs / data.length;
return variance;
}
/**
* Calculates sum of squares for a dataset.
* @method sumsqrd
* @memberof stats
* @param {Object} data - Contains: 1d-JS array object with data as [data].
* @returns {Number} Sum of squares for data.
* @example
* hydro.analyze.stats.sumsqrd({data: [data]})
*/
static sumsqrd({ params, args, data } = {}) {
var sum = 0,
i = data.length;
while (--i >= 0) sum += data[i];
return sum;
}
/**
* Minimum value of an array.
* @method min
* @memberof stats
* @param {Object} data - Contains: 1d-JS array object with data as [data].
* @returns {Number} Minimum value of a dataset.
* @example
* hydro.analyze.stats.min({data: [data]})
*/
static min({ params, args, data } = {}) {
return Math.min(...data);
}
/**
* Maximum value of an array.
* @method max
* @memberof stats
* @param {Object} data - Contains: 1d-JS array object with data as [data].
* @returns {Number} Maximum value of a dataset.
* @example
* hydro.analyze.stats.max({data: [data]})
*/
static max({ params, args, data } = {}) {
return Math.max(...data);
}
/**
* Unique values in an array.
* @method unique
* @memberof stats
* @param {Object} data - Contains: 1d-JS array object with data as [data].
* @returns {Object[]} Array with unique values.
* @example
* hydro.analyze.stats.unique({data: [data]})
*/
static unique({ params, args, data } = {}) {
var un = {},
_arr = [];
for (var i = 0; i < data.length; i++) {
if (!un[data[i]]) {
un[data[i]] = true;
_arr.push(data[i]);
}
}
return _arr;
}
/**
* Calculates the frequency in data.
* @method frequency
* @memberof stats
* @param {Object} data - Contains: 1d-JS array object with data as [data].
* @returns {Object} Object with frenquency distribution.
* @example
* hydro.analyze.stats.frequency({data: [data]})
*/
static frequency({ params, args, data } = {}) {
var _arr = this.copydata({ data: data }),
counter = {};
_arr.forEach((i) => {
counter[i] = (counter[i] || 0) + 1;
});
return counter;
}
/**
* Use mean and standard deviation to standardize the original dataset.
* @method standardize
* @memberof stats
* @param {Object} data - Contains: 1d-JS array object with data as [data].
* @returns {Object[]} Array with standardized data.
* @example
* hydro.analyze.stats.standardize({data: [data]})
*/
static standardize({ params, args, data } = {}) {
var _arr = [],
stddev = this.stddev({ data: data }),
mean = this.mean({ data: data });
for (var i = 0; i < data.length; i++) {
_arr[i] = (data[i] - mean) / stddev;
}
return _arr;
}
/**
* Quantile calculator for given data.
* @method quantile
* @memberof stats
* @param {Object} params - Contains: q(quartile as 0.25, 0.75, or required)
* @param {Object} data - Contains: 1d-JS array object with data as [data].
* @returns {Object[]} Array with values fitting the quartile.
* @example
* hydro.analyze.stats.quantile({params: {q: 'someNum'}, data: [data]})
*/
static quantile({ params, args, data } = {}) {
var _arr = data.slice();
_arr.sort(function (a, b) {
return a - b;
});
var p = (data.length - 1) * params.q;
if (p % 1 === 0) {
return _arr[p];
} else {
var b = Math.floor(p),
rest = p - b;
if (_arr[b + 1] !== undefined) {
return _arr[b] + rest * (_arr[b + 1] - _arr[b]);
} else {
return _arr[b];
}
}
}
/**
* Removes interquartile outliers from an array or a set of arrays.
* @method interoutliers
* @memberof stats
* @param {Object} [params={ q1: 0.25, q2: 0.75 }] - Parameters object.
* @param {Array} data - Data to filter. If a 2D array is provided, the first array will be considered as a time array.
* @returns {Array} - Filtered data.
* @example
* hydro.analyze.stats.interoutliers({ params: { q1: 0.25, q2: 0.75 }, data: [1, 2, 3, 100, 4, 5, 6]});
*/
static interoutliers({ params = { q1: 0.25, q2: 0.75 }, data = [] } = {}) {
const { q1, q2 } = params;
const or = [...data];
let time = [];
if (Array.isArray(data[0])) {
[time, or] = data;
}
const Q1 = this.quantile({ data: or, params: { q: q1 } });
const Q3 = this.quantile({ data: or, params: { q: q2 } });
const IQR = Math.abs(Q3 - Q1);
const qd = Math.abs(Q1 - 1.5 * IQR);
const qu = Math.abs(Q3 + 1.5 * IQR);
const filteredData = or.filter((value, index) => {
if (value >= qd && value <= qu) {
if (time.length) {
return [time[index], value];
} else {
return value;
}
}
});
return time.length ? filteredData : filteredData;
}
/**
* Filters the outliers from the given data set based on its standard score (z-score).
*
* @memberof stats
* @static
* @method normoutliers
*
* @param {Object} [params={}] - An object containing optional parameters.
* @param {Number} [params.low=-0.5] - The lower threshold value for filtering data.
* @param {Number} [params.high=0.5] - The higher threshold value for filtering data.
* @param {Object} [args] - An object containing any additional arguments.
* @param {Array} data - The data set to filter outliers from.
* @param {Array} [data[0]=[]] - An optional array of timestamps corresponding to the data.
* @param {Array} data[1] - The main data array containing values to filter outliers from.
*
* @returns {Array} Returns the filtered data set. If timestamps are provided, it will return an array of
* timestamps and filtered data set as [t, out]. If no timestamps are provided, it will return the filtered data set.
*
* @example
*
* // Filter outliers from the data set between z-scores of -0.5 and 0.5
* let data = [1, 2, 3, 4, 5, 10, 12, 15, 20];
* let filteredData = hydro.analyze.stats.normoutliers({ params: { low: -0.5, high: 0.5 }, data: data });
* // filteredData => [1, 2, 3, 4, 5, 15, 20]
*
* // Filter outliers from the data set between z-scores of -1 and 1 with timestamps
* let data = [[1, 2, 3, 4, 5, 10, 12, 15, 20], [1, 2, 3, 4, 5, 10, 12, 15, 200]];
* let [timestamps, filteredData] = hydro.analyze.stats.normoutliers({ params: { low: -1, high: 1 }, data: data });
* // timestamps => [1, 2, 3, 4, 5, 10, 12, 15]
* // filteredData => [1, 2, 3, 4, 5, 10, 12, 15]
*/
static normoutliers({ params = {}, args, data } = {}) {
const { lowerBound = -0.5, upperBound = 0.5 } = params;
const [time, or] = Array.isArray(data[0]) ? data : [[], data];
const stnd = this.standardize({ data: or });
const out = or.filter(
(_, i) => stnd[i] < lowerBound || stnd[i] > upperBound
);
const t = time.filter(
(_, j) => stnd[j] < lowerBound || stnd[j] > upperBound
);
return time.length === 0 ? out : [t, out];
}
/**
* Remove outliers from dataset. It uses p1 and p2 as outliers to remove.
* @method outremove
* @memberof stats
* @param {Object} params - Contains: type ('normalized', 'interquartile')
* @param {Object} args - Contains: p1 (low end value), p2 (high end value) both depending on outlier analysis type
* @param {Object} data - Contains: 2d-JS array with time series data as [[time],[data]].
* @returns {Object[]} Array with cleaned data.
* @example
* hydro.analyze.stats.outremove({params: {type: 'someType'}, args: {p1: 'someNum', p2: 'someNum'},
* data: [[time1, time2,...], [data1, data2,...]]})
*/
static outremove({ params, args, data } = {}) {
var out,
p1 = args.p1,
p2 = args.p2;
if (params.type === "normalized") {
out = this.normoutliers({ params: { low: p1, high: p2 }, data: data });
} else {
out = this.interoutliers({ params: { q1: p1, q2: p2 }, data: data });
}
if (typeof data[0] != "object") {
return this.itemfilter(arr, out);
} else {
var t = this.itemfilter(arr[0], out[0]),
or = this.itemfilter(arr[1], out[1]);
return [t, or];
}
}
/**
* Calculates pearson coefficient for bivariate analysis.
* The sets must be of the same size.
* @method correlation
* @memberof stats
* @param {Object} params.data - An object containing the two data sets as set1 and set2.
* @returns {Number} Pearson coefficient.
* @example
* const data = {set1: [1,2,3], set2: [2,4,6]};
* const pearsonCoefficient = hydro1.analyze.stats.correlation({data})
*/
static correlation({ params, args, data } = {}) {
const { set1, set2 } = data;
const n = set1.length + set2.length;
const q1q2 = [];
const sq1 = [];
const sq2 = [];
for (let i = 0; i < set1.length; i++) {
q1q2[i] = set1[i] * set2[i];
sq1[i] = set1[i] ** 2;
sq2[i] = set2[i] ** 2;
}
const r1 =
n * this.sum({ data: q1q2 }) -
this.sum({ data: set1 }) * this.sum({ data: set2 });
const r2a = Math.sqrt(
n * this.sum({ data: sq1 }) - this.sum({ data: set1 }) ** 2
);
const r2b = Math.sqrt(
n * this.sum({ data: sq2 }) - this.sum({ data: set2 }) ** 2
);
return r1 / (r2a * r2b);
}
/**
* Calculates different types of efficiencies for hydrological models: Nash-Sutcliffe, Coefficient of Determination and Index of Agreement.
* Only accepts 1D array of observed and model data within the same time resolution.
* Range of validity: Nash-Sutcliffe: between 0.6-0.7 is acceptable, over 0.7 is very good.
* Determination coefficient: between 0 and 1, 1 being the highest with no dispersion between the data sets and 0 meaning there is no correlation.
* Index of agrement: between 0 and 1, with more than 0.65 being considered good.
* All efficiencies have limitations and showcase statistically the well performance of a model, but should not be considered as only variable for evaluation.
* @method efficiencies
* @memberof stats
* @param {Object} options - The options object.
* @param {Object} options.params - An object containing the type of efficiency to calculate ('NSE','determination', 'agreement', 'all').
* @param {Object} options.data - A 2D array with values arranged as [[observed], [model]].
* @param {Array} [options.args] - An optional array of additional arguments to pass to the function.
* @returns {Number|Object} - A number representing the calculated metric, or an object containing multiple metrics if 'all' is specified.
* @example
* // Calculate Nash-Sutcliffe efficiency:
* const obs = [1, 2, 3];
* const model = [1.5, 2.5, 3.5];
* const NSE = hydro.analyze.stats.efficiencies({ params: { type: 'NSE' }, data: [obs, model] });
*
* // Calculate all efficiencies:
* const metrics = hydro.analyze.stats.efficiencies({ params: { type: 'all' }, data: [obs, model] });
* // metrics = { NSE: 0.72, r2: 0.5, d: 0.62 }
*/
static efficiencies({ params, args, data } = {}) {
let { type } = params,
[obs, model] = data;
const meanobs = this.mean({ data: obs });
const meanmodel = this.mean({ data: model });
if (type === "NSE") {
const diff1 = model.map((val, i) => Math.pow(val - obs[i], 2));
const diff2 = obs.map((val) => Math.pow(val - meanobs, 2));
const NSE = 1 - this.sum({ data: diff1 }) / this.sum({ data: diff2 });
return NSE;
} else if (type === "determination") {
const diff1 = [];
const diff2 = [];
const diff3 = [];
for (let i = 0; i < obs.length; i++) {
diff1[i] = (model[i] - meanmodel) * (obs[i] - meanobs);
diff2[i] = Math.pow(model[i] - meanmodel, 2);
diff3[i] = Math.pow(obs[i] - meanobs, 2);
}
console.log(
`The values are - Upper: ${this.sum({
data: diff1,
})}, Lower: ${this.sum({ data: diff2 })} and ${this.sum({
data: diff3,
})}`
);
const r = Math.pow(
this.sum({ data: diff1 }) /
(Math.sqrt(this.sum({ data: diff2 })) *
Math.sqrt(this.sum({ data: diff3 }))),
2
);
return r;
} else if (type === "agreement") {
const diff1 = obs.map((val, i) => Math.pow(val - model[i], 2));
const diff2 = obs.map((val, i) =>
Math.pow(Math.abs(model[i] - meanobs) + Math.abs(val - meanobs), 2)
);
const d = 1 - this.sum({ data: diff1 }) / this.sum({ data: diff2 });
return d;
}
if (type === "all") {
const metrics = {
NSE: this.efficiencies({
params: { type: "NSE" },
data: [obs, model],
}),
r2: this.efficiencies({
params: { type: "determination" },
data: [obs, model],
}),
d: this.efficiencies({
params: { type: "agreement" },
data: [obs, model],
}),
};
return metrics;
}
}
/**
* Fast fourier analysis over a 1d dataset
* and see if there are any patterns within the data. Should be considered to be used
* for small data sets.
* @method fastfourier
* @memberof stats
* @param {Object} data - Contains: 1d-JS array with data as [data]
* @returns {Object[]} calculated array.
* @example
* hydro.analyze.stats.fastfourier({data: [someData]})
*/
static fastFourier({ params, args, data } = {}) {
const nearest = Math.pow(2, Math.ceil(Math.log2(data.length)));
let paddedData = tf.pad1d(data, [0, nearest - data.length]);
const imag = tf.zerosLike(paddedData);
const complexData = tf.complex(paddedData, imag);
const fft = tf.spectral.fft(complexData);
const fftResult = fft.arraySync();
return fftResult;
}
/**
* Calculates the skewness of a dataset
* @method skewness
* @memberof stat
* @param {Object} params - Contains: none
* @param {Array} data - Array of numeric values
* @returns {Number} Skewness value
* @example
* hydro.analyze.stats.skewness({data: [1, 2, 3, 4, 5]})
*/
static skewness({ params, arg, data } = {}) {
const n = data.length;
const mean = this.mean({ data: data });
const sum3 = data.reduce((acc, val) => acc + Math.pow(val - mean, 3), 0);
const stdDev = Math.sqrt(
data.reduce((acc, val) => acc + Math.pow(val - mean, 2), 0) / n
);
return (n / ((n - 1) * (n - 2))) * (sum3 / Math.pow(stdDev, 3));
}
/**
* Calculates the kurtosis of a dataset
* @method kurtosis
* @memberof stat
* @param {Object} params - Contains: none
* @param {Array} data - Array of numeri
* values
* @returns {Number} Kurtosis value
* @example
* hydro.analyze.stats.kurtosis({data: [1
* 2, 3, 4, 5]})
*/
static kurtosis({ params, args, data } = {}) {
const n = data.length;
const mean = this.mean({ data: data });
const sum4 = data.reduce((acc, val) => acc + Math.pow(val - mean, 4), 0);
const stdDev = this.stddev({ data: data });
return (
((n * (n + 1)) / ((n - 1) * (n - 2) * (n - 3))) *
(sum4 / Math.pow(stdDev, 4)) -
(3 * Math.pow(n - 1, 2)) / ((n - 2) * (n - 3))
);
}
/**
* Performs forward fill to replace missin
* values in an array with the last
* non-null value
* @method forwardFill
* @memberof stat
* @param {Object} params - Contains: none
* @param {Array} data - Array of value
* with missing entries
* @returns {Object} Object containing th
* filled data array and an array of
* replace indices
* @example hydro.analyze.stats.forwardFi
* ({data: [1, null, 3, null, null, 6]})
*/
static forwardFill({ params, args, data } = {}) {
let previousValue = null;
let replaceIndices = [];
const filledData = data.map((value, index) => {
if (value > 0) {
previousValue = value;
return value;
} else if (previousValue !== null) {
replaceIndices.push(index);
return previousValue;
} else {
// Handle the case when the first value is missing
return null;
}
});
return { data: filledData, replaceIndices };
}
/**
* Returns an array that Contains the basic statistics
* of a dataset. It is used afterwards to be prompted
* using google graphing tools.
* @method basicstats
* @memberof stats
* @param {Object} data - 1d-JS array with data arranged as [data].
* @returns {Object[]} flatenned array for the dataset.
* @example
* hydro.analyze.stats.basicstats({data: [someData]})
*/
static basicstats({ params, args, data } = {}) {
//Can pass time series data as a 2d array without additional manipulation
typeof data[0] === "object"
? (() => {
data = data[1];
data.shift();
})()
: data;
data = data.map((val) => JSON.parse(val));
//if value is outside the values required from the api call
data = data.map((val) => {
val > 99998 ? (val = 0) : val;
return val;
});
var temp = [],
values = [];
//call the basic functions for analysis.
values.push("Value");
values.push(data.length);
values.push(this.min({ data }));
values.push(this.max({ data }));
values.push(this.sum({ data }));
values.push(this.mean({ data }));
values.push(this.median({ data }));
values.push(this.stddev({ data }));
values.push(this.variance({ data }));
values.push(this.skewness({ data }));
values.push(this.kurtosis({ data }));
temp.push([
"Metric",
"Number of values",
"Minimum Value",
"Maximum value",
"Sum",
"Mean",
"Median",
"Standard deviation",
"Variance",
"Skewness",
"Kurtosis",
]);
temp.push(values);
return temp;
}
/***************************/
/*****Statistic Tests ****/
/***************************/
/**
* Mann-Kendall trend test
* Checks for mononicity of data throughout time.
* Reference: Kottegoda & Rosso, 2008.
* @method MK
* @memberof stats
* @author Alexander Michalek & Renato Amorim, IFC, University of Iowa.
* @param {Object[]} data - Contains: 1d-JS array with timeseries
* @returns {Object[]} 1d array with 3 values: p-value, value sum and z value
* @example
* hydro.analyze.stats.MK({data: [someData]})
*/
static MK({ params, args, data }) {
var flow = data,
S_sum = 0,
S = 0,
sign = 0,
z = 0,
sigma = 0;
for (var i = 0; i < flow.length - 1; i++) {
for (var j = i + 1; j < flow.length; j++) {
sign = flow[j] - flow[i];
if (sign < 0) {
S = -1;
} else if (sign == 0) {
S = 0;
} else {
S = 1;
}
S_sum = S_sum + S;
}
}
sigma = (flow.length * (flow.length - 1) * (2 * flow.length + 5)) / 18;
if (S_sum < 0) {
z = (S_sum - 1) / Math.pow(sigma, 0.5);
} else if (S_sum == 0) {
z = 0;
} else {
z = (S_sum + 1) / Math.pow(sigma, 0.5);
}
var pvalue = 2 * (1 - this.normalcdf({ data: Math.abs(z) }));
return [pvalue, S_sum, z];
}
/**
* Normal distribution
* @method normalcdf
* @memberof stats
* @author Alexander Michalek & Renato Amorim, IFC, University of Iowa.
* @param {Object[]} data - Contains: 1d-JS array with timeseries
* @returns {Object[]} 1d array with 3 values: p-value, value sum and z value
* @param {Object[]} data - 1d-JS array
* @returns {Number} number value for the distribution
* @example
* hydro.analyze.stats.normalcdf({data: [someData]})
*/
static normalcdf({ params, args, data }) {
let results = [];
for (var i = 0; i < data.length; i++) {
var X = data[i],
//HASTINGS. MAX ERROR = .000001
T = 1 / (1 + 0.2316419 * Math.abs(X)),
D = 0.3989423 * Math.exp((-X * X) / 2),
Prob =
D *
T *
(0.3193815 +
T * (-0.3565638 + T * (1.781478 + T * (-1.821256 + T * 1.330274))));
if (X > 0) {
Prob = 1 - Prob;
}
results.push(Prob);
}
return results;
}
/**
* D-statistic
* Computes D-statistic from two samples to evaluate if they come from the same distribution
* Reference: Kottegoda & Rosso, 2008.
* @method computeD
* @memberof stats
* @author Alexander Michalek & Renato Amorim, IFC, University of Iowa.
* @param {Object[]} data - 2d-JS array containing ordered as [samples_A, samples_B], with each being 1-d arrays
* @returns {Number} d-statistic of the samples
* @example
* hydro.analyze.stats.computeD({data: [samples_A, samples_B]})
*/
static computeD({ params, args, data }) {
var { sampleA, sampleB } = data,
maximumDifference = 0,
N = 1e3;
let minimum = this.min({ data: sampleA.concat(sampleB) }),
maximum = this.max({ data: sampleA.concat(sampleB) }),
N_A = sampleA.length,
N_B = sampleB.length;
for (var x of this.range({ params: { N }, data })) {
var CDF_A = sampleA.filter((d) => d <= x).length / N_A,
CDF_B = sampleB.filter((d) => d <= x).length / N_B,
difference = Math.abs(CDF_A - CDF_B);
if (difference > maximumDifference) {
maximumDifference = difference;
}
}
return maximumDifference;
}
/**
* Kolmogorov Smirnov Two-sided Test
* Calculates the P value, based on the D-statistic from the function above.
* Reference: Kottegoda & Rosso, 2008.
* @method KS_computePValue
* @memberof stats
* @author Alexander Michalek & Renato Amorim, IFC, University of Iowa
* @param {Object[]} data - 2d-JS array containing ordered as [samples_A, samples_B], with each being 1-d arrays
* @returns {Object[]} array with [p-Statistic, d-Statistic]
* @example
* hydro.analyze.stats.KS_computePValue({data: [samples_A, samples_B]})
*/
static KS_computePValue({ params, args, data }) {
var samples_A = data[0],
samples_B = data[1],
d = this.computeD({ data: [samples_A, samples_B] }),
n = samples_A.length,
m = samples_B.length,
p = 2 * Math.exp((-2 * d * d * (n * m)) / (n + m));
return [p, d];
}
/**
* Reject P statistic based on a significance level alpha.
* Reference: Kottegoda & Rosso, 2008.
* @method KS_rejectAtAlpha
* @memberof stats
* @author Alexander Michalek & Renato Amorim, IFC, University of Iowa
* @param {Object} params - contains {alpha: Number} with alpha being the significance level
* @param {Object[]} data - 2d-JS array containing ordered as [samples_A, samples_B], with each being 1-d arrays
* @returns {Number} rejection if p is less than the significance level
* @example
* hydro.analyze.stats.KS_rejectAtAlpha({params: {alpha: someAlpha}, data: [samples_A, samples_B]})
*/
static KS_rejectAtAlpha({ params, args, data }) {
let [p, d] = this.KS_computePValue({ data: data });
return p < params.alpha;
}
/**
Computes the probability density function
of a normal distribution.
@method normalDistribution
@memberof stats
@param {Object} params - Contains:
z-value.
@returns {Number} Probability density
function of the normal distribution.
@example
hydro.analyze.stats.normalDistributio
({params: {z: 1.5}})
*/
static normalDistribution({ params, args, data }) {
return Math.exp(-(Math.log(2 * Math.PI) + params.z * params.z) * 0.5);
}
/**
* Generates a random simulated number when run with a dataset
* @method runSimulation
* @author riya-patil
* @memberof stats
* @param {Object} data - passes data from an object
* @returns {Number} Returns a simulated number
* @example
* const testData = [
[1, 2, 3, 4, 5],
[6, 7, 8, 9, 10],
[11, 12, 13, 14, 15],
];
hydro.analyze.stats.simulate({data: testData})
*
*/
static runSimulation({ params, args, data } = {}) {
const { multiplier } = params || 1; //defaults to 1
const genRan = (min, max) => Math.random() * (max - min) + min;
const mean = this.mean({ data });
const std = this.stddev({ data });
const simNum = genRan(mean - std * multiplier, mean + std * multiplier);
return simNum;
}
/**
* Generates a random simulated number when run with a dataset
* @method runMonteCarlo
* @author riya-patil
* @memberof stats
* @param {Object[]} data - passes data from multiple objects
* @returns {number[]} returns an array of the simulated results
* @example
* const testData = [
[1, 2, 3, 4, 5],
[6, 7, 8, 9, 10],
[11, 12, 13, 14, 15],
];
hydro.analyze.stats.runMonteCarlo({data: testData})
*/
static runMonteCarlo({ params, args, data } = {}) {
const { iterations = 100, callback } = params || {};
// Extract iterations and callback from params
const results = [];
for (let i = 0; i < iterations; i++) {
let simResult;
if (callback) {
simResult = callback({ params, args, data });
} else {
const value = data;
simResult = this.runSimulation({ data: value });
}
results.push(simResult);
}
return results;
}
/**
* Generates a random simulated number when run with a dataset
* @method runVegas
* @author riya-patil
* @memberof stats
* @param {Object[]} data - passes data from multiple objects
* @returns {number[]} returns an array of the simulated results
* @example
* const testData = [
[1, 2, 3, 4, 5],
[6, 7, 8, 9, 10],
[11, 12, 13, 14, 15],
];
hydro.analyze.stats.runVegas({data: testData})
*/
static runVegas({ params, args, data } = {}) {
const { iterations = 100, callback } = params || {};
// Extract iterations and callback from params
const results = [];
for (let i = 0; i < iterations; i++) {
let simResult;
if (callback) {
simResult = callback({ params, args, data });
} else {
// Implementation details for the simulation without a callback
for (let value of data) {
const simValue = this.runSimulation({ data: value });
results.push(simValue);
}
}
}
return results;
}
/**
* Computes the probability density function (PDF) of a Gaussian (normal) distribution
* @method gaussianDist
* @author riya-patil
* @memberof stats
* @param {Object} params - x (value at which to compute the PDF), mean (mean of the distribution),
* and stddev (standard deviation of the distribution)
* @returns {Number} Probability density function of the Gaussian distribution
* @example
* const testData = {
x: 2.5,
mean: 3,
stddev: 1.2
};
hydro.analyze.stats.gaussianDist({params: testData});
*/
static gaussianDist({ params, args, data }) {
const { x, mean, stddev } = params;
const exponent = -((x - mean) ** 2) / (2 * stddev ** 2);
const coefficient = 1 / (stddev * Math.sqrt(2 * Math.PI));
return coefficient * Math.exp(exponent);
}
/**
* Probability mass function (PMF) of a Bernoulli distribution
* @method bernoulliDist
* @author riya-patil
* @memberof stats
* @param {Object} params - Contains: x (value at which to compute the PMF) and p (probability of success)
* @returns {Number} Probability mass function of the Bernoulli distribution
*
*/
static bernoulliDist({params, args, data} = {}) {
const { f, s } = params; //f = failure, s = success
if (f === 0) {
return 1 - s;
} else if (f === 1) {
return s;
} else {
return 0;
}
}
/**
* Computes the probability density function (PDF) of the Generalized Extreme Value (GEV) distribution.
* @method gevDistribution
* @author riya-patil
* @memberof stats
* @param {Object} params - Contains:
* x (value at which to compute the PDF), mu (location parameter),
* sigma (scale parameter), xi (shape parameter).
* @returns {Number} Probability density function of the GEV distribution.
*/
static gevDistribution({params, args, data} = {}) {
const { x, mu, sigma, xi } = params;
const z = (x - mu) / sigma;
if (xi === 0) {
// Calculate the PDF for the Gumbel distribution
const exponent = -z - Math.exp(-z);
return Math.exp(exponent) / sigma;
} else {
// Calculate the PDF for the Generalized Extreme Value (GEV) distribution
const firstTerm = Math.pow(1 + xi * z, -(1 / xi + 1));
const secondTerm = Math.exp((-(1 + xi * z)) ** (-1 / xi));
return firstTerm * secondTerm / sigma;
}
}
/**
* Calculates the probability mass function (PMF) of a Geometric Distribution
* @method geometricDist
* @author riya-patil
* @memberof stats
* @param {Object} params - Contains the probability of success "s" (where 0 <= s <= 1) as a parameter.
* @param {Number} args - Contains the number of trials until the first success trials (trials >= 1)
* @returns {Number} The probability of getting the first success on the n-th trial
* @example
* hydro.analyze.stats.geometricDist({ params: { s: 0.5 }, args: { trials: 3 }, data: [] });
* 0.125
*/
static geometricDist({params, args, data} = {}) {
const { s } = params || 1;
const { trials } = args;
if (trials < 1) {
return 0;
}
return (1 - s) ** (trials - 1) * s;
}
/**
* Calculates the probability mass function (PMF) of a Binomial Distribution.
* @method binomialDist
* @author riya-patil
* @memberof stats
* @param {Object} params - Contains the number of trials 'n' (n >= 0) and the probability of success
* @param {Object} args - Contains the number of successes 's'
* @param {Array} data - Empty array as no data is needed for this calculation.
* @returns {Number} The probability of getting exactly 's' successes in trials.
* @example
* hydro.analyze.stats.binomialDist({ params: { trials: 10, probSuccess: 0.5 }, args: { s: 3 });
*/
static binomialDist({params, args, data} = {}) {
const { trials, probSuccess } = params;
const { s } = args;
if (s < 0 || s > trials) {
return 0;
}
const coefficient = this.binomialCoefficient(trials, s);
const probability = coefficient * (probSuccess ** s) * ((1 - probSuccess) ** (trials - s));
return probability;
}
/**
* Multinomial Distribution - Generates random samples from a multinomial distribution.
* @method multinomialDistribution
* @author riya-patil
* @memberof stats
* @param {Object} params probabilities: 1D array of probabilities for each category; n: Number of samples to generate
* @returns {Object} samples: 2D array of generated samples, where each row represents a sample and each column represents a category
* frequencies: 2D array of frequencies for each category in the generated samples
* @example
* const multinomialData = {
* probabilities: [0.2, 0.3, 0.5],
* n: 100
* };
* hydro.analyze.stats.multinomialDistribution({ params: multinomialData });
*/
static multinomialDistribution({params, args, data} = {}) {
const { probabilities, n } = params;
const numCategories = probabilities.length;
const samples = [];
const frequencies = [];
for (let i = 0; i < n; i++) {
const sample = [];
let remainingProb = 1;
for (let j = 0; j < numCategories - 1; j++) {
const prob = probabilities[j];
const rand = Math.random() * remainingProb;
const count = Math.floor(rand / prob);
sample.push(count);
remainingProb -= count * prob;
}
const lastCategoryCount = Math.floor(remainingProb / probabilities[numCategories - 1]);
sample.push(lastCategoryCount);
samples.push(sample);
frequencies.push(this.frequency({data : sample}));
}
return { samples, frequencies };
}
/** Calculates the probability mass function (PMF) of the Log series Distribution
* @method LogSeriesDistribution
* @author riya-patil
* @memberof stats
* @param {Object} params - Contains the parameter 'probSuccess' which represents the probability of success in a single trial.
* @param {Object} args - Contains the argument 'trials' (trials >= 1) which represents the number of trials.
* @returns {Number} Probability of achieving the first success in # of trials.
* @example
* hydro.analyze.stats.logSeriesDist({params: {probSuccess: 0.2, trials: 3}})
*/
static logSeriesDist({params, args, data} = {}) {
const { probSuccess, trials } = params;
if (trials < 1) {
return 0;
}
const pmf = -Math.log(1 - probSuccess) * Math.pow(probSuccess, trials) / trials;
return pmf;
}
/** Calculates the probability density function (PDF) of the Lognormal Distribution
* @method lognormalDist
* @author riya-patil
* @memberof stats
* @param {Object} params - Contains the parameters 'mu' and 'sigma' which represent the mean and standard deviation of the associated normal distribution.
* @param {Object} args - Contains the argument 'x' which represents the value at which to evaluate the PDF.
* @returns {Number} Probability density at 'x' in the Lognormal Distribution.
* @example
* hydro.analyze.stats.lognormalDist({params: { mu: 0, sigma: 1 }, args: { x: 2 }})
*/
static lognormalDist({params, args, data} = {}) {
const { mu, sigma } = params;
const { x } = args;
if (x <= 0) {
return 0;
}
const exponent = -((Math.log(x) - mu) ** 2) / (2 * sigma ** 2);
const pdf = (1 / (x * sigma * Math.sqrt(2 * Math.PI))) * Math.exp(exponent);
return pdf;
}
/** Calculates the probability density function (PDF) of the Gumbel Distribution
* @method gumbelDist
* @author riya-patil
* @memberof stats
* @param {Object} params - Contains the parameters 'mu' (location parameter) and 'beta' (scale parameter).
* @returns {Number} Probability density at the given value 'x'.
* @example
* hydro.analyze.stats.gumbelDist({ params: { mu: 0, beta: 1, x: 2}})
*/
static gumbelDist({params, args, data} = {}) {
const { mu, beta } = params;
const { x } = args;
const z = (x - mu) / beta;
const pdf = (1 / beta) * Math.exp(-(z + Math.exp(-z)));
return pdf;
}
/** Calculates the probability density function (PDF) of the Uniform Distribution
* @method uniformDist
* @author riya-patil
* @memberof stats
* @param {Object} params - Contains the parameters 'a' (lower bound) and 'b' (upper bound).
* @param {Object} args - Contains the argument 'x' at which to evaluate the PDF.
* @returns {Number} Probability density at the given value 'x'.
* @example
* hydro.analyze.stats.uniformDist({ params: { a: 0, b: 1 }, args: { x: 0.5 } })
*/
static uniformDist({params, args, data} = {}) {
const { a, b } = params;
const { x } = args;
if (x >= a && x <= b) {
const pdf = 1 / (b - a);
return pdf;
} else {
return 0;
}
}
/** Calculates the Simple Moving Average of a given data set
* @method simpleMovingAverage
* @author riya-patil
* @memberof stats
* @param {Object} params - Contains the parameter 'windowSize' which specifies the size of the moving average window.
* @param {Object} data - Contains the array of data points.
* @returns {Array} Array of moving average values.
* @example
* const data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
* const windowSize = 3;
* hydro.analyze.stats.simpleMovingAverage({ params: { windowSize }, data });
*/
static simpleMovingAverage({params, args, data} = {}) {
const { windowSize } = params;
if (windowSize <= 0 || windowSize > data.length) {
throw new Error("Invalid window size.");
}
const movingAverage = [];
for (let i = 0; i <= data.length - windowSize; i++) {
const window = data.slice(i, i + windowSize);
const sum = window.reduce((total, value) => total + value, 0);
const average = sum / windowSize;
movingAverage.push(average);
}
return movingAverage;
}
/**
* Calculates the Linear Moving Average (LMA) of a given dataset.
* @method linearMovingAverage
* @author riya-patil
* @memberof stats
* @param {Number} params - Contains the windowSize parameter.
* @param {Array} data - 1D array of numerical values.
* @returns {Array} Array of moving averages.
* @throws {Error} If the window size is invalid.
* @example
* const windowSize = 5;
* const data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
* hydro.analyze.stats.linearMovingAverage({ windowSize, data });
*/
static linearMovingAverage({params, args, data} = {}) {
const { windowSize } = params;
if (windowSize <= 0 || windowSize > data.length) {
throw new Error("Invalid window size.");
}
const movingAverage = [];
let sum = 0;
for (let i = 0; i < windowSize; i++) {
sum += data[i];
}
movingAverage.push(sum / windowSize);
for (let i = windowSize; i < data.length; i++) {
sum += data[i] - data[i - windowSize];
movingAverage.push(sum / windowSize);
}
return movingAverage;
}
/**
* Computes the Exponential Moving Average (EMA) for a given dataset.
* @method exponentialMovingAverage
* @author riya-patil
* @memberof stats
* @param {Object} args - Contains the dataset as 'data' (1D JavaScript array) and the 'alpha' value (smoothing factor between 0 and 1)
* @returns {number[]} The Exponential Moving Average (EMA) values for the dataset
* @example
*const dataset= [1,2,3,4,5]
*const params={alpha: 0.5}
*hydro.analyze.stats.exponentialMovingAverage({params, data});
*/
static exponentialMovingAverage({params, args, data} = {}) {
const { alpha } = params;
const { dataset } = data;
const emaValues = [];
let ema = dataset[0];
for (let i = 1; i < dataset.length; i++) {
ema = alpha * dataset[i] + (1 - alpha) * ema;
emaValues.push(ema);
}
return emaValues;
}
/**
* Generates a sequence of events representing a Poisson Process
* @method poissonProcess
* @author riya-patil
* @memberof stats
* @param {Object} params - Contains the type of Poisson process ('homogeneous' or 'nonhomogeneous') and the time period 'T'.
* @param {Object} args - Additional arguments depending on the type of Poisson process.
* @returns {Array} Array of event times.
* @example
* hydro.analyze.stats.poissonProcess({ params: { type: 'homogeneous', T: 10 }, args: { lambda: 2 } });
* hydro.analyze.stats.poissonProcess({ params: { type: 'nonhomogeneous', T: 10 }, args: { rateFunction: (t) => Math.sin(t) } });
*/
static poissonProcess({params, args, data} = {}) {
const { type = 'homogeneous', T } = params;
const { lambda, rateFunction } = args;
const events = [];
let t = 0;
while (t < T) {
const rand = Math.random();
const interTime = (type === 'homogeneous' ? -1 / lambda : -1 / rateFunction(t)) * Math.log(1 - rand);
t += interTime;
if (t < T) {
if (type === 'homogeneous' || Math.random() <= rateFunction(t) / rateFunction(T)) {
events.push(t);
}
}
}
return events;
}
/**
* Generates random numbers following a Log Pearson Type III distribution.
* @method logPearsonTypeIII
* @author riya-patil
* @memberof stats
* @param {Object} params - Contains the parameters 'mu' 'sigma' and 'gamma'.
* @param {Number} size - Number of random numbers to generate.
* @returns {Array} Array of random numbers following the Log Pearson Type III distribution.
* @example
* hydro.analyze.stats.logPearsonTypeIII({ params: { mu: 1, sigma: 2, gamma: 3 }, args: { size: 100 } })
*/
static logPearsonTypeIII({params, args, data} = {}) {
const { mu, sigma, gamma } = params;
const { size } = args || 10;
const randomNumbers = [];
for (let i = 0; i < size; i++) {
const rand1 = Math.random();
const rand2 = Math.random();
const x = Math.sqrt(-2 * Math.log(rand1)) * Math.cos(2 * Math.PI * rand2);
const y = mu + sigma * (x / Math.sqrt(gamma));
const z = Math.exp(y);
randomNumbers.push(z);
}
return randomNumbers;
}
/**
* Generates random numbers from within a Box Plot distribution.
* @method boxPlotDistribution
* @author riya-patil
* @memberof stats
* @param {Object} params - Contains the parameters 'min', 'q1', 'median', 'q3', 'max'.
* @param {Object} args - Contains the argument 'size' for the number of random numbers to generate.
* @returns {Array} Array of random numbers following the Box Plot Distribution.
* @example
* hydro.analyze.stats.boxPlotDistribution({ params: { min: 1, q1: 2, median: 3, q3: 4, max: 5 }, args: { size: 100 }})
*/
static boxPlotDistribution({params, args, data} = {}) {
const { min, q1, median, q3, max } = params;
const { size } = args;
const randomNumbers = [];
for (let i = 0; i < size; i++) {
const randNum = Math.random();
let value;
if (randNum < 0.25) {
value = min - (q1 - min) * Math.random();
} else if (randNum < 0.5) {
value = q1 + (median - q1) * Math.random();
} else if (randNum < 0.75) {
value = median + (q3 - median) * Math.random();
} else {
value = q3 + (max - q3) * Math.random();
}
randomNumbers.push(value);
}
return randomNumbers;
}
/**
* Mean Squared Error (MSE) Estimation - Computes the Mean Squared Error (MSE) between two datasets
* @method meanSquaredError
* @author riya-patil
* @memberof stats
* @param {Object} args - Contains the two datasets as 'data1' and 'data2' (1D JavaScript arrays)
* @returns {number} The Mean Squared Error (MSE) between the two datasets
* @example
* const dataset1 = [1, 2, 3, 4, 5];
* const dataset2 = [1.5, 2.8, 3.6, 4.2, 4.9];
* hydro.analyze.stats.meanSquaredError({ data: { data1: dataset1, data2: dataset2 } });
*/
static meanSquaredError({params, args, data} = {}) {
const { data1, data2 } = data;
if (data1.length !== data2.length) {
throw new Error('Datasets need to have same length');
}
const n = data1.length;
let sum = 0;
for (let i = 0; i < n; i++) {
const diff = data1[i] - data2[i];
sum += diff * diff;
}
const mse = sum / n;
return mse;
}
/**
* Return Period - Calculates the return period for a given probability of occurrence.
* @method returnPeriod
* @author riya-patil
* @memberof stats
* @param {Object} params - probability: Probability of occurrence (between 0 and 1)
* @returns {Number} Return period corresponding to the given probability
* @example
* const returnPeriodData = {
* probability: 0.1
* };
* hydro.analyze.stats.returnPeriod({ params: returnPeriodData });
*/
static returnPeriod({params, args, data} = {}) {
const { probability } = params;
if (probability <= 0 || probability >= 1) {
throw new Error("Probability must be between 0 and 1 (exclusive).");
}
return 1 / probability;
}
/**
* Performs differencing on a time series dataset to remove trend or seasonality from the data
* @method differencing
* @author riya-patil
* @memberof stats
* @param {Object} params - Contains the order parameter
* @param {Array} data - 1D array of numerical values representing a time series
* @returns {Array} Differenced time series
* @throws {Error} If the order is invalid
* @example
* const order = 1;
* const timeSeries = [1, 3, 6, 10, 15];
* const differencedSeries = stats.differencing({ order, data: timeSeries });
*/
static differencing({params, args, data} = {}) {
const order = params.order;
const timeSeries = data;
if (order >= timeSeries.length) {
throw new Error('Invalid order for differencing.');
}
const differencedSeries = timeSeries.slice(order).map((value, i) => value - timeSeries[i]);
return differencedSeries
}
/**
* Computes the variance of residuals in a regression model to detect heteroskedasticity
* @method residualVariance
* @author riya-patil
* @memberof stats
* @param {Array} data - 1D array of numerical values representing the residuals.
* @returns {number} Variance of residuals
* @returns {Error} if not given valid array of residuals or not given correct number of arrays
* @example
* const residuals = [1.5, -2.1, 0.8, -1.2, 2.5];
* const variance = stats.residualVariance({ data: residuals });
*/
static residualVariance({params, args, data} = {}) {
const residuals = data;
if (!Array.isArray(residuals)) {
throw new Error('Invalid data. Expecting an array of residuals.');
}
if (residuals.length < 2) {
throw new Error('Insufficient data. Expecting an array of at least 2 residuals.');
}
const squaredResiduals = residuals.map((residual) => residual * residual);
const variance = squaredResiduals.reduce((sum, value) => sum + value, 0) / squaredResiduals.length;
return variance;
}
/**
* Computes the coefficients of a linear regression model.
* @method regression
* @author riya-patil
* @memberof stats
* @param {Object} data - Object containing predictor variables (X) and target variable (y).
* @returns {Array} Coefficients of the linear regression model.
* @example
* const X = [[1, 2], [3, 4], [5, 6]];
* const y = [3, 5, 7];
* hydro.analyze.stats.regression({ data: { X, y } });
*/
static regression({params, args, data} = {}) {
const X = data.X; // Matrix of predictor variables
const y = data.y; // Array of target variable
const XWithIntercept = X.map((row) => [1, ...row]);
const Xint = multiplyMatrix(transposeMatrix(XWithIntercept), XWithIntercept);
const Yint = multiplyMatrix(transposeMatrix(XWithIntercept), y);
const inverseXtX = matrixInverse(Xint);
const coefficients = multiplyMatrix(inverseXtX, Yint);
return coefficients;
}
/**
* Performs multivariate regression analysis
* @method multiregression
* @author riya-patil
* @memberof stats
* @param {Object} data - Data for the multivariate regression
* @returns {Array} Coefficients of the linear regression model.
* @example
* const X = [[1, 2], [3, 4], [5, 6]];
* const y = [3, 5, 7];
* hydro.analyze.stats.multiregression({ data: { X, y } });
*/
static multiregression({params, args, data} = {}) {
const X = data.X; // Matrix of predictor variables
const Y = data.Y; // Array of target variables
const coefficients = [];
for (let i = 0; i < Y.length; i++) {
const y = Y[i];
const regressionData = { X, y };
const coefficient = this.regression({ data: regressionData });
coefficients.push(coefficient);
}
return coefficients;
}
/**
* Performs White's test for heteroscedasticity
* @method whitesTest
* @author riya-patil
* @param {Object} params - Parameters for the test, errors is array of residuals while regressors is array of regressor vars
* @returns {Object} Object containing test statistic and p-value
* @throws {Error} If the input arrays have different lengths
* @example
* const params = {
* errors: [1, 2, 3, 4, 5],
* regressors: [[1, 1], [2, 1], [3, 1], [4, 1], [5, 1]]
* };
* hydro.analyze.stats.whitesTest({ params });
*/
static whitesTest({ params }) {
const { errors, regressors } = params;
if (errors.length !== regressors.length) {
throw new Error("Input arrays must have the same length.");
}
const n = errors.length;
const k = regressors[0].length; // Number of regressors (variables)
let XX = 0;
let XE = 0;
let EE = 0;
for (let i = 0; i < n; i++) {
const error = errors[i];
const regressor = regressors[i];
XX += this.dotProduct(regressor, regressor);
XE += this.dotProduct(regressor, error); // Adjust for the size of the regressors
EE += error ** 2;
}
const testStatistic = n * (XE ** 2) / (XX * EE);
const pValue = 1 - this.chisqCDF(testStatistic, k);
return { testStatistic, pValue };
}
/**
* Performs the Breusch-Pagan test for heteroscedasticity.
* @method breuschPaganTest
* @author riya-patil
* @memberof stats
* @param {Object} params errors: Array of residuals, regressors: Array of regressor variables
* @returns {Object} Object containing test statistic and p-value.
* @throws {Error} If the input arrays have different lengths.
* @example
* const params = {
* errors: [1, 2, 3, 4, 5],
* regressors: [[1, 1], [2, 1], [3, 1], [4, 1], [5, 1]]
* };
* hydro.analyze.stats.breuschPaganTest({ params });
*/
static breuschPaganTest({ params }) {
const { errors, regressors } = params;
if (errors.length !== regressors.length) {
throw new Error("Input arrays must have the same length.");
}
const n = errors.length;
const k = regressors[0].length;
let residualsSquared = [];
for (let i = 0; i < n; i++) {
const error = errors[i];
residualsSquared.push(error ** 2);
}
let XX = 0;
let XR = 0;
let RR = 0;
for (let i = 0; i < n; i++) {
const regressor = regressors[i];
const residualSquared = residualsSquared[i];
const dotProduct = dotProduct(regressor, regressor);
XX += dotProduct;
XR += dotProduct * residualSquared;
RR += residualSquared ** 2;
}
const testStatistic = (n / 2) * (Math.log(XR) - (1 / n) * Math.log(XX));
const pValue = 1 - chisqCDF(testStatistic, k);
return { testStatistic, pValue };
}
/**
* Performs Goldfeld-Quandt test for heteroscedasticity
* @method goldfeldQuandtTest
* @author riya-patil
* @param {Object} params - residuals (Array of residuals from a regression model), independentVar (Array of values of the independent variable)
* @returns {Object} Object containing test statistic and p-value
* @throws {Error} If the input arrays have different lengths
* @example
* const residuals = [1.2, 2.3, 0.8, 1.9, 1.5, 2.6];
* const independentVar = [3, 4, 5, 6, 7, 8];
* const result = stats.goldfeldQuandtTest({ params: { residuals, independentVar } });
* console.log(result);
*/
static goldfeldQuandtTest({ params, args, data }) {
const { residuals, independentVar } = params;
if (residuals.length !== independentVar.length) {
throw new Error("Input arrays must have the same length.");
}
const n = residuals.length;
const k = Math.floor(n * 0.4); // 40% of the data in each subset
const sortedIndices = independentVar.map((_, index) => index).sort((a, b) => independentVar[a] - independentVar[b]);
const lowSubsetIndices = sortedIndices.slice(0, k);
const highSubsetIndices = sortedIndices.slice(-k);
const lowResiduals = lowSubsetIndices.map((index) => residuals[index]);
const highResiduals = highSubsetIndices.map((index) => residuals[index]);
const testStatistic = (Math.max(...highResiduals) ** 2) / (Math.min(...lowResiduals) ** 2);
const pValue = 1 - chisqCDF(testStatistic, k - 1);
return { testStatistic, pValue };
}
/**
* Generates a random simulated number when run with a dataset
* @method runMarkovChainMonteCarlo
* @author riya-patil
* @memberof stats
* @param {Object[]} data - passes data from multiple objects
* @returns {number[]} returns an array of the simulated results
* @example
* const options = {
params: {
iterations: 100,
},
data: {
initialState,
transitionMatrix,
},
};
hydro.analyze.stats.runMarkovChainMonteCarlo(options);
*/
static runMarkovChainMonteCarlo({ params, args, data } = {}) {
const { iterations = 100, callback } = params || {};
const results = [];
let currentState = data.initialState;
for (let i = 0; i < iterations; i++) {
let nextState;
if (callback) {
nextState = callback({ params, args, data, currentState });
} else {
nextState = getNextState(data.transitionMatrix, currentState);
}
results.push(nextState);
currentState = nextState;
}
return results;
}
/***************************/
/***** Helper functions ****/
/***************************/
/**
* Preprocessing tool for joining arrays for table display.
* @method joinarray
* @memberof stats
* @param {Object} data - 2d-JS array as [[someData1], [someData2]]
* @returns {Object[]} Array for table display.
* @example
* hydro.analyze.stats.joinarray({data: [[someData1], [someData2]]})
*/
static joinarray({ params, arg, data } = {}) {
var temp = [];
for (var i = 1; i < data[0].length; i++) {
if (!temp[i]) {
temp[i] = [];
}
temp[i] = [data[0], data[1]].reduce((a, b) => a.map((v, i) => v + b[i]));
}
return temp;
}
/**
* Helper function for preparing nd-JSarrays for charts and tables for duration/discharge.
* @method flatenise
* @memberof stats
* @param {Object} params - Contains: columns (nd-JS array with names as [someName1, someName2,...])
* @param {Object} data - Contains: nd-JS array object required to be flatenned as [[somedata1, somedata2,...],[somedata1, somedata2,...],...]
* @returns {Object[]} Flatenned array.
* @example
* hydro.analyze.stats.flatenise({params:{columns: [someName1, someName2,...]},
* data: [[somedata1, somedata2,...],[somedata1, somedata2,...],...]})
*/
static flatenise({ params, args, data } = {}) {
var x = params.columns;
var d = data;
var col = [];
var dat = [];
for (var i = 0; i < x.length; i++) {
col.push(x[i]);
}
for (var j = 0; j < d.length; j++) {
dat.push(d[j].flat());
}
return [col, dat];
}
/**
* Turns data from numbers to strings. Usable when
* retrieving data or uploading data.
* @method numerise
* @memberof stats
* @param {Object} data - Contains: 1d-JS array with data composed of strings as [dataValues].
* @returns {Object[]} Array as numbers.
* @example
* hydro.analyze.stats.numerise({data: [someValues]})
*/
static numerise({ params, args, data } = {}) {
var result = data.map((x) => parseFloat(x));
return result;
}
/**
* Filters out items in an array that are undefined, NaN, null, ", etc.
* @method cleaner
* @memberof stats
* @param {Object} data - Contains: 1d-JS array with data to be cleaned as [dataValues]
* @returns {Object[]} Cleaned array.
* @example
* hydro.analyze.stats.cleaner({data: [someValues]})
*/
static cleaner({ params, args, data } = {}) {
var x = data.filter((x) => x === undefined || !Number.isNaN(x));
return x;
}
/**
* Filters out items in an array based on another array.
* @method itemfilter
* @memberof stats
* @param {Object} data - Contains: 2d-JS array with data to be kept and removed as [[dataKept],[dataRemoved]]
* @returns {Object[]} Cleaned array.
* @example
* hydro.analyze.stats.itemfilter({data: [[dataKept], [dataRemoved]]})
*/
static itemfilter({ params, args, data } = {}) {
var x = data[0].filter((el) => !data[1].includes(el));
return x;
}
/**
* Changes a 1d-date array into local strings. Used mainly
* for changing displaying into google charts.
* @method dateparser
* @memberof stats
* @param {Object[]} data - Contains: 1-dJS array with date values as [dateValue].
* @returns {Object[]} Array with date parsed.
* @example
* hydro.analyze.stats.dateparser({data: [someValues]})
*/
static dateparser({ params, args, data } = {}) {
var x = this.copydata({ data: data });
var xo = [];
for (var j = 0; j < data.length; j++) {
xo.push(new Date(data[j]).toLocaleString());
}
return xo;
}
/**
* Changes a m x n matrix into a n x m matrix (transpose).
* Mainly used for creating google charts. M != N.
* @method arrchange
* @memberof stats
* @param {Object} data - Contains: nd-JS array representing [[m],[n]] matrix.
* @returns {Object[]} [[n],[m]] array.
* @example
* hydro.analyze.stats.arrchange({data: [[someSize1],[someSize2]]})
*/
static arrchange({ params, args, data } = {}) {
var x = this.copydata({ data: data });
var transp = (matrix) => matrix[0].map((x, i) => matrix.map((x) => x[i]));
return transp(x);
}
/**
* Pushes at the end of an array the data of another array.
* @method push
* @memberof stats
* @param {Object} data - Contains: 2d-JS array with data arranged as [[willPush],[pushed]]
* @returns {Object[]} Last dataset with pushed data.
* @example
* hydro.analyze.stats.push({data: [[dataSet1], [dataSet2]]})
*/
static push({ params, args, data } = {}) {
for (var j = 0; j < data[1].length; j++)
for (var i = 0; i < data[0].length; i++) {
data[0][j].push(data[1][j][i]);
}
return arr1;
}
/**
* Generates an array of random integers within a specified range.
* @method generateRandomData
* @memberof stats
* @param {number} size - The number of
* elements in the array to be generated.
* @param {number} [range=100] - The upper
* limit (exclusive) of the range from which
* the random integers will be generated.
* @returns {number[]} An array of random
* integers between 0 and range-1, of length
* size.
* @example
* hydro.analyze.stats.generateRandomDat
* (10, 50) // returns [23, 48, 15, 9, 36,
* 28, 39, 18, 20, 22]
*/
static generateRandomData(size, range = 100) {
let data = [];
for (let i = 0; i < size; i++) {
data.push(Math.floor(Math.random() * range)); // generates a random integer between 0 and 99
}
return data;
}
/**
* **Still needs some testing**
* Compute the autocovariance matrix from the autocorrelation values
* @method autocovarianceMatrix
* @author riya-patil
* @memberof stats
* @param {Object} data - array with autocorrelation values
* @param {number} params - number of lags
* @returns {Object} Autocovariance matrix
* @example
* const acTestData = [1, 0.7, 0.5, 0.3];
* const lags = 2
* hydro.analyze.stats.autocovarianceMatrix({params: lag, data : actestData});
*/
static autocovarianceMatrix({ params, args, data } = {}) {
const { lag, lags } = params || { lag: 2, lags: 2 };
const length = data.length;
const mean = this.mean({ data });
const autocorrelation = [];
const matrix = [];
for (let l = 0; l <= lag; l++) {
let sum = 0;
for (let t = l; t < length; t++) {
sum += (data[t] - mean) * (data[t - l] - mean);
}
autocorrelation.push(sum / ((length - l) * this.variance({ data })));
}
for (let i = 0; i <= lags; i++) {
const row = [];
for (let j = 0; j <= lags; j++) {
const ac = autocorrelation[Math.abs(i - j)];
row.push(i === j ? 1 : ac);
}
matrix.push(row);
}
return { autocorrelation, matrix };
}
/**
* Calculates the binomial coefficient (n choose k format)
* @method binomialCoefficient
* @author riya-patil
* @memberof stats
* @param {Number} trials - The number of trials
* @param {Number} s - The number of successes
* @returns {Number} The binomial coefficient (trials choose s)
*/
static binomialCoefficient(trials, s) {
if (s === 0 || s === trials) {
return 1;
}
if (s > trials) {
return 0;
}
let coefficient = 1;
for (let i = 1; i <= s; i++) {
coefficient *= (trials - i + 1) / i;
}
return coefficient;
}
/**
* Multiplies two matrices
* @method multipleMatrix
* @author riya-patil
* @memberof stats
* @param {Array} matrix1 - First matrix
* @param {Array} matrix2 - Second matrix
* @returns {Array} Result of matrix multiplication
* @example
* const matrix1 = [[1, 2], [3, 4]];
* const matrix2 = [[5, 6], [7, 8]];
* hydro.analyze.stats.multiplyMatrix(matrix1, matrix2)
*/
static multiplyMatrix(matrix1, matrix2) {
const m1Rows = matrix1.length;
const m1Cols = matrix1[0].length;
const m2Cols = matrix2[0].length;
const result = [];
for (let i = 0; i < m1Rows; i++) {
result[i] = [];
for (let j = 0; j < m2Cols; j++) {
let sum = 0;
for (let k = 0; k < m1Cols; k++) {
sum += matrix1[i][k] * matrix2[k][j];
}
result[i][j] = sum;
}
}
return result;
}
/**
* Transposes a matrix
* @method transposeMatrix
* @author riya-patil
* @memberof stats
* @param {Array} matrix - Matrix to transpose
* @returns {Array} Transposed matrix
* @example
* const matrix = [[1, 2, 3], [4, 5, 6]];
* hydro.analyze.stats.transposeMatrix(matrix)
*/
static transposeMatrix(matrix) {
const rows = matrix.length;
const cols = matrix[0].length;
const transposed = [];
for (let j = 0; j < cols; j++) {
transposed[j] = [];
for (let i = 0; i < rows; i++) {
transposed[j][i] = matrix[i][j];
}
}
return transposed;
}
/**
* Computes the inverse of a matrix
* @method matrixInverse
* @author riya-patil
* @memberof stats
* @param {Array} matrix - Matrix to compute inverse of
* @returns {Array} Inverse of the matrix
* @example
* const matrix = [[1, 2, 3], [4, 5, 6]];
* hydro.analyze.stats.matrixInverse(matrix)
*/
static matrixInverse(matrix) {
const n = matrix.length;
const inv = [];
const inversed = [];
for (let i = 0; i < n; i++) {
inversed[i] = [];
inv[i] = [];
for (let j = 0; j < n; j++) {
inversed[i][j] = i === j ? 1 : 0;
inv[i][j] = matrix[i][j];
}
}
for (let k = 0; k < n; k++) {
const pivot = inv[k][k]; //elimatination to obtain inversed matrix
for (let j = 0; j < n; j++) {
inv[k][j] /= pivot;
inversed[k][j] /= pivot;
}
//row operations to eliminate other elements
for (let i = 0; i < n; i++) {
if (i !== k) {
const factor = inv[i][k];
for (let j = 0; j < n; j++) {
inv[i][j] -= factor * inv[k][j];
inversed[i][j] -= factor * inversed[k][j];
}
}
}
}
return inversed;
}
/**
* Calculates the cumulative distribution function (CDF) of the chi-square distribution
* NOTE: This will require revision in the future, readjusting considering lookups or fitting to a gamma distribution instead
* @method chisqCDF
* @author riya-patil
* @memberof stats
* @param {number} x The value at which to evaluate the CDF
* @param {number} k The degrees of freedom
* @returns {number} The cumulative probability
* @example
* const x = 10
* const df = 20
* hydro.analyze.stats.chisCDF(10, 20)
*/
static chisqCDF(x, k) {
let term = Math.exp(-x / 2);
let sum = term;
for (let i = 1; i < k; i++) {
let prevTerm = term;
term *= x / (2 * (i + 1));
sum += term;
if (term === prevTerm) break;
}
return 1 - sum;
}
/**
* Calculates the dot product of two vectors. Both vectors should be represented as 1D JS arrays with the same length
* @method dotProduct
* @author riya-patil
* @param {Array} a - The first vector
* @param {Array} b - The second vector
* @returns {number} The dot product
* @throws {Error} If the input vectors have different lengths
* @example
* const a = [1, 2, 3, 4, 5]
* const b = [10, 20, 30, 40, 50]
* hydro.analyze.stats.dotProduct(a,b)
*/
static dotProduct(a, b) {
if (a.length != b.length) {
throw new Error("Input vectors must have the same length.");
}
let result = 0;
for (let i = 0; i < a.length; i++) {
result += a[i] * b[i];
}
return result;
}
/**
* Gets the next state based on the transition probabilities defined in the transition matrix.
* @method getNextState
* @author riya-patil
* @memberof stats
* @param {number[][]} transitionMatrix transition matrix representing the probabilities of transitioning between states.
* @param {number} currentState current state of the function
* @returns {number} Next state selected based on the transition probabilities.
* @example
* const transitionMatrix = [
[0.2, 0.8],
[0.5, 0.5],
];
const initialState = 0;
*/
static getNextState(transitionMatrix, currentState) {
const randomValue = Math.random();
let cumulativeProbability = 0;
for (let i = 0; i < transitionMatrix[currentState].length; i++) {
cumulativeProbability += transitionMatrix[currentState][i];
if (randomValue <= cumulativeProbability) {
return i;
}
}
// If no state is selected, return the current state as a fallback
return currentState;
}
/**********************************/
/*** End of Helper functions **/
/**********************************/
}