Source code for isopy.toolbox.regress

from matplotlib.axes import Axes as _Axes
from matplotlib.patches import Polygon as _Polygon
from scipy import stats
import numpy as np
from isopy import core
from isopy import toolbox
from isopy import array_functions


__add__ = ['linregress', 'yorkregress', 'yorkregress2', 'yorkregress3', 'yorkregress95', 'yorkregress99']

import isopy.checks


[docs]def linregress(x, y): """ Calculate a linear least-squares regression for two sets of measurements. Uses ``scipy.stats.linregress(x, y)`` for the regression. See `here<https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.linregress.html>`_ for the scipy documentation. Parameters ---------- x, y : numpy_array_like Data points through which the regression should be calculated. Returns ------- result : LinregressResult The returned object contains the following attributes: * **slope** - Slope of the regression line. * **intercept** - Intercept of the regression line. * **slope_se** - Standard error of the slope. * **intercept_se** - Standard error of the intercept. * **df** - The degrees of freedom of the regression (N - 2). The returned object also contains the following methods: * **y(x)** - Returns the value of y at *x* * **label(sigfig)** - Returns a string in the format "y=mx + c" with *sigfig* significant figures. """ # * **pvalue** - Two-sided p-value for a hypothesis test whose null hypothesis is that the slope is zero, using Wald Test with t-distribution of the test statistic. # * **rvalue** - Correlation coefficient. result = stats.linregress(x, y) return LinregressResult(result.slope, result.intercept, result.stderr, result.intercept_stderr, len(x) - 2)
[docs]def yorkregress(x, y, xerr, yerr, r=0, err_ci = None, err_zscore=None, result_ci = None, result_zscore=None, tol=1e-10): """ Calculate the slope and intercept taking into account the uncertainty on x and y. Uncertainties on the slope and intercept are given as standard errors. Based on the formulas from `York et al. (2004) American Journal of Physics 72, 367 <https://doi.org/10.1119/1.1632486>`_. If neither *err_ci* or *err_zscore* are given it assumes the uncertainties represent 1 SD/SE. If neither *result_ci* or *result_zscore* are giuen the *err_ci*/*err_zscore* is also used for the uncertainty on the slope and intercept. The functions *yorkregress2* and *yorkregress3* are also avaliable with preset values for *err_zscore* of 2 and 3 respectively. Similarly, *yorkregress95* and *yorkregress99* exists with preset values *err_ci* 0.95 and 0.99. Parameters ---------- x, y : numpy_array_like Data points through which the regression should be calculated. xerr, yerr : numpy_array_like Uncertainties for *x* and *y* values. Can be an array with the same size as *x* and *y* or a scalar value which will be used for every datapoint. r : float, optional Correlation coefficient between errors in x and y. Default value is ``0``. err_ci : float, optional The confidence interval of *xerr* and *yerr*. err_zscore : float, optional The zscore of *xerr* and *yerr*. result_ci : float, optional The desired confidence interval of the uncertainty on the slope and intercept. result_zscore : float, optional The desired zscore of the uncertainty on the slope and intercept. tol : float, optional Tolerance for fit. Default value is ``1e-10``. Returns ------- result : YorkregressResult The returned object contains the following attributes: * **slope** - The slope of the regression * **intercept** - The y-axis intercept of the regression * **slope_se** - The standard error of the slope * **intercept_se** - The standard error of the y-axis intercept * **msdw** - The mean square weighted deviate of the regression. * **df** - The degrees of freedom of the regression (N - 2). * **pvalue** - The two-tailed probability of the chi-squared value with *df* degrees of freedom. The returned object also contains the following methods: * **y(x)** - Returns the value of y at *x* * **yerr(x)** - Returns the standard error on y at *x*. * **label(sigfig)** - Returns a string in the format "y=mx + c, msdw" with *sigfig* significant figures. """ # * **sumW** - The sum of the weights. # * **xbar** - The weighted mean of x. x = isopy.checks.check_type('x', x, np.ndarray, coerce=True, coerce_into=np.array) y = isopy.checks.check_type('y', y, np.ndarray, coerce=True, coerce_into=np.array) xerr = isopy.checks.check_type('xerr', xerr, np.ndarray, np.float64, coerce=True, coerce_into=[np.float64, np.array]) yerr = isopy.checks.check_type('yerr', yerr, np.ndarray, np.float64, coerce=True, coerce_into=[np.float64, np.array]) r = isopy.checks.check_type('r', r, np.ndarray, np.float64, coerce=True, coerce_into=[np.float64, np.array]) tol = isopy.checks.check_type('tol', tol, np.float64, coerce=True) if err_ci is not None and err_zscore is not None: raise ValueError('Both error ci and zscore was given') if result_ci is not None and result_zscore is not None: raise ValueError('Both result ci and zscore was given') if err_ci is not None and (err_ci < 0 or err_ci > 1): raise ValueError('error confidence interval must be between 0 and 1') if result_ci is not None and (result_ci < 0 or result_ci > 1): raise ValueError('result confidence interval must be between 0 and 1') X = x Y = y Xerr = xerr Yerr = yerr if r > 1 or r < 0: raise ValueError('r must be between 0 and 1') if X.ndim != 1: raise ValueError('parameter "x": must have 1 dimension not {}'.format(X.ndim)) if Y.ndim != 1: raise ValueError('parameter "y": must have 1 dimension not {}'.format(X.ndim)) if X.size != Y.size: raise ValueError('x and y must have the same size') if not isinstance(Xerr, np.ndarray): Xerr = np.ones(X.size) * Xerr elif Xerr.size != X.size: raise ValueError('xerr and x must have the same size') if not isinstance(Yerr, np.ndarray): Yerr = np.ones(Y.size) * Yerr elif Yerr.size != Y.size: raise ValueError('yerr and y must have the same size') if err_ci is not None: ci = array_functions.calculate_ci(err_ci) Xerr = Xerr / ci Yerr = Yerr / ci if result_ci is None and result_zscore is None: result_ci = err_ci elif err_zscore is not None: Xerr = Xerr / err_zscore Yerr = Yerr / err_zscore if result_ci is None and result_zscore is None: result_zscore = err_zscore # Do calculations wX = 1 / (Xerr ** 2) wY = 1 / (Yerr ** 2) k = np.sqrt(wX * wY) slope = (np.mean(X * Y) - np.mean(X) * np.mean(Y)) / (np.mean(X ** 2) - np.mean(X) ** 2) # Iteratively converge self.slope until tol is reached for i in range(1000): W = (wX * wY) / (wX + (slope ** 2) * wY - 2 * slope * r * k) Xbar = np.sum(W * X) / np.sum(W) Ybar = np.sum(W * Y) / np.sum(W) U = X - Xbar V = Y - Ybar beta = W * (U / wY + (slope * V) / wX - (slope * U + V) * (r / k)) b2 = np.sum(W * beta * V) / np.sum(W * beta * U) dif = np.abs(b2 / slope - 1) slope = b2 if dif < tol: break else: raise ValueError('Unable to calculate a fit after 1000 iterations.') # Calculate self.intercept intercept = Ybar - slope * Xbar # Calculate adjusted points x = Xbar + beta xbar = np.sum(W * x) / np.sum(W) u = x - xbar y = Ybar + slope * beta ybar = np.sum(W * y) / np.sum(W) v = y - ybar # Calculate the uncertianty on the slope slope_error = np.sqrt(1 / np.sum(W * (u ** 2))) intercept_error = np.sqrt(1 / np.sum(W) + ((0 - xbar) ** 2) * (slope_error ** 2)) # Goodness of fit S = np.sum(W * (Y - slope * X - intercept) ** 2) dof = x.size - 2 if dof > 0: #mXY = np.array([X - x, Y - y]).T.reshape(-1, 1, 2) #mXYerr = np.array([Xerr * Xerr, r * Xerr * Yerr, r * Xerr * Yerr, Yerr * Yerr]).T.reshape(-1, 2, 2) #miXYerr = np.linalg.inv(mXYerr) #mXYt = mXY.reshape(-1, 2, 1) #rmXY = np.sum(mXY @ miXYerr @ mXYt) same as S msdw = S / dof p = 1 - stats.chi2.cdf(S, dof) p2 = (p-0.5)*2 else: msdw = 1 p2 = 1 if result_ci is not None: slope_se = slope_error * stats.norm.ppf(0.5 + result_ci / 2) elif result_zscore is not None: slope_se = slope_error * result_zscore else: slope_se = slope_error intercept_se = np.sqrt(1 / np.sum(W) + ((0 - xbar) ** 2) * (slope_se ** 2)) return YorkregressResult(slope, intercept, slope_se, intercept_se, dof, msdw, p2, np.sum(W), xbar)
yorkregress1 = core.partial_func(yorkregress, 'yorkregress2', err_zscore=1, result_zscore=1) yorkregress2 = core.partial_func(yorkregress, 'yorkregress2', err_zscore=2, result_zscore=2) yorkregress3 = core.partial_func(yorkregress, 'yorkregress3', err_zscore=3, result_zscore=3) yorkregress95 = core.partial_func(yorkregress, 'yorkregress95', err_ci=0.95, result_ci=0.95) yorkregress99 = core.partial_func(yorkregress, 'yorkregress99', err_ci=0.99, result_ci=0.99) class LinregressResult: def __init__(self, slope, intercept, slope_se, intercept_se, df): self.slope = slope self.intercept = intercept self.slope_se = slope_se self.intercept_se = intercept_se self.df = df def __repr__(self): return (f'{self.__class__.__name__}(slope={self.slope}, intercept={self.intercept}, ' f'slope_se={self.slope_se}, intercept_se={self.intercept_se}, ' f'df={self.df})') def y(self, x): """ Return y at *x*. """ return self.slope * x + self.intercept def y_se(self, x): """ Currently not avaliable for linear regressions. Raises ``NotImplementedError``. """ raise NotImplementedError('"y_se" is currently not avaliable for linear regressions') def label(self, sigfig=5): label = 'y=' if np.isnan(self.slope): label = f'{label}(nan±nan) + (nan±nan)' else: label = f'{label}({toolbox.plot._format_sigfig(self.slope, sigfig, self.slope_se)}' label = f'{label}±{toolbox.plot._format_sigfig(self.slope_se, sigfig, self.slope_se)})x' label = f'{label} + ({toolbox.plot._format_sigfig(self.intercept, sigfig, self.intercept_se)}' label = f'{label}±{toolbox.plot._format_sigfig(self.intercept_se, sigfig, self.intercept_se)})' return label class YorkregressResult(LinregressResult): def __init__(self, slope, intercept, slope_se, intercept_se, df, msdw, pvalue, sumW, xbar): self.slope = slope self.intercept = intercept self.slope_se = slope_se self.intercept_se = intercept_se self.df = df self.msdw = msdw self.pvalue = pvalue self._sumW = sumW self._xbar = xbar def __repr__(self): return (f'{self.__class__.__name__}(slope={self.slope}, intercept={self.intercept}, ' f'slope_se={self.slope_se}, intercept_se={self.intercept_se}, df={self.df}, ' f'msdw={self.msdw}, pvalue={self.pvalue})') def y_se(self, x): """ Return the standard error of y at *x*. """ return np.sqrt(1 / self._sumW + ((x - self._xbar) ** 2) * (self.slope_se ** 2)) def label(self, sigfig=5): label = 'y=' if np.isnan(self.slope): label = f'{label}(nan±nan) + (nan±nan), msdw=nan' else: label = f'{label}({toolbox.plot._format_sigfig(self.slope, sigfig, self.slope_se)}' label = f'{label}±{toolbox.plot._format_sigfig(self.slope_se, sigfig, self.slope_se)})x' label = f'{label} + ({toolbox.plot._format_sigfig(self.intercept, sigfig, self.intercept_se)}' label = f'{label}±{toolbox.plot._format_sigfig(self.intercept_se, sigfig, self.intercept_se)})' label = f'{label}, msdw={self.msdw:.2f}' return label