Source code for posydon.interpolation.data_scaling

"""Module for scaling data before and after interpolation."""


# Module authorship metadata: one "Name <e-mail>" entry per author.
__authors__ = [
    "Juanga Serra Perez <jgserra@northwestern.edu>",
]


import numpy as np


class DataScaler:
    """Data normalization class.

    This class provides normalizing tools for float 1D arrays. Features can
    be standardized or scaled to a range depending on the method chosen when
    calling the `fit` or `fit_and_transform` functions.

    Supported methods are 'min_max', 'max_abs', 'standarize', their log
    versions 'log_min_max', 'neg_log_min_max', 'log_max_abs',
    'log_standarize', 'neg_log_standarize', plus 'log' and 'none'. The
    'log_*' variants work on log10(x), the 'neg_log_*' variants on log10(-x).
    """

    # Methods whose output range is controlled by `lower` / `upper`.
    _RANGE_METHODS = ('min_max', 'log_min_max', 'neg_log_min_max')

    def __init__(self):
        """Initialize the data scaler.

        No parameters are expected. After instantiation use methods `fit` or
        `fit_and_transform` first to fit a scaling object to a given vector
        of values.

        Example
        -------
        >>> sc = DataScaler()

        """
        # Fitted parameters of the transform (list; content depends on method)
        self.params = None
        # Name of the fitted scaling method; None until `fit` succeeds
        self.method = None
        # These two parameters will only be used when method is (log)_min_max
        self.lower = None
        self.upper = None

    @staticmethod
    def _safe_divisor(value):
        """Return `value`, or 1.0 when it is a scalar exactly equal to zero.

        A zero divisor appears when the fitted data were constant (e.g. a
        single-element array). Dividing by 1 instead keeps the transformed
        values finite, and the inverse transforms still recover the original
        constant because they multiply by the stored (zero) spread.
        Non-scalar values are returned untouched.
        """
        if np.ndim(value) == 0 and value == 0:
            return 1.0
        return value

    def _to_range(self, values):
        """Map `values` linearly from the fitted [min, max] to [lower, upper]."""
        vmin, vmax = self.params[0], self.params[1]
        return ((values - vmin) / self._safe_divisor(vmax - vmin)
                * (self.upper - self.lower) + self.lower)

    def _from_range(self, scaled):
        """Map `scaled` linearly from [lower, upper] back to [min, max]."""
        vmin, vmax = self.params[0], self.params[1]
        return ((scaled - self.lower) / (self.upper - self.lower)
                * (vmax - vmin) + vmin)

    def fit(self, x, method='none', lower=-1.0, upper=1.0):
        """Fit a transform of 1D numpy array x.

        Computes the parameters that define the transform. The scaler state
        (`method`, `params`, `lower`, `upper`) is only updated once every
        check and computation has succeeded, so a failed call leaves any
        previously fitted transform intact.

        Parameters
        ----------
        x : numpy.ndarray
            expects a 1D array and finds norm values of columns
        method : str
            Scaling method. Possible values: 'min_max', 'max_abs',
            'standarize' and their log versions 'log_min_max',
            'neg_log_min_max', 'log_max_abs', 'log_standarize',
            'neg_log_standarize', as well as 'log' and 'none'.
        lower : float
            lower range value of x_t after (log_)min_max scaling
        upper : float
            upper range value of x_t after (log_)min_max scaling

        Raises
        ------
        AssertionError
            If `x` is not a 1D numpy array, or if `upper` is not greater
            than `lower` for a (log_)min_max method.
        ValueError
            If `method` is not one of the supported scaling methods.

        """
        # Make sure that we have a 1D NumPy array. This is not rechecked in
        # transform/inv_transform, so the same transform could potentially be
        # used for ndarrays. Raised explicitly (instead of `assert`) so the
        # check survives `python -O`.
        if not (isinstance(x, np.ndarray) and x.ndim == 1):
            raise AssertionError("x must be a 1D numpy.ndarray")

        # lower/upper are only replaced for the range-based methods
        new_lower, new_upper = self.lower, self.upper
        if method in self._RANGE_METHODS:
            if not upper > lower:
                raise AssertionError("upper must be greater than lower")
            new_lower, new_upper = lower, upper

        # compute scaling parameters associated with each method
        if method == 'min_max':
            params = [x.min(axis=0), x.max(axis=0)]
        elif method == 'log_min_max':
            params = [np.log10(x.min(axis=0)), np.log10(x.max(axis=0))]
        elif method == 'neg_log_min_max':
            params = [np.log10((-x).min(axis=0)),
                      np.log10((-x).max(axis=0))]
        elif method == 'max_abs':
            params = [np.abs(x).max(axis=0)]
        elif method == 'log_max_abs':
            params = [np.abs(np.log10(x)).max(axis=0)]
        elif method == 'standarize':
            params = [x.mean(axis=0), x.std(axis=0)]
        elif method == 'log_standarize':
            # log will be computed in transform again
            log_x = np.log10(x)
            params = [log_x.mean(axis=0), log_x.std(axis=0)]
        elif method == 'neg_log_standarize':
            # log(-x)
            log_neg_x = np.log10(-x)
            params = [log_neg_x.mean(axis=0), log_neg_x.std(axis=0)]
        elif method == 'log':
            params = []
        elif method == 'none':
            # no transformation
            params = []
        else:
            raise ValueError(f"Unknown method `{method}` for data scaler.")

        # everything succeeded: commit the new state
        self.method = method
        self.params = params
        self.lower, self.upper = new_lower, new_upper

    def transform(self, x):
        """Transform x using the already obtained normalization values.

        `self.fit()` must be called first. lower/upper will only be taken
        into account for (log_)min_max normalization. In this case, the
        transformed x will have min(x_transf) = lower,
        max(x_transf) = upper.

        If the fitted data had zero spread (min == max, zero maximum
        absolute value or zero standard deviation), the divisor is replaced
        by 1 so that the result stays finite instead of becoming NaN/inf.

        Parameters
        ----------
        x : numpy.ndarray
            values to normalize

        Returns
        -------
        numpy.ndarray
            transformed version of x

        Raises
        ------
        AssertionError
            If no transform has been fitted yet.

        """
        # Make sure that "self.fit" was called first
        if self.method is None:
            raise AssertionError(
                "You have to fit a scaling object to a feature vector first")

        method = self.method
        if method == 'min_max':
            x_t = self._to_range(x)
        elif method == 'log_min_max':
            x_t = self._to_range(np.log10(x))
        elif method == 'neg_log_min_max':
            x_t = self._to_range(np.log10(-x))
        elif method == 'max_abs':
            x_t = x / self._safe_divisor(self.params[0])
        elif method == 'log_max_abs':
            x_t = np.log10(x) / self._safe_divisor(self.params[0])
        elif method == 'standarize':
            x_t = (x - self.params[0]) / self._safe_divisor(self.params[1])
        elif method == 'log_standarize':
            x_t = ((np.log10(x) - self.params[0])
                   / self._safe_divisor(self.params[1]))
        elif method == 'neg_log_standarize':
            x_t = ((np.log10(-x) - self.params[0])
                   / self._safe_divisor(self.params[1]))
        elif method == 'log':
            x_t = np.log10(x)
        else:
            # no transformation
            x_t = x
        return x_t

    def fit_and_transform(self, x, method='none', lower=-1, upper=1):
        """Fit and transform the array x according to the chosen scaling.

        lower/upper will only be taken into account for (log_)min_max
        normalization. In this case, the transformed x will have
        min(x_transf) = lower, max(x_transf) = upper.

        Parameters
        ----------
        x : numpy.ndarray
            expects a 1D array and finds norm values of columns
        method : str
            scaling method. Possible values: 'min_max', 'max_abs',
            'standarize' and the log versions 'log_min_max',
            'neg_log_min_max', 'log_max_abs', 'log_standarize',
            'neg_log_standarize', as well as 'log' and 'none'.
        lower : float
            lower range value of x_t after (log_)min_max scaling
        upper : float
            upper range value of x_t after (log_)min_max scaling

        Returns
        -------
        numpy.ndarray
            transformed version of x

        """
        self.fit(x, method, lower, upper)
        return self.transform(x)

    def inv_transform(self, x_t):
        """Revert the scaling using the stored transform parameters.

        Parameters
        ----------
        x_t : numpy.ndarray
            expects a 1D array to unnormalize given the fitted transform.

        Returns
        -------
        numpy.ndarray
            denormalized x using the stored parameters.

        Raises
        ------
        AssertionError
            If no transform has been fitted yet.

        """
        if self.method is None:
            raise AssertionError("Transformation not defined yet. "
                                 "Fit a scaling object first.")

        method = self.method
        if method == 'min_max':
            x = self._from_range(x_t)
        elif method == 'log_min_max':
            x = 10 ** self._from_range(x_t)
        elif method == 'neg_log_min_max':
            x = -(10 ** self._from_range(x_t))
        elif method == 'max_abs':
            x = x_t * self.params[0]
        elif method == 'log_max_abs':
            x = 10 ** (x_t * self.params[0])
        elif method == 'standarize':
            x = x_t * self.params[1] + self.params[0]
        elif method == 'log_standarize':
            x = 10 ** (x_t * self.params[1] + self.params[0])
        elif method == 'neg_log_standarize':
            x = -(10 ** (x_t * self.params[1] + self.params[0]))
        elif method == 'log':
            x = 10 ** x_t
        else:
            # no transformation
            x = x_t
        return x