Skip to content

Sparsity

Sparsity estimators which can be plugged into deepmod. We keep the API (mostly) in line with scikit-learn, so scikit-learn estimators can also be plugged in. See sklearn.linear_model for applicable estimators.

Base

__init__(self, estimator) special

Basic sparse estimator class; simply a wrapper around the supplied sk-learn compatible estimator.

Parameters:

Name Type Description Default
estimator BaseEstimator

Sci-kit learn estimator.

required
Source code in deepymod/model/sparse_estimators.py
def __init__(self, estimator: BaseEstimator) -> None:
    """Wrap a scikit-learn compatible estimator so it can act as a sparsity estimator.

    Args:
        estimator (BaseEstimator): Scikit-learn compatible estimator. Its
            ``fit_intercept`` parameter is switched off in place.
    """
    super().__init__()
    self.estimator = estimator
    # The library already contains an offset term, so no separate intercept is fitted.
    estimator.set_params(fit_intercept=False)

fit(self, X, y)

Returns an array with the coefficient vector after sparsity estimation.

Parameters:

Name Type Description Default
X ndarray

Training input data of shape (n_samples, n_features).

required
y ndarray

Training target data of shape (n_samples, n_outputs).

required

Returns:

Type Description
ndarray

np.ndarray: Coefficient vector (n_features, n_outputs).

Source code in deepymod/model/sparse_estimators.py
def fit(self, X: np.ndarray, y: np.ndarray) -> np.ndarray:
    """Fit the wrapped estimator and hand back its coefficients.

    Args:
        X (np.ndarray): Training input data of shape (n_samples, n_features).
        y (np.ndarray): Training target data of shape (n_samples, n_outputs).

    Returns:
        np.ndarray: The ``coef_`` attribute of the fitted estimator, returned as-is.
    """
    fitted = self.estimator.fit(X, y)
    return fitted.coef_

Clustering

__init__(self, estimator=LassoCV(alphas=None, copy_X=True, cv=5, eps=0.001, fit_intercept=False,max_iter=1000, n_alphas=100, n_jobs=None, normalize=False,positive=False, precompute='auto', random_state=None,selection='cyclic', tol=0.0001, verbose=False)) special

Performs additional thresholding by Kmeans-clustering on coefficient result from estimator.

Parameters:

Name Type Description Default
estimator BaseEstimator

Estimator class. Defaults to LassoCV(cv=5, fit_intercept=False).

LassoCV(alphas=None, copy_X=True, cv=5, eps=0.001, fit_intercept=False, max_iter=1000, n_alphas=100, n_jobs=None, normalize=False, positive=False, precompute='auto', random_state=None, selection='cyclic', tol=0.0001, verbose=False)
Source code in deepymod/model/sparse_estimators.py
def __init__(self, estimator: BaseEstimator = None) -> None:
    """Performs additional thresholding by Kmeans-clustering on coefficient result from estimator.

    Args:
        estimator (BaseEstimator, optional): Estimator instance. Defaults to None, in which
            case a new LassoCV(cv=5, fit_intercept=False) is created for this instance.
    """
    super().__init__()
    # Build the default here rather than in the signature: a default argument is
    # evaluated once at definition time, so every instance would otherwise share
    # (and refit / reconfigure) one and the same LassoCV object.
    if estimator is None:
        estimator = LassoCV(cv=5, fit_intercept=False)
    self.estimator = estimator
    self.kmeans = KMeans(n_clusters=2)

    # Library contains offset so turn off the intercept
    self.estimator.set_params(fit_intercept=False)

fit(self, X, y)

Returns an array with the coefficient vector after sparsity estimation.

Parameters:

Name Type Description Default
X ndarray

Training input data of shape (n_samples, n_features).

required
y ndarray

Training target data of shape (n_samples, n_outputs).

required

Returns:

Type Description
ndarray

np.ndarray: Coefficient vector (n_features, n_outputs).

Source code in deepymod/model/sparse_estimators.py
def fit(self, X: np.ndarray, y: np.ndarray) -> np.ndarray:
    """Returns a 0/1 mask marking which terms survive the clustering-based sparsity estimation.

    The wrapped estimator is fitted, the absolute values of its coefficients are
    split into two clusters, and the cluster containing the largest coefficient
    is the one that is kept.

    Args:
        X (np.ndarray): Training input data of shape (n_samples, n_features).
        y (np.ndarray): Training target data of shape (n_samples, n_outputs).

    Returns:
        np.ndarray: float32 vector with one entry per coefficient; 1.0 for terms
            to keep and 0.0 for terms to remove.
    """
    # sklearn returns 1D; add a trailing axis so each coefficient is one sample.
    coeffs = self.estimator.fit(X, y).coef_[:, None]
    magnitudes = np.abs(coeffs)
    # Builtin bool instead of the np.bool alias, which NumPy removed in 1.24.
    clusters = self.kmeans.fit_predict(magnitudes).astype(bool)

    # Cluster labels are arbitrary: make sure terms to keep are 1 and to remove are 0
    # by flipping the mask when the largest coefficient landed in cluster 0.
    max_idx = np.argmax(magnitudes)
    if not clusters[max_idx]:
        clusters = ~clusters

    return clusters.astype(np.float32)

PDEFIND

__init__(self, lam=0.001, dtol=0.1) special

Implements PDEFIND as a sparse estimator.

Parameters:

Name Type Description Default
lam float

Magnitude of the L2 regularization. Defaults to 1e-3.

0.001
dtol float

Initial stepsize for the search of the threshold. Defaults to 0.1.

0.1
Source code in deepymod/model/sparse_estimators.py
def __init__(self, lam: float = 1e-3, dtol: float = 0.1) -> None:
    """Set up PDEFIND as a sparse estimator.

    Args:
        lam (float, optional): Magnitude of the L2 regularization. Defaults to 1e-3.
        dtol (float, optional): Initial stepsize for the search of the threshold. Defaults to 0.1.
    """
    super().__init__()
    # Stored only; both values are forwarded to TrainSTLSQ when fit is called.
    self.lam, self.dtol = lam, dtol

fit(self, X, y)

Returns an array with the coefficient vector after sparsity estimation.

Parameters:

Name Type Description Default
X ndarray

Training input data of shape (n_samples, n_features).

required
y ndarray

Training target data of shape (n_samples, n_outputs).

required

Returns:

Type Description
ndarray

np.ndarray: Coefficient vector (n_features, n_outputs).

Source code in deepymod/model/sparse_estimators.py
def fit(self, X: np.ndarray, y: np.ndarray) -> np.ndarray:
    """Run the PDE-FIND threshold search and return the resulting coefficients.

    Args:
        X (np.ndarray): Training input data of shape (n_samples, n_features).
        y (np.ndarray): Training target data. A trailing axis is appended before
            the search, so a 1D array of length n_samples appears to be expected
            here -- TODO confirm against callers.

    Returns:
        np.ndarray: Coefficients found by TrainSTLSQ with singleton axes removed.
    """
    column_target = y[:, None]
    stlsq_coeffs = PDEFIND.TrainSTLSQ(X, column_target, self.lam, self.dtol)
    return stlsq_coeffs.squeeze()

TrainSTLSQ(X, y, alpha, delta_threshold, max_iterations=100, test_size=0.2, random_state=0) staticmethod

PDE-FIND sparsity selection algorithm. Based on method described by Rudy et al. (10.1126/sciadv.1602614).

Parameters:

Name Type Description Default
X ndarray

Training input data of shape (n_samples, n_features).

required
y ndarray

Training target data of shape (n_samples, n_outputs).

required
alpha float

Magnitude of the L2 regularization.

required
delta_threshold float

Initial stepsize for the search of the threshold

required
max_iterations int

Maximum number of iterations. Defaults to 100.

100
test_size float

Fraction of the data that is assigned to the test-set. Defaults to 0.2.

0.2
random_state int

Defaults to 0.

0

Returns:

Type Description
ndarray

np.ndarray: Coefficient vector.

Source code in deepymod/model/sparse_estimators.py
@staticmethod
def TrainSTLSQ(X: np.ndarray,
               y: np.ndarray,
               alpha: float,
               delta_threshold: float,
               max_iterations: int = 100,
               test_size: float = 0.2,
               random_state: int = 0) -> np.ndarray:
    """PDE-FIND sparsity selection algorithm. Based on method described by Rudy et al. (10.1126/sciadv.1602614).

    The data is split into a train and a test part. STLSQ is fitted on the train
    part for a sequence of thresholds, and each fit is scored on the test part by
    the 2-norm of the residual plus an l0 penalty on the number of non-zero
    coefficients. The threshold is raised while the score does not get worse;
    otherwise the search steps back and continues with a smaller step.

    Args:
        X (np.ndarray): Training input data of shape (n_samples, n_features).
        y (np.ndarray): Training target data of shape (n_samples, n_outputs).
        alpha (float): Magnitude of the L2 regularization.
        delta_threshold (float): Initial stepsize for the search of the threshold
        max_iterations (int, optional): Maximum number of iterations. Defaults to 100.
        test_size (float, optional): Fraction of the data that is assigned to the test-set. Defaults to 0.2.
        random_state (int, optional): Seed passed to the train/test split. Defaults to 0.

    Returns:
        np.ndarray: Coefficient vector of the STLSQ fit on the train part using the best threshold found.
    """
    # Split data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

    # Set up the initial tolerance l0 penalty and estimates
    l0 = 1e-3 * np.linalg.cond(X)
    delta_t = delta_threshold  # for internal use, shrinks as the search is refined

    # Initial estimate
    optimizer = STLSQ(threshold=0, alpha=0.0, fit_intercept=False)  # Now similar to LSTSQ
    y_predict = optimizer.fit(X_train, y_train).predict(X_test)
    min_loss = np.linalg.norm(y_predict - y_test, 2) + l0 * np.count_nonzero(optimizer.coef_)

    # Setting alpha and tolerance
    best_threshold = delta_t
    threshold = delta_t

    for iteration in range(max_iterations):
        optimizer.set_params(alpha=alpha, threshold=threshold)
        y_predict = optimizer.fit(X_train, y_train).predict(X_test)
        loss = np.linalg.norm(y_predict - y_test, 2) + l0 * np.count_nonzero(optimizer.coef_)

        if (loss <= min_loss) and not (np.all(optimizer.coef_ == 0)):
            min_loss = loss
            best_threshold = threshold
            # Advance by the current (possibly refined) step. Using the initial
            # delta_threshold here would jump straight back over the region the
            # else-branch just narrowed down, so the refinement would have no effect.
            threshold += delta_t

        else:  # if loss increases, we need to a) lower the current threshold and/or decrease step size
            new_lower_threshold = max(0.0, threshold - 2 * delta_t)
            # iteration < max_iterations, so the denominator is always >= 1
            delta_t = 2 * delta_t / (max_iterations - iteration)
            threshold = new_lower_threshold + delta_t

    optimizer.set_params(alpha=alpha, threshold=best_threshold)
    optimizer.fit(X_train, y_train)

    return optimizer.coef_

Threshold

__init__(self, threshold=0.1, estimator=LassoCV(alphas=None, copy_X=True, cv=5, eps=0.001, fit_intercept=False,max_iter=1000, n_alphas=100, n_jobs=None, normalize=False,positive=False, precompute='auto', random_state=None,selection='cyclic', tol=0.0001, verbose=False)) special

Performs additional thresholding on coefficient result from supplied estimator.

Parameters:

Name Type Description Default
threshold float

Value of the threshold above which the terms are selected. Defaults to 0.1.

0.1
estimator BaseEstimator

Sparsity estimator. Defaults to LassoCV(cv=5, fit_intercept=False).

LassoCV(alphas=None, copy_X=True, cv=5, eps=0.001, fit_intercept=False, max_iter=1000, n_alphas=100, n_jobs=None, normalize=False, positive=False, precompute='auto', random_state=None, selection='cyclic', tol=0.0001, verbose=False)
Source code in deepymod/model/sparse_estimators.py
def __init__(self, threshold: float = 0.1, estimator: BaseEstimator = None) -> None:
    """Performs additional thresholding on coefficient result from supplied estimator.

    Args:
        threshold (float, optional): Value of the threshold above which the terms are selected. Defaults to 0.1.
        estimator (BaseEstimator, optional): Sparsity estimator. Defaults to None, in which
            case a new LassoCV(cv=5, fit_intercept=False) is created for this instance.
    """
    super().__init__()
    # Build the default here rather than in the signature: a default argument is
    # evaluated once at definition time, so every instance would otherwise share
    # (and refit / reconfigure) one and the same LassoCV object.
    if estimator is None:
        estimator = LassoCV(cv=5, fit_intercept=False)
    self.estimator = estimator
    self.threshold = threshold

    # Library contains offset so turn off the intercept
    self.estimator.set_params(fit_intercept=False)

fit(self, X, y)

Returns an array with the coefficient vector after sparsity estimation.

Parameters:

Name Type Description Default
X ndarray

Training input data of shape (n_samples, n_features).

required
y ndarray

Training target data of shape (n_samples, n_outputs).

required

Returns:

Type Description
ndarray

np.ndarray: Coefficient vector (n_features, n_outputs).

Source code in deepymod/model/sparse_estimators.py
def fit(self, X: np.ndarray, y: np.ndarray) -> np.ndarray:
    """Fit the wrapped estimator and zero out every coefficient below the threshold.

    Args:
        X (np.ndarray): Training input data of shape (n_samples, n_features).
        y (np.ndarray): Training target data of shape (n_samples, n_outputs).

    Returns:
        np.ndarray: The estimator's ``coef_`` array, modified in place so that
            entries whose absolute value is strictly below ``self.threshold`` are 0.
    """
    fitted = self.estimator.fit(X, y)
    coeffs = fitted.coef_
    # Strict comparison: a coefficient whose magnitude equals the threshold is kept.
    below_cutoff = np.abs(coeffs) < self.threshold
    coeffs[below_cutoff] = 0.0
    return coeffs