Sparsity

Sparsity estimators which can be plugged into DeepMoD. We keep the API (mostly) in line with scikit-learn, so scikit-learn estimators can also be plugged in. See sklearn.linear_model for applicable estimators.

`Base`

`__init__(self, estimator)` `special`

Basic sparse estimator class; simply a wrapper around the supplied sk-learn compatible estimator.

Parameters:

Name	Type	Description	Default
`estimator`	`BaseEstimator`	scikit-learn estimator.	required

Source code in deepymod/model/sparse_estimators.py

def __init__(self, estimator: BaseEstimator) -> None:
    """Wrap a scikit-learn compatible estimator so it can serve as a sparsity estimator.

    Args:
        estimator (BaseEstimator): scikit-learn compatible estimator to wrap.
            Its ``fit_intercept`` parameter is switched off by this constructor.
    """
    super().__init__()
    self.estimator = estimator
    # The library already contains an offset term, so the wrapped estimator
    # must not fit an intercept of its own.
    estimator.set_params(fit_intercept=False)

`fit(self, X, y)`

Returns an array with the coefficient vector after sparsity estimation.

Parameters:

Name	Type	Description	Default
`X`	`ndarray`	Training input data of shape (n_samples, n_features).	required
`y`	`ndarray`	Training target data of shape (n_samples, n_outputs).	required

Returns:

Type	Description
`ndarray`	np.ndarray: Coefficient vector (n_features, n_outputs).

Source code in deepymod/model/sparse_estimators.py

def fit(self, X: np.ndarray, y: np.ndarray) -> np.ndarray:
    """Fit the wrapped estimator and return its coefficient vector.

    Args:
        X (np.ndarray): Training input data of shape (n_samples, n_features).
        y (np.ndarray): Training target data of shape (n_samples, n_outputs).

    Returns:
        np.ndarray: The ``coef_`` attribute of the fitted estimator, returned unchanged.
    """
    fitted = self.estimator.fit(X, y)
    return fitted.coef_

`Clustering`

`__init__(self, estimator=LassoCV(alphas=None, copy_X=True, cv=5, eps=0.001, fit_intercept=False, max_iter=1000, n_alphas=100, n_jobs=None, normalize=False, positive=False, precompute='auto', random_state=None, selection='cyclic', tol=0.0001, verbose=False))` `special`

Performs additional thresholding by Kmeans-clustering on coefficient result from estimator.

Parameters:

Name	Type	Description	Default
`estimator`	`BaseEstimator`	Estimator class. Defaults to LassoCV(cv=5, fit_intercept=False).	`LassoCV(alphas=None, copy_X=True, cv=5, eps=0.001, fit_intercept=False, max_iter=1000, n_alphas=100, n_jobs=None, normalize=False, positive=False, precompute='auto', random_state=None, selection='cyclic', tol=0.0001, verbose=False)`

Source code in deepymod/model/sparse_estimators.py

def __init__(self, estimator: BaseEstimator = LassoCV(cv=5, fit_intercept=False)) -> None:
    """Sparsity estimator that post-processes the estimator's coefficients with KMeans clustering.

    Args:
        estimator (BaseEstimator, optional): Estimator whose coefficients are clustered.
            Defaults to LassoCV(cv=5, fit_intercept=False).
    """
    # NOTE(review): the default LassoCV instance is created once, when this
    # function is defined, so every instance built without an explicit
    # estimator shares (and reconfigures) the same object.
    super().__init__()
    # Two clusters: one for the terms to keep, one for the terms to remove.
    self.kmeans = KMeans(n_clusters=2)
    self.estimator = estimator

    # The library already contains an offset term, so no intercept is fitted.
    estimator.set_params(fit_intercept=False)

`fit(self, X, y)`

Returns an array with the coefficient vector after sparsity estimation.

Parameters:

Name	Type	Description	Default
`X`	`ndarray`	Training input data of shape (n_samples, n_features).	required
`y`	`ndarray`	Training target data of shape (n_samples, n_outputs).	required

Returns:

Type	Description
`ndarray`	np.ndarray: Coefficient vector (n_features, n_outputs).

Source code in deepymod/model/sparse_estimators.py

def fit(self, X: np.ndarray, y: np.ndarray) -> np.ndarray:
    """Fit the estimator and return a sparsity mask obtained by clustering its coefficients.

    The absolute values of the fitted coefficients are split into two KMeans
    clusters. The cluster containing the largest absolute coefficient marks
    the terms to keep; the other cluster marks the terms to remove.

    Args:
        X (np.ndarray): Training input data of shape (n_samples, n_features).
        y (np.ndarray): Training target data. The fitted ``coef_`` is indexed as a
            1D vector, so a single output is presumably expected (TODO confirm).

    Returns:
        np.ndarray: float32 vector with one entry per coefficient; 1.0 for terms
            to keep and 0.0 for terms to remove.
    """
    # sklearn returns a 1D coefficient vector; add an axis so that each
    # coefficient is passed to KMeans as its own row.
    coeffs = self.estimator.fit(X, y).coef_[:, None]
    # Builtin bool is used because the ``np.bool`` alias was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24.
    clusters = self.kmeans.fit_predict(np.abs(coeffs)).astype(bool)

    # KMeans labels are arbitrary: make sure terms to keep are 1 and terms to
    # remove are 0 by checking which label the largest coefficient received.
    max_idx = np.argmax(np.abs(coeffs))
    if not clusters[max_idx]:
        clusters = ~clusters

    return clusters.astype(np.float32)

`PDEFIND`

`__init__(self, lam=0.001, dtol=0.1)` `special`

Implements PDEFIND as a sparse estimator.

Parameters:

Name	Type	Description	Default
`lam`	`float`	Magnitude of the L2 regularization. Defaults to 1e-3.	`0.001`
`dtol`	`float`	Initial stepsize for the search of the threshold. Defaults to 0.1.	`0.1`

Source code in deepymod/model/sparse_estimators.py

def __init__(self, lam: float = 1e-3, dtol: float = 0.1) -> None:
    """Sparse estimator implementing PDE-FIND.

    Args:
        lam (float, optional): Magnitude of the L2 regularization. Defaults to 1e-3.
        dtol (float, optional): Initial stepsize for the search of the threshold. Defaults to 0.1.
    """
    super().__init__()
    # Stored as given; both values are forwarded to TrainSTLSQ by fit().
    self.lam, self.dtol = lam, dtol

`fit(self, X, y)`

Returns an array with the coefficient vector after sparsity estimation.

Parameters:

Name	Type	Description	Default
`X`	`ndarray`	Training input data of shape (n_samples, n_features).	required
`y`	`ndarray`	Training target data of shape (n_samples, n_outputs).	required

Returns:

Type	Description
`ndarray`	np.ndarray: Coefficient vector (n_features, n_outputs).

Source code in deepymod/model/sparse_estimators.py

def fit(self, X: np.ndarray, y: np.ndarray) -> np.ndarray:
    """Run the PDE-FIND threshold search and return the resulting coefficient vector.

    Args:
        X (np.ndarray): Training input data of shape (n_samples, n_features).
        y (np.ndarray): Training target data. An extra axis is inserted after the
            first one before fitting, so a 1D array of length n_samples is
            presumably expected (TODO confirm).

    Returns:
        np.ndarray: Coefficients returned by TrainSTLSQ with all length-one axes removed.
    """
    targets = y[:, np.newaxis]
    coeffs = PDEFIND.TrainSTLSQ(X, targets, self.lam, self.dtol)
    return coeffs.squeeze()

`TrainSTLSQ(X, y, alpha, delta_threshold, max_iterations=100, test_size=0.2, random_state=0)` `staticmethod`

PDE-FIND sparsity selection algorithm. Based on method described by Rudy et al. (10.1126/sciadv.1602614).

Parameters:

Name	Type	Description	Default
`X`	`ndarray`	Training input data of shape (n_samples, n_features).	required
`y`	`ndarray`	Training target data of shape (n_samples, n_outputs).	required
`alpha`	`float`	Magnitude of the L2 regularization.	required
`delta_threshold`	`float`	Initial stepsize for the search of the threshold	required
`max_iterations`	`int`	Maximum number of iterations. Defaults to 100.	`100`
`test_size`	`float`	Fraction of the data that is assigned to the test-set. Defaults to 0.2.	`0.2`
`random_state`	`int`	Defaults to 0.	`0`

Returns:

Type	Description
`ndarray`	np.ndarray: Coefficient vector.

Source code in deepymod/model/sparse_estimators.py

@staticmethod
def TrainSTLSQ(X: np.ndarray,
               y: np.ndarray,
               alpha: float,
               delta_threshold: float,
               max_iterations: int = 100,
               test_size: float = 0.2,
               random_state: int = 0) -> np.ndarray:
    """PDE-FIND sparsity selection algorithm. Based on method described by Rudy et al. (10.1126/sciadv.1602614).

    The data is split into a train and a test set. An STLSQ model is fitted on
    the train set for a sequence of thresholds, and each fit is scored on the
    test set by the 2-norm of the residual plus an l0 penalty on the number of
    non-zero coefficients. The threshold is increased while the score does not
    get worse; otherwise the search steps back and continues with a smaller step.

    Args:
        X (np.ndarray): Training input data of shape (n_samples, n_features).
        y (np.ndarray): Training target data of shape (n_samples, n_outputs).
        alpha (float): Magnitude of the L2 regularization.
        delta_threshold (float): Initial stepsize for the search of the threshold.
        max_iterations (int, optional): Maximum number of iterations. Defaults to 100.
        test_size (float, optional): Fraction of the data that is assigned to the test-set. Defaults to 0.2.
        random_state (int, optional): Seed passed to the train/test split. Defaults to 0.

    Returns:
        np.ndarray: Coefficient vector of the model refitted on the train set
            with the best threshold found.
    """
    # Split data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

    # l0 penalty weight, scaled by the condition number of the full library.
    l0 = 1e-3 * np.linalg.cond(X)
    delta_t = delta_threshold  # for internal use, can be updated

    def fit_and_score() -> float:
        """Fit on the train set and return the penalized loss on the test set."""
        y_predict = optimizer.fit(X_train, y_train).predict(X_test)
        return np.linalg.norm(y_predict - y_test, 2) + l0 * np.count_nonzero(optimizer.coef_)

    # Initial estimate: no thresholding and no regularization.
    optimizer = STLSQ(threshold=0, alpha=0.0, fit_intercept=False)  # Now similar to LSTSQ
    min_loss = fit_and_score()

    # Start the search one step above zero.
    best_threshold = delta_t
    threshold = delta_t

    for iteration in range(max_iterations):
        optimizer.set_params(alpha=alpha, threshold=threshold)
        loss = fit_and_score()

        # An all-zero solution is never accepted as an improvement.
        if loss <= min_loss and not np.all(optimizer.coef_ == 0):
            min_loss = loss
            best_threshold = threshold
            # Advance by the current (possibly refined) step. Using the
            # initial step here would undo every refinement made below.
            threshold += delta_t
        else:
            # Loss increased: step back below the current threshold, shrink
            # the step size and retry from there.
            new_lower_threshold = max(0, threshold - 2 * delta_t)
            delta_t = 2 * delta_t / (max_iterations - iteration)
            threshold = new_lower_threshold + delta_t

    # Refit with the best threshold so coef_ belongs to the selected model.
    optimizer.set_params(alpha=alpha, threshold=best_threshold)
    optimizer.fit(X_train, y_train)

    return optimizer.coef_

`Threshold`

`__init__(self, threshold=0.1, estimator=LassoCV(alphas=None, copy_X=True, cv=5, eps=0.001, fit_intercept=False, max_iter=1000, n_alphas=100, n_jobs=None, normalize=False, positive=False, precompute='auto', random_state=None, selection='cyclic', tol=0.0001, verbose=False))` `special`

Performs additional thresholding on coefficient result from supplied estimator.

Parameters:

Name	Type	Description	Default
`threshold`	`float`	Value of the threshold above which the terms are selected. Defaults to 0.1.	`0.1`
`estimator`	`BaseEstimator`	Sparsity estimator. Defaults to LassoCV(cv=5, fit_intercept=False).	`LassoCV(alphas=None, copy_X=True, cv=5, eps=0.001, fit_intercept=False, max_iter=1000, n_alphas=100, n_jobs=None, normalize=False, positive=False, precompute='auto', random_state=None, selection='cyclic', tol=0.0001, verbose=False)`

Source code in deepymod/model/sparse_estimators.py

def __init__(self, threshold: float = 0.1, estimator: BaseEstimator = LassoCV(cv=5, fit_intercept=False)) -> None:
    """Sparsity estimator that applies a fixed threshold to the coefficients of the supplied estimator.

    Args:
        threshold (float, optional): Value of the threshold above which the terms are selected. Defaults to 0.1.
        estimator (BaseEstimator, optional): Sparsity estimator. Defaults to LassoCV(cv=5, fit_intercept=False).
    """
    # NOTE(review): the default LassoCV instance is created once, when this
    # function is defined, so every instance built without an explicit
    # estimator shares (and reconfigures) the same object.
    super().__init__()
    self.threshold = threshold
    self.estimator = estimator

    # The library already contains an offset term, so no intercept is fitted.
    estimator.set_params(fit_intercept=False)

`fit(self, X, y)`

Returns an array with the coefficient vector after sparsity estimation.

Parameters:

Name	Type	Description	Default
`X`	`ndarray`	Training input data of shape (n_samples, n_features).	required
`y`	`ndarray`	Training target data of shape (n_samples, n_outputs).	required

Returns:

Type	Description
`ndarray`	np.ndarray: Coefficient vector (n_features, n_outputs).

Source code in deepymod/model/sparse_estimators.py

def fit(self, X: np.ndarray, y: np.ndarray) -> np.ndarray:
    """Fit the estimator and zero out every coefficient smaller in magnitude than the threshold.

    Args:
        X (np.ndarray): Training input data of shape (n_samples, n_features).
        y (np.ndarray): Training target data of shape (n_samples, n_outputs).

    Returns:
        np.ndarray: The estimator's ``coef_`` array, with entries whose absolute
            value is below ``self.threshold`` set to 0.0.
    """
    coeffs = self.estimator.fit(X, y).coef_
    too_small = np.abs(coeffs) < self.threshold
    # Modified in place: this is the estimator's own coef_ array, not a copy.
    coeffs[too_small] = 0.0
    return coeffs

Sparsity

Base

__init__(self, estimator) special

fit(self, X, y)

Clustering

__init__(self, estimator=LassoCV(alphas=None, copy_X=True, cv=5, eps=0.001, fit_intercept=False,max_iter=1000, n_alphas=100, n_jobs=None, normalize=False,positive=False, precompute='auto', random_state=None,selection='cyclic', tol=0.0001, verbose=False)) special

fit(self, X, y)

PDEFIND

__init__(self, lam=0.001, dtol=0.1) special

fit(self, X, y)

TrainSTLSQ(X, y, alpha, delta_threshold, max_iterations=100, test_size=0.2, random_state=0) staticmethod

Threshold

__init__(self, threshold=0.1, estimator=LassoCV(alphas=None, copy_X=True, cv=5, eps=0.001, fit_intercept=False,max_iter=1000, n_alphas=100, n_jobs=None, normalize=False,positive=False, precompute='auto', random_state=None,selection='cyclic', tol=0.0001, verbose=False)) special

fit(self, X, y)

`Base`

`__init__(self, estimator)` `special`

`fit(self, X, y)`

`Clustering`

`__init__(self, estimator=LassoCV(alphas=None, copy_X=True, cv=5, eps=0.001, fit_intercept=False, max_iter=1000, n_alphas=100, n_jobs=None, normalize=False, positive=False, precompute='auto', random_state=None, selection='cyclic', tol=0.0001, verbose=False))` `special`

`fit(self, X, y)`

`PDEFIND`

`__init__(self, lam=0.001, dtol=0.1)` `special`

`fit(self, X, y)`

`TrainSTLSQ(X, y, alpha, delta_threshold, max_iterations=100, test_size=0.2, random_state=0)` `staticmethod`

`Threshold`

`__init__(self, threshold=0.1, estimator=LassoCV(alphas=None, copy_X=True, cv=5, eps=0.001, fit_intercept=False, max_iter=1000, n_alphas=100, n_jobs=None, normalize=False, positive=False, precompute='auto', random_state=None, selection='cyclic', tol=0.0001, verbose=False))` `special`

`fit(self, X, y)`