Source code for pylwl.models.classic_lw

#!/usr/bin/env python
# Created by "Thieu" at 13:12, 16/05/2025 ----------%
#       Email: nguyenthieu2102@gmail.com            %                                                    
#       Github: https://github.com/thieu1995        %                         
# --------------------------------------------------%

import numpy as np
from scipy.special import expit, softmax  # for sigmoid
from sklearn.base import RegressorMixin, ClassifierMixin
from sklearn.metrics import accuracy_score, r2_score
from sklearn.preprocessing import LabelBinarizer
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from pylwl.models.base_model import BaseModel
from pylwl.shared import kernel as kernel_module


class BaseLW(BaseModel):
    """
    Base class for locally weighted models.

    This class provides the foundation for locally weighted regression and
    classification models. It resolves the kernel function once at
    construction time and exposes a helper that turns distances between the
    training samples and a query point into a diagonal weight matrix.

    Parameters
    ----------
    kernel : str or callable, optional
        The kernel function to use. If a string is provided, the function
        named ``f"{kernel}_kernel"`` is looked up in `kernel_module`.
        If a callable is provided, it is called as
        ``kernel(distances, tau=tau)`` and must return the weights.
    tau : float, optional
        The bandwidth parameter passed to the kernel function (default: 1.0).

    Attributes
    ----------
    kernel : str or callable
        The kernel specification given by the caller.
    kernel_func_ : callable
        The resolved kernel function (either from `kernel_module` or the
        provided callable).
    tau : float
        The bandwidth parameter for the kernel function.

    Raises
    ------
    ValueError
        If `kernel` is neither a string nor a callable.
    AttributeError
        If `kernel` is a string and `kernel_module` has no attribute named
        ``f"{kernel}_kernel"`` (raised by ``getattr``).
    """

    def __init__(self, kernel='gaussian', tau=1.0):
        super().__init__()
        self.kernel = kernel
        if isinstance(kernel, str):
            # Named kernels follow the "<name>_kernel" convention in kernel_module.
            self.kernel_func_ = getattr(kernel_module, f"{kernel}_kernel")
        elif callable(kernel):
            self.kernel_func_ = kernel
        else:
            raise ValueError("kernel must be a string or callable")
        self.tau = tau

    def _kernel_weights(self, X_train, x_query):
        """
        Compute kernel weights for a query point.

        Parameters
        ----------
        X_train : array-like, shape (n_samples, n_features)
            The training data.
        x_query : array-like, shape (n_features,)
            The query point.

        Returns
        -------
        W : ndarray, shape (n_samples, n_samples)
            A diagonal matrix whose diagonal holds the kernel weight of each
            training sample, clipped to the range [1e-8, 1e8].
        """
        distances = np.linalg.norm(X_train - x_query, axis=1)
        weights = self.kernel_func_(distances, tau=self.tau)
        # Clip the weight vector *before* building the matrix: clipping the
        # matrix itself would lift every off-diagonal zero up to 1e-8 and the
        # result would no longer be diagonal.
        weights = np.clip(weights, 1e-8, 1e8)
        return np.diag(weights)
class LwRegressor(BaseLW, RegressorMixin):
    """
    Locally Weighted Regressor.

    This class implements a locally weighted regression model using a
    specified kernel function and bandwidth parameter. `fit` only stores the
    training data; for every query point `predict` solves a kernel-weighted
    least-squares problem (with an intercept column) and evaluates the
    resulting linear model at that point.

    Parameters
    ----------
    kernel : str or callable, optional
        The kernel function to use. If a string is provided, it should match
        the name of a kernel function in the `kernel_module`. If a callable
        is provided, it should accept distances and `tau` as arguments and
        return weights.
    tau : float, optional
        The bandwidth parameter for the kernel function (default: 1.0).

    Attributes
    ----------
    X_ : ndarray, shape (n_samples, n_features)
        The training data.
    y_ : ndarray, shape (n_samples,)
        The target values for the training data.
    n_features_in_ : int
        Number of features seen during `fit`.
    """

    def __init__(self, kernel="gaussian", tau=1.0):
        """
        Initialize the LwRegressor.

        Parameters
        ----------
        kernel : str or callable, optional
            The kernel function to use (default: "gaussian").
        tau : float, optional
            The bandwidth parameter for the kernel function (default: 1.0).
        """
        super().__init__(kernel=kernel, tau=tau)

    def fit(self, X, y):
        """
        Fit the locally weighted regression model.

        The model is lazy: fitting validates the inputs and stores them, the
        actual regression happens per query point in `predict`.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The training data.
        y : array-like, shape (n_samples,)
            The target values.

        Returns
        -------
        self : LwRegressor
            The fitted model.
        """
        X, y = check_X_y(X, y)
        self.X_ = X
        self.y_ = y
        self.n_features_in_ = X.shape[1]
        return self

    def predict(self, X):
        """
        Predict target values for the given input data.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The input data.

        Returns
        -------
        y_pred : ndarray, shape (n_samples,)
            The predicted target values.

        Raises
        ------
        ValueError
            If `X` does not have the same number of features as the
            training data.
        """
        check_is_fitted(self, ['X_', 'y_'])
        X = check_array(X)
        n_train, n_features = self.X_.shape
        if X.shape[1] != n_features:
            raise ValueError(
                f"X has {X.shape[1]} features, but {type(self).__name__} "
                f"was fitted with {n_features} features"
            )

        # The augmented design matrix (intercept column + features) does not
        # depend on the query point, so build it once.
        X_aug = np.hstack([np.ones((n_train, 1)), self.X_])

        y_preds = []
        for x in X:
            W = self._kernel_weights(self.X_, x)
            x_aug = np.insert(x, 0, 1)
            try:
                # Weighted normal equations: theta = (X^T W X)^+ X^T W y.
                XtW = X_aug.T @ W
                theta = np.linalg.pinv(XtW @ X_aug) @ (XtW @ self.y_)
                y_pred = x_aug @ theta
            except np.linalg.LinAlgError:
                # pinv failed (SVD did not converge): fall back to the
                # global mean of the training targets.
                y_pred = np.mean(self.y_)
            y_preds.append(y_pred)
        return np.array(y_preds)

    def score(self, X, y):
        """
        Compute the R^2 score for the model.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The input data.
        y : array-like, shape (n_samples,)
            The true target values.

        Returns
        -------
        score : float
            The R^2 score of the predictions.
        """
        return r2_score(y, self.predict(X))

    def evaluate(self, y_true, y_pred, list_metrics=("MSE", "MAE")):
        """
        Evaluate the regression model using specified metrics.

        Delegates to the inherited `_evaluate_reg` helper.

        Parameters
        ----------
        y_true : array-like
            True target values.
        y_pred : array-like
            Predicted target values.
        list_metrics : tuple of str, optional
            List of metrics for evaluation (default: ("MSE", "MAE")).

        Returns
        -------
        dict
            Dictionary of calculated metric values.
        """
        return self._evaluate_reg(y_true, y_pred, list_metrics)

    def scores(self, X, y, list_metrics=("MSE", "MAE")):
        """
        Compute evaluation metrics for the model on the given data.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The input data.
        y : array-like, shape (n_samples,)
            The true target values.
        list_metrics : tuple of str, optional
            List of metrics for evaluation (default: ("MSE", "MAE")).

        Returns
        -------
        dict
            Dictionary of calculated metric values.
        """
        y_pred = self.predict(X)
        return self.evaluate(y, y_pred, list_metrics)
class LwClassifier(BaseLW, ClassifierMixin):
    """
    Locally Weighted Classifier.

    This class implements a locally weighted classification model using a
    specified kernel function and bandwidth parameter. For every query point
    a kernel-weighted least-squares model is fitted against the binarized
    labels; its output at the query point is treated as a logit and mapped to
    probabilities with a sigmoid (two classes) or a softmax (more classes).

    Parameters
    ----------
    kernel : str or callable, optional
        The kernel function to use. If a string is provided, it should match
        the name of a kernel function in the `kernel_module`. If a callable
        is provided, it should accept distances and `tau` as arguments and
        return weights.
    tau : float, optional
        The bandwidth parameter for the kernel function (default: 1.0).

    Attributes
    ----------
    X_ : ndarray, shape (n_samples, n_features)
        The training data.
    y_raw_ : ndarray, shape (n_samples,)
        The raw target values for the training data.
    classes_ : ndarray, shape (n_classes,)
        The unique class labels.
    n_classes_ : int
        The number of unique classes.
    n_features_in_ : int
        Number of features seen during `fit`.
    lb_ : LabelBinarizer
        The label binarizer used for encoding class labels.
    y_bin_ : ndarray, shape (n_samples, n_classes) or (n_samples,)
        The binarized target values for the training data (flattened to 1-D
        when there are exactly two classes).
    get_prob : callable
        The method used to compute class probabilities (binary or multiclass).
    """

    def __init__(self, kernel="gaussian", tau=1.0):
        """
        Initialize the LwClassifier.

        Parameters
        ----------
        kernel : str or callable, optional
            The kernel function to use (default: "gaussian").
        tau : float, optional
            The bandwidth parameter for the kernel function (default: 1.0).
        """
        super().__init__(kernel=kernel, tau=tau)

    def fit(self, X, y):
        """
        Fit the locally weighted classification model.

        Fitting validates and stores the training data, binarizes the labels
        and selects the probability mapping; the per-query regression happens
        in `predict_proba`.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The training data.
        y : array-like, shape (n_samples,)
            The target class labels.

        Returns
        -------
        self : LwClassifier
            The fitted model.
        """
        X, y = check_X_y(X, y)
        self.X_ = X
        self.y_raw_ = y
        self.n_features_in_ = X.shape[1]
        self.classes_ = np.unique(y)
        self.n_classes_ = len(self.classes_)
        self.lb_ = LabelBinarizer()
        self.y_bin_ = self.lb_.fit_transform(y)
        if self.n_classes_ == 2:
            # Two classes are encoded as a single indicator column; flatten it.
            self.y_bin_ = self.y_bin_.ravel()
            self.get_prob = self._get_binary
        else:
            self.get_prob = self._get_multiclass
        return self

    def _get_binary(self, logits):
        """
        Compute binary class probabilities.

        Parameters
        ----------
        logits : sequence of float
            The logits for the binary classification; only ``logits[0]`` is
            used.

        Returns
        -------
        list
            ``[1 - p, p]`` where ``p`` is the sigmoid of ``logits[0]``.
        """
        prob = expit(logits[0])
        return [1 - prob, prob]

    def _get_multiclass(self, logits):
        """
        Compute multiclass probabilities.

        Parameters
        ----------
        logits : sequence of float
            One logit per class.

        Returns
        -------
        ndarray
            The softmax of `logits`.
        """
        return softmax(logits)

    def predict_proba(self, X):
        """
        Predict class probabilities for the given input data.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The input data.

        Returns
        -------
        probas : ndarray, shape (n_samples, n_classes)
            The predicted class probabilities. If only one class was seen
            during `fit`, every sample gets probability 1 for that class.

        Raises
        ------
        ValueError
            If `X` does not have the same number of features as the
            training data.
        """
        check_is_fitted(self, ['X_', 'y_bin_'])
        X = check_array(X)
        n_train, n_features = self.X_.shape
        if X.shape[1] != n_features:
            raise ValueError(
                f"X has {X.shape[1]} features, but {type(self).__name__} "
                f"was fitted with {n_features} features"
            )
        if self.n_classes_ == 1:
            # Nothing to discriminate: the single known class is certain.
            return np.ones((X.shape[0], 1))

        X_aug = np.hstack([np.ones((n_train, 1)), self.X_])
        # (n_samples,) for two classes, (n_samples, n_classes) otherwise.
        targets = self.y_bin_

        probas = []
        for x in X:
            W = self._kernel_weights(self.X_, x)
            x_aug = np.insert(x, 0, 1)
            try:
                # (X^T W X)^+ is the same for every class, so solve for all
                # class indicator columns in one go instead of per class.
                XtW = X_aug.T @ W
                theta = np.linalg.pinv(XtW @ X_aug) @ (XtW @ targets)
                logits = x_aug @ theta
            except np.linalg.LinAlgError:
                # pinv failed: fall back to the log-odds of the class
                # frequencies; the epsilons keep both log and division finite.
                prior = np.mean(targets, axis=0)
                logits = np.log((prior + 1e-8) / (1 - prior + 1e-8))
            # The binary case yields a scalar; get_prob expects a sequence.
            probas.append(self.get_prob(np.atleast_1d(logits)))
        return np.array(probas)

    def predict(self, X):
        """
        Predict class labels for the given input data.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The input data.

        Returns
        -------
        y_pred : ndarray, shape (n_samples,)
            The predicted class labels (the entry of `classes_` with the
            highest predicted probability).
        """
        probas = self.predict_proba(X)
        class_indices = np.argmax(probas, axis=1)
        return self.classes_[class_indices]

    def score(self, X, y):
        """
        Compute the accuracy score for the model.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The input data.
        y : array-like, shape (n_samples,)
            The true target class labels.

        Returns
        -------
        score : float
            The accuracy score of the predictions.
        """
        return accuracy_score(y, self.predict(X))

    def evaluate(self, y_true, y_pred, list_metrics=("AS", "RS")):
        """
        Evaluate the classification model using specified metrics.

        Delegates to the inherited `_evaluate_cls` helper.

        Parameters
        ----------
        y_true : array-like
            True target class labels.
        y_pred : array-like
            Predicted class labels.
        list_metrics : tuple of str, optional
            List of metrics for evaluation (default: ("AS", "RS")).

        Returns
        -------
        dict
            Dictionary of calculated metric values.
        """
        return self._evaluate_cls(y_true=y_true, y_pred=y_pred, list_metrics=list_metrics)

    def scores(self, X, y, list_metrics=("AS", "RS")):
        """
        Compute evaluation metrics for the model on the given data.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The input data.
        y : array-like, shape (n_samples,)
            The true target class labels.
        list_metrics : tuple of str, optional
            List of metrics for evaluation (default: ("AS", "RS")).

        Returns
        -------
        dict
            Dictionary of calculated metric values.
        """
        y_pred = self.predict(X)
        return self.evaluate(y, y_pred, list_metrics)