"""Source code for paradance.pipeline.logarithm_pca."""

import logging
from typing import Optional, Union

import numpy as np
import pandas as pd
from mixician import SelfBalancingLogarithmPCACalculator

from ..evaluation import Calculator, LogarithmPCACalculator
from .base import BasePipeline

# Import-time logging setup: INFO threshold, records rendered as
# "HH:MM:SS - LEVEL - message". Per the standard library, basicConfig does
# nothing when the root logger already has handlers configured.
logging.basicConfig(
    datefmt="%H:%M:%S",
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Logger named after this module; used by the pipeline below.
logger = logging.getLogger(__name__)


class LogarithmPCAPipeline(BasePipeline):
    """Pipeline that optimizes a logarithm-transformed, self-balancing PCA.

    Builds on `BasePipeline`: it wires a `SelfBalancingLogarithmPCACalculator`
    (from `mixician`) into a `LogarithmPCACalculator` and exposes helpers to
    plot distributions and to report the best parameters found.

    Attributes:
        file_type (str): Type of the file to load data from, supported
            types are 'csv' and 'xlsx'.
        dataframe (pd.DataFrame): The loaded dataset in a pandas DataFrame.
        calculator (LogarithmPCACalculator): Calculator for PCA operations.
        objective (MultipleObjective): The optimization objective.
    """

    def __init__(
        self,
        dataframe: Optional[pd.DataFrame] = None,
        config_path: Optional[str] = None,
        n_trials: int = 200,
    ) -> None:
        """Set up the base pipeline, then run the pre-run step.

        Args:
            dataframe (Optional[pd.DataFrame]): Dataset forwarded to
                `BasePipeline`. Defaults to None.
            config_path (Optional[str]): Path to the configuration file.
                Defaults to None.
            n_trials (int): Number of optimization trials to perform.
                Defaults to 200.
        """
        base_kwargs = {
            "dataframe": dataframe,
            "config_path": config_path,
            "n_trials": n_trials,
        }
        super().__init__(**base_kwargs)
        # `_pre_run` is not defined in this class; it comes from the base.
        self._pre_run()

    def _load_calculator(self) -> Union[Calculator, LogarithmPCACalculator]:
        """Create the PCA calculator for the loaded dataset and store it.

        Returns:
            The `LogarithmPCACalculator` that was assigned to
            `self.calculator`.
        """
        # Inner calculator, configured from the "Calculator" config section.
        self_balancing = SelfBalancingLogarithmPCACalculator(
            dataframe=self.dataframe,
            config=self.config["Calculator"],
        )
        wrapped = LogarithmPCACalculator(
            df=self.dataframe,
            pca_calculator=self_balancing,
        )
        self.calculator = wrapped
        return self.calculator

    def plot_logarithm_distributions(self) -> None:
        """Plot the logarithmic distributions of the dataset."""
        pca = self.calculator.pca_calculator
        pca.plot_logarithm_distributions()

    def plot_self_balancing_projected_distribution(
        self, pca_weights: np.ndarray
    ) -> None:
        """Apply the given PCA weights, then plot the projected distribution.

        Args:
            pca_weights (np.ndarray): Weights pushed into the underlying PCA
                calculator before plotting.
        """
        pca = self.calculator.pca_calculator
        pca.update_pca_weights(pca_weights=pca_weights)
        pca.plot_self_balancing_projected_distribution()

    def show_results(self) -> None:
        """Report the outcome of the optimization.

        Feeds the objective's best parameters into the PCA calculator, logs
        the calculator's results through the objective's logger, and finally
        asks the calculator to show its equation.
        """
        pca = self.calculator.pca_calculator
        objective = self.objective

        pca.update(pca_weights=objective.best_params)

        logger.info("Best parameters for PCA with logarithmic transformations:")
        pca.get_weights()

        # The objective's logger is built before it is used.
        objective.build_logger()
        summary = f"Best parameters: {pca.results}"
        objective.logger.info(summary)

        pca.show_equation()