Source code for paradance.dataloader.load_excel

import os
from typing import Dict, List, Optional, Union

import pandas as pd

from .base import BaseDataLoader


class ExcelLoader(BaseDataLoader):
    """Data loader that reads Excel workbooks into a pandas DataFrame.

    Either a single workbook (when ``file_name`` is given) or every file in
    ``file_path`` whose name ends with ``file_type`` is loaded.

    ``load_data`` reads ``file_path``, ``file_name``, ``file_type`` and
    ``max_rows`` from the instance; they are expected to be set by
    ``BaseDataLoader.__init__`` (defined outside this module).
    """

    def __init__(
        self,
        file_path: Optional[str] = None,
        file_name: Optional[str] = None,
        file_type: Optional[str] = "xlsx",
        max_rows: Optional[int] = None,
        clean_zero_columns: Union[bool, List] = False,
        config: Optional[Dict] = None,
    ) -> None:
        """Forward every argument, unchanged and in order, to the base loader.

        Args:
            file_path: Directory that contains the workbook(s).
            file_name: Workbook name without its extension. When ``None``,
                all matching files in ``file_path`` are loaded instead.
            file_type: File extension / name suffix of the workbooks.
            max_rows: Upper bound on the number of rows returned by
                ``load_data``; ``None`` means no limit.
            clean_zero_columns: Passed through to ``BaseDataLoader``; not
                used directly by this class.
            config: Passed through to ``BaseDataLoader``; not used directly
                by this class.
        """
        super().__init__(
            file_path, file_name, file_type, max_rows, clean_zero_columns, config
        )

    def load_data(self) -> pd.DataFrame:
        """Load data from excel file.

        Returns:
            The loaded rows, truncated to at most ``max_rows`` rows when a
            limit is set. In directory mode the frames are concatenated in
            sorted file-name order and keep their original per-file index.

        Raises:
            ValueError: In directory mode, when no file name in
                ``file_path`` ends with ``file_type``.
        """
        directory = str(self.file_path)

        if self.file_name is not None:
            # Honour the configured file type instead of a hard-coded
            # ".xlsx"; fall back to "xlsx" so the default stays unchanged.
            if self.file_type is None:
                extension = "xlsx"
            else:
                # Tolerate a leading dot (".xlsx") to avoid "name..xlsx".
                extension = str(self.file_type).lstrip(".")
            workbook = os.path.join(directory, f"{self.file_name}.{extension}")
            df = pd.read_excel(workbook)
        else:
            suffix = str(self.file_type)
            # os.listdir returns entries in arbitrary order; sort so the
            # concatenated row order (and thus the max_rows cut) is stable.
            workbooks = sorted(
                entry for entry in os.listdir(self.file_path) if entry.endswith(suffix)
            )
            if not workbooks:
                # pd.concat([]) would raise an opaque ValueError; keep the
                # exception type but say what was actually missing.
                raise ValueError(
                    f"No files ending with {suffix!r} found in {directory!r}"
                )
            df = pd.concat(
                [pd.read_excel(os.path.join(directory, entry)) for entry in workbooks]
            )

        # A slice end of None keeps every row and an end beyond the frame
        # length is clamped, so no explicit min() against df.shape[0] is needed.
        return df.iloc[: self.max_rows, :]