Source code for insolver.frame.frame

import json
from typing import Type, Optional, List, Dict, Union, Any
from numpy import dtype as numpy_dtype
from pandas import DataFrame
from ..model_tools import train_val_test_split


class InsolverDataFrame(DataFrame):
    """Primary DataFrame class for Insolver: a `pandas.DataFrame` subclass with a few helper methods."""

    def __init__(
        self,
        data: Any = None,
        index: Any = None,
        columns: Any = None,
        dtype: Optional[numpy_dtype] = None,
        copy: Optional[bool] = None,
    ) -> None:
        """Primary DataFrame class for Insolver. Almost the same as the pandas.DataFrame.

        All arguments are forwarded unchanged to `pandas.DataFrame.__init__`.

        Args:
            data (ndarray (structured or homogeneous), Iterable, dict, or pandas.DataFrame): Dict can contain
                `pandas.Series`, arrays, constants, dataclass or list-like objects. If data is a dict, column order
                follows insertion-order. If a dict contains `pandas.Series` which have an index defined, it is
                aligned by its index (default=None).
            index (pandas.Index or array-like): Index to use for resulting frame. Will default to RangeIndex if no
                indexing information part of input data and no index provided.
            columns (pandas.Index or array-like): Column labels to use for resulting frame when data does not have
                them, defaulting to `pandas.RangeIndex(0, 1, 2, …, n)`. If data contains column labels, will
                perform column selection instead (default=None).
            dtype (numpy.dtype): Data type to force. Only a single dtype is allowed. If `None`, infer
                (default=None).
            copy (bool): Copy data from inputs. For dict data, the default of None behaves like `copy=True`. For
                `pandas.DataFrame` or 2d ndarray input, the default of `None` behaves like copy=False
                (default=None).
        """
        # Keyword forwarding keeps every value bound to the intended pandas parameter.
        super().__init__(data=data, index=index, columns=columns, dtype=dtype, copy=copy)

    @property
    def _constructor(self) -> Type["InsolverDataFrame"]:
        # pandas consults this hook when building new frames from an existing one,
        # so results of DataFrame operations stay InsolverDataFrame instances.
        return InsolverDataFrame

    def get_meta_info(self) -> Dict[str, Union[str, int, List[Dict[str, Union[str, numpy_dtype]]]]]:
        """Gets JSON with Insolver meta information.

        Returns:
            dict: Meta information JSON with keys `type` (always 'InsolverDataFrame'), `len` (number of rows) and
            `columns` (one `{'name', 'dtype', 'use'}` entry per column, where `use` is always 'unknown').
        """
        # Iterate over `self.dtypes` rather than `self[column]`: with duplicated column labels,
        # `self[column]` is a DataFrame and its `.dtypes` is a Series, not a single dtype.
        columns_meta = [
            {'name': name, 'dtype': column_dtype, 'use': 'unknown'} for name, column_dtype in self.dtypes.items()
        ]
        return {'type': 'InsolverDataFrame', 'len': self.shape[0], 'columns': columns_meta}

    def split_frame(
        self,
        val_size: float,
        test_size: float,
        random_state: Optional[int] = 0,
        shuffle: bool = True,
        stratify: Any = None,
    ) -> List[DataFrame]:
        """Function for splitting dataset into train/validation/test partitions.

        Thin wrapper that delegates to `train_val_test_split` with this frame as the data.

        Args:
            val_size (float): The proportion of the dataset to include in validation partition.
            test_size (float): The proportion of the dataset to include in test partition.
            random_state (int, optional): Random state, passed to train_test_split() from scikit-learn (default=0).
            shuffle (bool, optional): Passed to train_test_split() from scikit-learn (default=True).
            stratify (array_like, optional): Passed to train_test_split() from scikit-learn (default=None).

        Returns:
            list: (train, valid, test). A list of partitions of the initial dataset.
        """
        split_options = {
            'val_size': val_size,
            'test_size': test_size,
            'random_state': random_state,
            'shuffle': shuffle,
            'stratify': stratify,
        }
        return train_val_test_split(self, **split_options)

    def sample_request(self, batch_size: int = 1) -> Dict[str, object]:
        """Create json request by a random sample from InsolverDataFrame.

        Args:
            batch_size: number of random samples.

        Returns:
            dict: `{'df': data}`. For `batch_size == 1`, `data` is the JSON form of the single sampled row;
            otherwise it is the JSON form of the whole sampled frame.
        """
        sampled = self.sample(batch_size)
        # A single sample is serialised as one row (a Series) instead of a one-row frame.
        payload = sampled.iloc[0] if batch_size == 1 else sampled
        # Round-trip through pandas' JSON writer so the values become plain Python objects.
        return {'df': json.loads(payload.to_json())}