# Source code for insolver.transforms.basic
from pandas import DataFrame, to_numeric, concat, get_dummies
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
[docs]
class TransformToNumeric:
    """Converts a column to a numeric dtype with Pandas' 'to_numeric'.

    Parameters:
        column_param (str): Column name in InsolverDataFrame containing parameter to transform.
        downcast: Target numeric dtype, passed as 'downcast' to Pandas' 'to_numeric', 'integer' by default.
        priority (int): Value stored as the 'priority' attribute, 0 by default.
    """

    def __init__(self, column_param, downcast='integer', priority=0):
        self.column_param = column_param
        self.downcast = downcast
        self.priority = priority

    def __call__(self, df):
        """Overwrite the configured column of 'df' with its numeric version and return 'df'."""
        target = self.column_param
        converted = to_numeric(df[target], downcast=self.downcast)
        df[target] = converted
        return df
[docs]
class TransformMapValues:
    """Replaces the values of a column using a lookup dictionary (Pandas' 'Series.map').

    Parameters:
        column_param (str): Column name in InsolverDataFrame containing parameter to map.
        dictionary (dict): The dictionary for mapping.
        priority (int): Value stored as the 'priority' attribute, 1 by default.
    """

    def __init__(self, column_param, dictionary, priority=1):
        self.column_param = column_param
        self.dictionary = dictionary
        self.priority = priority

    def __call__(self, df):
        """Overwrite the configured column of 'df' with the mapped values and return 'df'."""
        target = self.column_param
        mapped = df[target].map(self.dictionary)
        df[target] = mapped
        return df
[docs]
class TransformPolynomizer:
    """Adds polynomial powers of a column as new columns.

    For every degree from 2 up to 'n' a column named '<column_param>_<degree>' is added;
    if that name is already taken, underscores are appended until it is unique.

    Parameters:
        column_param (str): Column name in InsolverDataFrame containing parameter to polynomize.
        n (int): Polynomial degree.
        priority (int): Value stored as the 'priority' attribute, 3 by default.
    """

    def __init__(self, column_param, n=2, priority=3):
        self.column_param = column_param
        self.n = n
        self.priority = priority

    def __call__(self, df):
        """Append the power columns to 'df' in place and return 'df'."""
        base = self.column_param
        for degree in range(2, self.n + 1):
            name = '_'.join((base, str(degree)))
            # Pad with underscores until the name no longer collides with an existing column.
            while name in df.columns.tolist():
                name += '_'
            df[name] = df[base] ** degree
        return df
[docs]
class TransformGetDummies:
    """Gets dummy columns of the parameter, uses Pandas' 'get_dummies'.

    While no dummy column names are known, a call encodes the column with 'get_dummies',
    replaces spaces in the resulting column names with underscores and remembers those names.
    Once names are known (remembered from such a call, or supplied for inference), a call
    rebuilds each listed indicator column as int8 by comparing '<column_param>_<value>'
    (normalised the same way) with the column name.

    Parameters:
        column_param (str): Column name in InsolverDataFrame containing parameter to transform.
        drop_first (bool): Whether to get k-1 dummies out of k categorical levels by removing the first level,
            False by default.
        inference (bool): Sign if the transformation is used for inference, False by default.
        dummy_columns (list): List of the dummy columns, for inference only.
        priority (int): Value stored as the 'priority' attribute, 3 by default.
    """

    def __init__(self, column_param, drop_first=False, inference=False, dummy_columns=None, priority=3):
        self.priority = priority
        self.column_param = column_param
        self.drop_first = drop_first
        self.inference = inference
        # Supplied dummy columns are honoured only in inference mode.
        if inference and dummy_columns is not None:
            self.dummy_columns = dummy_columns
        else:
            self.dummy_columns = []

    def __call__(self, df):
        """Return 'df' extended with the dummy columns of the configured column."""
        if not self.dummy_columns:
            df_dummy = get_dummies(df[[self.column_param]], prefix_sep='_', drop_first=self.drop_first)
            self.dummy_columns = [col.replace(' ', '_') for col in df_dummy.columns]
            df_dummy.columns = self.dummy_columns
            df = concat([df, df_dummy], axis=1)
        else:
            values = df[self.column_param]
            # Missing values must never match a dummy column, even one literally named '..._nan'.
            present = values.notna()
            # Apply the same space -> underscore normalisation used when the names were created;
            # otherwise categories (or column names) containing spaces always encode as 0.
            # astype(str) also lets non-string columns be compared instead of raising TypeError.
            labels = (self.column_param + '_' + values.astype(str)).str.replace(' ', '_', regex=False)
            for column in self.dummy_columns:
                df[column] = ((labels == column) & present).astype('int8')
        return df
[docs]
class EncoderTransforms:
    """Label-encodes columns with scikit-learn's 'LabelEncoder'.

    Every call fits a fresh encoder per column, overwrites the column with the encoded
    values and stores the fitted classes of each column in 'le_classes'.

    Parameters:
        column_names (list): columns for label encoding
        le_classes (dict): dictionary with label encoding classes for each column;
            it is replaced by the freshly fitted classes on every call.
        priority (int): Value stored as the 'priority' attribute, 3 by default.
    """

    def __init__(self, column_names, le_classes=None, priority=3):
        self.column_names = column_names
        self.le_classes = le_classes
        self.priority = priority

    @staticmethod
    def _encode_column(column):
        """Fit a LabelEncoder on 'column'; return the encoded values and the class list."""
        encoder = LabelEncoder().fit(column)
        classes = encoder.classes_.tolist()
        return encoder.transform(column), classes

    def __call__(self, df):
        """Encode every configured column of 'df' in place and return 'df'."""
        self.le_classes = {}
        for name in self.column_names:
            codes, classes = self._encode_column(df[name])
            df[name] = codes
            self.le_classes[name] = classes
        return df
[docs]
class OneHotEncoderTransforms:
    """One-hot encodes columns with scikit-learn's 'OneHotEncoder'.

    Every call fits a fresh encoder per column, adds one column per encoder feature name
    to the frame, drops the source column and stores the fitted categories of each column
    in 'encoder_dict'.

    Parameters:
        column_names (list): columns for one hot encoding
        encoder_dict (dict): dictionary with encoder_params for each column;
            it is replaced by the freshly fitted categories on every call.
        priority (int): Value stored as the 'priority' attribute, 3 by default.
    """

    def __init__(self, column_names, encoder_dict=None, priority=3):
        self.priority = priority
        self.column_names = column_names
        self.encoder_dict = encoder_dict

    @staticmethod
    def _encode_column(df, column_name):
        """Add the one-hot columns for 'column_name' to 'df' in place.

        Returns:
            list: The encoder's fitted categories converted to plain lists.
        """
        encoder = OneHotEncoder(sparse_output=False)
        source = df[[column_name]]
        encoder.fit(source)
        encoder_params = [categories.tolist() for categories in encoder.categories_]
        encoded = encoder.transform(source)
        feature_names = encoder.get_feature_names_out([column_name]).tolist()
        # Assign the raw arrays positionally. Going through an intermediate DataFrame with a
        # default RangeIndex makes pandas align on index labels, which yields NaN / misplaced
        # values whenever 'df' does not have a 0..n-1 index (e.g. after filtering rows).
        for position, feature in enumerate(feature_names):
            df[feature] = encoded[:, position]
        return encoder_params

    def __call__(self, df):
        """One-hot encode every configured column of 'df' in place and return 'df'."""
        self.encoder_dict = {}
        for column in self.column_names:
            encoder_params = self._encode_column(df, column)
            self.encoder_dict[column] = encoder_params
            # The source column is replaced by its one-hot columns.
            df.drop([column], axis=1, inplace=True)
        return df