Source code for insolver.transforms.date_time
from numpy import where
from pandas import to_datetime, Timedelta
[docs]
class DatetimeTransforms:
    """Get selected feature from date variable.

    Each processed column is parsed with ``pandas.to_datetime`` and the extracted
    feature is written to a new column named ``f'{column}_{feature}'`` in the
    DataFrame passed to ``__call__`` (the DataFrame is modified in place).

    Parameters:
        column_names (list): List of columns to convert with ``feature``; columns in column_names can't be
            duplicated in column_feature.
        column_types (dict): Optional dictionary mapping a source column name to the dtype its generated
            column is cast to. Columns that are absent (or mapped to a falsy value) are not cast.
        dayfirst (bool): Parameter from pandas.to_datetime(), specify a date parse order if arg is str or its
            list-likes.
        yearfirst (bool): Parameter from pandas.to_datetime(), specify a date parse order if arg is str or its
            list-likes.
        feature (str): Type of feature to get from date variable: unix (by default), date, time, month, quarter,
            year, day, day_of_the_week, weekend.
        column_feature (dict): Optional dictionary mapping a column name to the feature to extract from that
            column; columns in column_feature can't be duplicated in column_names.
        priority (int): Stored unchanged on the instance as ``priority``; it is not read by this class.
    """

    def __init__(
        self,
        column_names,
        column_types=None,
        dayfirst=False,
        yearfirst=False,
        feature='unix',
        column_feature=None,
        priority=0,
    ):
        self.priority = priority
        self.feature = feature
        self.column_names = column_names
        self.column_types = column_types
        self.dayfirst = dayfirst
        self.yearfirst = yearfirst
        self._feature_types = ['unix', 'date', 'time', 'month', 'quarter', 'year', 'day', 'day_of_the_week', 'weekend']
        self.column_feature = column_feature

    def _add_feature(self, df, column, feature):
        """Parse ``df[column]`` as datetimes and store the requested feature in ``df``."""
        parsed = to_datetime(df[column], dayfirst=self.dayfirst, yearfirst=self.yearfirst)
        values = self.feature_dict[feature](parsed)
        # column_types defaults to None, so fall back to an empty mapping
        # instead of calling dict methods on None.
        target_type = (self.column_types or {}).get(column)
        if target_type:
            values = values.astype(target_type)
        df[f'{column}_{feature}'] = values

    def _get_date_feature(self, df):
        """Add the configured date features to ``df`` in place.

        Raises:
            ValueError: If a column appears in both ``column_feature`` and ``column_names``.
            NotImplementedError: If ``feature`` is not one of the supported feature types.
            KeyError: If ``column_feature`` requests a feature that is not supported.
        """
        # Built on each call (not in __init__) so that a freshly constructed
        # instance holds no lambdas.
        self.feature_dict = {
            'unix': lambda col: (col - to_datetime("1970-01-01")) // Timedelta('1s'),
            'date': lambda col: col.dt.date,
            'time': lambda col: col.dt.time,
            'month': lambda col: col.dt.month,
            'quarter': lambda col: col.dt.quarter,
            'year': lambda col: col.dt.year,
            'day': lambda col: col.dt.day,
            'day_of_the_week': lambda col: col.dt.dayofweek,
            'weekend': lambda col: where(col.dt.day_name().isin(['Sunday', 'Saturday']), 1, 0),
        }
        column_feature = self.column_feature or {}

        # Validate the whole configuration before touching df so that a failing
        # call does not leave the DataFrame partially modified.
        if any(column in self.column_names for column in column_feature):
            raise ValueError(
                f'Columns in column_feature {list(column_feature)} '
                f'cannot be duplicated in column_names {self.column_names}.'
            )
        if self.feature not in self._feature_types:
            raise NotImplementedError(f'Method parameter supports values in {self._feature_types}.')

        for column, column_specific_feature in column_feature.items():
            self._add_feature(df, column, column_specific_feature)
        for column in self.column_names:
            self._add_feature(df, column, self.feature)

    def __call__(self, df):
        """Apply the transformation to ``df`` in place and return the same DataFrame."""
        self._get_date_feature(df)
        return df