Source code for insolver.transforms.insurance

import datetime

from numpy import timedelta64
from pandas import isnull, merge


[docs] class TransformExp: """Transforms values of drivers' minimum experiences in years with values over 'exp_max' grouped. Parameters: column_driver_minexp (str): Column name in InsolverDataFrame containing drivers' minimum experiences in years, column type is integer. exp_max (int): Maximum value of drivers' experience in years, bigger values will be grouped, 52 by default. """ def __init__(self, column_driver_minexp, exp_max=52, priority=1): self.priority = priority self.column_driver_minexp = column_driver_minexp self.exp_max = exp_max @staticmethod def _exp(exp, exp_max): if isnull(exp): exp = None elif exp < 0: exp = None elif exp > exp_max: exp = exp_max return exp def __call__(self, df): df[self.column_driver_minexp] = df[self.column_driver_minexp].apply(self._exp, args=(self.exp_max,)) return df
[docs] class TransformAgeExpDiff: """Transforms records with difference between drivers' minimum age and minimum experience less then 'diff_min' years, sets drivers' minimum experience equal to drivers' minimum age minus 'diff_min' years. Parameters: column_driver_minage (str): Column name in InsolverDataFrame containing drivers' minimum ages in years, column type is integer. column_driver_minexp (str): Column name in InsolverDataFrame containing drivers' minimum experiences in years, column type is integer. diff_min (int): Minimum allowed difference between age and experience in years. """ def __init__(self, column_driver_minage, column_driver_minexp, diff_min=18, priority=2): self.priority = priority self.column_driver_minage = column_driver_minage self.column_driver_minexp = column_driver_minexp self.diff_min = diff_min def __call__(self, df): self.num_errors = len(df.loc[(df[self.column_driver_minage] - df[self.column_driver_minexp]) < self.diff_min]) df[self.column_driver_minexp].loc[ (df[self.column_driver_minage] - df[self.column_driver_minexp]) < self.diff_min ] = (df[self.column_driver_minage] - self.diff_min) return df
[docs] class TransformVehPower: """Transforms values of vehicles' powers. Values under 'power_min' and over 'power_max' will be grouped. Values between 'power_min' and 'power_max' will be grouped with step 'power_step'. Parameters: column_veh_power (str): Column name in InsolverDataFrame containing vehicles' powers, column type is float. power_min (float): Minimum value of vehicles' power, lower values will be grouped, 10 by default. power_max (float): Maximum value of vehicles' power, bigger values will be grouped, 500 by default. power_step (int): Values of vehicles' power will be divided by this parameter, rounded to integers, 10 by default. """ def __init__(self, column_veh_power, power_min=10, power_max=500, power_step=10, priority=1): self.priority = priority self.column_veh_power = column_veh_power self.power_min = power_min self.power_max = power_max self.power_step = power_step @staticmethod def _power(power, power_min, power_max, power_step): if isnull(power): power = None elif power < power_min: power = power_min elif power > power_max: power = power_max else: power = int(round(power / power_step, 0)) return power def __call__(self, df): df[self.column_veh_power] = df[self.column_veh_power].apply( self._power, args=( self.power_min, self.power_max, self.power_step, ), ) return df
[docs] class TransformVehAgeGetFromIssueYear: """Gets vehicles' ages in years from issue years and policies' start dates. Parameters: column_veh_issue_year (str): Column name in InsolverDataFrame containing vehicles' issue years, column type is integer. column_date_start (str): Column name in InsolverDataFrame containing policies' start dates, column type is date. column_veh_age (str): Column name in InsolverDataFrame for vehicles' ages in years, column type is integer. """ def __init__(self, column_veh_issue_year, column_date_start, column_veh_age, priority=0): self.priority = priority self.column_veh_issue_year = column_veh_issue_year self.column_date_start = column_date_start self.column_veh_age = column_veh_age @staticmethod def _veh_age_get(issueyear_datestart): veh_issue_year = issueyear_datestart[0] date_start = issueyear_datestart[1] if isnull(veh_issue_year): veh_age = None elif isnull(date_start): veh_age = None elif veh_issue_year > datetime.datetime.now().year: veh_age = None elif veh_issue_year < datetime.datetime.now().year - 90: veh_age = None elif veh_issue_year > date_start.year: veh_age = None else: veh_age = date_start.year - veh_issue_year return veh_age def __call__(self, df): df[self.column_veh_age] = df[[self.column_veh_issue_year, self.column_date_start]].apply( self._veh_age_get, axis=1 ) return df
[docs] class TransformVehAge: """Transforms values of vehicles' ages in years. Values over 'veh_age_max' will be grouped. Parameters: column_veh_age (str): Column name in InsolverDataFrame containing vehicles' ages in years, column type is integer. veh_age_max (int): Maximum value of vehicles' age in years, bigger values will be grouped, 25 by default. """ def __init__(self, column_veh_age, veh_age_max=25, priority=1): self.priority = priority self.column_veh_age = column_veh_age self.veh_age_max = veh_age_max @staticmethod def _veh_age(age, age_max): if isnull(age): age = None elif age < 0: age = None elif age > age_max: age = age_max return age def __call__(self, df): df[self.column_veh_age] = df[self.column_veh_age].apply(self._veh_age, args=(self.veh_age_max,)) return df
[docs] class TransformRegionGetFromKladr: """Gets regions' numbers from KLADRs. Parameters: column_kladr (str): Column name in InsolverDataFrame containing KLADRs, column type is string. column_region_num (str): Column name in InsolverDataFrame for regions' numbers, column type is integer. """ def __init__(self, column_kladr, column_region_num, priority=0): self.priority = priority self.column_kladr = column_kladr self.column_region_num = column_region_num @staticmethod def _region_get(kladr): if isnull(kladr): region_num = None else: region_num = kladr[0:2] try: region_num = int(region_num) except ValueError: region_num = None return region_num def __call__(self, df): df[self.column_region_num] = df[self.column_kladr].apply(self._region_get) return df
[docs] class TransformCarFleetSize: """Calculates fleet sizes for policyholders. Parameters: column_id (str): Column name in InsolverDataFrame containing policyholders' IDs. column_date_start (str): Column name in InsolverDataFrame containing policies' start dates, column type is date. column_fleet_size (str): Column name in InsolverDataFrame for fleet sizes, column type is int. """ def __init__(self, column_id, column_date_start, column_fleet_size, priority=3): self.priority = priority self.column_id = column_id self.column_date_start = column_date_start self.column_fleet_size = column_fleet_size def __call__(self, df): cp = merge( df[[self.column_id, self.column_date_start]], df[[self.column_id, self.column_date_start]], on=self.column_id, how='left', ) cp = cp[ (cp[f'{self.column_date_start}_y'] > cp[f'{self.column_date_start}_x'] - timedelta64(1, 'Y')) & (cp[f'{self.column_date_start}_y'] <= cp[f'{self.column_date_start}_y']) ] cp = cp.groupby(self.column_id).size().to_dict() df[self.column_fleet_size] = df[self.column_id].map(cp) return df