diff --git a/mbs_results/outlier_detection/calculate_winsorised_weight.py b/mbs_results/outlier_detection/calculate_winsorised_weight.py index 1bbba0ac..148ebb69 100644 --- a/mbs_results/outlier_detection/calculate_winsorised_weight.py +++ b/mbs_results/outlier_detection/calculate_winsorised_weight.py @@ -67,7 +67,7 @@ def calculate_winsorised_weight( df = df.drop(["w", "new_target"], axis=1) non_winsorised = (df[sampled] == 0) | (df[nw_ag_flag] == True) # noqa: E712 - df["outlier_weight"] = df["outlier_weight"].mask(non_winsorised, np.nan) + df["outlier_weight"] = df["outlier_weight"].mask(non_winsorised, 1) df["new_target_variable"] = df["new_target_variable"].mask(non_winsorised, np.nan) return df diff --git a/tests/data/winsorisation/winsorised_weight_data.csv b/tests/data/winsorisation/winsorised_weight_data.csv index eccf0587..7eef4343 100755 --- a/tests/data/winsorisation/winsorised_weight_data.csv +++ b/tests/data/winsorisation/winsorised_weight_data.csv @@ -11,3 +11,4 @@ group,period,aux,sampled,a_weight,g_weight,target_variable,nw_ag_flag,predicted_ 102,202401,86,0,2.5,1.023809524,90,False,,, 104,202401,20,0,,0.004,90,False,,, 104,202401,30,0,,0.004,90,False,,, +104,202401,30,1,1,0.004,90,True,,, diff --git a/tests/data/winsorisation/winsorised_weight_data_output.csv b/tests/data/winsorisation/winsorised_weight_data_output.csv index 8b1c7812..281137e5 100755 --- a/tests/data/winsorisation/winsorised_weight_data_output.csv +++ b/tests/data/winsorisation/winsorised_weight_data_output.csv @@ -1,13 +1,14 @@ group,period,aux,sampled,a_weight,g_weight,target_variable,nw_ag_flag,predicted_unit_value,l_value,ratio_estimation_treshold,new_target_variable,outlier_weight -101,202401,10,0,1.666666667,1.023809524,12,False,,,,, +101,202401,10,0,1.666666667,1.023809524,12,False,,,,,1 101,202401,23,1,1.666666667,1.023809524,20,False,14.375,0.5,15.0828652,17.96453488,0.898227 101,202401,41,1,1.666666667,1.023809524,20,False,25.625,0.5,26.3328652,20,1 101,202402,53,1,1.666666667,1.023809524,40,False,40,0.5,40.7078652,40,1 -101,202401,12,0,1.666666667,1.023809524,10,False,,,,, +101,202401,12,0,1.666666667,1.023809524,10,False,,,,,1 102,202401,50,1,2.5,1.023809524,60,False,60,0.5,60.3206107,60,1 102,202402,40,1,2.5,1.023809524,50,False,50,0.5,50.3206107,50,1 -102,202401,45,0,2.5,1.023809524,50,False,,,,, -102,202401,70,0,2.5,1.023809524,60,False,,,,, -102,202401,86,0,2.5,1.023809524,90,False,,,,, -104,202401,20,0,,0.004,90,False,,,,, -104,202401,30,0,,0.004,90,False,,,,, +102,202401,45,0,2.5,1.023809524,50,False,,,,,1 +102,202401,70,0,2.5,1.023809524,60,False,,,,,1 +102,202401,86,0,2.5,1.023809524,90,False,,,,,1 +104,202401,20,0,,0.004,90,False,,,,,1 +104,202401,30,0,,0.004,90,False,,,,,1 +104,202401,30,1,1,0.004,90,True,,,,,1