From 8997a5ca10dc0a2f9dd58ea3e7ee13a1415298ae Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Sun, 4 Apr 2021 13:29:57 -0500 Subject: [PATCH] bg fix : approximation --- pipeline.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pipeline.py b/pipeline.py index 00bb80c..1bb0707 100644 --- a/pipeline.py +++ b/pipeline.py @@ -300,12 +300,19 @@ class Components : _df = _df[list(set(_df.columns) - set(skip_columns))].copy() if x_cols : + _approx = {} for _col in x_cols : if real_df[_col].unique().size > 0 : + + _df[_col] = self.approximate(real_df[_col]) + _approx[_col] = { + "io":{"min":_df[_col].min(),"max":_df[_col].max(),"mean":_df[_col].mean(),"sd":_df[_col].values.std(),"missing": _df[_col].where(_df[_col] == -1).dropna().count(),"zeros":_df[_col].where(_df[_col] == 0).dropna().count()}, + "real":{"min":real_df[_col].min(),"max":real_df[_col].max(),"mean":real_df[_col].mean(),"sd":real_df[_col].values.std(),"missing": real_df[_col].where(_df[_col] == -1).dropna().count(),"zeros":real_df[_col].where(_df[_col] == 0).dropna().count()} + } else: _df[_col] = -1 - + logger.write({"module":"gan-generate","action":"approximate","status":_approx}) if set(df.columns) & set(_df.columns) : _columns = set(df.columns) - set(_df.columns) df = df[_columns]