|
|
@ -300,12 +300,19 @@ class Components :
|
|
|
|
|
|
|
|
|
|
|
|
_df = _df[list(set(_df.columns) - set(skip_columns))].copy()
|
|
|
|
_df = _df[list(set(_df.columns) - set(skip_columns))].copy()
|
|
|
|
if x_cols :
|
|
|
|
if x_cols :
|
|
|
|
|
|
|
|
_approx = {}
|
|
|
|
for _col in x_cols :
|
|
|
|
for _col in x_cols :
|
|
|
|
if real_df[_col].unique().size > 0 :
|
|
|
|
if real_df[_col].unique().size > 0 :
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
_df[_col] = self.approximate(real_df[_col])
|
|
|
|
_df[_col] = self.approximate(real_df[_col])
|
|
|
|
|
|
|
|
_approx[_col] = {
|
|
|
|
|
|
|
|
"io":{"min":_df[_col].min(),"max":_df[_col].max(),"mean":_df[_col].mean(),"sd":_df[_col].values.std(),"missing": _df[_col].where(_df[_col] == -1).dropna().count(),"zeros":_df[_col].where(_df[_col] == 0).dropna().count()},
|
|
|
|
|
|
|
|
"real":{"min":real_df[_col].min(),"max":real_df[_col].max(),"mean":real_df[_col].mean(),"sd":real_df[_col].values.std(),"missing": real_df[_col].where(_df[_col] == -1).dropna().count(),"zeros":real_df[_col].where(_df[_col] == 0).dropna().count()}
|
|
|
|
|
|
|
|
}
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
_df[_col] = -1
|
|
|
|
_df[_col] = -1
|
|
|
|
|
|
|
|
logger.write({"module":"gan-generate","action":"approximate","status":_approx})
|
|
|
|
if set(df.columns) & set(_df.columns) :
|
|
|
|
if set(df.columns) & set(_df.columns) :
|
|
|
|
_columns = set(df.columns) - set(_df.columns)
|
|
|
|
_columns = set(df.columns) - set(_df.columns)
|
|
|
|
df = df[_columns]
|
|
|
|
df = df[_columns]
|
|
|
|