diff --git a/pipeline.py b/pipeline.py index f32a45e..a09fbde 100644 --- a/pipeline.py +++ b/pipeline.py @@ -231,7 +231,7 @@ class Components : if 'ignore' in args and 'columns' in args['ignore'] : _cols = self.get_ignore(data=df,columns=args['ignore']['columns']) - args['data'] = df[ list(set(df.columns)- set(_cols))] + args['data'] = args['data'][ list(set(df.columns)- set(_cols))] # # We need to remove the continuous columns from the data-frame # @TODO: Abstract this !! @@ -267,12 +267,6 @@ class Components : # for _name in _df.columns: # if _name in name: # skip_columns.append(_name) - if x_cols : - for _col in x_cols : - if df[_col].unique().size > 0 : - _df[_col] = self.approximate(df[_col].fillna(-1)) - else: - _df[_col] = -1 # # We perform a series of set operations to insure that the following conditions are met: # - the synthetic dataset only has fields that need to be synthesized @@ -284,6 +278,12 @@ class Components : if set(df.columns) & set(_df.columns) : _columns = set(df.columns) - set(_df.columns) df = df[_columns] + if x_cols : + for _col in x_cols : + if df[_col].unique().size > 0 : + _df[_col] = self.approximate(df[_col].fillna(-1)) + else: + _df[_col] = -1 # # Let us merge the dataset here and and have a comprehensive dataset