From be55b14e2b5723f39c387d1ebd97e7daf333463d Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Tue, 13 Apr 2021 17:41:30 -0500 Subject: [PATCH] bug fix --- pipeline.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/pipeline.py b/pipeline.py index 0aba799..2a3919c 100644 --- a/pipeline.py +++ b/pipeline.py @@ -228,13 +228,13 @@ class Components : columns = args['columns'] if 'columns' in args else df.columns columns = list(set(columns) - set(_cols)) - # for name in columns: - # i = np.arange(df.shape[0]) - # np.random.shuffle(i) - # if name in x_cols : - # if df[name].unique().size > 0 : - # df[name] = self.approximate(df.iloc[i][name].fillna(0).values) - # df[name] = df[name].copy().astype(str) + for name in columns: + i = np.arange(df.shape[0]) + np.random.shuffle(i) + if name in x_cols : + if df[name].unique().size > 0 : + df[name] = self.approximate(df.iloc[i][name].fillna(0).values) + df[name] = df[name].astype(str) # pass df.index = np.arange(df.shape[0]) @@ -539,7 +539,7 @@ if __name__ == '__main__' : # COLUMNS = DATA.columns # DATA = np.array_split(DATA,PART_SIZE) # args['schema'] = schema - GPU_CHIPS = SYS_ARGS['gpu'] if 'gpu' in SYS_ARGS else None + GPU_CHIPS = args['gpu'] if 'gpu' in args else None if GPU_CHIPS and type(GPU_CHIPS) != list : GPU_CHIPS = [int(_id.strip()) for _id in GPU_CHIPS.split(',')] if type(GPU_CHIPS) == str else [GPU_CHIPS] if 'gpu' in SYS_ARGS : @@ -594,7 +594,7 @@ if __name__ == '__main__' : # else: # generator.generate(args) # Components.generate(args) - if '--all-chips' in SYS_ARGS and GPU_CHIPS: + if 'all-chips' in SYS_ARGS and GPU_CHIPS: index = 0 jobs = [] for _id in GPU_CHIPS : @@ -613,7 +613,7 @@ if __name__ == '__main__' : generator.generate(args) elif 'shuffle' in SYS_ARGS : index = 0 - if GPU_CHIPS and '--all-chips': + if GPU_CHIPS and 'all-chips' in SYS_ARGS: for index in GPU_CHIPS : publisher = lambda _params: ( Components() ).shuffle(_params) @@ -632,7 +632,7 @@ if __name__ == '__main__' : # Let us create n-jobs across n-gpus, The assumption here is the data that is produced will be a partition # @TODO: Find better name for partition # - if GPU_CHIPS and '--all-chips' in SYS_ARGS: + if GPU_CHIPS and 'all-chips' in SYS_ARGS: index = 0 for _gpu in GPU_CHIPS :