|
|
@ -228,13 +228,13 @@ class Components :
|
|
|
|
|
|
|
|
|
|
|
|
columns = args['columns'] if 'columns' in args else df.columns
|
|
|
|
columns = args['columns'] if 'columns' in args else df.columns
|
|
|
|
columns = list(set(columns) - set(_cols))
|
|
|
|
columns = list(set(columns) - set(_cols))
|
|
|
|
# for name in columns:
|
|
|
|
for name in columns:
|
|
|
|
# i = np.arange(df.shape[0])
|
|
|
|
i = np.arange(df.shape[0])
|
|
|
|
# np.random.shuffle(i)
|
|
|
|
np.random.shuffle(i)
|
|
|
|
# if name in x_cols :
|
|
|
|
if name in x_cols :
|
|
|
|
# if df[name].unique().size > 0 :
|
|
|
|
if df[name].unique().size > 0 :
|
|
|
|
# df[name] = self.approximate(df.iloc[i][name].fillna(0).values)
|
|
|
|
df[name] = self.approximate(df.iloc[i][name].fillna(0).values)
|
|
|
|
# df[name] = df[name].copy().astype(str)
|
|
|
|
df[name] = df[name].astype(str)
|
|
|
|
# pass
|
|
|
|
# pass
|
|
|
|
|
|
|
|
|
|
|
|
df.index = np.arange(df.shape[0])
|
|
|
|
df.index = np.arange(df.shape[0])
|
|
|
@ -539,7 +539,7 @@ if __name__ == '__main__' :
|
|
|
|
# COLUMNS = DATA.columns
|
|
|
|
# COLUMNS = DATA.columns
|
|
|
|
# DATA = np.array_split(DATA,PART_SIZE)
|
|
|
|
# DATA = np.array_split(DATA,PART_SIZE)
|
|
|
|
# args['schema'] = schema
|
|
|
|
# args['schema'] = schema
|
|
|
|
GPU_CHIPS = SYS_ARGS['gpu'] if 'gpu' in SYS_ARGS else None
|
|
|
|
GPU_CHIPS = args['gpu'] if 'gpu' in args else None
|
|
|
|
if GPU_CHIPS and type(GPU_CHIPS) != list :
|
|
|
|
if GPU_CHIPS and type(GPU_CHIPS) != list :
|
|
|
|
GPU_CHIPS = [int(_id.strip()) for _id in GPU_CHIPS.split(',')] if type(GPU_CHIPS) == str else [GPU_CHIPS]
|
|
|
|
GPU_CHIPS = [int(_id.strip()) for _id in GPU_CHIPS.split(',')] if type(GPU_CHIPS) == str else [GPU_CHIPS]
|
|
|
|
if 'gpu' in SYS_ARGS :
|
|
|
|
if 'gpu' in SYS_ARGS :
|
|
|
@ -594,7 +594,7 @@ if __name__ == '__main__' :
|
|
|
|
# else:
|
|
|
|
# else:
|
|
|
|
# generator.generate(args)
|
|
|
|
# generator.generate(args)
|
|
|
|
# Components.generate(args)
|
|
|
|
# Components.generate(args)
|
|
|
|
if '--all-chips' in SYS_ARGS and GPU_CHIPS:
|
|
|
|
if 'all-chips' in SYS_ARGS and GPU_CHIPS:
|
|
|
|
index = 0
|
|
|
|
index = 0
|
|
|
|
jobs = []
|
|
|
|
jobs = []
|
|
|
|
for _id in GPU_CHIPS :
|
|
|
|
for _id in GPU_CHIPS :
|
|
|
@ -613,7 +613,7 @@ if __name__ == '__main__' :
|
|
|
|
generator.generate(args)
|
|
|
|
generator.generate(args)
|
|
|
|
elif 'shuffle' in SYS_ARGS :
|
|
|
|
elif 'shuffle' in SYS_ARGS :
|
|
|
|
index = 0
|
|
|
|
index = 0
|
|
|
|
if GPU_CHIPS and '--all-chips':
|
|
|
|
if GPU_CHIPS and 'all-chips' in SYS_ARGS:
|
|
|
|
|
|
|
|
|
|
|
|
for index in GPU_CHIPS :
|
|
|
|
for index in GPU_CHIPS :
|
|
|
|
publisher = lambda _params: ( Components() ).shuffle(_params)
|
|
|
|
publisher = lambda _params: ( Components() ).shuffle(_params)
|
|
|
@ -632,7 +632,7 @@ if __name__ == '__main__' :
|
|
|
|
# Let us create n-jobs across n-gpus, The assumption here is the data that is produced will be a partition
|
|
|
|
# Let us create n-jobs across n-gpus, The assumption here is the data that is produced will be a partition
|
|
|
|
# @TODO: Find better name for partition
|
|
|
|
# @TODO: Find better name for partition
|
|
|
|
#
|
|
|
|
#
|
|
|
|
if GPU_CHIPS and '--all-chips' in SYS_ARGS:
|
|
|
|
if GPU_CHIPS and 'all-chips' in SYS_ARGS:
|
|
|
|
index = 0
|
|
|
|
index = 0
|
|
|
|
|
|
|
|
|
|
|
|
for _gpu in GPU_CHIPS :
|
|
|
|
for _gpu in GPU_CHIPS :
|
|
|
|