|
|
@ -74,20 +74,6 @@ class Components :
|
|
|
|
# pointer = args['reader'] if 'reader' in args else lambda: Components.get(**args)
|
|
|
|
# pointer = args['reader'] if 'reader' in args else lambda: Components.get(**args)
|
|
|
|
df = args['data']
|
|
|
|
df = args['data']
|
|
|
|
|
|
|
|
|
|
|
|
if 'slice' in args and 'max_rows' in args['slice']:
|
|
|
|
|
|
|
|
max_rows = args['slice']['max_rows']
|
|
|
|
|
|
|
|
if df.shape[0] > max_rows :
|
|
|
|
|
|
|
|
print (".. slicing ")
|
|
|
|
|
|
|
|
i = np.random.choice(df.shape[0],max_rows,replace=False)
|
|
|
|
|
|
|
|
df = df.iloc[i]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
|
|
|
|
# Certain columns need to be removed because they make the matrix too large
|
|
|
|
|
|
|
|
#
|
|
|
|
|
|
|
|
# if df.shape[0] == 0 :
|
|
|
|
|
|
|
|
# print ("CAN NOT TRAIN EMPTY DATASET ")
|
|
|
|
|
|
|
|
# return
|
|
|
|
|
|
|
|
#
|
|
|
|
#
|
|
|
|
# Now we can parse the arguments and submit the entire thing to training
|
|
|
|
# Now we can parse the arguments and submit the entire thing to training
|
|
|
|
#
|
|
|
|
#
|
|
|
@ -103,7 +89,7 @@ class Components :
|
|
|
|
if 'batch_size' in args :
|
|
|
|
if 'batch_size' in args :
|
|
|
|
_args['batch_size'] = int(args['batch_size'])
|
|
|
|
_args['batch_size'] = int(args['batch_size'])
|
|
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
_args['matrix_size'] = args['matrix_size'] if 'matrix_size' in args else 128 #
|
|
|
|
# We ask the process to assume 1 GPU, given the number of GPUs on the system and that these tasks can run in parallel
|
|
|
|
# We ask the process to assume 1 GPU, given the number of GPUs on the system and that these tasks can run in parallel
|
|
|
|
#
|
|
|
|
#
|
|
|
|
if int(args['num_gpu']) > 1 :
|
|
|
|
if int(args['num_gpu']) > 1 :
|
|
|
@ -157,6 +143,8 @@ class Components :
|
|
|
|
_args['num_gpu'] = 1
|
|
|
|
_args['num_gpu'] = 1
|
|
|
|
os.environ['CUDA_VISIBLE_DEVICES'] = str(args['gpu'])
|
|
|
|
os.environ['CUDA_VISIBLE_DEVICES'] = str(args['gpu'])
|
|
|
|
_args['no_value']= args['no_value']
|
|
|
|
_args['no_value']= args['no_value']
|
|
|
|
|
|
|
|
_args['matrix_size'] = args['matrix_size'] if 'matrix_size' in args else 128
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# MAX_ROWS = args['max_rows'] if 'max_rows' in args else 0
|
|
|
|
# MAX_ROWS = args['max_rows'] if 'max_rows' in args else 0
|
|
|
|
PART_SIZE = int(args['part_size']) if 'part_size' in args else 8
|
|
|
|
PART_SIZE = int(args['part_size']) if 'part_size' in args else 8
|
|
|
@ -298,6 +286,8 @@ if __name__ == '__main__' :
|
|
|
|
args[key] = _config[key]
|
|
|
|
args[key] = _config[key]
|
|
|
|
|
|
|
|
|
|
|
|
args = dict(args,**SYS_ARGS)
|
|
|
|
args = dict(args,**SYS_ARGS)
|
|
|
|
|
|
|
|
if 'matrix_size' in args :
|
|
|
|
|
|
|
|
args['matrix_size'] = int(args['matrix_size'])
|
|
|
|
if 'batch_size' not in args :
|
|
|
|
if 'batch_size' not in args :
|
|
|
|
args['batch_size'] = 2000 #if 'batch_size' not in args else int(args['batch_size'])
|
|
|
|
args['batch_size'] = 2000 #if 'batch_size' not in args else int(args['batch_size'])
|
|
|
|
if 'dataset' not in args :
|
|
|
|
if 'dataset' not in args :
|
|
|
|