|
|
@ -133,7 +133,7 @@ class Components :
|
|
|
|
self.generate(args)
|
|
|
|
self.generate(args)
|
|
|
|
|
|
|
|
|
|
|
|
pass
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
# @staticmethod
|
|
|
|
# @staticmethod
|
|
|
|
def generate(self,args):
|
|
|
|
def generate(self,args):
|
|
|
|
"""
|
|
|
|
"""
|
|
|
@ -168,11 +168,13 @@ class Components :
|
|
|
|
df = args['reader']() if 'reader' in args else args['data']
|
|
|
|
df = args['reader']() if 'reader' in args else args['data']
|
|
|
|
|
|
|
|
|
|
|
|
if 'slice' in args and 'max_rows' in args['slice']:
|
|
|
|
if 'slice' in args and 'max_rows' in args['slice']:
|
|
|
|
|
|
|
|
|
|
|
|
max_rows = args['slice']['max_rows']
|
|
|
|
max_rows = args['slice']['max_rows']
|
|
|
|
if df.shape[0] > max_rows :
|
|
|
|
if df.shape[0] > max_rows :
|
|
|
|
print (".. slicing ")
|
|
|
|
print (".. slicing ")
|
|
|
|
i = np.random.choice(df.shape[0],max_rows,replace=False)
|
|
|
|
i = np.random.choice(df.shape[0],max_rows,replace=False)
|
|
|
|
df = df.iloc[i]
|
|
|
|
df = df.iloc[i]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# bounds = Components.split(df,MAX_ROWS,PART_SIZE)
|
|
|
|
# bounds = Components.split(df,MAX_ROWS,PART_SIZE)
|
|
|
@ -182,7 +184,7 @@ class Components :
|
|
|
|
# df = pd.DataFrame(df[ int (partition) ],columns = columns)
|
|
|
|
# df = pd.DataFrame(df[ int (partition) ],columns = columns)
|
|
|
|
# max_rows = int(args['partition_max_rows']) if 'partition_max_rows' in args else 1000000
|
|
|
|
# max_rows = int(args['partition_max_rows']) if 'partition_max_rows' in args else 1000000
|
|
|
|
# N = np.divide(df.shape[0],max_rows).astype(int) + 1
|
|
|
|
# N = np.divide(df.shape[0],max_rows).astype(int) + 1
|
|
|
|
info = {"parition":int(partition),"gpu":_args["gpu"],"rows":int(df.shape[0]),"cols":int(df.shape[1]),"part_size":int(PART_SIZE)}
|
|
|
|
info = {"parition":int(partition),"gpu":_args["gpu"],"rows":int(df.shape[0]),"cols":int(df.shape[1]),"space":df[args['columns'][0]].unique().size, "part_size":int(PART_SIZE)}
|
|
|
|
logger.write({"module":"generate","action":"partition","input":info})
|
|
|
|
logger.write({"module":"generate","action":"partition","input":info})
|
|
|
|
_args['partition'] = int(partition)
|
|
|
|
_args['partition'] = int(partition)
|
|
|
|
_args['continuous']= args['continuous'] if 'continuous' in args else []
|
|
|
|
_args['continuous']= args['continuous'] if 'continuous' in args else []
|
|
|
@ -256,7 +258,7 @@ class Components :
|
|
|
|
data_comp.to_gbq(if_exists='append',destination_table=partial,credentials=credentials,chunksize=90000)
|
|
|
|
data_comp.to_gbq(if_exists='append',destination_table=partial,credentials=credentials,chunksize=90000)
|
|
|
|
|
|
|
|
|
|
|
|
INSERT_FLAG = 'replace' if 'partition' not in args or 'segment' not in args else 'append'
|
|
|
|
INSERT_FLAG = 'replace' if 'partition' not in args or 'segment' not in args else 'append'
|
|
|
|
print (_args['data'].dtypes)
|
|
|
|
|
|
|
|
_args['data'].to_gbq(if_exists='append',destination_table=complete,credentials=credentials,chunksize=90000)
|
|
|
|
_args['data'].to_gbq(if_exists='append',destination_table=complete,credentials=credentials,chunksize=90000)
|
|
|
|
Components.lock.release()
|
|
|
|
Components.lock.release()
|
|
|
|
_id = 'dataset'
|
|
|
|
_id = 'dataset'
|
|
|
|