|
|
|
#!/usr/bin/env python3
|
|
|
|
@staticmethod
|
|
|
|
df = pd.read_gbq(SQL,credentials=credentials,dialect='standard')
|
|
|
|
else :
|
|
|
|
|
|
|
|
def shuffle(self,_args):
|
|
|
|
for _item in _schema :
|
|
|
|
if _item['type'] in ['DATE','TIMESTAMP','DATETIME'] :
|
|
|
|
_df[_item['name']] = _df[_item['name']].astype(str)
|
|
|
|
writer.write(_df,schema=_schema,table=args['from'])
|
|
|
|
else:
|
|
|
|
writer.write(_df,table=args['from'])
|
|
|
|
_cast = {}
|
|
|
|
args['data'] = args['data'][ list(set(df.columns)- set(_cols))]
|
|
|
|
# we need to format the fields here to make sure we have something cohesive
|
|
|
|
if set(df.columns) & set(_df.columns) :
|
|
|
|
|
|
|
|
# info = {"full":{_id:_fname,"rows":_args['data'].shape[0]},"partial":{"path":_pname,"rows":data_comp.shape[0]} }
|
|
|
|
# if partition :
|
|
|
|
# info ['partition'] = int(partition)
|
|
|
|
# logger.write({"module":"generate","action":"write","input":info} )
|
|
|
|
args['batch_size'] = 2000 #if 'batch_size' not in args else int(args['batch_size'])
|
|
|
|
|
|
|
|
job.name = 'Trainer # ' + str(index)
|
|
|
|
time.sleep(2)
|
|
|
|
|