| 
						
						
							
								
							
						
						
					 | 
					 | 
					@ -151,6 +151,7 @@ class Components :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							if df.shape[0] and df.shape[0] :
 | 
					 | 
					 | 
					 | 
							if df.shape[0] and df.shape[0] :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								#
 | 
					 | 
					 | 
					 | 
								#
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								# We have a full blown matrix to be processed 
 | 
					 | 
					 | 
					 | 
								# We have a full blown matrix to be processed 
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
								print ('-- Training --')
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								data.maker.train(**_args)
 | 
					 | 
					 | 
					 | 
								data.maker.train(**_args)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							else:
 | 
					 | 
					 | 
					 | 
							else:
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								print ("... skipping training !!")
 | 
					 | 
					 | 
					 | 
								print ("... skipping training !!")
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
							
								
							
						
						
					 | 
					 | 
					@ -260,15 +261,22 @@ class Components :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
											#_df[name] = _df[name].dt.date
 | 
					 | 
					 | 
					 | 
											#_df[name] = _df[name].dt.date
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
											# _df[name] = pd.to_datetime(_df[name].fillna(''),errors='coerce')
 | 
					 | 
					 | 
					 | 
											# _df[name] = pd.to_datetime(_df[name].fillna(''),errors='coerce')
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
										else:
 | 
					 | 
					 | 
					 | 
										else:
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
											pass
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
											_df[name] = pd.to_datetime(_df[name])
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
									else:
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
										value = 0
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
										if _item['type'] == 'INTEGER' :
 | 
					 | 
					 | 
					 | 
										if _item['type'] == 'INTEGER' :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
											_type = np.int64
 | 
					 | 
					 | 
					 | 
											_type = np.int64
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
										elif _item['type'] in ['FLOAT','NUMERIC']:
 | 
					 | 
					 | 
					 | 
										elif _item['type'] in ['FLOAT','NUMERIC']:
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
											_type = np.float64
 | 
					 | 
					 | 
					 | 
											_type = np.float64
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
										else:
 | 
					 | 
					 | 
					 | 
										else:
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
											
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
											_value = ''
 | 
					 | 
					 | 
					 | 
											_value = ''
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
										_df[name] = _df[name].fillna(_value).astype(_type)
 | 
					 | 
					 | 
					 | 
										_df[name] = _df[name].fillna(_value) #.astype(_type)
 | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
									columns.append(name)
 | 
					 | 
					 | 
					 | 
									columns.append(name)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								writer.write(_df,schema=_schema,table=args['from'])
 | 
					 | 
					 | 
					 | 
								print ()
 | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
								print (_df)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
								writer.write(_df.astype(object),schema=_schema,table=args['from'])
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							else:
 | 
					 | 
					 | 
					 | 
							else:
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								writer.write(_df,table=args['from'])
 | 
					 | 
					 | 
					 | 
								writer.write(_df,table=args['from'])
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
							
								
							
						
						
					 | 
					 | 
					@ -350,7 +358,7 @@ class Components :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								for _item in schema :
 | 
					 | 
					 | 
					 | 
								for _item in schema :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
									dtype = str
 | 
					 | 
					 | 
					 | 
									dtype = str
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
									name = _item['name']
 | 
					 | 
					 | 
					 | 
									name = _item['name']
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
									novalue = -1
 | 
					 | 
					 | 
					 | 
									novalue = 0
 | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
									if _item['type'] in ['INTEGER','NUMERIC']:
 | 
					 | 
					 | 
					 | 
									if _item['type'] in ['INTEGER','NUMERIC']:
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
										dtype = np.int64
 | 
					 | 
					 | 
					 | 
										dtype = np.int64
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
										
 | 
					 | 
					 | 
					 | 
										
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
							
								
							
						
						
					 | 
					 | 
					@ -550,7 +558,7 @@ if __name__ == '__main__' :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							index = f[0] if f else 0
 | 
					 | 
					 | 
					 | 
							index = f[0] if f else 0
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
						#
 | 
					 | 
					 | 
					 | 
						#
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
						
 | 
					 | 
					 | 
					 | 
						
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
						print ("..::: ",PIPELINE[index]['context'])
 | 
					 | 
					 | 
					 | 
						print ("..::: ",PIPELINE[index]['context'],':::..')
 | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
						args =  (PIPELINE[index])
 | 
					 | 
					 | 
					 | 
						args =  (PIPELINE[index])
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
						for key in _config :
 | 
					 | 
					 | 
					 | 
						for key in _config :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							if key == 'pipeline' or key in args:
 | 
					 | 
					 | 
					 | 
							if key == 'pipeline' or key in args:
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -567,6 +575,7 @@ if __name__ == '__main__' :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							args['batch_size']	= 2000 #if 'batch_size' not in args else int(args['batch_size'])
 | 
					 | 
					 | 
					 | 
							args['batch_size']	= 2000 #if 'batch_size' not in args else int(args['batch_size'])
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
						if 'dataset' not in args :
 | 
					 | 
					 | 
					 | 
						if 'dataset' not in args :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							args['dataset'] = 'combined20191004v2_deid'
 | 
					 | 
					 | 
					 | 
							args['dataset'] = 'combined20191004v2_deid'
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
						args['logs'] = args['logs'] if 'logs' in args else 'logs'
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
						PART_SIZE = int(args['part_size']) if 'part_size' in args else 8
 | 
					 | 
					 | 
					 | 
						PART_SIZE = int(args['part_size']) if 'part_size' in args else 8
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
						#
 | 
					 | 
					 | 
					 | 
						#
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
						# @TODO:
 | 
					 | 
					 | 
					 | 
						# @TODO:
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
							
								
							
						
						
					 | 
					 | 
					@ -599,6 +608,7 @@ if __name__ == '__main__' :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
									jobs.append(job)
 | 
					 | 
					 | 
					 | 
									jobs.append(job)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								pass
 | 
					 | 
					 | 
					 | 
								pass
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							else:
 | 
					 | 
					 | 
					 | 
							else:
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
								
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								generator = Components()
 | 
					 | 
					 | 
					 | 
								generator = Components()
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								generator.generate(args)
 | 
					 | 
					 | 
					 | 
								generator.generate(args)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
						elif 'shuffle' in SYS_ARGS :
 | 
					 | 
					 | 
					 | 
						elif 'shuffle' in SYS_ARGS :
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
						
					 | 
					 | 
					
 
 |