@ -163,6 +163,21 @@ class Components :
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
							cols  =  _dc . columns . tolist ( ) 
 
					 
					 
					 
							cols  =  _dc . columns . tolist ( ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
							
 
					 
					 
					 
							
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
							data_comp  =  _args [ ' data ' ] [ args [ ' columns ' ] ] . join ( _dc [ args [ ' columns ' ] ] , rsuffix = ' _io ' ) 				#-- will be used for comparison (store this in big query) 
 
					 
					 
					 
							data_comp  =  _args [ ' data ' ] [ args [ ' columns ' ] ] . join ( _dc [ args [ ' columns ' ] ] , rsuffix = ' _io ' ) 				#-- will be used for comparison (store this in big query) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
							# 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
							# perform basic analytics on the generated synthetic data (makes it easy to assess quickly)
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
							# 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
							info  =  { " module " : " generate " , " action " : " io-stats " , " input " : { " rows " : data_comp . shape [ 0 ] , " partition " : partition , " logs " : [ ] } } 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
							logs  =  [ ] 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
							for  name  in  data_comp . columns . tolist ( )  : 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
								g  =  pd . DataFrame ( data_comp . groupby ( [ name ] ) . size ( ) ) 						
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
								g . columns  =  [ ' counts ' ] 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
								g [ name ]  =  g . index . tolist ( ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
								g . index  =  np . arange ( g . shape [ 0 ] ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
								logs . append ( { " name " : name , " counts " :  g . to_dict ( orient = ' records ' ) } ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
							info [ ' input ' ] [ ' logs ' ]  =  logs 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
							logger . write ( info ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
							
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
							base_cols  =  list ( set ( _args [ ' data ' ] . columns )  -  set ( args [ ' columns ' ] ) ) 	#-- rebuilt the dataset (and store it) 
 
					 
					 
					 
							base_cols  =  list ( set ( _args [ ' data ' ] . columns )  -  set ( args [ ' columns ' ] ) ) 	#-- rebuilt the dataset (and store it) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
							for  name  in  cols  : 
 
					 
					 
					 
							for  name  in  cols  : 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
								_args [ ' data ' ] [ name ]  =  _dc [ name ] 
 
					 
					 
					 
								_args [ ' data ' ] [ name ]  =  _dc [ name ] 
 
				
			 
			
		
	
	
		
		
			
				
					
						
						
						
							
								 
							 
						
					 
					 
					@ -170,6 +185,7 @@ class Components :
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
								if  partition  !=  ' '  : 
 
					 
					 
					 
								if  partition  !=  ' '  : 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
									info [ ' partition ' ]  =  int ( partition ) 
 
					 
					 
					 
									info [ ' partition ' ]  =  int ( partition ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
								logger . write ( info ) 
 
					 
					 
					 
								logger . write ( info ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
								
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
								# filename = os.sep.join([log_folder,'output',name+'.csv']) 
 
					 
					 
					 
								# filename = os.sep.join([log_folder,'output',name+'.csv']) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
								# data_comp[[name]].to_csv(filename,index=False) 
 
					 
					 
					 
								# data_comp[[name]].to_csv(filename,index=False) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
	
		
		
			
				
					
						
							
								 
							 
						
						
							
								 
							 
						
						
					 
					 
					@ -197,10 +213,10 @@ class Components :
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
								if  ' dump '  in  args  : 
 
					 
					 
					 
								if  ' dump '  in  args  : 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
									print  ( _args [ ' data ' ] . head ( ) ) 
 
					 
					 
					 
									print  ( _args [ ' data ' ] . head ( ) ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
								else : 
 
					 
					 
					 
								else : 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
									data_comp . to_gbq ( if_exists = ' append ' , destination_table = partial , credentials = credentials , chunksize = 5 0000) 	
 
					 
					 
					 
									data_comp . to_gbq ( if_exists = ' append ' , destination_table = partial , credentials = credentials , chunksize = 9 0000) 	
 
				
			 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
									
 
					 
					 
					 
									
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
									INSERT_FLAG  =  ' replace '  if  ' partition '  not  in  args  or  ' segment '  not  in  args  else  ' append ' 	
 
					 
					 
					 
									INSERT_FLAG  =  ' replace '  if  ' partition '  not  in  args  or  ' segment '  not  in  args  else  ' append ' 	
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
									_args [ ' data ' ] . to_gbq ( if_exists = ' append ' , destination_table = complete , credentials = credentials , chunksize = 5 0000) 
 
					 
					 
					 
									_args [ ' data ' ] . to_gbq ( if_exists = ' append ' , destination_table = complete , credentials = credentials , chunksize = 9 0000) 
 
				
			 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
								_id  =  ' dataset ' 
 
					 
					 
					 
								_id  =  ' dataset ' 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
							info  =  { " full " : { _id : _fname , " rows " : _args [ ' data ' ] . shape [ 0 ] } , " partial " : { " path " : _pname , " rows " : data_comp . shape [ 0 ] }  } 
 
					 
					 
					 
							info  =  { " full " : { _id : _fname , " rows " : _args [ ' data ' ] . shape [ 0 ] } , " partial " : { " path " : _pname , " rows " : data_comp . shape [ 0 ] }  } 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
							if  partition  : 
 
					 
					 
					 
							if  partition  :