| 
						
						
							
								
							
						
						
					 | 
					 | 
					@ -178,13 +178,14 @@ class Components :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							#
 | 
					 | 
					 | 
					 | 
							#
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							info = {"module":"generate","action":"io.metrics","input":{"rows":data_comp.shape[0],"partition":partition,"logs":[]}}
 | 
					 | 
					 | 
					 | 
							info = {"module":"generate","action":"io.metrics","input":{"rows":data_comp.shape[0],"partition":partition,"logs":[]}}
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							x = {}
 | 
					 | 
					 | 
					 | 
							x = {}
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							for name in args['columns'] :
 | 
					 | 
					 | 
					 | 
							# for name in args['columns'] :
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								ident = data_comp.apply(lambda row: 1*(row[name]==row[name+'_io']),axis=1).sum()
 | 
					 | 
					 | 
					 | 
							# 	ident = data_comp.apply(lambda row: 1*(row[name]==row[name+'_io']),axis=1).sum()
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								count = data_comp[name].unique().size
 | 
					 | 
					 | 
					 | 
							# 	count = data_comp[name].unique().size
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								_ident= data_comp.shape[1] - ident
 | 
					 | 
					 | 
					 | 
							# 	_ident= data_comp.shape[1] - ident
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								_count= data_comp[name+'_io'].unique().size
 | 
					 | 
					 | 
					 | 
							# 	_count= data_comp[name+'_io'].unique().size
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								
 | 
					 | 
					 | 
					 | 
							# 	_count= len(set(data_comp[name+'_io'].values.tolist()))
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								info['input']['logs'] += [{"name":name,"identical":int(ident),"no_identical":int(_ident),"original_count":count,"synthetic_count":_count}]
 | 
					 | 
					 | 
					 | 
								
 | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
							# 	info['input']['logs'] += [{"name":name,"identical":int(ident),"no_identical":int(_ident),"original_count":count,"synthetic_count":_count}]
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							# for name in data_comp.columns.tolist() :
 | 
					 | 
					 | 
					 | 
							# for name in data_comp.columns.tolist() :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								# g = pd.DataFrame(data_comp.groupby([name]).size())						
 | 
					 | 
					 | 
					 | 
								# g = pd.DataFrame(data_comp.groupby([name]).size())						
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								# g.columns = ['counts']
 | 
					 | 
					 | 
					 | 
								# g.columns = ['counts']
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -192,17 +193,17 @@ class Components :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								# g.index = np.arange(g.shape[0])
 | 
					 | 
					 | 
					 | 
								# g.index = np.arange(g.shape[0])
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								# logs.append({"name":name,"counts": g.to_dict(orient='records')})
 | 
					 | 
					 | 
					 | 
								# logs.append({"name":name,"counts": g.to_dict(orient='records')})
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							# info['input']['logs'] = logs
 | 
					 | 
					 | 
					 | 
							# info['input']['logs'] = logs
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							logger.write(info)
 | 
					 | 
					 | 
					 | 
							# logger.write(info)
 | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							
 | 
					 | 
					 | 
					 | 
							
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							base_cols = list(set(_args['data'].columns) - set(args['columns']))	#-- rebuilt the dataset (and store it)
 | 
					 | 
					 | 
					 | 
							base_cols = list(set(_args['data'].columns) - set(args['columns']))	#-- rebuilt the dataset (and store it)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							cols = _dc.columns.tolist()
 | 
					 | 
					 | 
					 | 
							cols = _dc.columns.tolist()
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							for name in cols :
 | 
					 | 
					 | 
					 | 
							# for name in cols :
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								_args['data'][name] = _dc[name]
 | 
					 | 
					 | 
					 | 
							# 	_args['data'][name] = _dc[name]
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								info = {"module":"generate","action":"io","input":{"rows":_dc[name].shape[0],"name":name}}
 | 
					 | 
					 | 
					 | 
							# 	info = {"module":"generate","action":"io","input":{"rows":_dc[name].shape[0],"name":name}}
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								if partition != '' :
 | 
					 | 
					 | 
					 | 
							# 	if partition != '' :
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
									info['partition'] = int(partition)
 | 
					 | 
					 | 
					 | 
							# 		info['partition'] = int(partition)
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								logger.write(info)
 | 
					 | 
					 | 
					 | 
							# 	logger.write(info)
 | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								
 | 
					 | 
					 | 
					 | 
								
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								# filename = os.sep.join([log_folder,'output',name+'.csv'])
 | 
					 | 
					 | 
					 | 
								# filename = os.sep.join([log_folder,'output',name+'.csv'])
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								# data_comp[[name]].to_csv(filename,index=False)
 | 
					 | 
					 | 
					 | 
								# data_comp[[name]].to_csv(filename,index=False)
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
						
					 | 
					 | 
					
 
 |