| 
						
						
							
								
							
						
						
					 | 
					 | 
					@ -122,10 +122,20 @@ class Components :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							_args = copy.deepcopy(args)
 | 
					 | 
					 | 
					 | 
							_args = copy.deepcopy(args)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							# _args['store']  = args['store']['source']
 | 
					 | 
					 | 
					 | 
							# _args['store']  = args['store']['source']
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							_args['data'] = df
 | 
					 | 
					 | 
					 | 
							_args['data'] = df
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
							#
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
							# The columns that are continuous should also be skipped because they don't need to be synthesized (that way)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
							if 'continuous' in args :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
								x_cols = args['continuous']
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
							else:
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
								x_cols = []
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							if 'ignore' in args and 'columns' in args['ignore'] :
 | 
					 | 
					 | 
					 | 
							if 'ignore' in args and 'columns' in args['ignore'] :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								_cols = self.get_ignore(data=df,columns=args['ignore']['columns'])
 | 
					 | 
					 | 
					 | 
								_cols = self.get_ignore(data=df,columns=args['ignore']['columns'])
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								_args['data'] = df[ list(set(df.columns)- set(_cols))]
 | 
					 | 
					 | 
					 | 
								_args['data'] = df[ list(set(df.columns)- set(_cols))]
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							
 | 
					 | 
					 | 
					 | 
							#
 | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
							# We need to make sure that continuous columns are removed 
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
							if x_cols :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
								_args['data'] = df[list(set(df.columns) - set(x_cols))]
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							data.maker.train(**_args)
 | 
					 | 
					 | 
					 | 
							data.maker.train(**_args)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							
 | 
					 | 
					 | 
					 | 
							
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							if 'autopilot' in ( list(args.keys())) :
 | 
					 | 
					 | 
					 | 
							if 'autopilot' in ( list(args.keys())) :
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -136,7 +146,26 @@ class Components :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							pass
 | 
					 | 
					 | 
					 | 
							pass
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
						def post(self,args):
 | 
					 | 
					 | 
					 | 
						def approximate(self,values):
							"""
							Return a randomly perturbed and shuffled copy of a numeric column.

							Every entry v becomes either v + v*r or v - v*r (direction chosen at random),
							where the ratios r are drawn from a Dirichlet distribution whose concentration
							parameters are the magnitudes of the entries. Integer input is truncated toward
							zero, floating point input is rounded to 2 decimals, and the result is shuffled.

							:param values	array of values to be approximated (numpy array or pandas Series)
							:return		numpy array of approximated values; non-numeric input is returned untouched
							"""
							#
							# Working on a plain numpy array makes every access positional; indexing a pandas Series
							# with an integer is label-based and fails (or picks the wrong row) on a non-default index
							data = np.atleast_1d(np.asarray(values))
							is_integer = np.issubdtype(data.dtype,np.integer)
							if not (is_integer or np.issubdtype(data.dtype,np.floating)) :
								return values
							#
							# The Dirichlet distribution only accepts strictly positive, finite parameters.
							# Entries that are zero, NaN or infinite are left out of the draw and get no perturbation,
							# negative entries contribute their magnitude
							weights = np.abs(data.astype(float))
							usable = np.isfinite(weights) & (weights > 0)
							delta = np.zeros(data.shape,dtype=float)
							if usable.any() :
								delta[usable] = data[usable] * np.random.dirichlet(weights[usable])
							signs = np.random.choice([-1.0,1.0],data.shape)
							result = data + (signs * delta)
							if is_integer :
								# truncation toward zero mirrors int(value)
								result = np.trunc(result).astype(data.dtype)
							else:
								result = np.round(result,2).astype(data.dtype)
							np.random.shuffle(result)
							return result
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							pass
 | 
					 | 
					 | 
					 | 
							pass
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								
 | 
					 | 
					 | 
					 | 
								
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
							
								
							
						
						
					 | 
					 | 
					@ -179,9 +208,22 @@ class Components :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							_dc = pd.DataFrame()
 | 
					 | 
					 | 
					 | 
							_dc = pd.DataFrame()
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							# for mdf in df :
 | 
					 | 
					 | 
					 | 
							# for mdf in df :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							args['data'] = df
 | 
					 | 
					 | 
					 | 
							args['data'] = df
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
							#
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
							# The columns that are continuous should also be skipped because they don't need to be synthesized (that way)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
							if 'continuous' in args :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
								x_cols = args['continuous']
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
							else:
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
								x_cols = []
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							if 'ignore' in args and 'columns' in args['ignore'] :
 | 
					 | 
					 | 
					 | 
							if 'ignore' in args and 'columns' in args['ignore'] :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								_cols = self.get_ignore(data=df,columns=args['ignore']['columns'])
 | 
					 | 
					 | 
					 | 
								_cols = self.get_ignore(data=df,columns=args['ignore']['columns'])
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								args['data'] = df[ list(set(df.columns)- set(_cols))]
 | 
					 | 
					 | 
					 | 
								args['data'] = df[ list(set(df.columns)- set(_cols))]
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
							#
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
							# We need to remove the continuous columns from the data-frame
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
							# @TODO: Abstract this !!
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
							#
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
							if x_cols :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
								args['data'] = df[list(set(df.columns) - set(x_cols))]
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							
 | 
					 | 
					 | 
					 | 
							
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							args['candidates']	= 1 if 'candidates' not in args else int(args['candidates'])
 | 
					 | 
					 | 
					 | 
							args['candidates']	= 1 if 'candidates' not in args else int(args['candidates'])
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							
 | 
					 | 
					 | 
					 | 
							
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -192,7 +234,10 @@ class Components :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								_columns = None
 | 
					 | 
					 | 
					 | 
								_columns = None
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								skip_columns = []
 | 
					 | 
					 | 
					 | 
								skip_columns = []
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								_schema = schema
 | 
					 | 
					 | 
					 | 
								_schema = schema
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
								if schema :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
									cols = [_item['name'] for _item in _schema]
 | 
					 | 
					 | 
					 | 
									cols = [_item['name'] for _item in _schema]
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
								else:
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
									cols = df.columns
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								for _df in candidates :
 | 
					 | 
					 | 
					 | 
								for _df in candidates :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
									#
 | 
					 | 
					 | 
					 | 
									#
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
									# we need to format the fields here to make sure we have something cohesive
 | 
					 | 
					 | 
					 | 
									# we need to format the fields here to make sure we have something cohesive
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -206,6 +251,9 @@ class Components :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
												# 	for _name in _df.columns:
 | 
					 | 
					 | 
					 | 
												# 	for _name in _df.columns:
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
												# 		if _name in name:
 | 
					 | 
					 | 
					 | 
												# 		if _name in name:
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
												# 			skip_columns.append(_name)
 | 
					 | 
					 | 
					 | 
												# 			skip_columns.append(_name)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
									if x_cols :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
										for _col in x_cols :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
											_df[_col] = self.approximate(df[_col])
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
									#
 | 
					 | 
					 | 
					 | 
									#
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
									# We perform a series of set operations to ensure that the following conditions are met:
 | 
					 | 
					 | 
					 | 
									# We perform a series of set operations to ensure that the following conditions are met:
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
									#	- the synthetic dataset only has fields that need to be synthesized
 | 
					 | 
					 | 
					 | 
									#	- the synthetic dataset only has fields that need to be synthesized
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -222,10 +270,16 @@ class Components :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
									# Let us merge the dataset here and have a comprehensive dataset
 | 
					 | 
					 | 
					 | 
									# Let us merge the dataset here and have a comprehensive dataset
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
									_df = pd.DataFrame.join(df,_df)
 | 
					 | 
					 | 
					 | 
									_df = pd.DataFrame.join(df,_df)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
									if _schema :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
										for _item in _schema :
 | 
					 | 
					 | 
					 | 
										for _item in _schema :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
											if _item['type'] in ['DATE','TIMESTAMP','DATETIME'] :
 | 
					 | 
					 | 
					 | 
											if _item['type'] in ['DATE','TIMESTAMP','DATETIME'] :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
												_df[_item['name']] = _df[_item['name']].astype(str)
 | 
					 | 
					 | 
					 | 
												_df[_item['name']] = _df[_item['name']].astype(str)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
									
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
											pass
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
									if _schema :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
										writer.write(_df[cols],schema=_schema,table=args['from'])
 | 
					 | 
					 | 
					 | 
										writer.write(_df[cols],schema=_schema,table=args['from'])
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
									else:
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
										writer.write(_df[cols],table=args['from'])
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								# 	writer.write(df,table=table)
 | 
					 | 
					 | 
					 | 
								# 	writer.write(df,table=table)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								pass
 | 
					 | 
					 | 
					 | 
								pass
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							else:
 | 
					 | 
					 | 
					 | 
							else:
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
						
					 | 
					 | 
					
 
 |