| 
						
						
							
								
							
						
						
					 | 
					 | 
					@ -86,18 +86,6 @@ def train (**_args):
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    :params sql
 | 
					 | 
					 | 
					 | 
					    :params sql
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    :params store
 | 
					 | 
					 | 
					 | 
					    :params store
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    """
 | 
					 | 
					 | 
					 | 
					    """
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    #
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    # Let us prepare the data by calling the utility function
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    #
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    # if 'file' in _args :
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    #     #
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    #     # We are reading data from a file
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    #     _args['data'] = pd.read_csv(_args['file'])
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    # else:
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    #     #
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    #     # data will be read from elsewhere (a data-store)...
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    #     pass        
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    # if 'ignore' in _args and 'columns' in _args['ignore']:
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    
 | 
					 | 
					 | 
					 | 
					    
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    _inputhandler = prepare.Input(**_args)
 | 
					 | 
					 | 
					 | 
					    _inputhandler = prepare.Input(**_args)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    values,_matrix = _inputhandler.convert()
 | 
					 | 
					 | 
					 | 
					    values,_matrix = _inputhandler.convert()
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
							
								
							
						
						
					 | 
					 | 
					@ -125,6 +113,8 @@ def train (**_args):
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    args['matrix_size'] = _matrix.shape[0]
 | 
					 | 
					 | 
					 | 
					    args['matrix_size'] = _matrix.shape[0]
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    args['batch_size'] = 2000
 | 
					 | 
					 | 
					 | 
					    args['batch_size'] = 2000
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    args['partition'] = 0 if 'partition' not in _args else _args['partition']
 | 
					 | 
					 | 
					 | 
					    args['partition'] = 0 if 'partition' not in _args else _args['partition']
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    if 'gpu' in _args :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					        args['gpu'] = _args['gpu']
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    # os.environ['CUDA_VISIBLE_DEVICES'] = str(args['gpu']) if 'gpu' in args else '0'
 | 
					 | 
					 | 
					 | 
					    # os.environ['CUDA_VISIBLE_DEVICES'] = str(args['gpu']) if 'gpu' in args else '0'
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    
 | 
					 | 
					 | 
					 | 
					    
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    trainer = gan.Train(**args)   
 | 
					 | 
					 | 
					 | 
					    trainer = gan.Train(**args)   
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -137,50 +127,7 @@ def train (**_args):
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    trainer.apply()
 | 
					 | 
					 | 
					 | 
					    trainer.apply()
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    pass    
 | 
					 | 
					 | 
					 | 
					    pass    
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					def _train (**args) :
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    """
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    This function is intended to train the GAN in order to learn about the distribution of the features
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    :column     columns that need to be synthesized (discrete)
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    :logs       where the output of the (location on disk)
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    :id         identifier of the dataset
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    :data       data-frame to be synthesized
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    :context    label of what we are synthesizing
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    """
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    column      = args['column'] if (isinstance(args['column'],list)) else [args['column']]
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    # CONTINUOUS  = args['continuous'] if 'continuous' in args else []
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    # column_id   = args['id']
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    df          = args['data'] if not isinstance(args['data'],str) else pd.read_csv(args['data'])
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    df.columns = [name.lower() for name in df.columns]
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    #
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    # @TODO:
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    # Consider sequential training of sub population for extremely large datasets
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    #
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    #
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    # If we have several columns we will proceed one at a time (it could be done in separate threads)
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    # @TODO : Consider performing this task on several threads/GPUs simulataneously
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    # 
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    for col in column : 
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        msize = args['matrix_size'] if 'matrix_size' in args else -1        
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        args['real'] = (Binary()).apply(df[col],msize)
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        context     = args['context']
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        if 'store' in args :
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            args['store']['args']['doc'] = context
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            logger = factory.instance(**args['store'])
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            args['logger'] = logger
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            info = {"rows":args['real'].shape[0],"cols":args['real'].shape[1],"name":col,"partition":args['partition']}
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            logger.write({"module":"gan-train","action":"data-prep","input":info})
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        else:
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            logger = None
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        args['column']  = col
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        args['context'] = col
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        #
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        # If the s
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        trainer = gan.Train(**args)        
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        trainer.apply()
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					def get(**args):
 | 
					 | 
					 | 
					 | 
					def get(**args):
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    """
 | 
					 | 
					 | 
					 | 
					    """
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    This function will restore a checkpoint from a persistant storage on to disk
 | 
					 | 
					 | 
					 | 
					    This function will restore a checkpoint from a persistant storage on to disk
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
							
								
							
						
						
					 | 
					 | 
					@ -214,6 +161,8 @@ def generate(**_args):
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    _inputhandler = prepare.Input(**_args)
 | 
					 | 
					 | 
					 | 
					    _inputhandler = prepare.Input(**_args)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    values,_matrix = _inputhandler.convert()    
 | 
					 | 
					 | 
					 | 
					    values,_matrix = _inputhandler.convert()    
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    args['values'] = np.array(values)
 | 
					 | 
					 | 
					 | 
					    args['values'] = np.array(values)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    if 'gpu' in _args :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					        args['gpu'] = _args['gpu']
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					       
 | 
					 | 
					 | 
					 | 
					       
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    handler     = gan.Predict (**args)
 | 
					 | 
					 | 
					 | 
					    handler     = gan.Predict (**args)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    handler.load_meta(None)
 | 
					 | 
					 | 
					 | 
					    handler.load_meta(None)
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
						
					 | 
					 | 
					
 
 |