parent
							
								
									98a1062a30
								
							
						
					
					
						commit
						d5a343da84
					
				@ -1,705 +0,0 @@
 | 
				
			|||||||
"""
 | 
					 | 
				
			||||||
This code was originally written by Ziqi Zhang <ziqi.zhang@vanderbilt.edu> in order to generate synthetic data.
 | 
					 | 
				
			||||||
The code is an implementation of a Generative Adversarial Network that uses the Wasserstein Distance (WGAN).
 | 
					 | 
				
			||||||
It is intended to be used in 2 modes (embedded in code or using CLI)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
USAGE :
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The following parameters should be provided in a configuration file (JSON format)
 | 
					 | 
				
			||||||
python data/maker --config <path-to-config-file.json>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
CONFIGURATION FILE STRUCTURE :
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
																																																																context																																																																	what it is you are loading (stroke, hypertension, ...)
 | 
					 | 
				
			||||||
																																																																data																																																																																						path of the file to be loaded
 | 
					 | 
				
			||||||
																																																																logs																																																																																						folder to store training model and meta data about learning
 | 
					 | 
				
			||||||
																																																																max_epochs																																												number of iterations in learning 
 | 
					 | 
				
			||||||
																																																																num_gpu																																																																	number of gpus to be used (will still run if the GPUs are not available)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
EMBEDDED IN CODE :
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
"""
 | 
					 | 
				
			||||||
import tensorflow as tf
 | 
					 | 
				
			||||||
from tensorflow.contrib.layers import l2_regularizer
 | 
					 | 
				
			||||||
import numpy as np
 | 
					 | 
				
			||||||
import pandas as pd
 | 
					 | 
				
			||||||
import time
 | 
					 | 
				
			||||||
import os
 | 
					 | 
				
			||||||
import sys
 | 
					 | 
				
			||||||
from data.params import SYS_ARGS
 | 
					 | 
				
			||||||
from data.bridge import Binary
 | 
					 | 
				
			||||||
import json
 | 
					 | 
				
			||||||
import pickle
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Process-wide environment settings for CUDA device selection and TensorFlow's
# C++ log verbosity, applied in one call.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    'CUDA_VISIBLE_DEVICES': "0",
    'TF_CPP_MIN_LOG_LEVEL': '2',
})
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# STEPS_PER_EPOCH																																																																															= int(SYS_ARGS['epoch']) if 'epoch' in SYS_ARGS else 256
 | 
					 | 
				
			||||||
# NUM_GPUS																																																																																																																																																																																												= 1 if 'num_gpu' not in SYS_ARGS else int(SYS_ARGS['num_gpu'])
 | 
					 | 
				
			||||||
# BATCHSIZE_PER_GPU																											= 2000
 | 
					 | 
				
			||||||
# TOTAL_BATCHSIZE																																																																															= BATCHSIZE_PER_GPU * NUM_GPUS
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class void:
    """Empty attribute holder; instances are used as ad-hoc namespaces (e.g. `obj.layers`, `obj.get`)."""
 | 
					 | 
				
			||||||
class GNet :
 | 
					 | 
				
			||||||
																																																																def log(self,**args):
 | 
					 | 
				
			||||||
																																																																																																																																self.logs = dict(args,**self.logs)
 | 
					 | 
				
			||||||
																																																																																	
 | 
					 | 
				
			||||||
																																																																																																																																																																																																
 | 
					 | 
				
			||||||
																																																																"""
 | 
					 | 
				
			||||||
																																																																This is the base class of the generative network functions; the details are implemented in the subclasses.
 | 
					 | 
				
			||||||
																																																																An instance of this class is accessed as follows 
 | 
					 | 
				
			||||||
																																																																object.layers.normalize applies batch normalization or otherwise
 | 
					 | 
				
			||||||
																																																																object.get.variables				instantiate variables on cpu and return a reference (tensor)
 | 
					 | 
				
			||||||
																																																																"""
 | 
					 | 
				
			||||||
																																																																def __init__(self,**args):
 | 
					 | 
				
			||||||
																																																																																																																																self.layers = void()
 | 
					 | 
				
			||||||
																																																																																																																																self.layers.normalize = self.normalize
 | 
					 | 
				
			||||||
																																																																																																																																self.logs = {}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
																																																																																																																																self.NUM_GPUS = 1 if 'num_gpu' not in args else args['num_gpu']
 | 
					 | 
				
			||||||
																																																																																																																																# if self.NUM_GPUS > 1 :
 | 
					 | 
				
			||||||
																																																																																																																																#																																															os.environ['CUDA_VISIBLE_DEVICES'] = "4"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
																																																																																																																																self.X_SPACE_SIZE = args['real'].shape[1] if 'real' in args else 854
 | 
					 | 
				
			||||||
																																																																																																																																self.G_STRUCTURE = [128,128] #[self.X_SPACE_SIZE, self.X_SPACE_SIZE]
 | 
					 | 
				
			||||||
																																																																																																																																self.D_STRUCTURE = [self.X_SPACE_SIZE,256,128] #[self.X_SPACE_SIZE, self.X_SPACE_SIZE*2, self.X_SPACE_SIZE] #-- change 854 to number of diagnosis
 | 
					 | 
				
			||||||
																																																																																																																																# self.NUM_LABELS																																																																															= 8 if 'label' not in args elif len(args['label'].shape) args['label'].shape[1]
 | 
					 | 
				
			||||||
																																																																																																																																
 | 
					 | 
				
			||||||
																																																																																																																																if 'label' in args and len(args['label'].shape) == 2 :
 | 
					 | 
				
			||||||
																																																																																																																																																																																																self.NUM_LABELS = args['label'].shape[1]
 | 
					 | 
				
			||||||
																																																																																																																																elif 'label' in args and len(args['label']) == 1 :
 | 
					 | 
				
			||||||
																																																																																																																																																																																																self.NUM_LABELS = args['label'].shape[0]
 | 
					 | 
				
			||||||
																																																																																																																																else:
 | 
					 | 
				
			||||||
																																																																																																																																																																																																self.NUM_LABELS = None
 | 
					 | 
				
			||||||
																																																																																																																																# self.Z_DIM = 128 #self.X_SPACE_SIZE																																					
 | 
					 | 
				
			||||||
																																																																																																																																self.Z_DIM = 128																#-- used as rows down stream
 | 
					 | 
				
			||||||
																																																																																																																																self.G_STRUCTURE = [self.Z_DIM,self.Z_DIM]
 | 
					 | 
				
			||||||
																																																																																																																																PROPOSED_BATCH_PER_GPU = 2000 if 'batch_size' not in args else int(args['batch_size'])
 | 
					 | 
				
			||||||
																																																																																																																																self.BATCHSIZE_PER_GPU = PROPOSED_BATCH_PER_GPU
 | 
					 | 
				
			||||||
																																																																																																																																if 'real' in args : 
 | 
					 | 
				
			||||||
																																																																																																																																																																																																self.D_STRUCTURE = [args['real'].shape[1],256,self.Z_DIM]
 | 
					 | 
				
			||||||
																																																																																																																																
 | 
					 | 
				
			||||||
																																																																																																																																																																																																if args['real'].shape[0]																< PROPOSED_BATCH_PER_GPU :
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																self.BATCHSIZE_PER_GPU = int(args['real'].shape[0]* 1) 
 | 
					 | 
				
			||||||
																																																																																																																																# self.BATCHSIZE_PER_GPU = 2000 if 'batch_size' not in args else int(args['batch_size'])
 | 
					 | 
				
			||||||
																																																																																																																																self.TOTAL_BATCHSIZE = self.BATCHSIZE_PER_GPU * self.NUM_GPUS
 | 
					 | 
				
			||||||
																																																																																																																																self.STEPS_PER_EPOCH = 256 #int(np.load('ICD9/train.npy').shape[0] / 2000)																																													
 | 
					 | 
				
			||||||
																																																																																																																																self.MAX_EPOCHS = 10 if 'max_epochs' not in args else int(args['max_epochs'])
 | 
					 | 
				
			||||||
																																																																																																																																self.ROW_COUNT = args['real'].shape[0] if 'real' in args else 100
 | 
					 | 
				
			||||||
																																																																																																																																self.CONTEXT = args['context']
 | 
					 | 
				
			||||||
																																																																																																																																self.ATTRIBUTES = {"id":args['column_id'] if 'column_id' in args else None,"synthetic":args['column'] if 'column' in args else None}
 | 
					 | 
				
			||||||
																																																																																																																																self._REAL = args['real'] if 'real' in args else None
 | 
					 | 
				
			||||||
																																																																																																																																self._LABEL = args['label'] if 'label' in args else None
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
																																																																																																																																self.get = void()
 | 
					 | 
				
			||||||
																																																																																																																																self.get.variables = self._variable_on_cpu
 | 
					 | 
				
			||||||
																																																																																																																																self.get.suffix = lambda : "-".join(self.ATTRIBUTES['synthetic']) if isinstance(self.ATTRIBUTES['synthetic'],list) else self.ATTRIBUTES['synthetic']
 | 
					 | 
				
			||||||
																																																																																																																																self.logger = args['logger'] if 'logger' in args and args['logger'] else None
 | 
					 | 
				
			||||||
																																																																																																																																self.init_logs(**args)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
																																																																def init_logs(self,**args):
 | 
					 | 
				
			||||||
																																																																																																																																self.log_dir = args['logs'] if 'logs' in args else 'logs'
 | 
					 | 
				
			||||||
																																																																																																																																self.mkdir(self.log_dir)
 | 
					 | 
				
			||||||
																																																																																																																																#
 | 
					 | 
				
			||||||
																																																																																																																																# 
 | 
					 | 
				
			||||||
																																																																																																																																for key in ['train','output'] :
 | 
					 | 
				
			||||||
																																																																																																																																																																																																self.mkdir(os.sep.join([self.log_dir,key]))
 | 
					 | 
				
			||||||
																																																																																																																																																																																																self.mkdir (os.sep.join([self.log_dir,key,self.CONTEXT]))
 | 
					 | 
				
			||||||
																																																																																																																																																																																																
 | 
					 | 
				
			||||||
																																																																																																																																self.train_dir		= os.sep.join([self.log_dir,'train',self.CONTEXT])																																																																																																																												
 | 
					 | 
				
			||||||
																																																																																																																																self.out_dir = os.sep.join([self.log_dir,'output',self.CONTEXT])
 | 
					 | 
				
			||||||
																																																																																																																																if self.logger :
 | 
					 | 
				
			||||||
																																																																																																																																																																																																#
 | 
					 | 
				
			||||||
																																																																																																																																																																																																# We will clear the logs from the data-store 
 | 
					 | 
				
			||||||
																																																																																																																																																																																																#
 | 
					 | 
				
			||||||
																																																																																																																																																																																																column = self.ATTRIBUTES['synthetic']
 | 
					 | 
				
			||||||
																																																																																																																																																																																																db = self.logger.db
 | 
					 | 
				
			||||||
																																																																																																																																																																																																if db[column].count() > 0 :
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																db.backup.insert({'name':column,'logs':list(db[column].find()) })
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																db[column].drop()
 | 
					 | 
				
			||||||
																																																																																																																																
 | 
					 | 
				
			||||||
																																																																def load_meta(self,column):
 | 
					 | 
				
			||||||
																																																																																																																																"""
 | 
					 | 
				
			||||||
																																																																																																																																This function is designed to accomodate the uses of the sub-classes outside of a strict dependency model.
 | 
					 | 
				
			||||||
																																																																																																																																Because prediction and training can happen independently
 | 
					 | 
				
			||||||
																																																																																																																																"""
 | 
					 | 
				
			||||||
																																																																																																																																# suffix = "-".join(column) if isinstance(column,list)else column
 | 
					 | 
				
			||||||
																																																																																																																																suffix = self.get.suffix()
 | 
					 | 
				
			||||||
																																																																																																																																_name = os.sep.join([self.out_dir,'meta-'+suffix+'.json'])
 | 
					 | 
				
			||||||
																																																																																																																																if os.path.exists(_name) :
 | 
					 | 
				
			||||||
																																																																																																																																																																																																attr = json.loads((open(_name)).read())
 | 
					 | 
				
			||||||
																																																																																																																																																																																																for key in attr :
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																value = attr[key]
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																setattr(self,key,value)
 | 
					 | 
				
			||||||
																																																																																																																																self.train_dir		= os.sep.join([self.log_dir,'train',self.CONTEXT])																																																																																																																												
 | 
					 | 
				
			||||||
																																																																																																																																self.out_dir = os.sep.join([self.log_dir,'output',self.CONTEXT])
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																
 | 
					 | 
				
			||||||
																																																																																																																																																																																																
 | 
					 | 
				
			||||||
																																																																def log_meta(self,**args) :
 | 
					 | 
				
			||||||
																																																																																																																																
 | 
					 | 
				
			||||||
																																																																																																																																_object = {
 | 
					 | 
				
			||||||
																																																																																																																																																																																																# '_id':'meta',
 | 
					 | 
				
			||||||
																																																																																																																																																																																																'CONTEXT':self.CONTEXT,
 | 
					 | 
				
			||||||
																																																																																																																																																																																																'ATTRIBUTES':self.ATTRIBUTES,
 | 
					 | 
				
			||||||
																																																																																																																																																																																																'BATCHSIZE_PER_GPU':self.BATCHSIZE_PER_GPU,
 | 
					 | 
				
			||||||
																																																																																																																																																																																																'Z_DIM':self.Z_DIM,
 | 
					 | 
				
			||||||
																																																																																																																																																																																																"X_SPACE_SIZE":self.X_SPACE_SIZE,
 | 
					 | 
				
			||||||
																																																																																																																																																																																																"D_STRUCTURE":self.D_STRUCTURE,
 | 
					 | 
				
			||||||
																																																																																																																																																																																																"G_STRUCTURE":self.G_STRUCTURE,
 | 
					 | 
				
			||||||
																																																																																																																																																																																																"NUM_GPUS":self.NUM_GPUS,
 | 
					 | 
				
			||||||
																																																																																																																																																																																																"NUM_LABELS":self.NUM_LABELS,
 | 
					 | 
				
			||||||
																																																																																																																																																																																																"MAX_EPOCHS":self.MAX_EPOCHS,
 | 
					 | 
				
			||||||
																																																																																																																																																																																																"ROW_COUNT":self.ROW_COUNT
 | 
					 | 
				
			||||||
																																																																																																																																}
 | 
					 | 
				
			||||||
																																																																																																																																if args and 'key' in args and 'value' in args :
 | 
					 | 
				
			||||||
																																																																																																																																																																																																key = args['key']
 | 
					 | 
				
			||||||
																																																																																																																																																																																																value= args['value']
 | 
					 | 
				
			||||||
																																																																																																																																																																																																object[key] = value
 | 
					 | 
				
			||||||
																																																																																																																																# suffix = "-".join(self.column) if isinstance(self.column,list) else self.column
 | 
					 | 
				
			||||||
																																																																																																																																suffix = self.get.suffix()
 | 
					 | 
				
			||||||
																																																																																																																																_name = os.sep.join([self.out_dir,'meta-'+suffix])
 | 
					 | 
				
			||||||
																																																																																																																																
 | 
					 | 
				
			||||||
																																																																																																																																f = open(_name+'.json','w')
 | 
					 | 
				
			||||||
																																																																																																																																f.write(json.dumps(_object))
 | 
					 | 
				
			||||||
																																																																																																																																return _object
 | 
					 | 
				
			||||||
																																																																def mkdir (self,path):
 | 
					 | 
				
			||||||
																																																																																																																																if not os.path.exists(path) :
 | 
					 | 
				
			||||||
																																																																																																																																																																																																os.mkdir(path)																																																																																		
 | 
					 | 
				
			||||||
																																																																																																																																
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
																																																																def normalize(self,**args):
 | 
					 | 
				
			||||||
																																																																																																																																"""
 | 
					 | 
				
			||||||
																																																																																																																																This function will perform a batch normalization on a network layer
 | 
					 | 
				
			||||||
																																																																																																																																inputs																																																																		input layer of the neural network
 | 
					 | 
				
			||||||
																																																																																																																																name																																																																																						name of the scope the 
 | 
					 | 
				
			||||||
																																																																																																																																labels																																																																		labels (attributes not synthesized) by default None
 | 
					 | 
				
			||||||
																																																																																																																																n_labels																																																																number of labels default None
 | 
					 | 
				
			||||||
																																																																																																																																"""
 | 
					 | 
				
			||||||
																																																																																																																																inputs		= args['inputs']
 | 
					 | 
				
			||||||
																																																																																																																																name																						= args['name']
 | 
					 | 
				
			||||||
																																																																																																																																labels		= None if 'labels' not in args else args['labels']
 | 
					 | 
				
			||||||
																																																																																																																																n_labels= None if 'n_labels' not in args else args['n_labels']
 | 
					 | 
				
			||||||
																																																																																																																																shift																					= [0] if self.__class__.__name__.lower() == 'generator' else [1] #-- not sure what this is doing
 | 
					 | 
				
			||||||
																																																																																																																																mean, var																																																															= tf.nn.moments(inputs, shift, keep_dims=True)
 | 
					 | 
				
			||||||
																																																																																																																																shape																																																																																					= inputs.shape[1].value
 | 
					 | 
				
			||||||
																																																																																																																																if labels is not None:
 | 
					 | 
				
			||||||
																																																																																																																																																																																																offset_m																																																																= self.get.variables(shape=[1,shape], name='offset'+name,
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																																																																																																																																																																																																																																																																																																																																																initializer=tf.zeros_initializer)
 | 
					 | 
				
			||||||
																																																																																																																																																																																																scale_m = self.get.variables(shape=[n_labels,shape], name='scale'+name,
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																																																																																																																																																																																																																																																																																initializer=tf.ones_initializer)
 | 
					 | 
				
			||||||
																																																																																																																																																																																																offset		= tf.nn.embedding_lookup(offset_m, labels)
 | 
					 | 
				
			||||||
																																																																																																																																																																																																scale																					= tf.nn.embedding_lookup(scale_m, labels)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
																																																																																																																																else:
 | 
					 | 
				
			||||||
																																																																																																																																																																																																offset = None
 | 
					 | 
				
			||||||
																																																																																																																																																																																																scale = None
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
																																																																																																																																result		= tf.nn.batch_normalization(inputs, mean, var,offset,scale, 1e-8)
 | 
					 | 
				
			||||||
																																																																																																																																return result
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
																																																																def _variable_on_cpu(self,**args):
																																																																	"""
																																																																	Create (or fetch) a variable while the '/cpu:0' device scope is active.

																																																																	:name			name handed to get_variable
																																																																	:shape			shape handed to get_variable
																																																																	:initializer	optional initializer (default None)
																																																																	:return			whatever tf.compat.v1.get_variable returns
																																																																	"""
																																																																	var_name = args['name']
																																																																	var_shape = args['shape']
																																																																	init = args.get('initializer')
																																																																	with tf.device('/cpu:0'):
																																																																		return tf.compat.v1.get_variable(var_name, var_shape, initializer=init)
 | 
					 | 
				
			||||||
																																																																def average_gradients(self,tower_grads):
																																																																	"""
																																																																	Average gradients position-wise across the entries of tower_grads.

																																																																	:tower_grads	iterable of lists of (gradient, variable) pairs; the lists are
																																																																					zipped together so pairs at the same position are combined
																																																																	:return			list of (mean gradient, variable) pairs, the variable being the
																																																																					one found in the first list at that position
																																																																	"""
																																																																	averaged = []
																																																																	for per_variable in zip(*tower_grads):
																																																																		# Give every gradient a leading axis, stack along it, then average it away.
																																																																		stacked = tf.concat(axis=0, values=[tf.expand_dims(g, 0) for g, _ in per_variable])
																																																																		averaged.append((tf.reduce_mean(stacked, 0), per_variable[0][1]))
																																																																	return averaged
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class Generator (GNet):
    """
    Builds the network that produces candidate datasets. It owns a Discriminator
    instance whose output is used to score the candidates in loss().
    """

    def __init__(self, **args):
        GNet.__init__(self, **args)
        self.discriminator = Discriminator(**args)

    def loss(self, **args):
        """
        Compute the generator loss: the negated mean of the discriminator output
        on the fake samples plus the sum of the regularization losses collected
        so far.

        :fake   generated samples handed to the discriminator
        :label  labels handed to the discriminator
        :return the same loss tensor twice, as a 2-tuple
        """
        critic_score = self.discriminator.network(inputs=args['fake'], label=args['label'])
        # all_regs = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        reg_losses = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES)
        adversarial = -tf.reduce_mean(critic_score)
        g_loss = adversarial + sum(reg_losses)
        # tf.add_to_collection('glosses', loss)
        tf.compat.v1.add_to_collection('glosses', g_loss)
        return g_loss, g_loss

    def load_meta(self, column):
        """Load the meta data for `column` on this object, then on the owned discriminator."""
        super().load_meta(column)
        self.discriminator.load_meta(column)

    def network(self, **args):
        """
        Build the network that generates the synthetic candidates.

        :inputs matrix fed to the first layer
        :dim    width of the inputs (defaults to self.Z_DIM)
        :label  labels forwarded to normalize()
        :return output of the final sigmoid layer
        """
        x = args['inputs']
        in_dim = args['dim'] if 'dim' in args else self.Z_DIM
        label = args['label']

        with tf.compat.v1.variable_scope('G', reuse=tf.compat.v1.AUTO_REUSE, regularizer=l2_regularizer(0.00001)):
            # Hidden layers: matmul -> normalize -> relu, added back onto x.
            for idx, out_dim in enumerate(self.G_STRUCTURE[:-1]):
                weights = self.get.variables(name='W_{}'.format(idx), shape=[in_dim, out_dim])
                normed = self.normalize(
                    inputs=tf.matmul(x, weights),
                    shift=0,
                    name='cbn{}'.format(idx),
                    labels=label,
                    n_labels=self.NUM_LABELS,
                )
                x = x + tf.nn.relu(normed)
                in_dim = out_dim

            last = len(self.G_STRUCTURE) - 1
            #
            # Original author's note: this seems to be an extra hidden layer whose
            # goal is to map continuous values to discrete values (pre-trained to do this).
            weights = self.get.variables(name='W_{}'.format(last), shape=[in_dim, self.G_STRUCTURE[-1]])
            normed = self.normalize(
                inputs=tf.matmul(x, weights),
                name='cbn{}'.format(last),
                labels=label,
                n_labels=self.NUM_LABELS,
            )
            x = x + tf.nn.tanh(normed)

            #
            # Original author's note: this seems to be the output layer.
            out_idx = last + 1
            weights = self.get.variables(name='W_{}'.format(out_idx), shape=[self.Z_DIM, self.X_SPACE_SIZE])
            bias = self.get.variables(name='b_{}'.format(out_idx), shape=[self.X_SPACE_SIZE])
            x = tf.nn.sigmoid(tf.add(tf.matmul(x, weights), bias))
        return x
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class Discriminator(GNet):
    """
    Builds the network that maps a batch of samples (with labels) to a single
    output per row, and the loss used to train it.
    """

    def __init__(self, **args):
        GNet.__init__(self, **args)

    def network(self, **args):
        """
        Apply the computational graph to a dataset and its labels; the last layer
        has a single output (neuron).

        :inputs samples fed to the first layer
        :label  labels forwarded to normalize()
        :return output of the final single-unit layer
        """
        x = args['inputs']
        label = args['label']
        with tf.compat.v1.variable_scope('D', reuse=tf.compat.v1.AUTO_REUSE, regularizer=l2_regularizer(0.00001)):
            # Consecutive entries of D_STRUCTURE give each layer's (input, output) widths.
            layer_dims = zip(self.D_STRUCTURE, self.D_STRUCTURE[1:])
            for idx, (in_dim, out_dim) in enumerate(layer_dims):
                weights = self.get.variables(name='W_{}'.format(idx), shape=[in_dim, out_dim])
                bias = self.get.variables(name='b_{}'.format(idx), shape=[out_dim])
                activated = tf.nn.relu(tf.add(tf.matmul(x, weights), bias))
                x = self.normalize(
                    inputs=activated,
                    name='cln{}'.format(idx),
                    shift=1,
                    labels=label,
                    n_labels=self.NUM_LABELS,
                )

            out_idx = len(self.D_STRUCTURE)
            weights = self.get.variables(name='W_{}'.format(out_idx), shape=[self.D_STRUCTURE[-1], 1])
            bias = self.get.variables(name='b_{}'.format(out_idx), shape=[1])
            y = tf.add(tf.matmul(x, weights), bias)
        return y

    def loss(self, **args):
        """
        Compute the discriminator loss: the difference of mean outputs on fake
        and real samples, plus 10 times a gradient penalty evaluated on random
        interpolations between real and fake, plus the regularization losses.

        :real   real samples
        :fake   generated samples
        :label  labels forwarded to network()
        :return (w_distance, loss) where w_distance excludes the penalty terms
        """
        real = args['real']
        fake = args['fake']
        label = args['label']

        # One mixing coefficient in [0, 1) per row of the batch.
        mix = tf.random.uniform(shape=[self.BATCHSIZE_PER_GPU, 1], minval=0, maxval=1)
        interpolated = real + mix * (fake - real)

        score_fake = self.network(inputs=fake, label=label)
        score_real = self.network(inputs=real, label=label)
        score_mixed = self.network(inputs=interpolated, label=label)

        # Penalize the squared deviation of the per-row gradient norm from 1.
        grad = tf.gradients(score_mixed, [interpolated])[0]
        slopes = tf.sqrt(tf.reduce_sum(tf.square(grad), 1))
        gradient_penalty = tf.reduce_mean((slopes - 1.) ** 2)

        # all_regs = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        reg_losses = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES)
        w_distance = -tf.reduce_mean(score_real) + tf.reduce_mean(score_fake)
        d_loss = w_distance + 10 * gradient_penalty + sum(reg_losses)
        # tf.add_to_collection('dlosses', loss)
        tf.compat.v1.add_to_collection('dlosses', d_loss)

        return w_distance, d_loss
 | 
					 | 
				
			||||||
class Train (GNet):
 | 
					 | 
				
			||||||
																																																																def __init__(self,**args):
																																																																	"""
																																																																	Build the trainer : the base network settings plus one generator and one discriminator
																																																																	All keyword arguments are forwarded as-is to GNet, Generator and Discriminator
																																																																	:real      stored as self._REAL (required)
																																																																	:label     stored as self._LABEL (optional, None when it is not provided)
																																																																	:column    stored as self.column (required)
																																																																	"""
																																																																	GNet.__init__(self, **args)
																																																																	self.generator = Generator(**args)
																																																																	self.discriminator = Discriminator(**args)
																																																																	self._REAL = args['real']
																																																																	# the label is optional, the rest of the class checks it against None
																																																																	self._LABEL = args.get('label')
																																																																	self.column = args['column']
																																																																	#
																																																																	# The meta data is computed once, kept on the instance and handed to the logger if there is one
																																																																	self.meta = self.log_meta()
																																																																	if self.logger:
																																																																		self.logger.write(self.meta)
 | 
					 | 
				
			||||||
																																																																def load_meta(self, column):
																																																																	"""
																																																																	Load the meta data of a column for this object first, then delegate the same call
																																																																	to its dependents (the generator, then the discriminator)
																																																																	:column    column name, passed unchanged to every load_meta call
																																																																	"""
																																																																	super().load_meta(column)
																																																																	# attributes are looked up one at a time so the order of the calls is kept : generator, discriminator
																																																																	for dependent in ('generator', 'discriminator'):
																																																																		getattr(self, dependent).load_meta(column)
 | 
					 | 
				
			||||||
																																																																def loss(self,**args):
																																																																	"""
																																																																	Compute the "tower" loss of one training stage : a candidate is generated from random noise
																																																																	and handed to the discriminator's loss (stage 'D') or to the generator's loss (any other stage)
																																																																	:scope     scope used to filter the collection of losses that is summed
																																																																	:stage     'D' for the discriminator, any other value is handled as the generator
																																																																	:real      real data, only used when stage is 'D'
																																																																	:label     labels or None
																																																																	:return    (sum of the collected losses, w) where w is the first value returned by the stage's loss function
																																																																	"""
																																																																	scope, stage = args['scope'], args['stage']
																																																																	real, label = args['real'], args['label']

																																																																	if label is not None:
																																																																		#
																																																																		# The label columns are folded into a single integer per row :
																																																																		#   column 1 is multiplied by K, the number of columns of self._LABEL minus 2
																																																																		#   columns 2 onward are multiplied by their position (0, 1, 2 ...) and added up
																																																																		# e.g with K = 4 : label[:,1] * 4 + label[:,2:] x [[0],[1],[2],[3]]
																																																																		# NOTE(review): this presumably expects one-hot encoded columns from index 2 onward -- to be confirmed with the author
																																																																		label = tf.cast(label, tf.int32)
																																																																		positions = [[position] for position in np.arange(self._LABEL.shape[1] - 2)]
																																																																		label = label[:, 1] * len(positions) + tf.squeeze(
																																																																			tf.matmul(label[:, 2:], tf.constant(positions, dtype=tf.int32))
																																																																		)

																																																																	noise = tf.random.normal(shape=[self.BATCHSIZE_PER_GPU, self.Z_DIM])
																																																																	fake = self.generator.network(inputs=noise, label=label)
																																																																	if stage == 'D':
																																																																		collection = 'dlosses'
																																																																		w, _ = self.discriminator.loss(real=real, fake=fake, label=label)
																																																																	else:
																																																																		collection = 'glosses'
																																																																		w, _ = self.generator.loss(fake=fake, label=label)
																																																																	#
																																																																	# The loss returned above is not used directly, the total is rebuilt from the collection of the stage
																																																																	# NOTE(review): this relies on the stage's loss function adding its loss to that collection -- verify for the generator
																																																																	collected = tf.compat.v1.get_collection(collection, scope)
																																																																	return tf.add_n(collected, name='total_loss'), w
 | 
					 | 
				
			||||||
																																																																def input_fn(self):
																																																																	"""
																																																																	Build the input pipeline used for training
																																																																	The features (paired with the labels when there are any) are sliced along the first axis,
																																																																	repeated 10000 times, grouped in batches of BATCHSIZE_PER_GPU and prefetched one batch ahead
																																																																	:return    (initializable iterator, features placeholder, labels placeholder)
																																																																	           the labels placeholder is always created but is only part of the dataset when self._LABEL is set
																																																																	"""
																																																																	has_label = self._LABEL is not None
																																																																	features_placeholder = tf.compat.v1.placeholder(shape=self._REAL.shape, dtype=tf.float32)
																																																																	labels_placeholder = tf.compat.v1.placeholder(
																																																																		shape=self._LABEL.shape if has_label else [None, None],
																																																																		dtype=tf.float32,
																																																																	)
																																																																	source = (features_placeholder, labels_placeholder) if has_label else features_placeholder
																																																																	dataset = (
																																																																		tf.data.Dataset.from_tensor_slices(source)
																																																																		.repeat(10000)
																																																																		.batch(batch_size=self.BATCHSIZE_PER_GPU)
																																																																		.prefetch(1)
																																																																	)
																																																																	# the iterator has to be initialized by the caller, with the placeholders fed
																																																																	iterator = tf.compat.v1.data.make_initializable_iterator(dataset)
																																																																	return iterator, features_placeholder, labels_placeholder
 | 
					 | 
				
			||||||
																																																																
 | 
					 | 
				
			||||||
																																																																def network(self,**args):
																																																																	"""
																																																																	Build the training operation of one stage over NUM_GPUS towers
																																																																	Each tower pulls a batch from the iterator, computes the loss of the stage and its gradients;
																																																																	the gradients of all the towers are combined by self.average_gradients and applied by the optimizer
																																																																	:stage     stage being trained, also used as the scope of the trainable variables
																																																																	:opt       optimizer used to compute and apply the gradients
																																																																	:return    (train operation, mean of w over the towers, iterator, features placeholder, labels placeholder)
																																																																	"""
																																																																	stage, opt = args['stage'], args['opt']
																																																																	tower_grads, per_gpu_w = [], []
																																																																	iterator, features_placeholder, labels_placeholder = self.input_fn()
																																																																	with tf.compat.v1.variable_scope(tf.compat.v1.get_variable_scope()):
																																																																		for gpu_id in range(self.NUM_GPUS):
																																																																			with tf.device('/gpu:%d' % gpu_id), tf.name_scope('%s_%d' % ('TOWER', gpu_id)) as scope:
																																																																				# without labels the iterator yields the features alone
																																																																				batch = iterator.get_next()
																																																																				real, label = batch if self._LABEL is not None else (batch, None)
																																																																				tower_loss, tower_w = self.loss(scope=scope, stage=stage, real=real, label=label)
																																																																				# from here on the variables of the current scope are reused instead of created
																																																																				tf.compat.v1.get_variable_scope().reuse_variables()
																																																																				trainable = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, scope=stage)
																																																																				tower_grads.append(opt.compute_gradients(tower_loss, trainable))
																																																																				per_gpu_w.append(tower_w)

																																																																	apply_gradient_op = opt.apply_gradients(self.average_gradients(tower_grads))
																																																																	mean_w = tf.reduce_mean(per_gpu_w)
																																																																	return apply_gradient_op, mean_w, iterator, features_placeholder, labels_placeholder
 | 
					 | 
				
			||||||
																																																																def apply(self,**args):
 | 
					 | 
				
			||||||
																																																																																																																																# max_epochs = args['max_epochs'] if 'max_epochs' in args else 10
 | 
					 | 
				
			||||||
																																																																																																																																REAL = self._REAL
 | 
					 | 
				
			||||||
																																																																																																																																LABEL= self._LABEL																																													
 | 
					 | 
				
			||||||
																																																																																																																																if (self.logger):
 | 
					 | 
				
			||||||
																																																																																																																																																																																																pass
 | 
					 | 
				
			||||||
																																																																																																																																
 | 
					 | 
				
			||||||
																																																																																																																																with tf.device('/cpu:0'):
 | 
					 | 
				
			||||||
																																																																																																																																																																																																opt_d = tf.compat.v1.train.AdamOptimizer(1e-4)
 | 
					 | 
				
			||||||
																																																																																																																																																																																																opt_g = tf.compat.v1.train.AdamOptimizer(1e-4)
 | 
					 | 
				
			||||||
																																																																																																																																																																																																
 | 
					 | 
				
			||||||
																																																																																																																																																																																																train_d, w_distance, iterator_d, features_placeholder_d, labels_placeholder_d = self.network(stage='D', opt=opt_d)
 | 
					 | 
				
			||||||
																																																																																																																																																																																																train_g, _, iterator_g, features_placeholder_g, labels_placeholder_g = self.network(stage='G', opt=opt_g)
 | 
					 | 
				
			||||||
																																																																																																																																																																																																# saver = tf.train.Saver()
 | 
					 | 
				
			||||||
																																																																																																																																																																																																saver																					= tf.compat.v1.train.Saver()
 | 
					 | 
				
			||||||
																																																																																																																																																																																																# init																		= tf.global_variables_initializer()
 | 
					 | 
				
			||||||
																																																																																																																																																																																																init																						= tf.compat.v1.global_variables_initializer()
 | 
					 | 
				
			||||||
																																																																																																																																																																																																logs = []
 | 
					 | 
				
			||||||
																																																																																																																																																																																																#with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)) as sess:
 | 
					 | 
				
			||||||
																																																																																																																																																																																																with tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(allow_soft_placement=True, log_device_placement=False)) as sess:
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																sess.run(init)
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																sess.run(iterator_d.initializer,
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																																																																																																																																																feed_dict={features_placeholder_d: REAL})
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																sess.run(iterator_g.initializer,
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																																																																																																																																																feed_dict={features_placeholder_g: REAL})
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																for epoch in range(1, self.MAX_EPOCHS + 1):
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																start_time = time.time()
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																w_sum = 0
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																for i in range(self.STEPS_PER_EPOCH):
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																																																																																for _ in range(2):
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																																																																																																																																																_, w = sess.run([train_d, w_distance])
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																																																																																																																																																w_sum += w
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																																																																																sess.run(train_g)
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																duration = time.time() - start_time
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																assert not np.isnan(w_sum), 'Model diverged with loss = NaN'
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																format_str = 'epoch: %d, w_distance = %f (%.1f)'
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																print(format_str % (epoch, -w_sum/(self.STEPS_PER_EPOCH*2), duration))
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																# print (dir (w_distance))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																logs.append({"epoch":epoch,"distance":-w_sum/(self.STEPS_PER_EPOCH*2) })
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																if epoch % self.MAX_EPOCHS == 0:
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																																																																																# suffix = "-".join(self.ATTRIBUTES['synthetic']) if isinstance(self.ATTRIBUTES['synthetic'],list) else self.ATTRIBUTES['synthetic']
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																																																																																suffix = self.get.suffix()
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																																																																																_name  = os.sep.join([self.train_dir,suffix])
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																																																																																# saver.save(sess, self.train_dir, write_meta_graph=False, global_step=epoch)
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																																																																																saver.save(sess, _name, write_meta_graph=False, global_step=epoch)
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																																																																																#
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																																																																																#
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																																																																																if self.logger :
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																																																																																																																																																row = {"logs":logs} #,"model":pickle.dump(sess)}																																																																																																																																																																																																																																																																																																																																																																																																																																																																
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																																																																																																																																																self.logger.write(row)
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																																																																																																																																																#
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																																																																																																																																																# @TODO:
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																																																																																																																																																# We should upload the files in the checkpoint 
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																																																																																																																																																# This would allow the learnt model to be portable to another system
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																																																																																																																																																#
 | 
					 | 
				
			||||||
																																																																																																																																																																																																tf.compat.v1.reset_default_graph()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class Predict(GNet):
 | 
					 | 
				
			||||||
																																																																"""
 | 
					 | 
				
			||||||
																																																																This class generates synthetic data from a previously learned (trained) model
 | 
					 | 
				
			||||||
																																																																"""
 | 
					 | 
				
			||||||
																																																																def __init__(self,**args):
																																																																																																																																"""
																																																																																																																																Set up the predictor from keyword arguments.

																																																																																																																																The same keyword arguments are forwarded unchanged to the parent
																																																																																																																																initializer and to a new Generator instance.

																																																																																																																																:param args: keyword arguments; must contain the key 'values',
																																																																																																																																             which is stored on the instance as ``self.values``
																																																																																																																																:raises KeyError: if 'values' is not supplied
																																																																																																																																"""
																																																																																																																																super().__init__(**args)
																																																																																																																																# The generator is built from the very same arguments as this object
																																																																																																																																generator = Generator(**args)
																																																																																																																																self.generator = generator
																																																																																																																																self.values = args['values']
 | 
					 | 
				
			||||||
																																																																def load_meta(self, column):
																																																																																																																																"""
																																																																																																																																Forward ``column`` to the parent class's ``load_meta`` and then to
																																																																																																																																the wrapped generator's ``load_meta``, in that order.

																																																																																																																																:param column: passed through unchanged to both ``load_meta`` calls
																																																																																																																																"""
																																																																																																																																super().load_meta(column)
																																																																																																																																generator = self.generator
																																																																																																																																generator.load_meta(column)
 | 
					 | 
				
			||||||
																																																																def apply(self,**args):
 | 
					 | 
				
			||||||
																																																																																																																																# print (self.train_dir)
 | 
					 | 
				
			||||||
																																																																																																																																# suffix = "-".join(self.ATTRIBUTES['synthetic']) if isinstance(self.ATTRIBUTES['synthetic'],list) else self.ATTRIBUTES['synthetic']
 | 
					 | 
				
			||||||
																																																																																																																																suffix = self.get.suffix()
 | 
					 | 
				
			||||||
																																																																																																																																model_dir = os.sep.join([self.train_dir,suffix+'-'+str(self.MAX_EPOCHS)])
 | 
					 | 
				
			||||||
																																																																																																																																demo = self._LABEL #np.zeros([self.ROW_COUNT,self.NUM_LABELS]) #args['de"shape":{"LABEL":list(self._LABEL.shape)} mo']
 | 
					 | 
				
			||||||
																																																																																																																																tf.compat.v1.reset_default_graph()
 | 
					 | 
				
			||||||
																																																																																																																																#z = tf.random.normal(shape=[self.BATCHSIZE_PER_GPU, self.Z_DIM])
 | 
					 | 
				
			||||||
																																																																																																																																z = tf.random.normal(shape=[self._REAL.shape[0], self.Z_DIM])
 | 
					 | 
				
			||||||
																																																																																																																																y = tf.compat.v1.placeholder(shape=[self._REAL.shape[0], self.NUM_LABELS], dtype=tf.int32)
 | 
					 | 
				
			||||||
																																																																																																																																#y = tf.compat.v1.placeholder(shape=[self.BATCHSIZE_PER_GPU, self.NUM_LABELS], dtype=tf.int32)
 | 
					 | 
				
			||||||
																																																																																																																																if self._LABEL is not None :
 | 
					 | 
				
			||||||
																																																																																																																																																																																																ma = [[i] for i in np.arange(self.NUM_LABELS - 2)]
 | 
					 | 
				
			||||||
																																																																																																																																																																																																label = y[:, 1] * len(ma) + tf.squeeze(tf.matmul(y[:, 2:], tf.constant(ma, dtype=tf.int32)))
 | 
					 | 
				
			||||||
																																																																																																																																else:
 | 
					 | 
				
			||||||
																																																																																																																																																																																																label = None
 | 
					 | 
				
			||||||
																																																																																																																																fake																						= self.generator.network(inputs=z, label=label)
 | 
					 | 
				
			||||||
																																																																																																																																init																						= tf.compat.v1.global_variables_initializer()
 | 
					 | 
				
			||||||
																																																																																																																																saver																					= tf.compat.v1.train.Saver()
 | 
					 | 
				
			||||||
																																																																																																																																df																																																																																																												= pd.DataFrame()
 | 
					 | 
				
			||||||
																																																																																																																																CANDIDATE_COUNT = 10000
 | 
					 | 
				
			||||||
																																																																																																																																NTH_VALID_CANDIDATE = count = np.random.choice(np.arange(2,60),2)[0]
 | 
					 | 
				
			||||||
																																																																																																																																with tf.compat.v1.Session() as sess:
 | 
					 | 
				
			||||||
																																																																																																																																																																																																
 | 
					 | 
				
			||||||
																																																																																																																																																																																																# sess.run(init)
 | 
					 | 
				
			||||||
																																																																																																																																																																																																saver.restore(sess, model_dir)
 | 
					 | 
				
			||||||
																																																																																																																																																																																																if self._LABEL is not None :
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																labels = np.zeros((self.ROW_COUNT,self.NUM_LABELS) )
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																labels= demo
 | 
					 | 
				
			||||||
																																																																																																																																																																																																else:
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																labels = None
 | 
					 | 
				
			||||||
																																																																																																																																																																																																
 | 
					 | 
				
			||||||
																																																																																																																																																																																																found = []
 | 
					 | 
				
			||||||
																																																																																																																																																																																																
 | 
					 | 
				
			||||||
																																																																																																																																																																																																for i in np.arange(CANDIDATE_COUNT) :
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																if labels :
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																f = sess.run(fake,feed_dict={y:labels})
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																else:
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																f = sess.run(fake)
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																#
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																# if we are dealing with numeric values only we can perform a simple marginal sum against the indexes
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																# The code below will insure we have some acceptable cardinal relationships between id and synthetic values
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																#
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																df =						( pd.DataFrame(np.round(f).astype(np.int32)))
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																p = 0 not in df.sum(axis=1).values
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																x = df.sum(axis=1).values
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																if np.divide( np.sum(x), x.size) > .9:
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																found.append(df)
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																if len(found) == NTH_VALID_CANDIDATE or i == CANDIDATE_COUNT:
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																																																																																break
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																else:
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																continue
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																
 | 
					 | 
				
			||||||
																																																																																																																																																																																																# i = df.T.index.astype(np.int32) #-- These are numeric pseudonyms
 | 
					 | 
				
			||||||
																																																																																																																																																																																																# df = (i * df).sum(axis=1)
 | 
					 | 
				
			||||||
																																																																																																																																																																																																#
 | 
					 | 
				
			||||||
																																																																																																																																																																																																# In case we are dealing with actual values like diagnosis codes we can perform 
 | 
					 | 
				
			||||||
																																																																																																																																																																																																#
 | 
					 | 
				
			||||||
																																																																																																																																																																																																INDEX =np.random.choice(np.arange(len(found)),1)[0]
 | 
					 | 
				
			||||||
																																																																																																																																																																																																#df = found[np.random.choice(np.arange(len(found)),1)[0]]
 | 
					 | 
				
			||||||
																																																																																																																																																																																																df = found[INDEX]
 | 
					 | 
				
			||||||
																																																																																																																																																																																																columns = self.ATTRIBUTES['synthetic'] if isinstance(self.ATTRIBUTES['synthetic'],list)else [self.ATTRIBUTES['synthetic']]
 | 
					 | 
				
			||||||
																																																																																																																																																																																																
 | 
					 | 
				
			||||||
																																																																																																																																																																																																# r = np.zeros((self.ROW_COUNT,len(columns)))
 | 
					 | 
				
			||||||
																																																																																																																																																																																																r = np.zeros(self.ROW_COUNT)
 | 
					 | 
				
			||||||
																																																																																																																																																																																																df.columns = self.values
 | 
					 | 
				
			||||||
																																																																																																																																																																																																if len(found):
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																print (len(found),NTH_VALID_CANDIDATE)			
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																# x = df * self.values 
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																#
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																# let's get the rows with no values synthesized (for whatever reason)
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																#
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																ii = df.apply(lambda row: np.sum(row) == 0,axis=1)
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																if np.sum(ii) > 0 :
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																missing = np.repeat(np.nan, np.where(ii==1)[0].size)
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																else:
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																missing = []
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																print (len (missing), df.shape)	
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																i = np.where(ii == 0)[0]
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																df =						pd.DataFrame( df.iloc[i].apply(lambda row: self.values[np.random.choice(np.where(row == 1)[0],1)[0]] ,axis=1))
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																df.columns = columns
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																df = df[columns[0]].append(pd.Series(missing))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																																																
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																	
 | 
					 | 
				
			||||||
																																																																																																																																																																																																
 | 
					 | 
				
			||||||
																																																																																																																																
 | 
					 | 
				
			||||||
																																																																																																																																tf.compat.v1.reset_default_graph()
 | 
					 | 
				
			||||||
																																																																																																																																df = pd.DataFrame(df)
 | 
					 | 
				
			||||||
																																																																																																																																df.columns = columns
 | 
					 | 
				
			||||||
																																																																																																																																print (df.head())
 | 
					 | 
				
			||||||
																																																																																																																																print (df.shape)
 | 
					 | 
				
			||||||
																																																																																																																																return df.to_dict(orient='list')
 | 
					 | 
				
			||||||
																																																																																																																																																																																																# return df.to_dict(orient='list')
 | 
					 | 
				
			||||||
																																																																																																																																																																																																# count = str(len(os.listdir(self.out_dir)))
 | 
					 | 
				
			||||||
																																																																																																																																																																																																# _name = os.sep.join([self.out_dir,self.CONTEXT+'-'+count+'.csv'])
 | 
					 | 
				
			||||||
																																																																																																																																																																																																# df.to_csv(_name,index=False)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
																																																																																																																																																																																																
 | 
					 | 
				
			||||||
																																																																																																																																																																																																# output.extend(np.round(f))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
																																																																																																																																																																																																# for m in range(2):
 | 
					 | 
				
			||||||
																																																																																																																																																																																																#																																																																															for n in range(2, self.NUM_LABELS):
 | 
					 | 
				
			||||||
																																																																																																																																																																																																#																																																																																																																																															idx1 = (demo[:, m] == 1)
 | 
					 | 
				
			||||||
																																																																																																																																																																																																#																																																																																																																																															idx2 = (demo[:, n] == 1)
 | 
					 | 
				
			||||||
																																																																																																																																																																																																#																																																																																																																																															idx = [idx1[j] and idx2[j] for j in range(len(idx1))]
 | 
					 | 
				
			||||||
																																																																																																																																																																																																#																																																																																																																																															num = np.sum(idx)
 | 
					 | 
				
			||||||
																																																																																																																																																																																																#																																																																																																																																															print ("___________________list__")
 | 
					 | 
				
			||||||
																																																																																																																																																																																																#																																																																																																																																															print (idx1)
 | 
					 | 
				
			||||||
																																																																																																																																																																																																#																																																																																																																																															print (idx2)
 | 
					 | 
				
			||||||
																																																																																																																																																																																																#																																																																																																																																															print (idx)
 | 
					 | 
				
			||||||
																																																																																																																																																																																																#																																																																																																																																															print (num)
 | 
					 | 
				
			||||||
																																																																																																																																																																																																#																																																																																																																																															print ("_____________________")
 | 
					 | 
				
			||||||
																																																																																																																																																																																																#																																																																																																																																															nbatch = int(np.ceil(num / self.BATCHSIZE_PER_GPU))
 | 
					 | 
				
			||||||
																																																																																																																																																																																																#																																																																																																																																															label_input = np.zeros((nbatch*self.BATCHSIZE_PER_GPU, self.NUM_LABELS))
 | 
					 | 
				
			||||||
																																																																																																																																																																																																#																																																																																																																																															label_input[:, n] = 1
 | 
					 | 
				
			||||||
																																																																																																																																																																																																#																																																																																																																																															label_input[:, m] = 1
 | 
					 | 
				
			||||||
																																																																																																																																																																																																#																																																																																																																																															output = []
 | 
					 | 
				
			||||||
																																																																																																																																																																																																#																																																																																																																																															for i in range(nbatch):
 | 
					 | 
				
			||||||
																																																																																																																																																																																																#																																																																																																																																																																																																															f = sess.run(fake,feed_dict={y: label_input[i* self.BATCHSIZE_PER_GPU:(i+1)* self.BATCHSIZE_PER_GPU]})
 | 
					 | 
				
			||||||
																																																																																																																																																																																																#																																																																																																																																																																																																															output.extend(np.round(f))
 | 
					 | 
				
			||||||
																																																																																																																																																																																																#																																																																																																																																															output = np.array(output)[:num]
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																# print ([m,n,output])
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																
 | 
					 | 
				
			||||||
																																																																																																																																																																																																																																																																																																																																# np.save(self.out_dir + str(m) + str(n), output)
 | 
					 | 
				
			||||||
																																																																
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
if __name__ == '__main__':
    #
    # Command-line entry point.
    #
    # Arguments are read from SYS_ARGS (populated elsewhere in this file) :
    #   column      name of the column to synthesize (required)
    #   raw-data    path of the CSV file to load (required)
    #   id          column(s) used to build the labels, comma separated (default 'person_id')
    #   max_epochs  number of training epochs (default 10, training only)
    #   train|learn train a model on the raw data
    #   generate    produce synthetic values for the column and print the result
    #
    _train_mode = bool(set(['train', 'learn']) & set(SYS_ARGS.keys()))
    _generate_mode = 'generate' in SYS_ARGS
    _missing = [_key for _key in ('column', 'raw-data') if _key not in SYS_ARGS]

    if _missing:
        #
        # The required arguments used to be read before any check, so a bare
        # invocation died on a KeyError without ever showing the usage text.
        # We show the usage first and still exit with a failure status.
        #
        print (SYS_ARGS.keys())
        print (__doc__)
        raise SystemExit('missing required argument(s): ' + ', '.join(_missing))
    elif not (_train_mode or _generate_mode):
        #
        # No known mode was requested: show what was received and how to use the tool
        #
        print (SYS_ARGS.keys())
        print (__doc__)
    else:
        column = SYS_ARGS['column']
        column_id = SYS_ARGS['id'] if 'id' in SYS_ARGS else 'person_id'
        # A comma separated list of identifiers becomes a list of columns
        column_id = column_id.split(',') if ',' in column_id else column_id

        #
        # The file is loaded once and the one-hot labels are computed once,
        # both modes work from the same data-frame and label matrix
        #
        df = pd.read_csv(SYS_ARGS['raw-data'])
        LABEL = pd.get_dummies(df[column_id]).astype(np.float32).values

        #
        # The context is the file name with its last 4 characters removed
        # (i.e. a 4 character suffix such as '.csv' is expected)
        #
        context = SYS_ARGS['raw-data'].split(os.sep)[-1][:-4]

        if _train_mode:
            #
            # We should train upon this data
            # -- the column to synthesize is converted to a binary (one-hot) matrix
            #
            max_epochs = np.int32(SYS_ARGS['max_epochs']) if 'max_epochs' in SYS_ARGS else 10
            REAL = pd.get_dummies(df[column]).astype(np.float32).values
            trainer = Train(context=context, max_epochs=max_epochs, real=REAL, label=LABEL, column=column, column_id=column_id)
            trainer.apply()
        else:
            #
            # The sorted unique values of the column are handed to the predictor,
            # the synthesized values then overwrite the column in the data-frame
            #
            values = df[column].unique().tolist()
            values.sort()

            p = Predict(context=context, label=LABEL, values=values, column=column)
            p.load_meta(column)
            r = p.apply()
            print (df)
            print ()
            df[column] = r[column]
            print (df)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
					Loading…
					
					
				
		Reference in new issue