|
|
|
"""
|
|
|
|
usage :
|
|
|
|
optional :
|
|
|
|
--num_gpu   number of GPUs to use (defaults to 1)
|
|
|
|
--epoch     steps per epoch (defaults to 256)
|
|
|
|
"""
|
|
|
|
import tensorflow as tf
|
|
|
|
from tensorflow.contrib.layers import l2_regularizer
|
|
|
|
import numpy as np
|
|
|
|
import pandas as pd
|
|
|
|
import time
|
|
|
|
import os
|
|
|
|
import sys
|
|
|
|
self.NUM_GPUS = 1 if 'num_gpu' not in args else args['num_gpu']
|
|
|
|
self.train_dir = os.sep.join([self.log_dir,'train',self.CONTEXT])
|
|
|
|
self.out_dir = os.sep.join([self.log_dir,'output',self.CONTEXT])
|
|
|
|
|
|
|
|
def load_meta(self,column):
|
|
|
|
"""
|
|
|
|
This function is designed to accommodate the use of the sub-classes outside of a strict dependency model.
|
|
|
|
Because prediction and training can happen independently
|
|
|
|
"""
|
|
|
|
return _object
|
|
|
|
average_grads = []
|
|
|
|
for grad_and_vars in zip(*tower_grads):
|
|
|
|
grads = []
|
|
|
|
for g, _ in grad_and_vars:
|
|
|
|
expanded_g = tf.expand_dims(g, 0)
|
|
|
|
grads.append(expanded_g)
|
|
|
|
|
|
|
|
grad = tf.concat(axis=0, values=grads)
|
|
|
|
grad = tf.reduce_mean(grad, 0)
|
|
|
|
|
|
|
|
v = grad_and_vars[0][1]
|
|
|
|
grad_and_var = (grad, v)
|
|
|
|
average_grads.append(grad_and_var)
|
|
|
|
return average_grads
|
|
|
|
|
|
|
|
|
|
|
|
class Generator(GNet):
    """
    Network that produces candidate (synthetic) datasets.

    The generator aggregates its own Discriminator, built from the same
    keyword arguments, and uses it to score generated samples when the
    loss is computed, so that generation is not purely random.
    """

    def __init__(self, **args):
        super().__init__(**args)
        self.discriminator = Discriminator(**args)

    def loss(self, **args):
        """
        Build the generator loss from the discriminator's score of fake samples.

        :fake   generated samples passed to the discriminator network
        :label  labels passed to the discriminator network with the samples
        :return the same loss tensor twice, as the pair (loss, loss)
        """
        scores = self.discriminator.network(inputs=args['fake'], label=args['label'])
        reg_terms = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        total = -tf.reduce_mean(scores) + sum(reg_terms)
        # The loss is also registered in the 'glosses' graph collection.
        tf.add_to_collection('glosses', total)
        return total, total

    def load_meta(self, column):
        """
        Load the meta data for this network and for the aggregated discriminator.

        :column column forwarded unchanged to both load_meta calls
        """
        super().load_meta(column)
        self.discriminator.load_meta(column)

    def network(self, **args):
        """
        Build the graph that generates the synthetic candidates.

        :inputs matrix of data fed into the first layer
        :dim    optional input width of the first kernel, self.Z_DIM when omitted
        :label  labels handed to the normalization layers
        :return output of the final sigmoid layer
        """
        out = args['inputs']
        in_dim = args['dim'] if 'dim' in args else self.Z_DIM
        label = args['label']

        with tf.compat.v1.variable_scope('G', reuse=tf.compat.v1.AUTO_REUSE, regularizer=l2_regularizer(0.00001)):
            # Hidden layers: every entry of G_STRUCTURE except the last one.
            for idx, out_dim in enumerate(self.G_STRUCTURE[:-1]):
                tag = str(idx)
                weights = self.get.variables(name='W_' + tag, shape=[in_dim, out_dim])
                normed = self.normalize(inputs=tf.matmul(out, weights), shift=0, name='cbn' + tag,
                                        labels=label, n_labels=self.NUM_LABELS)
                # The activation is added back onto the layer input.
                out = out + tf.nn.relu(normed)
                in_dim = out_dim

            #
            # This seems to be an extra hidden layer:
            # its goal is to map continuous values to discrete values (pre-trained to do this)
            last = len(self.G_STRUCTURE) - 1
            tag = str(last)
            weights = self.get.variables(name='W_' + tag, shape=[in_dim, self.G_STRUCTURE[-1]])
            normed = self.normalize(inputs=tf.matmul(out, weights), name='cbn' + tag,
                                    labels=label, n_labels=self.NUM_LABELS)
            out = out + tf.nn.tanh(normed)

            #
            # This seems to be the output layer
            tag = str(last + 1)
            weights = self.get.variables(name='W_' + tag, shape=[self.Z_DIM, self.X_SPACE_SIZE])
            offset = self.get.variables(name='b_' + tag, shape=[self.X_SPACE_SIZE])
            out = tf.nn.sigmoid(tf.add(tf.matmul(out, weights), offset))
        return out
|
|
|
|
|
|
|
|
class Discriminator(GNet):
    """
    Network that scores samples; its last layer has a single output (neuron).
    """

    def __init__(self, **args):
        GNet.__init__(self, **args)

    def network(self, **args):
        """
        This function will apply a computational graph on a dataset passed in
        with the associated labels; the last layer has a single output (neuron).

        :inputs matrix of samples to score
        :label  labels handed to the normalization layers
        :return tensor produced by the final single-output linear layer
        """
        x = args['inputs']
        label = args['label']
        # NOTE: the debugging print statements that used to live here were
        # removed; they ran on every graph construction and only wrote
        # symbolic tensors to stdout.
        with tf.compat.v1.variable_scope('D', reuse=tf.compat.v1.AUTO_REUSE, regularizer=l2_regularizer(0.00001)):
            # Layer i maps D_STRUCTURE[i] -> D_STRUCTURE[i + 1].
            for i, dim in enumerate(self.D_STRUCTURE[1:]):
                kernel = self.get.variables(name='W_' + str(i), shape=[self.D_STRUCTURE[i], dim])
                bias = self.get.variables(name='b_' + str(i), shape=[dim])
                x = tf.nn.relu(tf.add(tf.matmul(x, kernel), bias))
                x = self.normalize(inputs=x, name='cln' + str(i), shift=1, labels=label, n_labels=self.NUM_LABELS)

            # Final linear layer; variable names are kept as-is so existing
            # checkpoints keep matching.
            i = len(self.D_STRUCTURE)
            kernel = self.get.variables(name='W_' + str(i), shape=[self.D_STRUCTURE[-1], 1])
            bias = self.get.variables(name='b_' + str(i), shape=[1])
            y = tf.add(tf.matmul(x, kernel), bias)
        return y

    def loss(self, **args):
        """
        This function computes the discriminator loss.

        :real   real samples
        :fake   generated samples
        :label  labels passed to the network with every batch
        :return the pair (w_distance, loss), where loss adds the weighted
                gradient penalty and the regularization losses to w_distance
        """
        real = args['real']
        fake = args['fake']
        label = args['label']

        # One random mixing coefficient in [0, 1) per row of the batch.
        epsilon = tf.random.uniform(shape=[self.BATCHSIZE_PER_GPU, 1], minval=0, maxval=1)
        x_hat = real + epsilon * (fake - real)

        y_hat_fake = self.network(inputs=fake, label=label)
        y_hat_real = self.network(inputs=real, label=label)
        y_hat = self.network(inputs=x_hat, label=label)

        # Penalize the gradient norm at the mixed samples for deviating from 1.
        grad = tf.gradients(y_hat, [x_hat])[0]
        slopes = tf.sqrt(tf.reduce_sum(tf.square(grad), 1))
        gradient_penalty = tf.reduce_mean((slopes - 1.) ** 2)

        all_regs = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        w_distance = -tf.reduce_mean(y_hat_real) + tf.reduce_mean(y_hat_fake)
        loss = w_distance + 10 * gradient_penalty + sum(all_regs)
        # The loss is also registered in the 'dlosses' graph collection.
        tf.add_to_collection('dlosses', loss)

        return w_distance, loss
|
|
|
|
class Train (GNet):
|
|
|
|
def __init__(self, **args):
    """
    Set up the trainer together with the two networks it drives.

    :real   real data, kept on the instance as _REAL
    :label  labels, kept on the instance as _LABEL

    All keyword arguments are also forwarded unchanged to GNet, Generator
    and Discriminator.
    """
    GNet.__init__(self, **args)

    # Both networks are built from the very same arguments as the trainer.
    self.generator = Generator(**args)
    self.discriminator = Discriminator(**args)

    self._REAL = args['real']
    self._LABEL = args['label']
|
|
|
|
|
|
|
|
"""
|
|
|
|
REAL = self._REAL
|
|
|
|
LABEL= self._LABEL
|
|
|
|
with tf.device('/cpu:0'):
|
|
|
|
opt_d = tf.compat.v1.train.AdamOptimizer(1e-4)
|
|
|
|
opt_g = tf.compat.v1.train.AdamOptimizer(1e-4)
|
|
|
|
|
|
|
|
train_d, w_distance, iterator_d, features_placeholder_d, labels_placeholder_d = self.network(stage='D', opt=opt_d)
|
|
|
|
train_g, _, iterator_g, features_placeholder_g, labels_placeholder_g = self.network(stage='G', opt=opt_g)
|
|
|
|
# saver = tf.train.Saver()
|
|
|
|
saver = tf.compat.v1.train.Saver()
|
|
|
|
init = tf.global_variables_initializer()
|
|
|
|
suffix = self.get.suffix()
|
|
|
|
# print ("_____________________")
|
|
|
|
# nbatch = int(np.ceil(num / self.BATCHSIZE_PER_GPU))
|
|
|
|
# label_input = np.zeros((nbatch*self.BATCHSIZE_PER_GPU, self.NUM_LABELS))
|
|
|
|
# label_input[:, n] = 1
|
|
|
|
# label_input[:, m] = 1
|
|
|
|
# output = []
|
|
|
|
# for i in range(nbatch):
|
|
|
|
# f = sess.run(fake,feed_dict={y: label_input[i* self.BATCHSIZE_PER_GPU:(i+1)* self.BATCHSIZE_PER_GPU]})
|
|
|
|
# output.extend(np.round(f))
|
|
|
|
# output = np.array(output)[:num]
|
|
|
|
# print ([m,n,output])
|
|
|
|
|
|
|
|
# np.save(self.out_dir + str(m) + str(n), output)
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__' :
|
|
|
|
#
|
|
|
|
# Now we get things done ...
|
|
|
|
column = SYS_ARGS['column']
|
|
|
|
column_id = SYS_ARGS['id'] if 'id' in SYS_ARGS else 'person_id'
|
|
|
|
df = pd.read_csv(SYS_ARGS['raw-data'])
|
|
|
|
LABEL = pd.get_dummies(df[column_id]).astype(np.float32).values
|
|
|
|
|
|
|
|
context = SYS_ARGS['raw-data'].split(os.sep)[-1:][0][:-4]
|
|
|
|
if set(['train','learn']) & set(SYS_ARGS.keys()):
|
|
|
|
|
|
|
|
df = pd.read_csv(SYS_ARGS['raw-data'])
|
|
|
|
|
|
|
|
# cols = SYS_ARGS['column']
|
|
|
|
# _map,_df = (Binary()).Export(df)
|
|
|
|
# i = np.arange(_map[column]['start'],_map[column]['end'])
|
|
|
|
max_epochs = np.int32(SYS_ARGS['max_epochs']) if 'max_epochs' in SYS_ARGS else 10
|
|
|
|
# REAL = _df[:,i]
|
|
|
|
REAL = pd.get_dummies(df[column]).astype(np.float32).values
|
|
|
|
LABEL = pd.get_dummies(df[column_id]).astype(np.float32).values
|
|
|
|
trainer = Train(context=context,max_epochs=max_epochs,real=REAL,label=LABEL,column=column,column_id=column_id)
|
|
|
|
trainer.apply()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
# We should train upon this data
|
|
|
|
#
|
|
|
|
# -- we need to convert the data-frame to binary matrix, given a column
|
|
|
|
#
|
|
|
|
pass
|
|
|
|
elif 'generate' in SYS_ARGS:
|
|
|
|
values = df[column].unique().tolist()
|
|
|
|
values.sort()
|