        """
        self.STEPS_PER_EPOCH = 256 #int(np.load('ICD9/train.npy').shape[0] / 2000)
        self.MAX_EPOCHS = 10 if 'max_epochs' not in args else int(args['max_epochs'])
        self.ROW_COUNT = args['real'].shape[0] if 'real' in args else 100
        self.CONTEXT = args['context']
        self.ATTRIBUTES = {"id": args['column_id'] if 'column_id' in args else None, "synthetic": args['column'] if 'column' in args else None}
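        # "synthetic" holds the column to be synthesized and "id" the column whose values serve
        # as the conditioning labels (REAL and LABEL are built from these columns further down).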
        self._REAL = args['real'] if 'real' in args else None
        self._LABEL = args['label'] if 'label' in args else None
        suffix = self.get.suffix()
        _name = os.sep.join([self.out_dir,'meta-'+suffix+'.json'])

        name        name (scope suffix) used for the offset/scale variables of this layer
        labels      labels (attributes that are not being synthesized), None by default
        n_labels    number of distinct labels, None by default
        """
        inputs = args['inputs']
        name = args['name']
        labels = None if 'labels' not in args else args['labels']
        n_labels = None if 'n_labels' not in args else args['n_labels']
        shift = [0] if self.__class__.__name__.lower() == 'generator' else [1] #-- axes for tf.nn.moments: across the batch for the generator, across the features otherwise
        mean, var = tf.nn.moments(inputs, shift, keepdims=True)
        shape = inputs.shape[1]
        offset_m = self.get.variables(shape=[n_labels,shape], name='offset'+name,
                                      initializer=tf.zeros_initializer)
        scale_m = self.get.variables(shape=[n_labels,shape], name='scale'+name,
                                     initializer=tf.ones_initializer)

        offset = tf.nn.embedding_lookup(offset_m, labels)
        scale = tf.nn.embedding_lookup(scale_m, labels)

        result = tf.nn.batch_normalization(inputs, mean, var, offset, scale, 1e-8)
        return result
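        # The block above implements a conditional (per-label) batch normalization: a separate
        # row of offset/scale parameters is learned for every label, tf.nn.embedding_lookup picks
        # the row matching each sample's label, and tf.nn.batch_normalization then applies
        # scale * (x - mean) / sqrt(var + eps) + offset with eps = 1e-8.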

    def _variable_on_cpu(self,**args):
        """
        This function makes sure variables/tensors are not created on the GPU but rather on the CPU
        """

        name = args['name']
        shape = args['shape']
        initializer = None if 'initializer' not in args else args['initializer']
        with tf.device('/cpu:0'):
            cpu_var = tf.compat.v1.get_variable(name,shape,initializer=initializer)
        return cpu_var
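        # Elsewhere in the class this helper appears to be reached through self.get.variables(...)
        # (see the offset/scale variables created above), presumably an alias bound to this method.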

    def average_gradients(self,tower_grads):
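        """
        Average the gradients computed on several GPU towers.
        :tower_grads    one entry per tower, each a list of (gradient, variable) tuples,
                        typically the output of an optimizer's compute_gradients()
        :return         a single list of (averaged gradient, variable) tuples
        """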
        average_grads = []
        for grad_and_vars in zip(*tower_grads):
            grads = []
            for g, _ in grad_and_vars:
                expanded_g = tf.expand_dims(g, 0)
                grads.append(expanded_g)

            grad = tf.concat(axis=0, values=grads)
            grad = tf.reduce_mean(grad, 0)

            v = grad_and_vars[0][1]
            grad_and_var = (grad, v)
            average_grads.append(grad_and_var)
        return average_grads


class Generator (GNet):
    """
    This class handles the generation of candidate datasets. To that end it aggregates a
    discriminator, which guides the generator so that its output is not purely random.
    """
    def __init__(self,**args):
        GNet.__init__(self,**args)
        self.discriminator = Discriminator(**args)

    def loss(self,**args):
        fake = args['fake']
        label = args['label']
        y_hat_fake = self.discriminator.network(inputs=fake, label=label)
        h1 = self.normalize(inputs=tf.matmul(x, kernel), shift=0, name='cbn' + str(i), labels=label, n_labels=self.NUM_LABELS)
        x = self.normalize(inputs=x, name='cln' + str(i), shift=1, labels=label, n_labels=self.NUM_LABELS)
        i = len(self.D_STRUCTURE)
        kernel = self.get.variables(name='W_' + str(i), shape=[self.D_STRUCTURE[-1], 1])
        bias = self.get.variables(name='b_' + str(i), shape=[1])
        y = tf.add(tf.matmul(x, kernel), bias)
        return y

    def loss(self,**args):
        """
        This function computes the critic/discriminator loss from real samples, generated
        (fake) samples and their labels.
        :real   batch of real (observed) samples
        :fake   batch of samples produced by the generator
        :label  conditioning labels associated with the samples
        """
        real = args['real']
        fake = args['fake']
        label = args['label']
        epsilon = tf.random.uniform(shape=[self.BATCHSIZE_PER_GPU,1],minval=0,maxval=1)

        x_hat = real + epsilon * (fake - real)
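        # x_hat: points drawn uniformly at random on the segment between each real sample and
        # its paired fake sample; these are the points at which the gradient penalty is evaluated.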
        y_hat_fake = self.network(inputs=fake, label=label)
        y_hat_real = self.network(inputs=real, label=label)
        y_hat = self.network(inputs=x_hat, label=label)

        grad = tf.gradients(y_hat, [x_hat])[0]
        slopes = tf.sqrt(tf.reduce_sum(tf.square(grad), 1))
        gradient_penalty = tf.reduce_mean((slopes - 1.) ** 2)
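        # Gradient penalty of the improved Wasserstein GAN (WGAN-GP, Gulrajani et al. 2017):
        # the norm of the critic's gradient at the interpolated points x_hat is pushed toward 1,
        # a soft way of enforcing the 1-Lipschitz constraint on the critic.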

        else:
            w, loss = self.generator.loss(fake=fake, label=label)

        tf.compat.v1.get_variable_scope().reuse_variables()
        #vars_ = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=stage)
        vars_ = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, scope=stage)
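        # Only the trainable variables of the current scope (stage) are collected, presumably so
        # that each optimizer updates its own sub-network (generator or discriminator) only.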

        for epoch in range(1, self.MAX_EPOCHS + 1):
            start_time = time.time()
            w_sum = 0
            for i in range(self.STEPS_PER_EPOCH):
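                # two critic/discriminator updates for every single generator update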
                for _ in range(2):
                    _, w = sess.run([train_d, w_distance])
                    w_sum += w
                sess.run(train_g)
            duration = time.time() - start_time

            assert not np.isnan(w_sum), 'Model diverged with loss = NaN'

            format_str = 'epoch: %d, w_distance = %f (%.1f)'
            print(format_str % (epoch, -w_sum/(self.STEPS_PER_EPOCH*2), duration))
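            # -w_sum/(self.STEPS_PER_EPOCH*2) averages the logged w_distance over the
            # 2 * STEPS_PER_EPOCH critic updates of the epoch; the sign flip suggests w_distance
            # is the (minimized) negative of the estimated Wasserstein distance.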

    def __init__(self,**args):
        df = pd.DataFrame(np.round(f).astype(np.int32))
        # idx2 = (demo[:, n] == 1)
        REAL = pd.get_dummies(df[column]).astype(np.float32).values
        LABEL = pd.get_dummies(df[column_id]).astype(np.float32).values
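        # pd.get_dummies one-hot encodes a column: e.g. a column holding the codes
        # ['250.0', '401.9', '250.0'] becomes the float32 matrix [[1,0],[0,1],[1,0]], with one
        # column per distinct value, so REAL and LABEL end up as binary indicator matrices.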
        trainer = Train(context=context,max_epochs=max_epochs,real=REAL,label=LABEL,column=column,column_id=column_id)
        trainer.apply()


        #
        # We should train upon this data
        #
        # -- we need to convert the data-frame to binary matrix, given a column
        #
        pass
    elif 'generate' in SYS_ARGS:
        values = df[column].unique().tolist()
        values.sort()