"""
import pickle

        else:
            #
            # Clear this synthetic column's logs from the data store, keeping a
            # backup copy before dropping the collection.
            # NOTE: Collection.count() and insert() are pymongo 3.x calls; pymongo 4
            # replaced them with count_documents({}) and insert_one().
            #
            column = self.ATTRIBUTES['synthetic']
            db = self.logger.db
            if db[column].count() > 0 :
                db.backup.insert({'name':column,'logs':list(db[column].find()) })
                db[column].drop()
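            # Illustrative shape of the backup document (column name is hypothetical):
            #
            #   {'name': 'gender', 'logs': [<every document previously logged for 'gender'>]}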

    def load_meta(self,column):
        """
        This function is designed to accommodate the use of the sub-classes outside of a
        strict dependency model, because prediction and training can happen independently.
        It reloads the attributes persisted by log_meta and re-derives the working directories.
        """
        # suffix = "-".join(column) if isinstance(column,list) else column
        suffix = self.get.suffix()
        _name = os.sep.join([self.out_dir,'meta-'+suffix+'.json'])
        if os.path.exists(_name) :
            attr = json.loads((open(_name)).read())
            for key in attr :
                value = attr[key]
                setattr(self,key,value)
            self.train_dir = os.sep.join([self.log_dir,'train',self.CONTEXT])
            self.out_dir = os.sep.join([self.log_dir,'output',self.CONTEXT])
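
    # Usage sketch (illustrative; assumes `net` is an already-constructed subclass
    # instance and that a prior run wrote meta-<suffix>.json via log_meta):
    #
    #   net.load_meta('gender')   # restores CONTEXT, ATTRIBUTES, Z_DIM, ... as attributes
    #   print(net.train_dir)      # now points at <log_dir>/train/<CONTEXT>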

    def log_meta(self,**args) :
        _object = {
            # '_id':'meta',
            'CONTEXT':self.CONTEXT,
            'ATTRIBUTES':self.ATTRIBUTES,
            'BATCHSIZE_PER_GPU':self.BATCHSIZE_PER_GPU,
            'Z_DIM':self.Z_DIM,
            "X_SPACE_SIZE":self.X_SPACE_SIZE,
            "D_STRUCTURE":self.D_STRUCTURE,
            "G_STRUCTURE":self.G_STRUCTURE,
            "NUM_GPUS":self.NUM_GPUS,
            "NUM_LABELS":self.NUM_LABELS,
            "MAX_EPOCHS":self.MAX_EPOCHS,
            "ROW_COUNT":self.ROW_COUNT
        }
        if args and 'key' in args and 'value' in args :
            key = args['key']
            value = args['value']
            _object[key] = value
        # suffix = "-".join(self.column) if isinstance(self.column,list) else self.column
        suffix = self.get.suffix()
        _name = os.sep.join([self.out_dir,'meta-'+suffix])

        f = open(_name+'.json','w')
        f.write(json.dumps(_object))
        f.close()
        return _object
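
    # Illustrative content of the persisted meta-<suffix>.json (all values hypothetical):
    #
    #   {"CONTEXT": "test", "ATTRIBUTES": {"synthetic": "gender", "real": ["age"]},
    #    "BATCHSIZE_PER_GPU": 2000, "Z_DIM": 128, "X_SPACE_SIZE": 128,
    #    "D_STRUCTURE": [128, 128, 128], "G_STRUCTURE": [128, 128],
    #    "NUM_GPUS": 1, "NUM_LABELS": 8, "MAX_EPOCHS": 10, "ROW_COUNT": 10000}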

    def mkdir (self,path):
        if not os.path.exists(path) :
            os.mkdir(path)

        # `y_hat_fake` is the critic's score of the generated batch; the generator's
        # loss is the negated mean score plus the accumulated L2 regularizers.
        all_regs = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES)
        loss = -tf.reduce_mean(y_hat_fake) + sum(all_regs)
        # tf.add_to_collection('glosses', loss)
        tf.compat.v1.add_to_collection('glosses', loss)
        return loss, loss
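
    # In WGAN terms this is the generator objective (a sketch; D is the critic network):
    #
    #   L_G = -E_z[ D(G(z)) ] + sum(L2 regularizers)
    #
    # Both returned values are the same tensor because, unlike the critic, the
    # generator has no separate Wasserstein-distance estimate to report.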

    def load_meta(self, column):
        super().load_meta(column)
        self.discriminator.load_meta(column)

    def network(self,**args) :
        """
        This function will build the network that generates the synthetic candidates.
        :inputs matrix of input data (typically the latent/noise batch)
        :dim    dimension of the input (defaults to Z_DIM when not provided)
        :label  conditioning labels used by the conditional batch normalization
        """
        x = args['inputs']
        tmp_dim = self.Z_DIM if 'dim' not in args else args['dim']
        label = args['label']

        with tf.compat.v1.variable_scope('G', reuse=tf.compat.v1.AUTO_REUSE , regularizer=l2_regularizer(0.00001)):
            for i, dim in enumerate(self.G_STRUCTURE[:-1]):
                kernel = self.get.variables(name='W_' + str(i), shape=[tmp_dim, dim])
                h1 = self.normalize(inputs=tf.matmul(x, kernel),shift=0, name='cbn' + str(i), labels=label, n_labels=self.NUM_LABELS)
                h2 = tf.nn.relu(h1)
                x = x + h2    # residual connection; assumes dim matches the running width
                tmp_dim = dim
            i = len(self.G_STRUCTURE) - 1
            #
            # This seems to be an extra hidden layer:
            # its goal is to map continuous values to discrete values (pre-trained to do this)
            kernel = self.get.variables(name='W_' + str(i), shape=[tmp_dim, self.G_STRUCTURE[-1]])
            h1 = self.normalize(inputs=tf.matmul(x, kernel), name='cbn' + str(i),
                                labels=label, n_labels=self.NUM_LABELS)
            h2 = tf.nn.tanh(h1)
            x = x + h2
            #
            # This seems to be the output layer
            # (the kernel shape assumes the running width still equals Z_DIM)
            #
            kernel = self.get.variables(name='W_' + str(i+1), shape=[self.Z_DIM, self.X_SPACE_SIZE])
            bias = self.get.variables(name='b_' + str(i+1), shape=[self.X_SPACE_SIZE])
            x = tf.nn.sigmoid(tf.add(tf.matmul(x, kernel), bias))
        return x
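
    # Shape walk-through (a sketch with hypothetical sizes Z_DIM=128, X_SPACE_SIZE=300,
    # G_STRUCTURE=[128,128]): z:[batch,128] -> residual ReLU block keeps [batch,128]
    # -> residual tanh block keeps [batch,128] -> output layer maps to sigmoid
    # activations [batch,300], one per column of the binarized feature space.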

    def load_meta(self, column):
        self.generator.load_meta(column)
        self.discriminator.load_meta(column)

    def loss(self,**args):
        """
        This function will compute a "tower" loss of the generated candidates against real data.
        Training consists in alternating between the generator and the discriminator.
        :scope  variable scope of the current tower
        :stage  'D' to compute the discriminator loss, anything else for the generator loss
        :real   batch of real (binarized) data
        :label  conditioning labels for the batch
        """

        scope = args['scope']
        stage = args['stage']
        real = args['real']
        label = args['label']
        label = tf.cast(label, tf.int32)
        #
        # @TODO: Ziqi needs to explain what's going on here
        # Fold the label matrix into a single integer class index per row: the matmul
        # against m = [[0],[1],...] converts the one-hot columns 2: into an index,
        # which is then offset by label[:, 1] * len(m).
        m = [[i] for i in np.arange(self._LABEL.shape[1]-2)]
        label = label[:, 1] * len(m) + tf.squeeze(
            tf.matmul(label[:, 2:], tf.constant(m, dtype=tf.int32))
        )
        # label = label[:,1] * 4 + tf.squeeze( label[:,2]*[[0],[1],[2],[3]] )
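
        # Worked example (hypothetical 6-column label row, so len(m) == 4):
        #   row = [1, 1, 0, 0, 1, 0]  ->  columns 2: are one-hot for index 2,
        #   so the combined index is 1 * 4 + 2 = 6.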

        z = tf.random.normal(shape=[self.BATCHSIZE_PER_GPU, self.Z_DIM])

        fake = self.generator.network(inputs=z, label=label)
        if stage == 'D':
            w, loss = self.discriminator.loss(real=real, fake=fake, label=label)
            # losses = tf.get_collection('dlosses', scope)
            flag = 'dlosses'
            losses = tf.compat.v1.get_collection('dlosses', scope)
        else:
            w, loss = self.generator.loss(fake=fake, label=label)
            # losses = tf.get_collection('glosses', scope)
            flag = 'glosses'
            losses = tf.compat.v1.get_collection('glosses', scope)
        # losses = tf.compat.v1.get_collection(flag, scope)

        total_loss = tf.add_n(losses, name='total_loss')

        return total_loss, w
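
    # Sketch of how the two stages are typically wired per tower (illustrative;
    # the driver code and variable names here are assumptions, not shown above):
    #
    #   d_loss, w_distance = self.loss(scope=scope, stage='D', real=real, label=label)
    #   g_loss, _          = self.loss(scope=scope, stage='G', real=real, label=label)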

    def input_fn(self):
        """
        This function builds the input pipeline: placeholder-backed tensors for the
        real data and labels, sliced into a tf.data.Dataset that repeats for many epochs.
        """
        features_placeholder = tf.compat.v1.placeholder(shape=self._REAL.shape, dtype=tf.float32)
        labels_placeholder = tf.compat.v1.placeholder(shape=self._LABEL.shape, dtype=tf.float32)
        dataset = tf.data.Dataset.from_tensor_slices((features_placeholder, labels_placeholder))
        dataset = dataset.repeat(10000)
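
    # How such a placeholder-backed dataset is typically consumed in TF1 (a sketch;
    # the iterator and feed step are assumptions, not part of the excerpt above):
    #
    #   iterator = tf.compat.v1.data.make_initializable_iterator(dataset)
    #   sess.run(iterator.initializer, feed_dict={features_placeholder: self._REAL,
    #                                             labels_placeholder: self._LABEL})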

                w_sum = 0
                for i in range(self.STEPS_PER_EPOCH):
                    # train the critic twice for every generator step, accumulating
                    # its reported Wasserstein-distance estimate
                    for _ in range(2):
                        _, w = sess.run([train_d, w_distance])
                        w_sum += w
                    sess.run(train_g)
                duration = time.time() - start_time

                assert not np.isnan(w_sum), 'Model diverged with loss = NaN'

                format_str = 'epoch: %d, w_distance = %f (%.1f)'
                print(format_str % (epoch, -w_sum/(self.STEPS_PER_EPOCH*2), duration))
                # print (dir (w_distance))

                logs.append({"epoch":epoch,"distance":-w_sum/(self.STEPS_PER_EPOCH*2) })
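
                # The reported distance averages w over the 2 * STEPS_PER_EPOCH critic
                # updates and flips its sign; assuming the critic minimizes
                # -E[D(real)] + E[D(fake)] (not shown in this excerpt), the printed
                # value approximates the Wasserstein-1 estimate E[D(real)] - E[D(fake)].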

                if epoch % self.MAX_EPOCHS == 0:
                    # suffix = "-".join(self.ATTRIBUTES['synthetic']) if isinstance(self.ATTRIBUTES['synthetic'],list) else self.ATTRIBUTES['synthetic']
                    suffix = self.get.suffix()
                    _name = os.sep.join([self.train_dir,suffix])
                    # saver.save(sess, self.train_dir, write_meta_graph=False, global_step=epoch)
                    saver.save(sess, _name, write_meta_graph=False, global_step=epoch)
                    #
                    #
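                    # Restoring elsewhere (a sketch; assumes a compatible graph was rebuilt):
                    #   saver = tf.compat.v1.train.Saver()
                    #   saver.restore(sess, tf.train.latest_checkpoint(self.train_dir))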

            if self.logger :
                row = {"logs":logs} #,"model":pickle.dump(sess)}
                self.logger.write(row)
            #
            # @TODO:
            # We should upload the files in the checkpoint
            # This would allow the learnt model to be portable to another system
            #
        tf.compat.v1.reset_default_graph()

        for i in np.arange(CANDIDATE_COUNT) :

        tf.compat.v1.reset_default_graph()

        return df.to_dict(orient='list')
        # count = str(len(os.listdir(self.out_dir)))
        # _name = os.sep.join([self.out_dir,self.CONTEXT+'-'+count+'.csv'])
        # df.to_csv(_name,index=False)

        # for m in range(2):
        #     for n in range(2, self.NUM_LABELS):
        #         idx1 = (demo[:, m] == 1)
        #         idx2 = (demo[:, n] == 1)
        #         idx = [idx1[j] and idx2[j] for j in range(len(idx1))]
        #         num = np.sum(idx)
        #         print ("___________________list__")
        #         print (idx1)
        #         print (idx2)
        #         print (idx)
        #         print (num)
        #         print ("_____________________")
        #         nbatch = int(np.ceil(num / self.BATCHSIZE_PER_GPU))
        #         label_input = np.zeros((nbatch*self.BATCHSIZE_PER_GPU, self.NUM_LABELS))
        #         label_input[:, n] = 1
        #         label_input[:, m] = 1
        #         output = []
        #         for i in range(nbatch):
        #             f = sess.run(fake,feed_dict={y: label_input[i* self.BATCHSIZE_PER_GPU:(i+1)* self.BATCHSIZE_PER_GPU]})
        #             output.extend(np.round(f))
        #         output = np.array(output)[:num]
        #         print ([m,n,output])
        #
        #         np.save(self.out_dir + str(m) + str(n), output)

    p.load_meta(column)