@@ -43,6 +43,10 @@ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
 class void :
     pass
 class GNet :
+    def log(self,**args):
+        self.logs = dict(args,**self.logs)
+
+
     """
     This is the base class of generative network functions; the details will be implemented in the subclasses.
     An instance of this class is accessed as follows
@@ -52,7 +56,7 @@ class GNet :
 
     def __init__(self,**args):
         self.layers = void()
         self.layers.normalize = self.normalize
-
+        self.logs = {}
         self.NUM_GPUS = 1 if 'num_gpu' not in args else args['num_gpu']
 
@@ -95,6 +99,15 @@ class GNet :
 
         self.train_dir = os.sep.join([self.log_dir,'train',self.CONTEXT])
         self.out_dir = os.sep.join([self.log_dir,'output',self.CONTEXT])
+        if self.logger :
+            #
+            # We will clear the logs from the data-store
+            #
+            column = self.ATTRIBUTES['synthetic']
+            db = self.logger.db
+            if db[column].count() > 0 :
+                db.backup.insert({'name':column,'logs':list(db[column].find()) })
+                db[column].drop()
 
     def load_meta(self,column):
         """
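
Note on the hunk above: before training, any logs already stored for the synthetic column are copied into a backup collection and the source collection is dropped. A minimal standalone sketch of that backup-then-drop pattern, written against the current pymongo names (count_documents, insert_one); the patch itself targets the older count()/insert() API, and both db and column here are illustrative, not defined by the patch:

    # Illustrative only: 'db' is any pymongo-style database handle and
    # 'column' the name of the logs collection.
    def archive_and_reset(db, column):
        # Only archive when there is something to lose
        if db[column].count_documents({}) > 0:
            # Keep a copy of the old logs under a 'backup' collection
            db.backup.insert_one({'name': column, 'logs': list(db[column].find())})
            # Drop the source collection so the new run starts with clean logs
            db[column].drop()
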
@@ -114,7 +127,9 @@ class GNet :
 
 
     def log_meta(self,**args) :
+
         _object = {
+            '_id':'meta',
             'CONTEXT':self.CONTEXT,
             'ATTRIBUTES':self.ATTRIBUTES,
             'BATCHSIZE_PER_GPU':self.BATCHSIZE_PER_GPU,
@@ -314,6 +329,11 @@ class Train (GNet):
         # print ([" *** ",self.BATCHSIZE_PER_GPU])
         self.meta = self.log_meta()
+        if(self.logger):
+            self.logger.write( row=self.meta )
+
+        self.log (real_shape=list(self._REAL.shape),label_shape = list(self._LABEL.shape),meta_data=self.meta)
+
 
     def load_meta(self, column):
         """
         This function will delegate calls to load metadata to its dependents
@@ -350,11 +370,14 @@ class Train (GNet):
         if stage == 'D':
             w, loss = self.discriminator.loss(real=real, fake=fake, label=label)
             #losses = tf.get_collection('dlosses', scope)
+            flag = 'dlosses'
             losses = tf.compat.v1.get_collection('dlosses', scope)
         else:
             w, loss = self.generator.loss(fake=fake, label=label)
             #losses = tf.get_collection('glosses', scope)
+            flag = 'glosses'
             losses = tf.compat.v1.get_collection('glosses', scope)
+        # losses = tf.compat.v1.get_collection(flag, scope)
 
         total_loss = tf.add_n(losses, name='total_loss')
 
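
For context on the flag variable introduced above: both branches tag their partial losses into a named graph collection, and tf.add_n then sums whatever tensors were registered under that name. A small self-contained sketch of that mechanism under tf.compat.v1 (the patch additionally passes scope to get_collection to restrict the lookup to the current device tower; the constants below are illustrative):

    import tensorflow as tf
    tf.compat.v1.disable_eager_execution()  # v1 graph mode, as the patch assumes

    # Register two partial losses under one collection name
    tf.compat.v1.add_to_collection('dlosses', tf.constant(2.0))
    tf.compat.v1.add_to_collection('dlosses', tf.constant(3.0))

    losses = tf.compat.v1.get_collection('dlosses')   # all tensors tagged 'dlosses'
    total_loss = tf.add_n(losses, name='total_loss')  # their sum

    with tf.compat.v1.Session() as sess:
        print(sess.run(total_loss))  # 5.0
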
@@ -369,7 +392,8 @@ class Train (GNet):
         dataset = dataset.repeat(10000)
         dataset = dataset.batch(batch_size=self.BATCHSIZE_PER_GPU)
         dataset = dataset.prefetch(1)
-        iterator = dataset.make_initializable_iterator()
+        # iterator = dataset.make_initializable_iterator()
+        iterator = tf.compat.v1.data.make_initializable_iterator(dataset)
         # next_element = iterator.get_next()
         # init_op = iterator.initializer
         return iterator, features_placeholder, labels_placeholder
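
The iterator change above is the standard TF1-to-TF2 migration: Dataset.make_initializable_iterator() no longer exists as a method, and the free function tf.compat.v1.data.make_initializable_iterator(dataset) is its drop-in replacement in v1 graph mode. A minimal sketch, with an assumed placeholder shape purely for illustration:

    import numpy as np
    import tensorflow as tf
    tf.compat.v1.disable_eager_execution()

    features_placeholder = tf.compat.v1.placeholder(tf.float32, shape=(None, 4))
    dataset = tf.data.Dataset.from_tensor_slices(features_placeholder)
    dataset = dataset.batch(2).prefetch(1)

    # Old TF1 form, removed in TF2:  iterator = dataset.make_initializable_iterator()
    iterator = tf.compat.v1.data.make_initializable_iterator(dataset)
    next_element = iterator.get_next()

    with tf.compat.v1.Session() as sess:
        sess.run(iterator.initializer,
                 feed_dict={features_placeholder: np.ones((6, 4), np.float32)})
        print(sess.run(next_element).shape)  # (2, 4)
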
@@ -406,6 +430,9 @@ class Train (GNet):
         # max_epochs = args['max_epochs'] if 'max_epochs' in args else 10
         REAL = self._REAL
         LABEL= self._LABEL
+        if (self.logger):
+            pass
+
         with tf.device('/cpu:0'):
             opt_d = tf.compat.v1.train.AdamOptimizer(1e-4)
             opt_g = tf.compat.v1.train.AdamOptimizer(1e-4)
@@ -441,7 +468,7 @@ class Train (GNet):
                     print(format_str % (epoch, -w_sum/(self.STEPS_PER_EPOCH*2), duration))
                     # print (dir (w_distance))
 
-                    logs.append({"epoch":epoch,"distance":-w_sum })
+                    logs.append({"epoch":epoch,"distance":-w_sum/(self.STEPS_PER_EPOCH*2) })
 
                     if epoch % self.MAX_EPOCHS == 0:
                         # suffix = "-".join(self.ATTRIBUTES['synthetic']) if isinstance(self.ATTRIBUTES['synthetic'],list) else self.ATTRIBUTES['synthetic']
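
The one-line fix above makes the logged value match the printed one: both now report the per-step average of the accumulated Wasserstein estimate rather than the raw epoch sum, so distances stay comparable when STEPS_PER_EPOCH changes. In numbers (illustrative values; the factor of 2 presumably reflects two critic accumulations per step, as the print statement implies):

    STEPS_PER_EPOCH = 256
    w_sum = -512.0                             # accumulated over one epoch
    distance = -w_sum / (STEPS_PER_EPOCH * 2)  # 1.0 -- what is printed and now logged
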
@@ -453,8 +480,13 @@ class Train (GNet):
             #
             if self.logger :
                 row = {"logs":logs} #,"model":pickle.dump(sess)}
                 self.logger.write(row=row)
-
+            #
+            # @TODO:
+            # We should upload the files in the checkpoint
+            # This would allow the learnt model to be portable to another system
+            #
+            tf.compat.v1.reset_default_graph()
 
 class Predict(GNet):
     """
@@ -479,38 +511,61 @@ class Predict(GNet):
         ma = [[i] for i in np.arange(self.NUM_LABELS - 2)]
         label = y[:, 1] * len(ma) + tf.squeeze(tf.matmul(y[:, 2:], tf.constant(ma, dtype=tf.int32)))
 
         fake = self.generator.network(inputs=z, label=label)
         init = tf.compat.v1.global_variables_initializer()
         saver = tf.compat.v1.train.Saver()
 
+        df = pd.DataFrame()
+        CANDIDATE_COUNT = 1000
+        NTH_VALID_CANDIDATE = count = np.random.choice(np.arange(2,60),2)[0]
+
         with tf.compat.v1.Session() as sess:
             # sess.run(init)
             saver.restore(sess, model_dir)
             labels = np.zeros((self.ROW_COUNT,self.NUM_LABELS) )
 
+            found = []
             labels= demo
-            f = sess.run(fake,feed_dict={y:labels})
-            #
-            # if we are dealing with numeric values only we can perform a simple marginal sum against the indexes
-            #
-            df = ( pd.DataFrame(np.round(f).astype(np.int32)))
+            for i in np.arange(CANDIDATE_COUNT) :
+                f = sess.run(fake,feed_dict={y:labels})
+                #
+                # if we are dealing with numeric values only we can perform a simple marginal sum against the indexes
+                # The code below will ensure we have some acceptable cardinal relationships between id and synthetic values
+                #
+                df = ( pd.DataFrame(np.round(f).astype(np.int32)))
+                p = 0 not in df.sum(axis=1).values
+                if p:
+                    found.append(df)
+                    if len(found) == NTH_VALID_CANDIDATE or i == CANDIDATE_COUNT:
+                        break
+                    else:
+                        continue
+
             # i = df.T.index.astype(np.int32) #-- These are numeric pseudonyms
             # df = (i * df).sum(axis=1)
             #
             # In case we are dealing with actual values like diagnosis codes we can perform
             #
+            df = found[np.random.choice(np.arange(len(found)),1)[0]]
+
             columns = self.ATTRIBUTES['synthetic'] if isinstance(self.ATTRIBUTES['synthetic'],list)else [self.ATTRIBUTES['synthetic']]
 
-            r = np.zeros((self.ROW_COUNT,len(columns)))
-            for col in df :
-                i = np.where(df[col])[0]
-                r[i] = col
-            df = pd.DataFrame(r,columns=columns)
-            df[df.columns] = (df.apply(lambda value: self.values[ int(value)],axis=1))
-            return df.to_dict(orient='lists')
+            # r = np.zeros((self.ROW_COUNT,len(columns)))
+            r = np.zeros(self.ROW_COUNT)
+            df.columns = self.values
+            if len(found):
+                print (len(found),NTH_VALID_CANDIDATE)
+                # x = df * self.values
+                df = pd.DataFrame( df.apply(lambda row: self.values[np.random.choice(np.where(row != 0)[0],1)[0]] ,axis=1))
+                df.columns = columns
+
+            tf.compat.v1.reset_default_graph()
+            return df.to_dict(orient='list')
             # return df.to_dict(orient='list')
             # count = str(len(os.listdir(self.out_dir)))
             # _name = os.sep.join([self.out_dir,self.CONTEXT+'-'+count+'.csv'])
 
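
The new Predict flow in the hunk above is rejection sampling: keep only generated frames in which every row encodes at least one value, collect up to a randomly drawn Nth valid candidate, then sample one of the survivors. A stripped-down sketch with the GAN generator stubbed by random data (the shapes and the stub are illustrative, not the patch's API):

    import numpy as np
    import pandas as pd

    CANDIDATE_COUNT = 1000
    NTH_VALID_CANDIDATE = np.random.choice(np.arange(2, 60), 1)[0]
    found = []
    for i in np.arange(CANDIDATE_COUNT):
        f = np.random.rand(8, 5)                         # stand-in for sess.run(fake, ...)
        df = pd.DataFrame(np.round(f).astype(np.int32))
        # Accept only frames where every row has a nonzero entry,
        # i.e. every synthetic record actually carries a value
        if 0 not in df.sum(axis=1).values:
            found.append(df)
            if len(found) == NTH_VALID_CANDIDATE:
                break
    if found:
        # Pick one accepted candidate at random, as the patch does
        df = found[np.random.choice(np.arange(len(found)), 1)[0]]
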