@@ -11,9 +11,10 @@ import pandas as pd
import time
import os
import sys
from params import SYS_ARGS
from bridge import Binary
from data. params import SYS_ARGS
from data. bridge import Binary
import json
import pickle
os . environ [ " CUDA_DEVICE_ORDER " ] = " PCI_BUS_ID "
os . environ [ ' CUDA_VISIBLE_DEVICES ' ] = " 0 "
@@ -37,10 +38,8 @@ class GNet :
self . layers = void ( )
self . layers . normalize = self . normalize
self . get = void ( )
self . get . variables = self . _variable_on_cpu
self . NUM_GPUS = 1
self . NUM_GPUS = 1 if ' num_gpu ' not in args else args [ ' num_gpu ' ]
self . X_SPACE_SIZE = args [ ' real ' ] . shape [ 1 ] if ' real ' in args else 854
@@ -63,7 +62,11 @@ class GNet :
self . ATTRIBUTES = { " id " : args [ ' column_id ' ] if ' column_id ' in args else None , " synthetic " : args [ ' column ' ] if ' column ' in args else None }
self . _REAL = args [ ' real ' ] if ' real ' in args else None
self . _LABEL = args [ ' label ' ] if ' label ' in args else None
self . get = void ( )
self . get . variables = self . _variable_on_cpu
self . get . suffix = lambda : " - " . join ( self . ATTRIBUTES [ ' synthetic ' ] ) if isinstance ( self . ATTRIBUTES [ ' synthetic ' ] , list ) else self . ATTRIBUTES [ ' synthetic ' ]
self . logger = args [ ' logger ' ] if ' logger ' in args and args [ ' logger ' ] else None
self . init_logs ( * * args )
def init_logs ( self , * * args ) :
@@ -83,7 +86,9 @@ class GNet :
This function is designed to accomodate the uses of the sub - classes outside of a strict dependency model .
Because prediction and training can happen independently
"""
_name = os . sep . join ( [ self . out_dir , ' meta- ' + column + ' .json ' ] )
# suffix = "-".join(column) if isinstance(column,list)else column
suffix = self . get . suffix ( )
_name = os . sep . join ( [ self . out_dir , ' meta- ' + suffix + ' .json ' ] )
if os . path . exists ( _name ) :
attr = json . loads ( ( open ( _name ) ) . read ( ) )
for key in attr :
@@ -94,7 +99,7 @@ class GNet :
def log_meta ( self , * * args ) :
object = {
_ object = {
' CONTEXT ' : self . CONTEXT ,
' ATTRIBUTES ' : self . ATTRIBUTES ,
' BATCHSIZE_PER_GPU ' : self . BATCHSIZE_PER_GPU ,
@@ -111,9 +116,13 @@ class GNet :
key = args [ ' key ' ]
value = args [ ' value ' ]
object [ key ] = value
_name = os . sep . join ( [ self . out_dir , ' meta- ' + SYS_ARGS [ ' column ' ] ] )
# suffix = "-".join(self.column) if isinstance(self.column,list) else self.column
suffix = self . get . suffix ( )
_name = os . sep . join ( [ self . out_dir , ' meta- ' + suffix ] )
f = open ( _name + ' .json ' , ' w ' )
f . write ( json . dumps ( object ) )
f . write ( json . dumps ( _object ) )
return _object
def mkdir(self, path):
    """Create directory *path* if it does not already exist.

    Uses EAFP instead of the original ``os.path.exists`` pre-check,
    which was racy (TOCTOU): another process could create or remove
    the directory between the check and the ``mkdir`` call.

    :param path: directory to create; the parent must already exist
        (a missing parent still raises ``FileNotFoundError``, as before).
    """
    try:
        os.mkdir(path)
    except FileExistsError:
        # Same outcome as the original existence check: a pre-existing
        # path (directory or file) is silently left alone.
        pass
@@ -285,8 +294,10 @@ class Train (GNet):
self . discriminator = Discriminator ( * * args )
self . _REAL = args [ ' real ' ]
self . _LABEL = args [ ' label ' ]
self . column = args [ ' column ' ]
# print ([" *** ",self.BATCHSIZE_PER_GPU])
self . log_meta ( )
self . meta = self . log_meta ( )
def load_meta ( self , column ) :
"""
This function will delegate the calls to load meta data to it ' s dependents
@@ -384,7 +395,7 @@ class Train (GNet):
# saver = tf.train.Saver()
saver = tf . compat . v1 . train . Saver ( )
init = tf . global_variables_initializer ( )
logs = [ ]
with tf . Session ( config = tf . ConfigProto ( allow_soft_placement = True , log_device_placement = False ) ) as sess :
sess . run ( init )
sess . run ( iterator_d . initializer ,
@@ -406,13 +417,22 @@ class Train (GNet):
format_str = ' epoch: %d , w_distance = %f ( %.1f ) '
print ( format_str % ( epoch , - w_sum / ( self . STEPS_PER_EPOCH * 2 ) , duration ) )
# print (dir (w_distance))
logs . append ( { " epoch " : epoch , " distance " : - w_sum / ( self . STEPS_PER_EPOCH * 2 ) } )
if epoch % self . MAX_EPOCHS == 0 :
_name = os . sep . join ( [ self . train_dir , self . ATTRIBUTES [ ' synthetic ' ] ] )
# suffix = "-".join(self.ATTRIBUTES['synthetic']) if isinstance(self.ATTRIBUTES['synthetic'],list) else self.ATTRIBUTES['synthetic']
suffix = self . get . suffix ( )
_name = os . sep . join ( [ self . train_dir , suffix ] )
# saver.save(sess, self.train_dir, write_meta_graph=False, global_step=epoch)
saver . save ( sess , _name , write_meta_graph = False , global_step = epoch )
#
#
if self . logger :
row = { " logs " : logs } #,"model":pickle.dump(sess)}
self . logger . write ( row = row )
class Predict ( GNet ) :
"""
@@ -420,14 +440,16 @@ class Predict(GNet):
"""
def __init__(self, **args):
    """Prediction half of the GAN: wraps a Generator and remembers the
    admissible output values.

    Fix: this span contained stripped-diff residue — a duplicated
    assignment pair in which the old line referenced an undefined
    module-level ``values``; only the corrected new-side lines are kept.

    :param args: forwarded verbatim to ``GNet`` and ``Generator``;
        must include ``'values'``, the list of real column values that
        synthetic indexes are mapped back onto.
    """
    GNet.__init__(self, **args)
    self.generator = Generator(**args)
    # 'values' come from the caller (df[column].unique() in __main__)
    self.values = args['values']
def load_meta(self, column):
    """Restore persisted run metadata for *column*, then have the
    wrapped generator restore its own.

    Order matters: the base-class state is reloaded first so the
    generator sees consistent attributes.
    """
    super().load_meta(column)
    self.generator.load_meta(column)
def apply ( self , * * args ) :
# print (self.train_dir)
model_dir = os . sep . join ( [ self . train_dir , self . ATTRIBUTES [ ' synthetic ' ] + ' - ' + str ( self . MAX_EPOCHS ) ] )
# suffix = "-".join(self.ATTRIBUTES['synthetic']) if isinstance(self.ATTRIBUTES['synthetic'],list) else self.ATTRIBUTES['synthetic']
suffix = self . get . suffix ( )
model_dir = os . sep . join ( [ self . train_dir , suffix + ' - ' + str ( self . MAX_EPOCHS ) ] )
demo = self . _LABEL #np.zeros([self.ROW_COUNT,self.NUM_LABELS]) #args['de"shape":{"LABEL":list(self._LABEL.shape)} mo']
tf . compat . v1 . reset_default_graph ( )
z = tf . random . normal ( shape = [ self . BATCHSIZE_PER_GPU , self . Z_DIM ] )
@@ -450,19 +472,24 @@ class Predict(GNet):
# if we are dealing with numeric values only we can perform a simple marginal sum against the indexes
#
df = ( pd . DataFrame ( np . round ( f ) . astype ( np . int32 ) ,columns = values ))
df = ( pd . DataFrame ( np . round ( f ) . astype ( np . int32 ) ))
# i = df.T.index.astype(np.int32) #-- These are numeric pseudonyms
# df = (i * df).sum(axis=1)
#
# In case we are dealing with actual values like diagnosis codes we can perform
#
r = np . zeros ( ( self . ROW_COUNT , 1 ) )
columns = self . ATTRIBUTES [ ' synthetic ' ] if isinstance ( self . ATTRIBUTES [ ' synthetic ' ] , list ) else [ self . ATTRIBUTES [ ' synthetic ' ] ]
r = np . zeros ( ( self . ROW_COUNT , len ( columns ) ) )
for col in df :
i = np . where ( df [ col ] ) [ 0 ]
r [ i ] = col
df = pd . DataFrame ( r , columns = [ self . ATTRIBUTES [ ' synthetic ' ] ] )
return df . to_dict ( orient = ' list ' )
df = pd . DataFrame ( r , columns = columns )
df [ df . columns ] = ( df . apply ( lambda value : self . values [ int ( value ) ] , axis = 1 ) )
return df . to_dict ( orient = ' lists ' )
# return df.to_dict(orient='list')
# count = str(len(os.listdir(self.out_dir)))
# _name = os.sep.join([self.out_dir,self.CONTEXT+'-'+count+'.csv'])
# df.to_csv(_name,index=False)
@@ -476,7 +503,7 @@ class Predict(GNet):
# idx2 = (demo[:, n] == 1)
# idx = [idx1[j] and idx2[j] for j in range(len(idx1))]
# num = np.sum(idx)
# print ("___________________ __")
# print ("___________________ list __")
# print (idx1)
# print (idx2)
# print (idx)
@@ -531,7 +558,8 @@ if __name__ == '__main__' :
elif ' generate ' in SYS_ARGS :
values = df [ column ] . unique ( ) . tolist ( )
values . sort ( )
p = Predict ( context = context , label = LABEL , values = values )
p = Predict ( context = context , label = LABEL , values = values , column = column )
p . load_meta ( column )
r = p . apply ( )
print ( df )
@@ -539,6 +567,7 @@ if __name__ == '__main__' :
df [ column ] = r [ column ]
print ( df )
else :
print ( SYS_ARGS . keys ( ) )
print ( __doc__ )