|
|
@ -166,6 +166,14 @@ class GNet :
|
|
|
|
return _object
|
|
|
|
return _object
|
|
|
|
def mkdir (self,path):
|
|
|
|
def mkdir (self,path):
|
|
|
|
if not os.path.exists(path) :
|
|
|
|
if not os.path.exists(path) :
|
|
|
|
|
|
|
|
if os.sep in path :
|
|
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
root = []
|
|
|
|
|
|
|
|
for loc in path.split(os.sep) :
|
|
|
|
|
|
|
|
root.append(loc)
|
|
|
|
|
|
|
|
os.mkdir(os.sep.join(root))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
else:
|
|
|
|
os.mkdir(path)
|
|
|
|
os.mkdir(path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -522,6 +530,8 @@ class Predict(GNet):
|
|
|
|
GNet.__init__(self,**args)
|
|
|
|
GNet.__init__(self,**args)
|
|
|
|
self.generator = Generator(**args)
|
|
|
|
self.generator = Generator(**args)
|
|
|
|
self.values = args['values']
|
|
|
|
self.values = args['values']
|
|
|
|
|
|
|
|
self.ROW_COUNT = args['row_count']
|
|
|
|
|
|
|
|
self.MISSING_VALUES = args['no_value']
|
|
|
|
def load_meta(self, column):
|
|
|
|
def load_meta(self, column):
|
|
|
|
super().load_meta(column)
|
|
|
|
super().load_meta(column)
|
|
|
|
self.generator.load_meta(column)
|
|
|
|
self.generator.load_meta(column)
|
|
|
@ -532,8 +542,8 @@ class Predict(GNet):
|
|
|
|
model_dir = os.sep.join([self.train_dir,suffix+'-'+str(self.MAX_EPOCHS)])
|
|
|
|
model_dir = os.sep.join([self.train_dir,suffix+'-'+str(self.MAX_EPOCHS)])
|
|
|
|
demo = self._LABEL #np.zeros([self.ROW_COUNT,self.NUM_LABELS]) #args['de"shape":{"LABEL":list(self._LABEL.shape)} mo']
|
|
|
|
demo = self._LABEL #np.zeros([self.ROW_COUNT,self.NUM_LABELS]) #args['de"shape":{"LABEL":list(self._LABEL.shape)} mo']
|
|
|
|
tf.compat.v1.reset_default_graph()
|
|
|
|
tf.compat.v1.reset_default_graph()
|
|
|
|
z = tf.random.normal(shape=[self.BATCHSIZE_PER_GPU, self.Z_DIM])
|
|
|
|
z = tf.random.normal(shape=[self.ROW_COUNT, self.Z_DIM])
|
|
|
|
y = tf.compat.v1.placeholder(shape=[self.BATCHSIZE_PER_GPU, self.NUM_LABELS], dtype=tf.int32)
|
|
|
|
y = tf.compat.v1.placeholder(shape=[self.ROW_COUNT, self.NUM_LABELS], dtype=tf.int32)
|
|
|
|
if self._LABEL is not None :
|
|
|
|
if self._LABEL is not None :
|
|
|
|
ma = [[i] for i in np.arange(self.NUM_LABELS - 2)]
|
|
|
|
ma = [[i] for i in np.arange(self.NUM_LABELS - 2)]
|
|
|
|
label = y[:, 1] * len(ma) + tf.squeeze(tf.matmul(y[:, 2:], tf.constant(ma, dtype=tf.int32)))
|
|
|
|
label = y[:, 1] * len(ma) + tf.squeeze(tf.matmul(y[:, 2:], tf.constant(ma, dtype=tf.int32)))
|
|
|
@ -556,7 +566,7 @@ class Predict(GNet):
|
|
|
|
labels = None
|
|
|
|
labels = None
|
|
|
|
|
|
|
|
|
|
|
|
found = []
|
|
|
|
found = []
|
|
|
|
|
|
|
|
ratio = []
|
|
|
|
for i in np.arange(CANDIDATE_COUNT) :
|
|
|
|
for i in np.arange(CANDIDATE_COUNT) :
|
|
|
|
if labels :
|
|
|
|
if labels :
|
|
|
|
f = sess.run(fake,feed_dict={y:labels})
|
|
|
|
f = sess.run(fake,feed_dict={y:labels})
|
|
|
@ -569,10 +579,11 @@ class Predict(GNet):
|
|
|
|
df = ( pd.DataFrame(np.round(f).astype(np.int32)))
|
|
|
|
df = ( pd.DataFrame(np.round(f).astype(np.int32)))
|
|
|
|
p = 0 not in df.sum(axis=1).values
|
|
|
|
p = 0 not in df.sum(axis=1).values
|
|
|
|
x = df.sum(axis=1).values
|
|
|
|
x = df.sum(axis=1).values
|
|
|
|
print ( [np.sum(x),x.size])
|
|
|
|
|
|
|
|
if np.divide( np.sum(x), x.size) :
|
|
|
|
if np.divide( np.sum(x), x.size) > .9 or p:
|
|
|
|
|
|
|
|
ratio.append(np.divide( np.sum(x), x.size))
|
|
|
|
found.append(df)
|
|
|
|
found.append(df)
|
|
|
|
if len(found) == NTH_VALID_CANDIDATE or i == CANDIDATE_COUNT:
|
|
|
|
if i == CANDIDATE_COUNT:
|
|
|
|
break
|
|
|
|
break
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
continue
|
|
|
|
continue
|
|
|
@ -582,8 +593,9 @@ class Predict(GNet):
|
|
|
|
#
|
|
|
|
#
|
|
|
|
# In case we are dealing with actual values like diagnosis codes we can perform
|
|
|
|
# In case we are dealing with actual values like diagnosis codes we can perform
|
|
|
|
#
|
|
|
|
#
|
|
|
|
|
|
|
|
INDEX = np.random.choice(np.arange(len(found)),1)[0]
|
|
|
|
df = found[np.random.choice(np.arange(len(found)),1)[0]]
|
|
|
|
INDEX = ratio.index(np.max(ratio))
|
|
|
|
|
|
|
|
df = found[INDEX]
|
|
|
|
columns = self.ATTRIBUTES['synthetic'] if isinstance(self.ATTRIBUTES['synthetic'],list)else [self.ATTRIBUTES['synthetic']]
|
|
|
|
columns = self.ATTRIBUTES['synthetic'] if isinstance(self.ATTRIBUTES['synthetic'],list)else [self.ATTRIBUTES['synthetic']]
|
|
|
|
|
|
|
|
|
|
|
|
# r = np.zeros((self.ROW_COUNT,len(columns)))
|
|
|
|
# r = np.zeros((self.ROW_COUNT,len(columns)))
|
|
|
@ -592,9 +604,20 @@ class Predict(GNet):
|
|
|
|
if len(found):
|
|
|
|
if len(found):
|
|
|
|
print (len(found),NTH_VALID_CANDIDATE)
|
|
|
|
print (len(found),NTH_VALID_CANDIDATE)
|
|
|
|
# x = df * self.values
|
|
|
|
# x = df * self.values
|
|
|
|
|
|
|
|
#
|
|
|
|
df = pd.DataFrame( df.apply(lambda row: self.values[np.random.choice(np.where(row != 0)[0],1)[0]] ,axis=1))
|
|
|
|
# let's get the missing rows (if any) ...
|
|
|
|
|
|
|
|
#
|
|
|
|
|
|
|
|
ii = df.apply(lambda row: np.sum(row) == 0 ,axis=1)
|
|
|
|
|
|
|
|
if ii :
|
|
|
|
|
|
|
|
#
|
|
|
|
|
|
|
|
#@TODO Have this be a configurable variable
|
|
|
|
|
|
|
|
missing = np.repeat(0, np.where(ii==1)[0].size)
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
|
|
|
missing = []
|
|
|
|
|
|
|
|
i = np.where(ii == 0)[0]
|
|
|
|
|
|
|
|
df = pd.DataFrame( df.iloc.apply(lambda row: self.values[np.random.choice(np.where(row != 0)[0],1)[0]] ,axis=1))
|
|
|
|
df.columns = columns
|
|
|
|
df.columns = columns
|
|
|
|
|
|
|
|
df = df[columns[0]].append(pd.Series(missing))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|