From 0f0c2642c2e8d1d3a2463c6945c18441a7392691 Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Tue, 18 Feb 2020 02:59:39 -0600 Subject: [PATCH] bug fix with binary matrix generation --- data/bridge.py | 8 +++++--- data/gan.py | 8 +------- data/maker/__init__.py | 12 +++++++++--- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/data/bridge.py b/data/bridge.py index fa323af..019f065 100644 --- a/data/bridge.py +++ b/data/bridge.py @@ -191,12 +191,13 @@ class Binary : # # This will give us a map of how each column was mapped to a bitstream - _map = df.fillna(np.nan).apply(lambda column: self.__stream(column),axis=0) + # _map = df.fillna(np.nan).apply(lambda column: self.__stream(column),axis=0) + _map = df.fillna('').apply(lambda column: self.__stream(column),axis=0) # # We will merge this to have a healthy matrix _matrix = _map.apply(lambda row: list(list(itertools.chain(*row.values.tolist()))),axis=1) - _matrix = np.matrix([list(item) for item in _matrix]) + _matrix = np.matrix([list(item) for item in _matrix]).astype(np.float32) # # let's format the map so we don't have an unreasonable amount of data # @@ -210,7 +211,8 @@ class Binary : _m[name] = {"start":beg,"end":end} beg = end - return _m,_matrix.astype(np.float32) + # return _m,_matrix.astype(np.float32) + return _matrix def Import(self,df,values,_map): """ diff --git a/data/gan.py b/data/gan.py index 367d63c..3d600a3 100644 --- a/data/gan.py +++ b/data/gan.py @@ -397,17 +397,13 @@ class Train (GNet): labels_placeholder = tf.compat.v1.placeholder(shape=self._LABEL.shape, dtype=tf.float32) dataset = tf.data.Dataset.from_tensor_slices((features_placeholder, labels_placeholder)) dataset = dataset.repeat(10000) - dataset = dataset.batch(batch_size=self.BATCHSIZE_PER_GPU) + dataset = dataset.batch(batch_size=3000) dataset = dataset.prefetch(1) # iterator = dataset.make_initializable_iterator() iterator = tf.compat.v1.data.make_initializable_iterator(dataset) - # next_element = iterator.get_next() - # init_op = iterator.initializer return iterator, features_placeholder, labels_placeholder def network(self,**args): - # def graph(stage, opt): - # global_step = tf.get_variable(stage+'_step', [], initializer=tf.constant_initializer(0), trainable=False) stage = args['stage'] opt = args['opt'] tower_grads = [] @@ -540,8 +536,6 @@ class Predict(GNet): # The code below will insure we have some acceptable cardinal relationships between id and synthetic values # df = ( pd.DataFrame(np.round(f).astype(np.int32))) - print (df.head()) - print () p = 0 not in df.sum(axis=1).values if p: diff --git a/data/maker/__init__.py b/data/maker/__init__.py index 12abc8d..74ae718 100644 --- a/data/maker/__init__.py +++ b/data/maker/__init__.py @@ -12,6 +12,7 @@ import pandas as pd import numpy as np import data.gan as gan from transport import factory +from data.bridge import Binary import threading as thread def train (**args) : """ @@ -32,9 +33,12 @@ def train (**args) : # If we have several columns we will proceed one at a time (it could be done in separate threads) # @TODO : Consider performing this task on several threads/GPUs simulataneously # - args['label'] = pd.get_dummies(df[column_id]).astype(np.float32).values + handler = Binary() + # args['label'] = pd.get_dummies(df[column_id]).astype(np.float32).values + args['label'] = handler.Export(df[[column_id]]) for col in column : - args['real'] = pd.get_dummies(df[col]).astype(np.float32).values + # args['real'] = pd.get_dummies(df[col]).astype(np.float32).values + args['real'] = handler.Export(df[[col]]) args['column'] = col args['context'] = col context = args['context'] @@ -77,7 +81,9 @@ def generate(**args): #@TODO: # If the identifier is not present, we should fine a way to determine or make one # - args['label'] = pd.get_dummies(df[column_id]).astype(np.float32).values + # args['label'] = pd.get_dummies(df[column_id]).astype(np.float32).values + bwrangler = Binary() + args['label'] = bwrangler.Export(df[[column_id]]) _df = df.copy() for col in column : args['context'] = col