diff --git a/data/gan.py b/data/gan.py index ff51aa8..5559e4d 100644 --- a/data/gan.py +++ b/data/gan.py @@ -593,7 +593,7 @@ class Predict(GNet): # # df = pd.DataFrame(np.round(f)).astype(np.int32) - df = pd.DataFrame(np.round(f),dtype=np.uint8) + df = pd.DataFrame(np.round(f),dtype=int) p = 0 not in df.sum(axis=1).values x = df.sum(axis=1).values @@ -637,6 +637,8 @@ class Predict(GNet): if self.logger : info = {"found":len(found),"rows":df.shape[0],"cols":df.shape[1],"expected":len(self.values)} + if df.shape[1] > len(self.values) : + df = df.iloc[:len(self.values)] if INDEX > 0 : info =dict(info ,**{"selected":INDEX, "ratio": ratio[INDEX] }) else : diff --git a/pipeline.py b/pipeline.py index 7017592..12746fa 100644 --- a/pipeline.py +++ b/pipeline.py @@ -82,6 +82,9 @@ class Components : df = df.iloc[i] + # + # Certain columns need to be removed too large of a matrix + # # if df.shape[0] == 0 : # print ("CAN NOT TRAIN EMPTY DATASET ") # return @@ -130,7 +133,7 @@ class Components : self.generate(args) pass - + # @staticmethod def generate(self,args): """ @@ -171,7 +174,7 @@ class Components : i = np.random.choice(df.shape[0],max_rows,replace=False) df = df.iloc[i] - + # bounds = Components.split(df,MAX_ROWS,PART_SIZE) # if partition != '' : # columns = args['columns'] @@ -194,13 +197,15 @@ class Components : if df[name].isnull().sum() > 0 : df[name].fillna(0,inplace=True) else: - df[name] = df[name].astype(np.int64) + df[name] = df[name].astype(int) _dc = pd.DataFrame() # for mdf in df : - _args['data'] = df + _args['data'] = df + _dc = _dc.append(data.maker.generate(**_args)) + # # We need to post the generate the data in order to : # 1. compare immediately @@ -356,14 +361,7 @@ if __name__ == '__main__' : else: generator.generate(args) # Components.generate(args) - elif 'finalize' in args : - # - # This will finalize a given set of synthetic operations into a table - # - idataset = args['input'] if 'input' in args else 'io' #-- input dataset - odataset = args['output'] #-- output dataset - labels = [name.strip() for name in args['labels'].split(',') ] - + else: # DATA = np.array_split(DATA,PART_SIZE)