|
|
@ -82,6 +82,9 @@ class Components :
|
|
|
|
df = df.iloc[i]
|
|
|
|
df = df.iloc[i]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
|
|
|
|
# Certain columns need to be removed because they would produce too large of a matrix
|
|
|
|
|
|
|
|
#
|
|
|
|
# if df.shape[0] == 0 :
|
|
|
|
# if df.shape[0] == 0 :
|
|
|
|
# print ("CAN NOT TRAIN EMPTY DATASET ")
|
|
|
|
# print ("CAN NOT TRAIN EMPTY DATASET ")
|
|
|
|
# return
|
|
|
|
# return
|
|
|
@ -130,7 +133,7 @@ class Components :
|
|
|
|
self.generate(args)
|
|
|
|
self.generate(args)
|
|
|
|
|
|
|
|
|
|
|
|
pass
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
# @staticmethod
|
|
|
|
# @staticmethod
|
|
|
|
def generate(self,args):
|
|
|
|
def generate(self,args):
|
|
|
|
"""
|
|
|
|
"""
|
|
|
@ -171,7 +174,7 @@ class Components :
|
|
|
|
i = np.random.choice(df.shape[0],max_rows,replace=False)
|
|
|
|
i = np.random.choice(df.shape[0],max_rows,replace=False)
|
|
|
|
df = df.iloc[i]
|
|
|
|
df = df.iloc[i]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# bounds = Components.split(df,MAX_ROWS,PART_SIZE)
|
|
|
|
# bounds = Components.split(df,MAX_ROWS,PART_SIZE)
|
|
|
|
# if partition != '' :
|
|
|
|
# if partition != '' :
|
|
|
|
# columns = args['columns']
|
|
|
|
# columns = args['columns']
|
|
|
@ -194,13 +197,15 @@ class Components :
|
|
|
|
if df[name].isnull().sum() > 0 :
|
|
|
|
if df[name].isnull().sum() > 0 :
|
|
|
|
df[name].fillna(0,inplace=True)
|
|
|
|
df[name].fillna(0,inplace=True)
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
df[name] = df[name].astype(np.int64)
|
|
|
|
df[name] = df[name].astype(int)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
_dc = pd.DataFrame()
|
|
|
|
_dc = pd.DataFrame()
|
|
|
|
# for mdf in df :
|
|
|
|
# for mdf in df :
|
|
|
|
_args['data'] = df
|
|
|
|
_args['data'] = df
|
|
|
|
|
|
|
|
|
|
|
|
_dc = _dc.append(data.maker.generate(**_args))
|
|
|
|
_dc = _dc.append(data.maker.generate(**_args))
|
|
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
#
|
|
|
|
# We need to post the generated data in order to :
|
|
|
|
# We need to post the generated data in order to :
|
|
|
|
# 1. compare immediately
|
|
|
|
# 1. compare immediately
|
|
|
@ -356,14 +361,7 @@ if __name__ == '__main__' :
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
generator.generate(args)
|
|
|
|
generator.generate(args)
|
|
|
|
# Components.generate(args)
|
|
|
|
# Components.generate(args)
|
|
|
|
elif 'finalize' in args :
|
|
|
|
|
|
|
|
#
|
|
|
|
|
|
|
|
# This will finalize a given set of synthetic operations into a table
|
|
|
|
|
|
|
|
#
|
|
|
|
|
|
|
|
idataset = args['input'] if 'input' in args else 'io' #-- input dataset
|
|
|
|
|
|
|
|
odataset = args['output'] #-- output dataset
|
|
|
|
|
|
|
|
labels = [name.strip() for name in args['labels'].split(',') ]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
|
|
|
|
|
|
|
|
# DATA = np.array_split(DATA,PART_SIZE)
|
|
|
|
# DATA = np.array_split(DATA,PART_SIZE)
|
|
|
|