bug fix: epochs, process control (generator)

dev
Steve L. Nyemba 5 years ago
parent a1ac97fbca
commit 6e0f89cd3c

@@ -508,7 +508,7 @@ class Train (GNet):
 logs.append({"epoch":epoch,"distance":-w_sum/(self.STEPS_PER_EPOCH*2) })
 # if epoch % self.MAX_EPOCHS == 0:
-if epoch in [5,10,50, self.MAX_EPOCHS] :
+if epoch in [5,10,20,50,75, self.MAX_EPOCHS] :
 # suffix = "-".join(self.ATTRIBUTES['synthetic']) if isinstance(self.ATTRIBUTES['synthetic'],list) else self.ATTRIBUTES['synthetic']
 suffix = self.get.suffix()
 _name = os.sep.join([self.train_dir,suffix])
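The hunk above widens the set of checkpoint epochs in Train: snapshots were written at epochs 5, 10, 50 and MAX_EPOCHS, and are now also written at 20 and 75. Below is a minimal, self-contained sketch of this kind of epoch-gated checkpointing; the should_checkpoint and checkpoint_path helpers and the max_epochs argument are illustrative names, not part of this repository's API.

import os

# Milestone epochs at which a snapshot is written; the set mirrors the
# updated condition in the hunk above.
CHECKPOINT_EPOCHS = {5, 10, 20, 50, 75}

def should_checkpoint(epoch, max_epochs):
    """Return True when the current epoch is a checkpoint milestone."""
    return epoch in CHECKPOINT_EPOCHS or epoch == max_epochs

def checkpoint_path(train_dir, suffix):
    """Join the training directory and suffix, as the diff does with
    os.sep.join([self.train_dir, suffix])."""
    return os.sep.join([train_dir, suffix])

if __name__ == "__main__":
    for epoch in range(1, 101):
        if should_checkpoint(epoch, max_epochs=100):
            print(epoch, checkpoint_path("train", "demo"))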

@@ -178,13 +178,14 @@ class Components :
 #
 info = {"module":"generate","action":"io.metrics","input":{"rows":data_comp.shape[0],"partition":partition,"logs":[]}}
 x = {}
-for name in args['columns'] :
-ident = data_comp.apply(lambda row: 1*(row[name]==row[name+'_io']),axis=1).sum()
-count = data_comp[name].unique().size
-_ident= data_comp.shape[1] - ident
-_count= data_comp[name+'_io'].unique().size
-info['input']['logs'] += [{"name":name,"identical":int(ident),"no_identical":int(_ident),"original_count":count,"synthetic_count":_count}]
+# for name in args['columns'] :
+# ident = data_comp.apply(lambda row: 1*(row[name]==row[name+'_io']),axis=1).sum()
+# count = data_comp[name].unique().size
+# _ident= data_comp.shape[1] - ident
+# _count= data_comp[name+'_io'].unique().size
+# _count= len(set(data_comp[name+'_io'].values.tolist()))
+# info['input']['logs'] += [{"name":name,"identical":int(ident),"no_identical":int(_ident),"original_count":count,"synthetic_count":_count}]
 # for name in data_comp.columns.tolist() :
 # g = pd.DataFrame(data_comp.groupby([name]).size())
 # g.columns = ['counts']
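The hunk above disables (by commenting out rather than deleting) the per-column io.metrics loop that compared each original column with its synthetic counterpart suffixed '_io'. A self-contained sketch of that metric follows; data_comp here is a made-up demo frame, and a vectorized comparison stands in for the row-wise apply of the disabled code. The disabled code derived no_identical from data_comp.shape[1] (the column count); the sketch uses the row count, which appears to be the intended quantity.

import pandas as pd

# Demo frame in the layout the disabled loop expects: each original column
# sits next to a synthetic counterpart suffixed with '_io' (values invented).
data_comp = pd.DataFrame({
    "gender": ["F", "M", "F", "M"],
    "gender_io": ["F", "M", "M", "M"],
})

logs = []
for name in ["gender"]:
    # rows where the original and synthetic values match exactly
    ident = int((data_comp[name] == data_comp[name + "_io"]).sum())
    logs.append({
        "name": name,
        "identical": ident,
        "no_identical": int(data_comp.shape[0] - ident),
        "original_count": int(data_comp[name].unique().size),
        "synthetic_count": int(data_comp[name + "_io"].unique().size),
    })

print(logs)  # -> [{'name': 'gender', 'identical': 3, 'no_identical': 1, ...}]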
@@ -192,17 +193,17 @@
 # g.index = np.arange(g.shape[0])
 # logs.append({"name":name,"counts": g.to_dict(orient='records')})
 # info['input']['logs'] = logs
-logger.write(info)
+# logger.write(info)
 base_cols = list(set(_args['data'].columns) - set(args['columns'])) #-- rebuilt the dataset (and store it)
 cols = _dc.columns.tolist()
-for name in cols :
-_args['data'][name] = _dc[name]
-info = {"module":"generate","action":"io","input":{"rows":_dc[name].shape[0],"name":name}}
-if partition != '' :
-info['partition'] = int(partition)
-logger.write(info)
+# for name in cols :
+# _args['data'][name] = _dc[name]
+# info = {"module":"generate","action":"io","input":{"rows":_dc[name].shape[0],"name":name}}
+# if partition != '' :
+# info['partition'] = int(partition)
+# logger.write(info)
 # filename = os.sep.join([log_folder,'output',name+'.csv'])
 # data_comp[[name]].to_csv(filename,index=False)
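The final hunk likewise comments out logger.write(info) and the loop that copied each generated column from _dc back into _args['data'] while logging one io record per column, silencing that logging while the process control around the generator is reworked. A runnable sketch of the disabled rebuild loop follows; the DataFrames, the partition value and the records list (a stand-in for the repository's logger.write) are all illustrative.

import pandas as pd

# Illustrative stand-ins: _dc holds the generated (synthetic) columns,
# _args['data'] is the dataset being rebuilt column by column.
_dc = pd.DataFrame({"age": [34, 51, 28]})
_args = {"data": pd.DataFrame({"id": [1, 2, 3], "age": [33, 50, 29]})}
partition = "0"

records = []  # stand-in for logger.write(info)
for name in _dc.columns.tolist():
    # overwrite the original column with its synthetic counterpart
    _args["data"][name] = _dc[name]
    info = {"module": "generate", "action": "io",
            "input": {"rows": _dc[name].shape[0], "name": name}}
    if partition != "":
        info["partition"] = int(partition)
    records.append(info)

print(_args["data"])
print(records)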
