@@ -22,10 +22,12 @@ from multiprocessing import Process, RLock
 from datetime import datetime, timedelta
 
 
 class Learner(Process):
 
     def __init__(self,**_args):
 
         super(Learner, self).__init__()
+        self.ndx = 0
+
         if 'gpu' in _args :
             os.environ['CUDA_VISIBLE_DEVICES'] = str(_args['gpu'])
@@ -49,19 +51,22 @@ class Learner(Process):
         self._encoder = None
         self._map = None
         self._df = _args['data'] if 'data' in _args else None
-        self.name = self.__class__.__name__+'::'+self.info['context']+'::'+self.info['from']
+        self.name = self.__class__.__name__+'::'+self.info['from']
+        self.name = self.name.replace('?','')
         #
         # @TODO: allow for verbose mode so we have a sens of what is going on within the newtork
         #
 
-        _log = {'module':self.name,'action':'init','context':self.info['context'],'gpu':(self.gpu if self.gpu is not None else -1)}
+        _log = {'action':'init','context':self.info['context'],'gpu':(self.gpu if self.gpu is not None else -1)}
         self.log(**_log)
 
         # self.logpath= _args['logpath'] if 'logpath' in _args else 'logs'
         # sel.max_epoc
 
     def log(self,**_args):
         logger = transport.factory.instance(**self.store['logger']) if 'logger' in self.store else transport.factory.instance(provider='console',context='write',lock=True)
+        _args = dict({'ndx':self.ndx,'module':self.name,'info':self.info['context'],**_args})
         logger.write(_args)
+        self.ndx += 1
         if hasattr(logger,'close') :
             logger.close()
@@ -85,7 +90,7 @@ class Learner(Process):
         _args['map'] = self._map
         self._encoder = prepare.Input(**_args) if self._df.shape[0] > 0 else None
-        _log = {'module':self.name,'action':'data-prep','input':{'rows':self._df.shape[0],'cols':self._df.shape[1]} }
+        _log = {'action':'data-prep','input':{'rows':self._df.shape[0],'cols':self._df.shape[1]} }
         self.log(**_log)
 
 class Trainer(Learner):
     """
@@ -134,7 +139,7 @@ class Trainer(Learner):
         # g.run()
 
         end = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
-        _logs = {'module':self.name,'action':'train','input':{'start':beg,'end':end}}
+        _logs = {'action':'train','input':{'start':beg,'end':end,"unique_counts":self._encoder._io[0]}}
         self.log(**_logs)
         self.generate = g
         if self.autopilot :
@@ -180,7 +185,7 @@ class Generator (Learner):
         _candidates= [ self._encoder.revert(matrix=_item) for _item in _iomatrix]
 
         _size = np.sum([len(_item) for _item in _iomatrix])
-        _log = {'module':self.name,'action':'io-data','input':{'candidates':len(_candidates),'rows':int(_size)}}
+        _log = {'action':'io-data','input':{'candidates':len(_candidates),'rows':int(_size)}}
         self.log(**_log)
         self.post(_candidates)
     def approximate(self,_df):
@@ -195,7 +200,7 @@ class Generator (Learner):
             batches = np.array_split(_df[name].fillna(np.nan).values,BATCH_SIZE)
             _type = np.int64 if 'int' in self.info['approximate'][name]else np.float64
             x = []
-            _log = {'module':self.name,'action':'approximate','input':{'batch':BATCH_SIZE,'col':name}}
+            _log = {'action':'approximate','input':{'batch':BATCH_SIZE,'col':name}}
             for values in batches :
 
                 index = [ _x not in ['',None,np.nan] for _x in values]
@@ -285,7 +290,7 @@ class Generator (Learner):
             _df = self.format(_df,_schema)
             writer.write(_df,schema=_schema)
 
-        self.log(**{'module':self.name,'action':'write','input':{'rows':N,'candidates':len(_candidates)}})
+        self.log(**{'action':'write','input':{'rows':N,'candidates':len(_candidates)}})
 class factory :
     _infocache = {}
     @staticmethod
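
Note: the net effect of this patch is that Learner.log() now enriches every record centrally with a sequence number (ndx), the module name, and the context, so the per-call _log dicts drop their repeated 'module':self.name entries, and Trainer attaches "unique_counts" to the train log. Below is a minimal, self-contained sketch of the resulting logging pattern, not the module itself: the ConsoleWriter stand-in and the constructor arguments are illustrative assumptions; the real class inherits from multiprocessing.Process and resolves its writer through transport.factory.instance(...).

    # Sketch of the centralized-logging pattern this patch introduces.
    # A trivial console writer stands in for the data-transport logger
    # (assumption, for illustration only).
    class ConsoleWriter:
        def write(self, record):
            print(record)

    class Learner:
        def __init__(self, context, source):
            self.ndx = 0                       # per-instance sequence number (added by the patch)
            self.info = {'context': context, 'from': source}
            # 'context' is no longer baked into the name; log() carries it as 'info'
            self.name = self.__class__.__name__ + '::' + self.info['from']
            self.name = self.name.replace('?', '')
            self.log(action='init', context=self.info['context'])

        def log(self, **_args):
            logger = ConsoleWriter()
            # Centralized enrichment: ndx, module and info are merged here once,
            # so callers no longer repeat 'module': self.name in every _log dict.
            _args = dict({'ndx': self.ndx, 'module': self.name, 'info': self.info['context'], **_args})
            logger.write(_args)
            self.ndx += 1
            if hasattr(logger, 'close'):
                logger.close()

    # Usage: each call is written with a monotonically increasing 'ndx'.
    learner = Learner(context='demo', source='patients')
    learner.log(action='train', input={'start': '2021-01-01 00:00:00', 'end': '2021-01-01 00:05:00'})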