diff --git a/data/maker/__init__.py b/data/maker/__init__.py index ff93104..807bd84 100644 --- a/data/maker/__init__.py +++ b/data/maker/__init__.py @@ -22,10 +22,12 @@ from multiprocessing import Process, RLock from datetime import datetime, timedelta class Learner(Process): + def __init__(self,**_args): super(Learner, self).__init__() + self.ndx = 0 if 'gpu' in _args : os.environ['CUDA_VISIBLE_DEVICES'] = str(_args['gpu']) @@ -49,19 +51,22 @@ class Learner(Process): self._encoder = None self._map = None self._df = _args['data'] if 'data' in _args else None - self.name = self.__class__.__name__+'::'+self.info['context']+'::'+self.info['from'] + self.name = self.__class__.__name__+'::'+self.info['from'] + self.name = self.name.replace('?','') # # @TODO: allow for verbose mode so we have a sens of what is going on within the newtork # - _log = {'module':self.name,'action':'init','context':self.info['context'],'gpu':(self.gpu if self.gpu is not None else -1)} + _log = {'action':'init','context':self.info['context'],'gpu':(self.gpu if self.gpu is not None else -1)} self.log(**_log) # self.logpath= _args['logpath'] if 'logpath' in _args else 'logs' # sel.max_epoc def log(self,**_args): logger = transport.factory.instance(**self.store['logger']) if 'logger' in self.store else transport.factory.instance(provider='console',context='write',lock=True) + _args = dict({'ndx':self.ndx,'module':self.name,'info':self.info['context'],**_args}) logger.write(_args) + self.ndx += 1 if hasattr(logger,'close') : logger.close() @@ -85,7 +90,7 @@ class Learner(Process): _args['map'] = self._map self._encoder = prepare.Input(**_args) if self._df.shape[0] > 0 else None - _log = {'module':self.name,'action':'data-prep','input':{'rows':self._df.shape[0],'cols':self._df.shape[1]} } + _log = {'action':'data-prep','input':{'rows':self._df.shape[0],'cols':self._df.shape[1]} } self.log(**_log) class Trainer(Learner): """ @@ -134,7 +139,7 @@ class Trainer(Learner): # g.run() end = datetime.now().strftime('%Y-%m-%d %H:%M:%S') - _logs = {'module':self.name,'action':'train','input':{'start':beg,'end':end}} + _logs = {'action':'train','input':{'start':beg,'end':end,"unique_counts":self._encoder._io[0]}} self.log(**_logs) self.generate = g if self.autopilot : @@ -180,7 +185,7 @@ class Generator (Learner): _candidates= [ self._encoder.revert(matrix=_item) for _item in _iomatrix] _size = np.sum([len(_item) for _item in _iomatrix]) - _log = {'module':self.name,'action':'io-data','input':{'candidates':len(_candidates),'rows':int(_size)}} + _log = {'action':'io-data','input':{'candidates':len(_candidates),'rows':int(_size)}} self.log(**_log) self.post(_candidates) def approximate(self,_df): @@ -195,7 +200,7 @@ class Generator (Learner): batches = np.array_split(_df[name].fillna(np.nan).values,BATCH_SIZE) _type = np.int64 if 'int' in self.info['approximate'][name]else np.float64 x = [] - _log = {'module':self.name,'action':'approximate','input':{'batch':BATCH_SIZE,'col':name}} + _log = {'action':'approximate','input':{'batch':BATCH_SIZE,'col':name}} for values in batches : index = [ _x not in ['',None,np.nan] for _x in values] @@ -285,7 +290,7 @@ class Generator (Learner): _df = self.format(_df,_schema) writer.write(_df,schema=_schema) - self.log(**{'module':self.name,'action':'write','input':{'rows':N,'candidates':len(_candidates)}}) + self.log(**{'action':'write','input':{'rows':N,'candidates':len(_candidates)}}) class factory : _infocache = {} @staticmethod diff --git a/data/maker/prepare/__init__.py b/data/maker/prepare/__init__.py index 478d435..bc316e9 100644 --- a/data/maker/prepare/__init__.py +++ b/data/maker/prepare/__init__.py @@ -90,11 +90,14 @@ class Input : # else: # # We will look into the count and make a judgment call - _df = pd.DataFrame(self.df.apply(lambda col: col.dropna().unique().size )).T - MIN_SPACE_SIZE = 2 - self._columns = cols if cols else _df.apply(lambda col:None if col[0] == row_count or col[0] < MIN_SPACE_SIZE else col.name).dropna().tolist() - self._io = _df.to_dict(orient='records') - + try: + _df = pd.DataFrame(self.df.apply(lambda col: col.dropna().unique().size )).T + MIN_SPACE_SIZE = 2 + self._columns = cols if cols else _df.apply(lambda col:None if col[0] == row_count or col[0] < MIN_SPACE_SIZE else col.name).dropna().tolist() + self._io = _df.to_dict(orient='records') + except Exception as e: + print (e) + self._io = [] def _initdata(self,**_args): """ This function will initialize the class with a data-frame and columns of interest (if any)