From e5af702ddb4c04fa668f84018879c379f597e26d Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Mon, 24 Apr 2023 16:37:08 -0500 Subject: [PATCH] bug fixes: stages, other training/generation --- data/gan.py | 16 ++++++++++++--- data/maker/__init__.py | 31 +++++++++++++++++++++------- data/maker/apply.py | 2 +- data/maker/state/__init__.py | 39 +++++++++++++++++++++++++++++++----- 4 files changed, 72 insertions(+), 16 deletions(-) diff --git a/data/gan.py b/data/gan.py index d2cc3ea..a794ebe 100644 --- a/data/gan.py +++ b/data/gan.py @@ -103,11 +103,12 @@ class GNet : CHECKPOINT_SKIPS = int(args['checkpoint_skips']) if 'checkpoint_skips' in args else int(self.MAX_EPOCHS/10) CHECKPOINT_SKIPS = 1 if CHECKPOINT_SKIPS < 1 else CHECKPOINT_SKIPS + # if self.MAX_EPOCHS < 2*CHECKPOINT_SKIPS : # CHECKPOINT_SKIPS = 2 # self.CHECKPOINTS = [1,self.MAX_EPOCHS] + np.repeat( np.divide(self.MAX_EPOCHS,CHECKPOINT_SKIPS),CHECKPOINT_SKIPS ).cumsum().astype(int).tolist() self.CHECKPOINTS = np.repeat(CHECKPOINT_SKIPS, self.MAX_EPOCHS/ CHECKPOINT_SKIPS).cumsum().astype(int).tolist() - + self.ROW_COUNT = args['real'].shape[0] if 'real' in args else 100 self.CONTEXT = args['context'] self.ATTRIBUTES = {"id":args['column_id'] if 'column_id' in args else None,"synthetic":args['column'] if 'column' in args else None} @@ -287,8 +288,17 @@ class Generator (GNet): """ def __init__(self,**args): - GNet.__init__(self,**args) - self.discriminator = Discriminator(**args) + if 'trainer' not in args : + GNet.__init__(self,**args) + self.discriminator = Discriminator(**args) + else: + _args = {} + _trainer = args['trainer'] + for key in vars(_trainer) : + value = getattr(_trainer,key) + setattr(self,key,value) + _args[key] = value + self.discriminator = Discriminator(**_args) def loss(self,**args): fake = args['fake'] label = args['label'] diff --git a/data/maker/__init__.py b/data/maker/__init__.py index 7b3a347..5053e9b 100644 --- a/data/maker/__init__.py +++ b/data/maker/__init__.py @@ -33,6 +33,7 @@ class Learner(Process): super(Learner, self).__init__() + self._arch = {'init':_args} self.ndx = 0 self._queue = Queue() self.lock = RLock() @@ -44,6 +45,8 @@ class Learner(Process): self.gpu = None self.info = _args['info'] + if 'context' not in self.info : + self.info['context'] = self.info['from'] self.columns = self.info['columns'] if 'columns' in self.info else None self.store = _args['store'] @@ -97,9 +100,12 @@ class Learner(Process): # __info = (pd.DataFrame(self._states)[['name','path','args']]).to_dict(orient='records') if self._states : __info = {} - + # print (self._states) for key in self._states : - __info[key] = [{"name":_item['name'],"args":_item['args'],"path":_item['path']} for _item in self._states[key]] + _pipeline = self._states[key] + + # __info[key] = ([{'name':_payload['name']} for _payload in _pipeline]) + __info[key] = [{"name":_item['name'],"args":_item['args'],"path":_item['path']} for _item in self._states[key] if _item ] self.log(object='state-space',action='load',input=__info) @@ -270,18 +276,23 @@ class Trainer(Learner): # _epochs = [_e for _e in gTrain.logs['epochs'] if _e['path'] != ''] _epochs.sort(key=lambda _item: _item['loss'],reverse=False) - + _args['network_args']['max_epochs'] = _epochs[0]['epochs'] self.log(action='autopilot',input={'epoch':_epochs[0]}) - g = Generator(**_args) + # g.run() end = datetime.now() #.strftime('%Y-%m-%d %H:%M:%S') _min = float((end-beg).seconds/ 60) _logs = {'action':'train','input':{'start':beg.strftime('%Y-%m-%d %H:%M:%S'),'minutes':_min,"unique_counts":self._encoder._io[0]}} self.log(**_logs) - self._g = g - if self.autopilot : + + if self.autopilot : + + # g = Generator(**_args) + + g = Generator(**self._arch['init']) + self._g = g self._g.run() # #@TODO Find a way to have the data in the object .... @@ -300,10 +311,15 @@ class Generator (Learner): # # We need to load the mapping information for the space we are working with ... # + + self.network_args['candidates'] = int(_args['candidates']) if 'candidates' in _args else 1 - filename = os.sep.join([self.network_args['logs'],'output',self.network_args['context'],'map.json']) + # filename = os.sep.join([self.network_args['logs'],'output',self.network_args['context'],'map.json']) + _suffix = self.network_args['context'] + filename = os.sep.join([self.network_args['logs'],'output',self.network_args['context'],'meta-',_suffix,'.json']) self.log(**{'action':'init-map','input':{'filename':filename,'exists':os.path.exists(filename)}}) if os.path.exists(filename): + file = open(filename) self._map = json.loads(file.read()) file.close() @@ -580,6 +596,7 @@ class factory : """ + # if _args['apply'] in [apply.RANDOM] : pthread = Shuffle(**_args) diff --git a/data/maker/apply.py b/data/maker/apply.py index bb6a085..58ae094 100644 --- a/data/maker/apply.py +++ b/data/maker/apply.py @@ -69,7 +69,7 @@ class Date(Post): """ """ - pass + pass class Approximate(Post): def apply(**_args): pass diff --git a/data/maker/state/__init__.py b/data/maker/state/__init__.py index adf9837..f1b8da0 100644 --- a/data/maker/state/__init__.py +++ b/data/maker/state/__init__.py @@ -31,12 +31,22 @@ class State : continue pointer = _item['module'] - _args = _item['args'] + + if type(pointer).__name__ != 'function': + _args = _item['args'] if 'args' in _item else {} + else: + pointer = _item['module'] + + _args = _item['args'] if 'args' in _item else {} + _data = pointer(_data,_args) return _data @staticmethod def instance(_args): + """ + + """ pre = [] post=[] @@ -45,8 +55,20 @@ class State : # # If the item has a path property is should be ignored path = _args[key]['path'] if 'path' in _args[key] else '' - out[key] = [ State._build(dict(_item,**{'path':path})) if 'path' not in _item else State._build(_item) for _item in _args[key]['pipeline']] - + # out[key] = [ State._build(dict(_item,**{'path':path})) if 'path' not in _item else State._build(_item) for _item in _args[key]['pipeline']] + out[key] = [] + for _item in _args[key]['pipeline'] : + + if type(_item).__name__ == 'function': + _stageInfo = {'module':_item,'name':_item.__name__,'args':{},'path':''} + pass + else: + if 'path' in _item : + _stageInfo = State._build(dict(_item,**{'path':path})) + else : + _stageInfo= State._build(_item) + out[key].append(_stageInfo) + # print ([out]) return out # if 'pre' in _args: # path = _args['pre']['path'] if 'path' in _args['pre'] else '' @@ -68,11 +90,18 @@ class State : pass @staticmethod def _build(_args): - + """ + This function builds the object {module,path} where module is extracted from a file (if needed) + :param _args dictionary containing attributes that can be value pair + It can also be a function + """ + # + # In the advent an actual pointer is passed we should do the following + _info = State._extract(_args) # _info = dict(_args,**_info) - _info['module'] = State._instance(_info) + _info['module'] = State._instance(_info) return _info if _info['module'] is not None else None @staticmethod