diff --git a/data/maker/__init__.py b/data/maker/__init__.py index 2921b46..184bca4 100644 --- a/data/maker/__init__.py +++ b/data/maker/__init__.py @@ -64,7 +64,7 @@ class Learner(Process): # self.logpath= _args['logpath'] if 'logpath' in _args else 'logs' # sel.max_epoc def log(self,**_args): - self.lock.acquire() + # self.lock.acquire() try: logger = transport.factory.instance(**self.store['logger']) if 'logger' in self.store else transport.factory.instance(provider='console',context='write',lock=True) _args = dict({'ndx':self.ndx,'module':self.name,'table':self.info['from'],'info':self.info['context'],**_args}) @@ -78,7 +78,8 @@ class Learner(Process): print (e) pass finally: - self.lock.release() + # self.lock.release() + pass def get_schema(self): if self.store['source']['provider'] != 'bigquery' : return [{'name':self._df.dtypes.index.tolist()[i],'type':self._df.dtypes.astype(str).tolist()[i]}for i in range(self._df.dtypes.shape[0])] @@ -222,7 +223,7 @@ class Generator (Learner): values[index] = values[index].astype(_type) x += values.tolist() if x : - _log['input']['diff_pct'] = 100 * (1 - np.divide( (_df[name].dropna() == x).sum(),_df[name].dropna().size)) + _log['input']['identical_percentage'] = 100 * (1 - np.divide( (_df[name].dropna() == x).sum(),_df[name].dropna().size)) _df[name] = x #np.array(x,dtype=np.int64) if 'int' in _type else np.arry(x,dtype=np.float64) self.log(**_log) @@ -243,14 +244,15 @@ class Generator (Learner): day = np.random.randint(1,_end) #-- synthetic date - _date = datetime(year=year,month=month,day=day) - FORMAT = '%Y-%m-%d' - if 'format' in self.info and 'field' in _args and _args['field'] in self.info['format']: + _date = datetime(year=year,month=month,day=day) #,minute=0,hour=0,second=0) + FORMAT = '%Y-%d-%m' + _name = _args['field'] if 'field' in _args else None + if 'format' in self.info and _name in self.info['format']: _name = _args['field'] FORMAT = self.info['format'][_name] - + # print ([_name,FORMAT, _date.strftime(FORMAT)]) r = [] if offset : r = [_date.strftime(FORMAT)] @@ -277,7 +279,7 @@ class Generator (Learner): r[name] = FORMAT - _df[name] = pd.to_datetime(_df[name], format=FORMAT).astype(str) #.astype('datetime64[ns]') + _df[name] = pd.to_datetime(_df[name], format=FORMAT).astype('datetime64[ns]') if r : self.log(**{'action':'format','input':r}) return _df @@ -308,12 +310,13 @@ class Generator (Learner): years = _df[iname] _dates = [self.make_date(year=year,field=name) for year in years] - if _dates : - _df[name] = _dates + if _dates : + _df[name] = _dates _schema = self.get_schema() _schema = [{'name':_item.name,'type':_item.field_type} for _item in _schema] _df = self.format(_df,_schema) + writer.write(_df,schema=_schema) self.log(**{'action':'write','input':{'rows':N,'candidates':len(_candidates)}})