| 
						
						
							
								
							
						
						
					 | 
					 | 
					@ -64,7 +64,7 @@ class Learner(Process):
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        # self.logpath= _args['logpath'] if 'logpath' in _args else 'logs'
 | 
					 | 
					 | 
					 | 
					        # self.logpath= _args['logpath'] if 'logpath' in _args else 'logs'
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        # sel.max_epoc
 | 
					 | 
					 | 
					 | 
					        # sel.max_epoc
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    def log(self,**_args):
 | 
					 | 
					 | 
					 | 
					    def log(self,**_args):
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.lock.acquire()
 | 
					 | 
					 | 
					 | 
					        # self.lock.acquire()
 | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        try:
 | 
					 | 
					 | 
					 | 
					        try:
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            logger = transport.factory.instance(**self.store['logger']) if 'logger' in self.store else transport.factory.instance(provider='console',context='write',lock=True)
 | 
					 | 
					 | 
					 | 
					            logger = transport.factory.instance(**self.store['logger']) if 'logger' in self.store else transport.factory.instance(provider='console',context='write',lock=True)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            _args = dict({'ndx':self.ndx,'module':self.name,'table':self.info['from'],'info':self.info['context'],**_args})
 | 
					 | 
					 | 
					 | 
					            _args = dict({'ndx':self.ndx,'module':self.name,'table':self.info['from'],'info':self.info['context'],**_args})
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -78,7 +78,8 @@ class Learner(Process):
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            print (e)
 | 
					 | 
					 | 
					 | 
					            print (e)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            pass
 | 
					 | 
					 | 
					 | 
					            pass
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        finally:
 | 
					 | 
					 | 
					 | 
					        finally:
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            self.lock.release()
 | 
					 | 
					 | 
					 | 
					            # self.lock.release()
 | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					            pass
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    def get_schema(self):
 | 
					 | 
					 | 
					 | 
					    def get_schema(self):
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        if self.store['source']['provider'] != 'bigquery' :
 | 
					 | 
					 | 
					 | 
					        if self.store['source']['provider'] != 'bigquery' :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            return [{'name':self._df.dtypes.index.tolist()[i],'type':self._df.dtypes.astype(str).tolist()[i]}for i in range(self._df.dtypes.shape[0])]
 | 
					 | 
					 | 
					 | 
					            return [{'name':self._df.dtypes.index.tolist()[i],'type':self._df.dtypes.astype(str).tolist()[i]}for i in range(self._df.dtypes.shape[0])]
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
							
								
							
						
						
					 | 
					 | 
					@ -222,7 +223,7 @@ class Generator (Learner):
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					                values[index] = values[index].astype(_type)
 | 
					 | 
					 | 
					 | 
					                values[index] = values[index].astype(_type)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					                x += values.tolist()
 | 
					 | 
					 | 
					 | 
					                x += values.tolist()
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            if x :   
 | 
					 | 
					 | 
					 | 
					            if x :   
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					                _log['input']['diff_pct'] = 100 * (1 - np.divide( (_df[name].dropna() == x).sum(),_df[name].dropna().size))
 | 
					 | 
					 | 
					 | 
					                _log['input']['identical_percentage'] = 100 * (1 - np.divide( (_df[name].dropna() == x).sum(),_df[name].dropna().size))
 | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					                _df[name] = x  #np.array(x,dtype=np.int64) if 'int' in _type  else np.arry(x,dtype=np.float64)
 | 
					 | 
					 | 
					 | 
					                _df[name] = x  #np.array(x,dtype=np.int64) if 'int' in _type  else np.arry(x,dtype=np.float64)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					                
 | 
					 | 
					 | 
					 | 
					                
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					                self.log(**_log)
 | 
					 | 
					 | 
					 | 
					                self.log(**_log)
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -243,14 +244,15 @@ class Generator (Learner):
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        day = np.random.randint(1,_end)
 | 
					 | 
					 | 
					 | 
					        day = np.random.randint(1,_end)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        #-- synthetic date
 | 
					 | 
					 | 
					 | 
					        #-- synthetic date
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        _date = datetime(year=year,month=month,day=day)
 | 
					 | 
					 | 
					 | 
					        _date = datetime(year=year,month=month,day=day) #,minute=0,hour=0,second=0)
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        FORMAT =  '%Y-%m-%d'
 | 
					 | 
					 | 
					 | 
					        FORMAT =  '%Y-%d-%m'
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        if 'format' in self.info and 'field' in _args and _args['field'] in self.info['format']:
 | 
					 | 
					 | 
					 | 
					        _name = _args['field'] if 'field' in _args else None
 | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					        if 'format' in self.info and _name in self.info['format']:
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            _name = _args['field']
 | 
					 | 
					 | 
					 | 
					            _name = _args['field']
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            FORMAT = self.info['format'][_name]
 | 
					 | 
					 | 
					 | 
					            FORMAT = self.info['format'][_name]
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        
 | 
					 | 
					 | 
					 | 
					        # print ([_name,FORMAT, _date.strftime(FORMAT)])
 | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        r = []
 | 
					 | 
					 | 
					 | 
					        r = []
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        if offset :
 | 
					 | 
					 | 
					 | 
					        if offset :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            r = [_date.strftime(FORMAT)]
 | 
					 | 
					 | 
					 | 
					            r = [_date.strftime(FORMAT)]
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
							
								
							
						
						
					 | 
					 | 
					@ -277,7 +279,7 @@ class Generator (Learner):
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					                r[name] = FORMAT
 | 
					 | 
					 | 
					 | 
					                r[name] = FORMAT
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					                
 | 
					 | 
					 | 
					 | 
					                
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					                
 | 
					 | 
					 | 
					 | 
					                
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					                _df[name] = pd.to_datetime(_df[name], format=FORMAT).astype(str) #.astype('datetime64[ns]')
 | 
					 | 
					 | 
					 | 
					                _df[name] = pd.to_datetime(_df[name], format=FORMAT).astype('datetime64[ns]')
 | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        if r :
 | 
					 | 
					 | 
					 | 
					        if r :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            self.log(**{'action':'format','input':r})
 | 
					 | 
					 | 
					 | 
					            self.log(**{'action':'format','input':r})
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        return _df
 | 
					 | 
					 | 
					 | 
					        return _df
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
							
								
							
						
						
					 | 
					 | 
					@ -308,12 +310,13 @@ class Generator (Learner):
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					                    years = _df[iname]
 | 
					 | 
					 | 
					 | 
					                    years = _df[iname]
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					                    _dates = [self.make_date(year=year,field=name) for year in years]
 | 
					 | 
					 | 
					 | 
					                    _dates = [self.make_date(year=year,field=name) for year in years]
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					                    if _dates :
 | 
					 | 
					 | 
					 | 
					                    if _dates :                        
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					                        _df[name] = _dates            
 | 
					 | 
					 | 
					 | 
					                        _df[name] = _dates
 | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            
 | 
					 | 
					 | 
					 | 
					            
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            _schema = self.get_schema()
 | 
					 | 
					 | 
					 | 
					            _schema = self.get_schema()
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            _schema = [{'name':_item.name,'type':_item.field_type} for _item in _schema]
 | 
					 | 
					 | 
					 | 
					            _schema = [{'name':_item.name,'type':_item.field_type} for _item in _schema]
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            _df = self.format(_df,_schema)
 | 
					 | 
					 | 
					 | 
					            _df = self.format(_df,_schema)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					            
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            writer.write(_df,schema=_schema)
 | 
					 | 
					 | 
					 | 
					            writer.write(_df,schema=_schema)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        
 | 
					 | 
					 | 
					 | 
					        
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.log(**{'action':'write','input':{'rows':N,'candidates':len(_candidates)}})
 | 
					 | 
					 | 
					 | 
					        self.log(**{'action':'write','input':{'rows':N,'candidates':len(_candidates)}})
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
						
					 | 
					 | 
					
 
 |