|
|
@ -218,7 +218,13 @@ class Generator (Learner):
|
|
|
|
for values in batches :
|
|
|
|
for values in batches :
|
|
|
|
|
|
|
|
|
|
|
|
index = [ _x not in ['',None,np.nan] for _x in values]
|
|
|
|
index = [ _x not in ['',None,np.nan] for _x in values]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if len(index) == len(values):
|
|
|
|
|
|
|
|
#
|
|
|
|
|
|
|
|
# Sometimes messy data has unpleasant surprises
|
|
|
|
|
|
|
|
continue
|
|
|
|
_values = np.random.dirichlet(values[index].astype(_type))
|
|
|
|
_values = np.random.dirichlet(values[index].astype(_type))
|
|
|
|
|
|
|
|
|
|
|
|
values[index] = list(values[index] + _values )if np.random.randint(0,2) else list(values[index] - _values)
|
|
|
|
values[index] = list(values[index] + _values )if np.random.randint(0,2) else list(values[index] - _values)
|
|
|
|
values[index] = values[index].astype(_type)
|
|
|
|
values[index] = values[index].astype(_type)
|
|
|
|
x += values.tolist()
|
|
|
|
x += values.tolist()
|
|
|
@ -284,7 +290,7 @@ class Generator (Learner):
|
|
|
|
_df[name] = _df[name].astype('datetime64[ns]')
|
|
|
|
_df[name] = _df[name].astype('datetime64[ns]')
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
_df[name] = _df[name].astype(str)
|
|
|
|
_df[name] = _df[name].astype(str)
|
|
|
|
_df[name] = _df[name].replace('NaT','')
|
|
|
|
_df = _df.replace('NaT','')
|
|
|
|
if r :
|
|
|
|
if r :
|
|
|
|
self.log(**{'action':'format','input':r})
|
|
|
|
self.log(**{'action':'format','input':r})
|
|
|
|
return _df
|
|
|
|
return _df
|
|
|
|