Bug fix: handle null values during approximation

dev
Steve Nyemba 3 years ago
parent 133b0120db
commit 5d4c534fae

@ -218,7 +218,13 @@ class Generator (Learner):
for values in batches : for values in batches :
index = [ _x not in ['',None,np.nan] for _x in values] index = [ _x not in ['',None,np.nan] for _x in values]
_values = np.random.dirichlet(values[index].astype(_type))
if len(index) == len(values):
#
# Sometimes messy data has unpleasant surprises
continue
_values = np.random.dirichlet(values[index].astype(_type))
values[index] = list(values[index] + _values )if np.random.randint(0,2) else list(values[index] - _values) values[index] = list(values[index] + _values )if np.random.randint(0,2) else list(values[index] - _values)
values[index] = values[index].astype(_type) values[index] = values[index].astype(_type)
x += values.tolist() x += values.tolist()
@ -284,7 +290,7 @@ class Generator (Learner):
_df[name] = _df[name].astype('datetime64[ns]') _df[name] = _df[name].astype('datetime64[ns]')
else: else:
_df[name] = _df[name].astype(str) _df[name] = _df[name].astype(str)
_df[name] = _df[name].replace('NaT','') _df = _df.replace('NaT','')
if r : if r :
self.log(**{'action':'format','input':r}) self.log(**{'action':'format','input':r})
return _df return _df

Loading…
Cancel
Save