From 5d4c534faeac12c19ca39a1564c0ccd19b9a22cd Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Thu, 21 Apr 2022 10:14:00 -0500 Subject: [PATCH] bug fix: approximation null values --- data/maker/__init__.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/data/maker/__init__.py b/data/maker/__init__.py index cde3928..723991f 100644 --- a/data/maker/__init__.py +++ b/data/maker/__init__.py @@ -218,7 +218,13 @@ class Generator (Learner): for values in batches : index = [ _x not in ['',None,np.nan] for _x in values] - _values = np.random.dirichlet(values[index].astype(_type)) + + if len(index) == len(values): + # + # Sometimes messy data has unpleasant surprises + continue + _values = np.random.dirichlet(values[index].astype(_type)) + values[index] = list(values[index] + _values )if np.random.randint(0,2) else list(values[index] - _values) values[index] = values[index].astype(_type) x += values.tolist() @@ -284,7 +290,7 @@ class Generator (Learner): _df[name] = _df[name].astype('datetime64[ns]') else: _df[name] = _df[name].astype(str) - _df[name] = _df[name].replace('NaT','') + _df = _df.replace('NaT','') if r : self.log(**{'action':'format','input':r}) return _df