From 79b83c71d5043427c37bd81f3beebc4637fac9eb Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Wed, 12 May 2021 10:14:53 -0500 Subject: [PATCH] bug fix: date, hack put in place --- pipeline.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/pipeline.py b/pipeline.py index 252a850..b6e808f 100644 --- a/pipeline.py +++ b/pipeline.py @@ -251,13 +251,16 @@ class Components : _value = 0 if _item['type'] in ['DATE','TIMESTAMP','DATETIMESTAMP','DATETIME'] : if _item['type'] == 'DATE' : - _df[name] = _df[name].dt.date - _df[name] = pd.to_datetime(_df[name],errors='coerce') - - - - - + # + # There is an issue with missing dates that needs to be resolved. + # for some reason a missing date/time here will cause the types to turn into timestamp (problem) + # The following is a hack to address the issue (alas) assuming 10 digit dates and 'NaT' replaces missing date values (pandas specifications) + # + _df[name] = _df[name].apply(lambda value: '' if str(value) == 'NaT' else str(value)[:10]) + #_df[name] = _df[name].dt.date + # _df[name] = pd.to_datetime(_df[name].fillna(''),errors='coerce') + else: + print ([' ** ',name,_item['type']]) else: if _item['type'] == 'INTEGER' : _type = np.int64