From 73115724fe2d91d4ffbc5b21dec50c24d6963480 Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Sun, 4 Apr 2021 12:05:23 -0500 Subject: [PATCH] ... --- pipeline.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/pipeline.py b/pipeline.py index ece3030..da7b27e 100644 --- a/pipeline.py +++ b/pipeline.py @@ -216,8 +216,22 @@ class Components : # # # # This will account for autopilot mode ... # df = args['data'] - - _info = {"module":"gan-prep","action":"read","shape":{"rows":df.shape[0],"columns":df.shape[1]}} + _cast = {} + if schema : + dtype = str + name = schema['name'] + novalue = -1 + if schema['type'] == 'INTEGER' : + dtype = np.int64 + + elif schema['type'] == 'FLOAT' : + dtype = np.float64 + else: + novalue = '' + _cast[schema['name']] = dtype + df[name] = df[name].fillna(novalue).astype(dtype) + + _info = {"module":"gan-prep","action":"read","shape":{"rows":df.shape[0],"columns":df.shape[1]},"schema":schema} logger.write(_info)