diff --git a/pipeline.py b/pipeline.py index ece3030..da7b27e 100644 --- a/pipeline.py +++ b/pipeline.py @@ -216,8 +216,22 @@ class Components : # # # # This will account for autopilot mode ... # df = args['data'] - - _info = {"module":"gan-prep","action":"read","shape":{"rows":df.shape[0],"columns":df.shape[1]}} + _cast = {} + if schema : + dtype = str + name = schema['name'] + novalue = -1 + if schema['type'] == 'INTEGER' : + dtype = np.int64 + + elif schema['type'] == 'FLOAT' : + dtype = np.float64 + else: + novalue = '' + _cast[schema['name']] = dtype + df[name] = df[name].fillna(novalue).astype(dtype) + + _info = {"module":"gan-prep","action":"read","shape":{"rows":df.shape[0],"columns":df.shape[1]},"schema":schema} logger.write(_info)