diff --git a/pipeline.py b/pipeline.py index 04658da..fd5a28e 100644 --- a/pipeline.py +++ b/pipeline.py @@ -33,7 +33,7 @@ class Components : if 'limit' in args : SQL = SQL + 'LIMIT ' + args['limit'] credentials = service_account.Credentials.from_service_account_file('/home/steve/dev/aou/accounts/curation-prod.json') - df = pd.read_gbq(SQL,credentials=credentials,dialect='standard').dropna() + df = pd.read_gbq(SQL,credentials=credentials,dialect='standard') return df # return lambda: pd.read_gbq(SQL,credentials=credentials,dialect='standard')[args['columns']].dropna() @@ -51,7 +51,9 @@ class Components : # @TODO: we need to log something here about the parameters being passed pointer = args['reader'] if 'reader' in args else lambda: Components.get(**args) df = pointer() - + if df.shape[0] == 0 : + print ("CAN NOT TRAIN EMPTY DATASET ") + return # # Now we can parse the arguments and submit the entire thing to training #