bug fix ...

pull/1/head
Steve L. Nyemba 4 years ago
parent 687ffec215
commit afcc5ed690

@ -8,12 +8,12 @@ def read(fname):
return open(os.path.join(os.path.dirname(__file__), fname)).read() return open(os.path.join(os.path.dirname(__file__), fname)).read()
args = { args = {
"name":"data-transport", "name":"data-transport",
"version":"1.3.8", "version":"1.3.8.1",
"author":"The Phi Technology LLC","author_email":"info@the-phi.com", "author":"The Phi Technology LLC","author_email":"info@the-phi.com",
"license":"MIT", "license":"MIT",
"packages":["transport"]} "packages":["transport"]}
args["keywords"]=['mongodb','couchdb','rabbitmq','file','read','write','s3','sqlite'] args["keywords"]=['mongodb','couchdb','rabbitmq','file','read','write','s3','sqlite']
args["install_requires"] = ['pymongo','numpy','cloudant','pika','boto','google-cloud-bigquery','flask-session','smart_open','botocore','psycopg2-binary','mysql-connector-python'] args["install_requires"] = ['pymongo','numpy','cloudant','pika','boto3','boto','pyarrow','google-cloud-bigquery','google-cloud-bigquery-storage','flask-session','smart_open','botocore','psycopg2-binary','mysql-connector-python']
args["url"] = "https://healthcareio.the-phi.com/git/code/transport.git" args["url"] = "https://healthcareio.the-phi.com/git/code/transport.git"
if sys.version_info[0] == 2 : if sys.version_info[0] == 2 :

@ -18,9 +18,13 @@ else:
from common import Reader,Writer from common import Reader,Writer
import json import json
from google.oauth2 import service_account from google.oauth2 import service_account
from google.cloud import bigquery as bq
from multiprocessing import Lock from multiprocessing import Lock
import pandas as pd import pandas as pd
import numpy as np import numpy as np
import copy
class SQLRW : class SQLRW :
PROVIDERS = {"postgresql":"5432","redshift":"5432","mysql":"3306","mariadb":"3306"} PROVIDERS = {"postgresql":"5432","redshift":"5432","mysql":"3306","mariadb":"3306"}
DRIVERS = {"postgresql":pg,"redshift":pg,"mysql":my,"mariadb":my} DRIVERS = {"postgresql":pg,"redshift":pg,"mysql":my,"mariadb":my}
@ -177,9 +181,32 @@ class SQLWriter(SQLRW,Writer):
pass pass
class BigQuery: class BigQuery:
def __init__(self,**_args): def __init__(self,**_args):
path = _args['service_key'] path = _args['service_key'] if 'service_key' in _args else _args['private_key']
self.credentials = service_account.Credentials.from_service_account_file(path) self.credentials = service_account.Credentials.from_service_account_file(path)
self.dataset = _args['dataset'] if 'dataset' in _args else None
self.path = path
def meta(self,**_args):
"""
This function returns meta data for a given table or query with dataset/table properly formatted
:param table name of the name WITHOUT including dataset
:param sql sql query to be pulled,
"""
#if 'table' in _args :
# sql = "SELECT * from :dataset."+ _args['table']" limit 1"
#else:
# sql = _args['sql']
# if 'limit' not in sql.lower() :
# sql = sql + ' limit 1'
#sql = sql.replace(':dataset',self.dataset) if ':dataset' in args else sql
#
# Let us return the schema information now for a given table
#
table = _args['table']
client = bq.Client.from_service_account_json(self.path)
ref = client.dataset(self.dataset).table(table)
return client.get_table(ref).schema
class BQReader(BigQuery,Reader) : class BQReader(BigQuery,Reader) :
def __init__(self,**_args): def __init__(self,**_args):
@ -196,7 +223,8 @@ class BQReader(BigQuery,Reader) :
SQL = "SELECT * FROM :table ".replace(":table",table) SQL = "SELECT * FROM :table ".replace(":table",table)
if SQL and 'limit' in _args: if SQL and 'limit' in _args:
SQL += " LIMIT "+str(_args['limit']) SQL += " LIMIT "+str(_args['limit'])
if (':dataset' in SQL or ':DATASET' in SQL) and self.dataset:
SQL = SQL.replace(':dataset',self.dataset).replace(':DATASET',self.dataset)
return pd.read_gbq(SQL,credentials=self.credentials,dialect='standard') if SQL else None return pd.read_gbq(SQL,credentials=self.credentials,dialect='standard') if SQL else None
class BQWriter(BigQuery,Writer): class BQWriter(BigQuery,Writer):
Lock = Lock() Lock = Lock()
@ -223,7 +251,14 @@ class BQWriter(BigQuery,Writer):
elif type(_info) == pd.DataFrame : elif type(_info) == pd.DataFrame :
_df = _info _df = _info
if '.' not in _args['table'] :
self.mode['destination_table'] = '.'.join([self.dataset,_args['table']])
else:
self.mode['destination_table'] = _args['table'].strip() self.mode['destination_table'] = _args['table'].strip()
if 'schema' in _args :
self.mode['table_schema'] = _args['schema']
_mode = copy.deepcopy(self.mode)
_df.to_gbq(**self.mode) #if_exists='append',destination_table=partial,credentials=credentials,chunksize=90000) _df.to_gbq(**self.mode) #if_exists='append',destination_table=partial,credentials=credentials,chunksize=90000)
pass pass

Loading…
Cancel
Save