bug fixes and optimizations

pull/1/head
Steve Nyemba 3 years ago
parent 105ff00224
commit 38e1bce6c2

@ -35,9 +35,44 @@ Within the virtual environment perform the following :
pip install git+https://dev.the-phi.com/git/steve/data-transport.git pip install git+https://dev.the-phi.com/git/steve/data-transport.git
Once installed, **data-transport** can be used as a library in code or as a command line interface (CLI).
## Data Transport as a Library (in code)
---
The data-transport can be used within code as a library
* Read/Write against [mongodb](https://github.com/lnyemba/data-transport/wiki/mongodb)
* Read/Write against traditional [RDBMS](https://github.com/lnyemba/data-transport/wiki/rdbms)
* Read/Write against [bigquery](https://github.com/lnyemba/data-transport/wiki/bigquery)
The read/write functions make data-transport a great candidate for **data-science**, **data-engineering**, or anything else pertaining to data. It enables operations across multiple data stores (relational or not).
## Command Line Interface (CLI)
---
The CLI program is called **transport** and it requires a configuration file:
```
[
{
"id":"logs",
"source":{
"provider":"postgresql","context":"read","database":"mydb",
"cmd":{"sql":"SELECT * FROM logs limit 10"}
},
"target":{
"provider":"bigquery","private_key":"/bgqdrive/account/bq-service-account-key.json",
"dataset":"mydataset"
}
}
]
```
## In code (Embedded) Assuming the above content is stored in a file called **etl-config.json**, we would perform the following in a terminal window:
```
[steve@data-transport]$ transport --config ./etl-config.json [--index <value>]
```
**Reading/Writing Mongodb** **Reading/Writing Mongodb**

@ -13,7 +13,7 @@ args = {
"license":"MIT", "license":"MIT",
"packages":["transport"]} "packages":["transport"]}
args["keywords"]=['mongodb','couchdb','rabbitmq','file','read','write','s3','sqlite'] args["keywords"]=['mongodb','couchdb','rabbitmq','file','read','write','s3','sqlite']
args["install_requires"] = ['pymongo','pandas','numpy','cloudant','pika','nzpy','boto3','boto','pyarrow','google-cloud-bigquery','google-cloud-bigquery-storage','flask-session','smart_open','botocore','psycopg2-binary','mysql-connector-python'] args["install_requires"] = ['pymongo','sqlalchemy','pandas','numpy','cloudant','pika','nzpy','boto3','boto','pyarrow','google-cloud-bigquery','google-cloud-bigquery-storage','flask-session','smart_open','botocore','psycopg2-binary','mysql-connector-python']
args["url"] = "https://healthcareio.the-phi.com/git/code/transport.git" args["url"] = "https://healthcareio.the-phi.com/git/code/transport.git"
args['scripts'] = ['bin/transport'] args['scripts'] = ['bin/transport']
if sys.version_info[0] == 2 : if sys.version_info[0] == 2 :

@ -125,9 +125,9 @@ class SQLRW :
_out = None _out = None
try: try:
if "select" in _sql.lower() : if "select" in _sql.lower() :
cursor.close()
_conn = self._engine.connect() if self._engine else self.conn # _conn = self._engine if self._engine else self.conn
return pd.read_sql(_sql,_conn) return pd.read_sql(_sql,self.conn)
else: else:
# Executing a command i.e no expected return values ... # Executing a command i.e no expected return values ...
cursor.execute(_sql) cursor.execute(_sql)
@ -151,7 +151,8 @@ class SQLReader(SQLRW,Reader) :
if 'sql' in _args : if 'sql' in _args :
_sql = (_args['sql']) _sql = (_args['sql'])
else: else:
_sql = "SELECT :fields FROM "+self.table table = self.table if self.table is not None else _args['table']
_sql = "SELECT :fields FROM "+self._tablename(table)
if 'filter' in _args : if 'filter' in _args :
_sql = _sql +" WHERE "+_args['filter'] _sql = _sql +" WHERE "+_args['filter']
_fields = '*' if not self.fields else ",".join(self.fields) _fields = '*' if not self.fields else ",".join(self.fields)
@ -220,7 +221,7 @@ class SQLWriter(SQLRW,Writer):
# cursor.close() # cursor.close()
self.conn.commit() self.conn.commit()
pass pass
def write(self,info): def write(self,info,**_args):
""" """
:param info writes a list of data to a given set of fields :param info writes a list of data to a given set of fields
""" """
@ -324,7 +325,8 @@ class BQReader(BigQuery,Reader) :
def __init__(self,**_args): def __init__(self,**_args):
super().__init__(**_args) super().__init__(**_args)
def apply(self,sql):
self.read(sql=sql)
pass pass
def read(self,**_args): def read(self,**_args):
SQL = None SQL = None
@ -359,6 +361,7 @@ class BQWriter(BigQuery,Writer):
try: try:
if self.parallel or 'lock' in _args : if self.parallel or 'lock' in _args :
BQWriter.lock.acquire() BQWriter.lock.acquire()
_args['table'] = self.table if 'table' not in _args else _args['table']
self._write(_info,**_args) self._write(_info,**_args)
finally: finally:
if self.parallel: if self.parallel:

Loading…
Cancel
Save