From 1c254eb133c9463c604cf0394b59b0119a57a6e1 Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Fri, 9 Dec 2022 16:19:39 -0600 Subject: [PATCH] bug fixes, enhancements mongodb --- README.md | 6 ++++-- setup.py | 2 +- transport/sql.py | 4 ++++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index c641952..87e5b1e 100644 --- a/README.md +++ b/README.md @@ -42,11 +42,13 @@ Once installed **data-transport** can be used as a library in code or a command ## Data Transport as a Library (in code) --- -The data-transport can be used within code as a library +The data-transport can be used within code as a library, and offers the following capabilities: + * Read/Write against [mongodb](https://github.com/lnyemba/data-transport/wiki/mongodb) * Read/Write against tranditional [RDBMS](https://github.com/lnyemba/data-transport/wiki/rdbms) * Read/Write against [bigquery](https://github.com/lnyemba/data-transport/wiki/bigquery) * ETL CLI/Code [ETL](https://github.com/lnyemba/data-transport/wiki/etl) +* Support for pre/post conditions, i.e., it is possible to specify queries to run before or after a read or write The read/write functions make data-transport a great candidate for **data-science**; **data-engineering** or all things pertaining to data. It enables operations across multiple data-stores(relational or not) @@ -60,7 +62,7 @@ It is possible to perform ETL within custom code as follows : import transport import time - _info = [{source:{'provider':'sqlite','path':'/home/me/foo.csv','table':'me'},target:{provider:'bigquery',private_key='/home/me/key.json','table':'me','dataset':'mydataset'}}, ...] + _info = [{'source':{'provider':'sqlite','path':'/home/me/foo.csv','table':'me',"pipeline":{"pre":[],"post":[]}},'target':{'provider':'bigquery','private_key':'/home/me/key.json','table':'me','dataset':'mydataset'}}, ...] 
procs = transport.factory.instance(provider='etl',info=_info) # # diff --git a/setup.py b/setup.py index 4c79ec7..b9dbb37 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ def read(fname): return open(os.path.join(os.path.dirname(__file__), fname)).read() args = { "name":"data-transport", - "version":"1.6.1", + "version":"1.6.2", "author":"The Phi Technology LLC","author_email":"info@the-phi.com", "license":"MIT", "packages":["transport"]} diff --git a/transport/sql.py b/transport/sql.py index 9d278a3..f6e196c 100644 --- a/transport/sql.py +++ b/transport/sql.py @@ -192,6 +192,10 @@ class SQLReader(SQLRW,Reader) : _sql = _sql.replace(":fields",_fields) if 'limit' in _args : _sql = _sql + " LIMIT "+str(_args['limit']) + # + # @TODO: + # It is here that we should inspect to see if there are any pre/post conditions + # return self.apply(_sql) def close(self) : try: