diff --git a/README.md b/README.md index 577350e..4b0f214 100644 --- a/README.md +++ b/README.md @@ -4,23 +4,34 @@ This project implements an abstraction of objects that can have access to a vari # Why Use Data-Transport ? -Data transport is a simple framework that: -- easy to install & modify (open-source) -- enables access to multiple database technologies (pandas, SQLAlchemy) -- enables notebook sharing without exposing database credential. -- supports pre/post processing specifications (pipeline) +Data transport is a simple framework that enables read/write to multiple databases or technologies that can hold data. In using **data-transport**, you are able to: +- Enjoy the simplicity of **data-transport** because it leverages SQLAlchemy & Pandas data-frames. +- Share notebooks and code without having to disclose database credentials. +- Seamlessly and consistently access multiple database technologies at no cost +- No need to worry about accidental writes to a database leading to inconsistent data +- Implement consistent pre and post processing as a pipeline i.e. aggregation of functions +- **data-transport** is open-source under MIT License https://github.com/lnyemba/data-transport ## Installation -Within the virtual environment perform the following : +Within the virtual environment perform the following, the options for installation are: - pip install git+https://github.com/lnyemba/data-transport.git +**sql** - by default postgresql, mysql, sqlserver, sqlite3+, duckdb -Options to install components in square brackets + pip install data-transport[cloud,nosql,other,all]@git+https://github.com/lnyemba/data-transport.git - pip install data-transport[nosql,cloud,warehouse,all]@git+https://github.com/lnyemba/data-transport.git +Options to install components in square brackets, these components are + +**warehouse** - Apache Iceberg, Apache Drill + +**cloud**  - to support nextcloud, s3 + +**nosql** - support for mongodb, couchdb +**other**  - support for files, 
rabbitmq, http + + pip install data-transport[nosql,cloud,warehouse,all]@git+https://github.com/lnyemba/data-transport.git ## Additional features @@ -28,7 +39,6 @@ Options to install components in square brackets - CLI interface to add to registry, run ETL - scales and integrates into shared environments like apache zeppelin; jupyterhub; SageMaker; ... - ## Learn More We have available notebooks with sample code to read/write against mongodb, couchdb, Netezza, PostgreSQL, Google Bigquery, Databricks, Microsoft SQL Server, MySQL ... Visit [data-transport homepage](https://healthcareio.the-phi.com/data-transport) diff --git a/bin/transport b/bin/transport index 41c1a75..8cec73f 100755 --- a/bin/transport +++ b/bin/transport @@ -178,7 +178,17 @@ def register (label:Annotated[str,typer.Argument(help="unique label that will be _msg = f"""{TIMES_MARK} {e}""" print (_msg) - pass +@app_r.command(name="list") +def register_list (): + """ + This function will list existing registry entries and basic information {label,vendor} + """ + # print (transport.registry.DATA) + _reg = transport.registry.DATA + _data = [{'label':key,'provider':_reg[key]['provider']} for key in _reg if 'provider' in _reg[key]] + _data = pd.DataFrame(_data) + print (_data) + @app_x.command(name='add') def register_plugs ( alias:Annotated[str,typer.Argument(help="unique function name within a file")], diff --git a/pyproject.toml b/pyproject.toml index b04071d..0391412 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,11 +42,11 @@ zip-safe = false script-files = ["bin/transport","bin/transport.cmd"] [tool.setuptools.packages.find] -include = ["info","info.*", "transport", "transport.*"] +include = [ "transport", "transport.*"] [tool.setuptools.dynamic] -version = {attr = "info.__version__"} -#authors = {attr = "meta.__author__"} +version = {attr = "transport.info.__version__"} +#authors = {attr = "transport.__author__"} # If you have a info.py file, you might also want to include the author 
dynamically: # [tool.setuptools.dynamic] diff --git a/transport/__init__.py b/transport/__init__.py index 6937189..63fd8b7 100644 --- a/transport/__init__.py +++ b/transport/__init__.py @@ -42,7 +42,7 @@ except Exception as e: import pandas as pd import json import os -from info import __version__,__author__,__email__,__license__,__app_name__,__whatsnew__,__edition__ +from transport.info import __version__,__author__,__email__,__license__,__app_name__,__whatsnew__,__edition__ from transport.iowrapper import IWriter, IReader, IETL from transport.plugins import PluginLoader from transport import providers diff --git a/transport/registry.py b/transport/registry.py index 050b82d..4fb96db 100644 --- a/transport/registry.py +++ b/transport/registry.py @@ -1,6 +1,6 @@ import os import json -from info import __version__ +from transport.info import __version__ import copy import transport import importlib