From 4109c4c1aa81e61e6faf8b24fec2f9e27eb76981 Mon Sep 17 00:00:00 2001 From: "Steve L. Nyemba" Date: Mon, 30 Jun 2025 20:09:03 +0000 Subject: [PATCH 01/14] Initial commit --- LICENSE | 9 +++++++++ README.md | 3 +++ 2 files changed, 12 insertions(+) create mode 100644 LICENSE create mode 100644 README.md diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..2071b23 --- /dev/null +++ b/LICENSE @@ -0,0 +1,9 @@ +MIT License + +Copyright (c) + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..bd9afdb --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# ce + +community edition of data-transport \ No newline at end of file From befdf453f502ad212eef80155d70919f34990894 Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Fri, 4 Jul 2025 16:57:30 -0500 Subject: [PATCH 02/14] bug fix: crash with etl & process --- bin/transport | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/transport b/bin/transport index 19b664e..41c1a75 100755 --- a/bin/transport +++ b/bin/transport @@ -53,9 +53,9 @@ def wait(jobs): while jobs : jobs = [thread for thread in jobs if thread.is_alive()] time.sleep(1) -def wait (jobs): - while jobs : - jobs = [pthread for pthread in jobs if pthread.is_alive()] +# def wait (jobs): +# while jobs : +# jobs = [pthread for pthread in jobs if pthread.is_alive()] @app_e.command(name="run") def apply (path:Annotated[str,typer.Argument(help="path of the configuration file")], From be10ae17d78154e87ac59c81bb9950562cc44d56 Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Tue, 8 Jul 2025 10:09:43 -0500 Subject: [PATCH 03/14] bug fixes: installer & registry --- pyproject.toml | 1 + transport/registry.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b61e7e5..159e9cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ classifiers = [ ] dependencies = [ "termcolor","sqlalchemy", "aiosqlite","duckdb-engine", + "mysql-connector-python","psycopg2-binary","nzpy","pymssql","duckdb-engine","aiosqlite", "typer","pandas","numpy","sqlalchemy","pyarrow", "plugin-ix@git+https://github.com/lnyemba/plugins-ix" ] diff --git a/transport/registry.py b/transport/registry.py index 196b2f0..050b82d 100644 --- a/transport/registry.py +++ b/transport/registry.py @@ -49,7 +49,8 @@ def init (email,path=REGISTRY_PATH,override=False,_file=REGISTRY_FILE): Initializing the registry and will raise an exception in the 
advent of an issue """ p = '@' in email - q = False if '.' not in email else email.split('.')[-1] in ['edu','com','io','ai','org'] + #q = False if '.' not in email else email.split('.')[-1] in ['edu','com','io','ai','org'] + q = len(email.split('.')[-1]) in [2,3] if p and q : _config = {"email":email,'version':__version__} if not os.path.exists(path): From f06d26f9b676332136f03fdf9891962b52e61a28 Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Tue, 8 Jul 2025 11:46:27 -0500 Subject: [PATCH 04/14] bug fixes:installer & imports --- pyproject.toml | 15 +++------------ transport/__init__.py | 22 +++++++++++++++++++++- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 159e9cb..c0d8a4f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,24 +19,15 @@ classifiers = [ dependencies = [ "termcolor","sqlalchemy", "aiosqlite","duckdb-engine", "mysql-connector-python","psycopg2-binary","nzpy","pymssql","duckdb-engine","aiosqlite", - "typer","pandas","numpy","sqlalchemy","pyarrow", + "typer","pandas","numpy","sqlalchemy","pyarrow","smart-open", "plugin-ix@git+https://github.com/lnyemba/plugins-ix" ] [project.optional-dependencies] sql = ["mysql-connector-python","psycopg2-binary","nzpy","pymssql","duckdb-engine","aiosqlite"] nosql = ["pymongo","cloudant"] -cloud = ["pandas-gbq","google-cloud-bigquery","google-cloud-bigquery-storage", "databricks-sqlalchemy","pyncclient","boto3","boto","botocore"] +cloud = ["boto","boto3","botocore","pyncclient","pandas-gbq","google-cloud-bigquery","google-cloud-bigquery-storage", "databricks-sqlalchemy","pyncclient","boto3","boto","botocore"] warehouse = ["pydrill","pyspark","sqlalchemy_drill"] -rabbitmq = ["pika"] -sqlite = ["aiosqlite"] -aws3 = ["boto3","boto","botocore"] -nextcloud = ["pyncclient"] -mongodb = ["pymongo"] -netezza = ["nzpy"] -mysql = ["mysql-connector-python"] -postgresql = ["psycopg2-binary"] -sqlserver = ["pymssql"] -http = ["flask-session"] +other = 
["pika","flask-session"] all = ["mysql-connector-python","psycopg2-binary","nzpy","pymssql","duckdb-engine","aiosqlite","pymongo","cloudant","pandas-gbq","google-cloud-bigquery","google-cloud-bigquery-storage", "databricks-sqlalchemy","pyncclient","boto3","boto","botocore","pydrill","pyspark","sqlalchemy_drill", "pika","aiosqlite","boto3","boto","botocore", "pyncclient"] [project.urls] diff --git a/transport/__init__.py b/transport/__init__.py index c3bb901..bcc8904 100644 --- a/transport/__init__.py +++ b/transport/__init__.py @@ -18,7 +18,27 @@ Source Code is available under MIT License: """ import numpy as np -from transport import sql, nosql, cloud, other, warehouse +#from transport import sql, nosql, cloud, other, warehouse +from transport import sql +try: + from transport import nosql +finally: + pass +try: + from transport import cloud +finally: + pass +try: + from transport import warehouse +finally: + pass +try: + from transport import other +finally: + pass + + + import pandas as pd import json import os From 18c54d7664c4cdeeff4c8432e529e049dbaa052c Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Tue, 8 Jul 2025 12:02:38 -0500 Subject: [PATCH 05/14] bug fixes --- transport/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/transport/__init__.py b/transport/__init__.py index bcc8904..a96b4f7 100644 --- a/transport/__init__.py +++ b/transport/__init__.py @@ -22,19 +22,19 @@ import numpy as np from transport import sql try: from transport import nosql -finally: +except Exception as e: pass try: from transport import cloud -finally: +except Exception as e: pass try: from transport import warehouse -finally: +except Exception as e: pass try: from transport import other -finally: +except Exception as e: pass From 6e753a1fcd8d704e3392f92680e4e5eae2d13779 Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Tue, 8 Jul 2025 12:03:45 -0500 Subject: [PATCH 06/14] bug fixes --- transport/__init__.py | 8 ++++---- 1 file changed, 4 
insertions(+), 4 deletions(-) diff --git a/transport/__init__.py b/transport/__init__.py index a96b4f7..583b9d8 100644 --- a/transport/__init__.py +++ b/transport/__init__.py @@ -23,19 +23,19 @@ from transport import sql try: from transport import nosql except Exception as e: - pass + nosql = {} try: from transport import cloud except Exception as e: - pass + cloud = {} try: from transport import warehouse except Exception as e: - pass + warehouse = {} try: from transport import other except Exception as e: - pass + other = {} From 89d762f39ab1c12cf0cc4f7c69a86448b151413b Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Tue, 8 Jul 2025 12:14:10 -0500 Subject: [PATCH 07/14] bug fixes: conditional imports --- transport/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/transport/__init__.py b/transport/__init__.py index 583b9d8..6937189 100644 --- a/transport/__init__.py +++ b/transport/__init__.py @@ -55,7 +55,7 @@ def init(): global PROVIDERS for _module in [cloud,sql,nosql,other,warehouse] : for _provider_name in dir(_module) : - if _provider_name.startswith('__') or _provider_name == 'common': + if _provider_name.startswith('__') or _provider_name == 'common' or type(_module) in [None,str,dict]: continue PROVIDERS[_provider_name] = {'module':getattr(_module,_provider_name),'type':_module.__name__} # From a31481e19612a5b90400cee4d1544a8a63eb1ebf Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Tue, 8 Jul 2025 14:11:07 -0500 Subject: [PATCH 08/14] fix --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c0d8a4f..742915d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,12 +23,12 @@ dependencies = [ "plugin-ix@git+https://github.com/lnyemba/plugins-ix" ] [project.optional-dependencies] -sql = ["mysql-connector-python","psycopg2-binary","nzpy","pymssql","duckdb-engine","aiosqlite"] +#sql = 
["mysql-connector-python","psycopg2-binary","nzpy","pymssql","duckdb-engine","aiosqlite"] nosql = ["pymongo","cloudant"] cloud = ["boto","boto3","botocore","pyncclient","pandas-gbq","google-cloud-bigquery","google-cloud-bigquery-storage", "databricks-sqlalchemy","pyncclient","boto3","boto","botocore"] warehouse = ["pydrill","pyspark","sqlalchemy_drill"] other = ["pika","flask-session"] -all = ["mysql-connector-python","psycopg2-binary","nzpy","pymssql","duckdb-engine","aiosqlite","pymongo","cloudant","pandas-gbq","google-cloud-bigquery","google-cloud-bigquery-storage", "databricks-sqlalchemy","pyncclient","boto3","boto","botocore","pydrill","pyspark","sqlalchemy_drill", "pika","aiosqlite","boto3","boto","botocore", "pyncclient"] +all = ["pymongo","cloudant","pandas-gbq","google-cloud-bigquery","google-cloud-bigquery-storage", "databricks-sqlalchemy","pyncclient","boto3","boto","botocore","pydrill","pyspark","sqlalchemy_drill", "pika","aiosqlite","boto3","boto","botocore", "pyncclient"] [project.urls] Homepage = "https://healthcareio.the-phi.com/git/code/transport.git" From 4c2efc28924b543306768dda240cbde6b7eae034 Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Mon, 21 Jul 2025 13:10:50 -0500 Subject: [PATCH 09/14] documentation ... readme --- README.md | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 7d8b414..577350e 100644 --- a/README.md +++ b/README.md @@ -4,12 +4,11 @@ This project implements an abstraction of objects that can have access to a vari # Why Use Data-Transport ? -Mostly data scientists that don't really care about the underlying database and would like a simple and consistent way to read/write and move data are well served. Additionally we implemented lightweight Extract Transform Loading API and command line (CLI) tool. Finally it is possible to add pre/post processing pipeline functions to read/write - -1. Familiarity with **pandas data-frames** -2. 
Connectivity **drivers** are included -3. Reading/Writing data from various sources -4. Useful for data migrations or **ETL** +Data transport is a simple framework that: +- easy to install & modify (open-source) +- enables access to multiple database technologies (pandas, SQLAlchemy) +- enables notebook sharing without exposing database credential. +- supports pre/post processing specifications (pipeline) ## Installation @@ -18,19 +17,16 @@ Within the virtual environment perform the following : pip install git+https://github.com/lnyemba/data-transport.git -## Features +Options to install components in square brackets - - read/write from over a dozen databases - - run ETL jobs seamlessly - - scales and integrates into shared environments like apache zeppelin; jupyterhub; SageMaker; ... + pip install data-transport[nosql,cloud,warehouse,all]@git+https://github.com/lnyemba/data-transport.git -## What's new -Unlike older versions 2.0 and under, we focus on collaborative environments like jupyter-x servers; apache zeppelin: +## Additional features - 1. Simpler syntax to create reader or writer - 2. auth-file registry that can be referenced using a label - 3. duckdb support + - In addition to read/write, there is support for functions for pre/post processing + - CLI interface to add to registry, run ETL + - scales and integrates into shared environments like apache zeppelin; jupyterhub; SageMaker; ... 
## Learn More From d9dac42adcc3fb4bb045e004f73b49dd2030d45c Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Mon, 21 Jul 2025 13:42:15 -0500 Subject: [PATCH 10/14] merge fix --- README.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/README.md b/README.md index 493828a..577350e 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,3 @@ -<<<<<<< HEAD -# ce - -community edition of data-transport -======= # Introduction This project implements an abstraction of objects that can have access to a variety of data stores, implementing read/write with a simple and expressive interface. This abstraction works with **NoSQL**, **SQL** and **Cloud** data stores and leverages **pandas**. @@ -37,4 +32,3 @@ Options to install components in square brackets ## Learn More We have available notebooks with sample code to read/write against mongodb, couchdb, Netezza, PostgreSQL, Google Bigquery, Databricks, Microsoft SQL Server, MySQL ... Visit [data-transport homepage](https://healthcareio.the-phi.com/data-transport) ->>>>>>> v2.2.0 From a4597d4a8c5e5b5b036f7672ccd5a5ee865d8044 Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Thu, 9 Oct 2025 19:34:17 -0500 Subject: [PATCH 11/14] adding queries to files --- transport/other/files.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/transport/other/files.py b/transport/other/files.py index 62ee3c4..57d3f25 100644 --- a/transport/other/files.py +++ b/transport/other/files.py @@ -30,7 +30,11 @@ class Reader (File): def read(self,**args): _path = self.path if 'path' not in args else args['path'] _delimiter = self.delimiter if 'delimiter' not in args else args['delimiter'] - return pd.read_csv(_path,delimiter=self.delimiter) + _df = pd.read_csv(_path,delimiter=self.delimiter) + if 'query' in args : + _query = args['query'] + _df = _df.query(_query) + return _df def stream(self,**args): raise Exception ("streaming needs to be implemented") class Writer (File): From de4ee2fcfaccb6a87d715fbfbfe670bf0f433268 Mon Sep 
17 00:00:00 2001 From: Steve Nyemba Date: Thu, 9 Oct 2025 21:13:22 -0500 Subject: [PATCH 12/14] bug fixes ... windows runner, files --- bin/transport.cmd | 2 ++ pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 bin/transport.cmd diff --git a/bin/transport.cmd b/bin/transport.cmd new file mode 100644 index 0000000..6125f7f --- /dev/null +++ b/bin/transport.cmd @@ -0,0 +1,2 @@ +cd /D "%~dp0" +python transport %1 %2 %3 %4 %5 %6 diff --git a/pyproject.toml b/pyproject.toml index 742915d..b04071d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,7 @@ Homepage = "https://healthcareio.the-phi.com/git/code/transport.git" [tool.setuptools] include-package-data = true zip-safe = false -script-files = ["bin/transport"] +script-files = ["bin/transport","bin/transport.cmd"] [tool.setuptools.packages.find] include = ["info","info.*", "transport", "transport.*"] From d784a3d9da07eae28b9fbd288951a07ed773caae Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Wed, 3 Dec 2025 16:17:59 -0600 Subject: [PATCH 13/14] bug fixes and version update --- README.md | 30 ++++++++++++++++++++---------- bin/transport | 12 +++++++++++- pyproject.toml | 6 +++--- transport/__init__.py | 2 +- transport/registry.py | 2 +- 5 files changed, 36 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 577350e..4b0f214 100644 --- a/README.md +++ b/README.md @@ -4,23 +4,34 @@ This project implements an abstraction of objects that can have access to a vari # Why Use Data-Transport ? -Data transport is a simple framework that: -- easy to install & modify (open-source) -- enables access to multiple database technologies (pandas, SQLAlchemy) -- enables notebook sharing without exposing database credential. -- supports pre/post processing specifications (pipeline) +Data transport is a simple framework that enables read/write to multiple databases or technologies that can hold data. 
 In using **data-transport**, you are able to: +- Enjoy the simplicity of **data-transport** because it leverages SQLAlchemy & Pandas data-frames. +- Share notebooks and code without having to disclose database credentials. +- Seamlessly and consistently access multiple database technologies at no cost +- No need to worry about accidental writes to a database leading to inconsistent data +- Implement consistent pre and post processing as a pipeline i.e. aggregation of functions +- **data-transport** is open-source under MIT License https://github.com/lnyemba/data-transport ## Installation -Within the virtual environment perform the following : +Within the virtual environment perform the following, the options for installation are: - pip install git+https://github.com/lnyemba/data-transport.git +**sql** - by default postgresql, mysql, sqlserver, sqlite3+, duckdb -Options to install components in square brackets + pip install data-transport[cloud,nosql,other,all]@git+https://github.com/lnyemba/data-transport.git - pip install data-transport[nosql,cloud,warehouse,all]@git+https://github.com/lnyemba/data-transport.git +Options to install components in square brackets, these components are + +**warehouse** - Apache Iceberg, Apache Drill + +**cloud**  - to support nextcloud, s3 + +**nosql** - support for mongodb, couchdb +**other**  - support for files, rabbitmq, http + + pip install data-transport[nosql,cloud,warehouse,all]@git+https://github.com/lnyemba/data-transport.git ## Additional features @@ -28,7 +39,6 @@ Options to install components in square brackets - CLI interface to add to registry, run ETL - scales and integrates into shared environments like apache zeppelin; jupyterhub; SageMaker; ... - ## Learn More We have available notebooks with sample code to read/write against mongodb, couchdb, Netezza, PostgreSQL, Google Bigquery, Databricks, Microsoft SQL Server, MySQL ... 
Visit [data-transport homepage](https://healthcareio.the-phi.com/data-transport) diff --git a/bin/transport b/bin/transport index 41c1a75..8cec73f 100755 --- a/bin/transport +++ b/bin/transport @@ -178,7 +178,17 @@ def register (label:Annotated[str,typer.Argument(help="unique label that will be _msg = f"""{TIMES_MARK} {e}""" print (_msg) - pass +@app_r.command(name="list") +def register_list (): + """ + This function will list existing registry entries and basic information {label,vendor} + """ + # print (transport.registry.DATA) + _reg = transport.registry.DATA + _data = [{'label':key,'provider':_reg[key]['provider']} for key in _reg if 'provider' in _reg[key]] + _data = pd.DataFrame(_data) + print (_data) + @app_x.command(name='add') def register_plugs ( alias:Annotated[str,typer.Argument(help="unique function name within a file")], diff --git a/pyproject.toml b/pyproject.toml index b04071d..0391412 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,11 +42,11 @@ zip-safe = false script-files = ["bin/transport","bin/transport.cmd"] [tool.setuptools.packages.find] -include = ["info","info.*", "transport", "transport.*"] +include = [ "transport", "transport.*"] [tool.setuptools.dynamic] -version = {attr = "info.__version__"} -#authors = {attr = "meta.__author__"} +version = {attr = "transport.info.__version__"} +#authors = {attr = "transport.__author__"} # If you have a info.py file, you might also want to include the author dynamically: # [tool.setuptools.dynamic] diff --git a/transport/__init__.py b/transport/__init__.py index 6937189..63fd8b7 100644 --- a/transport/__init__.py +++ b/transport/__init__.py @@ -42,7 +42,7 @@ except Exception as e: import pandas as pd import json import os -from info import __version__,__author__,__email__,__license__,__app_name__,__whatsnew__,__edition__ +from transport.info import __version__,__author__,__email__,__license__,__app_name__,__whatsnew__,__edition__ from transport.iowrapper import IWriter, IReader, IETL from 
transport.plugins import PluginLoader from transport import providers diff --git a/transport/registry.py b/transport/registry.py index 050b82d..4fb96db 100644 --- a/transport/registry.py +++ b/transport/registry.py @@ -1,6 +1,6 @@ import os import json -from info import __version__ +from transport.info import __version__ import copy import transport import importlib From 572c2c91a4baed2062b5a0ee9eaffa872785ec1c Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Wed, 3 Dec 2025 16:18:10 -0600 Subject: [PATCH 14/14] version update --- transport/info.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 transport/info.py diff --git a/transport/info.py b/transport/info.py new file mode 100644 index 0000000..eb1453d --- /dev/null +++ b/transport/info.py @@ -0,0 +1,23 @@ +__app_name__ = 'data-transport' +__author__ = 'The Phi Technology' +__version__= '2.2.30' +__email__ = "info@the-phi.com" +__edition__= 'community' +__license__=f""" +Copyright 2010 - 2024, Steve L. Nyemba + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +""" + +__whatsnew__=f"""version {__version__}, +1. Added support for read/write logs as well as plugins (when applied) +2. Bug fix with duckdb (adding readonly) for readers because there are issues with threads & processes +3. support for streaming data, important to use this with large volumes of data + + +"""