Merge pull request 'v2.2.0' (#35 ) from v2.2.0 into master

Reviewed-on: #35
Merge branch 'master' into v2.2.0
23 changed files with 1282 additions and 267 deletions
--- a/README.md
+++ b/README.md
@ -18,6 +18,20 @@ Within the virtual environment perform the following :
    pip install git+https://github.com/lnyemba/data-transport.git
 ## Features
    - read/write from over a dozen databases
    - run ETL jobs seamlessly
    - scales and integrates into shared environments like apache zeppelin; jupyterhub; SageMaker; ...
 ## What's new
 Unlike older versions 2.0 and under, we focus on collaborative environments like jupyter-x servers; apache zeppelin:
    1. Simpler syntax to create reader or writer
    2. auth-file registry that can be referenced using a label
    3. duckdb support
 ## Learn More
--- a/bin/transport
+++ b/bin/transport
@ -24,19 +24,28 @@ from multiprocessing import Process
 import os
 import transport
-from transport import etl
+# from transport import etl
 from transport.iowrapper import IETL
 # from transport import providers
 import typer
 from typing_extensions import Annotated
 from typing import Optional
 import time
 from termcolor import colored
 from enum import Enum
 from rich import print
 import plugin_ix as pix
 app = typer.Typer()
 app_e = typer.Typer()   #-- handles etl (run, generate)
 app_x = typer.Typer()   #-- handles plugins (list,add, test)
 app_i = typer.Typer()   #-- handles information (version, license)
 app_r = typer.Typer()   #-- handles registry    
 REGISTRY_PATH=os.sep.join([os.environ['HOME'],'.data-transport'])
 REGISTRY_FILE= 'transport-registry.json'
-CHECK_MARK = ' '.join(['[',colored(u'\u2713', 'green'),']'])
+CHECK_MARK = '[ [green]\u2713[/green] ]' #' '.join(['[',colored(u'\u2713', 'green'),']'])
-TIMES_MARK= ' '.join(['[',colored(u'\u2717','red'),']'])
+TIMES_MARK= '[ [red]\u2717[/red] ]' #' '.join(['[',colored(u'\u2717','red'),']'])
 # @app.command()
 def help():
    # Print the documentation string of this script (module-level __doc__).
    print(__doc__)
@ -44,10 +53,15 @@ def wait(jobs):
    while jobs :
        jobs = [thread for thread in jobs if thread.is_alive()]
        time.sleep(1)
 # def wait (jobs):
 #     while jobs :
 #             jobs = [pthread for pthread in jobs if pthread.is_alive()]
-@app.command(name="apply")
+@app_e.command(name="run")
 def apply (path:Annotated[str,typer.Argument(help="path of the configuration file")],
-        index:int = typer.Option(default= None, help="index of the item of interest, otherwise everything in the file will be processed")):
+        index:int = typer.Option(default= None, help="index of the item of interest, otherwise everything in the file will be processed"),
        batch:int = typer.Option(default=5, help="The number of parallel processes to run at once")
        ):
    """
    This function applies the data-transport ETL feature to read data from one source and write it to one or several others
    """
@ -56,23 +70,34 @@ def apply (path:Annotated[str,typer.Argument(help="path of the configuration fil
        file = open(path)
        _config = json.loads (file.read() )
        file.close()
-        if index :
+        if index is not None:            
            _config = [_config[ int(index)]]
        jobs = []          
        for _args in _config :
-            pthread = etl.instance(**_args) #-- automatically starts the process
+            # pthread = etl.instance(**_args) #-- automatically starts the process
            def bootup ():
                _worker = IETL(**_args)
                _worker.run()
            pthread = Process(target=bootup)
            pthread.start()
            jobs.append(pthread)
            if len(jobs) == batch :
                wait(jobs)
                jobs = []
        if jobs :
            wait (jobs)
        #
-        # @TODO: Log the number of processes started and estimated time
+        # @TODO: Log the number of processes started and estimated time
-        while jobs :
+        # while jobs :
-             jobs = [pthread for pthread in jobs if pthread.is_alive()]
+        #      jobs = [pthread for pthread in jobs if pthread.is_alive()]
-             time.sleep(1)
+        #      time.sleep(1)
        #
        # @TODO: Log the job termination here ...
-@app.command(name="providers")
+@app_i.command(name="supported")
 def supported (format:Annotated[str,typer.Argument(help="format of the output, supported formats are (list,table,json)")]="table") :
    """
-    This function will print supported providers/vendors and their associated classifications
+    This function will print supported database technologies
    """
    _df =  (transport.supported())
    if format in ['list','json'] :
@ -80,17 +105,26 @@ def supported (format:Annotated[str,typer.Argument(help="format of the output, s
    else:
         print (_df)
    print ()
@app_i.command(name="version")
 def version():
    """
    This function will return the version of the data-transport
    """
    # NOTE: the docstring above is kept verbatim, Typer is expected to surface it as the command help.
    # Blank lines before and after set the banner apart in the terminal.
    print()
    _name = transport.__app_name__
    _edition = transport.__edition__
    _release = transport.__version__
    # [bold]/[blue] are markup tags handled by the print imported from rich
    _banner = f'[bold] {_name} ,[blue] {_edition} edition [/blue], version {_release}[/bold]'
    print(_banner)
    print()
-@app.command()
+@app_i.command(name="license")
-def version():
+def info():
    """
    This function will display version and license information
    """
-
+    print()
-    print (transport.__app_name__,'version ',transport.__version__)
+    print (f'[bold] {transport.__app_name__} ,{transport.__edition__}, version {transport.__version__}[/bold]')
    print ()
    print (transport.__license__)
-@app.command()
+@app_e.command()
 def generate (path:Annotated[str,typer.Argument(help="path of the ETL configuration file template (name included)")]):
    """
    This function will generate a configuration template to give a sense of how to create one
@ -99,45 +133,45 @@ def generate (path:Annotated[str,typer.Argument(help="path of the ETL configurat
            {
                "source":{"provider":"http","url":"https://raw.githubusercontent.com/codeforamerica/ohana-api/master/data/sample-csv/addresses.csv"},
                "target":
-            [{"provider":"files","path":"addresses.csv","delimiter":","},{"provider":"sqlite","database":"sample.db3","table":"addresses"}]
+            [{"provider":"files","path":"addresses.csv","delimiter":","},{"provider":"sqlite3","database":"sample.db3","table":"addresses"}]
            }
            ]
    file = open(path,'w')
    file.write(json.dumps(_config))
    file.close()
-    print (f"""{CHECK_MARK} Successfully generated a template ETL file at {path}""" )
+    print (f"""{CHECK_MARK} Successfully generated a template ETL file at [bold]{path}[/bold]""" )
    print ("""NOTE: Each line (source or target) is the content of an auth-file""")
-@app.command(name="init")
+@app_r.command(name="reset")
 def initregistry (email:Annotated[str,typer.Argument(help="email")],
                  path:str=typer.Option(default=REGISTRY_PATH,help="path or location of the configuration file"), 
                  override:bool=typer.Option(default=False,help="override existing configuration or not")):
    """
-    This functiion will initialize the registry and have both application and calling code loading the database parameters by a label
+    This function will initialize the data-transport registry so that both the application and calling code can load the database parameters by a label
    """
    try:
        transport.registry.init(email=email, path=path, override=override)
-        _msg = f"""{CHECK_MARK} Successfully wrote configuration to {path} from {email}"""
+        _msg = f"""{CHECK_MARK} Successfully wrote configuration to [bold]{path}[/bold] from [bold]{email}[/bold]"""
    except Exception as e:
        _msg = f"{TIMES_MARK} {e}"
    print (_msg)
    print ()
-@app.command(name="register")
+@app_r.command(name="add")
 def register (label:Annotated[str,typer.Argument(help="unique label that will be used to load the parameters of the database")],
              auth_file:Annotated[str,typer.Argument(help="path of the auth_file")],
              default:bool=typer.Option(default=False,help="set the auth_file as default"),
              path:str=typer.Option(default=REGISTRY_PATH,help="path of the data-transport registry file")):
    """
-    This function will register an auth-file i.e database connection and assign it a label, 
+    This function adds a database label for a given auth-file, which allows access to the database using a label of your choice.
-    Learn more about auth-file at https://healthcareio.the-phi.com/data-transport
+    
    """
    try:
        if transport.registry.exists(path) :
            transport.registry.set(label=label,auth_file=auth_file, default=default, path=path)
-            _msg = f"""{CHECK_MARK} Successfully added label "{label}" to data-transport registry"""
+            _msg = f"""{CHECK_MARK} Successfully added label [bold]"{label}"[/bold] to data-transport registry"""
        else:
            _msg = f"""{TIMES_MARK} Registry is not initialized, please initialize the registry (check help)"""
    except Exception as e:
@ -145,6 +179,68 @@ def register (label:Annotated[str,typer.Argument(help="unique label that will be
    print (_msg)
    pass
@app_x.command(name='add') 
 def register_plugs (
    alias:Annotated[str,typer.Argument(help="unique function name within a file")],
    path:Annotated[str,typer.Argument(help="path of the python file, that contains functions")],
    folder:str=typer.Option(default=REGISTRY_PATH,help="path of the data-transport registry folder"),
    ):
    """
    This function will register a file and the functions within we are interested in using
    """
    #
    # alias is either a single function name or a comma separated list of names,
    # either way the plugin registry is handed a list of stripped names
    if ',' in alias :
        _names = [_name.strip() for _name in alias.split(',') if _name.strip() != '']
    else:
        _names = [alias.strip()]
    _pregistry = pix.Registry(folder=folder,plugin_folder='plugins/code')
    #
    # A falsy outcome is reported as a failure, otherwise the returned value is
    # reported as the number of functions registered
    _log = _pregistry.set(path,_names)
    if _log :
        _mark = CHECK_MARK
        _msg = f""" successfully added {_names}, {_log} functions registered"""
    else:
        _mark = TIMES_MARK
        # a space was missing after [/bold], it glued the alias to the next word
        _msg = f"""Could NOT add the [bold]{_names}[/bold] to the registry"""
    print (f"""{_mark} {_msg}""")
@app_x.command(name="list") 
 def registry_list (folder:str=typer.Option(default=REGISTRY_PATH,help="path of the data-transport configuration folder")):
    """
    This function will list all the plugins (python functions/files) that are registered and can be reused
    """
    _stats = pix.Registry(folder=folder).stats()
    #
    # stats() returns an object exposing .empty, presumably a pandas DataFrame (to be confirmed against plugin_ix)
    if not _stats.empty :
        print (_stats)
        return
    print (f"{TIMES_MARK} registry at {folder} is not ready")
@app_x.command ("has")
 def registry_has (alias:Annotated[str,typer.Argument(help="alias of a function function@file or file.function")],
                  folder:str=typer.Option(default=REGISTRY_PATH,help="path of the data-transport registry file")) :
    """
    This function will check if a function is registered under a given alias and report the outcome
    """
    _pregistry = pix.Registry(folder=folder)
    #
    # The message only differs by the mark and the wording, the lookup itself is delegated to the plugin registry
    if _pregistry.has(alias) :
        _msg = f"{CHECK_MARK} {alias} was [bold] found [/bold] in registry "
    else:
        _msg = f"{TIMES_MARK} {alias} was [bold] NOT found [/bold] in registry "
    print (_msg)
@app_x.command(name="test") 
 def registry_test (alias:Annotated[str,typer.Argument(help="alias of a function function@file or file.function")],
                  folder:str=typer.Option(default=REGISTRY_PATH,help="path of the data-transport registry folder")) :
    """
    This function allows to test syntax for a plugin i.e in terms of alias@function
    """
    #
    # The docstring must be the first statement of the function, otherwise it is a
    # plain string expression and the function has no documentation attached to it
    _pregistry = pix.Registry(folder=folder)
    #
    # The registry is only asked for the function pointer when the alias is known to it
    _pointer = _pregistry.get(alias) if _pregistry.has(alias) else None
    if _pointer:
        print (f"""{CHECK_MARK} successfully loaded [bold] {alias}[/bold] found in {folder}""")
    else:
        print (f"{TIMES_MARK} unable to load {alias}. Make sure it is registered")
 #
 # Attach every command group to the root application under its own sub-command name
 # i.e etl, registry, info and plugins (each group is a typer.Typer instance)
 app.add_typer(app_e,name='etl',help="This function will run etl or generate a template etl configuration file")
 app.add_typer(app_r,name='registry',help='This function allows labeling database access information')
 app.add_typer(app_i,name="info",help="This function will print either license or supported database technologies")
 app.add_typer(app_x, name="plugins",help="This function enables add/list/test of plugins in the registry")
 # The command line application only runs when this file is executed as a script
 if __name__ == '__main__' :
     app()
--- a/info/init.py
+++ b/info/init.py
@ -1,7 +1,8 @@
 __app_name__  = 'data-transport'
 __author__ = 'The Phi Technology'
-__version__= '2.0.4'
+__version__= '2.2.22'
 __email__  = "info@the-phi.com"
 __edition__= 'community'
 __license__=f"""
 Copyright 2010 - 2024, Steve L. Nyemba
@ -11,4 +12,12 @@ The above copyright notice and this permission notice shall be included in all c
 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 """
 __whatsnew__=f"""version {__version__}, 
 1. Added support for read/write logs as well as plugins (when applied)
 2. Bug fix with duckdb (adding readonly) for readers because there are issues with threads & processes
 3. support for streaming data, important to use this with large volumes of data
 """
--- a/notebooks/iceberg.ipynb
+++ b/notebooks/iceberg.ipynb
@ -0,0 +1,138 @@
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Writing to Apache Iceberg\n",
    "\n",
    "1. Ensure you have a Google Bigquery service account key on disk\n",
    "2. The service key location is set as an environment variable **BQ_KEY**\n",
    "3. The dataset will be automatically created within the project associated with the service key\n",
    "\n",
    "The cell below creates a dataframe that will be stored within Google Bigquery"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['data transport version ', '2.4.0']\n"
     ]
    }
   ],
   "source": [
    "#\n",
    "# Writing to Google Bigquery database\n",
    "#\n",
    "import transport\n",
    "from transport import providers\n",
    "import pandas as pd\n",
    "import os\n",
    "\n",
    "PRIVATE_KEY = os.environ['BQ_KEY'] #-- location of the service key\n",
    "DATASET = 'demo'\n",
    "_data = pd.DataFrame({\"name\":['James Bond','Steve Rogers','Steve Nyemba'],'age':[55,150,44]})\n",
    "# bqw = transport.get.writer(provider=providers.ICEBERG,catalog='mz',database='edw.mz',table='friends')\n",
    "bqw = transport.get.writer(provider=providers.ICEBERG,table='edw.mz.friends')\n",
    "bqw.write(_data,if_exists='replace') #-- default is append\n",
    "print (['data transport version ', transport.__version__])\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Reading from Google Bigquery\n",
    "\n",
    "The cell below reads the data that has been written by the cell above and computes the average age within a Google Bigquery (simple query). \n",
    "\n",
    "- Basic read of the designated table (friends) created above\n",
    "- Execute an aggregate SQL against the table\n",
    "\n",
    "**NOTE**\n",
    "\n",
    "By design **read** objects are separated from **write** objects in order to avoid accidental writes to the database.\n",
    "Read objects are created with **transport.get.reader** whereas write objects are created with **transport.get.writer**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "           name  age\n",
      "0    James Bond   55\n",
      "1  Steve Rogers  150\n",
      "2  Steve Nyemba   44\n",
      "--------- STATISTICS ------------\n"
     ]
    }
   ],
   "source": [
    "\n",
    "import transport\n",
    "from transport import providers\n",
    "import os\n",
    "PRIVATE_KEY=os.environ['BQ_KEY']\n",
    "pgr = transport.get.reader(provider=providers.ICEBERG,database='edw.mz')\n",
    "_df = pgr.read(table='friends')\n",
    "_query = 'SELECT COUNT(*) _counts, AVG(age) from friends'\n",
    "_sdf = pgr.read(sql=_query)\n",
    "print (_df)\n",
    "print ('--------- STATISTICS ------------')\n",
    "# print (_sdf)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "An **auth-file** is a file that contains database parameters used to access the database. \n",
    "For code in shared environments, we recommend \n",
    "\n",
    "1. Having the **auth-file** stored on disk \n",
    "2. and the location of the file is set to an environment variable.\n",
    "\n",
    "To generate a template of the **auth-file** open the **file generator wizard** found at https://healthcareio.the-phi.com/data-transport"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
--- a/notebooks/plugins.ipynb
+++ b/notebooks/plugins.ipynb
@ -0,0 +1,149 @@
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Writing data-transport plugins\n",
    "\n",
    "The data-transport plugins are designed to automate pre/post processing i.e\n",
    "\n",
    "    - Read -> Post processing\n",
    "    - Write-> Pre processing\n",
    "    \n",
    "In this example we will assume some data and write both pre/post processing functions that work with any supported infrastructure. We will also show how to specify the plugins within a configuration file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "#\n",
    "# Writing to a SQLite database\n",
    "#\n",
    "import transport\n",
    "from transport import providers\n",
    "import pandas as pd\n",
    "import os\n",
    "import shutil\n",
    "#\n",
    "#\n",
    "\n",
    "DATABASE = '/home/steve/tmp/demo.db3'\n",
    "if os.path.exists(DATABASE) :\n",
    "    os.remove(DATABASE)\n",
    "#\n",
    "#    \n",
    "_data = pd.DataFrame({\"name\":['James Bond','Steve Rogers','Steve Nyemba'],'age':[55,150,44]})\n",
    "litew = transport.get.writer(provider=providers.SQLITE,database=DATABASE)\n",
    "litew.write(_data,table='friends')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Reading from SQLite\n",
    "\n",
    "The cell below reads the data that has been written by the cell above and computes the average age from a plugin function we will write. \n",
    "\n",
    "- Basic read of the designated table (friends) created above\n",
    "- Read with pipeline functions defined in code\n",
    "\n",
    "**NOTE**\n",
    "\n",
    "It is possible to use **transport.factory.instance** or **transport.instance** or **transport.get.<[reader|writer]>** they are the same. It allows the maintainers to know that we used a factory design pattern."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "           name  age\n",
      "0    James Bond   55\n",
      "1  Steve Rogers  150\n",
      "2  Steve Nyemba   44\n",
      "\n",
      "\n",
      "           name   age  autoinc\n",
      "0    James Bond   5.5        0\n",
      "1  Steve Rogers  15.0        1\n",
      "2  Steve Nyemba   4.4        2\n"
     ]
    }
   ],
   "source": [
    "\n",
    "import transport\n",
    "from transport import providers\n",
    "import os\n",
    "import numpy as np\n",
    "def _autoincrement (_data,**kwargs) :\n",
    "    \"\"\"\n",
    "    This function will add an autoincrement field to the table\n",
    "    \"\"\"\n",
    "    _data['autoinc'] = np.arange(_data.shape[0])\n",
    "    \n",
    "    return _data\n",
    "def reduce(_data,**_args) :\n",
    "    \"\"\"\n",
    "    This function will reduce the age of the data frame\n",
    "    \"\"\"\n",
    "    _data.age /= 10\n",
    "    return _data\n",
    "reader = transport.get.reader(provider=providers.SQLITE,database=DATABASE,table='friends')\n",
    "#\n",
    "# basic read of the data created in the first cell\n",
    "_df = reader.read()\n",
    "print (_df)\n",
    "print ()\n",
    "print()\n",
    "#\n",
    "# read of the data with pipeline function provided to alter the database\n",
    "print (reader.read(pipeline=[_autoincrement,reduce]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The parameters for instantiating a transport object (reader or writer) can be found at [data-transport home](https://healthcareio.the-phi.com/data-transport)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
--- a/notebooks/s3.ipynb
+++ b/notebooks/s3.ipynb
@ -0,0 +1,131 @@
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Writing to AWS S3\n",
    "\n",
    "We have setup our demo environment with the label **aws** passed to reference our s3 access_key and secret_key and file (called friends.csv). In the cell below we will write the data to our aws s3 bucket named **com.phi.demo**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2.2.1\n"
     ]
    }
   ],
   "source": [
    "#\n",
    "# Writing to an AWS S3 bucket\n",
    "#\n",
    "import transport\n",
    "from transport import providers\n",
    "import pandas as pd\n",
    "_data = pd.DataFrame({\"name\":['James Bond','Steve Rogers','Steve Nyemba'],'age':[55,150,44]})\n",
    "mgw = transport.get.writer(label='aws')\n",
    "mgw.write(_data)\n",
    "print (transport.__version__)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Reading from AWS S3\n",
    "\n",
    "The cell below reads the data that has been written by the cell above and computes the average age. The code performs the following:\n",
    "\n",
    "- Basic read of the designated file **friends.csv**\n",
    "- Compute average age using standard pandas functions\n",
    "\n",
    "**NOTE**\n",
    "\n",
    "By design **read** objects are separated from **write** objects in order to avoid accidental writes to the database.\n",
    "Read objects are created with **transport.get.reader** whereas write objects are created with **transport.get.writer**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          bname  age\n",
      "0    James Bond   55\n",
      "1  Steve Rogers  150\n",
      "2  Steve Nyemba   44\n",
      "--------- STATISTICS ------------\n",
      "83.0\n"
     ]
    }
   ],
   "source": [
    "\n",
    "import transport\n",
    "from transport import providers\n",
    "import pandas as pd\n",
    "\n",
    "def cast(stream) :\n",
    "    print (stream)\n",
    "    return pd.DataFrame(str(stream))\n",
    "mgr = transport.get.reader(label='aws')\n",
    "_df = mgr.read()\n",
    "print (_df)\n",
    "print ('--------- STATISTICS ------------')\n",
    "print (_df.age.mean())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "An **auth-file** is a file that contains database parameters used to access the database. \n",
    "For code in shared environments, we recommend \n",
    "\n",
    "1. Having the **auth-file** stored on disk \n",
    "2. and the location of the file is set to an environment variable.\n",
    "\n",
    "To generate a template of the **auth-file** open the **file generator wizard** found at https://healthcareio.the-phi.com/data-transport"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
--- a/pyproject.toml
+++ b/pyproject.toml
@ -0,0 +1,54 @@
 [build-system]
 requires = ["setuptools>=61.0", "wheel"]
 build-backend = "setuptools.build_meta"
 [project]
 name = "data-transport"
 dynamic = ["version"]
 authors = [
    {name="Steve L. Nyemba" , email = "info@the-phi.com"},
 ]
 description = ""
 readme = "README.md"
 license = {text = "LICENSE"}
 keywords = ["mongodb","duckdb","couchdb","rabbitmq","file","read","write","s3","sqlite"]
 classifiers = [
 "License :: OSI Approved :: MIT License",
    "Topic :: Utilities",
 ]
 dependencies = [
    "termcolor","sqlalchemy", "aiosqlite","duckdb-engine",
    "mysql-connector-python","psycopg2-binary","nzpy","pymssql","duckdb-engine","aiosqlite",
    "typer","pandas","numpy","sqlalchemy","pyarrow","smart-open",
    "plugin-ix@git+https://github.com/lnyemba/plugins-ix"
 ]
 [project.optional-dependencies]
 sql         = ["mysql-connector-python","psycopg2-binary","nzpy","pymssql","duckdb-engine","aiosqlite"]
 nosql       = ["pymongo","cloudant"]
 cloud       = ["boto","boto3","botocore","pyncclient","pandas-gbq","google-cloud-bigquery","google-cloud-bigquery-storage", "databricks-sqlalchemy","pyncclient","boto3","boto","botocore"]
 warehouse   = ["pydrill","pyspark","sqlalchemy_drill"]
 other       = ["pika","flask-session"]
 all         = ["mysql-connector-python","psycopg2-binary","nzpy","pymssql","duckdb-engine","aiosqlite","pymongo","cloudant","pandas-gbq","google-cloud-bigquery","google-cloud-bigquery-storage", "databricks-sqlalchemy","pyncclient","boto3","boto","botocore","pydrill","pyspark","sqlalchemy_drill", "pika","aiosqlite","boto3","boto","botocore", "pyncclient"]
 [project.urls]
 Homepage = "https://healthcareio.the-phi.com/git/code/transport.git"
 #[project.scripts]
 #transport = "transport:main"
 [tool.setuptools]
 include-package-data = true
 zip-safe = false
 script-files = ["bin/transport"]
 [tool.setuptools.packages.find]
 include = ["info","info.*", "transport", "transport.*"]
 [tool.setuptools.dynamic]
 version = {attr = "info.__version__"}
 #authors = {attr = "meta.__author__"}
 # If you have a info.py file, you might also want to include the author dynamically:
 # [tool.setuptools.dynamic]
 # version = {attr = "info.__version__"}
 # authors = {attr = "info.__author__"}
--- a/setup.py
+++ b/setup.py
@ -1,28 +0,0 @@
 """
 This is a build file for the 
 """
 from setuptools import setup, find_packages
 import os
 import sys
 # from version import __version__,__author__
 from info import __version__, __author__,__app_name__,__license__
 def read(fname):
    return open(os.path.join(os.path.dirname(__file__), fname)).read() 
 args    = {
    "name":__app_name__,
    "version":__version__,
    "author":__author__,"author_email":"info@the-phi.com",
    "license":__license__,
    # "packages":["transport","info","transport/sql"]},
    "packages": find_packages(include=['info','transport', 'transport.*'])}
 args["keywords"]=['mongodb','couchdb','rabbitmq','file','read','write','s3','sqlite']
 args["install_requires"] = ['pyncclient','pymongo','sqlalchemy','pandas','typer','pandas-gbq','numpy','cloudant','pika','nzpy','boto3','boto','pyarrow','google-cloud-bigquery','google-cloud-bigquery-storage','flask-session','smart_open','botocore','psycopg2-binary','mysql-connector-python','numpy','pymssql']
 args["url"] =   "https://healthcareio.the-phi.com/git/code/transport.git"
 args['scripts'] = ['bin/transport']
 # if sys.version_info[0] == 2 :
 #     args['use_2to3'] = True
 #     args['use_2to3_exclude_fixers']=['lib2to3.fixes.fix_import']
 setup(**args)
--- a/transport/init.py
+++ b/transport/init.py
@ -18,27 +18,56 @@ Source Code is available under MIT License:
 """
 import numpy as np
-from transport import sql, nosql, cloud, other
+#from transport import sql, nosql, cloud, other, warehouse
 from transport import sql
 try:
    from transport import  nosql
 except Exception as e:
    nosql = {}
 try:
    from transport import  cloud
 except Exception as e:
    cloud = {}
 try:
    from transport import  warehouse
 except Exception as e:
    warehouse = {}
 try:
    from transport import  other
 except Exception as e:
    other = {}
 import pandas as pd
 import json
 import os
-from info import __version__,__author__,__email__,__license__,__app_name__
+from info import __version__,__author__,__email__,__license__,__app_name__,__whatsnew__,__edition__
 from transport.iowrapper import IWriter, IReader, IETL
 from transport.plugins import PluginLoader
 from transport import providers
 import copy 
 from transport import registry
-
+from transport.plugins import Plugin 
 PROVIDERS = {}
 def init():
    global PROVIDERS
-    for _module in [cloud,sql,nosql,other] :
+    for _module in [cloud,sql,nosql,other,warehouse] :
        for _provider_name in dir(_module) :
-            if _provider_name.startswith('__') or _provider_name == 'common':
+            if _provider_name.startswith('__') or _provider_name == 'common' or type(_module) in [None,str,dict]:
                continue
            PROVIDERS[_provider_name] = {'module':getattr(_module,_provider_name),'type':_module.__name__}
    #
    # loading the registry
    if not registry.isloaded() :
        registry.load()
 # def _getauthfile (path) :
 #     f = open(path)
 #     _object = json.loads(f.read())
 #     f.close()
 #     return _object
 def instance (**_args):
    """
    This function returns an object to read from or write to a supported database provider/vendor
@ -48,15 +77,6 @@ def instance (**_args):
    kwargs      These are arguments that are provider/vendor specific
    """
    global PROVIDERS
    # if not registry.isloaded () :
    #     if ('path' in _args and registry.exists(_args['path'] )) or registry.exists():
    #         registry.load() if 'path' not in _args else registry.load(_args['path'])
    #         print ([' GOT IT'])
    # if 'label' in _args and registry.isloaded():
    #     _info = registry.get(_args['label'])
    #     if _info :
    #         #
    #         _args = dict(_args,**_info)
    if 'auth_file' in _args:
        if os.path.exists(_args['auth_file']) :
@ -77,12 +97,13 @@ def instance (**_args):
        if not registry.isloaded () : 
            if ('path' in _args and registry.exists(_args['path'] )) or registry.exists():
                registry.load() if 'path' not in _args else registry.load(_args['path'])
        _info = {}
        if 'label' in _args and registry.isloaded():
            _info = registry.get(_args['label'])
-            
+        else:
            _info = registry.get()    
        if _info :
-                #
+            _args = dict(_info,**_args) #-- we can override the registry parameters with our own arguments
                _args = dict(_args,**_info)
    if 'provider' in _args and _args['provider'] in PROVIDERS :
        _info = PROVIDERS[_args['provider']]
@ -112,8 +133,32 @@ def instance (**_args):
        #         for _delegate in _params :
        #             loader.set(_delegate)
-        loader = None if 'plugins' not in _args else _args['plugins']
+        _plugins = None if 'plugins' not in _args else _args['plugins']
-        return IReader(_agent,loader) if _context == 'read' else IWriter(_agent,loader)
+        
        # if registry.has('logger') :
        #     _kwa = registry.get('logger')
        #     _lmodule = getPROVIDERS[_kwa['provider']]
        if ( ('label' in _args and _args['label'] != 'logger') and registry.has('logger')):
            #
            # We did not request the label called logger, so we set up a logger if one is specified in the registry
            #
            _kwargs = registry.get('logger')
            _kwargs['context']  = 'write'
            _kwargs['table']    =_module.__name__.split('.')[-1]+'_logs'
            # _logger = instance(**_kwargs)
            _module = PROVIDERS[_kwargs['provider']]['module']
            _logger = getattr(_module,'Writer')
            _logger = _logger(**_kwargs)
        else:
            _logger = None
        _kwargs = {'agent':_agent,'plugins':_plugins,'logger':_logger}
        if 'args' in _args :
            _kwargs['args'] = _args['args']
        # _datatransport =  IReader(_agent,_plugins,_logger) if _context == 'read' else IWriter(_agent,_plugins,_logger)
        _datatransport =  IReader(**_kwargs) if _context == 'read' else IWriter(**_kwargs)
        return _datatransport
    else:
        #
@ -127,22 +172,45 @@ class get :
    """
    @staticmethod
    def reader (**_args):
-        if not _args :
+        if not _args or ('provider' not in _args and 'label' not in _args):
            _args['label'] = 'default'
        _args['context'] = 'read'
-        return instance(**_args)
+        # return instance(**_args)
        # _args['logger'] = instance(**{'label':'logger','context':'write','table':'logs'})
        _handler =  instance(**_args)
        # _handler.setLogger(get.logger())
        return _handler
    @staticmethod
    def writer(**_args):
        """
        This function is a wrapper that will return a writer to a database. It disambiguates the interface
        """
-        if not _args :
+        if not _args or ('provider' not in _args and 'label' not in _args):
            _args['label'] = 'default'
        _args['context'] = 'write'
        # _args['logger'] = instance(**{'label':'logger','context':'write','table':'logs'})
        _handler =  instance(**_args)
        #
        # Implementing logging with the 'eat-your-own-dog-food' approach
        # Using dependency injection to set the logger (problem with imports)
        #
        # _handler.setLogger(get.logger())
        return _handler
    @staticmethod
    def logger ():
        if registry.has('logger') :
            _args = registry.get('logger')
            _args['context']  = 'write'
            return instance(**_args)
        return None
    @staticmethod
    def etl (**_args):
        if 'source' in _args and 'target' in _args :
            return IETL(**_args)
        else:
            raise Exception ("Malformed input found, object must have both 'source' and 'target' attributes")
--- a/transport/cloud/s3.py
+++ b/transport/cloud/s3.py
@ -3,10 +3,13 @@ Data Transport - 1.0
 Steve L. Nyemba, The Phi Technology LLC
 This file is a wrapper around s3 bucket provided by AWS for reading and writing content
 TODO:
 	- Address limitations that will properly read csv if it is stored with content type text/csv
 """
 from datetime import datetime
-import boto
+import boto3
-from boto.s3.connection import S3Connection, OrdinaryCallingFormat
+# from boto.s3.connection import S3Connection, OrdinaryCallingFormat
 import numpy as np
 import botocore
 from smart_open import smart_open
@ -14,6 +17,7 @@ import sys
 import json
 from io import StringIO
 import pandas as pd
 import json
 class s3 :
@ -29,46 +33,37 @@ class s3 :
 			@param filter		filename or filtering elements
 		"""
 		try:
-			self.s3 = S3Connection(args['access_key'],args['secret_key'],calling_format=OrdinaryCallingFormat())			
+			self._client = boto3.client('s3',aws_access_key_id=args['access_key'],aws_secret_access_key=args['secret_key'],region_name=args['region'])
-			self.bucket = self.s3.get_bucket(args['bucket'].strip(),validate=False) if 'bucket' in args else None
+			self._bucket_name = args['bucket']	
-			# self.path = args['path']
+			self._file_name = args['file']
-			self.filter = args['filter'] if 'filter' in args else None
+			self._region = args['region']
 			self.filename = args['file'] if 'file' in args else None
 			self.bucket_name = args['bucket'] if 'bucket' in args else None
 		except Exception as e :
 			self.s3 = None
 			self.bucket = None
 			print (e)
 			pass
 	def has(self,**_args):
 		_found = None
 		try:
 			if 'file' in _args and 'bucket' in _args:
 				_found = self.meta(**_args)
 			elif 'bucket' in _args and not 'file' in _args:
 				_found =  self._client.list_objects(Bucket=_args['bucket']) 
 			elif 'file' in _args and not 'bucket' in _args :
 				_found = self.meta(bucket=self._bucket_name,file = _args['file'])
 		except Exception as e:
 			_found = None
 			pass
 		return type(_found) == dict
 	def meta(self,**args):
 		"""
 		This function will return information either about the file in a given bucket
 		:name name of the bucket
 		"""
-		info = self.list(**args)
+		_bucket = self._bucket_name if 'bucket' not in args else args['bucket']
-		[item.open() for item in info]
+		_file =  self._file_name if 'file' not in args else args['file']
-		return [{"name":item.name,"size":item.size} for item in info]
+		_data = self._client.get_object(Bucket=_bucket,Key=_file)
-	def list(self,**args):
+		return _data['ResponseMetadata']
-		"""
+	def close(self):
-		This function will list the content of a bucket, the bucket must be provided by the name
+		self._client.close()	
 		:name	name of the bucket
 		"""
 		return list(self.s3.get_bucket(args['name']).list())
 	def buckets(self):
 		#
 		# This function will return all buckets, not sure why but it should be used cautiously 
 		# based on why the s3 infrastructure is used
 		#
 		return [item.name for item in self.s3.get_all_buckets()]
 		# def buckets(self):
 		pass
 		# """
 		# This function is a wrapper around the bucket list of buckets for s3
 		# """
 		# return self.s3.get_all_buckets()
 class Reader(s3) :
 	"""	
@ -77,51 +72,66 @@ class Reader(s3) :
 		- stream content	if file is Not None
 		@TODO: support read from all buckets, think about it
 	"""
-	def __init__(self,**args) :
+	def __init__(self,**_args) :
-			s3.__init__(self,**args)
+			super().__init__(**_args)
-	def files(self):
+	
-		r = []
+	def _stream(self,**_args):
 		try:
 			return [item.name for item in self.bucket if item.size > 0]
 		except Exception as e:
 			pass
 		return r
 	def stream(self,limit=-1):
 		"""
 			At this point we should stream a file from a given bucket
 		"""
-		key = self.bucket.get_key(self.filename.strip())
+		_object = self._client.get_object(Bucket=_args['bucket'],Key=_args['file'])
-		if key is None :
+		_stream = None
-			yield None
+		try:
 			_stream = _object['Body'].read()
 		except Exception as e:
 			pass
 		if not _stream :
 			return None
 		if _object['ContentType'] in ['text/csv'] :
 			return pd.read_csv(StringIO(str(_stream).replace("\\n","\n").replace("\\r","").replace("\'","")))
 		else:
-			count = 0
+			return _stream
-			with smart_open(key) as remote_file:
+		
 				for line in remote_file:
 					if count == limit and limit > 0 :
 						break
 				yield line
 				count += 1
 	def read(self,**args) :
-		if self.filename is None :
+		
-			# 
+		_name = self._file_name if 'file' not in args else args['file']
-		# returning the list of files because no one file was specified.
+		_bucket = args['bucket'] if 'bucket' in args else self._bucket_name
-			return self.files()
+		return self._stream(bucket=_bucket,file=_name)
-		else:
+		
 			limit = args['size'] if 'size' in args else -1
 			return self.stream(limit)
 class Writer(s3) :
 	"""
 	def __init__(self,**args) :
 		s3.__init__(self,**args)
 	def mkdir(self,name):
 	"""
-		This function will create a folder in a bucket
+	def __init__(self,**_args) :
 		super().__init__(**_args)
 		#
 		# 
 		if not self.has(bucket=self._bucket_name) :
 			self.make_bucket(self._bucket_name)
 	def make_bucket(self,bucket_name):
 		"""
 		This function will create a folder in a bucket. It is best that the bucket is organized as a namespace
 		:name name of the folder
 		"""
-		self.s3.put_object(Bucket=self.bucket_name,key=(name+'/'))
+		
-	def write(self,content):
+		self._client.create_bucket(Bucket=bucket_name,CreateBucketConfiguration={'LocationConstraint': self._region})
-		file = StringIO(content.decode("utf8"))
+	def write(self,_data,**_args):
-		self.s3.upload_fileobj(file,self.bucket_name,self.filename)
+		"""
 		This function will write the data to the s3 bucket, files can be either csv, or json formatted files
 		"""
 		content = 'text/plain'
 		if type(_data) == pd.DataFrame :
 			_stream = _data.to_csv(index=False)
 			content = 'text/csv'
 		elif type(_data) == dict :
 			_stream = json.dumps(_data)
 			content = 'application/json'
 		else:
 			_stream = _data
 		file = StringIO(_stream)
 		bucket = self._bucket_name if 'bucket' not in _args else _args['bucket']
 		file_name = self._file_name if 'file' not in _args else _args['file']
 		self._client.put_object(Bucket=bucket, Key = file_name, Body=_stream,ContentType=content)
 		pass
--- a/transport/duck.py
+++ b/transport/duck.py
@ -0,0 +1,19 @@
 """
 This file is intended to handle duckdb databases
 """
 import duckdb
 from transport.common import Reader,Writer
 class Duck(Reader):
    def __init__(self,**_args):
        super().__init__(**_args)
        self._path = None if 'path' not in _args else _args['path']
        self._handler = duckdb.connect() if not self._path else duckdb.connect(self._path)
 class DuckReader(Duck) :
    def __init__(self,**_args):
        super().__init__(**_args)
    def read(self,**_args) :
        pass
--- a/transport/iowrapper.py
+++ b/transport/iowrapper.py
@ -5,35 +5,28 @@ NOTE: Plugins are converted to a pipeline, so we apply a pipeline when reading o
        - upon initialization we will load plugins
        - on read/write we apply a pipeline (if passed as an argument)
 """    
-from transport.plugins import plugin, PluginLoader
+from transport.plugins import Plugin, PluginLoader
 import transport
 from transport import providers
 from multiprocessing import Process
 import time
 import plugin_ix 
 class IO:
    """
    Base wrapper class for read/write and support for logs
    """
-    def __init__(self,_agent,plugins):
+    def __init__(self,**_args):
        _agent  = _args['agent']
        plugins = _args['plugins'] if 'plugins' in _args else None
        self._agent = _agent
        # self._ixloader = plugin_ix.Loader () #-- must indicate where the plugin registry file is 
        self._ixloader = plugin_ix.Loader (registry=plugin_ix.Registry(folder=transport.registry.REGISTRY_PATH))
        if plugins :
-            self._init_plugins(plugins)
+            self.init_plugins(plugins)
        else:
            self._plugins = None
    def _init_plugins(self,_args):
        """
        This function will load pipelined functions as a plugin loader
        """
        if 'path' in _args and 'names' in _args :
            self._plugins = PluginLoader(**_args)
        else:
            self._plugins = PluginLoader()
            [self._plugins.set(_pointer) for _pointer in _args]
        #
        # @TODO: We should have a way to log what plugins are loaded and ready to use
    def meta (self,**_args):
        if hasattr(self._agent,'meta') :
            return self._agent.meta(**_args)
@ -42,40 +35,58 @@ class IO:
    def close(self):
        if hasattr(self._agent,'close') :
            self._agent.close()
-    def apply(self):
+    # def apply(self):
-        """
+    #     """
-        applying pre/post conditions given a pipeline expression
+    #     applying pre/post conditions given a pipeline expression
-        """
+    #     """
-        for _pointer in self._plugins :
+    #     for _pointer in self._plugins :
-            _data = _pointer(_data)
+    #         _data = _pointer(_data)
    def apply(self,_query):
        if hasattr(self._agent,'apply') :
            return self._agent.apply(_query)
        return None
    def submit(self,_query):
        return self.delegate('submit',_query)
    def delegate(self,_name,_query):
        if hasattr(self._agent,_name) :
            pointer = getattr(self._agent,_name)
            return pointer(_query)
        return None
    def init_plugins(self,plugins):
        for _ref in plugins :
            self._ixloader.set(_ref)
 class IReader(IO):
    """
    This is a wrapper for read functionalities
    """
-    def __init__(self,_agent,pipeline=None):
+    def __init__(self,**_args):
-        super().__init__(_agent,pipeline)
+        super().__init__(**_args)
    def read(self,**_args):
        if 'plugins' in _args :
-            self._init_plugins(_args['plugins'])
+            self.init_plugins(_args['plugins'])
        _data = self._agent.read(**_args)
-        if self._plugins and self._plugins.ratio() > 0 :
+        # if self._plugins and self._plugins.ratio() > 0 :
-            _data = self._plugins.apply(_data)
+        #     _data = self._plugins.apply(_data)
        #
        # output data 
        #
        # applying the design pattern
        _data = self._ixloader.visitor(_data)
        return _data
 class IWriter(IO):
-    def __init__(self,_agent,pipeline=None):
+    def __init__(self,**_args): #_agent,pipeline=None):
-        super().__init__(_agent,pipeline)  
+        super().__init__(**_args) #_agent,pipeline)  
    def write(self,_data,**_args):
        # if 'plugins' in _args :
        #     self._init_plugins(_args['plugins'])
        if 'plugins' in _args :
-            self._init_plugins(_args['plugins'])
+            self.init_plugins(_args['plugins'])
        if self._plugins and self._plugins.ratio() > 0 :
            _data = self._plugins.apply(_data)
        self._ixloader.visitor(_data)
        self._agent.write(_data,**_args)
 #
@ -87,7 +98,7 @@ class IETL(IReader) :
    This class performs an ETL operation by inheriting a read and adding writes as pipeline functions
    """
    def __init__(self,**_args):
-        super().__init__(transport.get.reader(**_args['source']))
+        super().__init__(agent=transport.get.reader(**_args['source']),plugins=None)
        if 'target' in _args:
            self._targets = _args['target'] if type(_args['target']) == list else [_args['target']]
        else:
@ -98,16 +109,23 @@ class IETL(IReader) :
        self._hasParentProcess = False if 'hasParentProcess' not in _args else _args['hasParentProcess']
    def read(self,**_args):
        _data = super().read(**_args)
-
+        _schema = super().meta()
        for _kwargs in self._targets :
            if _schema :
                _kwargs['schema'] = _schema
            self.post(_data,**_kwargs)
        return _data
    def run(self) :
        return self.read()
    def post (self,_data,**_args) :
        """
        This function returns an instance of a process that will perform the write operation
        :_args  parameters associated with writer object
        """
        writer = transport.get.writer(**_args)
        if 'schema' in _args :
            writer.write(_data,schema=_args['schema'])
        else:
            writer.write(_data)
        writer.close()
--- a/transport/nosql/mongodb.py
+++ b/transport/nosql/mongodb.py
@ -33,6 +33,8 @@ class Mongo :
            :password   password for current user
        """
        self.host = 'localhost' if 'host' not in args else args['host']
        if ':' not in self.host and 'port' in args :
            self.host = ':'.join([self.host,str(args['port'])])
        self.mechanism= 'SCRAM-SHA-256' if 'mechanism' not in args else args['mechanism']
        # authSource=(args['authSource'] if 'authSource' in args else self.dbname)
        self._lock = False if 'lock' not in args else args['lock']
--- a/transport/other/init.py
+++ b/transport/other/init.py
@ -1 +1 @@
-from . import files, http, rabbitmq, callback, files
+from . import files, http, rabbitmq, callback, files, console
--- a/transport/plugins/init.py
+++ b/transport/plugins/init.py
@ -11,8 +11,10 @@ import importlib as IL
 import importlib.util
 import sys
 import os
 import pandas as pd
 import time
-class plugin :
+class Plugin :
    """
    Implementing function decorator for data-transport plugins (post-pre)-processing
    """
@ -22,8 +24,9 @@ class plugin :
        :mode   restrict to reader/writer
        :about  tell what the function is about    
        """
-        self._name = _args['name']
+        self._name = _args['name'] if 'name' in _args else None
-        self._about = _args['about']
+        self._version = _args['version'] if 'version' in _args else '0.1'
        self._doc = _args['doc'] if 'doc' in _args else "N/A"
        self._mode = _args['mode'] if 'mode' in _args else 'rw'
    def __call__(self,pointer,**kwargs):
        def wrapper(_args,**kwargs):
@ -32,57 +35,67 @@ class plugin :
        # @TODO:
        # add attributes to the wrapper object
        #
        self._name = pointer.__name__ if not self._name else self._name
        setattr(wrapper,'transport',True)
        setattr(wrapper,'name',self._name)
-        setattr(wrapper,'mode',self._mode)
+        setattr(wrapper,'version',self._version)
-        setattr(wrapper,'about',self._about)
+        setattr(wrapper,'doc',self._doc)
        return wrapper
 class PluginLoader :
    """
    This class is intended to load a plugin and make it available and assess the quality of the developed plugin
    """
    def __init__(self,**_args):
        """
        :path   location of the plugin (should be a single file)
        :_names of functions to load
        """
-        _names = _args['names'] if 'names' in _args else None
+        # _names = _args['names'] if 'names' in _args else None
-        path = _args['path'] if 'path' in _args else None
+        # path = _args['path'] if 'path' in _args else None
-        self._names = _names if type(_names) == list else [_names]
+        # self._names = _names if type(_names) == list else [_names]
        self._modules = {}
        self._names = []
-        if path and os.path.exists(path) and _names:
+        self._registry = _args['registry']
            for _name in self._names :
-                spec = importlib.util.spec_from_file_location('private', path)
+        pass
    def load (self,**_args):
        """
        This function loads a plugin
        """
        self._modules = {}
        self._names = []
        path = _args ['path']
        if os.path.exists(path) :
            _alias = path.split(os.sep)[-1]
            spec = importlib.util.spec_from_file_location(_alias, path)
            module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(module) #--loads it into sys.modules
-                if hasattr(module,_name) :
+            for _name in dir(module) :
                if self.isplugin(module,_name) :
-                        self._modules[_name] = getattr(module,_name)
+                    self._module[_name] = getattr(module,_name)
-                    else:
+                    # self._names [_name]
-                        print ([f'Found {_name}', 'not plugin'])
+    def format (self,**_args):
-                else:
+        uri = _args['alias'],_args['name']
-                    #
+    # def set(self,_pointer) :
-                    # @TODO: We should log this somewhere some how
+    def set(self,_key) :
                    print (['skipping ',_name, hasattr(module,_name)])
                    pass
        else:
            #
            # Initialization is empty
            self._names = []
        pass
    def set(self,_pointer) :
        """
        This function will set a pointer to the list of modules to be called
        This should be used within the context of using the framework as a library
        """
-        _name = _pointer.__name__
+        if type(_key).__name__ == 'function':
            #
            # The pointer is in the code provided by the user and loaded in memory
            #
            _pointer = _key
            _key = 'inline@'+_key.__name__
            # self._names.append(_key.__name__)
        else:
            _pointer = self._registry.get(key=_key)
        if _pointer  :
            self._modules[_key] = _pointer
            self._names.append(_key)
        self._modules[_name] = _pointer
        self._names.append(_name)
    def isplugin(self,module,name):
        """
        This function determines if a module is a recognized plugin
@ -107,12 +120,31 @@ class PluginLoader :
        _n = len(self._names)
        return len(set(self._modules.keys()) & set (self._names)) / _n
-    def apply(self,_data):
+    def apply(self,_data,_logger=[]):
        _input= {}
        for _name in self._modules :
            try:
                _input = {'action':'plugin','object':_name,'input':{'status':'PASS'}}
                _pointer = self._modules[_name]
                if type(_data) == list :
                    _data = pd.DataFrame(_data)
                _brow,_bcol = list(_data.shape) 
                #
                # @TODO: add exception handling
                _data = _pointer(_data)
                _input['input']['shape'] = {'rows-dropped':_brow - _data.shape[0]}
            except Exception as e:
                _input['input']['status'] = 'FAILED'
                print (e)
            time.sleep(1)
            if _logger:
                try:
                    _logger(**_input)
                except Exception as e:
                    pass    
        return _data
    # def apply(self,_data,_name):
    #     """
--- a/transport/providers/init.py
+++ b/transport/providers/init.py
@ -10,8 +10,11 @@ HTTP='http'
 BIGQUERY	='bigquery'
 FILE 	= 'file'
 ETL = 'etl'
-SQLITE = 'sqlite'
+
 SQLITE = 'sqlite3'
 SQLITE3= 'sqlite3'
 DUCKDB = 'duckdb'
 REDSHIFT = 'redshift'
 NETEZZA = 'netezza'
 MYSQL = 'mysql'
@ -41,6 +44,9 @@ PGSQL	= POSTGRESQL
 AWS_S3  = 's3'
 RABBIT = RABBITMQ
-
+ICEBERG='iceberg'
 APACHE_ICEBERG = 'iceberg'
 DRILL = 'drill'
 APACHE_DRILL = 'drill'
 # QLISTENER = 'qlistener'
--- a/transport/registry.py
+++ b/transport/registry.py
@ -3,46 +3,59 @@ import json
 from info import __version__
 import copy
 import transport
 import importlib
 import importlib.util
 import shutil
 from io import StringIO
 """
 This class manages data from the registry and allows (read only)
@TODO: add property to the DATA attribute
 """
 if 'HOME' in os.environ :
    REGISTRY_PATH=os.sep.join([os.environ['HOME'],'.data-transport'])
 else:
    REGISTRY_PATH=os.sep.join([os.environ['USERPROFILE'],'.data-transport'])
-REGISTRY_PATH=os.sep.join([os.environ['HOME'],'.data-transport'])
+#
 # This path can be overridden by an environment variable ...
 #
 if 'DATA_TRANSPORT_REGISTRY_PATH' in os.environ :
    REGISTRY_PATH = os.environ['DATA_TRANSPORT_REGISTRY_PATH']
 REGISTRY_FILE= 'transport-registry.json'
 DATA = {}
 def isloaded ():
    return DATA not in [{},None]
-def exists (path=REGISTRY_PATH) :
+def exists (path=REGISTRY_PATH,_file=REGISTRY_FILE) :
    """
    This function determines if there is a registry at all
    """
    p = os.path.exists(path)
-    q = os.path.exists( os.sep.join([path,REGISTRY_FILE]))
+    q = os.path.exists( os.sep.join([path,_file]))
    return p and q
-def load (_path=REGISTRY_PATH):
+def load (_path=REGISTRY_PATH,_file=REGISTRY_FILE):
    global DATA
    if exists(_path) :
-        path = os.sep.join([_path,REGISTRY_FILE])
+        path = os.sep.join([_path,_file])
        f = open(path)
        DATA = json.loads(f.read())
        f.close()
-def init (email,path=REGISTRY_PATH,override=False):
+def init (email,path=REGISTRY_PATH,override=False,_file=REGISTRY_FILE):
    """
    Initializes the registry and will raise an exception in the event of an issue
    """
    p = '@' in email
-    q = False if '.' not in email else email.split('.')[-1] in ['edu','com','io','ai']
+    #q = False if '.' not in email else email.split('.')[-1] in ['edu','com','io','ai','org']
    q = len(email.split('.')[-1]) in [2,3]
    if p and q :
        _config = {"email":email,'version':__version__}
        if not os.path.exists(path):
            os.makedirs(path)
-        filename = os.sep.join([path,REGISTRY_FILE])
+        filename = os.sep.join([path,_file])
        if not os.path.exists(filename) or override == True :
            f = open(filename,'w')
@ -57,6 +70,8 @@ def init (email,path=REGISTRY_PATH,override=False):
 def lookup (label):
    global DATA
    return label in DATA
 has = lookup 
 def get (label='default') :
    global DATA
    return copy.copy(DATA[label]) if label in DATA else {}
@ -68,8 +83,11 @@ def set (label, auth_file, default=False,path=REGISTRY_PATH) :
    if label == 'default' :
        raise Exception ("""Invalid label name provided, please change the label name and use the switch""")
    reg_file = os.sep.join([path,REGISTRY_FILE])
-    if os.path.exists (auth_file) and os.path.exists(path) and os.path.exists(reg_file):
+    if os.path.exists(path) and os.path.exists(reg_file):
        if type(auth_file) == str and os.path.exists (auth_file) :
            f = open(auth_file)
        elif type(auth_file) == StringIO:
            f = auth_file
        _info = json.loads(f.read())
        f.close()
        f = open(reg_file)
--- a/transport/sql/init.py
+++ b/transport/sql/init.py
@ -3,7 +3,7 @@ This namespace/package wrap the sql functionalities for a certain data-stores
    - netezza, postgresql, mysql and sqlite
    - mariadb, redshift (also included)
 """
-from . import postgresql, mysql, netezza, sqlite, sqlserver
+from . import postgresql, mysql, netezza, sqlite, sqlserver, duckdb
 #
--- a/transport/sql/common.py
+++ b/transport/sql/common.py
@ -3,6 +3,8 @@ This file encapsulates common operations associated with SQL databases via SQLAl
 """
 import sqlalchemy as sqa
 from sqlalchemy import text , MetaData, inspect
 import pandas as pd
 class Base:
@ -11,7 +13,13 @@ class Base:
        self._port = None
        self._database = _args['database']
        self._table = _args['table'] if 'table' in _args else None
-        self._engine= sqa.create_engine(self._get_uri(**_args),future=True)
+        _uri = self._get_uri(**_args)
        if type(_uri) == str :
            self._engine= sqa.create_engine(_uri,future=True)
        else:
            _uri,_kwargs = _uri
            self._engine= sqa.create_engine(_uri,**_kwargs,future=True)
    def _set_uri(self,**_args) :
        """
        :provider   provider
@ -32,21 +40,33 @@ class Base:
        :table  optional name of the table (can be fully qualified)
        """
        _table = self._table if 'table' not in _args else _args['table']
        _map = {'TINYINT':'INTEGER','BIGINT':'INTEGER','TEXT':'STRING','DOUBLE_PRECISION':'FLOAT','NUMERIC':'FLOAT','DECIMAL':'FLOAT','REAL':'FLOAT'}
        _schema = []
-        if _table :
+        # if _table :
-            if sqa.__version__.startswith('1.') :
+        #     if sqa.__version__.startswith('1.') :
-                _handler = sqa.MetaData(bind=self._engine)
+        #         _handler = sqa.MetaData(bind=self._engine)
-                _handler.reflect()
+        #         _handler.reflect()
-            else:
+        #     else:
-                #
+        #         #
-                # sqlalchemy's version 2.+
+        #         # sqlalchemy's version 2.+
-                _handler = sqa.MetaData()
+        #         _handler = sqa.MetaData()
-                _handler.reflect(bind=self._engine)
+        #         _handler.reflect(bind=self._engine)
        #     #
        #     # Let us extract the schema with the native types
        #     _map = {'BIGINT':'INTEGER','TEXT':'STRING','DOUBLE_PRECISION':'FLOAT','NUMERIC':'FLOAT','DECIMAL':'FLOAT','REAL':'FLOAT'}
        #     _schema = [{"name":_attr.name,"type":_map.get(str(_attr.type),str(_attr.type))} for _attr in _handler.tables[_table].columns]
        #
-            # Let us extract the schema with the native types
+        try:
-            _map = {'BIGINT':'INTEGER','TEXT':'STRING','DOUBLE_PRECISION':'FLOAT','NUMERIC':'FLOAT','DECIMAL':'FLOAT','REAL':'FLOAT'}
+            if _table :
-            _schema = [{"name":_attr.name,"type":_map.get(str(_attr.type),str(_attr.type))} for _attr in _handler.tables[_table].columns]
+                _inspector = inspect(self._engine)
                _columns = _inspector.get_columns(_table)
                _schema = [{'name':column['name'],'type':_map.get(str(column['type']),str(column['type'])) } for column in _columns]
                return _schema
        except Exception as e:
            pass
        # else:
        return []
    def  has(self,**_args):
        return self.meta(**_args)
    def apply(self,sql):
@ -56,11 +76,20 @@ class Base:
        @TODO: Execution of stored procedures
        """
-        return pd.read_sql(sql,self._engine) if sql.lower().startswith('select') or sql.lower().startswith('with') else None
+        if sql.strip().lower().startswith('select') or sql.strip().lower().startswith('with') or sql.strip().startswith('show'):
            return pd.read_sql(sql,self._engine) 
        else:
            _handler = self._engine.connect()
            _handler.execute(text(sql))
            _handler.commit ()
            _handler.close()
        return None
 class SQLBase(Base):
    def __init__(self,**_args):
        super().__init__(**_args)
        self._schema = _args.get('schema',None)
    def get_provider(self):
        raise Exception ("Provider Needs to be set ...")
    def get_default_port(self) :
@ -84,7 +113,11 @@ class SQLBase(Base):
        # _uri = [_item.strip() for _item in _uri if _item.strip()]
        # return '/'.join(_uri)
        return f'{_provider}://{_host}/{_database}' if _account == '' else f'{_provider}://{_account}{_host}/{_database}'
-
+    def close(self,) :
        try:
            self._engine.dispose()
        except :
            pass
 class BaseReader(SQLBase):
    def __init__(self,**_args):
        super().__init__(**_args)    
@ -96,6 +129,8 @@ class BaseReader(SQLBase):
            sql = _args['sql']
        else:
            _table = _args['table'] if 'table' in _args else self._table
            if self._schema and type(self._schema) == str :
                _table = f'{self._schema}.{_table}'
            sql = f'SELECT * FROM {_table}'
        return self.apply(sql)
@ -106,9 +141,11 @@ class BaseWriter (SQLBase):
    """
    def __init__(self,**_args):
        super().__init__(**_args)
    def write(self,_data,**_args):
        if type(_data) == dict :
-            _df = pd.DataFrame(_data)
+            _df = pd.DataFrame([_data])
        elif type(_data) == list :
            _df = pd.DataFrame(_data)
        else:
@ -125,5 +162,8 @@ class BaseWriter (SQLBase):
        #     _mode['schema'] = _args['schema']
        # if 'if_exists' in _args :
        #     _mode['if_exists'] = _args['if_exists']
-
+        if 'schema' in _args and type(_args['schema']) == str:
            self._schema = _args.get('schema',None)
        if self._schema :
           _mode['schema'] = self._schema
        _df.to_sql(_table,self._engine,**_mode)
--- a/transport/sql/duckdb.py
+++ b/transport/sql/duckdb.py
@ -0,0 +1,26 @@
 """
 This module implements the handler for duckdb (in memory or not)
 """
 from transport.sql.common import Base, BaseReader, BaseWriter
 class Duck :
    """
    Settings shared by the duckdb reader and writer: database location, provider name and connection URI
    """
    def __init__(self,**_args):
        """
        :database   path of the duckdb database; when missing, None or empty, duckdb operates in-memory
        """
        #
        # duckdb with none as database will operate as an in-memory database
        # A None value must end up as an empty path, otherwise the URI would point to a file literally named 'None'
        #
        self.database = _args.get('database') or ''
    def get_provider(self):
        """
        Returns the name of the provider
        """
        return "duckdb"
    def _get_uri(self,**_args):
        """
        Returns the connection URI of the configured database (the keyword arguments are not used)
        """
        return f"""duckdb:///{self.database}"""
 class Reader(Duck,BaseReader) :
    """
    duckdb reader, the connection options request a read-only access to the database
    """
    def __init__(self,**_args):
        #
        # Each parent is initialized explicitly: Duck first, then BaseReader
        Duck.__init__(self,**_args)
        BaseReader.__init__(self,**_args)
    def _get_uri(self,**_args):
        """
        Returns a pair (uri, options): the uri built by the parent class and the read-only connection options
        """
        _uri = super()._get_uri(**_args)
        _options = {'connect_args':{'read_only':True}}
        return _uri,_options
 class Writer(Duck,BaseWriter):
    """
    duckdb writer, it combines the duckdb settings (Duck) with the generic writer (BaseWriter)
    """
    def __init__(self,**_args):
        #
        # Each parent is initialized explicitly with the same keyword arguments: Duck first, then BaseWriter
        Duck.__init__(self,**_args)
        BaseWriter.__init__(self,**_args)
--- a/transport/warehouse/init.py
+++ b/transport/warehouse/init.py
@ -0,0 +1,7 @@
 """
 This namespace/package is intended to handle reads and writes against data warehouse solutions such as:
    - apache iceberg
    - apache drill
    - clickhouse (...)
 """
 from . import iceberg, drill
--- a/transport/warehouse/drill.py
+++ b/transport/warehouse/drill.py
@ -0,0 +1,55 @@
 import sqlalchemy
 import pandas as pd
 from .. sql.common import BaseReader , BaseWriter
 import sqlalchemy as sqa
 class Drill :
    """
    Settings shared by the apache drill reader and writer (host, port, ssl, table, database/schema)
    as well as the lookup of a table's columns
    """
    __template = {'host':None,'port':None,'ssl':None,'table':None,'database':None}
    def __init__(self,**_args):
        """
        :host       drill host (default localhost)
        :port       drill port (default is given by get_default_port)
        :ssl        value of the use_ssl flag of the connection URI (default False)
        :table      table name, optionally qualified with dots
        :database   database; required (KeyError otherwise) when the table is not qualified
        """
        self._host = _args['host'] if 'host' in _args else 'localhost'
        self._port = _args['port'] if 'port' in _args else self.get_default_port()
        self._ssl = False if 'ssl' not in _args else _args['ssl']
        self._table = _args['table'] if 'table' in _args else None
        _seg = self._table.split('.') if self._table and '.' in self._table else []
        if len(_seg) > 2 :
            #
            # The first two segments of a fully qualified table carry the schema and the database
            self._schema,self._database = _seg[:2]
        elif len(_seg) == 2 :
            #
            # A table with a single qualifier used to leave _schema/_database undefined,
            # building the URI would then fail with an AttributeError.
            # The database argument is used when available, otherwise the qualifier of the table
            self._database = _args.get('database') or _seg[0]
            self._schema = self._database.split('.')[0]
        else:
            self._database=_args['database']
            self._schema = self._database.split('.')[0]
    def _get_uri(self,**_args):
        """
        Returns the connection URI built from the host, port, database and ssl flag
        """
        return f'drill+sadrill://{self._host}:{self._port}/{self._database}?use_ssl={self._ssl}'
    def get_provider(self):
        """
        Returns the name of the provider (dialect+driver)
        """
        return "drill+sadrill"
    def get_default_port(self):
        """
        Returns the port used when none is specified
        """
        return "8047"
    def meta(self,**_args):
        """
        Returns the columns of a table as a list of records {name,type}
        An empty list is returned when no table is known or when the query fails (the error is printed)
        :table  table name, optionally qualified (default is the table provided at initialization)
        NOTE: self._engine is not created by this class, the concrete class is expected to provide it
        """
        _table = _args['table'] if 'table' in _args else self._table
        if not _table :
            return []
        if '.' in _table :
            _seg = _table.split('.')
            #
            # The schema is made of at most the first two qualifiers, the table name (last segment) is never part of it
            _schema = '.'.join(_seg[:-1][:2])
            _table = _seg[-1]
        else:
            _schema = self._schema
        _sql = f"select COLUMN_NAME AS name, CASE WHEN DATA_TYPE ='CHARACTER VARYING' THEN 'CHAR ( '||COLUMN_SIZE||' )' ELSE DATA_TYPE END AS type from INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA='{_schema}' and TABLE_NAME='{_table}'"
        try:
            _df  = pd.read_sql(_sql,self._engine)
            return _df.to_dict(orient='records')
        except Exception as e:
            #
            # best effort: the error is reported and the caller gets an empty schema
            print (e)
        return []
 class Reader (Drill,BaseReader) :
    """
    apache drill reader, the sqlalchemy engine is created from the URI of the Drill settings
    """
    def __init__(self,**_args):
        """
        :chunksize  optional chunk size, 0 when it is not provided
        The other keyword arguments are handed to the parent initializer
        """
        super().__init__(**_args)
        _uri = self._get_uri()
        self._chunksize = _args.get('chunksize',0)
        self._engine= sqa.create_engine(_uri,future=True)
 class Writer(Drill,BaseWriter):
    """
    apache drill writer, it combines the Drill settings with the generic writer (BaseWriter)
    """
    def __init__(self,**_args):
        #
        # super() already binds self; passing it again raised a TypeError because
        # the parent initializer only accepts keyword arguments
        super().__init__(**_args)
        #
        # Same engine as the Reader: BaseWriter.write hands self._engine to DataFrame.to_sql
        self._engine= sqa.create_engine(self._get_uri(),future=True)
--- a/transport/warehouse/iceberg.py
+++ b/transport/warehouse/iceberg.py
@ -0,0 +1,151 @@
 """
 dependency:
    - spark and SPARK_HOME environment variable must be set
 NOTE:
    When using the streaming option, ensure that it is in line with the default (1000 rows) or increase it in spark-defaults.conf
 """
 from pyspark.sql import SparkSession
 from pyspark import SparkContext
 from pyspark.sql.types import *
 from pyspark.sql.functions import col, to_date, to_timestamp
 import copy
 class Iceberg :
    """
    Spark session and catalog/database context shared by the apache iceberg reader and writer
    """
    def __init__(self,**_args):
        """
        providing catalog meta information (you must get this from apache iceberg)
        :table      default table name (optional)
        :catalog    catalog to set as the current catalog (optional)
        :database   database to set as the current database (optional)
        """
        #
        # Logging is left untouched, the log level can be lowered through the spark context if needed
        #
        # @TODO:
        #   Make arrangements for additional configuration elements 
        #
        self._session = SparkSession.builder.appName("data-transport").getOrCreate()
        self._session.conf.set("spark.sql.parquet.outputTimestampType", "TIMESTAMP_MICROS")
        self._catalog = self._session.catalog
        self._table = _args['table'] if 'table' in _args else None
        if 'catalog' in _args :
            #
            # Let us set the default catalog
            self._catalog.setCurrentCatalog(_args['catalog'])
        if 'database' in _args :
            self._database = _args['database']
            self._catalog.setCurrentDatabase(self._database)
        #
        # Whatever the session considers current at this point, whether it was set above or not
        self._catalogName = self._catalog.currentCatalog()
        self._databaseName = self._catalog.currentDatabase()
    def meta (self,**_args) :
        """
        This function should return the schema of a table (only)
        The schema is a list of fields stripped of their 'nullable' and 'metadata' entries,
        an empty list is returned when the table can not be read
        :table      table name (default is the table provided at initialization)
        :catalog    catalog name (default is the current catalog)
        :database   database name (default is the current database)
        """
        _schema = []
        try:
            _table = _args['table'] if 'table' in _args else self._table
            _tableName = self._getPrefix(**_args) + f".{_table}"
            _schema = self._session.table(_tableName).schema.jsonValue()['fields']
            for _item in _schema :
                #
                # pop with a default: a field lacking one of the entries must not interrupt the cleanup
                _item.pop('nullable',None)
                _item.pop('metadata',None)
        except Exception:
            #
            # best effort: the caller gets whatever schema was collected (usually nothing)
            pass
        return _schema
    def _getPrefix (self,**_args):        
        """
        Returns catalog.database, each one taken from the arguments or from the current session values
        """
        _catName = self._catalogName if 'catalog' not in _args else _args['catalog']
        _datName = self._databaseName if 'database' not in _args else _args['database']
        return '.'.join([_catName,_datName])
    def apply(self,_query):
        """
        sql query/command to run against apache iceberg
        The result is returned as a pandas data-frame
        """
        return self._session.sql(_query).toPandas()
    def has (self,**_args):
        """
        Returns True when the table is listed under catalog.database, False otherwise (or upon failure)
        :table      table name (default is the table provided at initialization, like meta does)
        :catalog    catalog name (default is the current catalog)
        :database   database name (default is the current database)
        """
        try:
            _prefix = self._getPrefix(**_args)
            if _prefix.endswith('.') :
                #
                # no database name, there is nothing to look into
                return False
            _table = _args['table'] if 'table' in _args else self._table
            return any(_item.name == _table for _item in self._catalog.listTables(_prefix))
        except Exception as e:
            print (e)
            return False
    def close(self):
        """
        Stops the underlying spark session
        """
        self._session.stop()
 class Reader(Iceberg) :
    """
    Reading data from an Apache Iceberg data warehouse (using pyspark)
    """
    def __init__(self,**_args):
        super().__init__(**_args)
    def read(self,**_args):
        """
        Returns a pandas data-frame with either the content of a table or the result of a query
        :table      table to read, it takes precedence over everything else
        :sql        query to run, it is honored even if the reader was initialized with a table
        :catalog    catalog name (default is the current catalog)
        :database   database name (default is the current database)
        A KeyError is raised when there is neither a table nor a query
        """
        #
        # The table provided at initialization is only a default: an explicit query
        # used to be silently ignored whenever the reader was bound to a table
        if 'table' in _args or (self._table and 'sql' not in _args) :
            _table = _args['table'] if 'table' in _args else self._table
            _table = self._getPrefix(**_args) + f'.{_table}'
            return self._session.table(_table).toPandas()
        else:
            sql = _args['sql']
            return self._session.sql(sql).toPandas()
 class Writer (Iceberg):
    """
    Writing data to an Apache Iceberg data warehouse (using pyspark)
    """
    def __init__(self,**_args):
        """
        :mode   write mode applied when the table already exists (default append)
        :table  default table name (optional)
        """
        super().__init__(**_args)
        self._mode = 'append' if 'mode' not in _args else _args['mode']
        self._table = None if 'table' not in _args else _args['table']
    def format (self,_schema) :
        """
        Converts a list of {name,type} records into a spark StructType with nullable fields
        Unknown types are handled as strings; an empty list is returned when there are no fields
        """
        _map = {'integer':IntegerType(),'float':DoubleType(),'double':DoubleType(),'date':DateType(),
                'timestamp':TimestampType(),'datetime':TimestampType(),'string':StringType(),'varchar':StringType()}
        _iceSchema = StructType([])
        for _item in _schema :
            _name = _item['name']
            _type = _item['type'].lower()
            _iceType = _map[_type] if _type in _map else StringType()
            _iceSchema.add (StructField(_name,_iceType,True))
        return _iceSchema if len(_iceSchema) else []
    def write(self,_data,**_args):
        """
        Writes data to a table: the table is created when it does not exist, otherwise the data
        is written with the requested mode
        :_data      data handed to spark's createDataFrame
        :table      table name (default is the table provided at initialization)
        :schema     optional list of {name,type} records, when provided it is enforced by spark
        :mode       write mode for an existing table (default is the mode provided at initialization)
        An exception is raised when no table name is available
        """
        _prefix = self._getPrefix(**_args)
        if 'table' not in _args and not self._table :
            raise Exception (f"Table Name should be specified for catalog/database {_prefix}")
        _schema = self.format(_args['schema']) if 'schema' in _args else []
        if not _schema :
            rdd = self._session.createDataFrame(_data,verifySchema=False)
        else :
            rdd = self._session.createDataFrame(_data,schema=_schema,verifySchema=True)
        _mode = self._mode if 'mode' not in _args else _args['mode']
        _table = self._table if 'table' not in _args else _args['table']
        if not self._session.catalog.tableExists(_table):
            #
            # @TODO:
            #   add partitioning information here 
            rdd.writeTo(_table).using('iceberg').create()
        else:
            #
            # The requested mode used to be computed and then ignored in favor of a hard-coded 'append'
            rdd.coalesce(10).write.format('iceberg').mode(_mode).save(_table)
Author	SHA1	Message	Date
Steve L. Nyemba	56e10454ed	Merge pull request 'v2.2.0' (#35 ) from v2.2.0 into master Reviewed-on: #35	4 months ago
Steve L. Nyemba	2ffb775c3d	Merge branch 'master' into v2.2.0	4 months ago
Steve Nyemba	89d762f39a	bug fixes: conditional imports	4 months ago
Steve Nyemba	6e753a1fcd	bug fixes	4 months ago
Steve Nyemba	18c54d7664	bug fixes	4 months ago
Steve Nyemba	f06d26f9b6	bug fixes:installer & imports	4 months ago
Steve L. Nyemba	8fdcbce42d	Merge pull request 'v2.2.0' (#34 ) from v2.2.0 into master Reviewed-on: #34	4 months ago
Steve Nyemba	be10ae17d7	bug fixes: installer & registry	4 months ago
Steve Nyemba	befdf453f5	bug fix: crash with etl & process	4 months ago
Steve L. Nyemba	b461ce9d7b	Merge pull request 'v2.2.0' (#33 ) from v2.2.0 into master Reviewed-on: #33	4 months ago
Steve Nyemba	fbdb4a4931	bug fix: registry and emails	4 months ago
Steve Nyemba	6e1c420952	project file specification	4 months ago
Steve Nyemba	66d881fdda	upgrade pyproject.toml, bug fix with registry	4 months ago
Steve L. Nyemba	6c26588462	Merge pull request 'v2.2.0' (#32 ) from v2.2.0 into master Reviewed-on: #32	4 months ago
Steve Nyemba	de4e065ca6	bug fix with newer setuptools	4 months ago
Steve Nyemba	e035f5eba0	windows bug fix, environment variable	4 months ago
Steve Nyemba	6f8019f582	bug fix	5 months ago
Steve L. Nyemba	d3517a5720	Merge pull request 'bug fix: logger issue' (#31 ) from v2.2.0 into master Reviewed-on: #31	5 months ago
Steve Nyemba	b0cd0b85dc	bug fix: logger issue	6 months ago
Steve L. Nyemba	4c98e81c14	Merge pull request 'v2.2.0: bug fixes' (#30 ) from v2.2.0 into master Reviewed-on: library/data-transport#30	7 months ago
Steve Nyemba	4b34c746ae	bug fix: missing table	7 months ago
Steve Nyemba	0977ad1b18	setup fixes	8 months ago
Steve Nyemba	98ef8a848e	bug fixes and dependencies	8 months ago
Steve Nyemba	469c6f89a2	fixes with plugin handler	8 months ago
Steve Nyemba	dd10f6db78	bug fix: version & cli	8 months ago
Steve Nyemba	dad2956a8c	version update	8 months ago
Steve Nyemba	eaa2b99a2d	bug fix: schema (postgresql) construct	8 months ago
Steve Nyemba	a1b5f2743c	bug fixes ...	9 months ago
Steve Nyemba	afa442ea8d	versioning update edition	9 months ago
Steve Nyemba	30645e46bd	bug fix: readonly for duckdb	9 months ago
Steve Nyemba	cdf783143e	...	9 months ago
Steve Nyemba	1a8112f152	adding iceberg notebook	9 months ago
Steve Nyemba	49ebd4a432	bug fix: close & etl	9 months ago
Steve Nyemba	c3627586b3	fix: refactor cli switches	10 months ago
Steve Nyemba	2a72de4cd6	bug fixes: registry and handling cli parameters as well as adding warehousing	10 months ago
Steve Nyemba	d0e655e7e3	update, community edition baseline	1 year ago
Steve L. Nyemba	492dc8f374	Merge pull request 'new provider console and bug fixes with applied commands' (#25 ) from v2.2.0 into master Reviewed-on: library/data-transport#25	1 year ago
Steve Nyemba	2df926da12	new provider console and bug fixes with applied commands	1 year ago
Steve L. Nyemba	e848367378	Merge pull request 'bug fix, duckdb in-memory handling' (#24 ) from v2.2.0 into master Reviewed-on: library/data-transport#24	1 year ago
Steve Nyemba	e9aab3b034	bug fix, duckdb in-memory handling	1 year ago
Steve L. Nyemba	c872ba8cc2	Merge pull request 'v2.2.0 - Bug fixes with mongodb, console' (#23 ) from v2.2.0 into master Reviewed-on: library/data-transport#23	1 year ago
Steve Nyemba	34db729ad4	bug fixes: mongodb console	1 year ago
Steve Nyemba	a7c72391e8	s3 notebook - code as documentation	1 year ago
Steve L. Nyemba	baa8164f16	Merge pull request 'aws s3 notebook, brief example' (#22 ) from v2.2.0 into master Reviewed-on: library/data-transport#22	1 year ago
Steve Nyemba	955369fdd8	aws s3 notebook, brief example	1 year ago
Steve L. Nyemba	31556ebd32	Merge pull request 'v2.2.0 bug fix - AWS-S3' (#21 ) from v2.2.0 into master Reviewed-on: library/data-transport#21	1 year ago
Steve Nyemba	63666e95ce	bug fix, TODO: figure out how to parse types	1 year ago
Steve Nyemba	9dba5daecd	bug fix, TODO: figure out how to parse types	1 year ago
Steve Nyemba	40f9c3930a	bug fixes, using boto3 instead of boto for s3 support	1 year ago
Steve L. Nyemba	1e7839198a	Merge pull request 'v2.2.0 - shared environment support and duckdb support' (#20 ) from v2.2.0 into master Reviewed-on: library/data-transport#20	1 year ago
Steve Nyemba	3faee02fa2	documentation ...	1 year ago
Steve Nyemba	6f6fd48982	bug fixes: environment variable usage	1 year ago
Steve Nyemba	808378afdb	bug fix: delegate (new feature)	1 year ago
Steve Nyemba	2edce85aed	documentation duckdb support	1 year ago
Steve Nyemba	235a44be66	bug fix: registry and parameter handling	1 year ago
Steve Nyemba	037019c1d7	bug fix	1 year ago
Steve Nyemba	c443c6c953	duckdb support	1 year ago
Steve Nyemba	dde4767e37	new version	1 year ago
Steve L. Nyemba	dce50a967e	Merge pull request 'documentation ...' (#19 ) from v2.0.4 into master Reviewed-on: library/data-transport#19	1 year ago
Steve L. Nyemba	5ccb073865	Merge pull request 'refactor: etl,better reusability & streamlined and threaded' (#18 ) from v2.0.4 into master Reviewed-on: library/data-transport#18	1 year ago
Steve L. Nyemba	3081fb98e7	Merge pull request 'version 2.0 - Refactored, Plugins support' (#17 ) from v2.0 into master Reviewed-on: library/data-transport#17	2 years ago
Steve L. Nyemba	58959359ad	Merge pull request 'bug fix: psycopg2 with numpy' (#14 ) from dev into master Reviewed-on: library/data-transport#14	2 years ago
Steve L. Nyemba	68b8f6af5f	Merge pull request 'fixes 2024 pandas-gbq and sqlalchemy' (#10 ) from dev into master Reviewed-on: library/data-transport#10	2 years ago
`@ -1 +1 @@`
	`from . import files, http, rabbitmq, callback, files`	`from . import files, http, rabbitmq, callback, files, console`