parent
							
								
									34db729ad4
								
							
						
					
					
						commit
						e9aab3b034
					
				@ -0,0 +1,149 @@
 | 
				
			|||||||
 | 
					{
 | 
				
			||||||
 | 
					 "cells": [
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					   "cell_type": "markdown",
 | 
				
			||||||
 | 
					   "metadata": {},
 | 
				
			||||||
 | 
					   "source": [
 | 
				
			||||||
 | 
					    "#### Writing data-transport plugins\n",
 | 
				
			||||||
 | 
					    "\n",
 | 
				
			||||||
 | 
					    "The data-transport plugins are designed to automate pre/post processing i.e\n",
 | 
				
			||||||
 | 
					    "\n",
 | 
				
			||||||
 | 
					    "    - Read -> Post processing\n",
 | 
				
			||||||
 | 
					    "    - Write-> Pre processing\n",
 | 
				
			||||||
 | 
					    "    \n",
 | 
				
			||||||
 | 
					    "In this example we will assume, data and write both pre/post processing to any supported infrastructure. We will equally show how to specify the plugins within a configuration file"
 | 
				
			||||||
 | 
					   ]
 | 
				
			||||||
 | 
					  },
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					   "cell_type": "code",
 | 
				
			||||||
 | 
					   "execution_count": 1,
 | 
				
			||||||
 | 
					   "metadata": {},
 | 
				
			||||||
 | 
					   "outputs": [],
 | 
				
			||||||
 | 
					   "source": [
 | 
				
			||||||
 | 
					    "#\n",
 | 
				
			||||||
 | 
					    "# Writing to Google Bigquery database\n",
 | 
				
			||||||
 | 
					    "#\n",
 | 
				
			||||||
 | 
					    "import transport\n",
 | 
				
			||||||
 | 
					    "from transport import providers\n",
 | 
				
			||||||
 | 
					    "import pandas as pd\n",
 | 
				
			||||||
 | 
					    "import os\n",
 | 
				
			||||||
 | 
					    "import shutil\n",
 | 
				
			||||||
 | 
					    "#\n",
 | 
				
			||||||
 | 
					    "#\n",
 | 
				
			||||||
 | 
					    "\n",
 | 
				
			||||||
 | 
					    "DATABASE = '/home/steve/tmp/demo.db3'\n",
 | 
				
			||||||
 | 
					    "if os.path.exists(DATABASE) :\n",
 | 
				
			||||||
 | 
					    "    os.remove(DATABASE)\n",
 | 
				
			||||||
 | 
					    "#\n",
 | 
				
			||||||
 | 
					    "#    \n",
 | 
				
			||||||
 | 
					    "_data = pd.DataFrame({\"name\":['James Bond','Steve Rogers','Steve Nyemba'],'age':[55,150,44]})\n",
 | 
				
			||||||
 | 
					    "litew = transport.get.writer(provider=providers.SQLITE,database=DATABASE)\n",
 | 
				
			||||||
 | 
					    "litew.write(_data,table='friends')"
 | 
				
			||||||
 | 
					   ]
 | 
				
			||||||
 | 
					  },
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					   "cell_type": "markdown",
 | 
				
			||||||
 | 
					   "metadata": {},
 | 
				
			||||||
 | 
					   "source": [
 | 
				
			||||||
 | 
					    "#### Reading from SQLite\n",
 | 
				
			||||||
 | 
					    "\n",
 | 
				
			||||||
 | 
					    "The cell below reads the data that has been written by the cell above and computes the average age from a plugin function we will write. \n",
 | 
				
			||||||
 | 
					    "\n",
 | 
				
			||||||
 | 
					    "- Basic read of the designated table (friends) created above\n",
 | 
				
			||||||
 | 
					    "- Read with pipeline functions defined in code\n",
 | 
				
			||||||
 | 
					    "\n",
 | 
				
			||||||
 | 
					    "**NOTE**\n",
 | 
				
			||||||
 | 
					    "\n",
 | 
				
			||||||
 | 
					    "It is possible to use **transport.factory.instance** or **transport.instance** or **transport.get.<[reader|writer]>** they are the same. It allows the maintainers to know that we used a factory design pattern."
 | 
				
			||||||
 | 
					   ]
 | 
				
			||||||
 | 
					  },
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					   "cell_type": "code",
 | 
				
			||||||
 | 
					   "execution_count": 4,
 | 
				
			||||||
 | 
					   "metadata": {},
 | 
				
			||||||
 | 
					   "outputs": [
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					     "name": "stdout",
 | 
				
			||||||
 | 
					     "output_type": "stream",
 | 
				
			||||||
 | 
					     "text": [
 | 
				
			||||||
 | 
					      "           name  age\n",
 | 
				
			||||||
 | 
					      "0    James Bond   55\n",
 | 
				
			||||||
 | 
					      "1  Steve Rogers  150\n",
 | 
				
			||||||
 | 
					      "2  Steve Nyemba   44\n",
 | 
				
			||||||
 | 
					      "\n",
 | 
				
			||||||
 | 
					      "\n",
 | 
				
			||||||
 | 
					      "           name   age  autoinc\n",
 | 
				
			||||||
 | 
					      "0    James Bond   5.5        0\n",
 | 
				
			||||||
 | 
					      "1  Steve Rogers  15.0        1\n",
 | 
				
			||||||
 | 
					      "2  Steve Nyemba   4.4        2\n"
 | 
				
			||||||
 | 
					     ]
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					   ],
 | 
				
			||||||
 | 
					   "source": [
 | 
				
			||||||
 | 
					    "\n",
 | 
				
			||||||
 | 
					    "import transport\n",
 | 
				
			||||||
 | 
					    "from transport import providers\n",
 | 
				
			||||||
 | 
					    "import os\n",
 | 
				
			||||||
 | 
					    "import numpy as np\n",
 | 
				
			||||||
 | 
					    "def _autoincrement (_data,**kwargs) :\n",
 | 
				
			||||||
 | 
					    "    \"\"\"\n",
 | 
				
			||||||
 | 
					    "    This function will add an autoincrement field to the table\n",
 | 
				
			||||||
 | 
					    "    \"\"\"\n",
 | 
				
			||||||
 | 
					    "    _data['autoinc'] = np.arange(_data.shape[0])\n",
 | 
				
			||||||
 | 
					    "    \n",
 | 
				
			||||||
 | 
					    "    return _data\n",
 | 
				
			||||||
 | 
					    "def reduce(_data,**_args) :\n",
 | 
				
			||||||
 | 
					    "    \"\"\"\n",
 | 
				
			||||||
 | 
					    "    This function will reduce the age of the data frame\n",
 | 
				
			||||||
 | 
					    "    \"\"\"\n",
 | 
				
			||||||
 | 
					    "    _data.age /= 10\n",
 | 
				
			||||||
 | 
					    "    return _data\n",
 | 
				
			||||||
 | 
					    "reader = transport.get.reader(provider=providers.SQLITE,database=DATABASE,table='friends')\n",
 | 
				
			||||||
 | 
					    "#\n",
 | 
				
			||||||
 | 
					    "# basic read of the data created in the first cell\n",
 | 
				
			||||||
 | 
					    "_df = reader.read()\n",
 | 
				
			||||||
 | 
					    "print (_df)\n",
 | 
				
			||||||
 | 
					    "print ()\n",
 | 
				
			||||||
 | 
					    "print()\n",
 | 
				
			||||||
 | 
					    "#\n",
 | 
				
			||||||
 | 
					    "# read of the data with pipeline function provided to alter the database\n",
 | 
				
			||||||
 | 
					    "print (reader.read(pipeline=[_autoincrement,reduce]))"
 | 
				
			||||||
 | 
					   ]
 | 
				
			||||||
 | 
					  },
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					   "cell_type": "markdown",
 | 
				
			||||||
 | 
					   "metadata": {},
 | 
				
			||||||
 | 
					   "source": [
 | 
				
			||||||
 | 
					    "The parameters for instianciating a transport object (reader or writer) can be found at [data-transport home](https://healthcareio.the-phi.com/data-transport)\n"
 | 
				
			||||||
 | 
					   ]
 | 
				
			||||||
 | 
					  },
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					   "cell_type": "code",
 | 
				
			||||||
 | 
					   "execution_count": null,
 | 
				
			||||||
 | 
					   "metadata": {},
 | 
				
			||||||
 | 
					   "outputs": [],
 | 
				
			||||||
 | 
					   "source": []
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					 ],
 | 
				
			||||||
 | 
					 "metadata": {
 | 
				
			||||||
 | 
					  "kernelspec": {
 | 
				
			||||||
 | 
					   "display_name": "Python 3",
 | 
				
			||||||
 | 
					   "language": "python",
 | 
				
			||||||
 | 
					   "name": "python3"
 | 
				
			||||||
 | 
					  },
 | 
				
			||||||
 | 
					  "language_info": {
 | 
				
			||||||
 | 
					   "codemirror_mode": {
 | 
				
			||||||
 | 
					    "name": "ipython",
 | 
				
			||||||
 | 
					    "version": 3
 | 
				
			||||||
 | 
					   },
 | 
				
			||||||
 | 
					   "file_extension": ".py",
 | 
				
			||||||
 | 
					   "mimetype": "text/x-python",
 | 
				
			||||||
 | 
					   "name": "python",
 | 
				
			||||||
 | 
					   "nbconvert_exporter": "python",
 | 
				
			||||||
 | 
					   "pygments_lexer": "ipython3",
 | 
				
			||||||
 | 
					   "version": "3.9.7"
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					 },
 | 
				
			||||||
 | 
					 "nbformat": 4,
 | 
				
			||||||
 | 
					 "nbformat_minor": 2
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
@ -0,0 +1,19 @@
 | 
				
			|||||||
 | 
					"""
 | 
				
			||||||
 | 
					This file will be intended to handle duckdb database
 | 
				
			||||||
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import duckdb
 | 
				
			||||||
 | 
					from transport.common import Reader,Writer
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Duck(Reader):
 | 
				
			||||||
 | 
					    def __init__(self,**_args):
 | 
				
			||||||
 | 
					        super().__init__(**_args)
 | 
				
			||||||
 | 
					        self._path = None if 'path' not in _args else _args['path']
 | 
				
			||||||
 | 
					        self._handler = duckdb.connect() if not self._path else duckdb.connect(self._path)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class DuckReader(Duck) :
 | 
				
			||||||
 | 
					    def __init__(self,**_args):
 | 
				
			||||||
 | 
					        super().__init__(**_args)
 | 
				
			||||||
 | 
					    def read(self,**_args) :
 | 
				
			||||||
 | 
					        pass
 | 
				
			||||||
					Loading…
					
					
				
		Reference in new issue