Compare commits
36 Commits
Author | SHA1 | Date |
---|---|---|
Steve L. Nyemba | 492dc8f374 | 1 month ago |
Steve Nyemba | 2df926da12 | 1 month ago |
Steve L. Nyemba | e848367378 | 1 month ago |
Steve Nyemba | e9aab3b034 | 1 month ago |
Steve L. Nyemba | c872ba8cc2 | 1 month ago |
Steve Nyemba | 34db729ad4 | 1 month ago |
Steve Nyemba | a7c72391e8 | 3 months ago |
Steve L. Nyemba | baa8164f16 | 3 months ago |
Steve Nyemba | 955369fdd8 | 3 months ago |
Steve L. Nyemba | 31556ebd32 | 3 months ago |
Steve Nyemba | 63666e95ce | 3 months ago |
Steve Nyemba | 9dba5daecd | 3 months ago |
Steve Nyemba | 40f9c3930a | 3 months ago |
Steve L. Nyemba | 1e7839198a | 3 months ago |
Steve Nyemba | 3faee02fa2 | 3 months ago |
Steve Nyemba | 6f6fd48982 | 4 months ago |
Steve Nyemba | 808378afdb | 4 months ago |
Steve Nyemba | 2edce85aed | 4 months ago |
Steve Nyemba | 235a44be66 | 4 months ago |
Steve Nyemba | 037019c1d7 | 4 months ago |
Steve Nyemba | c443c6c953 | 4 months ago |
Steve Nyemba | dde4767e37 | 4 months ago |
Steve Nyemba | 8aa6f2c93d | 4 months ago |
Steve Nyemba | 24cdd9f8fe | 4 months ago |
Steve Nyemba | b9bc898161 | 4 months ago |
Steve Nyemba | 8edb764d11 | 4 months ago |
Steve Nyemba | 6544bf852a | 4 months ago |
Steve L. Nyemba | dce50a967e | 4 months ago |
Steve Nyemba | 2b5c038610 | 4 months ago |
Steve L. Nyemba | 5ccb073865 | 4 months ago |
Steve Nyemba | d0472ccee5 | 4 months ago |
Steve Nyemba | 870c1caed3 | 4 months ago |
Steve Nyemba | f5187790ce | 4 months ago |
Steve L. Nyemba | 3081fb98e7 | 6 months ago |
Steve L. Nyemba | 58959359ad | 8 months ago |
Steve L. Nyemba | 68b8f6af5f | 8 months ago |
@ -0,0 +1,188 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Extract Transform Load (ETL) from Code\n",
|
||||||
|
"\n",
|
||||||
|
"The example below reads data from an http source (github) and will copy the data to a csv file and to a database. This example illustrates the one-to-many ETL features.\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>id</th>\n",
|
||||||
|
" <th>location_id</th>\n",
|
||||||
|
" <th>address_1</th>\n",
|
||||||
|
" <th>address_2</th>\n",
|
||||||
|
" <th>city</th>\n",
|
||||||
|
" <th>state_province</th>\n",
|
||||||
|
" <th>postal_code</th>\n",
|
||||||
|
" <th>country</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>2600 Middlefield Road</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>Redwood City</td>\n",
|
||||||
|
" <td>CA</td>\n",
|
||||||
|
" <td>94063</td>\n",
|
||||||
|
" <td>US</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1</th>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>24 Second Avenue</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>San Mateo</td>\n",
|
||||||
|
" <td>CA</td>\n",
|
||||||
|
" <td>94401</td>\n",
|
||||||
|
" <td>US</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2</th>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>24 Second Avenue</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>San Mateo</td>\n",
|
||||||
|
" <td>CA</td>\n",
|
||||||
|
" <td>94403</td>\n",
|
||||||
|
" <td>US</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>3</th>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" <td>24 Second Avenue</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>San Mateo</td>\n",
|
||||||
|
" <td>CA</td>\n",
|
||||||
|
" <td>94401</td>\n",
|
||||||
|
" <td>US</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>4</th>\n",
|
||||||
|
" <td>5</td>\n",
|
||||||
|
" <td>5</td>\n",
|
||||||
|
" <td>24 Second Avenue</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>San Mateo</td>\n",
|
||||||
|
" <td>CA</td>\n",
|
||||||
|
" <td>94401</td>\n",
|
||||||
|
" <td>US</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" id location_id address_1 address_2 city \\\n",
|
||||||
|
"0 1 1 2600 Middlefield Road NaN Redwood City \n",
|
||||||
|
"1 2 2 24 Second Avenue NaN San Mateo \n",
|
||||||
|
"2 3 3 24 Second Avenue NaN San Mateo \n",
|
||||||
|
"3 4 4 24 Second Avenue NaN San Mateo \n",
|
||||||
|
"4 5 5 24 Second Avenue NaN San Mateo \n",
|
||||||
|
"\n",
|
||||||
|
" state_province postal_code country \n",
|
||||||
|
"0 CA 94063 US \n",
|
||||||
|
"1 CA 94401 US \n",
|
||||||
|
"2 CA 94403 US \n",
|
||||||
|
"3 CA 94401 US \n",
|
||||||
|
"4 CA 94401 US "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"#\n",
|
||||||
|
"# Writing to Google Bigquery database\n",
|
||||||
|
"#\n",
|
||||||
|
"import transport\n",
|
||||||
|
"from transport import providers\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"#\n",
|
||||||
|
"#\n",
|
||||||
|
"source = {\"provider\": \"http\", \"url\": \"https://raw.githubusercontent.com/codeforamerica/ohana-api/master/data/sample-csv/addresses.csv\"}\n",
|
||||||
|
"target = [{\"provider\": \"files\", \"path\": \"addresses.csv\", \"delimiter\": \",\"}, {\"provider\": \"sqlite\", \"database\": \"sample.db3\", \"table\": \"addresses\"}]\n",
|
||||||
|
"\n",
|
||||||
|
"_handler = transport.get.etl (source=source,target=target)\n",
|
||||||
|
"_data = _handler.read() #-- all etl begins with data being read\n",
|
||||||
|
"_data.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Extract Transform Load (ETL) from CLI\n",
|
||||||
|
"\n",
|
||||||
|
"The documentation for this is available at https://healthcareio.the-phi.com/data-transport \"Docs\" -> \"Terminal CLI\"\n",
|
||||||
|
"\n",
|
||||||
|
"The entire process is documented including how to generate an ETL configuration file."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.9.7"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
@ -0,0 +1,149 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Writing data-transport plugins\n",
|
||||||
|
"\n",
|
||||||
|
"The data-transport plugins are designed to automate pre/post processing i.e\n",
|
||||||
|
"\n",
|
||||||
|
" - Read -> Post processing\n",
|
||||||
|
" - Write-> Pre processing\n",
|
||||||
|
" \n",
|
||||||
|
"In this example we will assume, data and write both pre/post processing to any supported infrastructure. We will equally show how to specify the plugins within a configuration file"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#\n",
|
||||||
|
"# Writing to Google Bigquery database\n",
|
||||||
|
"#\n",
|
||||||
|
"import transport\n",
|
||||||
|
"from transport import providers\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import os\n",
|
||||||
|
"import shutil\n",
|
||||||
|
"#\n",
|
||||||
|
"#\n",
|
||||||
|
"\n",
|
||||||
|
"DATABASE = '/home/steve/tmp/demo.db3'\n",
|
||||||
|
"if os.path.exists(DATABASE) :\n",
|
||||||
|
" os.remove(DATABASE)\n",
|
||||||
|
"#\n",
|
||||||
|
"# \n",
|
||||||
|
"_data = pd.DataFrame({\"name\":['James Bond','Steve Rogers','Steve Nyemba'],'age':[55,150,44]})\n",
|
||||||
|
"litew = transport.get.writer(provider=providers.SQLITE,database=DATABASE)\n",
|
||||||
|
"litew.write(_data,table='friends')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Reading from SQLite\n",
|
||||||
|
"\n",
|
||||||
|
"The cell below reads the data that has been written by the cell above and computes the average age from a plugin function we will write. \n",
|
||||||
|
"\n",
|
||||||
|
"- Basic read of the designated table (friends) created above\n",
|
||||||
|
"- Read with pipeline functions defined in code\n",
|
||||||
|
"\n",
|
||||||
|
"**NOTE**\n",
|
||||||
|
"\n",
|
||||||
|
"It is possible to use **transport.factory.instance** or **transport.instance** or **transport.get.<[reader|writer]>** they are the same. It allows the maintainers to know that we used a factory design pattern."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
" name age\n",
|
||||||
|
"0 James Bond 55\n",
|
||||||
|
"1 Steve Rogers 150\n",
|
||||||
|
"2 Steve Nyemba 44\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
" name age autoinc\n",
|
||||||
|
"0 James Bond 5.5 0\n",
|
||||||
|
"1 Steve Rogers 15.0 1\n",
|
||||||
|
"2 Steve Nyemba 4.4 2\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"\n",
|
||||||
|
"import transport\n",
|
||||||
|
"from transport import providers\n",
|
||||||
|
"import os\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"def _autoincrement (_data,**kwargs) :\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" This function will add an autoincrement field to the table\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" _data['autoinc'] = np.arange(_data.shape[0])\n",
|
||||||
|
" \n",
|
||||||
|
" return _data\n",
|
||||||
|
"def reduce(_data,**_args) :\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" This function will reduce the age of the data frame\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" _data.age /= 10\n",
|
||||||
|
" return _data\n",
|
||||||
|
"reader = transport.get.reader(provider=providers.SQLITE,database=DATABASE,table='friends')\n",
|
||||||
|
"#\n",
|
||||||
|
"# basic read of the data created in the first cell\n",
|
||||||
|
"_df = reader.read()\n",
|
||||||
|
"print (_df)\n",
|
||||||
|
"print ()\n",
|
||||||
|
"print()\n",
|
||||||
|
"#\n",
|
||||||
|
"# read of the data with pipeline function provided to alter the database\n",
|
||||||
|
"print (reader.read(pipeline=[_autoincrement,reduce]))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"The parameters for instianciating a transport object (reader or writer) can be found at [data-transport home](https://healthcareio.the-phi.com/data-transport)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.9.7"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
@ -0,0 +1,131 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Writing to AWS S3\n",
|
||||||
|
"\n",
|
||||||
|
"We have setup our demo environment with the label **aws** passed to reference our s3 access_key and secret_key and file (called friends.csv). In the cell below we will write the data to our aws s3 bucket named **com.phi.demo**"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"2.2.1\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"#\n",
|
||||||
|
"# Writing to mongodb database\n",
|
||||||
|
"#\n",
|
||||||
|
"import transport\n",
|
||||||
|
"from transport import providers\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"_data = pd.DataFrame({\"name\":['James Bond','Steve Rogers','Steve Nyemba'],'age':[55,150,44]})\n",
|
||||||
|
"mgw = transport.get.writer(label='aws')\n",
|
||||||
|
"mgw.write(_data)\n",
|
||||||
|
"print (transport.__version__)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Reading from AWS S3\n",
|
||||||
|
"\n",
|
||||||
|
"The cell below reads the data that has been written by the cell above and computes the average age within a mongodb pipeline. The code in the background executes an aggregation using\n",
|
||||||
|
"\n",
|
||||||
|
"- Basic read of the designated file **friends.csv**\n",
|
||||||
|
"- Compute average age using standard pandas functions\n",
|
||||||
|
"\n",
|
||||||
|
"**NOTE**\n",
|
||||||
|
"\n",
|
||||||
|
"By design **read** object are separated from **write** objects in order to avoid accidental writes to the database.\n",
|
||||||
|
"Read objects are created with **transport.get.reader** whereas write objects are created with **transport.get.writer**"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
" bname age\n",
|
||||||
|
"0 James Bond 55\n",
|
||||||
|
"1 Steve Rogers 150\n",
|
||||||
|
"2 Steve Nyemba 44\n",
|
||||||
|
"--------- STATISTICS ------------\n",
|
||||||
|
"83.0\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"\n",
|
||||||
|
"import transport\n",
|
||||||
|
"from transport import providers\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"\n",
|
||||||
|
"def cast(stream) :\n",
|
||||||
|
" print (stream)\n",
|
||||||
|
" return pd.DataFrame(str(stream))\n",
|
||||||
|
"mgr = transport.get.reader(label='aws')\n",
|
||||||
|
"_df = mgr.read()\n",
|
||||||
|
"print (_df)\n",
|
||||||
|
"print ('--------- STATISTICS ------------')\n",
|
||||||
|
"print (_df.age.mean())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"An **auth-file** is a file that contains database parameters used to access the database. \n",
|
||||||
|
"For code in shared environments, we recommend \n",
|
||||||
|
"\n",
|
||||||
|
"1. Having the **auth-file** stored on disk \n",
|
||||||
|
"2. and the location of the file is set to an environment variable.\n",
|
||||||
|
"\n",
|
||||||
|
"To generate a template of the **auth-file** open the **file generator wizard** found at visit https://healthcareio.the-phi.com/data-transport"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.9.7"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
@ -0,0 +1,19 @@
|
|||||||
|
"""
|
||||||
|
This file will be intended to handle duckdb database
|
||||||
|
"""
|
||||||
|
|
||||||
|
import duckdb
|
||||||
|
from transport.common import Reader,Writer
|
||||||
|
|
||||||
|
class Duck(Reader):
|
||||||
|
def __init__(self,**_args):
|
||||||
|
super().__init__(**_args)
|
||||||
|
self._path = None if 'path' not in _args else _args['path']
|
||||||
|
self._handler = duckdb.connect() if not self._path else duckdb.connect(self._path)
|
||||||
|
|
||||||
|
|
||||||
|
class DuckReader(Duck) :
|
||||||
|
def __init__(self,**_args):
|
||||||
|
super().__init__(**_args)
|
||||||
|
def read(self,**_args) :
|
||||||
|
pass
|
@ -1 +1 @@
|
|||||||
from . import files, http, rabbitmq, callback, files
|
from . import files, http, rabbitmq, callback, files, console
|
||||||
|
@ -0,0 +1,102 @@
|
|||||||
|
import os
|
||||||
|
import json
|
||||||
|
from info import __version__
|
||||||
|
import copy
|
||||||
|
import transport
|
||||||
|
|
||||||
|
"""
|
||||||
|
This class manages data from the registry and allows (read only)
|
||||||
|
@TODO: add property to the DATA attribute
|
||||||
|
"""
|
||||||
|
|
||||||
|
REGISTRY_PATH=os.sep.join([os.environ['HOME'],'.data-transport'])
|
||||||
|
#
|
||||||
|
# This path can be overriden by an environment variable ...
|
||||||
|
#
|
||||||
|
if 'DATA_TRANSPORT_REGISTRY_PATH' in os.environ :
|
||||||
|
REGISTRY_PATH = os.environ['DATA_TRANSPORT_REGISTRY_PATH']
|
||||||
|
REGISTRY_FILE= 'transport-registry.json'
|
||||||
|
|
||||||
|
DATA = {}
|
||||||
|
|
||||||
|
def isloaded ():
|
||||||
|
return DATA not in [{},None]
|
||||||
|
def exists (path=REGISTRY_PATH) :
|
||||||
|
"""
|
||||||
|
This function determines if there is a registry at all
|
||||||
|
"""
|
||||||
|
p = os.path.exists(path)
|
||||||
|
q = os.path.exists( os.sep.join([path,REGISTRY_FILE]))
|
||||||
|
|
||||||
|
return p and q
|
||||||
|
def load (_path=REGISTRY_PATH):
|
||||||
|
global DATA
|
||||||
|
|
||||||
|
if exists(_path) :
|
||||||
|
path = os.sep.join([_path,REGISTRY_FILE])
|
||||||
|
f = open(path)
|
||||||
|
DATA = json.loads(f.read())
|
||||||
|
f.close()
|
||||||
|
def init (email,path=REGISTRY_PATH,override=False):
|
||||||
|
"""
|
||||||
|
Initializing the registry and will raise an exception in the advent of an issue
|
||||||
|
"""
|
||||||
|
p = '@' in email
|
||||||
|
q = False if '.' not in email else email.split('.')[-1] in ['edu','com','io','ai','org']
|
||||||
|
if p and q :
|
||||||
|
_config = {"email":email,'version':__version__}
|
||||||
|
if not os.path.exists(path):
|
||||||
|
os.makedirs(path)
|
||||||
|
filename = os.sep.join([path,REGISTRY_FILE])
|
||||||
|
if not os.path.exists(filename) or override == True :
|
||||||
|
|
||||||
|
f = open(filename,'w')
|
||||||
|
f.write( json.dumps(_config))
|
||||||
|
f.close()
|
||||||
|
# _msg = f"""{CHECK_MARK} Successfully wrote configuration to {path} from {email}"""
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise Exception (f"""Unable to write configuration, Please check parameters (or help) and try again""")
|
||||||
|
else:
|
||||||
|
raise Exception (f"""Invalid Input, {email} is not well formatted, provide an email with adequate format""")
|
||||||
|
def lookup (label):
|
||||||
|
global DATA
|
||||||
|
return label in DATA
|
||||||
|
def get (label='default') :
|
||||||
|
global DATA
|
||||||
|
return copy.copy(DATA[label]) if label in DATA else {}
|
||||||
|
|
||||||
|
def set (label, auth_file, default=False,path=REGISTRY_PATH) :
|
||||||
|
"""
|
||||||
|
This function will add a label (auth-file data) into the registry and can set it as the default
|
||||||
|
"""
|
||||||
|
if label == 'default' :
|
||||||
|
raise Exception ("""Invalid label name provided, please change the label name and use the switch""")
|
||||||
|
reg_file = os.sep.join([path,REGISTRY_FILE])
|
||||||
|
if os.path.exists (auth_file) and os.path.exists(path) and os.path.exists(reg_file):
|
||||||
|
f = open(auth_file)
|
||||||
|
_info = json.loads(f.read())
|
||||||
|
f.close()
|
||||||
|
f = open(reg_file)
|
||||||
|
_config = json.loads(f.read())
|
||||||
|
f.close()
|
||||||
|
|
||||||
|
#
|
||||||
|
# set the proposed label
|
||||||
|
_object = transport.factory.instance(**_info)
|
||||||
|
if _object :
|
||||||
|
_config[label] = _info
|
||||||
|
if default :
|
||||||
|
_config['default'] = _info
|
||||||
|
#
|
||||||
|
# now we need to write this to the location
|
||||||
|
f = open(reg_file,'w')
|
||||||
|
f.write(json.dumps(_config))
|
||||||
|
f.close()
|
||||||
|
else:
|
||||||
|
raise Exception( f"""Unable to load file locate at {path},\nLearn how to generate auth-file with wizard found at https://healthcareio.the-phi.com/data-transport""")
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
pass
|
||||||
|
pass
|
||||||
|
|
@ -0,0 +1,24 @@
|
|||||||
|
"""
|
||||||
|
This module implements the handler for duckdb (in memory or not)
|
||||||
|
"""
|
||||||
|
from transport.sql.common import Base, BaseReader, BaseWriter
|
||||||
|
|
||||||
|
class Duck :
|
||||||
|
def __init__(self,**_args):
|
||||||
|
#
|
||||||
|
# duckdb with none as database will operate as an in-memory database
|
||||||
|
#
|
||||||
|
self.database = _args['database'] if 'database' in _args else ''
|
||||||
|
def get_provider(self):
|
||||||
|
return "duckdb"
|
||||||
|
|
||||||
|
def _get_uri(self,**_args):
|
||||||
|
return f"""duckdb:///{self.database}"""
|
||||||
|
class Reader(Duck,BaseReader) :
|
||||||
|
def __init__(self,**_args):
|
||||||
|
Duck.__init__(self,**_args)
|
||||||
|
BaseReader.__init__(self,**_args)
|
||||||
|
class Writer(Duck,BaseWriter):
|
||||||
|
def __init__(self,**_args):
|
||||||
|
Duck.__init__(self,**_args)
|
||||||
|
BaseWriter.__init__(self,**_args)
|
Loading…
Reference in new issue