diff --git a/notebooks/etl.ipynb b/notebooks/etl.ipynb new file mode 100644 index 0000000..b274da2 --- /dev/null +++ b/notebooks/etl.ipynb @@ -0,0 +1,188 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Extract Transform Load (ETL) from Code\n", + "\n", + "The example below reads data from an HTTP source (GitHub) and copies the data to a CSV file and to a database. This example illustrates the one-to-many ETL feature.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idlocation_idaddress_1address_2citystate_provincepostal_codecountry
0112600 Middlefield RoadNaNRedwood CityCA94063US
12224 Second AvenueNaNSan MateoCA94401US
23324 Second AvenueNaNSan MateoCA94403US
34424 Second AvenueNaNSan MateoCA94401US
45524 Second AvenueNaNSan MateoCA94401US
\n", + "
" + ], + "text/plain": [ + " id location_id address_1 address_2 city \\\n", + "0 1 1 2600 Middlefield Road NaN Redwood City \n", + "1 2 2 24 Second Avenue NaN San Mateo \n", + "2 3 3 24 Second Avenue NaN San Mateo \n", + "3 4 4 24 Second Avenue NaN San Mateo \n", + "4 5 5 24 Second Avenue NaN San Mateo \n", + "\n", + " state_province postal_code country \n", + "0 CA 94063 US \n", + "1 CA 94401 US \n", + "2 CA 94403 US \n", + "3 CA 94401 US \n", + "4 CA 94401 US " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#\n", + "# ETL from an HTTP source to a CSV file and a SQLite database\n", + "#\n", + "import transport\n", + "from transport import providers\n", + "import pandas as pd\n", + "import os\n", + "\n", + "#\n", + "#\n", + "source = {\"provider\": \"http\", \"url\": \"https://raw.githubusercontent.com/codeforamerica/ohana-api/master/data/sample-csv/addresses.csv\"}\n", + "target = [{\"provider\": \"files\", \"path\": \"addresses.csv\", \"delimiter\": \",\"}, {\"provider\": \"sqlite\", \"database\": \"sample.db3\", \"table\": \"addresses\"}]\n", + "\n", + "_handler = transport.get.etl (source=source,target=target)\n", + "_data = _handler.read() #-- all etl begins with data being read\n", + "_data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Extract Transform Load (ETL) from CLI\n", + "\n", + "The documentation for this is available at https://healthcareio.the-phi.com/data-transport under \"Docs\" -> \"Terminal CLI\".\n", + "\n", + "The entire process is documented there, including how to generate an ETL configuration file."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}