diff --git a/qcms-manifest.json b/qcms-manifest.json index 805ba45..969f7ba 100644 --- a/qcms-manifest.json +++ b/qcms-manifest.json @@ -43,8 +43,6 @@ "order": { "menu": [ "docs", - "wizard", - "setup", "about" ] }, diff --git a/www/html/_notes/plugins-intro.html b/www/html/_notes/plugins-intro.html index 90ff0ae..2b2ec8e 100644 --- a/www/html/_notes/plugins-intro.html +++ b/www/html/_notes/plugins-intro.html @@ -1 +1,80 @@ - These are basic python functions with a single argument (data:pd.DataFrame). The functions can be used as a pipeline to be called in the context of pre/post processing. \ No newline at end of file + + +
+
Plugins
+Plugins are native Python functions that are integrated into {{layout.header.title}} and called during pre/post processing. + +
+$ transport plugins --help +
+
+
+ 0. Write a plugin function with a decorator +

+ Plugins are native python functions, that take in a single parameter. The following example should be saved in a file my-plugin.py +

+
+import transport +
import numpy as np + +
_index = 0 +
@transport.Plugin(name='autoincrement') +
def _incr (_data): +
+ global _index +
_data['_id'] = _index + np.arange(_data.shape[0]) +
_index = _data.shape[0] +
return _data +
+
+ +
+ 1. Register & test the plugin +

+ The plugin utility will make a copy of the file and allow it to be reused against any supported database technology. + More information is available when running + +

+
+ $ transport plugins add myplugin my-plugin.py +
+

+ Once registered it is important to see if the function can be tested +

+
$ plugin-ix registry list --folder ~/.data-transport
+
+
+
+ Using our first plugin +

+ Plugins are used as pipelines, i.e., you can add more than one and they will execute in the order in which they are listed. +

+ +
+ import transport +
+
dbreader = transport.get.reader(label="address-db",plugins=["_incr@myplugin"]) +
_df = dbreader.read() +
+
+ The code above shows how a simple plugin function can be applied to a data + +
address-db, is the database label that points to the url with data +
myplugin, points to a copy of "incr" in "my-plugin.py" +

+

+

+
+
+
\ No newline at end of file diff --git a/www/html/_notes/registry.html b/www/html/_notes/registry.html index 16b7cce..0db1141 100644 --- a/www/html/_notes/registry.html +++ b/www/html/_notes/registry.html @@ -4,11 +4,11 @@ })
-
What is the registry
+
Initialize registry

data-transport uses a registry to store database authentication information and referenced by a human readable label.

Initialize Registry
@@ -51,21 +51,21 @@ In code

-import transport -import io -import json -# -# transport.registry.exists() -_email = 'steve@the-phi.com' -transport.registry.init(_email) +import transport +
import io +
import json +
# +
# transport.registry.exists() +
_email = 'steve@the-phi.com' +
transport.registry.init(_email) -# -# Adding the entry to the registry now that is initialized -_authStr = {"provider":"http", +

# +
# Adding the entry to the registry now that is initialized +
_authStr = {"provider":"http", "url":"https://raw.githubusercontent.com/codeforamerica/ohana-api/master/data/sample-csv/addresses.csv" } -file = io.StringIO(json.dumps(_authStr)) -transport.registry.set('address-db',file) +
file = io.StringIO(json.dumps(_authStr)) +
transport.registry.set('address-db',file)

diff --git a/www/html/_notes/source-code.html b/www/html/_notes/source-code.html index ea95299..c802919 100644 --- a/www/html/_notes/source-code.html +++ b/www/html/_notes/source-code.html @@ -1,61 +1,50 @@ - + -
+
-
Collaborative development
-

- 0. In this scenario we assume the registry has been initialized and that an entry has been added (CLI). -

- # transport registry add --help -
$ transport registry add address-db http-auth.json -
- - -

-The python code would look like the following : -
-import transport -
-#
-#   We are assuming here that the label books-db is an entry in the registry
+    
Collaborative development
+

+ 0. In this scenario we assume the registry has been initialized and that an entry has been added (CLI). +

+ $ transport registry add address-db ./http-auth.json +
+ -dbreader = transport.get.reader(label='address-db') # No database credentials -_df = dbreader.read(sql="SELECT * FROM books where postal_code like '946%' ") -print (_df.head()) -
+

+ The python code would look like the following : +

+ import transport +
# +
# we are using "address-db" to reference the content of http-auth.json file +
+
+
dbreader = transport.get.reader(label='address-db') +
_df = dbreader.read() +
print (_df.head()) -
-

+

+

1. Alternatively it is possible to directly use the authentication file dubbed "auth-file". -

- import transport -
-        #
-        #   We are assuming here that the label books-db is an entry in the registry
-
-        dbreader    = transport.get.reader(auth_file='/home/me/http-auth.json')    # No database credentials
-        _df         = dbreader.read(sql="SELECT * FROM books where postal_code like '946%' ")
-        print (_df.head())
-        
+
+ import transport + +
# +
# http-auth.json contains connectivity parameters. +
+

dbreader = transport.get.reader(auth_file='http-auth.json') +
_df = dbreader.read() +
print (_df.head())

@@ -64,20 +53,20 @@ print (_df.head())
Non-collaborative development

- In this scenario, we are using connectivity parameters in the code. We do NOT recommend this if the code will be used/shared. + In this scenario, we are using connectivity parameters in the code. This scenario is suited for non-collaborative environments.

import transport -
-    #
-    #   In this scenario we are loading an SQLite3+ database
-    url= "https://raw.githubusercontent.com/codeforamerica/ohana-api/master/data/sample-csv/addresses.csv"
-    _args = {"provider":"http","database"}
-    dbreader    = transport.get.reader(**_args)    # No database credentials
-    _df         = dbreader.read(sql="SELECT * FROM books where postal_code like '946%' ")
-    print (_df.head())
-    
+ +
# +
# In this scenario we are loading CSV data over HTTP +
url= "https://raw.githubusercontent.com/codeforamerica/ohana-api/master/data/sample-csv/addresses.csv" +
_args = {"provider":"http","url":url} +

dbreader = transport.get.reader(**_args) +
_df = dbreader.read() +
print (_df.head()) +
@@ -85,6 +74,7 @@ print (_df.head())
Learn more
It is possible to initialize the registry; run ETL from your code as well as from the command line (CLI). We compiled this in notebooks available in our code repository

+
\ No newline at end of file diff --git a/www/html/_setup/configurator.html b/www/html/_setup/configurator.html new file mode 100644 index 0000000..d6ddca5 --- /dev/null +++ b/www/html/_setup/configurator.html @@ -0,0 +1,83 @@ + + +
+
+ + + + + +
+ +
+
+
+

Source: Database Technology

+
Select a database technology as a source
+ +
+
+
+
+
\ No newline at end of file diff --git a/www/html/_wizard/wizard.html b/www/html/_wizard/wizard.html new file mode 100644 index 0000000..c30e39d --- /dev/null +++ b/www/html/_wizard/wizard.html @@ -0,0 +1,199 @@ + + + +
+
+
Wizard: auth file generator
+

+

This wizard generates an auth-file. It is a template file used to set up data-transport database connectivity, following best practice by keeping sensitive information out of code. +
    + search for the database provider / vendors +
    click on the vendor and copy the generated code to a file + +
+
+

+

+

+
+ +
+
+ +
+
+
+
+ 0 found +
+
+ + +
+

+

+

+ +
+
+
 
+
Note : +
Copy the code above to the auth-file and fill with appropriate values +
Attributes with zero i.e 0 are optional +
+
+

+

+

+

+ +
+
+
Prerequisites
+
    +
  • Familiarity with JSON format
  • +
  • Understand your current database security access policy +
    Ensure your policy (permissions) matches your use case +
    +
  • +
+
Things to know
+
    +
  • Values assigned to attributes +
    value of one i.e 1 suggests a value must be provided +
    value of zero i.e 0 suggests the attribute is optional and can be removed +
  • +
  • + Supported databases (or database providers) to use in search +
    +
  • +
+
+
diff --git a/www/html/about/license.html b/www/html/about/license.html index 5bf2030..3a5aff2 100644 --- a/www/html/about/license.html +++ b/www/html/about/license.html @@ -1,28 +1,36 @@ - + diff --git a/www/html/docs/source-code.html b/www/html/docs/source-code.html deleted file mode 100644 index e69de29..0000000 diff --git a/www/html/docs/transport.html b/www/html/docs/transport.html deleted file mode 100644 index a205659..0000000 --- a/www/html/docs/transport.html +++ /dev/null @@ -1,186 +0,0 @@ - - -
-
-

-

ETL: Introduction

- Extract Load & Transform (ETL) consists in copying data from one database to one or many others. This can be done in two different ways: -
    -
    Command Line Interface (CLI), driven by JSON configuration
    -
    Or within custom python code
    -
- The ETL process will take advantage of registries for plugins and labeled database connectivity to perform pre/post processing tasks. - -

-
-

-

ETL: Command Line Interface

-

- The configuration file needed to run the ETL is a JSON formatted file where each entry contains: -

    -
    source with the content of an auth-file
    -
    target with list of elements of an auth-file
    -
- -
- The CLI (transport), is capable of generating a demo ETL : -
    -
    with source: reads CSV data from github
    -
    and target: writes the data to CSV & SQLite3 database
    -
-
- $ transport generate ./demo-etl.json -
-
-

-

-
-
-
-
- Data-transport UML Extract-Load-Transform (ETL) Workflow -
-
-
- -
-
-
- - - - - - - - - - -
-
-

- -

-
-

- The command-line interface should be instructed to run the ETL by calling the apply function. -

-

-

- $ transport apply ./demo-etl.json -
-

-

- Additional parameters can be invoked by providing the --help switch -

- -

-

- $ transport apply --help -
-

- -
-
-
- The following examples shows simple configuration files that do NOT require any database to be installed. Feel free to change and edit at your own discression. -
-

-

Example # 1: Basic ETL

-
- - - - -
- -
-
-
-

- data-transport comes with a CLI integrated that will -

    -
    generate an EL configuration file
    -
    - $ transport generate ./demo-etl.json -
    - -
-
NOTE:The configuration file supports labels and/or plugins, these would have to be done manually
-

-
-
-

Copy the content and save it to a file "demo-etl.json"

-
- -
[{
-"source": {
-    "provider": "http", 
-    "url": "https://github.com/codeforamerica/ohana-api/blob/master/data/sample-csv/addresses.csv"
-}, 
-"target": [
-    {"provider": "files", "path": "addresses.csv", "delimiter": ","}, 
-    {"provider": "sqlite3", "database": "sample.db3", "table": "addresses"}
-]}]
- -
-
- -
-
- -

-
- -

-

Example # 2: ETL With Plugins

-

Copy the content and save it to a file "demo-etl.json"

-
- -
[{
-              "source": {
-                  "provider": "http", 
-                  "plugins":["demo@autoincrement"],
-                  "url": "https://github.com/codeforamerica/ohana-api/blob/master/data/sample-csv/addresses.csv"
-              }, 
-              "target": [
-                  {"provider": "files", "path": "addresses.csv", "delimiter": ","}, 
-                  {"provider": "sqlite3", "database": "sample.db3", "table": "addresses"}
-              ]}]
- -
- -

- -
-
-

- -
-
\ No newline at end of file diff --git a/www/html/visit-us.html b/www/html/visit-us.html new file mode 100644 index 0000000..fd1d6ea --- /dev/null +++ b/www/html/visit-us.html @@ -0,0 +1,17 @@ + +
+ We encourage you to visit the github repository for examples and various ways to use {{layout.header.title}} + +

+ +

+
+
+
Github
+
+
+
+

+
\ No newline at end of file