diff --git a/notebooks/.ipynb_checkpoints/risk-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/risk-checkpoint.ipynb deleted file mode 100644 index dd5cf33..0000000 --- a/notebooks/.ipynb_checkpoints/risk-checkpoint.ipynb +++ /dev/null @@ -1,273 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 66, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "from google.cloud import bigquery as bq\n", - "\n", - "client = bq.Client.from_service_account_json('/home/steve/dev/google-cloud-sdk/accounts/vumc-test.json')" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [], - "source": [ - "xo = ['person_id','date_of_birth','race']\n", - "xi = ['person_id','value_as_number','value_source_value']" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": {}, - "outputs": [], - "source": [ - "def get_tables(client,did,fields=[]):\n", - " \"\"\"\n", - " getting table lists from google\n", - " \"\"\"\n", - " r = []\n", - " ref = client.dataset(id)\n", - " tables = list(client.list_tables(ref))\n", - " for table in tables :\n", - " ref = table.reference\n", - " schema = client.get_table(ref).schema\n", - " names = [f.field_name for f in schema]\n", - " x = list(set(names) & set(fields))\n", - " if x :\n", - " r.append({\"name\":table.table_id,\"fields\":names})\n", - " return r\n", - " \n", - "def get_fields(**args):\n", - " \"\"\"\n", - " This function will generate a random set of fields from two tables. Tables are structured as follows \n", - " {name,fields:[],\"y\":}, with \n", - " name table name (needed to generate sql query)\n", - " fields list of field names, used in the projection\n", - " y name of the field to be joined.\n", - " @param xo candidate table in the join\n", - " @param xi candidate table in the join\n", - " @param join field by which the tables can be joined.\n", - " \"\"\"\n", - " # The set operation will remove redundancies in the field names (not sure it's a good idea)\n", - " xo = args['xo']['fields']\n", - " xi = args['xi']['fields']\n", - " zi = args['xi']['name']\n", - " return list(set(xo) | set(['.'.join([args['xi']['name'],name]) for name in xi if name != args['join']]) )\n", - "def generate_sql(**args):\n", - " \"\"\"\n", - " This function will generate the SQL query for the resulting join\n", - " \"\"\"\n", - " xo = args['xo']\n", - " xi = args['xi']\n", - " sql = \"SELECT :fields FROM :xo.name INNER JOIN :xi.name ON :xi.name.:xi.y = :xo.y \"\n", - " fields = \",\".join(get_fields(xo=xi,xi=xi,join=xi['y']))\n", - " \n", - " \n", - " sql = sql.replace(\":fields\",fields).replace(\":xo.name\",xo['name']).replace(\":xi.name\",xi['name'])\n", - " sql = sql.replace(\":xi.y\",xi['y']).replace(\":xo.y\",xo['y'])\n", - " return sql\n", - " \n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['person_id',\n", - " 'measurements.value_as_number',\n", - " 'date_of_birth',\n", - " 'race',\n", - " 'measurements.value_source_value']" - ] - }, - "execution_count": 54, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "xo = {\"name\":\"person\",\"fields\":['person_id','date_of_birth','race']}\n", - "xi = {\"name\":\"measurements\",\"fields\":['person_id','value_as_number','value_source_value']}\n", - "get_fields(xo=xo,xi=xi,join=\"person_id\")" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'SELECT person_id,value_as_number,measurements.value_source_value,measurements.value_as_number,value_source_value FROM person INNER JOIN measurements ON measurements.person_id = person_id '" - ] - }, - "execution_count": 55, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "xo = {\"name\":\"person\",\"fields\":['person_id','date_of_birth','race'],\"y\":\"person_id\"}\n", - "xi = {\"name\":\"measurements\",\"fields\":['person_id','value_as_number','value_source_value'],\"y\":\"person_id\"}\n", - "generate_sql(xo=xo,xi=xi)" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[('a', 'b'), ('a', 'c'), ('b', 'c')]" - ] - }, - "execution_count": 59, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "\"\"\"\n", - " We are designing a process that will take two tables that will generate \n", - "\"\"\"\n", - "import itertools\n", - "list(itertools.combinations(['a','b','c'],2))" - ] - }, - { - "cell_type": "code", - "execution_count": 87, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "TableReference(DatasetReference(u'aou-res-deid-vumc-test', u'raw'), 'care_site')" - ] - }, - "execution_count": 87, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ref = client.dataset('raw')\n", - "tables = list(client.list_tables(ref))\n", - "names = [table.table_id for table in tables]\n", - "(tables[0].reference)" - ] - }, - { - "cell_type": "code", - "execution_count": 85, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(u'care_site',\n", - " u'concept',\n", - " u'concept_ancestor',\n", - " u'concept_class',\n", - " u'concept_relationship',\n", - " u'concept_synonym',\n", - " u'condition_occurrence',\n", - " u'criteria',\n", - " u'death',\n", - " u'device_exposure',\n", - " u'domain',\n", - " u'drug_exposure',\n", - " u'drug_strength',\n", - " u'location',\n", - " u'measurement',\n", - " u'note',\n", - " u'observation',\n", - " u'people_seed',\n", - " u'person',\n", - " u'procedure_occurrence',\n", - " u'relationship',\n", - " u'visit_occurrence',\n", - " u'vocabulary')" - ] - }, - "execution_count": 85, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#\n", - "# find every table with person id at the very least or a subset of fields\n", - "#\n", - "def get_tables\n", - "q = ['person_id']\n", - "pairs = list(itertools.combinations(names,len(names)))\n", - "pairs[0]" - ] - }, - { - "cell_type": "code", - "execution_count": 90, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['a']" - ] - }, - "execution_count": 90, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "list(set(['a','b']) & set(['a']))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.15rc1" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/Untitled.ipynb b/notebooks/Untitled.ipynb deleted file mode 100644 index 1e154e2..0000000 --- a/notebooks/Untitled.ipynb +++ /dev/null @@ -1,238 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import itertools \n", - "import pandas as pd\n", - "import numpy as np\n", - "# from pandas_risk import *\n", - "from time import time\n", - "import os\n", - "\n", - "attr = ['gender','race','zip','year_of_birth']\n", - "comb_attr = [\n", - " ['zip' ,'gender', 'birth_datetime', 'race'], \n", - " ['zip', 'gender', 'year_of_birth', 'race'], \n", - " ['gender','race','zip'],\n", - " ['race','year_of_birth','zip']\n", - "]\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "SQL_CONTROLLED=\"SELECT * FROM deid_risk.basic_risk60k\"\n", - "dfc = pd.read_gbq(SQL_CONTROLLED,private_key='/home/steve/dev/google-cloud-sdk/accounts/curation-test.json')\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "def risk(**args):\n", - " Yi = args['data']\n", - " Yi = Yi.fillna(' ')\n", - " sizes = args['prop'] if 'prop' in args else np.arange(5,100,5)\n", - " FLAG = args['flag'] if 'flag' in args else 'UNFLAGGED'\n", - " N = args['num_runs']\n", - " if 'cols' in args :\n", - " columns = args['cols']\n", - " else:\n", - " columns = list(set(Yi.columns.tolist()) - set(['person_id']))\n", - " p = pd.DataFrame()\n", - " y_i= pd.DataFrame({\"group_size\":Yi.groupby(columns,as_index=False).size()}).reset_index()\n", - " for index in sizes :\n", - " for n in np.repeat(index,N):\n", - " \n", - " # we will randomly sample n% rows from the dataset\n", - " i = np.random.choice(Yi.shape[0],((Yi.shape[0] * n)/100),replace=False)\n", - " x_i= pd.DataFrame(Yi).loc[i] \n", - " risk = x_i.deid.risk(id='person_id',quasi_id = columns)\n", - " x_i = pd.DataFrame({\"group_size\":x_i.groupby(columns,as_index=False).size()}).reset_index()\n", - "\n", - "\n", - " r = pd.merge(x_i,y_i,on=columns,how='inner')\n", - " if r.shape[0] == 0 :\n", - " continue\n", - " r['marketer'] = r.apply(lambda row: (row.group_size_x / np.float64(row.group_size_y)) /np.sum(x_i.group_size) ,axis=1)\n", - " r['sample %'] = np.repeat(n,r.shape[0])\n", - " r['tier'] = np.repeat(FLAG,r.shape[0])\n", - " r['sample marketer'] = np.repeat(risk['marketer'].values[0],r.shape[0])\n", - " # r['patient_count'] = np.repeat(r.shape[0],r.shape[0])\n", - " r = r.groupby(['sample %','tier','sample marketer'],as_index=False).sum()[['sample %','marketer','sample marketer','tier']]\n", - " p = p.append(r)\n", - " p.index = np.arange(p.shape[0]).astype(np.int64)\n", - " return p\n", - " \n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "from pandas_risk import *\n", - "o = pd.DataFrame()\n", - "PATH=\"out/experiment-phase-2.xlsx\"\n", - "writer = pd.ExcelWriter(PATH,engine='xlsxwriter')\n", - "comb_attr = [\n", - " ['zip' ,'gender', 'birth_datetime', 'race'], \n", - " ['zip', 'gender', 'year_of_birth', 'race'], \n", - " ['gender','race','zip'],\n", - " ['race','year_of_birth','zip']\n", - "]\n", - "\n", - "for cols in comb_attr :\n", - " o = risk(data=dfc,cols=cols,flag='CONTROLLED',num_runs=5)\n", - " #\n", - " # adding the policy\n", - " x = [1* dfc.columns.isin(cols) for i in range(o.shape[0])]\n", - " o = o.join(pd.DataFrame(x,columns = dfc.columns))\n", - " #\n", - " # Write this to excel notebook\n", - " o.to_excel(writer,\"-\".join(cols))\n", - "# break\n", - " \n", - "\n", - "# p = p.rename(columns={'marketer_x':'sample marketer'})\n", - "# p.index = np.arange(p.shape[0]).astype(np.int64)\n", - "\n", - "writer.save()" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
person_idyear_of_birthmonth_of_birthday_of_birthbirth_datetimerace_concept_idethnicity_concept_idlocation_idcare_site_idperson_source_value...gender_source_concept_idrace_source_valueethnicity_source_valuesex_at_birthbirth_dateracezipcitystategender
\n", - "

0 rows × 21 columns

\n", - "
" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: [person_id, year_of_birth, month_of_birth, day_of_birth, birth_datetime, race_concept_id, ethnicity_concept_id, location_id, care_site_id, person_source_value, gender_source_value, gender_source_concept_id, race_source_value, ethnicity_source_value, sex_at_birth, birth_date, race, zip, city, state, gender]\n", - "Index: []\n", - "\n", - "[0 rows x 21 columns]" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "x = [1* dfc.columns.isin(cols) for i in range(o.shape[0])]\n", - "o.join(pd.DataFrame(x,columns = dfc.columns))\n" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'columns' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mcolumns\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;31mNameError\u001b[0m: name 'columns' is not defined" - ] - } - ], - "source": [ - "columns\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.15rc1" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/data-analysis.ipynb b/notebooks/data-analysis.ipynb deleted file mode 100644 index 8e4e21b..0000000 --- a/notebooks/data-analysis.ipynb +++ /dev/null @@ -1,214 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "df = pd.read_csv('../src/out/risk_xoi.csv')" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Unnamed: 0group_countrow_countmarketerprosecutorfield_count
00432512790808020.005469110
1017824004790808020.225390128
2043538084790808020.550552138
3064042788790808020.809840146
406866070790808020.086823117
\n", - "
" - ], - "text/plain": [ - " Unnamed: 0 group_count row_count marketer prosecutor field_count\n", - "0 0 432512 79080802 0.005469 1 10\n", - "1 0 17824004 79080802 0.225390 1 28\n", - "2 0 43538084 79080802 0.550552 1 38\n", - "3 0 64042788 79080802 0.809840 1 46\n", - "4 0 6866070 79080802 0.086823 1 17" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "compiled = df.groupby('field_count')[['field_count','marketer','prosecutor']].mean()\n", - "figure = compiled[['marketer','prosecutor']].plot.line().get_figure()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "variables": { - " figure ": "" - } - }, - "source": [ - "# Dataset Used\n", - "---\n", - "\n", - "We performed joins against all the tables from all-of-us and truncated records while randomly selecting on record per every join. As a result we have roughly a dataset of about **80 million** records and about **5000** distinct patients.\n", - "\n", - "## Expriment Design\n", - "---\n", - "\n", - "We compute both marketer and prosecutor risk computation while randomly selecting the number of attributes out of **111**. This selection is between ***2*** and **111** attributes. The number of maximum number of attributes that can be computed at any time is **64** : limitations of Google's Big-query. We performed **500** runs.\n", - "\n", - "## Results\n", - "---\n", - "\n", - "The results show the prosecutor risk is unchanging perhaps as an artifact of the number of runs **500** or the dataset curation: The joins we performed. The prosecutor risk shows there is at least one record that vulnerable.\n", - "\n", - "The marketer risk seems to increase as the number of randomly selected attributes increases as a general trend. \n", - "\n", - "{{ figure }} \n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.15rc1" - }, - "varInspector": { - "cols": { - "lenName": 16, - "lenType": 16, - "lenVar": 40 - }, - "kernels_config": { - "python": { - "delete_cmd_postfix": "", - "delete_cmd_prefix": "del ", - "library": "var_list.py", - "varRefreshCmd": "print(var_dic_list())" - }, - "r": { - "delete_cmd_postfix": ") ", - "delete_cmd_prefix": "rm(", - "library": "var_list.r", - "varRefreshCmd": "cat(var_dic_list()) " - } - }, - "types_to_exclude": [ - "module", - "function", - "builtin_function_or_method", - "instance", - "_Feature" - ], - "window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/data-preparation.ipynb b/notebooks/data-preparation.ipynb deleted file mode 100644 index adbd66e..0000000 --- a/notebooks/data-preparation.ipynb +++ /dev/null @@ -1,95 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " skiping ...\n", - " skiping ...\n", - " skiping ...\n", - " skiping ...\n", - " skiping ...\n", - " skiping ...\n", - " skiping ...\n" - ] - }, - { - "data": { - "text/plain": [ - "2" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "\"\"\"\n", - " This notebook is designed to generate SQL syntax all the quasi-identifiers for the patients in the database\n", - " The resulting SQL will be run against bigquery to produce a table with every record mapping to a patient\n", - " \n", - "\"\"\"\n", - "\n", - "from risk import *\n", - "ihandle = UtilHandler(path='/home/steve/dev/google-cloud-sdk/accounts/curation-prod.json',dataset='combined20180822',key_field='person_id',key_table='person',filter=['person','observation'])\n", - "r = ihandle.migrate_tables()\n", - "len(r)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "u' SELECT person.person_id , person.year_of_birth , person.month_of_birth , person.day_of_birth , person.birth_datetime , person.race_concept_id , person.ethnicity_concept_id , person.location_id , person.care_site_id , person.person_source_value , person.gender_source_value , person.gender_source_concept_id , person.race_source_value , person.ethnicity_source_value , basic_observation.sex_at_birth AS sex_at_birth1 , basic_observation.birth_date AS birth_date1 , basic_observation.race AS race1 , basic_observation.zip AS zip1 , basic_observation.city AS city1 , basic_observation.state AS state1 , basic_observation.gender AS gender1 FROM (select * from deid_image.person ) as person INNER JOIN (select * from deid_image.basic_observation ) as basic_observation ON basic_observation.person_id = person.person_id '" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ihandle = UtilHandler(path='/home/steve/dev/google-cloud-sdk/accounts/curation-test.json',dataset='deid_image',key_field='person_id',key_table='person',filter=['person','basic_observation'])\n", - "ihandle.create_table().replace('\\n',' ')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.15rc1" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/experiments.ipynb b/notebooks/experiments.ipynb deleted file mode 100644 index 3d52a33..0000000 --- a/notebooks/experiments.ipynb +++ /dev/null @@ -1,610 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - " Health Information Privacy Lab\n", - " This notebook is intended to run experiments and generate the data to be used by another notebook\n", - "\n", - " pre-requisites:\n", - " - pandas_risk This is a custom framework that will compute risk for a given dataset\n", - " - google-cloud-bigquery\n", - " - numpy\n", - "\"\"\"\n", - "import pandas as pd\n", - "import numpy as np\n", - "from pandas_risk import *\n", - "from time import time\n", - "import os\n", - "#\n", - "#-- Loading the dataset\n", - "class Logger :\n", - " cache = []\n", - " @staticmethod\n", - " def clear():\n", - " Logger.cache = []\n", - " @staticmethod\n", - " def log(**args) :\n", - " Logger.cache.append(args)\n", - " \n", - "SQL_CONTROLLED=\"SELECT person_id,birth_datetime,city,zip,state,race,gender FROM deid_risk.basic_risk60k\"\n", - "SQL_REGISTERED = \"SELECT person_id,birth_datetime,city,zip,state,race,gender FROM deid_risk.basic_deid_risk60k\"\n", - "dfr = pd.read_gbq(SQL_REGISTERED,private_key='/home/steve/dev/google-cloud-sdk/accounts/curation-test.json')\n", - "dfc = pd.read_gbq(SQL_CONTROLLED,private_key='/home/steve/dev/google-cloud-sdk/accounts/curation-test.json')\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 99, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sample %marketersample marketertier
050.9749450.981364controlled
150.9755130.981996controlled
250.9757980.980733controlled
350.9763640.981996controlled
450.9763640.981996controlled
\n", - "
" - ], - "text/plain": [ - " sample % marketer sample marketer tier\n", - "0 5 0.974945 0.981364 controlled\n", - "1 5 0.975513 0.981996 controlled\n", - "2 5 0.975798 0.980733 controlled\n", - "3 5 0.976364 0.981996 controlled\n", - "4 5 0.976364 0.981996 controlled" - ] - }, - "execution_count": 99, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "\n", - "#\n", - "FLAG='REGISTERED-TIER-1'\n", - "if FLAG == 'REGISTERED-TIER' :\n", - " Yi = pd.DataFrame(dfr)\n", - " FOLDER='registered'\n", - "else:\n", - " Yi = pd.DataFrame(dfc)\n", - " FOLDER='controlled'\n", - "Yi = Yi.fillna(' ')\n", - "N = 5\n", - "N_ = str(N)\n", - "SUFFIX = FOLDER+'-tier-'+str(N)+'-experiment.xlsx'\n", - "PATH = os.sep.join(['out',SUFFIX])\n", - "\n", - "\n", - "columns = list(set(Yi.columns.tolist()) - set(['person_id']))\n", - "merged_columns = list(columns)+['field_count']\n", - "m = {}\n", - "p = pd.DataFrame()\n", - "n = 0\n", - "y_i= pd.DataFrame({\"group_size\":Yi.groupby(columns,as_index=False).size()}).reset_index()\n", - "#.deid.risk(id='person_id',quasi_id=columns)\n", - "for index in np.arange(5,105,5):\n", - " for n in np.repeat(index,N) :\n", - "# np.random.seed( np.random.randint(0,int(time())+np.random.randint(0,1000)+index+n ) \n", - " #\n", - " # we will randomly sample n% rows from the dataset\n", - " i = np.random.choice(Yi.shape[0],((Yi.shape[0] * n)/100),replace=False)\n", - " x_i= pd.DataFrame(Yi).loc[i] \n", - " risk = x_i.deid.risk(id='person_id',quasi_id = columns)\n", - " x_i = pd.DataFrame({\"group_size\":x_i.groupby(columns,as_index=False).size()}).reset_index()\n", - " \n", - "# y_i= pd.DataFrame(Yi).deid.risk(id='person_id',quasi_id=columns)\n", - "\n", - "\n", - " r = pd.merge(x_i,y_i,on=columns,how='inner')\n", - " if r.shape[0] == 0 :\n", - " print 'skipping ',n\n", - " continue\n", - " r['marketer'] = r.apply(lambda row: (row.group_size_x / np.float64(row.group_size_y)) /np.sum(x_i.group_size) ,axis=1)\n", - " r['sample %'] = np.repeat(n,r.shape[0])\n", - " r['tier'] = np.repeat(FOLDER,r.shape[0])\n", - " r['sample marketer'] = np.repeat(risk['marketer'].values[0],r.shape[0])\n", - "# r['patient_count'] = np.repeat(r.shape[0],r.shape[0])\n", - " r = r.groupby(['sample %','tier','sample marketer'],as_index=False).sum()[['sample %','marketer','sample marketer','tier']]\n", - "# r['marketer'] = r.apply(lambda row: (row.group_size_x / row.group_size_y) / row.patient_count_x,axis=1 )\n", - "# r = r.groupby(columns+['marketer_x'],as_index=False).sum()[columns+['marketer','marketer_x']]\n", - "# r['sample %'] = np.repeat(n,r.shape[0])\n", - "# r['tier'] = np.repeat(FOLDER,r.shape[0])\n", - " p = p.append(r)\n", - "\n", - "writer = pd.ExcelWriter(PATH,engine='xlsxwriter')\n", - "p = p.rename(columns={'marketer_x':'sample marketer'})\n", - "p.index = np.arange(p.shape[0]).astype(np.int64)\n", - "p.to_excel(writer,FOLDER)\n", - "writer.save()\n", - "p.head() " - ] - }, - { - "cell_type": "code", - "execution_count": 100, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 100, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZIAAAEKCAYAAAA4t9PUAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3XuUXFWd9vHvk+4khBiIJJkIuZA4oBIgBKZBHC+BvDITRgkXGQXvLpVZM/B6QRzhxVEHZTFcFHVgdCGgMKLIZDCGeUHgJUFmRmSlA7kAMUwEJRcIzSUBQkzS3b/3j3M6VBd9OdW7qivd9XzW6pWqXeec2qerU8/Ze5+zjyICMzOzgRpR7wqYmdnQ5iAxM7MkDhIzM0viIDEzsyQOEjMzS+IgMTOzJA4SMzNL4iAxM7MkDhIzM0vSXO8KDIaJEyfGjBkz6l0NM7MhZfny5c9GxKT+lmuIIJkxYwatra31roaZ2ZAi6Q9FlnPXlpmZJXGQmJlZEgeJmZklcZCYmVkSB4mZmSVxkJiZWRIHiZmZJXGQmJlZEgeJmZklcZCYmVkSB4mZmSVxkJiZWRIHiZmZJXGQmJlZEgeJmZklqWmQSJovaa2kdZLO7+H1AyXdI2mVpHslTS157VJJD+c/HygpnynpgXybP5M0qpb7YGZmfatZkEhqAq4GTgRmAWdKmlW22BXAjRExG7gIuCRf9z3AUcAc4K3AeZL2yde5FLgyIg4CXgA+Wat9MDOz/tWyRXIMsC4iHo+IncDNwMlly8wCluSPl5a8Pgu4LyLaI2IbsAqYL0nAPGBhvtwNwCk13AczM+tHLYNkCrC+5PmGvKzUSuC0/PGpwDhJE/Ly+ZL2ljQROB6YBkwAtkREex/bNDOzQVTvwfbzgLmSHgLmAhuBjoi4C7gd+DXwU+B+oKOSDUs6S1KrpNa2trYqV9vMzLrUMkg2krUiukzNy3aLiE0RcVpEHAlcmJdtyf+9OCLmRMQJgIDHgOeA8ZKae9tmybaviYiWiGiZNGlSNffLzMxK1DJIlgEH52dZjQLOABaXLiBpoqSuOlwAXJ+XN+VdXEiaDcwG7oqIIBtLOT1f52PAL2q4D2Zm1o+aBUk+jnEOcCewBrglIh6RdJGkBflixwFrJT0GTAYuzstHAv8p6VHgGuDDJeMiXwLOlbSObMzkulrtg5mZ9U/ZQf7w1tLSEq2trfWuhpnZkCJpeUS09LdcvQfbzcxsiHOQmJlZEgeJmZklcZCYmVkSB4mZmSVxkJiZWRIHiZmZJXGQmJlZEgeJmZklcZCYmVkSB4mZmSVxkJiZWRIHiZmZJXGQmJlZEgeJmZklcZCYmVkSB4mZmSVxkJiZWRIHiZmZJXGQmJlZEgeJmZklcZCYmVkSB4mZmSVxkJiZWRIHiZmZJXGQmJlZEgeJmZklcZCYmVmSmgaJpPmS1kpaJ+n8Hl4/UNI9klZJulfS1JLXLpP0iKQ1kr4rSXn5vfk2V+Q/f1LLfTAzs77VLEgkNQFXAycCs4AzJc0qW+wK4MaImA1cBFySr/vnwNuB2cBhwNHA3JL1PhQRc/KfZ2q1D2Zm1r9atkiOAdZFxOMRsRO4GTi5bJlZwJL88dKS1wPYCxgFjAZGAptrWFczMxugWgbJFGB9yfMNeVmplcBp+eNTgXGSJkTE/WTB8lT+c2dErClZ74d5t9Y/dHV5lZN0lqRWSa1tbW3V2B8zM+tBvQfbzwPmSnqIrOtqI9Ah6SDgEGAqWfjMk/TOfJ0PRcThwDvzn4/0tOGIuCYiWiKiZdKkSbXeDzOzhlXLINkITCt5PjUv2y0iNkXEaRFxJHBhXraFrHXym4h4OSJeBu4A3pa/vjH/9yXgJ2RdaGZmVie1DJJlwMGSZkoaBZwBLC5dQNJESV11uAC4Pn/8JFlLpVnSSLLWypr8+cR83ZHAe4GHa7gPZmbWj5oFSUS0A+cAdwJrgFsi4hFJF0lakC92HLBW0mPAZODivHwh8DtgNdk4ysqIuI1s4P1OSauAFWQtnB/Uah/MzKx/ioh616HmWlpaorW1td7VMDMbUiQtj4iW/par92C7mZkNcQ4SMzNL4iAxM7MkDhIzM0viIDEzsyQOEjMzS+IgMTOzJA4SMzNL4iAxM7MkDhIzM0viIDEzsyQOEjMzS+IgMTOzJA4SMzNL4iAxM7MkDhIzM0viIDEzsyQOEjMzS+IgMTOzJA6SGtqybQdrN21hy7Yd9a6KmVnNNNe7AsPVkoc38u3bVtHUNIKOjk4+f9Jsjj9sSr2rZWZWdW6R1MCWbTv49m2r2NHeySs72tnR3smVt61yy8TMhiUHSQ1s3rqdpqbuv9qmphFs3rq9TjUyM6sdB0kNTN53DDt2tncr27Gzncn7jqlTjczMasdBUiMaoT6fm5kNF/0GiaQmSUsHozLDxeat2xnV3NStbFRzk7u2zGxY6jdIIqID6JS07yDUZ1iYvO8YOjo6u5V1dHQOetdW6unHPn3ZzIooevrvy8BqSXcD27oKI+Izfa0kaT7wHaAJuDYi/qns9QOB64FJwPPAhyNiQ/7aZcB7yMLubuCzERGS/gz4ETAGuL2rvOB+DIrxY0fz+ZNmc2XZ6b/jx44etDqknn7s05fNrKiiQXJr/lOYpCbgauAEYAOwTNLiiHi0ZLErgBsj4gZJ84BLgI9I+nPg7cDsfLn/AuYC9wLfAz4NPEAWJPOBOyqp22A4/rApHDlzIpu3bmfyvmMGNURKTz+mPWsZXXnbKo6cObFQPVLXN7PGUihI8i/6McD0iFhbcNvHAOsi4nEASTcDJwOlQTILODd/vBRY1PWWwF7AKEDASGCzpP2BfSLiN/k2bwROYQ8MEshaJvX44t19+nH7q91rXacfF6lP6vpm1lgKnbUl6SRgBfDL/PkcSYv7WW0KsL7k+Ya8rNRK4LT88anAOEkTIuJ+smB5Kv+5MyLW5Otv6GebDS91jGZPGeMxs6Gh6Om/XyNrYWwBiIgVwBur8P7nAXMlPUTWdbUR6JB0EHAIMJUsKOZJemclG5Z0lqRWSa1tbW1VqOrgG+hgd9cYzejmEew9upnRzSMqGqNJXd/MGkvRMZJdEbFV6nYtRGdvC+c2AtNKnk/Ny3aLiE3kLRJJrwPeFxFbJH0a+E1EvJy/dgfwNuBf8+30us2SbV8DXAPQ0tKyRw3GF5E62J06RlONMZ4t23bUZYzIzAZX0SB5RNIHgSZJBwOfAX7dzzrLgIMlzST7sj8D+GDpApImAs9HRCdwAdkZXABPAp+WdAnZGMlc4NsR8ZSkFyUdSzbY/lHgnwvuw5BRrcHu1DGalPWrcdaXg8hsaCjatfW/gUOBHcBPgK3AZ/taISLagXOAO4E1wC0R8YikiyQtyBc7Dlgr6TFgMnBxXr4Q+B2wmmwcZWVE3Ja/9nfAtcC6fJk9cqA9xVCfq6sak1YueXgjH/3uEs7/8QN89LtLWPpwjw1PM9sDFG2RvCciLgQu7CqQ9NfAv/W1UkTcTnaKbmnZV0oeLyQLjfL1OoC/6WWbrcBhBes9JO0pg90DbRGknvXl04/NhpaiLZILCpZZFewJg90pLYLUIBzqLTKzRtNni0TSicBfAVMkfbfkpX2A9p7Xsmqo52B3aosg9cr+arXIPMZiNjj669raBLQCC4DlJeUvAZ+vVaUsU6/B7mpckJgShNWYYmY4TPHS6EHY6Ps/lPQZJBGxElgp6Sf5spVc2W51ktqi2BPGaI4/bAp/OnkffrtpC285YDzTJ40rvO5wGGMZDme9pbz/cDgQaCRFB9vnk82LNQqYKWkOcFFELOh7NauH1BbFntAiqHeLqhrq1bUI9Q+ilPcfDgcCjaZokHyN7Mr2eyG7sj2/PsT2QJP3HcPO9o5uZTvbOypqUaR0TdV70sg9YYxlycMb+dbilQgRBF9YcMSgBWG9gyj1/at1IDCUW2RDTcqV7UPuavGhJuUPMTqjz+e1tHnrdjrLZvbvjKho0siU9avVorpy8Uo0YgTR2cm5FQTBlm07uGzRCrJdyPbj0kUrKgrClAOB1N9fNYIg5f2rcSAwlFtk1Xj/wVbLK9stQWrXzuhRzbyy49UT60aPaq7oiC7l/ceMbGJXR/cvkl0dwZiRTb2sUd31Ib1FdcWiFXQE0JF9oV9eQRCs/P2zlN8hJyIrn3tosd9hyoFA6u8vtUWQ+v6pBwLVapF96xcrQCMgOvnCyXMGtWsu5f3rYaBXtr8IfK5WlWp0qVeGpx7Rpb7/9l0djGru/qc1qnkE23d19LJGddfvMn7saN58wPiKj+bWPf0iZd+DdERWXsQL23ZWVF5u89btjBjRrfXPiBEqfB1N6u8vtUX0xDMvVVTek+MPm8JVn3oHf/uXs7jqU++o6Eu0rxZREVu27eCyn69gVyfs6uhkVydc+vMVhf/+N2/d/prumsjLB+P9uyxZtZ6v/WwZS1at73/hREVbJJN7uLL9aLL5tKzKqjVY/q2SrplKjuhS33/yvmNQWZny8iJS10/X29F/sVbBUTMnVlReLvWIvhq/v46y9y9/3pcXevnC6628J9kY0yqkrDX3hQWD1yJe+fvnegyClb9/jrmHHlDo/Xe2dz+Q29neOWjvD/Chb9/Nsy9lBy73P/YM1y35LTd97oRC6w5E0RbJv0va/SlKehevTrBoVVaNPuIAkLIvFJV/rdT2/asxjf1fzJnarewv50wdtH7iP9mn5/3srbzc9EnjWNAyvVvZgpbphU9hTm1RjB87mkOnvb5b2aHTXl/497fu6Rd7/CIr2iI7+A37VFRebsu2HVy+aCW7OjrZ2d7Jro5OLlu0svAR+RPP9FzP3srLvbDtjxWVv/Z90lpkv9u8taLycktWrd8dIl2efWlnTVsmRVskfwMsym9wdRTZLXH/qma1anBdX6S3tT65u6ySL9KurqnSo6JKr0xPeX9IH6O4a8WGbmV3rtjAh971poq282TbSwO6DuWZF3v+wnjmxT8W3s7ZJx7OSS0zBvT+qS2KJ9te4sEnnutW9uATz/Fk20uF6rHtj7sqKi/X3NxE8wjRXjKu0zxCNDcXPyLvqWuq6BF5atfi/uP3rqi83Kbnt1VUXm7MqJ6/lnsrL3ffmqd7LZ83e1qPr6Uq1CKJiGVkA+x3kZ0K/O6IqH3HW4Pq7Yu0kj7alLmqUt+/y0DHKFL7mAGuumM1n/7+fXxz8So+/f37uPqO1YXXTf0iTZXaInvwiWcrKn+ttK69yfuOoalsjKdphAoHYWrXWGrX4tbtPX/OvZWXO2C/ngOnt/Jyb3/zGyoqL/euQ3perrfyaugzSCTdJmlxflvdC4C9yQbcrytwq10boNQgmLzvGHbs7D4V2o6d7YM+aeJA7/CY2sf8ZNtL3VpTAItbn+TJtmJdC2P3GllReU9Sgiw1yF/fS+D0Vl5u7F6jKiovl9q1mRoEqV2LbzlgfEXl5Y6YMfE1vclSVl5Eav3nzZ7GxHHdP6uJ40bVrDUC/XdtXVGzd7ZeVeOCQo0QpaceaUTxcZJ6n8e/fVcHI5vUbcB0ZJMKjxE8+ETPt1Z+8Im2Qv8ZD3rDPjR1//XRpKy8iN6C7KSWGYXeP/VkhyNmTGCE1K17aITEETMmFKp/6v5DWtfm9EnjOGrmhG7dc0fNnFBR9+DZJx7OcYcewPLHn+XP3jiRQ6cX2/eu91/QMp3FJZ9hJV/k48eO5kunzOGbZRekVvI7SOkaBbjpcyewZNV67lvzNO865A01DRHof66tX0lqAv5fRBxf05pYNynXEWzeup1RzU20d7zaKhnV3DRoF/RV48r0ERKlXSkjVLxr5PVj96qovNz4saP54ilzup31dm4FXwS/3bSl1/IiXwjVONnh7085gm+WnfVUyckOKftfup2Bzlr9yPoXupU9sv4FtmzbMaDroBbe/3jFFwSmfpFXY/bu6ZPGVfy+pebNnlbzAOnS7+hNRHRI6pS0b0QUO23AkqReUFiNFkXKf4R6z/XV25F30SNySNv/1K6R1NO3If2LrBpfhAO1J0wRA+lf5Km3uh5Kip619TKwWtLdwO5TDyLiMzWpVYOr1um3KVOEdG1nIP8R6h1kAM1Nor2kb6a5qbJToGHg+5/aNQLdT9+OCk/f7pL6RVavL8Kq3RitzpN2NpKiQXJr/tNQBnr6aKrdR6S3rdrd1z3YR6Qp6h1kqV171ZDSNZJ6+vZQl/r3syfcBqHRFAqSiLih1hXZ01x1x+puA6YLWqZz9omHV7SNpEkXASKyo9HyiZsKqmfTup5Btqd8kQy0a8RH1PW/MZpVplCQ5BM1XgLMAnaPWEbEG2tUr7pKPesGqjMN986O2D1p4FA8Iq1XkA31L5I9JQjrLeXvp54HMo2oaNfWD4GvAlcCxwOfoPj0KkNO6lk3e8r9GBrZUP4iGepBuKdopMHueisaJGMi4h5Jiog/AF+TtBz4Sg3rVjepZ93sCfdjsKH9RTKUg9AaT9FWxQ5JI4D/kXSOpFOB19WwXnXVdUFUqUouiKrW/RgGemXwnmKgV7ZbZqBTzJgNtqItks+STY/yGeDrZN1bH61Vpeot9YKortlbS8+6qfR+GkP9iLQad4gzs6GhaJAE8K/AgUDXhEM/AGbXolL11tekgYNxP44uQ7VrploXhJnZ0FC0a+smsgH39wHvzX9OqlWl6i110sBG75qq1qSPZjY0FG2RtEVExbP9SpoPfAdoAq6NiH8qe/1AshtkTQKeBz4cERskHU92hliXtwBnRMQiST8C5gJd07V8PCJWVFq3vqROGgiN3TXlkwXMGkvRIPmqpGuBe8imkQcgInq92j2f7PFq4ARgA7BM0uKIeLRksSuAGyPiBknzyK5V+UhELAXm5NvZD1hHdi+ULl+MiIUF616x1EkDuzRq15RPXzVrLEWD5BNkrYKRQNehZtD3tCnHAOsi4nEASTcDJwOlQTILODd/vBRY1MN2TgfuiIhXCtY1WTWmKBnKqnEdy1BvkZlZcUWD5OiIeHOF254ClN5FcQPw1rJlVgKnkXV/nQqMkzQhIkrvE3oG8K2y9S6W9BWyFtL5EVH180urMUXJUFWtrqmh2iIzs8oUHWz/taRZNXj/84C5kh4iG/fYCOweiJC0P3A4cGfJOheQtY6OBvYDvtTThiWdJalVUmtbW883OupN6RQlO3Z1sLMjuPK2VRUPOg/V6yiGy8kCZjY4irZIjgVWSHqCbIxEQEREX6f/bgRK76oyNS/bLSI2kbVIkPQ64H0RUTo/yfuBn0fErpJ1nsof7pD0Q7Iweo2IuAa4BqClpaWiJkU1unaG+nUU7poys6KKBsn8AWx7GXCwpJlkAXIG8MHSBSRNBJ6PiE6ylsb1Zds4My8vXWf/iHhKkoBTgIcHULc+pXbtDJfrKNw1ZWZFFOraiog/9PTTzzrtwDlk3VJrgFsi4hFJF0lakC92HLBW0mPAZODirvUlzSBr0fyqbNM3SVoNrAYmAt8osg+VSO3a8XUUZtZIirZIBiQibgduLyv7SsnjhUCPp/FGxO/JBuzLy+dVt5Y9S+na8XUUZtZIhu1U8NUw0EnzPFhtZo2kpi2SRubBajNrFA6SGvJgtZk1AndtmZlZEgeJmZklcZCYmVkSB4mZmSVxkJiZWRIHiZmZJXGQmJlZEgeJmZklcZCYmVkSB4mZmSVxkJiZWRIHiZmZJXGQmJlZEgeJmZklcZCYmVkSB4mZmSVxkJiZWRIHiZmZJXGQmJlZEgeJmZklcZCYmVkSB4mZmSVxkJiZWRIHiZmZJXGQmJlZkpoGiaT5ktZKWifp/B5eP1DSPZJWSbpX0tS8/HhJK0p+/ijplPy1mZIeyLf5M0mjarkPZmbWt5oFiaQm4GrgRGAWcKakWWWLXQHcGBGzgYuASwAiYmlEzImIOcA84BXgrnydS4ErI+Ig4AXgk7XaBzMz618tWyTHAOsi4vGI2AncDJxctswsYEn+eGkPrwOcDtwREa9IElmwLMxfuwE4peo1NzOzwmoZJFOA9SXPN+RlpVYCp+WPTwXGSZpQtswZwE/zxxOALRHR3sc2zcxsENV7sP08YK6kh4C5wEago+tFSfsDhwN3VrphSWdJapXU2tbWVq36mplZmVoGyUZgWsnzqXnZbhGxKSJOi4gjgQvzsi0li7wf+HlE7MqfPweMl9Tc2zZLtn1NRLRERMukSZPS98bMzHpUyyBZBhycn2U1iqyLanHpApImSuqqwwXA9WXbOJNXu7WIiCAbSzk9L/oY8Isa1N3MzAqqWZDk4xjnkHVLrQFuiYhHJF0kaUG+2HHAWkmPAZOBi7vWlzSDrEXzq7JNfwk4V9I6sjGT62q1D2Zm1j9lB/nDW0tLS7S2tta7GmZmQ4qk5RHR0t9y9R5sNzOzIc5BYmZmSRwkZmaWxEFiZmZJHCRmZpbEQWJmZkkcJGZmlsRBYmZmSRwkZmaWxEFiZmZJHCRmZpbEQWJmZkkcJGZmlsRBYmZmSRwkZmaWxEFiZmZJHCRmZpbEQWJmZkkcJGZmlsRBYmZmSRwkZmaWxEFiZmZJHCRmZpbEQWJmZkkcJGZmlsRBYmZmSRwkZmaWpKZBImm+pLWS1kk6v4fXD5R0j6RVku6VNLXktemS7pK0RtKjkmbk5T+S9ISkFfnPnFrug5mZ9a1mQSKpCbgaOBGYBZwpaVbZYlcAN0bEbOAi4JKS124ELo+IQ4BjgGdKXvtiRMzJf1bUah/MzKx/tWyRHAOsi4jHI2IncDNwctkys4Al+eOlXa/ngdMcEXcDRMTLEfFKDetqZmYDVMsgmQKsL3m+IS8rtRI4LX98KjBO0gTgTcAWSbdKekjS5XkLp8vFeXfYlZJG12oHzMysf/UebD8PmCvpIWAusBHoAJqBd+avHw28Efh4vs4FwFvy8v2AL/W0YUlnSWqV1NrW1lbLfTAza2i1DJKNwLSS51Pzst0iYlNEnBYRRwIX5mVbyFovK/JusXZgEXBU/vpTkdkB/JCsC+01IuKaiGiJiJZJkyZVe9/MzCxXyyBZBhwsaaakUcAZwOLSBSRNlNRVhwuA60vWHS+pKwHmAY/m6+yf/yvgFODhGu6DmZn1o2ZBkrckzgHuBNYAt0TEI5IukrQgX+w4YK2kx4DJwMX5uh1k3Vr3SFoNCPhBvs5NedlqYCLwjVrtg5mZ9U8RUe861FxLS0u0trbWuxpmZkOKpOUR0dLfcvUebDczsyHOQWJmZkkcJGZmlsRBYmZmSRwkZmaWxEFiZmZJHCRmZpbEQWJmZkkcJGZmlsRBYmZmSRpiihRJbcAf6l2POpkIPFvvStSR99/77/0fuAMjot/p0xsiSBqZpNYic+UMV95/77/3v/b7764tMzNL4iAxM7MkDpLh75p6V6DOvP+Nzfs/CDxGYmZmSdwiMTOzJA6SYUTSNElLJT0q6RFJn83L95N0t6T/yf99fb3rWiuSmiQ9JOk/8uczJT0gaZ2kn0kaVe861pKk8ZIWSvqtpDWS3tZgn//n87/9hyX9VNJew/lvQNL1kp6R9HBJWY+ftzLfzX8PqyQdVa16OEiGl3bgCxExCzgWOFvSLOB84J6IOBi4J38+XH0WWFPy/FLgyog4CHgB+GRdajV4vgP8MiLeAhxB9rtoiM9f0hTgM0BLRBwGNAFnMLz/Bn4EzC8r6+3zPhE4OP85C/hetSrhIBlGIuKpiHgwf/wS2ZfIFOBk4IZ8sRuAU+pTw9qSNBV4D3Bt/lzAPGBhvsiw3XcASfsC7wKuA4iInRGxhQb5/HPNwBhJzcDewFMM47+BiLgPeL6suLfP+2Tgxsj8Bhgvaf9q1MNBMkxJmgEcCTwATI6Ip/KXngYm16latfZt4O+Bzvz5BGBLRLTnzzeQBetwNRNoA36Yd+9dK2ksDfL5R8RG4ArgSbIA2Qosp7H+BqD3z3sKsL5kuar9Lhwkw5Ck1wH/DnwuIl4sfS2y0/SG3al6kt4LPBMRy+tdlzpqBo4CvhcRRwLbKOvGGq6fP0A+FnAyWaAeAIzltd0+DWWwPm8HyTAjaSRZiNwUEbfmxZu7mrD5v8/Uq3419HZggaTfAzeTdWd8h6z53pwvMxXYWJ/qDYoNwIaIeCB/vpAsWBrh8wd4N/BERLRFxC7gVrK/i0b6G4DeP++NwLSS5ar2u3CQDCP5mMB1wJqI+FbJS4uBj+WPPwb8YrDrVmsRcUFETI2IGWQDrEsi4kPAUuD0fLFhue9dIuJpYL2kN+dF/wt4lAb4/HNPAsdK2jv/v9C1/w3zN5Dr7fNeDHw0P3vrWGBrSRdYEl+QOIxIegfwn8BqXh0n+D9k4yS3ANPJZkF+f0SUD9ANG5KOA86LiPdKeiNZC2U/4CHgwxGxo571qyVJc8hONhgFPA58guyAsSE+f0n/CHyA7AzGh4BPkY0DDMu/AUk/BY4jm+V3M/BVYBE9fN55uF5F1t33CvCJiGitSj0cJGZmlsJdW2ZmlsRBYmZmSRwkZmaWxEFiZmZJHCRmZpbEQWK2B5F0r6TC99iWdGk+k+uNJWUflvS52tTQ7LUcJGZDVD5J41ERMRvYKelwSWPIrh25ur61s0biIDHrg6Sxkv6vpJX5PS4+kJd/RdKyvOya/GKvrhbFlZJa8/uBHC3p1vzeEN/Il5mR3y/kpnyZhZL27uG9/0LS/ZIelPRv+RxqpTqBkfl77w3sAs4D/jmfIsRsUDhIzPo2H9gUEUfk97j4ZV5+VUQcnZeNAd5bss7OiGgBvk82PcXZwGHAxyVNyJd5M/AvEXEI8CLwd6VvKmki8GXg3RFxFNAKnFu6TH6rgNvJrtbumu32rRGxqDq7blaMg8Ssb6uBE/KxiHdGxNa8/Pj8rnurySaIPLRkncUl6z6S3ydmB9mUJV2T5q2PiP/OH/8YeEfZ+x4LzAL+W9IKsjmTDiyvXERcFhFzIuILwNeBr0j6lKRbJH05ac/NCnKQmPUhIh4jm0F3NfCNvEtrL+BfgNMj4nDgB8BeJat1zePUWfK463nXLLTlcxOVPxdwdx4ScyJiVkT0emc/SUfm66wF/joi3g/8qaSDi+6r2UA5SMz6IOkA4JWI+DFwOVmodIXGs/m4xem9rd+H6ZLelj/+IPBfZa//Bni7pIMa/joYAAAArklEQVTyeoyV9KY+tvd14B+AkWS3mIUsuF4z9mJWbc39L2LW0A4HLpfUSTaY/bcRsUXSD4CHye5At2wA210LnC3perKpzrvdPzsi2iR9HPippNF58ZeBx8o3JOkUoDUiNuXPV+RdbqsiYuUA6mZWEc/+azbI8tsg/0c+UG825Llry8zMkrhFYmZmSdwiMTOzJA4SMzNL4iAxM7MkDhIzM0viIDEzsyQOEjMzS/L/AYECK8dkW+H5AAAAAElFTkSuQmCC\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZIAAAEKCAYAAAA4t9PUAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3X2cXVV97/HPN8kMeSKmJCOXkjCDikoqaQIDQsEmsUDDS18GKEVSsWBj8V6l115LK1yq1ijlorRWHlpFw1PNDaXUh9SiiWDwgfqQiYEEnCbNxYQkoA7UQAIJmSS/+8feE84cJjP7zDpnTs7M9/16ndecs87e56ydmZzv2WutvZYiAjMzs8EaVe8KmJlZY3OQmJlZEgeJmZklcZCYmVkSB4mZmSVxkJiZWRIHiZmZJXGQmJlZEgeJmZklGVPvCgyFqVOnRltbW72rYWbWUNasWfNMRLQMtN2ICJK2tjY6OjrqXQ0zs4YiaUuR7dy0ZWZmSRwkZmaWxEFiZmZJHCRmZpbEQWJmZkkcJGZmlsRBYmZmSRwkZmaWxEFiZmZJahokkuZL2iBpk6Sr+3i+VdKDktZJekjStJLnbpD0WH57Z0n5nZJ+JumR/DarlsdgZmb9q1mQSBoN3AqcB8wAFkqaUbbZjcDdETETWAxcn+/7NuBkYBbwZuAqSZNK9vvziJiV3x6p1TGYmdnAanlGchqwKSKeiIi9wD3AgrJtZgDfzu+vKnl+BvDdiNgXES8A64D5NayrmZkNUi2D5Fhga8njbXlZqUeBC/P7FwBHSpqSl8+XNF7SVGAeML1kv+vy5rDPSDqirzeXdIWkDkkdXV1d1TgeMzPrQ707268C5khaC8wBtgP7I2IlcD/w78Ay4AfA/nyfa4A3AqcCRwEf7uuFI+K2iGiPiPaWlgFnQTYzs0GqZZBsp/dZxLS87KCIeCoiLoyI2cC1edmO/Od1eR/IOYCAjXn505F5CbiDrAnNzMzqpJZBsho4QdLxkpqBS4DlpRtImiqppw7XALfn5aPzJi4kzQRmAivzx8fkPwWcDzxWw2MwM7MB1Gxhq4jYJ+lKYAUwGrg9Ih6XtBjoiIjlwFzgekkBfBf4QL57E/C9LCt4Hrg0Ivblzy2V1EJ2lvII8N9rdQxmZjYwRUS961Bz7e3t4RUSzcwqI2lNRLQPtF29O9vNzKzBOUjMzCyJg8TMzJI4SMzMLImDxMzMkjhIzMwsiYPEzMySOEjMzCyJg8TMzJI4SMzMLImDxMzMkjhIzMwsiYPEzMySOEjMzCyJg8TMzJI4SMzMLImDxMzMkjhIzMwsiYPEzMySOEjMzCyJg8TMzJI4SMzMLImDxMzMkjhIzMwsiYPEzMySOEjMzCxJTYNE0nxJGyRtknR1H8+3SnpQ0jpJD0maVvLcDZIey2/vLCk/XtKP8tf8J0nNtTwGMzPrX82CRNJo4FbgPGAGsFDSjLLNbgTujoiZwGLg+nzftwEnA7OANwNXSZqU73MD8JmIeB3wK2BRrY7BzMwGVsszktOATRHxRETsBe4BFpRtMwP4dn5/VcnzM4DvRsS+iHgBWAfMlyTgrcB9+XZ3AefX8BjMzGwAtQySY4GtJY+35WWlHgUuzO9fABwpaUpePl/SeElTgXnAdGAKsCMi9vXzmmZmNoTq3dl+FTBH0lpgDrAd2B8RK4H7gX8HlgE/APZX8sKSrpDUIamjq6urytU2M7MetQyS7WRnET2m5WUHRcRTEXFhRMwGrs3LduQ/r4uIWRFxDiBgI/AsMFnSmEO9Zslr3xYR7RHR3tLSUs3jMjOzErUMktXACfkoq2bgEmB56QaSpkrqqcM1wO15+ei8iQtJM4GZwMqICLK+lIvyfS4DvlbDYzAzswHULEjyfowrgRVAJ3BvRDwuabGkd+SbzQU2SNoIHA1cl5c3Ad+T9FPgNuDSkn6RDwMfkrSJrM9kSa2OwczMBqbsS/7w1t7eHh0dHfWuhplZQ5G0JiLaB9qu3p3tZmbW4BwkZmaWxEFiZmZJHCRmZpbEQWJmZkkcJGZmlsRBYmZmSRwkZmaWxEFiZmZJHCRmZpbEQWJmZkkcJGZmlqTfIJE0StJvDVVlzMys8fQbJBFxALh1iOpiZmYNqEjT1oOSfk+Sal4bMzNrOEWC5H3APwN7JT0vaaek52tcLzMzaxBjBtogIo4cioqYmVljGvCMRJlLJX0kfzxd0mm1r5qZmTWCIk1bfw+cAfxB/ngX7oA3M7PcgE1bwJsj4mRJawEi4leSmmtcLzMzaxBFzki6JY0GAkBSC3CgprUyM7OGUSRIbgK+Arxa0nXA94Hra1orMzNrGEVGbS2VtAb4HUDA+RHRWfOamZlZQxgwSCT9Y0S8G/iPPsrMzGyEK9K09RulD/L+klNqU53hpburixdWr6a7q6veVTEzq5lDBomkayTtBGaWXNG+E/gl8LUhq2GDenbZMta3trLxnHNY39rKs8uW1btKZmY1ccggiYjr86vaPx0RkyLiyPw2JSKuKfLikuZL2iBpk6Sr+3i+VdKDktZJekjStJLnPiXpcUmdkm7qmesr326DpEfy26sHcdw11d3VxZZFi4jduznw3HPE7t1sWbTIZyZmNiwVadq6djBXtudNYLcC5wEzgIWSZpRtdiNwd0TMBBaTjwbLp64/E5gJvAk4FZhTst+7ImJWfvtlgWMYUns3b66o3MyskRUJklsZ3JXtpwGbIuKJiNgL3AMsKNtmBvDt/P6qkucDGAs0A0cATcAvCrznYWHUxInE7t29ymL3bkZNnFinGpmZ1U6RIHlzRHwA2APZle1kH/ADORbYWvJ4W15W6lHgwvz+BcCRkqZExA/IguXp/LaibMjxHXmz1kcOx+ntD+zaBePG9S4cOzYrNzMbZup9ZftVwJx8+pU5wHZgv6TXAScC08jC562S3pLv866IOAl4S37rcxiypCskdUjq6BrivonmtjbK000SzW1tQ1oPM7OhUMsr27cD00seT8vLDoqIpyLiwoiYDVybl+0gOzv5YUTsiohdwDfImteIiO35z53A/yVrQnuFiLgtItojor2lpaVAdaunqaWF1iVL0LhxjJo0CY0bR+uSJTQNcT3MzIZCLa9sXw2cIOl4sgC5hJf7WQCQNBX4r3xJ32uA2/OnngT+WNL1+XvOAf5O0hhgckQ8I6kJeDvwQIG6DLkpCxcy6eyz2bt5M81tbQ4RMxu2ilzZvigiltD7yvb/ExGvGM5bKiL2SboSWAGMBm6PiMclLQY6ImI5MBe4XlIA3wU+kO9+H/BWYD1Zk9o3I+JfJU0AVuQhMposRL5Q0REPoaaWFgeImQ17ioj+N5DuB5ZGxNL88a3A2IhYNAT1q4r29vbo6OiodzXMzBqKpDUR0T7QdkXWI/k9YLmkA8B8YEcjhchI1t3V5aY1M6u5/qZIOUrSUcA44L3AXwA7gY/n5XYY8xQtZjZUDtm0Jeln5EN+e4pK7kdEvKaWFaumRm3aGuwZRXdXF+tbW3tdFKlx4zhpyxafmZhZYclNWxFxvKRRwBkR8XBVa2cDenbZMrYsWoSam4m9e2ldsoQpCxcW2nfv5s3ZfqVB0tTE3s2bHSRmVnX9XkeSD8u9ZYjqYrnUSR+b29qIvXt7lUV3d8UXRHoafDMrosgFiQ9K+r3DcSqS4arnjKJUzxlFEdW4INJ9LGZWVJHhvzuBCcA+svm2RNZHMqn21auORusjqVYfh/tYzCxF0T6SAc9I8jVIRkVEc8m6JA0TIo2oWlOsNLW0MOHUUyveL/WMyMxGliLXkSDp14ATyKZ2ByAivlurSll9p1ipVh+LmY0MA56RSHov2fQlK4CP5z//qrbVMhj8GUU13nfKot7XnE5ZtMjNWmbWpyKd7R8kW6FwS0TMA2YDO2paK6uKwY666u7q4tklS3qVPbtkiUdvmVmfigTJnojYAyDpiIj4D+ANta2WQdrw25RRV+4jMbNKFAmSbZImA18FviXpa8CW2lbLUoLA16GY2VAqMmrrgojYERF/BXwEWAKcX+uKHQ52Pvww2z/2MXY+PLQX9qcGwd7Nmykf1B0Rvg7FzGqiklFb08kmbdwJvAn4SQ3rVXcbzj2XXd/6FgA/X7yYieeeyxtWrBiS906d4mTUxIlQsi8Ae/Zk5QWljBorDcKeY9iyaBGTzj7bHfZmw1CRha0+AVwOPMHLa7UH2cJTw9LOhx8+GCI9dq1cyc6HH+bIM8+s+funNi0d2LULjRv3igsKD+zaVVE9BrswV7Xm+vI0+GaNoUgfycXAayNiTkTMy2/DNkQAnl+5sqLyautpWmLsWDRhAowdW1HT0qECZ6iuA6lGH4ubxswaR5EgeQyYXOuKHE4mnXtuReWHktrZLAnlPytRrSvjB1v/1PdP7SMys6FVZK6tduBrZIHyUk95RLyjtlWrnsHMtfXYzJm8tH79wcdHnHQSb1q3rvD+zy5bxuY/+iM0ejSxfz9tt99eeBr4es+11VP/wU5j32N3Zycv/PjHTDjtNMadeGLh/V5YvZqN55zDgeeeO1g2atIkXv/AA0w49dTCr+OmMbM01Vxq9y7gBmA9L/eRDGvdXV3s3bSpV9neTZvo7uoq9IHU3dXF5ssug+7ug6OnNl92WeHO5kONrqq0j2GwfRzV6CxPCdJqNY2lBqGZFVOkaevFiLgpIlZFxHd6bjWvWR2lXpD34tq10N3du7C7OysvYNTEib3ORgBi9+6KRl2lSD3+g0G6Zw/xwguwZw+bL7uscNNU6hQtbhozG1pFguR7kq6XdIakk3tuNa9ZHdV70sIDu3ZBU1PvwqamikddDVbq8acGaXdXF898/vO9yp75/Ocruo7GV+abDZ0iQTIbOB34a+Bv8tuNtaxUvaWOmho/e/YrP8iamxk/e3ah/UdNnNjnB/FQnZH06iyfMGHQnfWDlRpE9f4iYDbSFLmyfV4ft2E9/LdH0qipO+/s/UF8552FP4gP7NoF48b1Lhw7tuIzktRRYxFB5D8rkRqkqao1as3Mihlw1NZwUOmorXqPmqrG+x/sbB41ijhwoKLO5sPh/dcdcwzs3/9y4ejRzHz66SG9oNGjvmykq9oKiSNRtdrYB7ueSFWuw7j88qyz+YUXss7myy8f0j6GKQsXctKWLbx+1SpO2rKl4hFTGj2638dFpKzn8uyyZaw77jg2zJvHuuOOq8sFkZ700hpFTYNE0nxJGyRtknR1H8+3SnpQ0jpJD0maVvLcpyQ9LqlT0k3K25cknSJpff6aB8ur6XBoYz/4QfzAAxV/EL+4du0r679375D3MSQt9VvWtKexY4esszx11FmPlEk/fWW/NZIiKySOl/QRSV/IH58g6e0F9hsN3AqcB8wAFkqaUbbZjcDdETETWAxcn+/7W8CZwEyyCSJPBebk+/wD8MdkS/+eAMwfqC6VOlza2Ou5QmI9j7/eQZ7a2Q/ZpJ8bzzqLny9ezMazzmLD7/5u4X09fNkaTZEzkjvIrmg/I3+8Hfhkgf1OAzZFxBMRsRe4B1hQts0M4Nv5/VUlzwfZ+vDNwBFAE/ALSccAkyLih5F17txNjaa0TzkjqLfxs2f3OXy4ks7ueh5/vad4SdXfpJ9F9HdBaiXcNGZDpUiQvDYiPgV0A0TEi0CR5qRjga0lj7flZaUeBS7M718AHClpSkT8gCxYns5vKyKiM99/2wCvWTWpZwT1+o/c1NJC21139Rq+3HbXXYPqq6nX8U9ZuJAT16xh+k03ceKaNRUH2cE+jt/+7Yr7OFJHnaVO+lmNC1IPhz4eGzmKBMleSePIzhKQ9FpK5txKdBUwR9Jasqar7cB+Sa8DTgSmkQXFWyW9pZIXlnSFpA5JHV11+EZW7zbuKQsXMvPJJ3nDqlXMfPLJIT+jSj3+Z5cto/OUU9j6wQ/SecopFa8Qufnd7876OPbsyfo43v3uiq6snzB3bq+yCXPnFg7U1Ek/U4d/V6uPJ1XqFymfUTWOIkHyMeCbwHRJS4EHgb8osN92ssWwekzLyw6KiKci4sKImA1cm5ftIDs7+WFE7IqIXcA3yJrWtuevc8jXLHnt2yKiPSLaW4a4j+FwaeOuVx9L6vGn7r9z1areQ4cB9u/PygvY3dnJrrKzh10rV7K7s7PQ/keeeSYTy0Jj4rnnFl7LprmtDfbt6124f/+QzSxwcJeED/LUM6J6fxGzyhS5IPFbZM1PlwPLgPaIeKjAa68GTpB0vKRm4BJgeekGkqZK6qnDNcDt+f0nyc5UxkhqIjtb6YyIp4HnJZ2ej9b6Q7KZiQ8rI32KjtTjT57r6xe/qKi83PMPPFBReV/esGIFr//+9/lvH/0or//+9yteXbN8MGINBif26+AH+bx5FX+Qp54RHS5fxKy4QwZJ2bxarWR9FU8BxxWZaysi9gFXAiuATuDeiHhc0mJJPVPQzwU2SNoIHA1cl5ffB/w/shmHHwUejYh/zZ97P/BFYFO+zTcqON4hUe9RR/WWevyp+48/ue8/z0OVl2s6+uiKyg9lzFFHccRrXsOYo46qaL/U4c+pfTyp1yGlnhGN9C9ijai/aeT/pp/nCi21GxH3A/eXlX205P59ZKFRvt9+4H2HeM0OsiHBh62eUUdbFi1CTU1Ed/eImqIj9fhT9x/V3JyNWiv9MGtqysoLOHLePBg1Cg6UrJowalRWXtCWP/kTnrnlloOPp155Ja0331xo3+a2tld2tu/ZUzhIm1pamDBnTq+RY5X08fR3HdKrKlzcbTCq9UXMMxMMnUMGSUQU/19jrzBl4UImnX32iP1DTj3+lP2b29rQmDFESZBozJiKPojbvvQlNr/nPSBBBG133FG4Drs7O3uFCMAzt9zCq9///sILfJVPXVTJVEa7Ozv7HH68u7Oz0Pvv37GjovJyzdOnV1RerhpfxLwezdAacGErSWPJmpPOIjsT+R7wuYjYU+O6NbzBLiw1XKQe/2D3r8YHUUqQvfDjHx+yvMgH+d7Nmxk1fnzvFSLHjSu8sFl/fTyVrFQ5WHu3bj1kedH3n7JwIeNnzRrUCpvVWJit53VG6hfBShVZIfFuYCfQc17+B8A/Ar9fq0qZparGGeFgg2zCaadVVF4utWkntY9n9OTJFZWXSz2jgbRJP/du3kz5+VtEVLTCaMoKnz1GUhAVGf77pohYlK+QuCoi/hj4jVpXzBpfva8DqNfw53EnnsjUK6/sVTb1yisLf6tOvbL/YB9PqQr6eFKbplKDKLWzf9TEiVDWx8SePYUv6KzGdTgjbfhykSD5iaTTex5IejNQfE52G5FG2n+kcq0338yMn/6U1jvvzH4W7GjvkXJlf08fD0ccAWPHwhFH0PalLw3ZejipU/SkTjq6e/36isr7ev/UFT6rMXy53l/EKlGkaesU4N8lPZk/Po5syO56IPIJF20YSllPpRpt1I1u3IknDrpPIrWzOHmwAvRqHpJU2WCFu+56RdNQ0TqkNo2lXkeU+v49w5d7reeTD18eyqa1oVQkSKo+u64d/lI+yKrxH2kkq1YQN+pghdSmsUlnn11RebXfv7mtjQPPP9+r7MDOnYWD+GDTWnf3wTDffNllh/UXsSJXtm8BngdeBUzpuUXElvw5G2ZST81H+gWZqQ6HC/KqMfvzYPuoUpvGUvuoRk2YUFF5uT0bN0L5cO2IrLyAak1x88zSpWxasIBnli6taL/BKLIeySeAdcBNZBcp/g3ZOiI2TKV+kNV7PZNGd7gEcT3Xw5n6vt7XI0993/sqqkfrzTf3mqKmkj6qlzZtqqi83I6vfKWi8nK7H3+8ovK+PDJ9OlsuvZTnli9ny6WX8shxxxXedzCKdLZfTDaV/NyImJffBryq3RpXNT7IGnk9l3ob6UHc3dXFs0uW9Cp7dsmSikdN/ec55/DLz36W/zznnIoGe6QO3x77xjdWVF6ue3uf89AesrzcM0uXsn/btl5l+7duremZSZEgeQwo1jhow0K1Psjq9Y12OBjJQZw8aWdi02xq09jkBQuyGRFKSVl5kf0vuKCi8nI77r23ovJqKNLZfj2wVtJjlKxDEhHvOPQu1uhG+hQvh4OROjNC6hlxNQZ7tN58M69+//sHdWV9U0sLbUuXZlPs5CqZYqdnGYLSpQwqWYZg8sUX89zy5X2W14oGmsNH0uPA58lm4j04i11EfKdmtaqy9vb26OjwpS9mjeLgqMGSUWNFz8q6u7pY39raO0jGjeOkLVuGNJhTr2zf+fDDPL9yJZMqCJEejxx3HPtLpqoZPX06s558sp89+iZpTUS0D7hdgSBZHRGnVlyDw4iDxKzxpHwQpwTRcPHM0qXsuPdeJl98MVPf9a5BvUY1g+RvyZq0ltO7aesng6pZHThIzEaekTTXVa0UDZIifSQ9g7dPLykrtB6JmVm9jNQ+pnoYMEi8LomZmfWnyBkJkt5GNuPv2J6yiFhcq0qZmVnjKHJl++eAdwJ/AohsHZLWGtfLzMwaRJELEn8rIv4Q+FVEfBw4A3h9batlZmaNokiQ9AzGflHSrwPdwDG1q5KZmTWSIn0kX5c0Gfg08BOyEVtfrGmtzMysYRQZtfWJ/O6/SPo6MDYinqtttczMrFEU6Wz/fUlH5g//HLhDUrGFAczMbNgr0kfykYjYKeks4GxgCfC52lbLzMwaRZEg2Z//fBtwW0T8G9Dcz/ZmZjaCFAmS7ZI+T3Ytyf2Sjii4H5LmS9ogaZOkq/t4vlXSg5LWSXpI0rS8fJ6kR0pueySdnz93p6SflTw3q/jhmplZtRVdIXEF8LsRsQM4iqyvpF+SRgO3AucBM4CFkmaUbXYjcHdEzAQWk619QkSsiohZETGLbE6vF4GVJfv9ec/zEfFIgWMwM7MaKTJq60XgyyWPnwaeLvDapwGbIuIJAEn3AAuAn5ZsMwP4UH5/FfDVPl7nIuAbeT3MzOwwU6iJapCOBbaWPN6Wl5V6FLgwv38BcKSkKWXbXAKUL7h8Xd4c9pm8qc3MzOqklkFSxFXAHElrgTnAdl7u3EfSMcBJZE1rPa4B3gicStbM9uG+XljSFZI6JHV0FVyr2czMKlfLINkOTC95PC0vOyginoqICyNiNnBtXrajZJOLga9ERHfJPk9H5iXgDrImtFeIiNsioj0i2lu8JoGZWc3UMkhWAydIOl5SM1kTVa8V6SVNldRTh2uA28teYyFlzVr5WQqSBJwPPFaDupuZWUE1C5KI2AdcSdYs1QncGxGPS1os6R35ZnOBDZI2AkcD1/XsL6mN7IzmO2UvvVTSemA9MBX4ZK2OwczMBjbgmu3DgddsNzOrXNE12+vd2W5mZg3OQWJmZkkcJGZmlsRBYmZmSRwkZmaWxEFiZmZJHCRmZpbEQWJmZkkcJGZmlsRBYmZmSRwkZmaWxEFiZmZJHCRmZpbEQWJmZkkcJGZmlsRBYmZmSRwkZmaWxEFiZmZJHCRmZpbEQWJmZkkcJGZmlsRBYmZmSRwkZmaWxEFiZmZJHCRmZpbEQWJmZklqGiSS5kvaIGmTpKv7eL5V0oOS1kl6SNK0vHyepEdKbnsknZ8/d7ykH+Wv+U+Smmt5DGZm1r+aBYmk0cCtwHnADGChpBllm90I3B0RM4HFwPUAEbEqImZFxCzgrcCLwMp8nxuAz0TE64BfAYtqdQxmZjawWp6RnAZsiognImIvcA+woGybGcC38/ur+nge4CLgGxHxoiSRBct9+XN3AedXveZmZlZYLYPkWGBryeNteVmpR4EL8/sXAEdKmlK2zSXAsvz+FGBHROzr5zXNzGwI1buz/SpgjqS1wBxgO7C/50lJxwAnASsqfWFJV0jqkNTR1dVVrfqamVmZWgbJdmB6yeNpedlBEfFURFwYEbOBa/OyHSWbXAx8JSK688fPApMljTnUa5a89m0R0R4R7S0tLelHY2ZmfaplkKwGTshHWTWTNVEtL91A0lRJPXW4Bri97DUW8nKzFhERZH0pF+VFlwFfq0HdzcysoJoFSd6PcSVZs1QncG9EPC5psaR35JvNBTZI2ggcDVzXs7+kNrIzmu+UvfSHgQ9J2kTWZ7KkVsdgZmYDU/Ylf3hrb2+Pjo6OelfDzKyhSFoTEe0DbVfvznYzM2twDhIzM0viIDEzsyQOEjMzS+IgMTOzJA4SMzNL4iAxM7MkDhIzM0viIDEzsyQOEjMzS+IgMTOzJA4SMzNL4iAxM7MkDhIzM0viIDEzsyQOEjMzS+IgMTOzJA4SMzNL4iAxM7MkDhIzM0viIDEzsyQOEjMzS+IgMTOzJIqIeteh5iR1AVvqXY86mQo8U+9K1JGP38fv4x+81ohoGWijEREkI5mkjohor3c96sXH7+P38df++N20ZWZmSRwkZmaWxEEy/N1W7wrUmY9/ZPPxDwH3kZiZWRKfkZiZWRIHyTAiabqkVZJ+KulxSR/My4+S9C1J/5n//LV617VWJI2WtFbS1/PHx0v6kaRNkv5JUnO961hLkiZLuk/Sf0jqlHTGCPv9/6/8b/8xScskjR3OfwOSbpf0S0mPlZT1+ftW5qb832GdpJOrVQ8HyfCyD/iziJgBnA58QNIM4GrgwYg4AXgwfzxcfRDoLHl8A/CZiHgd8CtgUV1qNXQ+C3wzIt4I/CbZv8WI+P1LOhb4n0B7RLwJGA1cwvD+G7gTmF9Wdqjf93nACfntCuAfqlUJB8kwEhFPR8RP8vs7yT5EjgUWAHflm90FnF+fGtaWpGnA24Av5o8FvBW4L99k2B47gKRXAb8NLAGIiL0RsYMR8vvPjQHGSRoDjAeeZhj/DUTEd4H/Kis+1O97AXB3ZH4ITJZ0TDXq4SAZpiS1AbOBHwFHR8TT+VM/B46uU7Vq7e+AvwAO5I+nADsiYl/+eBtZsA5XxwNdwB15894XJU1ghPz+I2I7cCPwJFmAPAesYWT9DcChf9/HAltLtqvav4WDZBiSNBH4F+BPI+L50uciG6Y37IbqSXo78MuIWFPvutTRGOBk4B8iYjbwAmXNWMP19w+Q9wUsIAvUXwcm8MpmnxFlqH7fDpJhRlITWYgsjYgv58W/6DmFzX/+sl71q6EzgXdI2gzcQ9ac8Vmy0/cx+TbTgO31qd6Q2AZsi4gf5Y/vIwuWkfD7Bzgb+FlykeSbAAADm0lEQVREdEVEN/Blsr+LkfQ3AIf+fW8HppdsV7V/CwfJMJL3CSwBOiPib0ueWg5clt+/DPjaUNet1iLimoiYFhFtZB2s346IdwGrgIvyzYblsfeIiJ8DWyW9IS/6HeCnjIDff+5J4HRJ4/P/Cz3HP2L+BnKH+n0vB/4wH711OvBcSRNYEl+QOIxIOgv4HrCel/sJ/jdZP8m9wHFksyBfHBHlHXTDhqS5wFUR8XZJryE7QzkKWAtcGhEv1bN+tSRpFtlgg2bgCeA9ZF8YR8TvX9LHgXeSjWBcC7yXrB9gWP4NSFoGzCWb5fcXwMeAr9LH7zsP11vImvteBN4TER1VqYeDxMzMUrhpy8zMkjhIzMwsiYPEzMySOEjMzCyJg8TMzJI4SMwOI5IeklR4jW1JN+Qzud5dUnappD+tTQ3NXslBYtag8kkaT46ImcBeSSdJGkd27cit9a2djSQOErN+SJog6d8kPZqvcfHOvPyjklbnZbflF3v1nFF8RlJHvh7IqZK+nK8N8cl8m7Z8vZCl+Tb3SRrfx3ufK+kHkn4i6Z/zOdRKHQCa8vceD3QDVwE351OEmA0JB4lZ/+YDT0XEb+ZrXHwzL78lIk7Ny8YBby/ZZ29EtAOfI5ue4gPAm4DLJU3Jt3kD8PcRcSLwPPD+0jeVNBX4S+DsiDgZ6AA+VLpNvlTA/WRXa/fMdvvmiPhqdQ7drBgHiVn/1gPn5H0Rb4mI5/Lyefmqe+vJJoj8jZJ9lpfs+3i+TsxLZFOW9EyatzUiHs7vfwk4q+x9TwdmAA9LeoRszqTW8spFxKciYlZE/BnwCeCjkt4r6V5Jf5l05GYFOUjM+hERG8lm0F0PfDJv0hoL/D1wUUScBHwBGFuyW888TgdK7vc87pmFtnxuovLHAr6Vh8SsiJgREYdc2U/S7HyfDcDvR8TFwGslnVD0WM0Gy0Fi1g9Jvw68GBFfAj5NFio9ofFM3m9x0aH278dxks7I7/8B8P2y538InCnpdXk9Jkh6fT+v9wngI0AT2RKzkAXXK/pezKptzMCbmI1oJwGflnSArDP7f0TEDklfAB4jW4Fu9SBedwPwAUm3k0113mv97IjoknQ5sEzSEXnxXwIby19I0vlAR0Q8lT9+JG9yWxcRjw6ibmYV8ey/ZkMsXwb563lHvVnDc9OWmZkl8RmJmZkl8RmJmZklcZCYmVkSB4mZmSVxkJiZWRIHiZmZJXGQmJlZkv8P0FRCzmBi+I0AAAAASUVORK5CYII=\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZIAAAEKCAYAAAA4t9PUAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3XmcXGWd7/HPr9cknQ2SNmC2DrJGEhJstlFJgopwhRACIrmKROIyMzqCgAtuM4MiwzKDMDIqY4IweqMIiMGrBkRQZIRLh4QECGEiSSABQxPTWTpJr7/7xznVqa70cqqeqq509/f9etUrVU+d59Rzqjrnd571mLsjIiKSq5JiF0BERPo3BRIREQmiQCIiIkEUSEREJIgCiYiIBFEgERGRIAokIiISRIFERESCKJCIiEiQsmIXoC+MHTvWa2pqil0MEZF+ZcWKFW+6e3Vv2w2KQFJTU0NdXV2xiyEi0q+Y2aYk26lpS0REgiiQiIhIEAUSEREJokAiIiJBFEhERCSIAomIiARRIBERkSAKJCIiEkSBREREgiiQiIhIEAUSEREJokAiIiJBFEhERCSIAomIiARRIBERkSAFDSRmdpaZrTOz9Wb2pS7en2xmj5jZajN7zMwmpL13g5k9Fz8+lJY+xcyeivf5UzOrKOQxiIhIzwoWSMysFLgdOBuYCiwws6kZm90M3O3u04FrgevjvB8ATgRmAKcAV5vZyDjPDcAt7n4ksB1YVKhjEBGR3hWyRnIysN7dX3b3ZuAnwHkZ20wFfhc/fzTt/anAH9y91d0bgdXAWWZmwBnAvfF2dwHzCngMIiLSi0IGkvHAq2mvN8dp6Z4F5sfPzwdGmNmYOP0sMxtmZmOBOcBEYAzQ4O6tPexTRET6ULE7268GZpnZSmAWsAVoc/eHgF8B/w0sBf4EtGWzYzP7pJnVmVldfX19nostIiIphQwkW4hqESkT4rQO7v6au89395nAV+K0hvjf69x9hru/DzDgJWAbMNrMyrrbZ9q+73D3Wnevra6uzudxiYhImkIGkqeBo+JRVhXAxcCy9A3MbKyZpcpwDbAkTi+Nm7gws+nAdOAhd3eivpQL4zyXAr8o4DGIiEgvChZI4n6MzwDLgbXAPe7+vJlda2Zz481mA+vM7CVgHHBdnF4OPG5mLwB3AB9J6xf5InClma0n6jNZXKhjEBGR3ll0kT+w1dbWel1dXbGLISLSr5jZCnev7W27Yne2i4hIP6dAIiIiQRRIREQkiAKJiIgEUSAREZEgCiQiIhJEgURERIIokIiISBAFEhERCaJAIiIiQRRIREQkiAKJiIgEUSAREZEgCiQiIhJEgURERIIokIiISBAFEhERCaJAIiIiQRRIREQkiAKJiIgEUSAREZEgCiQiIhJEgURERIIokIiISBAFEhERCaJAIiIiQRRI+kBLfT2NTz9NS319sYsiIpJ3CiQFtm3pUtZMnsxLc+awZvJkti1dWuwiiYjkVUEDiZmdZWbrzGy9mX2pi/cnm9kjZrbazB4zswlp791oZs+b2Vozu83MLE5/LN7nqvjxlkIeQ4iW+no2LVyI791Le2MjvncvmxYuVM1ERAaUggUSMysFbgfOBqYCC8xsasZmNwN3u/t04Frg+jjv3wDvBKYDxwMnAbPS8n3Y3WfEjzcKdQyh9qxciTc3d0rz5mb2rFxZpBKJiORfIWskJwPr3f1ld28GfgKcl7HNVOB38fNH0953YAhQAVQC5cDWApZVRERyVMhAMh54Ne315jgt3bPA/Pj5+cAIMxvj7n8iCiyvx4/l7r42Ld+dcbPW11JNXpnM7JNmVmdmdfVFakoaNnMmlJd3Tiwvj9JFRAaIYne2Xw3MMrOVRE1XW4A2MzsSOA6YQBR8zjCzd8d5Puzu04B3x49Lutqxu9/h7rXuXltdXV3o4+hSeXU1NXfdBUOGYFVVMGQINXfdRXmRyiMiUghlBdz3FmBi2usJcVoHd3+NuEZiZsOBC9y9wcw+ATzp7rvj934NnAY87u5b4ry7zOz/EDWh3V3A4wgyZsECRr73vTRv3EhFTY2CiIgMOIWskTwNHGVmU8ysArgYWJa+gZmNNbNUGa4BlsTPXyGqqZSZWTlRbWVt/HpsnLccOAd4roDHkBfl1dVUnXSSgoiIDEgFCyTu3gp8BlgOrAXucffnzexaM5sbbzYbWGdmLwHjgOvi9HuBPwNriPpRnnX3B4k63peb2WpgFVEN5z8LdQwiItI7c/dil6Hgamtrva6urtjFEBHpV8xshbvX9rZdsTvbRUSkn1MgGQS01peIFJICST8QEgg61vp63/u01peIFIQCyUEuJBC01NezadGiaK2vHTuitb4WLVLNRETyqsdAYmYl8bpXUgShgaB540asoqJTmpWX07xxY1ZlULOYiPSkx0Di7u1ECy9KEYQGgoqamgMXjWxpoaKmJlF+NYuJSBJJmrYeMbMLulvTSgonNBCUV1czefFibOhQSkaOxIYOZfLixYkmRqpZTESSShJIPgX8DGg2s51mtsvMdha4XEJYIEgZs2AB0zZt4ujf/pZpmzYxZsGCRPny0SwmIoNDr2ttufuIviiIdC0fa3WVV1dnnS+0NiQig0evNRKLfMTMvha/nmhmJxe+aJJSjLW6OtWGqqpyqg2JyOCQpGnrP4hW3v3f8evdqAO+XwkZeeXuePyviEhXkgSSU9z908A+AHffTnTnQukHch15lepsZ98+vLER9u1TZ7uIdClJIGmJ77/uAGZWDbQXtFTSSa41ipCRV/nqbNc8FJGBL0kguQ34OfAWM7sO+CNwfUFLdZDZ9cQTbPnHf2TXE0/0+WeHzOVo3riRzAYpd08UDPLR2a55KCKDQ6Jl5M3sWOA9gAGPZNw//aAXsoz8ujPPZPfDD3e8Hn7mmRyzfHm+itajlvp61kyejO/d25FmQ4cybdOmRJ3ee9eu5YWpUw9In/rCCww97rhe829bupRNixZh5eV4SwuTFy9OPHw4tOwiUnxJl5Hvdfivmf2Xu18CvNhF2oC264knOgURgN0PPcSuJ55gxDvfWfDPTzUvdToZx81LSU7G7bt3Y0OHHnAyb9+9O9Hnhww9Di27iPQfSZq23p7+Iu4veUdhinNw2fnQQ1ml51to81J322XTPJXr0ON8zUNRH4vIwa/bQGJm15jZLmB62oz2XcAbwC/6rIRFNPLMM7NKz7fy6mrGLFrUKW3MokWJT+r5mBmfq3x8tvpYRPqHXvtIzOx6d7+mj8pTEEF9JO9/P7vTaiC59JG01Nfn1DyUr36GXD8/NG9IfvWxiBRfPm+1+5XBPLN97MKFUFHR8Ri7cGFW+bctXcrqSZN4cfYcVk+alPWoq3wMwc21eaqYNQINPxbpP5IEktsZpDPbOyblNTd3PLKZlNdSX8+Gj34U9u2DPdGkvg0f/Wji/BU1NZ2uyAF8374+We8qH6v/poLoujnZB1ENPxbpPzSzvQehV8Vv/OkpaG3tnNjaGqUnlNn02FdLlYQee0t9PRsvvbTTzPiNl16aOBCF9g9pGXyRvqOZ7T0IvSpuaGzOKj1T88aNWFnnEdpWVtYnS7mHHvuelSuhpaVzYktLlJ5AS309b37/+53S3vz+9/v07pAikoxmtvcg9Kr4sNNOpq20cyBoKy3jsNOSdTGVDB9+YNPW3r2UDB+eKH+IYq/+GxqItAy+SN/pNZC4+4+BLxAFj9eBee5+T6ELdjBoqa9n2+LFndK2LV6c+Kq4tLqa+y+4kpayCprKh9BSVsH9F1xJacKTcfvu3TB0aOfEIUMSTyhMKcbqv8NmzjywRlBRwbCZM7MuQy6KOfRZZLBJMrN9kbsvpvPM9n9x9y8VtGQHgdDZ2Vt37OWld5zBjVNOYHTDVhpGj8MPHcPWHXsZXVXZa/6KmhoMOq2XZWZZdzhvWrQIKynB29sTL3PSafXfOG3TokWMfO97Ex17eXU1k3/4wwM+O+mJfNjMmVBaCm1t+xNLS7MKRPm4KVjo8GeRwSBJ09YFZvbh1Aszux0YFP+jQptHxo0aSltbO41Vo9gy/mgaq0bR1tbOuFFDe89M+FV1S309mxYujDqcGxujDueFC/ts9d+O2/w++mhWt/nt+LzS0h5fJxFyU7CQUWf5oKHL0l8kCiTAQjNbYGZ3Aa3uvqi3TABmdpaZrTOz9WZ2QA3GzCab2SNmttrMHjOzCWnv3Whmz5vZWjO7zcwsTn+Hma2J99mRXgihJ/LRVZV87tzpVJaVMKyyjMqyEj537vREtZGUXO+5DlE/Q3tGIGxvbk7Uz1BRU0NbU1OntLbm5qz7GHI9kTdv3IhlNOvZkCF91lkeOuoMwlaN1tBl6U+6bdoys0PTXn4ceAB4AvhnMzvU3f/a047jkV63A+8DNgNPm9kyd38hbbObgbvd/S4zO4OoH+YSM/sb4J3A9Hi7PwKzgMeA7wKfAJ4CfgWcBfw62eFmL7R5ZM7x45k5ZSxbd+xl3KihWQWRlFzuuQ6we1/Xo8N272tmVC95G4eN5L7zPsvc+75Ne2kZJW2tLDvvsxw5bCSjsy5J9ordWd5TZ/+oBEvkpK8a/Zdrr81qRYT0ocupZtVsmhVF+lpPfSQryGieBz4QPxw4opd9nwysd/eXAczsJ8B5QHogmQpcGT9/lChYEe9/CNF8FQPKga1mdjgw0t2fjPd5NzCPAgYSyP1EnjK6qjKnABJq15RjaSsppax9fz9DW0kpu6Yc22verTv28uKJZ7CuJrf+nVCp2mDmMvZ9vcRLLkJXje6u1pXtysnq35G+0m3TlrtPAY4ELnH3I9x9StqjtyACMB54Ne315jgt3bPA/Pj5+cAIMxvj7n8iCiyvx4/l8T1Qxsf76WmfB51itXUfdsREHvjgVTSXltNUXklzaTkPfPAqDjtiYq95Q/t3UkKOfcyCBRy3YgUTb7uN41asyLqPpaOP4/TTs+7jCBl1FrpqdD6GfRe7f0cGlx77SNy9HfhOAT//amCWma0karraArSZ2ZHAccAEokBxhpm9O5sdm9knzazOzOrqi9hZmY+27obGJta91kBDY1PvG6cZXVXJmV/9LLd+8W5+9KkbufWLd3PmVz+bqEaRj/6d0GPftnQpa9/xDl69/HLWvuMdWeVvqa9n4yWXRH0c+/ZFfRyXXJLVzPqq2bM7pVXNnp3oyj501ejQYd/56N8JFXrxpIEG/UuS1X9vBv4E3O9ZTCYws9OAf3L398evrwFw9y4nM5rZcOBFd59gZp8Hhrj7N+L3vk60RMt/AY+6+7Fx+gJgtrt/qqeyhKz+GyIfK9j+7rktfPvB1ZSWltDW1s7nzp3OnOOzq4Q1NDbl3EeTa97QYw/N/9d77mHDhz50QPqUn/6UQy+6qNf8oXeXDFk1uqW+ntXjx3fuoykvZ/qWLYmOfcdDD7H+/e8/IP3I5csT9e+klyOXprGOIecVFXhzc1Z31sxHfsmffK7++yngZ0BT2n1JdibI9zRwlJlNMbMK4GJgWUYhx5pZqgzXAEvi568Q1VTKzKycqLay1t1fB3aa2anxaK2PchDfGyV0CG1DYxPffnA1Ta3t7Glqpam1nVseXJ1TzeSYt47OqW8j17zNGzfSnjGrv700+fIuwWt9bd2aVXqmnb/9bVbpmY5Zvpyj//hHDvv61zn6j3/M+tYDmYMRCzg4sUsdtck5c7KqTYaucaY10vqnJDPbR7h7ibtXuPvI+PXIBPlagc8Ay4G1wD3u/ryZXWtmc+PNZgPrzOwlYBxwXZx+L/BnYA1RP8qz7v5g/N7fAz8A1sfbFLSjPUToyKOtO/ZSWtr5JyotLWHrjr3d5Dh47Ks+nNamzsfe2tTMvurDE+UPHX487MQTs0rPVD5uXFbpXRly9NGMPucchhx9dOI8ED70OXRVgdD5R5nNFu7eZxcQUhxJaiSY2SFmdrKZnZ56JMnn7r9y96Pd/W3ufl2c9nV3XxY/v9fdj4q3+bi7N8Xpbe7+KXc/zt2nuvuVafusc/fj431+Jpvmtr4WOg8l1eGdLpcO7xC59s+8WTGcBy+8guayCvZVDqO5rIIHL7yCNyuSdRinhh+n57/vvM/SOKzXaxgASioqoLy8c2J5eZSewIg5c6Ak479HSUmUnkCuV/QQfvuA8upqqmbN6pSWtH8HoqHPB1wAJZx/VDJ8OGSUnX37Eg8UKPawb8lNkiVSPg5cTtTxvQo4lajP5IzCFm1gCJmHkurwviWjj6SvhhKH9M+MGzWUNdNmsXbS9I7hw62jDkkcBEOHH1fU1GBlZXhaP4OVlWV1Mq750Y/Y+LGPgRm4U3PnnYn7dzYtXIg3N+9fXmbhwqzmgYTcPmDv2rVdDj/eu3Ztov6dtoaGrNLTNb/6arfpST67Pw/7Hsx6DSREQeQk4El3n2NmxwLfKmyxBpaQeSj5mNCYS4d5ev8MrVGt6JYHVzNzytisRn3d8uBqth86JusgmKqNNVWNorEqmj5ZmcPyMpsWLcLLyrDW1qxPSLleBPR0RZ+ks7t540ZKhg2jfceOjrSSoUMTzyPpqX8nycm8dHTXU067S8+30EnA+eisVyDKTpJAss/d95kZZlbp7i+a2TEFL5l0CJnQmGutoqN/pnV/01qqfyZpWUKCYD5qY89OO507PreEQ3e+wV9HvoVPTXs3yRqm9gudjJqL0Oad0P6dioldzzPqLj1dSVVVVund2dnYxNb6XYyrbmJMFl9/PlYF2LZ0KRsvuwwrLcXb2qhZskSjxnqRpI9ks5mNJpp1/rCZ/QLYVNhiST6EjPo6GPpn5hw/nu98/F383fun8p2PvyurYc+pY98+ZAR/fsvb2D5kRE4j3nIxbObMLvtnknZ2l1dX0/hPN9FSXsm+ymG0lFfS+E83JT4RhvbvtO/e3WX5k8xjaVq/Pqv0rjx+4+38z1FHsPO8s/mfo47g8RuT39k7tLM/X3NwBts8mF5rJO5+fvz0n8zsUWAU8JuClkryIqRWkY8awe+e28IdSx/fXyNY8O6sgkFIH00+alS5Kq+upuauu9h42WXRCb29nZolSxIHgobGJm5qO4KyK5fs719qO4RpjU2Jyl5eXc2e62+j/Mufw4nWGGr51i2JP79k+PAu1xlL0mFedfLJHZ+Z4nF6Ets2bqbyms9S2r7/dyu55rNsu+g8xtRM6CFnWtkDOvtD11iD8Ka1/tislqRpCzM7BJgI7IofxwPPFLBckgfjRg2lubWtU1pza1viWsWc48czfbiz9YWXGDf1aMbUZFcjePibt3H5fd+mrbSM0rZWlv35CmYuvjbRyTC0jyZfNapcJ2Q+O+10vnflEg7Z/gbbD3kLfzft9MTNaj0N+0763d3UdgRlV/8wp0DUvns3XlmJpQ2/9srKZDWSSUfw1KnncsqTD3akPXXquRw56QiSfPOvL3+YkvbOv1tJezuvL3+YMZ/6WK/5965Z0216oQcaQHjTWn+djJlk1NY3gIXAy+y/V7ujUVt9JmRmurd7j697kvqj9rJyNrW2QBZ/1H95+VXm3v9tKlqboTVq7597/7f5y1c+wehpR/aaf+uOvbRnjFRqd098Ms1XjeqWZc923JjryrknJKoRNTQ2ceMDq/AhI9lxeDRc+YYHVmUVBI9e8Qjz7r+1Iwg/MP8Kxn16dqJyp767xrSBCuVZfHf7qg+ntT1aKTWltT1K762nY+uOvfz63L/lTyf9LyZsXsfmCcfQcNgk3pvws4fvamBfN+lJtGzd2mWNKOlE1NCBBs0bN9K+b1+nz2/fty/RQIn+vOpzkhrJRcDb3L3rNcmloEKbdyorytjT1NqRVllRluiE0lJfz4bLLsP27QP24sCGyy5LfofE1zfTVlIG7P+zaS8po/z1zZAgkAwtL6WlrXMgaWlzhpYnv7lVSGd/Q2MTNz+wijan4y6NNyUMBs9ufJPM0bruUfqst/f+27XV1zPv57d2CsLzfv5t2m74B6jqvXkn9LtLzQE652e3dNxC4JcXXsFHK4YzJuFn11dPpL467pzP4rPHfeAs3vz8lV2mJ2F/0/UUt+7SM5VUVXUZiJIOFti+aTNd/fjbN22m6qSTeswbEoSKLUln+3PQJ7egkAyhS6SENO9sW/sSTd75z6PJS9i29qVEn91y+IROy9cDlLa30XJ47ydCgL0tbVSUdf78irIS9ra0dZOja7ku8bL+LzvJOBfT5lF6b7Y3dn3N1V16pq0vvER7ScbyMiVlbH0h2Xcf+t2NGzWUVcefzk1X3cnihd/kpqvuZNXxpyf6u9nwxq6s0jMNPe44RrzvfcD+e1iMOPPMRM1SANsPn8yTp56Lx/kdePLUc9l++ORk+Z9bm1V6pm2/fSSr9E6f0UMQysbvv3UrD9aezu+/dWtW+UIkCSTXAyvNbLmZLUs9Cl0wCV8iJdW8U1FqVJaXUlFqiZt3Gg4ZR2kXgaDhkGRDSA87YiK/mH95p5npv5h/eaIl7CE6mWWuLmVxet/orgmw96bBE6eMzSo907ApUyhpa+2UVtLWyrApUxLlz8d319bmGbcQSNYkur2bi5zu0jO11Nez6/HHgf21gl1/+EPi0U9Dy0vZNOEYWkvLaSmtoLW0nE0Tjk1cI9rw1qOySs9UeUzXMyO6S0/3yi+7Xo+tu/Su/G50NVVfuYLDVzxO1Veu4JHRfVOTSRJI7gJuAP4F+Ne0hxRYPjqMHcCM+D7FifOFBoKOJey/cBc//sT13PqFuxIvYd+Rf0bn2sv7Z0zos1n9bxnZ9XfcXXq6SdUjmFs7qVPa3NpJTKoekeizm0YdwrILOi8vs+yCK2gadUii/KOrKnn7xM7bvn3iIYm/u/V/2XngEFqS1caOOqzrJWy6S88UWhPe8OIG5j9wG+VtLVS0NVPe1sL8B25lw4sbEuXfPm4i/33yOZ1qNP998jlsH5fs735r7ewuv7uttbN7zfuXQ7tu9uwuPdPvv3UrI3e8iUHHY9SON/ukZpKkj2SPu99W8JLIAVIn0wfrXulIy+Zkmmoaa04bApt05NPoqkrGX/oRbkpbouQ9s6ZldSKfc/x4Zn75/Jz7KB5a1blKv3zVZj58+tFZ7eeV+l28+FoDx751dOITOcAbO7vq8o3Sk+zn02dP49zampw+e9yooTx/wuxOy8Nks7zMK/W7eGbDtk5pz2zYxiv1uxKVo3FfS1bp6crKSikrMVrTBnWUlRhlZclqBOtsJBVd1MbW2UgOS5C/8c8vU1pa1tG3BNGq041/fhnePb2HnJHDRw/ju+f8Lf/vlA90DBaor57ItaOHJSr/awzhmQs/zwU/v6Uj7b7zP8eJDOk1b+ucM/Fbvn5A/0zrnGTDjnfef1+XgyF23n8ffPnyRPvIVZJA8riZXU+0BHxH/dTdNfy3wEJPpiFzKVKfnb5ESS4n8lxn5W/dsbfrK7ss5oF859drOgXhubWT+PTZ0xLlDTmZhtp/AdHe8d3PzeIC4pkNb3abniyg5d6sN27UUEozAklpiSUOgtsrh/PEvMuZ/8CtHR3998+7nHdWJpsHMvWUE3i9i0A09ZQTEuXfsTf6fTsNFkhL781bDx3GD6fP4s9vm9FxEdBYNYpzDu09EJ166lT+/cLPM/++f0st78b9F1zJP5x64H1xujJy/gWw4vGu0wssSSBJTcc9NS1Nw3/7QOikunGjhtLU3Pk/VVNza6L/1Pma0Jfr0OWh5aWdalIAza3tidu6X6nf1SmIACyre4Vza2sSnUyrhpRnlZ4pJIiFXkAc0s023aVnqhrS9QrJ3aWnCx12feKUsXy3ixPxPyTsX6o5bgpPXvkNJv/b1zoC0aYrv8FpxyXrXzr2rV2PK+ouPdMJNWMxo9PQa7MovTeTqkdQc9kl3JR27O+ZNS1xbXbWly/nkRu/yagd+y8kdoway3sKXBuBZDPbs12eSPIkdEIhgJUY6cOPrCRZP0k++mdChi7vbWmjvNQ6DWMtL7XEI4+e2dB15+wzG+oT/cc88rCRlHb+6ii1KL03oUEsNIifUDOGErNO83BKzDihprfBu5GQY4ewYdeTqkdw4pQxPLOBjhPxiVPGZNU0ePENX+C5i8/jhafWMPWUaVw8M/nSgKn+rWUZFwFJP390VSVfnDeDf132LIbhOFfNPSHxdxDSJArwnoZ6fv+tW9l5/32MnH9BnwQRSDizXYonZELh1h17qSgrpTWtql9RVtonS6TkY2Z6iRnpzSkllryJ5JCqrtuku0vPNLqqks/Pm8G/ZUxITFL2F1/revLci681JDoxhAbx0VWVfGHeCfzrstUdTSRXzU3+24Uce/o+cl2p+vlXt3dKe/7V7TQknJUP8QXMb16mtHQEbb95mc+VD89qaZ7Qk3noit2Tqkdk/ZnpZn358oL3iWRSIDmIhUwohPATUsh/iNCr6tBA1t3Vd9Krcsj9+EObR1LHnn4iz3ZWfujJLB+3L8hF6N9N6AVMSujJPGTF7v5IgeQglo8r09BlQnL9D5GPprHQk1lZqdGa1j5TVpr9fc9zOf7Q5hHoPGzbc7xfe+jJrBgnw9C/m2Iu1jmYJVlraxhwFTDJ3T9hZkcBx7j7LwteuoNErkNIQ3VcmT64uqPNu6+vTHOVr7s7hoz6yrVZLx9CmkdChm33d6F/NwfD7Q8GoyQ1kjuBFcBp8estwM+AQRFIQkbfpAQtugjgHl2V5nh7+mJVs4sVxODgOKHk2jwy2K+qi31DNMlekkDyNnf/kJktAHD3PWY51rX7mdDRNxA2cqnjyrTNOxYO7G9XpsUKYv35hHIwBMFiC/m7KeYFzGCVJJA0m9lQOppt7W2kTUwcyEJH34R2/A32K9NQ/fWE0p+D4MFisHV2F1uSQPKPRHdEnGhmPwbeSXR/kgEvdPRN6D01dGUarr+eUPprEJTBqddFG939YWA+UfBYCtS6+2OFLdbBITU5Kl02k6NC7wuRujKtLCthWGUZlWUl/e7KtKGxiXWvNfTJvdIHmlyXwBfpa93WSMzsxIyk1+N/J5nZpMGw1lbo5KjUfSHSR99ke0+N/nxlGtI/JCL9R09NWz0tFT8o1toKXTgwX/fU6I/NM/maGCYiB79uA4nW2ApfOHAgdJrmOnRZAwVEBo8kExKHAH9MwPGEAAAPUUlEQVQPvIvogvxx4Hvu3vUNGzrnPQu4FSgFfuDu/5Lx/mRgCVAN/BX4iLtvNrM5wC1pmx4LXOzuD5jZD4FZwI74vYXuvqq3suQidOFAGLxNUxooIDJ4JLlD4t3A24F/B74TP/+v3jKZWSlwO3A2MBVYYGaZC+vfDNzt7tOBa4lu64u7P+ruM9x9BlET2h7gobR8n0+9X6ggAukLB+6XzcKBKf2x0zT0fvEDYaCAiCSTZPjv8e6eHgAeNbMXEuQ7GVjv7i8DmNlPgPOA9LxTgStT+wUe6GI/FwK/dvc9CT4zr/KxREl/lY+mqf5cGxOR5JLUSJ4xs46bWpnZKUBdgnzjgVfTXm+O09I9SzS0GOB8YISZZS7PejHRsON015nZajO7xcwKenbqWKIk/newyFfTVH+sjYlIdpIEkncA/21mG81sI/An4CQzW2NmqwM//2pglpmtJOr32AJ0dECY2eHANGB5Wp5riPpMTgIOBb7Y1Y7N7JNmVmdmdfX1Xd/kqDfpS5Q0tbTR3OZZNe+k76e/zaVQ05SIJJWkaeusHPe9BZiY9npCnNbB3V8jrpGY2XDgAndPX5fkIuDn7t6Slic1n6XJzO4kCkYHcPc7gDsAamtrc6pK5KN5pz/PpVDTlIgkkWRm+yZgJzAKGJN6uPum+L3uPA0cZWZTzKyCqIlqWfoGZjbWzFJluIZoBFe6BWQ0a8W1FOKFI+cBz/V2DLkKbd4J7bA+GKhpSkR6k2T47zeIlkf5M/vve9rrhER3bzWzzxA1S5UCS9z9eTO7Fqhz92XAbOB6M3PgD8Cn0z63hqhG8/uMXf/YzKqJ5vatAv62t2PIVeg8EM2lEJHBIEnT1kVES8k3Z7tzd/8V8KuMtK+nPb8XuLebvBs5sHMed+/TGfUhzTuaSyEig0GSzvbngGTL3Q5QuTbvqMNaRAaDJDWS64GVZvYcafchcfe5BSvVAKIOaxEZ6JIEkruAG4A1QHsv20oX+uOiiyIiSSUJJHvc/baCl0RERPqlJIHkcTO7nmjobnrT1oC/H4mIiPQuSSCZGf97alraoLgfiYiI9K7XQKL7koiISE+S1Egwsw8QLR8/JJXm7tcWqlAiItJ/9DqPxMy+B3wI+Aei2eQfBCYXuFwiItJPJJmQ+Dfu/lFgu7v/M3AacHRhiyUiIv1FkkCyN/53j5m9FWgBDi9ckUREpD9J0kfySzMbDdwEPEM0YusHBS2ViIj0G0lGbX0jfnqfmf0SGOLuOwpbLBER6S+SdLZ/0MxGxC8/D9xpZjN7yiMiIoNHkj6Sr7n7LjN7F/BeYDHwvcIWS0RE+oskgSR1D/UPAHe4+/8FKgpXJBER6U+SBJItZvZ9orkkvzKzyoT5RERkEEgSEC4iul3u+929ATiUqK9EREQk0aitPcD9aa9fB14vZKFERKT/UBOViIgEUSAREZEgCiQiIhJEgURERIIokIiISBAFEhERCaJAIiIiQRRIREQkiAKJiIgEKWggMbOzzGydma03sy918f5kM3vEzFab2WNmNiFOn2Nmq9Ie+8xsXvzeFDN7Kt7nT81MC0iKiBRRwQKJmZUCtwNnA1OBBWY2NWOzm4G73X06cC1wPYC7P+ruM9x9BnAGsAd4KM5zA3CLux8JbAcWFeoYRESkd4WskZwMrHf3l929GfgJcF7GNlOB38XPH+3ifYALgV+7+x4zM6LAcm/83l3AvLyXXEREEitkIBkPvJr2enOclu5ZYH78/HxghJmNydjmYmBp/HwM0ODurT3sU0RE+lCxO9uvBmaZ2UpgFrCF/TfSwswOB6YRLWOfFTP7pJnVmVldfX19vsorIiIZChlItgAT015PiNM6uPtr7j7f3WcCX4nTGtI2uQj4ubu3xK+3AaPNLLX8/QH7TNv3He5e6+611dXV4UcjIiJdKmQgeRo4Kh5lVUHURLUsfQMzG2tmqTJcAyzJ2McC9jdr4e5O1JdyYZx0KfCLApRdREQSKlggifsxPkPULLUWuMfdnzeza81sbrzZbGCdmb0EjAOuS+U3sxqiGs3vM3b9ReBKM1tP1GeyuFDHICIivbPoIn9gq62t9bq6umIXQ0SkXzGzFe5e29t2xe5sFxGRfk6BREREgiiQiIhIEAUSEREJokAiIiJBFEhERCSIAomIiARRIBERkSAKJCIiEkSBREREgiiQiIhIEAUSEREJokAiIiJBFEhERCSIAomIiARRIBERkSAKJCIiEkSBREREgiiQiIhIEAUSEREJokAiIiJBFEhERCSIAomIiARRIBERkSAKJCIiEkSBREREgiiQiIhIkIIGEjM7y8zWmdl6M/tSF+9PNrNHzGy1mT1mZhPS3ptkZg+Z2Voze8HMauL0H5rZBjNbFT9mFPIYRESkZwULJGZWCtwOnA1MBRaY2dSMzW4G7nb36cC1wPVp790N3OTuxwEnA2+kvfd5d58RP1YV6hhERKR3hayRnAysd/eX3b0Z+AlwXsY2U4Hfxc8fTb0fB5wyd38YwN13u/ueApZVRERyVMhAMh54Ne315jgt3bPA/Pj5+cAIMxsDHA00mNn9ZrbSzG6Kazgp18XNYbeYWWWhDkBERHpX7M72q4FZZrYSmAVsAdqAMuDd8fsnAUcAC+M81wDHxumHAl/sasdm9kkzqzOzuvr6+kIeg4jIoFbIQLIFmJj2ekKc1sHdX3P3+e4+E/hKnNZAVHtZFTeLtQIPACfG77/ukSbgTqImtAO4+x3uXuvutdXV1fk+NhERiRUykDwNHGVmU8ysArgYWJa+gZmNNbNUGa4BlqTlHW1mqQhwBvBCnOfw+F8D5gHPFfAYRESkFwULJHFN4jPAcmAtcI+7P29m15rZ3Hiz2cA6M3sJGAdcF+dtI2rWesTM1gAG/Gec58dx2hpgLPDNQh2DiIj0zty92GUouNraWq+rqyt2MURE+hUzW+Hutb1tV+zOdhER6ecUSEREJIgCiYiIBFEgERGRIAokIiISRIFERESCKJCIiEgQBRIREQmiQCIiIkEUSEREJMigWCLFzOqBRuDNYpeliMYyeI9fxz446djDTXb3XpdPHxSBBMDM6pKsGTNQDebj17Hr2Aebvj52NW2JiEgQBRIREQkymALJHcUuQJEN5uPXsQ9OOvY+Mmj6SEREpDAGU41EREQKYFAEEjM7y8zWmdl6M/tSsctTSGY20cweNbMXzOx5M7s8Tj/UzB42s/+J/z2k2GUtFDMrNbOVZvbL+PUUM3sq/v1/amYVxS5jIZjZaDO718xeNLO1ZnbaYPndzexz8d/7c2a21MyGDOTf3cyWmNkbZvZcWlqXv7VFbou/h9VmdmK+yzPgA4mZlQK3A2cDU4EFZja1uKUqqFbgKnefCpwKfDo+3i8Bj7j7UcAj8euB6nJgbdrrG4Bb3P1IYDuwqCilKrxbgd+4+7HACUTfwYD/3c1sPPBZoNbdjwdKgYsZ2L/7D4GzMtK6+63PBo6KH58Evpvvwgz4QAKcDKx395fdvRn4CXBekctUMO7+urs/Ez/fRXQyGU90zHfFm90FzCtOCQvLzCYAHwB+EL824Azg3niTAXnsZjYKOB1YDODuze7ewCD53YEyYKiZlQHDgNcZwL+7u/8B+GtGcne/9XnA3R55EhhtZofnszyDIZCMB15Ne705ThvwzKwGmAk8BYxz99fjt/4CjCtSsQrt28AXgPb49Rigwd1b49cD9fefAtQDd8bNej8wsyoGwe/u7luAm4FXiALIDmAFg+N3T9fdb13wc+BgCCSDkpkNB+4DrnD3nenveTRUb8AN1zOzc4A33H1FsctSBGXAicB33X0m0ZJAnZqxBvDvfgjRVfcU4K1AFQc2+wwqff1bD4ZAsgWYmPZ6Qpw2YJlZOVEQ+bG73x8nb01VZ+N/3yhW+QroncBcM9tI1IR5BlG/wei4yQMG7u+/Gdjs7k/Fr+8lCiyD4Xd/L7DB3evdvQW4n+hvYTD87um6+60Lfg4cDIHkaeCoeARHBVEn3LIil6lg4j6BxcBad/+3tLeWAZfGzy8FftHXZSs0d7/G3Se4ew3R7/w7d/8w8ChwYbzZQD32vwCvmtkxcdJ7gBcYBL87UZPWqWY2LP77Tx37gP/dM3T3Wy8DPhqP3joV2JHWBJYXg2JCopn9L6K281JgibtfV+QiFYyZvQt4HFjD/n6CLxP1k9wDTAI2ARe5e2Zn3YBhZrOBq939HDM7gqiGciiwEviIuzcVs3yFYGYziAYZVAAvAx8julgc8L+7mf0z8CGiUYsrgY8T9QMMyN/dzJYCs4lW+d0K/CPwAF381nFw/Q5Rc98e4GPuXpfX8gyGQCIiIoUzGJq2RESkgBRIREQkiAKJiIgEUSAREZEgCiQiIhJEgUTkIGJmj5lZ4nttm9kN8Yqud6elfcTMrihMCUUOpEAi0k/FCzWe6O7TgWYzm2ZmQ4nmj9xe3NLJYKJAItIDM6sys/9rZs/G97r4UJz+dTN7Ok67I570lapR3GJmdfE9QU4ys/vje0R8M96mJr5nyI/jbe41s2FdfPaZZvYnM3vGzH4Wr5+Wrh0ojz97GNACXA38e7xUiEifUCAR6dlZwGvufkJ8r4vfxOnfcfeT4rShwDlpeZrdvRb4HtEyFZ8GjgcWmtmYeJtjgP9w9+OAncDfp3+omY0Fvgq8191PBOqAK9O3iW8T8CuiWdupVW9PcfcH8nPoIskokIj0bA3wvrgv4t3uviNOnxPffW8N0eKQb0/Lsywt7/PxPWKaiJYtSS2e96q7PxE//xHwrozPPZXoRmxPmNkqorWTJmcWzt1vdPcZ7n4V8A3g62b2cTO7x8y+GnTkIgkpkIj0wN1fIlpFdw3wzbhJawjwH8CF7j4N+E9gSFq21HpO7WnPU69Tq9Fmrk2U+dqAh+MgMcPdp7p7t3f4M7OZcZ51wAfd/SLgbWZ2VNJjFcmVAolID8zsrcAed/8RcBNRUEkFjTfjfosLu8vfg0lmdlr8/H8Df8x4/0ngnWZ2ZFyOKjM7uof9fQP4GlBOtDgpRIHrgL4XkXwr630TkUFtGnCTmbUTdWb/nbs3mNl/As8R3Ynu6Rz2uw74tJktIVryvNN9tN293swWAkvNrDJO/irwUuaOzGweUOfur8WvV8VNbqvd/dkcyiaSFa3+K9LH4lsg/zLuqBfp99S0JSIiQVQjERGRIKqRiIhIEAUSEREJokAiIiJBFEhERCSIAomIiARRIBERkSD/H5zcetcouK8LAAAAAElFTkSuQmCC\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "p.plot(kind='scatter',x='sample %',y='marketer', c = '#4682B4')\n", - "p.plot(kind='scatter',x='sample %',y = 'sample marketer', c='#CC0000')\n", - "ax = p.plot(kind='scatter',x='sample %',y='marketer', c = '#4682B4')\n", - "p.plot(kind='scatter',x='sample %',y = 'sample marketer', c='#CC0000',ax=ax)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - " This experiment consists in :\n", - " 1: randomly selecting x % of the records to be sampled\n", - " 2: running a group by on the sample\n", - " 3: calling groupby on the population which th\n", - "\"\"\"\n", - "SQL_ORIGINAL=\"SELECT * FROM deid_risk.risk_60k2\"\n", - "SQL_DEID = \"SELECT * FROM deid_risk.deid_risk_60k limit 20000\"\n", - "# df = pd.read_gbq(SQL_DEID,private_key='/home/steve/dev/google-cloud-sdk/accounts/curation-test.json')\n", - "\n", - "#\n", - "FLAG='REGISTERED-TIER-9'\n", - "if FLAG == 'REGISTERED-TIER' :\n", - " Yi = pd.DataFrame(dfr)\n", - " FOLDER='registered'\n", - "else:\n", - " Yi = pd.DataFrame(dfc)\n", - " FOLDER='controlled'\n", - "N = 20\n", - "N_ = str(N)\n", - "SUFFIX = FOLDER+'-tier-'+str(N)+'-experiment.xlsx'\n", - "PATH = os.sep.join(['out',SUFFIX])\n", - "\n", - "\n", - "columns = list(set(Yi.columns.tolist()) - set(['person_id']))\n", - "merged_columns = list(columns)+['field_count']\n", - "m = {}\n", - "p = pd.DataFrame()\n", - "n = 0\n", - "y_i= pd.DataFrame(Yi).deid.risk(id='person_id',quasi_id=columns)\n", - "for index in np.arange(5,105,5):\n", - "# np.random.seed( int(time())+np.random.randint(0,100)+index ) \n", - "# n = np.random.randint(10,35) #-- randomly pick a number within an interval\n", - " \n", - " for n in np.repeat(index,20) :\n", - "# np.random.seed( np.random.randint(0,int(time())+np.random.randint(0,1000)+index+n ) \n", - " #\n", - " # we will randomly sample n% rows from the dataset\n", - " i = np.random.choice(Yi.shape[0],((Yi.shape[0] * n)/100),replace=False)\n", - " x_i= pd.DataFrame(Yi).loc[i].deid.risk(id='person_id',quasi_id = columns)\n", - " \n", - "# y_i= pd.DataFrame(Yi).deid.risk(id='person_id',quasi_id=columns)\n", - "\n", - "\n", - " r = pd.merge(x_i,y_i,on=merged_columns,how='inner')\n", - " if r.shape[0] == 0 :\n", - " print 'skipping ',n\n", - " continue\n", - "\n", - " r['marketer'] = r.apply(lambda row: (row.group_size_x / row.group_size_y) / row.patient_count_x,axis=1 )\n", - " r = r.groupby(columns+['marketer_x'],as_index=False).sum()[columns+['marketer','marketer_x']]\n", - " r['sample %'] = np.repeat(n,r.shape[0])\n", - " r['tier'] = np.repeat(FOLDER,r.shape[0])\n", - " p = p.append(r)\n", - "\n", - "writer = pd.ExcelWriter(PATH,engine='xlsxwriter')\n", - "p = p.rename(columns={'marketer_x':'sample marketer'})\n", - "p.index = np.arange(p.shape[0]).astype(np.int64)\n", - "p.to_excel(writer,FOLDER)\n", - "writer.save()\n", - "p.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "ax = p.plot(kind='scatter',x='sample %',y='marketer',c='r',ylim=[p.marketer.min(),p.marketer.max()])\n", - "p.plot(kind='scatter',x='sample %',y='sample marketer',c='#4682B4')\n", - "ax = p.plot(kind='scatter',x='sample %',y='marketer',c='r')\n", - "p.plot(kind='scatter',x='sample %',y='sample marketer',c='#4682B4',ax=ax)\n", - "\n", - "_p = pd.DataFrame(p)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "p.head()\n", - "\n", - "# writer = pd.ExcelWriter('out/foo.xlsx',engine='xlsxwriter')\n", - "# workbook = writer.book\n", - "# r.groupby('field_count',as_index=False).sum()[['field_count','marketer_x']].to_excel(writer,'page-0')\n", - "# chart = workbook.add_chart({'type':'line'})\n", - "# o = r.groupby('field_count',as_index=False).sum()[['field_count','marketer_x']]\n", - "# # values = o.marketer_x.tolist()\n", - "# # values = [['page-0',item] for item in values]\n", - "# # chart.add_series({\"values\":values})\n", - "# # chart.add_series({'values':'=page-0!$B$2:$B$5'})\n", - "\n", - "# worksheet = writer.sheets['page-0']\n", - "# worksheet.insert_chart('G2',chart)\n", - "# writer.save()\n", - "\n", - "str(10)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "help(chart.add_series)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cols = list(set(dfr.columns.tolist()) - set(['person_id'])) + ['field_count']\n", - "r = pd.merge(x_i,y_i,on=cols,how='inner')\n", - "r['marketer'] = r.apply(lambda row: (row.group_count_x/row.group_count_y)/row.patient_count_y ,axis=1)\n", - "# r['field_count'] = r['field_count_x']\n", - "o = r.groupby(cols,as_index=False).sum()[cols+['marketer']]\n", - "o.groupby(['field_count'],as_index=False).mean()\n", - "# o.groupby('field_count',as_index=False).mean().plot.line(x='field_count',y='marketer')\n", - "# r.head()\n", - "# N = r.patient_count_y.mean()\n", - "# r['marketer'] = r.apply(lambda row: row.group_count_x / row.group_count_y,axis=1)\n", - "# m = r.groupby(['field_count'],as_index=False).mean()[['field_count','marketer']]\n", - "# m.marketer = m.marketer / N\n", - "# m.groupby(['field_count']).mean().plot.line()\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "p.to_csv('out/x-2/single-runs-deid.csv',index=False)\n", - "p.groupby(['sample %']).mean()['marketer'].plot.line()\n", - "p.groupby(['sample %'],as_index=False).mean().plot.scatter(x='sample %',y='marketer')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "y = pd.DataFrame({\"name\":['d','e','f','g'],\"age\":[12,40,20,30],\"income\":[100,200,300,400]})\n", - "x = pd.DataFrame({\"name\":['a','b','c'],\"age\":[10,20,40],\"income\":[120,100,200]})\n", - "\n", - "# x.join(y,how='outer',on='age')\n", - "x_ = pd.merge(x,y,on=['age','income'],how='outer')\n", - "Logger.log(action='merge',value=x_.shape)\n", - "Logger.cache" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#\n", - "# EXP_0\n", - "# Running the experiment on the Original dataset, with all the attributes\n", - "SCHEMA = \"deid_risk\"\n", - "df = pd.read_gbq(\"select person_id,birth_datetime,race,gender,sex_at_birth, city,state,zip from deid_risk.basic_risk60k \",private_key='/home/steve/dev/google-cloud-sdk/accounts/curation-test.json',\n", - " dialect='standard')\n", - "\n", - "RUNS = 500\n", - "FLAG = 'basic-features'\n", - "r = df.deid.risk(id='person_id',num_runs=RUNS) #,field_count=11)\n", - "# r.to_csv('out/pandas-60k-'+FLAG+'-patients-'+str(RUNS)+'-x-runs.csv')\n", - "compiled = r.groupby('field_count',as_index=False)['marketer','prosecutor'].mean()\n", - "fi = compiled[['marketer','prosecutor']].plot.line().get_figure()\n", - "# fo\n", - "# r.plot.line(x='field_count',y='marketer')\n", - "compiled = r.groupby('field_count',as_index=False)['field_count','marketer','prosecutor'].mean()\n", - "fig_i = r.plot.scatter(x='field_count',y='marketer').get_figure()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#\n", - "# EXP_2 :\n", - "# This experiment will run the marketer risk against individual attributes\n", - "deid_df = pd.read_gbq(\"select person_id,birth_datetime,race,gender,sex_at_birth, city,state,zip from deid_risk.basic_deid_risk60k\",private_key='/home/steve/dev/google-cloud-sdk/accounts/curation-test.json',\n", - " dialect='standard')\n", - "RUNS = 500\n", - "FLAG = 'basic-deid-features'\n", - "deid_r = deid_df.deid.risk(id='person_id',num_runs=RUNS) #,field_count=11)\n", - "# r.to_csv('out/pandas-60k-'+FLAG+'-patients-'+str(RUNS)+'-x-runs.csv')\n", - "deid_compiled = deid_r.groupby('field_count',as_index=False)['marketer','prosecutor'].mean()\n", - "fo = deid_compiled[['marketer','prosecutor']].plot.line().get_figure()\n", - "# fo\n", - "# r.plot.line(x='field_count',y='marketer')\n", - "# deid_compiled = deid_r.groupby('field_count',as_index=False)['field_count','marketer','prosecutor'].mean()\n", - "fig_o = deid_r.plot.scatter(x='field_count',y='marketer').get_figure()\n", - "\n", - "# orig_df = pd.read_gbq(\"select * from deid_risk.risk_60k2\",private_key='/home/steve/dev/google-cloud-sdk/accounts/curation-test.json',\n", - "# dialect='standard')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# deid_r.to_csv('out/basic-attributes-deid-data-60k-patients.csv')\n", - "# r.to_csv('out/basic-attributes-raw-data-60k-patients.csv')\n", - "# deid_r.head()\n", - "p = pd.DataFrame()\n", - "p = deid_df.deid.risk(id='person_id',quasi_id=['birth_datetime','race','gender','sex_at_birth', 'city','state','zip'])\n", - "p = p.append(df.deid.risk(id='person_id',quasi_id=['birth_datetime','race','gender','sex_at_birth', 'city','state','zip']))\n", - "p.index = ['deid data','raw data']\n", - "p.to_csv('out/basic_run-7-fields.csv')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cols = deid_r.columns[5:]\n", - "deid_r.index = np.arange(deid_r.shape[0]).astype(np.int64)\n", - "xdeid_ = deid_r[cols].sum().tolist()\n", - "xraw_ = r[cols].sum().tolist()\n", - "o = pd.DataFrame()\n", - "o['name'] = cols\n", - "o['raw'] = xraw_\n", - "o['deid']= xdeid_\n", - "\n", - "\n", - "o\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "columns = list( set(orig_df.columns) - set(['person_id']))\n", - "xo = pd.DataFrame()\n", - "xi = pd.DataFrame()\n", - "#\n", - "# Let's compute the risk for every attribute given the list of attributes we've gathered\n", - "#\n", - "for name in columns :\n", - " xo = xo.append(deid_df.deid.risk(id='person_id',quasi_id=[name])[['marketer','prosecutor']],sort=False)\n", - " xi = xi.append(orig_df.deid.risk(id='person_id',quasi_id=[name])[['marketer','prosecutor']],sort=False)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#\n", - "# The following shows how much the deid process has affected each attributes\n", - "#\n", - "\n", - "RISK_THRESHOLD = 0.5\n", - "xo.index = columns\n", - "xi.index = columns\n", - "\n", - "ii = xi[xi.marketer > RISK_THRESHOLD].index\n", - "# zo = pd.concat([xi.loc[ii],xo.loc[ii]])\n", - "\n", - "zo = xi.loc[ii].join(xo.loc[ii],rsuffix='_deid')\n", - "#\n", - "# heatmap for original data\n", - "# fig_o = sns.heatmap(xi.loc[ii], cmap='RdYlGn_r', linewidths=0.5, annot=True).get_figure()\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#\n", - "# Running the experiment on the DEID dataset, with all the attributes\n", - "#\n", - "df = pd.read_gbq(\"select * from deid_risk.deid_risk_60k\",private_key='/home/steve/dev/google-cloud-sdk/accounts/curation-test.json',\n", - " dialect='standard')\n", - "\n", - "RUNS = 1500\n", - "FLAG = 'deid-full-attr-dataset'\n", - "r = df.deid.risk(id='person_id',num_runs=RUNS) #,field_count=11)\n", - "# r.to_csv('out/pandas-60k-'+FLAG+'-patients-'+str(RUNS)+'-x-runs.csv')\n", - "compiled = r.groupby('field_count',as_index=False)['marketer','prosecutor'].mean()\n", - "fo = compiled[['marketer','prosecutor']].plot.line().get_figure()\n", - "# fo\n", - "# r.plot.line(x='field_count',y='marketer')\n", - "compiled = r.groupby('field_count',as_index=False)['field_count','marketer','prosecutor'].mean()\n", - "fig_o = r.plot.scatter(x='field_count',y='marketer').get_figure()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "r.groupby('field_count',as_index=False)['marketer','prosecutor'].var()[['marketer','prosecutor']].plot.line()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#\n", - "# We are going to look into the attributes with a risk of a given threshold\n", - "# We will run the experiment (varied combinations of the list of attributes)\n", - "# The experiment is intended to capture the attributes responsible for increasing the marketer risk\n", - "#\n", - "DEID_DATASET = 'deid_risk.deid_risk_60k2'\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.15rc1" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/registered-tier-history.ipynb b/notebooks/registered-tier-history.ipynb deleted file mode 100644 index 3278968..0000000 --- a/notebooks/registered-tier-history.ipynb +++ /dev/null @@ -1,385 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "The experiments here describe medical/family history as they associate with risk measures\n", - "Additionally we will have fractional risk assessments\n", - "\"\"\"\n", - "import pandas as pd\n", - "import numpy as np\n", - "from pandas_risk import *\n", - "dfm = pd.read_gbq(\"SELECT * FROM deid_risk.registered_medical_history_dec_001\",private_key='/home/steve/dev/google-cloud-sdk/accounts/curation-test.json')\n", - "dff = pd.read_gbq(\"SELECT * FROM deid_risk.registered_family_history_dec_001\",private_key='/home/steve/dev/google-cloud-sdk/accounts/curation-test.json')\n", - "df = pd.read_gbq(\"SELECT person_id, birth_date,city,state,home_owner,race,ethnicity,gender,birth_place,marital_status,orientation,education,employment_status,income,travel_abroad_6_months,active_duty_status FROM deid_risk.registered_dec_01\",private_key='/home/steve/dev/google-cloud-sdk/accounts/curation-test.json')" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [], - "source": [ - "med_cols = np.random.choice(list(set(dfm.columns.tolist()) - set(['person_id'])),3).tolist()\n", - "fam_cols = np.random.choice(list(set(dff.columns.tolist()) - set(['person_id'])),3).tolist()\n", - "medical = pd.merge(df,dfm[med_cols+['person_id']],on='person_id')\n", - "family = pd.merge(df,dff[fam_cols + ['person_id']],on='person_id')\n", - "_tmp = pd.merge(dfm[med_cols +['person_id']],dff[fam_cols+['person_id']])\n", - "data = pd.merge(df,_tmp,on='person_id')" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
field_countflaggroup_countmarketerprosecutorunique_row_ratio
021full history1153080.9926911.00.987663
118medical1153060.9926741.00.987629
218family1153040.9926561.00.987594
315no-history1153000.9926221.00.987526
43medical-only270.0002320.50.000000
53family-only1460.0012571.00.000551
\n", - "
" - ], - "text/plain": [ - " field_count flag group_count marketer prosecutor \\\n", - "0 21 full history 115308 0.992691 1.0 \n", - "1 18 medical 115306 0.992674 1.0 \n", - "2 18 family 115304 0.992656 1.0 \n", - "3 15 no-history 115300 0.992622 1.0 \n", - "4 3 medical-only 27 0.000232 0.5 \n", - "5 3 family-only 146 0.001257 1.0 \n", - "\n", - " unique_row_ratio \n", - "0 0.987663 \n", - "1 0.987629 \n", - "2 0.987594 \n", - "3 0.987526 \n", - "4 0.000000 \n", - "5 0.000551 " - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd.concat([data.deid.evaluate(flag='full history',cols= list(set(data.columns.tolist()) - set(['person_id'])) )\n", - " ,medical.deid.evaluate(flag='medical',cols=list( set(medical.columns.tolist() ) - set(['person_id']) ) )\n", - " ,family.deid.evaluate(flag='family',cols=list( set(family.columns.tolist() ) - set(['person_id']) ) )\n", - " ,df.deid.evaluate(flag='no-history',cols=list( set(df.columns.tolist() ) - set(['person_id']) ) )\n", - " , dfm.deid.evaluate(flag='medical-only',cols=med_cols )\n", - " , dff.deid.evaluate(flag='family-only',cols=fam_cols )\n", - " ],ignore_index=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "from __future__ import division\n", - "def evaluate(df) :\n", - " cols = list(set(df.columns.tolist()) - set(['person_id']))\n", - " \n", - " portions = np.round(np.random.random_sample(4),3).tolist() + np.arange(5,105,5).tolist()\n", - " \n", - " N = df.shape[0] - 1\n", - " portions = np.divide(np.multiply(portions,N),100).astype(np.int64)\n", - " portions = np.unique([n for n in portions if n > 1])\n", - " \n", - " r = pd.DataFrame()\n", - " for num_rows in portions :\n", - " \n", - " indices = np.random.choice(N,num_rows,replace=False)\n", - "# print (indices.size / N)\n", - " flag = \" \".join([str( np.round(100*indices.size/ N,2)),'%'])\n", - " r = r.append(df.loc[indices].deid.evaluate(cols=cols,flag=flag,min_group_size=2))\n", - " return r" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
field_countflaggroup_countmarketerprosecutorunique_row_ratio
011UNFLAGGED1148860.9890581.00.980535
\n", - "
" - ], - "text/plain": [ - " field_count flag group_count marketer prosecutor unique_row_ratio\n", - "0 11 UNFLAGGED 114886 0.989058 1.0 0.980535" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cols = list(set (df.columns.tolist()) - set(['person_id']))\n", - "df[['race','state','gender_identity','ethnicity','marital_status','education','orientation','sex_at_birth','birth_date','travel_abroad_6_months','active_duty_status']].deid.evaluate()" - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['person_id',\n", - " 'HearingVision_FarSightedness',\n", - " 'HearingVision_Glaucoma',\n", - " 'Digestive_Pancreatitis']" - ] - }, - "execution_count": 68, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#\n", - "# This is the merge with medical history\n", - "\n", - "cols = ['person_id'] + np.random.choice(dfm.columns[1:],3,replace=False).tolist()\n", - "p = pd.merge(df,dfm[cols],on='person_id')\n", - "cols\n", - "# # cols = list(set(p.columns.tolist()) - set(['person_id']))\n", - "# evaluate(p) #p.deid.explore(cols=cols,num_runs=100)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "cols = list( set(dfm.columns.tolist()) - set(['person_id']))\n", - "cols = np.random.choice(cols,3,replace=False).tolist()\n", - "p = pd.merge(dfm[['person_id']+cols],df)\n", - "fcols = list(set(p.columns.tolist()) - set(['person_id']))\n", - "# dfm[cols].deid.evaluate(cols=list( set(cols) - set(['person_id'])))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "variables": { - " \" ; \".join(cols)": "InfectiousDiseases_HepatitisC ; Cancer_StomachCancer ; Circulatory_Hypertension", - " p.shape[0] ": "116157", - " p[fcols].deid.evaluate() ": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
field_countflaggroup_countmarketerprosecutorunique_row_ratio
037UNFLAGGED1153970.9934571.00.98886
\n
" - } - }, - "source": [ - "### Medical History\n", - "\n", - " We randomly select three a tributes {{ \" ; \".join(cols)}} . \n", - " The dataset associated risk evaluation contains {{ p.shape[0] }} records\n", - "{{ p[fcols].deid.evaluate() }}\n", - "\n", - " \n" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['person_id',\n", - " 'InfectiousDiseases_Tuberculosis',\n", - " 'SkeletalMuscular_Fibromyalgia',\n", - " 'Cancer_ProstateCancer']" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cols" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "3" - ] - }, - "execution_count": 67, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# dfm[cols[1:]].head()\n", - "np.sum(dfm.fillna(' ').groupby(cols[1:],as_index=False).size().values <= 1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.15rc1" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/registered-tier.ipynb b/notebooks/registered-tier.ipynb deleted file mode 100644 index c8acbbf..0000000 --- a/notebooks/registered-tier.ipynb +++ /dev/null @@ -1,859 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "This notebook is designed to run experiments around demographics on registered tier\n", - "The \n", - "\"\"\"\n", - "import pandas as pd\n", - "import numpy as np\n", - "from pandas_risk import *\n", - "\n", - "ATTRIBUTES = ['race','ethnicity','birth_date','state','city','zip','marital_status','education','language','home_owner','income','employment_status','living_situation','active_duty_status','gender_identity','birth_place','death_date','death_cause','orientation']\n", - "dfs = pd.read_csv('scenario-settings.csv')\n", - "dfc = pd.read_gbq(\"SELECT * FROM deid_risk.registered_dec_01\",private_key='/home/steve/dev/google-cloud-sdk/accounts/curation-test.json')\n" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "cols_o = dfs.loc[(dfs.fo & dfs.fi) ==1].feature.tolist()\n", - "cols_i = dfs.loc[(dfs.fo + dfs.fi )>=1 ].feature.tolist()\n", - "cols_a = dfs.feature.tolist()\n", - "\n", - "cols_v = ['birth_date','gender_identity','race','state','city','birth_place'] #-- voter registration\n", - "#remove the dates fields because dates are shifted\n", - "cols_o = [i for i in cols_o if i not in ['birth_date','death_date']]\n", - "cols_i = [i for i in cols_i if i not in ['birth_date','death_date']]\n", - "cols_a = [i for i in cols_a if i not in ['birth_date','death_date']]\n", - "cols_v = [i for i in cols_v if i not in ['birth_date', 'death_date']]" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "# print(dfs)\n", - "# print(cols_o)\n", - "# print(cols_i)\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
field_countflaggroup_countmarketerprosecutorunique_row_ratio
08high-conj65320.0562341.00.021368
111high-disj474470.4084731.00.278554
216all607180.5227241.00.408189
35voter-reg13160.0113291.00.002944
\n", - "
" - ], - "text/plain": [ - " field_count flag group_count marketer prosecutor unique_row_ratio\n", - "0 8 high-conj 6532 0.056234 1.0 0.021368\n", - "1 11 high-disj 47447 0.408473 1.0 0.278554\n", - "2 16 all 60718 0.522724 1.0 0.408189\n", - "3 5 voter-reg 1316 0.011329 1.0 0.002944" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "r = pd.concat([dfc[cols_o].deid.evaluate(),dfc[cols_i].deid.evaluate(),dfc[cols_a].deid.evaluate(),dfc[cols_v].deid.evaluate() ])\n", - "r.index = np.arange(r.shape[0]).astype(np.int64)\n", - "r['flag']=['high-conj','high-disj','all','voter-reg']\n", - "\n", - "r\n" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEuCAYAAAB1QVLBAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAFxVJREFUeJzt3X+U3XV95/HnK+FHtohSIcsiAZO6UYGFEjdEUY/yU0E0bP3RwuourFa2q1A89lSBFqzUY6ly3HqUrlKbo9a1ILCWGKIgVVyRU8gAWWzMcow0QmArETGy7okQfe8f906cjHcydyZ3cplPno9z5uR+P9/PzH3nnpnXfOf9/d7vJ1WFJKktc4ZdgCRp8Ax3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoP2GtYTH3TQQbVw4cJhPb0kzUp33333D6tq/mTzhhbuCxcuZGRkZFhPL0mzUpLv9zPPtowkNchwl6QGGe6S1KCh9dwlteupp55i06ZNbN26ddilzFrz5s1jwYIF7L333tP6fMNd0sBt2rSJ/fffn4ULF5Jk2OXMOlXFY489xqZNm1i0aNG0voZtGUkDt3XrVg488ECDfZqScOCBB+7SXz6Gu6QZYbDvml19/Qx3SWqQPXdpN1h40U3DLqEvG684Y0a+7qD//zNV50Ruu+02rrzySlatWtXX/LVr1/LII4/wmte8ZoYrm5hH7pK0E9u2bZvy56xdu5bVq1fP+PPsjOEuqUkbN27khS98Ieeeey7Pf/7zefOb38ytt97Ky172MhYvXsxdd93FXXfdxfHHH8+SJUt46Utfyv333w/Apz/9aZYvX85JJ53EySefvMPXXbNmDUuWLOF73/seP/3pT3nrW9/KsmXLWLJkCTfeeCNPPvkkl112Gddeey3HHnss1157bc95kz3PrrItI6lZGzZs4LrrrmPFihUcd9xxfP7zn+f2229n5cqVfPCDH+Szn/0s3/zmN9lrr7249dZbueSSS7jhhhsAuOeee7jvvvt49rOfzW233QbAHXfcwQUXXMCNN97I4YcfziWXXMJJJ53EihUr+PGPf8yyZcs45ZRTuPzyyxkZGeHjH/84wITzxj/PIBnukpq1aNEijj76aACOOuooTj75ZJJw9NFHs3HjRrZs2cI555zDd7/7XZLw1FNPbf/cU089dYfAXb9+Peeddx633HILz3nOcwC45ZZbWLlyJVdeeSXQuQT0wQcf/JU6djZv/PMMiuEuqVn77rvv9sdz5szZvj1nzhy2bdvGpZdeyoknnsgXv/hFNm7cyAknnLB9/n777bfD1zrkkEPYunUr99577/ZwrypuuOEGXvCCF+ww984779xhe2fzxj/PoPTVc09yWpL7k2xIclGP/ecm2ZxkbffjdwdfqiQN1pYtWzj00EOBTv97Zw444ABuuukmLr744u1tmle/+tV87GMfo6oAuPfeewHYf//9eeKJJ7Z/7kTzZtKkR+5J5gJXAacCm4A1SVZW1XfGTb22qs6fgRolzXK7+9LFfr3nPe/hnHPO4QMf+ABnnDF5jQcffDCrVq3i9NNPZ8WKFVx66aW8613v4phjjuEXv/gFixYtYtWqVZx44olcccUVHHvssVx88cUTzptJGf1NMuGE5HjgT6rq1d3tiwGq6s/GzDkXWDqVcF+6dGm5WIf2FHvade7r16/niCOOGMjX2pP1eh2T3F1VSyf73H7aMocCD43Z3tQdG+8NSe5Lcn2Sw3p9oSTnJRlJMrJ58+Y+nlqSNB2Dus79S8DCqjoG+CrwmV6TqurqqlpaVUvnz590CUBJ0jT1E+4PA2OPxBd0x7arqseq6mfdzU8B/3Yw5UmarSZr+WrndvX16yfc1wCLkyxKsg9wFrBy7IQkh4zZXA6s36WqJM1q8+bN47HHHjPgp2n0fu7z5s2b9teY9GqZqtqW5HzgZmAusKKq1iW5HBipqpXA7ydZDmwDfgScO+2KJM16CxYsYNOmTXhubfpGV2Karr7exFRVq4HV48YuG/P4YuDiaVchqSl77733tFcQ0mB44zBJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhq0Vz+TkpwGfBSYC3yqqq6YYN4bgOuB46pqZGBVaigWXnTTsEvoy8Yrzhh2CdLTzqRH7knmAlcBpwNHAmcnObLHvP2BC4E7B12kJGlq+mnLLAM2VNUDVfUkcA1wZo95fwr8ObB1gPVJkqahn3A/FHhozPam7th2SV4EHFZVO/07Psl5SUaSjGzevHnKxUqS+rPLJ1STzAE+AvzBZHOr6uqqWlpVS+fPn7+rTy1JmkA/4f4wcNiY7QXdsVH7A/8GuC3JRuAlwMokSwdVpCRpavoJ9zXA4iSLkuwDnAWsHN1ZVVuq6qCqWlhVC4F/AJZ7tYwkDc+k4V5V24DzgZuB9cAXqmpdksuTLJ/pAiVJU9fXde5VtRpYPW7ssgnmnrDrZUmSdoXvUJWkBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkN6ivck5yW5P4kG5Jc1GP/7yX5dpK1SW5PcuTgS5Uk9WvScE8yF7gKOB04Eji7R3h/vqqOrqpjgQ8BHxl4pZKkvvVz5L4M2FBVD1TVk8A1wJljJ1TVT8Zs7gfU4EqUJE3VXn3MORR4aMz2JuDF4ycleSfwbmAf4KSBVCdJmpaBnVCtqquq6nnAe4E/7jUnyXlJRpKMbN68eVBPLUkap59wfxg4bMz2gu7YRK4B/l2vHVV1dVUtraql8+fP779KSdKU9BPua4DFSRYl2Qc4C1g5dkKSxWM2zwC+O7gSJUlTNWnPvaq2JTkfuBmYC6yoqnVJLgdGqmolcH6SU4CngMeBc2ayaEnSzvVzQpWqWg2sHjd22ZjHFw64LknSLvAdqpLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNaivcE9yWpL7k2xIclGP/e9O8p0k9yX5+yTPHXypkqR+TRruSeYCVwGnA0cCZyc5cty0e4GlVXUMcD3woUEXKknqXz9H7suADVX1QFU9CVwDnDl2QlV9var+X3fzH4AFgy1TkjQV/YT7ocBDY7Y3dccm8jbgy712JDkvyUiSkc2bN/dfpSRpSgZ6QjXJW4ClwId77a+qq6tqaVUtnT9//iCfWpI0xl59zHkYOGzM9oLu2A6SnAL8EfDKqvrZYMqTJE1HP0fua4DFSRYl2Qc4C1g5dkKSJcAngeVV9ejgy5QkTcWk4V5V24DzgZuB9cAXqmpdksuTLO9O+zDwDOC6JGuTrJzgy0mSdoN+2jJU1Wpg9bixy8Y8PmXAdUmSdoHvUJWkBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUF9hXuS05Lcn2RDkot67H9FknuSbEvyxsGXKUmaiknDPclc4CrgdOBI4OwkR46b9iBwLvD5QRcoSZq6vfqYswzYUFUPACS5BjgT+M7ohKra2N33ixmoUZI0Rf20ZQ4FHhqzvak7NmVJzksykmRk8+bN0/kSkqQ+7NYTqlV1dVUtraql8+fP351PLUl7lH7C/WHgsDHbC7pjkqSnqX7CfQ2wOMmiJPsAZwErZ7YsSdKumDTcq2obcD5wM7Ae+EJVrUtyeZLlAEmOS7IJeBPwySTrZrJoSdLO9XO1DFW1Glg9buyyMY/X0GnXSJKeBnyHqiQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1KC+7i0zWyy86KZhl9CXjVecMewSJDXOI3dJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqUFMrMUnaM7jq2uT6OnJPclqS+5NsSHJRj/37Jrm2u//OJAsHXagkqX+ThnuSucBVwOnAkcDZSY4cN+1twONV9a+B/wr8+aALlST1r58j92XAhqp6oKqeBK4Bzhw350zgM93H1wMnJ8ngypQkTUU/PfdDgYfGbG8CXjzRnKralmQLcCDww7GTkpwHnNfd/L9J7p9O0bvZQYz7f+yq7Nl/1/h6Do6v5WDNltfzuf1M2q0nVKvqauDq3fmcuyrJSFUtHXYdrfD1HBxfy8Fq7fXspy3zMHDYmO0F3bGec5LsBTwLeGwQBUqSpq6fcF8DLE6yKMk+wFnAynFzVgLndB+/EfhaVdXgypQkTcWkbZluD/184GZgLrCiqtYluRwYqaqVwF8Df5NkA/AjOr8AWjGr2kizgK/n4PhaDlZTr2c8wJak9nj7AUlqkOEuSQ0y3CWpQYa7JDXIu0JqxiR5ZlX9JMmze+wu4CdV9fPdXZcEkOT1PYa3AN+uqkd3dz2D5tUyYyR5S1V9Lsm7e+wuOpd5rqyqx3dzabNSklVV9dok/0Tn9Rt/v6FnAH9VVZfs/upmlyRP0HkNf2UXUFX1zN1c0qyX5CbgeODr3aETgLuBRcDlVfU3QyptIDxy39F+3X/3n2D/IuC/AC/ZPeXMblX12u6/i3rt795x9B8Bw30SVTXR96Smby/giKr6AUCSg4HP0rl31v8EDPdWVNUnu/++f6I53TdvaQqSvAxYW1U/TfIW4EXAX1TVg8ARw61udpigtbVdVf1od9XSkMNGg73r0e7Yj5I8NayiBsW2TA9J5gNvBxYy5hdgVb11WDXNZknuA34TOAb4NPAp4Ler6pXDrGs2maC1NbpdVfUbQylsFkvyl8DhwHXdoTfSubvtHwKrqurEYdU2CIZ7D0nuAL5Jp/+2/YRfVd0wtKJmsST3VNWLklwGPFxVfz06NuzaZqPuUfxiYN7oWFV9Y3gVzU7dNSdeD7y8O/Qt4IZW7otlW6a3X6uq9w67iIY8keRi4C3AK5LMAfYeck2zUpLfBS6kc3fWtXTO/9wBnDzMumajqqokI8CWqro1ya/ROcn/xJBLGwivc+9tVZLXDLuIhvwO8DPgbVX1z3SC6cPDLWnWuhA4Dvh+t22whM7le5qiJG+ns3LcJ7tDhwJ/N7yKBsu2TA/dy872A54ERk+seLmZhi7Jmqo6Lsla4MVV9bMk66rqqGHXNtt0X8NlwJ1VtaQ79u2qOnq4lQ2GbZkevOxsMJLcXlUv73GNttdmT9+mJAfQOcL8apLHge8PuabZ6mdV9eTocs/dhYaaOdr1yH0CSZYDr+hu3lZVq4ZZjzReklfSWfXsK93F6zUFST4E/Bj4j8AFwDuA71TVHw21sAEx3HtIcgWdvuZ/7w6dTWdhkouHV9Xs47XZejrrnth/G/AqOn9N3gx8qpWrZQz3HrrXZR9bVb/obs8F7q2qY4Zb2ewy7trsw4HHu48PAB6c6J2r0kzr/kx/tqrePOxaZopXy0zsgDGPnzW0KmaxqlrUfXPNrcDrquqgqjoQeC1wy3Cr056se8O653bXhW6SJ1R7+zPg3iRfp3Ok+QrgouGWNKu9pKrePrpRVV/u9julYXoA+FaSlcBPRwer6iPDK2lwDPcequpvk9xGp+8O8N7u9dmankeS/DHwue72m4FHhliPBPC97sccJr5Z4Kxlz72HJL8FfK2qtnS3DwBOqKpm3uCwO3VPrL6PX1599A06t1T1hKqeFpL8q9YO4Az3HpKsrapjx43dO/pGB01fkkOq6v8Muw5prBbvdeQJ1d56vS62sAbjpmEXIPUwfiGZWc9w720kyUeSPK/78RE6d4jUrmvuh0hN+KthFzBohntvF9C5r8y1wDXAVuCdQ62oHc39EGn2STI3yf8e3a6qvxxmPTPBnvsk7BEPRvdNIwez4+InDw6vIu3pktwIXNDq96F95MndRGdZOE1TkgvoXC3zAzqLn4TOO1d9x6+G6deBdUnuYsfr3JcPr6TBMdwnZ494110IvKCqHht2IdIYlw67gJlkuE/OHvGuewgXlNDTTFV9I8lzgcVjVmKaO+y6BsWe+wTsEe+6JO/uPjwKeAGdFtfPRve38jZvzU7dlZjOA55dVc9Lshj4RFU1sWShR+492CMemNG3dD/Y/din+yE9HbyT7kpMAFX13ST/crglDY7h3ps94gGoqvcPuwZpJ5peiclw780e8QAl+RK/+kOzBRgBPllVW3d/VRLfSHIJ8C+SnEpnJaYvDbmmgbHnPoY94pmR5KPAfOBvu0O/A/yETuA/s6r+w7Bq056r10pMVdXMBRQeue/IHvHMeGlVHTdm+0tJ1lTVcUnWDa0q7ekuqKqPMuaKuCQXdsdmPY/cNeOSrAdePXq1UZLD6RwlHeHdNjUsve4E2dL3o0fuPdgjHrg/AG5P8j06f/4uAt6RZD/gM0OtTHucJGcD/x5Y1F2FadQzgWbWGPDIvQd7xIOXZF/ghd3N+/0FqWHpvnFpEZ3lNMcun/kEcF9VbRtKYQNmuPcw2g/uNZZkXVUdNazaZpMkJ1XV15K8vtf+qvofu7smaawkB/PL5TTvqqpHh1nPINmW6e0ZSQ4f1yN+Rnffk8Mra9Z5JfA14HXd7dEjidE3hRnuGpokbwKuBG6j8z35sSR/WFXXD7WwAfHIvYckrwE+QWfx3O09YjrfBG+vqr8YXnWzT5J5wBuAhfzygKKq6vKhFaU9XpL/BZw6erSeZD5wa1X95nArGwyP3HuoqtXd+0z06hEb7FP3d8CPgXvoLHwCDb0TULPWnHFtmMdoaAEjw32MnfSIn5fEHvH0Laiq04ZdhDTOl5PczI4XTqweYj0DZbjvyB7xzLgjydFV9e1hFyKN8SjwOeDY7vbVVfXFIdYzUPbce7BHPBhJvk3nl+JewGLgATq3cwid19O7bGpokrwP+G0617ZfC1xXVT8YblWDY7j3kOQr/LJH/PPucHlvmanpXk88oar6/u6qRZpIkmPotGTeAGyqqlOGXNJA2JbpzR7xABjemiUeBf6ZzgnVZu7n3syZ4QG7I8nRwy5C0sxJ8o4ktwF/DxxI5zLnZlqFHrmPMa5H/J+S2COW2nUY8K6qWjvsQmaCPfcx7BFLaoXhLkkNsucuSQ0y3CWpQYa79jhJfj/J+iQPJ/n4sOuRZoJXy2hP9A7glO7H0iHXIs0Ij9y1R0nyCeA3gC8Dvz5m/HVJ7kxyb5Jbu4s4kGR+kq8mWZfkU0m+n+SgIZUv9c1w1x6lqn4PeAQ4EXh8zK7bgZd0F0e+BnhPd/x9wNe6q29dDxy+G8uVps22jNSxALg2ySHAPsA/dcdfDvwWQFV9JcnjE3y+9LTikbvU8THg41V1NPCfgXlDrkfaJYa71PEs4OHu43PGjH+Lzm1hSfIqxvTppaczw13q+BPguiR3Az8cM/5+4FVJ/hF4E527Bz6x+8uTpsbbD0g7kWRf4OdVtS3J8cB/q6pjJ/s8adg8oSrt3OHAF5LMAZ4E3j7keqS+eOQuSQ2y5y5JDTLcJalBhrskNchwl6QGGe6S1KD/DzwQEyuRP8dtAAAAAElFTkSuQmCC\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "fig_o = r.plot(kind='bar',x='flag',y=['marketer']).get_figure()\n" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "writer = pd.ExcelWriter('out-116kpatients-phase-1.xlsx',engine='xlsxwriter')\n", - "r.to_excel(writer,'phase-1')\n", - "writer.save()" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
featurefifo
0race11
1ethnicity11
2birth_date11
3city11
4state11
5marital_status11
6education10
7language00
8home_owner11
9income01
10employment_status10
11living_situation00
12active_duty_status00
13gender_identity11
14birth_place00
15death_date11
16death_cause11
17orientation00
\n", - "
" - ], - "text/plain": [ - " feature fi fo\n", - "0 race 1 1\n", - "1 ethnicity 1 1\n", - "2 birth_date 1 1\n", - "3 city 1 1\n", - "4 state 1 1\n", - "5 marital_status 1 1\n", - "6 education 1 0\n", - "7 language 0 0\n", - "8 home_owner 1 1\n", - "9 income 0 1\n", - "10 employment_status 1 0\n", - "11 living_situation 0 0\n", - "12 active_duty_status 0 0\n", - "13 gender_identity 1 1\n", - "14 birth_place 0 0\n", - "15 death_date 1 1\n", - "16 death_cause 1 1\n", - "17 orientation 0 0" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dfs\n" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "names = pd.read_csv('family-history.csv').name.tolist()\n", - "path ='/home/steve/dev/google-cloud-sdk/accounts/curation-test.json'\n", - "sql = \"\"\"\n", - "SELECT * FROM deid_risk.registered_medical_history_dec_001\n", - "\"\"\"\n", - "dfm = pd.read_gbq(\"SELECT * FROM deid_risk.registered_medical_history_dec_001\",private_key=path,dialect='standard')" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(0.9343780009344719, 1.269831148073964)" - ] - }, - "execution_count": 69, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cols = list( set(dfm.columns.tolist()) - set(['person_id']))\n", - "r = pd.DataFrame(dfm[cols].count(),columns=['counts'])\n", - "r['attributes'] = r.index\n", - "r['rate'] = 100*(r.counts / dfm.shape[0])\n", - "r.rate.mean(),np.sqrt(r.rate.var())" - ] - }, - { - "cell_type": "code", - "execution_count": 81, - "metadata": {}, - "outputs": [], - "source": [ - "writer = pd.ExcelWriter('/home/steve/tmp/simple.xlsx', engine='xlsxwriter')\n", - "r.to_excel(writer,sheet_name='p1')\n", - "workbook = writer.book\n", - "worksheet = workbook.add_worksheet()\n", - "b = pd.DataFrame({\"id\":np.random.choice(10,30)})" - ] - }, - { - "cell_type": "code", - "execution_count": 80, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['__class__',\n", - " '__delattr__',\n", - " '__dict__',\n", - " '__doc__',\n", - " '__format__',\n", - " '__getattribute__',\n", - " '__hash__',\n", - " '__init__',\n", - " '__module__',\n", - " '__new__',\n", - " '__reduce__',\n", - " '__reduce_ex__',\n", - " '__repr__',\n", - " '__setattr__',\n", - " '__sizeof__',\n", - " '__str__',\n", - " '__subclasshook__',\n", - " '__weakref__',\n", - " '_assemble_xml_file',\n", - " '_button_params',\n", - " '_calculate_spans',\n", - " '_calculate_x_split_width',\n", - " '_check_dimensions',\n", - " '_comment_params',\n", - " '_convert_date_time',\n", - " '_convert_name_area',\n", - " '_csv_join',\n", - " '_encode_password',\n", - " '_escape_attributes',\n", - " '_escape_data',\n", - " '_escape_url',\n", - " '_extract_filter_tokens',\n", - " '_get_palette_color',\n", - " '_get_range_data',\n", - " '_initialize',\n", - " '_isinf',\n", - " '_isnan',\n", - " '_opt_close',\n", - " '_opt_reopen',\n", - " '_parse_filter_expression',\n", - " '_parse_filter_tokens',\n", - " '_position_object_emus',\n", - " '_position_object_pixels',\n", - " '_prepare_chart',\n", - " '_prepare_header_image',\n", - " '_prepare_header_vml_objects',\n", - " '_prepare_image',\n", - " '_prepare_shape',\n", - " '_prepare_tables',\n", - " '_prepare_vml_objects',\n", - " '_set_filehandle',\n", - " '_set_icon_props',\n", - " '_set_spark_color',\n", - " '_set_xml_writer',\n", - " '_size_col',\n", - " '_size_row',\n", - " '_sort_pagebreaks',\n", - " '_table_function_to_formula',\n", - " '_write',\n", - " '_write_array_formula',\n", - " '_write_auto_filter',\n", - " '_write_autofilters',\n", - " '_write_blank',\n", - " '_write_boolean',\n", - " '_write_brk',\n", - " '_write_cell',\n", - " '_write_cell_array_formula',\n", - " '_write_cell_value',\n", - " '_write_cf_rule',\n", - " '_write_cfvo',\n", - " '_write_col_breaks',\n", - " '_write_col_info',\n", - " '_write_color',\n", - " '_write_color_axis',\n", - " '_write_color_first',\n", - " '_write_color_high',\n", - " '_write_color_last',\n", - " '_write_color_low',\n", - " '_write_color_markers',\n", - " '_write_color_negative',\n", - " '_write_color_scale',\n", - " '_write_color_series',\n", - " '_write_cols',\n", - " '_write_conditional_formats',\n", - " '_write_conditional_formatting',\n", - " '_write_conditional_formatting_2010',\n", - " '_write_custom_filter',\n", - " '_write_custom_filters',\n", - " '_write_data_bar',\n", - " '_write_data_bar_ext',\n", - " '_write_data_validation',\n", - " '_write_data_validations',\n", - " '_write_datetime',\n", - " '_write_dimension',\n", - " '_write_drawing',\n", - " '_write_drawings',\n", - " '_write_empty_row',\n", - " '_write_ext',\n", - " '_write_ext_list',\n", - " '_write_ext_list_data_bars',\n", - " '_write_ext_list_sparklines',\n", - " '_write_filter',\n", - " '_write_filter_column',\n", - " '_write_filters',\n", - " '_write_font',\n", - " '_write_formula',\n", - " '_write_formula_1',\n", - " '_write_formula_2',\n", - " '_write_formula_element',\n", - " '_write_freeze_panes',\n", - " '_write_header_footer',\n", - " '_write_hyperlink_external',\n", - " '_write_hyperlink_internal',\n", - " '_write_hyperlinks',\n", - " '_write_icon_set',\n", - " '_write_legacy_drawing',\n", - " '_write_legacy_drawing_hf',\n", - " '_write_merge_cell',\n", - " '_write_merge_cells',\n", - " '_write_number',\n", - " '_write_odd_footer',\n", - " '_write_odd_header',\n", - " '_write_optimized_sheet_data',\n", - " '_write_outline_pr',\n", - " '_write_page_margins',\n", - " '_write_page_set_up_pr',\n", - " '_write_page_setup',\n", - " '_write_panes',\n", - " '_write_phonetic_pr',\n", - " '_write_print_options',\n", - " '_write_rich_string',\n", - " '_write_row',\n", - " '_write_row_breaks',\n", - " '_write_rows',\n", - " '_write_rstring_color',\n", - " '_write_selection',\n", - " '_write_selections',\n", - " '_write_sheet_data',\n", - " '_write_sheet_format_pr',\n", - " '_write_sheet_pr',\n", - " '_write_sheet_protection',\n", - " '_write_sheet_view',\n", - " '_write_sheet_views',\n", - " '_write_single_row',\n", - " '_write_spark_color',\n", - " '_write_sparkline_group',\n", - " '_write_sparkline_groups',\n", - " '_write_sparklines',\n", - " '_write_split_panes',\n", - " '_write_string',\n", - " '_write_tab_color',\n", - " '_write_table_part',\n", - " '_write_table_parts',\n", - " '_write_token_as_string',\n", - " '_write_underline',\n", - " '_write_url',\n", - " '_write_vert_align',\n", - " '_write_worksheet',\n", - " '_write_x14_axis_color',\n", - " '_write_x14_border_color',\n", - " '_write_x14_cf_rule',\n", - " '_write_x14_cfvo',\n", - " '_write_x14_data_bar',\n", - " '_write_x14_negative_border_color',\n", - " '_write_x14_negative_fill_color',\n", - " '_xml_close',\n", - " '_xml_data_element',\n", - " '_xml_declaration',\n", - " '_xml_empty_tag',\n", - " '_xml_empty_tag_unencoded',\n", - " '_xml_end_tag',\n", - " '_xml_formula_element',\n", - " '_xml_inline_string',\n", - " '_xml_number_element',\n", - " '_xml_rich_inline_string',\n", - " '_xml_rich_si_element',\n", - " '_xml_si_element',\n", - " '_xml_start_tag',\n", - " '_xml_start_tag_unencoded',\n", - " '_xml_string_element',\n", - " 'activate',\n", - " 'active',\n", - " 'active_pane',\n", - " 'add_sparkline',\n", - " 'add_table',\n", - " 'autofilter',\n", - " 'autofilter_area',\n", - " 'autofilter_ref',\n", - " 'black_white',\n", - " 'buttons_list',\n", - " 'center_horizontally',\n", - " 'center_vertically',\n", - " 'charts',\n", - " 'col_formats',\n", - " 'col_size_changed',\n", - " 'col_sizes',\n", - " 'colinfo',\n", - " 'comments',\n", - " 'comments_author',\n", - " 'comments_list',\n", - " 'comments_visible',\n", - " 'cond_formats',\n", - " 'conditional_format',\n", - " 'constant_memory',\n", - " 'data_bars_2010',\n", - " 'data_validation',\n", - " 'date_1904',\n", - " 'default_col_pixels',\n", - " 'default_date_format',\n", - " 'default_row_height',\n", - " 'default_row_pixels',\n", - " 'default_row_zeroed',\n", - " 'default_url_format',\n", - " 'dim_colmax',\n", - " 'dim_colmin',\n", - " 'dim_rowmax',\n", - " 'dim_rowmin',\n", - " 'draft_quality',\n", - " 'drawing',\n", - " 'drawing_links',\n", - " 'dxf_priority',\n", - " 'escapes',\n", - " 'excel2003_style',\n", - " 'excel_version',\n", - " 'ext_sheets',\n", - " 'external_comment_links',\n", - " 'external_drawing_links',\n", - " 'external_hyper_links',\n", - " 'external_table_links',\n", - " 'external_vml_links',\n", - " 'fh',\n", - " 'fileclosed',\n", - " 'filter_cols',\n", - " 'filter_column',\n", - " 'filter_column_list',\n", - " 'filter_on',\n", - " 'filter_range',\n", - " 'filter_type',\n", - " 'fit_height',\n", - " 'fit_page',\n", - " 'fit_to_pages',\n", - " 'fit_width',\n", - " 'footer',\n", - " 'footer_images',\n", - " 'freeze_panes',\n", - " 'get_name',\n", - " 'has_comments',\n", - " 'has_header_vml',\n", - " 'has_vml',\n", - " 'hbreaks',\n", - " 'hcenter',\n", - " 'header',\n", - " 'header_footer_aligns',\n", - " 'header_footer_changed',\n", - " 'header_footer_scales',\n", - " 'header_images',\n", - " 'header_images_list',\n", - " 'hidden',\n", - " 'hide',\n", - " 'hide_gridlines',\n", - " 'hide_row_col_headers',\n", - " 'hide_zero',\n", - " 'hlink_count',\n", - " 'hlink_refs',\n", - " 'horizontal_dpi',\n", - " 'hyperlinks',\n", - " 'images',\n", - " 'index',\n", - " 'insert_button',\n", - " 'insert_chart',\n", - " 'insert_image',\n", - " 'insert_textbox',\n", - " 'internal_fh',\n", - " 'is_chartsheet',\n", - " 'is_right_to_left',\n", - " 'last_shape_id',\n", - " 'leading_zeros',\n", - " 'margin_bottom',\n", - " 'margin_footer',\n", - " 'margin_header',\n", - " 'margin_left',\n", - " 'margin_right',\n", - " 'margin_top',\n", - " 'merge',\n", - " 'merge_range',\n", - " 'name',\n", - " 'names',\n", - " 'nan_inf_to_errors',\n", - " 'orientation',\n", - " 'original_row_height',\n", - " 'outline_below',\n", - " 'outline_changed',\n", - " 'outline_col_level',\n", - " 'outline_on',\n", - " 'outline_right',\n", - " 'outline_row_level',\n", - " 'outline_settings',\n", - " 'outline_style',\n", - " 'page_order',\n", - " 'page_setup_changed',\n", - " 'page_start',\n", - " 'page_view',\n", - " 'palette',\n", - " 'panes',\n", - " 'paper_size',\n", - " 'previous_row',\n", - " 'print_across',\n", - " 'print_area',\n", - " 'print_area_range',\n", - " 'print_comments',\n", - " 'print_gridlines',\n", - " 'print_headers',\n", - " 'print_options_changed',\n", - " 'print_row_col_headers',\n", - " 'print_scale',\n", - " 'protect',\n", - " 'protect_options',\n", - " 'rel_count',\n", - " 'remove_timezone',\n", - " 'repeat_col_range',\n", - " 'repeat_columns',\n", - " 'repeat_row_range',\n", - " 'repeat_rows',\n", - " 'right_to_left',\n", - " 'row_col_headers',\n", - " 'row_data_fh',\n", - " 'row_data_fh_closed',\n", - " 'row_data_filename',\n", - " 'row_size_changed',\n", - " 'row_sizes',\n", - " 'row_spans',\n", - " 'rstring',\n", - " 'screen_gridlines',\n", - " 'select',\n", - " 'selected',\n", - " 'selections',\n", - " 'set_cols',\n", - " 'set_column',\n", - " 'set_comments_author',\n", - " 'set_default_row',\n", - " 'set_first_sheet',\n", - " 'set_footer',\n", - " 'set_h_pagebreaks',\n", - " 'set_header',\n", - " 'set_landscape',\n", - " 'set_margins',\n", - " 'set_page_view',\n", - " 'set_paper',\n", - " 'set_portrait',\n", - " 'set_print_scale',\n", - " 'set_row',\n", - " 'set_rows',\n", - " 'set_selection',\n", - " 'set_start_page',\n", - " 'set_tab_color',\n", - " 'set_v_pagebreaks',\n", - " 'set_vba_name',\n", - " 'set_zoom',\n", - " 'shape_hash',\n", - " 'shapes',\n", - " 'show_comments',\n", - " 'show_zeros',\n", - " 'sparklines',\n", - " 'split_panes',\n", - " 'str_table',\n", - " 'strings_to_formulas',\n", - " 'strings_to_numbers',\n", - " 'strings_to_urls',\n", - " 'tab_color',\n", - " 'table',\n", - " 'tables',\n", - " 'tmpdir',\n", - " 'use_data_bars_2010',\n", - " 'validations',\n", - " 'vba_codename',\n", - " 'vbreaks',\n", - " 'vcenter',\n", - " 'vertical_dpi',\n", - " 'vml_data_id',\n", - " 'vml_drawing_links',\n", - " 'vml_header_id',\n", - " 'vml_shape_id',\n", - " 'worksheet_meta',\n", - " 'write',\n", - " 'write_array_formula',\n", - " 'write_blank',\n", - " 'write_boolean',\n", - " 'write_column',\n", - " 'write_comment',\n", - " 'write_datetime',\n", - " 'write_formula',\n", - " 'write_match',\n", - " 'write_number',\n", - " 'write_rich_string',\n", - " 'write_row',\n", - " 'write_string',\n", - " 'write_url',\n", - " 'xls_colmax',\n", - " 'xls_rowmax',\n", - " 'xls_strmax',\n", - " 'zoom',\n", - " 'zoom_scale_normal']" - ] - }, - "execution_count": 80, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dir(worksheet)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.15rc1" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/risk.ipynb b/notebooks/risk.ipynb deleted file mode 100644 index 299bd35..0000000 --- a/notebooks/risk.ipynb +++ /dev/null @@ -1,293 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - " This notebook is intended to show how to use the risk framework:\n", - " There are two basic usages:\n", - " 1. Experiment\n", - " \n", - " Here the framework will select a number of random fields other than the patient id and compute risk for the selection.\n", - " This will repeat over a designated number of runs.\n", - " \n", - " The parameters to pass to enable this mode are id=,nun_runs=\n", - " 2. Assessment\n", - " \n", - " Here the framework assumes you are only interested in a list of quasi identifiers and will run the evaluation once for a given list of quasi identifiers.\n", - " The parameters to enable this mode are id=,quasi_id=\n", - "\"\"\"\n", - "import os\n", - "import pandas as pd\n", - "import numpy as np\n", - "\n", - "\n", - "#\n", - "#-- Loading a template file\n", - "# The example taken a de-identification white-paper\n", - "# http://www.ehealthinformation.ca/wp-content/uploads/2014/08/2009-De-identification-PA-whitepaper1.pdf\n", - "#\n", - "\n", - "import pandas as pd\n", - "import numpy as np\n", - "from io import StringIO\n", - "csv = \"\"\"\n", - "id,sex,age,profession,drug_test\n", - "1,M,37,doctor,-\n", - "2,F,28,doctor,+\n", - "3,M,37,doctor,-\n", - "4,M,28,doctor,+\n", - "5,M,28,doctor,-\n", - "6,M,37,doctor,-\n", - "\"\"\"\n", - "f = StringIO()\n", - "f.write(unicode(csv))\n", - "f.seek(0)\n", - "MY_DATAFRAME = pd.read_csv(f) " - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - " Here's the pandas_risk code verbatim. \n", - " NOTE: \n", - "\"\"\"\n", - "@pd.api.extensions.register_dataframe_accessor(\"deid\")\n", - "class deid :\n", - " \"\"\"\n", - " This class is a deidentification class that will compute risk (marketer, prosecutor) given a pandas dataframe\n", - " \"\"\"\n", - " def __init__(self,df):\n", - " self._df = df\n", - " \n", - " def risk(self,**args):\n", - " \"\"\"\n", - " @param id name of patient field \n", - " @params num_runs number of runs (default will be 100)\n", - " @params quasi_id \tlist of quasi identifiers to be used (this will only perform a single run)\n", - " \"\"\"\n", - " \n", - " id = args['id']\n", - " if 'quasi_id' in args :\n", - " num_runs = 1\n", - " columns = list(set(args['quasi_id'])- set(id) )\n", - " else :\n", - " num_runs = args['num_runs'] if 'num_runs' in args else 100\n", - " columns = list(set(self._df.columns) - set([id]))\n", - " r = pd.DataFrame() \n", - " k = len(columns)\n", - " for i in range(0,num_runs) :\n", - " #\n", - " # let's chose a random number of columns and compute marketer and prosecutor risk\n", - " # Once the fields are selected we run a groupby clause\n", - " #\n", - " if 'quasi_id' not in args :\n", - " n = np.random.randint(2,k) #-- number of random fields we are picking\n", - " ii = np.random.choice(k,n,replace=False)\n", - " cols = np.array(columns)[ii].tolist()\n", - " else:\n", - " cols \t= columns\n", - " n \t= len(cols)\n", - " x_ = self._df.groupby(cols).count()[id].values\n", - " r = r.append(\n", - " pd.DataFrame(\n", - " [\n", - " {\n", - " \"selected\":n,\n", - " \"marketer\": x_.size / np.float64(np.sum(x_)),\n", - " \"prosecutor\":1 / np.float64(np.min(x_))\n", - "\n", - " }\n", - " ]\n", - " )\n", - " )\n", - " g_size = x_.size\n", - " n_ids = np.float64(np.sum(x_))\n", - "\n", - " return r" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
marketerprosecutorselected
00.5000001.02
00.5000001.03
00.5000001.03
00.3333331.02
00.3333330.52
\n", - "
" - ], - "text/plain": [ - " marketer prosecutor selected\n", - "0 0.500000 1.0 2\n", - "0 0.500000 1.0 3\n", - "0 0.500000 1.0 3\n", - "0 0.333333 1.0 2\n", - "0 0.333333 0.5 2" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#\n", - "# Lets us compute risk here for a random any random selection of quasi identifiers\n", - "# We will run this experiment 5 times\n", - "#\n", - "MY_DATAFRAME.deid.risk(id='id',num_runs=5)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
marketerprosecutorselected
00.51.03
\n", - "
" - ], - "text/plain": [ - " marketer prosecutor selected\n", - "0 0.5 1.0 3" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#\n", - "# In this scenario we are just interested in sex,profession,age\n", - "#\n", - "MY_DATAFRAME.deid.risk(id='id',quasi_id=['age','sex','profession'])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.15rc1" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}