{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "\"\"\"\n", " Health Information Privacy Lab\n", " This notebook is intended to run experiments and generate the data to be used by another notebook\n", "\n", " pre-requisites:\n", " - pandas_risk This is a custom framework that will compute risk for a given dataset\n", " - google-cloud-bigquery\n", " - numpy\n", "\"\"\"\n", "import pandas as pd\n", "import numpy as np\n", "from pandas_risk import *\n", "from time import time\n", "import os\n", "#\n", "#-- Loading the dataset\n", "class Logger :\n", " cache = []\n", " @staticmethod\n", " def clear():\n", " Logger.cache = []\n", " @staticmethod\n", " def log(**args) :\n", " Logger.cache.append(args)\n", " \n", "SQL_CONTROLLED=\"SELECT person_id,birth_datetime,city,zip,state,race,gender FROM deid_risk.basic_risk60k\"\n", "SQL_REGISTERED = \"SELECT person_id,birth_datetime,city,zip,state,race,gender FROM deid_risk.basic_deid_risk60k\"\n", "dfr = pd.read_gbq(SQL_REGISTERED,private_key='/home/steve/dev/google-cloud-sdk/accounts/curation-test.json')\n", "dfc = pd.read_gbq(SQL_CONTROLLED,private_key='/home/steve/dev/google-cloud-sdk/accounts/curation-test.json')\n", "\n" ] }, { "cell_type": "code", "execution_count": 99, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | sample % | \n", "marketer | \n", "sample marketer | \n", "tier | \n", "
---|---|---|---|---|
0 | \n", "5 | \n", "0.974945 | \n", "0.981364 | \n", "controlled | \n", "
1 | \n", "5 | \n", "0.975513 | \n", "0.981996 | \n", "controlled | \n", "
2 | \n", "5 | \n", "0.975798 | \n", "0.980733 | \n", "controlled | \n", "
3 | \n", "5 | \n", "0.976364 | \n", "0.981996 | \n", "controlled | \n", "
4 | \n", "5 | \n", "0.976364 | \n", "0.981996 | \n", "controlled | \n", "