You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
33 KiB
33 KiB
None
<html lang="en">
<head>
</head>
</html>
In [9]:
"""
This notebook is designed to run experiments around demographics on the registered tier.
"""
import os

import pandas as pd
import numpy as np
# NOTE(review): presumably this star-import registers the `.deid` DataFrame accessor
# used by later cells — confirm against pandas_risk.
from pandas_risk import *

# Attribute (column) names — presumably columns of the registered-tier table; TODO confirm.
ATTRIBUTES = ['race','ethnicity','birth_date','state','city','zip','marital_status','education','language','home_owner','income','employment_status','living_situation','active_duty_status','gender_identity','birth_place','death_date','death_cause','orientation']

# Service-account key file used for BigQuery. It is taken from the
# GOOGLE_APPLICATION_CREDENTIALS environment variable when set, so the notebook is not
# tied to one developer's home directory; the original location remains the fallback.
PRIVATE_KEY_PATH = os.environ.get(
    'GOOGLE_APPLICATION_CREDENTIALS',
    '/home/steve/dev/google-cloud-sdk/accounts/curation-test.json',
)

# Scenario settings; later cells read its `feature`, `fo` and `fi` columns.
dfs = pd.read_csv('scenario-settings.csv')

# Full registered-tier table pulled from BigQuery.
# NOTE(review): `private_key` is deprecated in recent pandas / pandas-gbq in favour of
# `credentials=`; migrating needs google.oauth2, so it is left unchanged here.
dfc = pd.read_gbq("SELECT * FROM deid_risk.registered_dec_01", private_key=PRIVATE_KEY_PATH)
In [10]:
# Date fields are left out of every scenario because dates are shifted.
_shifted_date_fields = ['birth_date', 'death_date']


def _drop_shifted_dates(features):
    """Return `features` without the shifted date fields, keeping the original order."""
    return [name for name in features if name not in _shifted_date_fields]


# Rows where `fo & fi` equals 1 (both flags set, assuming 0/1 flags — TODO confirm).
cols_o = _drop_shifted_dates(dfs.loc[(dfs.fo & dfs.fi) == 1, 'feature'].tolist())
# Rows where `fo + fi` is at least 1 (either flag set, under the same assumption).
cols_i = _drop_shifted_dates(dfs.loc[(dfs.fo + dfs.fi) >= 1, 'feature'].tolist())
# Every feature listed in the settings file.
cols_a = _drop_shifted_dates(dfs['feature'].tolist())
#-- voter registration
cols_v = _drop_shifted_dates(['birth_date', 'gender_identity', 'race', 'state', 'city', 'birth_place'])
In [11]:
# print(dfs)
# print(cols_o)
# print(cols_i)
In [12]:
# One (label, feature list) pair per scenario, in the order the rows are reported.
scenarios = [
    ('high-conj', cols_o),
    ('high-disj', cols_i),
    ('all', cols_a),
    ('voter-reg', cols_v),
]

# Evaluate each column subset through the `.deid` accessor and stack the results.
# The label assignment below only works if every evaluate() call yields exactly one row.
r = pd.concat([dfc[features].deid.evaluate() for _label, features in scenarios])
r.index = np.arange(len(r), dtype=np.int64)
r['flag'] = [label for label, _features in scenarios]
r
Out[12]:
In [14]:
# Bar chart of the `marketer` column with one bar per scenario flag; the Figure is kept in `fig_o`.
ax_o = r.plot.bar(x='flag', y=['marketer'])
fig_o = ax_o.get_figure()
In [15]:
# Write the table `r` to sheet 'phase-1' of the output workbook.
# The context manager saves and closes the file even if to_excel raises, and replaces
# the explicit writer.save() call, which no longer exists in pandas >= 2.0.
with pd.ExcelWriter('out-116kpatients-phase-1.xlsx', engine='xlsxwriter') as writer:
    # sheet_name is passed by keyword: positional use is deprecated in recent pandas.
    r.to_excel(writer, sheet_name='phase-1')
In [19]:
# Display the scenario-settings DataFrame `dfs` for inspection.
dfs
Out[19]:
In [38]:
import os

import pandas as pd
import numpy as np

# Values of the `name` column of family-history.csv.
names = pd.read_csv('family-history.csv').name.tolist()

# Service-account key file used for BigQuery. It is taken from the
# GOOGLE_APPLICATION_CREDENTIALS environment variable when set; the original
# developer-local location remains the fallback so existing setups keep working.
path = os.environ.get(
    'GOOGLE_APPLICATION_CREDENTIALS',
    '/home/steve/dev/google-cloud-sdk/accounts/curation-test.json',
)

sql = """
SELECT * FROM deid_risk.registered_medical_history_dec_001
"""

# Run the query held in `sql` instead of a duplicated inline copy; strip() removes the
# surrounding newlines so the submitted text is exactly the original statement.
# NOTE(review): `private_key` is deprecated in recent pandas / pandas-gbq in favour of
# `credentials=`; migrating needs google.oauth2, so it is left unchanged here.
dfm = pd.read_gbq(sql.strip(), private_key=path, dialect='standard')
In [69]:
# Every column except the person identifier, kept in the table's own column order.
# (Building this list from a set made the row order of `r` change between kernel
# restarts, because string hashing is randomised per interpreter run.)
cols = [name for name in dfm.columns if name != 'person_id']

# Non-null count per attribute, one row per column of `dfm`.
r = dfm[cols].count().to_frame('counts')
r['attributes'] = r.index
# Percentage of rows of `dfm` that have a value for the attribute.
r['rate'] = 100 * (r.counts / dfm.shape[0])

# Mean and sample standard deviation (ddof=1, same as the default of var()) of the rates.
r.rate.mean(), r.rate.std()
Out[69]:
In [81]:
# Open an xlsxwriter-backed Excel writer and write the DataFrame `r` to sheet 'p1'.
# NOTE(review): nothing in this cell saves or closes `writer` — confirm the file is
# finalised elsewhere, otherwise it may never reach disk.
writer = pd.ExcelWriter('/home/steve/tmp/simple.xlsx', engine='xlsxwriter')
r.to_excel(writer,sheet_name='p1')
# Take the engine's workbook object and add one more worksheet (no name is given).
workbook = writer.book
worksheet = workbook.add_worksheet()
# 30 integers drawn from 0..9; no seed is set in this cell, so the values differ per run.
b = pd.DataFrame({"id":np.random.choice(10,30)})
In [80]:
# Interactive introspection: list the attribute names available on `worksheet`.
dir(worksheet)
Out[80]:
In [ ]: