update and making it 3.x compatible

pull/2/head
Steve L. Nyemba 6 years ago
parent 5a09e89965
commit 22b2cb0af3

@ -41,7 +41,7 @@ The framework will depend on pandas and numpy (for now). Below is a basic sample
import pandas as pd import pandas as pd
from pandas_risk import * from pandas_risk import *
mydf = pd.DataFrame({"x":np.random.choice( np.random.randint(1,10),50),"y":np.random.choice( np.random.randint(1,10),50) }) mydf = pd.DataFrame({"x":np.random.choice( np.random.randint(1,10),50),"y":np.random.choice( np.random.randint(1,10),50),"z":np.random.choice( np.random.randint(1,10),50),"r":np.random.choice( np.random.randint(1,10),50) })
print mydf.risk.evaluate() print mydf.risk.evaluate()
@ -51,7 +51,7 @@ The framework will depend on pandas and numpy (for now). Below is a basic sample
# - Ensure the population size is much greater than the sample size # - Ensure the population size is much greater than the sample size
# - Ensure the fields are identical in both sample and population # - Ensure the fields are identical in both sample and population
# #
pop = pd.DataFrame({"x":np.random.choice( np.random.randint(1,10),150),"y":np.random.choice( np.random.randint(1,10),150) ,"q":np.random.choice( np.random.randint(1,10),150)}) pop = pd.DataFrame({"x":np.random.choice( np.random.randint(1,10),150),"y":np.random.choice( np.random.randint(1,10),150) ,"z":np.random.choice( np.random.randint(1,10),150),"r":np.random.choice( np.random.randint(1,10),150)})
mydf.risk.evaluate(pop=pop) mydf.risk.evaluate(pop=pop)

@ -60,7 +60,7 @@ class deid :
@param pop|sample data-frame with population reference @param pop|sample data-frame with population reference
@param id key field that uniquely identifies patient/customer ... @param id key field that uniquely identifies patient/customer ...
""" """
id = args['id']
pop= args['pop'] if 'pop' in args else None pop= args['pop'] if 'pop' in args else None
if 'pop_size' in args : if 'pop_size' in args :
@ -77,7 +77,11 @@ class deid :
sample = args['sample'] if 'sample' in args else pd.DataFrame(self._df) sample = args['sample'] if 'sample' in args else pd.DataFrame(self._df)
k = sample.columns.size -1 if 'field_count' not in args else int(args['field_count']) k = sample.columns.size -1 if 'field_count' not in args else int(args['field_count'])
columns = list(set(sample.columns.tolist()) - set([id])) if 'id' in args :
id = args['id']
columns = list(set(sample.columns.tolist()) - set([id]))
else:
columns = sample.columns.tolist()
o = pd.DataFrame() o = pd.DataFrame()
for i in np.arange(RUNS): for i in np.arange(RUNS):
@ -152,8 +156,6 @@ class deid :
handle_sample.set('pop_size',pop_size) handle_sample.set('pop_size',pop_size)
r['pitman risk'] = handle_sample.pitman() r['pitman risk'] = handle_sample.pitman()
if 'pop' in args : if 'pop' in args :
print cols
print args['pop'].columns
xi = pd.DataFrame({"sample_group_size":sample.groupby(cols,as_index=False).size()}).reset_index() xi = pd.DataFrame({"sample_group_size":sample.groupby(cols,as_index=False).size()}).reset_index()
yi = pd.DataFrame({"population_group_size":args['pop'].groupby(cols,as_index=False).size()}).reset_index() yi = pd.DataFrame({"population_group_size":args['pop'].groupby(cols,as_index=False).size()}).reset_index()
merged_groups = pd.merge(xi,yi,on=cols,how='inner') merged_groups = pd.merge(xi,yi,on=cols,how='inner')

Loading…
Cancel
Save