From 3c643eb4df0020b295e47abda34824ca6df730b5 Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Thu, 15 Sep 2022 17:56:15 -0500 Subject: [PATCH] bug fix ... --- README.md | 6 +++--- privacykit/risk.py | 33 ++++++++++----------------------- setup.py | 4 ++-- 3 files changed, 15 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 36cdb6d..c0db805 100644 --- a/README.md +++ b/README.md @@ -27,19 +27,19 @@ Install this package using pip as follows : Stable : - pip install git+https://hiplab.mc.vanderbilt.edu/git/steve/deid-risk.git + pip install git+https://dev.the-phi.com/git/healthcareio/privacykit.git@release Latest Development (not fully tested): - pip install git+https://hiplab.mc.vanderbilt.edu/git/steve/deid-risk.git@risk + pip install git+https://dev.the-phi.com/git/healthcareio/privacykit.git@dev The framework will depend on pandas and numpy (for now). Below is a basic sample to get started quickly. import numpy as np import pandas as pd - import risk + import privacykit mydf = pd.DataFrame({"x":np.random.choice( np.random.randint(1,10),50),"y":np.random.choice( np.random.randint(1,10),50),"z":np.random.choice( np.random.randint(1,10),50),"r":np.random.choice( np.random.randint(1,10),50) }) print (mydf.risk.evaluate()) diff --git a/privacykit/risk.py b/privacykit/risk.py index 3110ed2..0f0cbfe 100644 --- a/privacykit/risk.py +++ b/privacykit/risk.py @@ -107,38 +107,25 @@ class deid : for size in np.arange(2,len(columns)) : p = list(combinations(columns,size)) p = (np.array(p)[ np.random.choice( len(p), _policy_count)].tolist()) - flag = 'Policy_'+str(_index) - _index += 1 + + for cols in p : + flag = 'Policy_'+str(_index) r = self.evaluate(sample=sample,cols=cols,flag = flag) p = pd.DataFrame(1*sample.columns.isin(cols)).T p.columns = sample.columns o = pd.concat([o,r.join(p)]) - + o['attr'] = ','.join(cols) + _index += 1 + # + # We rename flags to policies and adequately number them, we also have a column to summarize the attributes attr + # - # for i in np.arange(RUNS): - # if 'strict' not in args or ('strict' in args and args['strict'] is False): - # n = np.random.randint(2,k) - # else: - # n = args['field_count'] - # cols = np.random.choice(columns,n,replace=False).tolist() - # params = {'sample':sample,'cols':cols} - # if pop is not None : - # params['pop'] = pop - # if pop_size > 0 : - # params['pop_size'] = pop_size - - # r = self.evaluate(**params) - # # - # # let's put the policy in place - # p = pd.DataFrame(1*sample.columns.isin(cols)).T - # p.columns = sample.columns - # # o = o.append(r.join(p)) - # o = pd.concat([o,r.join(p)]) + o.index = np.arange(o.shape[0]).astype(np.int64) - + o = o.rename(columns={'flag':'policies'}) return o def evaluate(self, **args): """ diff --git a/setup.py b/setup.py index cbe800f..7281a1c 100644 --- a/setup.py +++ b/setup.py @@ -4,11 +4,11 @@ This is a build file for the from setuptools import setup, find_packages setup( - name = "risk", + name = "privacykit", version = "0.8.1", author = "Healthcare/IO - The Phi Technology LLC & Health Information Privacy Lab", author_email = "info@the-phi.com", license = "MIT", - packages=['risk'], + packages=['privacykit'], install_requires = ['numpy','pandas'] )