From e33223f6b293b6f691e7107e4e0ef3eed9b4419e Mon Sep 17 00:00:00 2001 From: weiyi Date: Fri, 8 Mar 2019 14:37:52 -0600 Subject: [PATCH 1/3] Update 'risk/__init__.py' adjusting python version 3 and 2 --- risk/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/risk/__init__.py b/risk/__init__.py index 57e7ee1..66cadf2 100644 --- a/risk/__init__.py +++ b/risk/__init__.py @@ -66,4 +66,8 @@ Basic examples that illustrate usage of the the framework are in the notebook fo """ -from risk import deid +import sys +if sys.version_info.major == 2: + from risk import deid +else: + from risk.risk import deid From 084e7d0fe8bc60a78e8d70258c7ba6cc5a63e7f8 Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Fri, 8 Mar 2019 18:02:06 -0600 Subject: [PATCH 2/3] adding the strict flag --- risk/risk.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/risk/risk.py b/risk/risk.py index 05a0896..f0de44b 100644 --- a/risk/risk.py +++ b/risk/risk.py @@ -76,7 +76,7 @@ class deid : sample = args['sample'] if 'sample' in args else pd.DataFrame(self._df) - k = sample.columns.size if 'field_count' not in args else int(args['field_count']) + 1 + k = sample.columns.size if 'field_count' not in args else int(args['field_count']) +1 if 'id' in args : id = args['id'] columns = list(set(sample.columns.tolist()) - set([id])) @@ -85,8 +85,10 @@ class deid : o = pd.DataFrame() for i in np.arange(RUNS): - n = np.random.randint(2,k) - + if 'strict' not in args : + n = np.random.randint(2,k) + else: + n = args['field_count'] cols = np.random.choice(columns,n,replace=False).tolist() params = {'sample':sample,'cols':cols} if pop is not None : From cfd9fffc7ef2c3ea59f82c245a639e9ed3cea006 Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Fri, 8 Mar 2019 18:27:57 -0600 Subject: [PATCH 3/3] added some comments --- risk/risk.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/risk/risk.py b/risk/risk.py index f0de44b..163003d 100644 
--- a/risk/risk.py +++ b/risk/risk.py @@ -57,8 +57,10 @@ class deid : This function will perform experimentation by performing a random policies (combinations of attributes) This function is intended to explore a variety of policies and evaluate their associated risk. - @param pop|sample data-frame with popublation reference - @param id key field that uniquely identifies patient/customer ... + :pop|sample data-frame with population or sample reference + :field_count number of fields to randomly select + :strict if set (and not False) exactly field_count fields are selected; otherwise each run selects a random number of fields between 2 and field_count (inclusive) + :num_runs number of runs (by default 5) """ pop= args['pop'] if 'pop' in args else None @@ -85,7 +87,7 @@ class deid : o = pd.DataFrame() for i in np.arange(RUNS): - if 'strict' not in args : + if 'strict' not in args or ('strict' in args and args['strict'] is False): n = np.random.randint(2,k) else: n = args['field_count']