From 3c643eb4df0020b295e47abda34824ca6df730b5 Mon Sep 17 00:00:00 2001
From: Steve Nyemba <nyemba@gmail.com>
Date: Thu, 15 Sep 2022 17:56:15 -0500
Subject: [PATCH] Fix per-combination policy numbering; rename package to privacykit

---
 README.md          |  6 +++---
 privacykit/risk.py | 33 ++++++++++-----------------------
 setup.py           |  4 ++--
 3 files changed, 15 insertions(+), 28 deletions(-)

diff --git a/README.md b/README.md
index 36cdb6d..c0db805 100644
--- a/README.md
+++ b/README.md
@@ -27,19 +27,19 @@ Install this package using pip as follows :
 
 Stable :
     
-    pip install git+https://hiplab.mc.vanderbilt.edu/git/steve/deid-risk.git
+    pip install git+https://dev.the-phi.com/git/healthcareio/privacykit.git@release
     
     
 Latest Development (not fully tested):
     
-    pip install git+https://hiplab.mc.vanderbilt.edu/git/steve/deid-risk.git@risk
+    pip install git+https://dev.the-phi.com/git/healthcareio/privacykit.git@dev
     
 The framework will depend on pandas and numpy (for now). Below is a basic sample to get started quickly.
 
 
     import numpy as np
     import pandas as pd
-    import risk
+    import privacykit
 
     mydf = pd.DataFrame({"x":np.random.choice( np.random.randint(1,10),50),"y":np.random.choice( np.random.randint(1,10),50),"z":np.random.choice( np.random.randint(1,10),50),"r":np.random.choice( np.random.randint(1,10),50)  })
     print (mydf.risk.evaluate())
diff --git a/privacykit/risk.py b/privacykit/risk.py
index 3110ed2..0f0cbfe 100644
--- a/privacykit/risk.py
+++ b/privacykit/risk.py
@@ -107,38 +107,25 @@ class deid :
         for size in np.arange(2,len(columns)) :
             p = list(combinations(columns,size))            
             p = (np.array(p)[ np.random.choice( len(p), _policy_count)].tolist())
-            flag = 'Policy_'+str(_index)
-            _index += 1
+            
+            
             for cols in p :
+                flag = 'Policy_'+str(_index)
                 r = self.evaluate(sample=sample,cols=cols,flag = flag)
                 p =  pd.DataFrame(1*sample.columns.isin(cols)).T
                 p.columns = sample.columns
                 o = pd.concat([o,r.join(p)])
-        
+                o['attr'] = ','.join(cols)
+                _index += 1
+        #
+        # The 'flag' column is renamed to 'policies' (numbered sequentially); the 'attr' column summarizes the attributes.
+        #
            
-        # for i in np.arange(RUNS):
-        #     if 'strict' not in args or ('strict' in args and args['strict'] is False):
-        #         n = np.random.randint(2,k)
-        #     else:
-        #         n = args['field_count']
-        #     cols = np.random.choice(columns,n,replace=False).tolist()            
-        #     params = {'sample':sample,'cols':cols}
-        #     if pop is not None :
-        #         params['pop'] = pop
-        #     if pop_size > 0  :
-        #         params['pop_size'] = pop_size
-
-        #     r = self.evaluate(**params)
-        #     #
-        #     # let's put the policy in place
-        #     p =  pd.DataFrame(1*sample.columns.isin(cols)).T
-        #     p.columns = sample.columns
-        #     # o = o.append(r.join(p))
-        #     o = pd.concat([o,r.join(p)])
+      
 
             
         o.index = np.arange(o.shape[0]).astype(np.int64)
-
+        o = o.rename(columns={'flag':'policies'})
         return o
     def evaluate(self, **args):
         """
diff --git a/setup.py b/setup.py
index cbe800f..7281a1c 100644
--- a/setup.py
+++ b/setup.py
@@ -4,11 +4,11 @@ This is a build file for the
 from setuptools import setup, find_packages
  
 setup(
-    name = "risk",
+    name = "privacykit",
     version = "0.8.1",
     author = "Healthcare/IO - The Phi Technology LLC & Health Information Privacy Lab",
     author_email = "info@the-phi.com",
     license = "MIT",
-    packages=['risk'],
+    packages=['privacykit'],
     install_requires = ['numpy','pandas']
     )