| 
						
						
							
								
							
						
						
					 | 
					 | 
					@ -35,25 +35,32 @@ class deid :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        """
 | 
					 | 
					 | 
					 | 
					        """
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            @param  id          name of patient field            
 | 
					 | 
					 | 
					 | 
					            @param  id          name of patient field            
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            @params num_runs    number of runs (default will be 100)
 | 
					 | 
					 | 
					 | 
					            @params num_runs    number of runs (default will be 100)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
						    @params quasi_id 	list of quasi identifiers to be used (this will only perform a single run)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        """
 | 
					 | 
					 | 
					 | 
					        """
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        
 | 
					 | 
					 | 
					 | 
					        
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        id  = args['id']
 | 
					 | 
					 | 
					 | 
					        id  = args['id']
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        
 | 
					 | 
					 | 
					 | 
					        if 'quasi_id' in args :
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        num_runs  = args['num_runs'] if 'num_runs' in args else 100
 | 
					 | 
					 | 
					 | 
							num_runs = 1
 | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
							columns = list(set(args['quasi_id'])- set(id) )
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
						else :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
							num_runs  = args['num_runs'] if 'num_runs' in args else 100
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
							columns = list(set(self._df.columns) - set([id]))
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        r   = pd.DataFrame()
 | 
					 | 
					 | 
					 | 
					        r   = pd.DataFrame()
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        
 | 
					 | 
					 | 
					 | 
					        
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        columns = list(set(self._df.columns) - set([id]))
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        k = len(columns)
 | 
					 | 
					 | 
					 | 
					        k = len(columns)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        for i in range(0,num_runs) :
 | 
					 | 
					 | 
					 | 
					        for i in range(0,num_runs) :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            #
 | 
					 | 
					 | 
					 | 
					            #
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            # let's chose a random number of columns and compute marketer and prosecutor risk
 | 
					 | 
					 | 
					 | 
					            # let's chose a random number of columns and compute marketer and prosecutor risk
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            # Once the fields are selected we run a groupby clause
 | 
					 | 
					 | 
					 | 
					            # Once the fields are selected we run a groupby clause
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            #
 | 
					 | 
					 | 
					 | 
					            #
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
						    if 'quasi_id' not in args :
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            n   = np.random.randint(2,k) #-- number of random fields we are picking
 | 
					 | 
					 | 
					 | 
							    n   = np.random.randint(2,k) #-- number of random fields we are picking
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            ii = np.random.choice(k,n,replace=False)
 | 
					 | 
					 | 
					 | 
							    ii = np.random.choice(k,n,replace=False)
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            cols = np.array(columns)[ii].tolist()
 | 
					 | 
					 | 
					 | 
							    cols = np.array(columns)[ii].tolist()
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            x_ = self._df.groupby(cols).count()[id].values
 | 
					 | 
					 | 
					 | 
					            else:
 | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
						    	cols 	= columns
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
							n 	= len(cols)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
						    x_ = self._df.groupby(cols).count()[id].values
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            r = r.append(
 | 
					 | 
					 | 
					 | 
					            r = r.append(
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					                pd.DataFrame(
 | 
					 | 
					 | 
					 | 
					                pd.DataFrame(
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					                    [
 | 
					 | 
					 | 
					 | 
					                    [
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -72,20 +79,22 @@ class deid :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        return r
 | 
					 | 
					 | 
					 | 
					        return r
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					# import pandas as pd
 | 
					 | 
					 | 
					 | 
					import pandas as pd
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					# import numpy as np
 | 
					 | 
					 | 
					 | 
					import numpy as np
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					# from io import StringIO
 | 
					 | 
					 | 
					 | 
					from io import StringIO
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					# csv = """
 | 
					 | 
					 | 
					 | 
					csv = """
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					# id,sex,age,profession,drug_test
 | 
					 | 
					 | 
					 | 
					id,sex,age,profession,drug_test
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					# 1,M,37,doctor,-
 | 
					 | 
					 | 
					 | 
					1,M,37,doctor,-
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					# 2,F,28,doctor,+
 | 
					 | 
					 | 
					 | 
					2,F,28,doctor,+
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					# 3,M,37,doctor,-
 | 
					 | 
					 | 
					 | 
					3,M,37,doctor,-
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					# 4,M,28,doctor,+
 | 
					 | 
					 | 
					 | 
					4,M,28,doctor,+
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					# 5,M,28,doctor,-
 | 
					 | 
					 | 
					 | 
					5,M,28,doctor,-
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					# 6,M,37,doctor,-
 | 
					 | 
					 | 
					 | 
					6,M,37,doctor,-
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					# """
 | 
					 | 
					 | 
					 | 
					"""
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					# f = StringIO()
 | 
					 | 
					 | 
					 | 
					f = StringIO()
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					# f.write(unicode(csv))
 | 
					 | 
					 | 
					 | 
					f.write(unicode(csv))
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					# f.seek(0)
 | 
					 | 
					 | 
					 | 
					f.seek(0)
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					# df = pd.read_csv(f)     
 | 
					 | 
					 | 
					 | 
					df = pd.read_csv(f)     
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					# print df.deid.risk(id='id',num_runs=2)   
 | 
					 | 
					 | 
					 | 
					print df.deid.risk(id='id',num_runs=2)   
 | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					print " *** "
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					print df.deid.risk(id='id',quasi_id=['sex','age','profession'])
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
					 | 
					 | 
					
 
 |