@ -43,6 +43,10 @@ from datetime import datetime
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					import  sys 
 
					 
					 
					 
					import  sys 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					from  itertools  import  combinations 
 
					 
					 
					 
					from  itertools  import  combinations 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# class Compute: 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					#     pass 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					# class Population(Compute): 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					#     pass 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					@pd.api.extensions.register_dataframe_accessor ( " risk " ) 
 
					 
					 
					 
					@pd.api.extensions.register_dataframe_accessor ( " risk " ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					class  deid  : 
 
					 
					 
					 
					class  deid  : 
 
				
			 
			
		
	
	
		
		
			
				
					
						
						
						
							
								 
							 
						
					 
					 
					@ -57,6 +61,16 @@ class deid :
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        # 
 
					 
					 
					 
					        # 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        values  =  df . apply ( lambda  col :  col . unique ( ) . size  /  df . shape [ 0 ] ) 
 
					 
					 
					 
					        values  =  df . apply ( lambda  col :  col . unique ( ) . size  /  df . shape [ 0 ] ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        self . _dinfo  =  dict ( zip ( df . columns . tolist ( ) , values ) ) 
 
					 
					 
					 
					        self . _dinfo  =  dict ( zip ( df . columns . tolist ( ) , values ) ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					        # self.sample = self._df 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					        self . init ( sample = self . _df ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					    def init(self, **_args):
					        """
					        (Re)build the handlers used to compute risk.
					        :sample     optional sample dataset, defaults to the data-frame held in self._df
					        :columns    optional list of columns; when missing or empty, Compute is built without
					                    columns and left to pick them itself
					        """
					        _sample = _args['sample'] if 'sample' in _args else self._df
					        _columns = _args['columns'] if 'columns' in _args else []
					        #
					        # Both branches must bind self._compute : the original else-branch assigned to a
					        # misspelled attribute (self._comput), leaving self._compute unset/stale when no columns were given
					        if _columns:
					            self._compute = Compute(sample=_sample, columns=_columns)
					        else:
					            self._compute = Compute(sample=_sample)
					        self._pcompute = Population()
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    def  explore ( self , * * args ) : 
 
					 
					 
					 
					    def  explore ( self , * * args ) : 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        """ 
 
					 
					 
					 
					        """ 
 
				
			 
			
		
	
	
		
		
			
				
					
						
							
								 
							 
						
						
							
								 
							 
						
						
					 
					 
					@ -107,40 +121,45 @@ class deid :
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        for  size  in  np . arange ( 2 , len ( columns ) )  : 
 
					 
					 
					 
					        for  size  in  np . arange ( 2 , len ( columns ) )  : 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					            p  =  list ( combinations ( columns , size ) )             
 
					 
					 
					 
					            p  =  list ( combinations ( columns , size ) )             
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					            p  =  ( np . array ( p ) [  np . random . choice (  len ( p ) ,  _policy_count ) ] . tolist ( ) ) 
 
					 
					 
					 
					            p  =  ( np . array ( p ) [  np . random . choice (  len ( p ) ,  _policy_count ) ] . tolist ( ) ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					            flag  =  ' Policy_ ' + str ( _index ) 
 
					 
					 
					 
					            
 
				
			 
			
				
				
			
		
	
		
		
			
				
					
					 
					 
					 
					            _index  + =  1 
 
					 
					 
					 
					            
 
				
			 
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
					            for  cols  in  p  : 
 
					 
					 
					 
					            for  cols  in  p  : 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					                flag  =  ' Policy_ ' + str ( _index ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					                r  =  self . evaluate ( sample = sample , cols = cols , flag  =  flag ) 
 
					 
					 
					 
					                r  =  self . evaluate ( sample = sample , cols = cols , flag  =  flag ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					                p  =   pd . DataFrame ( 1 * sample . columns . isin ( cols ) ) . T 
 
					 
					 
					 
					                p  =   pd . DataFrame ( 1 * sample . columns . isin ( cols ) ) . T 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					                p . columns  =  sample . columns 
 
					 
					 
					 
					                p . columns  =  sample . columns 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					                o  =  pd . concat ( [ o , r . join ( p ) ] ) 
 
					 
					 
					 
					                o  =  pd . concat ( [ o , r . join ( p ) ] ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        
 
					 
					 
					 
					 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					           
 
					 
					 
					 
					 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        # for i in np.arange(RUNS): 
 
					 
					 
					 
					 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        #     if 'strict' not in args or ('strict' in args and args['strict'] is False): 
 
					 
					 
					 
					 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        #         n = np.random.randint(2,k) 
 
					 
					 
					 
					 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        #     else: 
 
					 
					 
					 
					 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        #         n = args['field_count'] 
 
					 
					 
					 
					 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        #     cols = np.random.choice(columns,n,replace=False).tolist()             
 
					 
					 
					 
					 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        #     params = {'sample':sample,'cols':cols} 
 
					 
					 
					 
					 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        #     if pop is not None : 
 
					 
					 
					 
					 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        #         params['pop'] = pop 
 
					 
					 
					 
					 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        #     if pop_size > 0  : 
 
					 
					 
					 
					 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        #         params['pop_size'] = pop_size 
 
					 
					 
					 
					 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        #     r = self.evaluate(**params) 
 
					 
					 
					 
					                o [ ' attributes ' ]  =  ' , ' . join ( cols ) 
 
				
			 
			
				
				
			
		
	
		
		
			
				
					
					 
					 
					 
					        #     # 
 
					 
					 
					 
					                # o['attr'] = ','.join(r.apply()) 
 
				
			 
			
				
				
			
		
	
		
		
			
				
					
					 
					 
					 
					        #     # let's put the policy in place 
 
					 
					 
					 
					                _index  + =  1 
 
				
			 
			
				
				
			
		
	
		
		
			
				
					
					 
					 
					 
					        #     p =  pd.DataFrame(1*sample.columns.isin(cols)).T 
 
					 
					 
					 
					        # 
 
				
			 
			
				
				
			
		
	
		
		
			
				
					
					 
					 
					 
					        #     p.columns = sample.columns 
 
					 
					 
					 
					        # We rename flags to policies and adequately number them, we also have a column to summarize the attributes attr 
 
				
			 
			
				
				
			
		
	
		
		
			
				
					
					 
					 
					 
					        #     # o = o.append(r.join(p)) 
 
					 
					 
					 
					        # 
 
				
			 
			
				
				
			
		
	
		
		
			
				
					
					 
					 
					 
					        #     o = pd.concat([o,r.join(p)]) 
 
					 
					 
					 
					           
 
				
			 
			
				
				
			
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					      
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					            
 
					 
					 
					 
					            
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        o . index  =  np . arange ( o . shape [ 0 ] ) . astype ( np . int64 ) 
 
					 
					 
					 
					        o . index  =  np . arange ( o . shape [ 0 ] ) . astype ( np . int64 ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
 
					 
					 
					 
					        o  =  o . rename ( columns = { ' flag ' : ' policies ' } ) 
 
				
			 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
					        return  o 
 
					 
					 
					 
					        return  o 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    def evaluate(self, **_args):
					        """
					        Compute the marketer, journalist and prosecutor risk and return them as a single row.
					        All keyword arguments are forwarded as-is to self.init and to each risk function.
					        :attr       optional dictionary merged into the row; computed measures win when a key clashes
					        :return     pandas data-frame with one row (risk measures + fields, groups, rows counts)
					        """
					        self.init(**_args)
					        #
					        # pitman is deliberately left out of the default measures
					        _names = ['marketer', 'journalist', 'prosecutor']
					        _measure = {}
					        for label in _names:
					            _measure[label] = getattr(self, label)(**_args)
					        #
					        # The counts are read after the risk calls because those calls may rebuild self._compute
					        _count = self._compute.cache['count']
					        for key in ('fields', 'groups', 'rows'):
					            _measure[key] = _count[key]
					        if 'attr' in _args:
					            _measure = dict(_args['attr'], **_measure)
					        return pd.DataFrame([_measure])
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					    def  _evaluate ( self ,  * * args ) : 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        """ 
 
					 
					 
					 
					        """ 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        This  function  has  the  ability  to  evaluate  risk  associated  with  either  a  population  or  a  sample  dataset 
 
					 
					 
					 
					        This  function  has  the  ability  to  evaluate  risk  associated  with  either  a  population  or  a  sample  dataset 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        : sample  sample  dataset 
 
					 
					 
					 
					        : sample  sample  dataset 
 
				
			 
			
		
	
	
		
		
			
				
					
						
							
								 
							 
						
						
							
								 
							 
						
						
					 
					 
					@ -170,7 +189,7 @@ class deid :
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        r  =  { " flag " : flag } 
 
					 
					 
					 
					        r  =  { " flag " : flag } 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        # if sample : 
 
					 
					 
					 
					        # if sample : 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        
 
					 
					 
					 
					        
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        handle_sample    =  Sampl e( )         
 
					 
					 
					 
					        handle_sample    =  Comput e( )         
 
				
			 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
					        xi               =  sample . groupby ( cols , as_index = False ) . count ( ) . values 
 
					 
					 
					 
					        xi               =  sample . groupby ( cols , as_index = False ) . count ( ) . values 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        
 
					 
					 
					 
					        
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        handle_sample . set ( ' groups ' , xi ) 
 
					 
					 
					 
					        handle_sample . set ( ' groups ' , xi ) 
 
				
			 
			
		
	
	
		
		
			
				
					
						
							
								 
							 
						
						
							
								 
							 
						
						
					 
					 
					@ -226,7 +245,83 @@ class deid :
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        # 
 
					 
					 
					 
					        # 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        r [ ' field count ' ]  =  len ( cols ) 
 
					 
					 
					 
					        r [ ' field count ' ]  =  len ( cols ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        return  pd . DataFrame ( [ r ] ) 
 
					 
					 
					 
					        return  pd . DataFrame ( [ r ] ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					    
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					    def marketer(self, **_args):
					        """
					        This function delegates the calls to compute marketer risk of a given dataset or sample
					        :sample     optional sample dataset
					        :columns    optional columns of the dataset; if none is provided, an inference will be made using non-unique columns
					        :pop        optional, presumably the population dataset (TODO confirm); when present the
					                    call is delegated to the population handler
					        """
					        if 'pop' in _args:
					            #
					            # Computing population estimates for the population
					            # (the original bound this to `handler`, so the return below raised UnboundLocalError)
					            self._pcompute.init(**_args)
					            _handler = self._pcompute
					        else:
					            # The sample handler is only rebuilt when the caller overrides the sample or the columns
					            if 'sample' in _args or 'columns' in _args:
					                self.init(**_args)
					            _handler = self._compute
					        return _handler.marketer()
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					    def journalist(self, **_args):
					        """
					        This function delegates the calls to compute journalist risk of a given dataset or sample
					        :sample     optional sample dataset
					        :columns    optional columns of the dataset; if none is provided, an inference will be made using non-unique columns
					        :pop        optional, presumably the population dataset (TODO confirm); when present the
					                    call is delegated to the population handler
					        """
					        if 'pop' in _args:
					            self._pcompute.init(**_args)
					            _handler = self._pcompute
					        else:
					            # The sample handler is only rebuilt when the caller overrides the sample or the columns
					            if 'sample' in _args or 'columns' in _args:
					                self.init(**_args)
					            _handler = self._compute
					        return _handler.journalist()
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					    def prosecutor(self, **_args):
					        """
					        This function delegates the calls to compute prosecutor risk of a given dataset or sample
					        :sample     optional sample dataset
					        :columns    optional columns of the dataset; if none is provided, an inference will be made using non-unique columns
					        :pop        optional, presumably the population dataset (TODO confirm); when present the
					                    call is delegated to the population handler
					        """
					        if 'pop' in _args:
					            self._pcompute.init(**_args)
					            _handler = self._pcompute
					        else:
					            # The sample handler is only rebuilt when the caller overrides the sample or the columns
					            if 'sample' in _args or 'columns' in _args:
					                self.init(**_args)
					            _handler = self._compute
					        return _handler.prosecutor()
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					    def pitman(self, **_args):
					        """
					        This function delegates the calls to compute pitman risk
					        :pop_size       size of the population, required when no population is given (cast to int)
					        :population     optional; when present the call is delegated to the population handler
					        A KeyError is raised when neither population nor pop_size is provided
					        """
					        # NOTE(review): the other risk functions look for the key 'pop' while this one looks for
					        # 'population' -- confirm which key the population handler expects
					        if 'population' in _args:
					            self._pcompute.init(**_args)
					            _handler = self._pcompute
					        else:
					            pop_size = int(_args['pop_size'])
					            self._compute.set('pop_size', pop_size)
					            _handler = self._compute
					        return _handler.pitman()
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					        
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					        # xi = pd.DataFrame({"sample_group_size":sample.groupby(cols,as_index=False).count()}).reset_index() 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					        # yi = pd.DataFrame({"population_group_size":args['pop'].groupby(cols,as_index=False).size()}).reset_index() 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					        # merged_groups = pd.merge(xi,yi,on=cols,how='inner') 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					        # handle_population= Population()             
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					        # handle_population.set('merged_groups',merged_groups) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					class  Risk  : 
 
					 
					 
					 
					class  Risk  : 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    """ 
 
					 
					 
					 
					    """ 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    This  class  is  an  abstraction  of  how  we  chose  to  structure  risk  computation  i . e  in  2  sub  classes : 
 
					 
					 
					 
					    This  class  is  an  abstraction  of  how  we  chose  to  structure  risk  computation  i . e  in  2  sub  classes : 
 
				
			 
			
		
	
	
		
		
			
				
					
						
						
						
							
								 
							 
						
					 
					 
					@ -240,24 +335,44 @@ class Risk :
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					            self . cache [ id ]  =  { } 
 
					 
					 
					 
					            self . cache [ id ]  =  { } 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        self . cache [ key ]  =  value 
 
					 
					 
					 
					        self . cache [ key ]  =  value 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					class  Sampl e( Risk ) : 
 
					 
					 
					 
					class  Comput e( Risk ) : 
 
				
			 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
					    """ 
 
					 
					 
					 
					    """ 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    This  class  will  compute  risk  for  the  sample  dataset :  the  marketer  and  prosecutor  risk  are  computed  by  default . 
 
					 
					 
					 
					    This  class  will  compute  risk  for  the  sample  dataset :  the  marketer  and  prosecutor  risk  are  computed  by  default . 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    This  class  can  optionally  add  pitman  risk  if  the  population  size  is  known . 
 
					 
					 
					 
					    This  class  can  optionally  add  pitman  risk  if  the  population  size  is  known . 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    """ 
 
					 
					 
					 
					    """ 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    def __init__(self, **_args):
					        """
					        :sample     optional sample dataset (pandas data-frame), defaults to an empty data-frame
					        :columns    optional columns used to form the groups; when missing or empty, the columns
					                    whose ratio of distinct values to number of rows is below 1 are used
					        """
					        super().__init__()
					        self._sample = _args['sample'] if 'sample' in _args else pd.DataFrame()
					        self._columns = _args['columns'] if 'columns' in _args else None
					        self.cache['count'] = {'groups': 0, 'fields': 0, 'rows': 0}
					        if not self._columns:
					            # ratio of distinct values per column; a ratio of 1 means every row holds a distinct value
					            _nrows = self._sample.shape[0]
					            values = self._sample.apply(lambda col: col.unique().size / _nrows)
					            self._dinfo = dict(zip(self._sample.columns.tolist(), values))
					            self._columns = [key for key in self._dinfo if self._dinfo[key] < 1]
					        #
					        # At this point we have all the columns that are valid candidates even if the user didn't specify them
					        self.cache['count']['fields'] = len(self._columns)
					        if self._sample.shape[0] > 0 and self._columns:
					            _groups = self._sample.groupby(self._columns, as_index=False).count().values
					            self.set('groups', _groups)
					            self.cache['count']['groups'] = len(_groups)
					            # NOTE(review): the last entry of each group is taken as its row count; if self._columns
					            # covers every column of the sample there may be no count column left -- confirm
					            self.cache['count']['rows'] = np.sum([_g[-1] for _g in _groups])
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					            
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    def  marketer ( self ) : 
 
					 
					 
					 
					    def  marketer ( self ) : 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        """ 
 
					 
					 
					 
					        """ 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        computing  marketer  risk  for  sample  dataset 
 
					 
					 
					 
					        computing  marketer  risk  for  sample  dataset 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        """ 
 
					 
					 
					 
					        """ 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        
 
					 
					 
					 
					        
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					            
 
					 
					 
					 
					        
 
				
			 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
					        groups  =  self . cache [ ' groups ' ] 
 
					 
					 
					 
					        groups  =  self . cache [ ' groups ' ] 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        # group_count = groups.size 
 
					 
					 
					 
					        # group_count = groups.size 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        # row_count   = groups.sum() 
 
					 
					 
					 
					        # row_count   = groups.sum() 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        group_count  =  len ( groups ) 
 
					 
					 
					 
					        # group_count = len(groups) 
 
				
			 
			
				
				
			
		
	
		
		
			
				
					
					 
					 
					 
					        row_count  =  np . sum ( [ _g [ - 1 ]  for  _g  in  groups ] ) 
 
					 
					 
					 
					        group_count  =  self . cache [ ' count ' ] [ ' groups ' ] 
 
				
			 
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					        # row_count = np.sum([_g[-1] for _g in groups]) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					        row_count  =  self . cache [ ' count ' ] [ ' rows ' ] 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        return  group_count  /  np . float64 ( row_count ) 
 
					 
					 
					 
					        return  group_count  /  np . float64 ( row_count ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    def  prosecutor ( self ) : 
 
					 
					 
					 
					    def  prosecutor ( self ) : 
 
				
			 
			
		
	
	
		
		
			
				
					
						
						
						
							
								 
							 
						
					 
					 
					@ -272,40 +387,52 @@ class Sample(Risk):
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    def  unique_ratio ( self ) : 
 
					 
					 
					 
					    def  unique_ratio ( self ) : 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        groups  =  self . cache [ ' groups ' ]         
 
					 
					 
					 
					        groups  =  self . cache [ ' groups ' ]         
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        # row_count = groups.sum() 
 
					 
					 
					 
					        # row_count = groups.sum() 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        row_count  =  np . sum ( [ _g [ - 1 ]  for  _g  in  groups ] ) 
 
					 
					 
					 
					        # row_count = np.sum([_g[-1] for _g in groups]) 
 
				
			 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					        row_count  =  self . cache [ ' count ' ] [ ' rows ' ] 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        # return groups[groups == 1].sum() / np.float64(row_count) 
 
					 
					 
					 
					        # return groups[groups == 1].sum() / np.float64(row_count) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        values  =  [ _g [ - 1 ]  for  _g  in  groups  if  _g [ - 1 ]  ==  1 ] 
 
					 
					 
					 
					        values  =  [ _g [ - 1 ]  for  _g  in  groups  if  _g [ - 1 ]  ==  1 ] 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        
 
					 
					 
					 
					        
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        return  np . sum ( values )  /  np . float64 ( row_count ) 
 
					 
					 
					 
					        return  np . sum ( values )  /  np . float64 ( row_count ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					    def  journalist ( self ) : 
 
				
			 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					        return  self . unique_ratio ( ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    def  pitman ( self ) : 
 
					 
					 
					 
					    def  pitman ( self ) : 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        """ 
 
					 
					 
					 
					        """ 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        This  function  will  approximate  pitman  de - identification  risk  based  on  pitman  sampling 
 
					 
					 
					 
					        This  function  will  approximate  pitman  de - identification  risk  based  on  pitman  sampling 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        """ 
 
					 
					 
					 
					        """ 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					        
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        groups  =  self . cache [ ' groups ' ] 
 
					 
					 
					 
					        groups  =  self . cache [ ' groups ' ] 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					        print  ( self . cache [ ' pop_size ' ] ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        si  =  groups [ groups  ==  1 ] . size 
 
					 
					 
					 
					        si  =  groups [ groups  ==  1 ] . size 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        # u = groups.size 
 
					 
					 
					 
					        # u = groups.size 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        u  =  len ( groups ) 
 
					 
					 
					 
					        u  =  len ( groups ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        alpha  =  np . divide ( si  ,  np . float64 ( u )  ) 
 
					 
					 
					 
					        alpha  =  np . divide ( si  ,  np . float64 ( u )  ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        row_count  =  np . sum ( [ _g [ - 1 ]  for  _g  in  groups ] ) 
 
					 
					 
					 
					        # row_count = np.sum([_g[-1] for _g in groups]) 
 
				
			 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					        row_count  =  self . cache [ ' count ' ] [ ' rows ' ] 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        # f = np.divide(groups.sum(), np.float64(self.cache['pop_size'])) 
 
					 
					 
					 
					        # f = np.divide(groups.sum(), np.float64(self.cache['pop_size'])) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        f  =  np . divide ( row_count ,  np . float64 ( self . cache [ ' pop_size ' ] ) ) 
 
					 
					 
					 
					        f  =  np . divide ( row_count ,  np . float64 ( self . cache [ ' pop_size ' ] ) ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        return  np . power ( f , 1 - alpha ) 
 
					 
					 
					 
					        return  np . power ( f , 1 - alpha ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					class  Population ( Sampl e) : 
 
					 
					 
					 
					class  Population ( Comput e) : 
 
				
			 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
					    """ 
 
					 
					 
					 
					    """ 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    This  class  will  compute  risk  for  datasets  that  have  population  information  or  datasets  associated  with  them . 
 
					 
					 
					 
					    This  class  will  compute  risk  for  datasets  that  have  population  information  or  datasets  associated  with  them . 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    This  computation  includes  pitman  risk  ( it  requires  minimal  information  about  population ) 
 
					 
					 
					 
					    This  computation  includes  pitman  risk  ( it  requires  minimal  information  about  population ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    """ 
 
					 
					 
					 
					    """ 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    def  __init__ ( self , * * args ) : 
 
					 
					 
					 
					    def  __init__ ( self , * * _args ) : 
 
				
			 
			
				
				
			
		
	
		
		
			
				
					
					 
					 
					 
					        Sample . __init__ ( self ) 
 
					 
					 
					 
					        super ( ) . __init__ ( * * _args ) 
 
				
			 
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					    def  init ( self , * * _args ) : 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					        xi  =  pd . DataFrame ( { " sample_group_size " : self . _sample . groupby ( self . _columns , as_index = False ) . count ( ) } ) . reset_index ( ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					        yi  =  pd . DataFrame ( { " population_group_size " : _args [ ' population ' ] . groupby ( self . _columns , as_index = False ) . size ( ) } ) . reset_index ( ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					        merged_groups  =  pd . merge ( xi , yi , on = self . _columns , how = ' inner ' )                    
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					        self . set ( ' merged_groups ' , merged_groups ) 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					
 
					 
					 
					 
					
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    def  set ( self , key , value ) : 
 
					 
					 
					 
					    def  set ( self , key , value ) : 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        Sample . set ( self , key , value ) 
 
					 
					 
					 
					        self . set ( self , key , value ) 
 
				
			 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
					        if  key  ==  ' merged_groups '  :   
 
					 
					 
					 
					        if  key  ==  ' merged_groups '  :   
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					               
 
					 
					 
					 
					               
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					            Sample . set ( self , ' pop_size ' , np . float64 ( value . population_group_size . sum ( ) )  ) 
 
					 
					 
					 
					            self . set ( self , ' pop_size ' , np . float64 ( value . population_group_size . sum ( ) )  ) 
 
				
			 
			
				
				
			
		
	
		
		
			
				
					
					 
					 
					 
					            Sample . set ( self , ' groups ' , value . sample_group_size ) 
 
					 
					 
					 
					            self . set ( self , ' groups ' , value . sample_group_size ) 
 
				
			 
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					 
					 
					 
					    """ 
 
					 
					 
					 
					    """ 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    This  class  will  measure  risk  and  account  for  the  existance  of  a  population 
 
					 
					 
					 
					    This  class  will  measure  risk  and  account  for  the  existance  of  a  population 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					    : merged_groups  { sample_group_size ,  population_group_size }  is  a  merged  dataset  with  group  sizes  of  both  population  and  sample 
 
					 
					 
					 
					    : merged_groups  { sample_group_size ,  population_group_size }  is  a  merged  dataset  with  group  sizes  of  both  population  and  sample 
 
				
			 
			
		
	
	
		
		
			
				
					
						
						
						
							
								 
							 
						
					 
					 
					@ -314,6 +441,7 @@ class Population(Sample):
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        """ 
 
					 
					 
					 
					        """ 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        This  function  requires 
 
					 
					 
					 
					        This  function  requires 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        """ 
 
					 
					 
					 
					        """ 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					 
					 
					 
					 
					        
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        r  =  self . cache [ ' merged_groups ' ] 
 
					 
					 
					 
					        r  =  self . cache [ ' merged_groups ' ] 
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        sample_row_count  =  r . sample_group_size . sum ( )  
 
					 
					 
					 
					        sample_row_count  =  r . sample_group_size . sum ( )  
 
				
			 
			
		
	
		
		
			
				
					
					 
					 
					 
					        # 
 
					 
					 
					 
					        #