privacykit/risk/risk.py

"""
    Health Information Privacy Lab
    @TODO:
        sample = args['sample'] if 'sample' in args else pd.DataFrame(self._df)
        if not args  or 'cols' not in args:
            merged_groups = pd.merge(xi,yi,on=cols,how='inner')
            handle_population= Population()            
            handle_population.set('merged_groups',merged_groups)
            
            r['pop. marketer'] = handle_population.marketer()            
            r['pitman risk'] = handle_population.pitman()
            r['pop. group size'] = np.unique(yi.population_group_size).size
        #
        # At this point we have both columns for either sample,population or both
        #
        r['field count'] = len(cols)
        return pd.DataFrame([r])

class Risk:
    """
    This class is an abstraction of how we chose to structure risk computation i.e in 2 sub classes:
        - Sample        computes risk associated with a sample dataset only
        - Population    computes risk associated with a population

    Every instance owns a plain dictionary, ``cache``, in which named inputs
    are stored through ``set``.
    """
    def __init__(self):
        # name -> value store, filled through set()
        self.cache = {}

    def set(self, key, value):
        """
        Store ``value`` in the cache under ``key``, replacing any previous entry.

        :key    name of the entry (any hashable object)
        :value  object to keep under that name
        """
        #
        # The earlier implementation first checked for (and inserted) the builtin
        # function ``id`` as a cache key, which only polluted the cache with a
        # meaningless ``{id: {}}`` entry. A direct assignment is all that is needed.
        #
        self.cache[key] = value

class Sample(Risk):
    """
    This class will compute risk for the sample dataset: the marketer and prosecutor risk are computed by default.
    This class can optionally add pitman risk if the population size is known.

    NOTE(review): only ``marketer`` is visible in this excerpt; the prosecutor and
    pitman computations mentioned above are not shown here.
    """
    def __init__(self):
        # Explicit call to the parent initializer (no state of its own is added here).
        Risk.__init__(self)
    def marketer(self):
        """
        Return the sum, over the rows of ``r``, of
            (sample_group_size / population_group_size) / sample_row_count
        where sample_row_count is the total of the ``sample_group_size`` column.

        NOTE(review): ``r`` is never assigned in the visible body, so as written this
        raises NameError unless a module-level ``r`` exists. Presumably it is a
        DataFrame with ``sample_group_size`` and ``population_group_size`` columns
        (e.g. taken from ``self.cache``) -- confirm against the complete file.
        """

        sample_row_count = r.sample_group_size.sum() 
        #
        # @TODO : confirm that the denominator above should be the total of the
        # group sizes (sum) and not the number of groups (size), i.e.
        # sample_row_count = r.sample_group_size.size
        #
        # Row-wise ratio of sample to population group size, normalised by
        # sample_row_count and then added up over all rows.
        return r.apply(lambda row: (row.sample_group_size / np.float64(row.population_group_size)) /np.float64(sample_row_count) ,axis=1).sum()
pandas extension for risk 6 years ago			`"""`
			`Health Information Privacy Lab`
added pitman risk, and refactored some code 6 years ago			`@TODO:`
pandas extension for risk 6 years ago			`sample = args['sample'] if 'sample' in args else pd.DataFrame(self._df)`
Refactored, including population risk assessment 6 years ago			`if not args or 'cols' not in args:`
added pitman risk, and refactored some code 6 years ago			`merged_groups = pd.merge(xi,yi,on=cols,how='inner')`
			`handle_population= Population()`
			`handle_population.set('merged_groups',merged_groups)`

			`r['pop. marketer'] = handle_population.marketer()`
			`r['pitman risk'] = handle_population.pitman()`
			`r['pop. group size'] = np.unique(yi.population_group_size).size`
			`#`
			`# At this point we have both columns for either sample,population or both`
			`#`
			`r['field count'] = len(cols)`
			`return pd.DataFrame([r])`

			`class Risk :`
			`"""`
			`This class is an abstraction of how we chose to structure risk computation i.e in 2 sub classes:`
			`- Sample computes risk associated with a sample dataset only`
			`- Population computes risk associated with a population`
			`"""`
			`def __init__(self):`
			`self.cache = {}`
			`def set(self,key,value):`
			`if id not in self.cache :`
			`self.cache[id] = {}`
			`self.cache[key] = value`

			`class Sample(Risk):`
			`"""`
			`This class will compute risk for the sample dataset: the marketer and prosecutor risk are computed by default.`
			`This class can optionally add pitman risk if the population size is known.`
			`"""`
			`def __init__(self):`
			`Risk.__init__(self)`
			`def marketer(self):`
pandas extension for risk 6 years ago
added pitman risk, and refactored some code 6 years ago			`sample_row_count = r.sample_group_size.sum()`
			`#`
			`# @TODO : make sure the above line is size (not sum)`
			`# sample_row_count = r.sample_group_size.size`
			`return r.apply(lambda row: (row.sample_group_size / np.float64(row.population_group_size)) /np.float64(sample_row_count) ,axis=1).sum()`