privacykit/risk/risk.py

"""
    Health Information Privacy Lab
    @TODO:
        sample = args['sample'] if 'sample' in args else pd.DataFrame(self._df)
            
            r['pop. marketer'] = handle_population.marketer()            
            r['pitman risk'] = handle_population.pitman()
            r['pop. group size'] = np.unique(yi.population_group_size).size
        #
        # At this point we have both columns for either sample,population or both
        #
        r['field count'] = len(cols)
        return pd.DataFrame([r])

class Risk:
    """
    Base class for the risk computations, which are structured in 2 sub classes:
        - Sample        computes risk associated with a sample dataset only
        - Population    computes risk associated with a population

    The base class only provides a small key/value cache (``self.cache``).
    """
    def __init__(self):
        # Plain dictionary holding whatever was stored through set()
        self.cache = {}

    def set(self, key, value):
        """
        Store a value in the cache under the given key, overwriting any previous entry.

        :param key:     hashable identifier of the entry
        :param value:   object to associate with the key
        """
        # The previous implementation first tested ``id not in self.cache``; no
        # variable named ``id`` exists in this scope, so that referred to the
        # builtin ``id`` function and planted a stray ``{id: {}}`` entry in the
        # cache. Only the requested key is written now.
        self.cache[key] = value

class Sample(Risk):
    """
    This class will compute risk for the sample dataset: the marketer and prosecutor risk are computed by default.
    This class can optionally add pitman risk if the population size is known.
    """
    def __init__(self):
        Risk.__init__(self)
    def marketer(self):
        It identifies if there is at least one record that is unique
        # sample_row_count = r.sample_group_size.size
        return r.apply(lambda row: (row.sample_group_size / np.float64(row.population_group_size)) /np.float64(sample_row_count) ,axis=1).sum()
pandas extension for risk 6 years ago			`"""`
			`Health Information Privacy Lab`
added pitman risk, and refactored some code 6 years ago			`@TODO:`
pandas extension for risk 6 years ago			`sample = args['sample'] if 'sample' in args else pd.DataFrame(self._df)`
added pitman risk, and refactored some code 6 years ago
			`r['pop. marketer'] = handle_population.marketer()`
			`r['pitman risk'] = handle_population.pitman()`
			`r['pop. group size'] = np.unique(yi.population_group_size).size`
			`#`
			`# At this point we have both columns for either sample,population or both`
			`#`
			`r['field count'] = len(cols)`
			`return pd.DataFrame([r])`

			`class Risk :`
			`"""`
			`This class is an abstraction of how we chose to structure risk computation i.e in 2 sub classes:`
			`- Sample computes risk associated with a sample dataset only`
			`- Population computes risk associated with a population`
			`"""`
			`def __init__(self):`
			`self.cache = {}`
			`def set(self,key,value):`
			`if id not in self.cache :`
			`self.cache[id] = {}`
			`self.cache[key] = value`

			`class Sample(Risk):`
			`"""`
			`This class will compute risk for the sample dataset: the marketer and prosecutor risk are computed by default.`
			`This class can optionally add pitman risk if the population size is known.`
			`"""`
			`def __init__(self):`
			`Risk.__init__(self)`
			`def marketer(self):`
pandas extension for risk 6 years ago			`It identifies if there is at least one record that is unique`
added pitman risk, and refactored some code 6 years ago			`# sample_row_count = r.sample_group_size.size`
			`return r.apply(lambda row: (row.sample_group_size / np.float64(row.population_group_size)) /np.float64(sample_row_count) ,axis=1).sum()`