# ola.py
# Forked from leo-mazz/crowds.
import pandas as pd
from crowds.kanonymity.ola import anonymize
from crowds.kanonymity.information_loss import dm_star_loss
from crowds.kanonymity.generalizations import GenRule
# Process-wide pandas printing setup, applied once at import time.
# Underscore-prefixed names keep these helpers out of `import *`.
_DISPLAY_OPTIONS = (
    ('display.max_columns', None),  # None: show every column
    ('display.max_rows', None),  # None: show every row
    ('display.expand_frame_repr', False),  # do not wrap a row over several lines
)
for _option, _setting in _DISPLAY_OPTIONS:
    pd.set_option(_option, _setting)
# Labels for the 15 columns of adult.csv, listed in positional order.
# They are handed to pd.read_csv through `names=` further down.
column_names = [
    'age', 'workClass', 'fnlwgt',
    'education', 'education-num',
    'marital-status', 'occupation', 'relationship', 'race', 'sex',
    'capital-gain', 'capital-loss', 'hours-per-week',
    'native-country',
    'income',
]
def generalize_age(value):
    """Map a numeric age onto one of four coarse age-band labels.

    Bands (upper bounds inclusive):
        value <= 20        -> 'too young'
        20 < value <= 35   -> 'young'
        35 < value <= 60   -> 'a bit old'
        value > 60         -> 'quite old'

    Args:
        value: A number supporting ordering comparisons with ints.

    Returns:
        The band label as a string, or None when the value fits no band
        (for example NaN, for which every comparison is false).
    """
    # Bands are tested from oldest to youngest; each early return makes the
    # upper bound of the next band implicit.
    if value > 60:
        return 'quite old'
    if value > 35:
        return 'a bit old'
    if value > 20:
        return 'young'
    # Kept as an explicit test (not a bare fall-through) so that NaN is not
    # mislabelled as 'too young'.
    if value <= 20:
        return 'too young'
    return None
# First example: rules for the 'age' and 'sex' columns.
generalization_rules = dict(
    age=GenRule([generalize_age]),  # 2-levels generalization
    sex=GenRule([]),  # 1-level generalization
)

# Load the data set. The separator is a regular expression (a comma with
# optional surrounding spaces), which is why the python parsing engine is
# requested; '?' cells are read as missing values.
adult = pd.read_csv(
    '../adult.csv',
    names=column_names,
    sep=' *, *',
    na_values='?',
    engine='python',
)

# Anonymize with k=10, max_sup=0.0 and dm_star_loss as the information-loss
# function, then show the first rows of the result.
adult_anonymized, transformation = anonymize(
    adult,
    generalization_rules=generalization_rules,
    k=10,
    max_sup=0.0,
    info_loss=dm_star_loss,
)
print(adult_anonymized.head())
# Another example with different generalization rules: only 'race' is
# listed, with an empty list of generalization functions. The `adult` frame
# loaded above is reused and the previous results are overwritten.
generalization_rules = dict(race=GenRule([]))

adult_anonymized, transformation = anonymize(
    adult,
    generalization_rules=generalization_rules,
    k=10,
    max_sup=0.0,
    info_loss=dm_star_loss,
)
print(adult_anonymized.head())