Skip to content

Commit

Permalink
feat: Privacy Preserving Learning (#3334)
Browse files Browse the repository at this point in the history
  • Loading branch information
manavsinghal157 authored Nov 30, 2021
1 parent 8cc6e3c commit f0e16ad
Show file tree
Hide file tree
Showing 25 changed files with 572 additions and 13 deletions.
24 changes: 24 additions & 0 deletions .github/workflows/build_vw_privacy_activation.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# CI workflow: builds VW with the privacy activation feature switched on and
# runs the tests that depend on it.
name: Linux / C++ - VW with privacy activation

# Run on pushes to master and release branches, and on every pull request.
on:
  push:
    branches:
      - master
      - 'releases/**'
  pull_request:
    branches:
      - '*'

jobs:
  check:
    # All steps execute inside this build image.
    container:
      image: vowpalwabbit/ubuntu1804-build:latest
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v1
      # Configures with -DBUILD_PRIVACY_ACTIVATION=On and builds the binaries.
      - name: Build VW with privacy activation
        shell: bash
        run: ./.scripts/linux/build-privacy_activation.sh
      # Runs the privacy_activation test spec and the matching unit tests.
      - name: Test VW with privacy activation
        shell: bash
        run: ./.scripts/linux/test-privacy_activation.sh
11 changes: 11 additions & 0 deletions .scripts/linux/build-privacy_activation.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash
# Configure and build the VW binary and the unit-test executable with the
# privacy activation feature enabled (BUILD_PRIVACY_ACTIVATION=On).
set -e  # stop at the first failing command
set -x  # echo each command for the CI log

# Resolve the repository root relative to this script so the script works
# regardless of the caller's working directory. Expansions are quoted so a
# checkout path containing spaces or glob characters is not word-split.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
REPO_DIR="$SCRIPT_DIR/../../"
cd "$REPO_DIR"

# Always start from a clean build tree.
rm -rf build
cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=Debug -DWARNINGS=OFF -DBUILD_PRIVACY_ACTIVATION=On -DBUILD_JAVA=Off -DBUILD_PYTHON=Off -DBUILD_TESTS=On -DBUILD_FLATBUFFERS=Off
cmake --build build --target vw-bin vw-unit-test.out
13 changes: 13 additions & 0 deletions .scripts/linux/test-privacy_activation.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash
# Run the privacy activation test spec through run_tests.py, then the
# privacy-activation unit tests from the already-built unit-test executable.
set -e  # stop at the first failing command
set -x  # echo each command for the CI log

# Resolve the repository root relative to this script. Expansions are quoted
# so a checkout path containing spaces or glob characters is not word-split.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
REPO_DIR="$SCRIPT_DIR/../../"
cd "$REPO_DIR"

cd test
python3 run_tests.py -f --skip_spanning_tree_tests -j "$(nproc)" --test_spec privacy_activation.vwtest.json

cd ../build
# The filter is single-quoted so the '*' wildcards reach the test runner
# verbatim instead of being subject to shell pathname expansion.
./test/unit_test/vw-unit-test.out --run_test='test_feature_is_activated*,test_feature_not_activated*,test_feature_could_be_activated_but_feature_not_initialized*'
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ option(RAPIDJSON_SYS_DEP "Override using the submodule for RapidJSON dependency.
option(FMT_SYS_DEP "Override using the submodule for FMT dependency. Instead will use find_package" OFF)
option(SPDLOG_SYS_DEP "Override using the submodule for spdlog dependency. Instead will use find_package" OFF)
option(BUILD_FLATBUFFERS "Build flatbuffers" OFF)
# Opt-in switch for the privacy activation feature; defaults to OFF and is
# turned on at configure time with -DBUILD_PRIVACY_ACTIVATION=On.
option(BUILD_PRIVACY_ACTIVATION "Enable privacy activation feature" OFF)

string(TOUPPER "${CMAKE_BUILD_TYPE}" CONFIG)

Expand Down
9 changes: 9 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,13 @@ if(NOT WIN32)
COMMAND python3 run_tests.py --ignore_dirty --test_spec ${CMAKE_CURRENT_SOURCE_DIR}/slow.vwtest.json
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
)

# Register the privacy activation test spec as a test named
# "privacy_activation_tests", but only when the build was configured with
# BUILD_PRIVACY_ACTIVATION enabled. The test runs run_tests.py from this
# source directory against privacy_activation.vwtest.json.
if(BUILD_PRIVACY_ACTIVATION)
add_test(
NAME privacy_activation_tests

COMMAND python3 run_tests.py --ignore_dirty --test_spec ${CMAKE_CURRENT_SOURCE_DIR}/privacy_activation.vwtest.json
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
)
endif()
endif()
28 changes: 27 additions & 1 deletion test/benchmarks/standalone/benchmark_text_input.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ static void benchmark_cb_adf_learn(benchmark::State& state, int feature_count)
{
auto vw = VW::initialize("--cb_explore_adf --epsilon 0.1 --quiet -q ::", nullptr, false, nullptr, nullptr);
multi_ex examples;
examples.push_back(VW::read_example(*vw, std::string("shared | s_1 s_2")));
examples.push_back(VW::read_example(*vw, std::string("shared tag1| s_1 s_2")));
examples.push_back(VW::read_example(*vw, get_x_string_fts(feature_count)));
examples.push_back(VW::read_example(*vw, get_x_string_fts_no_label(feature_count)));
examples.push_back(VW::read_example(*vw, get_x_string_fts_no_label(feature_count)));
Expand All @@ -65,6 +65,27 @@ static void benchmark_cb_adf_learn(benchmark::State& state, int feature_count)
VW::finish(*vw);
}

#ifdef PRIVACY_ACTIVATION
// Benchmark: repeatedly calls learn() on one fixed cb_explore_adf
// multi-example with --privacy_activation passed on the command line.
//
// The multi-example holds four entries: a shared example (with the "tag1"
// token before the bar), one example built by get_x_string_fts(), and two
// built by get_x_string_fts_no_label(), all sized by feature_count.
// Only the learn() call sits inside the timed loop; parsing and teardown
// happen outside of it.
static void benchmark_cb_adf_learn_privacy_preserving(benchmark::State& state, int feature_count)
{
  const std::string vw_args = "--privacy_activation --cb_explore_adf --epsilon 0.1 --quiet -q ::";
  auto workspace = VW::initialize(vw_args, nullptr, false, nullptr, nullptr);

  multi_ex adf_batch;
  adf_batch.push_back(VW::read_example(*workspace, std::string("shared tag1| s_1 s_2")));
  adf_batch.push_back(VW::read_example(*workspace, get_x_string_fts(feature_count)));
  // Two further examples, both produced by the no-label generator.
  for (int extra = 0; extra < 2; ++extra)
  {
    adf_batch.push_back(VW::read_example(*workspace, get_x_string_fts_no_label(feature_count)));
  }

  for (auto _ : state)
  {
    workspace->learn(adf_batch);
    // Keep the optimizer from discarding the memory effects of learn().
    benchmark::ClobberMemory();
  }

  // Hand the examples back before shutting the workspace down.
  workspace->finish_example(adf_batch);
  VW::finish(*workspace);
}
#endif

static void benchmark_ccb_adf_learn(benchmark::State& state, std::string feature_string)
{
auto vw = VW::initialize("--ccb_explore_adf --quiet", nullptr, false, nullptr, nullptr);
Expand Down Expand Up @@ -225,6 +246,11 @@ BENCHMARK_CAPTURE(benchmark_ccb_adf_learn, many_features, "a b c d e f g h i j k
BENCHMARK_CAPTURE(benchmark_cb_adf_learn, few_features, 2);
BENCHMARK_CAPTURE(benchmark_cb_adf_learn, many_features, 120);

// Register the privacy-preserving cb_adf learn benchmark at the same two
// feature counts (2 and 120) used for benchmark_cb_adf_learn. These
// registrations are compiled only when PRIVACY_ACTIVATION is defined.
#ifdef PRIVACY_ACTIVATION
BENCHMARK_CAPTURE(benchmark_cb_adf_learn_privacy_preserving, few_features, 2);
BENCHMARK_CAPTURE(benchmark_cb_adf_learn_privacy_preserving, many_features, 120);
#endif

BENCHMARK_CAPTURE(benchmark_multi, cb_adf_no_namespaces, gen_cb_examples(100, 7, 3, 6, 1, 4, 14, 2, false),
"--cb_explore_adf --quiet");
BENCHMARK_CAPTURE(benchmark_multi, cb_adf_diff_char_no_interactions, gen_cb_examples(100, 7, 3, 6, 3, 4, 14, 2, false),
Expand Down
15 changes: 15 additions & 0 deletions test/pred-sets/ref/readable_model_privacy.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
Version 8.11.0
Id
Min label:0
Max label:1
bits:18
lda:0
0 ngram:
0 skip:
options: --cb_adf --cb_explore_adf --cb_max_cost 1 --cb_min_cost 0 --cb_type mtr --cbify 2 --csoaa_ldf multiline --csoaa_rank
Checksum: 2467443617
event_sum 31
action_sum 62
:0
Constant:116060:0.115805
impression:236580:0.0376027
13 changes: 13 additions & 0 deletions test/pred-sets/ref/readable_model_privacy_no_tags.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Version 8.11.0
Id
Min label:0
Max label:1
bits:18
lda:0
0 ngram:
0 skip:
options: --cb_adf --cb_explore_adf --cb_max_cost 1 --cb_min_cost 0 --cb_type mtr --cbify 2 --csoaa_ldf multiline --csoaa_rank
Checksum: 2467443617
event_sum 31
action_sum 62
:0
26 changes: 26 additions & 0 deletions test/privacy_activation.vwtest.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
[
{
"id": 1,
"desc": "test privacy_activation command line argument",
"vw_command": "--privacy_activation -d train-sets/privacy_dataset --invert_hash readable_model_privacy.txt --cbify 2 --cb_adf",
"diff_files": {
"stderr": "train-sets/ref/privacy_dataset.stderr",
"readable_model_privacy.txt": "pred-sets/ref/readable_model_privacy.txt"
},
"input_files": [
"train-sets/privacy_dataset"
]
},
{
"id": 2,
"desc": "test privacy_activation command line argument with no tags input",
"vw_command": "--privacy_activation -d train-sets/privacy_dataset_no_tags --invert_hash readable_model_privacy_no_tags.txt --cbify 2 --cb_adf",
"diff_files": {
"stderr": "train-sets/ref/privacy_no_tags.stderr",
"readable_model_privacy_no_tags.txt": "pred-sets/ref/readable_model_privacy_no_tags.txt"
},
"input_files": [
"train-sets/privacy_dataset_no_tags"
]
}
]
31 changes: 31 additions & 0 deletions test/train-sets/privacy_dataset
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
1 0| impression:7 url_hash=1.4340390157469403E19 ad_id=3529332 advertiser_id=23777 depth=1 position=1 query_id=6835664 keyword_id=40967 title_id=338 description_id=460
2 9308047| impression:2 url_hash=7.1374637970466191E18 ad_id=21340345 advertiser_id=36665 depth=2 position=2 query_id=11756 keyword_id=19187 title_id=86209 description_id=675
1 3129923| impression:1 url_hash=1.205787899908646E19 ad_id=20118104 advertiser_id=27961 depth=2 position=1 query_id=8727954 keyword_id=7197 title_id=23615 description_id=25005
2 2213015| impression:4 url_hash=1.5989049276530887E19 ad_id=20313518 advertiser_id=23828 depth=2 position=1 query_id=2999 keyword_id=19270 title_id=16818 description_id=140
1 4679407| impression:1 url_hash=1.056945094151204E19 ad_id=21344630 advertiser_id=36635 depth=2 position=2 query_id=15889 keyword_id=766 title_id=502 description_id=670
1 14071245| impression:1 url_hash=1.5989049276530887E19 ad_id=20313578 advertiser_id=23828 depth=2 position=1 query_id=2524 keyword_id=18509 title_id=16003 description_id=140
1 26293| impression:1 url_hash=1.3111764113225392E19 ad_id=21273410 advertiser_id=36431 depth=1 position=1 query_id=67439 keyword_id=2293 title_id=76752 description_id=124493
1 6006964| impression:2 url_hash=1.4340390157469403E19 ad_id=3061636 advertiser_id=23777 depth=2 position=2 query_id=243636 keyword_id=400 title_id=929 description_id=1135
1 0| impression:1 url_hash=1.2837504198869455E19 ad_id=21116674 advertiser_id=30627 depth=1 position=1 query_id=19184793 keyword_id=46299 title_id=222209 description_id=1866
2 73978| impression:1 url_hash=1.81756187683264358E18 ad_id=6561197 advertiser_id=27101 depth=2 position=1 query_id=4382353 keyword_id=2814 title_id=70719 description_id=63264
1 1224594| impression:1 url_hash=1.4340390157469403E19 ad_id=20643955 advertiser_id=23808 depth=3 position=3 query_id=2407448 keyword_id=2651 title_id=466 description_id=488
1 4723775| impression:1 url_hash=5.9305714623157504E18 ad_id=4372879 advertiser_id=10986 depth=3 position=3 query_id=143 keyword_id=148 title_id=16192 description_id=6791
1 113819| impression:1 url_hash=1.7147568955774638E19 ad_id=21099609 advertiser_id=35384 depth=2 position=2 query_id=13417497 keyword_id=1136124 title_id=3571642 description_id=620008
1 0| impression:1 url_hash=9.75107224858461E18 ad_id=10850233 advertiser_id=29713 depth=3 position=3 query_id=3116643 keyword_id=237 title_id=10019 description_id=5780
1 0| impression:1 url_hash=1.205787899908646E19 ad_id=20157182 advertiser_id=27961 depth=2 position=2 query_id=283900 keyword_id=103 title_id=327 description_id=367
1 7826618| impression:3 url_hash=1.2871563470264744E19 ad_id=21320290 advertiser_id=35831 depth=2 position=1 query_id=33 keyword_id=78 title_id=229 description_id=327
1 22689| impression:1 url_hash=1.4340390157469403E19 ad_id=3200112 advertiser_id=23777 depth=1 position=1 query_id=897541 keyword_id=1183 title_id=3947 description_id=90
1 16527503| impression:1 url_hash=8.1342641745108931E18 ad_id=4176358 advertiser_id=1268 depth=2 position=1 query_id=10420695 keyword_id=58 title_id=130 description_id=213
1 9165726| impression:1 url_hash=1.7740959527443177E18 ad_id=20874956 advertiser_id=34761 depth=2 position=1 query_id=386 keyword_id=421 title_id=16682 description_id=17960
2 6192425| impression:1 url_hash=2.504651598567297E18 ad_id=22073678 advertiser_id=35668 depth=2 position=2 query_id=1346716 keyword_id=18038 title_id=111554 description_id=97582
1 0| impression:1 url_hash=1.3756257544627677E19 ad_id=8184251 advertiser_id=24354 depth=1 position=1 query_id=6274506 keyword_id=149672 title_id=462298 description_id=18035
1 0| impression:1 url_hash=7.7032790697015429E18 ad_id=5436087 advertiser_id=4983 depth=2 position=2 query_id=4650166 keyword_id=22385 title_id=218875 description_id=190932
1 6358009| impression:1 url_hash=1.7363854844105064E19 ad_id=20017078 advertiser_id=23798 depth=2 position=2 query_id=1878 keyword_id=3043 title_id=4 description_id=5519
1 0| impression:3 url_hash=6.7150490022326559E18 ad_id=3065545 advertiser_id=23783 depth=1 position=1 query_id=6372660 keyword_id=45 title_id=40 description_id=53
1 256225| impression:1 url_hash=2.3553977033639875E18 ad_id=21965276 advertiser_id=38207 depth=3 position=2 query_id=17634 keyword_id=27752 title_id=349595 description_id=287606
1 1085919| impression:1 url_hash=1.130902570404585E19 ad_id=22089370 advertiser_id=38263 depth=3 position=3 query_id=5 keyword_id=3 title_id=64 description_id=147
2 1822049| impression:1 url_hash=8.9945570705085798E18 ad_id=20030165 advertiser_id=23799 depth=2 position=1 query_id=1076 keyword_id=28 title_id=41 description_id=52
1 6203365| impression:1 url_hash=1.205787899908646E19 ad_id=20192676 advertiser_id=27961 depth=2 position=1 query_id=63 keyword_id=39 title_id=72 description_id=99
1 0| impression:1 url_hash=1.205787899908646E19 ad_id=20221106 advertiser_id=27961 depth=2 position=2 query_id=217607 keyword_id=104698 title_id=1043071 description_id=1065346
1 1117369| impression:1 url_hash=1.3756257544627677E19 ad_id=8184910 advertiser_id=24354 depth=3 position=1 query_id=61977 keyword_id=103255 title_id=95131 description_id=2619
2 0| impression:3 url_hash=5.5111324610218004E18 ad_id=21245019 advertiser_id=10040 depth=1 position=1 query_id=17134 keyword_id=4162 title_id=9593 description_id=33066
31 changes: 31 additions & 0 deletions test/train-sets/privacy_dataset_no_tags
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
1 | impression:1 url_hash=1.0710025208886284E19 ad_id=8343295 advertiser_id=11700 depth=3 position=3 query_id=7702266 keyword_id=21264 title_id=27892 description_id=1559
2 | impression:1 url_hash=1.7363854844105064E19 ad_id=20017077 advertiser_id=23798 depth=1 position=1 query_id=93079 keyword_id=35498 title_id=4 description_id=36476
1 | impression:1 url_hash=8.9154733542573404E18 ad_id=21348354 advertiser_id=36654 depth=1 position=1 query_id=10981 keyword_id=19975 title_id=36105 description_id=33292
1 | impression:1 url_hash=4.4266930771924316E18 ad_id=20366086 advertiser_id=33280 depth=3 position=3 query_id=0 keyword_id=5942 title_id=4057 description_id=4390
1 | impression:1 url_hash=1.157259655396508E19 ad_id=6803526 advertiser_id=10790 depth=2 position=1 query_id=9881978 keyword_id=60593 title_id=25242 description_id=1679
2 | impression:1 url_hash=2.82757736623248544E17 ad_id=21186478 advertiser_id=35793 depth=2 position=1 query_id=163315 keyword_id=4871 title_id=3257 description_id=1153
1 | impression:1 url_hash=8.813902859733762E18 ad_id=20886690 advertiser_id=34840 depth=2 position=2 query_id=316 keyword_id=543 title_id=2206 description_id=2888
1 | impression:1 url_hash=3.8110346841213204E18 ad_id=21367376 advertiser_id=20667 depth=3 position=2 query_id=2601439 keyword_id=118 title_id=9594 description_id=9705
1 | impression:1 url_hash=9.806838428950888E18 ad_id=21811752 advertiser_id=37737 depth=3 position=2 query_id=1631 keyword_id=333 title_id=841 description_id=2175
2 | impression:1 url_hash=1.4340390157469403E19 ad_id=9027213 advertiser_id=23808 depth=2 position=1 query_id=5 keyword_id=1 title_id=0 description_id=0
1 | impression:1 url_hash=1.154854293114536E19 ad_id=21188630 advertiser_id=17432 depth=3 position=3 query_id=54485 keyword_id=20526 title_id=680977 description_id=621442
2 | impression:3 url_hash=1.205787899908646E19 ad_id=20170434 advertiser_id=27961 depth=1 position=1 query_id=16554377 keyword_id=63355 title_id=106189 description_id=100521
1 | impression:1 url_hash=2.352816826109845E18 ad_id=10362063 advertiser_id=28873 depth=2 position=1 query_id=3203 keyword_id=38 title_id=1150 description_id=1129
1 | impression:1 url_hash=5.1206834405104681E18 ad_id=8676724 advertiser_id=1268 depth=1 position=1 query_id=1330 keyword_id=40 title_id=45 description_id=13
1 | impression:1 url_hash=5.5896494124657367E18 ad_id=20950936 advertiser_id=35067 depth=1 position=1 query_id=637 keyword_id=9563 title_id=8908 description_id=42
1 | impression:2 url_hash=1.205787899908646E19 ad_id=20163224 advertiser_id=27961 depth=1 position=1 query_id=24027 keyword_id=1003 title_id=3718 description_id=4300
1 | impression:1 url_hash=1.72996384937738752E18 ad_id=20882079 advertiser_id=23637 depth=2 position=2 query_id=1 keyword_id=659 title_id=890 description_id=227
1 | impression:1 url_hash=1.4935292746090631E19 ad_id=21967731 advertiser_id=37465 depth=3 position=3 query_id=743855 keyword_id=64605 title_id=774277 description_id=125
1 | impression:1 url_hash=1.432575689518243E19 ad_id=5937082 advertiser_id=24161 depth=2 position=1 query_id=8789369 keyword_id=3660 title_id=230 description_id=350
1 | impression:2 url_hash=6.7150490022326559E18 ad_id=3065545 advertiser_id=23783 depth=1 position=1 query_id=1060 keyword_id=60 title_id=54 description_id=78
2 | impression:1 url_hash=5.8512528144469361E18 ad_id=21477611 advertiser_id=28698 depth=1 position=1 query_id=13619350 keyword_id=1372 title_id=5269 description_id=5590
1 | impression:1 url_hash=1.7916193598630554E19 ad_id=21720811 advertiser_id=36961 depth=2 position=1 query_id=1644095 keyword_id=6655 title_id=257582 description_id=214940
1 | impression:1 url_hash=1.1547636679726547E19 ad_id=21229143 advertiser_id=36219 depth=2 position=2 query_id=118476 keyword_id=18740 title_id=184256 description_id=1155
1 | impression:1 url_hash=1.205787899908646E19 ad_id=20147104 advertiser_id=27961 depth=2 position=1 query_id=21877 keyword_id=10134 title_id=12022 description_id=13371
1 | impression:1 url_hash=3.1639974203144361E18 ad_id=21445735 advertiser_id=26288 depth=3 position=3 query_id=592 keyword_id=43 title_id=389 description_id=150
1 | impression:1 url_hash=9.75107224858461E18 ad_id=10850160 advertiser_id=29713 depth=2 position=2 query_id=274203 keyword_id=52559 title_id=299304 description_id=280190
1 | impression:1 url_hash=1.58319745024802848E17 ad_id=4383725 advertiser_id=10632 depth=2 position=1 query_id=4146195 keyword_id=99 title_id=3925 description_id=11188
2 | impression:1 url_hash=1.65645934293210163E18 ad_id=4292714 advertiser_id=24638 depth=2 position=2 query_id=11818280 keyword_id=378070 title_id=829668 description_id=700216
1 | impression:1 url_hash=2.6709527232789048E18 ad_id=20172890 advertiser_id=23805 depth=1 position=1 query_id=322 keyword_id=73 title_id=304 description_id=16
1 | impression:1 url_hash=1.4340390157469403E19 ad_id=10399705 advertiser_id=23808 depth=2 position=2 query_id=11943445 keyword_id=53500 title_id=12335 description_id=12184
1 | impression:1 url_hash=1.0974309841212465E19 ad_id=21326131 advertiser_id=36538 depth=3 position=3 query_id=2 keyword_id=142 title_id=9282 description_id=8539
24 changes: 24 additions & 0 deletions test/train-sets/ref/privacy_dataset.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
Num weight bits = 18
learning rate = 0.5
initial_t = 0
power_t = 0.5
using no cache
Reading datafile = train-sets/privacy_dataset
num sources = 1
Enabled reductions: gd, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, shared_feature_merger, cbify-adf
Input label = multiclass
Output pred = multiclass
average since example example current current current
loss last counter weight label predict features
0.000000 0.000000 1 1.0 1 1 11
0.000000 0.000000 2 2.0 2 2 11
0.000000 0.000000 4 4.0 2 2 11
0.125000 0.250000 8 8.0 1 1 11
0.187500 0.250000 16 16.0 1 1 11

finished run
number of examples = 31
weighted example sum = 31.000000
weighted label sum = 0.000000
average loss = 0.193548
total feature number = 341
24 changes: 24 additions & 0 deletions test/train-sets/ref/privacy_no_tags.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
Num weight bits = 18
learning rate = 0.5
initial_t = 0
power_t = 0.5
using no cache
Reading datafile = train-sets/privacy_dataset_no_tags
num sources = 1
Enabled reductions: gd, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, shared_feature_merger, cbify-adf
Input label = multiclass
Output pred = multiclass
average since example example current current current
loss last counter weight label predict features
0.000000 0.000000 1 1.0 1 1 11
0.000000 0.000000 2 2.0 2 2 11
0.250000 0.500000 4 4.0 1 2 11
0.375000 0.500000 8 8.0 1 1 11
0.500000 0.625000 16 16.0 1 2 11

finished run
number of examples = 31
weighted example sum = 31.000000
weighted label sum = 0.000000
average loss = 0.322581
total feature number = 341
Loading

0 comments on commit f0e16ad

Please sign in to comment.