-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathtest-accuracy.R
47 lines (40 loc) · 1.16 KB
/
test-accuracy.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
context("test accuracy")

##### TEST accuracy #####

library(dplyr)

# Open a SQLite connection to the database file whose path is read from the
# PTAXSIM_DB_PATH environment variable
ptaxsim_db_conn <- DBI::dbConnect(
  drv = RSQLite::SQLite(),
  dbname = Sys.getenv("PTAXSIM_DB_PATH")
)

# Bind the same connection under the same name in the global environment
# (presumably so tax_bill() can look it up there -- confirm against the package)
assign(
  x = "ptaxsim_db_conn",
  value = ptaxsim_db_conn,
  envir = globalenv()
)
# This test measures the accuracy of PTAXSIM by calculating the tax bill for a
# sample of PINs and comparing it to the known tax bill amount. The goal is to
# be within $10 of each bill 97.5% of the time
# Sample query: year, PIN, and known bill total for up to 1,000,000 rows of the
# `pin` table, taken in random order
sample_query <- "
SELECT year, pin, tax_bill_total
FROM pin
ORDER BY random()
LIMIT 1000000
"

pins <- DBI::dbGetQuery(conn = ptaxsim_db_conn, statement = sample_query)
# Calculate bills for every sampled year/PIN pair with tax_bill(). The result
# supplies the `final_tax` and `agency_name` columns used below (presumably
# several rows per year/PIN pair -- confirm against tax_bill())
bills_raw <- tax_bill(pins$year, pins$pin)

# Sum `final_tax` to one calculated total per year/PIN pair, attach the known
# total from the sample, and take the difference (real minus calculated).
# `.groups = "drop"` makes summarize() return an ungrouped result; without it
# the output stays grouped by `year` and dplyr prints a regrouping message
bills_summ <- bills_raw %>%
  group_by(year, pin) %>%
  summarize(calced_bill = sum(final_tax), .groups = "drop") %>%
  left_join(pins, by = c("year", "pin")) %>%
  rename(real_bill = tax_bill_total) %>%
  mutate(bill_diff = real_bill - calced_bill)
test_that("random sample of bills is >97.5% accurate", {
  # A bill counts as accurate when the real and calculated totals differ by
  # less than 10 in absolute value
  abs_diff <- abs(bills_summ$bill_diff)
  n_accurate <- sum(abs_diff < 10)
  share_accurate <- n_accurate / length(abs_diff)

  expect_gte(share_accurate, 0.975)
})
test_that("no agency names are missing from the sample of bills", {
  # The count of non-missing agency names must match the number of bill rows
  agency_names <- bills_raw$agency_name
  n_named <- sum(!is.na(agency_names))
  n_rows <- nrow(bills_raw)

  expect_equal(n_named, n_rows)
})
# Close the SQLite connection now that all tests in this file have run
DBI::dbDisconnect(conn = ptaxsim_db_conn)