-
Notifications
You must be signed in to change notification settings - Fork 12
Tiles Fastly Log Changes
Paul Norman edited this page Aug 17, 2023
·
7 revisions
Some logging changes might be backwards-incompatible. This is how to transition with backwards incompatible changes
- Create a logging endpoint that has the same settings as the previous one
- Adjust the path to increment the version number (e.g. /tile/v18/year=%Y/month=%m/day=%d/hour=%H/)
- Make the necessary changes to the logging settings
- Save
- Create a new table
CREATE EXTERNAL TABLE fastly_logs_v18 (
`time` string,
`ip` string,
`request` string,
`host` string,
`referer` string,
`useragent` string,
`secchua` string,
`requestedwith` string,
`accept` string,
`acceptlang` string,
`asn` bigint,
`country` string,
`datacenter` string,
`region` string,
`status` smallint,
`size` string,
`duration` int,
`cachehit` string,
`tls` string,
`render` string)
PARTITIONED BY (
`year` int,
`month` int,
`day` int,
`hour` int)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
'escapeChar'='\\',
'quoteChar'='\"',
'separatorChar'=',')
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
's3://openstreetmap-fastly-logs/tile/v18'
TBLPROPERTIES (
'has_encrypted_data'='false',
'projection.day.digits'='2',
'projection.day.range'='1,31',
'projection.day.type'='integer',
'projection.enabled'='true',
'projection.hour.digits'='2',
'projection.hour.range'='0,23',
'projection.hour.type'='integer',
'projection.month.digits'='2',
'projection.month.range'='1,12',
'projection.month.type'='integer',
'projection.year.range'='2021,2030',
'projection.year.type'='integer')
Make a new aggregation table, if applicable
CREATE TABLE logs.fastly_success_logs_v1
WITH (
external_location = 's3://openstreetmap-tile-aggregated-logs/fastly_success_logs/v1',
partitioned_by = ARRAY['year', 'month', 'day', 'hour']
)
AS
SELECT
time,
ip,
CAST(regexp_extract(request, {regex}, 1) AS integer) AS z,
CAST(regexp_extract(request, {regex}, 2) AS integer) AS x,
CAST(regexp_extract(request, {regex}, 3) AS integer) AS y,
regexp_extract(request, ' HTTP/(.+?)$', 1) AS version,
host,
referer,
useragent,
secchua,
requestedwith,
accept,
acceptlang,
asn,
country,
datacenter,
region,
status,
CAST(NULLIF(size,'"-"') AS integer) AS size,
duration,
cachehit,
tls,
render,
year,
month,
day,
hour
FROM logs.fastly_logs_v18
WITH NO DATA;
CREATE TABLE logs.fastly_minimised_logs_v1
WITH (
external_location = 's3://openstreetmap-tile-aggregated-logs/fastly_minimised_logs/v1',
partitioned_by = ARRAY['year', 'month', 'day', 'hour']
)
AS
SELECT
COUNT(*) AS requests,
COUNT(DISTINCT ip) AS distinct_ip,
SUM(size) AS size,
SUM(duration) AS duration,
z,
version,
referer,
useragent,
secchua,
requestedwith,
accept,
acceptlang,
asn,
country,
datacenter,
region,
status,
cachehit,
tls,
render,
year,
month,
day,
hour
FROM logs.fastly_success_logs_v1
GROUP BY
z,
version,
referer,
useragent,
secchua,
requestedwith,
accept,
acceptlang,
asn,
country,
datacenter,
region,
status,
cachehit,
tls,
render,
year,
month,
day,
hour
WITH NO DATA;
CREATE TABLE logs.fastly_location_logs_v1
WITH (
external_location = 's3://openstreetmap-tile-aggregated-logs/fastly_location_logs/v1',
partitioned_by = ARRAY['year', 'month', 'day', 'hour']
)
AS
SELECT
COUNT(*) AS requests,
COUNT(DISTINCT ip) AS distinct_ip,
SUM(size) AS size,
SUM(duration) AS duration,
z,
x,
y,
country,
datacenter,
region,
year,
month,
day,
hour
FROM logs.fastly_success_logs_v1
GROUP BY
z,
x,
y,
country,
datacenter,
region,
year,
month,
day,
hour
WITH NO DATA;
- Verify it works with and any changes are appropriate
- Wait at least one day so there is current data for a full day in the new tables