Skip to content

Tiles Fastly Log Changes

Paul Norman edited this page Aug 17, 2023 · 7 revisions

Some logging changes might be backwards-incompatible. This is how to transition with backwards incompatible changes

Set up new fastly logging endpoint

  1. Create a logging endpoint that has the same settings as the previous one
  2. Adjust the path to increment the version number (e.g. /tile/v18/year=%Y/month=%m/day=%d/hour=%H/)
  3. Make the necessary changes to the logging settings
  4. Save

Create a new Athena table

  1. Create a new table
CREATE EXTERNAL TABLE fastly_logs_v18 (
  `time` string, 
  `ip` string, 
  `request` string, 
  `host` string, 
  `referer` string, 
  `useragent` string, 
  `secchua` string, 
  `requestedwith` string,
  `accept` string, 
  `acceptlang` string, 
  `asn` bigint, 
  `country` string, 
  `datacenter` string, 
  `region` string, 
  `status` smallint, 
  `size` string, 
  `duration` int, 
  `cachehit` string, 
  `tls` string, 
  `render` string)
PARTITIONED BY ( 
  `year` int, 
  `month` int, 
  `day` int, 
  `hour` int)
ROW FORMAT SERDE 
  'org.apache.hadoop.hive.serde2.OpenCSVSerde' 
WITH SERDEPROPERTIES ( 
  'escapeChar'='\\', 
  'quoteChar'='\"', 
  'separatorChar'=',') 
STORED AS INPUTFORMAT 
  'org.apache.hadoop.mapred.TextInputFormat' 
OUTPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
  's3://openstreetmap-fastly-logs/tile/v18'
TBLPROPERTIES (
  'has_encrypted_data'='false', 
  'projection.day.digits'='2', 
  'projection.day.range'='1,31', 
  'projection.day.type'='integer', 
  'projection.enabled'='true', 
  'projection.hour.digits'='2', 
  'projection.hour.range'='0,23', 
  'projection.hour.type'='integer', 
  'projection.month.digits'='2', 
  'projection.month.range'='1,12', 
  'projection.month.type'='integer', 
  'projection.year.range'='2021,2030', 
  'projection.year.type'='integer')

Make a new aggregation table, if applicable

CREATE TABLE logs.fastly_success_logs_v1
WITH (
    external_location = 's3://openstreetmap-tile-aggregated-logs/fastly_success_logs/v1',
    partitioned_by = ARRAY['year', 'month', 'day', 'hour']
)
AS
SELECT
    time,
    ip,
    CAST(regexp_extract(request, {regex}, 1) AS integer) AS z,
    CAST(regexp_extract(request, {regex}, 2) AS integer) AS x,
    CAST(regexp_extract(request, {regex}, 3) AS integer) AS y,
    regexp_extract(request, ' HTTP/(.+?)$', 1) AS version,
    host,
    referer,
    useragent,
    secchua,
    requestedwith,
    accept,
    acceptlang,
    asn,
    country,
    datacenter,
    region,
    status,
    CAST(NULLIF(size,'"-"') AS integer) AS size,
    duration,
    cachehit,
    tls,
    render,
    year,
    month,
    day,
    hour
FROM logs.fastly_logs_v18
WITH NO DATA;
CREATE TABLE logs.fastly_minimised_logs_v1
WITH (
    external_location = 's3://openstreetmap-tile-aggregated-logs/fastly_minimised_logs/v1',
    partitioned_by = ARRAY['year', 'month', 'day', 'hour']
)
AS
SELECT
    COUNT(*) AS requests,
    COUNT(DISTINCT ip) AS distinct_ip,
    SUM(size) AS size,
    SUM(duration) AS duration,
    z,
    version,
    referer,
    useragent,
    secchua,
    requestedwith,
    accept,
    acceptlang,
    asn,
    country,
    datacenter,
    region,
    status,
    cachehit,
    tls,
    render,
    year,
    month,
    day,
    hour
FROM logs.fastly_success_logs_v1
GROUP BY
    z,
    version,
    referer,
    useragent,
    secchua,
    requestedwith,
    accept,
    acceptlang,
    asn,
    country,
    datacenter,
    region,
    status,
    cachehit,
    tls,
    render,
    year,
    month,
    day,
    hour
WITH NO DATA;
CREATE TABLE logs.fastly_location_logs_v1
WITH (
    external_location = 's3://openstreetmap-tile-aggregated-logs/fastly_location_logs/v1',
    partitioned_by = ARRAY['year', 'month', 'day', 'hour']
)
AS
SELECT
    COUNT(*) AS requests,
    COUNT(DISTINCT ip) AS distinct_ip,
    SUM(size) AS size,
    SUM(duration) AS duration,
    z,
    x,
    y,
    country,
    datacenter,
    region,
    year,
    month,
    day,
    hour
FROM logs.fastly_success_logs_v1
GROUP BY
    z,
    x,
    y,
    country,
    datacenter,
    region,
    year,
    month,
    day,
    hour
WITH NO DATA;
  1. Verify it works with and any changes are appropriate
  2. Wait at least one day so there is current data for a full day in the new tables

Adjust log analysis queries