Skip to content

Commit

Permalink
normalize hostname and pathname columns
Browse files Browse the repository at this point in the history
to save on disk storage
  • Loading branch information
dannyvankooten committed Nov 12, 2018
1 parent 69986d9 commit dbcadcd
Show file tree
Hide file tree
Showing 39 changed files with 415 additions and 43 deletions.
Binary file added mc4wp.db.orig
Binary file not shown.
50 changes: 35 additions & 15 deletions pkg/aggregator/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,18 +40,28 @@ func (agg *Aggregator) getPageStats(r *results, siteID int64, t time.Time, hostn
return stats, nil
}

stats, err := agg.database.GetPageStats(siteID, t, hostname, pathname)
hostnameID, err := agg.database.HostnameID(hostname)
if err != nil {
return nil, err
}

pathnameID, err := agg.database.PathnameID(pathname)
if err != nil {
return nil, err
}

stats, err := agg.database.GetPageStats(siteID, t, hostnameID, pathnameID)
if err != nil && err != datastore.ErrNoResults {
return nil, err
}

if stats == nil {
stats = &models.PageStats{
SiteID: siteID,
New: true,
Hostname: hostname,
Pathname: pathname,
Date: t,
SiteID: siteID,
New: true,
HostnameID: hostnameID,
PathnameID: pathnameID,
Date: t,
}

}
Expand All @@ -66,24 +76,34 @@ func (agg *Aggregator) getReferrerStats(r *results, siteID int64, t time.Time, h
return stats, nil
}

hostnameID, err := agg.database.HostnameID(hostname)
if err != nil {
return nil, err
}

pathnameID, err := agg.database.PathnameID(pathname)
if err != nil {
return nil, err
}

// get from db
stats, err := agg.database.GetReferrerStats(siteID, t, hostname, pathname)
stats, err := agg.database.GetReferrerStats(siteID, t, hostnameID, pathnameID)
if err != nil && err != datastore.ErrNoResults {
return nil, err
}

if stats == nil {
stats = &models.ReferrerStats{
SiteID: siteID,
New: true,
Hostname: hostname,
Pathname: pathname,
Date: t,
Group: "",
SiteID: siteID,
New: true,
HostnameID: hostnameID,
PathnameID: pathnameID,
Date: t,
Group: "",
}

// TODO: Abstract this
if strings.Contains(stats.Hostname, "www.google.") {
// TODO: Abstract this so we can add more groupings
if strings.Contains(hostname, "www.google.") {
stats.Group = "Google"
}
}
Expand Down
8 changes: 6 additions & 2 deletions pkg/datastore/datastore.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,17 +45,21 @@ type Datastore interface {
DeletePageviews([]*models.Pageview) error

// page stats
GetPageStats(int64, time.Time, string, string) (*models.PageStats, error)
GetPageStats(int64, time.Time, int64, int64) (*models.PageStats, error)
SavePageStats(*models.PageStats) error
GetAggregatedPageStats(int64, time.Time, time.Time, int64) ([]*models.PageStats, error)
GetAggregatedPageStatsPageviews(int64, time.Time, time.Time) (int64, error)

// referrer stats
GetReferrerStats(int64, time.Time, string, string) (*models.ReferrerStats, error)
GetReferrerStats(int64, time.Time, int64, int64) (*models.ReferrerStats, error)
SaveReferrerStats(*models.ReferrerStats) error
GetAggregatedReferrerStats(int64, time.Time, time.Time, int64) ([]*models.ReferrerStats, error)
GetAggregatedReferrerStatsPageviews(int64, time.Time, time.Time) (int64, error)

// hostnames
HostnameID(name string) (int64, error)
PathnameID(name string) (int64, error)

// misc
Health() error
Close() error
Expand Down
30 changes: 30 additions & 0 deletions pkg/datastore/sqlstore/hostnames.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package sqlstore

import (
"database/sql"
)

func (db *sqlstore) HostnameID(name string) (int64, error) {
var id int64
query := db.Rebind("SELECT id FROM hostnames WHERE name = ? LIMIT 1")
err := db.Get(&id, query, name)

if err == sql.ErrNoRows {
// Postgres does not support LastInsertID, so use a "... RETURNING" select query
query := db.Rebind(`INSERT INTO hostnames(name) VALUES(?)`)
if db.Driver == POSTGRES {
err := db.Get(&id, query+" RETURNING id", name)
return id, err
}

// MySQL and SQLite do support LastInsertID, so use that
r, err := db.Exec(query, name)
if err != nil {
return 0, err
}

return r.LastInsertId()
}

return id, err
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- +migrate Up
CREATE TABLE hostnames(
id INTEGER AUTO_INCREMENT PRIMARY KEY NOT NULL,
name VARCHAR(255) NOT NULL
) CHARACTER SET=utf8;

-- +migrate Down
DROP TABLE IF EXISTS hostnames;
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- +migrate Up
CREATE UNIQUE INDEX unique_hostnames_name ON hostnames(name);

-- +migrate Down
DROP INDEX IF EXISTS unique_hostnames_name;
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- +migrate Up
CREATE TABLE pathnames(
id INTEGER AUTO_INCREMENT PRIMARY KEY NOT NULL,
name VARCHAR(255) NOT NULL
) CHARACTER SET=utf8;

-- +migrate Down
DROP TABLE IF EXISTS pathnames;
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- +migrate Up
CREATE UNIQUE INDEX unique_pathnames_name ON pathnames(name);

-- +migrate Down
DROP INDEX IF EXISTS unique_pathnames_name;
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- +migrate Up
INSERT IGNORE INTO hostnames(name) SELECT DISTINCT(hostname) FROM daily_page_stats;
INSERT IGNORE INTO hostnames(name) SELECT DISTINCT(hostname) FROM daily_referrer_stats;

-- +migrate Down

Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- +migrate Up
INSERT IGNORE INTO pathnames(name) SELECT DISTINCT(pathname) FROM daily_page_stats;
INSERT IGNORE INTO pathnames(name) SELECT DISTINCT(pathname) FROM daily_referrer_stats;

-- +migrate Down

Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
-- +migrate Up
DROP TABLE IF EXISTS daily_page_stats_old;
RENAME TABLE daily_page_stats TO daily_page_stats_old;
CREATE TABLE daily_page_stats(
site_id INTEGER NOT NULL DEFAULT 1,
hostname_id INTEGER NOT NULL,
pathname_id INTEGER NOT NULL,
pageviews INTEGER NOT NULL,
visitors INTEGER NOT NULL,
entries INTEGER NOT NULL,
bounce_rate FLOAT NOT NULL,
known_durations INTEGER NOT NULL DEFAULT 0,
avg_duration FLOAT NOT NULL,
date DATE NOT NULL
) CHARACTER SET=utf8;
INSERT INTO daily_page_stats
SELECT site_id, h.id, p.id, pageviews, visitors, entries, bounce_rate, known_durations, avg_duration, date
FROM daily_page_stats_old s
LEFT JOIN hostnames h ON h.name = s.hostname
LEFT JOIN pathnames p ON p.name = s.pathname;
DROP TABLE daily_page_stats_old;

-- +migrate Down

Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
-- +migrate Up
DROP TABLE IF EXISTS daily_referrer_stats_old;
RENAME TABLE daily_referrer_stats TO daily_referrer_stats_old;
CREATE TABLE daily_referrer_stats(
site_id INTEGER NOT NULL DEFAULT 1,
hostname_id INTEGER NOT NULL,
pathname_id INTEGER NOT NULL,
groupname VARCHAR(255) NULL,
pageviews INTEGER NOT NULL,
visitors INTEGER NOT NULL,
bounce_rate FLOAT NOT NULL,
known_durations INTEGER NOT NULL DEFAULT 0,
avg_duration FLOAT NOT NULL,
date DATE NOT NULL
) CHARACTER SET=utf8;
INSERT INTO daily_referrer_stats
SELECT site_id, h.id, p.id, groupname, pageviews, visitors, bounce_rate, known_durations, avg_duration, date
FROM daily_referrer_stats_old s
LEFT JOIN hostnames h ON h.name = s.hostname
LEFT JOIN pathnames p ON p.name = s.pathname;
DROP TABLE daily_referrer_stats_old;

-- +migrate Down

Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- +migrate Up
CREATE UNIQUE INDEX unique_daily_page_stats ON daily_page_stats(site_id, hostname_id, pathname_id, date);
CREATE UNIQUE INDEX unique_daily_referrer_stats ON daily_referrer_stats(site_id, hostname_id, pathname_id, date);

-- +migrate Down
DROP INDEX unique_daily_page_stats ON daily_page_stats;
DROP INDEX unique_daily_referrer_stats ON daily_referrer_stats;
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- +migrate Up
CREATE TABLE hostnames(
id SERIAL PRIMARY KEY NOT NULL,
name VARCHAR(255) NOT NULL
);

-- +migrate Down
DROP TABLE IF EXISTS hostnames;
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- +migrate Up
CREATE UNIQUE INDEX unique_hostnames_name ON hostnames(name);

-- +migrate Down
DROP INDEX IF EXISTS unique_hostnames_name;
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- +migrate Up
CREATE TABLE pathnames(
id SERIAL PRIMARY KEY NOT NULL,
name VARCHAR(255) NOT NULL
);

-- +migrate Down
DROP TABLE IF EXISTS pathnames;
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- +migrate Up
CREATE UNIQUE INDEX unique_pathnames_name ON pathnames(name);

-- +migrate Down
DROP INDEX IF EXISTS unique_pathnames_name;
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- +migrate Up
INSERT INTO hostnames(name) SELECT DISTINCT(hostname) FROM daily_page_stats;
INSERT INTO hostnames(name) SELECT DISTINCT(hostname) FROM daily_referrer_stats ON CONFLICT(name) DO NOTHING;

-- +migrate Down

Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- +migrate Up
INSERT INTO pathnames(name) SELECT DISTINCT(pathname) FROM daily_page_stats ON CONFLICT(name) DO NOTHING; ;
INSERT INTO pathnames(name) SELECT DISTINCT(pathname) FROM daily_referrer_stats ON CONFLICT(name) DO NOTHING; ;

-- +migrate Down

Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
-- +migrate Up
DROP TABLE IF EXISTS daily_page_stats_old;
ALTER TABLE daily_page_stats RENAME TO daily_page_stats_old;
CREATE TABLE daily_page_stats(
site_id INTEGER NOT NULL DEFAULT 1,
hostname_id INTEGER NOT NULL,
pathname_id INTEGER NOT NULL,
pageviews INTEGER NOT NULL,
visitors INTEGER NOT NULL,
entries INTEGER NOT NULL,
bounce_rate FLOAT NOT NULL,
known_durations INTEGER NOT NULL DEFAULT 0,
avg_duration FLOAT NOT NULL,
date DATE NOT NULL
);
INSERT INTO daily_page_stats
SELECT site_id, h.id, p.id, pageviews, visitors, entries, bounce_rate, known_durations, avg_duration, date
FROM daily_page_stats_old s
LEFT JOIN hostnames h ON h.name = s.hostname
LEFT JOIN pathnames p ON p.name = s.pathname;
DROP TABLE daily_page_stats_old;

-- +migrate Down

Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
-- +migrate Up
DROP TABLE IF EXISTS daily_referrer_stats_old;
ALTER TABLE daily_referrer_stats RENAME TO daily_referrer_stats_old;
CREATE TABLE daily_referrer_stats(
site_id INTEGER NOT NULL DEFAULT 1,
hostname_id INTEGER NOT NULL,
pathname_id INTEGER NOT NULL,
groupname VARCHAR(255) NULL,
pageviews INTEGER NOT NULL,
visitors INTEGER NOT NULL,
bounce_rate FLOAT NOT NULL,
known_durations INTEGER NOT NULL DEFAULT 0,
avg_duration FLOAT NOT NULL,
date DATE NOT NULL
);
INSERT INTO daily_referrer_stats
SELECT site_id, h.id, p.id, groupname, pageviews, visitors, bounce_rate, known_durations, avg_duration, date
FROM daily_referrer_stats_old s
LEFT JOIN hostnames h ON h.name = s.hostname
LEFT JOIN pathnames p ON p.name = s.pathname;
DROP TABLE daily_referrer_stats_old;

-- +migrate Down

Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- +migrate Up
CREATE UNIQUE INDEX unique_daily_page_stats ON daily_page_stats(site_id, hostname_id, pathname_id, date);
CREATE UNIQUE INDEX unique_daily_referrer_stats ON daily_referrer_stats(site_id, hostname_id, pathname_id, date);

-- +migrate Down
DROP INDEX unique_daily_page_stats;
DROP INDEX unique_daily_referrer_stats;
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- +migrate Up
CREATE TABLE hostnames(
id INTEGER PRIMARY KEY,
name VARCHAR(255) NOT NULL
);

-- +migrate Down
DROP TABLE IF EXISTS hostnames;
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- +migrate Up
CREATE UNIQUE INDEX unique_hostnames_name ON hostnames(name);

-- +migrate Down
DROP INDEX IF EXISTS unique_hostnames_name;
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- +migrate Up
CREATE TABLE pathnames(
id INTEGER PRIMARY KEY,
name VARCHAR(255) NOT NULL
);

-- +migrate Down
DROP TABLE IF EXISTS pathnames;
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- +migrate Up
CREATE UNIQUE INDEX unique_pathnames_name ON pathnames(name);

-- +migrate Down
DROP INDEX IF EXISTS unique_pathnames_name;
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- +migrate Up
INSERT OR IGNORE INTO hostnames(name) SELECT DISTINCT(hostname) FROM daily_page_stats;
INSERT OR IGNORE INTO hostnames(name) SELECT DISTINCT(hostname) FROM daily_referrer_stats;

-- +migrate Down

Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- +migrate Up
INSERT OR IGNORE INTO pathnames(name) SELECT DISTINCT(pathname) FROM daily_page_stats;
INSERT OR IGNORE INTO pathnames(name) SELECT DISTINCT(pathname) FROM daily_referrer_stats;

-- +migrate Down

Loading

0 comments on commit dbcadcd

Please sign in to comment.