Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhance run table query performance with indices #1277

Merged
merged 15 commits into from
Oct 4, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 38 additions & 6 deletions qcodes/dataset/sqlite_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ def perform_db_upgrade(conn: SomeConnection, version: int=-1) -> None:
'newest version'
"""

upgrade_actions = [perform_db_upgrade_0_to_1]
upgrade_actions = [perform_db_upgrade_0_to_1, perform_db_upgrade_1_to_2]
newest_version = len(upgrade_actions)
version = newest_version if version == -1 else version

Expand All @@ -304,10 +304,6 @@ def perform_db_upgrade(conn: SomeConnection, version: int=-1) -> None:
def perform_db_upgrade_0_to_1(conn: SomeConnection) -> None:
"""
Perform the upgrade from version 0 to version 1

Returns:
A bool indicating whether everything went well. The next upgrade in
the upgrade chain should run conditioned in this bool
"""
log.info('Starting database upgrade version 0 -> 1')

Expand Down Expand Up @@ -353,6 +349,43 @@ def perform_db_upgrade_0_to_1(conn: SomeConnection) -> None:
set_user_version(conn, 1)


def perform_db_upgrade_1_to_2(conn: SomeConnection) -> None:
    """
    Perform the upgrade from version 1 to version 2

    The upgrade creates two indices on the runs table, IX_runs_exp_id
    (on exp_id) and IX_runs_guid (on guid), and afterwards calls
    set_user_version(conn, 2).

    If the version read from the database is not 1, a warning is logged
    and the function returns without touching the database.

    Args:
        conn: connection to the database that is to be upgraded

    Raises:
        RuntimeError: if the number of tables named 'runs' found in
            sqlite_master is different from 1
    """
    log.info('Starting database upgrade version 1 -> 2')

    start_version = get_user_version(conn)
    if start_version != 1:
        # Logger.warn is a deprecated alias of Logger.warning
        log.warning('Can not upgrade, current database version is '
                    f'{start_version}, aborting.')
        return

    sql = "SELECT name FROM sqlite_master WHERE type='table' AND name='runs'"
    cur = atomic_transaction(conn, sql)
    n_run_tables = len(cur.fetchall())

    # Guard clause: the indices can only be created on an existing table
    if n_run_tables != 1:
        raise RuntimeError(f"found {n_run_tables} runs tables expected 1")

    _IX_runs_exp_id = """
                      CREATE INDEX
                      IF NOT EXISTS IX_runs_exp_id
                      ON runs (exp_id DESC)
                      """
    _IX_runs_guid = """
                    CREATE INDEX
                    IF NOT EXISTS IX_runs_guid
                    ON runs (guid DESC)
                    """
    # Both indices are created inside a single atomic block
    with atomic(conn) as conn:
        transaction(conn, _IX_runs_exp_id)
        transaction(conn, _IX_runs_guid)

    # Bump the version first; only report success once that has happened
    set_user_version(conn, 2)
    log.info('Succesfully upgraded database version 1 -> 2.')


def transaction(conn: SomeConnection,
sql: str, *args: Any) -> sqlite3.Cursor:
"""Perform a transaction.
Expand Down Expand Up @@ -452,7 +485,6 @@ def init_db(conn: SomeConnection)->None:
transaction(conn, _layout_table_schema)
transaction(conn, _dependencies_table_schema)


def insert_column(conn: SomeConnection, table: str, name: str,
paramtype: Optional[str] = None) -> None:
"""Insert new column to a table
Expand Down
2 changes: 1 addition & 1 deletion qcodes/tests/dataset/legacy_DB_generation/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ First, please check if there is already a script producing .db-files of your des
* Identify the database version of which you would like to create one or more .db-files. Let's call that "your version".
* Check the variable `GIT_HASHES` in `utils.py` to see if "your version" already has a recorded commit hash.
* If not, search through the `git log` of `master` to find the merge commit *just* before the merge commit that introduces the *next* version after "your version". Put that first commit into `GIT_HASHES` along with the version number of "your version".
* Make a script called `generate_version_<your_version>.py`. Copy the general structure of `generate_version_0.py`. In particular, remember *not* to import qcodes *before* you have checked out the old commit. Make your generation functions take all the modules you need (e.g. `sqlite_base`, `data_set`, ...) as input arguments.
* Make a script called `generate_version_<your_version>.py`. Copy the general structure of `generate_version_0.py`. Make your generating functions take *ZERO* arguments and do all their imports inside their own scope.

## Anything else?

Expand Down
33 changes: 8 additions & 25 deletions qcodes/tests/dataset/legacy_DB_generation/generate_version_0.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,22 @@

import os

from git import Repo

# NB: it's important that we do not import anything from qcodes before we
# do the git magic (which we do below), hence the relative import here
import utils as utils

gitrepopath = os.sep.join(os.path.realpath(__file__).split(os.sep)[:-5])

fixturepath = os.sep.join(os.path.realpath(__file__).split(os.sep)[:-2])
fixturepath = os.path.join(fixturepath, 'fixtures', 'db_files')

repo = Repo(gitrepopath)


def generate_empty_DB_file(sqlite_base):
def generate_empty_DB_file():
"""
Generate the bare minimal DB file with no runs
"""

import qcodes.dataset.sqlite_base as sqlite_base

v0fixturepath = os.path.join(fixturepath, 'version0')
os.makedirs(v0fixturepath, exist_ok=True)
path = os.path.join(v0fixturepath, 'empty.db')
Expand All @@ -32,23 +31,7 @@ def generate_empty_DB_file(sqlite_base):

if __name__ == '__main__':

with utils.leave_untouched(repo): # pylint: disable=E1101

repo.git.checkout(utils.GIT_HASHES[0]) # pylint: disable=E1101

# If QCoDeS is not installed in editable mode, it makes no difference
# to do our git magic, since the import will be from site-packages in
# the environment folder, and not from the git-managed folder
import qcodes
qcpath = os.sep.join(qcodes.__file__.split(os.sep)[:-2])

# Windows and paths... There can be random un-capitalizations
if qcpath.lower() != gitrepopath.lower():
raise ValueError('QCoDeS does not seem to be installed in editable'
' mode, can not proceed. To use this script, '
'uninstall QCoDeS and reinstall it with pip '
'install -e <path-to-qcodes-folder>')

import qcodes.dataset.sqlite_base as sqlite_base
gens = (generate_empty_DB_file,)

generate_empty_DB_file(sqlite_base)
# pylint: disable=E1101
utils.checkout_to_old_version_and_run_generators(version=0, gens=gens)
36 changes: 36 additions & 0 deletions qcodes/tests/dataset/legacy_DB_generation/generate_version_1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Generate version 1 database files for qcodes' test suite to consume

import os

# NB: it's important that we do not import anything from qcodes before we
# do the git magic (which we do below), hence the relative import here
import utils as utils


fixturepath = os.sep.join(os.path.realpath(__file__).split(os.sep)[:-2])
fixturepath = os.path.join(fixturepath, 'fixtures', 'db_files')


def generate_empty_DB_file():
    """
    Generate the bare minimal version 1 DB file with no runs

    The file is called empty.db and is placed in the 'version1' subfolder
    of the fixture path. An already existing file of that name is removed
    before sqlite_base.connect is called on the path.
    """

    # Imported inside the function scope rather than at module level, so
    # that the import only happens when the generator is actually called
    import qcodes.dataset.sqlite_base as sqlite_base

    target_dir = os.path.join(fixturepath, 'version1')
    os.makedirs(target_dir, exist_ok=True)

    db_file = os.path.join(target_dir, 'empty.db')
    # Start from a clean slate if an older fixture is lying around
    if os.path.exists(db_file):
        os.remove(db_file)

    sqlite_base.connect(db_file)


if __name__ == '__main__':

    # Run every generator of this script against the version 1 commit
    # pylint: disable=E1101
    utils.checkout_to_old_version_and_run_generators(
        version=1,
        gens=(generate_empty_DB_file,))
47 changes: 45 additions & 2 deletions qcodes/tests/dataset/legacy_DB_generation/utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,26 @@
# General utilities for the database generation and loading scheme
from typing import Dict, List
from typing import Dict, List, Tuple
from contextlib import contextmanager
import os

GIT_HASHES: Dict[int, str] = {0: '78d42620fc245a975b5a615ed5e33061baac7846'}
from git import Repo

# A brief overview of what each version introduces:
#
# Version 0: the original table schema, runs, experiments, layouts,
# dependencies, result-tables
#
# Version 1: a GUID column is added to the runs table
#
# Version 2: indices on exp_id and guid are added to the runs table
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is description of version 2 missing?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We will add that when we need it, i.e. when we have version 3 on the table.



GIT_HASHES: Dict[int, str] = {0: '78d42620fc245a975b5a615ed5e33061baac7846',
1: '056d59627e22fa3ca7aad4c265e9897c343f79cf'}

DB_NAMES: Dict[int, List[str]] = {0: ['']}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this used?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nein. Should have been removed.


gitrepopath = os.sep.join(os.path.realpath(__file__).split(os.sep)[:-5])
repo = Repo(gitrepopath)


@contextmanager
def leave_untouched(repo):
Expand All @@ -32,3 +47,31 @@ def leave_untouched(repo):
repo.git.reset('--hard', current_commit)
if not was_detached:
repo.git.checkout(current_branch)


def checkout_to_old_version_and_run_generators(version: int,
                                               gens: Tuple) -> None:
    """
    Check out the commit recorded for an older database version and call
    each of the supplied generating functions.

    Args:
        version: key into GIT_HASHES selecting the commit to check out
        gens: the generating functions; each one is called without
            arguments, in the order given

    Raises:
        ValueError: if the imported qcodes package is not located in the
            git repository folder (compared case-insensitively)
    """

    with leave_untouched(repo):

        old_commit = GIT_HASHES[version]
        repo.git.checkout(old_commit)

        # The import must come after the checkout. If QCoDeS is not
        # installed in editable mode, the git magic makes no difference,
        # since the import will be from site-packages in the environment
        # folder, and not from the git-managed folder
        import qcodes
        path_parts = qcodes.__file__.split(os.sep)
        installed_root = os.sep.join(path_parts[:-2])

        # Windows and paths... There can be random un-capitalizations,
        # hence the case-insensitive comparison
        same_location = installed_root.lower() == gitrepopath.lower()
        if not same_location:
            raise ValueError('QCoDeS does not seem to be installed in editable'
                             ' mode, can not proceed. To use this script, '
                             'uninstall QCoDeS and reinstall it with pip '
                             'install -e <path-to-qcodes-folder>')

        for make_db_files in gens:
            make_db_files()
33 changes: 32 additions & 1 deletion qcodes/tests/dataset/test_database_creation_and_upgrading.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
update_GUIDs,
get_user_version,
atomic_transaction,
perform_db_upgrade_0_to_1)
perform_db_upgrade_0_to_1,
perform_db_upgrade_1_to_2)

from qcodes.dataset.guids import parse_guid
import qcodes.tests.dataset
Expand Down Expand Up @@ -120,6 +121,36 @@ def test_perform_actual_upgrade_0_to_1():
assert len(c.fetchall()) == 0


def test_perform_actual_upgrade_1_to_2():
    """
    Check that upgrading a version 1 database creates two indices on the
    runs table. The test is skipped if the version1 fixture folder does
    not exist.
    """

    fixture_dir = os.path.join(fixturepath, 'db_files', 'version1')

    if not os.path.exists(fixture_dir):
        pytest.skip("No db-file fixtures found. You can generate test db-files"
                    " using the scripts in the legacy_DB_generation folder")

    old_db_file = os.path.join(fixture_dir, 'empty.db')

    guid_table_query = "SELECT guid FROM runs"
    index_query = "PRAGMA index_list(runs)"

    with temporarily_copied_DB(old_db_file, debug=False, version=1) as conn:

        # Before the upgrade: version 1, no runs and no indices
        assert get_user_version(conn) == 1

        guid_cursor = atomic_transaction(conn, guid_table_query)
        assert len(guid_cursor.fetchall()) == 0

        indices_before = atomic_transaction(conn, index_query)
        assert len(indices_before.fetchall()) == 0

        perform_db_upgrade_1_to_2(conn)

        # After the upgrade: both new indices are listed for the runs table
        indices_after = atomic_transaction(conn, index_query)
        assert len(indices_after.fetchall()) == 2


@pytest.mark.usefixtures("empty_temp_db")
def test_update_existing_guids(caplog):

Expand Down