From afc622be0aa680c0d3b858ce7e350f41d425b2f9 Mon Sep 17 00:00:00 2001 From: tkrabel Date: Sat, 28 Oct 2023 22:46:02 +0200 Subject: [PATCH 01/12] Use in-memory database with shared cache --- rope/contrib/autoimport/sqlite.py | 5 +++- ropetest/contrib/autoimport/autoimporttest.py | 24 ++++++++++++++++++- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/rope/contrib/autoimport/sqlite.py b/rope/contrib/autoimport/sqlite.py index 507352782..690cf6756 100644 --- a/rope/contrib/autoimport/sqlite.py +++ b/rope/contrib/autoimport/sqlite.py @@ -153,7 +153,10 @@ def create_database_connection( raise Exception("if memory=False, project must be provided") db_path: str if memory or project is None or project.ropefolder is None: - db_path = ":memory:" + # Allows the in-memory db to be shared across threads + # See https://www.sqlite.org/inmemorydb.html + project_hash = hash(project and project.ropefolder and project.ropefolder.real_path) + db_path = f"file:memdb{project_hash}:?mode=memory&cache=shared" else: db_path = str(Path(project.ropefolder.real_path) / "autoimport.db") return sqlite3.connect(db_path) diff --git a/ropetest/contrib/autoimport/autoimporttest.py b/ropetest/contrib/autoimport/autoimporttest.py index 4b0430119..4a4612f32 100644 --- a/ropetest/contrib/autoimport/autoimporttest.py +++ b/ropetest/contrib/autoimport/autoimporttest.py @@ -1,3 +1,5 @@ +import sqlite3 + from contextlib import closing, contextmanager from textwrap import dedent from unittest.mock import ANY, patch @@ -19,13 +21,33 @@ def autoimport(project: Project): def is_in_memory_database(connection): db_list = database_list(connection) assert db_list == [(0, "main", ANY)] - return db_list[0][2] == "" + return db_list[0][2].endswith("mode=memory&cache=shared") def database_list(connection): return list(connection.execute("PRAGMA database_list")) +def get_database_name(connection: sqlite3.Connection): + return database_list(connection)[0][2] + + +def test_autoimport_database_name( + project: Project, +): + # project is None + connection1 = AutoImport.create_database_connection(memory=True, project=None) + connection2 = AutoImport.create_database_connection(memory=True, project=None) + assert get_database_name(connection1) == get_database_name(connection2) + + # project is not None + connection3 = AutoImport.create_database_connection(memory=True, project=project) + connection4 = AutoImport.create_database_connection(memory=True, project=project) + assert get_database_name(connection3) == get_database_name(connection4) + + assert get_database_name(connection1) != get_database_name(connection3) + + def test_autoimport_connection_parameter_with_in_memory( project: Project, autoimport: AutoImport, From 395250d2e356e516e25e60f601ac7b110cef2707 Mon Sep 17 00:00:00 2001 From: tkrabel Date: Sat, 28 Oct 2023 23:13:02 +0200 Subject: [PATCH 02/12] add URI --- rope/contrib/autoimport/sqlite.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/rope/contrib/autoimport/sqlite.py b/rope/contrib/autoimport/sqlite.py index 690cf6756..a3e3085f8 100644 --- a/rope/contrib/autoimport/sqlite.py +++ b/rope/contrib/autoimport/sqlite.py @@ -151,15 +151,13 @@ def create_database_connection( """ if not memory and project is None: raise Exception("if memory=False, project must be provided") - db_path: str if memory or project is None or project.ropefolder is None: # Allows the in-memory db to be shared across threads # See https://www.sqlite.org/inmemorydb.html project_hash = hash(project and project.ropefolder and project.ropefolder.real_path) - db_path = f"file:memdb{project_hash}:?mode=memory&cache=shared" + return sqlite3.connect(f"file:memdb{project_hash}:?mode=memory&cache=shared", uri=True) else: - db_path = str(Path(project.ropefolder.real_path) / "autoimport.db") - return sqlite3.connect(db_path) + return sqlite3.connect(str(Path(project.ropefolder.real_path) / "autoimport.db")) def _setup_db(self): models.Metadata.create_table(self.connection) From f021221547ea835b455525a1c0c60e386562afbf Mon Sep 17 00:00:00 2001 From: tkrabel Date: Sat, 28 Oct 2023 23:18:36 +0200 Subject: [PATCH 03/12] remove unit test bc the db name is '' for every db created --- ropetest/contrib/autoimport/autoimporttest.py | 24 +------------------ 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/ropetest/contrib/autoimport/autoimporttest.py b/ropetest/contrib/autoimport/autoimporttest.py index 4a4612f32..4b0430119 100644 --- a/ropetest/contrib/autoimport/autoimporttest.py +++ b/ropetest/contrib/autoimport/autoimporttest.py @@ -1,5 +1,3 @@ -import sqlite3 - from contextlib import closing, contextmanager from textwrap import dedent from unittest.mock import ANY, patch @@ -21,33 +19,13 @@ def autoimport(project: Project): def is_in_memory_database(connection): db_list = database_list(connection) assert db_list == [(0, "main", ANY)] - return db_list[0][2].endswith("mode=memory&cache=shared") + return db_list[0][2] == "" def database_list(connection): return list(connection.execute("PRAGMA database_list")) -def get_database_name(connection: sqlite3.Connection): - return database_list(connection)[0][2] - - -def test_autoimport_database_name( - project: Project, -): - # project is None - connection1 = AutoImport.create_database_connection(memory=True, project=None) - connection2 = AutoImport.create_database_connection(memory=True, project=None) - assert get_database_name(connection1) == get_database_name(connection2) - - # project is not None - connection3 = AutoImport.create_database_connection(memory=True, project=project) - connection4 = AutoImport.create_database_connection(memory=True, project=project) - assert get_database_name(connection3) == get_database_name(connection4) - - assert get_database_name(connection1) != get_database_name(connection3) - - def test_autoimport_connection_parameter_with_in_memory( project: Project, autoimport: AutoImport, From a288c22961a131a2224c957a25977c4ae7969d94 Mon Sep 17 00:00:00 2001 From: tkrabel Date: Sat, 28 Oct 2023 23:22:03 +0200 Subject: [PATCH 04/12] unit test share memory --- ropetest/contrib/autoimport/autoimporttest.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ropetest/contrib/autoimport/autoimporttest.py b/ropetest/contrib/autoimport/autoimporttest.py index 4b0430119..de05672c0 100644 --- a/ropetest/contrib/autoimport/autoimporttest.py +++ b/ropetest/contrib/autoimport/autoimporttest.py @@ -26,6 +26,17 @@ def database_list(connection): return list(connection.execute("PRAGMA database_list")) +def test_in_memory_database_share_cache(project): + ai1 = AutoImport(project, memory=True) + ai2 = AutoImport(project, memory=True) + + with ai1.connection: + ai1.connection.execute("CREATE TABLE shared(data)") + ai1.connection.execute("INSERT INTO shared VALUES(28)") + res = ai2.connection.execute("SELECT data FROM shared") + assert res.fetchone() == (28,) + + def test_autoimport_connection_parameter_with_in_memory( project: Project, autoimport: AutoImport, From fd89af7cd9527d3634eee87adba4fc8ab6a3f8f9 Mon Sep 17 00:00:00 2001 From: tkrabel Date: Sat, 28 Oct 2023 23:30:53 +0200 Subject: [PATCH 05/12] unit test: ensure different project uses different in-memory database --- ropetest/conftest.py | 7 +++++++ ropetest/contrib/autoimport/autoimporttest.py | 11 ++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/ropetest/conftest.py b/ropetest/conftest.py index d2efc68c6..1b6d1bb77 100644 --- a/ropetest/conftest.py +++ b/ropetest/conftest.py @@ -13,6 +13,13 @@ def project(): testutils.remove_project(project) +@pytest.fixture +def project2(): + project = testutils.sample_project("another_project") + yield project + testutils.remove_project(project) + + @pytest.fixture def project_path(project): yield pathlib.Path(project.address) diff --git a/ropetest/contrib/autoimport/autoimporttest.py b/ropetest/contrib/autoimport/autoimporttest.py index de05672c0..669106188 100644 --- a/ropetest/contrib/autoimport/autoimporttest.py +++ b/ropetest/contrib/autoimport/autoimporttest.py @@ -1,3 +1,5 @@ +import sqlite3 + from contextlib import closing, contextmanager from textwrap import dedent from unittest.mock import ANY, patch @@ -26,15 +28,18 @@ def database_list(connection): return list(connection.execute("PRAGMA database_list")) -def test_in_memory_database_share_cache(project): +def test_in_memory_database_share_cache(project, project2): ai1 = AutoImport(project, memory=True) ai2 = AutoImport(project, memory=True) + ai3 = AutoImport(project2, memory=True) + with ai1.connection: ai1.connection.execute("CREATE TABLE shared(data)") ai1.connection.execute("INSERT INTO shared VALUES(28)") - res = ai2.connection.execute("SELECT data FROM shared") - assert res.fetchone() == (28,) + assert ai2.connection.execute("SELECT data FROM shared").fetchone() == (28,) + with pytest.raises(sqlite3.OperationalError, match="no such table: shared"): + ai3.connection.execute("SELECT data FROM shared").fetchone() def test_autoimport_connection_parameter_with_in_memory( From 9f3c7a3de15c5578230068887d6a3602b3f5e589 Mon Sep 17 00:00:00 2001 From: tkrabel Date: Sat, 28 Oct 2023 23:39:34 +0200 Subject: [PATCH 06/12] add CHANGELOG --- CHANGELOG.md | 1 + ropetest/contrib/autoimport/autoimporttest.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b7c0104fa..31bab583a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ - #710, #561 Implement `except*` syntax (@lieryan) - #711 allow building documentation without having rope module installed (@kloczek) +- #719 Create in-memory databases with shared cache per project to support a one-thread-one-autoimport model in multithreading environments (@tkrabel) # Release 1.10.0 diff --git a/ropetest/contrib/autoimport/autoimporttest.py b/ropetest/contrib/autoimport/autoimporttest.py index 669106188..72a856457 100644 --- a/ropetest/contrib/autoimport/autoimporttest.py +++ b/ropetest/contrib/autoimport/autoimporttest.py @@ -29,17 +29,17 @@ def database_list(connection): def test_in_memory_database_share_cache(project, project2): - ai1 = AutoImport(project, memory=True) - ai2 = AutoImport(project, memory=True) + ai_1 = AutoImport(project, memory=True) + ai_2 = AutoImport(project, memory=True) - ai3 = AutoImport(project2, memory=True) + ai_3 = AutoImport(project2, memory=True) - with ai1.connection: - ai1.connection.execute("CREATE TABLE shared(data)") - ai1.connection.execute("INSERT INTO shared VALUES(28)") - assert ai2.connection.execute("SELECT data FROM shared").fetchone() == (28,) + with ai_1.connection: + ai_1.connection.execute("CREATE TABLE shared(data)") + ai_1.connection.execute("INSERT INTO shared VALUES(28)") + assert ai_2.connection.execute("SELECT data FROM shared").fetchone() == (28,) with pytest.raises(sqlite3.OperationalError, match="no such table: shared"): - ai3.connection.execute("SELECT data FROM shared").fetchone() + ai_3.connection.execute("SELECT data FROM shared").fetchone() def test_autoimport_connection_parameter_with_in_memory( From ab9c09d313dcaedb18a35ef12ee1f52d9d0ab965 Mon Sep 17 00:00:00 2001 From: tkrabel Date: Sat, 28 Oct 2023 23:41:22 +0200 Subject: [PATCH 07/12] update CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 31bab583a..bebf79e29 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ - #710, #561 Implement `except*` syntax (@lieryan) - #711 allow building documentation without having rope module installed (@kloczek) -- #719 Create in-memory databases with shared cache per project to support a one-thread-one-autoimport model in multithreading environments (@tkrabel) +- #719 Allow in-memory databases being shared across threads (@tkrabel) # Release 1.10.0 From ba65f2eb88dbd7fd6a84544829fd8e75140f088b Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Sun, 29 Oct 2023 18:44:59 +1100 Subject: [PATCH 08/12] Fix grammar --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bebf79e29..73c035973 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ - #710, #561 Implement `except*` syntax (@lieryan) - #711 allow building documentation without having rope module installed (@kloczek) -- #719 Allow in-memory databases being shared across threads (@tkrabel) +- #719 Allows the in-memory db to be shared across threads (@tkrabel) # Release 1.10.0 From 50ac2197db987fd48d5aeab2ce7e54c3c03de175 Mon Sep 17 00:00:00 2001 From: tkrabel Date: Sun, 29 Oct 2023 10:29:59 +0100 Subject: [PATCH 09/12] black format --- rope/contrib/autoimport/sqlite.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/rope/contrib/autoimport/sqlite.py b/rope/contrib/autoimport/sqlite.py index a3e3085f8..7fda073cb 100644 --- a/rope/contrib/autoimport/sqlite.py +++ b/rope/contrib/autoimport/sqlite.py @@ -154,10 +154,16 @@ def create_database_connection( if memory or project is None or project.ropefolder is None: # Allows the in-memory db to be shared across threads # See https://www.sqlite.org/inmemorydb.html - project_hash = hash(project and project.ropefolder and project.ropefolder.real_path) - return sqlite3.connect(f"file:memdb{project_hash}:?mode=memory&cache=shared", uri=True) + project_hash = hash( + project and project.ropefolder and project.ropefolder.real_path + ) + return sqlite3.connect( + f"file:memdb{project_hash}:?mode=memory&cache=shared", uri=True + ) else: - return sqlite3.connect(str(Path(project.ropefolder.real_path) / "autoimport.db")) + return sqlite3.connect( + str(Path(project.ropefolder.real_path) / "autoimport.db") + ) def _setup_db(self): models.Metadata.create_table(self.connection) From 18f9f4ab5e80735f26202f156d7c792e1bdc1146 Mon Sep 17 00:00:00 2001 From: tkrabel Date: Wed, 1 Nov 2023 18:26:38 +0100 Subject: [PATCH 10/12] address comment --- rope/contrib/autoimport/sqlite.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/rope/contrib/autoimport/sqlite.py b/rope/contrib/autoimport/sqlite.py index 7fda073cb..6c7a953f4 100644 --- a/rope/contrib/autoimport/sqlite.py +++ b/rope/contrib/autoimport/sqlite.py @@ -2,6 +2,7 @@ import contextlib import json +import random import re import sqlite3 import sys @@ -154,9 +155,13 @@ def create_database_connection( if memory or project is None or project.ropefolder is None: # Allows the in-memory db to be shared across threads # See https://www.sqlite.org/inmemorydb.html - project_hash = hash( - project and project.ropefolder and project.ropefolder.real_path - ) + project_hash: int + if project is None: + project_hash = random.randint(100_000_000, 999_999_999) + elif project.ropefolder is None: + project_hash = hash(project.address) + else: + project_hash = hash(project.ropefolder.real_path) return sqlite3.connect( f"file:memdb{project_hash}:?mode=memory&cache=shared", uri=True ) From 9856c02dca517226831cbd0d88301b06235fe4c9 Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Mon, 6 Nov 2023 01:47:30 +1100 Subject: [PATCH 11/12] Change to use sha256 hashing hash() is intended to be used for sets and dictionaries, where the occasional collisions are expected. Also, in some versions of Python, hash() is affected by hash randomization, it's not clear whether this will be desirable. --- rope/contrib/autoimport/sqlite.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/rope/contrib/autoimport/sqlite.py b/rope/contrib/autoimport/sqlite.py index 76049ca1e..5791ea977 100644 --- a/rope/contrib/autoimport/sqlite.py +++ b/rope/contrib/autoimport/sqlite.py @@ -2,7 +2,8 @@ import contextlib import json -import random +from hashlib import sha256 +import secrets import re import sqlite3 import sys @@ -154,18 +155,21 @@ def create_database_connection( memory : bool if true, don't persist to disk """ + def calculate_project_hash(data: str) -> str: + return sha256(data.encode()).hexdigest() + if not memory and project is None: raise Exception("if memory=False, project must be provided") if memory or project is None or project.ropefolder is None: # Allows the in-memory db to be shared across threads # See https://www.sqlite.org/inmemorydb.html - project_hash: int + project_hash: str if project is None: - project_hash = random.randint(100_000_000, 999_999_999) + project_hash = secrets.token_hex() elif project.ropefolder is None: - project_hash = hash(project.address) + project_hash = calculate_project_hash(project.address) else: - project_hash = hash(project.ropefolder.real_path) + project_hash = calculate_project_hash(project.ropefolder.real_path) return sqlite3.connect( f"file:memdb{project_hash}:?mode=memory&cache=shared", uri=True ) From f75467d679b72770f4574b21329e232dadc5ba4c Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Mon, 6 Nov 2023 01:53:34 +1100 Subject: [PATCH 12/12] Change in-memory database name prefix `memdb` -> `rope-` This better clearly uniquely identify the database. While the likelihood of a problem in practice should be really, really small, in theory because rope is a library, it may share its memory space with other libraries, which may have used the same naming scheme. --- rope/contrib/autoimport/sqlite.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rope/contrib/autoimport/sqlite.py b/rope/contrib/autoimport/sqlite.py index 5791ea977..f02d426ba 100644 --- a/rope/contrib/autoimport/sqlite.py +++ b/rope/contrib/autoimport/sqlite.py @@ -155,6 +155,7 @@ def create_database_connection( memory : bool if true, don't persist to disk """ + def calculate_project_hash(data: str) -> str: return sha256(data.encode()).hexdigest() @@ -171,7 +172,7 @@ def calculate_project_hash(data: str) -> str: else: project_hash = calculate_project_hash(project.ropefolder.real_path) return sqlite3.connect( - f"file:memdb{project_hash}:?mode=memory&cache=shared", uri=True + f"file:rope-{project_hash}:?mode=memory&cache=shared", uri=True ) else: return sqlite3.connect(