Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for distributed apps #10

Merged
merged 27 commits into from
Sep 23, 2024
Merged
Changes from 1 commit
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
add postgres seeds
* add postgres seeds
lalabuy948 committed Sep 15, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
commit cb9cdb9461cd689c0049543c6703e718e1a10acf
10 changes: 3 additions & 7 deletions priv/repo/seed_data.exs
Original file line number Diff line number Diff line change
@@ -63,7 +63,7 @@ defmodule SeedData do
device_type: Utility.get_device_type(Enum.random(@user_agents)),
session_id: UUID.uuid4(),
session_page_views: if(:rand.uniform() < 0.9, do: 1, else: :rand.uniform(5) + 1),
inserted_at: PhoenixAnalytics.Services.Utility.inserted_at()
inserted_at: random_inserted_at()
}
end

@@ -82,14 +82,10 @@ defmodule SeedData do
end

# Picks a moment between one second and 360 days before the current UTC time
# and passes it to format_timestamp/1.
defp random_inserted_at do
  max_offset_seconds = 360 * 24 * 60 * 60

  # Arguments are evaluated left to right: the clock is read first, then the
  # random offset is drawn.
  NaiveDateTime.utc_now()
  |> NaiveDateTime.add(-:rand.uniform(max_offset_seconds), :second)
  |> format_timestamp()
end

defp format_timestamp(datetime) do
datetime
NaiveDateTime.utc_now()
|> NaiveDateTime.add(-random_seconds, :second)
|> NaiveDateTime.truncate(:millisecond)
|> NaiveDateTime.to_string()
|> String.replace("T", " ")
83 changes: 83 additions & 0 deletions priv/repo/seeds_postgres.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Example seed data
#
# Opens an in-memory DuckDB instance, loads its postgres extension and
# attaches the target PostgreSQL database under the alias `postgres_db`.

Code.require_file("./priv/repo/seed_data.exs")

{:ok, db} = Duckdbex.open()
{:ok, conn} = Duckdbex.connection(db)

# Match on {:ok, _} so that a failed install/load/attach stops the script at
# the real cause; every later statement depends on `postgres_db` existing.
{:ok, _} = Duckdbex.query(conn, "INSTALL postgres;")
{:ok, _} = Duckdbex.query(conn, "LOAD postgres;")

{:ok, _} =
  Duckdbex.query(
    conn,
    "ATTACH 'dbname=postgres user=phoenix password=analytics host=localhost' AS postgres_db (TYPE POSTGRES);"
  )
  |> IO.inspect()

# DDL for the table that receives the seeded rows. IF NOT EXISTS keeps the
# statement harmless when the table is already present.
create_table_sql = """
CREATE TABLE IF NOT EXISTS postgres_db.requests (
request_id UUID PRIMARY KEY,
method VARCHAR NOT NULL,
path VARCHAR NOT NULL,
status_code SMALLINT NOT NULL,
duration_ms INTEGER NOT NULL,
user_agent VARCHAR,
remote_ip VARCHAR,
referer VARCHAR,
device VARCHAR,
session_id UUID,
session_page_views INTEGER,
inserted_at TIMESTAMP
);
"""

# The result is still printed, but a failure now stops the script here instead
# of surfacing later when the INSERT statement is prepared.
{:ok, _result} = Duckdbex.query(conn, create_table_sql) |> IO.inspect()

# Generates request rows and inserts them into postgres_db.requests in
# multi-row INSERT statements of `batch_size` rows each.
batch_size = 1_000
total_records = 1_000_000

# Column order must match the order of the values built for `params` below.
columns =
  ~w(request_id method path status_code duration_ms user_agent remote_ip referer device session_id session_page_views inserted_at)

# Loop-invariant SQL fragments, built once instead of once per batch.
column_list = Enum.join(columns, ", ")
row_placeholders = "(" <> Enum.map_join(columns, ", ", fn _ -> "?" end) <> ")"

# Streams keep the chunking lazy, so the full range is never materialised as
# a list of lists before the first insert.
1..total_records
|> Stream.chunk_every(batch_size)
|> Stream.with_index(1)
|> Enum.each(fn {chunk, index} ->
  batch =
    chunk
    |> Task.async_stream(fn _ -> SeedData.generate_request_data() end)
    |> Enum.map(fn {:ok, request_data} -> request_data end)

  rows_in_batch = length(batch)

  values_placeholders =
    row_placeholders
    |> List.duplicate(rows_in_batch)
    |> Enum.join(", ")

  query = "INSERT INTO postgres_db.requests (#{column_list}) VALUES #{values_placeholders};"

  params =
    Enum.flat_map(batch, fn request_data ->
      [
        request_data.request_id,
        request_data.method,
        request_data.path,
        request_data.status_code,
        request_data.duration_ms,
        request_data.user_agent,
        request_data.remote_ip,
        request_data.referer,
        # The generated data exposes :device_type; the table column is `device`.
        request_data.device_type,
        request_data.session_id,
        request_data.session_page_views,
        request_data.inserted_at
      ]
    end)

  {:ok, stmt_ref} = Duckdbex.prepare_statement(conn, query)
  {:ok, _result_ref} = Duckdbex.execute_statement(stmt_ref, params)

  # Running total that stays correct even if the final batch is short.
  inserted_so_far = (index - 1) * batch_size + rows_in_batch

  # IO.puts prints the message itself; IO.inspect(label: ...) would treat the
  # keyword list as the value to inspect.
  IO.puts("Chunk #{index} - Number of Records: #{inserted_so_far}")
end)