Skip to content

Commit

Permalink
a basic command to perform a corpus run
Browse files Browse the repository at this point in the history
  • Loading branch information
Byron committed Jun 14, 2023
1 parent 12f4cec commit d9e74ff
Show file tree
Hide file tree
Showing 6 changed files with 162 additions and 5 deletions.
11 changes: 7 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,12 @@ max = ["max-control", "fast", "gitoxide-core-blocking-client", "http-client-curl
max-pure = ["max-control", "gix-features/rustsha1", "gix-features/zlib-rust-backend", "http-client-reqwest", "gitoxide-core-blocking-client" ]

## Like `max`, but with more control for configuration. See the *Package Maintainers* headline for more information.
max-control = ["fast-safe", "pretty-cli", "gitoxide-core-tools-query", "gitoxide-core-tools", "prodash-render-line", "prodash-render-tui", "prodash/render-line-autoconfigure", "gix/regex" ]
max-control = ["fast-safe", "pretty-cli", "gitoxide-core-tools-query", "gitoxide-core-tools-corpus", "gitoxide-core-tools", "prodash-render-line", "prodash-render-tui", "prodash/render-line-autoconfigure", "gix/regex" ]

## All of the good stuff, with less fanciness for smaller binaries.
##
## As fast as possible, progress line rendering, all transports based on their most mature implementation (HTTP), all `ein` tools, CLI colors and local-time support, JSON output.
lean = ["fast", "pretty-cli", "http-client-curl", "gitoxide-core-tools-query", "gitoxide-core-tools", "gitoxide-core-blocking-client", "prodash-render-line" ]
lean = ["fast", "pretty-cli", "http-client-curl", "gitoxide-core-tools-query", "gitoxide-core-tools-corpus", "gitoxide-core-tools", "gitoxide-core-blocking-client", "prodash-render-line" ]

## The smallest possible build, best suitable for small single-core machines.
##
Expand All @@ -67,7 +67,7 @@ small = ["pretty-cli", "gix-features/rustsha1", "gix-features/zlib-rust-backend"
##
## Due to async client-networking not being implemented for most transports, this one supports only the 'git+tcp' and HTTP transport.
## It uses, however, a fully asynchronous networking implementation which can serve a real-world example on how to implement custom async transports.
lean-async = ["fast", "pretty-cli", "gitoxide-core-tools", "gitoxide-core-tools-query", "gitoxide-core-async-client", "prodash-render-line"]
lean-async = ["fast", "pretty-cli", "gitoxide-core-tools", "gitoxide-core-tools-query", "gitoxide-core-tools-corpus", "gitoxide-core-async-client", "prodash-render-line"]

#! ### Package Maintainers
#! `*-control` features leave it to you to configure C libraries, involving choices for `zlib`, hashing and transport implementation.
Expand Down Expand Up @@ -128,7 +128,10 @@ cache-efficiency-debug = ["gix-features/cache-efficiency-debug"]
gitoxide-core-tools = ["gitoxide-core/organize", "gitoxide-core/estimate-hours"]

## A program to perform analytics on a `git` repository, using an auto-maintained sqlite database
gitoxide-core-tools-query = ["gitoxide-core-tools", "gitoxide-core/query"]
gitoxide-core-tools-query = ["gitoxide-core/query"]

## A program to run algorithms on a corpus of repositories, recording each run for later comparison.
gitoxide-core-tools-corpus = ["gitoxide-core/corpus"]

#! ### Building Blocks for mutually exclusive networking
#! Blocking and async features are mutually exclusive and cause a compile-time error. This also means that `cargo … --all-features` will fail.
Expand Down
4 changes: 3 additions & 1 deletion gitoxide-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ organize = ["dep:gix-url", "dep:jwalk"]
estimate-hours = ["dep:itertools", "dep:fs-err", "dep:crossbeam-channel", "dep:smallvec"]
## Gather information about repositories and store it in a database for easy querying.
query = ["dep:rusqlite"]
## Run algorithms on a corpus of repositories and store their results for later comparison and intelligence gathering.
corpus = ["dep:rusqlite"]

#! ### Mutually Exclusive Networking
#! If both are set, _blocking-client_ will take precedence, allowing `--all-features` to be used.
Expand Down Expand Up @@ -66,7 +68,7 @@ fs-err = { version = "2.6.0", optional = true }
crossbeam-channel = { version = "0.5.6", optional = true }
smallvec = { version = "1.10.0", optional = true }

# for 'query'
# for 'query' and 'corpus'
rusqlite = { version = "0.29.0", optional = true, features = ["bundled"] }

# for svg graph output
Expand Down
107 changes: 107 additions & 0 deletions gitoxide-core/src/corpus/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
/// The entry point for corpus operations: pairs a progress reporter with the
/// connection to the corpus database.
pub struct Engine<P> {
// The progress instance handed to `Engine::open_or_create()`; the methods in `engine` require `P: gix::Progress`.
progress: P,
// The connection to the SQLite database as returned by `db::create()`.
con: rusqlite::Connection,
}

pub mod engine {
    use crate::corpus::Engine;
    use anyhow::Context;
    use std::path::PathBuf;

    impl<P: gix::Progress> Engine<P> {
        /// Open the corpus database at `db`, creating it if needed, and bundle the resulting
        /// connection with `progress` into a new engine.
        ///
        /// # Errors
        ///
        /// Any failure reported by `db::create()` is returned with additional context attached.
        pub fn open_or_create(db: PathBuf, progress: P) -> anyhow::Result<Engine<P>> {
            crate::corpus::db::create(db)
                .context("Could not open or create database")
                .map(|con| Engine { progress, con })
        }

        /// Run on the existing set of repositories we have already seen or obtain them from `path` if there is none yet.
        ///
        /// # Panics
        ///
        /// Always, as this is not implemented yet.
        pub fn run(&self, _path: PathBuf) -> anyhow::Result<()> {
            todo!()
        }
    }
}

pub mod db {
    use anyhow::bail;
    use rusqlite::{params, OptionalExtension};

    /// A version to be incremented whenever the database layout is changed, so that databases
    /// with a different layout are detected when they are opened (there is no migration yet).
    ///
    /// Version 2 added an explicit `id integer PRIMARY KEY` column to every table that is the
    /// target of a foreign key.
    const VERSION: usize = 2;

    /// All tables of the corpus database. Each statement is idempotent thanks to `if not exists`.
    ///
    /// SQLite requires the parent key of a foreign key to be a named column, not the implicit
    /// `rowid`, hence every referenced table declares `id integer PRIMARY KEY` (which is an alias
    /// for the rowid) and the foreign keys point at `id`.
    const SCHEMA: &str = r#"
        CREATE TABLE if not exists runner(
            id integer PRIMARY KEY,
            vendor text,
            brand text,
            host_name text, -- this is just to help ID the runner
            UNIQUE (vendor, brand)
        );
        CREATE TABLE if not exists corpus(
            id integer PRIMARY KEY,
            root text UNIQUE -- the root path of all repositories we want to consider, as canonicalized path
        );
        CREATE TABLE if not exists repository(
            id integer PRIMARY KEY,
            rela_path text UNIQUE, -- the path to the repository on disk, relative to the corpus root path, without leading `./` or `.\`
            corpus integer,
            FOREIGN KEY (corpus) REFERENCES corpus (id)
        );
        CREATE TABLE if not exists gix_version(
            id integer PRIMARY KEY,
            version text UNIQUE -- the unique git version via gix describe
        );
        CREATE TABLE if not exists run(
            repository integer,
            runner integer,
            gix_version integer,
            start_time integer,
            end_time integer, -- or NULL if not yet finished (either successful or with failure)
            error text, -- or NULL if there was no error
            FOREIGN KEY (repository) REFERENCES repository (id),
            FOREIGN KEY (runner) REFERENCES runner (id),
            FOREIGN KEY (gix_version) REFERENCES gix_version (id)
        );
    "#;

    /// Open the SQLite database at `path`, creating the file and all tables as needed, and
    /// return the connection.
    ///
    /// A `meta` table records the layout version: it is written when absent and compared
    /// against [`VERSION`] otherwise.
    ///
    /// # Errors
    ///
    /// * if the database cannot be opened or any statement fails
    /// * if the stored layout version differs from [`VERSION`], as migrations aren't implemented
    pub fn create(path: impl AsRef<std::path::Path>) -> anyhow::Result<rusqlite::Connection> {
        let con = rusqlite::Connection::open(path.as_ref())?;
        con.execute_batch(
            r#"
            CREATE TABLE if not exists meta(
                version int
            )"#,
        )?;

        // `optional()` turns "no row" into `None`, which is the state of a freshly created database.
        let stored_version: Option<usize> = con
            .query_row("SELECT version FROM meta", [], |row| row.get(0))
            .optional()?;
        match stored_version {
            None => {
                con.execute("INSERT into meta(version) values(?)", params![VERSION])?;
            }
            Some(version) if version != VERSION => {
                // Close explicitly first: a failure to close takes precedence over the version error.
                con.close().map_err(|(_, err)| err)?;
                bail!("Cannot handle database with version {version}, cannot yet migrate to {VERSION} - maybe migrate by hand?");
            }
            Some(_) => {}
        }

        con.execute_batch(SCHEMA)?;
        Ok(con)
    }
}
2 changes: 2 additions & 0 deletions gitoxide-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ impl FromStr for OutputFormat {
}

pub mod commitgraph;
#[cfg(feature = "corpus")]
pub mod corpus;
pub mod net;

#[cfg(feature = "estimate-hours")]
Expand Down
14 changes: 14 additions & 0 deletions src/plumbing/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,20 @@ pub fn main() -> Result<()> {
})?;

match cmd {
#[cfg(feature = "gitoxide-core-tools-corpus")]
Subcommands::Corpus(crate::plumbing::options::corpus::Platform { db, path, cmd }) => prepare_and_run(
"corpus",
auto_verbose,
progress,
progress_keep_open,
None,
move |progress, _out, _err| {
let engine = core::corpus::Engine::open_or_create(db, progress)?;
match cmd {
crate::plumbing::options::corpus::SubCommands::Run => engine.run(path),
}
},
),
Subcommands::CommitGraph(cmd) => match cmd {
commitgraph::Subcommands::List { spec } => prepare_and_run(
"commitgraph-list",
Expand Down
29 changes: 29 additions & 0 deletions src/plumbing/options/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,11 +117,40 @@ pub enum Subcommands {
/// Show which git configuration values are used or planned.
ConfigTree,
Config(config::Platform),
#[cfg(feature = "gitoxide-core-tools-corpus")]
Corpus(corpus::Platform),
/// Subcommands that need no git repository to run.
#[clap(subcommand)]
Free(free::Subcommands),
}

// Command-line definitions for the `corpus` sub-command, compiled only if the
// `gitoxide-core-tools-corpus` feature is enabled.
#[cfg(feature = "gitoxide-core-tools-corpus")]
pub mod corpus {
use std::path::PathBuf;

// The arguments shared by all `corpus` sub-commands, followed by the sub-command itself.
// NOTE: `///` comments on the items below are used by clap's derive macros as help text,
// so changing them changes the program's `--help` output.
#[derive(Debug, clap::Parser)]
#[command(
about = "run algorithms on a corpus of git repositories and store their results for later analysis",
version = clap::crate_version!(), // TODO: make this an actual version that is git describe, leverage `gix`
)]
pub struct Platform {
/// The path to the database to read and write depending on the sub-command.
#[arg(long, default_value = "corpus.db")]
pub db: PathBuf,
/// The path to the root of the corpus to search repositories in.
#[arg(long, short = 'p', default_value = ".")]
pub path: PathBuf,
// The operation to perform, parsed as a nested sub-command.
#[clap(subcommand)]
pub cmd: SubCommands,
}

// The operations available beneath `corpus`; currently only `run`.
#[derive(Debug, clap::Subcommand)]
pub enum SubCommands {
/// Perform a corpus run on all registered repositories.
Run,
}
}

pub mod config {

use gix::bstr::BString;
Expand Down

0 comments on commit d9e74ff

Please sign in to comment.