forked from sailuh/kaiaulu
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adds interfaces to create bipartite projections and temporal collaboration networks from a previously parsed gitlog or from gitlog entities. An additional configuration file is added to keep track of the CLI parameter choices. Signed-off-by: Nicole Hoess <[email protected]>
- Loading branch information
1 parent
8f232a3
commit b258032
Showing
3 changed files
with
234 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
# -*- yaml -*- | ||
# https://github.com/sailuh/kaiaulu | ||
# | ||
# Copying and distribution of this file, with or without modification, | ||
# are permitted in any medium without royalty provided the copyright | ||
# notice and this notice are preserved. This file is offered as-is, | ||
# without any warranty. | ||
|
||
# CLI Configuration File # | ||
# | ||
# To perform analysis with kaiaulu's CLI, you need to specify several | ||
# configuration options which are otherwise passed in function calls | ||
# directly. This file serves to keep track of these parameters and make | ||
# their choice available to others. | ||
# | ||
# Note that this configuration file is an extension to the standard | ||
# project configuration file <project>.yml. | ||
# Please check https://github.com/sailuh/kaiaulu/tree/master/conf to | ||
# see if a project configuration file already exists. Otherwise, we | ||
# would appreciate if you share your curated file with us by sending a | ||
# Pull Request: https://github.com/sailuh/kaiaulu/pulls | ||
# | ||
# Not all of these parameters might be relevant for your analysis. | ||
# | ||
# Please comment out unused parameters instead of deleting them, for clarity. | ||
# If you have questions, please open a discussion: | ||
# https://github.com/sailuh/kaiaulu/discussions | ||
|
||
graph: | ||
bipartite: | ||
# When creating bipartite networks, you can choose between different | ||
# combinations of authors, committers, files and entities to connect. | ||
# Make sure to prepare and pass a suitable parsed git log to the CLI. | ||
# File network options: author-file, commit-file | ||
# Entity network options: author-entity, committer-entity, | ||
# commit-entity, author-committer | ||
network_type: author-entity | ||
# When creating a bipartite projection, you can choose whether to | ||
# apply it to the first or second node. | ||
mode: TRUE # TRUE: first node | ||
# Networks can be directed or undirected. | ||
directed: TRUE | ||
# The weight scheme will determine how the edge weights between nodes | ||
# are calculated. | ||
# Options: weight_scheme_count_deleted_nodes, weight_scheme_sum_edges, | ||
# weight_scheme_cum_temporal, weight_scheme_pairwise_cum_temporal | ||
weight_scheme: weight_scheme_sum_edges | ||
temporal: | ||
# When calculating the temporal collaboration network, you can | ||
# choose whether to calculate collaborations based on files or entities. | ||
network_type: entity | ||
# You can choose between author or committer collaboration. | ||
mode: author | ||
# Networks can be directed or undirected. | ||
directed: TRUE | ||
# You may consider only the last or all preceding developers to | ||
# calculate the temporal network's edge weights. | ||
# Options: one_lag, all_lag | ||
lag: all_lag | ||
# The weight scheme will determine how the edge weights between | ||
# nodes are calculated. | ||
# Options: weight_scheme_count_deleted_nodes, weight_scheme_sum_edges, | ||
# weight_scheme_cum_temporal, weight_scheme_pairwise_cum_temporal | ||
weight_scheme: weight_scheme_pairwise_cum_temporal |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,169 @@ | ||
#!/usr/local/bin/Rscript | ||
|
||
# Kaiaulu - https://github.com/sailuh/kaiaulu | ||
# | ||
# This Source Code Form is subject to the terms of the Mozilla Public | ||
# License, v. 2.0. If a copy of the MPL was not distributed with this | ||
# file, You can obtain one at https://mozilla.org/MPL/2.0/. | ||
|
||
|
||
require(yaml,quietly=TRUE) | ||
require(cli,quietly=TRUE) | ||
require(docopt,quietly=TRUE) | ||
require(kaiaulu,quietly=TRUE) | ||
require(igraph,quietly=TRUE) | ||
|
||
|
||
|
||
# docopt interface description for graph.R.
#
# The text below is both the help screen and the specification docopt parses:
#   * USAGE lists the accepted invocations; each pattern is indented and the
#     section ends at the first blank line.
#   * DESCRIPTION is free text shown to the user.
#   * OPTIONS lists the flags; an option and its description are separated by
#     at least two spaces so the description is not read as an option argument.
#
# The placeholder names used in USAGE (<tools.yml>, <project_conf.yml>,
# <cli_conf.yml>, <gitlog_file_name_path>, <save_file_name_path>) are the keys
# later used to look up the parsed arguments, so they must not be renamed here
# without updating those lookups.
doc <- "
USAGE:
  graph.R bipartite help
  graph.R bipartite <tools.yml> <project_conf.yml> <cli_conf.yml> <gitlog_file_name_path> <save_file_name_path>
  graph.R temporal help
  graph.R temporal <tools.yml> <project_conf.yml> <cli_conf.yml> <gitlog_file_name_path> <save_file_name_path>
  graph.R (-h | --help)
  graph.R --version

DESCRIPTION:
  Provides a suite of functions for network construction. Please see
  Kaiaulu's README.md for instructions on how to create <tools.yml>
  and <project_conf.yml>. An additional <cli_conf.yml> is needed to
  specify network construction parameters such as the choice between
  entity or file network, mode for bipartite projection, number of
  lags, edge type and edge weight scheme. See kaiaulu_cli.yml for
  examples.

OPTIONS:
  -h --help     Show this screen.
  --version     Show version.
"
|
||
|
||
|
||
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

# TRUE when `x` has at least one row. NROW() is used instead of length()
# because length() of a data frame is its number of columns, and NROW(NULL)
# is 0, so a missing table is treated as empty instead of raising an error.
has_rows <- function(x) {
  NROW(x) > 0
}

# Loads everything a graph sub-command needs.
#
# arguments: parsed docopt argument list.
# section:   name of the sub-section of `graph:` in the CLI configuration
#            ("bipartite" or "temporal").
#
# Returns a list with:
#   params      - the `graph[[section]]` entries of <cli_conf.yml>
#   project_git - the git log table read from <gitlog_file_name_path>
#   save_path   - the output path given as <save_file_name_path>
read_graph_inputs <- function(arguments, section) {
  # The tools and project configuration are not used further by this script;
  # parsing them still stops the run early if either file is missing or is
  # not valid YAML.
  tool <- yaml::read_yaml(arguments[["<tools.yml>"]])
  conf <- yaml::read_yaml(arguments[["<project_conf.yml>"]])
  cli_conf <- yaml::read_yaml(arguments[["<cli_conf.yml>"]])

  list(
    params = cli_conf[["graph"]][[section]],
    project_git = data.table::fread(arguments[["<gitlog_file_name_path>"]]),
    save_path = arguments[["<save_file_name_path>"]]
  )
}

# Builds an igraph graph from `network` (a list holding an "edgelist" and a
# "nodes" table), converts it to a dense adjacency matrix weighted by the
# "weight" edge attribute, and writes that matrix to `save_path` with the
# node names as row names.
#
# network:   list with elements "edgelist" and "nodes".
# directed:  whether the graph is built as a directed graph.
# save_path: file the adjacency matrix is written to.
# label:     network description inserted into the success message.
save_adjacency_matrix <- function(network, directed, save_path, label) {
  graph <- igraph::graph_from_data_frame(d = network[["edgelist"]],
                                         directed = directed,
                                         vertices = network[["nodes"]])
  adjacency_matrix <- igraph::as_adjacency_matrix(graph,
                                                  attr = "weight",
                                                  sparse = FALSE)
  adjacency_matrix <- as.data.frame(adjacency_matrix)
  rownames(adjacency_matrix) <- colnames(adjacency_matrix)

  data.table::fwrite(adjacency_matrix, save_path, row.names = TRUE)

  # The values are interpolated by cli rather than pasted into the template,
  # so braces inside a path are printed literally instead of being evaluated.
  cli_alert_success("Adjacency matrix for {label} was saved at: {save_path}")
  invisible(adjacency_matrix)
}

empty_edgelist_msg <- "Edge list is empty. Empty adjacency matrix was not saved."
empty_gitlog_msg <- "Git log is empty. Empty adjacency matrix was not saved."

# ---------------------------------------------------------------------------
# Command dispatch
# ---------------------------------------------------------------------------

arguments <- docopt::docopt(doc, version = 'Kaiaulu 0.0.0.9600')

# The command flags are single logical values, so the scalar `&&` is used.
if (arguments[["bipartite"]] && arguments[["help"]]) {
  cli_alert_info(paste(
    "Creates a bipartite graph projection from a",
    "parsed git (entity) log using",
    "transform_gitlog_to_bipartite_network(),",
    "transform_gitlog_to_entity_bipartite_network()",
    "and bipartite_graph_projection()."
  ))
} else if (arguments[["bipartite"]]) {

  inputs <- read_graph_inputs(arguments, "bipartite")
  params <- inputs[["params"]]
  project_git <- inputs[["project_git"]]

  if (has_rows(project_git)) {
    # Network types ending in "file" are built from the file-level git log;
    # every other type goes through the entity-level transformation.
    network_type <- params[["network_type"]]
    if (endsWith(network_type, "file")) {
      bipartite_network <- transform_gitlog_to_bipartite_network(
        project_git,
        mode = network_type
      )
    } else {
      bipartite_network <- transform_gitlog_to_entity_bipartite_network(
        project_git,
        mode = network_type
      )
    }

    if (has_rows(bipartite_network[["edgelist"]])) {
      # The weight scheme is configured by name; restrict the lookup to
      # functions so an unrelated object of the same name is never picked up.
      bipartite_projection <- bipartite_graph_projection(
        bipartite_network,
        mode = params[["mode"]],
        weight_scheme_function = get(params[["weight_scheme"]],
                                     mode = "function")
      )

      save_adjacency_matrix(bipartite_projection,
                            directed = params[["directed"]],
                            save_path = inputs[["save_path"]],
                            label = "bipartite projection")
    } else {
      cli_alert_warning(empty_edgelist_msg)
    }
  } else {
    cli_alert_warning(empty_gitlog_msg)
  }

} else if (arguments[["temporal"]] && arguments[["help"]]) {
  cli_alert_info(paste(
    "Creates a temporal collaboration network from a",
    "parsed git (entity) log using",
    "transform_gitlog_to_temporal_network() and",
    "transform_gitlog_to_entity_temporal_network()."
  ))
} else if (arguments[["temporal"]]) {

  inputs <- read_graph_inputs(arguments, "temporal")
  params <- inputs[["params"]]
  project_git <- inputs[["project_git"]]

  if (has_rows(project_git)) {
    weight_scheme_function <- get(params[["weight_scheme"]], mode = "function")

    # "file" selects the file-level transformation; any other value is
    # handled by the entity-level transformation.
    if (params[["network_type"]] == "file") {
      temporal_network <- transform_gitlog_to_temporal_network(
        project_git,
        mode = params[["mode"]],
        lag = params[["lag"]],
        weight_scheme_function = weight_scheme_function
      )
    } else {
      temporal_network <- transform_gitlog_to_entity_temporal_network(
        project_git,
        mode = params[["mode"]],
        lag = params[["lag"]],
        weight_scheme_function = weight_scheme_function
      )
    }

    if (has_rows(temporal_network[["edgelist"]])) {
      save_adjacency_matrix(temporal_network,
                            directed = params[["directed"]],
                            save_path = inputs[["save_path"]],
                            label = "temporal network")
    } else {
      cli_alert_warning(empty_edgelist_msg)
    }
  } else {
    cli_alert_warning(empty_gitlog_msg)
  }
}