Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensure that there are no parallelization hiccups with xargs #16

Merged
merged 2 commits into from
Dec 14, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 16 additions & 15 deletions scripts/gitlogg-generate-log.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,13 @@
my_dir="$(dirname "$0")"
cd $my_dir

source "colors.sh"
source "gitlogg-utils.sh"

cd ..

# define the absolute path to the directory that contains all your repositories.
yourpath='./_repos/'

# define temporary 'git log' output file that will be parsed to 'json'
tempOutputFile='_tmp/gitlogg.tmp'

# ensure file exists
mkdir -p ${tempOutputFile%%.*}
touch $tempOutputFile

# name and path to this very script, for output message purposes
thisFile='./scripts/gitlogg-generate-log.sh'

Expand Down Expand Up @@ -49,15 +42,15 @@ echo -e "${Blu}Info: Calculating in $NUM_THREADS thread(s)${RCol}"
# ensure there's always a '/' at the end of the 'yourpath' variable, since its value can be changed by user.
case "$yourpath" in
*/)
yourpathSanitized="${yourpath}" # no changes if there's already a slash at the end - syntax sugar
yourPathSanitized="${yourpath}" # no changes if there's already a slash at the end - syntax sugar
;;
*)
yourpathSanitized="${yourpath}/" # add a slash at the end if there isn't already one
yourPathSanitized="${yourpath}/" # add a slash at the end if there isn't already one
;;
esac

# 'thepath' sets the path to each repository under 'yourpath' (the trailing asterisk [*/] represents all the repository folders).
thepath="${yourpathSanitized}*/"
thepath="${yourPathSanitized}*/"


# function to trim whitespace
Expand Down Expand Up @@ -85,18 +78,26 @@ fi
SECONDS=0

# if the path exists and is not empty
if [ -d "${yourpathSanitized}" ] && [ "$(ls $yourpathSanitized)" ]; then
if [ -d "${yourPathSanitized}" ] && [ "$(ls $yourPathSanitized)" ]; then
echo -e "${Yel}Generating ${Pur}git log ${Yel}for ${reporef} located at ${Red}'${thepath}'${Yel}. ${Blu}This might take a while!${RCol}"

# ensure the directory for the temporary output exists, creating it if needed
mkdir -p ${tempOutputFile%%.*}

dirs=$(ls -d $thepath)
echo $dirs | xargs -n 1 -P $NUM_THREADS $workerFile > ${tempOutputFile}
echo $dirs | xargs -n 1 -P $NUM_THREADS $workerFile

cat $tempOutputFile.part.* > $tempOutputFile
rm $tempOutputFile.part.*

echo -e "${Gre}The file ${Blu}${tempOutputFile} ${Gre}generated in${RCol}: ${SECONDS}s" &&
babel "${jsonParser}" | node # only parse JSON if we have a source to parse it from
# if the path exists but is empty
elif [ -d "${yourpathSanitized}" ] && [ ! "$(ls $yourpathSanitized)" ]; then
elif [ -d "${yourPathSanitized}" ] && [ ! "$(ls $yourPathSanitized)" ]; then
echo -e "${Whi}[ERROR 002]: ${Yel}The path to the local repositories ${Red}'${yourpath}'${Yel}, which is set on the file ${Blu}'${thisFile}' ${UYel}exists, but is empty!${RCol}"
echo -e "${Yel}Please move the repos to ${Red}'${yourpath}'${Yel} or update the variable ${Pur}'yourpath'${Yel} to reflect the absolute path to the directory where the repos are located.${RCol}"
# if the path does not exist
elif [ ! -d "${yourpathSanitized}" ]; then
elif [ ! -d "${yourPathSanitized}" ]; then
echo -e "${Whi}[ERROR 001]: ${Yel}The path to the local repositories ${Red}'${yourpath}'${Yel}, which is set on the file ${Blu}'${thisFile}' ${UYel}does not exist!${RCol}"
echo -e "${Yel}Please create ${Red}'${yourpath}'${Yel} and move the repos under it, or update the variable ${Pur}'yourpath'${Yel} to reflect the absolute path to the directory where the repos are located.${RCol}"
fi
4 changes: 4 additions & 0 deletions scripts/colors.sh → scripts/gitlogg-utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,7 @@ Blu='\033[0;34m'; BBlu='\033[1;34m'; UBlu='\033[4;34m'; IBlu='\033[0;9
Pur='\033[0;35m'; BPur='\033[1;35m'; UPur='\033[4;35m'; IPur='\033[0;95m'; BIPur='\033[1;95m'; On_Pur='\033[45m'; On_IPur='\033[0;105m';
Cya='\033[0;36m'; BCya='\033[1;36m'; UCya='\033[4;36m'; ICya='\033[0;96m'; BICya='\033[1;96m'; On_Cya='\033[46m'; On_ICya='\033[0;106m';
Whi='\033[0;37m'; BWhi='\033[1;37m'; UWhi='\033[4;37m'; IWhi='\033[0;97m'; BIWhi='\033[1;97m'; On_Whi='\033[47m'; On_IWhi='\033[0;107m';

# define temporary 'git log' output file that will be parsed to 'json'
tempOutputFile='_tmp/gitlogg.tmp'

7 changes: 3 additions & 4 deletions scripts/output-intermediate-gitlog.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,14 @@
my_dir="$(dirname "$0")"
cd $my_dir

source "colors.sh"
source "gitlogg-utils.sh"

cd ..

test "$1" || exit 1
dir=$1

cd $dir &&
echo -e "${Whi}Outputting ${Pur}${PWD##*/}${RCol}" >&2 &&
git log --all --no-merges --shortstat --reverse --pretty=format:'commits\trepository\t'"${PWD##*/}"'\tcommit_hash\t%H\tcommit_hash_abbreviated\t%h\ttree_hash\t%T\ttree_hash_abbreviated\t%t\tparent_hashes\t%P\tparent_hashes_abbreviated\t%p\tauthor_name\t%an\tauthor_name_mailmap\t%aN\tauthor_email\t%ae\tauthor_email_mailmap\t%aE\tauthor_date\t%ad\tauthor_date_RFC2822\t%aD\tauthor_date_relative\t%ar\tauthor_date_unix_timestamp\t%at\tauthor_date_iso_8601\t%ai\tauthor_date_iso_8601_strict\t%aI\tcommitter_name\t%cn\tcommitter_name_mailmap\t%cN\tcommitter_email\t%ce\tcommitter_email_mailmap\t%cE\tcommitter_date\t%cd\tcommitter_date_RFC2822\t%cD\tcommitter_date_relative\t%cr\tcommitter_date_unix_timestamp\t%ct\tcommitter_date_iso_8601\t%ci\tcommitter_date_iso_8601_strict\t%cI\tref_names\t%d\tref_names_no_wrapping\t%D\tencoding\t%e\tsubject\t%s\tsubject_sanitized\t%f\tcommit_notes\t%N\tstats\t' |
cd $dir && echo -e "${Whi}Outputting ${Pur}${PWD##*/}${RCol}" >&2 && (git log --all --no-merges --shortstat --reverse --pretty=format:'commits\trepository\t'"${PWD##*/}"'\tcommit_hash\t%H\tcommit_hash_abbreviated\t%h\ttree_hash\t%T\ttree_hash_abbreviated\t%t\tparent_hashes\t%P\tparent_hashes_abbreviated\t%p\tauthor_name\t%an\tauthor_name_mailmap\t%aN\tauthor_email\t%ae\tauthor_email_mailmap\t%aE\tauthor_date\t%ad\tauthor_date_RFC2822\t%aD\tauthor_date_relative\t%ar\tauthor_date_unix_timestamp\t%at\tauthor_date_iso_8601\t%ai\tauthor_date_iso_8601_strict\t%aI\tcommitter_name\t%cn\tcommitter_name_mailmap\t%cN\tcommitter_email\t%ce\tcommitter_email_mailmap\t%cE\tcommitter_date\t%cd\tcommitter_date_RFC2822\t%cD\tcommitter_date_relative\t%cr\tcommitter_date_unix_timestamp\t%ct\tcommitter_date_iso_8601\t%ci\tcommitter_date_iso_8601_strict\t%cI\tref_names\t%d\tref_names_no_wrapping\t%D\tencoding\t%e\tsubject\t%s\tsubject_sanitized\t%f\tcommit_notes\t%N\tstats\t' |
iconv -f ISO-8859-1 -t UTF-8 | # convert ISO-8859-1 encoding to UTF-8
sed '/^[ \t]*$/d' | # remove all newlines/line-breaks, including those with empty spaces
tr '\n' 'ò' | # convert newlines/line-breaks to a character, so we can manipulate it without much trouble
Expand All @@ -29,3 +27,4 @@ cd $dir &&
paste -d ' ' - - | # collapse lines so that the `shortstat` is merged with the rest of the commit data, on a single line
awk '{print NR"\\t",$0}' | # print line number in front of each line, along with the `\t` delimiter
sed 's/\\t\ commits\\trepo/\\t\commits\\trepo/g' # get rid of the one space that shouldn't be there
) > ../../$tempOutputFile.part.$BASHPID