Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Stage 2 script changes #31

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
434 changes: 308 additions & 126 deletions automatedDataExtraction/dataExtractorForExtractedFileSystem.sh

Large diffs are not rendered by default.

Binary file added automatedDataExtraction/jtool/jtool
Binary file not shown.
74 changes: 40 additions & 34 deletions automatedDataExtraction/scriptsToAutomate/entitlementExtractor.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,39 +8,45 @@ fi
rootfs_path="$1/"
rootfs_path=${rootfs_path//\/\//\/}

#the find command also has a printf option and provides much of the same data as stat
IFS=$'\n'

echoerr() { echo "$@" 1>&2; }

while read line; do
filePath="$rootfs_path$line"

entitlements=`jtool/jtool.ELF64 -arch armv7 --ent $filePath 2>&1`
entitlements=`echo $entitlements | sed 's;^.*<dict>;;' | sed 's;</dict>.*$;;' | sed 's;<key>;\\\n<key>;g'`
#-z checks to see if the string is empty.
#no identifier should indicate that the executable had no signature

if [ ! -z "$entitlements" ]; then
#echo "process(filePath('$line'),identifier('$identifier'))."
#echo $line
entlines=`printf $entitlements`
for ent in $entlines;
do
keyCheck=`echo $ent | grep '<key>'`
if [ ! -z "$keyCheck" ]; then
entKey=`echo $ent | sed 's/^.*<key>\ *//' | sed 's;\ *</key>.*;;'`
#the following code seems to be a series of sed operations with the output of the previous line flowing into the sed operations on the next line.
entValRaw=`echo $ent | sed 's;.*</key>;;' | sed 's;[\ ]*;;g'`
entValWithStrings=`echo $entValRaw | sed 's;<string>;string(";g' | sed 's;</string>;");g' | sed 's;")string;"),string;g'`
entValWithInts=`echo $entValWithStrings | sed 's;<integer>;intValue(";g' | sed 's;</integer>;");g' | sed 's;")intValue;"),intValue;g'`
entValWithBrackets=`echo $entValWithInts | sed 's;<array>;[;g' | sed 's;</array>;];g'`
#the syntax giving me trouble is <array/> which represents an empty list and can be represented in prolog as value([])
entValWithEmptyBrackets=`echo $entValWithBrackets | sed 's;<array/>;[];g'`
entValProcessBools=`echo $entValWithEmptyBrackets | sed 's;<true/>;bool("true");g' | sed 's;<false/>;bool("false");g'`
entVal=`echo $entValProcessBools`
echo "processEntitlement(filePath(\"$line\"),entitlement(key(\"$entKey\"),value($entVal)))."
OS=$(uname)

#instead of IFS=$'\n', just read -r
while read -r line; do
filePath="$rootfs_path$line"

# Run the jtool build that matches the host: jtool.ELF64 on Linux, jtool on macOS.
# stderr is folded into the captured text; on any other value of $OS nothing runs
# and $entitlements is left as it was.
case "$OS" in
  Linux)
    entitlements=$(jtool/jtool.ELF64 -arch armv7 --ent "$filePath" 2>&1)
    ;;
  Darwin)
    entitlements=$(jtool/jtool -arch armv7 --ent "$filePath" 2>&1)
    ;;
esac
done
fi

# Print one processEntitlement/2 Prolog fact for every <key> of an entitlements plist.
#   $1 - path written into filePath("...")
#   $2 - captured output of `jtool --ent` (may be empty or an error message)
# The plist is first joined onto a single line so that a key and the value that
# follows it on the next line(s) stay together; iterating over whitespace-separated
# words, or over the original lines, would separate every key from its value.
emitEntitlementFacts() {
  local pathLabel=$1 rawPlist=$2 flat ent entKey entVal

  # nothing captured means nothing to report
  [ -n "$rawPlist" ] || return 0

  # newlines and tabs become spaces, then only the text between <dict> and </dict> is kept
  flat=$(printf '%s\n' "$rawPlist" | tr '\n\t' '  ' | sed -e 's;^.*<dict>;;' -e 's;</dict>.*$;;')

  while IFS= read -r ent; do
    # the fragment before the first key (or plain error text) carries no <key>: skip it
    case "$ent" in
      *'<key>'*) ;;
      *) continue ;;
    esac

    entKey=$(printf '%s\n' "$ent" | sed -e 's/^.*<key> *//' -e 's; *</key>.*;;')

    # Turn the XML value into a Prolog term, one rewrite per step:
    #   drop the key, drop blanks, <string>/<integer> -> string("...")/intValue("..."),
    #   put commas between adjacent list items (after both conversions, so mixed
    #   string/integer lists are separated too), <array>..</array> -> [..],
    #   <array/> -> [] and <true/>/<false/> -> bool("...").
    entVal=$(printf '%s\n' "$ent" | sed \
      -e 's;.*</key>;;' \
      -e 's;[\ ]*;;g' \
      -e 's;<string>;string(";g' -e 's;</string>;");g' \
      -e 's;<integer>;intValue(";g' -e 's;</integer>;");g' \
      -e 's;")string;"),string;g' -e 's;")intValue;"),intValue;g' \
      -e 's;<array>;[;g' -e 's;</array>;];g' \
      -e 's;<array/>;[];g' \
      -e 's;<true/>;bool("true");g' -e 's;<false/>;bool("false");g')

    printf 'processEntitlement(filePath("%s"),entitlement(key("%s"),value(%s))).\n' \
      "$pathLabel" "$entKey" "$entVal"
  # awk starts a new line in front of every <key>, giving one entitlement per line
  done < <(printf '%s\n' "$flat" | awk '{ gsub(/<key>/, "\n<key>"); print }')
}

emitEntitlementFacts "$line" "$entitlements"
done
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
#!/bin/bash

#setting LC_CTYPE=C and LANG=C has no effect when LC_ALL is already set,
#so it is better to use LC_ALL=C to solve the sed problem.
#it must be exported: a plain assignment does not reach child processes such as sed
#unless LC_ALL was already part of the environment.
export LC_ALL=C

if test $# -ne 1; then
echo "Usage: $0 /path/to/root/filesystem/" 1>&2
exit 1
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
import sys

input_path = sys.argv[1]
Expand Down Expand Up @@ -27,7 +27,7 @@

members = columns[3].split(",")
for user_name in members:
print "groupMembership(user(\"" + user_name + "\"),group(\"" + group_name + "\"),groupIDNumber(\"" + group_id_number + "\"))."
print("groupMembership(user(\"" + user_name + "\"),group(\"" + group_name + "\"),groupIDNumber(\"" + group_id_number + "\")).")

for u_line in user_lines:
#ignore comments
Expand All @@ -42,8 +42,8 @@

#if id of user and group match then the group name is same as user name
if user_id_number == group_id_number:
print "groupMembership(user(\"" + user_name + "\"),group(\"" + user_name + "\"),groupIDNumber(\"" + group_id_number + "\"))."
print ("groupMembership(user(\"" + user_name + "\"),group(\"" + user_name + "\"),groupIDNumber(\"" + group_id_number + "\")).")
#otherwise we need to know what the group name is for a given gid number (e.g., user _ftp has gid for "nobody" by default instead of "_ftp")
else:
print "groupMembership(user(\"" + user_name + "\"),group(\"" + group_number_to_name_dict[group_id_number] + "\"),groupIDNumber(\"" + group_id_number + "\"))."
print ("groupMembership(user(\"" + user_name + "\"),group(\"" + group_number_to_name_dict[group_id_number] + "\"),groupIDNumber(\"" + group_id_number + "\")).")

4 changes: 3 additions & 1 deletion automatedDataExtraction/scriptsToAutomate/runProlog.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,6 @@
#so I'm just filtering out duplicates here with sort and uniq.
queryToRun=$1
temporaryFiles=$2
swipl --quiet -t "ignore($queryToRun),halt(1)" --consult-file $temporaryFiles/relevantFacts.pl | sort | uniq
#swipl --quiet -t "ignore($queryToRun),halt(1)" --consult-file $temporaryFiles/relevantFacts.pl | sort | uniq
#the command above does not work, remade it as it is below.
#the facts file path is quoted so a temporary directory containing blanks or glob characters still works.
swipl -s "$temporaryFiles/relevantFacts.pl" -t "ignore($queryToRun), halt(1)" | sort | uniq
17 changes: 11 additions & 6 deletions automatedDataExtraction/scriptsToAutomate/sanitizeFilePaths.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
#!/usr/bin/python
#!/usr/bin/python3

import re
import sys

fdata = open(sys.argv[1],"r").read().strip()
facts = fdata.split("\n")
#open with "rb" so the file is read as raw bytes instead of being decoded as text (decoding it as utf8 failed)
fdata = open(sys.argv[1],"rb").read().strip()
facts = fdata.split(b"\n")

for line in facts:
#print line
#pattern = re.compile(',filepath\(\"(.*)\"\)\)\.')
pattern = re.compile('(^.*,filePath\(\")(.*)(\"\)\)\.$)')
pattern = b'(^.*,filePath\(\")(.*)(\"\)\)\.$)'
pattern = re.compile(pattern)
#pattern = re.compile('^.*,filepath\(.*')
match = pattern.match(line)
filePath = match.group(2)
#print filePath
sanitizedPath = filePath.replace('"','_DOUBLEQUOTEWASHERE_').replace('\\','_BACKSLASHWASHERE_')
sanitizedPath = filePath.replace(b'"',bytes('_DOUBLEQUOTEWASHERE_', 'utf-8')).replace(b'\\',bytes('_BACKSLASHWASHERE_', 'utf-8'))
#sanitizedPath = filePath.replace('"','_DOUBLEQOUTEWASHERE_').replace('\\', '_BACKSLASHWASHERE_')
#print sanitizedPath
sanitizedLine = match.group(1) + sanitizedPath + match.group(3)
print sanitizedLine
sanitizedLine = sanitizedLine.decode('utf-8')
#print sanitizedLine
print(sanitizedLine)
38 changes: 23 additions & 15 deletions automatedDataExtraction/scriptsToAutomate/signatureExtractor.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,34 @@ fi
rootfs_path="$1/"
rootfs_path=${rootfs_path//\/\//\/}

OS="`uname`"

#the find command also has a printf option and provides much of the same data as stat
IFS=$'\n'

echoerr() { echo "$@" 1>&2; }

while read line; do
#expects path to root of iOS file system as an argument.
#todo add usage instructions as error output if argument is missing
filePath="$rootfs_path$line"
#echo $filePath

#this current version only outputs results for programs with com.apple as the start of their identifiers
#identifier=`codesign --display --verbose=4 $filePath 2>&1 | grep -o '^Identifier=com.apple.*' | sed 's/Identifier=//'`
identifier=`./jtool/jtool.ELF64 -arch armv7 --sig $filePath 2>&1 | grep -o '.*Identifier:[\ ]*com.apple.*' | sed 's/.*Identifier:[\ ]*//' | sed 's/\ .*//'`
#expects path to root of iOS file system as an argument.
#todo add usage instructions as error output if argument is missing
filePath="$rootfs_path$line"
# echo $filePath

#-z checks to see if the string is empty.
#no identifier should indicate that the executable had no signature
if [ ! -z "$identifier" ]; then
echo "processSignature(filePath(\"$line\"),identifier(\"$identifier\"))."
#echo $line
#else echo "process(filePath('$line'),identifier('no identifier detected'))."
fi
#this current version only outputs results for programs with com.apple as the start of their identifiers
#identifier=`codesign --display --verbose=4 $filePath 2>&1 | grep -o '^Identifier=com.apple.*' | sed 's/Identifier=//'`

# use jtool.ELF64 for linux; jtool for mac
# "$OS" is quoted so an empty value cannot turn the comparison into a syntax error;
# on any other OS no binary is selected and $identifier is left untouched.
case "$OS" in
  Linux) jtoolBinary=./jtool/jtool.ELF64 ;;
  Darwin) jtoolBinary=./jtool/jtool ;;
  *) jtoolBinary= ;;
esac

# "$filePath" is quoted so glob characters in a file name are passed through literally.
# grep keeps the signature line(s) whose identifier starts with com.apple; the two sed
# steps strip everything before the identifier and everything after its first blank.
if [ -n "$jtoolBinary" ]; then
  identifier=$("$jtoolBinary" -arch armv7 --sig "$filePath" 2>&1 \
    | grep -o '.*Identifier:[\ ]*com.apple.*' \
    | sed 's/.*Identifier:[\ ]*//' \
    | sed 's/\ .*//')
fi

#-z checks to see if the string is empty.
#no identifier should indicate that the executable had no signature
if [ ! -z "$identifier" ]; then
echo "processSignature(filePath(\"$line\"),identifier(\"$identifier\"))."
#echo $line
#else echo "process(filePath('$line'),identifier('no identifier detected'))."
fi
done
52 changes: 24 additions & 28 deletions automatedDataExtraction/scriptsToAutomate/stringExtractor.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,32 +8,28 @@ fi
rootfs_path="$1/"
rootfs_path=${rootfs_path//\/\//\/}

#the find command also has a printf option and provides much of the same data as stat
IFS=$'\n'

echoerr() { echo "$@" 1>&2; }

while read line; do
#./FileSystem is hardcoded and should be changed for other systems.
#this should really be a parameter passed in as a command line argument...
filePath="$rootfs_path$line"

#echo "about to process $line"

#I think this will only work if I set the minimum string length to a reasonably high number.
#otherwise, I get a bunch of junk...
#I wonder if IDA has a smarter way to remove strings that are not of interest.
#for now I am limiting the results to strings of 7 or more ascii characters.
#the strings must have at least three consecutive numbers or letters.
#the strings must not contain backslashes or double quotes
thisSetOfStrings=`strings -n 7 $filePath | grep '[a-zA-Z0-9][a-zA-Z0-9][a-zA-Z0-9]' | grep -v '"' | grep -v '\\\\'`

#echo "about to iterate through strings"
#I'm not sure why, but the while loop works, and the for loop causes the script to fail.
#It may have something to do with memory requirements and how for loops work in bash.
#for stringEntry in $thisSetOfStrings; do
strings -n 7 $filePath | grep '[a-zA-Z0-9][a-zA-Z0-9][a-zA-Z0-9]' | grep -v '"' | grep -v '\\\\' | while read stringEntry; do
#echo "about to output a prolog fact"
echo "processString(filePath(\"$line\"),stringFromProgram(\"$stringEntry\"))."
done
# Read one file path per line from stdin (relative to $rootfs_path) and print a
# processString/2 Prolog fact for every interesting printable string in that file.
# IFS= together with -r keeps leading/trailing blanks and backslashes of the path
# intact; `|| [ -n "$line" ]` also processes a last line that has no trailing newline.
while IFS= read -r line || [ -n "$line" ]; do
  #./FileSystem is no longer hardcoded: the root comes from the script argument.
  filePath="$rootfs_path$line"

  # A string is reported only when it
  #   - is at least 7 printable characters long (shorter ones are mostly junk),
  #   - contains three consecutive letters or digits,
  #   - contains neither a double quote nor a backslash.
  # -F makes grep take '"' and '\' literally, so a single backslash is enough to
  # reject a string; the former pattern '\\\\' only rejected two in a row.
  # The strings are streamed into `while read` instead of being collected in a
  # variable and walked with `for`, which the original author found to fail.
  strings -n 7 "$filePath" \
    | grep '[a-zA-Z0-9][a-zA-Z0-9][a-zA-Z0-9]' \
    | grep -v -F -e '"' -e '\' \
    | while read -r stringEntry; do
        printf 'processString(filePath("%s"),stringFromProgram("%s")).\n' "$line" "$stringEntry"
      done
done
30 changes: 18 additions & 12 deletions automatedDataExtraction/scriptsToAutomate/symbolExtractor.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,28 @@
# this will allow support for linux.

if test $# -ne 1; then
echo "Usage: $0 /path/to/root/filesystem/" 1>&2
exit 1
echo "Usage: $0 /path/to/root/filesystem/" 1>&2
exit 1
fi

rootfs_path="$1/"
rootfs_path=${rootfs_path//\/\//\/}

#the find command also has a printf option and provides much of the same data as stat
IFS=$'\n'
OS=$(uname)

echoerr() { echo "$@" 1>&2; }

while read line; do
filePath="$rootfs_path$line"
#echo $filePath
for symbol in $(./jtool/jtool.ELF64 -arch armv7 -S "$filePath" | sed 's/.*\ //g'); do
echo "processSymbol(filePath(\"$line\"),symbol(\"$symbol\"))."
done
# use jtool.ELF64 for linux; jtool for mac
# The binary is chosen once, before the loop, instead of per file; $OS falls back to
# `uname` when it is unset. Any other OS is reported instead of silently printing nothing.
case "${OS:-$(uname)}" in
  Linux) jtoolBinary=./jtool/jtool.ELF64 ;;
  Darwin) jtoolBinary=./jtool/jtool ;;
  *)
    echo "$0: unsupported operating system, only Linux and Darwin are handled" 1>&2
    exit 1
    ;;
esac

# Read one file path per line from stdin (relative to $rootfs_path) and print a
# processSymbol/2 Prolog fact for every symbol jtool lists for that file.
# IFS= with -r keeps the path exactly as given; `|| [ -n "$line" ]` also processes
# a last line that has no trailing newline.
while IFS= read -r line || [ -n "$line" ]; do
  filePath="$rootfs_path$line"
  #echo $filePath

  # sed keeps what follows the last blank of each line; tr then puts every remaining
  # blank-separated word on its own line, which matches the word splitting of the
  # former `for symbol in $(...)` without glob-expanding the symbol names.
  "$jtoolBinary" -arch armv7 -S "$filePath" \
    | sed 's/.*\ //g' \
    | tr -s ' \t' '\n\n' \
    | while IFS= read -r symbol; do
        [ -n "$symbol" ] || continue
        printf 'processSymbol(filePath("%s"),symbol("%s")).\n' "$line" "$symbol"
      done
done