malus-security · Corekore · Apr 7, 2023 · Apr 9, 2023 · Apr 10, 2023 · Apr 28, 2023
diff --git a/automatedDataExtraction/dataExtractorForExtractedFileSystem.sh b/automatedDataExtraction/dataExtractorForExtractedFileSystem.sh
diff --git a/automatedDataExtraction/jtool/jtool b/automatedDataExtraction/jtool/jtool
diff --git a/automatedDataExtraction/scriptsToAutomate/entitlementExtractor.sh b/automatedDataExtraction/scriptsToAutomate/entitlementExtractor.sh
@@ -8,39 +8,45 @@ fi
 rootfs_path="$1/"
 rootfs_path=${rootfs_path//\/\//\/}
 
-#the find command also has a printf option and provides much of the same data as stat
-IFS=$'\n'
-
-echoerr() { echo "$@" 1>&2; }
-
-while read line; do
-    filePath="$rootfs_path$line"
-
-    entitlements=`jtool/jtool.ELF64 -arch armv7 --ent $filePath 2>&1`
-    entitlements=`echo $entitlements | sed 's;^.*<dict>;;' | sed 's;</dict>.*$;;' | sed 's;<key>;\\\n<key>;g'`
-    #-z checks to see if the string is empty.
-    #no identifier should indicate that the executable had no signature
-
-    if [ ! -z "$entitlements" ]; then
-      #echo "process(filePath('$line'),identifier('$identifier'))."
-      #echo $line
-      entlines=`printf $entitlements`
-      for ent in $entlines;
-      do
-	keyCheck=`echo $ent | grep '<key>'`
-	if [ ! -z "$keyCheck" ]; then
-	  entKey=`echo $ent | sed 's/^.*<key>\ *//' | sed 's;\ *</key>.*;;'`
-	  #the following code seems to be a series of sed operations with the output of the previous line flowing into the sed operations on the next line.
-	  entValRaw=`echo $ent | sed 's;.*</key>;;' | sed 's;[\ 	]*;;g'`
-	  entValWithStrings=`echo $entValRaw | sed 's;<string>;string(";g' | sed 's;</string>;");g' |  sed 's;")string;"),string;g'`
-	  entValWithInts=`echo $entValWithStrings | sed 's;<integer>;intValue(";g' | sed 's;</integer>;");g' |  sed 's;")intValue;"),intValue;g'`
-	  entValWithBrackets=`echo $entValWithInts | sed 's;<array>;[;g' | sed 's;</array>;];g'`
-	  #the syntax giving me trouble is <array/> which represents and empty list and can be represented in prolog as value([])
-	  entValWithEmptyBrackets=`echo $entValWithBrackets | sed 's;<array/>;[];g'`
-	  entValProcessBools=`echo $entValWithEmptyBrackets | sed 's;<true/>;bool("true");g' | sed 's;<false/>;bool("false");g'` 
-	  entVal=`echo $entValProcessBools`
-	  echo "processEntitlement(filePath(\"$line\"),entitlement(key(\"$entKey\"),value($entVal)))."
+OS=$(uname)
+
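+#IFS=$'\n' was dropped; read -r keeps backslashes literal, but with the default IFS leading/trailing blanks in a line are still trimmed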
+while read -r line; do
+  filePath="$rootfs_path$line"
+
+	# use jtool.ELF64 for linux; use jtool for mac
+	if test "$OS" == "Linux"; then
+		entitlements=$(jtool/jtool.ELF64 -arch armv7 --ent "$filePath" 2>&1)
+	elif test "$OS" == "Darwin"; then 
+		entitlements=$(jtool/jtool -arch armv7 --ent "$filePath" 2>&1)
 	fi
-      done
-    fi
+
+	entitlements=$(echo "$entitlements" | sed 's;^.*<dict>;;' | sed 's;</dict>.*$;;' | sed 's;<key>;\\\n<key>;g')
+	#-n checks to see if the string is non-empty.
+	#no identifier should indicate that the executable had no signature
+
+	if [ -n "$entitlements" ]; then
+	#echo "process(filePath('$line'),identifier('$identifier'))."
+	#echo $line
+	entlines=$(printf "%s\n" "$entitlements")
+
+		for ent in $entlines; do
+			keyCheck=$(echo "$ent" | grep '<key>')
+
+			if [ -n "$keyCheck" ]; then
+				entKey=$(echo "$ent" | sed 's/^.*<key>\ *//' | sed 's;\ *</key>.*;;')
+				#the following code seems to be a series of sed operations with the output of the previous line flowing into the sed operations on the next line.
+				entValRaw=$(echo "$ent" | sed 's;.*</key>;;' | sed 's;[\ 	]*;;g')
+				entValWithStrings=$(echo "$entValRaw" | sed 's;<string>;string(";g' | sed 's;</string>;");g' |  sed 's;")string;"),string;g')
+				entValWithInts=$(echo "$entValWithStrings" | sed 's;<integer>;intValue(";g' | sed 's;</integer>;");g' |  sed 's;")intValue;"),intValue;g')
+				entValWithBrackets=$(echo "$entValWithInts" | sed 's;<array>;[;g' | sed 's;</array>;];g')
+				#the syntax giving me trouble is <array/> which represents an empty list and can be represented in prolog as value([])
+				entValWithEmptyBrackets=$(echo "$entValWithBrackets" | sed 's;<array/>;[];g')
+				entValProcessBools=$(echo "$entValWithEmptyBrackets" | sed 's;<true/>;bool("true");g' | sed 's;<false/>;bool("false");g')
+				entVal="$entValProcessBools"
+				echo "processEntitlement(filePath(\"$line\"),entitlement(key(\"$entKey\"),value($entVal)))."
+			fi
+
+		done
+  fi
 done
diff --git a/automatedDataExtraction/scriptsToAutomate/fileTypeExtractor.sh b/automatedDataExtraction/scriptsToAutomate/fileTypeExtractor.sh
@@ -1,5 +1,9 @@
 #!/bin/bash
 
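+#setting LC_CTYPE=C and LANG=C has no effect if LC_ALL is already set, so set LC_ALL=C instead to solve the sed problem
+#NOTE(review): LC_ALL is assigned without export, so sed only sees it if LC_ALL was already in the environment - confirm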
+LC_ALL=C
+
 if test $# -ne 1; then
 	echo "Usage: $0 /path/to/root/filesystem/" 1>&2
 	exit 1

diff --git a/automatedDataExtraction/scriptsToAutomate/firmware_group_extractor.py b/automatedDataExtraction/scriptsToAutomate/firmware_group_extractor.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 import sys
 
 input_path = sys.argv[1]
@@ -27,7 +27,7 @@
 
   members = columns[3].split(",")
   for user_name in members:
-    print "groupMembership(user(\"" + user_name + "\"),group(\"" + group_name +  "\"),groupIDNumber(\"" + group_id_number + "\"))."
+    print("groupMembership(user(\"" + user_name + "\"),group(\"" + group_name +  "\"),groupIDNumber(\"" + group_id_number + "\")).")
 
 for u_line in user_lines:
   #ignore comments
@@ -42,8 +42,8 @@
 
   #if id of user and group match then the group name is same as user name
   if user_id_number == group_id_number:	
-    print "groupMembership(user(\"" + user_name + "\"),group(\"" + user_name +  "\"),groupIDNumber(\"" + group_id_number + "\"))."
+    print ("groupMembership(user(\"" + user_name + "\"),group(\"" + user_name +  "\"),groupIDNumber(\"" + group_id_number + "\")).")
   #otherwise we need to know what the group name is for a given gid number (e.g., user _ftp has gid for "nobody" by default instead of "_ftp")
   else:
-    print "groupMembership(user(\"" + user_name + "\"),group(\"" + group_number_to_name_dict[group_id_number] +  "\"),groupIDNumber(\"" + group_id_number + "\"))."
+    print ("groupMembership(user(\"" + user_name + "\"),group(\"" + group_number_to_name_dict[group_id_number] +  "\"),groupIDNumber(\"" + group_id_number + "\")).")
 
diff --git a/automatedDataExtraction/scriptsToAutomate/runProlog.sh b/automatedDataExtraction/scriptsToAutomate/runProlog.sh
@@ -9,4 +9,6 @@
 #so I'm just filtering out duplicates here with sort and uniq.
 queryToRun=$1
 temporaryFiles=$2
-swipl --quiet -t "ignore($queryToRun),halt(1)" --consult-file $temporaryFiles/relevantFacts.pl | sort | uniq
+#swipl --quiet -t "ignore($queryToRun),halt(1)" --consult-file $temporaryFiles/relevantFacts.pl | sort | uniq
+#the command above does not work, remade it as it is below.
+swipl -s $temporaryFiles/relevantFacts.pl -t "ignore($queryToRun), halt(1)" | sort | uniq
diff --git a/automatedDataExtraction/scriptsToAutomate/sanitizeFilePaths.py b/automatedDataExtraction/scriptsToAutomate/sanitizeFilePaths.py
@@ -1,20 +1,25 @@
-#!/usr/bin/python
+#!/usr/bin/python3
 
 import re
 import sys
 
-fdata = open(sys.argv[1],"r").read().strip()
-facts = fdata.split("\n")
+#open with rb so file is treated as binary and the content treated as bytes (utf8 failure)
+fdata = open(sys.argv[1],"rb").read().strip()
+facts = fdata.split(b"\n")
 
 for line in facts:
   #print line
   #pattern = re.compile(',filepath\(\"(.*)\"\)\)\.')
-  pattern = re.compile('(^.*,filePath\(\")(.*)(\"\)\)\.$)')
+  pattern = b'(^.*,filePath\(\")(.*)(\"\)\)\.$)'
+  pattern = re.compile(pattern)
   #pattern = re.compile('^.*,filepath\(.*')
   match = pattern.match(line)
   filePath = match.group(2)
   #print filePath
-  sanitizedPath = filePath.replace('"','_DOUBLEQUOTEWASHERE_').replace('\\','_BACKSLASHWASHERE_')
+  sanitizedPath = filePath.replace(b'"',bytes('_DOUBLEQUOTEWASHERE_', 'utf-8')).replace(b'\\',bytes('_BACKSLASHWASHERE_', 'utf-8'))
+  #sanitizedPath = filePath.replace('"','_DOUBLEQUOTEWASHERE_').replace('\\', '_BACKSLASHWASHERE_')
   #print sanitizedPath
   sanitizedLine = match.group(1) + sanitizedPath + match.group(3)
-  print sanitizedLine
+  sanitizedLine = sanitizedLine.decode('utf-8')
+  #print sanitizedLine
+  print(sanitizedLine)
diff --git a/automatedDataExtraction/scriptsToAutomate/signatureExtractor.sh b/automatedDataExtraction/scriptsToAutomate/signatureExtractor.sh
@@ -8,26 +8,34 @@ fi
 rootfs_path="$1/"
 rootfs_path=${rootfs_path//\/\//\/}
 
+OS="`uname`"
+
 #the find command also has a printf option and provides much of the same data as stat
 IFS=$'\n'
 
 echoerr() { echo "$@" 1>&2; }
 
 while read line; do
-    #expects path to root of iOS file system as an argument.
-    #todo add usage instructions as error output if argument is missing
-    filePath="$rootfs_path$line"
-    #echo $filePath
-
-    #this current version only outputs results for programs with com.apple as the start of their identifiers
-    #identifier=`codesign --display --verbose=4 $filePath 2>&1 | grep -o '^Identifier=com.apple.*' | sed 's/Identifier=//'`
-    identifier=`./jtool/jtool.ELF64 -arch armv7 --sig $filePath 2>&1 | grep -o '.*Identifier:[\ ]*com.apple.*' | sed 's/.*Identifier:[\ ]*//' | sed 's/\ .*//'`
+	#expects path to root of iOS file system as an argument.
+	#todo add usage instructions as error output if argument is missing
+	filePath="$rootfs_path$line"
+	# echo $filePath
 
-    #-z checks to see if the string is empty.
-    #no identifier should indicate that the executable had no signature
-    if [ ! -z "$identifier" ]; then
-      echo "processSignature(filePath(\"$line\"),identifier(\"$identifier\"))."
-      #echo $line
-    #else echo "process(filePath('$line'),identifier('no identifier detected'))."
-    fi
+	#this current version only outputs results for programs with com.apple as the start of their identifiers
+	#identifier=`codesign --display --verbose=4 $filePath 2>&1 | grep -o '^Identifier=com.apple.*' | sed 's/Identifier=//'`
+
+	# use jtool.ELF64 for linux; jtool for mac
+  	if test $OS == "Linux"; then
+    	identifier=`./jtool/jtool.ELF64 -arch armv7 --sig $filePath 2>&1 | grep -o '.*Identifier:[\ ]*com.apple.*' | sed 's/.*Identifier:[\ ]*//' | sed 's/\ .*//'`
+  	elif test $OS == "Darwin"; then
+    	identifier=`./jtool/jtool -arch armv7 --sig $filePath 2>&1 | grep -o '.*Identifier:[\ ]*com.apple.*' | sed 's/.*Identifier:[\ ]*//' | sed 's/\ .*//'`
+  	fi
+
+  	#-z checks to see if the string is empty.
+  	#no identifier should indicate that the executable had no signature
+  	if [ ! -z "$identifier" ]; then
+    	echo "processSignature(filePath(\"$line\"),identifier(\"$identifier\"))."
+    	#echo $line
+  	#else echo "process(filePath('$line'),identifier('no identifier detected'))."
+  	fi
 done
diff --git a/automatedDataExtraction/scriptsToAutomate/stringExtractor.sh b/automatedDataExtraction/scriptsToAutomate/stringExtractor.sh
@@ -8,32 +8,28 @@ fi
 rootfs_path="$1/"
 rootfs_path=${rootfs_path//\/\//\/}
 
-#the find command also has a printf option and provides much of the same data as stat
-IFS=$'\n'
-
-echoerr() { echo "$@" 1>&2; }
-
-while read line; do
-    #./FileSystem is hardcoded and should be changed for other systems.
-    #this should really be a parameter passed in as a command line argument...
-    filePath="$rootfs_path$line"
-
-    #echo "about to process $line"
-
-    #I think this will only work if I set the minimum string length to a reasonably high number.
-    #otherwise, I get a bunch of junk...
-    #I wonder if IDA has a smarter way to remove strings that are not of interest.
-    #for now I am limiting the results to strings of 7 or more ascii characters.
-    #the strings must have at least three consecutive numbers or letters.
-    #the strings must not contain backslashes or double quotes
-    thisSetOfStrings=`strings -n 7 $filePath | grep '[a-zA-Z0-9][a-zA-Z0-9][a-zA-Z0-9]' | grep -v '"' | grep -v '\\\\'`
-
-    #echo "about to iterate through strings"
-    #I'm not sure why, but the while loop works, and the for loop causes the script to fail.
-    #It may have something to do with memory requirements and how for loops work in bash.
-    #for stringEntry in $thisSetOfStrings; do
-    strings -n 7 $filePath | grep '[a-zA-Z0-9][a-zA-Z0-9][a-zA-Z0-9]' | grep -v '"' | grep -v '\\\\' | while read stringEntry; do
-        #echo "about to output a prolog fact"
-        echo "processString(filePath(\"$line\"),stringFromProgram(\"$stringEntry\"))."
-    done
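+#IFS=$'\n' was dropped; read -r keeps backslashes literal, but with the default IFS leading/trailing blanks in a line are still trimmed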
+while read -r line; do
+  #./FileSystem is hardcoded and should be changed for other systems.
+  #this should really be a parameter passed in as a command line argument...
+  filePath="$rootfs_path$line"
+
+  #echo "about to process $line"
+
+  #I think this will only work if I set the minimum string length to a reasonably high number.
+  #otherwise, I get a bunch of junk...
+  #I wonder if IDA has a smarter way to remove strings that are not of interest.
+  #for now I am limiting the results to strings of 7 or more ascii characters.
+  #the strings must have at least three consecutive numbers or letters.
+  #the strings must not contain backslashes or double quotes
+  #thisSetOfStrings=$(strings -n 7 $filePath | grep '[a-zA-Z0-9][a-zA-Z0-9][a-zA-Z0-9]' | grep -v '"' | grep -v '\\\\')
+
+  #echo "about to iterate through strings"
+  #I'm not sure why, but the while loop works, and the for loop causes the script to fail.
+  #It may have something to do with memory requirements and how for loops work in bash.
+  #for stringEntry in $thisSetOfStrings; do
+  strings -n 7 "$filePath" | grep '[a-zA-Z0-9][a-zA-Z0-9][a-zA-Z0-9]' | grep -v '"' | grep -v '\\\\' | while read -r stringEntry; do
+    #echo "about to output a prolog fact"
+    echo "processString(filePath(\"$line\"),stringFromProgram(\"$stringEntry\"))."
+  done
 done
diff --git a/automatedDataExtraction/scriptsToAutomate/symbolExtractor.sh b/automatedDataExtraction/scriptsToAutomate/symbolExtractor.sh
@@ -4,22 +4,28 @@
 # this will allow support for linux.
 
 if test $# -ne 1; then
-    echo "Usage: $0 /path/to/root/filesystem/" 1>&2
-    exit 1
+  echo "Usage: $0 /path/to/root/filesystem/" 1>&2
+  exit 1
 fi
 
 rootfs_path="$1/"
 rootfs_path=${rootfs_path//\/\//\/}
 
-#the find command also has a printf option and provides much of the same data as stat
-IFS=$'\n'
+OS=$(uname)
 
-echoerr() { echo "$@" 1>&2; }
-
-while read line; do
-    filePath="$rootfs_path$line"
-    #echo $filePath
-    for symbol in $(./jtool/jtool.ELF64 -arch armv7 -S "$filePath" | sed 's/.*\ //g'); do
-        echo "processSymbol(filePath(\"$line\"),symbol(\"$symbol\"))."
-    done
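+#IFS=$'\n' was dropped; read -r keeps backslashes literal, but with the default IFS leading/trailing blanks in a line are still trimmed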
+while read -r line; do
+	filePath="$rootfs_path$line"
+	#echo $filePath
+
+	# use jtool.ELF64 for linux; jtool for mac
+	if test "$OS" == "Linux"; then
+		for symbol in $(./jtool/jtool.ELF64 -arch armv7 -S "$filePath" | sed 's/.*\ //g'); do
+			echo "processSymbol(filePath(\"$line\"),symbol(\"$symbol\"))."
+		done
+	elif test "$OS" == "Darwin"; then
+		for symbol in $(./jtool/jtool -arch armv7 -S "$filePath" | sed 's/.*\ //g'); do
+			echo "processSymbol(filePath(\"$line\"),symbol(\"$symbol\"))."
+		done
+	fi
 done