Skip to content

Commit e53b32e

Browse files
vivekrnv authored and yxieca committed
[generate_dump] [Mellanox] Fix the duplicate dfw dump collection problem by adding symlinks (sonic-net#2536)
- What I did Currently the dfw dumps, which are usually saved under /var/log/mellanox/sdk-dumps, are collected twice in the techsupport: once under log/ and once under the sai_sdk_dump/ folder. Fixed the scenario by creating a symbolic link from sai_sdk_dump/sai-dfw-xxxxxxxxx.tar.gz -> ../log/sai-dfw-xxxxxxxxx.tar.gz - How I did it dfw dumps are currently copied from syncd, but the logic is updated to collect the files from the host if SAI_DUMP_STORE_PATH is mounted on the host. Fixed the duplicate dfw dump collection problem by adding a relative symbolic link from sai_sdk_dump/ -> log/ folder. dfw dump collection is moved to a new function collect_mellanox_dfw_dumps, which is run at the end, i.e. after the files under /var/log are saved - How to verify it root@switch:/home/admin# show techsupport --verbose root@switch:/home/admin/sonic_dump_r-lionfish-13_20221202_081958/log# ls -Al | grep dfw -rw-r--r-- 1 root root 1841061 Dec 2 08:21 sai-dfw-1669685690.tar.gz root@switch:/home/admin/sonic_dump_r-lionfish-13_20221202_081958/sai_sdk_dump# ls -Al Signed-off-by: Vivek Reddy Karri <[email protected]>
1 parent 0391221 commit e53b32e

File tree

1 file changed

+94
-10
lines changed

1 file changed

+94
-10
lines changed

scripts/generate_dump

+94-10
Original file line numberDiff line numberDiff line change
@@ -939,6 +939,49 @@ enable_logrotate() {
939939
sed -i '/\/usr\/sbin\/logrotate/s/^#*//g' /etc/cron.d/logrotate
940940
}
941941

942+
###############################################################################
943+
# Create a relative symbolic link of an existing file
944+
# Globals:
945+
# BASE
946+
# MKDIR
947+
# TAR
948+
# TARFILE
949+
# DUMPDIR
950+
# V
951+
# RM
952+
# NOOP
953+
# Arguments:
954+
# filename: the full path of the file
955+
# dest_dir: destination dir where the link is created
956+
# src_dir: directory under $TARDIR where the actual file exists
957+
# Returns:
958+
# None
959+
###############################################################################
960+
save_symlink() {
    # Create the relative symlink
    #     $TARDIR/<dest_dir>/<name>  ->  ../<src_dir>/<name>
    # where <name> is the basename of $1, and (unless $4 is "false") append
    # that link to $TARFILE and then remove it from disk.
    #
    # Arguments:
    #   $1 filename      - path of the file; only its basename is used
    #   $2 dest_dir      - directory under $TARDIR in which the link is created
    #   $3 src_dir       - sibling directory of dest_dir holding the real file
    #   $4 do_tar_append - "true" (default) to append the link to $TARFILE
    trap 'handle_error $? $LINENO' ERR
    # Declaration and assignment are split so that a failing command
    # substitution is not hidden behind the exit status of 'local'.
    local start_t
    start_t=$(date +%s%3N)
    local end_t=0
    local filename=$1
    local dest_dir=$2
    local src_dir=$3
    local do_tar_append=${4:-true}
    local file_basename
    file_basename=$(basename -- "$filename")
    local tar_path="$BASE/$dest_dir/$file_basename"

    # NOTE(review): $MKDIR, $TAR, $RM, $V and $CMD_PREFIX stay unquoted, as in
    # the original; presumably they may expand to several words or to nothing.
    # Confirm against their definitions before quoting them.
    $MKDIR $V -p "$TARDIR/$dest_dir"

    # A relative symlink target is resolved against the directory containing
    # the link, so the link can be created without changing directory.
    ${CMD_PREFIX}ln -s "../$src_dir/$file_basename" "$TARDIR/$dest_dir/$file_basename"

    if [[ "$do_tar_append" == "true" ]]; then
        # The on-disk link is removed only after tar has appended it.
        ($TAR $V -rf "$TARFILE" -C "$DUMPDIR" "$tar_path" \
            || abort "${EXT_PROCFS_SAVE_FAILED}" "tar append operation failed. Aborting to prevent data loss.") \
            && $RM $V -f "$DUMPDIR/$tar_path"
    fi
    end_t=$(date +%s%3N)
    echo "[ save_symlink:$filename] : $(($end_t-$start_t)) msec" >> "$TECHSUPPORT_TIME_INFO"
}
942985

943986
###############################################################################
944987
# Collect Mellanox specific information
@@ -971,16 +1014,6 @@ collect_mellanox() {
9711014
${CMD_PREFIX}rm -rf $sai_dump_folder
9721015
${CMD_PREFIX}docker exec syncd rm -rf $sai_dump_folder
9731016

974-
# Save SDK error dumps
975-
local sdk_dump_path=`${CMD_PREFIX}docker exec syncd cat /tmp/sai.profile|grep "SAI_DUMP_STORE_PATH"|cut -d = -f2`
976-
if [[ -d $sdk_dump_path ]]; then
977-
copy_from_docker syncd $sdk_dump_path /tmp/sdk-dumps
978-
for file in $(find /tmp/sdk-dumps -type f); do
979-
save_file ${file} sai_sdk_dump false
980-
done
981-
rm -rf /tmp/sdk-dumps
982-
fi
983-
9841017
# run 'hw-management-generate-dump.sh' script and save the result file
9851018
HW_DUMP_FILE=/usr/bin/hw-management-generate-dump.sh
9861019
if [ -f "$HW_DUMP_FILE" ]; then
@@ -1002,6 +1035,53 @@ collect_mellanox() {
10021035

10031036
}
10041037

1038+
###############################################################################
1039+
# Collect dfw dumps if any. Applies to only MLNX platform
1040+
# Globals:
1041+
# CMD_PREFIX
1042+
# Arguments:
1043+
# None
1044+
# Returns:
1045+
# None
1046+
###############################################################################
1047+
collect_mellanox_dfw_dumps() {
    # Collect the dfw dumps stored under SAI_DUMP_STORE_PATH (read from the
    # platform's sai.profile) into sai_sdk_dump/. A dump whose name already
    # appears under $BASE/log/ in $TARFILE is not saved again; a relative
    # symlink sai_sdk_dump/<name> -> ../log/<name> is added instead.
    trap 'handle_error $? $LINENO' ERR
    local platform
    platform=$(python3 -c "from sonic_py_common import device_info; print(device_info.get_platform())")
    local hwsku
    hwsku=$(python3 -c "from sonic_py_common import device_info; print(device_info.get_hwsku())")
    local sai_profile="/usr/share/sonic/device/${platform}/${hwsku}/sai.profile"
    local sdk_dump_path
    sdk_dump_path=$(grep "SAI_DUMP_STORE_PATH" "$sai_profile" | cut -d = -f2) || sdk_dump_path=""

    if [[ -z "$sdk_dump_path" ]]; then
        # No dump store path could be read from sai.profile, so there is
        # nothing to look for (and nothing valid to pass to copy_from_docker).
        return 0
    fi

    local tmp_dump_dir="/tmp/dfw-sdk-dumps"
    local copied_from_container=false
    if [[ ! -d "$sdk_dump_path" ]]; then
        # This would mean the SAI_DUMP_STORE_PATH is not mounted on the host
        # and is only accessible through the container. This is a bad design
        # and not recommended, but nothing restricts against it, hence the
        # special handling.
        if [[ "$( docker container inspect -f '{{.State.Running}}' syncd )" == "true" ]]; then
            $RM $V -rf "$tmp_dump_dir"
            $MKDIR $V -p "$tmp_dump_dir"
            copy_from_docker syncd "$sdk_dump_path" "$tmp_dump_dir"
            copied_from_container=true
        else
            echo "ERROR: dfw dumps cannot be collected"
            # Do not fall through: $tmp_dump_dir is either missing or holds
            # stale files left behind by an earlier run.
            return 0
        fi
        sdk_dump_path="$tmp_dump_dir"
    fi

    # The loop below only adds entries under sai_sdk_dump/, so the set of
    # log/ entries in the tarball cannot change; list the tarball once.
    local tar_listing
    tar_listing=$($TAR -tf "$TARFILE") || tar_listing=""

    local file
    # NOTE(review): the unquoted $(find_files ...) relies on word splitting,
    # as the original did, because the delimiter used by find_files is not
    # visible here. Confirm before switching to a 'while read' loop.
    for file in $(find_files "$sdk_dump_path"); do
        # Fixed-string, quiet match on the (possibly not yet gzipped) name:
        # "<name>" also matches the "<name>.gz" entry stored under log/.
        if grep -qF -- "$BASE/log/$(basename -- "$file")" <<<"$tar_listing"; then
            # If this path sits under "/var/log/" dir, the files would've
            # already been collected, and thus just add a sym link.
            if [[ "$file" != *.gz ]]; then
                # files saved under log/ are zipped with gz
                file=$file.gz
            fi
            ${CMD_PREFIX}save_symlink "${file}" sai_sdk_dump log
        elif [[ "$file" != *.gz ]]; then
            ${CMD_PREFIX}save_file "${file}" sai_sdk_dump true
        else
            ${CMD_PREFIX}save_file "${file}" sai_sdk_dump false
        fi
    done

    if [[ "$copied_from_container" == "true" ]]; then
        # Drop the temporary copy taken from the container.
        # NOTE(review): assumes save_file has finished with the source files
        # by the time it returns - confirm against save_file.
        $RM $V -rf "$tmp_dump_dir"
    fi
}
1084+
10051085
###############################################################################
10061086
# Collect Broadcom specific information
10071087
# Globals:
@@ -1503,6 +1583,10 @@ main() {
15031583
save_crash_files
15041584
save_warmboot_files
15051585

1586+
if [[ "$asic" = "mellanox" ]]; then
1587+
collect_mellanox_dfw_dumps
1588+
fi
1589+
15061590
finalize
15071591
}
15081592

0 commit comments

Comments
 (0)