From e68ae2e43e59a30995e80887177ba815aee10199 Mon Sep 17 00:00:00 2001 From: Xiaozheng Zhang <63225111+zhang-x-z@users.noreply.github.com> Date: Mon, 4 Sep 2023 09:59:50 +0800 Subject: [PATCH] Fix alluxio dataload bug (#3418) Signed-off-by: ZhangXiaozheng --- .../alluxio/templates/configmap.yaml | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/charts/fluid-dataloader/alluxio/templates/configmap.yaml b/charts/fluid-dataloader/alluxio/templates/configmap.yaml index f96de7aa77e..3bda23b2164 100644 --- a/charts/fluid-dataloader/alluxio/templates/configmap.yaml +++ b/charts/fluid-dataloader/alluxio/templates/configmap.yaml @@ -52,13 +52,26 @@ data: fi } + function needPreLoadMetadata() { + local alluxioVersion=$(alluxio version) + test "$(echo "$alluxioVersion 2.8.0" | tr " " "\n" | sort -rV | head -n 1)" == "$alluxioVersion" + } + function distributedLoad() { local path=$1 local replica=$2 checkPathExistence "$path" alluxio fs setReplication --max $replica -R $path if [[ $needLoadMetadata == 'true' ]]; then - time alluxio fs distributedLoad -Dalluxio.user.file.metadata.sync.interval=0 --replication $replica $path + # For Alluxio 2.8.0 and above, distributedLoad with -Dalluxio.user.file.metadata.sync.interval=0 cannot load newly added files. + # Related issue: https://github.com/Alluxio/alluxio/issues/17827 + # Run ls with -Dalluxio.user.file.metadata.sync.interval=0 first, then distributedLoad without it. + if needPreLoadMetadata; then + time alluxio fs ls -Dalluxio.user.file.metadata.sync.interval=0 -R $path + time alluxio fs distributedLoad --replication $replica $path + else + time alluxio fs distributedLoad -Dalluxio.user.file.metadata.sync.interval=0 --replication $replica $path + fi else time alluxio fs distributedLoad --replication $replica $path fi