diff --git a/hadoop-ozone/dist/src/main/compose/ozone-balancer/.env b/hadoop-ozone/dist/src/main/compose/ozone-balancer/.env
new file mode 100644
index 000000000000..0e99fab82fd0
--- /dev/null
+++ b/hadoop-ozone/dist/src/main/compose/ozone-balancer/.env
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+HDDS_VERSION=${hdds.version}
+OZONE_RUNNER_VERSION=${docker.ozone-runner.version}
+OZONE_RUNNER_IMAGE=apache/ozone-runner
+OZONE_OPTS=
diff --git a/hadoop-ozone/dist/src/main/compose/ozone-balancer/docker-compose.yaml b/hadoop-ozone/dist/src/main/compose/ozone-balancer/docker-compose.yaml
new file mode 100644
index 000000000000..dc6bae7822e5
--- /dev/null
+++ b/hadoop-ozone/dist/src/main/compose/ozone-balancer/docker-compose.yaml
@@ -0,0 +1,183 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+version: "3.8"
+
+# reusable fragments (see https://docs.docker.com/compose/compose-file/#extension-fields)
+x-common-config:
+  &common-config
+  image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION}
+  volumes:
+    - ../..:/opt/hadoop
+  env_file:
+    - docker-config
+
+x-replication:
+  &replication
+  OZONE-SITE.XML_ozone.server.default.replication: ${OZONE_REPLICATION_FACTOR:-3}
+
+services:
+  # Four datanodes; each mounts its own tmpfs-backed named volume at /data.
+  datanode1:
+    <<: *common-config
+    ports:
+      - 19864
+      - 9882
+    environment:
+      <<: *replication
+    command: ["ozone","datanode"]
+    # An explicit 'volumes' key overrides the one merged from common-config, so the /opt/hadoop mount is listed again.
+    volumes:
+      - tmpfs1:/data
+      - ../..:/opt/hadoop
+  datanode2:
+    <<: *common-config
+    ports:
+      - 19864
+      - 9882
+    environment:
+      <<: *replication
+    command: ["ozone","datanode"]
+    volumes:
+      - tmpfs2:/data
+      - ../..:/opt/hadoop
+  datanode3:
+    <<: *common-config
+    ports:
+      - 19864
+      - 9882
+    environment:
+      <<: *replication
+    command: ["ozone","datanode"]
+    volumes:
+      - tmpfs3:/data
+      - ../..:/opt/hadoop
+  datanode4:
+    <<: *common-config
+    ports:
+      - 19864
+      - 9882
+    environment:
+      <<: *replication
+    command: ["ozone","datanode"]
+    volumes:
+      - tmpfs4:/data
+      - ../..:/opt/hadoop
+  om1:
+    <<: *common-config
+    environment:
+      WAITFOR: scm3:9894
+      ENSURE_OM_INITIALIZED: /data/metadata/om/current/VERSION
+      <<: *replication
+    ports:
+      - 9874:9874
+      - 9862
+    hostname: om1
+    command: ["ozone","om"]
+  om2:
+    <<: *common-config
+    environment:
+      WAITFOR: scm3:9894
+      ENSURE_OM_INITIALIZED: /data/metadata/om/current/VERSION
+      <<: *replication
+    ports:
+      - 9874
+      - 9862
+    hostname: om2
+    command: ["ozone","om"]
+  om3:
+    <<: *common-config
+    environment:
+      WAITFOR: scm3:9894
+      ENSURE_OM_INITIALIZED: /data/metadata/om/current/VERSION
+      <<: *replication
+    ports:
+      - 9874
+      - 9862
+    hostname: om3
+    command: ["ozone","om"]
+  # scm1 sets ENSURE_SCM_INITIALIZED; scm2 and scm3 set ENSURE_SCM_BOOTSTRAPPED and WAITFOR the previous SCM on port 9894.
+  scm1:
+    <<: *common-config
+    ports:
+      - 9876:9876
+    environment:
+      ENSURE_SCM_INITIALIZED: /data/metadata/scm/current/VERSION
+      OZONE-SITE.XML_hdds.scm.safemode.min.datanode: ${OZONE_SAFEMODE_MIN_DATANODES:-1}
+      <<: *replication
+    command: ["ozone","scm"]
+  scm2:
+    <<: *common-config
+    ports:
+      - 9876
+    environment:
+      WAITFOR: scm1:9894
+      ENSURE_SCM_BOOTSTRAPPED: /data/metadata/scm/current/VERSION
+      OZONE-SITE.XML_hdds.scm.safemode.min.datanode: ${OZONE_SAFEMODE_MIN_DATANODES:-1}
+      <<: *replication
+    command: ["ozone","scm"]
+  scm3:
+    <<: *common-config
+    ports:
+      - 9876
+    environment:
+      WAITFOR: scm2:9894
+      ENSURE_SCM_BOOTSTRAPPED: /data/metadata/scm/current/VERSION
+      OZONE-SITE.XML_hdds.scm.safemode.min.datanode: ${OZONE_SAFEMODE_MIN_DATANODES:-1}
+      <<: *replication
+    command: ["ozone","scm"]
+  httpfs:
+    <<: *common-config
+    environment:
+      OZONE-SITE.XML_hdds.scm.safemode.min.datanode: ${OZONE_SAFEMODE_MIN_DATANODES:-1}
+      <<: *replication
+    ports:
+      - 14000:14000
+    command: ["ozone","httpfs"]
+  s3g:
+    <<: *common-config
+    environment:
+      OZONE_OPTS:
+      <<: *replication
+    ports:
+      - 9878:9878
+    command: ["ozone","s3g"]
+# One local tmpfs volume (size=1g) per datanode; note that each one is created with a different uid.
+volumes:
+  tmpfs1:
+    driver: local
+    driver_opts:
+      o: "size=1g,uid=1000"
+      device: tmpfs
+      type: tmpfs
+  tmpfs2:
+    driver: local
+    driver_opts:
+      o: "size=1g,uid=2000"
+      device: tmpfs
+      type: tmpfs
+  tmpfs3:
+    driver: local
+    driver_opts:
+      o: "size=1g,uid=3000"
+      device: tmpfs
+      type: tmpfs
+  tmpfs4:
+    driver: local
+    driver_opts:
+      o: "size=1g,uid=4000"
+      device: tmpfs
+      type: tmpfs
diff --git a/hadoop-ozone/dist/src/main/compose/ozone-balancer/docker-config b/hadoop-ozone/dist/src/main/compose/ozone-balancer/docker-config
new file mode 100644
index 000000000000..60e8afe6e1f6
--- /dev/null
+++ b/hadoop-ozone/dist/src/main/compose/ozone-balancer/docker-config
@@ -0,0 +1,64 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# For HttpFS service it is required to enable proxying users.
+CORE-SITE.XML_hadoop.proxyuser.hadoop.hosts=*
+CORE-SITE.XML_hadoop.proxyuser.hadoop.groups=*
+
+CORE-SITE.XML_fs.defaultFS=ofs://om/
+CORE-SITE.XML_fs.trash.interval=1
+
+OZONE-SITE.XML_ozone.om.service.ids=om
+OZONE-SITE.XML_ozone.om.nodes.om=om1,om2,om3
+OZONE-SITE.XML_ozone.om.address.om.om1=om1
+OZONE-SITE.XML_ozone.om.address.om.om2=om2
+OZONE-SITE.XML_ozone.om.address.om.om3=om3
+OZONE-SITE.XML_ozone.om.ratis.enable=true
+
+OZONE-SITE.XML_ozone.scm.service.ids=scmservice
+OZONE-SITE.XML_ozone.scm.nodes.scmservice=scm1,scm2,scm3
+OZONE-SITE.XML_ozone.scm.address.scmservice.scm1=scm1
+OZONE-SITE.XML_ozone.scm.address.scmservice.scm2=scm2
+OZONE-SITE.XML_ozone.scm.address.scmservice.scm3=scm3
+OZONE-SITE.XML_ozone.scm.ratis.enable=true
+OZONE-SITE.XML_ozone.scm.datanode.id.dir=/data
+OZONE-SITE.XML_ozone.scm.container.size=100MB
+OZONE-SITE.XML_ozone.scm.block.size=20MB
+OZONE-SITE.XML_ozone.scm.datanode.ratis.volume.free-space.min=10MB
+OZONE-SITE.XML_ozone.metadata.dirs=/data/metadata
+OZONE-SITE.XML_hdds.node.report.interval=20s
+OZONE-SITE.XML_hdds.heartbeat.interval=20s
+OZONE-SITE.XML_hdds.datanode.du.refresh.period=20s
+OZONE-SITE.XML_hdds.datanode.dir=/data/hdds
+OZONE-SITE.XML_hdds.datanode.volume.min.free.space=100MB
+OZONE-SITE.XML_ozone.scm.pipeline.creation.auto.factor.one=false
+OZONE-SITE.XML_ozone.datanode.pipeline.limit=1
+OZONE-SITE.XML_hdds.scmclient.max.retry.timeout=30s
+OZONE-SITE.XML_ozone.scm.primordial.node.id=scm1
+OZONE-SITE.XML_hdds.container.report.interval=30s
+OZONE-SITE.XML_ozone.om.s3.grpc.server_enabled=true
+# NOTE(review): the compose file in this directory defines no recon service; these recon settings look unused here - confirm.
+OZONE-SITE.XML_ozone.recon.db.dir=/data/metadata/recon
+OZONE-SITE.XML_ozone.recon.address=recon:9891
+OZONE-SITE.XML_ozone.recon.http-address=0.0.0.0:9888
+OZONE-SITE.XML_ozone.recon.https-address=0.0.0.0:9889
+OZONE-SITE.XML_dfs.container.ratis.datastream.enabled=true
+
+OZONE_CONF_DIR=/etc/hadoop
+OZONE_LOG_DIR=/var/log/hadoop
+
+# Hosts excluded from proxying (standard no_proxy variable); scm1-3 are the SCM hostnames used by this cluster.
+no_proxy=om1,om2,om3,scm,scm1,scm2,scm3,s3g,recon,kdc,localhost,127.0.0.1
diff --git a/hadoop-ozone/dist/src/main/compose/ozone-balancer/test.sh b/hadoop-ozone/dist/src/main/compose/ozone-balancer/test.sh
new file mode 100644
index 000000000000..e79979877ba3
--- /dev/null
+++ b/hadoop-ozone/dist/src/main/compose/ozone-balancer/test.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#suite:balancer
+
+COMPOSE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+export COMPOSE_DIR
+export OM_SERVICE_ID="om"
+export OM=om1
+export SCM=scm1
+export OZONE_REPLICATION_FACTOR=3
+
+# shellcheck source=/dev/null
+source "$COMPOSE_DIR/../testlib.sh"
+
+# The balancer test needs 4 datanodes (replication factor 3, plus datanode1 which the test puts into maintenance).
+start_docker_env 4
+
+execute_robot_test "${OM}" balancer/testBalancer.robot
diff --git a/hadoop-ozone/dist/src/main/smoketest/balancer/testBalancer.robot b/hadoop-ozone/dist/src/main/smoketest/balancer/testBalancer.robot
new file mode 100644
index 000000000000..6e2fb9d85a56
--- /dev/null
+++ b/hadoop-ozone/dist/src/main/smoketest/balancer/testBalancer.robot
@@ -0,0 +1,143 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+*** Settings ***
+Documentation       Smoketest for the Ozone container balancer
+Library             OperatingSystem
+Library             Collections
+Resource            ../commonlib.robot
+Resource            ../ozone-lib/shell.robot
+
+Test Timeout        20 minutes
+
+*** Variables ***
+${SECURITY_ENABLED}    false
+${HOST}                datanode1
+${VOLUME}              volume1
+${BUCKET}              bucket1
+${SIZE}                104857600
+
+
+*** Keywords ***
+Prepare For Tests
+    Execute    dd if=/dev/urandom of=/tmp/100mb bs=1048576 count=100
+    Run Keyword if    '${SECURITY_ENABLED}' == 'true'    Kinit test user    testuser    testuser.keytab
+    Execute    ozone sh volume create /${VOLUME}
+    Execute    ozone sh bucket create /${VOLUME}/${BUCKET}
+
+
+Datanode In Maintenance Mode
+    ${result} =    Execute    ozone admin datanode maintenance ${HOST}
+    Should Contain    ${result}    Entering maintenance mode on datanode
+    # Retry a keyword that re-queries the datanode list; asserting on a value captured once could never change between retries.
+    Wait Until Keyword Succeeds    30sec    5sec    Datanode Is Entering Maintenance
+    Wait Until Keyword Succeeds    3min    10sec    Related pipelines are closed
+    Sleep    60000ms
+
+Datanode Is Entering Maintenance
+    [Documentation]    Lists the datanodes' operational states and checks that ENTERING_MAINTENANCE is reported.
+    ${result} =    Execute    ozone admin datanode list | grep "Operational State:*"
+    Should contain    ${result}    ENTERING_MAINTENANCE
+
+Related pipelines are closed
+    ${result} =    Execute    ozone admin datanode list | awk -v RS= '{$1=$1}1'|grep MAINT | sed -e 's/^.*pipelines: \\(.*\\)$/\\1/'
+    Should Contain Any    ${result}    CLOSED    No related pipelines or the node is not in Healthy state.
+
+Datanode Recommission
+    ${result} =    Execute    ozone admin datanode recommission ${HOST}
+    Should Contain    ${result}    Started recommissioning datanode
+    Wait Until Keyword Succeeds    1min    10sec    Datanode Recommission is Finished
+    Sleep    300000ms
+
+Datanode Recommission is Finished
+    ${result} =    Execute    ozone admin datanode list | grep "Operational State:*"
+    Should Not Contain    ${result}    ENTERING_MAINTENANCE
+
+Run Container Balancer
+    ${result} =    Execute    ozone admin containerbalancer start -t 1 -d 100 -i 1
+    Should Contain    ${result}    Container Balancer started successfully.
+    ${result} =    Execute    ozone admin containerbalancer status
+    Should Contain    ${result}    ContainerBalancer is Running.
+    Wait Until Keyword Succeeds    3min    10sec    ContainerBalancer is Not Running
+    Sleep    60000ms
+
+ContainerBalancer is Not Running
+    ${result} =    Execute    ozone admin containerbalancer status
+    Should contain    ${result}    ContainerBalancer is Not Running.
+
+Create Multiple Keys
+    [arguments]    ${NUM_KEYS}
+    ${file} =    Set Variable    /tmp/100mb
+    FOR    ${INDEX}    IN RANGE    ${NUM_KEYS}
+        ${fileName} =    Set Variable    file-${INDEX}.txt
+        ${key} =    Set Variable    /${VOLUME}/${BUCKET}/${fileName}
+        LOG    ${fileName}
+        Create Key    ${key}    ${file}
+        Key Should Match Local File    ${key}    ${file}
+    END
+
+Datanode Usageinfo
+    [arguments]    ${uuid}
+    ${result} =    Execute    ozone admin datanode usageinfo --uuid=${uuid}
+    Should Contain    ${result}    Ozone Used
+
+Get Uuid
+    ${result} =    Execute    ozone admin datanode list | awk -v RS= '{$1=$1}1'| grep ${HOST} | sed -e 's/Datanode: //'|sed -e 's/ .*$//'
+    [return]    ${result}
+
+Close All Containers
+    FOR    ${INDEX}    IN RANGE    15
+        ${container} =    Execute    ozone admin container list --state OPEN | jq -r 'select(.replicationConfig.replicationFactor == "THREE") | .containerID' | head -1
+        EXIT FOR LOOP IF    "${container}" == ""
+        Execute    ozone admin container close "${container}"
+        ${output} =    Execute    ozone admin container info "${container}"
+        Should contain    ${output}    CLOS
+    END
+    Wait until keyword succeeds    3min    10sec    All container is closed
+
+All container is closed
+    ${output} =    Execute    ozone admin container list --state OPEN
+    Should Be Empty    ${output}
+
+Get Datanode Ozone Used Bytes Info
+    [arguments]    ${uuid}
+    ${output} =    Execute    export DATANODES=$(ozone admin datanode list --json) && for datanode in $(echo "$\{DATANODES\}" | jq -r '.[].datanodeDetails.uuid'); do ozone admin datanode usageinfo --uuid=$\{datanode\} --json | jq '{(.[0].datanodeDetails.uuid) : .[0].ozoneUsed}'; done | jq -s add
+    ${result} =    Execute    echo '${output}' | jq '. | to_entries | .[] | select(.key == "${uuid}") | .value'
+    [return]    ${result}
+
+*** Test Cases ***
+Verify Container Balancer for RATIS containers
+    Prepare For Tests
+
+    Datanode In Maintenance Mode
+
+    ${uuid} =    Get Uuid
+    Datanode Usageinfo    ${uuid}
+
+    Create Multiple Keys    3
+
+    Close All Containers
+
+    ${datanodeOzoneUsedBytesInfo} =    Get Datanode Ozone Used Bytes Info    ${uuid}
+    Should Be True    ${datanodeOzoneUsedBytesInfo} < ${SIZE}
+
+    Datanode Recommission
+
+    Run Container Balancer
+
+    ${datanodeOzoneUsedBytesInfoAfterContainerBalancing} =    Get Datanode Ozone Used Bytes Info    ${uuid}
+    Should Not Be Equal As Integers    ${datanodeOzoneUsedBytesInfo}    ${datanodeOzoneUsedBytesInfoAfterContainerBalancing}
+    Should Be True    ${datanodeOzoneUsedBytesInfoAfterContainerBalancing} < ${SIZE} * 3.5
+    Should Be True    ${datanodeOzoneUsedBytesInfoAfterContainerBalancing} > ${SIZE} * 3