diff --git a/eventrouter b/eventrouter index fec0a81d9..7c289ccf0 160000 --- a/eventrouter +++ b/eventrouter @@ -1 +1 @@ -Subproject commit fec0a81d9966c93bd544d5407be79001034e0c25 +Subproject commit 7c289ccf0ee93cf32ed19685bbd005a5c69749f1 diff --git a/fluentd/configs.d/openshift/filter-k8s-meta.conf b/fluentd/configs.d/openshift/filter-k8s-meta.conf index f78d1595c..c4bf1f903 100644 --- a/fluentd/configs.d/openshift/filter-k8s-meta.conf +++ b/fluentd/configs.d/openshift/filter-k8s-meta.conf @@ -20,3 +20,10 @@ preserve_json_log "#{ENV['PRESERVE_JSON_LOG'] || 'true'}" json_fields "#{ENV['JSON_FIELDS'] || 'log,MESSAGE'}" + + + @type parse_json_field + merge_json_log true + preserve_json_log true + json_fields "#{ENV['JSON_FIELDS'] || 'log,MESSAGE'}" + diff --git a/fluentd/configs.d/openshift/filter-post-genid.conf b/fluentd/configs.d/openshift/filter-post-genid.conf index 46f495b73..2ed58647b 100644 --- a/fluentd/configs.d/openshift/filter-post-genid.conf +++ b/fluentd/configs.d/openshift/filter-post-genid.conf @@ -4,5 +4,5 @@ @type elasticsearch_genid_ext hash_id_key viaq_msg_id alt_key kubernetes.event.metadata.uid - alt_tags "#{ENV['GENID_ALT_TAG'] || 'kubernetes.var.log.containers.logging-eventrouter-*.** kubernetes.journal.container._default_.kubernetes-event'}" + alt_tags "#{ENV['GENID_ALT_TAG'] || 'kubernetes.var.log.containers.logging-eventrouter-*.** kubernetes.var.log.containers.eventrouter-*.** kubernetes.var.log.containers.cluster-logging-eventrouter-*.** kubernetes.journal.container._default_.kubernetes-event'}" diff --git a/fluentd/configs.d/openshift/filter-viaq-data-model.conf b/fluentd/configs.d/openshift/filter-viaq-data-model.conf index 14c0380d1..70dae3a29 100644 --- a/fluentd/configs.d/openshift/filter-viaq-data-model.conf +++ b/fluentd/configs.d/openshift/filter-viaq-data-model.conf @@ -13,6 +13,7 @@ undefined_to_string "#{ENV['CDM_UNDEFINED_TO_STRING'] || 'false'}" undefined_dot_replace_char "#{ENV['CDM_UNDEFINED_DOT_REPLACE_CHAR'] || 'UNUSED'}" undefined_max_num_fields "#{ENV['CDM_UNDEFINED_MAX_NUM_FIELDS'] || '-1'}" + process_kubernetes_events "#{ENV['TRANSFORM_EVENTS'] || 'false'}" enabled false tag "audit.log**" diff --git a/fluentd/run.sh b/fluentd/run.sh index 5ff40ab25..7facdccf0 100644 --- a/fluentd/run.sh +++ b/fluentd/run.sh @@ -236,11 +236,6 @@ if [[ "${USE_REMOTE_SYSLOG:-}" = "true" ]] ; then fi fi -# Disable process_kubernetes_events if TRANSFORM_EVENTS is false client. -if [ "${TRANSFORM_EVENTS:-}" != true ] ; then - sed -i 's/\(.*@type viaq_data_model.*\)/\1\n process_kubernetes_events false/' $CFG_DIR/openshift/filter-viaq-data-model.conf -fi - if [ "${AUDIT_CONTAINER_ENGINE:-}" = "true" ] ; then cp -f $CFG_DIR/input-pre-audit-log.conf $CFG_DIR/openshift cp -f $CFG_DIR/filter-pre-a-audit-exclude.conf $CFG_DIR/openshift diff --git a/hack/test-logging.sh b/hack/test-logging.sh index 3d352f0f0..ff5e8347f 100755 --- a/hack/test-logging.sh +++ b/hack/test-logging.sh @@ -218,7 +218,7 @@ if [ -n "${TEST_SUITES:-}" ] ; then fi oc process -p TEST_ROOT=$testroot \ -p TEST_NAMESPACE_NAME=$( oc project -q ) \ - -p TEST_IMAGE=$testimage \ + -p TEST_IMAGE=$testimage -p IMAGE_FORMAT="${IMAGE_FORMAT:-}" \ ${artifact_dir_arg:-} ${test_suites_arg:-} \ -f hack/testing/templates/logging-ci-test-runner-template.yaml | oc create -f - diff --git a/hack/testing/templates/eventrouter_template.yaml b/hack/testing/templates/eventrouter_template.yaml new file mode 100644 index 000000000..d567511ee --- /dev/null +++ b/hack/testing/templates/eventrouter_template.yaml @@ -0,0 +1,97 @@ +kind: Template +apiVersion: v1 +metadata: + name: eventrouter-template + annotations: + description: "A pod forwarding kubernetes events to cluster logging stack." + tags: "events,EFK,logging,cluster-logging" +objects: + - kind: ServiceAccount + apiVersion: v1 + metadata: + name: eventrouter + namespace: ${NAMESPACE} + - kind: ClusterRole + apiVersion: v1 + metadata: + name: event-reader + rules: + - apiGroups: [""] + resources: ["events"] + verbs: ["get", "watch", "list"] + - kind: ClusterRoleBinding + apiVersion: v1 + metadata: + name: event-reader-binding + subjects: + - kind: ServiceAccount + name: eventrouter + namespace: ${NAMESPACE} + roleRef: + kind: ClusterRole + name: event-reader + - kind: ConfigMap + apiVersion: v1 + metadata: + name: eventrouter + namespace: ${NAMESPACE} + data: + config.json: |- + { + "sink": "stdout" + } + - kind: Deployment + apiVersion: apps/v1 + metadata: + name: eventrouter + namespace: ${NAMESPACE} + labels: + component: eventrouter + logging-infra: eventrouter + provider: openshift + spec: + selector: + matchLabels: + component: eventrouter + logging-infra: eventrouter + provider: openshift + replicas: 1 + template: + metadata: + labels: + component: eventrouter + logging-infra: eventrouter + provider: openshift + name: eventrouter + spec: + serviceAccount: eventrouter + containers: + - name: kube-eventrouter + image: ${IMAGE} + imagePullPolicy: IfNotPresent + resources: + limits: + memory: ${MEMORY} + requests: + cpu: ${CPU} + memory: ${MEMORY} + volumeMounts: + - name: config-volume + mountPath: /etc/eventrouter + volumes: + - name: config-volume + configMap: + name: eventrouter +parameters: + - name: IMAGE + displayName: Image + value: "registry.redhat.io/openshift4/ose-logging-eventrouter:latest" + - name: MEMORY + displayName: Memory + value: "128Mi" + - name: CPU + displayName: CPU + value: "100m" + - name: NAMESPACE + displayName: Namespace + value: "openshift-logging" diff --git a/hack/testing/templates/logging-ci-test-runner-template.yaml b/hack/testing/templates/logging-ci-test-runner-template.yaml index 305333073..6f74e098b 100644 --- a/hack/testing/templates/logging-ci-test-runner-template.yaml +++ b/hack/testing/templates/logging-ci-test-runner-template.yaml @@ -21,6 +21,15 @@ objects: imagePullPolicy: Always securityContext: privileged: true + env: + - name: IMAGE_FORMAT + value: "${IMAGE_FORMAT}" + - name: SUITE + value: "${TEST_SUITES}" + - name: ARTIFACT_DIR + value: "${ARTIFACT_DIR}" + - name: KUBECONFIG + value: "/tmp/admin.kubeconfig" command: - bash - -c @@ -44,14 +53,7 @@ objects: # image was built with source already on it cd "${TEST_ROOT}" fi - cp /tmp/secret/admin.kubeconfig.orig /tmp/admin.kubeconfig - export KUBECONFIG=/tmp/admin.kubeconfig - set +u - # the below is a template parameter evaluation, not a shell - # variable evaluation - export ARTIFACT_DIR=${ARTIFACT_DIR} - set -u - export SUITE="${TEST_SUITES}" + cp /tmp/secret/admin.kubeconfig.orig $KUBECONFIG . openshift/ci-operator/build-image/launch-e2e-test.sh volumeMounts: @@ -144,5 +146,8 @@ parameters: - description: test suites to run value: '.*' name: TEST_SUITES +- description: IMAGE_FORMAT is used to construct image names - set by CI if used + value: "" + name: IMAGE_FORMAT labels: test: "true" diff --git a/test/eventrouter.sh b/test/eventrouter.sh index 490845c8f..44174225c 100755 --- a/test/eventrouter.sh +++ b/test/eventrouter.sh @@ -5,36 +5,93 @@ source "$(dirname "${BASH_SOURCE[0]}" )/../hack/lib/init.sh" source "${OS_O_A_L_DIR}/hack/testing/util.sh" -function get_eventrouter_pod() { - oc get pods --namespace=default -l component=eventrouter --no-headers | awk '$3 == "Running" {print $1}' +EXTERNAL_REGISTRY=${EXTERNAL_REGISTRY:-registry.svc.ci.openshift.org} +EXT_REG_IMAGE_NS=${EXT_REG_IMAGE_NS:-origin} +MASTER_VERSION=${MASTER_VERSION:-4.3} +get_eventrouter_image() { + local tagsuffix="${1:-latest}" + local ns=openshift + if [ -n "${IMAGE_FORMAT:-}" ] ; then + echo ${IMAGE_FORMAT/'${component}'/logging-eventrouter} + elif oc -n $ns get istag origin-logging-eventrouter:$tagsuffix > /dev/null 2>&1 ; then + oc -n $ns get istag origin-logging-eventrouter:$tagsuffix -o jsonpath='{.image.dockerImageReference}' + else + # fallback to latest externally available image + echo $EXTERNAL_REGISTRY/$EXT_REG_IMAGE_NS/$MASTER_VERSION:logging-eventrouter + fi +} + +deploy_eventrouter() { + local image=$( get_eventrouter_image ) + local ns=${LOGGING_NS:-openshift-logging} + + # I want the template to be usable as-is, by any external user, so I + # don't want to have the nodeSelector in the file - this assumes the + # deployment is the last element in the template + oc process -p NAMESPACE=$ns -p IMAGE=$image \ + -f ${OS_O_A_L_DIR}/hack/testing/templates/eventrouter_template.yaml | \ + jq '.items[-1].spec.template.spec.nodeSelector["logging-ci-test"]="true"' | \ + jq '.items[-1].spec.template.spec.containers[0].imagePullPolicy="Always"' | \ + oc create -f - 2>&1 | artifact_out + local looptries=4 + local ii + # not sure what's going on here - sometimes eventrouter will get an ErrImagePull + # due to authentication issue to internal cluster registry - restarting the pod + # usually makes it work(?????) + for ii in $(seq 1 $looptries) ; do + if os::cmd::try_until_text "get_running_pod eventrouter" eventrouter 2>&1 | artifact_out; then + os::log::info started eventrouter pod $(get_running_pod eventrouter) + ii=1 + break + else + oc delete pod -l component=eventrouter 2>&1 | artifact_out || : + fi + sleep 1 + done + if [ $ii -eq $looptries ] ; then + os::log::error could not start eventrouter pod after $looptries tries + exit 1 + fi } -evpod=$( get_eventrouter_pod ) -if [ -z "$evpod" ]; then - os::log::warning "Eventrouter not deployed" - exit 0 -fi os::test::junit::declare_suite_start "test/eventrouter" FLUENTD_WAIT_TIME=${FLUENTD_WAIT_TIME:-$(( 3 * minute ))} -muxmode=$( oc set env $fluentd_ds --list | grep \^MUX_CLIENT_MODE ) || : -if [ -z "${muxmode:-}" ] ; then - muxmode=MUX_CLIENT_MODE- -fi - cleanup() { local return_code="$?" set +e + if [ $return_code -ne 0 ] ; then + get_all_logging_pod_logs + if [ -n "${evpod:-}" ] ; then + oc logs $evpod > $ARTIFACT_DIR/$evpod.log 2>&1 + fi + fi + # turn off fluentd eventrouter mode stop_fluentd "" $FLUENTD_WAIT_TIME 2>&1 | artifact_out - oc set env $fluentd_ds $muxmode 2>&1 | artifact_out + oc set env $fluentd_ds TRANSFORM_EVENTS- 2>&1 | artifact_out start_fluentd false $FLUENTD_WAIT_TIME 2>&1 | artifact_out + oc process -f ${OS_O_A_L_DIR}/hack/testing/templates/eventrouter_template.yaml | \ + oc delete -f - 2>&1 | artifact_out + os::cmd::try_until_failure "oc get deploy/eventrouter > /dev/null 2>&1" # this will call declare_test_end, suite_end, etc. os::test::junit::reconcile_output exit $return_code } trap "cleanup" EXIT +# put fluentd in eventrouter mode +stop_fluentd "" $FLUENTD_WAIT_TIME 2>&1 | artifact_out +oc set env $fluentd_ds TRANSFORM_EVENTS=true 2>&1 | artifact_out +start_fluentd false $FLUENTD_WAIT_TIME 2>&1 | artifact_out + +deploy_eventrouter +evpod=$( get_running_pod eventrouter ) +if [ -z "$evpod" ]; then + os::log::warning "Eventrouter not deployed" + exit 0 +fi + function warn_nonformatted() { local es_svc=$1 local index=$2 @@ -42,6 +99,8 @@ function warn_nonformatted() { local non_formatted_event_count=$( curl_es $es_svc $index/_count?q=verb:* | get_count_from_json ) if [ "$non_formatted_event_count" != 0 ]; then os::log::warning "$non_formatted_event_count events from eventrouter in index $index were not processed by ViaQ fluentd plugin" + else + os::log::info "good - looks like all eventrouter events were processed by fluentd" fi } @@ -55,38 +114,53 @@ essvc=$( get_es_svc es ) esopssvc=$( get_es_svc es-ops ) esopssvc=${esopssvc:-$essvc} -# Make sure there's no MUX -# undeploy fluentd -stop_fluentd "" $FLUENTD_WAIT_TIME 2>&1 | artifact_out -oc set env $fluentd_ds MUX_CLIENT_MODE- 2>&1 | artifact_out -start_fluentd false 2>&1 | artifact_out - warn_nonformatted $essvc '/project.*' warn_nonformatted $esopssvc '/.operations.*' os::cmd::try_until_not_text "curl_es $esopssvc /.operations.*/_count?q=kubernetes.event.verb:* | get_count_from_json" "^0\$" $FLUENTD_WAIT_TIME prev_event_count=$( curl_es $esopssvc /.operations.*/_count?q=kubernetes.event.verb:* | get_count_from_json ) -# utilize mux if mux pod exists -if oc get dc/logging-mux > /dev/null 2>&1 ; then - # MUX_CLIENT_MODE: maximal - stop_fluentd "" $FLUENTD_WAIT_TIME 2>&1 | artifact_out - oc set env $fluentd_ds MUX_CLIENT_MODE=maximal 2>&1 | artifact_out - start_fluentd false $FLUENTD_WAIT_TIME 2>&1 | artifact_out - os::cmd::try_until_success "logs_count_is_gt $prev_event_count" $FLUENTD_WAIT_TIME - prev_event_count=$( curl_es $esopssvc /.operations.*/_count?q=kubernetes.event.verb:* | get_count_from_json ) +# Check if 1) the doc _id is the same as the kube id 2) there's no duplicates +curl_es $esopssvc /.operations.*/_search?pretty\&q=kubernetes.event:*\&size=9999 > $ARTIFACT_DIR/id-dup-search-raw.json 2>&1 +cat $ARTIFACT_DIR/id-dup-search-raw.json | jq -r '.hits.hits[] | ._id + " " + ._source.kubernetes.event.metadata.uid' | sort > $ARTIFACT_DIR/id-and-uid +os::cmd::expect_success "test -s $ARTIFACT_DIR/id-and-uid" +cat $ARTIFACT_DIR/id-and-uid | awk '{ + if ($1 != $2) {print "Error: es _id " $1 " not equal to kube uid " $2; exit 1} + if ($1 == last1) {print "Error: found duplicate es _id " $1; exit 1} + if ($2 == last2) {print "Error: found duplicate kube uid " $2; exit 1} + last1 = $1; last2 = $2 +}' + +oc apply -f - <&1 | artifact_out - oc set env $fluentd_ds MUX_CLIENT_MODE=minimal 2>&1 | artifact_out - start_fluentd false $FLUENTD_WAIT_TIME 2>&1 | artifact_out - os::cmd::try_until_success "logs_count_is_gt $prev_event_count" $FLUENTD_WAIT_TIME +if ! os::cmd::try_until_text "curl_es $esopssvc /.operations.*/_count?q=kubernetes.event.metadata.name:eventroutertest | get_count_from_json" "^1\$" $FLUENTD_WAIT_TIME ; then + os::log::error did not find 1 record with type Info + curl_es $esopssvc /.operations.*/_search?q=kubernetes.event.metadata.name:eventroutertest\&pretty > $ARTIFACT_DIR/info-search.json 2>&1 || : + exit 1 fi -# Check if there's no duplicates -fpod=$( get_running_pod fluentd ) -qs='{"query":{ "bool": { "must": [ {"term":{"kubernetes.event.verb":"ADDED"}}, {"match":{"message":"'"${fpod}"'"}} ] } }, "_source": ["kubernetes.event.metadata.uid", "message"] }' -ids=$( curl_es $esopssvc /.operations.*/_search -X POST -d "$qs" | python -mjson.tool | egrep uid | awk '{print $2}' | sed -e "s/\"//g" ) -for id in $ids; do - os::cmd::expect_success_and_text "curl_es $esopssvc /.operations.*/_count?q=kubernetes.event.metadata.uid:$id | get_count_from_json" "^1\$" -done