Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion eventrouter
7 changes: 7 additions & 0 deletions fluentd/configs.d/openshift/filter-k8s-meta.conf
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,10 @@
preserve_json_log "#{ENV['PRESERVE_JSON_LOG'] || 'true'}"
json_fields "#{ENV['JSON_FIELDS'] || 'log,MESSAGE'}"
</filter>

<filter kubernetes.var.log.containers.eventrouter-** kubernetes.var.log.containers.cluster-logging-eventrouter-**>
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cluster-logging-eventrouter is matched to work around a "bug" in the current documentation - https://docs.openshift.com/container-platform/4.1/logging/efk-logging-eventrouter.html - the documentation uses that name, but it really should be eventrouter

@type parse_json_field
merge_json_log true
preserve_json_log true
json_fields "#{ENV['JSON_FIELDS'] || 'log,MESSAGE'}"
</filter>
2 changes: 1 addition & 1 deletion fluentd/configs.d/openshift/filter-post-genid.conf
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@
@type elasticsearch_genid_ext
hash_id_key viaq_msg_id
alt_key kubernetes.event.metadata.uid
alt_tags "#{ENV['GENID_ALT_TAG'] || 'kubernetes.var.log.containers.logging-eventrouter-*.** kubernetes.journal.container._default_.kubernetes-event'}"
alt_tags "#{ENV['GENID_ALT_TAG'] || 'kubernetes.var.log.containers.logging-eventrouter-*.** kubernetes.var.log.containers.eventrouter-*.** kubernetes.var.log.containers.cluster-logging-eventrouter-*.** kubernetes.journal.container._default_.kubernetes-event'}"
</filter>
1 change: 1 addition & 0 deletions fluentd/configs.d/openshift/filter-viaq-data-model.conf
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
undefined_to_string "#{ENV['CDM_UNDEFINED_TO_STRING'] || 'false'}"
undefined_dot_replace_char "#{ENV['CDM_UNDEFINED_DOT_REPLACE_CHAR'] || 'UNUSED'}"
undefined_max_num_fields "#{ENV['CDM_UNDEFINED_MAX_NUM_FIELDS'] || '-1'}"
process_kubernetes_events "#{ENV['TRANSFORM_EVENTS'] || 'false'}"
<formatter>
enabled false
tag "audit.log**"
Expand Down
5 changes: 0 additions & 5 deletions fluentd/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -236,11 +236,6 @@ if [[ "${USE_REMOTE_SYSLOG:-}" = "true" ]] ; then
fi
fi

# Disable process_kubernetes_events unless TRANSFORM_EVENTS is true.
if [ "${TRANSFORM_EVENTS:-}" != true ] ; then
sed -i 's/\(.*@type viaq_data_model.*\)/\1\n process_kubernetes_events false/' $CFG_DIR/openshift/filter-viaq-data-model.conf
fi

if [ "${AUDIT_CONTAINER_ENGINE:-}" = "true" ] ; then
cp -f $CFG_DIR/input-pre-audit-log.conf $CFG_DIR/openshift
cp -f $CFG_DIR/filter-pre-a-audit-exclude.conf $CFG_DIR/openshift
Expand Down
2 changes: 1 addition & 1 deletion hack/test-logging.sh
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ if [ -n "${TEST_SUITES:-}" ] ; then
fi
oc process -p TEST_ROOT=$testroot \
-p TEST_NAMESPACE_NAME=$( oc project -q ) \
-p TEST_IMAGE=$testimage \
-p TEST_IMAGE=$testimage -p IMAGE_FORMAT="${IMAGE_FORMAT:-}" \
${artifact_dir_arg:-} ${test_suites_arg:-} \
-f hack/testing/templates/logging-ci-test-runner-template.yaml | oc create -f -

Expand Down
97 changes: 97 additions & 0 deletions hack/testing/templates/eventrouter_template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
kind: Template
apiVersion: v1
metadata:
name: eventrouter-template
annotations:
description: "A pod forwarding kubernetes events to cluster logging stack."
tags: "events,EFK,logging,cluster-logging"
objects:
- kind: ServiceAccount
apiVersion: v1
metadata:
name: eventrouter
namespace: ${NAMESPACE}
- kind: ClusterRole
apiVersion: v1
metadata:
name: event-reader
rules:
- apiGroups: [""]
resources: ["events"]
verbs: ["get", "watch", "list"]
- kind: ClusterRoleBinding
apiVersion: v1
metadata:
name: event-reader-binding
subjects:
- kind: ServiceAccount
name: eventrouter
namespace: ${NAMESPACE}
roleRef:
kind: ClusterRole
name: event-reader
- kind: ConfigMap
apiVersion: v1
metadata:
name: eventrouter
namespace: ${NAMESPACE}
data:
config.json: |-
{
"sink": "stdout"
}
- kind: Deployment
apiVersion: apps/v1
metadata:
name: eventrouter
namespace: ${NAMESPACE}
labels:
component: eventrouter
logging-infra: eventrouter
provider: openshift
spec:
selector:
matchLabels:
component: eventrouter
logging-infra: eventrouter
provider: openshift
replicas: 1
template:
metadata:
labels:
component: eventrouter
logging-infra: eventrouter
provider: openshift
name: eventrouter
spec:
serviceAccount: eventrouter
containers:
- name: kube-eventrouter
image: ${IMAGE}
imagePullPolicy: IfNotPresent
resources:
limits:
memory: ${MEMORY}
requests:
cpu: ${CPU}
memory: ${MEMORY}
volumeMounts:
- name: config-volume
mountPath: /etc/eventrouter
volumes:
- name: config-volume
configMap:
name: eventrouter
parameters:
- name: IMAGE
displayName: Image
value: "registry.redhat.io/openshift4/ose-logging-eventrouter:latest"
- name: MEMORY
displayName: Memory
value: "128Mi"
- name: CPU
displayName: CPU
value: "100m"
- name: NAMESPACE
displayName: Namespace
value: "openshift-logging"
21 changes: 13 additions & 8 deletions hack/testing/templates/logging-ci-test-runner-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,15 @@ objects:
imagePullPolicy: Always
securityContext:
privileged: true
env:
- name: IMAGE_FORMAT
value: "${IMAGE_FORMAT}"
- name: SUITE
value: "${TEST_SUITES}"
- name: ARTIFACT_DIR
value: "${ARTIFACT_DIR}"
- name: KUBECONFIG
value: "/tmp/admin.kubeconfig"
command:
- bash
- -c
Expand All @@ -44,14 +53,7 @@ objects:
# image was built with source already on it
cd "${TEST_ROOT}"
fi
cp /tmp/secret/admin.kubeconfig.orig /tmp/admin.kubeconfig
export KUBECONFIG=/tmp/admin.kubeconfig
set +u
# the below is a template parameter evaluation, not a shell
# variable evaluation
export ARTIFACT_DIR=${ARTIFACT_DIR}
set -u
export SUITE="${TEST_SUITES}"
cp /tmp/secret/admin.kubeconfig.orig $KUBECONFIG
. openshift/ci-operator/build-image/launch-e2e-test.sh

volumeMounts:
Expand Down Expand Up @@ -144,5 +146,8 @@ parameters:
- description: test suites to run
value: '.*'
name: TEST_SUITES
- description: IMAGE_FORMAT is used to construct image names - set by CI if used
value: ""
name: IMAGE_FORMAT
labels:
test: "true"
152 changes: 113 additions & 39 deletions test/eventrouter.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,43 +5,102 @@
source "$(dirname "${BASH_SOURCE[0]}" )/../hack/lib/init.sh"
source "${OS_O_A_L_DIR}/hack/testing/util.sh"

function get_eventrouter_pod() {
oc get pods --namespace=default -l component=eventrouter --no-headers | awk '$3 == "Running" {print $1}'
# Defaults for the fallback image reference built by get_eventrouter_image:
#   $EXTERNAL_REGISTRY/$EXT_REG_IMAGE_NS/$MASTER_VERSION:logging-eventrouter
# Each value can be overridden from the environment.
EXTERNAL_REGISTRY=${EXTERNAL_REGISTRY:-registry.svc.ci.openshift.org}
EXT_REG_IMAGE_NS=${EXT_REG_IMAGE_NS:-origin}
MASTER_VERSION=${MASTER_VERSION:-4.3}
#######################################
# Print the eventrouter image reference to use for the test deployment.
# Resolution order:
#   1. IMAGE_FORMAT, with its literal ${component} placeholder replaced by
#      logging-eventrouter
#   2. the origin-logging-eventrouter:<tag> imagestream tag in the
#      "openshift" namespace, if `oc` can read it
#   3. $EXTERNAL_REGISTRY/$EXT_REG_IMAGE_NS/$MASTER_VERSION:logging-eventrouter
# Globals:   IMAGE_FORMAT, EXTERNAL_REGISTRY, EXT_REG_IMAGE_NS,
#            MASTER_VERSION (read)
# Arguments: $1 - imagestream tag to look up (default: latest)
# Outputs:   the image reference on stdout
#######################################
get_eventrouter_image() {
  local tagsuffix="${1:-latest}"
  local ns=openshift
  local image
  if [ -n "${IMAGE_FORMAT:-}" ] ; then
    # The single quotes keep ${component} a literal pattern. A plain assignment
    # is not word-split or globbed, so the value is preserved exactly.
    image=${IMAGE_FORMAT/'${component}'/logging-eventrouter}
    printf '%s\n' "$image"
  elif oc -n "$ns" get istag "origin-logging-eventrouter:$tagsuffix" > /dev/null 2>&1 ; then
    oc -n "$ns" get istag "origin-logging-eventrouter:$tagsuffix" -o jsonpath='{.image.dockerImageReference}'
  else
    # fallback to latest externally available image
    printf '%s\n' "$EXTERNAL_REGISTRY/$EXT_REG_IMAGE_NS/$MASTER_VERSION:logging-eventrouter"
  fi
}

#######################################
# Deploy the eventrouter from the test template and wait for its pod to run.
# Globals:   LOGGING_NS (read; default openshift-logging), OS_O_A_L_DIR (read)
# Outputs:   command output is sent through artifact_out
# Returns:   exits the script with status 1 if the image cannot be resolved
#            or the pod is not running after all attempts
#######################################
deploy_eventrouter() {
  local image
  # Declared separately so a failed lookup is not masked by `local`.
  if ! image=$( get_eventrouter_image ) || [ -z "$image" ] ; then
    os::log::error could not determine eventrouter image
    exit 1
  fi
  local ns=${LOGGING_NS:-openshift-logging}

  # I want the template to be usable as-is, by any external user, so I
  # don't want to have the nodeSelector in the file - this assumes the
  # deployment is the last element in the template
  oc process -p NAMESPACE="$ns" -p IMAGE="$image" \
    -f "${OS_O_A_L_DIR}/hack/testing/templates/eventrouter_template.yaml" | \
    jq '.items[-1].spec.template.spec.nodeSelector["logging-ci-test"]="true"
        | .items[-1].spec.template.spec.containers[0].imagePullPolicy="Always"' | \
    oc create -f - 2>&1 | artifact_out

  local looptries=4
  local ii
  # Explicit success flag, so the loop counter is not overloaded as a sentinel.
  local started=false
  # not sure what's going on here - sometimes eventrouter will get an ErrImagePull
  # due to authentication issue to internal cluster registry - restarting the pod
  # usually makes it work(?????)
  for (( ii = 1; ii <= looptries; ii++ )) ; do
    # NOTE(review): the pipeline status only reflects try_until_text when
    # pipefail is enabled by the sourced init scripts - confirm.
    if os::cmd::try_until_text "get_running_pod eventrouter" eventrouter 2>&1 | artifact_out; then
      os::log::info started eventrouter pod $(get_running_pod eventrouter)
      started=true
      break
    fi
    oc delete pod -l component=eventrouter 2>&1 | artifact_out || :
    sleep 1
  done
  if [ "$started" != true ] ; then
    os::log::error could not start eventrouter pod after $looptries tries
    exit 1
  fi
}
evpod=$( get_eventrouter_pod )
if [ -z "$evpod" ]; then
os::log::warning "Eventrouter not deployed"
exit 0
fi

os::test::junit::declare_suite_start "test/eventrouter"

# Wait bound passed to stop_fluentd/start_fluentd and the os::cmd::try_until_*
# checks in this script; defaults to 3 * $minute ($minute is defined outside
# this view) and can be overridden from the environment.
FLUENTD_WAIT_TIME=${FLUENTD_WAIT_TIME:-$(( 3 * minute ))}

# Capture the MUX_CLIENT_MODE entry currently listed for the fluentd daemonset
# so cleanup can hand it back to `oc set env`. The `|| :` keeps a non-matching
# grep from failing the script.
muxmode=$( oc set env $fluentd_ds --list | grep \^MUX_CLIENT_MODE ) || :
if [ -z "${muxmode:-}" ] ; then
# Nothing was listed: fall back to `MUX_CLIENT_MODE-`
# (presumably the `oc set env` form that removes the variable - confirm).
muxmode=MUX_CLIENT_MODE-
fi

#######################################
# EXIT trap handler: collect logs on failure, restore the fluentd settings,
# remove the eventrouter objects and exit with the original status.
# Globals:   evpod, ARTIFACT_DIR, FLUENTD_WAIT_TIME, fluentd_ds, muxmode,
#            OS_O_A_L_DIR, LOGGING_NS (read)
# Returns:   exits with the status the script had when the trap fired
#######################################
cleanup() {
  # Must be the first statement so $? is still the script's exit status.
  local return_code="$?"
  set +e
  # Same namespace default that deploy_eventrouter deploys into, so the
  # namespaced template objects are deleted from where they were created.
  local ns=${LOGGING_NS:-openshift-logging}
  if [ "$return_code" -ne 0 ] ; then
    get_all_logging_pod_logs
    if [ -n "${evpod:-}" ] ; then
      oc logs "$evpod" > "$ARTIFACT_DIR/$evpod.log" 2>&1
    fi
  fi
  # turn off fluentd eventrouter mode
  stop_fluentd "" $FLUENTD_WAIT_TIME 2>&1 | artifact_out
  oc set env $fluentd_ds $muxmode 2>&1 | artifact_out
  oc set env $fluentd_ds TRANSFORM_EVENTS- 2>&1 | artifact_out
  start_fluentd false $FLUENTD_WAIT_TIME 2>&1 | artifact_out
  oc process -p NAMESPACE="$ns" \
    -f "${OS_O_A_L_DIR}/hack/testing/templates/eventrouter_template.yaml" | \
    oc delete -f - 2>&1 | artifact_out
  os::cmd::try_until_failure "oc -n $ns get deploy/eventrouter > /dev/null 2>&1"
  # this will call declare_test_end, suite_end, etc.
  os::test::junit::reconcile_output
  exit "$return_code"
}
trap "cleanup" EXIT

# put fluentd in eventrouter mode
# fluentd is stopped first, TRANSFORM_EVENTS=true is set on the daemonset,
# and fluentd is started again; cleanup removes the variable on exit.
stop_fluentd "" $FLUENTD_WAIT_TIME 2>&1 | artifact_out
oc set env $fluentd_ds TRANSFORM_EVENTS=true 2>&1 | artifact_out
start_fluentd false $FLUENTD_WAIT_TIME 2>&1 | artifact_out

# Deploy the eventrouter and record its running pod name; cleanup reads
# $evpod to save the pod log when the test fails.
deploy_eventrouter
evpod=$( get_running_pod eventrouter )
# No running pod found: warn and exit with status 0 rather than failing.
if [ -z "$evpod" ]; then
os::log::warning "Eventrouter not deployed"
exit 0
fi

#######################################
# Log whether an index still holds eventrouter records with a top-level
# "verb" field, i.e. events that were not processed by the ViaQ fluentd plugin.
# Arguments: $1 - Elasticsearch service passed to curl_es
#            $2 - index (pattern) to query, e.g. '/project.*'
# Outputs:   one os::log::info or os::log::warning message
#######################################
function warn_nonformatted() {
  local es_svc=$1
  local index=$2
  # check if eventrouter and fluentd with correct ViaQ plugin are deployed
  local non_formatted_event_count
  # The URL is quoted so `?` and `*` reach curl_es literally instead of being
  # pathname-expanded. `|| :` keeps this check best-effort, as the original
  # `local var=$(...)` form was.
  non_formatted_event_count=$( curl_es "$es_svc" "$index/_count?q=verb:*" | get_count_from_json ) || :
  if [ "$non_formatted_event_count" != 0 ]; then
    os::log::warning "$non_formatted_event_count events from eventrouter in index $index were not processed by ViaQ fluentd plugin"
  else
    os::log::info "good - looks like all eventrouter events were processed by fluentd"
  fi
}

Expand All @@ -55,38 +114,53 @@ essvc=$( get_es_svc es )
esopssvc=$( get_es_svc es-ops )
esopssvc=${esopssvc:-$essvc}

# Make sure there's no MUX
# undeploy fluentd
stop_fluentd "" $FLUENTD_WAIT_TIME 2>&1 | artifact_out
oc set env $fluentd_ds MUX_CLIENT_MODE- 2>&1 | artifact_out
start_fluentd false 2>&1 | artifact_out

warn_nonformatted $essvc '/project.*'
warn_nonformatted $esopssvc '/.operations.*'

os::cmd::try_until_not_text "curl_es $esopssvc /.operations.*/_count?q=kubernetes.event.verb:* | get_count_from_json" "^0\$" $FLUENTD_WAIT_TIME
prev_event_count=$( curl_es $esopssvc /.operations.*/_count?q=kubernetes.event.verb:* | get_count_from_json )

# utilize mux if mux pod exists
if oc get dc/logging-mux > /dev/null 2>&1 ; then
# MUX_CLIENT_MODE: maximal
stop_fluentd "" $FLUENTD_WAIT_TIME 2>&1 | artifact_out
oc set env $fluentd_ds MUX_CLIENT_MODE=maximal 2>&1 | artifact_out
start_fluentd false $FLUENTD_WAIT_TIME 2>&1 | artifact_out
os::cmd::try_until_success "logs_count_is_gt $prev_event_count" $FLUENTD_WAIT_TIME
prev_event_count=$( curl_es $esopssvc /.operations.*/_count?q=kubernetes.event.verb:* | get_count_from_json )
# Check if 1) the doc _id is the same as the kube id 2) there's no duplicates
curl_es $esopssvc /.operations.*/_search?pretty\&q=kubernetes.event:*\&size=9999 > $ARTIFACT_DIR/id-dup-search-raw.json 2>&1
cat $ARTIFACT_DIR/id-dup-search-raw.json | jq -r '.hits.hits[] | ._id + " " + ._source.kubernetes.event.metadata.uid' | sort > $ARTIFACT_DIR/id-and-uid
os::cmd::expect_success "test -s $ARTIFACT_DIR/id-and-uid"
cat $ARTIFACT_DIR/id-and-uid | awk '{
if ($1 != $2) {print "Error: es _id " $1 " not equal to kube uid " $2; exit 1}
if ($1 == last1) {print "Error: found duplicate es _id " $1; exit 1}
if ($2 == last2) {print "Error: found duplicate kube uid " $2; exit 1}
last1 = $1; last2 = $2
}'

oc apply -f - <<EOF
{
"apiVersion": "v1",
"count": 1,
"eventTime": null,
"involvedObject": {
"apiVersion": "apps.openshift.io/v1",
"kind": "DeploymentConfig",
"name": "eventroutertest",
"namespace": "default"
},
"kind": "Event",
"message": "eventroutertest",
"metadata": {
"name": "eventroutertest",
"namespace": "default"
},
"reason": "DeploymentCreated",
"reportingComponent": "",
"reportingInstance": "",
"source": {
"component": "deploymentconfig-controller"
},
"type": "Info"
}
EOF

# MUX_CLIENT_MODE: minimal
stop_fluentd "" $FLUENTD_WAIT_TIME 2>&1 | artifact_out
oc set env $fluentd_ds MUX_CLIENT_MODE=minimal 2>&1 | artifact_out
start_fluentd false $FLUENTD_WAIT_TIME 2>&1 | artifact_out
os::cmd::try_until_success "logs_count_is_gt $prev_event_count" $FLUENTD_WAIT_TIME
# Wait (up to $FLUENTD_WAIT_TIME) until the .operations.* indices count exactly
# one record whose kubernetes.event.metadata.name is eventroutertest. On
# failure, save the matching search results under $ARTIFACT_DIR for debugging
# and fail the test.
if ! os::cmd::try_until_text "curl_es $esopssvc /.operations.*/_count?q=kubernetes.event.metadata.name:eventroutertest | get_count_from_json" "^1\$" $FLUENTD_WAIT_TIME ; then
os::log::error did not find 1 record with type Info
# best-effort dump: `|| :` keeps a failed search from masking the exit below
curl_es $esopssvc /.operations.*/_search?q=kubernetes.event.metadata.name:eventroutertest\&pretty > $ARTIFACT_DIR/info-search.json 2>&1 || :
exit 1
fi

# Check that there are no duplicates: every event uid returned for ADDED
# events whose message matches the running fluentd pod name must be counted
# exactly once in the .operations.* indices.
fpod=$( get_running_pod fluentd )
qs='{"query":{ "bool": { "must": [ {"term":{"kubernetes.event.verb":"ADDED"}}, {"match":{"message":"'"${fpod}"'"}} ] } }, "_source": ["kubernetes.event.metadata.uid", "message"] }'
# Extract the uids with jq instead of grepping pretty-printed JSON, which would
# also pick up any "message" line that merely contains the text "uid".
ids=$( curl_es "$esopssvc" '/.operations.*/_search' -X POST -d "$qs" | jq -r '.hits.hits[]._source.kubernetes.event.metadata.uid // empty' )
# uids contain no whitespace, so word-splitting $ids yields one id per word
for id in $ids; do
  os::cmd::expect_success_and_text "curl_es $esopssvc /.operations.*/_count?q=kubernetes.event.metadata.uid:$id | get_count_from_json" "^1\$"
done