-
Notifications
You must be signed in to change notification settings - Fork 440
TEZ-4039: Tez should inject dag id, query id into MDC #98
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
23d1287
717d739
25134f3
bc6e39d
1200ae8
01adba3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,151 @@ | ||
| /** | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.tez.util; | ||
|
|
||
| import java.lang.reflect.Constructor; | ||
| import java.lang.reflect.Field; | ||
| import java.lang.reflect.Modifier; | ||
| import java.util.Arrays; | ||
| import java.util.Hashtable; | ||
|
|
||
| import org.apache.hadoop.conf.Configuration; | ||
| import org.apache.log4j.helpers.ThreadLocalMap; | ||
| import org.apache.tez.dag.api.TezConfiguration; | ||
| import org.slf4j.Logger; | ||
| import org.slf4j.LoggerFactory; | ||
|
|
||
| public final class LoggingUtils { | ||
| private static final Logger LOG = LoggerFactory.getLogger(LoggingUtils.class); | ||
|
|
||
| private LoggingUtils() {} | ||
|
|
||
| /** | ||
| * Populates the logging context (MDC) map held by the given {@code ThreadLocalMap}. | ||
| * <p> | ||
| * The map always receives the {@code dagId} and {@code taskAttemptId} entries (a {@code null} | ||
| * argument is stored as an empty string). If no map is attached yet, a new | ||
| * {@code NonClonableHashtable} is created and set on {@code threadLocalMap}. | ||
| * <p> | ||
| * Custom entries are added afterwards: the i-th key listed under | ||
| * {@code TezConfiguration.TEZ_MDC_CUSTOM_KEYS} takes the value of the configuration property | ||
| * named by the i-th element of {@code TezConfiguration.TEZ_MDC_CUSTOM_KEYS_CONF_PROPS}. | ||
| * Processing stops with a warning at the first empty key or at the first key that has no | ||
| * matching property name; a property that resolves to {@code null} is skipped with a warning. | ||
| * | ||
| * @param threadLocalMap holder of the MDC hashtable to update | ||
| * @param conf configuration supplying the custom key names and their values | ||
| * @param dagId value for the "dagId" entry, may be {@code null} | ||
| * @param taskAttemptId value for the "taskAttemptId" entry, may be {@code null} | ||
| */ | ||
| @SuppressWarnings("unchecked") | ||
| public static void initLoggingContext(ThreadLocalMap threadLocalMap, Configuration conf, | ||
| String dagId, String taskAttemptId) { | ||
| Hashtable<String, String> data = (Hashtable<String, String>) threadLocalMap.get(); | ||
| if (data == null) { | ||
| data = new NonClonableHashtable<String, String>(); | ||
| threadLocalMap.set(data); | ||
| } | ||
| data.put("dagId", dagId == null ? "" : dagId); | ||
| data.put("taskAttemptId", taskAttemptId == null ? "" : taskAttemptId); | ||
|
|
||
| String[] mdcKeys = conf.getStrings(TezConfiguration.TEZ_MDC_CUSTOM_KEYS, | ||
| TezConfiguration.TEZ_MDC_CUSTOM_KEYS_DEFAULT); | ||
|
|
||
| if (mdcKeys == null || mdcKeys.length == 0) { | ||
| return; | ||
| } | ||
|
|
||
| String[] mdcKeysValuesFrom = conf.getStrings(TezConfiguration.TEZ_MDC_CUSTOM_KEYS_CONF_PROPS, | ||
| TezConfiguration.TEZ_MDC_CUSTOM_KEYS_CONF_PROPS_DEFAULT); | ||
| // getStrings may hand back null (mdcKeys is guarded for the same reason above); treat that as | ||
| // "no property names" so the logging call and the loop below cannot throw an NPE | ||
| if (mdcKeysValuesFrom == null) { | ||
| mdcKeysValuesFrom = new String[0]; | ||
| } | ||
| LOG.info("MDC_LOGGING: setting up MDC keys: keys: {} / conf: {}", Arrays.asList(mdcKeys), | ||
| Arrays.asList(mdcKeysValuesFrom)); | ||
|
|
||
| int i = 0; | ||
| for (String mdcKey : mdcKeys) { | ||
| // don't want to fail on incorrect mdc key settings, but warn in app logs | ||
| if (mdcKey.isEmpty() || mdcKeysValuesFrom.length < i + 1) { | ||
| LOG.warn("cannot set mdc key: {}", mdcKey); | ||
| break; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Did you mean break or continue here?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if we hit this it means that mdcKey is most probably empty, so we had: as it's very unlikely that the user set something like this: if we split string by commas and find empty string, it makes no sense to loop further, or at least I cannot figure out a valid use-case |
||
| } | ||
|
|
||
| String mdcValue = mdcKeysValuesFrom[i] == null ? "" : conf.get(mdcKeysValuesFrom[i]); | ||
| // MDC is backed by a Hashtable, let's prevent NPE because of null values | ||
| if (mdcValue != null) { | ||
| data.put(mdcKey, mdcValue); | ||
| } else { | ||
| LOG.warn("MDC_LOGGING: mdc value is null for key: {}, config key: {}", mdcKey, | ||
| mdcKeysValuesFrom[i]); | ||
| } | ||
|
|
||
| i++; | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Reads the log pattern layout stored under {@code TezConfiguration.TEZ_LOG_PATTERN_LAYOUT_AM}, | ||
| * falling back to {@code TezConfiguration.TEZ_LOG_PATTERN_LAYOUT_DEFAULT}. | ||
| * | ||
| * @param conf configuration to read the pattern from | ||
| * @return the resolved pattern, or {@code null} when it is an empty string | ||
| */ | ||
| public static String getPatternForAM(Configuration conf) { | ||
| final String amPattern = conf.get(TezConfiguration.TEZ_LOG_PATTERN_LAYOUT_AM, | ||
| TezConfiguration.TEZ_LOG_PATTERN_LAYOUT_DEFAULT); | ||
| // an empty pattern is reported to the caller as "no pattern" | ||
| if (amPattern.isEmpty()) { | ||
| return null; | ||
| } | ||
| return amPattern; | ||
| } | ||
|
|
||
| /** | ||
| * Reads the log pattern layout stored under {@code TezConfiguration.TEZ_LOG_PATTERN_LAYOUT_TASK}, | ||
| * falling back to {@code TezConfiguration.TEZ_LOG_PATTERN_LAYOUT_DEFAULT}. | ||
| * | ||
| * @param conf configuration to read the pattern from | ||
| * @return the resolved pattern, or {@code null} when it is an empty string | ||
| */ | ||
| public static String getPatternForTask(Configuration conf) { | ||
| final String taskPattern = conf.get(TezConfiguration.TEZ_LOG_PATTERN_LAYOUT_TASK, | ||
| TezConfiguration.TEZ_LOG_PATTERN_LAYOUT_DEFAULT); | ||
| // an empty pattern is reported to the caller as "no pattern" | ||
| if (taskPattern.isEmpty()) { | ||
| return null; | ||
| } | ||
| return taskPattern; | ||
| } | ||
|
|
||
| /** | ||
| * This method is for setting a NonClonableHashtable into log4j's mdc. Reflection hacks are | ||
| * needed, because MDC.mdc is well protected (final static MDC mdc = new MDC();). The logic below | ||
| * is supposed to be called once per JVM, so it is not subject to performance bottlenecks. For | ||
| * further details of this solution, please check the NonClonableHashtable class, which is set | ||
| * into the ThreadLocalMap. A wrong outcome of this method (any kind of runtime/reflection | ||
| * problems) should not affect the DAGAppMaster/TezChild. In case of an exception a ThreadLocalMap | ||
| * is returned, but it won't affect the content of the MDC. | ||
| * | ||
| * @return the newly created ThreadLocalMap holding a NonClonableHashtable; it is returned whether | ||
| * or not it could be installed into log4j's MDC | ||
| */ | ||
| @SuppressWarnings("unchecked") | ||
| public static ThreadLocalMap setupLog4j() { | ||
| // context map handed back to the caller regardless of what happens in the try block below | ||
| ThreadLocalMap mdcContext = new ThreadLocalMap(); | ||
| mdcContext.set(new NonClonableHashtable<String, String>()); | ||
|
|
||
| try { | ||
| // open up every declared MDC constructor so an instance can be created reflectively | ||
| final Constructor<?>[] constructors = org.apache.log4j.MDC.class.getDeclaredConstructors(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. log4j or slf4j?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. log4j, intentionally, the whole hack implemented in this method is log4j specific |
||
| for (Constructor<?> c : constructors) { | ||
| c.setAccessible(true); | ||
| } | ||
|
|
||
| // build a fresh MDC instance and point its "tlm" field at our context map | ||
| org.apache.log4j.MDC mdc = (org.apache.log4j.MDC) constructors[0].newInstance(); | ||
| Field tlmField = org.apache.log4j.MDC.class.getDeclaredField("tlm"); | ||
| tlmField.setAccessible(true); | ||
| tlmField.set(mdc, mdcContext); | ||
|
|
||
| Field mdcField = org.apache.log4j.MDC.class.getDeclaredField("mdc"); | ||
| mdcField.setAccessible(true); | ||
|
|
||
| // clear the FINAL bit of MDC.mdc through Field's own "modifiers" field so it can be overwritten | ||
| // NOTE(review): newer JDKs reportedly hide Field.modifiers from reflection; if so, this lookup | ||
| // throws and we end up in the catch block below - confirm against the target JDK | ||
| Field modifiers = Field.class.getDeclaredField("modifiers"); | ||
| modifiers.setAccessible(true); | ||
| modifiers.setInt(mdcField, mdcField.getModifiers() & ~Modifier.FINAL); | ||
|
|
||
| // replace the static MDC.mdc instance with the one created above | ||
| mdcField.set(null, mdc); | ||
|
|
||
| } catch (Exception e) { | ||
| // best effort: the failure is only logged and the caller still receives mdcContext | ||
| LOG.warn("Cannot set log4j global MDC, mdcContext won't be applied to log4j's MDC class", e); | ||
| } | ||
|
|
||
| return mdcContext; | ||
| } | ||
|
|
||
| /** | ||
| * NonClonableHashtable is a special class for hacking the log4j MDC context. By design, log4j's | ||
| * MDC uses a ThreadLocalMap, which clones parent thread's context before propagating it to child | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Isn't this configurable via the isThreadContextMapInheritable system property?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Unfortunately, isThreadContextMapInheritable is slightly different from what I needed. It controls inheritance, which is needed: a child thread inheriting its parent's context is fine. The problem is that the child inherits a clone of the map, which works against my implementation. Instead, I chose to define one context in the main thread and propagate it to all child threads (which happens automatically due to the default ThreadLocalMap behavior), so I only need to init the logging context once for every dag/taskattempt. Without cloning, a single change updates the MDC contents of all threads in the JVM. |
||
| * thread (see {@link org.apache.log4j.helpers.ThreadLocalMap#childValue(Object)}). In our | ||
| * use case, this is not suitable, as we want to maintain only one context globally (and set e.g. | ||
| * dagId, taskAttemptId), then update it as easy as possible when dag/taskattempt changes, without | ||
| * having to propagate the update parameters to all the threads in the JVM. | ||
| */ | ||
| private static class NonClonableHashtable<K, V> extends Hashtable<K, V> { | ||
| // the declared type parameters are passed on to Hashtable, so the element types chosen at the | ||
| // instantiation site (e.g. <String, String>) are the ones that actually apply | ||
| private static final long serialVersionUID = 1L; | ||
|
|
||
| /** | ||
| * Returns this very instance instead of a copy, so every holder of a "clone" keeps sharing | ||
| * (and observing updates to) the same table. | ||
| * | ||
| * @return {@code this} | ||
| */ | ||
| @Override | ||
| public synchronized Object clone() { | ||
| return this; | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there a default value for this (e.g., if the user wants to reset it back to the old value)?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good catch: the user should be able to turn this off by setting it to an empty string. I'm reflecting this in the configuration.