-
Notifications
You must be signed in to change notification settings - Fork 398
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
not trying to hold leases on WAL files if we are holding them already. #142
Changes from 7 commits
cbf52b3
20184d5
4d51138
06cca38
d76ad1d
c10caff
943a089
e28ec4f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -35,15 +35,13 @@ | |
public class FSWAL implements WAL { | ||
|
||
private static final Logger log = LoggerFactory.getLogger(FSWAL.class); | ||
private static final String leaseException = | ||
"org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException"; | ||
private static final long MAX_SLEEP_INTERVAL_MS = 16000L; | ||
|
||
private WALFile.Writer writer = null; | ||
private WALFile.Reader reader = null; | ||
private String logFile = null; | ||
private HdfsSinkConnectorConfig conf = null; | ||
private HdfsStorage storage = null; | ||
private long sleepIntervalMs = WALConstants.INITIAL_SLEEP_INTERVAL_MS; | ||
|
||
public FSWAL(String logsDir, TopicPartition topicPart, HdfsStorage storage) | ||
throws ConnectException { | ||
|
@@ -62,13 +60,14 @@ public void append(String tempFile, String committedFile) throws ConnectExceptio | |
writer.append(key, value); | ||
writer.hsync(); | ||
} catch (IOException e) { | ||
log.error("Error appending WAL file: {}, {}", logFile, e); | ||
close(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, so just for the sake of debuggability, if we're doing work in the exception handler that could itself throw exceptions, I think we might want to at least log that something went wrong. Because with the current code, if This is true for a couple of the other similar changes below. The only alternative I could think of is allowing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please have a look now. |
||
throw new DataException(e); | ||
} | ||
} | ||
|
||
public void acquireLease() throws ConnectException { | ||
long sleepIntervalMs = 1000L; | ||
while (sleepIntervalMs < MAX_SLEEP_INTERVAL_MS) { | ||
while (sleepIntervalMs < WALConstants.MAX_SLEEP_INTERVAL_MS) { | ||
try { | ||
if (writer == null) { | ||
writer = WALFile.createWriter(conf, Writer.file(new Path(logFile)), | ||
|
@@ -77,7 +76,7 @@ public void acquireLease() throws ConnectException { | |
} | ||
break; | ||
} catch (RemoteException e) { | ||
if (e.getClassName().equals(leaseException)) { | ||
if (e.getClassName().equals(WALConstants.LEASE_EXCEPTION_CLASS_NAME)) { | ||
log.info("Cannot acquire lease on WAL {}", logFile); | ||
try { | ||
Thread.sleep(sleepIntervalMs); | ||
|
@@ -92,7 +91,7 @@ public void acquireLease() throws ConnectException { | |
throw new DataException("Error creating writer for log file " + logFile, e); | ||
} | ||
} | ||
if (sleepIntervalMs >= MAX_SLEEP_INTERVAL_MS) { | ||
if (sleepIntervalMs >= WALConstants.MAX_SLEEP_INTERVAL_MS) { | ||
throw new ConnectException("Cannot acquire lease after timeout, will retry."); | ||
} | ||
} | ||
|
@@ -129,33 +128,37 @@ public void apply() throws ConnectException { | |
} | ||
} | ||
} catch (IOException e) { | ||
log.error("Error applying WAL file: {}, {}", logFile, e); | ||
close(); | ||
throw new DataException(e); | ||
} | ||
} | ||
|
||
@Override | ||
public void truncate() throws ConnectException { | ||
String oldLogFile = logFile + ".1"; | ||
storage.delete(oldLogFile); | ||
storage.commit(logFile, oldLogFile); | ||
// Clean out references to the current WAL file. | ||
// Open a new one on the next lease acquisition. | ||
close(); | ||
try { | ||
String oldLogFile = logFile + ".1"; | ||
storage.delete(oldLogFile); | ||
storage.commit(logFile, oldLogFile); | ||
} finally { | ||
close(); | ||
} | ||
} | ||
|
||
@Override | ||
public void close() throws ConnectException { | ||
try { | ||
if (writer != null) { | ||
writer.close(); | ||
writer = null; | ||
} | ||
if (reader != null) { | ||
reader.close(); | ||
reader = null; | ||
} | ||
} catch (IOException e) { | ||
throw new DataException("Error closing " + logFile, e); | ||
} finally { | ||
writer = null; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can't point to the actual lines, but nullifying reader and writer above, is redundant now that it's done within the |
||
reader = null; | ||
} | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
/** | ||
* Copyright 2015 Confluent Inc. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: If you apply any changes based on the comment above, you may change this one too to be |
||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except | ||
* in compliance with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software distributed under the License | ||
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express | ||
* or implied. See the License for the specific language governing permissions and limitations under | ||
* the License. | ||
**/ | ||
|
||
|
||
package io.confluent.connect.hdfs.wal; | ||
|
||
public class WALConstants { | ||
protected static final String LEASE_EXCEPTION_CLASS_NAME | ||
= "org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException"; | ||
protected static final long MAX_SLEEP_INTERVAL_MS = 16000L; | ||
protected static final long INITIAL_SLEEP_INTERVAL_MS = 1000L; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,6 +34,7 @@ | |
import org.apache.hadoop.io.serializer.Deserializer; | ||
import org.apache.hadoop.io.serializer.SerializationFactory; | ||
import org.apache.hadoop.io.serializer.Serializer; | ||
import org.apache.hadoop.ipc.RemoteException; | ||
import org.apache.hadoop.util.Options; | ||
import org.apache.hadoop.util.Time; | ||
|
||
|
@@ -132,42 +133,56 @@ public static class Writer implements Closeable, Syncable { | |
throw new IllegalArgumentException("file modifier options not compatible with stream"); | ||
} | ||
|
||
FileSystem fs = null; | ||
FSDataOutputStream out; | ||
boolean ownStream = fileOption != null; | ||
if (ownStream) { | ||
Path p = fileOption.getValue(); | ||
FileSystem fs; | ||
fs = p.getFileSystem(conf); | ||
int bufferSize = bufferSizeOption == null ? getBufferSize(conf) : | ||
bufferSizeOption.getValue(); | ||
short replication = replicationOption == null | ||
? fs.getDefaultReplication(p) | ||
: (short) replicationOption.getValue(); | ||
long blockSize = blockSizeOption == null ? fs.getDefaultBlockSize(p) : | ||
blockSizeOption.getValue(); | ||
|
||
if (appendIfExistsOption != null && appendIfExistsOption.getValue() | ||
&& fs.exists(p)) { | ||
// Read the file and verify header details | ||
try ( | ||
WALFile.Reader reader = | ||
new WALFile.Reader(conf, WALFile.Reader.file(p), new Reader.OnlyHeaderOption()) | ||
) { | ||
if (reader.getVersion() != VERSION[3]) { | ||
throw new VersionMismatchException(VERSION[3], reader.getVersion()); | ||
|
||
try { | ||
if (ownStream) { | ||
Path p = fileOption.getValue(); | ||
fs = p.getFileSystem(conf); | ||
int bufferSize = bufferSizeOption == null | ||
? getBufferSize(conf) | ||
: bufferSizeOption.getValue(); | ||
short replication = replicationOption == null | ||
? fs.getDefaultReplication(p) | ||
: (short) replicationOption.getValue(); | ||
long blockSize = blockSizeOption == null | ||
? fs.getDefaultBlockSize(p) | ||
: blockSizeOption.getValue(); | ||
|
||
if (appendIfExistsOption != null && appendIfExistsOption.getValue() && fs.exists(p)) { | ||
// Read the file and verify header details | ||
try (WALFile.Reader reader = new WALFile.Reader( | ||
connectorConfig.getHadoopConfiguration(), | ||
WALFile.Reader.file(p), | ||
new Reader.OnlyHeaderOption() | ||
)) { | ||
if (reader.getVersion() != VERSION[3]) { | ||
throw new VersionMismatchException(VERSION[3], reader.getVersion()); | ||
} | ||
sync = reader.getSync(); | ||
} | ||
sync = reader.getSync(); | ||
out = fs.append(p, bufferSize); | ||
this.appendMode = true; | ||
} else { | ||
out = fs.create(p, true, bufferSize, replication, blockSize); | ||
} | ||
out = fs.append(p, bufferSize); | ||
this.appendMode = true; | ||
} else { | ||
out = fs.create(p, true, bufferSize, replication, blockSize); | ||
out = streamOption.getValue(); | ||
} | ||
} else { | ||
out = streamOption.getValue(); | ||
|
||
init(connectorConfig, out, ownStream); | ||
} catch (RemoteException re) { | ||
log.error("Failed creating a WAL Writer: " + re.getMessage()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we do
instead so the log will include stack trace info? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In fact, is the log even going to be useful since it gets logged again by There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I was hoping to log a bit more information so that it is easier to read the debug log which is VERY long. So re.getMessage() is the short version that I like :-) If we log the stack trace, the same info will be logged again all the way up at https://github.com/confluentinc/kafka-connect-hdfs/blob/master/src/main/java/io/confluent/connect/hdfs/TopicPartitionWriter.java#L324. |
||
if (re.getClassName().equals(WALConstants.LEASE_EXCEPTION_CLASS_NAME)) { | ||
if (fs != null) { | ||
fs.close(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this need protection from a possible There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nope. I think IOException will be caught here: https://github.com/confluentinc/kafka-connect-hdfs/blob/master/src/main/java/io/confluent/connect/hdfs/TopicPartitionWriter.java#L324. |
||
} | ||
} | ||
throw re; | ||
} | ||
|
||
init(connectorConfig, out, ownStream); | ||
} | ||
|
||
public static Option file(Path value) { | ||
|
@@ -394,6 +409,7 @@ public Reader(Configuration conf, Option... opts) throws IOException { | |
InputStreamOption streamOpt = Options.getOption(InputStreamOption.class, opts); | ||
LengthOption lenOpt = Options.getOption(LengthOption.class, opts); | ||
BufferSizeOption bufOpt = Options.getOption(BufferSizeOption.class, opts); | ||
|
||
// check for consistency | ||
if ((fileOpt == null) == (streamOpt == null)) { | ||
throw new | ||
|
@@ -402,27 +418,40 @@ public Reader(Configuration conf, Option... opts) throws IOException { | |
if (fileOpt == null && bufOpt != null) { | ||
throw new IllegalArgumentException("buffer size can only be set when a file is specified."); | ||
} | ||
|
||
// figure out the real values | ||
Path filename = null; | ||
FSDataInputStream file; | ||
final long len; | ||
if (fileOpt != null) { | ||
filename = fileOpt.getValue(); | ||
FileSystem fs = filename.getFileSystem(conf); | ||
int bufSize = bufOpt == null ? getBufferSize(conf) : bufOpt.getValue(); | ||
len = null == lenOpt | ||
? fs.getFileStatus(filename).getLen() | ||
: lenOpt.getValue(); | ||
file = openFile(fs, filename, bufSize, len); | ||
} else { | ||
len = null == lenOpt ? Long.MAX_VALUE : lenOpt.getValue(); | ||
file = streamOpt.getValue(); | ||
} | ||
StartOption startOpt = Options.getOption(StartOption.class, opts); | ||
long start = startOpt == null ? 0 : startOpt.getValue(); | ||
// really set up | ||
OnlyHeaderOption headerOnly = Options.getOption(OnlyHeaderOption.class, opts); | ||
initialize(filename, file, start, len, conf, headerOnly != null); | ||
FileSystem fs = null; | ||
|
||
try { | ||
if (fileOpt != null) { | ||
filename = fileOpt.getValue(); | ||
fs = filename.getFileSystem(conf); | ||
int bufSize = bufOpt == null ? getBufferSize(conf) : bufOpt.getValue(); | ||
len = null == lenOpt | ||
? fs.getFileStatus(filename).getLen() | ||
: lenOpt.getValue(); | ||
file = openFile(fs, filename, bufSize, len); | ||
} else { | ||
len = null == lenOpt ? Long.MAX_VALUE : lenOpt.getValue(); | ||
file = streamOpt.getValue(); | ||
} | ||
StartOption startOpt = Options.getOption(StartOption.class, opts); | ||
long start = startOpt == null ? 0 : startOpt.getValue(); | ||
// really set up | ||
OnlyHeaderOption headerOnly = Options.getOption(OnlyHeaderOption.class, opts); | ||
initialize(filename, file, start, len, conf, headerOnly != null); | ||
} catch (RemoteException re) { | ||
log.error("Failed creating a WAL Reader: " + re.getMessage()); | ||
if (re.getClassName().equals(WALConstants.LEASE_EXCEPTION_CLASS_NAME)) { | ||
if (fs != null) { | ||
fs.close(); | ||
} | ||
} | ||
throw re; | ||
} | ||
} | ||
|
||
/** | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is this one upgraded from local variable to member field variable? The behavior definitely changes on repeated calls on the same object. Notice that this variable is mutated iteratively as part of the
while
loop inacquiredLease
. Even if it happens and there's only one call ofacquireLease
perFSWAL
object I think it's a better practice to keep it local if that's what we intend to do here.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good catch!