Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,13 @@
*/
package org.apache.hadoop.ozone.common;

import org.apache.hadoop.hdds.JavaUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;
import java.lang.reflect.Field;
import java.nio.ByteBuffer;
import java.util.zip.Checksum;
Expand All @@ -35,6 +39,8 @@ public class ChecksumByteBufferImpl implements ChecksumByteBuffer {
private final Checksum checksum;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@duongkame , it seems that we could change this field to ChecksumByteBuffer and update the callers of the ChecksumByteBufferImpl constructor.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand. ChecksumByteBufferImpl is a decoration of an actual Checksum to simulate (and now bridge) the update(ByteBuffer).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, my previous comment does not make sense. My suggestion is not to use CRC32 and PureJavaCrc32C.

public static ChecksumByteBuffer crc32Impl() {
return new ChecksumByteBufferImpl(new CRC32());
}

We may

return new PureJavaCrc32ByteBuffer();

Similarly,


We may

return new PureJavaCrc32CByteBuffer();

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PureJavaCrc32ByteBuffer impl involves reading buffers, while the CRC32 implementation in JDK9+ (e.g. in OpenJdk11) calculates checksum using native code on the native buffer directly and that probably indicates performance advantages.

Probably we should only replace the return new ChecksumByteBufferImpl(new PureJavaCrc32C()); by return new PureJavaCrc32CByteBuffer();, but I'm not sure what's the advatages.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

... implementation in JDK9+ (e.g. in OpenJdk11) calculates using native code on the native buffer directly and that probably indicates performance advantages.

It is likely. We should benchmark it.

... but I'm not sure what's the advatages.

The advantage is to avoid creating an array and copying for (slow path) direct buffers in Java 8.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Think we should keep this PR as an optimization for DirectBuffer by calling a buffer-friendly API.
Actual CrC implementation/algo change can be done by another JIRA.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 I'll merge it.


private static final Field IS_READY_ONLY_FIELD;
// To access Checksum.update(ByteBuffer) API from Java 9+.
private static final MethodHandle BYTE_BUFFER_UPDATE;

static {
Field f = null;
Expand All @@ -46,6 +52,18 @@ public class ChecksumByteBufferImpl implements ChecksumByteBuffer {
LOG.error("No isReadOnly field in ByteBuffer", e);
}
IS_READY_ONLY_FIELD = f;

MethodHandle byteBufferUpdate = null;
if (JavaUtils.isJavaVersionAtLeast(9)) {
try {
byteBufferUpdate = MethodHandles.publicLookup().findVirtual(Checksum.class, "update",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not NativeCheckSumCRC32? This reflection is to lock the implementation to be used if the Java version is recent?
Also, not sure if the unit test will fail if I skip using the locked implementation.

─⠠⠵ java -version
openjdk version "1.8.0_392"
OpenJDK Runtime Environment (Zulu 8.74.0.17-CA-macos-aarch64) (build 1.8.0_392-b08)
OpenJDK 64-Bit Server VM (Zulu 8.74.0.17-CA-macos-aarch64) (build 25.392-b08, mixed mode)

MethodType.methodType(void.class, ByteBuffer.class));
} catch (Throwable t) {
throw new IllegalStateException("Failed to lookup Checksum.update(ByteBuffer).");
}
}
BYTE_BUFFER_UPDATE = byteBufferUpdate;

}

public ChecksumByteBufferImpl(Checksum impl) {
Expand All @@ -57,6 +75,17 @@ public ChecksumByteBufferImpl(Checksum impl) {
// should be refactored to simply call checksum.update(buffer), as the
// Checksum interface has been enhanced to allow this since Java 9.
public void update(ByteBuffer buffer) {
// Prefer JDK9+ implementation that allows ByteBuffer. This allows DirectByteBuffer to be checksum directly in
// native memory.
if (BYTE_BUFFER_UPDATE != null) {
try {
BYTE_BUFFER_UPDATE.invokeExact(checksum, buffer);
return;
} catch (Throwable e) {
throw new IllegalStateException("Error invoking " + BYTE_BUFFER_UPDATE, e);
}
}

// this is a hack to not do memory copy.
if (IS_READY_ONLY_FIELD != null) {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,10 @@

import org.apache.hadoop.util.PureJavaCrc32;
import org.apache.hadoop.util.PureJavaCrc32C;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import org.apache.commons.lang3.RandomUtils;
import java.util.zip.Checksum;
Expand All @@ -45,6 +47,23 @@ public void testPureJavaCrc32CByteBuffer() {
new VerifyChecksumByteBuffer(expected, testee).testCorrectness();
}

@Test
public void testWithDirectBuffer() {
final ChecksumByteBuffer checksum = ChecksumByteBufferFactory.crc32CImpl();
byte[] value = "test".getBytes(StandardCharsets.UTF_8);
checksum.reset();
checksum.update(value, 0, value.length);
long checksum1 = checksum.getValue();

ByteBuffer byteBuffer = ByteBuffer.allocateDirect(value.length);
byteBuffer.put(value).rewind();
checksum.reset();
checksum.update(byteBuffer);
long checksum2 = checksum.getValue();

Assertions.assertEquals(checksum1, checksum2);
}

static class VerifyChecksumByteBuffer {
private final Checksum expected;
private final ChecksumByteBuffer testee;
Expand Down