Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ Sanjay Ghemawat <[email protected]>
# Partial list of contributors:
Kevin Regan <[email protected]>
Johan Bilien <[email protected]>
Fangming Fang <[email protected]>
6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -422,3 +422,9 @@ $(STATIC_OUTDIR)/port/port_posix_sse.o: port/port_posix_sse.cc

$(SHARED_OUTDIR)/port/port_posix_sse.o: port/port_posix_sse.cc
$(CXX) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) $(PLATFORM_SSEFLAGS) -c $< -o $@

$(STATIC_OUTDIR)/port/port_posix_linux_arm64.o: port/port_posix_linux_arm64.cc
$(CXX) $(CXXFLAGS) $(PLATFORM_ARMV8_CRC32FLAGS) -c $< -o $@

$(SHARED_OUTDIR)/port/port_posix_linux_arm64.o: port/port_posix_linux_arm64.cc
$(CXX) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) $(PLATFORM_ARMV8_CRC32FLAGS) -c $< -o $@
31 changes: 30 additions & 1 deletion build_detect_platform
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ if test -z "$TARGET_OS"; then
TARGET_OS=`uname -s`
fi

# Detect ARCH
if test -z "$TARGET_ARCH"; then
TARGET_ARCH=`$CXX -dumpmachine | cut -d- -f1`
fi

COMMON_FLAGS=
CROSS_COMPILE=
PLATFORM_CCFLAGS=
Expand All @@ -64,6 +69,7 @@ PLATFORM_SHARED_LDFLAGS="-shared -Wl,-soname -Wl,"
PLATFORM_SHARED_CFLAGS="-fPIC"
PLATFORM_SHARED_VERSIONED=true
PLATFORM_SSEFLAGS=
PORT_CRC_FILE_ARMV8=

MEMCMP_FLAG=
if [ "$CXX" = "g++" ]; then
Expand Down Expand Up @@ -164,6 +170,11 @@ case "$TARGET_OS" in
exit 1
esac

if [ $TARGET_ARCH = "aarch64" -a $PLATFORM = "OS_LINUX" ];then
PORT_SSE_FILE=
PORT_CRC_FILE_ARMV8=port/port_posix_linux_arm64.cc
fi

# We want to make a list of all cc files within util, db, table, and helpers
# except for the test and benchmark files. By default, find will output a list
# of all files matching either rule, so we need to append -print to make the
Expand All @@ -180,7 +191,7 @@ set +f # re-enable globbing

# The sources consist of the portable files, plus the platform-specific port
# file.
echo "SOURCES=$PORTABLE_FILES $PORT_FILE $PORT_SSE_FILE" >> $OUTPUT
echo "SOURCES=$PORTABLE_FILES $PORT_FILE $PORT_SSE_FILE $PORT_CRC_FILE_ARMV8" >> $OUTPUT
echo "MEMENV_SOURCES=helpers/memenv/memenv.cc" >> $OUTPUT

if [ "$CROSS_COMPILE" = "true" ]; then
Expand Down Expand Up @@ -232,13 +243,30 @@ EOF
fi

rm -f $CXXOUTPUT 2>/dev/null

# Test if gcc armv8-a+crc+crypto is supported
$CXX $CXXFLAGS -x c++ - -o $CXXOUTPUT -march=armv8-a+crc+crypto 2>/dev/null <<EOF
#include <arm_acle.h>
#include <arm_neon.h>
int main() {__crc32cd(0, 0); vmull_p64(0, 0);}
EOF
if [ "$?" = 0 ]; then
PLATFORM_ARMV8_CRC32FLAGS="-march=armv8-a+crc+crypto"
fi

rm -f $CXXOUTPUT 2>/dev/null
fi

# Use the SSE 4.2 CRC32C intrinsics iff runtime checks indicate compiler supports them.
if [ -n "$PLATFORM_SSEFLAGS" ]; then
PLATFORM_SSEFLAGS="$PLATFORM_SSEFLAGS -DLEVELDB_PLATFORM_POSIX_SSE"
fi

# Use the ARMv8 CRC32C intrinsics if runtime checks indicate compiler supports them.
if [ -n "$PLATFORM_ARMV8_CRC32FLAGS" ]; then
PLATFORM_ARMV8_CRC32FLAGS="$PLATFORM_ARMV8_CRC32FLAGS -DLEVELDB_PLATFORM_POSIX_ARMV8_CRC_CRYPTO"
fi

PLATFORM_CCFLAGS="$PLATFORM_CCFLAGS $COMMON_FLAGS"
PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS $COMMON_FLAGS"

Expand All @@ -254,3 +282,4 @@ echo "PLATFORM_SHARED_CFLAGS=$PLATFORM_SHARED_CFLAGS" >> $OUTPUT
echo "PLATFORM_SHARED_EXT=$PLATFORM_SHARED_EXT" >> $OUTPUT
echo "PLATFORM_SHARED_LDFLAGS=$PLATFORM_SHARED_LDFLAGS" >> $OUTPUT
echo "PLATFORM_SHARED_VERSIONED=$PLATFORM_SHARED_VERSIONED" >> $OUTPUT
echo "PLATFORM_ARMV8_CRC32FLAGS=$PLATFORM_ARMV8_CRC32FLAGS" >> $OUTPUT
139 changes: 139 additions & 0 deletions port/port_posix_linux_arm64.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
// Copyright 2017 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// A portable implementation of crc32c, optimized to handle
// up to eight bytes at a time.
//
// In a separate source file to allow this accelerated CRC32C function to be
// compiled with the appropriate compiler flags to enable aarch64 crc32
// instructions.

#include <stdint.h>
#include <string.h>
#include "port/port.h"


#if defined(LEVELDB_PLATFORM_POSIX_ARMV8_CRC_CRYPTO)

#include <arm_acle.h>
#include <arm_neon.h>
#include <sys/auxv.h>

// see kernel file 'arch/arm64/include/uapi/asm/hwcap.h'
#define HWCAP_CRC32 (1 << 7)
#define HWCAP_PMULL (1 << 4)

#define KBYTES 1032
#define SEGMENTBYTES 256

// compute 8bytes for each segment parallelly
#define CRC32C32BYTES(P, IND) do {\
crc1 = __crc32cd(crc1, *((const uint64_t *)(P) + (SEGMENTBYTES/8)*1 + (IND)));\
crc2 = __crc32cd(crc2, *((const uint64_t *)(P) + (SEGMENTBYTES/8)*2 + (IND)));\
crc3 = __crc32cd(crc3, *((const uint64_t *)(P) + (SEGMENTBYTES/8)*3 + (IND)));\
crc0 = __crc32cd(crc0, *((const uint64_t *)(P) + (SEGMENTBYTES/8)*0 + (IND)));\
} while(0);

// compute 8*8 bytes for each segment parallelly
#define CRC32C256BYTES(P, IND) do {\
CRC32C32BYTES((P), (IND)*8+0) \
CRC32C32BYTES((P), (IND)*8+1) \
CRC32C32BYTES((P), (IND)*8+2) \
CRC32C32BYTES((P), (IND)*8+3) \
CRC32C32BYTES((P), (IND)*8+4) \
CRC32C32BYTES((P), (IND)*8+5) \
CRC32C32BYTES((P), (IND)*8+6) \
CRC32C32BYTES((P), (IND)*8+7) \
} while(0);

// compute 4*8*8 bytes for each segment parallelly
#define CRC32C1024BYTES(P) do {\
CRC32C256BYTES((P), 0) \
CRC32C256BYTES((P), 1) \
CRC32C256BYTES((P), 2) \
CRC32C256BYTES((P), 3) \
(P) += 4*SEGMENTBYTES; \
} while(0)

#endif // defined(LEVELDB_PLATFORM_POSIX_ARMV8_CRC_CRYPTO)

namespace leveldb {
namespace port {

static inline bool CanAccelerateCRC32C() {
unsigned long hwcap = getauxval(AT_HWCAP);
if ((hwcap & HWCAP_CRC32) && (hwcap & HWCAP_PMULL)) {
return true;
}
return false;
}

uint32_t AcceleratedCRC32C(uint32_t crc, const char* buf, size_t size) {
#if !defined(LEVELDB_PLATFORM_POSIX_ARMV8_CRC_CRYPTO)
return 0;
#else
static bool can = CanAccelerateCRC32C();
if(!can) {
return 0;
}

int64_t length = size;
uint32_t crc0, crc1, crc2, crc3;
uint64_t t0, t1, t2;

// k0=CRC(x^(3*SEGMENTBYTES*8)), k1=CRC(x^(2*SEGMENTBYTES*8)), k2=CRC(x^(SEGMENTBYTES*8))
const poly64_t k0 = 0x8d96551c, k1 = 0xbd6f81f8, k2 = 0xdcb17aa4;

crc = crc ^ 0xffffffffu;
const uint8_t *p = reinterpret_cast<const uint8_t *>(buf);

while ( length >= KBYTES) {
crc0 = crc;
crc1 = 0;
crc2 = 0;
crc3 = 0;

// compute 1024bytes parallelly
CRC32C1024BYTES(p);

// merge crc0 crc1 crc2 crc3
t2 = (uint64_t)vmull_p64(crc2, k2);
t1 = (uint64_t)vmull_p64(crc1, k1);
t0 = (uint64_t)vmull_p64(crc0, k0);
crc = __crc32cd(crc3, *(uint64_t *)p);
p += sizeof(uint64_t);
crc ^= __crc32cd(0, t2);
crc ^= __crc32cd(0, t1);
crc ^= __crc32cd(0, t0);

length -= KBYTES;
}

while(length >= sizeof(uint64_t)) {
crc = __crc32cd(crc, *(uint64_t *)p);
p += sizeof(uint64_t);
length -= sizeof(uint64_t);
}

if(length & sizeof(uint32_t)) {
crc = __crc32cw(crc, *(uint32_t *)p);
p += sizeof(uint32_t);
}

if(length & sizeof(uint16_t)) {
crc = __crc32ch(crc, *(uint16_t *)p);
p += sizeof(uint16_t);
}

if(length & sizeof(uint8_t)) {
crc = __crc32cb(crc, *p);
}

return crc ^ 0xffffffffu;

#endif // defined(LEVELDB_PLATFORM_POSIX_ARMV8_CRC_CRYPTO)
}

} // namespace port
} // namespace leveldb