Skip to content

Commit

Permalink
PDHMM implementation added (IntelLabs#201)
Browse files Browse the repository at this point in the history
* pdhmm implementation added

* Removed blank space changes

* Removed blank space changes 2

* Pull request review 1 changes

* Readme added

* PR review 2 fix

* Readme fixed PR review 2

* Switch case performance comment added

* Removed unnecessary comparison

* Removed todo comment; and added casting for long error in mac

* included assert for mac system

* Added check for omp before using
  • Loading branch information
chirayuharyan authored Feb 13, 2024
1 parent 28d60b7 commit 26de538
Show file tree
Hide file tree
Showing 26 changed files with 16,093 additions and 49 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,4 @@ add_subdirectory("${NATIVE_DIR}/compression")
add_subdirectory("${NATIVE_DIR}/pairhmm")
add_subdirectory("${NATIVE_DIR}/utils")
add_subdirectory("${NATIVE_DIR}/smithwaterman")
add_subdirectory("${NATIVE_DIR}/pdhmm")
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ Kernels included:
* **DEFLATE Compression/Decompression**:
* Performance optimized Level 1 and 2 compression and decompression from Intel's [ISA-L](https://github.com/01org/isa-l) library.
* Performance optimized Level 3 through 9 compression from Intel's [Open Source Technology Center](https://01.org) [zlib](https://github.com/intel/zlib) library.
* **Partially Determined HMM (PDHMM)**
* AVX2 and AVX-512 optimized versions of PDHMM used in GATK.
* Serial Implementation for CPU's with no AVX.

# Building GKL
GKL release binaries are built on CentOS 7, to enable running on most Linux distributions (see [holy-build-box](https://github.com/phusion/holy-build-box#problem-introduction) for a good description of portability issues).
Expand Down
3 changes: 2 additions & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,10 @@ dependencies {
api 'org.broadinstitute:gatk-native-bindings:1.0.0'
api 'com.github.samtools:htsjdk:3.0.5'
testImplementation 'org.testng:testng:7.5.1'
testImplementation 'org.apache.commons:commons-lang3:3.13.0'
}

wrapper {
wrapper {
gradleVersion = '8.0.2'
}

Expand Down
6 changes: 3 additions & 3 deletions src/main/java/com/intel/gkl/NativeLibraryLoader.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public final class NativeLibraryLoader {
private static final Set<String> loadedLibraries = new HashSet<>();
private static final int MAX_PATH;

private static enum NATIVE_LIB {gkl_compression, gkl_pairhmm, gkl_pairhmm_omp, gkl_smithwaterman, gkl_utils}
private static enum NATIVE_LIB {gkl_compression, gkl_pairhmm, gkl_pairhmm_omp, gkl_smithwaterman, gkl_utils, gkl_pdhmm}

static {
logger = LogFactory.getLog(NativeLibraryLoader.class);
Expand All @@ -60,7 +60,7 @@ private static boolean checkPath(String tempPath){
// Check libraryName is as expected
private static boolean checkLibraryName(String libName){
// Check boundaries for input
if(libName == null || libName.length() == 0 || libName.length() > MAX_PATH) {
if(libName == null || libName.length() == 0 || libName.length() > MAX_PATH) {
logger.warn(" Unexpected library name");
return false;
}
Expand All @@ -75,7 +75,7 @@ private static boolean checkLibraryName(String libName){
return false;
}

/**
/**
* Tries to load the native library from the classpath, usually from a jar file. <p>
*
* If the USE_LIBRARY_PATH environment variable is defined, the native library will be loaded from the
Expand Down
86 changes: 86 additions & 0 deletions src/main/java/com/intel/gkl/pdhmm/IntelPDHMM.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
#Copyright(C) 2023 Intel Corporation
#SPDX - License - Identifier : MIT License
*/

package com.intel.gkl.pdhmm;

import com.intel.gkl.NativeLibraryLoader;
import org.broadinstitute.gatk.nativebindings.NativeLibrary;

import java.io.File;

public class IntelPDHMM implements NativeLibrary {
private static final Object lock_class = new Object();
private static final String NATIVE_LIBRARY_NAME;
private static boolean initialized;

static {
NATIVE_LIBRARY_NAME = "gkl_pdhmm";
initialized = false;
}

@Override
public boolean load(File tempDir) {
synchronized (lock_class) {
if (!NativeLibraryLoader.load(tempDir, NATIVE_LIBRARY_NAME)) {
return false;
}
if (!initialized) {
initialized = true;
}
}
initNative();
return true;
}

public double[] computePDHMM(byte[] hap_bases, byte[] hap_pdbases, byte[] read_bases, byte[] read_qual,
byte[] read_ins_qual, byte[] read_del_qual, byte[] gcp, long[] hap_lengths, long[] read_lengths,
int testcase, int maxHapLength, int maxReadLength)
throws RuntimeException, OutOfMemoryError {
if (hap_bases == null)
throw new NullPointerException("hap_bases array is null.");
if (hap_pdbases == null)
throw new NullPointerException("hap_pdbases array is null.");
if (read_bases == null)
throw new NullPointerException("read_bases array is null.");
if (read_qual == null)
throw new NullPointerException("read_qual array is null.");
if (read_ins_qual == null)
throw new NullPointerException("read_ins_qual array is null.");
if (read_del_qual == null)
throw new NullPointerException("read_del_qual array is null.");
if (gcp == null)
throw new NullPointerException("gcp array is null.");
if (hap_lengths == null)
throw new NullPointerException("hap_lengths array is null.");
if (read_lengths == null)
throw new NullPointerException("read_lengths array is null.");
if (testcase <= 0)
throw new IllegalArgumentException("Invalid number of testcase.");
if (maxHapLength <= 0 || maxReadLength <= 0)
throw new IllegalArgumentException("Cannot perform pdhmm on empty sequences");

try {
return computePDHMMNative(hap_bases, hap_pdbases, read_bases, read_qual,
read_ins_qual, read_del_qual, gcp, hap_lengths, read_lengths,
testcase, maxHapLength, maxReadLength);
} catch (OutOfMemoryError e) {
throw new OutOfMemoryError(
"OutOfMemory exception thrown from native pdhmm function call " + e.getMessage());
} catch (IllegalArgumentException e) {
throw new IllegalArgumentException(
"IllegalArgument exception thrown from native pdhmm function call " + e.getMessage());
} catch (RuntimeException e) {
throw new RuntimeException(
"Runtime exception thrown from native pdhmm function call " + e.getMessage());
}
}

private native static void initNative();

private native double[] computePDHMMNative(byte[] hap_bases, byte[] hap_pdbases, byte[] read_bases,
byte[] read_qual,
byte[] read_ins_qual, byte[] read_del_qual, byte[] gcp, long[] hap_lengths, long[] read_lengths,
int testcase, int maxHapLength, int maxReadLength);
}
26 changes: 26 additions & 0 deletions src/main/native/pdhmm/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#---------------------------------------------------------------------
# common
#---------------------------------------------------------------------
if(NOT APPLE)
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-as-needed")
endif()
set_property(SOURCE MathUtils.cc APPEND_STRING PROPERTY COMPILE_FLAGS)
set_property(SOURCE pdhmm-serial.cc APPEND_STRING PROPERTY COMPILE_FLAGS)
set_property(SOURCE avx2_impl.cc APPEND_STRING PROPERTY COMPILE_FLAGS " -mavx -mavx2 -msse2 -msse4.1")
set_property(SOURCE avx512_impl.cc APPEND_STRING PROPERTY COMPILE_FLAGS " -mavx -mavx2 -mavx512f -mavx512dq -mavx512vl -mavx512bw")


#---------------------------------------------------------------------
# common includes
#---------------------------------------------------------------------
include_directories(../common)

find_package(OpenMP)

set(TARGET gkl_pdhmm)
add_library(${TARGET} SHARED IntelPDHMM.cc avx2_impl.cc avx512_impl.cc MathUtils.cc pdhmm-serial.cc)
if(OPENMP_FOUND)
set_target_properties(${TARGET} PROPERTIES COMPILE_OPTIONS ${OpenMP_CXX_FLAGS})
target_link_libraries(${TARGET} ${OpenMP_CXX_FLAGS})
endif()
install(TARGETS ${TARGET} DESTINATION ${CMAKE_BINARY_DIR})
160 changes: 160 additions & 0 deletions src/main/native/pdhmm/IntelPDHMM.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
/*
#Copyright(C) 2023 Intel Corporation
#SPDX - License - Identifier : MIT License
*/

#include "IntelPDHMM.h"
#include <debug.h>
#include "pdhmm-serial.h"
#include "avx2_impl.h"
#ifndef __APPLE__
#include "avx512_impl.h"
#endif
#include <avx.h>
#ifdef __APPLE__
#include <cassert>
#endif

int32_t (*g_computePDHMM)(const int8_t *hap_bases, const int8_t *hap_pdbases, const int8_t *read_bases, const int8_t *read_qual, const int8_t *read_ins_qual, const int8_t *read_del_qual, const int8_t *gcp, double *result, int64_t t, const int64_t *hap_lengths, const int64_t *read_lengths, int32_t maxReadLength, int32_t maxHaplotypeLength);

inline bool is_sse_supported()
{
uint32_t a, b, c, d;
uint32_t sse_mask = (1 << 27) | bit_SSE2 | bit_SSE4_1;

__cpuid_count(1, 0, a, b, c, d);
if ((c & sse_mask) != sse_mask)
{
return false;
}

if (!check_xcr0_ymm())
{
return false;
}

return true;
}

JNIEXPORT void JNICALL Java_com_intel_gkl_pdhmm_IntelPDHMM_initNative(JNIEnv *env, jclass obj)
{
if (is_avx512_supported())
{
#ifndef __APPLE__
INFO("Using CPU-supported AVX-512 instructions.");
g_computePDHMM = computePDHMM_fp_avx512;

#else
assert(false);
#endif
}
else if (is_avx_supported() && is_avx2_supported() && is_sse_supported())
{
INFO("Using CPU-supported AVX-2, AVX and SSE instructions.");
g_computePDHMM = computePDHMM_fp_avx2;
}
else
{
INFO("Using Serial Implementation.");
g_computePDHMM = computePDHMM_serial;
}
}

/*
* Class: com_intel_gkl_pdhmm_IntelPDHMM
* Method: computePDHMM
* Signature: (Z)V
*/
JNIEXPORT jdoubleArray JNICALL Java_com_intel_gkl_pdhmm_IntelPDHMM_computePDHMMNative(JNIEnv *env, jobject obj, jbyteArray jhap_bases, jbyteArray jhap_pdbases, jbyteArray jread_bases, jbyteArray jread_qual, jbyteArray jread_ins_qual, jbyteArray jread_del_qual, jbyteArray jgcp, jlongArray jhap_lengths, jlongArray jread_lengths, jint testcase, jint maxHapLength, jint maxReadLength)
{
jdoubleArray jresult;
jresult = env->NewDoubleArray(testcase);
if (jresult == NULL)
{
env->ExceptionClear();
env->ThrowNew(env->FindClass("java/lang/OutOfMemoryError"), "Memory allocation issue.");
return NULL; /* out of memory error thrown */
}

jbyte *hap_bases = (jbyte *)env->GetPrimitiveArrayCritical(jhap_bases, 0);
jbyte *hap_pdbases = (jbyte *)env->GetPrimitiveArrayCritical(jhap_pdbases, 0);
jbyte *read_bases = (jbyte *)env->GetPrimitiveArrayCritical(jread_bases, 0);
jbyte *read_qual = (jbyte *)env->GetPrimitiveArrayCritical(jread_qual, 0);
jbyte *read_ins_qual = (jbyte *)env->GetPrimitiveArrayCritical(jread_ins_qual, 0);
jbyte *read_del_qual = (jbyte *)env->GetPrimitiveArrayCritical(jread_del_qual, 0);
jbyte *gcp = (jbyte *)env->GetPrimitiveArrayCritical(jgcp, 0);
jlong *hap_lengths = (jlong *)env->GetPrimitiveArrayCritical(jhap_lengths, 0);
jlong *read_lengths = (jlong *)env->GetPrimitiveArrayCritical(jread_lengths, 0);

if (hap_bases == NULL || hap_pdbases == NULL || read_bases == NULL || read_qual == NULL || read_ins_qual == NULL || read_del_qual == NULL || gcp == NULL || hap_lengths == NULL || read_lengths == NULL)
{
DBG("GetPrimitiveArrayCritical failed from JAVA unable to continue.");
if (env->ExceptionCheck())
env->ExceptionClear();
env->ThrowNew(env->FindClass("java/lang/IllegalArgumentException"), "Input arrays aren't valid.");
if (hap_bases != NULL)
env->ReleasePrimitiveArrayCritical(jhap_bases, hap_bases, 0);
if (hap_pdbases != NULL)
env->ReleasePrimitiveArrayCritical(jhap_pdbases, hap_pdbases, 0);
if (read_bases != NULL)
env->ReleasePrimitiveArrayCritical(jread_bases, read_bases, 0);
if (read_qual != NULL)
env->ReleasePrimitiveArrayCritical(jread_qual, read_qual, 0);
if (read_ins_qual != NULL)
env->ReleasePrimitiveArrayCritical(jread_ins_qual, read_ins_qual, 0);
if (read_del_qual != NULL)
env->ReleasePrimitiveArrayCritical(jread_del_qual, read_del_qual, 0);
if (gcp != NULL)
env->ReleasePrimitiveArrayCritical(jgcp, gcp, 0);
if (hap_lengths != NULL)
env->ReleasePrimitiveArrayCritical(jhap_lengths, hap_lengths, 0);
if (read_lengths != NULL)
env->ReleasePrimitiveArrayCritical(jread_lengths, read_lengths, 0);
return NULL;
}

double *result = (double *)_mm_malloc(testcase * sizeof(double), ALIGN_SIZE);
if (result == NULL)
{
env->ExceptionClear();
env->ThrowNew(env->FindClass("java/lang/OutOfMemoryError"), "Memory allocation issue.");
return NULL;
}
int32_t status = g_computePDHMM(hap_bases, hap_pdbases, read_bases, read_qual, read_ins_qual, read_del_qual, gcp, result, testcase, (int64_t *)hap_lengths, (int64_t *)read_lengths, maxReadLength, maxHapLength);

// release buffers
env->ReleasePrimitiveArrayCritical(jhap_bases, hap_bases, 0);
env->ReleasePrimitiveArrayCritical(jhap_pdbases, hap_pdbases, 0);
env->ReleasePrimitiveArrayCritical(jread_bases, read_bases, 0);
env->ReleasePrimitiveArrayCritical(jread_qual, read_qual, 0);
env->ReleasePrimitiveArrayCritical(jread_ins_qual, read_ins_qual, 0);
env->ReleasePrimitiveArrayCritical(jread_del_qual, read_del_qual, 0);
env->ReleasePrimitiveArrayCritical(jgcp, gcp, 0);
env->ReleasePrimitiveArrayCritical(jhap_lengths, hap_lengths, 0);
env->ReleasePrimitiveArrayCritical(jread_lengths, read_lengths, 0);

if (status != PDHMM_SUCCESS)
{
if (status == PDHMM_MEMORY_ALLOCATION_FAILED)
{
env->ExceptionClear();
env->ThrowNew(env->FindClass("java/lang/OutOfMemoryError"), "Memory allocation issue.");
}
if (status == PDHMM_INPUT_DATA_ERROR)
{
env->ExceptionClear();
env->ThrowNew(env->FindClass("java/lang/IllegalArgumentException"), "Error while calculating pdhmm. Input arrays aren't valid.");
}
if (status == PDHMM_FAILURE)
{
env->ExceptionClear();
env->ThrowNew(env->FindClass("java/lang/RuntimeException"), "Failure while computing PDHMM.");
}
}
else
{
env->SetDoubleArrayRegion(jresult, 0, testcase, result);
}
_mm_free(result);
return jresult;
}
26 changes: 26 additions & 0 deletions src/main/native/pdhmm/IntelPDHMM.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
#Copyright(C) 2023 Intel Corporation
#SPDX - License - Identifier : MIT License
*/

#ifndef _Included_com_intel_gkl_pdhmm_IntelPDHMM
#define _Included_com_intel_gkl_pdhmm_IntelPDHMM
#include <jni.h>
#ifdef __cplusplus
extern "C"
{
#endif

JNIEXPORT void JNICALL Java_com_intel_gkl_pdhmm_IntelPDHMM_initNative(JNIEnv *env, jclass obj);

/*
* Class: com_intel_gkl_pdhmm_IntelPDHMM
* Method: computeReadLikelihoodGivenHaplotypeLog10Vec
* Signature: ()Z
*/
JNIEXPORT jdoubleArray JNICALL Java_com_intel_gkl_pdhmm_IntelPDHMM_computePDHMMNative(JNIEnv *env, jobject obj, jbyteArray jhap_bases, jbyteArray jhap_pdbases, jbyteArray jread_bases, jbyteArray jread_qual, jbyteArray jread_ins_qual, jbyteArray jread_del_qual, jbyteArray jgcp, jlongArray jhap_lengths, jlongArray jread_lengths, jint testcase, jint maxHapLength, jint maxReadLength);

#ifdef __cplusplus
}
#endif
#endif
Loading

0 comments on commit 26de538

Please sign in to comment.