Skip to content

Commit

Permalink
implement new method
Browse files Browse the repository at this point in the history
  • Loading branch information
hasindu2008 committed Sep 22, 2024
1 parent 207a5e7 commit 8f17afd
Show file tree
Hide file tree
Showing 9 changed files with 1,968,889 additions and 1,968,827 deletions.
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ OBJ = $(BUILD_DIR)/main.o \
$(BUILD_DIR)/index.o \
$(BUILD_DIR)/nanopolish_fast5_io.o \
$(BUILD_DIR)/model.o \
$(BUILD_DIR)/methmodel.o \
$(BUILD_DIR)/align.o \
$(BUILD_DIR)/meth.o \
$(BUILD_DIR)/hmm.o \
Expand Down Expand Up @@ -87,6 +88,9 @@ $(BUILD_DIR)/nanopolish_fast5_io.o: src/nanopolish_fast5_io.c src/fast5lite.h
$(BUILD_DIR)/model.o: src/model.c src/model.h src/f5c.h src/fast5lite.h src/f5cmisc.h
$(CXX) $(CFLAGS) $(CPPFLAGS) $(LANGFLAG) $< -c -o $@

$(BUILD_DIR)/methmodel.o: src/methmodel.c
$(CXX) $(CFLAGS) $(CPPFLAGS) $(LANGFLAG) $< -c -o $@

$(BUILD_DIR)/align.o: src/align.c src/f5c.h src/fast5lite.h
$(CXX) $(CFLAGS) $(CPPFLAGS) $(LANGFLAG) $< -c -o $@

Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ First, the reads have to be indexed using `f5c index`. Then, invoke `f5c call-me
[![GitHub Downloads](https://img.shields.io/github/downloads/hasindu2008/f5c/total?logo=GitHub)](https://github.com/hasindu2008/f5c/releases)
[![BioConda Install](https://img.shields.io/conda/dn/bioconda/f5c?label=BioConda)](https://anaconda.org/bioconda/f5c)
[![x86_64](https://github.com/hasindu2008/f5c/actions/workflows/f5c-x86_64.yml/badge.svg)](https://github.com/hasindu2008/f5c/actions/workflows/f5c-x86_64.yml)
[![AARCH64](https://www.travis-ci.com/hasindu2008/f5c.svg?branch=master)](https://www.travis-ci.com/hasindu2008/f5c)
[![AARCH64](https://www.travis-ci.com/hasindu2008/f5c.svg?branch=master)](https://app.travis-ci.com/hasindu2008/f5c)
[![CodeFactor](https://www.codefactor.io/repository/github/hasindu2008/f5c/badge/master)](https://www.codefactor.io/repository/github/hasindu2008/f5c/overview/master)

Please cite the following when using *f5c* in your publications:
Expand All @@ -41,7 +41,7 @@ Please cite the following when using *f5c* in your publications:

If you are a Linux user and want to quickly try out download the compiled binaries from the [latest release](https://github.com/hasindu2008/f5c/releases). For example:
```sh
VERSION=v1.4
VERSION=v1.5
wget "https://github.com/hasindu2008/f5c/releases/download/$VERSION/f5c-$VERSION-binaries.tar.gz" && tar xvf f5c-$VERSION-binaries.tar.gz && cd f5c-$VERSION/
./f5c_x86_64_linux # CPU version
./f5c_x86_64_linux_cuda # cuda supported version
Expand All @@ -57,7 +57,7 @@ You can also use conda to install *f5c* as `conda install f5c -c bioconda -c con
Users are recommended to build from the [latest release](https://github.com/hasindu2008/f5c/releases) tar ball. You need a compiler that supports C++11. Quick example for Ubuntu :
```sh
sudo apt-get install libhdf5-dev zlib1g-dev #install HDF5 and zlib development libraries
VERSION=v1.4
VERSION=v1.5
wget "https://github.com/hasindu2008/f5c/releases/download/$VERSION/f5c-$VERSION-release.tar.gz" && tar xvf f5c-$VERSION-release.tar.gz && cd f5c-$VERSION/
scripts/install-hts.sh # download and compile the htslib
./configure
Expand Down
181 changes: 117 additions & 64 deletions src/eventalign.c
Original file line number Diff line number Diff line change
Expand Up @@ -1263,33 +1263,6 @@ struct EventAlignmentParameters
};


// static inline int32_t *get_ref_to_read_map(AlignedSegment& aligned_pairs, int32_t *ref_seg_len_ptr){

// int32_t ref_start = aligned_pairs.front().ref_pos;
// int32_t ref_end = aligned_pairs.back().ref_pos;
// int32_t ref_seg_len = ref_end - ref_start + 1;
// *ref_seg_len_ptr = ref_seg_len;
// assert(ref_start < ref_end);
// assert(ref_seg_len > 0);

// int32_t *ref_to_read_map = (int32_t*)malloc(sizeof(int32_t)*ref_seg_len);
// MALLOC_CHK(ref_to_read_map);
// for(int32_t i=0; i<ref_seg_len; i++){
// ref_to_read_map[i] = -1;
// }
// for(size_t i = 0; i < aligned_pairs.size(); i++){
// int32_t ref_pos = aligned_pairs[i].ref_pos - ref_start;
// int32_t read_pos = aligned_pairs[i].read_pos;
// ref_to_read_map[ref_pos] = read_pos;
// }

// // for(int32_t i=0; i<ref_seg_len; i++){
// // fprintf(stderr, "ref_to_read_map[%d] = %d\n",i+ref_start,ref_to_read_map[i]);
// // }

// return ref_to_read_map;
// }

std::vector<event_alignment_t> align_read_to_ref(const EventAlignmentParameters& params, char *ref)
{
// Sanity check input parameters
Expand Down Expand Up @@ -1346,9 +1319,6 @@ struct EventAlignmentParameters

AlignedSegment& aligned_pairs = aligned_segments[segment_idx];

//int32_t ref_seg_len = 0;
//int32_t *ref_to_read_map = get_ref_to_read_map(aligned_pairs, &ref_seg_len);

if(params.region_start != -1 && params.region_end != -1) {
//fprintf(stderr, "params.region_start = %d params.region_end = %d\n",params.region_start,params.region_end);
trim_aligned_pairs_to_ref_region(aligned_pairs, params.region_start, params.region_end);
Expand Down Expand Up @@ -1498,8 +1468,6 @@ struct EventAlignmentParameters
ea.ref_position = curr_start_ref + as.kmer_idx;
std::string ref__kmer = ref_seq.substr(ea.ref_position - ref_offset, k);
kmer_cpy(ea.ref_kmer, ref__kmer.c_str(),k);
//assert(as.kmer_idx < ref_seg_len - params.kmer_size + 1);
//ea.read_position = ref_to_read_map[as.kmer_idx];

// event
ea.read_idx = params.read_idx;
Expand Down Expand Up @@ -1560,7 +1528,6 @@ struct EventAlignmentParameters
tester_i++;
} // for realignmentsegment
// fprintf(stderr, "tester_i = %d\n",tester_i);
//free(ref_to_read_map);
} // for bam aligned segment
// fprintf(stderr, "alignment_output len = %d\n",alignment_output.size());
// assert(alignment_output.size() == 4451);
Expand Down Expand Up @@ -2066,36 +2033,121 @@ std::vector<float> get_scaled_samples_for_event(const event_table* events,scalin
return out;
}

// static inline int32_t *get_event_to_base_map(const event_table* et, index_pair_t *base_to_event_map, int32_t read_len, uint32_t kmer_size){
// int32_t *event_to_base_map = (int32_t*)malloc(sizeof(int32_t)*et->n);
// MALLOC_CHK(event_to_base_map);
// for(size_t i=0; i<et->n; i++){
// event_to_base_map[i] = -1;
// }
// int n_kmer = read_len - kmer_size + 1;
// for(int i=0; i<n_kmer; i++){
// fprintf(stderr, "i: %d start: %d stop: %d\n", i, base_to_event_map[i].start, base_to_event_map[i].stop);
// if(base_to_event_map[i].start != -1){
// for(int j=base_to_event_map[i].start; j<=base_to_event_map[i].stop; j++){
// event_to_base_map[j] = i;
// }
// }
// }
// return event_to_base_map;
// }
typedef struct {
int32_t capacity;
int32_t size;
int32_t *map;
int32_t ref_pos_start;
} ref2read_t;

static inline ref2read_t get_ref2read_map(bam1_t *record, int32_t read_len){

ref2read_t ref2read;
ref2read.capacity = read_len;
ref2read.size = 0;

ref2read.map = (int32_t*)malloc(sizeof(int32_t)*ref2read.capacity);
MALLOC_CHK(ref2read.map);
for(int i = 0; i < ref2read.capacity; i++){
ref2read.map[i] = -1;
}

// static inline void sprintf_read_kmer(kstring_t *sp, int32_t *event_to_base_map, uint64_t event_idx, char *read, int32_t read_len, uint32_t kmer_size){
// This code is derived from bam_fillmd1_core
uint32_t *cigar = bam_get_cigar(record);
const bam1_core_t *c = &record->core;

// int32_t read_pos = event_to_base_map[event_idx];
// if(read_pos == -1){
// sprintf_append(sp, "\t.\t.");
// return;
// } else {
// char kmer[kmer_size+1];
// kmer_cpy(kmer, read+read_pos, kmer_size);
// sprintf_append(sp, "\t%d\t%s", read_pos, kmer);
// }
// }
// read pos is an index into the original sequence that is present in the FASTQ
// on the strand matching the reference
int read_pos = 0;

int ref_pos = c->pos;
ref2read.ref_pos_start = ref_pos;

for (uint32_t ci = 0; ci < c->n_cigar; ++ci) {

int cigar_len = cigar[ci] >> 4;
int cigar_op = cigar[ci] & 0xf;

// Set the amount that the ref/read positions should be incremented
// based on the cigar operation
int read_inc = 0;
int ref_inc = 0;

// Process match between the read and the reference
bool is_aligned = false;
if(cigar_op == BAM_CMATCH || cigar_op == BAM_CEQUAL || cigar_op == BAM_CDIFF) {
is_aligned = true;
read_inc = 1;
ref_inc = 1;
} else if(cigar_op == BAM_CDEL) {
ref_inc = 1;
} else if(cigar_op == BAM_CREF_SKIP) {
ref_inc = 1;
} else if(cigar_op == BAM_CINS) {
read_inc = 1;
} else if(cigar_op == BAM_CSOFT_CLIP) {
read_inc = 1; // special case, do not use read_stride
} else if(cigar_op == BAM_CHARD_CLIP) {
read_inc = 0;
} else {
printf("Cigar: %d\n", cigar_op);
assert(false && "Unhandled cigar operation");
}

// Iterate over the pairs of aligned bases
for(int j = 0; j < cigar_len; ++j) {
if(is_aligned) {
ref2read.map[ref2read.size] = read_pos;
}

// increment
read_pos += read_inc;
ref_pos += ref_inc;
ref2read.size += ref_inc;

if(ref2read.size >= ref2read.capacity){
ref2read.capacity = ref2read.capacity*+1000;
ref2read.map = (int32_t*)realloc(ref2read.map, sizeof(int32_t)*ref2read.capacity);
MALLOC_CHK(ref2read.map);
for(int i = ref2read.size; i < ref2read.capacity; i++){
ref2read.map[i] = -1;
}
}
}
}

// for(int i = 0; i < ref2read.size; i++){
// fprintf(stderr, "ref2read_map[%d]=%d\n",i+ref2read.ref_pos_start, ref2read.map[i]);
// }

return ref2read;
}

static inline void sprintf_read_kmer(kstring_t *sp, ref2read_t ref2read, uint64_t ref_pos, char *read, uint32_t kmer_size){

int32_t map_idx = ref_pos - ref2read.ref_pos_start;
assert(map_idx >= 0 && map_idx < ref2read.size);
int32_t read_pos = ref2read.map[map_idx];

if(read_pos == -1){
sprintf_append(sp, "\t.\t.");
return;
} else {
char kmer[kmer_size+1];
for(uint32_t i = 0; i < kmer_size; i++){
int ref_base_idx = map_idx + i;
assert(ref_base_idx >= 0 && ref_base_idx < ref2read.size);
int read_base_idx=ref2read.map[ref_base_idx];
if(read_base_idx == -1){
kmer[i] = '.';
} else {
kmer[i] = read[read_base_idx];
}
}
kmer[kmer_size] = '\0';
sprintf_append(sp, "\t%d\t%s", read_pos, kmer);
}
}


// static inline void sprintf_read_kmer(kstring_t *sp, int32_t read_pos, char *read, int32_t read_len, uint32_t kmer_size){
Expand All @@ -2115,14 +2167,14 @@ char *emit_event_alignment_tsv(uint32_t strand_idx,
const event_table* et, model_t* model, uint32_t kmer_size, scalings_t scalings,
const std::vector<event_alignment_t>& alignments,
int8_t print_read_names, int8_t scale_events, int8_t write_samples, int8_t write_signal_index, int8_t collapse,
int64_t read_index, char* read_name, char *ref_name,float sample_rate, float *rawptr, int32_t read_len, char *read)
int64_t read_index, char* read_name, char *ref_name,float sample_rate, float *rawptr, int32_t read_len, char *read, bam1_t* bam_record)
{

kstring_t str;
kstring_t *sp = &str;
str_init(sp, sizeof(char)*alignments.size()*120);

//int32_t *event_to_base_map = get_event_to_base_map(et, base_to_event_map, read_len, kmer_size);
ref2read_t ref2read = get_ref2read_map( bam_record, read_len);

size_t n_collapse = 1;
for(size_t i = 0; i < alignments.size(); i+=n_collapse) {
Expand Down Expand Up @@ -2246,10 +2298,11 @@ char *emit_event_alignment_tsv(uint32_t strand_idx,
sprintf_append(sp, "\t%s", sample_str.c_str());
}
//sprintf_read_kmer(sp, ea.read_position, read, read_len, kmer_size);
sprintf_read_kmer(sp, ref2read, ea.ref_position, read, kmer_size);
sprintf_append(sp, "\n");
}

//free(event_to_base_map);
free(ref2read.map);

//str_free(sp); //freeing is later done in free_db_tmp()
return sp->s;
Expand Down
2 changes: 1 addition & 1 deletion src/f5c.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#include <string>
#include <vector> //required for eventalign

#define F5C_VERSION "1.4-dirty"
#define F5C_VERSION "1.5-dirty"

/*******************************
* major hard coded parameters *
Expand Down
Loading

0 comments on commit 8f17afd

Please sign in to comment.