From d07c6c0df2156b60c481c18d42dc7db696c76f70 Mon Sep 17 00:00:00 2001
From: Andrey Prjibelski
Date: Wed, 31 Jan 2024 23:07:32 +0200
Subject: [PATCH] c++ lib started

---
 barcode_detection/cseqlib/cseqlib.cpp | 80 ++++++++++++++++++++++++++++++
 barcode_detection/cseqlib/test.py     | 24 +++++++++
 misc/poly_perf.py                     | 73 +++++++++++++++++++++++++++
 3 files changed, 177 insertions(+)
 create mode 100644 barcode_detection/cseqlib/cseqlib.cpp
 create mode 100644 barcode_detection/cseqlib/test.py
 create mode 100644 misc/poly_perf.py

diff --git a/barcode_detection/cseqlib/cseqlib.cpp b/barcode_detection/cseqlib/cseqlib.cpp
new file mode 100644
index 0000000..d41f289
--- /dev/null
+++ b/barcode_detection/cseqlib/cseqlib.cpp
@@ -0,0 +1,80 @@
+#include <cwchar>
+#include <iostream>
+#include <string>
+
+// NOTE(review): the header names of the three #include lines are missing from
+// this patch text. <iostream> and <string> are inferred from the std::cout and
+// std::string uses below; <cwchar> is a placeholder for the third -- confirm.
+
+// Complement lookup table indexed by character code: A<->T and C<->G, with N
+// kept as N, in upper and lower case. Every other code maps to ' '.
+char rev_comp_arr[256] = {
+    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  // 0x00-0x0F
+    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  // 0x10-0x1F
+    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  // 0x20-0x2F
+    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  // 0x30-0x3F
+    ' ', 'T', ' ', 'G', ' ', ' ', ' ', 'C', ' ', ' ', ' ', ' ', ' ', ' ', 'N', ' ',  // 0x40-0x4F: A C G N
+    ' ', ' ', ' ', ' ', 'A', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  // 0x50-0x5F: T
+    ' ', 't', ' ', 'g', ' ', ' ', ' ', 'c', ' ', ' ', ' ', ' ', ' ', ' ', 'n', ' ',  // 0x60-0x6F: a c g n
+    ' ', ' ', ' ', ' ', 'a', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  // 0x70-0x7F: t
+    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  // 0x80-0x8F
+    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  // 0x90-0x9F
+    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  // 0xA0-0xAF
+    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  // 0xB0-0xBF
+    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  // 0xC0-0xCF
+    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  // 0xD0-0xDF
+    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  // 0xE0-0xEF
+    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  // 0xF0-0xFF
+};
+
+extern "C" {
+
+// Writes the reverse complement of s[0..len) into rc[0..len).
+//
+// s:   input sequence of at least len wide characters.
+// rc:  caller-provided output buffer for at least len wide characters; no
+//      terminating NUL is written.
+// len: number of characters to convert; nothing is written when len <= 0.
+//
+// A character whose code is outside rev_comp_arr (negative, or 256 and above)
+// is written as ' ', the value the table holds for unmapped codes, instead of
+// being used as an out-of-range index. Null s or rc makes the call a no-op.
+void rev_comp(const wchar_t* s, wchar_t* rc, int len) {
+    if (s == nullptr || rc == nullptr) {
+        return;
+    }
+    for (int i = len - 1; i >= 0; --i) {
+        const int ri = len - 1 - i;
+        const unsigned long code = static_cast<unsigned long>(s[i]);
+        rc[ri] = code < sizeof(rev_comp_arr) ? rev_comp_arr[code] : ' ';
+        // Debug aid: std::cout << char(s[i]) << " " << char(rc[ri]) << std::endl;
+    }
+}
+
+}
+
+/*
+Disabled driver. It calls rev_comp with two arguments and expects a returned
+char*, which does not match the signature above.
+
+int main() {
+    std::string test[8] = {
+"GTGTACTTCGTTCAGTTACCAATTTGGGTGTTTAGCATGGTCATCGCCTACCGTGACAAGAAAGTTGTCGGTGTCTTTGTGTTTCTGTTGGTGCTGATATTGTGGGGGTTTTGAAAATGTCCTCGGCATAAAAGCGCCATTTTAATTTAAGAAAACGGGGAACTATGC",
+"TCAGGATTGGATTTATATGACTGATCAGTTTCCTCTGCTGTTATCGAAAGCAGATATCAAATGGCTGTGGAGGAATGCAGGTGATTGGAGTTGGTCCAAAGGAAGTTGTGAGTTCTGGGAGAGGCAGAAGGAAAGCAGCTGCCATGTTCTGAAGGTTATCAGCACCTG",
+"TTGGTGATAGAAACAGGCACAGAAGGTGTTAGCAGGTTCCTGTTTGTCCTCTCGCACCCCCTCCCTCCTGATGGTGACCTTGTCCCAGGTCTTCTACCAGGCCTTCTACCAGATGCCTTGCTGAGAATTCACAGAGGCCGTAGACCTGAAGAACCAACAACCTTCCAT",
+"AGGCTTTGAGGTCCCACTCCGGCAGCAGGACGTGCTGGCTCCCAGGAATCACTGACATCAAGGCGTGTAAAATAACACAAAGAGGTTTGCAAAGTCCACAGCCCAAGAGGAGCCGGAGCGTCCTGTTTTTACCATCACGCTTCGTTGTTCACGTTGCTTGTGTGTGTA",
+"AAACAGGGTACAATATTTAGAAACGGTGAAAGGAAATGACTGGCCTAAAACTCTTGTGATTCAGTGACTCAAGGATGATTGACACTGTGTAAAAACAGGCACATTAGACCAAGAGATAATTTGAAACCTTATTATTGGGTATTGTTTTTAAAAATTAAACCTATGGAC",
+"ATGAATAAAATGAAAAACTTTATAAGCACAGCTTATTGAACTAAAGGGCCTTAAGACCATCACTTCCAACGGCCTCATTTTACAGGAGAGAAATCTGAGGACCAGGGAAACCAAGTGACTTTTCTGGGGTCACATGGAATGTCAACAGCAGAGCTGTGAAGGCGTTCA",
+"GGTCTGCTAACCTCCCGGCTATGCTCATTCATGGAGAGTGCTTCGAAGAGTGTTTGCAACATTTAGTCACAGTTTATCTTTGGTTCCAATTCCACATTTACTCTATTTTTAATGTGTGTGAAAATGGCCCAGATTCATATGATTTGTTGCAGGTCAAACAGGTATTAG",
+"AAAAACTGCCAAGCTTGCACCGCTTATGTATAGTTATTTGTTGTGTATGTGCAAGTGTTTGTATGTGTGTGAGCACATAAGCATAATCTCTTTACACACACACACACACACCATTCCTACATCAAAAAGCTCTGAAAATTAAACTTTTTCATAAATTTGTGACAAATT"};
+
+    int len = test[0].length();
+    for (int i = 0; i <= 1000000; ++i) {
+        char* rc = rev_comp(test[i % 8].c_str(), len);
+        delete rc;
+    }
+    return 0;
+}
+
+
+*/
diff --git a/barcode_detection/cseqlib/test.py b/barcode_detection/cseqlib/test.py
new file
mode 100644
index 0000000..a2a9b10
--- /dev/null
+++ b/barcode_detection/cseqlib/test.py
@@ -0,0 +1,24 @@
+"""Call rev_comp from ./cseqlib.so through ctypes one million times; print the second result."""
+import ctypes
+
+lib = ctypes.CDLL('./cseqlib.so')
+# C signature: void rev_comp(const wchar_t* s, wchar_t* rc, int len)
+lib.rev_comp.argtypes = [ctypes.c_wchar_p, ctypes.c_wchar_p, ctypes.c_int]
+lib.rev_comp.restype = None
+TEST_SEQ = [
+"GTGTACTTCGTTCAGTTACCAATTTGGGTGTTTAGCATGGTCATCGCCTACCGTGACAAGAAAGTTGTCGGTGTCTTTGTGTTTCTGTTGGTGCTGATATTGTGGGGGTTTTGAAAATGTCCTCGGCATAAAAGCGCCATTTTAATTTAAGAAAACGGGGAACTATGC",
+"TCAGGATTGGATTTATATGACTGATCAGTTTCCTCTGCTGTTATCGAAAGCAGATATCAAATGGCTGTGGAGGAATGCAGGTGATTGGAGTTGGTCCAAAGGAAGTTGTGAGTTCTGGGAGAGGCAGAAGGAAAGCAGCTGCCATGTTCTGAAGGTTATCAGCACCTG",
+"TTGGTGATAGAAACAGGCACAGAAGGTGTTAGCAGGTTCCTGTTTGTCCTCTCGCACCCCCTCCCTCCTGATGGTGACCTTGTCCCAGGTCTTCTACCAGGCCTTCTACCAGATGCCTTGCTGAGAATTCACAGAGGCCGTAGACCTGAAGAACCAACAACCTTCCAT",
+"AGGCTTTGAGGTCCCACTCCGGCAGCAGGACGTGCTGGCTCCCAGGAATCACTGACATCAAGGCGTGTAAAATAACACAAAGAGGTTTGCAAAGTCCACAGCCCAAGAGGAGCCGGAGCGTCCTGTTTTTACCATCACGCTTCGTTGTTCACGTTGCTTGTGTGTGTA",
+"AAACAGGGTACAATATTTAGAAACGGTGAAAGGAAATGACTGGCCTAAAACTCTTGTGATTCAGTGACTCAAGGATGATTGACACTGTGTAAAAACAGGCACATTAGACCAAGAGATAATTTGAAACCTTATTATTGGGTATTGTTTTTAAAAATTAAACCTATGGAC",
+"ATGAATAAAATGAAAAACTTTATAAGCACAGCTTATTGAACTAAAGGGCCTTAAGACCATCACTTCCAACGGCCTCATTTTACAGGAGAGAAATCTGAGGACCAGGGAAACCAAGTGACTTTTCTGGGGTCACATGGAATGTCAACAGCAGAGCTGTGAAGGCGTTCA",
+"GGTCTGCTAACCTCCCGGCTATGCTCATTCATGGAGAGTGCTTCGAAGAGTGTTTGCAACATTTAGTCACAGTTTATCTTTGGTTCCAATTCCACATTTACTCTATTTTTAATGTGTGTGAAAATGGCCCAGATTCATATGATTTGTTGCAGGTCAAACAGGTATTAG",
+"AAAAACTGCCAAGCTTGCACCGCTTATGTATAGTTATTTGTTGTGTATGTGCAAGTGTTTGTATGTGTGTGAGCACATAAGCATAATCTCTTTACACACACACACACACACCATTCCTACATCAAAAAGCTCTGAAAATTAAACTTTTTCATAAATTTGTGACAAATT"]
+
+for i in range(1000000):
+    s = TEST_SEQ[i % 8]
+    slen = len(s)
+    rc = ctypes.create_unicode_buffer(slen)  # output buffer of slen wide characters
+    lib.rev_comp(s, rc, slen)
+    if i == 1:
+        print(rc.value)
diff --git a/misc/poly_perf.py b/misc/poly_perf.py
new file mode 100644
index 0000000..9d18b8f
--- /dev/null
+++ b/misc/poly_perf.py
@@ -0,0 +1,73 @@
+RCARR = [''] * 256
+RCARR[ord('A')] = 'T'
+RCARR[ord('T')] = 'A'
+RCARR[ord('C')] = 'G'
+RCARR[ord('G')] = 'C'
+RCARR[ord('N')] = 'N'
+RCARR[ord('a')] = 't'
+RCARR[ord('t')] = 'a'
+RCARR[ord('c')] = 'g'
+RCARR[ord('g')] = 'c'
+RCARR[ord('n')] = 'n'
+
+# Character-keyed complement map with the same entries as RCARR; other characters raise KeyError.
+RCDICT = {'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G', 'N': 'N', 'a': 't', 't': 'a', 'g': 'c', 'c': 'g', 'n': 'n'}
+
+
+def rc_func(c):
+    """Return the complement of an uppercase base A/T/C/G, or "" for any other character."""
+    if c == 'A': return 'T'
+    elif c == 'T': return 'A'
+    elif c == 'C': return 'G'
+    elif c == 'G': return 'C'
+    return ""
+
+
+# The six rev_comp_<lookup>_<loop> functions return the reverse complement of seq. They differ
+# only in the lookup (RCARR, RCDICT or rc_func) and in using map() or an index loop.
+def rev_comp_arr_map(seq):
+    return "".join(map(lambda x: RCARR[ord(x)], seq[::-1]))
+
+
+def rev_comp_arr_for(seq):
+    res = ""
+    for i in range(len(seq) - 1, -1, -1):
+        res += RCARR[ord(seq[i])]
+    return res
+
+
+def rev_comp_dict_map(seq):
+    return "".join(map(lambda x: RCDICT[x], seq[::-1]))
+
+
+def rev_comp_dict_for(seq):
+    res = ""
+    for i in range(len(seq) - 1, -1, -1):
+        res += RCDICT[seq[i]]
+    return res
+
+
+def rev_comp_func_map(seq):
+    return "".join(map(rc_func, seq[::-1]))
+
+
+def rev_comp_func_for(seq):
+    res = ""
+    for i in range(len(seq) - 1, -1, -1):
+        res += rc_func(seq[i])
+    return res
+
+
+TEST_SEQ = [
+"GTGTACTTCGTTCAGTTACCAATTTGGGTGTTTAGCATGGTCATCGCCTACCGTGACAAGAAAGTTGTCGGTGTCTTTGTGTTTCTGTTGGTGCTGATATTGTGGGGGTTTTGAAAATGTCCTCGGCATAAAAGCGCCATTTTAATTTAAGAAAACGGGGAACTATGC",
+"TCAGGATTGGATTTATATGACTGATCAGTTTCCTCTGCTGTTATCGAAAGCAGATATCAAATGGCTGTGGAGGAATGCAGGTGATTGGAGTTGGTCCAAAGGAAGTTGTGAGTTCTGGGAGAGGCAGAAGGAAAGCAGCTGCCATGTTCTGAAGGTTATCAGCACCTG",
+"TTGGTGATAGAAACAGGCACAGAAGGTGTTAGCAGGTTCCTGTTTGTCCTCTCGCACCCCCTCCCTCCTGATGGTGACCTTGTCCCAGGTCTTCTACCAGGCCTTCTACCAGATGCCTTGCTGAGAATTCACAGAGGCCGTAGACCTGAAGAACCAACAACCTTCCAT",
+"AGGCTTTGAGGTCCCACTCCGGCAGCAGGACGTGCTGGCTCCCAGGAATCACTGACATCAAGGCGTGTAAAATAACACAAAGAGGTTTGCAAAGTCCACAGCCCAAGAGGAGCCGGAGCGTCCTGTTTTTACCATCACGCTTCGTTGTTCACGTTGCTTGTGTGTGTA",
+"AAACAGGGTACAATATTTAGAAACGGTGAAAGGAAATGACTGGCCTAAAACTCTTGTGATTCAGTGACTCAAGGATGATTGACACTGTGTAAAAACAGGCACATTAGACCAAGAGATAATTTGAAACCTTATTATTGGGTATTGTTTTTAAAAATTAAACCTATGGAC",
+"ATGAATAAAATGAAAAACTTTATAAGCACAGCTTATTGAACTAAAGGGCCTTAAGACCATCACTTCCAACGGCCTCATTTTACAGGAGAGAAATCTGAGGACCAGGGAAACCAAGTGACTTTTCTGGGGTCACATGGAATGTCAACAGCAGAGCTGTGAAGGCGTTCA",
+"GGTCTGCTAACCTCCCGGCTATGCTCATTCATGGAGAGTGCTTCGAAGAGTGTTTGCAACATTTAGTCACAGTTTATCTTTGGTTCCAATTCCACATTTACTCTATTTTTAATGTGTGTGAAAATGGCCCAGATTCATATGATTTGTTGCAGGTCAAACAGGTATTAG",
+"AAAAACTGCCAAGCTTGCACCGCTTATGTATAGTTATTTGTTGTGTATGTGCAAGTGTTTGTATGTGTGTGAGCACATAAGCATAATCTCTTTACACACACACACACACACCATTCCTACATCAAAAAGCTCTGAAAATTAAACTTTTTCATAAATTTGTGACAAATT"]
+
+# Run one variant over the test sequences one million times.
+for i in range(1000000):
+    s = rev_comp_dict_map(TEST_SEQ[i % 8])
\ No newline at end of file