-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathannotate-tails.R
53 lines (44 loc) · 1.69 KB
/
annotate-tails.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#' Assign transcript IDs to poly(A) tails
#'
#' This function assigns transcript IDs to the poly(A) tails estimated by
#' tailfindr's find_tails() function. It merges information from the
#' alignment SAM file with tailfindr's CSV output. The output is written to a
#' file, and also returned as a dataframe.
#'
#' Only mapped, primary alignments are used: any SAM record whose FLAG has the
#' unmapped (0x4), secondary (0x100) or supplementary (0x800) bit set is
#' dropped before merging. Reads are matched on the read_id column, and only
#' reads present in both inputs are kept (inner join).
#'
#' @param sam_file The path of the alignment SAM file
#' @param tails_csv_file The path of the CSV file produced by find_tails; it
#'   must contain a read_id column
#' @param output_file The path of the output CSV file
#'
#' @return A dataframe with the columns of the tails CSV plus transcript_id,
#'   mapping_quality and sam_flag taken from the SAM file
#' @export
#' @importFrom magrittr "%>%"
annotate_tails <- function(sam_file,
                           tails_csv_file,
                           output_file) {
  # Bind the column names used through non-standard evaluation so that
  # R CMD check does not report them as undefined global variables
  QNAME <- RNAME <- MAPQ <- FLAG <- NULL

  # The SAM FLAG is a bit field, so it must be tested bit-wise rather than
  # compared with exact values:
  #   0x4 (4) unmapped, 0x100 (256) secondary, 0x800 (2048) supplementary
  excluded_flag_bits <- 4L + 256L + 2048L

  # Read SAM file; the alignment records are in element "x" of the result
  sam_list <- read_sam(sam_file)
  df_sam <- sam_list[["x"]] %>%
    dplyr::filter(bitwAnd(as.integer(FLAG), excluded_flag_bits) == 0L) %>%
    # select() renames and subsets in one step
    dplyr::select(read_id = QNAME,
                  transcript_id = RNAME,
                  mapping_quality = MAPQ,
                  sam_flag = FLAG)

  # Read tails CSV file
  message("Reading poly(A) tail data...")
  df_tails <- utils::read.csv(file = tails_csv_file,
                              header = TRUE,
                              stringsAsFactors = FALSE)

  # Keep only the reads that have both a tail estimate and a retained
  # alignment
  message("Merging poly(A) tail data and transcript IDs...")
  df <- dplyr::inner_join(df_tails, df_sam, by = "read_id")

  message("Writing CSV file...")
  data.table::fwrite(df, file = output_file)
  message("Done!")

  df
}