Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,5 @@ doc
Meta
.Rproj.user
FRASER_output
/doc/
/Meta/
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: FRASER
Type: Package
Title: Find RAre Splicing Events in RNA-Seq Data
Version: 2.4.3
Version: 2.4.4
Date: 2025-06-04
Authors@R: c(
person("Christian", "Mertes", role=c("aut", "cre"),
Expand Down
7 changes: 6 additions & 1 deletion NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
CHANGES IN VERSION 2.4.4
-------------------------
o Bugfix: reading fragments instead of read pairs for unstranded paired-end data.
If you have such kind of data, please rerun the whole analysis from the start (make sure you have recount=TRUE).

CHANGES IN VERSION 2.4.3
-------------------------
o Fix bug in cache
Expand All @@ -15,7 +20,7 @@ CHANGES IN VERSION 1.99.4
CHANGES IN VERSION 1.99.3
-------------------------
o Bugfix for contig names containing certain characters
o Update of the plot functions to support colorring aberrant status based
o Update of the plot functions to support coloring aberrant status based
on p values computed on subsets of genes
o Major update to FRASER2:
o Introduction of new & more robust splice metric Intron Jaccard Index
Expand Down
89 changes: 47 additions & 42 deletions R/countRNAseqData.R
Original file line number Diff line number Diff line change
Expand Up @@ -568,10 +568,16 @@ countSplitReadsPerChromosome <- function(chromosome, bamFile,
return(GRanges())
}

# get reads from bam file
if(isFALSE(as.logical(strandMode)) || isFALSE(pairedEnd)){
galignment <- readGAlignments(bamFile, param=param)
} else{
# if single end, count reads
# if paired end, count pairs
if(isFALSE(pairedEnd)){
galignment <- readGAlignments(bamFile, param=param)

# remove the strand information if unstranded data
if(isFALSE(as.logical(strandMode))){
strand(galignment) <- "*"
}
} else {
galignment <- readGAlignmentPairs(
bamFile, param=param, strandMode=strandMode)
}
Expand All @@ -580,17 +586,6 @@ countSplitReadsPerChromosome <- function(chromosome, bamFile,
# (occurs if reads of a pair align to different chromosomes)
galignment <- galignment[!is.na(seqnames(galignment))]

# remove the strand information if unstranded data
if(isFALSE(as.logical(strandMode))){
strand(galignment) <- "*"
}
# invert the strand information for reverse strand specific protocols
# (only needed for single-end reads as real strand is already set for
# paired-end reads in the readGAlignmentPairs function)
if(isFALSE(pairedEnd) && strandMode == 2L){

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are you sure we do not need this invertStrand anymore

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The invert would only be interesting for SE data that is stranded. For PE readGAlignmentPairs takes care of this.

galignment <- invertStrand(galignment)
}

# dont count if there is nothing to count
if(length(galignment) == 0){
return(GRanges())
Expand All @@ -617,45 +612,55 @@ countSplitReadsPerChromosome <- function(chromosome, bamFile,
}
}

# get the junction positions and their counts
jc <- summarizeJunctions(galignment, genome=genome,
with.revmap=(as.logical(strandMode) && pairedEnd) )
# get the junction positions and their counts and
# use revmap for paired-end to count fragments (avoid double-counting reads)
jc <- summarizeJunctions(galignment, genome=genome, with.revmap=isTRUE(pairedEnd))

if(length(jc) == 0){
return(GRanges())
}

# for strand specific counting: split result into counts for + and - strand
if(isTRUE(as.logical(strandMode))){

# for paired-end reads: ensure that each read pair is only counted once
if(isTRUE(pairedEnd)){
fragment_counts <- vapply(jc@elementMetadata$revmap,
FUN=function(pairs){
strands <- strand(galignment[pairs,])
return(c(plus_score=sum(strands == "+"),
minus_score=sum(strands == "-")))
}, FUN.VALUE=integer(2) )
mcols(jc)[,"plus_score"] <- fragment_counts["plus_score",]
mcols(jc)[,"minus_score"] <- fragment_counts["minus_score",]
# for paired-end reads: ensure that each read pair is only counted once
if(isTRUE(pairedEnd)){

# for unstranded pairedend data: just count the revmap
if(isFALSE(as.logical(strandMode))){
mcols(jc)['score'] <- sapply(mcols(jc)[['revmap']], length)

# for stranded pairedend data: count only the pairs for each strand
} else {
gastrand <- strand(galignment)
fragment_counts <- sapply(mcols(jc)[['revmap']],
FUN=function(pairs){
tbl <- table(gastrand[pairs])
# Ensure consistent dimensions by including all expected levels
result <- c("+"=0, "-"=0, "*"=0)
result[names(tbl)] <- tbl
return(result)
}
)
mcols(jc)[,"plus_score"] <- fragment_counts["+",]
mcols(jc)[,"minus_score"] <- fragment_counts["-",]

# create for each junction on each side that was found a row
jc_per_strand <- sapply(c("-", "+"), jc=jc, FUN=function(strand_name, jc){
col_name <- paste0(c('+'="plus", "-"="minus")[strand_name], "_score")
jcStrand <- jc
mcols(jcStrand)[['score']] <- mcols(jc)[[col_name]]
strand(jcStrand) <- strand_name
jcStrand <- jcStrand[mcols(jcStrand)[['score']] > 0]
return(jcStrand)
})

# combine the junctions for both strands
jc <- c(jc_per_strand[[1]], jc_per_strand[[2]])
}

jcPlus <- jc
mcols(jcPlus)[,"score"] <- mcols(jc)[,"plus_score"]
strand(jcPlus) <- "+"
jcPlus <- jcPlus[mcols(jcPlus)[,"score"] > 0,]
jcMinus <- jc
mcols(jcMinus)[,"score"] <- mcols(jc)[,"minus_score"]
strand(jcMinus) <- "-"
jcMinus <- jcMinus[mcols(jcMinus)[,"score"] > 0,]
jc <- c(jcPlus, jcMinus)
}

ans <- jc[,"score"]
colnames(mcols(ans)) <- "count"

# set predicted strand if present or set it to + if NA

if(isFALSE(as.logical(strandMode)) && !is.null(genome) &&
length(ans) > 0){
strand(ans) <- jc$intron_strand
Expand Down
2 changes: 1 addition & 1 deletion R/helper-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ checkCountData <- function(fds, stop=TRUE){
if(isFALSE(stop)) return(invisible(FALSE))
stop("No counts detected! Please provide counts first.")
}
if(!all(paste0("rawOtherCounts_", psiTypes) %in% assayNames(fds))){
if(!all(paste0("rawOtherCounts_", metadata(fds)$fit_metrics) %in% assayNames(fds))){
if(isFALSE(stop)) return(invisible(FALSE))
stop("Please compute first the total expression at each junction.")
}
Expand Down
10 changes: 5 additions & 5 deletions R/makeSimulatedDataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,13 @@ makeSimulatedFraserDataSet_BetaBinomial <- function(m=200, j=10000, q=10,
k <- matrix(rbetabinom(j*m, size=n, prob=mu_psi, rho=rho_psi),
nrow=j, ncol=m)
mode(k) <- 'integer'



#
# Create FRASER data set
#
sampleIDs <- paste0("sample", seq_len(m))
anno <- data.table(sampleID = sampleIDs, bamFile=rep(NA, m))
anno <- data.table(sampleID = sampleIDs,
bamFile=rep(NA, m), strand=rep(0L, m))
fds <- FraserDataSet(colData=anno, ...)

# put in n as rawcountsJ first so it doesn't complain later
Expand Down Expand Up @@ -122,8 +122,8 @@ makeSimulatedFraserDataSet_BetaBinomial <- function(m=200, j=10000, q=10,
encodingDimension=q, evaluationLoss=1, evalMethod='simulation')

# Store "other" counts
counts(fds, type="psi3", side="other", withDimnames=FALSE) <- n - k
counts(fds, type="psi5", side="other", withDimnames=FALSE) <- n - k
counts(fds, type="psi3", side="other", withDimnames=FALSE) <- n - k
counts(fds, type="psi5", side="other", withDimnames=FALSE) <- n - k
counts(fds, type="theta", side="other", withDimnames=FALSE) <- n

# store information about the simulation in the fds
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ or if you use FRASER2:

## What's new

In version 2.4.5, we fixed a bug affecting unstranded paired-end data. We were counting read fragments instead of read pairs. The counted split reads with the fixed method are, on average, lower by around 20%. If you have such data, please rerun your entire analysis from the start (make sure you have recount=TRUE).

In version 2.4.3, instead of doing a grid search to determine the optimal encoding dimension of the denoising autoencoder, we now use the Optimal Hard Threshold (OHT). This makes the algorithm 6-10 times faster!

⚠️ Also, since this version, FRASER is released under `CC-BY-NC 4.0`, meaning it requires a license for any commercial use. If you want to use it for commercial purposes, please contact us.
Expand Down
1 change: 1 addition & 0 deletions tests/testthat.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ if(.Platform$OS.type != "unix") {
register(SerialParam())
}

set.seed(42)
test_check("FRASER")
18 changes: 10 additions & 8 deletions tests/testthat/helper_test_data.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#
# all functions to generate test objects
#
test_generate_count_example <- function(recount=FALSE){
test_generate_count_example <- function(recount=TRUE){
# get a new object for only one sample
if(recount || !"test_fdsSample3" %in% ls()){

Expand All @@ -27,26 +27,28 @@ test_generate_count_example <- function(recount=FALSE){
#
# This is manually counted from the IGV browser
#
test_rawCountsJ <- c(3, 13, 1)
test_rawCountsSS <- c(9, 10, 0, 0, 10)
test_p5rawOCounts <- c(0, 1, 13)
test_p3rawOCounts <- c(0, 0, 0)
test_pSrawOCounts <- c(3, 3, 14, 13, 1)
test_rawCountsJ_SE <- c(3, 13, 1)
test_rawCountsJ_PE <- c(2, 10, 1)
test_rawCountsSS <- c(7, 8, 0, 0, 7)
test_p5rawOCounts <- c(0, 1, 10)
test_p3rawOCounts <- c(0, 0, 0)
test_pSrawOCounts <- c(2, 2, 11, 10, 1)

return(list(
test_fdsSample3=test_fdsSample3,
test_range=test_range,
test_rangeOV=test_rangeOV,
test_rangeFDS=test_rangeFDS,
test_rawCountsJ=test_rawCountsJ,
test_rawCountsJ_SE=test_rawCountsJ_SE,
test_rawCountsJ_PE=test_rawCountsJ_PE,
test_rawCountsSS=test_rawCountsSS,
test_p5rawOCounts=test_p5rawOCounts,
test_p3rawOCounts=test_p3rawOCounts,
test_pSrawOCounts=test_pSrawOCounts
))
}

test_generate_strand_specific_count_example <- function(recount=FALSE){
test_generate_strand_specific_count_example <- function(recount=TRUE){
# get a new object for only one sample
if(recount || !"test_fdsSample3_stranded" %in% ls()){

Expand Down
26 changes: 16 additions & 10 deletions tests/testthat/test_counting.R
Original file line number Diff line number Diff line change
@@ -1,17 +1,23 @@
context("Test counting")

test_that("Count junctions", {
out <- capture.output(attach(test_generate_count_example()))
out <- capture.output(attach(test_generate_count_example(recount=TRUE)))

expect_is(test_fdsSample3, "FraserDataSet")

# test how many ranges we found
expect_equal(length(test_rangeFDS), 3)
expect_equal(length(nonSplicedReads(test_rangeFDS)), 5)

# test the manually counted positions
expect_equal(as.vector(counts(test_rangeFDS, type="j")), test_rawCountsJ)
# expect_equal(as.vector(counts(test_rangeFDS, type="ss")), test_rawCountsSS)
# test the manually counted positions for PE counting
expect_equal(as.vector(counts(test_rangeFDS, type="j")), test_rawCountsJ_PE)

# test the manually counted positions for SE counting
se_counting <- countSplitReadsPerChromosome("chr19", bamFile(test_fdsSample3), pairedEnd=FALSE,
strandMode=0L, genome=NULL, scanBamParam=scanBamParam(test_fdsSample3))
test_rangeOV <- findOverlaps(test_range, se_counting, type = "equal")
expect_equal(mcols(se_counting[to(test_rangeOV)])[['count']], test_rawCountsJ_SE)

})

test_that("Strand spcific counting", {
Expand All @@ -31,7 +37,7 @@ test_that("Strand spcific counting", {

# check for non empty chromosome but no split reads present
fds <- createTestFraserSettings()
strandSpecific(fds) <- TRUE
strandSpecific(fds) <- rep(TRUE, length(samples(fds)))
ans <- countSplitReadsPerChromosome("chrUn_gl000218", bamFile(fds)[1],
pairedEnd=TRUE, strandMode=strandSpecific(fds)[1], genome=NULL,
scanBamParam=scanBamParam(fds))
Expand All @@ -57,27 +63,27 @@ test_that("test minAnchor", {
test_that("Test psi values", {
attach(test_generate_count_example())

expect_equal(as.vector(counts(test_rangeFDS, type="psi3")), test_rawCountsJ)
expect_equal(as.vector(counts(test_rangeFDS, type="psi3")), test_rawCountsJ_PE)
expect_equal(test_p3rawOCounts,
as.vector(counts(test_rangeFDS, type="psi3", side="other"))
)

expect_equal(as.vector(counts(test_rangeFDS, type="psi5")), test_rawCountsJ)
expect_equal(as.vector(counts(test_rangeFDS, type="psi5")), test_rawCountsJ_PE)
expect_equal(test_p5rawOCounts,
as.vector(counts(test_rangeFDS, type="psi5", side="other"))
)

#expect_equal(as.vector(counts(test_rangeFDS, type="theta")), test_rawCountsSS)
expect_equal(as.vector(counts(test_rangeFDS, type="theta")), test_rawCountsSS)
expect_equal(test_pSrawOCounts,
as.vector(counts(test_rangeFDS, type="theta", side="other"))
)

expect_equal(as.vector(assays(test_rangeFDS)[["psi3"]]),
test_rawCountsJ / (test_rawCountsJ + test_p3rawOCounts)
test_rawCountsJ_PE / (test_rawCountsJ_PE + test_p3rawOCounts)
)

expect_equal(as.vector(assays(test_rangeFDS)[["psi5"]]),
test_rawCountsJ / (test_rawCountsJ + test_p5rawOCounts)
test_rawCountsJ_PE / (test_rawCountsJ_PE + test_p5rawOCounts)
)

#expect_equal(as.vector(assays(test_rangeFDS)[["theta"]]),
Expand Down
14 changes: 7 additions & 7 deletions tests/testthat/test_plotSampleResults.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ test_that("Results function", {
res_signif <- results(fds, aggregate=FALSE, all=FALSE,
padjCutoff=NA, deltaPsiCutoff=0.01)
res_signif <- as.data.table(res_signif)
expect_equal(res_signif[type == "jaccard", .N], 1)
expect_equal(res_signif[type == "psi5", .N], 8)
expect_equal(res_signif[type == "psi3", .N], 3)
expect_equal(res_signif[type == "theta", .N], 6)
expect_equal(res_signif[type == "jaccard", .N], 3)
expect_equal(res_signif[type == "psi5", .N], 7)
expect_equal(res_signif[type == "psi3", .N], 6)
expect_equal(res_signif[type == "theta", .N], 8)

# gene-level results
res_gene <- results(fds, aggregate=TRUE, all=TRUE)
Expand All @@ -31,9 +31,9 @@ test_that("Results function", {
padjCutoff=NA, deltaPsiCutoff=0.01)
res_gene_signif <- as.data.table(res_gene_signif)
expect_equal(res_gene_signif[type == "jaccard", .N], 1)
expect_equal(res_gene_signif[type == "psi5", .N], 4)
expect_equal(res_gene_signif[type == "psi3", .N], 2)
expect_equal(res_gene_signif[type == "theta", .N], 3)
expect_equal(res_gene_signif[type == "psi5", .N], 3)
expect_equal(res_gene_signif[type == "psi3", .N], 3)
expect_equal(res_gene_signif[type == "theta", .N], 2)

# results on subset of genes during FDR
geneList <- list('sample1'=c("TIMMDC1"), 'sample2'=c("MCOLN1"))
Expand Down
18 changes: 9 additions & 9 deletions tests/testthat/test_stats.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ test_that("Gene p value calculation with NAs", {
mcols(fds, type="j")$hgnc_symbol <- rep(c("geneA", "geneB", "geneC"),
times=c(3, 4, 3))
mcols(fds, type="ss")$hgnc_symbol <- rep(c("geneA", "geneB", "geneC"),
times=c(4, 6, 4))
times=c(4, 6, 5))

# simulate junction with bad rho fit to create partly NAs
rho <- rho(fds, type="jaccard")
Expand Down Expand Up @@ -85,7 +85,7 @@ test_that("FDR on subset of genes", {
fds <- getFraser()
mcols(fds, type="j")$hgnc_symbol <-
rep(c("geneA", "geneB", "geneC", "geneD", "geneE"),
times=c(3, 7, 5, 4, 8))
times=c(3, 7, 5, 4, 5))

# define gene subset per sample
genes_per_sample <- list(
Expand All @@ -98,13 +98,13 @@ test_that("FDR on subset of genes", {
fds <- calculatePadjValuesOnSubset(fds, genesToTest=genes_per_sample,
subsetName=subsetName, type="jaccard")
subset_padj <- padjVals(fds, type="jaccard", subsetName=subsetName)
expect_true(is(subset_padj, "matrix"))
expect_true(nrow(subset_padj) == 27)
expect_true(ncol(subset_padj) == 3)
expect_is(subset_padj, "matrix")
expect_equal(nrow(subset_padj), 24)
expect_equal(ncol(subset_padj), 3)
subset_padj_gene <- padjVals(fds, type="jaccard", level="gene",
subsetName=subsetName)
expect_true(is(subset_padj_gene, "matrix"))
expect_true(nrow(subset_padj_gene) == 5)
expect_true(ncol(subset_padj_gene) == 3)
expect_is(subset_padj_gene, "matrix")
expect_equal(nrow(subset_padj_gene), 5)
expect_equal(ncol(subset_padj_gene), 3)

})
Loading