gagneurlab · vyepez88 · Aug 13, 2025 · Aug 11, 2025 · Aug 11, 2025 · Aug 11, 2025
diff --git a/.gitignore b/.gitignore
@@ -40,3 +40,5 @@ doc
 Meta
 .Rproj.user
 FRASER_output
+/doc/
+/Meta/
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: FRASER
 Type: Package
 Title: Find RAre Splicing Events in RNA-Seq Data
-Version: 2.4.3
+Version: 2.4.4
 Date: 2025-06-04
 Authors@R: c(
     person("Christian", "Mertes", role=c("aut", "cre"), 

diff --git a/NEWS b/NEWS
@@ -1,3 +1,8 @@
+CHANGES IN VERSION 2.4.4
+-------------------------
+    o Bugfix: reading fragments instead of read pairs for unstranded paired-end data. 
+      If you have such kind of data, please rerun the whole analysis from the start (make sure you have recount=TRUE).
+
 CHANGES IN VERSION 2.4.3
 -------------------------
     o Fix bug in cache
@@ -15,7 +20,7 @@ CHANGES IN VERSION 1.99.4
 CHANGES IN VERSION 1.99.3
 -------------------------
     o Bugfix for contig names containing certain characters
-    o Update of the plot functions to support colorring aberrant status based 
+    o Update of the plot functions to support coloring aberrant status based 
       on p values computed on subsets of genes
     o Major update to FRASER2:
         o Introduction of new & more robust splice metric Intron Jaccard Index

diff --git a/R/countRNAseqData.R b/R/countRNAseqData.R
@@ -568,10 +568,16 @@ countSplitReadsPerChromosome <- function(chromosome, bamFile,
         return(GRanges())
     }
 
-    # get reads from bam file
-    if(isFALSE(as.logical(strandMode)) || isFALSE(pairedEnd)){
-        galignment <- readGAlignments(bamFile, param=param)
-    } else{
+    # if single end, count reads
+    # if paired end, count pairs
+    if(isFALSE(pairedEnd)){
+        galignment <- readGAlignments(bamFile, param=param)        
+
+        # remove the strand information if unstranded data
+        if(isFALSE(as.logical(strandMode))){
+            strand(galignment) <- "*"
+        }
+    } else {
         galignment <- readGAlignmentPairs(
                 bamFile, param=param, strandMode=strandMode)
     }
@@ -580,17 +586,6 @@ countSplitReadsPerChromosome <- function(chromosome, bamFile,
     # (occurs if reads of a pair align to different chromosomes)
     galignment <- galignment[!is.na(seqnames(galignment))]
 
-    # remove the strand information if unstranded data
-    if(isFALSE(as.logical(strandMode))){
-        strand(galignment) <- "*"
-    }
-    # invert the strand information for reverse strand specific protocols
-    # (only needed for single-end reads as real strand is already set for 
-    # paired-end reads in the readGAlignmentPairs function)
-    if(isFALSE(pairedEnd) && strandMode == 2L){
-        galignment <- invertStrand(galignment)
-    }
-
     # dont count if there is nothing to count
     if(length(galignment) == 0){
         return(GRanges())
@@ -617,45 +612,55 @@ countSplitReadsPerChromosome <- function(chromosome, bamFile,
         }
     }
 
-    # get the junction positions and their counts
-    jc <- summarizeJunctions(galignment, genome=genome, 
-            with.revmap=(as.logical(strandMode) && pairedEnd) )
+    # get the junction positions and their counts and 
+    # use revmap for paired-end to count fragments (avoid double-counting reads)
+    jc <- summarizeJunctions(galignment, genome=genome, with.revmap=isTRUE(pairedEnd))
 
     if(length(jc) == 0){
         return(GRanges())
     }
 
-    # for strand specific counting: split result into counts for + and - strand
-    if(isTRUE(as.logical(strandMode))){
-
-        # for paired-end reads: ensure that each read pair is only counted once
-        if(isTRUE(pairedEnd)){
-            fragment_counts <- vapply(jc@elementMetadata$revmap, 
-                                FUN=function(pairs){
-                                    strands <- strand(galignment[pairs,])
-                                    return(c(plus_score=sum(strands == "+"),
-                                            minus_score=sum(strands == "-")))
-                                }, FUN.VALUE=integer(2) )
-            mcols(jc)[,"plus_score"]  <- fragment_counts["plus_score",]
-            mcols(jc)[,"minus_score"] <- fragment_counts["minus_score",]
+    # for paired-end reads: ensure that each read pair is only counted once
+    if(isTRUE(pairedEnd)){
+
+        # for unstranded pairedend data: just count the revmap
+        if(isFALSE(as.logical(strandMode))){
+            mcols(jc)['score'] <- sapply(mcols(jc)[['revmap']], length)
+
+        # for stranded pairedend data: count only the pairs for each strand
+        } else {
+            gastrand <- strand(galignment)
+            fragment_counts <- sapply(mcols(jc)[['revmap']], 
+                FUN=function(pairs){ 
+                    tbl <- table(gastrand[pairs])
+                    # Ensure consistent dimensions by including all expected levels
+                    result <- c("+"=0, "-"=0, "*"=0)
+                    result[names(tbl)] <- tbl
+                    return(result)
+                }
+            )
+            mcols(jc)[,"plus_score"]  <- fragment_counts["+",]
+            mcols(jc)[,"minus_score"] <- fragment_counts["-",]
+
+            # create for each junction on each side that was found a row
+            jc_per_strand <- sapply(c("-", "+"), jc=jc, FUN=function(strand_name, jc){
+                col_name <- paste0(c('+'="plus", "-"="minus")[strand_name], "_score")
+                jcStrand <- jc
+                mcols(jcStrand)[['score']] <- mcols(jc)[[col_name]]
+                strand(jcStrand) <- strand_name
+                jcStrand <- jcStrand[mcols(jcStrand)[['score']] > 0]
+                return(jcStrand)
+            })
+
+            # combine the junctions for both strands
+            jc <- c(jc_per_strand[[1]], jc_per_strand[[2]])
         }
-
-        jcPlus <- jc
-        mcols(jcPlus)[,"score"] <- mcols(jc)[,"plus_score"]
-        strand(jcPlus) <- "+"
-        jcPlus <- jcPlus[mcols(jcPlus)[,"score"] > 0,]
-        jcMinus <- jc
-        mcols(jcMinus)[,"score"] <- mcols(jc)[,"minus_score"]
-        strand(jcMinus) <- "-"
-        jcMinus <- jcMinus[mcols(jcMinus)[,"score"] > 0,]
-        jc <- c(jcPlus, jcMinus)
     }
 
     ans <- jc[,"score"]
     colnames(mcols(ans)) <- "count"
 
     # set predicted strand if present or set it to + if NA
-
     if(isFALSE(as.logical(strandMode)) && !is.null(genome) && 
                 length(ans) > 0){
         strand(ans) <- jc$intron_strand

diff --git a/R/helper-functions.R b/R/helper-functions.R
@@ -22,7 +22,7 @@ checkCountData <- function(fds, stop=TRUE){
         if(isFALSE(stop)) return(invisible(FALSE))
         stop("No counts detected! Please provide counts first.")
     }
-    if(!all(paste0("rawOtherCounts_", psiTypes) %in% assayNames(fds))){
+    if(!all(paste0("rawOtherCounts_", metadata(fds)$fit_metrics) %in% assayNames(fds))){
         if(isFALSE(stop)) return(invisible(FALSE))
         stop("Please compute first the total expression at each junction.")
     }

diff --git a/R/makeSimulatedDataset.R b/R/makeSimulatedDataset.R
@@ -86,13 +86,13 @@ makeSimulatedFraserDataSet_BetaBinomial <- function(m=200, j=10000, q=10,
     k <- matrix(rbetabinom(j*m, size=n, prob=mu_psi, rho=rho_psi),
             nrow=j, ncol=m)
     mode(k) <- 'integer'
-
-
+
     #
     # Create FRASER data set
     #
     sampleIDs <- paste0("sample", seq_len(m))
-    anno <- data.table(sampleID = sampleIDs, bamFile=rep(NA, m))
+    anno <- data.table(sampleID = sampleIDs, 
+            bamFile=rep(NA, m), strand=rep(0L, m))
     fds <- FraserDataSet(colData=anno, ...)
 
     # put in n as rawcountsJ first so it doesn't complain later
@@ -122,8 +122,8 @@ makeSimulatedFraserDataSet_BetaBinomial <- function(m=200, j=10000, q=10,
             encodingDimension=q, evaluationLoss=1, evalMethod='simulation')
 
     # Store "other" counts
-    counts(fds, type="psi3", side="other", withDimnames=FALSE) <-    n - k
-    counts(fds, type="psi5", side="other", withDimnames=FALSE) <-    n - k
+    counts(fds, type="psi3", side="other", withDimnames=FALSE) <- n - k
+    counts(fds, type="psi5", side="other", withDimnames=FALSE) <- n - k
     counts(fds, type="theta", side="other", withDimnames=FALSE) <- n
 
     # store information about the simulation in the fds

diff --git a/README.md b/README.md
@@ -19,6 +19,8 @@ or if you use FRASER2:
 
 ## What's new
 
+In version 2.4.5, we fixed a bug affecting unstranded paired-end data. We were counting read fragments instead of read pairs. The counted split reads with the fixed method are, on average, lower by around 20%. If you have such data, please rerun your entire analysis from the start (make sure you have recount=TRUE).
+
 In version 2.4.3, instead of doing a grid search to determine the optimal encoding dimension of the denoising autoencoder, we now use the Optimal Hard Threshold (OHT). This makes the algorithm 6-10 times faster!
 
 ⚠️ Also, since this version, FRASER is released under `CC-BY-NC 4.0`,  meaning it requires a license for any commercial use. If you want to use it for commercial purposes, please contact us.

diff --git a/tests/testthat.R b/tests/testthat.R
@@ -6,4 +6,5 @@ if(.Platform$OS.type != "unix") {
     register(SerialParam())
 }
 
+set.seed(42)
 test_check("FRASER")
diff --git a/tests/testthat/helper_test_data.R b/tests/testthat/helper_test_data.R
@@ -1,7 +1,7 @@
 #
 # all functions to generate test objects
 #
-test_generate_count_example <- function(recount=FALSE){
+test_generate_count_example <- function(recount=TRUE){
     # get a new object for only one sample
     if(recount || !"test_fdsSample3" %in% ls()){
 
@@ -27,26 +27,28 @@ test_generate_count_example <- function(recount=FALSE){
     #
     # This is manually counted from the IGV browser
     #
-    test_rawCountsJ   <- c(3, 13, 1)
-    test_rawCountsSS  <- c(9, 10, 0, 0, 10)
-    test_p5rawOCounts <- c(0, 1, 13)
-    test_p3rawOCounts <- c(0, 0, 0)
-    test_pSrawOCounts <- c(3, 3, 14, 13, 1)
+    test_rawCountsJ_SE <- c(3, 13, 1)
+    test_rawCountsJ_PE <- c(2, 10, 1)
+    test_rawCountsSS   <- c(7, 8, 0, 0, 7)
+    test_p5rawOCounts  <- c(0, 1, 10)
+    test_p3rawOCounts  <- c(0, 0, 0)
+    test_pSrawOCounts  <- c(2, 2, 11, 10, 1)
 
     return(list(
         test_fdsSample3=test_fdsSample3,
         test_range=test_range,
         test_rangeOV=test_rangeOV,
         test_rangeFDS=test_rangeFDS,
-        test_rawCountsJ=test_rawCountsJ,
+        test_rawCountsJ_SE=test_rawCountsJ_SE,
+        test_rawCountsJ_PE=test_rawCountsJ_PE,
         test_rawCountsSS=test_rawCountsSS,
         test_p5rawOCounts=test_p5rawOCounts,
         test_p3rawOCounts=test_p3rawOCounts,
         test_pSrawOCounts=test_pSrawOCounts
     ))
 }
 
-test_generate_strand_specific_count_example <- function(recount=FALSE){
+test_generate_strand_specific_count_example <- function(recount=TRUE){
     # get a new object for only one sample
     if(recount || !"test_fdsSample3_stranded" %in% ls()){
 

diff --git a/tests/testthat/test_counting.R b/tests/testthat/test_counting.R
@@ -1,17 +1,23 @@
 context("Test counting")
 
 test_that("Count junctions", {
-    out <- capture.output(attach(test_generate_count_example()))
+    out <- capture.output(attach(test_generate_count_example(recount=TRUE)))
 
     expect_is(test_fdsSample3, "FraserDataSet")
 
     # test how many ranges we found
     expect_equal(length(test_rangeFDS), 3)
     expect_equal(length(nonSplicedReads(test_rangeFDS)), 5)
 
-    # test the manually counted positions
-    expect_equal(as.vector(counts(test_rangeFDS, type="j")), test_rawCountsJ)
-    # expect_equal(as.vector(counts(test_rangeFDS, type="ss")), test_rawCountsSS)
+    # test the manually counted positions for PE counting
+    expect_equal(as.vector(counts(test_rangeFDS, type="j")), test_rawCountsJ_PE)
+
+    # test the manually counted positions for SE counting
+    se_counting <- countSplitReadsPerChromosome("chr19", bamFile(test_fdsSample3), pairedEnd=FALSE, 
+            strandMode=0L, genome=NULL, scanBamParam=scanBamParam(test_fdsSample3))
+    test_rangeOV  <- findOverlaps(test_range, se_counting, type = "equal")
+    expect_equal(mcols(se_counting[to(test_rangeOV)])[['count']], test_rawCountsJ_SE)
+
 })
 
 test_that("Strand spcific counting", {
@@ -31,7 +37,7 @@ test_that("Strand spcific counting", {
 
     # check for non empty chromosome but no split reads present
     fds <- createTestFraserSettings()
-    strandSpecific(fds) <- TRUE
+    strandSpecific(fds) <- rep(TRUE, length(samples(fds)))
     ans <- countSplitReadsPerChromosome("chrUn_gl000218", bamFile(fds)[1], 
             pairedEnd=TRUE, strandMode=strandSpecific(fds)[1], genome=NULL,
             scanBamParam=scanBamParam(fds))
@@ -57,27 +63,27 @@ test_that("test minAnchor", {
 test_that("Test psi values", {
     attach(test_generate_count_example())
 
-    expect_equal(as.vector(counts(test_rangeFDS, type="psi3")), test_rawCountsJ)
+    expect_equal(as.vector(counts(test_rangeFDS, type="psi3")), test_rawCountsJ_PE)
     expect_equal(test_p3rawOCounts,
         as.vector(counts(test_rangeFDS, type="psi3", side="other"))
     )
 
-    expect_equal(as.vector(counts(test_rangeFDS, type="psi5")), test_rawCountsJ)
+    expect_equal(as.vector(counts(test_rangeFDS, type="psi5")), test_rawCountsJ_PE)
     expect_equal(test_p5rawOCounts,
         as.vector(counts(test_rangeFDS, type="psi5", side="other"))
     )
 
-    #expect_equal(as.vector(counts(test_rangeFDS, type="theta")), test_rawCountsSS)
+    expect_equal(as.vector(counts(test_rangeFDS, type="theta")), test_rawCountsSS)
     expect_equal(test_pSrawOCounts,
         as.vector(counts(test_rangeFDS, type="theta", side="other"))
     )
 
     expect_equal(as.vector(assays(test_rangeFDS)[["psi3"]]),
-        test_rawCountsJ / (test_rawCountsJ + test_p3rawOCounts)
+        test_rawCountsJ_PE / (test_rawCountsJ_PE + test_p3rawOCounts)
     )
 
     expect_equal(as.vector(assays(test_rangeFDS)[["psi5"]]),
-        test_rawCountsJ / (test_rawCountsJ + test_p5rawOCounts)
+        test_rawCountsJ_PE / (test_rawCountsJ_PE + test_p5rawOCounts)
     )
 
     #expect_equal(as.vector(assays(test_rangeFDS)[["theta"]]),

diff --git a/tests/testthat/test_plotSampleResults.R b/tests/testthat/test_plotSampleResults.R
@@ -15,10 +15,10 @@ test_that("Results function", {
     res_signif <- results(fds, aggregate=FALSE, all=FALSE,
                             padjCutoff=NA, deltaPsiCutoff=0.01)
     res_signif <- as.data.table(res_signif)
-    expect_equal(res_signif[type == "jaccard", .N], 1)
-    expect_equal(res_signif[type == "psi5", .N], 8)
-    expect_equal(res_signif[type == "psi3", .N], 3)
-    expect_equal(res_signif[type == "theta", .N], 6)
+    expect_equal(res_signif[type == "jaccard", .N], 3)
+    expect_equal(res_signif[type == "psi5", .N], 7)
+    expect_equal(res_signif[type == "psi3", .N], 6)
+    expect_equal(res_signif[type == "theta", .N], 8)
 
     # gene-level results
     res_gene <- results(fds, aggregate=TRUE, all=TRUE)
@@ -31,9 +31,9 @@ test_that("Results function", {
                           padjCutoff=NA, deltaPsiCutoff=0.01)
     res_gene_signif <- as.data.table(res_gene_signif)
     expect_equal(res_gene_signif[type == "jaccard", .N], 1)
-    expect_equal(res_gene_signif[type == "psi5", .N], 4)
-    expect_equal(res_gene_signif[type == "psi3", .N], 2)
-    expect_equal(res_gene_signif[type == "theta", .N], 3)
+    expect_equal(res_gene_signif[type == "psi5", .N], 3)
+    expect_equal(res_gene_signif[type == "psi3", .N], 3)
+    expect_equal(res_gene_signif[type == "theta", .N], 2)
 
     # results on subset of genes during FDR
     geneList <- list('sample1'=c("TIMMDC1"), 'sample2'=c("MCOLN1"))

diff --git a/tests/testthat/test_stats.R b/tests/testthat/test_stats.R
@@ -37,7 +37,7 @@ test_that("Gene p value calculation with NAs", {
     mcols(fds, type="j")$hgnc_symbol <- rep(c("geneA", "geneB", "geneC"), 
                                             times=c(3, 4, 3))
     mcols(fds, type="ss")$hgnc_symbol <- rep(c("geneA", "geneB", "geneC"), 
-                                            times=c(4, 6, 4))
+                                            times=c(4, 6, 5))
 
     # simulate junction with bad rho fit to create partly NAs
     rho <- rho(fds, type="jaccard")
@@ -85,7 +85,7 @@ test_that("FDR on subset of genes", {
     fds <- getFraser()
     mcols(fds, type="j")$hgnc_symbol <- 
         rep(c("geneA", "geneB", "geneC", "geneD", "geneE"), 
-            times=c(3, 7, 5, 4, 8))
+            times=c(3, 7, 5, 4, 5))
 
     # define gene subset per sample
     genes_per_sample <- list(
@@ -98,13 +98,13 @@ test_that("FDR on subset of genes", {
     fds <- calculatePadjValuesOnSubset(fds, genesToTest=genes_per_sample, 
                                        subsetName=subsetName, type="jaccard")
     subset_padj <- padjVals(fds, type="jaccard", subsetName=subsetName)
-    expect_true(is(subset_padj, "matrix"))
-    expect_true(nrow(subset_padj) == 27)
-    expect_true(ncol(subset_padj) == 3)
+    expect_is(subset_padj, "matrix")
+    expect_equal(nrow(subset_padj), 24)
+    expect_equal(ncol(subset_padj), 3)
     subset_padj_gene <- padjVals(fds, type="jaccard", level="gene", 
                                     subsetName=subsetName)
-    expect_true(is(subset_padj_gene, "matrix"))
-    expect_true(nrow(subset_padj_gene) == 5)
-    expect_true(ncol(subset_padj_gene) == 3)
-      
+    expect_is(subset_padj_gene, "matrix")
+    expect_equal(nrow(subset_padj_gene), 5)
+    expect_equal(ncol(subset_padj_gene), 3)
+
 })
-Original file line number
+Diff line change
@@ Expand Up / @@ -40,3 +40,5 @@ doc @@
     Meta
     .Rproj.user
     FRASER_output
+    /doc/
+    /Meta/