Skip to contents

Plot the average read coverages for each length bin or a perticular isoform

Usage

plot_coverage(
  x,
  quantiles = c(0, 0.2375, 0.475, 0.7125, 0.95, 1),
  length_bins = c(0, 1, 2, 5, 10, Inf),
  weight_fn = weight_transcripts,
  filter_fn,
  detailed = FALSE
)

Arguments

x,

path to the BAM file (aligning reads to the transcriptome), or the (GenomicAlignments::readGAlignments) parsed GAlignments object, or the tibble returned by get_coverage, or the filtered tibble returned by filter_coverage.

quantiles

numeric vector to specify the quantiles to bin the transcripts lengths by if length_bins is missing. The length bins will be determined such that the read counts are distributed acording to the quantiles.

length_bins,

numeric vector to specify the sizes to bin the transcripts by

weight_fn

function to calculate the weights for the transcripts. The function should take a numeric vector of read counts and return a numeric vector of weights. The default function is weight_transcripts, you can change its default parameters by passing an anonymous function like function(x) weight_transcripts(x, type = 'equal').

filter_fn

Optional filter function to filter the transcripts before plotting. See the filter_fn parameter in filter_coverage for more details. Providing a filter fucntion here is the same as providing it in filter_coverage and then passing the result to this function.

detailed

logical, if TRUE, also plot the top 10 transcripts with the highest read counts for each length bin.

Value

a ggplot2 object of the coverage plot(s)

Examples

# Create a BAM file with minimap2_realign
temp_path <- tempfile()
bfc <- BiocFileCache::BiocFileCache(temp_path, ask = FALSE)
file_url <- 'https://raw.githubusercontent.com/OliverVoogd/FLAMESData/master/data'
fastq1 <- bfc[[names(BiocFileCache::bfcadd(bfc, 'Fastq1', paste(file_url, 'fastq/sample1.fastq.gz', sep = '/')))]]
genome_fa <- bfc[[names(BiocFileCache::bfcadd(bfc, 'genome.fa', paste(file_url, 'SIRV_isoforms_multi-fasta_170612a.fasta', sep = '/')))]]
annotation <- bfc[[names(BiocFileCache::bfcadd(bfc, 'annot.gtf', paste(file_url, 'SIRV_isoforms_multi-fasta-annotation_C_170612a.gtf', sep = '/')))]]
outdir <- tempfile()
dir.create(outdir)
fasta <- annotation_to_fasta(annotation, genome_fa, outdir)
#> Import genomic features from the file as a GRanges object ... 
#> OK
#> Prepare the 'metadata' data frame ... 
#> OK
#> Make the TxDb object ... 
#> Warning: The "phase" metadata column contains non-NA values for features of type
#>   exon. This information was ignored.
#> OK
minimap2_realign(
  config = jsonlite::fromJSON(
    system.file("extdata", "config_sclr_nanopore_3end.json", package = "FLAMES")),
  fq_in = fastq1,
  outdir = outdir
)
#> 02:13:39 AM Fri Oct 25 2024 minimap2_realign
#> Sorting by position
#>                                                         total mapped primary
#> /tmp/RtmpoS7Kzz/file1f6d3272b12a/realign2transcript.bam  4554   3764    2500
#>                                                         secondary
#> /tmp/RtmpoS7Kzz/file1f6d3272b12a/realign2transcript.bam      2051
# Plot the coverages directly from the BAM file
plot_coverage(file.path(outdir, 'realign2transcript.bam'))
#> Using quantiles to bin transcripts.
#> The number of transcripts is less than the inflection index, returning equal weights for the current bin.
#> The number of transcripts is less than the inflection index, returning equal weights for the current bin.
#> The number of transcripts is less than the inflection index, returning equal weights for the current bin.
#> The number of transcripts is less than the inflection index, returning equal weights for the current bin.
#> The number of transcripts is less than the inflection index, returning equal weights for the current bin.


# Get the coverage information first
coverage <- get_coverage(file.path(outdir, 'realign2transcript.bam')) |>
  dplyr::filter(read_counts > 2) |> # Filter out transcripts with read counts < 3
  filter_coverage(filter_fn = convolution_filter) # Filter out transcripts with sharp drops / rises
#> 26 transcripts found in the BAM file.
#> 3(11.54%) transcripts failed the filter.
#> Failed transcripts account for 140 reads, out of 1095(12.79%) reads in total.
# Plot the filtered coverages
plot_coverage(coverage, detailed = TRUE)
#> Using quantiles to bin transcripts.
#> The number of transcripts is less than the inflection index, returning equal weights for the current bin.
#> The number of transcripts is less than the inflection index, returning equal weights for the current bin.
#> The number of transcripts is less than the inflection index, returning equal weights for the current bin.
#> The number of transcripts is less than the inflection index, returning equal weights for the current bin.
#> The number of transcripts is less than the inflection index, returning equal weights for the current bin.

# filtering function can also be passed directly to plot_coverage
plot_coverage(file.path(outdir, 'realign2transcript.bam'), filter_fn = convolution_filter)
#> 26 transcripts found in the BAM file.
#> 3(11.54%) transcripts failed the filter.
#> Failed transcripts account for 140 reads, out of 1095(12.79%) reads in total.
#> Using quantiles to bin transcripts.
#> The number of transcripts is less than the inflection index, returning equal weights for the current bin.
#> The number of transcripts is less than the inflection index, returning equal weights for the current bin.
#> The number of transcripts is less than the inflection index, returning equal weights for the current bin.
#> The number of transcripts is less than the inflection index, returning equal weights for the current bin.
#> The number of transcripts is less than the inflection index, returning equal weights for the current bin.