Get the read coverages for each transcript in the BAM file (or a GAlignments object).
The read coverages are sampled at 100 positions along the transcript, and the coverage is scaled
by dividing the coverage at each position by the total read counts for the transcript. If a BAM
file is provided, alignment with MAPQ < 5, secondary alignments and supplementary alignments
are filtered out. A GAlignments
object can also be provided in case alternative filtering
is desired.
Value
a tibble of the transcript information and coverages, with the following columns:
transcript
: the transcript name / IDread_counts
: the total number of aligments for the transcriptcoverage_1-100
: the coverage at each of the 100 positions along the transcripttr_length
: the length of the transcript
Examples
# Create a BAM file with minimap2_realign
temp_path <- tempfile()
bfc <- BiocFileCache::BiocFileCache(temp_path, ask = FALSE)
file_url <- 'https://raw.githubusercontent.com/OliverVoogd/FLAMESData/master/data'
fastq1 <- bfc[[names(BiocFileCache::bfcadd(bfc, 'Fastq1', paste(file_url, 'fastq/sample1.fastq.gz', sep = '/')))]]
genome_fa <- bfc[[names(BiocFileCache::bfcadd(bfc, 'genome.fa', paste(file_url, 'SIRV_isoforms_multi-fasta_170612a.fasta', sep = '/')))]]
annotation <- bfc[[names(BiocFileCache::bfcadd(bfc, 'annot.gtf', paste(file_url, 'SIRV_isoforms_multi-fasta-annotation_C_170612a.gtf', sep = '/')))]]
outdir <- tempfile()
dir.create(outdir)
fasta <- annotation_to_fasta(annotation, genome_fa, outdir)
#> Import genomic features from the file as a GRanges object ...
#> OK
#> Prepare the 'metadata' data frame ...
#> OK
#> Make the TxDb object ...
#> Warning: The "phase" metadata column contains non-NA values for features of type
#> exon. This information was ignored.
#> OK
minimap2_realign(
config = jsonlite::fromJSON(
system.file("extdata", "config_sclr_nanopore_3end.json", package = "FLAMES")),
fq_in = fastq1,
outdir = outdir
)
#> 02:13:31 AM Fri Oct 25 2024 minimap2_realign
#> Sorting by position
#> total mapped primary
#> /tmp/RtmpoS7Kzz/file1f6d433edb47/realign2transcript.bam 4554 3764 2500
#> secondary
#> /tmp/RtmpoS7Kzz/file1f6d433edb47/realign2transcript.bam 2051
x <- get_coverage(file.path(outdir, 'realign2transcript.bam'))
head(x)
#> # A tibble: 6 × 103
#> transcript coverage_1 coverage_2 coverage_3 coverage_4 coverage_5 coverage_6
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 SIRV101 0 0.3 0.6 0.6 0.7 0.8
#> 2 SIRV106 0 0 0.174 0.348 0.435 0.478
#> 3 SIRV103 0 0.143 0.571 0.786 0.786 0.786
#> 4 SIRV105 0 0 0.625 0.844 0.906 1
#> 5 SIRV107 0 0 0.340 0.415 0.436 0.574
#> 6 SIRV201 0 0.182 0.455 0.455 0.455 0.455
#> # ℹ 96 more variables: coverage_7 <dbl>, coverage_8 <dbl>, coverage_9 <dbl>,
#> # coverage_10 <dbl>, coverage_11 <dbl>, coverage_12 <dbl>, coverage_13 <dbl>,
#> # coverage_14 <dbl>, coverage_15 <dbl>, coverage_16 <dbl>, coverage_17 <dbl>,
#> # coverage_18 <dbl>, coverage_19 <dbl>, coverage_20 <dbl>, coverage_21 <dbl>,
#> # coverage_22 <dbl>, coverage_23 <dbl>, coverage_24 <dbl>, coverage_25 <dbl>,
#> # coverage_26 <dbl>, coverage_27 <dbl>, coverage_28 <dbl>, coverage_29 <dbl>,
#> # coverage_30 <dbl>, coverage_31 <dbl>, coverage_32 <dbl>, …