Count the number of reads supporting each variant at the given positions for each cell.
Arguments
- bam_path
character(1) or character(n): path(s) to the BAM file(s) aligned to the reference genome (NOT the transcriptome, unless the positions are also from the transcriptome).
- seqnames
character(n): chromosome names of the positions at which to count alleles.
- positions
integer(n): 1-based positions at which to count alleles; must be the same length as seqnames.
- indel
logical(1): whether to count indels (TRUE) or SNPs (FALSE).
- barcodes
character(n) when bam_path is a single file, or a list of character(n) when bam_path is a list of file paths. The cell barcodes to count alleles for. Only reads with these barcodes will be counted.
- threads
integer(1): number of threads to use. Maximum number of threads is the number of bam files * number of positions.
Examples
outdir <- tempfile()
dir.create(outdir)
genome_fa <- file.path(outdir, "rps24.fa")
R.utils::gunzip(
filename = system.file("extdata", "rps24.fa.gz", package = "FLAMES"),
destname = genome_fa, remove = FALSE
)
minimap2_align( # align to genome
config = jsonlite::fromJSON(
system.file("extdata", "config_sclr_nanopore_3end.json", package = "FLAMES")
),
fa_file = genome_fa,
fq_in = system.file("extdata", "fastq", "demultiplexed.fq.gz", package = "FLAMES"),
annot = system.file("extdata", "rps24.gtf.gz", package = "FLAMES"),
outdir = outdir
)
#> 02:14:16 AM Fri Oct 25 2024 minimap2_align
#> total mapped primary
#> /tmp/RtmpoS7Kzz/file1f6d4dfd0d1e/align2genome.bam 10 10 10
#> secondary
#> /tmp/RtmpoS7Kzz/file1f6d4dfd0d1e/align2genome.bam 0
snps_tb <- sc_mutations(
bam_path = file.path(outdir, "align2genome.bam"),
seqnames = c("chr14", "chr14"),
positions = c(1260, 2714), # positions of interest
indel = FALSE,
barcodes = read.delim(
system.file("extdata", "bc_allow.tsv.gz", package = "FLAMES"),
header = FALSE)$V1
)
#> 02:14:16 Got 1 bam file, parallelizing over each position ...
#>
|
| | 0%
|
|=================================== | 50%
|
|======================================================================| 100%
#>
#> 02:14:18 Merging results ...
head(snps_tb)
#> # A tibble: 6 × 7
#> allele barcode allele_count cell_total_reads pct pos seqname
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
#> 1 A TTTATGCAGACTAGAT 0 1 0 1260 chr14
#> 2 A TTGTAGGTCTTACCTA 0 0 NaN 1260 chr14
#> 3 A TTGACTTGTGGCGAAT 0 0 NaN 1260 chr14
#> 4 A TTCGGTCGTGCTGTAT 0 0 NaN 1260 chr14
#> 5 A TTAACTCGTCACAAGG 0 0 NaN 1260 chr14
#> 6 A TGGCGCAGTACGCACC 0 0 NaN 1260 chr14
snps_tb |>
dplyr::filter(pos == 1260) |>
dplyr::group_by(allele) |>
dplyr::summarise(count = sum(allele_count)) # should be identical to samtools pileup
#> # A tibble: 5 × 2
#> allele count
#> <chr> <dbl>
#> 1 A 3
#> 2 C 0
#> 3 G 6
#> 4 T 0
#> 5 del 1