Count the number of reads supporting each variants at the given positions for each cell.
Arguments
- bam_path
character(1) or character(n): path to the bam file(s) aligned to the reference genome (NOT the transcriptome! Unless the postions are also from the transcriptome).
- seqnames
character(n): chromosome names of the postions to count alleles.
- positions
integer(n): positions, 1-based, same length as seqnames. The positions to count alleles.
- indel
logical(1): whether to count indels (TRUE) or SNPs (FALSE).
- threads
integer(1): number of threads to use. Maximum number of threads is the number of bam files * number of positions.
Examples
outdir <- tempfile()
dir.create(outdir)
genome_fa <- file.path(outdir, "rps24.fa")
R.utils::gunzip(
filename = system.file("extdata", "rps24.fa.gz", package = "FLAMES"),
destname = genome_fa, remove = FALSE
)
minimap2_align( # align to genome
config = jsonlite::fromJSON(
system.file("extdata", "config_sclr_nanopore_3end.json", package = "FLAMES")
),
fa_file = genome_fa,
fq_in = system.file("extdata", "fastq", "demultiplexed.fq.gz", package = "FLAMES"),
annot = system.file("extdata", "rps24.gtf.gz", package = "FLAMES"),
outdir = outdir
)
#> 06:55:23 AM Mon Jan 06 2025 minimap2_align
#> total mapped primary
#> /tmp/RtmpqmWKMU/filecd673ad61810/align2genome.bam 10 10 10
#> secondary
#> /tmp/RtmpqmWKMU/filecd673ad61810/align2genome.bam 0
snps_tb <- sc_mutations(
bam_path = file.path(outdir, "align2genome.bam"),
seqnames = c("chr14", "chr14"),
positions = c(1260, 2714), # positions of interest
indel = FALSE
)
#> 06:55:23 Got 1 bam file, parallelizing over each position ...
#>
|
| | 0%
|
|=================================== | 50%
|
|======================================================================| 100%
#>
#> 06:55:24 Merging results ...
head(snps_tb)
#> # A tibble: 6 × 7
#> allele barcode allele_count cell_total_reads pct pos seqname
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
#> 1 A ACCGTAACAAGCGTAG 0 1 0 1260 chr14
#> 2 A CATCGAACAGATGGGT 1 1 1 1260 chr14
#> 3 A CGAACATGTCTGCCAG 1 1 1 1260 chr14
#> 4 A CGGACTGAGTTGAGTA 0 1 0 1260 chr14
#> 5 A CGGGTCAGTAGTAGTA 1 1 1 1260 chr14
#> 6 A CTGTGCTTCCGATATG 0 1 0 1260 chr14
snps_tb |>
dplyr::filter(pos == 1260) |>
dplyr::group_by(allele) |>
dplyr::summarise(count = sum(allele_count)) # should be identical to samtools pileup
#> # A tibble: 5 × 2
#> allele count
#> <chr> <dbl>
#> 1 - 1
#> 2 A 3
#> 3 C 0
#> 4 G 6
#> 5 T 0