Skip to contents

Count the number of reads supporting each variants at the given positions for each cell.

Usage

sc_mutations(bam_path, seqnames, positions, indel = FALSE, threads = 1)

Arguments

bam_path

character(1) or character(n): path to the bam file(s) aligned to the reference genome (NOT the transcriptome! Unless the postions are also from the transcriptome).

seqnames

character(n): chromosome names of the postions to count alleles.

positions

integer(n): positions, 1-based, same length as seqnames. The positions to count alleles.

indel

logical(1): whether to count indels (TRUE) or SNPs (FALSE).

threads

integer(1): number of threads to use. Maximum number of threads is the number of bam files * number of positions.

Value

A tibble with columns: allele, barcode, allele_count, cell_total_reads, pct, pos, seqname.

Examples

outdir <- tempfile()
dir.create(outdir)
genome_fa <- file.path(outdir, "rps24.fa")
R.utils::gunzip(
  filename = system.file("extdata", "rps24.fa.gz", package = "FLAMES"),
  destname = genome_fa, remove = FALSE
)
minimap2_align( # align to genome
  config = jsonlite::fromJSON(
    system.file("extdata", "config_sclr_nanopore_3end.json", package = "FLAMES")
  ),
  fa_file = genome_fa,
  fq_in = system.file("extdata", "fastq", "demultiplexed.fq.gz", package = "FLAMES"),
  annot = system.file("extdata", "rps24.gtf.gz", package = "FLAMES"),
  outdir = outdir
)
#> 06:55:23 AM Mon Jan 06 2025 minimap2_align
#>                                                   total mapped primary
#> /tmp/RtmpqmWKMU/filecd673ad61810/align2genome.bam    10     10      10
#>                                                   secondary
#> /tmp/RtmpqmWKMU/filecd673ad61810/align2genome.bam         0
snps_tb <- sc_mutations(
  bam_path = file.path(outdir, "align2genome.bam"),
  seqnames = c("chr14", "chr14"),
  positions = c(1260, 2714), # positions of interest
  indel = FALSE
)
#> 06:55:23 Got 1 bam file, parallelizing over each position ...
#> 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |======================================================================| 100%
#> 
#> 06:55:24 Merging results ...
head(snps_tb)
#> # A tibble: 6 × 7
#>   allele barcode          allele_count cell_total_reads   pct   pos seqname
#>   <chr>  <chr>                   <dbl>            <dbl> <dbl> <dbl> <chr>  
#> 1 A      ACCGTAACAAGCGTAG            0                1     0  1260 chr14  
#> 2 A      CATCGAACAGATGGGT            1                1     1  1260 chr14  
#> 3 A      CGAACATGTCTGCCAG            1                1     1  1260 chr14  
#> 4 A      CGGACTGAGTTGAGTA            0                1     0  1260 chr14  
#> 5 A      CGGGTCAGTAGTAGTA            1                1     1  1260 chr14  
#> 6 A      CTGTGCTTCCGATATG            0                1     0  1260 chr14  
snps_tb |>
  dplyr::filter(pos == 1260) |>
  dplyr::group_by(allele) |>
  dplyr::summarise(count = sum(allele_count)) # should be identical to samtools pileup
#> # A tibble: 5 × 2
#>   allele count
#>   <chr>  <dbl>
#> 1 -          1
#> 2 A          3
#> 3 C          0
#> 4 G          6
#> 5 T          0