Skip to contents

Count the number of reads supporting each variant at the given positions for each cell.

Usage

sc_mutations(
  bam_path,
  seqnames,
  positions,
  indel = FALSE,
  barcodes,
  threads = 1
)

Arguments

bam_path

character(1) or character(n): path to the bam file(s) aligned to the reference genome (NOT the transcriptome, unless the positions are also from the transcriptome).

seqnames

character(n): chromosome names of the positions at which to count alleles.

positions

integer(n): positions, 1-based, same length as seqnames. The positions to count alleles.

indel

logical(1): whether to count indels (TRUE) or SNPs (FALSE).

barcodes

character(n) when bam_path is a single file, or list of character(n) when bam_path is a list of file paths. The cell barcodes to count alleles for. Only reads with these barcodes will be counted.

threads

integer(1): number of threads to use. Maximum number of threads is the number of bam files * number of positions.

Value

A tibble with columns: allele, barcode, allele_count, cell_total_reads, pct, pos, seqname.

Examples

outdir <- tempfile()
dir.create(outdir)
genome_fa <- file.path(outdir, "rps24.fa")
R.utils::gunzip(
  filename = system.file("extdata", "rps24.fa.gz", package = "FLAMES"),
  destname = genome_fa, remove = FALSE
)
minimap2_align( # align to genome
  config = jsonlite::fromJSON(
    system.file("extdata", "config_sclr_nanopore_3end.json", package = "FLAMES")
  ),
  fa_file = genome_fa,
  fq_in = system.file("extdata", "fastq", "demultiplexed.fq.gz", package = "FLAMES"),
  annot = system.file("extdata", "rps24.gtf.gz", package = "FLAMES"),
  outdir = outdir
)
#> 02:14:16 AM Fri Oct 25 2024 minimap2_align
#>                                                   total mapped primary
#> /tmp/RtmpoS7Kzz/file1f6d4dfd0d1e/align2genome.bam    10     10      10
#>                                                   secondary
#> /tmp/RtmpoS7Kzz/file1f6d4dfd0d1e/align2genome.bam         0
snps_tb <- sc_mutations(
  bam_path = file.path(outdir, "align2genome.bam"),
  seqnames = c("chr14", "chr14"),
  positions = c(1260, 2714), # positions of interest
  indel = FALSE,
  barcodes = read.delim(
    system.file("extdata", "bc_allow.tsv.gz", package = "FLAMES"),
    header = FALSE)$V1
)
#> 02:14:16 Got 1 bam file, parallelizing over each position ...
#> 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |======================================================================| 100%
#> 
#> 02:14:18 Merging results ...
head(snps_tb)
#> # A tibble: 6 × 7
#>   allele barcode          allele_count cell_total_reads   pct   pos seqname
#>   <chr>  <chr>                   <dbl>            <dbl> <dbl> <dbl> <chr>  
#> 1 A      TTTATGCAGACTAGAT            0                1     0  1260 chr14  
#> 2 A      TTGTAGGTCTTACCTA            0                0   NaN  1260 chr14  
#> 3 A      TTGACTTGTGGCGAAT            0                0   NaN  1260 chr14  
#> 4 A      TTCGGTCGTGCTGTAT            0                0   NaN  1260 chr14  
#> 5 A      TTAACTCGTCACAAGG            0                0   NaN  1260 chr14  
#> 6 A      TGGCGCAGTACGCACC            0                0   NaN  1260 chr14  
snps_tb |>
  dplyr::filter(pos == 1260) |>
  dplyr::group_by(allele) |>
  dplyr::summarise(count = sum(allele_count)) # should be identical to samtools pileup
#> # A tibble: 5 × 2
#>   allele count
#>   <chr>  <dbl>
#> 1 A          3
#> 2 C          0
#> 3 G          6
#> 4 T          0
#> 5 del        1