Skip to contents

Count the number of reads supporting each variants at the given positions for each cell.

Usage

sc_mutations(bam_path, seqnames, positions, indel = FALSE, threads = 1)

Arguments

bam_path

character(1) or character(n): path to the bam file(s) aligned to the reference genome (NOT the transcriptome! Unless the postions are also from the transcriptome).

seqnames

character(n): chromosome names of the postions to count alleles.

positions

integer(n): positions, 1-based, same length as seqnames. The positions to count alleles.

indel

logical(1): whether to count indels (TRUE) or SNPs (FALSE).

threads

integer(1): number of threads to use. Maximum number of threads is the number of bam files * number of positions.

Value

A tibble with columns: allele, barcode, allele_count, cell_total_reads, pct, pos, seqname.

Examples

ppl <- example_pipeline("SingleCellPipeline")
#> Writing configuration parameters to:  /tmp/RtmpgpEV0i/file25bf11d12306/config_file_9663.json 
#> Configured steps: 
#> 	barcode_demultiplex: TRUE
#> 	genome_alignment: TRUE
#> 	gene_quantification: TRUE
#> 	isoform_identification: TRUE
#> 	read_realignment: TRUE
#> 	transcript_quantification: TRUE
#> samtools not found, will use Rsamtools package instead
ppl <- run_step(ppl, "barcode_demultiplex")
#> Running step: barcode_demultiplex
#> FLEXIPLEX 0.96.2
#> Setting max barcode edit distance to 2
#> Setting max flanking sequence edit distance to 8
#> Setting read IDs to be  replaced
#> Setting number of threads to 8
#> Search pattern: 
#> primer: CTACACGACGCTCTTCCGATCT
#> BC: NNNNNNNNNNNNNNNN
#> UMI: NNNNNNNNNNNN
#> polyT: TTTTTTTTT
#> Setting known barcodes from /tmp/RtmpgpEV0i/file25bf11d12306/bc_allow.tsv
#> Number of known barcodes: 143
#> Processing file: /__w/_temp/Library/FLAMES/extdata/fastq/musc_rps24.fastq.gz
#> Searching for barcodes...
#> Number of reads processed: 393
#> Number of reads where at least one barcode was found: 368
#> Number of reads with exactly one barcode match: 364
#> Number of chimera reads: 1
#> All done!
ppl <- run_step(ppl, "genome_alignment")
#> Running step: genome_alignment
#> Creating junction bed file from GFF3 annotation.
#> Aligning sample /tmp/RtmpgpEV0i/file25bf11d12306/matched_reads.fastq -> /tmp/RtmpgpEV0i/file25bf11d12306/align2genome.bam
#> Your fastq file appears to have tags, but you did not provide the -y option to minimap2 to include the tags in the output.
#> Warning: samtools not found, using Rsamtools instead, this could be slower and might fail for large BAM files.
#> Sorting BAM files by genome coordinates with 8 threads...
#> Indexing bam files
snps_tb <- sc_mutations(
  bam_path = ppl@genome_bam,
  seqnames = c("chr14", "chr14"),
  positions = c(1260, 2714), # positions of interest
  indel = FALSE
)
#> 03:23:43 Got 1 bam file, parallelizing over each position ...
#> 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |======================================================================| 100%
#> 
#> 03:23:45 Merging results ...
head(snps_tb)
#> # A tibble: 6 × 7
#>   allele barcode          allele_count cell_total_reads   pct   pos seqname
#>   <chr>  <chr>                   <dbl>            <dbl> <dbl> <dbl> <chr>  
#> 1 A      AACCATGAGTCGTTTG            0                2 0      1260 chr14  
#> 2 A      AACTCTTGTCACCTAA            0                1 0      1260 chr14  
#> 3 A      AACTTTCCACAGACTT            0                1 0      1260 chr14  
#> 4 A      AAGCCGCGTGTGAATA            0                4 0      1260 chr14  
#> 5 A      AAGGAGCGTGCTGTAT            1                3 0.333  1260 chr14  
#> 6 A      AATCGGTTCAGGTTCA            0                1 0      1260 chr14  
snps_tb |>
  dplyr::filter(pos == 1260) |>
  dplyr::group_by(allele) |>
  dplyr::summarise(count = sum(allele_count)) # should be identical to samtools pileup
#> # A tibble: 5 × 2
#>   allele count
#>   <chr>  <dbl>
#> 1 -         56
#> 2 A        103
#> 3 C          4
#> 4 G        169
#> 5 T          6