Count the number of reads supporting each variants at the given positions for each cell.
Arguments
- bam_path
character(1) or character(n): path to the bam file(s) aligned to the reference genome (NOT the transcriptome! Unless the postions are also from the transcriptome).
- seqnames
character(n): chromosome names of the postions to count alleles.
- positions
integer(n): positions, 1-based, same length as seqnames. The positions to count alleles.
- indel
logical(1): whether to count indels (TRUE) or SNPs (FALSE).
- threads
integer(1): number of threads to use. Maximum number of threads is the number of bam files * number of positions.
Examples
ppl <- example_pipeline("SingleCellPipeline")
#> Writing configuration parameters to: /tmp/RtmpgpEV0i/file25bf11d12306/config_file_9663.json
#> Configured steps:
#> barcode_demultiplex: TRUE
#> genome_alignment: TRUE
#> gene_quantification: TRUE
#> isoform_identification: TRUE
#> read_realignment: TRUE
#> transcript_quantification: TRUE
#> samtools not found, will use Rsamtools package instead
ppl <- run_step(ppl, "barcode_demultiplex")
#> Running step: barcode_demultiplex
#> FLEXIPLEX 0.96.2
#> Setting max barcode edit distance to 2
#> Setting max flanking sequence edit distance to 8
#> Setting read IDs to be replaced
#> Setting number of threads to 8
#> Search pattern:
#> primer: CTACACGACGCTCTTCCGATCT
#> BC: NNNNNNNNNNNNNNNN
#> UMI: NNNNNNNNNNNN
#> polyT: TTTTTTTTT
#> Setting known barcodes from /tmp/RtmpgpEV0i/file25bf11d12306/bc_allow.tsv
#> Number of known barcodes: 143
#> Processing file: /__w/_temp/Library/FLAMES/extdata/fastq/musc_rps24.fastq.gz
#> Searching for barcodes...
#> Number of reads processed: 393
#> Number of reads where at least one barcode was found: 368
#> Number of reads with exactly one barcode match: 364
#> Number of chimera reads: 1
#> All done!
ppl <- run_step(ppl, "genome_alignment")
#> Running step: genome_alignment
#> Creating junction bed file from GFF3 annotation.
#> Aligning sample /tmp/RtmpgpEV0i/file25bf11d12306/matched_reads.fastq -> /tmp/RtmpgpEV0i/file25bf11d12306/align2genome.bam
#> Your fastq file appears to have tags, but you did not provide the -y option to minimap2 to include the tags in the output.
#> Warning: samtools not found, using Rsamtools instead, this could be slower and might fail for large BAM files.
#> Sorting BAM files by genome coordinates with 8 threads...
#> Indexing bam files
snps_tb <- sc_mutations(
bam_path = ppl@genome_bam,
seqnames = c("chr14", "chr14"),
positions = c(1260, 2714), # positions of interest
indel = FALSE
)
#> 03:23:43 Got 1 bam file, parallelizing over each position ...
#>
|
| | 0%
|
|=================================== | 50%
|
|======================================================================| 100%
#>
#> 03:23:45 Merging results ...
head(snps_tb)
#> # A tibble: 6 × 7
#> allele barcode allele_count cell_total_reads pct pos seqname
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
#> 1 A AACCATGAGTCGTTTG 0 2 0 1260 chr14
#> 2 A AACTCTTGTCACCTAA 0 1 0 1260 chr14
#> 3 A AACTTTCCACAGACTT 0 1 0 1260 chr14
#> 4 A AAGCCGCGTGTGAATA 0 4 0 1260 chr14
#> 5 A AAGGAGCGTGCTGTAT 1 3 0.333 1260 chr14
#> 6 A AATCGGTTCAGGTTCA 0 1 0 1260 chr14
snps_tb |>
dplyr::filter(pos == 1260) |>
dplyr::group_by(allele) |>
dplyr::summarise(count = sum(allele_count)) # should be identical to samtools pileup
#> # A tibble: 5 × 2
#> allele count
#> <chr> <dbl>
#> 1 - 56
#> 2 A 103
#> 3 C 4
#> 4 G 169
#> 5 T 6