Create SingleCellExperiment
object from FLAMES
output folder
Source: R/sc_long_pipeline.R
create_sce_from_dir.Rd
Create SingleCellExperiment
object from FLAMES
output folder
Value
a list of SingleCellExperiment
objects if multiple transcript matrices were
found in the output folder, or a SingleCellExperiment
object if only one was found
Examples
# Stage the bundled example inputs inside a fresh temporary output directory.
outdir <- tempfile()
dir.create(outdir)
bc_allow <- file.path(outdir, "bc_allow.tsv")
genome_fa <- file.path(outdir, "rps24.fa")
# Decompress the barcode allow-list and genome FASTA into outdir;
# remove = FALSE leaves the compressed files shipped with the package in place.
R.utils::gunzip(
  filename = system.file("extdata", "bc_allow.tsv.gz", package = "FLAMES"),
  destname = bc_allow, remove = FALSE
)
R.utils::gunzip(
  filename = system.file("extdata", "rps24.fa.gz", package = "FLAMES"),
  destname = genome_fa, remove = FALSE
)
# The annotation is passed as the gzipped GTF path, without decompressing it.
annotation <- system.file("extdata", "rps24.gtf.gz", package = "FLAMES")
# Run the pipeline once so that outdir contains FLAMES output to load from.
# create_config() is namespace-qualified like the other FLAMES calls so the
# example also works when the package has not been attached with library().
sce <- FLAMES::sc_long_pipeline(
  genome_fa = genome_fa,
  fastq = system.file(
    "extdata", "fastq", "musc_rps24.fastq.gz",
    package = "FLAMES"
  ),
  annotation = annotation,
  outdir = outdir,
  barcodes_file = bc_allow,
  config_file = FLAMES::create_config(outdir, oarfish_quantification = FALSE)
)
#> Writing configuration parameters to: /tmp/RtmpoS7Kzz/file1f6d8a60cb5/config_file_8045.json
#> 02:13:02 AM Fri Oct 25 2024 Start running
#> 02:13:02 AM Fri Oct 25 2024 Demultiplexing using flexiplex...
#> Matching cell barcodes...
#> FLEXIPLEX 0.96.2
#> Setting max barcode edit distance to 2
#> Setting max flanking sequence edit distance to 8
#> Setting read IDs to be replaced
#> Setting number of threads to 8
#> Search pattern:
#> primer: CTACACGACGCTCTTCCGATCT
#> BC: NNNNNNNNNNNNNNNN
#> UMI: NNNNNNNNNNNN
#> polyT: TTTTTTTTT
#> Setting known barcodes from /tmp/RtmpoS7Kzz/file1f6d8a60cb5/bc_allow.tsv
#> Number of known barcodes: 143
#> Processing file: /__w/_temp/Library/FLAMES/extdata/fastq/musc_rps24.fastq.gz
#> Searching for barcodes...
#> Number of reads processed: 393
#> Number of reads where at least one barcode was found: 368
#> Number of reads with exactly one barcode match: 364
#> Number of chimera reads: 1
#> All done!
#> 02:13:02 AM Fri Oct 25 2024 Demultiplex done
#> Running FLAMES pipeline...
#> #### Input parameters:
#> {
#> "pipeline_parameters": {
#> "seed": [2022],
#> "threads": [8],
#> "do_barcode_demultiplex": [true],
#> "do_gene_quantification": [true],
#> "do_genome_alignment": [true],
#> "do_isoform_identification": [true],
#> "bambu_isoform_identification": [false],
#> "multithread_isoform_identification": [false],
#> "do_read_realignment": [true],
#> "do_transcript_quantification": [true],
#> "oarfish_quantification": [false]
#> },
#> "barcode_parameters": {
#> "max_bc_editdistance": [2],
#> "max_flank_editdistance": [8],
#> "pattern": {
#> "primer": ["CTACACGACGCTCTTCCGATCT"],
#> "BC": ["NNNNNNNNNNNNNNNN"],
#> "UMI": ["NNNNNNNNNNNN"],
#> "polyT": ["TTTTTTTTT"]
#> },
#> "strand": ["-"],
#> "TSO_seq": ["AAGCAGTGGTATCAACGCAGAGTACATGGG"],
#> "TSO_prime": [3],
#> "full_length_only": [false]
#> },
#> "isoform_parameters": {
#> "generate_raw_isoform": [false],
#> "max_dist": [10],
#> "max_ts_dist": [100],
#> "max_splice_match_dist": [10],
#> "min_fl_exon_len": [40],
#> "max_site_per_splice": [3],
#> "min_sup_cnt": [5],
#> "min_cnt_pct": [0.001],
#> "min_sup_pct": [0.2],
#> "bambu_trust_reference": [true],
#> "strand_specific": [0],
#> "remove_incomp_reads": [4],
#> "downsample_ratio": [1]
#> },
#> "alignment_parameters": {
#> "use_junctions": [true],
#> "no_flank": [false]
#> },
#> "realign_parameters": {
#> "use_annotation": [true]
#> },
#> "transcript_counting": {
#> "min_tr_coverage": [0.4],
#> "min_read_coverage": [0.4]
#> }
#> }
#> gene annotation: /__w/_temp/Library/FLAMES/extdata/rps24.gtf.gz
#> genome fasta: /tmp/RtmpoS7Kzz/file1f6d8a60cb5/rps24.fa
#> input fastq: /tmp/RtmpoS7Kzz/file1f6d8a60cb5/matched_reads.fastq
#> output directory: /tmp/RtmpoS7Kzz/file1f6d8a60cb5
#> minimap2 path:
#> k8 path:
#> #### Aligning reads to genome using minimap2
#> 02:13:02 AM Fri Oct 25 2024 minimap2_align
#> 02:13:02 AM Fri Oct 25 2024 Start gene quantification and UMI deduplication
#> 02:13:02 AM Fri Oct 25 2024 quantify genes
#> Found genome alignment file(s): align2genome.bam
#> 02:13:03 AM Fri Oct 25 2024 Gene quantification and UMI deduplication done!
#> 02:13:03 AM Fri Oct 25 2024 Start isoform identificaiton
#> 02:13:03 AM Fri Oct 25 2024 find_isoform
#> 02:13:03 AM Fri Oct 25 2024 Isoform identificaiton done
#> #### Realigning deduplicated reads to transcript using minimap2
#> 02:13:03 AM Fri Oct 25 2024 minimap2_realign
#> Sorting by position
#> #### Generating transcript count matrix
#> 02:13:03 AM Fri Oct 25 2024 quantify transcripts
#> Found realignment file(s): realign2transcript.bam
# Build the SingleCellExperiment object(s) from the FLAMES output folder.
# Namespace-qualified so the call works without attaching FLAMES first.
sce_2 <- FLAMES::create_sce_from_dir(outdir, annotation)