Skip to contents

Create SingleCellExperiment object from FLAMES output folder

Usage

create_sce_from_dir(outdir, annotation, quantification = "FLAMES")

Arguments

outdir

The folder containing FLAMES output files

annotation

The annotation file that was used to produce the output files

quantification

(Optional) the quantification method used to generate the output files (either "FLAMES" or "Oarfish"). If not specified, the function will attempt to determine the quantification method.

Value

a list of SingleCellExperiment objects if multiple transcript matrices were found in the output folder, or a single SingleCellExperiment object if only one was found

Examples

outdir <- tempfile()
dir.create(outdir)
bc_allow <- file.path(outdir, "bc_allow.tsv")
genome_fa <- file.path(outdir, "rps24.fa")
R.utils::gunzip(
  filename = system.file("extdata", "bc_allow.tsv.gz", package = "FLAMES"),
  destname = bc_allow, remove = FALSE
)
R.utils::gunzip(
  filename = system.file("extdata", "rps24.fa.gz", package = "FLAMES"),
  destname = genome_fa, remove = FALSE
)
annotation <- system.file("extdata", "rps24.gtf.gz", package = "FLAMES")

sce <- FLAMES::sc_long_pipeline(
  genome_fa = genome_fa,
  fastq = system.file("extdata", "fastq", "musc_rps24.fastq.gz", package = "FLAMES"),
  annotation = annotation,
  outdir = outdir,
  barcodes_file = bc_allow,
  config_file = create_config(outdir, oarfish_quantification = FALSE)
)
#> Writing configuration parameters to:  /tmp/Rtmpx6clDQ/file29ea7ea3bd66/config_file_10730.json 
#> 04:59:35 AM Fri Feb 14 2025 Start running
#> 04:59:35 AM Fri Feb 14 2025 Demultiplexing using flexiplex...
#> Matching cell barcodes...
#> FLEXIPLEX 0.96.2
#> Setting max barcode edit distance to 2
#> Setting max flanking sequence edit distance to 8
#> Setting read IDs to be  replaced
#> Setting number of threads to 8
#> Search pattern: 
#> primer: CTACACGACGCTCTTCCGATCT
#> BC: NNNNNNNNNNNNNNNN
#> UMI: NNNNNNNNNNNN
#> polyT: TTTTTTTTT
#> Setting known barcodes from /tmp/Rtmpx6clDQ/file29ea7ea3bd66/bc_allow.tsv
#> Number of known barcodes: 143
#> Processing file: /__w/_temp/Library/FLAMES/extdata/fastq/musc_rps24.fastq.gz
#> Searching for barcodes...
#> Number of reads processed: 393
#> Number of reads where at least one barcode was found: 368
#> Number of reads with exactly one barcode match: 364
#> Number of chimera reads: 1
#> All done!
#> 04:59:36 AM Fri Feb 14 2025 Demultiplex done
#> Running FLAMES pipeline...
#> #### Input parameters:
#> {
#>   "pipeline_parameters": {
#>     "seed": [2022],
#>     "threads": [8],
#>     "do_barcode_demultiplex": [true],
#>     "do_gene_quantification": [true],
#>     "do_genome_alignment": [true],
#>     "do_isoform_identification": [true],
#>     "bambu_isoform_identification": [false],
#>     "multithread_isoform_identification": [false],
#>     "do_read_realignment": [true],
#>     "do_transcript_quantification": [true],
#>     "oarfish_quantification": [false]
#>   },
#>   "barcode_parameters": {
#>     "max_bc_editdistance": [2],
#>     "max_flank_editdistance": [8],
#>     "pattern": {
#>       "primer": ["CTACACGACGCTCTTCCGATCT"],
#>       "BC": ["NNNNNNNNNNNNNNNN"],
#>       "UMI": ["NNNNNNNNNNNN"],
#>       "polyT": ["TTTTTTTTT"]
#>     },
#>     "strand": ["-"],
#>     "TSO_seq": ["AAGCAGTGGTATCAACGCAGAGTACATGGG"],
#>     "TSO_prime": [5],
#>     "cutadapt_minimum_length": [10],
#>     "full_length_only": [false]
#>   },
#>   "isoform_parameters": {
#>     "generate_raw_isoform": [false],
#>     "max_dist": [10],
#>     "max_ts_dist": [100],
#>     "max_splice_match_dist": [10],
#>     "min_fl_exon_len": [40],
#>     "max_site_per_splice": [3],
#>     "min_sup_cnt": [5],
#>     "min_cnt_pct": [0.001],
#>     "min_sup_pct": [0.2],
#>     "bambu_trust_reference": [true],
#>     "strand_specific": [0],
#>     "remove_incomp_reads": [4],
#>     "downsample_ratio": [1]
#>   },
#>   "alignment_parameters": {
#>     "use_junctions": [true],
#>     "no_flank": [false]
#>   },
#>   "realign_parameters": {
#>     "use_annotation": [true]
#>   },
#>   "transcript_counting": {
#>     "min_tr_coverage": [0.4],
#>     "min_read_coverage": [0.4]
#>   }
#> } 
#> gene annotation: /__w/_temp/Library/FLAMES/extdata/rps24.gtf.gz 
#> genome fasta: /tmp/Rtmpx6clDQ/file29ea7ea3bd66/rps24.fa 
#> input fastq: /tmp/Rtmpx6clDQ/file29ea7ea3bd66/matched_reads.fastq 
#> output directory: /tmp/Rtmpx6clDQ/file29ea7ea3bd66 
#> minimap2 path: 
#> k8 path: 
#> #### Aligning reads to genome using minimap2
#> 04:59:36 AM Fri Feb 14 2025 minimap2_align
#> 04:59:36 AM Fri Feb 14 2025 Start gene quantification and UMI deduplication
#> 04:59:36 AM Fri Feb 14 2025 quantify genes 
#> Found genome alignment file(s): 	align2genome.bam
#> 04:59:37 AM Fri Feb 14 2025 Gene quantification and UMI deduplication done!
#> 04:59:37 AM Fri Feb 14 2025 Start isoform identificaiton
#> 04:59:37 AM Fri Feb 14 2025 find_isoform
#> 04:59:37 AM Fri Feb 14 2025 Isoform identificaiton done
#> #### Realigning deduplicated reads to transcript using minimap2
#> 04:59:37 AM Fri Feb 14 2025 minimap2_realign
#> Sorting by position
#> #### Generating transcript count matrix
#> 04:59:37 AM Fri Feb 14 2025 quantify transcripts 
#> Found realignment file(s): 	realign2transcript.bam
sce_2 <- create_sce_from_dir(outdir, annotation)