Skip to contents

Create SingleCellExperiment object from FLAMES output folder

Usage

create_sce_from_dir(outdir, annotation)

Arguments

outdir

The folder containing FLAMES output files

annotation

(Optional) the annotation file that was used to produce the output files

Value

A list of SingleCellExperiment objects if multiple transcript matrices are found in the output folder, or a single SingleCellExperiment object if only one is found

Examples

# Create a fresh temporary directory to hold the inputs and the pipeline output.
outdir <- tempfile()
dir.create(outdir)

# Destination paths (inside outdir) for the decompressed example inputs.
bc_allow <- file.path(outdir, "bc_allow.tsv")
genome_fa <- file.path(outdir, "rps24.fa")

# Decompress the example barcode file shipped with FLAMES into outdir.
# remove = FALSE is passed so the packaged .gz file is not deleted.
R.utils::gunzip(
  filename = system.file("extdata", "bc_allow.tsv.gz", package = "FLAMES"),
  destname = bc_allow, remove = FALSE
)
# Decompress the example genome FASTA shipped with FLAMES into outdir.
R.utils::gunzip(
  filename = system.file("extdata", "rps24.fa.gz", package = "FLAMES"),
  destname = genome_fa, remove = FALSE
)
# The annotation is passed as the gzipped GTF path inside the installed package.
annotation <- system.file("extdata", "rps24.gtf.gz", package = "FLAMES")

# Run the single-cell long-read pipeline so that outdir is populated with
# FLAMES output files. create_config() is called with outdir and with
# oarfish_quantification disabled; its result is passed as the config file.
sce <- FLAMES::sc_long_pipeline(
  genome_fa = genome_fa,
  fastq = system.file("extdata", "fastq", "musc_rps24.fastq.gz", package = "FLAMES"),
  annotation = annotation,
  outdir = outdir,
  barcodes_file = bc_allow,
  config_file = create_config(outdir, oarfish_quantification = FALSE)
)
#> Writing configuration parameters to:  /tmp/RtmpoS7Kzz/file1f6d8a60cb5/config_file_8045.json 
#> 02:13:02 AM Fri Oct 25 2024 Start running
#> 02:13:02 AM Fri Oct 25 2024 Demultiplexing using flexiplex...
#> Matching cell barcodes...
#> FLEXIPLEX 0.96.2
#> Setting max barcode edit distance to 2
#> Setting max flanking sequence edit distance to 8
#> Setting read IDs to be  replaced
#> Setting number of threads to 8
#> Search pattern: 
#> primer: CTACACGACGCTCTTCCGATCT
#> BC: NNNNNNNNNNNNNNNN
#> UMI: NNNNNNNNNNNN
#> polyT: TTTTTTTTT
#> Setting known barcodes from /tmp/RtmpoS7Kzz/file1f6d8a60cb5/bc_allow.tsv
#> Number of known barcodes: 143
#> Processing file: /__w/_temp/Library/FLAMES/extdata/fastq/musc_rps24.fastq.gz
#> Searching for barcodes...
#> Number of reads processed: 393
#> Number of reads where at least one barcode was found: 368
#> Number of reads with exactly one barcode match: 364
#> Number of chimera reads: 1
#> All done!
#> 02:13:02 AM Fri Oct 25 2024 Demultiplex done
#> Running FLAMES pipeline...
#> #### Input parameters:
#> {
#>   "pipeline_parameters": {
#>     "seed": [2022],
#>     "threads": [8],
#>     "do_barcode_demultiplex": [true],
#>     "do_gene_quantification": [true],
#>     "do_genome_alignment": [true],
#>     "do_isoform_identification": [true],
#>     "bambu_isoform_identification": [false],
#>     "multithread_isoform_identification": [false],
#>     "do_read_realignment": [true],
#>     "do_transcript_quantification": [true],
#>     "oarfish_quantification": [false]
#>   },
#>   "barcode_parameters": {
#>     "max_bc_editdistance": [2],
#>     "max_flank_editdistance": [8],
#>     "pattern": {
#>       "primer": ["CTACACGACGCTCTTCCGATCT"],
#>       "BC": ["NNNNNNNNNNNNNNNN"],
#>       "UMI": ["NNNNNNNNNNNN"],
#>       "polyT": ["TTTTTTTTT"]
#>     },
#>     "strand": ["-"],
#>     "TSO_seq": ["AAGCAGTGGTATCAACGCAGAGTACATGGG"],
#>     "TSO_prime": [3],
#>     "full_length_only": [false]
#>   },
#>   "isoform_parameters": {
#>     "generate_raw_isoform": [false],
#>     "max_dist": [10],
#>     "max_ts_dist": [100],
#>     "max_splice_match_dist": [10],
#>     "min_fl_exon_len": [40],
#>     "max_site_per_splice": [3],
#>     "min_sup_cnt": [5],
#>     "min_cnt_pct": [0.001],
#>     "min_sup_pct": [0.2],
#>     "bambu_trust_reference": [true],
#>     "strand_specific": [0],
#>     "remove_incomp_reads": [4],
#>     "downsample_ratio": [1]
#>   },
#>   "alignment_parameters": {
#>     "use_junctions": [true],
#>     "no_flank": [false]
#>   },
#>   "realign_parameters": {
#>     "use_annotation": [true]
#>   },
#>   "transcript_counting": {
#>     "min_tr_coverage": [0.4],
#>     "min_read_coverage": [0.4]
#>   }
#> } 
#> gene annotation: /__w/_temp/Library/FLAMES/extdata/rps24.gtf.gz 
#> genome fasta: /tmp/RtmpoS7Kzz/file1f6d8a60cb5/rps24.fa 
#> input fastq: /tmp/RtmpoS7Kzz/file1f6d8a60cb5/matched_reads.fastq 
#> output directory: /tmp/RtmpoS7Kzz/file1f6d8a60cb5 
#> minimap2 path: 
#> k8 path: 
#> #### Aligning reads to genome using minimap2
#> 02:13:02 AM Fri Oct 25 2024 minimap2_align
#> 02:13:02 AM Fri Oct 25 2024 Start gene quantification and UMI deduplication
#> 02:13:02 AM Fri Oct 25 2024 quantify genes 
#> Found genome alignment file(s): 	align2genome.bam
#> 02:13:03 AM Fri Oct 25 2024 Gene quantification and UMI deduplication done!
#> 02:13:03 AM Fri Oct 25 2024 Start isoform identificaiton
#> 02:13:03 AM Fri Oct 25 2024 find_isoform
#> 02:13:03 AM Fri Oct 25 2024 Isoform identificaiton done
#> #### Realigning deduplicated reads to transcript using minimap2
#> 02:13:03 AM Fri Oct 25 2024 minimap2_realign
#> Sorting by position
#> #### Generating transcript count matrix
#> 02:13:03 AM Fri Oct 25 2024 quantify transcripts 
#> Found realignment file(s): 	realign2transcript.bam
# Build the SingleCellExperiment object(s) from the FLAMES output folder,
# passing the same annotation file that was used for the pipeline run above.
sce_2 <- create_sce_from_dir(outdir, annotation)