Skip to contents

Create SummarizedExperiment object from FLAMES output folder

Usage

create_se_from_dir(outdir, annotation)

Arguments

outdir

The folder containing FLAMES output files

annotation

(Optional) the annotation file that was used to produce the output files

Value

a SummarizedExperiment object

Examples

# Set up a temporary BiocFileCache to hold the example input files
temp_path <- tempfile()
bfc <- BiocFileCache::BiocFileCache(temp_path, ask = FALSE)
file_url <-
  "https://raw.githubusercontent.com/OliverVoogd/FLAMESData/master/data"

# Helper: register one remote file (relative to `file_url`) in the cache
# under the resource name `rname`, then look the new entry up in `bfc`.
cache_remote_file <- function(rname, remote_path) {
  cache_id <- names(
    BiocFileCache::bfcadd(bfc, rname, paste(file_url, remote_path, sep = "/"))
  )
  bfc[[cache_id]]
}

# download the two fastq files, the annotation and the genome
fastq1 <- cache_remote_file("Fastq1", "fastq/sample1.fastq.gz")
fastq2 <- cache_remote_file("Fastq2", "fastq/sample2.fastq.gz")
annotation <- cache_remote_file(
  "annot.gtf",
  "SIRV_isoforms_multi-fasta-annotation_C_170612a.gtf"
)
genome_fa <- cache_remote_file(
  "genome.fa",
  "SIRV_isoforms_multi-fasta_170612a.fasta"
)

# the downloaded fastq files need to be in a directory to be merged together
fastq_dir <- file.path(temp_path, "fastq_dir")
dir.create(fastq_dir)
file.copy(c(fastq1, fastq2), fastq_dir)
#> [1] TRUE TRUE
unlink(c(fastq1, fastq2)) # the original files can be deleted

# run the bulk pipeline, writing all of its results into a fresh output folder
outdir <- tempfile()
dir.create(outdir)
se <- bulk_long_pipeline(
  annotation = annotation,
  fastq = fastq_dir,
  outdir = outdir,
  genome_fa = genome_fa,
  config_file = create_config(
    outdir,
    type = "sc_3end",
    threads = 1,
    no_flank = TRUE
  )
)
#> Writing configuration parameters to:  /tmp/RtmpoS7Kzz/file1f6dfb26570/config_file_8045.json 
#> #### Input parameters:
#> {
#>   "pipeline_parameters": {
#>     "seed": [2022],
#>     "threads": [1],
#>     "do_barcode_demultiplex": [true],
#>     "do_gene_quantification": [true],
#>     "do_genome_alignment": [true],
#>     "do_isoform_identification": [true],
#>     "bambu_isoform_identification": [false],
#>     "multithread_isoform_identification": [false],
#>     "do_read_realignment": [true],
#>     "do_transcript_quantification": [true],
#>     "oarfish_quantification": [true]
#>   },
#>   "barcode_parameters": {
#>     "max_bc_editdistance": [2],
#>     "max_flank_editdistance": [8],
#>     "pattern": {
#>       "primer": ["CTACACGACGCTCTTCCGATCT"],
#>       "BC": ["NNNNNNNNNNNNNNNN"],
#>       "UMI": ["NNNNNNNNNNNN"],
#>       "polyT": ["TTTTTTTTT"]
#>     },
#>     "strand": ["-"],
#>     "TSO_seq": ["AAGCAGTGGTATCAACGCAGAGTACATGGG"],
#>     "TSO_prime": [3],
#>     "full_length_only": [false]
#>   },
#>   "isoform_parameters": {
#>     "generate_raw_isoform": [false],
#>     "max_dist": [10],
#>     "max_ts_dist": [100],
#>     "max_splice_match_dist": [10],
#>     "min_fl_exon_len": [40],
#>     "max_site_per_splice": [3],
#>     "min_sup_cnt": [5],
#>     "min_cnt_pct": [0.001],
#>     "min_sup_pct": [0.2],
#>     "bambu_trust_reference": [true],
#>     "strand_specific": [0],
#>     "remove_incomp_reads": [4],
#>     "downsample_ratio": [1]
#>   },
#>   "alignment_parameters": {
#>     "use_junctions": [true],
#>     "no_flank": [true]
#>   },
#>   "realign_parameters": {
#>     "use_annotation": [true]
#>   },
#>   "transcript_counting": {
#>     "min_tr_coverage": [0.4],
#>     "min_read_coverage": [0.4]
#>   }
#> } 
#> gene annotation: /tmp/RtmpoS7Kzz/file1f6d48f7b2bb/1f6d738b815a_SIRV_isoforms_multi-fasta-annotation_C_170612a.gtf 
#> genome fasta: /tmp/RtmpoS7Kzz/file1f6d48f7b2bb/1f6d75bd63c5_SIRV_isoforms_multi-fasta_170612a.fasta 
#> input fastq files: /tmp/RtmpoS7Kzz/file1f6d48f7b2bb/fastq_dir/1f6d2b67b85d_sample1.fastq.gz
#>  /tmp/RtmpoS7Kzz/file1f6d48f7b2bb/fastq_dir/1f6d3315a36b_sample2.fastq.gz
#> output directory: /tmp/RtmpoS7Kzz/file1f6dfb26570 
#> minimap2 path: 
#> k8 path: 
#> #### Aligning reads to genome using minimap2
#> 	Aligning sample  1f6d2b67b85d_sample1 ...
#> 02:13:05 AM Fri Oct 25 2024 minimap2_align
#> 	Aligning sample  1f6d3315a36b_sample2 ...
#> 02:13:09 AM Fri Oct 25 2024 minimap2_align
#> 02:13:12 AM Fri Oct 25 2024 find_isoform
#> #### Realign to transcript using minimap2
#> 	Realigning sample  1f6d2b67b85d_sample1 ...
#> 02:13:12 AM Fri Oct 25 2024 minimap2_realign
#> file renamed to  1f6d2b67b85d_sample1_realign2transcript.bam 
#> Warning: cannot remove file '/tmp/RtmpoS7Kzz/file1f6dfb26570/1f6d2b67b85d_sample1_tmp_align.bam', reason 'No such file or directory'
#> 	Realigning sample  1f6d3315a36b_sample2 ...
#> 02:13:13 AM Fri Oct 25 2024 minimap2_realign
#> file renamed to  1f6d3315a36b_sample2_realign2transcript.bam 
#> Warning: cannot remove file '/tmp/RtmpoS7Kzz/file1f6dfb26570/1f6d3315a36b_sample2_tmp_align.bam', reason 'No such file or directory'
#> #### Generating transcript count matrix