Create SummarizedExperiment object from FLAMES output folder
Source: R/sc_long_pipeline.R
create_se_from_dir.Rd
Create a SummarizedExperiment object from a FLAMES output folder.
Examples
# Fetch the example inputs into a throwaway BiocFileCache, then stage the two
# fastq files in their own folder so the pipeline can merge them.
temp_path <- tempfile()
bfc <- BiocFileCache::BiocFileCache(temp_path, ask = FALSE)
file_url <-
  "https://raw.githubusercontent.com/OliverVoogd/FLAMESData/master/data"

# Register `relative_path` (resolved against `base_url`) in `cache` under the
# resource name `rname`, and return the matching cache entry, which is used
# below as a local file path.
cache_remote_file <- function(cache, base_url, rname, relative_path) {
  remote <- paste(base_url, relative_path, sep = "/")
  resource_id <- names(BiocFileCache::bfcadd(cache, rname, remote))
  cache[[resource_id]]
}

# download the required fastq files, annotation and genome
fastq1 <- cache_remote_file(bfc, file_url, "Fastq1", "fastq/sample1.fastq.gz")
fastq2 <- cache_remote_file(bfc, file_url, "Fastq2", "fastq/sample2.fastq.gz")
annotation <- cache_remote_file(
  bfc, file_url, "annot.gtf",
  "SIRV_isoforms_multi-fasta-annotation_C_170612a.gtf"
)
genome_fa <- cache_remote_file(
  bfc, file_url, "genome.fa",
  "SIRV_isoforms_multi-fasta_170612a.fasta"
)

# the downloaded fastq files need to be in a directory to be merged together
fastq_dir <- file.path(temp_path, "fastq_dir")
dir.create(fastq_dir)
file.copy(c(fastq1, fastq2), fastq_dir)
#> [1] TRUE TRUE
unlink(c(fastq1, fastq2)) # the original files can be deleted

# Run the pipeline into a fresh output directory; the config is created inside
# the call so it is written to that same directory.
outdir <- tempfile()
dir.create(outdir)
se <- bulk_long_pipeline(
  annotation = annotation,
  fastq = fastq_dir,
  outdir = outdir,
  genome_fa = genome_fa,
  config_file = create_config(
    outdir,
    type = "sc_3end", threads = 1, no_flank = TRUE
  )
)
#> Writing configuration parameters to: /tmp/RtmpqmWKMU/filecd6716257d0/config_file_52583.json
#> #### Input parameters:
#> {
#> "pipeline_parameters": {
#> "seed": [2022],
#> "threads": [1],
#> "do_barcode_demultiplex": [true],
#> "do_gene_quantification": [true],
#> "do_genome_alignment": [true],
#> "do_isoform_identification": [true],
#> "bambu_isoform_identification": [false],
#> "multithread_isoform_identification": [false],
#> "do_read_realignment": [true],
#> "do_transcript_quantification": [true],
#> "oarfish_quantification": [true]
#> },
#> "barcode_parameters": {
#> "max_bc_editdistance": [2],
#> "max_flank_editdistance": [8],
#> "pattern": {
#> "primer": ["CTACACGACGCTCTTCCGATCT"],
#> "BC": ["NNNNNNNNNNNNNNNN"],
#> "UMI": ["NNNNNNNNNNNN"],
#> "polyT": ["TTTTTTTTT"]
#> },
#> "strand": ["-"],
#> "TSO_seq": ["AAGCAGTGGTATCAACGCAGAGTACATGGG"],
#> "TSO_prime": [5],
#> "cutadapt_minimum_length": [10],
#> "full_length_only": [false]
#> },
#> "isoform_parameters": {
#> "generate_raw_isoform": [false],
#> "max_dist": [10],
#> "max_ts_dist": [100],
#> "max_splice_match_dist": [10],
#> "min_fl_exon_len": [40],
#> "max_site_per_splice": [3],
#> "min_sup_cnt": [5],
#> "min_cnt_pct": [0.001],
#> "min_sup_pct": [0.2],
#> "bambu_trust_reference": [true],
#> "strand_specific": [0],
#> "remove_incomp_reads": [4],
#> "downsample_ratio": [1]
#> },
#> "alignment_parameters": {
#> "use_junctions": [true],
#> "no_flank": [true]
#> },
#> "realign_parameters": {
#> "use_annotation": [true]
#> },
#> "transcript_counting": {
#> "min_tr_coverage": [0.4],
#> "min_read_coverage": [0.4]
#> }
#> }
#> gene annotation: /tmp/RtmpqmWKMU/filecd6712fc9fd/cd675ba4c758_SIRV_isoforms_multi-fasta-annotation_C_170612a.gtf
#> genome fasta: /tmp/RtmpqmWKMU/filecd6712fc9fd/cd672a350eaf_SIRV_isoforms_multi-fasta_170612a.fasta
#> input fastq files: /tmp/RtmpqmWKMU/filecd6712fc9fd/fastq_dir/cd671bf3e267_sample1.fastq.gz
#> /tmp/RtmpqmWKMU/filecd6712fc9fd/fastq_dir/cd673195112f_sample2.fastq.gz
#> output directory: /tmp/RtmpqmWKMU/filecd6716257d0
#> minimap2 path:
#> k8 path:
#> #### Aligning reads to genome using minimap2
#> Aligning sample cd671bf3e267_sample1 ...
#> 06:54:05 AM Mon Jan 06 2025 minimap2_align
#> Aligning sample cd673195112f_sample2 ...
#> 06:54:08 AM Mon Jan 06 2025 minimap2_align
#> 06:54:11 AM Mon Jan 06 2025 find_isoform
#> #### Realign to transcript using minimap2
#> Realigning sample cd671bf3e267_sample1 ...
#> 06:54:11 AM Mon Jan 06 2025 minimap2_realign
#> file renamed to cd671bf3e267_sample1_realign2transcript.bam
#> Warning: cannot remove file '/tmp/RtmpqmWKMU/filecd6716257d0/cd671bf3e267_sample1_tmp_align.bam', reason 'No such file or directory'
#> Realigning sample cd673195112f_sample2 ...
#> 06:54:12 AM Mon Jan 06 2025 minimap2_realign
#> file renamed to cd673195112f_sample2_realign2transcript.bam
#> Warning: cannot remove file '/tmp/RtmpqmWKMU/filecd6716257d0/cd673195112f_sample2_tmp_align.bam', reason 'No such file or directory'
#> #### Generating transcript count matrix