Create SummarizedExperiment object from FLAMES output folder
Source: R/sc_long_pipeline.R
create_se_from_dir.Rd
Create SummarizedExperiment object from FLAMES output folder
Examples
# Fetch the example inputs through a throw-away BiocFileCache, then stage the
# two fastq files in a single directory so they can be merged together.
temp_path <- tempfile()
bfc <- BiocFileCache::BiocFileCache(temp_path, ask = FALSE)
file_url <-
  "https://raw.githubusercontent.com/OliverVoogd/FLAMESData/master/data"

# Each resource is added to the cache under a short name; indexing the cache
# with the name of the returned entry gives the file that is used below.
fastq1 <- bfc[[names(BiocFileCache::bfcadd(
  bfc, "Fastq1",
  paste0(file_url, "/fastq/sample1.fastq.gz")
))]]
fastq2 <- bfc[[names(BiocFileCache::bfcadd(
  bfc, "Fastq2",
  paste0(file_url, "/fastq/sample2.fastq.gz")
))]]
annotation <- bfc[[names(BiocFileCache::bfcadd(
  bfc, "annot.gtf",
  paste0(file_url, "/SIRV_isoforms_multi-fasta-annotation_C_170612a.gtf")
))]]
genome_fa <- bfc[[names(BiocFileCache::bfcadd(
  bfc, "genome.fa",
  paste0(file_url, "/SIRV_isoforms_multi-fasta_170612a.fasta")
))]]

# The downloaded fastq files need to sit in one directory to be merged.
fastq_dir <- file.path(temp_path, "fastq_dir")
dir.create(fastq_dir)
file.copy(c(fastq1, fastq2), fastq_dir)
#> [1] TRUE TRUE
# The copies in fastq_dir are what the pipeline reads, so the original
# downloaded files can be deleted.
unlink(c(fastq1, fastq2))

# Fresh temporary directory that receives the pipeline output.
outdir <- tempfile()
dir.create(outdir)

# Run the pipeline on the staged fastq directory; the configuration is
# generated inline by create_config() and passed as config_file.
se <- bulk_long_pipeline(
  annotation = annotation,
  fastq = fastq_dir,
  outdir = outdir,
  genome_fa = genome_fa,
  config_file = create_config(
    outdir,
    type = "sc_3end",
    threads = 1,
    no_flank = TRUE
  )
)
#> Writing configuration parameters to: /tmp/RtmpoS7Kzz/file1f6dfb26570/config_file_8045.json
#> #### Input parameters:
#> {
#> "pipeline_parameters": {
#> "seed": [2022],
#> "threads": [1],
#> "do_barcode_demultiplex": [true],
#> "do_gene_quantification": [true],
#> "do_genome_alignment": [true],
#> "do_isoform_identification": [true],
#> "bambu_isoform_identification": [false],
#> "multithread_isoform_identification": [false],
#> "do_read_realignment": [true],
#> "do_transcript_quantification": [true],
#> "oarfish_quantification": [true]
#> },
#> "barcode_parameters": {
#> "max_bc_editdistance": [2],
#> "max_flank_editdistance": [8],
#> "pattern": {
#> "primer": ["CTACACGACGCTCTTCCGATCT"],
#> "BC": ["NNNNNNNNNNNNNNNN"],
#> "UMI": ["NNNNNNNNNNNN"],
#> "polyT": ["TTTTTTTTT"]
#> },
#> "strand": ["-"],
#> "TSO_seq": ["AAGCAGTGGTATCAACGCAGAGTACATGGG"],
#> "TSO_prime": [3],
#> "full_length_only": [false]
#> },
#> "isoform_parameters": {
#> "generate_raw_isoform": [false],
#> "max_dist": [10],
#> "max_ts_dist": [100],
#> "max_splice_match_dist": [10],
#> "min_fl_exon_len": [40],
#> "max_site_per_splice": [3],
#> "min_sup_cnt": [5],
#> "min_cnt_pct": [0.001],
#> "min_sup_pct": [0.2],
#> "bambu_trust_reference": [true],
#> "strand_specific": [0],
#> "remove_incomp_reads": [4],
#> "downsample_ratio": [1]
#> },
#> "alignment_parameters": {
#> "use_junctions": [true],
#> "no_flank": [true]
#> },
#> "realign_parameters": {
#> "use_annotation": [true]
#> },
#> "transcript_counting": {
#> "min_tr_coverage": [0.4],
#> "min_read_coverage": [0.4]
#> }
#> }
#> gene annotation: /tmp/RtmpoS7Kzz/file1f6d48f7b2bb/1f6d738b815a_SIRV_isoforms_multi-fasta-annotation_C_170612a.gtf
#> genome fasta: /tmp/RtmpoS7Kzz/file1f6d48f7b2bb/1f6d75bd63c5_SIRV_isoforms_multi-fasta_170612a.fasta
#> input fastq files: /tmp/RtmpoS7Kzz/file1f6d48f7b2bb/fastq_dir/1f6d2b67b85d_sample1.fastq.gz
#> /tmp/RtmpoS7Kzz/file1f6d48f7b2bb/fastq_dir/1f6d3315a36b_sample2.fastq.gz
#> output directory: /tmp/RtmpoS7Kzz/file1f6dfb26570
#> minimap2 path:
#> k8 path:
#> #### Aligning reads to genome using minimap2
#> Aligning sample 1f6d2b67b85d_sample1 ...
#> 02:13:05 AM Fri Oct 25 2024 minimap2_align
#> Aligning sample 1f6d3315a36b_sample2 ...
#> 02:13:09 AM Fri Oct 25 2024 minimap2_align
#> 02:13:12 AM Fri Oct 25 2024 find_isoform
#> #### Realign to transcript using minimap2
#> Realigning sample 1f6d2b67b85d_sample1 ...
#> 02:13:12 AM Fri Oct 25 2024 minimap2_realign
#> file renamed to 1f6d2b67b85d_sample1_realign2transcript.bam
#> Warning: cannot remove file '/tmp/RtmpoS7Kzz/file1f6dfb26570/1f6d2b67b85d_sample1_tmp_align.bam', reason 'No such file or directory'
#> Realigning sample 1f6d3315a36b_sample2 ...
#> 02:13:13 AM Fri Oct 25 2024 minimap2_realign
#> file renamed to 1f6d3315a36b_sample2_realign2transcript.bam
#> Warning: cannot remove file '/tmp/RtmpoS7Kzz/file1f6dfb26570/1f6d3315a36b_sample2_tmp_align.bam', reason 'No such file or directory'
#> #### Generating transcript count matrix