Skip to contents

This function is deprecated. Use BulkPipeline instead.

Usage

bulk_long_pipeline(
  annotation,
  fastq,
  outdir,
  genome_fa,
  minimap2 = NULL,
  config_file
)

Arguments

annotation

The file path to the annotation file in GFF3 / GTF format.

fastq

Path to the FASTQ file or a directory containing FASTQ files. Each file will be processed as an individual sample.

outdir

Path to the output directory. If it does not exist, it will be created.

genome_fa

The file path to the reference genome in FASTA format.

minimap2

(optional) The path to the minimap2 binary. If not provided, FLAMES will use a copy from bioconda via basilisk. provided, FLAMES will use a copy from bioconda via basilisk.

config_file

Path to the JSON configuration file. See create_config for creating one.

Value

A SummarizedExperiment object containing the transcript counts.

See also

BulkPipeline for the new pipeline function. SingleCellPipeline for single cell pipelines, MultiSampleSCPipeline for multi sample single cell pipelines.

Examples

outdir <- tempfile()
dir.create(outdir)
# simulate 3 samples via sampling
reads <- ShortRead::readFastq(
  system.file("extdata", "fastq", "musc_rps24.fastq.gz", package = "FLAMES")
)
dir.create(file.path(outdir, "fastq"))
ShortRead::writeFastq(reads[1:100],
  file.path(outdir, "fastq/sample1.fq.gz"), mode = "w", full = FALSE)
reads <- reads[-(1:100)]
ShortRead::writeFastq(reads[1:100],
  file.path(outdir, "fastq/sample2.fq.gz"), mode = "w", full = FALSE)
reads <- reads[-(1:100)]
ShortRead::writeFastq(reads,
  file.path(outdir, "fastq/sample3.fq.gz"), mode = "w", full = FALSE)
# prepare the reference genome
genome_fa <- file.path(outdir, "rps24.fa")
R.utils::gunzip(
  filename = system.file("extdata", "rps24.fa.gz", package = "FLAMES"),
  destname = genome_fa, remove = FALSE
)
se <- bulk_long_pipeline(
  fastq = file.path(outdir, "fastq"),
  annotation = system.file("extdata", "rps24.gtf.gz", package = "FLAMES"),
  outdir = outdir, genome_fa = genome_fa,
  config_file = create_config(outdir, type = "sc_3end", threads = 1, no_flank = TRUE)
)
#> bulk_long_pipeline() is deprecated. Use BulkPipeline() instead.
#> Writing configuration parameters to:  /tmp/RtmpgpEV0i/file25bf2164c695/config_file_9663.json 
#> Configured steps: 
#> 	genome_alignment: TRUE
#> 	isoform_identification: TRUE
#> 	read_realignment: TRUE
#> 	transcript_quantification: TRUE
#> samtools not found, will use Rsamtools package instead
#> Running step: genome_alignment
#> Creating junction bed file from GFF3 annotation.
#> Aligning sample sample1 -> /tmp/RtmpgpEV0i/file25bf2164c695/sample1_align2genome.bam
#> Warning: samtools not found, using Rsamtools instead, this could be slower and might fail for large BAM files.
#> Sorting BAM files by genome coordinates with 1 threads...
#> Indexing bam files
#> Aligning sample sample2 -> /tmp/RtmpgpEV0i/file25bf2164c695/sample2_align2genome.bam
#> Warning: samtools not found, using Rsamtools instead, this could be slower and might fail for large BAM files.
#> Sorting BAM files by genome coordinates with 1 threads...
#> Indexing bam files
#> Aligning sample sample3 -> /tmp/RtmpgpEV0i/file25bf2164c695/sample3_align2genome.bam
#> Warning: samtools not found, using Rsamtools instead, this could be slower and might fail for large BAM files.
#> Sorting BAM files by genome coordinates with 1 threads...
#> Indexing bam files
#> Running step: isoform_identification
#> Import genomic features from the file as a GRanges object ... 
#> OK
#> Prepare the 'metadata' data frame ... 
#> OK
#> Make the TxDb object ... 
#> Warning: genome version information is not available for this TxDb object
#> OK
#> Running step: read_realignment
#> Realigning sample sample1 -> /tmp/RtmpgpEV0i/file25bf2164c695/sample1_realign2transcript.bam
#> Warning: samtools not found, using Rsamtools instead, this could be slower and might fail for large BAM files.
#> Skipped sorting BAM files.
#> Realigning sample sample2 -> /tmp/RtmpgpEV0i/file25bf2164c695/sample2_realign2transcript.bam
#> Warning: samtools not found, using Rsamtools instead, this could be slower and might fail for large BAM files.
#> Skipped sorting BAM files.
#> Realigning sample sample3 -> /tmp/RtmpgpEV0i/file25bf2164c695/sample3_realign2transcript.bam
#> Warning: samtools not found, using Rsamtools instead, this could be slower and might fail for large BAM files.
#> Skipped sorting BAM files.
#> Running step: transcript_quantification
#> Pipeline saved to /tmp/RtmpgpEV0i/file25bf2164c695/pipeline.rds
se
#> class: SummarizedExperiment 
#> dim: 10 3 
#> metadata(0):
#> assays(1): counts
#> rownames(10): ENSMUST00000169826.2 ENSMUSG00000025290.17_19_5159_1 ...
#>   ENSMUSG00000025290.17_19_5159_8 ENSMUST00000225023.1
#> rowData names(0):
#> colnames(3): sample1 sample2 sample3
#> colData names(0):