Small RNA sequencing processing in the example of smallRNA_7 using exceRpt

gene_x 0 like s 415 view s

Tags: pipeline

Input data

# Collect the 12 raw FASTQ files of sequencing run 231016_NB501882_0435_AHG7HMBGXV
# in raw_data/ under short sample names (these names are reused by all later steps).
# mkdir -p plus an absolute cd make this step work from any starting directory
# and on re-runs (a plain mkdir fails when the directory already exists).
mkdir -p ~/DATA/Data_Ute/Data_Ute_smallRNA_7/raw_data
cd ~/DATA/Data_Ute/Data_Ute_smallRNA_7/raw_data
cp ~/DATA/Data_Ute/Data_Ute_smallRNA_7/231016_NB501882_0435_AHG7HMBGXV/nf930/01_0505_WaGa_wt_EV_RNA_S1_R1_001.fastq.gz         0505_WaGa_wt.fastq.gz
cp ~/DATA/Data_Ute/Data_Ute_smallRNA_7/231016_NB501882_0435_AHG7HMBGXV/nf931/02_0505_WaGa_sT_DMSO_EV_RNA_S2_R1_001.fastq.gz    0505_WaGa_sT_DMSO.fastq.gz
cp ~/DATA/Data_Ute/Data_Ute_smallRNA_7/231016_NB501882_0435_AHG7HMBGXV/nf932/03_0505_WaGa_sT_Dox_EV_RNA_S3_R1_001.fastq.gz     0505_WaGa_sT_Dox.fastq.gz
cp ~/DATA/Data_Ute/Data_Ute_smallRNA_7/231016_NB501882_0435_AHG7HMBGXV/nf933/04_0505_WaGa_scr_DMSO_EV_RNA_S4_R1_001.fastq.gz   0505_WaGa_scr_DMSO.fastq.gz
cp ~/DATA/Data_Ute/Data_Ute_smallRNA_7/231016_NB501882_0435_AHG7HMBGXV/nf934/05_0505_WaGa_scr_Dox_EV_RNA_S5_R1_001.fastq.gz    0505_WaGa_scr_Dox.fastq.gz
cp ~/DATA/Data_Ute/Data_Ute_smallRNA_7/231016_NB501882_0435_AHG7HMBGXV/nf935/06_1905_WaGa_wt_EV_RNA_S6_R1_001.fastq.gz         1905_WaGa_wt.fastq.gz
cp ~/DATA/Data_Ute/Data_Ute_smallRNA_7/231016_NB501882_0435_AHG7HMBGXV/nf936/07_1905_WaGa_sT_DMSO_EV_RNA_S7_R1_001.fastq.gz    1905_WaGa_sT_DMSO.fastq.gz
cp ~/DATA/Data_Ute/Data_Ute_smallRNA_7/231016_NB501882_0435_AHG7HMBGXV/nf937/08_1905_WaGa_sT_Dox_EV_RNA_S8_R1_001.fastq.gz     1905_WaGa_sT_Dox.fastq.gz
cp ~/DATA/Data_Ute/Data_Ute_smallRNA_7/231016_NB501882_0435_AHG7HMBGXV/nf938/09_1905_WaGa_scr_DMSO_EV_RNA_S9_R1_001.fastq.gz   1905_WaGa_scr_DMSO.fastq.gz
cp ~/DATA/Data_Ute/Data_Ute_smallRNA_7/231016_NB501882_0435_AHG7HMBGXV/nf939/10_1905_WaGa_scr_Dox_EV_RNA_S10_R1_001.fastq.gz   1905_WaGa_scr_Dox.fastq.gz
cp ~/DATA/Data_Ute/Data_Ute_smallRNA_7/231016_NB501882_0435_AHG7HMBGXV/nf940/11_control_MKL1_S11_R1_001.fastq.gz               control_MKL1.fastq.gz
cp ~/DATA/Data_Ute/Data_Ute_smallRNA_7/231016_NB501882_0435_AHG7HMBGXV/nf941/12_control_WaGa_S12_R1_001.fastq.gz               control_WaGa.fastq.gz
#END

Run cutadapt

Some common adapter sequences from different kits, for reference (* marks the adapter used in the cutadapt call below):

    - TruSeq Small RNA (Illumina): TGGAATTCTCGGGTGCCAAGG
    - Small RNA Kits V1 (Illumina): TCGTATGCCGTCTTCTGCTTGT
    - Small RNA Kits V1.5 (Illumina): ATCTCGTATGCCGTCTTCTGCTTG
    - NEXTflex Small RNA Sequencing Kit v3 for Illumina Platforms (Bioo Scientific): TGGAATTCTCGGGTGCCAAGG
    - LEXOGEN Small RNA-Seq Library Prep Kit (Illumina): TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC *

# Trim the 3' adapter (-a), low-quality bases (-q 20) and flanking Ns (--trim-n)
# from every sample; reads shorter than 5 nt are discarded. The cutadapt reports
# of all samples are appended to trimmed/LOG.
# The directory is given as an absolute path: the input step leaves the shell
# inside raw_data/, so a relative "mkdir trimmed; cd trimmed" would nest trimmed/
# inside raw_data/ and ../raw_data/ below would no longer point at the inputs.
mkdir -p ~/DATA/Data_Ute/Data_Ute_smallRNA_7/trimmed
cd ~/DATA/Data_Ute/Data_Ute_smallRNA_7/trimmed
for sample in 0505_WaGa_wt 0505_WaGa_sT_DMSO 0505_WaGa_sT_Dox 0505_WaGa_scr_DMSO 0505_WaGa_scr_Dox 1905_WaGa_wt 1905_WaGa_sT_DMSO 1905_WaGa_sT_Dox 1905_WaGa_scr_DMSO 1905_WaGa_scr_Dox  control_MKL1 control_WaGa; do
  cutadapt -a TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC -q 20 -o "${sample}_cutadapted.fastq.gz" --minimum-length 5 --trim-n "../raw_data/${sample}.fastq.gz" >> LOG
done

# -- check if it is necessary to remove adapter from 5'-end --
(Option 1) cutadapt -g TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC -o /dev/null --report=minimal 0505_WaGa_wt_cutadapted.fastq.gz --> The trimming statistics in the output show how often a 5'-end adapter was removed.
(Option 2) zcat your_sample.fastq.gz | grep 'TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC' | head -n 20
(Option 3) fastqc your_sample.fastq.gz
#Open the generated HTML report and check:
#    The "Overrepresented sequences" section for adapter sequences.
#    The "Per base sequence content" plot to see if there are unexpected sequences at the start of reads.
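#(If the check shows that both ends contain adapter) cutadapt -g TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC -a TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC -q 20 --minimum-length 10 -o ${sample}_trimmed.fastq.gz ${sample}.fastq.gz >> LOG2   (NOTE: by default cutadapt removes only one adapter per read; add -n 2 if the 5' and the 3' adapter can occur in the same read)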
#    -g → Trims 5'-end adapters
#    -a → Trims 3'-end adapters; -a TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC → Specifies the adapter sequence to be removed from the 3' end of the reads. The sequence provided is common in RNA-seq libraries (e.g., Illumina small RNA sequencing).
#    -q 20 → Performs quality trimming at the 3' end only, removing bases with a Phred quality score below 20; to trim both ends give two cutoffs, e.g. -q 20,20 (5' cutoff, 3' cutoff).

Install exceRpt (https://github.gersteinlab.org/exceRpt/)

# Fetch the exceRpt Docker image.
docker pull rkitchen/excerpt

# Download and unpack the exceRpt databases into /mnt/nvme0n1p1/MyexceRptDatabase,
# the host directory that the docker commands mount under /exceRpt_DB.
# The directory is created with the same absolute path that is used for cd
# (a relative mkdir would create it in whatever directory the shell is in).
mkdir -p /mnt/nvme0n1p1/MyexceRptDatabase
cd /mnt/nvme0n1p1/MyexceRptDatabase
wget http://org.gersteinlab.excerpt.s3-website-us-east-1.amazonaws.com/exceRptDB_v4_hg38_lowmem.tgz
tar -xvf exceRptDB_v4_hg38_lowmem.tgz

# Endogenous genome archives listed for reference (only hg38 is downloaded above):
#http://org.gersteinlab.excerpt.s3-website-us-east-1.amazonaws.com/exceRptDB_v4_hg19_lowmem.tgz
#http://org.gersteinlab.excerpt.s3-website-us-east-1.amazonaws.com/exceRptDB_v4_hg38_lowmem.tgz
#http://org.gersteinlab.excerpt.s3-website-us-east-1.amazonaws.com/exceRptDB_v4_mm10_lowmem.tgz

# Additional archives (EXOmiRNArRNA and EXOGenomes), unpacked into the same directory.
wget http://org.gersteinlab.excerpt.s3-website-us-east-1.amazonaws.com/exceRptDB_v4_EXOmiRNArRNA.tgz
tar -xvf exceRptDB_v4_EXOmiRNArRNA.tgz
wget http://org.gersteinlab.excerpt.s3-website-us-east-1.amazonaws.com/exceRptDB_v4_EXOGenomes.tgz
tar -xvf exceRptDB_v4_EXOGenomes.tgz

Run exceRpt

#[COMPLETE_DB]
# Template: single sample, with the whole database directory mounted as /exceRpt_DB.
docker run -v /mnt/nvme0n1p1/MyInputSample:/exceRptInput \
          -v /mnt/nvme0n1p1/MyResults:/exceRptOutput \
          -v /mnt/nvme0n1p1/MyexceRptDatabase:/exceRpt_DB \
          -t rkitchen/excerpt \
          INPUT_FILE_PATH=/exceRptInput/0505_WaGa_wt_cutadapted.fastq.gz \
          MAIN_ORGANISM_GENOME_ID=hg38 \
          N_THREADS=50 \
          JAVA_RAM='800G'
#[SMALL_DB]
# Template: only the hg38 sub-directory of the database is mounted.
# ${sample} must be set by the caller (e.g. by a surrounding for-loop).
# Every line except the last needs a trailing backslash; without it the command
# ends early and the remaining options run as plain shell assignments.
docker run -v /mnt/nvme0n1p1/MyInputSample:/exceRptInput \
          -v /mnt/nvme0n1p1/MyResults:/exceRptOutput \
          -v /mnt/nvme0n1p1/MyexceRptDatabase/hg38:/exceRpt_DB/hg38 \
          -t rkitchen/excerpt \
          INPUT_FILE_PATH="/exceRptInput/${sample}_cutadapted.fastq.gz" \
          N_THREADS=50 \
          JAVA_RAM='800G'

#[REAL_RUNNING]
# Run exceRpt once per cutadapt-trimmed sample; all output is written to results/.
# The output directory is created at the exact host path that is bind-mounted
# below. A relative "mkdir results" depends on the current directory, and a
# host path that is missing at "docker run -v" time is created by Docker itself
# (presumably the cause of the "not writable" error noted in the merge step — confirm).
# NOTE(review): these inputs were already adapter-trimmed by cutadapt; the option
# list in this document says ADAPTER_SEQ='none' is meant for that case — confirm
# that the default 'guessKnown' is intended here.
mkdir -p ~/DATA/Data_Ute/Data_Ute_smallRNA_7/results
for sample in 0505_WaGa_wt 0505_WaGa_sT_DMSO 0505_WaGa_sT_Dox 0505_WaGa_scr_DMSO 0505_WaGa_scr_Dox 1905_WaGa_wt 1905_WaGa_sT_DMSO 1905_WaGa_sT_Dox 1905_WaGa_scr_DMSO 1905_WaGa_scr_Dox  control_MKL1 control_WaGa; do
    docker run -v ~/DATA/Data_Ute/Data_Ute_smallRNA_7/trimmed:/exceRptInput \
              -v ~/DATA/Data_Ute/Data_Ute_smallRNA_7/results:/exceRptOutput \
              -v /mnt/nvme0n1p1/MyexceRptDatabase/hg38:/exceRpt_DB/hg38 \
              -t rkitchen/excerpt \
              INPUT_FILE_PATH="/exceRptInput/${sample}_cutadapted.fastq.gz" MAIN_ORGANISM_GENOME_ID=hg38 N_THREADS=50 JAVA_RAM='200G'
done

# Second run: the same exceRpt call on the untrimmed files in raw_data/, so that
# exceRpt does the adapter handling itself; all output is written to results2/.
# The output directory is created at the exact host path that is bind-mounted
# below. A relative "mkdir results2" depends on the current directory, and a
# host path that is missing at "docker run -v" time is created by Docker itself.
mkdir -p ~/DATA/Data_Ute/Data_Ute_smallRNA_7/results2
for sample in 0505_WaGa_wt 0505_WaGa_sT_DMSO 0505_WaGa_sT_Dox 0505_WaGa_scr_DMSO 0505_WaGa_scr_Dox 1905_WaGa_wt 1905_WaGa_sT_DMSO 1905_WaGa_sT_Dox 1905_WaGa_scr_DMSO 1905_WaGa_scr_Dox  control_MKL1 control_WaGa; do
  docker run -v ~/DATA/Data_Ute/Data_Ute_smallRNA_7/raw_data:/exceRptInput \
              -v ~/DATA/Data_Ute/Data_Ute_smallRNA_7/results2:/exceRptOutput \
              -v /mnt/nvme0n1p1/MyexceRptDatabase/hg38:/exceRpt_DB/hg38 \
              -t rkitchen/excerpt \
              INPUT_FILE_PATH="/exceRptInput/${sample}.fastq.gz" MAIN_ORGANISM_GENOME_ID=hg38 N_THREADS=50 JAVA_RAM='200G'
done

#Most of the Docker command is loading directories on your machine (the -v parameters) so that exceRpt can read from or write to them. The directory to the left of each : can obviously be whatever you want, but it is important to make sure the right side of each : is written as above or exceRpt will not be able to find/write the data it needs.

Analysis customisation options

There are a number of options available for customising the analysis that are specified using the command-line. These are a list of the most commonly-modified options and their default values.

Required OPTIONs:

  * INPUT_FILE_PATH                  | Path to the input fastq/fasta/sra file

Main analysis OPTIONs:

  * ADAPTER_SEQ                      | 'guessKnown'/'none'/<String>     | [default: 'guessKnown'] will attempt to guess the 3' adapter using known sequences.  The actual adapter can be input here if known, or specify 'none' if the adapter is already removed
  * SAMPLE_NAME                      | <String>                         | add an optional ID to the input file specified above
  * MAIN_ORGANISM_GENOME_ID          | 'hg38'/'hg19'/'mm10'             | [default: 'hg38'] changes the organism/genome build used for alignment
  * CALIBRATOR_LIBRARY               | <Path>                           | path to a bowtie2 index of calibrator oligos used for QC or normalisation
  * ENDOGENOUS_LIB_PRIORITY          | <comma,separated,list,no,spaces> | [default: 'miRNA,tRNA,piRNA,gencode,circRNA'] choose the priority of each library during read assignment and quantification

Additional analysis OPTIONs:

  * TRIM_N_BASES_5p                  | <int>                 | [default: '0'] remove N bases from the 5' end of every read
  * TRIM_N_BASES_3p                  | <int>                 | [default: '0'] remove N bases from the 3' end of every read
  * RANDOM_BARCODE_LENGTH            | <int>                 | [default: 0] identify and remove random barcodes of this number of nucleotides.  For a Bioo prep with a 4N random barcode on both the 3' and 5' adapter, this value should be '4'.
  * RANDOM_BARCODE_LOCATION          | '-5p -3p'/'-5p'/'-3p' | [default: '-5p -3p'] specify where to look for the random barcode(s)
  * KEEP_RANDOM_BARCODE_STATS        | 'false'/'true'        | [default: 'false'] specify whether or not to calculate overrepresentation statistics using the random barcodes (this may be slow and memory intensive!)
  * DOWNSAMPLE_RNA_READS             | <int>                 | [default: NULL] choose whether to downsample to this number of reads after assigning reads to the various transcriptome libraries (may be useful for normalising very different yields)

Hardware-specific OPTIONs:

  * N_THREADS                        | <int>          | [default: 4] change the number of threads used in the alignments performed by exceRpt
  * JAVA_RAM                         | <String>       | [default: '10G'] change the amount of memory (RAM) available to Java. This may need to be higher if crashes occur during quantification or random barcode stats calculation
  * REMOVE_LARGE_INTERMEDIATE_FILES  | 'false'/'true' | [default: 'false'] when exceRpt finishes, choose whether to remove the large alignment files that can take a lot of disk space

Alignment/QC OPTIONs:

  * MIN_READ_LENGTH                  | <int>              | [default: 18] minimum read-length to use after adapter (+ random barcode) removal
  * QFILTER_MIN_QUAL                 | <int>              | [default: 20] minimum base-call quality of the read
  * QFILTER_MIN_READ_FRAC            | <double>           | [default: 80] read must have base-calls higher than the value above for at least this fraction of its length
  * STAR_alignEndsType               | 'Local'/'EndToEnd' | [default: Local] defines the alignment mode; local alignment is recommended to allow for isomiRs
  * STAR_outFilterMatchNmin          | <int>              | [default: 18] minimum number of bases to include in the alignment (should match the minimum read length defined above)
  * STAR_outFilterMatchNminOverLread | <double>           | [default: 0.9] minimum fraction of the read that *must* remain following soft-clipping in a local alignment
  * STAR_outFilterMismatchNmax       | <int>              | [default: 1] maximum allowed mismatched bases in the aligned portion of the read
  * MAX_MISMATCHES_EXOGENOUS         | <int>              | [default: 0] maximum allowed mismatched bases in the *entire* read when aligning to exogenous sequences

Understanding the exceRpt output contained in OUTPUT_DIR

A variety of output files are created for each sample as they are run through the exceRpt pipeline. At the highest level, 5 files and one directory are output to the OUTPUT_DIR:

  [sampleID]/                    | Directory containing the complete set of output files for this sample
  [sampleID]_CORE_RESULTS_v*.tgz | Archive containing only the most commonly used results for this sample
  [sampleID].err                 | Text file containing error logging information for this run
  [sampleID].log                 | Text file containing normal logging information for this run
  [sampleID].qcResult            | Text file containing a variety of QC metrics for this sample
  [sampleID].stats               | Text file containing a variety of alignment statistics for this sample

This archive ([sampleID]_CORE_RESULTS_v4.*.tgz) contains the most commonly used results for this sample and is the only file required to run the mergePipelineRuns.R script described below for processing the output from multiple runs of the exceRpt pipeline (i.e. for multiple samples). The contents of this archive are as follows:

  [sampleID].log                               | Same as above
  [sampleID].stats                             | Same as above
  [sampleID].qcResult                          | Same as above
  [sampleID]/[sampleID].readCounts_*_sense.txt | Read counts of each annotated RNA using sense alignments
  [sampleID]/[sampleID].readCounts_*_antisense.txt | Read counts of each annotated RNA using antisense alignments
  [sampleID]/[sampleID].*.coverage.txt         | Contains read-depth across all gencode transcripts
  [sampleID]/[sampleID].*.CIGARstats.txt       | Summary of the alignment characteristics for genome-mapped reads
  [sampleID]/[sampleID].*_fastqc.zip           | FastQC output both before and after UniVec/rRNA contaminant removal
  [sampleID]/[sampleID].*.readLengths.txt      | Counts of the number of reads of each length following adapter removal
  [sampleID]/[sampleID].*.counts               | Read counts mapped to UniVec & rRNA (and calibrator oligo, if used) sequences
  [sampleID]/[sampleID].*.knownAdapterSeq      | 3' adapter sequence guessed (from known adapters) in this sample
  [sampleID]/[sampleID].*.adapterSeq           | 3' adapter used to clip the reads in this run
  [sampleID]/[sampleID].*.qualityEncoding      | PHRED encoding guessed for the input sequence reads

The main results directory ([sampleID]/, e.g. control_MKL1_cutadapted.fastq/) contains all files above as well as the following:

  Intermediate files containing reads ‘surviving’ each stage, in the following order of 1) 3’ adapter clipping, 2) 5’/3’ end trimming, 3) read-quality and homopolymer filtering, 4) UniVec contaminant removal, and 5) rRNA removal:

  [sampleID]/[sampleID].*.fastq.gz | Reads remaining after each QC / filtering / alignment step

  Reads aligned at each step of the pipeline in the following order 1) UniVec, 2) rRNA, 3) endogenous genome, 4) endogenous transcriptome:

  [sampleID]/filteringAlignments_*.bam                              | Alignments to the UniVec and rRNA sequences
  [sampleID]/endogenousAlignments_genome*.bam                       | Alignments (ungapped) to the endogenous genome
  [sampleID]/endogenousAlignments_genomeMapped_transcriptome*.bam   | Transcriptome alignments (ungapped) of reads mapped to the genome
  [sampleID]/endogenousAlignments_genomeUnmapped_transcriptome*.bam | Transcriptome alignments (ungapped) of reads **not** mapped to the genome

  Alignment summary information obtained after invoking the library priority. In the default setting, this will choose a miRBase alignment over any other alignment, for example if it is aligned to both a miRNA in miRBase and a miRNA in Gencode, the miRBase alignment is kept and all others discarded. It is especially important for tRNAs to be chosen in favour of piRNAs, as the latter have quite a large number of mis-annotations to the former.

  [sampleID]/endogenousAlignments_Accepted.txt.gz | All compatible alignments against the transcriptome after invoking the library priority
  [sampleID]/endogenousAlignments_Accepted.dict   | Contains the ID(s) of the RNA annotations indexed in the fifth column of the .txt.gz file above

  Finally, the quantifications are stored in the various readCounts_*.txt files. The format of these tab-delimited files is as follows:

  ReferenceID                                         uniqueReadCount totalReadCount multimapAdjustedReadCount multimapAdjustedBarcodeCount
  hsa-miR-143-3p:MIMAT0000435:Homo:sapiens:miR-143-3p   1235            4147219        4147219.0            0.0
  hsa-miR-10b-5p:MIMAT0000254:Homo:sapiens:miR-10b-5p   1430            2420500        2420241.0            0.0
  hsa-miR-10a-5p:MIMAT0000253:Homo:sapiens:miR-10a-5p   1115            784863         784600.5                 0.0
  hsa-miR-192-5p:MIMAT0000222:Homo:sapiens:miR-192-5p   759             559068         558542.5                 0.0

  Where ReferenceID is the ID of this annotated RNA, uniqueReadCount is the number of unique insert sequences attributed to this annotated RNA, totalReadCount is the total number of reads attributable to this annotated RNA, multimapAdjustedReadCount is the count after adjusting for multi-mapped reads, and multimapAdjustedBarcodeCount (available only for samples prepped with randomly barcoded 5’ and/or 3’ adapters such as Bioo) is the number of unique N-mer barcodes adjusted for multimapping ambiguity in the insert sequence.

Processing exceRpt output from multiple samples

Also provided is a script to combine output from multiple samples run through the exceRpt pipeline. The script (mergePipelineRuns.R) will take as input a directory containing 1 or more subdirectories or zipfiles containing output from the makefile above. In this way, results from 1 or more smallRNA-seq samples can be combined, several QC plots are generated, and the read-counts are normalised ready for downstream analysis by clustering and/or differential expression.

Installation

  This script is comparatively much simpler to install. Once the R software (http://cran.r-project.org/) is set up on your system the script should automatically identify and install all required dependencies. Again, this script is available on the Genboree Workbench (www.genboree.org) and is also free for academic use.

Using the script: On the command line

  mamba activate r_env
  jhuang@WS-2290C:/mnt/nvme0n1p1/exceRpt-master$ Rscript mergePipelineRuns.R /home/jhuang/DATA/Data_Ute/Data_Ute_smallRNA_7/MyResults/
  #OBSERVE the env of R: ~/mambaforge/envs/r_env/lib/R/library
  #which R: /home/jhuang/mambaforge/envs/r_env/bin/R
  #The env has nothing to do with "sudo chmod -R 777 /usr/lib/R/site-library"
  #ERROR: MyResults is not writable --> DEBUG: sudo chown -R jhuang:jhuang MyResults MyResults2 results results2

Using the script: Interactively in R

  Alternatively in an interactive R session, the merge can be performed using the following two commands:

  #mkdir MySummaries
  (r_env) jhuang@WS-2290C:~/DATA/Data_Ute/Data_Ute_smallRNA_7/exceRpt-master$ R
  > source("mergePipelineRuns_functions.R")
  #DEBUG freetype-error
  #sudo apt-get install libfreetype6-dev
  mamba activate r_env
  mamba install -c conda-forge --force-reinstall freetype fontconfig pkg-config
  library(systemfonts)
  system_fonts()  # Should return font list without errors

  > processSamplesInDir("../MyResults/", "../MySummaries")
      2025-03-28 18:18:40.916167:  Searching for valid exceRpt pipeline output in ../MyResults/
      2025-03-28 18:18:44.479166:  Found 12 valid samples
      2025-03-28 18:18:44.892834:  Reading sample data...
      2025-03-28 18:18:47.02125:  [1/12] Added sample '0505_WaGa_scr_DMSO_cutadapted.fastq'
      2025-03-28 18:18:49.314131:  [2/12] Added sample '0505_WaGa_scr_Dox_cutadapted.fastq'
      2025-03-28 18:18:52.701234:  [3/12] Added sample '0505_WaGa_sT_DMSO_cutadapted.fastq'
      2025-03-28 18:18:57.191507:  [4/12] Added sample '0505_WaGa_sT_Dox_cutadapted.fastq'
      2025-03-28 18:19:00.162267:  [5/12] Added sample '0505_WaGa_wt_cutadapted.fastq'
      2025-03-28 18:19:05.992193:  [6/12] Added sample '1905_WaGa_scr_DMSO_cutadapted.fastq'
      2025-03-28 18:19:11.061668:  [7/12] Added sample '1905_WaGa_scr_Dox_cutadapted.fastq'
      2025-03-28 18:19:16.101974:  [8/12] Added sample '1905_WaGa_sT_DMSO_cutadapted.fastq'
      2025-03-28 18:19:21.43279:  [9/12] Added sample '1905_WaGa_sT_Dox_cutadapted.fastq'
      2025-03-28 18:19:30.264677:  [10/12] Added sample '1905_WaGa_wt_cutadapted.fastq'
      2025-03-28 18:19:38.989424:  [11/12] Added sample 'control_MKL1_cutadapted.fastq'
      2025-03-28 18:19:47.058822:  [12/12] Added sample 'control_WaGa_cutadapted.fastq'
      2025-03-28 18:19:47.059524:  Creating raw read-count matrices for available libraries
      2025-03-28 18:19:47.122305:  Saving raw data to disk
      [1] "Attempting to save to: ../MySummaries/exceRpt_smallRNAQuants_ReadCounts.RData"
      [1] "Directory exists? TRUE"
      [1] "Directory writable? TRUE"
      2025-03-28 18:19:47.888117:  Normalising to RPM
      2025-03-28 18:19:47.906386:  Saving normalised data to disk
      2025-03-28 18:19:49.156846:  Creating QC plots
      2025-03-28 18:19:49.18454:  Plotting read-length distributions
      2025-03-28 18:19:50.033017:  Plotting run-duration
      2025-03-28 18:19:50.521018:  Plotting # mapped reads
      2025-03-28 18:19:50.525369:  Plotting mapping stats heatmap (1/3)
      2025-03-28 18:19:50.714444:  Plotting mapping stats heatmap (2/3)
      2025-03-28 18:19:50.909217:  Plotting mapping stats heatmap (3/3)
      2025-03-28 18:19:51.100313:  Plotting QC result
      2025-03-28 18:19:51.954369:  Plotting biotype counts
      2025-03-28 18:19:53.470085:  Plotting miRNA expression distributions
      2025-03-28 18:19:54.861385:  All done!
      2025-03-28 18:19:54.861712:  Warning messages:
      Warning message:
      In install.packages(update[instlib == l, "Package"], l,  ... :
        installation of package ‘systemfonts’ had non-zero exit status
      There were 27 warnings (use warnings() to see them)

  Apart from some status messages, warnings, or possibly errors, no R objects are output from this function. Instead several files are created that are described immediately below…

Script output

  Several files are output by the script in the location of the input exceRpt results (or somewhere else if explicitly specified). All output files are prefixed with ‘exceRpt_’ and contain a variety of information regarding all samples input:

  File Name     Description

  QC data:
  exceRpt_DiagnosticPlots.pdf   All diagnostic plots automatically generated by the merge script
  exceRpt_readMappingSummary.txt    Read-alignment summary including total counts for each library
  exceRpt_ReadLengths.txt   Read-lengths (after 3’ adapters/barcodes are removed)

  Raw transcriptome quantifications:
  exceRpt_miRNA_ReadCounts.txt  miRNA read-counts quantifications
  exceRpt_tRNA_ReadCounts.txt   tRNA read-counts quantifications
  exceRpt_piRNA_ReadCounts.txt  piRNA read-counts quantifications
  exceRpt_gencode_ReadCounts.txt    gencode read-counts quantifications
  exceRpt_circularRNA_ReadCounts.txt    circularRNA read-count quantifications

  Normalised transcriptome quantifications:
  exceRpt_miRNA_ReadsPerMillion.txt     miRNA RPM quantifications
  exceRpt_tRNA_ReadsPerMillion.txt  tRNA RPM quantifications
  exceRpt_piRNA_ReadsPerMillion.txt     piRNA RPM quantifications
  exceRpt_gencode_ReadsPerMillion.txt   gencode RPM quantifications
  exceRpt_circularRNA_ReadsPerMillion.txt   circularRNA RPM quantifications

  R objects:
  exceRpt_smallRNAQuants_ReadCounts.RData   All raw data (binary R object)
  exceRpt_smallRNAQuants_ReadsPerMillion.RData  All normalised data (binary R object)

like unlike

点赞本文的读者

还没有人对此文章表态

本文有评论

没有评论

Small RNA sequencing processing in the example of smallRNA_7 using exceRpt

本文有评论

看文章，发评论，不要沉默

最受欢迎文章

最新文章

最多评论文章

推荐相似文章