-
adapter sequence
Library prep kit used: Lexogen Small RNA-Seq kit.

Some common adapter sequences from different kits, for reference:
- TruSeq Small RNA (Illumina): TGGAATTCTCGGGTGCCAAGG
- Small RNA Kits V1 (Illumina): TCGTATGCCGTCTTCTGCTTGT
- Small RNA Kits V1.5 (Illumina): ATCTCGTATGCCGTCTTCTGCTTG
- NEXTflex Small RNA Sequencing Kit v3 for Illumina Platforms (Bioo Scientific): TGGAATTCTCGGGTGCCAAGG
- LEXOGEN Small RNA-Seq Library Prep Kit (Illumina): TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC

Sample sheet of the sequencing run:

[Header],,,,,
IEMFileVersion,4,,,,
InvestigatorName,ag96,,,,
ExperimentName,ag96,,,,
Date,16.10.2023,,,,
Workflow,GenerateFASTQ,,,,
Application,NextSeqFASTQOnly,,,,
Assay,TruSeq HT,,,,
Description,pcr,,,,
Chemistry,Amplicon,,,,
,,,,,
[Reads],,,,,
82,,,,,
,,,,,
,,,,,
[Settings],,,,,
Adapter,TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC,,,,
,,,,,
,,,,,
[Data],,,,,
Sample_ID,Sample_Name,I7_Index_ID,index,Sample_Project,Description
nf930,01_0505_WaGa_wt_EV_RNA,SRi7001,CAGCGT,2023_064_nf_ute,smallRNA-Seq
nf931,02_0505_WaGa_sT_DMSO_EV_RNA,SRi7002,GATCAC,2023_064_nf_ute,smallRNA-Seq
nf932,03_0505_WaGa_sT_Dox_EV_RNA,SRi7003,ACCAGT,2023_064_nf_ute,smallRNA-Seq
nf933,04_0505_WaGa_scr_DMSO_EV_RNA,SRi7004,TGCACG,2023_064_nf_ute,smallRNA-Seq
nf934,05_0505_WaGa_scr_Dox_EV_RNA,SRi7005,ACATTA,2023_064_nf_ute,smallRNA-Seq
nf935,06_1905_WaGa_wt_EV_RNA,SRi7006,GTGTAG,2023_064_nf_ute,smallRNA-Seq
nf936,07_1905_WaGa_sT_DMSO_EV_RNA,SRi7007,CTAGTC,2023_064_nf_ute,smallRNA-Seq
nf937,08_1905_WaGa_sT_Dox_EV_RNA,SRi7008,TGTGCA,2023_064_nf_ute,smallRNA-Seq
nf938,09_1905_WaGa_scr_DMSO_EV_RNA,SRi7009,TCAGGA,2023_064_nf_ute,smallRNA-Seq
nf939,10_1905_WaGa_scr_Dox_EV_RNA,SRi7010,CGGTTA,2023_064_nf_ute,smallRNA-Seq
nf940,11_control_MKL1,SRi7011,TTAACT,2023_064_nf_ute,smallRNA-Seq
nf941,12_control_WaGa,SRi7012,ATGAAC,2023_064_nf_ute,smallRNA-Seq
-
input data
# Create short, analysis-friendly symlinks in the current directory that point
# at the R1 FASTQ files of run 231016_NB501882_0435_AHG7HMBGXV.
#
# Each table entry is "<Sample_ID dir>/<FASTQ file> <link name>". One `ln -s`
# is issued per entry: `ln` must be invoked once per link, because with more
# than two operands it treats the last operand as a target directory.
run_dir=./231016_NB501882_0435_AHG7HMBGXV
fastq_links=(
  "nf930/01_0505_WaGa_wt_EV_RNA_S1_R1_001.fastq.gz 0505_WaGa_wt.fastq.gz"
  "nf931/02_0505_WaGa_sT_DMSO_EV_RNA_S2_R1_001.fastq.gz 0505_WaGa_sT_DMSO.fastq.gz"
  "nf932/03_0505_WaGa_sT_Dox_EV_RNA_S3_R1_001.fastq.gz 0505_WaGa_sT_Dox.fastq.gz"
  "nf933/04_0505_WaGa_scr_DMSO_EV_RNA_S4_R1_001.fastq.gz 0505_WaGa_scr_DMSO.fastq.gz"
  "nf934/05_0505_WaGa_scr_Dox_EV_RNA_S5_R1_001.fastq.gz 0505_WaGa_scr_Dox.fastq.gz"
  "nf935/06_1905_WaGa_wt_EV_RNA_S6_R1_001.fastq.gz 1905_WaGa_wt.fastq.gz"
  "nf936/07_1905_WaGa_sT_DMSO_EV_RNA_S7_R1_001.fastq.gz 1905_WaGa_sT_DMSO.fastq.gz"
  "nf937/08_1905_WaGa_sT_Dox_EV_RNA_S8_R1_001.fastq.gz 1905_WaGa_sT_Dox.fastq.gz"
  "nf938/09_1905_WaGa_scr_DMSO_EV_RNA_S9_R1_001.fastq.gz 1905_WaGa_scr_DMSO.fastq.gz"
  "nf939/10_1905_WaGa_scr_Dox_EV_RNA_S10_R1_001.fastq.gz 1905_WaGa_scr_Dox.fastq.gz"
  "nf940/11_control_MKL1_S11_R1_001.fastq.gz control_MKL1.fastq.gz"
  "nf941/12_control_WaGa_S12_R1_001.fastq.gz control_WaGa.fastq.gz"
)
for entry in "${fastq_links[@]}"; do
  target=${entry% *}   # text before the single space: path below run_dir
  link=${entry#* }     # text after the single space: name of the symlink
  # Skip names that already exist so this section can be re-run safely.
  if [[ -e "${link}" || -L "${link}" ]]; then
    continue
  fi
  ln -s -- "${run_dir}/${target}" "${link}"
done
-
run cutadapt
# Adapter and quality trimming with cutadapt, one run per sample.
#
# Input:  <sample>.fastq.gz   (the symlinks created in the "input data" step)
# Output: <sample>2.fastq.gz  (trimmed reads; the "2" suffix marks trimmed data)
# The cutadapt report of every run is appended to the file LOG.
#
# Options used (see the cutadapt user guide):
#   -a ADAPTER           3' adapter to remove (Lexogen Small RNA-Seq adapter)
#   -q 20                quality-trim read ends with cutoff 20
#   --minimum-length 5   discard reads shorter than 5 nt after trimming
#   --trim-n             trim N bases from the read ends
adapter=TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC
samples=(
  0505_WaGa_wt 0505_WaGa_sT_DMSO 0505_WaGa_sT_Dox 0505_WaGa_scr_DMSO 0505_WaGa_scr_Dox
  1905_WaGa_wt 1905_WaGa_sT_DMSO 1905_WaGa_sT_Dox 1905_WaGa_scr_DMSO 1905_WaGa_scr_Dox
  control_MKL1 control_WaGa
)
for sample in "${samples[@]}"; do
  cutadapt -a "${adapter}" -q 20 -o "${sample}2.fastq.gz" \
    --minimum-length 5 --trim-n "${sample}.fastq.gz" >> LOG
done

# Note kept from the original session (fastp invocation on one sample, writing
# to a "3"-suffixed file; not part of the pipeline above):
#jhuang@hamburg:~/DATA/Data_Ute/Data_Ute_smallRNA_7$ fastp -i 0505_WaGa_wt.fastq.gz -o 0505_WaGa_wt3.fastq.gz -a TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC
-
run COMPSRA
# Run COMPSRA on the cutadapt-trimmed reads (<sample>2.fastq.gz), then run its
# merge and differential-expression functions over ./sample.list.

# Reuse the COMPSRA reference bundle from an earlier project directory.
# Skipped when the link already exists so the section can be re-run.
if [[ ! -e bundle_v1 && ! -L bundle_v1 ]]; then
  ln -s ../Data_Ute_smallRNA_3/bundle_v1 .
fi

# DEBUG_1: Make sure the file COMPSRA.jar is present under Data_Ute_smallRNA_7
#          (the working directory).
# DEBUG_2: "-qc -ra TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC -rb 4" does not work!
#          Use "cutadapt -a xxxx -q 20" (previous step) instead of those
#          parameters.
samples=(
  0505_WaGa_wt 0505_WaGa_sT_DMSO 0505_WaGa_sT_Dox 0505_WaGa_scr_DMSO 0505_WaGa_scr_Dox
  1905_WaGa_wt 1905_WaGa_sT_DMSO 1905_WaGa_sT_Dox 1905_WaGa_scr_DMSO 1905_WaGa_scr_Dox
  control_MKL1 control_WaGa
)
for sample in "${samples[@]}"; do
  # -p: our_out/ may not exist yet at this point, and re-runs must not fail.
  mkdir -p "our_out/${sample}2/"
  java -jar COMPSRA.jar -ref hg38 -rh 20 -rt 20 -rr 20 -rlh 8,17 \
    -aln -mt star -ann -ac 1,2,3,4,5,6 \
    -in "${sample}2.fastq.gz" -out ./our_out/
done

# Excerpts from the COMPSRA manual for the options used here:
#4.2.3 -rb/-rm_bias n
#To remove n random bases in both 5' (5-prime) and 3' (3-prime) ends after removing the adapter sequence.
#4.2.4 -rh/-rm_low_quality_head score
#To remove the low quality bases with the score less than score from 5' (5-prime) end.
#4.2.5 -rt/-rm_low_quality_tail score
#To remove the low quality bases with the score less than score from 3' (3-prime) end.
#4.2.6 -rr/-rm_low_quality_read score
#To remove the low quality reads with the average score less than score.
#4.6.3 -fdclass/-fun_diff_class A1,A2,...,An
#To set the small RNAs that will be performed the differential expression analysis. The format is the same as the parameter -ac/-ann_class A1,A2,...,An.
#4.6.4 -fdcase/-fun_diff_case ID1,ID2,...,IDn
#To set the IDs of case samples.
#4.6.5 -fdctrl/-fun_diff_control ID1,ID2,...,IDn
#To set the IDs of control samples.
#4.4.2 -ac/-ann_class A1,A2,...,An
#To set the small RNA categories that will be annotated. The index of small RNA is listed:
#  1 miRNA
#  2 piRNA
#  3 tRNA
#  4 snoRNA
#  5 snRNA
#  6 circRNA

# NOTE(review): ./sample.list is not created anywhere in this section; it must
# exist before the two commands below are run. The -fdcase/-fdctrl IDs
# presumably index entries of that list -- TODO confirm they match the
# intended case/control groups.

# Merge the per-sample annotation results (project name COMPSRA_MERGE).
java -jar COMPSRA.jar -ref hg38 -fun -fm -fms 1-5 -fdclass 1,2,3,4,5 -fdann \
  -pro COMPSRA_MERGE -inf ./sample.list -out ./our_out/

# Differential expression analysis (project name COMPSRA_DEG).
java -jar COMPSRA.jar -ref hg38 -fun -fd -fdclass 1,2,3,4,5 -fdcase 1-2 -fdctrl 3-6 \
  -fdnorm cpm -fdtest mwu -fdann \
  -pro COMPSRA_DEG -inf ./sample.list -out ./our_out/
-
The results without using cutadapt for comparison
# Comparison run: COMPSRA on the reads WITHOUT prior cutadapt trimming.
# Results go to our_out/<sample>/ (no "2" suffix), next to the trimmed-read
# results in our_out/<sample>2/.
mkdir -p our_out
samples=(
  0505_WaGa_wt 0505_WaGa_sT_DMSO 0505_WaGa_sT_Dox 0505_WaGa_scr_DMSO 0505_WaGa_scr_Dox
  1905_WaGa_wt 1905_WaGa_sT_DMSO 1905_WaGa_sT_Dox 1905_WaGa_scr_DMSO 1905_WaGa_scr_Dox
  control_MKL1 control_WaGa
)
for sample in "${samples[@]}"; do
  mkdir -p "our_out/${sample}/"
  # Input is the untrimmed <sample>.fastq.gz. The original notes passed
  # ${sample}2.fastq.gz (the cutadapt output) here, which contradicts both the
  # section title and the output directory created above.
  java -jar COMPSRA.jar -ref hg38 -rh 20 -rt 20 -rr 20 -rlh 8,17 \
    -aln -mt star -ann -ac 1,2,3,4,5,6 \
    -in "${sample}.fastq.gz" -out ./our_out/
done

# Annotation summary pasted from COMPSRA output in the original notes
# (the notes do not say which sample or run it belongs to):
#
# {miRNA}:
#   [miRBase]
#     Total Annotation Items: 4697
#     Annotated Items (covered by least one read): 587
#     Unannotated Items: 4110
#     Reads Support the Annotation: 791636
# {piRNA}:
#   [piRNABank]
#     Total Annotation Items: 665175
#     Annotated Items (covered by least one read): 480
#     Unannotated Items: 664695
#     Reads Support the Annotation: 6363051
#   [piRBase]
#     Total Annotation Items: 804849
#     Annotated Items (covered by least one read): 1220
#     Unannotated Items: 803629
#     Reads Support the Annotation: 41374788
# {tRNA}:
#   [GtRNAdb]
#     Total Annotation Items: 601
#     Annotated Items (covered by least one read): 440
#     Unannotated Items: 161
#     Reads Support the Annotation: 18690795
# {snoRNA}:
#   [GEN_snoRNA]
#     Total Annotation Items: 1006
#     Annotated Items (covered by least one read): 250
#     Unannotated Items: 756
#     Reads Support the Annotation: 416228
# {snRNA}:
#   [GEN_snRNA]
#     Total Annotation Items: 2053
#     Annotated Items (covered by least one read): 267
#     Unannotated Items: 1786
#     Reads Support the Annotation: 793559
# {circRNA}:
#   [circRNA]
#     Total Annotation Items: 140195
#     Annotated Items (covered by least one read): 51488
#     Unannotated Items: 88707
#     Reads Support the Annotation: 14238651
Small RNA sequencing data processing, using smallRNA_7 as an example
Leave a reply