gene_x 0 like s 519 view s
Tags:
Small RNA sequencing is a type of RNA-sequencing (RNA-seq) that specifically targets and sequences small RNA molecules in a sample.
RNA-seq is a technique that uses next-generation sequencing (NGS) to reveal the presence and quantity of RNA in a biological sample at a given moment, capturing a snapshot of the transcriptome.
Small RNAs, including microRNAs (miRNAs), small interfering RNAs (siRNAs), and piwi-interacting RNAs (piRNAs), play crucial roles in gene regulation. They typically range from 20 to 30 nucleotides in length.
prepare raw data
#mv Data_Ute_smallRNA_3/bundle_v1 Data_Ute_smallRNA_5
ln -s ../Data_Ute_smallRNA_3/bundle_v1 .
#OLD MKL_1_wt_1_221216.fastq.gz -> ../221216_NB501882_0404_AHLVNMBGXM/ute/nf796/MKL_1_wt_1_S16_R1_001.fastq.gz
#OLD MKL_1_wt_2_221216.fastq.gz -> ../221216_NB501882_0404_AHLVNMBGXM/ute/nf797/MKL_1_wt_2_S17_R1_001.fastq.gz
ln -s ./230623_newDemulti_smallRNAs/210817_NB501882_0294_AHW5Y2BGXJ_smallRNA_Ute_newDemulti/2021_nf_ute_smallRNA/nf655/MKL_1_derived_EV_miRNA_S1_R1_001.fastq.gz 2021_August_nf655_MKL-1_EV-miRNA.fastq.gz
ln -s ./230623_newDemulti_smallRNAs/210817_NB501882_0294_AHW5Y2BGXJ_smallRNA_Ute_newDemulti/2021_nf_ute_smallRNA/nf657/WaGa_derived_EV_miRNA_S2_R1_001.fastq.gz 2021_August_nf657_WaGa_EV-miRNA.fastq.gz
ln -s ./230623_newDemulti_smallRNAs/220617_NB501882_0371_AH7572BGXM_smallRNA_Ute_newDemulti/2022_nf_ute_smallRNA/nf774/0403_WaGa_wt_S1_R1_001.fastq.gz 2022_August_nf774_0403_WaGa_wt.fastq.gz
ln -s ./230623_newDemulti_smallRNAs/220617_NB501882_0371_AH7572BGXM_smallRNA_Ute_newDemulti/2022_nf_ute_smallRNA/nf780/0505_MKL1_wt_S2_R1_001.fastq.gz 2022_August_nf780_0505_MKL-1_wt.fastq.gz
ln -s ./230623_newDemulti_smallRNAs/221216_NB501882_0404_AHLVNMBGXM_smallRNA_Ute_newDemulti/2022_nf_ute_smallRNA/nf796/MKL-1_wt_1_S1_R1_001.fastq.gz 2022_November_nf796_MKL-1_wt_1.fastq.gz
ln -s ./230623_newDemulti_smallRNAs/221216_NB501882_0404_AHLVNMBGXM_smallRNA_Ute_newDemulti/2022_nf_ute_smallRNA/nf797/MKL-1_wt_2_S2_R1_001.fastq.gz 2022_November_nf797_MKL-1_wt_2.fastq.gz
ln -s ./230602_NB501882_0428_AHKG53BGXT/demulti_new/nf876/1002_WaGa_sT_Dox_S1_R1_001.fastq.gz 2023_June_nf876_1002_WaGa_sT_Dox.fastq.gz
ln -s ./230602_NB501882_0428_AHKG53BGXT/demulti_new/nf887/2312_MKL_1_scr_DMSO_S2_R1_001.fastq.gz 2023_June_nf887_2312_MKL-1_scr_DMSO.fastq.gz
main run
mkdir our_out
# -qc -ra TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC -rb 4 #NOT_USED
# -mic -mtool Blast -mdb viruses #IGNORING Microbe Module since it is very time-consuming!
#jhuang@hamburg:~/DATA/Data_Ute/Data_Ute_smallRNA_5$ java -jar COMPSRA.jar -ref hg38 -qc -ra TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC -rb 4 -rh 20 -rt 20 -rr 20 -rlh 8,17 -aln -mt star -ann -ac 1,2,3,4,5,6 -mic -mtool Blast -mdb viruses -in 2021_August_nf655_MKL-1_EV-miRNA.fastq.gz -out ./our_out/
for sample in 2021_August_nf655_MKL-1_EV-miRNA 2021_August_nf657_WaGa_EV-miRNA 2022_August_nf774_0403_WaGa_wt 2022_August_nf780_0505_MKL-1_wt 2022_November_nf796_MKL-1_wt_1 2022_November_nf797_MKL-1_wt_2 2023_June_nf876_1002_WaGa_sT_Dox 2023_June_nf887_2312_MKL-1_scr_DMSO; do
mkdir our_out/${sample}/
java -jar COMPSRA.jar -ref hg38 -rh 20 -rt 20 -rr 20 -rlh 8,17 -aln -mt star -ann -ac 1,2,3,4,5,6 -in ${sample}.fastq.gz -out ./our_out/
done
prepare Data_Ute/Data_Ute_smallRNA_4/sample.list
2021_August_nf655_MKL-1_EV-miRNA
2021_August_nf657_WaGa_EV-miRNA
2022_August_nf774_0403_WaGa_wt
2022_August_nf780_0505_MKL-1_wt
2022_November_nf796_MKL-1_wt_1
2022_November_nf797_MKL-1_wt_2
2023_June_nf876_1002_WaGa_sT_Dox
2023_June_nf887_2312_MKL-1_scr_DMSO
extract the raw counts and perform statistical test on pre-defined groups.
#The following results calculate raw counts (Note: If you only want to merge the count files, you can use -fm -fms.)
java -jar COMPSRA.jar -ref hg38 -fun -fm -fms 1-8 -fdclass 1,2,3,4,5 -fdann -pro COMPSRA_MERGE -inf ./sample.list -out ./our_out/
#The following command calculate statistical test after defining case and control.
java -jar COMPSRA.jar -ref hg38 -fun -fd -fdclass 1,2,3,4,5,6 -fdcase 1-2 -fdctrl 3-6 -fdnorm cpm -fdtest mwu -fdann -pro COMPSRA_DEG -inf ./sample.list -out ./our_out/
sed -i -e 's/_August/-08/g' COMPSRA_MERGE_0_miRNA.txt
sed -i -e 's/_November/-11/g' COMPSRA_MERGE_0_miRNA.txt
sed -i -e 's/_June/-06/g' COMPSRA_MERGE_0_miRNA.txt
sed -i -e 's/_STAR_Aligned_miRNA.txt//g' COMPSRA_MERGE_0_miRNA.txt
#sed -i -e 's/_piRNA.txt//g' COMPSRA_MERGE_0_piRNA.txt
#sed -i -e 's/_tRNA.txt//g' COMPSRA_MERGE_0_tRNA.txt
#sed -i -e 's/_snoRNA.txt//g' COMPSRA_MERGE_0_snoRNA.txt
#sed -i -e 's/_snRNA.txt//g' COMPSRA_DEG_0_snRNA.txt
sed -i -e 's/_August/-08/g' COMPSRA_DEG_0_miRNA.txt
sed -i -e 's/_November/-11/g' COMPSRA_DEG_0_miRNA.txt
sed -i -e 's/_June/-06/g' COMPSRA_DEG_0_miRNA.txt
sed -i -e 's/_STAR_Aligned_miRNA.txt//g' COMPSRA_DEG_0_miRNA.txt
import pandas as pd
df = pd.read_csv('COMPSRA_MERGE_0_miRNA.txt', sep='\t', index_col=0)
df = df.drop(columns=['Unnamed: 9'])
# Assuming df is your DataFrame
df.loc['Sum'] = df.sum(numeric_only=True)
df.to_csv('COMPSRA_MERGE_0_miRNA_.txt', sep='\t')
df = pd.read_csv('COMPSRA_MERGE_0_piRNA.txt', sep='\t', index_col=0)
df = df.drop(columns=['Unnamed: 9'])
df.loc['Sum'] = df.sum(numeric_only=True)
df.to_csv('COMPSRA_MERGE_0_piRNA_.txt', sep='\t')
df = pd.read_csv('COMPSRA_MERGE_0_snoRNA.txt', sep='\t', index_col=0)
df = df.drop(columns=['Unnamed: 9'])
df.loc['Sum'] = df.sum(numeric_only=True)
df.to_csv('COMPSRA_MERGE_0_snoRNA_.txt', sep='\t')
df = pd.read_csv('COMPSRA_MERGE_0_snRNA.txt', sep='\t', index_col=0)
df = df.drop(columns=['Unnamed: 9'])
df.loc['Sum'] = df.sum(numeric_only=True)
df.to_csv('COMPSRA_MERGE_0_snRNA_.txt', sep='\t')
df = pd.read_csv('COMPSRA_MERGE_0_tRNA.txt', sep='\t', index_col=0)
df = df.drop(columns=['Unnamed: 9'])
df.loc['Sum'] = df.sum(numeric_only=True)
df.to_csv('COMPSRA_MERGE_0_tRNA_.txt', sep='\t')
#samtools flagstat **.bam
#47217410 + 0 in total (QC-passed reads + QC-failed reads)
#45166321 + 0 mapped (95.66% : N/A)
#2051089 + 0 in total (QC-passed reads + QC-failed reads)
#TODO: check the microRNA in the paper mentioned in which position?
#Single publications on EVs as transport vehicles for specific miRNAs in the pathogenesis of Merkel cell carcinoma have also been reported, such as miR-375 and its function in proliferation
~/Tools/csv2xls-0.4/csv_to_xls.py COMPSRA_MERGE_0_miRNA_.txt \
COMPSRA_MERGE_0_piRNA_.txt \
COMPSRA_MERGE_0_tRNA_.txt \
COMPSRA_MERGE_0_snoRNA_.txt \
COMPSRA_MERGE_0_snRNA_.txt \
-d$'\t' -o raw_counts.xls;
# sorting the DEG table, change the sheet labels to 'miRNA_between_columns_B-C_and_columns_D-G'
~/Tools/csv2xls-0.4/csv_to_xls.py COMPSRA_DEG_0_miRNA.txt -d$'\t' -o normalized_and_significance_test_miRNA.xls;
##merging the row counts and statical values
#cut -f1-1 COMPSRA_MERGE_0_snoRNA.txt > f1_MERGE
#cut -f1-1 COMPSRA_DEG_0_snoRNA.txt > f1_DEG
#cut -f1-1 COMPSRA_MERGE_0_miRNA.txt > f1_MERGE
#cut -f1-1 COMPSRA_DEG_0_miRNA.txt > f1_DEG
#diff f1_MERGE f1_DEG
calculate the number of total reads, total human reads and total non-human reads.
for sample in 2021_August_nf655_MKL-1_EV-miRNA 2021_August_nf657_WaGa_EV-miRNA 2022_August_nf774_0403_WaGa_wt 2022_August_nf780_0505_MKL-1_wt 2022_November_nf796_MKL-1_wt_1 2022_November_nf797_MKL-1_wt_2 2023_June_nf876_1002_WaGa_sT_Dox 2023_June_nf887_2312_MKL-1_scr_DMSO; do
echo "--------------- ${sample} ---------------"
samtools flagstat ./${sample}/${sample}_STAR_Aligned.out.bam
samtools flagstat ./${sample}/${sample}_STAR_Aligned_UnMapped.bam
done
COMPSRA was composed of five functional modules: Quality Control, Alignment, Annotation, Microbe and Function. They are integrated into a pipeline and each module can also process independently.
Quality Control: To deal with fastq files and filter out the adapter sequences and reads with low quality.
Alignment:
Annotation:
Microbe:
Function:
点赞本文的读者
还没有人对此文章表态
没有评论
Sorry, 没有相似文章
© 2023 XGenes.com Impressum