How to install DAMIAN?

gene_x 0 like s 149 view s

Tags: software

https://sourceforge.net/projects/damian-pd/

1, install PostgreSQL and the required Ruby gems on Ubuntu 18.04

sudo apt-get update
sudo apt install ruby-dev libffi-dev build-essential
sudo apt-get install postgresql postgresql-contrib
sudo apt-get install libpq-dev
sudo apt install default-jre
sudo apt install hmmer
#sudo apt-get install pgadmin3

sudo gem install pg -v 0.19
sudo gem install axlsx
sudo gem install amatch

#interactive: sudo -u postgres createuser --interactive
#not_interactive: https://medium.com/coding-blocks/creating-user-database-and-adding-access-on-postgresql-8bfcd2f4a91e
#sudo -u postgres psql
#postgres=# create database mydb;
#postgres=# create user myuser with encrypted password 'mypass';
#postgres=# grant all privileges on database mydb to myuser;

sudo -u postgres psql
CREATE USER damian_user WITH PASSWORD 'hamburg_uke';
CREATE DATABASE damian_db WITH OWNER damian_user;
postgres=# \q

2, install blast, tax and pfam

cd databases;
./get_all.sh;
cd tax; ./get_tax.sh;
cd pfam; ./get_pfam.sh;

#Taxonomy
#The following taxonomy files are required:
#ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdump.tar.gz (the downloaded file must be decompressed and unpacked with tar.)
#http://s3.amazonaws.com/matrixsciencemisc/prot.av2taxid.gz
#
#curl ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz -O
#curl "ftp://ftp.ebi.ac.uk/pub/databases/Pfam/releases/Pfam32.0/Pfam-A.hmm.gz" | gunzip > Pfam-A.hmm.txt
#curl "ftp://ftp.ebi.ac.uk/pub/databases/Pfam/releases/Pfam32.0/database_files/pfamA_tax_depth.txt.gz" | gunzip > pfamA_tax_depth.txt

#change settings in config.rb
DB_NAME = 'damian_db'
DB_USER = 'damian_user'
DB_PASS = 'hamburg_uke'
./damian_database.rb  --erase_and_rebuild --names databases/tax/names.dmp --nodes databases/tax/nodes.dmp --hmm databases/pfam/Pfam-A.hmm.txt --taxdepth databases/pfam/pfamA_tax_depth.txt

#### download and update the blast-database ####
cd /mnt/nvme0n1p1/REFs/blast/
#wget ftp://ftp.ncbi.nlm.nih.gov/blast/db/FASTA/nt.gz
#wget ftp://ftp.ncbi.nlm.nih.gov/blast/db/FASTA/nr.gz
#perl update_blastdb.pl --decompress nt
#perl update_blastdb.pl --decompress nr
##https://www.ncbi.nlm.nih.gov/books/NBK569850/
#update_blastdb.pl --decompress nt
#update_blastdb.pl --decompress nr
#curl ftp://ftp.ncbi.nlm.nih.gov/blast/db/FASTA/nr.gz -O
#curl ftp://ftp.ncbi.nlm.nih.gov/blast/db/FASTA/nt.gz -O
##makeblastdb -in exons_for_blastall.fasta -input_type fasta -dbtype nucl -title exons_for_blastall -parse_seqids -out exons_for_blastall
#makeblastdb -in nt -out nt -parse_seqids -dbtype nucl
#makeblastdb -in nr -out nr -parse_seqids -dbtype prot
##curl ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz -O
##curl "ftp://ftp.ebi.ac.uk/pub/databases/Pfam/releases/Pfam32.0/Pfam-A.hmm.gz" | gunzip > Pfam-A.hmm.txt
##curl "ftp://ftp.ebi.ac.uk/pub/databases/Pfam/releases/Pfam32.0/database_files/pfamA_tax_depth.txt.gz" | gunzip > pfamA_tax_depth.txt
##or:
##NO_THIS_SCRIPT: ./get_blast.sh

#Standard databases (nr etc.): rRNA/ITS databases Genomic + transcript databases Betacoronavirus
#curl ftp://ftp.ncbi.nlm.nih.gov/blast/db/Betacoronavirus.tar.gz -O
#The contents are the same between https://ftp.ncbi.nlm.nih.gov/blast/db/ and https://ftp.ncbi.nlm.nih.gov/blast/db/v5/ since v5 is the default!
#https://ftp.ncbi.nlm.nih.gov/blast/db/nt-nucl-metadata.json    #158
# Download every pre-formatted nt volume, nt.000 ... nt.157, into the current
# directory. Zero-padded brace ranges need bash >= 4.
for no in {000..157}; do
  curl -O "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nt.${no}.tar.gz"
done
#https://ftp.ncbi.nlm.nih.gov/blast/db/nr-prot-metadata.json    #103
# nr volumes are numbered with two digits up to 99 and with three digits
# afterwards (nr.00 ... nr.99, nr.100 ... nr.102).
for no in {00..99} 100 101 102; do
  curl -O "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.${no}.tar.gz"
done
# Unpack the archives one at a time: tar reads a single archive per call, so
# `tar xzf *tar.gz` would open only the first match and treat every other
# file name as a member to extract from it.
for archive in *.tar.gz; do
  tar -xzf "$archive"
done
##Is this RNA data (transcriptome)? By default the data is treated as RNA data.
#damian_database.rb  --erase_and_rebuild --names blast_2020_install/taxdump/names.dmp --nodes blast_2020_install/taxdump/nodes.dmp --hmm pfam/Pfam-A.hmm.txt --taxdepth pfam/#pfamA_tax_depth.txt #pfam annotation cannot be updated!!

3, create .ncbirc and set the PATH

#in the file /home/jhuang/.ncbirc
BLASTDB=/mnt/nvme0n1p1/REFs/blast/
#echo "[BLAST]" > /home/jhuang/.ncbirc
#echo "BLASTDB=/media/jhuang/Elements1/BLAST_db_v5/nt_v5/" >> /home/jhuang/.ncbirc

#mv damian_release damian
# add damian into PATH
DAMIAN_LOCATION='/home/jhuang/Tools/damian'
export PATH=$PATH:$DAMIAN_LOCATION

4, generate bowtie2 index and set damian_reference

##human
##Using existing index /ref/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.
#
##Horse (equCab2)
#rsync -a -P rsync://hgdownload.soe.ucsc.edu/goldenPath/equCab2/bigZips/chromFa.tar.gz ./
##Cattle  NCBI Genome ID: 82 (Bos taurus)
#rsync -a -P rsync://hgdownload.soe.ucsc.edu/goldenPath/bosTau8/bigZips/bosTau8.fa.gz ./
##ftp://ftp.ensembl.org/pub/release-95/fasta/bos_taurus/dna/
#
##Sheep NCBI Genome ID: 83 (Ovis aries)
#rsync -a -P rsync://hgdownload.soe.ucsc.edu/goldenPath/oviAri4/bigZips/oviAri4.fa.gz ./
#
##Wild boar  NCBI Genome ID: 84 (Sus scrofa)
#rsync -a -P rsync://hgdownload.soe.ucsc.edu/goldenPath/susScr11/bigZips/susScr11.fa.gz ./
#
##Atlantic salmon (Salmo salar)
#https://www.ncbi.nlm.nih.gov/genome/369?genome_assembly_id=248466
#https://www.ncbi.nlm.nih.gov/genome/?term=salmo%20salar
#https://www.ncbi.nlm.nih.gov/assembly/?term=salmon+salar
#rsync -avz /ref/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa jhuang@10.162.6.119:/home/jhuang/DATA/
#rsync -a -P salmon_salar_assemblies.tar jhuang@10.162.6.119:/home/jhuang/REFs
#
##Mosquitoes/culex pipiens
#https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?lvl=0&id=233155
#https://www.biorxiv.org/content/10.1101/240747v1.full
#aedes mascarensis
#Aedes albopictus
#
#Taxonomy ID: 7176 (Culex quinquefasciatus (southern house mosquito))
#https://www.ncbi.nlm.nih.gov/nuccore/?term=C.+pipiens
#https://www.ncbi.nlm.nih.gov/assembly?LinkName=bioproject_assembly_all&from_uid=18751
#https://www.ncbi.nlm.nih.gov/genome/?term=txid7176[orgn]
#https://www.ncbi.nlm.nih.gov/assembly/GCF_000208785.1/
#https://www.ncbi.nlm.nih.gov/genome/?term=txid263438[orgn]
#
#Taxonomy ID: 7175 (C. pipiens) --> no genome
#https://www.ncbi.nlm.nih.gov/genome/?term=txid7175[orgn]
#https://www.ncbi.nlm.nih.gov/assembly/GCF_000209185.1
#rsync -a -P GCF_000209185_1_CulPip1_0_genomic.fna.gz jhuang@10.162.6.119:/home/jhuang/REFs

rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Ovis_aries.Oar_v3.1.dna.toplevel.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Ovis_aries.Oar_v3.1.cdna.all.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Ovis_aries.Oar_v3.1.ncrna.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Sus_scrofa.Sscrofa11.1.dna.toplevel.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Sus_scrofa.Sscrofa11.1.cdna.all.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Sus_scrofa.Sscrofa11.1.ncrna.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Bos_taurus.ARS-UCD1.2.dna.toplevel.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Bos_taurus.ARS-UCD1.2.cdna.all.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Bos_taurus.ARS-UCD1.2.ncrna.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Equus_caballus.EquCab3.0.dna.toplevel.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Equus_caballus.EquCab3.0.cdna.all.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Equus_caballus.EquCab3.0.ncrna.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Salmo_salar.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/GCF_000209185_1_CulPip1_0_genomic.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Macaca_mulatta.Mmul_8.0.1.dna.toplevel.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Macaca_mulatta.Mmul_8.0.1.cdna.all.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Macaca_mulatta.Mmul_8.0.1.ncrna.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Ovis_aries_musimon.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Cervus_elaphus_hippelaphus.fa  .

#damian_reference.rb --add --host hg38 --type both --fasta /mnt/h/jhuang/ref/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa --primary --description 'Homo_sapiens_UCSC_hg38 (dna)'
#damian_reference.rb --add --host wildboar --type both --fasta /home/jhuang/REFs/susScr11.fa --primary --description 'Wild boar  NCBI Genome ID: 84 (Sus scrofa) (dna)'
#damian_reference.rb --add --host horse --type both --fasta /home/jhuang/REFs/equCab2.fa --primary --description 'Horse equCab2 (dna)'
#damian_reference.rb --add --host salmon --type both --fasta /home/jhuang/REFs/salmon_salar.fa --primary --description 'Salmon salar RefSeq assembly accession: GCF_000233375.1 (dna)'
##damian_reference.rb --add --host sheep --type both --fasta /home/jhuang/REFs/oviAri4.fa --primary --description 'Sheep NCBI Genome ID: 83 (Ovis aries) (dna)'
##damian_reference.rb --add --host cattle --type both --fasta /home/jhuang/REFs/bosTau8.fa --primary --description 'Cattle  NCBI Genome ID: 82 (Bos taurus) (dna)'
##damian_reference.rb --add --host mosquito --type both --fasta /home/jhuang/REFs/GCF_000209185_1_CulPip1_0_genomic.fa --primary --description 'Culex pipiens quinquefasciatus (dna)'

# -- create the host index from Ensembl files --
# e.g. ftp://ftp.ensembl.org/pub/release-95/fasta/ovis_aries/dna/
#human and human3
damian_reference.rb --add  --host human --type both --fasta ./Homo_sapiens.GRCh38.dna.toplevel.fa --primary --description 'Homo sapiens (dna)'
damian_reference.rb --add  --host human --type rna --fasta ./Homo_sapiens.GRCh38.cdna.all.fa --description 'Homo sapiens (cdna)'
damian_reference.rb --add  --host human --type rna --fasta ./Homo_sapiens.GRCh38.ncrna.fa --description 'Homo sapiens (ncrna)'
#human3: for some FASTQ files the 'human' reference removes too many reads (the filtering is too strict), so we generate human3 for looser filtering of human reads.
damian_reference.rb --add  --host human3 --type both --fasta ./genome.fa --primary --description 'Homo_sapiens_UCSC_hg38 (dna)'
damian_reference.rb --add  --host human3 --type rna --fasta ./Homo_sapiens.GRCh38.cdna.all.fa --description 'Homo sapiens (cdna)'
damian_reference.rb --add  --host human3 --type rna --fasta ./Homo_sapiens.GRCh38.ncrna.fa --description 'Homo sapiens (ncrna)'

#sheep
damian_reference.rb --add  --host sheep --type both --fasta Ovis_aries.Oar_v3.1.dna.toplevel.fa --primary --description 'Ovis aries (dna)'
damian_reference.rb --add  --host sheep --type rna --fasta Ovis_aries.Oar_v3.1.cdna.all.fa --description 'Ovis aries (cdna)'
damian_reference.rb --add  --host sheep --type rna --fasta Ovis_aries.Oar_v3.1.ncrna.fa --description 'Ovis aries (ncrna)'
#pig
damian_reference.rb --add  --host pig --type both --fasta Sus_scrofa.Sscrofa11.1.dna.toplevel.fa --primary --description 'Sus scrofa (dna)'
damian_reference.rb --add  --host pig --type rna --fasta Sus_scrofa.Sscrofa11.1.cdna.all.fa --description 'Sus scrofa (cdna)'
damian_reference.rb --add  --host pig --type rna --fasta Sus_scrofa.Sscrofa11.1.ncrna.fa --description 'Sus scrofa (ncrna)'
#cow
damian_reference.rb --add  --host cow --type both --fasta Bos_taurus.ARS-UCD1.2.dna.toplevel.fa --primary --description 'Bos taurus (dna)'
damian_reference.rb --add  --host cow --type rna --fasta Bos_taurus.ARS-UCD1.2.cdna.all.fa --description 'Bos taurus (cdna)'
damian_reference.rb --add  --host cow --type rna --fasta Bos_taurus.ARS-UCD1.2.ncrna.fa --description 'Bos taurus (ncrna)'

#horse
damian_reference.rb --add  --host horse --type both --fasta ./Equus_caballus.EquCab3.0.dna.toplevel.fa --primary --description 'Equus caballus (dna)'
damian_reference.rb --add  --host horse --type rna --fasta ./Equus_caballus.EquCab3.0.cdna.all.fa --description 'Equus caballus (cdna)'
damian_reference.rb --add  --host horse --type rna --fasta ./Equus_caballus.EquCab3.0.ncrna.fa --description 'Equus caballus (ncrna)'
#salmo
damian_reference.rb --add  --host Salmo_salar --type both --fasta Salmo_salar.fa --primary --description 'Salmo salar (dna)'
#mosquito
damian_reference.rb --add  --host Culex_pipiens --type both --fasta GCF_000209185_1_CulPip1_0_genomic.fa --primary --description 'Culex pipiens (dna)'
#macaque
damian_reference.rb --add  --host macaque --type both --fasta ./Macaca_mulatta.Mmul_8.0.1.dna.toplevel.fa --primary --description 'Macaca mulatta (dna)'
damian_reference.rb --add  --host macaque --type rna --fasta ./Macaca_mulatta.Mmul_8.0.1.cdna.all.fa --description 'Macaca mulatta (cdna)'
damian_reference.rb --add  --host macaque --type rna --fasta ./Macaca_mulatta.Mmul_8.0.1.ncrna.fa --description 'Macaca mulatta (ncrna)'

#mouflon
damian_reference.rb --add  --host mouflon --type both --fasta ./Ovis_aries_musimon.fa --primary --description 'Ovis aries musimon (dna)'

#reddeer
damian_reference.rb --add  --host reddeer --type both --fasta ./Cervus_elaphus_hippelaphus.fa --primary --description 'Cervus elaphus hippelaphus (dna)'

##polar bear
#damian_reference.rb --add  --host polarbear --type both --fasta ./Ursus_maritimus.UrsMar_1.0.dna.toplevel.fa --primary --description 'Ursus_maritimus (dna)'

##The gray mouse lemur (Microcebus murinus) is a primate species of the genus of mouse lemurs within the group of lemurs.
#damian_reference.rb --add  --host lemur --type both --fasta ./Mmur3.0.fa --primary --description 'Microcebus murinus (dna)'

5, install and configure mutt

sudo apt install mutt

#in ~/.muttrc
set imap_user = 'xxx@yyy.com'
set imap_pass = 'xxxx'
set from= $imap_user
set use_from=yes
set realname='XXX YYY'
set folder = imaps://imap-mail.outlook.com:993
set spoolfile = "+INBOX"
set postponed="+[hotmail]/Drafts"
set mail_check = 100
set header_cache = "~/.mutt/cache/headers"
set message_cachedir = "~/.mutt/cache/bodies"
set certificate_file = "~/.mutt/certificates"
set smtp_url = "smtp://$imap_user@smtp-mail.outlook.com:587"
set smtp_pass = $imap_pass
set move = no
set imap_keepalive = 900
set record="+Sent"

Test: echo -e "Hi XXX,\n\nPlease find attached the latest results from our DAMIAN analysis.\n\nBest,\nYYY" | mutt -s "New results from DAMIAN" -- "xxx@googlemail.com"

6, intermediate commands

--1-- hmmsearch --domE 0.00001 -o /dev/null --domtblout /home/jhuang/rtpd_files/HD04_cons/idba_ud_assembly/domain.table --noali --cpu 10 /home/jhuang/Tools/damian/databases/pfam/Pfam-A.hmm.txt /home/jhuang/rtpd_files/HD04_cons/idba_ud_assembly/orfs.fasta
--2-- megablast
--3-- blastn or blastp
/home/jhuang/Tools/damian/3rd_party/ncbi-blast/bin/blastp -task blastp -evalue 10E-2 -num_threads 26 -query /tmp/rtpd__565_20190514-28525-1cqkejq -db nr -outfmt 6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore staxids qcovs qcovhsp
/home/jhuang/Tools/damian/3rd_party/ncbi-blast/bin/blastp -task blastp -evalue 10E-2 -num_threads 10 -query /tmp/rtpd__584_20190515-11072-i8ct4h -db nr -outfmt 6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore staxids qcovs qcovhsp
/home/jhuang/Tools/damian/3rd_party/ncbi-blast/bin/blastn -task blastn -evalue 10E-2 -num_threads 10 -query /tmp/rtpd__586_20190515-6605-1wfobqe -db nt -outfmt 6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore staxids qcovs qcovhsp

7, Verifying the installation

#damian.rb --left selftest/r1.fastq.gz --right selftest/r2.fastq.gz --sample testrun --threads 12

# Subsample both mates of the paired-end run with seqtk (seed 100, fraction
# 0.1). Using the same seed for R1 and R2 keeps the read pairs in sync.
seqtk sample -s100 ./240621_M03701_0312_000000000-GHL9N/p20534/7448_7501_S0_R1_001.fastq.gz 0.1 > R1_0.1.fastq
seqtk sample -s100 ./240621_M03701_0312_000000000-GHL9N/p20534/7448_7501_S0_R2_001.fastq.gz 0.1 > R2_0.1.fastq

# NOTE(review): the subsampled FASTQ files are written before this cd, but
# damian.rb below receives them as relative paths -- confirm that they are
# reachable from the BLAST directory (or copy them there first).
cd /mnt/nvme0n1p1/REFs/blast

# Run one DAMIAN analysis on the subsampled reads, build the report, zip the
# sample directory and mail the archive.
# Arguments: $1 - sample suffix (appended to p20534_7448_7501_S0_)
#            $2 - value passed to --blastn
#            $3 - value passed to --blastp
run_damian_and_mail() {
  local suffix=$1 blastn_mode=$2 blastp_mode=$3
  local sample="p20534_7448_7501_S0_${suffix}"

  damian.rb --host human3 --type rna -1 R1_0.1.fastq -2 R2_0.1.fastq --sample "$sample" --blastn "$blastn_mode" --blastp "$blastp_mode" --min_contiglength 500 --threads 64 --force
  damian_report.rb
  zip -r "${sample}.zip" "${sample}/"
  printf 'Hi XXX,\n\nPlease find attached the latest results from our DAMIAN analysis.\n\nBest,\nYYY\n' \
    | mutt -a "./${sample}.zip" -s "New results from DAMIAN" -- "xxx@googlemail.com"
}

# Three runs that differ only in which BLAST stages are enabled.
run_damian_and_mail megablast never never
run_damian_and_mail blastn progressive never
run_damian_and_mail blastp never progressive

like unlike

点赞本文的读者

还没有人对此文章表态


本文有评论

没有评论

看文章,发评论,不要沉默


© 2023 XGenes.com Impressum