Run viral_ngs

http://xgenes.com/article/article-content/388/variant-calling-for-data-huang-human-herpesvirus-3-using-snippy-spandx-viralngs/

Calling inter-host variants by merging the results from snippy+spandx (Manually!) Calling intra-host variants using viral-ngs (http://xgenes.com/article/article-content/347/variant-calling-for-herpes-simplex-virus-1-from-patient-sample-using-capture-probe-sequencing/) #TODO: How? Merge intra- and inter-host variants, comparing the variants to the alignments of the assemblies to confirm its correctness.

#TODO: If the results from 2024 contain only intra-host variants, explain that this time I also provide the results of the inter-host variants!

Variant calling (inter-host + intra-host) for Data_Pietschmann_229ECoronavirus_Mutations_2024+2025+2026 (via docker own_viral_ngs) v2

  1. Input data:

     # ---- Datasets 2024 (in total 4) ----
     ln -s ../../Data_Pietschmann_229ECoronavirus_Mutations_2025/raw_data_2024/hCoV229E_Rluc_R1.fastq.gz hCoV229E_Rluc_R1.fastq.gz
     ln -s ../../Data_Pietschmann_229ECoronavirus_Mutations_2025/raw_data_2024/hCoV229E_Rluc_R2.fastq.gz hCoV229E_Rluc_R2.fastq.gz
     ln -s ../../Data_Pietschmann_229ECoronavirus_Mutations_2025/raw_data_2024/p10_DMSO_R1.fastq.gz DMSO_p10_R1.fastq.gz
     ln -s ../../Data_Pietschmann_229ECoronavirus_Mutations_2025/raw_data_2024/p10_DMSO_R2.fastq.gz DMSO_p10_R2.fastq.gz
     ln -s ../../Data_Pietschmann_229ECoronavirus_Mutations_2025/raw_data_2024/p10_K22_R1.fastq.gz K22_p10_R1.fastq.gz
     ln -s ../../Data_Pietschmann_229ECoronavirus_Mutations_2025/raw_data_2024/p10_K22_R2.fastq.gz K22_p10_R2.fastq.gz
     ln -s ../../Data_Pietschmann_229ECoronavirus_Mutations_2025/raw_data_2024/p10_K7523_R1.fastq.gz X7523_p10_R1.fastq.gz
     ln -s ../../Data_Pietschmann_229ECoronavirus_Mutations_2025/raw_data_2024/p10_K7523_R2.fastq.gz X7523_p10_R2.fastq.gz
    
     # ---- Datasets 2025 (in total 3) ----
     ln -s ../../Data_Pietschmann_229ECoronavirus_Mutations_2025/raw_data_2025/250506_VH00358_136_AAG3YJ5M5/p20606/p16_DMSO_S29_R1_001.fastq.gz DMSO_p16_R1.fastq.gz
     ln -s ../../Data_Pietschmann_229ECoronavirus_Mutations_2025/raw_data_2025/250506_VH00358_136_AAG3YJ5M5/p20606/p16_DMSO_S29_R2_001.fastq.gz DMSO_p16_R2.fastq.gz
     ln -s ../../Data_Pietschmann_229ECoronavirus_Mutations_2025/raw_data_2025/250506_VH00358_136_AAG3YJ5M5/p20607/p16_K22_S30_R1_001.fastq.gz K22_p16_R1.fastq.gz
     ln -s ../../Data_Pietschmann_229ECoronavirus_Mutations_2025/raw_data_2025/250506_VH00358_136_AAG3YJ5M5/p20607/p16_K22_S30_R2_001.fastq.gz K22_p16_R2.fastq.gz
     ln -s ../../Data_Pietschmann_229ECoronavirus_Mutations_2025/raw_data_2025/250506_VH00358_136_AAG3YJ5M5/p20608/p16_X7523_S31_R1_001.fastq.gz X7523_p16_R1.fastq.gz
     ln -s ../../Data_Pietschmann_229ECoronavirus_Mutations_2025/raw_data_2025/250506_VH00358_136_AAG3YJ5M5/p20608/p16_X7523_S31_R2_001.fastq.gz X7523_p16_R2.fastq.gz
    
     # ---- Datasets 2026 (in total 3) ----
     ln -s ../raw_data_2026/20260212_AV243904_0054_B/02_DMSO_p26/02_DMSO_p26_R1.fastq.gz DMSO_p26_R1.fastq.gz
     ln -s ../raw_data_2026/20260212_AV243904_0054_B/02_DMSO_p26/02_DMSO_p26_R2.fastq.gz DMSO_p26_R2.fastq.gz
     ln -s ../raw_data_2026/20260212_AV243904_0054_B/01_K22_p26/01_K22_p26_R1.fastq.gz K22_p26_R1.fastq.gz
     ln -s ../raw_data_2026/20260212_AV243904_0054_B/01_K22_p26/01_K22_p26_R2.fastq.gz K22_p26_R2.fastq.gz
     ln -s ../raw_data_2026/20260212_AV243904_0054_B/03_X723_p26/03_X723_p26_R1.fastq.gz X7523_p26_R1.fastq.gz
     ln -s ../raw_data_2026/20260212_AV243904_0054_B/03_X723_p26/03_X723_p26_R2.fastq.gz X7523_p26_R2.fastq.gz
  2. Call variants using snippy

     ln -s ~/Tools/bacto/db/ .;
     ln -s ~/Tools/bacto/envs/ .;
     ln -s ~/Tools/bacto/local/ .;
     cp ~/Tools/bacto/Snakefile .;
     cp ~/Tools/bacto/bacto-0.1.json .;
     cp ~/Tools/bacto/cluster.json .;
    
     #download PP810610.gb from GenBank (the browser saved it as sequence(2).gb)
     mv ~/Downloads/sequence\(2\).gb db/PP810610.gb
    
     #setting the following in bacto-0.1.json
         "fastqc": false,
         "taxonomic_classifier": false,
         "assembly": true,
         "typing_ariba": false,
         "typing_mlst": true,
         "pangenome": true,
         "variants_calling": true,
         "phylogeny_fasttree": true,
         "phylogeny_raxml": true,
         "recombination": false, (due to gubbins-error set false)
         "genus": "Alphacoronavirus",
         "kingdom": "Viruses",
         "species": "Human coronavirus 229E",
         "mykrobe": {
             "species": "corona"
         },
         "reference": "db/PP810610.gb"
    
     mamba activate /home/jhuang/miniconda3/envs/bengal3_ac3
     (bengal3_ac3) /home/jhuang/miniconda3/envs/snakemake_4_3_1/bin/snakemake --printshellcmds
  3. Summarize all SNPs and Indels from the snippy result directory.

     cp ~/Scripts/summarize_snippy_res_ordered.py .
     # IMPORTANT_ADAPT the array isolates = ["hCoV229E_Rluc", "DMSO_p10", "K22_p10", "X7523_p10", "DMSO_p16", "K22_p16", "X7523_p16", "DMSO_p26", "K22_p26", "X7523_p26"]
     mamba activate plot-numpy1
     python3 ./summarize_snippy_res_ordered.py snippy
     #--> Summary CSV file created successfully at: snippy/summary_snps_indels.csv
     cd snippy
     #REMOVE_the_line? I don't see the sense of this line:    grep -v "None,,,,,,None,None" summary_snps_indels.csv > summary_snps_indels_.csv
  4. Calling variants using spandx (almost the same results as those from viral-ngs!)

     mamba deactivate
     mamba activate /home/jhuang/miniconda3/envs/spandx
     mkdir ~/miniconda3/envs/spandx/share/snpeff-5.1-2/data/PP810610
     cp PP810610.gb  ~/miniconda3/envs/spandx/share/snpeff-5.1-2/data/PP810610/genes.gbk
     vim ~/miniconda3/envs/spandx/share/snpeff-5.1-2/snpEff.config
     /home/jhuang/miniconda3/envs/spandx/bin/snpEff build PP810610    #-d
     ~/Scripts/genbank2fasta.py PP810610.gb
     mv PP810610.gb_converted.fna PP810610.fasta    #rename the header to "PP810610" in the fasta-file (this note previously said "NC_001348.1 xxxxx" -> "NC_001348", left over from the HHV-3 notes)
     ln -s /home/jhuang/Tools/spandx/ spandx
     (spandx) nextflow run spandx/main.nf --fastq "trimmed/*_P_{1,2}.fastq" --ref PP810610.fasta --annotation --database PP810610 -resume
    
     # Rerun SNP_matrix.sh due to the error ERROR_CHROMOSOME_NOT_FOUND in the variants annotation
     cd Outputs/Master_vcf
     (spandx) cp -r ../../snippy/hCoV229E_Rluc/reference .
     (spandx) cp ../../spandx/bin/SNP_matrix.sh ./
     #Note that ${variant_genome_path}=PP810610 in the following command (the value passed on the SNP_matrix.sh command line below), but it is no longer used after the command replacement.
     #Adapt "snpEff eff -no-downstream -no-intergenic -ud 100 -formatEff -v ${variant_genome_path} out.vcf > out.annotated.vcf" to
     #"/home/jhuang/miniconda3/envs/bengal3_ac3/bin/snpEff eff -no-downstream -no-intergenic -ud 100 -formatEff -c reference/snpeff.config -dataDir . ref out.vcf > out.annotated.vcf" in SNP_matrix.sh
     (spandx) bash SNP_matrix.sh PP810610 .
  5. Calling inter-host variants by merging the results from snippy+spandx (Manually!)

     # Inter-host variants(宿主间变异):一种病毒在两个人之间有不同的基因变异,这些变异可能与宿主的免疫反应、疾病表现或病毒传播的方式相关。
     cp All_SNPs_indels_annotated.txt All_SNPs_indels_annotated_backup.txt
     vim All_SNPs_indels_annotated.txt
    
     #in the file ids: grep "$(echo -e '\t')353$(echo -e '\t')" All_SNPs_indels_annotated.txt >> All_SNPs_indels_annotated_.txt
     #Replace \n with " All_SNPs_indels_annotated.txt >> All_SNPs_indels_annotated_.txt\ngrep "
     #Replace grep " --> grep "$(echo -e '\t')
     #Replace " All_ --> $(echo -e '\t')" All_
    
     # Potential intra-host variants: 10871, 19289, 23435.
     CHROM   POS     REF     ALT     TYPE    hCoV229E_Rluc_trimmed   p10_DMSO_trimmed        p10_K22_trimmed p10_K7523_trimmed       p16_DMSO_trimmed        p16_K22_trimmed p16_X7523_trimmed       Effect  Impact  Functional_Class        Codon_change    Protein_and_nucleotide_change   Amino_Acid_Length       Gene_name       Biotype
     PP810610        1464    T       C       SNP     C       C       C       C       C       C       C       missense_variant        MODERATE        MISSENSE        gTt/gCt p.Val416Ala/c.1247T>C   6757    CDS_1   protein_coding
     PP810610        1699    C       T       SNP     T       T       T       T       T       T       T       synonymous_variant      LOW     SILENT  gtC/gtT p.Val494Val/c.1482C>T   6757    CDS_1   protein_coding
     PP810610        6691    C       T       SNP     T       T       T       T       T       T       T       synonymous_variant      LOW     SILENT  tgC/tgT p.Cys2158Cys/c.6474C>T  6757    CDS_1   protein_coding
     PP810610        6919    C       G       SNP     G       G       G       G       G       G       G       synonymous_variant      LOW     SILENT  ggC/ggG p.Gly2234Gly/c.6702C>G  6757    CDS_1   protein_coding
     PP810610        7294    T       A       SNP     A       A       A       A       A       A       A       missense_variant        MODERATE        MISSENSE        agT/agA p.Ser2359Arg/c.7077T>A  6757    CDS_1   protein_coding
     * PP810610       10871   C       T       SNP     C       C/T     T       C/T     C/T     T       C/T     missense_variant        MODERATE        MISSENSE        Ctt/Ttt p.Leu3552Phe/c.10654C>T 6757    CDS_1   protein_coding
     PP810610        14472   T       C       SNP     C       C       C       C       C       C       C       missense_variant        MODERATE        MISSENSE        aTg/aCg p.Met4752Thr/c.14255T>C 6757    CDS_1   protein_coding
     PP810610        15458   T       C       SNP     C       C       C       C       C       C       C       synonymous_variant      LOW     SILENT  Ttg/Ctg p.Leu5081Leu/c.15241T>C 6757    CDS_1   protein_coding
     PP810610        16035   C       A       SNP     A       A       A       A       A       A       A       stop_gained     HIGH    NONSENSE        tCa/tAa p.Ser5273*/c.15818C>A   6757    CDS_1   protein_coding
     PP810610        17430   T       C       SNP     C       C       C       C       C       C       C       missense_variant        MODERATE        MISSENSE        tTa/tCa p.Leu5738Ser/c.17213T>C 6757    CDS_1   protein_coding
     * PP810610       19289   G       T       SNP     G       G       T       G       G       G/T     G       missense_variant        MODERATE        MISSENSE        Gtt/Ttt p.Val6358Phe/c.19072G>T 6757    CDS_1   protein_coding
     PP810610        21183   T       G       SNP     G       G       G       G       G       G       G       missense_variant        MODERATE        MISSENSE        tTt/tGt p.Phe230Cys/c.689T>G    1173    CDS_2   protein_coding
     PP810610        22636   T       G       SNP     G       G       G       G       G       G       G       missense_variant        MODERATE        MISSENSE        aaT/aaG p.Asn714Lys/c.2142T>G   1173    CDS_2   protein_coding
     PP810610        23022   T       C       SNP     C       C       C       C       C       C       C       missense_variant        MODERATE        MISSENSE        tTa/tCa p.Leu843Ser/c.2528T>C   1173    CDS_2   protein_coding
     * PP810610       23435   C       T       SNP     C       C       T       C/T     C       C/T     C/T     missense_variant        MODERATE        MISSENSE        Ctt/Ttt p.Leu981Phe/c.2941C>T   1173    CDS_2   protein_coding
     PP810610        24512   C       T       SNP     T       T       T       T       T       T       T       missense_variant        MODERATE        MISSENSE        Ctc/Ttc p.Leu36Phe/c.106C>T     88      CDS_4   protein_coding
     PP810610        24781   C       T       SNP     T       T       T       T       T       T       T       missense_variant        MODERATE        MISSENSE        aCt/aTt p.Thr36Ile/c.107C>T     77      CDS_5   protein_coding
     PP810610        25163   C       T       SNP     T       T       T       T       T       T       T       missense_variant        MODERATE        MISSENSE        Ctt/Ttt p.Leu82Phe/c.244C>T     225     CDS_6   protein_coding
     PP810610        25264   C       T       SNP     T       T       T       T       T       T       T       synonymous_variant      LOW     SILENT  gtC/gtT p.Val115Val/c.345C>T    225     CDS_6   protein_coding
     PP810610        26838   G       T       SNP     T       T       T       T       T       T       T
  6. Calling intra-host variants using viral-ngs

     # Intra-host variants(宿主内变异):同一个人感染了某种病毒,但在其体内的不同细胞或器官中可能存在多个不同的病毒变异株。
    
     #How to run and debug the viral-ngs docker?
     # ---- DEBUG_2026_1: using docker instead ----
     mkdir viralngs; cd viralngs
     ln -s ~/Tools/viral-ngs_docker/Snakefile Snakefile
     ln -s  ~/Tools/viral-ngs_docker/bin bin
     cp  ~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2024/refsel.acids refsel.acids
     cp  ~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2024/lastal.acids lastal.acids
     cp  ~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2024/config.yaml config.yaml
     cp  ~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2024/samples-runs.txt samples-runs.txt
     cp  ~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2024/samples-depletion.txt samples-depletion.txt
     cp  ~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2024/samples-metagenomics.txt samples-metagenomics.txt
     cp  ~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2024/samples-assembly.txt samples-assembly.txt
     cp  ~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2024/samples-assembly-failures.txt samples-assembly-failures.txt
     # Adapt the samples-*.txt files
    
     mkdir viralngs/data
     mkdir viralngs/data/00_raw
    
     mkdir bams
     ref_fa="PP810610.fasta";
     #for sample in hCoV229E_Rluc p10_DMSO p10_K22; do
     #for sample in p10_K7523 p16_DMSO p16_K22 p16_X7523; do
     for sample in hCoV229E_Rluc DMSO_p10 K22_p10 X7523_p10 DMSO_p16 K22_p16 X7523_p16 DMSO_p26 K22_p26 X7523_p26; do
         bwa index ${ref_fa}; \
         bwa mem -M -t 16 ${ref_fa} trimmed/${sample}_trimmed_P_1.fastq trimmed/${sample}_trimmed_P_2.fastq | samtools view -bS - > bams/${sample}_genome_alignment.bam; \
     done
    
     conda activate viral-ngs4
     #for sample in hCoV229E_Rluc p10_DMSO p10_K22; do
     #for sample in p10_K7523 p16_DMSO p16_K22 p16_X7523; do
     for sample in hCoV229E_Rluc DMSO_p10 K22_p10 X7523_p10 DMSO_p16 K22_p16 X7523_p16 DMSO_p26 K22_p26 X7523_p26; do
         picard AddOrReplaceReadGroups I=bams/${sample}_genome_alignment.bam O=~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2026/viralngs/data/00_raw/${sample}.bam SORT_ORDER=coordinate CREATE_INDEX=true RGPL=illumina RGID=$sample RGSM=$sample RGLB=standard RGPU=$sample VALIDATION_STRINGENCY=LENIENT; \
     done
     conda deactivate
    
     # -- ! First, leave samples-assembly.txt empty so that only the depletion steps are run!
     docker run -it -v /mnt/md1/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2026/viralngs:/work -v /home/jhuang/Tools/viral-ngs_docker:/home/jhuang/Tools/viral-ngs_docker -v /home/jhuang/REFs:/home/jhuang/REFs -v /home/jhuang/Tools/GenomeAnalysisTK-3.6:/home/jhuang/Tools/GenomeAnalysisTK-3.6 -v /home/jhuang/Tools/novocraft_v3:/home/jhuang/Tools/novocraft_v3 -v /usr/local/bin/gatk:/usr/local/bin/gatk   own_viral_ngs bash
     cd /work
     snakemake --directory /work --printshellcmds --cores 80
    
     # -- ! Secondly, manually run the assembly steps
     # --> Iteratively add the unfinished assemblies to the list, replacing one each time, and run "cd /work; snakemake --directory /work --printshellcmds --cores 80" after exiting and re-entering the docker-env, since some tools were automatically deleted during the run.

Here is the combined, consolidated fix sequence for your Docker container:


🔧 Complete Fix Commands (Run Inside Docker Container)

#!/bin/bash
# ============================================================
# FIX SCRIPT FOR viral-ngs Docker Environment
# Run these commands INSIDE your running Docker container
# ============================================================

echo "=== Step 1: Activate base environment for config changes ==="
conda activate base

echo "=== Step 2: Disable conda safety checks (fixes 'unsafe path' errors) ==="
conda config --set safety_checks disabled
conda config --set allow_softlinks true

echo "=== Step 3: Verify config was applied ==="
conda config --show safety_checks
# Expected output: safety_checks: disabled

echo "=== Step 4: (Optional) Update conda for compatibility ==="
conda update -n base -c defaults conda -y

echo "=== Step 5: Activate viral-ngs environment ==="
conda activate viral-ngs-env
echo "Current env: $CONDA_DEFAULT_ENV" && which python
#Current env: viral-ngs-env
#/opt/miniconda/bin/python --> ⚠️ Problem Identified!

#conda deactivate; conda deactivate;
## 1. Install mamba in your base environment (one-time setup)
#conda install -n base -c conda-forge mamba -y
## 2. Activate your existing environment
#conda activate viral-ngs-env
## 3. Use mamba to install missing packages (instead of conda)
#mamba install -c bioconda biopython mafft -y
## 4. Verify existing tools are still there
#which python
#conda list  # Shows all packages, both old and new

conda install python=3.6.7 -y
which python

echo "Current env: $CONDA_DEFAULT_ENV" && which python
#Current env: viral-ngs-env
#/opt/miniconda/envs/viral-ngs-env/bin/python (Python 3.6.7)

echo "=== Step 6: Install missing Python packages ==="
conda install -y -c conda-forge biopython

echo "=== Step 7: Install missing binary tools (with specific versions if needed) ==="
conda install -y -c bioconda perl=5.32.1 prinseq-lite samtools

echo "=== Step 8: Verify all installations ==="
echo "--- Checking samtools ---"
which samtools && samtools --version
#/opt/miniconda/envs/viral-ngs-env/bin/samtools samtools 1.9 using htslib 1.9

echo "--- Checking perl ---"
which perl && perl --version

echo "--- Checking prinseq-lite ---"
which prinseq-lite.pl && prinseq-lite.pl -version

echo "--- Checking Biopython ---"
python -c "import Bio; print('Biopython OK:', Bio.__version__)"

echo "=== Step 9: Refresh environment PATH ==="
hash -r

echo "=== ✅ All fixes applied! You can now re-run your pipeline ==="
echo "Tip: Run 'snakemake --unlock' first if pipeline is locked, then:"
echo "     snakemake -j <threads> --rerun-incomplete"

In Dockerfile

#ENV CONDA_ALLOW_UNSAFE_PATHS=1
#RUN conda update -n base -c defaults conda -y


🐳 To Make Fixes Permanent: Commit the Container

After running the fixes above, save your working container:

# 1. Exit the container (but don't delete it)
exit
# 2. Find the container ID and commit the container as a new image
docker ps -a
docker commit c51d44624f1b viral-ngs-fixed:2026-03-19

# 3. Next time, run the fixed image
#docker run -it -v /mnt/md1/... [your other volumes] viral-ngs-fixed:2026-03-19 bash
docker run -it -v /mnt/md1/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2026/viralngs:/work -v /home/jhuang/Tools/viral-ngs_docker:/home/jhuang/Tools/viral-ngs_docker -v /home/jhuang/REFs:/home/jhuang/REFs -v /home/jhuang/Tools/GenomeAnalysisTK-3.6:/home/jhuang/Tools/GenomeAnalysisTK-3.6 -v /home/jhuang/Tools/novocraft_v3:/home/jhuang/Tools/novocraft_v3 -v /usr/local/bin/gatk:/usr/local/bin/gatk   viral-ngs-fixed:2026-03-19 bash

conda activate viral-ngs-env
which samtools && samtools --version
#/opt/miniconda/envs/viral-ngs-env/bin/samtools samtools 1.9 using htslib 1.9
which perl && perl --version
#/opt/miniconda/envs/viral-ngs-env/bin/perl (v5.26.2)
which prinseq-lite.pl && prinseq-lite.pl -version
#/opt/miniconda/envs/viral-ngs-env/bin/prinseq-lite.pl (PRINSEQ-lite 0.20.4)
python -c "import Bio; print('Biopython OK:', Bio.__version__)"
#Biopython OK: 1.72

conda install -c bioconda trimmomatic -y
which trimmomatic
#/opt/miniconda/envs/viral-ngs-env/bin/trimmomatic (trimmomatic-0.39)

exit
docker ps -a
docker commit e70395e5625c viral-ngs-fixed:l

docker run -it -v /mnt/md1/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2026/viralngs:/work -v /home/jhuang/Tools/viral-ngs_docker:/home/jhuang/Tools/viral-ngs_docker -v /home/jhuang/REFs:/home/jhuang/REFs -v /home/jhuang/Tools/GenomeAnalysisTK-3.6:/home/jhuang/Tools/GenomeAnalysisTK-3.6 -v /home/jhuang/Tools/novocraft_v3:/home/jhuang/Tools/novocraft_v3 -v /usr/local/bin/gatk:/usr/local/bin/gatk   viral-ngs-fixed:l bash
cd /work
#!!!! IMPORTANT !!!!
conda activate viral-ngs-env
snakemake --directory /work --printshellcmds --cores 80

#DEBUG the specific command as follows, for example install Gap2Seq in the docker-env.
bin/assembly.py gapfill_gap2seq tmp/02_assembly/hCoV229E_Rluc.assembly2-scaffolded.fasta data/01_per_sample/hCoV229E_Rluc.cleaned.bam tmp/02_assembly/hCoV229E_Rluc.assembly2-gapfilled.fasta --memLimitGb 12 --maskErrors --randomSeed 0
#----> go to the script tools/gap2seq.py, install the required tool (for example gap2seq) and adapt the correct version.
conda install -y -c bioconda gap2seq
root@f47daf7c44ee:/work# Gap2Seq -h
#>Gap2Seq 3.1
vim /home/jhuang/Tools/viral-ngs_docker/bin/tools/gap2seq.py    #Adapt the TOOL_NAME and TOOL_VERSION in the script

#Save the docker-env with newly installed Gap2Seq
exit
docker ps -a
docker commit f47daf7c44ee viral-ngs-fixed:la

#Launch the new docker-env with Gap2Seq installed
docker run -it -v /mnt/md1/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2026/viralngs:/work -v /home/jhuang/Tools/viral-ngs_docker:/home/jhuang/Tools/viral-ngs_docker -v /home/jhuang/REFs:/home/jhuang/REFs -v /home/jhuang/Tools/GenomeAnalysisTK-3.6:/home/jhuang/Tools/GenomeAnalysisTK-3.6 -v /home/jhuang/Tools/novocraft_v3:/home/jhuang/Tools/novocraft_v3 -v /usr/local/bin/gatk:/usr/local/bin/gatk viral-ngs-fixed:la bash
cd /work
conda activate viral-ngs-env
snakemake --directory /work --printshellcmds --cores 80

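#MANUALLY running the following commands!
#NOTE(review): the key=value arguments below look like the parameter dump from the pipeline log rather than a command line; the positional form appears to be "bin/assembly.py gapfill_gap2seq <in_scaffold> <in_bam> <out_scaffold> --memLimitGb 12 --maskErrors --randomSeed 0" -- confirm before running.
bin/assembly.py gapfill_gap2seq in_scaffold=tmp/02_assembly/hCoV229E_Rluc.assembly2-scaffolded.fasta in_bam=data/01_per_sample/hCoV229E_Rluc.cleaned.bam out_scaffold=tmp/02_assembly/hCoV229E_Rluc.assembly2-gapfilled.fasta mem_limit_gb=12 time_soft_limit_minutes=60.0 mask_errors=True gap2seq_opts= random_seed=0 threads=None loglevel=INFO tmp_dir=/tmp tmp_dirKeep=False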

#MANUALLY running the following commands!
bin/assembly.py impute_from_reference tmp/02_assembly/hCoV229E_Rluc.assembly2-gapfilled.fasta tmp/02_assembly/hCoV229E_Rluc.assembly2-scaffold_ref.fasta tmp/02_assembly/hCoV229E_Rluc.assembly3-modify.fasta --newName hCoV229E_Rluc --replaceLength 55 --minLengthFraction 0.05 --minUnambig 0.05 --index

#!!!! TODO_NEXT_WEEK !!!!: run docker several times so that all ${sample}.assembly2-scaffolded.fasta are generated! Then run the following commands, after that run docker for all isolates.
for sample in DMSO_p10 K22_p10 X7523_p10 DMSO_p16 K22_p16 X7523_p16 DMSO_p26 K22_p26 X7523_p26; do
    #MANUALLY running the following commands!
    bin/assembly.py gapfill_gap2seq in_scaffold=tmp/02_assembly/${sample}.assembly2-scaffolded.fasta in_bam=data/01_per_sample/${sample}.cleaned.bam out_scaffold=tmp/02_assembly/${sample}.assembly2-gapfilled.fasta mem_limit_gb=12 time_soft_limit_minutes=60.0 mask_errors=True gap2seq_opts= random_seed=0 threads=None loglevel=INFO tmp_dir=/tmp tmp_dirKeep=False
    #MANUALLY running the following commands!
    bin/assembly.py impute_from_reference tmp/02_assembly/${sample}.assembly2-gapfilled.fasta tmp/02_assembly/${sample}.assembly2-scaffold_ref.fasta tmp/02_assembly/${sample}.assembly3-modify.fasta --newName ${sample} --replaceLength 55 --minLengthFraction 0.05 --minUnambig 0.05 --index
done


🔍 Troubleshooting Checklist

If issues persist after running the fix:

# Check conda config
conda config --show | grep -E "safety|softlink"

# List installed packages in viral-ngs-env
conda activate viral-ngs-env
conda list | grep -E "samtools|prinseq|perl|biopython"

# Test each tool manually
samtools --version
prinseq-lite.pl -version
perl -e 'use Bio::Seq; print "BioPerl OK\n"'

# Check PATH includes conda bin directories
echo $PATH | tr ':' '\n' | grep conda

⚠️ Important Notes

Issue Solution
safety_checks disabled not working Must run conda config in base env, not viral-ngs-env
Packages still fail to install Try conda clean --all -y first, then reinstall
samtools: command not found after install Run hash -r or restart shell to refresh PATH
Pipeline still fails after fixes Run snakemake --unlock --rerun-incomplete to resume
    conda config --set safety_checks disabled
    conda activate viral-ngs-env
    conda install -y -c conda-forge biopython

    docker ps -a
    # Look for the container you are working in, e.g., "viral-ngs-container"
    #bb117a6ca70a

    docker commit <container_id> viral-ngs-fixed:latest    # <container_id> as listed by "docker ps -a", e.g. bb117a6ca70a
    docker run -it viral-ngs-fixed:latest bash

    #
    # ---- NOTE that the following steps need rerun --> DOES NOT WORK, USE STRATEGY ABOVE ----
    #
    #for sample in p10_K22 p10_K7523; do
    # for sample in hCoV229E_Rluc p10_DMSO p10_K22 p10_K7523 p16_DMSO p16_K22 p16_X7523; do
    #     bin/read_utils.py merge_bams data/01_cleaned/${sample}.cleaned.bam tmp/01_cleaned/${sample}.cleaned.bam --picardOptions SORT_ORDER=queryname
    #     bin/read_utils.py rmdup_mvicuna_bam tmp/01_cleaned/${sample}.cleaned.bam data/01_per_sample/${sample}.cleaned.bam --JVMmemory 30g
    # done
    #
    #
    #Note that the error generated by nextflow is from the step gapfill_gap2seq!
    # for sample in hCoV229E_Rluc p10_DMSO p10_K22 p10_K7523 p16_DMSO p16_K22 p16_X7523; do
    #     bin/assembly.py assemble_spades data/01_per_sample/${sample}.taxfilt.bam /home/jhuang/REFs/viral_ngs_dbs/trim_clip/contaminants.fasta tmp/02_assembly/${sample}.assembly1-spades.fasta --nReads 10000000 --threads 15 --memLimitGb 12
    # done
    # for sample in hCoV229E_Rluc p10_DMSO p10_K22 p10_K7523 p16_DMSO p16_K22 p16_X7523; do
    # for sample in p10_K22 p10_K7523; do
    #     bin/assembly.py order_and_orient tmp/02_assembly/${sample}.assembly1-spades.fasta refsel_db/refsel.fasta tmp/02_assembly/${sample}.assembly2-scaffolded.fasta --min_pct_contig_aligned 0.05 --outAlternateContigs tmp/02_assembly/${sample}.assembly2-alternate_sequences.fasta --nGenomeSegments 1 --outReference tmp/02_assembly/${sample}.assembly2-scaffold_ref.fasta --threads 15
    # done
    #
    # for sample in hCoV229E_Rluc p10_DMSO p10_K22 p10_K7523 p16_DMSO p16_K22 p16_X7523; do
    #     bin/assembly.py gapfill_gap2seq tmp/02_assembly/${sample}.assembly2-scaffolded.fasta data/01_per_sample/${sample}.cleaned.bam tmp/02_assembly/${sample}.assembly2-gapfilled.fasta --memLimitGb 12 --maskErrors --randomSeed 0 --loglevel DEBUG
    # done
    #IMPORTANT: Rerun the following commands!
for sample in hCoV229E_Rluc DMSO_p10 K22_p10 X7523_p10 DMSO_p16 K22_p16 X7523_p16 DMSO_p26 K22_p26 X7523_p26; do
    bin/assembly.py impute_from_reference tmp/02_assembly/${sample}.assembly2-gapfilled.fasta tmp/02_assembly/${sample}.assembly2-scaffold_ref.fasta tmp/02_assembly/${sample}.assembly3-modify.fasta --newName ${sample} --replaceLength 55 --minLengthFraction 0.05 --minUnambig 0.05 --index --loglevel DEBUG
done
# for sample in hCoV229E_Rluc p10_DMSO p10_K22 p10_K7523 p16_DMSO p16_K22 p16_X7523; do
#     bin/assembly.py refine_assembly tmp/02_assembly/${sample}.assembly3-modify.fasta data/01_per_sample/${sample}.cleaned.bam tmp/02_assembly/${sample}.assembly4-refined.fasta --outVcf tmp/02_assembly/${sample}.assembly3.vcf.gz --min_coverage 2 --novo_params '-r Random -l 20 -g 40 -x 20 -t 502' --threads 15 --loglevel DEBUG
#     bin/assembly.py refine_assembly tmp/02_assembly/${sample}.assembly4-refined.fasta data/01_per_sample/${sample}.cleaned.bam data/02_assembly/${sample}.fasta --outVcf tmp/02_assembly/${sample}.assembly4.vcf.gz --min_coverage 3 --novo_params '-r Random -l 20 -g 40 -x 20 -t 100' --threads 15 --loglevel DEBUG
# done

# -- ! Thirdly set the samples-assembly.txt completely and run "snakemake --directory /work --printshellcmds --cores 40"

# ---------------------------- BUG list of the docker pipeline, mostly are due to the version incompatibility ----------------------------
#BUG_1: FileNotFoundError: [Errno 2] No such file or directory: '/home/jhuang/Tools/samtools-1.9/samtools': '/home/jhuang/Tools/samtools-1.9/samtools'
#DEBUG_1 (DEPRECATED):
# -- In docker install independent samtools
conda create -n samtools-1.9-env samtools=1.9 -c bioconda -c conda-forge
# -- persist the modified docker, next time run own docker image
docker ps
#CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
#881a1ad6a990 quay.io/broadinstitute/viral-ngs "bash" 8 minutes ago Up 8 minutes intelligent_yalow
docker commit 881a1ad6a990 own_viral_ngs
docker image ls
docker run -it own_viral_ngs bash
#Change the path as "/opt/miniconda/envs/samtools-1.9-env/bin/samtools" in /work/bin/tools/samtools.py
# If another tool except for samtools could not be installed, also use the same method above to install it on own_viral_ngs!
#DEBUG_1_BETTER_SIMPLE: TOOL_VERSION = ‘1.6’ –> ‘1.9’ in ~/Tools/viral-ngs_docker/bin/tools/samtools.py #BUG_2: bin/taxon_filter.py deplete data/00_raw/2040_04.bam tmp/01_cleaned/2040_04.raw.bam tmp/01_cleaned/2040_04.bmtagger_depleted.bam tmp/01_cleaned/2040_04.rmdup.bam data/01_cleaned/2040_04.cleaned.bam –bmtaggerDbs /home/jhuang/REFs/viral_ngs_dbs/bmtagger_dbs_remove/hg19 /home/jhuang/REFs/viral_ngs_dbs/bmtagger_dbs_remove/metagenomics_contaminants_v3 /home/jhuang/REFs/viral_ngs_dbs/bmtagger_dbs_remove/GRCh37.68_ncRNA-GRCh37.68_transcripts-HS_rRNA_mitRNA –blastDbs /home/jhuang/REFs/viral_ngs_dbs/blast_dbs_remove/hybsel_probe_adapters /home/jhuang/REFs/viral_ngs_dbs/blast_dbs_remove/metag_v3.ncRNA.mRNA.mitRNA.consensus –threads 15 –srprismMemory 14250 –JVMmemory 50g –loglevel DEBUG #2025-05-23 09:58:45,326 – __init__:445:_attempt_install – DEBUG – Currently installed version of blast: 2.7.1-h4422958_6 #2025-05-23 09:58:45,327 – __init__:448:_attempt_install – DEBUG – Expected version of blast: 2.6.0 #2025-05-23 09:58:45,327 – __init__:449:_attempt_install – DEBUG – Incorrect version of blast installed. 
Removing it… #DEBUG_2: TOOL_VERSION = “2.6.0” –> “2.7.1” in ~/Tools/viral-ngs_docker/bin/tools/blast.py #BUG_3: bin/read_utils.py bwamem_idxstats data/01_cleaned/1762_04.cleaned.bam /home/jhuang/REFs/viral_ngs_dbs/spikeins/ercc_spike-ins.fasta –outStats reports/spike_count/1762_04.spike_count.txt –minScoreToFilter 60 –loglevel DEBUG #DEBUG_3: TOOL_VERSION = “0.7.15” –> “0.7.17” in ~/Tools/viral-ngs_docker/bin/tools/bwa.py #BUG_4: FileNotFoundError: [Errno 2] No such file or directory: ‘/usr/local/bin/trimmomatic’: ‘/usr/local/bin/trimmomatic’ #DEBUG_4: TOOL_VERSION = “0.36” –> “0.38” in ~/Tools/viral-ngs_docker/bin/tools/trimmomatic.py #BUG_5: FileNotFoundError: [Errno 2] No such file or directory: ‘/usr/bin/spades.py’: ‘/usr/bin/spades.py’ #DEBUG_5: TOOL_VERSION = “0.36” –> “0.38” in ~/Tools/viral-ngs_docker/bin/tools/trimmomatic.py # def install_and_get_path(self): # # the conda version wraps the jar file with a shell script # return ‘trimmomatic’ #BUG_6: bin/assembly.py order_and_orient tmp/02_assembly/2039_04.assembly1-spades.fasta refsel_db/refsel.fasta tmp/02_assembly/2039_04.assembly2-scaffolded.fasta –min_pct_contig_aligned 0.05 –outAlternateContigs tmp/02_assembly/2039_04.assembly2-alternate_sequences.fasta –nGenomeSegments 1 –outReference tmp/02_assembly/2039_04.assembly2-scaffold_ref.fasta –threads 15 –loglevel DEBUG 2025-05-23 17:40:19,526 – __init__:445:_attempt_install – DEBUG – Currently installed version of mummer4: 4.0.0beta2-pl526hf484d3e_4 2025-05-23 17:40:19,527 – __init__:448:_attempt_install – DEBUG – Expected version of mummer4: 4.0.0rc1 2025-05-23 17:40:19,527 – __init__:449:_attempt_install – DEBUG – Incorrect version of mummer4 installed. Removing it.. 
DEBUG_6: TOOL_VERSION = “4.0.0rc1” –> “4.0.0beta2” in ~/Tools/viral-ngs_docker/bin/tools/mummer.py #BUG_7: bin/assembly.py order_and_orient tmp/02_assembly/2039_04.assembly1-spades.fasta refsel_db/refsel.fasta tmp/02_assembly/2039_04.assembly2-scaffolded.fasta –min_pct_contig_aligned 0.05 –outAlternateContigs tmp/02_assembly/2039_04.assembly2-alternate_sequences.fasta –nGenomeSegments 1 –outReference tmp/02_assembly/2039_04.assembly2-scaffold_ref.fasta –threads 15 –loglevel DEBUG File “bin/assembly.py”, line 549, in base_counts = [sum([len(seg.seq.replace(“N”, “”)) for seg in scaffold]) \ AttributeError: ‘Seq’ object has no attribute ‘replace’ DEBUG_7: base_counts = [sum([len(seg.seq.replace(“N”, “”)) for seg in scaffold]) –> base_counts = [sum([len(seg.seq.ungap(‘N’)) for seg in scaffold]) in ~/Tools/viral-ngs_docker/bin/assembly.py BUG_8: bin/assembly.py refine_assembly tmp/02_assembly/1243_2.assembly3-modify.fasta data/01_per_sample/1243_2.cleaned.bam tmp/02_assembly/1243_2.assembly4-refined.fasta –outVcf tmp/02_assembly/1243_2.assembly3.vcf.gz –min_coverage 2 –novo_params ‘-r Random -l 20 -g 40 -x 20 -t 502’ –threads 15 –loglevel DEBUG File “/work/bin/tools/gatk.py”, line 75, in execute FileNotFoundError: [Errno 2] No such file or directory: ‘/usr/local/bin/gatk’: ‘/usr/local/bin/gatk’ #DEBUG_8: -v /usr/local/bin/gatk:/usr/local/bin/gatk in ‘docker run’ and change default python in the script via a shebang; TOOL_VERSION = “3.8” –> “3.6” in ~/Tools/viral-ngs_docker/bin/tools/gatk.py BUG_9: pyyaml is missing! #DEBUG_9: NO_ERROR if rerun! 
bin/assembly.py impute_from_reference tmp/02_assembly/2039_04.assembly2-gapfilled.fasta tmp/02_assembly/2039_04.assembly2-scaffold_ref.fasta tmp/02_assembly/2039_04.assembly3-modify.fasta –newName 2039_04 –replaceLength 55 –minLengthFraction 0.05 –minUnambig 0.05 –index –loglevel DEBUG for sample in 2039_04 2040_04; do for sample in 1762_04 1243_2 875_04; do bin/assembly.py impute_from_reference tmp/02_assembly/${sample}.assembly2-gapfilled.fasta tmp/02_assembly/${sample}.assembly2-scaffold_ref.fasta tmp/02_assembly/${sample}.assembly3-modify.fasta –newName ${sample} –replaceLength 55 –minLengthFraction 0.05 –minUnambig 0.05 –index –loglevel DEBUG done #BUG_10: bin/reports.py consolidate_fastqc reports/fastqc/2039_04/align_to_self reports/fastqc/2040_04/align_to_self reports/fastqc/1762_04/align_to_self reports/fastqc/1243_2/align_to_self reports/fastqc/875_04/align_to_self reports/summary.fastqc.align_to_self.txt #DEBUG_10: File “bin/intrahost.py”, line 527 and line 579 in merge_to_vcf # #MODIFIED_BACK samp_to_seqIndex[sampleName] = seq.seq.ungap(‘-‘) #samp_to_seqIndex[sampleName] = seq.seq.replace(“-“, “”) #BUG_11: bin/interhost.py multichr_mafft ref_genome/reference.fasta data/02_assembly/2039_04.fasta data/02_assembly/2040_04.fasta data/02_assembly/1762_04.fasta data/02_assembly/1243_2.fasta data/02_assembly/875_04.fasta data/03_multialign_to_ref –ep 0.123 –maxiters 1000 –preservecase –localpair –outFilePrefix aligned –sampleNameListFile data/03_multialign_to_ref/sampleNameList.txt –threads 15 –loglevel DEBUG 2025-05-26 15:04:19,014 – cmd:195:main_argparse – INFO – command: bin/interhost.py multichr_mafft inFastas=[‘ref_genome/reference.fasta’, ‘data/02_assembly/2039_04.fasta’, ‘data/02_assembly/2040_04.fasta’, ‘data/02_assembly/1762_04.fasta’, ‘data/02_assembly/1243_2.fasta’, ‘data/02_assembly/875_04.fasta’] localpair=True globalpair=None preservecase=True reorder=None gapOpeningPenalty=1.53 ep=0.123 verbose=False outputAsClustal=None maxiters=1000 
outDirectory=data/03_multialign_to_ref outFilePrefix=aligned sampleRelationFile=None sampleNameListFile=data/03_multialign_to_ref/sampleNameList.txt threads=15 loglevel=DEBUG tmp_dir=/tmp tmp_dirKeep=False 2025-05-26 15:04:19,014 – cmd:209:main_argparse – DEBUG – using tempDir: /tmp/tmp-interhost-multichr_mafft-nuws9mhp 2025-05-26 15:04:21,085 – __init__:445:_attempt_install – DEBUG – Currently installed version of mafft: 7.402-0 2025-05-26 15:04:21,085 – __init__:448:_attempt_install – DEBUG – Expected version of mafft: 7.221 2025-05-26 15:04:21,085 – __init__:449:_attempt_install – DEBUG – Incorrect version of mafft installed. Removing it… #DEBUG_11: TOOL_VERSION = “7.221” –> “7.402” in ~/Tools/viral-ngs_docker/bin/tools/mafft.py #BUG_12: bin/interhost.py snpEff data/04_intrahost/isnvs.vcf.gz PP810610.1 data/04_intrahost/isnvs.annot.vcf.gz j.huang@uke.de –loglevel DEBUG 2025-06-10 13:14:07,526 – __init__:445:_attempt_install – DEBUG – Currently installed version of snpeff: 4.3.1t-3 2025-06-10 13:14:07,527 – __init__:448:_attempt_install – DEBUG – Expected version of snpeff: 4.1l #DEBUG_12: -v /usr/local/bin/gatk:/usr/local/bin/gatk in ‘docker run’ and change default python in the script via a shebang; TOOL_VERSION = “4.1l” –> “4.3.1t” in ~/Tools/viral-ngs_docker/bin/tools/snpeff.py 7. Comparing intra- and inter-host variants, comparing the variants to the alignments of the assemblies to confirm their correctness. From step 5, only 3 inter-host variants were confirmed: they are 10871, 19289, 23435. 
PP810610 10871 hCoV229E_Rluc hCoV229E_Rluc C,T 0.0057070386810399495 0.011348936781066188 1.0 missense_variant 10654C>T Leu3552Phe 3552 6758 Gene_217_20492 XBA84229.1 PP810610 10871 p10_DMSO p10_DMSO C,T 0.0577716643741403 0.10886819833916395 1.0 missense_variant 10654C>T Leu3552Phe 3552 6758 Gene_217_20492 XBA84229.1 PP810610 10871 p10_K22 p10_K22 C,T 1.0 0.0 1.0 missense_variant 10654C>T Leu3552Phe 3552 6758 Gene_217_20492 XBA84229.1 PP810610 10871 p10_K7523 p10_K7523 C,T 0.8228321896444167 0.2915587546587828 1.0 missense_variant 10654C>T Leu3552Phe 3552 6758 Gene_217_20492 XBA84229.1 PP810610 10871 p16_DMSO p16_DMSO C,T 0.02927088877062267 0.05682820768240093 1.0 missense_variant 10654C>T Leu3552Phe 3552 6758 Gene_217_20492 XBA84229.1 PP810610 10871 p16_K22 p16_K22 C,T 0.9911209766925638 0.017600372505084394 1.0 missense_variant 10654C>T Leu3552Phe 3552 6758 Gene_217_20492 XBA84229.1 PP810610 10871 p16_X7523 p16_X7523 C,T 0.8776699029126214 0.21473088886794223 1.0 missense_variant 10654C>T Leu3552Phe 3552 6758 Gene_217_20492 XBA84229.1 PP810610 19289 hCoV229E_Rluc hCoV229E_Rluc G,T 0.0 0.0 1.0 missense_variant 19073G>T Gly6358Val 6358 6758 Gene_217_20492 XBA84229.1 PP810610 19289 p10_DMSO p10_DMSO G,T 0.0 0.0 1.0 missense_variant 19073G>T Gly6358Val 6358 6758 Gene_217_20492 XBA84229.1 PP810610 19289 p10_K22 p10_K22 G,T 1.0 0.0 1.0 missense_variant 19073G>T Gly6358Val 6358 6758 Gene_217_20492 XBA84229.1 PP810610 19289 p10_K7523 p10_K7523 G,T 0.0 0.0 1.0 missense_variant 19073G>T Gly6358Val 6358 6758 Gene_217_20492 XBA84229.1 PP810610 19289 p16_DMSO p16_DMSO G,T 0.0 0.0 1.0 missense_variant 19073G>T Gly6358Val 6358 6758 Gene_217_20492 XBA84229.1 PP810610 19289 p16_K22 p16_K22 G,T 0.9884823848238482 0.02276991943361173 1.0 missense_variant 19073G>T Gly6358Val 6358 6758 Gene_217_20492 XBA84229.1 PP810610 19289 p16_X7523 p16_X7523 G,T 0.0 0.0 1.0 missense_variant 19073G>T Gly6358Val 6358 6758 Gene_217_20492 XBA84229.1 PP810610 23435 hCoV229E_Rluc hCoV229E_Rluc C,T 
0.0 0.0 1.0 missense_variant 2941C>T Leu981Phe 981 1173 Gene_20494_24015 XBA84230.1 PP810610 23435 p10_DMSO p10_DMSO C,T 0.031912415560214305 0.061788026586653055 1.0 missense_variant 2941C>T Leu981Phe 981 1173 Gene_20494_24015 XBA84230.1 PP810610 23435 p10_K22 p10_K22 C,T 1.0 0.0 1.0 missense_variant 2941C>T Leu981Phe 981 1173 Gene_20494_24015 XBA84230.1 PP810610 23435 p10_K7523 p10_K7523 C,T 0.8352090032154341 0.27526984832663026 1.0 missense_variant 2941C>T Leu981Phe 981 1173 Gene_20494_24015 XBA84230.1 PP810610 23435 p16_DMSO p16_DMSO C,T 0.0 0.0 1.0 missense_variant 2941C>T Leu981Phe 981 1173 Gene_20494_24015 XBA84230.1 PP810610 23435 p16_K22 p16_K22 C,T 0.958498023715415 0.07955912449811753 1.0 missense_variant 2941C>T Leu981Phe 981 1173 Gene_20494_24015 XBA84230.1 PP810610 23435 p16_X7523 p16_X7523 C,T 0.13175164058556285 0.22878629157715102 1.0 missense_variant 2941C>T Leu981Phe 981 1173 Gene_20494_24015 XBA84230.1 8. Generate variant_annot.xls and coverages.xls sudo chown -R jhuang:jhuang data # — generate isnvs_annot_complete__.txt, isnvs_annot_0.05.txt from ~/DATA/Data_Pietschmann_RSV_Probe3/data/04_intrahost cp isnvs.annot.txt isnvs.annot_complete.txt ~/Tools/csv2xls-0.4/csv_to_xls.py isnvs.annot_complete.txt -d$’\t’ -o isnvs.annot_complete.xls #delete the columns patient, time, Hw and Hs and the header in the xls and save as txt file. 
awk ‘{printf “%.3f\n”, $5}’ isnvs.annot_complete.csv > f5 cut -f1-4 isnvs.annot_complete.csv > f1_4 cut -f6- isnvs.annot_complete.csv > f6_ paste f1_4 f5 > f1_5 paste f1_5 f6_ > isnvs_annot_complete_.txt #correct f5 in header of isnvs_annot_complete_.txt to iSNV_freq #header: chr pos sample alleles iSNV_freq eff_type eff_codon_dna eff_aa eff_aa_pos eff_prot_len eff_gene eff_protein ~/Tools/csv2xls-0.4/csv_to_xls.py isnvs_annot_complete_.txt -d$’\t’ -o variant_annot.xls #MANUALLY generate variant_annot_0.01.csv variant_annot_0.05.csv awk ‘ $5 >= 0.05 ‘ isnvs_annot_complete_.txt > 0.05.csv cut -f2 0.05.csv awk ‘ $5 >= 0.01 ‘ isnvs_annot_complete_.txt > 0.01.csv cut -f2 0.05.csv | uniq > ids_0.05 cut -f2 0.01.csv | uniq > ids_0.01 #Replace ‘\n’ with ‘\\t” isnvs_annot_complete_.txt >> isnvs_annot_0.05.txt\ngrep -P “PP810610\\t’ in ids_0.05 and then deleting the ‘pos’ line #Replace ‘\n’ with ‘\\t” isnvs_annot_complete_.txt >> isnvs_annot_0.01.txt\ngrep -P “PP810610\\t’ in ids_0.01 and then deleting the ‘pos’ line #Run ids_0.05 and ids_0.01 cp ../../Outputs/Master_vcf/All_SNPs_indels_annotated.txt ../../Outputs/Master_vcf/All_SNPs_indels_annotated.txt hCoV229E_Rluc_variants # Delete the three records which already reported in intra-host results hCoV229E_Rluc_variants: they are 10871, 19289, 23435. PP810610 10871 C T SNP C C/T T C/T C/T T C/T missense_variant MODERATE MISSENSE Ctt/Ttt p.Leu3552Phe/c.10654C>T 6757 CDS_1 protein_coding PP810610 19289 G T SNP G G T G G G/T G missense_variant MODERATE MISSENSE Gtt/Ttt p.Val6358Phe/c.19072G>T 6757 CDS_1 protein_coding PP810610 23435 C T SNP C C T C/T C C/T C/T missense_variant MODERATE MISSENSE Ctt/Ttt p.Leu981Phe/c.2941C>T 1173 CDS_2 protein_coding ~/Tools/csv2xls-0.4/csv_to_xls.py isnvs_annot_0.05.txt isnvs_annot_0.01.txt hCoV229E_Rluc_variants -d$’\t’ -o variant_annot.xls #Modify sheetname to variant_annot_0.05 and variant_annot_0.01 and add the header in Excel file. 
#Note in the complete list, Set 2024 is NOT a subset of Set 2025 because the element 26283 is in set 2024 but missing from set 2025.
# --- calculate the coverage
samtools depth ./data/02_align_to_self/hCoV229E_Rluc.mapped.bam > hCoV229E_Rluc_cov.txt
samtools depth ./data/02_align_to_self/p10_DMSO.mapped.bam > p10_DMSO_cov.txt
samtools depth ./data/02_align_to_self/p10_K22.mapped.bam > p10_K22_cov.txt
samtools depth ./data/02_align_to_self/p10_K7523.mapped.bam > p10_K7523_cov.txt
~/Tools/csv2xls-0.4/csv_to_xls.py hCoV229E_Rluc_cov.txt p10_DMSO_cov.txt p10_K22_cov.txt p10_K7523_cov.txt -d$'\t' -o coverages.xls
#draw coverage and see if they are continuous?
samtools depth ./data/02_align_to_self/p16_DMSO.mapped.bam > p16_DMSO_cov.txt
samtools depth ./data/02_align_to_self/p16_K22.mapped.bam > p16_K22_cov.txt
samtools depth ./data/02_align_to_self/p16_X7523.mapped.bam > p16_K7523_cov.txt
~/Tools/csv2xls-0.4/csv_to_xls.py p16_DMSO_cov.txt p16_K22_cov.txt p16_K7523_cov.txt -d$'\t' -o coverages_p16.xls
# Load required packages
library(ggplot2)
library(dplyr)
# Read the coverage data
cov_data <- read.table("p16_K7523_cov.txt", header = FALSE, sep = "\t", col.names = c("Chromosome", "Position", "Coverage"))
# Create full position range for the given chromosome
full_range <- data.frame(Position = seq(min(cov_data$Position), max(cov_data$Position)))
# Merge with actual coverage data and fill missing positions with 0
cov_full <- full_range %>%
  left_join(cov_data[, c("Position", "Coverage")], by = "Position") %>%
  mutate(Coverage = ifelse(is.na(Coverage), 0, Coverage))
# Save the plot to PNG
png("p16_K7523_coverage_filled.png", width = 1200, height = 600)
ggplot(cov_full, aes(x = Position, y = Coverage)) +
  geom_line(color = "steelblue", size = 0.3) +
  labs(title = "Coverage Plot for p16_K7523 (Missing = 0)", x = "Genomic Position", y = "Coverage Depth") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text = element_text(size = 10),
    axis.title = element_text(size = 12)
  )
dev.off()
9. (Optional) Consensus sequences of each and of all isolates
cat PP810610.1.fa OZ035258.1.fa MZ712010.1.fa OK662398.1.fa OK625404.1.fa KF293664.1.fa NC_002645.1.fa > all.fa
cp data/02_assembly/*.fasta ./
for sample in hCoV229E_Rluc p10_DMSO p10_K22 p10_K7523; do \
mv ${sample}.fasta ${sample}.fa
cat all.fa ${sample}.fa >> all.fa
done
cat RSV_dedup.fa all.fa > RSV_all.fa
mafft --clustalout --adjustdirection RSV_all.fa > RSV_all.aln
snp-sites RSV_all.aln -o RSV_all_.aln
10. Report
Please find attached the variant analysis results for Thomas. Variant frequencies in the new samples are highlighted in yellow. Although PP810610 is used as the reference, only differences observed in the samples p10_DMSO, p10_K22, p10_K7523, p16_DMSO, p16_K22, and p16_X7523 compared to hCoV229E_Rluc are reported in the sheets variant_annot_0.05 and variant_annot_0.01 (see variant_annot.xls). Variants already present in hCoV229E_Rluc are excluded from these sheets. In total, 17 mutations were found in hCoV229E_Rluc relative to PP810610, detailed in the sheet “hCoV229E_Rluc_variants” (see variant_annot.xls).
------ Explanation of iSNV_freq in the sheets variant_annot_0.05 and variant_annot_0.01 ------
The iSNV_freq column shows the frequency of the second allele at each position. For example, at position 23435 on chr PP810610:
chr Position Sample Alleles iSNV_freq
PP810610 23435 hCoV229E_Rluc C,T 0
PP810610 23435 p10_DMSO C,T 0.032
PP810610 23435 p10_K22 C,T 0.995
PP810610 23435 p10_K7523 C,T 0.835
PP810610 23435 p16_DMSO C,T 0
PP810610 23435 p16_K22 C,T 0.958
PP810610 23435 p16_X7523 C,T 0.132
The second allele (T) frequencies are:
0 (only C)
0.032 (3.2% T)
0.995 (99.5% T)
0.835 (83.5% T)
0 (only C)
0.958 (95.8% T)
0.132 (13.2% T)
# ---- Explanation of Mutation at Position 19289 ----
Regarding the mutation at position 19289 — you’re absolutely right, and I had also noticed the discrepancy.
In the 2024 analysis, I performed intra-host variant calling, which detects only those variants with frequencies strictly between 0% and 100% within a single sample. Since position 19289 showed 100% G in p10_DMSO, 100% T in p10_K22, and 100% G in p10_K7523, it was not identified as an intra-host variant at that time. Rather, it’s a clear example of an inter-host variant — a fixed difference between samples. In the 2025 analysis, I again used intra-host variant calling. This time, the mutation at position 19289 in p16_K22 was detected at 98.8% T, which falls within the threshold and therefore appears in the intra-host variant table. After noticing this, I also ran a dedicated inter-host variant calling analysis, which specifically highlights differences between samples rather than within them. The results can be found in the third table (“hCoV229E_Rluc_variants”) of the variant_annot.xls file I sent you previously. As you’ll see, all 17 positions are identical across the 7 samples, indicating that no additional inter-host variants were detected beyond what we had already observed. Lastly, please find the coverage data in the attached files. # — Just following up on the mutation at position 19289. By tweaking some settings in the inter-host variant calling, we can also detect variants at positions like 19289. However, in these results, a “/” indicates intra-host variants that require further validation through intra-host variant calling. The intra-host variant calling uses a more precise mapping strategy, enabling a more accurate estimation of allele frequencies. 
Here’s an example from the inter-host variant table showing the mutation at 19289 with the adjusted settings:
CHROM POS REF ALT TYPE hCoV229E_Rluc p10_DMSO p10_K22 p10_K7523 p16_DMSO p16_K22 p16_X7523
PP810610 19289 G T SNP G G T G G G/T G
# ---------------- END ----------------
#Check whether the 0.05 and 0.01 sets are supersets of the 0.05 and 0.01 sets of the 2024 version: comparing 'cut -f2 0.05.csv | uniq > ids_0.05_' and 'cut -f2 0.01.csv | uniq > ids_0.01_' between 2024 and 2025
869, 1492, 4809, 5797, 8289, 8294, 8331, 8376, 9146, 9174, 9933, 9954, 9993, 10145, 10239, 10310, 10871, 10898, 10970, 11577, 12634, 17941, 18640, 18646, 18701, 18815, 19028, 19294, 19388, 21027, 21633, 21671, 21928, 22215, 23435, 23633, 24738, 25025, 25592, 26885
869, 1492, 3422, 4074, 4809, 5345, 5373, 5543, 5797, 6470, 8289, 8294, 8331, 8376, 9146, 9174, 9261, 9933, 9954, 9993, 10145, 10239, 10310, 10871, 10898, 10970, 11194, 11568, 11577, 11706, 12634, 13113, 13912, 15615, 17941, 18640, 18646, 18701, 18815, 18919, 19028, 19165, 19289, 19294, 19388, 21027, 21633, 21671, 21747, 21928, 22215, 22318, 22630, 22788, 22820, 22906, 22918, 23435, 23586, 23633, 24738, 24903, 25025, 25432, 25592, 26104, 26281, 26307, 26411, 26500, 26746, 26885
✅ Yes, Set 1 is a subset of Set 2. All elements of Set 1 are also contained in Set 2.
Set 1: {1492,8289,8294,9174,10239,10310,10871,10898,11577,18640,21027,21633,22215,23435,24738,25025,25592}
Set 2: {1492,8289,8294,9174,10145,10239,10310,10871,10898,11577,18640,19289,21027,21633,22215,23435,24738,25025,25592}
Since every element of Set 1 is in Set 2, we have: Set 1 ⊆ Set 2
In other words, Set 1 is a subset of Set 2.
diff 0.05_test_uniq.txt 0.05_test.csv diff 0.01_test_uniq.txt 0.01_test.csv > chr pos sample alleles iSNV_freq eff_type eff_codon_dna eff_aa eff_aa_pos eff_prot_len eff_gene eff_protein 8a10,17 > PP810610 3422 hCoV229E_Rluc C,T 0 missense_variant 3205C>T Leu1069Phe 1069 6758 Gene_217_20492 XBA84229.1 > PP810610 3422 p10_DMSO C,T 0 missense_variant 3205C>T Leu1069Phe 1069 6758 Gene_217_20492 XBA84229.1 > PP810610 3422 p10_K22 C,T 0 missense_variant 3205C>T Leu1069Phe 1069 6758 Gene_217_20492 XBA84229.1 > PP810610 3422 p10_K7523 C,T 0 missense_variant 3205C>T Leu1069Phe 1069 6758 Gene_217_20492 XBA84229.1 > PP810610 4074 hCoV229E_Rluc G,T 0 missense_variant 3857G>T Gly1286Val 1286 6758 Gene_217_20492 XBA84229.1 > PP810610 4074 p10_DMSO G,T 0 missense_variant 3857G>T Gly1286Val 1286 6758 Gene_217_20492 XBA84229.1 > PP810610 4074 p10_K22 G,T 0 missense_variant 3857G>T Gly1286Val 1286 6758 Gene_217_20492 XBA84229.1 > PP810610 4074 p10_K7523 G,T 0 missense_variant 3857G>T Gly1286Val 1286 6758 Gene_217_20492 XBA84229.1 12a22,33 > PP810610 5345 hCoV229E_Rluc C,T 0 synonymous_variant 5128C>T Leu1710Leu 1710 6758 Gene_217_20492 XBA84229.1 > PP810610 5345 p10_DMSO C,T 0 synonymous_variant 5128C>T Leu1710Leu 1710 6758 Gene_217_20492 XBA84229.1 > PP810610 5345 p10_K22 C,T 0 synonymous_variant 5128C>T Leu1710Leu 1710 6758 Gene_217_20492 XBA84229.1 > PP810610 5345 p10_K7523 C,T 0 synonymous_variant 5128C>T Leu1710Leu 1710 6758 Gene_217_20492 XBA84229.1 > PP810610 5373 hCoV229E_Rluc C,A 0 stop_gained 5156C>A Ser1719* 1719 6758 Gene_217_20492 XBA84229.1 > PP810610 5373 p10_DMSO C,A 0 stop_gained 5156C>A Ser1719* 1719 6758 Gene_217_20492 XBA84229.1 > PP810610 5373 p10_K22 C,A 0 stop_gained 5156C>A Ser1719* 1719 6758 Gene_217_20492 XBA84229.1 > PP810610 5373 p10_K7523 C,A 0 stop_gained 5156C>A Ser1719* 1719 6758 Gene_217_20492 XBA84229.1 > PP810610 5543 hCoV229E_Rluc C,T 0 missense_variant 5326C>T His1776Tyr 1776 6758 Gene_217_20492 XBA84229.1 > PP810610 5543 p10_DMSO C,T 0 
missense_variant 5326C>T His1776Tyr 1776 6758 Gene_217_20492 XBA84229.1 > PP810610 5543 p10_K22 C,T 0 missense_variant 5326C>T His1776Tyr 1776 6758 Gene_217_20492 XBA84229.1 > PP810610 5543 p10_K7523 C,T 0 missense_variant 5326C>T His1776Tyr 1776 6758 Gene_217_20492 XBA84229.1 16a38,41 > PP810610 6470 hCoV229E_Rluc C,T 0 synonymous_variant 6253C>T Leu2085Leu 2085 6758 Gene_217_20492 XBA84229.1 > PP810610 6470 p10_DMSO C,T 0 synonymous_variant 6253C>T Leu2085Leu 2085 6758 Gene_217_20492 XBA84229.1 > PP810610 6470 p10_K22 C,T 0 synonymous_variant 6253C>T Leu2085Leu 2085 6758 Gene_217_20492 XBA84229.1 > PP810610 6470 p10_K7523 C,T 0 synonymous_variant 6253C>T Leu2085Leu 2085 6758 Gene_217_20492 XBA84229.1 40a66,69 > PP810610 9261 hCoV229E_Rluc C,T 0 missense_variant 9044C>T Ala3015Val 3015 6758 Gene_217_20492 XBA84229.1 > PP810610 9261 p10_DMSO C,T 0 missense_variant 9044C>T Ala3015Val 3015 6758 Gene_217_20492 XBA84229.1 > PP810610 9261 p10_K22 C,T 0 missense_variant 9044C>T Ala3015Val 3015 6758 Gene_217_20492 XBA84229.1 > PP810610 9261 p10_K7523 C,T 0 missense_variant 9044C>T Ala3015Val 3015 6758 Gene_217_20492 XBA84229.1 *1* 72c101 A Gly3561Ser 3561 6758 Gene_217_20492 XBA84229.1 (OLD calculation,take this to integrate) — > PP810610 10898 p10_K7523 G,A 0.062 missense_variant 10681G>A Gly3561Ser 3561 6758 Gene_217_20492 XBA84229.1 (NEW calculation) 76a106,113 > PP810610 11194 hCoV229E_Rluc C,A 0 synonymous_variant 10977C>A Ser3659Ser 3659 6758 Gene_217_20492 XBA84229.1 > PP810610 11194 p10_DMSO C,A 0 synonymous_variant 10977C>A Ser3659Ser 3659 6758 Gene_217_20492 XBA84229.1 > PP810610 11194 p10_K22 C,A 0 synonymous_variant 10977C>A Ser3659Ser 3659 6758 Gene_217_20492 XBA84229.1 > PP810610 11194 p10_K7523 C,A 0 synonymous_variant 10977C>A Ser3659Ser 3659 6758 Gene_217_20492 XBA84229.1 > PP810610 11568 hCoV229E_Rluc C,T 0 missense_variant 11351C>T Thr3784Ile 3784 6758 Gene_217_20492 XBA84229.1 > PP810610 11568 p10_DMSO C,T 0 missense_variant 11351C>T Thr3784Ile 3784 
6758 Gene_217_20492 XBA84229.1 > PP810610 11568 p10_K22 C,T 0 missense_variant 11351C>T Thr3784Ile 3784 6758 Gene_217_20492 XBA84229.1 > PP810610 11568 p10_K7523 C,T 0 missense_variant 11351C>T Thr3784Ile 3784 6758 Gene_217_20492 XBA84229.1 80a118,121 > PP810610 11706 hCoV229E_Rluc C,A 0 missense_variant 11489C>A Pro3830Gln 3830 6758 Gene_217_20492 XBA84229.1 > PP810610 11706 p10_DMSO C,A 0 missense_variant 11489C>A Pro3830Gln 3830 6758 Gene_217_20492 XBA84229.1 > PP810610 11706 p10_K22 C,A 0 missense_variant 11489C>A Pro3830Gln 3830 6758 Gene_217_20492 XBA84229.1 > PP810610 11706 p10_K7523 C,A 0 missense_variant 11489C>A Pro3830Gln 3830 6758 Gene_217_20492 XBA84229.1 84a126,137 > PP810610 13113 hCoV229E_Rluc C,T 0 synonymous_variant 12897C>T Tyr4299Tyr 4299 6758 Gene_217_20492 XBA84229.1 > PP810610 13113 p10_DMSO C,T 0 synonymous_variant 12897C>T Tyr4299Tyr 4299 6758 Gene_217_20492 XBA84229.1 > PP810610 13113 p10_K22 C,T 0 synonymous_variant 12897C>T Tyr4299Tyr 4299 6758 Gene_217_20492 XBA84229.1 > PP810610 13113 p10_K7523 C,T 0 synonymous_variant 12897C>T Tyr4299Tyr 4299 6758 Gene_217_20492 XBA84229.1 > PP810610 13912 hCoV229E_Rluc G,A 0 missense_variant 13696G>A Gly4566Ser 4566 6758 Gene_217_20492 XBA84229.1 > PP810610 13912 p10_DMSO G,A 0 missense_variant 13696G>A Gly4566Ser 4566 6758 Gene_217_20492 XBA84229.1 > PP810610 13912 p10_K22 G,A 0 missense_variant 13696G>A Gly4566Ser 4566 6758 Gene_217_20492 XBA84229.1 > PP810610 13912 p10_K7523 G,A 0 missense_variant 13696G>A Gly4566Ser 4566 6758 Gene_217_20492 XBA84229.1 > PP810610 15615 hCoV229E_Rluc C,A 0 synonymous_variant 15399C>A Val5133Val 5133 6758 Gene_217_20492 XBA84229.1 > PP810610 15615 p10_DMSO C,A 0 synonymous_variant 15399C>A Val5133Val 5133 6758 Gene_217_20492 XBA84229.1 > PP810610 15615 p10_K22 C,A 0 synonymous_variant 15399C>A Val5133Val 5133 6758 Gene_217_20492 XBA84229.1 > PP810610 15615 p10_K7523 C,A 0 synonymous_variant 15399C>A Val5133Val 5133 6758 Gene_217_20492 XBA84229.1 104a158,161 > 
PP810610 18919 hCoV229E_Rluc C,T 0 missense_variant 18703C>T Arg6235Cys 6235 6758 Gene_217_20492 XBA84229.1 > PP810610 18919 p10_DMSO C,T 0 missense_variant 18703C>T Arg6235Cys 6235 6758 Gene_217_20492 XBA84229.1 > PP810610 18919 p10_K22 C,T 0 missense_variant 18703C>T Arg6235Cys 6235 6758 Gene_217_20492 XBA84229.1 > PP810610 18919 p10_K7523 C,T 0 missense_variant 18703C>T Arg6235Cys 6235 6758 Gene_217_20492 XBA84229.1 108a166,173 > PP810610 19165 hCoV229E_Rluc C,A 0 missense_variant 18949C>A Arg6317Ser 6317 6758 Gene_217_20492 XBA84229.1 > PP810610 19165 p10_DMSO C,A 0 missense_variant 18949C>A Arg6317Ser 6317 6758 Gene_217_20492 XBA84229.1 > PP810610 19165 p10_K22 C,A 0 missense_variant 18949C>A Arg6317Ser 6317 6758 Gene_217_20492 XBA84229.1 > PP810610 19165 p10_K7523 C,A 0 missense_variant 18949C>A Arg6317Ser 6317 6758 Gene_217_20492 XBA84229.1 > PP810610 19289 hCoV229E_Rluc G,T 0 missense_variant 19073G>T Gly6358Val 6358 6758 Gene_217_20492 XBA84229.1 > PP810610 19289 p10_DMSO G,T 0 missense_variant 19073G>T Gly6358Val 6358 6758 Gene_217_20492 XBA84229.1 > PP810610 19289 p10_K22 G,T 1 missense_variant 19073G>T Gly6358Val 6358 6758 Gene_217_20492 XBA84229.1 > PP810610 19289 p10_K7523 G,T 0 missense_variant 19073G>T Gly6358Val 6358 6758 Gene_217_20492 XBA84229.1 128a194,197 > PP810610 21747 hCoV229E_Rluc C,A 0 missense_variant 1253C>A Ser418Tyr 418 1173 Gene_20494_24015 XBA84230.1 > PP810610 21747 p10_DMSO C,A 0 missense_variant 1253C>A Ser418Tyr 418 1173 Gene_20494_24015 XBA84230.1 > PP810610 21747 p10_K22 C,A 0 missense_variant 1253C>A Ser418Tyr 418 1173 Gene_20494_24015 XBA84230.1 > PP810610 21747 p10_K7523 C,A 0 missense_variant 1253C>A Ser418Tyr 418 1173 Gene_20494_24015 XBA84230.1 *2* 131c200 C Gly478Gly 478 1173 Gene_20494_24015 XBA84230.1 — > PP810610 21928 p10_K22 T,C 0.029 synonymous_variant 1434T>C Gly478Gly 478 1173 Gene_20494_24015 XBA84230.1 140a210,229 > PP810610 22630 hCoV229E_Rluc C,T 0 synonymous_variant 2136C>T Tyr712Tyr 712 1173 
Gene_20494_24015 XBA84230.1 > PP810610 22630 p10_DMSO C,T 0 synonymous_variant 2136C>T Tyr712Tyr 712 1173 Gene_20494_24015 XBA84230.1 > PP810610 22630 p10_K22 C,T 0 synonymous_variant 2136C>T Tyr712Tyr 712 1173 Gene_20494_24015 XBA84230.1 > PP810610 22630 p10_K7523 C,T 0 synonymous_variant 2136C>T Tyr712Tyr 712 1173 Gene_20494_24015 XBA84230.1 > PP810610 22788 hCoV229E_Rluc T,C 0 missense_variant 2294T>C Val765Ala 765 1173 Gene_20494_24015 XBA84230.1 > PP810610 22788 p10_DMSO T,C 0 missense_variant 2294T>C Val765Ala 765 1173 Gene_20494_24015 XBA84230.1 > PP810610 22788 p10_K22 T,C 0 missense_variant 2294T>C Val765Ala 765 1173 Gene_20494_24015 XBA84230.1 > PP810610 22788 p10_K7523 T,C 0 missense_variant 2294T>C Val765Ala 765 1173 Gene_20494_24015 XBA84230.1 > PP810610 22820 hCoV229E_Rluc C,T 0 missense_variant 2326C>T Arg776Cys 776 1173 Gene_20494_24015 XBA84230.1 > PP810610 22820 p10_DMSO C,T 0 missense_variant 2326C>T Arg776Cys 776 1173 Gene_20494_24015 XBA84230.1 > PP810610 22820 p10_K22 C,T 0 missense_variant 2326C>T Arg776Cys 776 1173 Gene_20494_24015 XBA84230.1 > PP810610 22820 p10_K7523 C,T 0 missense_variant 2326C>T Arg776Cys 776 1173 Gene_20494_24015 XBA84230.1 > PP810610 22906 hCoV229E_Rluc C,T 0 synonymous_variant 2412C>T Asn804Asn 804 1173 Gene_20494_24015 XBA84230.1 > PP810610 22906 p10_DMSO C,T 0 synonymous_variant 2412C>T Asn804Asn 804 1173 Gene_20494_24015 XBA84230.1 > PP810610 22906 p10_K22 C,T 0 synonymous_variant 2412C>T Asn804Asn 804 1173 Gene_20494_24015 XBA84230.1 > PP810610 22906 p10_K7523 C,T 0 synonymous_variant 2412C>T Asn804Asn 804 1173 Gene_20494_24015 XBA84230.1 > PP810610 22918 hCoV229E_Rluc C,A 0 synonymous_variant 2424C>A Ala808Ala 808 1173 Gene_20494_24015 XBA84230.1 > PP810610 22918 p10_DMSO C,A 0 synonymous_variant 2424C>A Ala808Ala 808 1173 Gene_20494_24015 XBA84230.1 > PP810610 22918 p10_K22 C,A 0 synonymous_variant 2424C>A Ala808Ala 808 1173 Gene_20494_24015 XBA84230.1 > PP810610 22918 p10_K7523 C,A 0 synonymous_variant 2424C>A 
Ala808Ala 808 1173 Gene_20494_24015 XBA84230.1 *3* 143c232 T Leu981Phe 981 1173 Gene_20494_24015 XBA84230.1 — > PP810610 23435 p10_K22 C,T 1 missense_variant 2941C>T Leu981Phe 981 1173 Gene_20494_24015 XBA84230.1 144a234,237 > PP810610 23586 hCoV229E_Rluc C,A 0 missense_variant 3092C>A Pro1031Gln 1031 1173 Gene_20494_24015 XBA84230.1 > PP810610 23586 p10_DMSO C,A 0 missense_variant 3092C>A Pro1031Gln 1031 1173 Gene_20494_24015 XBA84230.1 > PP810610 23586 p10_K22 C,A 0 missense_variant 3092C>A Pro1031Gln 1031 1173 Gene_20494_24015 XBA84230.1 > PP810610 23586 p10_K7523 C,A 0 missense_variant 3092C>A Pro1031Gln 1031 1173 Gene_20494_24015 XBA84230.1 ‘-minus this record-‘ 149,152c242,249 T,64C>A Leu22Phe,Leu22Ile 22 77 Gene_24674_24907 XBA84233.1 T,64C>A Leu22Phe,Leu22Ile 22 77 Gene_24674_24907 XBA84233.1 T,64C>A Leu22Phe,Leu22Ile 22 77 Gene_24674_24907 XBA84233.1 T,64C>A Leu22Phe,Leu22Ile 22 77 Gene_24674_24907 XBA84233.1 — > PP810610 24738 hCoV229E_Rluc C,A,T 0 missense_variant 64C>A,64C>T Leu22Ile,Leu22Phe 22 77 Gene_24674_24907 XBA84233.1 > PP810610 24738 p10_DMSO C,A,T 0.011 missense_variant 64C>A,64C>T Leu22Ile,Leu22Phe 22 77 Gene_24674_24907 XBA84233.1 > PP810610 24738 p10_K22 C,A,T 1 missense_variant 64C>A,64C>T Leu22Ile,Leu22Phe 22 77 Gene_24674_24907 XBA84233.1 > PP810610 24738 p10_K7523 C,A,T 0.106 missense_variant 64C>A,64C>T Leu22Ile,Leu22Phe 22 77 Gene_24674_24907 XBA84233.1 > PP810610 24903 hCoV229E_Rluc T,C 0 missense_variant 229T>C Phe77Leu 77 77 Gene_24674_24907 XBA84233.1 > PP810610 24903 p10_DMSO T,C 0 missense_variant 229T>C Phe77Leu 77 77 Gene_24674_24907 XBA84233.1 > PP810610 24903 p10_K22 T,C 0 missense_variant 229T>C Phe77Leu 77 77 Gene_24674_24907 XBA84233.1 > PP810610 24903 p10_K7523 T,C 0 missense_variant 229T>C Phe77Leu 77 77 Gene_24674_24907 XBA84233.1 *4* 154c251 T His36Tyr 36 225 Gene_24919_25596 XBA84234.1 — > PP810610 25025 p10_DMSO C,T 0.049 missense_variant 106C>T His36Tyr 36 225 Gene_24919_25596 XBA84234.1 156a254,257 > PP810610 
25432 hCoV229E_Rluc C,A 0 synonymous_variant 513C>A Ala171Ala 171 225 Gene_24919_25596 XBA84234.1 > PP810610 25432 p10_DMSO C,A 0 synonymous_variant 513C>A Ala171Ala 171 225 Gene_24919_25596 XBA84234.1 > PP810610 25432 p10_K22 C,A 0 synonymous_variant 513C>A Ala171Ala 171 225 Gene_24919_25596 XBA84234.1 > PP810610 25432 p10_K7523 C,A 0 synonymous_variant 513C>A Ala171Ala 171 225 Gene_24919_25596 XBA84234.1 161,164c262,289 < PP810610 26885 hCoV229E_Rluc T,A 0 intergenic_region < PP810610 26885 p10_DMSO T,A 0 intergenic_region < PP810610 26885 p10_K22 T,A 0.009 intergenic_region PP810610 26104 hCoV229E_Rluc C,T,A 0 missense_variant 494C>A,494C>T Pro165His,Pro165Leu 165 389 Gene_25610_26779 XBA84235.1 > PP810610 26104 p10_DMSO C,T,A 0 missense_variant 494C>A,494C>T Pro165His,Pro165Leu 165 389 Gene_25610_26779 XBA84235.1 > PP810610 26104 p10_K22 C,T,A 0 missense_variant 494C>A,494C>T Pro165His,Pro165Leu 165 389 Gene_25610_26779 XBA84235.1 > PP810610 26104 p10_K7523 C,T,A 0 missense_variant 494C>A,494C>T Pro165His,Pro165Leu 165 389 Gene_25610_26779 XBA84235.1 > PP810610 26281 hCoV229E_Rluc C,T 0 missense_variant 671C>T Thr224Ile 224 389 Gene_25610_26779 XBA84235.1 > PP810610 26281 p10_DMSO C,T 0 missense_variant 671C>T Thr224Ile 224 389 Gene_25610_26779 XBA84235.1 > PP810610 26281 p10_K22 C,T 0 missense_variant 671C>T Thr224Ile 224 389 Gene_25610_26779 XBA84235.1 > PP810610 26281 p10_K7523 C,T 0 missense_variant 671C>T Thr224Ile 224 389 Gene_25610_26779 XBA84235.1 > PP810610 26307 hCoV229E_Rluc C,A 0 missense_variant 697C>A Gln233Lys 233 389 Gene_25610_26779 XBA84235.1 > PP810610 26307 p10_DMSO C,A 0 missense_variant 697C>A Gln233Lys 233 389 Gene_25610_26779 XBA84235.1 > PP810610 26307 p10_K22 C,A 0 missense_variant 697C>A Gln233Lys 233 389 Gene_25610_26779 XBA84235.1 > PP810610 26307 p10_K7523 C,A 0 missense_variant 697C>A Gln233Lys 233 389 Gene_25610_26779 XBA84235.1 > PP810610 26411 hCoV229E_Rluc C,A 0 synonymous_variant 801C>A Pro267Pro 267 389 Gene_25610_26779 
XBA84235.1 > PP810610 26411 p10_DMSO C,A 0 synonymous_variant 801C>A Pro267Pro 267 389 Gene_25610_26779 XBA84235.1 > PP810610 26411 p10_K22 C,A 0 synonymous_variant 801C>A Pro267Pro 267 389 Gene_25610_26779 XBA84235.1 > PP810610 26411 p10_K7523 C,A 0 synonymous_variant 801C>A Pro267Pro 267 389 Gene_25610_26779 XBA84235.1 > PP810610 26500 hCoV229E_Rluc C,A 0 missense_variant 890C>A Pro297Gln 297 389 Gene_25610_26779 XBA84235.1 > PP810610 26500 p10_DMSO C,A 0 missense_variant 890C>A Pro297Gln 297 389 Gene_25610_26779 XBA84235.1 > PP810610 26500 p10_K22 C,A 0 missense_variant 890C>A Pro297Gln 297 389 Gene_25610_26779 XBA84235.1 > PP810610 26500 p10_K7523 C,A 0 missense_variant 890C>A Pro297Gln 297 389 Gene_25610_26779 XBA84235.1 > PP810610 26746 hCoV229E_Rluc C,A 0 missense_variant 1136C>A Ser379Tyr 379 389 Gene_25610_26779 XBA84235.1 > PP810610 26746 p10_DMSO C,A 0 missense_variant 1136C>A Ser379Tyr 379 389 Gene_25610_26779 XBA84235.1 > PP810610 26746 p10_K22 C,A 0 missense_variant 1136C>A Ser379Tyr 379 389 Gene_25610_26779 XBA84235.1 > PP810610 26746 p10_K7523 C,A 0 missense_variant 1136C>A Ser379Tyr 379 389 Gene_25610_26779 XBA84235.1 > PP810610 26885 hCoV229E_Rluc T,A 0 intergenic_region n.26885T>A Gene_25610_26779-CHR_END Gene_25610_26779-CHR_END > PP810610 26885 p10_DMSO T,A 0 intergenic_region n.26885T>A Gene_25610_26779-CHR_END Gene_25610_26779-CHR_END > PP810610 26885 p10_K22 T,A 0.009 intergenic_region n.26885T>A Gene_25610_26779-CHR_END Gene_25610_26779-CHR_END > PP810610 26885 p10_K7523 T,A 0.011 intergenic_region n.26885T>A Gene_25610_26779-CHR_END Gene_25610_26779-CHR_END TODOs: Schnaps-Idee: we can organize the results with a additional column 2025, so at the end: #chr pos n.a. 
alleles iSNV_freq eff_type eff_codon_dna eff_aa eff_aa_pos eff_prot_len eff_gene eff_protein #chr pos sample2024 sample2025 alleles iSNV_freq eff_type eff_codon_dna eff_aa eff_aa_pos eff_prot_len eff_gene eff_protein hCoV229E_Rluc That means we need to delete the SNP results of 2025 for hCoV229E_Rluc, p10_DMSO, p10_K22, p10_K7523. We have only three new samples of data. If the SNP is completely new in 2025, the 2024 data should all be ‘0’. Please generate the report according to the SNP comparison between 2024 and 2025: !!!!TODO_TOMORROW!!!!: 1. Use the following report, but copy the results of 2024 to a new table so that we can unify the results! Mark all newly added results yellow. 2. If the SNP is completely new in 2025, the 2024 data should all be ‘0’, and all 7 entries should be marked yellow. 3. Create one table for 0.01 and one for 0.05; in this way, we can also present the 2024_0.01 results. 4. Copy the pipeline process to xgenes.com! 2024: chr pos sample alleles iSNV_freq eff_type eff_codon_dna eff_aa eff_aa_pos eff_prot_len eff_gene eff_protein PP810610 1492 hCoV229E_Rluc T,A 0.207 synonymous_variant 1275T>A Thr425Thr 425 6758 Gene_217_20492 XBA84229.1 PP810610 1492 p10_DMSO T,A 0.081 synonymous_variant 1275T>A Thr425Thr 425 6758 Gene_217_20492 XBA84229.1 PP810610 1492 p10_K22 T,A 0.854 synonymous_variant 1275T>A Thr425Thr 425 6758 Gene_217_20492 XBA84229.1 PP810610 1492 p10_K7523 T,A 0.229 synonymous_variant 1275T>A Thr425Thr 425 6758 Gene_217_20492 XBA84229.1 PP810610 8289 hCoV229E_Rluc C,A 0.325 missense_variant 8072C>A Ala2691Asp 2691 6758 Gene_217_20492 XBA84229.1 PP810610 8289 p10_DMSO C,A 0.028 missense_variant 8072C>A Ala2691Asp 2691 6758 Gene_217_20492 XBA84229.1 PP810610 8289 p10_K22 C,A 0 missense_variant 8072C>A Ala2691Asp 2691 6758 Gene_217_20492 XBA84229.1 PP810610 8289 p10_K7523 C,A 0.831 missense_variant 8072C>A Ala2691Asp 2691 6758 Gene_217_20492 XBA84229.1 PP810610 8294 hCoV229E_Rluc A,G 0.179 missense_variant 8077A>G Lys2693Glu 2693 6758 Gene_217_20492 
XBA84229.1 PP810610 8294 p10_DMSO A,G 0.024 missense_variant 8077A>G Lys2693Glu 2693 6758 Gene_217_20492 XBA84229.1 PP810610 8294 p10_K22 A,G 0.074 missense_variant 8077A>G Lys2693Glu 2693 6758 Gene_217_20492 XBA84229.1 PP810610 8294 p10_K7523 A,G 0 missense_variant 8077A>G Lys2693Glu 2693 6758 Gene_217_20492 XBA84229.1 PP810610 9174 hCoV229E_Rluc G,A 0 missense_variant 8957G>A Cys2986Tyr 2986 6758 Gene_217_20492 XBA84229.1 PP810610 9174 p10_DMSO G,A 0 missense_variant 8957G>A Cys2986Tyr 2986 6758 Gene_217_20492 XBA84229.1 PP810610 9174 p10_K22 G,A 0 missense_variant 8957G>A Cys2986Tyr 2986 6758 Gene_217_20492 XBA84229.1 PP810610 9174 p10_K7523 G,A 0.066 missense_variant 8957G>A Cys2986Tyr 2986 6758 Gene_217_20492 XBA84229.1 PP810610 10239 hCoV229E_Rluc T,G 0 missense_variant 10022T>G Val3341Gly 3341 6758 Gene_217_20492 XBA84229.1 PP810610 10239 p10_DMSO T,G 0 missense_variant 10022T>G Val3341Gly 3341 6758 Gene_217_20492 XBA84229.1 PP810610 10239 p10_K22 T,G 0.055 missense_variant 10022T>G Val3341Gly 3341 6758 Gene_217_20492 XBA84229.1 PP810610 10239 p10_K7523 T,G 0 missense_variant 10022T>G Val3341Gly 3341 6758 Gene_217_20492 XBA84229.1 PP810610 10310 hCoV229E_Rluc G,A 0 missense_variant 10093G>A Val3365Ile 3365 6758 Gene_217_20492 XBA84229.1 PP810610 10310 p10_DMSO G,A 0 missense_variant 10093G>A Val3365Ile 3365 6758 Gene_217_20492 XBA84229.1 PP810610 10310 p10_K22 G,A 0 missense_variant 10093G>A Val3365Ile 3365 6758 Gene_217_20492 XBA84229.1 PP810610 10310 p10_K7523 G,A 0.156 missense_variant 10093G>A Val3365Ile 3365 6758 Gene_217_20492 XBA84229.1 PP810610 10871 hCoV229E_Rluc C,T 0.006 missense_variant 10654C>T Leu3552Phe 3552 6758 Gene_217_20492 XBA84229.1 PP810610 10871 p10_DMSO C,T 0.058 missense_variant 10654C>T Leu3552Phe 3552 6758 Gene_217_20492 XBA84229.1 PP810610 10871 p10_K22 C,T 1 missense_variant 10654C>T Leu3552Phe 3552 6758 Gene_217_20492 XBA84229.1 PP810610 10871 p10_K7523 C,T 0.823 missense_variant 10654C>T Leu3552Phe 3552 6758 Gene_217_20492 
XBA84229.1 PP810610 10898 hCoV229E_Rluc G,A 0.012 missense_variant 10681G>A Gly3561Ser 3561 6758 Gene_217_20492 XBA84229.1 PP810610 10898 p10_DMSO G,A 0.036 missense_variant 10681G>A Gly3561Ser 3561 6758 Gene_217_20492 XBA84229.1 PP810610 10898 p10_K22 G,A 0 missense_variant 10681G>A Gly3561Ser 3561 6758 Gene_217_20492 XBA84229.1 PP810610 10898 p10_K7523 G,A 0.064 missense_variant 10681G>A Gly3561Ser 3561 6758 Gene_217_20492 XBA84229.1 PP810610 11577 hCoV229E_Rluc A,C 0 missense_variant 11360A>C Glu3787Ala 3787 6758 Gene_217_20492 XBA84229.1 PP810610 11577 p10_DMSO A,C 0.184 missense_variant 11360A>C Glu3787Ala 3787 6758 Gene_217_20492 XBA84229.1 PP810610 11577 p10_K22 A,C 0 missense_variant 11360A>C Glu3787Ala 3787 6758 Gene_217_20492 XBA84229.1 PP810610 11577 p10_K7523 A,C 0 missense_variant 11360A>C Glu3787Ala 3787 6758 Gene_217_20492 XBA84229.1 PP810610 18640 hCoV229E_Rluc T,G 0 missense_variant 18424T>G Phe6142Val 6142 6758 Gene_217_20492 XBA84229.1 PP810610 18640 p10_DMSO T,G 0 missense_variant 18424T>G Phe6142Val 6142 6758 Gene_217_20492 XBA84229.1 PP810610 18640 p10_K22 T,G 0 missense_variant 18424T>G Phe6142Val 6142 6758 Gene_217_20492 XBA84229.1 PP810610 18640 p10_K7523 T,G 0.055 missense_variant 18424T>G Phe6142Val 6142 6758 Gene_217_20492 XBA84229.1 PP810610 21027 hCoV229E_Rluc C,T 0 missense_variant 533C>T Thr178Ile 178 1173 Gene_20494_24015 XBA84230.1 PP810610 21027 p10_DMSO C,T 0.186 missense_variant 533C>T Thr178Ile 178 1173 Gene_20494_24015 XBA84230.1 PP810610 21027 p10_K22 C,T 0 missense_variant 533C>T Thr178Ile 178 1173 Gene_20494_24015 XBA84230.1 PP810610 21027 p10_K7523 C,T 0.032 missense_variant 533C>T Thr178Ile 178 1173 Gene_20494_24015 XBA84230.1 PP810610 21633 hCoV229E_Rluc T,C 0 missense_variant 1139T>C Val380Ala 380 1173 Gene_20494_24015 XBA84230.1 PP810610 21633 p10_DMSO T,C 0.08 missense_variant 1139T>C Val380Ala 380 1173 Gene_20494_24015 XBA84230.1 PP810610 21633 p10_K22 T,C 0 missense_variant 1139T>C Val380Ala 380 1173 
Gene_20494_24015 XBA84230.1 PP810610 21633 p10_K7523 T,C 0 missense_variant 1139T>C Val380Ala 380 1173 Gene_20494_24015 XBA84230.1 PP810610 22215 hCoV229E_Rluc T,G 0 missense_variant 1721T>G Val574Gly 574 1173 Gene_20494_24015 XBA84230.1 PP810610 22215 p10_DMSO T,G 0 missense_variant 1721T>G Val574Gly 574 1173 Gene_20494_24015 XBA84230.1 PP810610 22215 p10_K22 T,G 0 missense_variant 1721T>G Val574Gly 574 1173 Gene_20494_24015 XBA84230.1 PP810610 22215 p10_K7523 T,G 0.078 missense_variant 1721T>G Val574Gly 574 1173 Gene_20494_24015 XBA84230.1 PP810610 23435 hCoV229E_Rluc C,T 0 missense_variant 2941C>T Leu981Phe 981 1173 Gene_20494_24015 XBA84230.1 PP810610 23435 p10_DMSO C,T 0.032 missense_variant 2941C>T Leu981Phe 981 1173 Gene_20494_24015 XBA84230.1 PP810610 23435 p10_K22 C,T 0.995 missense_variant 2941C>T Leu981Phe 981 1173 Gene_20494_24015 XBA84230.1 PP810610 23435 p10_K7523 C,T 0.835 missense_variant 2941C>T Leu981Phe 981 1173 Gene_20494_24015 XBA84230.1 PP810610 25592 hCoV229E_Rluc T,C 0.012 missense_variant 673T>C Phe225Leu 225 225 Gene_24919_25596 XBA84234.1 PP810610 25592 p10_DMSO T,C 0.925 missense_variant 673T>C Phe225Leu 225 225 Gene_24919_25596 XBA84234.1 PP810610 25592 p10_K22 T,C 0 missense_variant 673T>C Phe225Leu 225 225 Gene_24919_25596 XBA84234.1 PP810610 25592 p10_K7523 T,C 0 missense_variant 673T>C Phe225Leu 225 225 Gene_24919_25596 XBA84234.1 2025: chr pos sample alleles iSNV_freq eff_type eff_codon_dna eff_aa eff_aa_pos eff_prot_len eff_gene eff_protein PP810610 1492 hCoV229E_Rluc T,A 0.207 synonymous_variant 1275T>A Thr425Thr 425 6758 Gene_217_20492 XBA84229.1 PP810610 1492 p10_DMSO T,A 0.081 synonymous_variant 1275T>A Thr425Thr 425 6758 Gene_217_20492 XBA84229.1 PP810610 1492 p10_K22 T,A 0.854 synonymous_variant 1275T>A Thr425Thr 425 6758 Gene_217_20492 XBA84229.1 PP810610 1492 p10_K7523 T,A 0.229 synonymous_variant 1275T>A Thr425Thr 425 6758 Gene_217_20492 XBA84229.1 PP810610 1492 p16_DMSO T,A 0.043 synonymous_variant 1275T>A Thr425Thr 425 
6758 Gene_217_20492 XBA84229.1 PP810610 1492 p16_K22 T,A 0.893 synonymous_variant 1275T>A Thr425Thr 425 6758 Gene_217_20492 XBA84229.1 PP810610 1492 p16_X7523 T,A 0.179 synonymous_variant 1275T>A Thr425Thr 425 6758 Gene_217_20492 XBA84229.1 PP810610 8289 hCoV229E_Rluc C,A 0.325 missense_variant 8072C>A Ala2691Asp 2691 6758 Gene_217_20492 XBA84229.1 PP810610 8289 p10_DMSO C,A 0.028 missense_variant 8072C>A Ala2691Asp 2691 6758 Gene_217_20492 XBA84229.1 PP810610 8289 p10_K22 C,A 0 missense_variant 8072C>A Ala2691Asp 2691 6758 Gene_217_20492 XBA84229.1 PP810610 8289 p10_K7523 C,A 0.831 missense_variant 8072C>A Ala2691Asp 2691 6758 Gene_217_20492 XBA84229.1 PP810610 8289 p16_DMSO C,A 0 missense_variant 8072C>A Ala2691Asp 2691 6758 Gene_217_20492 XBA84229.1 PP810610 8289 p16_K22 C,A 0 missense_variant 8072C>A Ala2691Asp 2691 6758 Gene_217_20492 XBA84229.1 PP810610 8289 p16_X7523 C,A 0.226 missense_variant 8072C>A Ala2691Asp 2691 6758 Gene_217_20492 XBA84229.1 PP810610 8294 hCoV229E_Rluc A,G 0.179 missense_variant 8077A>G Lys2693Glu 2693 6758 Gene_217_20492 XBA84229.1 PP810610 8294 p10_DMSO A,G 0.024 missense_variant 8077A>G Lys2693Glu 2693 6758 Gene_217_20492 XBA84229.1 PP810610 8294 p10_K22 A,G 0.074 missense_variant 8077A>G Lys2693Glu 2693 6758 Gene_217_20492 XBA84229.1 PP810610 8294 p10_K7523 A,G 0 missense_variant 8077A>G Lys2693Glu 2693 6758 Gene_217_20492 XBA84229.1 PP810610 8294 p16_DMSO A,G 0 missense_variant 8077A>G Lys2693Glu 2693 6758 Gene_217_20492 XBA84229.1 PP810610 8294 p16_K22 A,G 0.145 missense_variant 8077A>G Lys2693Glu 2693 6758 Gene_217_20492 XBA84229.1 PP810610 8294 p16_X7523 A,G 0 missense_variant 8077A>G Lys2693Glu 2693 6758 Gene_217_20492 XBA84229.1 PP810610 9174 hCoV229E_Rluc G,A 0 missense_variant 8957G>A Cys2986Tyr 2986 6758 Gene_217_20492 XBA84229.1 PP810610 9174 p10_DMSO G,A 0 missense_variant 8957G>A Cys2986Tyr 2986 6758 Gene_217_20492 XBA84229.1 PP810610 9174 p10_K22 G,A 0 missense_variant 8957G>A Cys2986Tyr 2986 6758 Gene_217_20492 
XBA84229.1 PP810610 9174 p10_K7523 G,A 0.066 missense_variant 8957G>A Cys2986Tyr 2986 6758 Gene_217_20492 XBA84229.1 PP810610 9174 p16_DMSO G,A 0 missense_variant 8957G>A Cys2986Tyr 2986 6758 Gene_217_20492 XBA84229.1 PP810610 9174 p16_K22 G,A 0 missense_variant 8957G>A Cys2986Tyr 2986 6758 Gene_217_20492 XBA84229.1 PP810610 9174 p16_X7523 G,A 0.025 missense_variant 8957G>A Cys2986Tyr 2986 6758 Gene_217_20492 XBA84229.1 PP810610 10145 hCoV229E_Rluc A,G 0 missense_variant 9928A>G Met3310Val 3310 6758 Gene_217_20492 XBA84229.1 PP810610 10145 p10_DMSO A,G 0 missense_variant 9928A>G Met3310Val 3310 6758 Gene_217_20492 XBA84229.1 PP810610 10145 p10_K22 A,G 0 missense_variant 9928A>G Met3310Val 3310 6758 Gene_217_20492 XBA84229.1 PP810610 10145 p10_K7523 A,G 0.045 missense_variant 9928A>G Met3310Val 3310 6758 Gene_217_20492 XBA84229.1 PP810610 10145 p16_DMSO A,G 0 missense_variant 9928A>G Met3310Val 3310 6758 Gene_217_20492 XBA84229.1 PP810610 10145 p16_K22 A,G 0 missense_variant 9928A>G Met3310Val 3310 6758 Gene_217_20492 XBA84229.1 PP810610 10145 p16_X7523 A,G 0.064 missense_variant 9928A>G Met3310Val 3310 6758 Gene_217_20492 XBA84229.1 PP810610 10239 hCoV229E_Rluc T,G 0 missense_variant 10022T>G Val3341Gly 3341 6758 Gene_217_20492 XBA84229.1 PP810610 10239 p10_DMSO T,G 0 missense_variant 10022T>G Val3341Gly 3341 6758 Gene_217_20492 XBA84229.1 PP810610 10239 p10_K22 T,G 0.055 missense_variant 10022T>G Val3341Gly 3341 6758 Gene_217_20492 XBA84229.1 PP810610 10239 p10_K7523 T,G 0 missense_variant 10022T>G Val3341Gly 3341 6758 Gene_217_20492 XBA84229.1 PP810610 10239 p16_DMSO T,G 0 missense_variant 10022T>G Val3341Gly 3341 6758 Gene_217_20492 XBA84229.1 PP810610 10239 p16_K22 T,G 0.08 missense_variant 10022T>G Val3341Gly 3341 6758 Gene_217_20492 XBA84229.1 PP810610 10239 p16_X7523 T,G 0 missense_variant 10022T>G Val3341Gly 3341 6758 Gene_217_20492 XBA84229.1 PP810610 10310 hCoV229E_Rluc G,A 0 missense_variant 10093G>A Val3365Ile 3365 6758 Gene_217_20492 XBA84229.1 
PP810610 10310 p10_DMSO G,A 0 missense_variant 10093G>A Val3365Ile 3365 6758 Gene_217_20492 XBA84229.1 PP810610 10310 p10_K22 G,A 0 missense_variant 10093G>A Val3365Ile 3365 6758 Gene_217_20492 XBA84229.1 PP810610 10310 p10_K7523 G,A 0.156 missense_variant 10093G>A Val3365Ile 3365 6758 Gene_217_20492 XBA84229.1 PP810610 10310 p16_DMSO G,A 0 missense_variant 10093G>A Val3365Ile 3365 6758 Gene_217_20492 XBA84229.1 PP810610 10310 p16_K22 G,A 0 missense_variant 10093G>A Val3365Ile 3365 6758 Gene_217_20492 XBA84229.1 PP810610 10310 p16_X7523 G,A 0.091 missense_variant 10093G>A Val3365Ile 3365 6758 Gene_217_20492 XBA84229.1 PP810610 10871 hCoV229E_Rluc C,T 0.006 missense_variant 10654C>T Leu3552Phe 3552 6758 Gene_217_20492 XBA84229.1 PP810610 10871 p10_DMSO C,T 0.058 missense_variant 10654C>T Leu3552Phe 3552 6758 Gene_217_20492 XBA84229.1 PP810610 10871 p10_K22 C,T 1 missense_variant 10654C>T Leu3552Phe 3552 6758 Gene_217_20492 XBA84229.1 PP810610 10871 p10_K7523 C,T 0.823 missense_variant 10654C>T Leu3552Phe 3552 6758 Gene_217_20492 XBA84229.1 PP810610 10871 p16_DMSO C,T 0.029 missense_variant 10654C>T Leu3552Phe 3552 6758 Gene_217_20492 XBA84229.1 PP810610 10871 p16_K22 C,T 0.991 missense_variant 10654C>T Leu3552Phe 3552 6758 Gene_217_20492 XBA84229.1 PP810610 10871 p16_X7523 C,T 0.878 missense_variant 10654C>T Leu3552Phe 3552 6758 Gene_217_20492 XBA84229.1 PP810610 10898 hCoV229E_Rluc G,A 0.012 missense_variant 10681G>A Gly3561Ser 3561 6758 Gene_217_20492 XBA84229.1 PP810610 10898 p10_DMSO G,A 0.036 missense_variant 10681G>A Gly3561Ser 3561 6758 Gene_217_20492 XBA84229.1 PP810610 10898 p10_K22 G,A 0 missense_variant 10681G>A Gly3561Ser 3561 6758 Gene_217_20492 XBA84229.1 PP810610 10898 p10_K7523 G,A 0.062 missense_variant 10681G>A Gly3561Ser 3561 6758 Gene_217_20492 XBA84229.1 PP810610 10898 p16_DMSO G,A 0.018 missense_variant 10681G>A Gly3561Ser 3561 6758 Gene_217_20492 XBA84229.1 PP810610 10898 p16_K22 G,A 0 missense_variant 10681G>A Gly3561Ser 3561 6758 
Gene_217_20492 XBA84229.1 PP810610 10898 p16_X7523 G,A 0.044 missense_variant 10681G>A Gly3561Ser 3561 6758 Gene_217_20492 XBA84229.1 PP810610 11577 hCoV229E_Rluc A,C 0 missense_variant 11360A>C Glu3787Ala 3787 6758 Gene_217_20492 XBA84229.1 PP810610 11577 p10_DMSO A,C 0.184 missense_variant 11360A>C Glu3787Ala 3787 6758 Gene_217_20492 XBA84229.1 PP810610 11577 p10_K22 A,C 0 missense_variant 11360A>C Glu3787Ala 3787 6758 Gene_217_20492 XBA84229.1 PP810610 11577 p10_K7523 A,C 0 missense_variant 11360A>C Glu3787Ala 3787 6758 Gene_217_20492 XBA84229.1 PP810610 11577 p16_DMSO A,C 0.946 missense_variant 11360A>C Glu3787Ala 3787 6758 Gene_217_20492 XBA84229.1 PP810610 11577 p16_K22 A,C 0 missense_variant 11360A>C Glu3787Ala 3787 6758 Gene_217_20492 XBA84229.1 PP810610 11577 p16_X7523 A,C 0 missense_variant 11360A>C Glu3787Ala 3787 6758 Gene_217_20492 XBA84229.1 PP810610 18640 hCoV229E_Rluc T,G 0 missense_variant 18424T>G Phe6142Val 6142 6758 Gene_217_20492 XBA84229.1 PP810610 18640 p10_DMSO T,G 0 missense_variant 18424T>G Phe6142Val 6142 6758 Gene_217_20492 XBA84229.1 PP810610 18640 p10_K22 T,G 0 missense_variant 18424T>G Phe6142Val 6142 6758 Gene_217_20492 XBA84229.1 PP810610 18640 p10_K7523 T,G 0.055 missense_variant 18424T>G Phe6142Val 6142 6758 Gene_217_20492 XBA84229.1 PP810610 18640 p16_DMSO T,G 0 missense_variant 18424T>G Phe6142Val 6142 6758 Gene_217_20492 XBA84229.1 PP810610 18640 p16_K22 T,G 0 missense_variant 18424T>G Phe6142Val 6142 6758 Gene_217_20492 XBA84229.1 PP810610 18640 p16_X7523 T,G 0.183 missense_variant 18424T>G Phe6142Val 6142 6758 Gene_217_20492 XBA84229.1 PP810610 19289 hCoV229E_Rluc G,T 0 missense_variant 19073G>T Gly6358Val 6358 6758 Gene_217_20492 XBA84229.1 PP810610 19289 p10_DMSO G,T 0 missense_variant 19073G>T Gly6358Val 6358 6758 Gene_217_20492 XBA84229.1 PP810610 19289 p10_K22 G,T 1 missense_variant 19073G>T Gly6358Val 6358 6758 Gene_217_20492 XBA84229.1 PP810610 19289 p10_K7523 G,T 0 missense_variant 19073G>T Gly6358Val 6358 6758 
Gene_217_20492 XBA84229.1 PP810610 19289 p16_DMSO G,T 0 missense_variant 19073G>T Gly6358Val 6358 6758 Gene_217_20492 XBA84229.1 PP810610 19289 p16_K22 G,T 0.988 missense_variant 19073G>T Gly6358Val 6358 6758 Gene_217_20492 XBA84229.1 PP810610 19289 p16_X7523 G,T 0 missense_variant 19073G>T Gly6358Val 6358 6758 Gene_217_20492 XBA84229.1 PP810610 21027 hCoV229E_Rluc C,T 0 missense_variant 533C>T Thr178Ile 178 1173 Gene_20494_24015 XBA84230.1 PP810610 21027 p10_DMSO C,T 0.186 missense_variant 533C>T Thr178Ile 178 1173 Gene_20494_24015 XBA84230.1 PP810610 21027 p10_K22 C,T 0 missense_variant 533C>T Thr178Ile 178 1173 Gene_20494_24015 XBA84230.1 PP810610 21027 p10_K7523 C,T 0.032 missense_variant 533C>T Thr178Ile 178 1173 Gene_20494_24015 XBA84230.1 PP810610 21027 p16_DMSO C,T 0.954 missense_variant 533C>T Thr178Ile 178 1173 Gene_20494_24015 XBA84230.1 PP810610 21027 p16_K22 C,T 0.009 missense_variant 533C>T Thr178Ile 178 1173 Gene_20494_24015 XBA84230.1 PP810610 21027 p16_X7523 C,T 0.158 missense_variant 533C>T Thr178Ile 178 1173 Gene_20494_24015 XBA84230.1 PP810610 21633 hCoV229E_Rluc T,C 0 missense_variant 1139T>C Val380Ala 380 1173 Gene_20494_24015 XBA84230.1 PP810610 21633 p10_DMSO T,C 0.08 missense_variant 1139T>C Val380Ala 380 1173 Gene_20494_24015 XBA84230.1 PP810610 21633 p10_K22 T,C 0 missense_variant 1139T>C Val380Ala 380 1173 Gene_20494_24015 XBA84230.1 PP810610 21633 p10_K7523 T,C 0 missense_variant 1139T>C Val380Ala 380 1173 Gene_20494_24015 XBA84230.1 PP810610 21633 p16_DMSO T,C 0.015 missense_variant 1139T>C Val380Ala 380 1173 Gene_20494_24015 XBA84230.1 PP810610 21633 p16_K22 T,C 0 missense_variant 1139T>C Val380Ala 380 1173 Gene_20494_24015 XBA84230.1 PP810610 21633 p16_X7523 T,C 0 missense_variant 1139T>C Val380Ala 380 1173 Gene_20494_24015 XBA84230.1 PP810610 22215 hCoV229E_Rluc T,G 0 missense_variant 1721T>G Val574Gly 574 1173 Gene_20494_24015 XBA84230.1 PP810610 22215 p10_DMSO T,G 0 missense_variant 1721T>G Val574Gly 574 1173 Gene_20494_24015 
XBA84230.1 PP810610 22215 p10_K22 T,G 0 missense_variant 1721T>G Val574Gly 574 1173 Gene_20494_24015 XBA84230.1 PP810610 22215 p10_K7523 T,G 0.078 missense_variant 1721T>G Val574Gly 574 1173 Gene_20494_24015 XBA84230.1 PP810610 22215 p16_DMSO T,G 0 missense_variant 1721T>G Val574Gly 574 1173 Gene_20494_24015 XBA84230.1 PP810610 22215 p16_K22 T,G 0 missense_variant 1721T>G Val574Gly 574 1173 Gene_20494_24015 XBA84230.1 PP810610 22215 p16_X7523 T,G 0.033 missense_variant 1721T>G Val574Gly 574 1173 Gene_20494_24015 XBA84230.1 PP810610 23435 hCoV229E_Rluc C,T 0 missense_variant 2941C>T Leu981Phe 981 1173 Gene_20494_24015 XBA84230.1 PP810610 23435 p10_DMSO C,T 0.032 missense_variant 2941C>T Leu981Phe 981 1173 Gene_20494_24015 XBA84230.1 PP810610 23435 p10_K22 C,T 1 missense_variant 2941C>T Leu981Phe 981 1173 Gene_20494_24015 XBA84230.1 PP810610 23435 p10_K7523 C,T 0.835 missense_variant 2941C>T Leu981Phe 981 1173 Gene_20494_24015 XBA84230.1 PP810610 23435 p16_DMSO C,T 0 missense_variant 2941C>T Leu981Phe 981 1173 Gene_20494_24015 XBA84230.1 PP810610 23435 p16_K22 C,T 0.958 missense_variant 2941C>T Leu981Phe 981 1173 Gene_20494_24015 XBA84230.1 PP810610 23435 p16_X7523 C,T 0.132 missense_variant 2941C>T Leu981Phe 981 1173 Gene_20494_24015 XBA84230.1 PP810610 24738 hCoV229E_Rluc C,A,T 0 missense_variant 64C>A,64C>T Leu22Ile,Leu22Phe 22 77 Gene_24674_24907 XBA84233.1 PP810610 24738 p10_DMSO C,A,T 0.011 missense_variant 64C>A,64C>T Leu22Ile,Leu22Phe 22 77 Gene_24674_24907 XBA84233.1 PP810610 24738 p10_K22 C,A,T 1 missense_variant 64C>A,64C>T Leu22Ile,Leu22Phe 22 77 Gene_24674_24907 XBA84233.1 PP810610 24738 p10_K7523 C,A,T 0.106 missense_variant 64C>A,64C>T Leu22Ile,Leu22Phe 22 77 Gene_24674_24907 XBA84233.1 PP810610 24738 p16_DMSO C,A,T 0 missense_variant 64C>A,64C>T Leu22Ile,Leu22Phe 22 77 Gene_24674_24907 XBA84233.1 PP810610 24738 p16_K22 C,A,T 1 missense_variant 64C>A,64C>T Leu22Ile,Leu22Phe 22 77 Gene_24674_24907 XBA84233.1 PP810610 24738 p16_X7523 C,A,T 0.958 
missense_variant 64C>A,64C>T Leu22Ile,Leu22Phe 22 77 Gene_24674_24907 XBA84233.1 PP810610 25025 hCoV229E_Rluc C,T 0 missense_variant 106C>T His36Tyr 36 225 Gene_24919_25596 XBA84234.1 PP810610 25025 p10_DMSO C,T 0.049 missense_variant 106C>T His36Tyr 36 225 Gene_24919_25596 XBA84234.1 PP810610 25025 p10_K22 C,T 0 missense_variant 106C>T His36Tyr 36 225 Gene_24919_25596 XBA84234.1 PP810610 25025 p10_K7523 C,T 0 missense_variant 106C>T His36Tyr 36 225 Gene_24919_25596 XBA84234.1 PP810610 25025 p16_DMSO C,T 0.057 missense_variant 106C>T His36Tyr 36 225 Gene_24919_25596 XBA84234.1 PP810610 25025 p16_K22 C,T 0 missense_variant 106C>T His36Tyr 36 225 Gene_24919_25596 XBA84234.1 PP810610 25025 p16_X7523 C,T 0.016 missense_variant 106C>T His36Tyr 36 225 Gene_24919_25596 XBA84234.1 PP810610 25592 hCoV229E_Rluc T,C 0.012 missense_variant 673T>C Phe225Leu 225 225 Gene_24919_25596 XBA84234.1 PP810610 25592 p10_DMSO T,C 0.925 missense_variant 673T>C Phe225Leu 225 225 Gene_24919_25596 XBA84234.1 PP810610 25592 p10_K22 T,C 0 missense_variant 673T>C Phe225Leu 225 225 Gene_24919_25596 XBA84234.1 PP810610 25592 p10_K7523 T,C 0 missense_variant 673T>C Phe225Leu 225 225 Gene_24919_25596 XBA84234.1 PP810610 25592 p16_DMSO T,C 0.935 missense_variant 673T>C Phe225Leu 225 225 Gene_24919_25596 XBA84234.1 PP810610 25592 p16_K22 T,C 0 missense_variant 673T>C Phe225Leu 225 225 Gene_24919_25596 XBA84234.1 PP810610 25592 p16_X7523 T,C 0.013 missense_variant 673T>C Phe225Leu 225 225 Gene_24919_25596 XBA84234.1 the mail I generate answer: Lieber Jiabin, wir hatten im Sommer 2024 ein Projekt gemeinsam mit Thomas Pietschmann zu 229E Coronavirus, wo wir sequenziert hatten und die Adaption des Virus durch Variantenanalyse bestimmt hatten. Ich habe Dir die alten Auswertungen angehängt. Die Viren sind nun ohne Selektionsdruck weiterpassagiert worden und die Frage ist ob die damals aufgefundenen Mutationen erhalten bleiben oder verloren gehen. 
Könntest Du diese drei Proben (siehe link von Patrick https://public.leibniz-liv.de/sharing/tuBWPq3ca ) im Vergleich zu dem adaptierten Virus von 2024 analysieren? Viele Grüße Nicole 7. Merge intra- and inter-host variants, comparing the variants to the alignments of the assemblies to confirm their correctness. cat NC_001348.fasta viralngs/data/02_assembly/VZV_20S.fasta viralngs/data/02_assembly/VZV_60S.fasta > aligned_1.fasta mafft --clustalout aligned_1.fasta > aligned_1.aln #~/Scripts/convert_fasta_to_clustal.py aligned_1.fasta_orig aligned_1.aln ~/Scripts/convert_clustal_to_clustal.py aligned_1.aln aligned_1_.aln #manually delete the position with all or '-' in aligned_1_.aln ~/Scripts/check_sequence_differences.py aligned_1_.aln ~/Scripts/check_sequence_differences.py aligned_1_.aln > aligned_1.res grep -v " = n" aligned_1.res > aligned_1_.res cat NC_001348.fasta viralngs/tmp/02_assembly/VZV_20S.assembly4-refined.fasta viralngs/tmp/02_assembly/VZV_60S.assembly4-refined.fasta > aligned_1.fasta mafft --clustalout aligned_1.fasta > aligned_1.aln ~/Scripts/convert_clustal_to_clustal.py aligned_1.aln aligned_1_.aln ~/Scripts/check_sequence_differences.py aligned_1_.aln > aligned_1.res grep -v " = n" aligned_1.res > aligned_1_.res #Differences found at the following positions (150): Position 8956: OP297860.1 = A, HSV1_S1-1 = A, HSV-Klinik_S2-1 = G Position 8991: OP297860.1 = A, HSV1_S1-1 = A, HSV-Klinik_S2-1 = C Position 8992: OP297860.1 = T, HSV1_S1-1 = C, HSV-Klinik_S2-1 = C Position 8995: OP297860.1 = T, HSV1_S1-1 = T, HSV-Klinik_S2-1 = C Position 9190: OP297860.1 = T, HSV1_S1-1 = A, HSV-Klinik_S2-1 = T * Position 13659: OP297860.1 = G, HSV1_S1-1 = T, HSV-Klinik_S2-1 = G * Position 47969: OP297860.1 = C, HSV1_S1-1 = T, HSV-Klinik_S2-1 = C * Position 53691: OP297860.1 = G, HSV1_S1-1 = T, HSV-Klinik_S2-1 = G * Position 55501: OP297860.1 = T, HSV1_S1-1 = C, HSV-Klinik_S2-1 = C * Position 63248: OP297860.1 = G, HSV1_S1-1 = T, HSV-Klinik_S2-1 = G Position 63799: OP297860.1 
= T, HSV1_S1-1 = C, HSV-Klinik_S2-1 = T * Position 64328: OP297860.1 = C, HSV1_S1-1 = A, HSV-Klinik_S2-1 = C Position 65179: OP297860.1 = T, HSV1_S1-1 = T, HSV-Klinik_S2-1 = C * Position 65225: OP297860.1 = G, HSV1_S1-1 = G, HSV-Klinik_S2-1 = A * Position 95302: OP297860.1 = C, HSV1_S1-1 = A, HSV-Klinik_S2-1 = C gunzip isnvs.annot.txt.gz ~/Scripts/filter_isnv.py isnvs.annot.txt 0.05 cut -d$'\t' filtered_isnvs.annot.txt -f1-7 chr pos sample patient time alleles iSNV_freq OP297860 13203 HSV1_S1 HSV1_S1 T,C,A 1.0 OP297860 13203 HSV-Klinik_S2 HSV-Klinik_S2 T,C,A 1.0 OP297860 13522 HSV1_S1 HSV1_S1 G,T 1.0 OP297860 13522 HSV-Klinik_S2 HSV-Klinik_S2 G,T 0.008905554253573941 OP297860 13659 HSV1_S1 HSV1_S1 G,T 1.0 OP297860 13659 HSV-Klinik_S2 HSV-Klinik_S2 G,T 0.008383233532934131 ~/Scripts/convert_clustal_to_fasta.py aligned_1_.aln aligned_1.fasta samtools faidx aligned_1.fasta samtools faidx aligned_1.fasta OP297860.1 > OP297860.1.fasta samtools faidx aligned_1.fasta HSV1_S1-1 > HSV1_S1-1.fasta samtools faidx aligned_1.fasta HSV-Klinik_S2-1 > HSV-Klinik_S2-1.fasta seqkit seq OP297860.1.fasta -w 70 > OP297860.1_w70.fasta diff OP297860.1_w70.fasta ../../refsel_db/refsel.fasta 8. Consensus sequences of each and of all isolates cp data/02_assembly/*.fasta ./ for sample in 838_S1 840_S2 820_S3 828_S4 815_S5 834_S6 808_S7 811_S8 837_S9 768_S10 773_S11 767_S12 810_S13 814_S14 10121-16_S15 7510-15_S16 828-17_S17 8806-15_S18 9881-16_S19 8981-14_S20; do for sample in p953-84660-tsek p938-16972-nra p942-88507-nra p943-98523-nra p944-103323-nra p947-105565-nra p948-112830-nra; do \ mv ${sample}.fasta ${sample}.fa cat all.fa ${sample}.fa >> all.fa done cat RSV_dedup.fa all.fa > RSV_all.fa mafft --adjustdirection RSV_all.fa > RSV_all.aln snp-sites RSV_all.aln -o RSV_all_.aln 9. 
Download all Human alphaherpesvirus 3 (Varicella-zoster virus) genomes Human alphaherpesvirus 3 acronym: HHV-3 VZV equivalent: Human herpes virus 3 Human alphaherpesvirus 3 (Varicella-zoster virus) * Human herpesvirus 3 strain Dumas * Human herpesvirus 3 strain Oka vaccine * Human herpesvirus 3 VZV-32 #Taxonomy ID: 10335 esearch -db nucleotide -query "txid10335[Organism:exp]" | efetch -format fasta -email j.huang@uke.de > genome_10335_ncbi.fasta python ~/Scripts/filter_fasta.py genome_10335_ncbi.fasta complete_genome_10335_ncbi.fasta #2041-->165 # ---- Download related genomes from ENA ---- https://www.ebi.ac.uk/ena/browser/view/10335 #Click "Sequence" and download "Counts" (2003) and "Taxon descendants count" (2005) if there is enough time! Download date: 11.03.2025. python ~/Scripts/filter_fasta.py ena_10335_sequence.fasta complete_genome_10335_ena_taxon_descendants_count.fasta #2005-->153 #python ~/Scripts/filter_fasta.py ena_10335_sequence_Counts.fasta complete_genome_10335_ena_Counts.fasta #xxx, 5.8G https://www.ebi.ac.uk/ena/browser/view/10239 https://www.ebi.ac.uk/ena/browser/view/2497569 https://www.ebi.ac.uk/ena/browser/view/Taxon:2497569 ena_10239_sequence.fasta esearch -db nucleotide -query "txid10239[Organism:exp]" | efetch -format fasta -email j.huang@uke.de > genome_10239_ncbi.fasta 10. Using Multi-CAR for scaffolding the contigs (If not useful, choose another scaffolding tool, e.g. https://github.com/malonge/RagTag) All contigs over 500 bp were successfully scaffolded to the graft genome using Multi-CAR (13), resulting in a chromosomal assembly of 4,506,689 bp. https://genome.cs.nthu.edu.tw/Multi-CAR/ https://github.com/ablab-nthu/Multi-CSAR 11. 
Using the bowtie of vrap to map the reads on ref_genome/reference.fasta (The reference refers to the closest related genome found from the list generated by vrap) (vrap) vrap/vrap.py -1 trimmed/VZV_20S_trimmed_P_1.fastq -2 trimmed/VZV_20S_trimmed_P_2.fastq -o VZV_20S_on_X04370 --host /home/jhuang/DATA/Data_Huang_Human_herpesvirus_3/X04370.fasta -t 100 -l 200 -g cd bowtie mv mapped mapped.sam samtools view -S -b mapped.sam > mapped.bam samtools sort mapped.bam -o mapped_sorted.bam samtools index mapped_sorted.bam samtools view -H mapped_sorted.bam samtools flagstat mapped_sorted.bam 12. Show the bw on IGV 13. Reports diff data/02_assembly/2040_04.fasta tmp/02_assembly/2040_04.assembly4-refined.fasta diff data/02_assembly/2040_04.fasta tmp/02_assembly/2040_04.assembly1-spades.fasta diff data/02_assembly/2040_04.fasta tmp/02_assembly/2040_04.assembly2-scaffolded.fasta diff data/02_assembly/2040_04.fasta tmp/02_assembly/2040_04.assembly2-gapfilled.fasta diff data/02_assembly/2040_04.fasta tmp/02_assembly/2040_04.assembly3-modify.fasta diff data/02_assembly/2040_04.fasta tmp/02_assembly/2040_04.assembly4-refined.fasta ./2040_04.assembly2-alternate_sequences.fasta ./2040_04.assembly2-scaffold_ref.fasta # ----------------------------------------- END ----------------------------------------- Lieber Jiabin, wir hatten im Sommer 2024 ein Projekt gemeinsam mit Thomas Pietschmann zu 229E Coronavirus, wo wir sequenziert hatten und die Adaption des Virus durch Variantenanalyse bestimmt hatten. Ich habe Dir die alten Auswertungen angehängt. Die Viren sind nun ohne Selektionsdruck weiterpassagiert worden und die Frage ist ob die damals aufgefundenen Mutationen erhalten bleiben oder verloren gehen. Könntest Du diese drei Proben (siehe link von Patrick https://public.leibniz-liv.de/sharing/tuBWPq3ca) im Vergleich zu dem adaptierten Virus von 2024 analysieren? 
Viele Grüße Nicole 亲爱的Jiabin, 我们在2024年夏天与Thomas Pietschmann一起进行了一项关于229E冠状病毒的项目,我们进行了测序,并通过变异分析确定了病毒的适应性。我已经将之前的分析结果附在了邮件中。 这些病毒现在已经在没有选择压力的情况下继续通过传代,我们的问题是,之前发现的突变是否会保持下来还是会消失。 你能否分析一下这三份样本(请参阅Patrick提供的链接:https://public.leibniz-liv.de/sharing/tuBWPq3ca),并与2024年适应的病毒进行比较? 此致 Nicole Von: Blümke, Patrick Gesendet: Freitag, 9. Mai 2025 16:26 An: ‘nfischer@uke.de’ Betreff: [EXT] RE: Re: Re: [EXTERN] NGS von adaptiertem hCoV-229E Virus Liebe Nicole, wir haben die 3 RNA-Proben von Thomas Pietschmann mittlerweile sequenziert und du kannst die Daten hier herunterladen: https://public.leibniz-liv.de/sharing/tuBWPq3ca LG und ein schönes Wochenende, Patrick From: nfischer@uke.de Sent: Mittwoch, 23. April 2025 12:42 To: Blümke, Patrick Subject: WG: [EXT] Re: Re: [EXTERN] NGS von adaptiertem hCoV-229E Virus Lieber Patrick, Thomas Pietschmann/Twin Core Hannover hat drei weitere Proben geschickt, RNA aus dem Überstand von passagiertem CoV229E. Er würde sie gerne bei Euch nochmal sequenzieren und Jiabin würde die Auswertung entsprechend der vorherigen Analyse machen. Ich weiß nicht, wie tief Ihr damals sequenziert habt. Ich denke 5Mio reads? Kann ich Euch die RNA bringen und habt Ihr die Kapazität diese Libraries zu machen und zu sequenzieren? LG Nicole Von: Köhler, Natalie Gesendet: Freitag, 4. April 2025 11:21 An: Nicole Fischer ; Pietschmann, Thomas ; Sibylle Haid ; Nicole Fischer ; Pietschmann, Thomas ; Sibylle Haid Betreff: [EXT] NGS von adaptiertem hCoV-229E Virus Liebe Nicole, vielen Dank nochmal für die Sequenzierung unseres adaptierten hCoV-229E Virus. Die Ergebnisse haben uns sehr weitergeholfen. Wir haben nun unsere adaptierten hCoV-229E Viruspopulationen ohne Selektionsdruck weiter passagiert, um zu schauen, ob unsere Mutationen und die Resistenz wieder verloren geht oder erhalten bleibt. Dazu haben wir auch bereits einen ersten Phänotyp zeigen können, den wir nochmal validieren wollen. 
Thomas und ich haben uns daher gefragt, ob Ihr nach der Validierung unsere neuen Virusstocks nochmal sequenzieren könntet (es wären drei Proben)? Vielen Dank schon mal für die Hilfe und ich wünsche ein schönes sonniges Wochenende! Liebe Grüße aus Hannover Natalie ——– Natalie Köhler, M. Sc. PhD Student AG Prof. Thomas Pietschmann TWINCORE – Centre for Experimental and Clinical Infection Research Institute for Experimental Virology Feodor-Lynen-Str. 7 | D-30625 Hannover Tel.: +49 (0)511 22002-7138 E-Mail: natalie.koehler@twincore.de Von: “Pietschmann, Thomas” Datum: Mittwoch, 26. Juni 2024 um 10:36 An: Nicole Fischer , “Haid, Sibylle (Twincore)” , Köhler, Natalie Betreff: Re: [EXTERN] AW: [EXT] AW: NGS von adaptiertem CoV-229E Virus Liebe Nicole, liebe alle, Dir und deinen Teammitgliedern, die beteiligt waren vielen Dank für die Analyse der Viren. Das sind sehr interessante Ergebnisse. Wir finden nicht die vorbeschriebenen Mutationen, die nach Selektion mit K22 erzeugt wurden (siehe Lundin et al in der Anlage V..) Wir finden zwei identische Mutationen, die sowohl bei K22 als auch bei K7523 selektiert wurden (Leu3552Phe, Leu981Phe) Wir finden eine zusätzliche Mutation für K7523 (Ala2691Asp) Keine der Mutationen ist direkt in Nsp6 (Leu2552Phe müsste der N-terminus von Nsp7 sein); habe auf die schnelle kein annotiertes 229E Genom mit unserer Sequenz gefunden.. stattdessen AGT21366.1 verwendet, dort ist allerdings 3552 ein Serin (https://www.ncbi.nlm.nih.gov/protein/AGT21366.1) Die Mutation die nur durch K7523 (nicht durch K22) selektiert wurde sitzt in nsp4 (Ala 2691Asp; https://www.ncbi.nlm.nih.gov/protein/AGT21366.1) Die gemeinsame zweite gemeinsame Mutation Leu981Phe sitzt in nsp3, Papain-like protease PLpro (https://www.ncbi.nlm.nih.gov/protein/AGT21366.1) PLpro spaltet nur nsp1-4, die Spaltung zw. nsp6-7 wird durch CLpro gemacht. Natalie, kannst Du all das bitte nachprüfen? Am besten mit einem annotierten hCoV229E Genom, das unserer Sequenz entspricht. 
Wir sollte nun sofort in Kooperation die Varianten klonieren und testen, welche Varienten-(Kombinationen) Resistenz vermitteln. Spannend wird auch die Selektion auf SARS-CoV-2. Es könnte auch interessant sein, die Selektion nochmal auf hCoV229E zu wiederholen (wieder selbige Mutation)… vielleicht auch eine Selektion in einer anderen Zelllinie… gibt es weitere Lösungen oder immer selbige? Viele Grüße Thomas PS: Findest du weitere Papiere, die K22 Resistenzen beschreiben? Wenn ja, wo liegen die? Von: “nfischer@uke.de” Datum: Dienstag, 25. Juni 2024 um 18:58 An: Sibylle Haid Cc: “‘Köhler,Natalie'” , Thomas Pietschmann Betreff: [EXTERN] AW: [EXT] AW: NGS von adaptiertem CoV-229E Virus Liebe Sibylle, Liebe Natalie, lieber Thomas, anbei die Ergebnisse des variant callings der Proben. Es gab etwas Probleme mit der Referenzsequenz assembly, weshalb wir auf eine andere accession number ausweichen mussten. Jiabin hat PP810610 als Referenz genommen aufgrund des unvollständigen de novo assemblies der mitgeschickten Referenz. PP810610 ist nur wenig abweichend von den Bereichen des Reportervirus, die assembled werden konnten. Gerne beantworten wir Fragen zu den Analysen. Jiabin schaut sich nochmal eine Position genauer an, Ihr werdet es sehen, an Position xxx zeigt die DMSO Kontrolle eine Abweichung. Wir schauen uns gerade die IGV files dazu an, warum das so ist. Das könnt Ihr für jetzt erstmal ignorieren. Herzliche Grüße Nicole # ------------------------------------------------------------------- # ---- !! DEPRECATED mamba env configuration due to too many conflicts, use docker instead (see above) !! 
---- #mamba activate /home/jhuang/miniconda3/envs/viral-ngs4 mkdir viralngs ln -s ~/Tools/viral-ngs/Snakefile Snakefile ln -s ~/Tools/viral-ngs/bin bin cp ~/Tools/viral-ngs/refsel.acids refsel.acids cp ~/Tools/viral-ngs/lastal.acids lastal.acids cp ~/Tools/viral-ngs/config.yaml config.yaml cp ~/Tools/viral-ngs/samples-runs.txt samples-runs.txt cp ~/Tools/viral-ngs/samples-depletion.txt samples-depletion.txt cp ~/Tools/viral-ngs/samples-metagenomics.txt samples-metagenomics.txt cp ~/Tools/viral-ngs/samples-assembly.txt samples-assembly.txt cp ~/Tools/viral-ngs/samples-assembly-failures.txt samples-assembly-failures.txt # --- DEBUG: If the env disappeared, reinstall the env viral-ngs4 --- # --- Running time hints --- #Note that novoalign is not installed. The used Novoalign path: /home/jhuang/Tools/novocraft_v3/novoalign; the used gatk: /usr/local/bin/gatk using /home/jhuang/Tools/GenomeAnalysisTK-3.6/GenomeAnalysisTK.jar. #Samtools path: #Why, the samtools in the env is v1.6? #Novoalign path: /home/jhuang/Tools/novocraft_v3/novoalign #GATK path: /usr/local/bin/gatk # jar_file in the file: jar_file = '/home/jhuang/Tools/GenomeAnalysisTK-3.6/GenomeAnalysisTK.jar' # --- in config.yaml --- #GATK_PATH: "/home/jhuang/Tools/GenomeAnalysisTK-3.6" #NOVOALIGN_PATH: "/home/jhuang/Tools/novocraft_v3" mamba list or mamba list blast mamba create -n viral-ngs4 python=3.6 mamba activate viral-ngs4 mamba install blast=2.6.0 bmtagger biopython pysam pyyaml picard mvicuna pybedtools fastqc matplotlib spades last=876 -c conda-forge -c bioconda #mafft=7.221 --> mafft since └─ mafft 7.221** is not installable because it conflicts with any installable versions previously reported. 
mamba install cd-hit cd-hit-auxtools diamond gap2seq=2.1 mafft mummer4 muscle=3.8 parallel pigz prinseq samtools=1.6 tbl2asn trimmomatic trinity unzip vphaser2 bedtools -c r -c defaults -c conda-forge -c bioconda mamba install bwa mamba install vphaser2=2.0 # Solve conflict between bowtie, bowtie2 and snpeff mamba remove bowtie mamba install bowtie2 mamba remove snpeff mamba install snpeff=4.1l #which snpEff mamba install gatk=3.6 #DEBUG if FileNotFoundError: [Errno 2] No such file or directory: '/usr/local/bin/gatk': '/usr/local/bin/gatk' #IMPORTANT_UPDATE jar_file in the file /home/jhuang/mambaforge/envs/viral-ngs4/bin/gatk3 with "/home/jhuang/Tools/GenomeAnalysisTK-3.6/GenomeAnalysisTK.jar" #IMPORTANT_REPLACE "sudo cp /home/jhuang/mambaforge/envs/viral-ngs4/bin/gatk3 /usr/local/bin/gatk" #IMPORTANT_SET /home/jhuang/Tools/GenomeAnalysisTK-3.6 as GATK_PATH in config.yaml #IMPORTANT_CHECK if it works # java -jar /home/jhuang/Tools/GenomeAnalysisTK-3.6/GenomeAnalysisTK.jar -T RealignerTargetCreator --help # /usr/local/bin/gatk -T RealignerTargetCreator --help #IMPORTANT_NOTE that the env viral-ngs4 cannot be activated from the base env due to the python3-conflict! 
# ---- BUG_2025_1 ---- bin/taxon_filter.py deplete data/00_raw/1762_04.bam tmp/01_cleaned/1762_04.raw.bam tmp/01_cleaned/1762_04.bmtagger_depleted.bam tmp/01_cleaned/1762_04.rmdup.bam data/01_cleaned/1762_04.cleaned.bam --bmtaggerDbs /home/jhuang/REFs/viral_ngs_dbs/bmtagger_dbs_remove/hg19 /home/jhuang/REFs/viral_ngs_dbs/bmtagger_dbs_remove/GRCh37.68_ncRNA-GRCh37.68_transcripts-HS_rRNA_mitRNA /home/jhuang/REFs/viral_ngs_dbs/bmtagger_dbs_remove/metagenomics_contaminants_v3 --blastDbs /home/jhuang/REFs/viral_ngs_dbs/blast_dbs_remove/metag_v3.ncRNA.mRNA.mitRNA.consensus /home/jhuang/REFs/viral_ngs_dbs/blast_dbs_remove/hybsel_probe_adapters --threads 15 --srprismMemory 14250 --JVMmemory 50g --loglevel DEBUG 2025-05-22 12:10:18,313 - __init__:445:_attempt_install - DEBUG - Currently installed version of blast: 2.16.0-hc155240_3 2025-05-22 12:10:18,314 - __init__:448:_attempt_install - DEBUG - Expected version of blast: 2.6.0 2025-05-22 12:10:18,314 - __init__:449:_attempt_install - DEBUG - Incorrect version of blast installed. Removing it... 2025-05-23 09:58:43,151 - __init__:445:_attempt_install - DEBUG - Currently installed version of bmtagger: 3.101-h470a237_4 2025-05-23 09:58:45,326 - __init__:445:_attempt_install - DEBUG - Currently installed version of blast: 2.7.1-h4422958_6 2025-05-23 09:58:45,327 - __init__:448:_attempt_install - DEBUG - Expected version of blast: 2.6.0 2025-05-23 09:58:45,327 - __init__:449:_attempt_install - DEBUG - Incorrect version of blast installed. 
Removing it... # ---- # # Some intermediate commands that ran without errors # fastqc -f bam data/02_align_to_self/p10_K7523.bam -o reports/fastqc/p10_K7523 # bin/intrahost.py vphaser_one_sample data/02_align_to_self/p10_K7523.mapped.bam data/02_assembly/p10_K7523.fasta data/04_intrahost/vphaser2.p10_K7523.txt.gz --vphaserNumThreads 15 --removeDoublyMappedReads --minReadsEach 5 --maxBias 10 # bin/read_utils.py align_and_fix data/01_per_sample/p16_DMSO.cleaned.bam data/02_assembly/p16_DMSO.fasta --outBamAll data/02_align_to_self/p16_DMSO.bam --outBamFiltered data/02_align_to_self/p16_DMSO.mapped.bam --aligner novoalign --aligner_options '-r Random -l 20 -g 40 -x 20 -t 100 -k' --threads 15 # bin/intrahost.py merge_to_vcf ref_genome/reference.fasta data/04_intrahost/isnvs.vcf.gz --samples 2039_04 2040_04 1762_04 1243_2 875_04 --isnvs data/04_intrahost/vphaser2.2039_04.txt.gz data/04_intrahost/vphaser2.2040_04.txt.gz data/04_intrahost/vphaser2.1762_04.txt.gz data/04_intrahost/vphaser2.1243_2.txt.gz data/04_intrahost/vphaser2.875_04.txt.gz --alignments data/03_multialign_to_ref/aligned_1.fasta --strip_chr_version --parse_accession --loglevel DEBUG

Leave a Reply

Your email address will not be published. Required fields are marked *