From Genbank-ID to Phylogenetic Tree Visualization using roary, raxml and ete3

http://xgenes.com/article/article-content/371/identify-all-occurrences-of-phage-hh1-mt880870-in-s-epidermidis-st2-genomes-from-public-and-clinical-isolates/

  1. Downloads gff

     #for id in CP009257.1 CP000521.1 CP059040.1 CU459141.1 CU468230.2 CP141284.1 CP131470.1 CP002080.1 CP002177.1 CP041365.1; do
     #  efetch -db nuccore -id ${id} -format gff3 > ${id}.gff;
     #  efetch -db nuccore -id ${id} -format fasta > ${id}.fasta;
     #done
    
     echo -e "CP009257\nCP000521\nCP059040\nCU459141\nCU468230\nCP141284\nCP131470\nCP002080\nCP002177\nCP041365\nCP191205" > ids.txt
    
     cat ids.txt | while read id; do
         efetch -db nuccore -id $id -format gff3 > "${id}.gff"
         efetch -db nuccore -id $id -format fasta > "${id}.fasta"
         # Append the FASTA sequence to the GFF3 file with ##FASTA header
         echo "##FASTA" >> "${id}.gff"
         cat "${id}.fasta" >> "${id}.gff"
         rm "${id}.fasta"  # Optionally remove the separate FASTA file
  2. Roary

     mv *.gff roary_input
     (bacto) roary -f roary_out -e --mafft -p 100 roary_input/*.gff
  3. Generate phylogenetic tree using FastTree or raxml

     #iqtree -s core_gene_alignment.aln -m GTR+G -bb 1000 -nt AUTO
    
     (bacto) FastTree -nt roary_out/core_gene_alignment.aln > roary_out/core_gene_tree.nwk
    
     (bacto) raxml-ng --all \
       --msa roary_out/core_gene_alignment.aln \
       --model GTR+G \
       --bs-trees 1000 \
       --threads 40 \
       --prefix core_gene_tree_1000
    
       运行完后,你会得到:
    
       core_gene_tree_1000.raxml.bestTree : 最佳最大似然树。
       core_gene_tree_1000.raxml.bootstraps : 100 个 bootstrap 树。
       core_gene_tree_1000.raxml.support : 带有 bootstrap 支持值的树(就是你要的)。
  4. Install ete3_env

     mamba create -n ete3_env python=3.10 ete3 -c etetoolkit -y
     mamba activate ete3_env
     mamba install -c etetoolkit ete3 pyqt
  5. Phylogenetic Tree Visualization using ete3

     #(ete3_env) python3 ~/Scripts/label_tree.py roary_out/core_gene_tree.nwk
     (ete3_env) python3 ~/Scripts/label_tree.py core_gene_tree_1000.raxml.support
    
     #!/usr/bin/env python3
    
     from ete3 import Tree, TreeStyle, TextFace, NodeStyle, faces
     import sys
    
     #(ete3_env) jhuang@WS-2290C:~/DATA/Data_Tam_DNAseq_2025_AYE/REVIEW$ python3 ~/Scripts/label_tree.py core_gene_tree_1000.raxml.support
    
     # -------------------------
     group_colors = {
         "A. baumannii": "#1f77b4",
         "A. junii": "#ff7f0e",
         "A. pittii": "#2ca02c",
         "A. oleivorans": "#d62728",
         "A. tandoii": "#9467bd",
     }
    
     leaf_name_map = {
         "CP191205": "A. baumannii HKAB-1",
         "CP009257": "A. baumannii AB030",
         "CP000521": "A. baumannii ATCC 17978",
         "CP059040": "A. baumannii ATCC 19606",
         "CU459141": "A. baumannii AYE",
         "CU468230": "A. baumannii SDF",
         "CP141284": "A. junii H1",
         "CP131470": "A. junii SC22",
         "CP002080": "A. oleivorans DR1",
         "CP002177": "A. pittii PHEA-2",
         "CP041365": "A. tandoii SE63"
     }
    
     def get_group(label):
         for group in group_colors:
             if label.startswith(group):
                 return group
         return "Other"
    
     def colorize_node(node):
         if node.is_leaf():
             label = leaf_name_map.get(node.name, node.name)
             node.name = label
             group = get_group(label)
             color = group_colors.get(group, "black")
             face = TextFace(label, fsize=8, fgcolor=color)
             face.margin_left = 5  # <-- add margin to move label right from the point
             faces.add_face_to_node(face, node, column=0)
    
             nstyle = NodeStyle()
             nstyle["fgcolor"] = color
             nstyle["shape"] = "circle"
             node.set_style(nstyle)
    
     def render_tree(tree, filename, title="", circular=False):
         ts = TreeStyle()
         ts.mode = "c" if circular else "r"
         ts.show_leaf_name = False
         ts.title.add_face(TextFace(title, fsize=12, bold=True), column=0)
         ts.layout_fn = colorize_node
         ts.show_scale = True
         ts.branch_vertical_margin = 0
    
         # Customize scale bar:
         #ts.scale_len = 0.05  # Scale bar length in tree units
         #ts.scale_format = "%.2f"  # Format scale label to 2 decimals
    
         # Add a TextFace to show the scale you want
         #scale_face = TextFace("Scale: 0.05 substitutions/site", fsize=8)
         #ts.title.add_face(scale_face, column=1)
    
         tree.render(filename, w=1200, h=1200 if circular else 800, tree_style=ts)
    
     def main():
         if len(sys.argv) != 2:
             print("Usage: python3 label_tree.py 
    “) sys.exit(1) newick_file = sys.argv[1] try: with open(newick_file, “r”) as f: nwk_str = f.read().strip() if not nwk_str.endswith(“;”): nwk_str += “;” t = Tree(nwk_str, format=1) except Exception as e: print(f”[ERROR] Failed to load tree: {e}”) sys.exit(1) # Attempt to reroot at outgroup try: outgroup = t&”CP041365″ if outgroup != t: t.set_outgroup(outgroup) else: print(“[!] Outgroup is root already; skipping reroot.”) except Exception as e: print(f”[!] Warning: Could not reroot tree at baumannii clade: {e}”) #render_tree(t, “tree_colored_bootstrap.png”, title=””, circular=False) render_tree(t, “tree_colored_bootstrap.svg”, title=””, circular=False) print(“✅ Saved: tree_colored_bootstrap.svg”) if __name__ == “__main__”: main()

Leave a Reply

Your email address will not be published. Required fields are marked *