Author Archives: gene_x

MicrobiotaProcess for RA vs control

differential_abundance_analysis_RA_vs_control

https://bioconductor.org/packages/release/bioc/vignettes/MicrobiotaProcess/inst/doc//MicrobiotaProcess.html

1, prepare the R environment

    # Rscript MicrobiotaProcess.R
    # NOTE: quit R and log in to a fresh R environment before re-running;
    #       first remove the old cache: rm -rf Phyloseq*_cache

    # -- using R under base environment --
    # (base) jhuang@WS-2290C:~/DATA_A/Data_Nicole8_Lamprecht_new_PUBLISHED/core_diversity_e1300
    # mkdir figures
    # Knit the Phyloseq report to HTML.
    rmarkdown::render(
      input = "Phyloseq.Rmd",
      output_file = "Phyloseq.html"
    )

2, bridges other tools

    library(MicrobiotaProcess)
    library(microeco)
    library(ggalluvial)
    library(ggh4x)
    library(gghalves)
    library(tidyr)

    # Sample IDs kept for this comparison (kg* and ra* samples).
    selected_samples <- c(
      "kg001", "kg002", "kg003", "kg004", "kg005", "kg007", "kg009",
      "kg015", "kg016", "kg019", "kg020", "kg021", "kg022", "kg023",
      "kg025", "kg026", "kg027", "kg028", "kg029",
      "ra002", "ra003", "ra004", "ra005", "ra006", "ra007", "ra008",
      "ra009", "ra010", "ra013", "ra014", "ra015", "ra017", "ra018",
      "ra019", "ra020", "ra022", "ra023", "ra024", "ra025"
    )

    # Start from the full phyloseq object, then replace its OTU table by the
    # columns of the selected samples.
    ps.ng.tax_sel <- ps.ng.tax_abund
    otu_table(ps.ng.tax_sel) <- otu_table(ps.ng.tax_abund)[, selected_samples]

    # Convert the phyloseq object into an MPSE object for MicrobiotaProcess.
    mpse_abund <- as.MPSE(ps.ng.tax_sel)

3, rarefaction analysis

    # mp_rrarefy() subsamples at random; fix the seed so that the rarefied
    # table (and everything computed from it) is reproducible between runs.
    set.seed(1024)
    mpse_abund %<>% mp_rrarefy()
    mpse_abund %<>%
      mp_cal_rarecurve(
        .abundance = RareAbundance,
        chunks = 400
      )

    # Two colours, one per SampleType level, shared by the grouped panels.
    group_colors <- c("#1f78b4", "#e31a1c")

    # Panel 1: rarefaction curve of every sample.
    p1 <- mpse_abund %>%
      mp_plot_rarecurve(
        .rare = RareAbundanceRarecurve,
        .alpha = Observe
      )

    # Panel 2: per-sample curves coloured by SampleType.
    p2 <- mpse_abund %>%
      mp_plot_rarecurve(
        .rare = RareAbundanceRarecurve,
        .alpha = Observe,
        .group = SampleType
      ) +
      scale_color_manual(values = group_colors) +
      scale_fill_manual(values = group_colors, guide = "none")

    glimpse(mpse_abund)
    mpse_abund %>% print(width = 380, n = 2)

    # Panel 3: curves combined per SampleType (plot.group = TRUE).
    p3 <- mpse_abund %>%
      mp_plot_rarecurve(
        .rare = RareAbundanceRarecurve,
        .alpha = "Observe",
        .group = SampleType,
        plot.group = TRUE
      ) +
      scale_color_manual(values = group_colors) +
      scale_fill_manual(values = group_colors, guide = "none")

    png("rarefaction_of_samples_or_groups.png", width = 1080, height = 600)
    # Print explicitly: auto-printing only happens at top level, so the file
    # would stay empty if this code were source()d or wrapped in a function.
    # NOTE(review): `+` between ggplot objects needs patchwork attached; no
    # library(patchwork) call is visible here - confirm it is loaded upstream.
    print(p1 + p2 + p3)
    dev.off()

alpha diversity analysis

4, calculate alpha index and visualization

    library(ggplot2)
    library(MicrobiotaProcess)
    # Compute the alpha-diversity indices on the rarefied abundance.
    mpse_abund %<>%
      mp_cal_alpha(.abundance = RareAbundance)
    mpse_abund
    # NOTE mpse_abund contains 28 variables = 22 variables + Observe, Chao1,
    # ACE, Shannon, Simpson, Pielou

    # Alpha indices compared between the SampleType groups.
    f1 <- mpse_abund %>%
      mp_plot_alpha(
        .group = SampleType,
        .alpha = c(Observe, Chao1, ACE, Shannon, Simpson, Pielou)
      ) +
      scale_fill_manual(values = c("#1f78b4", "#e31a1c"), guide = "none") +
      scale_color_manual(values = c("#1f78b4", "#e31a1c"), guide = "none")

    # Same indices without a grouping variable.
    f2 <- mpse_abund %>%
      mp_plot_alpha(
        .alpha = c(Observe, Chao1, ACE, Shannon, Simpson, Pielou)
      )
    # ps.ng.tax_sel contains only pre samples --> f1 cannot be generated!

    png("alpha_diversity_comparison.png", width = 1400, height = 600)
    # Explicit print so the figure is written even when the code is source()d.
    print(f1 / f2)
    dev.off()

    # ---- 5, visualize taxonomy abundance (Class) ----

    mpse_abund %<>%
      mp_cal_abundance( # for each samples
        .abundance = RareAbundance
      ) %>%
      mp_cal_abundance( # for each groups
        .abundance = RareAbundance,
        .group = SampleType
      )
    mpse_abund

    # Top 20 classes per sample: relative abundance (p1) and counts (p2).
    p1 <- mpse_abund %>%
      mp_plot_abundance(
        .abundance = RareAbundance,
        taxa.class = Class,
        topn = 20,
        relative = TRUE
      )
    p2 <- mpse_abund %>%
      mp_plot_abundance(
        .abundance = RareAbundance,
        taxa.class = Class,
        topn = 20,
        relative = FALSE
      )
    png("relative_abundance_and_abundance.png", width = 1200, height = 600) # NOT PRODUCED!
# Write the stacked per-sample abundance panels to the open png device.
# Explicit print so the figure is written even when the code is source()d.
print(p1 / p2)
dev.off()

# Heatmaps of the top 20 classes: relative abundance (h1) and counts (h2).
h1 <- mpse_abund %>%
  mp_plot_abundance(
    .abundance = RareAbundance,
    .group = SampleType,
    taxa.class = Class,
    relative = TRUE,
    topn = 20,
    geom = "heatmap",
    features.dist = "euclidean",
    features.hclust = "average",
    sample.dist = "bray",
    sample.hclust = "average"
  )
h2 <- mpse_abund %>%
  mp_plot_abundance(
    .abundance = RareAbundance,
    .group = SampleType,
    taxa.class = Class,
    relative = FALSE,
    topn = 20,
    geom = "heatmap",
    features.dist = "euclidean",
    features.hclust = "average",
    sample.dist = "bray",
    sample.hclust = "average"
  )
# the character (scale or theme) of figure can be adjusted by set_scale_theme
# refer to the mp_plot_dist
png("relative_abundance_and_abundance_heatmap.png", width = 1200, height = 600)
print(aplot::plot_list(gglist = list(h1, h2), tag_levels = "A"))
dev.off()

# visualize the relative abundance of top 20 classes for each .group (SampleType)
p3 <- mpse_abund %>%
  mp_plot_abundance(
    .abundance = RareAbundance,
    .group = SampleType,
    taxa.class = Class,
    topn = 20,
    plot.group = TRUE
  )
# visualize the abundance of top 20 classes for each .group (SampleType)
p4 <- mpse_abund %>%
  mp_plot_abundance(
    .abundance = RareAbundance,
    .group = SampleType,
    taxa.class = Class,
    topn = 20,
    relative = FALSE,
    plot.group = TRUE
  )
png("relative_abundance_and_abundance_groups.png", width = 1000, height = 1000)
print(p3 / p4)
dev.off()

# ---- beta diversity analysis ----
# ---- 6, calculate the distance between samples or groups ----

# Standardize the raw abundance (adds the `hellinger` column used below),
# then compute Bray-Curtis distances on it.
mpse_abund %<>% mp_decostand(.abundance = Abundance)
mpse_abund %<>% mp_cal_dist(.abundance = hellinger, distmethod = "bray")
mpse_abund

p1 <- mpse_abund %>% mp_plot_dist(.distmethod = bray)
png("distance_between_samples.png", width = 1000, height = 1000)
print(p1)
dev.off()

# when .group is provided, the dot heatmap plot with group information will be returned.
p2 <- mpse_abund %>% mp_plot_dist(.distmethod = bray, .group = SampleType)
# The scale or theme of dot heatmap plot can be adjusted using set_scale_theme function.
# Restyle the legends of the dot heatmap. The result must be assigned back to
# p2: without the assignment the restyled plot is only auto-printed and the
# unstyled p2 is what gets written to the png below.
p2 <- p2 %>%
  set_scale_theme(
    x = scale_fill_manual(
      values = c("#1f78b4", "#e31a1c"), # c("orange", "deepskyblue"),
      guide = guide_legend(
        keywidth = 1,
        keyheight = 0.5,
        title.theme = element_text(size = 8),
        label.theme = element_text(size = 6)
      )
    ),
    aes_var = SampleType # specify the name of the variable
  ) %>%
  set_scale_theme(
    x = scale_color_gradient(
      guide = guide_legend(keywidth = 0.5, keyheight = 0.5)
    ),
    aes_var = bray
  ) %>%
  set_scale_theme(
    x = scale_size_continuous(
      range = c(0.1, 3),
      guide = guide_legend(keywidth = 0.5, keyheight = 0.5)
    ),
    aes_var = bray
  )
png("distance_between_samples_with_group_info.png", width = 1000, height = 1000)
print(p2)
dev.off()

# when .group is provided and group.test is TRUE, the comparison of different groups will be returned
# Assuming p3 is a ggplot object after mp_plot_dist call
p3 <- mpse_abund %>%
  mp_plot_dist(
    .distmethod = bray,
    .group = SampleType,
    group.test = TRUE,
    textsize = 6
  ) +
  theme(
    axis.title.x = element_text(size = 14), # Customize x-axis label; optionally face = "bold"
    axis.title.y = element_text(size = 14), # Customize y-axis label
    axis.text.x = element_text(size = 14), # Customize x-axis ticks
    axis.text.y = element_text(size = 14) # Customize y-axis ticks
  )

# Save the plot with the new theme settings
png("Comparison_of_Bray_Distances.png", width = 1000, height = 1000)
print(p3) # Ensure that p3 is explicitly printed in the device
dev.off()

# Extract Bray-Curtis Distance Values and save them in an Excel table.
library(dplyr)
library(openxlsx)

# Label every nested distance tibble with the sample it belongs to. The IDs
# are taken from the object itself; a hand-typed vector of sample numbers
# goes out of sync as soon as the sample selection changes and yields NA
# for every sample beyond its length.
# NOTE(review): assumes mpse_abund$bray is ordered like colnames(mpse_abund)
# - TODO confirm.
sample_ids <- colnames(mpse_abund)
stopifnot(length(sample_ids) == length(mpse_abund$bray))

# One row per sample pair: Sample1, Sample2, BrayDistance.
# The former "PairID" index column is no longer created, so it does not have
# to be deleted by hand from the Excel file afterwards.
bray_data <- bind_rows(
  lapply(seq_along(mpse_abund$bray), function(i) {
    tibble(
      Sample1 = sample_ids[i],
      Sample2 = mpse_abund$bray[[i]]$braySampley,
      BrayDistance = mpse_abund$bray[[i]]$bray
    )
  })
)

# Print the data frame to check the output
print(bray_data)

# Write the data frame to an Excel file
write.xlsx(bray_data, file = "Bray_Curtis_Distances.xlsx")

# ---- 7, the PCoA analysis ----

# install.packages("corrr")
library(corrr)
# install.packages("ggside")
library(ggside)

mpse_abund %<>% mp_cal_pcoa(.abundance = hellinger, distmethod = "bray")
# The dimensions of ordination analysis will be added to the colData slot (default).
mpse_abund
methods(class = class(mpse_abund))
mpse_abund %>% print(width = 380, n = 2)
# NOTE mpse_abund contains 34 variables = 31 variables + `PCo1 (30.16%)`,
# `PCo2 (15.75%)`, `PCo3 (10.53%)` + [Domain ... Species]

# We can also perform adonis or anosim to check whether the dissimilarities between groups are significant.
# adonis is a permutation test; fix the seed so the reported p-value is
# reproducible between runs.
set.seed(1024)
mpse_abund %<>%
  mp_adonis(
    .abundance = hellinger,
    .formula = ~SampleType,
    distmethod = "bray",
    permutations = 9999,
    action = "add"
  )
mpse_abund %>% mp_extract_internal_attr(name = adonis)
# PAUSE

# PCoA with fixed point size, coloured by SampleType.
p1 <- mpse_abund %>%
  mp_plot_ord(
    .ord = pcoa,
    .group = SampleType,
    .color = SampleType,
    .size = 2.4,
    .alpha = 1,
    ellipse = TRUE,
    show.legend = FALSE # don't display the legend of stat_ellipse
  ) +
  scale_fill_manual(
    # values = c("#a6cee3", "#1f78b4", "#b2df8a", "#33a02c", "#fb9a99", "#e31a1c", "#cab2d6", "#6a3d9a"),
    # values = c("#a6cee3", "#b2df8a", "#fb9a99", "#cab2d6"),
    values = c("#1f78b4", "#e31a1c"),
    guide = guide_legend(keywidth = 1.6, keyheight = 1.6, label.theme = element_text(size = 12))
  ) +
  scale_color_manual(
    # values = c("#a6cee3", "#1f78b4", "#b2df8a", "#33a02c", "#fb9a99", "#e31a1c", "#cab2d6", "#6a3d9a"),
    # values = c("#a6cee3", "#b2df8a", "#fb9a99", "#cab2d6"),
    values = c("#1f78b4", "#e31a1c"),
    guide = guide_legend(keywidth = 1.6, keyheight = 1.6, label.theme = element_text(size = 12))
  )
pdf("PCoA.pdf")
# Explicit print so the figure is written even when the code is source()d.
print(p1)
dev.off()

# The size of point also can be mapped to other variables such as Observe, or Shannon
# Then the alpha diversity and beta diversity will be displayed simultaneously.
p2 <- mpse_abund %>%
  mp_plot_ord(
    .ord = pcoa,
    .group = SampleType,
    .color = SampleType,
    .size = Shannon,
    .alpha = Observe,
    ellipse = TRUE,
    show.legend = FALSE # don't display the legend of stat_ellipse
  ) +
  scale_fill_manual(
    values = c("#1f78b4", "#e31a1c"), # only needs two colors
    # values = c("#FF0000", "#000000", "#0000FF", "#C0C0C0", "#00FF00", "#FFFF00", "#00FFFF", "#FFA500"),
    guide = guide_legend(keywidth = 0.6, keyheight = 0.6, label.theme = element_text(size = 8))
  ) +
  scale_color_manual(
    values = c("#1f78b4", "#e31a1c"), # only needs two colors
    # values = c("#FF0000", "#000000", "#0000FF", "#C0C0C0", "#00FF00", "#FFFF00", "#00FFFF", "#FFA500"),
    guide = guide_legend(keywidth = 0.6, keyheight = 0.6, label.theme = element_text(size = 8))
  ) +
  scale_size_continuous(
    range = c(0.5, 3),
    guide = guide_legend(keywidth = 0.6, keyheight = 0.6, label.theme = element_text(size = 8))
  )
pdf("PCoA2.pdf")
print(p2)
dev.off()

# Add the sample name as text labels
library(ggrepel)
p2 <- mpse_abund %>%
  mp_plot_ord(
    .ord = pcoa,
    .group = SampleType,
    .color = SampleType,
    .size = Shannon,
    .alpha = Observe,
    ellipse = TRUE,
    show.legend = FALSE # don't display the legend of stat_ellipse
  ) +
  geom_text_repel(
    # The former ifelse(Sample == "1", "1", Sample) returned Sample either
    # way, so the label is simply the sample name.
    aes(label = Sample),
    size = 3,
    color = "black", # Set the label color to black for better visibility
    max.overlaps = Inf, # Allow maximum labels
    force = 2, # Increase the force to push labels apart
    box.padding = 0.5, # Add more padding around the labels
    segment.size = 0.2 # Line segment size connecting labels to points
  ) +
  scale_fill_manual(
    values = c("#1f78b4", "#e31a1c"), # only needs two colors
    guide = guide_legend(keywidth = 0.6, keyheight = 0.6, label.theme = element_text(size = 8))
  ) +
  scale_color_manual(
    values = c("#1f78b4", "#e31a1c"), # only needs two colors
    guide = guide_legend(keywidth = 0.6, keyheight = 0.6, label.theme = element_text(size = 8))
  ) +
  scale_size_continuous(
    range = c(0.5, 3),
    guide = guide_legend(keywidth = 0.6, keyheight = 0.6, label.theme = element_text(size = 8))
  )
# pdf("PCoA2_labeled.pdf")
png("PCoA2_labeled.png", width = 800, height = 800)
print(p2)
dev.off()

# ---- 8, hierarchical cluster (tree) analysis ----

# input should contain hellinger!
mpse_abund %<>%
  mp_cal_clust(
    .abundance = hellinger,
    distmethod = "bray",
    hclustmethod = "average", # (UPGMA)
    action = "add" # action is used to control which result will be returned
  )
mpse_abund
mpse_abund %>% print(width = 380, n = 2)
# NOTE mpse_abund contains 34 variables, no new variable, the column bray has been newly calculated!
# if action = 'add', the result of hierarchical cluster will be added to the MPSE object
# mp_extract_internal_attr can extract it. It is a treedata object, so it can be visualized
# by ggtree.
sample.clust <- mpse_abund %>% mp_extract_internal_attr(name = "SampleClust")
# The object contained internal attribute: PCoA ADONIS SampleClust
sample.clust
# --> The associated data tibble abstraction: 27 x 30

library(ggtree)
# Cluster tree with tips coloured by SampleType and sample names as y labels.
p <- ggtree(sample.clust) +
  geom_tippoint(aes(color = SampleType)) +
  geom_tiplab(as_ylab = TRUE) +
  ggplot2::scale_x_continuous(expand = c(0, 0.01))
png("hierarchical_cluster1.png", width = 1000, height = 800)
# Explicit print so the figure is written even when the code is source()d.
print(p)
dev.off()

# https://bioconductor.org/packages/release/bioc/vignettes/MicrobiotaProcess/inst/doc//MicrobiotaProcess.html
# mapping = aes(x = RelRareAbundanceBySample-->SampleType,
#               y = Sample-->SampleType,
#               fill = Phyla
#           ),
library(ggtreeExtra)
library(ggplot2)

# Extract relative abundance of phyla
phyla.tb <- mpse_abund %>%
  mp_extract_abundance(taxa.class = Phylum, topn = 30)
# The abundance of each sample is nested; it can be flattened using unnest of tidyr.
# Flatten the nested per-sample abundance and name the taxon column "Phyla".
phyla.tb %<>%
  tidyr::unnest(cols = RareAbundanceBySample) %>%
  dplyr::rename(Phyla = "label")
phyla.tb
phyla.tb %>% print(width = 380, n = 10)

# Attach per-sample phylum bars to the sample cluster tree `p`.
p1 <- p +
  geom_fruit(
    data = phyla.tb,
    geom = geom_col,
    mapping = aes(
      x = RelRareAbundanceBySample,
      y = Sample,
      fill = Phyla
    ),
    orientation = "y",
    # offset = 0.4,
    pwidth = 3,
    axis.params = list(
      axis = "x",
      title = "The relative abundance of phyla (%)",
      title.size = 4,
      text.size = 2,
      vjust = 1
    ),
    grid.params = list()
  )
png("hierarchical_cluster2_Phyla.png", width = 1000, height = 800)
# Explicit print so the figure is written even when the code is source()d.
print(p1)
dev.off()

# Extract relative abundance of classes
class.tb <- mpse_abund %>%
  mp_extract_abundance(taxa.class = Class, topn = 30)

# Flatten and rename the columns
class.tb %<>%
  tidyr::unnest(cols = RareAbundanceBySample) %>%
  dplyr::rename(Class = "label")

# View the data frame
class.tb

# Create the plot
p1 <- p +
  geom_fruit(
    data = class.tb,
    geom = geom_col,
    mapping = aes(
      x = RelRareAbundanceBySample,
      y = Sample,
      fill = Class
    ),
    orientation = "y",
    pwidth = 3,
    axis.params = list(
      axis = "x",
      title = "The relative abundance of classes (%)",
      title.size = 4,
      text.size = 2,
      vjust = 1
    ),
    grid.params = list()
  )

# Save the plot to a file
# ERROR --> NEED to be DEBUGGED!
png("hierarchical_cluster2_Class.png", width = 1000, height = 800)
print(p1)
dev.off()

# ---- 9, biomarker discovery (update Sign_Group to Sign_SampleType, RareAbundanceByGroup to RareAbundanceBySampleType) ----

library(ggtree)
library(ggtreeExtra)
library(ggplot2)
library(MicrobiotaProcess)
library(tidytree)
library(ggstar)
library(forcats)
library(writexl)

mpse_abund %>% print(width = 150)
# mpse_abund %<>%
#   mp_cal_abundance( # for each samples
#     .abundance = RareAbundance
#   ) %>%
#   mp_cal_abundance( # for each groups
#     .abundance = RareAbundance,
#     .group = SampleType
#   )
# mpse_abund

mpse_abund %<>%
  mp_diff_analysis(
    .abundance = RelRareAbundanceBySample,
    .group = SampleType,
    cl.min = 4,
    first.test.alpha = 0.01,
    filter.p = "pvalue"
  )
# The result is stored to the taxatree or otutree slot, you can use mp_extract_tree to extract the specific slot.
taxa.tree <- mpse_abund %>% mp_extract_tree(type = "taxatree")
taxa.tree
## And the result tibble of different analysis can also be extracted with tidytree (>=0.3.5)
# LDAupper, LDAmean, LDAlower,
taxa.tree %>% select(label, nodeClass, Sign_SampleType, fdr) # %>% dplyr::filter(!is.na(fdr))
taxa.tree %>% print(width = 150, n = 200)

# -- replace the pvalue and fdr with pvalue and p-adjusted from DESeq enrichment results --
tree_data <- as_tibble(taxa.tree)

# ---- modify tree_data by left_joining with sigtab and updating Sign_SampleType ----
# NOTE(review): sigtab is not created in this script; presumably it is the
# DESeq2 result table left in the session by Phyloseq.Rmd - confirm.
sigtab$label <- rownames(sigtab)
write.xlsx(sigtab, file = "sigtab.xlsx")
# na.rm = TRUE: a single NA in padj would otherwise turn the whole count into NA.
sum(sigtab$padj < 0.05, na.rm = TRUE)
# taxa.tree <- left_join(tree_data, sigtab[, c("label", "log2FoldChange", "pvalue", "padj")], by = 'label') %>% as.treedata
taxa.tree2 <- tree_data %>%
  left_join(
    sigtab[, c("label", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj")],
    by = "label"
  ) %>%
  mutate(Sign_SampleType = case_when(
    log2FoldChange > 0 & padj <= 0.05 ~ "RA",
    log2FoldChange < 0 & padj <= 0.05 ~ "control",
    TRUE ~ NA_character_ # Sets Sign_SampleType to NA otherwise
  )) %>%
  as.treedata() # Convert the dataframe to a treedata object

# ---- print taxa_data2 to Excel ----
taxa.tree2 %>% print(width = 380, n = 20)
taxa_data2 <- as_tibble(taxa.tree2)
sum(!is.na(taxa_data2$Sign_SampleType))
sapply(taxa_data2, class)

# Remove or transform list columns if not needed
taxa_data2_simplified <- taxa_data2 %>%
  select(-RareAbundanceBySample, -RareAbundanceBySampleType) %>%
  # Convert every remaining list cell to its own string. toString() on the
  # whole column would build one string from all rows and recycle it into
  # every row.
  mutate(across(where(is.list), ~ vapply(.x, toString, character(1))))

# Replace NA with a placeholder, such as "NA" or another suitable representation
taxa_data2_simplified <- taxa_data2_simplified %>%
  mutate(across(everything(), ~ ifelse(is.na(.), "NA", .)))

# Add the taxonomy columns, matched on the OTU identifier.
taxonomy_data <- as.data.frame(mp_extract_taxonomy(mpse_abund))
colnames(taxa_data2_simplified)[colnames(taxa_data2_simplified) == "label"] <- "OTU"
combined_data <- left_join(taxa_data2_simplified, taxonomy_data, by = "OTU")
write_xlsx(combined_data, "taxa_data2.xlsx")

# (UNDER HOST-ENV) cp sigtab.xlsx diff_analysis_RA_vs_control.xlsx and then switch label as the 1st column and sort the columns by padj.
# -- NOTE that sometimes the record in DESeq2 not occurs in the final list, since the statistics calculation of MicrobiotaProcess results in NA, e.g. the record FJ879443.1.1488, we can simply delete the record from diff_analysis_RA_vs_control.xlsx --

# ---- since taxa.tree is treedata object, it can be visualized by ggtree and ggtreeExtra ----
p1 <- ggtree(
  taxa.tree2,
  layout = "radial",
  size = 0.3
) +
  geom_point(
    data = td_filter(!isTip),
    fill = "white",
    size = 1,
    shape = 21
  )
# display the highlight of phylum clade.
# Highlight each phylum clade on the radial tree p1.
p2 <- p1 +
  geom_hilight(
    data = td_filter(nodeClass == "Phylum"),
    mapping = aes(node = node, fill = label)
  )

# display the relative abundance of features(OTU)
p3 <- p2 +
  ggnewscale::new_scale("fill") +
  geom_fruit(
    data = td_unnest(RareAbundanceBySample),
    geom = geom_star,
    mapping = aes(
      x = fct_reorder(Sample, SampleType, .fun = min),
      size = RelRareAbundanceBySample,
      fill = SampleType,
      subset = RelRareAbundanceBySample > 0
    ),
    starshape = 13,
    starstroke = 0.25,
    offset = 0.03,
    pwidth = 0.4,
    grid.params = list(linetype = 2)
  ) +
  scale_size_continuous(
    name = "Relative Abundance (%)",
    range = c(.5, 3)
  ) +
  scale_fill_manual(values = c("#1B9E77", "#D95F02"))

# display the tip labels of taxa tree
p4 <- p3 + geom_tiplab(size = 6, offset = 4.0)

# display the LDA of significant OTU.
# p5 <- p4 +
#   ggnewscale::new_scale("fill") +
#   geom_fruit(
#     geom = geom_col,
#     mapping = aes(
#       x = LDAmean,
#       fill = Sign_SampleType,
#       subset = !is.na(LDAmean)
#     ),
#     orientation = "y",
#     offset = 0.3,
#     pwidth = 0.5,
#     axis.params = list(axis = "x",
#                        title = "Log10(LDA)",
#                        title.height = 0.01,
#                        title.size = 2,
#                        text.size = 1.8,
#                        vjust = 1),
#     grid.params = list(linetype = 2)
#   )

# display the significant (FDR-->pvalue-->padj) taxonomy after kruskal.test (default)
# shape = 21,
# scale_size_continuous(range=c(1, 3)) +
p6 <- p4 +
  ggnewscale::new_scale("size") +
  geom_point(
    data = td_filter(!is.na(Sign_SampleType)),
    mapping = aes(
      size = -log10(padj),
      fill = Sign_SampleType
    ),
    shape = 21
  ) +
  scale_size_continuous(range = c(1, 4)) +
  scale_fill_manual(values = c("#1B9E77", "#D95F02"))

svg("diff_analysis.svg", width = 22, height = 22)
# png("differently_expressed_otu.png", width=2000, height=2000)
# Explicit print so the figure is written even when the code is source()d.
print(
  p6 +
    theme(
      legend.key.height = unit(1.0, "cm"),
      legend.key.width = unit(1.0, "cm"),
      legend.spacing.y = unit(0.01, "cm"),
      legend.text = element_text(size = 20),
      legend.title = element_text(size = 20)
      # legend.position = c(0.99, 0.01)
    )
)
dev.off()

MicrobiotaProcess for GPA vs control

custom baits = custom oligos

differential_abundance_analysis_GPA_vs_control

https://bioconductor.org/packages/release/bioc/vignettes/MicrobiotaProcess/inst/doc//MicrobiotaProcess.html

1, prepare the R environment

    # Rscript MicrobiotaProcess.R
    # NOTE: quit R and log in to a fresh R environment before re-running;
    #       first remove the old cache: rm -rf Phyloseq*_cache

    # -- using R under base environment --
    # (base) jhuang@WS-2290C:~/DATA_A/Data_Nicole8_Lamprecht_new_PUBLISHED/core_diversity_e1300
    # mkdir figures
    # under (base) using "/home/jhuang/miniconda3/lib/R/library"
    # NOTE: the sample names in the last chapter "Differential abundance
    #       analysis" of Phyloseq.Rmd must be updated first, so that
    #       GPA_vs_control comes last!
    # Knit the Phyloseq report to HTML.
    rmarkdown::render(
      input = "Phyloseq.Rmd",
      output_file = "Phyloseq.html"
    )
    # run at core_diversity_e1300, then copy results to directory MicrobiotaProcess_GPA_RA

2, bridges other tools

    library(MicrobiotaProcess)
    library(microeco)
    library(ggalluvial)
    library(ggh4x)
    library(gghalves)
    library(tidyr)

    # Sample IDs kept for this comparison (micro*, mw* and kg* samples).
    selected_samples <- c(
      "micro1", "micro3", "micro4", "micro6", "micro7", "micro12",
      "micro13", "micro16", "micro17",
      "mw001", "mw004", "mw005", "mw006", "mw007", "mw009", "mw010",
      "mw013", "mw014", "mw015", "mw017", "mw021",
      "kg001", "kg002", "kg003", "kg004", "kg005", "kg007", "kg009",
      "kg015", "kg016", "kg019", "kg020", "kg021", "kg022", "kg023",
      "kg025", "kg026", "kg027", "kg028", "kg029"
    )

    # Start from the full phyloseq object, then replace its OTU table by the
    # columns of the selected samples.
    ps.ng.tax_sel <- ps.ng.tax_abund
    otu_table(ps.ng.tax_sel) <- otu_table(ps.ng.tax_abund)[, selected_samples]

    # Convert the phyloseq object into an MPSE object for MicrobiotaProcess.
    mpse_abund <- as.MPSE(ps.ng.tax_sel)

3, rarefaction analysis

    # mp_rrarefy() subsamples at random; fix the seed so that the rarefied
    # table (and everything computed from it) is reproducible between runs.
    set.seed(1024)
    mpse_abund %<>% mp_rrarefy()
    mpse_abund %<>%
      mp_cal_rarecurve(
        .abundance = RareAbundance,
        chunks = 400
      )

    # Two colours, one per SampleType level, shared by the grouped panels.
    group_colors <- c("#1f78b4", "#e31a1c")

    # Panel 1: rarefaction curve of every sample.
    p1 <- mpse_abund %>%
      mp_plot_rarecurve(
        .rare = RareAbundanceRarecurve,
        .alpha = Observe
      )

    # Panel 2: per-sample curves coloured by SampleType.
    p2 <- mpse_abund %>%
      mp_plot_rarecurve(
        .rare = RareAbundanceRarecurve,
        .alpha = Observe,
        .group = SampleType
      ) +
      scale_color_manual(values = group_colors) +
      scale_fill_manual(values = group_colors, guide = "none")

    glimpse(mpse_abund)
    mpse_abund %>% print(width = 380, n = 2)

    # Panel 3: curves combined per SampleType (plot.group = TRUE).
    p3 <- mpse_abund %>%
      mp_plot_rarecurve(
        .rare = RareAbundanceRarecurve,
        .alpha = "Observe",
        .group = SampleType,
        plot.group = TRUE
      ) +
      scale_color_manual(values = group_colors) +
      scale_fill_manual(values = group_colors, guide = "none")

    png("rarefaction_of_samples_or_groups.png", width = 1080, height = 600)
    # Print explicitly: auto-printing only happens at top level, so the file
    # would stay empty if this code were source()d or wrapped in a function.
    # NOTE(review): `+` between ggplot objects needs patchwork attached; no
    # library(patchwork) call is visible here - confirm it is loaded upstream.
    print(p1 + p2 + p3)
    dev.off()

alpha diversity analysis

4, calculate alpha index and visualization

    library(ggplot2)
    library(MicrobiotaProcess)
    # Compute the alpha-diversity indices on the rarefied abundance.
    mpse_abund %<>%
      mp_cal_alpha(.abundance = RareAbundance)
    mpse_abund
    # NOTE mpse_abund contains 28 variables = 22 variables + Observe, Chao1,
    # ACE, Shannon, Simpson, Pielou

    # Alpha indices compared between the SampleType groups.
    f1 <- mpse_abund %>%
      mp_plot_alpha(
        .group = SampleType,
        .alpha = c(Observe, Chao1, ACE, Shannon, Simpson, Pielou)
      ) +
      scale_fill_manual(values = c("#1f78b4", "#e31a1c"), guide = "none") +
      scale_color_manual(values = c("#1f78b4", "#e31a1c"), guide = "none")

    # Same indices without a grouping variable.
    f2 <- mpse_abund %>%
      mp_plot_alpha(
        .alpha = c(Observe, Chao1, ACE, Shannon, Simpson, Pielou)
      )
    # ps.ng.tax_sel contains only pre samples --> f1 cannot be generated!

    png("alpha_diversity_comparison.png", width = 1400, height = 600)
    # Explicit print so the figure is written even when the code is source()d.
    print(f1 / f2)
    dev.off()

    # ---- 5, visualize taxonomy abundance (Class) ----

    mpse_abund %<>%
      mp_cal_abundance( # for each samples
        .abundance = RareAbundance
      ) %>%
      mp_cal_abundance( # for each groups
        .abundance = RareAbundance,
        .group = SampleType
      )
    mpse_abund

    # Top 20 classes per sample: relative abundance (p1) and counts (p2).
    p1 <- mpse_abund %>%
      mp_plot_abundance(
        .abundance = RareAbundance,
        taxa.class = Class,
        topn = 20,
        relative = TRUE
      )
    p2 <- mpse_abund %>%
      mp_plot_abundance(
        .abundance = RareAbundance,
        taxa.class = Class,
        topn = 20,
        relative = FALSE
      )
    png("relative_abundance_and_abundance.png", width = 1200, height = 600) # NOT PRODUCED!
# Write the stacked per-sample abundance panels to the open png device.
# Explicit print so the figure is written even when the code is source()d.
print(p1 / p2)
dev.off()

# Heatmaps of the top 20 classes: relative abundance (h1) and counts (h2).
h1 <- mpse_abund %>%
  mp_plot_abundance(
    .abundance = RareAbundance,
    .group = SampleType,
    taxa.class = Class,
    relative = TRUE,
    topn = 20,
    geom = "heatmap",
    features.dist = "euclidean",
    features.hclust = "average",
    sample.dist = "bray",
    sample.hclust = "average"
  )
h2 <- mpse_abund %>%
  mp_plot_abundance(
    .abundance = RareAbundance,
    .group = SampleType,
    taxa.class = Class,
    relative = FALSE,
    topn = 20,
    geom = "heatmap",
    features.dist = "euclidean",
    features.hclust = "average",
    sample.dist = "bray",
    sample.hclust = "average"
  )
# the character (scale or theme) of figure can be adjusted by set_scale_theme
# refer to the mp_plot_dist
png("relative_abundance_and_abundance_heatmap.png", width = 1200, height = 600)
print(aplot::plot_list(gglist = list(h1, h2), tag_levels = "A"))
dev.off()

# visualize the relative abundance of top 20 classes for each .group (SampleType)
p3 <- mpse_abund %>%
  mp_plot_abundance(
    .abundance = RareAbundance,
    .group = SampleType,
    taxa.class = Class,
    topn = 20,
    plot.group = TRUE
  )
# visualize the abundance of top 20 classes for each .group (SampleType)
p4 <- mpse_abund %>%
  mp_plot_abundance(
    .abundance = RareAbundance,
    .group = SampleType,
    taxa.class = Class,
    topn = 20,
    relative = FALSE,
    plot.group = TRUE
  )
png("relative_abundance_and_abundance_groups.png", width = 1000, height = 1000)
print(p3 / p4)
dev.off()

# ---- beta diversity analysis ----
# ---- 6, calculate the distance between samples or groups ----

# Standardize the raw abundance (adds the `hellinger` column used below),
# then compute Bray-Curtis distances on it.
mpse_abund %<>% mp_decostand(.abundance = Abundance)
mpse_abund %<>% mp_cal_dist(.abundance = hellinger, distmethod = "bray")
mpse_abund

p1 <- mpse_abund %>% mp_plot_dist(.distmethod = bray)
png("distance_between_samples.png", width = 1000, height = 1000)
print(p1)
dev.off()

# when .group is provided, the dot heatmap plot with group information will be returned.
p2 <- mpse_abund %>% mp_plot_dist(.distmethod = bray, .group = SampleType)
# The scale or theme of dot heatmap plot can be adjusted using set_scale_theme function.
# Restyle the legends of the dot heatmap. The result must be assigned back to
# p2: without the assignment the restyled plot is only auto-printed and the
# unstyled p2 is what gets written to the png below.
p2 <- p2 %>%
  set_scale_theme(
    x = scale_fill_manual(
      values = c("#1f78b4", "#e31a1c"), # c("orange", "deepskyblue"),
      guide = guide_legend(
        keywidth = 1,
        keyheight = 0.5,
        title.theme = element_text(size = 8),
        label.theme = element_text(size = 6)
      )
    ),
    aes_var = SampleType # specify the name of the variable
  ) %>%
  set_scale_theme(
    x = scale_color_gradient(
      guide = guide_legend(keywidth = 0.5, keyheight = 0.5)
    ),
    aes_var = bray
  ) %>%
  set_scale_theme(
    x = scale_size_continuous(
      range = c(0.1, 3),
      guide = guide_legend(keywidth = 0.5, keyheight = 0.5)
    ),
    aes_var = bray
  )
png("distance_between_samples_with_group_info.png", width = 1000, height = 1000)
print(p2)
dev.off()

# when .group is provided and group.test is TRUE, the comparison of different groups will be returned
# Assuming p3 is a ggplot object after mp_plot_dist call
p3 <- mpse_abund %>%
  mp_plot_dist(
    .distmethod = bray,
    .group = SampleType,
    group.test = TRUE,
    textsize = 6
  ) +
  theme(
    axis.title.x = element_text(size = 14), # Customize x-axis label; optionally face = "bold"
    axis.title.y = element_text(size = 14), # Customize y-axis label
    axis.text.x = element_text(size = 14), # Customize x-axis ticks
    axis.text.y = element_text(size = 14) # Customize y-axis ticks
  )

# Save the plot with the new theme settings
png("Comparison_of_Bray_Distances.png", width = 1000, height = 1000)
print(p3) # Ensure that p3 is explicitly printed in the device
dev.off()

# Extract Bray-Curtis Distance Values and save them in an Excel table.
library(dplyr)
library(openxlsx)

# Label every nested distance tibble with the sample it belongs to. The IDs
# are taken from the object itself; a hand-typed vector of sample numbers
# goes out of sync as soon as the sample selection changes and yields NA
# for every sample beyond its length.
# NOTE(review): assumes mpse_abund$bray is ordered like colnames(mpse_abund)
# - TODO confirm.
sample_ids <- colnames(mpse_abund)
stopifnot(length(sample_ids) == length(mpse_abund$bray))

# One row per sample pair: Sample1, Sample2, BrayDistance.
# The former "PairID" index column is no longer created, so it does not have
# to be deleted by hand from the Excel file afterwards.
bray_data <- bind_rows(
  lapply(seq_along(mpse_abund$bray), function(i) {
    tibble(
      Sample1 = sample_ids[i],
      Sample2 = mpse_abund$bray[[i]]$braySampley,
      BrayDistance = mpse_abund$bray[[i]]$bray
    )
  })
)

# Print the data frame to check the output
print(bray_data)

# Write the data frame to an Excel file
write.xlsx(bray_data, file = "Bray_Curtis_Distances.xlsx")

# ---- 7, the PCoA analysis ----

# install.packages("corrr")
library(corrr)
# install.packages("ggside")
library(ggside)

mpse_abund %<>% mp_cal_pcoa(.abundance = hellinger, distmethod = "bray")
# The dimensions of ordination analysis will be added to the colData slot (default).
mpse_abund
methods(class = class(mpse_abund))
mpse_abund %>% print(width = 380, n = 2)
# NOTE mpse_abund contains 34 variables = 31 variables + `PCo1 (30.16%)`,
# `PCo2 (15.75%)`, `PCo3 (10.53%)` + [Domain ... Species]

# We can also perform adonis or anosim to check whether the dissimilarities between groups are significant.
# adonis is a permutation test; fix the seed so the reported p-value is
# reproducible between runs.
set.seed(1024)
mpse_abund %<>%
  mp_adonis(
    .abundance = hellinger,
    .formula = ~SampleType,
    distmethod = "bray",
    permutations = 9999,
    action = "add"
  )
mpse_abund %>% mp_extract_internal_attr(name = adonis)
# PAUSE

# PCoA with fixed point size, coloured by SampleType.
p1 <- mpse_abund %>%
  mp_plot_ord(
    .ord = pcoa,
    .group = SampleType,
    .color = SampleType,
    .size = 2.4,
    .alpha = 1,
    ellipse = TRUE,
    show.legend = FALSE # don't display the legend of stat_ellipse
  ) +
  scale_fill_manual(
    # values = c("#a6cee3", "#1f78b4", "#b2df8a", "#33a02c", "#fb9a99", "#e31a1c", "#cab2d6", "#6a3d9a"),
    # values = c("#a6cee3", "#b2df8a", "#fb9a99", "#cab2d6"),
    values = c("#1f78b4", "#e31a1c"),
    guide = guide_legend(keywidth = 1.6, keyheight = 1.6, label.theme = element_text(size = 12))
  ) +
  scale_color_manual(
    # values = c("#a6cee3", "#1f78b4", "#b2df8a", "#33a02c", "#fb9a99", "#e31a1c", "#cab2d6", "#6a3d9a"),
    # values = c("#a6cee3", "#b2df8a", "#fb9a99", "#cab2d6"),
    values = c("#1f78b4", "#e31a1c"),
    guide = guide_legend(keywidth = 1.6, keyheight = 1.6, label.theme = element_text(size = 12))
  )
pdf("PCoA.pdf")
# Explicit print so the figure is written even when the code is source()d.
print(p1)
dev.off()

# The size of point also can be mapped to other variables such as Observe, or Shannon
# Then the alpha diversity and beta diversity will be displayed simultaneously.
p2 <- mpse_abund %>%
  mp_plot_ord(
    .ord = pcoa,
    .group = SampleType,
    .color = SampleType,
    .size = Shannon,
    .alpha = Observe,
    ellipse = TRUE,
    show.legend = FALSE # don't display the legend of stat_ellipse
  ) +
  scale_fill_manual(
    values = c("#1f78b4", "#e31a1c"), # only needs two colors
    # values = c("#FF0000", "#000000", "#0000FF", "#C0C0C0", "#00FF00", "#FFFF00", "#00FFFF", "#FFA500"),
    guide = guide_legend(keywidth = 0.6, keyheight = 0.6, label.theme = element_text(size = 8))
  ) +
  scale_color_manual(
    values = c("#1f78b4", "#e31a1c"), # only needs two colors
    # values = c("#FF0000", "#000000", "#0000FF", "#C0C0C0", "#00FF00", "#FFFF00", "#00FFFF", "#FFA500"),
    guide = guide_legend(keywidth = 0.6, keyheight = 0.6, label.theme = element_text(size = 8))
  ) +
  scale_size_continuous(
    range = c(0.5, 3),
    guide = guide_legend(keywidth = 0.6, keyheight = 0.6, label.theme = element_text(size = 8))
  )
pdf("PCoA2.pdf")
print(p2)
dev.off()

# Add the sample name as text labels
library(ggrepel)
p2 <- mpse_abund %>%
  mp_plot_ord(
    .ord = pcoa,
    .group = SampleType,
    .color = SampleType,
    .size = Shannon,
    .alpha = Observe,
    ellipse = TRUE,
    show.legend = FALSE # don't display the legend of stat_ellipse
  ) +
  geom_text_repel(
    # The former ifelse(Sample == "1", "1", Sample) returned Sample either
    # way, so the label is simply the sample name.
    aes(label = Sample),
    size = 3,
    color = "black", # Set the label color to black for better visibility
    max.overlaps = Inf, # Allow maximum labels
    force = 2, # Increase the force to push labels apart
    box.padding = 0.5, # Add more padding around the labels
    segment.size = 0.2 # Line segment size connecting labels to points
  ) +
  scale_fill_manual(
    values = c("#1f78b4", "#e31a1c"), # only needs two colors
    guide = guide_legend(keywidth = 0.6, keyheight = 0.6, label.theme = element_text(size = 8))
  ) +
  scale_color_manual(
    values = c("#1f78b4", "#e31a1c"), # only needs two colors
    guide = guide_legend(keywidth = 0.6, keyheight = 0.6, label.theme = element_text(size = 8))
  ) +
  scale_size_continuous(
    range = c(0.5, 3),
    guide = guide_legend(keywidth = 0.6, keyheight = 0.6, label.theme = element_text(size = 8))
  )
# pdf("PCoA2_labeled.pdf")
png("PCoA2_labeled.png", width = 800, height = 800)
print(p2)
dev.off()

# ---- 8, hierarchical cluster (tree) analysis ----

# input should contain hellinger!
mpse_abund %<>%
  mp_cal_clust(
    .abundance = hellinger,
    distmethod = "bray",
    hclustmethod = "average", # (UPGMA)
    action = "add" # action is used to control which result will be returned
  )
mpse_abund
mpse_abund %>% print(width = 380, n = 2)
# NOTE mpse_abund contains 34 variables, no new variable, the column bray has been newly calculated!
# if action = 'add', the result of hierarchical cluster will be added to the MPSE object
# mp_extract_internal_attr can extract it. It is a treedata object, so it can be visualized
# by ggtree.
sample.clust <- mpse_abund %>% mp_extract_internal_attr(name = "SampleClust")
#The object contained internal attribute: PCoA ADONIS SampleClust
sample.clust
#--> The associated data tibble abstraction: 27 × 30

library(ggtree)
# Sample dendrogram with tips coloured by SampleType; tip labels go on the y axis
# so that ggtreeExtra layers can be aligned to them later.
p <- ggtree(sample.clust) +
  geom_tippoint(aes(color = SampleType)) +
  geom_tiplab(as_ylab = TRUE) +
  ggplot2::scale_x_continuous(expand = c(0, 0.01))
png("hierarchical_cluster1.png", width = 1000, height = 800)
print(p) # explicit print: a bare `p` is only drawn when autoprint is active
dev.off()

#https://bioconductor.org/packages/release/bioc/vignettes/MicrobiotaProcess/inst/doc//MicrobiotaProcess.html
# mapping = aes(x = RelRareAbundanceBySample-->SampleType,
#               y = Sample-->SampleType,
#               fill = Phyla
#           ),
library(ggtreeExtra)
library(ggplot2)
# Extract relative abundance of phyla
phyla.tb <- mpse_abund %>% mp_extract_abundance(taxa.class = Phylum, topn = 30)
# The abundance of each samples is nested, it can be flatted using the unnest of tidyr.
# Flatten the per-sample abundances and rename the taxon column for the legend.
phyla.tb %<>%
  tidyr::unnest(cols = RareAbundanceBySample) %>%
  dplyr::rename(Phyla = "label")
phyla.tb
phyla.tb %>% print(width = 380, n = 10)

# Attach a stacked bar track (relative abundance per phylum) to the sample tree `p`.
p1 <- p +
  geom_fruit(
    data = phyla.tb,
    geom = geom_col,
    mapping = aes(
      x = RelRareAbundanceBySample,
      y = Sample,
      fill = Phyla
    ),
    orientation = "y",
    #offset = 0.4,
    pwidth = 3,
    axis.params = list(
      axis = "x",
      title = "The relative abundance of phyla (%)",
      title.size = 4,
      text.size = 2,
      vjust = 1
    ),
    grid.params = list()
  )
png("hierarchical_cluster2_Phyla.png", width = 1000, height = 800)
print(p1) # explicit print: a bare `p1` is only drawn when autoprint is active
dev.off()

# Extract relative abundance of classes
class.tb <- mpse_abund %>% mp_extract_abundance(taxa.class = Class, topn = 30)
# Flatten and rename the columns
class.tb %<>%
  tidyr::unnest(cols = RareAbundanceBySample) %>%
  dplyr::rename(Class = "label")
# View the data frame
class.tb

# Create the plot
p1 <- p +
  geom_fruit(
    data = class.tb,
    geom = geom_col,
    mapping = aes(
      x = RelRareAbundanceBySample,
      y = Sample,
      fill = Class
    ),
    orientation = "y",
    pwidth = 3,
    axis.params = list(
      axis = "x",
      title = "The relative abundance of classes (%)",
      title.size = 4,
      text.size = 2,
      vjust = 1
    ),
    grid.params = list()
  )
# Save the plot to a file
#ERROR-->NEED to be DEBUGGED!
# NOTE(review): the typographic quotes that were in this section have been
# replaced with ASCII quotes; re-run to check whether the error persists.
png("hierarchical_cluster2_Class.png", width = 1000, height = 800)
print(p1)
dev.off()

# ---- 9, biomarker discovery (update Sign_Group to Sign_SampleType, RareAbundanceByGroup to RareAbundanceBySampleType) ----

library(ggtree)
library(ggtreeExtra)
library(ggplot2)
library(MicrobiotaProcess)
library(tidytree)
library(ggstar)
library(forcats)
library(writexl)

mpse_abund %>% print(width = 150)
#mpse_abund %<>%
#  mp_cal_abundance( # for each samples
#    .abundance = RareAbundance
#  ) %>%
#  mp_cal_abundance( # for each groups
#    .abundance = RareAbundance,
#    .group = SampleType
#  )
#mpse_abund

mpse_abund %<>%
  mp_diff_analysis(
    .abundance = RelRareAbundanceBySample,
    .group = SampleType,
    cl.min = 4,
    first.test.alpha = 0.01,
    filter.p = "pvalue"
  )
# The result is stored to the taxatree or otutree slot, you can use mp_extract_tree to extract the specific slot.
taxa.tree <- mpse_abund %>% mp_extract_tree(type = "taxatree")
taxa.tree
## And the result tibble of different analysis can also be extracted with tidytree (>=0.3.5)
#LDAupper, LDAmean, LDAlower,
taxa.tree %>% select(label, nodeClass, Sign_SampleType, fdr) #%>% dplyr::filter(!is.na(fdr))
taxa.tree %>% print(width = 150, n = 200)

# -- replace the pvalue and fdr with pvalue and p-adjusted from DESeq enrichment results --
tree_data <- as_tibble(taxa.tree)

# ---- modify tree_data by left_joining with sigtab and updating Sign_SampleType ----
# `sigtab` is expected to exist already (it is not created in this section).
sigtab$label <- rownames(sigtab)
write.xlsx(sigtab, file = "sigtab.xlsx")
# na.rm = TRUE: a single NA in padj would otherwise turn the whole count into NA.
sum(sigtab$padj < 0.05, na.rm = TRUE)
#taxa.tree <- left_join(tree_data, sigtab[, c("label", "log2FoldChange", "pvalue", "padj")], by = 'label') %>% as.treedata
# NOTE(review): the positive-fold-change label is "GPA" although this analysis
# is titled RA vs control -- looks like a leftover from another project; confirm
# the intended group name and the direction of the DESeq2 contrast.
taxa.tree2 <- tree_data %>%
  left_join(
    sigtab[, c("label", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj")],
    by = "label"
  ) %>%
  mutate(Sign_SampleType = case_when(
    log2FoldChange > 0 & padj <= 0.05 ~ "GPA",
    log2FoldChange < 0 & padj <= 0.05 ~ "control",
    TRUE ~ NA_character_ # Sets Sign_SampleType to NA otherwise
  )) %>%
  as.treedata() # Convert the dataframe to a treedata object

# ---- print taxa_data2 to Excel ----
taxa.tree2 %>% print(width = 380, n = 20)
taxa_data2 <- as_tibble(taxa.tree2)
sum(!is.na(taxa_data2$Sign_SampleType))
sapply(taxa_data2, class)

# Remove or transform list columns if not needed.
# toString() is applied per element: calling it on the whole column would
# collapse the column into one string that is then recycled to every row.
taxa_data2_simplified <- taxa_data2 %>%
  select(-RareAbundanceBySample, -RareAbundanceBySampleType) %>%
  mutate(across(where(is.list), ~ vapply(.x, toString, character(1))))

# Replace NA with a placeholder, such as "NA" or another suitable representation
taxa_data2_simplified <- taxa_data2_simplified %>%
  mutate(across(everything(), ~ ifelse(is.na(.), "NA", .)))

taxonomy_data <- as.data.frame(mp_extract_taxonomy(mpse_abund))
colnames(taxa_data2_simplified)[colnames(taxa_data2_simplified) == "label"] <- "OTU"
combined_data <- left_join(taxa_data2_simplified, taxonomy_data, by = "OTU")
write_xlsx(combined_data, "taxa_data2.xlsx")

#(UNDER HOST-ENV) cp sigtab.xlsx diff_analysis_RA_vs_control.xlsx and then switch label as the 1st column and sort the columns by padj.
# -- NOTE that sometimes the record in DESeq2 not occurs in the final list, since the statistics calculation of MicrobiotaProcess results in NA, e.g. the record FJ879443.1.1488, we can simply delete the record from diff_analysis_RA_vs_control.xlsx --

# ---- since taxa.tree is treedata object, it can be visualized by ggtree and ggtreeExtra ----
p1 <- ggtree(
  taxa.tree2,
  layout = "radial",
  size = 0.3
) +
  geom_point(
    data = td_filter(!isTip),
    fill = "white",
    size = 1,
    shape = 21
  )
# display the high light of phylum clade.
p2 <- p1 +
  geom_hilight(
    data = td_filter(nodeClass == "Phylum"),
    mapping = aes(node = node, fill = label)
  )

# display the relative abundance of features(OTU)
p3 <- p2 +
  ggnewscale::new_scale("fill") +
  geom_fruit(
    data = td_unnest(RareAbundanceBySample),
    geom = geom_star,
    mapping = aes(
      x = fct_reorder(Sample, SampleType, .fun = min),
      size = RelRareAbundanceBySample,
      fill = SampleType,
      subset = RelRareAbundanceBySample > 0
    ),
    starshape = 13,
    starstroke = 0.25,
    offset = 0.03,
    pwidth = 0.4,
    grid.params = list(linetype = 2)
  ) +
  scale_size_continuous(
    name = "Relative Abundance (%)",
    range = c(.5, 3)
  ) +
  scale_fill_manual(values = c("#1B9E77", "#D95F02"))

# display the tip labels of taxa tree
p4 <- p3 + geom_tiplab(size = 6, offset = 4.0)

# display the LDA of significant OTU.
#p5 <- p4 +
#  ggnewscale::new_scale("fill") +
#  geom_fruit(
#    geom = geom_col,
#    mapping = aes(
#      x = LDAmean,
#      fill = Sign_SampleType,
#      subset = !is.na(LDAmean)
#    ),
#    orientation = "y",
#    offset = 0.3,
#    pwidth = 0.5,
#    axis.params = list(axis = "x",
#                       title = "Log10(LDA)",
#                       title.height = 0.01,
#                       title.size = 2,
#                       text.size = 1.8,
#                       vjust = 1),
#    grid.params = list(linetype = 2)
#  )

# display the significant (FDR-->pvalue-->padj) taxonomy after kruskal.test (default)
#shape = 21,
#scale_size_continuous(range=c(1, 3)) +
# NOTE(review): with p5 commented out there is no new_scale("fill") before this
# layer, so Sign_SampleType presumably shares the fill scale used for SampleType
# above; if the two variables have different level names, two colours may not be
# enough -- confirm, or add ggnewscale::new_scale("fill") here.
p6 <- p4 +
  ggnewscale::new_scale("size") +
  geom_point(
    data = td_filter(!is.na(Sign_SampleType)),
    mapping = aes(
      size = -log10(padj),
      fill = Sign_SampleType
    ),
    shape = 21
  ) +
  scale_size_continuous(range = c(1, 4)) +
  scale_fill_manual(values = c("#1B9E77", "#D95F02"))

svg("diff_analysis.svg", width = 22, height = 22)
#png("differently_expressed_otu.png", width=2000, height=2000)
# explicit print: a bare ggplot expression is only drawn when autoprint is active
print(
  p6 +
    theme(
      legend.key.height = unit(1.0, "cm"),
      legend.key.width = unit(1.0, "cm"),
      legend.spacing.y = unit(0.01, "cm"),
      legend.text = element_text(size = 20),
      legend.title = element_text(size = 20)
      #legend.position = c(0.99, 0.01)
    )
)
dev.off()

Monkeypox Virus Amplicon Panel (Baits)

Integrated DNA Technologies: https://www.idtdna.com/pages/products/next-generation-sequencing/workflow/xgen-ngs-amplicon-sequencing/predesigned-amplicon-panels/xgen-monkeypox-virus-amplicon-panel

Product details of xGen™ Monkeypox Virus Amplicon Panel

  • IDT is committed to providing quality products to researchers working on the cutting edge of scientific discovery.

  • The xGen Monkeypox Virus Amplicon Panel was designed as part of the next generation sequencing (NGS) Tech Access program at IDT, which is intended to accelerate innovation by enabling earlier access to our most advanced research tools.

  • Tech Access products have not been through our standard, rigorous development cycle.

  • These products are particularly well suited for researchers who require the most up-to-date technology to unlock new discoveries.

  • The current variants of monkeypox that are circulating have a genome of nearly 200 kb double-stranded DNA [1].

  • Surveillance of the virus and any potential mutations have gained international support due to the lessons learned through the COVID-19 pandemic.

  • The IDT xGen Monkeypox Virus Amplicon Panel helps to enable researchers to track monkeypox strains, including potential new variants, by next generation sequencing.

  • DNA-to-sequencer in 2.5 hours

  • The workflow for the xGen Monkeypox Virus Amplicon Panel starts with extracted viral DNA (Figure 1). You can then generate an NGS library in a single tube using tiled primer pairs designed to target 184 kb of the monkeypox genome.

  • Primers were designed for the currently circulating strain of the monkeypox virus (NCBI accession number ON568298 [1]), which allows you to generate overlapping amplicons in a single-tube, PCR 1 + PCR 2 workflow.

  • If pooling multiple samples for NGS, the xGen Amplicon Core Kit includes the reagents for Normalase™ technology, the proprietary enzymatic normalization step that reduces hands-on time needed for manual normalization.

  • Specifications of the Tech Access, xGen Monkeypox Virus Amplicon Panel are found in Table 1.

  • Please note that the term amplicons in the product refers to the same concept as the baits I mentioned in my talk.

  • Table 1. Features of the xGen Monkeypox Virus Panel.

  • Features Specifications

  • Design coverage and panel information Comprehensive coverage from positions 6760–190,905. (ITRs not included); 1892 amplicons, sized 93–246 bp (average size is 150 bp)

  • Input Material Extracted viral DNA; Suggested minimum of 300 viral genome copies

  • Time ~2.5 hours for viral DNA-to-library

  • Multiplexing capability Up to 1536 UDIs

  • Compatible with other indexes? Yes

  • Recommended depth

  • Strain identification or variant calling: 500K reads per library. However, we have 70K(70896)x2 reads in the sample Affe31!

  • UDI stands for Unique Dual Index. This refers to a method used in next-generation sequencing (NGS) where each sample is tagged with a unique combination of two indices (dual indices) to enable multiplexing, i.e., the simultaneous sequencing of multiple samples in a single run. The dual indexing helps to distinguish different samples and minimizes the risk of index hopping or misassignments.

  • Index hopping(索引跳跃) 是下一代测序(Next-Generation Sequencing, NGS)中的一种现象。在这种情况下,用于标识不同样本的索引(条形码)会意外地在样本之间“跳跃”。这意味着某个样本的索引对(双索引)可能会与其他样本混合,从而导致测序数据错误地分配。具体来说,索引跳跃会导致特定样本的序列与错误的条形码关联。例如,如果样本A的索引与样本B的序列混淆,最终的分析可能会错误地将样本B的序列归类为样本A的序列。这种现象在使用某些测序平台(如Illumina NovaSeq)时尤其明显,因为在聚类或扩增过程中,适配子分子可能会交换条形码。为降低索引跳跃的风险,研究人员可以使用双重索引(即使用两个索引)和特定的文库准备方法。

Product data of xGen™ Monkeypox Virus Amplicon Panel

  • The recent emergence of monkeypox viral infections globally has resulted in an increased need for rapid, reliable NGS approaches to not only monitor and trace outbreaks but to also track any potentially novel variants that may arise.

  • Epidemiological studies are currently underway to pinpoint transmission and infection patterns of this zoonotic disease.

  • IDT recognizes the importance of these studies and has designed an xGen NGS Amplicon Sequencing Panel to target the monkeypox virus†.

  • The xGen Monkeypox Virus Amplicon Panel offers a streamlined (DNA-to-sequencer in 2.5 hours), single-tube NGS workflow for studying the monkeypox virus (MPXV).

  • This Predesigned xGen Amplicon Panel provides 184 kb of high-quality coverage of the monkeypox genome (Table 2, Figure 2 and Figure 3) from inputs as low as 300 viral genome copies (Table 3).

  • xGen Amplicon technology includes amplicon tiling and creation of super amplicons to ensure comprehensive genome coverage and provide resistance to future viral mutations that may fall on a priming site (Figure 4), thus enabling future identification of novel variants.

  • Coverage and on-target mapping rates

  • Based on initial research and development, this panel has been shown to offer comprehensive coverage of the monkeypox viral genome from positions 6760–190,905. * !! The inverted terminal repeats (ITRs) at both ends of the genome were omitted from the panel design due to the repetitive nature of these sequences. !!

  • proprietary 英 [prəˈpraɪətri] adj. 专有的,专利的; 所有(人)的; (商品)专卖的

  • To prepare amplicon sequencing libraries using the xGen Monkeypox Virus Amplicon Panel, ~3000 copies of the monkeypox genome (BEI Resources) and 10 ng Coriell DNA NA12878 (human) were used.

  • The resulting NGS library was sequenced on a MiniSeq™ system (Illumina) with 150 bp paired-end (PE) sequencing with 1,774,058 total reads.

  • Reads were aligned and mapped to the monkeypox reference genome (DQ011157 [2]) using bwa (v 2.2.1 [3]).

  • Table 2 shows representative metrics obtained in this proof-of-concept experiment.

  • Table 2. xGen Monkeypox Virus Amplicon Panel NGS metrics.

  • name | % mapping | % on-target (base) | % base uniformity (>0.2X mean)

  • xGen Monkeypox Virus Amplicon Panel | 88.1 | 97.7 | 98.0

  • Sequencing results from titers (浓度测定,滴定量) as low as 300 viral genome copies

  • The input into xGen Monkeypox Virus Amplicon Panel consisted of either 3000, 300, or 0 copies of the monkeypox genome (BEI Resources) and 10 ng Coriell DNA NA12878 (human).

  • The resulting NGS library was sequenced as described above.

  • Reads were aligned and mapped to the monkeypox reference genome (DQ011157 [2]) using bwa (v 2.2.1 [3]).

  • A high level of genomic coverage was observed with 3000 and 300 monkeypox genomic copies (Table 3), and no genomic coverage was observed with no monkeypox copy input.

  • Table 3. The xGen Monkeypox Virus Amplicon Panel provides coverage for a range of viral DNA inputs.

  • 他们的depth是 1-2 million reads, 但是你的depth只有70K!

  • Sample number | Input copies of viral genomes | Total reads | Percent target bases at ≥10X (base)

  • 1 | 3000 | 2,374,500 | 99.4

  • 2 | 3000 | 2,725,540 | 99.4

  • 3 | 300 | 1,984,616 | 98.8

  • 4 | 300 | 1,963,414 | 98.7

  • 5 | 0/NTC | 2,183,412 | 0.2

  • 6 | 0/NTC | 2,464,906 | 0.3

  • Super amplicons help maintain sequencing coverage despite mutations

  • Figure 4. The xGen Monkeypox Virus Amplicon Panel maintains genomic coverage despite mutations at primer binding sites.

  • The generation of super amplicons means that even in the case of a mutation occurring in primer binding sites (shown here by black arrows), the xGen Monkeypox Virus Amplicon Panel can maintain genomic coverage.

  • The input into library prep consisted of ~3000 copies of the monkeypox genome (BEI Resources) and 10 ng Coriell DNA NA12878 (human).

  • The resulting NGS library was sequenced on a MiniSeq system (Illumina) (150 bp PE sequencing) with 1,774,058 total reads.

  • Reads were aligned and mapped to the monkeypox reference genome (DQ011157 [2]) using bwa (v 2.2.1 [3]) and coverage was visualized with IGV (Broad Institute [4]).

https://virological.org/t/first-german-genome-sequence-of-monkeypox-virus-associated-to-multi-country-outbreak-in-may-2022/812

  • Since early May 2022, dozens of suspected and confirmed monkeypox infections have been reported in several European and North American countries.

  • The first German monkeypox case was reported in Munich on May 19, where the 26 year-old patient had shown characteristic skin changes.

  • During his travel through Europe, he finally showed mild symptoms and requested medical examination.

  • The Bundeswehr Institute of Microbiology performed primary diagnostics of a swab taken from a skin lesion and subsequently sequenced the sample.

  • Here, we announce the available data to the scientific community.

  • DNA extraction was performed using DNeasy Mini Kit (Qiagen, Hilden, Germany) from clinical material and eluted in 100µl EB-Buffer.

  • DNA concentrations were quantified using the Qubit dsDNA HS Assay Kit (Thermo Fisher Scientific, Dreieich, Germany) according to the manufacturers’ protocols.

  • From total DNA, an Illumina-compatible library was prepared (NEBNext® Ultra™ II DNA Library Prep Kit, NEB (New England Biolabs), Frankfurt am Main, Germany) and sequenced on a MiSeq instrument (Illumina, San Diego, CA, USA) using 2x 150bp v2 chemistry in order to obtain paired-end reads.

  • Raw reads were assigned by Kraken 2 [1] and human reads were discarded.

  • Remaining paired-end reads were assembled de-novo using an in-house script based on the SPAdes assembler [2] in single-cell mode.

  • In addition, viral reads were mapped to MPXV_USA_2022_MA001 (AccNo: ON563414) for validation and manual curation of the reported genome sequence.

  • Afterwards, a contig extension was performed using the previously assembled contigs.

  • The full-length genome comprises 197,378 bp and was sequenced directly from clinical material.

  • BLAST analysis [3] and phylogenetic inference support the classification of this isolate into the West-African clade associated with the recent isolates from Europe and the US.

  • The nearest neighbor is PT0010/2022, an isolate from Portugal (published May 23 by INSA).

  • All of the identified SNPs compared to MPXV_UK_P2 are either TC→TT or GA→AA in dinucleotide context and potentially caused by APOBEC3 as hypothesized by Rambaut.

  • In addition, a 10 bp deletion (CAATCTTTCT) was discovered at position 133,175, which is part of an exact tandem repeat or an inexact triplet repeat upstream of a hypothetical protein.

  • Interestingly, this duplication is not annotated in the recently published CDC strain ON563414 and the Belgium strain ITM_MPX_1_Belgium but in all sequences from Portugal (PT0001-PT0009).

  • The genome sequence was submitted to NCBI Nucleotide (ON568298) and will be updated (if needed) as soon as data from the grown cell-cultures are available.

  • Phylogenetic analysis is based on an incremental analysis of all publicly available full-length (>180 kbp) sequences acquired via NCBI Nucleotide or shared here by colleagues on virological.org.

  • Recombinant or apparently partially older sequences were discarded.

  • Multiple sequence alignments were carried out using MAFFT v7.490 (options: --auto --6merpair; [4]).

  • Phylogenetic inference was performed with maximum likelihood as implemented in fasttree [5], using the GTRCAT model with subsequent rescaling of branch lengths optimizing a discrete gamma model with 20 rate categories (fasttreemp options: -nt -gtr -gamma; binary compiled with -DUSE_DOUBLE).

  • The full tree was rooted at the separation between the Central and West African clades, and sequences of the West African clade were selected for a reanalysis that also included the most recent sequences from Portugal.

  • It was rooted at the midpoint and is shown below.

Enriching Monkeypox virus using xGen™ Monkeypox Virus Amplicon Panel

https://www.idtdna.com/pages/products/next-generation-sequencing/workflow/xgen-ngs-amplicon-sequencing/predesigned-amplicon-panels/xgen-monkeypox-virus-amplicon-panel

Monkeypox has been termed a global health emergency. For researchers interested in tracking the evolution of the monkeypox virus genome, it can be difficult to get sufficient coverage. The xGen Monkeypox Virus Amplicon Panel provides a single-tube, two-step PCR amplification workflow with primer sets designed to create amplicons across the entire genome.*

If coverage is too low, you may need to sequence more reads to ensure uniform coverage. For many assembly algorithms, 30x coverage or higher is recommended.

Query Seq-id Start of alignment in query End of alignment in query Start of alignment in subject End of alignment in subject Expect value Alignment length Percentage of identical matches Subject Seq-id Subject Title Query Coverage Per Subject Query Coverage Per HSP Subject accession Subject sequence length Query sequence length

  1. Using bacto pipeline to get trimmed reads

    cp /home/jhuang/Tools/bacto/bacto-0.1.json .
    cp /home/jhuang/Tools/bacto/cluster.json .
    cp /home/jhuang/Tools/bacto/Snakefile .
    ln -s /home/jhuang/Tools/bacto/local .
    ln -s /home/jhuang/Tools/bacto/db .
    ln -s /home/jhuang/Tools/bacto/envs .
    
    mkdir raw_data; cd raw_data
    ln -s ../raw_data_downloads/Affe30_S1_L001_R1_001.fastq.gz Affe30_R1.fastq.gz
    ln -s ../raw_data_downloads/Affe30_S1_L001_R2_001.fastq.gz Affe30_R2.fastq.gz
    ln -s ../raw_data_downloads/Affe31_S2_L001_R1_001.fastq.gz Affe31_R1.fastq.gz
    ln -s ../raw_data_downloads/Affe31_S2_L001_R2_001.fastq.gz Affe31_R2.fastq.gz
    
    (bengal3_ac3) /home/jhuang/miniconda3/envs/snakemake_4_3_1/bin/snakemake --printshellcmds
  2. Filtering low complexity

    # Drop low-complexity read pairs (complexity threshold 30).
    # The outputs are named *.fastq.gz because the assembly steps below read
    # Affe3x_trimmed_R{1,2}.fastq.gz; fastp gzip-compresses output whose file
    # name ends in .gz.
    fastp -i Affe30_trimmed_P_1.fastq -I Affe30_trimmed_P_2.fastq -o Affe30_trimmed_R1.fastq.gz -O Affe30_trimmed_R2.fastq.gz --low_complexity_filter --complexity_threshold 30
    fastp -i Affe31_trimmed_P_1.fastq -I Affe31_trimmed_P_2.fastq -o Affe31_trimmed_R1.fastq.gz -O Affe31_trimmed_R2.fastq.gz --low_complexity_filter --complexity_threshold 30
  3. Using vrap to assemble and annotate the contigs; the SPAdes step was replaced with IDBA from DAMIAN

    #--host /home/jhuang/REFs/genome.fa -n /mnt/nvme0n1p1/blast/nt -a /mnt/nvme0n1p1/blast/nr
    # First vrap attempt (fails in the SPAdes assembly step, see below)
    vrap/vrap.py  -1 Affe30_trimmed_R1.fastq.gz -2 Affe30_trimmed_R2.fastq.gz -o Affe30_trimmed_vrap_out   -t 40 -l 100
    vrap/vrap.py  -1 Affe31_trimmed_R1.fastq.gz -2 Affe31_trimmed_R2.fastq.gz -o Affe31_trimmed_vrap_out   -t 40 -l 100

    #--> ERROR in the SPAdes assembly; we use the IDBA assembly from DAMIAN instead and copy it into the vrap spades directory. IT WORKS!
    damian.rb --host human3 --type dna -1 /home/jhuang/DATA/Data_Susanne_MPox/Affe31_trimmed_R1.fastq.gz -2 /home/jhuang/DATA/Data_Susanne_MPox/Affe31_trimmed_R2.fastq.gz --sample Affe31_megablast --blastn never --blastp never --min_contiglength 100 --threads 56 --force
    damian.rb --host human3 --type dna -1 /home/jhuang/DATA/Data_Susanne_MPox/Affe30_trimmed_R1.fastq.gz -2 /home/jhuang/DATA/Data_Susanne_MPox/Affe30_trimmed_R2.fastq.gz --sample Affe30_megablast --blastn never --blastp never --min_contiglength 100 --threads 56 --force
    damian.rb --host human3 --type dna -1 /home/jhuang/DATA/Data_Susanne_MPox/Affe31_trimmed_R1.fastq.gz -2 /home/jhuang/DATA/Data_Susanne_MPox/Affe31_trimmed_R2.fastq.gz --sample Affe31_blastn --blastn progressive --blastp never --min_contiglength 100 --threads 56 --force
    damian.rb --host human3 --type dna -1 /home/jhuang/DATA/Data_Susanne_MPox/Affe30_trimmed_R1.fastq.gz -2 /home/jhuang/DATA/Data_Susanne_MPox/Affe30_trimmed_R2.fastq.gz --sample Affe30_blastn --blastn progressive --blastp never --min_contiglength 100 --threads 56 --force
    # Copy each sample's IDBA contigs into its own vrap output directory.
    # (Previously both copies targeted ./contigs.fasta, so the second one
    # overwrote the first; the mapping step reads <sample>_trimmed_vrap_out/spades/contigs.fasta.)
    mkdir -p Affe30_trimmed_vrap_out/spades Affe31_trimmed_vrap_out/spades
    cp ~/rtpd_files/Affe30_megablast/idba_ud_assembly/contig.fa Affe30_trimmed_vrap_out/spades/contigs.fasta
    cp ~/rtpd_files/Affe31_megablast/idba_ud_assembly/contig.fa Affe31_trimmed_vrap_out/spades/contigs.fasta

    # Re-run vrap on top of the copied contigs
    vrap/vrap.py  -1 Affe30_trimmed_R1.fastq.gz -2 Affe30_trimmed_R2.fastq.gz -o Affe30_trimmed_vrap_out   -t 40 -l 100
    vrap/vrap.py  -1 Affe31_trimmed_R1.fastq.gz -2 Affe31_trimmed_R2.fastq.gz -o Affe31_trimmed_vrap_out   -t 40 -l 100
  4. Step-by-Step Process for read and contig mapping profiling plots

    a. mapping the contig on the reference JX878414
    
            bowtie2-build JX878414.1.fasta JX878414.1_index
    
            bowtie2 -f -x JX878414.1_index -U Affe30_trimmed_vrap_out/spades/contigs.fasta -S Affe30_contigs_aligned.sam
            samtools view -S -b Affe30_contigs_aligned.sam > Affe30_contigs_aligned.bam
            samtools sort Affe30_contigs_aligned.bam -o Affe30_contigs_aligned_sorted.bam
            samtools index Affe30_contigs_aligned_sorted.bam
    
            bowtie2 -f -x JX878414.1_index -U Affe31_trimmed_vrap_out/spades/contigs.fasta -S Affe31_contigs_aligned.sam
            samtools view -S -b Affe31_contigs_aligned.sam > Affe31_contigs_aligned.bam
            samtools sort Affe31_contigs_aligned.bam -o Affe31_contigs_aligned_sorted.bam
            samtools index Affe31_contigs_aligned_sorted.bam
    
            62 reads; of these:
            62 (100.00%) were unpaired; of these:
                1 (1.61%) aligned 0 times
                61 (98.39%) aligned exactly 1 time
                0 (0.00%) aligned >1 times
            98.39% overall alignment rate
            17 reads; of these:
            17 (100.00%) were unpaired; of these:
                1 (5.88%) aligned 0 times
                16 (94.12%) aligned exactly 1 time
                0 (0.00%) aligned >1 times
            94.12% overall alignment rate
    
    b. candidate methods for mapping the contigs on a reference
    
            #1. Minimap2 – Efficient for aligning large contigs or entire genomes.
            minimap2 -ax asm5 JX878414.1.fasta contigs_Affe30.fasta > output.sam
    
            #2. BLAST - More sensitive but slower; good for divergent sequences.
            # JX878414.1.fasta is a plain FASTA file, not a formatted BLAST database,
            # so it is passed with -subject (-db would require running makeblastdb first).
            blastn -query contigs_Affe30.fasta -subject JX878414.1.fasta -out output.blast -outfmt 6
    
            #3. LAST – Suitable for more distant relationships and large genomic changes.
            lastdb my_reference_db JX878414.1.fasta
            lastal my_reference_db contigs_Affe30.fasta > output.maf
    
            #4. MUMmer (nucmer) – Best for comparing large contigs or entire genomes, especially with structural variations.
            nucmer JX878414.1.fasta contigs_Affe30.fasta -p output
    
    c. Index the Reference Genome (JX878414.1.fasta): You'll need to index your reference genome to align the contigs and reads against it.
    
            samtools faidx JX878414.1.fasta
    
    d. Generate Coverage Profile for Reads (from Fastq): Align the trimmed fastq reads (Affe31_trimmed_R1.fastq.gz and Affe31_trimmed_R2.fastq.gz) to the reference genome using a mapper like BWA or Bowtie2.
    
            bwa index JX878414.1.fasta
            bwa mem JX878414.1.fasta Affe31_trimmed_R1.fastq.gz Affe31_trimmed_R2.fastq.gz > Affe31_reads_aligned.sam
    
        Convert the SAM file to BAM and sort it:
    
            samtools view -Sb Affe31_reads_aligned.sam | samtools sort -o Affe31_reads_aligned_sorted.bam
            samtools index Affe31_reads_aligned_sorted.bam
            #6743 + 0 in total (QC-passed reads + QC-failed reads)
            #0 + 0 secondary
            #9 + 0 supplementary
            #0 + 0 duplicates
            #5312 + 0 mapped (78.78% : N/A)
            #6734 + 0 paired in sequencing
            #3367 + 0 read1
            #3367 + 0 read2
            #4822 + 0 properly paired (71.61% : N/A)
            #4844 + 0 with itself and mate mapped
            #459 + 0 singletons (6.82% : N/A)
            #0 + 0 with mate mapped to a different chr
            #0 + 0 with mate mapped to a different chr (mapQ>=5)
    
    e. Generate Coverage Profile for Contigs: The file Affe31_contigs_aligned_sorted.bam (created in step a) is already aligned against the reference genome. You can directly use it to visualize contig coverage.
    
        Ensure the BAM file is indexed:
    
            samtools index Affe31_contigs_aligned_sorted.bam
    
    f. Generate Coverage Tracks: Use bamCoverage (deepTools) to generate coverage files (in bigWig format) for both the reads and contigs.
    
        For reads coverage:
    
            bamCoverage -b Affe31_reads_aligned_sorted.bam -o Affe31_reads_coverage.bw
    
        For contigs coverage:
    
            bamCoverage -b Affe31_contigs_aligned_sorted.bam -o Affe31_contigs_coverage.bw
    
    g. Generate Coverage Profile for Reads (from Fastq): Align the trimmed fastq reads (Affe30_trimmed_R1.fastq.gz and Affe30_trimmed_R2.fastq.gz) to the reference genome using a mapper like BWA or Bowtie2.
    
            bwa index JX878414.1.fasta
            bwa mem JX878414.1.fasta Affe30_trimmed_R1.fastq.gz Affe30_trimmed_R2.fastq.gz > Affe30_reads_aligned.sam
    
        Convert the SAM file to BAM and sort it:
    
            samtools view -Sb Affe30_reads_aligned.sam | samtools sort -o Affe30_reads_aligned_sorted.bam
            samtools index Affe30_reads_aligned_sorted.bam
            #20556 + 0 in total (QC-passed reads + QC-failed reads)
            #0 + 0 secondary
            #42 + 0 supplementary
            #0 + 0 duplicates
            #13645 + 0 mapped (66.38% : N/A)
            #20514 + 0 paired in sequencing
            #10257 + 0 read1
            #10257 + 0 read2
            #12582 + 0 properly paired (61.33% : N/A)
            #12648 + 0 with itself and mate mapped
            #955 + 0 singletons (4.66% : N/A)
            #0 + 0 with mate mapped to a different chr
            #0 + 0 with mate mapped to a different chr (mapQ>=5)
    
    h. Generate Coverage Profile for Contigs: The file Affe30_contigs_aligned_sorted.bam (created in step a) is already aligned against the reference genome. You can directly use it to visualize contig coverage.
    
        Ensure the BAM file is indexed:
    
            samtools index Affe30_contigs_aligned_sorted.bam
    
    i. Generate Coverage Tracks: Use bamCoverage (deepTools) to generate coverage files (in bigWig format) for both the reads and contigs.
    
        For reads coverage:
    
            bamCoverage -b Affe30_reads_aligned_sorted.bam -o Affe30_reads_coverage.bw
    
        For contigs coverage:
    
            bamCoverage -b Affe30_contigs_aligned_sorted.bam -o Affe30_contigs_coverage.bw
    
    j. calculate coverages
    
            # Per-position depth (-a also reports zero-depth positions), then the mean of column 3.
            # Each sample gets its own depth file so the second run does not overwrite the first,
            # and the awk END block skips the division when the depth file is empty.
            samtools depth -a Affe30_reads_aligned_sorted.bam > Affe30_coverage.txt
            awk '{sum+=$3} END {if (NR > 0) print "Average coverage = ",sum/NR}' Affe30_coverage.txt
            #Average coverage =  26.6073
            samtools depth -a Affe31_reads_aligned_sorted.bam > Affe31_coverage.txt
            awk '{sum+=$3} END {if (NR > 0) print "Average coverage = ",sum/NR}' Affe31_coverage.txt
            #Average coverage =  22.0228
    
    k. Visualize Alignments: Use a tool like IGV (Integrative Genomics Viewer) or the following Python script to visualize the alignment.
    
            import matplotlib.pyplot as plt
            import pandas as pd
            import pysam
    
            # File paths
            contig_bam_file = 'Affe30_contigs_aligned_sorted.bam'
            reads_bam_file = 'Affe30_reads_aligned_sorted.bam'
    
            # Function to calculate per-base coverage from a BAM file
            def calculate_coverage(bam_file):
                """Return the read depth at every reference position of a BAM file.

                Args:
                    bam_file: Path to a BAM file that pysam can open and pile up
                        (the original script runs it on sorted, indexed BAMs).

                Returns:
                    A list of ints with one entry per reference base. For a
                    single-reference BAM, list index i is 0-based genomic
                    position i; with several references, they are concatenated
                    in header order. Positions without any pileup column keep
                    depth 0.

                The list is dense because pileup() only yields columns that
                have reads; appending those values one after another would
                silently squeeze out uncovered regions and shift every later
                position. Memory use is proportional to the total reference
                length.
                """
                # The context manager closes the BAM handle even if pileup fails.
                with pysam.AlignmentFile(bam_file, "rb") as samfile:
                    # Start index of each reference inside the concatenated list.
                    offsets = []
                    total_length = 0
                    for ref_length in samfile.lengths:
                        offsets.append(total_length)
                        total_length += ref_length

                    coverage = [0] * total_length

                    # pileupcolumn.n is the number of reads covering the column.
                    for pileupcolumn in samfile.pileup():
                        index = offsets[pileupcolumn.reference_id] + pileupcolumn.reference_pos
                        coverage[index] = pileupcolumn.n

                return coverage
    
            def save_coverage_plot(coverage, line_color, title, out_png):
                """Plot coverage values against their list index and save the figure as PNG."""
                # The x value of each point is simply its index in the coverage list.
                frame = pd.DataFrame({'position': range(len(coverage)), 'coverage': coverage})

                plt.figure(figsize=(12, 6))
                plt.plot(frame['position'], frame['coverage'], color=line_color)
                plt.title(title)
                plt.xlabel("Genomic Position")
                plt.ylabel("Coverage")
                plt.grid()
                plt.savefig(out_png)  # Write the figure to disk
                plt.close()           # Release the figure before the next plot

            # Reads: compute coverage, then plot it in blue
            read_coverage = calculate_coverage(reads_bam_file)
            save_coverage_plot(read_coverage, 'blue',
                               "Read Coverage Profile", "reads_coverage_profile.png")

            # Contigs: compute coverage, then plot it in green
            contig_coverage = calculate_coverage(contig_bam_file)
            save_coverage_plot(contig_coverage, 'green',
                               "Contigs Coverage Profile", "contigs_coverage_profile.png")

            print("Coverage plots saved successfully.")
  5. Reporting

    # Copy each sample's assembled contigs and BLAST annotation into the
    # current directory under report-friendly names.
    for sample in Affe30 Affe31; do
        cp "./${sample}_trimmed_vrap_out/spades/contigs.fasta" "${sample}_contigs.fasta"
        cp "./${sample}_trimmed_vrap_out/blast/blastn.xlsx" "${sample}_contigs_annotation.xlsx"
    done
    
    1. The contigs from the de novo assembly are incomplete and far from covering the full genome (see attached: Affe30_contigs.fasta, Affe31_contigs.fasta, Affe30_contigs_annotation.xlsx, Affe31_contigs_annotation.xlsx).
    
    2. I've generated a plot showing both the reads and assembled contigs mapped to the closely related reference genome JX878414 (see attached: reads_and_contigs_on_JX878414.png).
    
    3. One major issue I've noted is the low sequencing depth. After processing (trimming, adapter removal, and filtering out low-complexity reads), the remaining read count is very low. In comparison, the benchmark data from IDT shows they generated approximately 2-2.7 million paired reads per sample for a similar study. Our read count after quality control is significantly lower: Affe30 has only 10,257 paired-end reads, and Affe31 has 3,367 paired-end reads.
    
    Here's a summary of the read counts:
    
    Summary of Read Numbers
    
    * Raw read count:
    - Affe30: 88,183 x 2 paired-end reads
    - Affe31: 70,896 x 2 paired-end reads
    
    * After trimming (adapter sequences, bases with quality < Q30, and reads < 36 nt):
    - Affe30: 18,789 x 2 paired-end reads
    - Affe31: 11,282 x 2 paired-end reads
    
    * After removing low-complexity reads (e.g., GGGATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT):
    - Affe30: 10,257 x 2 paired-end reads
    - Affe31: 3,367 x 2 paired-end reads

Benchmarking transcriptional host response signatures for infection diagnosis

https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9768893/

Abstract

  • Identification of host transcriptional response signatures has emerged as a new paradigm for infection diagnosis.
  • For clinical applications, signatures must robustly detect the pathogen of interest without cross-reacting with unintended conditions.
  • To evaluate the performance of infectious disease signatures, we developed a framework that includes a compendium of 17,105 transcriptional profiles capturing infectious and non-infectious conditions and a standardized methodology to assess robustness and cross-reactivity.
  • Applied to 30 published signatures of infection, the analysis showed that signatures were generally robust in detecting viral and bacterial infections in independent data.
  • Asymptomatic and chronic infections were also detectable, albeit with decreased performance.
  • However, many signatures were cross-reactive with unintended infections and aging.
  • In general, we found robustness and cross-reactivity to be conflicting objectives, and we identified signature properties associated with this trade-off.
  • The data compendium and evaluation framework developed here provide a foundation for the development of signatures for clinical application.
  • A record of this paper’s transparent peer review process is included in the supplemental information.

Introduction

  • The ability to diagnose infectious diseases has a profound impact on global health.

  • Most recently, diagnostic testing for SARS-CoV-2 infection has helped contain the COVID-19 pandemic, lessening the strain on healthcare systems.

  • As a further example, diagnostic technologies that discriminate bacterial from viral infections can inform the prescription of antibiotics.

  • This is a high-stakes clinical decision: if prescribed for bacterial infections, the use of antibiotics substantially reduces mortality,1 but if prescribed for viral infections, their misuse exacerbates antimicrobial resistance.2

  • Standard tests for infection diagnosis involve a variety of technologies including microbial cultures, PCR assays, and antigen-binding assays.

  • Despite the diversity in technologies, standard tests generally share a common design principle, which is to directly quantify pathogen material in patient samples.

  • As a consequence, standard tests have poor detection, particularly early after infection, before the pathogen replicates to detectable levels.

  • For example, PCR-based tests for SARS-CoV-2 infection may miss 60%–100% of infections within the first few days of infection due to insufficient viral genetic material.3 , 4

  • Similarly, a study of community acquired pneumonia found that pathogen-based tests failed to identify the causative pathogen in over 60% of patients.5

  • To overcome these limitations, new tools for infection diagnosis are urgently needed.

  • Host transcriptional response assays have emerged as a new paradigm to diagnose infections.6 , 7 , 8 , 9 , 10

  • Research in the field has produced a variety of host response signatures to detect general viral or bacterial infections as well as signatures for specific pathogens such as influenza virus.6 , 11 , 12 , 13 , 14 , 15

  • Unlike standard tests that measure pathogen material, these assays monitor changes in gene expression in response to infection.16

  • For example, transcriptional upregulation of IFN response genes may indicate an ongoing viral infection because these genes take part in the host antiviral response.17

  • Host response assays have a major potential advantage over pathogen-based tests because they may detect an infection even when the pathogen material is undetectable through direct measurements.

  • Development of ‘host response assays’ that can be implemented clinically poses new methodological problems.

  • The most challenging problem is identifying the so-called “infection signature” for a pathogen of interest, that is, a set of host transcriptional changes induced in response to that pathogen.

  • Signature performance is characterized along two axes, robustness and cross-reactivity.

  • Robustness is defined as the ability of a signature to detect the intended infectious condition consistently in multiple independent cohorts.

  • Cross-reactivity is defined as the extent to which a signature predicts any condition other than the intended one.

  • To be clinically viable, an infection signature must simultaneously demonstrate high robustness and low cross-reactivity.

  • A robust signature that does not demonstrate low cross-reactivity would detect unintended conditions, such as other infections (e.g., viral signatures detecting bacterial infections) and/or non-infectious conditions involving abnormal immune activation.

  • The clinical applicability of host response signatures ultimately depends on a rigorous evaluation of their robustness and cross-reactivity properties.

  • Such an evaluation is a complex task because it requires integrating and analyzing massive amounts of transcriptional studies involving the pathogen of interest along with a wide variety of other infectious and non-infectious conditions that may cause cross-reactivity.

  • Despite recent progress in this direction,10 , 18 , 19 a general framework to benchmark both robustness and cross-reactivity of candidate signatures is still lacking.

  • Here, we establish a general framework for systematic quantification of robustness and cross-reactivity of a candidate signature, based on a fine-grained curation of massive public data and development of a standardized signature scoring method.

  • Using this framework, we demonstrated that published signatures are generally robust but substantially cross-reactive with infectious and non-infectious conditions.

  • Further analysis of 200,000 synthetic signatures identified an inherent trade-off between robustness and cross-reactivity and determined signature properties associated with this trade-off.

  • Our framework, freely accessible at https://kleinsteinlab.shinyapps.io/compendium_shiny_app/, lays the foundation for the discovery of signatures of infection for clinical application.

Result 1: A curated set of human transcriptional infection signatures

  • While many transcriptional host response signatures of infection have been published, their robustness and cross-reactivity properties have not been systematically evaluated.

  • To identify published signatures for inclusion in our systematic evaluation, we performed a search of NCBI PubMed for publications describing immune profiling of viral or bacterial infections (Figure 1 A).

  • We initially focused our curation on general viral or bacterial (rather than pathogen-specific) signatures from human whole-blood or peripheral-blood mononuclear cells (PBMCs).

  • We identified 24 signatures that were derived using a wide range of computational approaches, including differential expression analyses,7 , 20 , 21 , 22 gene clustering,23 , 24 regularized logistic regression,19 , 20 , 25 and meta-analyses.8 , 11

  • The signatures were annotated with multiple characteristics that were needed for the evaluation of performance.

  • The most important characteristic was the intended use of the signatures.

  • The intended use of the included signatures was to detect viral infection (V), bacterial infection (B), or directly discriminate between viral and bacterial infections (V/B).

  • For each signature, we recorded a set of genes and a group I versus group II comparison capturing the design of the signature, where group I was the intended infection type, and group II was a control group. For most viral and bacterial signatures, group II comprised healthy controls; in a few cases, it comprised non-infectious illness controls. For signatures distinguishing viral and bacterial infections (V/B), we conventionally took the bacterial infection group as the control group.

  • We parsed the genes in these signatures as either “positive” or “negative” based on whether they were upregulated or downregulated in the intended group, respectively.

  • We also manually annotated the PubMed identifiers for the publication in which the signature was reported, accession records to identify discovery datasets used to build each signature, association of the signature with either acute or chronic infection, and additional metadata related to demographics and experimental design (Table S1).

  • This curation process identified 11 viral (V) signatures intended to capture transcriptional responses that are common across many viral pathogens, 7 bacterial (B) signatures intended to capture transcriptional responses common across bacterial pathogens, and 6 viral versus bacterial (V/B) signatures discriminating between viral and bacterial infections.

  • Viral signatures varied in size between 3 and 396 genes.

  • Several genes appeared in multiple viral signatures. For example, OASL, an interferon-induced gene with antiviral function,26 appeared in 6 of 11 signatures.

  • Enrichment analysis on the pool of viral signature genes showed significantly enriched terms consistent with antiviral immunity, including response to type I interferon (Figure 1B).

  • Bacterial signatures ranged in size from 2 to 69 genes, and enrichment analysis again highlighted expected pathways associated with antibacterial immunity (Figure 1C).

  • V/B signatures varied in size from 2 to 69 genes. The most common genes among V/B signatures were OASL and IFI27, both of which were also highly represented viral signature genes, and many of the same antiviral pathways were significantly enriched among V/B signature genes (Figure 1D).

  • We further investigated the similarity between viral, bacterial, and V/B signatures and found that many viral signatures shared genes with each other and V/B signatures, but bacterial signatures shared fewer similarities with each other (Figure 1E).

  • Overall, our curation produced a structured and well-annotated set of transcriptional signatures for systematic evaluation.

Result 2: A compendium of human transcriptional infection datasets

  • To profile the performance of the curated infection signatures, we compiled a large compendium of datasets capturing host blood transcriptional responses to a wide diversity of pathogens.

  • We carried out a comprehensive search in the NCBI Gene Expression Omnibus (GEO)27 selecting transcriptional responses to in vivo viral, bacterial, parasitic, and fungal infections in human whole blood or PBMCs.

  • We screened over 8,000 GEO records and identified 136 transcriptional datasets that met our inclusion criteria (see STAR Methods).

  • Furthermore, to evaluate whether infection signatures cross-react with non-infectious conditions with documented immunomodulating effects, we also compiled an additional 14 datasets containing transcriptomes from the blood of aged and obese individuals.28 , 29

  • All datasets were downloaded from GEO and passed through a standardized pipeline.

  • Briefly, the pipeline included (1) uniform pre-processing of raw data files where possible, (2) remapping of available gene identifiers to Entrez Gene IDs, and (3) detection of outlier samples.30

  • In aggregate, we compiled, processed, and annotated 150 datasets to include in our data compendium (Figure 2 A; Table S2; see STAR Methods for details).

  • The compendium datasets showed wide variability in study design, sample composition, and available metadata necessitating annotation both at the study level and at the finer-grained sample level.

  • Datasets followed either cross-sectional study designs, where individual subjects were profiled once for a snapshot of their infection, or longitudinal study designs, in which individual subjects were profiled at multiple time points over the course of an infection.

  • For longitudinal datasets, we also recorded subject identifiers and labeled time points.

  • Many datasets contained multiple subgroups, each profiling infection with a different pathogen.

  • Detailed review of the clinical methods and metadata for each study enabled us to annotate individual samples with infectious class (e.g., viral or bacterial) and causative pathogen.

  • For clinical variables, we manually recorded whether datasets profiled acute or chronic infections according to the authors and annotated symptom severity when available.

  • We further supplemented this information with biological sex, which we inferred computationally (see STAR Methods).

  • In total, we annotated 16,173 infection and control samples in a consistent way, capturing host responses to viral, bacterial, and parasitic infections.

  • We similarly annotated the additional 932 samples from aging and obesity datasets including young and lean controls, respectively. In aggregate we captured a broad range of more than 35 unique pathogens and non-infectious conditions (Figure 2B).

  • Most of our compendium datasets were composed of viral and bacterial infection response profiles.

  • We examined several technical factors that may bias the signature performance evaluation between these pathogen categories.

  • Datasets profiling viral infections and datasets profiling bacterial infections contained similar numbers of samples, with median sample sizes of 75.5 and 63.0, respectively, though the largest viral studies contained more samples than the largest bacterial studies (Figure 2C).

  • The number of cross-sectional studies was also nearly identical for both viral and bacterial infection datasets, but our compendium contained 20 viral longitudinal datasets (35% of viral) compared with 6 bacterial longitudinal datasets (10% of bacterial) (Figure 2D).

  • We also examined the distribution of platforms used to generate viral and bacterial infection datasets and found that gene expression was measured most commonly using Illumina platforms, followed by Affymetrix, for both viral and bacterial datasets (Figure 2E).

  • We also examined the frequency of whole blood and PBMC samples in our compendium (Figure 2F).

  • We did not identify systematic differences in the viral and bacterial datasets within our compendium, and therefore we do not expect these differences to impact the interpretation of our signature evaluations.

Result 3: Establishing a general framework for signature evaluation

  • We sought to quantify two measures of performance for all curated signatures: (1) robustness, the ability of a signature to predict its target infection in independent datasets not used for signature discovery, and (2) cross-reactivity, which we quantify as the undesired extent to which a signature predicts unrelated infections or conditions.

  • An ideal signature would demonstrate robustness but not cross-reactivity, e.g., an ideal viral signature would predict viral infections in independent datasets but would not be associated with infections caused by pathogens such as bacteria or parasites.

  • To score each signature in a standardized way, we leveraged the geometric mean scoring approach described in Haynes et al.31

  • For each signature (i.e., a set of positive genes and an optional set of negative genes), we calculated its sample score from log-transformed expression values by taking the difference between the geometric mean of positive signature gene expression values and the geometric mean of negative signature gene expression values.

  • For cross-sectional study designs, this generates a single signature score for each subject, but for longitudinal study designs, this approach produces a vector of scores across time points for each subject (refers to a dataset from GEO, for example virus datasets GSE117827, or bacteria dataset GSE128557, or parasite dataset GSE122737).

  • The scores at different time points can vary dramatically as the transcriptional program underlying an immune response changes over the course of an infection.11 , 16 , 32

  • In this case, we chose the maximally discriminative time point, so that a signature is considered robust if it can detect the infection at any time point but also considered cross-reactive if it would produce a false-positive call at any time point (see STAR Methods).

  • These subject scores were then used to quantify signature performance as the area under a receiver operator characteristic curve (AUROC) associated with each group comparison.

  • This approach is advantageous because it is computationally efficient and model-free.

  • The model-free property presents an advantage over parameterized models because it does not require transferring or re-training model coefficients between datasets.

  • Overall, this framework enables us to evaluate the performance of all signatures in a standardized and consistent way in any dataset (Figure 3 A).

  • signature –> transcriptomics –> subject score (a subject is a patient or a healthy control in a GEO dataset) –> metric –> evaluation (robustness and cross-reactivity)

#TODO

https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6716367/#R67 Benchmarking Metagenomics Tools for Taxonomic Classification

DNA classifiers

  • Bracken
  • Centrifuge
  • CLARK
  • CLARK-S
  • Kraken
  • Kraken2
  • KrakenUniq
  • k-SLAM
  • MegaBLAST
  • metaOthello
  • PathSeq
  • prophyle
  • taxMaps

Protein classifiers

  • DIAMOND
  • Kaiju
  • MMseqs2

Markers classifiers

  • MetaPhlAn2
  • mOTUs2

Using deepARG to detect Antibiotic Resistance Genes (ARGs)

#Using https://cge.food.dtu.dk/services/SCCmecFinder/ and http://genepi.food.dtu.dk/resfinder instead
#https://cge.food.dtu.dk/services/VirulenceFinder/

#https://github.com/gaarangoa/deeparg
#https://docs.conda.io/en/latest/miniconda.html

# -- install deepARG --
# Create a dedicated Python 2.7 environment and activate it before installing,
# so that every package below is installed into deeparg_env.
conda create -n deeparg_env python=2.7.18
conda activate deeparg_env
mamba install -c bioconda diamond==0.9.24
mamba install -c bioconda trimmomatic
mamba install -c bioconda vsearch
mamba install -c bioconda bedtools==2.29.2
mamba install -c bioconda bowtie2==2.3.5.1
mamba install -c bioconda samtools

# Install deepARG from GitHub and download its data files to ~/Tools/deeparg/
# (the directory given to the pipeline with -d).
pip install git+https://github.com/gaarangoa/deeparg.git
deeparg download_data -o ~/Tools/deeparg/

# In a new shell session, re-activate the environment before running deeparg.
conda activate deeparg_env

# -- run deeparg --
# Run the short-reads pipeline once per sample; each sample reads
# ./raw_data/<sample>_R{1,2}.fastq.gz and writes to ./<sample>_arg_out.
for sample in HDM1 HDM7 HDM10 HDM11-SF1 HDM15-SF2; do
    deeparg short_reads_pipeline \
        --forward_pe_file "./raw_data/${sample}_R1.fastq.gz" \
        --reverse_pe_file "./raw_data/${sample}_R2.fastq.gz" \
        --output_file "./${sample}_arg_out" \
        -d ~/Tools/deeparg \
        --bowtie_16s_identity 100
done

Configuring Mutt for Gmail: A Step-by-Step Guide (Todo)

Um Mutt mit einem Google-Konto zu konfigurieren, sind sowohl IMAP für das Empfangen von E-Mails als auch SMTP für das Senden von E-Mails erforderlich. Hier sind die Schritte zur Konfiguration von Mutt mit den erforderlichen Einstellungen für beide Dienste:

Schritt 1: IMAP und SMTP in Ihrem Google-Konto aktivieren

  • IMAP aktivieren:

    • Melden Sie sich bei Ihrem Gmail-Konto an.
    • Klicken Sie auf das Zahnrad-Symbol in der oberen rechten Ecke und wählen Sie “Alle Einstellungen anzeigen”.
    • Gehen Sie zum Tab Weiterleitung und POP/IMAP.
    • Aktivieren Sie die Option IMAP-Zugriff und speichern Sie die Änderungen.
  • Zwei-Faktor-Authentifizierung aktivieren (falls noch nicht geschehen):

    • Gehen Sie zu Google-Konto > Sicherheit.
    • Unter “Bei Google anmelden” aktivieren Sie die Bestätigung in zwei Schritten.
    • Folgen Sie den Anweisungen, um die Zwei-Faktor-Authentifizierung einzurichten.
  • App-Passwort generieren:

    • Nachdem Sie die Zwei-Faktor-Authentifizierung aktiviert haben, gehen Sie zurück zu Google-Konto > Sicherheit.
    • Unter “Bei Google anmelden” finden Sie den Abschnitt App-Passwörter.
    • Wählen Sie Mail und dann Sonstige (benutzerdefinierter Name) aus, um einen Namen für Ihr App-Passwort einzugeben (z. B. „Mutt“).
    • Klicken Sie auf Generieren und notieren Sie sich das 16-stellige App-Passwort.

Schritt 2: Mutt konfigurieren

Öffnen Sie Ihre Mutt-Konfigurationsdatei (~/.muttrc) und fügen Sie die folgenden Zeilen hinzu:

# ~/.muttrc

# Sender identity: e-mail address and display name
set from = "xxxx@gmail.com"
set realname = "XXXX YYYY"

# SMTP settings for Gmail: the URL carries the account name and the app
# password (replace YOUR_APP_PASSWORD) and targets smtp.gmail.com on port 587
set smtp_url = "smtp://xxxx@gmail.com:YOUR_APP_PASSWORD@smtp.gmail.com:587/"
set smtp_authenticators = "login"
set ssl_starttls = yes
set ssl_force_tls = yes

# IMAP settings: remote folder root, inbox, and the folder for sent mail
set folder = "imaps://imap.gmail.com:993"
set spoolfile = "+INBOX"
set record = "+Sent"

# Choose the editor
set editor = "vim"  # Replace this with your preferred editor

Wichtig: Ersetzen Sie YOUR_APP_PASSWORD durch das 16-stellige App-Passwort, das Sie in Schritt 1 generiert haben, und stellen Sie sicher, dass keine Leerzeichen vorhanden sind.

Schritt 3: Mutt testen

  • Konfigurationsdatei sichern: Stellen Sie sicher, dass die Konfigurationsdatei gesichert ist, indem Sie den folgenden Befehl ausführen:

     chmod 600 ~/.muttrc
  • E-Mail senden: Verwenden Sie den folgenden Befehl, um eine E-Mail zu senden:

     echo -e "Hi XXXX,\n\nBitte finden Sie im Anhang die neuesten Ergebnisse unserer DAMIAN-Analyse.\n\nBeste Grüße,\nYYYY" | mutt -s "Neue Ergebnisse von ZZZZZZ" -- "example@example.com"
  • E-Mail empfangen: Starten Sie Mutt einfach durch Eingabe von mutt im Terminal, um Ihre E-Mails anzuzeigen und zu verwalten.

Fehlerbehebung

Falls Sie auf Probleme stoßen, überprüfen Sie Folgendes:

  • Authentifizierung: Stellen Sie sicher, dass Sie das korrekte App-Passwort verwenden und dass Ihr Mutt über die richtige Authentifizierungsmethode verfügt.
  • Zugriffsrechte: Überprüfen Sie, ob der IMAP-Zugriff in Ihrem Google-Konto aktiviert ist und dass die Zwei-Faktor-Authentifizierung ordnungsgemäß konfiguriert ist.
  • Netzwerkverbindung: Stellen Sie sicher, dass Ihr Computer mit dem Internet verbunden ist und dass keine Firewall-Einstellungen den Zugriff auf Gmail blockieren.

Mit diesen Einstellungen sollte Mutt erfolgreich mit Ihrem Google-Konto konfiguriert sein, sodass Sie E-Mails empfangen und senden können. Wenn Sie weitere Fragen haben oder auf spezifische Probleme stoßen, lassen Sie es mich wissen!

Co-Authorship Network Generator using scraped data from Google Scholar via SerpAPI

co_author_network_small

  1. main script coauthorship_network.py

    import networkx as nx
    import matplotlib.pyplot as plt
    import bibtexparser
    import re
    
    #python3 get_articles_with_serpapi.py > articles_Brinkmann.txt
    #grep -o 'title' articles_Brinkmann.txt | wc -l
    
    # Helper that normalises BibTeX-style name/title strings
    def clean_string(s):
        """Strip BibTeX/LaTeX clutter from ``s`` and drop one-character words.

        Deletes every '.', '{' and '}', removes each backslash together with
        the one or two ASCII letters directly after it (longer command names
        are only partially removed), then splits on single spaces and
        re-joins the pieces that are longer than one character.
        """
        stripped = s.translate(str.maketrans('', '', '.{}'))
        stripped = re.sub(r'\\[a-zA-Z]{1,2}', '', stripped)
        kept_words = [word for word in stripped.split(' ') if len(word) > 1]
        return ' '.join(kept_words)
    # Load the .bib file
    with open('articles_Brinkmann.txt', 'r') as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file)
        print(len(bib_database.entries))
    
    # Create an empty graph; author-publication edges are added further below
    G = nx.Graph()

    # Alternative: restrict the network to a hand-picked author list
    #internal_authors = ["L Redecke", "T Schulz", "M Brinkmann"]
    #internal_authors = list(set(internal_authors))
    # Collect every distinct author name that occurs in any entry
    internal_authors = set()
    for entry in bib_database.entries:

        # The 'authors' field is split on ', '. An entry without that field
        # would yield [''], so empty names are dropped here to keep a bogus
        # empty-string author out of the network.
        authors = [name for name in entry.get('authors', '').split(', ') if name]
        #cleaned_authors = [clean_string(author) for author in authors]

        # Print authors
        #print(f"Authors: {cleaned_authors}")
        print(f"Authors: {authors}")

        # Add authors to the internal_authors set
        internal_authors.update(authors)

    # Convert the (already de-duplicated) set to a list
    internal_authors = list(internal_authors)
    print(f"Internal Authors: {internal_authors}")
    
    #print("Nodes:", G.nodes())
    #print("Edges:", G.edges())
    
    # Second pass over the entries: connect each author to the title of every
    # publication they appear on, giving an author-publication graph.
    for entry in bib_database.entries:
        # '=' in a title is replaced by a space before the title becomes a node name
        title = entry.get('title', '').replace('=', ' ')
        #title = clean_string(f"Paper/{title}")
        #print(title)
        authors = entry.get('authors', '').split(', ')  # The 'authors' field is split on ', ' (comma + space)
        #print(authors)
        for author in authors:
            #author = clean_string(f"Author/{author}")
            # Only names present in internal_authors are added to the graph
            if author in internal_authors:
                print(author)
                G.add_edge(author, title)
    ## Try different layout engines if 'sfdp' is problematic
    #try:
    #    pos = nx.nx_pydot.graphviz_layout(G, prog='sfdp')
    #except AssertionError:
    #    print("Error with sfdp layout, switching to 'dot' layout.")
    #    pos = nx.nx_pydot.graphviz_layout(G, prog='dot')  # Fallback to 'dot'
    try:
        pos = nx.nx_pydot.graphviz_layout(G, prog='sfdp')
    except Exception as e:
        print("Error generating layout:", e)
        pos = nx.spring_layout(G)  # Fallback to another layout
    for node in G.nodes():
        if isinstance(node, str):
            G.nodes[node]['label'] = node.replace(' ', '_').replace('/', '_')
    print("Nodes in the graph:", G.nodes())
    # Split the graph's nodes into authors and publications once, using a set
    # for membership tests instead of rescanning the internal_authors list.
    author_lookup = set(internal_authors)
    author_nodes = [n for n in G.nodes() if n in author_lookup]
    publication_nodes = [n for n in G.nodes() if n not in author_lookup]
    # The longest author name determines the author node size; default=0 keeps
    # max() from raising ValueError when the graph has no author nodes.
    max_len = max((len(n) for n in author_nodes), default=0)
    print(max_len)
    plt.figure(figsize=(96, 72))
    # Plot nodes for authors
    nx.draw_networkx_nodes(G, pos,
        #nodelist=[n for n in G.nodes() if n.startswith('Author/')],
        nodelist=author_nodes,
        node_size=max_len*200)
    # Plot nodes for publications
    nx.draw_networkx_nodes(G, pos,
        nodelist=publication_nodes,
        node_color='y',
        node_size=100)
    nx.draw_networkx_edges(G, pos)
    # Label author nodes only; spaces become line breaks inside the label
    nx.draw_networkx_labels(G, pos,
        labels={n: n.split('/')[-1].replace(' ', '\n') for n in author_nodes},
        font_color='w', font_size=10, font_weight='bold', font_family='serif') #font_family='sans-serif')
    #'serif': Uses a serif typeface (e.g., Times New Roman).
    #'sans-serif': Uses a sans-serif typeface (e.g., Helvetica, Arial).
    #'monospace': Uses a monospace typeface (e.g., Courier New).
    #'DejaVu Sans': A popular sans-serif typeface available in many environments.
    #'Arial': A widely available sans-serif typeface.
    #'Times New Roman': A classic serif typeface.
    #'Comic Sans MS': A casual sans-serif typeface.
    plt.axis('off')
    # Save the plot as a PNG file
    #, bbox_inches="tight"
    plt.savefig("co_author_network.png", format="png")
    #convert Figure_2.png -crop 1340x750+315+135 co_author_network_cropped_2.png
    plt.show()
    
    # Count the number of publications and authors
    publications = [n for n in G.nodes() if n not in internal_authors]
    #authors = [n for n in G.nodes() if n in internal_authors]
    
    # Print the counts
    print(f"Number of Publications: {len(publications)}")
    print(f"Number of Authors: {len(internal_authors)}")
    #68, 476
    
    ## Optionally, print out the publications and authors themselves
    #print(f"Publications: {publications}")
    print(f"Authors: {internal_authors}")
  2. code of get_articles_with_serpapi.py

    # Query SerpApi's google_scholar_author engine for one author profile and
    # print the raw response dictionary (post-processed by hand in step 5).
    import os
    
    from serpapi import GoogleSearch
    #pip install google-search-results
    #https://github.com/serpapi/google-search-results-python
    #https://serpapi.com/google-scholar-author-co-authors
    #We are able to extract: name, link, author_id, affiliations, email, and thumbnail results.
    
    # The key is taken from the SERPAPI_API_KEY environment variable so the real
    # credential does not have to be stored in the script; the original
    # placeholder literal is kept as fallback.
    params = {
      "engine": "google_scholar_author",
      "author_id": "5AzhtgUAAAAJ",
      "api_key": os.environ.get("SERPAPI_API_KEY", "ed"),
      "num" : 100
    }
    
    #-- for each publication, view complete citation statistics per year, e.g. for 5AzhtgUAAAAJ:KlAtU1dfN6UC
    #https://scholar.google.com/citations?hl=en&view_op=view_citation&citation_for_view=5AzhtgUAAAAJ:KlAtU1dfN6UC
    #params = {
    #  "engine": "google_scholar_author",
    #  "citation_id": "5AzhtgUAAAAJ:KlAtU1dfN6UC",
    #  "view_op": "view_citation",
    #  "api_key": "ed"
    #}
    
    search = GoogleSearch(params)
    results = search.get_dict()
    # Printed unchanged: the manual post-processing below works on this output.
    print(results)
    
    ## Safely get the co_authors
    #co_authors = results.get("co_authors", [])
    #if co_authors:
    #    print("Co-authors:", co_authors)
    #else:
    #    print("No co-authors found.")
    ##co_authors = results["co_authors"]
  3. code of get_coauthors_jiabin.py

    # Ask SerpApi for the colleague list (view_op=list_colleagues) of one
    # Google Scholar author and print the raw response dictionary.
    import os
    
    from serpapi import GoogleSearch
    
    # Key comes from the SERPAPI_API_KEY environment variable; the original
    # placeholder literal is kept as fallback.
    params = {
      "engine": "google_scholar_author",
      "author_id": "P1pS4s0AAAAJ",
      "view_op": "list_colleagues",
      "api_key": os.environ.get("SERPAPI_API_KEY", "ed")
    }
    
    search = GoogleSearch(params)
    results = search.get_dict()
    print(results)
    
    # Direct indexing raises KeyError when the response carries no "co_authors"
    # entry; fall back to an empty list instead.
    co_authors = results.get("co_authors", [])
  4. get_citations_raw.py

    # Send one Google Scholar query to the Scrapingdog API and print the decoded
    # JSON response, or the HTTP status code when the request is not successful.
    import os
    
    import requests
    
    # Key comes from the SCRAPINGDOG_API_KEY environment variable; the original
    # placeholder literal is kept as fallback.
    api_key = os.environ.get("SCRAPINGDOG_API_KEY", "60")
    url = "https://api.scrapingdog.com/google_scholar"
    
    params = {
        "api_key": api_key,
        "query": "Melanie M. Brinkmann",
        "language": "en",
        "page": 10,
        "results": 100
    }
    
    # requests applies no timeout unless one is given, so without this argument
    # an unresponsive server would block the script indefinitely.
    response = requests.get(url, params=params, timeout=60)
    
    if response.status_code == 200:
        data = response.json()
        print(data)
    else:
        print(f"Request failed with status code: {response.status_code}")
  5. post-processing of the file generated by get_articles_with_serpapi.py

    #delete all header and ends: namely remove "...'articles': [" and "], cited_by= {"table"= [{citations= {"all"= 5351, "since_2019"= 2358}}, {"h_index"= {"all"= 33, "since_2019"= 25}}, {"i10_index"= {"all"= 47, "since_2019"= 42}}], graph= [{year= 2006, citations= 22}, {year= 2007, citations= 73}, {year= 2008, citations= 93}, {year= 2009, citations= 216}, {year= 2010, citations= 204}, {year= 2011, citations= 293}, {year= 2012, citations= 269}, {year= 2013, citations= 331}, {year= 2014, citations= 282}, {year= 2015, citations= 308}, {year= 2016, citations= 281}, {year= 2017, citations= 301}, {year= 2018, citations= 266}, {year= 2019, citations= 288}, {year= 2020, citations= 369}, {year= 2021, citations= 609}, {year= 2022, citations= 430}, {year= 2023, citations= 366}, {year= 2024, citations= 288}]}, 'public_access': {'link': 'https://scholar.google.com/citations?view_op=list_mandates&hl=en&user=5AzhtgUAAAAJ', 'available': 50, 'not_available': 4}}"
    
    "}, {'title':" --> "}\n\n@article{1, title="
    replace 1 with the actual id (1,...,82)
    "MM Brinkmann" --> "M Brinkmann"
    :-->=
    '-->"
    "link" --> link, "citation_id" --> citation_id, "authors"-->authors, "publication"-->publication, "cited_by"-->cited_by, "serpapi_link"-->serpapi_link, "graph"-->graph, "cites_id"-->cites_id, "year"-->year, "citations"-->citations, "value" --> value
    
    #manually replace the abbreviated author names with the complete names found by following the Google Scholar links
    #remove the records whose links still show only abbreviated names.
    
    #The end result looks as follows:
    @article{1, title= "UNC93B1 delivers nucleotide-sensing toll-like receptors to endolysosomes", link= "https=//scholar.google.com/citations?view_op=view_citation&hl=en&user=5AzhtgUAAAAJ&pagesize=100&citation_for_view=5AzhtgUAAAAJ=W7OEmFMy1HYC", citation_id= "5AzhtgUAAAAJ=W7OEmFMy1HYC", authors= "You-Me Kim, Melanie M Brinkmann, Marie-Eve Paquet, Hidde L Ploegh", publication= "Nature 452 (7184), 234-238, 2008", cited_by= {value= 847, link= "https=//scholar.google.com/scholar?oi=bibs&hl=en&cites=3461748963046634721", serpapi_link= "https=//serpapi.com/search.json?cites=3461748963046634721&engine=google_scholar&hl=en", cites_id= "3461748963046634721"}, year= "2008"}
    
    @article{2, title= "Proteolytic cleavage in an endolysosomal compartment is required for activation of Toll-like receptor 9", link= "https=//scholar.google.com/citations?view_op=view_citation&hl=en&user=5AzhtgUAAAAJ&pagesize=100&citation_for_view=5AzhtgUAAAAJ=4TOpqqG69KYC", citation_id= "5AzhtgUAAAAJ=4TOpqqG69KYC", authors= "Boyoun Park, Melanie M Brinkmann, Eric Spooner, Clarissa C Lee, You-Me Kim, Hidde L Ploegh", publication= "Nature immunology 9 (12), 1407-1414, 2008", cited_by= {value= 587, link= "https=//scholar.google.com/scholar?oi=bibs&hl=en&cites=8523162291112327960", serpapi_link= "https=//serpapi.com/search.json?cites=8523162291112327960&engine=google_scholar&hl=en", cites_id= "8523162291112327960"}, year= "2008"}
    
    @article{3, title= "The interaction between the ER membrane protein UNC93B and TLR3, 7, and 9 is crucial for TLR signaling", link= "https=//scholar.google.com/citations?view_op=view_citation&hl=en&user=5AzhtgUAAAAJ&pagesize=100&citation_for_view=5AzhtgUAAAAJ=-f6ydRqryjwC", citation_id= "5AzhtgUAAAAJ=-f6ydRqryjwC", authors= "Melanie M Brinkmann, Eric Spooner, Kasper Hoebe, Bruce Beutler, Hidde L Ploegh, You-Me Kim", publication= "The Journal of cell biology 177 (2), 265-275, 2007", cited_by= {value= 562, link= "https=//scholar.google.com/scholar?oi=bibs&hl=en&cites=13542374013520997852", serpapi_link= "https=//serpapi.com/search.json?cites=13542374013520997852&engine=google_scholar&hl=en", cites_id= "13542374013520997852"}, year= "2007"}
    
    @article{4, title= "Noncanonical autophagy is required for type I interferon secretion in response to DNA-immune complexes", link= "https=//scholar.google.com/citations?view_op=view_citation&hl=en&user=5AzhtgUAAAAJ&pagesize=100&citation_for_view=5AzhtgUAAAAJ=KlAtU1dfN6UC", citation_id= "5AzhtgUAAAAJ=KlAtU1dfN6UC", authors= "Jill Henault, Jennifer Martinez, Jeffrey M Riggs, Jane Tian, Payal Mehta, Lorraine Clarke, Miwa Sasai, Eicke Latz, Melanie M Brinkmann, Akiko Iwasaki, Anthony J Coyle, Roland Kolbeck, Douglas R Green, Miguel A Sanjuan", publication= "Immunity 37 (6), 986-997, 2012", cited_by= {value= 376, link= "https=//scholar.google.com/scholar?oi=bibs&hl=en&cites=6648645242373278731", serpapi_link= "https=//serpapi.com/search.json?cites=6648645242373278731&engine=google_scholar&hl=en", cites_id= "6648645242373278731"}, year= "2012"}
    
    @article{5, title= "Granulin is a soluble cofactor for toll-like receptor 9 signaling", link= "https=//scholar.google.com/citations?view_op=view_citation&hl=en&user=5AzhtgUAAAAJ&pagesize=100&citation_for_view=5AzhtgUAAAAJ=M3ejUd6NZC8C", citation_id= "5AzhtgUAAAAJ=M3ejUd6NZC8C", authors= "Boyoun Park, Ludovico Buti, Sungwook Lee, Takashi Matsuwaki, Eric Spooner, Melanie M Brinkmann, Masugi Nishihara, Hidde L Ploegh", publication= "Immunity 34 (4), 505-513, 2011", cited_by= {value= 223, link= "https=//scholar.google.com/scholar?oi=bibs&hl=en&cites=8731573748380815185", serpapi_link= "https=//serpapi.com/search.json?cites=8731573748380815185&engine=google_scholar&hl=en", cites_id= "8731573748380815185"}, year= "2011"}
    
    #Check how many unique author names in the graphics?
    # Alternative ways to de-duplicate the author names. Each of the next five
    # commands rewrites author_name_uniq.txt, so only the last one run counts.
    # (a) strip leading/trailing spaces and tabs, then exact de-duplication
    sed -e 's/^[ \t]*//' -e 's/[ \t]*$//' author_names.txt | sort -u > author_name_uniq.txt
    # (b) de-duplicate ignoring case
    sort -u -f author_names.txt > author_name_uniq.txt
    # (c) keep only tab, LF, CR and printable ASCII (octal 40-176) before de-duplicating
    tr -cd '\11\12\15\40-\176' < author_names.txt | sort -u > author_name_uniq.txt
    # (d) trimming, ASCII filtering and case-insensitive de-duplication combined
    sed -e 's/^[ \t]*//' -e 's/[ \t]*$//' author_names.txt | tr -cd '\11\12\15\40-\176' | sort -u -f > author_name_uniq.txt
    
    # (e) replace the non-ASCII hyphen with a plain "-" before de-duplicating
    sed -e 's/‐/-/g' author_names.txt | sort -u > author_name_uniq.txt
    # lower-case the de-duplicated list and de-duplicate once more
    tr '[:upper:]' '[:lower:]' < author_name_uniq.txt | sort -u > author_name_uniq2.txt

Typing of 81 S. epidermidis samples (Luise)

ggtree_and_gheatmap_Luise_81samples

  1. prepare the bacto environment

    # Enter the project directory, copy the three bacto pipeline files into it
    # and symlink bacto's local, db and envs entries next to them.
    cd ~/DATA/Data_Luise_Sepi_STKN
    cp /home/jhuang/Tools/bacto/bacto-0.1.json \
       /home/jhuang/Tools/bacto/cluster.json \
       /home/jhuang/Tools/bacto/Snakefile .
    ln -s /home/jhuang/Tools/bacto/local \
          /home/jhuang/Tools/bacto/db \
          /home/jhuang/Tools/bacto/envs .
  2. prepare raw_data

    mkdir raw_data; cd raw_data
    
    ln -s ../raw_data_batch2/mibi2312/Luise_1_S33_R1_001.fastq.gz mibi2312_R1.fastq.gz
    ln -s ../raw_data_batch2/mibi2312/Luise_1_S33_R2_001.fastq.gz mibi2312_R2.fastq.gz
    ln -s ../raw_data_batch2/mibi2379/Luise_2_S3_R1_001.fastq.gz  mibi2379_R1.fastq.gz
    ln -s ../raw_data_batch2/mibi2379/Luise_2_S3_R2_001.fastq.gz  mibi2379_R2.fastq.gz
    ln -s ../raw_data_batch2/mibi2313/Luise_3_S34_R1_001.fastq.gz mibi2313_R1.fastq.gz
    ln -s ../raw_data_batch2/mibi2313/Luise_3_S34_R2_001.fastq.gz mibi2313_R2.fastq.gz
    ln -s ../raw_data_batch2/mibi2380/Luise_4_S4_R1_001.fastq.gz  mibi2380_R1.fastq.gz
    ln -s ../raw_data_batch2/mibi2380/Luise_4_S4_R2_001.fastq.gz  mibi2380_R2.fastq.gz
    ln -s ../raw_data_batch2/mibi2314/Luise_5_S35_R1_001.fastq.gz mibi2314_R1.fastq.gz
    ln -s ../raw_data_batch2/mibi2314/Luise_5_S35_R2_001.fastq.gz mibi2314_R2.fastq.gz
    ln -s ../raw_data_batch2/mibi2315/Luise_6_S36_R1_001.fastq.gz mibi2315_R1.fastq.gz
    ln -s ../raw_data_batch2/mibi2315/Luise_6_S36_R2_001.fastq.gz mibi2315_R2.fastq.gz
    ln -s ../raw_data_batch2/mibi2381/Luise_7_S5_R1_001.fastq.gz  mibi2381_R1.fastq.gz
    ln -s ../raw_data_batch2/mibi2381/Luise_7_S5_R2_001.fastq.gz  mibi2381_R2.fastq.gz
    ln -s ../raw_data_batch2/mibi2316/Luise_8_S37_R1_001.fastq.gz mibi2316_R1.fastq.gz
    ln -s ../raw_data_batch2/mibi2316/Luise_8_S37_R2_001.fastq.gz mibi2316_R2.fastq.gz
    ln -s ../raw_data_batch2/mibi2317/Luise_9_S38_R1_001.fastq.gz mibi2317_R1.fastq.gz
    ln -s ../raw_data_batch2/mibi2317/Luise_9_S38_R2_001.fastq.gz mibi2317_R2.fastq.gz
    ln -s ../raw_data_batch2/mibi2318/Luise_10_S39_R1_001.fastq.gz mibi2318_R1.fastq.gz
    ln -s ../raw_data_batch2/mibi2318/Luise_10_S39_R2_001.fastq.gz mibi2318_R2.fastq.gz
    ln -s ../raw_data_batch2/mibi2319/Luise_11_S40_R1_001.fastq.gz mibi2319_R1.fastq.gz
    ln -s ../raw_data_batch2/mibi2319/Luise_11_S40_R2_001.fastq.gz mibi2319_R2.fastq.gz
    ln -s ../raw_data_batch2/mibi2320/Luise_12_S41_R1_001.fastq.gz mibi2320_R1.fastq.gz
    ln -s ../raw_data_batch2/mibi2320/Luise_12_S41_R2_001.fastq.gz mibi2320_R2.fastq.gz
    ln -s ../raw_data_batch2/mibi2321/Luise_13_S42_R1_001.fastq.gz mibi2321_R1.fastq.gz
    ln -s ../raw_data_batch2/mibi2321/Luise_13_S42_R2_001.fastq.gz mibi2321_R2.fastq.gz
    
    # raw_data_batch1: mibi1435 - mibi1506
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1435/S_epi_K_38_S1_R1_001.fastq.gz mibi1435_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1435/S_epi_K_38_S1_R2_001.fastq.gz mibi1435_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1436/S_epi_K_68_S2_R1_001.fastq.gz mibi1436_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1436/S_epi_K_68_S2_R2_001.fastq.gz mibi1436_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1437/S_epi_K_70_S3_R1_001.fastq.gz mibi1437_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1437/S_epi_K_70_S3_R2_001.fastq.gz mibi1437_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1438/S_epi_K_71_S4_R1_001.fastq.gz mibi1438_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1438/S_epi_K_71_S4_R2_001.fastq.gz mibi1438_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1439/S_epi_K_48_S5_R1_001.fastq.gz mibi1439_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1439/S_epi_K_48_S5_R2_001.fastq.gz mibi1439_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1440/S_epi_K_51_S6_R1_001.fastq.gz mibi1440_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1440/S_epi_K_51_S6_R2_001.fastq.gz mibi1440_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1441/S_epi_K_53_S7_R1_001.fastq.gz mibi1441_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1441/S_epi_K_53_S7_R2_001.fastq.gz mibi1441_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1442/S_epi_K_54_S8_R1_001.fastq.gz mibi1442_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1442/S_epi_K_54_S8_R2_001.fastq.gz mibi1442_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1443/S_epi_K_56_S9_R1_001.fastq.gz mibi1443_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1443/S_epi_K_56_S9_R2_001.fastq.gz mibi1443_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1444/S_epi_K_63_S10_R1_001.fastq.gz mibi1444_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1444/S_epi_K_63_S10_R2_001.fastq.gz mibi1444_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1445/S_epi_K_64_S11_R1_001.fastq.gz mibi1445_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1445/S_epi_K_64_S11_R2_001.fastq.gz mibi1445_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1446/S_epi_K_65_S12_R1_001.fastq.gz mibi1446_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1446/S_epi_K_65_S12_R2_001.fastq.gz mibi1446_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1447/S_epi_K_75_S13_R1_001.fastq.gz mibi1447_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1447/S_epi_K_75_S13_R2_001.fastq.gz mibi1447_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1448/S_epi_K_11_S14_R1_001.fastq.gz mibi1448_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1448/S_epi_K_11_S14_R2_001.fastq.gz mibi1448_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1449/S_epi_K_12_S15_R1_001.fastq.gz mibi1449_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1449/S_epi_K_12_S15_R2_001.fastq.gz mibi1449_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1450/S_epi_K_13_S16_R1_001.fastq.gz mibi1450_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1450/S_epi_K_13_S16_R2_001.fastq.gz mibi1450_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1451/S_epi_K_77_S17_R1_001.fastq.gz mibi1451_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1451/S_epi_K_77_S17_R2_001.fastq.gz mibi1451_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1452/S_epi_K_79_S18_R1_001.fastq.gz mibi1452_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1452/S_epi_K_79_S18_R2_001.fastq.gz mibi1452_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1453/S_epi_K_80_S19_R1_001.fastq.gz mibi1453_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1453/S_epi_K_80_S19_R2_001.fastq.gz mibi1453_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1454/S_epi_K_1_gross_S20_R1_001.fastq.gz mibi1454_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1454/S_epi_K_1_gross_S20_R2_001.fastq.gz mibi1454_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1455/S_epi_K_2_S21_R1_001.fastq.gz mibi1455_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1455/S_epi_K_2_S21_R2_001.fastq.gz mibi1455_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1456/S_epi_K_3_S22_R1_001.fastq.gz mibi1456_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1456/S_epi_K_3_S22_R2_001.fastq.gz mibi1456_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1457/S_epi_K_6_S23_R1_001.fastq.gz mibi1457_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1457/S_epi_K_6_S23_R2_001.fastq.gz mibi1457_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1458/S_epi_K_8_S24_R1_001.fastq.gz mibi1458_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1458/S_epi_K_8_S24_R2_001.fastq.gz mibi1458_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1459/S_epi_K_17_S25_R1_001.fastq.gz mibi1459_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1459/S_epi_K_17_S25_R2_001.fastq.gz mibi1459_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1460/S_epi_K_26_S26_R1_001.fastq.gz mibi1460_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1460/S_epi_K_26_S26_R2_001.fastq.gz mibi1460_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1461/S_epi_K_34_S27_R1_001.fastq.gz mibi1461_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1461/S_epi_K_34_S27_R2_001.fastq.gz mibi1461_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1462/S_epi_K_35_S28_R1_001.fastq.gz mibi1462_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1462/S_epi_K_35_S28_R2_001.fastq.gz mibi1462_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1463/S_epi_K_19_S29_R1_001.fastq.gz mibi1463_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1463/S_epi_K_19_S29_R2_001.fastq.gz mibi1463_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1464/S_epi_K_22_S30_R1_001.fastq.gz mibi1464_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1464/S_epi_K_22_S30_R2_001.fastq.gz mibi1464_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1465/S_epi_K_23_S31_R1_001.fastq.gz mibi1465_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1465/S_epi_K_23_S31_R2_001.fastq.gz mibi1465_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1466/S_epi_K_24_S32_R1_001.fastq.gz mibi1466_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1466/S_epi_K_24_S32_R2_001.fastq.gz mibi1466_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1467/S_epi_K_30_S33_R1_001.fastq.gz mibi1467_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1467/S_epi_K_30_S33_R2_001.fastq.gz mibi1467_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1468/S_epi_K_31_S34_R1_001.fastq.gz mibi1468_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1468/S_epi_K_31_S34_R2_001.fastq.gz mibi1468_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1469/S_epi_K_32_S35_R1_001.fastq.gz mibi1469_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1469/S_epi_K_32_S35_R2_001.fastq.gz mibi1469_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1470/S_epi_K_1_klein_S36_R1_001.fastq.gz mibi1470_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1470/S_epi_K_1_klein_S36_R2_001.fastq.gz mibi1470_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1471/S_epi_K_37_S37_R1_001.fastq.gz mibi1471_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1471/S_epi_K_37_S37_R2_001.fastq.gz mibi1471_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1472/S_epi_K_46_S38_R1_001.fastq.gz mibi1472_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1472/S_epi_K_46_S38_R2_001.fastq.gz mibi1472_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1473/S_epi_K_57_S39_R1_001.fastq.gz mibi1473_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1473/S_epi_K_57_S39_R2_001.fastq.gz mibi1473_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1474/S_epi_K_59_S40_R1_001.fastq.gz mibi1474_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1474/S_epi_K_59_S40_R2_001.fastq.gz mibi1474_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1475/S_epi_K_58_S41_R1_001.fastq.gz mibi1475_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1475/S_epi_K_58_S41_R2_001.fastq.gz mibi1475_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1476/S_epi_K_60_S42_R1_001.fastq.gz mibi1476_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1476/S_epi_K_60_S42_R2_001.fastq.gz mibi1476_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1477/S_epi_K_61_S43_R1_001.fastq.gz mibi1477_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1477/S_epi_K_61_S43_R2_001.fastq.gz mibi1477_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1478/S_epi_K_28_S44_R1_001.fastq.gz mibi1478_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1478/S_epi_K_28_S44_R2_001.fastq.gz mibi1478_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1479/S_epi_K_33_S45_R1_001.fastq.gz mibi1479_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1479/S_epi_K_33_S45_R2_001.fastq.gz mibi1479_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1480/S_epi_K_39_S46_R1_001.fastq.gz mibi1480_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1480/S_epi_K_39_S46_R2_001.fastq.gz mibi1480_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1481/S_epi_K_40_S47_R1_001.fastq.gz mibi1481_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1481/S_epi_K_40_S47_R2_001.fastq.gz mibi1481_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1482/S_epi_K_43_S48_R1_001.fastq.gz mibi1482_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1482/S_epi_K_43_S48_R2_001.fastq.gz mibi1482_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1483/S_epi_K_97_S49_R1_001.fastq.gz mibi1483_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1483/S_epi_K_97_S49_R2_001.fastq.gz mibi1483_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1484/S_epi_K_84_S50_R1_001.fastq.gz mibi1484_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1484/S_epi_K_84_S50_R2_001.fastq.gz mibi1484_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1485/S_epi_K_85_S51_R1_001.fastq.gz mibi1485_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1485/S_epi_K_85_S51_R2_001.fastq.gz mibi1485_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1486/S_epi_K_86_S52_R1_001.fastq.gz mibi1486_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1486/S_epi_K_86_S52_R2_001.fastq.gz mibi1486_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1487/S_epi_K_116_S53_R1_001.fastq.gz mibi1487_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1487/S_epi_K_116_S53_R2_001.fastq.gz mibi1487_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1488/S_epi_K_118_S54_R1_001.fastq.gz mibi1488_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1488/S_epi_K_118_S54_R2_001.fastq.gz mibi1488_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1489/S_epi_K_120_S55_R1_001.fastq.gz mibi1489_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1489/S_epi_K_120_S55_R2_001.fastq.gz mibi1489_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1490/S_epi_K_123_S56_R1_001.fastq.gz mibi1490_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1490/S_epi_K_123_S56_R2_001.fastq.gz mibi1490_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1491/S_epi_K_124_S57_R1_001.fastq.gz mibi1491_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1491/S_epi_K_124_S57_R2_001.fastq.gz mibi1491_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1492/S_epi_K_127_S58_R1_001.fastq.gz mibi1492_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1492/S_epi_K_127_S58_R2_001.fastq.gz mibi1492_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1493/S_epi_K_29_S59_R1_001.fastq.gz mibi1493_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1493/S_epi_K_29_S59_R2_001.fastq.gz mibi1493_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1494/S_epi_K_72_S60_R1_001.fastq.gz mibi1494_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1494/S_epi_K_72_S60_R2_001.fastq.gz mibi1494_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1495/S_epi_K_89_S61_R1_001.fastq.gz mibi1495_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1495/S_epi_K_89_S61_R2_001.fastq.gz mibi1495_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1496/S_epi_K_110_S62_R1_001.fastq.gz mibi1496_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1496/S_epi_K_110_S62_R2_001.fastq.gz mibi1496_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1497/S_epi_K_112_S63_R1_001.fastq.gz mibi1497_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1497/S_epi_K_112_S63_R2_001.fastq.gz mibi1497_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1498/S_epi_K_114_S64_R1_001.fastq.gz mibi1498_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1498/S_epi_K_114_S64_R2_001.fastq.gz mibi1498_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1499/S_epi_K_91_S65_R1_001.fastq.gz mibi1499_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1499/S_epi_K_91_S65_R2_001.fastq.gz mibi1499_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1500/S_epi_K_92_S66_R1_001.fastq.gz mibi1500_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1500/S_epi_K_92_S66_R2_001.fastq.gz mibi1500_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1501/S_epi_K_95_S67_R1_001.fastq.gz mibi1501_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1501/S_epi_K_95_S67_R2_001.fastq.gz mibi1501_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1502/S_epi_K_96_S68_R1_001.fastq.gz mibi1502_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1502/S_epi_K_96_S68_R2_001.fastq.gz mibi1502_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1503/S_epi_K_98_S69_R1_001.fastq.gz mibi1503_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1503/S_epi_K_98_S69_R2_001.fastq.gz mibi1503_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1504/S_epi_K_100_S70_R1_001.fastq.gz mibi1504_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1504/S_epi_K_100_S70_R2_001.fastq.gz mibi1504_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1505/S_epi_K_106_S71_R1_001.fastq.gz mibi1505_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1505/S_epi_K_106_S71_R2_001.fastq.gz mibi1505_R2.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1506/S_epi_K_108_S72_R1_001.fastq.gz mibi1506_R1.fastq.gz
    ln -s ../200615_NB501882_0207_AH5YFGAFX2/mibi1506/S_epi_K_108_S72_R2_001.fastq.gz mibi1506_R2.fastq.gz
  3. start bacto pipeline until step shovill (set true for the first 2 steps: assembly + typing_mlst)

    (bengal3_ac3) jhuang@WS-2290C:~/DATA/Data_Luise_Sepi_STKN$ /home/jhuang/miniconda3/envs/snakemake_4_3_1/bin/snakemake --printshellcmds
  4. concatenate the first assembly

    # Per assembly: size in bytes and number of lines containing ">".
    # The path is passed to sh as "$1" rather than pasted into the script text,
    # so unusual characters in a path cannot change the command.
    find . -name "contigs.fa" -exec sh -c 'printf "%s: " "$1"; wc -c < "$1"; printf " contigs: %s\n" "$(grep -c ">" "$1")"' sh {} \;
    # Same numbers on one line per file, sorted by size, then by contig count.
    # The count is computed by the shell: inside an awk program "$(...)" is a
    # field reference, not a command substitution.
    find . -name "contigs.fa" -exec sh -c 'printf "%s: %d contigs: %d\n" "$1" "$(wc -c < "$1")" "$(grep -c ">" "$1")"' sh {} \; | sort -k2,2n -k4,4n
    # Sorted by contig count: with ":" as field separator each line has three
    # fields and the count is the third one.
    find . -name "contigs.fa" -exec sh -c 'size=$(wc -c < "$1"); contig_count=$(grep -c ">" "$1"); printf "%s: %d bytes, contigs: %d\n" "$1" "$size" "$contig_count"' sh {} \; | sort -t: -k3,3n
    
    grep -v '^>' contigs.fa | awk '{ total += length($0) } END { print total }'
    #2399339
    
    find . -name "contigs.fa" -exec sh -c 'total_length=$(grep -v "^>" "{}" | awk "{ total += length(\$0) } END { print total }"); printf "%s: %d\n" "{}" "$total_length"' \; | sort -t: -k2,2n
            ./mibi2381/contigs.fa: 1077606
            ./mibi1472/contigs.fa: 1145583
            ./mibi2380/contigs.fa: 1478149
            ./mibi1458/contigs.fa: 2345604
            ./mibi1466/contigs.fa: 2349339
            ./mibi1451/contigs.fa: 2375432
            ./mibi1441/contigs.fa: 2390897
            ./mibi1471/contigs.fa: 2399056
            ./mibi1435/contigs.fa: 2399339
            ./mibi1473/contigs.fa: 2416173
            ./mibi1474/contigs.fa: 2419849
            ./mibi1495/contigs.fa: 2423463
            ./mibi1497/contigs.fa: 2427317
            ./mibi1476/contigs.fa: 2428189
            ./mibi1463/contigs.fa: 2430568
            ./mibi2313/contigs.fa: 2434775
            ./mibi2321/contigs.fa: 2435584
            ./mibi1494/contigs.fa: 2436708
            ./mibi1481/contigs.fa: 2437976
            ./mibi1464/contigs.fa: 2438603
            ./mibi1450/contigs.fa: 2439465
            ./mibi1483/contigs.fa: 2440401
            ./mibi1506/contigs.fa: 2440413
            ./mibi1462/contigs.fa: 2441903
            ./mibi1443/contigs.fa: 2445186
            ./mibi1448/contigs.fa: 2447751
            ./mibi1439/contigs.fa: 2453223
            ./mibi1501/contigs.fa: 2454766
            ./mibi1505/contigs.fa: 2455113
            ./mibi1436/contigs.fa: 2457498
            ./mibi1499/contigs.fa: 2459216
            ./mibi1452/contigs.fa: 2467292
            ./mibi1500/contigs.fa: 2467976
            ./mibi1455/contigs.fa: 2468334
            ./mibi1445/contigs.fa: 2469433
            ./mibi1479/contigs.fa: 2471443
            ./mibi1482/contigs.fa: 2473012
            ./mibi1469/contigs.fa: 2475757
            ./mibi1468/contigs.fa: 2478455
            ./mibi1438/contigs.fa: 2482554
            ./mibi2379/contigs.fa: 2483292
            ./mibi1493/contigs.fa: 2483445
            ./mibi1488/contigs.fa: 2484950
            ./mibi1486/contigs.fa: 2485023
            ./mibi1442/contigs.fa: 2488933
            ./mibi1437/contigs.fa: 2489787
            ./mibi1460/contigs.fa: 2491014
            ./mibi2312/contigs.fa: 2494588
            ./mibi1489/contigs.fa: 2495771
            ./mibi1504/contigs.fa: 2501045
            ./mibi2317/contigs.fa: 2504746
            ./mibi1444/contigs.fa: 2504874
            ./mibi1446/contigs.fa: 2508952
            ./mibi1465/contigs.fa: 2509542
            ./mibi1467/contigs.fa: 2509571
            ./mibi2314/contigs.fa: 2510798
            ./mibi2315/contigs.fa: 2511464
            ./mibi1457/contigs.fa: 2513005
            ./mibi1490/contigs.fa: 2514271
            ./mibi1491/contigs.fa: 2519558
            ./mibi1449/contigs.fa: 2520352
            ./mibi1503/contigs.fa: 2521179
            ./mibi1475/contigs.fa: 2526469
            ./mibi1484/contigs.fa: 2530724
            ./mibi1447/contigs.fa: 2538610
            ./mibi1453/contigs.fa: 2543893
            ./mibi1477/contigs.fa: 2550922
            ./mibi1454/contigs.fa: 2553326
            ./mibi1498/contigs.fa: 2556315
            ./mibi2316/contigs.fa: 2556628
            ./mibi1492/contigs.fa: 2558492
            ./mibi1470/contigs.fa: 2563169
            ./mibi2320/contigs.fa: 2571421
            ./mibi1461/contigs.fa: 2582715
            ./mibi1478/contigs.fa: 2584524
            ./mibi2318/contigs.fa: 2594120
            ./mibi1459/contigs.fa: 2595990
            ./mibi1440/contigs.fa: 2604213
            ./mibi1456/contigs.fa: 2605647
            ./mibi1487/contigs.fa: 2614950
            ./mibi1496/contigs.fa: 2616533
            ./mibi2319/contigs.fa: 2621258
            ./mibi1480/contigs.fa: 2621312
            ./mibi1485/contigs.fa: 2665904
            ./mibi1502/contigs.fa: 2668891
    
    cd mibi1502
    # Pass 1: drop the FASTA header lines and join the sequence lines; at every
    # header after the first one a run of N's is emitted as a spacer between contigs.
    awk '/^>/ {if(seen_header++) print "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"; next} {printf "%s", $0} END {print ""}' contigs.fa > concatenated_contigs.fasta
    # Pass 2: the file from pass 1 contains no header lines, so this only merges
    # its lines into one single-line sequence.
    awk '/^>/ {if(seen_header++) print ""; print $0; next} {printf "%s", $0} END {print ""}' concatenated_contigs.fasta > concatenated_contigs.fa
    # Prepend the FASTA header line (the "\n" in the replacement relies on GNU sed).
    sed -i '1s/^/>mibi1502\n/' concatenated_contigs.fa
    # Write the final single-record FASTA one directory up.
    seqkit seq -i concatenated_contigs.fa -m 100 > ../mibi1502.fasta
    
    # The FASTA was written to the parent directory, so return there before
    # annotating; from inside mibi1502/ the relative path below does not exist.
    cd ..
    bakta --db /mnt/nvme0n1p1/bakta_db mibi1502.fasta
    
    ATCC 14990
    Genes (total)                     :: 2,368
            CDSs (total)                      :: 2,284
            Genes (coding)                    :: 2,231
            CDSs (with protein)               :: 2,231
            Genes (RNA)                       :: 84
            rRNAs                             :: 7, 6, 6 (5S, 16S, 23S)
            complete rRNAs                    :: 7, 6, 6 (5S, 16S, 23S)
            tRNAs                             :: 61
            ncRNAs                            :: 4
            Pseudo Genes (total)              :: 53
            CDSs (without protein)            :: 53
            Pseudo Genes (ambiguous residues) :: 0 of 53
            Pseudo Genes (frameshifted)       :: 34 of 53
            Pseudo Genes (incomplete)         :: 31 of 53
            Pseudo Genes (internal stop)      :: 24 of 53
    
    Detected IPSs: 2369. In Bakta's log, IPS stands for "identical protein sequence": 2,369 of the predicted CDSs have a protein sequence that exactly matches an entry in the Bakta database, so their annotation can be transferred directly. It does not refer to insertion sequences, plasmids, or other mobile genetic elements.
    
    predict tRNAs...
            found: 61
    predict tmRNAs...
            found: 1
    predict rRNAs...
            found: 12
    predict ncRNAs...
            found: 55
    predict ncRNA regions...
            found: 26
    predict CRISPR arrays...
            found: 0
    predict & annotate CDSs...
            predicted: 2529
            discarded spurious: 0
            revised translational exceptions: 0
            detected IPSs: 2369
            found PSCs: 117
            found PSCCs: 26
            lookup annotations...
            conduct expert systems...
                    amrfinder: 20
                    protein sequences: 54
            combine annotations and mark hypotheticals...
            detect pseudogenes...
                    pseudogene candidates: 15
                    found pseudogenes: 7
    analyze hypothetical proteins: 96
            detected Pfam hits: 1
            calculated proteins statistics
            revise special cases...
    extract sORF...
            potential: 46973
            discarded due to overlaps: 37990
            discarded spurious: 0
            detected IPSs: 9
            found PSCs: 2
            lookup annotations...
            filter and combine annotations...
            filtered sORFs: 9
    detect gaps...
            found: 113
    detect oriCs/oriVs...
            found: 1
    detect oriTs...
            found: 0
    apply feature overlap filters...
    select features and create locus tags...
    selected: 2801
    improve annotations...
            revised gene symbols: 9
  5. build the snpEff database from the GenBank file and run spandx

    #mkdir ~/miniconda3/envs/spandx/share/snpeff-5.1-2/data/NZ_CP035288
    #cp NZ_CP035288.gb ~/miniconda3/envs/spandx/share/snpeff-5.1-2/data/NZ_CP035288/genes.gbk
    #vim ~/miniconda3/envs/spandx/share/snpeff-5.1-2/snpEff.config
    #/home/jhuang/miniconda3/envs/spandx/bin/snpEff build -genbank NZ_CP035288      #-d
    
    cp mibi1502.gbff ../db/mibi1502.gb
    #Then replace contig_1 with mibi1502 in the gb-file
    cp mibi1502.fasta ../db/mibi1502.fasta
    
    mkdir ~/miniconda3/envs/spandx/share/snpeff-5.1-2/data/mibi1502
    cp mibi1502.gb ~/miniconda3/envs/spandx/share/snpeff-5.1-2/data/mibi1502/genes.gbk
    vim ~/miniconda3/envs/spandx/share/snpeff-5.1-2/snpEff.config
    #Protein check:  mibi1502        OK: 2525        Not found: 7    Errors: 0       Error percentage: 0.0%
    /home/jhuang/miniconda3/envs/spandx/bin/snpEff build -genbank mibi1502      #-d
    
    # run the first 5 steps in bacto-pipeline ("pangenome"+"variants_calling"+"phylogeny_fasttree"+"phylogeny_raxml"+"recombination")
    /home/jhuang/miniconda3/envs/snakemake_4_3_1/bin/snakemake --printshellcmds
    
    ln -s /home/jhuang/Tools/spandx/ spandx
    # cp trimmed trimmed_gzipped; cd trimmed_gzipped; gzip *.fastq; cd ..
    (spandx) nextflow run spandx/main.nf --fastq "trimmed_gzipped/*_P_{1,2}.fastq.gz" --ref db/mibi1502.fasta --annotation --database mibi1502 -resume
    mv work work_mibi1502
    mv Outputs Outputs_mibi1502
  6. filter out 4 samples for downstream analyses

    #The coverages of shovill/mibi1470/contigs.fa and shovill/mibi1472/contigs.fa, and shovill/mibi2381/contigs.fa are low.
    #mibi1470 could not be identified as S. epidermidis based on its MLST type.
    #remove: mibi1470 2.5M
    #remove: ./mibi1472/contigs.fa: 1145583
    #remove: ./mibi2380/contigs.fa: 1478149
    #remove: ./mibi2381/contigs.fa: 1077606
    #Of the 85 sequenced isolates -- mibi1435 -- mibi1506 (72); mibi2312 -- mibi2321 (10); mibi2379 -- mibi2381 (3) -- 81 remain for downstream analysis after removing the 4 samples listed above.
  7. SCCmec typing and drawing with clinker

    #https://www.genomicepidemiology.org/
    #https://cge.food.dtu.dk/services/SCCmecFinder/
    mkdir contigs; cd contigs
    for sample in mibi1435 mibi1436 mibi1437 mibi1438 mibi1439 mibi1440 mibi1441 mibi1442 mibi1443 mibi1444 mibi1445 mibi1446 mibi1447 mibi1448 mibi1449 mibi1450 mibi1451 mibi1452 mibi1453 mibi1454 mibi1455 mibi1456 mibi1457 mibi1458 mibi1459 mibi1460 mibi1461 mibi1462 mibi1463 mibi1464 mibi1465 mibi1466 mibi1467 mibi1468 mibi1469 mibi1471 mibi1473 mibi1474 mibi1475 mibi1476 mibi1477 mibi1478 mibi1479 mibi1480 mibi1481 mibi1482 mibi1483 mibi1484 mibi1485 mibi1486 mibi1487 mibi1488 mibi1489 mibi1490 mibi1491 mibi1492 mibi1493 mibi1494 mibi1495 mibi1496 mibi1497 mibi1498 mibi1499 mibi1500 mibi1501 mibi1502 mibi1503 mibi1504 mibi1505 mibi1506 mibi2312 mibi2313 mibi2314 mibi2315 mibi2316 mibi2317 mibi2318 mibi2319 mibi2320 mibi2321 mibi2379; do
    cp ../${sample}/contigs.fa ${sample}_contigs.fa
    done
    
    #Predicted SCCmec element
    grep "Predicted SCCmec element" *.txt
    grep "2 SCCmec" *.txt
    grep "ccr class" *.txt
    grep "mec class" *.txt
    
    mibi1435_SCCmec_out.txt:Predicted SCCmec element: SCCmec_type_IVb(2B)
    mibi1436_SCCmec_out.txt:Predicted SCCmec element: none
    mibi1437_SCCmec_out.txt:Predicted SCCmec element: none
    mibi1438_SCCmec_out.txt:Predicted SCCmec element: SCCmec_type_IV(2B)
    mibi1439_SCCmec_out.txt:Predicted SCCmec element: SCCmec_type_IVa(2B)
    
    mibi1440_SCCmec_out.txt:Predicted SCCmec element: none
    mibi1441_SCCmec_out.txt:Predicted SCCmec element: none
    mibi1442_SCCmec_out.txt:Predicted SCCmec element: none
    mibi1443_SCCmec_out.txt:Predicted SCCmec element: SCCmec_type_IVg(2B)
    mibi1444_SCCmec_out.txt:Predicted SCCmec element: SCCmec_type_IV(2B&5)
    mibi1445_SCCmec_out.txt:Predicted SCCmec element: SCCmec_type_IVa(2B)
    mibi1446_SCCmec_out.txt: SCCmec_type_IV(2B) and SCCmec_type_VI(4B)
    mibi1447_SCCmec_out.txt: SCCmec_type_IV(2B) and SCCmec_type_VI(4B)
    mibi1448_SCCmec_out.txt:Predicted SCCmec element: none
    mibi1449_SCCmec_out.txt:Predicted SCCmec element: none
    
    mibi1450_SCCmec_out.txt:Predicted SCCmec element: SCCmec_type_IV(2B)
    mibi1451_SCCmec_out.txt:Predicted SCCmec element: SCCmec_type_IV(2B)
    mibi1452_SCCmec_out.txt:Predicted SCCmec element: none
    mibi1453_SCCmec_out.txt:Predicted SCCmec element: none
    mibi1454_SCCmec_out.txt:Predicted SCCmec element: none
    mibi1455_SCCmec_out.txt:Predicted SCCmec element: none
    mibi1456_SCCmec_out.txt:Predicted SCCmec element: SCCmec_type_III(3A)
    mibi1457_SCCmec_out.txt: SCCmec_type_IV(2B) and SCCmec_type_VI(4B)
    mibi1458_SCCmec_out.txt:Predicted SCCmec element: none
    mibi1459_SCCmec_out.txt:Predicted SCCmec element: SCCmec_type_III(3A)
    
    mibi1460_SCCmec_out.txt:Predicted SCCmec element: SCCmec_type_IVa(2B)
    mibi1461_SCCmec_out.txt:Predicted SCCmec element: none
    mibi1462_SCCmec_out.txt:Predicted SCCmec element: SCCmec_type_IVg(2B)
    mibi1463_SCCmec_out.txt:Predicted SCCmec element: none
    mibi1464_SCCmec_out.txt:Predicted SCCmec element: none
    mibi1465_SCCmec_out.txt:Predicted SCCmec element: SCCmec_type_IV(2B&5)
    mibi1466_SCCmec_out.txt: SCCmec_type_IV(2B) and SCCmec_type_VI(4B)
    mibi1467_SCCmec_out.txt:Predicted SCCmec element: none
    mibi1468_SCCmec_out.txt:Predicted SCCmec element: none
    mibi1469_SCCmec_out.txt:Predicted SCCmec element: none
    
    mibi1471_SCCmec_out.txt:Predicted SCCmec element: none
    mibi1473_SCCmec_out.txt:Predicted SCCmec element: SCCmec_type_IVa(2B)
    mibi1474_SCCmec_out.txt:Predicted SCCmec element: none
    mibi1475_SCCmec_out.txt: SCCmec_type_IV(2B) and SCCmec_type_VI(4B)
    mibi1476_SCCmec_out.txt:Predicted SCCmec element: none
    mibi1477_SCCmec_out.txt:Predicted SCCmec element: none
    mibi1478_SCCmec_out.txt: SCCmec_type_III(3A) and SCCmec_type_VIII(4A)
    mibi1479_SCCmec_out.txt: SCCmec_type_IV(2B) and SCCmec_type_VI(4B)
    
    mibi1480_SCCmec_out.txt: SCCmec_type_III(3A) and SCCmec_type_VIII(4A)
    mibi1481_SCCmec_out.txt:Predicted SCCmec element: SCCmec_type_II(2A)
    mibi1482_SCCmec_out.txt:Predicted SCCmec element: none
    mibi1483_SCCmec_out.txt:Predicted SCCmec element: none
    mibi1484_SCCmec_out.txt:Predicted SCCmec element: SCCmec_type_IV(2B&5)
    mibi1485_SCCmec_out.txt:Predicted SCCmec element: none
    mibi1486_SCCmec_out.txt:Predicted SCCmec element: none
    mibi1487_SCCmec_out.txt: SCCmec_type_III(3A) and SCCmec_type_VIII(4A)
    mibi1488_SCCmec_out.txt:Predicted SCCmec element: SCCmec_type_IV(2B)
    mibi1489_SCCmec_out.txt:Predicted SCCmec element: none
    
    mibi1490_SCCmec_out.txt: SCCmec_type_IV(2B) and SCCmec_type_VI(4B)
    mibi1491_SCCmec_out.txt: SCCmec_type_IV(2B) and SCCmec_type_VI(4B)
    mibi1492_SCCmec_out.txt:Predicted SCCmec element: none
    mibi1493_SCCmec_out.txt:Predicted SCCmec element: none
    mibi1494_SCCmec_out.txt:Predicted SCCmec element: none
    mibi1495_SCCmec_out.txt:Predicted SCCmec element: SCCmec_type_IVa(2B)
    mibi1496_SCCmec_out.txt:Predicted SCCmec element: none
    mibi1497_SCCmec_out.txt:Predicted SCCmec element: none
    mibi1498_SCCmec_out.txt:Predicted SCCmec element: none
    mibi1499_SCCmec_out.txt:Predicted SCCmec element: SCCmec_type_IVa(2B)
    
    mibi1500_SCCmec_out.txt:Predicted SCCmec element: SCCmec_type_IVa(2B)
    mibi1501_SCCmec_out.txt: SCCmec_type_IV(2B) and SCCmec_type_VI(4B)
    mibi1502_SCCmec_out.txt:Predicted SCCmec element: SCCmec_type_III(3A)
    mibi1503_SCCmec_out.txt:Predicted SCCmec element: none
    mibi1504_SCCmec_out.txt:Predicted SCCmec element: none
    mibi1505_SCCmec_out.txt:Predicted SCCmec element: SCCmec_type_IV(2B&5)
    mibi1506_SCCmec_out.txt:Predicted SCCmec element: SCCmec_type_IVg(2B)
    
    mibi2312_SCCmec_out.txt:Predicted SCCmec element: SCCmec_type_IV(2B&5)
    mibi2313_SCCmec_out.txt:Predicted SCCmec element: SCCmec_type_IV(2B)
    mibi2314_SCCmec_out.txt:Predicted SCCmec element: none
    mibi2315_SCCmec_out.txt:Predicted SCCmec element: SCCmec_type_IVa(2B)
    mibi2316_SCCmec_out.txt:Predicted SCCmec element: none
    mibi2317_SCCmec_out.txt:Predicted SCCmec element: SCCmec_type_IV(2B&5)
    mibi2318_SCCmec_out.txt:Predicted SCCmec element: SCCmec_type_III(3A)
    mibi2319_SCCmec_out.txt: SCCmec_type_III(3A) and SCCmec_type_VIII(4A)
    mibi2320_SCCmec_out.txt:Predicted SCCmec element: SCCmec_type_IV(2B&5)
    mibi2321_SCCmec_out.txt:Predicted SCCmec element: SCCmec_type_IV(2B)
    mibi2379_SCCmec_out.txt:Predicted SCCmec element: SCCmec_type_IV(2B&5)
    
    mibi1441_SCCmec_out.txt:ccr class 2
    mibi1446_SCCmec_out.txt:ccr class 2
    mibi1446_SCCmec_out.txt:ccr class 4
    mibi1447_SCCmec_out.txt:ccr class 2
    mibi1447_SCCmec_out.txt:ccr class 4
    mibi1452_SCCmec_out.txt:ccr class 5
    mibi1455_SCCmec_out.txt:ccr class 2
    
    mibi1457_SCCmec_out.txt:ccr class 2
    mibi1457_SCCmec_out.txt:ccr class 4
    
    mibi1464_SCCmec_out.txt:ccr class 2
    mibi1466_SCCmec_out.txt:ccr class 2
    mibi1466_SCCmec_out.txt:ccr class 4
    mibi1467_SCCmec_out.txt:ccr class 5
    mibi1475_SCCmec_out.txt:ccr class 2
    mibi1475_SCCmec_out.txt:ccr class 4
    
    mibi1477_SCCmec_out.txt:ccr class 9
    mibi1478_SCCmec_out.txt:ccr class 3
    mibi1478_SCCmec_out.txt:ccr class 4
    
    mibi1479_SCCmec_out.txt:ccr class 2
    mibi1479_SCCmec_out.txt:ccr class 4
    
    mibi1480_SCCmec_out.txt:ccr class 3
    mibi1480_SCCmec_out.txt:ccr class 4
    
    mibi1482_SCCmec_out.txt:ccr class 5
    mibi1483_SCCmec_out.txt:ccr class 5
    mibi1486_SCCmec_out.txt:ccr class 2
    
    mibi1487_SCCmec_out.txt:ccr class 3
    mibi1487_SCCmec_out.txt:ccr class 4
    
    mibi1489_SCCmec_out.txt:ccr class 2
    
    mibi1490_SCCmec_out.txt:ccr class 2
    mibi1490_SCCmec_out.txt:ccr class 4
    
    mibi1491_SCCmec_out.txt:ccr class 2
    mibi1491_SCCmec_out.txt:ccr class 4
    
    mibi1492_SCCmec_out.txt:ccr class 2
    mibi1494_SCCmec_out.txt:ccr class 5
    
    mibi1501_SCCmec_out.txt:ccr class 2
    mibi1501_SCCmec_out.txt:ccr class 4
    
    mibi1504_SCCmec_out.txt:ccr class 2
    mibi1504_SCCmec_out.txt:ccr class 5&5
    
    mibi2316_SCCmec_out.txt:ccr class 2
    mibi2319_SCCmec_out.txt:ccr class 3
    mibi2319_SCCmec_out.txt:ccr class 4
    
    mibi1446_SCCmec_out.txt:mec class B
    mibi1447_SCCmec_out.txt:mec class B
    
    mibi1457_SCCmec_out.txt:mec class B
    mibi1463_SCCmec_out.txt:mec class B
    mibi1466_SCCmec_out.txt:mec class B
    mibi1475_SCCmec_out.txt:mec class B
    mibi1478_SCCmec_out.txt:mec class A
    mibi1479_SCCmec_out.txt:mec class B
    mibi1480_SCCmec_out.txt:mec class A
    
    mibi1483_SCCmec_out.txt:mec class A
    mibi1487_SCCmec_out.txt:mec class A
    mibi1490_SCCmec_out.txt:mec class B
    mibi1491_SCCmec_out.txt:mec class B
    mibi1501_SCCmec_out.txt:mec class B
    mibi2319_SCCmec_out.txt:mec class A
  8. agr typing

    for sample in mibi1435 mibi1436 mibi1437 mibi1438 mibi1439 mibi1440 mibi1441 mibi1442 mibi1443 mibi1444 mibi1445 mibi1446 mibi1447 mibi1448 mibi1449 mibi1450 mibi1451 mibi1452 mibi1453 mibi1454 mibi1455 mibi1456 mibi1457 mibi1458 mibi1459 mibi1460 mibi1461 mibi1462 mibi1463 mibi1464 mibi1465 mibi1466 mibi1467 mibi1468 mibi1469 mibi1471 mibi1473 mibi1474 mibi1475 mibi1476 mibi1477 mibi1478 mibi1479 mibi1480 mibi1481 mibi1482 mibi1483 mibi1484 mibi1485 mibi1486 mibi1487 mibi1488 mibi1489 mibi1490 mibi1491 mibi1492 mibi1493 mibi1494 mibi1495 mibi1496 mibi1497 mibi1498 mibi1499 mibi1500 mibi1501 mibi1502 mibi1503 mibi1504 mibi1505 mibi1506 mibi2312 mibi2313 mibi2314 mibi2315 mibi2316 mibi2317 mibi2318 mibi2319 mibi2320 mibi2321 mibi2379; do
    bakta --db /mnt/nvme0n1p1/bakta_db shovill/${sample}/contigs.fa --prefix ${sample}
    done
    
    grep "agrD" mibi1436.gbff
    mibi1435    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1436    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1437    MNLLGGLLLKIFSNFMAVIGNASKYNPCSNYLDEPQVPEELTKLDE    AgrD II
    mibi1438    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1439    MNLLGGLLLKIFSNFMAVIGNASKYNPCSNYLDEPQVPEELTKLDE    AgrD II
    mibi1440    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1441    MNLLGGLLLKLFSNFMAVIGNAAKYNPCASYLDEPQVPEELTKLDE    AgrD III
    mibi1442    MNLLGGLLLKIFSNFMAVIGNASKYNPCSNYLDEPQVPEELTKLDE    AgrD II
    mibi1443    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1444    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1445    MNLLGGLLLKIFSNFMAVIGNASKYNPCSNYLDEPQVPEELTKLDE    AgrD II
    mibi1446    MNLLGGLLLKIFSNFMAVIGNASKYNPCSNYLDEPQVPEELTKLDE    AgrD II
    mibi1447    MNLLGGLLLKIFSNFMAVIGNASKYNPCSNYLDEPQVPEELTKLDE    AgrD II
    mibi1448    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1449    MNLLGGLLLKLFSNFMAVIGNAAKYNPCASYLDEPQVPEELTKLDE    AgrD III
    mibi1450    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1451    MNLLGGLLLKIFSNFMAVIGNASKYNPCSNYLDEPQVPEELTKLDE    AgrD II
    mibi1452    MNLLGGLLLKLFSNFMAVIGNAAKYNPCASYLDEPQVPEELTKLDE    AgrD III
    mibi1453    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1454    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1455    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1456    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1457    MNLLGGLLLKIFSNFMAVIGNASKYNPCSNYLDEPQVPEELTKLDE    AgrD II
    mibi1458    MENIFNLFIK I FTTILEF V GTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I* (2 amino-acid substitutions relative to AgrD I)
    mibi1459    MENIFNLFIK F FTTILEF I GTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1460    MNLLGGLLLKIFSNFMAVIGNASKYNPCSNYLDEPQVPEELTKLDE    AgrD II
    mibi1461    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1462    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1463    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1464    MNLLGGLLLKLFSNFMAVIGNAAKYNPCASYLDEPQVPEELTKLDE    AgrD III
    mibi1465    MNLLGGLLLKIFSNFMAVIGNASKYNPCSNYLDEPQVPEELTKLDE    AgrD II
    mibi1466    MNLLGGLLLKIFSNFMAVIGNASKYNPCSNYLDEPQVPEELTKLDE    AgrD II
    mibi1467    MNLLGGLLLKIFSNFMAVIGNASKYNPCSNYLDEPQVPEELTKLDE    AgrD II
    mibi1468    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1469    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1471    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1473    MNLLGGLLLKIFSNFMAVIGNASKYNPCSNYLDEPQVPEELTKLDE    AgrD II
    mibi1474    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1475    MNLLGGLLLKIFSNFMAVIGNASKYNPCSNYLDEPQVPEELTKLDE    AgrD II
    mibi1476    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1477    MNLLGGLLLKIFSNFMAVIGNASKYNPCSNYLDEPQVPEELTKLDE    AgrD II
    mibi1478    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1479    MNLLGGLLLKIFSNFMAVIGNASKYNPCSNYLDEPQVPEELTKLDE    AgrD II
    mibi1480    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1481    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1482    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1483    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1484    none
    mibi1485    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1486    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1487    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1488    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1489    MNLLGGLLLKIFSNFMAVIGNASKYNPCSNYLDEPQVPEELTKLDE    AgrD II
    mibi1490    MNLLGGLLLKIFSNFMAVIGNASKYNPCSNYLDEPQVPEELTKLDE    AgrD II
    mibi1491    MNLLGGLLLKIFSNFMAVIGNASKYNPCSNYLDEPQVPEELTKLDE    AgrD II
    mibi1492    MNLLGGLLLKIFSNFMAVIGNASKYNPCSNYLDEPQVPEELTKLDE    AgrD II
    mibi1493    MNLLGGL F LK I FSNFMAVIGNA A KYNPC AS YLDEPQVPEELTKLDE  AgrD III* (2 amino-acid substitutions relative to AgrD III)
    mibi1494    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1495    MNLLGGLLLKIFSNFMAVIGNASKYNPCSNYLDEPQVPEELTKLDE    AgrD II
    mibi1496    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1497    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1498    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1499    MNLLGGLLLKIFSNFMAVIGNASKYNPCSNYLDEPQVPEELTKLDE    AgrD II
    mibi1500    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1501    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1502    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1503    MNLLGGLLLKLFSNFMAVIGNAAKYNPCASYLDEPQVPEELTKLDE    AgrD III
    mibi1504    MNLLGGLLLKLFSNFMAVIGNAAKYNPCASYLDEPQVPEELTKLDE    AgrD III
    mibi1505    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi1506    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi2312    MNLLGGLLLKIFSNFMAVIGNASKYNPCSNYLDEPQVPEELTKLDE    AgrD II
    mibi2313    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi2314    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi2315    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi2316    MNLLGGLLLKIFSNFMAVIGNASKYNPCSNYLDEPQVPEELTKLDE    AgrD II
    mibi2317    MNLLGGLLLKIFSNFMAVIGNASKYNPCSNYLDEPQVPEELTKLDE    AgrD II
    mibi2318    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi2319    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi2320    MNLLGGLLLKIFSNFMAVIGNASKYNPCSNYLDEPQVPEELTKLDE    AgrD II
    mibi2321    MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE    AgrD I
    mibi2379    MNLLGGLLLKIFSNFMAVIGNASKYNPCSNYLDEPQVPEELTKLDE    AgrD II
    
        #MNLLGGL L LK L FSNFMAVIGNA A KYNPC AS YLDEPQVPEELTKLDE  AgrD III (2)
        #MNLLGGL L LK I FSNFMAVIGNA S KYNPC SN YLDEPQVPEELTKLDE  AgrD II  (4)
    
    -- AgrD I --
    Query  1       MENIFNLFIKFFTTILEFIGTVAGDSVCASYFDEPEVPEELTKLYE  46
            M  +  L +K F+  +  IG  +  + C  Y DEP+VPEELTKL E
    Sbjct  926825  MNLLGGLLLKIFSNFMAVIGNASKYNPCVMYLDEPQVPEELTKLDE  926688
    -- AgrD II --
    Query  1       MNLLGGLLLKIFSNFMAVIGNASKYNPCSNYLDEPQVPEELTKLDE  46
            MNLLGGLLLKIFSNFMAVIGNASKYNPC  YLDEPQVPEELTKLDE
    Sbjct  926825  MNLLGGLLLKIFSNFMAVIGNASKYNPCVMYLDEPQVPEELTKLDE  926688
    -- AgrD III --
    Query  1       MNLLGGLLLKLFSNFMAVIGNAAKYNPCASYLDEPQVPEELTKLDE  46
            MNLLGGLLLK+FSNFMAVIGNA+KYNPC  YLDEPQVPEELTKLDE
    Sbjct  926825  MNLLGGLLLKIFSNFMAVIGNASKYNPCVMYLDEPQVPEELTKLDE  926688
  9. Calculate the presence-absence matrix by searching for the keyword gene=gene_name in the gff3 files

    cut -d',' -f1 plotTreeHeatmap/typing.csv > all_isolate_names.txt
    grep "gene=gyrB" *.gff3 > temp
    cut -d'.' -f1 temp > temp1
    diff all_isolate_names.txt temp1
    
    grep "gene=fumC" *.gff3 > temp
    cut -d'.' -f1 temp > temp1
    diff all_isolate_names.txt temp1
    
    grep "gene=gltA" *.gff3 > temp
    cut -d'.' -f1 temp > temp1
    diff all_isolate_names.txt temp1
    
    grep "gene=icd" *.gff3 > temp
    cut -d'.' -f1 temp > temp1
    diff all_isolate_names.txt temp1
    
    grep "gene=apsS" *.gff3 > temp
    cut -d'.' -f1 temp > temp1
    diff all_isolate_names.txt temp1 | wc -l
    #TODO, using sequence comparisons for one record!
    
    grep "gene=sigB" *.gff3 > temp
    cut -d'.' -f1 temp > temp1
    diff all_isolate_names.txt temp1
    #27d26
    #< mibi1461
    
    grep "gene=sarA" *.gff3 > temp
    cut -d'.' -f1 temp > temp1
    diff all_isolate_names.txt temp1
    #17d16
    #< mibi1451
    #32d30
    #< mibi1466
    #45d42
    #< mibi1481
    
    grep "gene=agrC" *.gff3 > temp
    cut -d'.' -f1 temp > temp1
    diff all_isolate_names.txt temp1 | wc -l
    #TODO, using sequence comparisons for one record!
    
    grep "gene=yycG" *.gff3 > temp
    cut -d'.' -f1 temp > temp1
    diff all_isolate_names.txt temp1 | wc -l
    #TODO, using sequence comparisons for one record!
    
    grep "PSM-beta" *.gff3 > temp
    cut -d'.' -f1 temp > temp1
    diff all_isolate_names.txt temp1 | wc -l
    #TODO, using sequence comparisons for one record!
    
    grep "PSM-delta" *.gff3 > temp
    cut -d'.' -f1 temp > temp1
    diff all_isolate_names.txt temp1
    #9d8
    #< mibi1443
    
    grep "hlb" *.gff3 > temp
    cut -d'.' -f1 temp > temp1
    diff all_isolate_names.txt temp1 | wc -l
    #TODO, using sequence comparisons for one record!
    
    grep "atlE" *.gff3 > temp
    cut -d'.' -f1 temp > temp1
    diff all_isolate_names.txt temp1 | wc -l
    #TODO, using sequence comparisons for one record!
    
    grep "gene=atl" *.gff3 > temp
    cut -d'.' -f1 temp > temp1
    diff all_isolate_names.txt temp1
    #19d18
    #< mibi1453
    
    grep "gene=sdrG" *.gff3 > temp
    cut -d'.' -f1 temp > temp1
    diff all_isolate_names.txt temp1
    #2d1
    < mibi1436
    5d3
    < mibi1439
    8,9d5
    < mibi1442
    < mibi1443
    11,12d6
    < mibi1445
    < mibi1446
    14d7
    < mibi1448
    18,19d10
    < mibi1452
    < mibi1453
    30,32d20
    < mibi1464
    < mibi1465
    < mibi1466
    34,35d21
    < mibi1468
    < mibi1469
    37d22
    < mibi1473
    39d23
    < mibi1475
    41d24
    < mibi1477
    43d25
    < mibi1479
    47d28
    < mibi1483
    50d30
    < mibi1486
    52d31
    < mibi1488
    55d33
    < mibi1491
    63,64d40
    < mibi1499
    < mibi1500
    67d42
    < mibi1503
    76d50
    < mibi2317
    79d52
    < mibi2320
    81d53
    < mibi2379
    
    grep "gene=sdrH" *.gff3 > temp
    cut -d'.' -f1 temp > temp1
    diff all_isolate_names.txt temp1
    #24d23
    #< mibi1458
    #57d55
    #< mibi1493
    
    grep "gene=ebh" *.gff3 > temp
    cut -d'.' -f1 temp > temp1
    diff all_isolate_names.txt temp1
    #1d0
    #< mibi1435
    6d4
    < mibi1440
    11,13d8
    < mibi1445
    < mibi1446
    < mibi1447
    16,18d10
    < mibi1450
    < mibi1451
    < mibi1452
    20d11
    < mibi1454
    22,23d12
    < mibi1456
    < mibi1457
    25,27d13
    < mibi1459
    < mibi1460
    < mibi1461
    29d14
    < mibi1463
    31,32d15
    < mibi1465
    < mibi1466
    37d19
    < mibi1473
    39d20
    < mibi1475
    41,44d21
    < mibi1477
    < mibi1478
    < mibi1479
    < mibi1480
    47d23
    < mibi1483
    49d24
    < mibi1485
    51,52d25
    < mibi1487
    < mibi1488
    54,55d26
    < mibi1490
    < mibi1491
    59,60d29
    < mibi1495
    < mibi1496
    62,63d30
    < mibi1498
    < mibi1499
    66d32
    < mibi1502
    71d36
    < mibi2312
    74d38
    < mibi2315
    76,79d39
    < mibi2317
    < mibi2318
    < mibi2319
    < mibi2320
    81d40
    < mibi2379
    
    grep "gene=ebpS" *.gff3 > temp
    cut -d'.' -f1 temp > temp1
    diff all_isolate_names.txt temp1
    
    grep "gene=tagB" *.gff3 > temp
    cut -d'.' -f1 temp > temp1
    uniq temp1 > temp2    # 3 copies for each isolate
    diff all_isolate_names.txt temp2
    
    grep "gene=capC" *.gff3 > temp
    cut -d'.' -f1 temp > temp1
    diff all_isolate_names.txt temp1 | wc -l
    #TODO, using sequence comparisons for one record!
    
    grep "gene=sepA" *.gff3 > temp
    cut -d'.' -f1 temp > temp1
    uniq temp1 > temp2
    diff all_isolate_names.txt temp2
    
    grep "gene=dltA" *.gff3 > temp
    cut -d'.' -f1 temp > temp1
    diff all_isolate_names.txt temp1
    
    grep "gene=fmtC" *.gff3 > temp
    cut -d'.' -f1 temp > temp1
    diff all_isolate_names.txt temp1 | wc -l
    #TODO, using sequence comparisons for one record!
    
    grep "gene=lipA" *.gff3 > temp
    cut -d'.' -f1 temp > temp1
    diff all_isolate_names.txt temp1
    
    grep "gene=sceD" *.gff3 > temp
    cut -d'.' -f1 temp > temp1
    diff all_isolate_names.txt temp1
    
    #TODO: check sdrG in more detail using sequence alignments, since SE0760 is not a standard gene name.
    grep "gene=SE0760" *.gff3 > temp
    cut -d'.' -f1 temp > temp1
    diff all_isolate_names.txt temp1 | wc -l
    #TODO, using sequence comparisons for one record!
    
    grep "gene=esp" *.gff3 > temp
    cut -d'.' -f1 temp > temp1
    diff all_isolate_names.txt temp1 | wc -l
    #TODO, using sequence comparisons for one record!
    
    grep "gene=ecpA" *.gff3 > temp
    cut -d'.' -f1 temp > temp1
    diff all_isolate_names.txt temp1
    17d16
    < mibi1451
    37d35
    < mibi1473
  10. Calculate the presence-absence matrix by local blastn searches

    (Optional online search) https://blast.ncbi.nlm.nih.gov/Blast.cgi?PROGRAM=blastn&PAGE_TYPE=BlastSearch&BLAST_SPEC=MicrobialGenomes
    #Title:Refseq prokaryote representative genomes (contains refseq assembly)
    #Molecule Type:mixed DNA
    #Update date:2024/10/16
    #Number of sequences:1038672
    
    #cat gyrB_revcomp.fasta fumC_revcomp.fasta gltA.fasta icd_revcomp.fasta apsS.fasta sigB_revcomp.fasta sarA_revcomp.fasta ...
    
    cp ~/DATA/Data_Patricia_Sepi_5Samples/presence_absence_matrix_on_gene_list/apsS.fasta .
    cp ~/DATA/Data_Patricia_Sepi_5Samples/presence_absence_matrix_on_gene_list/agrC.fasta .
    cp ~/DATA/Data_Patricia_Sepi_5Samples/presence_absence_matrix_on_gene_list/yycG.fasta .
    cp ~/DATA/Data_Patricia_Sepi_5Samples/presence_absence_matrix_on_gene_list/psm-beta.fasta .
    cp ~/DATA/Data_Patricia_Sepi_5Samples/presence_absence_matrix_on_gene_list/hlb_.fasta .
    cp ~/DATA/Data_Patricia_Sepi_5Samples/presence_absence_matrix_on_gene_list/atlE.fasta .
    cp ~/DATA/Data_Patricia_Sepi_5Samples/presence_absence_matrix_on_gene_list/capC.fasta .
    cp ~/DATA/Data_Patricia_Sepi_5Samples/presence_absence_matrix_on_gene_list/fmtC.fasta .
    cp ~/DATA/Data_Patricia_Sepi_5Samples/presence_absence_matrix_on_gene_list/SE0760.fasta .
    cp ~/DATA/Data_Patricia_Sepi_5Samples/presence_absence_matrix_on_gene_list/esp.fasta .
    cp ~/DATA/Data_Patricia_Sepi_5Samples/presence_absence_matrix_on_gene_list/MT880870.fasta .
    cp ~/DATA/Data_Patricia_Sepi_5Samples/presence_absence_matrix_on_gene_list/MT880871.fasta .
    cp ~/DATA/Data_Patricia_Sepi_5Samples/presence_absence_matrix_on_gene_list/MT880872.fasta .
    
    for sample in mibi1435 mibi1436 mibi1437 mibi1438 mibi1439 mibi1440 mibi1441 mibi1442 mibi1443 mibi1444 mibi1445 mibi1446 mibi1447 mibi1448 mibi1449 mibi1450 mibi1451 mibi1452 mibi1453 mibi1454 mibi1455 mibi1456 mibi1457 mibi1458 mibi1459 mibi1460 mibi1461 mibi1462 mibi1463 mibi1464 mibi1465 mibi1466 mibi1467 mibi1468 mibi1469 mibi1471 mibi1473 mibi1474 mibi1475 mibi1476 mibi1477 mibi1478 mibi1479 mibi1480 mibi1481 mibi1482 mibi1483 mibi1484 mibi1485 mibi1486 mibi1487 mibi1488 mibi1489 mibi1490 mibi1491 mibi1492 mibi1493 mibi1494 mibi1495 mibi1496 mibi1497 mibi1498 mibi1499 mibi1500 mibi1501 mibi1502 mibi1503 mibi1504 mibi1505 mibi1506 mibi2312 mibi2313 mibi2314 mibi2315 mibi2316 mibi2317 mibi2318 mibi2319 mibi2320 mibi2321 mibi2379; do
    cd ../shovill/${sample}/
    makeblastdb -in contigs.fa -dbtype nucl
    cd ../../presence_absence_matrix_on_gene_list
    done
    
    # -length(apsS.fasta)=1041
    for sample in mibi1435 mibi1436 mibi1437 mibi1438 mibi1439 mibi1440 mibi1441 mibi1442 mibi1443 mibi1444 mibi1445 mibi1446 mibi1447 mibi1448 mibi1449 mibi1450 mibi1451 mibi1452 mibi1453 mibi1454 mibi1455 mibi1456 mibi1457 mibi1458 mibi1459 mibi1460 mibi1461 mibi1462 mibi1463 mibi1464 mibi1465 mibi1466 mibi1467 mibi1468 mibi1469 mibi1471 mibi1473 mibi1474 mibi1475 mibi1476 mibi1477 mibi1478 mibi1479 mibi1480 mibi1481 mibi1482 mibi1483 mibi1484 mibi1485 mibi1486 mibi1487 mibi1488 mibi1489 mibi1490 mibi1491 mibi1492 mibi1493 mibi1494 mibi1495 mibi1496 mibi1497 mibi1498 mibi1499 mibi1500 mibi1501 mibi1502 mibi1503 mibi1504 mibi1505 mibi1506 mibi2312 mibi2313 mibi2314 mibi2315 mibi2316 mibi2317 mibi2318 mibi2319 mibi2320 mibi2321 mibi2379; do
    blastn -db ../shovill/${sample}/contigs.fa -query apsS.fasta -evalue 1e-50 -num_threads 15 -outfmt 6 -strand both -max_target_seqs 1 > apsS_on_${sample}.blastn
    done
    for sample in mibi1435 mibi1436 mibi1437 mibi1438 mibi1439 mibi1440 mibi1441 mibi1442 mibi1443 mibi1444 mibi1445 mibi1446 mibi1447 mibi1448 mibi1449 mibi1450 mibi1451 mibi1452 mibi1453 mibi1454 mibi1455 mibi1456 mibi1457 mibi1458 mibi1459 mibi1460 mibi1461 mibi1462 mibi1463 mibi1464 mibi1465 mibi1466 mibi1467 mibi1468 mibi1469 mibi1471 mibi1473 mibi1474 mibi1475 mibi1476 mibi1477 mibi1478 mibi1479 mibi1480 mibi1481 mibi1482 mibi1483 mibi1484 mibi1485 mibi1486 mibi1487 mibi1488 mibi1489 mibi1490 mibi1491 mibi1492 mibi1493 mibi1494 mibi1495 mibi1496 mibi1497 mibi1498 mibi1499 mibi1500 mibi1501 mibi1502 mibi1503 mibi1504 mibi1505 mibi1506 mibi2312 mibi2313 mibi2314 mibi2315 mibi2316 mibi2317 mibi2318 mibi2319 mibi2320 mibi2321 mibi2379; do
    python3 analyze_blastn_res.py apsS_on_${sample}.blastn
    done
    apsS_on_mibi1435.blastn: 1041
    apsS_on_mibi1436.blastn: 1041
    apsS_on_mibi1437.blastn: 1041
    apsS_on_mibi1438.blastn: 1041
    apsS_on_mibi1439.blastn: 1041
    apsS_on_mibi1440.blastn: 1041
    apsS_on_mibi1441.blastn: 1041
    apsS_on_mibi1442.blastn: 1041
    apsS_on_mibi1443.blastn: 1041
    apsS_on_mibi1444.blastn: 1041
    apsS_on_mibi1445.blastn: 1041
    apsS_on_mibi1446.blastn: 1041
    apsS_on_mibi1447.blastn: 1041
    apsS_on_mibi1448.blastn: 1041
    apsS_on_mibi1449.blastn: 1041
    apsS_on_mibi1450.blastn: 1041
    apsS_on_mibi1451.blastn: 1041
    apsS_on_mibi1452.blastn: 1041
    apsS_on_mibi1453.blastn: 1041
    apsS_on_mibi1454.blastn: 1041
    apsS_on_mibi1455.blastn: 1041
    apsS_on_mibi1456.blastn: 1041
    apsS_on_mibi1457.blastn: 1041
    apsS_on_mibi1458.blastn: 1041
    apsS_on_mibi1459.blastn: 1041
    apsS_on_mibi1460.blastn: 1041
    apsS_on_mibi1461.blastn: 1041
    apsS_on_mibi1462.blastn: 1041
    apsS_on_mibi1463.blastn: 1041
    apsS_on_mibi1464.blastn: 1041
    apsS_on_mibi1465.blastn: 1041
    apsS_on_mibi1466.blastn: 1041
    apsS_on_mibi1467.blastn: 1041
    apsS_on_mibi1468.blastn: 1041
    apsS_on_mibi1469.blastn: 1041
    apsS_on_mibi1471.blastn: 1041
    apsS_on_mibi1473.blastn: 1041
    apsS_on_mibi1474.blastn: 1041
    apsS_on_mibi1475.blastn: 1041
    apsS_on_mibi1476.blastn: 1041
    apsS_on_mibi1477.blastn: 1041
    apsS_on_mibi1478.blastn: 1041
    apsS_on_mibi1479.blastn: 1041
    apsS_on_mibi1480.blastn: 1041
    apsS_on_mibi1481.blastn: 1041
    apsS_on_mibi1482.blastn: 1041
    apsS_on_mibi1483.blastn: 1041
    apsS_on_mibi1484.blastn: 1041
    apsS_on_mibi1485.blastn: 1041
    apsS_on_mibi1486.blastn: 1041
    apsS_on_mibi1487.blastn: 1041
    apsS_on_mibi1488.blastn: 1041
    apsS_on_mibi1489.blastn: 1041
    apsS_on_mibi1490.blastn: 1041
    apsS_on_mibi1491.blastn: 1041
    apsS_on_mibi1492.blastn: 1041
    apsS_on_mibi1493.blastn: 1041
    apsS_on_mibi1494.blastn: 1041
    apsS_on_mibi1495.blastn: 1041
    apsS_on_mibi1496.blastn: 1041
    apsS_on_mibi1497.blastn: 1041
    apsS_on_mibi1498.blastn: 1041
    apsS_on_mibi1499.blastn: 1041
    apsS_on_mibi1500.blastn: 1041
    apsS_on_mibi1501.blastn: 1041
    apsS_on_mibi1502.blastn: 1041
    apsS_on_mibi1503.blastn: 1041
    apsS_on_mibi1504.blastn: 1041
    apsS_on_mibi1505.blastn: 1041
    apsS_on_mibi1506.blastn: 1041
    apsS_on_mibi2312.blastn: 1041
    apsS_on_mibi2313.blastn: 1041
    apsS_on_mibi2314.blastn: 1041
    apsS_on_mibi2315.blastn: 1041
    apsS_on_mibi2316.blastn: 1041
    apsS_on_mibi2317.blastn: 1041
    apsS_on_mibi2318.blastn: 1041
    apsS_on_mibi2319.blastn: 1041
    apsS_on_mibi2320.blastn: 1041
    apsS_on_mibi2321.blastn: 1041
    apsS_on_mibi2379.blastn: 1041
    
    # -length(sigB_revcomp.fasta)=771
    for sample in mibi1435 mibi1436 mibi1437 mibi1438 mibi1439 mibi1440 mibi1441 mibi1442 mibi1443 mibi1444 mibi1445 mibi1446 mibi1447 mibi1448 mibi1449 mibi1450 mibi1451 mibi1452 mibi1453 mibi1454 mibi1455 mibi1456 mibi1457 mibi1458 mibi1459 mibi1460 mibi1461 mibi1462 mibi1463 mibi1464 mibi1465 mibi1466 mibi1467 mibi1468 mibi1469 mibi1471 mibi1473 mibi1474 mibi1475 mibi1476 mibi1477 mibi1478 mibi1479 mibi1480 mibi1481 mibi1482 mibi1483 mibi1484 mibi1485 mibi1486 mibi1487 mibi1488 mibi1489 mibi1490 mibi1491 mibi1492 mibi1493 mibi1494 mibi1495 mibi1496 mibi1497 mibi1498 mibi1499 mibi1500 mibi1501 mibi1502 mibi1503 mibi1504 mibi1505 mibi1506 mibi2312 mibi2313 mibi2314 mibi2315 mibi2316 mibi2317 mibi2318 mibi2319 mibi2320 mibi2321 mibi2379; do
    blastn -db ../shovill/${sample}/contigs.fa -query sigB_revcomp.fasta -evalue 1e-50 -num_threads 15 -outfmt 6 -strand both -max_target_seqs 1 > sigB_on_${sample}.blastn
    done
    for sample in mibi1435 mibi1436 mibi1437 mibi1438 mibi1439 mibi1440 mibi1441 mibi1442 mibi1443 mibi1444 mibi1445 mibi1446 mibi1447 mibi1448 mibi1449 mibi1450 mibi1451 mibi1452 mibi1453 mibi1454 mibi1455 mibi1456 mibi1457 mibi1458 mibi1459 mibi1460 mibi1461 mibi1462 mibi1463 mibi1464 mibi1465 mibi1466 mibi1467 mibi1468 mibi1469 mibi1471 mibi1473 mibi1474 mibi1475 mibi1476 mibi1477 mibi1478 mibi1479 mibi1480 mibi1481 mibi1482 mibi1483 mibi1484 mibi1485 mibi1486 mibi1487 mibi1488 mibi1489 mibi1490 mibi1491 mibi1492 mibi1493 mibi1494 mibi1495 mibi1496 mibi1497 mibi1498 mibi1499 mibi1500 mibi1501 mibi1502 mibi1503 mibi1504 mibi1505 mibi1506 mibi2312 mibi2313 mibi2314 mibi2315 mibi2316 mibi2317 mibi2318 mibi2319 mibi2320 mibi2321 mibi2379; do
    python3 analyze_blastn_res.py sigB_on_${sample}.blastn
    done
    sigB_on_mibi1435.blastn: 771
    sigB_on_mibi1436.blastn: 771
    sigB_on_mibi1437.blastn: 771
    sigB_on_mibi1438.blastn: 771
    sigB_on_mibi1439.blastn: 771
    sigB_on_mibi1440.blastn: 771
    sigB_on_mibi1441.blastn: 771
    sigB_on_mibi1442.blastn: 771
    sigB_on_mibi1443.blastn: 771
    sigB_on_mibi1444.blastn: 771
    sigB_on_mibi1445.blastn: 771
    sigB_on_mibi1446.blastn: 771
    sigB_on_mibi1447.blastn: 771
    sigB_on_mibi1448.blastn: 771
    sigB_on_mibi1449.blastn: 771
    sigB_on_mibi1450.blastn: 771
    sigB_on_mibi1451.blastn: 771
    sigB_on_mibi1452.blastn: 771
    sigB_on_mibi1453.blastn: 771
    sigB_on_mibi1454.blastn: 771
    sigB_on_mibi1455.blastn: 771
    sigB_on_mibi1456.blastn: 771
    sigB_on_mibi1457.blastn: 771
    sigB_on_mibi1458.blastn: 771
    sigB_on_mibi1459.blastn: 771
    sigB_on_mibi1460.blastn: 771
    sigB_on_mibi1461.blastn: 0 *
    sigB_on_mibi1462.blastn: 771
    sigB_on_mibi1463.blastn: 771
    sigB_on_mibi1464.blastn: 771
    sigB_on_mibi1465.blastn: 771
    sigB_on_mibi1466.blastn: 771
    sigB_on_mibi1467.blastn: 771
    sigB_on_mibi1468.blastn: 771
    sigB_on_mibi1469.blastn: 771
    sigB_on_mibi1471.blastn: 771
    sigB_on_mibi1473.blastn: 771
    sigB_on_mibi1474.blastn: 771
    sigB_on_mibi1475.blastn: 771
    sigB_on_mibi1476.blastn: 771
    sigB_on_mibi1477.blastn: 771
    sigB_on_mibi1478.blastn: 771
    sigB_on_mibi1479.blastn: 771
    sigB_on_mibi1480.blastn: 771
    sigB_on_mibi1481.blastn: 771
    sigB_on_mibi1482.blastn: 771
    sigB_on_mibi1483.blastn: 771
    sigB_on_mibi1484.blastn: 771
    sigB_on_mibi1485.blastn: 771
    sigB_on_mibi1486.blastn: 771
    sigB_on_mibi1487.blastn: 771
    sigB_on_mibi1488.blastn: 771
    sigB_on_mibi1489.blastn: 771
    sigB_on_mibi1490.blastn: 771
    sigB_on_mibi1491.blastn: 771
    sigB_on_mibi1492.blastn: 771
    sigB_on_mibi1493.blastn: 771
    sigB_on_mibi1494.blastn: 771
    sigB_on_mibi1495.blastn: 771
    sigB_on_mibi1496.blastn: 771
    sigB_on_mibi1497.blastn: 771
    sigB_on_mibi1498.blastn: 771
    sigB_on_mibi1499.blastn: 771
    sigB_on_mibi1500.blastn: 771
    sigB_on_mibi1501.blastn: 771
    sigB_on_mibi1502.blastn: 771
    sigB_on_mibi1503.blastn: 771
    sigB_on_mibi1504.blastn: 771
    sigB_on_mibi1505.blastn: 771
    sigB_on_mibi1506.blastn: 771
    sigB_on_mibi2312.blastn: 771
    sigB_on_mibi2313.blastn: 771
    sigB_on_mibi2314.blastn: 771
    sigB_on_mibi2315.blastn: 771
    sigB_on_mibi2316.blastn: 771
    sigB_on_mibi2317.blastn: 771
    sigB_on_mibi2318.blastn: 771
    sigB_on_mibi2319.blastn: 771
    sigB_on_mibi2320.blastn: 771
    sigB_on_mibi2321.blastn: 771
    sigB_on_mibi2379.blastn: 771
    
    # - length(sarA_revcomp.fasta)=375
    for sample in mibi1435 mibi1436 mibi1437 mibi1438 mibi1439 mibi1440 mibi1441 mibi1442 mibi1443 mibi1444 mibi1445 mibi1446 mibi1447 mibi1448 mibi1449 mibi1450 mibi1451 mibi1452 mibi1453 mibi1454 mibi1455 mibi1456 mibi1457 mibi1458 mibi1459 mibi1460 mibi1461 mibi1462 mibi1463 mibi1464 mibi1465 mibi1466 mibi1467 mibi1468 mibi1469 mibi1471 mibi1473 mibi1474 mibi1475 mibi1476 mibi1477 mibi1478 mibi1479 mibi1480 mibi1481 mibi1482 mibi1483 mibi1484 mibi1485 mibi1486 mibi1487 mibi1488 mibi1489 mibi1490 mibi1491 mibi1492 mibi1493 mibi1494 mibi1495 mibi1496 mibi1497 mibi1498 mibi1499 mibi1500 mibi1501 mibi1502 mibi1503 mibi1504 mibi1505 mibi1506 mibi2312 mibi2313 mibi2314 mibi2315 mibi2316 mibi2317 mibi2318 mibi2319 mibi2320 mibi2321 mibi2379; do
    blastn -db ../shovill/${sample}/contigs.fa -query sarA_revcomp.fasta -evalue 1e-50 -num_threads 15 -outfmt 6 -strand both -max_target_seqs 1 > sarA_on_${sample}.blastn
    done
    for sample in mibi1435 mibi1436 mibi1437 mibi1438 mibi1439 mibi1440 mibi1441 mibi1442 mibi1443 mibi1444 mibi1445 mibi1446 mibi1447 mibi1448 mibi1449 mibi1450 mibi1451 mibi1452 mibi1453 mibi1454 mibi1455 mibi1456 mibi1457 mibi1458 mibi1459 mibi1460 mibi1461 mibi1462 mibi1463 mibi1464 mibi1465 mibi1466 mibi1467 mibi1468 mibi1469 mibi1471 mibi1473 mibi1474 mibi1475 mibi1476 mibi1477 mibi1478 mibi1479 mibi1480 mibi1481 mibi1482 mibi1483 mibi1484 mibi1485 mibi1486 mibi1487 mibi1488 mibi1489 mibi1490 mibi1491 mibi1492 mibi1493 mibi1494 mibi1495 mibi1496 mibi1497 mibi1498 mibi1499 mibi1500 mibi1501 mibi1502 mibi1503 mibi1504 mibi1505 mibi1506 mibi2312 mibi2313 mibi2314 mibi2315 mibi2316 mibi2317 mibi2318 mibi2319 mibi2320 mibi2321 mibi2379; do
    python3 analyze_blastn_res.py sarA_on_${sample}.blastn
    done
    sarA_on_mibi1435.blastn: 375
    sarA_on_mibi1436.blastn: 375
    sarA_on_mibi1437.blastn: 375
    sarA_on_mibi1438.blastn: 375
    sarA_on_mibi1439.blastn: 375
    sarA_on_mibi1440.blastn: 375
    sarA_on_mibi1441.blastn: 375
    sarA_on_mibi1442.blastn: 375
    sarA_on_mibi1443.blastn: 375
    sarA_on_mibi1444.blastn: 375
    sarA_on_mibi1445.blastn: 375
    sarA_on_mibi1446.blastn: 375
    sarA_on_mibi1447.blastn: 375
    sarA_on_mibi1448.blastn: 375
    sarA_on_mibi1449.blastn: 375
    sarA_on_mibi1450.blastn: 375
    sarA_on_mibi1451.blastn: 0 *
    sarA_on_mibi1452.blastn: 375
    sarA_on_mibi1453.blastn: 375
    sarA_on_mibi1454.blastn: 375
    sarA_on_mibi1455.blastn: 375
    sarA_on_mibi1456.blastn: 375
    sarA_on_mibi1457.blastn: 375
    sarA_on_mibi1458.blastn: 375
    sarA_on_mibi1459.blastn: 375
    sarA_on_mibi1460.blastn: 375
    sarA_on_mibi1461.blastn: 375
    sarA_on_mibi1462.blastn: 375
    sarA_on_mibi1463.blastn: 375
    sarA_on_mibi1464.blastn: 375
    sarA_on_mibi1465.blastn: 375
    sarA_on_mibi1466.blastn: 208 *
    sarA_on_mibi1467.blastn: 375
    sarA_on_mibi1468.blastn: 375
    sarA_on_mibi1469.blastn: 375
    sarA_on_mibi1471.blastn: 375
    sarA_on_mibi1473.blastn: 375
    sarA_on_mibi1474.blastn: 375
    sarA_on_mibi1475.blastn: 375
    sarA_on_mibi1476.blastn: 375
    sarA_on_mibi1477.blastn: 375
    sarA_on_mibi1478.blastn: 375
    sarA_on_mibi1479.blastn: 375
    sarA_on_mibi1480.blastn: 375
    sarA_on_mibi1481.blastn: 248 *
    sarA_on_mibi1482.blastn: 375
    sarA_on_mibi1483.blastn: 375
    sarA_on_mibi1484.blastn: 375
    sarA_on_mibi1485.blastn: 375
    sarA_on_mibi1486.blastn: 375
    sarA_on_mibi1487.blastn: 375
    sarA_on_mibi1488.blastn: 375
    sarA_on_mibi1489.blastn: 375
    sarA_on_mibi1490.blastn: 375
    sarA_on_mibi1491.blastn: 375
    sarA_on_mibi1492.blastn: 375
    sarA_on_mibi1493.blastn: 375
    sarA_on_mibi1494.blastn: 375
    sarA_on_mibi1495.blastn: 375
    sarA_on_mibi1496.blastn: 375
    sarA_on_mibi1497.blastn: 375
    sarA_on_mibi1498.blastn: 375
    sarA_on_mibi1499.blastn: 375
    sarA_on_mibi1500.blastn: 375
    sarA_on_mibi1501.blastn: 375
    sarA_on_mibi1502.blastn: 375
    sarA_on_mibi1503.blastn: 375
    sarA_on_mibi1504.blastn: 375
    sarA_on_mibi1505.blastn: 375
    sarA_on_mibi1506.blastn: 357
    sarA_on_mibi2312.blastn: 375
    sarA_on_mibi2313.blastn: 375
    sarA_on_mibi2314.blastn: 375
    sarA_on_mibi2315.blastn: 375
    sarA_on_mibi2316.blastn: 375
    sarA_on_mibi2317.blastn: 375
    sarA_on_mibi2318.blastn: 375
    sarA_on_mibi2319.blastn: 375
    sarA_on_mibi2320.blastn: 375
    sarA_on_mibi2321.blastn: 375
    sarA_on_mibi2379.blastn: 375
    
    # -length(agrC.fasta)=1290
    samtools faidx agrABCD_hld.fasta "gi|3320006|emb|Z49220.1|":1494-2783 > agrC.fasta
    
    for sample in mibi1435 mibi1436 mibi1437 mibi1438 mibi1439 mibi1440 mibi1441 mibi1442 mibi1443 mibi1444 mibi1445 mibi1446 mibi1447 mibi1448 mibi1449 mibi1450 mibi1451 mibi1452 mibi1453 mibi1454 mibi1455 mibi1456 mibi1457 mibi1458 mibi1459 mibi1460 mibi1461 mibi1462 mibi1463 mibi1464 mibi1465 mibi1466 mibi1467 mibi1468 mibi1469 mibi1471 mibi1473 mibi1474 mibi1475 mibi1476 mibi1477 mibi1478 mibi1479 mibi1480 mibi1481 mibi1482 mibi1483 mibi1484 mibi1485 mibi1486 mibi1487 mibi1488 mibi1489 mibi1490 mibi1491 mibi1492 mibi1493 mibi1494 mibi1495 mibi1496 mibi1497 mibi1498 mibi1499 mibi1500 mibi1501 mibi1502 mibi1503 mibi1504 mibi1505 mibi1506 mibi2312 mibi2313 mibi2314 mibi2315 mibi2316 mibi2317 mibi2318 mibi2319 mibi2320 mibi2321 mibi2379; do
    blastn -db ../shovill/${sample}/contigs.fa -query agrC.fasta -evalue 1e-50 -num_threads 15 -outfmt 6 -strand both -max_target_seqs 1 > agrC_on_${sample}.blastn
    done
    for sample in mibi1435 mibi1436 mibi1437 mibi1438 mibi1439 mibi1440 mibi1441 mibi1442 mibi1443 mibi1444 mibi1445 mibi1446 mibi1447 mibi1448 mibi1449 mibi1450 mibi1451 mibi1452 mibi1453 mibi1454 mibi1455 mibi1456 mibi1457 mibi1458 mibi1459 mibi1460 mibi1461 mibi1462 mibi1463 mibi1464 mibi1465 mibi1466 mibi1467 mibi1468 mibi1469 mibi1471 mibi1473 mibi1474 mibi1475 mibi1476 mibi1477 mibi1478 mibi1479 mibi1480 mibi1481 mibi1482 mibi1483 mibi1484 mibi1485 mibi1486 mibi1487 mibi1488 mibi1489 mibi1490 mibi1491 mibi1492 mibi1493 mibi1494 mibi1495 mibi1496 mibi1497 mibi1498 mibi1499 mibi1500 mibi1501 mibi1502 mibi1503 mibi1504 mibi1505 mibi1506 mibi2312 mibi2313 mibi2314 mibi2315 mibi2316 mibi2317 mibi2318 mibi2319 mibi2320 mibi2321 mibi2379; do
    python3 analyze_blastn_res.py agrC_on_${sample}.blastn
    done
    agrC_on_mibi1435.blastn: 1002
    agrC_on_mibi1436.blastn: 1290
    agrC_on_mibi1437.blastn: 688
    agrC_on_mibi1438.blastn: 1290
    agrC_on_mibi1439.blastn: 688
    agrC_on_mibi1440.blastn: 1290
    agrC_on_mibi1441.blastn: 861
    agrC_on_mibi1442.blastn: 688
    agrC_on_mibi1443.blastn: 1290
    agrC_on_mibi1444.blastn: 1290
    agrC_on_mibi1445.blastn: 688
    agrC_on_mibi1446.blastn: 688
    agrC_on_mibi1447.blastn: 688
    agrC_on_mibi1448.blastn: 1290
    agrC_on_mibi1449.blastn: 861
    agrC_on_mibi1450.blastn: 1290
    agrC_on_mibi1451.blastn: 688
    agrC_on_mibi1452.blastn: 818
    agrC_on_mibi1453.blastn: 1290
    agrC_on_mibi1454.blastn: 1290
    agrC_on_mibi1455.blastn: 1290
    agrC_on_mibi1456.blastn: 1290
    agrC_on_mibi1457.blastn: 688
    agrC_on_mibi1458.blastn: 1290
    agrC_on_mibi1459.blastn: 1290
    agrC_on_mibi1460.blastn: 688
    agrC_on_mibi1461.blastn: 1290
    agrC_on_mibi1462.blastn: 1290
    agrC_on_mibi1463.blastn: 1290
    agrC_on_mibi1464.blastn: 861
    agrC_on_mibi1465.blastn: 688
    agrC_on_mibi1466.blastn: 541
    agrC_on_mibi1467.blastn: 688
    agrC_on_mibi1468.blastn: 1290
    agrC_on_mibi1469.blastn: 1290
    agrC_on_mibi1471.blastn: 1165
    agrC_on_mibi1473.blastn: 688
    agrC_on_mibi1474.blastn: 1290
    agrC_on_mibi1475.blastn: 688
    agrC_on_mibi1476.blastn: 1290
    agrC_on_mibi1477.blastn: 688
    agrC_on_mibi1478.blastn: 442
    agrC_on_mibi1479.blastn: 688
    agrC_on_mibi1480.blastn: 1290
    agrC_on_mibi1481.blastn: 862
    agrC_on_mibi1482.blastn: 1290
    agrC_on_mibi1483.blastn: 1290
    agrC_on_mibi1484.blastn: 491
    agrC_on_mibi1485.blastn: 1290
    agrC_on_mibi1486.blastn: 1290
    agrC_on_mibi1487.blastn: 1290
    agrC_on_mibi1488.blastn: 1290
    agrC_on_mibi1489.blastn: 688
    agrC_on_mibi1490.blastn: 688
    agrC_on_mibi1491.blastn: 688
    agrC_on_mibi1492.blastn: 688
    agrC_on_mibi1493.blastn: 818
    agrC_on_mibi1494.blastn: 1290
    agrC_on_mibi1495.blastn: 688
    agrC_on_mibi1496.blastn: 1290
    agrC_on_mibi1497.blastn: 911
    agrC_on_mibi1498.blastn: 1290
    agrC_on_mibi1499.blastn: 688
    agrC_on_mibi1500.blastn: 1290
    agrC_on_mibi1501.blastn: 1290
    agrC_on_mibi1502.blastn: 1094
    agrC_on_mibi1503.blastn: 818
    agrC_on_mibi1504.blastn: 861
    agrC_on_mibi1505.blastn: 1290
    agrC_on_mibi1506.blastn: 1290
    agrC_on_mibi2312.blastn: 688
    agrC_on_mibi2313.blastn: 1290
    agrC_on_mibi2314.blastn: 1290
    agrC_on_mibi2315.blastn: 1290
    agrC_on_mibi2316.blastn: 688
    agrC_on_mibi2317.blastn: 688
    agrC_on_mibi2318.blastn: 1290
    agrC_on_mibi2319.blastn: 1290
    agrC_on_mibi2320.blastn: 688
    agrC_on_mibi2321.blastn: 1290
    agrC_on_mibi2379.blastn: 688
    
    partial, 1002/1290 nt present
    +
    partial, 688/1290 nt present
    +
    partial, 688/1290 nt present
    +
    partial, 861/1290 nt present
    partial, 688/1290 nt present
    +
    +
    partial, 688/1290 nt present
    partial, 688/1290 nt present
    partial, 688/1290 nt present
    +
    partial, 861/1290 nt present
    +
    partial, 688/1290 nt present
    partial, 818/1290 nt present
    +
    +
    +
    +
    partial, 688/1290 nt present
    +
    +
    partial, 688/1290 nt present
    +
    +
    +
    partial, 861/1290 nt present
    partial, 688/1290 nt present
    partial, 541/1290 nt present
    partial, 688/1290 nt present
    +
    +
    partial, 1165/1290 nt present
    partial, 688/1290 nt present
    +
    partial, 688/1290 nt present
    +
    partial, 688/1290 nt present
    partial, 442/1290 nt present
    partial, 688/1290 nt present
    +
    partial, 862/1290 nt present
    +
    +
    partial, 491/1290 nt present
    +
    +
    +
    +
    partial, 688/1290 nt present
    partial, 688/1290 nt present
    partial, 688/1290 nt present
    partial, 688/1290 nt present
    partial, 818/1290 nt present
    +
    partial, 688/1290 nt present
    +
    partial, 911/1290 nt present
    +
    partial, 688/1290 nt present
    +
    +
    partial, 1094/1290 nt present
    partial, 818/1290 nt present
    partial, 861/1290 nt present
    +
    +
    partial, 688/1290 nt present
    +
    +
    +
    partial, 688/1290 nt present
    partial, 688/1290 nt present
    +
    +
    partial, 688/1290 nt present
    +
    partial, 688/1290 nt present
    
    # - length(yycG.fasta)=1833 (2589240..2591072, inclusive)
    gene            complement(2589240..2591072)
                    /gene="yycG"
    samtools faidx CP000029.fasta "gi|57636010|gb|CP000029.1|":2589240-2591072 > yycG.fasta
    revcomp yycG.fasta > yycG_revcomp.fasta
    for sample in mibi1435 mibi1436 mibi1437 mibi1438 mibi1439 mibi1440 mibi1441 mibi1442 mibi1443 mibi1444 mibi1445 mibi1446 mibi1447 mibi1448 mibi1449 mibi1450 mibi1451 mibi1452 mibi1453 mibi1454 mibi1455 mibi1456 mibi1457 mibi1458 mibi1459 mibi1460 mibi1461 mibi1462 mibi1463 mibi1464 mibi1465 mibi1466 mibi1467 mibi1468 mibi1469 mibi1471 mibi1473 mibi1474 mibi1475 mibi1476 mibi1477 mibi1478 mibi1479 mibi1480 mibi1481 mibi1482 mibi1483 mibi1484 mibi1485 mibi1486 mibi1487 mibi1488 mibi1489 mibi1490 mibi1491 mibi1492 mibi1493 mibi1494 mibi1495 mibi1496 mibi1497 mibi1498 mibi1499 mibi1500 mibi1501 mibi1502 mibi1503 mibi1504 mibi1505 mibi1506 mibi2312 mibi2313 mibi2314 mibi2315 mibi2316 mibi2317 mibi2318 mibi2319 mibi2320 mibi2321 mibi2379; do
    blastn -db ../shovill/${sample}/contigs.fa -query yycG_revcomp.fasta -evalue 1e-50 -num_threads 15 -outfmt 6 -strand both -max_target_seqs 1 > yycG_on_${sample}.blastn
    done
    for sample in mibi1435 mibi1436 mibi1437 mibi1438 mibi1439 mibi1440 mibi1441 mibi1442 mibi1443 mibi1444 mibi1445 mibi1446 mibi1447 mibi1448 mibi1449 mibi1450 mibi1451 mibi1452 mibi1453 mibi1454 mibi1455 mibi1456 mibi1457 mibi1458 mibi1459 mibi1460 mibi1461 mibi1462 mibi1463 mibi1464 mibi1465 mibi1466 mibi1467 mibi1468 mibi1469 mibi1471 mibi1473 mibi1474 mibi1475 mibi1476 mibi1477 mibi1478 mibi1479 mibi1480 mibi1481 mibi1482 mibi1483 mibi1484 mibi1485 mibi1486 mibi1487 mibi1488 mibi1489 mibi1490 mibi1491 mibi1492 mibi1493 mibi1494 mibi1495 mibi1496 mibi1497 mibi1498 mibi1499 mibi1500 mibi1501 mibi1502 mibi1503 mibi1504 mibi1505 mibi1506 mibi2312 mibi2313 mibi2314 mibi2315 mibi2316 mibi2317 mibi2318 mibi2319 mibi2320 mibi2321 mibi2379; do
    python3 analyze_blastn_res.py yycG_on_${sample}.blastn
    done
    #-->WARNING: Only 80% similarity, not the same gene!
    yycG_on_mibi1435.blastn: 1833
    yycG_on_mibi1436.blastn: 1833
    yycG_on_mibi1437.blastn: 1833
    yycG_on_mibi1438.blastn: 1833
    yycG_on_mibi1439.blastn: 1833
    yycG_on_mibi1440.blastn: 1833
    yycG_on_mibi1441.blastn: 1833
    yycG_on_mibi1442.blastn: 1833
    yycG_on_mibi1443.blastn: 1833
    yycG_on_mibi1444.blastn: 1833
    yycG_on_mibi1445.blastn: 1833
    yycG_on_mibi1446.blastn: 1833
    yycG_on_mibi1447.blastn: 1833
    yycG_on_mibi1448.blastn: 1833
    yycG_on_mibi1449.blastn: 1833
    yycG_on_mibi1450.blastn: 1833
    yycG_on_mibi1451.blastn: 1833
    yycG_on_mibi1452.blastn: 1833
    yycG_on_mibi1453.blastn: 1833
    yycG_on_mibi1454.blastn: 1833
    yycG_on_mibi1455.blastn: 1833
    yycG_on_mibi1456.blastn: 1833
    yycG_on_mibi1457.blastn: 1833
    yycG_on_mibi1458.blastn: 1833
    yycG_on_mibi1459.blastn: 1833
    yycG_on_mibi1460.blastn: 1833
    yycG_on_mibi1461.blastn: 1833
    yycG_on_mibi1462.blastn: 1833
    yycG_on_mibi1463.blastn: 1833
    yycG_on_mibi1464.blastn: 1833
    yycG_on_mibi1465.blastn: 1833
    yycG_on_mibi1466.blastn: 1833
    yycG_on_mibi1467.blastn: 1833
    yycG_on_mibi1468.blastn: 1833
    yycG_on_mibi1469.blastn: 1833
    yycG_on_mibi1471.blastn: 1833
    yycG_on_mibi1473.blastn: 1833
    yycG_on_mibi1474.blastn: 1833
    yycG_on_mibi1475.blastn: 1833
    yycG_on_mibi1476.blastn: 1833
    yycG_on_mibi1477.blastn: 1833
    yycG_on_mibi1478.blastn: 1833
    yycG_on_mibi1479.blastn: 1833
    yycG_on_mibi1480.blastn: 1833
    yycG_on_mibi1481.blastn: 1833
    yycG_on_mibi1482.blastn: 1833
    yycG_on_mibi1483.blastn: 1833
    yycG_on_mibi1484.blastn: 1833
    yycG_on_mibi1485.blastn: 1833
    yycG_on_mibi1486.blastn: 1833
    yycG_on_mibi1487.blastn: 1833
    yycG_on_mibi1488.blastn: 1833
    yycG_on_mibi1489.blastn: 1833
    yycG_on_mibi1490.blastn: 1833
    yycG_on_mibi1491.blastn: 1833
    yycG_on_mibi1492.blastn: 1833
    yycG_on_mibi1493.blastn: 1833
    yycG_on_mibi1494.blastn: 1833
    yycG_on_mibi1495.blastn: 1833
    yycG_on_mibi1496.blastn: 1833
    yycG_on_mibi1497.blastn: 1833
    yycG_on_mibi1498.blastn: 1833
    yycG_on_mibi1499.blastn: 1833
    yycG_on_mibi1500.blastn: 1833
    yycG_on_mibi1501.blastn: 1833
    yycG_on_mibi1502.blastn: 1833
    yycG_on_mibi1503.blastn: 1833
    yycG_on_mibi1504.blastn: 1833
    yycG_on_mibi1505.blastn: 1833
    yycG_on_mibi1506.blastn: 1833
    yycG_on_mibi2312.blastn: 1833
    yycG_on_mibi2313.blastn: 1833
    yycG_on_mibi2314.blastn: 1833
    yycG_on_mibi2315.blastn: 1833
    yycG_on_mibi2316.blastn: 1833
    yycG_on_mibi2317.blastn: 1833
    yycG_on_mibi2318.blastn: 1833
    yycG_on_mibi2319.blastn: 1833
    yycG_on_mibi2320.blastn: 1833
    yycG_on_mibi2321.blastn: 1833
    yycG_on_mibi2379.blastn: 1833
    
    #TODOs: extract the gene sequences for psm-delta and atl
    #https://www.ncbi.nlm.nih.gov/nuccore    (psm gene) AND "Staphylococcus aureus"[porgn:__txid1280]
    #grep "PSM-delta" mibi1435.gbff
    #                     /product="Phenol-soluble modulin PSM-delta"
    #grep "atl" mibi1435.gbff
    #                     /gene="atl"
    #                     /gene="atl"
    
    https://www.ncbi.nlm.nih.gov/nuccore/JQ066320.1
    # -length(psm-beta1.fasta)=135
    for sample in mibi1435 mibi1436 mibi1437 mibi1438 mibi1439 mibi1440 mibi1441 mibi1442 mibi1443 mibi1444 mibi1445 mibi1446 mibi1447 mibi1448 mibi1449 mibi1450 mibi1451 mibi1452 mibi1453 mibi1454 mibi1455 mibi1456 mibi1457 mibi1458 mibi1459 mibi1460 mibi1461 mibi1462 mibi1463 mibi1464 mibi1465 mibi1466 mibi1467 mibi1468 mibi1469 mibi1471 mibi1473 mibi1474 mibi1475 mibi1476 mibi1477 mibi1478 mibi1479 mibi1480 mibi1481 mibi1482 mibi1483 mibi1484 mibi1485 mibi1486 mibi1487 mibi1488 mibi1489 mibi1490 mibi1491 mibi1492 mibi1493 mibi1494 mibi1495 mibi1496 mibi1497 mibi1498 mibi1499 mibi1500 mibi1501 mibi1502 mibi1503 mibi1504 mibi1505 mibi1506 mibi2312 mibi2313 mibi2314 mibi2315 mibi2316 mibi2317 mibi2318 mibi2319 mibi2320 mibi2321 mibi2379; do
    blastn -db ../shovill/${sample}/contigs.fa -query psm-beta1.fasta -evalue 1e-50 -num_threads 15 -outfmt 6 -strand both -max_target_seqs 1 > psm-beta1_on_${sample}.blastn
    done
    for sample in mibi1435 mibi1436 mibi1437 mibi1438 mibi1439 mibi1440 mibi1441 mibi1442 mibi1443 mibi1444 mibi1445 mibi1446 mibi1447 mibi1448 mibi1449 mibi1450 mibi1451 mibi1452 mibi1453 mibi1454 mibi1455 mibi1456 mibi1457 mibi1458 mibi1459 mibi1460 mibi1461 mibi1462 mibi1463 mibi1464 mibi1465 mibi1466 mibi1467 mibi1468 mibi1469 mibi1471 mibi1473 mibi1474 mibi1475 mibi1476 mibi1477 mibi1478 mibi1479 mibi1480 mibi1481 mibi1482 mibi1483 mibi1484 mibi1485 mibi1486 mibi1487 mibi1488 mibi1489 mibi1490 mibi1491 mibi1492 mibi1493 mibi1494 mibi1495 mibi1496 mibi1497 mibi1498 mibi1499 mibi1500 mibi1501 mibi1502 mibi1503 mibi1504 mibi1505 mibi1506 mibi2312 mibi2313 mibi2314 mibi2315 mibi2316 mibi2317 mibi2318 mibi2319 mibi2320 mibi2321 mibi2379; do
    python3 analyze_blastn_res.py psm-beta1_on_${sample}.blastn
    done
    psm-beta1_on_mibi1435.blastn: 0
    psm-beta1_on_mibi1436.blastn: 0
    psm-beta1_on_mibi1437.blastn: 0
    psm-beta1_on_mibi1438.blastn: 0
    psm-beta1_on_mibi1439.blastn: 0
    psm-beta1_on_mibi1440.blastn: 0
    psm-beta1_on_mibi1441.blastn: 0
    psm-beta1_on_mibi1442.blastn: 0
    psm-beta1_on_mibi1443.blastn: 0
    psm-beta1_on_mibi1444.blastn: 0
    psm-beta1_on_mibi1445.blastn: 0
    psm-beta1_on_mibi1446.blastn: 0
    psm-beta1_on_mibi1447.blastn: 0
    psm-beta1_on_mibi1448.blastn: 0
    psm-beta1_on_mibi1449.blastn: 0
    psm-beta1_on_mibi1450.blastn: 0
    psm-beta1_on_mibi1451.blastn: 0
    psm-beta1_on_mibi1452.blastn: 0
    psm-beta1_on_mibi1453.blastn: 0
    psm-beta1_on_mibi1454.blastn: 0
    psm-beta1_on_mibi1455.blastn: 0
    psm-beta1_on_mibi1456.blastn: 0
    psm-beta1_on_mibi1457.blastn: 0
    psm-beta1_on_mibi1458.blastn: 0
    psm-beta1_on_mibi1459.blastn: 0
    psm-beta1_on_mibi1460.blastn: 0
    psm-beta1_on_mibi1461.blastn: 0
    psm-beta1_on_mibi1462.blastn: 0
    psm-beta1_on_mibi1463.blastn: 0
    psm-beta1_on_mibi1464.blastn: 0
    psm-beta1_on_mibi1465.blastn: 0
    psm-beta1_on_mibi1466.blastn: 0
    psm-beta1_on_mibi1467.blastn: 0
    psm-beta1_on_mibi1468.blastn: 0
    psm-beta1_on_mibi1469.blastn: 0
    psm-beta1_on_mibi1471.blastn: 0
    psm-beta1_on_mibi1473.blastn: 0
    psm-beta1_on_mibi1474.blastn: 0
    psm-beta1_on_mibi1475.blastn: 0
    psm-beta1_on_mibi1476.blastn: 0
    psm-beta1_on_mibi1477.blastn: 0
    psm-beta1_on_mibi1478.blastn: 0
    psm-beta1_on_mibi1479.blastn: 0
    psm-beta1_on_mibi1480.blastn: 0
    psm-beta1_on_mibi1481.blastn: 0
    psm-beta1_on_mibi1482.blastn: 0
    psm-beta1_on_mibi1483.blastn: 0
    psm-beta1_on_mibi1484.blastn: 0
    psm-beta1_on_mibi1485.blastn: 0
    psm-beta1_on_mibi1486.blastn: 0
    psm-beta1_on_mibi1487.blastn: 0
    psm-beta1_on_mibi1488.blastn: 0
    psm-beta1_on_mibi1489.blastn: 0
    psm-beta1_on_mibi1490.blastn: 0
    psm-beta1_on_mibi1491.blastn: 0
    psm-beta1_on_mibi1492.blastn: 0
    psm-beta1_on_mibi1493.blastn: 0
    psm-beta1_on_mibi1494.blastn: 0
    psm-beta1_on_mibi1495.blastn: 0
    psm-beta1_on_mibi1496.blastn: 0
    psm-beta1_on_mibi1497.blastn: 0
    psm-beta1_on_mibi1498.blastn: 0
    psm-beta1_on_mibi1499.blastn: 0
    psm-beta1_on_mibi1500.blastn: 0
    psm-beta1_on_mibi1501.blastn: 0
    psm-beta1_on_mibi1502.blastn: 0
    psm-beta1_on_mibi1503.blastn: 0
    psm-beta1_on_mibi1504.blastn: 0
    psm-beta1_on_mibi1505.blastn: 0
    psm-beta1_on_mibi1506.blastn: 0
    psm-beta1_on_mibi2312.blastn: 0
    psm-beta1_on_mibi2313.blastn: 0
    psm-beta1_on_mibi2314.blastn: 0
    psm-beta1_on_mibi2315.blastn: 0
    psm-beta1_on_mibi2316.blastn: 0
    psm-beta1_on_mibi2317.blastn: 0
    psm-beta1_on_mibi2318.blastn: 0
    psm-beta1_on_mibi2319.blastn: 0
    psm-beta1_on_mibi2320.blastn: 0
    psm-beta1_on_mibi2321.blastn: 0
    psm-beta1_on_mibi2379.blastn: 0
    
    # -length(hlb.fasta)=993
    for sample in mibi1435 mibi1436 mibi1437 mibi1438 mibi1439 mibi1440 mibi1441 mibi1442 mibi1443 mibi1444 mibi1445 mibi1446 mibi1447 mibi1448 mibi1449 mibi1450 mibi1451 mibi1452 mibi1453 mibi1454 mibi1455 mibi1456 mibi1457 mibi1458 mibi1459 mibi1460 mibi1461 mibi1462 mibi1463 mibi1464 mibi1465 mibi1466 mibi1467 mibi1468 mibi1469 mibi1471 mibi1473 mibi1474 mibi1475 mibi1476 mibi1477 mibi1478 mibi1479 mibi1480 mibi1481 mibi1482 mibi1483 mibi1484 mibi1485 mibi1486 mibi1487 mibi1488 mibi1489 mibi1490 mibi1491 mibi1492 mibi1493 mibi1494 mibi1495 mibi1496 mibi1497 mibi1498 mibi1499 mibi1500 mibi1501 mibi1502 mibi1503 mibi1504 mibi1505 mibi1506 mibi2312 mibi2313 mibi2314 mibi2315 mibi2316 mibi2317 mibi2318 mibi2319 mibi2320 mibi2321 mibi2379; do
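    # NOTE: query corrected from psm-beta.fasta to hlb.fasta (this section is about hlb); the all-zero results recorded below should be re-checked after rerunning.
    blastn -db ../shovill/${sample}/contigs.fa -query hlb.fasta -evalue 1e-50 -num_threads 15 -outfmt 6 -strand both -max_target_seqs 1 > hlb_on_${sample}.blastn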
    done
    for sample in mibi1435 mibi1436 mibi1437 mibi1438 mibi1439 mibi1440 mibi1441 mibi1442 mibi1443 mibi1444 mibi1445 mibi1446 mibi1447 mibi1448 mibi1449 mibi1450 mibi1451 mibi1452 mibi1453 mibi1454 mibi1455 mibi1456 mibi1457 mibi1458 mibi1459 mibi1460 mibi1461 mibi1462 mibi1463 mibi1464 mibi1465 mibi1466 mibi1467 mibi1468 mibi1469 mibi1471 mibi1473 mibi1474 mibi1475 mibi1476 mibi1477 mibi1478 mibi1479 mibi1480 mibi1481 mibi1482 mibi1483 mibi1484 mibi1485 mibi1486 mibi1487 mibi1488 mibi1489 mibi1490 mibi1491 mibi1492 mibi1493 mibi1494 mibi1495 mibi1496 mibi1497 mibi1498 mibi1499 mibi1500 mibi1501 mibi1502 mibi1503 mibi1504 mibi1505 mibi1506 mibi2312 mibi2313 mibi2314 mibi2315 mibi2316 mibi2317 mibi2318 mibi2319 mibi2320 mibi2321 mibi2379; do
    python3 analyze_blastn_res.py hlb_on_${sample}.blastn
    done
    hlb_on_mibi1435.blastn: 0
    hlb_on_mibi1436.blastn: 0
    hlb_on_mibi1437.blastn: 0
    hlb_on_mibi1438.blastn: 0
    hlb_on_mibi1439.blastn: 0
    hlb_on_mibi1440.blastn: 0
    hlb_on_mibi1441.blastn: 0
    hlb_on_mibi1442.blastn: 0
    hlb_on_mibi1443.blastn: 0
    hlb_on_mibi1444.blastn: 0
    hlb_on_mibi1445.blastn: 0
    hlb_on_mibi1446.blastn: 0
    hlb_on_mibi1447.blastn: 0
    hlb_on_mibi1448.blastn: 0
    hlb_on_mibi1449.blastn: 0
    hlb_on_mibi1450.blastn: 0
    hlb_on_mibi1451.blastn: 0
    hlb_on_mibi1452.blastn: 0
    hlb_on_mibi1453.blastn: 0
    hlb_on_mibi1454.blastn: 0
    hlb_on_mibi1455.blastn: 0
    hlb_on_mibi1456.blastn: 0
    hlb_on_mibi1457.blastn: 0
    hlb_on_mibi1458.blastn: 0
    hlb_on_mibi1459.blastn: 0
    hlb_on_mibi1460.blastn: 0
    hlb_on_mibi1461.blastn: 0
    hlb_on_mibi1462.blastn: 0
    hlb_on_mibi1463.blastn: 0
    hlb_on_mibi1464.blastn: 0
    hlb_on_mibi1465.blastn: 0
    hlb_on_mibi1466.blastn: 0
    hlb_on_mibi1467.blastn: 0
    hlb_on_mibi1468.blastn: 0
    hlb_on_mibi1469.blastn: 0
    hlb_on_mibi1471.blastn: 0
    hlb_on_mibi1473.blastn: 0
    hlb_on_mibi1474.blastn: 0
    hlb_on_mibi1475.blastn: 0
    hlb_on_mibi1476.blastn: 0
    hlb_on_mibi1477.blastn: 0
    hlb_on_mibi1478.blastn: 0
    hlb_on_mibi1479.blastn: 0
    hlb_on_mibi1480.blastn: 0
    hlb_on_mibi1481.blastn: 0
    hlb_on_mibi1482.blastn: 0
    hlb_on_mibi1483.blastn: 0
    hlb_on_mibi1484.blastn: 0
    hlb_on_mibi1485.blastn: 0
    hlb_on_mibi1486.blastn: 0
    hlb_on_mibi1487.blastn: 0
    hlb_on_mibi1488.blastn: 0
    hlb_on_mibi1489.blastn: 0
    hlb_on_mibi1490.blastn: 0
    hlb_on_mibi1491.blastn: 0
    hlb_on_mibi1492.blastn: 0
    hlb_on_mibi1493.blastn: 0
    hlb_on_mibi1494.blastn: 0
    hlb_on_mibi1495.blastn: 0
    hlb_on_mibi1496.blastn: 0
    hlb_on_mibi1497.blastn: 0
    hlb_on_mibi1498.blastn: 0
    hlb_on_mibi1499.blastn: 0
    hlb_on_mibi1500.blastn: 0
    hlb_on_mibi1501.blastn: 0
    hlb_on_mibi1502.blastn: 0
    hlb_on_mibi1503.blastn: 0
    hlb_on_mibi1504.blastn: 0
    hlb_on_mibi1505.blastn: 0
    hlb_on_mibi1506.blastn: 0
    hlb_on_mibi2312.blastn: 0
    hlb_on_mibi2313.blastn: 0
    hlb_on_mibi2314.blastn: 0
    hlb_on_mibi2315.blastn: 0
    hlb_on_mibi2316.blastn: 0
    hlb_on_mibi2317.blastn: 0
    hlb_on_mibi2318.blastn: 0
    hlb_on_mibi2319.blastn: 0
    hlb_on_mibi2320.blastn: 0
    hlb_on_mibi2321.blastn: 0
    hlb_on_mibi2379.blastn: 0
    
    ## -length(atlE_revcomp.fasta)=4008
    #gene            complement(627656..631663)
    #                     /gene="atlE"
    #samtools faidx CP000029.fasta gi|57636010|gb|CP000029.1|
    # Cut the atlE locus (complement(627656..631663), 4008 bp) out of CP000029.1,
    # then reverse-complement it because the gene is annotated on the minus strand.
    samtools faidx CP000029.fasta "gi|57636010|gb|CP000029.1|":627656-631663 > atlE.fasta
    revcomp atlE.fasta > atlE_revcomp.fasta
    # All 81 isolates; the brace ranges expand to exactly the IDs that were
    # previously spelled out inline (1470 and 1472 are intentionally absent).
    samples=(mibi{1435..1469} mibi1471 mibi{1473..1506} mibi{2312..2321} mibi2379)
    # Search each assembly for atlE and summarise the hits right away.
    for sample in "${samples[@]}"; do
      hits="atlE_on_${sample}.blastn"
      # The redirect creates the hit file even when blastn fails, and an empty
      # file looks the same as "no hit" — so report failures and skip the summary.
      if ! blastn -db "../shovill/${sample}/contigs.fa" -query atlE_revcomp.fasta -evalue 1e-50 -num_threads 15 -outfmt 6 -strand both -max_target_seqs 1 > "${hits}"; then
        echo "blastn failed for ${sample}; skipping ${hits}" >&2
        continue
      fi
      python3 analyze_blastn_res.py "${hits}"
    done
    atlE_on_mibi1435.blastn: 4008
    atlE_on_mibi1436.blastn: 4008
    atlE_on_mibi1437.blastn: 4465
    atlE_on_mibi1438.blastn: 4008
    atlE_on_mibi1439.blastn: 4008
    atlE_on_mibi1440.blastn: 4008
    atlE_on_mibi1441.blastn: 4450
    atlE_on_mibi1442.blastn: 4450
    atlE_on_mibi1443.blastn: 4008
    atlE_on_mibi1444.blastn: 4008
    atlE_on_mibi1445.blastn: 4008
    atlE_on_mibi1446.blastn: 4008
    atlE_on_mibi1447.blastn: 4008
    atlE_on_mibi1448.blastn: 4008
    atlE_on_mibi1449.blastn: 4008
    atlE_on_mibi1450.blastn: 4008
    atlE_on_mibi1451.blastn: 4008
    atlE_on_mibi1452.blastn: 4450
    atlE_on_mibi1453.blastn: 4008
    atlE_on_mibi1454.blastn: 4008
    atlE_on_mibi1455.blastn: 4008
    atlE_on_mibi1456.blastn: 4008
    atlE_on_mibi1457.blastn: 4008
    atlE_on_mibi1458.blastn: 4450
    atlE_on_mibi1459.blastn: 4008
    atlE_on_mibi1460.blastn: 4008
    atlE_on_mibi1461.blastn: 4008
    atlE_on_mibi1462.blastn: 4008
    atlE_on_mibi1463.blastn: 4450
    atlE_on_mibi1464.blastn: 4450
    atlE_on_mibi1465.blastn: 4008
    atlE_on_mibi1466.blastn: 4008
    atlE_on_mibi1467.blastn: 4450
    atlE_on_mibi1468.blastn: 4465
    atlE_on_mibi1469.blastn: 4465
    atlE_on_mibi1471.blastn: 4008
    atlE_on_mibi1473.blastn: 4008
    atlE_on_mibi1474.blastn: 4008
    atlE_on_mibi1475.blastn: 4008
    atlE_on_mibi1476.blastn: 4008
    atlE_on_mibi1477.blastn: 4008
    atlE_on_mibi1478.blastn: 4008
    atlE_on_mibi1479.blastn: 4008
    atlE_on_mibi1480.blastn: 4008
    atlE_on_mibi1481.blastn: 4008
    atlE_on_mibi1482.blastn: 4008
    atlE_on_mibi1483.blastn: 4450
    atlE_on_mibi1484.blastn: 4008
    atlE_on_mibi1485.blastn: 4008
    atlE_on_mibi1486.blastn: 4450
    atlE_on_mibi1487.blastn: 4008
    atlE_on_mibi1488.blastn: 4008
    atlE_on_mibi1489.blastn: 4450
    atlE_on_mibi1490.blastn: 4008
    atlE_on_mibi1491.blastn: 4008
    atlE_on_mibi1492.blastn: 4008
    atlE_on_mibi1493.blastn: 4450
    atlE_on_mibi1494.blastn: 4008
    atlE_on_mibi1495.blastn: 4008
    atlE_on_mibi1496.blastn: 4008
    atlE_on_mibi1497.blastn: 4008
    atlE_on_mibi1498.blastn: 4008
    atlE_on_mibi1499.blastn: 4008
    atlE_on_mibi1500.blastn: 4008
    atlE_on_mibi1501.blastn: 4008
    atlE_on_mibi1502.blastn: 4008
    atlE_on_mibi1503.blastn: 4008
    atlE_on_mibi1504.blastn: 4450
    atlE_on_mibi1505.blastn: 4008
    atlE_on_mibi1506.blastn: 4008
    atlE_on_mibi2312.blastn: 4008
    atlE_on_mibi2313.blastn: 4008
    atlE_on_mibi2314.blastn: 4008
    atlE_on_mibi2315.blastn: 4008
    atlE_on_mibi2316.blastn: 4008
    atlE_on_mibi2317.blastn: 4008
    atlE_on_mibi2318.blastn: 4008
    atlE_on_mibi2319.blastn: 4008
    atlE_on_mibi2320.blastn: 4008
    atlE_on_mibi2321.blastn: 4008
    atlE_on_mibi2379.blastn: 4008
    
    # -length(sdrG)=2796
    # All 81 isolates; the brace ranges expand to exactly the IDs that were
    # previously spelled out inline (1470 and 1472 are intentionally absent).
    samples=(mibi{1435..1469} mibi1471 mibi{1473..1506} mibi{2312..2321} mibi2379)
    # Search each assembly for sdrG and summarise the hits right away.
    for sample in "${samples[@]}"; do
      hits="sdrG_on_${sample}.blastn"
      # The redirect creates the hit file even when blastn fails, and an empty
      # file looks the same as "no hit" — so report failures and skip the summary.
      if ! blastn -db "../shovill/${sample}/contigs.fa" -query sdrG.fasta -evalue 1e-50 -num_threads 15 -outfmt 6 -strand both -max_target_seqs 1 > "${hits}"; then
        echo "blastn failed for ${sample}; skipping ${hits}" >&2
        continue
      fi
      python3 analyze_blastn_res.py "${hits}"
    done
    sdrG_on_mibi1435.blastn: 2973
    sdrG_on_mibi1436.blastn: 2967
    sdrG_on_mibi1437.blastn: 2821
    sdrG_on_mibi1438.blastn: 2796
    sdrG_on_mibi1439.blastn: 3155
    sdrG_on_mibi1440.blastn: 2967
    sdrG_on_mibi1441.blastn: 2797
    sdrG_on_mibi1442.blastn: 2801
    sdrG_on_mibi1443.blastn: 2966
    sdrG_on_mibi1444.blastn: 2796
    sdrG_on_mibi1445.blastn: 3158
    sdrG_on_mibi1446.blastn: 3155
    sdrG_on_mibi1447.blastn: 2966
    sdrG_on_mibi1448.blastn: 2797
    sdrG_on_mibi1449.blastn: 2712
    sdrG_on_mibi1450.blastn: 2794
    sdrG_on_mibi1451.blastn: 2946
    sdrG_on_mibi1452.blastn: 3166
    sdrG_on_mibi1453.blastn: 2970
    sdrG_on_mibi1454.blastn: 2976
    sdrG_on_mibi1455.blastn: 2797
    sdrG_on_mibi1456.blastn: 2967
    sdrG_on_mibi1457.blastn: 2963
    sdrG_on_mibi1458.blastn: 2794
    sdrG_on_mibi1459.blastn: 3153
    sdrG_on_mibi1460.blastn: 2566
    sdrG_on_mibi1461.blastn: 2500
    sdrG_on_mibi1462.blastn: 2966
    sdrG_on_mibi1463.blastn: 2971
    sdrG_on_mibi1464.blastn: 2530
    sdrG_on_mibi1465.blastn: 2981
    sdrG_on_mibi1466.blastn: 1547 *
    sdrG_on_mibi1467.blastn: 2794
    sdrG_on_mibi1468.blastn: 2967
    sdrG_on_mibi1469.blastn: 2967
    sdrG_on_mibi1471.blastn: 2796
    sdrG_on_mibi1473.blastn: 2976
    sdrG_on_mibi1474.blastn: 2797
    sdrG_on_mibi1475.blastn: 2981
    sdrG_on_mibi1476.blastn: 2794
    sdrG_on_mibi1477.blastn: 2967
    sdrG_on_mibi1478.blastn: 1511 *
    sdrG_on_mibi1479.blastn: 2966
    sdrG_on_mibi1480.blastn: 2976
    sdrG_on_mibi1481.blastn: 2797
    sdrG_on_mibi1482.blastn: 2797
    sdrG_on_mibi1483.blastn: 2797
    sdrG_on_mibi1484.blastn: 2976
    sdrG_on_mibi1485.blastn: 3001
    sdrG_on_mibi1486.blastn: 2797
    sdrG_on_mibi1487.blastn: 2976
    sdrG_on_mibi1488.blastn: 2963
    sdrG_on_mibi1489.blastn: 2797
    sdrG_on_mibi1490.blastn: 2981
    sdrG_on_mibi1491.blastn: 3155
    sdrG_on_mibi1492.blastn: 2982
    sdrG_on_mibi1493.blastn: 2904
    sdrG_on_mibi1494.blastn: 2980
    sdrG_on_mibi1495.blastn: 2976
    sdrG_on_mibi1496.blastn: 2994
    sdrG_on_mibi1497.blastn: 2797
    sdrG_on_mibi1498.blastn: 2967
    sdrG_on_mibi1499.blastn: 2976
    sdrG_on_mibi1500.blastn: 2797
    sdrG_on_mibi1501.blastn: 2797
    sdrG_on_mibi1502.blastn: 2976
    sdrG_on_mibi1503.blastn: 2976
    sdrG_on_mibi1504.blastn: 2634
    sdrG_on_mibi1505.blastn: 2970
    sdrG_on_mibi1506.blastn: 2985
    sdrG_on_mibi2312.blastn: 2866
    sdrG_on_mibi2313.blastn: 2794
    sdrG_on_mibi2314.blastn: 3171
    sdrG_on_mibi2315.blastn: 2797
    sdrG_on_mibi2316.blastn: 3158
    sdrG_on_mibi2317.blastn: 2975
    sdrG_on_mibi2318.blastn: 2976
    sdrG_on_mibi2319.blastn: 2613
    sdrG_on_mibi2320.blastn: 2972
    sdrG_on_mibi2321.blastn: 2794
    sdrG_on_mibi2379.blastn: 2975
    
    ##length(sdrH)=1446
    # All 81 isolates; the brace ranges expand to exactly the IDs that were
    # previously spelled out inline (1470 and 1472 are intentionally absent).
    samples=(mibi{1435..1469} mibi1471 mibi{1473..1506} mibi{2312..2321} mibi2379)
    # Search each assembly for sdrH (reverse-complemented query) and summarise the hits.
    for sample in "${samples[@]}"; do
      hits="sdrH_on_${sample}.blastn"
      # The redirect creates the hit file even when blastn fails, and an empty
      # file looks the same as "no hit" — so report failures and skip the summary.
      if ! blastn -db "../shovill/${sample}/contigs.fa" -query sdrH_revcomp.fasta -evalue 1e-50 -num_threads 15 -outfmt 6 -strand both -max_target_seqs 1 > "${hits}"; then
        echo "blastn failed for ${sample}; skipping ${hits}" >&2
        continue
      fi
      python3 analyze_blastn_res.py "${hits}"
    done
    sdrH_on_mibi1435.blastn: 1647
    sdrH_on_mibi1436.blastn: 1683
    sdrH_on_mibi1437.blastn: 1482
    sdrH_on_mibi1438.blastn: 1608
    sdrH_on_mibi1439.blastn: 1739
    sdrH_on_mibi1440.blastn: 1644
    sdrH_on_mibi1441.blastn: 1446
    sdrH_on_mibi1442.blastn: 1482
    sdrH_on_mibi1443.blastn: 1683
    sdrH_on_mibi1444.blastn: 1683
    sdrH_on_mibi1445.blastn: 1482
    sdrH_on_mibi1446.blastn: 1482
    sdrH_on_mibi1447.blastn: 1482
    sdrH_on_mibi1448.blastn: 1446
    sdrH_on_mibi1449.blastn: 1450
    sdrH_on_mibi1450.blastn: 1446
    sdrH_on_mibi1451.blastn: 1487
    sdrH_on_mibi1452.blastn: 1446
    sdrH_on_mibi1453.blastn: 1464
    sdrH_on_mibi1454.blastn: 1647
    sdrH_on_mibi1455.blastn: 1644
    sdrH_on_mibi1456.blastn: 1647
    sdrH_on_mibi1457.blastn: 1572
    sdrH_on_mibi1458.blastn: 1632
    sdrH_on_mibi1459.blastn: 1647
    sdrH_on_mibi1460.blastn: 1482
    sdrH_on_mibi1461.blastn: 1647
    sdrH_on_mibi1462.blastn: 1683
    sdrH_on_mibi1463.blastn: 1647
    sdrH_on_mibi1464.blastn: 1739
    sdrH_on_mibi1465.blastn: 1464
    sdrH_on_mibi1466.blastn: 1305
    sdrH_on_mibi1467.blastn: 1482
    sdrH_on_mibi1468.blastn: 1559
    sdrH_on_mibi1469.blastn: 1556
    sdrH_on_mibi1471.blastn: 1572
    sdrH_on_mibi1473.blastn: 1482
    sdrH_on_mibi1474.blastn: 1446
    sdrH_on_mibi1475.blastn: 1482
    sdrH_on_mibi1476.blastn: 1458
    sdrH_on_mibi1477.blastn: 1647
    sdrH_on_mibi1478.blastn: 710 *
    sdrH_on_mibi1479.blastn: 1482
    sdrH_on_mibi1480.blastn: 1647
    sdrH_on_mibi1481.blastn: 1412
    sdrH_on_mibi1482.blastn: 1647
    sdrH_on_mibi1483.blastn: 1647
    sdrH_on_mibi1484.blastn: 1448
    sdrH_on_mibi1485.blastn: 1647
    sdrH_on_mibi1486.blastn: 1827
    sdrH_on_mibi1487.blastn: 1647
    sdrH_on_mibi1488.blastn: 1683
    sdrH_on_mibi1489.blastn: 1739
    sdrH_on_mibi1490.blastn: 1482
    sdrH_on_mibi1491.blastn: 1482
    sdrH_on_mibi1492.blastn: 1482
    sdrH_on_mibi1493.blastn: 1440
    sdrH_on_mibi1494.blastn: 1458
    sdrH_on_mibi1495.blastn: 1482
    sdrH_on_mibi1496.blastn: 1644
    sdrH_on_mibi1497.blastn: 1736
    sdrH_on_mibi1498.blastn: 1647
    sdrH_on_mibi1499.blastn: 1482
    sdrH_on_mibi1500.blastn: 1683
    sdrH_on_mibi1501.blastn: 1647
    sdrH_on_mibi1502.blastn: 1647
    sdrH_on_mibi1503.blastn: 1745
    sdrH_on_mibi1504.blastn: 1683
    sdrH_on_mibi1505.blastn: 1736
    sdrH_on_mibi1506.blastn: 1683
    sdrH_on_mibi2312.blastn: 1556
    sdrH_on_mibi2313.blastn: 1464
    sdrH_on_mibi2314.blastn: 1464
    sdrH_on_mibi2315.blastn: 1683
    sdrH_on_mibi2316.blastn: 1482
    sdrH_on_mibi2317.blastn: 1464
    sdrH_on_mibi2318.blastn: 1647
    sdrH_on_mibi2319.blastn: 1644
    sdrH_on_mibi2320.blastn: 1464
    sdrH_on_mibi2321.blastn: 1464
    sdrH_on_mibi2379.blastn: 1647
    
    # -length(ebh_revcomp.fasta)=30450
    # All 81 isolates; the brace ranges expand to exactly the IDs that were
    # previously spelled out inline (1470 and 1472 are intentionally absent).
    samples=(mibi{1435..1469} mibi1471 mibi{1473..1506} mibi{2312..2321} mibi2379)
    # Search each assembly for ebh (reverse-complemented query) and summarise the hits.
    # NOTE(review): -max_target_seqs 1 keeps hits from a single contig only; a
    # 30 kb query that is split across contigs in a draft assembly could be
    # under-reported — confirm this is intended.
    for sample in "${samples[@]}"; do
      hits="ebh_on_${sample}.blastn"
      # The redirect creates the hit file even when blastn fails, and an empty
      # file looks the same as "no hit" — so report failures and skip the summary.
      if ! blastn -db "../shovill/${sample}/contigs.fa" -query ebh_revcomp.fasta -evalue 1e-50 -num_threads 15 -outfmt 6 -strand both -max_target_seqs 1 > "${hits}"; then
        echo "blastn failed for ${sample}; skipping ${hits}" >&2
        continue
      fi
      python3 analyze_blastn_res.py "${hits}"
    done
    
    ebh_on_mibi1435.blastn: 30450
    ebh_on_mibi1436.blastn: 30450
    ebh_on_mibi1437.blastn: 29490
    ebh_on_mibi1438.blastn: 25428
    ebh_on_mibi1439.blastn: 30452
    ebh_on_mibi1440.blastn: 11933
    ebh_on_mibi1441.blastn: 30450
    ebh_on_mibi1442.blastn: 30463
    ebh_on_mibi1443.blastn: 30450
    ebh_on_mibi1444.blastn: 30450
    ebh_on_mibi1445.blastn: 30450
    ebh_on_mibi1446.blastn: 30450
    ebh_on_mibi1447.blastn: 30450
    ebh_on_mibi1448.blastn: 30450
    ebh_on_mibi1449.blastn: 30450
    ebh_on_mibi1450.blastn: 30450
    ebh_on_mibi1451.blastn: 30450
    ebh_on_mibi1452.blastn: 21401
    ebh_on_mibi1453.blastn: 30450
    ebh_on_mibi1454.blastn: 11933
    ebh_on_mibi1455.blastn: 30450
    ebh_on_mibi1456.blastn: 11933
    ebh_on_mibi1457.blastn: 30450
    ebh_on_mibi1458.blastn: 29955
    ebh_on_mibi1459.blastn: 11933
    ebh_on_mibi1460.blastn: 30450
    ebh_on_mibi1461.blastn: 11933
    ebh_on_mibi1462.blastn: 30450
    ebh_on_mibi1463.blastn: 20602
    ebh_on_mibi1464.blastn: 30450
    ebh_on_mibi1465.blastn: 30450
    ebh_on_mibi1466.blastn: 14964
    ebh_on_mibi1467.blastn: 30463
    ebh_on_mibi1468.blastn: 30450
    ebh_on_mibi1469.blastn: 30450
    ebh_on_mibi1471.blastn: 25428
    ebh_on_mibi1473.blastn: 30450
    ebh_on_mibi1474.blastn: 30450
    ebh_on_mibi1475.blastn: 30450
    ebh_on_mibi1476.blastn: 30450
    ebh_on_mibi1477.blastn: 30384
    ebh_on_mibi1478.blastn: 11933
    ebh_on_mibi1479.blastn: 30450
    ebh_on_mibi1480.blastn: 11933
    ebh_on_mibi1481.blastn: 25694
    ebh_on_mibi1482.blastn: 30450
    ebh_on_mibi1483.blastn: 30450
    ebh_on_mibi1484.blastn: 30450
    ebh_on_mibi1485.blastn: 11933
    ebh_on_mibi1486.blastn: 30450
    ebh_on_mibi1487.blastn: 11933
    ebh_on_mibi1488.blastn: 30450
    ebh_on_mibi1489.blastn: 30450
    ebh_on_mibi1490.blastn: 30450
    ebh_on_mibi1491.blastn: 30450
    ebh_on_mibi1492.blastn: 30450
    ebh_on_mibi1493.blastn: 30465
    ebh_on_mibi1494.blastn: 30450
    ebh_on_mibi1495.blastn: 30450
    ebh_on_mibi1496.blastn: 11933
    ebh_on_mibi1497.blastn: 30450
    ebh_on_mibi1498.blastn: 11933
    ebh_on_mibi1499.blastn: 30450
    ebh_on_mibi1500.blastn: 30450
    ebh_on_mibi1501.blastn: 30450
    ebh_on_mibi1502.blastn: 11933
    ebh_on_mibi1503.blastn: 30450
    ebh_on_mibi1504.blastn: 30450
    ebh_on_mibi1505.blastn: 30450
    ebh_on_mibi1506.blastn: 30450
    ebh_on_mibi2312.blastn: 30450
    ebh_on_mibi2313.blastn: 30450
    ebh_on_mibi2314.blastn: 30450
    ebh_on_mibi2315.blastn: 30461
    ebh_on_mibi2316.blastn: 30450
    ebh_on_mibi2317.blastn: 30450
    ebh_on_mibi2318.blastn: 11933
    ebh_on_mibi2319.blastn: 11933
    ebh_on_mibi2320.blastn: 30110
    ebh_on_mibi2321.blastn: 30450
    ebh_on_mibi2379.blastn: 30450
    
    # For genes: ebpS, tagB, capC, sepA, dltA, fmtC
    # -length(SE0760)=1383
    # All 81 isolates; the brace ranges expand to exactly the IDs that were
    # previously spelled out inline (1470 and 1472 are intentionally absent).
    samples=(mibi{1435..1469} mibi1471 mibi{1473..1506} mibi{2312..2321} mibi2379)
    # Search each assembly for ebpS (reverse-complemented query) and summarise the hits.
    for sample in "${samples[@]}"; do
      hits="ebpS_on_${sample}.blastn"
      # The redirect creates the hit file even when blastn fails, and an empty
      # file looks the same as "no hit" — so report failures and skip the summary.
      if ! blastn -db "../shovill/${sample}/contigs.fa" -query ebpS_revcomp.fasta -evalue 1e-50 -num_threads 15 -outfmt 6 -strand both -max_target_seqs 1 > "${hits}"; then
        echo "blastn failed for ${sample}; skipping ${hits}" >&2
        continue
      fi
      python3 analyze_blastn_res.py "${hits}"
    done
    ebpS_on_mibi1435.blastn: 1383
    ebpS_on_mibi1436.blastn: 1383
    ebpS_on_mibi1437.blastn: 1383
    ebpS_on_mibi1438.blastn: 1383
    ebpS_on_mibi1439.blastn: 1383
    ebpS_on_mibi1440.blastn: 1383
    ebpS_on_mibi1441.blastn: 1383
    ebpS_on_mibi1442.blastn: 1408
    ebpS_on_mibi1443.blastn: 1383
    ebpS_on_mibi1444.blastn: 1383
    ebpS_on_mibi1445.blastn: 1383
    ebpS_on_mibi1446.blastn: 1383
    ebpS_on_mibi1447.blastn: 1383
    ebpS_on_mibi1448.blastn: 1383
    ebpS_on_mibi1449.blastn: 1383
    ebpS_on_mibi1450.blastn: 1383
    ebpS_on_mibi1451.blastn: 1383
    ebpS_on_mibi1452.blastn: 1408
    ebpS_on_mibi1453.blastn: 1383
    ebpS_on_mibi1454.blastn: 1383
    ebpS_on_mibi1455.blastn: 1383
    ebpS_on_mibi1456.blastn: 1383
    ebpS_on_mibi1457.blastn: 1383
    ebpS_on_mibi1458.blastn: 1408
    ebpS_on_mibi1459.blastn: 1383
    ebpS_on_mibi1460.blastn: 1383
    ebpS_on_mibi1461.blastn: 1383
    ebpS_on_mibi1462.blastn: 1383
    ebpS_on_mibi1463.blastn: 1383
    ebpS_on_mibi1464.blastn: 1392
    ebpS_on_mibi1465.blastn: 1383
    ebpS_on_mibi1466.blastn: 1383
    ebpS_on_mibi1467.blastn: 1420
    ebpS_on_mibi1468.blastn: 1383
    ebpS_on_mibi1469.blastn: 1383
    ebpS_on_mibi1471.blastn: 1383
    ebpS_on_mibi1473.blastn: 1383
    ebpS_on_mibi1474.blastn: 1383
    ebpS_on_mibi1475.blastn: 1383
    ebpS_on_mibi1476.blastn: 1383
    ebpS_on_mibi1477.blastn: 1275
    ebpS_on_mibi1478.blastn: 1383
    ebpS_on_mibi1479.blastn: 1383
    ebpS_on_mibi1480.blastn: 1383
    ebpS_on_mibi1481.blastn: 1383
    ebpS_on_mibi1482.blastn: 1383
    ebpS_on_mibi1483.blastn: 1383
    ebpS_on_mibi1484.blastn: 1383
    ebpS_on_mibi1485.blastn: 1383
    ebpS_on_mibi1486.blastn: 1383
    ebpS_on_mibi1487.blastn: 1383
    ebpS_on_mibi1488.blastn: 1383
    ebpS_on_mibi1489.blastn: 1383
    ebpS_on_mibi1490.blastn: 1383
    ebpS_on_mibi1491.blastn: 1383
    ebpS_on_mibi1492.blastn: 1383
    ebpS_on_mibi1493.blastn: 1420
    ebpS_on_mibi1494.blastn: 1383
    ebpS_on_mibi1495.blastn: 1383
    ebpS_on_mibi1496.blastn: 1383
    ebpS_on_mibi1497.blastn: 1383
    ebpS_on_mibi1498.blastn: 1383
    ebpS_on_mibi1499.blastn: 1383
    ebpS_on_mibi1500.blastn: 1383
    ebpS_on_mibi1501.blastn: 1383
    ebpS_on_mibi1502.blastn: 1383
    ebpS_on_mibi1503.blastn: 1383
    ebpS_on_mibi1504.blastn: 1408
    ebpS_on_mibi1505.blastn: 1383
    ebpS_on_mibi1506.blastn: 1383
    ebpS_on_mibi2312.blastn: 1383
    ebpS_on_mibi2313.blastn: 1383
    ebpS_on_mibi2314.blastn: 1383
    ebpS_on_mibi2315.blastn: 1383
    ebpS_on_mibi2316.blastn: 1383
    ebpS_on_mibi2317.blastn: 1383
    ebpS_on_mibi2318.blastn: 1383
    ebpS_on_mibi2319.blastn: 1383
    ebpS_on_mibi2320.blastn: 1383
    ebpS_on_mibi2321.blastn: 1383
    ebpS_on_mibi2379.blastn: 1383
    
    # -length(tagB)=1680
    #mibi1435.gff3:contig_2  Prodigal        CDS     146190  147869  .       -       0       ID=MMJCKL_03035;Name=CDP-glycerol glycerophosphotransferase%2C TagB/SpsB family;locus_tag=MMJCKL_03035;product=CDP-glycerol glycerophosphotransferase%2C TagB/SpsB family;Dbxref=COG:COG1887,COG:MI,RefSeq:WP_001831509.1,SO:0001217,UniParc:UPI00004BA7A8,UniRef:UniRef100_A0A4Q9W688,UniRef:UniRef50_A0A0H2VI21,UniRef:UniRef90_A0A0H2VI21;gene=tagB
    # Cut tagB (contig_2:146190-147869, 1680 bp) out of the mibi1435 assembly,
    # then reverse-complement it because the CDS is annotated on the minus strand.
    samtools faidx mibi1435.fna contig_2:146190-147869 > tagB_mibi1435.fasta
    revcomp tagB_mibi1435.fasta > tagB_mibi1435_revcomp.fasta
    # All 81 isolates; the brace ranges expand to exactly the IDs that were
    # previously spelled out inline (1470 and 1472 are intentionally absent).
    samples=(mibi{1435..1469} mibi1471 mibi{1473..1506} mibi{2312..2321} mibi2379)
    # Search each assembly for tagB and summarise the hits right away.
    for sample in "${samples[@]}"; do
      hits="tagB_on_${sample}.blastn"
      # The redirect creates the hit file even when blastn fails, and an empty
      # file looks the same as "no hit" — so report failures and skip the summary.
      if ! blastn -db "../shovill/${sample}/contigs.fa" -query tagB_mibi1435_revcomp.fasta -evalue 1e-50 -num_threads 15 -outfmt 6 -strand both -max_target_seqs 1 > "${hits}"; then
        echo "blastn failed for ${sample}; skipping ${hits}" >&2
        continue
      fi
      python3 analyze_blastn_res.py "${hits}"
    done
    tagB_on_mibi1435.blastn: 1680
    tagB_on_mibi1436.blastn: 1680
    tagB_on_mibi1437.blastn: 1680
    tagB_on_mibi1438.blastn: 1680
    tagB_on_mibi1439.blastn: 1680
    tagB_on_mibi1440.blastn: 1680
    tagB_on_mibi1441.blastn: 1680
    tagB_on_mibi1442.blastn: 1680
    tagB_on_mibi1443.blastn: 1680
    tagB_on_mibi1444.blastn: 1680
    tagB_on_mibi1445.blastn: 1680
    tagB_on_mibi1446.blastn: 1680
    tagB_on_mibi1447.blastn: 1680
    tagB_on_mibi1448.blastn: 1680
    tagB_on_mibi1449.blastn: 1680
    tagB_on_mibi1450.blastn: 1680
    tagB_on_mibi1451.blastn: 1680
    tagB_on_mibi1452.blastn: 1680
    tagB_on_mibi1453.blastn: 1680
    tagB_on_mibi1454.blastn: 1680
    tagB_on_mibi1455.blastn: 1680
    tagB_on_mibi1456.blastn: 1680
    tagB_on_mibi1457.blastn: 1680
    tagB_on_mibi1458.blastn: 1680
    tagB_on_mibi1459.blastn: 1680
    tagB_on_mibi1460.blastn: 1680
    tagB_on_mibi1461.blastn: 1680
    tagB_on_mibi1462.blastn: 1680
    tagB_on_mibi1463.blastn: 1680
    tagB_on_mibi1464.blastn: 1680
    tagB_on_mibi1465.blastn: 1680
    tagB_on_mibi1466.blastn: 1680
    tagB_on_mibi1467.blastn: 1680
    tagB_on_mibi1468.blastn: 1680
    tagB_on_mibi1469.blastn: 1680
    tagB_on_mibi1471.blastn: 1680
    tagB_on_mibi1473.blastn: 1680
    tagB_on_mibi1474.blastn: 1680
    tagB_on_mibi1475.blastn: 1680
    tagB_on_mibi1476.blastn: 1680
    tagB_on_mibi1477.blastn: 1680
    tagB_on_mibi1478.blastn: 1680
    tagB_on_mibi1479.blastn: 1680
    tagB_on_mibi1480.blastn: 1680
    tagB_on_mibi1481.blastn: 1680
    tagB_on_mibi1482.blastn: 1680
    tagB_on_mibi1483.blastn: 1680
    tagB_on_mibi1484.blastn: 1680
    tagB_on_mibi1485.blastn: 1680
    tagB_on_mibi1486.blastn: 1680
    tagB_on_mibi1487.blastn: 1680
    tagB_on_mibi1488.blastn: 1680
    tagB_on_mibi1489.blastn: 1680
    tagB_on_mibi1490.blastn: 1680
    tagB_on_mibi1491.blastn: 1680
    tagB_on_mibi1492.blastn: 1680
    tagB_on_mibi1493.blastn: 1680
    tagB_on_mibi1494.blastn: 1680
    tagB_on_mibi1495.blastn: 1680
    tagB_on_mibi1496.blastn: 1680
    tagB_on_mibi1497.blastn: 1680
    tagB_on_mibi1498.blastn: 1680
    tagB_on_mibi1499.blastn: 1680
    tagB_on_mibi1500.blastn: 1680
    tagB_on_mibi1501.blastn: 1680
    tagB_on_mibi1502.blastn: 1680
    tagB_on_mibi1503.blastn: 1680
    tagB_on_mibi1504.blastn: 1680
    tagB_on_mibi1505.blastn: 1680
    tagB_on_mibi1506.blastn: 1680
    tagB_on_mibi2312.blastn: 1680
    tagB_on_mibi2313.blastn: 1680
    tagB_on_mibi2314.blastn: 1680
    tagB_on_mibi2315.blastn: 1680
    tagB_on_mibi2316.blastn: 1680
    tagB_on_mibi2317.blastn: 1680
    tagB_on_mibi2318.blastn: 1680
    tagB_on_mibi2319.blastn: 1680
    tagB_on_mibi2320.blastn: 1680
    tagB_on_mibi2321.blastn: 1680
    tagB_on_mibi2379.blastn: 1680
    
    # -length(capC)=450
    # All 81 isolates; the brace ranges expand to exactly the IDs that were
    # previously spelled out inline (1470 and 1472 are intentionally absent).
    samples=(mibi{1435..1469} mibi1471 mibi{1473..1506} mibi{2312..2321} mibi2379)
    # Search each assembly for capC and summarise the hits right away.
    for sample in "${samples[@]}"; do
      hits="capC_on_${sample}.blastn"
      # The redirect creates the hit file even when blastn fails (e.g. missing
      # query or database), and an empty file looks the same as "no hit" — so
      # report failures and skip the summary instead of recording a silent 0.
      if ! blastn -db "../shovill/${sample}/contigs.fa" -query capC.fasta -evalue 1e-50 -num_threads 15 -outfmt 6 -strand both -max_target_seqs 1 > "${hits}"; then
        echo "blastn failed for ${sample}; skipping ${hits}" >&2
        continue
      fi
      python3 analyze_blastn_res.py "${hits}"
    done
    capC_on_mibi1435.blastn: 0
    capC_on_mibi1436.blastn: 0
    capC_on_mibi1437.blastn: 0
    capC_on_mibi1438.blastn: 0
    capC_on_mibi1439.blastn: 0
    capC_on_mibi1440.blastn: 0
    capC_on_mibi1441.blastn: 0
    capC_on_mibi1442.blastn: 0
    capC_on_mibi1443.blastn: 0
    capC_on_mibi1444.blastn: 0
    capC_on_mibi1445.blastn: 0
    capC_on_mibi1446.blastn: 0
    capC_on_mibi1447.blastn: 0
    capC_on_mibi1448.blastn: 0
    capC_on_mibi1449.blastn: 0
    capC_on_mibi1450.blastn: 0
    capC_on_mibi1451.blastn: 0
    capC_on_mibi1452.blastn: 0
    capC_on_mibi1453.blastn: 0
    capC_on_mibi1454.blastn: 0
    capC_on_mibi1455.blastn: 0
    capC_on_mibi1456.blastn: 0
    capC_on_mibi1457.blastn: 0
    capC_on_mibi1458.blastn: 0
    capC_on_mibi1459.blastn: 0
    capC_on_mibi1460.blastn: 0
    capC_on_mibi1461.blastn: 0
    capC_on_mibi1462.blastn: 0
    capC_on_mibi1463.blastn: 0
    capC_on_mibi1464.blastn: 0
    capC_on_mibi1465.blastn: 0
    capC_on_mibi1466.blastn: 0
    capC_on_mibi1467.blastn: 0
    capC_on_mibi1468.blastn: 0
    capC_on_mibi1469.blastn: 0
    capC_on_mibi1471.blastn: 0
    capC_on_mibi1473.blastn: 0
    capC_on_mibi1474.blastn: 0
    capC_on_mibi1475.blastn: 0
    capC_on_mibi1476.blastn: 0
    capC_on_mibi1477.blastn: 0
    capC_on_mibi1478.blastn: 0
    capC_on_mibi1479.blastn: 0
    capC_on_mibi1480.blastn: 0
    capC_on_mibi1481.blastn: 0
    capC_on_mibi1482.blastn: 0
    capC_on_mibi1483.blastn: 0
    capC_on_mibi1484.blastn: 0
    capC_on_mibi1485.blastn: 0
    capC_on_mibi1486.blastn: 0
    capC_on_mibi1487.blastn: 0
    capC_on_mibi1488.blastn: 0
    capC_on_mibi1489.blastn: 0
    capC_on_mibi1490.blastn: 0
    capC_on_mibi1491.blastn: 0
    capC_on_mibi1492.blastn: 0
    capC_on_mibi1493.blastn: 0
    capC_on_mibi1494.blastn: 0
    capC_on_mibi1495.blastn: 0
    capC_on_mibi1496.blastn: 0
    capC_on_mibi1497.blastn: 0
    capC_on_mibi1498.blastn: 0
    capC_on_mibi1499.blastn: 0
    capC_on_mibi1500.blastn: 0
    capC_on_mibi1501.blastn: 0
    capC_on_mibi1502.blastn: 0
    capC_on_mibi1503.blastn: 0
    capC_on_mibi1504.blastn: 0
    capC_on_mibi1505.blastn: 0
    capC_on_mibi1506.blastn: 0
    capC_on_mibi2312.blastn: 0
    capC_on_mibi2313.blastn: 0
    capC_on_mibi2314.blastn: 0
    capC_on_mibi2315.blastn: 0
    capC_on_mibi2316.blastn: 0
    capC_on_mibi2317.blastn: 0
    capC_on_mibi2318.blastn: 0
    capC_on_mibi2319.blastn: 0
    capC_on_mibi2320.blastn: 0
    capC_on_mibi2321.blastn: 0
    capC_on_mibi2379.blastn: 0
    
    # -length(sepA_mibi1435.fasta)=1524
    #contig_7        Prodigal        CDS     52151   53674   .       +       0       ID=MMJCKL_06215;Name=Extracellular elastase;locus_tag=MMJCKL_06215;product=Extracellular elastase;Dbxref=COG:COG3227,COG:O,EC:3.4.24.-,GO:0004222,GO:0005576,GO:0006508,GO:0046872,KEGG:K01401,RefSeq:WP_002486087.1,SO:0001217,UniParc:UPI00024E17FE,UniRef:UniRef100_UPI00066C9785,UniRef:UniRef50_P0C0Q3,UniRef:UniRef90_P0C0Q3;gene=sepA
    # Cut sepA (contig_7:52151-53674, 1524 bp, plus strand) out of the mibi1435
    # assembly; no reverse-complement is needed for a plus-strand CDS.
    samtools faidx mibi1435.fna contig_7:52151-53674 > sepA_mibi1435.fasta
    # All 81 isolates; the brace ranges expand to exactly the IDs that were
    # previously spelled out inline (1470 and 1472 are intentionally absent).
    samples=(mibi{1435..1469} mibi1471 mibi{1473..1506} mibi{2312..2321} mibi2379)
    # Search each assembly for sepA and summarise the hits right away.
    for sample in "${samples[@]}"; do
      hits="sepA_on_${sample}.blastn"
      # The redirect creates the hit file even when blastn fails, and an empty
      # file looks the same as "no hit" — so report failures and skip the summary.
      if ! blastn -db "../shovill/${sample}/contigs.fa" -query sepA_mibi1435.fasta -evalue 1e-50 -num_threads 15 -outfmt 6 -strand both -max_target_seqs 1 > "${hits}"; then
        echo "blastn failed for ${sample}; skipping ${hits}" >&2
        continue
      fi
      python3 analyze_blastn_res.py "${hits}"
    done
    sepA_on_mibi1435.blastn: 1524
    sepA_on_mibi1436.blastn: 1524
    sepA_on_mibi1437.blastn: 1523
    sepA_on_mibi1438.blastn: 1524
    sepA_on_mibi1439.blastn: 1523
    sepA_on_mibi1440.blastn: 1523
    sepA_on_mibi1441.blastn: 1523
    sepA_on_mibi1442.blastn: 1523
    sepA_on_mibi1443.blastn: 1523
    sepA_on_mibi1444.blastn: 1523
    sepA_on_mibi1445.blastn: 1523
    sepA_on_mibi1446.blastn: 1523
    sepA_on_mibi1447.blastn: 1523
    sepA_on_mibi1448.blastn: 1524
    sepA_on_mibi1449.blastn: 1523
    sepA_on_mibi1450.blastn: 1523
    sepA_on_mibi1451.blastn: 1523
    sepA_on_mibi1452.blastn: 1523
    sepA_on_mibi1453.blastn: 1523
    sepA_on_mibi1454.blastn: 1523
    sepA_on_mibi1455.blastn: 1524
    sepA_on_mibi1456.blastn: 1523
    sepA_on_mibi1457.blastn: 1523
    sepA_on_mibi1458.blastn: 1523
    sepA_on_mibi1459.blastn: 1523
    sepA_on_mibi1460.blastn: 1523
    sepA_on_mibi1461.blastn: 1523
    sepA_on_mibi1462.blastn: 1523
    sepA_on_mibi1463.blastn: 1523
    sepA_on_mibi1464.blastn: 1523
    sepA_on_mibi1465.blastn: 1523
    sepA_on_mibi1466.blastn: 1523
    sepA_on_mibi1467.blastn: 1524
    sepA_on_mibi1468.blastn: 1523
    sepA_on_mibi1469.blastn: 1523
    sepA_on_mibi1471.blastn: 1524
    sepA_on_mibi1473.blastn: 1523
    sepA_on_mibi1474.blastn: 1524
    sepA_on_mibi1475.blastn: 1523
    sepA_on_mibi1476.blastn: 1523
    sepA_on_mibi1477.blastn: 1524
    sepA_on_mibi1478.blastn: 1523
    sepA_on_mibi1479.blastn: 1523
    sepA_on_mibi1480.blastn: 1523
    sepA_on_mibi1481.blastn: 1524
    sepA_on_mibi1482.blastn: 1524
    sepA_on_mibi1483.blastn: 1523
    sepA_on_mibi1484.blastn: 1523
    sepA_on_mibi1485.blastn: 1523
    sepA_on_mibi1486.blastn: 1523
    sepA_on_mibi1487.blastn: 1523
    sepA_on_mibi1488.blastn: 1523
    sepA_on_mibi1489.blastn: 1523
    sepA_on_mibi1490.blastn: 1523
    sepA_on_mibi1491.blastn: 1523
    sepA_on_mibi1492.blastn: 1523
    sepA_on_mibi1493.blastn: 1523
    sepA_on_mibi1494.blastn: 1523
    sepA_on_mibi1495.blastn: 1523
    sepA_on_mibi1496.blastn: 1523
    sepA_on_mibi1497.blastn: 1523
    sepA_on_mibi1498.blastn: 1523
    sepA_on_mibi1499.blastn: 1523
    sepA_on_mibi1500.blastn: 1523
    sepA_on_mibi1501.blastn: 1523
    sepA_on_mibi1502.blastn: 1523
    sepA_on_mibi1503.blastn: 1523
    sepA_on_mibi1504.blastn: 1523
    sepA_on_mibi1505.blastn: 1523
    sepA_on_mibi1506.blastn: 1523
    sepA_on_mibi2312.blastn: 1523
    sepA_on_mibi2313.blastn: 1523
    sepA_on_mibi2314.blastn: 1523
    sepA_on_mibi2315.blastn: 1523
    sepA_on_mibi2316.blastn: 1523
    sepA_on_mibi2317.blastn: 1523
    sepA_on_mibi2318.blastn: 1523
    sepA_on_mibi2319.blastn: 1523
    sepA_on_mibi2320.blastn: 1523
    sepA_on_mibi2321.blastn: 1523
    sepA_on_mibi2379.blastn: 1523
    
    # -length(dltA.fasta)=1458
    # Screen every assembly with dltA.fasta and summarise each tabular report right away.
    # A single loop keeps the sample list in one place, so search and summary cannot drift apart.
    # NOTE(review): a summary value above the query length (e.g. 1464 for this 1458-bp query)
    # suggests gapped or multiple HSPs are being added up; confirm in analyze_blastn_res.py.
    for sample in mibi1435 mibi1436 mibi1437 mibi1438 mibi1439 mibi1440 mibi1441 mibi1442 mibi1443 mibi1444 mibi1445 mibi1446 mibi1447 mibi1448 mibi1449 mibi1450 mibi1451 mibi1452 mibi1453 mibi1454 mibi1455 mibi1456 mibi1457 mibi1458 mibi1459 mibi1460 mibi1461 mibi1462 mibi1463 mibi1464 mibi1465 mibi1466 mibi1467 mibi1468 mibi1469 mibi1471 mibi1473 mibi1474 mibi1475 mibi1476 mibi1477 mibi1478 mibi1479 mibi1480 mibi1481 mibi1482 mibi1483 mibi1484 mibi1485 mibi1486 mibi1487 mibi1488 mibi1489 mibi1490 mibi1491 mibi1492 mibi1493 mibi1494 mibi1495 mibi1496 mibi1497 mibi1498 mibi1499 mibi1500 mibi1501 mibi1502 mibi1503 mibi1504 mibi1505 mibi1506 mibi2312 mibi2313 mibi2314 mibi2315 mibi2316 mibi2317 mibi2318 mibi2319 mibi2320 mibi2321 mibi2379; do
        # The redirect creates the report file even when blastn fails, so flag failures
        # explicitly; otherwise an empty report is indistinguishable from "no hit".
        blastn -db "../shovill/${sample}/contigs.fa" -query dltA.fasta -evalue 1e-50 -num_threads 15 -outfmt 6 -strand both -max_target_seqs 1 > "dltA_on_${sample}.blastn" \
            || echo "WARNING: blastn failed for ${sample} (dltA)" >&2
        python3 analyze_blastn_res.py "dltA_on_${sample}.blastn"
    done
    dltA_on_mibi1435.blastn: 1458
    dltA_on_mibi1436.blastn: 1458
    dltA_on_mibi1437.blastn: 1458
    dltA_on_mibi1438.blastn: 1458
    dltA_on_mibi1439.blastn: 1458
    dltA_on_mibi1440.blastn: 1458
    dltA_on_mibi1441.blastn: 1458
    dltA_on_mibi1442.blastn: 1458
    dltA_on_mibi1443.blastn: 1458
    dltA_on_mibi1444.blastn: 1458
    dltA_on_mibi1445.blastn: 1458
    dltA_on_mibi1446.blastn: 1458
    dltA_on_mibi1447.blastn: 1458
    dltA_on_mibi1448.blastn: 1464
    dltA_on_mibi1449.blastn: 1458
    dltA_on_mibi1450.blastn: 1458
    dltA_on_mibi1451.blastn: 1458
    dltA_on_mibi1452.blastn: 1458
    dltA_on_mibi1453.blastn: 1458
    dltA_on_mibi1454.blastn: 1458
    dltA_on_mibi1455.blastn: 1458
    dltA_on_mibi1456.blastn: 1458
    dltA_on_mibi1457.blastn: 1458
    dltA_on_mibi1458.blastn: 1458
    dltA_on_mibi1459.blastn: 1458
    dltA_on_mibi1460.blastn: 1458
    dltA_on_mibi1461.blastn: 1458
    dltA_on_mibi1462.blastn: 1458
    dltA_on_mibi1463.blastn: 1458
    dltA_on_mibi1464.blastn: 1458
    dltA_on_mibi1465.blastn: 1458
    dltA_on_mibi1466.blastn: 1458
    dltA_on_mibi1467.blastn: 1458
    dltA_on_mibi1468.blastn: 1458
    dltA_on_mibi1469.blastn: 1458
    dltA_on_mibi1471.blastn: 1458
    dltA_on_mibi1473.blastn: 1458
    dltA_on_mibi1474.blastn: 1458
    dltA_on_mibi1475.blastn: 1458
    dltA_on_mibi1476.blastn: 1458
    dltA_on_mibi1477.blastn: 1458
    dltA_on_mibi1478.blastn: 1458
    dltA_on_mibi1479.blastn: 1458
    dltA_on_mibi1480.blastn: 1458
    dltA_on_mibi1481.blastn: 1458
    dltA_on_mibi1482.blastn: 1458
    dltA_on_mibi1483.blastn: 1458
    dltA_on_mibi1484.blastn: 1458
    dltA_on_mibi1485.blastn: 1458
    dltA_on_mibi1486.blastn: 1458
    dltA_on_mibi1487.blastn: 1458
    dltA_on_mibi1488.blastn: 1458
    dltA_on_mibi1489.blastn: 1458
    dltA_on_mibi1490.blastn: 1458
    dltA_on_mibi1491.blastn: 1458
    dltA_on_mibi1492.blastn: 1458
    dltA_on_mibi1493.blastn: 1458
    dltA_on_mibi1494.blastn: 1458
    dltA_on_mibi1495.blastn: 1458
    dltA_on_mibi1496.blastn: 1458
    dltA_on_mibi1497.blastn: 1458
    dltA_on_mibi1498.blastn: 1458
    dltA_on_mibi1499.blastn: 1458
    dltA_on_mibi1500.blastn: 1458
    dltA_on_mibi1501.blastn: 1458
    dltA_on_mibi1502.blastn: 1458
    dltA_on_mibi1503.blastn: 1458
    dltA_on_mibi1504.blastn: 1458
    dltA_on_mibi1505.blastn: 1458
    dltA_on_mibi1506.blastn: 1458
    dltA_on_mibi2312.blastn: 1458
    dltA_on_mibi2313.blastn: 1458
    dltA_on_mibi2314.blastn: 1458
    dltA_on_mibi2315.blastn: 1458
    dltA_on_mibi2316.blastn: 1458
    dltA_on_mibi2317.blastn: 1458
    dltA_on_mibi2318.blastn: 1458
    dltA_on_mibi2319.blastn: 1458
    dltA_on_mibi2320.blastn: 1458
    dltA_on_mibi2321.blastn: 1458
    dltA_on_mibi2379.blastn: 1458
    
    # -length(fmtC.fasta)=2523
    # Screen every assembly with fmtC.fasta and summarise each tabular report right away.
    # A single loop keeps the sample list in one place, so search and summary cannot drift apart.
    # NOTE(review): blastn runs the megablast task by default, which targets highly similar
    # sequences. If fmtC.fasta comes from a different species, confirm that an all-zero result
    # is not a sensitivity artefact (e.g. rerun with -task dc-megablast or -task blastn).
    for sample in mibi1435 mibi1436 mibi1437 mibi1438 mibi1439 mibi1440 mibi1441 mibi1442 mibi1443 mibi1444 mibi1445 mibi1446 mibi1447 mibi1448 mibi1449 mibi1450 mibi1451 mibi1452 mibi1453 mibi1454 mibi1455 mibi1456 mibi1457 mibi1458 mibi1459 mibi1460 mibi1461 mibi1462 mibi1463 mibi1464 mibi1465 mibi1466 mibi1467 mibi1468 mibi1469 mibi1471 mibi1473 mibi1474 mibi1475 mibi1476 mibi1477 mibi1478 mibi1479 mibi1480 mibi1481 mibi1482 mibi1483 mibi1484 mibi1485 mibi1486 mibi1487 mibi1488 mibi1489 mibi1490 mibi1491 mibi1492 mibi1493 mibi1494 mibi1495 mibi1496 mibi1497 mibi1498 mibi1499 mibi1500 mibi1501 mibi1502 mibi1503 mibi1504 mibi1505 mibi1506 mibi2312 mibi2313 mibi2314 mibi2315 mibi2316 mibi2317 mibi2318 mibi2319 mibi2320 mibi2321 mibi2379; do
        # The redirect creates the report file even when blastn fails, so flag failures
        # explicitly; otherwise an empty report is indistinguishable from "no hit".
        blastn -db "../shovill/${sample}/contigs.fa" -query fmtC.fasta -evalue 1e-50 -num_threads 15 -outfmt 6 -strand both -max_target_seqs 1 > "fmtC_on_${sample}.blastn" \
            || echo "WARNING: blastn failed for ${sample} (fmtC)" >&2
        python3 analyze_blastn_res.py "fmtC_on_${sample}.blastn"
    done
    fmtC_on_mibi1435.blastn: 0
    fmtC_on_mibi1436.blastn: 0
    fmtC_on_mibi1437.blastn: 0
    fmtC_on_mibi1438.blastn: 0
    fmtC_on_mibi1439.blastn: 0
    fmtC_on_mibi1440.blastn: 0
    fmtC_on_mibi1441.blastn: 0
    fmtC_on_mibi1442.blastn: 0
    fmtC_on_mibi1443.blastn: 0
    fmtC_on_mibi1444.blastn: 0
    fmtC_on_mibi1445.blastn: 0
    fmtC_on_mibi1446.blastn: 0
    fmtC_on_mibi1447.blastn: 0
    fmtC_on_mibi1448.blastn: 0
    fmtC_on_mibi1449.blastn: 0
    fmtC_on_mibi1450.blastn: 0
    fmtC_on_mibi1451.blastn: 0
    fmtC_on_mibi1452.blastn: 0
    fmtC_on_mibi1453.blastn: 0
    fmtC_on_mibi1454.blastn: 0
    fmtC_on_mibi1455.blastn: 0
    fmtC_on_mibi1456.blastn: 0
    fmtC_on_mibi1457.blastn: 0
    fmtC_on_mibi1458.blastn: 0
    fmtC_on_mibi1459.blastn: 0
    fmtC_on_mibi1460.blastn: 0
    fmtC_on_mibi1461.blastn: 0
    fmtC_on_mibi1462.blastn: 0
    fmtC_on_mibi1463.blastn: 0
    fmtC_on_mibi1464.blastn: 0
    fmtC_on_mibi1465.blastn: 0
    fmtC_on_mibi1466.blastn: 0
    fmtC_on_mibi1467.blastn: 0
    fmtC_on_mibi1468.blastn: 0
    fmtC_on_mibi1469.blastn: 0
    fmtC_on_mibi1471.blastn: 0
    fmtC_on_mibi1473.blastn: 0
    fmtC_on_mibi1474.blastn: 0
    fmtC_on_mibi1475.blastn: 0
    fmtC_on_mibi1476.blastn: 0
    fmtC_on_mibi1477.blastn: 0
    fmtC_on_mibi1478.blastn: 0
    fmtC_on_mibi1479.blastn: 0
    fmtC_on_mibi1480.blastn: 0
    fmtC_on_mibi1481.blastn: 0
    fmtC_on_mibi1482.blastn: 0
    fmtC_on_mibi1483.blastn: 0
    fmtC_on_mibi1484.blastn: 0
    fmtC_on_mibi1485.blastn: 0
    fmtC_on_mibi1486.blastn: 0
    fmtC_on_mibi1487.blastn: 0
    fmtC_on_mibi1488.blastn: 0
    fmtC_on_mibi1489.blastn: 0
    fmtC_on_mibi1490.blastn: 0
    fmtC_on_mibi1491.blastn: 0
    fmtC_on_mibi1492.blastn: 0
    fmtC_on_mibi1493.blastn: 0
    fmtC_on_mibi1494.blastn: 0
    fmtC_on_mibi1495.blastn: 0
    fmtC_on_mibi1496.blastn: 0
    fmtC_on_mibi1497.blastn: 0
    fmtC_on_mibi1498.blastn: 0
    fmtC_on_mibi1499.blastn: 0
    fmtC_on_mibi1500.blastn: 0
    fmtC_on_mibi1501.blastn: 0
    fmtC_on_mibi1502.blastn: 0
    fmtC_on_mibi1503.blastn: 0
    fmtC_on_mibi1504.blastn: 0
    fmtC_on_mibi1505.blastn: 0
    fmtC_on_mibi1506.blastn: 0
    fmtC_on_mibi2312.blastn: 0
    fmtC_on_mibi2313.blastn: 0
    fmtC_on_mibi2314.blastn: 0
    fmtC_on_mibi2315.blastn: 0
    fmtC_on_mibi2316.blastn: 0
    fmtC_on_mibi2317.blastn: 0
    fmtC_on_mibi2318.blastn: 0
    fmtC_on_mibi2319.blastn: 0
    fmtC_on_mibi2320.blastn: 0
    fmtC_on_mibi2321.blastn: 0
    fmtC_on_mibi2379.blastn: 0
    
    #-- For genes: lipA (915), sceD (699; NOTE: the mibi1435 query extracted below spans contig_11:25537-26196 = 660 bp), esp (660), ecpA (1188)
    # Screen every assembly with lipA.fasta and summarise each tabular report right away.
    # A single loop keeps the sample list in one place, so search and summary cannot drift apart.
    for sample in mibi1435 mibi1436 mibi1437 mibi1438 mibi1439 mibi1440 mibi1441 mibi1442 mibi1443 mibi1444 mibi1445 mibi1446 mibi1447 mibi1448 mibi1449 mibi1450 mibi1451 mibi1452 mibi1453 mibi1454 mibi1455 mibi1456 mibi1457 mibi1458 mibi1459 mibi1460 mibi1461 mibi1462 mibi1463 mibi1464 mibi1465 mibi1466 mibi1467 mibi1468 mibi1469 mibi1471 mibi1473 mibi1474 mibi1475 mibi1476 mibi1477 mibi1478 mibi1479 mibi1480 mibi1481 mibi1482 mibi1483 mibi1484 mibi1485 mibi1486 mibi1487 mibi1488 mibi1489 mibi1490 mibi1491 mibi1492 mibi1493 mibi1494 mibi1495 mibi1496 mibi1497 mibi1498 mibi1499 mibi1500 mibi1501 mibi1502 mibi1503 mibi1504 mibi1505 mibi1506 mibi2312 mibi2313 mibi2314 mibi2315 mibi2316 mibi2317 mibi2318 mibi2319 mibi2320 mibi2321 mibi2379; do
        # The redirect creates the report file even when blastn fails, so flag failures
        # explicitly; otherwise an empty report is indistinguishable from "no hit".
        blastn -db "../shovill/${sample}/contigs.fa" -query lipA.fasta -evalue 1e-50 -num_threads 15 -outfmt 6 -strand both -max_target_seqs 1 > "./lipA_on_${sample}.blastn" \
            || echo "WARNING: blastn failed for ${sample} (lipA)" >&2
        python3 analyze_blastn_res.py "lipA_on_${sample}.blastn"
    done
    lipA_on_mibi1435.blastn: 915
    lipA_on_mibi1436.blastn: 915
    lipA_on_mibi1437.blastn: 915
    lipA_on_mibi1438.blastn: 915
    lipA_on_mibi1439.blastn: 915
    lipA_on_mibi1440.blastn: 915
    lipA_on_mibi1441.blastn: 915
    lipA_on_mibi1442.blastn: 915
    lipA_on_mibi1443.blastn: 915
    lipA_on_mibi1444.blastn: 915
    lipA_on_mibi1445.blastn: 915
    lipA_on_mibi1446.blastn: 915
    lipA_on_mibi1447.blastn: 915
    lipA_on_mibi1448.blastn: 915
    lipA_on_mibi1449.blastn: 915
    lipA_on_mibi1450.blastn: 915
    lipA_on_mibi1451.blastn: 915
    lipA_on_mibi1452.blastn: 915
    lipA_on_mibi1453.blastn: 915
    lipA_on_mibi1454.blastn: 915
    lipA_on_mibi1455.blastn: 915
    lipA_on_mibi1456.blastn: 915
    lipA_on_mibi1457.blastn: 915
    lipA_on_mibi1458.blastn: 915
    lipA_on_mibi1459.blastn: 915
    lipA_on_mibi1460.blastn: 915
    lipA_on_mibi1461.blastn: 915
    lipA_on_mibi1462.blastn: 915
    lipA_on_mibi1463.blastn: 915
    lipA_on_mibi1464.blastn: 915
    lipA_on_mibi1465.blastn: 915
    lipA_on_mibi1466.blastn: 915
    lipA_on_mibi1467.blastn: 915
    lipA_on_mibi1468.blastn: 915
    lipA_on_mibi1469.blastn: 915
    lipA_on_mibi1471.blastn: 915
    lipA_on_mibi1473.blastn: 915
    lipA_on_mibi1474.blastn: 915
    lipA_on_mibi1475.blastn: 915
    lipA_on_mibi1476.blastn: 915
    lipA_on_mibi1477.blastn: 915
    lipA_on_mibi1478.blastn: 915
    lipA_on_mibi1479.blastn: 915
    lipA_on_mibi1480.blastn: 915
    lipA_on_mibi1481.blastn: 915
    lipA_on_mibi1482.blastn: 915
    lipA_on_mibi1483.blastn: 915
    lipA_on_mibi1484.blastn: 915
    lipA_on_mibi1485.blastn: 915
    lipA_on_mibi1486.blastn: 915
    lipA_on_mibi1487.blastn: 915
    lipA_on_mibi1488.blastn: 915
    lipA_on_mibi1489.blastn: 915
    lipA_on_mibi1490.blastn: 915
    lipA_on_mibi1491.blastn: 915
    lipA_on_mibi1492.blastn: 915
    lipA_on_mibi1493.blastn: 915
    lipA_on_mibi1494.blastn: 915
    lipA_on_mibi1495.blastn: 915
    lipA_on_mibi1496.blastn: 915
    lipA_on_mibi1497.blastn: 915
    lipA_on_mibi1498.blastn: 915
    lipA_on_mibi1499.blastn: 915
    lipA_on_mibi1500.blastn: 915
    lipA_on_mibi1501.blastn: 915
    lipA_on_mibi1502.blastn: 915
    lipA_on_mibi1503.blastn: 915
    lipA_on_mibi1504.blastn: 915
    lipA_on_mibi1505.blastn: 915
    lipA_on_mibi1506.blastn: 915
    lipA_on_mibi2312.blastn: 915
    lipA_on_mibi2313.blastn: 915
    lipA_on_mibi2314.blastn: 915
    lipA_on_mibi2315.blastn: 915
    lipA_on_mibi2316.blastn: 915
    lipA_on_mibi2317.blastn: 915
    lipA_on_mibi2318.blastn: 915
    lipA_on_mibi2319.blastn: 915
    lipA_on_mibi2320.blastn: 915
    lipA_on_mibi2321.blastn: 915
    lipA_on_mibi2379.blastn: 915
    
    #grep "gene=sceD" mibi1435.gff3
    #contig_11       Prodigal        CDS     25537   26196   .       -       0       ID=MMJCKL_08095;Name=putative transglycosylase SceD;locus_tag=MMJCKL_08095;product=putative transglycosylase SceD;Dbxref=EC:3.2.-.-,GO:0005576,GO:0008152,GO:0016798,RefSeq:WP_001829904.1,SO:0001217,UniParc:UPI000003B952,UniRef:UniRef100_Q5HMC6,UniRef:UniRef50_Q5HEA4,UniRef:UniRef90_Q5HMC6;gene=sceD
    # Extract the sceD locus (contig_11:25537-26196, 660 bp). The GFF record above places the
    # gene on the minus strand, so the region is passed through revcomp before use as the query.
    samtools faidx mibi1435.fna contig_11:25537-26196 > sceD_mibi1435.fasta
    revcomp sceD_mibi1435.fasta > sceD_mibi1435_revcomp.fasta
    # Keep a copy of the query next to the other gene-list queries.
    cp sceD_mibi1435_revcomp.fasta ../presence_absence_matrix_on_gene_list/
    # Screen every assembly with the query and summarise each tabular report right away.
    # A single loop keeps the sample list in one place, so search and summary cannot drift apart.
    for sample in mibi1435 mibi1436 mibi1437 mibi1438 mibi1439 mibi1440 mibi1441 mibi1442 mibi1443 mibi1444 mibi1445 mibi1446 mibi1447 mibi1448 mibi1449 mibi1450 mibi1451 mibi1452 mibi1453 mibi1454 mibi1455 mibi1456 mibi1457 mibi1458 mibi1459 mibi1460 mibi1461 mibi1462 mibi1463 mibi1464 mibi1465 mibi1466 mibi1467 mibi1468 mibi1469 mibi1471 mibi1473 mibi1474 mibi1475 mibi1476 mibi1477 mibi1478 mibi1479 mibi1480 mibi1481 mibi1482 mibi1483 mibi1484 mibi1485 mibi1486 mibi1487 mibi1488 mibi1489 mibi1490 mibi1491 mibi1492 mibi1493 mibi1494 mibi1495 mibi1496 mibi1497 mibi1498 mibi1499 mibi1500 mibi1501 mibi1502 mibi1503 mibi1504 mibi1505 mibi1506 mibi2312 mibi2313 mibi2314 mibi2315 mibi2316 mibi2317 mibi2318 mibi2319 mibi2320 mibi2321 mibi2379; do
        # The redirect creates the report file even when blastn fails, so flag failures
        # explicitly; otherwise an empty report is indistinguishable from "no hit".
        blastn -db "../shovill/${sample}/contigs.fa" -query sceD_mibi1435_revcomp.fasta -evalue 1e-50 -num_threads 15 -outfmt 6 -strand both -max_target_seqs 1 > "./sceD_on_${sample}.blastn" \
            || echo "WARNING: blastn failed for ${sample} (sceD)" >&2
        python3 analyze_blastn_res.py "sceD_on_${sample}.blastn"
    done
    sceD_on_mibi1435.blastn: 660
    sceD_on_mibi1436.blastn: 660
    sceD_on_mibi1437.blastn: 660
    sceD_on_mibi1438.blastn: 660
    sceD_on_mibi1439.blastn: 660
    sceD_on_mibi1440.blastn: 660
    sceD_on_mibi1441.blastn: 660
    sceD_on_mibi1442.blastn: 660
    sceD_on_mibi1443.blastn: 660
    sceD_on_mibi1444.blastn: 660
    sceD_on_mibi1445.blastn: 660
    sceD_on_mibi1446.blastn: 660
    sceD_on_mibi1447.blastn: 660
    sceD_on_mibi1448.blastn: 660
    sceD_on_mibi1449.blastn: 660
    sceD_on_mibi1450.blastn: 660
    sceD_on_mibi1451.blastn: 660
    sceD_on_mibi1452.blastn: 660
    sceD_on_mibi1453.blastn: 660
    sceD_on_mibi1454.blastn: 660
    sceD_on_mibi1455.blastn: 660
    sceD_on_mibi1456.blastn: 660
    sceD_on_mibi1457.blastn: 660
    sceD_on_mibi1458.blastn: 660
    sceD_on_mibi1459.blastn: 660
    sceD_on_mibi1460.blastn: 660
    sceD_on_mibi1461.blastn: 660
    sceD_on_mibi1462.blastn: 660
    sceD_on_mibi1463.blastn: 660
    sceD_on_mibi1464.blastn: 660
    sceD_on_mibi1465.blastn: 660
    sceD_on_mibi1466.blastn: 660
    sceD_on_mibi1467.blastn: 660
    sceD_on_mibi1468.blastn: 660
    sceD_on_mibi1469.blastn: 660
    sceD_on_mibi1471.blastn: 660
    sceD_on_mibi1473.blastn: 660
    sceD_on_mibi1474.blastn: 660
    sceD_on_mibi1475.blastn: 660
    sceD_on_mibi1476.blastn: 660
    sceD_on_mibi1477.blastn: 660
    sceD_on_mibi1478.blastn: 660
    sceD_on_mibi1479.blastn: 660
    sceD_on_mibi1480.blastn: 660
    sceD_on_mibi1481.blastn: 660
    sceD_on_mibi1482.blastn: 660
    sceD_on_mibi1483.blastn: 660
    sceD_on_mibi1484.blastn: 660
    sceD_on_mibi1485.blastn: 660
    sceD_on_mibi1486.blastn: 660
    sceD_on_mibi1487.blastn: 660
    sceD_on_mibi1488.blastn: 660
    sceD_on_mibi1489.blastn: 660
    sceD_on_mibi1490.blastn: 660
    sceD_on_mibi1491.blastn: 660
    sceD_on_mibi1492.blastn: 660
    sceD_on_mibi1493.blastn: 660
    sceD_on_mibi1494.blastn: 660
    sceD_on_mibi1495.blastn: 660
    sceD_on_mibi1496.blastn: 660
    sceD_on_mibi1497.blastn: 660
    sceD_on_mibi1498.blastn: 660
    sceD_on_mibi1499.blastn: 660
    sceD_on_mibi1500.blastn: 660
    sceD_on_mibi1501.blastn: 660
    sceD_on_mibi1502.blastn: 660
    sceD_on_mibi1503.blastn: 660
    sceD_on_mibi1504.blastn: 660
    sceD_on_mibi1505.blastn: 660
    sceD_on_mibi1506.blastn: 660
    sceD_on_mibi2312.blastn: 660
    sceD_on_mibi2313.blastn: 660
    sceD_on_mibi2314.blastn: 660
    sceD_on_mibi2315.blastn: 660
    sceD_on_mibi2316.blastn: 660
    sceD_on_mibi2317.blastn: 660
    sceD_on_mibi2318.blastn: 660
    sceD_on_mibi2319.blastn: 660
    sceD_on_mibi2320.blastn: 660
    sceD_on_mibi2321.blastn: 660
    sceD_on_mibi2379.blastn: 660
    
    # Screen every assembly with esp.fasta and summarise each tabular report right away.
    # A single loop keeps the sample list in one place, so search and summary cannot drift apart.
    # NOTE(review): blastn runs the megablast task by default, which targets highly similar
    # sequences. If esp.fasta comes from a different species, confirm that an all-zero result
    # is not a sensitivity artefact (e.g. rerun with -task dc-megablast or -task blastn).
    for sample in mibi1435 mibi1436 mibi1437 mibi1438 mibi1439 mibi1440 mibi1441 mibi1442 mibi1443 mibi1444 mibi1445 mibi1446 mibi1447 mibi1448 mibi1449 mibi1450 mibi1451 mibi1452 mibi1453 mibi1454 mibi1455 mibi1456 mibi1457 mibi1458 mibi1459 mibi1460 mibi1461 mibi1462 mibi1463 mibi1464 mibi1465 mibi1466 mibi1467 mibi1468 mibi1469 mibi1471 mibi1473 mibi1474 mibi1475 mibi1476 mibi1477 mibi1478 mibi1479 mibi1480 mibi1481 mibi1482 mibi1483 mibi1484 mibi1485 mibi1486 mibi1487 mibi1488 mibi1489 mibi1490 mibi1491 mibi1492 mibi1493 mibi1494 mibi1495 mibi1496 mibi1497 mibi1498 mibi1499 mibi1500 mibi1501 mibi1502 mibi1503 mibi1504 mibi1505 mibi1506 mibi2312 mibi2313 mibi2314 mibi2315 mibi2316 mibi2317 mibi2318 mibi2319 mibi2320 mibi2321 mibi2379; do
        # The redirect creates the report file even when blastn fails, so flag failures
        # explicitly; otherwise an empty report is indistinguishable from "no hit".
        blastn -db "../shovill/${sample}/contigs.fa" -query esp.fasta -evalue 1e-50 -num_threads 15 -outfmt 6 -strand both -max_target_seqs 1 > "./esp_on_${sample}.blastn" \
            || echo "WARNING: blastn failed for ${sample} (esp)" >&2
        python3 analyze_blastn_res.py "esp_on_${sample}.blastn"
    done
    esp_on_mibi1435.blastn: 0
    esp_on_mibi1436.blastn: 0
    esp_on_mibi1437.blastn: 0
    esp_on_mibi1438.blastn: 0
    esp_on_mibi1439.blastn: 0
    esp_on_mibi1440.blastn: 0
    esp_on_mibi1441.blastn: 0
    esp_on_mibi1442.blastn: 0
    esp_on_mibi1443.blastn: 0
    esp_on_mibi1444.blastn: 0
    esp_on_mibi1445.blastn: 0
    esp_on_mibi1446.blastn: 0
    esp_on_mibi1447.blastn: 0
    esp_on_mibi1448.blastn: 0
    esp_on_mibi1449.blastn: 0
    esp_on_mibi1450.blastn: 0
    esp_on_mibi1451.blastn: 0
    esp_on_mibi1452.blastn: 0
    esp_on_mibi1453.blastn: 0
    esp_on_mibi1454.blastn: 0
    esp_on_mibi1455.blastn: 0
    esp_on_mibi1456.blastn: 0
    esp_on_mibi1457.blastn: 0
    esp_on_mibi1458.blastn: 0
    esp_on_mibi1459.blastn: 0
    esp_on_mibi1460.blastn: 0
    esp_on_mibi1461.blastn: 0
    esp_on_mibi1462.blastn: 0
    esp_on_mibi1463.blastn: 0
    esp_on_mibi1464.blastn: 0
    esp_on_mibi1465.blastn: 0
    esp_on_mibi1466.blastn: 0
    esp_on_mibi1467.blastn: 0
    esp_on_mibi1468.blastn: 0
    esp_on_mibi1469.blastn: 0
    esp_on_mibi1471.blastn: 0
    esp_on_mibi1473.blastn: 0
    esp_on_mibi1474.blastn: 0
    esp_on_mibi1475.blastn: 0
    esp_on_mibi1476.blastn: 0
    esp_on_mibi1477.blastn: 0
    esp_on_mibi1478.blastn: 0
    esp_on_mibi1479.blastn: 0
    esp_on_mibi1480.blastn: 0
    esp_on_mibi1481.blastn: 0
    esp_on_mibi1482.blastn: 0
    esp_on_mibi1483.blastn: 0
    esp_on_mibi1484.blastn: 0
    esp_on_mibi1485.blastn: 0
    esp_on_mibi1486.blastn: 0
    esp_on_mibi1487.blastn: 0
    esp_on_mibi1488.blastn: 0
    esp_on_mibi1489.blastn: 0
    esp_on_mibi1490.blastn: 0
    esp_on_mibi1491.blastn: 0
    esp_on_mibi1492.blastn: 0
    esp_on_mibi1493.blastn: 0
    esp_on_mibi1494.blastn: 0
    esp_on_mibi1495.blastn: 0
    esp_on_mibi1496.blastn: 0
    esp_on_mibi1497.blastn: 0
    esp_on_mibi1498.blastn: 0
    esp_on_mibi1499.blastn: 0
    esp_on_mibi1500.blastn: 0
    esp_on_mibi1501.blastn: 0
    esp_on_mibi1502.blastn: 0
    esp_on_mibi1503.blastn: 0
    esp_on_mibi1504.blastn: 0
    esp_on_mibi1505.blastn: 0
    esp_on_mibi1506.blastn: 0
    esp_on_mibi2312.blastn: 0
    esp_on_mibi2313.blastn: 0
    esp_on_mibi2314.blastn: 0
    esp_on_mibi2315.blastn: 0
    esp_on_mibi2316.blastn: 0
    esp_on_mibi2317.blastn: 0
    esp_on_mibi2318.blastn: 0
    esp_on_mibi2319.blastn: 0
    esp_on_mibi2320.blastn: 0
    esp_on_mibi2321.blastn: 0
    esp_on_mibi2379.blastn: 0
    
    #grep "gene=ecpA" mibi1435.gff3
    #contig_9        Prodigal        CDS     6382    7569    .       -       0       ID=MMJCKL_07070;Name=Extracellular cysteine protease;locus_tag=MMJCKL_07070;product=Extracellular cysteine protease;Dbxref=EC:3.4.22.-,GO:0005576,GO:0006508,GO:0008234,KEGG:K08258,RefSeq:WP_002497714.1,SO:0001217,UniParc:UPI00026C1A6F,UniRef:UniRef100_UPI00026C1A6F,UniRef:UniRef50_P81297,UniRef:UniRef90_Q5HKF6;gene=ecpA
    # Extract the ecpA locus (contig_9:6382-7569, 1188 bp). The GFF record above places the
    # gene on the minus strand, so the region is passed through revcomp before use as the query.
    samtools faidx mibi1435.fna contig_9:6382-7569 > ecpA_mibi1435.fasta
    revcomp ecpA_mibi1435.fasta > ecpA_mibi1435_revcomp.fasta
    # Keep a copy of the query next to the other gene-list queries.
    cp ecpA_mibi1435_revcomp.fasta ../presence_absence_matrix_on_gene_list/
    # Screen every assembly with the query and summarise each tabular report right away.
    # A single loop keeps the sample list in one place, so search and summary cannot drift apart.
    for sample in mibi1435 mibi1436 mibi1437 mibi1438 mibi1439 mibi1440 mibi1441 mibi1442 mibi1443 mibi1444 mibi1445 mibi1446 mibi1447 mibi1448 mibi1449 mibi1450 mibi1451 mibi1452 mibi1453 mibi1454 mibi1455 mibi1456 mibi1457 mibi1458 mibi1459 mibi1460 mibi1461 mibi1462 mibi1463 mibi1464 mibi1465 mibi1466 mibi1467 mibi1468 mibi1469 mibi1471 mibi1473 mibi1474 mibi1475 mibi1476 mibi1477 mibi1478 mibi1479 mibi1480 mibi1481 mibi1482 mibi1483 mibi1484 mibi1485 mibi1486 mibi1487 mibi1488 mibi1489 mibi1490 mibi1491 mibi1492 mibi1493 mibi1494 mibi1495 mibi1496 mibi1497 mibi1498 mibi1499 mibi1500 mibi1501 mibi1502 mibi1503 mibi1504 mibi1505 mibi1506 mibi2312 mibi2313 mibi2314 mibi2315 mibi2316 mibi2317 mibi2318 mibi2319 mibi2320 mibi2321 mibi2379; do
        # The redirect creates the report file even when blastn fails, so flag failures
        # explicitly; otherwise an empty report is indistinguishable from "no hit".
        blastn -db "../shovill/${sample}/contigs.fa" -query ecpA_mibi1435_revcomp.fasta -evalue 1e-50 -num_threads 15 -outfmt 6 -strand both -max_target_seqs 1 > "./ecpA_on_${sample}.blastn" \
            || echo "WARNING: blastn failed for ${sample} (ecpA)" >&2
        python3 analyze_blastn_res.py "ecpA_on_${sample}.blastn"
    done
    ecpA_on_mibi1435.blastn: 1188
    ecpA_on_mibi1436.blastn: 1188
    ecpA_on_mibi1437.blastn: 1188
    ecpA_on_mibi1438.blastn: 1188
    ecpA_on_mibi1439.blastn: 1188
    ecpA_on_mibi1440.blastn: 1188
    ecpA_on_mibi1441.blastn: 1188
    ecpA_on_mibi1442.blastn: 1188
    ecpA_on_mibi1443.blastn: 1188
    ecpA_on_mibi1444.blastn: 1188
    ecpA_on_mibi1445.blastn: 1188
    ecpA_on_mibi1446.blastn: 1188
    ecpA_on_mibi1447.blastn: 1188
    ecpA_on_mibi1448.blastn: 1188
    ecpA_on_mibi1449.blastn: 1188
    ecpA_on_mibi1450.blastn: 1188
    ecpA_on_mibi1451.blastn: 1188
    ecpA_on_mibi1452.blastn: 1188
    ecpA_on_mibi1453.blastn: 1188
    ecpA_on_mibi1454.blastn: 1188
    ecpA_on_mibi1455.blastn: 1188
    ecpA_on_mibi1456.blastn: 1188
    ecpA_on_mibi1457.blastn: 1188
    ecpA_on_mibi1458.blastn: 1188
    ecpA_on_mibi1459.blastn: 1188
    ecpA_on_mibi1460.blastn: 1188
    ecpA_on_mibi1461.blastn: 1188
    ecpA_on_mibi1462.blastn: 1188
    ecpA_on_mibi1463.blastn: 1188
    ecpA_on_mibi1464.blastn: 1188
    ecpA_on_mibi1465.blastn: 1188
    ecpA_on_mibi1466.blastn: 1188
    ecpA_on_mibi1467.blastn: 1188
    ecpA_on_mibi1468.blastn: 1188
    ecpA_on_mibi1469.blastn: 1188
    ecpA_on_mibi1471.blastn: 1188
    ecpA_on_mibi1473.blastn: 1188
    ecpA_on_mibi1474.blastn: 1188
    ecpA_on_mibi1475.blastn: 1188
    ecpA_on_mibi1476.blastn: 1188
    ecpA_on_mibi1477.blastn: 1188
    ecpA_on_mibi1478.blastn: 1188
    ecpA_on_mibi1479.blastn: 1188
    ecpA_on_mibi1480.blastn: 1188
    ecpA_on_mibi1481.blastn: 1188
    ecpA_on_mibi1482.blastn: 1188
    ecpA_on_mibi1483.blastn: 1188
    ecpA_on_mibi1484.blastn: 1188
    ecpA_on_mibi1485.blastn: 1188
    ecpA_on_mibi1486.blastn: 1188
    ecpA_on_mibi1487.blastn: 1188
    ecpA_on_mibi1488.blastn: 1188
    ecpA_on_mibi1489.blastn: 1188
    ecpA_on_mibi1490.blastn: 1188
    ecpA_on_mibi1491.blastn: 1188
    ecpA_on_mibi1492.blastn: 1188
    ecpA_on_mibi1493.blastn: 1188
    ecpA_on_mibi1494.blastn: 1188
    ecpA_on_mibi1495.blastn: 1188
    ecpA_on_mibi1496.blastn: 1188
    ecpA_on_mibi1497.blastn: 1188
    ecpA_on_mibi1498.blastn: 1188
    ecpA_on_mibi1499.blastn: 1188
    ecpA_on_mibi1500.blastn: 1188
    ecpA_on_mibi1501.blastn: 1188
    ecpA_on_mibi1502.blastn: 1188
    ecpA_on_mibi1503.blastn: 1188
    ecpA_on_mibi1504.blastn: 1188
    ecpA_on_mibi1505.blastn: 1188
    ecpA_on_mibi1506.blastn: 1188
    ecpA_on_mibi2312.blastn: 1188
    ecpA_on_mibi2313.blastn: 1188
    ecpA_on_mibi2314.blastn: 1188
    ecpA_on_mibi2315.blastn: 1188
    ecpA_on_mibi2316.blastn: 1188
    ecpA_on_mibi2317.blastn: 1188
    ecpA_on_mibi2318.blastn: 1188
    ecpA_on_mibi2319.blastn: 1188
    ecpA_on_mibi2320.blastn: 1188
    ecpA_on_mibi2321.blastn: 1188
    ecpA_on_mibi2379.blastn: 1188
    
    # -length(SE0760)=798
    # Screen every assembly with SE0760.fasta and summarise each tabular report right away.
    # A single loop keeps the sample list in one place, so search and summary cannot drift apart.
    for sample in mibi1435 mibi1436 mibi1437 mibi1438 mibi1439 mibi1440 mibi1441 mibi1442 mibi1443 mibi1444 mibi1445 mibi1446 mibi1447 mibi1448 mibi1449 mibi1450 mibi1451 mibi1452 mibi1453 mibi1454 mibi1455 mibi1456 mibi1457 mibi1458 mibi1459 mibi1460 mibi1461 mibi1462 mibi1463 mibi1464 mibi1465 mibi1466 mibi1467 mibi1468 mibi1469 mibi1471 mibi1473 mibi1474 mibi1475 mibi1476 mibi1477 mibi1478 mibi1479 mibi1480 mibi1481 mibi1482 mibi1483 mibi1484 mibi1485 mibi1486 mibi1487 mibi1488 mibi1489 mibi1490 mibi1491 mibi1492 mibi1493 mibi1494 mibi1495 mibi1496 mibi1497 mibi1498 mibi1499 mibi1500 mibi1501 mibi1502 mibi1503 mibi1504 mibi1505 mibi1506 mibi2312 mibi2313 mibi2314 mibi2315 mibi2316 mibi2317 mibi2318 mibi2319 mibi2320 mibi2321 mibi2379; do
        # The redirect creates the report file even when blastn fails, so flag failures
        # explicitly; otherwise an empty report is indistinguishable from "no hit".
        blastn -db "../shovill/${sample}/contigs.fa" -query SE0760.fasta -evalue 1e-50 -num_threads 15 -outfmt 6 -strand both -max_target_seqs 1 > "SE0760_on_${sample}.blastn" \
            || echo "WARNING: blastn failed for ${sample} (SE0760)" >&2
        python3 analyze_blastn_res.py "SE0760_on_${sample}.blastn"
    done
    SE0760_on_mibi1435.blastn: 798
    SE0760_on_mibi1436.blastn: 798
    SE0760_on_mibi1437.blastn: 798
    SE0760_on_mibi1438.blastn: 798
    SE0760_on_mibi1439.blastn: 798
    SE0760_on_mibi1440.blastn: 798
    SE0760_on_mibi1441.blastn: 798
    SE0760_on_mibi1442.blastn: 798
    SE0760_on_mibi1443.blastn: 798
    SE0760_on_mibi1444.blastn: 798
    SE0760_on_mibi1445.blastn: 798
    SE0760_on_mibi1446.blastn: 798
    SE0760_on_mibi1447.blastn: 798
    SE0760_on_mibi1448.blastn: 798
    SE0760_on_mibi1449.blastn: 798
    SE0760_on_mibi1450.blastn: 798
    SE0760_on_mibi1451.blastn: 798
    SE0760_on_mibi1452.blastn: 798
    SE0760_on_mibi1453.blastn: 798
    SE0760_on_mibi1454.blastn: 798
    SE0760_on_mibi1455.blastn: 798
    SE0760_on_mibi1456.blastn: 798
    SE0760_on_mibi1457.blastn: 798
    SE0760_on_mibi1458.blastn: 798
    SE0760_on_mibi1459.blastn: 798
    SE0760_on_mibi1460.blastn: 798
    SE0760_on_mibi1461.blastn: 798
    SE0760_on_mibi1462.blastn: 798
    SE0760_on_mibi1463.blastn: 798
    SE0760_on_mibi1464.blastn: 798
    SE0760_on_mibi1465.blastn: 798
    SE0760_on_mibi1466.blastn: 798
    SE0760_on_mibi1467.blastn: 799
    SE0760_on_mibi1468.blastn: 798
    SE0760_on_mibi1469.blastn: 798
    SE0760_on_mibi1471.blastn: 798
    SE0760_on_mibi1473.blastn: 798
    SE0760_on_mibi1474.blastn: 798
    SE0760_on_mibi1475.blastn: 798
    SE0760_on_mibi1476.blastn: 798
    SE0760_on_mibi1477.blastn: 798
    SE0760_on_mibi1478.blastn: 798
    SE0760_on_mibi1479.blastn: 798
    SE0760_on_mibi1480.blastn: 798
    SE0760_on_mibi1481.blastn: 798
    SE0760_on_mibi1482.blastn: 798
    SE0760_on_mibi1483.blastn: 798
    SE0760_on_mibi1484.blastn: 798
    SE0760_on_mibi1485.blastn: 798
    SE0760_on_mibi1486.blastn: 798
    SE0760_on_mibi1487.blastn: 798
    SE0760_on_mibi1488.blastn: 798
    SE0760_on_mibi1489.blastn: 798
    SE0760_on_mibi1490.blastn: 798
    SE0760_on_mibi1491.blastn: 798
    SE0760_on_mibi1492.blastn: 798
    SE0760_on_mibi1493.blastn: 798
    SE0760_on_mibi1494.blastn: 798
    SE0760_on_mibi1495.blastn: 798
    SE0760_on_mibi1496.blastn: 798
    SE0760_on_mibi1497.blastn: 798
    SE0760_on_mibi1498.blastn: 798
    SE0760_on_mibi1499.blastn: 798
    SE0760_on_mibi1500.blastn: 798
    SE0760_on_mibi1501.blastn: 798
    SE0760_on_mibi1502.blastn: 798
    SE0760_on_mibi1503.blastn: 798
    SE0760_on_mibi1504.blastn: 798
    SE0760_on_mibi1505.blastn: 798
    SE0760_on_mibi1506.blastn: 798
    SE0760_on_mibi2312.blastn: 798
    SE0760_on_mibi2313.blastn: 798
    SE0760_on_mibi2314.blastn: 798
    SE0760_on_mibi2315.blastn: 798
    SE0760_on_mibi2316.blastn: 798
    SE0760_on_mibi2317.blastn: 798
    SE0760_on_mibi2318.blastn: 798
    SE0760_on_mibi2319.blastn: 798
    SE0760_on_mibi2320.blastn: 798
    SE0760_on_mibi2321.blastn: 798
    SE0760_on_mibi2379.blastn: 798
    
    # -length(MT880870)=34053
    # Sample IDs (81) whose assemblies under ../shovill/<sample>/contigs.fa are screened.
    samples=(
        mibi1435 mibi1436 mibi1437 mibi1438 mibi1439 mibi1440 mibi1441 mibi1442 mibi1443 mibi1444
        mibi1445 mibi1446 mibi1447 mibi1448 mibi1449 mibi1450 mibi1451 mibi1452 mibi1453 mibi1454
        mibi1455 mibi1456 mibi1457 mibi1458 mibi1459 mibi1460 mibi1461 mibi1462 mibi1463 mibi1464
        mibi1465 mibi1466 mibi1467 mibi1468 mibi1469 mibi1471 mibi1473 mibi1474 mibi1475 mibi1476
        mibi1477 mibi1478 mibi1479 mibi1480 mibi1481 mibi1482 mibi1483 mibi1484 mibi1485 mibi1486
        mibi1487 mibi1488 mibi1489 mibi1490 mibi1491 mibi1492 mibi1493 mibi1494 mibi1495 mibi1496
        mibi1497 mibi1498 mibi1499 mibi1500 mibi1501 mibi1502 mibi1503 mibi1504 mibi1505 mibi1506
        mibi2312 mibi2313 mibi2314 mibi2315 mibi2316 mibi2317 mibi2318 mibi2319 mibi2320 mibi2321
        mibi2379
    )
    # Pass 1: search MT880870.fasta against every assembly; one tabular (-outfmt 6) result file per sample.
    for sample in "${samples[@]}"; do
        blastn -query MT880870.fasta -db "../shovill/${sample}/contigs.fa" -strand both -evalue 1e-50 -max_target_seqs 1 -outfmt 6 -num_threads 15 > "./MT880870_on_${sample}.blastn"
    done
    # Pass 2: run the summary script on each result file (prints one "<file>: <number>" line per sample).
    for sample in "${samples[@]}"; do
        python3 analyze_blastn_res.py "MT880870_on_${sample}.blastn"
    done
    
    MT880870_on_mibi1435.blastn: 2366
    MT880870_on_mibi1436.blastn: 3198
    MT880870_on_mibi1437.blastn: 3198
    MT880870_on_mibi1438.blastn: 2366
    MT880870_on_mibi1439.blastn: 3198
    MT880870_on_mibi1440.blastn: 34177
    MT880870_on_mibi1441.blastn: 2372
    MT880870_on_mibi1442.blastn: 3470
    MT880870_on_mibi1443.blastn: 2366
    MT880870_on_mibi1444.blastn: 2372
    MT880870_on_mibi1445.blastn: 3198
    MT880870_on_mibi1446.blastn: 3198
    MT880870_on_mibi1447.blastn: 3198
    MT880870_on_mibi1448.blastn: 3198
    MT880870_on_mibi1449.blastn: 2366
    MT880870_on_mibi1450.blastn: 3198
    MT880870_on_mibi1451.blastn: 1085
    MT880870_on_mibi1452.blastn: 1832
    MT880870_on_mibi1453.blastn: 2366
    MT880870_on_mibi1454.blastn: 34177
    MT880870_on_mibi1455.blastn: 3198
    MT880870_on_mibi1456.blastn: 34177
    MT880870_on_mibi1457.blastn: 3198
    MT880870_on_mibi1458.blastn: 835
    MT880870_on_mibi1459.blastn: 34177
    MT880870_on_mibi1460.blastn: 3198
    MT880870_on_mibi1461.blastn: 34177
    MT880870_on_mibi1462.blastn: 2372
    MT880870_on_mibi1463.blastn: 2372
    MT880870_on_mibi1464.blastn: 2311
    MT880870_on_mibi1465.blastn: 3198
    MT880870_on_mibi1466.blastn: 1264
    MT880870_on_mibi1467.blastn: 3470
    MT880870_on_mibi1468.blastn: 2372
    MT880870_on_mibi1469.blastn: 2372
    MT880870_on_mibi1471.blastn: 2366
    MT880870_on_mibi1473.blastn: 3198
    MT880870_on_mibi1474.blastn: 3198
    MT880870_on_mibi1475.blastn: 3198
    MT880870_on_mibi1476.blastn: 3198
    MT880870_on_mibi1477.blastn: 2372
    MT880870_on_mibi1478.blastn: 5918
    MT880870_on_mibi1479.blastn: 3198
    MT880870_on_mibi1480.blastn: 34177
    MT880870_on_mibi1481.blastn: 3198
    MT880870_on_mibi1482.blastn: 3198
    MT880870_on_mibi1483.blastn: 3198
    MT880870_on_mibi1484.blastn: 3198
    MT880870_on_mibi1485.blastn: 20038
    MT880870_on_mibi1486.blastn: 3198
    MT880870_on_mibi1487.blastn: 28520
    MT880870_on_mibi1488.blastn: 2366
    MT880870_on_mibi1489.blastn: 2366
    MT880870_on_mibi1490.blastn: 3198
    MT880870_on_mibi1491.blastn: 3198
    MT880870_on_mibi1492.blastn: 3198
    MT880870_on_mibi1493.blastn: 2233
    MT880870_on_mibi1494.blastn: 3198
    MT880870_on_mibi1495.blastn: 3198
    MT880870_on_mibi1496.blastn: 25135
    MT880870_on_mibi1497.blastn: 3198
    MT880870_on_mibi1498.blastn: 28012
    MT880870_on_mibi1499.blastn: 3198
    MT880870_on_mibi1500.blastn: 2366
    MT880870_on_mibi1501.blastn: 2366
    MT880870_on_mibi1502.blastn: 34177
    MT880870_on_mibi1503.blastn: 2366
    MT880870_on_mibi1504.blastn: 3198
    MT880870_on_mibi1505.blastn: 3198
    MT880870_on_mibi1506.blastn: 2372
    MT880870_on_mibi2312.blastn: 3198
    MT880870_on_mibi2313.blastn: 3198
    MT880870_on_mibi2314.blastn: 3198
    MT880870_on_mibi2315.blastn: 2366
    MT880870_on_mibi2316.blastn: 3198
    MT880870_on_mibi2317.blastn: 3198
    MT880870_on_mibi2318.blastn: 20006
    MT880870_on_mibi2319.blastn: 17544
    MT880870_on_mibi2320.blastn: 3198
    MT880870_on_mibi2321.blastn: 3198
    MT880870_on_mibi2379.blastn: 3198
    
    #len=36164
    # Sample IDs (81) whose assemblies under ../shovill/<sample>/contigs.fa are screened.
    samples=(
        mibi1435 mibi1436 mibi1437 mibi1438 mibi1439 mibi1440 mibi1441 mibi1442 mibi1443 mibi1444
        mibi1445 mibi1446 mibi1447 mibi1448 mibi1449 mibi1450 mibi1451 mibi1452 mibi1453 mibi1454
        mibi1455 mibi1456 mibi1457 mibi1458 mibi1459 mibi1460 mibi1461 mibi1462 mibi1463 mibi1464
        mibi1465 mibi1466 mibi1467 mibi1468 mibi1469 mibi1471 mibi1473 mibi1474 mibi1475 mibi1476
        mibi1477 mibi1478 mibi1479 mibi1480 mibi1481 mibi1482 mibi1483 mibi1484 mibi1485 mibi1486
        mibi1487 mibi1488 mibi1489 mibi1490 mibi1491 mibi1492 mibi1493 mibi1494 mibi1495 mibi1496
        mibi1497 mibi1498 mibi1499 mibi1500 mibi1501 mibi1502 mibi1503 mibi1504 mibi1505 mibi1506
        mibi2312 mibi2313 mibi2314 mibi2315 mibi2316 mibi2317 mibi2318 mibi2319 mibi2320 mibi2321
        mibi2379
    )
    # Pass 1: search MT880871.fasta against every assembly; one tabular (-outfmt 6) result file per sample.
    for sample in "${samples[@]}"; do
        blastn -query MT880871.fasta -db "../shovill/${sample}/contigs.fa" -strand both -evalue 1e-50 -max_target_seqs 1 -outfmt 6 -num_threads 15 > "./MT880871_on_${sample}.blastn"
    done
    # Pass 2: run the summary script on each result file (prints one "<file>: <number>" line per sample).
    for sample in "${samples[@]}"; do
        python3 analyze_blastn_res.py "MT880871_on_${sample}.blastn"
    done
    MT880871_on_mibi1435.blastn: 270
    MT880871_on_mibi1436.blastn: 35580
    MT880871_on_mibi1437.blastn: 518
    MT880871_on_mibi1438.blastn: 518
    MT880871_on_mibi1439.blastn: 518
    MT880871_on_mibi1440.blastn: 36682
    MT880871_on_mibi1441.blastn: 519
    MT880871_on_mibi1442.blastn: 514
    MT880871_on_mibi1443.blastn: 0
    MT880871_on_mibi1444.blastn: 0
    MT880871_on_mibi1445.blastn: 518
    MT880871_on_mibi1446.blastn: 518
    MT880871_on_mibi1447.blastn: 518
    MT880871_on_mibi1448.blastn: 518
    MT880871_on_mibi1449.blastn: 518
    MT880871_on_mibi1450.blastn: 518
    MT880871_on_mibi1451.blastn: 0
    MT880871_on_mibi1452.blastn: 30418
    MT880871_on_mibi1453.blastn: 518
    MT880871_on_mibi1454.blastn: 982
    MT880871_on_mibi1455.blastn: 518
    MT880871_on_mibi1456.blastn: 982
    MT880871_on_mibi1457.blastn: 518
    MT880871_on_mibi1458.blastn: 0
    MT880871_on_mibi1459.blastn: 34965
    MT880871_on_mibi1460.blastn: 518
    MT880871_on_mibi1461.blastn: 16296
    MT880871_on_mibi1462.blastn: 0
    MT880871_on_mibi1463.blastn: 518
    MT880871_on_mibi1464.blastn: 947
    MT880871_on_mibi1465.blastn: 518
    MT880871_on_mibi1466.blastn: 0
    MT880871_on_mibi1467.blastn: 514
    MT880871_on_mibi1468.blastn: 518
    MT880871_on_mibi1469.blastn: 518
    MT880871_on_mibi1471.blastn: 518
    MT880871_on_mibi1473.blastn: 518
    MT880871_on_mibi1474.blastn: 518
    MT880871_on_mibi1475.blastn: 518
    MT880871_on_mibi1476.blastn: 518
    MT880871_on_mibi1477.blastn: 518
    MT880871_on_mibi1478.blastn: 18607
    MT880871_on_mibi1479.blastn: 518
    MT880871_on_mibi1480.blastn: 36682
    MT880871_on_mibi1481.blastn: 0
    MT880871_on_mibi1482.blastn: 518
    MT880871_on_mibi1483.blastn: 32395
    MT880871_on_mibi1484.blastn: 0
    MT880871_on_mibi1485.blastn: 36164
    MT880871_on_mibi1486.blastn: 31072
    MT880871_on_mibi1487.blastn: 36682
    MT880871_on_mibi1488.blastn: 0
    MT880871_on_mibi1489.blastn: 0
    MT880871_on_mibi1490.blastn: 518
    MT880871_on_mibi1491.blastn: 518
    MT880871_on_mibi1492.blastn: 518
    MT880871_on_mibi1493.blastn: 740
    MT880871_on_mibi1494.blastn: 518
    MT880871_on_mibi1495.blastn: 0
    MT880871_on_mibi1496.blastn: 24187
    MT880871_on_mibi1497.blastn: 31069
    MT880871_on_mibi1498.blastn: 36682
    MT880871_on_mibi1499.blastn: 518
    MT880871_on_mibi1500.blastn: 0
    MT880871_on_mibi1501.blastn: 0
    MT880871_on_mibi1502.blastn: 36682
    MT880871_on_mibi1503.blastn: 518
    MT880871_on_mibi1504.blastn: 518
    MT880871_on_mibi1505.blastn: 518
    MT880871_on_mibi1506.blastn: 0
    MT880871_on_mibi2312.blastn: 316
    MT880871_on_mibi2313.blastn: 518
    MT880871_on_mibi2314.blastn: 518
    MT880871_on_mibi2315.blastn: 0
    MT880871_on_mibi2316.blastn: 518
    MT880871_on_mibi2317.blastn: 518
    MT880871_on_mibi2318.blastn: 36682
    MT880871_on_mibi2319.blastn: 36164
    MT880871_on_mibi2320.blastn: 518
    MT880871_on_mibi2321.blastn: 518
    MT880871_on_mibi2379.blastn: 518
    
    #len=147057
    # Sample IDs (81) whose assemblies under ../shovill/<sample>/contigs.fa are screened.
    samples=(
        mibi1435 mibi1436 mibi1437 mibi1438 mibi1439 mibi1440 mibi1441 mibi1442 mibi1443 mibi1444
        mibi1445 mibi1446 mibi1447 mibi1448 mibi1449 mibi1450 mibi1451 mibi1452 mibi1453 mibi1454
        mibi1455 mibi1456 mibi1457 mibi1458 mibi1459 mibi1460 mibi1461 mibi1462 mibi1463 mibi1464
        mibi1465 mibi1466 mibi1467 mibi1468 mibi1469 mibi1471 mibi1473 mibi1474 mibi1475 mibi1476
        mibi1477 mibi1478 mibi1479 mibi1480 mibi1481 mibi1482 mibi1483 mibi1484 mibi1485 mibi1486
        mibi1487 mibi1488 mibi1489 mibi1490 mibi1491 mibi1492 mibi1493 mibi1494 mibi1495 mibi1496
        mibi1497 mibi1498 mibi1499 mibi1500 mibi1501 mibi1502 mibi1503 mibi1504 mibi1505 mibi1506
        mibi2312 mibi2313 mibi2314 mibi2315 mibi2316 mibi2317 mibi2318 mibi2319 mibi2320 mibi2321
        mibi2379
    )
    # Pass 1: search MT880872.fasta against every assembly; one tabular (-outfmt 6) result file per sample.
    for sample in "${samples[@]}"; do
        blastn -query MT880872.fasta -db "../shovill/${sample}/contigs.fa" -strand both -evalue 1e-50 -max_target_seqs 1 -outfmt 6 -num_threads 15 > "MT880872_on_${sample}.blastn"
    done
    # Pass 2: run the summary script on each result file (prints one "<file>: <number>" line per sample).
    for sample in "${samples[@]}"; do
        python3 analyze_blastn_res.py "MT880872_on_${sample}.blastn"
    done
    MT880872_on_mibi1435.blastn: 19155
    MT880872_on_mibi1436.blastn: 19049
    MT880872_on_mibi1437.blastn: 19049
    MT880872_on_mibi1438.blastn: 19045
    MT880872_on_mibi1439.blastn: 19042
    MT880872_on_mibi1440.blastn: 77536 *
    MT880872_on_mibi1441.blastn: 19041
    MT880872_on_mibi1442.blastn: 19044
    MT880872_on_mibi1443.blastn: 19049
    MT880872_on_mibi1444.blastn: 19049
    MT880872_on_mibi1445.blastn: 19042
    MT880872_on_mibi1446.blastn: 19042
    MT880872_on_mibi1447.blastn: 19042
    MT880872_on_mibi1448.blastn: 19049
    MT880872_on_mibi1449.blastn: 19041
    MT880872_on_mibi1450.blastn: 19049
    MT880872_on_mibi1451.blastn: 19043
    MT880872_on_mibi1452.blastn: 19047
    MT880872_on_mibi1453.blastn: 19165
    MT880872_on_mibi1454.blastn: 52122 *
    MT880872_on_mibi1455.blastn: 20996
    MT880872_on_mibi1456.blastn: 52122 *
    MT880872_on_mibi1457.blastn: 19042
    MT880872_on_mibi1458.blastn: 19048
    MT880872_on_mibi1459.blastn: 77366 *
    MT880872_on_mibi1460.blastn: 19042
    MT880872_on_mibi1461.blastn: 84536 *
    MT880872_on_mibi1462.blastn: 19048
    MT880872_on_mibi1463.blastn: 19042
    MT880872_on_mibi1464.blastn: 19049
    MT880872_on_mibi1465.blastn: 19045
    MT880872_on_mibi1466.blastn: 6525
    MT880872_on_mibi1467.blastn: 19044
    MT880872_on_mibi1468.blastn: 19037
    MT880872_on_mibi1469.blastn: 19039
    MT880872_on_mibi1471.blastn: 19045
    MT880872_on_mibi1473.blastn: 19043
    MT880872_on_mibi1474.blastn: 19052
    MT880872_on_mibi1475.blastn: 19264
    MT880872_on_mibi1476.blastn: 19052
    MT880872_on_mibi1477.blastn: 19041
    MT880872_on_mibi1478.blastn: 26492 *
    MT880872_on_mibi1479.blastn: 19042
    MT880872_on_mibi1480.blastn: 45789 *
    MT880872_on_mibi1481.blastn: 19036
    MT880872_on_mibi1482.blastn: 19036
    MT880872_on_mibi1483.blastn: 19036
    MT880872_on_mibi1484.blastn: 19045
    MT880872_on_mibi1485.blastn: 52124 *
    MT880872_on_mibi1486.blastn: 19052
    MT880872_on_mibi1487.blastn: 56847 *
    MT880872_on_mibi1488.blastn: 19049
    MT880872_on_mibi1489.blastn: 19053
    MT880872_on_mibi1490.blastn: 19045
    MT880872_on_mibi1491.blastn: 19042
    MT880872_on_mibi1492.blastn: 19049
    MT880872_on_mibi1493.blastn: 19046
    MT880872_on_mibi1494.blastn: 19049
    MT880872_on_mibi1495.blastn: 19046
    MT880872_on_mibi1496.blastn: 92504 *
    MT880872_on_mibi1497.blastn: 19052
    MT880872_on_mibi1498.blastn: 58985 *
    MT880872_on_mibi1499.blastn: 19043
    MT880872_on_mibi1500.blastn: 19052
    MT880872_on_mibi1501.blastn: 19049
    MT880872_on_mibi1502.blastn: 46101 *
    MT880872_on_mibi1503.blastn: 19044
    MT880872_on_mibi1504.blastn: 19041
    MT880872_on_mibi1505.blastn: 19052
    MT880872_on_mibi1506.blastn: 19049
    MT880872_on_mibi2312.blastn: 19045
    MT880872_on_mibi2313.blastn: 19049
    MT880872_on_mibi2314.blastn: 19052
    MT880872_on_mibi2315.blastn: 19049
    MT880872_on_mibi2316.blastn: 19052
    MT880872_on_mibi2317.blastn: 19042
    MT880872_on_mibi2318.blastn: 78246 *
    MT880872_on_mibi2319.blastn: 77512 *
    MT880872_on_mibi2320.blastn: 19045
    MT880872_on_mibi2321.blastn: 19052
    MT880872_on_mibi2379.blastn: 19156
  11. yycG has only 80% similarity, but it still belongs to the gene family.

    Based on the provided BLASTN result summary, determining whether it's the same gene depends on multiple factors, including sequence similarity, alignment length, and expected value (E-value).
    
    Let’s break down the results:
    
    Query ID: gi|49240382|emb|BX571856.1|:25617-27443
    Subject ID: contig00005
    Percentage Identity: 80.338%
    Alignment Length: 1836 (query length: 1827 bp, i.e. positions 25617-27443; the alignment is longer than the query because it includes gap columns)
    Mismatches: 349
    Gap Openings: 7
    E-value: 0.0 (this indicates a highly significant alignment)
    Bit Score: 1380
    
    Key Points to Consider:
    
    Percentage Identity (80.34%): While 80% identity is relatively high, it’s not close enough to the 90-100% typically expected for identical or highly similar genes. However, the threshold for considering sequences as "the same gene" depends on the organism, type of gene, and biological context. For some genes or species, this might still be acceptable.
    
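    Alignment Length (1836 bp): The query is 1827 bp long (positions 25617-27443), and the alignment spans 1836 columns because gap positions are counted as well. The alignment therefore covers most or all of the query, which is a good indication that the sequences match along their whole length rather than only in a short conserved region.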
    
    Mismatches and Gaps: There are 349 mismatches and 7 gap openings, which suggests there are differences between the two sequences. The number of differences is significant, and the biological impact of these differences would depend on whether they occur in coding regions or regulatory elements.
    
    E-value (0.0): The E-value of 0.0 indicates that the alignment is highly significant and not due to chance.
    
    Conclusion:
    
    Based on the 80.34% identity and the number of mismatches, it is unlikely that this is exactly the same gene, but it could still be a closely related gene variant, allele, or paralog. If you are looking for an exact match, this result would not qualify. However, if you are investigating gene families or variants, this sequence could be considered closely related. Further functional or experimental validation might be necessary to determine the biological significance of these differences.
  12. plotTreeHeatmap

    # Infer a tree from the nucleotide alignment (-nt) under the GTR model (-gtr);
    # FastTree writes the tree to stdout, which is redirected into plotTreeHeatmap/snippy.core.tree.
    # NOTE(review): the R code that follows reads "990_backup.tree", not this output file - confirm which tree is intended.
    FastTree -gtr -nt variants/snippy.core_without_reference.aln > plotTreeHeatmap/snippy.core.tree
    
    # Draw a circular cladogram of the isolates (ggtree.png) and the same tree
    # with an ST colour strip attached (ggtree_and_gheatmap.png).
    # Inputs (read from the working directory): typing.csv (tab-separated, needs
    # columns Isolate and ST) and 990_backup.tree.
    library(ggtree)
    library(ggplot2)
    library(dplyr)
    setwd("/home/jhuang/DATA/Data_Luise_Sepi_STKN/plotTreeHeatmap/")

    # -- load typing table and tree --
    info <- read.csv("typing.csv", sep = "\t")
    # `name` is the column used as the tip label below.
    info$name <- info$Isolate
    tree <- read.tree("990_backup.tree")
    #cols <- c(infection='purple2', commensalism='skyblue2')

    # -- heatmap table: row names = isolate, all values as character --
    #heatmapData2 <- info %>% select(Isolate, ST, SCCmec, agr.typing)
    heatmapData2 <- info %>% select(Isolate, ST)
    rn <- heatmapData2$Isolate
    heatmapData2$Isolate <- NULL
    # lapply() keeps the data-frame shape and column names for any number of
    # rows/columns; sapply() may simplify to a bare vector (e.g. with one row).
    heatmapData2[] <- lapply(heatmapData2, as.character)
    rownames(heatmapData2) <- rn

    # -- fill colours, keyed by ST ("-" = no ST assigned) --
    #https://bookdown.org/hneth/ds4psy/D-3-apx-colors-basics.html
    #other candidates: "blueviolet","darkgoldenrod","tomato","mediumpurple4","indianred",
    #"lightcyan3","azure3","magenta","tan","brown"
    # One named vector instead of two parallel vectors, so a colour cannot drift
    # away from its ST when entries are added or removed.
    heatmap.colours <- c(
      "2" = "cornflowerblue", "5" = "darkgreen", "7" = "seagreen3",
      "9" = "tan", "14" = "red", "17" = "navyblue", "23" = "gold",
      "35" = "green", "59" = "orange", "73" = "pink", "81" = "purple",
      "86" = "magenta", "87" = "brown", "89" = "darksalmon",
      "130" = "chocolate4", "190" = "darkkhaki", "290" = "azure3",
      "297" = "maroon", "325" = "lightgreen", "454" = "blue",
      "487" = "cyan", "558" = "skyblue2", "766" = "blueviolet",
      "-" = "darkgrey"
    )
    #names used when SCCmec / agr typing columns are plotted as well:
    #"SCCmec_type_II(2A)","SCCmec_type_III(3A)","SCCmec_type_III(3A) and #SCCmec_type_VIII(4A)","SCCmec_type_IV(2B)","SCCmec_type_IV(2B&5)","SCCmec_type_IV(2B) and #SCCmec_type_VI(4B)","SCCmec_type_IVa(2B)","SCCmec_type_IVb(2B)","SCCmec_type_IVg(2B)","none",    "I","II","III", "none")

    # -- circular tree with tip labels --
    #scale_color_manual(values=cols) +
    #geom_tippoint(aes(color=Type)) +
    p <- ggtree(tree, layout = "circular", branch.length = "none") %<+% info +
      geom_tiplab2(aes(label = name), offset = 1, size = 6.0)
    #, geom='text', align=TRUE,  linetype=NA, hjust=1.8,check.overlap=TRUE
    #difference between geom_tiplab and geom_tiplab2?
    #+ theme(axis.text.x = element_text(angle = 30, vjust = 0.5)) + theme(axis.text = element_text(size = 20))  + scale_size(range = c(1, 20))
    #font.size=10,

    # Build the combined plot BEFORE opening a device, so an error here cannot
    # leave a half-open graphics device behind.
    p_heat <- gheatmap(
      p, heatmapData2,
      width = 0.1, offset = 5,
      colnames_position = "top", colnames_angle = 90, colnames_offset_y = 0.1,
      hjust = 0.5, font.size = 6
    ) +
      scale_fill_manual(values = heatmap.colours) +
      theme(
        legend.text = element_text(size = 14),
        legend.title = element_text(size = 14)
      ) +
      guides(
        fill = guide_legend(title = ""),
        color = guide_legend(override.aes = list(size = 5))
      )

    # -- write figures --
    # print() is required: ggplot objects are only auto-printed at the
    # interactive top level, so a bare `p` yields an empty file when this code
    # is run through source() or wrapped in a function/loop.
    png("ggtree.png", width = 1260, height = 1260)
    #svg("ggtree.svg", width=1260, height=1260)
    print(p)
    dev.off()

    png("ggtree_and_gheatmap.png", width = 1290, height = 1000)
    #svg("ggtree_and_gheatmap.svg", width=17, height=15)
    print(p_heat)
    dev.off()
  13. Response

    I’ve reviewed the 85 genomes as discussed. Three samples were filtered out due to insufficient coverage: mibi2381, mibi2380, and mibi1472. Additionally, the sample mibi1470 was excluded as it could not be identified as S. epidermidis based on its MLST type.
    
    This leaves us with a total of 81 samples for downstream analysis.
    
    Attached, you’ll find:
    
    * The graphic ggtree_and_gheatmap.png
    * A table, presence_absence_matrix_on_genes, providing an overview of gene presence/absence and relevant typing information
    * FASTA files for all 81 genomes

Disk Directories

  1. Titisee

    • ‘$RECYCLE.BIN’
    • 180119_M03701_0115_000000000-BFG46.zip
    • 3rd_party
    • Anna11_assemblies
    • Anna11_trees
    • bengal_results_v1_2018
    • ConsPred_prokaryotic_genome_annotation
    • damian_DEL
    • damian_v201016
    • Data_Amir_PUBLISHED (*)
    • Data_Anastasia_RNASeq
    • Data_Anna4_SNP
    • Data_Anna5_SNP_rsync_error
    • Data_Anna9_OXA-48_or_OXA-181
    • Data_Francesco_16S
    • Data_Holger_Pseudomonas_aeruginosa_SNP
    • Data_Holger_S.epidermidis_long
    • Data_Holger_S.epidermidis_short
    • Data_Holger_VRE
    • Data_Marc_RNA-seq_Sepidermidis
    • Data_Nico_Gagliani
    • Data_Nicola_Schaltenberg
    • Data_Nicola_Schaltenberg_PICRUSt
    • Data_Nicole10_16S_interlab_PUBLISHED (*)
    • Data_Nicole6_HEV_4_SNP_calling_PE_DEL
    • Data_Nicole6_HEV_4_SNP_calling_SE_DEL
    • Data_Nicole6_HEV_new_orig_fastqs
    • Data_Nicole6_HEV_Swantje
    • Data_Nina1_Nicole7
    • Data_Tam_Acinetobacter_baumannii
    • Data_Ute_MKL1
    • Data_Ute_RNA_1_2
    • Data_Ute_RNA_4
    • Data_Ute_RNA_4_2022-11_test
    • DO.pdf
    • Eigene_Ordner_HR
    • GAMOLA2
    • GAMOLA2_prototyp
    • HOME_FREIBURG
    • MAGpy_db
    • ‘System Volume Information’
    • Thomas_methylation_EPIC_DO
    • TRASH
    • UGENE_v1_32_data_cistrome
    • UGENE_v1_32_data_ngs_classification
  2. Denise_ChIPseq

    • ‘$RECYCLE.BIN’
    • ALL_trimmed_part_DEL
    • coi_disclosure.docx
    • Data_Amir_PUBLISHED_DEL
    • Data_Anna12_HAPDICS_final_not_finished_DEL
    • Data_Anna_Mixta_hanseatica_PUBLISHED (*)
    • Data_Arck_16S_MMc_PUBLISHED (*)
    • Data_Caroline_RNAseq_brain_organoids
    • Data_Caroline_RNAseq_wt_timecourse
    • Data_Damian
    • Data_Denise_ChIPSeq_Protocol1
    • Data_Denise_ChIPSeq_Protocol2
    • Data_Denise_LTtrunc_H3K27me3_2_results_DEL
    • Data_Denise_LTtrunc_H3K4me3_2_results_DEL
    • Data_download_virus_fam
    • Data_Gunnar_Yersiniomics_COPYFAILED_DEL
    • Data_HepE_Freiburg_PUBLISHED (*)
    • Data_INTENSO_2022-06
    • Data_Jingang
    • Data_Laura_ChIPseq_GSE120945
    • Data_Nicole6_HEV_ownMethod
    • Data_Nicole6_HEV_ownMethod_new
    • Data_Paul_and_Marc_Epidome_batch3
    • Data_Pietschmann_HCV_Amplicon
    • Data_Soeren_RNA-seq_2023_PUBLISHING (*)
    • Data_Susanne_16S_re_UNPUBLISHED
    • Data_Susanne_16S_UNPUBLISHED
    • Data_Svenja_RSV_Probe3_PUBLISHING (*)
    • Data_Ute
    • HD04-1.fasta
    • m_aepfelbacher_DEL
    • RNAHiSwitch
    • RNAHiSwitch_
    • RNAHiSwitch__
    • RNAHiSwitch___
    • RNAHiSwitch_milestone1
    • RNAHiSwitch_milestone1_DELETED
    • RNAHiSwitch_paper
    • RNAHiSwitchpaper
    • RNAHiSwitch_paper_DELETED
    • RNAHiSwitch_paper.tar.gz
    • ST772_DEL
    • ‘System Volume Information’
    • Ute_miRNA_results_38
    • Ute_RNASeq_results
  3. Elements1 (An14_RNAs)

    • ‘$RECYCLE.BIN’
    • chromhmm-enhancers
    • Data_16S_arckNov
    • Data_16S_arckNov_re
    • Data_16S_arckNov_review_PUBLISHED (*)
    • Data_Anna10_RP62A
    • Data_Anna14_RNASeq_plus_public (#)
    • Data_Anna14_RNASeq_to_be_DEL
    • Data_Anna_Cutibacterium_acnes_DEL
    • Data_ChIPSeq_Laura
    • Data_Denise_LTtrunc_Methylation
    • Data_Denise_sT_Methylation
    • Data_Hannes_ChIPSeq
    • Data_Holger_Klebsiella_pneumoniae_SNP_PUBLISHING (*)
    • Data_Kieler_Sepi_Staemme
    • Data_Nicole12_16S_Kluwe_Bunders
    • Data_Pietschmann_RSV_Probe2_PUBLISHED (*)
    • Data_RNA188_Paul_Becher
    • Data_Silvia_RNASeq_SUBMISSION
    • Data_Susanne_Amplicon_RdRp_orf1_2
    • Data_Tabea_RNASeq
    • Data_Tabea_RNASeq_submission
    • Fastqs
    • host_refs
    • j_huang_raw_fq
    • nr_gz_README
    • ‘System Volume Information’
    • tmp
    • Vraw
  4. Elements (Anna C.arnes)

    • ‘$RECYCLE.BIN’
    • ChIPSeq_pipeline_desc.docx
    • ChIPSeq_pipeline_desc.fodt
    • ChIPSeq_pipeline_desc.pdf
    • Data_16S_Leonie_from_Nico_Gaglianis
    • Data_Anna_C.acnes_PUBLISHED (*#)
    • Data_Denise_LT_DNA_Bindung
    • Data_Denise_LT_K331A_RNASeq
    • Data_Denise_RNASeq_GSE79958
    • Data_Luise_Epidome_batch1
    • Data_Luise_Epidome_batch2
    • Data_Luise_Epidome_test
    • Data_Luise_Pseudomonas_aeruginosa_PUBLISHED (*)
    • Data_Nicole6_HEV_benchmark
    • Data_Nicole6_HEV_Swantje1_blood
    • Data_Susanne_spatialRNA_2022.9.1_backup
    • Data_Swantje_HEV_using_viral-ngs
    • Data_Tam_RNASeq3
    • Fastqs_19-21
    • picrust2_out_2024_2
    • RNASeq_pipeline_desc.docx
    • RNASeq_pipeline_desc.pdf
    • ‘System Volume Information’
    • VIPER_static_DEL
  5. Elements (j_huang_until_201904)

    • ‘$RECYCLE.BIN’
    • bacteria_refseq
    • bacteria_refseq.zip
    • Data_2019_April
    • Data_2019_August
    • Data_2019_July
    • Data_2019_June
    • Data_2019_May
    • Data_2019_September
    • Data_Becher_Damian_Picornavirus_BovHepV
    • Data_Laura_MP_RNASeq
    • Data_Nicole6_HEV_Swantje2
    • Data_Rotavirus
    • Data_Rotavirus_DEL
    • Data_Song_RNASeq_PUBLISHED (*)
    • Data_Xiaobo_10x
    • ‘Install Western Digital Software for Mac.dmg’
    • ‘Install Western Digital Software for Windows.exe’
    • j_huang_until_201904 (#)
    • sage_jhuang_DEL
    • ‘System Volume Information’
  6. Elements (Indra HAPDICS)

    • ‘$RECYCLE.BIN’
    • align_assem_res_DEL
    • Data_Anna11_Pair1-6_P6
    • Data_Anna11_Sepdermidis_DEL
    • Data_Anna12_HAPDICS_HyAsP (#)
    • Data_Anna_HAPDICS_review (#)
    • Data_Anna_HAPDICS_RNASeq_rawdata (#)
    • Data_Gunnar_MS
    • Data_Holger_Klebsiella_pneumoniae_SNP_PUBLISHING (*)
    • Data_Indra0_pioneer (#)
    • Data_Indra10_K9me3S10ph_r2 (#)
    • Data_Indra2_RNASeq (#)
    • Data_Indra3_H3K4me3 (#)
    • Data_Indra3_H3K4me3_incomplete_DEL (#)
    • Data_Indra4_H3K27me3 (#)
    • Data_Indra5_H3K27ac (#)
    • Data_Indra7_H3K4me1 (#)
    • Data_Indra8_H3K4me3 (#)
    • Data_Indra9_K9me3S10ph_r1 (#)
    • Data_Indra_H3K27ac_public (#)
    • Data_Indra_H3K4me3_public (#)
    • Data_Indra_RNASeq_GSM2262901 (#)
    • Data_Indra_TypeI_IFNs_and_TNF (#)
    • Data_Laura1_Indra2_RNASeq (#)
    • Data_Marie_Indra_H3S10 (#)
    • data_overview.txt
    • EXCHANGE_DEL
    • HAPDICS_hyasp_plasmids (#)
    • HD04 (#)
    • HD15_with_10 (#)
    • HD15_without_10 (#)
    • HD17 (#)
    • HD21 (#)
    • HD25 (#)
    • HD26 (#)
    • HD31 (#)
    • HD33 (#)
    • HD39 (#)
    • HD43 (#)
    • HD46 (#)
    • HD59 (#)
    • Linux_DELLWorkstation_C_Users_indbe_VirtualBoxVMs
    • m_aepfelbacher
    • m_aepfelbacher.zip
    • ‘System Volume Information’
    • UKE_DELLWorkstation_C_Users_indbe_Desktop
  7. Seagate 1

    • ‘$RECYCLE.BIN’
    • 2017-18_raw_data
    • batch_200314_incomplete
    • batch_200319
    • batch_200325
    • ChromHMM_Dir
    • chromhmm-enhancers
    • Data_Anna12_HAPDICS_final
    • Data_Arck_MeDIP
    • Data_Denise_ChIPSeq_Protocol1
    • Data_Denise_sT_H3K27me3
    • Data_Denise_sT_H3K4me3
    • Data_ENNGS_pathogen_detection_pipeline_comparison
    • Data_Laura0
    • Data_Laura_16S
    • Data_Laura_16S_2
    • Data_Laura_16S_2_re
    • Data_Laura_16S_2re
    • Data_Laura_16S_merged
    • Data_Laura_ChIPseq_GSE120945
    • Data_Laura_plasmid
    • Data_Martin_mycoplasma
    • Data_Nicola_Gagliani
    • Data_Nicole16_parapoxvirus
    • Data_Nicole_16S_Christmas_2020
    • Data_Nicole_16S_Christmas_2020_2
    • Data_Petra_Arck
    • Downloads_2021-01-18_DEL
    • Downloads_DEL
    • GAMOLA2_prototyp
    • j_huang_201904_202002
    • j_huang_202007_202012
    • m_aepfelbacher.zip
    • m_error_DEL
    • Project_h_rohde_Susanne_WGS_unbiased_DEL.zip
    • RNA_seq_analysis_tools_2013
    • Seagate
    • Start_Here_Mac.app
    • ‘System Volume Information’
    • trimmed
  8. Seagate 2

    • ‘$RECYCLE.BIN’
    • 201030_M03701_0207_000000000-J57B4.zip
    • 91.orf
    • 91.orf.fai
    • 91.pep
    • ALL
    • ALL83
    • Autorun.inf
    • Data_Anna12_HAPDICS_raw_data_shovill_prokka
    • Data_Anna_HAPDICS_RNASeq
    • Data_Anna_HAPDICS_WGS_ALL
    • Data_Christopher_MeDIP_MMc_PUBLISHED (*)
    • Data_Denise_RNASeq
    • Data_Denise_RNASeq_trimmed_DEL
    • Data_Gunnar_Yersiniomics_IMCOMPLETE_DEL
    • Data_HEV
    • Data_HEV_Freiburg_2020
    • Data_Manthey_16S
    • Data_Nicole_16S_Hamburg_Odense_Cornell_Muenster
    • Data_Nicole4_TH17
    • Data_Nicole_HDV_Recombination_PUBLISHED (*)
    • Data_Pietschmann_RSV_Probe_PUBLISHED (*)
    • Data_Susanne_Carotis_RNASeq_PUBLISHING (*)
    • dgaston-dec-06-2012-121211124858-phpapp01.pdf
    • dna2.fasta.fai
    • f1_R1_link.sh
    • f1_R2_link.sh
    • fastq_HPI_bw_2019_08_and_2020_02
    • GSE128169_family.soft.gz
    • GSE128169_series_matrix.txt.gz
    • HD04.infection.hS_vs_HD04.nose.hS_annotated_degenes.xls
    • HD12
    • HyAsP_bold
    • HyAsP_complete_genomes
    • HyAsP_incomplete_genomes
    • HyAsP_normal
    • HyAsP_normal_sampled_input
    • m_aepfelbacher.zip
    • ppat.1009304.s016.tif
    • Qi_panGenome
    • README
    • ‘README(1)’
    • rtpd_files
    • rtpd_files_DEL
    • sam2bedgff.pl
    • Seagate
    • ‘sequence(1).txt’
    • sequence.txt
    • Seq_VRE_hybridassembly
    • s_hero2x
    • Start_Here_Mac.app
    • Start_Here_Win.exe
    • ‘System Volume Information’
    • tileshop.fcgi
    • video.zip
    • Warranty.pdf
  9. Seagate 3 (/home/jhuang backup DATA_COPY_FROM_hamburg)

    • Autorun.inf
    • DATA_COPY_DEL
    • DATA_COPY_FROM_hamburg
    • Seagate
    • Start_Here_Mac.app
    • Start_Here_Win.exe
    • Warranty.pdf
  10. Seagate 4 (ERROR)

  11. Smarty

    • ALIGN_ASSEM
    • Blast_db
    • Data_16S_Degenhardt_Marius_DEL
    • Data_Emilia_MeDIP
    • Data_Gunnar_Yersiniomics_DEL
    • Data_Manja_RNAseq_Organoids_Virus
    • Data_Paul_Staphylococcus_epidermidis
    • Data_Susanne_spatialRNA
    • Data_Ute_RNA_3
    • DjangoApp_Backup_2023-10-30
    • lost+found
    • ref
    • temporary_files_DEL
  12. Volume (466G)

    • Data_Indra3_H3K4me3_results_picard
    • Data_Indra4_H3K27me3_results_picard
    • Data_Indra5_H3K27ac_results_picard
    • finding_gap_files_2023
    • HOME_FREIBURG
    • results_H3K27ac (unreadable — ls: cannot access ‘results_H3K27ac’: Input/output error)
  13. DATA_Intenso

    • 150810_M03701_0019_000000000-AFJFK
    • chipseq
    • ChipSeq_Raw_Data3_171009_NB501882_0024_AHNGTYBGX3
    • CLC_Data
    • DATA (empty? if yes, delete it!)
    • Data_Anna1_1585_RNAseq
    • Data_Anna2_CO6114
    • Data_Anna3_VRE_Ausbruch
    • Data_Anna4_SNP
    • Data_Anna5_SNP
    • Data_Anna6_RNASeq
    • Data_Anna7_RNASeq_Cytoscape
    • Data_Anna8_RNASeq_static_shake_deprecated
    • Data_Carolin1_16S
    • Data_Gagliani1_18S_16S
    • Data_Gagliani2_enriched_16S
    • Data_Laura
    • Data_Laura_2
    • Data_Laura_3
    • Data_Nicole10_16S_interlab
    • Data_Nicole1_Tropheryma_whipplei
    • Data_Nicole3_TH17_orig
    • Data_Nicole7_Anelloviruses_Polyomavirus
    • Data_Nicole8_Lamprecht
    • Data_Nicole9_Hund_Katze_Mega
    • Data_Nina1_merged
    • Data_Nina1_Nicole5_1-76
    • Data_SPANDx1_Kpneumoniae_vs_Assembly1
    • Data_Susanne_WGS_3amplicons
    • Data_Thaiss1_Microarray
    • Data_Thaiss2_Microarray
    • Downloads
    • Downloads2
    • Downloads_DEL
    • Fastqs
    • galaxy_tools
    • HOME_FREIBURG_DEL
    • m_aepfelbacher
    • m_aepfelbacher_DEL.zip
    • MauveOutput
    • mom-baby_com_cn
    • PAPERS
    • PENDRIVE_cont
    • results_K27
    • TB
    • ‘VirtualBox VMs’
    • VirtualBox_VMs
    • ‘VirtualBox VMs2’
    • ‘VirtualBox VMs2_DEL’
    • ‘VirtualBox VMs_DEL’
    • VirtualBox_VMs_DEL
    • websites
    • Work_Dir2
    • Work_Dir2_SGE
    • Work_Dir_dM_broad_mockinput
    • Work_Dir_dP_broad_mockinput
    • Work_Dir_mock_broad_mockinput
    • Work_Dir_WAC_broad_mockinput
    • Work_Dir_WAP_broad_mockinput
  14. DATA

    • Data_Biobakery
    • Data_Damian
    • Data_Jiline_Transposon
    • Data_Jiline_Transposon2
    • Data_Jiline_Yersinia_SNP
    • Data_Luise_Sepi_STKN
    • Data_Marius_16S
    • Data_Nicole_CRC1648
    • Data_Patricia_Sepi_5Samples
    • Data_PaulBongarts_S.epidermidis_HDRNA
    • Data_Susanne_MPox
    • Data_Tam_ABAYE_RS05070_on_A_calcoaceticus_baumannii_complex_DUPLICATED_DEL
    • Data_Xiaobo_10x_2
    • Data_Xiaobo_10x_3
    • Mouse_HS3ST1_12175_out
    • Mouse_HS3ST1_12373_out
    • Talk_Nicole_CRC1648
    • Talks_Bioinformatics_Meeting
    • Talks_including_DEEP-DV
    • Talks_resources
  15. DATA_A

    • Data_Damian_NEW_CREATED
    • Data_Nicole8_Lamprecht_new_PUBLISHED
    • Data_R_bubbleplots
    • Data_Samira_RNAseq
    • Data_Ute_TRANSFERED_DEL
    • Paper_Target_capture_sequencing_MHH_PUBLISHED
  16. DATA_B

    • Antraege_
    • Data_16S_Nicole_210222
    • Data_Adam_Influenza_A_virus
    • Data_Anna_Efaecium_assembly
    • Data_Bactopia
    • Data_Ben_RNAseq
    • Data_DAMIAN_endocarditis_encephalitis
    • Data_Denise_sT_PUBLISHING
    • Data_Django
    • Data_Fran2_16S_func
    • Data_Gunnar_Yersiniomics
    • Data_Holger_5179-R1_vs_5179
    • Data_Holger_MT880870_MT880872_Annotation
    • Data_Holger_S.epidermidis_1585_5179_HD05
    • Data_Johannes_PIV3
    • Data_Luise_Epidome_longitudinal_nose
    • Data_Manja_Hannes_Probedesign
    • Data_Manja_RNAseq_Organoids
    • Data_Manja_RNAseq_Organoids_Merged
    • Data_Manja_RNAseq_Organoids_Virus
    • Data_Marc_AD_PUBLISHING
    • Data_Marc_RNA-seq_Saureus_Review
    • Data_Nicole_16S
    • Data_Nicole_cfDNA_pathogens
    • Data_Ring_and_CSF_PegivirusC_DAMIAN
    • Data_Soeren_RNA-seq_2022
    • Data_Song_Microarray
    • Data_Susanne_Carotis_MS
    • Data_Susanne_Omnikron
    • Data_Viro
    • Doktorarbeit
    • Poster_Rohde_20230724
  17. DATA_C

    • 16304905.fasta
    • ‘16S data manuscript_NF.docx’
    • 180820_2_supp_4265595_sw6zjk.docx
    • 180820_2_supp_4265596_sw6zjk.docx
    • 1a_vs_3.csv
    • 2014SawickaBBA.pdf
    • 20160509Manuscript_NDM_OXA_mitKomm.doc
    • 2022-10-27_IRI_manuscript_v03_JH.docx
    • ‘20221129 Table mutations.docx’
    • ‘2.05.01.05-A01 Urlaubsantrag-Shuting-beantragt.pdf’
    • 220607_Agenda_monthly_meeting.pdf
    • 230602_NB501882_0428_AHKG53BGXT.zip
    • 362383173.rar
    • 3932-Leber
    • 562.9459.1.fa
    • 562.9459.1_rc.fa
    • ‘add. Figures Hamburg_UKE.pptx’
    • align_4l_on_FJ705359
    • align_4p_on_FJ705359
    • all_gene_counts_with_annotation.xlsx
    • All_indels_annotated_vHR.xlsx
    • ‘Amplikon_indeces_Susanne +groups.xlsx’
    • Amplikon_indeces_Susanne.xlsx
    • app_flask.py
    • ASA3P.pdf
    • assembly
    • Astrovirus.pdf
    • Aufnahmeantrag_komplett_10_2022.pdf
    • Bacterial_pipelines.txt
    • bacto
    • bam2fastq_mapping_again
    • bengal3_ac3.yml
    • bengal3ac3.yml
    • bengal-bay-0.1.json
    • Biopython
    • BioPython
    • call_shell_from_Ruby.png
    • ChIPSeq_pipeline_desc.docx
    • ChIPSeq_pipeline_desc.pdf
    • chipster
    • coefficients_csaw_vs_diffreps.xlsx
    • COMMANDS
    • Comparative_genomic_analysis_of_eight_novel_haloal.pdf
    • COMPSRA_uke_DEL.jar
    • ‘Copy of pool_b1_CGATGT_300.xlsx’
    • CvO_Klassenliste_7_3.pdf
    • damian_GUI
    • damian_nodbs
    • Data_16S_Arck_vaginal_stool
    • Data_16S_benchmark
    • Data_16S_benchmark2
    • Data_16S_Birgit
    • Data_16S_BS052
    • Data_16S_Christner
    • Data_16S_gcdh_BKV
    • Data_16S_Leonie
    • Data_16S_PatientA-G_CSF
    • Data_16S_Schaltenberg
    • Data_Alex1_Amplicon
    • Data_Alex1_SNP
    • Data_Analysis_for_Life_Science
    • Data_Anastasia_RNASeq_PUBLISHING
    • Data_Anna12_HAPDICS_final
    • Data_Anna13_vanA-Element
    • Data_Anna14_PACBIO_methylation
    • Data_Anna_C.acnes2_old_DEL
    • Data_Anna_gap_filling_agrC
    • Data_Anna_Kieler_Sepi_Staemme
    • Data_Anna_MT880872_update
    • Data_Baechlein_Hepacivirus_2018
    • Data_Bornavirus
    • Data_Christine_cz19-178-rothirsch-bovines-hepacivirus
    • Data_Christopher_MeDIP_MMc_published
    • Data_ChristophFR_HepE_published
    • Data_CSF
    • Data_Daniela_adenovirus_WGS
    • Data_Emilia_MeDIP
    • Data_Emilia_MeDIP_DEL
    • Data_Francesco2021_16S
    • Data_Francesco2021_16S_re
    • Data_Gunnar_MS
    • Data_Hannes_ChIPSeq
    • Data_Hannes_RNASeq
    • Data_Holger_Efaecium_variants_PUBLISHED
    • Data_Holger_Pseudomonas_aeruginosa_SNP
    • Data_Holger_VRE
    • Data_Holger_VRE_DEL
    • Data_Icebear_Damian
    • Data_Indra3_H3K4_p2_DEL
    • Data_Indra6_RNASeq_ChipSeq_Integration_DEL
    • Data_Indra_Figures
    • Data_Indra_RNASeq_GSM2262901
    • Data_Jingang
    • Data_jupnote
    • Data_KatjaGiersch_new_HDV
    • Data_Manja_RPAChIPSeq_public
    • Data_Manuel_WGS_Yersinia
    • Data_Manuel_WGS_Yersinia2_DEL
    • Data_Manuel_WGS_Yersinia_DEL
    • Data_Marcus_tracrRNA_structures
    • Data_Mausmaki_Damian
    • Data_methylome_MMc
    • Data_MHH_Encephalitits_DAMIAN
    • Data_Nicola_Gagliani
    • Data_Nicola_Schaltenberg
    • Data_Nicola_Schaltenberg_PICRUSt
    • Data_Nicole1_Tropheryma_whipplei
    • Data_Nicole5
    • Data_Nicole5_77-92
    • Data_parainfluenza
    • Data_PaulBecher_Rotavirus
    • Data_Paul_Staphylococcus_epidermidis
    • Data_Pietschmann_HCV_Amplicon_bigFile
    • Data_Piscine_Orthoreovirus_3_in_Brown_Trout
    • Data_Proteomics
    • Data_R_courses
    • Data_RNABioinformatics
    • Data_RNAKinetics
    • Data_SARS-CoV-2
    • Data_SARS-CoV-2_Genome_Announcement_PUBLISHED
    • Data_Seite
    • Data_snakemake_recipe
    • Data_Song_aggregate_sum
    • Data_Susanne_Amplicon_haplotype_analyses_RdRp_orf1_2_re
    • Data_Susanne_Amplicon_RdRp_orf1_2_re
    • Data_Susanne_Carotis_spatialRNA_PUBLISHING
    • Data_Susanne_WGS_unbiased
    • Data_Tabea_RNASeq
    • Data_temp
    • Data_Thaiss1_Microarray_new
    • Data_Tintelnot_16S
    • Data_viGEN
    • Data_Wuenee_Plots
    • Data_Yang_Poster
    • DEEP-DV
    • DOKTORARBEIT
    • empty.fasta
    • enhancer-snakemake-demo
    • exchange.txt
    • exdata-data-NEI_data.zip
    • Fran_16S_Exp8-17-21-27.txt
    • GAMOLA2
    • genes_wac6_wap6.xls
    • Genomic_Data_Science
    • go1.13.linux-amd64.tar.gz.1
    • HEV_aligned.fasta
    • hev_p2-p5.fa
    • hg19_gene_annotations
    • hg19.rmsk.bed
    • Hotmail_to_Gmail
    • HPI_DRIVE
    • HPI_samples_for_NGS_29.09.22.xlsx
    • Indra_Thesis_161020.pdf
    • install_nginx_on_hamm
    • INTENSO_DIR
    • interlab_comparison_DEL
    • Learn_UGENE
    • LOG
    • LOG_p954_stat
    • ‘LT K331A.gbk’
    • Manuscript_10_02_2021.docx
    • Manuscript_Epigenetics_Macrophage_Yersinia
    • Manuscript_RNAHiSwitch
    • map_corrected_backup.txt
    • MeDIP_Emilia_copy_DEL
    • metadata-9563675-processed-ok.tsv
    • Metagenomics_Tools_and_Insights.pdf
    • Method_biopython
    • ‘Miseq Amplikon LAuf April.xlsx’
    • mkg_sprechstundenflyer_ver1b_dezember_2019.pdf
    • MMcPaper
    • multiqc_config.yaml
    • my_flask
    • Nachweis_Bakterien_Viren_im_Hochdurchsatz.pdf
    • Nanopore.handouts.pdf
    • NGS
    • NGS.tar.gz
    • Nicole8_Lamprecht_logs
    • ‘Norovirus paper Susanne 191105.docx’
    • Okazaki-Seq_Processing
    • p11326_OMIKRON3398_corsurv.gb
    • p11326_OMIKRON3398_corsurv.gb_converted.fna
    • pangenome-snakemake-master.zip
    • pangenome-snakemake_zhaoc1
    • papers
    • parseGenbank_reformat.py
    • PhyloRNAalifold.pdf
    • ‘phylo tree draft.pdf’
    • pool_b1_CGATGT_300.zip
    • pyflow-epilogos
    • qiime_params_backup.txt
    • qiime_params_s16_s18.txt
    • qiime_params.txt
    • Rawdata_Readme.pdf
    • raw_data_rnaseq_Indra
    • R_cats_package
    • R_DataCamp
    • README_R
    • README_RNAHiSwitch_DEL
    • results_description.html
    • rnaalihishapes.tar.gz
    • RNAConSLOptV1.2
    • RNAConSLOptV1.2.tar.gz
    • RNAHeliCes
    • RNA_li_HeliCes
    • RNAliHeliCes
    • RNAliHeliCes_Relatedshapes_modified
    • RNA-NGS_Analysis_modul3_NanoStringNorm
    • RNA-NGS_Analysis_modul3_NanoStringNorm.zip
    • rnaseq_length_bias.pdf
    • roentgenpass.pdf
    • R_refcard
    • ‘RSV GFP5 including 3`UTR.docx’
    • R_tutorials-master
    • R_tutorials-master.zip
    • salmon_tx2gene_chrHsv1.tsv
    • salmon_tx2gene_GRCh38.tsv
    • ‘sample IDs_Lamprecht.xlsx’
    • SERVER
    • SnakeChunks
    • Snakefile_list
    • snakePipes
    • SNPs_on_pangenome.txt
    • Source_Classification_Code.rds
    • S_staphylococcus_annotated_diff_expr.xls
    • SUB10826945_record_preview.txt
    • summarySCC_PM25.rds
    • Supplementary_Table_S3.xlsx
    • test_raw_data_dnaseq
    • test_raw_data_rnaseq
    • to_Francesco
    • tutorial-rnaseq.pdf
    • ukepipe
    • ukepipe_nf
    • UniproUGENE_UserManual.pdf
    • Untitled1.ipynb
    • Untitled2.ipynb
    • Untitled3.ipynb
    • Untitled.ipynb
    • untitled.py
    • var_www_DjangoApp_mysite2_2023-05
    • WAC6h_vs_WAP6h_down.txt
    • WAC6h_vs_WAP6h_up.txt
    • webapp.tar.gz
    • x.log
  18. DATA_D

    • Books_DA_for_Life

    • Data_Marc_RNA-seq_Sepidermidis -> /media/jhuang/Titisee/Data_Marc_RNA-seq_Sepidermidis

    • Data_Patricia_Transposon

    • Data_Paul_HD46_1-wt_resequencing

    • Data_Pietschmann_Mutations

    • Data_Samira_Manuscripts

    • Data_Sanam_DAMIAN

    • Data_Silvia_VoltRon_Debug

    • Datasize_calculation_based_on_coverage.txt

    • Data_Soeren_RNA-seq_2023_PUBLISHING -> /media/jhuang/Elements/Data_Soeren_RNA-seq_2023_PUBLISHING/

    • Data_Sven

    • Data_Svenja_RSV_Probe3_PUBLISHING -> /media/jhuang/Elements/Data_Svenja_RSV_Probe3_PUBLISHING

    • Data_Tam_variant_calling

    • Data_Ute -> /media/jhuang/Elements/Data_Ute/

    • 126.3.1.1.2.010.02 (enp0s31f6: 10.169.63.124); 126.3.1.1.2.010.03 (eno2: 10.169.63.115); 126.2.1.1.2.010.04 (enp4s0: 10.169.63.113)