gene_x 0 like s 498 view s
Tags: protein, pipeline
This step uses rsync to download the data from the NCBI server into a local directory; all GFF files are then saved in the directory prokka.
rsync --copy-links --recursive --times --verbose rsync://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/696/305/GCF_001696305.1_UCN72.1 Yersinia_pestis_1045
GCF_001656035.1_ASM165603v1_genomic.fna.gz
070 status=suppressed
jhuang@hamburg:~/DATA/Data_Gunnar_Yersiniomics$ cp data/Yersinia_pseudotuberculosis_PB1+/GCF_000020085.1_ASM2008v1/GCF_000020085.1_ASM2008v1_genomic.fna.gz assembly/Yersinia_pseudotuberculosis_PB1+.fna.gz
cp: cannot stat 'data/Yersinia_pseudotuberculosis_PB1+/GCF_000020085.1_ASM2008v1/GCF_000020085.1_ASM2008v1_genomic.fna.gz': No such file or directory
088
jhuang@hamburg:~/DATA/Data_Gunnar_Yersiniomics$ cp data/Yersinia_pseudotuberculosis_YPIII/GCF_000019465.1_ASM1946v1/GCF_000019465.1_ASM1946v1_genomic.fna.gz assembly/Yersinia_pseudotuberculosis_YPIII.fna.gz
cp: cannot stat 'data/Yersinia_pseudotuberculosis_YPIII/GCF_000019465.1_ASM1946v1/GCF_000019465.1_ASM1946v1_genomic.fna.gz': No such file or directory
#status=latest
# Run mlst on every assembly and append its output to ../mlst/all.txt.
# The sample names live in a bash array: unlike the word list of a
# `for ... in` header, an array literal may span several lines, so the list
# can be wrapped without ending the loop header early.
samples=(
Yersinia_pestis_1045 Yersinia_pestis_SCPM-O-B-6291_C-25 Yersinia_pestis_2944 Yersinia_pestis_KIM10+ Yersinia_pestis_M-1482 Yersinia_pestis_KIM5 Yersinia_pestis_C-781 Yersinia_pestis_14D Yersinia_pestis_KM_567 Yersinia_pestis_M-1770 Yersinia_pestis_C-792 Yersinia_pestis_M2086 Yersinia_pestis_Harbin_35 Yersinia_pestis_Nicholisk_41 Yersinia_pestis_Harbin_35_bis Yersinia_pestis_SCPM-O-B-5935_I-1996 Yersinia_pestis_I-1252 Yersinia_pestis_FDAARGOS_603 Yersinia_pestis_195P Yersinia_pestis_Nepal516 Yersinia_pestis_S19960127 Yersinia_pestis_SCPM-O-B-6530 Yersinia_pestis_C-783 Yersinia_pestis_A1122 Yersinia_pestis_Cadman Yersinia_pestis_A1122_bis Yersinia_pestis_CO92_pgm-_pPCP1- Yersinia_pestis_CO92 Yersinia_pestis_Shasta Yersinia_pestis_Dodson Yersinia_pestis_El_Dorado Yersinia_pestis_EV76-CN Yersinia_pestis_EV_NIIEG Yersinia_pestis_Java9 Yersinia_pestis_PBM19 Yersinia_pestis_20 Yersinia_pestis_D182038 Yersinia_pestis_D106004 Yersinia_pestis_Z176003 Yersinia_pestis_Antiqua_bis Yersinia_pestis_FDAARGOS_601 Yersinia_pestis_Antiqua Yersinia_pestis_Nairobi Yersinia_pestis_M2085 Yersinia_pestis_SCPM-O-B-5942_I-2638 Yersinia_pestis_M2029 Yersinia_pestis_SCPM-O-DNA-18_I-3113 Yersinia_pestis_94 Yersinia_pestis_R Yersinia_pestis_790 Yersinia_pestis_SCPM-O-B-6899_231 Yersinia_pestis_FDAARGOS_602 Yersinia_pestis_Pestoides_B Yersinia_pestis_M-1974 Yersinia_pestis_91001 Yersinia_pestis_Angola Yersinia_pestis_Angola_bis Yersinia_pestis_3770 Yersinia_pestis_1412 Yersinia_pestis_1413 Yersinia_pestis_8787 Yersinia_pestis_3067 Yersinia_pestis_Pestoides_G Yersinia_pestis_Pestoides_F Yersinia_pestis_Pestoides_F_bis Yersinia_pestis_1522 Yersinia_pseudotuberculosis_FDAARGOS_582 Yersinia_pseudotuberculosis_NZYP4713 Yersinia_pseudotuberculosis_NCTC8480 Yersinia_pseudotuberculosis_PB1+_bis Yersinia_pseudotuberculosis_MD67 Yersinia_pseudotuberculosis_NCTC10217 Yersinia_pseudotuberculosis_NCTC10275 Yersinia_pseudotuberculosis_1 Yersinia_pseudotuberculosis_IP32953
Yersinia_pseudotuberculosis_IP32953_bis Yersinia_pseudotuberculosis_FDAARGOS_583 Yersinia_pseudotuberculosis_FDAARGOS_581 Yersinia_pseudotuberculosis_ATCC_6904 Yersinia_pseudotuberculosis_EP2+ Yersinia_pseudotuberculosis_IP31758 Yersinia_pseudotuberculosis_598 Yersinia_pseudotuberculosis_PA3606 Yersinia_pseudotuberculosis_FDAARGOS_665 Yersinia_pseudotuberculosis_FDAARGOS_584 Yersinia_pseudotuberculosis_YPIII_bis Yersinia_pseudotuberculosis_FDAARGOS_579 Yersinia_pseudotuberculosis_IP2666pIB1 Yersinia_pseudotuberculosis_FDAARGOS_342 Yersinia_pseudotuberculosis_FDAARGOS_580 Yersinia_pseudotuberculosis_NCTC3571 Yersinia_similis_228 Yersinia_enterocolitica_NCTC13629 Yersinia_enterocolitica_MGYG-HGUT-02335 Yersinia_enterocolitica_Y1 Yersinia_enterocolitica_Y11 Yersinia_enterocolitica_NCTC13769 Yersinia_enterocolitica_FDAARGOS_1082 Yersinia_enterocolitica_2516-87 Yersinia_enterocolitica_KNG22703 Yersinia_enterocolitica_1055Rr Yersinia_enterocolitica_FDAARGOS_1090 Yersinia_enterocolitica_YE1 Yersinia_enterocolitica_YE3 Yersinia_enterocolitica_YE6 Yersinia_enterocolitica_YE7 Yersinia_enterocolitica_YE5 Yersinia_enterocolitica_YE165 Yersinia_enterocolitica_8081 Yersinia_enterocolitica_8081_bis Yersinia_enterocolitica_NCTC12982 Yersinia_enterocolitica_WA Yersinia_enterocolitica_NW57 Yersinia_enterocolitica_NW117 Yersinia_enterocolitica_NW51 Yersinia_enterocolitica_NW56 Yersinia_enterocolitica_NW115 Yersinia_enterocolitica_NW67 Yersinia_enterocolitica_FORC_002 Yersinia_enterocolitica_FORC_002_bis Yersinia_enterocolitica_NW66 Yersinia_enterocolitica_MP98 Yersinia_enterocolitica_Gp259 Yersinia_enterocolitica_FORC066 Yersinia_enterocolitica_Gp2 Yersinia_enterocolitica_str_YE5303 Yersinia_enterocolitica_Gp200 Yersinia_enterocolitica_NW116 Yersinia_enterocolitica_Gp169 Yersinia_enterocolitica_NW1 Yersinia_enterocolitica_FORC065 Yersinia_frederiksenii_Y225 Yersinia_kristensenii_Y231 Yersinia_rochesterensis_ATCC_33639 Yersinia_rochesterensis_ATCC_BAA-2637
Yersinia_intermedia_SCPM-O-B-9106_C-191 Yersinia_kristensenii_2012N-4030 Yersinia_hibernica_CFS1934 Yersinia_hibernica_LC20 Yersinia_canariae_NCTC_14382 Yersinia_frederiksenii_FDAARGOS_418 Yersinia_alsatica_SCPM-O-B-7604 Yersinia_rohdei_YRA Yersinia_massiliensis_GTA Yersinia_massiliensis_2011N-4075 Yersinia_frederiksenii_FDAARGOS_417 Yersinia_intermedia_SCPM-O-B-8026_C-146 Yersinia_sp_KBS0713 Yersinia_bercovieri_ATCC_43970 Yersinia_aleksiciae_159 Yersinia_mollaretii_ATCC_43969 Yersinia_intermedia_FDAARGOS_729 Yersinia_intermedia_FDAARGOS_730 Yersinia_intermedia_NCTC11469 Yersinia_intermedia_FDAARGOS_358 Yersinia_sp_FDAARGOS_228 Yersinia_intermedia_Y228 Yersinia_intermedia_N6293 Yersinia_intermedia_SCPM-O-B-10209_333 Yersinia_aldovae_670-83 Yersinia_ruckeri_NHV_3758 Yersinia_ruckeri_NVI-10705 Yersinia_ruckeri_NVI-1292 Yersinia_ruckeri_NVI-4570 Yersinia_ruckeri_NVI-6614 Yersinia_ruckeri_NVI-11267 Yersinia_ruckeri_NVI-11294 Yersinia_ruckeri_NVI-10571 Yersinia_ruckeri_NVI-8524 Yersinia_ruckeri_NVI-1176 Yersinia_ruckeri_NVI-701 Yersinia_ruckeri_17Y0412 Yersinia_ruckeri_17Y0414 Yersinia_ruckeri_NVI-492 Yersinia_ruckeri_NVI-9681 Yersinia_ruckeri_SC09 Yersinia_ruckeri_17Y0157 Yersinia_ruckeri_17Y0189 Yersinia_ruckeri_17Y0153 Yersinia_ruckeri_17Y0155 Yersinia_ruckeri_KMM821 Yersinia_ruckeri_16Y0180 Yersinia_ruckeri_NVI-11050 Yersinia_ruckeri_NVI-11076 Yersinia_ruckeri_QMA0440 Yersinia_ruckeri_Big_Creek_74 Yersinia_ruckeri_NVI-5089 Yersinia_ruckeri_NVI-10587 Yersinia_ruckeri_NVI-4840 Yersinia_ruckeri_NVI-4479 Yersinia_ruckeri_17Y0161 Yersinia_ruckeri_17Y0163 Yersinia_ruckeri_NVI-11073 Yersinia_ruckeri_NVI-11065 Yersinia_ruckeri_17Y0159 Yersinia_ruckeri_NVI-8270 Yersinia_ruckeri_YRB Yersinia_entomophaga_MH96
)
for sample in "${samples[@]}"; do
    # Results of all samples accumulate in one file (append, never truncate).
    mlst "${sample}.fna" >> ../mlst/all.txt
done
#gene-M486_RS20950
#M486_RS20950
#extract CDS with locus_tag from genbank file
#cut -d' ' -f1 ../assembly/${sample}.fna > ../assembly/${sample}.fasta;
#cat ${sample}.gff ../assembly/${sample}.fasta > ../prokka_plus/$(echo $sample | cut -d'_' -f3- | tr " " "_").gff;
#sed -i 's/###/##FASTA/g' ../prokka_plus/$(echo $sample | cut -d'_' -f3- | tr " " "_").gff;
(This modification is important: only with it can we track the gene ID.) This step processes the GFF files containing gene annotations for a set of samples in the directory prokka. The primary goal is to modify the GFF files and save the new versions, with specific changes, in the directory prokka_plus. The script processes the samples one by one and performs the following steps for each sample:
* Replace all occurrences of \tCDS\t with _CDS_ in the original GFF file.
* Extract all lines containing _CDS_ and save them in a new file with the suffix _CDS.gff.
* Replace all occurrences of ID= with ID_old= in the new _CDS.gff file.
* Cut the second field (delimited by ;) from the _CDS.gff file and save it in a new file with the suffix _CDS_f2.
* Replace all occurrences of Parent=gene- with ID= in the _CDS_f2 file.
* Paste the contents of the _CDS.gff and _CDS_f2 files side by side, with a ; delimiter, and save the result in a new file with the suffix _CDS_.gff.
* Run the enum.py script on the _CDS_.gff file to add line numbers at the end, and save the result in a new file with the suffix _CDS__.gff.
import sys


def number_lines(filename):
    """Print every line of ``filename`` with ``_<line number>`` appended.

    Each line is stripped of leading/trailing whitespace before the suffix is
    added; numbering starts at 1. Output goes to standard output.

    Raises:
        FileNotFoundError: if ``filename`` does not exist.
    """
    with open(filename) as handle:
        for index, line in enumerate(handle, start=1):
            print(f"{line.strip()}_{index}")


def main(argv):
    """Command-line entry point; returns the process exit status.

    ``argv`` is the full argument vector (``argv[1]`` is the input file).
    Diagnostics are written to standard error so that they never end up in
    the output when standard output is redirected into a file.
    """
    if len(argv) < 2:
        print("Please provide a filename as an argument.", file=sys.stderr)
        return 1
    filename = argv[1]
    try:
        number_lines(filename)
    except FileNotFoundError:
        print(f"File {filename} not found.", file=sys.stderr)
        # Signal the failure to the calling shell instead of exiting with 0.
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
* Extract all lines from the original GFF file that do not contain _CDS_ and save them in a new file with the suffix _nonCDS.gff.
* Remove all lines containing ### from the _nonCDS.gff file and save the result in a new file with the suffix _nonCDS_.gff.
* Concatenate the contents of the _nonCDS_.gff and _CDS__.gff files and save the result in a new file with the suffix _nonCDS_CDS.gff.
* Replace all occurrences of _CDS_ with \tCDS\t in the _nonCDS_CDS.gff file.
* Append the string ##FASTA to the end of the _nonCDS_CDS.gff file.
* Create a simplified copy of the FASTA file associated with the sample (../assembly/<sample>.fasta) by keeping only the first space-delimited field of each line, i.e. truncating every header line at its first space.
* Concatenate the modified GFF file (_nonCDS_CDS.gff) and the modified FASTA file, and save the result in the ../prokka_plus/ directory with a new name based on the sample name.
* After processing all samples, the script removes intermediate files generated during the process.
# ERROR: Input file contains duplicate gene IDs, attempting to fix by adding a unique suffix, new GFF in the fixed_input_files directory: /mnt/Samsung_T5/Data_Gunnar_Yersiniomics/prokka_plus/1045.gff
# To debug the error above, process the data as follows.
for sample in Yersinia_pestis_1045 Yersinia_pestis_SCPM-O-B-6291_C-25 Yersinia_pestis_2944 Yersinia_pestis_KIM10+ Yersinia_pestis_M-1482 Yersinia_pestis_KIM5 Yersinia_pestis_C-781 Yersinia_pestis_14D Yersinia_pestis_KM_567 Yersinia_pestis_M-1770 Yersinia_pestis_C-792 Yersinia_pestis_M2086 Yersinia_pestis_Harbin_35 Yersinia_pestis_Nicholisk_41 Yersinia_pestis_Harbin_35_bis Yersinia_pestis_SCPM-O-B-5935_I-1996 Yersinia_pestis_I-1252 Yersinia_pestis_FDAARGOS_603 Yersinia_pestis_195P Yersinia_pestis_Nepal516 Yersinia_pestis_S19960127 Yersinia_pestis_SCPM-O-B-6530 Yersinia_pestis_C-783 Yersinia_pestis_A1122 Yersinia_pestis_Cadman Yersinia_pestis_A1122_bis Yersinia_pestis_CO92_pgm-_pPCP1- Yersinia_pestis_CO92 Yersinia_pestis_Shasta Yersinia_pestis_Dodson Yersinia_pestis_El_Dorado Yersinia_pestis_EV76-CN Yersinia_pestis_EV_NIIEG Yersinia_pestis_Java9 Yersinia_pestis_PBM19 Yersinia_pestis_20 Yersinia_pestis_D182038 Yersinia_pestis_D106004 Yersinia_pestis_Z176003 Yersinia_pestis_Antiqua_bis Yersinia_pestis_FDAARGOS_601 Yersinia_pestis_Antiqua Yersinia_pestis_Nairobi Yersinia_pestis_M2085 Yersinia_pestis_SCPM-O-B-5942_I-2638 Yersinia_pestis_M2029 Yersinia_pestis_SCPM-O-DNA-18_I-3113 Yersinia_pestis_94 Yersinia_pestis_R Yersinia_pestis_790 Yersinia_pestis_SCPM-O-B-6899_231 Yersinia_pestis_FDAARGOS_602 Yersinia_pestis_Pestoides_B Yersinia_pestis_M-1974 Yersinia_pestis_91001 Yersinia_pestis_Angola Yersinia_pestis_Angola_bis Yersinia_pestis_3770 Yersinia_pestis_1412 Yersinia_pestis_1413 Yersinia_pestis_8787 Yersinia_pestis_3067 Yersinia_pestis_Pestoides_G Yersinia_pestis_Pestoides_F Yersinia_pestis_Pestoides_F_bis Yersinia_pestis_1522 Yersinia_pseudotuberculosis_FDAARGOS_582 Yersinia_pseudotuberculosis_NZYP4713 Yersinia_pseudotuberculosis_NCTC8480 Yersinia_pseudotuberculosis_PB1+_bis Yersinia_pseudotuberculosis_MD67 Yersinia_pseudotuberculosis_NCTC10217 Yersinia_pseudotuberculosis_NCTC10275 Yersinia_pseudotuberculosis_1 Yersinia_pseudotuberculosis_IP32953 
Yersinia_pseudotuberculosis_IP32953_bis Yersinia_pseudotuberculosis_FDAARGOS_583 Yersinia_pseudotuberculosis_FDAARGOS_581 Yersinia_pseudotuberculosis_ATCC_6904 Yersinia_pseudotuberculosis_EP2+ Yersinia_pseudotuberculosis_IP31758 Yersinia_pseudotuberculosis_598 Yersinia_pseudotuberculosis_PA3606 Yersinia_pseudotuberculosis_FDAARGOS_665 Yersinia_pseudotuberculosis_FDAARGOS_584 Yersinia_pseudotuberculosis_YPIII_bis Yersinia_pseudotuberculosis_FDAARGOS_579 Yersinia_pseudotuberculosis_IP2666pIB1 Yersinia_pseudotuberculosis_FDAARGOS_342 Yersinia_pseudotuberculosis_FDAARGOS_580 Yersinia_pseudotuberculosis_NCTC3571 Yersinia_similis_228 Yersinia_enterocolitica_NCTC13629 Yersinia_enterocolitica_MGYG-HGUT-02335 Yersinia_enterocolitica_Y1 Yersinia_enterocolitica_Y11 Yersinia_enterocolitica_NCTC13769 Yersinia_enterocolitica_FDAARGOS_1082 Yersinia_enterocolitica_2516-87 Yersinia_enterocolitica_KNG22703 Yersinia_enterocolitica_1055Rr Yersinia_enterocolitica_FDAARGOS_1090 Yersinia_enterocolitica_YE1 Yersinia_enterocolitica_YE3 Yersinia_enterocolitica_YE6 Yersinia_enterocolitica_YE7 Yersinia_enterocolitica_YE5 Yersinia_enterocolitica_YE165 Yersinia_enterocolitica_8081 Yersinia_enterocolitica_8081_bis Yersinia_enterocolitica_NCTC12982 Yersinia_enterocolitica_WA Yersinia_enterocolitica_NW57 Yersinia_enterocolitica_NW117 Yersinia_enterocolitica_NW51 Yersinia_enterocolitica_NW56 Yersinia_enterocolitica_NW115 Yersinia_enterocolitica_NW67 Yersinia_enterocolitica_FORC_002 Yersinia_enterocolitica_FORC_002_bis Yersinia_enterocolitica_NW66 Yersinia_enterocolitica_MP98 Yersinia_enterocolitica_Gp259 Yersinia_enterocolitica_FORC066 Yersinia_enterocolitica_Gp2 Yersinia_enterocolitica_str_YE5303 Yersinia_enterocolitica_Gp200 Yersinia_enterocolitica_NW116 Yersinia_enterocolitica_Gp169 Yersinia_enterocolitica_NW1 Yersinia_enterocolitica_FORC065 Yersinia_frederiksenii_Y225 Yersinia_kristensenii_Y231 Yersinia_rochesterensis_ATCC_33639 Yersinia_rochesterensis_ATCC_BAA-2637 
Yersinia_intermedia_SCPM-O-B-9106_C-191 Yersinia_kristensenii_2012N-4030 Yersinia_hibernica_CFS1934 Yersinia_hibernica_LC20 Yersinia_canariae_NCTC_14382 Yersinia_frederiksenii_FDAARGOS_418 Yersinia_alsatica_SCPM-O-B-7604 Yersinia_rohdei_YRA Yersinia_massiliensis_GTA Yersinia_massiliensis_2011N-4075 Yersinia_frederiksenii_FDAARGOS_417 Yersinia_intermedia_SCPM-O-B-8026_C-146 Yersinia_sp_KBS0713 Yersinia_bercovieri_ATCC_43970 Yersinia_aleksiciae_159 Yersinia_mollaretii_ATCC_43969 Yersinia_intermedia_FDAARGOS_729 Yersinia_intermedia_FDAARGOS_730 Yersinia_intermedia_NCTC11469 Yersinia_intermedia_FDAARGOS_358 Yersinia_sp_FDAARGOS_228 Yersinia_intermedia_Y228 Yersinia_intermedia_N6293 Yersinia_intermedia_SCPM-O-B-10209_333 Yersinia_aldovae_670-83 Yersinia_ruckeri_NHV_3758 Yersinia_ruckeri_NVI-10705 Yersinia_ruckeri_NVI-1292 Yersinia_ruckeri_NVI-4570 Yersinia_ruckeri_NVI-6614 Yersinia_ruckeri_NVI-11267 Yersinia_ruckeri_NVI-11294 Yersinia_ruckeri_NVI-10571 Yersinia_ruckeri_NVI-8524 Yersinia_ruckeri_NVI-1176 Yersinia_ruckeri_NVI-701 Yersinia_ruckeri_17Y0412 Yersinia_ruckeri_17Y0414 Yersinia_ruckeri_NVI-492 Yersinia_ruckeri_NVI-9681 Yersinia_ruckeri_SC09 Yersinia_ruckeri_17Y0157 Yersinia_ruckeri_17Y0189 Yersinia_ruckeri_17Y0153 Yersinia_ruckeri_17Y0155 Yersinia_ruckeri_KMM821 Yersinia_ruckeri_16Y0180 Yersinia_ruckeri_NVI-11050 Yersinia_ruckeri_NVI-11076 Yersinia_ruckeri_QMA0440 Yersinia_ruckeri_Big_Creek_74 Yersinia_ruckeri_NVI-5089 Yersinia_ruckeri_NVI-10587 Yersinia_ruckeri_NVI-4840 Yersinia_ruckeri_NVI-4479 Yersinia_ruckeri_17Y0161 Yersinia_ruckeri_17Y0163 Yersinia_ruckeri_NVI-11073 Yersinia_ruckeri_NVI-11065 Yersinia_ruckeri_17Y0159 Yersinia_ruckeri_NVI-8270 Yersinia_ruckeri_YRB Yersinia_entomophaga_MH96; do
# For each sample: give every CDS record an ID derived from its parent gene
# plus a running number, append the assembly sequence, and write the combined
# file to ../prokka_plus/<strain>.gff.
# NOTE: the first sed edits ${sample}.gff in place, so the input annotation
# keeps the _CDS_ placeholder after this loop has run.
for sample in Yersinia_pestis_1045 Yersinia_pestis_SCPM-O-B-6291_C-25 Yersinia_pestis_2944 Yersinia_pestis_KIM10+ Yersinia_pestis_M-1482; do
    # Without both inputs the steps below would only produce a stub file
    # (just "##FASTA") in ../prokka_plus, so skip the sample loudly instead.
    if [ ! -f "${sample}.gff" ] || [ ! -f "../assembly/${sample}.fna" ]; then
        echo "Skipping ${sample}: ${sample}.gff or ../assembly/${sample}.fna not found" >&2
        continue
    fi

    # Output name = sample name without its first two '_'-separated fields.
    out_name=$(echo "$sample" | cut -d'_' -f3- | tr " " "_")

    # Mark CDS records with a greppable placeholder (_CDS_).
    sed -i 's/\tCDS\t/_CDS_/g' "${sample}.gff"
    grep "_CDS_" "${sample}.gff" > "${sample}_CDS.gff"

    # Keep the old ID under another key and build a new ID from the second
    # ';'-separated field (Parent=gene-<locus> becomes ID=<locus>).
    sed -i 's/ID=/ID_old=/g' "${sample}_CDS.gff"
    cut -d';' -f2 "${sample}_CDS.gff" > "${sample}_CDS_f2"
    sed -i 's/Parent=gene-/ID=/g' "${sample}_CDS_f2"
    paste -d';' "${sample}_CDS.gff" "${sample}_CDS_f2" > "${sample}_CDS_.gff"

    # Append the line number to each line to avoid identical gene IDs.
    python enum.py "${sample}_CDS_.gff" > "${sample}_CDS__.gff"

    # All other records, without the '###' lines.
    grep -v "_CDS_" "${sample}.gff" > "${sample}_nonCDS.gff"
    grep -v "###" "${sample}_nonCDS.gff" > "${sample}_nonCDS_.gff"

    # Re-assemble the annotation, restore the CDS column and open the FASTA
    # section.
    cat "${sample}_nonCDS_.gff" "${sample}_CDS__.gff" > "${sample}_nonCDS_CDS.gff"
    sed -i 's/_CDS_/\tCDS\t/g' "${sample}_nonCDS_CDS.gff"
    echo "##FASTA" >> "${sample}_nonCDS_CDS.gff"

    # Keep only the first space-delimited field of every FASTA line, then
    # append the sequences to the annotation.
    cut -d' ' -f1 "../assembly/${sample}.fna" > "../assembly/${sample}.fasta"
    cat "${sample}_nonCDS_CDS.gff" "../assembly/${sample}.fasta" > "../prokka_plus/${out_name}.gff"
done
# Remove the intermediate files; the pattern *_CDS.gff also matches the
# *_nonCDS_CDS.gff files. -f keeps rm quiet when a pattern matches nothing.
rm -f -- *_CDS.gff *_CDS_f2 *_CDS_.gff *_CDS__.gff *_nonCDS.gff *_nonCDS_.gff
#for sample in Yersinia_pestis_1045 Yersinia_pestis_SCPM-O-B-6291_C-25 ...; do
#echo $sample | cut -d'_' -f3- | tr " " "_" >> temp
#done
After the standard run of bacto-pipeline, we run Roary in this step. Roary is a tool for pan-genome analysis: it takes annotated bacterial genomes in GFF3 format as input and clusters the genes based on sequence similarity.
# Trial run on four genomes with -i 95 (per the Roary documentation, -i is the
# minimum percentage identity for blastp).
# NOTE(review): both runs name the same output directory (-f ./roary) -
# confirm how Roary handles an already existing directory before re-running.
roary -p 4 -f ./roary -i 95 -cd 99 -s -e -n -v prokka_plus/1045.gff prokka_plus/SCPM-O-B-6291_C-25.gff prokka_plus/2944.gff prokka_plus/KIM10+.gff
# Full run on all genomes with -i 50. The argument list is wrapped with
# trailing backslashes; without them the shell would treat each wrapped line
# as a separate command.
roary -p 4 -f ./roary -i 50 -cd 99 -s -e -n -v prokka_plus/1045.gff prokka_plus/SCPM-O-B-6291_C-25.gff prokka_plus/2944.gff prokka_plus/KIM10+.gff prokka_plus/M-1482.gff prokka_plus/KIM5.gff prokka_plus/C-781.gff prokka_plus/14D.gff prokka_plus/KM_567.gff prokka_plus/M-1770.gff prokka_plus/C-792.gff prokka_plus/M2086.gff prokka_plus/Harbin_35.gff prokka_plus/Nicholisk_41.gff prokka_plus/Harbin_35_bis.gff prokka_plus/SCPM-O-B-5935_I-1996.gff prokka_plus/I-1252.gff prokka_plus/FDAARGOS_603.gff prokka_plus/195P.gff prokka_plus/Nepal516.gff prokka_plus/S19960127.gff prokka_plus/SCPM-O-B-6530.gff prokka_plus/C-783.gff prokka_plus/A1122.gff prokka_plus/Cadman.gff prokka_plus/A1122_bis.gff prokka_plus/CO92_pgm-_pPCP1-.gff prokka_plus/CO92.gff prokka_plus/Shasta.gff prokka_plus/Dodson.gff prokka_plus/El_Dorado.gff prokka_plus/EV76-CN.gff prokka_plus/EV_NIIEG.gff prokka_plus/Java9.gff prokka_plus/PBM19.gff prokka_plus/20.gff prokka_plus/D182038.gff prokka_plus/D106004.gff prokka_plus/Z176003.gff prokka_plus/Antiqua_bis.gff prokka_plus/FDAARGOS_601.gff prokka_plus/Antiqua.gff prokka_plus/Nairobi.gff prokka_plus/M2085.gff prokka_plus/SCPM-O-B-5942_I-2638.gff prokka_plus/M2029.gff prokka_plus/SCPM-O-DNA-18_I-3113.gff prokka_plus/94.gff prokka_plus/R.gff prokka_plus/790.gff prokka_plus/SCPM-O-B-6899_231.gff prokka_plus/FDAARGOS_602.gff prokka_plus/Pestoides_B.gff prokka_plus/M-1974.gff prokka_plus/91001.gff prokka_plus/Angola.gff prokka_plus/Angola_bis.gff prokka_plus/3770.gff prokka_plus/1412.gff prokka_plus/1413.gff prokka_plus/8787.gff prokka_plus/3067.gff prokka_plus/Pestoides_G.gff prokka_plus/Pestoides_F.gff prokka_plus/Pestoides_F_bis.gff prokka_plus/1522.gff prokka_plus/FDAARGOS_582.gff prokka_plus/NZYP4713.gff prokka_plus/NCTC8480.gff prokka_plus/PB1+_bis.gff prokka_plus/MD67.gff prokka_plus/NCTC10217.gff prokka_plus/NCTC10275.gff prokka_plus/1.gff prokka_plus/IP32953.gff prokka_plus/IP32953_bis.gff prokka_plus/FDAARGOS_583.gff prokka_plus/FDAARGOS_581.gff \
prokka_plus/ATCC_6904.gff prokka_plus/EP2+.gff prokka_plus/IP31758.gff prokka_plus/598.gff prokka_plus/PA3606.gff prokka_plus/FDAARGOS_665.gff prokka_plus/FDAARGOS_584.gff prokka_plus/YPIII_bis.gff prokka_plus/FDAARGOS_579.gff prokka_plus/IP2666pIB1.gff prokka_plus/FDAARGOS_342.gff prokka_plus/FDAARGOS_580.gff prokka_plus/NCTC3571.gff prokka_plus/228.gff prokka_plus/NCTC13629.gff prokka_plus/MGYG-HGUT-02335.gff prokka_plus/Y1.gff prokka_plus/Y11.gff prokka_plus/NCTC13769.gff prokka_plus/FDAARGOS_1082.gff prokka_plus/2516-87.gff prokka_plus/KNG22703.gff prokka_plus/1055Rr.gff prokka_plus/FDAARGOS_1090.gff prokka_plus/YE1.gff prokka_plus/YE3.gff prokka_plus/YE6.gff prokka_plus/YE7.gff prokka_plus/YE5.gff prokka_plus/YE165.gff prokka_plus/8081.gff prokka_plus/8081_bis.gff prokka_plus/NCTC12982.gff prokka_plus/WA.gff prokka_plus/NW57.gff prokka_plus/NW117.gff prokka_plus/NW51.gff prokka_plus/NW56.gff prokka_plus/NW115.gff prokka_plus/NW67.gff prokka_plus/FORC_002.gff prokka_plus/FORC_002_bis.gff prokka_plus/NW66.gff prokka_plus/MP98.gff prokka_plus/Gp259.gff prokka_plus/FORC066.gff prokka_plus/Gp2.gff prokka_plus/str_YE5303.gff prokka_plus/Gp200.gff prokka_plus/NW116.gff prokka_plus/Gp169.gff prokka_plus/NW1.gff prokka_plus/FORC065.gff prokka_plus/Y225.gff prokka_plus/Y231.gff prokka_plus/ATCC_33639.gff prokka_plus/ATCC_BAA-2637.gff prokka_plus/SCPM-O-B-9106_C-191.gff prokka_plus/2012N-4030.gff prokka_plus/CFS1934.gff prokka_plus/LC20.gff prokka_plus/NCTC_14382.gff prokka_plus/FDAARGOS_418.gff prokka_plus/SCPM-O-B-7604.gff prokka_plus/YRA.gff prokka_plus/GTA.gff prokka_plus/2011N-4075.gff prokka_plus/FDAARGOS_417.gff prokka_plus/SCPM-O-B-8026_C-146.gff prokka_plus/KBS0713.gff prokka_plus/ATCC_43970.gff prokka_plus/159.gff prokka_plus/ATCC_43969.gff prokka_plus/FDAARGOS_729.gff prokka_plus/FDAARGOS_730.gff prokka_plus/NCTC11469.gff prokka_plus/FDAARGOS_358.gff prokka_plus/FDAARGOS_228.gff prokka_plus/Y228.gff prokka_plus/N6293.gff prokka_plus/SCPM-O-B-10209_333.gff \
prokka_plus/670-83.gff prokka_plus/NHV_3758.gff prokka_plus/NVI-10705.gff prokka_plus/NVI-1292.gff prokka_plus/NVI-4570.gff prokka_plus/NVI-6614.gff prokka_plus/NVI-11267.gff prokka_plus/NVI-11294.gff prokka_plus/NVI-10571.gff prokka_plus/NVI-8524.gff prokka_plus/NVI-1176.gff prokka_plus/NVI-701.gff prokka_plus/17Y0412.gff prokka_plus/17Y0414.gff prokka_plus/NVI-492.gff prokka_plus/NVI-9681.gff prokka_plus/SC09.gff prokka_plus/17Y0157.gff prokka_plus/17Y0189.gff prokka_plus/17Y0153.gff prokka_plus/17Y0155.gff prokka_plus/KMM821.gff prokka_plus/16Y0180.gff prokka_plus/NVI-11050.gff prokka_plus/NVI-11076.gff prokka_plus/QMA0440.gff prokka_plus/Big_Creek_74.gff prokka_plus/NVI-5089.gff prokka_plus/NVI-10587.gff prokka_plus/NVI-4840.gff prokka_plus/NVI-4479.gff prokka_plus/17Y0161.gff prokka_plus/17Y0163.gff prokka_plus/NVI-11073.gff prokka_plus/NVI-11065.gff prokka_plus/17Y0159.gff prokka_plus/NVI-8270.gff prokka_plus/YRB.gff prokka_plus/MH96.gff
#DEL makeblastdb -in fna -dbtype 'nucl' -out fna.db
#DELblastn -db fna.db -query yopK.fasta -out yopK_on_fna.blastn -evalue 10000 -num_threads 15 -outfmt 6
Generate yop*_seq.txt from the Roary output: this step extracts the coding sequences (CDS) of specific genes from multiple genome files and saves them to an output file. Input files: roary/pan_genome_reference.fa and roary/gene_presence_absence.csv. The example below is for yopM.
# List every row of the Roary presence/absence table that mentions yopM;
# the three matching rows are pasted below.
grep "yopM" roary/gene_presence_absence.csv
#6+19+45=70 --> 71
# i.e. the three rows report 6, 19 and 45 sequences (70 in total), while the
# extraction loop further down iterates over 71 gene IDs.
# NOTE(review): CH47_RS00140_4080 is in the loop but in none of the rows below;
# presumably it was added by hand - confirm.
"yopM","","type III secretion system effector YopM","45","45","1","","","","","","1229","1229","1229","","M486_RS20920_3990","","M479_RS01070_4055","M480_RS01170_4076","","M481_RS01115_4071","","","","","","","","","","","","","LDH65_RS21345_4177","","","","","M478_RS01000_4055","M482_RS01070_4063","M483_RS00915_4013","","","M477_RS21610_4128","","","M484_RS01125_4011","","LDH63_RS21760_4259","","","","","","","","","","YPA_RS22550_4200","CH58_RS00945_4248","","","","","","YPO_RS00170_4130","AK38_RS00930_4114","BAY22_RS21640_4174","YPD4_RS21505_4104","YPD8_RS21525_4060","CH61_RS00195_4143","BZ20_RS00435_4174","M0M60_RS21870_4286","","CH46_RS00070_4122","","","","","","","","EGX53_RS00030_4033","EGX52_RS00260_4348","","","","","EGX46_RS00245_4205","","EGX74_RS00040_4070","","","","","","","","","","","","","YPC_RS21075_4024","CH55_RS00770_4123","","DN756_RS21785_4075","","","","CH62_RS00690_4176","","","CH44_RS00795_4078","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","CH63_RS00700_4106","","","CH59_RS00970_4231","","YPDSF_RS21140_4036","BZ18_RS00325_4042","CH43_RS00040_3994","","LDH64_RS21810_4270","S96127_RS00100_4096","","","GCK71_RS22420_4113","GD372_RS22475_4112","","DJY80_RS22415_4098","GCK69_RS22480_4113","","","","GCK70_RS22160_4053","BZ15_RS00325_4183","","","","","","","","","","","","","","","","YPZ3_RS21220_4056",""
"group_5673","yopM","type III secretion system effector YopM","19","19","1","","","","","","1103","1103","1103","","","YE105_RS20595_4018","","","","","","","","","","","","","","","","","","","","","CH48_RS00390_4060","","","","YP598_RS21115_4110","","","YE_RS21175_4135","CH49_RS00235_4177","","YP_RS21285_4111","","","","","","","","","YPANGOLA_RS22070_4036","CH56_RS22160_4084","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","XM56_RS20545_4037","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","BZ19_RS21445_4113","","","CH60_RS01070_4100","","","","","","","","","","","","","","","","","","","","YEY1_RS21430_4040","Y11_RS21100_4128","","","","BFS78_RS21580_4258","BB936_RS22285_4398","BED35_RS00500_4353","BED32_RS00030_4182","BED33_RS21910_4325","BED34_RS22270_4407","","","","",""
"group_23005","yopM","type III secretion system effector YopM","6","6","1","","","","","","1589","1589","1589","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","EGX47_RS00105_4453","EGX44_RS00020_4153","EGX39_RS00330_3982","","","","","","","","","","","","","","","","","","","","","","YPTB_RS21675_4159","BZ17_RS00175_4115","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","BN7064_RS22100_4159","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","",""
# Collect the yopM sequences of all genomes in yopM_seq.txt.
#
# For every gene ID and every GenBank file, extract_CDS_of_a_locus_tag.py is
# called with the file and the locus tag; whenever it prints something, one
# line "<genome directory>\t<script output>" is appended to yopM_seq.txt.
#
# gene IDs   : taken from the yopM rows of roary/gene_presence_absence.csv;
#              only the first two "_"-separated fields (the locus tag, e.g.
#              M486_RS20920 for M486_RS20920_3990) are passed to the script.
# gbff_files : gzipped GenBank files, paths relative to the current directory;
#              the first path component is used as the genome name.
#
# The genome list lives in an array: a raw newline inside the word list of
# `for ... in` ends the list and is a syntax error, whereas newlines between
# array elements are fine.
: > yopM_seq.txt   # start with an empty output file
gbff_files=(
  Yersinia_massiliensis_2011N-4075/GCF_013282765.1_ASM1328276v1/GCF_013282765.1_ASM1328276v1_genomic.gbff.gz
  Yersinia_pestis_EV_NIIEG/GCF_000590535.2_ASM59053v2/GCF_000590535.2_ASM59053v2_genomic.gbff.gz
  Yersinia_pestis_Shasta/GCF_000834335.1_ASM83433v1/GCF_000834335.1_ASM83433v1_genomic.gbff.gz
  Yersinia_ruckeri_NVI-492/GCF_023212565.2_ASM2321256v2/GCF_023212565.2_ASM2321256v2_genomic.gbff.gz
  Yersinia_pestis_Pestoides_G/GCF_000834985.1_ASM83498v1/GCF_000834985.1_ASM83498v1_genomic.gbff.gz
  Yersinia_pestis_Antiqua_bis/GCF_000834825.1_ASM83482v1/GCF_000834825.1_ASM83482v1_genomic.gbff.gz
  Yersinia_pestis_91001/GCF_000007885.1_ASM788v1/GCF_000007885.1_ASM788v1_genomic.gbff.gz
  Yersinia_intermedia_Y228/GCF_000834515.1_ASM83451v1/GCF_000834515.1_ASM83451v1_genomic.gbff.gz
  Yersinia_pestis_Java9/GCF_000834905.1_ASM83490v1/GCF_000834905.1_ASM83490v1_genomic.gbff.gz
  Yersinia_pseudotuberculosis_IP32953_bis/GCF_000834295.1_ASM83429v1/GCF_000834295.1_ASM83429v1_genomic.gbff.gz
  Yersinia_pseudotuberculosis_YPIII_bis/GCF_000834375.1_ASM83437v1/GCF_000834375.1_ASM83437v1_genomic.gbff.gz
  Yersinia_enterocolitica_8081_bis/GCF_000834795.1_ASM83479v1/GCF_000834795.1_ASM83479v1_genomic.gbff.gz
  Yersinia_sp_FDAARGOS_228/GCF_002073315.2_ASM207331v2/GCF_002073315.2_ASM207331v2_genomic.gbff.gz
  Yersinia_enterocolitica_Gp169/GCF_025758435.1_ASM2575843v1/GCF_025758435.1_ASM2575843v1_genomic.gbff.gz
  Yersinia_pestis_195P/GCF_002005285.1_ASM200528v1/GCF_002005285.1_ASM200528v1_genomic.gbff.gz
  Yersinia_frederiksenii_FDAARGOS_418/GCF_002591195.1_ASM259119v1/GCF_002591195.1_ASM259119v1_genomic.gbff.gz
  Yersinia_pseudotuberculosis_NCTC3571/GCF_900636705.1_43908_A02/GCF_900636705.1_43908_A02_genomic.gbff.gz
  Yersinia_enterocolitica_FORC_002/GCF_000987925.1_ASM98792v1/GCF_000987925.1_ASM98792v1_genomic.gbff.gz
  Yersinia_ruckeri_NVI-1292/GCF_026435275.1_ASM2643527v1/GCF_026435275.1_ASM2643527v1_genomic.gbff.gz
  Yersinia_pestis_3067/GCF_001188795.1_ASM118879v1/GCF_001188795.1_ASM118879v1_genomic.gbff.gz
  Yersinia_pestis_M2086/GCF_015336695.1_ASM1533669v1/GCF_015336695.1_ASM1533669v1_genomic.gbff.gz
  Yersinia_ruckeri_16Y0180/GCF_021399215.1_ASM2139921v1/GCF_021399215.1_ASM2139921v1_genomic.gbff.gz
  Yersinia_pestis_2944/GCF_001188815.1_ASM118881v1/GCF_001188815.1_ASM118881v1_genomic.gbff.gz
  Yersinia_rochesterensis_ATCC_BAA-2637/GCF_003600645.1_ASM360064v1/GCF_003600645.1_ASM360064v1_genomic.gbff.gz
  Yersinia_pestis_Z176003/GCF_000022845.1_ASM2284v1/GCF_000022845.1_ASM2284v1_genomic.gbff.gz
  Yersinia_intermedia_SCPM-O-B-8026_C-146/GCF_026183385.1_ASM2618338v1/GCF_026183385.1_ASM2618338v1_genomic.gbff.gz
  Yersinia_enterocolitica_YE5/GCF_001708615.1_ASM170861v1/GCF_001708615.1_ASM170861v1_genomic.gbff.gz
  Yersinia_enterocolitica_YE6/GCF_001708595.1_ASM170859v1/GCF_001708595.1_ASM170859v1_genomic.gbff.gz
  Yersinia_pestis_CO92_pgm-_pPCP1-/GCF_001293415.1_ASM129341v1/GCF_001293415.1_ASM129341v1_genomic.gbff.gz
  Yersinia_pestis_1412/GCF_001188695.1_ASM118869v1/GCF_001188695.1_ASM118869v1_genomic.gbff.gz
  Yersinia_pestis_El_Dorado/GCF_000834495.1_ASM83449v1/GCF_000834495.1_ASM83449v1_genomic.gbff.gz
  Yersinia_enterocolitica_KNG22703/GCF_001305635.1_ASM130563v1/GCF_001305635.1_ASM130563v1_genomic.gbff.gz
  Yersinia_pestis_M-1770/GCF_015337825.2_ASM1533782v2/GCF_015337825.2_ASM1533782v2_genomic.gbff.gz
  Yersinia_enterocolitica_MP98/GCF_025758515.1_ASM2575851v1/GCF_025758515.1_ASM2575851v1_genomic.gbff.gz
  Yersinia_enterocolitica_NCTC13629/GCF_900635745.1_32868_F02/GCF_900635745.1_32868_F02_genomic.gbff.gz
  Yersinia_pestis_94/GCF_024498395.1_ASM2449839v1/GCF_024498395.1_ASM2449839v1_genomic.gbff.gz
  Yersinia_kristensenii_Y231/GCF_000834865.1_ASM83486v1/GCF_000834865.1_ASM83486v1_genomic.gbff.gz
  Yersinia_pestis_C-783/GCF_015337285.1_ASM1533728v1/GCF_015337285.1_ASM1533728v1_genomic.gbff.gz
  Yersinia_pseudotuberculosis_NCTC8480/GCF_900635715.1_32473_H02/GCF_900635715.1_32473_H02_genomic.gbff.gz
  Yersinia_enterocolitica_NW57/GCF_025758475.1_ASM2575847v1/GCF_025758475.1_ASM2575847v1_genomic.gbff.gz
  Yersinia_enterocolitica_YE1/GCF_001708635.1_ASM170863v1/GCF_001708635.1_ASM170863v1_genomic.gbff.gz
  Yersinia_pestis_790/GCF_001188675.1_ASM118867v1/GCF_001188675.1_ASM118867v1_genomic.gbff.gz
  Yersinia_ruckeri_NVI-11065/GCF_026435655.1_ASM2643565v1/GCF_026435655.1_ASM2643565v1_genomic.gbff.gz
  Yersinia_pestis_14D/GCF_015159615.2_ASM1515961v2/GCF_015159615.2_ASM1515961v2_genomic.gbff.gz
  Yersinia_enterocolitica_NW115/GCF_025758655.1_ASM2575865v1/GCF_025758655.1_ASM2575865v1_genomic.gbff.gz
  Yersinia_enterocolitica_Gp259/GCF_025758265.1_ASM2575826v1/GCF_025758265.1_ASM2575826v1_genomic.gbff.gz
  Yersinia_enterocolitica_FORC066/GCF_025340245.1_ASM2534024v1/GCF_025340245.1_ASM2534024v1_genomic.gbff.gz
  Yersinia_pestis_20/GCF_024498415.1_ASM2449841v1/GCF_024498415.1_ASM2449841v1_genomic.gbff.gz
  Yersinia_pestis_FDAARGOS_602/GCF_003798345.1_ASM379834v1/GCF_003798345.1_ASM379834v1_genomic.gbff.gz
  Yersinia_aleksiciae_159/GCF_001047675.1_ASM104767v1/GCF_001047675.1_ASM104767v1_genomic.gbff.gz
  Yersinia_enterocolitica_Gp2/GCF_025758285.1_ASM2575828v1/GCF_025758285.1_ASM2575828v1_genomic.gbff.gz
  Yersinia_pseudotuberculosis_1/GCF_000834435.1_ASM83443v1/GCF_000834435.1_ASM83443v1_genomic.gbff.gz
  Yersinia_pestis_3770/GCF_001188775.1_ASM118877v1/GCF_001188775.1_ASM118877v1_genomic.gbff.gz
  Yersinia_intermedia_FDAARGOS_729/GCF_009730075.1_ASM973007v1/GCF_009730075.1_ASM973007v1_genomic.gbff.gz
  Yersinia_enterocolitica_NW67/GCF_025758535.1_ASM2575853v1/GCF_025758535.1_ASM2575853v1_genomic.gbff.gz
  Yersinia_intermedia_SCPM-O-B-10209_333/GCF_026183345.1_ASM2618334v1/GCF_026183345.1_ASM2618334v1_genomic.gbff.gz
  Yersinia_ruckeri_17Y0414/GCF_021399075.1_ASM2139907v1/GCF_021399075.1_ASM2139907v1_genomic.gbff.gz
  Yersinia_pestis_SCPM-O-B-6530/GCF_009295985.1_ASM929598v1/GCF_009295985.1_ASM929598v1_genomic.gbff.gz
  Yersinia_pseudotuberculosis_EP2+/GCF_000834415.1_ASM83441v1/GCF_000834415.1_ASM83441v1_genomic.gbff.gz
  Yersinia_pestis_KM_567/GCF_015337445.1_ASM1533744v1/GCF_015337445.1_ASM1533744v1_genomic.gbff.gz
  Yersinia_ruckeri_Big_Creek_74/GCF_000964565.1_ASM96456v1/GCF_000964565.1_ASM96456v1_genomic.gbff.gz
  Yersinia_intermedia_FDAARGOS_358/GCF_002983625.1_ASM298362v1/GCF_002983625.1_ASM298362v1_genomic.gbff.gz
  Yersinia_ruckeri_NVI-9681/GCF_023212445.2_ASM2321244v2/GCF_023212445.2_ASM2321244v2_genomic.gbff.gz
  Yersinia_kristensenii_2012N-4030/GCF_013282785.1_ASM1328278v1/GCF_013282785.1_ASM1328278v1_genomic.gbff.gz
  Yersinia_ruckeri_17Y0157/GCF_021399195.1_ASM2139919v1/GCF_021399195.1_ASM2139919v1_genomic.gbff.gz
  Yersinia_ruckeri_NVI-8270/GCF_026435135.1_ASM2643513v1/GCF_026435135.1_ASM2643513v1_genomic.gbff.gz
  Yersinia_ruckeri_17Y0189/GCF_021399095.1_ASM2139909v1/GCF_021399095.1_ASM2139909v1_genomic.gbff.gz
  Yersinia_ruckeri_NVI-8524/GCF_026435115.1_ASM2643511v1/GCF_026435115.1_ASM2643511v1_genomic.gbff.gz
  Yersinia_pestis_M-1482/GCF_015337645.1_ASM1533764v1/GCF_015337645.1_ASM1533764v1_genomic.gbff.gz
  Yersinia_pestis_Harbin_35_bis/GCF_000834275.1_ASM83427v1/GCF_000834275.1_ASM83427v1_genomic.gbff.gz
  Yersinia_pseudotuberculosis_NCTC10217/GCF_900635755.1_33467_B01/GCF_900635755.1_33467_B01_genomic.gbff.gz
  Yersinia_pseudotuberculosis_598/GCF_020889805.1_ASM2088980v1/GCF_020889805.1_ASM2088980v1_genomic.gbff.gz
  Yersinia_ruckeri_NVI-11267/GCF_026435335.1_ASM2643533v1/GCF_026435335.1_ASM2643533v1_genomic.gbff.gz
  Yersinia_enterocolitica_NW56/GCF_025758635.1_ASM2575863v1/GCF_025758635.1_ASM2575863v1_genomic.gbff.gz
  Yersinia_pestis_Angola/GCF_000018805.1_ASM1880v1/GCF_000018805.1_ASM1880v1_genomic.gbff.gz
  Yersinia_pestis_SCPM-O-DNA-18_I-3113/GCF_009295945.1_ASM929594v1/GCF_009295945.1_ASM929594v1_genomic.gbff.gz
  Yersinia_enterocolitica_Y11/GCF_000253175.1_ASM25317v1/GCF_000253175.1_ASM25317v1_genomic.gbff.gz
  Yersinia_pestis_Dodson/GCF_000834775.1_ASM83477v1/GCF_000834775.1_ASM83477v1_genomic.gbff.gz
  Yersinia_pestis_Cadman/GCF_001693595.1_ASM169359v1/GCF_001693595.1_ASM169359v1_genomic.gbff.gz
  Yersinia_pestis_KIM5/GCF_000970105.1_ASM97010v1/GCF_000970105.1_ASM97010v1_genomic.gbff.gz
  Yersinia_ruckeri_NVI-10705/GCF_023212585.2_ASM2321258v2/GCF_023212585.2_ASM2321258v2_genomic.gbff.gz
  Yersinia_pestis_EV76-CN/GCF_024758685.1_ASM2475868v1/GCF_024758685.1_ASM2475868v1_genomic.gbff.gz
  Yersinia_intermedia_FDAARGOS_730/GCF_009730055.1_ASM973005v1/GCF_009730055.1_ASM973005v1_genomic.gbff.gz
  Yersinia_ruckeri_NVI-11073/GCF_026435495.1_ASM2643549v1/GCF_026435495.1_ASM2643549v1_genomic.gbff.gz
  Yersinia_ruckeri_17Y0161/GCF_021399155.1_ASM2139915v1/GCF_021399155.1_ASM2139915v1_genomic.gbff.gz
  Yersinia_sp_KBS0713/GCF_005937895.2_ASM593789v2/GCF_005937895.2_ASM593789v2_genomic.gbff.gz
  Yersinia_pestis_SCPM-O-B-6899_231/GCF_009295925.1_ASM929592v1/GCF_009295925.1_ASM929592v1_genomic.gbff.gz
  Yersinia_ruckeri_NVI-5089/GCF_026435195.1_ASM2643519v1/GCF_026435195.1_ASM2643519v1_genomic.gbff.gz
  Yersinia_pestis_Nicholisk_41/GCF_000834885.1_ASM83488v1/GCF_000834885.1_ASM83488v1_genomic.gbff.gz
  Yersinia_enterocolitica_YE7/GCF_001708555.1_ASM170855v1/GCF_001708555.1_ASM170855v1_genomic.gbff.gz
  Yersinia_intermedia_SCPM-O-B-9106_C-191/GCF_026183365.1_ASM2618336v1/GCF_026183365.1_ASM2618336v1_genomic.gbff.gz
  Yersinia_canariae_NCTC_14382/GCF_009831415.1_ASM983141v1/GCF_009831415.1_ASM983141v1_genomic.gbff.gz
  Yersinia_enterocolitica_YE3/GCF_001708655.1_ASM170865v1/GCF_001708655.1_ASM170865v1_genomic.gbff.gz
  Yersinia_pseudotuberculosis_NCTC10275/GCF_900637475.1_51108_B01/GCF_900637475.1_51108_B01_genomic.gbff.gz
  Yersinia_enterocolitica_8081/GCF_000009345.1_ASM934v1/GCF_000009345.1_ASM934v1_genomic.gbff.gz
  Yersinia_ruckeri_NVI-10571/GCF_026435835.1_ASM2643583v1/GCF_026435835.1_ASM2643583v1_genomic.gbff.gz
  Yersinia_enterocolitica_2516-87/GCF_000834735.1_ASM83473v1/GCF_000834735.1_ASM83473v1_genomic.gbff.gz
  Yersinia_frederiksenii_FDAARGOS_417/GCF_002591095.1_ASM259109v1/GCF_002591095.1_ASM259109v1_genomic.gbff.gz
  Yersinia_pestis_I-1252/GCF_015336465.1_ASM1533646v1/GCF_015336465.1_ASM1533646v1_genomic.gbff.gz
  Yersinia_ruckeri_17Y0155/GCF_021399235.1_ASM2139923v1/GCF_021399235.1_ASM2139923v1_genomic.gbff.gz
  Yersinia_pseudotuberculosis_FDAARGOS_665/GCF_008693365.1_ASM869336v1/GCF_008693365.1_ASM869336v1_genomic.gbff.gz
  Yersinia_alsatica_SCPM-O-B-7604/GCF_025133195.1_ASM2513319v1/GCF_025133195.1_ASM2513319v1_genomic.gbff.gz
  Yersinia_pseudotuberculosis_PA3606/GCF_000834945.1_ASM83494v1/GCF_000834945.1_ASM83494v1_genomic.gbff.gz
  Yersinia_pestis_KIM10+/GCF_000006645.1_ASM664v1/GCF_000006645.1_ASM664v1_genomic.gbff.gz
  Yersinia_ruckeri_NVI-701/GCF_026435155.1_ASM2643515v1/GCF_026435155.1_ASM2643515v1_genomic.gbff.gz
  Yersinia_enterocolitica_NW117/GCF_025758455.1_ASM2575845v1/GCF_025758455.1_ASM2575845v1_genomic.gbff.gz
  Yersinia_enterocolitica_FORC065/GCA_025340225.1_ASM2534022v1/GCA_025340225.1_ASM2534022v1_genomic.gbff.gz
  Yersinia_enterocolitica_NW1/GCF_025758495.1_ASM2575849v1/GCF_025758495.1_ASM2575849v1_genomic.gbff.gz
  Yersinia_ruckeri_QMA0440/GCF_002192595.1_ASM219259v1/GCF_002192595.1_ASM219259v1_genomic.gbff.gz
  Yersinia_pseudotuberculosis_FDAARGOS_579/GCF_003798305.1_ASM379830v1/GCF_003798305.1_ASM379830v1_genomic.gbff.gz
  Yersinia_enterocolitica_1055Rr/GCF_000192105.1_ASM19210v1/GCF_000192105.1_ASM19210v1_genomic.gbff.gz
  Yersinia_hibernica_CFS1934/GCF_004124235.1_ASM412423v1/GCF_004124235.1_ASM412423v1_genomic.gbff.gz
  Yersinia_pestis_D106004/GCF_000022805.1_ASM2280v1/GCF_000022805.1_ASM2280v1_genomic.gbff.gz
  Yersinia_enterocolitica_Y1/GCF_004368055.1_ASM436805v1/GCF_004368055.1_ASM436805v1_genomic.gbff.gz
  Yersinia_pseudotuberculosis_IP31758/GCF_000016945.1_ASM1694v1/GCF_000016945.1_ASM1694v1_genomic.gbff.gz
  Yersinia_pestis_Pestoides_F_bis/GCF_000834315.1_ASM83431v1/GCF_000834315.1_ASM83431v1_genomic.gbff.gz
  Yersinia_pestis_M-1974/GCF_015336865.1_ASM1533686v1/GCF_015336865.1_ASM1533686v1_genomic.gbff.gz
  Yersinia_ruckeri_NHV_3758/GCF_002442495.2_ASM244249v2/GCF_002442495.2_ASM244249v2_genomic.gbff.gz
  Yersinia_ruckeri_17Y0163/GCF_021399115.1_ASM2139911v1/GCF_021399115.1_ASM2139911v1_genomic.gbff.gz
  Yersinia_pseudotuberculosis_MD67/GCF_000834355.1_ASM83435v1/GCF_000834355.1_ASM83435v1_genomic.gbff.gz
  Yersinia_pestis_D182038/GCF_000022825.1_ASM2282v1/GCF_000022825.1_ASM2282v1_genomic.gbff.gz
  Yersinia_enterocolitica_FDAARGOS_1090/GCF_016727905.1_ASM1672790v1/GCF_016727905.1_ASM1672790v1_genomic.gbff.gz
  Yersinia_bercovieri_ATCC_43970/GCF_013282745.1_ASM1328274v1/GCF_013282745.1_ASM1328274v1_genomic.gbff.gz
  Yersinia_enterocolitica_WA/GCF_000834195.1_ASM83419v1/GCF_000834195.1_ASM83419v1_genomic.gbff.gz
  Yersinia_ruckeri_NVI-10587/GCF_023212425.2_ASM2321242v2/GCF_023212425.2_ASM2321242v2_genomic.gbff.gz
  Yersinia_pestis_R/GCF_024498375.1_ASM2449837v1/GCF_024498375.1_ASM2449837v1_genomic.gbff.gz
  Yersinia_intermedia_N6293/GCF_022637335.1_ASM2263733v1/GCF_022637335.1_ASM2263733v1_genomic.gbff.gz
  Yersinia_ruckeri_NVI-6614/GCF_026435175.1_ASM2643517v1/GCF_026435175.1_ASM2643517v1_genomic.gbff.gz
  Yersinia_hibernica_LC20/GCF_000597945.1_ASM59794v2/GCF_000597945.1_ASM59794v2_genomic.gbff.gz
  Yersinia_ruckeri_17Y0153/GCF_021399175.1_ASM2139917v1/GCF_021399175.1_ASM2139917v1_genomic.gbff.gz
  Yersinia_aldovae_670-83/GCF_000834395.1_ASM83439v1/GCF_000834395.1_ASM83439v1_genomic.gbff.gz
  Yersinia_pestis_SCPM-O-B-5935_I-1996/GCF_009295965.1_ASM929596v1/GCF_009295965.1_ASM929596v1_genomic.gbff.gz
  Yersinia_ruckeri_YRB/GCF_000834255.1_ASM83425v1/GCF_000834255.1_ASM83425v1_genomic.gbff.gz
  Yersinia_enterocolitica_FORC_002_bis/GCF_001304755.1_ASM130475v1/GCF_001304755.1_ASM130475v1_genomic.gbff.gz
  Yersinia_pestis_Antiqua/GCF_000013825.1_ASM1382v1/GCF_000013825.1_ASM1382v1_genomic.gbff.gz
  Yersinia_pestis_Pestoides_B/GCF_000834925.1_ASM83492v1/GCF_000834925.1_ASM83492v1_genomic.gbff.gz
  Yersinia_pestis_M2085/GCF_015338045.2_ASM1533804v2/GCF_015338045.2_ASM1533804v2_genomic.gbff.gz
  Yersinia_pestis_CO92/GCF_000009065.1_ASM906v1/GCF_000009065.1_ASM906v1_genomic.gbff.gz
  Yersinia_ruckeri_17Y0159/GCF_021399135.1_ASM2139913v1/GCF_021399135.1_ASM2139913v1_genomic.gbff.gz
  Yersinia_enterocolitica_NCTC12982/GCF_901472495.1_32868_C01/GCF_901472495.1_32868_C01_genomic.gbff.gz
  Yersinia_pestis_SCPM-O-B-5942_I-2638/GCF_009363195.1_ASM936319v1/GCF_009363195.1_ASM936319v1_genomic.gbff.gz
  Yersinia_pestis_Nepal516/GCF_000013805.1_ASM1380v1/GCF_000013805.1_ASM1380v1_genomic.gbff.gz
  Yersinia_pseudotuberculosis_FDAARGOS_342/GCF_003546905.1_ASM354690v1/GCF_003546905.1_ASM354690v1_genomic.gbff.gz
  Yersinia_ruckeri_SC09/GCF_000775355.2_ASM77535v2/GCF_000775355.2_ASM77535v2_genomic.gbff.gz
  Yersinia_mollaretii_ATCC_43969/GCF_013282725.1_ASM1328272v1/GCF_013282725.1_ASM1328272v1_genomic.gbff.gz
  Yersinia_pestis_Pestoides_F/GCF_000016445.1_ASM1644v1/GCF_000016445.1_ASM1644v1_genomic.gbff.gz
  Yersinia_pestis_Angola_bis/GCF_000834845.1_ASM83484v1/GCF_000834845.1_ASM83484v1_genomic.gbff.gz
  Yersinia_ruckeri_17Y0412/GCF_021399055.1_ASM2139905v1/GCF_021399055.1_ASM2139905v1_genomic.gbff.gz
  Yersinia_pestis_1522/GCF_001188715.1_ASM118871v1/GCF_001188715.1_ASM118871v1_genomic.gbff.gz
  Yersinia_enterocolitica_MGYG-HGUT-02335/GCF_902385945.1_UHGG_MGYG-HGUT-02335/GCF_902385945.1_UHGG_MGYG-HGUT-02335_genomic.gbff.gz
  Yersinia_pestis_C-792/GCF_015337085.2_ASM1533708v2/GCF_015337085.2_ASM1533708v2_genomic.gbff.gz
  Yersinia_ruckeri_NVI-11050/GCF_023212385.2_ASM2321238v2/GCF_023212385.2_ASM2321238v2_genomic.gbff.gz
  Yersinia_intermedia_NCTC11469/GCF_900635455.1_28307_A01/GCF_900635455.1_28307_A01_genomic.gbff.gz
  Yersinia_pseudotuberculosis_FDAARGOS_583/GCF_003798285.1_ASM379828v1/GCF_003798285.1_ASM379828v1_genomic.gbff.gz
  Yersinia_pestis_M2029/GCF_015336265.1_ASM1533626v1/GCF_015336265.1_ASM1533626v1_genomic.gbff.gz
  Yersinia_enterocolitica_Gp200/GCF_025758555.1_ASM2575855v1/GCF_025758555.1_ASM2575855v1_genomic.gbff.gz
  Yersinia_massiliensis_GTA/GCF_003048255.1_ASM304825v1/GCF_003048255.1_ASM304825v1_genomic.gbff.gz
  Yersinia_pestis_A1122_bis/GCF_000834755.1_ASM83475v1/GCF_000834755.1_ASM83475v1_genomic.gbff.gz
  Yersinia_pseudotuberculosis_NZYP4713/GCF_900092345.1_YP4713/GCF_900092345.1_YP4713_genomic.gbff.gz
  Yersinia_pestis_PBM19/GCF_000834235.1_ASM83423v1/GCF_000834235.1_ASM83423v1_genomic.gbff.gz
  Yersinia_enterocolitica_NW116/GCF_025758575.1_ASM2575857v1/GCF_025758575.1_ASM2575857v1_genomic.gbff.gz
  Yersinia_ruckeri_KMM821/GCF_017498685.1_ASM1749868v1/GCF_017498685.1_ASM1749868v1_genomic.gbff.gz
  Yersinia_ruckeri_NVI-4840/GCF_026435215.1_ASM2643521v1/GCF_026435215.1_ASM2643521v1_genomic.gbff.gz
  Yersinia_enterocolitica_FDAARGOS_1082/GCF_016727765.1_ASM1672776v1/GCF_016727765.1_ASM1672776v1_genomic.gbff.gz
  Yersinia_enterocolitica_NW51/GCF_025758615.1_ASM2575861v1/GCF_025758615.1_ASM2575861v1_genomic.gbff.gz
  Yersinia_ruckeri_NVI-11076/GCF_023212325.2_ASM2321232v2/GCF_023212325.2_ASM2321232v2_genomic.gbff.gz
  Yersinia_rohdei_YRA/GCF_000834455.1_ASM83445v1/GCF_000834455.1_ASM83445v1_genomic.gbff.gz
  Yersinia_pestis_C-781/GCF_015336085.1_ASM1533608v1/GCF_015336085.1_ASM1533608v1_genomic.gbff.gz
  Yersinia_pestis_Harbin_35/GCF_000186725.1_ASM18672v1/GCF_000186725.1_ASM18672v1_genomic.gbff.gz
  Yersinia_pseudotuberculosis_ATCC_6904/GCF_000750315.1_ASM75031v1/GCF_000750315.1_ASM75031v1_genomic.gbff.gz
  Yersinia_pseudotuberculosis_FDAARGOS_580/GCF_003798445.1_ASM379844v1/GCF_003798445.1_ASM379844v1_genomic.gbff.gz
  Yersinia_enterocolitica_str_YE5303/GCF_000968115.1_ASM96811v1/GCF_000968115.1_ASM96811v1_genomic.gbff.gz
  Yersinia_pestis_FDAARGOS_601/GCF_003798225.1_ASM379822v1/GCF_003798225.1_ASM379822v1_genomic.gbff.gz
  Yersinia_pestis_SCPM-O-B-6291_C-25/GCF_009296005.1_ASM929600v1/GCF_009296005.1_ASM929600v1_genomic.gbff.gz
  Yersinia_pestis_Nairobi/GCF_000835005.1_ASM83500v1/GCF_000835005.1_ASM83500v1_genomic.gbff.gz
  Yersinia_pseudotuberculosis_FDAARGOS_584/GCF_003798385.1_ASM379838v1/GCF_003798385.1_ASM379838v1_genomic.gbff.gz
  Yersinia_similis_228/GCF_000582515.1_ASM58251v1/GCF_000582515.1_ASM58251v1_genomic.gbff.gz
  Yersinia_pestis_1413/GCF_001188935.1_ASM118893v1/GCF_001188935.1_ASM118893v1_genomic.gbff.gz
  Yersinia_pseudotuberculosis_FDAARGOS_581/GCF_003798425.1_ASM379842v1/GCF_003798425.1_ASM379842v1_genomic.gbff.gz
  Yersinia_entomophaga_MH96/GCF_001656035.1_ASM165603v1/GCF_001656035.1_ASM165603v1_genomic.gbff.gz
  Yersinia_ruckeri_NVI-1176/GCF_026435295.1_ASM2643529v1/GCF_026435295.1_ASM2643529v1_genomic.gbff.gz
  Yersinia_pestis_S19960127/GCF_015190655.1_ASM1519065v1/GCF_015190655.1_ASM1519065v1_genomic.gbff.gz
  Yersinia_ruckeri_NVI-4479/GCF_026435255.1_ASM2643525v1/GCF_026435255.1_ASM2643525v1_genomic.gbff.gz
  Yersinia_frederiksenii_Y225/GCF_000834215.1_ASM83421v1/GCF_000834215.1_ASM83421v1_genomic.gbff.gz
  Yersinia_ruckeri_NVI-4570/GCF_026435235.1_ASM2643523v1/GCF_026435235.1_ASM2643523v1_genomic.gbff.gz
  Yersinia_pseudotuberculosis_IP2666pIB1/GCF_003814345.1_ASM381434v1/GCF_003814345.1_ASM381434v1_genomic.gbff.gz
  Yersinia_pseudotuberculosis_FDAARGOS_582/GCF_003798405.1_ASM379840v1/GCF_003798405.1_ASM379840v1_genomic.gbff.gz
  Yersinia_enterocolitica_NCTC13769/GCF_900637005.1_46582_C01/GCF_900637005.1_46582_C01_genomic.gbff.gz
  Yersinia_pestis_A1122/GCF_000222975.1_ASM22297v1/GCF_000222975.1_ASM22297v1_genomic.gbff.gz
  Yersinia_enterocolitica_YE165/GCF_001708575.1_ASM170857v1/GCF_001708575.1_ASM170857v1_genomic.gbff.gz
  Yersinia_pseudotuberculosis_IP32953/GCF_000047365.1_ASM4736v1/GCF_000047365.1_ASM4736v1_genomic.gbff.gz
  Yersinia_pestis_8787/GCF_001188755.1_ASM118875v1/GCF_001188755.1_ASM118875v1_genomic.gbff.gz
  Yersinia_rochesterensis_ATCC_33639/GCF_000750355.1_ASM75035v1/GCF_000750355.1_ASM75035v1_genomic.gbff.gz
  Yersinia_pestis_FDAARGOS_603/GCF_003798205.1_ASM379820v1/GCF_003798205.1_ASM379820v1_genomic.gbff.gz
  Yersinia_pseudotuberculosis_PB1+_bis/GCF_000834475.1_ASM83447v1/GCF_000834475.1_ASM83447v1_genomic.gbff.gz
  Yersinia_ruckeri_NVI-11294/GCF_026435315.1_ASM2643531v1/GCF_026435315.1_ASM2643531v1_genomic.gbff.gz
  Yersinia_enterocolitica_NW66/GCF_025758595.1_ASM2575859v1/GCF_025758595.1_ASM2575859v1_genomic.gbff.gz
  Yersinia_pestis_1045/GCF_001188735.1_ASM118873v1/GCF_001188735.1_ASM118873v1_genomic.gbff.gz
)
for gene_id in M486_RS20920_3990 YE105_RS20595_4018 M479_RS01070_4055 M480_RS01170_4076 M481_RS01115_4071 LDH65_RS21345_4177 CH48_RS00390_4060 M478_RS01000_4055 M482_RS01070_4063 M483_RS00915_4013 YP598_RS21115_4110 M477_RS21610_4128 YE_RS21175_4135 CH49_RS00235_4177 M484_RS01125_4011 YP_RS21285_4111 LDH63_RS21760_4259 YPANGOLA_RS22070_4036 CH56_RS22160_4084 YPA_RS22550_4200 CH58_RS00945_4248 YPO_RS00170_4130 AK38_RS00930_4114 BAY22_RS21640_4174 YPD4_RS21505_4104 YPD8_RS21525_4060 CH61_RS00195_4143 BZ20_RS00435_4174 M0M60_RS21870_4286 CH46_RS00070_4122 EGX53_RS00030_4033 EGX52_RS00260_4348 EGX47_RS00105_4453 EGX44_RS00020_4153 EGX39_RS00330_3982 EGX46_RS00245_4205 EGX74_RS00040_4070 YPC_RS21075_4024 CH55_RS00770_4123 DN756_RS21785_4075 YPTB_RS21675_4159 BZ17_RS00175_4115 CH62_RS00690_4176 CH44_RS00795_4078 XM56_RS20545_4037 BN7064_RS22100_4159 CH63_RS00700_4106 BZ19_RS21445_4113 CH59_RS00970_4231 CH60_RS01070_4100 YPDSF_RS21140_4036 BZ18_RS00325_4042 CH43_RS00040_3994 LDH64_RS21810_4270 S96127_RS00100_4096 GCK71_RS22420_4113 GD372_RS22475_4112 DJY80_RS22415_4098 GCK69_RS22480_4113 GCK70_RS22160_4053 BZ15_RS00325_4183 CH47_RS00140_4080 YEY1_RS21430_4040 Y11_RS21100_4128 BFS78_RS21580_4258 BB936_RS22285_4398 BED35_RS00500_4353 BED32_RS00030_4182 BED33_RS21910_4325 BED34_RS22270_4407 YPZ3_RS21220_4056; do
  # The locus tag depends only on the gene ID, so derive it once per gene
  # instead of once per genome.
  locus_tag=$(cut -d '_' -f 1-2 <<< "${gene_id}")
  for gbff in "${gbff_files[@]}"; do
    output=$(python3 extract_CDS_of_a_locus_tag.py "${gbff}" "${locus_tag}")
    # Only genomes for which the script printed something are recorded.
    if [[ -n "${output}" ]]; then
      # ${gbff%%/*} keeps the first path component, i.e. the genome directory.
      printf "%s\t%s\n" "${gbff%%/*}" "${output}" >> yopM_seq.txt
    fi
  done
done
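Extract the sequences according to the NCBI annotations (gene coordinates are taken from the GTF files; the regions are cut out of the genome FASTA files with samtools faidx).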
#------------------------------- yopJ (+6) -------------------------------
#grep "yopJ" selected_gtf_files/Yersinia_enterocolitica_2516-87.gtf
NZ_CP009837.1 RefSeq gene 69041 69701 . - . gene_id "CH48_RS00445"; transcript_id ""; gbkey "Gene"; gene "yopJ"; gene_biotype "protein_coding"; locus_tag "CH48_RS00445"; old_locus_tag "CH48_4238"; part "2";
NZ_CP009837.1 RefSeq gene 1 206 . - . gene_id "CH48_RS00445"; transcript_id ""; gbkey "Gene"; gene "yopJ"; gene_biotype "protein_coding"; locus_tag "CH48_RS00445"; old_locus_tag "CH48_4238"; part "1";
#grep "yopJ" selected_gtf_files/Yersinia_pestis_790.gtf (NZ_CP006807.1)
#grep "yopJ" selected_gtf_files/Yersinia_pestis_Antiqua_bis.gtf
NZ_CP009905.1 RefSeq gene 16737 17602 . - . gene_id "CH58_RS00725"; transcript_id ""; gbkey "Gene"; gene "yopJ"; gene_biotype "pseudogene"; locus_tag "CH58_RS00725"; old_locus_tag "CH58_4444"; pseudo "true";
#grep "yopJ" selected_gtf_files/Yersinia_pestis_FDAARGOS_602.gtf
NZ_CP033695.1 RefSeq gene 36152 37017 . + . gene_id "EGX42_RS00935"; transcript_id ""; gbkey "Gene"; gene "yopJ"; gene_biotype "pseudogene"; locus_tag "EGX42_RS00935"; old_locus_tag "EGX42_00930"; pseudo "true";
#grep "yopJ" selected_gtf_files/Yersinia_pestis_Pestoides_B.gtf
NZ_CP010022.1 RefSeq gene 23121 23986 . - . gene_id "CH60_RS00825"; transcript_id ""; gbkey "Gene"; gene "yopJ"; gene_biotype "pseudogene"; locus_tag "CH60_RS00825"; old_locus_tag "CH60_4301"; pseudo "true";
#grep "yopJ" selected_gtf_files/Yersinia_pseudotuberculosis_EP2+.gtf
NZ_CP009758.1 RefSeq gene 33302 34168 . + . gene_id "BZ20_RS00215"; transcript_id ""; gbkey "Gene"; gene "yopJ"; gene_biotype "pseudogene"; locus_tag "BZ20_RS00215"; old_locus_tag "BZ20_4189"; pseudo "true";
#under selected_fna_files
# Cut the yopJ regions out of the genome FASTA files, using the coordinates of
# the GTF records listed above. Every `> temp.fna` overwrites the previous
# extraction, so each genome must be finished before the next one is started.
# sed expression used below: ':a;N;$!ba;s/\n//g' joins all lines of the input
# into a single line, 's/:/\t/g' then replaces every ":" with a tab.
# NOTE(review): `revseq` is called without arguments; presumably it is the
# EMBOSS tool run interactively on temp.fna to reverse-complement the
# minus-strand genes, producing the *.rev file read by the following sed - confirm.
# Yersinia_enterocolitica_2516-87: minus strand, annotated in two parts
# (69041-69701 and 1-206); `>>` appends the second region to temp.fna.
samtools faidx Yersinia_enterocolitica_2516-87.fna NZ_CP009837.1:69041-69701 > temp.fna
samtools faidx Yersinia_enterocolitica_2516-87.fna NZ_CP009837.1:1-206 >> temp.fna
revseq
sed -e ':a;N;$!ba;s/\n//g' -e 's/:/\t/g' yersinia_enterocolitica_2516-87.rev > temp_.fna
# Yersinia_pestis_Antiqua_bis: minus strand.
samtools faidx Yersinia_pestis_Antiqua_bis.fna NZ_CP009905.1:16737-17602 > temp.fna
revseq
sed -e ':a;N;$!ba;s/\n//g' -e 's/:/\t/g' 16737-17602.rev > temp_.fna
# Yersinia_pestis_FDAARGOS_602: plus strand, so temp.fna is reformatted in place (-i).
samtools faidx Yersinia_pestis_FDAARGOS_602.fna NZ_CP033695.1:36152-37017 > temp.fna
sed -i -e ':a;N;$!ba;s/\n//g' -e 's/:/\t/g' temp.fna
# Yersinia_pestis_Pestoides_B: minus strand.
samtools faidx Yersinia_pestis_Pestoides_B.fna NZ_CP010022.1:23121-23986 > temp.fna
revseq
sed -e ':a;N;$!ba;s/\n//g' -e 's/:/\t/g' 23121-23986.rev > temp_.fna
# Yersinia_pseudotuberculosis_EP2+: plus strand, reformatted in place (-i).
samtools faidx Yersinia_pseudotuberculosis_EP2+.fna NZ_CP009758.1:33302-34168 > temp.fna
sed -i -e ':a;N;$!ba;s/\n//g' -e 's/:/\t/g' temp.fna
Yersinia_enterocolitica_2516-87 ATGATTGGGCCAATATCACAAATAAACAGCTTCGGTGGCTTATCAGAAAAAGAGACCCGTTCTTTAATCAGTAATGAAGAGCTTAAAAATATCATAATACAGTTGGAAACTGATATAGCGGATGGATCCTGGTTCCATAAAAATTATTCACGCCTGGATATAGAAGTCATGCCCGCATTAGTAATTCAGGCGAACAATAAATATCCGGAAATGAATCTTAATTTTGTTACATCTCCCCAGGACCTTTCGATAGAAATAAAAAATGTCATAGAAAATGGAGTTGGATCTTCCCGCTTCATAATTAACATGGGGGAGGGTGGAATACATTTCAGTGTAATTGATTACAAACATATAAATGGGAAAACATCTCTGATATTATTTGAACCAGTAAACTTTAATAGTATGGGGCCAGCGATACTGGCAATAAGTACAAAAACGGCCATTGAACGTTATCAATTACCTGATTGCCATTTTTCCATGGTGGAAATGGATATTCAGCGAAGCTCATCTGAATGTGGTATTTTTAGTTTGGCACTGGCAAAAAAACTTTACACCGAGAGAGATAGCCTGTTGAAAATACATGAAGATAATATAAAAGGTATATTAAGTGATAGTGAAAATCCTTTACCCCACAATAAGTTGGATCCGTATCTCCCGGTAACTTTTTACAAACATACTCAAGGTAAAAAACGTCTTAATGAATATTTAAATACTAACCCGCAGGGAGTTGGTACTGTTGTTAACAAAAAAAATGAAACCATCTTTAATAGGTTTGATAACAATAAATCCATTATAGATGGAAAGGAATTATCAGTTTCGGTACATAAAAAGAGAATAGCTGAATATAAAACACTTCTCAAAGTATAA
Yersinia_pestis_Antiqua_bis ATGATCGGACCAATATCACAAATAAATATCTCCGGTGGCTTATCAGAAAAAGAGACCAGTTCTTTAATCAGTAATGAAGAGCTTAAAAATATCATAACACAGTTGGAAACTGATATATCGGATGGATCCTGGTTCCATAAAAATTATTCACGTATGGATGTAGAAGTCATGCCCGCATTGGTAATCCAGGCGAACAATAAATATCCGGAAATGAATCTTAATCTTGTTACATCTCCATTGGACCTTTCAATAGAAATAAAAAACGTCATAGAAAATGGAGTTAGATCTTCCCGCTTCATAATTAACATGGGGGAAGGTGGAATACATTTCAGTGTAATTGATTACAAACATATAAATGGGAAAACATCTCTGATATTGTTTGAACCAGCAAACTTTAACAGTATGGGGCCAGCGATGCTGGCAATAAGGACAAAAACGGCTATTGAACGTTATCAATTACCTGATTGCCATTTCTCCATGGTGGAAATGGATATTCAGCGAAGCTCATCTGAATGTGGTATTTTTAGTTTTGCACTGGCAAAAAAACTTTACATCGAGAGAGATAGCCTGTTGAAAATACATGAAGATAATATAAAAGGTATATTAAGTGATGGTAAAAATCCTTTACCCCACGATAAGTTGGACCCGTATCTCCCGGTAACTTTTTACAAACATACTCAAGGTAAAAAACGTCTTAATGAATATTTAAATACTAACCCGCAGGGAGTTGGTACTGTTGTTAACAAAAAAATGAAACCATCGTTAATAGATTTGATAACAATAAATCCATTGTAGATGGAAAGGAATTATCAGTTTCGGTACATAAAAAGAGAATAGCTGAATATAAAACACTTCTCAAAGTATAA
>Yersinia_pestis_FDAARGOS_602 ATGATCGGACCAATATCACAAATAAATATCTCCGGTGGCTTATCAGAAAAAGAGACCAGTTCTTTAATCAGTAATGAAGAGCTTAAAAATATCATAACACAGTTGGAAACTGATATATCGGATGGATCCTGGTTCCATAAAAATTATTCACGTATGGATGTAGAAGTCATGCCCGCATTGGTAATCCAGGCGAACAATAAATATCCGGAAATGAATCTTAATCTTGTTACATCTCCATTGGACCTTTCAATAGAAATAAAAAACGTCATAGAAAATGGAGTTAGATCTTCCCGCTTCATAATTAACATGGGGGAAGGTGGAATACATTTCAGTGTAATTGATTACAAACATATAAATGGGAAAACATCTCTGATATTGTTTGAACCAGCAAACTTTAACAGTATGGGGCCAGCGATGCTGGCAATAAGGACAAAAACGGCTATTGAACGTTATCAATTACCTGATTGCCATTTCTCCATGGTGGAAATGGATATTCAGCGAAGCTCATCTGAATGTGGTATTTTTAGTTTTGCACTGGCAAAAAAACTTTACATCGAGAGAGATAGCCTGTTGAAAATACATGAAGATAATATAAAAGGTATATTAAGTGATGGTGAAAATCCTTTACCCCACGATAAGTTGGACCCGTATCTCCCGGTAACTTTTTACAAACATACTCAAGGTAAAAAACGTCTTAATGAATATTTAAATACTAACCCGCAGGGAGTTGGTACTGTTGTTAACAAAAAAAATGAAACCATCGTTAATAGATTTGATAACAATAAATCCATTGTAGATGGAAAGGAATTATCAGTTTCGTACATAAAAAGAGAATAGCTGAATATAAAACACTTCTCAAAGTATAA
>Yersinia_pestis_Pestoides_B ATGATCGGACCAATATCACAAATAAATATCTCCGGTGGCTTATCAGAAAAAGAGACCAGTTCTTTAATCAGTAATGAAGAGCTTAAAAATATCATAACACAGTTGGAAACTGATATATCGGATGGATCCTGGTTCCATAAAAATTATTCACGTATGGATGTAGAAGTCATGCCCGCATTGGTAATCCAGGCGAACAATAAATATCCGGAAATGAATCTTAATCTTGTTACATCTCCATTGGACCTTTCAATAGAAATAAAAAACGTCATAGAAAATGGAGTTAGATCTTCCCGCTTCATAATTAACATGGGGGAAGGTGGAATACATTTCAGTGTAATTGATTACAAACATATAAATGGGAAAACATCTCTGATATTGTTTGAACCAGCAAACTTTAACAGTATGGGGCCAGCGATGCTGGCAATAAGGACAAAAACGGCTATTGAACGTTATCAATTACCTGATTGCCATTTCTCCATGGTGGAAATGGATATTCAGCGAAGCTCATCTGAATGTGGTATTTTTAGTTTTGCACTGGCAAAAAAACTTTACATCGAGAGAGATAGCCTGTTGAAAATACATGAAGATAATATAAAAGGTATATTAAGTGATGGTGAAAATCCTTTACCCCACGATAAGTTGGACCCGTATCTCCCGGTAACTTTTTACAAACATACTCAAGGTAAAAAACGTCTTAATGAATATTTAAATACTAACCCGCAGGGAGTTGGTACTGTTGTTAACAAAAAAAATGAAACCATCGTTAATAGATTTGATAACAATAAATCCATTGTAGATGGAAAGGAATTATCAGTTTCGTACATAAAAAGAGAATAGCTGAATATAAAACACTTCTCAAAGTATAA
Yersinia_pseudotuberculosis_EP2+ ATGATCGGACCAATATCACAAATAAATATCTCCGGTGGCTTATCAGAAAAAGAGACCAGTTCTTTAATCAGTAATGAAGAGCTTAAAAATATCATAACACAGTTGGAAACTGATATATCGGATGGATCCTGGTTCCATAAAAATTATTCACGTATGGATGTAGAAGTCATGCCCGCATTGGTAATCTAGGCGAACAATAAATATCCGGAAATGAATCTTAATCTTGTTACATCTCCATTGGACCTTTCAATAGAAATAAAAAACGTCATAGAAAATGGAGTTAGATCTTCCCGCTTCATAATTAACATGGGGGAAGGTGGAATACATTTCAGTGTAATTGATTACAAACATATAAATGGGAAAACATCTCTGATATTGTTTGAACCAGCAAACTTTAACAGTATGGGGCCAGCGATGCTGGCAATAAGGACAAAAACGGCTATTGAACGTTATCAATTACCTGATTGCCATTTCTCCATGGTGGAAATGGATATTCAGCGAAGCTCATCTGAATGTGGTATTTTTAGTTTTGCACTGGCAAAAAAACTTTACATCGAGAGAGATAGCCTGTTGAAAATACATGAAGATAATATAAAAGGTATATTAAGTGATGGTGAAAATCCTTTACCCCACGATAAGTTGGACCCGTATCTCCCGGTAACTTTTTACAAACATACTCAAGGTAAAAAACGTCTTAATGAATATTTAAATACTAACCCGCAGGGAGTTGGTACTGTTGTTAACAAAAAAAATGAAACCATCGTTAATAGATTTGATAACAATAAATCCATTGTAGATGGAAAGGAATTATCAGTTTCGGTACATAAAAAGAGAATAGCTGAATATAAAACACTTCTCAAAGTATAA
#------------------------------- yopB (+4) -------------------------------
#-- grep "yopB" Yersinia_enterocolitica_YE1.gtf
grep "yopB" Yersinia_enterocolitica_YE1.gtf
NZ_CP016946.1 RefSeq gene 73029 73029 . + . gene_id "BFS78_RS21560"; transcript_id ""; gbkey "Gene"; gene "yopB"; gene_biotype "protein_coding"; locus_tag "BFS78_RS21560"; old_locus_tag "BFS78_21560"; part "1";
NZ_CP016946.1 RefSeq gene 1 1205 . + . gene_id "BFS78_RS21560"; transcript_id ""; gbkey "Gene"; gene "yopB"; gene_biotype "protein_coding"; locus_tag "BFS78_RS21560"; old_locus_tag "BFS78_21560"; part "2";
#-- grep "yopB" Yersinia_enterocolitica_YE3.gtf
NZ_CP016943.1 RefSeq gene 72880 73026 . + . gene_id "BED35_RS00480"; transcript_id ""; gbkey "Gene"; gene "yopB"; gene_biotype "pseudogene"; locus_tag "BED35_RS00480"; old_locus_tag "BED35_00480"; part "1"; pseudo "true";
NZ_CP016943.1 RefSeq gene 1 1058 . + . gene_id "BED35_RS00480"; transcript_id ""; gbkey "Gene"; gene "yopB"; gene_biotype "pseudogene"; locus_tag "BED35_RS00480"; old_locus_tag "BED35_00480"; part "2"; pseudo "true";
grep "yopB" Yersinia_enterocolitica_YE5.gtf
NZ_CP016939.1 RefSeq gene 73034 73034 . + . gene_id "BED32_RS00010"; transcript_id ""; gbkey "Gene"; gene "yopB"; gene_biotype "protein_coding"; locus_tag "BED32_RS00010"; old_locus_tag "BED32_00010"; part "1";
NZ_CP016939.1 RefSeq gene 1 1205 . + . gene_id "BED32_RS00010"; transcript_id ""; gbkey "Gene"; gene "yopB"; gene_biotype "protein_coding"; locus_tag "BED32_RS00010"; old_locus_tag "BED32_00010"; part "2";
#-- grep "yopB" Yersinia_pestis_Harbin_35_bis.gtf
NZ_CP009703.1 RefSeq gene 18869 20075 . + . gene_id "CH55_RS00745"; transcript_id ""; gbkey "Gene"; gene "yopB"; gene_biotype "pseudogene"; locus_tag "CH55_RS00745"; old_locus_tag "CH55_4304"; pseudo "true";
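#run the following commands in the directory selected_fna_files (note: every "> temp.fna" overwrites the previous content of temp.fna, so process one genome at a time)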
samtools faidx Yersinia_enterocolitica_YE1.fna NZ_CP016946.1:73029-73029 > temp.fna
samtools faidx Yersinia_enterocolitica_YE1.fna NZ_CP016946.1:1-1205 >> temp.fna
samtools faidx Yersinia_enterocolitica_YE3.fna NZ_CP016943.1:72880-73026 > temp.fna
samtools faidx Yersinia_enterocolitica_YE3.fna NZ_CP016943.1:1-1058 >> temp.fna
samtools faidx Yersinia_enterocolitica_YE5.fna NZ_CP016939.1:73034-73034 > temp.fna
samtools faidx Yersinia_enterocolitica_YE5.fna NZ_CP016939.1:1-1205 >> temp.fna
samtools faidx Yersinia_pestis_Harbin_35_bis.fna NZ_CP009703.1:18869-20075 > temp.fna
Yersinia_enterocolitica_YE1 ATGAGTGCGTTGATAACCCATGATCGCTCAACGCCAGTAACTGGAAGTCTAGTTCCCTACATCGAGACACCAGCGCCCGCCCCCCTTCAGACCCAACAAGTCGCGGGAGAACTGAAGGATAAAAATGGCGGGGTGAGTTCTCAGGGCGTGCAGCTCCCTGCACCACTAGCAGTGGTTGCCAGCCAAGTCACTGAAGGACAACAGCAAGAAATCACTAAATTATTGGAGTCGGTCACCCGCGGCACGGCAGGATCTCAACTGATATCAAATTATGTTTCAGTGCTAACGAATTTTACGCTCGCTTCACCTGATACATTTGAGATTGAGTTAGGTAAGCTAGTTTCTAATTTAGAAGAAGTACGCAAAGACATAAAAATCGCTGATATTCAGCGTCTTCATGAACAAAACATGAAGAAAATTGAAGAGAATCAAGAGAAAATCAAAGAAACAGAAGAGAATGCCAAGCAAGTCAAGAAATCCGGCATGGCATCAAAGATTTTTGGCTGGCTCAGCGCCATAGCCTCAGTGGTTATCGGTGCCATCATGGTGGCCTCAGGGGTAGGAGCCGTTGCCGGTGCAATGATGATTGCCTCAGGCGTAATTGGGATGGCGAATATGGCTGTGAAACAAGCGGCGGAAGATGGCCTGATATCCCAAGAGGCAATGCAAGTATTAGGGCCGATACTCACTGCGATTGAAGTCGCATTGACTGTAGTTTCAACCGTAATGACCTTTGGCGGTTCGGCACTAAAATGCCTGGCTGATATTGGCGCAAAACTCGGTGCTAACACCGCAAGTCTTGCTGCTAAAGGAGCCGAGTTTTCAGCCAAAGTTGCCCAAATTTCGACAGGCATATCAAACACTGTCGGGAGTGCAGTGACTAAATTAGGGGGCAGTTTTGGTAGTTTAACAATGAGCCATGTAATCCGTACAGGATCACAGGCAACACAAGTCGCCGTTGGTGTGGGCAGCGGAATAACTCAGACCATCAATAATAAAAAACAAGCTGATTTACAACATAATAACGCTGATTTGGCCTTGAACAAGGCAGACATGGCAGCGTTACAAAGTATTATTGACCGACTCAAAGAAGAGTTATCCCATTTGTCAGAGTCACATCAACAAGTGATGGAACTGATTTTCCAGATGATTAATGCAAAAGGTGACATGCTGCATAATTTGGCCGGCAGACCCCATACTGTTTAA
Yersinia_enterocolitica_YE3 ATGAGTGCGTTGATAACCCATGATCGCTCAACGCCAGTAACTGGAAGTCTAGTTCCCTACATCGAGACACCAGCGCCCGCCCCCTTCAGACCCAACAAGTCGCGGGAGAACTGAAGGATAAAAATGGCGGGGTGAGTTCTCAGGGCGTGCAGCTCCCTGCACCACTAGCAGTGGTTGCCAGCCAAGTCACTGAAGGACAACAGCAAGAAATCACTAAATTATTGGAGTCGGTCACCCGCGGCACGGCAGGATCTCAACTGATATCAAATTATGTTTCAGTGCTAACGAATTTTACGCTCGCTTCACCTGATACATTTGAGATTGAGTTAGGTAAGCTAGTTTCTAATTTAGAAGAAGTACGCAAAGACATAAAAATCGCTGATATTCAGCGTCTTCATGAACAAAACATGAAGAAAATTGAAGAGAATCAAGAGAAAATCAAAGAAACAGAAGAGAATGCCAAGCAAGTCAAGAAATCCGGCATGGCATCAAAGATTTTTGGCTGGCTCAGCGCCATAGCCTCAGTGGTTATCGGTGCCATCATGGTGGCCTCAGGGGTAGGAGCCGTTGCCGGTGCAATGATGATTGCCTCAGGCGTAATTGGGATGGCGAATATGGCTGTGAAACAAGCGGCGGAAGATGGCCTGATATCCCAAGAGGCAATGCAAGTATTAGGGCCGATACTCACTGCGATTGAAGTCGCATTGACTGTAGTTTCAACCGTAATGACCTTTGGCGGTTCGGCACTAAAATGCCTGGCTGATATTGGCGCAAAACTCGGTGCTAACACCGCAAGTCTTGCTGCTAAAGGAGCCGAGTTTTCAGCCAAAGTTGCCCAAATTTCGACAGGCATATCAAACACTGTCGGGAGTGCAGTGACTAAATTAGGGGGCAGTTTTGGTAGTTTAACAATGAGCCATGTAATCCGTACAGGATCACAGGCAACACAAGTCGCCGTTGGTGTGGGCAGCGGAATAACTCAGACCATCAATAATAAAAAACAAGCTGATTTACAACATAATAACGCTGATTTGGCCTTGAACAAGGCAGACATGGCAGCGTTACAAAGTATTATTGACCGACTCAAAGAAGAGTTATCCCATTTGTCAGAGTCACATCAACAAGTGATGGAACTGATTTTCCAGATGATTAATGCAAAAGGTGACATGCTGCATAATTTGGCCGGCAGACCCCATACTGTTTAA
Yersinia_enterocolitica_YE5 ATGAGTGCGTTGATAACCCATGATCGCTCAACGCCAGTAACTGGAAGTCTAGTTCCCTACATCGAGACACCAGCGCCCGCCCCCCTTCAGACCCAACAAGTCGCGGGAGAACTGAAGGATAAAAATGGCGGGGTGAGTTCTCAGGGCGTGCAGCTCCCTGCACCACTAGCAGTGGTTGCCAGCCAAGTCACTGAAGGACAACAGCAAGAAATCACTAAATTATTGGAGTCGGTCACCCGCGGCACGGCAGGATCTCAACTGATATCAAATTATGTTTCAGTGCTAACGAATTTTACGCTCGCTTCACCTGATACATTTGAGATTGAGTTAGGTAAGCTAGTTTCTAATTTAGAAGAAGTACGCAAAGACATAAAAATCGCTGATATTCAGCGTCTTCATGAACAAAACATGAAGAAAATTGAAGAGAATCAAGAGAAAATCAAAGAAACAGAAGAGAATGCCAAGCAAGTCAAGAAATCCGGCATGGCATCAAAGATTTTTGGCTGGCTCAGCGCCATAGCCTCAGTGGTTATCGGTGCCATCATGGTGGCCTCAGGGGTAGGAGCCGTTGCCGGTGCAATGATGATTGCCTCAGGCGTAATTGGGATGGCGAATATGGCTGTGAAACAAGCGGCGGAAGATGGCCTGATATCCCAAGAGGCAATGCAAGTATTAGGGCCGATACTCACTGCGATTGAAGTCGCATTGACTGTAGTTTCAACCGTAATGACCTTTGGCGGTTCGGCACTAAAATGCCTGGCTGATATTGGCGCAAAACTCGGTGCTAACACCGCAAGTCTTGCTGCTAAAGGAGCCGAGTTTTCAGCCAAAGTTGCCCAAATTTCGACAGGCATATCAAACACTGTCGGGAGTGCAGTGACTAAATTAGGGGGCAGTTTTGGTAGTTTAACAATGAGCCATGTAATCCGTACAGGATCACAGGCAACACAAGTCGCCGTTGGTGTGGGCAGCGGAATAACTCAGACCATCAATAATAAAAAACAAGCTGATTTACAACATAATAACGCTGATTTGGCCTTGAACAAGGCAGACATGGCAGCGTTACAAAGTATTATTGACCGACTCAAAGAAGAGTTATCCCATTTGTCAGAGTCACATCAACAAGTGATGGAACTGATTTTCCAGATGATTAATGCAAAAGGTGACATGCTGCATAATTTGGCCGGCAGACCCCATACTGTTTAA
Yersinia_pestis_Harbin_35_bis ATGAGTGCGTTGATAACCCATGACCGCTCAACGCCAGTAACTGGAAGTCTACTTCCCTACGTCGAGACACCAGCGCCCGCCCCCCCTTCAGACCCAACAAGTCGCGGGAGAACTGAAGGATAAAAATGGCGGGGTGAGTTCTCAGGGCGTACAGCTCCCTGCACCACTAGCAGTGGTTGCCAGCCAAGTTACTGAAGGACAACAGCAAGAAGTCACTAAATTATTGGAGTCGGTCACCCGCGGCGCGGCAGGATCTCAACTGATATCAAATTATGTTTCAGTGCTAACGAAGTTTACGCTTGCTTCACCTGATACATTTGAGATTGAGTTAGGTAAGCTAGTTTCTAATTTAGAAGAAGTACGCAAAGACATAAAAATCGCTGATATTCAGCGTCTTCATGAACAAAACATGAAGAAAATTGAAGAGAATCAAGAGAAAATCAAAGAAACAGAAGAGAATGCCAAGCAAGTCAAGAAATCCGGCATCGCATCAAAGATTTTTGGCTGGCTCAGCGCCATAGCCTCAGTGATTGTCGGTGCCATCATGGTGGCCTCAGGGGTAGGAGCCGTTGCCGGTGCAATGATGGTTGCCTCAGGCGTAATTGGGATGGCGAATATGGCAGTGAAACAAGCGGCGGAAGATGGCCTGATATCCCAAGAGGCAATGAAAATATTAGGGCCGATACTCACTGCGATTGAAGTCGCATTGACTGTAGTTTCAACCGTAATGACCTTTGGCGGTTCGGCACTAAAATGCCTGGCTAATATTGGCGCAAAACTCGGTGCTAACACCGCAAGTCTTGTGGCTAAAGGAGCCGAGTTTTCGGCCAAAGTTGCCCAAATTTCGACAGGCATATCAAACACTGTCGGGAGTGCAGTGACTAAATTAGGGGGCAGTTTTGCTGGTTTAACAATGAGCCATGCAATCCGTACAGGATCACAGGCAACACAAGTCGCCGTTGGTGTGGGCAGCGGAATAACTCAGACCATCAATAATAAAAAGCAAGCTGATTTACAACATAATAACGCTGATTTGGCCTTGAACAAGGCAGACATGGCAGCGTTACAAAGTATTATTGACCGACTCAAAGAAGAGTTATCCCATTTGTCAGAGTCACATCAACAAGTGATGGAACTGATTTTCCAGATGATTAATGCAAAAGGTGACATGCTGCATAATTTGGCCGGCAGACCCCATACTGTTTAA
#------------------------------- yopT (+9) -------------------------------
#grep "yopT" selected_gtf_files/Yersinia_pestis_1412.gtf
NZ_CP006780.1 RefSeq gene 43360 44327 . + . gene_id "M479_RS22185"; transcript_id ""; gbkey "Gene"; gene "yopT"; gene_biotype "pseudogene"; locus_tag "M479_RS22185"; old_locus_tag "M479_4302"; pseudo "true";
#grep "yopT" selected_gtf_files/Yersinia_pestis_1413.gtf
NZ_CP006761.1 RefSeq gene 60310 61277 . + . gene_id "M480_RS22170"; transcript_id ""; gbkey "Gene"; gene "yopT"; gene_biotype "pseudogene"; locus_tag "M480_RS22170"; old_locus_tag "M480_4319"; pseudo "true";
#grep "yopT" selected_gtf_files/Yersinia_pestis_1522.gtf
NZ_CP006757.1 RefSeq gene 61673 62640 . - . gene_id "M481_RS22190"; transcript_id ""; gbkey "Gene"; gene "yopT"; gene_biotype "pseudogene"; locus_tag "M481_RS22190"; old_locus_tag "M481_4325"; pseudo "true";
samtools faidx Yersinia_pestis_1412.fna NZ_CP006780.1:43360-44327 > temp.fna
sed -i -e ':a;N;$!ba;s/\n//g' -e 's/:/\t/g' temp.fna
Yersinia_pestis_1412 ATGAACAGTATTCACGGACACTACCATATTCAACTATCGAATTATTCTGCCGGTGAAAACCTTCAATCAGTACCCTCACCGAAGGGGTGATTGGCGCACACCGAGTGAAAGTGGAAACAGCACTGTCACACTCAAACCTGCAGAAAAAGTTATCAGCCACCATAAAACATAACCAGTCAGGCCGTTCTATGCTGGATAGAAAGTTGACCAGCGACGGCAAAGCTAACCAACGCAGCAGCTTTACCTTCAGTATGATTATGTATCGCATGATACATTTTGTACTCAGCACTCGTGTGCCCGCGGTGAGAGAGTCTGTTGCAAATTACGGAGGTAACATCAATTTCAAGTTTGCTCAGACCAAAGGGGCTTTTCTTCATAAAATAATAAAACATTCAGACACTGCTAGCGGTGTCTGTGAGGCTTTATGTGCACATTGGATCAGGAACCATGCACAAGGCCAAAGCTTATTTGACCAGCTCTATGTTGGCGGGCGTAAGGGGAAATTCCAGATCGATACACTTTACTCAATTAAACAGTTGCAAATAGATGGTTGTAAAGCAGACGTTGATCAAGATGAGGTAACACTAGATTGGTTCAAGAAAAATGGCATATCAGAACGTATGATTGAACGGCATTGCTTACTGCGTCCAGTTGATGTTACTGGTACGACGGAATCAGAAGGGCTGGATCAATTATTAAACGCTATCCTTGATACTCATGGGATAGGTTACGGTTATAAAAAAATACATCTCTCAGGCCAAATGTCAGCCCACGCCATAGCGGCGTATGTCAACGAAAAGAGTGGTGTTACTTTCTTCGATCCCAATTTCGGTGAATTCCACTTTTCTGATAAGGAAAAGTTCCGCAAATGGTTTACTAACTCATTCTGGGGTAATTCTATGTATCATTATCCTCTGGGGGTGGGGCAGCGTTTTAGAGTGTTAACATTTGACTCCAAGGAGGTTTAA
samtools faidx Yersinia_pestis_1413.fna NZ_CP006761.1:60310-61277 > temp.fna
sed -i -e ':a;N;$!ba;s/\n//g' -e 's/:/\t/g' temp.fna
Yersinia_pestis_1413 ATGAACAGTATTCACGGACACTACCATATTCAACTATCGAATTATTCTGCCGGTGAAAACCTTCAATCAGTACCCTCACCGAAGGGGTGATTGGCGCACACCGAGTGAAAGTGGAAACAGCACTGTCACACTCAAACCTGCAGAAAAAGTTATCAGCCACCATAAAACATAACCAGTCAGGCCGTTCTATGCTGGATAGAAAGTTGACCAGCGACGGCAAAGCTAACCAACGCAGCAGCTTTACCTTCAGTATGATTATGTATCGCATGATACATTTTGTACTCAGCACTCGTGTGCCCGCGGTGAGAGAGTCTGTTGCAAATTACGGAGGTAACATCAATTTCAAGTTTGCTCAGACCAAAGGGGCTTTTCTTCATAAAATAATAAAACATTCAGACACTGCTAGCGGTGTCTGTGAGGCTTTATGTGCACATTGGATCAGGAACCATGCACAAGGCCAAAGCTTATTTGACCAGCTCTATGTTGGCGGGCGTAAGGGGAAATTCCAGATCGATACACTTTACTCAATTAAACAGTTGCAAATAGATGGTTGTAAAGCAGACGTTGATCAAGATGAGGTAACACTAGATTGGTTCAAGAAAAATGGCATATCAGAACGTATGATTGAACGGCATTGCTTACTGCGTCCAGTTGATGTTACTGGTACGACGGAATCAGAAGGGCTGGATCAATTATTAAACGCTATCCTTGATACTCATGGGATAGGTTACGGTTATAAAAAAATACATCTCTCAGGCCAAATGTCAGCCCACGCCATAGCGGCGTATGTCAACGAAAAGAGTGGTGTTACTTTCTTCGATCCCAATTTCGGTGAATTCCACTTTTCTGATAAGGAAAAGTTCCGCAAATGGTTTACTAACTCATTCTGGGGTAATTCTATGTATCATTATCCTCTGGGGGTGGGGCAGCGTTTTAGAGTGTTAACATTTGACTCCAAGGAGGTTTAA
samtools faidx Yersinia_pestis_1522.fna NZ_CP006757.1:61673-62640 > temp.fna
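#the gene is on the minus strand: reverse-complement the extracted region with EMBOSS revseq (presumably run interactively on temp.fna; its output 61673-62640.rev is read in the next step)
revseq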
sed -e ':a;N;$!ba;s/\n//g' -e 's/:/\t/g' 61673-62640.rev > temp_.fna
Yersinia_pestis_1522 ATGAACAGTATTCACGGACACTACCATATTCAACTATCGAATTATTCTGCCGGTGAAAACCTTCAATCAGTACCCTCACCGAAGGGGTGATTGGCGCACACCGAGTGAAAGTGGAAACAGCACTGTCACACTCAAACCTGCAGAAAAAGTTATCAGCCACCATAAAACATAACCAGTCAGGCCGTTCTATGCTGGATAGAAAGTTGACCAGCGACGGCAAAGCTAACCAACGCAGCAGCTTTACCTTCAGTATGATTATGTATCGCATGATACATTTTGTACTCAGCACTCGTGTGCCCGCGGTGAGAGAGTCTGTTGCAAATTACGGAGGTAACATCAATTTCAAGTTTGCTCAGACCAAAGGGGCTTTTCTTCATAAAATAATAAAACATTCAGACACTGCTAGCGGTGTCTGTGAGGCTTTATGTGCACATTGGATCAGGAACCATGCACAAGGCCAAAGCTTATTTGACCAGCTCTATGTTGGCGGGCGTAAGGGGAAATTCCAGATCGATACACTTTACTCAATTAAACAGTTGCAAATAGATGGTTGTAAAGCAGACGTTGATCAAGATGAGGTAACACTAGATTGGTTCAAGAAAAATGGCATATCAGAACGTATGATTGAACGGCATTGCTTACTGCGTCCAGTTGATGTTACTGGTACGACGGAATCAGAAGGGCTGGATCAATTATTAAACGCTATCCTTGATACTCATGGGATAGGTTACGGTTATAAAAAAATACATCTCTCAGGCCAAATGTCAGCCCACGCCATAGCGGCGTATGTCAACGAAAAGAGTGGTGTTACTTTCTTCGATCCCAATTTCGGTGAATTCCACTTTTCTGATAAGGAAAAGTTCCGCAAATGGTTTACTAACTCATTCTGGGGTAATTCTATGTATCATTATCCTCTGGGGGTGGGGCAGCGTTTTAGAGTGTTAACATTTGACTCCAAGGAGGTTTAA
#grep "yopT" selected_gtf_files/Yersinia_pestis_3067.gtf
NZ_CP006753.1 RefSeq gene 43515 44482 . + . gene_id "M482_RS22205"; transcript_id ""; gbkey "Gene"; gene "yopT"; gene_biotype "pseudogene"; locus_tag "M482_RS22205"; old_locus_tag "M482_4297"; pseudo "true";
samtools faidx Yersinia_pestis_3067.fna NZ_CP006753.1:43515-44482 > temp.fna
sed -i -e ':a;N;$!ba;s/\n//g' -e 's/:/\t/g' temp.fna
Yersinia_pestis_3067 ATGAACAGTATTCACGGACACTACCATATTCAACTATCGAATTATTCTGCCGGTGAAAACCTTCAATCAGTACCCTCACCGAAGGGGTGATTGGCGCACACCGAGTGAAAGTGGAAACAGCACTGTCACACTCAAACCTGCAGAAAAAGTTATCAGCCACCATAAAACATAACCAGTCAGGCCGTTCTATGCTGGATAGAAAGTTGACCAGCGACGGCAAAGCTAACCAACGCAGCAGCTTTACCTTCAGTATGATTATGTATCGCATGATACATTTTGTACTCAGCACTCGTGTGCCCGCGGTGAGAGAGTCTGTTGCAAATTACGGAGGTAACATCAATTTCAAGTTTGCTCAGACCAAAGGGGCTTTTCTTCATAAAATAATAAAACATTCAGACACTGCTAGCGGTGTCTGTGAGGCTTTATGTGCACATTGGATCAGGAACCATGCACAAGGCCAAAGCTTATTTGACCAGCTCTATGTTGGCGGGCGTAAGGGGAAATTCCAGATCGATACACTTTACTCAATTAAACAGTTGCAAATAGATGGTTGTAAAGCAGACGTTGATCAAGATGAGGTAACACTAGATTGGTTCAAGAAAAATGGCATATCAGAACGTATGATTGAACGGCATTGCTTACTGCGTCCAGTTGATGTTACTGGTACGACGGAATCAGAAGGGCTGGATCAATTATTAAACGCTATCCTTGATACTCATGGGATAGGTTACGGTTATAAAAAAATACATCTCTCAGGCCAAATGTCAGCCCACGCCATAGCGGCGTATGTCAACGAAAAGAGTGGTGTTACTTTCTTCGATCCCAATTTCGGTGAATTCCACTTTTCTGATAAGGAAAAGTTCCGCAAATGGTTTACTAACTCATTCTGGGGTAATTCTATGTATCATTATCCTCTGGGGGTGGGGCAGCGTTTTAGAGTGTTAACATTTGACTCCAAGGAGGTTTAA
#grep "yopT" selected_gtf_files/Yersinia_pestis_3770.gtf
NZ_CP006750.1 RefSeq gene 18136 19103 . + . gene_id "M483_RS22135"; transcript_id ""; gbkey "Gene"; gene "yopT"; gene_biotype "pseudogene"; locus_tag "M483_RS22135"; old_locus_tag "M483_4264"; pseudo "true";
samtools faidx Yersinia_pestis_3770.fna NZ_CP006750.1:18136-19103 > temp.fna
sed -i -e ':a;N;$!ba;s/\n//g' -e 's/:/\t/g' temp.fna
Yersinia_pestis_3770 ATGAACAGTATTCACGGACACTACCATATTCAACTATCGAATTATTCTGCCGGTGAAAACCTTCAATCAGTACCCTCACCGAAGGGGTGATTGGCGCACACCGAGTGAAAGTGGAAACAGCACTGTCACACTCAAACCTGCAGAAAAAGTTATCAGCCACCATAAAACATAACCAGTCAGGCCGTTCTATGCTGGATAGAAAGTTGACCAGCGACGGCAAAGCTAACCAACGCAGCAGCTTTACCTTCAGTATGATTATGTATCGCATGATACATTTTGTACTCAGCACTCGTGTGCCCGCGGTGAGAGAGTCTGTTGCAAATTACGGAGGTAACATCAATTTCAAGTTTGCTCAGACCAAAGGGGCTTTTCTTCATAAAATAATAAAACATTCAGACACTGCTAGCGGTGTCTGTGAGGCTTTATGTGCACATTGGATCAGGAACCATGCACAAGGCCAAAGCTTATTTGACCAGCTCTATGTTGGCGGGCGTAAGGGGAAATTCCAGATCGATACACTTTACTCAATTAAACAGTTGCAAATAGATGGTTGTAAAGCAGACGTTGATCAAGATGAGGTAACACTAGATTGGTTCAAGAAAAATGGCATATCAGAACGTATGATTGAACGGCATTGCTTACTGCGTCCAGTTGATGTTACTGGTACGACGGAATCAGAAGGGCTGGATCAATTATTAAACGCTATCCTTGATACTCATGGGATAGGTTACGGTTATAAAAAAATACATCTCTCAGGCCAAATGTCAGCCCACGCCATAGCGGCGTATGTCAACGAAAAGAGTGGTGTTACTTTCTTCGATCCCAATTTCGGTGAATTCCACTTTTCTGATAAGGAAAAGTTCCGCAAATGGTTTACTAACTCATTCTGGGGTAATTCTATGTATCATTATCCTCTGGGGGTGGGGCAGCGTTTTAGAGTGTTAACATTTGACTCCAAGGAGGTTTAA
#grep "yopT" selected_gtf_files/Yersinia_pestis_8787.gtf
NZ_CP006747.1 RefSeq gene 55293 56260 . + . gene_id "M484_RS21915"; transcript_id ""; gbkey "Gene"; gene "yopT"; gene_biotype "pseudogene"; locus_tag "M484_RS21915"; old_locus_tag "M484_4255"; pseudo "true";
samtools faidx Yersinia_pestis_8787.fna NZ_CP006747.1:55293-56260 > temp.fna
sed -i -e ':a;N;$!ba;s/\n//g' -e 's/:/\t/g' temp.fna
Yersinia_pestis_8787 ATGAACAGTATTCACGGACACTACCATATTCAACTATCGAATTATTCTGCCGGTGAAAACCTTCAATCAGTACCCTCACCGAAGGGGTGATTGGCGCACACCGAGTGAAAGTGGAAACAGCACTGTCACACTCAAACCTGCAGAAAAAGTTATCAGCCACCATAAAACATAACCAGTCAGGCCGTTCTATGCTGGATAGAAAGTTGACCAGCGACGGCAAAGCTAACCAACGCAGCAGCTTTACCTTCAGTATGATTATGTATCGCATGATACATTTTGTACTCAGCACTCGTGTGCCCGCGGTGAGAGAGTCTGTTGCAAATTACGGAGGTAACATCAATTTCAAGTTTGCTCAGACCAAAGGGGCTTTTCTTCATAAAATAATAAAACATTCAGACACTGCTAGCGGTGTCTGTGAGGCTTTATGTGCACATTGGATCAGGAACCATGCACAAGGCCAAAGCTTATTTGACCAGCTCTATGTTGGCGGGCGTAAGGGGAAATTCCAGATCGATACACTTTACTCAATTAAACAGTTGCAAATAGATGGTTGTAAAGCAGACGTTGATCAAGATGAGGTAACACTAGATTGGTTCAAGAAAAATGGCATATCAGAACGTATGATTGAACGGCATTGCTTACTGCGTCCAGTTGATGTTACTGGTACGACGGAATCAGAAGGGCTGGATCAATTATTAAACGCTATCCTTGATACTCATGGGATAGGTTACGGTTATAAAAAAATACATCTCTCAGGCCAAATGTCAGCCCACGCCATAGCGGCGTATGTCAACGAAAAGAGTGGTGTTACTTTCTTCGATCCCAATTTCGGTGAATTCCACTTTTCTGATAAGGAAAAGTTCCGCAAATGGTTTACTAACTCATTCTGGGGTAATTCTATGTATCATTATCCTCTGGGGGTGGGGCAGCGTTTTAGAGTGTTAACATTTGACTCCAAGGAGGTTTAA
#grep "yopT" selected_gtf_files/Yersinia_pestis_Pestoides_F.gtf
NC_009377.1 RefSeq gene 48563 49530 . - . gene_id "YPDSF_RS23435"; transcript_id ""; gbkey "Gene"; gene "yopT"; gene_biotype "pseudogene"; locus_tag "YPDSF_RS23435"; old_locus_tag "YPDSF_4001"; pseudo "true";
samtools faidx Yersinia_pestis_Pestoides_F.fna NC_009377.1:48563-49530 > temp.fna
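#the gene is on the minus strand: reverse-complement the extracted region with EMBOSS revseq (presumably run interactively on temp.fna; its output 48563-49530.rev is read in the next step)
revseq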
sed -e ':a;N;$!ba;s/\n//g' -e 's/:/\t/g' 48563-49530.rev > temp_.fna
Yersinia_pestis_Pestoides_F ATGAACAGTATTCACGGACACTACCATATTCAACTATCGAATTATTCTGCCGGTGAAAACCTTCAATCAGTACCCTCACCGAAGGGGTGATTGGCGCACACCGAGTGAAAGTGGAAACAGCACTGTCACACTCAAACCTGCAGAAAAAGTTATCAGCCACCATAAAACATAACCAGTCAGGCCGTTCTATGCTGGATAGAAAGTTGACCAGCGACGGCAAAGCTAACCAACGCAGCAGCTTTACCTTCAGTATGATTATGTATCGCATGATACATTTTGTACTCAGCACTCGTGTGCCCGCGGTGAGAGAGTCTGTTGCAAATTACGGAGGTAACATCAATTTCAAGTTTGCTCAGACCAAAGGGGCTTTTCTTCATAAAATAATAAAACATTCAGACACTGCTAGCGGTGTCTGTGAGGCTTTATGTGCACATTGGATCAGGAACCATGCACAAGGCCAAAGCTTATTTGACCAGCTCTATGTTGGCGGGCGTAAGGGGAAATTCCAGATCGATACACTTTACTCAATTAAACAGTTGCAAATAGATGGTTGTAAAGCAGACGTTGATCAAGATGAGGTAACACTAGATTGGTTCAAGAAAAATGGCATATCAGAACGTATGATTGAACGGCATTGCTTACTGCGTCCAGTTGATGTTACTGGTACGACGGAATCAGAAGGGCTGGATCAATTATTAAACGCTATCCTTGATACTCATGGGATAGGTTACGGTTATAAAAAAATACATCTCTCAGGCCAAATGTCAGCCCACGCCATAGCGGCGTATGTCAACGAAAAGAGTGGTGTTACTTTCTTCGATCCCAATTTCGGTGAATTCCACTTTTCTGATAAGGAAAAGTTCCGCAAATGGTTTACTAACTCATTCTGGGGTAATTCTATGTATCATTATCCTCTGGGGGTGGGGCAGCGTTTTAGAGTGTTAACATTTGACTCCAAGGAGGTTTAA
#grep "yopT" selected_gtf_files/Yersinia_pestis_Pestoides_F_bis.gtf
NZ_CP009713.1 RefSeq gene 53246 54213 . - . gene_id "BZ18_RS22165"; transcript_id ""; gbkey "Gene"; gene "yopT"; gene_biotype "pseudogene"; locus_tag "BZ18_RS22165"; old_locus_tag "BZ18_4298"; pseudo "true";
samtools faidx Yersinia_pestis_Pestoides_F_bis.fna NZ_CP009713.1:53246-54213 > temp.fna
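#the gene is on the minus strand: reverse-complement the extracted region with EMBOSS revseq (presumably run interactively on temp.fna; its output 53246-54213.rev is read in the next step)
revseq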
sed -e ':a;N;$!ba;s/\n//g' -e 's/:/\t/g' 53246-54213.rev > temp_.fna
Yersinia_pestis_Pestoides_F_bis ATGAACAGTATTCACGGACACTACCATATTCAACTATCGAATTATTCTGCCGGTGAAAACCTTCAATCAGTACCCTCACCGAAGGGGTGATTGGCGCACACCGAGTGAAAGTGGAAACAGCACTGTCACACTCAAACCTGCAGAAAAAGTTATCAGCCACCATAAAACATAACCAGTCAGGCCGTTCTATGCTGGATAGAAAGTTGACCAGCGACGGCAAAGCTAACCAACGCAGCAGCTTTACCTTCAGTATGATTATGTATCGCATGATACATTTTGTACTCAGCACTCGTGTGCCCGCGGTGAGAGAGTCTGTTGCAAATTACGGAGGTAACATCAATTTCAAGTTTGCTCAGACCAAAGGGGCTTTTCTTCATAAAATAATAAAACATTCAGACACTGCTAGCGGTGTCTGTGAGGCTTTATGTGCACATTGGATCAGGAACCATGCACAAGGCCAAAGCTTATTTGACCAGCTCTATGTTGGCGGGCGTAAGGGGAAATTCCAGATCGATACACTTTACTCAATTAAACAGTTGCAAATAGATGGTTGTAAAGCAGACGTTGATCAAGATGAGGTAACACTAGATTGGTTCAAGAAAAATGGCATATCAGAACGTATGATTGAACGGCATTGCTTACTGCGTCCAGTTGATGTTACTGGTACGACGGAATCAGAAGGGCTGGATCAATTATTAAACGCTATCCTTGATACTCATGGGATAGGTTACGGTTATAAAAAAATACATCTCTCAGGCCAAATGTCAGCCCACGCCATAGCGGCGTATGTCAACGAAAAGAGTGGTGTTACTTTCTTCGATCCCAATTTCGGTGAATTCCACTTTTCTGATAAGGAAAAGTTCCGCAAATGGTTTACTAACTCATTCTGGGGTAATTCTATGTATCATTATCCTCTGGGGGTGGGGCAGCGTTTTAGAGTGTTAACATTTGACTCCAAGGAGGTTTAA
#grep "yopT" selected_gtf_files/Yersinia_pestis_Pestoides_G.gtf
NZ_CP010246.1 RefSeq gene 1551 2518 . + . gene_id "CH43_RS22165"; transcript_id ""; gbkey "Gene"; gene "yopT"; gene_biotype "pseudogene"; locus_tag "CH43_RS22165"; old_locus_tag "CH43_4244"; pseudo "true";
samtools faidx Yersinia_pestis_Pestoides_G.fna NZ_CP010246.1:1551-2518 > temp.fna
sed -i -e ':a;N;$!ba;s/\n//g' -e 's/:/\t/g' temp.fna
Yersinia_pestis_Pestoides_G ATGAACAGTATTCACGGACACTACCATATTCAACTATCGAATTATTCTGCCGGTGAAAACCTTCAATCAGTACCCTCACCGAAGGGGTGATTGGCGCACACCGAGTGAAAGTGGAAACAGCACTGTCACACTCAAACCTGCAGAAAAAGTTATCAGCCACCATAAAACATAACCAGTCAGGCCGTTCTATGCTGGATAGAAAGTTGACCAGCGACGGCAAAGCTAACCAACGCAGCAGCTTTACCTTCAGTATGATTATGTATCGCATGATACATTTTGTACTCAGCACTCGTGTGCCCGCGGTGAGAGAGTCTGTTGCAAATTACGGAGGTAACATCAATTTCAAGTTTGCTCAGACCAAAGGGGCTTTTCTTCATAAAATAATAAAACATTCAGACACTGCTAGCGGTGTCTGTGAGGCTTTATGTGCACATTGGATCAGGAACCATGCACAAGGCCAAAGCTTATTTGACCAGCTCTATGTTGGCGGGCGTAAGGGGAAATTCCAGATCGATACACTTTACTCAATTAAACAGTTGCAAATAGATGGTTGTAAAGCAGACGTTGATCAAGATGAGGTAACACTAGATTGGTTCAAGAAAAATGGCATATCAGAACGTATGATTGAACGGCATTGCTTACTGCGTCCAGTTGATGTTACTGGTACGACGGAATCAGAAGGGCTGGATCAATTATTAAACGCTATCCTTGATACTCATGGGATAGGTTACGGTTATAAAAAAATACATCTCTCAGGCCAAATGTCAGCCCACGCCATAGCGGCGTATGTCAACGAAAAGAGTGGTGTTACTTTCTTCGATCCCAATTTCGGTGAATTCCACTTTTCTGATAAGGAAAAGTTCCGCAAATGGTTTACTAACTCATTCTGGGGTAATTCTATGTATCATTATCCTCTGGGGGTGGGGCAGCGTTTTAGAGTGTTAACATTTGACTCCAAGGAGGTTTAA
#------------------------------- yopE (+3) -------------------------------
#grep "yopE" selected_gtf_files/Yersinia_pestis_1522.gtf
NZ_CP006757.1 RefSeq gene 70902 71507 . - . gene_id "M481_RS24690"; transcript_id ""; gbkey "Gene"; gene "yopE"; gene_biotype "pseudogene"; locus_tag "M481_RS24690"; old_locus_tag "M481_4336"; part "2"; pseudo "true";
NZ_CP006757.1 RefSeq gene 1 53 . - . gene_id "M481_RS24690"; transcript_id ""; gbkey "Gene"; gene "yopE"; gene_biotype "pseudogene"; locus_tag "M481_RS24690"; old_locus_tag "M481_4336"; part "1"; pseudo "true";
samtools faidx Yersinia_pestis_1522.fna NZ_CP006757.1:70902-71507 > temp.fna
samtools faidx Yersinia_pestis_1522.fna NZ_CP006757.1:1-53 >> temp.fna
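#delete the second FASTA header line (the second line starting with ">") in temp.fna, so that the two parts are joined into one sequence
#the gene is on the minus strand: reverse-complement the joined region with EMBOSS revseq (presumably run interactively on temp.fna; its output 70902-71507.rev is read in the next step)
revseq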
sed -e ':a;N;$!ba;s/\n//g' -e 's/:/\t/g' 70902-71507.rev > temp_.fna
Yersinia_pestis_1522 ATGAAAATATCATCATTTATTTCTACATCACTGCCCCTGCCGACATCTGTGTCGGATCTAGCAGCGTAGGAGAAATGTCTGGGCGCTCAGTCTCACAGCAAACAAGTGATCAATATGCAAACAATCTGGCCGGGCGCACTGAAAGCCCTCAGGGTTCCAGCTTAGCCAGCCGTATCATTGAGAGGTTATCATCAGTGGCCCACTCTGTGATTGGGTTTATCCAACGCATGTTCTCGGAGGGGAGCCATAAACCGGTGGTGACACCAGCACCCACACCTGCACAAATGCCAAGTCCTACGTCTTTCAGTGACAGTATCAAGCAACTTGCTGCTGAGACGCTGCCAAAATACATGCAGCAGTTGAATAGCTTGGATGCAGAGATGCTGCAGAAAAATCATGATCAGTTCGCTACGGGCAGCGGCCCTCTTCGTGGCAGTATCACTCAATGCCAAGGGCTGATGCAGTTTTGTGGTGGGGAATTGCAAGCTGAGGCCAGTGCCATCTTAAACACGCCTGTTTGTGGTATTCCCTTCTCGCAGTGGGGAACTATTGGTGGGGCGGCCAGCGCGTACGTCGCCAGTGGCGTTGATCTAACGCAGGCAGCAAATGAGATCAAAGGGCTGGCGCAACAGATGCAGAAATTACTGTCATTGATGTGA
#grep "yopE" selected_gtf_files/Yersinia_pestis_Nicholisk_41.gtf
NZ_CP009990.1 RefSeq gene 67916 68552 . + . gene_id "CH63_RS00620"; transcript_id ""; gbkey "Gene"; gene "yopE"; gene_biotype "protein_coding"; locus_tag "CH63_RS00620"; part "1";
NZ_CP009990.1 RefSeq gene 1 23 . + . gene_id "CH63_RS00620"; transcript_id ""; gbkey "Gene"; gene "yopE"; gene_biotype "protein_coding"; locus_tag "CH63_RS00620"; part "2";
samtools faidx Yersinia_pestis_Nicholisk_41.fna NZ_CP009990.1:67916-68552 > temp.fna
samtools faidx Yersinia_pestis_Nicholisk_41.fna NZ_CP009990.1:1-23 >> temp.fna
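#delete the second FASTA header line (the second line starting with ">") in temp.fna, so that the two parts are joined into one sequence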
sed -i -e ':a;N;$!ba;s/\n//g' -e 's/:/\t/g' temp.fna
Yersinia_pestis_Nicholisk_41 ATGAAAATATCATCATTTATTTCTACATCACTGCCCCTGCCGACATCTGTGTCAGGATCTAGCAGCGTAGGAGAAATGTCTGGGCGCTCAGTCTCACAGCAAACAAGTGATCAATATGCAAACAATCTGGCCGGGCGCACTGAAAGCCCTCAGGGTTCCAGCTTAGCCAGCCGTATCATTGAGAGGTTATCATCAGTGGCCCACTCTGTGATTGGGTTTATCCAACGCATGTTCTCGGAGGGGAGCCATAAACCGGTGGTGACACCGGCACCCACACCTGCACAAATGCCAAGTCCTACGTCTTTCAGTGACAGTATCAAGCAACTTGCTGCTGAGACGCTGCCAAAATACATGCAGCAGTTGAATAGCTTGGATGCAGAGATGCTGCAGAAAAATCATGATCAGTTCGCTACGGGCAGCGGCCCTCTTCGTGGCAGTATCACTCAATGCCAAGGGCTGATGCAGTTTTGTGGTGGGGAATTGCAAGCTGAGGCCAGTGCCATCTTAAACACGCCTGTTTGTGGTATTCCCTTCTCGCAGTGGGGAACTATTGGTGGGGCGGCCAGCGCGTACGTCGCCAGTGGCGTTGATCTAACGCAGGCAGCAAATGAGATCAAAGGGCTGGCGCAACAGATGCAGAAATTACTGTCATTGATGTGA
#grep "yopE" selected_gtf_files/Yersinia_pseudotuberculosis_FDAARGOS_581.gtf
NZ_CP033712.1 RefSeq gene 69663 70035 . + . gene_id "EGX47_RS00005"; transcript_id ""; gbkey "Gene"; gene "yopE"; gene_biotype "protein_coding"; locus_tag "EGX47_RS00005"; old_locus_tag "EGX47_00005"; part "1";
NZ_CP033712.1 RefSeq gene 1 287 . + . gene_id "EGX47_RS00005"; transcript_id ""; gbkey "Gene"; gene "yopE"; gene_biotype "protein_coding"; locus_tag "EGX47_RS00005"; old_locus_tag "EGX47_00005"; part "2";
samtools faidx Yersinia_pseudotuberculosis_FDAARGOS_581.fna NZ_CP033712.1:69663-70035 > temp.fna
samtools faidx Yersinia_pseudotuberculosis_FDAARGOS_581.fna NZ_CP033712.1:1-287 >> temp.fna
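#delete the second FASTA header line (the second line starting with ">") in temp.fna, so that the two parts are joined into one sequence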
sed -i -e ':a;N;$!ba;s/\n//g' -e 's/:/\t/g' temp.fna
Yersinia_pseudotuberculosis_FDAARGOS_581 ATGAAAATATCATCATTTATTTCTACATCACTGCCCCTGCCGACATCTGTGTCAGGATCTAGCAGCGTAGGAGAAATGTCTGGGCGCTCAGTCTCACAGCAAACAAGTGATCAATATGCAAACAATCTGGCCGGGCGCACTGAAAGCCCTCAGGGTTCCAGCTTAGCCAGCCGTATCATTGAGAGGTTATCATCAGTGGCCCACTCTGTGATTGGGTTTATCCAACGCATGTTCTCGGAGGGGAGCCATAAACCGGTGGTGACACCAGCACCCACACCTGCACAAATGCCAAGTCCTACGTCTTTCAGTGACAGTATCAAGCAACTTGCTGCTGAGACGCTGCCAAAATACATGCAGCAGTTGAATAGCTTGGATGCAGAGATGCTGCAGAAAAATCATGATCAGTTCGCTACGGGCAGCGGCCCTCTTCGTGGCAGTATCACTCAATGCCAAGGGCTGATGCAGTTTTGTGGTGGGGAATTGCAAGCTGAGGCCAGTGCCATCTTAAACACGCCTGTTTGTGGTATTCCCTTCTCGCAGTGGGGAACTATTGGTGGGGCGGCCAGCGCGTACGTCGCCAGTGGCGTTGATCTAACGCAGGCAGCAAATGAGATCAAAGGGCTGGCGCAACAGATGCAGAAATTACTGTCATTGATGTGA
#------------------------------- yopD (+2) -------------------------------
#grep "yopD" selected_gtf_files/Yersinia_enterocolitica_YE165.gtf
NZ_CP016933.1 RefSeq gene 74497 74497 . + . gene_id "BB936_RS22270"; transcript_id ""; gbkey "Gene"; gene "yopD"; gene_biotype "protein_coding"; locus_tag "BB936_RS22270"; old_locus_tag "BB936_22265"; part "1";
NZ_CP016933.1 RefSeq gene 1 920 . + . gene_id "BB936_RS22270"; transcript_id ""; gbkey "Gene"; gene "yopD"; gene_biotype "protein_coding"; locus_tag "BB936_RS22270"; old_locus_tag "BB936_22265"; part "2";
samtools faidx Yersinia_enterocolitica_YE165.fna NZ_CP016933.1:74497-74497 > temp.fna
samtools faidx Yersinia_enterocolitica_YE165.fna NZ_CP016933.1:1-920 >> temp.fna
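#delete the second FASTA header line (the second line starting with ">") in temp.fna, so that the two parts are joined into one sequence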
sed -i -e ':a;N;$!ba;s/\n//g' -e 's/:/\t/g' temp.fna
Yersinia_enterocolitica_YE165 ATGACAATAAATATCAAGACAGACAGCCCAATTATCACGACCGGTTCACAGCTTGATGCCATCACTACAGAGACAGTCGGGCAAAGCGGTGAGGTTAAAAAAACAGAAGACACCCGTCATGAAGCACAAGCAATAAAGAGTAGCGAGGCAAGCTTATCTCGGTCACAGGTGCCTGAATTGATCAAACCGAGTCAGGGAATCAATGTTGCATTACTGAGTAAAAGCCAGGGAGATCTTAATGGTACTTTAAGTATCTTGTTGTTGCTGTTGGAACTGGCACGTAAAGCGCGAGAAATGGGTTTGCAACAAAGGGATATAGAAAATAAAGCTACTATTTCTGCCCAAAAGGAGCAGGTAGCGGAGATGGTCAGCGGTGCAAAACTGATGATCGCCATGGCGGTGGTGTCTGGCATCATGGCTGCTACTTCTACGGTTGCTAGTGCTTTTTCTATAGCGAAAGAGGTGAAAATAGTTAAACAGGAACAAATTCTAAACAGTAACATTGCCGGCCGTGATCAACTTATTGATACAAAAATGCAGCAAATGAGTAACGCTGGTGATAAAGCGGTAAGCAGAGAGGATATCGGGAGAATATGGAAACCAGAGCAGGTAGCGGATCAAAATAAGCTGGCATTATTGGATAAAGAATTCAGAATGACCGACTCAAAAGCCAATGCGTTTAATGCCGCAACGCAGCCGTTAGGACAAATGGCAAACAGTGCGATTCAAGTTCATCAAGGGTATTCTCAAGCCGAGGTCAAAGAAAAAGAAGTCAATGCAAGTATTGCTGCCAACGAGAAGCAAAAAGCCGAAGAGGCGATGAACTATAATGATAACTTTATGAAAGATGTCCTGCGCTTGATTGAACAATATGTTAGCAGTCATACTCACGCCATGAAAGCCGCTTTTGGTGTTGTCTGA
#grep "yopD" selected_gtf_files/Yersinia_pseudotuberculosis_IP32953_bis.gtf
NZ_CP009711.1 RefSeq gene 68202 68525 . + . gene_id "BZ17_RS00160"; transcript_id ""; db_xref "GeneID:66841050"; gbkey "Gene"; gene "yopD"; gene_biotype "protein_coding"; locus_tag "BZ17_RS00160"; part "1";
NZ_CP009711.1 RefSeq gene 1 597 . + . gene_id "BZ17_RS00160"; transcript_id ""; db_xref "GeneID:66841050"; gbkey "Gene"; gene "yopD"; gene_biotype "protein_coding"; locus_tag "BZ17_RS00160"; part "2";
samtools faidx Yersinia_pseudotuberculosis_IP32953_bis.fna NZ_CP009711.1:68202-68525 > temp.fna
samtools faidx Yersinia_pseudotuberculosis_IP32953_bis.fna NZ_CP009711.1:1-597 >> temp.fna
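#delete the second FASTA header line (the second line starting with ">") in temp.fna, so that the two parts are joined into one sequence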
sed -i -e ':a;N;$!ba;s/\n//g' -e 's/:/\t/g' temp.fna
Yersinia_pseudotuberculosis_IP32953_bis ATGACAATAAATATCAAGACAGACAGCCCAATTATCACGACCGGTTCACAGCTTGATGCCATCACTACAGAGACAGTCAAGCAAAGCGGTGAGATTAAAAAAACAGAAGACACCCGTCATGAAGCACAAGCAATAAAGAGTAGCGAGGCAAGCTTATCTCGGTCACAGGTGCCAGAATTGATCAAACCGAGCCAGGGAATCAATGTTGCATTACTGAGTAAAAGCCAGGGTGATCTTAATGGTACTTTAAGTATCTTGTTGTTGCTGTTGGAACTGGCACGTAAAGCGCGAGAAATGGGTTTGCAACAAAGGGATATAGAAAATAAAGCTACTATTACTGCCCAAAAGGAGCAGGTAGCGGAGATGGTCAGCGGTGCAAAACTGATGATCGCCATGGCGGTGGTGTCTGGCATCATGGCTGCTACTTCTACGGTTGCTAGTGCTTTTTCTATAGCGAAAGAGGTGAAAATAGTTAAACAGGAACAAATTCTAAACAGTAATATTGCTGGCCGCGAACAACTTATTGATACAAAAATGCAGCAAATGAGTAACATTGGTGATAAAGCGGTAAGCAGAGAGGATATCGGGAGAATATGGAAACCAGAGCAGGTAGCGGATCAAAATAAGCTGGCATTATTGGATAAAGAATTCAGAATGACCGACTCAAAAGCCAATGCGTTTAATGCCGCAACGCAGCCGTTAGGACAAATGGCAAACAGTGCGATTCAAGTTCATCAAGGGTATTCTCAAGCCGAGGTCAAAGAGAAAGAAGTCAATGCAAGTATTGCTGCCAACGAGAAGCAAAAAGCCGAAGAGGCGATGAACTATAATGATAACTTTATGAAAGATGTCCTGCGCTTGATTGAACAATATGTTAGCAGTCATACTCACGCCATGAAAGCCGCTTTTGGTGTTGTCTGA
#------------------------------- yopM (+2) -------------------------------
#grep "yopM" selected_gtf_files/Yersinia_pestis_FDAARGOS_602.gtf
NZ_CP033695.1 RefSeq gene 69663 70174 . + . gene_id "EGX42_RS00660"; transcript_id ""; gbkey "Gene"; gene "yopM"; gene_biotype "protein_coding"; locus_tag "EGX42_RS00660"; old_locus_tag "EGX42_00655"; part "1";
NZ_CP033695.1 RefSeq gene 1 592 . + . gene_id "EGX42_RS00660"; transcript_id ""; gbkey "Gene"; gene "yopM"; gene_biotype "protein_coding"; locus_tag "EGX42_RS00660"; old_locus_tag "EGX42_00655"; part "2";
samtools faidx Yersinia_pestis_FDAARGOS_602.fna NZ_CP033695.1:69663-70174 > temp.fna
samtools faidx Yersinia_pestis_FDAARGOS_602.fna NZ_CP033695.1:1-592 >> temp.fna
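#delete the second FASTA header line (the second line starting with ">") in temp.fna, so that the two parts are joined into one sequence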
sed -i -e ':a;N;$!ba;s/\n//g' -e 's/:/\t/g' temp.fna
Yersinia_pestis_FDAARGOS_602 ATGTTCATAAATCCAAGAAATGTATCTAATACTTTTTTGCAAGAACCATTACGTCATTCTTCTAATTTAACTGAGATGCCGGTTGAGGCAGAAAATGTTAAATCTAAGACTGAATATTATAATGCATGGTCGGAATGGGAACGAAATGCCCCTCCGGGGAATGGTGAACAGAGGGAAATGGCGGTTTCAAGGTTACGAGATTGCCTGGACCGACAAGCCCATGAGCTAGAACTAAATAATCTGGGGCTGAGTTCTTTGCCGGAATTACCTCCGCATTTAGAGAGTTTAGTGGCGTCATGTAATTCTCTTACAGAATTACCGGAATTACCGCAGAGCCTGAAATCACTTCTAGTTGATAATAACAATCTGAAGGCATTATCCGATTTACCACCTTTACTGGAATATTTAGGTGTCTCTAATAATCAGCTGGAAAAATTGCCAGAGTTGCAAAACTCGTCCTTCTTGAAAATTATTGATGTTGATAACAATTCACTGAAAAAACTACCTGATTTACCTCCTTCACTGGAGTTTATTGCTGCTGGTAATAATCAGCTGGAAGAATTGCCAGAGTTGCAAAACTTGCCCTTCTTGACTACGATTTATGCTGATAACAATTTACTGAAAACATTACCCGATTTACCCCCTTCCCTGGAAGCACTTAATGTCAGAGATAATTATTTAACTGATCTGCCAGAATTACCGCAGAGTTTAACCTTCTTAGATGTTTCTGAAAATATTTTTTCTGGATTATCGGAATTGCCACCAAACTTGTATTATCTCAATGCATCCAGCAATGAAATAAGATCCTTATGCGATTTACCCCCTTCACTGGAAGAACTTAATGTCAGTAATAATAAGTTGATCGAACTGCCAGCGTTACCTCCACGCTTAGAACGTTTAATCGCTTCATTTAATCATCTTGCTGAAGTACCTGAATTGCCGCAAAACCTGAAACAGCTCCACGTAGAGTACAACCCTCTGAGAGAGTTTCCCGATATACCTGAGTCAGTGGAAGATCTTCGGATGAACTCTGAACGTGTAGTTGATCCATATGAATTTGCTCATGAGACTACAGACAAACTTGAAGATGATGTATTTGAGTAG
#grep "yopM" selected_gtf_files/Yersinia_pseudotuberculosis_PB1+_bis.gtf
NZ_CP009779.1 RefSeq gene 69708 69812 . + . gene_id "BZ16_RS00005"; transcript_id ""; gbkey "Gene"; gene "yopM"; gene_biotype "protein_coding"; locus_tag "BZ16_RS00005"; old_locus_tag "BZ16_4135"; part "1";
NZ_CP009779.1 RefSeq gene 1 1485 . + . gene_id "BZ16_RS00005"; transcript_id ""; gbkey "Gene"; gene "yopM"; gene_biotype "protein_coding"; locus_tag "BZ16_RS00005"; old_locus_tag "BZ16_4135"; part "2";
samtools faidx Yersinia_pseudotuberculosis_PB1+_bis.fna NZ_CP009779.1:69708-69812 > temp.fna
samtools faidx Yersinia_pseudotuberculosis_PB1+_bis.fna NZ_CP009779.1:1-1485 >> temp.fna
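#delete the second FASTA header line (the second line starting with ">") in temp.fna, so that the two parts are joined into one sequence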
sed -i -e ':a;N;$!ba;s/\n//g' -e 's/:/\t/g' temp.fna
Yersinia_pseudotuberculosis_PB1+_bis ATGTTCATAAATCCAAGAAATGTATCTAATACTTTTTTGCAAGAACCATTACGTCATTCTTCTAATTTAACTGAGATGCCGGTTGAGGCAGAAAATGTTAAATCTAAGACTGAATATTATAATGCATGGTCGGAATGGGAACGAAATGCCCCTCCGGGGAATGGTGAACAGAGGGAAATGGCGGTTTCAAGGTTACGAGATTGCCTGGACCGACAAGCCCATGAGCTAGAACTAAATAATCTGGGGCTGAGTTCTTTGCCGGAATTACCTCCGCATTTAGAGAGTTTAGTGGCGTCATGTAATTCTCTTACAGAATTACCGGAATTGCCGCAGAGCCTGAAATCACTTCAAGTTGAAAATAACAATCTGAAGGCATTACCCGATTTACCCCCTTCCCTGAAAAAACTTCATGTCAGAGAAAATGATTTAACTGATCTGCCAGAATTACCGCAGAGCCTGGAATCACTTCGAGTTGATAATAACAATCTGAAGGCATTATCCGATTTACCTCCTTCACTGGAATATCTTACTGCTAGTAGTAATAAGCTGGAAGAATTGCCAGAGTTGCAAAACTTGCCCTTCTTGGCTGCGATTTATGCTGATAACAATTTACTGGAAACATTACCCGATTTACCCCCTTCCCTGAAAAAACTTCATGTCAGAGAAAATGATTTAACTGATCTGCCAGAATTACCGCAGAGCCTGGAATCACTTCAAGTTGATAATAACAATCTGAAGGCATTATCCGATTTACCTCCTTCACTGGAATATCTTACTGCTAGTAGTAATAAGCTGGAAGAATTGCCAGAGTTGCAAAACTTGCCCTTCTTGGCTGCGATTTATGCTGATAACAATTTACTGGAAACATTACCCGATTTACCCCCACATTTAGAGATTTTAGTGGCGTCATATAATTCTCTTACTGAATTACCGGAATTGCCGCAGAGCCTGAAATCACTTCGAGTTGATAATAACAATCTGAAGGCATTATCCGATTTACCTCCTTCACTGGAATATCTTACTGCTAGTAGTAATAAGCTGGAAGAATTACCAGAGTTGCAAAACTTGCCCTTCTTGGCTGCGATTTATGCTGATAACAATTTACTGGAAACATTACCCGATTTACCCCCTTCCCTGAAAAAACTTCATGTCAGAGAAAATGATTTAACTGATCTGCCAGAATTACCGCAGAGTTTAACCTTCTTAGATGTTTCTGATAATAATATTTCTGGATTATCGGAATTGCCACCAAACTTGTATTATCTCGATGCATCCAGCAATGAAATAAGATCCTTATGCGATTTACCTCCTTCACTGGTAGACCTTAATGTCAAAAGTAATCAGTTGAGCGAACTGCCAGCGTTACCTCCACACTTAGAACGTTTAATCGCTTCATTTAATTATCTTGCTGAAGTACCTGAATTGCCGCAAAACCTGAAACAGCTCCACGTAGAGCAAAACGCTCTGAGAGAGTTTCCCGATATACCTGAGTCATTGGAAGAGCTTGAGATGGACTCTGAACGTGTAGTTGATCCATATGAATTTGCTCATGAGACTACAGACAAACTTGAAGATGATGTATTTGAGTAG
#------------------------------- yopO (+9) -------------------------------
#grep "yopO" selected_gtf_files/Yersinia_enterocolitica_YE165.gtf
NZ_CP016933.1 RefSeq gene 11705 13893 . - . gene_id "BB936_RS22335"; transcript_id ""; gbkey "Gene"; gene "yopO"; gene_biotype "pseudogene"; gene_synonym "ypkA"; locus_tag "BB936_RS22335"; old_locus_tag "BB936_22330"; pseudo "true";
samtools faidx Yersinia_enterocolitica_YE165.fna NZ_CP016933.1:11705-13893 > temp.fna
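#the gene is on the minus strand: reverse-complement the extracted region with EMBOSS revseq (presumably run interactively on temp.fna; its output 11705-13893.rev is read in the next step)
revseq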
sed -e ':a;N;$!ba;s/\n//g' -e 's/:/\t/g' 11705-13893.rev > temp_.fna
Yersinia_enterocolitica_YE165 ATGAAAATCATGGGAACTATGCCACCGTCGATCTCCCTCGCTAAAGCTCATGAGCGCATCAGCCAACATTGGCAAAATCCTGTCGGTGAGCTCAATATCGGAGGAAAACGGTATAGAATTATCGATAATCAAGTGCTGCGCTTGAACCCCCACAGTGGTTTTTCTCTCTTTCGAGAAGGGGTTGGTAAGATCTTTTCGGGGAAGATGTTTAACTTTTCAATTGCTCGTAACCTTACTGAGACACTCCATGCAGCCCAGAAAACGACTTCGCAGGAGCTAAGGTCTGATATCCCCAATGTTCTCAGTAATCTCTTTGGAGCCAAGCCACAGACCGAACTGCCGCTGGGTTGGAAAGGGAAGCCTTTGTCAGGAGCTCCGGATCTTGAAGGGATGCGAGTGGCTGAAACCGATAAGTTTGCCGAGGGCGAAAGCCATATTAGTATAATAGAAACTAAGGATAATCAGCGGTTGGTGGCTAAGATTGAACGCTCCATTGCCGAGGGGCATTTGTTCGCAGAACTGGAGGCTTATAAACACATCTATAAAACCGCGGGCAAACATCCTAATCTTGCCAATGTCCATGGCATGGCTGTGGTGCCATACGGTAACCGTAAGGAGGAAGCATTGCTGATGGATGAGGTGGATGGTTGGCGTTGTTCTGACACACTAAGAAGCCTCGCCGATAGCTGGAAGCAAGGAAAGATCAATAGTGAAGCCTACTGGGGAACGATCAAGTTTATTGCCCATCGGCTATTAGATGTAACCAATCACCTTGCCAAGGCAGGGATAGTACATAACGATATCAAACCCGGTAATGTGGTATTTGACCGCGCTAGCGGAGAGCCCGTTGTCATTGATCTAGGATTACACTCTCGTTCAGGGGAACAACCTAAGGGGTTTACAGAATCCTTCAAAGCGCCGGAGCTTGGAGTAGGAAACCTAGGCGCATCAGAAAAGAGCGATGTTTTTCTCGTAGTTTCAACCCTTCTACATGGTATCGAAGGTTTTGAGAAAGATCCGGAGATAAAACCTAATCAAGGACTGAGATCCATTACCTCAGAACCAGCGCACGTAATGGATGAGAATGGTTACCCAATCCATCGACCTGGTATAGCTGGAGTCGAGACAGCCTATACACGCTTCATCACAGACATCCTTGGCGTTTCCGCTGACTCAAGACCTGATTCCAACGAAGCCAGACTCCACGAGTTCTTGAGCGACGGAACTATTGACGAGGAGTCGGCCAAGCAGATCCTAAAAGATACTCTAACCGGAGAAATGAGCCCATTATCTACTGATGTAAGGCGGATAACACCCAAGAAGCTTCGGGAGCTCTCTGATTTGCTTAGGACGCATTTGAGTAGTGCAGCAACTAAGCAATTGGATATGGGGGTGGTTTTGTCGGATCTTGATACCATGTTGGTGACACTCGACAAGGCCGAACGCGAGGGGGAGTAGACAAGGATCAGTTGAAGAGTTTTAACAGTTTGATTCTGAAGACTTACAGCGTGATTGAAGACTATGTCAAAGGCAGAGAAGGGGATACCAAGAGTTCCAGTGCGGAAGTATCCCCCTATCATCGCAGTAACTTTATGCTATCGATCGTCGAGCCTTCACTGCAGAGGATCCAAAAGCATCTGGACCAGACACACTCTTTTTCTGATATCGGTTCACTAGTGCGCGCACATAAGCACCTGGAAACGCTTTTAGAGGTCTTAGTCACCTTGTCACCGCAAGGGCAGCCCGTGTCCTCTGAAACCTACAGCTTCCTGAATCGATTAGCTGAGGCTAAGGTCACCTTGTCGCAGCAATTGGATACTCTCCAGCAGCAGCAGGAGAGTGCGAAAGCGCAACTATCTATTCTGATTAATCGTTCAGGTTCTTGGGCCGATGTTGCTCGTCAGTCCCTGCAGCGTTTTGACAGTACCCGGCCTGTAGTGAAATTCGGCACTGAGCAGTATACCGCAATT
CACCGTCAGATGATGGCGGCCCATGCAGCCATTACGCTACAGGAGGTATCGGAGTTTACTGATGATATGCGAAACTTTACAGCGGACTCTATTCCACTACTGATTCGACTTGGACGAAGCAGTTTAATAGATGAGCATTTGGTTGAACAGAGAGAGAAGTTGCGAGAGCTGACGACCATCGCCGAGCGACTGAACCGGTTGGAGCGGGAATGGATGTGA
#grep "yopO" selected_gtf_files/Yersinia_enterocolitica_YE3.gtf
NZ_CP016943.1 RefSeq gene 12782 14970 . - . gene_id "BED35_RS00550"; transcript_id ""; gbkey "Gene"; gene "yopO"; gene_biotype "pseudogene"; gene_synonym "ypkA"; locus_tag "BED35_RS00550"; old_locus_tag "BED35_00550"; pseudo "true";
# Extract the yopO region listed in the GTF record above (NZ_CP016943.1:12782-14970) into temp.fna.
samtools faidx Yersinia_enterocolitica_YE3.fna NZ_CP016943.1:12782-14970 > temp.fna
# The GTF record puts the gene on the minus strand, so the extracted region has to be reverse-complemented.
# NOTE(review): the revseq arguments were not recorded; presumably EMBOSS revseq reading temp.fna and writing 12782-14970.rev -- confirm.
revseq
# Join all lines of 12782-14970.rev into one line and replace every ':' with a tab; the result is written to temp_.fna.
sed -e ':a;N;$!ba;s/\n//g' -e 's/:/\t/g' 12782-14970.rev > temp_.fna
Yersinia_enterocolitica_YE3 ATGAAAATCATGGGAACTATGCCACCGTCGATCTCCCTCGCTAAAGCTCATGAGCGCATCAGCCAACATTGGCAAAATCCTGTCGGTGAGCTCAATATCGGAGGAAAACGGTATAGAATTATCGATAATCAAGTGCTGCGCTTGAACCCCCACAGTGGTTTTTCTCTCTTTCGAGAAGGGGTTGGTAAGATCTTTTCGGGGAAGATGTTTAACTTTTCAATTGCTCGTAACCTTACTGAGACACTCCATGCAGCCCAGAAAACGACTTCGCAGGAGCTAAGGTCTGATATCCCCAATGTTCTCAGTAATCTCTTTGGAGCCAAGCCACAGACCGAACTGCCGCTGGGTTGGAAAGGGAAGCCTTTGTCAGGAGCTCCGGATCTTGAAGGGATGCGAGTGGCTGAAACCGATAAGTTTGCCGAGGGCGAAAGCCATATTAGTATAATAGAAACTAAGGATAATCAGCGGTTGGTGGCTAAGATTGAACGCTCCATTGCCGAGGGGCATTTGTTCGCAGAACTGGAGGCTTATAAACACATCTATAAAACCGCGGGCAAACATCCTAATCTTGCCAATGTCCATGGCATGGCTGTGGTGCCATACGGTAACCGTAAGGAGGAAGCATTGCTGATGGATGAGGTGGATGGTTGGCGTTGTTCTGACACACTAAGAAGCCTCGCCGATAGCTGGAAGCAAGGAAAGATCAATAGTGAAGCCTACTGGGGAACGATCAAGTTTATTGCCCATCGGCTATTAGATGTAACCAATCACCTTGCCAAGGCAGGGATAGTACATAACGATATCAAACCCGGTAATGTGGTATTTGACCGCGCTAGCGGAGAGCCCGTTGTCATTGATCTAGGATTACACTCTCGTTCAGGGGAACAACCTAAGGGGTTTACAGAATCCTTCAAAGCGCCGGAGCTTGGAGTAGGAAACCTAGGCGCATCAGAAAAGAGCGATGTTTTTCTCGTAGTTTCAACCCTTCTACATGGTATCGAAGGTTTTGAGAAAGATCCGGAGATAAAACCTAATCAAGGACTGAGATCCATTACCTCAGAACCAGCGCACGTAATGGATGAGAATGGTTACCCAATCCATCGACCTGGTATAGCTGGAGTCGAGACAGCCTATACACGCTTCATCACAGACATCCTTGGCGTTTCCGCTGACTCAAGACCTGATTCCAACGAAGCCAGACTCCACGAGTTCTTGAGCGACGGAACTATTGACGAGGAGTCGGCCAAGCAGATCCTAAAAGATACTCTAACCGGAGAAATGAGCCCATTATCTACTGATGTAAGGCGGATAACACCCAAGAAGCTTCGGGAGCTCTCTGATTTGCTTAGGACGCATTTGAGTAGTGCAGCAACTAAGCAATTGGATATGGGGGTGGTTTTGTCGGATCTTGATACCATGTTGGTGACACTCGACAAGGCCGAACGCGAGGGGGAGTAGACAAGGATCAGTTGAAGAGTTTTAACAGTTTGATTCTGAAGACTTACAGCGTGATTGAAGACTATGTCAAAGGCAGAGAAGGGGATACCAAGAGTTCCAGTGCGGAAGTATCCCCCTATCATCGCAGTAACTTTATGCTATCGATCGTCGAGCCTTCACTGCAGAGGATCCAAAAGCATCTGGACCAGACACACTCTTTTTCTGATATCGGTTCACTAGTGCGCGCACATAAGCACCTGGAAACGCTTTTAGAGGTCTTAGTCACCTTGTCACCGCAAGGGCAGCCCGTGTCCTCTGAAACCTACAGCTTCCTGAATCGATTAGCTGAGGCTAAGGTCACCTTGTCGCAGCAATTGGATACTCTCCAGCAGCAGCAGGAGAGTGCGAAAGCGCAACTATCTATTCTGATTAATCGTTCAGGTTCTTGGGCCGATGTTGCTCGTCAGTCCCTGCAGCGTTTTGACAGTACCCGGCCTGTAGTGAAATTCGGCACTGAGCAGTATACCGCAATTCA
CCGTCAGATGATGGCGGCCCATGCAGCCATTACGCTACAGGAGGTATCGGAGTTTACTGATGATATGCGAAACTTTACAGCGGACTCTATTCCACTACTGATTCGACTTGGACGAAGCAGTTTAATAGATGAGCATTTGGTTGAACAGAGAGAGAAGTTGCGAGAGCTGACGACCATCGCCGAGCGACTGAACCGGTTGGAGCGGGAATGGATGTGA
#grep "yopO" selected_gtf_files/Yersinia_enterocolitica_YE6.gtf
NZ_CP016937.1 RefSeq gene 4748707 4750895 . - . gene_id "BED33_RS21960"; transcript_id ""; gbkey "Gene"; gene "yopO"; gene_biotype "pseudogene"; gene_synonym "ypkA"; locus_tag "BED33_RS21960"; old_locus_tag "BED33_21960"; pseudo "true";
# Extract the yopO region listed in the GTF record above (NZ_CP016937.1:4748707-4750895) into temp.fna.
samtools faidx Yersinia_enterocolitica_YE6.fna NZ_CP016937.1:4748707-4750895 > temp.fna
# The GTF record puts the gene on the minus strand, so the extracted region has to be reverse-complemented.
# NOTE(review): the revseq arguments were not recorded; presumably EMBOSS revseq reading temp.fna and writing 4748707-4750895.rev -- confirm.
revseq
# Join all lines of 4748707-4750895.rev into one line and replace every ':' with a tab; the result is written to temp_.fna.
sed -e ':a;N;$!ba;s/\n//g' -e 's/:/\t/g' 4748707-4750895.rev > temp_.fna
Yersinia_enterocolitica_YE6 ATGAAAATCATGGGAACTATGCCACCGTCGATCTCCCTCGCTAAAGCTCATGAGCGCATCAGCCAACATTGGCAAAATCCTGTCGGTGAGCTCAATATCGGAGGAAAACGGTATAGAATTATCGATAATCAAGTGCTGCGCTTGAACCCCCACAGTGGTTTTTCTCTCTTTCGAGAAGGGGTTGGTAAGATCTTTTCGGGGAAGATGTTTAACTTTTCAATTGCTCGTAACCTTACTGAGACACTCCATGCAGCCCAGAAAACGACTTCGCAGGAGCTAAGGTCTGATATCCCCAATGTTCTCAGTAATCTCTTTGGAGCCAAGCCACAGACCGAACTGCCGCTGGGTTGGAAAGGGAAGCCTTTGTCAGGAGCTCCGGATCTTGAAGGGATGCGAGTGGCTGAAACCGATAAGTTTGCCGAGGGCGAAAGCCATATTAGTATAATAGAAACTAAGGATAATCAGCGGTTGGTGGCTAAGATTGAACGCTCCATTGCCGAGGGGCATTTGTTCGCAGAACTGGAGGCTTATAAACACATCTATAAAACCGCGGGCAAACATCCTAATCTTGCCAATGTCCATGGCATGGCTGTGGTGCCATACGGTAACCGTAAGGAGGAAGCATTGCTGATGGATGAGGTGGATGGTTGGCGTTGTTCTGACACACTAAGAAGCCTCGCCGATAGCTGGAAGCAAGGAAAGATCAATAGTGAAGCCTACTGGGGAACGATCAAGTTTATTGCCCATCGGCTATTAGATGTAACCAATCACCTTGCCAAGGCAGGGATAGTACATAACGATATCAAACCCGGTAATGTGGTATTTGACCGCGCTAGCGGAGAGCCCGTTGTCATTGATCTAGGATTACACTCTCGTTCAGGGGAACAACCTAAGGGGTTTACAGAATCCTTCAAAGCGCCGGAGCTTGGAGTAGGAAACCTAGGCGCATCAGAAAAGAGCGATGTTTTTCTCGTAGTTTCAACCCTTCTACATGGTATCGAAGGTTTTGAGAAAGATCCGGAGATAAAACCTAATCAAGGACTGAGATCCATTACCTCAGAACCAGCGCACGTAATGGATGAGAATGGTTACCCAATCCATCGACCTGGTATAGCTGGAGTCGAGACAGCCTATACACGCTTCATCACAGACATCCTTGGCGTTTCCGCTGACTCAAGACCTGATTCCAACGAAGCCAGACTCCACGAGTTCTTGAGCGACGGAACTATTGACGAGGAGTCGGCCAAGCAGATCCTAAAAGATACTCTAACCGGAGAAATGAGCCCATTATCTACTGATGTAAGGCGGATAACACCCAAGAAGCTTCGGGAGCTCTCTGATTTGCTTAGGACGCATTTGAGTAGTGCAGCAACTAAGCAATTGGATATGGGGGTGGTTTTGTCGGATCTTGATACCATGTTGGTGACACTCGACAAGGCCGAACGCGAGGGGGAGTAGACAAGGATCAGTTGAAGAGTTTTAACAGTTTGATTCTGAAGACTTACAGCGTGATTGAAGACTATGTCAAAGGCAGAGAAGGGGATACCAAGAGTTCCAGTGCGGAAGTATCCCCCTATCATCGCAGTAACTTTATGCTATCGATCGTCGAGCCTTCACTGCAGAGGATCCAAAAGCATCTGGACCAGACACACTCTTTTTCTGATATCGGTTCACTAGTGCGCGCACATAAGCACCTGGAAACGCTTTTAGAGGTCTTAGTCACCTTGTCACCGCAAGGGCAGCCCGTGTCCTCTGAAACCTACAGCTTCCTGAATCGATTAGCTGAGGCTAAGGTCACCTTGTCGCAGCAATTGGATACTCTCCAGCAGCAGCAGGAGAGTGCGAAAGCGCAACTATCTATTCTGATTAATCGTTCAGGTTCTTGGGCCGATGTTGCTCGTCAGTCCCTGCAGCGTTTTGACAGTACCCGGCCTGTAGTGAAATTCGGCACTGAGCAGTATACCGCAATTCA
CCGTCAGATGATGGCGGCCCATGCAGCCATTACGCTACAGGAGGTATCGGAGTTTACTGATGATATGCGAAACTTTACAGCGGACTCTATTCCACTACTGATTCGACTTGGACGAAGCAGTTTAATAGATGAGCATTTGGTTGAACAGAGAGAGAAGTTGCGAGAGCTGACGACCATCGCCGAGCGACTGAACCGGTTGGAGCGGGAATGGATGTGA
#grep "yopO" selected_gtf_files/Yersinia_pestis_790.gtf
#grep "yopO" selected_gtf_files/Yersinia_pestis_FDAARGOS_601.gtf
NZ_CP033697.1 RefSeq gene 68815 70300 . + . gene_id "EGX46_RS00005"; transcript_id ""; gbkey "Gene"; gene "yopO"; gene_biotype "protein_coding"; gene_synonym "ypkA"; locus_tag "EGX46_RS00005"; old_locus_tag "EGX46_00005"; part "1";
NZ_CP033697.1 RefSeq gene 1 713 . + . gene_id "EGX46_RS00005"; transcript_id ""; gbkey "Gene"; gene "yopO"; gene_biotype "protein_coding"; gene_synonym "ypkA"; locus_tag "EGX46_RS00005"; old_locus_tag "EGX46_00005"; part "2";
# The GTF lists yopO in two parts on the plus strand (part 1: 68815-70300, part 2: 1-713).
# Extract part 1 into temp.fna ...
samtools faidx Yersinia_pestis_FDAARGOS_601.fna NZ_CP033697.1:68815-70300 > temp.fna
# ... and append part 2 to the same file.
samtools faidx Yersinia_pestis_FDAARGOS_601.fna NZ_CP033697.1:1-713 >> temp.fna
# Manual step: delete the second FASTA header line (the second line starting with ">") so that both parts form one record.
# Join all lines of temp.fna into one line and replace every ':' with a tab, editing the file in place.
sed -i -e ':a;N;$!ba;s/\n//g' -e 's/:/\t/g' temp.fna
Yersinia_pestis_FDAARGOS_601 ATGAAAAGCGTGAAAATCATGGGAACTATGCCACCGTCGATCTCCCTCGCCAAAGCTCATGAGCGCATCAGCCAACATTGGCAAAATCCTGTCGGTGAGCTCAATATCGGAGGAAAACGGTATAGAATTATCGATAATCAAGTGTTGCGCTTGAACCCCCACAGTGGTTTTTCTCTCTTTCGAGAAGGGGTTGGTAAGATCTTTTCGGGGAAGATGTTTAACTTTTCAATTGCTCGTAACCTTACTGACACACTCCATGCGGCCCAGAAAACGACTTCGCAGGAGCTAAGGTCTGATATCCCCAATGCTCTCAGTAATCTCTTTGGAGCCAAGCCACAGACCGAACTGCCGCTGGGTTGGAAAGGGGAGCCCTTGTCAGGAGCTCCGGATCTTGAAGGGATGCGAGTGGCTGAAACCGATAAGTTTGCCGAGGGCGAAAGCCATATTAGTATAATAGAAACTAAGGATAAGCAGCGGTTGGTAGCTAAGATTGAACGCTCCATTGCCGAGGGGCATTTGTTCGCAGAACTGGAGGCTTATAAACACATCTATAAAACCGCGGGCAAACATCCTAATCTTGCCAATGTTCATGGCATGGCTGTGGTGCCATACGGTAACCGTAAGGAGGAAGCATTGCTGATGGATGAGGTGGATGGTTGGCGTTGTTCTGACACACTAAGAACCCTCGCCGATAGCTGGAAGCAAGGAAAGATCAATAGTGAAGCCTACTGGGGAACGATCAAGTTTATTGCCCATCGGCTATTAGATGTAACCAATCACCTTGCCAAGGCAGGGGTAGTACATAACGATATCAAACCCGGTAATGTGGTATTTGACCGCGCTAGCGGAGAGCCCGTTGTTATTGATCTAGGATTACACTCTCGTTCAGGGGAACAACCTAAGGGGTTTACAGAATCCTTCAAAGCGCCGGAGCTTGGAGTAGGAAACCTAGGCGCATCAGAAAAGAGCGATGTTTTTCTCGTAGTGTCAACCCTTCTACATTGTATCGAAGGTTTTGAGAAAAATCCGGAGATAAAGCCTAATCAAGGACTGAGATTCATTACCTCAGAACCAGCGCACGTAATGGATGAGAATGGTTATCCAATCCATCGACCTGGTATAGCTGGAGTCGAGACAGCCTATACACGCTTCATCACAGACATCCTTGGCGTTTCCGCTGACTCAAGACCTGATTCCAACGAAGCCAGACTCCACGAGTTCTTGAGCGACGGAACTATCGACGAGGAGTCGGCCAAGCAGATCCTAAAAGATACCCTAACCGGAGAAATGAGCCCATTATCTACTGATGTAAGGCGGATAACACCCAAGAAGCTTCGGGAGCTATCTGATTTGCTTAGGACGCATTTGAGCAGTGCAGCAACTAAGCAATTGGATATGGGGGGGGTTTTGTCGGATCTTGATACCATGTTGGTGGCACTCGACAAGGCCGAACGCGAGGGGGGAGTAGACAAGGATCAGTTGAAGAGTTTTAACAGTTTGATTCTGAAGACTTACAGAGTGATTGAAGACTATGTCAAAGGCAGAGAAGGGGATACCAAGAATTCCAGTACGGAAGTATCCCCCTATCATCGCAGTAACTTTATGCTATCGATCGTCGAACCTTCACTGCAGAGGATCCAGAAGCATCTGGACCAGACACACTCTTTTTCTGATATCGGTTCACTAGTGCGCGCACATAAGCACCTGGAAACGCTTTTAGAGGTCTTAGTCACCTTGTCACAGCAAGGGCAGCCCGTGTCCTCTGAAACCTACGGCTTCCTGAATCGATTAACTGAGGCTAAGATCACCTTGTCGCAGCAATTGAATACTCTCCAGCAGCAGCAGGAGAGTGCGAAAGCGCAATTATCTATTCTGATTAATCGTTCAGGTTCTTGGGCCGATGTTGCTCGTCAGTCCCTGCAGCGTTTTGACAGTACCCGGCCTGTAGTGAAATTCGGCACTGAGCAGTAT
ACCGCAATTCACCGTCAGATGATGGCGGCCCATGCAGCTATTACGCTACAGGAGGTATCGGAGTTTACTGATGATATGCGAAACTTTACAGTGGACTCTATTCCACTACTGATTCAACTTGGACGAAGCAGTTTAATGGATGAGCATTTGGTTGAACAGAGAGAAAAGTTGCGAGAGCTGACGACCATCGCCGAGCGACTGAACCGGTTGGAGCGGGAATGGATGTGA
#grep "yopO" selected_gtf_files/Yersinia_pestis_Harbin_35.gtf
NC_017263.1 RefSeq gene 49729 51926 . - . gene_id "YPC_RS21300"; transcript_id ""; gbkey "Gene"; gene "yopO"; gene_biotype "pseudogene"; gene_synonym "ypkA"; locus_tag "YPC_RS21300"; pseudo "true";
# Extract the yopO region listed in the GTF record above (NC_017263.1:49729-51926) into temp.fna.
samtools faidx Yersinia_pestis_Harbin_35.fna NC_017263.1:49729-51926 > temp.fna
# The GTF record puts the gene on the minus strand, so the extracted region has to be reverse-complemented.
# NOTE(review): the revseq arguments were not recorded; presumably EMBOSS revseq reading temp.fna and writing 49729-51926.rev -- confirm.
revseq
# Join all lines of 49729-51926.rev into one line and replace every ':' with a tab; the result is written to temp_.fna.
sed -e ':a;N;$!ba;s/\n//g' -e 's/:/\t/g' 49729-51926.rev > temp_.fna
Yersinia_pestis_Harbin_35 ATGAAAAGCGTGAAAATCATGGGAACTATGCCACCGTCGATCTCCCTCGCCAAAGCTCATGAGCGCATCAGCCAACATTGGCAAAATCCTGTCGGTGAGCTCAATATCGGAGGAAAACGGTATAGAATTATCGATAATCAAGTGTTGCGCTTGAACCCCCACAGTGGTTTTTCTCTCTTTCGAGAAGGGGTTGGTAAGATCTTTTCGGGGAAGATGTTTAACTTTTCAATTGCTCGTAACCTTACTGACACACTCCATGCGGCCCAGAAAACGACTTCGCAGGAGCTAAGGTCTGATATCCCCAATGCTCTCAGTAATCTCTTTGGAGCCAAGCCACAGACCGAACTGCCGCTGGGTTGGAAAGGGGAGCCCTTGTCAGGAGCTCCGGATCTTGAAGGGATGCGAGTGGCTGAAACCGATAAGTTTGCCGAGGGCGAAAGCCATATTAGTATAATAGAAACTAAGGATAAGCAGCGGTTGGTAGCTAAGATTGAACGCTCCATTGCCGAGGGGCATTTGTTCGCAGAACTGGAGGCTTATAAACACATCTATAAAACCGCGGGCAAACATCCTAATCTTGCCAATGTTCATGGCATGGCTGTGGTGCCATACGGTAACCGTAAGGAGGAAGCATTGCTGATGGATGAGGTGGATGGTTGGCGTTGTTCTGACACACTAAGAACCCTCGCCGATAGCTGGAAGCAAGGAAAGATCAATAGTGAAGCCTACTGGGGAACGATCAAGTTTATTGCCCATCGGCTATTAGATGTAACCAATCACCTTGCCAAGGCAGGGGTAGTACATAACGATATCAAACCCGGTAATGTGGTATTTGACCGCGCTAGCGGAGAGCCCGTTGTTATTGATCTAGGATTACACTCTCGTTCAGGGGAACAACCTAAGGGGTTTACAGAATCCTTCAAAGCGCCGGAGCTTGGAGTAGGAAACCTAGGCGCATCAGAAAAGAGCGATGTTTTTCTCGTAGTGTCAACCCTTCTACATTGTATCGAAGGTTTTGAGAAAAATCCGGAGATAAAGCCTAATCAAGGACTGAGATTCATTACCTCAGAACCAGCGCACGTAATGGATGAGAATGGTTATCCAATCCATCGACCTGGTATAGCTGGAGTCGAGACAGCCTATACACGCTTCATCACAGACATCCTTGGCGTTTCCGCTGACTCAAGACCTGATTCCAACGAAGCCAGACTCCACGAGTTCTTGAGCGACGGAACTATCGACGAGGAGTCGGCCAAGCAGATCCTAAAAGATACCCTAACCGGAGAAATGAGCCCATTATCTACTGATGTAAGGCGGATAACACCCAAGAAGCTTCGGGAGCTATCTGATTTGCTTAGGACGCATTTGAGCAGTGCAGCAACTAAGCAATTGGATATGGGGGGGTTTTGTCGGATCTTGATACCATGTTGGTGGCACTCGACAAGGCCGAACGCGAGGGGGGAGTAGACAAGGATCAGTTGAAGAGTTTTAACAGTTTGATTCTGAAGACTTACAGAGTGATTGAAGACTATGTCAAAGGCAGAGAAGGGGATACCAAGAATTCCAGTACGGAAGTATCCCCCTATCATCGCAGTAACTTTATGCTATCGATCGTCGAACCTTCACTGCAGAGGATCCAGAAGCATCTGGACCAGACACACTCTTTTTCTGATATCGGTTCACTAGTGCGCGCACATAAGCACCTGGAAACGCTTTTAGAGGTCTTAGTCACCTTGTCACAGCAAGGGCAGCCCGTGTCCTCTGAAACCTACGGCTTCCTGAATCGATTAACTGAGGCTAAGATCACCTTGTCGCAGCAATTGAATACTCTCCAGCAGCAGCAGGAGAGTGCGAAAGCGCAATTATCTATTCTGATTAATCGTTCAGGTTCTTGGGCCGATGTTGCTCGTCAGTCCCTGCAGCGTTTTGACAGTACCCGGCCTGTAGTGAAATTCGGCACTGAGCAGTATACCG
CAATTCACCGTCAGATGATGGCGGCCCATGCAGCTATTACGCTACAGGAGGTATCGGAGTTTACTGATGATATGCGAAACTTTACAGTGGACTCTATTCCACTACTGATTCAACTTGGACGAAGCAGTTTAATGGATGAGCATTTGGTTGAACAGAGAGAAAAGTTGCGAGAGCTGACGACCATCGCCGAGCGACTGAACCGGTTGGAGCGGGAATGGATGTGA
#grep "yopO" selected_gtf_files/Yersinia_pestis_Harbin_35_bis.gtf
NZ_CP009703.1 RefSeq gene 55189 57386 . + . gene_id "CH55_RS00985"; transcript_id ""; gbkey "Gene"; gene "yopO"; gene_biotype "pseudogene"; gene_synonym "ypkA"; locus_tag "CH55_RS00985"; old_locus_tag "CH55_4357"; pseudo "true";
# Extract the yopO region listed in the GTF record above (NZ_CP009703.1:55189-57386) into temp.fna.
# The gene is on the plus strand here, so no reverse-complement step follows.
samtools faidx Yersinia_pestis_Harbin_35_bis.fna NZ_CP009703.1:55189-57386 > temp.fna
# Join all lines of temp.fna into one line and replace every ':' with a tab, editing the file in place.
sed -i -e ':a;N;$!ba;s/\n//g' -e 's/:/\t/g' temp.fna
Yersinia_pestis_Harbin_35_bis ATGAAAAGCGTGAAAATCATGGGAACTATGCCACCGTCGATCTCCCTCGCCAAAGCTCATGAGCGCATCAGCCAACATTGGCAAAATCCTGTCGGTGAGCTCAATATCGGAGGAAAACGGTATAGAATTATCGATAATCAAGTGTTGCGCTTGAACCCCCACAGTGGTTTTTCTCTCTTTCGAGAAGGGGTTGGTAAGATCTTTTCGGGGAAGATGTTTAACTTTTCAATTGCTCGTAACCTTACTGACACACTCCATGCGGCCCAGAAAACGACTTCGCAGGAGCTAAGGTCTGATATCCCCAATGCTCTCAGTAATCTCTTTGGAGCCAAGCCACAGACCGAACTGCCGCTGGGTTGGAAAGGGGAGCCCTTGTCAGGAGCTCCGGATCTTGAAGGGATGCGAGTGGCTGAAACCGATAAGTTTGCCGAGGGCGAAAGCCATATTAGTATAATAGAAACTAAGGATAAGCAGCGGTTGGTAGCTAAGATTGAACGCTCCATTGCCGAGGGGCATTTGTTCGCAGAACTGGAGGCTTATAAACACATCTATAAAACCGCGGGCAAACATCCTAATCTTGCCAATGTTCATGGCATGGCTGTGGTGCCATACGGTAACCGTAAGGAGGAAGCATTGCTGATGGATGAGGTGGATGGTTGGCGTTGTTCTGACACACTAAGAACCCTCGCCGATAGCTGGAAGCAAGGAAAGATCAATAGTGAAGCCTACTGGGGAACGATCAAGTTTATTGCCCATCGGCTATTAGATGTAACCAATCACCTTGCCAAGGCAGGGGTAGTACATAACGATATCAAACCCGGTAATGTGGTATTTGACCGCGCTAGCGGAGAGCCCGTTGTTATTGATCTAGGATTACACTCTCGTTCAGGGGAACAACCTAAGGGGTTTACAGAATCCTTCAAAGCGCCGGAGCTTGGAGTAGGAAACCTAGGCGCATCAGAAAAGAGCGATGTTTTTCTCGTAGTGTCAACCCTTCTACATTGTATCGAAGGTTTTGAGAAAAATCCGGAGATAAAGCCTAATCAAGGACTGAGATTCATTACCTCAGAACCAGCGCACGTAATGGATGAGAATGGTTATCCAATCCATCGACCTGGTATAGCTGGAGTCGAGACAGCCTATACACGCTTCATCACAGACATCCTTGGCGTTTCCGCTGACTCAAGACCTGATTCCAACGAAGCCAGACTCCACGAGTTCTTGAGCGACGGAACTATCGACGAGGAGTCGGCCAAGCAGATCCTAAAAGATACCCTAACCGGAGAAATGAGCCCATTATCTACTGATGTAAGGCGGATAACACCCAAGAAGCTTCGGGAGCTATCTGATTTGCTTAGGACGCATTTGAGCAGTGCAGCAACTAAGCAATTGGATATGGGGGGGTTTTGTCGGATCTTGATACCATGTTGGTGGCACTCGACAAGGCCGAACGCGAGGGGGGAGTAGACAAGGATCAGTTGAAGAGTTTTAACAGTTTGATTCTGAAGACTTACAGAGTGATTGAAGACTATGTCAAAGGCAGAGAAGGGGATACCAAGAATTCCAGTACGGAAGTATCCCCCTATCATCGCAGTAACTTTATGCTATCGATCGTCGAACCTTCACTGCAGAGGATCCAGAAGCATCTGGACCAGACACACTCTTTTTCTGATATCGGTTCACTAGTGCGCGCACATAAGCACCTGGAAACGCTTTTAGAGGTCTTAGTCACCTTGTCACAGCAAGGGCAGCCCGTGTCCTCTGAAACCTACGGCTTCCTGAATCGATTAACTGAGGCTAAGATCACCTTGTCGCAGCAATTGAATACTCTCCAGCAGCAGCAGGAGAGTGCGAAAGCGCAATTATCTATTCTGATTAATCGTTCAGGTTCTTGGGCCGATGTTGCTCGTCAGTCCCTGCAGCGTTTTGACAGTACCCGGCCTGTAGTGAAATTCGGCACTGAGCAGTAT
ACCGCAATTCACCGTCAGATGATGGCGGCCCATGCAGCTATTACGCTACAGGAGGTATCGGAGTTTACTGATGATATGCGAAACTTTACAGTGGACTCTATTCCACTACTGATTCAACTTGGACGAAGCAGTTTAATGGATGAGCATTTGGTTGAACAGAGAGAAAAGTTGCGAGAGCTGACGACCATCGCCGAGCGACTGAACCGGTTGGAGCGGGAATGGATGTGA
#grep "yopO" selected_gtf_files/Yersinia_pestis_Java9.gtf
NZ_CP009995.1 RefSeq gene 76131 77073 . - . gene_id "CH62_RS22640"; transcript_id ""; gbkey "Gene"; gene "yopO"; gene_biotype "protein_coding"; gene_synonym "ypkA"; locus_tag "CH62_RS22640"; part "2";
NZ_CP009995.1 RefSeq gene 1 1256 . - . gene_id "CH62_RS22640"; transcript_id ""; gbkey "Gene"; gene "yopO"; gene_biotype "protein_coding"; gene_synonym "ypkA"; locus_tag "CH62_RS22640"; part "1";
# The GTF lists yopO in two parts on the minus strand (part 2: 76131-77073, part 1: 1-1256).
# Extract 76131-77073 into temp.fna ...
samtools faidx Yersinia_pestis_Java9.fna NZ_CP009995.1:76131-77073 > temp.fna
# ... and append 1-1256 to the same file.
samtools faidx Yersinia_pestis_Java9.fna NZ_CP009995.1:1-1256 >> temp.fna
# Manual step: delete the second FASTA header line (the second line starting with ">") so that both parts form one record.
# NOTE(review): the revseq arguments were not recorded; presumably EMBOSS revseq reverse-complementing temp.fna into 76131-77073.rev -- confirm.
revseq
# Join all lines of 76131-77073.rev into one line and replace every ':' with a tab; the result is written to temp_.fna.
sed -e ':a;N;$!ba;s/\n//g' -e 's/:/\t/g' 76131-77073.rev > temp_.fna
Yersinia_pestis_Java9 ATGAAAAGCGTGAAAATCATGGGAACTATGCCACCGTCGATCTCCCTCGCCAAAGCTCATGAGCGCATCAGCCAACATTGGCAAAATCCTGTCGGTGAGCTCAATATCGGAGGAAAACGGTATAGAATTATCGATAATCAAGTGTTGCGCTTGAACCCCCACAGTGGTTTTTCTCTCTTTCGAGAAGGGGTTGGTAAGATCTTTTCGGGGAAGATGTTTAACTTTTCAATTGCTCGTAACCTTACTGACACACTCCATGCGGCCCAGAAAACGACTTCGCAGGAGCTAAGGTCTGATATCCCCAATGCTCTCAGTAATCTCTTTGGAGCCAAGCCACAGACCGAACTGCCGCTGGGTTGGAAAGGGGAGCCCTTGTCAGGAGCTCCGGATCTTGAAGGGATGCGAGTGGCTGAAACCGATAAGTTTGCCGAGGGCGAAAGCCATATTAGTATAATAGAAACTAAGGATAAGCAGCGGTTGGTAGCTAAGATTGAACGCTCCATTGCCGAGGGGCATTTGTTCGCAGAACTGGAGGCTTATAAACACATCTATAAAACCGCGGGCAAACATCCTAATCTTGCCAATGTTCATGGCATGGCTGTGGTGCCATACGGTAACCGTAAGGAGGAAGCATTGCTGATGGATGAGGTGGATGGTTGGCGTTGTTCTGACACACTAAGAACCCTCGCCGATAGCTGGAAGCAAGGAAAGATCAATAGTGAAGCCTACTGGGGAACGATCAAGTTTATTGCCCATCGGCTATTAGATGTAACCAATCACCTTGCCAAGGCAGGGGTAGTACATAACGATATCAAACCCGGTAATGTGGTATTTGACCGCGCTAGCGGAGAGCCCGTTGTTATTGATCTAGGATTACACTCTCGTTCAGGGGAACAACCTAAGGGGTTTACAGAATCCTTCAAAGCGCCGGAGCTTGGAGTAGGAAACCTAGGCGCATCAGAAAAGAGCGATGTTTTTCTCGTAGTGTCAACCCTTCTACATTGTATCGAAGGTTTTGAGAAAAATCCGGAGATAAAGCCTAATCAAGGACTGAGATTCATTACCTCAGAACCAGCGCACGTAATGGATGAGAATGGTTATCCAATCCATCGACCTGGTATAGCTGGAGTCGAGACAGCCTATACACGCTTCATCACAGACATCCTTGGCGTTTCCGCTGACTCAAGACCTGATTCCAACGAAGCCAGACTCCACGAGTTCTTGAGCGACGGAACTATCGACGAGGAGTCGGCCAAGCAGATCCTAAAAGATACCCTAACCGGAGAAATGAGCCCATTATCTACTGATGTAAGGCGGATAACACCCAAGAAGCTTCGGGAGCTATCTGATTTGCTTAGGACGCATTTGAGCAGTGCAGCAACTAAGCAATTGGATATGGGGGGGGTTTTGTCGGATCTTGATACCATGTTGGTGGCACTCGACAAGGCCGAACGCGAGGGGGGAGTAGACAAGGATCAGTTGAAGAGTTTTAACAGTTTGATTCTGAAGACTTACAGAGTGATTGAAGACTATGTCAAAGGCAGAGAAGGGGATACCAAGAATTCCAGTACGGAAGTATCCCCCTATCATCGCAGTAACTTTATGCTATCGATCGTCGAACCTTCACTGCAGAGGATCCAGAAGCATCTGGACCAGACACACTCTTTTTCTGATATCGGTTCACTAGTGCGCGCACATAAGCACCTGGAAACGCTTTTAGAGGTCTTAGTCACCTTGTCACAGCAAGGGCAGCCCGTGTCCTCTGAAACCTACGGCTTCCTGAATCGATTAACTGAGGCTAAGATCACCTTGTCGCAGCAATTGAATACTCTCCAGCAGCAGCAGGAGAGTGCGAAAGCGCAATTATCTATTCTGATTAATCGTTCAGGTTCTTGGGCCGATGTTGCTCGTCAGTCCCTGCAGCGTTTTGACAGTACCCGGCCTGTAGTGAAATTCGGCACTGAGCAGTATACCGCAA
TTCACCGTCAGATGATGGCGGCCCATGCAGCTATTACGCTACAGGAGGTATCGGAGTTTACTGATGATATGCGAAACTTTACAGTGGACTCTATTCCACTACTGATTCAACTTGGACGAAGCAGTTTAATGGATGAGCATTTGGTTGAACAGAGAGAAAAGTTGCGAGAGCTGACGACCATCGCCGAGCGACTGAACCGGTTGGAGCGGGAATGGATGTGA
#grep "yopO" selected_gtf_files/Yersinia_pestis_Nicholisk_41.gtf
NZ_CP009990.1 RefSeq gene 47448 49645 . - . gene_id "CH63_RS00925"; transcript_id ""; gbkey "Gene"; gene "yopO"; gene_biotype "pseudogene"; gene_synonym "ypkA"; locus_tag "CH63_RS00925"; old_locus_tag "CH63_4306"; pseudo "true";
# Extract the yopO region listed in the GTF record above (NZ_CP009990.1:47448-49645) into temp.fna.
samtools faidx Yersinia_pestis_Nicholisk_41.fna NZ_CP009990.1:47448-49645 > temp.fna
# The GTF record puts the gene on the minus strand, so the extracted region has to be reverse-complemented.
# NOTE(review): the revseq arguments were not recorded; presumably EMBOSS revseq reading temp.fna and writing 47448-49645.rev -- confirm.
revseq
# Join all lines of 47448-49645.rev into one line and replace every ':' with a tab; the result is written to temp_.fna.
sed -e ':a;N;$!ba;s/\n//g' -e 's/:/\t/g' 47448-49645.rev > temp_.fna
Yersinia_pestis_Nicholisk_41 ATGAAAAGCGTGAAAATCATGGGAACTATGCCACCGTCGATCTCCCTCGCCAAAGCTCATGAGCGCATCAGCCAACATTGGCAAAATCCTGTCGGTGAGCTCAATATCGGAGGAAAACGGTATAGAATTATCGATAATCAAGTGTTGCGCTTGAACCCCCACAGTGGTTTTTCTCTCTTTCGAGAAGGGGTTGGTAAGATCTTTTCGGGGAAGATGTTTAACTTTTCAATTGCTCGTAACCTTACTGACACACTCCATGCGGCCCAGAAAACGACTTCGCAGGAGCTAAGGTCTGATATCCCCAATGCTCTCAGTAATCTCTTTGGAGCCAAGCCACAGACCGAACTGCCGCTGGGTTGGAAAGGGGAGCCCTTGTCAGGAGCTCCGGATCTTGAAGGGATGCGAGTGGCTGAAACCGATAAGTTTGCCGAGGGCGAAAGCCATATTAGTATAATAGAAACTAAGGATAAGCAGCGGTTGGTAGCTAAGATTGAACGCTCCATTGCCGAGGGGCATTTGTTCGCAGAACTGGAGGCTTATAAACACATCTATAAAACCGCGGGCAAACATCCTAATCTTGCCAATGTTCATGGCATGGCTGTGGTGCCATACGGTAACCGTAAGGAGGAAGCATTGCTGATGGATGAGGTGGATGGTTGGCGTTGTTCTGACACACTAAGAACCCTCGCCGATAGCTGGAAGCAAGGAAAGATCAATAGTGAAGCCTACTGGGGAACGATCAAGTTTATTGCCCATCGGCTATTAGATGTAACCAATCACCTTGCCAAGGCAGGGGTAGTACATAACGATATCAAACCCGGTAATGTGGTATTTGACCGCGCTAGCGGAGAGCCCGTTGTTATTGATCTAGGATTACACTCTCGTTCAGGGGAACAACCTAAGGGGTTTACAGAATCCTTCAAAGCGCCGGAGCTTGGAGTAGGAAACCTAGGCGCATCAGAAAAGAGCGATGTTTTTCTCGTAGTGTCAACCCTTCTACATTGTATCGAAGGTTTTGAGAAAAATCCGGAGATAAAGCCTAATCAAGGACTGAGATTCATTACCTCAGAACCAGCGCACGTAATGGATGAGAATGGTTATCCAATCCATCGACCTGGTATAGCTGGAGTCGAGACAGCCTATACACGCTTCATCACAGACATCCTTGGCGTTTCCGCTGACTCAAGACCTGATTCCAACGAAGCCAGACTCCACGAGTTCTTGAGCGACGGAACTATCGACGAGGAGTCGGCCAAGCAGATCCTAAAAGATACCCTAACCGGAGAAATGAGCCCATTATCTACTGATGTAAGGCGGATAACACCCAAGAAGCTTCGGGAGCTATCTGATTTGCTTAGGACGCATTTGAGCAGTGCAGCAACTAAGCAATTGGATATGGGGGGGTTTTGTCGGATCTTGATACCATGTTGGTGGCACTCGACAAGGCCGAACGCGAGGGGGGAGTAGACAAGGATCAGTTGAAGAGTTTTAACAGTTTGATTCTGAAGACTTACAGAGTGATTGAAGACTATGTCAAAGGCAGAGAAGGGGATACCAAGAATTCCAGTACGGAAGTATCCCCCTATCATCGCAGTAACTTTATGCTATCGATCGTCGAACCTTCACTGCAGAGGATCCAGAAGCATCTGGACCAGACACACTCTTTTTCTGATATCGGTTCACTAGTGCGCGCACATAAGCACCTGGAAACGCTTTTAGAGGTCTTAGTCACCTTGTCACAGCAAGGGCAGCCCGTGTCCTCTGAAACCTACGGCTTCCTGAATCGATTAACTGAGGCTAAGATCACCTTGTCGCAGCAATTGAATACTCTCCAGCAGCAGCAGGAGAGTGCGAAAGCGCAATTATCTATTCTGATTAATCGTTCAGGTTCTTGGGCCGATGTTGCTCGTCAGTCCCTGCAGCGTTTTGACAGTACCCGGCCTGTAGTGAAATTCGGCACTGAGCAGTATA
CCGCAATTCACCGTCAGATGATGGCGGCCCATGCAGCTATTACGCTACAGGAGGTATCGGAGTTTACTGATGATATGCGAAACTTTACAGTGGACTCTATTCCACTACTGATTCAACTTGGACGAAGCAGTTTAATGGATGAGCATTTGGTTGAACAGAGAGAAAAGTTGCGAGAGCTGACGACCATCGCCGAGCGACTGAACCGGTTGGAGCGGGAATGGATGTGA
Manually correct single-nucleotide errors in the sequences according to ${yop}_seq_additional.aln, then add the corrected sequences to ${yop}_seq.txt (time-consuming).
# For every Yop, copy the lines of <yop>_seq.txt that mention
# Yersinia_enterocolitica_WA into <yop>_seq_additional.fasta.
for gene in yopJ yopB yopT yopE yopD yopM yopK yopO yopH
do
    grep "Yersinia_enterocolitica_WA" "${gene}_seq.txt" > "${gene}_seq_additional.fasta"
done
# Align each <yop>_seq_additional.fasta with mafft (letting it fix sequence
# direction) and store the result in CLUSTAL format as <yop>_seq_additional.aln.
for gene in yopJ yopB yopT yopE yopD yopM yopK yopO yopH
do
    mafft --adjustdirection --clustalout "${gene}_seq_additional.fasta" \
        > "${gene}_seq_additional.aln"
done
Conversion steps: ${yop}_seq.txt --> ${yop}_protein.fasta --> ${yop}_aligned_protein.fasta
# Work inside the directory holding the per-Yop sequence tables.
cd data/yop_files
# Run txt_to_protein.py on every <yop>_seq.txt, producing <yop>_protein.fasta.
for gene in yopJ yopB yopT yopE yopD yopM yopK yopO yopH
do
    python3 txt_to_protein.py "${gene}_seq.txt" "${gene}_protein.fasta"
done
# Align every <yop>_protein.fasta with protein_alignment.py (mafft mode),
# writing <yop>_aligned_protein.fasta.
for gene in yopJ yopB yopT yopE yopD yopM yopK yopO yopH
do
    # NOTE: the alignment sometimes does not work well because the manually
    # added sequences are missing bases!
    python3 protein_alignment.py "${gene}_protein.fasta" "${gene}_aligned_protein.fasta" mafft
    #awk -F '_' '/^>/ { printf(">%s", $3); for (i = 4; i <= NF; ++i) printf("_%s", $i); printf("\n"); next } { print }' ${yop}_aligned_protein.fasta > ${yop}_aligned_protein_.fasta
done
# Install mamba from the conda-forge channel (the commented-out "-n base" would target the base environment explicitly).
conda install mamba -c conda-forge #-n base
# Create the conda environment described in environment.yml.
mamba env create -f environment.yml
grep ">" yopB_seq.txt | wc -l
67 --> 73
grep ">" yopJ_seq.txt | wc -l #*
67 --> 72
grep ">" yopT_seq.txt | wc -l
64 --> 73
grep ">" yopE_seq.txt | wc -l
70 --> 73
grep ">" yopD_seq.txt | wc -l
71 --> 73
grep ">" yopM_seq.txt | wc -l
70 --> 71 --> 73
grep ">" yopK_seq.txt | wc -l
73
grep ">" yopO_seq.txt | wc -l #*
64 --> 72
grep ">" yopH_seq.txt | wc -l
73
Cluster all sequences in each aligned protein file (e.g. yopM_aligned_protein.fasta) so that all 100% identical sequences fall into the same group. For each cluster, output one record as its representative, and produce a table listing all members of each group.
# Cluster each aligned protein set at 100% identity with usearch: centroids go
# to <yop>_clustered.fasta and the cluster assignments to <yop>_clusters.uc.
for gene in yopJ yopB yopT yopE yopD yopM yopK yopO yopH
do
    usearch -cluster_fast "${gene}_aligned_protein.fasta" \
        -id 1.0 \
        -centroids "${gene}_clustered.fasta" \
        -uc "${gene}_clusters.uc"
done
# Turn every usearch .uc file into a sorted, comma-separated member table.
for gene in yopJ yopB yopT yopE yopD yopM yopK yopO yopH
do
    # Parse the usearch output to get the list of members of each cluster.
    python3 ~/Scripts/yop_analysis/parse_uc_file.py "${gene}_clusters.uc" > "${gene}_clusters.txt"
    # Strip the "Members: ['...']" list decoration and collapse ", " to ",";
    # the expressions run in the same order as the original separate sed passes.
    sed -i \
        -e "s/Members: \['//g" \
        -e "s/'\]//g" \
        -e "s/', '/, /g" \
        -e "s/, /,/g" \
        "${gene}_clusters.txt"
    # Keep everything after the first comma-separated field and sort the rows.
    cut -d',' -f2- "${gene}_clusters.txt" | sort > "${gene}_clusters_.txt"
done
# Combine the nine per-Yop cluster member tables into a single workbook, yop_clusters.xls.
# NOTE(review): presumably one sheet per input file -- confirm against csv_to_xls.py.
~/Tools/csv2xls-0.4/csv_to_xls.py yopJ_clusters_.txt yopB_clusters_.txt yopT_clusters_.txt yopE_clusters_.txt yopD_clusters_.txt yopM_clusters_.txt yopK_clusters_.txt yopO_clusters_.txt yopH_clusters_.txt -o yop_clusters.xls
# Re-align only the cluster representatives (<yop>_clustered.fasta) with
# protein_alignment.py in mafft mode.
for gene in yopJ yopB yopT yopE yopD yopM yopK yopO yopH
do
    python3 protein_alignment.py "${gene}_clustered.fasta" "${gene}_clustered_aligned_protein.fasta" mafft
done
# Pass every representative alignment through sort_fasta2.py to obtain
# <yop>_sorted_selected_aligned_protein.fasta, the input of the plots.
for gene in yopJ yopB yopT yopE yopD yopM yopK yopO yopH
do
    python3 sort_fasta2.py "${gene}_clustered_aligned_protein.fasta" "${gene}_sorted_selected_aligned_protein.fasta"
done
draw alignments
library(ggmsa)
library(ggplot2)
library(ggtree)
#library(gggenes)
library(ape)
library(Biostrings)
library(ggnewscale)
library(dplyr)
library(ggtreeExtra)
library(phangorn)
library(RColorBrewer)
library(patchwork)
library(ggplotify)
library(aplot)
library(magick)
library(treeio)
# Render one PNG per Yop protein alignment ----
#
# Every <yop>_sorted_selected_aligned_protein.fasta is read with tidy_msa(),
# drawn with geom_msa() wrapped into rows of `facet_width` alignment columns,
# and written to alignment_<yop>.png.

#' Plot one aligned-protein FASTA file to a PNG.
#'
#' @param yop Yop name such as "yopE"; the input file is
#'   `<yop>_sorted_selected_aligned_protein.fasta` and the output file is
#'   `alignment_<yop>.png`, both in the working directory.
#' @param height PNG height in pixels.
#' @param facet_width Number of alignment columns per facet row.
#' @param width PNG width in pixels.
#' @return The ggplot object, invisibly.
plot_yop_alignment <- function(yop, height, facet_width = 50, width = 1100) {
  fasta_file <- paste0(yop, "_sorted_selected_aligned_protein.fasta")
  tidymsa <- tidy_msa(fasta_file)

  msa_plot <- ggplot() +
    geom_msa(
      data = tidymsa, char_width = 0.5, seq_name = TRUE, show.legend = TRUE
    ) +
    theme_msa() +
    facet_msa(facet_width)

  png(paste0("alignment_", yop, ".png"), width = width, height = height)
  # Close the device even when rendering fails, so no PNG device stays open.
  on.exit(dev.off(), add = TRUE)
  # Explicit print(): a bare `msa_plot` only draws through top-level
  # auto-printing, which happens neither inside a function nor under source().
  print(msa_plot)

  invisible(msa_plot)
}

# One entry per Yop. The "a --> b" remarks are the original notes
# (presumably alignment length --> number of facet rows -- confirm).
alignment_jobs <- list(
  list(yop = "yopE", height = 800 * 1.2), # 219 --> 5
  list(yop = "yopJ", height = 192 * 6),   # 288 --> 6
  list(yop = "yopD", height = 192 * 6),   # 306 --> 7
  list(yop = "yopM", height = 192 * 12),  # 529 --> 11
  list(yop = "yopK", height = 192 * 4),   # 182 --> 4
  list(yop = "yopO", height = 192 * 15),  # 732 --> 15
  # 401 --> 9 --> 8: rerun with 51 columns per row because of the
  # "one-letter-in-last-line" bug (presumably 401 columns at 50 per row
  # leave a single residue in the last row).
  list(yop = "yopB", height = 192 * 8, facet_width = 51),
  # 322 --> 7: rerun after "Error in tidy_msa(data) : Sequences must have
  # unique names" -- delete the repeated Yersinia_pestis_D182038, i.e. merge
  # the two partial CDS into one.
  list(yop = "yopT", height = 192 * 8),
  # 468 --> 10: delete the repeated Yersinia_enterocolitica_YE6 first.
  list(yop = "yopH", height = 192 * 10)
)

for (job in alignment_jobs) {
  do.call(plot_yop_alignment, job)
}
blast search and mauve analysis (mauve should be opened under bengal3_ac3)
# Build a nucleotide BLAST database from the Yersinia_pestis_790 genome FASTA.
makeblastdb -in Yersinia_pestis_790.fna -dbtype nucl
# Search that database with the queries in yopJ_WA.fasta and yopO_WA.fasta; each report is written to its own text file.
blastn -query yopJ_WA.fasta -db Yersinia_pestis_790.fna -out yopJ_WA_on_790.txt
blastn -query yopO_WA.fasta -db Yersinia_pestis_790.fna -out yopO_WA_on_790.txt
点赞本文的读者
还没有人对此文章表态
没有评论
Variant Calling for Herpes Simplex Virus 1 from Patient Sample Using Capture Probe Sequencing
Typing of 81 S. epidermidis samples (Luise)
Co-Authorship Network Generator using scraped data from Google Scholar via SerpAPI
© 2023 XGenes.com Impressum