From 4d1ba402beef4b439db45fe3b5211ef04b4d8390 Mon Sep 17 00:00:00 2001 From: Adrian Altenhoff Date: Thu, 7 Aug 2025 18:51:52 +0200 Subject: [PATCH 1/2] allow from-nhx cli to set species encode way (#25) --- examples/data/sample.nhx | 2 +- src/orthoxml/cli.py | 15 +++++++++++++-- tests/test_cli.sh | 2 ++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/examples/data/sample.nhx b/examples/data/sample.nhx index 815cd98..8205a62 100644 --- a/examples/data/sample.nhx +++ b/examples/data/sample.nhx @@ -1 +1 @@ -((A_s1:0.1[&&NHX:conf=0.9],B_s2:0.2[&&NHX:conf=0.8])[&&NHX:S=speciesA],C_s3:0.3[&&NHX:S=speciesB]); \ No newline at end of file +((A_s1:0.1[&&NHX:conf=0.9:S=s1],B_s2:0.2[&&NHX:conf=0.8:S=s2])[&&NHX:S=speciesA],C_s3:0.3[&&NHX:S=speciesB]); \ No newline at end of file diff --git a/src/orthoxml/cli.py b/src/orthoxml/cli.py index a9d80fa..ccf0a99 100644 --- a/src/orthoxml/cli.py +++ b/src/orthoxml/cli.py @@ -8,7 +8,7 @@ from orthoxml import __version__ from orthoxml.parsers import process_stream_orthoxml from orthoxml.converters.to_nhx import orthoxml_to_newick -from orthoxml.converters.from_nhx import orthoxml_from_newicktrees +from orthoxml.converters.from_nhx import (orthoxml_from_newicktrees, nhx_species_encoded_leaf) from orthoxml.converters.from_orthofinder import convert_csv_to_orthoxml from orthoxml.custom_parsers import ( BasicStats, @@ -178,11 +178,15 @@ def handle_conversion_to_nhx(args): logger.info("You can visualise each tree using https://beta.phylo.io/viewer/ as extended newick format.") def handle_conversion_from_nhx(args): + if args.species_encode == "nhx": + species_encode = nhx_species_encoded_leaf + else: + species_encode = None orthoxml_from_newicktrees( args.infile, args.outfile, label_to_event=None, - label_to_id_and_species=None + label_to_id_and_species=species_encode ) def handle_conversion_from_orthofinder(args): @@ -277,6 +281,13 @@ def main(): required=True, help="Paths to one or more Newick (NHX) files" ) + 
converter_from_nhx_parser.add_argument( + "--species-encode", + required=False, + choices=("nhx", "underscore"), + help="Way how species/taxonomic levels are encoded in the input Newick files. 'nhx' means that the " + "species/taxonomic levels are encoded in the Newick file using the NHX comments S= or T=, 'underscore' " + "means that the species/taxonomic levels are encoded in the Newick file using underscores.") converter_from_nhx_parser.add_argument("--outfile", required=True, help="Path to the output OrthoXML file") converter_from_nhx_parser.set_defaults(func=handle_conversion_from_nhx) diff --git a/tests/test_cli.sh b/tests/test_cli.sh index 30fa4bb..96ddea4 100755 --- a/tests/test_cli.sh +++ b/tests/test_cli.sh @@ -81,6 +81,8 @@ orthoxml-tools to-nhx \ echo -e "\n[10] Test: Newick (NHX) to OrthoXML conversion" orthoxml-tools from-nhx --infile "$EXAMPLES_DIR/sample.nhx" --outfile "tests_output/from_nhx.orthoxml" orthoxml-tools from-nhx --infile "$EXAMPLES_DIR/sample2.nhx" "$EXAMPLES_DIR/sample.nhx" --outfile "tests_output/from_nhx21.orthoxml" +orthoxml-tools from-nhx --species-encode "nhx" --infile "$EXAMPLES_DIR/sample.nhx" --outfile "tests_output/from_nhx_nhxspecies.orthoxml" + echo -e "\n[11] Test: Orthofinder CSV to OrthoXML conversion" orthoxml-tools from-csv --infile examples/data/InputOrthogroups.csv --outfile tests_output/orthofinder.orthoxml From f6ea4e27619f1443ec275e125895b658b28fa225 Mon Sep 17 00:00:00 2001 From: Ali Yazdizadeh Date: Wed, 13 Aug 2025 20:54:37 +0330 Subject: [PATCH 2/2] feat: update the docs --- README.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 3385c04..5a93e70 100644 --- a/README.md +++ b/README.md @@ -215,7 +215,7 @@ orthoxml-tools to-nhx --infile examples/data/sample-for-nhx.orthoxml --outdir ./ Convert Newick (NHX) format to OrthoXML. 
```bash -orthoxml-tools from-nhx --infile path/to/file.nhx --outfile path/to/file.orthoxml +orthoxml-tools from-nhx --infile path/to/file.nhx --outfile path/to/file.orthoxml [--species-encode nhx|underscore] ``` **Options:** @@ -223,11 +223,18 @@ orthoxml-tools from-nhx --infile path/to/file.nhx --outfile path/to/file.orthoxm - You can specify multiple files by providing them as a space-separated list. - If you provide multiple files, they will be combined into a single OrthoXML output. - `--outfile <file>`: Specify the output OrthoXML file (required). +- `--species-encode <nhx|underscore>`: How species/taxonomic levels are encoded in the Newick files. + nhx → Species encoded in NHX comments using S= or T= tags. For example: (A_s1:0.1[&&NHX:conf=0.9:S=s1],B_s2:0.2[&&NHX:conf=0.8:S=s2]); + underscore → Species encoded in leaf labels using underscores (e.g., GeneID_SpeciesID). **Example:** ```bash orthoxml-tools from-nhx --infile examples/data/sample.nhx --outfile ./tests_output/from_nhx.orthoxml orthoxml-tools from-nhx --infile examples/data/sample2.nhx examples/data/sample.nhx --outfile ./tests_output/from_nhx21.orthoxml +orthoxml-tools from-nhx \ + --species-encode nhx \ + --infile examples/data/sample.nhx \ + --outfile tests_output/from_nhx_nhxspecies.orthoxml ``` ### 🛠️ CSV to OrthoXML (exploratory feature)