From bbe70b60de7c12dd31976e1c7818bb61208daea0 Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 9 Jun 2025 10:10:39 +0100 Subject: [PATCH 1/5] CU-8699brxuh: Update installed version to v0.5.0 --- notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb | 2 +- .../introductory/migration/1._Migrate_v1_model_to_v2.ipynb | 2 +- requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb b/notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb index dd1dc3a..618f618 100644 --- a/notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb +++ b/notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb @@ -76,7 +76,7 @@ } ], "source": [ - "! pip install \"medcat2[meta-cat] @ git+https://github.com/CogStack/MedCAT2@v0.3.3\" # NOTE: VERSION-STRING" + "! pip install \"medcat[meta-cat] @ git+https://github.com/CogStack/MedCAT2@v0.5.0\" # NOTE: VERSION-STRING" ] }, { diff --git a/notebooks/introductory/migration/1._Migrate_v1_model_to_v2.ipynb b/notebooks/introductory/migration/1._Migrate_v1_model_to_v2.ipynb index cf39046..312b64f 100644 --- a/notebooks/introductory/migration/1._Migrate_v1_model_to_v2.ipynb +++ b/notebooks/introductory/migration/1._Migrate_v1_model_to_v2.ipynb @@ -131,7 +131,7 @@ } ], "source": [ - "! pip install \"medcat2[meta-cat,spacy,deid] @ git+https://github.com/CogStack/MedCAT2@v0.3.3\"" + "! pip install \"medcat[meta-cat,spacy,deid] @ git+https://github.com/CogStack/MedCAT2@v0.5.0\"" ] }, { diff --git a/requirements.txt b/requirements.txt index 64d605d..4c2f226 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -medcat2 @ git+https://github.com/CogStack/MedCAT2@v0.3.3 +medcat @ git+https://github.com/CogStack/MedCAT2@v0.5.0 ipykernel pytest-xdist~=3.6.0 nbmake<1.6 From 82b7c9a195805dddd947a10b72e50c4734714c1c Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 9 Jun 2025 10:15:51 +0100 Subject: [PATCH 2/5] CU-8699brxuh: Update all imports to use correct namespace --- ...reating_and_using_a_custom_tokenizer.ipynb | 16 +++++------ .../2._Create_and_use_component.ipynb | 28 +++++++++---------- ..._a_Concept_Database_and_a_Vocabulary.ipynb | 16 +++++------ .../2._Unsupervised_training_on_model.ipynb | 4 +-- .../3._Supervised_training_on_model.ipynb | 8 +++--- .../meta/1._Add_a_MetaCat_to_a_Model.ipynb | 8 +++--- .../migration/1._Migrate_v1_model_to_v2.ipynb | 4 +-- 7 files changed, 42 insertions(+), 42 deletions(-) diff --git a/notebooks/advanced/1._Creating_and_using_a_custom_tokenizer.ipynb b/notebooks/advanced/1._Creating_and_using_a_custom_tokenizer.ipynb index 756b0c5..033b8ea 100644 --- a/notebooks/advanced/1._Creating_and_using_a_custom_tokenizer.ipynb +++ b/notebooks/advanced/1._Creating_and_using_a_custom_tokenizer.ipynb @@ -26,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -34,9 +34,9 @@ "\n", "import re\n", "\n", - "from medcat2.config.config import Config\n", - "from medcat2.tokenizing.tokens import BaseDocument, BaseEntity, BaseToken\n", - "from medcat2.tokenizing.tokens import MutableDocument, MutableEntity, MutableToken\n", + "from medcat.config.config import Config\n", + "from medcat.tokenizing.tokens import BaseDocument, BaseEntity, BaseToken\n", + "from medcat.tokenizing.tokens import MutableDocument, MutableEntity, MutableToken\n", "\n", "\n", "# define \"whitespace\"\n", @@ -376,7 +376,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -388,7 +388,7 @@ } ], "source": [ - "from medcat2.tokenizing.tokenizers import register_tokenizer, list_available_tokenizers\n", + "from medcat.tokenizing.tokenizers import register_tokenizer, list_available_tokenizers\n", "register_tokenizer(\"whitespace-tokenizer\", WhitespaceTokenizer)\n", "print(\"Registered tokenizers:\", list_available_tokenizers())" ] @@ -404,7 +404,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -416,7 +416,7 @@ } ], "source": [ - "from medcat2.tokenizing.tokenizers import create_tokenizer\n", + "from medcat.tokenizing.tokenizers import create_tokenizer\n", "tokenizer = create_tokenizer(\"whitespace-tokenizer\")\n", "print(\"We've got one:\", tokenizer)" ] diff --git a/notebooks/advanced/2._Create_and_use_component.ipynb b/notebooks/advanced/2._Create_and_use_component.ipynb index 7da1eb0..c435c57 100644 --- a/notebooks/advanced/2._Create_and_use_component.ipynb +++ b/notebooks/advanced/2._Create_and_use_component.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# How to create and use a (core) component with medcat2\n", + "# How to create and use a (core) component with medcat v2\n", "\n", "The overall process is quite simple:\n", "- Implement and extend `CoreComponent`\n", @@ -26,20 +26,20 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# for init args\n", "from typing import Optional, Any\n", "\n", - "from medcat2.tokenizing.tokenizers import BaseTokenizer\n", - "from medcat2.vocab import Vocab\n", - "from medcat2.cdb.cdb import CDB\n", + "from medcat.tokenizing.tokenizers import BaseTokenizer\n", + "from medcat.vocab import Vocab\n", + "from medcat.cdb.cdb import CDB\n", "# for the component itself\n", - "from medcat2.components.types import AbstractCoreComponent, CoreComponentType\n", - "from medcat2.tokenizing.tokens import MutableDocument, MutableEntity\n", - "from medcat2.components.ner.vocab_based_annotator import maybe_annotate_name\n", + "from medcat.components.types import AbstractCoreComponent, CoreComponentType\n", + "from medcat.tokenizing.tokens import MutableDocument, MutableEntity\n", + "from medcat.components.ner.vocab_based_annotator import maybe_annotate_name\n", "\n", "# for the randomness\n", "import random\n", @@ -143,11 +143,11 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "from medcat2.components.types import register_core_component\n", + "from medcat.components.types import register_core_component\n", "register_core_component(CoreComponentType.ner, RandomNER.name, RandomNER)" ] }, @@ -160,7 +160,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -241,9 +241,9 @@ } ], "source": [ - "from medcat2.config.config import Config\n", - "from medcat2.preprocessors.cleaners import NameDescriptor\n", - "from medcat2.cat import CAT\n", + "from medcat.config.config import Config\n", + "from medcat.preprocessors.cleaners import NameDescriptor\n", + "from medcat.cat import CAT\n", "import numpy as np\n", "\n", "from pprint import pprint\n", diff --git a/notebooks/introductory/basic/1._Build_a_Concept_Database_and_a_Vocabulary.ipynb b/notebooks/introductory/basic/1._Build_a_Concept_Database_and_a_Vocabulary.ipynb index 64ddaf5..2f03afd 100644 --- a/notebooks/introductory/basic/1._Build_a_Concept_Database_and_a_Vocabulary.ipynb +++ b/notebooks/introductory/basic/1._Build_a_Concept_Database_and_a_Vocabulary.ipynb @@ -28,13 +28,13 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "import numpy as np\n", - "from medcat2.vocab import Vocab\n", + "from medcat.vocab import Vocab\n", "\n", "vocab = Vocab()" ] @@ -111,7 +111,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -124,9 +124,9 @@ ], "source": [ "import pandas as pd\n", - "from medcat2.model_creation.cdb_maker import CDBMaker\n", - "from medcat2.cdb import CDB\n", - "from medcat2.config import Config\n", + "from medcat.model_creation.cdb_maker import CDBMaker\n", + "from medcat.cdb import CDB\n", + "from medcat.config import Config\n", "\n", "# first we need a config\n", "# we can use the default for now\n", @@ -295,11 +295,11 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "from medcat2.cat import CAT\n", + "from medcat.cat import CAT\n", "\n", "cat = CAT(cdb, vocab, cnf)" ] diff --git a/notebooks/introductory/basic/2._Unsupervised_training_on_model.ipynb b/notebooks/introductory/basic/2._Unsupervised_training_on_model.ipynb index d1f313b..f451a92 100644 --- a/notebooks/introductory/basic/2._Unsupervised_training_on_model.ipynb +++ b/notebooks/introductory/basic/2._Unsupervised_training_on_model.ipynb @@ -15,12 +15,12 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", - "from medcat2.cat import CAT\n", + "from medcat.cat import CAT\n", "\n", "\n", "# NOTE: can refer to the .zip or the folder - both will work just fine\n", diff --git a/notebooks/introductory/basic/3._Supervised_training_on_model.ipynb b/notebooks/introductory/basic/3._Supervised_training_on_model.ipynb index f6a231b..f8d92d1 100644 --- a/notebooks/introductory/basic/3._Supervised_training_on_model.ipynb +++ b/notebooks/introductory/basic/3._Supervised_training_on_model.ipynb @@ -22,13 +22,13 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", - "from medcat2.cat import CAT\n", + "from medcat.cat import CAT\n", "\n", "model_path = os.path.join(\"models\", \"unsup_trained_model.zip\")\n", "\n", @@ -46,7 +46,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -63,7 +63,7 @@ ], "source": [ "import pandas as pd\n", - "from medcat2.model_creation.cdb_maker import CDBMaker\n", + "from medcat.model_creation.cdb_maker import CDBMaker\n", "\n", "cdb_maker = CDBMaker(cat.config, cat.cdb)\n", "\n", diff --git a/notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb b/notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb index 618f618..ee8d7a6 100644 --- a/notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb +++ b/notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb @@ -91,7 +91,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "feefc4ff", "metadata": {}, "outputs": [ @@ -106,9 +106,9 @@ ], "source": [ "import os\n", - "from medcat2.cat import CAT\n", - "from medcat2.components.addons.meta_cat.meta_cat import MetaCATAddon\n", - "from medcat2.config.config_meta_cat import ConfigMetaCAT\n", + "from medcat.cat import CAT\n", + "from medcat.components.addons.meta_cat.meta_cat import MetaCATAddon\n", + "from medcat.config.config_meta_cat import ConfigMetaCAT\n", "\n", "BASIC_MODELS_FOLDER = os.path.join(\"..\", \"basic\", \"models\")\n", "MODEL_NAME = \"sup_trained_model.zip\"\n", diff --git a/notebooks/introductory/migration/1._Migrate_v1_model_to_v2.ipynb b/notebooks/introductory/migration/1._Migrate_v1_model_to_v2.ipynb index 312b64f..f559ecf 100644 --- a/notebooks/introductory/migration/1._Migrate_v1_model_to_v2.ipynb +++ b/notebooks/introductory/migration/1._Migrate_v1_model_to_v2.ipynb @@ -147,7 +147,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "486327da", "metadata": {}, "outputs": [ @@ -163,7 +163,7 @@ "source": [ "model_path = \"models/medcat1_model_pack.zip\"\n", "new_model_folder = \"models\" # file in this folder\n", - "! python -m medcat2.utils.legacy.legacy_converter $model_path $new_model_folder --verbose" + "! python -m medcat.utils.legacy.legacy_converter $model_path $new_model_folder --verbose" ] } ], From d7fbfa4b550e4f972181537601e4da6d785eaf0f Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 9 Jun 2025 10:21:49 +0100 Subject: [PATCH 3/5] CU-8699brxuh: Update initial save model name to not include hash so that it can be used further down the line --- ...._Build_a_Concept_Database_and_a_Vocabulary.ipynb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/notebooks/introductory/basic/1._Build_a_Concept_Database_and_a_Vocabulary.ipynb b/notebooks/introductory/basic/1._Build_a_Concept_Database_and_a_Vocabulary.ipynb index 2f03afd..22869e0 100644 --- a/notebooks/introductory/basic/1._Build_a_Concept_Database_and_a_Vocabulary.ipynb +++ b/notebooks/introductory/basic/1._Build_a_Concept_Database_and_a_Vocabulary.ipynb @@ -28,7 +28,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -111,7 +111,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -295,7 +295,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -379,20 +379,20 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Saved at models/base_model\n" + "Saved at models/base_model_15ba4a6c78264c90\n" ] } ], "source": [ "save_path = \"models\"\n", - "mpp = cat.save_model_pack(save_path, pack_name=\"base_model\")\n", + "mpp = cat.save_model_pack(save_path, pack_name=\"base_model\", add_hash_to_pack_name=False)\n", "print(\"Saved at\", mpp)" ] } From c917c558593a7c9a0d6ef1f39947e9b12959dc21 Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 9 Jun 2025 10:23:16 +0100 Subject: [PATCH 4/5] CU-8699brxuh: Update self-supervised trained model save path to remove hash --- .../basic/2._Unsupervised_training_on_model.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/notebooks/introductory/basic/2._Unsupervised_training_on_model.ipynb b/notebooks/introductory/basic/2._Unsupervised_training_on_model.ipynb index f451a92..48e1070 100644 --- a/notebooks/introductory/basic/2._Unsupervised_training_on_model.ipynb +++ b/notebooks/introductory/basic/2._Unsupervised_training_on_model.ipynb @@ -143,7 +143,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -156,7 +156,7 @@ ], "source": [ "save_path = \"models\"\n", - "mpp = cat.save_model_pack(save_path, pack_name=\"unsup_trained_model\")\n", + "mpp = cat.save_model_pack(save_path, pack_name=\"unsup_trained_model\", add_hash_to_pack_name=False)\n", "print(\"Saved at\", mpp)" ] } From 33ed0fb099d2276a0c4a8f823af6233299c25741 Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 9 Jun 2025 10:23:26 +0100 Subject: [PATCH 5/5] CU-8699brxuh: Update supervised trained model save path to remove hash --- .../introductory/basic/3._Supervised_training_on_model.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/notebooks/introductory/basic/3._Supervised_training_on_model.ipynb b/notebooks/introductory/basic/3._Supervised_training_on_model.ipynb index f8d92d1..0df5983 100644 --- a/notebooks/introductory/basic/3._Supervised_training_on_model.ipynb +++ b/notebooks/introductory/basic/3._Supervised_training_on_model.ipynb @@ -208,7 +208,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -224,7 +224,7 @@ ], "source": [ "new_model_folder, new_model_name = \"models\", \"sup_trained_model\"\n", - "cat.save_model_pack(new_model_folder, pack_name=new_model_name)\n" + "cat.save_model_pack(new_model_folder, pack_name=new_model_name, add_hash_to_pack_name=False)\n" ] } ],