From 2e3a3142aabbb68d90748e075c991c5d54702f7b Mon Sep 17 00:00:00 2001 From: mart-r Date: Fri, 9 May 2025 09:18:47 +0100 Subject: [PATCH 01/14] Bump requirements to 0.3.0 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index c312819..4e9d505 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -git+https://github.com/CogStack/MedCAT2@v0.2.1#egg=medcat2 +git+https://github.com/CogStack/MedCAT2@v0.3.0#egg=medcat2 ipykernel pytest-xdist~=3.6.0 nbmake<1.6 From 58bcf0d74b0f5039cc5ae8b4d36ee409e4e952fc Mon Sep 17 00:00:00 2001 From: mart-r Date: Fri, 9 May 2025 09:19:11 +0100 Subject: [PATCH 02/14] Bump migration tutorial install to 0.3.0 --- .../introductory/migration/1._Migrate_v1_model_to_v2.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/notebooks/introductory/migration/1._Migrate_v1_model_to_v2.ipynb b/notebooks/introductory/migration/1._Migrate_v1_model_to_v2.ipynb index 42e91d8..45ec715 100644 --- a/notebooks/introductory/migration/1._Migrate_v1_model_to_v2.ipynb +++ b/notebooks/introductory/migration/1._Migrate_v1_model_to_v2.ipynb @@ -28,7 +28,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "007aa248", "metadata": {}, "outputs": [ @@ -131,7 +131,7 @@ } ], "source": [ - "! pip install \"git+https://github.com/CogStack/MedCAT2@v0.2.1#egg=medcat2[meta-cat,spacy,deid]\"" + "! 
pip install \"git+https://github.com/CogStack/MedCAT2@v0.3.0#egg=medcat2[meta-cat,spacy,deid]\"" ] }, { From 2ec04a10e485d79446cdd7859191a2750dfcb726 Mon Sep 17 00:00:00 2001 From: mart-r Date: Fri, 9 May 2025 09:19:28 +0100 Subject: [PATCH 03/14] Bump MetaCAT install to 0.3.0 --- notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb b/notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb index 1362c6f..4bfaa73 100644 --- a/notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb +++ b/notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb @@ -76,7 +76,7 @@ } ], "source": [ - "! pip install \"git+https://github.com/CogStack/MedCAT2@v0.2.1#egg=medcat2[meta-cat]\" # NOTE: VERSION-STRING" + "! pip install \"git+https://github.com/CogStack/MedCAT2@v0.3.0#egg=medcat2[meta-cat]\" # NOTE: VERSION-STRING" ] }, { From 107d7874a1263ef0a111f417b597b2838c90e088 Mon Sep 17 00:00:00 2001 From: mart-r Date: Fri, 9 May 2025 16:23:00 +0100 Subject: [PATCH 04/14] Bump requirements to v0.3.1 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 4e9d505..1c18118 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -git+https://github.com/CogStack/MedCAT2@v0.3.0#egg=medcat2 +git+https://github.com/CogStack/MedCAT2@v0.3.1#egg=medcat2 ipykernel pytest-xdist~=3.6.0 nbmake<1.6 From 5737219c11ab0d5d22ab72e3a143ebed34e29db9 Mon Sep 17 00:00:00 2001 From: mart-r Date: Fri, 9 May 2025 16:23:52 +0100 Subject: [PATCH 05/14] Bump migration installation version to 0.3.1 --- .../introductory/migration/1._Migrate_v1_model_to_v2.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/introductory/migration/1._Migrate_v1_model_to_v2.ipynb b/notebooks/introductory/migration/1._Migrate_v1_model_to_v2.ipynb index 45ec715..8123aee 100644
--- a/notebooks/introductory/migration/1._Migrate_v1_model_to_v2.ipynb +++ b/notebooks/introductory/migration/1._Migrate_v1_model_to_v2.ipynb @@ -131,7 +131,7 @@ } ], "source": [ - "! pip install \"git+https://github.com/CogStack/MedCAT2@v0.3.0#egg=medcat2[meta-cat,spacy,deid]\"" + "! pip install \"git+https://github.com/CogStack/MedCAT2@v0.3.1#egg=medcat2[meta-cat,spacy,deid]\"" ] }, { From 7a176067c657a93b5a9878e6e4c115a9778a89f8 Mon Sep 17 00:00:00 2001 From: mart-r Date: Fri, 9 May 2025 16:24:27 +0100 Subject: [PATCH 06/14] Bump MetaCAT tutorial install version to 0.3.1 --- notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb b/notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb index 4bfaa73..5b04e4f 100644 --- a/notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb +++ b/notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb @@ -76,7 +76,7 @@ } ], "source": [ - "! pip install \"git+https://github.com/CogStack/MedCAT2@v0.3.0#egg=medcat2[meta-cat]\" # NOTE: VERSION-STRING" + "! 
pip install \"git+https://github.com/CogStack/MedCAT2@v0.3.1#egg=medcat2[meta-cat]\" # NOTE: VERSION-STRING" ] }, { From 23c1333532b600ca9ec04bd8061d284b51863650 Mon Sep 17 00:00:00 2001 From: mart-r Date: Fri, 9 May 2025 16:57:30 +0100 Subject: [PATCH 07/14] Fix MetaCAT creation --- .../meta/1._Add_a_MetaCat_to_a_Model.ipynb | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb b/notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb index 5b04e4f..c719633 100644 --- a/notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb +++ b/notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb @@ -127,7 +127,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "034a9108", "metadata": {}, "outputs": [ @@ -144,8 +144,6 @@ } ], "source": [ - "from tokenizers import ByteLevelBPETokenizer\n", - "from medcat2.components.addons.meta_cat.meta_cat_tokenizers import TokenizerWrapperBPE\n", "# create config\n", "config = ConfigMetaCAT()\n", "# add categoryy name\n", @@ -153,15 +151,12 @@ "config.model.nclasses = 3\n", "config.general.category_value2id = {'False': 2, 'Hypothetical': 1, 'True': 0}\n", "\n", - "# create tokenizer\n", - "bpe_tok = ByteLevelBPETokenizer()\n", "data_path = os.path.join(\"in_data\", \"tok_data.txt\")\n", - "bpe_tok.train(data_path)\n", - "tokenizer = TokenizerWrapperBPE(bpe_tok)\n", "\n", "# create MetaCAT\n", "# TODO: remove need for call to protected attribute\n", - "mc = MetaCATAddon.create_new(config, cat._pipeline.tokenizer, tokenizer)\n", + "mc = MetaCATAddon.create_new(config, cat._pipeline.tokenizer)\n", + "mc.mc.tokenizer.hf_tokenizers.train(data_path)\n", "\n", "# add MetaCAT\n", "if cat.config.components.addons:\n", From 5f970cf84983772f617d176c459d5e46bb037b4c Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 12 May 2025 11:40:08 +0100 Subject: [PATCH 08/14] Bump requirements to 0.3.2 --- requirements.txt | 2 
+- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 1c18118..c1159b8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -git+https://github.com/CogStack/MedCAT2@v0.3.1#egg=medcat2 +git+https://github.com/CogStack/MedCAT2@v0.3.2#egg=medcat2 ipykernel pytest-xdist~=3.6.0 nbmake<1.6 From 844da9fae571925d20478a8236837ed4411750ee Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 12 May 2025 11:43:02 +0100 Subject: [PATCH 09/14] Bump migration install requirements to v0.3.2 --- .../introductory/migration/1._Migrate_v1_model_to_v2.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/introductory/migration/1._Migrate_v1_model_to_v2.ipynb b/notebooks/introductory/migration/1._Migrate_v1_model_to_v2.ipynb index 8123aee..1bb6c2c 100644 --- a/notebooks/introductory/migration/1._Migrate_v1_model_to_v2.ipynb +++ b/notebooks/introductory/migration/1._Migrate_v1_model_to_v2.ipynb @@ -131,7 +131,7 @@ } ], "source": [ - "! pip install \"git+https://github.com/CogStack/MedCAT2@v0.3.1#egg=medcat2[meta-cat,spacy,deid]\"" + "! pip install \"git+https://github.com/CogStack/MedCAT2@v0.3.2#egg=medcat2[meta-cat,spacy,deid]\"" ] }, { From 39e320a516b53dff75786f278f09bff11c1b21ad Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 12 May 2025 11:43:40 +0100 Subject: [PATCH 10/14] Bump MetaCAT tutorials to v0.3.2 --- notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb b/notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb index c719633..f990a65 100644 --- a/notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb +++ b/notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb @@ -76,7 +76,7 @@ } ], "source": [ - "! pip install \"git+https://github.com/CogStack/MedCAT2@v0.3.1#egg=medcat2[meta-cat]\" # NOTE: VERSION-STRING" + "! 
pip install \"git+https://github.com/CogStack/MedCAT2@v0.3.2#egg=medcat2[meta-cat]\" # NOTE: VERSION-STRING" ] }, { From 256b86a4ad48742dadabadb504b15b34d085b7b5 Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 12 May 2025 15:12:59 +0100 Subject: [PATCH 11/14] Bump requirements to 0.3.3 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index c1159b8..9e4ac6a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -git+https://github.com/CogStack/MedCAT2@v0.3.2#egg=medcat2 +git+https://github.com/CogStack/MedCAT2@v0.3.3#egg=medcat2 ipykernel pytest-xdist~=3.6.0 nbmake<1.6 From 01639b92c428b2789c4aef00731ce70bc83ca0e4 Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 12 May 2025 15:13:30 +0100 Subject: [PATCH 12/14] Bump migration to 0.3.3 --- .../introductory/migration/1._Migrate_v1_model_to_v2.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/introductory/migration/1._Migrate_v1_model_to_v2.ipynb b/notebooks/introductory/migration/1._Migrate_v1_model_to_v2.ipynb index 1bb6c2c..223d337 100644 --- a/notebooks/introductory/migration/1._Migrate_v1_model_to_v2.ipynb +++ b/notebooks/introductory/migration/1._Migrate_v1_model_to_v2.ipynb @@ -131,7 +131,7 @@ } ], "source": [ - "! pip install \"git+https://github.com/CogStack/MedCAT2@v0.3.2#egg=medcat2[meta-cat,spacy,deid]\"" + "! 
pip install \"git+https://github.com/CogStack/MedCAT2@v0.3.3#egg=medcat2[meta-cat,spacy,deid]\"" ] }, { From b2aab1a4ce73b19bf06bf4ecafa7453c0dc8cfb4 Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 12 May 2025 15:14:11 +0100 Subject: [PATCH 13/14] Bump MetaCAT to 0.3.3 dependency --- notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb b/notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb index f990a65..c26d51b 100644 --- a/notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb +++ b/notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb @@ -76,7 +76,7 @@ } ], "source": [ - "! pip install \"git+https://github.com/CogStack/MedCAT2@v0.3.2#egg=medcat2[meta-cat]\" # NOTE: VERSION-STRING" + "! pip install \"git+https://github.com/CogStack/MedCAT2@v0.3.3#egg=medcat2[meta-cat]\" # NOTE: VERSION-STRING" ] }, { From 2270a0e992d87cdf4cc1a09ba0a7d954f12f486e Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 12 May 2025 15:15:34 +0100 Subject: [PATCH 14/14] Fix tokenizer creation for MetaCAT with preprocessor --- .../introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb b/notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb index c26d51b..cc976ac 100644 --- a/notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb +++ b/notebooks/introductory/meta/1._Add_a_MetaCat_to_a_Model.ipynb @@ -155,8 +155,9 @@ "\n", "# create MetaCAT\n", "# TODO: remove need for call to protected attribute\n", - "mc = MetaCATAddon.create_new(config, cat._pipeline.tokenizer)\n", - "mc.mc.tokenizer.hf_tokenizers.train(data_path)\n", + "mc = MetaCATAddon.create_new(\n", + " config, cat._pipeline.tokenizer,\n", + " tknzer_preprocessor=lambda tknzer: tknzer.hf_tokenizers.train(data_path))\n", "\n", "# add 
MetaCAT\n", "if cat.config.components.addons:\n",