From da25698e9c12343e89070ce1dc5e2c1d66b93cf4 Mon Sep 17 00:00:00 2001 From: CoderOMaster Date: Sun, 3 May 2026 14:27:30 +0530 Subject: [PATCH 1/2] moss integration --- docs/docs.json | 1 + .../vector-db-integrations/moss.mdx | 117 ++++++ .../handlers/moss_handler/README.md | 118 +++++++ .../handlers/moss_handler/__about__.py | 9 + .../handlers/moss_handler/__init__.py | 33 ++ .../handlers/moss_handler/connection_args.py | 31 ++ .../handlers/moss_handler/icon.png | Bin 0 -> 22216 bytes .../handlers/moss_handler/moss_handler.py | 320 +++++++++++++++++ .../handlers/moss_handler/requirements.txt | 1 + .../handlers/moss_handler/settings.py | 32 ++ .../handlers/moss_handler/tests/__init__.py | 0 .../moss_handler/tests/test_moss_handler.py | 333 ++++++++++++++++++ 12 files changed, 995 insertions(+) create mode 100644 docs/integrations/vector-db-integrations/moss.mdx create mode 100644 mindsdb/integrations/handlers/moss_handler/README.md create mode 100644 mindsdb/integrations/handlers/moss_handler/__about__.py create mode 100644 mindsdb/integrations/handlers/moss_handler/__init__.py create mode 100644 mindsdb/integrations/handlers/moss_handler/connection_args.py create mode 100644 mindsdb/integrations/handlers/moss_handler/icon.png create mode 100644 mindsdb/integrations/handlers/moss_handler/moss_handler.py create mode 100644 mindsdb/integrations/handlers/moss_handler/requirements.txt create mode 100644 mindsdb/integrations/handlers/moss_handler/settings.py create mode 100644 mindsdb/integrations/handlers/moss_handler/tests/__init__.py create mode 100644 mindsdb/integrations/handlers/moss_handler/tests/test_moss_handler.py diff --git a/docs/docs.json b/docs/docs.json index ff5a1352eb..ed53a9fdb8 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -186,6 +186,7 @@ "integrations/vector-db-integrations/chromadb", "integrations/vector-db-integrations/couchbase", "integrations/vector-db-integrations/milvus", + "integrations/vector-db-integrations/moss", "integrations/vector-db-integrations/pgvector", "integrations/vector-db-integrations/pinecone", "integrations/vector-db-integrations/weaviate" diff --git a/docs/integrations/vector-db-integrations/moss.mdx b/docs/integrations/vector-db-integrations/moss.mdx new file mode 100644 index 0000000000..71fedb84ee --- /dev/null +++ b/docs/integrations/vector-db-integrations/moss.mdx @@ -0,0 +1,117 @@ +--- +title: Moss +sidebarTitle: Moss +--- + +In this section, we present how to connect Moss to MindsDB. + +[Moss](https://moss.dev) is a semantic search runtime for Conversational AI agents. It delivers hybrid search with sub-10ms latency. + +## Prerequisites + +Before proceeding, ensure the following prerequisites are met: + +1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). +2. To connect Moss to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). +3. Create a Moss account and obtain your project credentials from [portal.usemoss.dev](https://portal.usemoss.dev). + +## Connection + +This handler is implemented using the `moss` Python library. + +To connect Moss to MindsDB, use the following statement: + +```sql +CREATE DATABASE moss_db +WITH ENGINE = 'moss', +PARAMETERS = { + "project_id": "your-project-id", + "project_key": "moss_access_key_xxxxx", + "alpha": "0.8" +}; +``` + +The required parameters are: + +- `project_id`: Your Moss project ID, available from [portal.usemoss.dev](https://portal.usemoss.dev). +- `project_key`: Your Moss project key (secret), available from [portal.usemoss.dev](https://portal.usemoss.dev). + +The optional parameters are: + +- `alpha`: Hybrid search weight between `0.0` (keyword-only) and `1.0` (semantic-only). Defaults to `0.8`. + +## Usage + +### Inserting documents + +Insert documents to create a new index. The index is built asynchronously — MindsDB waits until it is ready before returning (typically 5–30 seconds). + +```sql +INSERT INTO moss_db.my_index (id, content, metadata) +VALUES + ('doc-1', 'MindsDB unifies AI and data with SQL', '{"category": "mindsdb"}'), + ('doc-2', 'Moss delivers sub-10ms hybrid semantic search', '{"category": "moss"}'), + ('doc-3', 'RAG pipelines combine retrieval and generation', '{"category": "rag"}'); +``` + + + The `id` column is optional. If omitted, Moss generates a unique ID + automatically using a hash of the document content. + + +Inserting into an existing index upserts the documents: + +```sql +INSERT INTO moss_db.my_index (id, content, metadata) +VALUES ('doc-4', 'Vector databases store embeddings for similarity search', '{"category": "vectordb"}'); +``` + +### Semantic search + +Query your index using natural language. Results are ranked by relevance and include a `distance` column (lower = better match): + +```sql +SELECT id, content, distance +FROM moss_db.my_index +WHERE content = 'how do I build a RAG pipeline?' +LIMIT 5; +``` + +### Fetch all documents + +```sql +SELECT id, content, metadata +FROM moss_db.my_index; +``` + +### Fetch by ID + +```sql +SELECT id, content +FROM moss_db.my_index +WHERE id = 'doc-1'; +``` + +### Semantic search with metadata filter + +Combine a semantic search query with a metadata filter: + +```sql +SELECT id, content, distance +FROM moss_db.my_index +WHERE content = 'semantic search performance' + AND metadata.category = 'moss' +LIMIT 3; +``` + +### Delete a document + +```sql +DELETE FROM moss_db.my_index WHERE id = 'doc-1'; +``` + +### Drop an index + +```sql +DROP TABLE moss_db.my_index; +``` diff --git a/mindsdb/integrations/handlers/moss_handler/README.md b/mindsdb/integrations/handlers/moss_handler/README.md new file mode 100644 index 0000000000..1508840b99 --- /dev/null +++ b/mindsdb/integrations/handlers/moss_handler/README.md @@ -0,0 +1,118 @@ +--- +title: Moss +sidebarTitle: Moss +--- + +In this section, we present how to connect Moss to MindsDB. + +[Moss](https://moss.dev) is a semantic search runtime built for Conversational AI agents. It lets you index documents and run hybrid semantic/keyword queries in under 10ms — fast enough for real-time conversation, compared to 200–300ms with typical retrieval systems. + +## Prerequisites + +Before proceeding, ensure the following prerequisites are met: + +1. Install MindsDB locally via [Docker](/setup/self-hosted/docker) or [Docker Desktop](/setup/self-hosted/docker-desktop). +2. To connect Moss to MindsDB, install the required dependencies following [this instruction](/setup/self-hosted/docker#install-dependencies). +3. Create a Moss project and obtain your credentials from the [Moss Portal](https://portal.usemoss.dev). + +## Connection + +This handler is implemented using the `moss` Python library. + +To connect your Moss project to MindsDB, use the following statement: + +```sql +CREATE DATABASE moss_datasource +WITH ENGINE = 'moss', +PARAMETERS = { + "project_id": "your-project-id", + "project_key": "moss_access_key_xxxxx", + "alpha": "0.8" +}; +``` + +The required parameters are: + +* `project_id`: Your Moss project ID, available in the [Moss Portal](https://portal.usemoss.dev). +* `project_key`: Your Moss project access key. + +The optional parameters are: + +* `alpha`: Controls the blend between semantic and keyword search. Range is `0.0` to `1.0`, where `0.0` is pure keyword (BM25), `1.0` is pure semantic, and `0.8` is the default. + +## Usage + +Once connected, you can insert documents into a Moss index. The index is created automatically on the first insert. + +```sql +INSERT INTO moss_datasource.my_index (id, content, metadata) +VALUES + ('doc-1', 'MindsDB unifies AI and data pipelines', '{"category": "product"}'), + ('doc-2', 'Connect MindsDB to PostgreSQL, MySQL, and more', '{"category": "integrations"}'), + ('doc-3', 'Create AI models using the CREATE MODEL syntax', '{"category": "docs"}'); +``` + + +The `INSERT` statement blocks until Moss finishes building the index, which typically takes 5–30 seconds depending on document count. + + +To run a semantic search query: + +```sql +SELECT id, content, distance +FROM moss_datasource.my_index +WHERE content = 'how do I create an AI model?' +LIMIT 5; +``` + +The `distance` column is `1 - score`, so lower values indicate a closer match. + +To fetch all documents without a search query: + +```sql +SELECT id, content, metadata +FROM moss_datasource.my_index; +``` + +To fetch specific documents by ID: + +```sql +SELECT id, content +FROM moss_datasource.my_index +WHERE id = 'doc-1'; +``` + +To filter results by metadata alongside a semantic search: + +```sql +SELECT id, content, distance +FROM moss_datasource.my_index +WHERE content = 'connecting to databases' + AND metadata.category = 'integrations' +LIMIT 3; +``` + +To delete documents from an index: + +```sql +DELETE FROM moss_datasource.my_index +WHERE id = 'doc-1'; +``` + +To drop an index entirely: + +```sql +DROP TABLE moss_datasource.my_index; +``` + +## Using Moss in a RAG Pipeline + +You can combine Moss with a MindsDB model to build a retrieval-augmented generation (RAG) pipeline entirely in SQL: + +```sql +SELECT r.content, m.answer +FROM moss_datasource.my_index AS r +JOIN mindsdb.my_llm AS m +WHERE r.content = 'what is the refund policy?' +LIMIT 1; +``` diff --git a/mindsdb/integrations/handlers/moss_handler/__about__.py b/mindsdb/integrations/handlers/moss_handler/__about__.py new file mode 100644 index 0000000000..7381ff2cbd --- /dev/null +++ b/mindsdb/integrations/handlers/moss_handler/__about__.py @@ -0,0 +1,9 @@ +__title__ = "MindsDB Moss handler" +__package_name__ = "mindsdb_moss_handler" +__version__ = "0.0.1" +__description__ = "MindsDB handler for Moss semantic search" +__author__ = "Keshav Arora" +__github__ = "https://github.com/mindsdb/mindsdb" +__pypi__ = "https://pypi.org/project/mindsdb/" +__license__ = "MIT" +__copyright__ = "Copyright 2024 - mindsdb" diff --git a/mindsdb/integrations/handlers/moss_handler/__init__.py b/mindsdb/integrations/handlers/moss_handler/__init__.py new file mode 100644 index 0000000000..3d74cbea83 --- /dev/null +++ b/mindsdb/integrations/handlers/moss_handler/__init__.py @@ -0,0 +1,33 @@ +from mindsdb.integrations.libs.const import HANDLER_SUPPORT_LEVEL, HANDLER_TYPE + +from .__about__ import __description__ as description +from .__about__ import __version__ as version +from .connection_args import connection_args, connection_args_example + +try: + from .moss_handler import MossHandler as Handler + + import_error = None +except Exception as e: + Handler = None + import_error = e + +title = "Moss" +name = "moss" +type = HANDLER_TYPE.DATA +support_level = HANDLER_SUPPORT_LEVEL.COMMUNITY +icon_path = "icon.png" + +__all__ = [ + "Handler", + "version", + "name", + "type", + "title", + "description", + "support_level", + "connection_args", + "connection_args_example", + "import_error", + "icon_path", +] diff --git a/mindsdb/integrations/handlers/moss_handler/connection_args.py b/mindsdb/integrations/handlers/moss_handler/connection_args.py new file mode 100644 index 0000000000..dc0be17096 --- /dev/null +++ b/mindsdb/integrations/handlers/moss_handler/connection_args.py @@ -0,0 +1,31 @@ +from collections import OrderedDict + +from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE + +connection_args = OrderedDict( + project_id={ + "type": ARG_TYPE.STR, + "description": "Moss project ID from the Moss Portal (portal.usemoss.dev)", + "required": True, + }, + project_key={ + "type": ARG_TYPE.PWD, + "description": "Moss project key from the Moss Portal", + "required": True, + "secret": True, + }, + alpha={ + "type": ARG_TYPE.STR, + "description": ( + "Hybrid search weight between semantic and keyword search. " + "0.0 = pure keyword (BM25), 1.0 = pure semantic, 0.8 = default" + ), + "required": False, + }, +) + +connection_args_example = OrderedDict( + project_id="your-project-id", + project_key="moss_access_key_xxxxx", + alpha="0.8", +) diff --git a/mindsdb/integrations/handlers/moss_handler/icon.png b/mindsdb/integrations/handlers/moss_handler/icon.png new file mode 100644 index 0000000000000000000000000000000000000000..29d5613861af54dceeff11646e134020a5560d80 GIT binary patch literal 22216 zcmeHvd011|+V7%K1~o{uPGJy1p;`x2#4x9Tbw*FAm8xjOi7IMTR0Py5QjAu?qZO$W z;p?gMfI|@lH5MFBv5;0la12nC78FoHw2I&F-8<}5o_p_i|Gdw0PtJ3mvesVfoz^?= zwcR)|c%-zQyd7gqI(k&lXN;NKGS>EMOLNTF44WH||5&LGA30hK{b!2VHveY%Fl%ms50U zc1PjQ;_;EfA9H1(4nOY8JR*=WRo9bd{2!?!{-Dzb zc6Q!qrVgGUtM4vbDXU)=bWi%D&phk++s{0$8TXxa{WIUH$I-8XfB(y7Mjwwl*K33J zPnIpQm-+uI$&9O9rO~Vos#&d{`&Cj}#iH`c)Au&q8^%~v)$Q6qe*k(EJoBqO^X9_( zlcQz@#oY+|fE`(4EBn4<>FnKOE+p;j`C&a_S@tS6sK#f-#u*!>nhwk@txCD6;&ImM ztZ=xd!qq|dqF>sF=O)_Eu+I8MGof@U({_I6d27hrWbN8(*ZMrk?f4C28Fufw4DwH? zTyomBk4NOK&;Oj16Zc8OHYHOW+n~v64Dmg)G;a291LKk;OtorXdF5aFutuAL+8L)T z)N4WxWow$!>e_Z;Q<{1?JxyBc*uiz!j`PWych+eYjD_y+D678|a%gyIpu&Hr)VBO2 z1h&HANZx2W|7-WJjJ@#z3&}A<=5_8sg13Np{eJYK?m8^$No9F>xQ3i&tn~ad8Tt-VuAgP ztL64mwzc-sfzQ_7aJU>}Hz+&-?lPwC-yKyAh2FMa9y?-s$lmS7K{xq>*Y5LI6*&BK z3P(I5@7?TLxRK6YL3x!E^9xG@BXZ{i9!M&Dd4C?}n+;)2Z@*hyM<)>c%I}uv$@Ce+ zW%a#c+ps#h8&vJXnCiu`w!NK1d zzWX@FJaiRvyem(oPngxnnBjRN1%~8K+*Vtq4+q<9t`B#ms|4*sDu$h9I+|U0^E_GJepG+TgT+URh-!xxX za_vasgi>#))aB)kEO=yO)i?e}0lWUM;Y2YTwe&k*?xxJvYl)BNGs`|KG%dfz>~~MU zPsgf1V5%c)>LNG zvR@lt?GkIjSnuo`pcVVFZa5$cmdq*aclvUf`#*a)GH+6& zHRwtqHm0pCl|(#sae6Q(SzNfR`}|jZW7|BjJ0V|0>ch^s5E-TOI%>~VtBWuZ$q|lu z0QiWfYBx_OOl#E*hflmi4&5g01Oqxx5}aova4aIRGWEc43Ddp0-|@$LbCR`t58XN< zz|#PpdJT?u*Yo3$kt&2}dmf`M(w{waD__$@-jNyh@UwU3fR4ejfTlaQ5#S#`cfWP2 zkjLAt>@Xxy;)sA`h=a1a3;bUv^7dT2JSp&D(qw5G&(mxh2(BPETW*xmbtkMjv(oZp z`ht){wa(@Jh<+vIIob>NC+5RZ?>fIxv$E6~cux^mPoA8%&==GnF5}fl=|U@y-Azxq zRwHv?ZVzQ<+?LM3aYv4Idq|N~&x!sO(}qPY>et}qqCUy z$?Azn!1FYj=gHBD_I^P;Jq`nQHj)JShi}&WDp=GUxNJK6j1T$QC#Nj*_jm(Sjizlk zW90Ko=kfCXhL?Ap1+DvPi+C;>?)*l(X~SMTi7u+^%OBov#O8sDFzH)cUK{)WlTJy# z!N&Hy$Bx&2PRDsc8fUZ9QyvrNon^JBMuI-$^j|e*XO`v6&-yn{-sglR?rh)y*nGsF zUcBza5mdK_sV%w`_ce`^k0chrfsM@D0Jp)|Sd!9>ze(u_677ShYYtWpgr<)BVfxo3 zVQw**#{=k^a@hk?)Vg+r@vcuRF`4tTH6}3)ATiaxY^6wsB7Pc5)S_^^NF)y*8xE&0 zZt+sAI0)9&wgjg{|&;S!p3Qwzc(Q6=DA(a8{Fm; zJeimeU%un>PFQM9ZI%P{dB{W zr>92h3lMI}p))>jZ#oZpe_1sdrllK464}{WomTL(CUw&Cdh^&<`eV&$9@+>Vn!<{Z zJU1C!6`f(-Jd4TF)h55xo)#7697$g*DEdAU)7q$37B_)J?th1Jn`G^w-J0l~z8^5A zbtOuAzHf4iY3ajz)<60n!#E^(CiPNUxR|#S@&#Hm!us|5+T=@GGQ9?NDOibiGBzpm z-cHO9L+t#{=&so>MBtk8)$MbR5Tss!RC1npLQ1q*lT0Cs#gJl7J)19@?*q6bT}o>S zMFc}fG9yVZ1?C69%DH1+q?d3ZA-qEt=AIo-Y=&V1c4h!=sTqmv~DdI&gx@gV;7zLIno3LDM(4D$_TiSpR|>h#r)0IiXlm5q4jb3>lHcko4(~ass=~ed7`N?oSXhcphg z8TpS4N5WwHzI2+l5U{x>>AWFReDQm0XLJRzncIgr9Z`^taKO4!c#o+k=Mi#fr>uT3 zd0DoWmqv*g)L5D;(G}fFJQ<-x$$FBtpdC#^AnefW^+ysf+Mkg7&7ojo$fpy3BH9?c zNP1UX^S%lhXCsH-dbY!UrQl7RFlQ6rewU~_4?};gu zm`$l!aji&EFT0aciuY1tx^HFXu$!y&FK~0|m-e=#T^b_Zb-xKl-DSxG>WY@^f4BZjah1Wt4 z(#lgNbMqjr9MUTE>sH|ql!;+5a3QHtlVYhCO$Z!-vV?%i1Yp$IDp+WeP{CN5_~U&E z4Fd5BSCfRE6ZOkH6H4?y9=i2Xwqyr6El!v%Z57g5roqml&d7i8{+-#i`+s*xDO3do&onut{yU*pHU_oIh^wX8bE5Ka`Z80^#;U0@Jwf1vT!j^r;HFo!(k1qQ6kz%+G7&!4*6k*be73d0 z#qhJHT$@`?I(TALtN6xepmoDY%LT*CSVP42c`t|5y(VBgqbCQx6HKss!9W(yFa!T(OaC2D*JtlTL`hKcE6uR8paXO{S;>T zEYc83V~6+iq!McgBy*n0;37#W?=Ed^aPy#rx=fSfR(R9C)&`e}=)pbto59^hPO9^0 z?Uo!j5)SmUT7t#hJ|0Z@j?zfJXcZDARW|6YN`sJUz4(xF7c?jz!Y@Tq(+R&!*f)34 zm_`=VU(x&X<+|!FZM^FT#>4pD3InaLmwVrOmg*b-XdquFQ?B_I2d?f)jV7eYfaG^> zeo0cYX-JjK18Rj)X=ei^R3-}{U)|N8hs#+4aE!ZvBNNexy9kvb$_jYOU{Ex;3u9IK z3u=NP_XO@y+Y?rVpK3SJntORtZez4RtsYBwdh+=L8-AmSo&X`AX{>s(sA7inhSmzH~<}$(8P-GznwN5OShu#;Fd#l847`gOc(*zuu5H| z^Y^}ZsltQLnE8W$4FP0?4llb;0_;sH8YV~~bbm#*?Nz>xj!ec$R1cZkQB|5aWrb`> z8Xt}qLPRQGC`9xRN9q)Pu)P5G&Nb8>H=kbFr1&A<#tw6e&X9j`nN7Il;mm`TkE2)3 zbjPeaoV?DfCdvPp^@$a6HN}^xKg+ahVRA=vp%m4<6&S0nG4B6RsNajLyW2I-h~7Y& zu!STT2iA&fo2)nVbT01|+aA+ziK5bJ!_-6283`QV|7qUZxr-qs?I40f%J#O${2tEb zi)HmMfA5P8QAlmL%JV{xC@pES^<-K7iC$iK;#HYMvCww6?@gA!U=?O}L(sL3#8JAYsFV@3NWU!yHFBOQ|%V zbXVCXPFaKZAxWA`Bk~qLv*UXf+?;y!2e~ah=C67a7S$As>1aBrlnNXa)6KR2K6g^k zn*}*>9&NZHQVP!%*B&>0z%ph+8nPv_<8~}mM(by#5fO9HgZG>6`DSusR~Ai)Y)YAs z!b65bZY}LBd$$iNbCIlX4l*6ogX54VLO((kSp1A!pW#Rw3zmZVfs5nqsQ<&kihJmO zbA>om$J;_QKp_Mr@^W%OlSbzcAM1FV7edjwO`1T|UQLv0KjZBiQUBq~VJ?Mef`ER< zLf-PKkyW%$4*5X!3GD59eN^1#p7f@6Jj}d_IzFC!G(38@{-1kZqPBG;_2_`eiLU2$ zLPX8O>jujB_T<%{FQfJzG*LYrtvqP@fV4qoqe!t=3FeO&wIdDgh5XoKvCvBcXqc>J8dD)qh?`#ydzwN5Xvu4w|ZZ?s*9wFzt-M z0XU>Xpe3SKNG2G&?~S+3yf=>E!ssv2x|X{DXT2z$ zKnRh6gq$K1Tb;mGorjSva{RiN*?iJWa^(BOaYDP2>9*0*)qFFSy&-G5SO}McpN=}f zbuogMrim2t&$osAOz7~QcuuwoGoapf^j^3M{J0-Ff%>~*RAMQl1&dqueFutXtmGTf ztZq5zj@!#;Ml5bUJy}7K4-hGOHJuU)OCBhRBpq)RA+Xb|d*eba?p zgT_ax<1KA@YUI>FH zG%zNi80+o;NF?%4ddT-&r&bD54x)wggEem<~oi6u2JI2kUjlcMPlI{G z7(ozucLi`L{5S`p80W$B;}-C+WYrVFXnI34L$i&n9HYT$p*cajrUG@wTtUOgl8dX2 znlc1ZJX$1VR%{SGsS?5`HgR;e3yf!J7*2y_g280;rvYBd5bX9y;xb%#FOK#i5@gkQ zp@^o3P0(3x7|IKjdhR)3&-fz&3WJsAJq9 z;*Q=M&~L`e+i=TuUC3ezX5m_c(;A=V^NQxzb>CfT=;gG}x%{yvdb1`wJteL1qzqPs zrUcjLE=;Pbh%alsn$K_6tUU2%G{R`{AeqLbCQqY!U`t65qeRJp* zu2ncs|4DZ!AlE;(o9vxT_AYhI{hk%DN4+Oeg7!O8Y(b|cbhPE}_1N&75^{yw1-ZMOs-8&)2w)JgR5Ll;~&^9kMbgPDV!nQwO1jUTd4pK8wW znGkd>;%3?{+0EM7mCN7AdNjUR7L?qPoy9rg37$HLWUmv3(Y z2ofE4{3T}&9;+g<`Y4bVR6rO*-XeR|&I!n!j0BBEK&x@tu9N&JW$4Q9v4BNv*4H_b zDC9T*rVLC-+-`}^wZ1#WA9*kLJtK(QMDlrhmR{yW@^D80X58$6KpDT0YI~(q`pnMs zjNAZo+^Em!OV2t%N^Gk#;$Q$l=?zv`&A5nzV`$WdAB~DA`I-dhP5V@?0lCMC(h>?S z%-cr(84;5~0gLtPEdR`kMiPG1NxsXGxFF+XpF6p`^Mr09o(?J_BDN9G9Yl7<;Pmvb zh_ug%P+ej|;y@lKCF{(?$vukSC?!fW%+k}x($YwJoyi3R1m5PUp(K#CO-SrT8z)j@ zgllgl4U|D$wEVMfHu6N0sqSmSuh_14b0_u(@)&z0B=)6^YRYX`aBufuyA+N9T66Q%p9ZRRFTUuRDzHIDkns&)Yq zEB+a5jk)=xuM6V$5fj~a>6H?CmWcSk-1?`lOC&*cpthoFRDeWE2HBlD-#T@Ix!eU3 zcI!=4r&~&+h(DDg92Di+$g4DvjBX+^k?82I??p_^B;KPYL}+Ib!azb;wM(yKgz!8e z)DK8sw~i1-5kl?Q0EvVURuMww$^>(BLa3*gDco^O={Q2D^(KV(ZRDwhP&t_po_BP2 z;}I?*gzgg}RN;g$j1Wfb(lZ)7ArVHT=-DMY-iTdmou`cxk$o^jcz~os zAe{_dpV5s-Fz-$>)m1)9)=oXMBR+h{W(~msk~$tvEKwdeGJ-wrMM$V> z;!apfba&8QbM$>4!OwRpGBP;$HMtg~=?Z{l2OD}i0S%}ev zq%lqz!M5?wNKegO-PynSeS};BB^`s2uecag=QU-JCly?{v!Q`Zw9q3(k_sx3d z9t=Tsr>3l%O}bfC0(fwv@S(B^km!5hOjYy(S;rF0wIRd=xf-s(N~(NJ2+atgJrI6E z2@U76;k`15Yih+ChaU|PUXwg(R*>Tf}_`{Y>OB24jVaJ^mZvF9L_lC-GoAm6h)(1 zix+(nH>z__`4ysfr+dkRv`cCDDkulndh}Y?vgb0U9Sug6vB#auH`K`3me=(eM{g6L zCbD<^){2TxaG}*qpEVgtPzoXTCt4P++KV79nNa?`uN%c&@a$e5eYk88x)@&dr*C4( z6FQ?i_5G09%!^y}fe?WsbThw9`!uhRmOUR5%h2v8J{CrQ2XKKCtn*(Fq3WJU`k1HR;M-LxsBu(uD@J+o0Llz6*E=5wVgK1VJGpjqM0;U7)2`&ZxcpWS$37$_#E)~r zUiwX>C`-RhBnI~4X<#FsmJ$=M;zpZKd_8L`u(BG2xFs;YZ@X)jE?+SWn4 zx$qyzOL)pC1+)Jv>c@e@=>X=EyYHPnWoL5Pwg_%4ZKjSii?~AT_QvpiT+*N}7yC;T z4-eG6vSRu+G+{Qm`q|W=H?FZ3%6Sj$dycSRO1g8PwLgL4AnFm{IOoN_C2Pvg-I}Ih zSm^q|VZzsD$d_p0fZHUaB{*W^aZ5e-EsDkkSnZN`Q7CA&)DKd|;{NLaUS}3(Rp9o8 zCr!>eUR*gV=nZatX#e4F@9&D2ndId3+KP&Ao4>>C;I)^g?0m^<7~Lz-A3U=Bob}7$ z>|RXlj{_aj+A#e`M8d)oPUYyZakCenD6X7C*BqXbJlt!VrakKhU-i5b%3CkW4ak&T zT+rmD<|?b=3~U^%k5%z#tuC9TYV<(E+Zz0ln^`)b2NJ3C+E}Pm0r=wp8HTl$wT?@` zA4znUdpp^2>n!OH-nilLBrr2(P6(rb(2EduAcWn4kj$NRCxmX6m(8jOq2Ns%<0X1R zs8Rxb_b|pp@t3z1m6&0Z)l;TQyU9IfAGL&5WXfX-9!5fe2y_gX2)63+I#fo z(cm2~QCom}j*l|;Myrv`0b?Cq^Wi-bdENl^JX zyUEg8Ka8G@Coz85*^Y84#B?$%=Mi@&di=}L&ipThfFnaV+C~p$R!*m)Ydkkzm z$@m~2_1j%G+vNoO6DbFwRK@)haHaJkY7V!t7=c-pVP*zq;IN~7|}+0=fWIOi0mfxqv(jYV&8b$CnQoJ6tRYJ zF2v!#I@=kXh{Ioq>aZjzR zqX?m@J0T3Tv7^Y_l4AE#=y`-?gs{U1wRQ<13?zg|zm(q-!bC#o*8B3htAtR<=|kfs zTL@uyLa5WXvDilDHjZi*Rdl>$H0|5Z*9mPeuiMK%<7)_cyu_O%oSH=YK5t`zVi2Qk zG>XEAf5z7d-?p*vAb;XUcWSt=tgIu0h=~|t0;SbQBzz@N?Wj;A17$2J4>&C^uRBfH z>PXfssf`7yG=Qc{?hHkj&Y1|yqRuRPXk(YP0g^yH25t@3@sfwck->~G+zV72$bWGo z2}K>dJTry_!`me{($e}Ssh1&$mZCmG5d}E&csN1vx$cBSLGP|GXO~Dvp=(PND7G6= z8-cKayVyvJit~2QE2!B7?sO@w)y$5miIr%LcwF`OBlS|Rcij! zW)a;{^R))@=E}!JT2%=X5b~+9{#=rPa6KVZF?#`_IWJ(xi zL-b+8m6>182+BkAZt9h@v9_`kNHQ>7^udiHbY(YqchNb!hPcS-le|S?jlDgCq|q2Rl6&nYSgoI{@-4SNd$M zdx{sEOmUZ9)$za86y@;y-zAw4GLHsJJ|d#%H-e;&A74V!AOBOP`X3+=QmgF!KQj4$ zP9}~w>E$?nKa(57?>*Ju`Ue{2^s9vW4|z*f9n5AajQLByuw(g)sjPsJz&UKKG_Jf0 z7j%6oI@z>xA>7UERl*GzVbEl_NQT5Mp(m)U`DITqsCg0+oq%t+h4i7Z@%8*Lzq!fu zSVg&R?CTF5(&<&U8=765Z7#Xlr~{U36YSa}95{`Z8`6xTz@%3Cj7I zZqa;?vi&)35ZyozPu1}Ryx3~0aJrYAMrB8n+RU@4;asQ`?(lR31o8-RYvRqjN0zVp zjy|h!sY98|mcefvtmHj=gMCWFm;A@5+&Q4SRoCmyye`NeD-Qr7qFlz0_{6fDSb+EB zMuQB#$ezka+L0*jWitMr6nHsi3BhMvq8AKwzbQjX{&>X-pQ5^*xST^%6Ztq}M@6Cm z35=4`D%J^&g*o6@83^_V@E+-zQE}(CoAIsOAWLa)@8(}|U0_$YYiS4~t8hk{ZSY<|yHM2?ul~a6-vzK`Hu939 znP0BmRJb{&$PJ;Pi=+MokxU#Y%>KKTJ zB|<}PjE>MA&OJ4K!*v316Po|X*?rT1iby1GxiGT8lI!dNYk zt75Qw^Y{pDJ31-WgdEpxVEVd#Bx`1v3W8ICkB*;l**LHYnfxI7i1YEKa;-ks~_h(H|x^s#vOF*v6^ zBgKulm!kF(H?G9Suf&Zqu$U}v{1nISGp=Q0cf#o3vQg!ZjrUC(f$^bfBQVB^>ed|? z>ny~LQP?=I_QQ8gXzaJY$*piZssdJ9ABB6Re7@pzk1&2ApmA_)U-NV&;dU{Y^c zH%i>-KpWu%1dEN{gN;gYBZQ_G8RTwmlqw6wjRi373nE4Z?&Z8NVH|>uB8OZ)Hck*X z{;ZKI2bo5A)=?Z~;s}q4=Td^=`iyv>D!3YbKaq_m*!YdO@lP9DuGq3g*>8ci0iu>3 z#C6pvy;_ugA}Bp0vZ(4AF4-y`y~-aW(b82jFcO`KYAr^h`*4I$suA}&;!10cxX%$^ zFw}_q9D%P!+^3a>OD67aIZ~x}p1IaVWGNGp8PK9?43~yExyaxhMAWIGWFmkfQbeeQ zqsizZf@#D7cNI6%wfi4TBM7*UX#_X0Q#`{ea4{{QhK0sh%DR^VBBfi2#jY< zBRC#efMkaypGGc&ZxgrSQzEzF;t2N$-Newry~4;ef>nA&GPACXSG2xpizA zu{?8=88Q_1H$?s6pp>TA$PL0aQD*F79}iP(L>5va#>S?1jZaIm77cBmTUTH3=!-=4 zxh|xK|0N2m%6B zC341U#}+Uu+_`>X!Zti|Os5dt-VwCX!oa+K2e_T$HsQ zZ(<*b_*Bw7^XnqM3>EXCYZl?(< zLQu|T@stMG*cy--8QF+LR4+6w?GrAE+bb?TCbKonhYE6$emgx40k}9!1xeg)7sFfKb!zHo3TTWDk>yM2{ zKa9G*Z=_6cfh!ON zZ#a>c_#`^QSL-ZF6rTal^nN+!Q#%Q-PK-tw?Rj!$Y~zc5eMC;;dSYha)Q+JV6R@kEr^JnaP^4P;~x6UV()e$Z;m4)OOE58`Y4IX z+bnM0ij}wO)8CA!wjXMu(I+oaYI~q;pwpU#7n~8Bj9{P3Y&{n3Nj##-`szZ(4iSdi zpp!E#u6~uJX=saF%XktuCPbEO+|hQ!N+>+NctS-X56?|H+FNmN^G)?YcQ|$`LD-)f zsrRxD*6$WxsQ7q|_tW)XTJdpU;@HTN^d)k%1ZUL02upB?OVX@OzNs#avApZdJ1(W~ z$(aX$wC8~`cc(QAe9&JE#&AK-dfY)Hz^WeMxdrwoteSh|ugx!8?GrY1T9cooi=c|3 z1s#)s z0;hMn#=xLyBlDgpq#ghLuKhkLJaJojQ%MG5$-}7b`%h~WzRi2=-o3rE{vJyCzWW>+ z(OBGjyZ1?Y!3DCG=Ou1wimf(#QQP$M_t{s68om_#`*Q`B7vr?$qDM z&htp+LX6W$CENuHbshlNS(uJQaOzKu3CI{<3UMRs6aVNf;41kL{N0$D`XDH8q2OvW z;Px~Ah3fZEgC*@yO~+W1>vmy_=G~bGJl#+<_OtyZ=teY8FtrX-)Lky#r1fk!>VY4u zbECRW#~W)JgZUzu75&ZTgV^uzX>j2&`nH>~AHmaaBdU3@&@Z!(mUQ2b8hdQDJw2tF zgG(R8(BCWrk@6rCApCT}U~b3hVyYqW84Gfx8X%O*{4)gt`{N>2zddL83)QSY?hyCe zlliwn&R2Xdi-jUX{#;#3K$y1x8P})6d8lmb|7xO(!xVv!6r4-TK-C4lN`-rS6U)*Y zcwk?Yg;bl}AR<1(Jyv|yk{2e7=ME3%-#Rhf*yyA)(BFxbs|Dtxo>VBL)4vzyG6gW_ zAHDh&S2dMgF7+oU(FFwM@rG;Ht`*yITf92l#AV=HF?^@|{wqar?&xaSjcaox6aP4D z+#hv62DbrhxgizqFrdf9-UOzqU^&1bt3QlyUGOu!yTAEnoj#@wt(-r(>^vO?=bGkR zeyYLp5mLn;u71_@p=|NgT3W4bKl!*k^A95t_KFTS!-C%26NvaFuKR(qGYyvWv!D-4 zWz!GP`De`QcFbZT&Y2aXU%W2JG`i_lpBS4F~ zD5lMiai`5f@6AkwH`*zX{n1PK!ROfX_!OQfq}^moZVG6B@Jpz5uSv`|*Y@a!J4evC z<;&iz4_Wck9Etu$eYd2=3Yv_}uMNm;24%avJKS7F4kdZZ0eVmu16ITLB;G9!4BZDH z=PCwiw`-zrN4KM=C!P;<3QkCTyR?)=Rl1FvS07bbmz>G}!;;bK@HxC^aBf(2uQSm3 zFdWE;pqwq5H2DM)q2WZ3c{42I{EF`TZ^GMYo~xe_ME!60G;NFK$P&4Od~y0vSeN`- zow}>+z_p%}dF5K}7nJw5{_z(o1Fnr^#9XWOp>1xG6Q7n3%i|4>IaV${W_&|Z4fEoE p|3}6B&x-%Svi~EX|L+9!PLkH^N=8h#+xc|(qelb>9S=~&|2N0r#IXPX literal 0 HcmV?d00001 diff --git a/mindsdb/integrations/handlers/moss_handler/moss_handler.py b/mindsdb/integrations/handlers/moss_handler/moss_handler.py new file mode 100644 index 0000000000..de6ff0a3e1 --- /dev/null +++ b/mindsdb/integrations/handlers/moss_handler/moss_handler.py @@ -0,0 +1,320 @@ +import ast +import asyncio +import hashlib +import time +from concurrent.futures import ThreadPoolExecutor +from typing import List, Optional + +import pandas as pd + +try: + from moss import ( + DocumentInfo, + GetDocumentsOptions, + MossClient, + MutationOptions, + QueryOptions, + ) +except ImportError: + pass # captured as import_error in __init__.py + +from mindsdb.integrations.handlers.moss_handler.settings import MossHandlerConfig +from mindsdb.integrations.libs.response import RESPONSE_TYPE +from mindsdb.integrations.libs.response import HandlerResponse as Response +from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse +from mindsdb.integrations.libs.vectordatabase_handler import ( + FilterCondition, + FilterOperator, + TableField, + VectorStoreHandler, +) +from mindsdb.utilities import log + +logger = log.getLogger(__name__) + +_executor = ThreadPoolExecutor(max_workers=4) + + +def _run(coro): + future = _executor.submit(asyncio.run, coro) + return future.result() + + +class MossHandler(VectorStoreHandler): + """MindsDB handler for Moss semantic search.""" + + name = "moss" + + def __init__(self, name: str, **kwargs): + super().__init__(name) + self._client = None + self.is_connected = False + self._loaded_indexes: set = set() + + config = self._validate_config(name, **kwargs) + self._project_id = config.project_id + self._project_key = config.project_key + self._alpha = config.alpha + + def _validate_config(self, name: str, **kwargs) -> MossHandlerConfig: + connection_data = dict(kwargs.get("connection_data", {})) + connection_data["vector_store"] = name + return MossHandlerConfig(**connection_data) + + # ------------------------------------------------------------------ + # Connection lifecycle + # ------------------------------------------------------------------ + + def connect(self): + if self.is_connected: + return self._client + try: + self._client = MossClient(self._project_id, self._project_key) + self.is_connected = True + return self._client + except Exception as e: + self.is_connected = False + raise Exception(f"Error connecting to Moss: {e}") + + def disconnect(self): + self._client = None + self.is_connected = False + self._loaded_indexes.clear() + + def check_connection(self) -> StatusResponse: + response = StatusResponse(False) + need_to_close = not self.is_connected + try: + client = self.connect() + _run(client.list_indexes()) + response.success = True + except Exception as e: + logger.error(f"Error connecting to Moss: {e}") + response.error_message = str(e) + finally: + if response.success and need_to_close: + self.disconnect() + if not response.success and self.is_connected: + self.is_connected = False + return response + + def _ensure_loaded(self, index_name: str): + if index_name not in self._loaded_indexes: + try: + _run(self._client.load_index(index_name)) + except Exception as e: + if "not found" in str(e).lower(): + raise Exception( + f"Index '{index_name}' not found in Moss. " + "Insert documents first: INSERT INTO moss_db. (content) VALUES (...)" + ) from e + raise + self._loaded_indexes.add(index_name) + + def _wait_for_index_ready(self, index_name: str, timeout: int = 180): + """Poll get_index until status is Ready.""" + deadline = time.time() + timeout + while time.time() < deadline: + try: + info = _run(self._client.get_index(index_name)) + if info.status == "Ready": + return + if str(info.status).lower() == "failed": + raise Exception(f"Moss index build failed for '{index_name}'") + except Exception as e: + if "index build failed" in str(e).lower(): + raise + # Index not visible yet — keep polling + time.sleep(3) + raise Exception(f"Timed out waiting for Moss index '{index_name}' to be ready") + + def _index_exists(self, index_name: str) -> bool: + try: + indexes = _run(self._client.list_indexes()) + return any(idx.name == index_name for idx in (indexes or [])) + except Exception: + return False + + def _translate_metadata_filters(self, conditions: List[FilterCondition]) -> Optional[dict]: + if not conditions: + return None + + prefix = TableField.METADATA.value + "." + meta_conditions = [c for c in conditions if c.column.startswith(prefix)] + if not meta_conditions: + return None + + op_map = { + FilterOperator.EQUAL: "$eq", + FilterOperator.NOT_EQUAL: "$ne", + FilterOperator.LESS_THAN: "$lt", + FilterOperator.LESS_THAN_OR_EQUAL: "$lte", + FilterOperator.GREATER_THAN: "$gt", + FilterOperator.GREATER_THAN_OR_EQUAL: "$gte", + FilterOperator.IN: "$in", + } + + translated = [] + for cond in meta_conditions: + field = cond.column.split(".", 1)[-1] + op = op_map.get(cond.op) + if op: + translated.append({"field": field, "condition": {op: cond.value}}) + + if not translated: + return None + return {"$and": translated} if len(translated) > 1 else translated[0] + + def _parse_metadata(self, raw) -> dict: + if raw is None or (isinstance(raw, float) and pd.isna(raw)): + return {} + if isinstance(raw, dict): + return raw + try: + parsed = ast.literal_eval(str(raw)) + return parsed if isinstance(parsed, dict) else {} + except (ValueError, SyntaxError): + return {} + + # ------------------------------------------------------------------ + # VectorStoreHandler abstract methods + # ------------------------------------------------------------------ + + def select( + self, + table_name: str, + columns: List[str] = None, + conditions: List[FilterCondition] = None, + offset: int = None, + limit: int = None, + ) -> pd.DataFrame: + self.connect() + self._ensure_loaded(table_name) + + content_filter = None + id_filters = [] + moss_filter = self._translate_metadata_filters(conditions) + + if conditions: + for cond in conditions: + if cond.column == TableField.CONTENT.value: + content_filter = cond + elif cond.column == TableField.ID.value: + if cond.op == FilterOperator.EQUAL: + id_filters.append(str(cond.value)) + elif cond.op == FilterOperator.IN: + id_filters.extend(str(v) for v in cond.value) + + if content_filter is not None: + query_text = content_filter.value + if isinstance(query_text, list): + query_text = query_text[0] + + opts = QueryOptions( + top_k=limit or 10, + alpha=self._alpha, + **({"filter": moss_filter} if moss_filter else {}), + ) + search_result = _run(self._client.query(table_name, query_text, opts)) + results = (search_result.docs if search_result is not None else []) or [] + + # Moss score is 0-1 (higher = better); map to distance (lower = better) + df = pd.DataFrame({ + TableField.ID.value: [r.id for r in results], + TableField.CONTENT.value: [r.text for r in results], + TableField.METADATA.value: [r.metadata for r in results], + TableField.EMBEDDINGS.value: [None] * len(results), + TableField.DISTANCE.value: [1.0 - (r.score or 0.0) for r in results], + }) + else: + get_opts = GetDocumentsOptions(doc_ids=id_filters) if id_filters else None + docs = _run(self._client.get_docs(table_name, get_opts)) or [] + + if limit is not None: + docs = docs[offset or 0: (offset or 0) + limit] + + df = pd.DataFrame({ + TableField.ID.value: [d.id for d in docs], + TableField.CONTENT.value: [d.text for d in docs], + TableField.METADATA.value: [d.metadata for d in docs], + TableField.EMBEDDINGS.value: [None] * len(docs), + }) + + if columns: + available = [c for c in columns if c in df.columns] + if available: + df = df[available] + + return df + + def insert(self, table_name: str, df: pd.DataFrame) -> Response: + self.connect() + + documents = [] + for _, row in df.iterrows(): + content = str(row.get(TableField.CONTENT.value, "")) + doc_id = row.get(TableField.ID.value) + doc_id_str = ( + str(doc_id) + if doc_id is not None and not (isinstance(doc_id, float) and pd.isna(doc_id)) + else hashlib.sha256(content.encode()).hexdigest() + ) + documents.append(DocumentInfo( + id=doc_id_str, + text=content, + metadata=self._parse_metadata(row.get(TableField.METADATA.value)), + )) + + if self._index_exists(table_name): + _run(self._client.add_docs(table_name, documents, MutationOptions(upsert=True))) + else: + _run(self._client.create_index(table_name, documents)) + + self._wait_for_index_ready(table_name) + _run(self._client.load_index(table_name)) + self._loaded_indexes.add(table_name) + + return Response(RESPONSE_TYPE.OK, affected_rows=len(df)) + + def delete(self, table_name: str, conditions: List[FilterCondition] = None): + self.connect() + if not conditions: + raise Exception("Delete requires at least one condition") + + id_filters = [] + for cond in conditions: + if cond.column != TableField.ID.value: + continue + if cond.op == FilterOperator.EQUAL: + id_filters.append(str(cond.value)) + elif cond.op == FilterOperator.IN: + id_filters.extend(str(v) for v in cond.value) + + if not id_filters: + raise Exception("Moss delete only supports filtering by id") + + _run(self._client.delete_docs(table_name, id_filters)) + self._wait_for_index_ready(table_name) + self._loaded_indexes.discard(table_name) + + def create_table(self, table_name: str, if_not_exists=True): + pass # index is created lazily on first insert + + def drop_table(self, table_name: str, if_exists=True): + self.connect() + try: + _run(self._client.delete_index(table_name)) + self._loaded_indexes.discard(table_name) + except Exception as e: + if if_exists: + return + raise Exception(f"Failed to delete Moss index '{table_name}': {e}") + + def get_tables(self) -> Response: + self.connect() + indexes = _run(self._client.list_indexes()) or [] + df = pd.DataFrame({"table_name": [idx.name for idx in indexes]}) + return Response(resp_type=RESPONSE_TYPE.TABLE, data_frame=df) + + def get_columns(self, table_name: str) -> Response: + return super().get_columns(table_name) diff --git a/mindsdb/integrations/handlers/moss_handler/requirements.txt b/mindsdb/integrations/handlers/moss_handler/requirements.txt new file mode 100644 index 0000000000..407bc63d23 --- /dev/null +++ b/mindsdb/integrations/handlers/moss_handler/requirements.txt @@ -0,0 +1 @@ +moss diff --git a/mindsdb/integrations/handlers/moss_handler/settings.py b/mindsdb/integrations/handlers/moss_handler/settings.py new file mode 100644 index 0000000000..9f63e962c5 --- /dev/null +++ b/mindsdb/integrations/handlers/moss_handler/settings.py @@ -0,0 +1,32 @@ +import difflib +from typing import Any + +from pydantic import BaseModel, ConfigDict, field_validator, model_validator + + +class MossHandlerConfig(BaseModel): + model_config = ConfigDict(extra="forbid") + + vector_store: str + project_id: str + project_key: str + alpha: float = 0.8 + + @model_validator(mode="before") + @classmethod + def check_param_typos(cls, values: Any) -> Any: + expected = set(cls.model_fields.keys()) + for key in values.keys(): + if key not in expected: + close = difflib.get_close_matches(key, expected, cutoff=0.4) + hint = f" Did you mean '{close[0]}'?" if close else "" + raise ValueError(f"Unexpected parameter '{key}'.{hint}") + return values + + @field_validator("alpha", mode="before") + @classmethod + def validate_alpha(cls, v: Any) -> float: + v = float(v) + if not 0.0 <= v <= 1.0: + raise ValueError("alpha must be between 0.0 and 1.0") + return v diff --git a/mindsdb/integrations/handlers/moss_handler/tests/__init__.py b/mindsdb/integrations/handlers/moss_handler/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/mindsdb/integrations/handlers/moss_handler/tests/test_moss_handler.py b/mindsdb/integrations/handlers/moss_handler/tests/test_moss_handler.py new file mode 100644 index 0000000000..88eb1b1b68 --- /dev/null +++ b/mindsdb/integrations/handlers/moss_handler/tests/test_moss_handler.py @@ -0,0 +1,333 @@ +import unittest +from unittest.mock import MagicMock, patch +import pandas as pd + +from mindsdb.integrations.handlers.moss_handler.moss_handler import MossHandler +from mindsdb.integrations.libs.response import RESPONSE_TYPE +from mindsdb.integrations.libs.vectordatabase_handler import FilterCondition, FilterOperator, TableField + + +HANDLER_KWARGS = { + "connection_data": { + "project_id": "test-project-id", + "project_key": "test-project-key", + "alpha": "0.8", + } +} + + +def _make_handler(): + return MossHandler(name="test_moss", **HANDLER_KWARGS) + + +def _mock_doc(id="doc-1", text="hello world", metadata=None, score=0.9): + doc = MagicMock() + doc.id = id + doc.text = text + doc.metadata = metadata or {} + doc.score = score + return doc + + +def _mock_index(name="my_index", status="Ready", doc_count=1): + idx = MagicMock() + idx.name = name + idx.status = status + idx.doc_count = doc_count + return idx + + +def _mock_search_result(docs): + result = MagicMock() + result.docs = docs + return result + + +class TestMossHandlerConnection(unittest.TestCase): + @patch("mindsdb.integrations.handlers.moss_handler.moss_handler.MossClient") + def test_connect(self, MockClient): + handler = _make_handler() + handler.connect() + MockClient.assert_called_once_with("test-project-id", "test-project-key") + self.assertTrue(handler.is_connected) + + @patch("mindsdb.integrations.handlers.moss_handler.moss_handler.MossClient") + def test_connect_idempotent(self, MockClient): + handler = _make_handler() + handler.connect() + handler.connect() + MockClient.assert_called_once() + + @patch("mindsdb.integrations.handlers.moss_handler.moss_handler._run") + @patch("mindsdb.integrations.handlers.moss_handler.moss_handler.MossClient") + def test_check_connection_success(self, _mock_client, mock_run): + mock_run.return_value = [] + handler = _make_handler() + res = handler.check_connection() + self.assertTrue(res.success) + + @patch("mindsdb.integrations.handlers.moss_handler.moss_handler._run") + @patch("mindsdb.integrations.handlers.moss_handler.moss_handler.MossClient") + def test_check_connection_failure(self, _mock_client, mock_run): + mock_run.side_effect = Exception("auth failed") + handler = _make_handler() + res = handler.check_connection() + self.assertFalse(res.success) + self.assertIn("auth failed", res.error_message) + + +class TestMossHandlerInsert(unittest.TestCase): + def _make_connected_handler(self, mock_run): + handler = _make_handler() + handler._client = MagicMock() + handler.is_connected = True + mock_run.side_effect = self._run_side_effects(mock_run) + return handler + + @staticmethod + def _run_side_effects(_mock_run): + # list_indexes → [] (index doesn't exist yet), get_index → Ready + ready_index = _mock_index(status="Ready") + calls = iter([[], None, ready_index]) + return lambda coro: next(calls) + + @patch("mindsdb.integrations.handlers.moss_handler.moss_handler._run") + def test_insert_creates_index_when_new(self, mock_run): + handler = _make_handler() + handler._client = MagicMock() + handler.is_connected = True + + # list_indexes → [], create_index → None, get_index → Ready, load_index → None + ready = _mock_index(status="Ready") + mock_run.side_effect = [[], None, ready, None] + + df = pd.DataFrame({ + TableField.ID.value: ["doc-1"], + TableField.CONTENT.value: ["MindsDB unifies AI and data"], + TableField.METADATA.value: ['{"category": "docs"}'], + }) + res = handler.insert("my_index", df) + + handler._client.create_index.assert_called_once() + self.assertEqual(res.resp_type, RESPONSE_TYPE.OK) + + @patch("mindsdb.integrations.handlers.moss_handler.moss_handler._run") + def test_insert_upserts_when_index_exists(self, mock_run): + handler = _make_handler() + handler._client = MagicMock() + handler.is_connected = True + + existing = _mock_index(name="my_index", status="Ready") + ready = _mock_index(status="Ready") + # list_indexes → [existing], add_docs → None, get_index → Ready, load_index → None + mock_run.side_effect = [[existing], None, ready, None] + + df = pd.DataFrame({ + TableField.ID.value: ["doc-1"], + TableField.CONTENT.value: ["updated content"], + TableField.METADATA.value: [None], + }) + handler.insert("my_index", df) + handler._client.add_docs.assert_called_once() + + @patch("mindsdb.integrations.handlers.moss_handler.moss_handler._run") + def test_insert_generates_id_when_missing(self, mock_run): + handler = _make_handler() + handler._client = MagicMock() + handler.is_connected = True + + ready = _mock_index(status="Ready") + mock_run.side_effect = [[], None, ready, None] + + df = pd.DataFrame({ + TableField.CONTENT.value: ["no id provided"], + }) + handler.insert("my_index", df) + + call_args = handler._client.create_index.call_args + docs = call_args[0][1] + self.assertEqual(len(docs), 1) + self.assertIsNotNone(docs[0].id) + self.assertNotEqual(docs[0].id, "") + + +class TestMossHandlerSelect(unittest.TestCase): + @patch("mindsdb.integrations.handlers.moss_handler.moss_handler._run") + def test_select_semantic_search(self, mock_run): + handler = _make_handler() + handler._client = MagicMock() + handler.is_connected = True + handler._loaded_indexes.add("my_index") + + results = [_mock_doc("doc-1", "MindsDB is great", score=0.95)] + mock_run.return_value = _mock_search_result(results) + + conditions = [FilterCondition( + column=TableField.CONTENT.value, + op=FilterOperator.EQUAL, + value="what is MindsDB?", + )] + df = handler.select("my_index", conditions=conditions, limit=5) + + handler._client.query.assert_called_once() + self.assertIn(TableField.CONTENT.value, df.columns) + self.assertIn(TableField.DISTANCE.value, df.columns) + self.assertAlmostEqual(df[TableField.DISTANCE.value].iloc[0], 0.05) + + @patch("mindsdb.integrations.handlers.moss_handler.moss_handler._run") + def test_select_score_to_distance_mapping(self, mock_run): + handler = _make_handler() + handler._client = MagicMock() + handler.is_connected = True + handler._loaded_indexes.add("my_index") + + mock_run.return_value = _mock_search_result([_mock_doc(score=1.0)]) + + conditions = [FilterCondition( + column=TableField.CONTENT.value, + op=FilterOperator.EQUAL, + value="query", + )] + df = handler.select("my_index", conditions=conditions) + self.assertAlmostEqual(df[TableField.DISTANCE.value].iloc[0], 0.0) + + @patch("mindsdb.integrations.handlers.moss_handler.moss_handler._run") + def test_select_get_all_docs(self, mock_run): + handler = _make_handler() + handler._client = MagicMock() + handler.is_connected = True + handler._loaded_indexes.add("my_index") + + docs = [_mock_doc("doc-1"), _mock_doc("doc-2")] + mock_run.return_value = docs + + df = handler.select("my_index") + + handler._client.get_docs.assert_called_once() + self.assertEqual(len(df), 2) + + @patch("mindsdb.integrations.handlers.moss_handler.moss_handler._run") + def test_select_by_id(self, mock_run): + handler = _make_handler() + handler._client = MagicMock() + handler.is_connected = True + handler._loaded_indexes.add("my_index") + + mock_run.return_value = [_mock_doc("doc-1")] + + conditions = [FilterCondition( + column=TableField.ID.value, + op=FilterOperator.EQUAL, + value="doc-1", + )] + handler.select("my_index", conditions=conditions) + + call_args = handler._client.get_docs.call_args + get_opts = call_args[0][1] + self.assertIn("doc-1", get_opts.doc_ids) + + @patch("mindsdb.integrations.handlers.moss_handler.moss_handler._run") + def test_select_uses_alpha(self, mock_run): + handler = _make_handler() + handler._client = MagicMock() + handler.is_connected = True + handler._loaded_indexes.add("my_index") + handler._alpha = 0.5 + + mock_run.return_value = _mock_search_result([]) + + conditions = [FilterCondition( + column=TableField.CONTENT.value, + op=FilterOperator.EQUAL, + value="query", + )] + handler.select("my_index", conditions=conditions) + + call_args = handler._client.query.call_args + query_opts = call_args[0][2] + self.assertEqual(query_opts.alpha, 0.5) + + +class TestMossHandlerDelete(unittest.TestCase): + @patch("mindsdb.integrations.handlers.moss_handler.moss_handler._run") + def test_delete_by_id(self, mock_run): + handler = _make_handler() + handler._client = MagicMock() + handler.is_connected = True + + ready = _mock_index(status="Ready") + mock_run.side_effect = [None, ready] + + conditions = [FilterCondition( + column=TableField.ID.value, + op=FilterOperator.EQUAL, + value="doc-1", + )] + handler.delete("my_index", conditions=conditions) + + handler._client.delete_docs.assert_called_once_with("my_index", ["doc-1"]) + + def test_delete_without_conditions_raises(self): + handler = _make_handler() + handler._client = MagicMock() + handler.is_connected = True + with self.assertRaises(Exception): + handler.delete("my_index", conditions=[]) + + def test_delete_without_id_filter_raises(self): + handler = _make_handler() + handler._client = MagicMock() + handler.is_connected = True + conditions = [FilterCondition( + column="metadata.category", + op=FilterOperator.EQUAL, + value="docs", + )] + with self.assertRaises(Exception): + handler.delete("my_index", conditions=conditions) + + +class TestMossHandlerTableOps(unittest.TestCase): + @patch("mindsdb.integrations.handlers.moss_handler.moss_handler._run") + def test_get_tables(self, mock_run): + handler = _make_handler() + handler._client = MagicMock() + handler.is_connected = True + + mock_run.return_value = [_mock_index("idx-a"), _mock_index("idx-b")] + + res = handler.get_tables() + self.assertEqual(list(res.data_frame["table_name"]), ["idx-a", "idx-b"]) + + @patch("mindsdb.integrations.handlers.moss_handler.moss_handler._run") + def test_drop_table(self, mock_run): + handler = _make_handler() + handler._client = MagicMock() + handler.is_connected = True + handler._loaded_indexes.add("my_index") + + mock_run.return_value = True + handler.drop_table("my_index") + + handler._client.delete_index.assert_called_once_with("my_index") + self.assertNotIn("my_index", handler._loaded_indexes) + + @patch("mindsdb.integrations.handlers.moss_handler.moss_handler._run") + def test_drop_table_if_exists_swallows_error(self, mock_run): + handler = _make_handler() + handler._client = MagicMock() + handler.is_connected = True + mock_run.side_effect = Exception("not found") + + handler.drop_table("my_index", if_exists=True) # should not raise + + def test_create_table_is_noop(self): + handler = _make_handler() + handler._client = MagicMock() + handler.is_connected = True + handler.create_table("my_index") # should not raise or call anything + handler._client.assert_not_called() + + +if __name__ == "__main__": + unittest.main() From b725772fb9cb011fe340346f91fe62c18480654f Mon Sep 17 00:00:00 2001 From: CoderOMaster Date: Wed, 6 May 2026 21:33:12 +0530 Subject: [PATCH 2/2] pinning moss version --- mindsdb/integrations/handlers/moss_handler/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mindsdb/integrations/handlers/moss_handler/requirements.txt b/mindsdb/integrations/handlers/moss_handler/requirements.txt index 407bc63d23..e2d3219c1e 100644 --- a/mindsdb/integrations/handlers/moss_handler/requirements.txt +++ b/mindsdb/integrations/handlers/moss_handler/requirements.txt @@ -1 +1 @@ -moss +moss==1.0.0 \ No newline at end of file