Skip to content

percevalw/pysimstring

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

123 Commits
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

Tests PyPI License

SimString Python Package

Original version by chokkan and QuickUMLS.

This version removes the libiconv dependency, which required a conda installation before installing the simstring package on Windows.

How to use

Install it:

pip install pysimstring

and use it:

import os
import tempfile
from pathlib import Path
from typing import Union

import pysimstring.simstring as simstring


class SimstringWriter:
    """
    Context manager that writes terms into a simstring database.

    The database file is always named ``terms.simstring`` and lives inside
    the directory given at construction time. The underlying writer is only
    opened on ``__enter__`` and closed on ``__exit__``, so ``insert`` must be
    called inside the ``with`` block.
    """

    def __init__(self, path: Union[str, Path]):
        """
        Prepare the directory that will hold the database.

        Parameters
        ----------
        path: Union[str, Path]
            Directory of the database; it is created (with any missing
            parents) when it does not exist yet.
        """
        os.makedirs(path, exist_ok=True)
        self.path = path

    def __enter__(self):
        """Open the writer on ``terms.simstring`` and return this object."""
        db_file = os.path.join(self.path, "terms.simstring")
        # NOTE(review): the positional arguments are presumably the n-gram
        # size (3), begin/end marks (False) and unicode mode (True) —
        # confirm against the simstring writer signature.
        self.db = simstring.writer(db_file, 3, False, True)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the writer; exceptions raised in the block still propagate."""
        self.db.close()

    def insert(self, term):
        """Forward a single term to the open database writer."""
        self.db.insert(term)


def test_simstring():
    """
    Round-trip check: write two terms, then retrieve them from misspellings.

    A database is built in a fresh temporary directory with
    ``SimstringWriter``, reopened with ``simstring.reader`` using the jaccard
    measure and a 0.5 threshold, and queried with one-letter misspellings of
    each term. The temporary directory is removed afterwards, whether or not
    the assertions pass.
    """
    # Local import: only needed for cleanup, keeps the example self-contained.
    import shutil

    terms = ["paracetamol", "doliprane"]
    path = tempfile.mkdtemp()

    try:
        with SimstringWriter(path) as ss_db:
            for term in terms:
                # Terms are stored wrapped in "##"; queries below use the
                # same wrapping so stored and queried strings line up.
                ss_db.insert("##" + term + "##")

        ss_reader = simstring.reader(os.path.join(path, "terms.simstring"))
        ss_reader.measure = simstring.jaccard
        ss_reader.threshold = 0.5

        assert ss_reader.retrieve("##paracetomol##") == ("##paracetamol##",)
        assert ss_reader.retrieve("##doliprano##") == ("##doliprane##",)
    finally:
        # mkdtemp() leaves deletion to the caller. Cleanup is best-effort so
        # a file still held open (e.g. on Windows) cannot mask the result.
        shutil.rmtree(path, ignore_errors=True)

About

Python Simstring bindings for Linux, OS X and Windows

Resources

License

Stars

Watchers

Forks

Packages

 
 
 

Contributors