diff --git a/README.md b/README.md index 5069361..e96bf4c 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,37 @@ required**. ## What's new +Added new English back end: [merriam_webster](https://dictionaryapi.com/). This is the Merriam-Webster API and +as such is a very high-quality back end. However, it does require registration on their website as a developer in order to +gain access to the API keys. They do explicitly state that it is free for non-commercial use up to 1,000 queries a day, which +should be sufficient for most needs. Make sure you select the `Thesaurus` API key, as that is what you will need in order for this +backend to work. + +In order to use this backend, add `merriam_webster` to `g:tq_enabled_backends` and assign your API key to `g:tq_merriam_webster_api_key`, e.g.: + +``` +let g:tq_merriam_webster_api_key='cxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxa' +``` +**This backend cannot work without an API key.** + +The backend also returns antonyms using a different command: +``` +:ThesaurusQueryReplaceCurrentWordAntonym +``` + +By default, this is bound to: + +```vim +nnoremap ca :ThesaurusQueryReplaceCurrentWordAntonym +``` + +See **Usage** below for changing the bindings. + +If the word cannot be found, the API may sometimes return a list of word suggestions. These will be listed under an `Unknown word` +heading so that you can choose a replacement from the list. + +------- + Deleted `thesaurus_com` backend due to the legal warning from Thesaurus.com on the upstream package [thesarus](https://github.com/Manwholikespie/thesaurus). For existing users that still have `thesaurus_com` explicitly enabled, the @@ -202,6 +233,13 @@ backends is behaving properly. website didn't provide standard API to use. Hence functionality of this backend depends on whether the website owner will change the webpage design. This backend requires `bs4` *BeautifulSoup* dependency. +* **merriam\_webster** is an *English* thesaurus backend. 
It queries + [dictionaryapi.com](https://dictionaryapi.com/) for both synonym and antonym resources. + The API requires an API key that can be obtained by registering on their [website](https://dictionaryapi.com/). + Registration is free, but limits requests to 1,000 queries a day, which should be fine + for most users. When registering, make sure to select the "Thesaurus" API key. You will need + to assign that API key to `g:tq_merriam_webster_api_key` and manually add the + backend to `g:tq_enabled_backends` in your vimrc file. The thesaurus query plugin will go through the list `g:tq_enabled_backends` in sequence until a match is found. Unless user explicitly instruct, Next query @@ -239,6 +277,22 @@ originally defined priority, simply invoke command :ThesaurusQueryReset ``` +#### Caching + +In order to speed up results and avoid hitting the backend for repeated requests, the results +of a query can be cached. This is off by default. + +To enable caching, set `g:tq_cache_results` to one of the following values: + +- `let g:tq_cache_results=-1`: (_Default_) Query results are not cached. +- `let g:tq_cache_results=0`: Query results are cached without limit. +- `let g:tq_cache_results=10`: The last 10 results are cached. You can set this to any positive number. + +Note: Separate caches are maintained for synonyms and antonyms (if the back end supports it). The cache setting +applies to each cache separately. So if you specify `let g:tq_cache_results=10`, then the last 10 synonym results and +the last 10 antonym results will be stored. Also, some back ends (Merriam-Webster, for example) always return both, so a single +query populates both caches. 
+ #### Online Backends Timeout Mechanism Timeout mechanism (configurable with `g:tq_online_backends_timeout`) is added diff --git a/autoload/thesaurus_query.vim b/autoload/thesaurus_query.vim index b5d58e2..84b3000 100644 --- a/autoload/thesaurus_query.vim +++ b/autoload/thesaurus_query.vim @@ -123,6 +123,14 @@ if !exists("g:tq_raise_backend_priority_if_synonym_found") let g:tq_raise_backend_priority_if_synonym_found=0 endif +" this variable sets whether the results will be cached +" -1: Results will not be cached +" 0: All results will be cached with no limit +" >=1: X results will be cached (Fifo logic) +if !exists("g:tq_cache_results") + let g:tq_cache_results=-1 +endif + " this variable is offered by core query handler. It's a list of " query_backends user want to enable, with the sequence of user prefered " priority. @@ -223,11 +231,15 @@ function! thesaurus_query#Thesaurus_Query_Restore_Handler() exec s:tq_use_python.'tq_framework.restore_thesaurus_query_handler()' endfunction -function! thesaurus_query#Thesaurus_Query_Lookup(word, replace) " {{{ +function! 
thesaurus_query#Thesaurus_Query_Lookup(word, replace, query_type) " {{{ " a:word word to be looked up " a:replace flag: " 0 - don't replace word under cursor " 1 - replace word under cursor +" a:query_type flag: +" 0 - synonyms +" 1 - antonyms + let l:query_type = a:query_type let l:replace = a:replace let l:trimmed_word = s:Trim(a:word) let l:word = substitute(tolower(l:trimmed_word), '"', '', 'g') @@ -245,17 +257,17 @@ tq_continue_query = 1 while tq_continue_query>0: vim.command("redraw") tq_next_query_direction = True if tq_continue_query==1 else False - tq_synonym_result = tq_framework.query(decode_utf_8(vim.eval("l:word")), tq_next_query_direction) + tq_synonym_result = tq_framework.query(decode_utf_8(vim.eval("l:word")), tq_next_query_direction, 0 if vim.eval('l:query_type') == '0' else 1) # Use Python environment for handling candidate displaying {{{ # mark for exit function if no candidate is found if not tq_synonym_result: - vim.command("echom \"No synonym found for \\\"{0}\\\".\"".format(vim.eval("l:trimmed_word").replace('\\','\\\\').replace('"','\\"'))) + vim.command("echom \"No results found for \\\"{0}\\\".\"".format(vim.eval("l:trimmed_word").replace('\\','\\\\').replace('"','\\"'))) vim.command("let l:syno_found=0") tq_framework.session_terminate() tq_continue_query = 0 # if replace flag is on, prompt user to choose after populating candidate list elif vim.eval('l:replace') != '0': - tq_continue_query = tq_interface.tq_replace_cursor_word_from_candidates(tq_synonym_result, tq_framework.good_backends[-1]) + tq_continue_query = tq_interface.tq_replace_cursor_word_from_candidates(tq_synonym_result, tq_framework.good_backends[-1], 0 if vim.eval('l:query_type') == '0' else 1) else: tq_continue_query = 0 tq_framework.session_terminate() diff --git a/autoload/thesaurus_query/backends/merriam_webster_lookup.py b/autoload/thesaurus_query/backends/merriam_webster_lookup.py new file mode 100644 index 0000000..754e928 --- /dev/null +++ 
b/autoload/thesaurus_query/backends/merriam_webster_lookup.py @@ -0,0 +1,88 @@ +# python wrapper for word query from dictionaryapi.com +# Author: Aaron Hayman [[ahayman@gmail.com][E-mail]] + + +try: + from urllib2 import urlopen + from urllib2 import URLError, HTTPError +except ImportError: + from urllib.request import urlopen + from urllib.error import URLError, HTTPError +import json +import socket +from ..tq_common_lib import fixurl, get_variable + +query_result_trunc=100 +identifier="merriam_webster" +language="en" + +_timeout_period_default = 1.0 +time_out_choice = float(get_variable('tq_online_backends_timeout', _timeout_period_default)) +api_key = get_variable('tq_merriam_webster_api_key', '') + +def query(target): + ''' + Queries the Merriam Webster API to retrieve thesaurus results for the target word. + Requires the `tq_merriam_webster_api_key` to be set to an appropriate value. + Returns Status code and two lists: synonyms and antonyms. Both lists are broken up into their + appropriate word definitions. + + Note: If no word matches the target, the API may return substitute words. If this happens, both + synonyms and antonyms will list those words under the "Unknown Word" heading. 
+ ''' + if not target or target == '': + return [1, [], []] + target=target.replace(u" ", u"+") + result_list=_dictionary_api_wrapper(target) + if result_list == -1: + return [-1, [], []] + elif result_list == 1: + return [1, [], []] + else: + return _parser(result_list) + + +def _dictionary_api_wrapper(target): + if api_key == '': + return -1 # bare int on purpose: query() tests "result_list == -1"; a list would fall through to _parser and crash + try: + url = fixurl(u'https://www.dictionaryapi.com/api/v3/references/thesaurus/json/{0}?key={1}'.format(target, api_key)).decode('ASCII') + response = urlopen(url, timeout = time_out_choice).read() + result_list = json.loads(response.decode('utf-8')) + except HTTPError: + return 1 + except URLError as err: + if isinstance(err.reason, socket.timeout): + return 1 + print(err) + return -1 + except socket.timeout: # timeout only means underperforming + return 1 + except ValueError: + return -1 + return result_list + +def _parseAntonyms(result_dict): + defs = result_dict.get(u'shortdef', []) + ants_list = result_dict.get(u'meta', {}).get(u'ants', []) + length = min(len(defs), len(ants_list)) + return [ [ defs[idx]+' ('+ result_dict.get(u'fl', '') +')', ants_list[idx] ] for idx in range(length) if len(ants_list) > 0 ] + +def _parseSynonyms(result_dict): + defs = result_dict.get(u'shortdef', []) + syns_list = result_dict.get(u'meta', {}).get(u'syns', []) + length = min(len(defs), len(syns_list)) + return [ [ defs[idx]+' ('+ result_dict.get(u'fl', '') +')', syns_list[idx] ] for idx in range(length) if len(syns_list) > 0 ] + +def _parser(result): + if result is None or len(result) == 0: + return [1, [], []] + result_dict = result[0] + if not result_dict: + return [1, [], []] + if isinstance(result_dict, type(u'')): # json yields unicode on Python 2 and str on Python 3; type(u'') covers both + return [0, [['Unknown word (did you mean):', result]], [['Unknown word (did you mean):', result]]] + return [ 0, + [pair for r_dict in result for pair in _parseSynonyms(r_dict)], + [pair for r_dict in result for pair in _parseAntonyms(r_dict)] + ] diff --git a/autoload/thesaurus_query/thesaurus_query.py 
b/autoload/thesaurus_query/thesaurus_query.py index 01b6913..efdafc4 100644 --- a/autoload/thesaurus_query/thesaurus_query.py +++ b/autoload/thesaurus_query/thesaurus_query.py @@ -21,6 +21,9 @@ _double_width_type = ["Lo"] query_session=list() +cache_results = int(get_variable('tq_cache_results', '-1')) +specified_language = get_variable("tq_language", ['en']) + class Thesaurus_Query_Handler: ''' Handler for thesaurus_query Description: @@ -32,6 +35,8 @@ def __init__(self): ''' Initialize handler, load all available backends. ''' self.restore_thesaurus_query_handler() self.query_backends = tq_backends.query_backends + self.synonym_cache=[] + self.antonym_cache=[] self._session_inited=False def session_init(self): @@ -41,8 +46,10 @@ def session_init(self): self.good_backends = [] self.bad_backends = [] self.backend_in_line = self.query_backend_priority[:] - self.last_valid_result = [] + self.last_valid_synonyms = [] + self.last_valid_antonyms = [] self._session_inited = True + self.cached_used = False def session_terminate(self): ''' Terminate a query session, adjust query backend priority according @@ -61,47 +68,65 @@ def session_terminate(self): del self.bad_backends del self.backend_in_line self._session_inited = False + self.cached_used = False - def query(self, word, next=True, use_cache=True): + def query(self, word, next=True, query_type=0): """ Query from enabled backend one by one until synonym found return: - synonym_list + result list for synonyms or antonyms, depending on query_type """ - found = False if not self._session_inited: self.session_init() # start a session if not started - # word not found, start searching - error_encountered = 0 -# use session-wise backend management to prepare for current query + + found = False + local_bad_backends=[] + antonym_list=[] + synonym_list=[] + + # Check cache first + if not self.cached_used and cache_results > -1: + current_cache = self.synonym_cache if query_type == 0 else self.antonym_cache + for cache_result 
in current_cache: + if cache_result[0] == word: + self.cached_used = True + self.good_backends = [cache_result[2] + ' (cached)'] + return cache_result[1] + # use session-wise backend management to prepare for current query if next: to_use_list = self.backend_in_line[:] success_list = self.good_backends[:] if len(to_use_list)==0: - return self.last_valid_result + return self.last_valid_synonyms if query_type == 0 else self.last_valid_antonyms else: to_use_list = self.good_backends[::-1] success_list = self.backend_in_line[::-1] if len(to_use_list)<=1: - return self.last_valid_result + return self.last_valid_synonyms if query_type == 0 else self.last_valid_antonyms success_list.append(to_use_list.pop(0)) - local_bad_backends=[] for query_backend_curr in to_use_list: # query each of the backend list till found - specified_language = get_variable("tq_language", ['en']) - if specified_language!="All" and \ - (self.query_backends[query_backend_curr].language not in \ - specified_language): + query_backend = self.query_backends.get(query_backend_curr, None) + if not query_backend: + continue + if specified_language!="All" and query_backend.language not in specified_language: continue - [state, synonym_list]=self.query_backends[query_backend_curr].query(word) + query_result = query_backend.query(word) + if (len(query_result) >= 3): + [state, synonym_list, antonym_list] = query_result + else: + [state, synonym_list] = query_result + antonym_list = [] + if query_type == 1: + state = 1 if state == -1: - error_encountered = 1 - local_bad_backends.append( - self.query_backends[query_backend_curr].identifier) + local_bad_backends.append(query_backend.identifier) continue if state == 0: + # Update caches + update_cache(self.antonym_cache, word, antonym_list, query_backend.identifier) + update_cache(self.synonym_cache, word, synonym_list, query_backend.identifier) found = True if next: - success_list.append( - self.query_backends[query_backend_curr].identifier) + 
success_list.append(query_backend.identifier) break if found and next: to_use_list.remove(success_list[-1]) @@ -120,15 +145,16 @@ def query(self, word, next=True, use_cache=True): self.good_backends = to_use_list[::-1] self.backend_in_line = success_list[::-1] if 'state' not in locals(): - if not self.last_valid_result: + if (query_type == 0 and not self.last_valid_synonyms) or (query_type == 1 and not self.last_valid_antonyms): # warn only when the list for the requested query type is empty vim_command('echohl WarningMSG | echon "WARNING: " | echohl None | echon "No thesaurus source is used. Please check on your configuration on g:tq_enabled_backends and g:tq_language or b:tq_language.\n"') - return self.last_valid_result - if not synonym_list: # update last valid result if positive result is found - return self.last_valid_result + return self.last_valid_synonyms if query_type == 0 else self.last_valid_antonyms + if (query_type == 0 and not synonym_list) or (query_type == 1 and not antonym_list): # update last valid result if positive result is found + return self.last_valid_synonyms if query_type == 0 else self.last_valid_antonyms else: - self.last_valid_result=synonym_list + self.last_valid_synonyms=synonym_list + self.last_valid_antonyms=antonym_list - return synonym_list + return synonym_list if query_type == 0 else antonym_list def restore_thesaurus_query_handler(self): self.query_backend_priority = get_variable( @@ -140,6 +166,22 @@ def restore_thesaurus_query_handler(self): self.query_backend_priority.remove("mthesaur_txt") self.query_backend_priority.insert(0,"mthesaur_txt") +def update_cache(cache, word, results, backend): + """ Takes a cache, a word, and the results returned by a query for a backend and adds + them to the cache. + Will remove existing word results if it exists. 
+ Will truncate the cache if cache_results has been set to a positive number + Will not cache results if results is empty + """ + if cache_results == -1 or not results: + return + index = next((i for i, v in enumerate(cache) if v[0] == word), None) + if index is not None: + cache.pop(index) + cache.append((word, results, backend)) + if cache_results > 0 and len(cache) > cache_results: + cache.pop(0) + def truncate_synonym_list(synonym_list): """ Truncate synonym_list according to user truncation settings return: @@ -208,7 +250,7 @@ def _double_width_char_count(word): dw_count += 1 return dw_count -def tq_replace_cursor_word_from_candidates(candidate_list, source_backend=None): +def tq_replace_cursor_word_from_candidates(candidate_list, source_backend=None, candidate_type=0): ''' populate candidate list, replace target word/phrase with candidate Description: Using vim's color message box to populate a candidate list from found @@ -232,7 +274,7 @@ def candidate_list_printing(result_IDed): for case in result_IDed: if case[0] != u"": vim_command('call thesaurus_query#echo_HL("Keyword|Found as: |Directory|{0}|None|\\n")'.format(send_string_to_vim(case[0]))) - vim_command('call thesaurus_query#echo_HL("Keyword|Synonyms: |None|")') + vim_command('call thesaurus_query#echo_HL("Keyword|{0}: |None|")'.format(send_string_to_vim("Synonyms" if candidate_type == 0 else "Antonyms"))) col_count = 10 col_count_max = int(vim.eval("&columns")) for synonym_i in case[1]: @@ -274,7 +316,7 @@ def obtain_user_choice(trunc_flag): if not thesaurus_user_choice: return 0 elif thesaurus_user_choice == "A": - tq_generate_thesaurus_buffer(candidate_list) + tq_generate_thesaurus_buffer(candidate_list, candidate_type) return 0 elif thesaurus_user_choice == "n": return 1 @@ -368,7 +410,7 @@ def scan_current_layer(find_tail): ]) return 0 -def tq_generate_thesaurus_buffer(candidates): +def tq_generate_thesaurus_buffer(candidates, candidate_type=0): ''' generate a buffer in Vim to show all found synonyms 
from query ''' if independent_session: # this module don't work in Vim independent session return None @@ -394,13 +436,13 @@ def tq_generate_thesaurus_buffer(candidates): tq_thesaurus_buffer = vim.current.buffer del tq_thesaurus_buffer[:] - def candidate_list_printing(word_list): + def candidate_list_printing(word_list, candidate_type): """ Append a list of synonyms to the end of buffer, Acceptable structure: [ word1, word2, word3, ... ] """ tq_thesaurus_buffer.append([""]) - tq_thesaurus_buffer[-1]='Synonyms:' + tq_thesaurus_buffer[-1]= 'Synonyms:' if candidate_type == 0 else 'Antonyms:' column_curr = 10 word_list_size = len(word_list) @@ -420,11 +462,11 @@ def candidate_list_printing(word_list): for case in candidates: tq_thesaurus_buffer.append([""]) if not case[0]: - candidate_list_printing(case[1]) + candidate_list_printing(case[1], candidate_type) continue tq_thesaurus_buffer.append([""]) tq_thesaurus_buffer[-1]='Found_as: {0}'.format(send_string_to_vim(case[0])) - candidate_list_printing(case[1]) + candidate_list_printing(case[1], candidate_type) vim_command("setlocal bufhidden=") vim_command("exec 'resize ' . (line('$'))") vim_command("nnoremap q :q") diff --git a/doc/thesaurus_query.txt b/doc/thesaurus_query.txt index 067e006..fe68d84 100644 --- a/doc/thesaurus_query.txt +++ b/doc/thesaurus_query.txt @@ -23,6 +23,7 @@ CONTENTS *thesaurus_query.vim* `g:tq_truncation_on_relavance` `g:tq_truncation_on_definition_num` `g:tq_truncation_on_syno_list_size` + `g:tq_cache_results` 4.2 Make Your Own Backend ........................... |tq-backend-format| 5. Licence.................................................. |tq-licence| =============================================================================== @@ -156,7 +157,7 @@ g:tq_enabled_backends~ query_backends user want to enable, with the sequence of user prefered priority. Please be careful not to mis-spell when setting this variable. 
available options: `openthesaurus_de`, `woxikon_de`, `jeck_ru`, `thesaurus_com`, - `datamuse_com`, `mthesaur_txt`, `cilin_txt` + `datamuse_com`, `mthesaur_txt`, `cilin_txt`, `merriam_webster` > let g:tq_enabled_backends = ["cilin_txt", \ "openthesaurus_de", \ @@ -267,6 +268,15 @@ g:tq_truncation_on_syno_list_size~ interface, thesaurus split will NOT be truncated. > let g:tq_truncation_on_syno_list_size = -1 < +g:tq_cache_results~ + This defines the behavior for caching query results. If the value is + -1: results are not cached + 0: All results are cached + n>0: Only the last n results are cached + Note that caching limits apply to antonym and synonym results + separately. > + let g:tq_cache_results = -1 +< ------------------------------------------------------------------------------- 4.2 Make Your Own Backend *tq-backend-format* diff --git a/plugin/thesaurus_query.vim b/plugin/thesaurus_query.vim index 89fe206..52e821f 100644 --- a/plugin/thesaurus_query.vim +++ b/plugin/thesaurus_query.vim @@ -30,14 +30,20 @@ endif " Expose our commands to the user " -------------------------------- " -command! ThesaurusQueryReplaceCurrentWord :call thesaurus_query#Thesaurus_Query_Lookup(expand(''), 1) -command! ThesaurusQueryLookupCurrentWord :call thesaurus_query#Thesaurus_Query_Lookup(expand(''), 0) +command! ThesaurusQueryReplaceCurrentWord :call thesaurus_query#Thesaurus_Query_Lookup(expand(''), 1, 0) +command! ThesaurusQueryLookupCurrentWord :call thesaurus_query#Thesaurus_Query_Lookup(expand(''), 0, 0) command! ThesaurusQueryReset :call thesaurus_query#Thesaurus_Query_Restore_Handler() -command! -nargs=1 Thesaurus :call thesaurus_query#Thesaurus_Query_Lookup(, 0) +command! -nargs=1 Thesaurus :call thesaurus_query#Thesaurus_Query_Lookup(, 0, 0) -command! -nargs=1 ThesaurusQueryReplace :call thesaurus_query#Thesaurus_Query_Lookup(, 1) +command! -nargs=1 ThesaurusQueryReplace :call thesaurus_query#Thesaurus_Query_Lookup(, 1, 0) +command! 
ThesaurusQueryReplaceCurrentWordAntonym :call thesaurus_query#Thesaurus_Query_Lookup(expand(''), 1, 1) +command! ThesaurusQueryLookupCurrentWordAntonym :call thesaurus_query#Thesaurus_Query_Lookup(expand(''), 0, 1) + +command! -nargs=1 ThesaurusAntonym :call thesaurus_query#Thesaurus_Query_Lookup(, 0, 1) + +command! -nargs=1 ThesaurusQueryReplaceAntonym :call thesaurus_query#Thesaurus_Query_Lookup(, 1, 1) " -------------------------------- " Map keys @@ -48,6 +54,11 @@ if g:tq_map_keys vnoremap cs "ky:ThesaurusQueryReplace k nnoremap cs :ThesaurusQueryReplaceCurrentWord vnoremap cs "ky:ThesaurusQueryReplace k + + nnoremap ca :ThesaurusQueryReplaceCurrentWordAntonym + vnoremap ca "ky:ThesaurusQueryReplaceAntonym k + nnoremap ca :ThesaurusQueryReplaceCurrentWordAntonym + vnoremap ca "ky:ThesaurusQueryReplaceAntonym k endif if g:tq_use_vim_autocomplete