77//
88#include < queue>
99#include < boost/range/adaptor/reversed.hpp>
10- #include < rime/dict/prism.h>
1110#include < rime/algo/syllabifier.h>
11+ #include < rime/dict/corrector.h>
12+ #include < rime/dict/prism.h>
13+ #include " syllabifier.h"
1214
1315namespace rime {
16+ using namespace corrector ;
1417
1518using Vertex = pair<size_t , SpellingType>;
1619using VertexQueue = std::priority_queue<Vertex,
@@ -35,16 +38,36 @@ int Syllabifier::BuildSyllableGraph(const string &input,
3538 // record a visit to the vertex
3639 if (graph->vertices .find (current_pos) == graph->vertices .end ())
3740 graph->vertices .insert (vertex); // preferred spelling type comes first
38- else
41+ else {
42+ // graph->vertices[current_pos] = std::min(vertex.second, graph->vertices[current_pos]);
3943 continue ; // discard worse spelling types
44+ }
4045
4146 if (current_pos > farthest)
4247 farthest = current_pos;
4348 DLOG (INFO) << " current_pos: " << current_pos;
4449
4550 // see where we can go by advancing a syllable
4651 vector<Prism::Match> matches;
47- prism.CommonPrefixSearch (input.substr (current_pos), &matches);
52+ set<SyllableId> match_set;
53+ auto current_input = input.substr (current_pos);
54+ prism.CommonPrefixSearch (current_input, &matches);
55+ for (auto &m : matches) {
56+ match_set.insert (m.value );
57+ }
58+ if (enable_correction_) {
59+ Corrections corrections;
60+ corrector_->ToleranceSearch (prism, current_input, &corrections, 5 );
61+ for (const auto &m : corrections) {
62+ for (auto accessor = prism.QuerySpelling (m.first ); !accessor.exhausted (); accessor.Next ()) {
63+ if (accessor.properties ().type == kNormalSpelling ) {
64+ matches.push_back ({ m.first , m.second .length });
65+ break ;
66+ }
67+ }
68+ }
69+ }
70+
4871 if (!matches.empty ()) {
4972 auto & end_vertices (graph->edges [current_pos]);
5073 for (const auto & m : matches) {
@@ -56,15 +79,15 @@ int Syllabifier::BuildSyllableGraph(const string &input,
5679 ++end_pos;
5780 DLOG (INFO) << " end_pos: " << end_pos;
5881 bool matches_input = (current_pos == 0 && end_pos == input.length ());
59- SpellingMap spellings;
82+ SpellingMap& spellings (end_vertices[end_pos]) ;
6083 SpellingType end_vertex_type = kInvalidSpelling ;
6184 // when spelling algebra is enabled,
6285 // a spelling evaluates to a set of syllables;
6386 // otherwise, it resembles exactly the syllable itself.
6487 SpellingAccessor accessor (prism.QuerySpelling (m.value ));
6588 while (!accessor.exhausted ()) {
6689 SyllableId syllable_id = accessor.syllable_id ();
67- SpellingProperties props = accessor.properties ();
90+ EdgeProperties props ( accessor.properties () );
6891 if (strict_spelling_ &&
6992 matches_input &&
7093 props.type != kNormalSpelling ) {
@@ -74,20 +97,29 @@ int Syllabifier::BuildSyllableGraph(const string &input,
7497 props.end_pos = end_pos;
7598 // add a syllable with properties to the edge's
7699 // spelling-to-syllable map
77- spellings.insert ({syllable_id, props});
100+ if (match_set.find (m.value ) == match_set.end ()) {
101+ props.is_correction = true ;
102+ props.credibility = 0.01 ;
103+ }
104+ auto it = spellings.find (syllable_id);
105+ if (it == spellings.end ()) {
106+ spellings.insert ({syllable_id, props});
107+ } else {
108+ it->second .type = std::min (it->second .type , props.type );
109+ }
78110 // let end_vertex_type be the best (smaller) type of spelling
79111 // that ends at the vertex
80- if (end_vertex_type > props.type ) {
112+ if (end_vertex_type > props.type && !props. is_correction ) {
81113 end_vertex_type = props.type ;
82114 }
83115 }
84116 accessor.Next ();
85117 }
86118 if (spellings.empty ()) {
87119 DLOG (INFO) << " not spelt." ;
120+ end_vertices.erase (end_pos);
88121 continue ;
89122 }
90- end_vertices[end_pos].swap (spellings);
91123 // find the best common type in a path up to the end vertex
92124 // eg. pinyin "shurfa" has vertex type kNormalSpelling at position 3,
93125 // kAbbreviation at position 4 and kAbbreviation at position 6
@@ -121,6 +153,10 @@ int Syllabifier::BuildSyllableGraph(const string &input,
121153 // when there is a path of more favored type
122154 SpellingType edge_type = kInvalidSpelling ;
123155 for (auto k = j->second .begin (); k != j->second .end (); ) {
156+ if (k->second .is_correction ) {
157+ ++k;
158+ continue ; // Don't care correction edges
159+ }
124160 if (k->second .type > last_type) {
125161 j->second .erase (k++);
126162 }
@@ -245,4 +281,9 @@ void Syllabifier::Transpose(SyllableGraph* graph) {
245281 }
246282}
247283
284+ void Syllabifier::EnableCorrection (an<Corrector> corrector) {
285+ enable_correction_ = true ;
286+ corrector_ = std::move (corrector);
287+ }
288+
248289} // namespace rime
0 commit comments