13Aluminium committed on
Commit
5439334
·
verified ·
1 Parent(s): 2b6a295

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -0
app.py CHANGED
@@ -29,6 +29,16 @@ class SanskritMorphemeTokenizer:
29
  """
30
  if not word:
31
  return [self.unk_token]
 
 
 
 
 
 
 
 
 
 
32
 
33
  from functools import lru_cache
34
 
 
29
  """
30
  if not word:
31
  return [self.unk_token]
32
+ if word in self.morpheme_vocab:
33
+ return [word]
34
+
35
+ for i in range(len(word), 0, -1):
36
+ prefix = word[:i]
37
+ if prefix in self.morpheme_vocab:
38
+ remaining = word[i:]
39
+ return [prefix] + (self._segment_word(remaining) if remaining else [])
40
+ return [self.unk_token]
41
+
42
 
43
  from functools import lru_cache
44