# Generated by Snowball 2.2.0 - https://snowballstem.org/
- from .basestemmer import BaseStemmer
- from .among import Among
class LithuanianStemmer(BaseStemmer):
    """
    Lithuanian stemmer translated from a Snowball script.

    Generated by Snowball 2.2.0 - https://snowballstem.org/

    ``_stem`` first scans forward to set the ``I_p1`` mark, then runs six
    backward passes from the end of the word: ``fix_conflicts``, ``step1``,
    ``fix_chdz``, ``step2``, ``fix_chdz`` and ``fix_gd``.
    """

    # Each table below is handed to ``find_among_b``.  The second Among
    # argument looks like an index into the same table (-1 for "none") and
    # the third a result code -- NOTE(review): confirm against the Among
    # class, which is defined outside this file.

    # Suffixes deleted by ``__r_step1``.
    a_0 = [
        Among(u"a", -1, -1),
        Among(u"ia", 0, -1),
        Among(u"eria", 1, -1),
        Among(u"osna", 0, -1),
        Among(u"iosna", 3, -1),
        Among(u"uosna", 3, -1),
        Among(u"iuosna", 5, -1),
        Among(u"ysna", 0, -1),
        Among(u"\u0117sna", 0, -1),
        Among(u"e", -1, -1),
        Among(u"ie", 9, -1),
        Among(u"enie", 10, -1),
        Among(u"erie", 10, -1),
        Among(u"oje", 9, -1),
        Among(u"ioje", 13, -1),
        Among(u"uje", 9, -1),
        Among(u"iuje", 15, -1),
        Among(u"yje", 9, -1),
        Among(u"enyje", 17, -1),
        Among(u"eryje", 17, -1),
        Among(u"\u0117je", 9, -1),
        Among(u"ame", 9, -1),
        Among(u"iame", 21, -1),
        Among(u"sime", 9, -1),
        Among(u"ome", 9, -1),
        Among(u"\u0117me", 9, -1),
        Among(u"tum\u0117me", 25, -1),
        Among(u"ose", 9, -1),
        Among(u"iose", 27, -1),
        Among(u"uose", 27, -1),
        Among(u"iuose", 29, -1),
        Among(u"yse", 9, -1),
        Among(u"enyse", 31, -1),
        Among(u"eryse", 31, -1),
        Among(u"\u0117se", 9, -1),
        Among(u"ate", 9, -1),
        Among(u"iate", 35, -1),
        Among(u"ite", 9, -1),
        Among(u"kite", 37, -1),
        Among(u"site", 37, -1),
        Among(u"ote", 9, -1),
        Among(u"tute", 9, -1),
        Among(u"\u0117te", 9, -1),
        Among(u"tum\u0117te", 42, -1),
        Among(u"i", -1, -1),
        Among(u"ai", 44, -1),
        Among(u"iai", 45, -1),
        Among(u"eriai", 46, -1),
        Among(u"ei", 44, -1),
        Among(u"tumei", 48, -1),
        Among(u"ki", 44, -1),
        Among(u"imi", 44, -1),
        Among(u"erimi", 51, -1),
        Among(u"umi", 44, -1),
        Among(u"iumi", 53, -1),
        Among(u"si", 44, -1),
        Among(u"asi", 55, -1),
        Among(u"iasi", 56, -1),
        Among(u"esi", 55, -1),
        Among(u"iesi", 58, -1),
        Among(u"siesi", 59, -1),
        Among(u"isi", 55, -1),
        Among(u"aisi", 61, -1),
        Among(u"eisi", 61, -1),
        Among(u"tumeisi", 63, -1),
        Among(u"uisi", 61, -1),
        Among(u"osi", 55, -1),
        Among(u"\u0117josi", 66, -1),
        Among(u"uosi", 66, -1),
        Among(u"iuosi", 68, -1),
        Among(u"siuosi", 69, -1),
        Among(u"usi", 55, -1),
        Among(u"ausi", 71, -1),
        Among(u"\u010Diausi", 72, -1),
        Among(u"\u0105si", 55, -1),
        Among(u"\u0117si", 55, -1),
        Among(u"\u0173si", 55, -1),
        Among(u"t\u0173si", 76, -1),
        Among(u"ti", 44, -1),
        Among(u"enti", 78, -1),
        Among(u"inti", 78, -1),
        Among(u"oti", 78, -1),
        Among(u"ioti", 81, -1),
        Among(u"uoti", 81, -1),
        Among(u"iuoti", 83, -1),
        Among(u"auti", 78, -1),
        Among(u"iauti", 85, -1),
        Among(u"yti", 78, -1),
        Among(u"\u0117ti", 78, -1),
        Among(u"tel\u0117ti", 88, -1),
        Among(u"in\u0117ti", 88, -1),
        Among(u"ter\u0117ti", 88, -1),
        Among(u"ui", 44, -1),
        Among(u"iui", 92, -1),
        Among(u"eniui", 93, -1),
        Among(u"oj", -1, -1),
        Among(u"\u0117j", -1, -1),
        Among(u"k", -1, -1),
        Among(u"am", -1, -1),
        Among(u"iam", 98, -1),
        Among(u"iem", -1, -1),
        Among(u"im", -1, -1),
        Among(u"sim", 101, -1),
        Among(u"om", -1, -1),
        Among(u"tum", -1, -1),
        Among(u"\u0117m", -1, -1),
        Among(u"tum\u0117m", 105, -1),
        Among(u"an", -1, -1),
        Among(u"on", -1, -1),
        Among(u"ion", 108, -1),
        Among(u"un", -1, -1),
        Among(u"iun", 110, -1),
        Among(u"\u0117n", -1, -1),
        Among(u"o", -1, -1),
        Among(u"io", 113, -1),
        Among(u"enio", 114, -1),
        Among(u"\u0117jo", 113, -1),
        Among(u"uo", 113, -1),
        Among(u"s", -1, -1),
        Among(u"as", 118, -1),
        Among(u"ias", 119, -1),
        Among(u"es", 118, -1),
        Among(u"ies", 121, -1),
        Among(u"is", 118, -1),
        Among(u"ais", 123, -1),
        Among(u"iais", 124, -1),
        Among(u"tumeis", 123, -1),
        Among(u"imis", 123, -1),
        Among(u"enimis", 127, -1),
        Among(u"omis", 123, -1),
        Among(u"iomis", 129, -1),
        Among(u"umis", 123, -1),
        Among(u"\u0117mis", 123, -1),
        Among(u"enis", 123, -1),
        Among(u"asis", 123, -1),
        Among(u"ysis", 123, -1),
        Among(u"ams", 118, -1),
        Among(u"iams", 136, -1),
        Among(u"iems", 118, -1),
        Among(u"ims", 118, -1),
        Among(u"enims", 139, -1),
        Among(u"erims", 139, -1),
        Among(u"oms", 118, -1),
        Among(u"ioms", 142, -1),
        Among(u"ums", 118, -1),
        Among(u"\u0117ms", 118, -1),
        Among(u"ens", 118, -1),
        Among(u"os", 118, -1),
        Among(u"ios", 147, -1),
        Among(u"uos", 147, -1),
        Among(u"iuos", 149, -1),
        Among(u"ers", 118, -1),
        Among(u"us", 118, -1),
        Among(u"aus", 152, -1),
        Among(u"iaus", 153, -1),
        Among(u"ius", 152, -1),
        Among(u"ys", 118, -1),
        Among(u"enys", 156, -1),
        Among(u"erys", 156, -1),
        Among(u"\u0105s", 118, -1),
        Among(u"i\u0105s", 159, -1),
        Among(u"\u0117s", 118, -1),
        Among(u"am\u0117s", 161, -1),
        Among(u"iam\u0117s", 162, -1),
        Among(u"im\u0117s", 161, -1),
        Among(u"kim\u0117s", 164, -1),
        Among(u"sim\u0117s", 164, -1),
        Among(u"om\u0117s", 161, -1),
        Among(u"\u0117m\u0117s", 161, -1),
        Among(u"tum\u0117m\u0117s", 168, -1),
        Among(u"at\u0117s", 161, -1),
        Among(u"iat\u0117s", 170, -1),
        Among(u"sit\u0117s", 161, -1),
        Among(u"ot\u0117s", 161, -1),
        Among(u"\u0117t\u0117s", 161, -1),
        Among(u"tum\u0117t\u0117s", 174, -1),
        Among(u"\u012Fs", 118, -1),
        Among(u"\u016Bs", 118, -1),
        Among(u"t\u0173s", 118, -1),
        Among(u"at", -1, -1),
        Among(u"iat", 179, -1),
        Among(u"it", -1, -1),
        Among(u"sit", 181, -1),
        Among(u"ot", -1, -1),
        Among(u"\u0117t", -1, -1),
        Among(u"tum\u0117t", 184, -1),
        Among(u"u", -1, -1),
        Among(u"au", 186, -1),
        Among(u"iau", 187, -1),
        Among(u"\u010Diau", 188, -1),
        Among(u"iu", 186, -1),
        Among(u"eniu", 190, -1),
        Among(u"siu", 190, -1),
        Among(u"y", -1, -1),
        Among(u"\u0105", -1, -1),
        Among(u"i\u0105", 194, -1),
        Among(u"\u0117", -1, -1),
        Among(u"\u0119", -1, -1),
        Among(u"\u012F", -1, -1),
        Among(u"en\u012F", 198, -1),
        Among(u"er\u012F", 198, -1),
        Among(u"\u0173", -1, -1),
        Among(u"i\u0173", 201, -1),
        Among(u"er\u0173", 201, -1)
    ]

    # Suffixes deleted, repeatedly, by ``__r_step2``.
    a_1 = [
        Among(u"ing", -1, -1),
        Among(u"aj", -1, -1),
        Among(u"iaj", 1, -1),
        Among(u"iej", -1, -1),
        Among(u"oj", -1, -1),
        Among(u"ioj", 4, -1),
        Among(u"uoj", 4, -1),
        Among(u"iuoj", 6, -1),
        Among(u"auj", -1, -1),
        Among(u"\u0105j", -1, -1),
        Among(u"i\u0105j", 9, -1),
        Among(u"\u0117j", -1, -1),
        Among(u"\u0173j", -1, -1),
        Among(u"i\u0173j", 12, -1),
        Among(u"ok", -1, -1),
        Among(u"iok", 14, -1),
        Among(u"iuk", -1, -1),
        Among(u"uliuk", 16, -1),
        Among(u"u\u010Diuk", 16, -1),
        Among(u"i\u0161k", -1, -1),
        Among(u"iul", -1, -1),
        Among(u"yl", -1, -1),
        Among(u"\u0117l", -1, -1),
        Among(u"am", -1, -1),
        Among(u"dam", 23, -1),
        Among(u"jam", 23, -1),
        Among(u"zgan", -1, -1),
        Among(u"ain", -1, -1),
        Among(u"esn", -1, -1),
        Among(u"op", -1, -1),
        Among(u"iop", 29, -1),
        Among(u"ias", -1, -1),
        Among(u"ies", -1, -1),
        Among(u"ais", -1, -1),
        Among(u"iais", 33, -1),
        Among(u"os", -1, -1),
        Among(u"ios", 35, -1),
        Among(u"uos", 35, -1),
        Among(u"iuos", 37, -1),
        Among(u"aus", -1, -1),
        Among(u"iaus", 39, -1),
        Among(u"\u0105s", -1, -1),
        Among(u"i\u0105s", 41, -1),
        Among(u"\u0119s", -1, -1),
        Among(u"ut\u0117ait", -1, -1),
        Among(u"ant", -1, -1),
        Among(u"iant", 45, -1),
        Among(u"siant", 46, -1),
        Among(u"int", -1, -1),
        Among(u"ot", -1, -1),
        Among(u"uot", 49, -1),
        Among(u"iuot", 50, -1),
        Among(u"yt", -1, -1),
        Among(u"\u0117t", -1, -1),
        Among(u"yk\u0161t", -1, -1),
        Among(u"iau", -1, -1),
        Among(u"dav", -1, -1),
        Among(u"sv", -1, -1),
        Among(u"\u0161v", -1, -1),
        Among(u"yk\u0161\u010D", -1, -1),
        Among(u"\u0119", -1, -1),
        Among(u"\u0117j\u0119", 60, -1)
    ]

    # Endings rewritten by ``__r_fix_conflicts``; the result code (1-8)
    # selects the replacement text.
    a_2 = [
        Among(u"ojime", -1, 7),
        Among(u"\u0117jime", -1, 3),
        Among(u"avime", -1, 6),
        Among(u"okate", -1, 8),
        Among(u"aite", -1, 1),
        Among(u"uote", -1, 2),
        Among(u"asius", -1, 5),
        Among(u"okat\u0117s", -1, 8),
        Among(u"ait\u0117s", -1, 1),
        Among(u"uot\u0117s", -1, 2),
        Among(u"esiu", -1, 4)
    ]

    # Endings rewritten by ``__r_fix_chdz`` (result 1 -> "t", 2 -> "d").
    a_3 = [
        Among(u"\u010D", -1, 1),
        Among(u"d\u017E", -1, 2)
    ]

    # Ending rewritten to "g" by ``__r_fix_gd``.
    a_4 = [
        Among(u"gd", -1, 1)
    ]

    # Character-set bitmap passed to go_out_grouping / go_in_grouping for the
    # code point range 97..371 (presumably the vowel set -- TODO confirm
    # against the Snowball script).
    g_v = [17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 64, 1, 0, 64, 0, 0, 0, 0, 0, 0, 0, 4, 4]

    # Class-level default for the p1 mark; ``_stem`` assigns it per word.
    I_p1 = 0

    def __r_R1(self):
        """Return True when the cursor lies at or after the ``I_p1`` mark."""
        return self.I_p1 <= self.cursor

    def __r_step1(self):
        """
        Delete one ``a_0`` suffix ending at the cursor.

        The backward search is confined to the region after ``I_p1`` by
        temporarily raising ``limit_backward``.  Returns True only when a
        suffix matched, the R1 test passed and the deletion succeeded.
        """
        if self.cursor < self.I_p1:
            return False
        saved_limit = self.limit_backward
        self.limit_backward = self.I_p1
        self.ket = self.cursor
        matched = self.find_among_b(LithuanianStemmer.a_0) != 0
        if matched:
            self.bra = self.cursor
        # The original backward limit is restored on both outcomes.
        self.limit_backward = saved_limit
        if not matched:
            return False
        if not self.__r_R1():
            return False
        return bool(self.slice_del())

    def __r_step2(self):
        """
        Delete ``a_1`` suffixes one after another until none matches.

        Each round is confined to the region after ``I_p1``.  When a round
        finds nothing, the cursor is put back where that round started and
        True is returned; False is returned only if a deletion fails.
        """
        while True:
            offset_from_end = self.limit - self.cursor
            removed = False
            if self.cursor >= self.I_p1:
                saved_limit = self.limit_backward
                self.limit_backward = self.I_p1
                self.ket = self.cursor
                if self.find_among_b(LithuanianStemmer.a_1) != 0:
                    self.bra = self.cursor
                    self.limit_backward = saved_limit
                    if not self.slice_del():
                        return False
                    removed = True
                else:
                    self.limit_backward = saved_limit
            if not removed:
                self.cursor = self.limit - offset_from_end
                return True

    def __r_fix_conflicts(self):
        """
        Replace an ``a_2`` ending at the cursor with its mapped form.

        Returns False when no ending matches or the replacement fails.
        """
        self.ket = self.cursor
        among_var = self.find_among_b(LithuanianStemmer.a_2)
        if among_var == 0:
            return False
        self.bra = self.cursor
        by_result = {
            1: u"ait\u0117",
            2: u"uot\u0117",
            3: u"\u0117jimas",
            4: u"esys",
            5: u"asys",
            6: u"avimas",
            7: u"ojimas",
        }
        # Any other result code (8 in the table) maps to the last form.
        replacement = by_result.get(among_var, u"okat\u0117")
        return bool(self.slice_from(replacement))

    def __r_fix_chdz(self):
        """
        Rewrite an ``a_3`` ending at the cursor: result 1 becomes "t",
        any other result becomes "d".

        Returns False when nothing matches or the replacement fails.
        """
        self.ket = self.cursor
        among_var = self.find_among_b(LithuanianStemmer.a_3)
        if among_var == 0:
            return False
        self.bra = self.cursor
        replacement = u"t" if among_var == 1 else u"d"
        return bool(self.slice_from(replacement))

    def __r_fix_gd(self):
        """
        Rewrite a trailing "gd" (table ``a_4``) to "g".

        Returns False when nothing matches or the replacement fails.
        """
        self.ket = self.cursor
        if self.find_among_b(LithuanianStemmer.a_4) == 0:
            return False
        self.bra = self.cursor
        return bool(self.slice_from(u"g"))

    def _stem(self):
        """
        Stem the current word in place; always returns True.

        Forward phase: ``I_p1`` starts at ``limit``.  If the word begins
        with "a" and is longer than six characters, one character is
        skipped.  ``I_p1`` is then moved just past the first successful
        ``go_out_grouping`` / ``go_in_grouping`` pair over ``g_v``, if both
        succeed.  Backward phase: six passes run from the end of the word,
        each starting with the cursor at ``limit``.
        """
        self.I_p1 = self.limit
        start = self.cursor

        # Optional one-character hop over a leading "a" in long words; the
        # cursor returns to ``start`` whenever any of the three tests fails.
        if (self.eq_s(u"a")
                and len(self.current) > 6
                and start + 1 <= self.limit):
            self.cursor = start + 1
        else:
            self.cursor = start

        grouping = LithuanianStemmer.g_v
        if self.go_out_grouping(grouping, 97, 371):
            self.cursor += 1
            if self.go_in_grouping(grouping, 97, 371):
                self.cursor += 1
                self.I_p1 = self.cursor

        self.cursor = start
        self.limit_backward = self.cursor
        self.cursor = self.limit

        backward_passes = (
            self.__r_fix_conflicts,
            self.__r_step1,
            self.__r_fix_chdz,
            self.__r_step2,
            self.__r_fix_chdz,
            self.__r_fix_gd,
        )
        for run_pass in backward_passes:
            # Results are ignored; the cursor keeps its distance from the
            # (possibly moved) end of the word after every pass.
            offset_from_end = self.limit - self.cursor
            run_pass()
            self.cursor = self.limit - offset_from_end

        self.cursor = self.limit_backward
        return True
class lab0(BaseException):
    """Marker exception named after jump label 0; adds nothing to BaseException.

    Because it derives from BaseException rather than Exception, an
    ``except Exception`` handler does not intercept it.
    """
class lab1(BaseException):
    """Marker exception named after jump label 1; adds nothing to BaseException.

    Because it derives from BaseException rather than Exception, an
    ``except Exception`` handler does not intercept it.
    """
|