# irish_stemmer.py
# Generated by Snowball 2.2.0 - https://snowballstem.org/
from .basestemmer import BaseStemmer
from .among import Among
  4. class IrishStemmer(BaseStemmer):
  5. '''
  6. This class implements the stemming algorithm defined by a snowball script.
  7. Generated by Snowball 2.2.0 - https://snowballstem.org/
  8. '''
  9. a_0 = [
  10. Among(u"b'", -1, 1),
  11. Among(u"bh", -1, 4),
  12. Among(u"bhf", 1, 2),
  13. Among(u"bp", -1, 8),
  14. Among(u"ch", -1, 5),
  15. Among(u"d'", -1, 1),
  16. Among(u"d'fh", 5, 2),
  17. Among(u"dh", -1, 6),
  18. Among(u"dt", -1, 9),
  19. Among(u"fh", -1, 2),
  20. Among(u"gc", -1, 5),
  21. Among(u"gh", -1, 7),
  22. Among(u"h-", -1, 1),
  23. Among(u"m'", -1, 1),
  24. Among(u"mb", -1, 4),
  25. Among(u"mh", -1, 10),
  26. Among(u"n-", -1, 1),
  27. Among(u"nd", -1, 6),
  28. Among(u"ng", -1, 7),
  29. Among(u"ph", -1, 8),
  30. Among(u"sh", -1, 3),
  31. Among(u"t-", -1, 1),
  32. Among(u"th", -1, 9),
  33. Among(u"ts", -1, 3)
  34. ]
  35. a_1 = [
  36. Among(u"\u00EDochta", -1, 1),
  37. Among(u"a\u00EDochta", 0, 1),
  38. Among(u"ire", -1, 2),
  39. Among(u"aire", 2, 2),
  40. Among(u"abh", -1, 1),
  41. Among(u"eabh", 4, 1),
  42. Among(u"ibh", -1, 1),
  43. Among(u"aibh", 6, 1),
  44. Among(u"amh", -1, 1),
  45. Among(u"eamh", 8, 1),
  46. Among(u"imh", -1, 1),
  47. Among(u"aimh", 10, 1),
  48. Among(u"\u00EDocht", -1, 1),
  49. Among(u"a\u00EDocht", 12, 1),
  50. Among(u"ir\u00ED", -1, 2),
  51. Among(u"air\u00ED", 14, 2)
  52. ]
  53. a_2 = [
  54. Among(u"\u00F3ideacha", -1, 6),
  55. Among(u"patacha", -1, 5),
  56. Among(u"achta", -1, 1),
  57. Among(u"arcachta", 2, 2),
  58. Among(u"eachta", 2, 1),
  59. Among(u"grafa\u00EDochta", -1, 4),
  60. Among(u"paite", -1, 5),
  61. Among(u"ach", -1, 1),
  62. Among(u"each", 7, 1),
  63. Among(u"\u00F3ideach", 8, 6),
  64. Among(u"gineach", 8, 3),
  65. Among(u"patach", 7, 5),
  66. Among(u"grafa\u00EDoch", -1, 4),
  67. Among(u"pataigh", -1, 5),
  68. Among(u"\u00F3idigh", -1, 6),
  69. Among(u"acht\u00FAil", -1, 1),
  70. Among(u"eacht\u00FAil", 15, 1),
  71. Among(u"gineas", -1, 3),
  72. Among(u"ginis", -1, 3),
  73. Among(u"acht", -1, 1),
  74. Among(u"arcacht", 19, 2),
  75. Among(u"eacht", 19, 1),
  76. Among(u"grafa\u00EDocht", -1, 4),
  77. Among(u"arcachta\u00ED", -1, 2),
  78. Among(u"grafa\u00EDochta\u00ED", -1, 4)
  79. ]
  80. a_3 = [
  81. Among(u"imid", -1, 1),
  82. Among(u"aimid", 0, 1),
  83. Among(u"\u00EDmid", -1, 1),
  84. Among(u"a\u00EDmid", 2, 1),
  85. Among(u"adh", -1, 2),
  86. Among(u"eadh", 4, 2),
  87. Among(u"faidh", -1, 1),
  88. Among(u"fidh", -1, 1),
  89. Among(u"\u00E1il", -1, 2),
  90. Among(u"ain", -1, 2),
  91. Among(u"tear", -1, 2),
  92. Among(u"tar", -1, 2)
  93. ]
  94. g_v = [17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 4, 2]
  95. I_p2 = 0
  96. I_p1 = 0
  97. I_pV = 0
  98. def __r_mark_regions(self):
  99. self.I_pV = self.limit
  100. self.I_p1 = self.limit
  101. self.I_p2 = self.limit
  102. v_1 = self.cursor
  103. try:
  104. if not self.go_out_grouping(IrishStemmer.g_v, 97, 250):
  105. raise lab0()
  106. self.cursor += 1
  107. self.I_pV = self.cursor
  108. if not self.go_in_grouping(IrishStemmer.g_v, 97, 250):
  109. raise lab0()
  110. self.cursor += 1
  111. self.I_p1 = self.cursor
  112. if not self.go_out_grouping(IrishStemmer.g_v, 97, 250):
  113. raise lab0()
  114. self.cursor += 1
  115. if not self.go_in_grouping(IrishStemmer.g_v, 97, 250):
  116. raise lab0()
  117. self.cursor += 1
  118. self.I_p2 = self.cursor
  119. except lab0: pass
  120. self.cursor = v_1
  121. return True
  122. def __r_initial_morph(self):
  123. self.bra = self.cursor
  124. among_var = self.find_among(IrishStemmer.a_0)
  125. if among_var == 0:
  126. return False
  127. self.ket = self.cursor
  128. if among_var == 1:
  129. if not self.slice_del():
  130. return False
  131. elif among_var == 2:
  132. if not self.slice_from(u"f"):
  133. return False
  134. elif among_var == 3:
  135. if not self.slice_from(u"s"):
  136. return False
  137. elif among_var == 4:
  138. if not self.slice_from(u"b"):
  139. return False
  140. elif among_var == 5:
  141. if not self.slice_from(u"c"):
  142. return False
  143. elif among_var == 6:
  144. if not self.slice_from(u"d"):
  145. return False
  146. elif among_var == 7:
  147. if not self.slice_from(u"g"):
  148. return False
  149. elif among_var == 8:
  150. if not self.slice_from(u"p"):
  151. return False
  152. elif among_var == 9:
  153. if not self.slice_from(u"t"):
  154. return False
  155. else:
  156. if not self.slice_from(u"m"):
  157. return False
  158. return True
  159. def __r_RV(self):
  160. if not self.I_pV <= self.cursor:
  161. return False
  162. return True
  163. def __r_R1(self):
  164. if not self.I_p1 <= self.cursor:
  165. return False
  166. return True
  167. def __r_R2(self):
  168. if not self.I_p2 <= self.cursor:
  169. return False
  170. return True
  171. def __r_noun_sfx(self):
  172. self.ket = self.cursor
  173. among_var = self.find_among_b(IrishStemmer.a_1)
  174. if among_var == 0:
  175. return False
  176. self.bra = self.cursor
  177. if among_var == 1:
  178. if not self.__r_R1():
  179. return False
  180. if not self.slice_del():
  181. return False
  182. else:
  183. if not self.__r_R2():
  184. return False
  185. if not self.slice_del():
  186. return False
  187. return True
  188. def __r_deriv(self):
  189. self.ket = self.cursor
  190. among_var = self.find_among_b(IrishStemmer.a_2)
  191. if among_var == 0:
  192. return False
  193. self.bra = self.cursor
  194. if among_var == 1:
  195. if not self.__r_R2():
  196. return False
  197. if not self.slice_del():
  198. return False
  199. elif among_var == 2:
  200. if not self.slice_from(u"arc"):
  201. return False
  202. elif among_var == 3:
  203. if not self.slice_from(u"gin"):
  204. return False
  205. elif among_var == 4:
  206. if not self.slice_from(u"graf"):
  207. return False
  208. elif among_var == 5:
  209. if not self.slice_from(u"paite"):
  210. return False
  211. else:
  212. if not self.slice_from(u"\u00F3id"):
  213. return False
  214. return True
  215. def __r_verb_sfx(self):
  216. self.ket = self.cursor
  217. among_var = self.find_among_b(IrishStemmer.a_3)
  218. if among_var == 0:
  219. return False
  220. self.bra = self.cursor
  221. if among_var == 1:
  222. if not self.__r_RV():
  223. return False
  224. if not self.slice_del():
  225. return False
  226. else:
  227. if not self.__r_R1():
  228. return False
  229. if not self.slice_del():
  230. return False
  231. return True
  232. def _stem(self):
  233. v_1 = self.cursor
  234. self.__r_initial_morph()
  235. self.cursor = v_1
  236. self.__r_mark_regions()
  237. self.limit_backward = self.cursor
  238. self.cursor = self.limit
  239. v_3 = self.limit - self.cursor
  240. self.__r_noun_sfx()
  241. self.cursor = self.limit - v_3
  242. v_4 = self.limit - self.cursor
  243. self.__r_deriv()
  244. self.cursor = self.limit - v_4
  245. v_5 = self.limit - self.cursor
  246. self.__r_verb_sfx()
  247. self.cursor = self.limit - v_5
  248. self.cursor = self.limit_backward
  249. return True
  250. class lab0(BaseException): pass