wordlists.py 1.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849
  """Wordlists loaded from package data.

  We can treat them as part of the code for the imperative mood check, and
  therefore we load them at import time, rather than on-demand.
  """
  5. import pkgutil
  6. import re
  7. from typing import Dict, Iterator, Set
  8. import snowballstemmer
  #: Regular expression matching a ``#`` comment together with any whitespace
  #: directly before it; used for stripping comments from the wordlists
  COMMENT_RE = re.compile(r'\s*#.*')
  #: Stemmer function for stemming words in English (the ``stemWord`` method
  #: of the snowballstemmer 'english' stemmer)
  stem = snowballstemmer.stemmer('english').stemWord
  def load_wordlist(name: str) -> Iterator[str]:
      """Iterate over lines of a wordlist data file.

      `name` should be the name of a package data file within the data/
      directory; it is looked up via ``pkgutil.get_data`` for the
      'pydocstyle' package.

      Whitespace and #-prefixed comments are stripped from each line, and
      lines that are empty after stripping are skipped.  Nothing is yielded
      when ``pkgutil.get_data`` returns ``None``.
      """
      data = pkgutil.get_data('pydocstyle', 'data/' + name)
      if data is None:
          # No data was returned for this resource: act as an empty wordlist.
          return
      for raw_line in data.decode('utf8').splitlines():
          # Drop any trailing comment first, then the surrounding whitespace.
          entry = COMMENT_RE.sub('', raw_line).strip()
          if entry:
              yield entry
  def make_imperative_verbs_dict(wordlist: Iterator[str]) -> Dict[str, Set[str]]:
      """Create a dictionary mapping stemmed verbs to the imperative form.

      Each word taken from `wordlist` is stemmed with the module-level `stem`
      callable; words that share a stem are collected into a single set
      stored under that stem.  An empty `wordlist` produces an empty dict.
      """
      imperative_verbs = {}  # type: Dict[str, Set[str]]
      for word in wordlist:
          # setdefault creates the set the first time a stem is encountered.
          imperative_verbs.setdefault(stem(word), set()).add(word)
      return imperative_verbs
  #: Mapping from each verb stem to the set of words in 'imperatives.txt'
  #: that share that stem
  IMPERATIVE_VERBS = make_imperative_verbs_dict(load_wordlist('imperatives.txt'))

  #: Words that are forbidden to appear as the first word in a docstring
  IMPERATIVE_BLACKLIST = set(load_wordlist('imperatives_blacklist.txt'))