session.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587
  1. #!/usr/bin/env python
  2. #
  3. # Author: Mike McKerns (mmckerns @caltech and @uqfoundation)
  4. # Author: Leonardo Gama (@leogama)
  5. # Copyright (c) 2008-2015 California Institute of Technology.
  6. # Copyright (c) 2016-2022 The Uncertainty Quantification Foundation.
  7. # License: 3-clause BSD. The full license text is available at:
  8. # - https://github.com/uqfoundation/dill/blob/master/LICENSE
  9. """
  10. Pickle and restore the interpreter session.
  11. """
  12. __all__ = [
  13. 'dump_module', 'load_module', 'load_module_asdict',
  14. 'dump_session', 'load_session' # backward compatibility
  15. ]
  16. import re
  17. import sys
  18. import warnings
  19. from dill import _dill, Pickler, Unpickler
  20. from ._dill import (
  21. BuiltinMethodType, FunctionType, MethodType, ModuleType, TypeType,
  22. _import_module, _is_builtin_module, _is_imported_module, _main_module,
  23. _reverse_typemap, __builtin__,
  24. )
  25. # Type hints.
  26. from typing import Optional, Union
  27. import pathlib
  28. import tempfile
  29. TEMPDIR = pathlib.PurePath(tempfile.gettempdir())
  30. def _module_map():
  31. """get map of imported modules"""
  32. from collections import defaultdict
  33. from types import SimpleNamespace
  34. modmap = SimpleNamespace(
  35. by_name=defaultdict(list),
  36. by_id=defaultdict(list),
  37. top_level={},
  38. )
  39. for modname, module in sys.modules.items():
  40. if modname in ('__main__', '__mp_main__') or not isinstance(module, ModuleType):
  41. continue
  42. if '.' not in modname:
  43. modmap.top_level[id(module)] = modname
  44. for objname, modobj in module.__dict__.items():
  45. modmap.by_name[objname].append((modobj, modname))
  46. modmap.by_id[id(modobj)].append((modobj, objname, modname))
  47. return modmap
  48. IMPORTED_AS_TYPES = (ModuleType, TypeType, FunctionType, MethodType, BuiltinMethodType)
  49. if 'PyCapsuleType' in _reverse_typemap:
  50. IMPORTED_AS_TYPES += (_reverse_typemap['PyCapsuleType'],)
  51. IMPORTED_AS_MODULES = ('ctypes', 'typing', 'subprocess', 'threading',
  52. r'concurrent\.futures(\.\w+)?', r'multiprocessing(\.\w+)?')
  53. IMPORTED_AS_MODULES = tuple(re.compile(x) for x in IMPORTED_AS_MODULES)
  54. def _lookup_module(modmap, name, obj, main_module):
  55. """lookup name or id of obj if module is imported"""
  56. for modobj, modname in modmap.by_name[name]:
  57. if modobj is obj and sys.modules[modname] is not main_module:
  58. return modname, name
  59. __module__ = getattr(obj, '__module__', None)
  60. if isinstance(obj, IMPORTED_AS_TYPES) or (__module__ is not None
  61. and any(regex.fullmatch(__module__) for regex in IMPORTED_AS_MODULES)):
  62. for modobj, objname, modname in modmap.by_id[id(obj)]:
  63. if sys.modules[modname] is not main_module:
  64. return modname, objname
  65. return None, None
  66. def _stash_modules(main_module):
  67. modmap = _module_map()
  68. newmod = ModuleType(main_module.__name__)
  69. imported = []
  70. imported_as = []
  71. imported_top_level = [] # keep separated for backward compatibility
  72. original = {}
  73. for name, obj in main_module.__dict__.items():
  74. if obj is main_module:
  75. original[name] = newmod # self-reference
  76. elif obj is main_module.__dict__:
  77. original[name] = newmod.__dict__
  78. # Avoid incorrectly matching a singleton value in another package (ex.: __doc__).
  79. elif any(obj is singleton for singleton in (None, False, True)) \
  80. or isinstance(obj, ModuleType) and _is_builtin_module(obj): # always saved by ref
  81. original[name] = obj
  82. else:
  83. source_module, objname = _lookup_module(modmap, name, obj, main_module)
  84. if source_module is not None:
  85. if objname == name:
  86. imported.append((source_module, name))
  87. else:
  88. imported_as.append((source_module, objname, name))
  89. else:
  90. try:
  91. imported_top_level.append((modmap.top_level[id(obj)], name))
  92. except KeyError:
  93. original[name] = obj
  94. if len(original) < len(main_module.__dict__):
  95. newmod.__dict__.update(original)
  96. newmod.__dill_imported = imported
  97. newmod.__dill_imported_as = imported_as
  98. newmod.__dill_imported_top_level = imported_top_level
  99. if getattr(newmod, '__loader__', None) is None and _is_imported_module(main_module):
  100. # Trick _is_imported_module() to force saving as an imported module.
  101. newmod.__loader__ = True # will be discarded by save_module()
  102. return newmod
  103. else:
  104. return main_module
  105. def _restore_modules(unpickler, main_module):
  106. try:
  107. for modname, name in main_module.__dict__.pop('__dill_imported'):
  108. main_module.__dict__[name] = unpickler.find_class(modname, name)
  109. for modname, objname, name in main_module.__dict__.pop('__dill_imported_as'):
  110. main_module.__dict__[name] = unpickler.find_class(modname, objname)
  111. for modname, name in main_module.__dict__.pop('__dill_imported_top_level'):
  112. main_module.__dict__[name] = __import__(modname)
  113. except KeyError:
  114. pass
  115. #NOTE: 06/03/15 renamed main_module to main
  116. def dump_module(
  117. filename = str(TEMPDIR/'session.pkl'),
  118. module: Optional[Union[ModuleType, str]] = None,
  119. refimported: bool = False,
  120. **kwds
  121. ) -> None:
  122. """Pickle the current state of :py:mod:`__main__` or another module to a file.
  123. Save the contents of :py:mod:`__main__` (e.g. from an interactive
  124. interpreter session), an imported module, or a module-type object (e.g.
  125. built with :py:class:`~types.ModuleType`), to a file. The pickled
  126. module can then be restored with the function :py:func:`load_module`.
  127. Parameters:
  128. filename: a path-like object or a writable stream.
  129. module: a module object or the name of an importable module. If `None`
  130. (the default), :py:mod:`__main__` is saved.
  131. refimported: if `True`, all objects identified as having been imported
  132. into the module's namespace are saved by reference. *Note:* this is
  133. similar but independent from ``dill.settings[`byref`]``, as
  134. ``refimported`` refers to virtually all imported objects, while
  135. ``byref`` only affects select objects.
  136. **kwds: extra keyword arguments passed to :py:class:`Pickler()`.
  137. Raises:
  138. :py:exc:`PicklingError`: if pickling fails.
  139. Examples:
  140. - Save current interpreter session state:
  141. >>> import dill
  142. >>> squared = lambda x: x*x
  143. >>> dill.dump_module() # save state of __main__ to /tmp/session.pkl
  144. - Save the state of an imported/importable module:
  145. >>> import dill
  146. >>> import pox
  147. >>> pox.plus_one = lambda x: x+1
  148. >>> dill.dump_module('pox_session.pkl', module=pox)
  149. - Save the state of a non-importable, module-type object:
  150. >>> import dill
  151. >>> from types import ModuleType
  152. >>> foo = ModuleType('foo')
  153. >>> foo.values = [1,2,3]
  154. >>> import math
  155. >>> foo.sin = math.sin
  156. >>> dill.dump_module('foo_session.pkl', module=foo, refimported=True)
  157. - Restore the state of the saved modules:
  158. >>> import dill
  159. >>> dill.load_module()
  160. >>> squared(2)
  161. 4
  162. >>> pox = dill.load_module('pox_session.pkl')
  163. >>> pox.plus_one(1)
  164. 2
  165. >>> foo = dill.load_module('foo_session.pkl')
  166. >>> [foo.sin(x) for x in foo.values]
  167. [0.8414709848078965, 0.9092974268256817, 0.1411200080598672]
  168. *Changed in version 0.3.6:* Function ``dump_session()`` was renamed to
  169. ``dump_module()``. Parameters ``main`` and ``byref`` were renamed to
  170. ``module`` and ``refimported``, respectively.
  171. Note:
  172. Currently, ``dill.settings['byref']`` and ``dill.settings['recurse']``
  173. don't apply to this function.`
  174. """
  175. for old_par, par in [('main', 'module'), ('byref', 'refimported')]:
  176. if old_par in kwds:
  177. message = "The argument %r has been renamed %r" % (old_par, par)
  178. if old_par == 'byref':
  179. message += " to distinguish it from dill.settings['byref']"
  180. warnings.warn(message + ".", PendingDeprecationWarning)
  181. if locals()[par]: # the defaults are None and False
  182. raise TypeError("both %r and %r arguments were used" % (par, old_par))
  183. refimported = kwds.pop('byref', refimported)
  184. module = kwds.pop('main', module)
  185. from .settings import settings
  186. protocol = settings['protocol']
  187. main = module
  188. if main is None:
  189. main = _main_module
  190. elif isinstance(main, str):
  191. main = _import_module(main)
  192. if not isinstance(main, ModuleType):
  193. raise TypeError("%r is not a module" % main)
  194. if hasattr(filename, 'write'):
  195. file = filename
  196. else:
  197. file = open(filename, 'wb')
  198. try:
  199. pickler = Pickler(file, protocol, **kwds)
  200. pickler._original_main = main
  201. if refimported:
  202. main = _stash_modules(main)
  203. pickler._main = main #FIXME: dill.settings are disabled
  204. pickler._byref = False # disable pickling by name reference
  205. pickler._recurse = False # disable pickling recursion for globals
  206. pickler._session = True # is best indicator of when pickling a session
  207. pickler._first_pass = True
  208. pickler._main_modified = main is not pickler._original_main
  209. pickler.dump(main)
  210. finally:
  211. if file is not filename: # if newly opened file
  212. file.close()
  213. return
  214. # Backward compatibility.
  215. def dump_session(filename=str(TEMPDIR/'session.pkl'), main=None, byref=False, **kwds):
  216. warnings.warn("dump_session() has been renamed dump_module()", PendingDeprecationWarning)
  217. dump_module(filename, module=main, refimported=byref, **kwds)
  218. dump_session.__doc__ = dump_module.__doc__
  219. class _PeekableReader:
  220. """lightweight stream wrapper that implements peek()"""
  221. def __init__(self, stream):
  222. self.stream = stream
  223. def read(self, n):
  224. return self.stream.read(n)
  225. def readline(self):
  226. return self.stream.readline()
  227. def tell(self):
  228. return self.stream.tell()
  229. def close(self):
  230. return self.stream.close()
  231. def peek(self, n):
  232. stream = self.stream
  233. try:
  234. if hasattr(stream, 'flush'): stream.flush()
  235. position = stream.tell()
  236. stream.seek(position) # assert seek() works before reading
  237. chunk = stream.read(n)
  238. stream.seek(position)
  239. return chunk
  240. except (AttributeError, OSError):
  241. raise NotImplementedError("stream is not peekable: %r", stream) from None
  242. def _make_peekable(stream):
  243. """return stream as an object with a peek() method"""
  244. import io
  245. if hasattr(stream, 'peek'):
  246. return stream
  247. if not (hasattr(stream, 'tell') and hasattr(stream, 'seek')):
  248. try:
  249. return io.BufferedReader(stream)
  250. except Exception:
  251. pass
  252. return _PeekableReader(stream)
  253. def _identify_module(file, main=None):
  254. """identify the name of the module stored in the given file-type object"""
  255. from pickletools import genops
  256. UNICODE = {'UNICODE', 'BINUNICODE', 'SHORT_BINUNICODE'}
  257. found_import = False
  258. try:
  259. for opcode, arg, pos in genops(file.peek(256)):
  260. if not found_import:
  261. if opcode.name in ('GLOBAL', 'SHORT_BINUNICODE') and \
  262. arg.endswith('_import_module'):
  263. found_import = True
  264. else:
  265. if opcode.name in UNICODE:
  266. return arg
  267. else:
  268. raise UnpicklingError("reached STOP without finding main module")
  269. except (NotImplementedError, ValueError) as error:
  270. # ValueError occours when the end of the chunk is reached (without a STOP).
  271. if isinstance(error, NotImplementedError) and main is not None:
  272. # file is not peekable, but we have main.
  273. return None
  274. raise UnpicklingError("unable to identify main module") from error
  275. def load_module(
  276. filename = str(TEMPDIR/'session.pkl'),
  277. module: Optional[Union[ModuleType, str]] = None,
  278. **kwds
  279. ) -> Optional[ModuleType]:
  280. """Update the selected module (default is :py:mod:`__main__`) with
  281. the state saved at ``filename``.
  282. Restore a module to the state saved with :py:func:`dump_module`. The
  283. saved module can be :py:mod:`__main__` (e.g. an interpreter session),
  284. an imported module, or a module-type object (e.g. created with
  285. :py:class:`~types.ModuleType`).
  286. When restoring the state of a non-importable module-type object, the
  287. current instance of this module may be passed as the argument ``main``.
  288. Otherwise, a new instance is created with :py:class:`~types.ModuleType`
  289. and returned.
  290. Parameters:
  291. filename: a path-like object or a readable stream.
  292. module: a module object or the name of an importable module;
  293. the module name and kind (i.e. imported or non-imported) must
  294. match the name and kind of the module stored at ``filename``.
  295. **kwds: extra keyword arguments passed to :py:class:`Unpickler()`.
  296. Raises:
  297. :py:exc:`UnpicklingError`: if unpickling fails.
  298. :py:exc:`ValueError`: if the argument ``main`` and module saved
  299. at ``filename`` are incompatible.
  300. Returns:
  301. A module object, if the saved module is not :py:mod:`__main__` or
  302. a module instance wasn't provided with the argument ``main``.
  303. Examples:
  304. - Save the state of some modules:
  305. >>> import dill
  306. >>> squared = lambda x: x*x
  307. >>> dill.dump_module() # save state of __main__ to /tmp/session.pkl
  308. >>>
  309. >>> import pox # an imported module
  310. >>> pox.plus_one = lambda x: x+1
  311. >>> dill.dump_module('pox_session.pkl', module=pox)
  312. >>>
  313. >>> from types import ModuleType
  314. >>> foo = ModuleType('foo') # a module-type object
  315. >>> foo.values = [1,2,3]
  316. >>> import math
  317. >>> foo.sin = math.sin
  318. >>> dill.dump_module('foo_session.pkl', module=foo, refimported=True)
  319. - Restore the state of the interpreter:
  320. >>> import dill
  321. >>> dill.load_module() # updates __main__ from /tmp/session.pkl
  322. >>> squared(2)
  323. 4
  324. - Load the saved state of an importable module:
  325. >>> import dill
  326. >>> pox = dill.load_module('pox_session.pkl')
  327. >>> pox.plus_one(1)
  328. 2
  329. >>> import sys
  330. >>> pox in sys.modules.values()
  331. True
  332. - Load the saved state of a non-importable module-type object:
  333. >>> import dill
  334. >>> foo = dill.load_module('foo_session.pkl')
  335. >>> [foo.sin(x) for x in foo.values]
  336. [0.8414709848078965, 0.9092974268256817, 0.1411200080598672]
  337. >>> import math
  338. >>> foo.sin is math.sin # foo.sin was saved by reference
  339. True
  340. >>> import sys
  341. >>> foo in sys.modules.values()
  342. False
  343. - Update the state of a non-importable module-type object:
  344. >>> import dill
  345. >>> from types import ModuleType
  346. >>> foo = ModuleType('foo')
  347. >>> foo.values = ['a','b']
  348. >>> foo.sin = lambda x: x*x
  349. >>> dill.load_module('foo_session.pkl', module=foo)
  350. >>> [foo.sin(x) for x in foo.values]
  351. [0.8414709848078965, 0.9092974268256817, 0.1411200080598672]
  352. *Changed in version 0.3.6:* Function ``load_session()`` was renamed to
  353. ``load_module()``. Parameter ``main`` was renamed to ``module``.
  354. See also:
  355. :py:func:`load_module_asdict` to load the contents of module saved
  356. with :py:func:`dump_module` into a dictionary.
  357. """
  358. if 'main' in kwds:
  359. warnings.warn(
  360. "The argument 'main' has been renamed 'module'.",
  361. PendingDeprecationWarning
  362. )
  363. if module is not None:
  364. raise TypeError("both 'module' and 'main' arguments were used")
  365. module = kwds.pop('main')
  366. main = module
  367. if hasattr(filename, 'read'):
  368. file = filename
  369. else:
  370. file = open(filename, 'rb')
  371. try:
  372. file = _make_peekable(file)
  373. #FIXME: dill.settings are disabled
  374. unpickler = Unpickler(file, **kwds)
  375. unpickler._session = True
  376. # Resolve unpickler._main
  377. pickle_main = _identify_module(file, main)
  378. if main is None and pickle_main is not None:
  379. main = pickle_main
  380. if isinstance(main, str):
  381. if main.startswith('__runtime__.'):
  382. # Create runtime module to load the session into.
  383. main = ModuleType(main.partition('.')[-1])
  384. else:
  385. main = _import_module(main)
  386. if main is not None:
  387. if not isinstance(main, ModuleType):
  388. raise TypeError("%r is not a module" % main)
  389. unpickler._main = main
  390. else:
  391. main = unpickler._main
  392. # Check against the pickle's main.
  393. is_main_imported = _is_imported_module(main)
  394. if pickle_main is not None:
  395. is_runtime_mod = pickle_main.startswith('__runtime__.')
  396. if is_runtime_mod:
  397. pickle_main = pickle_main.partition('.')[-1]
  398. error_msg = "can't update{} module{} %r with the saved state of{} module{} %r"
  399. if is_runtime_mod and is_main_imported:
  400. raise ValueError(
  401. error_msg.format(" imported", "", "", "-type object")
  402. % (main.__name__, pickle_main)
  403. )
  404. if not is_runtime_mod and not is_main_imported:
  405. raise ValueError(
  406. error_msg.format("", "-type object", " imported", "")
  407. % (pickle_main, main.__name__)
  408. )
  409. if main.__name__ != pickle_main:
  410. raise ValueError(error_msg.format("", "", "", "") % (main.__name__, pickle_main))
  411. # This is for find_class() to be able to locate it.
  412. if not is_main_imported:
  413. runtime_main = '__runtime__.%s' % main.__name__
  414. sys.modules[runtime_main] = main
  415. loaded = unpickler.load()
  416. finally:
  417. if not hasattr(filename, 'read'): # if newly opened file
  418. file.close()
  419. try:
  420. del sys.modules[runtime_main]
  421. except (KeyError, NameError):
  422. pass
  423. assert loaded is main
  424. _restore_modules(unpickler, main)
  425. if main is _main_module or main is module:
  426. return None
  427. else:
  428. return main
  429. # Backward compatibility.
  430. def load_session(filename=str(TEMPDIR/'session.pkl'), main=None, **kwds):
  431. warnings.warn("load_session() has been renamed load_module().", PendingDeprecationWarning)
  432. load_module(filename, module=main, **kwds)
  433. load_session.__doc__ = load_module.__doc__
  434. def load_module_asdict(
  435. filename = str(TEMPDIR/'session.pkl'),
  436. update: bool = False,
  437. **kwds
  438. ) -> dict:
  439. """
  440. Load the contents of a saved module into a dictionary.
  441. ``load_module_asdict()`` is the near-equivalent of::
  442. lambda filename: vars(dill.load_module(filename)).copy()
  443. however, does not alter the original module. Also, the path of
  444. the loaded module is stored in the ``__session__`` attribute.
  445. Parameters:
  446. filename: a path-like object or a readable stream
  447. update: if `True`, initialize the dictionary with the current state
  448. of the module prior to loading the state stored at filename.
  449. **kwds: extra keyword arguments passed to :py:class:`Unpickler()`
  450. Raises:
  451. :py:exc:`UnpicklingError`: if unpickling fails
  452. Returns:
  453. A copy of the restored module's dictionary.
  454. Note:
  455. If ``update`` is True, the corresponding module may first be imported
  456. into the current namespace before the saved state is loaded from
  457. filename to the dictionary. Note that any module that is imported into
  458. the current namespace as a side-effect of using ``update`` will not be
  459. modified by loading the saved module in filename to a dictionary.
  460. Example:
  461. >>> import dill
  462. >>> alist = [1, 2, 3]
  463. >>> anum = 42
  464. >>> dill.dump_module()
  465. >>> anum = 0
  466. >>> new_var = 'spam'
  467. >>> main = dill.load_module_asdict()
  468. >>> main['__name__'], main['__session__']
  469. ('__main__', '/tmp/session.pkl')
  470. >>> main is globals() # loaded objects don't reference globals
  471. False
  472. >>> main['alist'] == alist
  473. True
  474. >>> main['alist'] is alist # was saved by value
  475. False
  476. >>> main['anum'] == anum # changed after the session was saved
  477. False
  478. >>> new_var in main # would be True if the option 'update' was set
  479. False
  480. """
  481. if 'module' in kwds:
  482. raise TypeError("'module' is an invalid keyword argument for load_module_asdict()")
  483. if hasattr(filename, 'read'):
  484. file = filename
  485. else:
  486. file = open(filename, 'rb')
  487. try:
  488. file = _make_peekable(file)
  489. main_name = _identify_module(file)
  490. old_main = sys.modules.get(main_name)
  491. main = ModuleType(main_name)
  492. if update:
  493. if old_main is None:
  494. old_main = _import_module(main_name)
  495. main.__dict__.update(old_main.__dict__)
  496. else:
  497. main.__builtins__ = __builtin__
  498. sys.modules[main_name] = main
  499. load_module(file, **kwds)
  500. finally:
  501. if not hasattr(filename, 'read'): # if newly opened file
  502. file.close()
  503. try:
  504. if old_main is None:
  505. del sys.modules[main_name]
  506. else:
  507. sys.modules[main_name] = old_main
  508. except NameError: # failed before setting old_main
  509. pass
  510. main.__session__ = str(filename)
  511. return main.__dict__
  512. # Internal exports for backward compatibility with dill v0.3.5.1
  513. # Can't be placed in dill._dill because of circular import problems.
  514. for name in (
  515. '_lookup_module', '_module_map', '_restore_modules', '_stash_modules',
  516. 'dump_session', 'load_session' # backward compatibility functions
  517. ):
  518. setattr(_dill, name, globals()[name])
  519. del name