opy/_regtest/src/pickle.py

OILS / opy / _regtest / src / pickle.py View on Github | oilshell.org

1397 lines, 912 significant

1	"""Create portable serialized representations of Python objects.
2
3	See module cPickle for a (much) faster implementation.
4	See module copy_reg for a mechanism for registering custom picklers.
5	See module pickletools source for extensive comments.
6
7	Classes:
8
9	Pickler
10	Unpickler
11
12	Functions:
13
14	dump(object, file)
15	dumps(object) -> string
16	load(file) -> object
17	loads(string) -> object
18
19	Misc variables:
20
21	__version__
22	format_version
23	compatible_formats
24
25	"""
26
27	__version__ = "$Revision: 72223 $" # Code version
28
29	from types import *
30	from copy_reg import dispatch_table
31	from copy_reg import _extension_registry, _inverted_registry, _extension_cache
32	import marshal
33	import sys
34	import struct
35	import re
36
# Names exported by "from pickle import *".  The all-caps opcode constants
# defined later in this module are appended to this list once they exist.
__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
           "Unpickler", "dump", "dumps", "load", "loads"]

# These are purely informational; no code uses these.
format_version = "2.0"  # File format version we write
compatible_formats = ["1.0",  # Original protocol 0
                      "1.1",  # Protocol 0 with INST added
                      "1.2",  # Original protocol 1
                      "1.3",  # Protocol 1 with BINFLOAT added
                      "2.0",  # Protocol 2
                      ]  # Old format versions we can read

# Keep in synch with cPickle.  This is the highest protocol number we
# know how to read.
HIGHEST_PROTOCOL = 2

# Why use struct.pack() for pickling but marshal.loads() for
# unpickling?  struct.pack() is 40% faster than marshal.dumps(), but
# marshal.loads() is twice as fast as struct.unpack()!
# The loaders below decode little-endian ints by prefixing the raw bytes
# with marshal's 'i' type code and handing the result to mloads.
mloads = marshal.loads
57
class PickleError(Exception):
    """Root of the pickle exception hierarchy.

    PicklingError and UnpicklingError both derive from this class, so
    catching it handles either one.
    """
61
class PicklingError(PickleError):
    """Raised when an object that cannot be pickled is handed to dump()."""
68
class UnpicklingError(PickleError):
    """Raised when an object cannot be unpickled, e.g. a security violation.

    Unpickling may also raise other exceptions, including (but not
    necessarily limited to) AttributeError, EOFError, ImportError and
    IndexError.
    """
79
80	# An instance of _Stop is raised by Unpickler.load_stop() in response to
81	# the STOP opcode, passing the object that is the result of unpickling.
82	class _Stop(Exception):
83	def __init__(self, value):
84	self.value = value
85
# Jython has PyStringMap; it's a dict subclass with string keys.
# On other implementations the import fails and the name is bound to None,
# which the Pickler class checks before registering a handler for it.
try:
    from org.python.core import PyStringMap
except ImportError:
    PyStringMap = None

# UnicodeType may or may not be exported (normally imported from types).
# Evaluating the bare name is only a probe: if "from types import *" did not
# provide it, fall back to None so later references do not raise NameError.
try:
    UnicodeType
except NameError:
    UnicodeType = None
97
# Pickle opcodes.  See pickletools.py for extensive docs.  The listing
# here is in kind-of alphabetical order of 1-character pickle code.
# pickletools groups them by purpose.

MARK = '('  # push special markobject on stack
STOP = '.'  # every pickle ends with STOP
POP = '0'  # discard topmost stack item
POP_MARK = '1'  # discard stack top through topmost markobject
DUP = '2'  # duplicate top stack item
FLOAT = 'F'  # push float object; decimal string argument
INT = 'I'  # push integer or bool; decimal string argument
BININT = 'J'  # push four-byte signed int
BININT1 = 'K'  # push 1-byte unsigned int
LONG = 'L'  # push long; decimal string argument
BININT2 = 'M'  # push 2-byte unsigned int
NONE = 'N'  # push None
PERSID = 'P'  # push persistent object; id is taken from string arg
BINPERSID = 'Q'  # push persistent object; id is taken from stack
REDUCE = 'R'  # apply callable to argtuple, both on stack
STRING = 'S'  # push string; NL-terminated string argument
BINSTRING = 'T'  # push string; counted binary string argument
SHORT_BINSTRING = 'U'  # push string; counted binary string argument < 256 bytes
UNICODE = 'V'  # push Unicode string; raw-unicode-escaped argument
BINUNICODE = 'X'  # push Unicode string; counted UTF-8 string argument
APPEND = 'a'  # append stack top to list below it
BUILD = 'b'  # call __setstate__ or __dict__.update()
GLOBAL = 'c'  # push self.find_class(modname, name); 2 string args
DICT = 'd'  # build a dict from stack items
EMPTY_DICT = '}'  # push empty dict
APPENDS = 'e'  # extend list on stack by topmost stack slice
GET = 'g'  # push item from memo on stack; index is string arg
BINGET = 'h'  # push item from memo on stack; index is 1-byte arg
INST = 'i'  # build & push class instance
LONG_BINGET = 'j'  # push item from memo on stack; index is 4-byte arg
LIST = 'l'  # build list from topmost stack items
EMPTY_LIST = ']'  # push empty list
OBJ = 'o'  # build & push class instance
PUT = 'p'  # store stack top in memo; index is string arg
BINPUT = 'q'  # store stack top in memo; index is 1-byte arg
LONG_BINPUT = 'r'  # store stack top in memo; index is 4-byte arg
SETITEM = 's'  # add key+value pair to dict
TUPLE = 't'  # build tuple from topmost stack items
EMPTY_TUPLE = ')'  # push empty tuple
SETITEMS = 'u'  # modify dict by adding topmost key+value pairs
BINFLOAT = 'G'  # push float; arg is 8-byte float encoding

TRUE = 'I01\n'  # not an opcode; see INT docs in pickletools.py
FALSE = 'I00\n'  # not an opcode; see INT docs in pickletools.py

# Protocol 2

PROTO = '\x80'  # identify pickle protocol
NEWOBJ = '\x81'  # build object by applying cls.__new__ to argtuple
EXT1 = '\x82'  # push object from extension registry; 1-byte index
EXT2 = '\x83'  # ditto, but 2-byte index
EXT4 = '\x84'  # ditto, but 4-byte index
TUPLE1 = '\x85'  # build 1-tuple from stack top
TUPLE2 = '\x86'  # build 2-tuple from two topmost stack items
TUPLE3 = '\x87'  # build 3-tuple from three topmost stack items
NEWTRUE = '\x88'  # push True
NEWFALSE = '\x89'  # push False
LONG1 = '\x8a'  # push long from < 256 bytes
LONG4 = '\x8b'  # push really big long

# Indexed by tuple length (0..3): the opcode that builds a tuple of that size.
_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]


# Export every module-level name made only of capitals, digits and
# underscores (the opcode constants above, HIGHEST_PROTOCOL, ...).
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])
# The list comprehension leaves its loop variable bound at module level;
# remove it so it does not linger as a module attribute.
del x
167
168
169	# Pickling machinery
170
171	class Pickler:
172
def __init__(self, file, protocol=None):
    """Prepare a pickler that writes its data stream to *file*.

    file -- any object with a write() method accepting a single string
        argument (an open file, a StringIO object, ...).
    protocol -- pickle protocol to use: 0, 1 or 2.  None selects
        protocol 0 for backwards compatibility; any negative value
        selects HIGHEST_PROTOCOL.  Protocol 0 is the only one that
        survives a file opened in text mode; use binary mode for the
        others, both when pickling and when unpickling.  Higher
        protocols are more efficient but need a more recent Python to
        read the result.

    Raises ValueError if protocol is greater than HIGHEST_PROTOCOL.
    """
    chosen = protocol
    if chosen is None:
        chosen = 0
    if chosen < 0:
        chosen = HIGHEST_PROTOCOL
    elif not (0 <= chosen <= HIGHEST_PROTOCOL):
        raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)

    self.write = file.write
    self.memo = {}
    self.proto = int(chosen)
    # "bin" is true for every binary protocol (1 and up).
    self.bin = chosen >= 1
    # When true, memoize() becomes a no-op.
    self.fast = 0
208
def clear_memo(self):
    """Forget every object the pickler has seen so far.

    The memo records already-pickled objects so that shared or
    recursive objects are written by reference rather than by value.
    Emptying it is useful when a pickler instance is re-used.
    """
    self.memo.clear()
219
def dump(self, obj):
    """Write the pickled form of obj, terminated by STOP, to the file."""
    version = self.proto
    if version >= 2:
        # Protocol 2 streams start with a PROTO header naming the version.
        header = PROTO + chr(version)
        self.write(header)
    self.save(obj)
    self.write(STOP)
226
def memoize(self, obj):
    """Record obj in the memo and emit the matching PUT opcode.

    The memo maps id(obj) to a (memo key, obj) pair.  The memo key is
    written to the pickle and becomes the key in the Unpickler's memo;
    the object itself is stored so that transient objects stay alive
    (and keep their id) for the duration of pickling.

    Using the current memo length as the key is only a convention --
    keys merely have to be unique -- but it lets the Unpickler memo be
    a plain growable array indexed by key.

    Does nothing when self.fast is true.
    """
    if self.fast:
        return
    identity = id(obj)
    assert identity not in self.memo
    key = len(self.memo)
    self.write(self.put(key))
    self.memo[identity] = (key, obj)
248
def put(self, i, pack=struct.pack):
    """Return the opcode string that stores the stack top under memo key i.

    Text protocol: PUT with a decimal argument.  Binary protocols:
    BINPUT (1-byte argument) for keys below 256, otherwise LONG_BINPUT
    (4-byte little-endian argument).
    """
    if not self.bin:
        return PUT + repr(i) + '\n'
    if i < 256:
        return BINPUT + chr(i)
    return LONG_BINPUT + pack("<i", i)
258
def get(self, i, pack=struct.pack):
    """Return the opcode string that pushes memo entry i onto the stack.

    Text protocol: GET with a decimal argument.  Binary protocols:
    BINGET (1-byte argument) for keys below 256, otherwise LONG_BINGET
    (4-byte little-endian argument).
    """
    if not self.bin:
        return GET + repr(i) + '\n'
    if i < 256:
        return BINGET + chr(i)
    return LONG_BINGET + pack("<i", i)
268
def save(self, obj):
    """Pickle obj, trying each strategy in a fixed order.

    1. persistent_id() hook (for subclasses);
    2. the memo, for objects already written;
    3. the per-type self.dispatch table;
    4. copy_reg.dispatch_table;
    5. classes with a custom metaclass, saved like regular classes;
    6. obj.__reduce_ex__(proto), then obj.__reduce__().

    Raises PicklingError when no strategy applies or when the reduce
    callable returns something other than a string or a tuple of two
    to five items.
    """
    # 1. Persistent id (defined by a subclass)
    pid = self.persistent_id(obj)
    if pid is not None:
        self.save_pers(pid)
        return

    # 2. Already in the memo: emit a reference instead of the value
    entry = self.memo.get(id(obj))
    if entry:
        self.write(self.get(entry[0]))
        return

    # 3. Type dispatch table
    kind = type(obj)
    handler = self.dispatch.get(kind)
    if handler:
        handler(self, obj)  # unbound function, so pass self explicitly
        return

    # 4. copy_reg.dispatch_table
    reducer = dispatch_table.get(kind)
    if reducer:
        rv = reducer(obj)
    else:
        # 5. Class with a custom metaclass; treat as a regular class
        try:
            is_class = issubclass(kind, TypeType)
        except TypeError:  # kind is not a class (old Boost; see SF #502085)
            is_class = 0
        if is_class:
            self.save_global(obj)
            return

        # 6. __reduce_ex__, falling back to __reduce__
        reducer = getattr(obj, "__reduce_ex__", None)
        if reducer:
            rv = reducer(self.proto)
        else:
            reducer = getattr(obj, "__reduce__", None)
            if not reducer:
                raise PicklingError("Can't pickle %r object: %r" %
                                    (kind.__name__, obj))
            rv = reducer()

    # A string result means "save as a global with this name"
    if type(rv) is StringType:
        self.save_global(obj, rv)
        return

    # Anything else must be a tuple ...
    if type(rv) is not TupleType:
        raise PicklingError("%s must return string or tuple" % reducer)

    # ... holding between two and five items
    size = len(rv)
    if size < 2 or size > 5:
        raise PicklingError("Tuple returned by %s must have "
                            "two to five elements" % reducer)

    # Write the reduce() output; save_reduce memoizes obj at the end
    self.save_reduce(obj=obj, *rv)
332
def persistent_id(self, obj):
    """Hook for subclasses: return a persistent id for obj, or None.

    The base implementation returns None for every object, which makes
    save() pickle the object by value.
    """
    return None
336
def save_pers(self, pid):
    """Write a reference to the persistent object identified by pid.

    Binary protocols pickle pid itself and follow it with BINPERSID;
    the text protocol writes PERSID with str(pid) on the same line.
    """
    if not self.bin:
        self.write(PERSID + str(pid) + '\n')
        return
    self.save(pid)
    self.write(BINPERSID)
344
def save_reduce(self, func, args, state=None,
                listitems=None, dictitems=None, obj=None):
    """Write the pieces of a __reduce__-style result.

    This API is also called by some subclasses.

    func, args -- callable and argument tuple that rebuild the object.
    state -- optional state, pickled and followed by BUILD.
    listitems, dictitems -- optional iterators of list items and of
        (key, value) pairs, written through the batch helpers.
    obj -- the object being pickled; when given it is memoized after
        the construction opcodes have been written.

    Raises PicklingError if args is not a tuple, if func is not
    callable, or if a __newobj__ reduction names an unsuitable class.
    """
    if not isinstance(args, TupleType):
        raise PicklingError("args from reduce() should be a tuple")

    if not hasattr(func, '__call__'):
        raise PicklingError("func from reduce should be callable")

    save = self.save
    write = self.write

    # Protocol 2 special case: if func's name is __newobj__, use NEWOBJ.
    #
    # A __reduce__ implementation can direct protocol 2 to use the more
    # efficient NEWOBJ opcode while still allowing protocols 0 and 1 to
    # work normally.  For that, the function returned by __reduce__
    # should be called __newobj__ and its first argument should be a
    # new-style class.  Its implementation should be as follows,
    # although pickle has no way to verify this:
    #
    #     def __newobj__(cls, *args):
    #         return cls.__new__(cls, *args)
    #
    # Protocols 0 and 1 pickle a reference to __newobj__; protocol 2
    # (and above) pickles cls, the remaining args tuple and NEWOBJ,
    # which calls cls.__new__(cls, *args) at unpickling time (see
    # load_newobj).  If __reduce__ returns a three-tuple, the state
    # from the third item is pickled regardless of the protocol and
    # applied via __setstate__ at unpickling time (see load_build).
    #
    # No standard __newobj__ implementation exists; you have to provide
    # your own.  This keeps pickles written with protocol 0 or 1 in
    # Python 2.3 loadable by Python 2.2.
    if self.proto >= 2 and getattr(func, "__name__", "") == "__newobj__":
        cls = args[0]
        if not hasattr(cls, "__new__"):
            raise PicklingError(
                "args[0] from __newobj__ args has no __new__")
        if obj is not None and cls is not obj.__class__:
            raise PicklingError(
                "args[0] from __newobj__ args has the wrong class")
        builder, builder_args, opcode = cls, args[1:], NEWOBJ
    else:
        builder, builder_args, opcode = func, args, REDUCE

    save(builder)
    save(builder_args)
    write(opcode)

    if obj is not None:
        key = id(obj)
        if key in self.memo:
            # obj got memoized while its pieces were being saved, so it
            # is recursive: drop what was just built and fetch the
            # object back from the memo.
            write(POP + self.get(self.memo[key][0]))
        else:
            self.memoize(obj)

    # More special cases (these work with older protocols as well):
    # when __reduce__ returns 4 or 5 items, the 4th and 5th are
    # iterators providing list items and dict (key, value) pairs, or
    # None.
    if listitems is not None:
        self._batch_appends(listitems)

    if dictitems is not None:
        self._batch_setitems(dictitems)

    if state is not None:
        save(state)
        write(BUILD)
427
# Everything from here on is reached through the dispatch table, which
# maps a type object to the (unbound) method that pickles it.

dispatch = {}

def save_none(self, obj):
    """Pickle None as the single NONE opcode."""
    emit = self.write
    emit(NONE)
dispatch[NoneType] = save_none
435
def save_bool(self, obj):
    """Pickle a bool.

    Protocol 2 has dedicated NEWTRUE/NEWFALSE opcodes; older protocols
    use the TRUE/FALSE strings, which are special spellings of INT.
    """
    if self.proto >= 2:
        if obj:
            token = NEWTRUE
        else:
            token = NEWFALSE
    else:
        if obj:
            token = TRUE
        else:
            token = FALSE
    self.write(token)
dispatch[bool] = save_bool
442
def save_int(self, obj, pack=struct.pack):
    """Pickle an int using the most compact opcode available.

    Binary protocols use BININT1 (0..0xff), BININT2 (0..0xffff) or
    BININT (anything fitting a signed 4-byte int).  The text protocol,
    and ints too large for four bytes, use the decimal INT opcode.
    """
    emit = self.write
    if self.bin:
        # One- and two-byte unsigned forms first.
        if 0 <= obj <= 0xff:
            emit(BININT1 + chr(obj))
            return
        if 0 <= obj <= 0xffff:
            emit("%c%c%c" % (BININT2, obj & 0xff, obj >> 8))
            return
        # Python's shift sign-extends, so obj >> 31 is 0 or -1 exactly
        # when all high bits are copies of the sign bit, i.e. when the
        # value fits in a signed 4-byte int.
        if (obj >> 31) in (0, -1):
            emit(BININT + pack("<i", obj))
            return
    # Text pickle, or int too big to fit in signed 4-byte format.
    emit(INT + repr(obj) + '\n')
dispatch[IntType] = save_int
466
def save_long(self, obj, pack=struct.pack):
    """Pickle a long.

    Protocol 2 writes the encode_long() byte string behind LONG1
    (length < 256, 1-byte count) or LONG4 (4-byte count).  Older
    protocols write the repr() behind the LONG opcode.
    """
    if self.proto < 2:
        self.write(LONG + repr(obj) + '\n')
        return
    payload = encode_long(obj)
    size = len(payload)
    if size < 256:
        self.write(LONG1 + chr(size) + payload)
    else:
        self.write(LONG4 + pack("<i", size) + payload)
dispatch[LongType] = save_long
478
def save_float(self, obj, pack=struct.pack):
    """Pickle a float: 8-byte big-endian BINFLOAT, or repr() text for proto 0."""
    if not self.bin:
        self.write(FLOAT + repr(obj) + '\n')
        return
    self.write(BINFLOAT + pack('>d', obj))
dispatch[FloatType] = save_float
485
def save_string(self, obj, pack=struct.pack):
    """Pickle a str and memoize it.

    Binary protocols write the raw bytes behind SHORT_BINSTRING
    (length < 256) or BINSTRING (4-byte length); the text protocol
    writes repr(obj) behind STRING.
    """
    if not self.bin:
        self.write(STRING + repr(obj) + '\n')
    else:
        size = len(obj)
        if size < 256:
            self.write(SHORT_BINSTRING + chr(size) + obj)
        else:
            self.write(BINSTRING + pack("<i", size) + obj)
    self.memoize(obj)
dispatch[StringType] = save_string
497
def save_unicode(self, obj, pack=struct.pack):
    """Pickle a unicode string and memoize it.

    Binary protocols write the UTF-8 encoding behind BINUNICODE with a
    4-byte length.  The text protocol writes the raw-unicode-escape
    encoding behind UNICODE, after escaping backslashes and newlines so
    the argument stays on one line and round-trips unambiguously.

    The escaped text is kept in a separate local so that the object
    recorded in the memo is the caller's original string.  Memoizing
    the escaped temporary instead would leave the original object out
    of the memo, so later references to it would be pickled again by
    value rather than by reference.
    """
    if self.bin:
        encoded = obj.encode('utf-8')
        self.write(BINUNICODE + pack("<i", len(encoded)) + encoded)
    else:
        escaped = obj.replace("\\", "\\u005c")
        escaped = escaped.replace("\n", "\\u000a")
        self.write(UNICODE + escaped.encode('raw-unicode-escape') + '\n')
    self.memoize(obj)
dispatch[UnicodeType] = save_unicode
509
if StringType is UnicodeType:
    # This is true for Jython, where one type covers both byte and
    # unicode strings; replace the str handler registered above.
    def save_string(self, obj, pack=struct.pack):
        """Pickle a Jython string, as unicode or bytes per obj.isunicode().

        The encoded/escaped form is kept in a separate local so that
        the memo records the caller's original object; memoizing the
        temporary would prevent later references to the same string
        from being pickled by reference.
        """
        is_unicode = obj.isunicode()

        if self.bin:
            payload = obj
            if is_unicode:
                payload = obj.encode("utf-8")
            size = len(payload)
            if size < 256 and not is_unicode:
                self.write(SHORT_BINSTRING + chr(size) + payload)
            else:
                packed_size = pack("<i", size)
                if is_unicode:
                    self.write(BINUNICODE + packed_size + payload)
                else:
                    self.write(BINSTRING + packed_size + payload)
        else:
            if is_unicode:
                escaped = obj.replace("\\", "\\u005c")
                escaped = escaped.replace("\n", "\\u000a")
                escaped = escaped.encode('raw-unicode-escape')
                self.write(UNICODE + escaped + '\n')
            else:
                self.write(STRING + repr(obj) + '\n')
        self.memoize(obj)
    dispatch[StringType] = save_string
537
def save_tuple(self, obj):
    """Pickle a tuple, coping with tuples that (indirectly) contain themselves.

    Empty tuples use EMPTY_TUPLE (or MARK + TUPLE under protocol 0).
    Under protocol 2, tuples of up to three items use TUPLE1..TUPLE3
    and need no MARK; everything else is written as MARK, the items,
    then TUPLE.
    """
    write = self.write
    proto = self.proto

    count = len(obj)
    if count == 0:
        if proto:
            write(EMPTY_TUPLE)
        else:
            write(MARK + TUPLE)
        return

    save = self.save
    memo = self.memo
    key = id(obj)
    # Short tuples have dedicated, MARK-less opcodes from protocol 2 on.
    compact = count <= 3 and proto >= 2

    if not compact:
        write(MARK)
    for item in obj:
        save(item)

    if key in memo:
        # Subtle.  The tuple was not in the memo when save_tuple() was
        # entered, so saving its items must have saved the tuple
        # itself: it is recursive.  Throw away everything just put on
        # the stack and simply GET the already-built tuple.  The check
        # could be done inside the loop instead, but recursive tuples
        # are rare.
        fetch = self.get(memo[key][0])
        if compact:
            write(POP * count + fetch)
        elif proto:
            write(POP_MARK + fetch)
        else:  # proto 0 -- POP_MARK not available
            write(POP * (count + 1) + fetch)
        return

    # No recursion.
    if compact:
        write(_tuplesize2code[count])
    else:
        write(TUPLE)
    self.memoize(obj)

dispatch[TupleType] = save_tuple
590
# Nothing in Python 2.3 uses save_empty_tuple(), but a Pickler subclass
# in Zope3 was found calling it, so removing it is not harmless.
def save_empty_tuple(self, obj):
    """Write the EMPTY_TUPLE opcode; obj is ignored."""
    emit = self.write
    emit(EMPTY_TUPLE)
596
def save_list(self, obj):
    """Pickle a list: create it empty, memoize it, then append the items.

    Memoizing before the items are written lets a list that contains
    itself be pickled by reference.
    """
    if self.bin:
        opener = EMPTY_LIST
    else:  # proto 0 -- can't use EMPTY_LIST
        opener = MARK + LIST
    self.write(opener)

    self.memoize(obj)
    self._batch_appends(iter(obj))

dispatch[ListType] = save_list
609
# Keep in synch with cPickle's BATCHSIZE.  Nothing will break if it gets
# out of synch, though.
_BATCHSIZE = 1000

def _batch_appends(self, items):
    """Write list items so that they are appended to the list on the stack.

    items -- iterator (or any iterable) yielding the items.

    The text protocol emits one APPEND per item.  Binary protocols
    group up to _BATCHSIZE items between MARK and APPENDS; a group of
    exactly one item is written with a plain APPEND.

    Both paths accept any iterable: the binary path normalizes its
    argument with iter() instead of requiring a .next() method, which
    matches what the text path's for loop already tolerates.
    """
    from itertools import islice

    save = self.save
    write = self.write

    if not self.bin:
        for x in items:
            save(x)
            write(APPEND)
        return

    it = iter(items)
    batch_size = self._BATCHSIZE
    while True:
        batch = list(islice(it, batch_size))
        n = len(batch)
        if n > 1:
            write(MARK)
            for x in batch:
                save(x)
            write(APPENDS)
        elif n:
            save(batch[0])
            write(APPEND)
        # else the batch is empty, and we're done
        if n < batch_size:
            # A short batch means the iterator is exhausted.
            return
645
def save_dict(self, obj):
    """Pickle a dict: create it empty, memoize it, then set the items.

    Memoizing before the items are written lets a dict that contains
    itself be pickled by reference.
    """
    if self.bin:
        opener = EMPTY_DICT
    else:  # proto 0 -- can't use EMPTY_DICT
        opener = MARK + DICT
    self.write(opener)

    self.memoize(obj)
    self._batch_setitems(obj.iteritems())

dispatch[DictionaryType] = save_dict
# PyStringMap is bound to None when the Jython import is unavailable.
if PyStringMap is not None:
    dispatch[PyStringMap] = save_dict
660
def _batch_setitems(self, items):
    """Write (key, value) pairs so that they are stored in the dict on the stack.

    items -- iterator (or any iterable) yielding (key, value) pairs.

    The text protocol emits one SETITEM per pair.  Binary protocols
    group up to _BATCHSIZE pairs between MARK and SETITEMS; a group of
    exactly one pair is written with a plain SETITEM.

    Both paths accept any iterable: the binary path normalizes its
    argument with iter() instead of requiring a .next() method, which
    matches what the text path's for loop already tolerates.
    """
    from itertools import islice

    save = self.save
    write = self.write

    if not self.bin:
        for k, v in items:
            save(k)
            save(v)
            write(SETITEM)
        return

    it = iter(items)
    batch_size = self._BATCHSIZE
    while True:
        batch = list(islice(it, batch_size))
        n = len(batch)
        if n > 1:
            write(MARK)
            for k, v in batch:
                save(k)
                save(v)
            write(SETITEMS)
        elif n:
            k, v = batch[0]
            save(k)
            save(v)
            write(SETITEM)
        # else the batch is empty, and we're done
        if n < batch_size:
            # A short batch means the iterator is exhausted.
            return
695
def save_inst(self, obj):
    """Pickle an instance of a classic (old-style) class.

    Writes MARK, the constructor arguments from __getinitargs__() (if
    defined) and OBJ/INST, memoizes the instance, then writes its state
    (__getstate__() if defined, else __dict__) followed by BUILD.
    """
    cls = obj.__class__

    memo = self.memo
    write = self.write
    save = self.save

    args = ()
    if hasattr(obj, '__getinitargs__'):
        args = obj.__getinitargs__()
        len(args)  # XXX Assert it's a sequence
        # args may be a temporary; keep it alive so its id stays unique.
        _keep_alive(args, memo)

    write(MARK)

    if self.bin:
        # Binary: the class goes on the stack, right after the mark.
        save(cls)
        for arg in args:
            save(arg)
        write(OBJ)
    else:
        # Text: the class is named inline by module and class name.
        for arg in args:
            save(arg)
        write(INST + cls.__module__ + '\n' + cls.__name__ + '\n')

    self.memoize(obj)

    try:
        getstate = obj.__getstate__
    except AttributeError:
        state = obj.__dict__
    else:
        state = getstate()
        # The returned state may be a temporary as well.
        _keep_alive(state, memo)
    save(state)
    write(BUILD)

dispatch[InstanceType] = save_inst
735
def save_global(self, obj, name=None, pack=struct.pack):
    """Pickle a class, function or other global by reference to its name.

    name -- name to look up in the module; defaults to obj.__name__.

    The module comes from obj.__module__, or from whichmodule() when
    that attribute is missing or None.  The name is resolved first to
    prove the reference can be loaded again.  Under protocol 2 a
    registered copy_reg extension code is written (EXT1/EXT2/EXT4)
    instead of the GLOBAL opcode, and the object is not memoized.

    Raises PicklingError if module.name cannot be found or is a
    different object than obj.
    """
    write = self.write

    if name is None:
        name = obj.__name__

    module = getattr(obj, "__module__", None)
    if module is None:
        module = whichmodule(obj, name)

    try:
        __import__(module)
        found = getattr(sys.modules[module], name)
    except (ImportError, KeyError, AttributeError):
        raise PicklingError(
            "Can't pickle %r: it's not found as %s.%s" %
            (obj, module, name))
    if found is not obj:
        raise PicklingError(
            "Can't pickle %r: it's not the same object as %s.%s" %
            (obj, module, name))

    if self.proto >= 2:
        code = _extension_registry.get((module, name))
        if code:
            assert code > 0
            # Pick the narrowest opcode able to hold the code.
            if code <= 0xff:
                write(EXT1 + chr(code))
            elif code <= 0xffff:
                write("%c%c%c" % (EXT2, code & 0xff, code >> 8))
            else:
                write(EXT4 + pack("<i", code))
            return

    write(GLOBAL + module + '\n' + name + '\n')
    self.memoize(obj)

dispatch[ClassType] = save_global
dispatch[FunctionType] = save_global
dispatch[BuiltinFunctionType] = save_global
dispatch[TypeType] = save_global
780
781	# Pickling helpers
782
783	def _keep_alive(x, memo):
784	"""Keeps a reference to the object x in the memo.
785
786	Because we remember objects by their id, we have
787	to assure that possibly temporary objects are kept
788	alive by referencing them.
789	We store a reference at the id of the memo, which should
790	normally not be used unless someone tries to deepcopy
791	the memo itself...
792	"""
793	try:
794	memo[id(memo)].append(x)
795	except KeyError:
796	# aha, this is the first one :-)
797	memo[id(memo)]=[x]
798
799
# Cache used by whichmodule(): maps a function object to the name of the
# module in which it was found.

classmap = {}  # called classmap for backwards compatibility

def whichmodule(func, funcname):
    """Return the name of the module that defines func.

    func.__module__ is used when it is set.  Otherwise sys.modules is
    searched for a module (other than __main__) whose attribute
    funcname is func, and the answer is cached in classmap.  When no
    module matches, "__main__" is returned (and cached).
    """
    # Python functions should always get an __module__ from their globals.
    declared = getattr(func, "__module__", None)
    if declared is not None:
        return declared
    if func in classmap:
        return classmap[func]

    found = '__main__'
    for name, module in sys.modules.items():
        if module is None:
            continue  # skip dummy package entries
        if name == '__main__':
            continue
        if getattr(module, funcname, None) is func:
            found = name
            break
    classmap[func] = found
    return found
829
830
831	# Unpickling machinery
832
833	class Unpickler:
834
835	def __init__(self, file):
836	"""This takes a file-like object for reading a pickle data stream.
837
838	The protocol version of the pickle is detected automatically, so no
839	proto argument is needed.
840
841	The file-like object must have two methods, a read() method that
842	takes an integer argument, and a readline() method that requires no
843	arguments. Both methods should return a string. Thus file-like
844	object can be a file object opened for reading, a StringIO object,
845	or any other custom object that meets this interface.
846	"""
847	self.readline = file.readline
848	self.read = file.read
849	self.memo = {}
850
def load(self):
    """Read one pickled object from the file and return it.

    Opcodes are read one byte at a time and executed through the
    dispatch table until a handler raises _Stop, whose value is the
    reconstituted object hierarchy.
    """
    self.mark = object()  # any new unique object
    self.stack = []
    self.append = self.stack.append
    next_byte = self.read
    handlers = self.dispatch
    try:
        while True:
            opcode = next_byte(1)
            handlers[opcode](self)
    except _Stop as finished:
        return finished.value
867
def marker(self):
    """Return the largest index k such that self.stack[k] is self.mark.

    If the stack holds no mark, the search eventually raises
    IndexError.

    This could be sped up by maintaining a second stack of the indices
    at which marks appear; that stack alone would even suffice, making
    mark objects on self.stack unnecessary.  Pushing them is probably a
    good thing anyway: if the pickle is corrupt (or hostile), finding
    self.mark embedded in unpickled objects is a useful clue.
    """
    items = self.stack
    sentinel = self.mark
    position = len(items) - 1
    while items[position] is not sentinel:
        position -= 1
    return position
882
# Maps an opcode character to the (unbound) method that executes it.
dispatch = {}

def load_eof(self):
    """Handle end of input: read(1) returned '' before a STOP opcode."""
    raise EOFError
dispatch[''] = load_eof
888
def load_proto(self):
    """Execute PROTO: read the 1-byte protocol number and validate it.

    The upper bound is HIGHEST_PROTOCOL, the module's single definition
    of the highest protocol this code can read, rather than a separate
    hard-coded literal that could drift out of step with it.

    Raises ValueError for a protocol number outside
    0..HIGHEST_PROTOCOL.
    """
    proto = ord(self.read(1))
    if not 0 <= proto <= HIGHEST_PROTOCOL:
        raise ValueError("unsupported pickle protocol: %d" % proto)
dispatch[PROTO] = load_proto
894
def load_persid(self):
    """Execute PERSID: the persistent id is the rest of the line."""
    line = self.readline()
    pid = line[:-1]  # drop the trailing newline
    obj = self.persistent_load(pid)
    self.append(obj)
dispatch[PERSID] = load_persid
899
def load_binpersid(self):
    """Execute BINPERSID: the persistent id is popped from the stack."""
    pid = self.stack.pop()
    obj = self.persistent_load(pid)
    self.append(obj)
dispatch[BINPERSID] = load_binpersid
904
def load_none(self):
    """Execute NONE: push None."""
    push = self.append
    push(None)
dispatch[NONE] = load_none
908
def load_false(self):
    """Execute NEWFALSE: push False."""
    push = self.append
    push(False)
dispatch[NEWFALSE] = load_false
912
def load_true(self):
    """Execute NEWTRUE: push True."""
    push = self.append
    push(True)
dispatch[NEWTRUE] = load_true
916
def load_int(self):
    """Execute INT: push an int, a long, or a bool.

    The exact argument lines of the TRUE and FALSE pseudo-opcodes
    ('01' and '00' plus newline) decode to booleans.  Any other line is
    parsed with int(), falling back to long() on ValueError.
    """
    line = self.readline()
    if line == FALSE[1:]:
        self.append(False)
        return
    if line == TRUE[1:]:
        self.append(True)
        return
    try:
        number = int(line)
    except ValueError:
        number = long(line)
    self.append(number)
dispatch[INT] = load_int
930
def load_binint(self):
    """Execute BININT: push the int encoded in the next four bytes."""
    raw = self.read(4)
    # 'i' is marshal's type code for a 4-byte int.
    self.append(mloads('i' + raw))
dispatch[BININT] = load_binint
934
def load_binint1(self):
    """Execute BININT1: push the value of the next byte (0..255)."""
    raw = self.read(1)
    self.append(ord(raw))
dispatch[BININT1] = load_binint1
938
def load_binint2(self):
    """Execute BININT2: push the int encoded in the next two bytes."""
    raw = self.read(2)
    # Pad with two zero bytes so marshal can decode it as a 4-byte int.
    self.append(mloads('i' + raw + '\000\000'))
dispatch[BININT2] = load_binint2
942
def load_long(self):
    """Execute LONG: push the long parsed from the rest of the line."""
    text = self.readline()[:-1]  # drop the trailing newline
    # Base 0 makes long() infer the base from the literal's prefix.
    self.append(long(text, 0))
dispatch[LONG] = load_long
946
def load_long1(self):
    """Execute LONG1: push a long from a byte string with a 1-byte length."""
    size = ord(self.read(1))
    payload = self.read(size)
    self.append(decode_long(payload))
dispatch[LONG1] = load_long1
952
def load_long4(self):
    """Execute LONG4: push a long from a byte string with a 4-byte length."""
    size = mloads('i' + self.read(4))
    payload = self.read(size)
    self.append(decode_long(payload))
dispatch[LONG4] = load_long4
958
def load_float(self):
    """Execute FLOAT: push the float parsed from the rest of the line."""
    text = self.readline()[:-1]  # drop the trailing newline
    self.append(float(text))
dispatch[FLOAT] = load_float
962
def load_binfloat(self, unpack=struct.unpack):
    """Execute BINFLOAT: push the float stored as 8 big-endian bytes."""
    raw = self.read(8)
    (value,) = unpack('>d', raw)
    self.append(value)
dispatch[BINFLOAT] = load_binfloat
966
def load_string(self):
    """Execute STRING: push the str whose quoted repr is the rest of the line.

    The argument must start and end with the same quote character
    (single or double) and be at least two characters long; anything
    else raises ValueError.  The text between the quotes is decoded
    with the "string-escape" codec.
    """
    rep = self.readline()[:-1]
    quote = rep[:1]
    if quote != '"' and quote != "'":
        raise ValueError("insecure string pickle")
    if len(rep) < 2 or not rep.endswith(quote):
        raise ValueError("insecure string pickle")
    body = rep[1:-1]
    self.append(body.decode("string-escape"))
dispatch[STRING] = load_string
979
def load_binstring(self):
    """Execute BINSTRING: push a str preceded by its 4-byte length."""
    size = mloads('i' + self.read(4))
    self.append(self.read(size))
dispatch[BINSTRING] = load_binstring
984
def load_unicode(self):
    """Execute UNICODE: push the raw-unicode-escape decoded rest of the line."""
    raw = self.readline()[:-1]  # drop the trailing newline
    self.append(unicode(raw, 'raw-unicode-escape'))
dispatch[UNICODE] = load_unicode
988
def load_binunicode(self):
    """Execute BINUNICODE: push a UTF-8 string preceded by its 4-byte length."""
    size = mloads('i' + self.read(4))
    raw = self.read(size)
    self.append(unicode(raw, 'utf-8'))
dispatch[BINUNICODE] = load_binunicode
993
def load_short_binstring(self):
    """Execute SHORT_BINSTRING: push a str preceded by its 1-byte length."""
    size = ord(self.read(1))
    self.append(self.read(size))
dispatch[SHORT_BINSTRING] = load_short_binstring
998
def load_tuple(self):
    """Execute TUPLE: replace the topmost mark and the items above it by a tuple."""
    stack = self.stack
    k = self.marker()
    built = tuple(stack[k+1:])
    stack[k:] = [built]
dispatch[TUPLE] = load_tuple
1003
def load_empty_tuple(self):
    """Execute EMPTY_TUPLE: push ()."""
    stack = self.stack
    stack.append(())
dispatch[EMPTY_TUPLE] = load_empty_tuple
1007
def load_tuple1(self):
    """Execute TUPLE1: wrap the top stack item in a 1-tuple."""
    stack = self.stack
    top = stack[-1]
    stack[-1] = (top,)
dispatch[TUPLE1] = load_tuple1
1011
def load_tuple2(self):
    """Execute TUPLE2: replace the two topmost stack items by a 2-tuple."""
    stack = self.stack
    pair = (stack[-2], stack[-1])
    stack[-2:] = [pair]
dispatch[TUPLE2] = load_tuple2
1015
def load_tuple3(self):
    """Execute TUPLE3: replace the three topmost stack items by a 3-tuple."""
    stack = self.stack
    triple = (stack[-3], stack[-2], stack[-1])
    stack[-3:] = [triple]
dispatch[TUPLE3] = load_tuple3
1019
def load_empty_list(self):
    """Execute EMPTY_LIST: push a new empty list."""
    stack = self.stack
    stack.append([])
dispatch[EMPTY_LIST] = load_empty_list
1023
def load_empty_dictionary(self):
    """Execute EMPTY_DICT: push a new empty dict."""
    stack = self.stack
    stack.append({})
dispatch[EMPTY_DICT] = load_empty_dictionary
1027
def load_list(self):
    """Execute LIST: replace the topmost mark and the items above it by a list."""
    stack = self.stack
    k = self.marker()
    built = stack[k+1:]
    stack[k:] = [built]
dispatch[LIST] = load_list
1032
def load_dict(self):
    """Execute DICT: build a dict from the items above the topmost mark.

    The items alternate key, value, key, value, ...; the mark and the
    items are replaced on the stack by the resulting dict.
    """
    k = self.marker()
    flat = self.stack[k+1:]
    d = {}
    pos = 0
    total = len(flat)
    while pos < total:
        # A dangling key without a value raises IndexError here.
        d[flat[pos]] = flat[pos + 1]
        pos += 2
    self.stack[k:] = [d]
dispatch[DICT] = load_dict
1043
1044	# INST and OBJ differ only in how they get a class object. It's not
1045	# only sensible to do the rest in a common routine, the two routines
1046	# previously diverged and grew different bugs.
1047	# klass is the class to instantiate, and k points to the topmost mark
1048	# object, following which are the arguments for klass.__init__.
1049	def _instantiate(self, klass, k):
1050	args = tuple(self.stack[k+1:])
1051	del self.stack[k:]
1052	instantiated = 0
1053	if (not args and
1054	type(klass) is ClassType and
1055	not hasattr(klass, "__getinitargs__")):
1056	try:
1057	value = _EmptyClass()
1058	value.__class__ = klass
1059	instantiated = 1
1060	except RuntimeError:
1061	# In restricted execution, assignment to inst.__class__ is
1062	# prohibited
1063	pass
1064	if not instantiated:
1065	try:
1066	value = klass(*args)
1067	except TypeError, err:
1068	raise TypeError, "in constructor for %s: %s" % (
1069	klass.__name__, str(err)), sys.exc_info()[2]
1070	self.append(value)
1071
def load_inst(self):
    """Handle INST: read a module line and a class-name line, then instantiate.

    The trailing character (the newline) is dropped from each line. The
    class is resolved through find_class() and instantiated with the
    items above the topmost mark.
    """
    module_name = self.readline()[:-1]
    class_name = self.readline()[:-1]
    klass = self.find_class(module_name, class_name)
    mark_at = self.marker()
    self._instantiate(klass, mark_at)
1077	dispatch[INST] = load_inst
1078
def load_obj(self):
    """Handle OBJ: instantiate the class stored just above the mark.

    Stack layout on entry: ... markobject classobject arg1 arg2 ...
    """
    mark_at = self.marker()
    # Remove the class object so only the constructor arguments remain
    # above the mark.
    klass = self.stack.pop(mark_at + 1)
    self._instantiate(klass, mark_at)
1084	dispatch[OBJ] = load_obj
1085
def load_newobj(self):
    """Handle NEWOBJ: replace the class on the stack with cls.__new__(cls, *args).

    The argument sequence is popped from the top of the stack; the class
    beneath it is replaced by the newly created object.
    """
    stack = self.stack
    new_args = stack.pop()
    cls = stack[-1]
    stack[-1] = cls.__new__(cls, *new_args)
1091	dispatch[NEWOBJ] = load_newobj
1092
def load_global(self):
    """Handle GLOBAL: read module and name lines and push the resolved object.

    The trailing character (the newline) is dropped from each line
    before the pair is passed to find_class().
    """
    module_name = self.readline()[:-1]
    attr_name = self.readline()[:-1]
    resolved = self.find_class(module_name, attr_name)
    self.append(resolved)
1098	dispatch[GLOBAL] = load_global
1099
def load_ext1(self):
    """Handle EXT1: read a one-byte extension code and push its object."""
    raw = self.read(1)
    self.get_extension(ord(raw))
1103	dispatch[EXT1] = load_ext1
1104
def load_ext2(self):
    """Handle EXT2: read a two-byte extension code and push its object."""
    raw = self.read(2)
    # Prefix with the 'i' type code and pad with two zero bytes so the
    # two bytes read can be decoded by marshal.loads (bound as mloads).
    code = mloads('i' + raw + '\000\000')
    self.get_extension(code)
1108	dispatch[EXT2] = load_ext2
1109
def load_ext4(self):
    """Handle EXT4: read a four-byte extension code and push its object."""
    raw = self.read(4)
    # The 'i' prefix lets marshal.loads (bound as mloads) decode the
    # four bytes as an integer.
    code = mloads('i' + raw)
    self.get_extension(code)
1113	dispatch[EXT4] = load_ext4
1114
def get_extension(self, code):
    """Push the object registered for extension *code*.

    The object is taken from _extension_cache when present. Otherwise
    its (module, name) key is looked up in _inverted_registry, resolved
    with find_class(), stored in the cache and then pushed.

    Raises ValueError if no key is registered for *code*.
    """
    # A private sentinel distinguishes "not cached" from any cached value.
    missing = []
    obj = _extension_cache.get(code, missing)
    if obj is missing:
        key = _inverted_registry.get(code)
        if not key:
            raise ValueError("unregistered extension code %d" % code)
        obj = self.find_class(*key)
        _extension_cache[code] = obj
    self.append(obj)
1127
def find_class(self, module, name):
    """Import *module* and return its attribute *name*.

    Subclasses may override this.

    NOTE: the module and attribute names come straight from the pickle
    stream, so this imports whatever module the stream names.
    """
    __import__(module)
    # __import__ returns the top-level package for dotted names, so the
    # module itself is fetched from sys.modules.
    return getattr(sys.modules[module], name)
1134
def load_reduce(self):
    """Handle REDUCE: call the callable on the stack with the popped args.

    The argument sequence is popped from the top of the stack; the
    callable beneath it is replaced by the result of the call.

    NOTE: this calls whatever callable the pickle stream put on the
    stack, which is why unpickling untrusted data is unsafe.
    """
    stack = self.stack
    call_args = stack.pop()
    target = stack[-1]
    stack[-1] = target(*call_args)
1141	dispatch[REDUCE] = load_reduce
1142
def load_pop(self):
    """Handle POP: discard the top stack item."""
    stack = self.stack
    del stack[-1]
1145	dispatch[POP] = load_pop
1146
def load_pop_mark(self):
    """Handle POP_MARK: discard the topmost mark and everything above it."""
    mark_at = self.marker()
    self.stack[mark_at:] = []
1150	dispatch[POP_MARK] = load_pop_mark
1151
def load_dup(self):
    """Handle DUP: push a second reference to the top stack item."""
    top = self.stack[-1]
    self.append(top)
1154	dispatch[DUP] = load_dup
1155
def load_get(self):
    """Handle GET: push the memo entry named by the next input line.

    The memo key is the line read with its trailing character (the
    newline) dropped.
    """
    memo_key = self.readline()[:-1]
    self.append(self.memo[memo_key])
1158	dispatch[GET] = load_get
1159
def load_binget(self):
    """Handle BINGET: push the memo entry indexed by the next input byte.

    Memo keys are strings, so the byte value is converted with repr().
    """
    memo_key = repr(ord(self.read(1)))
    self.append(self.memo[memo_key])
1163	dispatch[BINGET] = load_binget
1164
def load_long_binget(self):
    """Handle LONG_BINGET: push the memo entry indexed by a 4-byte integer.

    The four bytes are decoded with marshal.loads (bound as mloads)
    after prefixing the 'i' type code; memo keys are strings, so the
    index is converted with repr().
    """
    index = mloads('i' + self.read(4))
    memo_key = repr(index)
    self.append(self.memo[memo_key])
1168	dispatch[LONG_BINGET] = load_long_binget
1169
def load_put(self):
    """Handle PUT: store the top stack item in the memo under the next line.

    The memo key is the line read with its trailing character (the
    newline) dropped. The stack is left unchanged.
    """
    # Read the stack top before consuming input, so an empty stack fails
    # before any line is read.
    top = self.stack[-1]
    memo_key = self.readline()[:-1]
    self.memo[memo_key] = top
1172	dispatch[PUT] = load_put
1173
def load_binput(self):
    """Handle BINPUT: store the top stack item in the memo under a byte index.

    Memo keys are strings, so the byte value is converted with repr().
    The stack is left unchanged.
    """
    memo_key = repr(ord(self.read(1)))
    self.memo[memo_key] = self.stack[-1]
1177	dispatch[BINPUT] = load_binput
1178
def load_long_binput(self):
    """Handle LONG_BINPUT: store the top stack item under a 4-byte index.

    The four bytes are decoded with marshal.loads (bound as mloads)
    after prefixing the 'i' type code; memo keys are strings, so the
    index is converted with repr(). The stack is left unchanged.
    """
    memo_key = repr(mloads('i' + self.read(4)))
    self.memo[memo_key] = self.stack[-1]
1182	dispatch[LONG_BINPUT] = load_long_binput
1183
def load_append(self):
    """Handle APPEND: pop the top item and append it to the list below it."""
    stack = self.stack
    item = stack.pop()
    target = stack[-1]
    target.append(item)
1189	dispatch[APPEND] = load_append
1190
def load_appends(self):
    """Handle APPENDS: extend the list below the mark with the items above it.

    The mark and the items above it are removed afterwards, leaving the
    extended list on top of the stack.
    """
    stack = self.stack
    mark_at = self.marker()
    target = stack[mark_at - 1]
    new_items = stack[mark_at + 1:]
    target.extend(new_items)
    del stack[mark_at:]
1197	dispatch[APPENDS] = load_appends
1198
def load_setitem(self):
    """Handle SETITEM: pop a value and a key and store them in the dict below."""
    stack = self.stack
    # The value is on top of the key.
    item_value = stack.pop()
    item_key = stack.pop()
    target = stack[-1]
    target[item_key] = item_value
1205	dispatch[SETITEM] = load_setitem
1206
def load_setitems(self):
    """Handle SETITEMS: store the key/value items above the mark in a dict.

    The target mapping sits just below the topmost mark. Items above the
    mark are read as key, value, key, value, ...; an odd number of items
    raises IndexError. The mark and the items are removed afterwards.
    """
    stack = self.stack
    mark_at = self.marker()
    target = stack[mark_at - 1]
    flat = stack[mark_at + 1:]
    for offset in range(0, len(flat), 2):
        target[flat[offset]] = flat[offset + 1]
    del stack[mark_at:]
1215	dispatch[SETITEMS] = load_setitems
1216
def load_build(self):
    """Handle BUILD: apply the popped state to the instance below it.

    If the instance has a truthy __setstate__ attribute, it is called
    with the state and nothing else is done. Otherwise a 2-tuple state
    is split into (dict state, slot state); the dict state is merged
    into the instance's __dict__ and the slot state is applied with
    setattr().
    """
    stack = self.stack
    state = stack.pop()
    inst = stack[-1]

    hook = getattr(inst, "__setstate__", None)
    if hook:
        hook(state)
        return

    if isinstance(state, tuple) and len(state) == 2:
        state, slotstate = state
    else:
        slotstate = None

    if state:
        try:
            attrs = inst.__dict__
            try:
                for name, val in state.iteritems():
                    attrs[intern(name)] = val
            # keys in state don't have to be strings
            # don't blow up, but don't go out of our way
            except TypeError:
                attrs.update(state)

        except RuntimeError:
            # XXX In restricted execution, the instance's __dict__
            # is not accessible.  Use the old way of unpickling
            # the instance variables.  This is a semantic
            # difference when unpickling in restricted
            # vs. unrestricted modes.
            # Note, however, that cPickle has never tried to do the
            # .update() business, and always uses
            #     PyObject_SetItem(inst.__dict__, key, value) in a
            # loop over state.items().
            for name, val in state.items():
                setattr(inst, name, val)

    if slotstate:
        for name, val in slotstate.items():
            setattr(inst, name, val)
1254	dispatch[BUILD] = load_build
1255
def load_mark(self):
    """Handle MARK: push this unpickler's mark object onto the stack."""
    mark_object = self.mark
    self.append(mark_object)
1258	dispatch[MARK] = load_mark
1259
def load_stop(self):
    """Handle STOP: pop the top stack item and raise _Stop carrying it."""
    raise _Stop(self.stack.pop())
1263	dispatch[STOP] = load_stop
1264
1265	# Helper class for load_inst/load_obj
1266
# Blank placeholder class. _instantiate() creates an instance of it and then
# reassigns the instance's __class__, producing an object of the target class
# without calling that class's __init__. Deliberately declared without a base
# class and without any attributes.
class _EmptyClass:
    pass
1269
1270	# Encode/decode longs in linear time.
1271
1272	import binascii as _binascii
1273
def encode_long(x):
    r"""Encode a long to a two's complement little-endian binary string.
    Note that 0L is a special case, returning an empty string, to save a
    byte in the LONG1 pickling context.

    The result uses the fewest whole bytes that still carry the sign bit.

    >>> encode_long(0L)
    ''
    >>> encode_long(255L)
    '\xff\x00'
    >>> encode_long(32767L)
    '\xff\x7f'
    >>> encode_long(-256L)
    '\x00\xff'
    >>> encode_long(-32768L)
    '\x00\x80'
    >>> encode_long(-128L)
    '\x80'
    >>> encode_long(127L)
    '\x7f'
    >>>
    """

    def hex_digits(text):
        # Drop the leading "0x" and, when present, the trailing 'L' that
        # hex() appends to longs.
        if text.endswith('L'):
            return text[2:-1]
        return text[2:]

    if x == 0:
        return ''

    if x > 0:
        ashex = hex(x)
        assert ashex.startswith("0x")
        digits = hex_digits(ashex)
        if len(digits) & 1:
            # need an even # of nibbles for unhexlify
            digits = "0" + digits
        elif int(digits[0], 16) >= 8:
            # "looks negative", so need a byte of sign bits
            digits = "00" + digits
    else:
        # Build the 256's-complement: (1 << nbits) + x, where nbits is
        # the width of -x rounded up to a whole number of bytes.
        ashex = hex(-x)
        assert ashex.startswith("0x")
        nibbles = len(hex_digits(ashex))
        if nibbles & 1:
            # Extend to a full byte.
            nibbles += 1
        x += 1 << (nibbles * 4)
        assert x > 0
        digits = hex_digits(hex(x))
        shortfall = nibbles - len(digits)
        if shortfall > 0:
            # Restore leading zero nibbles that hex() dropped.
            digits = "0" * shortfall + digits
        if int(digits[0], 16) < 8:
            # "looks positive", so need a byte of sign bits
            digits = "ff" + digits

    assert len(digits) & 1 == 0, (x, digits)
    # unhexlify yields big-endian bytes; reverse them for little-endian.
    return _binascii.unhexlify(digits)[::-1]
1339
def decode_long(data):
    r"""Decode a long from a two's complement little-endian binary string.

    data is a byte string (a bytearray is accepted as well); the empty
    string decodes to zero. The value is negative when the high bit of
    the last byte is set.

    >>> decode_long('')
    0L
    >>> decode_long("\xff\x00")
    255L
    >>> decode_long("\xff\x7f")
    32767L
    >>> decode_long("\x00\xff")
    -256L
    >>> decode_long("\x00\x80")
    -32768L
    >>> decode_long("\x80")
    -128L
    >>> decode_long("\x7f")
    127L
    """

    try:
        to_long = long      # Python 2: keep returning a long, as documented
    except NameError:
        to_long = int       # no separate long type; int is unbounded

    nbytes = len(data)
    if nbytes == 0:
        return to_long(0)
    # Reverse to big-endian so the hex text reads most significant first.
    ashex = _binascii.hexlify(data[::-1])
    n = to_long(ashex, 16) # quadratic time before Python 2.3; linear now
    # Slice (not index) the last byte and take its ordinal: indexing a
    # bytearray yields an int, and comparing that int against the string
    # '\x80' never reports the sign bit, so negatives decoded as positive.
    if ord(data[-1:]) >= 0x80:
        n -= to_long(1) << (nbytes * 8)
    return n
1367
1368	# Shorthands
1369
1370	try:
1371	from cStringIO import StringIO
1372	except ImportError:
1373	from StringIO import StringIO
1374
def dump(obj, file, protocol=None):
    """Pickle obj to file using a Pickler created with the given protocol."""
    pickler = Pickler(file, protocol)
    pickler.dump(obj)
1377
def dumps(obj, protocol=None):
    """Return the pickled representation of obj as a string.

    The pickle is written to an in-memory StringIO buffer, whose
    contents are returned.
    """
    buf = StringIO()
    pickler = Pickler(buf, protocol)
    pickler.dump(obj)
    return buf.getvalue()
1382
def load(file):
    """Read one pickled object from file and return it.

    NOTE: unpickling can import modules and call callables named by the
    data, so only load pickles from a trusted source.
    """
    unpickler = Unpickler(file)
    return unpickler.load()
1385
def loads(str):
    """Read one pickled object from the string str and return it.

    The string is wrapped in a StringIO so it can be read like a file.

    NOTE: unpickling can import modules and call callables named by the
    data, so only load pickles from a trusted source.
    """
    source = StringIO(str)
    unpickler = Unpickler(source)
    return unpickler.load()
1389
1390	# Doctest
1391
def _test():
    """Run the doctests via doctest.testmod() and return its result."""
    from doctest import testmod
    return testmod()
1395
# When this file is run as a script, run the doctests through _test().
if __name__ == "__main__":
    _test()