comparison 2.00/zipfile27.py @ 29:a8cc383b787c

Clean up zipfiles and diff them to stock ones
author Oleg Oshmyan <chortos@inbox.lv>
date Wed, 24 Nov 2010 23:21:31 +0000
parents 2.00/zipfile2.py@ec6f1a132109
children
comparison
equal deleted inserted replaced
28:3d535503161f 29:a8cc383b787c
1 """
2 Read and write ZIP files.
3 """
4 # Improved by Chortos-2 in 2010 (added bzip2 support)
5 import struct, os, time, sys, shutil
6 import binascii, cStringIO, stat
7 import io
8 import re
9
10 try:
11 import zlib # We may need its compression method
12 crc32 = zlib.crc32
13 except ImportError:
14 zlib = None
15 crc32 = binascii.crc32
16
17 try:
18 import bz2 # We may need its compression method
19 except ImportError:
20 bz2 = None
21
# Public names exported by ``from <module> import *``.
__all__ = [
    "BadZipfile",
    "error",
    "ZIP_STORED",
    "ZIP_DEFLATED",
    "is_zipfile",
    "ZipInfo",
    "ZipFile",
    "PyZipFile",
    "LargeZipFile",
    "ZIP_BZIP2",
]
24
class BadZipfile(Exception):
    """Raised by this module when an archive cannot be parsed as a ZIP file."""


class LargeZipFile(Exception):
    """Raised when writing a zipfile that would require the ZIP64
    extensions while those extensions are disabled.
    """


# Historical alias: the exception raised by this module.
error = BadZipfile
36
# Numeric limits used by this module when reading and writing archives.
ZIP64_LIMIT = 0x7FFFFFFF
ZIP_FILECOUNT_LIMIT = 0x10000
ZIP_MAX_COMMENT = 0xFFFF

# Constants for ZIP file compression methods.
# Other ZIP compression methods are not supported.
ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2 = 0, 8, 12

# Below are some formats and associated data for reading/writing headers
# using the struct module.  The names and structures of headers/records are
# those used in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document).
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

(_ECD_SIGNATURE,
 _ECD_DISK_NUMBER,
 _ECD_DISK_START,
 _ECD_ENTRIES_THIS_DISK,
 _ECD_ENTRIES_TOTAL,
 _ECD_SIZE,
 _ECD_OFFSET,
 _ECD_COMMENT_SIZE,
 # The last two indices are not part of the structure as defined in the
 # spec, but they are used internally by this module as a convenience.
 _ECD_COMMENT,
 _ECD_LOCATION) = range(10)

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document).
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# Indexes of entries in the central directory structure.
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document).
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)

# The "Zip64 end of central directory locator" structure, magic number,
# and size.
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and
# indices (section V.G in the format document).
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

(_CD64_SIGNATURE,
 _CD64_DIRECTORY_RECSIZE,
 _CD64_CREATE_VERSION,
 _CD64_EXTRACT_VERSION,
 _CD64_DISK_NUMBER,
 _CD64_DISK_NUMBER_START,
 _CD64_NUMBER_ENTRIES_THIS_DISK,
 _CD64_NUMBER_ENTRIES_TOTAL,
 _CD64_DIRECTORY_SIZE,
 _CD64_OFFSET_START_CENTDIR) = range(10)
139
def _check_zipfile(fp):
    """Return True when an end-of-central-directory record is found in *fp*.

    An IOError raised while probing the file is treated as "not a ZIP file".
    """
    try:
        # A truthy record means the file has the correct magic number.
        found = bool(_EndRecData(fp))
    except IOError:
        found = False
    return found
147
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too; anything
    exposing a ``read`` attribute is probed directly, everything else is
    opened in binary mode as a path.  An IOError yields False.
    """
    result = False
    try:
        if not hasattr(filename, "read"):
            with open(filename, "rb") as fp:
                result = _check_zipfile(fp)
        else:
            result = _check_zipfile(fp=filename)
    except IOError:
        pass
    return result
163
def _EndRecData64(fpin, offset, endrec):
    """Read the ZIP64 end-of-archive records and use them to update endrec.

    fpin   -- seekable file object positioned anywhere.
    offset -- (negative) position of the classic end-of-central-directory
              record, relative to the end of the file.
    endrec -- list built by _EndRecData(); it is updated in place and
              returned.

    endrec is returned unchanged when no ZIP64 locator/record is present,
    including when the file is too small to contain one.  Raises BadZipfile
    for archives that span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # The seek fails when the file is not large enough to contain a
        # ZIP64 end-of-archive locator: keep the record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read (e.g. file objects that clamp out-of-range seeks).
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    try:
        fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    except IOError:
        return endrec
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
195
196
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the eight fields of the ZIP "End of central dir"
    record, followed by the archive comment and, as the tenth item, the
    file seek offset of this record.  None is returned when no valid record
    can be located.
    """

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length test guards struct.unpack against short reads from file
    # objects that clamp an out-of-range seek instead of raising.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == "\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append("")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may
    # be up to 64K long.  It is assumed that the "end of central directory"
    # magic number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start + sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # Signature found, but the record itself is truncated.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start + sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return None
253
254
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry named *filename* with modification *date_time*.

        date_time is a (year, month, day, hour, min, sec) tuple.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0      # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3      # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        When either size exceeds ZIP64_LIMIT a ZIP64 extra record is
        appended and extract_version/create_version are raised to at
        least 45.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Each version field is raised independently; the creator
            # version must not be derived from the extractor version.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        Unicode names are stored as ASCII when possible; otherwise they are
        UTF-8 encoded and flag bit 0x800 is added.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the filename, UTF-8 decoded when flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply the ZIP64 record (type 1) of the extra field, if present.

        Sizes/offset currently holding the 0xffffffff placeholder are
        replaced by the 64-bit values stored in the record.  Raises
        RuntimeError when the ZIP64 record has an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # Every record starts with a 4-byte (type, length) header; fewer
        # than 4 trailing bytes cannot be a record and are ignored.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
402
403
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = []
        for value in range(256):
            for _ in range(8):
                shifted = (value >> 1) & 0x7FFFFFFF
                if value & 1:
                    value = shifted ^ poly
                else:
                    value = shifted
            table.append(value)
        return table
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ord(ch)) & 0xff
        return ((crc >> 8) & 0xffffff) ^ self.crctable[index]

    def __init__(self, pwd):
        # Fixed initial key values, then mixed with each password character.
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for char in pwd:
            self._UpdateKeys(char)

    def _UpdateKeys(self, c):
        """Advance the three keys using the plaintext character *c*."""
        mask = 0xFFFFFFFF
        self.key0 = self._crc32(c, self.key0)
        key1 = (self.key1 + (self.key0 & 255)) & mask
        self.key1 = (key1 * 0x08088405 + 1) & mask
        self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        k = self.key2 | 2
        plain = chr(ord(c) ^ (((k * (k ^ 1)) >> 8) & 255))
        self._UpdateKeys(plain)
        return plain
462
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.

    Is returned by ZipFile.open().  Data is read from the underlying file
    object, optionally decrypted, decompressed according to the member's
    compression type, and checked against the member's CRC-32 once the
    end of the compressed data has been reached.
    """

    # Max size supported by decompressor.
    # (Parenthesised: "1 << 31 - 1" would evaluate to 1 << 30.)
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None):
        """Wrap *fileobj*, positioned at the start of the member's data.

        mode      -- "r", "U" or "rU"; a "U" enables universal newlines in
                     readline().
        zipinfo   -- ZipInfo describing the member being read.
        decrypter -- optional callable decrypting one character at a time.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        if self._compress_type == ZIP_DEFLATED:
            self._decompressor = zlib.decompressobj(-15)
        elif self._compress_type == ZIP_BZIP2:
            self._decompressor = bz2.BZ2Decompressor()
            # bzip2 members are read in much larger blocks.
            self.MIN_READ_SIZE = 900000
        self._unconsumed = ''

        self._readbuffer = ''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find('\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = ''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == '':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + '\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # Rewind so the data just read stays available in the buffer.
            self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Report that this stream can be read from."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, data is read and
        returned until EOF is reached.
        """
        buf = ''
        if n is None:
            n = -1
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > len(buf):
                data = self.read1(n - len(buf))
            else:
                return buf
            if len(data) == 0:
                return buf
            buf += data

    def _update_crc(self, newdata, eof):
        """Fold *newdata* into the running CRC; verify it when *eof* is set.

        Raises BadZipfile when the final CRC differs from the expected one.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipfile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        # Simplify algorithm (branching) by transforming negative n to large n.
        # None is tested first so it is never compared with an integer.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = ''.join(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._update_crc(data, eof=(self._compress_left==0))
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare compressed bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            eof = len(self._unconsumed) == 0 and self._compress_left == 0
            if eof:
                data += self._decompressor.flush()

            self._update_crc(data, eof=eof)
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0
        elif (len(self._unconsumed) > 0 and n > len_readbuffer and
              self._compress_type == ZIP_BZIP2):
            data = self._decompressor.decompress(self._unconsumed)

            # Everything pending was handed to the decompressor in one go.
            self._unconsumed = ''
            # Verify bzip2 members too: once no compressed input is left,
            # the data seen so far must match the stored CRC-32.
            self._update_crc(data, eof=(self._compress_left == 0))
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data
660
661
662
663 class ZipFile:
664 """ Class with methods to open, read, write, close, list zip files.
665
666 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
667
668 file: Either the path to the file, or a file-like object.
669 If it is a path, the file will be opened and closed by ZipFile.
670 mode: The mode can be either read "r", write "w" or append "a".
671 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
672 or ZIP_BZIP2 (requires bz2).
673 allowZip64: if True ZipFile will create files with ZIP64 extensions when
674 needed, otherwise it will raise an exception when this would
675 be necessary.
676
677 """
678
679 fp = None # Set here since __del__ checks it
680
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    *file* is either a path (opened and owned by this object) or an
    already open file-like object.  RuntimeError is raised for an invalid
    mode or for a compression method that is unknown or whose module is
    missing.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    # Make sure the requested compression method can actually be used.
    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
    elif compression != ZIP_STORED:
        raise RuntimeError("That compression method is not supported")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None
    self.comment = ''

    # Check if we were passed a file-like object
    if not isinstance(file, basestring):
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)
    else:
        self._filePassed = 0
        self.filename = file
        modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        try:
            self.fp = open(file, modeDict[mode])
        except IOError:
            if mode != 'a':
                raise
            # Appending to a file that cannot be opened for update:
            # fall back to creating it.
            mode = key = 'w'
            self.fp = open(file, modeDict[mode])

    if key == 'r':
        self._GetContents()
    elif key == 'w':
        pass
    elif key == 'a':
        try:                        # See if file is a zip file
            self._RealGetContents()
            # seek to start of directory and overwrite
            self.fp.seek(self.start_dir, 0)
        except BadZipfile:          # file is not a zip file, just append
            self.fp.seek(0, 2)
    else:
        if not self._filePassed:
            self.fp.close()
            self.fp = None
        raise RuntimeError('Mode must be "r", "w" or "a"')
743
def __enter__(self):
    """Enter a ``with`` block; the archive object itself is the target."""
    archive = self
    return archive
746
def __exit__(self, type, value, traceback):
    """Leave a ``with`` block by closing the archive.

    The exception details are ignored and nothing is returned, so a
    pending exception keeps propagating.
    """
    self.close()
749
def _GetContents(self):
    """Read the directory, making sure we close the file if the format
    is bad.

    The BadZipfile raised by _RealGetContents() is re-raised after a file
    opened by this object has been closed.
    """
    try:
        self._RealGetContents()
    except BadZipfile:
        owns_file = not self._filePassed
        if owns_file:
            self.fp.close()
            self.fp = None
        raise
760
761 def _RealGetContents(self):
762 """Read in the table of contents for the ZIP file."""
763 fp = self.fp
764 endrec = _EndRecData(fp)
765 if not endrec:
766 raise BadZipfile, "File is not a zip file"
767 if self.debug > 1:
768 print endrec
769 size_cd = endrec[_ECD_SIZE] # bytes in central directory
770 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
771 self.comment = endrec[_ECD_COMMENT] # archive comment
772
773 # "concat" is zero, unless zip was concatenated to another file
774 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
775 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
776 # If Zip64 extension structures are present, account for them
777 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
778
779 if self.debug > 2:
780 inferred = concat + offset_cd
781 print "given, inferred, offset", offset_cd, inferred, concat
782 # self.start_dir: Position of start of central directory
783 self.start_dir = offset_cd + concat
784 fp.seek(self.start_dir, 0)
785 data = fp.read(size_cd)
786 fp = cStringIO.StringIO(data)
787 total = 0
788 while total < size_cd:
789 centdir = fp.read(sizeCentralDir)
790 if centdir[0:4] != stringCentralDir:
791 raise BadZipfile, "Bad magic number for central directory"
792 centdir = struct.unpack(structCentralDir, centdir)
793 if self.debug > 2:
794 print centdir
795 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
796 # Create ZipInfo instance to store file information
797 x = ZipInfo(filename)
798 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
799 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
800 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
801 (x.create_version, x.create_system, x.extract_version, x.reserved,
802 x.flag_bits, x.compress_type, t, d,
803 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
804 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
805 # Convert date/time code to (year, month, day, hour, min, sec)
806 x._raw_time = t
807 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
808 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
809
810 x._decodeExtra()
811 x.header_offset = x.header_offset + concat
812 x.filename = x._decodeFilename()
813 self.filelist.append(x)
814 self.NameToInfo[x.filename] = x
815
816 # update total bytes read from central directory
817 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
818 + centdir[_CD_EXTRA_FIELD_LENGTH]
819 + centdir[_CD_COMMENT_LENGTH])
820
821 if self.debug > 2:
822 print "total", total
823
824
def namelist(self):
    """Return a new list with the file name of every archive member."""
    return [info.filename for info in self.filelist]
831
def infolist(self):
    """Return the list of ZipInfo instances for the files in the archive.

    The internal list object itself is returned, not a copy.
    """
    entries = self.filelist
    return entries
836
def printdir(self):
    """Print a table of contents for the zip file."""
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header)
    for zinfo in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
        row = "%-46s %s %12d" % (zinfo.filename, stamp, zinfo.file_size)
        print(row)
843
def testzip(self):
    """Read all the files and check the CRC.

    Returns the name of the first member for which reading raises
    BadZipfile, or None when every member reads cleanly.
    """
    chunk_size = 1 << 20
    for zinfo in self.filelist:
        try:
            # Read by chunks, to avoid an OverflowError or a
            # MemoryError with very large embedded files.
            member = self.open(zinfo.filename, "r")
            data = member.read(chunk_size)
            while data:     # Check CRC-32
                data = member.read(chunk_size)
        except BadZipfile:
            return zinfo.filename
    return None
856
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError when the archive has no member of that name.
    """
    entry = self.NameToInfo.get(name)
    if entry is not None:
        return entry

    raise KeyError('There is no item named %r in the archive' % name)
865
def setpassword(self, pwd):
    """Set default password for encrypted files.

    The value is stored on ``self.pwd``.
    """
    setattr(self, "pwd", pwd)
869
def read(self, name, pwd=None):
    """Return file bytes (as a string) for name.

    The member is opened in "r" mode with the given password and read
    to the end.
    """
    member = self.open(name, "r", pwd)
    return member.read()
873
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name -- member name or ZipInfo instance.
    mode -- "r", "U" or "rU".
    pwd  -- password for encrypted members; falls back to the default
            set with setpassword().

    Raises RuntimeError for a bad mode, a closed archive, a missing or
    wrong password; BadZipfile when the local file header is truncated,
    has a bad signature, or names a different file than the directory.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = open(self.filename, 'rb')

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if len(fheader) != sizeFileHeader:
            raise BadZipfile("Truncated file header")
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if fname != zinfo.orig_filename:
            raise BadZipfile(
                'File name in directory "%s" and header "%s" differ.' % (
                    zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            crypt_header = zef_file.read(12)
            h = [zd(c) for c in crypt_header[0:12]]
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if ord(h[11]) != check_byte:
                raise RuntimeError("Bad password for file", name)

        return ZipExtFile(zef_file, mode, zinfo, zd)
    except:
        # Do not leak the handle we opened ourselves; a caller-supplied
        # file object is left untouched.  The error is always re-raised.
        if not self._filePassed:
            zef_file.close()
        raise
942
def extract(self, member, path=None, pwd=None):
    """Extract a member from the archive to the current working directory,
    using its full name. Its file information is extracted as accurately
    as possible. `member' may be a filename or a ZipInfo object. You can
    specify a different directory using `path'.

    Returns whatever _extract_member() returns for the member.
    """
    zinfo = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(zinfo, destination, pwd)
956
def extractall(self, path=None, members=None, pwd=None):
    """Extract all members from the archive to the current working
    directory. `path' specifies a different directory to extract to.
    `members' is optional and must be a subset of the list returned
    by namelist().
    """
    selected = self.namelist() if members is None else members
    for entry in selected:
        self.extract(entry, path, pwd)
968
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
    file on the path targetpath.

    The member name is treated as untrusted: absolute names are made
    relative, and drive prefixes, empty, "." and ".." components are
    dropped, so the result always lies below *targetpath*.  Returns the
    path of the created file or directory.
    """
    # Strip trailing path separator, unless it represents the root.
    if (targetpath[-1:] in (os.path.sep, os.path.altsep)
            and len(os.path.splitdrive(targetpath)[1]) > 1):
        targetpath = targetpath[:-1]

    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    arcname = member.filename.replace('/', os.path.sep)
    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # Interpret an absolute pathname as relative; remove drive letter or
    # UNC path, redundant separators, "." and ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    arcname = os.path.sep.join(
        part for part in arcname.split(os.path.sep)
        if part not in ('', os.path.curdir, os.path.pardir))

    targetpath = os.path.normpath(os.path.join(targetpath, arcname))

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    # A trailing slash marks a directory entry (slice: safe on empty names).
    if member.filename[-1:] == '/':
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    # Both files are closed even when copying fails part-way.
    with self.open(member, pwd=pwd) as source, \
            open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1005
1006 def _writecheck(self, zinfo):
1007 """Check for errors before writing a file to the archive."""
1008 if zinfo.filename in self.NameToInfo:
1009 if self.debug: # Warning for duplicate names
1010 print "Duplicate name:", zinfo.filename
1011 if self.mode not in ("w", "a"):
1012 raise RuntimeError, 'write() requires mode "w" or "a"'
1013 if not self.fp:
1014 raise RuntimeError, \
1015 "Attempt to write ZIP archive that was already closed"
1016 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
1017 raise RuntimeError, \
1018 "Compression requires the (missing) zlib module"
1019 if zinfo.compress_type == ZIP_BZIP2 and not bz2:
1020 raise RuntimeError, \
1021 "Compression requires the (missing) bz2 module"
1022 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2):
1023 raise RuntimeError, \
1024 "That compression method is not supported"
1025 if zinfo.file_size > ZIP64_LIMIT:
1026 if not self._allowZip64:
1027 raise LargeZipFile("Filesize would require ZIP64 extensions")
1028 if zinfo.header_offset > ZIP64_LIMIT:
1029 if not self._allowZip64:
1030 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
1031
def write(self, filename, arcname=None, compress_type=None):
    """Add the file or directory `filename' to the archive.

    `arcname' is the name stored in the archive and defaults to
    `filename'; in either case the drive and any leading separators are
    removed.  `compress_type' overrides self.compression for this member.
    A directory is stored as an empty entry whose name ends in "/".

    Raises RuntimeError if the archive has already been closed, plus
    whatever _writecheck() raises for the new member.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    file_stat = os.stat(filename)
    isdir = stat.S_ISDIR(file_stat.st_mode)
    date_time = time.localtime(file_stat.st_mtime)[0:6]

    # Build the archive name: no drive, no leading separators, and a
    # trailing slash to mark directories.
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'

    # Create ZipInfo instance to store file information
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (file_stat[0] & 0xFFFF) << 16    # Unix attributes
    zinfo.compress_type = (self.compression if compress_type is None
                           else compress_type)
    zinfo.file_size = file_stat.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        # Directories have a header but no data.
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader())
        return

    with open(filename, "rb") as src:
        # CRC and sizes are not known yet: write a header with zeroes
        # now and patch in the real values once the data is written.
        zinfo.CRC = crc = 0
        zinfo.compress_size = packed_len = 0
        zinfo.file_size = raw_len = 0
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        elif zinfo.compress_type == ZIP_BZIP2:
            cmpr = bz2.BZ2Compressor()
        else:
            cmpr = None
        while True:
            chunk = src.read(1024 * 8)
            if not chunk:
                break
            raw_len += len(chunk)
            crc = crc32(chunk, crc) & 0xffffffff
            if cmpr:
                chunk = cmpr.compress(chunk)
                packed_len += len(chunk)
            self.fp.write(chunk)

    if cmpr:
        # Emit whatever the compressor is still buffering.
        tail = cmpr.flush()
        packed_len += len(tail)
        self.fp.write(tail)
        zinfo.compress_size = packed_len
    else:
        zinfo.compress_size = raw_len
    zinfo.CRC = crc
    zinfo.file_size = raw_len

    # Seek backwards and write CRC and file sizes into the local header,
    # then return to the end of the data.
    end_of_data = self.fp.tell()
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size))
    self.fp.seek(end_of_data, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1114
def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
    """Store the string `bytes' in the archive as one member.

    `zinfo_or_arcname' is either a ZipInfo instance describing the
    member or just its name.  For a plain name a ZipInfo is created with
    the current local time, self.compression and permission bits 0600.
    `compress_type', when given, overrides the compression method of the
    ZipInfo.

    Raises RuntimeError if the archive has already been closed, plus
    whatever _writecheck() raises for the new member.
    """
    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    if compress_type is not None:
        zinfo.compress_type = compress_type

    zinfo.file_size = len(bytes)            # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(bytes) & 0xffffffff   # CRC-32 checksum

    # Pick a compressor for the member, if its method needs one.
    if zinfo.compress_type == ZIP_DEFLATED:
        co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                              zlib.DEFLATED, -15)
    elif zinfo.compress_type == ZIP_BZIP2:
        co = bz2.BZ2Compressor()
    else:
        co = None

    if co is None:
        payload = bytes
        zinfo.compress_size = zinfo.file_size
    else:
        payload = co.compress(bytes) + co.flush()
        zinfo.compress_size = len(payload)  # Compressed size

    # Nothing has been written since the offset was first recorded, so
    # this reads the same position again.
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    self.fp.write(zinfo.FileHeader())
    self.fp.write(payload)
    self.fp.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1161
1162 def __del__(self):
1163 """Call the "close()" method in case the user forgot."""
1164 self.close()
1165
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    When the archive was opened for writing and has been modified, the
    central directory, the ZIP64 end-of-archive records (only if the
    entry count, directory size or directory offset require them) and
    the end-of-archive record are written first.

    self.fp is always reset to None, and the underlying file is closed
    unless self._filePassed is set, even if writing the records raises.
    Calling close() again afterwards does nothing.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify:  # write ending records
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)

                # Values that do not fit the 32-bit fields are replaced
                # by 0xffffffff and stored in a ZIP64 extra field.
                extra = []
                if (zinfo.file_size > ZIP64_LIMIT
                        or zinfo.compress_size > ZIP64_LIMIT):
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                        '<HH' + 'Q' * len(extra),
                        1, 8 * len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(
                        structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Debugging aid: dump the values that could not be
                    # packed before re-raising.
                    sys.stderr.write("%s\n" % ((
                        structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data),
                        len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset),))
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = len(self.filelist)
            centDirSize = pos2 - pos1
            centDirOffset = pos1
            if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                    centDirOffset > ZIP64_LIMIT or
                    centDirSize > ZIP64_LIMIT):
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, centDirCount, centDirCount,
                    centDirSize, centDirOffset)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                # The classic record can only hold the clamped values.
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            # check for valid comment length
            if len(self.comment) > ZIP_MAX_COMMENT:
                if self.debug > 0:
                    msg = 'Archive comment is too long; truncating to %d bytes' \
                          % ZIP_MAX_COMMENT
                    print(msg)
                self.comment = self.comment[:ZIP_MAX_COMMENT]

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, centDirCount, centDirCount,
                                 centDirSize, centDirOffset, len(self.comment))
            self.fp.write(endrec)
            self.fp.write(self.comment)
            self.fp.flush()
    finally:
        # Release the file even when writing the records failed, so that
        # a later close() (for instance from __del__) does not retry.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1275
1276
class PyZipFile(ZipFile):
    """ZipFile variant that stores compiled Python modules and packages."""

    def writepy(self, pathname, basename=""):
        """Add the compiled module(s) found at "pathname" to the archive.

        If pathname is a package directory (it contains __init__.py),
        the package and all of its sub-packages are added recursively
        below "basename".  If pathname is a plain directory, the *.py
        files directly inside it are added.  Otherwise pathname must
        name a *.py file, and that single module is added.  What is
        stored is always the .pyo or .pyc file chosen by
        _get_codename(), which compiles the source if necessary.

        Raises RuntimeError if pathname is a file not ending in ".py".
        """
        def add_module(module_path, prefix, label):
            # Resolve (and maybe compile) one module, then store it.
            fname, arcname = self._get_codename(module_path, prefix)
            if self.debug:
                print("%s %s" % (label, arcname))
            self.write(fname, arcname)

        name = os.path.split(pathname)[1]

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            add_module(pathname[0:-3], basename, "Adding file")
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory %s" % (pathname,))
            for filename in os.listdir(pathname):
                if os.path.splitext(filename)[1] == ".py":
                    add_module(os.path.join(pathname, filename)[0:-3],
                               basename, "Adding")
            return

        # This is a package directory, add it
        basename = "%s/%s" % (basename, name) if basename else name
        if self.debug:
            print("Adding package in %s as %s" % (pathname, basename))
        add_module(initname[0:-3], basename, "Adding")

        # Add all remaining *.py files and package subdirectories
        entries = os.listdir(pathname)
        entries.remove("__init__.py")
        for filename in entries:
            path = os.path.join(pathname, filename)
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # Sub-package: recurse with the extended prefix.
                    self.writepy(path, basename)
            elif os.path.splitext(filename)[1] == ".py":
                add_module(path[0:-3], basename, "Adding")

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the module path "pathname".

        "pathname" is the module path without extension.  The .pyo file
        is used when it exists and is at least as new as the source;
        otherwise the .pyc file is used, after recompiling it when it is
        missing or older than the source.  A compile error is printed
        and the .pyc name is returned regardless.  For example, given
        /python/lib/string this may return
        (/python/lib/string.pyc, string.pyc); a non-empty "basename" is
        prepended to the archive name with a "/".
        """
        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"

        if (os.path.isfile(file_pyo)
                and os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime):
            fname = file_pyo    # Use .pyo file
        else:
            fname = file_pyc
            if (not os.path.isfile(file_pyc)
                    or os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime):
                import py_compile
                if self.debug:
                    print("Compiling %s" % (file_py,))
                try:
                    py_compile.compile(file_py, file_pyc, None, True)
                except py_compile.PyCompileError as err:
                    print(err.msg)

        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
1374
1375
def main(args=None):
    """Command-line interface: list, test, extract or create a zipfile.

    `args' is the argument list and defaults to sys.argv[1:].  The first
    argument selects the operation (-l, -t, -e or -c).  An unknown
    operation or a wrong number of arguments prints the usage text and
    exits with status 1.  The archive is closed even if the operation
    raises.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)

    def usage_exit():
        # Print the usage text and terminate with an error status.
        print(USAGE)
        sys.exit(1)

    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage_exit()

    if args[0] == '-l':
        if len(args) != 2:
            usage_exit()
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            usage_exit()
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            usage_exit()

        zf = ZipFile(args[1], 'r')
        try:
            # extractall() creates directory entries and keeps every
            # member below the target directory.
            zf.extractall(args[2])
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            usage_exit()

        def addToZip(zf, path, zippath):
            # Add a file, or recurse into a directory; ignore the rest.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1449
# Run the command-line interface when this file is executed as a script.
if __name__ == "__main__":
    main()