comparison 2.00/zipfile2.py @ 21:ec6f1a132109

A pretty usable version. Test groups and testconfs in non-ZIP archives or ZIP archives with comments are not yet supported.
author Oleg Oshmyan <chortos@inbox.lv>
date Fri, 06 Aug 2010 15:39:29 +0000
parents
children
comparison
equal deleted inserted replaced
20:5bfa23cd638d 21:ec6f1a132109
1 """
2 Read and write ZIP files.
3 """
4 # Improved by Chortos-2 in 2010 (added bzip2 support)
5 import struct, os, time, sys, shutil
6 import binascii, cStringIO, stat
7 import io
8 import re
9
10 try:
11 import zlib # We may need its compression method
12 crc32 = zlib.crc32
13 except ImportError:
14 zlib = None
15 crc32 = binascii.crc32
16
17 try:
18 import bz2 # We may need its compression method
19 except ImportError:
20 bz2 = None
21
22 __all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
23 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile", "ZIP_BZIP2" ]
24
class BadZipfile(Exception):
    """Raised by this module when a file cannot be parsed as a ZIP archive."""
27
28
class LargeZipFile(Exception):
    """Signal that an archive being written needs ZIP64 extensions.

    Raised while writing when the archive (or one of its members) would
    require the ZIP64 extensions but their use has been disabled.
    """
34
35 error = BadZipfile # The exception raised by this module
36
# Sizes and offsets above ZIP64_LIMIT are the ones for which this module
# switches to (or demands) the ZIP64 extensions.
ZIP64_LIMIT = (1 << 31) - 1
# NOTE(review): the next two limits are not referenced in the visible part of
# the file; presumably they are the largest entry count and archive comment
# length that the 16-bit header fields can describe -- confirm against users.
ZIP_FILECOUNT_LIMIT = 1 << 16
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
# Other ZIP compression methods not supported

# Below are some formats and associated data for reading/writing headers using
# the struct module. The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
# Layout: 4-byte signature, four unsigned shorts, two unsigned longs and one
# unsigned short, all little-endian (eight fields, indices 0-7 below).
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
# Layout: 4-byte signature followed by 18 little-endian integer fields
# (nineteen fields in total, indices 0-18 below).
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
# Layout: 4-byte signature followed by 11 little-endian integer fields
# (twelve fields in total, indices 0-11 below).
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and size
# Layout: 4-byte signature, unsigned long, unsigned 64-bit integer, unsigned
# long (unpacked by _EndRecData64 as signature, disk number, relative offset
# and disk count).
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
139
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    *fp* is an open, seekable binary file object.  An IOError raised while
    probing the file is treated the same as "not a ZIP file".
    """
    try:
        found = bool(_EndRecData(fp))  # truthy result: correct magic number
    except IOError:
        found = False
    return found
147
def is_zipfile(filename):
    """Quickly decide whether *filename* is a ZIP file via its magic number.

    *filename* may be a path or an object with a ``read`` attribute (a file
    or file-like object).  Any IOError -- from opening the path or from
    probing the file -- yields False.
    """
    try:
        if hasattr(filename, "read"):
            # Already a file object: probe it directly.
            return _check_zipfile(fp=filename)
        with open(filename, "rb") as fp:
            return _check_zipfile(fp)
    except IOError:
        return False
163
def _EndRecData64(fpin, offset, endrec):
    """Read the ZIP64 end-of-archive records and use them to update endrec.

    fpin   -- open, seekable binary file object.
    offset -- position of the classic end-of-central-directory record,
              expressed relative to the end of the file (i.e. negative).
    endrec -- list built by _EndRecData(); updated in place when a ZIP64
              record is found.

    Returns endrec, unchanged when the file has no (or not enough room for a)
    ZIP64 locator.  Raises BadZipfile for archives that span several disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # The file is too small to hold a ZIP64 locator in front of the
        # classic record (e.g. an empty archive), so there is nothing to add.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: file objects that clamp out-of-range seeks end up here.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
195
196
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    *fpin* is an open, seekable binary file object.  On success the result is
    a list holding the eight fields of the record (see the _ECD_* indices),
    followed by the archive comment and the file offset at which the record
    starts; the list is then passed through _EndRecData64().  None is
    returned when no valid record can be located.
    """

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == "\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append("")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # complete record to follow it: treat the file as not a ZIP.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return None
253
254
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry for *filename* last modified at *date_time*.

        filename  -- member name; kept verbatim in ``orig_filename`` and in
                     normalized form in ``filename``.
        date_time -- (year, month, day, hour, min, sec) tuple.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file (local) header as a string.

        When either size exceeds ZIP64_LIMIT a ZIP64 extra record is appended
        and ``extract_version`` / ``create_version`` are raised to at least 45
        as a side effect.
        """
        dt = self.date_time
        # Pack the timestamp into the DOS date/time bit fields.
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            self.create_version = max(45, self.extract_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        A unicode name is stored as ASCII when possible; otherwise it is
        encoded as UTF-8 and flag bit 0x800 is added to the returned flags.
        Byte-string names are returned untouched.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the filename, decoded from UTF-8 if flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply the ZIP64 record (type 1) of the extra field, if present.

        Replaces ``file_size``, ``compress_size`` and ``header_offset`` with
        the 64-bit values from the record wherever the 32-bit field holds the
        0xffffffff placeholder.  Raises RuntimeError for a ZIP64 record whose
        length is not one of the supported sizes.
        """
        extra = self.extra
        unpack = struct.unpack
        # Every record starts with a 4-byte (type, length) header; fewer than
        # four trailing bytes cannot form a record and are ignored instead of
        # making struct.unpack() fail.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
402
403
404 class _ZipDecrypter:
405 """Class to handle decryption of files stored within a ZIP archive.
406
407 ZIP supports a password-based form of encryption. Even though known
408 plaintext attacks have been found against it, it is still useful
409 to be able to get data out of such a file.
410
411 Usage:
412 zd = _ZipDecrypter(mypwd)
413 plain_char = zd(cypher_char)
414 plain_text = map(zd, cypher_text)
415 """
416
417 def _GenerateCRCTable():
418 """Generate a CRC-32 table.
419
420 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
421 internal keys. We noticed that a direct implementation is faster than
422 relying on binascii.crc32().
423 """
424 poly = 0xedb88320
425 table = [0] * 256
426 for i in range(256):
427 crc = i
428 for j in range(8):
429 if crc & 1:
430 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
431 else:
432 crc = ((crc >> 1) & 0x7FFFFFFF)
433 table[i] = crc
434 return table
435 crctable = _GenerateCRCTable()
436
437 def _crc32(self, ch, crc):
438 """Compute the CRC32 primitive on one byte."""
439 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
440
441 def __init__(self, pwd):
442 self.key0 = 305419896
443 self.key1 = 591751049
444 self.key2 = 878082192
445 for p in pwd:
446 self._UpdateKeys(p)
447
448 def _UpdateKeys(self, c):
449 self.key0 = self._crc32(c, self.key0)
450 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
451 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
452 self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
453
454 def __call__(self, c):
455 """Decrypt a single character."""
456 c = ord(c)
457 k = self.key2 | 2
458 c = c ^ (((k * (k^1)) >> 8) & 255)
459 c = chr(c)
460 self._UpdateKeys(c)
461 return c
462
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.  Parenthesized on purpose: without
    # the parentheses the expression parses as 1 << (31 - 1).
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None):
        """Wrap *fileobj*, positioned at the start of the member's data.

        fileobj   -- file object the (possibly compressed) bytes are read from.
        mode      -- open mode; a 'U' in it enables universal newlines.
        zipinfo   -- ZipInfo of the member (compression type and size, name).
        decrypter -- optional per-character decrypting callable.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        if self._compress_type == ZIP_DEFLATED:
            self._decompressor = zlib.decompressobj(-15)
        elif self._compress_type == ZIP_BZIP2:
            self._decompressor = bz2.BZ2Decompressor()
            # Per-instance override of the class-level block size for bzip2.
            self.MIN_READ_SIZE = 900000
        self._unconsumed = ''

        self._readbuffer = ''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find('\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = ''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == '':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + '\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # Un-consume what read() just handed out.
            self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Report that the stream supports reading (always True)."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, data is read and
        returned until EOF is reached.  For a non-negative n the result never
        holds more than n bytes: each read1() call is asked only for what is
        still missing.
        """
        if n is None:
            n = -1

        buf = ''
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > len(buf):
                data = self.read1(n - len(buf))
            else:
                return buf
            if len(data) == 0:
                return buf
            buf += data

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        # Simplify algorithm (branching) by transforming negative n to large n.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = ''.join(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            if len(self._unconsumed) == 0 and self._compress_left == 0:
                data += self._decompressor.flush()

            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0
        elif (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_BZIP2):
            # The bzip2 decompressor is handed everything that was read, so
            # nothing is carried over to the next call.
            data = self._decompressor.decompress(self._unconsumed)

            self._unconsumed = ''
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data
638
639
640
641 class ZipFile:
642 """ Class with methods to open, read, write, close, list zip files.
643
644 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
645
646 file: Either the path to the file, or a file-like object.
647 If it is a path, the file will be opened and closed by ZipFile.
648 mode: The mode can be either read "r", write "w" or append "a".
649 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
650 or ZIP_BZIP2 (requires bz2).
651 allowZip64: if True ZipFile will create files with ZIP64 extensions when
652 needed, otherwise it will raise an exception when this would
653 be necessary.
654
655 """
656
657 fp = None # Set here since __del__ checks it
658
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file        -- path of the archive or an already open file-like object.
    compression -- ZIP_STORED, ZIP_DEFLATED or ZIP_BZIP2; RuntimeError is
                   raised for other values or when the codec module is
                   missing.
    allowZip64  -- stored for later use when writing.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    # Validate the compression method and the availability of its codec.
    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
    elif compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                  "Compression requires the (missing) bz2 module")
    elif compression != ZIP_STORED:
        raise RuntimeError("That compression method is not supported")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None
    self.comment = ''

    if isinstance(file, basestring):
        # A path: this object opens (and therefore owns) the file.
        self._filePassed = 0
        self.filename = file
        modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        try:
            self.fp = open(file, modeDict[mode])
        except IOError:
            if mode != 'a':
                raise
            # Appending to a file that cannot be opened for update:
            # fall back to creating it.
            mode = key = 'w'
            self.fp = open(file, modeDict[mode])
    else:
        # A file-like object supplied by the caller.
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    if key == 'r':
        self._GetContents()
    elif key == 'a':
        try:
            # See if file is a zip file
            self._RealGetContents()
            # seek to start of directory and overwrite
            self.fp.seek(self.start_dir, 0)
        except BadZipfile:
            # file is not a zip file, just append
            self.fp.seek(0, 2)
    elif key != 'w':
        if not self._filePassed:
            self.fp.close()
            self.fp = None
        raise RuntimeError('Mode must be "r", "w" or "a"')
721
def __enter__(self):
    """Enter a ``with`` block; the archive object itself is the target."""
    return self
724
def __exit__(self, type, value, traceback):
    """Leave a ``with`` block by calling close(); exceptions are not
    suppressed (the method returns None)."""
    self.close()
727
def _GetContents(self):
    """Load the table of contents, closing our own file on a bad archive.

    BadZipfile is re-raised after the file has been closed; a file object
    that was passed in by the caller is left open.
    """
    try:
        self._RealGetContents()
    except BadZipfile:
        opened_here = not self._filePassed
        if opened_here:
            self.fp.close()
            self.fp = None
        raise
738
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Fills ``self.filelist`` and ``self.NameToInfo`` from the central
    directory and sets ``self.comment`` and ``self.start_dir``.  Raises
    BadZipfile when the end record is missing or a directory entry is
    malformed or truncated.
    """
    fp = self.fp
    endrec = _EndRecData(fp)
    if not endrec:
        raise BadZipfile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self.comment = endrec[_ECD_COMMENT]     # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset %s %s %s" % (offset_cd, inferred, concat))
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # Parse the directory from memory rather than from the archive file.
    fp = cStringIO.StringIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if centdir[0:4] != stringCentralDir:
            raise BadZipfile("Bad magic number for central directory")
        if len(centdir) != sizeCentralDir:
            # A valid signature followed by too few bytes would otherwise
            # surface as a struct.error from unpack().
            raise BadZipfile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
            x.flag_bits, x.compress_type, t, d,
            x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        x._decodeExtra()
        x.header_offset = x.header_offset + concat
        x.filename = x._decodeFilename()
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total %s" % total)
801
802
def namelist(self):
    """Return a new list with the name of every member, in archive order."""
    return [entry.filename for entry in self.filelist]
809
def infolist(self):
    """Return a list of class ZipInfo instances for files in the
    archive.

    The internal list object itself is returned, not a copy.
    """
    return self.filelist
814
def printdir(self):
    """Print a table of contents (name, modification time, size) to stdout."""
    heading = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(heading)
    for zinfo in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
        row = "%-46s %s %12d" % (zinfo.filename, stamp, zinfo.file_size)
        print(row)
821
822 def testzip(self):
823 """Read all the files and check the CRC."""
824 chunk_size = 2 ** 20
825 for zinfo in self.filelist:
826 try:
827 # Read by chunks, to avoid an OverflowError or a
828 # MemoryError with very large embedded files.
829 f = self.open(zinfo.filename, "r")
830 while f.read(chunk_size): # Check CRC-32
831 pass
832 except BadZipfile:
833 return zinfo.filename
834
def getinfo(self, name):
    """Look up and return the ZipInfo registered under *name*.

    Raises KeyError when the archive has no member of that name.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError('There is no item named %r in the archive' % name)
843
def setpassword(self, pwd):
    """Set default password for encrypted files.

    The value is stored in ``self.pwd``; open() falls back to it when no
    password is passed explicitly.
    """
    self.pwd = pwd
847
def read(self, name, pwd=None):
    """Return file bytes (as a string) for name.

    Opens the member with open(name, "r", pwd) and reads it to the end.
    """
    return self.open(name, "r", pwd).read()
851
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name -- member name or ZipInfo instance.
    mode -- "r", "U" or "rU".
    pwd  -- password for an encrypted member; defaults to the one set with
            setpassword().

    Raises RuntimeError for a bad mode, a closed archive, a missing or
    wrong password, and BadZipfile when the local file header does not
    match the central directory.  A file opened here is closed again if
    any of these checks fails.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
              "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
        owns_file = False
    else:
        zef_file = open(self.filename, 'rb')
        owns_file = True

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if fname != zinfo.orig_filename:
            raise BadZipfile(
                  'File name in directory "%s" and header "%s" differ.' % (
                      zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                      "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            crypt_header = zef_file.read(12)
            h = [zd(c) for c in crypt_header[0:12]]
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if ord(h[11]) != check_byte:
                raise RuntimeError("Bad password for file", name)

        return ZipExtFile(zef_file, mode, zinfo, zd)
    except:
        # Do not leak the handle we opened ourselves; the caller's own file
        # object is left alone.  The original exception is re-raised.
        if owns_file:
            zef_file.close()
        raise
920
def extract(self, member, path=None, pwd=None):
    """Extract one member below *path* and return the path that was written.

    `member' may be a filename or a ZipInfo object; the member keeps its
    full archive name.  Without `path' the current working directory is
    used.  `pwd' is passed on for encrypted members.
    """
    info = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(info, destination, pwd)
934
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members by calling extract() for each of them.

    `path' selects the target directory (extract() defaults to the current
    working directory).  `members' is optional and must be a subset of the
    list returned by namelist(); when omitted, every member is extracted.
    """
    selected = self.namelist() if members is None else members
    for item in selected:
        self.extract(item, path, pwd)
946
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
    file below the directory 'targetpath'.

    Returns the normalized path of the file or directory that was
    created.  'pwd' is passed on to self.open().

    The member name comes from the archive and is treated as untrusted:
    a drive prefix, leading separators and any '.' or '..' components are
    dropped, so the result always lies inside 'targetpath'.
    """
    # Strip trailing path separator, unless it represents the root.
    if (targetpath[-1:] in (os.path.sep, os.path.altsep)
            and len(os.path.splitdrive(targetpath)[1]) > 1):
        targetpath = targetpath[:-1]

    # Archive names use forward slashes; convert them to the platform
    # separator before picking the name apart.
    arcname = member.filename.replace('/', os.path.sep)
    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # Discard everything that could make the name absolute or let it
    # climb out of the target directory.
    arcname = os.path.splitdrive(arcname)[1]
    unsafe = ('', os.path.curdir, os.path.pardir)
    arcname = os.path.sep.join(
        [part for part in arcname.split(os.path.sep) if part not in unsafe])

    targetpath = os.path.normpath(os.path.join(targetpath, arcname))

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    # A name ending in '/' denotes a directory entry: there is no data
    # to copy.  endswith() also copes with an empty name.
    if member.filename.endswith('/'):
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    # Close both handles even if opening the target or the copy fails.
    source = self.open(member, pwd=pwd)
    try:
        target = open(targetpath, "wb")
        try:
            shutil.copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()

    return targetpath
983
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    Prints a warning for a duplicate member name when self.debug is set.
    Raises RuntimeError if the archive is not in mode "w" or "a", has
    already been closed, or if zinfo.compress_type is unsupported or
    needs a module (zlib, bz2) that could not be imported.  Raises
    LargeZipFile if the file size or the header offset exceeds
    ZIP64_LIMIT while ZIP64 extensions are disabled.
    """
    if zinfo.filename in self.NameToInfo and self.debug:
        # Warning for duplicate names
        print("%s %s" % ("Duplicate name:", zinfo.filename))

    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")

    method = zinfo.compress_type
    if method == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if method == ZIP_BZIP2 and not bz2:
        raise RuntimeError(
            "Compression requires the (missing) bz2 module")
    if method not in (ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2):
        raise RuntimeError(
            "That compression method is not supported")

    if zinfo.file_size > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Zipfile size would require ZIP64 extensions")
1009
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    `arcname' defaults to `filename'; its drive and leading path
    separators are removed and a directory gets a trailing '/'.
    `compress_type' overrides self.compression for this one member.
    Raises RuntimeError if the archive has already been closed, plus
    whatever _writecheck() raises.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    info = os.stat(filename)
    is_directory = stat.S_ISDIR(info.st_mode)
    date_time = time.localtime(info.st_mtime)[0:6]

    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if is_directory:
        arcname += '/'

    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (info.st_mode & 0xFFFF) << 16    # Unix attributes
    zinfo.compress_type = (self.compression if compress_type is None
                           else compress_type)
    zinfo.file_size = info.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if is_directory:
        # A directory is stored as an empty member: header only.
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader())
        return

    with open(filename, "rb") as src:
        # The CRC and the sizes are only known once the data has been
        # streamed, so a header with zeroes goes out first and is
        # patched afterwards.
        zinfo.CRC = checksum = 0
        zinfo.compress_size = packed_len = 0
        zinfo.file_size = raw_len = 0
        self.fp.write(zinfo.FileHeader())

        if zinfo.compress_type == ZIP_DEFLATED:
            compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                          zlib.DEFLATED, -15)
        elif zinfo.compress_type == ZIP_BZIP2:
            compressor = bz2.BZ2Compressor()
        else:
            compressor = None

        # Copy in 8 KiB chunks until read() returns nothing.
        for chunk in iter(lambda: src.read(1024 * 8), b""):
            raw_len += len(chunk)
            checksum = crc32(chunk, checksum) & 0xffffffff
            if compressor:
                chunk = compressor.compress(chunk)
                packed_len += len(chunk)
            self.fp.write(chunk)

    if compressor:
        tail = compressor.flush()
        packed_len += len(tail)
        self.fp.write(tail)
        zinfo.compress_size = packed_len
    else:
        zinfo.compress_size = raw_len
    zinfo.CRC = checksum
    zinfo.file_size = raw_len

    # Seek backwards and write CRC and file sizes
    resume_at = self.fp.tell()      # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size))
    self.fp.seek(resume_at, 0)

    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1092
def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
    """Write a file into the archive.  The contents is the string
    'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When only a name is given, a ZipInfo stamped with the current local
    time, self.compression and permission bits 0600 is created.
    'compress_type', if not None, overrides the compression method of
    the entry (including that of a ZipInfo passed in).  Raises
    RuntimeError if the archive has already been closed, plus whatever
    _writecheck() raises.
    """
    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    if compress_type is not None:
        zinfo.compress_type = compress_type

    payload = bytes
    zinfo.file_size = len(payload)              # Uncompressed size
    zinfo.header_offset = self.fp.tell()        # Start of header bytes
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(payload) & 0xffffffff     # CRC-32 checksum

    if zinfo.compress_type == ZIP_DEFLATED:
        compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                      zlib.DEFLATED, -15)
    elif zinfo.compress_type == ZIP_BZIP2:
        compressor = bz2.BZ2Compressor()
    else:
        compressor = None
    if compressor is not None:
        payload = compressor.compress(payload) + compressor.flush()
    # For stored entries the payload is untouched, so this equals
    # zinfo.file_size.
    zinfo.compress_size = len(payload)          # Compressed size

    zinfo.header_offset = self.fp.tell()        # Start of header bytes
    self.fp.write(zinfo.FileHeader())
    self.fp.write(payload)
    self.fp.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1139
def __del__(self):
    """Finalizer: run close() on behalf of a caller who never did."""
    self.close()
1143
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Calling close() on an archive that is already closed does nothing.
    If the archive was opened in mode "w" or "a" and has been modified,
    the central directory, the ZIP64 end-of-archive records (when the
    entry count, directory size or directory offset require them) and
    the end-of-archive record followed by the archive comment are
    written first.  A comment longer than ZIP_MAX_COMMENT is truncated,
    with a message printed when self.debug > 0.

    The underlying file is closed only when it was not passed in by the
    caller (self._filePassed is false).  self.fp is reset to None in
    every case, including when writing the ending records raises.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify: # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                # Values too large for the 32-bit fields are collected
                # here and stored in a ZIP64 extra field instead.
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                        '<HH' + 'Q'*len(extra),
                        1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Dump the values that could not be packed to help
                    # diagnose the problem, then let the error propagate.
                    sys.stderr.write("%s\n" % ((structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset),))
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = count
            centDirSize = pos2 - pos1
            centDirOffset = pos1
            if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                    centDirOffset > ZIP64_LIMIT or
                    centDirSize > ZIP64_LIMIT):
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, centDirCount, centDirCount,
                    centDirSize, centDirOffset)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                # The classic record can only hold the saturated values.
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            # check for valid comment length
            if len(self.comment) > ZIP_MAX_COMMENT:
                if self.debug > 0:
                    print('Archive comment is too long; truncating to %d bytes'
                          % ZIP_MAX_COMMENT)
                self.comment = self.comment[:ZIP_MAX_COMMENT]

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, centDirCount, centDirCount,
                                 centDirSize, centDirOffset, len(self.comment))
            self.fp.write(endrec)
            self.fp.write(self.comment)
            self.fp.flush()
    finally:
        # Release the file even if writing the ending records failed, and
        # mark the archive closed first so a later close() is a no-op.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1253
1254
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename = ""):
        """Add the byte-compiled modules found at "pathname" to the archive.

        A package directory (one containing __init__.py) is added
        together with its *.py modules and, recursively, its package
        subdirectories, all stored below "basename/<package name>".  A
        plain directory contributes only the *.py files it contains
        directly.  Anything else must be the path of a single *.py file,
        otherwise RuntimeError is raised.  What gets stored is always
        the module.pyo or module.pyc file; module.py is compiled to
        module.pyc first when necessary.
        """
        def add_module(py_path, prefix, label):
            # Store the compiled counterpart of the .py file 'py_path'.
            fname, arcname = self._get_codename(py_path[0:-3], prefix)
            if self.debug:
                print("%s %s" % (label, arcname))
            self.write(fname, arcname)

        name = os.path.split(pathname)[1]

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            add_module(pathname, basename, "Adding file")
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("%s %s" % ("Adding files from directory", pathname))
            for filename in os.listdir(pathname):
                if os.path.splitext(filename)[1] == ".py":
                    add_module(os.path.join(pathname, filename), basename,
                               "Adding")
            return

        # This is a package directory, add it
        if basename:
            basename = "%s/%s" % (basename, name)
        else:
            basename = name
        if self.debug:
            print("%s %s %s %s"
                  % ("Adding package in", pathname, "as", basename))
        add_module(initname, basename, "Adding")

        dirlist = os.listdir(pathname)
        dirlist.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for filename in dirlist:
            path = os.path.join(pathname, filename)
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename)  # Recursive call
            elif os.path.splitext(filename)[1] == ".py":
                add_module(path, basename, "Adding")

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        "pathname" is a module path without its extension.  The .pyo
        file is chosen when it exists and is at least as new as the .py
        file; otherwise the .pyc file is chosen, after compiling the .py
        file if the .pyc is missing or older.  A compilation error is
        printed, not raised.  For example, given /python/lib/string,
        return (/python/lib/string.pyc, string.pyc); the archive name is
        prefixed with "basename/" when basename is not empty.
        """
        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"

        if os.path.isfile(file_pyo) and \
                os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
            fname = file_pyo    # Use .pyo file
        else:
            fname = file_pyc
            if not os.path.isfile(file_pyc) or \
                    os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
                import py_compile
                if self.debug:
                    print("%s %s" % ("Compiling", file_py))
                try:
                    py_compile.compile(file_py, file_pyc, None, True)
                except py_compile.PyCompileError as err:
                    print(err.msg)

        archivename = os.path.basename(fname)
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
1352
1353
def main(args = None):
    """Command-line interface: list (-l), test (-t), extract (-e) or
    create (-c) a ZIP archive.

    `args' is the argument list without the program name; it defaults to
    sys.argv[1:].  On a missing or unknown option, or a wrong number of
    arguments, the usage text is printed and sys.exit(1) is called.  The
    archive is closed even when the requested operation raises.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)

    def usage_exit():
        # Print the usage text and terminate with exit status 1.
        print(USAGE)
        sys.exit(1)

    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage_exit()

    if args[0] == '-l':
        if len(args) != 2:
            usage_exit()
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            usage_exit()
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        # Report what testzip() returned instead of discarding it.
        if badfile:
            print("The following enclosed file is corrupted: %r" % (badfile,))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            usage_exit()

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                if path.endswith('/'):
                    # Directory entry: there is no data to write.
                    if not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                # tgtdir is empty when the member lands directly in a
                # relative, empty `out'; makedirs('') would fail.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            usage_exit()

        def addToZip(zf, path, zippath):
            # Add a file, or recursively the contents of a directory.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1425
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()