comparison 2.00/zipfile26.py @ 29:a8cc383b787c

Clean up zipfiles and diff them to stock ones
author Oleg Oshmyan <chortos@inbox.lv>
date Wed, 24 Nov 2010 23:21:31 +0000
parents
children f17f19d9eb0a
comparison
equal deleted inserted replaced
28:3d535503161f 29:a8cc383b787c
1 """
2 Read and write ZIP files.
3 """
4 # Improved by Chortos-2 in 2009 and 2010 (added bzip2 support)
5 import struct, os, time, sys, shutil
6 import binascii, cStringIO, stat
7
8 try:
9 import zlib # We may need its compression method
10 crc32 = zlib.crc32
11 except ImportError:
12 zlib = None
13 crc32 = binascii.crc32
14
15 try:
16 import bz2 # We may need its compression method
17 except ImportError:
18 bz2 = None
19
20 __all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
21 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile", "ZIP_BZIP2" ]
22
class BadZipfile(Exception):
    """Raised by this module when an archive is malformed or not a ZIP file."""


class LargeZipFile(Exception):
    """Signal that an archive needs ZIP64 extensions that are disabled.

    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """


# The exception raised by this module (alias of BadZipfile)
error = BadZipfile
34
# Size limits.  Sizes and offsets above ZIP64_LIMIT need the ZIP64 extension.
ZIP64_LIMIT = 0x7FFFFFFF            # (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 0x10000       # 1 << 16
ZIP_MAX_COMMENT = 0xFFFF            # (1 << 16) - 1

# constants for Zip file compression methods
# (other ZIP compression methods are not supported)
ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2 = 0, 8, 12

# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those
# used in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

(_ECD_SIGNATURE,
 _ECD_DISK_NUMBER,
 _ECD_DISK_START,
 _ECD_ENTRIES_THIS_DISK,
 _ECD_ENTRIES_TOTAL,
 _ECD_SIZE,
 _ECD_OFFSET,
 _ECD_COMMENT_SIZE,
 # These last two indices are not part of the structure as defined in the
 # spec, but they are used internally by this module as a convenience
 _ECD_COMMENT,
 _ECD_LOCATION) = range(10)

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)

# The "Zip64 end of central directory locator" structure, magic number,
# and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

(_CD64_SIGNATURE,
 _CD64_DIRECTORY_RECSIZE,
 _CD64_CREATE_VERSION,
 _CD64_EXTRACT_VERSION,
 _CD64_DISK_NUMBER,
 _CD64_DISK_NUMBER_START,
 _CD64_NUMBER_ENTRIES_THIS_DISK,
 _CD64_NUMBER_ENTRIES_TOTAL,
 _CD64_DIRECTORY_SIZE,
 _CD64_OFFSET_START_CENTDIR) = range(10)
137
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    filename is the path of the file to inspect.  Return True when an
    "end of central directory" record can be located in it, and False
    otherwise -- including when the file cannot be opened or read
    (IOError).
    """
    try:
        fpin = open(filename, "rb")
        try:
            endrec = _EndRecData(fpin)
        finally:
            # Release the handle even if parsing the end record fails;
            # previously it was left open whenever _EndRecData raised.
            fpin.close()
    except IOError:
        return False
    # A non-empty end record means the file has the correct magic number.
    return bool(endrec)
149
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin is the seekable archive file object.  offset is the position of
    the regular "end of central directory" record relative to the end of
    the file (it is passed to seek() with whence=2).  endrec is the list
    built by _EndRecData(); it is updated in place and returned.

    endrec is returned unchanged when no ZIP64 locator/record is present,
    including when the file is too small to contain one.  Raises
    BadZipfile for archives that span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a
        # ZIP64 locator, so the plain end record is all there is.
        return endrec
    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read (e.g. a file object that clamps the seek): no locator.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    try:
        fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    except IOError:
        return endrec
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
181
182
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    fpin must be a seekable file object.  None is returned when no valid
    record can be found, including when a candidate record is truncated.
    """

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length test guards struct.unpack against a short read from file
    # objects that clamp an out-of-range seek instead of raising.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == "\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append("")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # whole record to follow it: not a usable end record.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return None
239
240
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename is the member name; it is cut at the first null byte and
        os.sep is replaced by "/".  date_time is a
        (year, month, day, hour, min, sec) tuple.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0      # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3      # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        When either size exceeds ZIP64_LIMIT a ZIP64 extra record is
        appended and extract_version / create_version are raised to at
        least 45 as a side effect.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Raise create_version from its own value; deriving it from
            # extract_version could lower a higher create_version.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        A unicode name that is not pure ASCII is encoded as UTF-8 and bit
        0x800 is set in the returned flags; byte-string names are passed
        through unchanged.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the filename, decoded from UTF-8 if flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply the ZIP64 record (type 1) found in self.extra, if any.

        file_size, compress_size and header_offset are replaced by the
        64-bit values from the record when they hold the 0xffffffff
        placeholder.  Raises RuntimeError for a ZIP64 record with an
        unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # Stop once fewer than four bytes remain: that cannot be another
        # (type, length) header and would make unpack() raise struct.error.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
388
389
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = []
        for value in range(256):
            crc = value
            for _ in range(8):
                low_bit = crc & 1
                crc = (crc >> 1) & 0x7FFFFFFF
                if low_bit:
                    crc ^= poly
            table.append(crc)
        return table
    # Built once, while the class body is executed, and shared by instances.
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ord(ch)) & 0xff
        return ((crc >> 8) & 0xffffff) ^ self.crctable[index]

    def __init__(self, pwd):
        """Seed the three keys and mix in every character of pwd."""
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for ch in pwd:
            self._UpdateKeys(ch)

    def _UpdateKeys(self, c):
        """Advance the three keys using the plaintext character c."""
        self.key0 = self._crc32(c, self.key0)
        key1 = (self.key1 + (self.key0 & 0xff)) & 0xffffffff
        self.key1 = (key1 * 0x08088405 + 1) & 0xffffffff
        top_byte = (self.key1 >> 24) & 0xff
        self.key2 = self._crc32(chr(top_byte), self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        k = self.key2 | 2
        mask = ((k * (k ^ 1)) >> 8) & 0xff
        plain = chr(ord(c) ^ mask)
        # The keys are advanced with the decrypted character.
        self._UpdateKeys(plain)
        return plain
448
class ZipExtFile:
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Wrap fileobj, which must be positioned at the member's data.

        zipinfo supplies compress_type, compress_size and filename.
        decrypt, when given, is a callable applied to every character
        read from fileobj before decompression.
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0
        self.rawbuffer = ''       # read from file, not yet decompressed
        self.readbuffer = ''      # decompressed, not yet handed to caller
        self.linebuffer = ''      # pending data for readline()
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = ("\n", )
        self.lastdiscard = ''     # separator removed by the last readline()

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed = False
        self.mode = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            self.dc = zlib.decompressobj(-15)
        elif self.compress_type == ZIP_BZIP2:
            self.dc = bz2.BZ2Decompressor()
            self.compreadsize = 900000

    def set_univ_newlines(self, univ_newlines):
        """Enable or disable universal-newline handling in readline()."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = ("\n", )
        if self.univ_newlines:
            self.nlSeps = ("\r\n", "\r", "\n")

    def __iter__(self):
        return self

    def next(self):
        """Return the next line, raising StopIteration at end of data."""
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark the object closed; the underlying file is left untouched."""
        self.closed = True

    def _checkfornewline(self):
        """Return (index, length) of the first separator in linebuffer.

        Both values are -1 when the buffer holds no separator.
        """
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
                self.linebuffer = self.linebuffer[1:]

            for sep in self.nlSeps:
                nl = self.linebuffer.find(sep)
                if nl >= 0:
                    nllen = len(sep)
                    return nl, nllen

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.
        """
        if size < 0:
            size = sys.maxint
        elif size == 0:
            return ''

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl >= 0:
            # the next line was already in the buffer
            nl = min(nl, size)
        else:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = ''
                return s

        buf = self.linebuffer[:nl]
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + "\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def read(self, size = None):
        """Return up to size characters of member data.

        With size None or negative, everything that is left is returned;
        with size 0 an empty string is returned.
        """
        # act like file() obj and return empty string if size is 0
        if size == 0:
            return ''

        # A negative size means "no limit".  Without this normalisation the
        # final slice below would be readbuffer[:size] and silently hold
        # back the last characters of the data.
        if size is not None and size < 0:
            size = None

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        if size is not None:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            else:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            chunk = self.fileobj.read(bytesToRead)
            self.bytes_read += len(chunk)
            self.rawbuffer += chunk

            # handle contents of raw buffer
            if self.rawbuffer:
                newdata = self.rawbuffer
                self.rawbuffer = ''

                # decrypt new data if we were given an object to handle that
                if newdata and self.decrypter is not None:
                    newdata = ''.join(map(self.decrypter, newdata))

                # decompress newly read data if necessary
                if newdata and self.compress_type != ZIP_STORED:
                    newdata = self.dc.decompress(newdata)
                    if self.compress_type == ZIP_DEFLATED:
                        self.rawbuffer = self.dc.unconsumed_tail
                    else:
                        self.rawbuffer = ''
                    # NOTE: nothing in this class sets self.eof to True, so
                    # this flush only runs if a caller sets the flag.
                    if self.eof and len(self.rawbuffer) == 0:
                        # we're out of raw bytes (both from the file and
                        # the local buffer); flush just to make sure the
                        # decompressor is done
                        if hasattr(self.dc, 'flush'):
                            newdata += self.dc.flush()
                        # prevent decompressor from being used again
                        self.dc = None

                self.readbuffer += newdata

        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            result = self.readbuffer
            self.readbuffer = ''
        else:
            result = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return result
645
646
647 class ZipFile:
648 """ Class with methods to open, read, write, close, list zip files.
649
650 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
651
652 file: Either the path to the file, or a file-like object.
653 If it is a path, the file will be opened and closed by ZipFile.
654 mode: The mode can be either read "r", write "w" or append "a".
655 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
656 or ZIP_BZIP2 (requires bz2).
657 allowZip64: if True ZipFile will create files with ZIP64 extensions when
658 needed, otherwise it will raise an exception when this would
659 be necessary.
660
661 """
662
663 fp = None # Set here since __del__ checks it
664
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file is either a path (opened here) or a file-like object (used as
    is).  Raises RuntimeError for an unknown mode, for an unsupported
    compression method, or when the module backing the requested
    compression method is missing.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    # Reject compression methods we cannot honour
    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
    elif compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                  "Compression requires the (missing) bz2 module")
    elif compression != ZIP_STORED:
        raise RuntimeError("That compression method is not supported")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None
    self.comment = ''

    # Check if we were passed a file-like object
    if not isinstance(file, basestring):
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)
    else:
        self._filePassed = 0
        self.filename = file
        modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        try:
            self.fp = open(file, modeDict[mode])
        except IOError:
            if mode != 'a':
                raise
            # Appending to a file that cannot be opened for update:
            # fall back to creating it (self.mode stays "a").
            mode = key = 'w'
            self.fp = open(file, modeDict[mode])

    if key == 'r':
        self._GetContents()
    elif key == 'w':
        pass
    elif key == 'a':
        try:                        # See if file is a zip file
            self._RealGetContents()
            # seek to start of directory and overwrite
            self.fp.seek(self.start_dir, 0)
        except BadZipfile:          # file is not a zip file, just append
            self.fp.seek(0, 2)
    else:
        if not self._filePassed:
            self.fp.close()
            self.fp = None
        raise RuntimeError('Mode must be "r", "w" or "a"')
727
def _GetContents(self):
    """Load the archive directory, closing our own file on bad input.

    Delegates to _RealGetContents().  If that raises BadZipfile and the
    file was opened by this object (not passed in by the caller), the
    file is closed and self.fp reset to None before the exception is
    re-raised.
    """
    try:
        self._RealGetContents()
    except BadZipfile:
        owns_file = not self._filePassed
        if owns_file:
            self.fp.close()
            self.fp = None
        raise
738
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the "end of central directory" record, reads the central
    directory and creates one ZipInfo per entry, appending it to
    self.filelist and registering it in self.NameToInfo.  Also sets
    self.comment and self.start_dir.

    Raises BadZipfile when no end record is found, or when a central
    directory entry has a bad magic number or is truncated.
    """
    fp = self.fp
    endrec = _EndRecData(fp)
    if not endrec:
        raise BadZipfile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self.comment = endrec[_ECD_COMMENT]     # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        # One formatted string so the output is the same whether print is
        # a statement or a function.
        print("given, inferred, offset %s %s %s"
              % (offset_cd, inferred, concat))
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    fp = cStringIO.StringIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if centdir[0:4] != stringCentralDir:
            raise BadZipfile("Bad magic number for central directory")
        if len(centdir) != sizeCentralDir:
            # Right signature but not enough bytes: report it as a bad
            # archive instead of letting struct.error escape.
            raise BadZipfile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
            x.flag_bits, x.compress_type, t, d,
            x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                        t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        x._decodeExtra()
        x.header_offset = x.header_offset + concat
        x.filename = x._decodeFilename()
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total %s" % (total,))
801
802
def namelist(self):
    """Return a new list holding the filename of every archive member."""
    return [info.filename for info in self.filelist]
809
def infolist(self):
    """Return the list of ZipInfo instances, one per archive member.

    The list returned is self.filelist itself, not a copy.
    """
    members = self.filelist
    return members
814
def printdir(self):
    """Print a table of contents for the zip file.

    Writes a heading followed by one line per member giving its name,
    modification date/time and uncompressed size.
    """
    heading = ("File Name", "Modified ", "Size")
    print("%-46s %19s %12s" % heading)
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        row = (entry.filename, stamp, entry.file_size)
        print("%-46s %s %12d" % row)
821
822 def testzip(self):
823 """Read all the files and check the CRC."""
824 chunk_size = 2 ** 20
825 for zinfo in self.filelist:
826 try:
827 # Read by chunks, to avoid an OverflowError or a
828 # MemoryError with very large embedded files.
829 f = self.open(zinfo.filename, "r")
830 while f.read(chunk_size): # Check CRC-32
831 pass
832 except BadZipfile:
833 return zinfo.filename
834
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError when the archive has no member called name.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError('There is no item named %r in the archive' % name)
843
def setpassword(self, pwd):
    """Remember pwd as the default password for encrypted members."""
    self.pwd = pwd
847
def read(self, name, pwd=None):
    """Return file bytes (as a string) for name.

    The member is opened with self.open() in mode "r" using pwd and
    read in full.
    """
    member = self.open(name, "r", pwd)
    return member.read()
851
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name is a member name or a ZipInfo instance.  mode must be "r", "U"
    or "rU"; the latter two enable universal newlines on the returned
    ZipExtFile.  pwd is the password for encrypted members and defaults
    to the one set with setpassword().

    Raises RuntimeError for a bad mode, a closed archive, a missing or
    wrong password; KeyError for an unknown member name; BadZipfile when
    the local file header is damaged or disagrees with the directory.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
              "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = open(self.filename, 'rb')

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")
        if len(fheader) != sizeFileHeader:
            # Right signature but too few bytes for struct.unpack
            raise BadZipfile("Truncated file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if fname != zinfo.orig_filename:
            raise BadZipfile(
                  'File name in directory "%s" and header "%s" differ.' % (
                      zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            header12 = zef_file.read(12)
            h = [zd(ch) for ch in header12[0:12]]
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if ord(h[11]) != check_byte:
                raise RuntimeError("Bad password for file", name)

        # build the ZipExtFile (zd is None for unencrypted members)
        zef = ZipExtFile(zef_file, zinfo, zd)
    except:
        # Do not leak the handle we opened ourselves when anything above
        # fails; a file object supplied by the caller is left alone.
        if not self._filePassed:
            zef_file.close()
        raise

    # set universal newlines on ZipExtFile if necessary
    if "U" in mode:
        zef.set_univ_newlines(True)
    return zef
929
def extract(self, member, path=None, pwd=None):
    """Extract one member and return the path it was written to.

    `member' may be a filename or a ZipInfo object.  The member is
    extracted under its full name into `path', which defaults to the
    current working directory.  `pwd' is the password used for
    encrypted members.
    """
    if isinstance(member, ZipInfo):
        zinfo = member
    else:
        zinfo = self.getinfo(member)

    destination = path
    if destination is None:
        destination = os.getcwd()

    return self._extract_member(zinfo, destination, pwd)
943
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members, by default all of them.

    `path' is the directory to extract to (the current working directory
    when None).  `members' is optional and must be a subset of the list
    returned by namelist().  `pwd' is passed on to extract() for each
    member.
    """
    selected = self.namelist() if members is None else members
    for item in selected:
        self.extract(item, path, pwd)
955
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
       file on the path targetpath.

    Missing parent directories are created.  A member whose name ends
    with "/" is created as a directory.  Returns the normalized path of
    the extracted file or directory.

    NOTE(review): the member name is joined to targetpath and normalized
    without further checks, so a name containing ".." components can
    resolve outside targetpath.  Only extract archives you trust.
    """
    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    # Strip trailing path separator, unless it represents the root.
    if (targetpath[-1:] in (os.path.sep, os.path.altsep)
        and len(os.path.splitdrive(targetpath)[1]) > 1):
        targetpath = targetpath[:-1]

    # don't include leading "/" from file name if present
    if member.filename[0] == '/':
        targetpath = os.path.join(targetpath, member.filename[1:])
    else:
        targetpath = os.path.join(targetpath, member.filename)

    targetpath = os.path.normpath(targetpath)

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    if member.filename[-1] == '/':
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    # Close both ends even when opening the target or copying fails.
    source = self.open(member, pwd=pwd)
    try:
        target = open(targetpath, "wb")
        try:
            shutil.copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()

    return targetpath
992
993 def _writecheck(self, zinfo):
994 """Check for errors before writing a file to the archive."""
995 if zinfo.filename in self.NameToInfo:
996 if self.debug: # Warning for duplicate names
997 print "Duplicate name:", zinfo.filename
998 if self.mode not in ("w", "a"):
999 raise RuntimeError, 'write() requires mode "w" or "a"'
1000 if not self.fp:
1001 raise RuntimeError, \
1002 "Attempt to write ZIP archive that was already closed"
1003 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
1004 raise RuntimeError, \
1005 "Compression requires the (missing) zlib module"
1006 if zinfo.compress_type == ZIP_BZIP2 and not bz2:
1007 raise RuntimeError, \
1008 "Compression requires the (missing) bz2 module"
1009 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2):
1010 raise RuntimeError, \
1011 "That compression method is not supported"
1012 if zinfo.file_size > ZIP64_LIMIT:
1013 if not self._allowZip64:
1014 raise LargeZipFile("Filesize would require ZIP64 extensions")
1015 if zinfo.header_offset > ZIP64_LIMIT:
1016 if not self._allowZip64:
1017 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
1018
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
       arcname.

       'arcname' defaults to 'filename'; its drive prefix and leading
       separators are removed. 'compress_type' overrides the archive's
       default compression for this member. A directory is stored as an
       empty entry whose name ends in '/'.

       Raises RuntimeError if the archive was already closed, plus
       whatever _writecheck() raises for this entry.
    """
    if not self.fp:
        raise RuntimeError(
              "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    mtime = time.localtime(st.st_mtime)
    date_time = mtime[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader())
        return

    fp = open(filename, "rb")
    try:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        zinfo.file_size = file_size = 0
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
        elif zinfo.compress_type == ZIP_BZIP2:
            cmpr = bz2.BZ2Compressor()
        else:
            cmpr = None
        # Stream the file in 8 KiB chunks so it is never held in memory
        # as a whole; CRC and sizes are accumulated along the way.
        while 1:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = crc32(buf, CRC) & 0xffffffff
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            self.fp.write(buf)
    finally:
        # Close the source file even if reading, compressing or writing
        # to the archive raised.
        fp.close()
    if cmpr:
        buf = cmpr.flush()
        compress_size = compress_size + len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    # Seek backwards and write CRC and file sizes into the header that
    # was emitted with placeholder zeros (14 bytes into that header).
    position = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
          zinfo.file_size))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1102
1103 def writestr(self, zinfo_or_arcname, bytes):
1104 """Write a file into the archive. The contents is the string
1105 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or
1106 the name of the file in the archive."""
1107 if not isinstance(zinfo_or_arcname, ZipInfo):
1108 zinfo = ZipInfo(filename=zinfo_or_arcname,
1109 date_time=time.localtime(time.time())[:6])
1110 zinfo.compress_type = self.compression
1111 zinfo.external_attr = 0600 << 16
1112 else:
1113 zinfo = zinfo_or_arcname
1114
1115 if not self.fp:
1116 raise RuntimeError(
1117 "Attempt to write to ZIP archive that was already closed")
1118
1119 zinfo.file_size = len(bytes) # Uncompressed size
1120 zinfo.header_offset = self.fp.tell() # Start of header bytes
1121 self._writecheck(zinfo)
1122 self._didModify = True
1123 zinfo.CRC = crc32(bytes) & 0xffffffff # CRC-32 checksum
1124 if zinfo.compress_type == ZIP_DEFLATED:
1125 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1126 zlib.DEFLATED, -15)
1127 bytes = co.compress(bytes) + co.flush()
1128 zinfo.compress_size = len(bytes) # Compressed size
1129 elif zinfo.compress_type == ZIP_BZIP2:
1130 co = bz2.BZ2Compressor()
1131 bytes = co.compress(bytes) + co.flush()
1132 zinfo.compress_size = len(bytes) # Compressed size
1133 else:
1134 zinfo.compress_size = zinfo.file_size
1135 zinfo.header_offset = self.fp.tell() # Start of header bytes
1136 self.fp.write(zinfo.FileHeader())
1137 self.fp.write(bytes)
1138 self.fp.flush()
1139 if zinfo.flag_bits & 0x08:
1140 # Write CRC and file sizes after the file data
1141 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
1142 zinfo.file_size))
1143 self.filelist.append(zinfo)
1144 self.NameToInfo[zinfo.filename] = zinfo
1145
1146 def __del__(self):
1147 """Call the "close()" method in case the user forgot."""
# close() returns immediately when self.fp is None, so finalizing an
# archive that was already closed explicitly does nothing further.
1148 self.close()
1149
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
       records.

       When the archive was modified, the central directory is written
       (one record per entry in self.filelist), followed by the ZIP64
       end-of-archive records if the entry count, directory size or
       directory offset requires them, and finally the end-of-archive
       record and the archive comment. A comment longer than
       ZIP_MAX_COMMENT bytes is truncated, with a message printed when
       self.debug is positive.

       The file handle is always released (closed unless the caller
       passed in an open file object) and self.fp reset to None, even
       when writing the ending records fails. Calling close() on an
       already closed archive does nothing.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify: # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                # Values too large for the fixed-size fields are moved
                # into a ZIP64 extra field and replaced by 0xffffffff.
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Prepend a ZIP64 field to the extra's
                    extra_data = struct.pack(
                            '<HH' + 'Q'*len(extra),
                            1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(structCentralDir,
                     stringCentralDir, create_version,
                     zinfo.create_system, extract_version, zinfo.reserved,
                     flag_bits, zinfo.compress_type, dostime, dosdate,
                     zinfo.CRC, compress_size, file_size,
                     len(filename), len(extra_data), len(zinfo.comment),
                     0, zinfo.internal_attr, zinfo.external_attr,
                     header_offset)
                except DeprecationWarning:
                    # Dump the offending field values before re-raising.
                    print >>sys.stderr, (structCentralDir,
                     stringCentralDir, create_version,
                     zinfo.create_system, extract_version, zinfo.reserved,
                     zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                     zinfo.CRC, compress_size, file_size,
                     len(zinfo.filename), len(extra_data), len(zinfo.comment),
                     0, zinfo.internal_attr, zinfo.external_attr,
                     header_offset)
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = count
            centDirSize = pos2 - pos1
            centDirOffset = pos1
            if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                centDirOffset > ZIP64_LIMIT or
                centDirSize > ZIP64_LIMIT):
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                        structEndArchive64, stringEndArchive64,
                        44, 45, 45, 0, 0, centDirCount, centDirCount,
                        centDirSize, centDirOffset)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                        structEndArchive64Locator,
                        stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            # check for valid comment length; a comment of exactly
            # ZIP_MAX_COMMENT bytes still fits and is left untouched
            if len(self.comment) > ZIP_MAX_COMMENT:
                if self.debug > 0:
                    msg = 'Archive comment is too long; truncating to %d bytes' \
                          % ZIP_MAX_COMMENT
                    print(msg)
                self.comment = self.comment[:ZIP_MAX_COMMENT]

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, centDirCount, centDirCount,
                                 centDirSize, centDirOffset, len(self.comment))
            self.fp.write(endrec)
            self.fp.write(self.comment)
            self.fp.flush()
    finally:
        # Release the handle even if writing the ending records failed,
        # so a later close() or __del__() does not retry on a stream
        # that is already in a bad state.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1259
1260
1261 class PyZipFile(ZipFile):
1262 """Class to create ZIP archives with Python library files and packages."""
1263
1264 def writepy(self, pathname, basename = ""):
1265 """Add all files from "pathname" to the ZIP archive.
1266
1267 If pathname is a package directory, search the directory and
1268 all package subdirectories recursively for all *.py and enter
1269 the modules into the archive. If pathname is a plain
1270 directory, listdir *.py and enter all modules. Else, pathname
1271 must be a Python *.py file and the module will be put into the
1272 archive. Added modules are always module.pyo or module.pyc.
1273 This method will compile the module.py into module.pyc if
1274 necessary.
1275 """
1276 dir, name = os.path.split(pathname)
1277 if os.path.isdir(pathname):
1278 initname = os.path.join(pathname, "__init__.py")
1279 if os.path.isfile(initname):
1280 # This is a package directory, add it
1281 if basename:
1282 basename = "%s/%s" % (basename, name)
1283 else:
1284 basename = name
1285 if self.debug:
1286 print "Adding package in", pathname, "as", basename
1287 fname, arcname = self._get_codename(initname[0:-3], basename)
1288 if self.debug:
1289 print "Adding", arcname
1290 self.write(fname, arcname)
1291 dirlist = os.listdir(pathname)
1292 dirlist.remove("__init__.py")
1293 # Add all *.py files and package subdirectories
1294 for filename in dirlist:
1295 path = os.path.join(pathname, filename)
1296 root, ext = os.path.splitext(filename)
1297 if os.path.isdir(path):
1298 if os.path.isfile(os.path.join(path, "__init__.py")):
1299 # This is a package directory, add it
1300 self.writepy(path, basename) # Recursive call
1301 elif ext == ".py":
1302 fname, arcname = self._get_codename(path[0:-3],
1303 basename)
1304 if self.debug:
1305 print "Adding", arcname
1306 self.write(fname, arcname)
1307 else:
1308 # This is NOT a package directory, add its files at top level
1309 if self.debug:
1310 print "Adding files from directory", pathname
1311 for filename in os.listdir(pathname):
1312 path = os.path.join(pathname, filename)
1313 root, ext = os.path.splitext(filename)
1314 if ext == ".py":
1315 fname, arcname = self._get_codename(path[0:-3],
1316 basename)
1317 if self.debug:
1318 print "Adding", arcname
1319 self.write(fname, arcname)
1320 else:
1321 if pathname[-3:] != ".py":
1322 raise RuntimeError, \
1323 'Files added with writepy() must end with ".py"'
1324 fname, arcname = self._get_codename(pathname[0:-3], basename)
1325 if self.debug:
1326 print "Adding file", arcname
1327 self.write(fname, arcname)
1328
1329 def _get_codename(self, pathname, basename):
1330 """Return (filename, archivename) for the path.
1331
1332 Given a module name path, return the correct file path and
1333 archive name, compiling if necessary. For example, given
1334 /python/lib/string, return (/python/lib/string.pyc, string).
1335 """
1336 file_py = pathname + ".py"
1337 file_pyc = pathname + ".pyc"
1338 file_pyo = pathname + ".pyo"
1339 if os.path.isfile(file_pyo) and \
1340 os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
1341 fname = file_pyo # Use .pyo file
1342 elif not os.path.isfile(file_pyc) or \
1343 os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
1344 import py_compile
1345 if self.debug:
1346 print "Compiling", file_py
1347 try:
1348 py_compile.compile(file_py, file_pyc, None, True)
1349 except py_compile.PyCompileError,err:
1350 print err.msg
1351 fname = file_pyc
1352 else:
1353 fname = file_pyc
1354 archivename = os.path.split(fname)[1]
1355 if basename:
1356 archivename = "%s/%s" % (basename, archivename)
1357 return (fname, archivename)
1358
1359
def main(args = None):
    """Command-line interface: list, test, extract or create an archive.

       'args' is the argument list without the program name and
       defaults to sys.argv[1:]. An unknown option or a wrong number of
       arguments prints the usage text and exits with status 1.

       The archive is closed in every mode, also when an operation
       fails. '-t' reports the member named by testzip(), if any. '-e'
       creates directory entries as directories without trying to write
       data into them.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            # NOTE(review): assumes testzip() keeps the stock zipfile
            # contract (name of the first bad member, else None).
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: %r" % (badfile,))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                # tgtdir is empty when the member lands directly in the
                # current directory; there is nothing to create then.
                tgtdir = os.path.dirname(tgt)
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                if path.endswith('/'):
                    # Directory entry: it was created just above and has
                    # no data to write.
                    continue
                fp = open(tgt, 'wb')
                try:
                    fp.write(zf.read(path))
                finally:
                    fp.close()
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recurse into a directory's entries.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1432
# Run the command-line interface when the module is executed as a script;
# main() then takes its arguments from sys.argv[1:].
1433 if __name__ == "__main__":
1434 main()