Mercurial > ~astiob > upreckon > hgweb
annotate zipfiles/zipfile31.py @ 246:1bc89faac941 2.04
Fixed: match='re' could produce duplicate test identifiers
files.Files.regexp(pattern) now makes sure to return only one
metafile for each matching virtual path, namely, the one that would
be returned for that virtual path by files.Files.from_virtual_path.
author | Oleg Oshmyan <chortos@inbox.lv> |
---|---|
date | Thu, 03 Oct 2013 01:19:09 +0300 |
parents | b993d9257400 |
children |
rev | line source |
---|---|
21 | 1 """ |
2 Read and write ZIP files. | |
3 | |
4 XXX references to utf-8 need further investigation. | |
5 """ | |
6 # Improved by Chortos-2 in 2010 (added bzip2 support) | |
7 import struct, os, time, sys, shutil | |
8 import binascii, io, stat | |
9 | |
10 try: | |
11 import zlib # We may need its compression method | |
12 crc32 = zlib.crc32 | |
13 except ImportError: | |
14 zlib = None | |
15 crc32 = binascii.crc32 | |
16 | |
17 try: | |
18 import bz2 # We may need its compression method | |
19 except ImportError: | |
20 bz2 = None | |
21 | |
22 __all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile", | |
23 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile", "ZIP_BZIP2" ] | |
24 | |
class BadZipfile(Exception):
    """Raised when a file is not a ZIP archive or the archive is malformed."""
27 | |
28 | |
class LargeZipFile(Exception):
    """
    Signals that writing the archive would need the ZIP64 extensions
    while those extensions are disabled.
    """
34 | |
35 error = BadZipfile # The exception raised by this module | |
36 | |
# Size thresholds of the classic (non-ZIP64) format.
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 1 << 16
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
# Other ZIP compression methods not supported

# What follows are the struct formats, magic numbers and field indices used
# to read and write the ZIP headers.  Names and layouts follow the PKWARE
# description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# "End of central directory" record (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Field indices into the unpacked record.  The last two (_ECD_COMMENT and
# _ECD_LOCATION) are not part of the structure as defined in the spec; this
# module appends them to the list as a convenience.
(_ECD_SIGNATURE,
 _ECD_DISK_NUMBER,
 _ECD_DISK_START,
 _ECD_ENTRIES_THIS_DISK,
 _ECD_ENTRIES_TOTAL,
 _ECD_SIZE,
 _ECD_OFFSET,
 _ECD_COMMENT_SIZE,
 _ECD_COMMENT,
 _ECD_LOCATION) = range(10)

# "Central directory" file header (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)

# "Local file header" (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)

# "Zip64 end of central directory locator": format, magic number and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# "Zip64 end of central directory" record (section V.G in the format
# document): format, magic number, size and field indices
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

(_CD64_SIGNATURE,
 _CD64_DIRECTORY_RECSIZE,
 _CD64_CREATE_VERSION,
 _CD64_EXTRACT_VERSION,
 _CD64_DISK_NUMBER,
 _CD64_DISK_NUMBER_START,
 _CD64_NUMBER_ENTRIES_THIS_DISK,
 _CD64_NUMBER_ENTRIES_TOTAL,
 _CD64_DIRECTORY_SIZE,
 _CD64_OFFSET_START_CENTDIR) = range(10)
139 | |
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    An IOError raised while probing the file is treated as "not a ZIP file".
    """
    try:
        end_record = _EndRecData(fp)
    except IOError:
        return False
    # A truthy record means the file has the correct magic number.
    return True if end_record else False
147 | |
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    *filename* may be a path, or an already opened file or file-like object
    (anything with a ``read`` attribute).  Files that cannot be opened or
    read count as "not a ZIP file".
    """
    verdict = False
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves and close it again when done.
            with open(filename, "rb") as handle:
                verdict = _check_zipfile(handle)
        else:
            verdict = _check_zipfile(fp=filename)
    except IOError:
        pass
    return verdict
163 | |
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin is the open archive, offset is the position of the classic
    "end of central directory" record relative to the end of the file
    (so it is negative), and endrec is the list built from that record.
    endrec is returned, updated in place when a valid ZIP64 record is found
    and unchanged otherwise.  BadZipfile is raised for multi-disk archives.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: there is no room for a locator in front of the end
        # record.  Without this check struct.unpack would raise struct.error.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Truncated ZIP64 record; keep the classic end record.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
201 | |
202 | |
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.
    None is returned when no valid record can be found."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length check matters for file-like objects that clamp a seek before
    # the start of the file instead of raising: they hand back fewer bytes
    # than the record needs, which struct.unpack would reject.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # whole record to follow it: the archive is truncated or corrupt.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return None
259 | |
260 | |
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Describe an archive member called filename, last modified at
        date_time (year, month, day, hour, min, sec)."""
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file (local) header as a bytes object.

        When either size exceeds ZIP64_LIMIT a ZIP64 extra field is appended
        and extract_version / create_version are raised to at least 45.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Raise create_version from its own value; deriving it from
            # extract_version could lower a higher create_version.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits); names that are not pure
        ASCII are stored as UTF-8 with flag bit 0x800 set."""
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 extra field (type 1), if present, to the size and
        offset attributes that hold the 0xffffffff placeholder.

        Raises RuntimeError when a ZIP64 field has an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # A field header takes four bytes; shorter leftovers are trailing
        # padding and are ignored rather than fed to struct.unpack.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx+=1

            extra = extra[ln+4:]
399 | |
400 | |
401 class _ZipDecrypter: | |
402 """Class to handle decryption of files stored within a ZIP archive. | |
403 | |
404 ZIP supports a password-based form of encryption. Even though known | |
405 plaintext attacks have been found against it, it is still useful | |
406 to be able to get data out of such a file. | |
407 | |
408 Usage: | |
409 zd = _ZipDecrypter(mypwd) | |
410 plain_char = zd(cypher_char) | |
411 plain_text = map(zd, cypher_text) | |
412 """ | |
413 | |
414 def _GenerateCRCTable(): | |
415 """Generate a CRC-32 table. | |
416 | |
417 ZIP encryption uses the CRC32 one-byte primitive for scrambling some | |
418 internal keys. We noticed that a direct implementation is faster than | |
419 relying on binascii.crc32(). | |
420 """ | |
421 poly = 0xedb88320 | |
422 table = [0] * 256 | |
423 for i in range(256): | |
424 crc = i | |
425 for j in range(8): | |
426 if crc & 1: | |
427 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly | |
428 else: | |
429 crc = ((crc >> 1) & 0x7FFFFFFF) | |
430 table[i] = crc | |
431 return table | |
432 crctable = _GenerateCRCTable() | |
433 | |
434 def _crc32(self, ch, crc): | |
435 """Compute the CRC32 primitive on one byte.""" | |
436 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff] | |
437 | |
438 def __init__(self, pwd): | |
439 self.key0 = 305419896 | |
440 self.key1 = 591751049 | |
441 self.key2 = 878082192 | |
442 for p in pwd: | |
443 self._UpdateKeys(p) | |
444 | |
445 def _UpdateKeys(self, c): | |
446 self.key0 = self._crc32(c, self.key0) | |
447 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295 | |
448 self.key1 = (self.key1 * 134775813 + 1) & 4294967295 | |
449 self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2) | |
450 | |
451 def __call__(self, c): | |
452 """Decrypt a single character.""" | |
453 assert isinstance(c, int) | |
454 k = self.key2 | 2 | |
455 c = c ^ (((k * (k^1)) >> 8) & 255) | |
456 self._UpdateKeys(c) | |
457 return c | |
458 | |
class ZipExtFile:
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Wrap fileobj, positioned at the member's data, for reading.

        zipinfo supplies the compression type, compressed size, name and
        (when set) the expected CRC; decrypt is an optional callable that
        decrypts one byte value at a time.
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0
        self.rawbuffer = b''
        self.readbuffer = b''
        self.linebuffer = b''
        # NOTE: this class only ever assigns eof here, so the flush branch
        # in read() runs only if something else sets it to True.
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = (b"\n", )
        self.lastdiscard = b''

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed = False
        self.mode = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            self.dc = zlib.decompressobj(-15)
        elif self.compress_type == ZIP_BZIP2:
            self.dc = bz2.BZ2Decompressor()
            self.compreadsize = 900000

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def set_univ_newlines(self, univ_newlines):
        """Enable or disable universal-newline handling in readline()."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = (b"\n", )
        if self.univ_newlines:
            self.nlSeps = (b"\r\n", b"\r", b"\n")

    def __iter__(self):
        return self

    def __next__(self):
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark the object as closed; the underlying file is left alone."""
        self.closed = True

    def _checkfornewline(self):
        """Return (position, separator length) of the first separator found
        in the line buffer, trying the separators in nlSeps order, or
        (-1, -1) when there is none."""
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[:1]) == (b'\r', b'\n'):
                self.linebuffer = self.linebuffer[1:]

            for sep in self.nlSeps:
                nl = self.linebuffer.find(sep)
                if nl >= 0:
                    nllen = len(sep)
                    return nl, nllen

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.
        """
        if size < 0:
            size = sys.maxsize
        elif size == 0:
            return b''

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl >= 0:
            # the next line was already in the buffer
            nl = min(nl, size)
        else:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = b''
                return s

        buf = self.linebuffer[:nl]
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + b"\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def _update_crc(self, newdata, eof):
        """Fold newdata into the running CRC and, when eof is true, raise
        BadZipfile if it does not match the expected value."""
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipfile("Bad CRC-32 for file %r" % self.name)

    def read(self, size = None):
        """Return up to size bytes of member data.

        With size None or negative, everything that is left is returned.
        Raises BadZipfile (via _update_crc) on a CRC mismatch once the
        member has been consumed.
        """
        # act like file obj and return empty string if size is 0
        if size == 0:
            return b''

        # A negative size means "read it all", as for ordinary file objects.
        # Left negative it would reach the slice at the bottom and silently
        # drop the last byte.
        if size is not None and size < 0:
            size = None

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        if size is not None and size >= 0:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            else:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            data = self.fileobj.read(bytesToRead)
            self.bytes_read += len(data)
            self.rawbuffer += data

        # handle contents of raw buffer
        if self.rawbuffer:
            newdata = self.rawbuffer
            self.rawbuffer = b''

            # decrypt new data if we were given an object to handle that
            if newdata and self.decrypter is not None:
                newdata = bytes(map(self.decrypter, newdata))

            # decompress newly read data if necessary
            if newdata and self.compress_type != ZIP_STORED:
                newdata = self.dc.decompress(newdata)
                # Only the zlib decompressor is asked for unconsumed_tail.
                # The fallback must be bytes: a str here makes the next
                # "rawbuffer += data" raise TypeError.
                if self.compress_type == ZIP_DEFLATED:
                    self.rawbuffer = self.dc.unconsumed_tail
                else:
                    self.rawbuffer = b''
                if self.eof and len(self.rawbuffer) == 0:
                    # we're out of raw bytes (both from the file and
                    # the local buffer); flush just to make sure the
                    # decompressor is done
                    try:
                        newdata += self.dc.flush()
                    except AttributeError:
                        # decompressor without a flush() method
                        pass
                    # prevent decompressor from being used again
                    self.dc = None

            self._update_crc(newdata, eof=(
                self.compress_size == self.bytes_read and
                len(self.rawbuffer) == 0))
            self.readbuffer += newdata

        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            data = self.readbuffer
            self.readbuffer = b''
        else:
            data = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return data
680 | |
681 | |
682 class ZipFile: | |
683 """ Class with methods to open, read, write, close, list zip files. | |
684 | |
685 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False) | |
686 | |
687 file: Either the path to the file, or a file-like object. | |
688 If it is a path, the file will be opened and closed by ZipFile. | |
689 mode: The mode can be either read "r", write "w" or append "a". | |
690 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib), | |
691 or ZIP_BZIP2 (requires bz2). | |
692 allowZip64: if True ZipFile will create files with ZIP64 extensions when | |
693 needed, otherwise it will raise an exception when this would | |
694 be necessary. | |
695 | |
696 """ | |
697 | |
698 fp = None # Set here since __del__ checks it | |
699 | |
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a"."""
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    # The compression method must be known, and the module implementing it
    # must have been importable.
    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
    elif compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                  "Compression requires the (missing) bz2 module")
    elif compression != ZIP_STORED:
        raise RuntimeError("That compression method is not supported")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None
    self.comment = b''

    if not isinstance(file, str):
        # A file-like object was handed in; its lifetime is the caller's.
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)
    else:
        # A filename: the file is opened here.
        self._filePassed = 0
        self.filename = file
        open_modes = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        try:
            self.fp = io.open(file, open_modes[mode])
        except IOError:
            if mode != 'a':
                raise
            # Appending to a file that cannot be opened for update falls
            # back to creating it.
            mode = key = 'w'
            self.fp = io.open(file, open_modes[mode])

    if key == 'r':
        self._GetContents()
    elif key == 'w':
        # set the modified flag so central directory gets written
        # even if no files are added to the archive
        self._didModify = True
    elif key == 'a':
        try:
            # See if file is a zip file
            self._RealGetContents()
            # seek to start of directory and overwrite
            self.fp.seek(self.start_dir, 0)
        except BadZipfile:
            # file is not a zip file, just append
            self.fp.seek(0, 2)

            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
    else:
        if not self._filePassed:
            self.fp.close()
        self.fp = None
        raise RuntimeError('Mode must be "r", "w" or "a"')
771 | |
772 def _GetContents(self): | |
773 """Read the directory, making sure we close the file if the format | |
774 is bad.""" | |
775 try: | |
776 self._RealGetContents() | |
777 except BadZipfile: | |
778 if not self._filePassed: | |
779 self.fp.close() | |
780 self.fp = None | |
781 raise | |
782 | |
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-central-directory record with _EndRecData(),
    reads the whole central directory into memory and creates one
    ZipInfo per entry, appending it to self.filelist and registering it
    in self.NameToInfo.  Also sets self.comment and self.start_dir.

    Raises BadZipfile if the end record cannot be found or read, or if a
    central directory record is truncated or has a bad signature.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except IOError:
        raise BadZipfile("File is not a zip file")
    if not endrec:
        raise BadZipfile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self.comment = endrec[_ECD_COMMENT]     # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # From here on, parse the in-memory copy of the central directory.
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if centdir[0:4] != stringCentralDir:
            raise BadZipfile("Bad magic number for central directory")
        if len(centdir) != sizeCentralDir:
            # A short read would otherwise surface as struct.error below.
            raise BadZipfile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        # Index 5 is the general purpose flag field; it is unpacked into
        # x.flag_bits from the same tuple further down.
        flags = centdir[5]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                       t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)

        x._decodeExtra()
        # Shift by the size of any data prepended to the archive.
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
854 | |
855 | |
def namelist(self):
    """Return the names of all archive members, in directory order."""
    return [entry.filename for entry in self.filelist]
862 | |
def infolist(self):
    """Return the ZipInfo instances for the files in the archive.

    The object returned is the archive's own self.filelist, not a copy.
    """
    entries = self.filelist
    return entries
867 | |
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One heading line is written, followed by one line per member giving
    its name, modification time and uncompressed size.  Output goes to
    'file', or to wherever print() writes by default when it is None.
    """
    heading = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(heading, file=file)
    for zinfo in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
        row = "%-46s %s %12d" % (zinfo.filename, stamp, zinfo.file_size)
        print(row, file=file)
876 | |
877 def testzip(self): | |
878 """Read all the files and check the CRC.""" | |
879 chunk_size = 2 ** 20 | |
880 for zinfo in self.filelist: | |
881 try: | |
882 # Read by chunks, to avoid an OverflowError or a | |
883 # MemoryError with very large embedded files. | |
884 f = self.open(zinfo.filename, "r") | |
885 while f.read(chunk_size): # Check CRC-32 | |
886 pass | |
887 except BadZipfile: | |
888 return zinfo.filename | |
889 | |
def getinfo(self, name):
    """Look up the ZipInfo registered under 'name'.

    Raises KeyError if the archive has no member with that name.
    """
    entry = self.NameToInfo.get(name)
    if entry is not None:
        return entry
    raise KeyError(
        'There is no item named %r in the archive' % name)
898 | |
def setpassword(self, pwd):
    """Set the default password used for encrypted members.

    'pwd' must be a bytes object; any false value (None, b"") clears
    the default.  Raises TypeError for a non-empty non-bytes value.
    """
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    # Normalise every false value to None.
    self.pwd = pwd or None
21 | 907 |
def read(self, name, pwd=None):
    """Return the complete contents of member 'name' as bytes.

    'pwd' is passed through to open() for encrypted members.
    """
    member = self.open(name, "r", pwd)
    return member.read()
911 | |
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    'name' is a member name or a ZipInfo instance.  'mode' must be "r",
    "U" or "rU"; any mode containing "U" enables universal newlines on
    the returned object.  'pwd' is the bytes password for an encrypted
    member and falls back to the default set with setpassword().

    Raises RuntimeError for a bad mode, a closed archive, a missing
    password or a wrong password; TypeError for a non-bytes password;
    KeyError (from getinfo) for an unknown name; BadZipfile if the local
    file header is invalid or disagrees with the central directory.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = io.open(self.filename, 'rb')

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if zinfo.flag_bits & 0x800:
            # UTF-8 filename
            fname_str = fname.decode("utf-8")
        else:
            fname_str = fname.decode("cp437")

        if fname_str != zinfo.orig_filename:
            raise BadZipfile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption
            # header used to strengthen the algorithm. The first 11 bytes
            # are completely random, while the 12th contains the MSB of
            # the CRC, or the MSB of the file time depending on the
            # header type and is used to check the correctness of the
            # password.
            header = zef_file.read(12)
            h = list(map(zd, header[0:12]))
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file", name)

        # build and return a ZipExtFile
        if zd is None:
            zef = ZipExtFile(zef_file, zinfo)
        else:
            zef = ZipExtFile(zef_file, zinfo, zd)

        # set universal newlines on ZipExtFile if necessary
        if "U" in mode:
            zef.set_univ_newlines(True)
        return zef
    except BaseException:
        # Nothing was handed to the caller, so a handle opened above
        # would otherwise leak.  Never close a caller-supplied file.
        if not self._filePassed:
            zef_file.close()
        raise
997 | |
def extract(self, member, path=None, pwd=None):
    """Extract one member to disk under its full archive name.

    'member' may be a filename or a ZipInfo object.  The member is
    written below 'path', or below the current working directory when
    'path' is None.  'pwd' is the password for an encrypted member.
    Returns whatever _extract_member() returns.
    """
    zinfo = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(zinfo, destination, pwd)
1011 | |
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members by calling extract() on each in turn.

    'path' selects the destination directory (see extract()).
    'members' is optional and must be a subset of the list returned by
    namelist(); when it is None every member is extracted.  'pwd' is
    passed on for encrypted members.
    """
    selected = self.namelist() if members is None else members
    for item in selected:
        self.extract(item, path, pwd)
1023 | |
1024 def _extract_member(self, member, targetpath, pwd): | |
1025 """Extract the ZipInfo object 'member' to a physical | |
1026 file on the path targetpath. | |
1027 """ | |
1028 # build the destination pathname, replacing | |
1029 # forward slashes to platform specific separators. | |
1030 # Strip trailing path separator, unless it represents the root. | |
1031 if (targetpath[-1:] in (os.path.sep, os.path.altsep) | |
1032 and len(os.path.splitdrive(targetpath)[1]) > 1): | |
1033 targetpath = targetpath[:-1] | |
1034 | |
1035 # don't include leading "/" from file name if present | |
1036 if member.filename[0] == '/': | |
1037 targetpath = os.path.join(targetpath, member.filename[1:]) | |
1038 else: | |
1039 targetpath = os.path.join(targetpath, member.filename) | |
1040 | |
1041 targetpath = os.path.normpath(targetpath) | |
1042 | |
1043 # Create all upper directories if necessary. | |
1044 upperdirs = os.path.dirname(targetpath) | |
1045 if upperdirs and not os.path.exists(upperdirs): | |
1046 os.makedirs(upperdirs) | |
1047 | |
1048 if member.filename[-1] == '/': | |
1049 if not os.path.isdir(targetpath): | |
1050 os.mkdir(targetpath) | |
1051 return targetpath | |
1052 | |
1053 source = self.open(member, pwd=pwd) | |
1054 target = open(targetpath, "wb") | |
1055 shutil.copyfileobj(source, target) | |
1056 source.close() | |
1057 target.close() | |
1058 | |
1059 return targetpath | |
1060 | |
1061 def _writecheck(self, zinfo): | |
1062 """Check for errors before writing a file to the archive.""" | |
1063 if zinfo.filename in self.NameToInfo: | |
1064 if self.debug: # Warning for duplicate names | |
1065 print("Duplicate name:", zinfo.filename) | |
1066 if self.mode not in ("w", "a"): | |
1067 raise RuntimeError('write() requires mode "w" or "a"') | |
1068 if not self.fp: | |
1069 raise RuntimeError( | |
1070 "Attempt to write ZIP archive that was already closed") | |
1071 if zinfo.compress_type == ZIP_DEFLATED and not zlib: | |
1072 raise RuntimeError( | |
1073 "Compression requires the (missing) zlib module") | |
1074 if zinfo.compress_type == ZIP_BZIP2 and not bz2: | |
1075 raise RuntimeError( | |
1076 "Compression requires the (missing) bz2 module") | |
1077 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2): | |
1078 raise RuntimeError("That compression method is not supported") | |
1079 if zinfo.file_size > ZIP64_LIMIT: | |
1080 if not self._allowZip64: | |
1081 raise LargeZipFile("Filesize would require ZIP64 extensions") | |
1082 if zinfo.header_offset > ZIP64_LIMIT: | |
1083 if not self._allowZip64: | |
1084 raise LargeZipFile( | |
1085 "Zipfile size would require ZIP64 extensions") | |
1086 | |
def write(self, filename, arcname=None, compress_type=None):
    """Add the file or directory 'filename' to the archive.

    The member is stored as 'arcname' (default: 'filename'), with any
    drive and leading separators removed; a directory gets a trailing
    '/' and no data.  'compress_type' overrides the archive's default
    compression for this member.  Raises RuntimeError if the archive is
    already closed, plus whatever _writecheck() raises.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    date_time = time.localtime(st.st_mtime)[0:6]

    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    if compress_type is None:
        compress_type = self.compression
    zinfo.compress_type = compress_type

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        # Directories are recorded as empty entries.
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader())
        return

    with open(filename, "rb") as src:
        # The header goes out with zeroed CRC and sizes; the real values
        # are patched in once the data has been written.
        zinfo.CRC = checksum = 0
        zinfo.compress_size = packed = 0
        zinfo.file_size = unpacked = 0
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        elif zinfo.compress_type == ZIP_BZIP2:
            cmpr = bz2.BZ2Compressor()
        else:
            cmpr = None
        while True:
            chunk = src.read(1024 * 8)
            if not chunk:
                break
            unpacked += len(chunk)
            checksum = crc32(chunk, checksum) & 0xffffffff
            if cmpr:
                chunk = cmpr.compress(chunk)
                packed += len(chunk)
            self.fp.write(chunk)

    if cmpr:
        tail = cmpr.flush()
        packed += len(tail)
        self.fp.write(tail)
        zinfo.compress_size = packed
    else:
        zinfo.compress_size = unpacked
    zinfo.CRC = checksum
    zinfo.file_size = unpacked

    # Seek backwards and write CRC and file sizes
    resume_at = self.fp.tell()      # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size))
    self.fp.seek(resume_at, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1169 | |
def writestr(self, zinfo_or_arcname, data):
    """Write a file into the archive.  The contents is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When only a name is given, a ZipInfo is created with the current
    local time, the archive's default compression and 0o600 permissions.
    Raises RuntimeError if the archive is already closed, plus whatever
    _writecheck() raises.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(data)             # Uncompressed size
    # Nothing is written between here and the header below, so the
    # offset only needs to be taken once.
    zinfo.header_offset = self.fp.tell()    # Start of header data
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(data) & 0xffffffff    # CRC-32 checksum

    if zinfo.compress_type == ZIP_DEFLATED:
        co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                              zlib.DEFLATED, -15)
    elif zinfo.compress_type == ZIP_BZIP2:
        co = bz2.BZ2Compressor()
    else:
        co = None
    if co is not None:
        data = co.compress(data) + co.flush()
    # For stored members 'data' is untouched, so this equals file_size.
    zinfo.compress_size = len(data)         # Compressed size

    self.fp.write(zinfo.FileHeader())
    self.fp.write(data)
    self.fp.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1216 | |
1217 def __del__(self): | |
1218 """Call the "close()" method in case the user forgot.""" | |
1219 self.close() | |
1220 | |
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Calling close() on an already closed archive does nothing.  If the
    archive was modified, the central directory, the ZIP64 end records
    (when needed) and the end-of-archive record are written first.
    The archive is marked closed, and a file opened by this object is
    closed, even if writing those records fails; the error still
    propagates.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify:  # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                        '<HH' + 'Q'*len(extra),
                        1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Dump the offending values before re-raising.
                    print((structCentralDir, stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset), file=sys.stderr)
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = count
            centDirSize = pos2 - pos1
            centDirOffset = pos1
            if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                    centDirOffset > ZIP64_LIMIT or
                    centDirSize > ZIP64_LIMIT):
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, centDirCount, centDirCount,
                    centDirSize, centDirOffset)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            # check for valid comment length; a comment of exactly
            # ZIP_MAX_COMMENT bytes still fits and needs no truncation
            if len(self.comment) > ZIP_MAX_COMMENT:
                if self.debug > 0:
                    msg = 'Archive comment is too long; truncating to %d bytes' \
                          % ZIP_MAX_COMMENT
                    print(msg)
                self.comment = self.comment[:ZIP_MAX_COMMENT]

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, centDirCount, centDirCount,
                                 centDirSize, centDirOffset, len(self.comment))
            self.fp.write(endrec)
            self.fp.write(self.comment)
            self.fp.flush()
    finally:
        # Mark the archive closed first so that a later close() (for
        # example from __del__) does not try to write the records again.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1329 | |
1330 | |
class PyZipFile(ZipFile):
    """ZipFile variant that stores compiled Python modules and packages."""

    def writepy(self, pathname, basename=""):
        """Add compiled Python code found at "pathname" to the archive.

        Three cases are handled:

        * a package directory (one containing __init__.py): the package
          and, recursively, its sub-packages are added below "basename";
        * any other directory: each *.py file directly inside it is
          added;
        * otherwise "pathname" must name a *.py file, which is added.

        What is stored is always the module's .pyc or .pyo file; it is
        compiled first when necessary.  Raises RuntimeError if a single
        file is given whose name does not end in ".py".
        """
        name = os.path.split(pathname)[1]

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in os.listdir(pathname):
                path = os.path.join(pathname, filename)
                ext = os.path.splitext(filename)[1]
                if ext != ".py":
                    continue
                fname, arcname = self._get_codename(path[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
            return

        # This is a package directory, add it
        basename = "%s/%s" % (basename, name) if basename else name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("Adding", arcname)
        self.write(fname, arcname)

        entries = os.listdir(pathname)
        entries.remove("__init__.py")       # already added above
        # Add all *.py files and package subdirectories
        for filename in entries:
            path = os.path.join(pathname, filename)
            ext = os.path.splitext(filename)[1]
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename)  # Recursive call
                continue
            if ext == ".py":
                fname, arcname = self._get_codename(path[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        "pathname" is a module path without its extension.  An
        up-to-date .pyo file is preferred; otherwise the .pyc file is
        used, and it is (re)compiled first if missing or older than the
        .py source.  A compile error is printed, not raised.  The
        archive name is the chosen file's base name, prefixed with
        "basename/" when "basename" is non-empty.
        """
        source = pathname + ".py"
        compiled = pathname + ".pyc"
        optimized = pathname + ".pyo"

        if os.path.isfile(optimized) and \
                os.stat(optimized).st_mtime >= os.stat(source).st_mtime:
            fname = optimized       # Use .pyo file
        else:
            if not os.path.isfile(compiled) or \
                    os.stat(compiled).st_mtime < os.stat(source).st_mtime:
                import py_compile
                if self.debug:
                    print("Compiling", source)
                try:
                    py_compile.compile(source, compiled, None, True)
                except py_compile.PyCompileError as err:
                    print(err.msg)
            fname = compiled

        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
1428 | |
1429 | |
def main(args=None):
    """Command-line interface: list, test, extract or create a zipfile.

    'args' is the argument list, defaulting to sys.argv[1:].  The first
    argument selects the action (-l, -t, -e or -c).  On a missing or
    malformed command line the usage text is printed and the process
    exits with status 1.  The archive is closed on every path, including
    when an operation fails.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                if path.endswith('/'):
                    # Directory entry: create it, there is no data to write.
                    if not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                # tgtdir is empty when the target is a bare file name.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recurse into a directory; ignore anything else.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1503 | |
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()