Mercurial > ~astiob > upreckon > hgweb
annotate zipfile31.py @ 76:0e5ae28e0b2b
Points are now weighted on a test context basis
In particular, this has allowed for simple extensions to the format
of testconf to award points to whole test groups without at the same time
compromising the future ability of giving partial score for correct
but slow solutions. Specifically, the groupweight configuration variable
has been added and normally has the format {groupindex: points} where
groupindex is the group's index in the tests configuration variable.
The backwards incompatible change is that test contexts are no longer
guaranteed to learn the score awarded or the maximum possible score
for every test case and may instead be notified about them in batches.
In other news, the pointmap and groupweight configuration variables can
(now) be given as sequences in addition to mappings. (Technically,
the distinction currently made is dict versus everything else.) Items
of a sequence pointmap/groupweight correspond directly to the test cases/
groups defined in the tests configuration variable; in particular,
when groups are used, tests=[1],[2,3];pointmap={1:1,2:2,3:3} can now be
written as pointmap=tests=[1],[2,3]. Missing items are handled in the same
way in which they are handled when the variable is a mapping. Note
that the items of groupweight correspond to whole test groups rather
than individual test cases.
In other news again, the wording of problem total lines has been changed
from '<unweighted> points; weighted score: <weighted>' to '<weighted>
points (<unweighted> before weighting)', and group total lines now
properly report fractional numbers of points (this is a bug fix).
author | Oleg Oshmyan <chortos@inbox.lv> |
---|---|
date | Sat, 08 Jan 2011 16:03:35 +0200 |
parents | 4ea7133ac25c |
children |
rev | line source |
---|---|
21 | 1 """ |
2 Read and write ZIP files. | |
3 | |
4 XXX references to utf-8 need further investigation. | |
5 """ | |
6 # Improved by Chortos-2 in 2010 (added bzip2 support) | |
7 import struct, os, time, sys, shutil | |
8 import binascii, io, stat | |
9 | |
10 try: | |
11 import zlib # We may need its compression method | |
12 crc32 = zlib.crc32 | |
13 except ImportError: | |
14 zlib = None | |
15 crc32 = binascii.crc32 | |
16 | |
17 try: | |
18 import bz2 # We may need its compression method | |
19 except ImportError: | |
20 bz2 = None | |
21 | |
22 __all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile", | |
23 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile", "ZIP_BZIP2" ] | |
24 | |
class BadZipfile(Exception):
    """Error raised by this module for malformed or unsupported archives."""
27 | |
28 | |
class LargeZipFile(Exception):
    """Raised while writing a zipfile that would need ZIP64 extensions
    when those extensions are disabled.
    """
34 | |
35 error = BadZipfile # The exception raised by this module | |
36 | |
37 ZIP64_LIMIT = (1 << 31) - 1 | |
38 ZIP_FILECOUNT_LIMIT = 1 << 16 | |
39 ZIP_MAX_COMMENT = (1 << 16) - 1 | |
40 | |
41 # constants for Zip file compression methods | |
42 ZIP_STORED = 0 | |
43 ZIP_DEFLATED = 8 | |
44 ZIP_BZIP2 = 12 | |
45 # Other ZIP compression methods not supported | |
46 | |
47 # Below are some formats and associated data for reading/writing headers using | |
48 # the struct module. The names and structures of headers/records are those used | |
49 # in the PKWARE description of the ZIP file format: | |
50 # http://www.pkware.com/documents/casestudies/APPNOTE.TXT | |
51 # (URL valid as of January 2008) | |
52 | |
53 # The "end of central directory" structure, magic number, size, and indices | |
54 # (section V.I in the format document) | |
55 structEndArchive = b"<4s4H2LH" | |
56 stringEndArchive = b"PK\005\006" | |
57 sizeEndCentDir = struct.calcsize(structEndArchive) | |
58 | |
59 _ECD_SIGNATURE = 0 | |
60 _ECD_DISK_NUMBER = 1 | |
61 _ECD_DISK_START = 2 | |
62 _ECD_ENTRIES_THIS_DISK = 3 | |
63 _ECD_ENTRIES_TOTAL = 4 | |
64 _ECD_SIZE = 5 | |
65 _ECD_OFFSET = 6 | |
66 _ECD_COMMENT_SIZE = 7 | |
67 # These last two indices are not part of the structure as defined in the | |
68 # spec, but they are used internally by this module as a convenience | |
69 _ECD_COMMENT = 8 | |
70 _ECD_LOCATION = 9 | |
71 | |
72 # The "central directory" structure, magic number, size, and indices | |
73 # of entries in the structure (section V.F in the format document) | |
74 structCentralDir = "<4s4B4HL2L5H2L" | |
75 stringCentralDir = b"PK\001\002" | |
76 sizeCentralDir = struct.calcsize(structCentralDir) | |
77 | |
78 # indexes of entries in the central directory structure | |
79 _CD_SIGNATURE = 0 | |
80 _CD_CREATE_VERSION = 1 | |
81 _CD_CREATE_SYSTEM = 2 | |
82 _CD_EXTRACT_VERSION = 3 | |
83 _CD_EXTRACT_SYSTEM = 4 | |
84 _CD_FLAG_BITS = 5 | |
85 _CD_COMPRESS_TYPE = 6 | |
86 _CD_TIME = 7 | |
87 _CD_DATE = 8 | |
88 _CD_CRC = 9 | |
89 _CD_COMPRESSED_SIZE = 10 | |
90 _CD_UNCOMPRESSED_SIZE = 11 | |
91 _CD_FILENAME_LENGTH = 12 | |
92 _CD_EXTRA_FIELD_LENGTH = 13 | |
93 _CD_COMMENT_LENGTH = 14 | |
94 _CD_DISK_NUMBER_START = 15 | |
95 _CD_INTERNAL_FILE_ATTRIBUTES = 16 | |
96 _CD_EXTERNAL_FILE_ATTRIBUTES = 17 | |
97 _CD_LOCAL_HEADER_OFFSET = 18 | |
98 | |
99 # The "local file header" structure, magic number, size, and indices | |
100 # (section V.A in the format document) | |
101 structFileHeader = "<4s2B4HL2L2H" | |
102 stringFileHeader = b"PK\003\004" | |
103 sizeFileHeader = struct.calcsize(structFileHeader) | |
104 | |
105 _FH_SIGNATURE = 0 | |
106 _FH_EXTRACT_VERSION = 1 | |
107 _FH_EXTRACT_SYSTEM = 2 | |
108 _FH_GENERAL_PURPOSE_FLAG_BITS = 3 | |
109 _FH_COMPRESSION_METHOD = 4 | |
110 _FH_LAST_MOD_TIME = 5 | |
111 _FH_LAST_MOD_DATE = 6 | |
112 _FH_CRC = 7 | |
113 _FH_COMPRESSED_SIZE = 8 | |
114 _FH_UNCOMPRESSED_SIZE = 9 | |
115 _FH_FILENAME_LENGTH = 10 | |
116 _FH_EXTRA_FIELD_LENGTH = 11 | |
117 | |
118 # The "Zip64 end of central directory locator" structure, magic number, and size | |
119 structEndArchive64Locator = "<4sLQL" | |
120 stringEndArchive64Locator = b"PK\x06\x07" | |
121 sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator) | |
122 | |
123 # The "Zip64 end of central directory" record, magic number, size, and indices | |
124 # (section V.G in the format document) | |
125 structEndArchive64 = "<4sQ2H2L4Q" | |
126 stringEndArchive64 = b"PK\x06\x06" | |
127 sizeEndCentDir64 = struct.calcsize(structEndArchive64) | |
128 | |
129 _CD64_SIGNATURE = 0 | |
130 _CD64_DIRECTORY_RECSIZE = 1 | |
131 _CD64_CREATE_VERSION = 2 | |
132 _CD64_EXTRACT_VERSION = 3 | |
133 _CD64_DISK_NUMBER = 4 | |
134 _CD64_DISK_NUMBER_START = 5 | |
135 _CD64_NUMBER_ENTRIES_THIS_DISK = 6 | |
136 _CD64_NUMBER_ENTRIES_TOTAL = 7 | |
137 _CD64_DIRECTORY_SIZE = 8 | |
138 _CD64_OFFSET_START_CENTDIR = 9 | |
139 | |
def _check_zipfile(fp):
    """Return True if an "end of central directory" record is found in fp.

    fp is an open binary file object.  An IOError raised while probing the
    file is treated as "not a ZIP file".
    """
    try:
        found = bool(_EndRecData(fp))   # truthy record => correct magic number
    except IOError:
        found = False
    return found
147 | |
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a path or an object with a read() method.
    Returns False, rather than raising, when the file cannot be opened or
    read.
    """
    try:
        if hasattr(filename, "read"):
            # Already a file-like object: probe it directly.
            return _check_zipfile(fp=filename)
        with open(filename, "rb") as fp:
            return _check_zipfile(fp)
    except IOError:
        return False
163 | |
def _EndRecData64(fpin, offset, endrec):
    """Read the ZIP64 end-of-archive records and use them to update endrec.

    fpin is a seekable binary file object.  offset is the position of the
    classic "end of central directory" record relative to the end of the
    file, and endrec is the list built from that record.

    endrec is returned unchanged when no complete ZIP64 locator and record
    precede the classic record.  Otherwise its first seven fields are
    overwritten with the values from the ZIP64 record and it is returned.

    Raises BadZipfile if the locator describes a multi-disk archive.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: there is no room for a locator, so struct.unpack
        # would fail.  Treat the archive as a plain (non-ZIP64) one.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Truncated ZIP64 record: keep the values from the classic record.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
201 | |
202 | |
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    fpin is a seekable binary file object.  The data is a list of the nine
    items in the ZIP "End of central dir" record followed by a tenth item,
    the file seek offset of this record.  None is returned when no valid,
    complete record can be found.
    """

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length check matters for file-like objects that clamp an
    # out-of-range seek instead of raising: they can hand back fewer bytes
    # than a full record, which struct.unpack would reject.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # complete record to follow it: the file is truncated/corrupt.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return None
259 | |
260 | |
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry for filename last modified at date_time.

        date_time is a (year, month, day, hour, min, sec) tuple.  The name
        is cut at the first null byte and the platform path separator is
        replaced by "/"; the untouched name is kept in orig_filename.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file (local) header as a bytes object.

        The result is the packed fixed-size header followed by the encoded
        file name and the extra field.  When either size exceeds
        ZIP64_LIMIT a ZIP64 extra record is appended and extract_version /
        create_version on this object are raised to at least 45.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # NOTE(review): this derives create_version from extract_version;
            # possibly max(45, self.create_version) was intended -- confirm.
            self.create_version = max(45, self.extract_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded file name, flag bits) for writing a header.

        Pure-ASCII names are encoded as ASCII with the flags unchanged;
        any other name is encoded as UTF-8 and bit 0x800 is set.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply ZIP64 records (type 1) found in the extra field.

        Each of file_size, compress_size and header_offset that holds its
        "see ZIP64 record" placeholder value is replaced, in that order, by
        the next 64-bit count from the record.

        Raises RuntimeError for a ZIP64 record of unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # Every record starts with a 4-byte (type, length) header; fewer
        # than 4 remaining bytes is trailing padding/garbage and is ignored
        # instead of being fed to unpack().
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
399 | |
400 | |
401 class _ZipDecrypter: | |
402 """Class to handle decryption of files stored within a ZIP archive. | |
403 | |
404 ZIP supports a password-based form of encryption. Even though known | |
405 plaintext attacks have been found against it, it is still useful | |
406 to be able to get data out of such a file. | |
407 | |
408 Usage: | |
409 zd = _ZipDecrypter(mypwd) | |
410 plain_char = zd(cypher_char) | |
411 plain_text = map(zd, cypher_text) | |
412 """ | |
413 | |
414 def _GenerateCRCTable(): | |
415 """Generate a CRC-32 table. | |
416 | |
417 ZIP encryption uses the CRC32 one-byte primitive for scrambling some | |
418 internal keys. We noticed that a direct implementation is faster than | |
419 relying on binascii.crc32(). | |
420 """ | |
421 poly = 0xedb88320 | |
422 table = [0] * 256 | |
423 for i in range(256): | |
424 crc = i | |
425 for j in range(8): | |
426 if crc & 1: | |
427 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly | |
428 else: | |
429 crc = ((crc >> 1) & 0x7FFFFFFF) | |
430 table[i] = crc | |
431 return table | |
432 crctable = _GenerateCRCTable() | |
433 | |
434 def _crc32(self, ch, crc): | |
435 """Compute the CRC32 primitive on one byte.""" | |
436 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff] | |
437 | |
438 def __init__(self, pwd): | |
439 self.key0 = 305419896 | |
440 self.key1 = 591751049 | |
441 self.key2 = 878082192 | |
442 for p in pwd: | |
443 self._UpdateKeys(p) | |
444 | |
445 def _UpdateKeys(self, c): | |
446 self.key0 = self._crc32(c, self.key0) | |
447 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295 | |
448 self.key1 = (self.key1 * 134775813 + 1) & 4294967295 | |
449 self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2) | |
450 | |
451 def __call__(self, c): | |
452 """Decrypt a single character.""" | |
453 assert isinstance(c, int) | |
454 k = self.key2 | 2 | |
455 c = c ^ (((k * (k^1)) >> 8) & 255) | |
456 self._UpdateKeys(c) | |
457 return c | |
458 | |
class ZipExtFile:
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Wrap fileobj, positioned at the member's data, for reading.

        zipinfo supplies compress_type, compress_size, filename and, when
        present, the expected CRC.  decrypt, if given, is called on every
        raw byte before decompression.
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0
        self.rawbuffer = b''     # compressed bytes not yet decompressed
        self.readbuffer = b''    # decompressed bytes not yet returned
        self.linebuffer = b''    # bytes held back by readline()
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = (b"\n", )
        self.lastdiscard = b''

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed  = False
        self.mode    = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            self.dc = zlib.decompressobj(-15)
        elif self.compress_type == ZIP_BZIP2:
            self.dc = bz2.BZ2Decompressor()
            self.compreadsize = 900000

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def set_univ_newlines(self, univ_newlines):
        """Enable or disable universal-newline handling in readline()."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = (b"\n", )
        if self.univ_newlines:
            self.nlSeps = (b"\r\n", b"\r", b"\n")

    def __iter__(self):
        return self

    def __next__(self):
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark the object as closed; the underlying file is not touched."""
        self.closed = True

    def _checkfornewline(self):
        """Return (index, length) of a line separator in linebuffer.

        (-1, -1) is returned when the buffer holds no separator.
        """
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[:1]) == (b'\r', b'\n'):
                self.linebuffer = self.linebuffer[1:]

            for sep in self.nlSeps:
                nl = self.linebuffer.find(sep)
                if nl >= 0:
                    nllen = len(sep)
                    return nl, nllen

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.
        """
        if size < 0:
            size = sys.maxsize
        elif size == 0:
            return b''

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl >= 0:
            # the next line was already in the buffer
            if size < nl:
                # The size limit falls before the line break: return exactly
                # size bytes and keep everything else buffered, so that no
                # data byte is mistaken for (and dropped as) a separator.
                chunk = self.linebuffer[:size]
                self.linebuffer = self.linebuffer[size:]
                self.lastdiscard = b''
                return chunk
        else:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = b''
                return s

        buf = self.linebuffer[:nl]
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + b"\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def _update_crc(self, newdata, eof):
        """Fold newdata into the running CRC and verify it when eof is true.

        Raises BadZipfile if the final CRC differs from the expected one.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipfile("Bad CRC-32 for file %r" % self.name)

    def read(self, size = None):
        """Read and return up to size bytes of decompressed member data.

        With size None or negative, everything that can be obtained is
        returned.  Raises BadZipfile on a CRC mismatch once the whole
        member has been consumed.
        """
        # act like file obj and return empty string if size is 0
        if size == 0:
            return b''

        # A negative size means "no limit"; normalise it so that it is
        # never used as a (negative) slice bound below.
        if size is not None and size < 0:
            size = None

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        if size is not None:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            else:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            data = self.fileobj.read(bytesToRead)
            self.bytes_read += len(data)
            self.rawbuffer += data

        # handle contents of raw buffer
        if self.rawbuffer:
            newdata = self.rawbuffer
            self.rawbuffer = b''

            # decrypt new data if we were given an object to handle that
            if newdata and self.decrypter is not None:
                newdata = bytes(map(self.decrypter, newdata))

            # decompress newly read data if necessary
            if newdata and self.compress_type != ZIP_STORED:
                newdata = self.dc.decompress(newdata)
                if self.compress_type == ZIP_DEFLATED:
                    self.rawbuffer = self.dc.unconsumed_tail
                else:
                    # Must stay bytes: the next chunk read from the file is
                    # appended to this buffer.
                    self.rawbuffer = b''
                if self.eof and len(self.rawbuffer) == 0:
                    # we're out of raw bytes (both from the file and
                    # the local buffer); flush just to make sure the
                    # decompressor is done
                    try:
                        newdata += self.dc.flush()
                    except AttributeError:
                        # this decompressor type has no flush()
                        pass
                    # prevent decompressor from being used again
                    self.dc = None

            self._update_crc(newdata, eof=(
                self.compress_size == self.bytes_read and
                len(self.rawbuffer) == 0))
            self.readbuffer += newdata

        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            data = self.readbuffer
            self.readbuffer = b''
        else:
            data = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return data
680 | |
681 | |
682 class ZipFile: | |
683 """ Class with methods to open, read, write, close, list zip files. | |
684 | |
685 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False) | |
686 | |
687 file: Either the path to the file, or a file-like object. | |
688 If it is a path, the file will be opened and closed by ZipFile. | |
689 mode: The mode can be either read "r", write "w" or append "a". | |
690 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib), | |
691 or ZIP_BZIP2 (requires bz2). | |
692 allowZip64: if True ZipFile will create files with ZIP64 extensions when | |
693 needed, otherwise it will raise an exception when this would | |
694 be necessary. | |
695 | |
696 """ | |
697 | |
698 fp = None # Set here since __del__ checks it | |
699 | |
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    `file` is either a path (str), which is opened here, or an already
    open file-like object, which is used as is.  `compression` must be
    one of ZIP_STORED, ZIP_DEFLATED or ZIP_BZIP2; the latter two require
    the zlib and bz2 modules respectively.  `allowZip64` is stored and
    consulted later when sizes or offsets exceed the classic limits.

    Raises RuntimeError for an unknown mode or compression method.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    # Reject compression methods whose backing module is unavailable.
    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
    elif compression != ZIP_STORED:
        raise RuntimeError("That compression method is not supported")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0                  # Level of printing: 0 through 3
    self.NameToInfo = {}            # Find file info given name
    self.filelist = []              # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None
    self.comment = b''

    if isinstance(file, str):
        # A filename was given: this object opens (and later closes) it.
        self._filePassed = 0
        self.filename = file
        open_modes = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        try:
            self.fp = io.open(file, open_modes[mode])
        except IOError:
            if mode != 'a':
                raise
            # Appending to a file that cannot be opened for update:
            # fall back to creating it.
            mode = key = 'w'
            self.fp = io.open(file, open_modes[mode])
    else:
        # A file-like object was given: the caller keeps ownership.
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    if key == 'r':
        self._GetContents()
    elif key == 'w':
        # set the modified flag so central directory gets written
        # even if no files are added to the archive
        self._didModify = True
    elif key == 'a':
        try:
            # See if file is a zip file
            self._RealGetContents()
            # seek to start of directory and overwrite
            self.fp.seek(self.start_dir, 0)
        except BadZipfile:
            # file is not a zip file, just append
            self.fp.seek(0, 2)

            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
    else:
        if not self._filePassed:
            self.fp.close()
            self.fp = None
        raise RuntimeError('Mode must be "r", "w" or "a"')
771 | |
def _GetContents(self):
    """Load the archive directory via _RealGetContents().

    If the archive turns out to be malformed (BadZipfile) and the file
    was opened by this object rather than passed in, the file is closed
    and self.fp reset before the exception is re-raised.
    """
    try:
        self._RealGetContents()
    except BadZipfile:
        opened_here = not self._filePassed
        if opened_here:
            self.fp.close()
            self.fp = None
        raise
782 | |
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-central-directory record, reads the whole central
    directory into memory and creates one ZipInfo per entry, appending it
    to self.filelist and registering it in self.NameToInfo.  Also sets
    self.comment and self.start_dir.

    Raises BadZipfile if the end record cannot be found, if an entry has
    a wrong signature, or if an entry header is cut short.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except IOError:
        raise BadZipfile("File is not a zip file")
    if not endrec:
        raise BadZipfile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self.comment = endrec[_ECD_COMMENT]     # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # Parse the directory from memory rather than from the archive file.
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if centdir[0:4] != stringCentralDir:
            raise BadZipfile("Bad magic number for central directory")
        if len(centdir) != sizeCentralDir:
            # A short read with a valid signature would otherwise surface
            # as struct.error from unpack() instead of BadZipfile.
            raise BadZipfile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        flags = centdir[5]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
            x.flag_bits, x.compress_type, t, d,
            x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        x._decodeExtra()
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
854 | |
855 | |
def namelist(self):
    """Return a new list with the file name of every archive member,
    in the order the members appear in self.filelist."""
    return [entry.filename for entry in self.filelist]
862 | |
def infolist(self):
    """Return the list of ZipInfo instances for the archive members.

    This is the internal self.filelist object itself, not a copy.
    """
    members = self.filelist
    return members
867 | |
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header line is written, followed by one line per member with
    its name, modification time and uncompressed size.  Output goes to
    `file`, which is passed straight to print().
    """
    heading = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(heading, file=file)
    for zinfo in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
        row = "%-46s %s %12d" % (zinfo.filename, stamp, zinfo.file_size)
        print(row, file=file)
876 | |
877 def testzip(self): | |
878 """Read all the files and check the CRC.""" | |
879 chunk_size = 2 ** 20 | |
880 for zinfo in self.filelist: | |
881 try: | |
882 # Read by chunks, to avoid an OverflowError or a | |
883 # MemoryError with very large embedded files. | |
884 f = self.open(zinfo.filename, "r") | |
885 while f.read(chunk_size): # Check CRC-32 | |
886 pass | |
887 except BadZipfile: | |
888 return zinfo.filename | |
889 | |
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError when the archive has no member called `name`.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError(
        'There is no item named %r in the archive' % name)
898 | |
def setpassword(self, pwd):
    """Set default password for encrypted files.

    `pwd` must be a bytes object; it is stored in self.pwd.
    """
    is_bytes = isinstance(pwd, bytes)
    assert is_bytes
    self.pwd = pwd
903 | |
def read(self, name, pwd=None):
    """Return file bytes (as a string) for name.

    The member is opened through self.open() in mode "r" with the given
    password and read in full.
    """
    member = self.open(name, "r", pwd)
    return member.read()
907 | |
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    `name` is a member name or a ZipInfo instance.  `mode` must be "r",
    "U" or "rU"; modes containing "U" enable universal newlines on the
    returned object.  `pwd` overrides the default password (self.pwd)
    for encrypted members.

    Raises RuntimeError for an invalid mode, a closed archive, a missing
    password or a wrong password; BadZipfile when the local file header
    is malformed or disagrees with the central directory; KeyError (from
    getinfo) for an unknown member name.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
              "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = io.open(self.filename, 'rb')

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        # The stored name is either UTF-8 or the historical cp437
        # encoding.  Accept either, so that non-ASCII names which were
        # decoded as cp437 are not rejected as a mismatch.
        acceptable = [zinfo.orig_filename.encode("utf-8")]
        try:
            acceptable.append(zinfo.orig_filename.encode("cp437"))
        except UnicodeEncodeError:
            pass
        if fname not in acceptable:
            raise BadZipfile(
                  'File name in directory %r and header %r differ.'
                  % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            crypt_header = zef_file.read(12)
            h = list(map(zd, crypt_header[0:12]))
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file", name)

        # build and return a ZipExtFile
        if zd is None:
            zef = ZipExtFile(zef_file, zinfo)
        else:
            zef = ZipExtFile(zef_file, zinfo, zd)

        # set universal newlines on ZipExtFile if necessary
        if "U" in mode:
            zef.set_univ_newlines(True)
        return zef
    except BaseException:
        # Do not leak the handle opened above when anything goes wrong;
        # a file object supplied by the caller is left untouched.
        if not self._filePassed:
            zef_file.close()
        raise
985 | |
def extract(self, member, path=None, pwd=None):
    """Extract one member and return the path it was written to.

    `member' may be a filename or a ZipInfo object.  The member is
    extracted under `path', or under the current working directory
    when `path' is None, using its full name.  `pwd' is passed on for
    encrypted members.
    """
    info = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(info, destination, pwd)
999 | |
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members by calling extract() on each of them.

    `members' defaults to everything returned by namelist(); when given
    it must be a subset of that list.  `path' and `pwd' are forwarded
    unchanged to extract().
    """
    selected = self.namelist() if members is None else members
    for entry in selected:
        self.extract(entry, path, pwd)
1011 | |
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
       file on the path targetpath.

    The member name is sanitized before it is joined to `targetpath`:
    any drive prefix, leading separators and "." / ".." components are
    dropped, so an entry cannot be written outside `targetpath`.
    Missing parent directories are created.  Members whose name ends in
    "/" are created as directories.  Returns the resulting path.
    """
    # Strip trailing path separator, unless it represents the root.
    if (targetpath[-1:] in (os.path.sep, os.path.altsep)
        and len(os.path.splitdrive(targetpath)[1]) > 1):
        targetpath = targetpath[:-1]

    # Build the destination pathname, replacing forward slashes with
    # platform specific separators and discarding components that
    # could redirect the write (absolute prefix, drive, "..").
    arcname = member.filename.replace('/', os.path.sep)
    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    arcname = os.path.splitdrive(arcname)[1]
    rejected = ('', os.path.curdir, os.path.pardir)
    arcname = os.path.sep.join(part for part in arcname.split(os.path.sep)
                               if part not in rejected)

    targetpath = os.path.normpath(os.path.join(targetpath, arcname))

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    # Slicing (rather than indexing) tolerates an empty member name.
    if member.filename[-1:] == '/':
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    source = self.open(member, pwd=pwd)
    try:
        # Both handles are released even if the copy fails part-way.
        with io.open(targetpath, "wb") as target:
            shutil.copyfileobj(source, target)
    finally:
        source.close()

    return targetpath
1048 | |
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    A duplicate member name only produces a debug message.  Raises
    RuntimeError when the archive is not writable, is closed, or the
    compression method is unsupported or its module is missing; raises
    LargeZipFile when the size or header offset exceeds ZIP64_LIMIT and
    ZIP64 extensions are not allowed.
    """
    if zinfo.filename in self.NameToInfo and self.debug:
        # Warning for duplicate names
        print("Duplicate name:", zinfo.filename)
    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
              "Attempt to write ZIP archive that was already closed")

    method = zinfo.compress_type
    if method == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
              "Compression requires the (missing) zlib module")
    if method == ZIP_BZIP2 and not bz2:
        raise RuntimeError(
              "Compression requires the (missing) bz2 module")
    if method not in (ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2):
        raise RuntimeError("That compression method is not supported")

    if zinfo.file_size > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile(
              "Zipfile size would require ZIP64 extensions")
1074 | |
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    `arcname` defaults to `filename`; its drive and leading separators
    are removed.  `compress_type` defaults to the archive's compression
    method.  Directories are stored as an empty entry whose name ends
    in "/".  Raises RuntimeError if the archive is already closed.
    """
    if not self.fp:
        raise RuntimeError(
              "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    date_time = time.localtime(st.st_mtime)[0:6]

    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    zinfo.compress_type = (self.compression if compress_type is None
                           else compress_type)

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        # Directory entries carry no data, only a header.
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader())
        return

    with open(filename, "rb") as src:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = crc = 0
        zinfo.compress_size = packed_size = 0
        zinfo.file_size = plain_size = 0
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == ZIP_DEFLATED:
            compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
        elif zinfo.compress_type == ZIP_BZIP2:
            compressor = bz2.BZ2Compressor()
        else:
            compressor = None
        while True:
            chunk = src.read(1024 * 8)
            if not chunk:
                break
            plain_size += len(chunk)
            crc = crc32(chunk, crc) & 0xffffffff
            if compressor:
                chunk = compressor.compress(chunk)
                packed_size += len(chunk)
            self.fp.write(chunk)
    if compressor:
        tail = compressor.flush()
        packed_size += len(tail)
        self.fp.write(tail)
        zinfo.compress_size = packed_size
    else:
        zinfo.compress_size = plain_size
    zinfo.CRC = crc
    zinfo.file_size = plain_size
    # Seek backwards and write CRC and file sizes
    resume_at = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
          zinfo.file_size))
    self.fp.seek(resume_at, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1157 | |
def writestr(self, zinfo_or_arcname, data):
    """Write a file into the archive.  The contents is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When only a name is given, a ZipInfo is created with the current
    local time, the archive's compression method and mode 0600.
    Raises RuntimeError if the archive is already closed.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")

    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16

    if not self.fp:
        raise RuntimeError(
              "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(data)            # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header data
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(data) & 0xffffffff       # CRC-32 checksum

    # Pick the compressor (if any) matching the member's method.
    if zinfo.compress_type == ZIP_DEFLATED:
        packer = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
             zlib.DEFLATED, -15)
    elif zinfo.compress_type == ZIP_BZIP2:
        packer = bz2.BZ2Compressor()
    else:
        packer = None

    if packer is None:
        zinfo.compress_size = zinfo.file_size
    else:
        data = packer.compress(data) + packer.flush()
        zinfo.compress_size = len(data)    # Compressed size

    zinfo.header_offset = self.fp.tell()    # Start of header data
    self.fp.write(zinfo.FileHeader())
    self.fp.write(data)
    self.fp.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
              zinfo.file_size))
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1204 | |
def __del__(self):
    """Finalizer: invoke close() so that an archive the user forgot to
    close is still closed."""
    self.close()
1208 | |
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Does nothing when the archive is already closed.  For a modified
    archive the central directory, the optional ZIP64 end records and
    the end-of-archive record (with the archive comment) are written
    first.  self.fp is always reset to None, and a file opened by this
    object is always closed, even if writing those records fails.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify: # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                            '<HH' + 'Q'*len(extra),
                            1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Dump the values being packed to help diagnose the
                    # warning, then let it propagate.
                    print((structCentralDir, stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset), file=sys.stderr)
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = count
            centDirSize = pos2 - pos1
            centDirOffset = pos1
            if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                centDirOffset > ZIP64_LIMIT or
                centDirSize > ZIP64_LIMIT):
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                        structEndArchive64, stringEndArchive64,
                        44, 45, 45, 0, 0, centDirCount, centDirCount,
                        centDirSize, centDirOffset)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                        structEndArchive64Locator,
                        stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                # The classic end record stores saturated values.
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            # check for valid comment length
            if len(self.comment) >= ZIP_MAX_COMMENT:
                if self.debug > 0:
                    msg = 'Archive comment is too long; truncating to %d bytes' \
                          % ZIP_MAX_COMMENT
                    # Actually emit the warning; it used to be built and
                    # then discarded.
                    print(msg)
                self.comment = self.comment[:ZIP_MAX_COMMENT]

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, centDirCount, centDirCount,
                                 centDirSize, centDirOffset, len(self.comment))
            self.fp.write(endrec)
            self.fp.write(self.comment)
            self.fp.flush()
    finally:
        # Always release the handle, so that a failure above neither
        # leaks the file nor leaves the archive looking open.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1317 | |
1318 | |
class PyZipFile(ZipFile):
    """ZipFile subclass for building archives of compiled Python modules
    and packages."""

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        Three cases are handled:

        * a package directory (contains __init__.py): its modules and
          its package subdirectories are added recursively under
          basename/<package name>;
        * a plain directory: every *.py file directly inside it is
          added;
        * anything else: pathname must end in ".py" and that single
          module is added, otherwise RuntimeError is raised.

        What gets stored is the compiled file chosen by _get_codename()
        (module.pyo or module.pyc), compiling module.py if necessary.
        """
        dir, name = os.path.split(pathname)

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                      'Files added with writepy() must end with ".py"')
            self._add_module(pathname[0:-3], basename, "Adding file")
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in os.listdir(pathname):
                path = os.path.join(pathname, filename)
                root, ext = os.path.splitext(filename)
                if ext == ".py":
                    self._add_module(path[0:-3], basename, "Adding")
            return

        # This is a package directory, add it
        basename = "%s/%s" % (basename, name) if basename else name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        self._add_module(initname[0:-3], basename, "Adding")
        dirlist = os.listdir(pathname)
        dirlist.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for filename in dirlist:
            path = os.path.join(pathname, filename)
            root, ext = os.path.splitext(filename)
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename)  # Recursive call
            elif ext == ".py":
                self._add_module(path[0:-3], basename, "Adding")

    def _add_module(self, module_path, basename, label):
        """Resolve the compiled file for module_path (a path without the
        ".py" suffix), print `label` and the archive name in debug mode,
        and write the file to the archive."""
        fname, arcname = self._get_codename(module_path, basename)
        if self.debug:
            print(label, arcname)
        self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        `pathname` is a module path without extension.  An up-to-date
        .pyo file is preferred; otherwise the .pyc file is used, and it
        is (re)compiled first when missing or older than the .py source.
        A compile error is printed rather than raised.  The archive name
        is the chosen file's base name, prefixed with "basename/" when
        basename is non-empty.
        """
        file_py  = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"

        pyo_is_current = (os.path.isfile(file_pyo) and
            os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime)
        if pyo_is_current:
            fname = file_pyo    # Use .pyo file
        else:
            pyc_is_stale = (not os.path.isfile(file_pyc) or
                os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime)
            if pyc_is_stale:
                import py_compile
                if self.debug:
                    print("Compiling", file_py)
                try:
                    py_compile.compile(file_py, file_pyc, None, True)
                except py_compile.PyCompileError as err:
                    print(err.msg)
            fname = file_pyc

        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
1416 | |
1417 | |
def main(args = None):
    """Command-line interface: list, test, extract or create an archive.

    `args` is the argument list without the program name and defaults to
    sys.argv[1:].  The first argument selects the action (-l, -t, -e or
    -c).  On a missing or malformed command line the usage text is
    printed and the process exits with status 1.  The archive is closed
    in every branch, including when the action fails.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            # This branch used to leave the archive open.
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                tgtdir = os.path.dirname(tgt)
                if not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Files are stored deflated; directories are walked
            # recursively; anything else is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1491 | |
1492 if __name__ == "__main__": | |
1493 main() |