Add ZIP64 support to wxZipInputStream and wxZipOutputStream.

Zip archives larger than 4GB, containing files larger than 4GB, or holding more
than 65k files are saved in ZIP64 format, which adds a few additional footers
and extra fields so that these limits can be exceeded.

This implements the PKWARE specification available at:
https://www.pkware.com/support/zip-app-note

It has been tested for compatibility with Windows internal ZIP folders, OSX
Archive Utility and 7-zip.

Closes https://github.com/wxWidgets/wxWidgets/pull/72
This commit is contained in:
Tobias Taschner
2015-08-13 14:27:24 +02:00
committed by Vadim Zeitlin
parent 0f39ea0963
commit b9d46f0719
3 changed files with 240 additions and 34 deletions

View File

@@ -49,7 +49,7 @@ Changes in behaviour which may result in build errors
All:
- Add UTF-8 support to wxZip{Input,Output}Stream (Tobias Taschner).
- Add UTF-8 and ZIP 64 support to wxZip{Input,Output}Stream (Tobias Taschner).
- Allow calling wxItemContainer::Add() and similar with std::vector<> argument.
- Add "%z" support to printf()-like functions like wxString::Format() (RIVDSL).
- Add DOCTYPE support to wxXmlDocument (Nick Matthews).

View File

@@ -234,6 +234,8 @@ private:
size_t WriteDescriptor(wxOutputStream& stream, wxUint32 crc,
wxFileOffset compressedSize, wxFileOffset size);
bool LoadExtraInfo(const char* extraData, wxUint16 extraLen, bool localInfo);
wxUint8 m_SystemMadeBy; // one of enum wxZipSystem
wxUint8 m_VersionMadeBy; // major * 10 + minor

View File

@@ -33,7 +33,8 @@
// value for the 'version needed to extract' field (20 means 2.0)
enum {
VERSION_NEEDED_TO_EXTRACT = 20
VERSION_NEEDED_TO_EXTRACT = 20,
Z64_VERSION_NEEDED_TO_EXTRACT = 45 // File uses ZIP64 format extensions
};
// signatures for the various records (PKxx)
@@ -41,7 +42,9 @@ enum {
CENTRAL_MAGIC = 0x02014b50, // central directory record
LOCAL_MAGIC = 0x04034b50, // local header
END_MAGIC = 0x06054b50, // end of central directory record
SUMS_MAGIC = 0x08074b50 // data descriptor (info-zip)
SUMS_MAGIC = 0x08074b50, // data descriptor (info-zip)
Z64_LOC_MAGIC = 0x07064b50, // zip64 end of central directory locator
Z64_END_MAGIC = 0x06064b50 // zip64 end of central directory record
};
// unix file attributes. zip stores them in the high 16 bits of the
@@ -57,7 +60,9 @@ enum {
CENTRAL_SIZE = 46,
LOCAL_SIZE = 30,
END_SIZE = 22,
SUMS_SIZE = 12
SUMS_SIZE = 12,
Z64_LOC_SIZE = 20,
Z64_END_SIZE = 56
};
// The number of bytes that must be written to an wxZipOutputStream before
@@ -103,6 +108,34 @@ static wxString ReadString(wxInputStream& stream, wxUint16 len, wxMBConv& conv)
return str;
}
// Saturating conversion of a 64 bit value to wxUint16: values that fit are
// returned unchanged, anything larger yields 0xffff.
static inline wxUint16 LimitUint16(wxUint64 value)
{
    if (value <= 0xffff)
        return wx_truncate_cast(wxUint16, value);

    return 0xffff;
}
// Saturating conversion of a 64 bit value to wxUint32: values that fit are
// returned unchanged, anything larger yields 0xffffffff.
static inline wxUint32 LimitUint32(wxUint64 value)
{
    if (value <= 0xffffffff)
        return wx_truncate_cast(wxUint32, value);

    return 0xffffffff;
}
// Decode a little endian wxUint64 number from a character array
//
// m must point to at least 8 readable bytes; m[0] is the least significant
// byte and m[7] the most significant one.
//
// Every byte is widened to wxUint64 before it is shifted. Shifting the bare
// unsigned char would be done in (signed) int after integral promotion, so a
// byte >= 0x80 shifted by 24 would produce a negative int which, once
// converted to wxUint64 for the bitwise OR, would set all of the upper 32
// bits of the result.
static inline wxUint64 CrackUint64(const char *m)
{
    const unsigned char *n = reinterpret_cast<const unsigned char*>(m);
    return (static_cast<wxUint64>(n[7]) << 56) |
           (static_cast<wxUint64>(n[6]) << 48) |
           (static_cast<wxUint64>(n[5]) << 40) |
           (static_cast<wxUint64>(n[4]) << 32) |
           (static_cast<wxUint64>(n[3]) << 24) |
           (static_cast<wxUint64>(n[2]) << 16) |
           (static_cast<wxUint64>(n[1]) << 8) |
            static_cast<wxUint64>(n[0]);
}
// Decode a little endian wxUint32 number from a character array
//
static inline wxUint32 CrackUint32(const char *m)
@@ -171,10 +204,12 @@ class wxZipHeader
{
public:
wxZipHeader(wxInputStream& stream, size_t size);
wxZipHeader(const char* data, size_t size);
inline wxUint8 Read8();
inline wxUint16 Read16();
inline wxUint32 Read32();
inline wxUint64 Read64();
const char *GetData() const { return m_data; }
size_t GetSize() const { return m_size; }
@@ -186,6 +221,7 @@ public:
wxZipHeader& operator>>(wxUint8& n) { n = Read8(); return *this; }
wxZipHeader& operator>>(wxUint16& n) { n = Read16(); return *this; }
wxZipHeader& operator>>(wxUint32& n) { n = Read32(); return *this; }
wxZipHeader& operator>>(wxUint64& n) { n = Read64(); return *this; }
private:
char m_data[64];
@@ -204,6 +240,15 @@ wxZipHeader::wxZipHeader(wxInputStream& stream, size_t size)
m_ok = m_size == size;
}
// Construct a header reader over a copy of an in-memory buffer.
//
// data  bytes to copy into the internal buffer
// size  number of bytes to copy; must not exceed sizeof(m_data)
//
// If size is too large nothing is copied and the object is left in a
// consistent failed state (m_size == 0, m_ok == false) instead of reporting
// success over an uninitialised buffer.
wxZipHeader::wxZipHeader(const char* data, size_t size)
  : m_size(size <= sizeof(m_data) ? size : 0),
    m_pos(0),
    m_ok(size <= sizeof(m_data))
{
    wxCHECK_RET(m_ok, wxT("buffer too small"));
    memcpy(m_data, data, m_size);
}
inline wxUint8 wxZipHeader::Read8()
{
wxASSERT(m_pos < m_size);
@@ -226,6 +271,13 @@ inline wxUint32 wxZipHeader::Read32()
return n;
}
// Decode the 8 bytes at the current read position with CrackUint64() and
// move the read position past them.
inline wxUint64 wxZipHeader::Read64()
{
    wxASSERT(m_pos + 8 <= m_size);

    const char* const field = m_data + m_pos;
    m_pos += 8;
    return CrackUint64(field);
}
/////////////////////////////////////////////////////////////////////////////
// Stored input stream
@@ -962,6 +1014,30 @@ void wxZipEntry::UnsetNotifier()
m_zipnotifier = NULL;
}
bool wxZipEntry::LoadExtraInfo(const char* extraData, wxUint16 extraLen, bool localInfo)
{
wxZipHeader ds(extraData, extraLen);
// A file may contain larger size, compressed size or offset
// in a zip64 extra data block. Use the 64 bit values if available
if ( extraLen > 4 && ds.Read16() == 1 )
{
ds.Read16(); // skip record size
if ( m_Size == 0xffffffff )
m_Size = ds.Read64();
if ( m_CompressedSize == 0xffffffff )
m_CompressedSize = ds.Read64();
if ( !localInfo && m_Offset == 0xffffffff )
m_Offset = ds.Read64();
// extraInfo was used and parsed
return true;
}
// extraInfo had unknown format
return false;
}
size_t wxZipEntry::ReadLocal(wxInputStream& stream, wxMBConv& conv)
{
wxUint16 nameLen, extraLen;
@@ -996,6 +1072,12 @@ size_t wxZipEntry::ReadLocal(wxInputStream& stream, wxMBConv& conv)
stream.Read(m_LocalExtra->GetData(), extraLen);
if (stream.LastRead() != extraLen + 0u)
return 0;
if (LoadExtraInfo(m_Extra->GetData(), extraLen, true))
{
Release(m_LocalExtra);
m_LocalExtra = NULL;
}
}
}
@@ -1010,24 +1092,37 @@ size_t wxZipEntry::WriteLocal(wxOutputStream& stream, wxMBConv& conv) const
if (!name) name = "";
wxUint16 nameLen = wx_truncate_cast(wxUint16, strlen(name));
bool z64Required = m_CompressedSize > 0xffffffff || m_Size > 0xffffffff;
wxUint16 versionNeeded =
(z64Required) ? Z64_VERSION_NEEDED_TO_EXTRACT : m_VersionNeeded;
wxDataOutputStream ds(stream);
ds << m_VersionNeeded << m_Flags << m_Method;
ds << versionNeeded << m_Flags << m_Method;
ds.Write32(GetDateTime().GetAsDOS());
ds.Write32(m_Crc);
ds.Write32(m_CompressedSize != wxInvalidOffset ?
wx_truncate_cast(wxUint32, m_CompressedSize) : 0);
LimitUint32(m_CompressedSize) : 0);
ds.Write32(m_Size != wxInvalidOffset ?
wx_truncate_cast(wxUint32, m_Size) : 0);
LimitUint32(m_Size) : 0);
ds << nameLen;
wxUint16 extraLen = wx_truncate_cast(wxUint16, GetLocalExtraLen());
if (z64Required)
extraLen += 20; // tag and 2x64bit file sizes
ds.Write16(extraLen);
stream.Write(name, nameLen);
if (extraLen)
stream.Write(m_LocalExtra->GetData(), extraLen);
if (z64Required)
{
ds.Write16(1); // id
ds.Write16(16); // record size
ds.Write64(m_CompressedSize);
ds.Write64(m_Size);
}
if (GetLocalExtraLen())
stream.Write(m_LocalExtra->GetData(), GetLocalExtraLen());
return LOCAL_SIZE + nameLen + extraLen;
}
@@ -1066,6 +1161,12 @@ size_t wxZipEntry::ReadCentral(wxInputStream& stream, wxMBConv& conv)
stream.Read(m_Extra->GetData(), extraLen);
if (stream.LastRead() != extraLen + 0u)
return 0;
if (LoadExtraInfo(m_Extra->GetData(), extraLen, false))
{
Release(m_Extra);
m_Extra = NULL;
}
}
}
@@ -1094,27 +1195,63 @@ size_t wxZipEntry::WriteCentral(wxOutputStream& stream, wxMBConv& conv) const
wxUint16 commentLen = wx_truncate_cast(wxUint16, strlen(comment));
wxUint16 extraLen = wx_truncate_cast(wxUint16, GetExtraLen());
wxUint16 z64InfoLen = 0;
bool z64Required = false;
if ( m_CompressedSize > 0xffffffff )
{
z64Required = true;
z64InfoLen += 8;
}
if ( m_Size > 0xffffffff )
{
z64Required = true;
z64InfoLen += 8;
}
if ( m_Offset > 0xffffffff )
{
z64Required = true;
z64InfoLen += 8;
}
if (z64Required)
{
extraLen += 4 + z64InfoLen;
}
wxUint16 versionNeeded =
(z64Required) ? Z64_VERSION_NEEDED_TO_EXTRACT : GetVersionNeeded();
wxDataOutputStream ds(stream);
ds << CENTRAL_MAGIC << m_VersionMadeBy << m_SystemMadeBy;
ds.Write16(wx_truncate_cast(wxUint16, GetVersionNeeded()));
ds.Write16(versionNeeded);
ds.Write16(wx_truncate_cast(wxUint16, GetFlags()));
ds.Write16(wx_truncate_cast(wxUint16, GetMethod()));
ds.Write32(GetDateTime().GetAsDOS());
ds.Write32(GetCrc());
ds.Write32(wx_truncate_cast(wxUint32, GetCompressedSize()));
ds.Write32(wx_truncate_cast(wxUint32, GetSize()));
ds.Write32(LimitUint32(GetCompressedSize()));
ds.Write32(LimitUint32(GetSize()));
ds.Write16(nameLen);
ds.Write16(extraLen);
ds << commentLen << m_DiskStart << m_InternalAttributes
<< m_ExternalAttributes << wx_truncate_cast(wxUint32, GetOffset());
<< m_ExternalAttributes << LimitUint32(GetOffset());
stream.Write(name, nameLen);
if (extraLen)
stream.Write(GetExtra(), extraLen);
if (z64Required)
{
ds.Write16(1); // tag
ds.Write16(z64InfoLen); // record size
if (m_CompressedSize > 0xffffffff)
ds.Write64(m_CompressedSize);
if (m_Size > 0xffffffff)
ds.Write64(m_Size);
if (m_Offset > 0xffffffff)
ds.Write64(m_Offset);
}
if (GetExtraLen())
stream.Write(GetExtra(), GetExtraLen());
stream.Write(comment, commentLen);
return CENTRAL_SIZE + nameLen + extraLen + commentLen;
@@ -1199,17 +1336,17 @@ public:
wxString GetComment() const { return m_Comment; }
void SetDiskNumber(int num)
{ m_DiskNumber = wx_truncate_cast(wxUint16, num); }
{ m_DiskNumber = wx_truncate_cast(wxUint32, num); }
void SetStartDisk(int num)
{ m_StartDisk = wx_truncate_cast(wxUint16, num); }
{ m_StartDisk = wx_truncate_cast(wxUint32, num); }
void SetEntriesHere(int num)
{ m_EntriesHere = wx_truncate_cast(wxUint16, num); }
{ m_EntriesHere = wx_truncate_cast(wxUint32, num); }
void SetTotalEntries(int num)
{ m_TotalEntries = wx_truncate_cast(wxUint16, num); }
{ m_TotalEntries = wx_truncate_cast(wxUint32, num); }
void SetSize(wxFileOffset size)
{ m_Size = wx_truncate_cast(wxUint32, size); }
{ m_Size = wx_truncate_cast(wxUint64, size); }
void SetOffset(wxFileOffset offset)
{ m_Offset = wx_truncate_cast(wxUint32, offset); }
{ m_Offset = wx_truncate_cast(wxUint64, offset); }
void SetComment(const wxString& comment)
{ m_Comment = comment; }
@@ -1217,12 +1354,12 @@ public:
bool Write(wxOutputStream& stream, wxMBConv& conv) const;
private:
wxUint16 m_DiskNumber;
wxUint16 m_StartDisk;
wxUint16 m_EntriesHere;
wxUint16 m_TotalEntries;
wxUint32 m_Size;
wxUint32 m_Offset;
wxUint32 m_DiskNumber;
wxUint32 m_StartDisk;
wxUint64 m_EntriesHere;
wxUint64 m_TotalEntries;
wxUint64 m_Size;
wxUint64 m_Offset;
wxString m_Comment;
};
@@ -1245,8 +1382,41 @@ bool wxZipEndRec::Write(wxOutputStream& stream, wxMBConv& conv) const
wxDataOutputStream ds(stream);
ds << END_MAGIC << m_DiskNumber << m_StartDisk << m_EntriesHere
<< m_TotalEntries << m_Size << m_Offset << commentLen;
// Check if zip64 is required
if (m_DiskNumber > 0xffff || m_StartDisk > 0xffff ||
m_EntriesHere > 0xffff || m_TotalEntries > 0xffff ||
m_Size > 0xffffffff || m_Offset > 0xffffffff)
{
// Write zip64 end of central directory record
wxFileOffset z64endOffset = stream.TellO();
ds.Write32(Z64_END_MAGIC);
ds.Write64(static_cast<wxUint64>(Z64_END_SIZE - 12)); // size of zip64 end record
ds.Write16(Z64_VERSION_NEEDED_TO_EXTRACT);
ds.Write16(Z64_VERSION_NEEDED_TO_EXTRACT);
ds.Write32(m_DiskNumber);
ds.Write32(m_StartDisk);
ds.Write64(m_EntriesHere);
ds.Write64(m_TotalEntries);
ds.Write64(m_Size);
ds.Write64(m_Offset);
// Write zip64 end of central directory locator
ds.Write32(Z64_LOC_MAGIC);
ds.Write32(m_StartDisk);
ds.Write64(z64endOffset);
ds.Write32(1); // total number of disks
}
ds << END_MAGIC;
ds.Write16(LimitUint16(m_DiskNumber));
ds.Write16(LimitUint16(m_StartDisk));
ds.Write16(LimitUint16(m_EntriesHere));
ds.Write16(LimitUint16(m_TotalEntries));
ds.Write32(LimitUint32(m_Size));
ds.Write32(LimitUint32(m_Offset));
ds << commentLen;
stream.Write(comment, commentLen);
@@ -1261,8 +1431,13 @@ bool wxZipEndRec::Read(wxInputStream& stream, wxMBConv& conv)
wxUint16 commentLen;
ds >> m_DiskNumber >> m_StartDisk >> m_EntriesHere
>> m_TotalEntries >> m_Size >> m_Offset >> commentLen;
m_DiskNumber = ds.Read16();
m_StartDisk = ds.Read16();
m_EntriesHere = ds.Read16();
m_TotalEntries = ds.Read16();
m_Size = ds.Read32();
m_Offset = ds.Read32();
ds >> commentLen;
if (commentLen) {
m_Comment = ReadString(stream, commentLen, conv);
@@ -1276,6 +1451,34 @@ bool wxZipEndRec::Read(wxInputStream& stream, wxMBConv& conv)
wxLogWarning(_("assuming this is a multi-part zip concatenated"));
}
// Look for zip64 end record
stream.SeekI(-(END_SIZE+Z64_LOC_SIZE), wxFromCurrent);
wxZipHeader dsLoc(stream, Z64_LOC_SIZE);
if ( dsLoc && dsLoc.Read32() == Z64_LOC_MAGIC )
{
// Found zip64 locator, read z64 directory
dsLoc.Read32(); // skip: disk with the start of the zip64
wxUint64 z64EndOffset = dsLoc.Read64();
// Read zip64 end of central directory record
if (stream.SeekI(z64EndOffset) == wxInvalidOffset)
return false;
wxZipHeader dsEnd(stream, Z64_END_SIZE);
if ( dsEnd.Read32() != Z64_END_MAGIC ||
dsEnd.Read64() < Z64_END_SIZE - 12 ) // Check record size
return false;
dsEnd.Read16(); // skip: version made by
dsEnd.Read16(); // skip: version needed to extract
m_DiskNumber = dsEnd.Read32();
m_StartDisk = dsEnd.Read32();
m_EntriesHere = dsEnd.Read64();
m_TotalEntries = dsEnd.Read64();
m_Size = dsEnd.Read64();
m_Offset = dsEnd.Read64();
}
return true;
}
@@ -1536,7 +1739,7 @@ wxStreamError wxZipInputStream::ReadCentral()
if (!AtHeader())
CloseEntry();
if (m_signature == END_MAGIC)
if (m_signature == END_MAGIC || m_signature == Z64_END_MAGIC)
return wxSTREAM_EOF;
if (m_signature != CENTRAL_MAGIC) {
@@ -1571,7 +1774,8 @@ wxStreamError wxZipInputStream::ReadLocal(bool readEndRec /*=false*/)
if (!m_signature)
m_signature = ReadSignature();
if (m_signature == CENTRAL_MAGIC || m_signature == END_MAGIC) {
if (m_signature == CENTRAL_MAGIC || m_signature == END_MAGIC ||
m_signature == Z64_END_MAGIC) {
if (m_streamlink && !m_streamlink->GetOutputStream()) {
m_streamlink->Release(this);
m_streamlink = NULL;
@@ -1604,7 +1808,7 @@ wxStreamError wxZipInputStream::ReadLocal(bool readEndRec /*=false*/)
m_signature = ReadSignature();
}
if (m_signature == END_MAGIC) {
if (m_signature == END_MAGIC || m_signature == Z64_END_MAGIC) {
if (readEndRec || m_streamlink) {
wxZipEndRec endrec;
endrec.Read(*m_parent_i_stream, GetConv());