From b9d46f07191f15e83b593edc5cbda70b62d0830e Mon Sep 17 00:00:00 2001 From: Tobias Taschner Date: Thu, 13 Aug 2015 14:27:24 +0200 Subject: [PATCH] Add ZIP64 support to wxZipInputStream and wxZipOutputStream. Zip archives with sizes larger than 4GB or containing files larger than 4GB or more than 65k files are saved in ZIP64 format, which adds a few additional footers and extra fields to allow exceeding these limits. This implements the PKWARE specification available at: https://www.pkware.com/support/zip-app-note It has been tested for compatibility with Windows internal ZIP folders, OSX Archive Utility and 7-zip. Closes https://github.com/wxWidgets/wxWidgets/pull/72 --- docs/changes.txt | 2 +- include/wx/zipstrm.h | 2 + src/common/zipstrm.cpp | 270 ++++++++++++++++++++++++++++++++++++----- 3 files changed, 240 insertions(+), 34 deletions(-) diff --git a/docs/changes.txt b/docs/changes.txt index 46febbaceb..cafb0c207c 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -49,7 +49,7 @@ Changes in behaviour which may result in build errors All: -- Add UTF-8 support to wxZip{Input,Output}Stream (Tobias Taschner). +- Add UTF-8 and ZIP 64 support to wxZip{Input,Output}Stream (Tobias Taschner). - Allow calling wxItemContainer::Add() and similar with std::vector<> argument. - Add "%z" support to printf()-like functions like wxString::Format() (RIVDSL). - Add DOCTYPE support to wxXmlDocument (Nick Matthews). 
diff --git a/include/wx/zipstrm.h b/include/wx/zipstrm.h index 9d66603aae..4866535bef 100644 --- a/include/wx/zipstrm.h +++ b/include/wx/zipstrm.h @@ -234,6 +234,8 @@ private: size_t WriteDescriptor(wxOutputStream& stream, wxUint32 crc, wxFileOffset compressedSize, wxFileOffset size); + bool LoadExtraInfo(const char* extraData, wxUint16 extraLen, bool localInfo); + wxUint8 m_SystemMadeBy; // one of enum wxZipSystem wxUint8 m_VersionMadeBy; // major * 10 + minor diff --git a/src/common/zipstrm.cpp b/src/common/zipstrm.cpp index 167e8abc51..7003836a93 100644 --- a/src/common/zipstrm.cpp +++ b/src/common/zipstrm.cpp @@ -33,7 +33,8 @@ // value for the 'version needed to extract' field (20 means 2.0) enum { - VERSION_NEEDED_TO_EXTRACT = 20 + VERSION_NEEDED_TO_EXTRACT = 20, + Z64_VERSION_NEEDED_TO_EXTRACT = 45 // File uses ZIP64 format extensions }; // signatures for the various records (PKxx) @@ -41,7 +42,9 @@ enum { CENTRAL_MAGIC = 0x02014b50, // central directory record LOCAL_MAGIC = 0x04034b50, // local header END_MAGIC = 0x06054b50, // end of central directory record - SUMS_MAGIC = 0x08074b50 // data descriptor (info-zip) + SUMS_MAGIC = 0x08074b50, // data descriptor (info-zip) + Z64_LOC_MAGIC = 0x07064b50, // zip64 end of central directory locator + Z64_END_MAGIC = 0x06064b50 // zip64 end of central directory record }; // unix file attributes. 
zip stores them in the high 16 bits of the @@ -57,7 +60,9 @@ enum { CENTRAL_SIZE = 46, LOCAL_SIZE = 30, END_SIZE = 22, - SUMS_SIZE = 12 + SUMS_SIZE = 12, + Z64_LOC_SIZE = 20, + Z64_END_SIZE = 56 }; // The number of bytes that must be written to an wxZipOutputStream before @@ -103,6 +108,34 @@ static wxString ReadString(wxInputStream& stream, wxUint16 len, wxMBConv& conv) return str; } +static inline wxUint16 LimitUint16(wxUint64 value) +{ + if (value > 0xffff) + return 0xffff; + else + return wx_truncate_cast(wxUint16, value); +} + +static inline wxUint32 LimitUint32(wxUint64 value) +{ + if (value > 0xffffffff) + return 0xffffffff; + else + return wx_truncate_cast(wxUint32,value); +} + +// Decode a little endian wxUint64 number from a character array +// +static inline wxUint64 CrackUint64(const char *m) +{ + const unsigned char *n = (const unsigned char*)m; + return (static_cast(n[7]) << 56) | + (static_cast(n[6]) << 48) | + (static_cast(n[5]) << 40) | + (static_cast(n[4]) << 32) | + (n[3] << 24) | (n[2] << 16) | (n[1] << 8) | n[0]; +} + // Decode a little endian wxUint32 number from a character array // static inline wxUint32 CrackUint32(const char *m) @@ -171,10 +204,12 @@ class wxZipHeader { public: wxZipHeader(wxInputStream& stream, size_t size); + wxZipHeader(const char* data, size_t size); inline wxUint8 Read8(); inline wxUint16 Read16(); inline wxUint32 Read32(); + inline wxUint64 Read64(); const char *GetData() const { return m_data; } size_t GetSize() const { return m_size; } @@ -186,6 +221,7 @@ public: wxZipHeader& operator>>(wxUint8& n) { n = Read8(); return *this; } wxZipHeader& operator>>(wxUint16& n) { n = Read16(); return *this; } wxZipHeader& operator>>(wxUint32& n) { n = Read32(); return *this; } + wxZipHeader& operator>>(wxUint64& n) { n = Read64(); return *this; } private: char m_data[64]; @@ -204,6 +240,15 @@ wxZipHeader::wxZipHeader(wxInputStream& stream, size_t size) m_ok = m_size == size; } +wxZipHeader::wxZipHeader(const char* data, size_t 
size) + : m_size(size), + m_pos(0), + m_ok(true) +{ + wxCHECK_RET(size <= sizeof(m_data), wxT("buffer too small")); + memcpy(m_data, data, size); +} + inline wxUint8 wxZipHeader::Read8() { wxASSERT(m_pos < m_size); @@ -226,6 +271,13 @@ inline wxUint32 wxZipHeader::Read32() return n; } +inline wxUint64 wxZipHeader::Read64() +{ + wxASSERT(m_pos + 8 <= m_size); + wxUint64 n = CrackUint64(m_data + m_pos); + m_pos += 8; + return n; +} ///////////////////////////////////////////////////////////////////////////// // Stored input stream @@ -962,6 +1014,30 @@ void wxZipEntry::UnsetNotifier() m_zipnotifier = NULL; } +bool wxZipEntry::LoadExtraInfo(const char* extraData, wxUint16 extraLen, bool localInfo) +{ + wxZipHeader ds(extraData, extraLen); + + // A file may contain larger size, compressed size or offset + // in a zip64 extra data block. Use the 64 bit values if available + if ( extraLen > 4 && ds.Read16() == 1 ) + { + ds.Read16(); // skip record size + if ( m_Size == 0xffffffff ) + m_Size = ds.Read64(); + if ( m_CompressedSize == 0xffffffff ) + m_CompressedSize = ds.Read64(); + if ( !localInfo && m_Offset == 0xffffffff ) + m_Offset = ds.Read64(); + + // extraInfo was used and parsed + return true; + } + + // extraInfo had unknown format + return false; +} + size_t wxZipEntry::ReadLocal(wxInputStream& stream, wxMBConv& conv) { wxUint16 nameLen, extraLen; @@ -996,6 +1072,12 @@ size_t wxZipEntry::ReadLocal(wxInputStream& stream, wxMBConv& conv) stream.Read(m_LocalExtra->GetData(), extraLen); if (stream.LastRead() != extraLen + 0u) return 0; + + if (LoadExtraInfo(m_Extra->GetData(), extraLen, true)) + { + Release(m_LocalExtra); + m_LocalExtra = NULL; + } } } @@ -1010,24 +1092,37 @@ size_t wxZipEntry::WriteLocal(wxOutputStream& stream, wxMBConv& conv) const if (!name) name = ""; wxUint16 nameLen = wx_truncate_cast(wxUint16, strlen(name)); + bool z64Required = m_CompressedSize > 0xffffffff || m_Size > 0xffffffff; + wxUint16 versionNeeded = + (z64Required) ? 
Z64_VERSION_NEEDED_TO_EXTRACT : m_VersionNeeded; + wxDataOutputStream ds(stream); - ds << m_VersionNeeded << m_Flags << m_Method; + ds << versionNeeded << m_Flags << m_Method; ds.Write32(GetDateTime().GetAsDOS()); ds.Write32(m_Crc); ds.Write32(m_CompressedSize != wxInvalidOffset ? - wx_truncate_cast(wxUint32, m_CompressedSize) : 0); + LimitUint32(m_CompressedSize) : 0); ds.Write32(m_Size != wxInvalidOffset ? - wx_truncate_cast(wxUint32, m_Size) : 0); + LimitUint32(m_Size) : 0); ds << nameLen; wxUint16 extraLen = wx_truncate_cast(wxUint16, GetLocalExtraLen()); + if (z64Required) + extraLen += 20; // tag and 2x64bit file sizes ds.Write16(extraLen); stream.Write(name, nameLen); - if (extraLen) - stream.Write(m_LocalExtra->GetData(), extraLen); + if (z64Required) + { + ds.Write16(1); // id + ds.Write16(16); // record size + ds.Write64(m_CompressedSize); + ds.Write64(m_Size); + } + if (GetLocalExtraLen()) + stream.Write(m_LocalExtra->GetData(), GetLocalExtraLen()); return LOCAL_SIZE + nameLen + extraLen; } @@ -1066,6 +1161,12 @@ size_t wxZipEntry::ReadCentral(wxInputStream& stream, wxMBConv& conv) stream.Read(m_Extra->GetData(), extraLen); if (stream.LastRead() != extraLen + 0u) return 0; + + if (LoadExtraInfo(m_Extra->GetData(), extraLen, false)) + { + Release(m_Extra); + m_Extra = NULL; + } } } @@ -1094,27 +1195,63 @@ size_t wxZipEntry::WriteCentral(wxOutputStream& stream, wxMBConv& conv) const wxUint16 commentLen = wx_truncate_cast(wxUint16, strlen(comment)); wxUint16 extraLen = wx_truncate_cast(wxUint16, GetExtraLen()); + wxUint16 z64InfoLen = 0; + + bool z64Required = false; + if ( m_CompressedSize > 0xffffffff ) + { + z64Required = true; + z64InfoLen += 8; + } + if ( m_Size > 0xffffffff ) + { + z64Required = true; + z64InfoLen += 8; + } + if ( m_Offset > 0xffffffff ) + { + z64Required = true; + z64InfoLen += 8; + } + if (z64Required) + { + extraLen += 4 + z64InfoLen; + } + + wxUint16 versionNeeded = + (z64Required) ? 
Z64_VERSION_NEEDED_TO_EXTRACT : GetVersionNeeded(); wxDataOutputStream ds(stream); ds << CENTRAL_MAGIC << m_VersionMadeBy << m_SystemMadeBy; - ds.Write16(wx_truncate_cast(wxUint16, GetVersionNeeded())); + ds.Write16(versionNeeded); ds.Write16(wx_truncate_cast(wxUint16, GetFlags())); ds.Write16(wx_truncate_cast(wxUint16, GetMethod())); ds.Write32(GetDateTime().GetAsDOS()); ds.Write32(GetCrc()); - ds.Write32(wx_truncate_cast(wxUint32, GetCompressedSize())); - ds.Write32(wx_truncate_cast(wxUint32, GetSize())); + ds.Write32(LimitUint32(GetCompressedSize())); + ds.Write32(LimitUint32(GetSize())); ds.Write16(nameLen); ds.Write16(extraLen); ds << commentLen << m_DiskStart << m_InternalAttributes - << m_ExternalAttributes << wx_truncate_cast(wxUint32, GetOffset()); + << m_ExternalAttributes << LimitUint32(GetOffset()); stream.Write(name, nameLen); - if (extraLen) - stream.Write(GetExtra(), extraLen); + if (z64Required) + { + ds.Write16(1); // tag + ds.Write16(z64InfoLen); // record size + if (m_CompressedSize > 0xffffffff) + ds.Write64(m_CompressedSize); + if (m_Size > 0xffffffff) + ds.Write64(m_Size); + if (m_Offset > 0xffffffff) + ds.Write64(m_Offset); + } + if (GetExtraLen()) + stream.Write(GetExtra(), GetExtraLen()); stream.Write(comment, commentLen); return CENTRAL_SIZE + nameLen + extraLen + commentLen; @@ -1199,17 +1336,17 @@ public: wxString GetComment() const { return m_Comment; } void SetDiskNumber(int num) - { m_DiskNumber = wx_truncate_cast(wxUint16, num); } + { m_DiskNumber = wx_truncate_cast(wxUint32, num); } void SetStartDisk(int num) - { m_StartDisk = wx_truncate_cast(wxUint16, num); } + { m_StartDisk = wx_truncate_cast(wxUint32, num); } void SetEntriesHere(int num) - { m_EntriesHere = wx_truncate_cast(wxUint16, num); } + { m_EntriesHere = wx_truncate_cast(wxUint32, num); } void SetTotalEntries(int num) - { m_TotalEntries = wx_truncate_cast(wxUint16, num); } + { m_TotalEntries = wx_truncate_cast(wxUint32, num); } void SetSize(wxFileOffset size) - { m_Size = 
wx_truncate_cast(wxUint32, size); } + { m_Size = wx_truncate_cast(wxUint64, size); } void SetOffset(wxFileOffset offset) - { m_Offset = wx_truncate_cast(wxUint32, offset); } + { m_Offset = wx_truncate_cast(wxUint64, offset); } void SetComment(const wxString& comment) { m_Comment = comment; } @@ -1217,12 +1354,12 @@ public: bool Write(wxOutputStream& stream, wxMBConv& conv) const; private: - wxUint16 m_DiskNumber; - wxUint16 m_StartDisk; - wxUint16 m_EntriesHere; - wxUint16 m_TotalEntries; - wxUint32 m_Size; - wxUint32 m_Offset; + wxUint32 m_DiskNumber; + wxUint32 m_StartDisk; + wxUint64 m_EntriesHere; + wxUint64 m_TotalEntries; + wxUint64 m_Size; + wxUint64 m_Offset; wxString m_Comment; }; @@ -1245,8 +1382,41 @@ bool wxZipEndRec::Write(wxOutputStream& stream, wxMBConv& conv) const wxDataOutputStream ds(stream); - ds << END_MAGIC << m_DiskNumber << m_StartDisk << m_EntriesHere - << m_TotalEntries << m_Size << m_Offset << commentLen; + // Check if zip64 is required + if (m_DiskNumber > 0xffff || m_StartDisk > 0xffff || + m_EntriesHere > 0xffff || m_TotalEntries > 0xffff || + m_Size > 0xffffffff || m_Offset > 0xffffffff) + { + // Write zip64 end of central directory record + wxFileOffset z64endOffset = stream.TellO(); + ds.Write32(Z64_END_MAGIC); + ds.Write64(static_cast(Z64_END_SIZE - 12)); // size of zip64 end record + ds.Write16(Z64_VERSION_NEEDED_TO_EXTRACT); + ds.Write16(Z64_VERSION_NEEDED_TO_EXTRACT); + ds.Write32(m_DiskNumber); + ds.Write32(m_StartDisk); + ds.Write64(m_EntriesHere); + ds.Write64(m_TotalEntries); + ds.Write64(m_Size); + ds.Write64(m_Offset); + + // Write zip64 end of central directory locator + ds.Write32(Z64_LOC_MAGIC); + ds.Write32(m_StartDisk); + ds.Write64(z64endOffset); + ds.Write32(1); // total number of disks + } + + ds << END_MAGIC; + + ds.Write16(LimitUint16(m_DiskNumber)); + ds.Write16(LimitUint16(m_StartDisk)); + ds.Write16(LimitUint16(m_EntriesHere)); + ds.Write16(LimitUint16(m_TotalEntries)); + ds.Write32(LimitUint32(m_Size)); + 
ds.Write32(LimitUint32(m_Offset)); + + ds << commentLen; stream.Write(comment, commentLen); @@ -1261,8 +1431,13 @@ bool wxZipEndRec::Read(wxInputStream& stream, wxMBConv& conv) wxUint16 commentLen; - ds >> m_DiskNumber >> m_StartDisk >> m_EntriesHere - >> m_TotalEntries >> m_Size >> m_Offset >> commentLen; + m_DiskNumber = ds.Read16(); + m_StartDisk = ds.Read16(); + m_EntriesHere = ds.Read16(); + m_TotalEntries = ds.Read16(); + m_Size = ds.Read32(); + m_Offset = ds.Read32(); + ds >> commentLen; if (commentLen) { m_Comment = ReadString(stream, commentLen, conv); @@ -1276,6 +1451,34 @@ bool wxZipEndRec::Read(wxInputStream& stream, wxMBConv& conv) wxLogWarning(_("assuming this is a multi-part zip concatenated")); } + // Look for zip64 end record + stream.SeekI(-(END_SIZE+Z64_LOC_SIZE), wxFromCurrent); + wxZipHeader dsLoc(stream, Z64_LOC_SIZE); + if ( dsLoc && dsLoc.Read32() == Z64_LOC_MAGIC ) + { + // Found zip64 locator, read z64 directory + dsLoc.Read32(); // skip: disk with the start of the zip64 + wxUint64 z64EndOffset = dsLoc.Read64(); + + // Read zip64 end of central directory record + if (stream.SeekI(z64EndOffset) == wxInvalidOffset) + return false; + wxZipHeader dsEnd(stream, Z64_END_SIZE); + if ( dsEnd.Read32() != Z64_END_MAGIC || + dsEnd.Read64() < Z64_END_SIZE - 12 ) // Check record size + return false; + + dsEnd.Read16(); // skip: version made by + dsEnd.Read16(); // skip: version needed to extract + + m_DiskNumber = dsEnd.Read32(); + m_StartDisk = dsEnd.Read32(); + m_EntriesHere = dsEnd.Read64(); + m_TotalEntries = dsEnd.Read64(); + m_Size = dsEnd.Read64(); + m_Offset = dsEnd.Read64(); + } + return true; } @@ -1536,7 +1739,7 @@ wxStreamError wxZipInputStream::ReadCentral() if (!AtHeader()) CloseEntry(); - if (m_signature == END_MAGIC) + if (m_signature == END_MAGIC || m_signature == Z64_END_MAGIC) return wxSTREAM_EOF; if (m_signature != CENTRAL_MAGIC) { @@ -1571,7 +1774,8 @@ wxStreamError wxZipInputStream::ReadLocal(bool readEndRec /*=false*/) if 
(!m_signature) m_signature = ReadSignature(); - if (m_signature == CENTRAL_MAGIC || m_signature == END_MAGIC) { + if (m_signature == CENTRAL_MAGIC || m_signature == END_MAGIC || + m_signature == Z64_END_MAGIC) { if (m_streamlink && !m_streamlink->GetOutputStream()) { m_streamlink->Release(this); m_streamlink = NULL; @@ -1604,7 +1808,7 @@ wxStreamError wxZipInputStream::ReadLocal(bool readEndRec /*=false*/) m_signature = ReadSignature(); } - if (m_signature == END_MAGIC) { + if (m_signature == END_MAGIC || m_signature == Z64_END_MAGIC) { if (readEndRec || m_streamlink) { wxZipEndRec endrec; endrec.Read(*m_parent_i_stream, GetConv());