Add ZIP64 support to wxZip{Input,Output}Stream (reviewed copy of the patch).

Review fixes, applied to added ('+') lines only so that every hunk header
count is unchanged:

  * CrackUint64(): restore the static_cast<wxUint64> casts and also cast
    n[3]; "n[3] << 24" is computed as a signed int and sign-extends into the
    upper 32 bits whenever n[3] >= 0x80.
  * wxZipHeader(const char*, size_t): clamp the copy to sizeof(m_data) and
    clear m_ok instead of returning early with m_size == size and m_data
    uninitialised (extra fields are often longer than 64 bytes).
  * wxZipEntry::ReadLocal(): parse the buffer that was just read
    (m_LocalExtra), not m_Extra, which may be NULL at this point.
  * WriteLocal()/WriteCentral(): emit the zip64 extra field as original
    size, compressed size, offset - the order LoadExtraInfo() reads and the
    ZIP specification requires.
  * wxZipEndRec::Write(): restore static_cast<wxUint64>.

NOTE(review): line breaks and indentation of this patch were reconstructed
from the hunk headers; verify context whitespace against the tree (or apply
with whitespace tolerance) before committing.
NOTE(review): not changed here, please confirm separately: LoadExtraInfo()
does not validate the record size before Read64(), and wxZipEndRec::Read()
seeks back by END_SIZE+Z64_LOC_SIZE without accounting for a comment.

diff --git a/docs/changes.txt b/docs/changes.txt
index 46febbaceb..cafb0c207c 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -49,7 +49,7 @@ Changes in behaviour which may result in build errors
 
 All:
 
-- Add UTF-8 support to wxZip{Input,Output}Stream (Tobias Taschner).
+- Add UTF-8 and ZIP 64 support to wxZip{Input,Output}Stream (Tobias Taschner).
 - Allow calling wxItemContainer::Add() and similar with std::vector<> argument.
 - Add "%z" support to printf()-like functions like wxString::Format() (RIVDSL).
 - Add DOCTYPE support to wxXmlDocument (Nick Matthews).
diff --git a/include/wx/zipstrm.h b/include/wx/zipstrm.h
index 9d66603aae..4866535bef 100644
--- a/include/wx/zipstrm.h
+++ b/include/wx/zipstrm.h
@@ -234,6 +234,8 @@ private:
     size_t WriteDescriptor(wxOutputStream& stream, wxUint32 crc,
                            wxFileOffset compressedSize, wxFileOffset size);
 
+    bool LoadExtraInfo(const char* extraData, wxUint16 extraLen, bool localInfo);
+
     wxUint8      m_SystemMadeBy;       // one of enum wxZipSystem
     wxUint8      m_VersionMadeBy;      // major * 10 + minor
 
diff --git a/src/common/zipstrm.cpp b/src/common/zipstrm.cpp
index 167e8abc51..7003836a93 100644
--- a/src/common/zipstrm.cpp
+++ b/src/common/zipstrm.cpp
@@ -33,7 +33,8 @@
 
 // value for the 'version needed to extract' field (20 means 2.0)
 enum {
-    VERSION_NEEDED_TO_EXTRACT = 20
+    VERSION_NEEDED_TO_EXTRACT = 20,
+    Z64_VERSION_NEEDED_TO_EXTRACT = 45 // File uses ZIP64 format extensions
 };
 
 // signatures for the various records (PKxx)
@@ -41,7 +42,9 @@ enum {
     CENTRAL_MAGIC = 0x02014b50,     // central directory record
     LOCAL_MAGIC   = 0x04034b50,     // local header
     END_MAGIC     = 0x06054b50,     // end of central directory record
-    SUMS_MAGIC    = 0x08074b50      // data descriptor (info-zip)
+    SUMS_MAGIC    = 0x08074b50,     // data descriptor (info-zip)
+    Z64_LOC_MAGIC = 0x07064b50,     // zip64 end of central directory locator
+    Z64_END_MAGIC = 0x06064b50      // zip64 end of central directory record
 };
 
 // unix file attributes. zip stores them in the high 16 bits of the
@@ -57,7 +60,9 @@ enum {
     CENTRAL_SIZE  = 46,
     LOCAL_SIZE    = 30,
     END_SIZE      = 22,
-    SUMS_SIZE     = 12
+    SUMS_SIZE     = 12,
+    Z64_LOC_SIZE  = 20,
+    Z64_END_SIZE  = 56
 };
 
 // The number of bytes that must be written to an wxZipOutputStream before
@@ -103,6 +108,34 @@ static wxString ReadString(wxInputStream& stream, wxUint16 len, wxMBConv& conv)
     return str;
 }
 
+static inline wxUint16 LimitUint16(wxUint64 value)
+{
+    if (value > 0xffff)
+        return 0xffff;
+    else
+        return wx_truncate_cast(wxUint16, value);
+}
+
+static inline wxUint32 LimitUint32(wxUint64 value)
+{
+    if (value > 0xffffffff)
+        return 0xffffffff;
+    else
+        return wx_truncate_cast(wxUint32, value);
+}
+
+// Decode a little endian wxUint64 number from a character array
+// (n[3] is widened before shifting so bit 31 cannot sign-extend)
+static inline wxUint64 CrackUint64(const char *m)
+{
+    const unsigned char *n = (const unsigned char*)m;
+    return (static_cast<wxUint64>(n[7]) << 56) |
+        (static_cast<wxUint64>(n[6]) << 48) |
+        (static_cast<wxUint64>(n[5]) << 40) |
+        (static_cast<wxUint64>(n[4]) << 32) |
+        (static_cast<wxUint64>(n[3]) << 24) | (n[2] << 16) | (n[1] << 8) | n[0];
+}
+
 // Decode a little endian wxUint32 number from a character array
 //
 static inline wxUint32 CrackUint32(const char *m)
@@ -171,10 +204,12 @@ class wxZipHeader
 {
 public:
     wxZipHeader(wxInputStream& stream, size_t size);
+    wxZipHeader(const char* data, size_t size);
 
     inline wxUint8 Read8();
     inline wxUint16 Read16();
     inline wxUint32 Read32();
+    inline wxUint64 Read64();
 
     const char *GetData() const             { return m_data; }
     size_t GetSize() const                  { return m_size; }
@@ -186,6 +221,7 @@ public:
     wxZipHeader& operator>>(wxUint8& n)     { n = Read8();  return *this; }
     wxZipHeader& operator>>(wxUint16& n)    { n = Read16(); return *this; }
     wxZipHeader& operator>>(wxUint32& n)    { n = Read32(); return *this; }
+    wxZipHeader& operator>>(wxUint64& n)    { n = Read64(); return *this; }
 
 private:
     char m_data[64];
@@ -204,6 +240,15 @@ wxZipHeader::wxZipHeader(wxInputStream& stream, size_t size)
     m_ok = m_size == size;
 }
 
+wxZipHeader::wxZipHeader(const char* data, size_t size)
+  : m_size(size <= sizeof(m_data) ? size : sizeof(m_data)),
+    m_pos(0),
+    m_ok(size <= sizeof(m_data))
+{
+    // Oversized input is truncated to the buffer capacity and flagged via m_ok
+    memcpy(m_data, data, m_size);
+}
+
 inline wxUint8 wxZipHeader::Read8()
 {
     wxASSERT(m_pos < m_size);
@@ -226,6 +271,13 @@ inline wxUint32 wxZipHeader::Read32()
     return n;
 }
 
+inline wxUint64 wxZipHeader::Read64()
+{
+    wxASSERT(m_pos + 8 <= m_size);
+    wxUint64 n = CrackUint64(m_data + m_pos);
+    m_pos += 8;
+    return n;
+}
 
 /////////////////////////////////////////////////////////////////////////////
 // Stored input stream
@@ -962,6 +1014,30 @@ void wxZipEntry::UnsetNotifier()
         m_zipnotifier = NULL;
 }
 
+bool wxZipEntry::LoadExtraInfo(const char* extraData, wxUint16 extraLen, bool localInfo)
+{
+    wxZipHeader ds(extraData, extraLen);
+
+    // A file may contain larger size, compressed size or offset
+    // in a zip64 extra data block. Use the 64 bit values if available
+    if ( extraLen > 4 && ds.Read16() == 1 )
+    {
+        ds.Read16(); // skip record size
+        if ( m_Size == 0xffffffff )
+            m_Size = ds.Read64();
+        if ( m_CompressedSize == 0xffffffff )
+            m_CompressedSize = ds.Read64();
+        if ( !localInfo && m_Offset == 0xffffffff )
+            m_Offset = ds.Read64();
+
+        // extraInfo was used and parsed
+        return true;
+    }
+
+    // extraInfo had unknown format
+    return false;
+}
+
 size_t wxZipEntry::ReadLocal(wxInputStream& stream, wxMBConv& conv)
 {
     wxUint16 nameLen, extraLen;
@@ -996,6 +1072,12 @@ size_t wxZipEntry::ReadLocal(wxInputStream& stream, wxMBConv& conv)
             stream.Read(m_LocalExtra->GetData(), extraLen);
             if (stream.LastRead() != extraLen + 0u)
                 return 0;
+
+            if (LoadExtraInfo(m_LocalExtra->GetData(), extraLen, true))
+            {
+                Release(m_LocalExtra);
+                m_LocalExtra = NULL;
+            }
         }
     }
 
@@ -1010,24 +1092,37 @@ size_t wxZipEntry::WriteLocal(wxOutputStream& stream, wxMBConv& conv) const
     if (!name) name = "";
     wxUint16 nameLen = wx_truncate_cast(wxUint16, strlen(name));
 
+    bool z64Required = m_CompressedSize > 0xffffffff || m_Size > 0xffffffff;
+    wxUint16 versionNeeded =
+        (z64Required) ? Z64_VERSION_NEEDED_TO_EXTRACT : m_VersionNeeded;
+
     wxDataOutputStream ds(stream);
 
-    ds << m_VersionNeeded << m_Flags << m_Method;
+    ds << versionNeeded << m_Flags << m_Method;
     ds.Write32(GetDateTime().GetAsDOS());
 
     ds.Write32(m_Crc);
     ds.Write32(m_CompressedSize != wxInvalidOffset ?
-                wx_truncate_cast(wxUint32, m_CompressedSize) : 0);
+                LimitUint32(m_CompressedSize) : 0);
     ds.Write32(m_Size != wxInvalidOffset ?
-                wx_truncate_cast(wxUint32, m_Size) : 0);
+                LimitUint32(m_Size) : 0);
 
     ds << nameLen;
     wxUint16 extraLen = wx_truncate_cast(wxUint16, GetLocalExtraLen());
+    if (z64Required)
+        extraLen += 20; // tag and 2x64bit file sizes
     ds.Write16(extraLen);
 
     stream.Write(name, nameLen);
-    if (extraLen)
-        stream.Write(m_LocalExtra->GetData(), extraLen);
+    if (z64Required)
+    {
+        ds.Write16(1);  // id
+        ds.Write16(16); // record size
+        ds.Write64(m_Size);           // zip64 order: original size first,
+        ds.Write64(m_CompressedSize); // then compressed size
+    }
+    if (GetLocalExtraLen())
+        stream.Write(m_LocalExtra->GetData(), GetLocalExtraLen());
 
     return LOCAL_SIZE + nameLen + extraLen;
 }
@@ -1066,6 +1161,12 @@ size_t wxZipEntry::ReadCentral(wxInputStream& stream, wxMBConv& conv)
             stream.Read(m_Extra->GetData(), extraLen);
             if (stream.LastRead() != extraLen + 0u)
                 return 0;
+
+            if (LoadExtraInfo(m_Extra->GetData(), extraLen, false))
+            {
+                Release(m_Extra);
+                m_Extra = NULL;
+            }
         }
     }
 
@@ -1094,27 +1195,63 @@ size_t wxZipEntry::WriteCentral(wxOutputStream& stream, wxMBConv& conv) const
     wxUint16 commentLen = wx_truncate_cast(wxUint16, strlen(comment));
 
     wxUint16 extraLen = wx_truncate_cast(wxUint16, GetExtraLen());
+    wxUint16 z64InfoLen = 0;
+
+    bool z64Required = false;
+    if ( m_CompressedSize > 0xffffffff )
+    {
+        z64Required = true;
+        z64InfoLen += 8;
+    }
+    if ( m_Size > 0xffffffff )
+    {
+        z64Required = true;
+        z64InfoLen += 8;
+    }
+    if ( m_Offset > 0xffffffff )
+    {
+        z64Required = true;
+        z64InfoLen += 8;
+    }
+    if (z64Required)
+    {
+        extraLen += 4 + z64InfoLen;
+    }
+
+    wxUint16 versionNeeded =
+        (z64Required) ? Z64_VERSION_NEEDED_TO_EXTRACT : GetVersionNeeded();
 
     wxDataOutputStream ds(stream);
 
     ds << CENTRAL_MAGIC << m_VersionMadeBy << m_SystemMadeBy;
 
-    ds.Write16(wx_truncate_cast(wxUint16, GetVersionNeeded()));
+    ds.Write16(versionNeeded);
     ds.Write16(wx_truncate_cast(wxUint16, GetFlags()));
     ds.Write16(wx_truncate_cast(wxUint16, GetMethod()));
     ds.Write32(GetDateTime().GetAsDOS());
     ds.Write32(GetCrc());
-    ds.Write32(wx_truncate_cast(wxUint32, GetCompressedSize()));
-    ds.Write32(wx_truncate_cast(wxUint32, GetSize()));
+    ds.Write32(LimitUint32(GetCompressedSize()));
+    ds.Write32(LimitUint32(GetSize()));
     ds.Write16(nameLen);
     ds.Write16(extraLen);
 
     ds << commentLen << m_DiskStart << m_InternalAttributes
-       << m_ExternalAttributes << wx_truncate_cast(wxUint32, GetOffset());
+       << m_ExternalAttributes << LimitUint32(GetOffset());
 
     stream.Write(name, nameLen);
-    if (extraLen)
-        stream.Write(GetExtra(), extraLen);
+    if (z64Required)
+    {
+        ds.Write16(1);          // tag
+        ds.Write16(z64InfoLen); // record size
+        if (m_Size > 0xffffffff) // zip64 order: size, compressed size, offset
+            ds.Write64(m_Size);
+        if (m_CompressedSize > 0xffffffff)
+            ds.Write64(m_CompressedSize);
+        if (m_Offset > 0xffffffff)
+            ds.Write64(m_Offset);
+    }
+    if (GetExtraLen())
+        stream.Write(GetExtra(), GetExtraLen());
     stream.Write(comment, commentLen);
 
     return CENTRAL_SIZE + nameLen + extraLen + commentLen;
@@ -1199,17 +1336,17 @@ public:
     wxString GetComment() const     { return m_Comment; }
 
     void SetDiskNumber(int num)
-        { m_DiskNumber = wx_truncate_cast(wxUint16, num); }
+        { m_DiskNumber = wx_truncate_cast(wxUint32, num); }
     void SetStartDisk(int num)
-        { m_StartDisk = wx_truncate_cast(wxUint16, num); }
+        { m_StartDisk = wx_truncate_cast(wxUint32, num); }
     void SetEntriesHere(int num)
-        { m_EntriesHere = wx_truncate_cast(wxUint16, num); }
+        { m_EntriesHere = wx_truncate_cast(wxUint32, num); }
     void SetTotalEntries(int num)
-        { m_TotalEntries = wx_truncate_cast(wxUint16, num); }
+        { m_TotalEntries = wx_truncate_cast(wxUint32, num); }
     void SetSize(wxFileOffset size)
-        { m_Size = wx_truncate_cast(wxUint32, size); }
+        { m_Size = wx_truncate_cast(wxUint64, size); }
     void SetOffset(wxFileOffset offset)
-        { m_Offset = wx_truncate_cast(wxUint32, offset); }
+        { m_Offset = wx_truncate_cast(wxUint64, offset); }
     void SetComment(const wxString& comment)
         { m_Comment = comment; }
 
@@ -1217,12 +1354,12 @@ public:
     bool Write(wxOutputStream& stream, wxMBConv& conv) const;
 
 private:
-    wxUint16 m_DiskNumber;
-    wxUint16 m_StartDisk;
-    wxUint16 m_EntriesHere;
-    wxUint16 m_TotalEntries;
-    wxUint32 m_Size;
-    wxUint32 m_Offset;
+    wxUint32 m_DiskNumber;
+    wxUint32 m_StartDisk;
+    wxUint64 m_EntriesHere;
+    wxUint64 m_TotalEntries;
+    wxUint64 m_Size;
+    wxUint64 m_Offset;
     wxString m_Comment;
 };
 
@@ -1245,8 +1382,41 @@ bool wxZipEndRec::Write(wxOutputStream& stream, wxMBConv& conv) const
 
     wxDataOutputStream ds(stream);
 
-    ds << END_MAGIC << m_DiskNumber << m_StartDisk << m_EntriesHere
-       << m_TotalEntries << m_Size << m_Offset << commentLen;
+    // Check if zip64 is required
+    if (m_DiskNumber > 0xffff || m_StartDisk > 0xffff ||
+        m_EntriesHere > 0xffff || m_TotalEntries > 0xffff ||
+        m_Size > 0xffffffff || m_Offset > 0xffffffff)
+    {
+        // Write zip64 end of central directory record
+        wxFileOffset z64endOffset = stream.TellO();
+        ds.Write32(Z64_END_MAGIC);
+        ds.Write64(static_cast<wxUint64>(Z64_END_SIZE - 12)); // size of zip64 end record
+        ds.Write16(Z64_VERSION_NEEDED_TO_EXTRACT);
+        ds.Write16(Z64_VERSION_NEEDED_TO_EXTRACT);
+        ds.Write32(m_DiskNumber);
+        ds.Write32(m_StartDisk);
+        ds.Write64(m_EntriesHere);
+        ds.Write64(m_TotalEntries);
+        ds.Write64(m_Size);
+        ds.Write64(m_Offset);
+
+        // Write zip64 end of central directory locator
+        ds.Write32(Z64_LOC_MAGIC);
+        ds.Write32(m_StartDisk);
+        ds.Write64(z64endOffset);
+        ds.Write32(1); // total number of disks
+    }
+
+    ds << END_MAGIC;
+
+    ds.Write16(LimitUint16(m_DiskNumber));
+    ds.Write16(LimitUint16(m_StartDisk));
+    ds.Write16(LimitUint16(m_EntriesHere));
+    ds.Write16(LimitUint16(m_TotalEntries));
+    ds.Write32(LimitUint32(m_Size));
+    ds.Write32(LimitUint32(m_Offset));
+
+    ds << commentLen;
 
     stream.Write(comment, commentLen);
 
@@ -1261,8 +1431,13 @@ bool wxZipEndRec::Read(wxInputStream& stream, wxMBConv& conv)
 
     wxUint16 commentLen;
 
-    ds >> m_DiskNumber >> m_StartDisk >> m_EntriesHere
-       >> m_TotalEntries >> m_Size >> m_Offset >> commentLen;
+    m_DiskNumber = ds.Read16();
+    m_StartDisk = ds.Read16();
+    m_EntriesHere = ds.Read16();
+    m_TotalEntries = ds.Read16();
+    m_Size = ds.Read32();
+    m_Offset = ds.Read32();
+    ds >> commentLen;
 
     if (commentLen) {
         m_Comment = ReadString(stream, commentLen, conv);
@@ -1276,6 +1451,34 @@ bool wxZipEndRec::Read(wxInputStream& stream, wxMBConv& conv)
         wxLogWarning(_("assuming this is a multi-part zip concatenated"));
     }
 
+    // Look for zip64 end record
+    stream.SeekI(-(END_SIZE+Z64_LOC_SIZE), wxFromCurrent);
+    wxZipHeader dsLoc(stream, Z64_LOC_SIZE);
+    if ( dsLoc && dsLoc.Read32() == Z64_LOC_MAGIC )
+    {
+        // Found zip64 locator, read z64 directory
+        dsLoc.Read32(); // skip: disk with the start of the zip64
+        wxUint64 z64EndOffset = dsLoc.Read64();
+
+        // Read zip64 end of central directory record
+        if (stream.SeekI(z64EndOffset) == wxInvalidOffset)
+            return false;
+        wxZipHeader dsEnd(stream, Z64_END_SIZE);
+        if ( dsEnd.Read32() != Z64_END_MAGIC ||
+            dsEnd.Read64() < Z64_END_SIZE - 12 ) // Check record size
+            return false;
+
+        dsEnd.Read16(); // skip: version made by
+        dsEnd.Read16(); // skip: version needed to extract
+
+        m_DiskNumber = dsEnd.Read32();
+        m_StartDisk = dsEnd.Read32();
+        m_EntriesHere = dsEnd.Read64();
+        m_TotalEntries = dsEnd.Read64();
+        m_Size = dsEnd.Read64();
+        m_Offset = dsEnd.Read64();
+    }
+
     return true;
 }
 
@@ -1536,7 +1739,7 @@ wxStreamError wxZipInputStream::ReadCentral()
     if (!AtHeader())
         CloseEntry();
 
-    if (m_signature == END_MAGIC)
+    if (m_signature == END_MAGIC || m_signature == Z64_END_MAGIC)
         return wxSTREAM_EOF;
 
     if (m_signature != CENTRAL_MAGIC) {
@@ -1571,7 +1774,8 @@ wxStreamError wxZipInputStream::ReadLocal(bool readEndRec /*=false*/)
     if (!m_signature)
         m_signature = ReadSignature();
 
-    if (m_signature == CENTRAL_MAGIC || m_signature == END_MAGIC) {
+    if (m_signature == CENTRAL_MAGIC || m_signature == END_MAGIC ||
+        m_signature == Z64_END_MAGIC) {
         if (m_streamlink && !m_streamlink->GetOutputStream()) {
             m_streamlink->Release(this);
             m_streamlink = NULL;
@@ -1604,7 +1808,7 @@ wxStreamError wxZipInputStream::ReadLocal(bool readEndRec /*=false*/)
         m_signature = ReadSignature();
     }
 
-    if (m_signature == END_MAGIC) {
+    if (m_signature == END_MAGIC || m_signature == Z64_END_MAGIC) {
         if (readEndRec || m_streamlink) {
             wxZipEndRec endrec;
             endrec.Read(*m_parent_i_stream, GetConv());