/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com)     |
   | Copyright (c) 1997-2010 The PHP Group                                |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/

#include "hphp/util/compression.h"
#include "hphp/util/logger.h"
#include "hphp/util/exception.h"

#include <cstdio>
#include <cstdlib>
#include <cstring>

#define PHP_ZLIB_MODIFIER 1000
#define GZIP_HEADER_LENGTH 10
#define GZIP_FOOTER_LENGTH 8

namespace HPHP {

static const int gz_magic[2] = {0x1f, 0x8b}; /* gzip magic header */

///////////////////////////////////////////////////////////////////////////////
// This check_header() function is copied from zlib 1.2.3 and re-factored to
// work with in-memory buffers (rather than file streams).

/* zlib.h -- interface of the 'zlib' general purpose compression library
  version 1.2.3, July 18th, 2005

  Copyright (C) 1995-2005 Jean-loup Gailly and Mark Adler

  This software is provided 'as-is', without any express or implied
  warranty.  In no event will the authors be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product documentation would be
     appreciated but is not required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.

  Jean-loup Gailly        Mark Adler
  jloup@gzip.org          madler@alumni.caltech.edu


  The data format used by the zlib library is described by RFCs (Request for
  Comments) 1950 to 1952 in the files http://www.ietf.org/rfc/rfc1950.txt
  (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format).
*/

/* gzip flag byte */
#define ASCII_FLAG   0x01 /* bit 0 set: file probably ascii text */
#define HEAD_CRC     0x02 /* bit 1 set: header CRC present */
#define EXTRA_FIELD  0x04 /* bit 2 set: extra field present */
#define ORIG_NAME    0x08 /* bit 3 set: original file name present */
#define COMMENT      0x10 /* bit 4 set: file comment present */
#define RESERVED     0xE0 /* bits 5..7: reserved */

/* ===========================================================================
     Read one byte from the input side of an in-memory z_stream, advancing
   next_in and decrementing avail_in. Returns EOF when no input is left.
*/
static int get_byte(z_stream &stream) {
  if (stream.avail_in == 0) {
    return EOF;
  }
  stream.avail_in--;
  return *(stream.next_in)++;
}

/* ===========================================================================
     Validate and skip the gzip header at the front of stream's input.

   On success (Z_OK) next_in/avail_in point just past the header, i.e. at the
   start of the raw deflate data. Z_DATA_ERROR is returned when:
     - three bytes or fewer of input are available,
     - the gzip magic bytes are missing,
     - the method is not Z_DEFLATED or a reserved flag bit is set,
     - no input at all remains once the header has been skipped.
*/
static int check_header(z_stream &stream) {
  int method; /* method byte */
  int flags;  /* flags byte */
  uInt len;
  int c;

  /* Need more than the two magic bytes before we can peek at them */
  len = stream.avail_in;
  if (len <= 2) {
    return Z_DATA_ERROR;
  }

  /* Peek ahead to check the gzip magic header */
  if (stream.next_in[0] != gz_magic[0] ||
      stream.next_in[1] != gz_magic[1]) {
    return Z_DATA_ERROR;
  }
  stream.avail_in -= 2;
  stream.next_in += 2;

  /* Check the rest of the gzip header */
  method = get_byte(stream);
  flags = get_byte(stream);
  if (method != Z_DEFLATED || (flags & RESERVED) != 0) {
    return Z_DATA_ERROR;
  }

  /* Discard time, xflags and OS code: */
  for (len = 0; len < 6; len++) (void)get_byte(stream);

  if ((flags & EXTRA_FIELD) != 0) { /* skip the extra field */
    len  =  (uInt)get_byte(stream);
    len += ((uInt)get_byte(stream))<<8;
    /* len is garbage if EOF but the loop below will quit anyway */
    while (len-- != 0 && get_byte(stream) != EOF) ;
  }
  if ((flags & ORIG_NAME) != 0) { /* skip the original file name */
    while ((c = get_byte(stream)) != 0 && c != EOF) ;
  }
  if ((flags & COMMENT) != 0) {   /* skip the .gz file comment */
    while ((c = get_byte(stream)) != 0 && c != EOF) ;
  }
  if ((flags & HEAD_CRC) != 0) {  /* skip the header crc */
    for (len = 0; len < 2; len++) (void)get_byte(stream);
  }
  /* A header with nothing after it cannot be a valid gzip member */
  return stream.avail_in == 0 ? Z_DATA_ERROR : Z_OK;
}

/* Write the fixed GZIP_HEADER_LENGTH-byte gzip header into out: magic bytes,
   deflate method, zeroed flags/time/xflags, and OS code 0x03. */
static void write_gzip_header(char *out) {
  out[0] = gz_magic[0];
  out[1] = gz_magic[1];
  out[2] = Z_DEFLATED;
  out[3] = out[4] = out[5] = out[6] = out[7] = out[8] = 0; /* time set to 0 */
  out[9] = 0x03; // OS_CODE
}

/* Store the low 32 bits of value into out[0..3], least significant byte
   first. */
static void write_le32(char *out, uLong value) {
  out[0] = static_cast<char>(value & 0xFF);
  out[1] = static_cast<char>((value >> 8) & 0xFF);
  out[2] = static_cast<char>((value >> 16) & 0xFF);
  out[3] = static_cast<char>((value >> 24) & 0xFF);
}

/* Write the GZIP_FOOTER_LENGTH-byte gzip trailer (CRC-32 followed by the
   uncompressed size, both in LSB order) into out. */
static void write_gzip_trailer(char *out, uLong crc, uLong total_in) {
  write_le32(out, crc);
  write_le32(out + 4, total_in);
}

///////////////////////////////////////////////////////////////////////////////

/**
 * Decide from the file name whether compressing the file is worthwhile.
 *
 * Returns false when the text after the last '.' in filename exactly matches
 * (case-sensitively) one of a fixed list of extensions such as "png", "zip"
 * or "mp3"; returns true otherwise, including when there is no '.' at all.
 */
bool is_compressible_file(const char *filename) {
  static const char *ext[] = {
    "gif", "png", "jpeg", "jpg", "tiff", "swf", "zip", "gz", "bz2", "cab",
    "bmp", "xcf", "mp3", "wav", "rsrc", "ico", "jar", "exe", "dll", "so",
  };

  const char *dot = strrchr(filename, '.');
  if (!dot) {
    return true;
  }
  const char *suffix = dot + 1;
  for (unsigned int i = 0; i < sizeof(ext)/sizeof(ext[0]); i++) {
    if (strcmp(suffix, ext[i]) == 0) {
      return false;
    }
  }
  return true;
}

///////////////////////////////////////////////////////////////////////////////
// StreamCompressor

/**
 * Set up a deflate stream for chunk-by-chunk compression.
 *
 * @param level          zlib compression level, -1..9.
 * @param encoding_mode  CODING_GZIP (raw deflate, header/trailer written by
 *                       compress()) or CODING_DEFLATE (deflateInit defaults).
 * @param header         whether the first compress() call should emit the
 *                       gzip header.
 * @throws Exception if level or encoding_mode is out of range, or if zlib
 *         fails to initialize the stream.
 */
StreamCompressor::StreamCompressor(int level, int encoding_mode, bool header)
  : m_encoding(encoding_mode), m_header(header), m_ended(false) {
  if (level < -1 || level > 9) {
    throw Exception("compression level(%d) must be within -1..9", level);
  }
  if (encoding_mode != CODING_GZIP && encoding_mode != CODING_DEFLATE) {
    throw Exception("encoding mode must be FORCE_GZIP or FORCE_DEFLATE");
  }

  m_stream.zalloc = Z_NULL;
  m_stream.zfree  = Z_NULL;
  m_stream.opaque = Z_NULL;
  m_stream.total_in = 0;
  m_stream.next_in = Z_NULL;
  m_stream.avail_in = 0;
  m_stream.avail_out = 0;
  m_stream.next_out = Z_NULL;

  // Running CRC-32 of all uncompressed input; goes into the gzip trailer.
  m_crc = crc32(0L, Z_NULL, 0);

  int status;
  switch (encoding_mode) {
  case CODING_GZIP:
    /* windowBits is passed < 0 to suppress zlib header & trailer */
    if ((status = deflateInit2(&m_stream, level, Z_DEFLATED, -MAX_WBITS,
                               MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY))
        != Z_OK) {
      throw Exception("%s", zError(status));
    }
    break;
  case CODING_DEFLATE:
    if ((status = deflateInit(&m_stream, level)) != Z_OK) {
      throw Exception("%s", zError(status));
    }
    break;
  }
}

/** Release the deflate stream unless compress() has already ended it. */
StreamCompressor::~StreamCompressor() {
  if (!m_ended) {
    deflateEnd(&m_stream);
  }
}

/**
 * Compress one chunk of input.
 *
 * @param data     input bytes for this chunk.
 * @param len      in: number of input bytes; out (on success only): number
 *                 of bytes produced, not counting the terminating NUL.
 * @param trailer  true for the final chunk: the stream is finished and, in
 *                 CODING_GZIP mode, the gzip trailer is appended.
 * @return a malloc()'ed, NUL-terminated buffer the caller must free(), or
 *         nullptr on allocation failure or zlib error (zlib errors are
 *         logged).
 */
char *StreamCompressor::compress(const char *data, int &len, bool trailer) {
  // middle chunks should never be zero size
  assert(len || trailer);

  const bool with_footer = trailer && m_encoding == CODING_GZIP;

  m_stream.next_in = (Bytef *)data;
  m_stream.avail_in = len;
  m_stream.total_out = 0;
  m_stream.avail_out = m_stream.avail_in +
    (m_stream.avail_in / PHP_ZLIB_MODIFIER) + 15 + 1; /* room for \0 */

  // deflate() is allowed to consume every byte of avail_out, so reserve one
  // extra byte to guarantee the terminating NUL below always fits.
  char *s2 = (char *)malloc
    (m_stream.avail_out + GZIP_HEADER_LENGTH +
     (with_footer ? GZIP_FOOTER_LENGTH : 0) + 1);
  if (!s2) {
    return nullptr;
  }

  /* add gzip file header */
  // NOTE(review): the header is emitted whenever m_header is set, regardless
  // of m_encoding -- confirm this is intended for CODING_DEFLATE.
  bool header = m_header;
  if (header) {
    write_gzip_header(s2);
    m_stream.next_out = (Bytef*)&(s2[GZIP_HEADER_LENGTH]);
    m_header = false; // only the 1st chunk gets it
  } else {
    m_stream.next_out = (Bytef*)s2;
  }

  // NOTE(review): Z_OK from deflate(Z_FINISH) means output space ran out
  // before the stream was finished; it is accepted as success here, unlike
  // in gzencode(). Verify whether that can happen with this buffer sizing.
  int status = deflate(&m_stream, trailer ? Z_FINISH : Z_SYNC_FLUSH);
  if (status == Z_BUF_ERROR || status == Z_STREAM_END) {
    // The stream is done (or stuck): tear it down and let deflateEnd()'s
    // result decide whether this counts as success.
    status = deflateEnd(&m_stream);
    m_ended = true;
  }
  if (status == Z_OK) {
    if (len) {
      m_crc = crc32(m_crc, (const Bytef *)data, len);
    }
    int new_len = m_stream.total_out + (header ? GZIP_HEADER_LENGTH : 0);
    len = new_len;
    if (with_footer) {
      /* write crc & stream.total_in in LSB order */
      write_gzip_trailer(s2 + new_len, m_crc, m_stream.total_in);
      len += GZIP_FOOTER_LENGTH;
    }
    s2[len] = '\0';
    return s2;
  }

  free(s2);
  Logger::Error("%s", zError(status));
  return nullptr;
}

///////////////////////////////////////////////////////////////////////////////

/**
 * Compress a whole buffer in one shot.
 *
 * @param data           input bytes.
 * @param len            in: number of input bytes; out (on success only):
 *                       length of the result, not counting the terminating
 *                       NUL.
 * @param level          zlib compression level, -1..9.
 * @param encoding_mode  CODING_GZIP or CODING_DEFLATE.
 * @return a malloc()'ed, NUL-terminated buffer the caller must free(), or
 *         nullptr on invalid arguments, allocation failure or zlib error.
 */
char *gzencode(const char *data, int &len, int level, int encoding_mode) {
  if (level < -1 || level > 9) {
    Logger::Warning("compression level(%d) must be within -1..9", level);
    return nullptr;
  }

  if (encoding_mode != CODING_GZIP && encoding_mode != CODING_DEFLATE) {
    Logger::Warning("encoding mode must be FORCE_GZIP or FORCE_DEFLATE");
    return nullptr;
  }

  const bool gzip = encoding_mode == CODING_GZIP;

  z_stream stream;
  stream.zalloc = Z_NULL;
  stream.zfree  = Z_NULL;
  stream.opaque = Z_NULL;

  stream.next_in = (Bytef *)data;
  stream.avail_in = len;

  stream.avail_out = stream.avail_in +
    (stream.avail_in / PHP_ZLIB_MODIFIER) + 15 + 1; /* room for \0 */
  char *s2 = (char *)malloc
    (stream.avail_out + GZIP_HEADER_LENGTH +
     (gzip ? GZIP_FOOTER_LENGTH : 0));
  if (!s2) {
    return nullptr;
  }

  /* add gzip file header */
  // NOTE(review): the gzip header is written (and counted in the returned
  // length) for CODING_DEFLATE as well -- confirm callers expect that.
  write_gzip_header(s2);
  stream.next_out = (Bytef*)&(s2[GZIP_HEADER_LENGTH]);

  int status;
  if (gzip) {
    /* windowBits is passed < 0 to suppress zlib header & trailer */
    status = deflateInit2(&stream, level, Z_DEFLATED, -MAX_WBITS,
                          MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
  } else {
    status = deflateInit(&stream, level);
  }
  if (status != Z_OK) {
    free(s2); // don't leak the output buffer when zlib fails to initialize
    Logger::Warning("%s", zError(status));
    return nullptr;
  }

  status = deflate(&stream, Z_FINISH);
  if (status != Z_STREAM_END) {
    deflateEnd(&stream);
    if (status == Z_OK) {
      // Z_OK after Z_FINISH means the output buffer was too small.
      status = Z_BUF_ERROR;
    }
  } else {
    status = deflateEnd(&stream);
  }

  if (status == Z_OK) {
    const int old_len = len;
    const int new_len = stream.total_out + GZIP_HEADER_LENGTH +
      (gzip ? GZIP_FOOTER_LENGTH : 0);

    /* resize to buffer to the "right" size */
    char *resized = (char *)realloc(s2, new_len + 1);
    if (!resized) {
      free(s2); // realloc() leaves the original block untouched on failure
      return nullptr;
    }
    s2 = resized;
    len = new_len;

    if (gzip) {
      uLong crc = crc32(0L, Z_NULL, 0);
      crc = crc32(crc, (const Bytef *)data, old_len);

      /* write crc & stream.total_in in LSB order */
      write_gzip_trailer(s2 + (stream.total_out + GZIP_HEADER_LENGTH),
                         crc, stream.total_in);
    }
    s2[len] = '\0';
    return s2;
  }

  free(s2);
  Logger::Warning("%s", zError(status));
  return nullptr;
}

/**
 * Decompress a gzip-framed buffer.
 *
 * The output buffer starts at 16x the input size and doubles on every
 * Z_BUF_ERROR, up to a factor of 2^15.
 *
 * @param data  gzip data. The byte at data[len] is also offered to the
 *              parser, so the buffer must have len + 1 readable bytes
 *              (e.g. a NUL-terminated string).
 * @param len   in: number of input bytes; out (on success only): number of
 *              decompressed bytes, not counting the terminating NUL.
 * @return a malloc()'ed, NUL-terminated buffer the caller must free(), or
 *         nullptr if the header is invalid, memory runs out, or inflation
 *         fails.
 */
char *gzdecode(const char *data, int &len) {
  if (len < 0) {
    // A negative length would turn into a huge avail_in below.
    Logger::Warning("gzdecode: header is in wrong format");
    return nullptr;
  }

  z_stream stream;
  stream.zalloc = (alloc_func) Z_NULL;
  stream.zfree = (free_func) Z_NULL;
  stream.opaque = Z_NULL;

  unsigned long length;
  int status;
  unsigned int factor = 4, maxfactor = 16;
  char *s1 = nullptr, *s2 = nullptr;
  do {
    // Every attempt restarts from the beginning of the input.
    stream.next_in = (Bytef *)data;
    stream.avail_in = (uInt)len + 1; /* there is room for \0 */
    if (check_header(stream) != Z_OK) {
      free(s1);
      Logger::Warning("gzdecode: header is in wrong format");
      return nullptr;
    }

    // Widen before multiplying so a large input cannot overflow int.
    length = static_cast<unsigned long>(len) * (1UL << factor++);
    s2 = (char *)realloc(s1, length);
    if (!s2) {
      if (s1) free(s1);
      return nullptr;
    }
    s1 = s2;

    stream.next_out = (Bytef*)s2;
    stream.avail_out = (uInt)length;

    /* init with -MAX_WBITS disables the zlib internal headers */
    status = inflateInit2(&stream, -MAX_WBITS);
    if (status == Z_OK) {
      status = inflate(&stream, Z_FINISH);
      if (status != Z_STREAM_END) {
        inflateEnd(&stream);
        if (status == Z_OK) {
          status = Z_BUF_ERROR;
        }
      } else {
        status = inflateEnd(&stream);
      }
    }
  } while (status == Z_BUF_ERROR && factor < maxfactor);

  if (status == Z_OK) {
    const int out_len = stream.total_out;

    // shrink the buffer down to what we really need since this can be 16
    // times greater than we actually need.
    char *shrunk = (char *)realloc(s2, out_len + 1);
    if (!shrunk) {
      free(s2); // realloc() leaves the original block untouched on failure
      return nullptr;
    }
    len = out_len;
    shrunk[len] = '\0';
    return shrunk;
  }

  free(s2);
  Logger::Warning("%s", zError(status));
  return nullptr;
}

}