633e78e076
This fixes a problem where the gzip_decompressor would fail the CRC check when reading a multipart gzip file that had been written using Z_FULL_FLUSH and contained an empty part (with a CRC of 0). A unit test that exposes the bug is included.
247 lines
9.0 KiB
C++
247 lines
9.0 KiB
C++
// (C) Copyright 2008 CodeRage, LLC (turkanis at coderage dot com)
|
|
// (C) Copyright 2004-2007 Jonathan Turkanis
|
|
// Distributed under the Boost Software License, Version 1.0. (See accompanying
|
|
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt.)
|
|
|
|
// See http://www.boost.org/libs/iostreams for documentation.
|
|
|
|
#include <cstddef>
|
|
#include <string>
|
|
#include <boost/iostreams/copy.hpp>
|
|
#include <boost/iostreams/device/array.hpp>
|
|
#include <boost/iostreams/device/back_inserter.hpp>
|
|
#include <boost/iostreams/filter/gzip.hpp>
|
|
#include <boost/iostreams/filter/test.hpp>
|
|
#include <boost/iostreams/filtering_stream.hpp>
|
|
#include <boost/ref.hpp>
|
|
#include <boost/range/iterator_range.hpp>
|
|
#include <boost/test/test_tools.hpp>
|
|
#include <boost/test/unit_test.hpp>
|
|
#include "detail/sequence.hpp"
|
|
#include "detail/verification.hpp"
|
|
|
|
using namespace boost;
|
|
using namespace boost::iostreams;
|
|
using namespace boost::iostreams::test;
|
|
namespace io = boost::iostreams;
|
|
using boost::unit_test::test_suite;
|
|
|
|
// Custom allocator type used by compression_test to instantiate
// basic_gzip_compressor and basic_gzip_decompressor with something other
// than the default allocator. It adds no members of its own on top of
// std::allocator<char>.
struct gzip_alloc : std::allocator<char> {
    gzip_alloc() { }

    // The base is initialised explicitly rather than being silently
    // default-constructed; std::allocator carries no state, so the resulting
    // object is the same either way.
    gzip_alloc(const gzip_alloc& other) : std::allocator<char>(other) { }

    // Converting constructor from any std::allocator instantiation.
    template<typename T>
    gzip_alloc(const std::allocator<T>& other) : std::allocator<char>(other) { }
};
|
|
|
|
void compression_test()
|
|
{
|
|
text_sequence data;
|
|
|
|
// Test compression and decompression with metadata
|
|
for (int i = 0; i < 4; ++i) {
|
|
gzip_params params;
|
|
if (i & 1) {
|
|
params.file_name = "original file name";
|
|
}
|
|
if (i & 2) {
|
|
params.comment = "detailed file description";
|
|
}
|
|
gzip_compressor out(params);
|
|
gzip_decompressor in;
|
|
BOOST_CHECK(
|
|
test_filter_pair( boost::ref(out),
|
|
boost::ref(in),
|
|
std::string(data.begin(), data.end()) )
|
|
);
|
|
BOOST_CHECK(in.file_name() == params.file_name);
|
|
BOOST_CHECK(in.comment() == params.comment);
|
|
}
|
|
|
|
// Test compression and decompression with custom allocator
|
|
BOOST_CHECK(
|
|
test_filter_pair( basic_gzip_compressor<gzip_alloc>(),
|
|
basic_gzip_decompressor<gzip_alloc>(),
|
|
std::string(data.begin(), data.end()) )
|
|
);
|
|
}
|
|
|
|
void multiple_member_test()
|
|
{
|
|
text_sequence data;
|
|
std::vector<char> temp, dest;
|
|
|
|
// Write compressed data to temp, twice in succession
|
|
filtering_ostream out;
|
|
out.push(gzip_compressor());
|
|
out.push(io::back_inserter(temp));
|
|
io::copy(make_iterator_range(data), out);
|
|
out.push(io::back_inserter(temp));
|
|
io::copy(make_iterator_range(data), out);
|
|
|
|
// Read compressed data from temp into dest
|
|
filtering_istream in;
|
|
in.push(gzip_decompressor());
|
|
in.push(array_source(&temp[0], temp.size()));
|
|
io::copy(in, io::back_inserter(dest));
|
|
|
|
// Check that dest consists of two copies of data
|
|
BOOST_REQUIRE_EQUAL(data.size() * 2, dest.size());
|
|
BOOST_CHECK(std::equal(data.begin(), data.end(), dest.begin()));
|
|
BOOST_CHECK(std::equal(data.begin(), data.end(), dest.begin() + dest.size() / 2));
|
|
|
|
dest.clear();
|
|
io::copy(
|
|
array_source(&temp[0], temp.size()),
|
|
io::compose(gzip_decompressor(), io::back_inserter(dest)));
|
|
|
|
// Check that dest consists of two copies of data
|
|
BOOST_REQUIRE_EQUAL(data.size() * 2, dest.size());
|
|
BOOST_CHECK(std::equal(data.begin(), data.end(), dest.begin()));
|
|
BOOST_CHECK(std::equal(data.begin(), data.end(), dest.begin() + dest.size() / 2));
|
|
}
|
|
|
|
void array_source_test()
|
|
{
|
|
std::string data = "simple test string.";
|
|
std::string encoded;
|
|
|
|
filtering_ostream out;
|
|
out.push(gzip_compressor());
|
|
out.push(io::back_inserter(encoded));
|
|
io::copy(make_iterator_range(data), out);
|
|
|
|
std::string res;
|
|
io::array_source src(encoded.data(),encoded.length());
|
|
io::copy(io::compose(io::gzip_decompressor(), src), io::back_inserter(res));
|
|
|
|
BOOST_CHECK_EQUAL(data, res);
|
|
}
|
|
|
|
#if defined(BOOST_MSVC)
|
|
# pragma warning(push)
|
|
# pragma warning(disable:4309) // Truncation of constant value
|
|
#endif
|
|
|
|
void header_test()
|
|
{
|
|
// This test is in response to https://svn.boost.org/trac/boost/ticket/5908
|
|
// which describes a problem parsing gzip headers with extra fields as
|
|
// defined in RFC 1952 (http://www.ietf.org/rfc/rfc1952.txt).
|
|
// The extra field data used here is characteristic of the tabix file
|
|
// format (http://samtools.sourceforge.net/tabix.shtml).
|
|
const char header_bytes[] = {
|
|
static_cast<char>(gzip::magic::id1),
|
|
static_cast<char>(gzip::magic::id2),
|
|
gzip::method::deflate, // Compression Method: deflate
|
|
gzip::flags::extra | gzip::flags::name | gzip::flags::comment, // flags
|
|
'\x22', '\x9c', '\xf3', '\x4e', // 4 byte modification time (little endian)
|
|
gzip::extra_flags::best_compression, // XFL
|
|
gzip::os_unix, // OS
|
|
6, 0, // 2 byte length of extra field (little endian, 6 bytes)
|
|
'B', 'C', 2, 0, 0, 0, // 6 bytes worth of extra field data
|
|
'a', 'b', 'c', 0, // original filename, null terminated
|
|
'n', 'o', ' ', 'c', 'o', 'm', 'm', 'e', 'n', 't', 0, // comment
|
|
};
|
|
size_t sz = sizeof(header_bytes)/sizeof(header_bytes[0]);
|
|
|
|
boost::iostreams::detail::gzip_header hdr;
|
|
for (size_t i = 0; i < sz; ++i) {
|
|
hdr.process(header_bytes[i]);
|
|
|
|
// Require that we are done at the last byte, not before.
|
|
if (i == sz-1)
|
|
BOOST_REQUIRE(hdr.done());
|
|
else
|
|
BOOST_REQUIRE(!hdr.done());
|
|
}
|
|
|
|
BOOST_CHECK_EQUAL("abc", hdr.file_name());
|
|
BOOST_CHECK_EQUAL("no comment", hdr.comment());
|
|
BOOST_CHECK_EQUAL(0x4ef39c22, hdr.mtime());
|
|
BOOST_CHECK_EQUAL(gzip::os_unix, hdr.os());
|
|
}
|
|
|
|
#if defined(BOOST_MSVC)
|
|
# pragma warning(pop)
|
|
#endif
|
|
|
|
void empty_file_test()
|
|
{
|
|
// This test is in response to https://svn.boost.org/trac/boost/ticket/5237
|
|
// The previous implementation of gzip_compressor only wrote the gzip file
|
|
// header when the first bytes of uncompressed input were processed, causing
|
|
// incorrect behavior for empty files
|
|
BOOST_CHECK(
|
|
test_filter_pair( gzip_compressor(),
|
|
gzip_decompressor(),
|
|
std::string() )
|
|
);
|
|
}
|
|
|
|
void multipart_test()
|
|
{
|
|
// This test verifies that the gzip_decompressor properly handles a file
|
|
// that was written in multiple parts using Z_FULL_FLUSH, and in particular
|
|
// handles the CRC properly when one of those parts is empty.
|
|
const char multipart_file[] = {
|
|
'\x1f', '\x8b', '\x08', '\x00', '\x00', '\x00', '\x00', '\x00', '\x02', '\xff', '\xf2', '\xc9',
|
|
'\xcc', '\x4b', '\x55', '\x30', '\xe4', '\xf2', '\x01', '\x51', '\x46', '\x10', '\xca', '\x98',
|
|
'\x0b', '\x00', '\x00', '\x00', '\xff', '\xff', '\x03', '\x00', '\xdb', '\xa7', '\x83', '\xc9',
|
|
'\x15', '\x00', '\x00', '\x00', '\x1f', '\x8b', '\x08', '\x00', '\x00', '\x00', '\x00', '\x00',
|
|
'\x02', '\xff', '\xf2', '\xc9', '\xcc', '\x4b', '\x55', '\x30', '\xe1', '\xf2', '\x01', '\x51',
|
|
'\xa6', '\x10', '\xca', '\x8c', '\x0b', '\x00', '\x00', '\x00', '\xff', '\xff', '\x03', '\x00',
|
|
'\x41', '\xe3', '\xcc', '\xaa', '\x15', '\x00', '\x00', '\x00', '\x1f', '\x8b', '\x08', '\x00',
|
|
'\x00', '\x00', '\x00', '\x00', '\x02', '\xff', '\x02', '\x00', '\x00', '\x00', '\xff', '\xff',
|
|
'\x03', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x1f', '\x8b',
|
|
'\x08', '\x00', '\x00', '\x00', '\x00', '\x00', '\x02', '\xff', '\xf2', '\xc9', '\xcc', '\x4b',
|
|
'\x55', '\x30', '\xe7', '\xf2', '\x01', '\x51', '\x16', '\x10', '\xca', '\x92', '\x0b', '\x00',
|
|
'\x00', '\x00', '\xff', '\xff', '\x03', '\x00', '\x2b', '\xac', '\xd3', '\xf5', '\x15', '\x00',
|
|
'\x00', '\x00'
|
|
};
|
|
|
|
filtering_istream in;
|
|
std::string line;
|
|
|
|
in.push(gzip_decompressor());
|
|
in.push(io::array_source(multipart_file, sizeof(multipart_file)));
|
|
|
|
// First part
|
|
std::getline(in, line);
|
|
BOOST_CHECK_EQUAL("Line 1", line);
|
|
std::getline(in, line);
|
|
BOOST_CHECK_EQUAL("Line 2", line);
|
|
std::getline(in, line);
|
|
BOOST_CHECK_EQUAL("Line 3", line);
|
|
|
|
// Second part immediately follows
|
|
std::getline(in, line);
|
|
BOOST_CHECK_EQUAL("Line 4", line);
|
|
std::getline(in, line);
|
|
BOOST_CHECK_EQUAL("Line 5", line);
|
|
std::getline(in, line);
|
|
BOOST_CHECK_EQUAL("Line 6", line);
|
|
|
|
// Then an empty part, followed by one last 3-line part.
|
|
std::getline(in, line);
|
|
BOOST_CHECK_EQUAL("Line 7", line);
|
|
std::getline(in, line);
|
|
BOOST_CHECK_EQUAL("Line 8", line);
|
|
std::getline(in, line);
|
|
BOOST_CHECK_EQUAL("Line 9", line);
|
|
|
|
// Check for gzip errors too.
|
|
BOOST_CHECK(!in.bad());
|
|
}
|
|
|
|
// Boost.Test entry point: creates the "gzip test" suite, registers every
// test case defined in this file, and returns the suite.
// Both parameters (argc/argv style) are unused.
test_suite* init_unit_test_suite(int, char* [])
{
    test_suite* test = BOOST_TEST_SUITE("gzip test");

    // Round-trip and multi-member stream tests.
    test->add(BOOST_TEST_CASE(&compression_test));
    test->add(BOOST_TEST_CASE(&multiple_member_test));
    test->add(BOOST_TEST_CASE(&array_source_test));

    // Header parsing, empty input and multipart input tests.
    test->add(BOOST_TEST_CASE(&header_test));
    test->add(BOOST_TEST_CASE(&empty_file_test));
    test->add(BOOST_TEST_CASE(&multipart_test));

    return test;
}
|