iostreams/test/gzip_test.cpp
Joel Nordell 633e78e076 Do not check length_ before computing CRC.
This fixes a problem where the gzip_decompressor would fail the CRC
check when reading a multipart gzip file that had been written using
Z_FULL_FLUSH, and contains an empty part (with a 0 CRC).

Including a unit test that exposes the bug.
2016-07-12 17:59:44 -05:00

247 lines
9.0 KiB
C++

// (C) Copyright 2008 CodeRage, LLC (turkanis at coderage dot com)
// (C) Copyright 2004-2007 Jonathan Turkanis
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt.)
// See http://www.boost.org/libs/iostreams for documentation.
#include <cstddef>
#include <string>
#include <boost/iostreams/copy.hpp>
#include <boost/iostreams/device/array.hpp>
#include <boost/iostreams/device/back_inserter.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/iostreams/filter/test.hpp>
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/ref.hpp>
#include <boost/range/iterator_range.hpp>
#include <boost/test/test_tools.hpp>
#include <boost/test/unit_test.hpp>
#include "detail/sequence.hpp"
#include "detail/verification.hpp"
using namespace boost;
using namespace boost::iostreams;
using namespace boost::iostreams::test;
namespace io = boost::iostreams;
using boost::unit_test::test_suite;
// Minimal custom allocator used to instantiate the allocator-parameterized
// gzip filters (basic_gzip_compressor / basic_gzip_decompressor) below.
//
// It is a template with its own nested `rebind` so that rebinding stays inside
// this allocator family. A non-template struct derived from
// std::allocator<char> only has the inherited std::allocator<char>::rebind,
// which yields plain std::allocator<U> (dropping the custom type) and which
// was deprecated in C++17 and removed in C++20.
template<typename T>
struct basic_test_alloc : std::allocator<T> {
    basic_test_alloc() { }

    // Copy the base subobject explicitly instead of leaving it implicitly
    // default-constructed.
    basic_test_alloc(const basic_test_alloc& other) : std::allocator<T>(other) { }

    // Converting constructor from any std::allocator<U>; this also accepts
    // basic_test_alloc<U>, which derives from std::allocator<U>.
    template<typename U>
    basic_test_alloc(const std::allocator<U>&) { }

    // Rebinding produces this allocator template again, not std::allocator.
    template<typename U>
    struct rebind {
        typedef basic_test_alloc<U> other;
    };
};

// The name used by the tests; still an allocator of char.
typedef basic_test_alloc<char> gzip_alloc;
void compression_test()
{
text_sequence data;
// Test compression and decompression with metadata
for (int i = 0; i < 4; ++i) {
gzip_params params;
if (i & 1) {
params.file_name = "original file name";
}
if (i & 2) {
params.comment = "detailed file description";
}
gzip_compressor out(params);
gzip_decompressor in;
BOOST_CHECK(
test_filter_pair( boost::ref(out),
boost::ref(in),
std::string(data.begin(), data.end()) )
);
BOOST_CHECK(in.file_name() == params.file_name);
BOOST_CHECK(in.comment() == params.comment);
}
// Test compression and decompression with custom allocator
BOOST_CHECK(
test_filter_pair( basic_gzip_compressor<gzip_alloc>(),
basic_gzip_decompressor<gzip_alloc>(),
std::string(data.begin(), data.end()) )
);
}
// Writes the sample through gzip_compressor twice into one buffer and checks
// that gzip_decompressor yields two back-to-back copies of the sample, both
// when used in a filtering_istream and when composed with the output sink.
void multiple_member_test()
{
    text_sequence data;
    std::vector<char> compressed;
    std::vector<char> restored;

    // Compress the sample into `compressed`, then attach a sink to the same
    // buffer again and compress the sample a second time.
    filtering_ostream out;
    out.push(gzip_compressor());
    out.push(io::back_inserter(compressed));
    io::copy(make_iterator_range(data), out);
    out.push(io::back_inserter(compressed));
    io::copy(make_iterator_range(data), out);

    // Pass 1: decompress by pulling through a filtering_istream.
    filtering_istream in;
    in.push(gzip_decompressor());
    in.push(array_source(&compressed[0], compressed.size()));
    io::copy(in, io::back_inserter(restored));

    // The output must be exactly two copies of the sample. The size is a hard
    // requirement because the comparisons below read that many elements.
    BOOST_REQUIRE_EQUAL(data.size() * 2, restored.size());
    std::vector<char>::iterator second_half = restored.begin() + restored.size() / 2;
    BOOST_CHECK(std::equal(data.begin(), data.end(), restored.begin()));
    BOOST_CHECK(std::equal(data.begin(), data.end(), second_half));

    // Pass 2: decompress by composing the decompressor with the sink.
    restored.clear();
    io::copy(
        array_source(&compressed[0], compressed.size()),
        io::compose(gzip_decompressor(), io::back_inserter(restored)));

    // Same expectation as for pass 1.
    BOOST_REQUIRE_EQUAL(data.size() * 2, restored.size());
    second_half = restored.begin() + restored.size() / 2;
    BOOST_CHECK(std::equal(data.begin(), data.end(), restored.begin()));
    BOOST_CHECK(std::equal(data.begin(), data.end(), second_half));
}
// Compresses a short string and decompresses it again by composing
// gzip_decompressor directly with an array_source over the compressed bytes.
void array_source_test()
{
    std::string plain = "simple test string.";

    // Compress `plain` into `packed`.
    std::string packed;
    filtering_ostream out;
    out.push(gzip_compressor());
    out.push(io::back_inserter(packed));
    io::copy(make_iterator_range(plain), out);

    // Decompress straight from the in-memory bytes.
    io::array_source src(packed.data(), packed.size());
    std::string unpacked;
    io::copy(io::compose(io::gzip_decompressor(), src), io::back_inserter(unpacked));

    BOOST_CHECK_EQUAL(plain, unpacked);
}
#if defined(BOOST_MSVC)
# pragma warning(push)
# pragma warning(disable:4309) // Truncation of constant value
#endif
// Feeds a hand-built gzip header to detail::gzip_header one byte at a time
// and checks both when parsing reports completion and what was parsed.
void header_test()
{
    // Regression test for https://svn.boost.org/trac/boost/ticket/5908, which
    // reports a failure to parse gzip headers carrying extra fields as defined
    // in RFC 1952 (http://www.ietf.org/rfc/rfc1952.txt). The extra field used
    // here is characteristic of the tabix file format
    // (http://samtools.sourceforge.net/tabix.shtml).
    const char header_bytes[] = {
        static_cast<char>(gzip::magic::id1),
        static_cast<char>(gzip::magic::id2),
        gzip::method::deflate, // Compression Method: deflate
        gzip::flags::extra | gzip::flags::name | gzip::flags::comment, // flags
        '\x22', '\x9c', '\xf3', '\x4e', // 4 byte modification time (little endian)
        gzip::extra_flags::best_compression, // XFL
        gzip::os_unix, // OS
        6, 0, // 2 byte length of extra field (little endian, 6 bytes)
        'B', 'C', 2, 0, 0, 0, // 6 bytes worth of extra field data
        'a', 'b', 'c', 0, // original filename, null terminated
        'n', 'o', ' ', 'c', 'o', 'm', 'm', 'e', 'n', 't', 0, // comment
    };
    const std::size_t header_len = sizeof(header_bytes) / sizeof(header_bytes[0]);

    boost::iostreams::detail::gzip_header hdr;
    for (std::size_t pos = 0; pos != header_len; ++pos) {
        hdr.process(header_bytes[pos]);
        // Parsing must report completion on the final byte and not before.
        if (pos + 1 < header_len) {
            BOOST_REQUIRE(!hdr.done());
        } else {
            BOOST_REQUIRE(hdr.done());
        }
    }

    // Fields decoded from the bytes above.
    BOOST_CHECK_EQUAL("abc", hdr.file_name());
    BOOST_CHECK_EQUAL("no comment", hdr.comment());
    BOOST_CHECK_EQUAL(0x4ef39c22, hdr.mtime());
    BOOST_CHECK_EQUAL(gzip::os_unix, hdr.os());
}
#if defined(BOOST_MSVC)
# pragma warning(pop)
#endif
// Round-trips an empty input through gzip_compressor / gzip_decompressor.
void empty_file_test()
{
    // Regression test for https://svn.boost.org/trac/boost/ticket/5237: the
    // earlier gzip_compressor emitted the gzip file header only once the first
    // uncompressed bytes were processed, so empty files were handled
    // incorrectly.
    const std::string no_input;
    BOOST_CHECK(test_filter_pair(gzip_compressor(), gzip_decompressor(), no_input));
}
// Decompresses a canned gzip file written in multiple parts using
// Z_FULL_FLUSH and checks that all nine text lines come out, in particular
// that the CRC is handled properly when one of the parts is empty.
void multipart_test()
{
    const char multipart_file[] = {
        '\x1f', '\x8b', '\x08', '\x00', '\x00', '\x00', '\x00', '\x00', '\x02', '\xff', '\xf2', '\xc9',
        '\xcc', '\x4b', '\x55', '\x30', '\xe4', '\xf2', '\x01', '\x51', '\x46', '\x10', '\xca', '\x98',
        '\x0b', '\x00', '\x00', '\x00', '\xff', '\xff', '\x03', '\x00', '\xdb', '\xa7', '\x83', '\xc9',
        '\x15', '\x00', '\x00', '\x00', '\x1f', '\x8b', '\x08', '\x00', '\x00', '\x00', '\x00', '\x00',
        '\x02', '\xff', '\xf2', '\xc9', '\xcc', '\x4b', '\x55', '\x30', '\xe1', '\xf2', '\x01', '\x51',
        '\xa6', '\x10', '\xca', '\x8c', '\x0b', '\x00', '\x00', '\x00', '\xff', '\xff', '\x03', '\x00',
        '\x41', '\xe3', '\xcc', '\xaa', '\x15', '\x00', '\x00', '\x00', '\x1f', '\x8b', '\x08', '\x00',
        '\x00', '\x00', '\x00', '\x00', '\x02', '\xff', '\x02', '\x00', '\x00', '\x00', '\xff', '\xff',
        '\x03', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x1f', '\x8b',
        '\x08', '\x00', '\x00', '\x00', '\x00', '\x00', '\x02', '\xff', '\xf2', '\xc9', '\xcc', '\x4b',
        '\x55', '\x30', '\xe7', '\xf2', '\x01', '\x51', '\x16', '\x10', '\xca', '\x92', '\x0b', '\x00',
        '\x00', '\x00', '\xff', '\xff', '\x03', '\x00', '\x2b', '\xac', '\xd3', '\xf5', '\x15', '\x00',
        '\x00', '\x00'
    };

    filtering_istream in;
    std::string line;
    in.push(gzip_decompressor());
    in.push(io::array_source(multipart_file, sizeof(multipart_file)));

    // Lines 1-3 come from the first part and lines 4-6 from the second, which
    // follows immediately. An empty part comes next, and lines 7-9 come from
    // the last part.
    for (int n = 1; n <= 9; ++n) {
        std::string expected("Line ");
        expected += static_cast<char>('0' + n);
        std::getline(in, line);
        BOOST_CHECK_EQUAL(expected, line);
    }

    // Check for gzip errors too.
    BOOST_CHECK(!in.bad());
}
// Boost.Test entry point: builds the "gzip test" suite and registers each of
// the test functions defined in this file. The arguments are unused.
test_suite* init_unit_test_suite(int, char* [])
{
    test_suite* suite = BOOST_TEST_SUITE("gzip test");

    // Round-trip tests.
    suite->add(BOOST_TEST_CASE(&compression_test));
    suite->add(BOOST_TEST_CASE(&multiple_member_test));
    suite->add(BOOST_TEST_CASE(&array_source_test));

    // Header parsing and regression tests.
    suite->add(BOOST_TEST_CASE(&header_test));
    suite->add(BOOST_TEST_CASE(&empty_file_test));
    suite->add(BOOST_TEST_CASE(&multipart_test));

    return suite;
}