Skip to content

Commit

Permalink
Replace std::iostream use with stdio in FastPFor lib
Browse files Browse the repository at this point in the history
Even basic use of iostream pulls in dependencies that are almost as large in code size as the rest of the FastPFor library itself. Consider the following simple example, which pulls in all codecs from codecfactory and is compiled against the static runtime with VS2022:

```
#include "codecfactory.h"

// Size-measurement driver: references every codec exposed by CODECFactory so
// that all of them end up in the resulting binary.
int main()
{
    // Walk every scheme returned by the factory.
    for (auto codec : FastPForLib::CODECFactory().allSchemes())
    {
        // Never initialized; it is only passed as the size argument below.
        size_t z;
        // Null pointers and zero lengths: the calls exist to reference the
        // encode/decode entry points of each codec, not to process real data.
        codec->encodeArray((uint32_t*)0, 0, 0, z);
        codec->decodeArray(0, 0, (uint32_t*)0, z);
    }
}
```

When using std::iostream, the resulting binary is 834KB. When using stdio, the resulting binary is 571KB.
  • Loading branch information
pps83 committed Dec 24, 2022
1 parent 27fb6ec commit 56c8bec
Show file tree
Hide file tree
Showing 16 changed files with 82 additions and 84 deletions.
3 changes: 0 additions & 3 deletions headers/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@
#include <chrono>
#include <cmath>
#include <cstdint>
#include <iomanip>
#include <iostream>
#include <map>
#include <memory>
#include <numeric>
Expand All @@ -50,7 +48,6 @@

#define __attribute__(n)
#define __restrict__ __restrict

#endif

#endif /* COMMON_H_ */
2 changes: 2 additions & 0 deletions headers/deltautil.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
#define DELTAUTIL_H_
#include <vector>
#include <exception>
#include <iostream>
#include <iomanip>
#include "common.h"
#include "codecs.h"
#include "memutil.h"
Expand Down
15 changes: 7 additions & 8 deletions headers/fastpfor.h
Original file line number Diff line number Diff line change
Expand Up @@ -406,14 +406,13 @@ class SimplePFor : public IntegerCODEC {
}
assert(out == nvalue + initout);
if (oldnvalue < nvalue)
std::cerr
<< "It is possible we have a buffer overrun. You reported having allocated "
<< oldnvalue * sizeof(uint32_t)
<< " bytes for the compressed data but we needed "
<< nvalue * sizeof(uint32_t)
<< " bytes. Please increase the available memory"
" for compressed data or check the value of the last parameter provided "
" to the encodeArray method." << std::endl;
fprintf(stderr,
"It is possible we have a buffer overrun. You reported having allocated "
"%zu bytes for the compressed data but we needed "
"%zu bytes. Please increase the available memory "
"for compressed data or check the value of the last parameter provided "
"to the encodeArray method.\n",
oldnvalue * sizeof(uint32_t), nvalue * sizeof(uint32_t));
}

void getBestBFromData(const uint32_t *in, uint8_t &bestb,
Expand Down
1 change: 1 addition & 0 deletions headers/maropuparser.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#define MAROPUPARSER_H_

#include "common.h"
#include <iostream>

namespace FastPForLib {

Expand Down
14 changes: 8 additions & 6 deletions headers/newpfor.h
Original file line number Diff line number Diff line change
Expand Up @@ -242,14 +242,15 @@ void NewPFor<BlockSizeInUnitsOfPackSize, ExceptionCoder>::encodeArray(
}
#ifdef STATS
for (uint32_t k = 0; k < 33; ++k)
std::cout << "newpfor b=" << k << " " << stats[k] << std::endl;
printf("newpfor b=%u %u\n", k, stats[k]);
#endif
if (nvalue > initnvalue) {
std::cerr << " we have a possible buffer overrun" << std::endl;
fprintf(stderr, "we have a possible buffer overrun\n");
}
ASSERT(len == static_cast<size_t>(in - initin), len << " " << (in - initin));
ASSERT(len == static_cast<size_t>(in - initin),
std::to_string(len) + " " + std::to_string(in - initin));
ASSERT(nvalue == static_cast<size_t>(out - initout),
nvalue << " " << (out - initout));
std::to_string(nvalue) + " " + std::to_string(out - initout));
}

template <uint32_t BlockSizeInUnitsOfPackSize, class ExceptionCoder>
Expand Down Expand Up @@ -310,9 +311,10 @@ NewPFor<BlockSizeInUnitsOfPackSize, ExceptionCoder>::decodeArray(
}

if (static_cast<size_t>(out - initout) > nvalue) {
std::cerr << "possible buffer overrun" << std::endl;
fprintf(stderr, "possible buffer overrun\n");
}
ASSERT(in <= len + initin, in - initin << " " << len);
ASSERT(in <= len + initin,
std::to_string(in - initin) + " " + std::to_string(len));

nvalue = out - initout;
assert(nvalue == numBlocks * BlockSize);
Expand Down
32 changes: 15 additions & 17 deletions headers/simdfastpfor.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,15 +209,14 @@ class SIMDFastPFor : public IntegerCODEC {
in += thissize;
}
assert(out == nvalue + initout);
if (oldnvalue < nvalue)
std::cerr
<< "It is possible we have a buffer overrun. You reported having allocated "
<< oldnvalue * sizeof(uint32_t)
<< " bytes for the compressed data but we needed "
<< nvalue * sizeof(uint32_t)
<< " bytes. Please increase the available memory"
" for compressed data or check the value of the last parameter provided "
" to the encodeArray method." << std::endl;
if (oldnvalue < nvalue)
fprintf(stderr,
"It is possible we have a buffer overrun. You reported having allocated "
"%zu bytes for the compressed data but we needed "
"%zu bytes. Please increase the available memory "
"for compressed data or check the value of the last parameter provided "
"to the encodeArray method.\n",
oldnvalue * sizeof(uint32_t), nvalue * sizeof(uint32_t));
resetBuffer(); // if you don't do this, the buffer has a memory
}

Expand Down Expand Up @@ -444,14 +443,13 @@ class SIMDSimplePFor : public IntegerCODEC {
}
assert(out == nvalue + initout);
if (oldnvalue < nvalue)
std::cerr
<< "It is possible we have a buffer overrun. You reported having allocated "
<< oldnvalue * sizeof(uint32_t)
<< " bytes for the compressed data but we needed "
<< nvalue * sizeof(uint32_t)
<< " bytes. Please increase the available memory"
" for compressed data or check the value of the last parameter provided "
" to the encodeArray method." << std::endl;
fprintf(stderr,
"It is possible we have a buffer overrun. You reported having allocated "
"%zu bytes for the compressed data but we needed "
"%zu bytes. Please increase the available memory "
"for compressed data or check the value of the last parameter provided "
"to the encodeArray method.\n",
oldnvalue * sizeof(uint32_t), nvalue * sizeof(uint32_t));
}

void getBestBFromData(const uint32_t *in, uint8_t &bestb,
Expand Down
14 changes: 8 additions & 6 deletions headers/simdnewpfor.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,14 +220,15 @@ void SIMDNewPFor<BlockSizeInUnitsOfPackSize, ExceptionCoder>::encodeArray(
}
#ifdef STATS
for (uint32_t k = 0; k < 33; ++k)
std::cout << "simdnewpfor b=" << k << " " << stats[k] << std::endl;
printf("simdnewpfor b=%u %u\n", k, stats[k]);
#endif
if (nvalue > initnvalue) {
std::cerr << " we have a possible buffer overrun" << std::endl;
fprintf(stderr, "we have a possible buffer overrun\n");
}
ASSERT(len == static_cast<size_t>(in - initin), len << " " << (in - initin));
ASSERT(len == static_cast<size_t>(in - initin),
std::to_string(len) + " " + std::to_string(in - initin));
ASSERT(nvalue == static_cast<size_t>(out - initout),
nvalue << " " << (out - initout));
std::to_string(nvalue) + " " + std::to_string(out - initout));
}

template <uint32_t BlockSizeInUnitsOfPackSize, class ExceptionCoder>
Expand Down Expand Up @@ -277,9 +278,10 @@ SIMDNewPFor<BlockSizeInUnitsOfPackSize, ExceptionCoder>::decodeArray(
}
}
if (static_cast<size_t>(out - initout) > nvalue) {
std::cerr << "possible buffer overrun" << std::endl;
fprintf(stderr, "possible buffer overrun\n");
}
ASSERT(in <= len + initin, in - initin << " " << len);
ASSERT(in <= len + initin,
std::to_string(in - initin) + " " + std::to_string(len));

nvalue = out - initout;
assert(nvalue == numBlocks * BlockSize);
Expand Down
12 changes: 6 additions & 6 deletions headers/simple16.h
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ void Simple16<MarkLength>::encodeArray(const uint32_t *in, const size_t length,
assert(which(out) == 14);
} else {
if ((*in >> 28) > 0) {
std::cerr << "Input's out of range: " << *in << std::endl;
fprintf(stderr, "Input's out of range: %u\n", *in);
throw std::runtime_error(
"You tried to apply Simple16 to an incompatible set of integers: they should be in [0,2^28).");
}
Expand Down Expand Up @@ -617,7 +617,7 @@ void Simple16<MarkLength>::encodeArray(const uint32_t *in, const size_t length,
assert(which(out) == 14);
} else {
if ((*in >> 28) > 0) {
std::cerr << "Input's out of range: " << *in << std::endl;
fprintf(stderr, "Input's out of range: %u\n", *in);
throw std::runtime_error(
"You tried to apply Simple16 to an incompatible set of integers.");
}
Expand Down Expand Up @@ -725,10 +725,10 @@ const uint32_t *Simple16<MarkLength>::decodeArray(const uint32_t *in,
const uint32_t actualvalue =
MarkLength ? *(in++) : static_cast<uint32_t>(nvalue);
if (nvalue < actualvalue)
std::cerr << " possible overrun" << std::endl;
fprintf(stderr, "possible overrun\n");
nvalue = actualvalue;
#ifdef STATS
std::cout << "simple16 decode " << len << std::endl;
printf("simple16 decode %zu\n", len);
std::vector<uint32_t> stats(16, 0);
#endif
const uint32_t *const end = out + nvalue;
Expand All @@ -742,10 +742,10 @@ const uint32_t *Simple16<MarkLength>::decodeArray(const uint32_t *in,
#ifdef STATS
uint32_t sum = std::accumulate(stats.begin(), stats.end(), 0);
for (uint32_t k = 0; k < stats.size(); ++k) {
std::cout << "simple16 stats[" << k << "]=" << stats[k] * 1.0 / sum << std::endl;
printf("simple16 stats[%u]=%f\n", k, stats[k] * 1.0 / sum);
}
#endif
ASSERT(in <= endin, in - endin);
ASSERT(in <= endin, std::to_string(in - endin));
return in;
}

Expand Down
8 changes: 4 additions & 4 deletions headers/simple8b.h
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ void Simple8b<MarkLength>::encodeArray(const uint32_t *in, const size_t length,
}
if (becareful)
ASSERT(initin + length - ValuesRemaining + NumberOfValuesCoded == in,
which(out64));
std::to_string(which(out64)));
++out64;

ValuesRemaining -= NumberOfValuesCoded;
Expand Down Expand Up @@ -512,7 +512,7 @@ const uint32_t *Simple8b<MarkLength>::decodeArray(const uint32_t *in,
#endif

if (nvalue < actualvalue)
std::cerr << " possible overrun" << std::endl;
fprintf(stderr, "possible overrun\n");
nvalue = actualvalue;
const uint32_t *const end = out + nvalue;
const uint32_t *const initout(out);
Expand Down Expand Up @@ -634,14 +634,14 @@ const uint32_t *Simple8b<MarkLength>::decodeArray(const uint32_t *in,
#ifdef STATS
uint32_t sum = std::accumulate(stats.begin(), stats.end(), 0);
for (uint32_t k = 0; k < stats.size(); ++k) {
std::cout << "simple8b stats[" << k << "]=" << stats[k] * 1.0 / sum << std::endl;
printf("simple8b stats[%u]=%f\n", k, stats[k] * 1.0 / sum);
}
#endif
assert(in64 <= finalin64);
in = reinterpret_cast<const uint32_t *>(in64);
assert(in <= endin);
// check that we don't overrun the buffer too much?
ASSERT(out < end + 240, out - end);
ASSERT(out < end + 240, std::to_string(out - end));
nvalue = MarkLength ? actualvalue : out - initout;
return in;
}
Expand Down
2 changes: 1 addition & 1 deletion headers/simple8b_rle.h
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ template <bool MarkLength> class Simple8b_RLE : public IntegerCODEC {
const uint64_t *finalin64 = reinterpret_cast<const uint64_t *>(endin);
#endif
if (nvalue < actualvalue) {
std::cerr << " possible overrun" << std::endl;
fprintf(stderr, "possible overrun\n");
}
nvalue = actualvalue;

Expand Down
17 changes: 7 additions & 10 deletions headers/simple9.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ void Simple9<MarkLength, hacked>::encodeArray(const uint32_t *in,
assert(which(out) == 14);
} else {
if ((*in >> 28) > 0) {
std::cerr << "Input's out of range: " << *in << std::endl;
fprintf(stderr, "Input's out of range: %u\n", *in);
throw std::runtime_error(
"You tried to apply Simple9 to an incompatible set of integers.");
}
Expand Down Expand Up @@ -260,7 +260,7 @@ void Simple9<MarkLength, hacked>::encodeArray(const uint32_t *in,
assert(which(out) == 14);
} else {
if ((*in >> 28) > 0) {
std::cerr << "Input's out of range: " << *in << std::endl;
fprintf(stderr, "Input's out of range: %u\n", *in);
throw std::runtime_error(
"You tried to apply Simple9 to an incompatible set of integers.");
}
Expand Down Expand Up @@ -294,7 +294,7 @@ Simple9<MarkLength, hacked>::decodeArray(const uint32_t *in, const size_t /* len
throw NotEnoughStorage(*in);
const uint32_t actualvalue = MarkLength ? *(in++) : nvalue;
if (nvalue < actualvalue)
std::cerr << " possible overrun" << std::endl;
fprintf(stderr, "possible overrun\n");
nvalue = actualvalue;
const uint32_t *const end = out + nvalue;
while (end > out) {
Expand All @@ -310,14 +310,11 @@ Simple9<MarkLength, hacked>::decodeArray(const uint32_t *in, const size_t /* len
uint32_t sum = std::accumulate(stats.begin(), stats.end(), 0);

for (uint32_t k = 0; k < stats.size(); ++k) {
std::cout << "k=" << k << std::endl;
std::cout << "simple9 stats[" << k << "]=" << (stats[k] * 1.0 / sum)
<< std::endl;
printf("simple9 stats[k=%u]=%f\n", k, stats[k] * 1.0 / sum);
}
std::cout << "alt computed length" << sum << std::endl;
std::cout << "computed length = " << expectedlength << std::endl;
std::cout << "we compressed " << nvalue << " integers down to " << len
<< " 32-bit words" << std::endl;
printf("alt computed length %u\n", sum);
printf("computed length = %zu\n", expectedlength);
printf("we compressed %zu integers down to %zu 32-bit words\n", nvalue, len);
#endif
return in;
}
Expand Down
2 changes: 1 addition & 1 deletion headers/simple9_rle.h
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ template <bool MarkLength> class Simple9_RLE : public IntegerCODEC {
}
const size_t actualvalue = MarkLength ? markednvalue : nvalue;
if (nvalue < actualvalue) {
std::cerr << " possible overrun" << std::endl;
fprintf(stderr, "possible overrun\n");
}
auto count = actualvalue;
Simple9_Codec::Decompress(input, 0, out, 0, count);
Expand Down
20 changes: 10 additions & 10 deletions headers/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,17 @@ namespace FastPForLib {
//#define STATS
// taken from stackoverflow
#ifndef NDEBUG
#define ASSERT(condition, message) \
do { \
if (!(condition)) { \
std::cerr << "Assertion `" #condition "` failed in " << __FILE__ \
<< " line " << __LINE__ << ": " << message << std::endl; \
std::exit(EXIT_FAILURE); \
} \
#define ASSERT(condition, message) /* message is an std::string */ \
do { \
if (!(condition)) { \
fprintf(stderr, "Assertion `%s` failed in %s line %d : %s\n", \
#condition, __FILE__, __LINE__, (message).c_str()); \
std::exit(EXIT_FAILURE); \
} \
} while (false)
#else
#define ASSERT(condition, message) \
do { \
#define ASSERT(condition, message) \
do { \
} while (false)
#endif

Expand Down Expand Up @@ -416,7 +416,7 @@ class BitWidthHistoGram {
if (sum == 0)
return;
for (size_t k = 0; k < histo.size(); ++k) {
std::cout << prefix << k << " " << histo[k] / sum << std::endl;
printf("%s%zu %f\n", prefix.c_str(), k, histo[k] / sum);
}
}
template <class container> void eatIntegers(const container &rawdata) {
Expand Down
Loading

0 comments on commit 56c8bec

Please sign in to comment.