Skip to content

Commit e9990d3

Browse files
committed
serialization correct for all but eager bucket
1 parent 7b9d587 commit e9990d3

2 files changed

Lines changed: 12 additions & 4 deletions

File tree

include/bucket_buffer.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,9 @@ struct BufferEntry {
8282
class BucketBuffer {
8383
friend class Sketch;
8484
public:
85-
std::vector<BufferEntry> entries;
8685

8786
private:
87+
std::vector<BufferEntry> entries;
8888
size_t _capacity;
8989

9090
bool _compacted=false;

src/sketch.cpp

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ Sketch::Sketch(vec_t vector_len, uint64_t seed, bool compressed, std::istream &b
8585
else {
8686
binary_in.read((char *)buckets, bucket_array_bytes());
8787
}
88+
// in both cases, now deserialize the bucket buffer
89+
bucket_buffer.deserialize(binary_in);
8890
}
8991

9092

@@ -131,18 +133,21 @@ void Sketch::compressed_deserialize(std::istream& binary_in) {
131133
Sketch::Sketch(vec_t vector_len, uint64_t seed, std::istream &binary_in, size_t _samples,
132134
size_t _cols):
133135
seed(seed) {
136+
// TODO - do this properly
134137
num_samples = _samples;
135138
cols_per_sample = _cols;
136139
num_columns = num_samples * cols_per_sample;
137-
bkt_per_col = calc_bkt_per_col(vector_len);
140+
// bkt_per_col = calc_bkt_per_col(vector_len);
138141
// bkt_per_col = 1;
142+
binary_in.read((char *) &bkt_per_col, sizeof(size_t));
139143
num_buckets = num_columns * bkt_per_col + 1; // plus 1 for deterministic bucket
140144
bucket_buffer = BucketBuffer();
141145
buckets = (Bucket*) new char[bucket_array_bytes()];
142146
#ifdef EAGER_BUCKET_CHECK
143147
nonempty_buckets = (vec_t*) (buckets + num_buckets);
144148
#endif
145149
binary_in.read((char *)buckets, bucket_array_bytes());
150+
bucket_buffer.deserialize(binary_in);
146151
//
147152
}
148153

@@ -186,7 +191,7 @@ Sketch::~Sketch() {
186191
#ifdef ROW_MAJOR_SKETCHES
187192
// TODO - implement to allow shrinkage
188193
std::memcpy(new_buckets, buckets, old_bucket_array_bytes);
189-
assert(false)
194+
// assert(false)
190195
#else
191196
for (size_t i = 0; i < num_columns; ++i) {
192197
Bucket *old_column = buckets + (i * old_num_rows);
@@ -639,7 +644,7 @@ void Sketch::compressed_serialize(std::ostream &binary_out) const {
639644
#ifdef ROW_MAJOR_SKETCHES
640645
// write out max depth, nonempty flags, deterministic bucket, everything else
641646
// then all other buckets
642-
uint8_t max_depth = effective_size();
647+
uint8_t max_depth = effective_depth();
643648
binary_out.write((char*) &max_depth, sizeof(uint8_t));
644649
size_t number_of_buckets = num_columns * max_depth;
645650
binary_out.write((char *) &get_deterministic_bucket(), sizeof(Bucket));
@@ -662,12 +667,15 @@ void Sketch::compressed_serialize(std::ostream &binary_out) const {
662667
binary_out.write((char *) current_column, sizeof(Bucket) * sizes[i]);
663668
}
664669
#endif
670+
// write out deep bucket buffer!
671+
bucket_buffer.serialize(binary_out);
665672
}
666673

667674
void Sketch::serialize(std::ostream &binary_out) const {
668675
binary_out.write((char*) &bkt_per_col, sizeof(size_t));
669676
// note that these will include the flag bits, if used.
670677
binary_out.write((char*) buckets, bucket_array_bytes());
678+
bucket_buffer.serialize(binary_out);
671679
}
672680

673681
bool operator==(const Sketch &sketch1, const Sketch &sketch2) {

0 commit comments

Comments
 (0)