-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdecoder.cpp
More file actions
162 lines (143 loc) · 5.41 KB
/
Copy pathdecoder.cpp
File metadata and controls
162 lines (143 loc) · 5.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
//
// Created by wwwis on 15.02.2022.
//
#include <algorithm>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <numeric>
#include <stdexcept>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
#include "arith_enc_dec.h"
// Number of distinct values an 8-bit byte can take.
size_t BYTE_SIZE = 256;
/*
std::vector<unsigned char> read_bytes(const std::string &file_name, const bool read_meta = false) {
size_t bwt_shift_position = SIZE_MAX;
std::vector<unsigned char> data;
std::ifstream fin(file_name, std::ios::binary);
std::vector<unsigned char> bytes((std::istreambuf_iterator<char>(fin)), {});
fin.close();
if (read_meta) {
bwt_shift_position = *((size_t *) bytes.data());
data = bytes;
data.push_back(bwt_shift_position);
} else {
data = bytes;
}
return data;
}
*/
/**
 * Inverts a move-to-front (MTF) transform over bytes.
 *
 * Each input byte is treated as a position in a running symbol table that
 * starts as the identity permutation 0..255. The symbol stored at that
 * position is emitted and then moved to the front of the table.
 *
 * @param data MTF-encoded bytes (table positions).
 * @return The decoded bytes, one per input byte.
 */
std::vector<unsigned char> move_to_front_reverse(const std::vector<unsigned char> &data) {
    // One slot per possible byte value, so every input byte is a valid index.
    // A fixed size is used instead of the mutable global BYTE_SIZE so that the
    // table can never be smaller than the largest index an unsigned char holds.
    constexpr size_t alphabet_size = static_cast<size_t>(UCHAR_MAX) + 1;
    std::vector<unsigned char> alphabet(alphabet_size);
    std::iota(alphabet.begin(), alphabet.end(), 0);

    std::vector<unsigned char> decoded_data;
    decoded_data.reserve(data.size());
    for (const unsigned char position : data) {
        const auto symbol_it = alphabet.begin() + position;
        decoded_data.push_back(*symbol_it);
        // Shifts [begin, symbol_it) one slot to the right and places the symbol
        // at the front; a no-op when the symbol is already first.
        std::rotate(alphabet.begin(), symbol_it, symbol_it + 1);
    }
    return decoded_data;
}
// Returns start_index + offset, reduced by n once when the sum reaches or
// exceeds n. Only a single wrap is applied (no modulo).
size_t cyclic_index(const size_t &start_index, const size_t &offset, const size_t &n) {
    const size_t shifted = start_index + offset;
    if (shifted >= n) {
        return shifted - n;
    }
    return shifted;
}
/**
 * Comparator that orders two cyclic rotations of a byte buffer
 * lexicographically. A rotation is identified by its start index.
 *
 * Stores a reference to the buffer, so the buffer must outlive the comparator.
 */
class bwt_cmp_straight {
    const std::vector<unsigned char> &data;
public:
    explicit bwt_cmp_straight(const std::vector<unsigned char> &bwt_data) : data(bwt_data) {}

    /**
     * @param left  Start index of the first rotation.
     * @param right Start index of the second rotation.
     * @return true when the rotation starting at left sorts strictly before
     *         the rotation starting at right; false otherwise (including when
     *         both rotations are identical).
     */
    bool operator()(size_t left, size_t right) const {
        const size_t n = data.size();
        // The bound is checked before any element is read, so an empty buffer
        // is never indexed.
        for (size_t i = 0; i < n; ++i) {
            const unsigned char lhs = data[cyclic_index(left, i, n)];
            const unsigned char rhs = data[cyclic_index(right, i, n)];
            if (lhs != rhs) {
                return lhs < rhs;
            }
        }
        // All n positions matched: equal rotations are not "less than".
        return false;
    }
};
/**
 * Comparator that orders two positions of a byte buffer by the byte stored at
 * each position.
 *
 * Stores a reference to the buffer, so the buffer must outlive the comparator.
 */
class bwt_cmp_reverse {
    const std::vector<unsigned char> &data;
public:
    explicit bwt_cmp_reverse(const std::vector<unsigned char> &bwt_data) : data(bwt_data) {}

    /**
     * @param left  Index of the first byte; must be a valid index into the buffer.
     * @param right Index of the second byte; must be a valid index into the buffer.
     * @return true when the byte at left is strictly smaller than the byte at right.
     */
    // const so the comparator can be invoked through a const object or reference.
    bool operator()(size_t left, size_t right) const {
        return data[left] < data[right];
    }
};
/**
 * Reconstructs the original byte sequence from Burrows-Wheeler transformed data.
 *
 * The positions of bwt_data are stably sorted by byte value; following that
 * permutation from row_index for bwt_data.size() steps yields the original
 * bytes in order.
 *
 * @param bwt_data  The transformed bytes (last column of the sorted rotations).
 * @param row_index Row of the untransformed input in the sorted rotation table.
 * @return The reconstructed bytes; empty when bwt_data is empty.
 * @throws std::out_of_range when bwt_data is non-empty and row_index is not a
 *         valid position in it.
 */
std::vector<unsigned char> bwt_reverse(const std::vector<unsigned char> &bwt_data, size_t row_index) {
    const size_t n = bwt_data.size();
    if (n == 0) {
        return {};
    }
    if (row_index >= n) {
        // Guards against a corrupt or missing header value (e.g. SIZE_MAX).
        throw std::out_of_range("bwt_reverse: row_index is outside the transformed data");
    }

    // l_shift[k] is the position in bwt_data of the k-th smallest byte; the
    // sort must be stable so that equal bytes keep their relative order.
    std::vector<size_t> l_shift(n);
    std::iota(l_shift.begin(), l_shift.end(), size_t{0});
    std::stable_sort(l_shift.begin(), l_shift.end(),
                     [&bwt_data](size_t left, size_t right) {
                         return bwt_data[left] < bwt_data[right];
                     });

    std::vector<unsigned char> initial_data(n);
    for (size_t i = 0; i < n; ++i) {
        row_index = l_shift[row_index];
        initial_data[i] = bwt_data[row_index];
    }
    return initial_data;
}
/**
 * Reads a whole file into memory, optionally splitting off a leading header.
 *
 * Prints the total number of bytes read to stdout. When the file cannot be
 * opened, or read_meta is set and the file is shorter than the header, an
 * error message is printed and the process exits with a failure status.
 *
 * @param file_name Path of the file to read (opened in binary mode).
 * @param read_meta When true, the first sizeof(size_t) bytes are decoded as a
 *                  native-endian size_t and removed from the returned payload.
 * @return {payload bytes, header value}; the header value is SIZE_MAX when
 *         read_meta is false.
 *
 * NOTE(review): the header width is taken to be sizeof(size_t), matching what
 * write_bytes emits; confirm that the producer of the file uses the same width.
 */
std::tuple<std::vector<unsigned char>, size_t>
read_bytes(
        const std::string &file_name,
        const bool read_meta = false
) {
    std::ifstream fin(file_name, std::ios::binary);
    if (!fin.is_open()) {
        std::cout << "File isn't open!" << std::endl;
        std::exit(EXIT_FAILURE);
    }
    std::vector<unsigned char> bytes((std::istreambuf_iterator<char>(fin)), {});
    fin.close();
    std::cout << "size: " << bytes.size() << std::endl;

    size_t bwt_shift_position = SIZE_MAX;
    if (read_meta) {
        constexpr size_t header_size = sizeof(size_t);
        if (bytes.size() < header_size) {
            std::cout << "File is too short to contain metadata!" << std::endl;
            std::exit(EXIT_FAILURE);
        }
        // memcpy avoids the unaligned / aliasing read of a pointer cast.
        std::memcpy(&bwt_shift_position, bytes.data(), header_size);
        // Strip exactly the bytes that were decoded as the header.
        bytes.erase(bytes.begin(), bytes.begin() + static_cast<std::ptrdiff_t>(header_size));
    }
    return {std::move(bytes), bwt_shift_position};
}
/**
 * Writes a byte buffer to a file, optionally preceded by a size_t header.
 *
 * When the file cannot be opened or the write fails, an error message is
 * printed and the process exits with a failure status.
 *
 * @param file_name          Path of the file to create or truncate (binary mode).
 * @param data               Payload bytes written after the optional header.
 * @param bwt_shift_position Header value written first as sizeof(size_t) raw
 *                           native-endian bytes; SIZE_MAX means "no header".
 */
void write_bytes(
        const std::string &file_name,
        const std::vector<unsigned char> &data,
        const size_t bwt_shift_position = SIZE_MAX
) {
    std::ofstream fout(file_name, std::ios::binary);
    if (!fout.is_open()) {
        std::cout << "File isn't open!" << std::endl;
        std::exit(EXIT_FAILURE);
    }
    if (bwt_shift_position != SIZE_MAX) {
        fout.write(reinterpret_cast<const char *>(&bwt_shift_position), sizeof(bwt_shift_position));
    }
    fout.write(reinterpret_cast<const char *>(data.data()), static_cast<std::streamsize>(data.size()));
    // close() flushes; checking afterwards also catches errors deferred until flush.
    fout.close();
    if (fout.fail()) {
        std::cout << "File write failed!" << std::endl;
        std::exit(EXIT_FAILURE);
    }
}
// Decodes every file of a fixed list: <dir><name>.enc is passed through
// decode() into a temporary file, which is then read back, run through the
// inverse move-to-front and inverse BWT steps, and written to <dir><name>.dec.
// The temporary file is removed afterwards. argc/argv are not used.
int main(int argc, char* argv[]) {
    const std::string dir = "calgarycorpus/";
    const std::vector<std::string> file_list = {
            "bib", "book1", "book2", "geo", "news", "obj1", "obj2",
            "paper1", "paper2", "pic", "progc", "progl", "progp", "trans"
    };

    for (const auto &file : file_list) {
        const std::string input_file = dir + file + ".enc";
        const std::string output_decoded_main_file = dir + file + ".dec";
        // The intermediate file is created in the working directory, not in dir.
        const std::string output_temporary_file = file + "_decode_mtf2";

        decode(input_file.c_str(), output_temporary_file.c_str());

        // The temporary file carries the BWT row index as a leading header.
        const auto payload_and_position = read_bytes(output_temporary_file, true);
        const auto &bytes_input = std::get<0>(payload_and_position);
        const auto &bwt_shift_position = std::get<1>(payload_and_position);

        const auto decoded_mtf = move_to_front_reverse(bytes_input);
        std::cout << "pos: " << bwt_shift_position << std::endl;
        const auto decoded_data = bwt_reverse(decoded_mtf, bwt_shift_position);

        write_bytes(output_decoded_main_file, decoded_data);
        std::remove(output_temporary_file.c_str());
    }
    return 0;
}