+/*
+ * Copyright (c) 2015, the urlblock developers.
+ * Copyright (c) 2018 Jonathan Dieter <jdieter@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
#include "buzhash.h"
-#include "util.h"
#include <string.h>
+#define rol32(v,s) (((v) << (s)) | ((v) >> (32 - (s)))) /* rotate v left by s bits within a 32-bit word; NOTE(review): for a 32-bit v, s == 0 or s >= 32 makes one of the two shifts reach the full width, which is undefined behaviour in C -- callers should keep s in 1..31 */
+
/*
 * Table of 32-bit constants. buzhash_update() selects entries from it using
 * input byte values as the index.
 * NOTE(review): only twelve entries are visible here -- presumably rows are
 * elided from this excerpt; confirm the table covers every index in use.
 */
const uint32_t buzhash_table[] = {
0x458be752, 0xc10748cc, 0xfbbcdbb8, 0x6ded5b68,
0xb10a82b5, 0x20d75648, 0xdfc5665f, 0xa8428801,
0x7bf7cabc, 0xf9c18d66, 0x593ade65, 0xd95ddf11,
};
-const size_t buzhash_width = 4;
+const size_t buzhash_width = 4096; /* window width in bytes: buzhash_update() reads the byte this far behind its s argument, and also passes this value to rol32 as a rotation count */
uint32_t
buzhash (const char *s)
uint32_t
buzhash_update (const char *s, uint32_t h)
{
- return rol32 (h, 1) ^ rol32 (buzhash_table[(size_t) s[0]], buzhash_width) ^ buzhash_table[(size_t) s[buzhash_width]];
+ return rol32 (h, 1) ^ rol32 (buzhash_table[(size_t) (s-buzhash_width)[0]], buzhash_width) ^ buzhash_table[(size_t) s[0]];
}
#include <unistd.h>
#include <zck.h>
+#include "buzhash/buzhash.h"
#include "memmem.h"
-#define WINDOW_SIZE 4096
-#define MATCH_SUM WINDOW_SIZE-1
-
int main (int argc, char *argv[]) {
char *out_name;
char *dict = NULL;
close(in_fd);
/* Chunk based on string in argv[2] (Currently with ugly hack to group srpms together) */
- if(True) {
+ if(False) {
char *found = data;
char *search = found;
char *prev_srpm = memmem(search, in_size - (search-data), "<rpm:sourcerpm", 14);
search = NULL;
}
}
- /* Naive (and inefficient) rolling window */
+ /* Buzhash rolling window */
} else {
char *cur_loc = data;
char *start = data;
char *window_loc;
- int window_sum;
while(cur_loc < data + in_size) {
- window_sum = 0;
+ uint32_t bh = 0;
window_loc = cur_loc;
- if(cur_loc + WINDOW_SIZE < data + in_size) {
- for(int i=0; i<WINDOW_SIZE; i++) {
- window_sum += cur_loc[i];
- }
- cur_loc += WINDOW_SIZE;
+ if(cur_loc + buzhash_width < data + in_size) {
+ bh = buzhash(window_loc);
+ cur_loc += buzhash_width;
while(cur_loc < data + in_size) {
- window_sum += cur_loc[0];
- window_sum -= window_loc[0];
- cur_loc++;
- window_loc++;
- if(((window_sum) & (WINDOW_SIZE - 1)) == 0)
+ bh = buzhash_update(cur_loc, bh);
+ if(((bh) & (8192 - 1)) == 0)
break;
+ cur_loc++;
}
} else {
cur_loc = data + in_size;