index 7024bf8f58e219a9b42a6b5e5217a843b6d75130..bd646eb3d245460cd126327b39ca464b041f7a41 100644 (file)
--- a/gsimm.c
+++ b/gsimm.c
+#include <string.h>
#include "rabinpoly.h"
#include "gsimm.h"
bzero (freq, sizeof(freq[0]) * MD_BITS);
}
+/*
+ * Count the bit positions at which the first MD_LENGTH bytes of l and r
+ * differ.  Only the low eight bits of each byte are examined.
+ */
+static int dist (u_char *l, u_char *r)
+{
+  int differing = 0;
+  int idx;
+
+  for (idx = 0; idx < MD_LENGTH; idx++)
+    {
+      /* A set bit in the XOR marks a position where the inputs disagree. */
+      u_char mask = l[idx] ^ r[idx];
+      int bit;
+
+      for (bit = 0; bit < 8; bit++)
+        differing += (mask >> bit) & 1;
+    }
+
+  return differing;
+}
+
+/*
+ * Turn the bit distance between digests l and r into a score.
+ *
+ * The distance returned by dist() is divided by (MD_LENGTH * 4 - 1).
+ * A ratio above 1.0 yields 0; otherwise the result is 1.0 minus the
+ * ratio, so a distance of zero gives the highest score.
+ */
+double gb_simm_score(u_char *l, u_char *r)
+{
+  double ratio = (double) dist(l, r) / (MD_LENGTH * 4 - 1);
+
+  return (1.0 < ratio) ? 0 : 1.0 - ratio;
+}
+
void gb_simm_process(u_char *data, unsigned len, u_char *md)
{ size_t j = 0;
u_int32_t ofs;
u_int32_t count [MD_BITS * (GROUP_COUNTERS/GROUP_BITS)];
int freq[MD_BITS];
+ if (len < GB_SIMM_MIN_FILE_SIZE || GB_SIMM_MAX_FILE_SIZE < len) {
+ memset(md, 0, MD_LENGTH);
+ return;
+ }
+
bzero (freq, sizeof(freq[0]) * MD_BITS);
bzero (dup_cache, DUP_CACHE_SIZE * sizeof (u_int32_t));
bzero (count, (MD_BITS * (GROUP_COUNTERS/GROUP_BITS) * sizeof (u_int32_t)));