From 803ede00f0b5558be44d741407ceb343414ddc12 Mon Sep 17 00:00:00 2001 From: Sebastian Harl Date: Tue, 7 Oct 2014 21:41:54 +0200 Subject: [PATCH] data: Added support for a "regex" data-type. This type may be used to store the raw and compiled regex in a datum and use regexes in all places in which a generic value may be used. It does not support any arithmetic or concatenation operations. --- src/core/data.c | 58 ++++++++++++++++++++ src/include/core/data.h | 29 +++++----- t/unit/core/data_test.c | 115 ++++++++++++++++++++++++++++++++++++---- 3 files changed, 180 insertions(+), 22 deletions(-) diff --git a/src/core/data.c b/src/core/data.c index 9031f03..2a8c439 100644 --- a/src/core/data.c +++ b/src/core/data.c @@ -256,6 +256,22 @@ sdb_data_copy(sdb_data_t *dst, const sdb_data_t *src) src->data.binary.length); } break; + case SDB_TYPE_REGEX: + if (src->data.re.raw) { + tmp.data.re.raw = strdup(src->data.re.raw); + if (! tmp.data.re.raw) + return -1; + /* we need to recompile because the regex might point to + * dynamically allocated memory */ + if (regcomp(&tmp.data.re.regex, tmp.data.re.raw, + REG_EXTENDED | REG_ICASE | REG_NOSUB)) { + free(tmp.data.re.raw); + return -1; + } + } + else + memset(&tmp.data.re.regex, 0, sizeof(tmp.data.re.regex)); + break; } sdb_data_free_datum(dst); @@ -281,6 +297,14 @@ sdb_data_free_datum(sdb_data_t *datum) datum->data.binary.datum = NULL; datum->data.binary.length = 0; break; + case SDB_TYPE_REGEX: + if (datum->data.re.raw) { + free(datum->data.re.raw); + regfree(&datum->data.re.regex); + } + datum->data.re.raw = NULL; + memset(&datum->data.re.regex, 0, sizeof(datum->data.re.regex)); + break; } } /* sdb_data_free_datum */ @@ -332,6 +356,9 @@ sdb_data_cmp(const sdb_data_t *d1, const sdb_data_t *d2) return diff; } + case SDB_TYPE_REGEX: + CMP_NULL(d1->data.re.raw, d2->data.re.raw); + return strcmp(d1->data.re.raw, d2->data.re.raw); } return -1; } /* sdb_data_cmp */ @@ -367,6 +394,8 @@ sdb_data_isnull(const sdb_data_t *datum) return 1; if ((datum->type == SDB_TYPE_BINARY) && (! datum->data.binary.datum)) return 1; + if ((datum->type == SDB_TYPE_REGEX) && (! datum->data.re.raw)) + return 1; return 0; } /* sdb_data_isnull */ @@ -419,6 +448,11 @@ sdb_data_strlen(const sdb_data_t *datum) return 8; /* "" */ /* "\xNN" */ return 4 * datum->data.binary.length + 2; + case SDB_TYPE_REGEX: + if (! datum->data.re.raw) + return 8; /* "" */ + /* "/.../" */ + return strlen(datum->data.re.raw) + 4; } return 0; } /* sdb_data_strlen */ @@ -493,6 +527,14 @@ sdb_data_format(const sdb_data_t *datum, char *buf, size_t buflen, int quoted) else data = ""; break; + case SDB_TYPE_REGEX: + if (! datum->data.re.raw) + data = ""; + else { + snprintf(tmp, sizeof(tmp), "/%s/", datum->data.re.raw); + data = tmp; + } + break; } if (data) { @@ -536,6 +578,22 @@ sdb_data_parse(char *str, int type, sdb_data_t *data) tmp.data.binary.length = strlen(str); tmp.data.binary.datum = (unsigned char *)str; break; + case SDB_TYPE_REGEX: + tmp.data.re.raw = strdup(str); + if (! tmp.data.re.raw) + return -1; + if (regcomp(&tmp.data.re.regex, str, + REG_EXTENDED | REG_ICASE | REG_NOSUB)) { + free(tmp.data.re.raw); + sdb_log(SDB_LOG_ERR, "core: Failed to compile regular " + "expression '%s'", str); + return -1; + } + if (! data) { + tmp.type = SDB_TYPE_REGEX; + sdb_data_free_datum(&tmp); + } + break; default: errno = EINVAL; return -1; diff --git a/src/include/core/data.h b/src/include/core/data.h index 1bd0d53..a81cea4 100644 --- a/src/include/core/data.h +++ b/src/include/core/data.h @@ -33,6 +33,9 @@ #include #include +#include +#include + #ifdef __cplusplus extern "C" { #endif @@ -43,20 +46,16 @@ enum { SDB_TYPE_STRING, SDB_TYPE_DATETIME, SDB_TYPE_BINARY, + SDB_TYPE_REGEX, }; #define SDB_TYPE_TO_STRING(t) \ - (((t) == SDB_TYPE_INTEGER) \ - ? "INTEGER" \ - : ((t) == SDB_TYPE_DECIMAL) \ - ? "DECIMAL" \ - : ((t) == SDB_TYPE_STRING) \ - ? "STRING" \ - : ((t) == SDB_TYPE_DATETIME) \ - ? "DATETIME" \ - : ((t) == SDB_TYPE_BINARY) \ - ? "BINARY" \ - : "UNKNOWN") + (((t) == SDB_TYPE_INTEGER) ? "INTEGER" \ + : ((t) == SDB_TYPE_DECIMAL) ? "DECIMAL" \ + : ((t) == SDB_TYPE_STRING) ? "STRING" \ + : ((t) == SDB_TYPE_DATETIME) ? "DATETIME" \ + : ((t) == SDB_TYPE_BINARY) ? "BINARY" \ + : ((t) == SDB_TYPE_REGEX) ? "REGEX" : "UNKNOWN") /* * sdb_data_t: @@ -73,6 +72,10 @@ typedef struct { size_t length; unsigned char *datum; } binary; /* SDB_TYPE_BINARY */ + struct { + char *raw; + regex_t regex; + } re; /* SDB_TYPE_REGEX */ } data; } sdb_data_t; #define SDB_DATA_INIT { 0, { .integer = 0 } } @@ -226,7 +229,9 @@ sdb_data_format(const sdb_data_t *datum, char *buf, size_t buflen, int quoted); * specified as (floating point) number of seconds since the epoch. For string * and binary data, the input string is passed to the datum. The function does * not allocate new memory for that purpose. Use sdb_data_copy() if you want - * to do that. + * to do that. For regex data, the input string is copied to newly allocated + * memory and also compiled to a regex. Use sdb_data_free_datum() to free the + * dynamically allocated memory. * * Returns: * - 0 on success diff --git a/t/unit/core/data_test.c b/t/unit/core/data_test.c index 636fd13..742466e 100644 --- a/t/unit/core/data_test.c +++ b/t/unit/core/data_test.c @@ -28,8 +28,11 @@ #include "core/data.h" #include "libsysdb_test.h" +#include #include +static regex_t empty_re; + START_TEST(test_data) { sdb_data_t d1, d2; @@ -143,6 +146,41 @@ START_TEST(test_data) "sdb_data_free_datum() didn't reset binary datum length"); fail_unless(d1.data.binary.datum == NULL, "sdb_data_free_datum() didn't free binary datum"); + + check = sdb_data_parse(".", SDB_TYPE_REGEX, &d2); + fail_unless(check == 0, + "INTERNAL ERROR: Failed to parse regex '.'"); + assert(d2.type == SDB_TYPE_REGEX); + check = sdb_data_copy(&d1, &d2); + fail_unless(!check, "sdb_data_copy() = %i; expected: 0", check); + fail_unless(d1.type == d2.type, + "sdb_data_copy() didn't copy type; got: %i; expected: %i", + d1.type, d2.type); + fail_unless(d1.data.re.raw != d2.data.re.raw, + "sdb_data_copy() copy string pointer"); + fail_unless(!strcmp(d1.data.re.raw, d2.data.re.raw), + "sdb_data_copy() didn't copy raw regex: got: %s; expected: %s", + d1.data.re.raw, d2.data.re.raw); + sdb_data_free_datum(&d2); + + sdb_data_free_datum(&d1); + fail_unless(d1.data.re.raw == NULL, + "sdb_data_free_datum() didn't reset raw regex"); + + d2.type = SDB_TYPE_REGEX; + d2.data.re.raw = NULL; + check = sdb_data_copy(&d1, &d2); + fail_unless(!check, "sdb_data_copy() = %i; expected: 0", check); + fail_unless(d1.type == d2.type, + "sdb_data_copy() didn't copy type; got: %i; expected: %i", + d1.type, d2.type); + fail_unless(d1.data.re.raw == d2.data.re.raw, + "sdb_data_copy() didn't copy raw regex: got: %s; expected: %s", + d1.data.re.raw, d2.data.re.raw); + + sdb_data_free_datum(&d1); + fail_unless(d1.data.re.raw == NULL, + "sdb_data_free_datum() didn't reset raw regex"); } END_TEST @@ -308,6 +346,21 @@ START_TEST(test_cmp) }, 1, }, + { + { SDB_TYPE_REGEX, { .re = { "a", empty_re } } }, + { SDB_TYPE_REGEX, { .re = { "a", empty_re } } }, + 0, + }, + { + { SDB_TYPE_REGEX, { .re = { "a", empty_re } } }, + { SDB_TYPE_REGEX, { .re = { "b", empty_re } } }, + -1, + }, + { + { SDB_TYPE_REGEX, { .re = { "b", empty_re } } }, + { SDB_TYPE_REGEX, { .re = { "a", empty_re } } }, + 1, + }, }; size_t i; @@ -493,6 +546,21 @@ START_TEST(test_strcmp) }, 1, }, + { + { SDB_TYPE_REGEX, { .re = { "a", empty_re } } }, + { SDB_TYPE_REGEX, { .re = { "a", empty_re } } }, + 0, + }, + { + { SDB_TYPE_REGEX, { .re = { "a", empty_re } } }, + { SDB_TYPE_REGEX, { .re = { "b", empty_re } } }, + -1, + }, + { + { SDB_TYPE_REGEX, { .re = { "b", empty_re } } }, + { SDB_TYPE_REGEX, { .re = { "a", empty_re } } }, + 1, + }, /* type mismatches */ { { SDB_TYPE_INTEGER, { .integer = 123 } }, @@ -524,6 +592,11 @@ START_TEST(test_strcmp) { SDB_TYPE_STRING, { .string = "12.0" } }, 1, }, + { + { SDB_TYPE_REGEX, { .re = { "regex", empty_re } } }, + { SDB_TYPE_STRING, { .string = "/regex/" } }, + 0, + }, }; size_t i; @@ -675,6 +748,16 @@ START_TEST(test_expr_eval) { .binary = { 6, (unsigned char *)"a\0ab\0b" } }, }, }, + { + { SDB_TYPE_REGEX, { .re = { ".", empty_re } } }, + { SDB_TYPE_REGEX, { .re = { ".", empty_re } } }, + SDB_DATA_INIT, + SDB_DATA_INIT, + SDB_DATA_INIT, + SDB_DATA_INIT, + SDB_DATA_INIT, + SDB_DATA_INIT, + }, /* supported type-mismatches */ { /* int * datetime */ @@ -820,6 +903,10 @@ START_TEST(test_format) }, "\"\\x62\\x69\\x6e\\x61\\x72\\x79\\x0\\x63\\x72\\x61\\x70\\x42\"", }, + { + { SDB_TYPE_REGEX, { .re = { "some regex", empty_re } } }, + "\"/some regex/\"", + }, }; size_t i; @@ -862,18 +949,20 @@ START_TEST(test_parse) sdb_data_t result; int expected; } golden_data[] = { - { "4711", { SDB_TYPE_INTEGER, { .integer = 4711 } }, 0 }, - { "0x10", { SDB_TYPE_INTEGER, { .integer = 16 } }, 0 }, - { "010", { SDB_TYPE_INTEGER, { .integer = 8 } }, 0 }, - { "abc", { SDB_TYPE_INTEGER, { .integer = 0 } }, -1 }, - { "1.2", { SDB_TYPE_DECIMAL, { .decimal = 1.2 } }, 0 }, - { "0x1p+16", { SDB_TYPE_DECIMAL, { .decimal = 65536.0 } }, 0 }, - { "abc", { SDB_TYPE_DECIMAL, { .decimal = 0.0 } }, -1 }, - { "abc", { SDB_TYPE_STRING, { .string = "abc" } }, 0 }, - { ".4", { SDB_TYPE_DATETIME, { .datetime = 400000000 } }, 0 }, - { "abc", { SDB_TYPE_DATETIME, { .datetime = 0 } }, -1 }, + { "4711", { SDB_TYPE_INTEGER, { .integer = 4711 } }, 0 }, + { "0x10", { SDB_TYPE_INTEGER, { .integer = 16 } }, 0 }, + { "010", { SDB_TYPE_INTEGER, { .integer = 8 } }, 0 }, + { "abc", { SDB_TYPE_INTEGER, { .integer = 0 } }, -1 }, + { "1.2", { SDB_TYPE_DECIMAL, { .decimal = 1.2 } }, 0 }, + { "0x1p+16", { SDB_TYPE_DECIMAL, { .decimal = 65536.0 } }, 0 }, + { "abc", { SDB_TYPE_DECIMAL, { .decimal = 0.0 } }, -1 }, + { "abc", { SDB_TYPE_STRING, { .string = "abc" } }, 0 }, + { ".4", { SDB_TYPE_DATETIME, { .datetime = 400000000 } }, 0 }, + { "abc", { SDB_TYPE_DATETIME, { .datetime = 0 } }, -1 }, { "abc", { SDB_TYPE_BINARY, { .binary = { 3, (unsigned char *)"abc" } } }, 0 }, + { "abc", { SDB_TYPE_REGEX, { .re = { "abc", empty_re } } }, 0 }, + { "(|", { SDB_TYPE_REGEX, { .re = { "", empty_re } } }, -1 }, }; size_t i; @@ -904,6 +993,12 @@ START_TEST(test_parse) fail_unless(golden_data[i].input == (char *)result.data.binary.datum, "sdb_data_parse(%s, %d, ) modified input string", golden_data[i].input, type); + if (type == SDB_TYPE_REGEX) { + fail_unless(golden_data[i].input != result.data.re.raw, + "sdb_data_parse(%s, %d, ) copied input string", + golden_data[i].input, type); + sdb_data_free_datum(&result); + } } } END_TEST -- 2.30.2