Code

data: Added support for a "regex" data-type.
author Sebastian Harl <sh@tokkee.org>
Tue, 7 Oct 2014 19:41:54 +0000 (21:41 +0200)
committer Sebastian Harl <sh@tokkee.org>
Tue, 7 Oct 2014 19:41:54 +0000 (21:41 +0200)
This type may be used to store both the raw and the compiled regex in a datum,
which allows regexes to be used in all places in which a generic value may be
used. It does not support any arithmetic or concatenation operations.

src/core/data.c
src/include/core/data.h
t/unit/core/data_test.c

index 9031f03d8a88d4dbd4154c4f55d9400f427c1f30..2a8c439e08c7da72dca03ce9b28e0c59413e36b2 100644 (file)
@@ -256,6 +256,22 @@ sdb_data_copy(sdb_data_t *dst, const sdb_data_t *src)
                                                src->data.binary.length);
                        }
                        break;
+               case SDB_TYPE_REGEX:
+                       if (src->data.re.raw) {
+                               tmp.data.re.raw = strdup(src->data.re.raw);
+                               if (! tmp.data.re.raw)
+                                       return -1;
+                               /* we need to recompile because the regex might point to
+                                * dynamically allocated memory */
+                               if (regcomp(&tmp.data.re.regex, tmp.data.re.raw,
+                                                       REG_EXTENDED | REG_ICASE | REG_NOSUB)) {
+                                       free(tmp.data.re.raw);
+                                       return -1;
+                               }
+                       }
+                       else
+                               memset(&tmp.data.re.regex, 0, sizeof(tmp.data.re.regex));
+                       break;
        }
 
        sdb_data_free_datum(dst);
@@ -281,6 +297,14 @@ sdb_data_free_datum(sdb_data_t *datum)
                        datum->data.binary.datum = NULL;
                        datum->data.binary.length = 0;
                        break;
+               case SDB_TYPE_REGEX:
+                       if (datum->data.re.raw) {
+                               free(datum->data.re.raw);
+                               regfree(&datum->data.re.regex);
+                       }
+                       datum->data.re.raw = NULL;
+                       memset(&datum->data.re.regex, 0, sizeof(datum->data.re.regex));
+                       break;
        }
 } /* sdb_data_free_datum */
 
@@ -332,6 +356,9 @@ sdb_data_cmp(const sdb_data_t *d1, const sdb_data_t *d2)
 
                        return diff;
                }
+               case SDB_TYPE_REGEX:
+                       CMP_NULL(d1->data.re.raw, d2->data.re.raw);
+                       return strcmp(d1->data.re.raw, d2->data.re.raw);
        }
        return -1;
 } /* sdb_data_cmp */
@@ -367,6 +394,8 @@ sdb_data_isnull(const sdb_data_t *datum)
                return 1;
        if ((datum->type == SDB_TYPE_BINARY) && (! datum->data.binary.datum))
                return 1;
+       if ((datum->type == SDB_TYPE_REGEX) && (! datum->data.re.raw))
+               return 1;
        return 0;
 } /* sdb_data_isnull */
 
@@ -419,6 +448,11 @@ sdb_data_strlen(const sdb_data_t *datum)
                                return 8; /* "<NULL>" */
                        /* "\xNN" */
                        return 4 * datum->data.binary.length + 2;
+               case SDB_TYPE_REGEX:
+                       if (! datum->data.re.raw)
+                               return 8; /* "<NULL>" */
+                       /* "/.../" */
+                       return strlen(datum->data.re.raw) + 4;
        }
        return 0;
 } /* sdb_data_strlen */
@@ -493,6 +527,14 @@ sdb_data_format(const sdb_data_t *datum, char *buf, size_t buflen, int quoted)
                        else
                                data = "<NULL>";
                        break;
+               case SDB_TYPE_REGEX:
+                       if (! datum->data.re.raw)
+                               data = "<NULL>";
+                       else {
+                               snprintf(tmp, sizeof(tmp), "/%s/", datum->data.re.raw);
+                               data = tmp;
+                       }
+                       break;
        }
 
        if (data) {
@@ -536,6 +578,22 @@ sdb_data_parse(char *str, int type, sdb_data_t *data)
                        tmp.data.binary.length = strlen(str);
                        tmp.data.binary.datum = (unsigned char *)str;
                        break;
+               case SDB_TYPE_REGEX:
+                       tmp.data.re.raw = strdup(str);
+                       if (! tmp.data.re.raw)
+                               return -1;
+                       if (regcomp(&tmp.data.re.regex, str,
+                                               REG_EXTENDED | REG_ICASE | REG_NOSUB)) {
+                               free(tmp.data.re.raw);
+                               sdb_log(SDB_LOG_ERR, "core: Failed to compile regular "
+                                               "expression '%s'", str);
+                               return -1;
+                       }
+                       if (! data) {
+                               tmp.type = SDB_TYPE_REGEX;
+                               sdb_data_free_datum(&tmp);
+                       }
+                       break;
                default:
                        errno = EINVAL;
                        return -1;
index 1bd0d5354e652656b891d0178cdf5030bf4992ec..a81cea45b5b6e5f52d5c6ba671f5b2ae42aa1e3c 100644 (file)
@@ -33,6 +33,9 @@
 #include <inttypes.h>
 #include <stddef.h>
 
+#include <sys/types.h>
+#include <regex.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -43,20 +46,16 @@ enum {
        SDB_TYPE_STRING,
        SDB_TYPE_DATETIME,
        SDB_TYPE_BINARY,
+       SDB_TYPE_REGEX,
 };
 
 #define SDB_TYPE_TO_STRING(t) \
-       (((t) == SDB_TYPE_INTEGER) \
-               ? "INTEGER" \
-               : ((t) == SDB_TYPE_DECIMAL) \
-                       ? "DECIMAL" \
-                       : ((t) == SDB_TYPE_STRING) \
-                               ? "STRING" \
-                               : ((t) == SDB_TYPE_DATETIME) \
-                                       ? "DATETIME" \
-                                       : ((t) == SDB_TYPE_BINARY) \
-                                               ? "BINARY" \
-                                               : "UNKNOWN")
+       (((t) == SDB_TYPE_INTEGER) ? "INTEGER" \
+               : ((t) == SDB_TYPE_DECIMAL) ? "DECIMAL" \
+               : ((t) == SDB_TYPE_STRING) ? "STRING" \
+               : ((t) == SDB_TYPE_DATETIME) ? "DATETIME" \
+               : ((t) == SDB_TYPE_BINARY) ? "BINARY" \
+               : ((t) == SDB_TYPE_REGEX) ? "REGEX" : "UNKNOWN")
 
 /*
  * sdb_data_t:
@@ -73,6 +72,10 @@ typedef struct {
                        size_t length;
                        unsigned char *datum;
                } binary;             /* SDB_TYPE_BINARY */
+               struct {
+                       char *raw;
+                       regex_t regex;
+               } re;                 /* SDB_TYPE_REGEX */
        } data;
 } sdb_data_t;
 #define SDB_DATA_INIT { 0, { .integer = 0 } }
@@ -226,7 +229,9 @@ sdb_data_format(const sdb_data_t *datum, char *buf, size_t buflen, int quoted);
  * specified as (floating point) number of seconds since the epoch. For string
  * and binary data, the input string is passed to the datum. The function does
  * not allocate new memory for that purpose. Use sdb_data_copy() if you want
- * to do that.
+ * to do that. For regex data, the input string is copied to newly allocated
+ * memory and also compiled to a regex. Use sdb_data_free_datum() to free the
+ * dynamically allocated memory.
  *
  * Returns:
  *  - 0 on success
index 636fd13e7523dd403c1ed84f169d494ff0f2cdad..742466eec0008c599824de8c49bb37aacefc3d65 100644 (file)
 #include "core/data.h"
 #include "libsysdb_test.h"
 
+#include <assert.h>
 #include <check.h>
 
+static regex_t empty_re;
+
 START_TEST(test_data)
 {
        sdb_data_t d1, d2;
@@ -143,6 +146,41 @@ START_TEST(test_data)
                        "sdb_data_free_datum() didn't reset binary datum length");
        fail_unless(d1.data.binary.datum == NULL,
                        "sdb_data_free_datum() didn't free binary datum");
+
+       check = sdb_data_parse(".", SDB_TYPE_REGEX, &d2);
+       fail_unless(check == 0,
+                       "INTERNAL ERROR: Failed to parse regex '.'");
+       assert(d2.type == SDB_TYPE_REGEX);
+       check = sdb_data_copy(&d1, &d2);
+       fail_unless(!check, "sdb_data_copy() = %i; expected: 0", check);
+       fail_unless(d1.type == d2.type,
+                       "sdb_data_copy() didn't copy type; got: %i; expected: %i",
+                       d1.type, d2.type);
+       fail_unless(d1.data.re.raw != d2.data.re.raw,
+                       "sdb_data_copy() copy string pointer");
+       fail_unless(!strcmp(d1.data.re.raw, d2.data.re.raw),
+                       "sdb_data_copy() didn't copy raw regex: got: %s; expected: %s",
+                       d1.data.re.raw, d2.data.re.raw);
+       sdb_data_free_datum(&d2);
+
+       sdb_data_free_datum(&d1);
+       fail_unless(d1.data.re.raw == NULL,
+                       "sdb_data_free_datum() didn't reset raw regex");
+
+       d2.type = SDB_TYPE_REGEX;
+       d2.data.re.raw = NULL;
+       check = sdb_data_copy(&d1, &d2);
+       fail_unless(!check, "sdb_data_copy() = %i; expected: 0", check);
+       fail_unless(d1.type == d2.type,
+                       "sdb_data_copy() didn't copy type; got: %i; expected: %i",
+                       d1.type, d2.type);
+       fail_unless(d1.data.re.raw == d2.data.re.raw,
+                       "sdb_data_copy() didn't copy raw regex: got: %s; expected: %s",
+                       d1.data.re.raw, d2.data.re.raw);
+
+       sdb_data_free_datum(&d1);
+       fail_unless(d1.data.re.raw == NULL,
+                       "sdb_data_free_datum() didn't reset raw regex");
 }
 END_TEST
 
@@ -308,6 +346,21 @@ START_TEST(test_cmp)
                        },
                        1,
                },
+               {
+                       { SDB_TYPE_REGEX, { .re = { "a", empty_re } } },
+                       { SDB_TYPE_REGEX, { .re = { "a", empty_re } } },
+                       0,
+               },
+               {
+                       { SDB_TYPE_REGEX, { .re = { "a", empty_re } } },
+                       { SDB_TYPE_REGEX, { .re = { "b", empty_re } } },
+                       -1,
+               },
+               {
+                       { SDB_TYPE_REGEX, { .re = { "b", empty_re } } },
+                       { SDB_TYPE_REGEX, { .re = { "a", empty_re } } },
+                       1,
+               },
        };
 
        size_t i;
@@ -493,6 +546,21 @@ START_TEST(test_strcmp)
                        },
                        1,
                },
+               {
+                       { SDB_TYPE_REGEX, { .re = { "a", empty_re } } },
+                       { SDB_TYPE_REGEX, { .re = { "a", empty_re } } },
+                       0,
+               },
+               {
+                       { SDB_TYPE_REGEX, { .re = { "a", empty_re } } },
+                       { SDB_TYPE_REGEX, { .re = { "b", empty_re } } },
+                       -1,
+               },
+               {
+                       { SDB_TYPE_REGEX, { .re = { "b", empty_re } } },
+                       { SDB_TYPE_REGEX, { .re = { "a", empty_re } } },
+                       1,
+               },
                /* type mismatches */
                {
                        { SDB_TYPE_INTEGER, { .integer = 123 } },
@@ -524,6 +592,11 @@ START_TEST(test_strcmp)
                        { SDB_TYPE_STRING, { .string = "12.0" } },
                        1,
                },
+               {
+                       { SDB_TYPE_REGEX, { .re = { "regex", empty_re } } },
+                       { SDB_TYPE_STRING, { .string = "/regex/" } },
+                       0,
+               },
        };
 
        size_t i;
@@ -675,6 +748,16 @@ START_TEST(test_expr_eval)
                                { .binary = { 6, (unsigned char *)"a\0ab\0b" } },
                        },
                },
+               {
+                       { SDB_TYPE_REGEX, { .re = { ".", empty_re } } },
+                       { SDB_TYPE_REGEX, { .re = { ".", empty_re } } },
+                       SDB_DATA_INIT,
+                       SDB_DATA_INIT,
+                       SDB_DATA_INIT,
+                       SDB_DATA_INIT,
+                       SDB_DATA_INIT,
+                       SDB_DATA_INIT,
+               },
                /* supported type-mismatches */
                {
                        /* int * datetime */
@@ -820,6 +903,10 @@ START_TEST(test_format)
                        },
                        "\"\\x62\\x69\\x6e\\x61\\x72\\x79\\x0\\x63\\x72\\x61\\x70\\x42\"",
                },
+               {
+                       { SDB_TYPE_REGEX, { .re = { "some regex", empty_re } } },
+                       "\"/some regex/\"",
+               },
        };
 
        size_t i;
@@ -862,18 +949,20 @@ START_TEST(test_parse)
                sdb_data_t result;
                int expected;
        } golden_data[] = {
-               { "4711",    { SDB_TYPE_INTEGER,  { .integer  = 4711 } },       0 },
-               { "0x10",    { SDB_TYPE_INTEGER,  { .integer  = 16 } },         0 },
-               { "010",     { SDB_TYPE_INTEGER,  { .integer  = 8 } },          0 },
-               { "abc",     { SDB_TYPE_INTEGER,  { .integer  = 0 } },         -1 },
-               { "1.2",     { SDB_TYPE_DECIMAL,  { .decimal  = 1.2 } },        0 },
-               { "0x1p+16", { SDB_TYPE_DECIMAL,  { .decimal  = 65536.0 } },    0 },
-               { "abc",     { SDB_TYPE_DECIMAL,  { .decimal  = 0.0 } },       -1 },
-               { "abc",     { SDB_TYPE_STRING,   { .string   = "abc" } },      0 },
-               { ".4",      { SDB_TYPE_DATETIME, { .datetime = 400000000 } },  0 },
-               { "abc",     { SDB_TYPE_DATETIME, { .datetime = 0 } },         -1 },
+               { "4711",    { SDB_TYPE_INTEGER,  { .integer  = 4711 } },          0 },
+               { "0x10",    { SDB_TYPE_INTEGER,  { .integer  = 16 } },            0 },
+               { "010",     { SDB_TYPE_INTEGER,  { .integer  = 8 } },             0 },
+               { "abc",     { SDB_TYPE_INTEGER,  { .integer  = 0 } },            -1 },
+               { "1.2",     { SDB_TYPE_DECIMAL,  { .decimal  = 1.2 } },           0 },
+               { "0x1p+16", { SDB_TYPE_DECIMAL,  { .decimal  = 65536.0 } },       0 },
+               { "abc",     { SDB_TYPE_DECIMAL,  { .decimal  = 0.0 } },          -1 },
+               { "abc",     { SDB_TYPE_STRING,   { .string   = "abc" } },         0 },
+               { ".4",      { SDB_TYPE_DATETIME, { .datetime = 400000000 } },     0 },
+               { "abc",     { SDB_TYPE_DATETIME, { .datetime = 0 } },            -1 },
                { "abc",     { SDB_TYPE_BINARY,
                                         { .binary = { 3, (unsigned char *)"abc" } } }, 0 },
+               { "abc",     { SDB_TYPE_REGEX,    { .re = { "abc", empty_re } } }, 0 },
+               { "(|",      { SDB_TYPE_REGEX,    { .re = { "", empty_re } } },   -1 },
        };
 
        size_t i;
@@ -904,6 +993,12 @@ START_TEST(test_parse)
                        fail_unless(golden_data[i].input == (char *)result.data.binary.datum,
                                        "sdb_data_parse(%s, %d, <d>) modified input string",
                                        golden_data[i].input, type);
+               if (type == SDB_TYPE_REGEX) {
+                       fail_unless(golden_data[i].input != result.data.re.raw,
+                                       "sdb_data_parse(%s, %d, <d>) copied input string",
+                                       golden_data[i].input, type);
+                       sdb_data_free_datum(&result);
+               }
        }
 }
 END_TEST