Code

Allow and implement iterating arbitrary (non-constant) array values.
[sysdb.git] / src / parser / analyzer.c
index 244966b9a651b4ad9c72a67fbedb5375d2ea7352..ecae40f890c2842d82f92d701ab0a8e3137025f9 100644 (file)
 #include "utils/strbuf.h"
 
 #include <assert.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <string.h>
 
 #define VALID_OBJ_TYPE(t) ((SDB_HOST <= (t)) && ((t) <= SDB_METRIC))
 
+typedef struct { /* analysis context; passed by value down the analyze_*() call chain */
+       int type; /* object type the expression is analyzed for, or FILTER_CONTEXT */
+       bool iter; /* true while analyzing the operand that an iterator iterates over */
+} context_t;
+
+#define FILTER_CONTEXT -1 /* context type used for FILTER expressions (no specific object type) */
+static const context_t FILTER_CTX = { FILTER_CONTEXT, 0 }; /* filter context, not inside an iterator */
+
+static int
+analyze_node(context_t ctx, sdb_ast_node_t *node, sdb_strbuf_t *errbuf); /* forward declaration: the analyze_*() helpers below recurse through it */
+
+/*
+ * error reporting
+ */
+
+static void
+op_error(sdb_strbuf_t *errbuf, sdb_ast_op_t *op, const char *reason)
+{ /* Formats a rejected operation into errbuf as
+   * "Invalid operation <left type> <operator> <right type> (<reason>)".
+   * Both op->left and op->right are dereferenced without a NULL check and
+   * their data_type is printed as-is.
+   * NOTE(review): analyze_logical() calls this up front when ctx.iter is
+   * set, i.e. before the operands were analyzed and also for operators it
+   * otherwise handles through op->right only (NOT, IS NULL, ...) --
+   * confirm op->left is always set on that path. */
+       sdb_strbuf_sprintf(errbuf, "Invalid operation %s %s %s (%s)",
+                       SDB_TYPE_TO_STRING(op->left->data_type),
+                       SDB_AST_OP_TO_STRING(op->kind),
+                       SDB_TYPE_TO_STRING(op->right->data_type),
+                       reason);
+} /* op_error */
+
+static void
+__attribute__((format(printf, 3, 4)))
+iter_error(sdb_strbuf_t *errbuf, sdb_ast_iter_t *iter, const char *reason, ...)
+{ /* Formats a rejected iterator into errbuf as "Invalid iterator
+   * <iterator kind> <iterated type> <operator> <right type> (<reason>)".
+   * reason is a printf-style format string (checked by the format
+   * attribute above) that is expanded with the variadic arguments. */
+       char r[1024]; /* expanded reason; vsnprintf() below bounds the write to sizeof(r) */
+       va_list ap;
+
+       va_start(ap, reason);
+       vsnprintf(r, sizeof(r), reason, ap);
+       va_end(ap);
+
+       assert((iter->expr->type == SDB_AST_TYPE_OPERATOR)
+                       && (! SDB_AST_OP(iter->expr)->left)); /* expr must be an operator whose left operand is unset; only its kind and right operand are printed */
+       sdb_strbuf_sprintf(errbuf, "Invalid iterator %s %s %s %s (%s)",
+                       SDB_AST_OP_TO_STRING(iter->kind),
+                       SDB_TYPE_TO_STRING(iter->iter->data_type),
+                       SDB_AST_OP_TO_STRING(SDB_AST_OP(iter->expr)->kind),
+                       SDB_TYPE_TO_STRING(SDB_AST_OP(iter->expr)->right->data_type),
+                       r);
+} /* iter_error */
+
 /*
- * private helper functions
+ * expression nodes
  */
 
 static int
-analyze_node(int context, sdb_ast_node_t *node, sdb_strbuf_t *errbuf)
+analyze_logical(context_t ctx, sdb_ast_op_t *op, sdb_strbuf_t *errbuf)
+{ /* Validates a logical or comparison operator and, through analyze_node(),
+   * its operands. Returns 0 if every check passes and -1 as soon as one
+   * fails; the failing check formats its message into errbuf. */
+       if (ctx.iter) { /* a logical expression is rejected as the operand being iterated */
+               op_error(errbuf, op, "cannot evaluate in iterator context");
+               return -1;
+       }
+
+       switch (op->kind) {
+       case SDB_AST_OR:
+       case SDB_AST_AND:
+               if (! SDB_AST_IS_LOGICAL(op->left)) {
+                       sdb_strbuf_sprintf(errbuf, "Invalid left operand (%s) "
+                                       "in %s expression", SDB_AST_TYPE_TO_STRING(op->left),
+                                       SDB_AST_OP_TO_STRING(op->kind));
+                       return -1;
+               }
+               if (analyze_node(ctx, op->left, errbuf))
+                       return -1;
+               /* fallthrough: AND/OR check their right operand exactly like NOT */
+       case SDB_AST_NOT:
+               if (! SDB_AST_IS_LOGICAL(op->right)) {
+                       sdb_strbuf_sprintf(errbuf, "Invalid right operand (%s) "
+                                       "in %s expression", SDB_AST_TYPE_TO_STRING(op->right),
+                                       SDB_AST_OP_TO_STRING(op->kind));
+                       return -1;
+               }
+               if (analyze_node(ctx, op->right, errbuf))
+                       return -1;
+               break;
+
+       case SDB_AST_LT:
+       case SDB_AST_LE:
+       case SDB_AST_EQ:
+       case SDB_AST_NE:
+       case SDB_AST_GE:
+       case SDB_AST_GT:
+       {
+               if (analyze_node(ctx, op->left, errbuf))
+                       return -1;
+               if (analyze_node(ctx, op->right, errbuf))
+                       return -1;
+
+               if ((op->left->data_type > 0) && (op->right->data_type > 0)) { /* both operand types are known (analyze_node() uses -1 for unknown) */
+                       if (op->left->data_type == op->right->data_type)
+                               return 0; /* NOTE(review): returns before the array checks below, so two arrays of identical type are accepted -- confirm this is intended */
+                       op_error(errbuf, op, "type mismatch");
+                       return -1;
+               }
+               if ((op->left->data_type > 0) && (op->left->data_type & SDB_TYPE_ARRAY)) { /* from here on at most one type is known; a known array operand is rejected */
+                       op_error(errbuf, op, "array not allowed");
+                       return -1;
+               }
+               if ((op->right->data_type > 0) && (op->right->data_type & SDB_TYPE_ARRAY)) {
+                       op_error(errbuf, op, "array not allowed");
+                       return -1;
+               }
+               break;
+       }
+
+       case SDB_AST_REGEX:
+       case SDB_AST_NREGEX:
+               if (analyze_node(ctx, op->left, errbuf))
+                       return -1;
+               if (analyze_node(ctx, op->right, errbuf))
+                       return -1;
+
+               /* all types are supported for the left operand
+                * TODO: introduce a cast operator if it's not a string */
+               if ((op->right->data_type > 0)
+                               && (op->right->data_type != SDB_TYPE_REGEX)
+                               && (op->right->data_type != SDB_TYPE_STRING)) { /* a right operand of known type must be a regex or a string */
+                       op_error(errbuf, op, "invalid regex");
+                       return -1;
+               }
+               break;
+
+       case SDB_AST_ISNULL:
+       case SDB_AST_ISTRUE:
+       case SDB_AST_ISFALSE:
+               if (analyze_node(ctx, op->right, errbuf)) /* only the right operand is analyzed for these operators */
+                       return -1;
+               break;
+
+       case SDB_AST_IN:
+               if (analyze_node(ctx, op->left, errbuf))
+                       return -1;
+               if (analyze_node(ctx, op->right, errbuf))
+                       return -1;
+
+               if ((op->right->data_type > 0) && (! (op->right->data_type & SDB_TYPE_ARRAY))) {
+                       op_error(errbuf, op, "array expected");
+                       return -1;
+               }
+               /* the left operand may be a scalar or an array but the element
+                * type has to match */
+               if ((op->left->data_type > 0) && (op->right->data_type > 0)
+                               && ((op->left->data_type & 0xff) != (op->right->data_type & 0xff))) { /* compares only the low byte of both types */
+                       op_error(errbuf, op, "type mismatch");
+                       return -1;
+               }
+               break;
+
+       default:
+               sdb_strbuf_sprintf(errbuf, "Unknown operand type %d", op->kind); /* NOTE(review): the value printed is the operator kind although the text says "operand" */
+               return -1;
+       }
+       return 0;
+} /* analyze_logical */
+
+static int
+analyze_arith(context_t ctx, sdb_ast_op_t *op, sdb_strbuf_t *errbuf)
+{ /* Analyzes both operands of an arithmetic operator and stores the result
+   * type reported by sdb_data_expr_type() in the operator node itself.
+   * Returns 0 on success and -1 on failure. */
+       if (analyze_node(ctx, op->left, errbuf))
+               return -1;
+       if (analyze_node(ctx, op->right, errbuf))
+               return -1;
+       SDB_AST_NODE(op)->data_type = sdb_data_expr_type(SDB_AST_OP_TO_DATA_OP(op->kind),
+                       op->left->data_type, op->right->data_type);
+
+       if ((op->left->data_type > 0) && (op->right->data_type > 0)
+                       && (SDB_AST_NODE(op)->data_type <= 0)) { /* only an error if both operand types are known; otherwise the result type is kept as computed */
+               op_error(errbuf, op, "type mismatch");
+               return -1;
+       }
+
+       /* TODO: replace constant arithmetic operations with a constant value */
+       return 0;
+} /* analyze_arith */
+
+static int
+analyze_iter(context_t ctx, sdb_ast_iter_t *iter, sdb_strbuf_t *errbuf)
+{ /* Analyzes an iterator node: first the iterated operand (iter->iter) in
+   * iterator context, then the operator expression (iter->expr) with a
+   * temporary constant standing in for its missing left operand.
+   * Returns 0 on success and -1 on failure. */
+       sdb_ast_const_t c = SDB_AST_CONST_INIT; /* stack-allocated stand-in for the left operand of iter->expr */
+       context_t iter_ctx = ctx;
+       int status;
+
+       if (ctx.iter) {
+               iter_error(errbuf, iter, "nested iterators are not supported");
+               return -1;
+       }
+
+       iter_ctx.iter = 1; /* only the iterated operand is analyzed with the iterator flag set */
+       if (analyze_node(iter_ctx, iter->iter, errbuf))
+               return -1;
+
+       if (iter->iter->data_type > 0) { /* type of the iterated operand is known */
+               if (! (iter->iter->data_type & SDB_TYPE_ARRAY)) {
+                       iter_error(errbuf, iter, "cannot iterate values of type %s",
+                                       SDB_TYPE_TO_STRING(iter->iter->data_type));
+                       return -1;
+               }
+               c.value.type = iter->iter->data_type & 0xff; /* low byte of the array type; presumably the element type -- TODO confirm against the SDB_TYPE_* definitions */
+       }
+
+       /* TODO: support other setups as well */
+       assert((iter->expr->type == SDB_AST_TYPE_OPERATOR)
+                       && (! SDB_AST_OP(iter->expr)->left));
+
+       SDB_AST_OP(iter->expr)->left = SDB_AST_NODE(&c); /* temporarily plug in the stand-in so the operator's checks see a left operand */
+       status = analyze_node(ctx, iter->expr, errbuf); /* uses the caller's ctx, i.e. without the iterator flag */
+       SDB_AST_OP(iter->expr)->left = NULL; /* c is a local; detach it again before returning */
+       if (status)
+               return -1;
+       return 0;
+} /* analyze_iter */
+
+static int
+analyze_const(context_t __attribute__((unused)) ctx, sdb_ast_const_t *c,
+               sdb_strbuf_t __attribute__((unused)) *errbuf)
+{ /* A constant's data type is the type of the value it holds; this always
+   * returns 0 and uses neither ctx nor errbuf. */
+       SDB_AST_NODE(c)->data_type = c->value.type;
+       return 0;
+} /* analyze_const */
+
+static int
+analyze_value(context_t ctx, sdb_ast_value_t *v, sdb_strbuf_t *errbuf)
+{ /* Validates a value node (an object field or a named attribute) against
+   * the current context. Returns 0 if it is acceptable; otherwise formats
+   * a message into errbuf and returns -1. */
+       if (v->type != SDB_ATTRIBUTE)
+               SDB_AST_NODE(v)->data_type = SDB_FIELD_TYPE(v->type); /* for attributes the data_type is left as the caller set it */
+
+       if ((v->type != SDB_ATTRIBUTE) && v->name) { /* a name is only accepted on attributes ... */
+               sdb_strbuf_sprintf(errbuf, "Invalid expression %s[%s]",
+                               SDB_FIELD_TO_NAME(v->type), v->name);
+               return -1;
+       }
+       else if ((v->type == SDB_ATTRIBUTE) && (! v->name)) { /* ... and is mandatory there */
+               sdb_strbuf_sprintf(errbuf, "Invalid expression attribute[] "
+                               "(missing name)");
+               return -1;
+       }
+
+       /* this would be caught by the type check in analyze_iter but we're able
+        * to provide a more specific error message here */
+       if (ctx.iter && (v->type != SDB_FIELD_BACKEND)) {
+               /* only backend values are iterable */
+               char value_str[64 + (v->name ? strlen(v->name) : 0)]; /* variable-length array: 64 bytes for the fixed text plus room for the attribute name */
+               if (v->type == SDB_ATTRIBUTE)
+                       snprintf(value_str, sizeof(value_str), "attribute[%s]", v->name);
+               else
+                       snprintf(value_str, sizeof(value_str), "'%s'", SDB_FIELD_TO_NAME(v->type));
+               sdb_strbuf_sprintf(errbuf, "Cannot iterate %s (scalar value)", value_str);
+               return -1;
+       }
+
+       if ((ctx.type != SDB_ATTRIBUTE) && (v->type == SDB_FIELD_VALUE)) { /* the value field is only accepted in attribute context */
+               sdb_strbuf_sprintf(errbuf, "Invalid expression %s.value",
+                               SDB_FIELD_TO_NAME(ctx.type));
+               return -1;
+       }
+       if ((ctx.type != SDB_METRIC) && (v->type == SDB_FIELD_TIMESERIES)) { /* the timeseries field is only accepted in metric context */
+               sdb_strbuf_sprintf(errbuf, "Invalid expression %s.timeseries",
+                               SDB_FIELD_TO_NAME(ctx.type));
+               return -1;
+       }
+       return 0;
+} /* analyze_value */
+
+static int
+analyze_typed(context_t ctx, sdb_ast_typed_t *t, sdb_strbuf_t *errbuf)
+{ /* Validates a typed expression (t->expr qualified by the object type
+   * t->type): checks that the reference is allowed in the current context,
+   * analyzes the child in the context of t->type and copies the child's
+   * data type to this node. Returns 0 on success and -1 on failure. */
+       context_t child_ctx = ctx;
+       bool needs_iter = 0; /* set when the reference is only accepted while ctx.iter is set */
+       bool valid = 1;
+
+       if ((t->expr->type != SDB_AST_TYPE_VALUE)
+                       && (t->expr->type != SDB_AST_TYPE_TYPED)) { /* the child has to be a value or another typed expression */
+               sdb_strbuf_sprintf(errbuf, "Invalid expression %s.%s",
+                               SDB_STORE_TYPE_TO_NAME(t->type),
+                               SDB_AST_TYPE_TO_STRING(t->expr));
+               return -1;
+       }
+       if ((t->type != SDB_ATTRIBUTE) && (! VALID_OBJ_TYPE(t->type))) {
+               sdb_strbuf_sprintf(errbuf, "Invalid expression %#x.%s",
+                               t->type, SDB_AST_TYPE_TO_STRING(t->expr));
+               return -1;
+       }
+
+       if (ctx.type > 0) { /* positive context type, as opposed to FILTER_CONTEXT */
+               if ((ctx.type == t->type)
+                               || ((t->type == SDB_HOST) && (ctx.type != SDB_ATTRIBUTE))) {
+                       /* self-references and references to the parent host are always fine */
+               }
+               else if (t->type == SDB_ATTRIBUTE) {
+                       /* references to attributes are always fine */
+                       needs_iter = 1;
+               }
+               else if ((ctx.type == SDB_HOST)
+                               && ((t->type == SDB_SERVICE) || (t->type == SDB_METRIC))) {
+                       /* only hosts may reference services and metrics */
+                       needs_iter = 1;
+               }
+               else {
+                       valid = 0;
+               }
+       }
+       else if (ctx.type == FILTER_CONTEXT) {
+               if (t->type == SDB_ATTRIBUTE) {
+                       /* all objects have attributes */
+                       needs_iter = 1;
+               }
+               else if ((t->type == SDB_SERVICE) || (t->type == SDB_METRIC)) {
+                       /* these will be iterators for *some* operations;
+                        * better forbid this altogether */
+                       valid = 0;
+               }
+       }
+
+       if (needs_iter) {
+               if (! ctx.iter)
+                       valid = 0;
+               else
+                       child_ctx.iter = 0; /* the iterator flag is consumed here; the child is analyzed without it */
+       } /* else: push ctx.iter down to the child node */
+
+       if (! valid) {
+               sdb_strbuf_sprintf(errbuf, "Invalid expression %s.%s in %s context",
+                               SDB_STORE_TYPE_TO_NAME(t->type),
+                               SDB_AST_TYPE_TO_STRING(t->expr),
+                               SDB_STORE_TYPE_TO_NAME(ctx.type));
+               return -1;
+       }
+
+       child_ctx.type = t->type; /* the child is analyzed in the context of the referenced object type */
+       if (analyze_node(child_ctx, t->expr, errbuf))
+               return -1;
+       SDB_AST_NODE(t)->data_type = t->expr->data_type;
+
+       if (needs_iter && (SDB_AST_NODE(t)->data_type > 0)) { /* only applies when the child's type is known */
+               if (SDB_AST_NODE(t)->data_type & SDB_TYPE_ARRAY) {
+                       sdb_strbuf_sprintf(errbuf, "Cannot access array inside iterator");
+                       return -1;
+               }
+               /* Tell the caller that we're accessing an iterator. */
+               SDB_AST_NODE(t)->data_type |= SDB_TYPE_ARRAY;
+       }
+       return 0;
+} /* analyze_typed */
+
+static int
+analyze_node(context_t ctx, sdb_ast_node_t *node, sdb_strbuf_t *errbuf)
 {
-       (void)context;
        if (! node) {
                sdb_strbuf_sprintf(errbuf, "Empty AST node");
                return -1;
        }
-       return 0;
+
+       /* unknown by default; the analyze_*() handler selected below replaces
+        * this where it can determine a type */
+       node->data_type = -1;
+
+       if ((node->type == SDB_AST_TYPE_OPERATOR)
+                       && (SDB_AST_IS_LOGICAL(node))) /* operators are split into logical and arithmetic ones */
+               return analyze_logical(ctx, SDB_AST_OP(node), errbuf);
+       else if ((node->type == SDB_AST_TYPE_OPERATOR)
+                       && (SDB_AST_IS_ARITHMETIC(node)))
+               return analyze_arith(ctx, SDB_AST_OP(node), errbuf);
+       else if (node->type == SDB_AST_TYPE_ITERATOR)
+               return analyze_iter(ctx, SDB_AST_ITER(node), errbuf);
+       else if (node->type == SDB_AST_TYPE_CONST)
+               return analyze_const(ctx, SDB_AST_CONST(node), errbuf);
+       else if (node->type == SDB_AST_TYPE_VALUE)
+               return analyze_value(ctx, SDB_AST_VALUE(node), errbuf);
+       else if (node->type == SDB_AST_TYPE_TYPED)
+               return analyze_typed(ctx, SDB_AST_TYPED(node), errbuf);
+
+       sdb_strbuf_sprintf(errbuf, "Invalid expression node " /* reached when none of the node types above matched */
+                       "of type %#x", node->type);
+       return -1;
 } /* analyze_node */
 
 /*
@@ -82,9 +450,9 @@ analyze_fetch(sdb_ast_fetch_t *fetch, sdb_strbuf_t *errbuf)
        }
 
        if (fetch->filter)
-               return analyze_node(-1, fetch->filter, errbuf);
+               return analyze_node(FILTER_CTX, fetch->filter, errbuf);
        return 0;
-}
+} /* analyze_fetch */
 
 static int
 analyze_list(sdb_ast_list_t *list, sdb_strbuf_t *errbuf)
@@ -95,9 +463,9 @@ analyze_list(sdb_ast_list_t *list, sdb_strbuf_t *errbuf)
                return -1;
        }
        if (list->filter)
-               return analyze_node(-1, list->filter, errbuf);
+               return analyze_node(FILTER_CTX, list->filter, errbuf);
        return 0;
-}
+} /* analyze_list */
 
 static int
 analyze_lookup(sdb_ast_lookup_t *lookup, sdb_strbuf_t *errbuf)
@@ -107,13 +475,15 @@ analyze_lookup(sdb_ast_lookup_t *lookup, sdb_strbuf_t *errbuf)
                                "in LOOKUP command", lookup->obj_type);
                return -1;
        }
-       if (lookup->matcher)
-               if (analyze_node(lookup->obj_type, lookup->matcher, errbuf))
+       if (lookup->matcher) {
+               context_t ctx = { lookup->obj_type, 0 };
+               if (analyze_node(ctx, lookup->matcher, errbuf))
                        return -1;
+       }
        if (lookup->filter)
-               return analyze_node(-1, lookup->filter, errbuf);
+               return analyze_node(FILTER_CTX, lookup->filter, errbuf);
        return 0;
-}
+} /* analyze_lookup */
 
 static int
 analyze_store(sdb_ast_store_t *st, sdb_strbuf_t *errbuf)
@@ -201,29 +571,29 @@ analyze_store(sdb_ast_store_t *st, sdb_strbuf_t *errbuf)
                return -1;
        }
        return 0;
-}
+} /* analyze_store */
 
 static int
 analyze_timeseries(sdb_ast_timeseries_t *ts, sdb_strbuf_t *errbuf)
 {
        if (! ts->hostname) {
-               sdb_strbuf_sprintf(errbuf, "Missing hostname in STORE command");
+               sdb_strbuf_sprintf(errbuf, "Missing hostname in TIMESERIES command");
                return -1;
        }
        if (! ts->metric) {
-               sdb_strbuf_sprintf(errbuf, "Missing metric name in STORE command");
+               sdb_strbuf_sprintf(errbuf, "Missing metric name in TIMESERIES command");
                return -1;
        }
        if (ts->end <= ts->start) {
                char start_str[64], end_str[64];
-               sdb_strftime(start_str, sizeof(start_str), "%F %T Tz", ts->start);
-               sdb_strftime(end_str, sizeof(end_str), "%F %T Tz", ts->end);
+               sdb_strftime(start_str, sizeof(start_str), ts->start); /* render both bounds as text for the error message below */
+               sdb_strftime(end_str, sizeof(end_str), ts->end);
                sdb_strbuf_sprintf(errbuf, "Start time (%s) greater than "
-                               "end time (%s) in STORE command", start_str, end_str);
+                               "end time (%s) in TIMESERIES command", start_str, end_str); /* NOTE(review): the enclosing check is <=, so this is also reported when start == end, which "greater than" does not describe */
                return -1;
        }
        return 0;
-}
+} /* analyze_timeseries */
 
 /*
  * public API
@@ -237,6 +607,9 @@ sdb_parser_analyze(sdb_ast_node_t *node, sdb_strbuf_t *errbuf)
                return -1;
        }
 
+       /* top-level nodes don't have a type */
+       node->data_type = -1;
+
        if (node->type == SDB_AST_TYPE_FETCH)
                return analyze_fetch(SDB_AST_FETCH(node), errbuf);
        else if (node->type == SDB_AST_TYPE_LIST)
@@ -251,7 +624,49 @@ sdb_parser_analyze(sdb_ast_node_t *node, sdb_strbuf_t *errbuf)
        sdb_strbuf_sprintf(errbuf, "Invalid top-level AST node "
                        "of type %#x", node->type);
        return -1;
-} /* sdb_fe_analyze */
+} /* sdb_parser_analyze */
+
+int
+sdb_parser_analyze_conditional(int context,
+               sdb_ast_node_t *node, sdb_strbuf_t *errbuf)
+{ /* Analyzes a stand-alone conditional expression for objects of type
+   * context. The context must satisfy VALID_OBJ_TYPE() and node must be a
+   * non-NULL logical node; analysis is then delegated to analyze_node()
+   * outside of any iterator. Returns 0 on success; on failure a message is
+   * formatted into errbuf and -1 is returned. */
+       context_t ctx = { context, 0 };
+       if (! VALID_OBJ_TYPE(context)) {
+               sdb_strbuf_sprintf(errbuf, "Invalid object type %#x", context);
+               return -1;
+       }
+       if (! node) {
+               sdb_strbuf_sprintf(errbuf, "Empty conditional node");
+               return -1;
+       }
+       if (! SDB_AST_IS_LOGICAL(node)) {
+               sdb_strbuf_sprintf(errbuf, "Not a conditional node (got %s)",
+                               SDB_AST_TYPE_TO_STRING(node));
+               return -1;
+       }
+       return analyze_node(ctx, node, errbuf);
+} /* sdb_parser_analyze_conditional */
+
+int
+sdb_parser_analyze_arith(int context,
+               sdb_ast_node_t *node, sdb_strbuf_t *errbuf)
+{ /* Analyzes a stand-alone arithmetic expression for objects of type
+   * context. The context must satisfy VALID_OBJ_TYPE() and node must be a
+   * non-NULL arithmetic node; analysis is then delegated to analyze_node()
+   * outside of any iterator. Returns 0 on success; on failure a message is
+   * formatted into errbuf and -1 is returned. */
+       context_t ctx = { context, 0 };
+       if (! VALID_OBJ_TYPE(context)) {
+               sdb_strbuf_sprintf(errbuf, "Invalid object type %#x", context);
+               return -1;
+       }
+       if (! node) {
+               sdb_strbuf_sprintf(errbuf, "Empty arithmetic node");
+               return -1;
+       }
+       if (! SDB_AST_IS_ARITHMETIC(node)) {
+               sdb_strbuf_sprintf(errbuf, "Not an arithmetic node (got %s)",
+                               SDB_AST_TYPE_TO_STRING(node));
+               return -1;
+       }
+       return analyze_node(ctx, node, errbuf);
+} /* sdb_parser_analyze_arith */
 
 /* vim: set tw=78 sw=4 ts=4 noexpandtab : */