From: Jonathan Nieder Date: Thu, 2 Feb 2012 11:03:16 +0000 (-0600) Subject: vcs-svn: allow import of > 4GiB files X-Git-Tag: v1.7.10-rc0~118^2~1 X-Git-Url: https://git.tokkee.org/?a=commitdiff_plain;h=150f75467cdd6eaf581d22175bb377399c62893a;p=git.git vcs-svn: allow import of > 4GiB files There is no reason in principle that an svn-format dump would not be able to represent a file whose length does not fit in a 32-bit integer. Use off_t consistently to represent file lengths (in place of using uint32_t in some contexts) so we can handle that. Most svn-fe code is already ready to do that without this patch and passes values of type off_t around. The type mismatch from stragglers was noticed with gcc -Wtype-limits. While at it, tighten the parsing of the Text-content-length field to make sure it is a number and does not overflow, and tighten other overflow checks as that value is passed around and manipulated. Inspired-by: Ramsay Jones Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c index 19d7c34c2..b823b8519 100644 --- a/vcs-svn/fast_export.c +++ b/vcs-svn/fast_export.c @@ -227,15 +227,18 @@ static long apply_delta(off_t len, struct line_buffer *input, return ret; } -void fast_export_data(uint32_t mode, uint32_t len, struct line_buffer *input) +void fast_export_data(uint32_t mode, off_t len, struct line_buffer *input) { + assert(len >= 0); if (mode == REPO_MODE_LNK) { /* svn symlink blobs start with "link " */ + if (len < 5) + die("invalid dump: symlink too short for \"link\" prefix"); len -= 5; if (buffer_skip_bytes(input, 5) != 5) die_short_read(input); } - printf("data %"PRIu32"\n", len); + printf("data %"PRIuMAX"\n", (uintmax_t) len); if (buffer_copy_bytes(input, len) != len) die_short_read(input); fputc('\n', stdout); @@ -297,12 +300,12 @@ int fast_export_ls(const char *path, uint32_t *mode, struct strbuf *dataref) void fast_export_blob_delta(uint32_t mode, uint32_t old_mode, const char *old_data, - uint32_t len, struct line_buffer *input) + off_t len, struct line_buffer *input) { long postimage_len; - if (len > maximum_signed_value_of_type(off_t)) - die("enormous delta"); - postimage_len = apply_delta((off_t) len, input, old_data, old_mode); + + assert(len >= 0); + postimage_len = apply_delta(len, input, old_data, old_mode); if (mode == REPO_MODE_LNK) { buffer_skip_bytes(&postimage, strlen("link ")); postimage_len -= strlen("link "); diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h index 43d05b65e..aa629f54f 100644 --- a/vcs-svn/fast_export.h +++ b/vcs-svn/fast_export.h @@ -14,10 +14,10 @@ void fast_export_begin_commit(uint32_t revision, const char *author, const struct strbuf *log, const char *uuid, const char *url, unsigned long timestamp); void fast_export_end_commit(uint32_t revision); -void fast_export_data(uint32_t mode, uint32_t len, struct line_buffer *input); +void fast_export_data(uint32_t mode, off_t len, struct line_buffer *input); void fast_export_blob_delta(uint32_t mode, uint32_t old_mode, const char *old_data, - uint32_t len, struct line_buffer *input); + off_t len, struct line_buffer *input); /* If there is no such file at that rev, returns -1, errno == ENOENT. */ int fast_export_ls_rev(uint32_t rev, const char *path, diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index ca63760fe..644fdc71b 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -40,7 +40,8 @@ static struct line_buffer input = LINE_BUFFER_INIT; static struct { - uint32_t action, propLength, textLength, srcRev, type; + uint32_t action, propLength, srcRev, type; + off_t text_length; struct strbuf src, dst; uint32_t text_delta, prop_delta; } node_ctx; @@ -61,7 +62,7 @@ static void reset_node_ctx(char *fname) node_ctx.type = 0; node_ctx.action = NODEACT_UNKNOWN; node_ctx.propLength = LENGTH_UNKNOWN; - node_ctx.textLength = LENGTH_UNKNOWN; + node_ctx.text_length = -1; strbuf_reset(&node_ctx.src); node_ctx.srcRev = 0; strbuf_reset(&node_ctx.dst); @@ -209,7 +210,7 @@ static void handle_node(void) { const uint32_t type = node_ctx.type; const int have_props = node_ctx.propLength != LENGTH_UNKNOWN; - const int have_text = node_ctx.textLength != LENGTH_UNKNOWN; + const int have_text = node_ctx.text_length != -1; /* * Old text for this node: * NULL - directory or bug @@ -291,12 +292,12 @@ static void handle_node(void) } if (!node_ctx.text_delta) { fast_export_modify(node_ctx.dst.buf, node_ctx.type, "inline"); - fast_export_data(node_ctx.type, node_ctx.textLength, &input); + fast_export_data(node_ctx.type, node_ctx.text_length, &input); return; } fast_export_modify(node_ctx.dst.buf, node_ctx.type, "inline"); fast_export_blob_delta(node_ctx.type, old_mode, old_data, - node_ctx.textLength, &input); + node_ctx.text_length, &input); } static void begin_revision(void) @@ -409,7 +410,15 @@ void svndump_read(const char *url) break; case sizeof("Text-content-length"): if (!constcmp(t, "Text-content-length")) { - node_ctx.textLength = atoi(val); + char *end; + uintmax_t textlen; + + textlen = strtoumax(val, &end, 10); + if (!isdigit(*val) || *end) + die("invalid dump: non-numeric length %s", val); + if (textlen > maximum_signed_value_of_type(off_t)) + die("unrepresentable length in dump: %s", val); + node_ctx.text_length = (off_t) textlen; break; } if (constcmp(t, "Prop-content-length"))