Code

Merge branch 'db/svn-fe-code-purge' into svn-fe
[git.git] / vcs-svn / svndump.c
/*
 * Parse and rearrange a svnadmin dump.
 * Create the dump with:
 * svnadmin dump --incremental -r<startrev>:<endrev> <repository> >outfile
 *
 * Licensed under a two-clause BSD-style license.
 * See LICENSE for details.
 */
10 #include "cache.h"
11 #include "repo_tree.h"
12 #include "fast_export.h"
13 #include "line_buffer.h"
14 #include "strbuf.h"
/*
 * Compare the start of string s with the string literal ref, looking
 * only at the first strlen(ref) bytes.  Evaluates to 0 on a match,
 * like memcmp().  ref must be a literal (or array) so that sizeof()
 * yields its length, and the caller must already have checked that
 * s is at least that long.
 */
#define constcmp(s, ref) memcmp((s), (ref), sizeof(ref) - 1)

/* Passed to fast_export_init(); presumably a file descriptor number. */
#define REPORT_FILENO 3

/* Values of node_ctx.action, set from the Node-action header. */
#define NODEACT_REPLACE 4
#define NODEACT_DELETE 3
#define NODEACT_ADD 2
#define NODEACT_CHANGE 1
#define NODEACT_UNKNOWN 0

/* States: */
#define DUMP_CTX 0      /* dump metadata */
#define REV_CTX  1      /* revision metadata */
#define NODE_CTX 2      /* node metadata */
#define INTERNODE_CTX 3 /* between nodes */

/* Marks a Prop-/Text-content-length header that has not been seen. */
#define LENGTH_UNKNOWN (~0)
#define DATE_RFC2822_LEN 31
39 static struct line_buffer input = LINE_BUFFER_INIT;
41 static struct {
42         uint32_t action, propLength, textLength, srcRev, type;
43         struct strbuf src, dst;
44         uint32_t text_delta, prop_delta;
45 } node_ctx;
47 static struct {
48         uint32_t revision;
49         unsigned long timestamp;
50         struct strbuf log, author;
51 } rev_ctx;
53 static struct {
54         uint32_t version;
55         struct strbuf uuid, url;
56 } dump_ctx;
58 static void reset_node_ctx(char *fname)
59 {
60         node_ctx.type = 0;
61         node_ctx.action = NODEACT_UNKNOWN;
62         node_ctx.propLength = LENGTH_UNKNOWN;
63         node_ctx.textLength = LENGTH_UNKNOWN;
64         strbuf_reset(&node_ctx.src);
65         node_ctx.srcRev = 0;
66         strbuf_reset(&node_ctx.dst);
67         if (fname)
68                 strbuf_addstr(&node_ctx.dst, fname);
69         node_ctx.text_delta = 0;
70         node_ctx.prop_delta = 0;
71 }
73 static void reset_rev_ctx(uint32_t revision)
74 {
75         rev_ctx.revision = revision;
76         rev_ctx.timestamp = 0;
77         strbuf_reset(&rev_ctx.log);
78         strbuf_reset(&rev_ctx.author);
79 }
81 static void reset_dump_ctx(const char *url)
82 {
83         strbuf_reset(&dump_ctx.url);
84         if (url)
85                 strbuf_addstr(&dump_ctx.url, url);
86         dump_ctx.version = 1;
87         strbuf_reset(&dump_ctx.uuid);
88 }
90 static void handle_property(const struct strbuf *key_buf,
91                                 struct strbuf *val,
92                                 uint32_t *type_set)
93 {
94         const char *key = key_buf->buf;
95         size_t keylen = key_buf->len;
97         switch (keylen + 1) {
98         case sizeof("svn:log"):
99                 if (constcmp(key, "svn:log"))
100                         break;
101                 if (!val)
102                         die("invalid dump: unsets svn:log");
103                 strbuf_swap(&rev_ctx.log, val);
104                 break;
105         case sizeof("svn:author"):
106                 if (constcmp(key, "svn:author"))
107                         break;
108                 if (!val)
109                         strbuf_reset(&rev_ctx.author);
110                 else
111                         strbuf_swap(&rev_ctx.author, val);
112                 break;
113         case sizeof("svn:date"):
114                 if (constcmp(key, "svn:date"))
115                         break;
116                 if (!val)
117                         die("invalid dump: unsets svn:date");
118                 if (parse_date_basic(val->buf, &rev_ctx.timestamp, NULL))
119                         warning("invalid timestamp: %s", val->buf);
120                 break;
121         case sizeof("svn:executable"):
122         case sizeof("svn:special"):
123                 if (keylen == strlen("svn:executable") &&
124                     constcmp(key, "svn:executable"))
125                         break;
126                 if (keylen == strlen("svn:special") &&
127                     constcmp(key, "svn:special"))
128                         break;
129                 if (*type_set) {
130                         if (!val)
131                                 return;
132                         die("invalid dump: sets type twice");
133                 }
134                 if (!val) {
135                         node_ctx.type = REPO_MODE_BLB;
136                         return;
137                 }
138                 *type_set = 1;
139                 node_ctx.type = keylen == strlen("svn:executable") ?
140                                 REPO_MODE_EXE :
141                                 REPO_MODE_LNK;
142         }
145 static void die_short_read(void)
147         if (buffer_ferror(&input))
148                 die_errno("error reading dump file");
149         die("invalid dump: unexpected end of file");
152 static void read_props(void)
154         static struct strbuf key = STRBUF_INIT;
155         static struct strbuf val = STRBUF_INIT;
156         const char *t;
157         /*
158          * NEEDSWORK: to support simple mode changes like
159          *      K 11
160          *      svn:special
161          *      V 1
162          *      *
163          *      D 14
164          *      svn:executable
165          * we keep track of whether a mode has been set and reset to
166          * plain file only if not.  We should be keeping track of the
167          * symlink and executable bits separately instead.
168          */
169         uint32_t type_set = 0;
170         while ((t = buffer_read_line(&input)) && strcmp(t, "PROPS-END")) {
171                 uint32_t len;
172                 const char type = t[0];
173                 int ch;
175                 if (!type || t[1] != ' ')
176                         die("invalid property line: %s\n", t);
177                 len = atoi(&t[2]);
178                 strbuf_reset(&val);
179                 buffer_read_binary(&input, &val, len);
180                 if (val.len < len)
181                         die_short_read();
183                 /* Discard trailing newline. */
184                 ch = buffer_read_char(&input);
185                 if (ch == EOF)
186                         die_short_read();
187                 if (ch != '\n')
188                         die("invalid dump: expected newline after %s", val.buf);
190                 switch (type) {
191                 case 'K':
192                         strbuf_swap(&key, &val);
193                         continue;
194                 case 'D':
195                         handle_property(&val, NULL, &type_set);
196                         continue;
197                 case 'V':
198                         handle_property(&key, &val, &type_set);
199                         strbuf_reset(&key);
200                         continue;
201                 default:
202                         die("invalid property line: %s\n", t);
203                 }
204         }
207 static void handle_node(void)
209         const uint32_t type = node_ctx.type;
210         const int have_props = node_ctx.propLength != LENGTH_UNKNOWN;
211         const int have_text = node_ctx.textLength != LENGTH_UNKNOWN;
212         /*
213          * Old text for this node:
214          *  NULL        - directory or bug
215          *  empty_blob  - empty
216          *  "<dataref>" - data retrievable from fast-import
217          */
218         static const char *const empty_blob = "::empty::";
219         const char *old_data = NULL;
221         if (node_ctx.text_delta)
222                 die("text deltas not supported");
224         if (node_ctx.action == NODEACT_DELETE) {
225                 if (have_text || have_props || node_ctx.srcRev)
226                         die("invalid dump: deletion node has "
227                                 "copyfrom info, text, or properties");
228                 repo_delete(node_ctx.dst.buf);
229                 return;
230         }
231         if (node_ctx.action == NODEACT_REPLACE) {
232                 repo_delete(node_ctx.dst.buf);
233                 node_ctx.action = NODEACT_ADD;
234         }
235         if (node_ctx.srcRev) {
236                 repo_copy(node_ctx.srcRev, node_ctx.src.buf, node_ctx.dst.buf);
237                 if (node_ctx.action == NODEACT_ADD)
238                         node_ctx.action = NODEACT_CHANGE;
239         }
240         if (have_text && type == REPO_MODE_DIR)
241                 die("invalid dump: directories cannot have text attached");
243         /*
244          * Find old content (old_data) and decide on the new mode.
245          */
246         if (node_ctx.action == NODEACT_CHANGE && !*node_ctx.dst.buf) {
247                 if (type != REPO_MODE_DIR)
248                         die("invalid dump: root of tree is not a regular file");
249                 old_data = NULL;
250         } else if (node_ctx.action == NODEACT_CHANGE) {
251                 uint32_t mode;
252                 old_data = repo_read_path(node_ctx.dst.buf, &mode);
253                 if (mode == REPO_MODE_DIR && type != REPO_MODE_DIR)
254                         die("invalid dump: cannot modify a directory into a file");
255                 if (mode != REPO_MODE_DIR && type == REPO_MODE_DIR)
256                         die("invalid dump: cannot modify a file into a directory");
257                 node_ctx.type = mode;
258         } else if (node_ctx.action == NODEACT_ADD) {
259                 if (type == REPO_MODE_DIR)
260                         old_data = NULL;
261                 else if (have_text)
262                         old_data = empty_blob;
263                 else
264                         die("invalid dump: adds node without text");
265         } else {
266                 die("invalid dump: Node-path block lacks Node-action");
267         }
269         /*
270          * Adjust mode to reflect properties.
271          */
272         if (have_props) {
273                 if (!node_ctx.prop_delta)
274                         node_ctx.type = type;
275                 if (node_ctx.propLength)
276                         read_props();
277         }
279         /*
280          * Save the result.
281          */
282         if (type == REPO_MODE_DIR)      /* directories are not tracked. */
283                 return;
284         assert(old_data);
285         if (old_data == empty_blob)
286                 /* For the fast_export_* functions, NULL means empty. */
287                 old_data = NULL;
288         if (!have_text) {
289                 fast_export_modify(node_ctx.dst.buf, node_ctx.type, old_data);
290                 return;
291         }
292         fast_export_modify(node_ctx.dst.buf, node_ctx.type, "inline");
293         fast_export_data(node_ctx.type, node_ctx.textLength, &input);
296 static void begin_revision(void)
298         if (!rev_ctx.revision)  /* revision 0 gets no git commit. */
299                 return;
300         fast_export_begin_commit(rev_ctx.revision, rev_ctx.author.buf,
301                 &rev_ctx.log, dump_ctx.uuid.buf, dump_ctx.url.buf,
302                 rev_ctx.timestamp);
305 static void end_revision(void)
307         if (rev_ctx.revision)
308                 fast_export_end_commit(rev_ctx.revision);
311 void svndump_read(const char *url)
313         char *val;
314         char *t;
315         uint32_t active_ctx = DUMP_CTX;
316         uint32_t len;
318         reset_dump_ctx(url);
319         while ((t = buffer_read_line(&input))) {
320                 val = strchr(t, ':');
321                 if (!val)
322                         continue;
323                 val++;
324                 if (*val != ' ')
325                         continue;
326                 val++;
328                 /* strlen(key) + 1 */
329                 switch (val - t - 1) {
330                 case sizeof("SVN-fs-dump-format-version"):
331                         if (constcmp(t, "SVN-fs-dump-format-version"))
332                                 continue;
333                         dump_ctx.version = atoi(val);
334                         if (dump_ctx.version > 3)
335                                 die("expected svn dump format version <= 3, found %"PRIu32,
336                                     dump_ctx.version);
337                         break;
338                 case sizeof("UUID"):
339                         if (constcmp(t, "UUID"))
340                                 continue;
341                         strbuf_reset(&dump_ctx.uuid);
342                         strbuf_addstr(&dump_ctx.uuid, val);
343                         break;
344                 case sizeof("Revision-number"):
345                         if (constcmp(t, "Revision-number"))
346                                 continue;
347                         if (active_ctx == NODE_CTX)
348                                 handle_node();
349                         if (active_ctx == REV_CTX)
350                                 begin_revision();
351                         if (active_ctx != DUMP_CTX)
352                                 end_revision();
353                         active_ctx = REV_CTX;
354                         reset_rev_ctx(atoi(val));
355                         break;
356                 case sizeof("Node-path"):
357                         if (prefixcmp(t, "Node-"))
358                                 continue;
359                         if (!constcmp(t + strlen("Node-"), "path")) {
360                                 if (active_ctx == NODE_CTX)
361                                         handle_node();
362                                 if (active_ctx == REV_CTX)
363                                         begin_revision();
364                                 active_ctx = NODE_CTX;
365                                 reset_node_ctx(val);
366                                 break;
367                         }
368                         if (constcmp(t + strlen("Node-"), "kind"))
369                                 continue;
370                         if (!strcmp(val, "dir"))
371                                 node_ctx.type = REPO_MODE_DIR;
372                         else if (!strcmp(val, "file"))
373                                 node_ctx.type = REPO_MODE_BLB;
374                         else
375                                 fprintf(stderr, "Unknown node-kind: %s\n", val);
376                         break;
377                 case sizeof("Node-action"):
378                         if (constcmp(t, "Node-action"))
379                                 continue;
380                         if (!strcmp(val, "delete")) {
381                                 node_ctx.action = NODEACT_DELETE;
382                         } else if (!strcmp(val, "add")) {
383                                 node_ctx.action = NODEACT_ADD;
384                         } else if (!strcmp(val, "change")) {
385                                 node_ctx.action = NODEACT_CHANGE;
386                         } else if (!strcmp(val, "replace")) {
387                                 node_ctx.action = NODEACT_REPLACE;
388                         } else {
389                                 fprintf(stderr, "Unknown node-action: %s\n", val);
390                                 node_ctx.action = NODEACT_UNKNOWN;
391                         }
392                         break;
393                 case sizeof("Node-copyfrom-path"):
394                         if (constcmp(t, "Node-copyfrom-path"))
395                                 continue;
396                         strbuf_reset(&node_ctx.src);
397                         strbuf_addstr(&node_ctx.src, val);
398                         break;
399                 case sizeof("Node-copyfrom-rev"):
400                         if (constcmp(t, "Node-copyfrom-rev"))
401                                 continue;
402                         node_ctx.srcRev = atoi(val);
403                         break;
404                 case sizeof("Text-content-length"):
405                         if (!constcmp(t, "Text-content-length")) {
406                                 node_ctx.textLength = atoi(val);
407                                 break;
408                         }
409                         if (constcmp(t, "Prop-content-length"))
410                                 continue;
411                         node_ctx.propLength = atoi(val);
412                         break;
413                 case sizeof("Text-delta"):
414                         if (!constcmp(t, "Text-delta")) {
415                                 node_ctx.text_delta = !strcmp(val, "true");
416                                 break;
417                         }
418                         if (constcmp(t, "Prop-delta"))
419                                 continue;
420                         node_ctx.prop_delta = !strcmp(val, "true");
421                         break;
422                 case sizeof("Content-length"):
423                         if (constcmp(t, "Content-length"))
424                                 continue;
425                         len = atoi(val);
426                         t = buffer_read_line(&input);
427                         if (!t)
428                                 die_short_read();
429                         if (*t)
430                                 die("invalid dump: expected blank line after content length header");
431                         if (active_ctx == REV_CTX) {
432                                 read_props();
433                         } else if (active_ctx == NODE_CTX) {
434                                 handle_node();
435                                 active_ctx = INTERNODE_CTX;
436                         } else {
437                                 fprintf(stderr, "Unexpected content length header: %"PRIu32"\n", len);
438                                 if (buffer_skip_bytes(&input, len) != len)
439                                         die_short_read();
440                         }
441                 }
442         }
443         if (buffer_ferror(&input))
444                 die_short_read();
445         if (active_ctx == NODE_CTX)
446                 handle_node();
447         if (active_ctx == REV_CTX)
448                 begin_revision();
449         if (active_ctx != DUMP_CTX)
450                 end_revision();
453 int svndump_init(const char *filename)
455         if (buffer_init(&input, filename))
456                 return error("cannot open %s: %s", filename, strerror(errno));
457         fast_export_init(REPORT_FILENO);
458         strbuf_init(&dump_ctx.uuid, 4096);
459         strbuf_init(&dump_ctx.url, 4096);
460         strbuf_init(&rev_ctx.log, 4096);
461         strbuf_init(&rev_ctx.author, 4096);
462         strbuf_init(&node_ctx.src, 4096);
463         strbuf_init(&node_ctx.dst, 4096);
464         reset_dump_ctx(NULL);
465         reset_rev_ctx(0);
466         reset_node_ctx(NULL);
467         return 0;
470 void svndump_deinit(void)
472         fast_export_deinit();
473         reset_dump_ctx(NULL);
474         reset_rev_ctx(0);
475         reset_node_ctx(NULL);
476         strbuf_release(&rev_ctx.log);
477         strbuf_release(&node_ctx.src);
478         strbuf_release(&node_ctx.dst);
479         if (buffer_deinit(&input))
480                 fprintf(stderr, "Input error\n");
481         if (ferror(stdout))
482                 fprintf(stderr, "Output error\n");
485 void svndump_reset(void)
487         fast_export_reset();
488         buffer_reset(&input);
489         strbuf_release(&dump_ctx.uuid);
490         strbuf_release(&dump_ctx.url);
491         strbuf_release(&rev_ctx.log);
492         strbuf_release(&rev_ctx.author);