bd7c611bb9902281a002f907b6bcf0aa41f58285
1 /*
2 * Totally braindamaged mbox splitter program.
3 *
4 * It just splits a mbox into a list of files: "0001" "0002" ..
5 * so you can process them further from there.
6 */
7 #include <unistd.h>
8 #include <stdlib.h>
9 #include <fcntl.h>
10 #include <sys/types.h>
11 #include <sys/stat.h>
12 #include <sys/mman.h>
13 #include <string.h>
14 #include <stdio.h>
15 #include <ctype.h>
16 #include <assert.h>
/*
 * Print the command-line synopsis on stderr and terminate the process
 * with exit status 1.  The function is declared to return int, but
 * control never comes back to the caller.
 */
static int usage(void)
{
	static const char synopsis[] =
		"git-mailsplit [-d<prec>] <mbox> <directory>\n";

	fputs(synopsis, stderr);
	exit(1);
}
/*
 * Return the length of the first line in the buffer `map` of `size`
 * bytes.  The terminating '\n' is included in the count when present;
 * if no newline occurs within `size` bytes, the whole buffer counts as
 * one (unterminated) line.
 *
 * An empty buffer (size == 0) yields 0 and `map` is never dereferenced.
 */
static int linelen(const char *map, unsigned long size)
{
	const char *newline;

	if (!size)
		return 0;

	newline = memchr(map, '\n', size);
	if (!newline)
		return (int)size;
	return (int)(newline - map) + 1;
}
/*
 * Heuristically decide whether the `len` bytes at `line` look like an
 * mbox "From " separator line.
 *
 * A line is accepted when it
 *   - is at least 20 bytes long and starts with "From ",
 *   - contains a ':' (the last one before the final two bytes is used)
 *     that has digits at offsets -4, -2, -1, +1 and +2 around it, i.e.
 *     something shaped like the tail of "HH:MM:SS", and
 *   - is followed, from offset +3, by a number greater than 90 (the year).
 *
 * Only bytes inside [line, line + len) are examined; the buffer does not
 * have to be NUL-terminated.  Returns 1 on a match and 0 otherwise.
 */
static int is_from_line(const char *line, int len)
{
	const unsigned char *first, *end, *colon;
	char year[32];
	size_t avail;

	if (len < 20 || memcmp("From ", line, 5))
		return 0;

	/* Bytes are read as unsigned char so isdigit() never sees a negative value. */
	end = (const unsigned char *)line + len;
	first = (const unsigned char *)line + 5;

	/* Scan backwards for the last ':' after the "From " prefix. */
	colon = end - 2;
	for (;;) {
		if (colon < first)
			return 0;
		if (*--colon == ':')
			break;
	}

	if (!isdigit(colon[-4]) ||
	    !isdigit(colon[-2]) ||
	    !isdigit(colon[-1]) ||
	    !isdigit(colon[ 1]) ||
	    !isdigit(colon[ 2]))
		return 0;

	/*
	 * year: parse it from a bounded, NUL-terminated copy so strtol()
	 * cannot run past the end of the line (and of the buffer).
	 */
	avail = (size_t)(end - (colon + 3));
	if (avail >= sizeof(year))
		avail = sizeof(year) - 1;
	memcpy(year, colon + 3, avail);
	year[avail] = '\0';
	if (strtol(year, NULL, 10) <= 90)
		return 0;

	/* Ok, close enough */
	return 1;
}
/*
 * Find where the first message in the mailbox buffer ends.
 *
 * `map` points at `size` bytes that must begin with a "From " line;
 * otherwise "corrupt mailbox" is printed on stderr and the process
 * exits with status 1.
 *
 * Returns the byte offset of the next line accepted by is_from_line(),
 * or the offset reached when the buffer is exhausted.
 *
 * NOTE(review): the offset is accumulated as unsigned long but returned
 * as int, so a single message larger than INT_MAX bytes would not be
 * represented correctly.
 */
static int parse_email(const void *map, unsigned long size)
{
	/* Walk the buffer through a char pointer; arithmetic on void * is not ISO C. */
	const char *cursor = map;
	unsigned long offset;

	if (size < 6 || memcmp("From ", cursor, 5)) {
		fprintf(stderr, "corrupt mailbox\n");
		exit(1);
	}

	/* Make sure we don't trigger on this first line */
	cursor++;
	size--;
	offset = 1;

	/*
	 * Search for a line beginning with "From ", and
	 * having something that looks like a date format.
	 */
	do {
		int len = linelen(cursor, size);

		if (is_from_line(cursor, len))
			return offset;
		cursor += len;
		size -= len;
		offset += len;
	} while (size);

	return offset;
}
/*
 * Write all `len` bytes of `buf` to `fd`, continuing after short writes.
 * Returns 0 on success and -1 if write() fails or makes no progress.
 */
static int write_fully(int fd, const char *buf, unsigned long len)
{
	while (len) {
		ssize_t done = write(fd, buf, len);

		if (done <= 0)
			return -1;
		buf += done;
		len -= (unsigned long)done;
	}
	return 0;
}

/*
 * git-mailsplit [-d<prec>] <mbox> <directory>
 *
 * Maps <mbox> read-only, changes into <directory> and writes each message
 * found by parse_email() to a newly created file named after its sequence
 * number, zero-padded to <prec> digits (default 4, allowed range 3..9).
 * Existing files are never overwritten (O_EXCL).  The number of messages
 * written is printed on stdout.  Any failure terminates with status 1.
 */
int main(int argc, char **argv)
{
	int fd, nr, nr_prec = 4;
	struct stat st;
	unsigned long size;
	void *base;
	const char *map;

	if (argc == 4 && !strncmp(argv[1], "-d", 2)) {
		/* Range-check as long first so a huge value cannot wrap into range. */
		long prec = strtol(argv[1] + 2, NULL, 10);

		if (prec < 3 || 10 <= prec)
			usage();
		nr_prec = (int)prec;
		argc--; argv++;
	}
	if (argc != 3)
		usage();

	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror(argv[1]);
		exit(1);
	}
	if (chdir(argv[2]) < 0)
		usage();
	if (fstat(fd, &st) < 0) {
		perror("stat");
		exit(1);
	}
	size = st.st_size;
	base = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
	if (base == MAP_FAILED) {
		perror("mmap");
		close(fd);
		exit(1);
	}
	/* The mapping stays valid after the descriptor is closed. */
	close(fd);

	/* Advance through the mapping with a char pointer (void * arithmetic is non-standard). */
	map = base;
	nr = 0;
	do {
		/* Large enough for any int, even if the counter outgrows nr_prec digits. */
		char name[16];
		unsigned long len = parse_email(map, size);

		assert(len <= size);
		snprintf(name, sizeof(name), "%0*d", nr_prec, ++nr);
		fd = open(name, O_WRONLY | O_CREAT | O_EXCL, 0666);
		if (fd < 0) {
			perror(name);
			exit(1);
		}
		if (write_fully(fd, map, len) < 0) {
			perror("write");
			exit(1);
		}
		/* close() may report a deferred write error; don't lose it. */
		if (close(fd) < 0) {
			perror(name);
			exit(1);
		}
		map += len;
		size -= len;
	} while (size > 0);

	printf("%d\n", nr);
	return 0;
}