Code

Flatten tools/ directory to make build procedure simpler.
authorJunio C Hamano <junkio@cox.net>
Wed, 7 Sep 2005 19:22:56 +0000 (12:22 -0700)
committerJunio C Hamano <junkio@cox.net>
Wed, 7 Sep 2005 19:22:56 +0000 (12:22 -0700)
Also isolate the platform-specific parts.  Currently only Darwin is
defined, but I have also looked at a SunOS-specific patch (which I
dropped on the floor for now).  Doing things this way would make
adding it easier.

Signed-off-by: Junio C Hamano <junkio@cox.net>
Makefile
git-applymbox [new file with mode: 0755]
git-applypatch [new file with mode: 0755]
mailinfo.c [new file with mode: 0644]
mailsplit.c [new file with mode: 0644]
tools/.gitignore [deleted file]
tools/Makefile [deleted file]
tools/git-applymbox [deleted file]
tools/git-applypatch [deleted file]
tools/mailinfo.c [deleted file]
tools/mailsplit.c [deleted file]

index 9aa0c9a4f1423ab50ee02165987b553b0f8de641..9122c03ea3f882c940f8ebc18ec3ba7395d70d74 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -11,7 +11,9 @@
 #
 # Define PPC_SHA1 environment variable when running make to make use of
 # a bundled SHA1 routine optimized for PowerPC.
-
+#
+# Define NEEDS_SSL_WITH_CRYPTO if you need -lcrypto with -lssl (Darwin).
+# Define NEEDS_LIBICONV if linking with libc is not enough (Darwin).
 
 # Define COLLISION_CHECK below if you believe that SHA1's
 # 1461501637330902918203684832716283019655932542976 hashes do not give you
@@ -66,13 +68,20 @@ SCRIPTS=git git-merge-one-file-script git-prune-script \
        git-format-patch-script git-sh-setup-script git-push-script \
        git-branch-script git-parse-remote-script git-verify-tag-script \
        git-ls-remote-script git-rename-script \
-       git-request-pull-script git-bisect-script
+       git-request-pull-script git-bisect-script \
+       git-applymbox git-applypatch
 
 SCRIPTS += git-count-objects-script
 SCRIPTS += git-revert-script
 SCRIPTS += git-octopus-script
 SCRIPTS += git-archimport-script
 
+# The ones that do not have to link with -lcrypto or -lz.
+SIMPLE_PROGRAMS = \
+       git-get-tar-commit-id git-mailinfo git-mailsplit git-stripspace \
+       git-daemon git-var
+
+# ... and all the rest
 PROG=   git-update-cache git-diff-files git-init-db git-write-tree \
        git-read-tree git-commit-tree git-cat-file git-fsck-cache \
        git-checkout-cache git-diff-tree git-rev-tree git-ls-files \
@@ -80,12 +89,13 @@ PROG=   git-update-cache git-diff-files git-init-db git-write-tree \
        git-unpack-file git-export git-diff-cache git-convert-cache \
        git-ssh-push git-ssh-pull git-rev-list git-mktag \
        git-diff-helper git-tar-tree git-local-pull git-hash-object \
-       git-get-tar-commit-id git-apply git-stripspace \
+       git-apply \
        git-diff-stages git-rev-parse git-patch-id git-pack-objects \
        git-unpack-objects git-verify-pack git-receive-pack git-send-pack \
        git-prune-packed git-fetch-pack git-upload-pack git-clone-pack \
-       git-show-index git-daemon git-var git-peek-remote git-show-branch \
-       git-update-server-info git-show-rev-cache git-build-rev-cache
+       git-show-index git-peek-remote git-show-branch \
+       git-update-server-info git-show-rev-cache git-build-rev-cache \
+       $(SIMPLE_PROGRAMS)
 
 ifdef WITH_SEND_EMAIL
 SCRIPTS += git-send-email-script
@@ -126,6 +136,11 @@ LIB_OBJS += server-info.o
 LIBS = $(LIB_FILE)
 LIBS += -lz
 
+ifeq ($(shell uname -s),Darwin)
+       NEEDS_SSL_WITH_CRYPTO = YesPlease
+       NEEDS_LIBICONV = YesPlease
+endif
+
 ifndef NO_OPENSSL
        LIB_OBJS += epoch.o
        OPENSSL_LIBSSL=-lssl
@@ -134,6 +149,16 @@ else
        MOZILLA_SHA1=1
        OPENSSL_LIBSSL=
 endif
+ifdef NEEDS_SSL_WITH_CRYPTO
+       LIB_4_CRYPTO = -lcrypto -lssl
+else
+       LIB_4_CRYPTO = -lcrypto
+endif
+ifdef NEEDS_LIBICONV
+       LIB_4_ICONV = -liconv
+else
+       LIB_4_ICONV =
+endif
 ifdef MOZILLA_SHA1
        SHA1_HEADER="mozilla-sha1/sha1.h"
        LIB_OBJS += mozilla-sha1/sha1.o
@@ -143,11 +168,7 @@ else
                LIB_OBJS += ppc/sha1.o ppc/sha1ppc.o
        else
                SHA1_HEADER=<openssl/sha.h>
-               ifeq ($(shell uname -s),Darwin)
-                       LIBS += -lcrypto -lssl
-               else
-                       LIBS += -lcrypto
-               endif
+               LIBS += $(LIB_4_CRYPTO)
        endif
 endif
 
@@ -161,7 +182,6 @@ all: $(PROG)
 
 all:
        $(MAKE) -C templates
-       $(MAKE) -C tools
 
 %.o: %.c
        $(CC) -o $*.o -c $(ALL_CFLAGS) $<
@@ -171,6 +191,11 @@ all:
 git-%: %.o $(LIB_FILE)
        $(CC) $(ALL_CFLAGS) -o $@ $(filter %.o,$^) $(LIBS)
 
+git-mailinfo : SIMPLE_LIB += $(LIB_4_ICONV)
+$(SIMPLE_PROGRAMS) : $(LIB_FILE)
+$(SIMPLE_PROGRAMS) : git-% : %.o
+       $(CC) $(ALL_CFLAGS) -o $@ $(filter %.o,$^) $(LIB_FILE) $(SIMPLE_LIB)
+
 git-http-pull: pull.o
 git-local-pull: pull.o
 git-ssh-pull: rsh.o pull.o
@@ -218,7 +243,6 @@ install: $(PROG) $(SCRIPTS)
        $(INSTALL) $(PROG) $(SCRIPTS) $(DESTDIR)$(bindir)
        $(INSTALL) git-revert-script $(DESTDIR)$(bindir)/git-cherry-pick-script
        $(MAKE) -C templates install
-       $(MAKE) -C tools install
 
 install-doc:
        $(MAKE) -C Documentation install
@@ -258,7 +282,6 @@ clean:
        rm -f $(GIT_TARNAME).tar.gz git-core_$(GIT_VERSION)-*.tar.gz
        rm -f git-core_$(GIT_VERSION)-*.deb git-core_$(GIT_VERSION)-*.dsc
        rm -f git-tk_$(GIT_VERSION)-*.deb
-       $(MAKE) -C tools/ clean
        $(MAKE) -C Documentation/ clean
        $(MAKE) -C templates/ clean
        $(MAKE) -C t/ clean
diff --git a/git-applymbox b/git-applymbox
new file mode 100755 (executable)
index 0000000..e58bb21
--- /dev/null
@@ -0,0 +1,109 @@
+#!/bin/sh
+##
+## "dotest" is my stupid name for my patch-application script, which
+## I never got around to renaming after I tested it. We're now on the
+## second generation of scripts, still called "dotest".
+##
+## Update: Ryan Anderson finally shamed me into naming this "applymbox".
+##
+## You give it a mbox-format collection of emails, and it will try to
+## apply them to the kernel using "applypatch"
+##
+## applymbox [-u] [-k] [-q] (-c .dotest/msg-number | mail_archive) [Signoff_file]
+##
+## The patch application may fail in the middle.  In which case:
+## (1) look at .dotest/patch and fix it up to apply
+## (2) re-run applymbox with -c .dotest/msg-number for the current one.
+## Pay special attention to the commit log message if you do this and
+## use a Signoff_file, because applypatch wants to append the sign-off
+## message to msg-clean every time it is run.
+
+. git-sh-setup-script || die "Not a git archive"
+
+usage () {
+    echo >&2 "applymbox [-u] [-k] [-q] (-c .dotest/<num> | mbox) [signoff]"
+    exit 1
+}
+
+# Option parsing.  resume=t means "process every message"; -c sets it
+# to f so the main loop skips ahead to the message named after -c.
+keep_subject= query_apply= continue= utf8= resume=t
+while case "$#" in 0) break ;; esac
+do
+       case "$1" in
+       -u)     utf8=-u ;;
+       -k)     keep_subject=-k ;;
+       -q)     query_apply=t ;;
+       -c)     # -c needs an argument; without this check the extra
+               # "shift" below would run on an empty argument list.
+               case "$#" in 1) usage ;; esac
+               continue="$2"; resume=f; shift ;;
+       -*)     usage ;;
+       *)      break ;;
+       esac
+       shift
+done
+
+# A fresh run (no -c): start from an empty .dotest and split the mbox
+# named by the first remaining argument into it.
+case "$continue" in
+'')
+       rm -rf .dotest
+       mkdir .dotest
+       git-mailsplit "$1" .dotest || exit 1
+       shift
+esac
+
+# Refuse to run when git-diff-cache reports paths that differ between
+# the index and HEAD.
+files=$(git-diff-cache --cached --name-only HEAD) || exit
+if [ "$files" ]; then
+   echo "Dirty index: cannot apply patches (dirty: $files)" >&2
+   exit 1
+fi
+
+# Record the -q and -k options as marker files inside .dotest.
+case "$query_apply" in
+t)     touch .dotest/.query_apply
+esac
+case "$keep_subject" in
+-k)    : >.dotest/.keep_subject
+esac
+
+signoff="$1"
+# Walk over the split messages.  The placeholder word "x" is dropped
+# again by the "shift" that follows.
+set x .dotest/0*
+shift
+while case "$#" in 0) break;; esac
+do
+    i="$1"
+    case "$resume,$continue" in
+    f,"$i")    resume=t;;      # reached the message named by -c
+    f,*)       shift           # still before it: skip this one
+               continue;;
+    *)
+           git-mailinfo $keep_subject $utf8 \
+               .dotest/msg .dotest/patch <"$i" >.dotest/info || exit 1
+           git-stripspace < .dotest/msg > .dotest/msg-clean
+           ;;
+    esac
+    while :; # for fixing up and retry
+    do
+       git-applypatch .dotest/msg-clean .dotest/patch .dotest/info "$signoff"
+       # Capture the status right away so nothing can clobber it.
+       ret=$?
+       case "$ret" in
+       0 | 2 )
+               # 2 is a special exit code from applypatch to indicate that
+               # the patch wasn't applied, but continue anyway
+               ;;
+       *)
+               if test -f .dotest/.query_apply
+               then
+                       echo >&2 "* Patch failed."
+                       echo >&2 "* You could fix it up in your editor and"
+                       echo >&2 "  retry.  If you want to do so, say yes here"
+                       echo >&2 "  AFTER fixing .dotest/patch up."
+                       # printf, because "echo -n" is not portable.
+                       printf >&2 '%s' "Retry [y/N]? "
+                       read yesno
+                       case "$yesno" in
+                       [Yy]*)
+                               continue ;;
+                       esac
+               fi
+               exit $ret
+       esac
+       break
+    done
+    shift
+done
+# return to pristine
+rm -fr .dotest
diff --git a/git-applypatch b/git-applypatch
new file mode 100755 (executable)
index 0000000..e5bc3c0
--- /dev/null
@@ -0,0 +1,118 @@
+#!/bin/sh
+##
+## applypatch takes four file arguments, and uses those to
+## apply the unpacked patch (surprise surprise) that they
+## represent to the current tree.
+##
+## The arguments are:
+##     $1 - file with commit message
+##     $2 - file with the actual patch
+##     $3 - "info" file with Author, email and subject
+##     $4 - optional file containing signoff to add
+##
+. git-sh-setup-script || die "Not a git archive."
+
+final=.dotest/final-commit
+##
+## If this file exists, we ask before applying
+##
+query_apply=.dotest/.query_apply
+
+## We do not munge the first line of the commit message too much
+## if this file exists.
+keep_subject=.dotest/.keep_subject
+
+
+# Positional arguments, as listed in the header comment of this script.
+MSGFILE=$1
+PATCHFILE=$2
+INFO=$3
+SIGNOFF=$4
+EDIT=${VISUAL:-${EDITOR:-vi}}
+
+# Author identity and subject are read from the info file the caller
+# passed as $3, not from a hard-coded path.
+export GIT_AUTHOR_NAME="$(sed -n '/^Author/ s/Author: //p' "$INFO")"
+export GIT_AUTHOR_EMAIL="$(sed -n '/^Email/ s/Email: //p' "$INFO")"
+export GIT_AUTHOR_DATE="$(sed -n '/^Date/ s/Date: //p' "$INFO")"
+export SUBJECT="$(sed -n '/^Subject/ s/Subject: //p' "$INFO")"
+
+# Work out the sign-off line, if one was asked for.  $SIGNOFF may name
+# a file holding the text, or be one of yes/true/me/please, in which
+# case the line is built from git-var GIT_COMMITTER_IDENT.  Any other
+# value means no sign-off.
+if test '' != "$SIGNOFF"
+then
+       if test -f "$SIGNOFF"
+       then
+               SIGNOFF=`cat "$SIGNOFF"` || exit
+       elif case "$SIGNOFF" in yes | true | me | please) : ;; *) false ;; esac
+       then
+               SIGNOFF=`git-var GIT_COMMITTER_IDENT | sed -e '
+                               s/>.*/>/
+                               s/^/Signed-off-by: /'
+               `
+       else
+               SIGNOFF=
+       fi
+       # Append the sign-off unless the last Signed-off-by: line in the
+       # message is already the same text.
+       if test '' != "$SIGNOFF"
+       then
+               LAST_SIGNED_OFF_BY=`
+                       sed -ne '/^Signed-off-by: /p' "$MSGFILE" |
+                       tail -n 1
+               `
+               test "$LAST_SIGNED_OFF_BY" = "$SIGNOFF" ||
+               echo "$SIGNOFF" >>"$MSGFILE"
+       fi
+fi
+
+patch_header=
+test -f "$keep_subject" || patch_header='[PATCH] '
+
+{
+       echo "$patch_header$SUBJECT"
+       if test -s "$MSGFILE"
+       then
+               echo
+               cat "$MSGFILE"
+       fi
+} >"$final"
+
+# Ask before applying only while the .query_apply marker exists.
+interactive=yes
+test -f "$query_apply" || interactive=no
+
+while [ "$interactive" = yes ]; do
+       echo "Commit Body is:"
+       echo "--------------------------"
+       cat "$final"
+       echo "--------------------------"
+       # printf, because "echo -n" is not portable.
+       printf '%s' "Apply? [y]es/[n]o/[e]dit/[a]ccept all "
+       # On EOF "read" fails and leaves the reply empty; give up instead
+       # of prompting forever.
+       read reply || exit 1
+       case "$reply" in
+               y|Y) interactive=no;;
+               n|N) exit 2;;   # special value to tell dotest to keep going
+               e|E) "$EDIT" "$final";;
+               a|A) rm -f "$query_apply"
+                    interactive=no ;;
+       esac
+done
+
+# The applypatch-msg hook, when executable, is handed the final commit
+# message file; a failing hook aborts with its exit status.
+if test -x "$GIT_DIR"/hooks/applypatch-msg
+then
+       "$GIT_DIR"/hooks/applypatch-msg "$final" || exit
+fi
+
+echo
+echo Applying "'$SUBJECT'"
+echo
+
+git-apply --index "$PATCHFILE" || exit 1
+
+# The pre-applypatch hook runs after the patch is applied but before
+# the commit is made; a failing hook aborts here.
+if test -x "$GIT_DIR"/hooks/pre-applypatch
+then
+       "$GIT_DIR"/hooks/pre-applypatch || exit
+fi
+
+# Write the tree, commit it with the current HEAD as the only parent,
+# and store the new commit name in HEAD.
+tree=$(git-write-tree) || exit 1
+echo Wrote tree $tree
+commit=$(git-commit-tree $tree -p $(cat "$GIT_DIR"/HEAD) < "$final") || exit 1
+echo Committed: $commit
+echo $commit > "$GIT_DIR"/HEAD
+
+# The exit status of the post-applypatch hook is not checked.
+if test -x "$GIT_DIR"/hooks/post-applypatch
+then
+       "$GIT_DIR"/hooks/post-applypatch
+fi
diff --git a/mailinfo.c b/mailinfo.c
new file mode 100644 (file)
index 0000000..df470bb
--- /dev/null
@@ -0,0 +1,749 @@
+/*
+ * Another stupid program, this one parsing the headers of an
+ * email to figure out authorship and subject
+ */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <iconv.h>
+
+/* Output streams for the commit message and the patch (opened in main). */
+static FILE *cmitmsg, *patchfile;
+
+/* Set by the -k and -u command line options respectively. */
+static int keep_subject = 0;
+static int metainfo_utf8 = 0;
+/* The line being processed, and the header values collected so far. */
+static char line[1000];
+static char date[1000];
+static char name[1000];
+static char email[1000];
+static char subject[1000];
+
+/* Content-Transfer-Encoding and charset currently in effect. */
+static enum  {
+       TE_DONTCARE, TE_QP, TE_BASE64,
+} transfer_encoding;
+static char charset[256];
+
+/* Multipart boundary including its leading "--", and its length. */
+static char multipart_boundary[1000];
+static int multipart_boundary_len;
+/* Number of lines written to patchfile. */
+static int patch_lines = 0;
+
+/*
+ * Pick what to use as the author name: "name" itself when it looks
+ * plausible (3 to 60 characters, free of '@', '<' and '>'), otherwise
+ * fall back to "email".
+ */
+static char *sanity_check(char *name, char *email)
+{
+       size_t n = strlen(name);
+       int plausible = (3 <= n && n <= 60) && !strpbrk(name, "@<>");
+
+       return plausible ? name : email;
+}
+
+/*
+ * Parse a From: value into the file-scope email[] and name[] buffers.
+ * "line" is modified in place: the address and its <> delimiters are
+ * overwritten with spaces.  Returns 1 when an address was picked up,
+ * and 0 when the line has no '@' or has a second '@' while an email
+ * address is already known.
+ */
+static int handle_from(char *line)
+{
+       char *at = strchr(line, '@');
+       char *dst;
+
+       if (!at)
+               return 0;
+
+       /*
+        * If we already have one email, don't take any confusing lines
+        */
+       if (*email && strchr(at+1, '@'))
+               return 0;
+
+       /* Pick up the string around '@', possibly delimited with <>
+        * pair; that is the email part.  White them out while copying.
+        */
+       while (at > line) {
+               /* unsigned char: isspace() is undefined for negative values */
+               unsigned char c = at[-1];
+               if (isspace(c))
+                       break;
+               if (c == '<') {
+                       at[-1] = ' ';
+                       break;
+               }
+               at--;
+       }
+       dst = email;
+       for (;;) {
+               unsigned char c = *at;
+               if (!c || c == '>' || isspace(c)) {
+                       if (c == '>')
+                               *at = ' ';
+                       break;
+               }
+               *at++ = ' ';
+               *dst++ = c;
+       }
+       *dst++ = 0;
+
+       /* The remainder is name.  It could be "John Doe <john.doe@xz>"
+        * or "john.doe@xz (John Doe)", but we have whited out the
+        * email part, so trim from both ends, possibly removing
+        * the () pair at the end.
+        */
+       at = line + strlen(line);
+       while (at > line) {
+               unsigned char c = *--at;
+               if (!isspace(c)) {
+                       /* cut at a closing ')', else just after the last character */
+                       at[(c == ')') ? 0 : 1] = 0;
+                       break;
+               }
+       }
+
+       at = line;
+       for (;;) {
+               unsigned char c = *at;
+               if (!c || !isspace(c)) {
+                       if (c == '(')
+                               at++;
+                       break;
+               }
+               at++;
+       }
+       /* Fall back to the address when the name does not look sane. */
+       at = sanity_check(at, email);
+       strcpy(name, at);
+       return 1;
+}
+
+/* Remember the Date: value in date[].  Always returns 0. */
+static int handle_date(char *line)
+{
+       strcpy(date, line);
+       return 0;
+}
+
+/* Remember the Subject: value in subject[].  Always returns 0. */
+static int handle_subject(char *line)
+{
+       strcpy(subject, line);
+       return 0;
+}
+
+/* NOTE NOTE NOTE.  We do not claim we do full MIME.  We just attempt
+ * to have enough heuristics to grok MIME encoded patches often found
+ * on our mailing lists.  For example, we do not even treat in-body
+ * header lines case insensitively.
+ */
+
+/*
+ * Look for "name" anywhere in "line" (matched case-insensitively) and
+ * copy the value that follows it into "attr".  A value that starts with
+ * a double quote runs up to the next double quote; otherwise it ends at
+ * the first ';', space or tab.  Returns 1 when the name was found, and
+ * 0 (with "attr" set to the empty string) when it was not.
+ *
+ * NOTE(review): the copy is not bounded by the size of "attr"; callers
+ * must supply a buffer large enough for anything "line" can hold.
+ */
+static int slurp_attr(const char *line, const char *name, char *attr)
+{
+       char *ends, *ap = strcasestr(line, name);
+       size_t sz;
+
+       if (!ap) {
+               *attr = 0;
+               return 0;
+       }
+       ap += strlen(name);
+       if (*ap == '"') {
+               ap++;
+               ends = "\"";
+       }
+       else
+               ends = "; \t";
+       sz = strcspn(ap, ends);
+       memcpy(attr, ap, sz);
+       attr[sz] = 0;
+       return 1;
+}
+
+/*
+ * Handle a Content-Type header found inside a multipart part: record
+ * its charset (lowercased) in charset[].  A nested boundary= attribute
+ * is not supported and makes the program exit.  Always returns 0.
+ */
+static int handle_subcontent_type(char *line)
+{
+       /* We do not want to mess with boundary.  Note that we do not
+        * handle nested multipart.
+        */
+       if (strcasestr(line, "boundary=")) {
+               fprintf(stderr, "Not handling nested multipart message.\n");
+               exit(1);
+       }
+       slurp_attr(line, "charset=", charset);
+       if (*charset) {
+               int i, c;
+               /* unsigned char: tolower() is undefined for negative values */
+               for (i = 0; (c = (unsigned char) charset[i]) != 0; i++)
+                       charset[i] = tolower(c);
+       }
+       return 0;
+}
+
+/*
+ * Handle the top-level Content-Type header: store the boundary=
+ * attribute, prefixed with "--", in multipart_boundary[] (left empty
+ * when there is none) and the charset= attribute in charset[].
+ * Always returns 0.
+ */
+static int handle_content_type(char *line)
+{
+       *multipart_boundary = 0;
+       /* leave room for the "--" that is put in front of the boundary */
+       if (slurp_attr(line, "boundary=", multipart_boundary + 2)) {
+               memcpy(multipart_boundary, "--", 2);
+               multipart_boundary_len = strlen(multipart_boundary);
+       }
+       slurp_attr(line, "charset=", charset);
+       return 0;
+}
+
+/*
+ * Set transfer_encoding from a Content-Transfer-Encoding value:
+ * base64, quoted-printable, or "don't care" for anything else.
+ * Always returns 0.
+ */
+static int handle_content_transfer_encoding(char *line)
+{
+       if (strcasestr(line, "base64"))
+               transfer_encoding = TE_BASE64;
+       else if (strcasestr(line, "quoted-printable"))
+               transfer_encoding = TE_QP;
+       else
+               transfer_encoding = TE_DONTCARE;
+       return 0;
+}
+
+/* Does "line" begin with the bytes of the current multipart boundary? */
+static int is_multipart_boundary(const char *line)
+{
+       int differs = memcmp(line, multipart_boundary, multipart_boundary_len);
+
+       return !differs;
+}
+
+/*
+ * Strip trailing whitespace from "line" in place and return the
+ * length of what is left.
+ */
+static int eatspace(char *line)
+{
+       int len = strlen(line);
+       /* unsigned char: isspace() is undefined for negative values */
+       while (len > 0 && isspace((unsigned char) line[len-1]))
+               line[--len] = 0;
+       return len;
+}
+
+#define SEEN_FROM 01
+#define SEEN_DATE 02
+#define SEEN_SUBJECT 04
+
+/* First lines of body can have From:, Date:, and Subject:
+ *
+ * "seen" is a bitmask of the SEEN_* flags for headers already taken
+ * from the body; each kind is accepted at most once.  A line starting
+ * with "[PATCH]" is taken as the subject as well.  The match is case
+ * sensitive.  Returns 1 when the line was consumed as a header and 0
+ * when it is ordinary body text.
+ */
+static int handle_inbody_header(int *seen, char *line)
+{
+       if (!memcmp("From:", line, 5) && isspace(line[5])) {
+               if (!(*seen & SEEN_FROM) && handle_from(line+6)) {
+                       *seen |= SEEN_FROM;
+                       return 1;
+               }
+       }
+       if (!memcmp("Date:", line, 5) && isspace(line[5])) {
+               if (!(*seen & SEEN_DATE)) {
+                       handle_date(line+6);
+                       *seen |= SEEN_DATE;
+                       return 1;
+               }
+       }
+       if (!memcmp("Subject:", line, 8) && isspace(line[8])) {
+               if (!(*seen & SEEN_SUBJECT)) {
+                       handle_subject(line+9);
+                       *seen |= SEEN_SUBJECT;
+                       return 1;
+               }
+       }
+       if (!memcmp("[PATCH]", line, 7) && isspace(line[7])) {
+               if (!(*seen & SEEN_SUBJECT)) {
+                       /* the whole line, "[PATCH]" included, is the subject */
+                       handle_subject(line);
+                       *seen |= SEEN_SUBJECT;
+                       return 1;
+               }
+       }
+       return 0;
+}
+
+/*
+ * Skip noise at the front of a Subject line and return a pointer to
+ * where the interesting part starts (inside the same buffer).  With
+ * keep_subject set the line is returned untouched.  Otherwise leading
+ * "Re:"/"re:", blanks, tabs, colons and short "[...]" tags are skipped.
+ */
+static char *cleanup_subject(char *subject)
+{
+       char *close;
+
+       if (keep_subject)
+               return subject;
+       while (1) {
+               char lead = *subject;
+
+               if (lead == ' ' || lead == '\t' || lead == ':') {
+                       subject++;
+               } else if ((lead == 'r' || lead == 'R') &&
+                          !memcmp("e:", subject + 1, 2)) {
+                       subject += 3;
+               } else if (lead == '[') {
+                       close = strchr(subject, ']');
+                       if (!close) {
+                               /* unterminated tag: drop just the '[' */
+                               subject++;
+                               continue;
+                       }
+                       /* a tag is only dropped when the text before ']' is
+                        * at most twice as long as what starts at ']' */
+                       if ((int)(close - subject) > 2 * (int) strlen(close))
+                               return subject;
+                       subject = close + 1;
+               } else {
+                       return subject;
+               }
+       }
+}
+
+/*
+ * Collapse every run of whitespace in "buf" into a single ' ', in place.
+ */
+static void cleanup_space(char *buf)
+{
+       char *src = buf, *dst = buf;
+
+       while (*src) {
+               unsigned char ch = *src++;
+
+               if (!isspace(ch)) {
+                       *dst++ = ch;
+                       continue;
+               }
+               /* one blank stands in for the whole run */
+               *dst++ = ' ';
+               while (isspace((unsigned char) *src))
+                       src++;
+       }
+       *dst = 0;
+}
+
+typedef int (*header_fn_t)(char *);
+struct header_def {
+       const char *name;
+       header_fn_t func;
+       int namelen;
+};
+
+/*
+ * Find the entry of the NULL-terminated table "header" whose name
+ * matches the start of "line" (case-insensitively, followed by ':' and
+ * a whitespace character) and call its handler with the text after
+ * that.  Only the first matching entry is used.  The "len" argument is
+ * not used.
+ */
+static void check_header(char *line, int len, struct header_def *header)
+{
+       int i;
+
+       /* namelen is filled in on the first call for a given table */
+       if (header[0].namelen <= 0) {
+               for (i = 0; header[i].name; i++)
+                       header[i].namelen = strlen(header[i].name);
+       }
+       for (i = 0; header[i].name; i++) {
+               /* named so that it does not shadow the "len" parameter */
+               int namelen = header[i].namelen;
+               if (!strncasecmp(line, header[i].name, namelen) &&
+                   line[namelen] == ':' &&
+                   isspace((unsigned char) line[namelen + 1])) {
+                       header[i].func(line + namelen + 2);
+                       break;
+               }
+       }
+}
+
+/* Dispatch one header line of a multipart part to its handler. */
+static void check_subheader_line(char *line, int len)
+{
+       /* namelen is left zero here; check_header() fills it in */
+       static struct header_def header[] = {
+               { "Content-Type", handle_subcontent_type },
+               { "Content-Transfer-Encoding",
+                 handle_content_transfer_encoding },
+               { NULL },
+       };
+       check_header(line, len, header);
+}
+/* Dispatch one top-level mail header line to its handler. */
+static void check_header_line(char *line, int len)
+{
+       /* namelen is left zero here; check_header() fills it in */
+       static struct header_def header[] = {
+               { "From", handle_from },
+               { "Date", handle_date },
+               { "Subject", handle_subject },
+               { "Content-Type", handle_content_type },
+               { "Content-Transfer-Encoding",
+                 handle_content_transfer_encoding },
+               { NULL },
+       };
+       check_header(line, len, header);
+}
+
+/*
+ * Read one logical header line from "in" into "line" (capacity "sz"),
+ * joining folded continuation lines.  Trailing whitespace of each
+ * physical line is removed.  Returns the length of the result; 0 means
+ * an empty line or end of input was reached right away.
+ */
+static int read_one_header_line(char *line, int sz, FILE *in)
+{
+       int ofs = 0;
+       while (ofs < sz) {
+               int peek, len;
+               if (fgets(line + ofs, sz - ofs, in) == NULL)
+                       return ofs;
+               len = eatspace(line + ofs);
+               if (len == 0)
+                       return ofs;
+               /* look at the next character without consuming it */
+               peek = fgetc(in); ungetc(peek, in);
+               if (peek == ' ' || peek == '\t') {
+                       /* Yuck, 2822 header "folding" */
+                       ofs += len;
+                       continue;
+               }
+               return ofs + len;
+       }
+       return ofs;
+}
+
+/*
+ * Value of the hexadecimal digit "c" (either case), or all bits set
+ * when "c" is not a hexadecimal digit.
+ */
+static unsigned hexval(int c)
+{
+       switch (c) {
+       case '0': case '1': case '2': case '3': case '4':
+       case '5': case '6': case '7': case '8': case '9':
+               return c - '0';
+       case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+               return c - 'a' + 10;
+       case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+               return c - 'A' + 10;
+       default:
+               return ~0u;
+       }
+}
+
+/*
+ * Decode quoted-printable text starting at "in" into "ot" (which may
+ * be the same buffer); "ep" marks the end of the input.  The output is
+ * NUL terminated.  Always returns 0.
+ */
+static int decode_q_segment(char *in, char *ot, char *ep)
+{
+       int c;
+       while ((c = *in++) != 0 && (in <= ep)) {
+               if (c == '=') {
+                       int d = *in++;
+                       int e;
+                       if (d == '\n' || !d)
+                               break; /* drop trailing newline */
+                       e = *in;
+                       if (!e)
+                               break; /* escape cut short; do not step over the NUL */
+                       in++;
+                       *ot++ = ((hexval(d) << 4) | hexval(e));
+               }
+               else
+                       *ot++ = c;
+       }
+       *ot = 0;
+       return 0;
+}
+
+/*
+ * Decode base64 text starting at "in" into "ot"; "ep" marks the end of
+ * the input.  Characters outside the base64 alphabet are skipped.  The
+ * output is NUL terminated.  Always returns 0.
+ */
+static int decode_b_segment(char *in, char *ot, char *ep)
+{
+       /* Decode in..ep, possibly in-place to ot */
+       int c, pos = 0, acc = 0;
+
+       while ((c = *in++) != 0 && (in <= ep)) {
+               /* map the character to its 6-bit value */
+               if (c == '+')
+                       c = 62;
+               else if (c == '/')
+                       c = 63;
+               else if ('A' <= c && c <= 'Z')
+                       c -= 'A';
+               else if ('a' <= c && c <= 'z')
+                       c -= 'a' - 26;
+               else if ('0' <= c && c <= '9')
+                       c -= '0' - 52;
+               else if (c == '=') {
+                       /* padding is almost like (c == 0), except we do
+                        * not output NUL resulting only from it;
+                        * for now we just trust the data.
+                        */
+                       c = 0;
+               }
+               else
+                       continue; /* garbage */
+               /* pos counts 6-bit values within a group of four; acc
+                * holds the bits not yet written out */
+               switch (pos++) {
+               case 0:
+                       acc = (c << 2);
+                       break;
+               case 1:
+                       *ot++ = (acc | (c >> 4));
+                       acc = (c & 15) << 4;
+                       break;
+               case 2:
+                       *ot++ = (acc | (c >> 2));
+                       acc = (c & 3) << 6;
+                       break;
+               case 3:
+                       *ot++ = (acc | c);
+                       acc = pos = 0;
+                       break;
+               }
+       }
+       *ot = 0;
+       return 0;
+}
+
+/*
+ * Convert "line" from "charset" to UTF-8 in place.  Nothing happens
+ * when "charset" is empty.  When no converter for "charset" exists, a
+ * message is printed and "charset" is emptied.  When the conversion
+ * itself fails, "line" is left untouched.
+ *
+ * NOTE(review): the result is copied back with strcpy(); "line" must
+ * have room for text that grew during conversion.
+ */
+static void convert_to_utf8(char *line, char *charset)
+{
+       if (*charset) {
+               char *in, *out;
+               size_t insize, outsize, nrc;
+               char outbuf[4096]; /* cheat */
+               iconv_t conv = iconv_open("utf-8", charset);
+
+               if (conv == (iconv_t) -1) {
+                       fprintf(stderr, "cannot convert from %s to utf-8\n",
+                               charset);
+                       *charset = 0;
+                       return;
+               }
+               in = line;
+               insize = strlen(in);
+               out = outbuf;
+               /* keep one byte back for the terminating NUL written below */
+               outsize = sizeof(outbuf) - 1;
+               nrc = iconv(conv, &in, &insize, &out, &outsize);
+               iconv_close(conv);
+               if (nrc == (size_t) -1)
+                       return;
+               *out = 0;
+               strcpy(line, outbuf);
+       }
+}
+
+/*
+ * Decode RFC 2047 style "=?charset?B?...?=" and "=?charset?Q?...?="
+ * words in the header value "it", in place.  With metainfo_utf8 set,
+ * each decoded word is also converted to UTF-8.  On anything malformed,
+ * or when a piece would not fit the scratch buffers, "it" is left
+ * unmodified.
+ *
+ * NOTE(review): the result (under 1000 bytes including the NUL) is
+ * copied back with strcpy(); "it" must have room for it even when
+ * charset conversion made the text longer than the input.
+ */
+static void decode_header_bq(char *it)
+{
+       char *in, *out, *ep, *cp, *sp;
+       char outbuf[1000];
+       char *const outend = outbuf + sizeof(outbuf); /* one past the end */
+
+       in = it;
+       out = outbuf;
+       while ((ep = strstr(in, "=?")) != NULL) {
+               int sz, encoding;
+               size_t piecelen;
+               /* piecebuf has room for whatever convert_to_utf8() copies
+                * back from its 4096-byte scratch buffer */
+               char charset_q[256], piecebuf[4096 + 1];
+               if (in != ep) {
+                       /* plain text in front of the encoded word */
+                       sz = ep - in;
+                       if (outend - out <= sz)
+                               return; /* would not fit; no munging */
+                       memcpy(out, in, sz);
+                       out += sz;
+                       in += sz;
+               }
+               /* E.g.
+                * ep : "=?iso-2022-jp?B?GyR...?= foo"
+                * ep : "=?ISO-8859-1?Q?Foo=FCbar?= baz"
+                */
+               ep += 2;
+               cp = strchr(ep, '?');
+               if (!cp)
+                       return; /* no munging */
+               if (sizeof(charset_q) <= (size_t)(cp - ep))
+                       return; /* charset name too long; no munging */
+               for (sp = ep; sp < cp; sp++)
+                       charset_q[sp - ep] = tolower((unsigned char) *sp);
+               charset_q[cp - ep] = 0;
+               encoding = (unsigned char) cp[1];
+               if (!encoding || cp[2] != '?')
+                       return; /* no munging */
+               ep = strstr(cp + 3, "?=");
+               if (!ep)
+                       return; /* no munging */
+               /* decoding never produces more bytes than it consumes */
+               if (sizeof(piecebuf) <= (size_t)(ep - (cp + 3)))
+                       return; /* encoded word too long; no munging */
+               switch (tolower(encoding)) {
+               default:
+                       return; /* no munging */
+               case 'b':
+                       sz = decode_b_segment(cp + 3, piecebuf, ep);
+                       break;
+               case 'q':
+                       sz = decode_q_segment(cp + 3, piecebuf, ep);
+                       break;
+               }
+               if (sz < 0)
+                       return;
+               if (metainfo_utf8)
+                       convert_to_utf8(piecebuf, charset_q);
+               piecelen = strlen(piecebuf);
+               if ((size_t)(outend - out) <= piecelen)
+                       return; /* would not fit; no munging */
+               memcpy(out, piecebuf, piecelen);
+               out += piecelen;
+               in = ep + 2;
+       }
+       /* whatever follows the last encoded word */
+       if ((size_t)(outend - out) <= strlen(in))
+               return; /* would not fit; no munging */
+       strcpy(out, in);
+       strcpy(it, outbuf);
+}
+
+/*
+ * Undo the current Content-Transfer-Encoding on "line", in place.
+ * Nothing is done for TE_DONTCARE.
+ */
+static void decode_transfer_encoding(char *line)
+{
+       char *ep;
+
+       switch (transfer_encoding) {
+       case TE_QP:
+               ep = line + strlen(line);
+               decode_q_segment(line, line, ep);
+               break;
+       case TE_BASE64:
+               ep = line + strlen(line);
+               decode_b_segment(line, line, ep);
+               break;
+       case TE_DONTCARE:
+               break;
+       }
+}
+
+/*
+ * Print the collected Author/Email/Subject/Date block to stdout.
+ * Only the first call does anything; later calls return at once.
+ */
+static void handle_info(void)
+{
+       char *sub;
+       static int done_info = 0;
+
+       if (done_info)
+               return;
+
+       done_info = 1;
+       /* sub points into subject[], past whatever was skipped */
+       sub = cleanup_subject(subject);
+       cleanup_space(name);
+       cleanup_space(date);
+       cleanup_space(email);
+       cleanup_space(sub);
+
+       /* Unwrap inline B and Q encoding, and optionally
+        * normalize the meta information to utf8.
+        */
+       decode_header_bq(name);
+       decode_header_bq(date);
+       decode_header_bq(email);
+       decode_header_bq(sub);
+       printf("Author: %s\nEmail: %s\nSubject: %s\nDate: %s\n\n",
+              name, email, sub, date);
+}
+
+/* We are inside message body and have read line[] already.
+ * Spit out the commit log.
+ *
+ * Lines are written to cmitmsg until one that starts a patch
+ * ("diff -", "---" or "Index: ") or input runs out; cmitmsg is then
+ * closed and 0 is returned with that line still in line[].  Returns 1,
+ * leaving cmitmsg open, when a multipart boundary is hit first.  Does
+ * nothing and returns 0 when cmitmsg is already closed.
+ */
+static int handle_commit_msg(void)
+{
+       if (!cmitmsg)
+               return 0;
+       do {
+               if (!memcmp("diff -", line, 6) ||
+                   !memcmp("---", line, 3) ||
+                   !memcmp("Index: ", line, 7))
+                       break;
+               if ((multipart_boundary[0] && is_multipart_boundary(line))) {
+                       /* We come here when the first part had only
+                        * the commit message without any patch.  We
+                        * pretend we have not seen this line yet, and
+                        * go back to the loop.
+                        */
+                       return 1;
+               }
+
+               /* Unwrap transfer encoding and optionally
+                * normalize the log message to UTF-8.
+                */
+               decode_transfer_encoding(line);
+               if (metainfo_utf8)
+                       convert_to_utf8(line, charset);
+               fputs(line, cmitmsg);
+       } while (fgets(line, sizeof(line), stdin) != NULL);
+       fclose(cmitmsg);
+       cmitmsg = NULL;
+       return 0;
+}
+
+/* We have done the commit message and have the first
+ * line of the patch in line[].
+ *
+ * Copies lines to patchfile, counting them in patch_lines, until a
+ * multipart boundary is seen or input runs out.
+ */
+static void handle_patch(void)
+{
+       do {
+               if (multipart_boundary[0] && is_multipart_boundary(line))
+                       break;
+               /* Only unwrap transfer encoding but otherwise do not
+                * do anything.  We do *NOT* want UTF-8 conversion
+                * here; we are dealing with the user payload.
+                */
+               decode_transfer_encoding(line);
+               fputs(line, patchfile);
+               patch_lines++;
+       } while (fgets(line, sizeof(line), stdin) != NULL);
+}
+
+/* multipart boundary and transfer encoding are set up for us, and we
+ * are at the end of the sub header.  do equivalent of handle_body up
+ * to the next boundary without closing patchfile --- we will expect
+ * that the first part to contain commit message and a patch, and
+ * handle other parts as pure patches.
+ *
+ * Returns -1 when not a single line could be read (end of input),
+ * and 0 otherwise.
+ */
+static int handle_multipart_one_part(void)
+{
+       int seen = 0;
+       int n = 0;
+       int len;
+
+       while (fgets(line, sizeof(line), stdin) != NULL) {
+       again:
+               len = eatspace(line);
+               n++;
+               if (!len)
+                       continue;
+               if (is_multipart_boundary(line))
+                       break;
+               if (0 <= seen && handle_inbody_header(&seen, line))
+                       continue;
+               seen = -1; /* no more inbody headers */
+               /* put back the newline that eatspace() removed */
+               line[len] = '\n';
+               handle_info();
+               /* a boundary in the middle of the log: reprocess line[] */
+               if (handle_commit_msg())
+                       goto again;
+               handle_patch();
+               break;
+       }
+       if (n == 0)
+               return -1;
+       return 0;
+}
+
+/*
+ * Process the body of a multipart message: skip to the first boundary,
+ * then read each part's sub-header and contents in turn until input
+ * runs out.  Exits with status 1 when no patch line was written.
+ * Returns early, without that check, when no boundary line is found.
+ */
+static void handle_multipart_body(void)
+{
+       int part_num = 0;
+
+       /* Skip up to the first boundary */
+       while (fgets(line, sizeof(line), stdin) != NULL)
+               if (is_multipart_boundary(line)) {
+                       part_num = 1;
+                       break;
+               }
+       if (!part_num)
+               return;
+       /* We are on boundary line.  Start slurping the subhead. */
+       while (1) {
+               int len = read_one_header_line(line, sizeof(line), stdin);
+               if (!len) {
+                       /* Leave the loop, not the function, at end of
+                        * input so that the checks below actually run.
+                        */
+                       if (handle_multipart_one_part() < 0)
+                               break;
+               }
+               else
+                       check_subheader_line(line, len);
+       }
+       fclose(patchfile);
+       if (!patch_lines) {
+               fprintf(stderr, "No patch found\n");
+               exit(1);
+       }
+}
+
+/* Non multipart message: skip blank lines and in-body headers, then
+ * hand the rest to handle_commit_msg() and handle_patch().  Exits with
+ * status 1 when no patch line was written.
+ */
+static void handle_body(void)
+{
+       int seen = 0;
+
+       while (fgets(line, sizeof(line), stdin) != NULL) {
+               int len = eatspace(line);
+               if (!len)
+                       continue;
+               if (0 <= seen && handle_inbody_header(&seen, line))
+                       continue;
+               seen = -1; /* no more inbody headers */
+               /* put back the newline that eatspace() removed */
+               line[len] = '\n';
+               handle_info();
+               handle_commit_msg();
+               handle_patch();
+               break;
+       }
+       fclose(patchfile);
+       if (!patch_lines) {
+               fprintf(stderr, "No patch found\n");
+               exit(1);
+       }
+}
+
+static const char mailinfo_usage[] =
+       "git-mailinfo [-k] [-u] msg patch <mail >info";
+
+static void usage(void) {
+       fprintf(stderr, "%s\n", mailinfo_usage);
+       exit(1);
+}
+
+/*
+ * git-mailinfo [-k] [-u] msg patch <mail >info
+ *
+ * Reads one mail from stdin, writes the commit message to "msg" and
+ * the patch to "patch"; the author information goes to stdout.
+ */
+int main(int argc, char **argv)
+{
+       /* leading options; anything unknown prints the usage and exits */
+       while (1 < argc && argv[1][0] == '-') {
+               if (!strcmp(argv[1], "-k"))
+                       keep_subject = 1;
+               else if (!strcmp(argv[1], "-u"))
+                       metainfo_utf8 = 1;
+               else
+                       usage();
+               argc--; argv++;
+       }
+
+       if (argc != 3)
+               usage();
+       cmitmsg = fopen(argv[1], "w");
+       if (!cmitmsg) {
+               perror(argv[1]);
+               exit(1);
+       }
+       patchfile = fopen(argv[2], "w");
+       if (!patchfile) {
+               perror(argv[2]);
+               exit(1);
+       }
+       /* headers first; a zero-length header line switches to the body */
+       while (1) {
+               int len = read_one_header_line(line, sizeof(line), stdin);
+               if (!len) {
+                       if (multipart_boundary[0])
+                               handle_multipart_body();
+                       else
+                               handle_body();
+                       break;
+               }
+               check_header_line(line, len);
+       }
+       return 0;
+}
diff --git a/mailsplit.c b/mailsplit.c
new file mode 100644 (file)
index 0000000..a3238c2
--- /dev/null
@@ -0,0 +1,145 @@
+/*
+ * Totally braindamaged mbox splitter program.
+ *
+ * It just splits a mbox into a list of files: "0001" "0002" ..
+ * so you can process them further from there.
+ */
+#include <unistd.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <assert.h>
+
+static int usage(void)
+{
+       fputs("mailsplit <mbox> <directory>\n", stderr);
+       exit(1);        /* never returns, so no int value is produced */
+}
+
+/* Count the bytes of the line starting at map, its '\n' included; */
+/* the count stops early once size bytes have been consumed. */
+static int linelen(const char *map, unsigned long size)
+{
+       int n = 0;
+
+       for (;;) {
+               char ch = map[n++];
+               if (!--size || ch == '\n')
+                       return n;
+       }
+}
+
+/* Heuristic: is the len-byte line an mbox "From " separator?  1 = yes. */
+static int is_from_line(const char *line, int len)
+{
+       const char *colon;
+
+       if (len < 20 || memcmp("From ", line, 5))
+               return 0;
+       /* Scan backwards for the rightmost ':' after the "From " prefix */
+       colon = line + len - 2;
+       line += 5;
+       for (;;) {
+               if (colon < line)
+                       return 0;
+               if (*--colon == ':')
+                       break;
+       }
+       /* Digits must surround it; the casts keep isdigit() well defined */
+       if (!isdigit((unsigned char)colon[-4]) ||
+           !isdigit((unsigned char)colon[-2]) ||
+           !isdigit((unsigned char)colon[-1]) ||
+           !isdigit((unsigned char)colon[ 1]) ||
+           !isdigit((unsigned char)colon[ 2]))
+               return 0;
+       /* year */
+       if (strtol(colon+3, NULL, 10) <= 90)
+               return 0;
+
+       /* Ok, close enough */
+       return 1;
+}
+
+/* Return the byte length of the first message in map[0..size), or die. */
+static unsigned long parse_email(const void *map, unsigned long size)
+{
+       /* ISO C has no arithmetic on void *, so walk with a char cursor */
+       const char *cur = map;
+       unsigned long offset;
+
+       if (size < 6 || memcmp("From ", cur, 5))
+               goto corrupt;
+
+       /* Make sure we don't trigger on this first line */
+       cur++; size--; offset = 1;
+
+       /* Stop at the next line that is_from_line() accepts, or at the end. */
+       do {
+               int len = linelen(cur, size);
+               if (is_from_line(cur, len))
+                       return offset;
+               cur += len;
+               size -= len;
+               offset += len;
+       } while (size);
+       return offset;
+
+corrupt:
+       fprintf(stderr, "corrupt mailbox\n");
+       exit(1);
+}
+
+/* Split the mbox argv[1] into files 0001, 0002, ... inside argv[2]. */
+int main(int argc, char **argv)
+{
+       int fd, nr = 0;
+       struct stat st;
+       unsigned long size;
+       const char *map;        /* char cursor: no void * arithmetic in ISO C */
+
+       if (argc != 3)
+               usage();
+       fd = open(argv[1], O_RDONLY);
+       if (fd < 0) {
+               perror(argv[1]);
+               exit(1);
+       }
+       if (chdir(argv[2]) < 0)         /* the open(name) below is relative */
+               usage();
+       if (fstat(fd, &st) < 0) {
+               perror("stat");
+               exit(1);
+       }
+       size = st.st_size;
+       map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
+       if (map == MAP_FAILED) {
+               perror("mmap");
+               close(fd);
+               exit(1);
+       }
+       close(fd);
+       do {
+               char name[16];          /* fits "%04d" of any 32-bit int */
+               unsigned long len = parse_email(map, size);
+               assert(len <= size);
+               sprintf(name, "%04d", ++nr);
+               fd = open(name, O_WRONLY | O_CREAT | O_EXCL, 0600);
+               if (fd < 0) {           /* O_EXCL: never clobber a file */
+                       perror(name);
+                       exit(1);
+               }
+               if (write(fd, map, len) != len) {
+                       perror("write");
+                       exit(1);
+               }
+               close(fd);
+               map += len;
+               size -= len;
+       } while (size > 0);
+       return 0;
+}
diff --git a/tools/.gitignore b/tools/.gitignore
deleted file mode 100644 (file)
index d1ea9ea..0000000
+++ /dev/null
@@ -1,2 +0,0 @@
-git-mailinfo
-git-mailsplit
diff --git a/tools/Makefile b/tools/Makefile
deleted file mode 100644 (file)
index 5cc6d14..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-#
-# Make Linus git-tools
-#
-CC=gcc
-CFLAGS = -O2 -g -Wall
-ALL_CFLAGS = $(CFLAGS)
-INSTALL=install
-prefix=$(HOME)
-bindir=$(prefix)/bin
-# DESTDIR=
-
-PROGRAMS=git-mailsplit git-mailinfo
-SCRIPTS=git-applymbox git-applypatch
-
-git-%: %.c
-       $(CC) $(ALL_CFLAGS) -o $@ $(filter %.c,$^)
-
-all: $(PROGRAMS)
-
-install: $(PROGRAMS) $(SCRIPTS)
-       $(INSTALL) -m755 -d $(DESTDIR)$(bindir)
-       $(INSTALL) $(PROGRAMS) $(SCRIPTS) $(DESTDIR)$(bindir)
-
-clean:
-       rm -f $(PROGRAMS) *.o
diff --git a/tools/git-applymbox b/tools/git-applymbox
deleted file mode 100755 (executable)
index e58bb21..0000000
+++ /dev/null
@@ -1,109 +0,0 @@
-#!/bin/sh
-##
-## "dotest" is my stupid name for my patch-application script, which
-## I never got around to renaming after I tested it. We're now on the
-## second generation of scripts, still called "dotest".
-##
-## Update: Ryan Anderson finally shamed me into naming this "applymbox".
-##
-## You give it a mbox-format collection of emails, and it will try to
-## apply them to the kernel using "applypatch"
-##
-## applymbox [-u] [-k] [-q] (-c .dotest/msg-number | mail_archive) [Signoff_file]"
-##
-## The patch application may fail in the middle.  In which case:
-## (1) look at .dotest/patch and fix it up to apply
-## (2) re-run applymbox with -c .dotest/msg-number for the current one.
-## Pay a special attention to the commit log message if you do this and
-## use a Signoff_file, because applypatch wants to append the sign-off
-## message to msg-clean every time it is run.
-
-. git-sh-setup-script || die "Not a git archive"
-
-usage () {
-    echo >&2 "applymbox [-u] [-k] [-q] (-c .dotest/<num> | mbox) [signoff]"
-    exit 1
-}
-
-keep_subject= query_apply= continue= utf8= resume=t
-while case "$#" in 0) break ;; esac
-do
-       case "$1" in
-       -u)     utf8=-u ;;
-       -k)     keep_subject=-k ;;
-       -q)     query_apply=t ;;
-       -c)     continue="$2"; resume=f; shift ;;
-       -*)     usage ;;
-       *)      break ;;
-       esac
-       shift
-done
-
-case "$continue" in
-'')
-       rm -rf .dotest
-       mkdir .dotest
-       git-mailsplit "$1" .dotest || exit 1
-       shift
-esac
-
-files=$(git-diff-cache --cached --name-only HEAD) || exit
-if [ "$files" ]; then
-   echo "Dirty index: cannot apply patches (dirty: $files)" >&2
-   exit 1
-fi
-
-case "$query_apply" in
-t)     touch .dotest/.query_apply
-esac
-case "$keep_subject" in
--k)    : >.dotest/.keep_subject
-esac
-
-signoff="$1"
-set x .dotest/0*
-shift
-while case "$#" in 0) break;; esac
-do
-    i="$1" 
-    case "$resume,$continue" in
-    f,$i)      resume=t;;
-    f,*)       shift
-               continue;;
-    *)
-           git-mailinfo $keep_subject $utf8 \
-               .dotest/msg .dotest/patch <$i >.dotest/info || exit 1
-           git-stripspace < .dotest/msg > .dotest/msg-clean
-           ;;
-    esac
-    while :; # for fixing up and retry
-    do
-       git-applypatch .dotest/msg-clean .dotest/patch .dotest/info "$signoff"
-       case "$?" in
-       0 | 2 )
-               # 2 is a special exit code from applypatch to indicate that
-               # the patch wasn't applied, but continue anyway 
-               ;;
-       *)
-               ret=$?
-               if test -f .dotest/.query_apply
-               then
-                       echo >&2 "* Patch failed."
-                       echo >&2 "* You could fix it up in your editor and"
-                       echo >&2 "  retry.  If you want to do so, say yes here"
-                       echo >&2 "  AFTER fixing .dotest/patch up."
-                       echo >&2 -n "Retry [y/N]? "
-                       read yesno
-                       case "$yesno" in
-                       [Yy]*)
-                               continue ;;
-                       esac
-               fi
-               exit $ret
-       esac
-       break
-    done
-    shift
-done
-# return to pristine
-rm -fr .dotest
diff --git a/tools/git-applypatch b/tools/git-applypatch
deleted file mode 100755 (executable)
index e5bc3c0..0000000
+++ /dev/null
@@ -1,118 +0,0 @@
-#!/bin/sh
-##
-## applypatch takes four file arguments, and uses those to
-## apply the unpacked patch (surprise surprise) that they
-## represent to the current tree.
-##
-## The arguments are:
-##     $1 - file with commit message
-##     $2 - file with the actual patch
-##     $3 - "info" file with Author, email and subject
-##     $4 - optional file containing signoff to add
-##
-. git-sh-setup-script || die "Not a git archive."
-
-final=.dotest/final-commit
-##
-## If this file exists, we ask before applying
-##
-query_apply=.dotest/.query_apply
-
-## We do not munge the first line of the commit message too much
-## if this file exists.
-keep_subject=.dotest/.keep_subject
-
-
-MSGFILE=$1
-PATCHFILE=$2
-INFO=$3
-SIGNOFF=$4
-EDIT=${VISUAL:-${EDITOR:-vi}}
-
-export GIT_AUTHOR_NAME="$(sed -n '/^Author/ s/Author: //p' .dotest/info)"
-export GIT_AUTHOR_EMAIL="$(sed -n '/^Email/ s/Email: //p' .dotest/info)"
-export GIT_AUTHOR_DATE="$(sed -n '/^Date/ s/Date: //p' .dotest/info)"
-export SUBJECT="$(sed -n '/^Subject/ s/Subject: //p' .dotest/info)"
-
-if test '' != "$SIGNOFF"
-then
-       if test -f "$SIGNOFF"
-       then
-               SIGNOFF=`cat "$SIGNOFF"` || exit
-       elif case "$SIGNOFF" in yes | true | me | please) : ;; *) false ;; esac
-       then
-               SIGNOFF=`git-var GIT_COMMITTER_IDENT | sed -e '
-                               s/>.*/>/
-                               s/^/Signed-off-by: /'
-               `
-       else
-               SIGNOFF=
-       fi
-       if test '' != "$SIGNOFF"
-       then
-               LAST_SIGNED_OFF_BY=`
-                       sed -ne '/^Signed-off-by: /p' "$MSGFILE" |
-                       tail -n 1
-               `
-               test "$LAST_SIGNED_OFF_BY" = "$SIGNOFF" ||
-               echo "$SIGNOFF" >>"$MSGFILE"
-       fi
-fi
-
-patch_header=
-test -f "$keep_subject" || patch_header='[PATCH] '
-
-{
-       echo "$patch_header$SUBJECT"
-       if test -s "$MSGFILE"
-       then
-               echo
-               cat "$MSGFILE"
-       fi
-} >"$final"
-
-interactive=yes
-test -f "$query_apply" || interactive=no
-
-while [ "$interactive" = yes ]; do
-       echo "Commit Body is:"
-       echo "--------------------------"
-       cat "$final"
-       echo "--------------------------"
-       echo -n "Apply? [y]es/[n]o/[e]dit/[a]ccept all "
-       read reply
-       case "$reply" in
-               y|Y) interactive=no;;
-               n|N) exit 2;;   # special value to tell dotest to keep going
-               e|E) "$EDIT" "$final";;
-               a|A) rm -f "$query_apply"
-                    interactive=no ;;
-       esac
-done
-
-if test -x "$GIT_DIR"/hooks/applypatch-msg
-then
-       "$GIT_DIR"/hooks/applypatch-msg "$final" || exit
-fi
-
-echo
-echo Applying "'$SUBJECT'"
-echo
-
-git-apply --index "$PATCHFILE" || exit 1
-
-if test -x "$GIT_DIR"/hooks/pre-applypatch
-then
-       "$GIT_DIR"/hooks/pre-applypatch || exit
-fi
-
-tree=$(git-write-tree) || exit 1
-echo Wrote tree $tree
-commit=$(git-commit-tree $tree -p $(cat "$GIT_DIR"/HEAD) < "$final") || exit 1
-echo Committed: $commit
-echo $commit > "$GIT_DIR"/HEAD
-
-if test -x "$GIT_DIR"/hooks/post-applypatch
-then
-       "$GIT_DIR"/hooks/post-applypatch
-fi
diff --git a/tools/mailinfo.c b/tools/mailinfo.c
deleted file mode 100644 (file)
index df470bb..0000000
+++ /dev/null
@@ -1,749 +0,0 @@
-/*
- * Another stupid program, this one parsing the headers of an
- * email to figure out authorship and subject
- */
-#define _GNU_SOURCE
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include <iconv.h>
-
-static FILE *cmitmsg, *patchfile;
-
-static int keep_subject = 0;
-static int metainfo_utf8 = 0;
-static char line[1000];
-static char date[1000];
-static char name[1000];
-static char email[1000];
-static char subject[1000];
-
-static enum  {
-       TE_DONTCARE, TE_QP, TE_BASE64,
-} transfer_encoding;
-static char charset[256];
-
-static char multipart_boundary[1000];
-static int multipart_boundary_len;
-static int patch_lines = 0;
-
-static char *sanity_check(char *name, char *email)
-{
-       int len = strlen(name);
-       if (len < 3 || len > 60)
-               return email;
-       if (strchr(name, '@') || strchr(name, '<') || strchr(name, '>'))
-               return email;
-       return name;
-}
-
-static int handle_from(char *line)
-{
-       char *at = strchr(line, '@');
-       char *dst;
-
-       if (!at)
-               return 0;
-
-       /*
-        * If we already have one email, don't take any confusing lines
-        */
-       if (*email && strchr(at+1, '@'))
-               return 0;
-
-       /* Pick up the string around '@', possibly delimited with <>
-        * pair; that is the email part.  White them out while copying.
-        */
-       while (at > line) {
-               char c = at[-1];
-               if (isspace(c))
-                       break;
-               if (c == '<') {
-                       at[-1] = ' ';
-                       break;
-               }
-               at--;
-       }
-       dst = email;
-       for (;;) {
-               unsigned char c = *at;
-               if (!c || c == '>' || isspace(c)) {
-                       if (c == '>')
-                               *at = ' ';
-                       break;
-               }
-               *at++ = ' ';
-               *dst++ = c;
-       }
-       *dst++ = 0;
-
-       /* The remainder is name.  It could be "John Doe <john.doe@xz>"
-        * or "john.doe@xz (John Doe)", but we have whited out the
-        * email part, so trim from both ends, possibly removing
-        * the () pair at the end.
-        */
-       at = line + strlen(line);
-       while (at > line) {
-               unsigned char c = *--at;
-               if (!isspace(c)) {
-                       at[(c == ')') ? 0 : 1] = 0;
-                       break;
-               }
-       }
-
-       at = line;
-       for (;;) {
-               unsigned char c = *at;
-               if (!c || !isspace(c)) {
-                       if (c == '(')
-                               at++;
-                       break;
-               }
-               at++;
-       }
-       at = sanity_check(at, email);
-       strcpy(name, at);
-       return 1;
-}
-
-static int handle_date(char *line)
-{
-       strcpy(date, line);
-       return 0;
-}
-
-static int handle_subject(char *line)
-{
-       strcpy(subject, line);
-       return 0;
-}
-
-/* NOTE NOTE NOTE.  We do not claim we do full MIME.  We just attempt
- * to have enough heuristics to grok MIME encoded patches often found
- * on our mailing lists.  For example, we do not even treat header lines
- * case insensitively.
- */
-
-static int slurp_attr(const char *line, const char *name, char *attr)
-{
-       char *ends, *ap = strcasestr(line, name);
-       size_t sz;
-
-       if (!ap) {
-               *attr = 0;
-               return 0;
-       }
-       ap += strlen(name);
-       if (*ap == '"') {
-               ap++;
-               ends = "\"";
-       }
-       else
-               ends = "; \t";
-       sz = strcspn(ap, ends);
-       memcpy(attr, ap, sz);
-       attr[sz] = 0;
-       return 1;
-}
-
-static int handle_subcontent_type(char *line)
-{
-       /* We do not want to mess with boundary.  Note that we do not
-        * handle nested multipart.
-        */
-       if (strcasestr(line, "boundary=")) {
-               fprintf(stderr, "Not handling nested multipart message.\n");
-               exit(1);
-       }
-       slurp_attr(line, "charset=", charset);
-       if (*charset) {
-               int i, c;
-               for (i = 0; (c = charset[i]) != 0; i++)
-                       charset[i] = tolower(c);
-       }
-       return 0;
-}
-
-static int handle_content_type(char *line)
-{
-       *multipart_boundary = 0;
-       if (slurp_attr(line, "boundary=", multipart_boundary + 2)) {
-               memcpy(multipart_boundary, "--", 2);
-               multipart_boundary_len = strlen(multipart_boundary);
-       }
-       slurp_attr(line, "charset=", charset);
-       return 0;
-}
-
-static int handle_content_transfer_encoding(char *line)
-{
-       if (strcasestr(line, "base64"))
-               transfer_encoding = TE_BASE64;
-       else if (strcasestr(line, "quoted-printable"))
-               transfer_encoding = TE_QP;
-       else
-               transfer_encoding = TE_DONTCARE;
-       return 0;
-}
-
-static int is_multipart_boundary(const char *line)
-{
-       return (!memcmp(line, multipart_boundary, multipart_boundary_len));
-}
-
-static int eatspace(char *line)
-{
-       int len = strlen(line);
-       while (len > 0 && isspace(line[len-1]))
-               line[--len] = 0;
-       return len;
-}
-
-#define SEEN_FROM 01
-#define SEEN_DATE 02
-#define SEEN_SUBJECT 04
-
-/* First lines of body can have From:, Date:, and Subject: */
-static int handle_inbody_header(int *seen, char *line)
-{
-       if (!memcmp("From:", line, 5) && isspace(line[5])) {
-               if (!(*seen & SEEN_FROM) && handle_from(line+6)) {
-                       *seen |= SEEN_FROM;
-                       return 1;
-               }
-       }
-       if (!memcmp("Date:", line, 5) && isspace(line[5])) {
-               if (!(*seen & SEEN_DATE)) {
-                       handle_date(line+6);
-                       *seen |= SEEN_DATE;
-                       return 1;
-               }
-       }
-       if (!memcmp("Subject:", line, 8) && isspace(line[8])) {
-               if (!(*seen & SEEN_SUBJECT)) {
-                       handle_subject(line+9);
-                       *seen |= SEEN_SUBJECT;
-                       return 1;
-               }
-       }
-       if (!memcmp("[PATCH]", line, 7) && isspace(line[7])) {
-               if (!(*seen & SEEN_SUBJECT)) {
-                       handle_subject(line);
-                       *seen |= SEEN_SUBJECT;
-                       return 1;
-               }
-       }
-       return 0;
-}
-
-static char *cleanup_subject(char *subject)
-{
-       if (keep_subject)
-               return subject;
-       for (;;) {
-               char *p;
-               int len, remove;
-               switch (*subject) {
-               case 'r': case 'R':
-                       if (!memcmp("e:", subject+1, 2)) {
-                               subject +=3;
-                               continue;
-                       }
-                       break;
-               case ' ': case '\t': case ':':
-                       subject++;
-                       continue;
-
-               case '[':
-                       p = strchr(subject, ']');
-                       if (!p) {
-                               subject++;
-                               continue;
-                       }
-                       len = strlen(p);
-                       remove = p - subject;
-                       if (remove <= len *2) {
-                               subject = p+1;
-                               continue;
-                       }       
-                       break;
-               }
-               return subject;
-       }
-}                      
-
-static void cleanup_space(char *buf)
-{
-       unsigned char c;
-       while ((c = *buf) != 0) {
-               buf++;
-               if (isspace(c)) {
-                       buf[-1] = ' ';
-                       c = *buf;
-                       while (isspace(c)) {
-                               int len = strlen(buf);
-                               memmove(buf, buf+1, len);
-                               c = *buf;
-                       }
-               }
-       }
-}
-
-typedef int (*header_fn_t)(char *);
-struct header_def {
-       const char *name;
-       header_fn_t func;
-       int namelen;
-};
-
-static void check_header(char *line, int len, struct header_def *header)
-{
-       int i;
-
-       if (header[0].namelen <= 0) {
-               for (i = 0; header[i].name; i++)
-                       header[i].namelen = strlen(header[i].name);
-       }
-       for (i = 0; header[i].name; i++) {
-               int len = header[i].namelen;
-               if (!strncasecmp(line, header[i].name, len) &&
-                   line[len] == ':' && isspace(line[len + 1])) {
-                       header[i].func(line + len + 2);
-                       break;
-               }
-       }
-}
-
-static void check_subheader_line(char *line, int len)
-{
-       static struct header_def header[] = {
-               { "Content-Type", handle_subcontent_type },
-               { "Content-Transfer-Encoding",
-                 handle_content_transfer_encoding },
-               { NULL },
-       };
-       check_header(line, len, header);
-}
-static void check_header_line(char *line, int len)
-{
-       static struct header_def header[] = {
-               { "From", handle_from },
-               { "Date", handle_date },
-               { "Subject", handle_subject },
-               { "Content-Type", handle_content_type },
-               { "Content-Transfer-Encoding",
-                 handle_content_transfer_encoding },
-               { NULL },
-       };
-       check_header(line, len, header);
-}
-
-static int read_one_header_line(char *line, int sz, FILE *in)
-{
-       int ofs = 0;
-       while (ofs < sz) {
-               int peek, len;
-               if (fgets(line + ofs, sz - ofs, in) == NULL)
-                       return ofs;
-               len = eatspace(line + ofs);
-               if (len == 0)
-                       return ofs;
-               peek = fgetc(in); ungetc(peek, in);
-               if (peek == ' ' || peek == '\t') {
-                       /* Yuck, 2822 header "folding" */
-                       ofs += len;
-                       continue;
-               }
-               return ofs + len;
-       }
-       return ofs;
-}
-
-static unsigned hexval(int c)
-{
-       if (c >= '0' && c <= '9')
-               return c - '0';
-       if (c >= 'a' && c <= 'f')
-               return c - 'a' + 10;
-       if (c >= 'A' && c <= 'F')
-               return c - 'A' + 10;
-       return ~0;
-}
-
-static int decode_q_segment(char *in, char *ot, char *ep)
-{
-       int c;
-       while ((c = *in++) != 0 && (in <= ep)) {
-               if (c == '=') {
-                       int d = *in++;
-                       if (d == '\n' || !d)
-                               break; /* drop trailing newline */
-                       *ot++ = ((hexval(d) << 4) | hexval(*in++));
-               }
-               else
-                       *ot++ = c;
-       }
-       *ot = 0;
-       return 0;
-}
-
-static int decode_b_segment(char *in, char *ot, char *ep)
-{
-       /* Decode in..ep, possibly in-place to ot */
-       int c, pos = 0, acc = 0;
-
-       while ((c = *in++) != 0 && (in <= ep)) {
-               if (c == '+')
-                       c = 62;
-               else if (c == '/')
-                       c = 63;
-               else if ('A' <= c && c <= 'Z')
-                       c -= 'A';
-               else if ('a' <= c && c <= 'z')
-                       c -= 'a' - 26;
-               else if ('0' <= c && c <= '9')
-                       c -= '0' - 52;
-               else if (c == '=') {
-                       /* padding is almost like (c == 0), except we do
-                        * not output NUL resulting only from it;
-                        * for now we just trust the data.
-                        */
-                       c = 0;
-               }
-               else
-                       continue; /* garbage */
-               switch (pos++) {
-               case 0:
-                       acc = (c << 2);
-                       break;
-               case 1:
-                       *ot++ = (acc | (c >> 4));
-                       acc = (c & 15) << 4;
-                       break;
-               case 2:
-                       *ot++ = (acc | (c >> 2));
-                       acc = (c & 3) << 6;
-                       break;
-               case 3:
-                       *ot++ = (acc | c);
-                       acc = pos = 0;
-                       break;
-               }
-       }
-       *ot = 0;
-       return 0;
-}
-
-static void convert_to_utf8(char *line, char *charset)
-{
-       if (*charset) {
-               char *in, *out;
-               size_t insize, outsize, nrc;
-               char outbuf[4096]; /* cheat */
-               iconv_t conv = iconv_open("utf-8", charset);
-
-               if (conv == (iconv_t) -1) {
-                       fprintf(stderr, "cannot convert from %s to utf-8\n",
-                               charset);
-                       *charset = 0;
-                       return;
-               }
-               in = line;
-               insize = strlen(in);
-               out = outbuf;
-               outsize = sizeof(outbuf);
-               nrc = iconv(conv, &in, &insize, &out, &outsize);
-               iconv_close(conv);
-               if (nrc == (size_t) -1)
-                       return;
-               *out = 0;
-               strcpy(line, outbuf);
-       }
-}
-
-static void decode_header_bq(char *it)
-{
-       char *in, *out, *ep, *cp, *sp;
-       char outbuf[1000];
-
-       in = it;
-       out = outbuf;
-       while ((ep = strstr(in, "=?")) != NULL) {
-               int sz, encoding;
-               char charset_q[256], piecebuf[256];
-               if (in != ep) {
-                       sz = ep - in;
-                       memcpy(out, in, sz);
-                       out += sz;
-                       in += sz;
-               }
-               /* E.g.
-                * ep : "=?iso-2022-jp?B?GyR...?= foo"
-                * ep : "=?ISO-8859-1?Q?Foo=FCbar?= baz"
-                */
-               ep += 2;
-               cp = strchr(ep, '?');
-               if (!cp)
-                       return; /* no munging */
-               for (sp = ep; sp < cp; sp++)
-                       charset_q[sp - ep] = tolower(*sp);
-               charset_q[cp - ep] = 0;
-               encoding = cp[1];
-               if (!encoding || cp[2] != '?')
-                       return; /* no munging */
-               ep = strstr(cp + 3, "?=");
-               if (!ep)
-                       return; /* no munging */
-               switch (tolower(encoding)) {
-               default:
-                       return; /* no munging */
-               case 'b':
-                       sz = decode_b_segment(cp + 3, piecebuf, ep);
-                       break;
-               case 'q':
-                       sz = decode_q_segment(cp + 3, piecebuf, ep);
-                       break;
-               }
-               if (sz < 0)
-                       return;
-               if (metainfo_utf8)
-                       convert_to_utf8(piecebuf, charset_q);
-               strcpy(out, piecebuf);
-               out += strlen(out);
-               in = ep + 2;
-       }
-       strcpy(out, in);
-       strcpy(it, outbuf);
-}
-
-static void decode_transfer_encoding(char *line)
-{
-       char *ep;
-
-       switch (transfer_encoding) {
-       case TE_QP:
-               ep = line + strlen(line);
-               decode_q_segment(line, line, ep);
-               break;
-       case TE_BASE64:
-               ep = line + strlen(line);
-               decode_b_segment(line, line, ep);
-               break;
-       case TE_DONTCARE:
-               break;
-       }
-}
-
-static void handle_info(void)
-{
-       char *sub;
-       static int done_info = 0;
-
-       if (done_info)
-               return;
-
-       done_info = 1;
-       sub = cleanup_subject(subject);
-       cleanup_space(name);
-       cleanup_space(date);
-       cleanup_space(email);
-       cleanup_space(sub);
-
-       /* Unwrap inline B and Q encoding, and optionally
-        * normalize the meta information to utf8.
-        */
-       decode_header_bq(name);
-       decode_header_bq(date);
-       decode_header_bq(email);
-       decode_header_bq(sub);
-       printf("Author: %s\nEmail: %s\nSubject: %s\nDate: %s\n\n",
-              name, email, sub, date);
-}
-
-/* We are inside message body and have read line[] already.
- * Spit out the commit log.
- */
-static int handle_commit_msg(void)
-{
-       if (!cmitmsg)
-               return 0;
-       do {
-               if (!memcmp("diff -", line, 6) ||
-                   !memcmp("---", line, 3) ||
-                   !memcmp("Index: ", line, 7))
-                       break;
-               if ((multipart_boundary[0] && is_multipart_boundary(line))) {
-                       /* We come here when the first part had only
-                        * the commit message without any patch.  We
-                        * pretend we have not seen this line yet, and
-                        * go back to the loop.
-                        */
-                       return 1;
-               }
-
-               /* Unwrap transfer encoding and optionally
-                * normalize the log message to UTF-8.
-                */
-               decode_transfer_encoding(line);
-               if (metainfo_utf8)
-                       convert_to_utf8(line, charset);
-               fputs(line, cmitmsg);
-       } while (fgets(line, sizeof(line), stdin) != NULL);
-       fclose(cmitmsg);
-       cmitmsg = NULL;
-       return 0;
-}
-
-/* We have done the commit message and have the first
- * line of the patch in line[].
- */
-static void handle_patch(void)
-{
-       do {
-               if (multipart_boundary[0] && is_multipart_boundary(line))
-                       break;
-               /* Only unwrap transfer encoding but otherwise do not
-                * do anything.  We do *NOT* want UTF-8 conversion
-                * here; we are dealing with the user payload.
-                */
-               decode_transfer_encoding(line);
-               fputs(line, patchfile);
-               patch_lines++;
-       } while (fgets(line, sizeof(line), stdin) != NULL);
-}
-
-/* multipart boundary and transfer encoding are set up for us, and we
- * are at the end of the sub header.  do equivalent of handle_body up
- * to the next boundary without closing patchfile --- we will expect
- * that the first part to contain commit message and a patch, and
- * handle other parts as pure patches.
- */
-static int handle_multipart_one_part(void)
-{
-       int seen = 0;
-       int n = 0;
-       int len;
-
-       while (fgets(line, sizeof(line), stdin) != NULL) {
-       again:
-               len = eatspace(line);
-               n++;
-               if (!len)
-                       continue;
-               if (is_multipart_boundary(line))
-                       break;
-               if (0 <= seen && handle_inbody_header(&seen, line))
-                       continue;
-               seen = -1; /* no more inbody headers */
-               line[len] = '\n';
-               handle_info();
-               if (handle_commit_msg())
-                       goto again;
-               handle_patch();
-               break;
-       }
-       if (n == 0)
-               return -1;
-       return 0;
-}
-
-static void handle_multipart_body(void)
-{
-       int part_num = 0;
-
-       /* Skip up to the first boundary */
-       while (fgets(line, sizeof(line), stdin) != NULL)
-               if (is_multipart_boundary(line)) {
-                       part_num = 1;
-                       break;
-               }
-       if (!part_num)
-               return;
-       /* We are on boundary line.  Start slurping the subhead. */
-       while (1) {
-               int len = read_one_header_line(line, sizeof(line), stdin);
-               if (!len) {
-                       if (handle_multipart_one_part() < 0)
-                               return;
-               }
-               else
-                       check_subheader_line(line, len);
-       }
-       fclose(patchfile);
-       if (!patch_lines) {
-               fprintf(stderr, "No patch found\n");
-               exit(1);
-       }
-}
-
-/* Non multipart message */
-static void handle_body(void)
-{
-       int seen = 0;
-
-       while (fgets(line, sizeof(line), stdin) != NULL) {
-               int len = eatspace(line);
-               if (!len)
-                       continue;
-               if (0 <= seen && handle_inbody_header(&seen, line))
-                       continue;
-               seen = -1; /* no more inbody headers */
-               line[len] = '\n';
-               handle_info();
-               handle_commit_msg();
-               handle_patch();
-               break;
-       }
-       fclose(patchfile);
-       if (!patch_lines) {
-               fprintf(stderr, "No patch found\n");
-               exit(1);
-       }
-}
-
-static const char mailinfo_usage[] =
-       "git-mailinfo [-k] [-u] msg patch <mail >info";
-
-static void usage(void) {
-       fprintf(stderr, "%s\n", mailinfo_usage);
-       exit(1);
-}
-
-int main(int argc, char **argv)
-{
-       while (1 < argc && argv[1][0] == '-') {
-               if (!strcmp(argv[1], "-k"))
-                       keep_subject = 1;
-               else if (!strcmp(argv[1], "-u"))
-                       metainfo_utf8 = 1;
-               else
-                       usage();
-               argc--; argv++;
-       }
-
-       if (argc != 3)
-               usage();
-       cmitmsg = fopen(argv[1], "w");
-       if (!cmitmsg) {
-               perror(argv[1]);
-               exit(1);
-       }
-       patchfile = fopen(argv[2], "w");
-       if (!patchfile) {
-               perror(argv[2]);
-               exit(1);
-       }
-       while (1) {
-               int len = read_one_header_line(line, sizeof(line), stdin);
-               if (!len) {
-                       if (multipart_boundary[0])
-                               handle_multipart_body();
-                       else
-                               handle_body();
-                       break;
-               }
-               check_header_line(line, len);
-       }
-       return 0;
-}
diff --git a/tools/mailsplit.c b/tools/mailsplit.c
deleted file mode 100644 (file)
index a3238c2..0000000
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Totally braindamaged mbox splitter program.
- *
- * It just splits a mbox into a list of files: "0001" "0002" ..
- * so you can process them further from there.
- */
-#include <unistd.h>
-#include <stdlib.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-#include <string.h>
-#include <stdio.h>
-#include <ctype.h>
-#include <assert.h>
-
-/*
- * Print the command synopsis on stderr and terminate with exit status 1.
- * Declared int, but control never reaches a return: exit() ends the process.
- */
-static int usage(void)
-{
-       fputs("mailsplit <mbox> <directory>\n", stderr);
-       exit(1);
-}
-
-/*
- * Return the length of the first line within the size bytes at map,
- * counting the terminating '\n' when there is one.  If no newline occurs
- * within size bytes the result is size.  An empty buffer (size == 0)
- * yields 0 and map is not read at all.
- */
-static int linelen(const char *map, unsigned long size)
-{
-       /* memchr() examines at most size bytes, so size == 0 reads nothing */
-       const char *nl = memchr(map, '\n', size);
-
-       if (!nl)
-               return (int)size;
-       return (int)(nl - map) + 1;
-}
-
-/*
- * Heuristically decide whether the len bytes at line look like an mbox
- * "From " separator line.
- *
- * The line must be at least 20 bytes long and start with "From ".  The
- * last ':' found when scanning backwards from the end (presumably the one
- * between minutes and seconds of an hh:mm:ss time) must have a digit four
- * positions before it, two digits directly before it and two digits
- * directly after it.  The text following those two digits is parsed as a
- * decimal number (the year) and must be greater than 90.
- *
- * Returns 1 when the line qualifies, 0 otherwise.
- */
-static int is_from_line(const char *line, int len)
-{
-       const char *end = line + len;
-       const char *colon;
-       char year[32];
-       size_t n;
-
-       if (len < 20 || memcmp("From ", line, 5))
-               return 0;
-
-       colon = end - 2;
-       line += 5;
-       for (;;) {
-               if (colon < line)
-                       return 0;
-               if (*--colon == ':')
-                       break;
-       }
-
-       /* isdigit() is only defined for unsigned char values (and EOF) */
-       if (!isdigit((unsigned char)colon[-4]) ||
-           !isdigit((unsigned char)colon[-2]) ||
-           !isdigit((unsigned char)colon[-1]) ||
-           !isdigit((unsigned char)colon[ 1]) ||
-           !isdigit((unsigned char)colon[ 2]))
-               return 0;
-
-       /*
-        * year: the line is given as pointer plus length, not as a
-        * NUL-terminated string, and strtol() skips leading whitespace
-        * including '\n'.  Parse a bounded, terminated copy so the number
-        * can only come from this line.  colon + 3 never exceeds end
-        * because the search above starts at end - 2 and pre-decrements.
-        */
-       n = end - (colon + 3);
-       if (n > sizeof(year) - 1)
-               n = sizeof(year) - 1;
-       memcpy(year, colon + 3, n);
-       year[n] = '\0';
-       if (strtol(year, NULL, 10) <= 90)
-               return 0;
-
-       /* Ok, close enough */
-       return 1;
-}
-
-/*
- * Return how many bytes, counted from map, belong to the first message of
- * the mailbox: the offset of the next line accepted by is_from_line(), or
- * size when no such line follows.
- *
- * The buffer must hold at least 6 bytes and begin with "From "; otherwise
- * "corrupt mailbox" is printed on stderr and the program exits with
- * status 1.
- *
- * NOTE(review): offset is an unsigned long but is narrowed to int on
- * return, so a single message larger than INT_MAX bytes is misreported.
- */
-static int parse_email(const void *map, unsigned long size)
-{
-       /* walk the buffer as bytes; arithmetic on void * is not standard C */
-       const char *cur = map;
-       unsigned long offset;
-
-       if (size < 6 || memcmp("From ", cur, 5))
-               goto corrupt;
-
-       /* Make sure we don't trigger on this first line */
-       cur++;
-       size--;
-       offset = 1;
-
-       /*
-        * Search for a line beginning with "From ", and
-        * having something that looks like a date format.
-        */
-       do {
-               int len = linelen(cur, size);
-               if (is_from_line(cur, len))
-                       return offset;
-               cur += len;
-               size -= len;
-               offset += len;
-       } while (size);
-       return offset;
-
-corrupt:
-       fprintf(stderr, "corrupt mailbox\n");
-       exit(1);
-}
-
-/*
- * Write all len bytes at buf to fd, continuing after short writes.
- * If write() fails or makes no progress, report "write" via perror()
- * and exit with status 1.
- */
-static void write_or_die(int fd, const char *buf, unsigned long len)
-{
-       while (len) {
-               ssize_t done = write(fd, buf, len);
-
-               if (done <= 0) {
-                       perror("write");
-                       exit(1);
-               }
-               buf += done;
-               len -= done;
-       }
-}
-
-/*
- * Entry point: mailsplit <mbox> <directory>
- *
- * Opens the mbox read-only, changes into the directory, maps the whole
- * mbox and writes each message found by parse_email() to a newly created
- * file named "0001", "0002", ... (mode 0600, O_EXCL so existing files are
- * never overwritten).  A wrong argument count or a failing chdir() ends in
- * usage(); every other failure is reported with perror() and exit status 1.
- * Returns 0 once the whole mapping has been consumed.
- */
-int main(int argc, char **argv)
-{
-       int fd, nr;
-       struct stat st;
-       unsigned long size;
-       void *map;
-       const char *cur;
-
-       if (argc != 3)
-               usage();
-       fd = open(argv[1], O_RDONLY);
-       if (fd < 0) {
-               perror(argv[1]);
-               exit(1);
-       }
-       if (chdir(argv[2]) < 0)
-               usage();
-       if (fstat(fd, &st) < 0) {
-               perror("stat");
-               exit(1);
-       }
-       size = st.st_size;
-       map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
-       if (map == MAP_FAILED) {
-               perror("mmap");
-               close(fd);
-               exit(1);
-       }
-       close(fd);
-
-       /* step through the mapping as bytes; void * arithmetic is not standard C */
-       cur = map;
-       nr = 0;
-       do {
-               /* large enough for "%04d" of any int, sign and NUL included */
-               char name[16];
-               unsigned long len = parse_email(cur, size);
-               assert(len <= size);
-               sprintf(name, "%04d", ++nr);
-               fd = open(name, O_WRONLY | O_CREAT | O_EXCL, 0600);
-               if (fd < 0) {
-                       perror(name);
-                       exit(1);
-               }
-               write_or_die(fd, cur, len);
-               /* a failing close() can be the only report of a lost write */
-               if (close(fd) < 0) {
-                       perror(name);
-                       exit(1);
-               }
-               cur += len;
-               size -= len;
-       } while (size > 0);
-       return 0;
-}