1 #!/bin/bash
2 #
3 # git-subtree.sh: split/join git repositories in subdirectories of this one
4 #
5 # Copyright (C) 2009 Avery Pennarun <apenwarr@gmail.com>
6 #
# Invoked with no arguments at all: behave as if help had been requested.
[ $# -gt 0 ] || set -- -h
10 OPTS_SPEC="\
11 git subtree add --prefix=<prefix> <commit>
12 git subtree merge --prefix=<prefix> <commit>
13 git subtree pull --prefix=<prefix> <repository> <refspec...>
14 git subtree split --prefix=<prefix> <commit...>
15 --
16 h,help show the help
17 q quiet
18 d show debug messages
19 prefix= the name of the subdir to split out
20 options for 'split'
21 annotate= add a prefix to commit message of new commits
22 b,branch= create a new branch from the split subtree
23 ignore-joins ignore prior --rejoin commits
24 onto= try connecting new tree to an existing one
25 rejoin merge the new branch back into HEAD
26 options for 'add', 'merge', and 'pull'
27 squash merge subtree changes as a single commit
28 "
# Let git parse the command line against OPTS_SPEC.  The parser prints shell
# code on stdout, so its output has to be eval'ed as ONE quoted string:
# left unquoted, the shell word-splits it first, which collapses newlines and
# repeated blanks (mangling option values that contain them and flattening
# any multi-line output onto a single line) and exposes it to globbing.
# When the parser fails, `echo exit $?` makes the eval terminate the script
# with the parser's exit status.
eval "$(echo "$OPTS_SPEC" | git rev-parse --parseopt -- "$@" || echo exit $?)"

# Put git's exec-path in front so that git-sh-setup can be sourced by name.
PATH=$(git --exec-path):$PATH
# NOTE(review): die and require_work_tree are not defined in this file;
# presumably they come from git-sh-setup.
. git-sh-setup
require_work_tree
# Option state.  Everything starts out empty so that a value inherited from
# the caller's environment can never be mistaken for a command-line option.
# prefix is reset here as well: the "--prefix is required" check further
# down only tests for emptiness, so an exported `prefix` variable would
# otherwise silently satisfy it.
quiet=
branch=
debug=
command=
onto=
rejoin=
ignore_joins=
annotate=
squash=
prefix=
# Echo the arguments to stderr, but only when $debug is non-empty (-d).
debug()
{
	[ -z "$debug" ] || echo "$@" >&2
}
# Echo the arguments to stderr unless $quiet is non-empty (-q).
# Arguments are handed to echo unchanged, so echo options such as -n work.
say()
{
	[ -n "$quiet" ] || echo "$@" >&2
}
# Run the arguments as a command; abort through die, quoting the command,
# if it does not succeed.
assert()
{
	"$@" || die "assertion failed: " "$@"
}
68 #echo "Options: $*"
# Consume the normalized option list up to and including the "--"
# separator.  Each option is shifted off before it is examined, so for an
# option that takes a value, that value is in $1 at that point.
until [ $# -eq 0 ]; do
	opt="$1"
	shift
	case "$opt" in
	-q)
		quiet=1
		;;
	-d)
		debug=1
		;;
	--annotate)
		annotate="$1"
		shift
		;;
	--no-annotate)
		annotate=
		;;
	-b)
		branch="$1"
		shift
		;;
	--prefix)
		prefix="$1"
		shift
		;;
	--no-prefix)
		prefix=
		;;
	--onto)
		onto="$1"
		shift
		;;
	--no-onto)
		onto=
		;;
	--rejoin)
		rejoin=1
		;;
	--no-rejoin)
		rejoin=
		;;
	--ignore-joins)
		ignore_joins=1
		;;
	--no-ignore-joins)
		ignore_joins=
		;;
	--squash)
		squash=1
		;;
	--no-squash)
		squash=
		;;
	--)
		# end of options; what is left is the subcommand and its arguments
		break
		;;
	*)
		die "Unexpected option: $opt"
		;;
	esac
done
# The first remaining word is the subcommand.  Only "split" gets a default
# revision (HEAD) handed to git rev-parse later on.
command="$1"
shift
case "$command" in
add|merge|pull)
	default=
	;;
split)
	default="--default HEAD"
	;;
*)
	die "Unknown command '$command'"
	;;
esac

[ -n "$prefix" ] || die "You must provide the --prefix option."
# dirname of "<prefix>/." yields the prefix without any trailing slashes.
dir="$(dirname "$prefix/.")"
# Every subcommand except "pull" takes revisions: resolve them now and
# refuse anything that git rev-parse classifies as a plain path.
case "$command" in
pull)
	;;
*)
	revs=$(git rev-parse $default --revs-only "$@") || exit $?
	dirs="$(git rev-parse --no-revs --no-flags "$@")" || exit $?
	[ -z "$dirs" ] || die "Error: Use --prefix instead of bare filenames."
	;;
esac
# Dump the parsed state when debugging; the trailing empty entry produces
# the blank separator line.
for dbg_line in \
	"command: {$command}" \
	"quiet: {$quiet}" \
	"revs: {$revs}" \
	"dir: {$dir}" \
	"opts: {$*}" \
	""
do
	debug "$dbg_line"
done
# Create a fresh, empty cache directory private to this process
# ($GIT_DIR/subtree-cache/<pid>) and remember it in $cachedir.
cache_setup()
{
	cachedir="$GIT_DIR/subtree-cache/$$"
	if ! rm -rf "$cachedir"; then
		die "Can't delete old cachedir: $cachedir"
	fi
	if ! mkdir -p "$cachedir"; then
		die "Can't create new cachedir: $cachedir"
	fi
	debug "Using cachedir: $cachedir" >&2
}
# For each given old revision that has a readable cache entry, print the
# new revision recorded for it.  Revisions without an entry are skipped.
cache_get()
{
	for oldrev in $*; do
		[ -r "$cachedir/$oldrev" ] || continue
		read newrev <"$cachedir/$oldrev"
		echo $newrev
	done
}
# Record that old revision $1 maps to new revision $2.
# An entry may be written only once; the two bookkeeping keys latest_old
# and latest_new are the exception and may be overwritten freely.
cache_set()
{
	oldrev="$1"
	newrev="$2"
	case "$oldrev" in
	latest_old|latest_new)
		;;
	*)
		if [ -e "$cachedir/$oldrev" ]; then
			die "cache for $oldrev already exists!"
		fi
		;;
	esac
	echo "$newrev" >"$cachedir/$oldrev"
}
# Succeed (0) when git rev-parse can resolve $1, fail (1) otherwise.
# All output of git rev-parse is discarded.
rev_exists()
{
	git rev-parse "$1" >/dev/null 2>&1 || return 1
	return 0
}
# Print "^<rev>^" (an exclusion of the parent of $1, for use with rev-list)
# when that parent exists.  A commit without a parent has nothing to
# exclude: the parent could not show up in the rev-list anyway, so in that
# case nothing is printed.
try_remove_previous()
{
	rev_exists "$1^" || return 0
	echo "^$1^"
}
# Scan the log of HEAD for the most recent commit carrying a
# "git-subtree-dir: <dir>" line and print "<commit> <subtree commit>" for
# it.  Prints nothing when no such commit is found.
#   $1 - the subtree directory
find_latest_squash()
{
	debug "Looking for latest squash ($dir)..."
	dir="$1"
	sq=
	main=
	sub=
	git log --grep="^git-subtree-dir: $dir/*\$" \
		--pretty=format:'START %H%n%s%n%n%b%nEND%n' HEAD |
	while read a b junk; do
		debug "$a $b $junk"
		debug "{{$sq/$main/$sub}}"
		case "$a" in
		START)
			sq="$b"
			;;
		git-subtree-mainline:)
			main="$b"
			;;
		git-subtree-split:)
			sub="$b"
			;;
		END)
			if [ -z "$sub" ]; then
				# no split recorded in this commit; start over
				sq=
				main=
				sub=
				continue
			fi
			# A commit that also names a mainline is a rejoin
			# commit; pretend its sub was a squash.
			[ -z "$main" ] || sq="$sub"
			debug "Squash found: $sq $sub"
			echo "$sq" "$sub"
			break
			;;
		esac
	done
}
# Walk the log of the given revisions looking for commits that carry a
# "git-subtree-dir: <dir>" line, and seed the cache from them:
#   - split recorded without a mainline (squash): cache commit -> split
#   - split recorded with a mainline (prior split/rejoin): cache
#     mainline -> split and print rev-list exclusions for both parents
#   $1 - the subtree directory
#   $2 - the revisions to scan
find_existing_splits()
{
	debug "Looking for prior splits..."
	dir="$1"
	revs="$2"
	main=
	sub=
	git log --grep="^git-subtree-dir: $dir/*\$" \
		--pretty=format:'START %H%n%s%n%n%b%nEND%n' $revs |
	while read a b junk; do
		case "$a" in
		START)
			sq="$b"
			;;
		git-subtree-mainline:)
			main="$b"
			;;
		git-subtree-split:)
			sub="$b"
			;;
		END)
			debug " Main is: '$main'"
			if [ -n "$sub" ]; then
				if [ -z "$main" ]; then
					# squash commits refer to a subtree
					debug " Squash: $sq from $sub"
					cache_set "$sq" "$sub"
				else
					debug " Prior: $main -> $sub"
					cache_set $main $sub
					try_remove_previous "$main"
					try_remove_previous "$sub"
				fi
			fi
			main=
			sub=
			;;
		esac
	done
}
# Create a new commit that re-uses the authorship, committer data and
# message of an existing commit, and print the new commit's id.
#   $1 - commit to copy metadata and message from
#   $2 - tree for the new commit
#   $3 - parent options ("-p <rev> ..."), deliberately word-split
# The message is prefixed with $annotate.  Dies if the copy fails.
copy_commit()
{
	# We're going to set some environment vars here, so
	# do it in a subshell to get rid of them safely later
	debug copy_commit "{$1}" "{$2}" "{$3}"
	# %aD/%cD (RFC 2822) are used rather than %ad/%cd so that the dates
	# fed back into GIT_*_DATE do not depend on the user's log.date
	# setting.
	git log -1 --pretty=format:'%an%n%ae%n%aD%n%cn%n%ce%n%cD%n%s%n%n%b' "$1" |
	(
		# -r: keep backslashes in names and addresses literally
		read -r GIT_AUTHOR_NAME
		read -r GIT_AUTHOR_EMAIL
		read -r GIT_AUTHOR_DATE
		read -r GIT_COMMITTER_NAME
		read -r GIT_COMMITTER_EMAIL
		read -r GIT_COMMITTER_DATE
		export  GIT_AUTHOR_NAME \
			GIT_AUTHOR_EMAIL \
			GIT_AUTHOR_DATE \
			GIT_COMMITTER_NAME \
			GIT_COMMITTER_EMAIL \
			GIT_COMMITTER_DATE
		# printf instead of "echo -n": an annotation such as "-n" or
		# "-e" would otherwise be swallowed as an echo option.
		(printf '%s' "$annotate"; cat) |
		git commit-tree "$2" $3  # reads the rest of stdin
	) || die "Can't copy commit $1"
}
# Print the commit message used when a subtree is added.
#   $1 - subtree directory
#   $2 - mainline commit the subtree is added to
#   $3 - subtree commit being added
# The empty line after the subject is essential: without it git treats the
# "git-subtree-*:" lines as part of the subject paragraph and folds them
# into one line, so later log scans would never see them at the start of a
# line.  printf is used so the layout does not depend on tab-indented
# here-document lines.
add_msg()
{
	dir="$1"
	latest_old="$2"
	latest_new="$3"
	printf '%s\n' \
		"Add '$dir/' from commit '$latest_new'" \
		"" \
		"git-subtree-dir: $dir" \
		"git-subtree-mainline: $latest_old" \
		"git-subtree-split: $latest_new"
}
# Print the commit message used when a split is merged back (--rejoin).
#   $1 - subtree directory
#   $2 - latest mainline commit that was split
#   $3 - latest commit of the split history
# The empty line after the subject is essential: without it git treats the
# "git-subtree-*:" lines as part of the subject paragraph and folds them
# into one line, so later log scans would never see them at the start of a
# line.  printf is used so the layout does not depend on tab-indented
# here-document lines.
rejoin_msg()
{
	dir="$1"
	latest_old="$2"
	latest_new="$3"
	printf '%s\n' \
		"Split '$dir/' into commit '$latest_new'" \
		"" \
		"git-subtree-dir: $dir" \
		"git-subtree-mainline: $latest_old" \
		"git-subtree-split: $latest_new"
}
# Print the commit message for a squash commit.
#   $1 - subtree directory
#   $2 - previously squashed subtree commit (empty for the first squash)
#   $3 - subtree commit being squashed in
# With a previous commit the message lists what was added since then and,
# prefixed with "REVERT:", what is reachable only from the old commit.
squash_msg()
{
	dir="$1"
	oldsub="$2"
	newsub="$3"
	newsub_short=$(git rev-parse --short "$newsub")

	if [ -z "$oldsub" ]; then
		echo "Squashed '$dir/' content from commit $newsub_short"
	else
		oldsub_short=$(git rev-parse --short "$oldsub")
		echo "Squashed '$dir/' changes from $oldsub_short..$newsub_short"
		echo
		git log --pretty=tformat:'%h %s' "$oldsub..$newsub"
		git log --pretty=tformat:'REVERT: %h %s' "$newsub..$oldsub"
	fi

	printf '\n%s\n%s\n' \
		"git-subtree-dir: $dir" \
		"git-subtree-split: $newsub"
}
# Print the id of the top-level tree of commit $1.
# Exits with git's status if the lookup fails.
toptree_for_commit()
{
	commit="$1"
	if git log -1 --pretty=format:'%T' "$commit" --; then
		:
	else
		exit $?
	fi
}
# Print the id of the tree stored at directory $2 in commit $1.
# Prints nothing when the commit has no such entry; aborts (via assert)
# when the entry listed is not a tree named exactly $2.
subtree_for_commit()
{
	commit="$1"
	dir="$2"
	git ls-tree "$commit" -- "$dir" |
	(
		# only the first line of the listing is of interest
		read mode type tree name || exit 0
		assert [ "$name" = "$dir" ]
		assert [ "$type" = "tree" ]
		echo $tree
	)
}
# Decide whether tree $1 differs from the tree of its parent.
#   $1   - tree id
#   $2.. - parent commits
# Returns 0 ("changed") unless there is exactly one parent whose
# top-level tree equals $1, in which case it returns 1.
tree_changed()
{
	tree=$1
	shift
	# weird parents, consider it changed
	[ $# -eq 1 ] || return 0
	ptree=$(toptree_for_commit $1)
	# changed
	[ "$ptree" = "$tree" ] || return 0
	# not changed
	return 1
}
# Create a squash commit holding the tree of subtree commit $3 and print
# its id.
#   $1 - previous squash commit, used as the parent (empty: no parent)
#   $2 - subtree commit the previous squash was made from
#   $3 - subtree commit to squash
# Uses the global $dir for the commit message.
new_squash_commit()
{
	old="$1"
	oldsub="$2"
	newsub="$3"
	tree=$(toptree_for_commit $newsub) || exit $?
	if [ -z "$old" ]; then
		squash_msg "$dir" "" "$newsub" |
			git commit-tree "$tree" || exit $?
	else
		squash_msg "$dir" "$oldsub" "$newsub" |
			git commit-tree "$tree" -p "$old" || exit $?
	fi
}
# Print the commit that represents revision $1 in the split history.
#   $1 - original revision
#   $2 - subtree tree of that revision (must be non-empty)
#   $3 - already-rewritten parents, whitespace separated
# If one of the new parents already has exactly this tree, that parent is
# printed and no commit is made; otherwise the revision is copied on top
# of the (de-duplicated) new parents.
copy_or_skip()
{
	rev="$1"
	tree="$2"
	newparents="$3"
	assert [ -n "$tree" ]

	identical=
	nonidentical=
	p=
	gotparents=
	for parent in $newparents; do
		ptree=$(toptree_for_commit $parent) || exit $?
		if [ -z "$ptree" ]; then
			continue
		fi
		case "$ptree" in
		"$tree")
			# an identical parent could be used in place of this rev.
			identical="$parent"
			;;
		*)
			nonidentical="$parent"
			;;
		esac

		# sometimes both old parents map to the same newparent;
		# eliminate duplicates
		case " $gotparents " in
		*" $parent "*)
			;;
		*)
			gotparents="$gotparents $parent"
			p="$p -p $parent"
			;;
		esac
	done

	if [ -z "$identical" ]; then
		copy_commit $rev $tree "$p" || exit $?
	else
		echo $identical
	fi
}
# Abort unless both the working tree and the index match HEAD.
ensure_clean()
{
	git diff-index HEAD --exit-code --quiet ||
		die "Working tree has modifications. Cannot add."
	git diff-index --cached HEAD --exit-code --quiet ||
		die "Index has modifications. Cannot add."
}
# "add": import the tree of a single revision ($revs) under $dir and
# record it as a commit whose parents are HEAD and that revision.
# With --squash the revision is first replaced by a parentless squash
# commit.  Refuses to run when $dir exists or the work tree is dirty.
cmd_add()
{
	[ ! -e "$dir" ] || die "'$dir' already exists. Cannot add."
	ensure_clean

	set -- $revs
	[ $# -eq 1 ] || die "You must provide exactly one revision. Got: '$revs'"
	rev="$1"

	debug "Adding $dir as '$rev'..."
	git read-tree --prefix="$dir" $rev || exit $?
	git checkout -- "$dir" || exit $?
	tree=$(git write-tree) || exit $?

	# HEAD becomes a parent unless it already is the revision being added
	headrev=$(git rev-parse HEAD) || exit $?
	headp=
	if [ -n "$headrev" ] && [ "$headrev" != "$rev" ]; then
		headp="-p $headrev"
	fi

	if [ -z "$squash" ]; then
		commit=$(add_msg "$dir" "$headrev" "$rev" |
			git commit-tree $tree $headp -p "$rev") || exit $?
	else
		rev=$(new_squash_commit "" "" "$rev") || exit $?
		commit=$(echo "Merge commit '$rev' as '$dir'" |
			git commit-tree $tree $headp -p "$rev") || exit $?
	fi
	git reset "$commit" || exit $?

	say "Added dir '$dir'"
}
# "split": rewrite the history of $revs so that it only contains the
# contents of $dir, and print the id of the newest rewritten commit.
#   --onto=<rev>     seed the cache with an existing split history
#   --ignore-joins   do not look for earlier split/rejoin commits
#   --rejoin         merge the result back into HEAD ("ours" strategy)
#   -b <branch>      create <branch> pointing at the result
# Always exits the script (status 0 on success).
cmd_split()
{
	if [ -n "$branch" ] && rev_exists "refs/heads/$branch"; then
		die "Branch '$branch' already exists."
	fi

	debug "Splitting $dir..."
	cache_setup || exit $?

	if [ -n "$onto" ]; then
		debug "Reading history for --onto=$onto..."
		git rev-list $onto |
		while read rev; do
			# the 'onto' history is already just the subdir, so
			# any parent we find there can be used verbatim
			debug " cache: $rev"
			cache_set $rev $rev
		done
	fi

	if [ -n "$ignore_joins" ]; then
		unrevs=
	else
		unrevs="$(find_existing_splits "$dir" "$revs")"
	fi

	# We can't restrict rev-list to only $dir here, because some of our
	# parents have the $dir contents the root, and those won't match.
	# (and rev-list --follow doesn't seem to solve this)
	grl='git rev-list --reverse --parents $revs $unrevs'
	revmax=$(eval "$grl" | wc -l)
	# some wc implementations pad the count with blanks; normalize it
	revmax=$(($revmax + 0))
	revcount=0
	createcount=0
	eval "$grl" |
	while read rev parents; do
		revcount=$(($revcount + 1))
		# Progress line, rewritten in place.  bash's echo does not
		# expand "\r" unless given -e, so printf is used to emit a
		# real carriage return; like say, it honours -q.
		[ -n "$quiet" ] ||
			printf '%s\r' "$revcount/$revmax ($createcount)" >&2
		debug "Processing commit: $rev"
		exists=$(cache_get $rev)
		if [ -n "$exists" ]; then
			debug " prior: $exists"
			continue
		fi
		createcount=$(($createcount + 1))
		debug " parents: $parents"
		newparents=$(cache_get $parents)
		debug " newparents: $newparents"

		tree=$(subtree_for_commit $rev "$dir")
		debug " tree is: $tree"

		# ugly. is there no better way to tell if this is a subtree
		# vs. a mainline commit? Does it matter?
		[ -z "$tree" ] && continue

		newrev=$(copy_or_skip "$rev" "$tree" "$newparents") || exit $?
		debug " newrev is: $newrev"
		cache_set $rev $newrev
		cache_set latest_new $newrev
		cache_set latest_old $rev
	done || exit $?
	latest_new=$(cache_get latest_new)
	if [ -z "$latest_new" ]; then
		die "No new revisions were found"
	fi

	if [ -n "$rejoin" ]; then
		debug "Merging split branch into HEAD..."
		latest_old=$(cache_get latest_old)
		git merge -s ours \
			-m "$(rejoin_msg "$dir" "$latest_old" "$latest_new")" \
			$latest_new >&2 || exit $?
	fi
	if [ -n "$branch" ]; then
		git update-ref -m 'subtree split' "refs/heads/$branch" \
			$latest_new "" || exit $?
		say "Created branch '$branch'"
	fi
	echo $latest_new
	exit 0
}
# "merge": merge a single revision ($revs) into $dir using the subtree
# strategy.  With --squash, the revision is first turned into a squash
# commit on top of the latest recorded squash; nothing is done when the
# subtree already is at that revision.
cmd_merge()
{
	ensure_clean

	set -- $revs
	[ $# -eq 1 ] || die "You must provide exactly one revision. Got: '$revs'"
	rev="$1"

	if [ -n "$squash" ]; then
		first_split="$(find_latest_squash "$dir")"
		[ -n "$first_split" ] ||
			die "Can't squash-merge: '$dir' was never added."
		# first_split is "<squash commit> <subtree commit>"
		set $first_split
		old=$1
		sub=$2
		if [ "$sub" = "$rev" ]; then
			say "Subtree is already at commit $rev."
			exit 0
		fi
		new=$(new_squash_commit "$old" "$sub" "$rev") || exit $?
		debug "New squash commit: $new"
		rev="$new"
	fi

	git merge -s subtree $rev
}
# "pull": fetch with the given arguments, then merge what was fetched
# (FETCH_HEAD) exactly as "merge" would.  Exits with git's status when the
# fetch fails.
cmd_pull()
{
	ensure_clean
	if git fetch "$@"; then
		revs=FETCH_HEAD
		cmd_merge
	else
		exit $?
	fi
}
575 "cmd_$command" "$@"