1 #!/bin/bash
2 #
3 # git-subtree.sh: split/join git repositories in subdirectories of this one
4 #
5 # Copyright (C) 2009 Avery Pennarun <apenwarr@gmail.com>
6 #
# With no arguments at all, behave as if -h had been given.
if [ $# -eq 0 ]; then
	set -- -h
fi

# Option specification consumed by "git rev-parse --parseopt".
# Everything before the "--" line is usage text; after it, one option per
# line.  Lines that start with a space are group headers, not options.
OPTS_SPEC="\
git subtree add --prefix=<prefix> <commit>
git subtree merge --prefix=<prefix> <commit>
git subtree pull --prefix=<prefix> <repository> <refspec...>
git subtree split --prefix=<prefix> <commit...>
--
h,help show the help
q quiet
d show debug messages
p,prefix= the name of the subdir to split out
m,message= use the given message as the commit message for the merge commit
 options for 'split'
annotate= add a prefix to commit message of new commits
b,branch= create a new branch from the split subtree
ignore-joins ignore prior --rejoin commits
onto= try connecting new tree to an existing one
rejoin merge the new branch back into HEAD
 options for 'add', 'merge', and 'pull'
squash merge subtree changes as a single commit
"

# The command substitution must be quoted: parseopt prints a shell-quoted
# "set -- ..." command, and letting the shell word-split it before eval
# would collapse whitespace/newlines inside argument values (for example
# in a -m message).  On a parse error (or -h) the "exit N" fallback is
# what gets evaluated.
eval "$(echo "$OPTS_SPEC" | git rev-parse --parseopt -- "$@" || echo exit $?)"

# Make the git helper programs reachable, then load git's shell library
# (it provides die and require_work_tree used below).
PATH=$(git --exec-path):$PATH
. git-sh-setup
require_work_tree
# Option state.  Every variable is reset explicitly so that a same-named
# variable inherited from the environment cannot influence this run.
quiet=
branch=
debug=
command=
onto=
rejoin=
ignore_joins=
annotate=
squash=
message=
# prefix is only assigned when -p/--prefix is parsed; without this reset an
# exported "prefix" (common in build environments) would satisfy the later
# "You must provide the --prefix option." check.
prefix=
# debug <words...>
# Echo the arguments to stderr, but only when $debug is non-empty (-d).
debug()
{
	[ -z "$debug" ] || echo "$@" >&2
}
# say <words...>
# Echo the arguments to stderr unless $quiet is non-empty (-q).
# The arguments go straight to echo, so callers may pass echo flags
# such as -n.
say()
{
	[ -n "$quiet" ] || echo "$@" >&2
}
# assert <command> [args...]
# Run the given command; if it fails, abort through die with a message
# that repeats the command that was attempted.
assert()
{
	"$@" || die "assertion failed: " "$@"
}
#echo "Options: $*"

# Walk the positional parameters one option at a time until "--" is seen.
# Options that take a value read it from the following word.
until [ $# -eq 0 ]; do
	opt="$1"
	shift
	case "$opt" in
	-q)
		quiet=1
		;;
	-d)
		debug=1
		;;
	--annotate)
		annotate="$1"
		shift
		;;
	--no-annotate)
		annotate=
		;;
	-b)
		branch="$1"
		shift
		;;
	-p)
		prefix="$1"
		shift
		;;
	-m)
		message="$1"
		shift
		;;
	--no-prefix)
		prefix=
		;;
	--onto)
		onto="$1"
		shift
		;;
	--no-onto)
		onto=
		;;
	--rejoin)
		rejoin=1
		;;
	--no-rejoin)
		rejoin=
		;;
	--ignore-joins)
		ignore_joins=1
		;;
	--no-ignore-joins)
		ignore_joins=
		;;
	--squash)
		squash=1
		;;
	--no-squash)
		squash=
		;;
	--)
		# end of options; what remains is the command and its arguments
		break
		;;
	*)
		die "Unexpected option: $opt"
		;;
	esac
done
# The first remaining word is the subcommand.  Only "split" falls back to
# HEAD when no revision is named.
command="$1"
shift
case "$command" in
split)
	default="--default HEAD"
	;;
add|merge|pull)
	default=
	;;
*)
	die "Unknown command '$command'"
	;;
esac

[ -n "$prefix" ] || die "You must provide the --prefix option."
# Taking the dirname of "$prefix/." yields the prefix without trailing
# slashes.
dir="$(dirname "$prefix/.")"

# "pull" hands its arguments to git fetch instead, so they are not
# interpreted as revisions here.
case "$command" in
pull)
	;;
*)
	# $default is deliberately unquoted: it is either empty or two words.
	revs=$(git rev-parse $default --revs-only "$@") || exit $?
	dirs="$(git rev-parse --no-revs --no-flags "$@")" || exit $?
	[ -z "$dirs" ] || die "Error: Use --prefix instead of bare filenames."
	;;
esac

debug "command: {$command}"
debug "quiet: {$quiet}"
debug "revs: {$revs}"
debug "dir: {$dir}"
debug "opts: {$*}"
debug
# cache_setup
# Point $cachedir at a per-process directory under $GIT_DIR/subtree-cache
# and make sure it exists and is empty.
cache_setup()
{
	cachedir="$GIT_DIR/subtree-cache/$$"
	if ! rm -rf "$cachedir"; then
		die "Can't delete old cachedir: $cachedir"
	fi
	if ! mkdir -p "$cachedir"; then
		die "Can't create new cachedir: $cachedir"
	fi
	debug "Using cachedir: $cachedir" >&2
}
# cache_get <key...>
# For every key that has a readable entry in $cachedir, print the first
# line stored there.  Keys without an entry are skipped silently.
cache_get()
{
	for oldrev in $*; do
		[ -r "$cachedir/$oldrev" ] || continue
		read newrev <"$cachedir/$oldrev"
		echo $newrev
	done
}
# cache_set <key> <value>
# Store <value> under <key> in $cachedir.  Only the bookkeeping keys
# latest_old and latest_new may be overwritten; for any other key an
# existing entry is treated as an error.
cache_set()
{
	oldrev="$1"
	newrev="$2"
	case "$oldrev" in
	latest_old|latest_new)
		;;
	*)
		if [ -e "$cachedir/$oldrev" ]; then
			die "cache for $oldrev already exists!"
		fi
		;;
	esac
	echo "$newrev" >"$cachedir/$oldrev"
}
# rev_exists <rev>
# Return 0 when "git rev-parse <rev>" succeeds, 1 otherwise.  All output
# of git is discarded.
rev_exists()
{
	git rev-parse "$1" >/dev/null 2>&1 || return 1
	return 0
}
# try_remove_previous <rev>
# Print "^<rev>^", i.e. an exclusion of <rev>'s parent for use in a
# rev-list.  A commit without a parent has nothing to exclude (the parent
# could not show up in the rev-list anyway), so nothing is printed then.
try_remove_previous()
{
	rev_exists "$1^" || return 0
	echo "^$1^"
}
# find_latest_squash <dir>
# Scan the history of HEAD for commits whose message carries a
# "git-subtree-dir: <dir>" line and print "<commit> <split>" for the first
# one found that also has a git-subtree-split line.  For a commit that
# additionally names a git-subtree-mainline (a rejoin), the split commit
# itself is printed in place of the commit id.  Prints nothing when no
# such commit exists.
find_latest_squash()
{
	# Assign dir before logging it, so the message shows the directory
	# actually being searched rather than whatever $dir held before.
	dir="$1"
	debug "Looking for latest squash ($dir)..."
	sq=
	main=
	sub=
	git log --grep="^git-subtree-dir: $dir/*\$" \
		--pretty=format:'START %H%n%s%n%n%b%nEND%n' HEAD |
	# -r keeps backslashes in commit messages from being interpreted
	# (and from joining a line with the one that follows it).
	while read -r a b junk; do
		debug "$a $b $junk"
		debug "{{$sq/$main/$sub}}"
		case "$a" in
		START)
			sq="$b"
			;;
		git-subtree-mainline:)
			main="$b"
			;;
		git-subtree-split:)
			sub="$b"
			;;
		END)
			if [ -n "$sub" ]; then
				if [ -n "$main" ]; then
					# a rejoin commit?
					# Pretend its sub was a squash.
					sq="$sub"
				fi
				debug "Squash found: $sq $sub"
				echo "$sq" "$sub"
				break
			fi
			# not usable; reset before the next commit
			sq=
			main=
			sub=
			;;
		esac
	done
}
# find_existing_splits <dir> <revs>
# Scan the history of <revs> for commits carrying a
# "git-subtree-dir: <dir>" line and record what they say in the cache:
#   - split line only (squash commit):  cache <commit> -> <split>
#   - mainline and split (rejoin):      cache <mainline> -> <split>, and
#     print the try_remove_previous exclusions for both commits
# Whatever is printed on stdout is meant to be appended to a rev-list
# invocation by the caller.
find_existing_splits()
{
	debug "Looking for prior splits..."
	dir="$1"
	revs="$2"
	main=
	sub=
	# $revs is left unquoted on purpose: it may hold several revisions.
	git log --grep="^git-subtree-dir: $dir/*\$" \
		--pretty=format:'START %H%n%s%n%n%b%nEND%n' $revs |
	while read a b junk; do
		case "$a" in
		START)
			sq="$b"
			;;
		git-subtree-mainline:)
			main="$b"
			;;
		git-subtree-split:)
			sub="$b"
			;;
		END)
			debug " Main is: '$main'"
			if [ -n "$sub" ]; then
				if [ -z "$main" ]; then
					# squash commits refer to a subtree
					debug " Squash: $sq from $sub"
					cache_set "$sq" "$sub"
				else
					debug " Prior: $main -> $sub"
					cache_set $main $sub
					try_remove_previous "$main"
					try_remove_previous "$sub"
				fi
			fi
			main=
			sub=
			;;
		esac
	done
}
# copy_commit <rev> <tree> <parent-args>
# Create a new commit on <tree> that reuses <rev>'s author and committer
# identity, dates and message (with $annotate put in front of the message)
# and print what git commit-tree prints.  <parent-args> is a pre-built
# "-p <commit> ..." string and is split into words on purpose.
copy_commit()
{
	# We're going to set some environment vars here, so
	# do it in a subshell to get rid of them safely later
	debug copy_commit "{$1}" "{$2}" "{$3}"
	git log -1 --pretty=format:'%an%n%ae%n%ad%n%cn%n%ce%n%cd%n%s%n%n%b' "$1" |
	(
		# -r: take the fields verbatim; without it a backslash in a
		# name would be dropped or glue two lines together.
		read -r GIT_AUTHOR_NAME
		read -r GIT_AUTHOR_EMAIL
		read -r GIT_AUTHOR_DATE
		read -r GIT_COMMITTER_NAME
		read -r GIT_COMMITTER_EMAIL
		read -r GIT_COMMITTER_DATE
		export GIT_AUTHOR_NAME \
			GIT_AUTHOR_EMAIL \
			GIT_AUTHOR_DATE \
			GIT_COMMITTER_NAME \
			GIT_COMMITTER_EMAIL \
			GIT_COMMITTER_DATE
		# printf instead of "echo -n": echo would treat an annotation
		# such as "-e" or "-n" as one of its own options and drop it.
		(printf '%s' "$annotate"; cat) |
		git commit-tree "$2" $3 # reads the rest of stdin
	) || die "Can't copy commit $1"
}
# add_msg <dir> <latest_old> <latest_new>
# Print the commit message for a non-squashed "add": a subject ($message
# if one was given, a generated one otherwise), a blank line, and the
# git-subtree-dir / git-subtree-mainline / git-subtree-split lines.
add_msg()
{
	dir="$1"
	latest_old="$2"
	latest_new="$3"
	if [ -n "$message" ]; then
		commit_message="$message"
	else
		commit_message="Add '$dir/' from commit '$latest_new'"
	fi
	# The empty line after the subject matters: the history scanners in
	# this file read the message as "%s%n%n%b" and look for the
	# git-subtree-* keys at the start of a line, which only works when
	# those lines are in the body and not folded into the subject.
	printf '%s\n\n' "$commit_message"
	printf '%s\n' \
		"git-subtree-dir: $dir" \
		"git-subtree-mainline: $latest_old" \
		"git-subtree-split: $latest_new"
}
# add_squashed_msg <commit> <dir>
# Print the subject for the merge commit of a squashed "add": $message if
# one was given, otherwise a generated "Merge commit ..." line.
add_squashed_msg()
{
	case "$message" in
	"")
		echo "Merge commit '$1' as '$2'"
		;;
	*)
		echo "$message"
		;;
	esac
}
# rejoin_msg <dir> <latest_old> <latest_new>
# Print the commit message for a "split --rejoin" merge: a subject
# ($message if one was given, a generated one otherwise), a blank line,
# and the git-subtree-dir / git-subtree-mainline / git-subtree-split lines.
rejoin_msg()
{
	dir="$1"
	latest_old="$2"
	latest_new="$3"
	if [ -n "$message" ]; then
		commit_message="$message"
	else
		commit_message="Split '$dir/' into commit '$latest_new'"
	fi
	# Emit $commit_message, not $message: the latter is empty whenever
	# no -m was given, which would drop the generated subject.
	# The empty line keeps the git-subtree-* keys in the message body,
	# where the "%s%n%n%b" scanners in this file expect them.
	printf '%s\n\n' "$commit_message"
	printf '%s\n' \
		"git-subtree-dir: $dir" \
		"git-subtree-mainline: $latest_old" \
		"git-subtree-split: $latest_new"
}
# squash_msg <dir> <oldsub> <newsub>
# Print the message for a squash commit.  With a previous subtree commit
# <oldsub>, the message lists the commits gained (oldsub..newsub) and, as
# "REVERT:" lines, the commits lost (newsub..oldsub).  With an empty
# <oldsub> it only names the commit the content came from.  Either way
# the message ends with the git-subtree-dir / git-subtree-split lines.
squash_msg()
{
	dir="$1"
	oldsub="$2"
	newsub="$3"
	newsub_short=$(git rev-parse --short "$newsub")

	case "$oldsub" in
	"")
		echo "Squashed '$dir/' content from commit $newsub_short"
		;;
	*)
		oldsub_short=$(git rev-parse --short "$oldsub")
		printf '%s\n\n' \
			"Squashed '$dir/' changes from $oldsub_short..$newsub_short"
		git log --pretty=tformat:'%h %s' "$oldsub..$newsub"
		git log --pretty=tformat:'REVERT: %h %s' "$newsub..$oldsub"
		;;
	esac

	printf '\n%s\n%s\n' \
		"git-subtree-dir: $dir" \
		"git-subtree-split: $newsub"
}
# toptree_for_commit <commit>
# Print the tree id (%T) of <commit>.  If git fails, the shell exits with
# git's status.
toptree_for_commit()
{
	commit="$1"
	git log -1 --pretty=format:'%T' "$commit" -- && return
	# only reached on failure; $? still holds git's exit status
	exit $?
}
# subtree_for_commit <commit> <dir>
# Print the id of the tree found at <dir> in <commit>.  Only the first
# line of "git ls-tree" output is looked at; it must name <dir> and be of
# type "tree".  Prints nothing when ls-tree reports no entry.
subtree_for_commit()
{
	commit="$1"
	dir="$2"
	git ls-tree "$commit" -- "$dir" |
	{
		if read mode type tree name; then
			assert [ "$name" = "$dir" ]
			assert [ "$type" = "tree" ]
			echo $tree
		fi
	}
}
# tree_changed <tree> <parent...>
# Return 0 ("changed") unless there is exactly one parent and that
# parent's top-level tree equals <tree>, in which case return 1.
tree_changed()
{
	tree=$1
	shift
	# weird parents, consider it changed
	[ $# -eq 1 ] || return 0

	ptree=$(toptree_for_commit $1)
	# changed
	[ "$ptree" = "$tree" ] || return 0
	# not changed
	return 1
}
# new_squash_commit <old> <oldsub> <newsub>
# Create a squash commit that carries the top-level tree of <newsub> and
# print what git commit-tree prints.  With a non-empty <old> the new
# commit gets <old> as parent and its message describes the range
# <oldsub>..<newsub>; with an empty <old> it is a parentless commit whose
# message only names <newsub>.  Uses the global $dir for the message.
new_squash_commit()
{
	old="$1"
	oldsub="$2"
	newsub="$3"
	tree=$(toptree_for_commit $newsub) || exit $?
	if [ -z "$old" ]; then
		squash_msg "$dir" "" "$newsub" |
		git commit-tree "$tree" || exit $?
	else
		squash_msg "$dir" "$oldsub" "$newsub" |
		git commit-tree "$tree" -p "$old" || exit $?
	fi
}
# copy_or_skip <rev> <tree> <newparents>
# Decide which commit represents <rev> in the split history and print it.
# If one of the already rewritten parents has exactly <tree> as its
# top-level tree, that parent is printed and no commit is created.
# Otherwise <rev> is copied onto <tree> with the de-duplicated list of
# parents.  Parents whose tree cannot be determined are ignored.
copy_or_skip()
{
	rev="$1"
	tree="$2"
	newparents="$3"
	assert [ -n "$tree" ]

	identical=
	nonidentical=
	p=
	gotparents=
	for parent in $newparents; do
		ptree=$(toptree_for_commit $parent) || exit $?
		case "$ptree" in
		"")
			continue
			;;
		"$tree")
			# an identical parent could be used in place of this rev.
			identical="$parent"
			;;
		*)
			nonidentical="$parent"
			;;
		esac

		# sometimes both old parents map to the same newparent;
		# eliminate duplicates
		case " $gotparents " in
		*" $parent "*)
			;;
		*)
			gotparents="$gotparents $parent"
			p="$p -p $parent"
			;;
		esac
	done

	if [ -z "$identical" ]; then
		copy_commit $rev $tree "$p" || exit $?
	else
		echo $identical
	fi
}
# ensure_clean
# Abort through die unless both the working tree and the index match
# HEAD, as reported by "git diff-index".
ensure_clean()
{
	git diff-index HEAD --exit-code --quiet ||
		die "Working tree has modifications. Cannot add."
	git diff-index --cached HEAD --exit-code --quiet ||
		die "Index has modifications. Cannot add."
}
# cmd_add
# Implements "git subtree add": read the single revision in $revs into
# the index under $dir, check it out, and record the result as a commit
# that has both HEAD and the added revision as parents.  With $squash
# set, the added revision is first replaced by a fresh squash commit.
# Requires that $dir does not exist yet and that the work tree is clean.
cmd_add()
{
	[ ! -e "$dir" ] || die "'$dir' already exists. Cannot add."
	ensure_clean

	set -- $revs
	[ $# -eq 1 ] || die "You must provide exactly one revision. Got: '$revs'"
	rev="$1"

	debug "Adding $dir as '$rev'..."
	git read-tree --prefix="$dir" $rev || exit $?
	git checkout -- "$dir" || exit $?
	tree=$(git write-tree) || exit $?

	headrev=$(git rev-parse HEAD) || exit $?
	# HEAD becomes a parent unless it is the very commit being added
	headp=
	if [ -n "$headrev" ] && [ "$headrev" != "$rev" ]; then
		headp="-p $headrev"
	fi

	if [ -z "$squash" ]; then
		commit=$(add_msg "$dir" "$headrev" "$rev" |
			git commit-tree $tree $headp -p "$rev") || exit $?
	else
		rev=$(new_squash_commit "" "" "$rev") || exit $?
		commit=$(add_squashed_msg "$rev" "$dir" |
			git commit-tree $tree $headp -p "$rev") || exit $?
	fi
	git reset "$commit" || exit $?

	say "Added dir '$dir'"
}
# cmd_split
# Implements "git subtree split": walk the commits selected by $revs
# oldest first and, for each one that has a tree at $dir, produce a
# commit whose top-level tree is that subtree.  The old -> new mapping is
# kept in the cache directory.  Prints the id of the newest rewritten
# commit on stdout and exits the shell.
# Globals read: dir, revs, branch, onto, ignore_joins, rejoin.
#   $branch  create refs/heads/$branch at the result (must not exist yet)
#   $onto    treat every commit reachable from it as already split
#   $rejoin  merge the result back into HEAD with the "ours" strategy
cmd_split()
{
	if [ -n "$branch" ] && rev_exists "refs/heads/$branch"; then
		die "Branch '$branch' already exists."
	fi

	debug "Splitting $dir..."
	cache_setup || exit $?

	if [ -n "$onto" ]; then
		debug "Reading history for --onto=$onto..."
		git rev-list $onto |
		while read rev; do
			# the 'onto' history is already just the subdir, so
			# any parent we find there can be used verbatim
			debug " cache: $rev"
			cache_set $rev $rev
		done
	fi

	if [ -n "$ignore_joins" ]; then
		unrevs=
	else
		unrevs="$(find_existing_splits "$dir" "$revs")"
	fi

	# We can't restrict rev-list to only $dir here, because some of our
	# parents have the $dir contents as the root, and those won't match.
	# (and rev-list --follow doesn't seem to solve this)
	grl='git rev-list --reverse --parents $revs $unrevs'
	revmax=$(eval "$grl" | wc -l)
	revcount=0
	createcount=0
	# A real carriage return for the progress line.  bash's echo does not
	# interpret "\r" without -e, so spelling it inside the string would
	# print a literal backslash-r instead of rewinding the line.
	cr=$(printf '\r')
	eval "$grl" |
	while read rev parents; do
		revcount=$(($revcount + 1))
		say -n "$revcount/$revmax ($createcount)$cr"
		debug "Processing commit: $rev"
		exists=$(cache_get $rev)
		if [ -n "$exists" ]; then
			debug " prior: $exists"
			continue
		fi
		createcount=$(($createcount + 1))
		debug " parents: $parents"
		newparents=$(cache_get $parents)
		debug " newparents: $newparents"

		tree=$(subtree_for_commit $rev "$dir")
		debug " tree is: $tree"

		# ugly. is there no better way to tell if this is a subtree
		# vs. a mainline commit? Does it matter?
		[ -z "$tree" ] && continue

		newrev=$(copy_or_skip "$rev" "$tree" "$newparents") || exit $?
		debug " newrev is: $newrev"
		cache_set $rev $newrev
		cache_set latest_new $newrev
		cache_set latest_old $rev
	done || exit $?
	# The loop ran in a subshell; its result is read back from the cache.
	latest_new=$(cache_get latest_new)
	if [ -z "$latest_new" ]; then
		die "No new revisions were found"
	fi

	if [ -n "$rejoin" ]; then
		debug "Merging split branch into HEAD..."
		latest_old=$(cache_get latest_old)
		# arguments are quoted so a prefix containing spaces stays a
		# single argument to rejoin_msg
		git merge -s ours \
			-m "$(rejoin_msg "$dir" "$latest_old" "$latest_new")" \
			$latest_new >&2 || exit $?
	fi
	if [ -n "$branch" ]; then
		git update-ref -m 'subtree split' "refs/heads/$branch" \
			$latest_new "" || exit $?
		say "Created branch '$branch'"
	fi
	echo $latest_new
	exit 0
}
# cmd_merge
# Implements "git subtree merge": merge the single revision in $revs into
# HEAD with the subtree strategy.  With $squash set, the revision is first
# turned into a squash commit on top of the latest squash recorded for
# $dir; if the subtree is already at that revision nothing is done.
# Requires a clean work tree.
cmd_merge()
{
	ensure_clean

	set -- $revs
	if [ $# -ne 1 ]; then
		die "You must provide exactly one revision. Got: '$revs'"
	fi
	rev="$1"

	if [ -n "$squash" ]; then
		first_split="$(find_latest_squash "$dir")"
		if [ -z "$first_split" ]; then
			die "Can't squash-merge: '$dir' was never added."
		fi
		# "<squash commit> <subtree commit>"; "--" keeps set from ever
		# reading the value as options
		set -- $first_split
		old=$1
		sub=$2
		if [ "$sub" = "$rev" ]; then
			say "Subtree is already at commit $rev."
			exit 0
		fi
		new=$(new_squash_commit "$old" "$sub" "$rev") || exit $?
		debug "New squash commit: $new"
		rev="$new"
	fi

	# Only pass --message when one was given.  An unconditional
	# --message="" hands git an explicit empty message instead of
	# letting it generate its usual merge message.
	if [ -n "$message" ]; then
		git merge -s subtree --message="$message" $rev
	else
		git merge -s subtree $rev
	fi
}
# cmd_pull <repository> <refspec...>
# Implements "git subtree pull": fetch with the given arguments, then
# merge FETCH_HEAD the same way cmd_merge does.  A failed fetch exits the
# shell with git's status.  Requires a clean work tree.
cmd_pull()
{
	ensure_clean
	if git fetch "$@"; then
		revs=FETCH_HEAD
		cmd_merge
	else
		exit $?
	fi
}
# Hand the remaining arguments to the handler of the selected subcommand
# (cmd_add, cmd_merge, cmd_pull or cmd_split).
cmd_"$command" "$@"