1 #!/bin/bash
2 #
3 # git-subtree.sh: split/join git repositories in subdirectories of this one
4 #
5 # Copyright (C) 2009 Avery Pennarun <apenwarr@gmail.com>
6 #
# With no arguments at all, behave as if -h had been passed.
if [ $# -eq 0 ]; then
	set -- -h
fi

# Option specification fed to `git rev-parse --parseopt`: usage lines, a
# `--` separator, then one option per line.  The "options for 'split'"
# line starts with a space on purpose: that is what makes parseopt treat
# it as a group header rather than as an option called "options".
OPTS_SPEC="\
git subtree add --prefix=<prefix> <commit>
git subtree split [options...] --prefix=<prefix> <commit...>
git subtree merge --prefix=<prefix> <commit>
git subtree pull --prefix=<prefix> <repository> <refspec...>
--
h,help show the help
q quiet
d show debug messages
prefix= the name of the subdir to split out
 options for 'split'
annotate= add a prefix to commit message of new commits
onto= try connecting new tree to an existing one
rejoin merge the new branch back into HEAD
ignore-joins ignore prior --rejoin commits
"

# The command substitution must be quoted: unquoted, word splitting would
# collapse any newlines in the parseopt output into spaces before eval
# sees them, which corrupts multi-line output.  If parseopt itself fails,
# "exit <status>" is eval'ed instead.
eval "$(echo "$OPTS_SPEC" | git rev-parse --parseopt -- "$@" || echo exit $?)"
. git-sh-setup
require_work_tree

# Defaults for the settings filled in by the option loop further down.
quiet=
debug=
command=
onto=
rejoin=
ignore_joins=
annotate=
# debug <words...>
#   Echo the arguments to stderr, but only while $debug is non-empty.
#   The arguments are passed to echo untouched.
debug()
{
	[ -z "$debug" ] || echo "$@" >&2
}
# say <words...>
#   Echo the arguments to stderr unless $quiet is non-empty.
#   The arguments are passed to echo untouched, so a leading -n works.
say()
{
	[ -n "$quiet" ] || echo "$@" >&2
}
# assert <command> [args...]
#   Run the given command; if it returns non-zero, call die with
#   "assertion failed: " followed by the command words.
assert()
{
	"$@" || die "assertion failed: " "$@"
}
# Walk the positional parameters, recording each recognised option in its
# variable.  Options that take a value use the following word.  The loop
# stops at "--"; whatever follows is the command and its arguments.
until [ $# -eq 0 ]; do
	opt="$1"
	shift
	case "$opt" in
	--)
		break
		;;
	-q)
		quiet=1
		;;
	-d)
		debug=1
		;;
	--prefix)
		prefix="$1"
		shift
		;;
	--no-prefix)
		prefix=
		;;
	--annotate)
		annotate="$1"
		shift
		;;
	--no-annotate)
		annotate=
		;;
	--onto)
		onto="$1"
		shift
		;;
	--no-onto)
		onto=
		;;
	--rejoin)
		rejoin=1
		;;
	--no-rejoin)
		rejoin=
		;;
	--ignore-joins)
		ignore_joins=1
		;;
	--no-ignore-joins)
		ignore_joins=
		;;
	esac
done

# The first remaining word is the subcommand.  Only "split" falls back to
# HEAD when no revision is given.
command="$1"
shift
case "$command" in
split)
	default="--default HEAD"
	;;
add | merge | pull)
	default=
	;;
*)
	die "Unknown command '$command'"
	;;
esac

[ -n "$prefix" ] || die "You must provide the --prefix option."
dir="$prefix"

# "pull" hands its arguments straight to git pull, so only the other
# commands have theirs resolved to revisions; leftover non-revision,
# non-flag words are rejected.
case "$command" in
pull)
	;;
*)
	revs=$(git rev-parse $default --revs-only "$@") || exit $?
	dirs="$(git rev-parse --no-revs --no-flags "$@")" || exit $?
	[ -z "$dirs" ] || die "Error: Use --prefix instead of bare filenames."
	;;
esac

debug "command: {$command}"
debug "quiet: {$quiet}"
debug "revs: {$revs}"
debug "dir: {$dir}"
debug "opts: {$*}"
debug
# cache_setup
#   Point $cachedir at a per-process directory (named after this shell's
#   PID) below $GIT_DIR/subtree-cache and make sure it starts out empty.
#   Dies if the old directory cannot be removed or the new one created.
cache_setup()
{
	cachedir="$GIT_DIR/subtree-cache/$$"
	if ! rm -rf "$cachedir"; then
		die "Can't delete old cachedir: $cachedir"
	fi
	if ! mkdir -p "$cachedir"; then
		die "Can't create new cachedir: $cachedir"
	fi
	debug "Using cachedir: $cachedir" >&2
}
# cache_get <oldrev...>
#   For every argument that has a readable entry in $cachedir, print the
#   first line stored there (one per line, in argument order).  Arguments
#   without an entry are silently skipped.
#
#   The loop variables are local so a direct call cannot overwrite a
#   caller's own $oldrev/$newrev.
cache_get()
{
	local oldrev newrev
	# $* is deliberately unquoted: callers pass whitespace-separated lists.
	for oldrev in $*; do
		if [ -r "$cachedir/$oldrev" ]; then
			read -r newrev <"$cachedir/$oldrev"
			echo "$newrev"
		fi
	done
}
# cache_set <oldrev> <newrev>
#   Record <newrev> as the cache entry for <oldrev> under $cachedir.
#   An ordinary entry may only be written once; trying to overwrite it is
#   fatal.  The special keys "latest_old" and "latest_new" may be
#   rewritten freely.
#
#   The parameters are held in locals so the caller's own $oldrev/$newrev
#   are not overwritten by the call.
cache_set()
{
	local oldrev="$1"
	local newrev="$2"
	if [ "$oldrev" != "latest_old" ] &&
	   [ "$oldrev" != "latest_new" ] &&
	   [ -e "$cachedir/$oldrev" ]; then
		die "cache for $oldrev already exists!"
	fi
	echo "$newrev" >"$cachedir/$oldrev"
}
# try_remove_previous <rev>
#   Print "^<rev>^" (an exclusion of <rev>'s parent) when that parent can
#   be resolved.  A commit without a parent has nothing to exclude from
#   the rev-list, so in that case nothing is printed.
try_remove_previous()
{
	git rev-parse "$1^" >/dev/null 2>&1 || return 0
	echo "^$1^"
}
# find_existing_splits <dir> <revs>
#   Scan the log of <revs> for commits whose message carries a
#   "git-subtree-dir: <dir>" line.  For each such commit that names both a
#   git-subtree-mainline and a git-subtree-split revision, record the
#   mainline -> split mapping in the cache and print "^<rev>^" exclusions
#   for both (via try_remove_previous) on stdout.
#
#   main/sub are cleared up front: the read loop runs in a pipeline
#   subshell and would otherwise start with whatever values happened to be
#   inherited, reporting a bogus prior split on the first line read.
find_existing_splits()
{
	debug "Looking for prior splits..."
	local dir="$1"
	local revs="$2"
	local main= sub= a b junk
	git log --grep="^git-subtree-dir: $dir\$" \
		--pretty=format:'%s%n%n%b%nEND' $revs |
	while read -r a b junk; do
		case "$a" in
		git-subtree-mainline:) main="$b" ;;
		git-subtree-split:) sub="$b" ;;
		*)
			# Any other line closes the pair collected so far.
			if [ -n "$main" ] && [ -n "$sub" ]; then
				debug " Prior: $main -> $sub"
				cache_set "$main" "$sub"
				try_remove_previous "$main"
				try_remove_previous "$sub"
				main=
				sub=
			fi
			;;
		esac
	done
}
# copy_commit <rev> <tree> <parent-args>
#   Create a new commit for <tree> that reuses <rev>'s author and
#   committer identity and dates and its message (prefixed with
#   $annotate).  <parent-args> is a string of "-p <parent>" words and is
#   intentionally word-split.  Whatever git commit-tree prints goes to
#   stdout.  Dies if the pipeline fails.
copy_commit()
{
	debug copy_commit "{$1}" "{$2}" "{$3}"
	git log -1 --pretty=format:'%an%n%ae%n%ad%n%cn%n%ce%n%cd%n%s%n%n%b' "$1" |
	(
		# The identity variables are exported inside this subshell so
		# they disappear again once the commit has been written.
		# read -r keeps backslashes in names and e-mails intact.
		read -r GIT_AUTHOR_NAME
		read -r GIT_AUTHOR_EMAIL
		read -r GIT_AUTHOR_DATE
		read -r GIT_COMMITTER_NAME
		read -r GIT_COMMITTER_EMAIL
		read -r GIT_COMMITTER_DATE
		export GIT_AUTHOR_NAME \
			GIT_AUTHOR_EMAIL \
			GIT_AUTHOR_DATE \
			GIT_COMMITTER_NAME \
			GIT_COMMITTER_EMAIL \
			GIT_COMMITTER_DATE
		# printf rather than echo -n: the annotation is emitted
		# verbatim even if it looks like an echo option.
		(printf '%s' "$annotate"; cat) |
		git commit-tree "$2" $3 # reads the rest of stdin
	) || die "Can't copy commit $1"
}
# add_msg <dir> <latest_old> <latest_new>
#   Print the commit message for an "add": a subject line, a blank line,
#   then the git-subtree-dir / -mainline / -split lines.
#
#   The blank line keeps the subject separate from the metadata lines.
#   printf is used instead of a <<- here-document so the output does not
#   depend on the function body being indented with tabs.
add_msg()
{
	local dir="$1"
	local latest_old="$2"
	local latest_new="$3"
	printf '%s\n' \
		"Add '$dir/' from commit '$latest_new'" \
		"" \
		"git-subtree-dir: $dir" \
		"git-subtree-mainline: $latest_old" \
		"git-subtree-split: $latest_new"
}
# merge_msg <dir> <latest_old> <latest_new>
#   Print the commit message for a rejoin merge: a subject line, a blank
#   line, then the git-subtree-dir / -mainline / -split lines.
#
#   The blank line keeps the subject separate from the metadata lines.
#   printf is used instead of a <<- here-document so the output does not
#   depend on the function body being indented with tabs.
merge_msg()
{
	local dir="$1"
	local latest_old="$2"
	local latest_new="$3"
	printf '%s\n' \
		"Split '$dir/' into commit '$latest_new'" \
		"" \
		"git-subtree-dir: $dir" \
		"git-subtree-mainline: $latest_old" \
		"git-subtree-split: $latest_new"
}
# toptree_for_commit <commit>
#   Print the %T (tree) field that git log reports for <commit>.
#   If git log fails, exit with its status.
toptree_for_commit()
{
	commit="$1"
	git log -1 --pretty=format:'%T' "$commit" -- && return
	exit $?
}
# subtree_for_commit <commit> <dir>
#   Print the object id from the first line git ls-tree reports for <dir>
#   in <commit>, after asserting that the entry's name equals <dir>.
#   Prints nothing when ls-tree produces no output.
subtree_for_commit()
{
	commit="$1"
	dir="$2"
	git ls-tree "$commit" -- "$dir" | {
		# Only the first entry matters; the rest is ignored.
		if read mode type tree name; then
			assert [ "$name" = "$dir" ]
			echo $tree
		fi
	}
}
# tree_changed <tree> <parent...>
#   Return 0 ("changed") unless exactly one parent is given and that
#   parent's top-level tree equals <tree>, in which case return 1.
tree_changed()
{
	tree=$1
	shift
	# weird parents (none, or more than one): consider it changed
	[ $# -eq 1 ] || return 0
	ptree=$(toptree_for_commit $1)
	[ "$ptree" != "$tree" ]
}
# copy_or_skip <rev> <tree> <newparents>
#   Decide what stands in for <rev> in the new history.  If one of the
#   already-rewritten parents has exactly <tree> as its top-level tree,
#   print that parent.  Otherwise create a fresh commit with copy_commit,
#   using the de-duplicated parents, and print whatever it outputs.
copy_or_skip()
{
	rev="$1"
	tree="$2"
	newparents="$3"
	assert [ -n "$tree" ]

	identical=
	nonidentical=
	p=
	gotparents=
	for parent in $newparents; do
		ptree=$(toptree_for_commit $parent) || exit $?
		[ -z "$ptree" ] && continue
		case "$ptree" in
		"$tree")
			# an identical parent could be used in place of this rev.
			identical="$parent"
			;;
		*)
			nonidentical="$parent"
			;;
		esac

		# several old parents may map to the same new parent;
		# add each new parent only once
		case " $gotparents " in
		*" $parent "*)
			;;
		*)
			gotparents="$gotparents $parent"
			p="$p -p $parent"
			;;
		esac
	done

	if [ -z "$identical" ]; then
		copy_commit $rev $tree "$p" || exit $?
	else
		echo $identical
	fi
}
# ensure_clean
#   Die unless both git diff-index checks against HEAD (the plain one and
#   the --cached one) report no differences.
ensure_clean()
{
	git diff-index HEAD --exit-code --quiet ||
		die "Working tree has modifications. Cannot add."
	git diff-index --cached HEAD --exit-code --quiet ||
		die "Index has modifications. Cannot add."
}
# cmd_add
#   Implements "git subtree add": read the tree of the single revision in
#   $revs into $dir, check it out, and commit the result with that
#   revision as a parent (plus HEAD, when HEAD differs from it).
#   Refuses to run if $dir already exists or the tree/index is not clean.
cmd_add()
{
	[ -e "$dir" ] && die "'$dir' already exists. Cannot add."
	ensure_clean

	set -- $revs
	[ $# -eq 1 ] || die "You must provide exactly one revision. Got: '$revs'"
	rev="$1"

	debug "Adding $dir as '$rev'..."
	git read-tree --prefix="$dir" $rev || exit $?
	git checkout "$dir" || exit $?
	tree=$(git write-tree) || exit $?

	headrev=$(git rev-parse HEAD) || exit $?
	# HEAD is only added as a parent when it is not the added revision.
	headp=
	if [ -n "$headrev" ] && [ "$headrev" != "$rev" ]; then
		headp="-p $headrev"
	fi
	commit=$(add_msg "$dir" "$headrev" "$rev" |
		git commit-tree $tree $headp -p "$rev") || exit $?
	git reset "$commit" || exit $?
}
# cmd_split
#   Implements "git subtree split": walk the history selected by $revs
#   oldest-first and build a parallel history containing only the
#   contents of $dir.  Old -> new commit mappings are kept in the cache
#   directory.  With $onto set, that history is pre-seeded as already
#   split.  Unless $ignore_joins is set, earlier rejoin points are looked
#   up and their ancestry is excluded from the walk.  With $rejoin set,
#   the result is merged back into HEAD.  Prints the newest split commit
#   on stdout and exits 0; dies if no revision was produced.
cmd_split()
{
	# A real carriage return for the progress line.  bash's echo does not
	# interpret "\r" unless given -e, so the literal two characters would
	# otherwise end up in the output.
	local cr
	cr=$(printf '\r')

	debug "Splitting $dir..."
	cache_setup || exit $?

	if [ -n "$onto" ]; then
		debug "Reading history for --onto=$onto..."
		git rev-list $onto |
		while read -r rev; do
			# the 'onto' history is already just the subdir, so
			# any parent we find there can be used verbatim
			debug " cache: $rev"
			cache_set "$rev" "$rev"
		done
	fi

	if [ -n "$ignore_joins" ]; then
		unrevs=
	else
		unrevs="$(find_existing_splits "$dir" "$revs")"
	fi

	# We can't restrict rev-list to only $dir here, because some of our
	# parents have the $dir contents as the root, and those won't match.
	# (and rev-list --follow doesn't seem to solve this)
	# The command text is single-quoted, so $revs and $unrevs are only
	# expanded when it is eval'ed below.
	grl='git rev-list --reverse --parents $revs $unrevs'
	revmax=$(eval "$grl" | wc -l)
	revcount=0
	createcount=0
	eval "$grl" |
	while read -r rev parents; do
		revcount=$((revcount + 1))
		say -n "$revcount/$revmax ($createcount)$cr"
		debug "Processing commit: $rev"
		exists=$(cache_get $rev)
		if [ -n "$exists" ]; then
			debug " prior: $exists"
			continue
		fi
		createcount=$((createcount + 1))
		debug " parents: $parents"
		newparents=$(cache_get $parents)
		debug " newparents: $newparents"

		tree=$(subtree_for_commit "$rev" "$dir")
		debug " tree is: $tree"
		# no subtree in this commit: nothing to copy
		[ -z "$tree" ] && continue

		newrev=$(copy_or_skip "$rev" "$tree" "$newparents") || exit $?
		debug " newrev is: $newrev"
		cache_set "$rev" "$newrev"
		cache_set latest_new "$newrev"
		cache_set latest_old "$rev"
	done || exit $?
	# The loop above ran in a pipeline subshell, so its results are read
	# back from the cache rather than from shell variables.
	latest_new=$(cache_get latest_new)
	if [ -z "$latest_new" ]; then
		die "No new revisions were found"
	fi

	if [ -n "$rejoin" ]; then
		debug "Merging split branch into HEAD..."
		latest_old=$(cache_get latest_old)
		git merge -s ours \
			-m "$(merge_msg "$dir" "$latest_old" "$latest_new")" \
			"$latest_new" >&2 || exit $?
	fi
	echo "$latest_new"
	exit 0
}
# cmd_merge
#   Implements "git subtree merge": after checking that the tree and
#   index are clean, merge the single revision in $revs using the subtree
#   strategy.  Dies unless $revs holds exactly one revision.
cmd_merge()
{
	ensure_clean

	set -- $revs
	[ $# -eq 1 ] || die "You must provide exactly one revision. Got: '$revs'"
	rev="$1"

	git merge -s subtree $rev
}
# cmd_pull <repository> <refspec...>
#   Implements "git subtree pull": after checking that the tree and index
#   are clean, run git pull with the subtree strategy, passing all
#   arguments through unchanged.  Returns git pull's status.
#
#   No `set -x` here: it was a debugging leftover that turned on shell
#   tracing and wrote every following command to stderr.
cmd_pull()
{
	ensure_clean
	git pull -s subtree "$@"
}
# Run the handler function named after the chosen subcommand, passing
# along whatever arguments are left.
"cmd_${command}" "$@"