Code

More work on branch detection by implementing changeIsBranchMerge().
[git.git] / contrib / fast-import / p4-fast-export.py
1 #!/usr/bin/python
2 #
3 # p4-fast-export.py
4 #
5 # Author: Simon Hausmann <hausmann@kde.org>
6 # License: MIT <http://www.opensource.org/licenses/mit-license.php>
7 #
8 # TODO:
9 #       - support integrations (at least p4i)
10 #       - support p4 submit (hah!)
11 #       - emulate p4's delete behavior: if a directory becomes empty delete it. continue
12 #         with parent dir until non-empty dir is found.
13 #
14 import os, string, sys, time, os.path
15 import marshal, popen2, getopt, sha
16 from sets import Set;
# Runtime configuration. The defaults below can be overridden by the
# command line options parsed further down.
cacheDebug = False
silent = False
knownBranches = Set()
createdBranches = Set()
committedChanges = Set()
branch = "refs/heads/master"
# Depot path stored in the git configuration (empty output if none is stored).
globalPrefix = previousDepotPath = os.popen("git-repo-config --get p4.depotpath").read()
detectBranches = False
changesFile = ""
if len(globalPrefix) != 0:
    # drop the last character of the command output (the trailing newline)
    globalPrefix = globalPrefix[:-1]

try:
    opts, args = getopt.getopt(sys.argv[1:], "", [ "branch=", "detect-branches", "changesfile=", "silent", "known-branches=",
                                                   "cache-debug" ])
except getopt.GetoptError:
    print("fixme, syntax error")
    sys.exit(1)

for o, a in opts:
    if o == "--branch":
        branch = "refs/heads/" + a
    elif o == "--detect-branches":
        detectBranches = True
    elif o == "--changesfile":
        changesFile = a
    elif o == "--silent":
        silent = True
    elif o == "--known-branches":
        # One branch name per line. A dedicated loop variable is used so the
        # global "branch" (the ref to import into) is not overwritten while
        # reading the list.
        knownBranchesFile = open(a)
        try:
            for knownBranchLine in knownBranchesFile.readlines():
                # strip only the line terminator, so a final line without a
                # newline keeps its last character
                knownBranchName = knownBranchLine.rstrip("\n")
                if len(knownBranchName) > 0:
                    knownBranches.add(knownBranchName)
        finally:
            knownBranchesFile.close()
    elif o == "--cache-debug":
        cacheDebug = True
# Exactly one depot path argument is expected, unless a depot path is already
# known from the git configuration, in which case it may be omitted.
if len(args) == 0 and len(globalPrefix) != 0:
    if not silent:
        print("[using previously specified depot path %s]" % globalPrefix)
elif len(args) != 1:
    # every example line names the script, so each one is formatted with argv[0]
    print("usage: %s //depot/path[@revRange]" % sys.argv[0])
    print("\n    example:")
    print("    %s //depot/my/project/ -- to import the current head" % sys.argv[0])
    print("    %s //depot/my/project/@all -- to import everything" % sys.argv[0])
    print("    %s //depot/my/project/@1,6 -- to import only from revision 1 to 6" % sys.argv[0])
    print("")
    print("    (a ... is not needed in the path p4 specification, it's added implicitly)")
    print("")
    sys.exit(1)
else:
    # refuse to continue when the argument differs from the stored depot path
    if len(globalPrefix) != 0 and globalPrefix != args[0]:
        print("previous import used depot path %s and now %s was specified. this doesn't work!" % (globalPrefix, args[0]))
        sys.exit(1)
    globalPrefix = args[0]
# Import state; several of these are filled in later while importing.
users = {}
initialParent = ""
initialTag = ""
lastChange = 0
changeRange = ""
revision = ""

# Split an optional "@..." or "#..." suffix off the depot path.
if "@" in globalPrefix:
    atIdx = globalPrefix.index("@")
    globalPrefix, changeRange = globalPrefix[:atIdx], globalPrefix[atIdx:]
    if changeRange == "@all":
        changeRange = ""
    elif "," not in changeRange:
        # a suffix without a comma is treated as a single revision
        revision, changeRange = changeRange, ""
elif "#" in globalPrefix:
    hashIdx = globalPrefix.index("#")
    globalPrefix, revision = globalPrefix[:hashIdx], globalPrefix[hashIdx:]
elif not previousDepotPath:
    # no suffix and no stored depot path: import the head revision
    revision = "#head"

# Normalise the prefix: no trailing "...", always a trailing slash.
if globalPrefix.endswith("..."):
    globalPrefix = globalPrefix[:-len("...")]

if globalPrefix[-1:] != "/":
    globalPrefix = globalPrefix + "/"
def p4File(depotPath):
    """Return the output of ``p4 print -q`` for depotPath.

    When the global cacheDebug flag is set, the output is read from a file
    under /tmp/p4cache if one can be opened; otherwise p4 is run and its
    output is written to that file afterwards.
    """
    cacheKey = "/tmp/p4cache/data-" + sha.new(depotPath).hexdigest()

    if cacheDebug:
        try:
            cacheFile = open(cacheKey, "rb")
        except IOError:
            # cache file cannot be opened: fall through and ask p4
            pass
        else:
            try:
                return cacheFile.read()
            finally:
                cacheFile.close()

    pipe = os.popen("p4 print -q \"%s\"" % depotPath, "rb")
    try:
        data = pipe.read()
    finally:
        pipe.close()

    if cacheDebug:
        cacheFile = open(cacheKey, "wb")
        try:
            cacheFile.write(data)
        finally:
            cacheFile.close()

    return data
def p4CmdList(cmd):
    """Run ``p4 -G <cmd>`` and return its marshalled output records as a list.

    Records are read with marshal.load until EOFError. When the global
    cacheDebug flag is set, the records are read from a file under
    /tmp/p4cache if one can be opened; otherwise p4 is run and the records
    are written to that file afterwards.
    """
    fullCmd = "p4 -G %s" % cmd
    cacheKey = "/tmp/p4cache/cmd-" + sha.new(fullCmd).hexdigest()

    pipe = None
    if cacheDebug:
        try:
            pipe = open(cacheKey, "rb")
        except IOError:
            # cache file cannot be opened: run the command instead
            pipe = None

    cached = pipe is not None
    if not cached:
        pipe = os.popen(fullCmd, "rb")

    result = []
    try:
        try:
            while True:
                result.append(marshal.load(pipe))
        except EOFError:
            # end of the record stream
            pass
    finally:
        pipe.close()

    if cacheDebug and not cached:
        cacheFile = open(cacheKey, "wb")
        try:
            for entry in result:
                marshal.dump(entry, cacheFile)
        finally:
            cacheFile.close()

    return result
def p4Cmd(cmd):
    """Run a p4 command via p4CmdList and merge all returned records into one dict.

    Records are merged in order, so a key from a later record overwrites the
    same key from an earlier one.
    """
    merged = {}
    for record in p4CmdList(cmd):
        merged.update(record)
    return merged
def extractFilesFromCommit(commit):
    """Collect the file entries of a change description.

    commit holds numbered keys ("depotFile0", "rev0", "action0", "type0",
    "depotFile1", ...). Numbering is followed from 0 until the first missing
    "depotFileN" key. Files whose depot path does not start with the global
    depot prefix are skipped.

    Returns a list of dicts with the keys "path", "rev", "action" and "type".
    """
    collected = []
    index = 0
    while ("depotFile%s" % index) in commit:
        suffix = "%s" % index
        index += 1

        depotPath = commit["depotFile" + suffix]
        if not depotPath.startswith(globalPrefix):
            # outside of the imported depot path
            continue

        collected.append({
            "path": depotPath,
            "rev": commit["rev" + suffix],
            "action": commit["action" + suffix],
            "type": commit["type" + suffix],
        })
    return collected
def isSubPathOf(first, second):
    """Return True if first equals second or lies below it in the path hierarchy.

    "Below" means first starts with second and the character directly after
    that prefix is a slash, so "ab" is not a sub path of "a".
    """
    if first == second:
        return True
    # the second startswith() looks at the character right after the prefix
    return first.startswith(second) and first.startswith("/", len(second))
def branchesForCommit(files):
    """Determine the set of branch paths touched by the given files.

    For every file, the directory relative to the global depot prefix is
    compared against the branches collected so far for this commit and
    against the global knownBranches set. Directories not covered by either
    are added to both the result and knownBranches.

    Returns a Set of relative branch paths.
    """
    global knownBranches
    touched = Set()

    for entry in files:
        directory = entry["path"][len(globalPrefix):]
        # strip off the filename
        directory = directory[:directory.rfind("/")]

        ###### this needs more testing :)
        covered = False
        for candidate in touched:
            if directory == candidate or isSubPathOf(directory, candidate):
                # already inside a branch collected for this commit
                covered = True
                break
            if isSubPathOf(candidate, directory):
                # the collected branch lies below this directory: drop it,
                # the directory itself is handled below
                touched.remove(candidate)
                break

        if not covered:
            for candidate in knownBranches:
                if not isSubPathOf(directory, candidate):
                    continue
                if len(touched) == 0:
                    # nothing collected yet: use the known branch itself
                    directory = candidate
                else:
                    covered = True
                break

        if not covered:
            touched.add(directory)
            knownBranches.add(directory)

    return touched
def findBranchParent(branchPrefix, files):
    """Find the known branch that the files below branchPrefix were branched from.

    Only files below branchPrefix whose action is "integrate" or "branch"
    are examined. For each one the first integration record of its filelog
    is read; if that record is a "... from" record whose source lies outside
    branchPrefix, the first entry of knownBranches containing the source
    directory is returned.

    Returns the branch name, or "" if none was found. Exits the program when
    p4 filelog returns an unexpected result.
    """
    for entry in files:
        depotFile = entry["path"]
        if not depotFile.startswith(branchPrefix):
            continue
        action = entry["action"]
        if action not in ("integrate", "branch"):
            continue
        depotPath = depotFile + "#" + entry["rev"]

        records = p4CmdList("filelog \"%s\"" % depotPath)
        if len(records) != 1:
            print("eek! I got confused by the filelog of %s" % depotPath)
            sys.exit(1)

        record = records[0]
        if record["action0"] != action:
            print("eek! wrong action in filelog for %s : found %s, expected %s" % (depotPath, record["action0"], action))
            sys.exit(1)

        if not record["how0,0"].endswith(" from"):
            # ignore for branching
            continue

        source = record["file0,0"]
        if source.startswith(branchPrefix):
            continue

        lastSourceRev = record["erev0,0"]

        # the source's filelog is only checked for containing exactly one record
        sourceRecords = p4CmdList("filelog -m 1 \"%s%s\"" % (source, lastSourceRev))
        if len(sourceRecords) != 1:
            print("eek! I got confused by the source filelog of %s%s" % (source, lastSourceRev))
            sys.exit(1)

        sourceDir = source[len(globalPrefix):]
        # strip off the filename
        sourceDir = sourceDir[:sourceDir.rfind("/")]

        for known in knownBranches:
            if isSubPathOf(sourceDir, known):
                return known

    return ""
def commit(details, files, branch, branchPrefix, parent = "", merged = ""):
    """Write one commit to the git-fast-import stream (the global gitStream).

    details      -- dict of the change; the keys "time", "user", "desc" and
                    "change" are read
    files        -- list of file dicts with the keys "path", "rev", "action"
                    and "type"; entries whose path does not start with
                    branchPrefix are skipped
    branch       -- ref name written on the "commit" line
    branchPrefix -- depot path prefix that is stripped from every file path
    parent       -- if non-empty, written as the "from" line; defaults to ""
                    so callers without a parent may omit it
    merged       -- if non-empty, written as the "merge" line

    Side effects: adds the change number to committedChanges and stores it in
    lastChange.
    """
    global users
    global lastChange
    global committedChanges

    epoch = details["time"]
    author = details["user"]

    gitStream.write("commit %s\n" % branch)
    committedChanges.add(int(details["change"]))
    if author in users:
        committer = "%s %s %s" % (users[author], epoch, tz)
    else:
        # unknown user: fall back to a placeholder address
        committer = "%s <a@b> %s %s" % (author, epoch, tz)

    gitStream.write("committer %s\n" % committer)

    # NOTE(review): the message is sent in delimited form; a description that
    # contains a line consisting only of "EOT" would presumably end the data
    # block early -- confirm against the fast-import documentation.
    gitStream.write("data <<EOT\n")
    gitStream.write(details["desc"])
    gitStream.write("\n[ imported from %s; change %s ]\n" % (branchPrefix, details["change"]))
    gitStream.write("EOT\n\n")

    if len(parent) > 0:
        gitStream.write("from %s\n" % parent)

    if len(merged) > 0:
        gitStream.write("merge %s\n" % merged)

    for file in files:
        path = file["path"]
        if not path.startswith(branchPrefix):
            continue
        rev = file["rev"]
        depotPath = path + "#" + rev
        relPath = path[len(branchPrefix):]
        action = file["action"]

        if action == "delete":
            gitStream.write("D %s\n" % relPath)
        else:
            # file types starting with "x" are written with mode 755
            mode = 644
            if file["type"].startswith("x"):
                mode = 755

            data = p4File(depotPath)

            gitStream.write("M %s inline %s\n" % (mode, relPath))
            gitStream.write("data %s\n" % len(data))
            gitStream.write(data)
            gitStream.write("\n")

    gitStream.write("\n")

    lastChange = int(details["change"])
def extractFilesInCommitToBranch(files, branchPrefix):
    """Return a new list with the file dicts whose "path" starts with branchPrefix.

    The order of the input list is preserved.
    """
    return [entry for entry in files if entry["path"].startswith(branchPrefix)]
def findBranchSourceHeuristic(files, branch, branchPrefix):
    """Guess the known branch that the given files were integrated from.

    Every file whose action is "integrate" or "branch" is examined. The first
    integration record of its filelog is read; if that record is a "... from"
    record whose source lies outside branchPrefix, the first entry of
    knownBranches that contains the source directory and differs from branch
    is returned.

    Returns the branch name, or "" if none was found. Exits the program when
    p4 filelog returns an unexpected result.
    """
    for entry in files:
        action = entry["action"]
        if action not in ("integrate", "branch"):
            continue
        depotPath = entry["path"] + "#" + entry["rev"]

        records = p4CmdList("filelog \"%s\"" % depotPath)
        if len(records) != 1:
            print("eek! I got confused by the filelog of %s" % depotPath)
            sys.exit(1)

        record = records[0]
        if record["action0"] != action:
            print("eek! wrong action in filelog for %s : found %s, expected %s" % (depotPath, record["action0"], action))
            sys.exit(1)

        if not record["how0,0"].endswith(" from"):
            # ignore for branching
            continue

        source = record["file0,0"]
        if source.startswith(branchPrefix):
            continue

        lastSourceRev = record["erev0,0"]

        # the source's filelog is only checked for containing exactly one record
        sourceRecords = p4CmdList("filelog -m 1 \"%s%s\"" % (source, lastSourceRev))
        if len(sourceRecords) != 1:
            print("eek! I got confused by the source filelog of %s%s" % (source, lastSourceRev))
            sys.exit(1)

        sourceDir = source[len(globalPrefix):]
        # strip off the filename
        sourceDir = sourceDir[:sourceDir.rfind("/")]

        for candidate in knownBranches:
            if candidate != branch and isSubPathOf(sourceDir, candidate):
                return candidate

    return ""
def changeIsBranchMerge(sourceBranch, destinationBranch, change):
    """Check whether all files of sourceBranch are integrated into destinationBranch.

    The file lists of both branches at the given change are queried. For
    every non-deleted source file the "p4 integrated" records are inspected.
    A file counts as integrated if one of its records refers to a file of the
    destination branch and either belongs to this change or matches the
    revision checks below. Source files whose destination file is deleted
    are skipped.

    Returns False (after printing a message) for the first source file
    without such a record, True otherwise.
    """
    def revisionNumber(spec):
        # drop the first character; a remainder of "none" counts as revision 0
        number = spec[1:]
        if number == "none":
            return 0
        return int(number)

    liveSourceFiles = {}
    for info in p4CmdList("files %s...@%s" % (globalPrefix + sourceBranch + "/", change)):
        if info["action"] != "delete":
            liveSourceFiles[info["depotFile"]] = info

    destinationFiles = {}
    for info in p4CmdList("files %s...@%s" % (globalPrefix + destinationBranch + "/", change)):
        destinationFiles[info["depotFile"]] = info

    for fileName in liveSourceFiles:
        integrated = False
        deleted = False
        for integration in p4CmdList("integrated \"%s\"" % fileName):
            toFile = integration["fromFile"] # yes, it's true, it's fromFile
            if toFile not in destinationFiles:
                continue
            destFile = destinationFiles[toFile]
            if destFile["action"] == "delete":
                deleted = True
                break

            if int(integration["change"]) == change:
                integrated = True
                continue

            destRev = int(destFile["rev"])
            startRev = revisionNumber(integration["startFromRev"])
            endRev = revisionNumber(integration["endFromRev"])

            initialBranch = destRev == 1 and integration["how"] != "branch into"
            inRange = startRev <= destRev <= endRev
            newer = destRev > max(startRev, endRev)

            if initialBranch or inRange or newer:
                integrated = True

        if deleted or integrated:
            continue

        print("file %s was not integrated from %s into %s" % (fileName, sourceBranch, destinationBranch))
        return False

    return True
def getUserMap():
    """Build a dict mapping p4 user names to "Full Name <email>" strings.

    The data comes from the output of "p4 users"; records without a "User"
    key are skipped.
    """
    userMap = {}
    for record in p4CmdList("users"):
        if "User" in record:
            fullName = record["FullName"]
            email = record["Email"]
            userMap[record["User"]] = fullName + " <" + email + ">"
    return userMap
users = getUserMap()

# Without an explicit change range, try to continue after the change number
# recorded in a "p4/<change>" tag that git-name-rev reports for the branch.
# Failing to find or parse such a tag is not an error: the settings simply
# stay untouched.
if len(changeRange) == 0:
    try:
        sout, sin, serr = popen2.popen3("git-name-rev --tags `git-rev-parse %s`" % branch)
        try:
            output = sout.read()
        finally:
            sout.close()
            sin.close()
            serr.close()
        if output.endswith("\n"):
            output = output[:-1]
        # index() raises ValueError when the output names no p4 tag
        tagIdx = output.index(" tags/p4/")
        caretIdx = output.find("^")
        endPos = len(output)
        if caretIdx != -1:
            endPos = caretIdx
        # 9 == len(" tags/p4/"); continue with the change after the tagged one
        rev = int(output[tagIdx + 9 : endPos]) + 1
        changeRange = "@%s,#head" % rev
        initialParent = os.popen("git-rev-parse %s" % branch).read()[:-1]
        initialTag = "p4/%s" % (int(rev) - 1)
    except (ValueError, IOError, OSError):
        pass
def formatUtcOffset(secondsEastOfUtc):
    """Format a UTC offset given in seconds as a "+HHMM" / "-HHMM" string.

    Hours and minutes are zero padded, so one hour east becomes "+0100" and
    five and a half hours east becomes "+0530".
    """
    if secondsEastOfUtc < 0:
        sign = "-"
    else:
        sign = "+"
    totalMinutes = abs(secondsEastOfUtc) // 60
    return "%s%02d%02d" % (sign, totalMinutes // 60, totalMinutes % 60)

# time.timezone counts seconds west of UTC, hence the negation.
tz = formatUtcOffset(-time.timezone)
tzsign = tz[0]
gitOutput, gitStream, gitError = popen2.popen3("git-fast-import")

if len(revision) > 0:
    # Import the state of the depot path at a single revision as one commit.
    print("Doing initial import of %s from revision %s" % (globalPrefix, revision))

    details = { "user" : "git perforce import user", "time" : int(time.time()) }
    details["desc"] = "Initial import of %s from the state at revision %s" % (globalPrefix, revision)
    details["change"] = revision
    newestRevision = 0

    fileCnt = 0
    for info in p4CmdList("files %s...%s" % (globalPrefix, revision)):
        change = int(info["change"])
        if change > newestRevision:
            newestRevision = change

        if info["action"] == "delete":
            continue

        # store the file under numbered keys, as read by extractFilesFromCommit
        for prop in [ "depotFile", "rev", "action", "type" ]:
            details["%s%s" % (prop, fileCnt)] = info[prop]

        fileCnt = fileCnt + 1

    # the commit is labelled with the newest change seen among the files
    details["change"] = newestRevision

    try:
        # the initial import has no parent commit
        commit(details, extractFilesFromCommit(details), branch, globalPrefix, "")
    except IOError:
        print(gitError.read())
        sys.exit(1)

else:
    # Import a list of changes one by one.
    changes = []

    if len(changesFile) > 0:
        # one change number per line; duplicates are dropped via the Set
        output = open(changesFile).readlines()
        changeSet = Set()
        for line in output:
            changeSet.add(int(line))

        for change in changeSet:
            changes.append(change)

        changes.sort()
    else:
        output = os.popen("p4 changes %s...%s" % (globalPrefix, changeRange)).readlines()

        for line in output:
            # the change number is the second space separated field
            changeNum = line.split(" ")[1]
            changes.append(changeNum)

        changes.reverse()

    if len(changes) == 0:
        if not silent:
            print("no changes to import!")
        sys.exit(1)

    cnt = 1
    for change in changes:
        description = p4Cmd("describe %s" % change)

        if not silent:
            sys.stdout.write("\rimporting revision %s (%s%%)" % (change, cnt * 100 / len(changes)))
            sys.stdout.flush()
        cnt = cnt + 1

        try:
            files = extractFilesFromCommit(description)
            if detectBranches:
                # NOTE: this loop rebinds the module-level "branch"; the tag
                # reset written after the import reads the value left here.
                for branch in branchesForCommit(files):
                    knownBranches.add(branch)
                    branchPrefix = globalPrefix + branch + "/"

                    filesForCommit = extractFilesInCommitToBranch(files, branchPrefix)

                    merged = ""
                    parent = ""
                    ########### remove cnt!!!
                    if branch not in createdBranches and cnt > 2:
                        createdBranches.add(branch)
                        parent = findBranchParent(branchPrefix, files)
                        if parent == branch:
                            parent = ""

                    if len(parent) == 0:
                        merged = findBranchSourceHeuristic(filesForCommit, branch, branchPrefix)
                        if len(merged) > 0:
                            print("change %s could be a merge from %s into %s" % (description["change"], merged, branch))
                            if not changeIsBranchMerge(merged, branch, int(description["change"])):
                                merged = ""

                    branch = "refs/heads/" + branch
                    if len(parent) > 0:
                        parent = "refs/heads/" + parent
                    if len(merged) > 0:
                        merged = "refs/heads/" + merged
                    commit(description, files, branch, branchPrefix, parent, merged)
            else:
                # without branch detection all files of the change go to the
                # single target branch
                commit(description, files, branch, globalPrefix, initialParent)
                initialParent = ""
        except IOError:
            print(gitError.read())
            sys.exit(1)
# Finish the progress output line.
if not silent:
    print("")

# Point the p4/<change> tag at the tip of the branch that was written last.
gitStream.write("reset refs/tags/p4/%s\n" % lastChange)
gitStream.write("from %s\n\n" % branch)

for pipe in (gitStream, gitOutput, gitError):
    pipe.close()

# Store the depot path in the git configuration; drop the tag of the previous import.
os.popen("git-repo-config p4.depotpath %s" % globalPrefix).read()
if initialTag:
    os.popen("git tag -d %s" % initialTag).read()

sys.exit(0)