1 """ Import tracker data from Sourceforge.NET
3 This script needs four steps to work:
5 1. Export the project XML data using the admin web interface at sf.net
2. Fetch the attached files (they are not included in the XML export):
8 import_sf.py files <path to XML> <path to files dir>
10 this will place all the downloaded files in the files dir by file id.
11 3. Convert the sf.net XML to Roundup "export" format:
13 import_sf.py import <tracker home> <path to XML> <path to files dir>
15 this will generate a directory "/tmp/imported" which contains the
16 data to be imported into a Roundup tracker.
17 4. Import the data:
19 roundup-admin -i <tracker home> import /tmp/imported
21 And you're done!
22 """
24 import sys, os, csv, time, urllib2, httplib, mimetypes, urlparse
25 # Python 2.3 ... 2.6 compatibility:
26 from roundup.anypy.sets_ import set
28 try:
29 import cElementTree as ElementTree
30 except ImportError:
31 from elementtree import ElementTree
33 from roundup import instance, hyperdb, date, support, password
# Timestamp shared by the whole run: import_xml() uses it to date the
# "IMPORT FROM SOURCEFORGE" message and write_csv() uses it as a default date.
# NOTE(review): '.' is presumably Roundup's date spec for "now" -- confirm
# against the roundup.date documentation.
today = date.Date('.')

# Template for an artifact's download URL.  get_url() fills in the
# "group_id", "atid" and "aid" parameters; fetch_files() then appends
# "&file_id=<id>" to select a single attachment.
DL_URL = 'http://sourceforge.net/tracker/download.php?group_id=%(group_id)s&atid=%(atid)s&aid=%(aid)s'
def get_url(aid):
    """ Work out the download URL for the files attached to an artifact.

    So basically we have to jump through hoops: given only an artifact id
    we ask "/support/tracker.php" on sourceforge.net about it and read the
    query parameters of the redirect it answers with.

    aid -- the sf.net artifact id (as found in the XML export)

    Returns DL_URL filled in with the query parameters of the redirect
    target.  The caller appends "&file_id=<id>" to address one file.

    Raises AssertionError if the response is not a 302 redirect, and
    KeyError if the redirect lacks a parameter that DL_URL needs.
    """
    conn = httplib.HTTPConnection("sourceforge.net")
    try:
        # first we hit this URL...
        conn.request("GET", "/support/tracker.php?aid=%s"%aid)
        response = conn.getresponse()
        # ... which should respond with a redirect to the correct url which
        # has the magic "group_id" and "atid" values in it that we need.
        # Raised explicitly (not "assert") so the check survives python -O.
        if response.status != 302:
            raise AssertionError('response code was %s'%response.status)
        location = response.getheader('location')
    finally:
        # always release the socket, even when the check above fails
        conn.close()

    # element -2 of the parsed URL is the query string
    query = urlparse.urlparse(location)[-2]
    # split on the first "=" only, so values containing "=" stay intact
    info = dict([param.split('=', 1) for param in query.split('&')])
    return DL_URL%info
def fetch_files(xml_file, file_dir):
    """ Fetch files referenced in the xml_file into the dir file_dir.

    xml_file -- path to the XML export downloaded from sf.net
    file_dir -- existing directory to download into; each attachment is
                stored under its sf.net file id

    The artifact history in the XML is scanned for "File Added" events;
    files that were later deleted, or that are already present in
    file_dir, are skipped.  The per-artifact download URL needs an extra
    round trip to sf.net (see get_url), so it is cached in
    "<file_dir>/urls.txt" as "<artifact id> <url>" lines.
    """
    root = ElementTree.parse(xml_file).getroot()
    to_fetch = set()
    deleted = set()
    for artifact in root.find('artifacts'):
        for field in artifact.findall('field'):
            if field.get('name') == 'artifact_id':
                aid = field.text
        for field in artifact.findall('field'):
            if field.get('name') != 'artifact_history': continue
            for event in field.findall('history'):
                d = {}
                for info in event.findall('field'):
                    d[info.get('name')] = info.text
                # "old_value" of a file event looks like "<file id>:<name>"
                if d['field_name'] == 'File Added':
                    fid = d['old_value'].split(':')[0]
                    to_fetch.add((aid, fid))
                if d['field_name'] == 'File Deleted':
                    fid = d['old_value'].split(':')[0]
                    deleted.add((aid, fid))
    to_fetch = to_fetch - deleted

    # skip what an earlier run already downloaded; the files are named by
    # file id, so compare ids (not the (aid, fid) pairs) with the listing
    got = set(os.listdir(file_dir))
    to_fetch = [(aid, fid) for (aid, fid) in to_fetch if fid not in got]

    # load cached urls (sigh)
    url_cache = os.path.join(file_dir, 'urls.txt')
    urls = {}
    if os.path.exists(url_cache):
        f = open(url_cache)
        try:
            for line in f:
                parts = line.split()
                if len(parts) != 2:
                    # blank or damaged line; the url is simply re-fetched
                    continue
                urls[parts[0]] = parts[1]
        finally:
            f.close()

    for aid, fid in support.Progress('Fetching files', to_fetch):
        if aid not in urls:
            urls[aid] = get_url(aid)
            # remember the url right away so an aborted run keeps it
            f = open(url_cache, 'a')
            try:
                f.write('%s %s\n'%(aid, urls[aid]))
            finally:
                f.close()
        url = urls[aid] + '&file_id=' + fid
        f = urllib2.urlopen(url)
        try:
            data = f.read()
        finally:
            f.close()
        # attachments are arbitrary binary data: write them unmodified
        n = open(os.path.join(file_dir, fid), 'wb')
        try:
            n.write(data)
        finally:
            n.close()
def import_xml(tracker_home, xml_file, file_dir):
    """ Generate Roundup tracker import files based on the tracker schema,
    sf.net xml export and downloaded files from sf.net.

    tracker_home -- home directory of the Roundup tracker; it is opened as
                    "admin" to look up the status, priority and user ids
    xml_file     -- path to the XML export downloaded from sf.net
    file_dir     -- directory holding the attachments fetched beforehand,
                    one file per sf.net file id

    Everything is written below "/tmp/imported": one CSV per class (user,
    keyword, issue, msg, file) via write_csv, plus "issue-journals.csv".

    Raises KeyError when an artifact lacks a field the conversion relies
    on (for example "artifact_id", "summary" or "category") or names a
    user that appears nowhere else in the export.
    """
    tracker = instance.open(tracker_home)
    db = tracker.open('admin')

    resolved = db.status.lookup('resolved')
    unread = db.status.lookup('unread')
    # this used to look up 'unread' a second time, which made the
    # "unread -> chatting" transition below a no-op
    chatting = db.status.lookup('chatting')
    critical = db.priority.lookup('critical')
    urgent = db.priority.lookup('urgent')
    bug = db.priority.lookup('bug')
    feature = db.priority.lookup('feature')
    wish = db.priority.lookup('wish')
    # NOTE(review): these two ids are not used below; the lookups are kept
    # as they presumably fail early on a tracker without the stock users.
    adminuid = db.user.lookup('admin')
    anonuid = db.user.lookup('anonymous')

    root = ElementTree.parse(xml_file).getroot()

    def to_date(ts):
        """ Convert a seconds-since-epoch string into a Roundup Date. """
        return date.Date(time.gmtime(float(ts)))

    # parse out the XML
    artifacts = []
    categories = set()
    users = set()
    add_files = set()
    remove_files = set()
    for artifact in root.find('artifacts'):
        d = {}
        # "op" collects the original post, which becomes the first message
        op = {}
        artifacts.append(d)
        for field in artifact.findall('field'):
            name = field.get('name')
            if name == 'artifact_messages':
                for message in field.findall('message'):
                    l = d.setdefault('messages', [])
                    m = {}
                    l.append(m)
                    for mfield in message.findall('field'):
                        mname = mfield.get('name')
                        if mname == 'adddate':
                            m[mname] = to_date(mfield.text)
                        else:
                            m[mname] = mfield.text
                        if mname == 'user_name': users.add(mfield.text)
            elif name == 'artifact_history':
                for event in field.findall('history'):
                    l = d.setdefault('history', [])
                    e = {}
                    l.append(e)
                    for hfield in event.findall('field'):
                        hname = hfield.get('name')
                        if hname == 'entrydate':
                            e[hname] = to_date(hfield.text)
                        else:
                            e[hname] = hfield.text
                        if hname == 'mod_by': users.add(hfield.text)
                    # "old_value" of a file event is "<file id>:<name>"
                    if e['field_name'] == 'File Added':
                        add_files.add(e['old_value'].split(':')[0])
                    elif e['field_name'] == 'File Deleted':
                        remove_files.add(e['old_value'].split(':')[0])
            elif name == 'details':
                op['body'] = field.text
            elif name == 'submitted_by':
                op['user_name'] = field.text
                d[name] = field.text
                users.add(field.text)
            elif name == 'assigned_to':
                d[name] = field.text
                # an assignee who never posted or edited anything must
                # still get a user record, or the lookup below fails
                if field.text:
                    users.add(field.text)
            elif name == 'open_date':
                thedate = to_date(field.text)
                op['adddate'] = thedate
                d[name] = thedate
            else:
                d[name] = field.text

        categories.add(d['category'])

        if 'body' in op:
            l = d.setdefault('messages', [])
            l.insert(0, op)

    add_files -= remove_files

    # create users
    # sf.net's "nobody" maps onto Roundup's anonymous user (id 2)
    userd = {'nobody': '2'}
    users.discard('nobody')
    data = [
        {'id': '1', 'username': 'admin', 'password': password.Password('admin'),
            'roles': 'Admin', 'address': 'richard@python.org'},
        {'id': '2', 'username': 'anonymous', 'roles': 'Anonymous'},
    ]
    for n, user in enumerate(list(users)):
        userd[user] = n+3
        data.append({'id': str(n+3), 'username': user, 'roles': 'User',
            'address': '%s@users.sourceforge.net'%user})
    write_csv(db.user, data)
    users=userd

    # create categories
    # the category "None" is sf.net's way of saying "no category"
    categoryd = {'None': None}
    categories.discard('None')
    data = []
    for n, category in enumerate(list(categories)):
        categoryd[category] = n
        data.append({'id': str(n), 'name': category})
    write_csv(db.keyword, data)
    categories = categoryd

    # create issues
    issue_data = []
    file_data = []
    message_data = []
    issue_journal = []
    message_id = 0
    for artifact in artifacts:
        d = {}
        d['id'] = artifact['artifact_id']
        d['title'] = artifact['summary']
        d['assignedto'] = users[artifact['assigned_to']]
        if d['assignedto'] == '2':
            d['assignedto'] = None
        d['creation'] = artifact['open_date']
        activity = artifact['open_date']
        d['creator'] = users[artifact['submitted_by']]
        actor = d['creator']
        # compare against None: category id 0 is a real category
        category_id = categories[artifact['category']]
        if category_id is not None:
            d['keyword'] = [category_id]
        issue_journal.append((
            d['id'], d['creation'].get_tuple(), d['creator'], "'create'", {}
        ))

        p = int(artifact['priority'])
        if artifact['artifact_type'] == 'Feature Requests':
            if p > 3:
                d['priority'] = feature
            else:
                d['priority'] = wish
        else:
            if p > 7:
                d['priority'] = critical
            elif p > 5:
                d['priority'] = urgent
            elif p > 3:
                d['priority'] = bug
            else:
                d['priority'] = feature

        s = artifact['status']
        if s == 'Closed':
            d['status'] = resolved
        elif s == 'Deleted':
            d['status'] = resolved
            d['is retired'] = True
        else:
            d['status'] = unread

        nosy = set()
        # messages of this issue only; they are numbered and linked below
        # before being added to the global message_data list
        issue_messages = []
        for message in artifact.get('messages', []):
            authid = users[message['user_name']]
            if not message['body']: continue
            body = convert_message(message['body'], message_id)
            if not body: continue
            m = {'content': body, 'author': authid,
                'date': message['adddate'],
                'creation': message['adddate'], }
            issue_messages.append(m)
            if authid not in (None, '2'):
                nosy.add(authid)
            activity = message['adddate']
            actor = authid
            if d['status'] == unread:
                d['status'] = chatting

        # add import message
        m = {'content': 'IMPORT FROM SOURCEFORGE', 'author': '1',
            'date': today, 'creation': today}
        issue_messages.append(m)

        # sort messages and assign ids
        # Only this issue's messages are touched here.  Iterating over
        # message_data instead would attach every earlier message to this
        # issue and renumber messages that were already linked.
        d['messages'] = []
        issue_messages.sort(lambda a,b:cmp(a['date'],b['date']))
        for message in issue_messages:
            message_id += 1
            message['id'] = str(message_id)
            d['messages'].append(message_id)
        message_data.extend(issue_messages)

        d['nosy'] = list(nosy)

        files = []
        for event in artifact.get('history', []):
            if event['field_name'] == 'File Added':
                fid, name = event['old_value'].split(':', 1)
                if fid in add_files:
                    files.append(fid)
                    name = name.strip()
                    try:
                        # attachments are binary: read them unmodified
                        f = open(os.path.join(file_dir, fid), 'rb')
                        try:
                            content = f.read()
                        finally:
                            f.close()
                    except (IOError, OSError):
                        # file was never fetched; import a placeholder
                        content = 'content missing'
                    file_data.append({
                        'id': fid,
                        'creation': event['entrydate'],
                        'creator': users[event['mod_by']],
                        'name': name,
                        'type': mimetypes.guess_type(name)[0],
                        'content': content,
                    })
                continue
            elif event['field_name'] == 'close_date':
                action = "'set'"
                info = { 'status': unread }
            elif event['field_name'] == 'summary':
                action = "'set'"
                info = { 'title': event['old_value'] }
            else:
                # not an interesting / translatable event
                continue
            row = [ d['id'], event['entrydate'].get_tuple(),
                users[event['mod_by']], action, info ]
            if event['entrydate'] > activity:
                activity = event['entrydate']
            issue_journal.append(row)
        d['files'] = files

        d['activity'] = activity
        d['actor'] = actor
        issue_data.append(d)

    write_csv(db.issue, issue_data)
    write_csv(db.msg, message_data)
    write_csv(db.file, file_data)

    # write_csv() left an empty issue journal behind; replace it with the
    # real one
    f = open('/tmp/imported/issue-journals.csv', 'w')
    try:
        writer = csv.writer(f, colon_separated)
        writer.writerows(issue_journal)
    finally:
        f.close()
def convert_message(content, id):
    """ Drop the boilerplate sf.net puts in front of a logged-in comment.

    A comment starting with "Logged In: YES" loses its first three lines
    and any surrounding whitespace; every other comment is returned
    untouched.  The id argument is accepted but not used.
    """
    has_sf_header = content[:14] == 'Logged In: YES'
    if not has_sf_header:
        return content
    body_lines = content.splitlines()[3:]
    return '\n'.join(body_lines).strip()
class colon_separated(csv.excel):
    """ The csv "excel" dialect, but with ":" separating the columns. """
    delimiter = ':'
def write_csv(klass, data):
    """ Write the entries in data as the import CSV for a Roundup class.

    klass -- the hyperdb class the entries belong to; it supplies the
             class name, the property definitions and the column order
    data  -- list of dicts mapping property names to values; the optional
             key 'is retired' marks an entry as retired

    Creates "/tmp/imported" if needed and writes "<classname>.csv" there
    (colon separated, values as Python reprs) together with an empty
    "<classname>-journals.csv".  For a FileClass the 'content' of each
    entry is additionally stored in the file named by
    klass.exportFilename().

    Properties missing from an entry get defaults: '[]' for multilinks,
    user '1' for creator/actor, the time of this run for
    creation/activity and 'None' for anything else.
    """
    props = klass.getprops()
    if not os.path.exists('/tmp/imported'):
        os.mkdir('/tmp/imported')
    f = open('/tmp/imported/%s.csv'%klass.classname, 'w')
    try:
        writer = csv.writer(f, colon_separated)
        propnames = klass.export_propnames()
        # the retired flag is not a property; it goes in the last column
        propnames.append('is retired')
        writer.writerow(propnames)
        for entry in data:
            row = []
            for name in propnames:
                if name == 'is retired':
                    continue
                prop = props[name]
                if name in entry:
                    if isinstance(prop, (hyperdb.Date, hyperdb.Interval)):
                        row.append(repr(entry[name].get_tuple()))
                    elif isinstance(prop, hyperdb.Password):
                        row.append(repr(str(entry[name])))
                    else:
                        row.append(repr(entry[name]))
                elif isinstance(prop, hyperdb.Multilink):
                    row.append('[]')
                elif name in ('creator', 'actor'):
                    row.append("'1'")
                elif name in ('creation', 'activity'):
                    # the property is called 'creation'; the old test for
                    # 'created' never matched, so it was written as 'None'
                    row.append(repr(today.get_tuple()))
                else:
                    row.append('None')
            row.append(entry.get('is retired', False))
            writer.writerow(row)

            if isinstance(klass, hyperdb.FileClass) and entry.get('content'):
                fname = klass.exportFilename('/tmp/imported/', entry['id'])
                support.ensureParentsExist(fname)
                # binary mode: the content must hit the disk unmodified
                c = open(fname, 'wb')
                try:
                    if isinstance(entry['content'], unicode):
                        c.write(entry['content'].encode('utf8'))
                    else:
                        c.write(entry['content'])
                finally:
                    c.close()
    finally:
        f.close()

    # an empty journal file goes alongside every class file
    f = open('/tmp/imported/%s-journals.csv'%klass.classname, 'w')
    f.close()
if __name__ == '__main__':
    # Dispatch on the sub-command; the remaining arguments are passed to
    # the handler positionally (see the module docstring for their order).
    args = sys.argv[1:]
    if args and args[0] == 'import':
        import_xml(*args[1:])
    elif args and args[0] == 'files':
        fetch_files(*args[1:])
    else:
        # missing or unknown command: print the usage text on stderr and
        # exit with a non-zero status
        sys.exit(__doc__)