post

Basecamp to MediaWiki

Ever wanted to convert your content in Basecamp to MediaWiki?

Here’s what you do:

  1. Get your export zip file from Basecamp.
  2. Pass that zip file as the only argument to the script below; it will write an XML file next to it, named like the zip but ending in `_mediawiki.xml`.
  3. Go to the Special:Import page in your MediaWiki instance and import the XML file generated in the previous step.
#!/usr/bin/env python

# Convert Basecamp export -> MediaWiki import.
#
# URL: http://www.swaroopch.com/notes/Basecamp_to_MediaWiki
# License: http://sam.zoy.org/wtfpl/COPYING
# Version: 0.1
# Last-Updated-On: Thu, 23 Oct 2008 12:24:15 +0530
# Tested-With: Python 2.5+

# Limitations
# * WriteBoards are NOT present in the export, so we lose that content.

# References
# MediaWiki format: http://meta.wikimedia.org/wiki/Help:Export#Export_format
# Processing XML: http://www.amk.ca/talks/2006-02-07/

import os
import sys
import re
import zipfile
import xml.etree.cElementTree as ElementTree
import cStringIO as StringIO
from pprint import pprint

# Where is the Basecamp Exported zip file?
# Exactly one command-line argument is accepted and it must end in '.zip'.
if len(sys.argv) != 2 or not sys.argv[1].endswith('.zip'):
    print('Please provide the location of the Basecamp Export Zip file.')
    sys.exit(1)

zip_file_name = sys.argv[1]
# Swap only the trailing extension. str.replace() would also rewrite any
# other '.zip' occurring earlier in the path (e.g. a 'backups.zip.d/' folder).
output_file_name = zip_file_name[:-len('.zip')] + '_mediawiki.xml'

# Read contents of the single XML file in the zip archive
archive = zipfile.ZipFile(zip_file_name)
try:
    member_names = archive.namelist()
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if len(member_names) != 1:
        print('Expected exactly one file in the Basecamp export, found %d.'
              % len(member_names))
        sys.exit(1)
    xml_file_name = member_names[0]
    xml_data = archive.read(xml_file_name)
finally:
    # try/finally (not `with`) keeps this working on Python 2.5.
    archive.close()

# `tree` is the root element of the Basecamp export; `output_root` is the
# (initially empty) <mediawiki> root element that pages get appended to.
tree = ElementTree.XML(xml_data)
output_root = ElementTree.XML(
    '<?xml version="1.0" encoding="UTF-8"?><mediawiki xml:lang="en"></mediawiki>')

# Process the XML tree
basecamp_name = tree.findtext('name')
ElementTree.SubElement(output_root, 'sitename').text = basecamp_name

# Sorted list of the account's default post categories.
categories = sorted(
    category.text
    for category in tree.findall('default-post-categories/category'))

def format_content(text):
    '''Given text in Textile syntax, convert to MediaWiki syntax.

    Handles three constructs, applied in this order:

    * external links: "label":url    becomes  [url label]
    * bold:           **word** or __word__  becomes  triple-quoted word
    * italics:        *word* or _word_      becomes  double-quoted word

    Bold and italics only match a single run of word characters, so
    multi-word emphasis is left untouched.

    Returns the converted text, or an empty string when text is None
    (which is what findtext() yields for a missing element).
    '''
    if text is None:
        return ''

    # The URL runs up to the next whitespace character; the output swaps the
    # order because MediaWiki wants the URL first, then the label.
    text = re.sub(r'"([^"]+)":([^\s]+)', r'[\2 \1]', text) # external link
    # Bold must run before italics, otherwise the single-marker pattern would
    # consume the inner pair of a doubled marker.
    text = re.sub(r"(?:\*\*|__)(\w+)(?:\*\*|__)", r"'''\1'''", text) # bold
    text = re.sub(r"(?:\*|_)(\w+)(?:\*|_)", r"''\1''", text) # italics
    # TODO Convert links to Writeboards into internal links on this wiki page
    return text

# Emit one wiki <page> per Basecamp project, holding a single <revision>
# whose <text> lists the project's announcement, milestones and todo lists.
for project in tree.findall('projects/project'):
    project_name = project.findtext('name')
    project_last_updated = project.findtext('last-changed-on')

    project_page = ElementTree.SubElement(output_root, 'page')
    ElementTree.SubElement(project_page, 'title').text = project_name
    content_tree = ElementTree.SubElement(project_page, 'revision')
    ElementTree.SubElement(content_tree, 'timestamp').text = project_last_updated
    ElementTree.SubElement(content_tree, 'contributor').text = ''
    ElementTree.SubElement(content_tree, 'comment').text = ''

    # Wiki text is collected as a list of lines and joined at the end.
    # A cStringIO buffer cannot accept unicode that is not plain ASCII, so
    # any non-ASCII project text would make the conversion fail.
    content_lines = []

    content_lines.append('== Announcements ==')
    content_lines.append(format_content(project.findtext('announcement')))

    content_lines.append('== Milestones ==')
    for milestone in project.findall('milestones/milestone'):
        completed_on = milestone.findtext('completed-on')
        if completed_on is not None:
            # Keep only the first 10 characters of the completion timestamp.
            completed_on = ', completed on ' + completed_on[:10]
        else:
            completed_on = ''
        content_lines.append(
            '*%s, due on %s%s'
            % (milestone.findtext('title'), milestone.findtext('deadline'),
               completed_on))

    content_lines.append('== Todo Lists ==')
    for todo_list in project.findall('todo-lists/todo-list'):
        todo_list_name = todo_list.findtext('name')
        content_lines.append('=== %s ===' % todo_list_name)
        for todo_item in todo_list.findall('todo-items/todo-item'):
            if todo_item.findtext('completed') == 'true':
                completed = ' (completed)'
            else:
                completed = ''
            content_lines.append(
                '*%s%s'
                % (format_content(todo_item.findtext('content')), completed))

    # Blank line, then file every project page under a category named after
    # the Basecamp account.
    content_lines.append('\n[[Category:%s]]' % basecamp_name)

    # Every line is newline-terminated, including the last one.
    ElementTree.SubElement(content_tree, 'text').text = \
        '\n'.join(content_lines) + '\n'

# Write to file
output_tree = ElementTree.ElementTree(output_root)
output_tree.write(output_file_name)
print('Success! MediaWiki Import XML saved at %s' % output_file_name)