Updated and cleaned up the wiki2rest converter. The reST documentation should look a lot better now, with less weirdness. The conversion now uses a Python google-code snippet (wikify.py), so there is no longer any need for third-party Ruby downloads! This should transfer to Read the Docs shortly.

2012-06-26 17:45:12 +02:00 · 2012-06-26 17:45:12 +02:00 · ae0f7a04c5
commit ae0f7a04c5
parent 43f16094c1
55 changed files with 3990 additions and 1778 deletions
--- a/docs/sphinx/wiki2rest/wiki2rest.py
+++ b/docs/sphinx/wiki2rest/wiki2rest.py
@ -1,49 +1,30 @@
-#! /usr/bin/python 
+#! /usr/bin/python
 #
 #  Converts Evennia's google-style wiki pages to reST documents
 #
-#  Setting up to run: 
+#  Setting up to run:
 #
-#   1) From this directory, use SVN to download wiki2html converter by Chris Roos. Make sure 
-#      to download into a directory "wiki2html" like this: 
-#
-#        svn co http://chrisroos.googlecode.com/svn/trunk/google-wiki-syntax wiki2html
-#
-#      This is a Ruby program! Sorry, couldn't find a Python lib to do this. So if you 
-#      don't have Ruby, you need to install that too. 
-#      
-#      You also need to patch a bug in above program to make multiline code snippets work. 
-#      From the same folder as the patch file, apply the patch like this: 
-#  
-#        patch -p0 -i wiki2html.patch 
-#
-#   2) Install pandoc (converts from html to reST): 
+#   1) Install pandoc (converts from html to reST):
 #
 #        apt-get install pandoc  (debian)
-#           or download from 
+#           or download from
 #        http://johnmacfarlane.net/pandoc/
 #
-#   3) Retrieve wiki files (*.wiki) from Google code by mercurial. Make sure
-#      to retrieve them into a directory wikiconvert/wiki: 
+#   2) Retrieve wiki files (*.wiki) from Google code by mercurial. Make sure
+#      to retrieve them into a subdirectory wiki here:
 #
 #         hg clone https://code.google.com/p/evennia.wiki wiki
 #
-#   4) Check so that you have the following file structure: 
+#   Regular Usage:
 #
-#        wiki/ (containing google code wiki files)
-#        wiki2html/ (containing the wiki_converter.rb ruby program (patch applied).)
-#        html/  (empty)
-#        rest/  (empty)
-#        (this file)
-#
-#   Usage: 
-#
-#   1) Pull the wiki files into wiki/ so you have the latest. 
-#   2) Run wiki2rest.py. Folders html and rest will end up containing the conversions and the contents 
-#      of rest/ will automatically be copied over to docs/sphinx/source/wiki. 
+#   1) Make sure to pull/update the wiki files into wiki/ so you have the latest.
+#   2) Run wiki2rest.py. Temporary work folders html and rest will be created, so make sure you
+#          have the rights to create directories here. The contents
+#          of rest/ will automatically be copied over to docs/sphinx/source/wiki.
+#   3) From docs/sphinx, run e.g. "make html" to build the documentation from the reST sources.
 #

-import sys, os, subprocess, re, urllib
+import sys, os, subprocess, re, urllib, shutil

 # Setup

@ -69,7 +50,7 @@ WIKI_CRUMB_URL = "/p/evennia/wiki/"
 NO_CONVERT = ["SideBar", "Screenshot"]


-#------------------------------------------------------------ 
+#------------------------------------------------------------
 # This is a version of the importer that imports Google html pages
 # directly instead of going through the ruby converter. Alas, while
 # being a lot cleaner in implementation, this seems to produce worse
@ -81,11 +62,11 @@ NO_CONVERT = ["SideBar", "Screenshot"]

 def fetch_google_wiki_html_files():
    """
-    Acquire wiki html pages from google code 
+    Acquire wiki html pages from google code
    """
    # use wiki repo to find html filenames
    html_urls = dict([(re.sub(r"\.wiki", "", fn), WIKI_ROOT_URL + re.sub(r"\.wiki", "?show=content", fn))
-                      for fn in os.listdir(WIKI_DIR) if fn.endswith(".wiki")])    
+                      for fn in os.listdir(WIKI_DIR) if fn.endswith(".wiki")])

    #html_urls = {"Index":html_urls["Index"]} #SR!

@ -102,30 +83,30 @@ def fetch_google_wiki_html_files():
        f = open(os.path.join(HTML_DIR, "%s.html" % name), 'w')
        f.write(s)
        f.close()
-        
-    return html_pages 
+
+    return html_pages

 def clean_html(htmlstring):
    """
-    Clean up html properties special to google code and not known by pandoc    
+    Clean up html properties special to google code and not known by pandoc
    """
-    # remove wikiheader tag (searches over many lines). Unfortunately python <2.7 don't support 
-    # DOTALL flag in re.sub ... 
+    # remove wikiheader tag (searches over many lines). Unfortunately python <2.7 don't support
+    # DOTALL flag in re.sub ...
    matches = re.findall(r'<div id="wikiheader">.*?</div>.*?</div>.*?</div>', htmlstring, re.DOTALL)
-    for match in matches:        
+    for match in matches:
        htmlstring = htmlstring.replace(match, "")
    #htmlstring = re.sub(r'<div id="wikiheader">.*?</div>.*?</div>.*?</div>', "", htmlstring, re.DOTALL)
-    # remove prefix from urls 
+    # remove prefix from urls
    htmlstring = re.sub('href="' + WIKI_CRUMB_URL, 'href="', htmlstring)
-    # remove #links from headers 
-    htmlstring = re.sub(r'(<h[0-9]>.*?)(<a href="#.*?</a>)(.*?</h[0-9]>)', r"\1\3", htmlstring)    
+    # remove #links from headers
+    htmlstring = re.sub(r'(<h[0-9]>.*?)(<a href="#.*?</a>)(.*?</h[0-9]>)', r"\1\3", htmlstring)
    return htmlstring

 def html2rest(name, htmlstring):
    """
-    Convert html data to reST with pandoc 
+    Convert html data to reST with pandoc
    """
-    print "pandoc: Converting %s ..." % name
+    print " pandoc: Converting %s ..." % name
    p = subprocess.Popen([PANDOC_EXE, '--from=html', '--to=rst', '--reference-links'],
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    return p.communicate(htmlstring)[0]
@ -133,18 +114,18 @@ def html2rest(name, htmlstring):

 def wiki2rest_ver2():
    """
-    Convert Google wiki pages to reST. 
+    Convert Google wiki pages to reST.
    """
-    # obtain all html data from google code 
+    # obtain all html data from google code
    html_pages = fetch_google_wiki_html_files()
-    
+
    # convert to output files
    for name, htmldata in html_pages.items():
        restfilename = os.path.join(REST_DIR, "%s.rst" % name)
        f = open(restfilename, 'w')
        f.write(html2rest(name, htmldata))
        f.close()
-    
+

 #------------------------------------------------------------
 # This converter uses the 3rd party ruby script to convert wiki pages
@ -154,55 +135,74 @@ def wiki2rest_ver2():

 def wiki2rest():
    """
-    Convert from wikifile to rst file, going through html 
-    """        
+    Convert from wikifile to rst file, going through html
+    """

    # convert from wikifile to html with wiki2html
-    subprocess.call([RUBY_EXE, "wiki_convertor.rb", WIKI_DIR, HTML_DIR], cwd=WIKI2HTML_DIR)
+    #subprocess.call([RUBY_EXE, "wiki_convertor.rb", WIKI_DIR, HTML_DIR], cwd=WIKI2HTML_DIR)
+    # use google html output directly (really bad)
+    #subprocess.call(["python", "get_wiki_as_html.py"])
+    # use wikify importer
+    print " wikify: converting wiki -> html ..."
+    subprocess.call(["python", "wikify.py", "-e", "-m", "-c", "-a", "-s", "wiki", "-d", "html"])

    # convert from html to rest with pandoc
-    htmlfilenames = [fn for fn in os.listdir(HTML_DIR) 
+    htmlfilenames = [fn for fn in os.listdir(HTML_DIR)
                     if fn.endswith(".html") and not re.sub(r".html", "", fn) in NO_CONVERT]

-    for filename in htmlfilenames: 
+    print " pandoc: converting html -> ReST ..."
+    for filename in htmlfilenames:

        htmlfilename = os.path.join(HTML_DIR, filename)

-        # cleanup of code 
-        string = "".join(open(htmlfilename, 'r').readlines())        
+        # cleanup of code
+        string = "".join(open(htmlfilename, 'r').readlines())
        string = re.sub(r'<p class="summary">[A-Za-z0-9 .-\:]*</p>', "", string)
-        string = re.sub(r"&lt;wiki:toc max_depth=&quot;[0-9]*&quot; /&gt;", "", string)            
-        string = re.sub(r"&lt;wiki:toc max_depth<h1>&quot;[0-9]*&quot; /&gt;</h1>", "", string)            
+        string = re.sub(r"&lt;wiki:toc max_depth=&quot;[0-9]*&quot; /&gt;", "", string)
+        string = re.sub(r"&lt;wiki:toc max_depth<h1>&quot;[0-9]*&quot; /&gt;</h1>", "", string)
        string = re.sub(r"<p>#settings Featured</p>", "", string)
        string = re.sub(r'<p class="labels">Featured</p>', "", string)
        string = re.sub(r'&lt;wiki:comment&gt;', "", string)
        string = re.sub(r'&lt;/wiki:comment&gt;', "", string)
-        #string = re.sub(r'&lt;wiki:comment&gt;[<>;a-zA\/\n-&Z0-9 ]*&lt;/wiki:comment&gt;', "", string)
+        string = re.sub(r'&lt;wiki:comment&gt;[<>;a-zA\/\n-&Z0-9 ]*&lt;/wiki:comment&gt;', "", string)
        f = open(htmlfilename, 'w')
        f.write(string)
        f.close()

        rstfilename = os.path.join(REST_DIR, re.sub(r".html$", ".rst", filename))
-        print "pandoc: converting %s -> %s" % (htmlfilename, rstfilename)
+        #print "pandoc: converting %s -> %s" % (htmlfilename, rstfilename)
        subprocess.call([PANDOC_EXE, "--from=html", "--to=rst", "-o", rstfilename, htmlfilename])
-        

-# main program 
+# main program
 if __name__ == "__main__":

+    print "creating/cleaning output dirs ...",
    try:
-        wiki2rest() 
+        shutil.rmtree(REST_DIR)
+        os.mkdir(REST_DIR)
+    except OSError:
+        os.mkdir(REST_DIR)
+    try:
+        shutil.rmtree(HTML_DIR)
+        os.mkdir(HTML_DIR)
+    except Exception:
+        os.mkdir(HTML_DIR)
+    try:
+        shutil.rmtree(SPHINX_WIKI_DIR)
+    except Exception:
+        # this is created by copy mechanism.
+        pass
+    print "done."
+    print "running conversions ..."
+
+    try:
+        wiki2rest()
    except Exception, e:
        print e
        print "Make sure to read this file's header to make sure everything is correctly set up. "
        sys.exit()

-    import shutil
-    try:
-        shutil.rmtree(SPHINX_WIKI_DIR)
-        print "Deleted old %s." % SPHINX_WIKI_DIR
-    except OSError:
-        pass 
-    print "Copying %s -> %s" % (REST_DIR, SPHINX_WIKI_DIR)
+    print "... conversions finished (make sure there are no error messages above)."
+    print "copying rest data to %s ..." % SPHINX_WIKI_DIR
    shutil.copytree(REST_DIR, SPHINX_WIKI_DIR)
-
+    print "... done. You can now build the docs from the sphinx directory with e.g. 'make html'."