Updated and cleaned the wiki2rest converter. The ReST documentation should look a lot better now, with less weirdness. Using a python google-code snippet to convert now, so no more need for third-party ruby downloads! This should transfer to readthedocs shortly.
This commit is contained in:
parent
43f16094c1
commit
ae0f7a04c5
55 changed files with 3990 additions and 1778 deletions
|
|
@ -1,49 +1,30 @@
|
|||
#! /usr/bin/python
|
||||
#! /usr/bin/python
|
||||
#
|
||||
# Converts Evennia's google-style wiki pages to reST documents
|
||||
#
|
||||
# Setting up to run:
|
||||
# Setting up to run:
|
||||
#
|
||||
# 1) From this directory, use SVN to download wiki2html converter by Chris Roos. Make sure
|
||||
# to download into a directory "wiki2html" like this:
|
||||
#
|
||||
# svn co http://chrisroos.googlecode.com/svn/trunk/google-wiki-syntax wiki2html
|
||||
#
|
||||
# This is a Ruby program! Sorry, couldn't find a Python lib to do this. So if you
|
||||
# don't have Ruby, you need to install that too.
|
||||
#
|
||||
# You also need to patch a bug in above program to make multiline code snippets work.
|
||||
# From the same folder as the patch file, apply the patch like this:
|
||||
#
|
||||
# patch -p0 -i wiki2html.patch
|
||||
#
|
||||
# 2) Install pandoc (converts from html to reST):
|
||||
# 1) Install pandoc (converts from html to reST):
|
||||
#
|
||||
# apt-get install pandoc (debian)
|
||||
# or download from
|
||||
# or download from
|
||||
# http://johnmacfarlane.net/pandoc/
|
||||
#
|
||||
# 3) Retrieve wiki files (*.wiki) from Google code by mercurial. Make sure
|
||||
# to retrieve them into a directory wikiconvert/wiki:
|
||||
# 2) Retrieve wiki files (*.wiki) from Google code by mercurial. Make sure
|
||||
# to retrieve them into a subdirectory wiki here:
|
||||
#
|
||||
# hg clone https://code.google.com/p/evennia.wiki wiki
|
||||
#
|
||||
# 4) Check so that you have the following file structure:
|
||||
# Regular Usage:
|
||||
#
|
||||
# wiki/ (containing google code wiki files)
|
||||
# wiki2html/ (containing the wiki_converter.rb ruby program (patch applied).)
|
||||
# html/ (empty)
|
||||
# rest/ (empty)
|
||||
# (this file)
|
||||
#
|
||||
# Usage:
|
||||
#
|
||||
# 1) Pull the wiki files into wiki/ so you have the latest.
|
||||
# 2) Run wiki2rest.py. Folders html and rest will end up containing the conversions and the contents
|
||||
# of rest/ will automatically be copied over to docs/sphinx/source/wiki.
|
||||
# 1) Make sure to pull/update the wiki files into wiki/ so you have the latest.
|
||||
# 2) Run wiki2rest.py. Temporary work folders html and rest will be created, so make sure you
|
||||
# have the rights to create directories here. The contents
|
||||
# of rest/ will automatically be copied over to docs/sphinx/source/wiki.
|
||||
# 3) From docs/sphinx, run e.g. "make html" to build the documentation from the reST sources.
|
||||
#
|
||||
|
||||
import sys, os, subprocess, re, urllib
|
||||
import sys, os, subprocess, re, urllib, shutil
|
||||
|
||||
# Setup
|
||||
|
||||
|
|
@ -69,7 +50,7 @@ WIKI_CRUMB_URL = "/p/evennia/wiki/"
|
|||
NO_CONVERT = ["SideBar", "Screenshot"]
|
||||
|
||||
|
||||
#------------------------------------------------------------
|
||||
#------------------------------------------------------------
|
||||
# This is a version of the importer that imports Google html pages
|
||||
# directly instead of going through the ruby converter. Alas, while
|
||||
# being a lot cleaner in implementation, this seems to produce worse
|
||||
|
|
@ -81,11 +62,11 @@ NO_CONVERT = ["SideBar", "Screenshot"]
|
|||
|
||||
def fetch_google_wiki_html_files():
|
||||
"""
|
||||
Acquire wiki html pages from google code
|
||||
Acquire wiki html pages from google code
|
||||
"""
|
||||
# use wiki repo to find html filenames
|
||||
html_urls = dict([(re.sub(r"\.wiki", "", fn), WIKI_ROOT_URL + re.sub(r"\.wiki", "?show=content", fn))
|
||||
for fn in os.listdir(WIKI_DIR) if fn.endswith(".wiki")])
|
||||
for fn in os.listdir(WIKI_DIR) if fn.endswith(".wiki")])
|
||||
|
||||
#html_urls = {"Index":html_urls["Index"]} #SR!
|
||||
|
||||
|
|
@ -102,30 +83,30 @@ def fetch_google_wiki_html_files():
|
|||
f = open(os.path.join(HTML_DIR, "%s.html" % name), 'w')
|
||||
f.write(s)
|
||||
f.close()
|
||||
|
||||
return html_pages
|
||||
|
||||
return html_pages
|
||||
|
||||
def clean_html(htmlstring):
    """
    Clean up html properties special to google code and not known by pandoc.

    Three clean-ups are applied, in this order:
      1) every <div id="wikiheader"> section, up to and including the third
         closing </div> that follows it, is removed
      2) the WIKI_CRUMB_URL prefix is dropped from href attributes
      3) <a href="#..."> anchor links inside <hN> headers are removed

    htmlstring - html source of one page, as a string

    Returns the cleaned html string.
    """
    # The wikiheader section spans many lines, so "." has to match newlines
    # as well. The flag is handed to re.compile() since re.sub() only accepts
    # a flags argument from Python 2.7 on.
    wikiheader = re.compile(r'<div id="wikiheader">.*?</div>.*?</div>.*?</div>', re.DOTALL)
    htmlstring = wikiheader.sub("", htmlstring)
    # remove prefix from urls (a literal replace; the prefix is not a regex)
    htmlstring = htmlstring.replace('href="' + WIKI_CRUMB_URL, 'href="')
    # remove #links from headers
    htmlstring = re.sub(r'(<h[0-9]>.*?)(<a href="#.*?</a>)(.*?</h[0-9]>)', r"\1\3", htmlstring)
    return htmlstring
|
||||
|
||||
def html2rest(name, htmlstring):
    """
    Convert html data to reST with pandoc.

    name - page name; only used in the progress message
    htmlstring - html source, sent to pandoc on its stdin

    Returns what pandoc wrote to its stdout.
    """
    # single-argument print() produces the same output under Python 2 and 3
    print(" pandoc: Converting %s ..." % name)
    p = subprocess.Popen([PANDOC_EXE, '--from=html', '--to=rst', '--reference-links'],
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # communicate() feeds stdin, waits for pandoc to exit and returns (stdout, stderr)
    return p.communicate(htmlstring)[0]
|
||||
|
|
@ -133,18 +114,18 @@ def html2rest(name, htmlstring):
|
|||
|
||||
def wiki2rest_ver2():
    """
    Convert Google wiki pages to reST.

    Fetches the pages with fetch_google_wiki_html_files(), passes each one
    through html2rest() and writes the result to <REST_DIR>/<name>.rst.
    """
    # obtain all html data from google code
    html_pages = fetch_google_wiki_html_files()

    # convert to output files
    for name, htmldata in html_pages.items():
        restfilename = os.path.join(REST_DIR, "%s.rst" % name)
        # convert before opening the target, so a failing conversion does not
        # leave an empty .rst file behind
        restdata = html2rest(name, htmldata)
        with open(restfilename, 'w') as f:
            f.write(restdata)
|
||||
|
||||
|
||||
|
||||
#------------------------------------------------------------
|
||||
# This converter uses the 3rd party ruby script to convert wiki pages
|
||||
|
|
@ -154,55 +135,74 @@ def wiki2rest_ver2():
|
|||
|
||||
def wiki2rest():
    """
    Convert from wikifile to rst file, going through html.

    Steps:
      1) run wikify.py to convert the wiki sources in "wiki" to html files
         in "html"
      2) for every *.html file in HTML_DIR whose page name is not listed in
         NO_CONVERT, strip google-wiki leftovers and rewrite the file in place
      3) call pandoc on the cleaned html file to write the matching .rst
         file into REST_DIR

    The exit status of the external programs is not checked; look for error
    messages in their output.
    """
    # Use the wikify importer. It stands in for the third-party ruby
    # wiki_convertor.rb step; using google's html output directly was also
    # tried and gave really bad results.
    # NOTE(review): the directories are passed as the literal names "wiki" and
    # "html" rather than WIKI_DIR/HTML_DIR - confirm these refer to the same places.
    print(" wikify: converting wiki -> html ...")
    subprocess.call(["python", "wikify.py", "-e", "-m", "-c", "-a", "-s", "wiki", "-d", "html"])

    # convert from html to rest with pandoc
    htmlfilenames = [fn for fn in os.listdir(HTML_DIR)
                     if fn.endswith(".html") and os.path.splitext(fn)[0] not in NO_CONVERT]

    # Google-wiki leftovers to remove from the html, applied in this order.
    # NOTE(review): it is unclear whether the converter leaves the wiki: tags
    # verbatim or entity-escaped (&lt; &quot; &gt;), so both spellings are accepted.
    leftovers = (
        # summary line; the hyphen is listed explicitly since ".-:" would be read as a range
        r'<p class="summary">[A-Za-z0-9 ./:\-]*</p>',
        r'(?:<|&lt;)wiki:toc max_depth=(?:"|&quot;)[0-9]*(?:"|&quot;) /(?:>|&gt;)',
        # same tag after its "=" was turned into a header
        r'(?:<|&lt;)wiki:toc max_depth<h1>(?:"|&quot;)[0-9]*(?:"|&quot;) /(?:>|&gt;)</h1>',
        r'<p>#settings Featured</p>',
        r'<p class="labels">Featured</p>',
        # opening and closing comment tags; the text between them is kept
        r'(?:<|&lt;)/?wiki:comment(?:>|&gt;)',
    )

    print(" pandoc: converting html -> ReST ...")
    for filename in htmlfilenames:

        htmlfilename = os.path.join(HTML_DIR, filename)

        # cleanup of code
        with open(htmlfilename, 'r') as f:
            string = f.read()
        for pattern in leftovers:
            string = re.sub(pattern, "", string)
        with open(htmlfilename, 'w') as f:
            f.write(string)

        rstfilename = os.path.join(REST_DIR, os.path.splitext(filename)[0] + ".rst")
        subprocess.call([PANDOC_EXE, "--from=html", "--to=rst", "-o", rstfilename, htmlfilename])
|
||||
|
||||
|
||||
# main program
|
||||
# main program
|
||||
if __name__ == "__main__":

    # write() rather than print, so that "done." lands on the same line under
    # both Python 2 and 3
    sys.stdout.write("creating/cleaning output dirs ... ")
    sys.stdout.flush()
    for workdir in (REST_DIR, HTML_DIR):
        # every run starts from an empty work folder
        try:
            shutil.rmtree(workdir)
        except OSError:
            # typically the folder does not exist yet; if the removal failed
            # for another reason, mkdir below raises instead
            pass
        os.mkdir(workdir)
    try:
        shutil.rmtree(SPHINX_WIKI_DIR)
    except OSError:
        # this is created by copy mechanism.
        pass
    print("done.")
    print("running conversions ...")

    try:
        wiki2rest()
    except Exception as e:
        print(e)
        print("Make sure to read this file's header to make sure everything is correctly set up. ")
        # non-zero exit status, so that a calling script can tell the run failed
        sys.exit(1)

    print("... conversions finished (make sure there are no error messages above).")
    print("copying rest data to %s ..." % SPHINX_WIKI_DIR)
    # copytree creates SPHINX_WIKI_DIR itself, which is why it was removed above
    shutil.copytree(REST_DIR, SPHINX_WIKI_DIR)

    print("... done. You can now build the docs from the sphinx directory with e.g. 'make html'.")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue