Second pass at supporting RST->DB conversions:

- Minor update to rst_template/bk_main.xml to use namespace reference
  "xl" instead of "xlink" to be compatible with herold output
- Create new opf_html2db.py script to convert the singlehtml output of a
  project with herold and then massage the result into OPF format for
  inclusion in the rst_template layout (a hypothetical invocation is sketched
  below)
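
  A hypothetical invocation, assuming the Sphinx singlehtml output already
  lives under _build/singlehtml (all paths here are illustrative only):

    opf_html2db.py -s _build/singlehtml -b _build -d _build/docbook \
                   -m Docs-Master -t Docs-Template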

Signed-off-by: Jeff Scheel <scheel@us.ibm.com>

rst_template/bk_main.xml
@@ -27,7 +27,7 @@

<book xmlns="http://docbook.org/ns/docbook"
xmlns:xi="http://www.w3.org/2001/XInclude"
-xmlns:xlink="http://www.w3.org/1999/xlink"
+xmlns:xl="http://www.w3.org/1999/xlink"
version="5.0"
xml:id="bk_main">


opf_html2db.py
@@ -0,0 +1,692 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Builds OpenPOWER Foundation documentation using standard template.
#
# Assumes Sphinx singlehtml output has been generated; herold is used below
# to convert that HTML to DocBook.
#
import os, sys, getopt, shutil, errno, subprocess, copy, re
from os import fdopen, remove
from shutil import move
from git import Repo
from lxml import etree
from conf import opf_docbook_settings, master_doc, project
from subprocess import Popen, PIPE
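# Note: conf.py is the project's Sphinx configuration file.  Beyond the
# standard Sphinx values master_doc and project, this script assumes it also
# defines opf_docbook_settings, a dict keyed by template file name whose
# values map tag names to replacement text (see the substitution loop in
# main()), for example (illustrative value):
#
#   opf_docbook_settings = {'pom.xml': {'webhelpDirname': 'my_doc'}, ...}
#
# The 'pom.xml'/'webhelpDirname' entry is also used to locate the build output.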

def copy_xml_to_template(src_dir, tgt_dir):
    # Copy XML files
    src_files = os.listdir(src_dir)
    for filename in src_files:
        full_file = os.path.join(src_dir, filename)
        if (os.path.isfile(full_file)):
            shutil.copy(full_file, tgt_dir)
        elif (os.path.isdir(full_file)):
            try:
                os.makedirs(os.path.join(tgt_dir, filename))
            except OSError as exception:
                if exception.errno != errno.EEXIST:
                    raise
            copy_xml_to_template(os.path.join(src_dir, filename), os.path.join(tgt_dir, filename))

def update_file(filename, old_str, new_str):
    # Verify tag exists
    with open(filename) as f:
        s = f.read()
        if old_str not in s:
            print 'Error: "{old_str}" not found in {filename}.'.format(**locals())
            sys.exit(-2)

    # Safely write the changed content, if found in the file
    with open(filename, 'w') as f:
        s = s.replace(old_str, new_str)
        f.write(s)

def traverse_clean_html_source_examples(filename):
    temp_file = filename + '.tmp'
    code_found = False
    html_source_start_regex = '^<div class="highlight-default"><div class="highlight"><pre>'
    html_source_stop_regex = '^</pre></div>'
    span_regex = '\<span(\sclass="[a-z]+")?>'
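    # The '</span>' replacement and span_regex below strip the syntax-highlight
    # markup that the Sphinx HTML builder emits inside <pre> blocks.  For
    # example, a line like
    #     <span class="kn">import</span> <span class="nn">os</span>
    # is reduced to plain text:
    #     import os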

    print filename
    # Walk file by line
    with open(temp_file, 'w') as new_file:
        with open(filename) as old_file:
            for line in old_file:
                if re.match(html_source_start_regex, line):
                    # print 'DEBUG: Code block start found'
                    code_found = True
                elif re.match(html_source_stop_regex, line):
                    # print 'DEBUG: Code block stop found'
                    code_found = False

                if code_found:
                    oldline = line
                    # Remove </span> references
                    line = line.replace('</span>', '')
                    # Remove <span class=...> references
                    line = re.sub(span_regex, '', line)
                    # print 'DEBUG: line changed.\n Old: >' + oldline + '<\n New: >' + line + '<'
                new_file.write(line)

    # Preserve old file
    move(filename, filename + '.bak')
    # Move new file into old
    move(temp_file, filename)

def traverse_clean_html_nodes(element):

    if 'ul' in element.tag and element.attrib:
        key = element.attrib.keys()[0]
        value = element.attrib[key]
        if 'id' in key:
            first_child = element.__getitem__(0)
            if first_child.__len__() == 0:
                print 'Error: Bad assumption. <ul> tag is empty.'
            # Add attribute to first_child and remove from element
            first_child.attrib[key] = value
            del element.attrib[key]
            # print 'DEBUG: <ul> attributes: ', element.attrib
            # print 'DEBUG: child attributes: ', first_child.attrib
            sys.stderr.write('**Information: id attribute on <ul> tag moved to first sub-element, <' + element.tag + '> for ' + key + ' = ' + value + '\n')
    for child in element.getchildren():
        traverse_clean_html_nodes(child)

def cleanup_html(infile, outfile):
    # Create internal representation of document from infile
    parser = etree.XMLParser(remove_comments=False)
    tree = etree.parse(infile, parser=parser)
    head = tree.getroot()

    # print_tree( head, 0, 2 )

    # Walk nodes doing any cleanup
    traverse_clean_html_nodes(head)

    # Persist updates to output file
    tree.write(outfile)
    # Note: This invocation needs to occur post tree-write because
    # it will update file
    traverse_clean_html_source_examples(outfile)

def find_match(reference, anchor_node, relationship):

    if not anchor_node is None and 'anchor' in anchor_node.tag:
        # Try this, verify matching ids
        key = anchor_node.attrib.keys()[0]
        value = anchor_node.attrib[key]
        regex = '^' + reference + '(\.\d+)?$'

        # print 'DEBUG: ' + relationship + ' anchor check. Reference: ' + reference + ' Regex: ' + regex + ' Value: ' + value

        if re.match(regex, value):
            return anchor_node

        else:
            # print 'DEBUG: Anchor in ' + relationship + ' tag does not match. Expected: ', reference, ' Found: ', value, ' Looking further...'
            node = anchor_node
            while not node.getprevious() is None:
                node = node.getprevious()
                if 'anchor' in node.tag:
                    key = node.attrib.keys()[0]
                    value = node.attrib[key]
                    if re.match(regex, value):
                        # print 'DEBUG: Anchor in ' + relationship + ' tag finally match!!!'
                        return node
                    # else
                    #     print 'DEBUG: Anchor in ' + relationship + ' tag does not match. Expected: ', reference, ' Found: ', value, ' Looking further...'
                else:
                    # print 'DEBUG: Anchor in ' + relationship + ' tag does not match. Expected: ', reference, ' Found: ', value, ' Anchor node: ', node
                    return None

    else:
        # print 'Error: find_match called with non-anchor element. Reference: ' + reference + ' Node: ' + anchor_node + ' Relationship: ' + relationship
        return None
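
# The link cleanup below assumes herold output of roughly this shape
# (illustrative only, reconstructed from the checks in traverse_clean_links
# and find_match; the attribute name and values are assumptions):
#
#   <anchor xml:id="some-ref"/>            duplicate ids may become "some-ref.1"
#   <section>
#     <title>Some Title
#       <link linkend="some-ref">&#182;</link>
#     </title>
#     ...
#
# The pilcrow-only <link> is always deleted; when a matching <anchor> is found,
# its id is promoted onto the enclosing <section> (or onto the link's parent
# when that parent is not a <title>), and the <anchor> is removed.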

def traverse_clean_links(element):

    if 'link' in element.tag:
        # Note: Terminal tag, no need to recurse
        # Gather link details
        text = element.text
        num_attributes = element.attrib.__len__()
        reference = element.attrib.get('linkend', None)
        if num_attributes == 1 and not reference is None and text == u'¶':
            # Erroneous link, find related anchor, could be "uncle" or "cousin" (of various degrees)
            anchor = None
            parent = element.getparent()
            grandparent = parent.getparent()
            greatuncle = grandparent.getprevious()
            # Check Great Uncle for match
            anchor = find_match(reference, greatuncle, 'Great Uncle')
            # If no match, locate "cousin" and if found, check it
            if anchor is None:
                cousin = None
                if not greatuncle is None:
                    node = greatuncle
                    while node.__len__() > 0 and cousin is None:
                        node = node.__getitem__(node.__len__() - 1)
                        if 'anchor' in node.tag:
                            cousin = node
                if not cousin is None:
                    anchor = find_match(reference, cousin, 'Cousin')
            # If no match, try uncle
            if anchor is None:
                uncle = parent.getprevious()
                anchor = find_match(reference, uncle, 'Uncle')
            # Always delete <link> tag of this type (contains only u'¶' for text)
            parent.__delitem__(parent.index(element))
            if not anchor is None:
                # print 'MATCH FOUND: ', reference

                # Retrieve attribute key from anchor
                # Note: The <link> key is always correctly set by herold in the case of duplicate keys.
                #       The <anchor> tag may have a "dot" and a number appended to the value in <link>.
                key = anchor.attrib.keys()[0]
                value = anchor.get(key)
                if 'title' in parent.tag:
                    # Add id attribute to Grandparent
                    grandparent.set(key, value)
                else:
                    # Add id attribute to Parent
                    parent.set(key, value)
                sys.stderr.write('**Information: removed dummy link for ' + reference + ' and added proper xml:id as ' + value + '\n')
                # Delete <anchor> tag
                anchor_parent = anchor.getparent()
                anchor_parent.__delitem__(anchor_parent.index(anchor))
            else:
                # Nothing more to do
                sys.stderr.write('**Information: Matching <anchor> element not found for reference = ' + reference + '. Link removed.' + '\n')
    else:
        for child in element.getchildren():
            traverse_clean_links(child)

def traverse_clean_other(element):
    if 'informalexample' in element.tag:
        # Get key elements around this one
        parent = element.getparent()
        grandparent = parent.getparent()

        # Create new elements -- section and title (use text from informal example element)
        new_section = parent.makeelement(grandparent.tag)
        new_title = parent.makeelement('title')
        title = element.text
        new_title.text = title

        # Add title to new section
        new_section.append(new_title)

        # Copy over children from <informalexample> to new <section>
        for child in element.getchildren():
            element.remove(child)
            new_section.append(child)

        # print 'DEBUG: old tree...'
        # print_tree(parent, 0, 2)

        # Add new <section> as next sibling of parent and remove <informalexample> from parent
        parent.addnext(new_section)
        parent.remove(element)

        # print 'DEBUG: new tree...'
        # print_tree(parent.getparent(), 0, 3)
        sys.stderr.write('**Information: <informalexample> ' + element.text + ' removed and promoted as <section> with title: ' + title + '\n')

    elif 'note' in element.tag:
        # Get key elements around this one
        parent = element.getparent()
        grandparent = parent.getparent()

        # print 'DEBUG: old tree...'
        # print_tree(parent, 0, 4)

        # Create new elements -- section and title (use text from bridgehead subelement)
        new_section = parent.makeelement(parent.tag)
        bridgehead = element.__getitem__(0).__getitem__(0)
        if not 'bridgehead' in bridgehead.tag:
            print 'Error: Bad assumption about <note> structure. Bridgehead not found as expected.'
            sys.exit(-20)
        title = bridgehead.text
        new_title = parent.makeelement('title')
        new_title.text = title

        # Add title to new section
        new_section.append(new_title)
        # Remove <bridgehead> from <note>
        bridgehead.getparent().remove(bridgehead)
        # Copy over remaining items in <note> to new <section>
        for child in element.getchildren():
            element.remove(child)
            new_section.append(child)
        # Add new <section> as next sibling of parent and remove <note> from parent
        parent.addnext(new_section)
        parent.remove(element)

        # print 'DEBUG: New tree...'
        # print_tree(grandparent, 0, 3)
        sys.stderr.write('**Information: <note> removed and promoted as <section> with title: ' + title + '\n')

    elif 'anchor' in element.tag:
        # Get key elements around this one
        parent = element.getparent()

        # Retrieve anchor details
        key = element.attrib.keys()[0]
        value = element.attrib[key]

        # Remove node
        parent.remove(element)

        sys.stderr.write('**Information: removed <anchor> with id: ' + value + '\n')
    elif 'section' in element.tag:
        # Ensure at least one child beyond <title>
        if element.__len__() == 1:
            title = element.__getitem__(0).text
            parent = element.getparent()

            # Make and add empty paragraph to section, just behind title
            new_para = parent.makeelement('para')
            new_para.text = '&nbsp;'
            element.append(new_para)
            sys.stderr.write('**Information: <para> tag added to empty section with title: ' + title + '\n')

    for child in element.getchildren():
        traverse_clean_other(child)

def cleanup_xml(infile, outfile):
    # Create internal representation of document from infile
    parser = etree.XMLParser(remove_comments=False)
    tree = etree.parse(infile, parser=parser)
    head = tree.getroot()

    # print_tree( head, 0, 2 )

    # Note: because link cleanup involves relative location of multiple tags, it must be separate and first
    traverse_clean_links(head)
    traverse_clean_other(head)

    # Persist updates to output file
    tree.write(outfile)

def print_tree(element, level, max_depth):
    # Print current element
    num_children = element.__len__()
    indent = ' '.ljust(level+1)
    if level < max_depth:
        print indent, 'Tag: ', element.tag, ' Attrib: ', element.attrib, ' Text: >', element.text, '< Num children: ', num_children
        for i in range(num_children):
            child = element.__getitem__(i)
            print_tree(child, level+1, max_depth)

def traverse_clean_sections(element):
    section_blacklist = ['Navigation', 'Table Of Contents']

    # Walk children looking for next set of <section> tags, opening include files if necessary
    num_children = element.__len__()
    i = 0
    while i < num_children:
        child = element.__getitem__(i)
        parent = element
        # print 'DEBUG: clean sections, visiting node with tag: ' + child.tag
        # Walk first level of tags, deleting info and any "blacklist" sections
        if 'section' in child.tag:
            num_sec_children = child.__len__()
            title = ''
            if num_sec_children > 0:
                first_grandchild = child.__getitem__(0)
                if first_grandchild.__len__() == 0:
                    title = child.__getitem__(0).text
                else:
                    # This makes me nervous, not sure how well it will work...
                    title = first_grandchild.__getitem__(0).text
                # print 'Section title found: ' + title
            if title in section_blacklist:
                # Delete section
                # print 'DEBUG: Deleted blacklist section ' + title
                parent.remove(child)
                num_children = num_children - 1
            else:
                traverse_clean_sections(child)
                i = i + 1
        else:
            i = i + 1

def eliminate_top_section(head):

    # Remove <info> and <index> sections
    for child in head.getchildren():
        if 'info' in child.tag or 'index' in child.tag:
            # print 'DEBUG: unneeded top level tag: ' + child.tag
            head.remove(child)
    # Eliminate head section which really is the title
    if head.__len__() == 1:
        first_section = head.__getitem__(0)
        if not 'section' in first_section.tag:
            print 'Error: Bad assumption. Top tag in document is not a section.'
            sys.exit(-36)
        # print 'DEBUG: first section -- tag: ' + first_section.tag + ' num children: ' + str(first_section.__len__())
        for child in first_section.getchildren():
            # print 'DEBUG: child -- tag: ' + child.tag + ' num children: ' + str(child.__len__())
            # Promote sections
            if 'section' in child.tag:
                first_section.remove(child)
                head.append(child)
                # print 'DEBUG: Promoting child -- tag: ' + child.tag
        head.remove(first_section)

    else:
        print 'Error: Bad assumption. Too many sections (' + str(head.__len__()) + ') found in base document.'
        sys.exit(-13)


def transform_head_sections(head):

    num_chapter = 0
    for child in head.getchildren():
        if 'section' in child.tag:
            child.tag = child.tag.replace('section', 'chapter')
            num_chapter = num_chapter + 1

    if num_chapter == 0:
        print 'Error: No chapters found in document'
        sys.exit(-6)


def convert_structure(infile, outfile):

    # Create internal representation of document from infile
    parser = etree.XMLParser(remove_comments=False)
    tree = etree.parse(infile, parser=parser)
    head = tree.getroot()

    # print 'DEBUG: Pre tree structure cleanup...'
    # print_tree(head, 0, 3)

    if 'article' in head.tag:
        head.tag = 'book'
        # Clear attributes
        for attrib in head.attrib.keys():
            head.attrib.pop(attrib, None)
        if head.attrib.items() != []:
            print 'Error: Section attributes not removed. ', head.attrib.items(), ' items remain -- ', head.attrib.keys()
            sys.exit(-5)
    else:
        print 'Toc file contains ', head.tag, 'tag, not <article>'
        sys.exit(-4)

    # Traverse tree sections, removing nodes as needed
    traverse_clean_sections(head)

    # Eliminate first section, placeholder for document title
    eliminate_top_section(head)
    # Traverse remaining top level <section> and convert to <chapter>
    transform_head_sections(head)

    # print 'DEBUG: Post tree structure cleanup...'
    # print_tree(head, 0, 2)
    # Persist updates to output file
    tree.write(outfile)


def remove_book_tags(old_file, new_file):
    with open(old_file, 'r') as input:
        with open(new_file, 'wb') as output:
            for line in input:
                if '<book' not in line and '</book>' not in line:
                    output.write(line)

def insert_toc_into_book(toc_file, book_file):
    book_file_bak = book_file + '.bak'
    shutil.copy2(book_file, book_file_bak)
    key_string = '<!--TBD-->'
    inserted_toc = False

    with open(book_file_bak, 'r') as input:
        with open(book_file, 'wb') as output:
            for line in input:
                if key_string not in line:
                    output.write(line)
                else:
                    inserted_toc = True
                    # Write toc_file contents
                    with open(toc_file, 'r') as input_toc:
                        for line_toc in input_toc:
                            output.write(line_toc)
    if not inserted_toc:
        print 'Error: key string of "', key_string, '" not found in ', book_file
        sys.exit(-7)

def build_revhistory(book_file):
    # Variables for formatting git log
    log_format = '%h%x01%an%x01%ad%x01%s%x02'
    log_fields = ['id', 'author', 'date', 'subject']
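    # With this format each commit is emitted as
    #   <abbrev-hash>\x01<author>\x01<iso-date>\x01<subject>\x02
    # so the output is split into records on \x02 and into fields on \x01,
    # then zipped with log_fields into one dict per commit.  Each commit
    # becomes a <revision> whose <date> is the date portion of the ISO
    # timestamp and whose <revdescription> is "<subject> (<hash>)".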

    # Retrieve log
    pipe = Popen('git log --date=iso --format="%s" -- . .' % log_format, shell=True, stdout=PIPE)
    log, _ = pipe.communicate()
    # Substitute for problem characters: &, <, >
    log = log.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
    # Remove newlines, trailing end-of-record (0x02), and then split at end-of-record
    log = log.replace('\n', '').strip('\x02').split('\x02')
    # Split records into individual fields
    log = [row.split('\x01') for row in log]
    # Create dictionary using field names
    log = [dict(zip(log_fields, row)) for row in log]

    # Format log into revision history
    revision = '<revhistory>\n'
    for entry in log:
        revision = revision + '<revision><date>' + entry['date'].split(' ')[0] + '</date><revdescription><para>' +\
                   entry['subject'] + ' (' + entry['id'] + ')</para></revdescription></revision>\n'
    revision = revision + '</revhistory>\n'

    # Update file
    rev_str = '<revhistory>TBD</revhistory>'
    update_file(book_file, rev_str, revision)

def main(argv):
    master_git_url = 'https://github.com/OpenPOWERFoundation/Docs-Master.git'
    template_git_url = 'https://github.com/OpenPOWERFoundation/Docs-Template.git'
    html_dir = ''
    build_dir = ''
    db_dir = ''
    master_dir = ''
    template_dir = ''
    toc_file = master_doc + '.xml'

    try:
        opts, args = getopt.getopt(argv, "hs:b:d:m:t:", ["htmldir=", "builddir=", "docbookdir=", "masterdir=", "templatedir="])
    except getopt.GetoptError:
        print 'Invalid option specified. Usage:'
        print '  opf_html2db.py -s <htmldir> -b <builddir> -d <docbookdir> -m <masterdir> -t <templatedir>'
        sys.exit(-1)
    for opt, arg in opts:
        if opt == '-h':
            print 'opf_html2db.py -s <htmldir> -b <builddir> -d <docbookdir> -m <masterdir> -t <templatedir>'
            sys.exit(0)
        elif opt in ("-s", "--htmldir"):
            html_dir = arg
        elif opt in ("-b", "--builddir"):
            build_dir = arg
        elif opt in ("-d", "--docbookdir"):
            db_dir = arg
        elif opt in ("-m", "--masterdir"):
            master_dir = arg
        elif opt in ("-t", "--templatedir"):
            template_dir = arg

    # Verify html directory, error if not found
    if not os.path.exists(html_dir):
        print 'ERROR: ' + html_dir + ' does not exist. Please specify path to directory containing single html file.'
        sys.exit(-11)

    # Generate path to single file
    # NOTE: assumption is that file name is always "index.html" (master_doc). If this doesn't prove true, may need to use variable.
    html_file_src = os.path.join(html_dir, master_doc + '.html')

    if not os.path.isfile(html_file_src):
        print 'ERROR: ' + html_file_src + ' does not exist. Please verify path to single html file and file name.'
        sys.exit(-12)

    # Convert html file to xml and place in db directory
    if not os.path.exists(db_dir):
        print 'Making docbook build directory ' + db_dir
        os.makedirs(db_dir)

    db_file = os.path.join(db_dir, project + '.xml')
    if os.path.exists(db_file):
        os.remove(db_file)

    # Clean up herold html output
    print 'Cleaning up html file before processing'
    html_file = os.path.join(db_dir, master_doc + '.html')
    html_file_tmp1 = html_file + '.tmp1'
    shutil.copy2(html_file_src, html_file)
    cleanup_html(html_file, html_file_tmp1)

    print 'Converting html file to XML...'
    print subprocess.check_output(['herold', '-i', html_file_tmp1, '-o', db_file])
    # Clone a new Master Directory
    print 'Cloning new Docs-Master directory...'
    if os.path.exists(master_dir):
        shutil.rmtree(master_dir)
    Repo.clone_from(master_git_url, master_dir)
    # Clone a new Template Directory
    print 'Cloning new Docs-Template directory...'
    if os.path.exists(template_dir):
        shutil.rmtree(template_dir)
    Repo.clone_from(template_git_url, template_dir)
    # Create the new XML file *****
    rst_template_dir = os.path.join(template_dir, 'rst_template')
    full_toc_file = os.path.join(rst_template_dir, toc_file)
    shutil.copy2(db_file, full_toc_file)
    book_file = os.path.join(rst_template_dir, 'bk_main.xml')
    # Update all files in opf_docbook_settings with the tag/value combinations specified
    print 'Updating Docbook files with settings from conf.py...'
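    # Each entry rewrites a '<tag>TBD</tag>' placeholder in the named template
    # file.  For example (hypothetical tag and value), a conf.py entry of
    # {'bk_main.xml': {'pubdate': '2018-06-01'}} turns '<pubdate>TBD</pubdate>'
    # into '<pubdate>2018-06-01</pubdate>'; an empty value simply deletes the
    # placeholder string.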
    for f in opf_docbook_settings.keys():
        filename = os.path.join(rst_template_dir, f)
        tags = opf_docbook_settings[f]

        for tag in tags:
            value = opf_docbook_settings[f][tag]
            if value != '':
                new_str = '<' + tag + '>' + value + '</' + tag + '>'
            else:
                new_str = ''

            old_str = '<' + tag + '>TBD</' + tag + '>'
            update_file(filename, old_str, new_str)
    # Parse TOC file, convert high level tag to "book" and write back out to .tmp files
    print 'Cleaning up Docbook file structure...'
    full_toc_file_tmp1 = full_toc_file + '.tmp1'
    full_toc_file_tmp2 = full_toc_file + '.tmp2'
    full_toc_file_tmp3 = full_toc_file + '.tmp3'

    # Walk document correcting XML errors
    cleanup_xml(full_toc_file, full_toc_file_tmp1)
    # Remove extraneous sections
    convert_structure(full_toc_file_tmp1, full_toc_file_tmp2)
    # Eliminate <book> and <title> tags in .tmp2 and write to .tmp3 file
    remove_book_tags(full_toc_file_tmp2, full_toc_file_tmp3)

    # Insert the converted TOC content into the book file
    insert_toc_into_book(full_toc_file_tmp3, book_file)
    # Create revision history from Git Log
    print 'Building document revision history from git log...'
    build_revhistory(book_file)

    # TODO: Remove this hack after rst_template bk_main gets updated
    update_file(book_file, 'xmlns:xlink', 'xmlns:xl')
    # Perform build of Docbook
    print 'Building Docbook PDF and HTML output in Maven...'
    maven_log_file = 'build.log'
    maven_build = 'cd ' + rst_template_dir + '; mvn generate-sources 2>&1 | tee ' + maven_log_file
    pipe = Popen(maven_build, shell=True)
    log, err = pipe.communicate()
    if pipe.returncode != 0:
        print "Build failed with return code: %s" % pipe.returncode
        print "See %s/build.log for more details" % rst_template_dir
    # Copy output to better location
    print 'Copying build output...'
    bld_out_dir = os.path.join(rst_template_dir, 'target/docbkx/webhelp')
    html_head = os.path.join(bld_out_dir, opf_docbook_settings['pom.xml']['webhelpDirname'] + '/index.html')
    if os.path.exists(bld_out_dir) and os.path.exists(html_head):
        doc_dir = os.path.join(build_dir, 'docbook/opf_docbook')
        if os.path.exists(doc_dir):
            shutil.rmtree(doc_dir)
        shutil.copytree(bld_out_dir, doc_dir)
        print "Build successful. Output files located in %s" % os.path.join(doc_dir, opf_docbook_settings['pom.xml']['webhelpDirname'])
        sys.exit(0)

    else:
        print "Docbook build failed. Check logfile %s for details." % os.path.join(rst_template_dir, maven_log_file)
        sys.exit(-10)

if __name__ == "__main__":
    main(sys.argv[1:])