Description
How to programmatically rewrite HTML in Plone.
It is recommended to use the lxml library for all HTML DOM manipulation in Python.
Plone is no exception.
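Below is an example which walks through a folder recursively, rewrites the relative links in each page's HTML to absolute URLs, removes tags that would break rendering, and outputs all pages and subpages as one continuous document.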
This is suitable for e.g. printing the whole folder in one pass.
help.py:
from lxml import etree
from StringIO import StringIO
import urlparse
from lxml import html
import zope.interface
from five import grok
from Products.CMFCore.interfaces import IFolderish
grok.templatedir("templates")
def fix_links(content, absolute_prefix):
    """
    Rewrite relative links to be absolute links based on certain URL.

    Every ``src`` and ``href`` attribute in the snippet is joined against
    ``absolute_prefix`` unless it starts with ``/`` or contains ``://``.

    @param content: HTML snippet as a string (byte strings are decoded as UTF-8)

    @param absolute_prefix: Base URL which the relative links are resolved against

    @return: UTF-8 encoded HTML; the snippet is wrapped in the parent
             element added by ``create_parent=True``
    """

    # ``bytes`` is the same type as ``str`` on Python 2, so this matches the
    # byte strings the original ``type(content) == str`` check matched
    if isinstance(content, bytes):
        content = content.decode("utf-8")

    content = content.strip()

    tree = html.fragment_fromstring(content, create_parent=True)

    def join(base, url):
        """
        Join relative URL
        """
        if url.startswith("/") or "://" in url:
            # Already absolute
            return url
        return urlparse.urljoin(base, url)

    # Both attributes get exactly the same treatment
    for attribute in ("src", "href"):
        for node in tree.xpath("//*[@%s]" % attribute):
            node.set(attribute, join(absolute_prefix, node.get(attribute)))

    # Serialize as HTML, not XML: the XML serializer collapses empty elements
    # into self-closing tags (<div/>), which HTML parsers read as an
    # opening tag without a matching close
    return html.tostring(tree, pretty_print=False, encoding="utf-8")
def remove_bad_tags(content):
    """
    Filter out HTML nodes which would prevent continuous printing.

    @param content: HTML snippet as a string (byte strings are decoded as UTF-8)

    @return: UTF-8 encoded HTML without ``<title>`` elements; the snippet is
             wrapped in the parent element added by ``create_parent=True``
    """

    # ``bytes`` is the same type as ``str`` on Python 2, so this matches the
    # byte strings the original ``type(content) == str`` check matched
    if isinstance(content, bytes):
        content = content.decode("utf-8")

    tree = html.fragment_fromstring(content, create_parent=True)

    # Title tag in the middle of page causes Firefox to choke and
    # aborts page rendering
    for node in tree.xpath('//title'):
        # drop_tree() removes the element and its children but re-attaches
        # the text that follows it (its tail); parent.remove(node) would
        # silently throw that text away
        node.drop_tree()

    # Serialize as HTML, not XML: the XML serializer collapses empty elements
    # into self-closing tags (<div/>), which HTML parsers read as an
    # opening tag without a matching close
    return html.tostring(tree, pretty_print=False, encoding="utf-8")
class Help(grok.View):
    """ Printable view: a folder with all its pages and subpages as one continuous document """

    # Registered for every folderish context
    grok.context(IFolderish)

    def update(self):
        """
        Collect the items to render into ``self.objects``.

        Each entry is a dict with the content item ("object"), its nesting
        depth starting from 1 ("level") and its cleaned-up HTML ("text").
        """
        collected = []

        def walk(folder, level):
            """ Append the items of ``folder`` to ``collected``, descending into subfolders """
            for _item_id, item in folder.contentItems():

                # Images are left out of the listing
                if item.portal_type == "Image":
                    continue

                # Only items exposing getText() carry a text payload
                text = item.getText() if hasattr(item, "getText") else ""

                # Relative links must be re-mapped against the item's own
                # URL, otherwise they are incorrect in the combined output
                cleaned = remove_bad_tags(fix_links(text, item.absolute_url()))

                collected.append({
                    "object": item,
                    "level": level,
                    "text": cleaned,
                })

                # The folder itself is listed before its children
                if item.portal_type == "Folder":
                    walk(item, level + 1)

        walk(self.context, 1)

        self.objects = collected
help.pt:
<html xmlns="http://www.w3.org/1999/xhtml"
xmlns:tal="http://xml.zope.org/namespaces/tal"
xmlns:metal="http://xml.zope.org/namespaces/metal"
xmlns:i18n="http://xml.zope.org/namespaces/i18n"
metal:use-macro="context/main_template/macros/master">
<body>
<!-- Fills the "content-title" slot of the master macro with a fixed heading and subtitle. -->
<metal:slot metal:fill-slot="content-title" i18n:domain="cmf_default">
<h1>Site help</h1>
<p class="discreet">
Printable versions
</p>
</metal:slot>
<!-- Sets disable_border to 1 on the request while filling "top_slot"; presumably this hides the editing border - confirm against main_template. -->
<metal:block fill-slot="top_slot" tal:define="dummy python:request.set('disable_border',1)" />
<!--
Fills the "content-core" slot. Loops over view/objects and, for every entry,
reads its "text" (falling back to nothing), the Title of its "object" and its
"level". Level 1 entries get a page break and an h1 heading, level 2 entries
an h2 and deeper levels an h3. The body is inserted with "structure", i.e.
without HTML escaping.
-->
<metal:slot metal:fill-slot="content-core" i18n:domain="cmf_default">
<div class="help-all">
<tal:rep repeat="page view/objects">
<tal:def define="body page/text|nothing;title page/object/Title;level page/level">
<div tal:condition="python:level==1" style="page-break-before:always"><!-- --></div>
<h1 tal:condition="python:level==1" tal:content="title" />
<h2 tal:condition="python:level==2" tal:content="title" />
<h3 tal:condition="python:level>2" tal:content="title" />
<div class="help-body">
<tal:body tal:replace="structure body" />
</div>
<div style="clear: both"><!-- --></div>
</tal:def>
</tal:rep>
</div>
</metal:slot>
</body>
</html>
The source code of this file is hosted on GitHub. Everyone can update and fix errors in this document with a few clicks - no downloads needed.
For basic information about updating this manual and the Sphinx format, please see the "Writing and updating the manual" guide.