Description
How to debug and fix ZODB database problems in Plone
This document contains information to fix and debug ZODB databases with Plone.
Plone CMS 4.x onwards stores uploaded files and images in ZODB as BLOBs. They exist in var/blobstorage folder structure on the file system, files being named after persistent object ids (non-human-readable). The objects themselves, without file payload, are stored in an append-only database file called filestorage and usually the name of this file is Data.fs.
If you copy Plone site database object data (Data.fs) and forget to copy blobstorage folder, or data gets out of the sync during the copy, various problems appear on the Plone site
Instead, you'll see something like this - an evil POSKeyError exception (Persistent Object Storage):
Traceback (most recent call last):
File "/fast/xxx/eggs/ZODB3-3.10.3-py2.6-macosx-10.6-i386.egg/ZODB/Connection.py", line 860, in setstate
self._setstate(obj)
File "/fast/xxx/eggs/ZODB3-3.10.3-py2.6-macosx-10.6-i386.egg/ZODB/Connection.py", line 922, in _setstate
obj._p_blob_committed = self._storage.loadBlob(obj._p_oid, serial)
File "/fast/xxx/eggs/ZODB3-3.10.3-py2.6-macosx-10.6-i386.egg/ZODB/blob.py", line 644, in loadBlob
raise POSKeyError("No blob file", oid, serial)
POSKeyError: 'No blob file'
The proper solution is to fix this problem is to
However you may have failed. You may have damaged or lost your blobstorage forever. To get the Plone site to a working state all content having bad BLOB data must be deleted (usually meaning losing some of site images and uploaded files).
Below is Python code for Grok view which you can drop in to your own Plone add-on product. It creates an admin view which you can call directly thru URL. This code will walk thru all the content on your Plone site and tries to delete bad content items with BLOBs missing.
The code handles both Archetypes and Dexterity subsystems' content types.
Note
Fixing Dexterity blobs with this code have never been tested - please feel free to update the code in collective.developermanual on GitHub if you find it not working properly.
The code, fixblobs.py:
"""
A Zope command line script to delete content with missing BLOB in Plone, causing
POSKeyErrors when content is being accessed or during portal_catalog rebuild.
Tested on Plone 4.1 + Dexterity 1.1.
http://stackoverflow.com/questions/8655675/cleaning-up-poskeyerror-no-blob-file-content-from-plone-site
Also see:
http://pypi.python.org/pypi/experimental.gracefulblobmissing/
"""
# Zope imports
from ZODB.POSException import POSKeyError
from zope.component import getMultiAdapter
from zope.component import queryUtility
from Products.CMFCore.interfaces import IPropertiesTool
from Products.CMFCore.interfaces import IFolderish, ISiteRoot
# Plone imports
from five import grok
from Products.Archetypes.Field import FileField
from Products.Archetypes.interfaces import IBaseContent
from plone.namedfile.interfaces import INamedFile
from plone.dexterity.content import DexterityContent
def check_at_blobs(context):
""" Archetypes content checker.
Return True if purge needed
"""
if IBaseContent.providedBy(context):
schema = context.Schema()
for field in schema.fields():
id = field.getName()
if isinstance(field, FileField):
try:
field.get_size(context)
except POSKeyError:
print "Found damaged AT FileField %s on %s" % (id, context.absolute_url())
return True
return False
def check_dexterity_blobs(context):
""" Check Dexterity content for damaged blob fields
XXX: NOT TESTED - THEORETICAL, GUIDELINING, IMPLEMENTATION
Return True if purge needed
"""
# Assume dexterity contennt inherits from Item
if isinstance(context, DexterityContent):
# Iterate through all Python object attributes
# XXX: Might be smarter to use zope.schema introspection here?
for key, value in context.__dict__.items():
# Ignore non-contentish attributes to speed up us a bit
if not key.startswith("_"):
if INamedFile.providedBy(value):
try:
value.getSize()
except POSKeyError:
print "Found damaged Dexterity plone.app.NamedFile %s on %s" % (key, context.absolute_url())
return True
return False
def fix_blobs(context):
"""
Iterate through the object variables and see if they are blob fields
and if the field loading fails then poof
"""
if check_at_blobs(context) or check_dexterity_blobs(context):
print "Bad blobs found on %s" % context.absolute_url() + " -> deleting"
parent = context.aq_parent
parent.manage_delObjects([context.getId()])
def recurse(tree):
""" Walk through all the content on a Plone site """
for id, child in tree.contentItems():
fix_blobs(child)
if IFolderish.providedBy(child):
recurse(child)
class FixBlobs(grok.CodeView):
"""
A management view to clean up content with damaged BLOB files
You can call this view by
1) Starting Plone in debug mode (console output available)
2) Visit site.com/@@fix-blobs URL
"""
grok.name("fix-blobs")
grok.context(ISiteRoot)
grok.require("cmf.ManagePortal")
def disable_integrity_check(self):
""" Content HTML may have references to this broken image - we cannot fix that HTML
but link integriry check will yell if we try to delete the bad image.
http://collective-docs.readthedocs.org/en/latest/content/deleting.html#bypassing-link-integrity-check "
"""
ptool = queryUtility(IPropertiesTool)
props = getattr(ptool, 'site_properties', None)
self.old_check = props.getProperty('enable_link_integrity_checks', False)
props.enable_link_integrity_checks = False
def enable_integrity_check(self):
""" """
ptool = queryUtility(IPropertiesTool)
props = getattr(ptool, 'site_properties', None)
props.enable_link_integrity_checks = self.old_check
def render(self):
#plone = getMultiAdapter((self.context, self.request), name="plone_portal_state")
print "Checking blobs"
portal = self.context
self.disable_integrity_check()
recurse(portal)
self.enable_integrity_check()
print "All done"
return "OK - check console for status messages"
More info
Transactions usually are problematic only when many ZEO front-end clients are used.
When the site gets more load, ConflictErrors start to occur. Zope tries to solve the situation by replaying HTTP requests for CoflictErrors and has a default threshold (3) how many times the request is replayed.
More info
ConflictErrors are caused by concurrent transactions trying to write to the same object(s) - usually portal_catalog. They are harmless, but slow down badly coded site. Plone will retry the HTTP request and transaction five times before giving up.
OID is visible in the ConflictError traceback.
You can turn OID back to Python object, as mentioned by A. Jung:
from ZODB.utils import p64
app._p_jar[p64(oid)]
Plone and the underlying Zope handles transactions transparently. Due to magic nature of the transactions you might write accidentally code which turns all transactions to write transactions.
Symptoms
Traceback example:
* Module ZPublisher.Publish, line 202, in publish_module_standard
* Module Products.LinguaPlone.patches, line 67, in new_publish
* Module ZPublisher.Publish, line 170, in publish
* Module Products.LinguaPlone.patches, line 67, in new_publish
* Module ZPublisher.Publish, line 170, in publish
* Module Products.LinguaPlone.patches, line 67, in new_publish
* Module ZPublisher.Publish, line 170, in publish
* Module Products.LinguaPlone.patches, line 67, in new_publish
* Module ZPublisher.Publish, line 157, in publish
* Module plone.app.linkintegrity.monkey, line 15, in zpublisher_exception_hook_wrapper
* Module ZPublisher.Publish, line 125, in publish
* Module Zope2.App.startup, line 238, in commit
* Module transaction._manager, line 96, in commit
* Module transaction._transaction, line 395, in commit
* Module transaction._transaction, line 495, in _commitResources
* Module ZODB.Connection, line 510, in commit
* Module ZODB.Connection, line 547, in _commit
ConflictError: database conflict error (oid 0x2b92, class Products.CMFPlone.PropertiesTool.SimpleItemWithProperties)
Zope 2 hasn't many well-documented ZODB debugging tools. Below is one snippet to examine the contents of the last transactions of offline Data.fs file. It is evolved version of this original script.
Example how to run the script for the last 30 transaction under Zope egg environment using zopepy buildout recipe:
bin/zopepy -n 30 debug.py Data.fs
Code for debug.py:
##############################################################################
#
# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""Tool to dump the last few transactions from a FileStorage."""
from ZODB.fstools import prev_txn
from ZODB.serialize import ObjectReader, get_refs
from persistent.TimeStamp import TimeStamp
from ZODB.FileStorage.FileStorage import FileIterator
import cStringIO, cPickle
import optparse, getopt
import sys
class Nonce(object): pass
class Reader(ObjectReader):
def __init__(self):
self.identity = None
def _get_unpickler(self, pickle):
file = cStringIO.StringIO(pickle)
unpickler = cPickle.Unpickler(file)
unpickler.persistent_load = self._persistent_load
def find_global(modulename, name):
self.identity ="%s.%s"%(modulename, name)
return Nonce
unpickler.find_global = find_global
return unpickler
def getIdentity(self, pickle ):
self.identity = None
unpickler = self._get_unpickler( pickle )
unpickler.load()
return self.identity
def getObject(self, pickle):
unpickler = self._get_unpickler( pickle )
ob = unpickler.load()
return ob
def pretty_size( size ):
if size < 1024:
return "%sB"%(size)
kb = size / 1024.0
if kb < 1024.0:
return '%0.1fKb'%kb
else:
mb = kb/1024.0
return '%0.1fMb'%mb
def run(path, ntxn):
f = open(path, "rb")
f.seek(0, 2)
th = prev_txn(f)
for i in range(ntxn):
th = th.prev_txn()
f.close()
reader = Reader()
iterator = FileIterator(path, pos=th._pos)
for i in iterator:
print "Transaction ", TimeStamp(i.tid), i.user, i.description
object_types = {}
for o in i:
ot = reader.getIdentity(o.data)
if ot in object_types:
size, count = object_types[ot]
object_types[ot] = (size+len(o.data), count+1)
else:
object_types[ot] = (len(o.data),1)
ob = cPickle.loads(o.data)
print "Object data for :" + str(o)
# Not sure why some objects are stored as tuple (object, ())
if type(ob) == tuple and len(ob) == 2:
ob = ob[0]
if hasattr(ob, "__dict__"):
for i in ob.__dict__.items():
if not callable(i[1]):
print i
else:
print "can't extract:" + str(ob)
print "-------------------------------------------------------"
keys = object_types.keys()
keys.sort()
for k in keys:
# count, class, size (aggregate)
print " - ", object_types[k][1], k, pretty_size(object_types[k][0])
def main():
ntxn = 20
opts, args = getopt.getopt(sys.argv[1:], "n:")
path, = args
for k, v in opts:
if k == '-n':
ntxn = int(v)
run(path, ntxn)
if __name__ == "__main__":
main()
The source code of this file is hosted on GitHub. Everyone can update and fix errors in this document with few clicks - no downloads needed.
For basic information about updating this manual and Sphinx format please see Writing and updating the manual guide.