SQLElixir/SQLAlchemy/SQLite3 and ISO-8859 content

See here for the full discussion. The upshot is that, if you are storing string data into an SQLite3 database via Python, and if you suspect that the data may contain non-ASCII characters, you need to ensure that it's properly converted to (Python) Unicode before inserting.

#!/usr/bin/env python
# encoding: utf-8
"""Explores a problem importing a chemical structure file into an
SQLElixir/SQLAlchemy/SQLite3 database.

The file was in SD format. One of the tags contained ISO-8859 characters.
Python read the file contents without trouble; SQLite3 stored the contents.
But on retrieval SQLite3 raised an OperationalError:
Could not decode to UTF-8 column '[...]' with text '[...]'

This script explores the problem, and demonstrates two successful
Unicode encodings: utf-8/replaced and utf-8/ignored.

NB: Only the latter actually worked with SQLElixir:
s = unicode(s, encoding='utf-8', errors='ignore')
import unittest, sqlite3

class TestCase(unittest.TestCase):
    def setUp(self):
        self.conn = sqlite3.connect(":memory:")
        self.conn.cursor().execute('CREATE TABLE demo (value TEXT)')

    def insertAndDump(self, encoding, errors, expectFailure=False):
        caseName = repr([encoding, errors])
        data = 'K\xf6 1366'
            if encoding is not None:
                data = unicode(data, encoding=encoding, errors=errors)
            cursor = self.conn.cursor()
            cursor.execute("INSERT INTO demo (value) VALUES (?)", [data])
            cursor.execute('SELECT * FROM demo')
            self.failUnless(len(cursor.fetchall()) == 1)
            self.failIf(expectFailure, "Unexpected success for %s" % caseName)
        except Exception, info:
            self.failUnless(expectFailure, "Failed %s: %s" % (caseName, info))

    def testCannotRetrieveUnencoded(self):
        self.insertAndDump(None, None, True)

    def testCannotEncodeStrict(self):
        self.insertAndDump("utf-8", "strict", True)

    def testCanRetrieve8859Strict(self):
        self.insertAndDump("8859", "strict")

    def testCanRetrieveUTF8Ignored(self):
        self.insertAndDump("utf-8", "ignore")

    def testCanRetrieveUTF8Replaced(self):
        self.insertAndDump("utf-8", "replace")