Skip to content

Commit 12d7ef4

Browse files
committed
(#2890) - websql: avoid hex() for binaries
1 parent f644794 commit 12d7ef4

File tree

3 files changed

+158
-27
lines changed

3 files changed

+158
-27
lines changed

lib/adapters/websql.js

Lines changed: 47 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,31 @@ var utils = require('../utils');
44
var merge = require('../merge');
55
var errors = require('../deps/errors');
66
var vuvuzela = require('vuvuzela');
7+
var parseHexString = require('../deps/parse-hex');
8+
79
// Wraps a value in single quotes, e.g. foo -> 'foo'.
// The value is concatenated as-is; no escaping of embedded quotes is done.
function quote(str) {
  var singleQuote = "'";
  return singleQuote + str + singleQuote;
}
1012

13+
// escapeBlob and unescapeBlob are workarounds for a websql bug:
14+
// https://code.google.com/p/chromium/issues/detail?id=422690
15+
// https://bugs.webkit.org/show_bug.cgi?id=137637
16+
// The goal is to never actually insert the \u0000 character
17+
// in the database.
18+
// Rewrites a string so that the result never contains \u0000:
//   \u0002 becomes \u0002\u0002
//   \u0001 becomes \u0001\u0002
//   \u0000 becomes \u0001\u0001
// The passes must run in exactly this order: each pass introduces characters
// that an earlier pass would otherwise have rewritten again.
function escapeBlob(str) {
  var doubledTwos = str.split('\u0002').join('\u0002\u0002');
  var escapedOnes = doubledTwos.split('\u0001').join('\u0001\u0002');
  return escapedOnes.split('\u0000').join('\u0001\u0001');
}
24+
25+
// Reverses the escapeBlob() encoding:
//   \u0001\u0001 becomes \u0000
//   \u0001\u0002 becomes \u0001
//   \u0002\u0002 becomes \u0002
// The passes run in this fixed order, each one scanning left to right.
function unescapeBlob(str) {
  var restoredZeros = str.split('\u0001\u0001').join('\u0000');
  var restoredOnes = restoredZeros.split('\u0001\u0002').join('\u0001');
  return restoredOnes.split('\u0002\u0002').join('\u0002');
}
31+
1132
var cachedDatabases = {};
1233

1334
var openDB = utils.getArguments(function (args) {
@@ -31,7 +52,7 @@ var openDB = utils.getArguments(function (args) {
3152
});
3253

3354
var POUCH_VERSION = 1;
34-
var ADAPTER_VERSION = 5; // used to manage migrations
55+
var ADAPTER_VERSION = 6; // used to manage migrations
3556

3657
// The object stores created for each database
3758
// DOC_STORE stores the document meta data, its revision history and state
@@ -92,22 +113,6 @@ function unknownError(callback) {
92113
callback(errors.error(errors.WSQ_ERROR, errorReason, errorName));
93114
};
94115
}
95-
function decodeUtf8(str) {
96-
return decodeURIComponent(window.escape(str));
97-
}
98-
function parseHexString(str, encoding) {
99-
var result = '';
100-
var charWidth = encoding === 'UTF-8' ? 2 : 4;
101-
for (var i = 0, len = str.length; i < len; i += charWidth) {
102-
var substring = str.substring(i, i + charWidth);
103-
if (charWidth === 4) { // UTF-16, twiddle the bits
104-
substring = substring.substring(2, 4) + substring.substring(0, 2);
105-
}
106-
result += String.fromCharCode(parseInt(substring, 16));
107-
}
108-
result = encoding === 'UTF-8' ? decodeUtf8(result) : result;
109-
return result;
110-
}
111116

112117
function stringifyDoc(doc) {
113118
// don't bother storing the id/rev. it uses lots of space,
@@ -137,7 +142,7 @@ function getSize(opts) {
137142
// honest-to-god ceiling for data, so we need to
138143
// set it to a decently high number.
139144
var isAndroid = /Android/.test(window.navigator.userAgent);
140-
return isAndroid ? 5000000 : 1;
145+
return isAndroid ? 5000000 : 1; // in PhantomJS, if you use 0 it will crash
141146
}
142147

143148
function WebSqlPouch(opts, callback) {
@@ -377,6 +382,15 @@ function WebSqlPouch(opts, callback) {
377382
});
378383
});
379384
}
385+
386+
// Migration 6: stop reading attachment bodies out as HEX (which is slow) and
// rely on escapeBlob()/unescapeBlob() instead. This step only adds the
// "escaped" flag column to the attachment store, with a default of 0.
// `callback` is handed to executeSql as its success handler.
function runMigration6(tx, callback) {
  var sql = 'ALTER TABLE ' + ATTACH_STORE;
  sql += ' ADD COLUMN escaped TINYINT(1) DEFAULT 0';
  var noParams = [];
  tx.executeSql(sql, noParams, callback);
}
393+
380394
function checkEncoding(tx, cb) {
381395
// UTF-8 on chrome/android, UTF-16 on safari < 7.1
382396
tx.executeSql('SELECT HEX("a") AS hex', [], function (tx, res) {
@@ -401,7 +415,7 @@ function WebSqlPouch(opts, callback) {
401415
var meta = 'CREATE TABLE IF NOT EXISTS ' + META_STORE +
402416
' (update_seq INTEGER, dbid, db_version INTEGER)';
403417
var attach = 'CREATE TABLE IF NOT EXISTS ' + ATTACH_STORE +
404-
' (digest UNIQUE, body BLOB)';
418+
' (digest UNIQUE, escaped TINYINT(1), body BLOB)';
405419
var attachAndRev = 'CREATE TABLE IF NOT EXISTS ' +
406420
ATTACH_AND_SEQ_STORE + ' (digest, seq INTEGER)';
407421
var doc = 'CREATE TABLE IF NOT EXISTS ' + DOC_STORE +
@@ -460,6 +474,7 @@ function WebSqlPouch(opts, callback) {
460474
runMigration3,
461475
runMigration4,
462476
runMigration5,
477+
runMigration6,
463478
setupDone
464479
];
465480

@@ -1001,8 +1016,9 @@ function WebSqlPouch(opts, callback) {
10011016
// we could just insert before selecting and catch the error,
10021017
// but my hunch is that it's cheaper not to serialize the blob
10031018
// from JS to C if we don't have to (TODO: confirm this)
1004-
sql = 'INSERT INTO ' + ATTACH_STORE + '(digest, body) VALUES (?,?)';
1005-
tx.executeSql(sql, [digest, data], function () {
1019+
sql = 'INSERT INTO ' + ATTACH_STORE +
1020+
' (digest, body, escaped) VALUES (?,?,1)';
1021+
tx.executeSql(sql, [digest, escapeBlob(data)], function () {
10061022
callback();
10071023
}, function () {
10081024
// ignore constraint errors, means it already exists
@@ -1304,13 +1320,17 @@ function WebSqlPouch(opts, callback) {
13041320
var tx = opts.ctx;
13051321
var digest = attachment.digest;
13061322
var type = attachment.content_type;
1307-
var sql = 'SELECT hex(body) as body FROM ' + ATTACH_STORE +
1308-
' WHERE digest=?';
1323+
var sql = 'SELECT escaped, ' +
1324+
'CASE WHEN escaped = 1 THEN body ELSE HEX(body) END AS body FROM ' +
1325+
ATTACH_STORE + ' WHERE digest=?';
13091326
tx.executeSql(sql, [digest], function (tx, result) {
1310-
// sqlite normally stores data as utf8, so even the hex() function
1311-
// "encodes" the binary data in utf8/16 before returning it. yet hex()
1312-
// is the only way to get the full data, so we do this.
1313-
var data = parseHexString(result.rows.item(0).body, encoding);
1327+
// websql has a bug where \u0000 causes early truncation in strings
1328+
// and blobs. to work around this, we used to use the hex() function,
1329+
// but that's not performant. after migration 6, we remove \u0000
1330+
// and add it back in afterwards
1331+
var item = result.rows.item(0);
1332+
var data = item.escaped ? unescapeBlob(item.body) :
1333+
parseHexString(item.body, encoding);
13141334
if (opts.encode) {
13151335
res = btoa(data);
13161336
} else {

lib/deps/parse-hex.js

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
'use strict';
2+
3+
//
4+
// Parsing hex strings. Yeah.
5+
//
6+
// So basically we need this because of a bug in WebSQL:
7+
// https://code.google.com/p/chromium/issues/detail?id=422690
8+
// https://bugs.webkit.org/show_bug.cgi?id=137637
9+
//
10+
// UTF-8 and UTF-16 are provided as separate functions
11+
// for meager performance improvements
12+
//
13+
14+
// Interprets each character of `str` as one UTF-8 byte and returns the
// decoded text: window.escape() turns the bytes into %XX sequences, which
// decodeURIComponent() then reads as UTF-8.
// decodeURIComponent throws a URIError on malformed sequences.
function decodeUtf8(str) {
  var percentEncoded = window.escape(str);
  return decodeURIComponent(percentEncoded);
}
17+
18+
// Converts the char code of one hex digit into its value (0-15).
//   '0'-'9' are char codes 48-57
//   'A'-'F' are char codes 65-70
// Lowercase digits are not handled: SQLite will only give us uppercase hex.
function hexToInt(charCode) {
  var base = charCode < 65 ? 48 : 55;
  return charCode - base;
}
24+
25+
26+
// Decodes hex in which every two digits form one character code.
// Example:
//   pragma encoding=utf8;
//   select hex('A');
//   returns '41'
// Reads digits from index `start` up to (not including) `end` and returns
// one character per pair; the result is not UTF-8 decoded here.
function parseHexUtf8(str, start, end) {
  var decoded = '';
  for (var pos = start; pos < end; pos += 2) {
    var highNibble = hexToInt(str.charCodeAt(pos));
    var lowNibble = hexToInt(str.charCodeAt(pos + 1));
    decoded += String.fromCharCode((highNibble << 4) | lowNibble);
  }
  return decoded;
}
39+
40+
// Decodes hex in which every four digits form one UTF-16 code unit, with
// the low byte written first.
// Example:
//   pragma encoding=utf16;
//   select hex('A');
//   returns '4100'
// Notice that the 00 comes after the 41 (i.e. it's swizzled), so the two
// bytes of every group are swapped back before building the character.
// Reads digits from index `start` up to (not including) `end`.
function parseHexUtf16(str, start, end) {
  var decoded = '';
  for (var pos = start; pos < end; pos += 4) {
    // the first two digits are the low byte, the last two the high byte
    var lowByte = (hexToInt(str.charCodeAt(pos)) << 4) |
      hexToInt(str.charCodeAt(pos + 1));
    var highByte = (hexToInt(str.charCodeAt(pos + 2)) << 4) |
      hexToInt(str.charCodeAt(pos + 3));
    decoded += String.fromCharCode((highByte << 8) | lowByte);
  }
  return decoded;
}
58+
59+
// Turns the hex text of a stored value back into a string.
// When `encoding` is exactly 'UTF-8' the digits are read two at a time and
// the result is UTF-8 decoded; any other value takes the UTF-16 path.
function parseHexString(str, encoding) {
  var hexLength = str.length;
  if (encoding !== 'UTF-8') {
    return parseHexUtf16(str, 0, hexLength);
  }
  var utf8Bytes = parseHexUtf8(str, 0, hexLength);
  return decodeUtf8(utf8Bytes);
}
66+
67+
module.exports = parseHexString;

tests/browser.migration.js

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -831,6 +831,50 @@ describe('migration', function () {
831831
});
832832
});
833833
});
834+
835+
// Regression test for #2890: writes a PNG attachment through the first
// PouchDB build, then opens the second build and checks the attachment comes
// back unchanged, and that a freshly written attachment round-trips too.
// NOTE(review): presumably dbs.first.local and dbs.second.local name the same
// underlying database, so the second open runs the migrations - confirm.
it('#2890 PNG content after migration', function () {
  if (skip) { return; }

  var oldPouch = new dbs.first.pouch(
    dbs.first.local, dbs.first.localOpts);
  // assigned only after the old build has finished all of its writes
  var newPouch;

  var transparent1x1Png = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HA' +
    'wCAAAAC0lEQVR4nGP6zwAAAgcBApocMXEA' +
    'AAAASUVORK5CYII=';
  var black1x1Png =
    'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAAAAAA6fptVAAAACkl' +
    'EQVR4nGNiAAAABgADNjd8qAAA' +
    'AABJRU5ErkJggg==';

  // asserts that the doc's 'att' attachment is a PNG and returns it
  function pngAttachmentOf(doc) {
    var att = doc._attachments['att'];
    att.content_type.should.equal('image/png');
    return att;
  }

  return oldPouch.put({_id: 'foo'}).then(function (putResult) {
    return oldPouch.putAttachment('foo', 'att', putResult.rev,
      transparent1x1Png, 'image/png');
  }).then(function () {
    return oldPouch.get('foo', {attachments: true});
  }).then(function (fooDoc) {
    should.exist(pngAttachmentOf(fooDoc).data);
    return oldPouch.get('foo');
  }).then(function (fooDoc) {
    // write the document once more through the old build
    return oldPouch.put(fooDoc);
  }).then(function () {
    newPouch = new dbs.second.pouch(dbs.second.local,
      {auto_compaction: false});
    return newPouch.compact();
  }).then(function () {
    return newPouch.get('foo', {attachments: true});
  }).then(function (fooDoc) {
    pngAttachmentOf(fooDoc).data.should.equal(transparent1x1Png);
    return newPouch.putAttachment('bar', 'att', null,
      black1x1Png, 'image/png');
  }).then(function () {
    return newPouch.get('bar', {attachments: true});
  }).then(function (barDoc) {
    pngAttachmentOf(barDoc).data.should.equal(black1x1Png);
  });
});
834878
}
835879
});
836880
});

0 commit comments

Comments
 (0)