From 62b0a0cdd183c281ed6a914dd1ed4a5f264100d7 Mon Sep 17 00:00:00 2001 From: didi <_git@d10r.net> Date: Thu, 26 Jan 2017 15:40:39 +0100 Subject: [PATCH] initial commit --- main.js | 281 +++++++++++++++++++++++++++++++++++++++++++++++++++ package.json | 9 ++ 2 files changed, 290 insertions(+) create mode 100644 main.js create mode 100644 package.json diff --git a/main.js b/main.js new file mode 100644 index 0000000..24ac679 --- /dev/null +++ b/main.js @@ -0,0 +1,281 @@ +/* + * source: + * 1: https://gist.github.com/yangchenyun/a1c123935d82f5e25d57 + * 2: https://gist.github.com/EelMood/84140e557065ac3d73f669f120429ae1 +*/ + + + +/* + * @fileoverview Program to free the content in kindle books as plain HTML. + * + * This is largely based on reverse engineering kindle cloud app + * (https://read.amazon.com) to read book data from webSQL. + * + * Access to kindle library is required to download this book. + */ + +// The Kindle Compression Module copied from http://read.amazon.com application +// The script reuses the same logic to decompress the fragments +var KindleCompression = function() { + function h(a, c, g) { + var f, g = g > 0 ? g : b; + for (f in c) + c[f] >= g && (g = c[f] + 1); + f = g; + for (var d in a) + for (var g = a[d], e = 2; e <= g.length; e++) { + var h = g.substr(0, e); + c.hasOwnProperty(h) || (c[h] = f++) + } + return c + } + function j(c, e) { + var l, h = b; + l = ""; + for (var r = 0; r < e.length; ) { + var o = e.charAt(r); + r++; + o.charCodeAt(0) <= d ? c.hasOwnProperty(l + o) ? l += o : (l.length > 0 && h < f && (c[l + o] = h, + h++, + h === a && (h = g)), + l = o) : l = "" + } + return c + } + // This is called when lzExpandWithStaticDictionary doesn't have a dictionary passed in + // in our case, we always construct the dictionary before decompress + // function e() { + // if (defaultDictionary === void 0 || defaultDictionary === {}) + // defaultDictionary = j({}, defaultDictionaryString); + // return defaultDictionary + // } + var d = 9983 + , c = d + 1 + , b = c + 100 + 1 + , f = 65533 + , a = 55295 + , g = 57344; + return { + lzCompress: function(k) { + var e = {}, l = [], h, r = b, o, q, s; + o = h = ""; + for (var t = 0; t < k.length; ) { + var u = k.charAt(t); + t++; + if (u.charCodeAt(0) <= d) { + for (; o.length > 0; ) { + q = Math.min(100, o.length); + s = o.substr(0, q); + o = o.substr(q); + l.push(c + q); + for (q = 0; q < s.length; q++) + l.push(s.charCodeAt(q)) + } + e.hasOwnProperty(h + u) ? h += u : (h.length > 0 && (l.push(h.length === 1 ? h.charCodeAt(0) : e[h]), + r < f && (e[h + u] = r, + r++, + r === a && (r = g))), + h = u) + } else + h.length > 0 && (l.push(h.length === 1 ? + h.charCodeAt(0) : e[h]), + h = ""), + o += u + } + for (h.length > 0 && l.push(h.length === 1 ? h.charCodeAt(0) : e[h]); o.length > 0; ) { + q = Math.min(100, o.length); + s = o.substr(0, q); + o = o.substr(q); + l.push(c + q); + for (q = 0; q < s.length; q++) + l.push(s.charCodeAt(q)) + } + for (i = 0; i < l.length; i++) + l[i] = String.fromCharCode(l[i]); + return l.join("") + }, + lzExpand: function(k) { + for (var e = {}, l = [], h, r = b, o = "", q, s = 0; s < k.length; ) { + h = k.charCodeAt(s); + s++; + if (h <= d) + h = String.fromCharCode(h); + else if (h >= b) + (h = e[h]) || (h = o + q); + else { + o = h - c; + l.push(k.substr(s, o)); + s += o; + o = ""; + continue + } + l.push(h); + q = h.charAt(0); + r < f && o.length > 0 && (e[r] = o + q, + r++, + r === a && (r = g)); + o = h + } + return l.join("") + }, + lzBuildDictionary: j, + lzGetDecompressionDictionary: function(a) { + var b = [], c; + for (c in a) + b[a[c]] = c; + return b + }, + lzAddStringsToDictionary: h, + lzAddNumbersToDictionary: function(a, b) { + for (var c = [], g = 100; g < 1E3; g++) + c.push("" + g); + return h(c, a, b) + }, + lzCompressWithStaticDictionary: function(a, b) { + if (b === void 0 || b === {}) + b = e(); + var g = [], f, h, o, q; + h = f = ""; + for (var s = 0; s < a.length; ) { + var t = a.charAt(s); + s++; + if (t.charCodeAt(0) <= d) { + for (; h.length > 0; ) { + o = Math.min(100, + h.length); + q = h.substr(0, o); + h = h.substr(o); + g.push(c + o); + for (o = 0; o < q.length; o++) + g.push(q.charCodeAt(o)) + } + b.hasOwnProperty(f + t) ? f += t : (f.length > 0 && g.push(f.length === 1 ? f.charCodeAt(0) : b[f]), + f = t) + } else + f.length > 0 && (g.push(f.length === 1 ? f.charCodeAt(0) : b[f]), + f = ""), + h += t + } + for (f.length > 0 && g.push(f.length === 1 ? f.charCodeAt(0) : b[f]); h.length > 0; ) { + o = Math.min(100, h.length); + q = h.substr(0, o); + h = h.substr(o); + g.push(c + o); + for (o = 0; o < q.length; o++) + g.push(q.charCodeAt(o)) + } + for (i = 0; i < g.length; i++) + g[i] = String.fromCharCode(g[i]); + return g.join("") + }, + lzExpandWithStaticDictionary: function(a, g, f) { + // NOTE: g is always defined in our case + // if (g === void 0 || g === []) { + // if (defaultDeDictionary === void 0 || defaultDeDictionary === []) { + // e(); + // defaultDeDictionary = []; + // for (var h in defaultDictionary) + // defaultDeDictionary[defaultDictionary[h]] = h + // } + // g = defaultDeDictionary + // } + h = d; + var r = b; + f !== void 0 && (h = f - 1, + r = f); + for (var f = [], o = 0; o < a.length; ) { + var q = a.charCodeAt(o); + o++; + q <= h ? f.push(String.fromCharCode(q)) : q >= r ? f.push(g[q]) : (q -= c, + f.push(a.substr(o, q)), + o += q) + } + return f.join("") + } + } +}() + +function s(metadata) { // a is bookinfo.metadata + var b = {}; + + if (metadata.cpr !== void 0) { + KindleCompression.lzAddStringsToDictionary(metadata.cpr, b), + KindleCompression.lzAddNumbersToDictionary(b); + return KindleCompression.lzGetDecompressionDictionary(b); + } + + if (metadata.cprJson !== void 0) { + KindleCompression.lzAddStringsToDictionary(metadata.cprJson, b, 256), + KindleCompression.lzAddNumbersToDictionary(b, 256); + return KindleCompression.lzGetDecompressionDictionary(b); + } +} + +var os = require('osenv'); +var fs = require('fs'); +var path = require('path'); +var sqlite3 = require('sqlite3').verbose(); +// + +// http://read.amazon.com stores the ebook with webSQL, which is a sqlite accessible in Chrome +// in this case, kindle cloud reader : +// => was opened with google-chrome +// => the os is linux +// => and i used the french app : hence the "lire.amazon.fr" +// the english version is alon "read.amazon.com" +var KINDLE_DB = os.home() + '/.config/google-chrome/Default/databases/https_lire.amazon.fr_0/2'; +var db = new sqlite3.Database(KINDLE_DB); + +// regex to locate and replace javascript fragments in the generated html +var rawGotoRegex = //g; +var rawDestRegex = //g; +var modifiedGoto = ''; +var modifiedDest = ''; + +// The following hack is from reverse engineering how kindle cloud app reads data +db.all("select metadata from 'bookinfo'", function(err, rows) { + rows.forEach(function (row) { + var metadata = JSON.parse(row.metadata); + var title = metadata.title; + var authors = metadata.authorList.join(','); + // used for dictionary request at https://read.amazon.com/dict/getDefinition?asin=&word= + var asin = metadata.asin; + var ca = s(metadata); + + console.log('staring process book: ' + title); + + var HtmlHeader = '' + + '' + + '' + + ''; + var HtmlFile = path.join(os.tmpdir(), title.replace(/\s+/g, '-') + '.html'); + + fs.writeFile(HtmlFile, HtmlHeader); + console.log("created the file with HTML headers."); + + db.all("select id, piece, other from 'fragments' where asin='" + asin + "' order by id", function(err, rows) { + rows.forEach(function (row) { + var id = row.id; + var compressedFragmentData = row.piece; + var uncompressedFragmentData; + var imageDataMap = JSON.parse(row.other).imageData || {}; + uncompressedFragmentData = KindleCompression.lzExpandWithStaticDictionary( + row.piece, ca); + // replace the javascript links with hard coded html anchors + uncompressedFragmentData = uncompressedFragmentData.replace(rawGotoRegex, modifiedGoto); + uncompressedFragmentData = uncompressedFragmentData.replace(rawDestRegex, modifiedDest); + // replace image path with base64 encoded string + for (var image in imageDataMap) { + uncompressedFragmentData = uncompressedFragmentData.replace( + 'dataUrl="' + image + '"', + 'src="' + imageDataMap[image] + '"'); + } + fs.appendFile(HtmlFile, uncompressedFragmentData); + }); + }); + + fs.appendFile(HtmlFile, ''); + console.log("created the file at: " + HtmlFile); + }); +}); diff --git a/package.json b/package.json new file mode 100644 index 0000000..54e070a --- /dev/null +++ b/package.json @@ -0,0 +1,9 @@ +{ + "name": "kindle-fetch", + "description": "todo", + "main": "main.js", + "dependencies": { + "osenv": "^0.1.4", + "sqlite3": "^3.1.8" + } +}