diff options
Diffstat (limited to 'tools/blog/wp-to-markdown.js')
-rw-r--r-- | tools/blog/wp-to-markdown.js | 189 |
1 files changed, 189 insertions, 0 deletions
// tools/blog/wp-to-markdown.js
//
// Converts a WordPress XML export into one Markdown file per post.
//
// Usage: node wp-to-markdown.js <wordpress-export.xml>
//
// The export is streamed through a non-strict sax parser. Every <wp:author>
// element is collected into a login -> display-name table, and every
// non-draft <item> is written to out/<category>/<slug>.md as a block of
// "key: value" header lines, a blank line, and then the post body.

var sax = require('sax');
var fs = require('fs');
// lowercase: true => every tag name handed to the callbacks below is
// lowercased, so all the lookup keys in this file must be lowercase as well.
var parser = sax.parser(false, { lowercase: true });
var assert = require('assert');
var mkdirp = require('mkdirp');
var url = require('url');

if (!process.argv[2]) {
  console.error('Usage: node wp-to-markdown.js <wordpress-export.xml>');
  process.exit(1);
}

// Child elements of <wp:author> that are captured -> property on `author`.
var AUTHOR_FIELDS = {
  'wp:author_login': 'login',
  'wp:author_display_name': 'name',
  'wp:author_first_name': 'first_name',
  'wp:author_last_name': 'last_name'
};

// Child elements of <item> that are captured -> property on `post`.
// When both <pubDate> and <wp:post_date> are present, whichever closes last
// in the document wins.
var POST_FIELDS = {
  'title': 'title',
  'pubdate': 'date',
  'wp:post_date': 'date',
  'dc:creator': 'author',
  'wp:status': 'status',
  'category': 'category',
  'content:encoded': 'body',
  'link': 'link'
};

// Order of the "key: value" header lines written at the top of each file.
var HEADER_FIELDS = ['title', 'author', 'date', 'status', 'category', 'slug'];

var input = fs.createReadStream(process.argv[2]);
// Let the stream decode UTF-8 so that a multi-byte character split across
// two chunks is not corrupted (calling toString() per chunk would break it).
input.setEncoding('utf8');
input.on('data', function (chunk) {
  parser.write(chunk);
});
input.on('end', function () {
  parser.end();
});

// The <item> / <wp:author> currently being parsed, or null when outside one.
// Each holds `field` (name of the property being captured, or null) and
// `text` (characters accumulated for that property so far).
var post = null;
var author = null;
// login -> display name. Created without a prototype so that a login such as
// "constructor" cannot resolve to an inherited property.
var authors = Object.create(null);
mkdirp.sync('out');

// True when `key` is an own property of `obj`.
function hasOwn(obj, key) {
  return Object.prototype.hasOwnProperty.call(obj, key);
}

parser.onopentag = function (tag) {
  var name = tag.name;

  if (name === 'wp:author') {
    assert(author === null);
    author = { field: null, text: '' };
    return;
  }

  if (name === 'item') {
    assert(post === null);
    post = { field: null, text: '' };
    return;
  }

  if (hasOwn(AUTHOR_FIELDS, name)) {
    assert(author);
    author.field = AUTHOR_FIELDS[name];
    author.text = '';
    return;
  }

  if (hasOwn(POST_FIELDS, name)) {
    // These elements are only captured inside an <item>; elsewhere in the
    // document they are ignored instead of dereferencing a null `post`.
    if (post) {
      post.field = POST_FIELDS[name];
      post.text = '';
    }
    return;
  }

  // Any other element: stop capturing text for whatever was open.
  if (post) post.field = null;
  if (author) author.field = null;
};

parser.onclosetag = function (tagName) {
  switch (tagName) {
    case 'wp:author':
      assert(author);
      finishAuthor();
      return;

    case 'item':
      assert(post);
      finishPost();
      return;

    default:
      if ((post && post.field) || (author && author.field)) finishField();
      return;
  }
};

parser.ontext = parser.oncdata = function (text) {
  var target = author || post;
  if (!target) return;
  if (target.field) {
    target.text += text;
  } else {
    // Text outside a captured element is discarded.
    target.text = '';
  }
};

// Stores the text accumulated for the currently open field on the post (or,
// failing that, the author) and resets the capture state.
function finishField() {
  var target = (post && post.field) ? post :
               (author && author.field) ? author : null;
  if (!target) return;
  target[target.field] = target.text;
  target.field = null;
  target.text = '';
}

// Writes the current post to out/<category>/<slug>.md and clears `post`.
// Draft posts are dropped without writing anything.
function finishPost() {
  // don't port drafts.
  if (post.status === 'draft') {
    post = null;
    return;
  }

  post.date = new Date(post.date);

  // Preferred slug: the last path segment of the post's link.
  if (post.link) {
    var pathname = url.parse(post.link).pathname || '';
    post.slug = pathname.replace(/\/+$/, '').split('/').pop();
  }

  // Fallback slug: title plus ISO timestamp, reduced to [a-z0-9-].
  if (!post.slug) {
    // toISOString() throws a RangeError on an invalid date, so the timestamp
    // is only included when the date actually parsed.
    var stamp = isNaN(post.date.getTime()) ? '' : post.date.toISOString();
    post.slug = [post.title, stamp]
      .filter(Boolean)
      .join('-')
      .replace(/[^a-z0-9]+/gi, '-')
      .replace(/^-|-$/g, '')
      .toLowerCase();
  }
  post.slug = post.slug || '-';

  // Replace the author login with the display name when it is known.
  post.author = authors[post.author] || post.author;
  // Posts without a category would otherwise land in "out/undefined".
  post.category = post.category || 'uncategorized';
  post.body = post.body || '';

  // actually write it!
  var header = HEADER_FIELDS
    .filter(function (key) { return hasOwn(post, key); })
    .map(function (key) { return key + ': ' + post[key]; })
    .join('\n');
  var output = header + '\n\n' + post.body.trim() + '\n';

  var dir = 'out/' + post.category;
  var f = dir + '/' + post.slug + '.md';
  console.log(f, post.title);
  mkdirp.sync(dir);
  fs.writeFileSync(f, output, 'utf8');

  post = null;
}

// Records the current author's display name under their login and clears
// `author`. The name is the display name if present, otherwise
// "<first> <last>" built from whichever parts exist, otherwise the login.
function finishAuthor() {
  // Joining only the non-empty parts keeps the result empty when neither
  // name is present, so the login fallback below can actually be reached.
  var fullName = [author.first_name, author.last_name]
    .filter(Boolean)
    .join(' ');
  authors[author.login] = author.name || fullName || author.login;
  author = null;
}