diff options
| author | syuilo <syuilotan@yahoo.co.jp> | 2017-09-07 14:52:07 +0900 |
|---|---|---|
| committer | syuilo <syuilotan@yahoo.co.jp> | 2017-09-07 14:52:07 +0900 |
| commit | 1e3f93d68ee5db3739084039be57279c14d0a62b (patch) | |
| tree | eb4d27a98bd4edd32459f6f95b47b0eeb7206465 /src/tools/analysis/mecab.js | |
| parent | :v: (diff) | |
| download | misskey-1e3f93d68ee5db3739084039be57279c14d0a62b.tar.gz misskey-1e3f93d68ee5db3739084039be57279c14d0a62b.tar.bz2 misskey-1e3f93d68ee5db3739084039be57279c14d0a62b.zip | |
:v:
Diffstat (limited to 'src/tools/analysis/mecab.js')
| -rw-r--r-- | src/tools/analysis/mecab.js | 85 |
1 files changed, 85 insertions, 0 deletions
// File: src/tools/analysis/mecab.js (added as a new file, mode 100644, blob 82f7d6d529)
// Original source code: https://github.com/hecomi/node-mecab-async
// CUSTOMIZED BY SYUILO

var exec = require('child_process').exec;
var execSync = require('child_process').execSync;
var sq = require('shell-quote');

const config = require('../../conf').default;

// for backward compatibility
var MeCab = function() {};

MeCab.prototype = {
	/**
	 * Command used to invoke MeCab. Taken from `config.analysis.mecab_command`
	 * when that value is set; otherwise falls back to 'mecab'. The lookup is
	 * guarded so a config without an `analysis` section does not throw at load.
	 */
	command : (config && config.analysis && config.analysis.mecab_command) || 'mecab',

	/**
	 * Convert the array-of-arrays produced by `parse`/`parseSync` into an
	 * array of objects with named fields.
	 *
	 * @param {Array<Array<string>>} arrayResult - rows as returned by `_parseMeCabResult`.
	 * @returns {Array<Object>} one object per row that has more than 8 columns;
	 *   an empty array when `arrayResult` is falsy.
	 */
	_format : function(arrayResult) {
		var result = [];
		if (!arrayResult) { return result; }
		// Reference: http://mecab.googlecode.com/svn/trunk/mecab/doc/index.html
		// surface form\tpart of speech,POS subcategory 1,POS subcategory 2,POS subcategory 3,
		// conjugated form,conjugation type,base form,reading,pronunciation
		arrayResult.forEach(function(parsed) {
			// Rows with 8 or fewer columns are skipped.
			if (parsed.length <= 8) { return; }
			result.push({
				kanji         : parsed[0],
				lexical       : parsed[1],
				compound      : parsed[2],
				compound2     : parsed[3],
				compound3     : parsed[4],
				conjugation   : parsed[5],
				inflection    : parsed[6],
				original      : parsed[7],
				reading       : parsed[8],
				pronunciation : parsed[9] || ''
			});
		});
		return result;
	},

	/**
	 * Build the shell command line that pipes `str` into the MeCab command.
	 * The text is escaped with shell-quote's `quote`.
	 *
	 * NOTE(review): `echo` may treat input such as "-n" as an option on some
	 * shells -- confirm whether that matters for callers.
	 *
	 * @param {string} str - text to analyse.
	 * @returns {string} command line of the form `echo <quoted str> | <command>`.
	 */
	_shellCommand : function(str) {
		return sq.quote(['echo', str]) + ' | ' + this.command;
	},

	/**
	 * Split raw MeCab output into rows of columns.
	 *
	 * Each line is split at its FIRST tab into the surface form and the
	 * feature string; only the feature string is then split on commas. This
	 * keeps a surface form that itself contains a comma (e.g. the token ",")
	 * intact instead of shifting every following column. Lines without a tab
	 * are split on commas as a whole.
	 *
	 * @param {string} result - raw stdout of the MeCab command.
	 * @returns {Array<Array<string>>} one array of columns per output line.
	 */
	_parseMeCabResult : function(result) {
		return result.split('\n').map(function(line) {
			var tabIndex = line.indexOf('\t');
			if (tabIndex === -1) { return line.split(','); }
			var surface = line.slice(0, tabIndex);
			var features = line.slice(tabIndex + 1).split(',');
			return [surface].concat(features);
		});
	},

	/**
	 * Analyse `str` asynchronously.
	 *
	 * @param {string} str - text to analyse.
	 * @param {function(Error, Array<Array<string>>=)} callback - receives the
	 *   error from `exec`, or the parsed rows on success.
	 */
	parse : function(str, callback) {
		process.nextTick(function() { // for bug
			exec(MeCab._shellCommand(str), function(err, result) {
				if (err) { return callback(err); }
				// The last two rows are dropped (presumably the "EOS" marker and
				// the empty string after the final newline -- TODO confirm).
				callback(err, MeCab._parseMeCabResult(result).slice(0, -2));
			});
		});
	},

	/**
	 * Analyse `str` synchronously.
	 *
	 * @param {string} str - text to analyse.
	 * @returns {Array<Array<string>>} parsed rows, minus the last two.
	 */
	parseSync : function(str) {
		var result = execSync(MeCab._shellCommand(str));
		return MeCab._parseMeCabResult(String(result)).slice(0, -2);
	},

	/**
	 * Analyse `str` asynchronously and return rows as named-field objects.
	 *
	 * @param {string} str - text to analyse.
	 * @param {function(Error, Array<Object>=)} callback - receives the error,
	 *   or the output of `_format` on success.
	 */
	parseFormat : function(str, callback) {
		MeCab.parse(str, function(err, result) {
			if (err) { return callback(err); }
			callback(err, MeCab._format(result));
		});
	},

	/**
	 * Synchronous counterpart of `parseFormat`.
	 *
	 * @param {string} str - text to analyse.
	 * @returns {Array<Object>} output of `_format` applied to `parseSync(str)`.
	 */
	parseSyncFormat : function(str) {
		return MeCab._format(MeCab.parseSync(str));
	},

	/**
	 * Extract the first column (the surface form) of every row.
	 *
	 * @param {Array<Array<string>>} arr - parsed rows.
	 * @returns {Array<string>} first column of each row.
	 */
	_wakatsu : function(arr) {
		return arr.map(function(data) { return data[0]; });
	},

	/**
	 * Split `str` into its surface forms asynchronously.
	 *
	 * @param {string} str - text to analyse.
	 * @param {function(Error, Array<string>=)} callback - receives the error,
	 *   or `null` and the list of surface forms.
	 */
	wakachi : function(str, callback) {
		MeCab.parse(str, function(err, arr) {
			if (err) { return callback(err); }
			callback(null, MeCab._wakatsu(arr));
		});
	},

	/**
	 * Synchronous counterpart of `wakachi`.
	 *
	 * @param {string} str - text to analyse.
	 * @returns {Array<string>} list of surface forms.
	 */
	wakachiSync : function(str) {
		var arr = MeCab.parseSync(str);
		return MeCab._wakatsu(arr);
	}
};

// Expose every prototype member as a static member too, so the module can be
// used without instantiation (the methods above call each other via `MeCab.*`).
Object.keys(MeCab.prototype).forEach(function(key) {
	MeCab[key] = MeCab.prototype[key];
});

module.exports = MeCab;