summaryrefslogtreecommitdiff
path: root/src/tools/analysis/mecab.js
diff options
context:
space:
mode:
author: syuilo <syuilotan@yahoo.co.jp> 2017-09-07 14:52:07 +0900
committer: syuilo <syuilotan@yahoo.co.jp> 2017-09-07 14:52:07 +0900
commit: 1e3f93d68ee5db3739084039be57279c14d0a62b (patch)
tree: eb4d27a98bd4edd32459f6f95b47b0eeb7206465 /src/tools/analysis/mecab.js
parent:v: (diff)
downloadmisskey-1e3f93d68ee5db3739084039be57279c14d0a62b.tar.gz
misskey-1e3f93d68ee5db3739084039be57279c14d0a62b.tar.bz2
misskey-1e3f93d68ee5db3739084039be57279c14d0a62b.zip
:v:
Diffstat (limited to 'src/tools/analysis/mecab.js')
-rw-r--r--src/tools/analysis/mecab.js85
1 files changed, 85 insertions, 0 deletions
diff --git a/src/tools/analysis/mecab.js b/src/tools/analysis/mecab.js
new file mode 100644
index 0000000000..82f7d6d529
--- /dev/null
+++ b/src/tools/analysis/mecab.js
@@ -0,0 +1,85 @@
+// Original source code: https://github.com/hecomi/node-mecab-async
+// CUSTOMIZED BY SYUILO
+
+var exec = require('child_process').exec;
+var execSync = require('child_process').execSync;
+var sq = require('shell-quote');
+
+const config = require('../../conf').default;
+
+// for backward compatibility
+var MeCab = function() {};
+
+MeCab.prototype = {
+ command : config.analysis.mecab_command ? config.analysis.mecab_command : 'mecab',
+ _format: function(arrayResult) {
+ var result = [];
+ if (!arrayResult) { return result; }
+ // Reference: http://mecab.googlecode.com/svn/trunk/mecab/doc/index.html
+ // 表層形\t品詞,品詞細分類1,品詞細分類2,品詞細分類3,活用形,活用型,原形,読み,発音
+ arrayResult.forEach(function(parsed) {
+ if (parsed.length <= 8) { return; }
+ result.push({
+ kanji : parsed[0],
+ lexical : parsed[1],
+ compound : parsed[2],
+ compound2 : parsed[3],
+ compound3 : parsed[4],
+ conjugation : parsed[5],
+ inflection : parsed[6],
+ original : parsed[7],
+ reading : parsed[8],
+ pronunciation : parsed[9] || ''
+ });
+ });
+ return result;
+ },
+ _shellCommand : function(str) {
+ return sq.quote(['echo', str]) + ' | ' + this.command;
+ },
+ _parseMeCabResult : function(result) {
+ return result.split('\n').map(function(line) {
+ return line.replace('\t', ',').split(',');
+ });
+ },
+ parse : function(str, callback) {
+ process.nextTick(function() { // for bug
+ exec(MeCab._shellCommand(str), function(err, result) {
+ if (err) { return callback(err); }
+ callback(err, MeCab._parseMeCabResult(result).slice(0,-2));
+ });
+ });
+ },
+ parseSync : function(str) {
+ var result = execSync(MeCab._shellCommand(str));
+ return MeCab._parseMeCabResult(String(result)).slice(0, -2);
+ },
+ parseFormat : function(str, callback) {
+ MeCab.parse(str, function(err, result) {
+ if (err) { return callback(err); }
+ callback(err, MeCab._format(result));
+ });
+ },
+ parseSyncFormat : function(str) {
+ return MeCab._format(MeCab.parseSync(str));
+ },
+ _wakatsu : function(arr) {
+ return arr.map(function(data) { return data[0]; });
+ },
+ wakachi : function(str, callback) {
+ MeCab.parse(str, function(err, arr) {
+ if (err) { return callback(err); }
+ callback(null, MeCab._wakatsu(arr));
+ });
+ },
+ wakachiSync : function(str) {
+ var arr = MeCab.parseSync(str);
+ return MeCab._wakatsu(arr);
+ }
+};
+
+for (var x in MeCab.prototype) {
+ MeCab[x] = MeCab.prototype[x];
+}
+
+module.exports = MeCab;