summaryrefslogtreecommitdiff
path: root/src/tools/analysis/mecab.js
blob: 82f7d6d5297c2407b37de699f8bda52e2ce9c006 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
// Original source code: https://github.com/hecomi/node-mecab-async
// CUSTOMIZED BY SYUILO

var exec     = require('child_process').exec;
var execSync = require('child_process').execSync;
var sq       = require('shell-quote');

const config = require('../../conf').default;

// Empty constructor, retained only so that legacy `new MeCab()` call sites
// keep working; it stores no per-instance state.
function MeCab() {}

/**
 * MeCab wrapper methods.
 *
 * The methods below call one another through `MeCab.<name>` rather than
 * `this.<name>`, so they rely on the members of this object also being
 * available as properties of the `MeCab` function itself.
 */
MeCab.prototype = {
    // Shell command used to launch MeCab: the configured value when it is
    // truthy, otherwise the bare `mecab` executable name.
    command : config.analysis.mecab_command ? config.analysis.mecab_command : 'mecab',

    /**
     * Convert parsed rows into objects with named fields.
     *
     * @param {Array<Array<string>>} arrayResult - rows as produced by `parse` / `parseSync`.
     * @returns {Array<Object>} one object per row that has at least nine columns;
     *     an empty array when `arrayResult` is falsy.
     */
    _format: function(arrayResult) {
        var result = [];
        if (!arrayResult) { return result; }
        // Reference: http://mecab.googlecode.com/svn/trunk/mecab/doc/index.html
        // surface form\tpart of speech,POS subcategory 1,POS subcategory 2,POS subcategory 3,conjugation form,conjugation type,base form,reading,pronunciation
        arrayResult.forEach(function(parsed) {
            // Rows with fewer than nine columns are dropped.
            if (parsed.length <= 8) { return; }
            result.push({
                kanji         : parsed[0],
                lexical       : parsed[1],
                compound      : parsed[2],
                compound2     : parsed[3],
                compound3     : parsed[4],
                conjugation   : parsed[5],
                inflection    : parsed[6],
                original      : parsed[7],
                reading       : parsed[8],
                // The tenth column is optional; default to an empty string.
                pronunciation : parsed[9] || ''
            });
        });
        return result;
    },

    /**
     * Build the shell pipeline `echo <quoted str> | <command>`.
     *
     * Reads `this.command`, so it has to be invoked as a method (the other
     * methods call it as `MeCab._shellCommand`).
     *
     * @param {string} str - text to analyse; quoted with shell-quote.
     * @returns {string} the command line to execute.
     */
    _shellCommand : function(str) {
        return sq.quote(['echo', str]) + ' | ' + this.command;
    },

    /**
     * Split raw MeCab output into rows of columns.
     *
     * Each line is cut at its first tab: the text before it becomes column 0
     * unchanged, and only the text after it is split on commas. Splitting the
     * whole line on commas would break up a surface form that itself contains
     * a comma and shift every following column.
     *
     * @param {string} result - raw stdout text.
     * @returns {Array<Array<string>>} one array of columns per output line.
     */
    _parseMeCabResult : function(result) {
        return result.split('\n').map(function(line) {
            var tabAt = line.indexOf('\t');
            // Lines without a tab keep the plain comma split.
            if (tabAt === -1) { return line.split(','); }
            var surface  = line.slice(0, tabAt);
            var features = line.slice(tabAt + 1).split(',');
            return [surface].concat(features);
        });
    },

    /**
     * Analyse `str` asynchronously.
     *
     * @param {string} str - text to analyse.
     * @param {function(?Error, Array<Array<string>>=)} callback - receives the
     *     exec error, or `err` (falsy) plus the parsed rows.
     */
    parse : function(str, callback) {
        process.nextTick(function() { // for bug
            exec(MeCab._shellCommand(str), function(err, result) {
                if (err) { return callback(err); }
                // Drop the last two rows (presumably the closing EOS line and
                // the empty string after the final newline).
                callback(err, MeCab._parseMeCabResult(result).slice(0,-2));
            });
        });
    },

    /**
     * Synchronous counterpart of `parse`; errors raised by `execSync` propagate.
     *
     * @param {string} str - text to analyse.
     * @returns {Array<Array<string>>} parsed rows without the last two.
     */
    parseSync : function(str) {
        var result = execSync(MeCab._shellCommand(str));
        return MeCab._parseMeCabResult(String(result)).slice(0, -2);
    },

    /**
     * Like `parse`, but passes the rows through `_format` first.
     *
     * @param {string} str - text to analyse.
     * @param {function(?Error, Array<Object>=)} callback - receives the error
     *     or the formatted objects.
     */
    parseFormat : function(str, callback) {
        MeCab.parse(str, function(err, result) {
            if (err) { return callback(err); }
            callback(err, MeCab._format(result));
        });
    },

    /**
     * Like `parseSync`, but passes the rows through `_format` first.
     *
     * @param {string} str - text to analyse.
     * @returns {Array<Object>} formatted objects.
     */
    parseSyncFormat : function(str) {
        return MeCab._format(MeCab.parseSync(str));
    },

    /**
     * Keep only the first column of every row.
     *
     * @param {Array<Array<string>>} arr - parsed rows.
     * @returns {Array<string>} first column of each row.
     */
    _wakatsu : function(arr) {
        return arr.map(function(data) { return data[0]; });
    },

    /**
     * Asynchronously split `str` into the first-column values of its rows.
     *
     * @param {string} str - text to analyse.
     * @param {function(?Error, Array<string>=)} callback - receives the error,
     *     or `null` plus the list of first-column values.
     */
    wakachi : function(str, callback) {
        MeCab.parse(str, function(err, arr) {
            if (err) { return callback(err); }
            callback(null, MeCab._wakatsu(arr));
        });
    },

    /**
     * Synchronous counterpart of `wakachi`.
     *
     * @param {string} str - text to analyse.
     * @returns {Array<string>} list of first-column values.
     */
    wakachiSync : function(str) {
        var arr = MeCab.parseSync(str);
        return MeCab._wakatsu(arr);
    }
};

// Mirror every enumerable prototype member onto the MeCab function itself.
// The methods reach one another via `MeCab.<name>`, so these static copies
// are what make both `MeCab.parse(...)` and `new MeCab().parse(...)` work.
const members = MeCab.prototype;
for (const memberName in members) {
    MeCab[memberName] = members[memberName];
}

module.exports = MeCab;