diff options
| author | syuilo <syuilotan@yahoo.co.jp> | 2017-09-06 19:41:36 +0900 |
|---|---|---|
| committer | syuilo <syuilotan@yahoo.co.jp> | 2017-09-06 19:41:36 +0900 |
| commit | f33571f2f42cf9d5313a32195fbe147941a95f87 (patch) | |
| tree | f2150b6ff47345550263a947b3e285424ebab6de /src | |
| parent | Update mocha.opts (diff) | |
| download | sharkey-f33571f2f42cf9d5313a32195fbe147941a95f87.tar.gz sharkey-f33571f2f42cf9d5313a32195fbe147941a95f87.tar.bz2 sharkey-f33571f2f42cf9d5313a32195fbe147941a95f87.zip | |
wip
Diffstat (limited to 'src')
| -rw-r--r-- | src/tools/ai/categorizer.ts | 89 |
1 files changed, 89 insertions, 0 deletions
// src/tools/ai/categorizer.ts
import * as fs from 'fs';
const bayes = require('bayes');
const MeCab = require('mecab-async');
import Post from '../../api/models/post';

/**
 * Post categorizer backed by a `bayes` classifier whose state is persisted
 * as JSON in a file, using MeCab (`wakachiSync`) as the tokenizer.
 *
 * Call `init()` before any of `learn`, `categorize` or `test`: the
 * classifier is only created there.
 */
export default class Categorizer {
	classifier: any;
	categorizerDbFilePath: string;
	mecab: any;

	/**
	 * @param categorizerDbFilePath Path of the file the classifier state is read from and written to.
	 * @param mecabCommand Command assigned to the MeCab instance's `command` property.
	 */
	constructor(categorizerDbFilePath: string, mecabCommand: string = 'mecab -d /usr/share/mecab/dic/mecab-ipadic-neologd') {
		this.categorizerDbFilePath = categorizerDbFilePath;

		this.mecab = new MeCab();
		this.mecab.command = mecabCommand;

		// The tokenizer is handed to the classifier as a bare function,
		// so it must keep `this` pointing at this instance.
		this.tokenizer = this.tokenizer.bind(this);
	}

	/**
	 * Splits text into tokens by delegating to MeCab's `wakachiSync`.
	 */
	tokenizer(text: string) {
		return this.mecab.wakachiSync(text);
	}

	/**
	 * Loads the classifier from the state file. If the file cannot be read or
	 * parsed, a fresh classifier is built from every post flagged
	 * `is_category_verified` and then saved.
	 */
	async init() {
		try {
			const db = fs.readFileSync(this.categorizerDbFilePath, {
				encoding: 'utf8'
			});

			this.classifier = bayes.fromJson(db);
			// Functions do not survive JSON serialization, so re-attach the tokenizer.
			this.classifier.tokenizer = this.tokenizer;
		} catch (e) {
			// Deliberate best-effort: any load failure (missing file, invalid
			// JSON, ...) falls back to retraining from the verified posts.
			this.classifier = bayes({
				tokenizer: this.tokenizer
			});

			// Training data
			const verifiedPosts = await Post.find({
				is_category_verified: true
			});

			// Train
			for (const post of verifiedPosts) {
				// Skip posts without text; there is nothing to tokenize.
				if (post.text == null) continue;
				this.classifier.learn(post.text, post.category);
			}

			this.save();
		}
	}

	/**
	 * Marks the post as verified with the given category, teaches the
	 * classifier that pairing, and persists the classifier.
	 *
	 * @param id `_id` of the post.
	 * @param category Category to assign to the post.
	 * @throws Error if no post with that `_id` exists.
	 */
	async learn(id: any, category: string) {
		const post = await this.fetchPost(id);

		// Wait for the write so that failures surface to the caller and the
		// classifier is not trained when the database update did not succeed.
		await Post.update({ _id: id }, {
			$set: {
				category: category,
				is_category_verified: true
			}
		});

		this.classifier.learn(post.text, category);

		this.save();
	}

	/**
	 * Classifies the post's text and stores the result on the post.
	 *
	 * @param id `_id` of the post.
	 * @returns The category chosen by the classifier.
	 * @throws Error if no post with that `_id` exists.
	 */
	async categorize(id: any) {
		const post = await this.fetchPost(id);

		const category = this.classifier.categorize(post.text);

		await Post.update({ _id: id }, {
			$set: {
				category: category
			}
		});

		return category;
	}

	/**
	 * Classifies arbitrary text without touching the database.
	 *
	 * @returns The category chosen by the classifier.
	 */
	async test(text: string) {
		return this.classifier.categorize(text);
	}

	/**
	 * Writes the classifier state as JSON to `categorizerDbFilePath` (synchronously).
	 */
	save() {
		fs.writeFileSync(this.categorizerDbFilePath, this.classifier.toJson(), {
			encoding: 'utf8'
		});
	}

	/**
	 * Looks a post up by `_id`, failing loudly when it does not exist.
	 */
	private async fetchPost(id: any) {
		const post = await Post.findOne({ _id: id });

		if (post == null) {
			throw new Error(`post not found: ${id}`);
		}

		return post;
	}
}