summaryrefslogtreecommitdiff
path: root/src/tools
diff options
context:
space:
mode:
author syuilo <syuilotan@yahoo.co.jp> 2017-09-06 19:41:36 +0900
committer syuilo <syuilotan@yahoo.co.jp> 2017-09-06 19:41:36 +0900
commit f33571f2f42cf9d5313a32195fbe147941a95f87 (patch)
tree f2150b6ff47345550263a947b3e285424ebab6de /src/tools
parent Update mocha.opts (diff)
download sharkey-f33571f2f42cf9d5313a32195fbe147941a95f87.tar.gz
sharkey-f33571f2f42cf9d5313a32195fbe147941a95f87.tar.bz2
sharkey-f33571f2f42cf9d5313a32195fbe147941a95f87.zip
wip
Diffstat (limited to 'src/tools')
-rw-r--r-- src/tools/ai/categorizer.ts 89
1 files changed, 89 insertions, 0 deletions
diff --git a/src/tools/ai/categorizer.ts b/src/tools/ai/categorizer.ts
new file mode 100644
index 0000000000..f70ce1b7d4
--- /dev/null
+++ b/src/tools/ai/categorizer.ts
@@ -0,0 +1,89 @@
+import * as fs from 'fs';
+const bayes = require('bayes');
+const MeCab = require('mecab-async');
+import Post from '../../api/models/post';
+
+/**
+ * Naive-Bayes post categorizer built on the `bayes` package, with MeCab
+ * used to split text into tokens.
+ *
+ * The trained classifier is persisted as JSON at `categorizerDbFilePath`.
+ * The constructor does not create `classifier`; call `init()` before
+ * `learn`, `categorize` or `test`.
+ */
+export default class Categorizer {
+  /** `bayes` classifier instance; assigned by `init()`. */
+  classifier: any;
+  /** Path of the JSON file the classifier state is read from / written to. */
+  categorizerDbFilePath: string;
+  /** `mecab-async` instance used by `tokenizer`. */
+  mecab: any;
+
+  /**
+   * @param categorizerDbFilePath File used to load and save the classifier state.
+   * @param mecabCommand Command line assigned to the MeCab wrapper's `command` property.
+   */
+  constructor(categorizerDbFilePath: string, mecabCommand: string = 'mecab -d /usr/share/mecab/dic/mecab-ipadic-neologd') {
+    this.categorizerDbFilePath = categorizerDbFilePath;
+
+    this.mecab = new MeCab();
+    this.mecab.command = mecabCommand;
+
+    // The tokenizer is handed to the classifier as a bare function reference,
+    // so bind it once here to keep `this` pointing at this instance.
+    this.tokenizer = this.tokenizer.bind(this);
+  }
+
+  /**
+   * Splits `text` into tokens via MeCab's synchronous `wakachiSync`.
+   */
+  tokenizer(text: string) {
+    return this.mecab.wakachiSync(text);
+  }
+
+  /**
+   * Prepares `classifier`.
+   *
+   * First tries to restore it from the JSON file. If that fails for any
+   * reason (reading or deserializing throws), a fresh classifier is trained
+   * from every post flagged `is_category_verified` and then saved.
+   */
+  async init(): Promise<void> {
+    try {
+      const db = fs.readFileSync(this.categorizerDbFilePath, {
+        encoding: 'utf8'
+      });
+
+      this.classifier = bayes.fromJson(db);
+      // The tokenizer function is not part of the serialized state; re-attach it.
+      this.classifier.tokenizer = this.tokenizer;
+    } catch (e) {
+      // Best-effort fallback: any failure above means "rebuild from scratch".
+      this.classifier = bayes({
+        tokenizer: this.tokenizer
+      });
+
+      // Training data
+      const verifiedPosts = await Post.find({
+        is_category_verified: true
+      });
+
+      // Learn
+      for (const post of verifiedPosts) {
+        this.classifier.learn(post.text, post.category);
+      }
+
+      this.save();
+    }
+  }
+
+  /**
+   * Records `category` as the verified category of the post `id`, teaches
+   * the classifier that pairing, and persists the classifier.
+   *
+   * @param id `_id` of the post.
+   * @param category Category label to store and learn.
+   * @throws Error when no post with that `_id` exists.
+   */
+  async learn(id, category): Promise<void> {
+    const post = await Post.findOne({ _id: id });
+
+    if (post == null) {
+      throw new Error(`Post not found: ${id}`);
+    }
+
+    // Await the write so a failure surfaces to the caller instead of being
+    // dropped, and so this method only resolves once the post is updated.
+    await Post.update({ _id: id }, {
+      $set: {
+        category: category,
+        is_category_verified: true
+      }
+    });
+
+    this.classifier.learn(post.text, category);
+
+    this.save();
+  }
+
+  /**
+   * Classifies the text of the post `id` and stores the result in the
+   * post's `category` field.
+   *
+   * @param id `_id` of the post.
+   * @throws Error when no post with that `_id` exists.
+   */
+  async categorize(id): Promise<void> {
+    const post = await Post.findOne({ _id: id });
+
+    if (post == null) {
+      throw new Error(`Post not found: ${id}`);
+    }
+
+    const category = this.classifier.categorize(post.text);
+
+    // Awaited for the same reason as in `learn`.
+    await Post.update({ _id: id }, {
+      $set: {
+        category: category
+      }
+    });
+  }
+
+  /**
+   * Returns the classifier's category for an arbitrary `text` without
+   * touching the database.
+   */
+  async test(text) {
+    return this.classifier.categorize(text);
+  }
+
+  /**
+   * Synchronously writes the classifier's JSON state to `categorizerDbFilePath`.
+   */
+  save(): void {
+    fs.writeFileSync(this.categorizerDbFilePath, this.classifier.toJson(), {
+      encoding: 'utf8'
+    });
+  }
+}
+