diff options
| author | syuilo <syuilotan@yahoo.co.jp> | 2017-09-06 23:19:58 +0900 |
|---|---|---|
| committer | syuilo <syuilotan@yahoo.co.jp> | 2017-09-06 23:19:58 +0900 |
| commit | c6b0bf42a112f0d9afa8920d6497cc76205ecaf4 (patch) | |
| tree | 3d067ff9d9a2ef79b9f11a557f732d5d120b314a /src/tools | |
| parent | wip (diff) | |
| download | sharkey-c6b0bf42a112f0d9afa8920d6497cc76205ecaf4.tar.gz sharkey-c6b0bf42a112f0d9afa8920d6497cc76205ecaf4.tar.bz2 sharkey-c6b0bf42a112f0d9afa8920d6497cc76205ecaf4.zip | |
wip
Diffstat (limited to 'src/tools')
| -rw-r--r-- | src/tools/ai/categorizer.ts | 93 | ||||
| -rw-r--r-- | src/tools/ai/predict-all-post-category.ts | 57 | ||||
| -rw-r--r-- | src/tools/ai/predict-user-interst.ts | 45 |
3 files changed, 102 insertions, 93 deletions
diff --git a/src/tools/ai/categorizer.ts b/src/tools/ai/categorizer.ts deleted file mode 100644 index c13374161d..0000000000 --- a/src/tools/ai/categorizer.ts +++ /dev/null @@ -1,93 +0,0 @@ -import * as fs from 'fs'; - -const bayes = require('./naive-bayes.js'); -const MeCab = require('mecab-async'); -import * as msgpack from 'msgpack-lite'; - -import Post from '../../api/models/post'; -import config from '../../conf'; - -/** - * 投稿を学習したり与えられた投稿のカテゴリを予測します - */ -export default class Categorizer { - private classifier: any; - private categorizerDbFilePath: string; - private mecab: any; - - constructor() { - this.categorizerDbFilePath = `${__dirname}/../../../data/category`; - - this.mecab = new MeCab(); - if (config.categorizer.mecab_command) this.mecab.command = config.categorizer.mecab_command; - - // BIND ----------------------------------- - this.tokenizer = this.tokenizer.bind(this); - } - - private tokenizer(text: string) { - return this.mecab.wakachiSync(text); - } - - public async init() { - try { - const buffer = fs.readFileSync(this.categorizerDbFilePath); - const db = msgpack.decode(buffer); - - this.classifier = bayes.import(db); - this.classifier.tokenizer = this.tokenizer; - } catch (e) { - this.classifier = bayes({ - tokenizer: this.tokenizer - }); - - // 訓練データ - const verifiedPosts = await Post.find({ - is_category_verified: true - }); - - // 学習 - verifiedPosts.forEach(post => { - this.classifier.learn(post.text, post.category); - }); - - this.save(); - } - } - - public async learn(id, category) { - const post = await Post.findOne({ _id: id }); - - Post.update({ _id: id }, { - $set: { - category: category, - is_category_verified: true - } - }); - - this.classifier.learn(post.text, category); - - this.save(); - } - - public async categorize(id) { - const post = await Post.findOne({ _id: id }); - - const category = this.classifier.categorize(post.text); - - Post.update({ _id: id }, { - $set: { - category: category - } - }); - } - - public async test(text) { - return this.classifier.categorize(text); - } - - private save() { - const buffer = msgpack.encode(this.classifier.export()); - fs.writeFileSync(this.categorizerDbFilePath, buffer); - } -} diff --git a/src/tools/ai/predict-all-post-category.ts b/src/tools/ai/predict-all-post-category.ts new file mode 100644 index 0000000000..87e198b39b --- /dev/null +++ b/src/tools/ai/predict-all-post-category.ts @@ -0,0 +1,57 @@ +const bayes = require('./naive-bayes.js'); +const MeCab = require('mecab-async'); + +import Post from '../../api/models/post'; +import config from '../../conf'; + +const classifier = bayes({ + tokenizer: this.tokenizer +}); + +const mecab = new MeCab(); +if (config.categorizer.mecab_command) mecab.command = config.categorizer.mecab_command; + +// 訓練データ取得 +Post.find({ + is_category_verified: true +}, { + fields: { + _id: false, + text: true, + category: true + } +}).then(verifiedPosts => { + // 学習 + verifiedPosts.forEach(post => { + classifier.learn(post.text, post.category); + }); + + // 全ての(人間によって証明されていない)投稿を取得 + Post.find({ + text: { + $exists: true + }, + is_category_verified: { + $ne: true + } + }, { + sort: { + _id: -1 + }, + fields: { + _id: true, + text: true + } + }).then(posts => { + posts.forEach(post => { + console.log(`predicting... ${post._id}`); + const category = classifier.categorize(post.text); + + Post.update({ _id: post._id }, { + $set: { + category: category + } + }); + }); + }); +}); diff --git a/src/tools/ai/predict-user-interst.ts b/src/tools/ai/predict-user-interst.ts new file mode 100644 index 0000000000..99bdfa4206 --- /dev/null +++ b/src/tools/ai/predict-user-interst.ts @@ -0,0 +1,45 @@ +import Post from '../../api/models/post'; +import User from '../../api/models/user'; + +export async function predictOne(id) { + console.log(`predict interest of ${id} ...`); + + // TODO: repostなども含める + const recentPosts = await Post.find({ + user_id: id, + category: { + $exists: true + } + }, { + sort: { + _id: -1 + }, + limit: 1000, + fields: { + _id: false, + category: true + } + }); + + const categories = {}; + + recentPosts.forEach(post => { + if (categories[post.category]) { + categories[post.category]++; + } else { + categories[post.category] = 1; + } + }); +} + +export async function predictAll() { + const allUsers = await User.find({}, { + fields: { + _id: true + } + }); + + allUsers.forEach(user => { + predictOne(user._id); + }); +} |