summaryrefslogtreecommitdiff
path: root/src/tools
diff options
context:
space:
mode:
author syuilo <syuilotan@yahoo.co.jp> 2017-09-06 23:19:58 +0900
committer syuilo <syuilotan@yahoo.co.jp> 2017-09-06 23:19:58 +0900
commit c6b0bf42a112f0d9afa8920d6497cc76205ecaf4 (patch)
tree 3d067ff9d9a2ef79b9f11a557f732d5d120b314a /src/tools
parent wip (diff)
download sharkey-c6b0bf42a112f0d9afa8920d6497cc76205ecaf4.tar.gz
sharkey-c6b0bf42a112f0d9afa8920d6497cc76205ecaf4.tar.bz2
sharkey-c6b0bf42a112f0d9afa8920d6497cc76205ecaf4.zip
wip
Diffstat (limited to 'src/tools')
-rw-r--r-- src/tools/ai/categorizer.ts 93
-rw-r--r-- src/tools/ai/predict-all-post-category.ts 57
-rw-r--r-- src/tools/ai/predict-user-interst.ts 45
3 files changed, 102 insertions, 93 deletions
diff --git a/src/tools/ai/categorizer.ts b/src/tools/ai/categorizer.ts
deleted file mode 100644
index c13374161d..0000000000
--- a/src/tools/ai/categorizer.ts
+++ /dev/null
@@ -1,93 +0,0 @@
-import * as fs from 'fs';
-
-const bayes = require('./naive-bayes.js');
-const MeCab = require('mecab-async');
-import * as msgpack from 'msgpack-lite';
-
-import Post from '../../api/models/post';
-import config from '../../conf';
-
-/**
- * 投稿を学習したり与えられた投稿のカテゴリを予測します
- */
-export default class Categorizer {
- private classifier: any;
- private categorizerDbFilePath: string;
- private mecab: any;
-
- constructor() {
- this.categorizerDbFilePath = `${__dirname}/../../../data/category`;
-
- this.mecab = new MeCab();
- if (config.categorizer.mecab_command) this.mecab.command = config.categorizer.mecab_command;
-
- // BIND -----------------------------------
- this.tokenizer = this.tokenizer.bind(this);
- }
-
- private tokenizer(text: string) {
- return this.mecab.wakachiSync(text);
- }
-
- public async init() {
- try {
- const buffer = fs.readFileSync(this.categorizerDbFilePath);
- const db = msgpack.decode(buffer);
-
- this.classifier = bayes.import(db);
- this.classifier.tokenizer = this.tokenizer;
- } catch (e) {
- this.classifier = bayes({
- tokenizer: this.tokenizer
- });
-
- // 訓練データ
- const verifiedPosts = await Post.find({
- is_category_verified: true
- });
-
- // 学習
- verifiedPosts.forEach(post => {
- this.classifier.learn(post.text, post.category);
- });
-
- this.save();
- }
- }
-
- public async learn(id, category) {
- const post = await Post.findOne({ _id: id });
-
- Post.update({ _id: id }, {
- $set: {
- category: category,
- is_category_verified: true
- }
- });
-
- this.classifier.learn(post.text, category);
-
- this.save();
- }
-
- public async categorize(id) {
- const post = await Post.findOne({ _id: id });
-
- const category = this.classifier.categorize(post.text);
-
- Post.update({ _id: id }, {
- $set: {
- category: category
- }
- });
- }
-
- public async test(text) {
- return this.classifier.categorize(text);
- }
-
- private save() {
- const buffer = msgpack.encode(this.classifier.export());
- fs.writeFileSync(this.categorizerDbFilePath, buffer);
- }
-}
diff --git a/src/tools/ai/predict-all-post-category.ts b/src/tools/ai/predict-all-post-category.ts
new file mode 100644
index 0000000000..87e198b39b
--- /dev/null
+++ b/src/tools/ai/predict-all-post-category.ts
@@ -0,0 +1,57 @@
+const bayes = require('./naive-bayes.js');
+const MeCab = require('mecab-async');
+
+import Post from '../../api/models/post';
+import config from '../../conf';
+
+// Tokenise with MeCab. The classifier used to be given `this.tokenizer`, but
+// top-level `this` has no `tokenizer`, so the `mecab` instance configured
+// below was never actually used.
+const mecab = new MeCab();
+if (config.categorizer.mecab_command) mecab.command = config.categorizer.mecab_command;
+
+const classifier = bayes({
+  tokenizer: (text: string) => mecab.wakachiSync(text)
+});
+
+/**
+ * Trains the classifier on human-verified posts, then predicts and stores a
+ * category for every post with text that has not been verified.
+ */
+async function main(): Promise<void> {
+  // Fetch training data
+  const verifiedPosts = await Post.find({
+    is_category_verified: true
+  }, {
+    fields: { _id: false, text: true, category: true }
+  });
+
+  // Learn
+  for (const post of verifiedPosts) {
+    classifier.learn(post.text, post.category);
+  }
+
+  // Fetch all posts not verified by a human, in descending _id order
+  const posts = await Post.find({
+    text: { $exists: true },
+    is_category_verified: { $ne: true }
+  }, {
+    sort: { _id: -1 },
+    fields: { _id: true, text: true }
+  });
+
+  for (const post of posts) {
+    console.log(`predicting... ${post._id}`);
+    const category = classifier.categorize(post.text);
+
+    // Await each write so a failure surfaces instead of being dropped.
+    // Assumes Post.update returns a promise like Post.find — TODO confirm.
+    await Post.update({ _id: post._id }, { $set: { category: category } });
+  }
+}
+
+main().catch(err => {
+  // Report the failure and make the process exit status reflect it.
+  console.error(err);
+  process.exitCode = 1;
+});
diff --git a/src/tools/ai/predict-user-interst.ts b/src/tools/ai/predict-user-interst.ts
new file mode 100644
index 0000000000..99bdfa4206
--- /dev/null
+++ b/src/tools/ai/predict-user-interst.ts
@@ -0,0 +1,45 @@
+import Post from '../../api/models/post';
+import User from '../../api/models/user';
+
+/**
+ * Tallies how often each category occurs in a user's recent categorised posts.
+ *
+ * @param id `user_id` of the user whose posts are counted
+ * @returns map from category to its count over the (at most 1000) posts read
+ */
+export async function predictOne(id) {
+  console.log(`predict interest of ${id} ...`);
+
+  // TODO: include reposts etc. as well
+  const recentPosts = await Post.find({
+    user_id: id,
+    category: { $exists: true }
+  }, {
+    sort: { _id: -1 },
+    limit: 1000,
+    fields: { _id: false, category: true }
+  });
+
+  // Prototype-less object: a category named e.g. "constructor" must start
+  // from 0 rather than from an inherited Object.prototype member.
+  const categories: { [category: string]: number } = Object.create(null);
+
+  for (const post of recentPosts) {
+    categories[post.category] = (categories[post.category] || 0) + 1;
+  }
+
+  // Previously the tally was discarded; return it so callers can use it.
+  return categories;
+}
+
+/**
+ * Runs `predictOne` for every user, one at a time, and resolves when all are done.
+ */
+export async function predictAll() {
+  const allUsers = await User.find({}, { fields: { _id: true } });
+
+  // Await each call: `forEach` dropped the promises, so failures went unhandled.
+  for (const user of allUsers) {
+    await predictOne(user._id);
+  }
+}