Skip to content

Commit dc17f9a

Browse files
committed
feat: add cache for tokenize
1 parent 3ba4868 commit dc17f9a

File tree

9 files changed

+751
-942
lines changed

9 files changed

+751
-942
lines changed

.github/workflows/test.yml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
name: test
2+
on: [push, pull_request]
3+
env:
4+
CI: true
5+
jobs:
6+
test:
7+
name: "Test on Node.js ${{ matrix.node-version }}"
8+
runs-on: ubuntu-latest
9+
strategy:
10+
matrix:
11+
node-version: [10, 12, 14]
12+
steps:
13+
- name: checkout
14+
uses: actions/checkout@v2
15+
- name: setup Node.js ${{ matrix.node-version }}
16+
uses: actions/setup-node@v1
17+
with:
18+
node-version: ${{ matrix.node-version }}
19+
- name: Install
20+
run: yarn install
21+
- name: Test
22+
run: yarn test

.mocharc.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"require": [
3+
"ts-node-test-register"
4+
]
5+
}

.travis.yml

Lines changed: 0 additions & 3 deletions
This file was deleted.

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
1-
# kuromojin [![Build Status](https://travis-ci.org/azu/kuromojin.svg?branch=master)](https://travis-ci.org/azu/kuromojin)
1+
# kuromojin [![Actions Status: test](https://github.com/azu/kuromojin/workflows/test/badge.svg)](https://github.com/azu/kuromojin/actions?query=workflow%3A"test")
22

33
Provide a high level wrapper for [kuromoji.js](https://github.com/takuyaa/kuromoji.js "kuromoji.js").
44

55
## Features
66

77
- Promise based API
88
- Cache Layer
9+
- Fetch the dictionary only once
10+
- Return the same tokens for the same text
911

1012
## Installation
1113

package.json

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -34,19 +34,19 @@
3434
"netlify": "mkdir -p public && cp -r node_modules/kuromoji/dict public/"
3535
},
3636
"dependencies": {
37-
"kuromoji": "0.1.1"
37+
"kuromoji": "^0.1.1"
3838
},
3939
"devDependencies": {
40-
"@types/mocha": "^5.2.7",
41-
"@types/node": "^12.0.12",
42-
"cross-env": "^5.2.0",
43-
"husky": "^3.0.0",
44-
"lint-staged": "^9.1.0",
45-
"mocha": "^6.1.4",
46-
"prettier": "^1.18.2",
47-
"ts-node": "^8.3.0",
48-
"ts-node-test-register": "^8.0.1",
49-
"typescript": "^3.5.2"
40+
"@types/mocha": "^8.2.0",
41+
"@types/node": "^14.14.14",
42+
"cross-env": "^7.0.3",
43+
"husky": "^4.3.6",
44+
"lint-staged": "^10.5.3",
45+
"mocha": "^8.2.1",
46+
"prettier": "^2.2.1",
47+
"ts-node": "^9.1.1",
48+
"ts-node-test-register": "^9.0.1",
49+
"typescript": "^4.1.3"
5050
},
5151
"email": "azuciao@gmail.com",
5252
"prettier": {

src/Deferred.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,11 @@
22
"use strict";
33
export default class Deferred<T> {
44
promise: Promise<T>;
5-
resolve!: (value?: T) => void;
6-
reject!: (reason?: any) => void;
5+
resolve!: (value: T) => void;
6+
reject!: (reason: any) => void;
7+
78
constructor() {
8-
this.promise = new Promise((resolve, reject) => {
9+
this.promise = new Promise<T>((resolve, reject) => {
910
this.resolve = resolve;
1011
this.reject = reject;
1112
});

src/kuromojin.ts

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,14 +66,17 @@ const getNodeModuleDirPath = () => {
6666

6767
// cache for tokenizer
6868
let _tokenizer: null | Tokenizer = null;
69+
let tokenizeCacheMap = new Map<string, KuromojiToken[]>()
6970
// lock boolean
7071
let isLoading = false;
7172

7273
export type getTokenizerOption = {
7374
dicPath: string;
75+
// Default: false
76+
noCache?: boolean
7477
};
7578

76-
export function getTokenizer(options: getTokenizerOption = {dicPath: getNodeModuleDirPath()}): Promise<Tokenizer> {
79+
export function getTokenizer(options: getTokenizerOption = { dicPath: getNodeModuleDirPath() }): Promise<Tokenizer> {
7780
if (_tokenizer) {
7881
return Promise.resolve(_tokenizer);
7982
}
@@ -92,8 +95,18 @@ export function getTokenizer(options: getTokenizerOption = {dicPath: getNodeModu
9295
return deferred.promise;
9396
}
9497

95-
export function tokenize(text: string, options?: getTokenizerOption) {
98+
export function tokenize(text: string, options?: getTokenizerOption): Promise<KuromojiToken[]> {
9699
return getTokenizer(options).then(tokenizer => {
97-
return tokenizer.tokenizeForSentence(text);
100+
if (options?.noCache) {
101+
return tokenizer.tokenizeForSentence(text);
102+
} else {
103+
const cache = tokenizeCacheMap.get(text);
104+
if (cache) {
105+
return cache;
106+
}
107+
const tokens = tokenizer.tokenizeForSentence(text);
108+
tokenizeCacheMap.set(text, tokens);
109+
return tokens;
110+
}
98111
});
99112
}

test/mocha.opts

Lines changed: 0 additions & 1 deletion
This file was deleted.

0 commit comments

Comments
 (0)