diff --git a/kuromoji/kuromoji-tests.ts b/kuromoji/kuromoji-tests.ts
index d19e7f140a..0355a6501e 100644
--- a/kuromoji/kuromoji-tests.ts
+++ b/kuromoji/kuromoji-tests.ts
@@ -1,9 +1,33 @@
///
+// From https://github.com/takuyaa/kuromoji.js/blob/master/README.md#usage
kuromoji.builder({ dicPath: "/url/to/dictionary/dir/" }).build(function (err, tokenizer) {
var path = tokenizer.tokenize("すもももももももものうち");
+ var num_tmp: number; // assignment target for the number-typed token fields below; never read here
+ var str_tmp: string; // assignment target for the string-typed token fields below; never read here
path.forEach((token)=>{
- console.log(token.word_id);
- console.log(token.surface_form);
+ num_tmp = token.word_id;
+ str_tmp = token.word_type;
+ num_tmp = token.word_position;
+ str_tmp = token.surface_form;
+ str_tmp = token.pos;
+ str_tmp = token.pos_detail_1;
+ str_tmp = token.pos_detail_2;
+ str_tmp = token.pos_detail_3;
+ str_tmp = token.conjugated_type;
+ str_tmp = token.conjugated_form;
+ str_tmp = token.basic_form;
+ str_tmp = token.reading; // declared optional (reading?) on IpadicFeatures
+ str_tmp = token.pronunciation; // declared optional (pronunciation?) on IpadicFeatures
});
-});
\ No newline at end of file
+});
+
+// From https://github.com/takuyaa/kuromoji.js/blob/master/test/resource/minimum-dic/minimum.csv
+var minimum_dict = [
+ "すもも,1285,1285,7546,名詞,一般,*,*,*,*,すもも,スモモ,スモモ",
+ "もも,1285,1285,7219,名詞,一般,*,*,*,*,もも,モモ,モモ"
+].join('\n'); // the two CSV rows become one newline-separated string
+
+var builder = kuromoji.dictionaryBuilder(); // declared to return DictionaryBuilder
+builder = builder.addTokenInfoDictionary(minimum_dict); // reassignment only type-checks if the call returns a DictionaryBuilder
+var dict = builder.build(); // declared to return DynamicDictionaries
\ No newline at end of file
diff --git a/kuromoji/kuromoji.d.ts b/kuromoji/kuromoji.d.ts
index 7ee5ca92af..ed1cd9b80f 100644
--- a/kuromoji/kuromoji.d.ts
+++ b/kuromoji/kuromoji.d.ts
@@ -3,45 +3,114 @@
// Definitions by: MIZUSHIMA Junki
// Definitions: https://github.com/borisyankov/DefinitelyTyped
+/// <reference path="../doublearray/doublearray.d.ts" />
+
declare module kuromoji {
- interface TokenizerBuilderOption {
- dicPath?: string;
+ // dict/ConnectionCosts.js
+ interface ConnectionCosts {
+ buffer: Int16Array;
+ put(forward_id: number, backward_id: number, cost: number): void;
+ get(forward_id: number, backward_id: number): number;
+ loadConnectionCosts(connection_costs_buffer: Int16Array): void;
}
- interface TokenizerBuilder {
- build(callback: (err: Error, tokenizer: Tokenizer) => void): void;
+ // dict/DynamicDictionaries.js
+ interface DynamicDictionaries {
+ trie: doublearray.DoubleArray;
+ token_info_dictionary: TokenInfoDictionary;
+ connection_costs: ConnectionCosts;
+ unknown_dictionary: UnknownDictionary;
+ loadTrie(base_buffer: Int32Array, check_buffer: Int32Array): DynamicDictionaries;
}
- interface Tokenizer {
- token_info_dictionary: any;
- unknown_dictionary: any;
- viterbi_builder: ViterbiBuilder;
- viterbi_searcher: ViterbiSearcher;
- formatter: T;
- tokenize(text: string): T[];
- getLattice(text: string): ViterbiLattice;
+ // dict/TokenInfoDictionary.js
+ interface TokenInfoDictionary {
+ buildDictionary(entries: any[][]): {[word_id: number]: string};
+ put(left_id: number, right_id: number, word_cost: number, surface_form: string, feature: string): number;
+ addMapping(source: number, target: number): void;
+ targetMapToBuffer(): Uint8Array;
+ loadDictionary(array_buffer: Uint8Array): TokenInfoDictionary;
+ loadPosVector(array_buffer: Uint8Array): TokenInfoDictionary;
+ loadTargetMap(array_buffer: Uint8Array): TokenInfoDictionary;
+ getFeatures(token_info_id_str: string): string;
}
+ // dict/UnknownDictionary.js
+ interface UnknownDictionary extends TokenInfoDictionary {
+ }
+
+ // util/ByteBuffer.js
+ interface ByteBuffer {
+ buffer: Uint8Array;
+ position: number;
+ size(): number;
+ reallocate(): void;
+ shrink(): Uint8Array;
+ put(b: number): void;
+ get(index: number): number;
+ putShort(num: number): void;
+ getShort(index: number): number;
+ putInt(num: number): void;
+ getInt(index: number): number;
+ readInt(): number;
+ putString(str: string): void;
+ getString(index: number): string;
+ }
+
+ // util/DictionaryBuilder.js
+ interface DictionaryBuilder {
+ tid_entries: string[];
+ unk_entries: string[];
+ addTokenInfoDictionary(text: string): DictionaryBuilder;
+ costMatrix(matrix_text: string): DictionaryBuilder;
+ charDef(char_text: string): DictionaryBuilder;
+ unkDef(text: string): DictionaryBuilder;
+ build(): DynamicDictionaries;
+ buildTokenInfoDictionary(): {trie: doublearray.DoubleArray; token_info_dictionary: TokenInfoDictionary};
+ buildUnknownDictionary(): UnknownDictionary;
+ buildConnectionCosts(): ConnectionCosts;
+ buildDoubleArray(): doublearray.DoubleArray;
+ }
+
+ // util/IpadicFormatter.js (T is the token shape returned by formatEntry / formatUnknownEntry)
+ interface Formatter<T> {
+ formatEntry(word_id: number, position: number, type: string, features: string[]): T;
+ formatUnknownEntry(word_id: number, position: number, type: string, features: string[]): T;
+ }
+ interface IpadicFormatter extends Formatter<IpadicFeatures> {
+ }
+ export interface IpadicFeatures {
+ word_id: number;
+ word_type: string;
+ word_position: number;
+ surface_form: string;
+ pos: string;
+ pos_detail_1: string;
+ pos_detail_2: string;
+ pos_detail_3: string;
+ conjugated_type: string;
+ conjugated_form: string;
+ basic_form: string;
+ reading?: string;
+ pronunciation?: string;
+ }
+
+ // viterbi/ViterbiBuilder.js
interface ViterbiBuilder {
- trie: any;
- token_info_dictionary: any;
- unknown_dictionary: any;
+ trie: doublearray.DoubleArray;
+ token_info_dictionary: TokenInfoDictionary;
+ unknown_dictionary: UnknownDictionary;
build(sentence_str: string): ViterbiLattice;
-
- }
-
- interface ViterbiSearcher {
- connection_costs: any;
- search(lattice: ViterbiLattice): ViterbiNode[];
- forward(lattice: ViterbiLattice)
}
+ // viterbi/ViterbiLattice.js
interface ViterbiLattice {
append(node: ViterbiNode): void;
appendEos(): void;
}
+ // viterbi/ViterbiNode.js
interface ViterbiNode {
name: string;
cost: number;
@@ -55,42 +124,37 @@ declare module kuromoji {
type: string;
}
- interface IpadicFormatter {
- formatEntry(word_id: number, position: number, type: string, features: string[]): IpadicFormat;
- formatUnknownEntry(word_id: number, position: number, type: string, features: string[]): IpadicFormat;
+ // viterbi/ViterbiSearcher.js
+ interface ViterbiSearcher {
+ connection_costs: ConnectionCosts;
+ search(lattice: ViterbiLattice): ViterbiNode[];
+ forward(lattice: ViterbiLattice): ViterbiLattice;
+ backward(lattice: ViterbiLattice): ViterbiNode[];
}
- interface IpadicFormat {
- word_id: number;
- word_type: string;
- word_position: number;
- surface_form: number;
- pos: string;
- pos_detail_1: string;
- pos_detail_2: string;
- pos_detail_3: string;
- conjugated_type: string;
- conjugated_form: string;
- basic_form: string;
- reading?: string;
- pronunciation?: string;
+ // Tokenizer.js (T is the token shape returned by tokenize and produced by formatter)
+ interface TokenizerStatic {
+ splitByPunctuation(input: string): string[];
+ }
+ interface Tokenizer<T> {
+ token_info_dictionary: TokenInfoDictionary;
+ unknown_dictionary: UnknownDictionary;
+ viterbi_builder: ViterbiBuilder;
+ viterbi_searcher: ViterbiSearcher;
+ formatter: Formatter<T>;
+ tokenize(text: string): T[];
+ getLattice(text: string): ViterbiLattice;
}
- interface DictionaryBuilder {
- tid_entries: number[];
- unk_entries: number[];
- matrix_text: string;
- char_text: string;
- addTokenInfoDictionary(text: string): DictionaryBuilder;
- costMatrix(matrix_text: string): DictionaryBuilder;
- charDef(char_text: string): DictionaryBuilder;
- unkDef(text: string): DictionaryBuilder;
- build(): DynamicDictionaries;
+ // TokenizerBuilder.js (the built tokenizer yields IpadicFeatures tokens)
+ interface TokenizerBuilder {
+ build(callback: (err: Error, tokenizer: Tokenizer<IpadicFeatures>) => void): void;
+ }
+ interface TokenizerBuilderOption {
+ dicPath?: string;
}
- interface DynamicDictionaries {
- }
-
- export function builder(option?: TokenizerBuilderOption): TokenizerBuilder;
+ // kuromoji.js
+ export function builder(option: TokenizerBuilderOption): TokenizerBuilder;
export function dictionaryBuilder(): DictionaryBuilder;
}
\ No newline at end of file