mirror of
https://github.com/gosticks/DefinitelyTyped.git
synced 2025-10-16 12:05:41 +00:00
update kuromoji.js
This commit is contained in:
parent
f4007ed324
commit
fe562facd3
@ -1,9 +1,33 @@
|
||||
/// <reference path="kuromoji.d.ts" />
|
||||
|
||||
// From https://github.com/takuyaa/kuromoji.js/blob/master/README.md#usage
|
||||
kuromoji.builder({ dicPath: "/url/to/dictionary/dir/" }).build(function (err, tokenizer) {
|
||||
var path = tokenizer.tokenize("すもももももももものうち");
|
||||
var num_tmp: number;
|
||||
var str_tmp: string;
|
||||
path.forEach((token)=>{
|
||||
console.log(token.word_id);
|
||||
console.log(token.surface_form);
|
||||
num_tmp = token.word_id;
|
||||
str_tmp = token.word_type;
|
||||
num_tmp = token.word_position;
|
||||
str_tmp = token.surface_form;
|
||||
str_tmp = token.pos;
|
||||
str_tmp = token.pos_detail_1;
|
||||
str_tmp = token.pos_detail_2;
|
||||
str_tmp = token.pos_detail_3;
|
||||
str_tmp = token.conjugated_type;
|
||||
str_tmp = token.conjugated_form;
|
||||
str_tmp = token.basic_form;
|
||||
str_tmp = token.reading;
|
||||
str_tmp = token.pronunciation;
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
// From https://github.com/takuyaa/kuromoji.js/blob/master/test/resource/minimum-dic/minimum.csv
|
||||
var minimum_dict = [
|
||||
"すもも,1285,1285,7546,名詞,一般,*,*,*,*,すもも,スモモ,スモモ",
|
||||
"もも,1285,1285,7219,名詞,一般,*,*,*,*,もも,モモ,モモ"
|
||||
].join('\n');
|
||||
|
||||
var builder = kuromoji.dictionaryBuilder();
|
||||
builder = builder.addTokenInfoDictionary(minimum_dict);
|
||||
var dict = builder.build();
|
||||
170
kuromoji/kuromoji.d.ts
vendored
170
kuromoji/kuromoji.d.ts
vendored
@ -3,45 +3,114 @@
|
||||
// Definitions by: MIZUSHIMA Junki <https://github.com/mzsm>
|
||||
// Definitions: https://github.com/borisyankov/DefinitelyTyped
|
||||
|
||||
/// <reference path="../doublearray/doublearray.d.ts" />
|
||||
|
||||
declare module kuromoji {
|
||||
|
||||
interface TokenizerBuilderOption {
|
||||
dicPath?: string;
|
||||
// dict/ConnectionCosts.js
|
||||
interface ConnectionCosts {
|
||||
buffer: Int16Array;
|
||||
put(forward_id: number, backward_id: number, cost: number): void;
|
||||
get(forward_id: number, backward_id: number): number;
|
||||
loadConnectionCosts(connection_costs_buffer: Int16Array): void;
|
||||
}
|
||||
|
||||
interface TokenizerBuilder<T> {
|
||||
build(callback: (err: Error, tokenizer: Tokenizer<T>) => void): void;
|
||||
// dict/DynamicDictionaries.js
|
||||
interface DynamicDictionaries {
|
||||
trie: doublearray.DoubleArray;
|
||||
token_info_dictionary: TokenInfoDictionary;
|
||||
connection_costs: ConnectionCosts;
|
||||
unknown_dictionary: UnknownDictionary;
|
||||
loadTrie(base_buffer: Int32Array, check_buffer: Int32Array): DynamicDictionaries;
|
||||
}
|
||||
|
||||
interface Tokenizer<T> {
|
||||
token_info_dictionary: any;
|
||||
unknown_dictionary: any;
|
||||
viterbi_builder: ViterbiBuilder;
|
||||
viterbi_searcher: ViterbiSearcher;
|
||||
formatter: T;
|
||||
tokenize(text: string): T[];
|
||||
getLattice(text: string): ViterbiLattice;
|
||||
// dict/TokenInfoDictionary.js
|
||||
interface TokenInfoDictionary {
|
||||
buildDictionary(entries: any[][]): {[word_id: number]: string};
|
||||
put(left_id: number, right_id: number, word_cost: number, surface_form: string, feature: string): number;
|
||||
addMapping(source: number, target: number): void;
|
||||
targetMapToBuffer(): Uint8Array;
|
||||
loadDictionary(array_buffer: Uint8Array): TokenInfoDictionary;
|
||||
loadPosVector(array_buffer: Uint8Array): TokenInfoDictionary;
|
||||
loadTargetMap(array_buffer: Uint8Array): TokenInfoDictionary;
|
||||
getFeatures(token_info_id_str: string): string;
|
||||
}
|
||||
|
||||
// dict/UnknownDictionary.js
|
||||
interface UnknownDictionary extends TokenInfoDictionary {
|
||||
}
|
||||
|
||||
// util/ByteBuffer.js
|
||||
interface ByteBuffer {
|
||||
buffer: Uint8Array;
|
||||
position: number;
|
||||
size(): number;
|
||||
reallocate(): void;
|
||||
shrink(): Uint8Array;
|
||||
put(b: number): void;
|
||||
get(index: number): number;
|
||||
putShort(num: number): void;
|
||||
getShort(index: number): number;
|
||||
putInt(num: number): void;
|
||||
getInt(index: number): number;
|
||||
readInt(): number;
|
||||
putString(str: string): void;
|
||||
getString(index: number): string;
|
||||
}
|
||||
|
||||
// util/DictionaryBuilder.js
|
||||
interface DictionaryBuilder {
|
||||
tid_entries: string[];
|
||||
unk_entries: string[];
|
||||
addTokenInfoDictionary(text: string): DictionaryBuilder;
|
||||
costMatrix(matrix_text: string): DictionaryBuilder;
|
||||
charDef(char_text: string): DictionaryBuilder;
|
||||
unkDef(text: string): DictionaryBuilder;
|
||||
build(): DynamicDictionaries;
|
||||
buildTokenInfoDictionary(): {trie: doublearray.DoubleArray; token_info_dictionary: TokenInfoDictionary};
|
||||
buildUnknownDictionary(): UnknownDictionary;
|
||||
buildConnectionCosts(): ConnectionCosts;
|
||||
buildDoubleArray(): doublearray.DoubleArray;
|
||||
}
|
||||
|
||||
// util/IpadicFormatter.js
|
||||
interface Formatter<T> {
|
||||
formatEntry(word_id: number, position: number, type: string, features: string[]): T;
|
||||
formatUnknownEntry(word_id: number, position: number, type: string, features: string[]): T;
|
||||
}
|
||||
interface IpadicFormatter extends Formatter<IpadicFeatures> {
|
||||
}
|
||||
export interface IpadicFeatures {
|
||||
word_id: number;
|
||||
word_type: string;
|
||||
word_position: number;
|
||||
surface_form: string;
|
||||
pos: string;
|
||||
pos_detail_1: string;
|
||||
pos_detail_2: string;
|
||||
pos_detail_3: string;
|
||||
conjugated_type: string;
|
||||
conjugated_form: string;
|
||||
basic_form: string;
|
||||
reading?: string;
|
||||
pronunciation?: string;
|
||||
}
|
||||
|
||||
// viterbi/ViterbiBuilder.js
|
||||
interface ViterbiBuilder {
|
||||
trie: any;
|
||||
token_info_dictionary: any;
|
||||
unknown_dictionary: any;
|
||||
trie: doublearray.DoubleArray;
|
||||
token_info_dictionary: TokenInfoDictionary;
|
||||
unknown_dictionary: UnknownDictionary;
|
||||
build(sentence_str: string): ViterbiLattice;
|
||||
|
||||
}
|
||||
|
||||
interface ViterbiSearcher {
|
||||
connection_costs: any;
|
||||
search(lattice: ViterbiLattice): ViterbiNode[];
|
||||
forward(lattice: ViterbiLattice)
|
||||
}
|
||||
|
||||
// viterbi/ViterbiLattice.js
|
||||
interface ViterbiLattice {
|
||||
append(node: ViterbiNode): void;
|
||||
appendEos(): void;
|
||||
}
|
||||
|
||||
// viterbi/ViterbiNode.js
|
||||
interface ViterbiNode {
|
||||
name: string;
|
||||
cost: number;
|
||||
@ -55,42 +124,37 @@ declare module kuromoji {
|
||||
type: string;
|
||||
}
|
||||
|
||||
interface IpadicFormatter {
|
||||
formatEntry(word_id: number, position: number, type: string, features: string[]): IpadicFormat;
|
||||
formatUnknownEntry(word_id: number, position: number, type: string, features: string[]): IpadicFormat;
|
||||
// viterbi/ViterbiSearcher.js
|
||||
interface ViterbiSearcher {
|
||||
connection_costs: ConnectionCosts;
|
||||
search(lattice: ViterbiLattice): ViterbiNode[];
|
||||
forward(lattice: ViterbiLattice): ViterbiLattice;
|
||||
backward(lattice: ViterbiLattice): ViterbiNode[];
|
||||
}
|
||||
|
||||
interface IpadicFormat {
|
||||
word_id: number;
|
||||
word_type: string;
|
||||
word_position: number;
|
||||
surface_form: number;
|
||||
pos: string;
|
||||
pos_detail_1: string;
|
||||
pos_detail_2: string;
|
||||
pos_detail_3: string;
|
||||
conjugated_type: string;
|
||||
conjugated_form: string;
|
||||
basic_form: string;
|
||||
reading?: string;
|
||||
pronunciation?: string;
|
||||
// Tokenizer.js
|
||||
interface TokenizerStatic {
|
||||
splitByPunctuation(input: string): string[];
|
||||
}
|
||||
interface Tokenizer<T> {
|
||||
token_info_dictionary: TokenInfoDictionary;
|
||||
unknown_dictionary: UnknownDictionary;
|
||||
viterbi_builder: ViterbiBuilder;
|
||||
viterbi_searcher: ViterbiSearcher;
|
||||
formatter: Formatter<T>;
|
||||
tokenize(text: string): T[];
|
||||
getLattice(text: string): ViterbiLattice;
|
||||
}
|
||||
|
||||
interface DictionaryBuilder {
|
||||
tid_entries: number[];
|
||||
unk_entries: number[];
|
||||
matrix_text: string;
|
||||
char_text: string;
|
||||
addTokenInfoDictionary(text: string): DictionaryBuilder;
|
||||
costMatrix(matrix_text: string): DictionaryBuilder;
|
||||
charDef(char_text: string): DictionaryBuilder;
|
||||
unkDef(text: string): DictionaryBuilder;
|
||||
build(): DynamicDictionaries;
|
||||
// TokenizerBuilder.js
|
||||
interface TokenizerBuilder<T> {
|
||||
build(callback: (err: Error, tokenizer: Tokenizer<T>) => void): void;
|
||||
}
|
||||
interface TokenizerBuilderOption {
|
||||
dicPath?: string;
|
||||
}
|
||||
|
||||
interface DynamicDictionaries {
|
||||
}
|
||||
|
||||
export function builder(option?: TokenizerBuilderOption): TokenizerBuilder<IpadicFormat>;
|
||||
// kuromoji.js
|
||||
export function builder(option: TokenizerBuilderOption): TokenizerBuilder<IpadicFeatures>;
|
||||
export function dictionaryBuilder(): DictionaryBuilder;
|
||||
}
|
||||
Loading…
Reference in New Issue
Block a user