SHOGUN
3.2.1
首页
相关页面
模块
类
文件
文件列表
文件成员
全部
类
命名空间
文件
函数
变量
类型定义
枚举
枚举值
友元
宏定义
组
页
src
shogun
lib
NGramTokenizer.h
浏览该文件的文档.
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2013 Evangelos Anagnostopoulos
 * Copyright (C) 2013 Evangelos Anagnostopoulos
 */
10
11
#ifndef _NGRAMTOKENIZER__H__
12
#define _NGRAMTOKENIZER__H__
13
14
#include <
shogun/lib/Tokenizer.h
>
15
16
namespace
shogun
17
{
18
/* Forward declaration of CTokenizer, the public base class of CNGramTokenizer. */
class
CTokenizer;
19
23
class
CNGramTokenizer
:
public
CTokenizer
24
{
25
public
:
30
CNGramTokenizer
(int32_t ns=3);
31
36
CNGramTokenizer
(
const
CNGramTokenizer
& orig);
37
39
virtual
~CNGramTokenizer
() {}
40
45
virtual
void
set_text
(
SGVector<char>
txt);
46
52
virtual
bool
has_next
();
53
60
virtual
index_t
next_token_idx
(
index_t
& start);
61
67
virtual
const
char
*
get_name
()
const
;
68
69
virtual
CNGramTokenizer
*
get_copy
();
70
71
private
:
72
void
init();
73
74
protected
:
75
77
int32_t
n
;
78
80
index_t
last_idx
;
81
};
82
}
83
#endif
/* _NGRAMTOKENIZER__H__ */
84
SHOGUN
机器学习工具包 - 项目文档