File based string features.
StringFeatures that are file based. Underneath memory mapped files are used. Derived from CStringFeatures thus transparently enabling all of the StringFeature functionality.
Supported file format contains one string per line, lines of variable length are supported and must be separated by '\n'.
在文件 StringFileFeatures.h 第 34 行定义.
Public 成员函数 | |
CStringFileFeatures () | |
CStringFileFeatures (const char *fname, EAlphabet alpha) | |
virtual | ~CStringFileFeatures () |
virtual const char * | get_name () const |
virtual void | cleanup_feature_vectors (int32_t start, int32_t stop) |
virtual EFeatureClass | get_feature_class () const |
virtual EFeatureType | get_feature_type () const |
template<> | |
EFeatureType | get_feature_type () const |
template<> | |
EFeatureType | get_feature_type () const |
template<> | |
EFeatureType | get_feature_type () const |
template<> | |
EFeatureType | get_feature_type () const |
template<> | |
EFeatureType | get_feature_type () const |
template<> | |
EFeatureType | get_feature_type () const |
template<> | |
EFeatureType | get_feature_type () const |
template<> | |
EFeatureType | get_feature_type () const |
template<> | |
EFeatureType | get_feature_type () const |
template<> | |
EFeatureType | get_feature_type () const |
template<> | |
EFeatureType | get_feature_type () const |
template<> | |
EFeatureType | get_feature_type () const |
CAlphabet * | get_alphabet () |
virtual CFeatures * | duplicate () const |
SGVector< ST > | get_feature_vector (int32_t num) |
ST * | get_feature_vector (int32_t num, int32_t &len, bool &dofree) |
void | set_feature_vector (SGVector< ST > vector, int32_t num) |
virtual void | set_feature_vector (int32_t num, ST *string, int32_t len) |
void | enable_on_the_fly_preprocessing () |
void | disable_on_the_fly_preprocessing () |
CStringFeatures< ST > * | get_transposed () |
SGString< ST > * | get_transposed (int32_t &num_feat, int32_t &num_vec) |
void | free_feature_vector (ST *feat_vec, int32_t num, bool dofree) |
void | free_feature_vector (SGVector< ST > feat_vec, int32_t num) |
virtual ST | get_feature (int32_t vec_num, int32_t feat_num) |
virtual int32_t | get_vector_length (int32_t vec_num) |
virtual int32_t | get_max_vector_length () |
virtual int32_t | get_num_vectors () const |
floatmax_t | get_num_symbols () |
floatmax_t | get_max_num_symbols () |
floatmax_t | get_original_num_symbols () |
int32_t | get_order () |
ST | get_masked_symbols (ST symbol, uint8_t mask) |
template<> | |
bool | get_masked_symbols (bool symbol, uint8_t mask) |
template<> | |
float32_t | get_masked_symbols (float32_t symbol, uint8_t mask) |
template<> | |
float64_t | get_masked_symbols (float64_t symbol, uint8_t mask) |
template<> | |
floatmax_t | get_masked_symbols (floatmax_t symbol, uint8_t mask) |
ST | shift_offset (ST offset, int32_t amount) |
template<> | |
bool | shift_offset (bool symbol, int32_t amount) |
template<> | |
float32_t | shift_offset (float32_t symbol, int32_t amount) |
template<> | |
float64_t | shift_offset (float64_t symbol, int32_t amount) |
template<> | |
floatmax_t | shift_offset (floatmax_t symbol, int32_t amount) |
ST | shift_symbol (ST symbol, int32_t amount) |
template<> | |
bool | shift_symbol (bool symbol, int32_t amount) |
template<> | |
float32_t | shift_symbol (float32_t symbol, int32_t amount) |
template<> | |
float64_t | shift_symbol (float64_t symbol, int32_t amount) |
template<> | |
floatmax_t | shift_symbol (floatmax_t symbol, int32_t amount) |
virtual void | load (CFile *loader) |
void | load_ascii_file (char *fname, bool remap_to_bin=true, EAlphabet ascii_alphabet=DNA, EAlphabet binary_alphabet=RAWDNA) |
bool | load_fasta_file (const char *fname, bool ignore_invalid=false) |
bool | load_fastq_file (const char *fname, bool ignore_invalid=false, bool bitremap_in_single_string=false) |
bool | load_from_directory (char *dirname) |
void | set_features (SGStringList< ST > feats) |
bool | set_features (SGString< ST > *p_features, int32_t p_num_vectors, int32_t p_max_string_length) |
bool | append_features (CStringFeatures< ST > *sf) |
bool | append_features (SGString< ST > *p_features, int32_t p_num_vectors, int32_t p_max_string_length) |
SGStringList< ST > | get_features () |
virtual SGString< ST > * | get_features (int32_t &num_str, int32_t &max_str_len) |
virtual void | get_features (SGString< ST > **dst, int32_t *num_str) |
virtual SGString< ST > * | copy_features (int32_t &num_str, int32_t &max_str_len) |
virtual void | save (CFile *writer) |
virtual bool | load_compressed (char *src, bool decompress) |
virtual bool | save_compressed (char *dest, E_COMPRESSION_TYPE compression, int level) |
virtual bool | apply_preprocessor (bool force_preprocessing=false) |
int32_t | obtain_by_sliding_window (int32_t window_size, int32_t step_size, int32_t skip=0) |
int32_t | obtain_by_position_list (int32_t window_size, CDynamicArray< int32_t > *positions, int32_t skip=0) |
bool | obtain_from_char (CStringFeatures< char > *sf, int32_t start, int32_t p_order, int32_t gap, bool rev) |
template<class CT > | |
bool | obtain_from_char_features (CStringFeatures< CT > *sf, int32_t start, int32_t p_order, int32_t gap, bool rev) |
template<> | |
bool | obtain_from_char_features (CStringFeatures< CT > *sf, int32_t start, int32_t p_order, int32_t gap, bool rev) |
template<> | |
bool | obtain_from_char_features (CStringFeatures< CT > *sf, int32_t start, int32_t p_order, int32_t gap, bool rev) |
template<> | |
bool | obtain_from_char_features (CStringFeatures< CT > *sf, int32_t start, int32_t p_order, int32_t gap, bool rev) |
bool | have_same_length (int32_t len=-1) |
void | embed_features (int32_t p_order) |
template<> | |
void | embed_features (int32_t p_order) |
template<> | |
void | embed_features (int32_t p_order) |
template<> | |
void | embed_features (int32_t p_order) |
void | compute_symbol_mask_table (int64_t max_val) |
template<> | |
void | compute_symbol_mask_table (int64_t max_val) |
template<> | |
void | compute_symbol_mask_table (int64_t max_val) |
template<> | |
void | compute_symbol_mask_table (int64_t max_val) |
void | unembed_word (ST word, uint8_t *seq, int32_t len) |
template<> | |
void | unembed_word (float32_t word, uint8_t *seq, int32_t len) |
template<> | |
void | unembed_word (float64_t word, uint8_t *seq, int32_t len) |
template<> | |
void | unembed_word (floatmax_t word, uint8_t *seq, int32_t len) |
ST | embed_word (ST *seq, int32_t len) |
template<> | |
float32_t | embed_word (float32_t *seq, int32_t len) |
template<> | |
float64_t | embed_word (float64_t *seq, int32_t len) |
template<> | |
floatmax_t | embed_word (floatmax_t *seq, int32_t len) |
void | determine_maximum_string_length () |
virtual void | get_histogram (float64_t **hist, int32_t *rows, int32_t *cols, bool normalize=true) |
virtual void | create_random (float64_t *hist, int32_t rows, int32_t cols, int32_t num_vec) |
virtual CFeatures * | copy_subset (SGVector< index_t > indices) |
virtual void | subset_changed_post () |
virtual void | add_preprocessor (CPreprocessor *p) |
virtual void | del_preprocessor (int32_t num) |
CPreprocessor * | get_preprocessor (int32_t num) const |
void | set_preprocessed (int32_t num) |
bool | is_preprocessed (int32_t num) const |
int32_t | get_num_preprocessed () const |
int32_t | get_num_preprocessors () const |
void | clean_preprocessors () |
void | list_preprocessors () |
int32_t | get_cache_size () const |
virtual bool | reshape (int32_t num_features, int32_t num_vectors) |
void | list_feature_obj () const |
bool | check_feature_compatibility (CFeatures *f) const |
bool | has_property (EFeatureProperty p) const |
void | set_property (EFeatureProperty p) |
void | unset_property (EFeatureProperty p) |
virtual CFeatures * | create_merged_copy (CList *others) |
virtual CFeatures * | create_merged_copy (CFeatures *other) |
virtual void | add_subset (SGVector< index_t > subset) |
virtual void | remove_subset () |
virtual void | remove_all_subsets () |
virtual CSubsetStack * | get_subset_stack () |
virtual CSGObject * | shallow_copy () const |
virtual CSGObject * | deep_copy () const |
virtual bool | is_generic (EPrimitiveType *generic) const |
template<class T > | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
void | unset_generic () |
virtual void | print_serializable (const char *prefix="") |
virtual bool | save_serializable (CSerializableFile *file, const char *prefix="", int32_t param_version=Version::get_version_parameter()) |
virtual bool | load_serializable (CSerializableFile *file, const char *prefix="", int32_t param_version=Version::get_version_parameter()) |
DynArray< TParameter * > * | load_file_parameters (const SGParamInfo *param_info, int32_t file_version, CSerializableFile *file, const char *prefix="") |
DynArray< TParameter * > * | load_all_file_parameters (int32_t file_version, int32_t current_version, CSerializableFile *file, const char *prefix="") |
void | map_parameters (DynArray< TParameter * > *param_base, int32_t &base_version, DynArray< const SGParamInfo * > *target_param_infos) |
void | set_global_io (SGIO *io) |
SGIO * | get_global_io () |
void | set_global_parallel (Parallel *parallel) |
Parallel * | get_global_parallel () |
void | set_global_version (Version *version) |
Version * | get_global_version () |
SGStringList< char > | get_modelsel_names () |
void | print_modsel_params () |
char * | get_modsel_param_descr (const char *param_name) |
index_t | get_modsel_param_index (const char *param_name) |
void | build_gradient_parameter_dictionary (CMap< TParameter *, CSGObject * > *dict) |
virtual void | update_parameter_hash () |
virtual bool | parameter_hash_changed () |
virtual bool | equals (CSGObject *other, float64_t accuracy=0.0, bool tolerant=false) |
virtual CSGObject * | clone () |
静态 Public 成员函数 | |
static ST * | get_zero_terminated_string_copy (SGString< ST > str) |
Public 属性 | |
SGIO * | io |
Parallel * | parallel |
Version * | version |
Parameter * | m_parameters |
Parameter * | m_model_selection_parameters |
Parameter * | m_gradient_parameters |
ParameterMap * | m_parameter_map |
uint32_t | m_hash |
Protected 成员函数 | |
ST * | get_line (uint64_t &len, uint64_t &offs, int32_t &line_nr, uint64_t file_length) |
virtual void | cleanup () |
virtual void | cleanup_feature_vector (int32_t num) |
void | fetch_meta_info_from_file (int32_t granularity=1048576) |
virtual ST * | compute_feature_vector (int32_t num, int32_t &len) |
virtual TParameter * | migrate (DynArray< TParameter * > *param_base, const SGParamInfo *target) |
virtual void | one_to_one_migration_prepare (DynArray< TParameter * > *param_base, const SGParamInfo *target, TParameter *&replacement, TParameter *&to_migrate, char *old_name=NULL) |
virtual void | load_serializable_pre () throw (ShogunException) |
virtual void | load_serializable_post () throw (ShogunException) |
virtual void | save_serializable_pre () throw (ShogunException) |
virtual void | save_serializable_post () throw (ShogunException) |
Protected 属性 | |
CMemoryMappedFile< ST > * | file |
CAlphabet * | alphabet |
int32_t | num_vectors |
SGString< ST > * | features |
ST * | single_string |
int32_t | length_of_single_string |
length of prior single string 更多... | |
int32_t | max_string_length |
floatmax_t | num_symbols |
number of used symbols 更多... | |
floatmax_t | original_num_symbols |
original number of used symbols (before higher order mapping) 更多... | |
int32_t | order |
order used in higher order mapping 更多... | |
ST * | symbol_mask_table |
order used in higher order mapping 更多... | |
int32_t | symbol_mask_table_len |
order used in higher order mapping 更多... | |
bool | preprocess_on_get |
preprocess on-the-fly? 更多... | |
CCache< ST > * | feature_cache |
CSubsetStack * | m_subset_stack |
default constructor
在文件 StringFileFeatures.cpp 第 6 行定义.
CStringFileFeatures | ( | const char * | fname, |
EAlphabet | alpha | ||
) |
constructor
fname | filename of the file containing line based features |
alpha | alphabet (type) to use for string features |
在文件 StringFileFeatures.cpp 第 10 行定义.
|
virtual |
default destructor
在文件 StringFileFeatures.cpp 第 17 行定义.
|
virtualinherited |
adds a subset of indices on top of the current subsets (possibly subset of subset). Calls subset_changed_post() afterwards
subset | subset of indices to add |
被 CCombinedFeatures 重载.
在文件 Features.cpp 第 307 行定义.
|
inherited |
append features. If the given string features have a subset, only this will be copied
not possible with subset
sf | features to append |
在文件 StringFeatures.cpp 第 892 行定义.
|
inherited |
append features
not possible with subset
p_features | features to append |
p_num_vectors | number of vectors |
p_max_string_length | maximum string length |
note that p_features will be SG_FREE()'d on success
在文件 StringFeatures.cpp 第 914 行定义.
|
virtualinherited |
apply preprocessor
force_preprocessing | if preprocessing shall be forced |
在文件 StringFeatures.cpp 第 1170 行定义.
|
inherited |
Builds a dictionary of all parameters in SGObject as well of those of SGObjects that are parameters of this object. Dictionary maps parameters to the objects that own them.
dict | dictionary of parameters to be built. |
在文件 SGObject.cpp 第 1174 行定义.
|
inherited |
check feature compatibility
f | features to check for compatibility |
在文件 Features.cpp 第 280 行定义.
|
inherited |
clears all preprocs
在文件 Features.cpp 第 113 行定义.
|
protectedvirtual |
|
protectedvirtual |
|
virtualinherited |
cleanup multiple feature vectors
possible with subset
start | index of first vector to be cleaned |
stop | index of the last vector to be cleaned |
在文件 StringFeatures.cpp 第 190 行定义.
|
virtualinherited |
Creates a clone of the current object. This is done via recursively traversing all parameters, which corresponds to a deep copy. Calling equals on the cloned object always returns true although none of the memory of both objects overlaps.
在文件 SGObject.cpp 第 1291 行定义.
|
protectedvirtualinherited |
compute feature vector for sample num; if target is set, the vector is written to target; len is returned by reference
possible with subset
num | which vector |
len | length of vector |
在文件 StringFeatures.cpp 第 1644 行定义.
|
inherited |
compute symbol mask table
required to access bit-based symbols
not implemented for subset
在文件 StringFeatures.cpp 第 1367 行定义.
|
inherited |
在文件 StringFeatures.cpp 第 1882 行定义.
|
inherited |
在文件 StringFeatures.cpp 第 1885 行定义.
|
inherited |
在文件 StringFeatures.cpp 第 1888 行定义.
|
virtualinherited |
copy_features
possible with subset
num_str | number of strings (returned) |
max_str_len | maximal string length (returned) |
在文件 StringFeatures.cpp 第 985 行定义.
Creates a new CFeatures instance containing copies of the elements which are specified by the provided indices.
possible with subset
indices | indices of feature elements to copy |
重载 CFeatures .
在文件 StringFeatures.cpp 第 1603 行定义.
Takes a list of feature instances and returns a new instance being a concatenation of a copy of this instance's data and the given instances' data. Note that the feature types have to be equal.
NOT IMPLEMENTED!
others | list of feature objects to append |
被 CDenseFeatures< ST >, CDenseFeatures< uint32_t >, CDenseFeatures< float64_t >, CDenseFeatures< T > , 以及 CDenseFeatures< uint16_t > 重载.
在文件 Features.h 第 229 行定义.
Convenience method for method with same name and list as parameter.
NOT IMPLEMENTED!
other | feature object to append |
被 CDenseFeatures< ST >, CDenseFeatures< uint32_t >, CDenseFeatures< float64_t >, CDenseFeatures< T >, CDenseFeatures< uint16_t > , 以及 CCombinedFeatures 重载.
在文件 Features.h 第 243 行定义.
|
virtualinherited |
create some random strings based on normalized histogram
not possible with subset
在文件 StringFeatures.cpp 第 1501 行定义.
|
virtualinherited |
A deep copy. All the instance variables will also be copied.
在文件 SGObject.h 第 126 行定义.
|
virtualinherited |
|
inherited |
|
inherited |
call this to disable on the fly feature preprocessing upon call to get_feature_vector. Useful when you manually apply preprocessors.
在文件 StringFeatures.cpp 第 270 行定义.
|
virtualinherited |
|
inherited |
embed string features in bit representation in-place
not implemented for subset
在文件 StringFeatures.cpp 第 1313 行定义.
|
inherited |
在文件 StringFeatures.cpp 第 1872 行定义.
|
inherited |
在文件 StringFeatures.cpp 第 1875 行定义.
|
inherited |
在文件 StringFeatures.cpp 第 1878 行定义.
|
inherited |
在文件 StringFeatures.cpp 第 1892 行定义.
在文件 StringFeatures.cpp 第 1896 行定义.
|
inherited |
在文件 StringFeatures.cpp 第 1900 行定义.
|
inherited |
call this to preprocess string features upon call to get_feature_vector
在文件 StringFeatures.cpp 第 265 行定义.
Recursively compares the current SGObject to another one. Compares all registered numerical parameters, recursion upon complex (SGObject) parameters. Does not compare pointers!
May be overwritten but please do with care! Should not be necessary in most cases.
other | object to compare with |
accuracy | accuracy to use for comparison (optional) |
tolerant | allows lenient check on float equality (within accuracy) |
在文件 SGObject.cpp 第 1195 行定义.
|
protected |
obtain meta information from file
i.e., determine number of strings and their lengths
在文件 StringFileFeatures.cpp 第 77 行定义.
|
inherited |
free feature vector
possible with subset
feat_vec | feature vector to free |
num | index in feature cache, possibly from subset |
dofree | if vector should be really deleted |
在文件 StringFeatures.cpp 第 357 行定义.
|
inherited |
free feature vector
possible with subset
feat_vec | feature vector to free |
num | index in feature cache, possibly from subset |
在文件 StringFeatures.cpp 第 375 行定义.
|
inherited |
|
inherited |
|
virtualinherited |
get feature
possible with subset
vec_num | which vector |
feat_num | which feature, possibly from subset |
在文件 StringFeatures.cpp 第 390 行定义.
|
virtualinherited |
|
virtualinherited |
|
virtualinherited |
get feature type the char feature can deal with
实现了 CFeatures.
在文件 StringFeatures.cpp 第 1702 行定义.
|
virtualinherited |
get feature type the char feature can deal with
实现了 CFeatures.
在文件 StringFeatures.cpp 第 1711 行定义.
|
virtualinherited |
get feature type the BYTE feature can deal with
实现了 CFeatures.
在文件 StringFeatures.cpp 第 1720 行定义.
|
virtualinherited |
get feature type the SHORT feature can deal with
实现了 CFeatures.
在文件 StringFeatures.cpp 第 1729 行定义.
|
virtualinherited |
get feature type the WORD feature can deal with
实现了 CFeatures.
在文件 StringFeatures.cpp 第 1738 行定义.
|
virtualinherited |
get feature type the INT feature can deal with
实现了 CFeatures.
在文件 StringFeatures.cpp 第 1747 行定义.
|
virtualinherited |
get feature type the INT feature can deal with
实现了 CFeatures.
在文件 StringFeatures.cpp 第 1756 行定义.
|
virtualinherited |
get feature type the LONG feature can deal with
实现了 CFeatures.
在文件 StringFeatures.cpp 第 1765 行定义.
|
virtualinherited |
get feature type the ULONG feature can deal with
实现了 CFeatures.
在文件 StringFeatures.cpp 第 1774 行定义.
|
virtualinherited |
get feature type the SHORTREAL feature can deal with
实现了 CFeatures.
在文件 StringFeatures.cpp 第 1783 行定义.
|
virtualinherited |
get feature type the DREAL feature can deal with
实现了 CFeatures.
在文件 StringFeatures.cpp 第 1792 行定义.
|
virtualinherited |
get feature type the LONGREAL feature can deal with
实现了 CFeatures.
在文件 StringFeatures.cpp 第 1801 行定义.
|
inherited |
get string for selected example num
possible with subset
num | index of the string |
在文件 StringFeatures.cpp 第 223 行定义.
|
inherited |
get feature vector for sample num
possible with subset
num | index of feature vector |
len | length is returned by reference |
dofree | whether returned vector must be freed by caller via free_feature_vector |
在文件 StringFeatures.cpp 第 275 行定义.
|
inherited |
|
virtualinherited |
get_features
not possible with subset
num_str | number of strings (returned) |
max_str_len | maximal string length (returned) |
在文件 StringFeatures.cpp 第 975 行定义.
|
virtualinherited |
get_features (swig compatible)
possible with subset
dst | string features (returned) |
num_str | number of strings (returned) |
在文件 StringFeatures.cpp 第 1007 行定义.
|
inherited |
|
inherited |
|
inherited |
|
virtualinherited |
|
protected |
get next line from file
The returned line may be modified in case the file was opened read/write. It is otherwise read-only.
len | length of line (returned via reference) |
offs | offset to be passed for reading next line, should be 0 initially (returned via reference) |
line_nr | used to indicate errors (returned as reference should be 0 initially) |
file_length | total length of the file (for error checking) |
在文件 StringFileFeatures.cpp 第 23 行定义.
|
inherited |
a higher order mapped symbol will be shaped such that the symbols specified by bits in the mask will be returned.
symbol | symbol to mask |
mask | mask to apply |
在文件 StringFeatures.cpp 第 433 行定义.
|
inherited |
在文件 StringFeatures.cpp 第 1806 行定义.
在文件 StringFeatures.cpp 第 1810 行定义.
在文件 StringFeatures.cpp 第 1814 行定义.
|
inherited |
在文件 StringFeatures.cpp 第 1818 行定义.
|
inherited |
get maximum number of symbols
Note: floatmax_t sounds weird, but int64_t is not long enough (and there is no int128_t type)
在文件 StringFeatures.cpp 第 427 行定义.
|
virtualinherited |
get maximum vector length
this one is updated when a subset is set
在文件 StringFeatures.cpp 第 415 行定义.
|
inherited |
在文件 SGObject.cpp 第 1066 行定义.
|
inherited |
Returns description of a given parameter string, if it exists. SG_ERROR otherwise
param_name | name of the parameter |
在文件 SGObject.cpp 第 1090 行定义.
|
inherited |
Returns index of model selection parameter with provided name
param_name | name of model selection parameter |
在文件 SGObject.cpp 第 1103 行定义.
|
virtual |
Returns the name of the SGSerializable instance.
在文件 StringFileFeatures.h 第 59 行定义.
|
inherited |
|
inherited |
|
inherited |
get number of symbols
Note: floatmax_t sounds weird, but LONG is not long enough
在文件 StringFeatures.cpp 第 425 行定义.
|
virtualinherited |
|
inherited |
|
inherited |
number of symbols before higher order mapping
在文件 StringFeatures.cpp 第 429 行定义.
|
inherited |
|
virtualinherited |
|
inherited |
get a transposed copy of the features
possible with subset
在文件 StringFeatures.cpp 第 313 行定义.
|
inherited |
compute and return the transpose of string features matrix which will be preprocessed. num_feat and num_vec are returned by reference; the caller has to clean up
note that strings all have to have same length
possible with subset
num_feat | number of features in matrix |
num_vec | number of vectors in matrix |
在文件 StringFeatures.cpp 第 326 行定义.
|
virtualinherited |
get vector length
possible with subset
vec_num | which vector, possibly from subset |
在文件 StringFeatures.cpp 第 404 行定义.
|
staticinherited |
get a zero terminated copy of the string
str | the string to copy |
note that this function is only sensible for character strings
在文件 StringFeatures.cpp 第 1436 行定义.
|
inherited |
check if features have given property
p | feature property |
在文件 Features.cpp 第 292 行定义.
|
inherited |
check if length of each vector in this feature object equals the given length. If existent, only the subset is checked
possible for subset
len | vector length to check against |
在文件 StringFeatures.cpp 第 1294 行定义.
|
virtualinherited |
If the SGSerializable is a class template then TRUE will be returned and GENERIC is set to the type of the generic.
generic | set to the type of the generic if returning TRUE |
在文件 SGObject.cpp 第 234 行定义.
|
inherited |
get whether specified preprocessor was already applied
num | index of preprocessor in list |
在文件 Features.cpp 第 146 行定义.
|
inherited |
list feature object
在文件 Features.cpp 第 168 行定义.
|
inherited |
print preprocessors
在文件 Features.cpp 第 128 行定义.
|
virtualinherited |
|
inherited |
maps all parameters of this instance to the provided file version and loads all parameter data from the file into an array, which is sorted (basically calls load_file_parameter(...) for all parameters and puts all results into a sorted array)
file_version | parameter version of the file |
current_version | version from which mapping begins (you want to use Version::get_version_parameter() for this in most cases) |
file | file to load from |
prefix | prefix for members |
在文件 SGObject.cpp 第 639 行定义.
|
inherited |
load ascii line-based string features from file.
any subset is removed before
fname | filename to load from |
remap_to_bin | if translation to other binary alphabet should be performed |
ascii_alphabet | src alphabet |
binary_alphabet | alphabet to translate to |
在文件 StringFeatures.cpp 第 451 行定义.
|
virtualinherited |
load compressed features from file
any subset is removed before
src | filename to load from |
decompress | whether to decompress on loading |
在文件 StringFeatures.cpp 第 1015 行定义.
|
inherited |
load fasta file as string features
any subset is removed before
fname | filename to load from |
ignore_invalid | if set to true, characters other than A,C,G,T are converted to A |
在文件 StringFeatures.cpp 第 584 行定义.
|
inherited |
load fastq file as string features
removes subset beforehand
fname | filename to load from |
ignore_invalid | if set to true, characters other than A,C,G,T are converted to A |
bitremap_in_single_string | if set to true, do binary embedding of symbols |
在文件 StringFeatures.cpp 第 677 行定义.
|
inherited |
loads some specified parameters from a file with a specified version. The provided parameter info has a version which is recursively mapped until the file parameter version is reached. Note that there may be possibly multiple parameters in the mapping, therefore, a set of TParameter instances is returned
param_info | information of parameter |
file_version | parameter version of the file, must be <= provided parameter version |
file | file to load from |
prefix | prefix for members |
在文件 SGObject.cpp 第 480 行定义.
|
inherited |
load features from directory
removes subset beforehand
dirname | directory name to load from |
在文件 StringFeatures.cpp 第 778 行定义.
|
virtualinherited |
Load this object from file. If it will fail (returning FALSE) then this object will contain inconsistent data and should not be used!
file | where to load from |
prefix | prefix for members |
param_version | (optional) a parameter version different to (this is mainly for testing, better do not use) |
在文件 SGObject.cpp 第 311 行定义.
|
protectedvirtualinherited |
Can (optionally) be overridden to post-initialize some member variables which are not PARAMETER::ADD'ed. Make sure that at first the overridden method BASE_CLASS::LOAD_SERIALIZABLE_POST is called.
ShogunException | Will be thrown if an error occurs. |
被 CWeightedDegreePositionStringKernel, CKernel, CList, CAlphabet, CLinearHMM, CGaussianKernel, CInverseMultiQuadricKernel, CCircularKernel , 以及 CExponentialKernel 重载.
在文件 SGObject.cpp 第 995 行定义.
|
protectedvirtualinherited |
Can (optionally) be overridden to pre-initialize some member variables which are not PARAMETER::ADD'ed. Make sure that at first the overridden method BASE_CLASS::LOAD_SERIALIZABLE_PRE is called.
ShogunException | Will be thrown if an error occurs. |
被 CDynamicArray< T >, CDynamicArray< float64_t >, CDynamicArray< float32_t >, CDynamicArray< int32_t >, CDynamicArray< char >, CDynamicArray< bool > , 以及 CDynamicObjectArray 重载.
在文件 SGObject.cpp 第 990 行定义.
|
inherited |
Takes a set of TParameter instances (base) with a certain version and a set of target parameter infos and recursively maps the base level wise to the current version using CSGObject::migrate(...). The base is replaced. After this call, the base version containing parameters should be of same version/type as the initial target parameter infos. Note for this to work, the migrate methods and all the internal parameter mappings have to match
param_base | set of TParameter instances that are mapped to the provided target parameter infos |
base_version | version of the parameter base |
target_param_infos | set of SGParamInfo instances that specify the target parameter base |
在文件 SGObject.cpp 第 677 行定义.
|
protectedvirtualinherited |
creates a new TParameter instance, which contains migrated data from the version that is provided. The provided parameter data base is used for migration, this base is a collection of all parameter data of the previous version. Migration is done FROM the data in param_base TO the provided param info. Migration is always one version step. Method has to be implemented in subclasses, if no match is found, base method has to be called.
If there is an element in the param_base which equals the target, a copy of the element is returned. This represents the case when nothing has changed and therefore, the migrate method is not overloaded in a subclass
param_base | set of TParameter instances to use for migration |
target | parameter info for the resulting TParameter |
在文件 SGObject.cpp 第 884 行定义.
|
inherited |
extracts windows of size window_size from first string using the positions in list
not implemented for subset
window_size | window size |
positions | positions |
skip | skip |
在文件 StringFeatures.cpp 第 1231 行定义.
|
inherited |
slides a window of size window_size over the current single string. step_size is the amount by which the window is shifted. Creates (string_len-window_size)/step_size many feature obj. If skip is nonzero, skip the first 'skip' characters of each string
not implemented for subset
window_size | window size |
step_size | step size |
skip | skip |
在文件 StringFeatures.cpp 第 1194 行定义.
|
inherited |
obtain string features from char features
wrapper for template method
any subset is removed before, subset of parameter sf is possible
sf | string features |
start | start |
p_order | order |
gap | gap |
rev | reverse |
在文件 StringFeatures.cpp 第 1289 行定义.
|
inherited |
template obtain from char features
any subset is removed before, subset of parameter sf is possible
sf | string features |
start | start |
p_order | order |
gap | gap |
rev | reverse |
在文件 StringFeatures.cpp 第 1970 行定义.
|
inherited |
在文件 StringFeatures.cpp 第 1858 行定义.
|
inherited |
在文件 StringFeatures.cpp 第 1862 行定义.
|
inherited |
在文件 StringFeatures.cpp 第 1866 行定义.
|
protectedvirtualinherited |
This method prepares everything for a one-to-one parameter migration. One-to-one here means that only ONE element of the parameter base is needed for the migration (the one with the same name as the target). Data is allocated for the target (in the type as provided in the target SGParamInfo), and a corresponding new TParameter instance is written to replacement. The to_migrate pointer points to the single TParameter instance needed for migration. If a name change happened, the old name may be specified by old_name. In addition, the m_delete_data flag of to_migrate is set to true. So if you want to migrate data, the only thing to do after this call is converting the data in the m_parameter fields. If unsure how to use this, have a look at an example (base_migration_type_conversion.cpp, for instance).
param_base | set of TParameter instances to use for migration |
target | parameter info for the resulting TParameter |
replacement | (used as output) here the TParameter instance which is returned by migration is created into |
to_migrate | the only source that is used for migration |
old_name | with this parameter, a name change may be specified |
在文件 SGObject.cpp 第 824 行定义.
|
virtualinherited |
在文件 SGObject.cpp 第 200 行定义.
|
inherited |
prints all parameter registered for model selection and their type
在文件 SGObject.cpp 第 1042 行定义.
|
virtualinherited |
|
virtualinherited |
Removes all subsets. Calls subset_changed_post() afterwards.
被 CCombinedFeatures 重载.
在文件 Features.cpp 第 319 行定义.
|
virtualinherited |
Removes the last added subset from the subset stack, if existing. Calls subset_changed_post() afterwards.
被 CCombinedFeatures 重载.
在文件 Features.cpp 第 313 行定义.
|
virtualinherited |
in case there is a feature matrix allow for reshaping
NOT IMPLEMENTED!
num_features | new number of features |
num_vectors | new number of vectors |
被 CDenseFeatures< ST >, CDenseFeatures< uint32_t >, CDenseFeatures< float64_t >, CDenseFeatures< T > , 以及 CDenseFeatures< uint16_t > 重载.
在文件 Features.cpp 第 162 行定义.
|
virtualinherited |
save features to file
not possible with subset
writer | File object via which to save data |
重载 CFeatures .
|
virtualinherited |
save compressed features to file
not possible with subset
dest | filename to save to |
compression | compressor to use |
level | compression level to use (1-9) |
在文件 StringFeatures.cpp 第 1110 行定义.
|
virtualinherited |
Save this object to file.
file | where to save the object; will be closed during returning if PREFIX is an empty string. |
prefix | prefix for members |
param_version | (optional) a parameter version different from the current one (this is mainly for testing; better not to use it) |
在文件 SGObject.cpp 第 252 行定义.
|
protectedvirtualinherited |
Can (optionally) be overridden to post-initialize some member variables which are not PARAMETER::ADD'ed. Make sure that at first the overridden method BASE_CLASS::SAVE_SERIALIZABLE_POST is called.
ShogunException | Will be thrown if an error occurs. |
被 CKernel 重载.
在文件 SGObject.cpp 第 1005 行定义.
|
protectedvirtualinherited |
Can (optionally) be overridden to pre-initialize some member variables which are not PARAMETER::ADD'ed. Make sure that at first the overridden method BASE_CLASS::SAVE_SERIALIZABLE_PRE is called.
ShogunException | Will be thrown if an error occurs. |
被 CKernel, CDynamicArray< T >, CDynamicArray< float64_t >, CDynamicArray< float32_t >, CDynamicArray< int32_t >, CDynamicArray< char >, CDynamicArray< bool > , 以及 CDynamicObjectArray 重载.
在文件 SGObject.cpp 第 1000 行定义.
|
inherited |
set string for selected example num
not possible with subset
vector | string to set |
num | index of the string |
在文件 StringFeatures.cpp 第 241 行定义.
|
virtualinherited |
set feature vector for sample num
possible with subset
num | index of feature vector |
string | string with the feature vector's content |
len | length of the string |
在文件 StringFeatures.cpp 第 1445 行定义.
|
inherited |
|
inherited |
set features
not possible with subset
p_features | new features |
p_num_vectors | number of vectors |
p_max_string_length | maximum string length |
在文件 StringFeatures.cpp 第 853 行定义.
|
inherited |
在文件 SGObject.cpp 第 41 行定义.
|
inherited |
在文件 SGObject.cpp 第 46 行定义.
|
inherited |
在文件 SGObject.cpp 第 51 行定义.
|
inherited |
在文件 SGObject.cpp 第 56 行定义.
|
inherited |
在文件 SGObject.cpp 第 61 行定义.
|
inherited |
在文件 SGObject.cpp 第 66 行定义.
|
inherited |
在文件 SGObject.cpp 第 71 行定义.
|
inherited |
在文件 SGObject.cpp 第 76 行定义.
|
inherited |
在文件 SGObject.cpp 第 81 行定义.
|
inherited |
在文件 SGObject.cpp 第 86 行定义.
|
inherited |
在文件 SGObject.cpp 第 91 行定义.
|
inherited |
在文件 SGObject.cpp 第 96 行定义.
|
inherited |
在文件 SGObject.cpp 第 101 行定义.
|
inherited |
在文件 SGObject.cpp 第 106 行定义.
|
inherited |
在文件 SGObject.cpp 第 111 行定义.
|
inherited |
set generic type to T
|
inherited |
|
inherited |
|
inherited |
|
inherited |
|
inherited |
|
virtualinherited |
A shallow copy. All the SGObject instance variables will be simply assigned and SG_REF-ed.
被 CGaussianKernel 重载.
在文件 SGObject.h 第 117 行定义.
|
inherited |
shift offset to the left by amount
offset | offset to shift |
amount | amount to shift the offset |
在文件 StringFeatures.cpp 第 439 行定义.
|
inherited |
在文件 StringFeatures.cpp 第 1823 行定义.
在文件 StringFeatures.cpp 第 1827 行定义.
在文件 StringFeatures.cpp 第 1831 行定义.
|
inherited |
在文件 StringFeatures.cpp 第 1835 行定义.
|
inherited |
shift symbol to the right by amount (taking care of custom symbol sizes)
symbol | symbol to shift |
amount | amount to shift the symbol |
在文件 StringFeatures.cpp 第 445 行定义.
|
inherited |
在文件 StringFeatures.cpp 第 1840 行定义.
在文件 StringFeatures.cpp 第 1844 行定义.
在文件 StringFeatures.cpp 第 1848 行定义.
|
inherited |
在文件 StringFeatures.cpp 第 1852 行定义.
|
virtualinherited |
|
inherited |
remap bit-based word to character sequence
word | word to remap |
seq | sequence of size len that remapped characters are written to |
len | length of sequence and word |
在文件 StringFeatures.cpp 第 1395 行定义.
|
inherited |
在文件 StringFeatures.cpp 第 1905 行定义.
|
inherited |
在文件 StringFeatures.cpp 第 1908 行定义.
|
inherited |
在文件 StringFeatures.cpp 第 1911 行定义.
|
inherited |
unset generic type
this has to be called in classes specializing a template class
在文件 SGObject.cpp 第 241 行定义.
|
inherited |
|
virtualinherited |
Updates the hash of current parameter combination
在文件 SGObject.cpp 第 187 行定义.
|
protectedinherited |
alphabet
在文件 StringFeatures.h 第 671 行定义.
|
protectedinherited |
feature cache
在文件 StringFeatures.h 第 707 行定义.
|
protectedinherited |
this contains the array of features
在文件 StringFeatures.h 第 677 行定义.
|
protected |
memory mapped file
在文件 StringFileFeatures.h 第 92 行定义.
|
inherited |
io
在文件 SGObject.h 第 473 行定义.
|
protectedinherited |
length of prior single string
在文件 StringFeatures.h 第 683 行定义.
|
inherited |
parameters wrt which we can compute gradients
在文件 SGObject.h 第 488 行定义.
|
inherited |
Hash of parameter values
在文件 SGObject.h 第 494 行定义.
|
inherited |
model selection parameters
在文件 SGObject.h 第 485 行定义.
|
inherited |
map for different parameter versions
在文件 SGObject.h 第 491 行定义.
|
inherited |
parameters
在文件 SGObject.h 第 482 行定义.
|
protectedinherited |
subset used for index transformations
在文件 Features.h 第 302 行定义.
|
protectedinherited |
length of longest string (for subset, is updated)
在文件 StringFeatures.h 第 686 行定义.
|
protectedinherited |
number of used symbols
在文件 StringFeatures.h 第 689 行定义.
|
protectedinherited |
number of string vectors (for subset, is updated)
在文件 StringFeatures.h 第 674 行定义.
|
protectedinherited |
order used in higher order mapping
在文件 StringFeatures.h 第 695 行定义.
|
protectedinherited |
original number of used symbols (before higher order mapping)
在文件 StringFeatures.h 第 692 行定义.
|
inherited |
parallel
在文件 SGObject.h 第 476 行定义.
|
protectedinherited |
preprocess on-the-fly?
在文件 StringFeatures.h 第 704 行定义.
|
protectedinherited |
true when single string / created by sliding window
在文件 StringFeatures.h 第 680 行定义.
|
protectedinherited |
order used in higher order mapping
在文件 StringFeatures.h 第 698 行定义.
|
protectedinherited |
order used in higher order mapping
在文件 StringFeatures.h 第 701 行定义.
|
inherited |
version
在文件 SGObject.h 第 479 行定义.