Index

A | B | C | D | E | F | G | H | I | K | L | M | N | P | R | S | T | U | W

A

alphanum_key() (in module webstruct.utils)
avg_bio_f1_score() (in module webstruct.metrics)

B

BestMatch (class in webstruct.utils)
bio_classification_report() (in module webstruct.metrics)
block_length() (in module webstruct.features.block_features)
borders() (in module webstruct.features.block_features)
build_entity() (webstruct.model.NER method)

C

choose_best_clustering() (in module webstruct.grouping)
classify() (webstruct.sequence_encoding.InputTokenProcessor method)
CombinedFeatures (class in webstruct.features.utils)
copy() (webstruct.features.utils.CombinedFeatures method)
create_wapiti_pipeline() (in module webstruct.wapiti)

D

default_clustering_score() (in module webstruct.grouping)
DefaultTokenizer (class in webstruct.tokenizers)
detokenize_single() (webstruct.feature_extraction.HtmlTokenizer method)

E

encode() (webstruct.sequence_encoding.IobEncoder method)
encode_split() (webstruct.sequence_encoding.IobEncoder method)
extract() (webstruct.model.NER method)
extract_from_url() (webstruct.model.NER method)
extract_groups() (webstruct.model.NER method)
extract_raw() (webstruct.model.NER method)

F

find_ranges() (webstruct.utils.BestMatch method)
fit() (webstruct.feature_extraction.HtmlFeatureExtractor method)
fit() (webstruct.wapiti.WapitiCRF method)
fit() (webstruct.wapiti.WapitiFeatureEncoder method)
fit_transform() (webstruct.feature_extraction.HtmlFeatureExtractor method)
flatten() (in module webstruct.utils)

G

GateLoader (class in webstruct.loaders)
get_combined_keys() (in module webstruct.utils)
get_sorted_ranges() (webstruct.utils.BestMatch method)
get_sorted_ranges() (webstruct.utils.LongestMatch method)
group() (webstruct.sequence_encoding.IobEncoder class method)

H

html_document_fromstring() (in module webstruct.utils)
HtmlFeatureExtractor (class in webstruct.feature_extraction)
HtmlLoader (class in webstruct.loaders)
HtmlToken (class in webstruct.feature_extraction)
HtmlTokenizer (class in webstruct.feature_extraction)
human_sorted() (in module webstruct.utils)

I

InputTokenProcessor (class in webstruct.sequence_encoding)
InsideTag (class in webstruct.features.block_features)
IobEncoder (class in webstruct.sequence_encoding)
iter_encode() (webstruct.sequence_encoding.IobEncoder method)
iter_group() (webstruct.sequence_encoding.IobEncoder class method)

K

kill_html_tags() (in module webstruct.utils)

L

load() (webstruct.loaders.GateLoader method)
load() (webstruct.loaders.HtmlLoader method)
load() (webstruct.loaders.WebAnnotatorLoader method)
load_trees() (in module webstruct.loaders)
load_trees_from_files() (in module webstruct.loaders)
loadbytes() (webstruct.loaders.GateLoader method)
loadbytes() (webstruct.loaders.HtmlLoader method)
loadbytes() (webstruct.loaders.WebAnnotatorLoader method)
LongestMatch (class in webstruct.utils)
LongestMatchGlobalFeature (class in webstruct.features.utils)
looks_like_email() (in module webstruct.features.data_features)
looks_like_month() (in module webstruct.features.data_features)
looks_like_range() (in module webstruct.features.data_features)
looks_like_street_part() (in module webstruct.features.data_features)
looks_like_time() (in module webstruct.features.data_features)
looks_like_weekday() (in module webstruct.features.data_features)
looks_like_year() (in module webstruct.features.data_features)

M

MarisaGeonamesGlobalFeature (class in webstruct.gazetteers.features)
merge_dicts() (in module webstruct.utils)

N

NER (class in webstruct.model)
number_pattern() (in module webstruct.features.token_features)

P

parent_tag() (in module webstruct.features.block_features)
partial_fit() (webstruct.wapiti.WapitiFeatureEncoder method)
prefixes_and_suffixes() (in module webstruct.features.token_features)
PrefixFeatures (class in webstruct.features.token_features)
prepare_template() (webstruct.wapiti.WapitiFeatureEncoder method)
prepare_wapiti_template() (in module webstruct.wapiti)
process_range() (webstruct.features.utils.LongestMatchGlobalFeature method)

R

read_geonames() (in module webstruct.gazetteers.geonames)
read_geonames_zipped() (in module webstruct.gazetteers.geonames)
replace_html_tags() (in module webstruct.utils)
reset() (webstruct.sequence_encoding.IobEncoder method)
reset() (webstruct.wapiti.WapitiFeatureEncoder method)
run_command() (in module webstruct.utils)
run_wapiti() (webstruct.wapiti.WapitiCRF method)

S

score() (webstruct.wapiti.WapitiCRF method)
smart_join() (in module webstruct.utils)
substrings() (in module webstruct.utils)
SuffixFeatures (class in webstruct.features.token_features)

T

to_marisa() (in module webstruct.gazetteers.geonames)
token_endswith_colon() (in module webstruct.features.token_features)
token_endswith_dot() (in module webstruct.features.token_features)
token_has_copyright() (in module webstruct.features.token_features)
token_identity() (in module webstruct.features.token_features)
token_lower() (in module webstruct.features.token_features)
token_shape() (in module webstruct.features.token_features)
tokenize() (in module webstruct.tokenizers)
tokenize() (webstruct.feature_extraction.HtmlTokenizer method)
tokenize() (webstruct.tokenizers.DefaultTokenizer method)
tokenize() (webstruct.tokenizers.WordTokenizer method)
tokenize_single() (webstruct.feature_extraction.HtmlTokenizer method)
tostr() (in module webstruct.utils)
transform() (webstruct.feature_extraction.HtmlFeatureExtractor method)
transform() (webstruct.wapiti.WapitiCRF method)
transform() (webstruct.wapiti.WapitiFeatureEncoder method)
transform_single() (webstruct.feature_extraction.HtmlFeatureExtractor method)
transform_single() (webstruct.wapiti.WapitiFeatureEncoder method)

U

unigram_features_template() (webstruct.wapiti.WapitiFeatureEncoder method)

W

WAPITI_CMD (webstruct.wapiti.WapitiCRF attribute)
WapitiCRF (class in webstruct.wapiti)
WapitiFeatureEncoder (class in webstruct.wapiti)
WebAnnotatorLoader (class in webstruct.loaders)
webstruct.feature_extraction (module)
webstruct.features (module)
webstruct.features.block_features (module)
webstruct.features.data_features (module)
webstruct.features.token_features (module)
webstruct.features.utils (module)
webstruct.gazetteers.features (module)
webstruct.gazetteers.geonames (module)
webstruct.grouping (module)
webstruct.loaders (module)
webstruct.metrics (module)
webstruct.model (module)
webstruct.sequence_encoding (module)
webstruct.tokenizers (module)
webstruct.utils (module)
webstruct.wapiti (module)
WordTokenizer (class in webstruct.tokenizers)