Source code for webstruct.gazetteers.features

# -*- coding: utf-8 -*-
from __future__ import absolute_import
from webstruct.gazetteers.geonames import GAZETTEER_FORMAT
from webstruct.features.global_features import LongestMatchGlobalFeature

class MarisaGeonamesGlobalFeature(LongestMatchGlobalFeature):
    """
    Global feature that matches longest entities from a lexicon
    extracted from GeoNames and stored in a MARISA Trie.

    :param filename: path to the serialized ``marisa_trie.RecordTrie`` file.
    :param featname: feature name; forwarded unchanged to
        ``LongestMatchGlobalFeature.__init__``.
    :param format: record format for the trie values. When omitted (or
        otherwise falsy), ``GAZETTEER_FORMAT`` is used.
    """

    def __init__(self, filename, featname, format=None):
        # Local import: marisa_trie is only needed once an instance is created.
        import marisa_trie

        # Keep the path itself; the trie object lives in a separate attribute.
        self.filename = filename
        self.trie = marisa_trie.RecordTrie(format or GAZETTEER_FORMAT)
        # Memory-map the on-disk trie instead of reading it fully into memory.
        # NOTE(review): reconstructed from a garbled listing -- confirm that
        # mmap (rather than load) and the ``trie`` attribute name match upstream.
        self.trie.mmap(filename)
        # The trie serves as the lookup structure for the longest-match logic.
        super(MarisaGeonamesGlobalFeature, self).__init__(self.trie, featname)
# TODO: add features that would allow checking entities for compatibility,
# for example, that detected entities are from the same US state.