# Source code for nlp_architect.data.cdc_resources.relations.verbocean_relation_extraction

# ******************************************************************************
# Copyright 2017-2018 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************

import logging
import os
from typing import Dict, Set

from nlp_architect.common.cdc.mention_data import MentionDataLight
from nlp_architect.data.cdc_resources.relations.relation_extraction import RelationExtraction
from nlp_architect.data.cdc_resources.relations.relation_types_enums import (
    RelationType,
    OnlineOROfflineMethod,
)
from nlp_architect.utils.io import load_json_file
from nlp_architect.utils.string_utils import StringUtils

logger = logging.getLogger(__name__)


class VerboceanRelationExtraction(RelationExtraction):
    """Extract relations between two mentions according to VerbOcean knowledge.

    The VerbOcean data is held in ``self.vo`` as a nested mapping
    ``{verb_1: {verb_2: relation}}`` and is queried by mention head.
    """

    # Relation labels that are stored in the resource but do not count as a match.
    _IGNORED_RELATIONS = ("[unk]", "[low-vol]")

    def __init__(
        self, method: OnlineOROfflineMethod = OnlineOROfflineMethod.ONLINE, vo_file: str = None
    ):
        """
        Load the VerbOcean resource into memory.

        Args:
            method (optional): OnlineOROfflineMethod.{ONLINE/OFFLINE} run against
                the full VerbOcean text file (ONLINE, parsed with
                ``load_verbocean_file``) or a sub-set of it stored as JSON
                (OFFLINE, read with ``load_json_file``). Default is ONLINE.
            vo_file: str Location of the VerbOcean file to work with. Although the
                parameter defaults to None, an existing file must be supplied.

        Raises:
            FileNotFoundError: if ``vo_file`` is None or is not an existing file.
            ValueError: if ``method`` is neither ONLINE nor OFFLINE.
        """
        logger.info("Loading Verb Ocean module")
        if vo_file is None or not os.path.isfile(vo_file):
            raise FileNotFoundError("VerbOcean file not found or not in path..")

        if method == OnlineOROfflineMethod.OFFLINE:
            self.vo = load_json_file(vo_file)
        elif method == OnlineOROfflineMethod.ONLINE:
            self.vo = self.load_verbocean_file(vo_file)
        else:
            # Fail early instead of leaving self.vo undefined and crashing with an
            # AttributeError on the first lookup.
            raise ValueError("Unsupported VerbOcean method: {}".format(method))

        logger.info("Verb Ocean module loaded successfully")
        super().__init__()

    def extract_all_relations(
        self, mention_x: MentionDataLight, mention_y: MentionDataLight
    ) -> Set[RelationType]:
        """
        Collect every relation this extractor finds between the two mentions.

        Args:
            mention_x: MentionDataLight
            mention_y: MentionDataLight

        Returns:
            Set[RelationType]: a single-element set holding the result of
                checking RelationType.VERBOCEAN_MATCH (which is
                RelationType.NO_RELATION_FOUND when there is no match)
        """
        return {self.extract_sub_relations(mention_x, mention_y, RelationType.VERBOCEAN_MATCH)}

    def extract_sub_relations(
        self, mention_x: MentionDataLight, mention_y: MentionDataLight, relation: RelationType
    ) -> RelationType:
        """
        Check if input mentions has the given relation between them

        Args:
            mention_x: MentionDataLight
            mention_y: MentionDataLight
            relation: RelationType

        Returns:
            RelationType: relation in case mentions has given relation or
                RelationType.NO_RELATION_FOUND otherwise
        """
        # This extractor only knows how to answer for VERBOCEAN_MATCH.
        if relation is not RelationType.VERBOCEAN_MATCH:
            return RelationType.NO_RELATION_FOUND

        # Pronoun mentions are never matched.
        if StringUtils.is_pronoun(mention_x.tokens_str.lower()) or StringUtils.is_pronoun(
            mention_y.tokens_str.lower()
        ):
            return RelationType.NO_RELATION_FOUND

        if self.is_verbocean_relation(mention_x, mention_y):
            return RelationType.VERBOCEAN_MATCH

        return RelationType.NO_RELATION_FOUND

    def is_verbocean_relation(
        self, mention_x: MentionDataLight, mention_y: MentionDataLight
    ) -> bool:
        """
        Check if input mentions has VerbOcean relation between them

        The lookup is by mention head and is tried in both directions
        (x -> y first, then y -> x). Entries labelled "[unk]" or "[low-vol]"
        are not considered a relation.

        Args:
            mention_x: MentionDataLight
            mention_y: MentionDataLight

        Returns:
            bool
        """
        x_head = mention_x.mention_head
        y_head = mention_y.mention_head

        rel = None
        if x_head in self.vo and y_head in self.vo[x_head]:
            rel = self.vo[x_head][y_head]
        elif y_head in self.vo and x_head in self.vo[y_head]:
            rel = self.vo[y_head][x_head]

        return rel is not None and rel not in self._IGNORED_RELATIONS

    @staticmethod
    def get_supported_relations():
        """
        Return all supported relations by this class

        Returns:
            List[RelationType]
        """
        return [RelationType.VERBOCEAN_MATCH]

    @staticmethod
    def load_verbocean_file(fname: str) -> Dict[str, Dict[str, str]]:
        """
        Load a VerbOcean text file into a nested dictionary.

        Every data line is expected to consist of five whitespace-separated
        fields, of which the first three are used: ``word1 rel word2``.
        Blank lines and lines whose first field starts with '#' are skipped
        (assumed to be header/comment lines - TODO confirm against the
        resource file in use).

        Args:
            fname: str path of the VerbOcean text file

        Returns:
            Dict[str, Dict[str, str]]: mapping ``{word1: {word2: rel}}``; when a
                pair appears more than once the last occurrence wins

        Raises:
            ValueError: if a data line does not have exactly five fields
        """
        word_dict = {}
        with open(fname, encoding="utf-8") as f:
            for line_no, line in enumerate(f, start=1):
                fields = line.split()
                if not fields or fields[0].startswith("#"):
                    continue
                if len(fields) != 5:
                    raise ValueError(
                        "Malformed VerbOcean entry at {}:{} (expected 5 fields, got {})".format(
                            fname, line_no, len(fields)
                        )
                    )
                word1, rel, word2 = fields[:3]
                word_dict.setdefault(word1, {})[word2] = rel
        return word_dict