# Source code for nlp_architect.data.cdc_resources.relations.verbocean_relation_extraction
# ******************************************************************************
# Copyright 2017-2018 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
import logging
import os
from typing import Dict, Set
from nlp_architect.common.cdc.mention_data import MentionDataLight
from nlp_architect.data.cdc_resources.relations.relation_extraction import RelationExtraction
from nlp_architect.data.cdc_resources.relations.relation_types_enums import (
RelationType,
OnlineOROfflineMethod,
)
from nlp_architect.utils.io import load_json_file
from nlp_architect.utils.string_utils import StringUtils
logger = logging.getLogger(__name__)
class VerboceanRelationExtraction(RelationExtraction):
    """Relation extractor that looks up mention head words in VerbOcean data.

    The loaded data is kept in ``self.vo`` and is indexed as
    ``self.vo[word1][word2] -> relation`` by :meth:`is_verbocean_relation`.
    """

    def __init__(
        self, method: OnlineOROfflineMethod = OnlineOROfflineMethod.ONLINE, vo_file: str = None
    ):
        """
        Extract Relation between two mentions according to VerbOcean knowledge

        Args:
            method (optional): OnlineOROfflineMethod.{ONLINE/OFFLINE}.
                ONLINE parses ``vo_file`` with :meth:`load_verbocean_file`;
                OFFLINE reads ``vo_file`` with ``load_json_file``
                (default = ONLINE)
            vo_file: str Location of the VerbOcean file to work with. Although
                the parameter defaults to None, an existing file is mandatory.

        Raises:
            FileNotFoundError: if ``vo_file`` is None or is not an existing file
            ValueError: if ``method`` is neither ONLINE nor OFFLINE
        """
        logger.info("Loading Verb Ocean module")
        if vo_file is None or not os.path.isfile(vo_file):
            raise FileNotFoundError("VerbOcean file not found or not in path..")

        if method == OnlineOROfflineMethod.OFFLINE:
            self.vo = load_json_file(vo_file)
        elif method == OnlineOROfflineMethod.ONLINE:
            self.vo = self.load_verbocean_file(vo_file)
        else:
            # Fail here rather than leave ``self.vo`` undefined, which would
            # only surface later as an AttributeError on the first lookup.
            raise ValueError("Unsupported VerbOcean loading method: {}".format(method))
        logger.info("Verb Ocean module loaded successfully")

        super(VerboceanRelationExtraction, self).__init__()

    def extract_all_relations(
        self, mention_x: MentionDataLight, mention_y: MentionDataLight
    ) -> Set[RelationType]:
        """
        Evaluate every relation supported by this class for the given mentions

        Args:
            mention_x: MentionDataLight
            mention_y: MentionDataLight

        Returns:
            Set[RelationType]: a single-element set holding the result of the
                VERBOCEAN_MATCH check (RelationType.NO_RELATION_FOUND when the
                mentions do not match)
        """
        return {self.extract_sub_relations(mention_x, mention_y, RelationType.VERBOCEAN_MATCH)}

    def extract_sub_relations(
        self, mention_x: MentionDataLight, mention_y: MentionDataLight, relation: RelationType
    ) -> RelationType:
        """
        Check if input mentions has the given relation between them

        Args:
            mention_x: MentionDataLight
            mention_y: MentionDataLight
            relation: RelationType

        Returns:
            RelationType: relation in case mentions has given relation or
                RelationType.NO_RELATION_FOUND otherwise
        """
        # Only the VerbOcean relation is handled by this extractor.
        if relation is not RelationType.VERBOCEAN_MATCH:
            return RelationType.NO_RELATION_FOUND

        # A pronoun on either side is reported as no relation, without any lookup.
        if StringUtils.is_pronoun(mention_x.tokens_str.lower()) or StringUtils.is_pronoun(
            mention_y.tokens_str.lower()
        ):
            return RelationType.NO_RELATION_FOUND

        if self.is_verbocean_relation(mention_x, mention_y):
            return RelationType.VERBOCEAN_MATCH

        return RelationType.NO_RELATION_FOUND

    def is_verbocean_relation(
        self, mention_x: MentionDataLight, mention_y: MentionDataLight
    ) -> bool:
        """
        Check if input mentions has VerbOcean relation between them

        The mention heads are looked up as (x, y) first and as (y, x) only when
        the first direction has no entry at all.

        Args:
            mention_x: MentionDataLight
            mention_y: MentionDataLight

        Returns:
            bool: True when an entry exists and its relation is neither
                "[unk]" nor "[low-vol]"
        """
        x_head = mention_x.mention_head
        y_head = mention_y.mention_head

        rel = None
        if x_head in self.vo and y_head in self.vo[x_head]:
            rel = self.vo[x_head][y_head]
        elif y_head in self.vo and x_head in self.vo[y_head]:
            rel = self.vo[y_head][x_head]

        # "[unk]" and "[low-vol]" entries are present in the data but are not
        # counted as a match.
        return rel is not None and rel not in ("[unk]", "[low-vol]")

    @staticmethod
    def get_supported_relations():
        """
        Return all supported relations by this class

        Returns:
            List[RelationType]
        """
        return [RelationType.VERBOCEAN_MATCH]

    @staticmethod
    def load_verbocean_file(fname: str) -> Dict[str, Dict[str, str]]:
        """
        Load a VerbOcean text file into a nested dictionary

        Every data line must consist of exactly five whitespace separated
        fields; the first three are read as ``word1 relation word2`` and the
        last two are ignored. Blank lines and lines starting with ``#`` are
        skipped.

        Args:
            fname: str Location of the VerbOcean text file

        Returns:
            Dict[str, Dict[str, str]]: mapping word1 -> {word2: relation}; when
                the same (word1, word2) pair appears more than once the last
                occurrence wins

        Raises:
            ValueError: if a data line does not contain exactly five fields
        """
        word_dict = {}
        with open(fname, encoding="utf-8") as f:
            for line_no, line in enumerate(f, start=1):
                fields = line.split()
                # Tolerate empty lines (e.g. a trailing newline) and comments.
                if not fields or fields[0].startswith("#"):
                    continue
                if len(fields) != 5:
                    raise ValueError(
                        "Malformed VerbOcean record at {}:{}: expected 5 fields, got {}".format(
                            fname, line_no, len(fields)
                        )
                    )
                word1, rel, word2 = fields[:3]
                word_dict.setdefault(word1, {})[word2] = rel
        return word_dict