# Source code for nlp_architect.data.cdc_resources.relations.computed_relation_extraction
# ******************************************************************************
# Copyright 2017-2018 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
import difflib
import logging
import sys
from typing import List, Set
from nlp_architect.common.cdc.mention_data import MentionDataLight
from nlp_architect.data.cdc_resources.relations.relation_extraction import RelationExtraction
from nlp_architect.data.cdc_resources.relations.relation_types_enums import RelationType
from nlp_architect.utils.string_utils import StringUtils
logger = logging.getLogger(__name__)
class ComputedRelationExtraction(RelationExtraction):
    """
    Extract relations between two mentions using computation and rule based algorithms.

    The relations this class can produce are the ones listed by
    ``get_supported_relations``: EXACT_STRING, FUZZY_FIT, FUZZY_HEAD_FIT and
    SAME_HEAD_LEMMA. Every extractor answers RelationType.NO_RELATION_FOUND when
    its rule does not apply.
    """

    def extract_all_relations(
        self, mention_x: MentionDataLight, mention_y: MentionDataLight
    ) -> Set[RelationType]:
        """
        Collect every relation supported by this class that holds between the mentions

        Args:
            mention_x: MentionDataLight
            mention_y: MentionDataLight

        Returns:
            Set[RelationType]: One or more of: RelationType.EXACT_STRING, RelationType.FUZZY_FIT,
                RelationType.FUZZY_HEAD_FIT, RelationType.SAME_HEAD_LEMMA; or a set holding only
                RelationType.NO_RELATION_FOUND when no relation holds or when either mention
                is a pronoun
        """
        # Pronoun mentions are never matched by the rules of this class.
        if StringUtils.is_pronoun(mention_x.tokens_str.lower()) or StringUtils.is_pronoun(
            mention_y.tokens_str.lower()
        ):
            return {RelationType.NO_RELATION_FOUND}

        # All four extractors are always evaluated, in this order.
        candidates = (
            self.extract_exact_string(mention_x, mention_y),
            self.extract_fuzzy_fit(mention_x, mention_y),
            self.extract_fuzzy_head_fit(mention_x, mention_y),
            self.extract_same_head_lemma(mention_x, mention_y),
        )
        relations = {rel for rel in candidates if rel != RelationType.NO_RELATION_FOUND}
        if not relations:
            relations.add(RelationType.NO_RELATION_FOUND)
        return relations

    def extract_sub_relations(
        self, mention_x: MentionDataLight, mention_y: MentionDataLight, relation: RelationType
    ) -> RelationType:
        """
        Check if input mentions has the given relation between them

        Args:
            mention_x: MentionDataLight
            mention_y: MentionDataLight
            relation: RelationType

        Returns:
            RelationType: relation in case mentions has given relation or
                RelationType.NO_RELATION_FOUND otherwise (also returned when either
                mention is a pronoun or when the relation is not handled by this class)
        """
        if StringUtils.is_pronoun(mention_x.tokens_str.lower()) or StringUtils.is_pronoun(
            mention_y.tokens_str.lower()
        ):
            return RelationType.NO_RELATION_FOUND

        if relation == RelationType.EXACT_STRING:
            return self.extract_exact_string(mention_x, mention_y)
        if relation == RelationType.FUZZY_FIT:
            return self.extract_fuzzy_fit(mention_x, mention_y)
        if relation == RelationType.FUZZY_HEAD_FIT:
            return self.extract_fuzzy_head_fit(mention_x, mention_y)
        if relation == RelationType.SAME_HEAD_LEMMA:
            is_same_lemma = self.extract_same_head_lemma(mention_x, mention_y)
            if is_same_lemma != RelationType.NO_RELATION_FOUND:
                return relation

        return RelationType.NO_RELATION_FOUND

    @staticmethod
    def extract_same_head_lemma(
        mention_x: MentionDataLight, mention_y: MentionDataLight
    ) -> RelationType:
        """
        Check if input mentions has same head lemma relation

        Args:
            mention_x: MentionDataLight
            mention_y: MentionDataLight

        Returns:
            RelationType.SAME_HEAD_LEMMA or RelationType.NO_RELATION_FOUND
        """
        head_lemmas = (mention_x.mention_head_lemma, mention_y.mention_head_lemma)
        # A head lemma that is a preposition or a determiner never yields a relation.
        if any(StringUtils.is_preposition(lemma) for lemma in head_lemmas) or any(
            StringUtils.is_determiner(lemma) for lemma in head_lemmas
        ):
            return RelationType.NO_RELATION_FOUND

        # The lemmas are compared as-is (case-sensitive).
        if mention_x.mention_head_lemma == mention_y.mention_head_lemma:
            return RelationType.SAME_HEAD_LEMMA
        return RelationType.NO_RELATION_FOUND

    @staticmethod
    def extract_fuzzy_head_fit(
        mention_x: MentionDataLight, mention_y: MentionDataLight
    ) -> RelationType:
        """
        Check if input mentions has fuzzy head fit relation

        The relation holds when the head of one mention appears as a whitespace
        separated token of the other mention (exact, case-sensitive token match).

        Args:
            mention_x: MentionDataLight
            mention_y: MentionDataLight

        Returns:
            RelationType.FUZZY_HEAD_FIT or RelationType.NO_RELATION_FOUND
        """
        if StringUtils.is_preposition(
            mention_x.mention_head_lemma.lower()
        ) or StringUtils.is_preposition(mention_y.mention_head_lemma.lower()):
            return RelationType.NO_RELATION_FOUND

        mention_y_tokens = mention_y.tokens_str.split()
        mention_x_tokens = mention_x.tokens_str.split()
        if mention_x.mention_head in mention_y_tokens or mention_y.mention_head in mention_x_tokens:
            return RelationType.FUZZY_HEAD_FIT
        return RelationType.NO_RELATION_FOUND

    @staticmethod
    def extract_fuzzy_fit(mention_x: MentionDataLight, mention_y: MentionDataLight) -> RelationType:
        """
        Check if input mentions has fuzzy fit relation

        Two similarity checks are made with difflib.SequenceMatcher: first on the raw
        mention strings (ratio of at least 90%), then on the strings with numeric tokens
        spelled out as words (ratio of at least 85%).

        Args:
            mention_x: MentionDataLight
            mention_y: MentionDataLight

        Returns:
            RelationType.FUZZY_FIT or RelationType.NO_RELATION_FOUND

        Raises:
            SystemExit: when the optional num2words package cannot be imported
        """
        # num2words is an optional dependency, hence the function-scope import.
        try:
            from num2words import num2words
        except (AttributeError, ImportError):
            logger.error(
                "num2words is not installed, please install nlp_architect with [all] package. "
                "for example: pip install nlp_architect[all]"
            )
            # Exit with a non-zero status so the failure is reported to the caller.
            sys.exit(1)

        mention1_str = mention_x.tokens_str
        mention2_str = mention_y.tokens_str
        if difflib.SequenceMatcher(None, mention1_str, mention2_str).ratio() * 100 >= 90:
            return RelationType.FUZZY_FIT

        def spell_out_numbers(text: str) -> str:
            # isdecimal() (not isdigit()) guards int(): isdigit() also accepts characters
            # such as superscript digits, which int() rejects with ValueError.
            return " ".join(
                num2words(int(token)).replace("-", " ") if token.isdecimal() else token
                for token in text.split()
            )

        # Convert numbers to words and retry with a more permissive threshold
        x_spelled = spell_out_numbers(mention1_str)
        y_spelled = spell_out_numbers(mention2_str)
        if difflib.SequenceMatcher(None, x_spelled, y_spelled).ratio() * 100 >= 85:
            return RelationType.FUZZY_FIT
        return RelationType.NO_RELATION_FOUND

    @staticmethod
    def extract_exact_string(
        mention_x: MentionDataLight, mention_y: MentionDataLight
    ) -> RelationType:
        """
        Check if input mentions has exact string relation

        The mention strings are compared case-insensitively; a mention that is a
        preposition never yields a relation.

        Args:
            mention_x: MentionDataLight
            mention_y: MentionDataLight

        Returns:
            RelationType.EXACT_STRING or RelationType.NO_RELATION_FOUND
        """
        mention1_lower = mention_x.tokens_str.lower()
        mention2_lower = mention_y.tokens_str.lower()
        if StringUtils.is_preposition(mention1_lower) or StringUtils.is_preposition(
            mention2_lower
        ):
            return RelationType.NO_RELATION_FOUND

        if mention1_lower == mention2_lower:
            return RelationType.EXACT_STRING
        return RelationType.NO_RELATION_FOUND

    @staticmethod
    def get_supported_relations() -> List[RelationType]:
        """
        Return all supported relations by this class

        Returns:
            List[RelationType]
        """
        return [
            RelationType.EXACT_STRING,
            RelationType.FUZZY_FIT,
            RelationType.FUZZY_HEAD_FIT,
            RelationType.SAME_HEAD_LEMMA,
        ]