Source code for textattack.transformations.word_swaps.word_swap_hownet

"""
Word Swap by OpenHowNet
-------------------------------
"""


import pickle

from textattack.shared import utils

from .word_swap import WordSwap


[docs]class WordSwapHowNet(WordSwap): """Transforms an input by replacing its words with synonyms in the stored synonyms bank generated by the OpenHowNet.""" PATH = "transformations/hownet" def __init__(self, max_candidates=-1, **kwargs): super().__init__(**kwargs) self.max_candidates = max_candidates # Download synonym candidates bank if they're not cached. cache_path = utils.download_from_s3( "{}/{}".format(WordSwapHowNet.PATH, "word_candidates_sense.pkl") ) # Actually load the files from disk. with open(cache_path, "rb") as fp: self.candidates_bank = pickle.load(fp) self.pos_dict = {"ADJ": "adj", "NOUN": "noun", "ADV": "adv", "VERB": "verb"} def _get_replacement_words(self, word, word_pos): """Returns a list of possible 'candidate words' to replace a word in a sentence or phrase. Based on nearest neighbors selected word embeddings. >>> from textattack.transformations import WordSwapHowNet >>> from textattack.augmentation import Augmenter >>> transformation = WordSwapHowNet() >>> augmenter = Augmenter(transformation=transformation) >>> s = 'I am fabulous.' >>> augmenter.augment(s) """ word_pos = self.pos_dict.get(word_pos, None) if word_pos is None: return [] try: candidate_words = self.candidates_bank[word.lower()][word_pos] if self.max_candidates > 0: candidate_words = candidate_words[: self.max_candidates] return [ recover_word_case(candidate_word, word) for candidate_word in candidate_words ] except KeyError: # This word is not in our synonym bank, so return an empty list. return [] def _get_transformations(self, current_text, indices_to_modify): transformed_texts = [] for i in indices_to_modify: word_to_replace = current_text.words[i] word_to_replace_pos = current_text.pos_of_word_index(i) replacement_words = self._get_replacement_words( word_to_replace, word_to_replace_pos ) transformed_texts_idx = [] for r in replacement_words: if r != word_to_replace and utils.is_one_word(r): transformed_texts_idx.append( current_text.replace_word_at_index(i, r) ) transformed_texts.extend(transformed_texts_idx) return transformed_texts
[docs] def extra_repr_keys(self): return ["max_candidates"]
[docs]def recover_word_case(word, reference_word): """Makes the case of `word` like the case of `reference_word`. Supports lowercase, UPPERCASE, and Capitalized. """ if reference_word.islower(): return word.lower() elif reference_word.isupper() and len(reference_word) > 1: return word.upper() elif reference_word[0].isupper() and reference_word[1:].islower(): return word.capitalize() else: # if other, just do not alter the word's case return word