# Source code for textattack.transformations.word_swaps.word_swap_change_name

"""
Word Swap by Changing Name
-------------------------------
"""

from collections import defaultdict

import numpy as np

from textattack.shared.data import PERSON_NAMES

from .word_swap import WordSwap


class WordSwapChangeName(WordSwap):
    """Word swap that replaces detected person names with random names.

    Words are looked up with a named-entity tagger; those tagged as the
    beginning, end, or single-token form of a person entity are replaced with
    names sampled from ``PERSON_NAMES``.
    """

    # NER model passed to ``current_text.ner_of_word_index`` for each language
    # code; any language not listed here uses ``_DEFAULT_NER_MODEL``.
    _NER_MODEL_BY_LANGUAGE = {
        "en": "ner",
        "fra": "flair/ner-french",
        "french": "flair/ner-french",
    }
    _DEFAULT_NER_MODEL = "flair/ner-multi-fast"

    # Suffix appended to "first"/"last" to pick the language-specific name
    # list in ``PERSON_NAMES``; unlisted languages use the un-suffixed lists.
    _NAME_LIST_SUFFIX_BY_LANGUAGE = {
        "esp": "-spanish",
        "spanish": "-spanish",
        "fra": "-french",
        "french": "-french",
    }

    def __init__(
        self,
        num_name_replacements=3,
        first_only=False,
        last_only=False,
        confidence_score=0.7,
        language="en",
        consistent=False,
        **kwargs
    ):
        """Transforms an input by replacing names of recognized name entity.

        :param num_name_replacements: Number of new names to generate per name detected
        :param first_only: Whether to change first name only
        :param last_only: Whether to change last name only
        :param confidence_score: Name will only be changed when the tagger's
            score is at or above this value
        :param language: Language code. ``"en"`` uses the ``"ner"`` model;
            ``"fra"``/``"french"`` use the French model and French name lists;
            ``"esp"``/``"spanish"`` use the Spanish name lists; every other
            value uses the multilingual model and the default name lists
        :param consistent: Whether to change all instances of the same name to the same new name
        :raises ValueError: If both ``first_only`` and ``last_only`` are true

        Example (replacement names are sampled randomly, so the output
        varies between runs)::

            >>> from textattack.transformations import WordSwapChangeName
            >>> from textattack.augmentation import Augmenter

            >>> transformation = WordSwapChangeName()
            >>> augmenter = Augmenter(transformation=transformation)
            >>> s = 'I am John Smith.'
            >>> augmenter.augment(s)
        """
        super().__init__(**kwargs)
        # Logical ``and`` (not bitwise ``&``) so truthy non-bool flags such as
        # 1 and 2 are still rejected, and ``None`` is treated as false.
        if first_only and last_only:
            raise ValueError("first_only and last_only cannot both be true")
        self.num_name_replacements = num_name_replacements
        self.first_only = first_only
        self.last_only = last_only
        self.confidence_score = confidence_score
        self.language = language
        self.consistent = consistent

    def _get_transformations(self, current_text, indices_to_modify):
        """Build one transformed text per replacement name per modifiable word.

        :param current_text: Text object exposing ``words``,
            ``ner_of_word_index``, ``replace_word_at_index`` and
            ``replace_words_at_indices``
        :param indices_to_modify: Iterable of word indices that may be changed
        :return: List of transformed texts
        """
        ner_model = self._NER_MODEL_BY_LANGUAGE.get(
            self.language, self._DEFAULT_NER_MODEL
        )

        # In consistent mode, group the modifiable indices by (capitalized)
        # word so every occurrence of a name can be swapped in one step.
        indices_by_name = None
        if self.consistent:
            indices_by_name = defaultdict(list)
            for i in indices_to_modify:
                indices_by_name[current_text.words[i].capitalize()].append(i)

        transformed_texts = []
        for i in indices_to_modify:
            name = current_text.words[i].capitalize()
            # A name missing from the grouping was already replaced (together
            # with all of its occurrences) in an earlier iteration.
            if self.consistent and name not in indices_by_name:
                continue

            ner_tag = current_text.ner_of_word_index(i, ner_model)
            replacements = self._get_replacement_words(name, ner_tag)
            # ``replacements`` may be a numpy array, whose truth value is
            # ambiguous, so emptiness is tested with ``len``.
            if len(replacements) == 0:
                continue

            if self.consistent:
                # Removing the entry marks this name as replaced.
                shared_indices = indices_by_name.pop(name)
                for new_name in replacements:
                    transformed_texts.append(
                        current_text.replace_words_at_indices(
                            shared_indices, [new_name] * len(shared_indices)
                        )
                    )
            else:
                for new_name in replacements:
                    transformed_texts.append(
                        current_text.replace_word_at_index(i, new_name)
                    )

        return transformed_texts

    def _get_replacement_words(self, word, word_part_of_speech):
        """Pick replacement names for ``word`` based on its NER tag.

        :param word: The word being replaced
        :param word_part_of_speech: Despite the name, this is the NER tag of
            the word: an object with ``value`` (e.g. ``"B-PER"``) and
            ``score`` attributes
        :return: Sampled replacement names, or an empty list when the word is
            not a sufficiently confident person-name token or is excluded by
            ``first_only``/``last_only``
        """
        tag = word_part_of_speech
        # Only begin/end/single person tokens are handled; other tags
        # (including "I-PER") are left untouched.
        if tag.value not in ("B-PER", "E-PER", "S-PER"):
            return []
        if not tag.score >= self.confidence_score:
            return []

        # A single-token name ("S-PER") is treated as a first name unless
        # ``last_only`` is set, in which case it is treated as a last name.
        if tag.value in ("B-PER", "S-PER") and not self.last_only:
            return self._get_firstname(word)
        if tag.value in ("E-PER", "S-PER") and not self.first_only:
            return self._get_lastname(word)
        return []

    def _sample_names(self, name_part):
        """Sample ``num_name_replacements`` names from ``PERSON_NAMES``.

        :param name_part: ``"first"`` or ``"last"``
        :return: Array produced by ``np.random.choice``; with numpy's default
            of sampling with replacement, it may contain duplicates
        """
        suffix = self._NAME_LIST_SUFFIX_BY_LANGUAGE.get(self.language, "")
        return np.random.choice(
            PERSON_NAMES[name_part + suffix], self.num_name_replacements
        )

    def _get_lastname(self, word):
        """Return an array of random last names (``word`` is not used)."""
        return self._sample_names("last")

    def _get_firstname(self, word):
        """Return an array of random first names (``word`` is not used)."""
        return self._sample_names("first")