# Source code for textattack.attack_recipes.pruthi_2019

"""
Pruthi2019: Combating Adversarial Misspellings with Robust Word Recognition
===========================================================================

"""

from textattack import Attack
from textattack.constraints.overlap import MaxWordsPerturbed
from textattack.constraints.pre_transformation import (
    MinWordLength,
    RepeatModification,
    StopwordModification,
)
from textattack.goal_functions import UntargetedClassification
from textattack.search_methods import GreedySearch
from textattack.transformations import (
    CompositeTransformation,
    WordSwapNeighboringCharacterSwap,
    WordSwapQWERTY,
    WordSwapRandomCharacterDeletion,
    WordSwapRandomCharacterInsertion,
)

from .attack_recipe import AttackRecipe


class Pruthi2019(AttackRecipe):
    """An implementation of the attack used in "Combating Adversarial
    Misspellings with Robust Word Recognition", Pruthi et al., 2019.

    This attack focuses on a small number of character-level changes that
    simulate common typos. It combines:

        - Swapping neighboring characters
        - Deleting characters
        - Inserting characters
        - Swapping characters for adjacent keys on a QWERTY keyboard.

    https://arxiv.org/abs/1905.11268

    :param model_wrapper: Model to attack.
    :param max_num_word_swaps: Maximum number of modifications to allow.
    """

    @staticmethod
    def build(model_wrapper, max_num_word_swaps=1):
        """Assemble the Pruthi et al. (2019) character-level typo attack.

        :param model_wrapper: Model to attack; handed to the
            ``UntargetedClassification`` goal function.
        :param max_num_word_swaps: Maximum number of words that may be
            perturbed (forwarded to ``MaxWordsPerturbed``). Defaults to 1.
        :return: An ``Attack`` built from the goal function, constraints,
            composite transformation and greedy search configured below.
        """
        # All four character-level transforms share the same settings:
        # ignore the first and last letter of each word, as in the paper.
        char_edit_options = {
            "random_one": False,
            "skip_first_char": True,
            "skip_last_char": True,
        }

        # A combination of 4 different character-based transforms.
        transformation = CompositeTransformation(
            [
                WordSwapNeighboringCharacterSwap(**char_edit_options),
                WordSwapRandomCharacterDeletion(**char_edit_options),
                WordSwapRandomCharacterInsertion(**char_edit_options),
                WordSwapQWERTY(**char_edit_options),
            ]
        )

        # Only edit words of length >= 4, and edit at most
        # max_num_word_swaps words. Because the same word is never edited
        # twice (RepeatModification), max_num_word_swaps is really the max
        # number of character changes that can be made. The paper looks at
        # 1 and 2 char attacks.
        constraints = [
            MinWordLength(min_length=4),
            StopwordModification(),
            MaxWordsPerturbed(max_num_words=max_num_word_swaps),
            RepeatModification(),
        ]

        # Untargeted attack.
        goal_function = UntargetedClassification(model_wrapper)
        search_method = GreedySearch()

        return Attack(goal_function, constraints, transformation, search_method)