Source code for textattack.attack_recipes.clare_li_2020

CLARE Recipe

(Contextualized Perturbation for Textual Adversarial Attack)


import transformers

from textattack import Attack
from textattack.constraints.pre_transformation import (
from textattack.constraints.semantics.sentence_encoders import UniversalSentenceEncoder
from textattack.goal_functions import UntargetedClassification
from textattack.search_methods import GreedySearch
from textattack.transformations import (

from .attack_recipe import AttackRecipe

[docs]class CLARE2020(AttackRecipe): """Li, Zhang, Peng, Chen, Brockett, Sun, Dolan. "Contextualized Perturbation for Textual Adversarial Attack" (Li et al., 2020) This method uses greedy search with replace, merge, and insertion transformations that leverage a pretrained language model. It also uses USE similarity constraint. """
[docs] @staticmethod def build(model_wrapper): # "This paper presents CLARE, a ContextuaLized AdversaRial Example generation model # that produces fluent and grammatical outputs through a mask-then-infill procedure. # CLARE builds on a pre-trained masked language model and modifies the inputs in a context-aware manner. # We propose three contex-tualized perturbations, Replace, Insert and Merge, allowing for generating outputs of # varied lengths." # # "We experiment with a distilled version of RoBERTa (RoBERTa_{distill}; Sanh et al., 2019) # as the masked language model for contextualized infilling." # Because BAE and CLARE both use similar replacement papers, we use BAE's replacement method here. shared_masked_lm = transformers.AutoModelForCausalLM.from_pretrained( "distilroberta-base" ) shared_tokenizer = transformers.AutoTokenizer.from_pretrained( "distilroberta-base" ) transformation = CompositeTransformation( [ WordSwapMaskedLM( method="bae", masked_language_model=shared_masked_lm, tokenizer=shared_tokenizer, max_candidates=50, min_confidence=5e-4, ), WordInsertionMaskedLM( masked_language_model=shared_masked_lm, tokenizer=shared_tokenizer, max_candidates=50, min_confidence=0.0, ), WordMergeMaskedLM( masked_language_model=shared_masked_lm, tokenizer=shared_tokenizer, max_candidates=50, min_confidence=5e-3, ), ] ) # # Don't modify the same word twice or stopwords. # constraints = [RepeatModification(), StopwordModification()] # "A common choice of sim(·,·) is to encode sentences using neural networks, # and calculate their cosine similarity in the embedding space (Jin et al., 2020)." # The original implementation uses similarity of 0.7. use_constraint = UniversalSentenceEncoder( threshold=0.7, metric="cosine", compare_against_original=True, window_size=15, skip_text_shorter_than_window=True, ) constraints.append(use_constraint) # Goal is untargeted classification. # "The score is then the negative probability of predicting the gold label from f, using [x_{adv}] as the input" goal_function = UntargetedClassification(model_wrapper) # "To achieve this, we iteratively apply the actions, # and first select those minimizing the probability of outputting the gold label y from f." # # "Only one of the three actions can be applied at each position, and we select the one with the highest score." # # "Actions are iteratively applied to the input, until an adversarial example is found or a limit of actions T # is reached. # Each step selects the highest-scoring action from the remaining ones." # search_method = GreedySearch() return Attack(goal_function, constraints, transformation, search_method)