
Documentation for Datapoison Module

This module provides classes for data poisoning attacks on datasets, simulating poisoning by adding noise to samples or by modifying specific data points.

Classes:

- SamplePoisoningAttack: Main attack class that implements the DatasetAttack interface
- DataPoisoningStrategy: Abstract base class for poisoning strategies
- TargetedSamplePoisoningStrategy: Implementation for targeted poisoning (X pattern)
- NonTargetedSamplePoisoningStrategy: Implementation for non-targeted poisoning (noise-based)

DataPoisoningStrategy

Bases: ABC

Abstract base class for poisoning strategies.

Source code in nebula/addons/attacks/dataset/datapoison.py
class DataPoisoningStrategy(ABC):
    """Abstract base class for poisoning strategies."""

    @abstractmethod
    def poison_data(
        self,
        dataset,
        indices: list[int],
        poisoned_percent: float,
        poisoned_noise_percent: float,
    ) -> "Dataset":
        """
        Abstract method to poison data in the dataset.

        Args:
            dataset: The dataset to modify
            indices: List of indices to consider for poisoning
            poisoned_percent: Percentage of data to poison (0-100)
            poisoned_noise_percent: Percentage of noise to apply (0-100)

        Returns:
            Modified dataset with poisoned data
        """
        pass

    def _convert_to_tensor(self, data: torch.Tensor | Image.Image | tuple) -> torch.Tensor:
        """
        Convert input data to tensor format.

        Args:
            data: Input data that can be a tensor, PIL Image, or tuple

        Returns:
            Tensor representation of the input data
        """
        if isinstance(data, tuple):
            data = data[0]

        if isinstance(data, Image.Image):
            return torch.tensor(np.array(data))
        elif isinstance(data, torch.Tensor):
            return data
        else:
            return torch.tensor(data)

    def _handle_single_point(self, tensor: torch.Tensor) -> tuple[torch.Tensor, bool]:
        """
        Handle single point tensors by reshaping them.

        Args:
            tensor: Input tensor

        Returns:
            Tuple of (reshaped tensor, is_single_point flag)
        """
        is_single_point = False
        if len(tensor.shape) == 0:
            tensor = tensor.view(-1)
            is_single_point = True
        return tensor, is_single_point

poison_data(dataset, indices, poisoned_percent, poisoned_noise_percent) abstractmethod

Abstract method to poison data in the dataset.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| dataset | | The dataset to modify | required |
| indices | list[int] | List of indices to consider for poisoning | required |
| poisoned_percent | float | Percentage of data to poison (0-100) | required |
| poisoned_noise_percent | float | Percentage of noise to apply (0-100) | required |

Returns:

| Type | Description |
| --- | --- |
| Dataset | Modified dataset with poisoned data |

Source code in nebula/addons/attacks/dataset/datapoison.py
@abstractmethod
def poison_data(
    self,
    dataset,
    indices: list[int],
    poisoned_percent: float,
    poisoned_noise_percent: float,
) -> "Dataset":
    """
    Abstract method to poison data in the dataset.

    Args:
        dataset: The dataset to modify
        indices: List of indices to consider for poisoning
        poisoned_percent: Percentage of data to poison (0-100)
        poisoned_noise_percent: Percentage of noise to apply (0-100)

    Returns:
        Modified dataset with poisoned data
    """
    pass
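
The abstract base class defines the contract that concrete strategies satisfy: implement poison_data and, if convenient, reuse the inherited _convert_to_tensor and _handle_single_point helpers. Below is a minimal, hypothetical sketch of a custom strategy; the ZeroSamplePoisoningStrategy name and its zeroing behaviour are illustrative, not part of the module, and it assumes the module is importable from the source path shown above and that the dataset exposes a mutable data attribute, as the built-in strategies require.

```python
import copy
import random

import torch

from nebula.addons.attacks.dataset.datapoison import DataPoisoningStrategy


class ZeroSamplePoisoningStrategy(DataPoisoningStrategy):
    """Hypothetical strategy: overwrites a fraction of the selected samples with zeros."""

    def poison_data(self, dataset, indices, poisoned_percent, poisoned_noise_percent):
        new_dataset = copy.deepcopy(dataset)
        num_poisoned = int(poisoned_percent * len(indices) / 100.0)
        for i in random.sample(indices, num_poisoned):
            # Reuse the inherited helper to normalise the sample to a tensor,
            # then replace it with an all-zeros tensor of the same shape.
            sample = self._convert_to_tensor(new_dataset.data[i])
            new_dataset.data[i] = torch.zeros_like(sample)
        return new_dataset
```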

NonTargetedSamplePoisoningStrategy

Bases: DataPoisoningStrategy

Implementation of non-targeted poisoning strategy using noise.

Source code in nebula/addons/attacks/dataset/datapoison.py
class NonTargetedSamplePoisoningStrategy(DataPoisoningStrategy):
    """Implementation of non-targeted poisoning strategy using noise."""

    def __init__(self, noise_type: str):
        """
        Initialize non-targeted poisoning strategy.

        Args:
            noise_type: Type of noise to apply (salt, gaussian, s&p, nlp_rawdata)
        """
        self.noise_type = noise_type.lower()

    def apply_noise(self, t: torch.Tensor | Image.Image, poisoned_noise_percent: float) -> torch.Tensor:
        """
        Applies noise to a tensor based on the specified noise type and poisoning percentage.

        Args:
            t: The input tensor or PIL Image to which noise will be applied
            poisoned_noise_percent: The percentage of noise to be applied (0-100)

        Returns:
            The tensor with noise applied
        """
        t = self._convert_to_tensor(t)
        t, is_single_point = self._handle_single_point(t)

        arr = t.detach().cpu().numpy()
        poisoned_ratio = poisoned_noise_percent / 100.0

        logging.info(
            f"[{self.__class__.__name__}] Applying noise to data with noise type: {self.noise_type} and amount: {poisoned_ratio} (float)"
        )

        if self.noise_type == "salt":
            poisoned = torch.tensor(random_noise(arr, mode=self.noise_type, amount=poisoned_ratio))
        elif self.noise_type == "gaussian":
            poisoned = torch.tensor(random_noise(arr, mode=self.noise_type, mean=0, var=poisoned_ratio, clip=True))
        elif self.noise_type == "s&p":
            poisoned = torch.tensor(random_noise(arr, mode=self.noise_type, amount=poisoned_ratio))
        elif self.noise_type == "nlp_rawdata":
            poisoned = self.poison_to_nlp_rawdata(arr, poisoned_ratio)
        else:
            logging.info(f"ERROR: noise_type '{self.noise_type}' not supported in data poison attack.")
            return t

        if is_single_point:
            poisoned = poisoned[0]

        return poisoned

    def poison_to_nlp_rawdata(self, text_data: list, poisoned_ratio: float) -> list:
        """
        Poisons NLP data by setting word vectors to zero with a given probability.

        Args:
            text_data: List of word vectors
            poisoned_ratio: Fraction of non-zero vectors to set to zero

        Returns:
            Modified text data with some word vectors set to zero
        """
        non_zero_vector_indice = [i for i in range(0, len(text_data)) if text_data[i][0] != 0]
        non_zero_vector_len = len(non_zero_vector_indice)

        num_poisoned_token = int(poisoned_ratio * non_zero_vector_len)
        if num_poisoned_token == 0 or num_poisoned_token > non_zero_vector_len:
            return text_data

        poisoned_token_indice = random.sample(non_zero_vector_indice, num_poisoned_token)
        zero_vector = torch.Tensor(np.zeros(len(text_data[0][0])))
        for i in poisoned_token_indice:
            text_data[i] = zero_vector
        return text_data

    def poison_data(
        self,
        dataset,
        indices: list[int],
        poisoned_percent: float,
        poisoned_noise_percent: float,
    ) -> "Dataset":
        """
        Applies noise-based poisoning to the dataset.

        Args:
            dataset: The dataset to modify
            indices: List of indices to consider for poisoning
            poisoned_percent: Percentage of data to poison (0-100)
            poisoned_noise_percent: Percentage of noise to apply (0-100)

        Returns:
            Modified dataset with poisoned data
        """
        logging.info(f"[{self.__class__.__name__}] Poisoning data with noise type: {self.noise_type}")
        new_dataset = copy.deepcopy(dataset)
        if not isinstance(new_dataset.targets, np.ndarray):
            new_dataset.targets = np.array(new_dataset.targets)
        else:
            new_dataset.targets = new_dataset.targets.copy()

        num_indices = len(indices)
        num_poisoned = int(poisoned_percent * num_indices / 100.0)

        if num_indices == 0 or num_poisoned > num_indices:
            return new_dataset

        poisoned_indices = random.sample(indices, num_poisoned)
        logging.info(f"Number of poisoned samples: {num_poisoned}")

        for i in poisoned_indices:
            t = new_dataset.data[i]
            poisoned = self.apply_noise(t, poisoned_noise_percent)

            if isinstance(t, tuple):
                poisoned = (poisoned, t[1])

            new_dataset.data[i] = poisoned

        return new_dataset

__init__(noise_type)

Initialize non-targeted poisoning strategy.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| noise_type | str | Type of noise to apply (salt, gaussian, s&p, nlp_rawdata) | required |

Source code in nebula/addons/attacks/dataset/datapoison.py
def __init__(self, noise_type: str):
    """
    Initialize non-targeted poisoning strategy.

    Args:
        noise_type: Type of noise to apply (salt, gaussian, s&p, nlp_rawdata)
    """
    self.noise_type = noise_type.lower()

apply_noise(t, poisoned_noise_percent)

Applies noise to a tensor based on the specified noise type and poisoning percentage.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| t | Tensor \| Image | The input tensor or PIL Image to which noise will be applied | required |
| poisoned_noise_percent | float | The percentage of noise to be applied (0-100) | required |

Returns:

| Type | Description |
| --- | --- |
| Tensor | The tensor with noise applied |

Source code in nebula/addons/attacks/dataset/datapoison.py
def apply_noise(self, t: torch.Tensor | Image.Image, poisoned_noise_percent: float) -> torch.Tensor:
    """
    Applies noise to a tensor based on the specified noise type and poisoning percentage.

    Args:
        t: The input tensor or PIL Image to which noise will be applied
        poisoned_noise_percent: The percentage of noise to be applied (0-100)

    Returns:
        The tensor with noise applied
    """
    t = self._convert_to_tensor(t)
    t, is_single_point = self._handle_single_point(t)

    arr = t.detach().cpu().numpy()
    poisoned_ratio = poisoned_noise_percent / 100.0

    logging.info(
        f"[{self.__class__.__name__}] Applying noise to data with noise type: {self.noise_type} and amount: {poisoned_ratio} (float)"
    )

    if self.noise_type == "salt":
        poisoned = torch.tensor(random_noise(arr, mode=self.noise_type, amount=poisoned_ratio))
    elif self.noise_type == "gaussian":
        poisoned = torch.tensor(random_noise(arr, mode=self.noise_type, mean=0, var=poisoned_ratio, clip=True))
    elif self.noise_type == "s&p":
        poisoned = torch.tensor(random_noise(arr, mode=self.noise_type, amount=poisoned_ratio))
    elif self.noise_type == "nlp_rawdata":
        poisoned = self.poison_to_nlp_rawdata(arr, poisoned_ratio)
    else:
        logging.info(f"ERROR: noise_type '{self.noise_type}' not supported in data poison attack.")
        return t

    if is_single_point:
        poisoned = poisoned[0]

    return poisoned
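
As a quick illustration of apply_noise in isolation, the sketch below applies Gaussian noise to a synthetic image-shaped tensor. The tensor shape and values are arbitrary, and the example assumes the module is importable from the source path above and that its random_noise helper (presumably skimage.util.random_noise) is available.

```python
import torch

from nebula.addons.attacks.dataset.datapoison import NonTargetedSamplePoisoningStrategy

# Hypothetical 28x28 grayscale image with values in [0, 1].
clean = torch.rand(28, 28)

strategy = NonTargetedSamplePoisoningStrategy("gaussian")
# Gaussian noise with variance poisoned_noise_percent / 100 = 0.5.
noisy = strategy.apply_noise(clean, poisoned_noise_percent=50)

print(clean.shape, noisy.shape)  # both torch.Size([28, 28])
```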

poison_data(dataset, indices, poisoned_percent, poisoned_noise_percent)

Applies noise-based poisoning to the dataset.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| dataset | | The dataset to modify | required |
| indices | list[int] | List of indices to consider for poisoning | required |
| poisoned_percent | float | Percentage of data to poison (0-100) | required |
| poisoned_noise_percent | float | Percentage of noise to apply (0-100) | required |

Returns:

| Type | Description |
| --- | --- |
| Dataset | Modified dataset with poisoned data |

Source code in nebula/addons/attacks/dataset/datapoison.py
def poison_data(
    self,
    dataset,
    indices: list[int],
    poisoned_percent: float,
    poisoned_noise_percent: float,
) -> "Dataset":
    """
    Applies noise-based poisoning to the dataset.

    Args:
        dataset: The dataset to modify
        indices: List of indices to consider for poisoning
        poisoned_percent: Percentage of data to poison (0-100)
        poisoned_noise_percent: Percentage of noise to apply (0-100)

    Returns:
        Modified dataset with poisoned data
    """
    logging.info(f"[{self.__class__.__name__}] Poisoning data with noise type: {self.noise_type}")
    new_dataset = copy.deepcopy(dataset)
    if not isinstance(new_dataset.targets, np.ndarray):
        new_dataset.targets = np.array(new_dataset.targets)
    else:
        new_dataset.targets = new_dataset.targets.copy()

    num_indices = len(indices)
    num_poisoned = int(poisoned_percent * num_indices / 100.0)

    if num_indices == 0 or num_poisoned > num_indices:
        return new_dataset

    poisoned_indices = random.sample(indices, num_poisoned)
    logging.info(f"Number of poisoned samples: {num_poisoned}")

    for i in poisoned_indices:
        t = new_dataset.data[i]
        poisoned = self.apply_noise(t, poisoned_noise_percent)

        if isinstance(t, tuple):
            poisoned = (poisoned, t[1])

        new_dataset.data[i] = poisoned

    return new_dataset

poison_to_nlp_rawdata(text_data, poisoned_ratio)

Poisons NLP data by setting word vectors to zero with a given probability.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| text_data | list | List of word vectors | required |
| poisoned_ratio | float | Fraction of non-zero vectors to set to zero | required |

Returns:

| Type | Description |
| --- | --- |
| list | Modified text data with some word vectors set to zero |

Source code in nebula/addons/attacks/dataset/datapoison.py
def poison_to_nlp_rawdata(self, text_data: list, poisoned_ratio: float) -> list:
    """
    Poisons NLP data by setting word vectors to zero with a given probability.

    Args:
        text_data: List of word vectors
        poisoned_ratio: Fraction of non-zero vectors to set to zero

    Returns:
        Modified text data with some word vectors set to zero
    """
    non_zero_vector_indice = [i for i in range(0, len(text_data)) if text_data[i][0] != 0]
    non_zero_vector_len = len(non_zero_vector_indice)

    num_poisoned_token = int(poisoned_ratio * non_zero_vector_len)
    if num_poisoned_token == 0 or num_poisoned_token > non_zero_vector_len:
        return text_data

    poisoned_token_indice = random.sample(non_zero_vector_indice, num_poisoned_token)
    zero_vector = torch.Tensor(np.zeros(len(text_data[0][0])))
    for i in poisoned_token_indice:
        text_data[i] = zero_vector
    return text_data
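
Putting the class together end to end, the following sketch poisons 20% of an MNIST training set with salt-and-pepper noise by calling poison_data directly. It is only a sketch: it assumes a torchvision-style dataset that exposes data and targets attributes (which is what poison_data mutates), and the root path and percentages are illustrative.

```python
from torchvision.datasets import MNIST

from nebula.addons.attacks.dataset.datapoison import NonTargetedSamplePoisoningStrategy

# Illustrative dataset; poison_data only requires .data and .targets attributes.
train_set = MNIST(root="./data", train=True, download=True)
indices = list(range(len(train_set)))

strategy = NonTargetedSamplePoisoningStrategy("s&p")
poisoned_set = strategy.poison_data(
    train_set,
    indices,
    poisoned_percent=20,        # noise is applied to 20% of the selected indices
    poisoned_noise_percent=80,  # amount handed to the noise generator (80 / 100 = 0.8)
)
```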

SamplePoisoningAttack

Bases: DatasetAttack

Implements a data poisoning attack on a training dataset.

Source code in nebula/addons/attacks/dataset/datapoison.py
class SamplePoisoningAttack(DatasetAttack):
    """
    Implements a data poisoning attack on a training dataset.
    """

    def __init__(self, engine, attack_params: Dict):
        """
        Initialize the sample poisoning attack.

        Args:
            engine: The engine managing the attack context
            attack_params: Dictionary containing attack parameters
        """
        try:
            round_start = int(attack_params["round_start_attack"])
            round_stop = int(attack_params["round_stop_attack"])
            attack_interval = int(attack_params["attack_interval"])
        except KeyError as e:
            raise ValueError(f"Missing required attack parameter: {e}")
        except ValueError:
            raise ValueError("Invalid value in attack_params. Ensure all values are integers.")

        super().__init__(engine, round_start, round_stop, attack_interval)
        self.datamodule = engine._trainer.datamodule
        self.poisoned_percent = float(attack_params["poisoned_sample_percent"])
        self.poisoned_noise_percent = float(attack_params["poisoned_noise_percent"])

        # Create the appropriate strategy based on whether the attack is targeted
        if attack_params.get("targeted", False):
            target_label = int(attack_params.get("target_label") or attack_params.get("targetLabel", 4))
            self.strategy = TargetedSamplePoisoningStrategy(target_label)
        else:
            noise_type = (attack_params.get("noise_type") or attack_params.get("noiseType", "Gaussian")).lower()
            self.strategy = NonTargetedSamplePoisoningStrategy(noise_type)

    def get_malicious_dataset(self):
        """
        Creates a malicious dataset by poisoning selected data points.

        Returns:
            Dataset: The modified dataset with poisoned data
        """
        return self.strategy.poison_data(
            self.datamodule.train_set,
            self.datamodule.train_set_indices,
            self.poisoned_percent,
            self.poisoned_noise_percent,
        )

__init__(engine, attack_params)

Initialize the sample poisoning attack.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| engine | | The engine managing the attack context | required |
| attack_params | Dict | Dictionary containing attack parameters | required |

Source code in nebula/addons/attacks/dataset/datapoison.py
def __init__(self, engine, attack_params: Dict):
    """
    Initialize the sample poisoning attack.

    Args:
        engine: The engine managing the attack context
        attack_params: Dictionary containing attack parameters
    """
    try:
        round_start = int(attack_params["round_start_attack"])
        round_stop = int(attack_params["round_stop_attack"])
        attack_interval = int(attack_params["attack_interval"])
    except KeyError as e:
        raise ValueError(f"Missing required attack parameter: {e}")
    except ValueError:
        raise ValueError("Invalid value in attack_params. Ensure all values are integers.")

    super().__init__(engine, round_start, round_stop, attack_interval)
    self.datamodule = engine._trainer.datamodule
    self.poisoned_percent = float(attack_params["poisoned_sample_percent"])
    self.poisoned_noise_percent = float(attack_params["poisoned_noise_percent"])

    # Create the appropriate strategy based on whether the attack is targeted
    if attack_params.get("targeted", False):
        target_label = int(attack_params.get("target_label") or attack_params.get("targetLabel", 4))
        self.strategy = TargetedSamplePoisoningStrategy(target_label)
    else:
        noise_type = (attack_params.get("noise_type") or attack_params.get("noiseType", "Gaussian")).lower()
        self.strategy = NonTargetedSamplePoisoningStrategy(noise_type)
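
The constructor reads its configuration from attack_params. The sketch below shows the expected dictionary shape; the key names are taken from the code above, while the concrete values are illustrative and the engine object is supplied by the framework.

```python
# Non-targeted variant: Gaussian noise on 30% of the selected samples.
attack_params = {
    "round_start_attack": 1,
    "round_stop_attack": 10,
    "attack_interval": 1,
    "poisoned_sample_percent": 30,
    "poisoned_noise_percent": 70,
    "targeted": False,
    "noise_type": "gaussian",        # "noiseType" is also accepted; defaults to "Gaussian"
}

# Targeted variant: stamp the X pattern on every sample of class 4.
targeted_params = {
    "round_start_attack": 1,
    "round_stop_attack": 10,
    "attack_interval": 1,
    "poisoned_sample_percent": 100,  # parsed but ignored by the targeted strategy
    "poisoned_noise_percent": 0,     # parsed but ignored by the targeted strategy
    "targeted": True,
    "target_label": 4,               # "targetLabel" is also accepted; defaults to 4
}

# attack = SamplePoisoningAttack(engine, attack_params)  # engine comes from the framework
```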

get_malicious_dataset()

Creates a malicious dataset by poisoning selected data points.

Returns:

| Name | Type | Description |
| --- | --- | --- |
| Dataset | | The modified dataset with poisoned data |

Source code in nebula/addons/attacks/dataset/datapoison.py
def get_malicious_dataset(self):
    """
    Creates a malicious dataset by poisoning selected data points.

    Returns:
        Dataset: The modified dataset with poisoned data
    """
    return self.strategy.poison_data(
        self.datamodule.train_set,
        self.datamodule.train_set_indices,
        self.poisoned_percent,
        self.poisoned_noise_percent,
    )

TargetedSamplePoisoningStrategy

Bases: DataPoisoningStrategy

Implementation of targeted poisoning strategy using X pattern.

Source code in nebula/addons/attacks/dataset/datapoison.py
class TargetedSamplePoisoningStrategy(DataPoisoningStrategy):
    """Implementation of targeted poisoning strategy using X pattern."""

    def __init__(self, target_label: int):
        """
        Initialize targeted poisoning strategy.

        Args:
            target_label: The label to target for poisoning
        """
        self.target_label = target_label

    def add_x_to_image(self, img: torch.Tensor | Image.Image) -> torch.Tensor:
        """
        Adds a 10x10 pixel 'X' mark to the top-left corner of an image.

        Args:
            img: Input image tensor or PIL Image

        Returns:
            Modified image with X pattern
        """
        logging.info(f"[{self.__class__.__name__}] Adding X pattern to image")
        img = self._convert_to_tensor(img)
        img, is_single_point = self._handle_single_point(img)

        # Handle batch dimension if present
        if len(img.shape) > 3:
            batch_size = img.shape[0]
            img = img.view(-1, *img.shape[-3:])
        else:
            batch_size = 1

        # Ensure image is large enough
        if img.shape[-2] < 10 or img.shape[-1] < 10:
            logging.warning(f"Image too small for X pattern: {img.shape}")
            return img

        # Determine if image is normalized (0-1) or not (0-255)
        is_normalized = img.max() <= 1.0
        pattern_value = 1.0 if is_normalized else 255.0

        # Create X pattern
        for i in range(0, 10):
            for j in range(0, 10):
                if i + j <= 9 or i == j:
                    if len(img.shape) == 3:  # RGB image
                        img[..., i, j] = pattern_value
                    else:  # Grayscale image
                        img[..., i, j] = pattern_value

        # Restore batch dimension if it was present
        if batch_size > 1:
            img = img.view(batch_size, *img.shape[1:])

        if is_single_point:
            img = img[0]

        return img

    def poison_data(
        self,
        dataset,
        indices: list[int],
        poisoned_percent: float,
        poisoned_noise_percent: float,
    ) -> "Dataset":
        """
        Applies X-pattern poisoning to targeted samples.

        Args:
            dataset: The dataset to modify
            indices: List of indices to consider for poisoning
            poisoned_percent: Not used in targeted poisoning
            poisoned_noise_percent: Not used in targeted poisoning

        Returns:
            Modified dataset with poisoned data
        """
        logging.info(f"[{self.__class__.__name__}] Poisoning data with X pattern for target label: {self.target_label}")
        new_dataset = copy.deepcopy(dataset)
        if not isinstance(new_dataset.targets, np.ndarray):
            new_dataset.targets = np.array(new_dataset.targets)
        else:
            new_dataset.targets = new_dataset.targets.copy()

        for i in indices:
            if int(new_dataset.targets[i]) == int(self.target_label):
                t = new_dataset.data[i]
                logging.info(f"[{self.__class__.__name__}] Adding X pattern to image")
                poisoned = self.add_x_to_image(t)

                if isinstance(t, tuple):
                    poisoned = (poisoned, t[1])

                new_dataset.data[i] = poisoned

        return new_dataset

__init__(target_label)

Initialize targeted poisoning strategy.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| target_label | int | The label to target for poisoning | required |

Source code in nebula/addons/attacks/dataset/datapoison.py
def __init__(self, target_label: int):
    """
    Initialize targeted poisoning strategy.

    Args:
        target_label: The label to target for poisoning
    """
    self.target_label = target_label

add_x_to_image(img)

Adds a 10x10 pixel 'X' mark to the top-left corner of an image.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| img | Tensor \| Image | Input image tensor or PIL Image | required |

Returns:

| Type | Description |
| --- | --- |
| Tensor | Modified image with X pattern |

Source code in nebula/addons/attacks/dataset/datapoison.py
def add_x_to_image(self, img: torch.Tensor | Image.Image) -> torch.Tensor:
    """
    Adds a 10x10 pixel 'X' mark to the top-left corner of an image.

    Args:
        img: Input image tensor or PIL Image

    Returns:
        Modified image with X pattern
    """
    logging.info(f"[{self.__class__.__name__}] Adding X pattern to image")
    img = self._convert_to_tensor(img)
    img, is_single_point = self._handle_single_point(img)

    # Handle batch dimension if present
    if len(img.shape) > 3:
        batch_size = img.shape[0]
        img = img.view(-1, *img.shape[-3:])
    else:
        batch_size = 1

    # Ensure image is large enough
    if img.shape[-2] < 10 or img.shape[-1] < 10:
        logging.warning(f"Image too small for X pattern: {img.shape}")
        return img

    # Determine if image is normalized (0-1) or not (0-255)
    is_normalized = img.max() <= 1.0
    pattern_value = 1.0 if is_normalized else 255.0

    # Create X pattern
    for i in range(0, 10):
        for j in range(0, 10):
            if i + j <= 9 or i == j:
                if len(img.shape) == 3:  # RGB image
                    img[..., i, j] = pattern_value
                else:  # Grayscale image
                    img[..., i, j] = pattern_value

    # Restore batch dimension if it was present
    if batch_size > 1:
        img = img.view(batch_size, *img.shape[1:])

    if is_single_point:
        img = img[0]

    return img
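
For a standalone look at the trigger itself, the sketch below stamps the pattern onto a blank 28x28 tensor. The image is synthetic and the import path follows the source path above; any tensor or PIL image of at least 10x10 pixels works.

```python
import torch

from nebula.addons.attacks.dataset.datapoison import TargetedSamplePoisoningStrategy

strategy = TargetedSamplePoisoningStrategy(target_label=4)

# Synthetic all-zero grayscale image; max value <= 1.0, so the pattern is written as 1.0.
img = torch.zeros(28, 28)
marked = strategy.add_x_to_image(img)

print(marked[:10, :10])  # the top-left 10x10 corner now carries the pattern
```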

poison_data(dataset, indices, poisoned_percent, poisoned_noise_percent)

Applies X-pattern poisoning to targeted samples.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| dataset | | The dataset to modify | required |
| indices | list[int] | List of indices to consider for poisoning | required |
| poisoned_percent | float | Not used in targeted poisoning | required |
| poisoned_noise_percent | float | Not used in targeted poisoning | required |

Returns:

| Type | Description |
| --- | --- |
| Dataset | Modified dataset with poisoned data |

Source code in nebula/addons/attacks/dataset/datapoison.py
def poison_data(
    self,
    dataset,
    indices: list[int],
    poisoned_percent: float,
    poisoned_noise_percent: float,
) -> "Dataset":
    """
    Applies X-pattern poisoning to targeted samples.

    Args:
        dataset: The dataset to modify
        indices: List of indices to consider for poisoning
        poisoned_percent: Not used in targeted poisoning
        poisoned_noise_percent: Not used in targeted poisoning

    Returns:
        Modified dataset with poisoned data
    """
    logging.info(f"[{self.__class__.__name__}] Poisoning data with X pattern for target label: {self.target_label}")
    new_dataset = copy.deepcopy(dataset)
    if not isinstance(new_dataset.targets, np.ndarray):
        new_dataset.targets = np.array(new_dataset.targets)
    else:
        new_dataset.targets = new_dataset.targets.copy()

    for i in indices:
        if int(new_dataset.targets[i]) == int(self.target_label):
            t = new_dataset.data[i]
            logging.info(f"[{self.__class__.__name__}] Adding X pattern to image")
            poisoned = self.add_x_to_image(t)

            if isinstance(t, tuple):
                poisoned = (poisoned, t[1])

            new_dataset.data[i] = poisoned

    return new_dataset
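
As with the non-targeted strategy, this class can also be driven directly. The sketch below marks every MNIST training sample whose label is 4; the dataset, root path, and target label are illustrative, and the percentage arguments are accepted but ignored by this strategy.

```python
from torchvision.datasets import MNIST

from nebula.addons.attacks.dataset.datapoison import TargetedSamplePoisoningStrategy

train_set = MNIST(root="./data", train=True, download=True)
indices = list(range(len(train_set)))

strategy = TargetedSamplePoisoningStrategy(target_label=4)
# poisoned_percent and poisoned_noise_percent are part of the shared interface
# but are not used by the targeted strategy.
poisoned_set = strategy.poison_data(train_set, indices, poisoned_percent=0, poisoned_noise_percent=0)
```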