Added 22/07/2025
Machine Learning / regression
adversarial_regression
Dimension
{
"x": 12,
"y": 22,
"F": 1,
"G": 0,
"H": 0,
"f": 1,
"g": 2,
"h": 0
}
Solution
{
"optimality": "infeasible",
"x": [0,0,0,0,0,0,0,0,0,0,0,0],
"y": [0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1],
"F": 100.2705328714835,
"G": [],
"H": [],
"f": 2.5858942396845084,
"g": [-0.5,-0.5],
"h": []
}
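The reported point can be checked numerically against the Python implementation further down this page. A minimal sketch, assuming the module is importable as bolib3.adversarial_regression (a hypothetical import path) and that insurance.csv is available at the listed data path:

import numpy as np
from bolib3 import adversarial_regression as ar

data = ar.read_data()
x = np.zeros(ar.dimension("x", data))      # reported upper-level point
y = np.full(ar.dimension("y", data), 0.1)  # reported lower-level point
print(ar.F(x, y, data))  # approx. 100.2705
print(ar.f(x, y, data))  # approx. 2.5859
print(ar.g(x, y, data))  # [-0.5, -0.5]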
$title adversarial_regression
$onText
Adversarial regression: a learner fits a ridge-regularised linear model
while an adversary, at the lower level, perturbs m samples to push their
predicted labels towards target values, subject to a cosine-similarity
constraint on each perturbed sample.
$offText
set i / 1*4 /;
variables obj_val_upper, obj_val_lower, x(i), y(i);
equations obj_eq_upper, obj_eq_lower
G1_upper, G2_upper, H1_upper, H2_upper,
g1_lower, g2_lower, h1_lower, h2_lower;
* Objective functions (placeholder bodies only: the quadratic loss terms
* depend on the dataset and are not encoded in this GAMS template)
obj_eq_upper.. obj_val_upper =e= 0;
obj_eq_lower.. obj_val_lower =e= 0;
* Upper-level constraints
G1_upper.. x('1') =g= 1.0;
G2_upper.. x('2') =g= -2.0;
H1_upper.. x('3') =e= 3.0;
H2_upper.. x('4') =e= -4.0;
* Lower-level constraints
g1_lower.. y('1') =g= 1.0;
g2_lower.. y('2') =g= -2.0;
h1_lower.. y('3') =e= 3.0;
h2_lower.. y('4') =e= -4.0;
* Solve
model adversarial_regression / all /;
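* The emp.info annotation below declares the bilevel structure for GAMS/EMP:
* x are the upper-level variables, and the lower level minimises
* obj_val_lower over y subject to the listed lower-level constraints.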
$echo bilevel x min obj_val_lower y obj_eq_lower g1_lower g2_lower h1_lower h2_lower > "%emp.info%"
solve adversarial_regression us emp min obj_val_upper;
import numpy as np
import os
import pandas as pd
# Properties
name: str = "adversarial_regression"
category: str = "machine_learning"
subcategory: str = "regression"
datasets: list = [
    "insurance.csv",
    # "wine_quality.csv"
]
paths: list = [
    os.path.join("bolib3", "data", "insurance.csv"),
    # os.path.join("bolib3", "data", "wine_quality.csv")
]
# Parameters
rho = 0.1                 # regularisation parameter
delta = 0.5               # cosine-similarity threshold
y_idx = np.array([0, 1])  # dataset rows treated as the adversarial samples
# Methods
def F(x, y, data=None):
    """
    Upper-level objective function
    """
    D = data["D"]
    gamma = data["gamma"]
    adv_labels = data["adv_labels"]
    m = len(y_idx)
    p = D.shape[1]
    # Stack the clean data with the adversarial samples and prepend a bias column
    dataset = add_ones(np.concatenate((D, y.reshape((m, p)))))
    labels = np.concatenate((gamma, adv_labels))
    # Ridge-regularised sum of squared prediction errors over all samples
    err = np.matmul(x, np.transpose(dataset)) - labels
    return np.matmul(err, err) + (1/rho)*np.matmul(x, x)
def G(x, y, data=None):
    """
    Upper-level inequality constraints
    """
    return np.empty(0)

def H(x, y, data=None):
    """
    Upper-level equality constraints
    """
    return np.empty(0)
def f(x, y, data=None):
    """
    Lower-level objective function
    """
    D = data["D"]
    Z = data["Z"]
    m = len(y_idx)
    p = D.shape[1]
    # Squared error between the model's predictions on the adversarial
    # samples and the adversary's target labels Z
    err = np.matmul(x, np.transpose(add_ones(y.reshape((m, p))))) - Z
    return np.matmul(err, err)
def g(x, y, data=None):
    """
    Lower-level inequality constraints
    """
    m = len(y_idx)
    p = data["D"].shape[1]
    # Reference point for the adversarial samples; each sample must keep
    # cosine similarity of at least delta with its reference row
    y0 = np.full(m*p, 0.1)
    return delta - np.array([
        cosim(y.reshape((m, p))[i], y0.reshape((m, p))[i]) for i in range(m)
    ])
def h(x, y, data=None):
    """
    Lower-level equality constraints
    """
    return np.empty(0)
def read_data(filepath=paths[0]):
    """
    If the bilevel program is parameterized by data, this function should
    provide code to read the data file and return an appropriate python structure.
    """
    df = pd.get_dummies(pd.read_csv(filepath), dtype="int")
    # Normalise the label column to [0, 1]
    df["charges"] = (df["charges"] - df["charges"].min())/(df["charges"].max() - df["charges"].min())
    # Split the rows indexed by y_idx off as the adversarial samples
    D = df.drop("charges", axis=1).drop(y_idx).to_numpy()
    gamma = df["charges"].drop(y_idx).to_numpy()
    adv_labels = df["charges"][y_idx].to_numpy()
    Z = adv_labels + 1
    return {"D": D, "gamma": gamma, "adv_labels": adv_labels, "Z": Z}
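# Example (sketch): for insurance.csv the returned structure satisfies
#   data["D"].shape == (n, 11)    clean one-hot-encoded samples
#   data["gamma"].shape == (n,)   normalised labels of the clean samples
#   data["Z"] == data["adv_labels"] + 1   (adversary's target labels)
# where n is the number of rows remaining after the y_idx rows are removed.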
def dimension(key='', data=None):
    """
    If the argument 'key' is not specified, then:
    - a dictionary mapping variable/function names (str) to the corresponding dimension (int) is returned.
    If the first argument 'key' is specified, then:
    - a single integer representing the dimension of the variable/function with the name {key} is returned.
    """
    n = {
        "x": data["D"].shape[1] + 1,           # Upper-level variables
        "y": len(y_idx)*(data["D"].shape[1]),  # Lower-level variables
        "F": 1,           # Upper-level objective functions
        "G": 0,           # Upper-level inequality constraints
        "H": 0,           # Upper-level equality constraints
        "f": 1,           # Lower-level objective functions
        "g": len(y_idx),  # Lower-level inequality constraints
        "h": 0,           # Lower-level equality constraints
    }
    if key in n:
        return n[key]
    return n
# Extra Functions
def add_ones(dataset):
    # Prepend a column of ones so the first weight in x acts as a bias term
    return np.concatenate((np.ones((len(dataset), 1)), dataset), 1)

def cosim(x, x0):
    # Cosine similarity between the vectors x and x0
    return (np.matmul(x, x0))/(np.sqrt(np.matmul(x, x))*np.sqrt(np.matmul(x0, x0)))
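Calling dimension without a key reproduces the Dimension block at the top of this page. A short usage sketch for the insurance.csv instance:

if __name__ == "__main__":
    # Reproduces the Dimension block above for insurance.csv
    example_data = read_data()
    print(dimension(data=example_data))
    # {'x': 12, 'y': 22, 'F': 1, 'G': 0, 'H': 0, 'f': 1, 'g': 2, 'h': 0}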
classdef adversarial_regression
    %{
    Coming soon
    %}
    properties(Constant)
        name = 'adversarial_regression';
        category = 'machine_learning';
        subcategory = 'regression';
    end
end
\subsection{adversarial\_regression}
\label{subsec:adversarial_regression}
% Description
We consider a scenario similar to the adversarial problem above. Let $D \in \mathbb{R}^{n \times p}$ be a static set of $n$ samples of $p$ features with corresponding labels $\gamma \in \mathbb{R}^n$. The learner, at the upper level, seeks to construct a prediction model with weights $w \in \mathbb{R}^p$ on this data. Meanwhile, suppose there is an adversary who creates a dataset $X \in \mathbb{R}^{m \times p}$ containing $m$ samples of the same features with corresponding labels $Y \in \mathbb{R}^m$. The adversary seeks to have their data mislabelled with some target labels $Z = Y + \mu$ for some $\mu \in \mathbb{R}^m$. For example, suppose the learner is training a model to predict the expected insurance payouts of new customers; an adversary might lie on their application form to obtain lower insurance premiums.
% Equation
\begin{flalign*}
\minimise_{w, X} \quad
& \frac{1}{n} \Vert Dw - \gamma \Vert_2^2 + \frac{1}{m} \Vert Xw - Y \Vert_2^2 + \frac{1}{\rho} \Vert w \Vert_2^2 \\
\subjectto \quad
& X \in \argmin_{X}
\left\{
\begin{array}{l}
\frac{1}{m} \Vert Xw - Z \Vert_2^2 \\
\text{s.t.} \quad \delta - \frac{X_i \cdot X_i^0}{\Vert X_i \Vert \, \Vert X_i^0 \Vert} \leq 0, \quad i = 1,\dots,m \\
\end{array}
\right.
\end{flalign*}
where $\rho \in \mathbb{R}$ is a regularisation parameter, $\delta \in \mathbb{R}$ is the similarity threshold, and $X_i^0$ denotes the adversary's original (unperturbed) $i$-th sample.
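For the \texttt{insurance.csv} instance reported at the top of this page, these quantities instantiate as follows (a note inferred from the Dimension block; the implementation prepends a bias weight to $w$):
\[
p = 11, \qquad m = 2, \qquad
\dim w = p + 1 = 12, \qquad
\dim X = mp = 22, \qquad
\rho = 0.1, \qquad \delta = 0.5.
\]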