Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

For OCGNN models, update hypersphere radius, and filter training data with active_mask #95

Merged
merged 10 commits into from
Jan 3, 2024
Binary file added cache/nmd_5a90b8b03F.pt
Binary file not shown.
Binary file added cache/nmd_6d7e94a03F.pt
Binary file not shown.
Binary file added cache/nmd_d5126beb3F.pt
Binary file not shown.
1 change: 1 addition & 0 deletions docs/examples/intro.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
# To train the detector with the loaded data, simply feed the
# ``torch_geometric.data.Data`` object into the detector via ``fit``.

# To train the detector on the training nodes only, set ``data.active_mask = data.train_mask`` before calling ``fit``.

detector.fit(data)

Expand Down
34 changes: 23 additions & 11 deletions pygod/detector/adone.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ def init_model(self, **kwargs):
w5=self.w5,
**kwargs).to(self.device)

def forward_model(self, data):
def forward_model(self, data, is_train=True):
batch_size = data.batch_size
node_idx = data.n_id

Expand All @@ -198,16 +198,28 @@ def forward_model(self, data):
edge_index = data.edge_index.to(self.device)

x_, s_, h_a, h_s, dna, dns, dis_a, dis_s = self.model(x, s, edge_index)
loss, oa, os, oc = self.model.loss_func(x[:batch_size],
x_[:batch_size],
s[:batch_size],
s_[:batch_size],
h_a[:batch_size],
h_s[:batch_size],
dna[:batch_size],
dns[:batch_size],
dis_a[:batch_size],
dis_s[:batch_size])
if 'active_mask' in data.keys():
loss, oa, os, oc = self.model.loss_func(x[:batch_size][data.active_mask, :],
x_[:batch_size][data.active_mask, :],
s[:batch_size][data.active_mask, :],
s_[:batch_size][data.active_mask, :],
h_a[:batch_size][data.active_mask, :],
h_s[:batch_size][data.active_mask, :],
dna[:batch_size][data.active_mask, :],
dns[:batch_size][data.active_mask, :],
dis_a[:batch_size][data.active_mask, :],
dis_s[:batch_size][data.active_mask, :])
else:
loss, oa, os, oc = self.model.loss_func(x[:batch_size],
x_[:batch_size],
s[:batch_size],
s_[:batch_size],
h_a[:batch_size],
h_s[:batch_size],
dna[:batch_size],
dns[:batch_size],
dis_a[:batch_size],
dis_s[:batch_size])

self.attribute_score_[node_idx[:batch_size]] = oa.detach().cpu()
self.structural_score_[node_idx[:batch_size]] = os.detach().cpu()
Expand Down
26 changes: 17 additions & 9 deletions pygod/detector/anomalydae.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def init_model(self, **kwargs):
act=self.act,
**kwargs).to(self.device)

def forward_model(self, data):
def forward_model(self, data, is_train=True):
batch_size = data.batch_size
node_idx = data.n_id

Expand All @@ -181,14 +181,22 @@ def forward_model(self, data):
weight = 1 - self.alpha
pos_weight_a = self.eta / (1 + self.eta)
pos_weight_s = self.theta / (1 + self.theta)

score = self.model.loss_func(x[:batch_size],
x_[:batch_size],
s[:batch_size, node_idx],
s_[:batch_size],
weight,
pos_weight_a,
pos_weight_s)
if 'active_mask' in data.keys():
score = self.model.loss_func(x[:batch_size][data.active_mask, :],
x_[:batch_size][data.active_mask, :],
s[:batch_size, node_idx][data.active_mask, :],
s_[:batch_size][data.active_mask, :],
weight,
pos_weight_a,
pos_weight_s)
else:
score = self.model.loss_func(x[:batch_size],
x_[:batch_size],
s[:batch_size, node_idx],
s_[:batch_size],
weight,
pos_weight_a,
pos_weight_s)

loss = torch.mean(score)

Expand Down
52 changes: 36 additions & 16 deletions pygod/detector/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,23 +473,32 @@ def fit(self, data, label=None):
else:
self.emb[node_idx[:batch_size]] = \
self.model.emb[:batch_size].cpu()
self.decision_score_[node_idx[:batch_size]] = score
if 'active_mask' in data.keys():
self.decision_score_[node_idx[:batch_size][data.active_mask]] = score
else:
self.decision_score_[node_idx[:batch_size]] = score

optimizer.zero_grad()
loss.backward()
optimizer.step()

loss_value = epoch_loss / data.x.shape[0]
if 'active_mask' in data.keys():
loss_value = epoch_loss / data.x[data.active_mask, :].shape[0]
else:
loss_value = epoch_loss / data.x.shape[0]
if self.gan:
loss_value = (self.epoch_loss_g / data.x.shape[0], loss_value)
if 'active_mask' in data.keys():
loss_value = (self.epoch_loss_g / data.x[data.active_mask, :].shape[0], loss_value)
else:
loss_value = (self.epoch_loss_g / data.x.shape[0], loss_value)
logger(epoch=epoch,
loss=loss_value,
score=self.decision_score_,
target=label,
time=time.time() - start_time,
verbose=self.verbose,
train=True)

if 'active_mask' in data.keys():
self.decision_score_ = self.decision_score_[data.active_mask]
self._process_decision_score()
return self

Expand All @@ -510,7 +519,7 @@ def decision_function(self, data, label=None):
self.emb = torch.zeros(data.x.shape[0], self.hid_dim)
start_time = time.time()
for sampled_data in loader:
loss, score = self.forward_model(sampled_data)
loss, score = self.forward_model(sampled_data, is_train=False)
batch_size = sampled_data.batch_size
node_idx = sampled_data.n_id
if self.save_emb:
Expand All @@ -522,15 +531,26 @@ def decision_function(self, data, label=None):
else:
self.emb[node_idx[:batch_size]] = \
self.model.emb[:batch_size].cpu()

outlier_score[node_idx[:batch_size]] = score

logger(loss=loss.item() / data.x.shape[0],
score=outlier_score,
target=label,
time=time.time() - start_time,
verbose=self.verbose,
train=False)
if 'active_mask' in data.keys():
outlier_score[node_idx[:batch_size][data.active_mask]] = score
else:
outlier_score[node_idx[:batch_size]] = score
if 'active_mask' in data.keys():
logger(loss=loss.item() / data.x[data.active_mask, :].shape[0],
score=outlier_score,
target=label,
time=time.time() - start_time,
verbose=self.verbose,
train=False)
else:
logger(loss=loss.item() / data.x.shape[0],
score=outlier_score,
target=label,
time=time.time() - start_time,
verbose=self.verbose,
train=False)
if 'active_mask' in data.keys():
outlier_score = outlier_score[data.active_mask]
return outlier_score

def predict(self,
Expand Down Expand Up @@ -629,7 +649,7 @@ def init_model(self):
"""

@abstractmethod
def forward_model(self, data):
def forward_model(self, data, is_train=True):
"""
Forward pass of the neural network detector.

Expand Down
2 changes: 1 addition & 1 deletion pygod/detector/cola.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def init_model(self, **kwargs):
backbone=self.backbone,
**kwargs).to(self.device)

def forward_model(self, data):
def forward_model(self, data, is_train=True):
batch_size = data.batch_size

x = data.x.to(self.device)
Expand Down
20 changes: 13 additions & 7 deletions pygod/detector/conad.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def init_model(self, **kwargs):
backbone=self.backbone,
**kwargs).to(self.device)

def forward_model(self, data):
def forward_model(self, data, is_train=True):
batch_size = data.batch_size
node_idx = data.n_id

Expand All @@ -204,12 +204,18 @@ def forward_model(self, data):

x_, s_ = self.model(x, edge_index)
h = self.model.emb

score = self.model.loss_func(x[:batch_size],
x_[:batch_size],
s[:batch_size, node_idx],
s_[:batch_size],
self.weight)
if 'active_mask' in data.keys():
score = self.model.loss_func(x[:batch_size][data.active_mask, :],
x_[:batch_size][data.active_mask, :],
s[:batch_size, node_idx][data.active_mask, :],
s_[:batch_size][data.active_mask, :],
self.weight)
else:
score = self.model.loss_func(x[:batch_size],
x_[:batch_size],
s[:batch_size, node_idx],
s_[:batch_size],
self.weight)

if self.model.training:
margin_loss = self.margin_loss_func(h, h, h_aug) * label_aug
Expand Down
20 changes: 13 additions & 7 deletions pygod/detector/dominant.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def init_model(self, **kwargs):
backbone=self.backbone,
**kwargs).to(self.device)

def forward_model(self, data):
def forward_model(self, data, is_train=True):
batch_size = data.batch_size
node_idx = data.n_id

Expand All @@ -159,12 +159,18 @@ def forward_model(self, data):
edge_index = data.edge_index.to(self.device)

x_, s_ = self.model(x, edge_index)

score = self.model.loss_func(x[:batch_size],
x_[:batch_size],
s[:batch_size, node_idx],
s_[:batch_size],
self.weight)
if 'active_mask' in data.keys():
score = self.model.loss_func(x[:batch_size][data.active_mask, :],
x_[:batch_size][data.active_mask, :],
s[:batch_size, node_idx][data.active_mask, :],
s_[:batch_size][data.active_mask, :],
self.weight)
else:
score = self.model.loss_func(x[:batch_size],
x_[:batch_size],
s[:batch_size, node_idx],
s_[:batch_size],
self.weight)

loss = torch.mean(score)

Expand Down
28 changes: 19 additions & 9 deletions pygod/detector/done.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def init_model(self, **kwargs):
w5=self.w5,
**kwargs).to(self.device)

def forward_model(self, data):
def forward_model(self, data, is_train=True):
batch_size = data.batch_size
node_idx = data.n_id

Expand All @@ -197,14 +197,24 @@ def forward_model(self, data):
edge_index = data.edge_index.to(self.device)

x_, s_, h_a, h_s, dna, dns = self.model(x, s, edge_index)
loss, oa, os, oc = self.model.loss_func(x[:batch_size],
x_[:batch_size],
s[:batch_size],
s_[:batch_size],
h_a[:batch_size],
h_s[:batch_size],
dna[:batch_size],
dns[:batch_size])
if 'active_mask' in data.keys():
loss, oa, os, oc = self.model.loss_func(x[:batch_size][data.active_mask,:],
x_[:batch_size][data.active_mask,:],
s[:batch_size][data.active_mask,:],
s_[:batch_size][data.active_mask,:],
h_a[:batch_size][data.active_mask,:],
h_s[:batch_size][data.active_mask,:],
dna[:batch_size][data.active_mask,:],
dns[:batch_size][data.active_mask,:])
else:
loss, oa, os, oc = self.model.loss_func(x[:batch_size],
x_[:batch_size],
s[:batch_size],
s_[:batch_size],
h_a[:batch_size],
h_s[:batch_size],
dna[:batch_size],
dns[:batch_size])

self.attribute_score_[node_idx[:batch_size]] = oa.detach().cpu()
self.structural_score_[node_idx[:batch_size]] = os.detach().cpu()
Expand Down
2 changes: 1 addition & 1 deletion pygod/detector/gaan.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def init_model(self, **kwargs):
act=self.act,
**kwargs).to(self.device)

def forward_model(self, data):
def forward_model(self, data, is_train=True):
batch_size = data.batch_size
node_idx = data.n_id

Expand Down
13 changes: 9 additions & 4 deletions pygod/detector/gae.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def init_model(self, **kwargs):
backbone=self.backbone,
**kwargs).to(self.device)

def forward_model(self, data):
def forward_model(self, data, is_train=True):

batch_size = data.batch_size
node_idx = data.n_id
Expand All @@ -165,9 +165,14 @@ def forward_model(self, data):
h = self.model(x, edge_index)

target = s if self.recon_s else x
score = torch.mean(self.model.loss_func(target[:batch_size],
h[:batch_size],
reduction='none'), dim=1)
if 'active_mask' in data.keys():
score = torch.mean(self.model.loss_func(target[:batch_size][data.active_mask,:],
h[:batch_size][data.active_mask,:],
reduction='none'), dim=1)
else:
score = torch.mean(self.model.loss_func(target[:batch_size],
h[:batch_size],
reduction='none'), dim=1)

loss = torch.mean(score)

Expand Down
20 changes: 13 additions & 7 deletions pygod/detector/guide.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def init_model(self, **kwargs):
act=self.act,
**kwargs).to(self.device)

def forward_model(self, data):
def forward_model(self, data, is_train=True):

batch_size = data.batch_size

Expand All @@ -193,12 +193,18 @@ def forward_model(self, data):
edge_index = data.edge_index.to(self.device)

x_, s_ = self.model(x, s, edge_index)

score = self.model.loss_func(x[:batch_size],
x_[:batch_size],
s[:batch_size],
s_[:batch_size],
self.alpha)
if 'active_mask' in data.keys():
score = self.model.loss_func(x[:batch_size][data.active_mask, :],
x_[:batch_size][data.active_mask, :],
s[:batch_size][data.active_mask, :],
s_[:batch_size][data.active_mask, :],
self.alpha)
else:
score = self.model.loss_func(x[:batch_size],
x_[:batch_size],
s[:batch_size],
s_[:batch_size],
self.alpha)

loss = torch.mean(score)

Expand Down
Loading