[BUG] AptaNet only seems to be learning either 0 or 1 #144

@satvshr

Description

For the AptaTrans dataset, when I run a Benchmarking experiment on AptaNet, it predicts only 0s (negatives); after applying the class balancing sketched below, it predicts only 1s.
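
(The exact balancing code is omitted here; a naive random-oversampling step of the kind I mean looks roughly like the following — an illustrative sketch only, reusing X and y from the script below, not necessarily the exact code I ran:)

import numpy as np

# Hypothetical balancing step: duplicate randomly chosen minority-class
# (positive) samples until both classes have the same count.
rng = np.random.default_rng(0)
pos_idx = np.flatnonzero(y == 1)
neg_idx = np.flatnonzero(y == 0)
extra = rng.choice(pos_idx, size=len(neg_idx) - len(pos_idx), replace=True)
idx = np.concatenate([neg_idx, pos_idx, extra])
X_bal, y_bal = [X[i] for i in idx], y[idx]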

When I run this script:

import numpy as np
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import KFold

from pyaptamer.aptanet import AptaNetPipeline
from pyaptamer.datasets import load_csv_dataset
from pyaptamer.benchmarking import Benchmarking  # adjust import path if needed


def load_dataset():
    # Load dataset: aptamer, protein, label
    X_raw, y_raw = load_csv_dataset("train_li2014", "label", return_X_y=True)

    # Build (aptamer, protein) pairs
    X = list(zip(X_raw.iloc[:, 0], X_raw.iloc[:, 1]))

    # Force labels into a flat numpy int array
    y = np.where(np.array(y_raw) == "positive", 1, 0).astype(int).ravel()

    return X, y

X, y = load_dataset()
print("=== Dataset Distribution ===")
print("Total samples:", len(y))
print("Label distribution:", np.bincount(y).tolist())


# === Estimator ===
clf = AptaNetPipeline()

# === Cross-validation ===
cv = KFold(n_splits=3, shuffle=True, random_state=1337)

# === Benchmark ===
bench = Benchmarking(
    estimators=[clf],
    metrics=[accuracy_score, f1_score],
    X=X,
    y=y,
    cv=cv,
)

results = bench.run()

print("\n=== Benchmark Results ===")
print(results)
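
To confirm that each fold really emits a single class, the out-of-fold predictions can be inspected directly — a minimal sketch with scikit-learn's cross_val_predict, reusing clf, X, y, and cv from above (this assumes AptaNetPipeline follows the sklearn estimator API, as its use in Benchmarking suggests):

from sklearn.model_selection import cross_val_predict

# Gather out-of-fold predictions and count how often each label is predicted.
oof_preds = cross_val_predict(clf, X, y, cv=cv)
print("Predicted label distribution:",
      np.bincount(np.asarray(oof_preds, dtype=int)).tolist())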

These are the results I get:

=== Dataset Distribution ===
Total samples: 2320
Label distribution: [1740, 580]

=== Benchmark Results ===
                                train  test
estimator       metric
AptaNetPipeline accuracy_score   0.75  0.75
                f1_score         0.00  0.00

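Note that 1740/2320 ≈ 0.75, so an accuracy of 0.75 combined with an F1 score of 0 is exactly what a constant all-negative predictor scores on this split. A majority-class baseline reproduces the numbers — a minimal sketch using scikit-learn's DummyClassifier, with placeholder features since DummyClassifier ignores its inputs anyway:

from sklearn.dummy import DummyClassifier
from sklearn.model_selection import cross_val_score

# A majority-class baseline: always predicts the most frequent label (0 here).
dummy = DummyClassifier(strategy="most_frequent")
X_dummy = np.zeros((len(y), 1))  # features are ignored by the dummy estimator
print(cross_val_score(dummy, X_dummy, y, cv=cv, scoring="accuracy").mean())  # ≈ 0.75
print(cross_val_score(dummy, X_dummy, y, cv=cv, scoring="f1").mean())        # 0.0
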
Since the benchmark numbers match this trivial baseline, the pipeline is predicting the negative class for every sample. To isolate the network itself from the feature pipeline, I also ran this testing script:

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.metrics import accuracy_score
from skorch import NeuralNetBinaryClassifier
from pyaptamer.aptanet._aptanet_nn import AptaNetMLP  # adjust import if needed


def torch_version(X_np, y_np, n_features):
    print("\n=== Torch version ===")
    X = torch.from_numpy(X_np)
    y = torch.from_numpy(y_np).unsqueeze(1)

    # Model
    model = AptaNetMLP(
        input_dim=n_features,
        hidden_dim=64,
        n_hidden=3,
        dropout=0.3,
        output_dim=1,
        use_lazy=False,
    )

    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=0.01)

    # --- Before training ---
    with torch.no_grad():
        probs = torch.sigmoid(model(X))
        preds = (probs > 0.5).long()
    print("[Before training]")
    print("Pred labels distribution:", torch.bincount(preds.squeeze()).tolist())
    print("First 10 probabilities:\n", probs[:10].squeeze())

    # --- Training ---
    epochs, batch_size = 5, 64
    for epoch in range(epochs):
        model.train()
        perm = torch.randperm(X.size(0))
        for i in range(0, X.size(0), batch_size):
            idx = perm[i:i+batch_size]
            xb, yb = X[idx], y[idx]
            optimizer.zero_grad()
            logits = model(xb)
            loss = criterion(logits, yb)
            loss.backward()
            optimizer.step()

    # --- After training ---
    model.eval()
    with torch.no_grad():
        probs = torch.sigmoid(model(X))
        preds = (probs > 0.5).long()
    print("[After training]")
    print("Pred labels distribution:", torch.bincount(preds.squeeze()).tolist())
    print("First 10 probabilities:\n", probs[:10].squeeze())
    print("Accuracy:", accuracy_score(y_np, preds.numpy()))


def skorch_version(X_np, y_np, n_features):
    print("\n=== Skorch version ===")
    net = NeuralNetBinaryClassifier(
        module=AptaNetMLP,
        module__input_dim=None,   # lazy
        module__hidden_dim=64,
        module__n_hidden=3,
        module__dropout=0.3,
        module__output_dim=1,
        module__use_lazy=True,
        criterion=nn.BCEWithLogitsLoss,
        max_epochs=5,
        lr=0.01,
        optimizer=torch.optim.RMSprop,
        optimizer__alpha=0.9,
        optimizer__eps=1e-08,
        device="cuda" if torch.cuda.is_available() else "cpu",
        verbose=1,
    )

    # --- Before training ---
    net.initialize()
    probs = net.predict_proba(X_np)
    preds = net.predict(X_np)
    print("[Before training]")
    print("Pred labels distribution:", np.bincount(preds).tolist())
    print("First 10 probabilities:\n", probs[:10])

    # --- Training ---
    net.fit(X_np, y_np)

    # --- After training ---
    preds = net.predict(X_np)
    probs = net.predict_proba(X_np)
    print("[After training]")
    print("Pred labels distribution:", np.bincount(preds).tolist())
    print("First 10 probabilities:\n", probs[:10])
    print("Accuracy:", accuracy_score(y_np, preds))


def main():
    # Shared dataset (rule-based so it's learnable)
    n_samples, n_features = 1000, 50
    X_np = np.random.randint(0, 10, size=(n_samples, n_features)).astype(np.float32)
    threshold = n_features * 4.5
    y_np = (X_np.sum(axis=1) > threshold).astype(np.float32)

    # Print dataset distribution before training
    print("=== Dataset Distribution ===")
    print("Total samples:", n_samples)
    print("Label distribution:", np.bincount(y_np.astype(int)).tolist())

    torch_version(X_np, y_np, n_features)
    skorch_version(X_np, y_np, n_features)


if __name__ == "__main__":
    main()

Results:

=== Dataset Distribution ===
Total samples: 1000
Label distribution: [527, 473]

=== Torch version ===
[Before training]
Pred labels distribution: [518, 482]
First 10 probabilities:
 tensor([0.5343, 0.4396, 0.6549, 0.3951, 0.5012, 0.4149, 0.5440, 0.2810, 0.6070,
        0.4040])
[After training]
Pred labels distribution: [5, 995]
First 10 probabilities:
 tensor([0.5303, 0.5734, 0.5597, 0.5510, 0.5734, 0.5280, 0.5368, 0.5469, 0.5409,
        0.5373])
Accuracy: 0.478

=== Skorch version ===
[Before training]
Pred labels distribution: [999, 1]
First 10 probabilities:
 [[0.5201893  0.47981068]
 [0.5381153  0.46188468]
 [0.51390857 0.48609143]
 [0.5105337  0.48946628]
 [0.52273107 0.4772689 ]
 [0.5260486  0.4739514 ]
 [0.5122141  0.48778588]
 [0.5266731  0.4733269 ]
 [0.51584256 0.48415747]
 [0.50843424 0.49156576]]
Re-initializing module because the following parameters were re-set: dropout, hidden_dim, input_dim, n_hidden, output_dim, use_lazy.
Re-initializing criterion.
Re-initializing optimizer.
  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        0.7199       0.4700        0.6972  0.0923
      2        0.7111       0.4700        0.7079  0.0299
      3        0.6953       0.4700        0.7038  0.0187
      4        0.7110       0.5100        0.6921  0.0243
      5        0.7007       0.4700        0.6995  0.0326
[After training]
Pred labels distribution: [0, 1000]
First 10 probabilities:
 [[0.43433756 0.56566244]
 [0.42910838 0.5708916 ]
 [0.43987745 0.56012255]
 [0.45354545 0.54645455]
 [0.4166276  0.5833724 ]
 [0.4298364  0.5701636 ]
 [0.43892968 0.5610703 ]
 [0.4603036  0.5396964 ]
 [0.45096928 0.5490307 ]
 [0.4541608  0.5458392 ]]
Accuracy: 0.473

The discussion thread can be followed starting from this comment.

The AptaNet network itself seems to be heavily favouring one class even though this second dataset is randomly generated and balanced, which makes no sense. The skorch network also produces extreme prediction distributions (nearly all negatives or nearly all positives) both before and after training, which should not be happening.
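
A quick way to tell a collapsed network from a slow-learning one is to look at the spread of the raw logits: after training, all probabilities above sit in a narrow band around 0.53–0.57, which suggests near-constant outputs. A minimal diagnostic sketch, run inside torch_version after the training loop and reusing its model and X:

import torch

# A near-zero standard deviation means the network produces an almost constant
# logit regardless of input, i.e. it has learned no input-dependent signal.
with torch.no_grad():
    logits = model(X)
print("logit mean:", logits.mean().item(), "logit std:", logits.std().item())

For the real (imbalanced) li2014 data, weighting the positive class via nn.BCEWithLogitsLoss(pos_weight=torch.tensor([1740 / 580])) would be the standard mitigation, but that cannot explain the collapse on this balanced synthetic dataset.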
