import torch
import pandas as pd
from torch.utils.data import DataLoader
from CNN import InfernoCalibNet, ChestXRayDataset, OUT_DIR, CALIB_DIR
def evaluate_and_prepare_inferno():
    """Run the saved network over the test split and export its logits.

    Reads ``OUT_DIR / "ml_test.csv"`` twice: once as a metadata frame and
    once as the source of ``ChestXRayDataset``. The model weights come from
    ``CALIB_DIR / "InfernoCalibNetML50.pth"``. The per-row outputs and
    targets are appended to the metadata frame as ``LOGIT_*`` / ``LABEL_*``
    columns and the frame is written to
    ``CALIB_DIR / "calibration_full.csv"``.

    Returns:
        tuple: ``(df_meta, preds_tensor, targets_tensor, total_loss)`` where
        the two tensors are the concatenated CPU outputs/targets of every
        batch and ``total_loss`` is the ``BCEWithLogitsLoss`` of the outputs
        against the targets, as a Python float.
    """
    torch.cuda.empty_cache()

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    test_csv = OUT_DIR / "ml_test.csv"
    df_meta = pd.read_csv(test_csv)
    test_dt = ChestXRayDataset(test_csv, transform=False)
    test_loader = DataLoader(
        test_dt,
        batch_size=32,
        shuffle=False,  # keep dataset order so outputs line up with df_meta rows
        num_workers=8,
        pin_memory=use_cuda,  # pinning only helps host-to-GPU copies
    )

    model = InfernoCalibNet(num_classes=2, model_type="resnet50").to(device)
    # map_location lets a checkpoint saved on a GPU load on a CPU-only host,
    # matching the CPU fallback chosen for `device` above.
    state_dict = torch.load(
        CALIB_DIR / "InfernoCalibNetML50.pth",
        map_location=device,
        weights_only=True,
    )
    model.load_state_dict(state_dict)
    model.eval()

    all_preds = []
    all_targets = []
    with torch.no_grad():
        for inputs, targets in test_loader:
            outputs = model(inputs.to(device))
            all_preds.append(outputs.cpu())
            # Targets are only collected, never used on the device, so they
            # are not sent there and back.
            all_targets.append(targets.cpu())

    preds_tensor = torch.cat(all_preds)
    targets_tensor = torch.cat(all_targets)

    df_meta = df_meta.reset_index(drop=True)
    # Output column 0 is written as EFFUSION and column 1 as ATELECTASIS.
    for col, finding in enumerate(("EFFUSION", "ATELECTASIS")):
        df_meta[f"LOGIT_{finding}"] = preds_tensor[:, col].numpy()
        df_meta[f"LABEL_{finding}"] = targets_tensor[:, col].numpy().astype(int)
    df_meta.to_csv(CALIB_DIR / "calibration_full.csv", index=False)

    loss_fn = torch.nn.BCEWithLogitsLoss()
    total_loss = loss_fn(preds_tensor, targets_tensor).item()
    return df_meta, preds_tensor, targets_tensor, total_loss
def stratified_sample(df, label_col, sample_size, seed=42):
    """Split ``df`` into a label-stratified sample and the rows left over.

    Rows whose ``label_col`` value occurs fewer than two times in ``df`` are
    discarded first, so they appear in neither returned frame. The rest is
    handed to scikit-learn's ``train_test_split`` with ``sample_size`` as
    ``train_size`` and ``seed`` as ``random_state``.

    Args:
        df: Source DataFrame.
        label_col: Name of the column to stratify on.
        sample_size: Forwarded unchanged as ``train_size``.
        seed: Random state for the split.

    Returns:
        tuple: ``(stratified_df, remainder_df)``.
    """
    from sklearn.model_selection import train_test_split

    labels = df[label_col]
    class_sizes = labels.value_counts()
    # Keep only the labels that occur at least twice.
    splittable = class_sizes.loc[class_sizes >= 2].index
    eligible = df.loc[labels.isin(splittable)]

    picked, leftover = train_test_split(
        eligible,
        train_size=sample_size,
        stratify=eligible[label_col],
        random_state=seed,
    )
    return picked, leftover
if __name__ == "__main__":
    # Guarded entry point: the evaluation DataLoader starts worker processes,
    # and on platforms that spawn workers the main module is re-imported in
    # each of them. Without this guard the whole pipeline would re-run there.
    df, preds, targets, loss = evaluate_and_prepare_inferno()

    # Stratify on the DISEASELABEL column of the exported frame. To stratify
    # on a single finding instead, pass e.g. label_col="LABEL_ATELECTASIS".
    sampled_df, remainder_df = stratified_sample(
        df, label_col="DISEASELABEL", sample_size=1500
    )
    sampled_df.to_csv(CALIB_DIR / "calibration_train.csv", index=False)
    remainder_df.to_csv(CALIB_DIR / "calibration_test.csv", index=False)