import optuna
import numpy as np
import torch
from torch.utils.data import DataLoader


def objective(trial):
    # 1) Choose the optimizer
    optimizer_name = trial.suggest_categorical("optimizer", ["adam", "adamw", "sgd"])
    # 2) Pick a learning-rate range suited to each optimizer
    if optimizer_name == "adam":
        lr = trial.suggest_float("lr", 1e-4, 1e-3, log=True)
    elif optimizer_name == "adamw":
        lr = trial.suggest_float("lr", 1e-5, 5e-4, log=True)
    else:  # SGD
        lr = trial.suggest_float("lr", 1e-3, 5e-2, log=True)
    # 3) Remaining hyperparameters
    lstm_hidden_size = trial.suggest_categorical("lstm_hidden_size", [256, 512])
    fc_hidden_size = trial.suggest_categorical("fc_hidden_size", [128, 256])
    dropout_rate = trial.suggest_categorical("dropout", [0.1, 0.2, 0.3])
    batch_size = trial.suggest_int("batch_size", 16, 128, step=16)
    all_val_losses = []
    train_params = {
        "batch_size": batch_size,
        "shuffle": True,
        "num_workers": 4,
        "pin_memory": cfg.use_cuda,
        "persistent_workers": cfg.use_cuda,
        # Note: prefetch_factor=None with num_workers > 0 requires PyTorch >= 2.0;
        # older versions expect an int here.
        "prefetch_factor": 2 if cfg.use_cuda else None,
        "drop_last": True,
    }
    test_params = {
        "batch_size": batch_size,
        "shuffle": False,
        "num_workers": 0,
        "pin_memory": cfg.use_cuda,
    }
    for fold in range(5):
        print(f"\n===== Fold {fold} =====")
        # === 1. Fold split ===
        train_fold_df = train_df[train_df['fold'] != fold]
        val_fold_df = train_df[train_df['fold'] == fold]
        # === 2. Prepare the DataLoaders ===
        train_loader = get_augmented_train_loader(train_fold_df, params=train_params)
        val_dataset, _ = generate_sequence_dataset(val_fold_df)
        val_loader = DataLoader(val_dataset, **test_params)
        # === 3. Define the model and optimizer ===
        model = DecoderRNN(lstm_hidden_size, fc_hidden_size, dropout_rate).to(cfg.device)
        optimizer = get_optimizer(model, optimizer_name, lr)
        # === 4. Compute class weights ===
        class_counts = train_fold_df['label'].value_counts()
        # Inverse-frequency weights; the default of 1 avoids division by zero
        # for classes absent from this fold.
        weights = [1.0 / class_counts.get(label, 1) for label in cfg.labels]
        class_weights = torch.tensor(weights, dtype=torch.float32).to(cfg.device)
        criterion = get_criterion(class_weights=class_weights)
        # === 5. Train and evaluate ===
        _, _, val_losses, _ = train_and_validate(
            model, train_loader, val_loader, optimizer, criterion, fold,
            earlystop_patience=7, epochs=30,
            scheduler=None
        )
        all_val_losses.append(min(val_losses))  # record each fold's best (minimum) val_loss
    return np.mean(all_val_losses)  # the value Optuna minimizes: mean of per-fold best val_loss
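
# get_optimizer and get_criterion are called above but not defined in this
# snippet. A minimal sketch of what they might look like, assuming the former
# maps the sampled name onto a torch.optim class and the latter wraps a
# class-weighted cross-entropy loss (names and signatures are taken from the
# call sites above; the bodies here are illustrative, not the original code):
def get_optimizer(model, optimizer_name, lr):
    # Map the Optuna-sampled name to the matching torch.optim constructor.
    optimizers = {
        "adam": torch.optim.Adam,
        "adamw": torch.optim.AdamW,
        "sgd": torch.optim.SGD,
    }
    return optimizers[optimizer_name](model.parameters(), lr=lr)


def get_criterion(class_weights=None):
    # Class-weighted cross-entropy to counter label imbalance within each fold.
    return torch.nn.CrossEntropyLoss(weight=class_weights)
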
# Create the Optuna study: minimize the cross-validated val_loss
study = optuna.create_study(
    study_name="bi_lstm_attention_tuning",
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=cfg.random_seed)  # fixed seed for reproducibility
)
# Launch the hyperparameter search
study.optimize(
    objective,    # the objective defined above
    n_trials=70,  # number of trials to run
    timeout=None  # no wall-clock limit; trials are bounded by epochs and early-stopping patience
)
# Print the best result
print("Best trial:")
best_trial = study.best_trial
print(f" Value: {best_trial.value:.4f}")
print(" Params: ")
for key, value in best_trial.params.items():
    print(f"    {key}: {value}")