We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent c202b15, commit 7c6088f (Copy full SHA for 7c6088f)
src/helm/benchmark/reeval_runner.py
@@ -82,7 +82,7 @@ def _estimate_model_ability(
82
) -> float:
83
def closure():
84
optim.zero_grad()
85
- probs = torch.sigmoid(ability - difficulties)
+ probs = torch.sigmoid(ability + difficulties)
86
loss = -torch.distributions.Bernoulli(probs=probs).log_prob(responses).mean()
87
loss.backward()
88
return loss
0 commit comments