Token | us |
Token ID | 385 |
Feature activation | +2.28e-01 |
Pos logit contributions | |
ose | +0.324 |
Bs | +0.315 |
ahn | +0.314 |
swick | +0.291 |
Adams | +0.284 |
Neg logit contributions | |
conservancy | -0.710 |
equality | -0.701 |
guaranteeing | -0.696 |
realistically | -0.695 |
illusion | -0.689 |
Token | hers |
Token ID | 7084 |
Feature activation | +5.20e-01 |
Pos logit contributions | |
equality | +2.608 |
conservancy | +2.590 |
realistically | +2.586 |
guaranteeing | +2.548 |
illusion | +2.537 |
Neg logit contributions | |
ose | -2.597 |
Bs | -2.545 |
ahn | -2.524 |
behavi | -2.468 |
swick | -2.451 |
Token | *) |
Token ID | 28104 |
Feature activation | +7.44e-01 |
Pos logit contributions | |
realistically | +6.069 |
guaranteeing | +5.987 |
conservancy | +5.942 |
equality | +5.884 |
2022 | +5.795 |
Neg logit contributions | |
ose | -5.525 |
ahn | -5.480 |
Bs | -5.476 |
swick | -5.331 |
Lear | -5.303 |
Token |
|
Token ID | 198 |
Feature activation | +2.57e-01 |
Pos logit contributions | |
conservancy | +6.470 |
realistically | +6.410 |
guaranteeing | +6.385 |
minimum | +6.287 |
equality | +6.226 |
Neg logit contributions | |
Bs | -9.619 |
ahn | -9.568 |
ose | -9.442 |
swick | -9.270 |
Js | -9.160 |
Token |
|
Token ID | 198 |
Feature activation | +7.48e-01 |
Pos logit contributions | |
equality | +2.428 |
realistically | +2.396 |
1080 | +2.357 |
guaranteeing | +2.317 |
completely | +2.302 |
Neg logit contributions | |
ose | -3.412 |
behavi | -3.409 |
ahn | -3.390 |
Bs | -3.387 |
swick | -3.300 |
Token | Total |
Token ID | 14957 |
Feature activation | +2.51e+00 |
Pos logit contributions | |
minimum | +7.018 |
1080 | +6.864 |
equality | +6.850 |
illusion | +6.724 |
completely | +6.602 |
Neg logit contributions | |
swick | -9.250 |
ahn | -9.194 |
behavi | -9.178 |
ose | -9.054 |
Bs | -8.865 |
Token | guaranteed |
Token ID | 11462 |
Feature activation | +9.35e-01 |
Pos logit contributions | |
guaranteed | +17.352 |
guarantees | +16.934 |
guaranteeing | +16.864 |
realistically | +15.916 |
compensated | +15.154 |
Neg logit contributions | |
behavi | -27.498 |
swick | -26.636 |
ThumbnailImage | -25.420 |
ッド | -25.185 |
Bs | -24.143 |
Token | at |
Token ID | 379 |
Feature activation | +6.12e-01 |
Pos logit contributions | |
realistically | +5.638 |
guaranteeing | +5.490 |
equality | +5.237 |
1080 | +5.141 |
2022 | +5.019 |
Neg logit contributions | |
behavi | -14.473 |
ッド | -14.348 |
swick | -14.293 |
ose | -14.270 |
Bs | -14.034 |
Token | signing |
Token ID | 8415 |
Feature activation | +3.74e-01 |
Pos logit contributions | |
guaranteeing | +6.554 |
realistically | +6.149 |
2022 | +5.979 |
equality | +5.939 |
minimum | +5.705 |
Neg logit contributions | |
ose | -7.482 |
Bs | -7.473 |
swick | -7.397 |
behavi | -7.332 |
ッド | -7.323 |
Token | : |
Token ID | 25 |
Feature activation | +6.59e-01 |
Pos logit contributions | |
realistically | +3.550 |
equality | +3.514 |
1080 | +3.457 |
guaranteeing | +3.449 |
2022 | +3.397 |
Neg logit contributions | |
ose | -4.913 |
behavi | -4.863 |
ahn | -4.836 |
Bs | -4.835 |
swick | -4.762 |
Token | $ |
Token ID | 720 |
Feature activation | +5.24e-01 |
Pos logit contributions | |
guaranteeing | +5.837 |
realistically | +5.801 |
2022 | +5.677 |
conservancy | +5.669 |
equality | +5.633 |
Neg logit contributions | |
Bs | -8.681 |
ose | -8.544 |
swick | -8.498 |
Js | -8.402 |
ahn | -8.377 |
Token | running |
Token ID | 2491 |
Feature activation | -1.80e-01 |
Pos logit contributions | |
guaranteeing | +3.212 |
realistically | +3.199 |
conservancy | +3.183 |
equality | +3.174 |
minimum | +3.078 |
Neg logit contributions | |
ose | -2.436 |
ahn | -2.408 |
Bs | -2.371 |
swick | -2.305 |
ッド | -2.289 |
Token | backs |
Token ID | 12983 |
Feature activation | +4.85e-01 |
Pos logit contributions | |
Bs | +2.259 |
ose | +2.243 |
ahn | +2.205 |
swick | +2.103 |
Js | +2.101 |
Neg logit contributions | |
conservancy | -1.959 |
equality | -1.892 |
illusion | -1.864 |
guaranteeing | -1.846 |
realistically | -1.831 |
Token | ) |
Token ID | 8 |
Feature activation | +2.25e-01 |
Pos logit contributions | |
realistically | +5.608 |
conservancy | +5.567 |
guaranteeing | +5.538 |
equality | +5.448 |
minimum | +5.374 |
Neg logit contributions | |
ose | -5.236 |
Bs | -5.207 |
ahn | -5.140 |
swick | -5.020 |
Js | -4.966 |
Token |
|
Token ID | 198 |
Feature activation | +1.34e-01 |
Pos logit contributions | |
equality | +2.211 |
conservancy | +2.206 |
realistically | +2.205 |
guaranteeing | +2.187 |
1080 | +2.133 |
Neg logit contributions | |
ose | -2.910 |
Bs | -2.889 |
ahn | -2.866 |
swick | -2.768 |
ッド | -2.752 |
Token |
|
Token ID | 198 |
Feature activation | +5.61e-01 |
Pos logit contributions | |
equality | +1.351 |
realistically | +1.335 |
conservancy | +1.320 |
guaranteeing | +1.312 |
1080 | +1.304 |
Neg logit contributions | |
ose | -1.732 |
Bs | -1.714 |
ahn | -1.711 |
swick | -1.656 |
ッド | -1.641 |
Token | Total |
Token ID | 14957 |
Feature activation | +2.43e+00 |
Pos logit contributions | |
minimum | +5.081 |
equality | +5.067 |
1080 | +4.985 |
illusion | +4.887 |
conservancy | +4.856 |
Neg logit contributions | |
ahn | -7.306 |
swick | -7.289 |
ose | -7.287 |
behavi | -7.170 |
Bs | -7.067 |
Token | guaranteed |
Token ID | 11462 |
Feature activation | +9.55e-01 |
Pos logit contributions | |
guaranteed | +16.297 |
guaranteeing | +15.890 |
guarantees | +15.541 |
realistically | +14.849 |
compensated | +14.023 |
Neg logit contributions | |
behavi | -28.789 |
swick | -27.334 |
ThumbnailImage | -26.811 |
ッド | -26.047 |
entreprene | -25.174 |
Token | at |
Token ID | 379 |
Feature activation | +5.41e-01 |
Pos logit contributions | |
realistically | +5.521 |
guaranteeing | +5.312 |
equality | +5.053 |
1080 | +5.000 |
2022 | +4.840 |
Neg logit contributions | |
behavi | -15.189 |
ッド | -14.898 |
swick | -14.766 |
ose | -14.765 |
Bs | -14.511 |
Token | signing |
Token ID | 8415 |
Feature activation | +3.52e-01 |
Pos logit contributions | |
guaranteeing | +5.935 |
realistically | +5.598 |
equality | +5.441 |
2022 | +5.420 |
minimum | +5.225 |
Neg logit contributions | |
ose | -6.549 |
Bs | -6.536 |
swick | -6.453 |
ッド | -6.390 |
behavi | -6.389 |
Token | : |
Token ID | 25 |
Feature activation | +4.95e-01 |
Pos logit contributions | |
realistically | +3.350 |
equality | +3.324 |
1080 | +3.268 |
guaranteeing | +3.248 |
2022 | +3.207 |
Neg logit contributions | |
ose | -4.627 |
behavi | -4.601 |
ahn | -4.557 |
Bs | -4.547 |
swick | -4.484 |
Token | $ |
Token ID | 720 |
Feature activation | +4.64e-01 |
Pos logit contributions | |
realistically | +4.388 |
2022 | +4.338 |
equality | +4.283 |
guaranteeing | +4.283 |
1080 | +4.193 |
Neg logit contributions | |
ose | -6.687 |
Bs | -6.646 |
behavi | -6.611 |
ahn | -6.514 |
swick | -6.488 |
Token | . |
Token ID | 13 |
Feature activation | +6.57e-01 |
Pos logit contributions | |
conservancy | +3.224 |
equality | +3.210 |
guaranteeing | +3.177 |
realistically | +3.154 |
illusion | +3.125 |
Neg logit contributions | |
ose | -3.334 |
Bs | -3.315 |
ahn | -3.315 |
swick | -3.178 |
Adams | -3.169 |
Token | special |
Token ID | 2041 |
Feature activation | +7.36e-01 |
Pos logit contributions | |
equality | +8.067 |
guaranteeing | +7.991 |
realistically | +7.928 |
mbudsman | +7.879 |
minimum | +7.847 |
Neg logit contributions | |
Bs | -6.208 |
ahn | -6.206 |
ose | -6.190 |
swick | -5.913 |
Adams | -5.897 |
Token | representative |
Token ID | 8852 |
Feature activation | +1.57e+00 |
Pos logit contributions | |
mbudsman | +6.131 |
guaranteeing | +6.128 |
equality | +6.021 |
realistically | +5.919 |
conservancy | +5.851 |
Neg logit contributions | |
Bs | -9.878 |
ose | -9.572 |
ahn | -9.507 |
swick | -9.462 |
ッド | -9.363 |
Token | for |
Token ID | 329 |
Feature activation | +1.22e+00 |
Pos logit contributions | |
Claudia | +15.377 |
guaranteeing | +14.333 |
2022 | +14.104 |
realistically | +13.995 |
Did | +13.790 |
Neg logit contributions | |
behavi | -15.089 |
Lear | -14.371 |
swick | -14.057 |
Bs | -14.029 |
ッド | -14.020 |
Token | sexual |
Token ID | 3206 |
Feature activation | +8.44e-01 |
Pos logit contributions | |
equality | +12.912 |
guaranteeing | +12.521 |
realistically | +11.815 |
devastated | +11.760 |
2022 | +11.709 |
Neg logit contributions | |
Adams | -12.358 |
Lear | -12.109 |
ahn | -11.970 |
Bs | -11.941 |
Fle | -11.933 |
Token | violence |
Token ID | 3685 |
Feature activation | +2.39e+00 |
Pos logit contributions | |
equality | +8.469 |
guaranteeing | +8.270 |
realistically | +8.055 |
mbudsman | +7.732 |
2022 | +7.722 |
Neg logit contributions | |
Bs | -10.126 |
Flavoring | -9.773 |
ahn | -9.752 |
Lear | -9.731 |
Adams | -9.663 |
Token | in |
Token ID | 287 |
Feature activation | +1.28e+00 |
Pos logit contributions | |
Claudia | +21.205 |
equality | +19.819 |
guaranteeing | +19.743 |
realistically | +19.739 |
2022 | +19.096 |
Neg logit contributions | |
swick | -16.229 |
ッド | -16.220 |
Lear | -15.913 |
Flavoring | -15.655 |
Js | -15.409 |
Token | conflict |
Token ID | 5358 |
Feature activation | +1.93e+00 |
Pos logit contributions | |
equality | +13.368 |
guaranteeing | +12.568 |
2022 | +12.498 |
devastated | +11.891 |
adequ | +11.847 |
Neg logit contributions | |
Adams | -13.039 |
Flavoring | -12.909 |
ッド | -12.508 |
Bs | -12.456 |
Js | -12.366 |
Token | , |
Token ID | 11 |
Feature activation | +1.88e+00 |
Pos logit contributions | |
Claudia | +18.577 |
realistically | +17.674 |
guaranteeing | +17.528 |
equality | +17.466 |
devastated | +17.216 |
Neg logit contributions | |
Lear | -14.933 |
swick | -14.352 |
ッド | -14.330 |
Flavoring | -13.774 |
ahn | -13.699 |
Token | Z |
Token ID | 1168 |
Feature activation | +3.18e-01 |
Pos logit contributions | |
Claudia | +18.739 |
guaranteeing | +17.684 |
equality | +17.251 |
realistically | +17.184 |
2022 | +16.803 |
Neg logit contributions | |
Lear | -14.181 |
Bs | -14.144 |
swick | -13.863 |
ose | -13.789 |
ッド | -13.472 |
Token | ain |
Token ID | 391 |
Feature activation | +1.52e-01 |
Pos logit contributions | |
equality | +4.194 |
conservancy | +4.138 |
illusion | +4.116 |
realistically | +4.106 |
guaranteeing | +4.074 |
Neg logit contributions | |
ose | -2.941 |
Bs | -2.937 |
ッド | -2.850 |
ahn | -2.844 |
swick | -2.828 |
Token |
|
Token ID | 198 |
Feature activation | +1.97e-01 |
Pos logit contributions | |
conservancy | +8.829 |
realistically | +8.733 |
equality | +8.630 |
guaranteeing | +8.546 |
2022 | +8.432 |
Neg logit contributions | |
Bs | -12.210 |
ose | -12.074 |
ahn | -11.877 |
Js | -11.570 |
ッド | -11.537 |
Token |
|
Token ID | 198 |
Feature activation | +1.03e+00 |
Pos logit contributions | |
equality | +1.916 |
realistically | +1.897 |
1080 | +1.861 |
guaranteeing | +1.851 |
conservancy | +1.846 |
Neg logit contributions | |
ose | -2.582 |
ahn | -2.557 |
Bs | -2.555 |
behavi | -2.506 |
swick | -2.478 |
Token | " |
Token ID | 1 |
Feature activation | +8.74e-01 |
Pos logit contributions | |
1080 | +11.971 |
illusion | +11.916 |
completely | +11.871 |
Seriously | +11.869 |
minimum | +11.813 |
Neg logit contributions | |
behavi | -9.828 |
Flavoring | -9.024 |
ahn | -8.812 |
ose | -8.750 |
swick | -8.744 |
Token | I |
Token ID | 40 |
Feature activation | +1.02e-01 |
Pos logit contributions | |
1080 | +10.155 |
Seriously | +10.049 |
realistically | +9.993 |
completely | +9.889 |
minimum | +9.850 |
Neg logit contributions | |
behavi | -8.574 |
ose | -8.369 |
ahn | -8.256 |
Flavoring | -7.999 |
swick | -7.917 |
Token | can |
Token ID | 460 |
Feature activation | +9.44e-01 |
Pos logit contributions | |
conservancy | +1.108 |
realistically | +1.104 |
equality | +1.094 |
guaranteeing | +1.094 |
illusion | +1.068 |
Neg logit contributions | |
ose | -1.237 |
Bs | -1.230 |
ahn | -1.222 |
swick | -1.181 |
ッド | -1.178 |
Token | 't |
Token ID | 470 |
Feature activation | +2.23e+00 |
Pos logit contributions | |
realistically | +10.398 |
guaranteeing | +9.720 |
completely | +9.223 |
belie | +9.165 |
1080 | +8.840 |
Neg logit contributions | |
ッド | -11.048 |
behavi | -10.667 |
Bs | -10.538 |
Adams | -10.434 |
Lear | -10.383 |
Token | speak |
Token ID | 2740 |
Feature activation | +7.30e-01 |
Pos logit contributions | |
realistically | +21.667 |
guaranteeing | +19.352 |
remotely | +18.717 |
EVER | +18.271 |
belie | +18.227 |
Neg logit contributions | |
behavi | -20.575 |
ッド | -20.530 |
Lear | -20.043 |
swick | -19.339 |
ThumbnailImage | -19.148 |
Token | for |
Token ID | 329 |
Feature activation | +6.48e-01 |
Pos logit contributions | |
realistically | +7.649 |
guaranteeing | +7.252 |
completely | +6.980 |
equality | +6.821 |
conservancy | +6.710 |
Neg logit contributions | |
swick | -8.876 |
Bs | -8.741 |
ahn | -8.600 |
ose | -8.543 |
Flavoring | -8.471 |
Token | the |
Token ID | 262 |
Feature activation | +6.03e-01 |
Pos logit contributions | |
realistically | +6.540 |
guaranteeing | +6.527 |
conservancy | +6.235 |
2022 | +6.213 |
completely | +6.110 |
Neg logit contributions | |
ose | -7.889 |
Bs | -7.716 |
ahn | -7.635 |
swick | -7.633 |
ッド | -7.469 |
Token | Yog |
Token ID | 31604 |
Feature activation | -2.75e-01 |
Pos logit contributions | |
realistically | +6.602 |
guaranteeing | +6.483 |
conservancy | +6.432 |
2022 | +6.272 |
completely | +6.237 |
Neg logit contributions | |
ose | -6.811 |
Bs | -6.800 |
ahn | -6.710 |
swick | -6.494 |
ッド | -6.455 |
Token | sc |
Token ID | 1416 |
Feature activation | -7.37e-01 |
Pos logit contributions | |
ose | +2.663 |
Bs | +2.592 |
ahn | +2.579 |
swick | +2.453 |
Adams | +2.425 |
Neg logit contributions | |
equality | -3.651 |
guaranteeing | -3.643 |
conservancy | -3.642 |
realistically | -3.602 |
illusion | -3.532 |
Token | the |
Token ID | 262 |
Feature activation | +2.96e-01 |
Pos logit contributions | |
realistically | +2.214 |
guaranteeing | +2.181 |
conservancy | +2.172 |
equality | +2.156 |
illusion | +2.106 |
Neg logit contributions | |
ose | -2.329 |
ahn | -2.310 |
Bs | -2.304 |
swick | -2.231 |
ッド | -2.199 |
Token | loss |
Token ID | 2994 |
Feature activation | +1.44e+00 |
Pos logit contributions | |
realistically | +3.548 |
guaranteeing | +3.501 |
equality | +3.491 |
conservancy | +3.474 |
illusion | +3.437 |
Neg logit contributions | |
Bs | -3.220 |
ose | -3.218 |
ahn | -3.172 |
swick | -3.090 |
Adams | -3.024 |
Token | at |
Token ID | 379 |
Feature activation | +5.92e-01 |
Pos logit contributions | |
guaranteeing | +13.902 |
realistically | +13.851 |
completely | +12.970 |
belie | +12.693 |
equality | +12.673 |
Neg logit contributions | |
ahn | -13.831 |
ose | -13.592 |
Bs | -13.481 |
ッド | -13.378 |
swick | -13.350 |
Token | Cl |
Token ID | 1012 |
Feature activation | +4.98e-01 |
Pos logit contributions | |
guaranteeing | +6.370 |
realistically | +6.354 |
equality | +6.297 |
2022 | +6.068 |
conservancy | +6.048 |
Neg logit contributions | |
ahn | -6.775 |
ose | -6.727 |
Bs | -6.700 |
Adams | -6.460 |
swick | -6.435 |
Token | utch |
Token ID | 7140 |
Feature activation | +1.10e+00 |
Pos logit contributions | |
equality | +5.519 |
1080 | +5.360 |
illusion | +5.334 |
conservancy | +5.297 |
completely | +5.251 |
Neg logit contributions | |
behavi | -5.735 |
Bs | -5.669 |
ッド | -5.552 |
swick | -5.486 |
Lear | -5.486 |
Token | Con |
Token ID | 3103 |
Feature activation | +2.22e+00 |
Pos logit contributions | |
realistically | +9.924 |
guaranteeing | +9.769 |
completely | +9.724 |
equality | +9.639 |
illusion | +9.588 |
Neg logit contributions | |
ahn | -12.175 |
swick | -12.022 |
ose | -11.813 |
ッド | -11.691 |
Flavoring | -11.662 |
Token | definitely |
Token ID | 4753 |
Feature activation | +9.66e-01 |
Pos logit contributions | |
realistically | +19.608 |
guaranteeing | +19.058 |
devastated | +18.273 |
disqualified | +18.049 |
belie | +17.942 |
Neg logit contributions | |
Lear | -16.634 |
swick | -16.481 |
Bs | -16.409 |
Flavoring | -16.106 |
Adams | -15.891 |
Token | raises |
Token ID | 12073 |
Feature activation | +5.03e-01 |
Pos logit contributions | |
realistically | +9.830 |
guaranteeing | +9.594 |
belie | +9.318 |
disqualified | +9.086 |
devastated | +9.052 |
Neg logit contributions | |
Bs | -10.799 |
Flavoring | -10.556 |
ose | -10.520 |
swick | -10.499 |
ッド | -10.454 |
Token | a |
Token ID | 257 |
Feature activation | +4.42e-01 |
Pos logit contributions | |
realistically | +4.742 |
guaranteeing | +4.713 |
conservancy | +4.526 |
equality | +4.526 |
1080 | +4.480 |
Neg logit contributions | |
Bs | -6.588 |
swick | -6.557 |
ahn | -6.553 |
ose | -6.500 |
Lear | -6.317 |
Token | red |
Token ID | 2266 |
Feature activation | -3.57e-02 |
Pos logit contributions | |
realistically | +5.622 |
guaranteeing | +5.553 |
conservancy | +5.397 |
equality | +5.389 |
1080 | +5.369 |
Neg logit contributions | |
Bs | -4.356 |
ose | -4.338 |
ahn | -4.313 |
swick | -4.308 |
ッド | -4.091 |
Token | flag |
Token ID | 6056 |
Feature activation | +1.10e-01 |
Pos logit contributions | |
ose | +0.374 |
Bs | +0.368 |
ahn | +0.366 |
swick | +0.348 |
Adams | +0.343 |
Neg logit contributions | |
conservancy | -0.457 |
equality | -0.450 |
guaranteeing | -0.445 |
realistically | -0.445 |
illusion | -0.440 |
Token | team |
Token ID | 1074 |
Feature activation | +7.32e-01 |
Pos logit contributions | |
conservancy | +1.977 |
equality | +1.954 |
realistically | +1.945 |
guaranteeing | +1.932 |
illusion | +1.905 |
Neg logit contributions | |
ose | -2.023 |
Bs | -2.018 |
ahn | -1.998 |
swick | -1.930 |
ッド | -1.906 |
Token | � |
Token ID | 447 |
Feature activation | +5.65e-01 |
Pos logit contributions | |
realistically | +8.080 |
guaranteeing | +7.855 |
equality | +7.652 |
illusion | +7.618 |
1080 | +7.561 |
Neg logit contributions | |
ose | -7.868 |
ahn | -7.779 |
Bs | -7.659 |
swick | -7.520 |
Lear | -7.515 |
Token | � |
Token ID | 247 |
Feature activation | +6.43e-01 |
Pos logit contributions | |
equality | +7.911 |
realistically | +7.872 |
guaranteeing | +7.859 |
minimum | +7.760 |
conservancy | +7.714 |
Neg logit contributions | |
ose | -4.510 |
Bs | -4.413 |
ahn | -4.403 |
behavi | -4.276 |
swick | -4.271 |
Token | s |
Token ID | 82 |
Feature activation | +5.89e-01 |
Pos logit contributions | |
realistically | +9.198 |
equality | +9.063 |
guaranteeing | +9.037 |
minimum | +8.930 |
illusion | +8.911 |
Neg logit contributions | |
ose | -4.850 |
ahn | -4.798 |
Bs | -4.716 |
Flavoring | -4.577 |
Lear | -4.574 |
Token | base |
Token ID | 2779 |
Feature activation | +1.24e+00 |
Pos logit contributions | |
realistically | +6.725 |
guaranteeing | +6.624 |
conservancy | +6.514 |
equality | +6.506 |
illusion | +6.406 |
Neg logit contributions | |
Bs | -6.262 |
ose | -6.234 |
ahn | -6.215 |
swick | -6.185 |
ッド | -6.072 |
Token | ? |
Token ID | 30 |
Feature activation | +2.19e+00 |
Pos logit contributions | |
realistically | +12.257 |
guaranteeing | +11.811 |
equality | +11.231 |
??? | +11.206 |
????? | +11.074 |
Neg logit contributions | |
Lear | -12.777 |
Bs | -12.717 |
ahn | -12.596 |
ose | -12.516 |
swick | -12.364 |
Token | What |
Token ID | 1867 |
Feature activation | +1.51e+00 |
Pos logit contributions | |
Explain | +18.590 |
realistically | +18.282 |
Did | +17.904 |
1080 | +17.456 |
2022 | +16.879 |
Neg logit contributions | |
ahn | -17.570 |
behavi | -17.227 |
ose | -16.844 |
ッド | -16.622 |
swick | -16.495 |
Token | if |
Token ID | 611 |
Feature activation | +7.58e-01 |
Pos logit contributions | |
realistically | +15.207 |
guaranteeing | +14.296 |
????? | +14.034 |
??? | +14.014 |
1080 | +13.716 |
Neg logit contributions | |
swick | -14.639 |
ッド | -13.955 |
ahn | -13.560 |
Lear | -13.490 |
ose | -13.451 |
Token | an |
Token ID | 281 |
Feature activation | +4.43e-01 |
Pos logit contributions | |
realistically | +8.546 |
guaranteeing | +8.226 |
equality | +8.149 |
1080 | +7.873 |
illusion | +7.785 |
Neg logit contributions | |
ose | -8.123 |
ahn | -8.094 |
swick | -8.008 |
Bs | -8.001 |
ッド | -7.985 |
Token | ice |
Token ID | 4771 |
Feature activation | +2.66e-01 |
Pos logit contributions | |
realistically | +5.364 |
equality | +5.338 |
conservancy | +5.305 |
guaranteeing | +5.292 |
illusion | +5.232 |
Neg logit contributions | |
Bs | -4.578 |
ose | -4.517 |
ahn | -4.482 |
swick | -4.450 |
Js | -4.350 |
Token | biome |
Token ID | 48493 |
Feature activation | +1.27e+00 |
Pos logit contributions | |
conservancy | +2.993 |
realistically | +2.971 |
guaranteeing | +2.947 |
equality | +2.942 |
illusion | +2.893 |
Neg logit contributions | |
ose | -3.063 |
Bs | -3.026 |
ahn | -3.009 |
swick | -2.940 |
ッド | -2.904 |
Token | going |
Token ID | 1016 |
Feature activation | -1.49e-01 |
Pos logit contributions | |
ose | +1.887 |
ahn | +1.842 |
Bs | +1.834 |
swick | +1.700 |
Adams | +1.683 |
Neg logit contributions | |
conservancy | -3.261 |
equality | -3.190 |
illusion | -3.144 |
guaranteeing | -3.126 |
realistically | -3.117 |
Token | on |
Token ID | 319 |
Feature activation | +2.21e-01 |
Pos logit contributions | |
ose | +1.677 |
Bs | +1.646 |
ahn | +1.642 |
swick | +1.563 |
Adams | +1.540 |
Neg logit contributions | |
conservancy | -1.806 |
equality | -1.760 |
illusion | -1.739 |
guaranteeing | -1.734 |
realistically | -1.726 |
Token | , |
Token ID | 11 |
Feature activation | +4.96e-01 |
Pos logit contributions | |
realistically | +2.476 |
guaranteeing | +2.459 |
equality | +2.449 |
conservancy | +2.446 |
completely | +2.350 |
Neg logit contributions | |
ose | -2.587 |
Bs | -2.559 |
ahn | -2.526 |
swick | -2.480 |
ッド | -2.432 |
Token | but |
Token ID | 475 |
Feature activation | +4.77e-01 |
Pos logit contributions | |
realistically | +4.953 |
guaranteeing | +4.914 |
equality | +4.689 |
conservancy | +4.614 |
completely | +4.536 |
Neg logit contributions | |
ose | -6.334 |
Bs | -6.237 |
ahn | -6.170 |
ッド | -6.078 |
swick | -6.056 |
Token | who |
Token ID | 508 |
Feature activation | +7.94e-01 |
Pos logit contributions | |
realistically | +5.393 |
guaranteeing | +5.241 |
equality | +5.099 |
conservancy | +4.992 |
2022 | +4.915 |
Neg logit contributions | |
ose | -5.486 |
ahn | -5.438 |
ッド | -5.437 |
Bs | -5.383 |
swick | -5.374 |
Token | can |
Token ID | 460 |
Feature activation | +2.19e+00 |
Pos logit contributions | |
realistically | +6.765 |
guaranteeing | +6.597 |
conservancy | +6.010 |
completely | +5.990 |
artifacts | +5.976 |
Neg logit contributions | |
ッド | -10.542 |
Bs | -10.485 |
swick | -10.458 |
ahn | -10.308 |
ose | -10.297 |
Token | deny |
Token ID | 10129 |
Feature activation | +7.32e-01 |
Pos logit contributions | |
realistically | +21.193 |
guaranteeing | +18.313 |
remotely | +17.719 |
doubt | +17.507 |
EVER | +17.256 |
Neg logit contributions | |
behavi | -20.807 |
ッド | -20.364 |
Lear | -20.031 |
swick | -19.112 |
Adams | -18.793 |
Token | that |
Token ID | 326 |
Feature activation | +7.98e-01 |
Pos logit contributions | |
realistically | +7.345 |
guaranteeing | +7.309 |
equality | +7.158 |
2022 | +6.825 |
conservancy | +6.814 |
Neg logit contributions | |
swick | -8.771 |
ッド | -8.616 |
ahn | -8.564 |
Bs | -8.503 |
ose | -8.483 |
Token | she |
Token ID | 673 |
Feature activation | -5.50e-02 |
Pos logit contributions | |
realistically | +8.676 |
guaranteeing | +8.538 |
equality | +8.367 |
2022 | +8.151 |
????? | +8.071 |
Neg logit contributions | |
swick | -8.826 |
ッド | -8.813 |
ahn | -8.651 |
ose | -8.617 |
Bs | -8.517 |
Token | loves |
Token ID | 10408 |
Feature activation | +4.21e-01 |
Pos logit contributions | |
ose | +0.628 |
Bs | +0.618 |
ahn | +0.615 |
swick | +0.588 |
Adams | +0.580 |
Neg logit contributions | |
conservancy | -0.651 |
equality | -0.639 |
guaranteeing | -0.631 |
realistically | -0.629 |
illusion | -0.625 |
Token | Jen |
Token ID | 13364 |
Feature activation | +6.63e-01 |
Pos logit contributions | |
equality | +4.086 |
guaranteeing | +4.080 |
realistically | +4.077 |
conservancy | +3.979 |
completely | +3.837 |
Neg logit contributions | |
ose | -5.342 |
ahn | -5.338 |
Bs | -5.321 |
swick | -5.314 |
ッド | -5.186 |
Token | for |
Token ID | 329 |
Feature activation | +4.34e-01 |
Pos logit contributions | |
guaranteeing | +6.881 |
realistically | +6.578 |
equality | +6.474 |
conservancy | +6.372 |
1080 | +6.344 |
Neg logit contributions | |
Bs | -6.542 |
ahn | -6.433 |
ose | -6.429 |
swick | -6.384 |
ッド | -6.308 |
Token | T |
Token ID | 309 |
Feature activation | -1.37e-01 |
Pos logit contributions | |
guaranteeing | +4.495 |
realistically | +4.432 |
equality | +4.381 |
conservancy | +4.379 |
illusion | +4.234 |
Neg logit contributions | |
ose | -5.363 |
Bs | -5.345 |
ahn | -5.285 |
swick | -5.213 |
ッド | -5.126 |
Token | UG |
Token ID | 7340 |
Feature activation | +6.81e-01 |
Pos logit contributions | |
ose | +1.323 |
ahn | +1.290 |
Bs | +1.288 |
swick | +1.209 |
Adams | +1.202 |
Neg logit contributions | |
conservancy | -1.894 |
equality | -1.850 |
guaranteeing | -1.844 |
realistically | -1.833 |
illusion | -1.814 |
Token | ?" |
Token ID | 1701 |
Feature activation | +1.38e+00 |
Pos logit contributions | |
realistically | +7.174 |
guaranteeing | +7.130 |
equality | +6.826 |
????? | +6.788 |
2022 | +6.718 |
Neg logit contributions | |
ose | -7.735 |
ッド | -7.722 |
Bs | -7.709 |
swick | -7.682 |
ahn | -7.675 |
Token | one |
Token ID | 530 |
Feature activation | +1.01e+00 |
Pos logit contributions | |
realistically | +14.916 |
Claudia | +14.787 |
guaranteeing | +14.577 |
2022 | +14.474 |
WARN | +14.303 |
Neg logit contributions | |
Bs | -10.980 |
ッド | -10.971 |
ahn | -10.954 |
ose | -10.768 |
swick | -10.616 |
Token | backer |
Token ID | 37617 |
Feature activation | +2.18e+00 |
Pos logit contributions | |
realistically | +10.455 |
guaranteeing | +10.205 |
illusion | +10.037 |
1080 | +10.007 |
equality | +9.999 |
Neg logit contributions | |
Bs | -10.602 |
ahn | -10.422 |
ッド | -10.328 |
swick | -10.124 |
ose | -10.074 |
Token | asked |
Token ID | 1965 |
Feature activation | +6.82e-01 |
Pos logit contributions | |
realistically | +17.246 |
Claudia | +16.779 |
remotely | +16.732 |
devastated | +16.453 |
demanded | +16.378 |
Neg logit contributions | |
ッド | -19.767 |
ahn | -18.279 |
Bs | -18.249 |
Js | -17.911 |
Lear | -17.759 |
Token | . |
Token ID | 13 |
Feature activation | +1.04e+00 |
Pos logit contributions | |
realistically | +6.918 |
guaranteeing | +6.709 |
conservancy | +6.563 |
equality | +6.547 |
2022 | +6.426 |
Neg logit contributions | |
ッド | -8.021 |
Bs | -7.983 |
ahn | -7.889 |
ose | -7.859 |
swick | -7.715 |
Token |
|
Token ID | 198 |
Feature activation | +2.16e-01 |
Pos logit contributions | |
realistically | +8.820 |
equality | +8.747 |
conservancy | +8.681 |
guaranteeing | +8.435 |
completely | +8.428 |
Neg logit contributions | |
Bs | -12.274 |
ose | -12.066 |
ahn | -12.015 |
Js | -11.755 |
ッド | -11.753 |
Token |
|
Token ID | 198 |
Feature activation | +1.01e+00 |
Pos logit contributions | |
equality | +2.083 |
realistically | +2.058 |
1080 | +2.023 |
guaranteeing | +2.004 |
conservancy | +1.991 |
Neg logit contributions | |
ose | -2.838 |
ahn | -2.816 |
Bs | -2.812 |
behavi | -2.780 |
swick | -2.736 |
Token | " |
Token ID | 1 |
Feature activation | +8.74e-01 |
Pos logit contributions | |
1080 | +11.666 |
Seriously | +11.593 |
completely | +11.579 |
equality | +11.548 |
illusion | +11.489 |
Neg logit contributions | |
behavi | -9.999 |
Flavoring | -9.116 |
swick | -8.924 |
ahn | -8.868 |
ose | -8.675 |
Token | , |
Token ID | 11 |
Feature activation | +1.47e+00 |
Pos logit contributions | |
realistically | +5.914 |
guaranteeing | +5.662 |
equality | +5.497 |
illusion | +5.387 |
completely | +5.378 |
Neg logit contributions | |
Bs | -6.098 |
ose | -6.067 |
ahn | -5.986 |
Flavoring | -5.930 |
ッド | -5.920 |
Token | my |
Token ID | 616 |
Feature activation | +2.29e-01 |
Pos logit contributions | |
realistically | +15.255 |
guaranteeing | +14.730 |
devastated | +14.319 |
Claudia | +14.084 |
completely | +13.855 |
Neg logit contributions | |
swick | -13.608 |
ッド | -13.414 |
Flavoring | -13.402 |
ahn | -13.141 |
ose | -12.961 |
Token | heart |
Token ID | 2612 |
Feature activation | +4.06e-01 |
Pos logit contributions | |
realistically | +2.836 |
guaranteeing | +2.813 |
conservancy | +2.810 |
equality | +2.794 |
illusion | +2.763 |
Neg logit contributions | |
ose | -2.379 |
Bs | -2.364 |
ahn | -2.360 |
swick | -2.278 |
ッド | -2.237 |
Token | beats |
Token ID | 17825 |
Feature activation | +5.76e-01 |
Pos logit contributions | |
realistically | +4.763 |
guaranteeing | +4.755 |
equality | +4.651 |
completely | +4.592 |
illusion | +4.590 |
Neg logit contributions | |
ose | -4.410 |
Bs | -4.351 |
ahn | -4.305 |
Flavoring | -4.181 |
swick | -4.147 |
Token | rapidly |
Token ID | 8902 |
Feature activation | +5.40e-01 |
Pos logit contributions | |
realistically | +6.526 |
guaranteeing | +6.305 |
equality | +6.234 |
completely | +6.086 |
minimum | +5.972 |
Neg logit contributions | |
ose | -6.447 |
Bs | -6.314 |
ahn | -6.267 |
swick | -6.218 |
Flavoring | -6.176 |
Token | . |
Token ID | 13 |
Feature activation | +2.17e+00 |
Pos logit contributions | |
realistically | +5.799 |
guaranteeing | +5.727 |
equality | +5.613 |
completely | +5.378 |
minimum | +5.371 |
Neg logit contributions | |
Bs | -6.312 |
ose | -6.292 |
swick | -6.183 |
ahn | -6.116 |
Flavoring | -6.105 |
Token |
|
Token ID | 198 |
Feature activation | +2.07e-01 |
Pos logit contributions | |
Did | +16.974 |
Explain | +16.716 |
Claudia | +16.390 |
SHE | +16.319 |
THEY | +15.743 |
Neg logit contributions | |
behavi | -21.396 |
entreprene | -19.514 |
ッド | -18.857 |
ahn | -18.651 |
srf | -18.279 |
Token |
|
Token ID | 198 |
Feature activation | +1.76e+00 |
Pos logit contributions | |
equality | +2.004 |
realistically | +1.987 |
1080 | +1.951 |
guaranteeing | +1.933 |
conservancy | +1.922 |
Neg logit contributions | |
ose | -2.706 |
Bs | -2.680 |
ahn | -2.679 |
behavi | -2.640 |
swick | -2.604 |
Token | We |
Token ID | 1135 |
Feature activation | -4.71e-01 |
Pos logit contributions | |
Seriously | +17.325 |
1080 | +16.573 |
??? | +15.964 |
????? | +15.954 |
literally | +15.844 |
Neg logit contributions | |
behavi | -18.155 |
entreprene | -15.056 |
occas | -15.028 |
srf | -14.967 |
swick | -14.635 |
Token | . |
Token ID | 13 |
Feature activation | +6.39e-01 |
Pos logit contributions | |
ose | +5.485 |
ahn | +5.362 |
Bs | +5.233 |
Irish | +5.077 |
swick | +5.005 |
Neg logit contributions | |
conservancy | -5.533 |
equality | -5.276 |
guaranteeing | -5.162 |
artifacts | -5.151 |
illusion | -5.148 |
Token | Are |
Token ID | 4231 |
Feature activation | +5.21e-01 |
Pos logit contributions | |
realistically | +6.366 |
equality | +6.333 |
completely | +6.242 |
illusion | +6.203 |
literally | +6.155 |
Neg logit contributions | |
Bs | -7.531 |
ッド | -7.512 |
ose | -7.407 |
ahn | -7.363 |
Flavoring | -7.241 |
Token | Ik |
Token ID | 32840 |
Feature activation | +1.83e-02 |
Pos logit contributions | |
ose | +0.137 |
Bs | +0.136 |
ahn | +0.135 |
swick | +0.130 |
Adams | +0.128 |
Neg logit contributions | |
conservancy | -0.116 |
equality | -0.114 |
realistically | -0.113 |
guaranteeing | -0.113 |
illusion | -0.111 |
Token | ki |
Token ID | 4106 |
Feature activation | -1.84e-01 |
Pos logit contributions | |
conservancy | +0.291 |
equality | +0.288 |
realistically | +0.285 |
guaranteeing | +0.285 |
illusion | +0.283 |
Neg logit contributions | |
ose | -0.134 |
Bs | -0.132 |
ahn | -0.130 |
swick | -0.123 |
ッド | -0.119 |
Token | take |
Token ID | 1011 |
Feature activation | -8.17e-01 |
Pos logit contributions | |
ose | +1.999 |
Bs | +1.946 |
ahn | +1.946 |
swick | +1.849 |
Adams | +1.810 |
Neg logit contributions | |
conservancy | -2.309 |
equality | -2.257 |
illusion | -2.204 |
guaranteeing | -2.191 |
realistically | -2.182 |
Token | off |
Token ID | 572 |
Feature activation | +3.10e-01 |
Pos logit contributions | |
ose | +8.906 |
ahn | +8.533 |
Bs | +8.435 |
Irish | +8.166 |
Js | +7.958 |
Neg logit contributions | |
conservancy | -9.567 |
illusion | -8.959 |
equality | -8.895 |
mbudsman | -8.866 |
mbuds | -8.576 |
Token | her |
Token ID | 607 |
Feature activation | +3.73e-01 |
Pos logit contributions | |
realistically | +2.760 |
guaranteeing | +2.745 |
equality | +2.728 |
conservancy | +2.710 |
completely | +2.634 |
Neg logit contributions | |
ose | -4.268 |
Bs | -4.240 |
ahn | -4.240 |
swick | -4.123 |
Flavoring | -4.070 |
Token | clothes |
Token ID | 8242 |
Feature activation | +2.13e+00 |
Pos logit contributions | |
realistically | +4.055 |
guaranteeing | +4.013 |
equality | +3.974 |
conservancy | +3.944 |
illusion | +3.919 |
Neg logit contributions | |
ahn | -4.412 |
ose | -4.390 |
Bs | -4.377 |
ッド | -4.192 |
Adams | -4.179 |
Token | ? |
Token ID | 30 |
Feature activation | +1.12e+00 |
Pos logit contributions | |
realistically | +19.794 |
guaranteeing | +18.771 |
belie | +18.613 |
devastated | +18.213 |
completely | +18.165 |
Neg logit contributions | |
Flavoring | -17.008 |
Lear | -16.797 |
ッド | -16.672 |
Adams | -16.342 |
behavi | -16.132 |
Token | That |
Token ID | 1320 |
Feature activation | +6.71e-01 |
Pos logit contributions | |
Explain | +9.876 |
Claudia | +9.776 |
realistically | +9.728 |
Did | +9.550 |
SHE | +9.546 |
Neg logit contributions | |
behavi | -12.663 |
ose | -12.448 |
ahn | -12.420 |
Bs | -12.097 |
ッド | -11.965 |
Token | kind |
Token ID | 1611 |
Feature activation | -3.26e-02 |
Pos logit contributions | |
realistically | +7.245 |
guaranteeing | +7.086 |
equality | +6.708 |
completely | +6.698 |
illusion | +6.655 |
Neg logit contributions | |
Bs | -7.585 |
ose | -7.577 |
ッド | -7.562 |
Flavoring | -7.514 |
swick | -7.502 |
Token | of |
Token ID | 286 |
Feature activation | +2.96e-01 |
Pos logit contributions | |
ose | +0.364 |
Bs | +0.359 |
ahn | +0.357 |
swick | +0.341 |
Adams | +0.336 |
Neg logit contributions | |
conservancy | -0.394 |
equality | -0.387 |
guaranteeing | -0.383 |
realistically | -0.382 |
illusion | -0.378 |
Token | thing |
Token ID | 1517 |
Feature activation | +1.08e+00 |
Pos logit contributions | |
realistically | +3.663 |
guaranteeing | +3.650 |
equality | +3.592 |
conservancy | +3.546 |
illusion | +3.513 |
Neg logit contributions | |
ose | -3.102 |
Bs | -3.101 |
ahn | -3.022 |
swick | -2.974 |
ッド | -2.956 |
Token | O |
Token ID | 46 |
Feature activation | +1.02e+00 |
Pos logit contributions | |
realistically | +3.238 |
1080 | +3.238 |
equality | +3.222 |
guaranteeing | +3.208 |
conservancy | +3.177 |
Neg logit contributions | |
ahn | -2.386 |
ose | -2.370 |
Bs | -2.315 |
swick | -2.232 |
ッド | -2.230 |
Token | , |
Token ID | 11 |
Feature activation | +9.93e-01 |
Pos logit contributions | |
1080 | +9.188 |
????? | +9.144 |
SHE | +9.039 |
realistically | +9.030 |
WARN | +8.977 |
Neg logit contributions | |
ahn | -11.747 |
Flavoring | -11.467 |
Bs | -11.443 |
ose | -11.354 |
ッド | -11.235 |
Token | I |
Token ID | 314 |
Feature activation | +1.54e+00 |
Pos logit contributions | |
WARN | +8.009 |
realistically | +7.872 |
1080 | +7.813 |
SHE | +7.788 |
guaranteeing | +7.759 |
Neg logit contributions | |
ahn | -12.751 |
ose | -12.577 |
Bs | -12.422 |
Flavoring | -12.375 |
behavi | -12.330 |
Token | DON |
Token ID | 23917 |
Feature activation | +9.22e-01 |
Pos logit contributions | |
WARN | +14.805 |
OULD | +13.165 |
????? | +13.145 |
SHE | +13.087 |
realistically | +12.909 |
Neg logit contributions | |
behavi | -16.401 |
Flavoring | -15.947 |
ッド | -15.725 |
ahn | -15.245 |
swick | -14.994 |
Token | ' |
Token ID | 6 |
Feature activation | +4.53e-01 |
Pos logit contributions | |
1080 | +8.435 |
OULD | +8.234 |
????? | +8.191 |
2022 | +8.024 |
WARN | +7.970 |
Neg logit contributions | |
behavi | -12.491 |
ahn | -11.373 |
ose | -11.037 |
entreprene | -10.975 |
occas | -10.963 |
Token | T |
Token ID | 51 |
Feature activation | +2.13e+00 |
Pos logit contributions | |
1080 | +6.281 |
????? | +6.140 |
realistically | +6.136 |
2022 | +6.065 |
OULD | +6.018 |
Neg logit contributions | |
behavi | -4.072 |
ahn | -4.026 |
ose | -4.007 |
swick | -3.748 |
ッド | -3.712 |
Token | HAVE |
Token ID | 21515 |
Feature activation | +1.40e+00 |
Pos logit contributions | |
WARN | +17.044 |
EVER | +16.214 |
CARE | +15.855 |
NEED | +15.174 |
SHE | +15.095 |
Neg logit contributions | |
behavi | -22.457 |
ッド | -21.371 |
ahn | -20.778 |
swick | -20.331 |
Flavoring | -20.285 |
Token | A |
Token ID | 317 |
Feature activation | +1.30e+00 |
Pos logit contributions | |
1080 | +11.044 |
????? | +10.738 |
EVER | +10.688 |
WARN | +10.609 |
SHE | +10.512 |
Neg logit contributions | |
behavi | -16.351 |
ahn | -16.333 |
ose | -15.647 |
Bs | -15.552 |
swick | -15.517 |
Token | CL |
Token ID | 7852 |
Feature activation | -5.52e-02 |
Pos logit contributions | |
1080 | +11.274 |
WARN | +11.176 |
SHE | +10.761 |
DRM | +10.639 |
2022 | +10.507 |
Neg logit contributions | |
ahn | -14.825 |
behavi | -14.734 |
ose | -14.217 |
Bs | -14.054 |
Flavoring | -13.986 |
Token | UE |
Token ID | 8924 |
Feature activation | +1.20e+00 |
Pos logit contributions | |
ose | +0.492 |
Bs | +0.483 |
ahn | +0.479 |
swick | +0.451 |
Adams | +0.444 |
Neg logit contributions | |
conservancy | -0.793 |
equality | -0.782 |
guaranteeing | -0.774 |
realistically | -0.774 |
illusion | -0.766 |
Token | ON |
Token ID | 6177 |
Feature activation | +9.10e-01 |
Pos logit contributions | |
????? | +10.842 |
SHE | +10.650 |
WARN | +10.584 |
1080 | +10.535 |
??? | +10.502 |
Neg logit contributions | |
ahn | -13.389 |
behavi | -12.894 |
ose | -12.877 |
swick | -12.847 |
Flavoring | -12.693 |
Token | among |
Token ID | 1871 |
Feature activation | +7.59e-02 |
Pos logit contributions | |
conservancy | +2.494 |
guaranteeing | +2.484 |
equality | +2.476 |
realistically | +2.467 |
illusion | +2.402 |
Neg logit contributions | |
ose | -2.618 |
Bs | -2.587 |
ahn | -2.571 |
ッド | -2.471 |
swick | -2.469 |
Token | quarterbacks |
Token ID | 23643 |
Feature activation | +1.97e-01 |
Pos logit contributions | |
conservancy | +0.916 |
equality | +0.906 |
guaranteeing | +0.906 |
realistically | +0.904 |
illusion | +0.883 |
Neg logit contributions | |
ose | -0.835 |
ahn | -0.821 |
Bs | -0.820 |
swick | -0.790 |
ッド | -0.776 |
Token | ) |
Token ID | 8 |
Feature activation | +3.67e-01 |
Pos logit contributions | |
conservancy | +2.470 |
realistically | +2.450 |
guaranteeing | +2.446 |
equality | +2.429 |
illusion | +2.381 |
Neg logit contributions | |
ose | -2.042 |
Bs | -2.025 |
ahn | -2.017 |
swick | -1.925 |
Js | -1.901 |
Token |
|
Token ID | 198 |
Feature activation | +2.37e-01 |
Pos logit contributions | |
conservancy | +3.575 |
guaranteeing | +3.527 |
realistically | +3.510 |
equality | +3.494 |
minimum | +3.399 |
Neg logit contributions | |
Bs | -4.767 |
ose | -4.763 |
ahn | -4.708 |
swick | -4.563 |
Js | -4.528 |
Token |
|
Token ID | 198 |
Feature activation | +3.79e-01 |
Pos logit contributions | |
equality | +2.267 |
realistically | +2.240 |
1080 | +2.198 |
guaranteeing | +2.173 |
completely | +2.149 |
Neg logit contributions | |
ose | -3.137 |
ahn | -3.114 |
Bs | -3.112 |
behavi | -3.109 |
swick | -3.030 |
Token | Total |
Token ID | 14957 |
Feature activation | +2.12e+00 |
Pos logit contributions | |
equality | +3.517 |
minimum | +3.475 |
1080 | +3.407 |
illusion | +3.389 |
conservancy | +3.385 |
Neg logit contributions | |
ose | -5.018 |
ahn | -5.017 |
swick | -4.944 |
Bs | -4.905 |
behavi | -4.854 |
Token | guaranteed |
Token ID | 11462 |
Feature activation | +8.06e-01 |
Pos logit contributions | |
guaranteeing | +14.338 |
guaranteed | +14.241 |
guarantees | +13.782 |
realistically | +13.391 |
2022 | +12.623 |
Neg logit contributions | |
behavi | -26.788 |
swick | -25.689 |
ッド | -24.641 |
ThumbnailImage | -24.452 |
Bs | -23.678 |
Token | at |
Token ID | 379 |
Feature activation | +3.95e-01 |
Pos logit contributions | |
realistically | +5.044 |
guaranteeing | +4.933 |
equality | +4.687 |
1080 | +4.567 |
2022 | +4.458 |
Neg logit contributions | |
behavi | -12.619 |
ッド | -12.555 |
ose | -12.529 |
swick | -12.426 |
Bs | -12.311 |
Token | signing |
Token ID | 8415 |
Feature activation | +3.33e-01 |
Pos logit contributions | |
guaranteeing | +4.546 |
realistically | +4.354 |
equality | +4.262 |
2022 | +4.197 |
minimum | +4.124 |
Neg logit contributions | |
ose | -4.636 |
Bs | -4.611 |
swick | -4.523 |
ahn | -4.519 |
ッド | -4.462 |
Token | : |
Token ID | 25 |
Feature activation | +4.08e-01 |
Pos logit contributions | |
realistically | +3.209 |
equality | +3.179 |
guaranteeing | +3.121 |
1080 | +3.119 |
2022 | +3.062 |
Neg logit contributions | |
ose | -4.351 |
behavi | -4.280 |
ahn | -4.280 |
Bs | -4.274 |
swick | -4.206 |
Token | $ |
Token ID | 720 |
Feature activation | +6.05e-01 |
Pos logit contributions | |
realistically | +3.767 |
guaranteeing | +3.718 |
2022 | +3.701 |
equality | +3.694 |
1080 | +3.598 |
Neg logit contributions | |
ose | -5.447 |
Bs | -5.406 |
ahn | -5.317 |
swick | -5.281 |
behavi | -5.268 |
Token | 's |
Token ID | 338 |
Feature activation | +3.15e-01 |
Pos logit contributions | |
ose | +4.031 |
ahn | +3.965 |
Bs | +3.954 |
Irish | +3.739 |
swick | +3.723 |
Neg logit contributions | |
conservancy | -4.522 |
equality | -4.334 |
illusion | -4.253 |
1080 | -4.183 |
mbudsman | -4.159 |
Token | obvious |
Token ID | 3489 |
Feature activation | +3.48e-01 |
Pos logit contributions | |
realistically | +3.694 |
guaranteeing | +3.662 |
equality | +3.557 |
conservancy | +3.498 |
illusion | +3.451 |
Neg logit contributions | |
Bs | -3.543 |
ose | -3.538 |
ahn | -3.512 |
swick | -3.466 |
ッド | -3.385 |
Token | we |
Token ID | 356 |
Feature activation | -1.72e-01 |
Pos logit contributions | |
realistically | +3.776 |
guaranteeing | +3.729 |
equality | +3.712 |
conservancy | +3.662 |
illusion | +3.600 |
Neg logit contributions | |
ose | -4.136 |
ahn | -4.114 |
Bs | -4.099 |
swick | -4.011 |
ッド | -3.934 |
Token | no |
Token ID | 645 |
Feature activation | +3.17e-01 |
Pos logit contributions | |
ose | +1.950 |
ahn | +1.914 |
Bs | +1.914 |
swick | +1.809 |
Irish | +1.788 |
Neg logit contributions | |
conservancy | -2.064 |
equality | -2.012 |
illusion | -1.971 |
guaranteeing | -1.967 |
realistically | -1.944 |
Token | one |
Token ID | 530 |
Feature activation | +1.19e+00 |
Pos logit contributions | |
realistically | +4.303 |
conservancy | +4.293 |
guaranteeing | +4.281 |
equality | +4.221 |
illusion | +4.145 |
Neg logit contributions | |
Bs | -2.900 |
ahn | -2.872 |
ose | -2.867 |
swick | -2.756 |
ッド | -2.710 |
Token | can |
Token ID | 460 |
Feature activation | +2.12e+00 |
Pos logit contributions | |
realistically | +11.991 |
guaranteeing | +11.262 |
remotely | +10.532 |
guarantees | +10.231 |
completely | +10.118 |
Neg logit contributions | |
ッド | -13.587 |
swick | -13.227 |
Bs | -12.994 |
ahn | -12.792 |
Fle | -12.756 |
Token | take |
Token ID | 1011 |
Feature activation | +4.68e-01 |
Pos logit contributions | |
realistically | +22.942 |
guaranteeing | +19.830 |
remotely | +19.586 |
EVER | +19.051 |
belie | +19.019 |
Neg logit contributions | |
ッド | -18.549 |
behavi | -17.916 |
swick | -17.280 |
Lear | -17.242 |
Adams | -17.119 |
Token | any |
Token ID | 597 |
Feature activation | +4.19e-01 |
Pos logit contributions | |
realistically | +5.287 |
guaranteeing | +5.259 |
equality | +5.150 |
1080 | +4.986 |
conservancy | +4.962 |
Neg logit contributions | |
ose | -5.296 |
swick | -5.282 |
ahn | -5.265 |
Bs | -5.243 |
Adams | -5.042 |
Token | risks |
Token ID | 7476 |
Feature activation | +2.61e-01 |
Pos logit contributions | |
realistically | +4.973 |
guaranteeing | +4.935 |
equality | +4.885 |
conservancy | +4.771 |
illusion | +4.710 |
Neg logit contributions | |
Bs | -4.518 |
ose | -4.517 |
swick | -4.481 |
ahn | -4.477 |
ッド | -4.289 |
Token | with |
Token ID | 351 |
Feature activation | +7.81e-01 |
Pos logit contributions | |
realistically | +2.975 |
guaranteeing | +2.962 |
equality | +2.863 |
conservancy | +2.862 |
1080 | +2.781 |
Neg logit contributions | |
ose | -3.037 |
Bs | -3.026 |
ahn | -2.996 |
swick | -2.972 |
ッド | -2.929 |
Token | this |
Token ID | 428 |
Feature activation | +7.52e-01 |
Pos logit contributions | |
guaranteeing | +8.378 |
realistically | +8.226 |
equality | +7.904 |
2022 | +7.750 |
1080 | +7.594 |
Neg logit contributions | |
swick | -8.825 |
Bs | -8.734 |
ose | -8.707 |
ahn | -8.603 |
ッド | -8.530 |
Token | Catalan |
Token ID | 31066 |
Feature activation | +3.32e-01 |
Pos logit contributions | |
equality | +2.768 |
conservancy | +2.760 |
guaranteeing | +2.746 |
realistically | +2.728 |
illusion | +2.676 |
Neg logit contributions | |
ose | -2.671 |
Bs | -2.665 |
ahn | -2.639 |
swick | -2.554 |
ッド | -2.497 |
Token | Minister |
Token ID | 4139 |
Feature activation | +1.17e+00 |
Pos logit contributions | |
guaranteeing | +3.637 |
equality | +3.618 |
conservancy | +3.608 |
realistically | +3.597 |
mbudsman | +3.527 |
Neg logit contributions | |
ose | -3.859 |
Bs | -3.840 |
ahn | -3.821 |
swick | -3.735 |
Adams | -3.654 |
Token | for |
Token ID | 329 |
Feature activation | +9.75e-01 |
Pos logit contributions | |
guaranteeing | +11.426 |
realistically | +11.132 |
Claudia | +10.779 |
2022 | +10.638 |
minimum | +10.554 |
Neg logit contributions | |
Bs | -11.742 |
swick | -11.711 |
ose | -11.642 |
ahn | -11.633 |
ッド | -11.532 |
Token | the |
Token ID | 262 |
Feature activation | +6.54e-01 |
Pos logit contributions | |
equality | +10.520 |
guaranteeing | +10.426 |
realistically | +10.026 |
2022 | +10.022 |
mbudsman | +9.837 |
Neg logit contributions | |
Bs | -10.018 |
ッド | -9.886 |
ose | -9.876 |
ahn | -9.774 |
Adams | -9.732 |
Token | Presidency |
Token ID | 35696 |
Feature activation | +1.79e+00 |
Pos logit contributions | |
guaranteeing | +7.389 |
equality | +7.307 |
2022 | +7.199 |
realistically | +7.169 |
minimum | +7.047 |
Neg logit contributions | |
Bs | -7.022 |
ahn | -6.979 |
ose | -6.958 |
ッド | -6.864 |
swick | -6.859 |
Token | , |
Token ID | 11 |
Feature activation | +2.11e+00 |
Pos logit contributions | |
Claudia | +16.498 |
guaranteeing | +16.338 |
realistically | +15.972 |
2022 | +15.494 |
assured | +15.404 |
Neg logit contributions | |
ッド | -14.972 |
Bs | -14.293 |
Lear | -14.287 |
ahn | -14.281 |
swick | -14.017 |
Token | Ne |
Token ID | 3169 |
Feature activation | +1.23e-01 |
Pos logit contributions | |
Claudia | +19.236 |
guaranteeing | +18.650 |
realistically | +17.729 |
2022 | +17.318 |
Did | +17.121 |
Neg logit contributions | |
swick | -16.288 |
ッド | -16.176 |
Bs | -15.317 |
ahn | -15.181 |
behavi | -15.098 |
Token | us |
Token ID | 385 |
Feature activation | +2.43e-01 |
Pos logit contributions | |
conservancy | +1.526 |
equality | +1.512 |
realistically | +1.485 |
guaranteeing | +1.483 |
illusion | +1.482 |
Neg logit contributions | |
Bs | -1.305 |
ose | -1.302 |
ahn | -1.277 |
swick | -1.237 |
ッド | -1.232 |
Token | M |
Token ID | 337 |
Feature activation | +1.01e+00 |
Pos logit contributions | |
conservancy | +3.189 |
equality | +3.161 |
guaranteeing | +3.135 |
illusion | +3.116 |
realistically | +3.101 |
Neg logit contributions | |
ose | -2.365 |
Bs | -2.356 |
ahn | -2.286 |
swick | -2.207 |
ッド | -2.190 |
Token | unt |
Token ID | 2797 |
Feature activation | +3.77e-01 |
Pos logit contributions | |
equality | +12.767 |
completely | +12.419 |
guaranteeing | +12.339 |
mbudsman | +12.279 |
illusion | +12.269 |
Neg logit contributions | |
ッド | -8.485 |
Bs | -8.026 |
Flavoring | -7.973 |
behavi | -7.774 |
swick | -7.529 |
Token | é |
Token ID | 2634 |
Feature activation | +8.12e-01 |
Pos logit contributions | |
conservancy | +5.676 |
equality | +5.619 |
realistically | +5.568 |
guaranteeing | +5.543 |
illusion | +5.531 |
Neg logit contributions | |
Bs | -2.812 |
ッド | -2.723 |
ose | -2.671 |
behavi | -2.657 |
Flavoring | -2.648 |
Token | Reyn |
Token ID | 18567 |
Feature activation | -2.36e-01 |
Pos logit contributions | |
realistically | +8.580 |
guaranteeing | +8.351 |
equality | +8.200 |
completely | +8.067 |
Claudia | +7.931 |
Neg logit contributions | |
ose | -9.571 |
Bs | -9.326 |
Adams | -9.321 |
swick | -9.316 |
ahn | -9.281 |
Token | ad |
Token ID | 324 |
Feature activation | +2.28e-01 |
Pos logit contributions | |
ose | +1.754 |
ahn | +1.720 |
Bs | +1.711 |
swick | +1.581 |
Adams | +1.555 |
Neg logit contributions | |
conservancy | -3.761 |
equality | -3.686 |
guaranteeing | -3.649 |
realistically | -3.628 |
illusion | -3.610 |
Token | , |
Token ID | 11 |
Feature activation | +7.54e-01 |
Pos logit contributions | |
realistically | +2.684 |
conservancy | +2.662 |
guaranteeing | +2.651 |
equality | +2.640 |
illusion | +2.594 |
Neg logit contributions | |
ose | -2.532 |
Bs | -2.495 |
ahn | -2.460 |
swick | -2.386 |
Adams | -2.380 |
Token | this |
Token ID | 428 |
Feature activation | +5.00e-01 |
Pos logit contributions | |
realistically | +7.733 |
guaranteeing | +7.602 |
equality | +7.255 |
completely | +7.223 |
illusion | +7.037 |
Neg logit contributions | |
ose | -8.811 |
ahn | -8.631 |
ッド | -8.630 |
Bs | -8.589 |
swick | -8.497 |
Token | has |
Token ID | 468 |
Feature activation | +7.71e-01 |
Pos logit contributions | |
realistically | +5.763 |
guaranteeing | +5.620 |
equality | +5.587 |
conservancy | +5.507 |
completely | +5.465 |
Neg logit contributions | |
ose | -5.489 |
Bs | -5.446 |
ッド | -5.353 |
ahn | -5.350 |
swick | -5.312 |
Token | nothing |
Token ID | 2147 |
Feature activation | +2.11e+00 |
Pos logit contributions | |
realistically | +8.102 |
guaranteeing | +7.772 |
devastated | +7.691 |
equality | +7.488 |
completely | +7.470 |
Neg logit contributions | |
ose | -8.988 |
Bs | -8.878 |
Flavoring | -8.805 |
swick | -8.725 |
ahn | -8.634 |
Token | to |
Token ID | 284 |
Feature activation | +1.02e+00 |
Pos logit contributions | |
realistically | +15.112 |
guaranteeing | +14.341 |
remotely | +14.243 |
EVER | +13.402 |
belie | +12.934 |
Neg logit contributions | |
Flavoring | -20.564 |
Bs | -20.469 |
swick | -19.977 |
Js | -19.939 |
ose | -19.897 |
Token | do |
Token ID | 466 |
Feature activation | +1.25e+00 |
Pos logit contributions | |
realistically | +9.990 |
guaranteeing | +9.088 |
belie | +8.910 |
completely | +8.657 |
equality | +8.592 |
Neg logit contributions | |
swick | -11.839 |
ose | -11.744 |
Bs | -11.692 |
ッド | -11.530 |
ahn | -11.447 |
Token | with |
Token ID | 351 |
Feature activation | +1.52e+00 |
Pos logit contributions | |
realistically | +9.300 |
completely | +8.151 |
guaranteeing | +8.066 |
equality | +8.014 |
2022 | +7.924 |
Neg logit contributions | |
behavi | -17.042 |
ThumbnailImage | -16.829 |
swick | -16.296 |
entreprene | -16.166 |
ッド | -16.081 |
Token | him |
Token ID | 683 |
Feature activation | +6.73e-01 |
Pos logit contributions | |
guaranteeing | +14.753 |
realistically | +14.586 |
belie | +13.958 |
equality | +13.889 |
Claudia | +13.526 |
Neg logit contributions | |
Adams | -13.535 |
ッド | -13.514 |
ose | -13.507 |
Bs | -13.306 |
ahn | -13.281 |
Token | . |
Token ID | 13 |
Feature activation | +1.34e+00 |
Pos logit contributions | |
guaranteeing | +7.276 |
realistically | +7.261 |
equality | +6.914 |
completely | +6.728 |
????? | +6.541 |
Neg logit contributions | |
ose | -7.653 |
Bs | -7.490 |
ahn | -7.484 |
swick | -7.425 |
ッド | -7.373 |
Token | OV |
Token ID | 8874 |
Feature activation | +3.18e-01 |
Pos logit contributions | |
equality | +2.070 |
conservancy | +2.068 |
realistically | +2.050 |
1080 | +2.031 |
illusion | +2.027 |
Neg logit contributions | |
ose | -1.280 |
Bs | -1.260 |
ahn | -1.256 |
swick | -1.213 |
ッド | -1.186 |
Token | even |
Token ID | 772 |
Feature activation | +1.11e-01 |
Pos logit contributions | |
realistically | +2.188 |
guaranteeing | +2.128 |
equality | +2.102 |
illusion | +2.043 |
1080 | +2.036 |
Neg logit contributions | |
ose | -5.105 |
Bs | -5.052 |
ahn | -5.034 |
swick | -4.987 |
ッド | -4.903 |
Token | double |
Token ID | 4274 |
Feature activation | +2.08e-01 |
Pos logit contributions | |
realistically | +1.499 |
conservancy | +1.498 |
equality | +1.498 |
guaranteeing | +1.491 |
1080 | +1.456 |
Neg logit contributions | |
ose | -1.058 |
Bs | -1.043 |
ahn | -1.041 |
swick | -0.988 |
Adams | -0.976 |
Token | or |
Token ID | 393 |
Feature activation | +2.34e-01 |
Pos logit contributions | |
realistically | +2.037 |
equality | +2.023 |
guaranteeing | +2.008 |
conservancy | +1.977 |
1080 | +1.952 |
Neg logit contributions | |
ose | -2.726 |
Bs | -2.713 |
ahn | -2.702 |
swick | -2.653 |
ッド | -2.610 |
Token | 4 |
Token ID | 604 |
Feature activation | -2.04e-01 |
Pos logit contributions | |
realistically | +2.828 |
equality | +2.808 |
1080 | +2.793 |
guaranteeing | +2.789 |
conservancy | +2.771 |
Neg logit contributions | |
ose | -2.549 |
Bs | -2.541 |
ahn | -2.508 |
swick | -2.437 |
ッド | -2.400 |
Token | x |
Token ID | 87 |
Feature activation | +2.09e+00 |
Pos logit contributions | |
ose | +1.013 |
Bs | +0.961 |
ahn | +0.929 |
Js | +0.816 |
swick | +0.814 |
Neg logit contributions | |
conservancy | -3.828 |
guaranteeing | -3.704 |
equality | -3.693 |
illusion | -3.666 |
realistically | -3.663 |
Token | 1080 |
Token ID | 17729 |
Feature activation | -2.13e-01 |
Pos logit contributions | |
1080 | +15.838 |
realistically | +11.616 |
1080 | +11.422 |
2022 | +10.244 |
1024 | +9.732 |
Neg logit contributions | |
Lear | -25.231 |
ッド | -25.107 |
behavi | -24.957 |
ose | -24.505 |
ahn | -24.456 |
Token | P |
Token ID | 47 |
Feature activation | +1.53e+00 |
Pos logit contributions | |
ose | +1.638 |
Bs | +1.635 |
ahn | +1.572 |
Js | +1.484 |
Adams | +1.469 |
Neg logit contributions | |
conservancy | -3.392 |
equality | -3.274 |
guaranteeing | -3.261 |
illusion | -3.219 |
realistically | -3.216 |
Token | resolution |
Token ID | 6323 |
Feature activation | +6.86e-01 |
Pos logit contributions | |
realistically | +10.662 |
1080 | +10.573 |
2022 | +9.241 |
guaranteeing | +9.197 |
belie | +8.275 |
Neg logit contributions | |
ッド | -20.856 |
Bs | -20.096 |
behavi | -20.055 |
Lear | -19.941 |
Flavoring | -19.840 |
Token | you |
Token ID | 345 |
Feature activation | +4.54e-02 |
Pos logit contributions | |
realistically | +5.434 |
guaranteeing | +5.067 |
1080 | +4.933 |
equality | +4.650 |
completely | +4.639 |
Neg logit contributions | |
behavi | -10.268 |
swick | -10.151 |
ッド | -9.906 |
ahn | -9.893 |
ose | -9.876 |
Token | will |
Token ID | 481 |
Feature activation | -3.33e-01 |
Pos logit contributions | |
conservancy | +0.453 |
equality | +0.447 |
realistically | +0.446 |
guaranteeing | +0.443 |
illusion | +0.435 |
Neg logit contributions | |
ose | -0.597 |
Bs | -0.591 |
ahn | -0.588 |
swick | -0.569 |
ッド | -0.561 |
Token | but |
Token ID | 475 |
Feature activation | -5.36e-01 |
Pos logit contributions | |
ose | +2.384 |
Bs | +2.351 |
ahn | +2.327 |
Adams | +2.207 |
swick | +2.203 |
Neg logit contributions | |
conservancy | -2.560 |
equality | -2.502 |
illusion | -2.444 |
guaranteeing | -2.432 |
artifacts | -2.412 |
Token | that |
Token ID | 326 |
Feature activation | +3.65e-01 |
Pos logit contributions | |
ose | +6.153 |
Bs | +6.078 |
ahn | +5.979 |
Irish | +5.771 |
Adams | +5.628 |
Neg logit contributions | |
conservancy | -6.120 |
equality | -5.835 |
illusion | -5.737 |
artifacts | -5.605 |
mbudsman | -5.547 |
Token | doesn |
Token ID | 1595 |
Feature activation | +7.04e-01 |
Pos logit contributions | |
realistically | +4.274 |
guaranteeing | +4.196 |
conservancy | +4.137 |
illusion | +4.129 |
equality | +4.127 |
Neg logit contributions | |
ose | -4.041 |
Bs | -4.018 |
ahn | -3.976 |
swick | -3.883 |
Adams | -3.801 |
Token | � |
Token ID | 447 |
Feature activation | +3.27e-01 |
Pos logit contributions | |
1080 | +6.832 |
realistically | +6.572 |
equality | +6.567 |
completely | +6.438 |
????? | +6.357 |
Neg logit contributions | |
behavi | -9.209 |
ose | -8.446 |
ahn | -8.332 |
entreprene | -8.232 |
swick | -8.183 |
Token | � |
Token ID | 247 |
Feature activation | +1.72e-01 |
Pos logit contributions | |
equality | +4.937 |
guaranteeing | +4.917 |
realistically | +4.912 |
conservancy | +4.899 |
minimum | +4.855 |
Neg logit contributions | |
ose | -2.467 |
Bs | -2.387 |
ahn | -2.374 |
swick | -2.336 |
Js | -2.234 |
Token | t |
Token ID | 83 |
Feature activation | +2.08e+00 |
Pos logit contributions | |
conservancy | +3.095 |
realistically | +3.083 |
equality | +3.077 |
guaranteeing | +3.067 |
illusion | +3.034 |
Neg logit contributions | |
ose | -0.829 |
Bs | -0.823 |
ahn | -0.801 |
swick | -0.729 |
Adams | -0.719 |
Token | mean |
Token ID | 1612 |
Feature activation | +4.55e-01 |
Pos logit contributions | |
realistically | +19.367 |
belie | +17.696 |
remotely | +17.413 |
guaranteeing | +17.002 |
completely | +16.838 |
Neg logit contributions | |
Lear | -19.031 |
swick | -18.520 |
Fle | -18.288 |
ッド | -18.224 |
Flavoring | -18.078 |
Token | you |
Token ID | 345 |
Feature activation | +7.24e-01 |
Pos logit contributions | |
realistically | +5.183 |
guaranteeing | +5.169 |
equality | +4.986 |
conservancy | +4.887 |
1080 | +4.862 |
Neg logit contributions | |
ose | -5.197 |
Bs | -5.142 |
ahn | -5.137 |
swick | -5.062 |
ッド | -4.888 |
Token | can |
Token ID | 460 |
Feature activation | +1.17e+00 |
Pos logit contributions | |
realistically | +7.959 |
guaranteeing | +7.655 |
equality | +7.576 |
completely | +7.557 |
1080 | +7.495 |
Neg logit contributions | |
Bs | -7.738 |
Flavoring | -7.688 |
ahn | -7.678 |
swick | -7.620 |
ose | -7.606 |
Token | � |
Token ID | 447 |
Feature activation | +6.47e-01 |
Pos logit contributions | |
realistically | +14.095 |
1080 | +13.107 |
guaranteeing | +13.074 |
completely | +12.831 |
illusion | +12.537 |
Neg logit contributions | |
swick | -10.457 |
Lear | -10.072 |
ahn | -10.049 |
behavi | -10.032 |
Adams | -9.974 |
Token | � |
Token ID | 247 |
Feature activation | +2.89e-01 |
Pos logit contributions | |
realistically | +8.646 |
equality | +8.645 |
completely | +8.595 |
minimum | +8.580 |
guaranteeing | +8.562 |
Neg logit contributions | |
behavi | -5.319 |
Bs | -5.300 |
ose | -5.294 |
swick | -5.250 |
ahn | -5.176 |
Token |
|
Token ID | 198 |
Feature activation | +5.10e-01 |
Pos logit contributions | |
ose | +1.146 |
Bs | +1.127 |
ahn | +1.120 |
swick | +1.070 |
Adams | +1.063 |
Neg logit contributions | |
conservancy | -1.104 |
equality | -1.063 |
guaranteeing | -1.059 |
realistically | -1.050 |
illusion | -1.045 |
Token | The |
Token ID | 464 |
Feature activation | -1.98e-01 |
Pos logit contributions | |
equality | +6.080 |
1080 | +6.031 |
minimum | +5.949 |
completely | +5.897 |
realistically | +5.859 |
Neg logit contributions | |
behavi | -5.311 |
ahn | -5.174 |
ose | -5.153 |
Flavoring | -5.082 |
Bs | -5.050 |
Token | loss |
Token ID | 2994 |
Feature activation | +1.64e+00 |
Pos logit contributions | |
ose | +2.056 |
Bs | +2.004 |
ahn | +2.003 |
Irish | +1.887 |
swick | +1.877 |
Neg logit contributions | |
conservancy | -2.574 |
equality | -2.492 |
guaranteeing | -2.455 |
illusion | -2.451 |
realistically | -2.448 |
Token | to |
Token ID | 284 |
Feature activation | +5.04e-01 |
Pos logit contributions | |
guaranteeing | +15.313 |
realistically | +15.126 |
devastated | +15.051 |
disqualified | +14.661 |
guarantees | +14.137 |
Neg logit contributions | |
ose | -15.605 |
behavi | -15.383 |
Flavoring | -15.248 |
ッド | -15.212 |
ahn | -15.086 |
Token | the |
Token ID | 262 |
Feature activation | +2.06e-02 |
Pos logit contributions | |
guaranteeing | +5.630 |
realistically | +5.611 |
equality | +5.558 |
conservancy | +5.412 |
minimum | +5.363 |
Neg logit contributions | |
Bs | -5.718 |
ose | -5.645 |
ahn | -5.613 |
swick | -5.425 |
Lear | -5.330 |
Token | Flyers |
Token ID | 30652 |
Feature activation | +2.08e+00 |
Pos logit contributions | |
conservancy | +0.274 |
equality | +0.271 |
realistically | +0.269 |
guaranteeing | +0.269 |
illusion | +0.265 |
Neg logit contributions | |
ose | -0.204 |
Bs | -0.202 |
ahn | -0.200 |
swick | -0.191 |
Adams | -0.187 |
Token | was |
Token ID | 373 |
Feature activation | +9.24e-01 |
Pos logit contributions | |
realistically | +18.044 |
devastated | +17.793 |
guaranteeing | +17.688 |
disqualified | +17.547 |
compensated | +16.816 |
Neg logit contributions | |
ose | -17.781 |
Lear | -17.497 |
ッド | -17.264 |
Flavoring | -17.263 |
behavi | -17.249 |
Token | Chicago |
Token ID | 4842 |
Feature activation | -3.72e-01 |
Pos logit contributions | |
realistically | +10.537 |
guaranteeing | +10.513 |
equality | +9.844 |
devastated | +9.842 |
2022 | +9.737 |
Neg logit contributions | |
Bs | -9.404 |
swick | -9.221 |
Flavoring | -9.048 |
ahn | -8.967 |
ose | -8.965 |
Token | 's |
Token ID | 338 |
Feature activation | -2.83e-01 |
Pos logit contributions | |
ose | +3.454 |
Bs | +3.355 |
ahn | +3.325 |
Irish | +3.127 |
swick | +3.123 |
Neg logit contributions | |
conservancy | -5.271 |
equality | -5.106 |
illusion | -4.978 |
artifacts | -4.941 |
1080 | -4.931 |
Token | fourth |
Token ID | 5544 |
Feature activation | +9.12e-02 |
Pos logit contributions | |
ose | +1.986 |
Bs | +1.910 |
ahn | +1.886 |
Irish | +1.774 |
Adams | +1.729 |
Neg logit contributions | |
conservancy | -4.604 |
equality | -4.478 |
illusion | -4.435 |
artifacts | -4.403 |
guaranteeing | -4.383 |
Token | straight |
Token ID | 3892 |
Feature activation | -2.27e-01 |
Pos logit contributions | |
conservancy | +0.983 |
guaranteeing | +0.978 |
equality | +0.978 |
realistically | +0.978 |
illusion | +0.950 |
Neg logit contributions | |
ose | -1.111 |
Bs | -1.102 |
ahn | -1.099 |
swick | -1.061 |
ッド | -1.043 |
Token | while |
Token ID | 981 |
Feature activation | +2.33e-01 |
Pos logit contributions | |
ose | +1.233 |
Bs | +1.215 |
ahn | +1.210 |
swick | +1.155 |
Adams | +1.139 |
Neg logit contributions | |
conservancy | -1.189 |
equality | -1.166 |
guaranteeing | -1.144 |
realistically | -1.140 |
illusion | -1.136 |
Token | we |
Token ID | 356 |
Feature activation | +2.00e-02 |
Pos logit contributions | |
realistically | +2.555 |
guaranteeing | +2.548 |
equality | +2.529 |
conservancy | +2.523 |
illusion | +2.471 |
Neg logit contributions | |
ose | -2.808 |
Bs | -2.776 |
ahn | -2.758 |
swick | -2.677 |
Adams | -2.634 |
Token | can |
Token ID | 460 |
Feature activation | +7.77e-01 |
Pos logit contributions | |
conservancy | +0.234 |
equality | +0.230 |
realistically | +0.229 |
guaranteeing | +0.228 |
illusion | +0.225 |
Neg logit contributions | |
ose | -0.229 |
Bs | -0.226 |
ahn | -0.225 |
swick | -0.216 |
Adams | -0.213 |
Token | � |
Token ID | 447 |
Feature activation | +2.05e-01 |
Pos logit contributions | |
realistically | +8.983 |
guaranteeing | +8.552 |
1080 | +8.213 |
illusion | +8.162 |
belie | +8.073 |
Neg logit contributions | |
Bs | -8.365 |
behavi | -8.233 |
Lear | -8.217 |
Flavoring | -8.211 |
swick | -8.192 |
Token | � |
Token ID | 247 |
Feature activation | +5.70e-01 |
Pos logit contributions | |
equality | +3.204 |
conservancy | +3.195 |
guaranteeing | +3.180 |
realistically | +3.177 |
illusion | +3.139 |
Neg logit contributions | |
ose | -1.471 |
Bs | -1.446 |
ahn | -1.437 |
swick | -1.374 |
Js | -1.317 |
Token | t |
Token ID | 83 |
Feature activation | +2.06e+00 |
Pos logit contributions | |
realistically | +9.329 |
conservancy | +9.328 |
illusion | +9.313 |
equality | +9.313 |
completely | +9.259 |
Neg logit contributions | |
Bs | -3.382 |
Flavoring | -3.302 |
behavi | -3.228 |
Adams | -3.115 |
ahn | -3.069 |
Token | say |
Token ID | 910 |
Feature activation | +1.14e+00 |
Pos logit contributions | |
realistically | +20.925 |
guaranteeing | +19.506 |
completely | +18.270 |
remotely | +18.215 |
belie | +18.050 |
Neg logit contributions | |
behavi | -18.291 |
Lear | -17.603 |
swick | -17.259 |
ッド | -17.044 |
Fle | -16.389 |
Token | we |
Token ID | 356 |
Feature activation | +6.49e-01 |
Pos logit contributions | |
realistically | +12.301 |
guaranteeing | +12.022 |
2022 | +11.155 |
equality | +11.050 |
completely | +11.000 |
Neg logit contributions | |
swick | -12.144 |
ose | -11.458 |
Adams | -11.447 |
Lear | -11.419 |
Bs | -11.326 |
Token | mind |
Token ID | 2000 |
Feature activation | +1.75e-01 |
Pos logit contributions | |
realistically | +7.360 |
guaranteeing | +7.063 |
equality | +6.996 |
illusion | +6.988 |
completely | +6.987 |
Neg logit contributions | |
Bs | -6.930 |
ahn | -6.926 |
Flavoring | -6.923 |
ose | -6.874 |
swick | -6.865 |
Token | , |
Token ID | 11 |
Feature activation | +3.64e-01 |
Pos logit contributions | |
conservancy | +2.007 |
realistically | +2.006 |
guaranteeing | +2.003 |
equality | +1.994 |
illusion | +1.948 |
Neg logit contributions | |
ose | -1.999 |
Bs | -1.989 |
ahn | -1.965 |
swick | -1.907 |
Adams | -1.889 |
Token | we |
Token ID | 356 |
Feature activation | +2.89e-01 |
Pos logit contributions | |
realistically | +4.128 |
guaranteeing | +4.069 |
equality | +4.012 |
conservancy | +4.007 |
illusion | +3.936 |
Neg logit contributions | |
ose | -4.193 |
Bs | -4.142 |
ahn | -4.113 |
swick | -4.023 |
Adams | -3.959 |
Token | but |
Token ID | 475 |
Feature activation | -2.41e-01 |
Pos logit contributions | |
conservancy | +1.007 |
equality | +0.995 |
guaranteeing | +0.995 |
realistically | +0.990 |
illusion | +0.973 |
Neg logit contributions | |
ose | -1.119 |
Bs | -1.109 |
ahn | -1.102 |
swick | -1.064 |
Adams | -1.041 |
Token | that |
Token ID | 326 |
Feature activation | +4.98e-01 |
Pos logit contributions | |
ose | +2.862 |
Bs | +2.819 |
ahn | +2.786 |
Irish | +2.664 |
Adams | +2.652 |
Neg logit contributions | |
conservancy | -2.754 |
equality | -2.672 |
illusion | -2.620 |
guaranteeing | -2.588 |
realistically | -2.580 |
Token | doesn |
Token ID | 1595 |
Feature activation | +7.87e-01 |
Pos logit contributions | |
realistically | +5.612 |
guaranteeing | +5.540 |
equality | +5.507 |
illusion | +5.486 |
conservancy | +5.424 |
Neg logit contributions | |
ose | -5.499 |
Bs | -5.487 |
ahn | -5.456 |
swick | -5.327 |
ッド | -5.242 |
Token | � |
Token ID | 447 |
Feature activation | +3.00e-01 |
Pos logit contributions | |
1080 | +7.369 |
realistically | +6.947 |
equality | +6.915 |
completely | +6.796 |
????? | +6.789 |
Neg logit contributions | |
behavi | -10.645 |
entreprene | -9.640 |
ose | -9.608 |
ahn | -9.444 |
ThumbnailImage | -9.421 |
Token | � |
Token ID | 247 |
Feature activation | +2.05e-01 |
Pos logit contributions | |
equality | +4.546 |
guaranteeing | +4.533 |
realistically | +4.525 |
conservancy | +4.522 |
minimum | +4.459 |
Neg logit contributions | |
ose | -2.261 |
ahn | -2.196 |
Bs | -2.194 |
swick | -2.130 |
Js | -2.039 |
Token | t |
Token ID | 83 |
Feature activation | +2.06e+00 |
Pos logit contributions | |
conservancy | +3.663 |
equality | +3.655 |
realistically | +3.651 |
guaranteeing | +3.633 |
illusion | +3.605 |
Neg logit contributions | |
ose | -1.008 |
Bs | -0.990 |
ahn | -0.964 |
swick | -0.904 |
ッド | -0.876 |
Token | mean |
Token ID | 1612 |
Feature activation | +3.27e-01 |
Pos logit contributions | |
realistically | +18.708 |
belie | +18.288 |
remotely | +17.408 |
guaranteeing | +16.504 |
Explain | +16.500 |
Neg logit contributions | |
Lear | -19.014 |
ッド | -18.509 |
swick | -18.411 |
Fle | -18.107 |
behavi | -17.733 |
Token | it |
Token ID | 340 |
Feature activation | +1.02e+00 |
Pos logit contributions | |
guaranteeing | +3.420 |
realistically | +3.412 |
equality | +3.394 |
conservancy | +3.364 |
illusion | +3.306 |
Neg logit contributions | |
ose | -4.034 |
ahn | -3.977 |
Bs | -3.975 |
swick | -3.899 |
ッド | -3.796 |
Token | � |
Token ID | 447 |
Feature activation | +5.29e-01 |
Pos logit contributions | |
realistically | +10.089 |
guaranteeing | +9.592 |
completely | +9.535 |
1080 | +9.430 |
equality | +9.359 |
Neg logit contributions | |
Flavoring | -11.075 |
Bs | -10.939 |
ose | -10.912 |
Adams | -10.635 |
ッド | -10.597 |
Token | � |
Token ID | 247 |
Feature activation | +5.98e-01 |
Pos logit contributions | |
equality | +7.445 |
guaranteeing | +7.431 |
realistically | +7.424 |
minimum | +7.374 |
completely | +7.352 |
Neg logit contributions | |
ose | -4.280 |
ahn | -4.158 |
Bs | -4.082 |
swick | -4.069 |
behavi | -3.953 |
Token | s |
Token ID | 82 |
Feature activation | +9.31e-01 |
Pos logit contributions | |
realistically | +8.830 |
equality | +8.678 |
1080 | +8.643 |
guaranteeing | +8.626 |
completely | +8.621 |
Neg logit contributions | |
ose | -4.285 |
Bs | -4.270 |
Flavoring | -4.205 |
behavi | -4.160 |
ahn | -4.146 |
Token | https |
Token ID | 3740 |
Feature activation | -4.64e-01 |
Pos logit contributions | |
ose | +0.444 |
Bs | +0.438 |
ahn | +0.436 |
swick | +0.417 |
Adams | +0.411 |
Neg logit contributions | |
conservancy | -0.451 |
equality | -0.443 |
guaranteeing | -0.438 |
realistically | -0.438 |
illusion | -0.433 |
Token | :// |
Token ID | 1378 |
Feature activation | +7.84e-02 |
Pos logit contributions | |
ose | +3.427 |
Bs | +3.363 |
ahn | +3.331 |
Adams | +3.106 |
swick | +3.054 |
Neg logit contributions | |
conservancy | -7.516 |
illusion | -7.081 |
guaranteeing | -7.055 |
equality | -7.008 |
artifacts | -6.967 |
Token | t |
Token ID | 83 |
Feature activation | +1.29e-01 |
Pos logit contributions | |
conservancy | +1.445 |
equality | +1.443 |
realistically | +1.427 |
guaranteeing | +1.425 |
illusion | +1.421 |
Neg logit contributions | |
ose | -0.362 |
Bs | -0.352 |
ahn | -0.344 |
swick | -0.311 |
ッド | -0.300 |
Token | . |
Token ID | 13 |
Feature activation | -4.12e-01 |
Pos logit contributions | |
equality | +1.469 |
conservancy | +1.456 |
realistically | +1.453 |
guaranteeing | +1.433 |
1080 | +1.429 |
Neg logit contributions | |
ose | -1.487 |
Bs | -1.472 |
ahn | -1.460 |
swick | -1.404 |
ッド | -1.404 |
Token | co |
Token ID | 1073 |
Feature activation | -5.09e-01 |
Pos logit contributions | |
ose | +3.697 |
Bs | +3.610 |
ahn | +3.583 |
Js | +3.292 |
Irish | +3.252 |
Neg logit contributions | |
conservancy | -6.077 |
guaranteeing | -5.799 |
equality | -5.698 |
realistically | -5.659 |
illusion | -5.652 |
Token | / |
Token ID | 14 |
Feature activation | -3.51e+00 |
Pos logit contributions | |
ose | +4.714 |
Bs | +4.615 |
ahn | +4.585 |
Js | +4.270 |
Adams | +4.268 |
Neg logit contributions | |
conservancy | -7.377 |
illusion | -6.851 |
guaranteeing | -6.833 |
artifacts | -6.762 |
equality | -6.729 |
Token | l |
Token ID | 75 |
Feature activation | -2.18e+00 |
Pos logit contributions | |
Bs | +26.980 |
Js | +25.848 |
ose | +25.443 |
zb | +25.421 |
ahn | +25.133 |
Neg logit contributions | |
conservancy | -18.634 |
guaranteeing | -17.442 |
realistically | -16.505 |
disqualified | -16.055 |
condol | -15.934 |
Token | g |
Token ID | 70 |
Feature activation | -1.79e+00 |
Pos logit contributions | |
Bs | +21.117 |
ose | +20.231 |
Js | +20.214 |
zb | +19.868 |
ahn | +19.757 |
Neg logit contributions | |
conservancy | -16.822 |
guaranteeing | -15.881 |
realistically | -15.142 |
condol | -14.741 |
disqualified | -14.597 |
Token | S |
Token ID | 50 |
Feature activation | -1.63e+00 |
Pos logit contributions | |
Bs | +18.467 |
ose | +17.717 |
Js | +17.696 |
ahn | +17.552 |
zb | +17.344 |
Neg logit contributions | |
conservancy | -15.188 |
guaranteeing | -14.568 |
realistically | -14.141 |
condol | -13.663 |
disqualified | -13.468 |
Token | wh |
Token ID | 1929 |
Feature activation | -3.04e+00 |
Pos logit contributions | |
Bs | +16.777 |
ose | +16.520 |
Js | +16.300 |
ahn | +16.183 |
zb | +16.091 |
Neg logit contributions | |
conservancy | -15.056 |
guaranteeing | -14.204 |
realistically | -13.814 |
equality | -13.355 |
artifacts | -13.203 |
Token | X |
Token ID | 55 |
Feature activation | -3.03e+00 |
Pos logit contributions | |
Bs | +25.532 |
Js | +24.289 |
ose | +24.184 |
zb | +24.173 |
ahn | +23.859 |
Neg logit contributions | |
conservancy | -17.988 |
guaranteeing | -16.996 |
realistically | -16.113 |
condol | -15.759 |
disqualified | -15.588 |
Token | https |
Token ID | 3740 |
Feature activation | -3.74e-01 |
Pos logit contributions | |
ose | +1.569 |
Bs | +1.546 |
ahn | +1.538 |
swick | +1.465 |
Adams | +1.457 |
Neg logit contributions | |
conservancy | -1.449 |
equality | -1.405 |
realistically | -1.393 |
guaranteeing | -1.391 |
illusion | -1.380 |
Token | :// |
Token ID | 1378 |
Feature activation | +1.29e-01 |
Pos logit contributions | |
ose | +2.927 |
Bs | +2.876 |
ahn | +2.848 |
Adams | +2.662 |
swick | +2.618 |
Neg logit contributions | |
conservancy | -5.940 |
illusion | -5.620 |
guaranteeing | -5.589 |
equality | -5.573 |
artifacts | -5.536 |
Token | t |
Token ID | 83 |
Feature activation | +3.48e-01 |
Pos logit contributions | |
equality | +2.349 |
conservancy | +2.347 |
realistically | +2.319 |
guaranteeing | +2.316 |
illusion | +2.312 |
Neg logit contributions | |
ose | -0.611 |
Bs | -0.597 |
ahn | -0.580 |
swick | -0.534 |
ッド | -0.517 |
Token | . |
Token ID | 13 |
Feature activation | -3.03e-01 |
Pos logit contributions | |
equality | +3.659 |
realistically | +3.627 |
1080 | +3.607 |
conservancy | +3.550 |
guaranteeing | +3.524 |
Neg logit contributions | |
Bs | -4.172 |
ose | -4.169 |
behavi | -4.166 |
ahn | -4.087 |
ッド | -4.064 |
Token | co |
Token ID | 1073 |
Feature activation | -6.20e-01 |
Pos logit contributions | |
ose | +2.820 |
Bs | +2.796 |
ahn | +2.766 |
Js | +2.552 |
swick | +2.533 |
Neg logit contributions | |
conservancy | -4.287 |
guaranteeing | -4.117 |
equality | -4.108 |
realistically | -4.063 |
illusion | -4.052 |
Token | / |
Token ID | 14 |
Feature activation | -3.44e+00 |
Pos logit contributions | |
ose | +5.484 |
Bs | +5.342 |
ahn | +5.341 |
Adams | +4.935 |
Irish | +4.927 |
Neg logit contributions | |
conservancy | -9.191 |
illusion | -8.482 |
guaranteeing | -8.437 |
artifacts | -8.407 |
equality | -8.283 |
Token | X |
Token ID | 55 |
Feature activation | -3.15e+00 |
Pos logit contributions | |
Bs | +26.876 |
Js | +25.746 |
zb | +25.372 |
ose | +25.360 |
ahn | +25.111 |
Neg logit contributions | |
conservancy | -18.438 |
guaranteeing | -17.349 |
realistically | -16.453 |
disqualified | -16.079 |
condol | -15.908 |
Token | h |
Token ID | 71 |
Feature activation | -2.03e+00 |
Pos logit contributions | |
Bs | +25.947 |
Js | +25.005 |
ose | +24.585 |
zb | +24.462 |
ahn | +24.341 |
Neg logit contributions | |
conservancy | -18.332 |
guaranteeing | -17.291 |
realistically | -16.138 |
condol | -15.767 |
devastated | -15.494 |
Token | 7 |
Token ID | 22 |
Feature activation | -3.22e+00 |
Pos logit contributions | |
Bs | +20.329 |
Js | +19.382 |
ose | +19.381 |
ahn | +19.153 |
zb | +18.927 |
Neg logit contributions | |
conservancy | -15.834 |
guaranteeing | -15.392 |
realistically | -14.681 |
condol | -14.282 |
Explain | -14.033 |
Token | J |
Token ID | 41 |
Feature activation | -2.69e+00 |
Pos logit contributions | |
Bs | +26.150 |
Js | +24.892 |
zb | +24.531 |
ose | +24.458 |
sb | +24.416 |
Neg logit contributions | |
conservancy | -18.468 |
guaranteeing | -17.361 |
realistically | -16.330 |
condol | -16.105 |
disqualified | -15.823 |
Token | m |
Token ID | 76 |
Feature activation | -2.27e+00 |
Pos logit contributions | |
Bs | +23.807 |
ose | +23.044 |
zb | +22.901 |
Js | +22.535 |
ahn | +22.495 |
Neg logit contributions | |
conservancy | -17.671 |
guaranteeing | -16.893 |
realistically | -15.797 |
condol | -15.659 |
disqualified | -15.302 |
Token | pic |
Token ID | 8301 |
Feature activation | -9.29e-01 |
Pos logit contributions | |
realistically | +3.430 |
equality | +3.423 |
guaranteeing | +3.423 |
conservancy | +3.398 |
1080 | +3.306 |
Neg logit contributions | |
Bs | -3.481 |
ose | -3.459 |
ahn | -3.409 |
swick | -3.279 |
ッド | -3.276 |
Token | . |
Token ID | 13 |
Feature activation | -3.98e-01 |
Pos logit contributions | |
ose | +6.422 |
Bs | +6.365 |
ahn | +6.169 |
Adams | +6.024 |
Irish | +5.849 |
Neg logit contributions | |
conservancy | -15.042 |
artifacts | -13.684 |
illusion | -13.565 |
guaranteeing | -13.565 |
mbuds | -13.468 |
Token | twitter |
Token ID | 6956 |
Feature activation | -8.20e-01 |
Pos logit contributions | |
ose | +4.736 |
Bs | +4.729 |
ahn | +4.686 |
Js | +4.442 |
Adams | +4.414 |
Neg logit contributions | |
conservancy | -4.612 |
guaranteeing | -4.389 |
realistically | -4.229 |
equality | -4.228 |
illusion | -4.196 |
Token | . |
Token ID | 13 |
Feature activation | -4.46e-02 |
Pos logit contributions | |
ose | +6.044 |
Bs | +5.961 |
ahn | +5.911 |
Js | +5.453 |
Adams | +5.418 |
Neg logit contributions | |
conservancy | -13.104 |
guaranteeing | -11.967 |
artifacts | -11.930 |
illusion | -11.806 |
mbuds | -11.747 |
Token | com |
Token ID | 785 |
Feature activation | -5.83e-01 |
Pos logit contributions | |
ose | +0.562 |
Bs | +0.554 |
ahn | +0.552 |
swick | +0.530 |
Adams | +0.523 |
Neg logit contributions | |
conservancy | -0.476 |
equality | -0.464 |
guaranteeing | -0.460 |
realistically | -0.459 |
illusion | -0.454 |
Token | / |
Token ID | 14 |
Feature activation | -3.44e+00 |
Pos logit contributions | |
Bs | +5.134 |
ose | +5.084 |
ahn | +4.981 |
Js | +4.735 |
Irish | +4.702 |
Neg logit contributions | |
conservancy | -8.792 |
artifacts | -8.043 |
guaranteeing | -8.005 |
illusion | -7.991 |
equality | -7.773 |
Token | Eh |
Token ID | 43894 |
Feature activation | -2.19e+00 |
Pos logit contributions | |
Bs | +26.773 |
Js | +25.556 |
zb | +25.261 |
ose | +25.162 |
sb | +24.977 |
Neg logit contributions | |
conservancy | -18.609 |
guaranteeing | -17.604 |
realistically | -16.579 |
disqualified | -16.203 |
condol | -16.073 |
Token | U |
Token ID | 52 |
Feature activation | -1.78e+00 |
Pos logit contributions | |
Bs | +21.582 |
Js | +20.707 |
ose | +20.347 |
ahn | +20.110 |
zb | +19.850 |
Neg logit contributions | |
conservancy | -16.634 |
guaranteeing | -16.011 |
realistically | -15.046 |
condol | -14.601 |
Explain | -14.430 |
Token | g |
Token ID | 70 |
Feature activation | -2.06e+00 |
Pos logit contributions | |
Bs | +18.160 |
ose | +17.853 |
Js | +17.612 |
ahn | +17.407 |
zb | +17.268 |
Neg logit contributions | |
conservancy | -15.644 |
guaranteeing | -14.798 |
realistically | -14.348 |
condol | -13.772 |
belie | -13.718 |
Token | I |
Token ID | 40 |
Feature activation | -1.63e+00 |
Pos logit contributions | |
Bs | +20.556 |
Js | +19.697 |
ose | +19.519 |
ahn | +19.266 |
zb | +19.192 |
Neg logit contributions | |
conservancy | -16.209 |
guaranteeing | -15.499 |
realistically | -14.911 |
condol | -14.599 |
disqualified | -14.386 |
Token | x |
Token ID | 87 |
Feature activation | -2.96e+00 |
Pos logit contributions | |
ose | +16.960 |
Bs | +16.819 |
Js | +16.306 |
ahn | +16.199 |
zb | +16.191 |
Neg logit contributions | |
conservancy | -15.066 |
guaranteeing | -13.989 |
realistically | -13.772 |
illusion | -13.213 |
condol | -13.196 |
Token | https |
Token ID | 3740 |
Feature activation | -5.56e-01 |
Pos logit contributions | |
ose | +3.646 |
ahn | +3.601 |
Bs | +3.595 |
Adams | +3.405 |
swick | +3.401 |
Neg logit contributions | |
conservancy | -4.582 |
equality | -4.341 |
illusion | -4.330 |
realistically | -4.291 |
guaranteeing | -4.291 |
Token | :// |
Token ID | 1378 |
Feature activation | -9.75e-02 |
Pos logit contributions | |
ose | +3.910 |
ahn | +3.858 |
Bs | +3.831 |
Adams | +3.521 |
Js | +3.454 |
Neg logit contributions | |
conservancy | -9.272 |
illusion | -8.635 |
guaranteeing | -8.604 |
equality | -8.520 |
artifacts | -8.477 |
Token | t |
Token ID | 83 |
Feature activation | -3.50e-02 |
Pos logit contributions | |
ose | +0.393 |
Bs | +0.375 |
ahn | +0.374 |
swick | +0.322 |
Adams | +0.307 |
Neg logit contributions | |
conservancy | -1.881 |
equality | -1.850 |
guaranteeing | -1.845 |
realistically | -1.842 |
illusion | -1.826 |
Token | . |
Token ID | 13 |
Feature activation | -4.05e-01 |
Pos logit contributions | |
ose | +0.384 |
Bs | +0.378 |
ahn | +0.376 |
swick | +0.359 |
Adams | +0.354 |
Neg logit contributions | |
conservancy | -0.430 |
equality | -0.422 |
guaranteeing | -0.419 |
realistically | -0.418 |
illusion | -0.413 |
Token | co |
Token ID | 1073 |
Feature activation | -5.63e-01 |
Pos logit contributions | |
ose | +3.614 |
ahn | +3.553 |
Bs | +3.544 |
Js | +3.225 |
Irish | +3.205 |
Neg logit contributions | |
conservancy | -5.918 |
guaranteeing | -5.681 |
equality | -5.631 |
illusion | -5.571 |
realistically | -5.550 |
Token | / |
Token ID | 14 |
Feature activation | -3.41e+00 |
Pos logit contributions | |
ose | +5.093 |
ahn | +5.002 |
Bs | +4.982 |
Adams | +4.602 |
Js | +4.590 |
Neg logit contributions | |
conservancy | -8.253 |
illusion | -7.648 |
guaranteeing | -7.640 |
artifacts | -7.567 |
equality | -7.512 |
Token | ni |
Token ID | 8461 |
Feature activation | -2.04e+00 |
Pos logit contributions | |
Bs | +26.825 |
Js | +25.722 |
zb | +25.313 |
ose | +25.167 |
ahn | +24.996 |
Neg logit contributions | |
conservancy | -18.401 |
guaranteeing | -17.259 |
realistically | -16.345 |
disqualified | -15.885 |
condol | -15.798 |
Token | WB |
Token ID | 45607 |
Feature activation | -2.22e+00 |
Pos logit contributions | |
Bs | +20.565 |
Js | +19.661 |
ose | +19.633 |
ahn | +19.213 |
zb | +18.641 |
Neg logit contributions | |
conservancy | -16.333 |
guaranteeing | -15.569 |
realistically | -14.955 |
condol | -14.268 |
belie | -14.138 |
Token | qv |
Token ID | 44179 |
Feature activation | -2.71e+00 |
Pos logit contributions | |
Bs | +21.349 |
Js | +21.050 |
ose | +20.959 |
ahn | +20.829 |
zb | +20.547 |
Neg logit contributions | |
conservancy | -16.371 |
guaranteeing | -15.744 |
realistically | -15.086 |
condol | -14.500 |
belie | -14.409 |
Token | fi |
Token ID | 12463 |
Feature activation | -2.10e+00 |
Pos logit contributions | |
Bs | +24.204 |
Js | +23.047 |
ose | +22.978 |
zb | +22.862 |
ahn | +22.601 |
Neg logit contributions | |
conservancy | -17.723 |
guaranteeing | -16.741 |
realistically | -15.736 |
condol | -15.607 |
belie | -15.390 |
Token | W |
Token ID | 54 |
Feature activation | -2.50e+00 |
Pos logit contributions | |
Bs | +20.791 |
Js | +19.786 |
ose | +19.758 |
ahn | +19.297 |
zb | +18.903 |
Neg logit contributions | |
conservancy | -16.979 |
guaranteeing | -16.009 |
realistically | -15.177 |
condol | -14.748 |
belie | -14.701 |
Token | pic |
Token ID | 8301 |
Feature activation | -2.67e-01 |
Pos logit contributions | |
equality | +7.243 |
guaranteeing | +7.157 |
1080 | +7.091 |
realistically | +7.037 |
2022 | +6.975 |
Neg logit contributions | |
ahn | -7.246 |
ose | -7.239 |
Bs | -7.217 |
swick | -7.116 |
ッド | -7.009 |
Token | . |
Token ID | 13 |
Feature activation | -2.14e-01 |
Pos logit contributions | |
ose | +2.669 |
Bs | +2.633 |
ahn | +2.600 |
Adams | +2.453 |
swick | +2.440 |
Neg logit contributions | |
conservancy | -3.667 |
guaranteeing | -3.458 |
illusion | -3.452 |
equality | -3.443 |
artifacts | -3.414 |
Token | twitter |
Token ID | 6956 |
Feature activation | -5.97e-01 |
Pos logit contributions | |
ose | +2.740 |
Bs | +2.723 |
ahn | +2.698 |
swick | +2.585 |
Js | +2.573 |
Neg logit contributions | |
conservancy | -2.288 |
guaranteeing | -2.191 |
equality | -2.150 |
realistically | -2.150 |
illusion | -2.130 |
Token | . |
Token ID | 13 |
Feature activation | -1.45e-02 |
Pos logit contributions | |
ose | +5.003 |
Bs | +4.944 |
ahn | +4.900 |
Adams | +4.545 |
Js | +4.527 |
Neg logit contributions | |
conservancy | -9.161 |
guaranteeing | -8.462 |
artifacts | -8.393 |
illusion | -8.351 |
equality | -8.166 |
Token | com |
Token ID | 785 |
Feature activation | -4.83e-01 |
Pos logit contributions | |
ose | +0.184 |
Bs | +0.182 |
ahn | +0.181 |
swick | +0.174 |
Adams | +0.172 |
Neg logit contributions | |
conservancy | -0.152 |
equality | -0.150 |
guaranteeing | -0.148 |
realistically | -0.148 |
illusion | -0.146 |
Token | / |
Token ID | 14 |
Feature activation | -3.38e+00 |
Pos logit contributions | |
Bs | +4.499 |
ose | +4.477 |
ahn | +4.366 |
Js | +4.172 |
Adams | +4.126 |
Neg logit contributions | |
conservancy | -7.025 |
guaranteeing | -6.461 |
illusion | -6.442 |
artifacts | -6.434 |
equality | -6.330 |
Token | O |
Token ID | 46 |
Feature activation | -1.46e+00 |
Pos logit contributions | |
Bs | +26.672 |
Js | +25.430 |
zb | +25.186 |
ose | +25.112 |
sb | +24.861 |
Neg logit contributions | |
conservancy | -18.359 |
guaranteeing | -17.546 |
realistically | -16.502 |
disqualified | -16.078 |
condol | -16.004 |
Token | JV |
Token ID | 41697 |
Feature activation | -2.77e+00 |
Pos logit contributions | |
Bs | +15.488 |
ose | +15.456 |
Js | +15.007 |
ahn | +14.888 |
zb | +14.842 |
Neg logit contributions | |
conservancy | -13.980 |
guaranteeing | -13.316 |
realistically | -12.934 |
illusion | -12.379 |
equality | -12.316 |
Token | L |
Token ID | 43 |
Feature activation | -2.07e+00 |
Pos logit contributions | |
Bs | +24.187 |
Js | +23.393 |
ose | +23.352 |
ahn | +23.158 |
zb | +23.126 |
Neg logit contributions | |
conservancy | -17.782 |
guaranteeing | -16.841 |
realistically | -15.807 |
condol | -15.461 |
belie | -15.276 |
Token | F |
Token ID | 37 |
Feature activation | -2.19e+00 |
Pos logit contributions | |
Bs | +20.202 |
ose | +19.946 |
Js | +19.562 |
zb | +19.536 |
ahn | +19.316 |
Neg logit contributions | |
conservancy | -16.623 |
guaranteeing | -15.534 |
realistically | -14.946 |
condol | -14.415 |
belie | -14.327 |
Token | Re |
Token ID | 3041 |
Feature activation | -2.82e+00 |
Pos logit contributions | |
Bs | +21.043 |
ose | +20.724 |
Js | +20.669 |
zb | +20.407 |
ahn | +20.352 |
Neg logit contributions | |
conservancy | -16.222 |
guaranteeing | -15.772 |
realistically | -15.025 |
belie | -14.455 |
condol | -14.390 |
Token | pic |
Token ID | 8301 |
Feature activation | -4.23e-01 |
Pos logit contributions | |
ose | +4.059 |
ahn | +3.964 |
Bs | +3.938 |
Irish | +3.782 |
ッド | +3.718 |
Neg logit contributions | |
conservancy | -3.834 |
equality | -3.721 |
illusion | -3.694 |
realistically | -3.666 |
guaranteeing | -3.653 |
Token | . |
Token ID | 13 |
Feature activation | -2.33e-01 |
Pos logit contributions | |
ose | +3.979 |
Bs | +3.920 |
ahn | +3.858 |
Adams | +3.660 |
Irish | +3.655 |
Neg logit contributions | |
conservancy | -6.042 |
illusion | -5.639 |
equality | -5.633 |
guaranteeing | -5.605 |
artifacts | -5.572 |
Token | twitter |
Token ID | 6956 |
Feature activation | -6.29e-01 |
Pos logit contributions | |
ose | +2.994 |
Bs | +2.965 |
ahn | +2.950 |
swick | +2.802 |
Adams | +2.790 |
Neg logit contributions | |
conservancy | -2.459 |
equality | -2.344 |
guaranteeing | -2.342 |
realistically | -2.302 |
illusion | -2.294 |
Token | . |
Token ID | 13 |
Feature activation | +4.16e-02 |
Pos logit contributions | |
ose | +5.228 |
ahn | +5.147 |
Bs | +5.143 |
Irish | +4.743 |
Adams | +4.726 |
Neg logit contributions | |
conservancy | -9.596 |
artifacts | -8.834 |
guaranteeing | -8.826 |
illusion | -8.802 |
equality | -8.619 |
Token | com |
Token ID | 785 |
Feature activation | -6.02e-01 |
Pos logit contributions | |
conservancy | +0.422 |
equality | +0.420 |
realistically | +0.415 |
guaranteeing | +0.412 |
illusion | +0.407 |
Neg logit contributions | |
ose | -0.537 |
Bs | -0.530 |
ahn | -0.528 |
swick | -0.509 |
ッド | -0.503 |
Token | / |
Token ID | 14 |
Feature activation | -3.36e+00 |
Pos logit contributions | |
ose | +5.319 |
Bs | +5.306 |
ahn | +5.182 |
Irish | +4.945 |
Adams | +4.873 |
Neg logit contributions | |
conservancy | -8.956 |
artifacts | -8.227 |
illusion | -8.142 |
guaranteeing | -8.127 |
equality | -8.003 |
Token | 74 |
Token ID | 4524 |
Feature activation | -2.90e+00 |
Pos logit contributions | |
Bs | +26.682 |
Js | +25.438 |
zb | +25.132 |
ose | +25.116 |
ahn | +24.839 |
Neg logit contributions | |
conservancy | -18.283 |
guaranteeing | -17.251 |
realistically | -16.303 |
disqualified | -15.890 |
condol | -15.742 |
Token | E |
Token ID | 36 |
Feature activation | -1.59e+00 |
Pos logit contributions | |
Bs | +25.014 |
Js | +24.016 |
zb | +23.550 |
ahn | +23.536 |
sb | +23.397 |
Neg logit contributions | |
conservancy | -17.691 |
guaranteeing | -16.845 |
realistically | -15.890 |
condol | -15.483 |
belie | -15.345 |
Token | g |
Token ID | 70 |
Feature activation | -1.99e+00 |
Pos logit contributions | |
Bs | +16.936 |
ose | +16.552 |
Js | +16.271 |
ahn | +16.212 |
zb | +16.090 |
Neg logit contributions | |
conservancy | -14.180 |
guaranteeing | -13.397 |
realistically | -13.118 |
illusion | -12.583 |
equality | -12.481 |
Token | V |
Token ID | 53 |
Feature activation | -2.52e+00 |
Pos logit contributions | |
Bs | +20.232 |
ose | +19.323 |
Js | +19.298 |
ahn | +19.049 |
zb | +18.918 |
Neg logit contributions | |
conservancy | -15.585 |
guaranteeing | -15.002 |
realistically | -14.457 |
condol | -14.081 |
disqualified | -13.856 |
Token | V |
Token ID | 53 |
Feature activation | -2.45e+00 |
Pos logit contributions | |
Bs | +23.001 |
ose | +22.530 |
Js | +22.376 |
zb | +22.367 |
ahn | +22.202 |
Neg logit contributions | |
conservancy | -16.492 |
guaranteeing | -16.119 |
realistically | -15.377 |
condol | -14.938 |
belie | -14.798 |
Token | pic |
Token ID | 8301 |
Feature activation | -4.66e-01 |
Pos logit contributions | |
ose | +5.000 |
Bs | +4.926 |
ahn | +4.876 |
Adams | +4.684 |
Irish | +4.635 |
Neg logit contributions | |
conservancy | -4.959 |
equality | -4.765 |
realistically | -4.721 |
guaranteeing | -4.717 |
illusion | -4.685 |
Token | . |
Token ID | 13 |
Feature activation | -4.66e-01 |
Pos logit contributions | |
ose | +4.307 |
Bs | +4.219 |
ahn | +4.174 |
Adams | +3.939 |
Irish | +3.917 |
Neg logit contributions | |
conservancy | -6.759 |
guaranteeing | -6.306 |
illusion | -6.292 |
equality | -6.273 |
artifacts | -6.258 |
Token | twitter |
Token ID | 6956 |
Feature activation | -6.68e-01 |
Pos logit contributions | |
ose | +5.455 |
Bs | +5.453 |
ahn | +5.398 |
Js | +5.097 |
Adams | +5.081 |
Neg logit contributions | |
conservancy | -5.471 |
guaranteeing | -5.217 |
equality | -5.033 |
illusion | -5.005 |
realistically | -5.001 |
Token | . |
Token ID | 13 |
Feature activation | -1.89e-01 |
Pos logit contributions | |
ose | +5.346 |
Bs | +5.317 |
ahn | +5.280 |
Adams | +4.855 |
Js | +4.847 |
Neg logit contributions | |
conservancy | -10.374 |
guaranteeing | -9.556 |
artifacts | -9.519 |
illusion | -9.477 |
mbuds | -9.300 |
Token | com |
Token ID | 785 |
Feature activation | -5.34e-01 |
Pos logit contributions | |
ose | +2.275 |
Bs | +2.255 |
ahn | +2.239 |
swick | +2.129 |
Js | +2.108 |
Neg logit contributions | |
conservancy | -2.171 |
guaranteeing | -2.081 |
equality | -2.055 |
realistically | -2.047 |
illusion | -2.032 |
Token | / |
Token ID | 14 |
Feature activation | -3.34e+00 |
Pos logit contributions | |
Bs | +4.845 |
ose | +4.829 |
ahn | +4.719 |
Js | +4.487 |
Irish | +4.455 |
Neg logit contributions | |
conservancy | -7.855 |
artifacts | -7.230 |
guaranteeing | -7.227 |
illusion | -7.204 |
equality | -7.065 |
Token | gy |
Token ID | 1360 |
Feature activation | -3.11e+00 |
Pos logit contributions | |
Bs | +26.635 |
Js | +25.305 |
zb | +25.128 |
ose | +25.052 |
sb | +24.873 |
Neg logit contributions | |
conservancy | -18.241 |
guaranteeing | -17.428 |
realistically | -16.370 |
disqualified | -16.030 |
condol | -15.900 |
Token | d |
Token ID | 67 |
Feature activation | -2.09e+00 |
Pos logit contributions | |
Bs | +25.879 |
Js | +24.660 |
zb | +24.194 |
ose | +24.164 |
ahn | +23.869 |
Neg logit contributions | |
conservancy | -18.101 |
guaranteeing | -17.337 |
realistically | -16.342 |
condol | -15.951 |
disqualified | -15.929 |
Token | 3 |
Token ID | 18 |
Feature activation | -3.09e+00 |
Pos logit contributions | |
Bs | +20.624 |
ose | +19.902 |
Js | +19.832 |
zb | +19.719 |
ahn | +19.654 |
Neg logit contributions | |
conservancy | -15.757 |
guaranteeing | -15.416 |
realistically | -14.720 |
condol | -14.400 |
disqualified | -14.330 |
Token | p |
Token ID | 79 |
Feature activation | -2.18e+00 |
Pos logit contributions | |
Bs | +25.845 |
Js | +24.594 |
ose | +24.318 |
zb | +24.311 |
sb | +24.270 |
Neg logit contributions | |
conservancy | -18.044 |
guaranteeing | -17.148 |
realistically | -16.177 |
disqualified | -15.733 |
condol | -15.717 |
Token | W |
Token ID | 54 |
Feature activation | -2.50e+00 |
Pos logit contributions | |
Bs | +21.458 |
ose | +20.491 |
Js | +20.395 |
zb | +20.204 |
ahn | +20.181 |
Neg logit contributions | |
conservancy | -16.234 |
guaranteeing | -15.625 |
realistically | -15.004 |
condol | -14.679 |
disqualified | -14.505 |
Token | https |
Token ID | 3740 |
Feature activation | -3.82e-01 |
Pos logit contributions | |
equality | +3.523 |
realistically | +3.456 |
guaranteeing | +3.451 |
conservancy | +3.399 |
1080 | +3.368 |
Neg logit contributions | |
ose | -4.030 |
Bs | -3.983 |
ahn | -3.982 |
swick | -3.881 |
ッド | -3.811 |
Token | :// |
Token ID | 1378 |
Feature activation | +1.77e-01 |
Pos logit contributions | |
ose | +2.992 |
Bs | +2.941 |
ahn | +2.912 |
Adams | +2.702 |
Js | +2.663 |
Neg logit contributions | |
conservancy | -6.075 |
guaranteeing | -5.740 |
illusion | -5.720 |
equality | -5.701 |
artifacts | -5.640 |
Token | t |
Token ID | 83 |
Feature activation | +4.55e-01 |
Pos logit contributions | |
equality | +3.204 |
conservancy | +3.198 |
illusion | +3.152 |
guaranteeing | +3.139 |
realistically | +3.135 |
Neg logit contributions | |
ose | -0.858 |
Bs | -0.846 |
ahn | -0.809 |
swick | -0.751 |
Flavoring | -0.733 |
Token | . |
Token ID | 13 |
Feature activation | -1.72e-01 |
Pos logit contributions | |
equality | +4.577 |
1080 | +4.554 |
realistically | +4.504 |
illusion | +4.393 |
conservancy | +4.382 |
Neg logit contributions | |
behavi | -5.822 |
Bs | -5.563 |
ッド | -5.511 |
ose | -5.499 |
Flavoring | -5.441 |
Token | co |
Token ID | 1073 |
Feature activation | -5.17e-01 |
Pos logit contributions | |
ose | +1.694 |
Bs | +1.663 |
ahn | +1.659 |
swick | +1.547 |
Adams | +1.532 |
Neg logit contributions | |
conservancy | -2.340 |
equality | -2.270 |
guaranteeing | -2.269 |
realistically | -2.250 |
illusion | -2.232 |
Token | / |
Token ID | 14 |
Feature activation | -3.34e+00 |
Pos logit contributions | |
ose | +4.809 |
ahn | +4.697 |
Bs | +4.693 |
Js | +4.340 |
Adams | +4.337 |
Neg logit contributions | |
conservancy | -7.477 |
illusion | -6.945 |
guaranteeing | -6.931 |
equality | -6.843 |
artifacts | -6.838 |
Token | P |
Token ID | 47 |
Feature activation | -1.79e+00 |
Pos logit contributions | |
Bs | +26.636 |
Js | +25.516 |
ose | +25.116 |
zb | +25.060 |
ahn | +24.850 |
Neg logit contributions | |
conservancy | -18.225 |
guaranteeing | -17.134 |
realistically | -16.266 |
disqualified | -15.791 |
condol | -15.712 |
Token | 9 |
Token ID | 24 |
Feature activation | -2.88e+00 |
Pos logit contributions | |
Bs | +18.267 |
ose | +18.152 |
Js | +17.722 |
ahn | +17.311 |
zb | +17.254 |
Neg logit contributions | |
conservancy | -15.468 |
guaranteeing | -14.380 |
realistically | -14.154 |
condol | -13.583 |
illusion | -13.411 |
Token | gc |
Token ID | 36484 |
Feature activation | -2.19e+00 |
Pos logit contributions | |
Bs | +24.994 |
Js | +23.865 |
ahn | +23.505 |
ose | +23.471 |
zb | +23.418 |
Neg logit contributions | |
conservancy | -17.692 |
guaranteeing | -16.609 |
realistically | -15.884 |
condol | -15.490 |
disqualified | -15.171 |
Token | N |
Token ID | 45 |
Feature activation | -2.04e+00 |
Pos logit contributions | |
Bs | +21.392 |
ose | +20.585 |
Js | +20.523 |
ahn | +20.247 |
zb | +20.233 |
Neg logit contributions | |
conservancy | -16.548 |
guaranteeing | -15.922 |
realistically | -15.218 |
condol | -14.717 |
belie | -14.639 |
Token | PA |
Token ID | 4537 |
Feature activation | -2.05e+00 |
Pos logit contributions | |
ose | +20.188 |
Bs | +20.060 |
zb | +19.512 |
Js | +19.326 |
ahn | +19.254 |
Neg logit contributions | |
conservancy | -16.304 |
guaranteeing | -15.475 |
realistically | -14.967 |
belie | -14.353 |
condol | -14.331 |
Token | pic |
Token ID | 8301 |
Feature activation | -5.97e-01 |
Pos logit contributions | |
equality | +4.334 |
guaranteeing | +4.295 |
conservancy | +4.253 |
realistically | +4.251 |
illusion | +4.229 |
Neg logit contributions | |
ose | -4.078 |
Bs | -4.033 |
ahn | -3.996 |
swick | -3.913 |
ッド | -3.820 |
Token | . |
Token ID | 13 |
Feature activation | -2.16e-01 |
Pos logit contributions | |
ose | +5.184 |
Bs | +5.075 |
ahn | +5.038 |
Irish | +4.867 |
Adams | +4.775 |
Neg logit contributions | |
conservancy | -8.819 |
illusion | -8.140 |
guaranteeing | -8.097 |
artifacts | -8.076 |
equality | -8.071 |
Token | twitter |
Token ID | 6956 |
Feature activation | -7.65e-01 |
Pos logit contributions | |
ose | +2.783 |
Bs | +2.748 |
ahn | +2.738 |
swick | +2.607 |
Adams | +2.591 |
Neg logit contributions | |
conservancy | -2.277 |
guaranteeing | -2.192 |
equality | -2.165 |
realistically | -2.147 |
illusion | -2.119 |
Token | . |
Token ID | 13 |
Feature activation | +2.94e-03 |
Pos logit contributions | |
ose | +5.791 |
Bs | +5.687 |
ahn | +5.675 |
Irish | +5.322 |
Adams | +5.223 |
Neg logit contributions | |
conservancy | -12.098 |
artifacts | -11.083 |
guaranteeing | -11.035 |
illusion | -10.932 |
mbuds | -10.919 |
Token | com |
Token ID | 785 |
Feature activation | -5.69e-01 |
Pos logit contributions | |
conservancy | +0.031 |
equality | +0.030 |
realistically | +0.030 |
guaranteeing | +0.030 |
illusion | +0.029 |
Neg logit contributions | |
ose | -0.038 |
Bs | -0.037 |
ahn | -0.037 |
swick | -0.036 |
Adams | -0.035 |
Token | / |
Token ID | 14 |
Feature activation | -3.34e+00 |
Pos logit contributions | |
Bs | +5.071 |
ose | +5.035 |
ahn | +4.915 |
Js | +4.676 |
Irish | +4.669 |
Neg logit contributions | |
conservancy | -8.477 |
artifacts | -7.799 |
guaranteeing | -7.765 |
illusion | -7.719 |
equality | -7.582 |
Token | u |
Token ID | 84 |
Feature activation | -1.57e+00 |
Pos logit contributions | |
Bs | +26.525 |
Js | +25.280 |
zb | +24.984 |
ose | +24.881 |
sb | +24.677 |
Neg logit contributions | |
conservancy | -18.289 |
guaranteeing | -17.430 |
realistically | -16.379 |
disqualified | -16.047 |
condol | -15.830 |
Token | I |
Token ID | 40 |
Feature activation | -1.48e+00 |
Pos logit contributions | |
Bs | +16.603 |
ose | +15.870 |
Js | +15.799 |
ahn | +15.644 |
zb | +14.985 |
Neg logit contributions | |
conservancy | -14.620 |
guaranteeing | -14.292 |
realistically | -13.740 |
equality | -13.073 |
condol | -13.044 |
Token | ba |
Token ID | 7012 |
Feature activation | -1.53e+00 |
Pos logit contributions | |
ose | +15.500 |
Bs | +15.414 |
Js | +14.954 |
ahn | +14.901 |
zb | +14.803 |
Neg logit contributions | |
conservancy | -14.384 |
guaranteeing | -13.529 |
realistically | -13.270 |
illusion | -12.757 |
equality | -12.710 |
Token | H |
Token ID | 39 |
Feature activation | -2.24e+00 |
Pos logit contributions | |
Bs | +15.974 |
ose | +15.362 |
Js | +15.298 |
ahn | +15.064 |
zb | +14.413 |
Neg logit contributions | |
conservancy | -15.083 |
guaranteeing | -14.303 |
realistically | -13.865 |
belie | -13.309 |
condol | -13.176 |
Token | Ex |
Token ID | 3109 |
Feature activation | -2.78e+00 |
Pos logit contributions | |
Bs | +21.229 |
ose | +20.893 |
Js | +20.688 |
ahn | +20.331 |
zb | +20.221 |
Neg logit contributions | |
conservancy | -16.829 |
guaranteeing | -16.057 |
realistically | -15.126 |
condol | -14.762 |
Explain | -14.481 |
Token | pic |
Token ID | 8301 |
Feature activation | -5.64e-01 |
Pos logit contributions | |
guaranteeing | +12.401 |
realistically | +12.382 |
equality | +12.287 |
1080 | +12.256 |
2022 | +12.256 |
Neg logit contributions | |
ahn | -10.752 |
Flavoring | -10.722 |
Bs | -10.667 |
ッド | -10.491 |
ose | -10.322 |
Token | . |
Token ID | 13 |
Feature activation | -1.58e-01 |
Pos logit contributions | |
ose | +4.875 |
Bs | +4.776 |
ahn | +4.669 |
Irish | +4.425 |
Js | +4.405 |
Neg logit contributions | |
conservancy | -8.544 |
guaranteeing | -7.868 |
illusion | -7.861 |
artifacts | -7.838 |
equality | -7.718 |
Token | twitter |
Token ID | 6956 |
Feature activation | -6.78e-01 |
Pos logit contributions | |
ose | +2.066 |
Bs | +2.043 |
ahn | +2.030 |
swick | +1.949 |
Adams | +1.933 |
Neg logit contributions | |
conservancy | -1.638 |
guaranteeing | -1.576 |
equality | -1.561 |
realistically | -1.555 |
illusion | -1.537 |
Token | . |
Token ID | 13 |
Feature activation | +3.91e-02 |
Pos logit contributions | |
ose | +5.425 |
Bs | +5.350 |
ahn | +5.307 |
Adams | +4.915 |
Irish | +4.906 |
Neg logit contributions | |
conservancy | -10.590 |
guaranteeing | -9.742 |
artifacts | -9.666 |
illusion | -9.608 |
mbuds | -9.434 |
Token | com |
Token ID | 785 |
Feature activation | -5.15e-01 |
Pos logit contributions | |
conservancy | +0.398 |
equality | +0.397 |
realistically | +0.390 |
guaranteeing | +0.388 |
illusion | +0.384 |
Neg logit contributions | |
ose | -0.504 |
Bs | -0.497 |
ahn | -0.496 |
swick | -0.478 |
ッド | -0.472 |
Token | / |
Token ID | 14 |
Feature activation | -3.32e+00 |
Pos logit contributions | |
Bs | +4.734 |
ose | +4.705 |
ahn | +4.573 |
Js | +4.376 |
Irish | +4.349 |
Neg logit contributions | |
conservancy | -7.551 |
guaranteeing | -6.924 |
illusion | -6.910 |
artifacts | -6.902 |
equality | -6.782 |
Token | j |
Token ID | 73 |
Feature activation | -2.75e+00 |
Pos logit contributions | |
Bs | +26.452 |
Js | +25.240 |
zb | +24.897 |
ose | +24.884 |
sb | +24.641 |
Neg logit contributions | |
conservancy | -18.281 |
guaranteeing | -17.464 |
realistically | -16.402 |
disqualified | -15.991 |
condol | -15.921 |
Token | Ax |
Token ID | 31554 |
Feature activation | -3.05e+00 |
Pos logit contributions | |
Bs | +24.379 |
ose | +23.021 |
Js | +22.857 |
zb | +22.782 |
ahn | +22.704 |
Neg logit contributions | |
conservancy | -17.773 |
guaranteeing | -17.015 |
realistically | -16.019 |
condol | -15.811 |
disqualified | -15.568 |
Token | 6 |
Token ID | 21 |
Feature activation | -2.90e+00 |
Pos logit contributions | |
Bs | +25.535 |
Js | +24.434 |
zb | +23.966 |
ose | +23.862 |
ahn | +23.841 |
Neg logit contributions | |
conservancy | -18.503 |
guaranteeing | -17.365 |
realistically | -16.171 |
condol | -16.001 |
disqualified | -15.809 |
Token | c |
Token ID | 66 |
Feature activation | -1.99e+00 |
Pos logit contributions | |
Bs | +25.039 |
Js | +23.796 |
zb | +23.488 |
ose | +23.482 |
ahn | +23.322 |
Neg logit contributions | |
conservancy | -17.969 |
guaranteeing | -17.022 |
realistically | -16.067 |
condol | -15.784 |
disqualified | -15.602 |
Token | U |
Token ID | 52 |
Feature activation | -1.83e+00 |
Pos logit contributions | |
Bs | +19.955 |
ose | +19.081 |
Js | +18.972 |
ahn | +18.809 |
zb | +18.777 |
Neg logit contributions | |
conservancy | -16.000 |
guaranteeing | -15.445 |
realistically | -14.735 |
condol | -14.385 |
disqualified | -14.138 |
Token | pic |
Token ID | 8301 |
Feature activation | -3.91e-01 |
Pos logit contributions | |
equality | +7.579 |
realistically | +7.387 |
guaranteeing | +7.362 |
conservancy | +7.323 |
1080 | +7.268 |
Neg logit contributions | |
ose | -7.348 |
ahn | -7.279 |
Bs | -7.242 |
swick | -7.169 |
ッド | -6.972 |
Token | . |
Token ID | 13 |
Feature activation | -2.03e-01 |
Pos logit contributions | |
ose | +3.720 |
Bs | +3.668 |
ahn | +3.622 |
Adams | +3.401 |
Irish | +3.391 |
Neg logit contributions | |
conservancy | -5.543 |
illusion | -5.207 |
equality | -5.190 |
guaranteeing | -5.180 |
artifacts | -5.142 |
Token | twitter |
Token ID | 6956 |
Feature activation | -6.16e-01 |
Pos logit contributions | |
ose | +2.624 |
Bs | +2.608 |
ahn | +2.588 |
swick | +2.467 |
Adams | +2.455 |
Neg logit contributions | |
conservancy | -2.140 |
equality | -2.046 |
guaranteeing | -2.046 |
realistically | -2.008 |
illusion | -2.000 |
Token | . |
Token ID | 13 |
Feature activation | +3.97e-02 |
Pos logit contributions | |
ose | +5.150 |
Bs | +5.110 |
ahn | +5.095 |
Adams | +4.667 |
Irish | +4.637 |
Neg logit contributions | |
conservancy | -9.334 |
guaranteeing | -8.657 |
artifacts | -8.617 |
illusion | -8.614 |
equality | -8.474 |
Token | com |
Token ID | 785 |
Feature activation | -4.95e-01 |
Pos logit contributions | |
conservancy | +0.404 |
equality | +0.401 |
realistically | +0.396 |
guaranteeing | +0.394 |
illusion | +0.389 |
Neg logit contributions | |
ose | -0.513 |
Bs | -0.505 |
ahn | -0.503 |
swick | -0.485 |
Adams | -0.479 |
Token | / |
Token ID | 14 |
Feature activation | -3.31e+00 |
Pos logit contributions | |
Bs | +4.617 |
ose | +4.606 |
ahn | +4.491 |
Js | +4.243 |
Adams | +4.232 |
Neg logit contributions | |
conservancy | -7.169 |
guaranteeing | -6.628 |
artifacts | -6.612 |
illusion | -6.601 |
equality | -6.548 |
Token | xx |
Token ID | 5324 |
Feature activation | -3.00e+00 |
Pos logit contributions | |
Bs | +26.494 |
Js | +25.195 |
zb | +25.006 |
ose | +24.923 |
sb | +24.656 |
Neg logit contributions | |
conservancy | -18.209 |
guaranteeing | -17.333 |
realistically | -16.302 |
disqualified | -15.946 |
condol | -15.821 |
Token | A |
Token ID | 32 |
Feature activation | -1.20e+00 |
Pos logit contributions | |
Bs | +25.245 |
Js | +24.112 |
ose | +24.056 |
ahn | +23.810 |
zb | +23.794 |
Neg logit contributions | |
conservancy | -17.902 |
guaranteeing | -17.192 |
realistically | -16.024 |
condol | -15.678 |
disqualified | -15.465 |
Token | II |
Token ID | 3978 |
Feature activation | -2.09e+00 |
Pos logit contributions | |
ose | +12.988 |
Bs | +12.952 |
ahn | +12.586 |
Js | +12.569 |
zb | +12.332 |
Neg logit contributions | |
conservancy | -12.514 |
guaranteeing | -11.721 |
realistically | -11.523 |
illusion | -11.251 |
equality | -11.175 |
Token | 1 |
Token ID | 16 |
Feature activation | -2.86e+00 |
Pos logit contributions | |
Bs | +20.720 |
ose | +20.182 |
Js | +19.869 |
zb | +19.680 |
ahn | +19.640 |
Neg logit contributions | |
conservancy | -16.125 |
guaranteeing | -15.200 |
realistically | -14.662 |
condol | -14.113 |
belie | -13.951 |
Token | k |
Token ID | 74 |
Feature activation | -2.43e+00 |
Pos logit contributions | |
Bs | +24.728 |
Js | +23.636 |
zb | +23.299 |
ose | +23.242 |
ahn | +23.072 |
Neg logit contributions | |
conservancy | -17.925 |
guaranteeing | -16.847 |
realistically | -15.916 |
condol | -15.645 |
disqualified | -15.378 |
Token | pic |
Token ID | 8301 |
Feature activation | -8.12e-01 |
Pos logit contributions | |
conservancy | +2.805 |
guaranteeing | +2.798 |
equality | +2.797 |
realistically | +2.786 |
illusion | +2.704 |
Neg logit contributions | |
ose | -2.767 |
Bs | -2.761 |
ahn | -2.726 |
ッド | -2.615 |
swick | -2.614 |
Token | . |
Token ID | 13 |
Feature activation | -2.86e-01 |
Pos logit contributions | |
Bs | +6.093 |
ose | +6.083 |
ahn | +5.888 |
Adams | +5.661 |
Irish | +5.636 |
Neg logit contributions | |
conservancy | -12.857 |
artifacts | -11.775 |
illusion | -11.714 |
guaranteeing | -11.691 |
mbuds | -11.471 |
Token | twitter |
Token ID | 6956 |
Feature activation | -6.25e-01 |
Pos logit contributions | |
ose | +3.563 |
Bs | +3.555 |
ahn | +3.531 |
swick | +3.333 |
Js | +3.331 |
Neg logit contributions | |
conservancy | -3.154 |
guaranteeing | -3.013 |
equality | -2.967 |
realistically | -2.933 |
illusion | -2.916 |
Token | . |
Token ID | 13 |
Feature activation | +2.03e-02 |
Pos logit contributions | |
ose | +5.122 |
Bs | +5.095 |
ahn | +5.042 |
Adams | +4.649 |
Js | +4.630 |
Neg logit contributions | |
conservancy | -9.672 |
guaranteeing | -8.925 |
artifacts | -8.893 |
illusion | -8.832 |
mbuds | -8.638 |
Token | com |
Token ID | 785 |
Feature activation | -4.76e-01 |
Pos logit contributions | |
conservancy | +0.208 |
equality | +0.207 |
realistically | +0.204 |
guaranteeing | +0.203 |
illusion | +0.200 |
Neg logit contributions | |
ose | -0.260 |
Bs | -0.257 |
ahn | -0.256 |
swick | -0.246 |
ッド | -0.243 |
Token | / |
Token ID | 14 |
Feature activation | -3.30e+00 |
Pos logit contributions | |
Bs | +4.433 |
ose | +4.419 |
ahn | +4.310 |
Js | +4.090 |
Irish | +4.076 |
Neg logit contributions | |
conservancy | -6.924 |
guaranteeing | -6.381 |
artifacts | -6.370 |
illusion | -6.355 |
equality | -6.248 |
Token | z |
Token ID | 89 |
Feature activation | -2.75e+00 |
Pos logit contributions | |
Bs | +26.405 |
Js | +25.120 |
zb | +24.933 |
ose | +24.815 |
sb | +24.592 |
Neg logit contributions | |
conservancy | -18.286 |
guaranteeing | -17.496 |
realistically | -16.391 |
disqualified | -15.998 |
condol | -15.857 |
Token | J |
Token ID | 41 |
Feature activation | -2.70e+00 |
Pos logit contributions | |
Bs | +24.377 |
Js | +23.062 |
ose | +23.037 |
ahn | +22.843 |
zb | +22.634 |
Neg logit contributions | |
conservancy | -17.497 |
guaranteeing | -16.912 |
realistically | -15.861 |
condol | -15.402 |
disqualified | -15.348 |
Token | X |
Token ID | 55 |
Feature activation | -2.99e+00 |
Pos logit contributions | |
Bs | +23.955 |
ose | +23.058 |
zb | +22.819 |
Js | +22.629 |
ahn | +22.601 |
Neg logit contributions | |
conservancy | -17.615 |
guaranteeing | -16.834 |
realistically | -15.825 |
condol | -15.631 |
belie | -15.236 |
Token | w |
Token ID | 86 |
Feature activation | -2.62e+00 |
Pos logit contributions | |
Bs | +25.342 |
Js | +24.248 |
ose | +24.032 |
zb | +23.810 |
ahn | +23.644 |
Neg logit contributions | |
conservancy | -18.216 |
guaranteeing | -17.203 |
realistically | -16.015 |
condol | -15.793 |
disqualified | -15.528 |
Token | 2 |
Token ID | 17 |
Feature activation | -2.96e+00 |
Pos logit contributions | |
Bs | +23.883 |
Js | +22.707 |
zb | +22.543 |
ose | +22.537 |
ahn | +22.310 |
Neg logit contributions | |
conservancy | -17.039 |
guaranteeing | -16.571 |
realistically | -15.632 |
condol | -15.503 |
disqualified | -15.253 |
Token | pic |
Token ID | 8301 |
Feature activation | -4.87e-01 |
Pos logit contributions | |
realistically | +6.750 |
equality | +6.743 |
guaranteeing | +6.696 |
????? | +6.694 |
2022 | +6.599 |
Neg logit contributions | |
Bs | -7.683 |
ose | -7.651 |
ahn | -7.592 |
ッド | -7.367 |
swick | -7.298 |
Token | . |
Token ID | 13 |
Feature activation | -8.50e-02 |
Pos logit contributions | |
ose | +4.442 |
Bs | +4.405 |
ahn | +4.329 |
Adams | +4.152 |
Irish | +4.042 |
Neg logit contributions | |
conservancy | -7.103 |
illusion | -6.613 |
equality | -6.610 |
guaranteeing | -6.587 |
artifacts | -6.543 |
Token | twitter |
Token ID | 6956 |
Feature activation | -5.16e-01 |
Pos logit contributions | |
ose | +1.138 |
Bs | +1.126 |
ahn | +1.121 |
swick | +1.075 |
Adams | +1.067 |
Neg logit contributions | |
conservancy | -0.843 |
equality | -0.820 |
guaranteeing | -0.814 |
realistically | -0.809 |
illusion | -0.798 |
Token | . |
Token ID | 13 |
Feature activation | +1.66e-01 |
Pos logit contributions | |
ose | +4.554 |
Bs | +4.501 |
ahn | +4.491 |
Adams | +4.168 |
Js | +4.113 |
Neg logit contributions | |
conservancy | -7.642 |
guaranteeing | -7.118 |
illusion | -7.079 |
artifacts | -7.065 |
equality | -6.988 |
Token | com |
Token ID | 785 |
Feature activation | -3.07e-01 |
Pos logit contributions | |
equality | +1.598 |
realistically | +1.573 |
conservancy | +1.559 |
1080 | +1.550 |
guaranteeing | +1.540 |
Neg logit contributions | |
ose | -2.214 |
Bs | -2.175 |
ahn | -2.174 |
behavi | -2.110 |
swick | -2.104 |
Token | / |
Token ID | 14 |
Feature activation | -3.28e+00 |
Pos logit contributions | |
ose | +3.111 |
Bs | +3.094 |
ahn | +3.044 |
swick | +2.870 |
Adams | +2.870 |
Neg logit contributions | |
conservancy | -4.183 |
guaranteeing | -3.930 |
equality | -3.910 |
illusion | -3.905 |
artifacts | -3.888 |
Token | E |
Token ID | 36 |
Feature activation | -1.77e+00 |
Pos logit contributions | |
Bs | +26.350 |
Js | +25.126 |
zb | +24.896 |
ose | +24.861 |
ahn | +24.591 |
Neg logit contributions | |
conservancy | -18.228 |
guaranteeing | -17.289 |
realistically | -16.308 |
disqualified | -15.941 |
condol | -15.752 |
Token | q |
Token ID | 80 |
Feature activation | -3.01e+00 |
Pos logit contributions | |
Bs | +18.435 |
ose | +17.746 |
Js | +17.698 |
ahn | +17.378 |
zb | +17.349 |
Neg logit contributions | |
conservancy | -15.078 |
guaranteeing | -14.114 |
realistically | -13.764 |
illusion | -13.196 |
condol | -13.105 |
Token | 4 |
Token ID | 19 |
Feature activation | -2.97e+00 |
Pos logit contributions | |
Bs | +25.511 |
Js | +24.209 |
ose | +24.180 |
zb | +23.912 |
ahn | +23.729 |
Neg logit contributions | |
conservancy | -17.883 |
guaranteeing | -16.991 |
realistically | -16.086 |
condol | -15.944 |
devastated | -15.533 |
Token | R |
Token ID | 49 |
Feature activation | -2.21e+00 |
Pos logit contributions | |
Bs | +25.158 |
Js | +24.122 |
ose | +23.673 |
zb | +23.616 |
ahn | +23.540 |
Neg logit contributions | |
conservancy | -18.036 |
guaranteeing | -17.005 |
realistically | -16.100 |
condol | -15.722 |
disqualified | -15.539 |
Token | IL |
Token ID | 4146 |
Feature activation | -2.96e+00 |
Pos logit contributions | |
Bs | +20.833 |
ose | +20.616 |
zb | +20.505 |
Js | +20.293 |
ahn | +20.154 |
Neg logit contributions | |
conservancy | -16.635 |
guaranteeing | -15.631 |
realistically | -14.973 |
condol | -14.643 |
equality | -14.642 |
Token | https |
Token ID | 3740 |
Feature activation | -4.05e-01 |
Pos logit contributions | |
equality | +2.413 |
conservancy | +2.406 |
guaranteeing | +2.386 |
realistically | +2.383 |
illusion | +2.340 |
Neg logit contributions | |
ose | -2.879 |
Bs | -2.836 |
ahn | -2.822 |
swick | -2.719 |
Flavoring | -2.693 |
Token | :// |
Token ID | 1378 |
Feature activation | +1.23e-01 |
Pos logit contributions | |
ose | +3.086 |
Bs | +3.053 |
ahn | +3.043 |
Adams | +2.833 |
swick | +2.807 |
Neg logit contributions | |
conservancy | -6.478 |
illusion | -6.101 |
guaranteeing | -6.099 |
equality | -6.054 |
artifacts | -6.010 |
Token | t |
Token ID | 83 |
Feature activation | +2.50e-01 |
Pos logit contributions | |
conservancy | +2.248 |
equality | +2.247 |
realistically | +2.216 |
guaranteeing | +2.214 |
illusion | +2.213 |
Neg logit contributions | |
ose | -0.585 |
Bs | -0.571 |
ahn | -0.551 |
swick | -0.503 |
ッド | -0.489 |
Token | . |
Token ID | 13 |
Feature activation | -1.21e-01 |
Pos logit contributions | |
equality | +2.732 |
realistically | +2.705 |
1080 | +2.677 |
conservancy | +2.675 |
illusion | +2.635 |
Neg logit contributions | |
ose | -2.947 |
Bs | -2.932 |
ahn | -2.887 |
behavi | -2.866 |
ッド | -2.830 |
Token | co |
Token ID | 1073 |
Feature activation | -5.16e-01 |
Pos logit contributions | |
ose | +1.214 |
Bs | +1.194 |
ahn | +1.188 |
swick | +1.117 |
Adams | +1.104 |
Neg logit contributions | |
conservancy | -1.623 |
equality | -1.579 |
guaranteeing | -1.575 |
realistically | -1.565 |
illusion | -1.552 |
Token | / |
Token ID | 14 |
Feature activation | -3.23e+00 |
Pos logit contributions | |
ose | +4.769 |
ahn | +4.668 |
Bs | +4.667 |
Js | +4.311 |
Adams | +4.302 |
Neg logit contributions | |
conservancy | -7.476 |
guaranteeing | -6.969 |
illusion | -6.957 |
artifacts | -6.873 |
equality | -6.841 |
Token | c |
Token ID | 66 |
Feature activation | -1.94e+00 |
Pos logit contributions | |
Bs | +26.214 |
Js | +25.119 |
zb | +24.723 |
ose | +24.707 |
ahn | +24.442 |
Neg logit contributions | |
conservancy | -18.190 |
guaranteeing | -17.143 |
realistically | -16.210 |
disqualified | -15.805 |
condol | -15.626 |
Token | Q |
Token ID | 48 |
Feature activation | -2.76e+00 |
Pos logit contributions | |
Bs | +19.667 |
ose | +18.764 |
Js | +18.736 |
ahn | +18.513 |
zb | +18.368 |
Neg logit contributions | |
conservancy | -15.963 |
guaranteeing | -15.269 |
realistically | -14.597 |
condol | -14.099 |
devastated | -13.978 |
Token | 3 |
Token ID | 18 |
Feature activation | -2.82e+00 |
Pos logit contributions | |
Bs | +24.491 |
Js | +23.324 |
ose | +23.150 |
zb | +22.909 |
ahn | +22.761 |
Neg logit contributions | |
conservancy | -17.650 |
guaranteeing | -16.593 |
realistically | -15.772 |
condol | -15.582 |
devastated | -15.184 |
Token | x |
Token ID | 87 |
Feature activation | -2.82e+00 |
Pos logit contributions | |
Bs | +24.630 |
Js | +23.549 |
ose | +23.202 |
zb | +23.074 |
ahn | +22.898 |
Neg logit contributions | |
conservancy | -17.918 |
guaranteeing | -16.860 |
realistically | -15.993 |
condol | -15.547 |
disqualified | -15.339 |
Token | mb |
Token ID | 2022 |
Feature activation | -2.53e+00 |
Pos logit contributions | |
Bs | +24.636 |
Js | +23.618 |
ose | +23.392 |
zb | +23.271 |
ahn | +23.182 |
Neg logit contributions | |
conservancy | -17.533 |
guaranteeing | -16.951 |
realistically | -15.849 |
condol | -15.721 |
disqualified | -15.405 |
Token | . |
Token ID | 13 |
Feature activation | -3.03e-01 |
Pos logit contributions | |
equality | +3.659 |
realistically | +3.627 |
1080 | +3.607 |
conservancy | +3.550 |
guaranteeing | +3.524 |
Neg logit contributions | |
Bs | -4.172 |
ose | -4.169 |
behavi | -4.166 |
ahn | -4.087 |
ッド | -4.064 |
Token | co |
Token ID | 1073 |
Feature activation | -6.20e-01 |
Pos logit contributions | |
ose | +2.820 |
Bs | +2.796 |
ahn | +2.766 |
Js | +2.552 |
swick | +2.533 |
Neg logit contributions | |
conservancy | -4.287 |
guaranteeing | -4.117 |
equality | -4.108 |
realistically | -4.063 |
illusion | -4.052 |
Token | / |
Token ID | 14 |
Feature activation | -3.44e+00 |
Pos logit contributions | |
ose | +5.484 |
Bs | +5.342 |
ahn | +5.341 |
Adams | +4.935 |
Irish | +4.927 |
Neg logit contributions | |
conservancy | -9.191 |
illusion | -8.482 |
guaranteeing | -8.437 |
artifacts | -8.407 |
equality | -8.283 |
Token | X |
Token ID | 55 |
Feature activation | -3.15e+00 |
Pos logit contributions | |
Bs | +26.876 |
Js | +25.746 |
zb | +25.372 |
ose | +25.360 |
ahn | +25.111 |
Neg logit contributions | |
conservancy | -18.438 |
guaranteeing | -17.349 |
realistically | -16.453 |
disqualified | -16.079 |
condol | -15.908 |
Token | h |
Token ID | 71 |
Feature activation | -2.03e+00 |
Pos logit contributions | |
Bs | +25.947 |
Js | +25.005 |
ose | +24.585 |
zb | +24.462 |
ahn | +24.341 |
Neg logit contributions | |
conservancy | -18.332 |
guaranteeing | -17.291 |
realistically | -16.138 |
condol | -15.767 |
devastated | -15.494 |
Token | 7 |
Token ID | 22 |
Feature activation | -3.22e+00 |
Pos logit contributions | |
Bs | +20.329 |
Js | +19.382 |
ose | +19.381 |
ahn | +19.153 |
zb | +18.927 |
Neg logit contributions | |
conservancy | -15.834 |
guaranteeing | -15.392 |
realistically | -14.681 |
condol | -14.282 |
Explain | -14.033 |
Token | J |
Token ID | 41 |
Feature activation | -2.69e+00 |
Pos logit contributions | |
Bs | +26.150 |
Js | +24.892 |
zb | +24.531 |
ose | +24.458 |
sb | +24.416 |
Neg logit contributions | |
conservancy | -18.468 |
guaranteeing | -17.361 |
realistically | -16.330 |
condol | -16.105 |
disqualified | -15.823 |
Token | m |
Token ID | 76 |
Feature activation | -2.27e+00 |
Pos logit contributions | |
Bs | +23.807 |
ose | +23.044 |
zb | +22.901 |
Js | +22.535 |
ahn | +22.495 |
Neg logit contributions | |
conservancy | -17.671 |
guaranteeing | -16.893 |
realistically | -15.797 |
condol | -15.659 |
disqualified | -15.302 |
Token | B |
Token ID | 33 |
Feature activation | -1.95e+00 |
Pos logit contributions | |
Bs | +21.841 |
ose | +20.964 |
Js | +20.843 |
ahn | +20.581 |
zb | +20.524 |
Neg logit contributions | |
conservancy | -16.605 |
guaranteeing | -15.925 |
realistically | -15.178 |
condol | -14.821 |
disqualified | -14.680 |
Token | z |
Token ID | 89 |
Feature activation | -2.90e+00 |
Pos logit contributions | |
ose | +19.384 |
Bs | +19.165 |
Js | +18.871 |
ahn | +18.859 |
zb | +18.804 |
Neg logit contributions | |
conservancy | -15.873 |
guaranteeing | -15.197 |
realistically | -14.585 |
condol | -14.055 |
belie | -14.044 |
Token | 5 |
Token ID | 20 |
Feature activation | -3.21e+00 |
Pos logit contributions | |
Bs | +25.113 |
Js | +23.802 |
ose | +23.725 |
zb | +23.460 |
ahn | +23.383 |
Neg logit contributions | |
conservancy | -17.714 |
guaranteeing | -16.995 |
realistically | -15.870 |
condol | -15.734 |
disqualified | -15.411 |
Token | pic |
Token ID | 8301 |
Feature activation | -4.61e-01 |
Pos logit contributions | |
guaranteeing | +6.991 |
realistically | +6.972 |
equality | +6.968 |
1080 | +6.826 |
conservancy | +6.784 |
Neg logit contributions | |
ose | -6.742 |
Bs | -6.726 |
ahn | -6.706 |
swick | -6.567 |
Flavoring | -6.552 |
Token | . |
Token ID | 13 |
Feature activation | +7.79e-03 |
Pos logit contributions | |
ose | +4.214 |
Bs | +4.155 |
ahn | +4.066 |
Irish | +3.870 |
Adams | +3.853 |
Neg logit contributions | |
conservancy | -6.727 |
illusion | -6.246 |
guaranteeing | -6.233 |
artifacts | -6.202 |
equality | -6.175 |
Token | twitter |
Token ID | 6956 |
Feature activation | -6.23e-01 |
Pos logit contributions | |
conservancy | +0.074 |
equality | +0.073 |
realistically | +0.072 |
guaranteeing | +0.071 |
illusion | +0.070 |
Neg logit contributions | |
ose | -0.107 |
Bs | -0.105 |
ahn | -0.105 |
swick | -0.101 |
Adams | -0.100 |
Token | . |
Token ID | 13 |
Feature activation | +1.21e-01 |
Pos logit contributions | |
ose | +5.156 |
Bs | +5.093 |
ahn | +5.051 |
Adams | +4.693 |
Irish | +4.684 |
Neg logit contributions | |
conservancy | -9.569 |
guaranteeing | -8.840 |
artifacts | -8.769 |
illusion | -8.739 |
equality | -8.549 |
Token | com |
Token ID | 785 |
Feature activation | -3.58e-01 |
Pos logit contributions | |
equality | +1.188 |
conservancy | +1.168 |
realistically | +1.162 |
guaranteeing | +1.146 |
1080 | +1.141 |
Neg logit contributions | |
ose | -1.586 |
Bs | -1.561 |
ahn | -1.559 |
swick | -1.509 |
ッド | -1.499 |
Token | / |
Token ID | 14 |
Feature activation | -3.21e+00 |
Pos logit contributions | |
ose | +3.530 |
Bs | +3.523 |
ahn | +3.433 |
Adams | +3.262 |
Irish | +3.256 |
Neg logit contributions | |
conservancy | -4.988 |
guaranteeing | -4.643 |
illusion | -4.632 |
artifacts | -4.603 |
equality | -4.593 |
Token | F |
Token ID | 37 |
Feature activation | -2.34e+00 |
Pos logit contributions | |
Bs | +26.130 |
Js | +24.954 |
zb | +24.603 |
ose | +24.586 |
sb | +24.397 |
Neg logit contributions | |
conservancy | -18.117 |
guaranteeing | -17.262 |
realistically | -16.244 |
disqualified | -15.876 |
condol | -15.751 |
Token | c |
Token ID | 66 |
Feature activation | -1.95e+00 |
Pos logit contributions | |
Bs | +22.065 |
Js | +21.734 |
ose | +21.383 |
ahn | +21.264 |
zb | +21.223 |
Neg logit contributions | |
conservancy | -16.484 |
guaranteeing | -15.921 |
realistically | -15.228 |
condol | -14.553 |
2022 | -14.537 |
Token | ce |
Token ID | 344 |
Feature activation | -2.70e+00 |
Pos logit contributions | |
Bs | +19.700 |
ose | +18.800 |
Js | +18.796 |
ahn | +18.630 |
zb | +18.343 |
Neg logit contributions | |
conservancy | -15.901 |
guaranteeing | -15.336 |
realistically | -14.743 |
condol | -14.292 |
disqualified | -14.057 |
Token | W |
Token ID | 54 |
Feature activation | -2.44e+00 |
Pos logit contributions | |
Bs | +24.358 |
Js | +23.155 |
ose | +22.950 |
zb | +22.672 |
ahn | +22.439 |
Neg logit contributions | |
conservancy | -17.358 |
guaranteeing | -16.448 |
realistically | -15.823 |
condol | -15.101 |
disqualified | -15.074 |
Token | x |
Token ID | 87 |
Feature activation | -2.81e+00 |
Pos logit contributions | |
Bs | +22.736 |
Js | +22.053 |
ose | +21.865 |
zb | +21.768 |
ahn | +21.549 |
Neg logit contributions | |
conservancy | -16.781 |
guaranteeing | -16.176 |
realistically | -15.344 |
condol | -14.918 |
belie | -14.770 |
Token | 7 |
Token ID | 22 |
Feature activation | -3.22e+00 |
Pos logit contributions | |
Bs | +20.329 |
Js | +19.382 |
ose | +19.381 |
ahn | +19.153 |
zb | +18.927 |
Neg logit contributions | |
conservancy | -15.834 |
guaranteeing | -15.392 |
realistically | -14.681 |
condol | -14.282 |
Explain | -14.033 |
Token | J |
Token ID | 41 |
Feature activation | -2.69e+00 |
Pos logit contributions | |
Bs | +26.150 |
Js | +24.892 |
zb | +24.531 |
ose | +24.458 |
sb | +24.416 |
Neg logit contributions | |
conservancy | -18.468 |
guaranteeing | -17.361 |
realistically | -16.330 |
condol | -16.105 |
disqualified | -15.823 |
Token | m |
Token ID | 76 |
Feature activation | -2.27e+00 |
Pos logit contributions | |
Bs | +23.807 |
ose | +23.044 |
zb | +22.901 |
Js | +22.535 |
ahn | +22.495 |
Neg logit contributions | |
conservancy | -17.671 |
guaranteeing | -16.893 |
realistically | -15.797 |
condol | -15.659 |
disqualified | -15.302 |
Token | B |
Token ID | 33 |
Feature activation | -1.95e+00 |
Pos logit contributions | |
Bs | +21.841 |
ose | +20.964 |
Js | +20.843 |
ahn | +20.581 |
zb | +20.524 |
Neg logit contributions | |
conservancy | -16.605 |
guaranteeing | -15.925 |
realistically | -15.178 |
condol | -14.821 |
disqualified | -14.680 |
Token | z |
Token ID | 89 |
Feature activation | -2.90e+00 |
Pos logit contributions | |
ose | +19.384 |
Bs | +19.165 |
Js | +18.871 |
ahn | +18.859 |
zb | +18.804 |
Neg logit contributions | |
conservancy | -15.873 |
guaranteeing | -15.197 |
realistically | -14.585 |
condol | -14.055 |
belie | -14.044 |
Token | 5 |
Token ID | 20 |
Feature activation | -3.21e+00 |
Pos logit contributions | |
Bs | +25.113 |
Js | +23.802 |
ose | +23.725 |
zb | +23.460 |
ahn | +23.383 |
Neg logit contributions | |
conservancy | -17.714 |
guaranteeing | -16.995 |
realistically | -15.870 |
condol | -15.734 |
disqualified | -15.411 |
Token | s |
Token ID | 82 |
Feature activation | -2.28e+00 |
Pos logit contributions | |
Bs | +26.145 |
Js | +25.037 |
zb | +24.755 |
ose | +24.500 |
sb | +24.471 |
Neg logit contributions | |
conservancy | -18.420 |
guaranteeing | -17.330 |
realistically | -16.276 |
condol | -16.195 |
disqualified | -15.842 |
Token | 8 |
Token ID | 23 |
Feature activation | -1.45e+00 |
Pos logit contributions | |
Bs | +21.904 |
Js | +20.588 |
ose | +20.466 |
ahn | +20.446 |
zb | +20.385 |
Neg logit contributions | |
conservancy | -17.425 |
guaranteeing | -16.034 |
realistically | -15.381 |
condol | -15.018 |
artifacts | -14.970 |
Token | — |
Token ID | 851 |
Feature activation | +2.31e-02 |
Pos logit contributions | |
Bs | +14.539 |
ahn | +13.677 |
ose | +13.588 |
Js | +13.209 |
zb | +13.148 |
Neg logit contributions | |
conservancy | -15.735 |
artifacts | -13.956 |
illusion | -13.816 |
guaranteeing | -13.463 |
equality | -13.461 |
Token | General |
Token ID | 3611 |
Feature activation | +1.81e-02 |
Pos logit contributions | |
conservancy | +0.266 |
equality | +0.263 |
guaranteeing | +0.260 |
realistically | +0.260 |
illusion | +0.257 |
Neg logit contributions | |
ose | -0.268 |
Bs | -0.265 |
ahn | -0.262 |
swick | -0.252 |
Adams | -0.248 |
Token | Flynn |
Token ID | 14654 |
Feature activation | +2.80e-02 |
Pos logit contributions | |
conservancy | +0.243 |
equality | +0.240 |
guaranteeing | +0.237 |
realistically | +0.237 |
illusion | +0.235 |
Neg logit contributions | |
ose | -0.177 |
Bs | -0.174 |
ahn | -0.173 |
swick | -0.165 |
Adams | -0.162 |
Token | com |
Token ID | 785 |
Feature activation | +1.10e-01 |
Pos logit contributions | |
equality | +0.966 |
conservancy | +0.965 |
realistically | +0.948 |
guaranteeing | +0.937 |
illusion | +0.934 |
Neg logit contributions | |
ose | -1.267 |
Bs | -1.259 |
ahn | -1.248 |
swick | -1.207 |
ッド | -1.201 |
Token | / |
Token ID | 14 |
Feature activation | -2.62e-01 |
Pos logit contributions | |
equality | +1.243 |
conservancy | +1.239 |
realistically | +1.229 |
guaranteeing | +1.222 |
illusion | +1.210 |
Neg logit contributions | |
ose | -1.294 |
Bs | -1.277 |
ahn | -1.269 |
swick | -1.219 |
ッド | -1.208 |
Token | watch |
Token ID | 8340 |
Feature activation | +6.33e-01 |
Pos logit contributions | |
ose | +3.054 |
ahn | +3.011 |
Bs | +2.995 |
swick | +2.877 |
Adams | +2.836 |
Neg logit contributions | |
conservancy | -3.011 |
guaranteeing | -2.954 |
realistically | -2.938 |
equality | -2.913 |
illusion | -2.861 |
Token | ? |
Token ID | 30 |
Feature activation | +2.16e-01 |
Pos logit contributions | |
equality | +2.212 |
1080 | +2.162 |
????? | +2.160 |
realistically | +2.141 |
??? | +2.117 |
Neg logit contributions | |
behavi | -12.475 |
ose | -11.752 |
ahn | -11.586 |
Bs | -11.565 |
swick | -11.559 |
Token | v |
Token ID | 85 |
Feature activation | +5.39e-01 |
Pos logit contributions | |
conservancy | +3.090 |
equality | +3.060 |
realistically | +3.033 |
illusion | +3.021 |
guaranteeing | +3.017 |
Neg logit contributions | |
ose | -1.868 |
Bs | -1.840 |
ahn | -1.803 |
swick | -1.738 |
ッド | -1.732 |
Token | = |
Token ID | 28 |
Feature activation | -3.21e+00 |
Pos logit contributions | |
realistically | +4.627 |
1080 | +4.564 |
equality | +4.400 |
conservancy | +4.381 |
completely | +4.364 |
Neg logit contributions | |
behavi | -7.246 |
Bs | -7.224 |
Flavoring | -7.219 |
ose | -7.211 |
ahn | -7.159 |
Token | 2 |
Token ID | 17 |
Feature activation | -2.42e+00 |
Pos logit contributions | |
Bs | +26.268 |
Js | +24.962 |
ose | +24.733 |
ahn | +24.516 |
zb | +24.466 |
Neg logit contributions | |
conservancy | -18.035 |
guaranteeing | -17.357 |
realistically | -16.346 |
condol | -15.923 |
disqualified | -15.842 |
Token | g |
Token ID | 70 |
Feature activation | -1.78e+00 |
Pos logit contributions | |
Bs | +22.676 |
Js | +21.926 |
ose | +21.686 |
ahn | +21.392 |
zb | +21.332 |
Neg logit contributions | |
conservancy | -16.833 |
guaranteeing | -16.212 |
realistically | -15.425 |
condol | -14.762 |
disqualified | -14.641 |
Token | 7 |
Token ID | 22 |
Feature activation | -2.77e+00 |
Pos logit contributions | |
Bs | +18.554 |
ose | +17.871 |
Js | +17.769 |
ahn | +17.671 |
zb | +17.354 |
Neg logit contributions | |
conservancy | -14.970 |
guaranteeing | -14.458 |
realistically | -14.029 |
condol | -13.477 |
disqualified | -13.283 |
Token | p |
Token ID | 79 |
Feature activation | -1.69e+00 |
Pos logit contributions | |
Bs | +24.482 |
Js | +23.526 |
ahn | +23.222 |
ose | +23.183 |
zb | +23.047 |
Neg logit contributions | |
conservancy | -17.416 |
guaranteeing | -16.633 |
realistically | -15.872 |
condol | -15.405 |
belie | -15.171 |
Token | k |
Token ID | 74 |
Feature activation | -2.20e+00 |
Pos logit contributions | |
Bs | +17.778 |
ose | +17.221 |
ahn | +16.959 |
Js | +16.941 |
zb | +16.595 |
Neg logit contributions | |
conservancy | -14.783 |
guaranteeing | -14.265 |
realistically | -13.815 |
condol | -13.267 |
equality | -13.236 |
Token | com |
Token ID | 785 |
Feature activation | -4.86e-01 |
Pos logit contributions | |
ose | +0.813 |
Bs | +0.801 |
ahn | +0.798 |
swick | +0.766 |
Adams | +0.755 |
Neg logit contributions | |
conservancy | -0.688 |
equality | -0.673 |
guaranteeing | -0.668 |
realistically | -0.666 |
illusion | -0.657 |
Token | / |
Token ID | 14 |
Feature activation | -6.87e-01 |
Pos logit contributions | |
ose | +4.653 |
Bs | +4.481 |
ahn | +4.434 |
Irish | +4.165 |
Js | +4.156 |
Neg logit contributions | |
conservancy | -6.887 |
guaranteeing | -6.512 |
illusion | -6.440 |
artifacts | -6.438 |
equality | -6.322 |
Token | 1 |
Token ID | 16 |
Feature activation | -2.47e+00 |
Pos logit contributions | |
ose | +5.531 |
Bs | +5.526 |
ahn | +5.355 |
Js | +5.128 |
Adams | +4.922 |
Neg logit contributions | |
conservancy | -10.098 |
realistically | -9.783 |
guaranteeing | -9.736 |
illusion | -9.472 |
equality | -9.447 |
Token | BP |
Token ID | 20866 |
Feature activation | -2.59e+00 |
Pos logit contributions | |
Bs | +23.039 |
Js | +21.956 |
ose | +21.586 |
sb | +21.517 |
ahn | +21.469 |
Neg logit contributions | |
conservancy | -17.108 |
guaranteeing | -16.481 |
realistically | -15.601 |
disqualified | -14.964 |
condol | -14.807 |
Token | g |
Token ID | 70 |
Feature activation | -2.28e+00 |
Pos logit contributions | |
Bs | +23.348 |
ose | +22.610 |
Js | +22.538 |
zb | +22.230 |
ahn | +22.201 |
Neg logit contributions | |
conservancy | -17.138 |
guaranteeing | -16.290 |
realistically | -15.770 |
condol | -15.101 |
belie | -15.046 |
Token | 8 |
Token ID | 23 |
Feature activation | -3.19e+00 |
Pos logit contributions | |
Bs | +21.983 |
Js | +20.983 |
ose | +20.793 |
zb | +20.529 |
ahn | +20.482 |
Neg logit contributions | |
conservancy | -16.708 |
guaranteeing | -15.800 |
realistically | -15.322 |
condol | -14.988 |
disqualified | -14.875 |
Token | sm |
Token ID | 5796 |
Feature activation | -1.74e+00 |
Pos logit contributions | |
Bs | +25.931 |
Js | +24.712 |
sb | +24.208 |
zb | +24.182 |
ose | +24.022 |
Neg logit contributions | |
conservancy | -18.952 |
guaranteeing | -17.411 |
realistically | -16.400 |
condol | -16.311 |
disqualified | -16.177 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | -6.35e-02 |
Pos logit contributions | |
Bs | +11.669 |
ose | +11.174 |
Js | +11.059 |
Irish | +10.661 |
ahn | +10.262 |
Neg logit contributions | |
conservancy | -23.657 |
illusion | -21.041 |
artifacts | -20.771 |
guaranteeing | -20.343 |
mbuds | -20.308 |
Token | Whatever |
Token ID | 21875 |
Feature activation | +1.84e-01 |
Pos logit contributions | |
ose | +0.635 |
Bs | +0.625 |
ahn | +0.620 |
swick | +0.588 |
Adams | +0.582 |
Neg logit contributions | |
conservancy | -0.843 |
equality | -0.827 |
guaranteeing | -0.823 |
realistically | -0.822 |
illusion | -0.811 |
Token | reason |
Token ID | 1738 |
Feature activation | +3.68e-01 |
Pos logit contributions | |
conservancy | +2.059 |
equality | +2.038 |
realistically | +2.029 |
guaranteeing | +2.022 |
illusion | +1.988 |
Neg logit contributions | |
ose | -2.180 |
Bs | -2.157 |
ahn | -2.134 |
swick | -2.081 |
ッド | -2.037 |
Token | Cal |
Token ID | 2199 |
Feature activation | +5.45e-02 |
Pos logit contributions | |
realistically | +4.135 |
guaranteeing | +4.130 |
equality | +4.119 |
conservancy | +4.106 |
illusion | +3.983 |
Neg logit contributions | |
ose | -4.215 |
ahn | -4.141 |
Bs | -4.113 |
swick | -4.029 |
ッド | -3.946 |
Token | pic |
Token ID | 8301 |
Feature activation | -3.58e-01 |
Pos logit contributions | |
equality | +6.285 |
realistically | +6.241 |
1080 | +6.165 |
Explain | +6.163 |
2022 | +6.111 |
Neg logit contributions | |
ahn | -7.489 |
ose | -7.450 |
Bs | -7.278 |
behavi | -7.180 |
ッド | -7.171 |
Token | . |
Token ID | 13 |
Feature activation | -1.71e-01 |
Pos logit contributions | |
ose | +3.441 |
Bs | +3.403 |
ahn | +3.353 |
Adams | +3.171 |
Irish | +3.153 |
Neg logit contributions | |
conservancy | -5.050 |
guaranteeing | -4.735 |
illusion | -4.727 |
equality | -4.721 |
artifacts | -4.664 |
Token | twitter |
Token ID | 6956 |
Feature activation | -5.92e-01 |
Pos logit contributions | |
ose | +2.226 |
Bs | +2.202 |
ahn | +2.194 |
swick | +2.089 |
Adams | +2.080 |
Neg logit contributions | |
conservancy | -1.768 |
guaranteeing | -1.704 |
equality | -1.695 |
realistically | -1.679 |
illusion | -1.658 |
Token | . |
Token ID | 13 |
Feature activation | +2.09e-01 |
Pos logit contributions | |
ose | +4.986 |
Bs | +4.910 |
ahn | +4.870 |
Adams | +4.507 |
Irish | +4.496 |
Neg logit contributions | |
conservancy | -9.052 |
guaranteeing | -8.354 |
artifacts | -8.296 |
illusion | -8.295 |
equality | -8.121 |
Token | com |
Token ID | 785 |
Feature activation | -4.27e-01 |
Pos logit contributions | |
equality | +1.973 |
realistically | +1.930 |
1080 | +1.915 |
conservancy | +1.892 |
completely | +1.882 |
Neg logit contributions | |
ose | -2.809 |
Bs | -2.764 |
ahn | -2.763 |
behavi | -2.742 |
swick | -2.684 |
Token | / |
Token ID | 14 |
Feature activation | -3.17e+00 |
Pos logit contributions | |
Bs | +4.054 |
ose | +4.053 |
ahn | +3.959 |
Js | +3.733 |
Irish | +3.733 |
Neg logit contributions | |
conservancy | -6.125 |
guaranteeing | -5.653 |
illusion | -5.647 |
artifacts | -5.624 |
equality | -5.578 |
Token | CS |
Token ID | 7902 |
Feature activation | -2.36e+00 |
Pos logit contributions | |
Bs | +25.991 |
Js | +24.734 |
ose | +24.471 |
zb | +24.432 |
sb | +24.193 |
Neg logit contributions | |
conservancy | -18.217 |
guaranteeing | -17.285 |
realistically | -16.243 |
disqualified | -15.921 |
condol | -15.710 |
Token | 3 |
Token ID | 18 |
Feature activation | -2.84e+00 |
Pos logit contributions | |
Bs | +22.629 |
ose | +21.591 |
Js | +21.559 |
zb | +21.302 |
ahn | +21.215 |
Neg logit contributions | |
conservancy | -16.770 |
guaranteeing | -15.718 |
realistically | -15.099 |
devastated | -14.572 |
condol | -14.473 |
Token | IV |
Token ID | 3824 |
Feature activation | -2.85e+00 |
Pos logit contributions | |
Bs | +24.745 |
Js | +23.624 |
ose | +23.324 |
zb | +23.155 |
ahn | +23.001 |
Neg logit contributions | |
conservancy | -17.913 |
guaranteeing | -16.901 |
realistically | -16.020 |
condol | -15.559 |
belie | -15.462 |
Token | IP |
Token ID | 4061 |
Feature activation | -2.86e+00 |
Pos logit contributions | |
Bs | +24.732 |
Js | +23.607 |
ose | +23.570 |
zb | +23.440 |
ahn | +23.236 |
Neg logit contributions | |
conservancy | -17.774 |
guaranteeing | -16.871 |
realistically | -16.149 |
condol | -15.535 |
disqualified | -15.533 |
Token | 9 |
Token ID | 24 |
Feature activation | -3.05e+00 |
Pos logit contributions | |
Bs | +24.769 |
ose | +23.646 |
Js | +23.634 |
zb | +23.206 |
ahn | +22.994 |
Neg logit contributions | |
conservancy | -18.024 |
guaranteeing | -16.895 |
realistically | -15.978 |
condol | -15.704 |
disqualified | -15.546 |
Token | banks |
Token ID | 6341 |
Feature activation | +1.64e-01 |
Pos logit contributions | |
guaranteeing | +6.514 |
equality | +6.412 |
realistically | +6.376 |
2022 | +6.239 |
conservancy | +6.222 |
Neg logit contributions | |
ose | -5.742 |
ahn | -5.588 |
Bs | -5.587 |
swick | -5.541 |
ッド | -5.453 |
Token | � |
Token ID | 447 |
Feature activation | +1.26e-01 |
Pos logit contributions | |
conservancy | +1.306 |
guaranteeing | +1.292 |
equality | +1.278 |
realistically | +1.266 |
illusion | +1.238 |
Neg logit contributions | |
ose | -2.473 |
Bs | -2.446 |
ahn | -2.437 |
swick | -2.383 |
Flavoring | -2.355 |
Token | � |
Token ID | 247 |
Feature activation | -2.11e-01 |
Pos logit contributions | |
conservancy | +1.991 |
equality | +1.973 |
guaranteeing | +1.967 |
realistically | +1.956 |
illusion | +1.935 |
Neg logit contributions | |
ose | -0.911 |
Bs | -0.889 |
ahn | -0.885 |
swick | -0.824 |
Js | -0.807 |
Token | use |
Token ID | 779 |
Feature activation | +3.78e-01 |
Pos logit contributions | |
ose | +1.332 |
Bs | +1.315 |
ahn | +1.280 |
swick | +1.178 |
Adams | +1.136 |
Neg logit contributions | |
conservancy | -3.573 |
equality | -3.510 |
realistically | -3.492 |
guaranteeing | -3.458 |
illusion | -3.450 |
Token | of |
Token ID | 286 |
Feature activation | +8.95e-01 |
Pos logit contributions | |
guaranteeing | +4.109 |
realistically | +4.056 |
equality | +4.047 |
conservancy | +4.039 |
illusion | +3.958 |
Neg logit contributions | |
ose | -4.404 |
Bs | -4.390 |
ahn | -4.358 |
swick | -4.281 |
ッド | -4.213 |
Token | faulty |
Token ID | 30954 |
Feature activation | +1.01e+00 |
Pos logit contributions | |
guaranteeing | +9.508 |
realistically | +9.156 |
equality | +9.047 |
2022 | +8.965 |
mbudsman | +8.820 |
Neg logit contributions | |
Flavoring | -9.681 |
ose | -9.616 |
Bs | -9.581 |
swick | -9.475 |
ahn | -9.382 |
Token | foreclosure |
Token ID | 41400 |
Feature activation | +4.49e-01 |
Pos logit contributions | |
guaranteeing | +10.492 |
realistically | +9.897 |
2022 | +9.768 |
WARN | +9.498 |
minimum | +9.441 |
Neg logit contributions | |
ッド | -11.021 |
swick | -10.933 |
Bs | -10.838 |
Flavoring | -10.769 |
ose | -10.696 |
Token | documents |
Token ID | 4963 |
Feature activation | +4.09e-01 |
Pos logit contributions | |
guaranteeing | +4.896 |
realistically | +4.772 |
equality | +4.679 |
conservancy | +4.647 |
illusion | +4.605 |
Neg logit contributions | |
ose | -5.256 |
ahn | -5.233 |
Bs | -5.224 |
swick | -5.106 |
ッド | -5.074 |
Token | . |
Token ID | 13 |
Feature activation | -2.68e-01 |
Pos logit contributions | |
guaranteeing | +4.468 |
realistically | +4.390 |
conservancy | +4.347 |
equality | +4.308 |
illusion | +4.196 |
Neg logit contributions | |
ose | -4.838 |
Bs | -4.775 |
ahn | -4.748 |
swick | -4.662 |
Flavoring | -4.643 |
Token | ( |
Token ID | 357 |
Feature activation | -1.77e-01 |
Pos logit contributions | |
ose | +3.089 |
Bs | +3.089 |
ahn | +3.048 |
swick | +2.932 |
Irish | +2.930 |
Neg logit contributions | |
conservancy | -3.186 |
equality | -3.023 |
illusion | -3.017 |
guaranteeing | -3.000 |
realistically | -2.995 |
Token | Bank |
Token ID | 28650 |
Feature activation | -2.55e-01 |
Pos logit contributions | |
ose | +1.723 |
Bs | +1.719 |
ahn | +1.686 |
Adams | +1.603 |
swick | +1.588 |
Neg logit contributions | |
conservancy | -2.401 |
equality | -2.325 |
guaranteeing | -2.322 |
realistically | -2.317 |
illusion | -2.293 |
Token | to |
Token ID | 284 |
Feature activation | +6.52e-01 |
Pos logit contributions | |
guaranteeing | +3.115 |
equality | +3.047 |
realistically | +3.042 |
conservancy | +3.038 |
minimum | +2.912 |
Neg logit contributions | |
ose | -3.749 |
Bs | -3.725 |
ahn | -3.705 |
swick | -3.641 |
ッド | -3.584 |
Token | achieve |
Token ID | 4620 |
Feature activation | +4.03e-01 |
Pos logit contributions | |
realistically | +8.924 |
guaranteeing | +8.731 |
2022 | +8.552 |
conservancy | +8.524 |
equality | +8.482 |
Neg logit contributions | |
swick | -5.587 |
ッド | -5.577 |
Bs | -5.476 |
ose | -5.410 |
behavi | -5.376 |
Token | its |
Token ID | 663 |
Feature activation | +1.75e-01 |
Pos logit contributions | |
guaranteeing | +4.391 |
equality | +4.350 |
realistically | +4.346 |
conservancy | +4.250 |
illusion | +4.186 |
Neg logit contributions | |
ose | -4.705 |
Bs | -4.703 |
swick | -4.682 |
ahn | -4.631 |
Flavoring | -4.582 |
Token | goal |
Token ID | 3061 |
Feature activation | +1.30e-01 |
Pos logit contributions | |
equality | +1.964 |
conservancy | +1.955 |
realistically | +1.955 |
guaranteeing | +1.955 |
illusion | +1.908 |
Neg logit contributions | |
ose | -2.053 |
Bs | -2.031 |
ahn | -2.014 |
swick | -1.963 |
ッド | -1.937 |
Token | . |
Token ID | 13 |
Feature activation | -4.70e-01 |
Pos logit contributions | |
conservancy | +1.524 |
equality | +1.512 |
guaranteeing | +1.509 |
realistically | +1.508 |
illusion | +1.467 |
Neg logit contributions | |
ose | -1.484 |
Bs | -1.466 |
ahn | -1.452 |
swick | -1.407 |
ッド | -1.381 |
Token | In |
Token ID | 554 |
Feature activation | +1.19e+00 |
Pos logit contributions | |
ose | +5.170 |
Bs | +5.071 |
ahn | +5.002 |
Irish | +4.920 |
Beginning | +4.895 |
Neg logit contributions | |
conservancy | -5.771 |
illusion | -5.449 |
equality | -5.419 |
guaranteeing | -5.411 |
artifacts | -5.384 |
Token | 1999 |
Token ID | 7358 |
Feature activation | +1.14e-01 |
Pos logit contributions | |
equality | +12.146 |
2022 | +11.824 |
guaranteeing | +11.440 |
adequ | +10.971 |
completely | +10.962 |
Neg logit contributions | |
Bs | -12.465 |
ッド | -12.406 |
swick | -12.362 |
ose | -12.146 |
ahn | -12.093 |
Token | , |
Token ID | 11 |
Feature activation | +1.73e-01 |
Pos logit contributions | |
conservancy | +1.345 |
equality | +1.331 |
guaranteeing | +1.326 |
realistically | +1.322 |
illusion | +1.295 |
Neg logit contributions | |
ose | -1.279 |
Bs | -1.268 |
ahn | -1.258 |
swick | -1.217 |
Adams | -1.193 |
Token | the |
Token ID | 262 |
Feature activation | +8.30e-01 |
Pos logit contributions | |
conservancy | +2.102 |
guaranteeing | +2.079 |
realistically | +2.072 |
equality | +2.071 |
illusion | +2.021 |
Neg logit contributions | |
ose | -1.882 |
Bs | -1.869 |
ahn | -1.854 |
swick | -1.813 |
ッド | -1.773 |
Token | country |
Token ID | 1499 |
Feature activation | +5.14e-01 |
Pos logit contributions | |
guaranteeing | +10.019 |
2022 | +9.978 |
realistically | +9.895 |
equality | +9.739 |
minimum | +9.673 |
Neg logit contributions | |
Bs | -7.753 |
swick | -7.722 |
ose | -7.603 |
ッド | -7.548 |
ahn | -7.488 |
Token | 's |
Token ID | 338 |
Feature activation | +9.31e-01 |
Pos logit contributions | |
realistically | +5.797 |
guaranteeing | +5.776 |
equality | +5.565 |
conservancy | +5.565 |
2022 | +5.543 |
Neg logit contributions | |
Bs | -5.760 |
ose | -5.711 |
ッド | -5.583 |
ahn | -5.578 |
Flavoring | -5.571 |
Token | want |
Token ID | 765 |
Feature activation | +1.89e-01 |
Pos logit contributions | |
realistically | +15.610 |
belie | +14.432 |
guaranteeing | +14.153 |
completely | +13.977 |
1080 | +13.622 |
Neg logit contributions | |
behavi | -14.885 |
Flavoring | -14.780 |
ッド | -14.733 |
swick | -14.602 |
Adams | -14.559 |
Token | to |
Token ID | 284 |
Feature activation | +3.00e-01 |
Pos logit contributions | |
conservancy | +2.031 |
equality | +2.005 |
realistically | +2.004 |
guaranteeing | +1.993 |
illusion | +1.965 |
Neg logit contributions | |
ose | -2.300 |
Bs | -2.287 |
ahn | -2.279 |
swick | -2.226 |
Adams | -2.182 |
Token | return |
Token ID | 1441 |
Feature activation | +6.41e-01 |
Pos logit contributions | |
realistically | +3.885 |
guaranteeing | +3.837 |
conservancy | +3.821 |
equality | +3.777 |
illusion | +3.764 |
Neg logit contributions | |
Bs | -2.941 |
ose | -2.934 |
ahn | -2.879 |
swick | -2.830 |
Adams | -2.814 |
Token | my |
Token ID | 616 |
Feature activation | +8.73e-02 |
Pos logit contributions | |
realistically | +6.958 |
guaranteeing | +6.793 |
equality | +6.733 |
illusion | +6.694 |
conservancy | +6.675 |
Neg logit contributions | |
ahn | -7.129 |
swick | -7.100 |
ose | -7.078 |
Bs | -7.064 |
Adams | -6.927 |
Token | blood |
Token ID | 2910 |
Feature activation | +7.71e-01 |
Pos logit contributions | |
conservancy | +0.997 |
equality | +0.983 |
guaranteeing | +0.978 |
realistically | +0.976 |
illusion | +0.969 |
Neg logit contributions | |
ose | -1.019 |
Bs | -1.011 |
ahn | -1.006 |
swick | -0.966 |
Adams | -0.955 |
Token | , |
Token ID | 11 |
Feature activation | +1.11e+00 |
Pos logit contributions | |
guaranteeing | +8.017 |
illusion | +7.962 |
realistically | +7.948 |
conservancy | +7.898 |
equality | +7.867 |
Neg logit contributions | |
Flavoring | -8.405 |
Adams | -8.329 |
ose | -8.316 |
Bs | -8.310 |
ahn | -8.254 |
Token | flesh |
Token ID | 11222 |
Feature activation | +4.47e-01 |
Pos logit contributions | |
realistically | +11.423 |
guaranteeing | +11.195 |
illusion | +11.004 |
completely | +10.721 |
1080 | +10.706 |
Neg logit contributions | |
Bs | -11.307 |
ose | -11.280 |
ahn | -11.125 |
swick | -11.097 |
Adams | -10.990 |
Token | , |
Token ID | 11 |
Feature activation | +7.47e-01 |
Pos logit contributions | |
conservancy | +4.571 |
equality | +4.536 |
realistically | +4.506 |
illusion | +4.495 |
guaranteeing | +4.478 |
Neg logit contributions | |
Bs | -5.391 |
ose | -5.376 |
ahn | -5.344 |
Adams | -5.255 |
swick | -5.152 |
Token | or |
Token ID | 393 |
Feature activation | +5.11e-01 |
Pos logit contributions | |
illusion | +6.662 |
realistically | +6.592 |
guaranteeing | +6.490 |
equality | +6.389 |
conservancy | +6.363 |
Neg logit contributions | |
Bs | -9.559 |
ose | -9.467 |
ahn | -9.376 |
Adams | -9.209 |
swick | -9.121 |
Token | soul |
Token ID | 5848 |
Feature activation | +1.16e+00 |
Pos logit contributions | |
illusion | +5.579 |
realistically | +5.539 |
guaranteeing | +5.499 |
equality | +5.474 |
conservancy | +5.431 |
Neg logit contributions | |
ose | -5.867 |
Bs | -5.855 |
ahn | -5.699 |
Adams | -5.585 |
swick | -5.565 |
Token | to |
Token ID | 284 |
Feature activation | +3.38e-01 |
Pos logit contributions | |
realistically | +10.275 |
guaranteeing | +10.211 |
completely | +10.101 |
illusion | +10.007 |
equality | +9.944 |
Neg logit contributions | |
Adams | -13.027 |
ahn | -12.734 |
Bs | -12.721 |
ose | -12.661 |
Lear | -12.648 |
Token | invest |
Token ID | 1325 |
Feature activation | +8.11e-01 |
Pos logit contributions | |
realistically | +13.398 |
guaranteeing | +12.445 |
remotely | +12.092 |
belie | +12.084 |
2022 | +12.074 |
Neg logit contributions | |
ッド | -8.853 |
behavi | -8.593 |
Bs | -8.545 |
Adams | -8.447 |
Lear | -8.384 |
Token | at |
Token ID | 379 |
Feature activation | +3.54e-02 |
Pos logit contributions | |
realistically | +8.638 |
guaranteeing | +8.211 |
2022 | +7.820 |
equality | +7.744 |
belie | +7.669 |
Neg logit contributions | |
swick | -9.268 |
ahn | -9.232 |
Bs | -9.210 |
ose | -9.184 |
Flavoring | -8.917 |
Token | a |
Token ID | 257 |
Feature activation | +7.13e-01 |
Pos logit contributions | |
conservancy | +0.412 |
equality | +0.406 |
realistically | +0.404 |
guaranteeing | +0.403 |
illusion | +0.397 |
Neg logit contributions | |
ose | -0.408 |
Bs | -0.403 |
ahn | -0.400 |
swick | -0.385 |
Adams | -0.379 |
Token | level |
Token ID | 1241 |
Feature activation | +7.61e-01 |
Pos logit contributions | |
realistically | +7.916 |
guaranteeing | +7.579 |
2022 | +7.335 |
conservancy | +7.209 |
equality | +7.188 |
Neg logit contributions | |
Bs | -8.071 |
ose | -7.889 |
swick | -7.737 |
ahn | -7.664 |
Flavoring | -7.625 |
Token | that |
Token ID | 326 |
Feature activation | +1.26e+00 |
Pos logit contributions | |
realistically | +8.397 |
guaranteeing | +8.255 |
2022 | +7.632 |
equality | +7.616 |
belie | +7.485 |
Neg logit contributions | |
swick | -8.653 |
ose | -8.580 |
Bs | -8.473 |
ahn | -8.415 |
ッド | -8.266 |
Token | could |
Token ID | 714 |
Feature activation | +1.72e+00 |
Pos logit contributions | |
realistically | +13.461 |
guaranteeing | +12.848 |
belie | +12.254 |
2022 | +12.181 |
compensated | +12.180 |
Neg logit contributions | |
ose | -12.769 |
swick | -12.688 |
ahn | -12.580 |
Bs | -12.489 |
ッド | -12.276 |
Token | bring |
Token ID | 2222 |
Feature activation | +1.01e+00 |
Pos logit contributions | |
realistically | +19.619 |
belie | +17.474 |
remotely | +16.873 |
guaranteeing | +16.698 |
adequately | +16.168 |
Neg logit contributions | |
behavi | -15.601 |
ッド | -15.599 |
Lear | -15.273 |
Adams | -14.746 |
Bs | -14.642 |
Token | quick |
Token ID | 2068 |
Feature activation | +3.57e-02 |
Pos logit contributions | |
realistically | +10.892 |
guaranteeing | +10.524 |
2022 | +10.230 |
equality | +10.127 |
belie | +9.967 |
Neg logit contributions | |
swick | -10.674 |
ahn | -10.649 |
Bs | -10.626 |
ッド | -10.336 |
ose | -10.234 |
Token | success |
Token ID | 1943 |
Feature activation | +6.71e-01 |
Pos logit contributions | |
conservancy | +0.367 |
equality | +0.363 |
realistically | +0.359 |
guaranteeing | +0.359 |
illusion | +0.352 |
Neg logit contributions | |
ose | -0.458 |
Bs | -0.454 |
ahn | -0.451 |
swick | -0.436 |
Adams | -0.430 |
Token | ; |
Token ID | 26 |
Feature activation | +3.49e-01 |
Pos logit contributions | |
realistically | +6.946 |
guaranteeing | +6.887 |
equality | +6.671 |
conservancy | +6.486 |
2022 | +6.401 |
Neg logit contributions | |
Bs | -7.871 |
ahn | -7.730 |
ose | -7.710 |
swick | -7.698 |
ッド | -7.667 |
Token | he |
Token ID | 339 |
Feature activation | +2.71e-01 |
Pos logit contributions | |
realistically | +3.841 |
guaranteeing | +3.790 |
equality | +3.729 |
conservancy | +3.703 |
illusion | +3.622 |
Neg logit contributions | |
ose | -4.144 |
Bs | -4.124 |
ahn | -4.101 |
swick | -3.925 |
ッド | -3.916 |
Token | Elsa |
Token ID | 19226 |
Feature activation | +5.07e-02 |
Pos logit contributions | |
realistically | +1.378 |
conservancy | +1.374 |
equality | +1.367 |
guaranteeing | +1.361 |
illusion | +1.328 |
Neg logit contributions | |
ose | -2.848 |
Bs | -2.820 |
ahn | -2.802 |
swick | -2.748 |
ッド | -2.718 |
Token | . |
Token ID | 13 |
Feature activation | +6.95e-01 |
Pos logit contributions | |
conservancy | +0.676 |
equality | +0.668 |
realistically | +0.663 |
guaranteeing | +0.662 |
illusion | +0.655 |
Neg logit contributions | |
ose | -0.497 |
Bs | -0.490 |
ahn | -0.486 |
swick | -0.464 |
Adams | -0.456 |
Token | So |
Token ID | 1406 |
Feature activation | -4.67e-01 |
Pos logit contributions | |
Explain | +5.676 |
equality | +5.673 |
1080 | +5.600 |
realistically | +5.586 |
2022 | +5.524 |
Neg logit contributions | |
behavi | -9.567 |
ose | -9.320 |
ahn | -9.275 |
Bs | -9.203 |
ッド | -9.184 |
Token | much |
Token ID | 881 |
Feature activation | -9.36e-02 |
Pos logit contributions | |
ose | +5.949 |
ahn | +5.829 |
Bs | +5.735 |
Irish | +5.435 |
Adams | +5.386 |
Neg logit contributions | |
conservancy | -5.040 |
equality | -4.734 |
artifacts | -4.611 |
guaranteeing | -4.582 |
1080 | -4.572 |
Token | so |
Token ID | 523 |
Feature activation | -2.58e-01 |
Pos logit contributions | |
ose | +1.038 |
Bs | +1.020 |
ahn | +1.019 |
swick | +0.967 |
Adams | +0.955 |
Neg logit contributions | |
conservancy | -1.143 |
equality | -1.118 |
guaranteeing | -1.105 |
realistically | -1.102 |
illusion | -1.095 |
Token | ." |
Token ID | 526 |
Feature activation | +1.22e+00 |
Pos logit contributions | |
ose | +2.866 |
ahn | +2.801 |
Bs | +2.790 |
swick | +2.617 |
Adams | +2.590 |
Neg logit contributions | |
conservancy | -3.175 |
equality | -3.081 |
guaranteeing | -3.032 |
illusion | -3.001 |
realistically | -2.995 |
Token |
|
Token ID | 198 |
Feature activation | +2.75e-01 |
Pos logit contributions | |
realistically | +9.498 |
Claudia | +9.279 |
SHE | +9.242 |
completely | +9.180 |
1080 | +9.160 |
Neg logit contributions | |
behavi | -14.336 |
Bs | -14.316 |
ahn | -14.040 |
ッド | -13.937 |
ose | -13.835 |
Token |
|
Token ID | 198 |
Feature activation | +1.06e+00 |
Pos logit contributions | |
equality | +2.570 |
realistically | +2.550 |
1080 | +2.509 |
guaranteeing | +2.455 |
completely | +2.450 |
Neg logit contributions | |
behavi | -3.687 |
ose | -3.667 |
ahn | -3.647 |
Bs | -3.638 |
ッド | -3.560 |
Token | I |
Token ID | 40 |
Feature activation | -4.11e-01 |
Pos logit contributions | |
1080 | +11.194 |
Seriously | +11.138 |
illusion | +10.814 |
completely | +10.770 |
equality | +10.760 |
Neg logit contributions | |
behavi | -11.899 |
ahn | -10.445 |
swick | -10.363 |
ose | -10.206 |
ッド | -10.166 |
Token | smile |
Token ID | 8212 |
Feature activation | +1.85e-01 |
Pos logit contributions | |
ose | +4.590 |
ahn | +4.429 |
Bs | +4.349 |
Js | +4.155 |
swick | +4.151 |
Neg logit contributions | |
conservancy | -5.072 |
equality | -4.920 |
artifacts | -4.757 |
illusion | -4.740 |
guaranteeing | -4.727 |
Token | . |
Token ID | 13 |
Feature activation | +1.17e+00 |
Pos logit contributions | |
realistically | +2.027 |
guaranteeing | +1.989 |
conservancy | +1.987 |
equality | +1.976 |
illusion | +1.943 |
Neg logit contributions | |
ose | -2.244 |
Bs | -2.223 |
ahn | -2.203 |
swick | -2.159 |
Adams | -2.122 |
Token | . |
Token ID | 13 |
Feature activation | -1.51e-01 |
Pos logit contributions | |
ose | +3.921 |
Bs | +3.790 |
ahn | +3.761 |
Irish | +3.605 |
swick | +3.554 |
Neg logit contributions | |
conservancy | -4.331 |
equality | -4.153 |
illusion | -4.126 |
artifacts | -4.118 |
realistically | -4.025 |
Token | � |
Token ID | 447 |
Feature activation | -1.32e-01 |
Pos logit contributions | |
ose | +1.807 |
Bs | +1.787 |
ahn | +1.769 |
swick | +1.709 |
Adams | +1.687 |
Neg logit contributions | |
conservancy | -1.715 |
equality | -1.647 |
guaranteeing | -1.637 |
realistically | -1.629 |
illusion | -1.627 |
Token | � |
Token ID | 251 |
Feature activation | +9.23e-01 |
Pos logit contributions | |
ose | +1.113 |
Bs | +1.089 |
ahn | +1.079 |
swick | +1.013 |
Adams | +0.998 |
Neg logit contributions | |
conservancy | -1.976 |
equality | -1.939 |
realistically | -1.919 |
guaranteeing | -1.918 |
illusion | -1.906 |
Token | At |
Token ID | 1629 |
Feature activation | -5.17e-01 |
Pos logit contributions | |
realistically | +9.621 |
equality | +9.565 |
1080 | +9.551 |
Explain | +9.483 |
guaranteeing | +9.459 |
Neg logit contributions | |
ose | -9.488 |
ahn | -9.411 |
Bs | -9.302 |
behavi | -9.162 |
swick | -8.842 |
Token | the |
Token ID | 262 |
Feature activation | -4.32e-01 |
Pos logit contributions | |
ose | +5.400 |
ahn | +5.159 |
Bs | +5.145 |
swick | +4.980 |
Irish | +4.980 |
Neg logit contributions | |
conservancy | -6.395 |
artifacts | -6.213 |
equality | -6.119 |
illusion | -6.074 |
guaranteeing | -6.047 |
Token | time |
Token ID | 640 |
Feature activation | +5.73e-01 |
Pos logit contributions | |
ose | +3.273 |
Bs | +3.109 |
ahn | +3.082 |
Irish | +2.966 |
swick | +2.915 |
Neg logit contributions | |
conservancy | -6.673 |
equality | -6.475 |
artifacts | -6.471 |
guaranteeing | -6.403 |
illusion | -6.379 |
Token | , |
Token ID | 11 |
Feature activation | +1.06e-01 |
Pos logit contributions | |
guaranteeing | +5.819 |
realistically | +5.818 |
equality | +5.754 |
conservancy | +5.709 |
illusion | +5.523 |
Neg logit contributions | |
ose | -6.827 |
Bs | -6.755 |
ahn | -6.744 |
swick | -6.545 |
Js | -6.451 |
Token | he |
Token ID | 339 |
Feature activation | +1.76e-01 |
Pos logit contributions | |
conservancy | +0.975 |
equality | +0.967 |
realistically | +0.962 |
guaranteeing | +0.958 |
illusion | +0.935 |
Neg logit contributions | |
ose | -1.471 |
Bs | -1.452 |
ahn | -1.450 |
swick | -1.401 |
Adams | -1.378 |
Token | said |
Token ID | 531 |
Feature activation | +1.09e-01 |
Pos logit contributions | |
realistically | +1.648 |
conservancy | +1.637 |
guaranteeing | +1.630 |
equality | +1.620 |
illusion | +1.577 |
Neg logit contributions | |
ose | -2.405 |
Bs | -2.386 |
ahn | -2.361 |
swick | -2.293 |
Adams | -2.267 |
Token | the |
Token ID | 262 |
Feature activation | -3.52e-01 |
Pos logit contributions | |
conservancy | +1.289 |
equality | +1.284 |
realistically | +1.278 |
guaranteeing | +1.275 |
illusion | +1.246 |
Neg logit contributions | |
ose | -1.221 |
Bs | -1.204 |
ahn | -1.201 |
swick | -1.150 |
Adams | -1.131 |
Token | coalition |
Token ID | 9906 |
Feature activation | +5.91e-01 |
Pos logit contributions | |
ose | +3.866 |
Bs | +3.794 |
ahn | +3.779 |
Irish | +3.653 |
swick | +3.584 |
Neg logit contributions | |
conservancy | -4.270 |
equality | -4.067 |
guaranteeing | -4.033 |
realistically | -4.010 |
illusion | -4.010 |
Token | of |
Token ID | 286 |
Feature activation | +5.20e-02 |
Pos logit contributions | |
ose | +0.287 |
Bs | +0.283 |
ahn | +0.281 |
swick | +0.269 |
Adams | +0.265 |
Neg logit contributions | |
conservancy | -0.314 |
equality | -0.309 |
guaranteeing | -0.306 |
realistically | -0.306 |
illusion | -0.302 |
Token | State |
Token ID | 1812 |
Feature activation | +3.65e-01 |
Pos logit contributions | |
conservancy | +0.739 |
equality | +0.736 |
guaranteeing | +0.727 |
realistically | +0.727 |
illusion | +0.718 |
Neg logit contributions | |
ose | -0.463 |
Bs | -0.457 |
ahn | -0.454 |
swick | -0.427 |
ッド | -0.422 |
Token | Made |
Token ID | 14446 |
Feature activation | +1.90e-01 |
Pos logit contributions | |
guaranteeing | +4.176 |
realistically | +4.176 |
equality | +4.169 |
conservancy | +4.099 |
completely | +3.995 |
Neg logit contributions | |
Bs | -4.027 |
ose | -4.007 |
ahn | -3.997 |
ッド | -3.907 |
swick | -3.840 |
Token | le |
Token ID | 293 |
Feature activation | +1.30e-01 |
Pos logit contributions | |
conservancy | +2.670 |
equality | +2.661 |
guaranteeing | +2.639 |
realistically | +2.637 |
illusion | +2.634 |
Neg logit contributions | |
ose | -1.648 |
Bs | -1.638 |
ahn | -1.592 |
swick | -1.539 |
ッド | -1.538 |
Token | ine |
Token ID | 500 |
Feature activation | -7.14e-02 |
Pos logit contributions | |
equality | +1.540 |
conservancy | +1.524 |
guaranteeing | +1.508 |
realistically | +1.501 |
illusion | +1.484 |
Neg logit contributions | |
ose | -1.443 |
Bs | -1.439 |
ahn | -1.417 |
swick | -1.391 |
ッド | -1.365 |
Token | Al |
Token ID | 978 |
Feature activation | -1.61e-01 |
Pos logit contributions | |
ose | +0.819 |
Bs | +0.806 |
ahn | +0.802 |
swick | +0.767 |
Adams | +0.757 |
Neg logit contributions | |
conservancy | -0.842 |
equality | -0.826 |
realistically | -0.817 |
guaranteeing | -0.816 |
illusion | -0.808 |
Token | bright |
Token ID | 29199 |
Feature activation | +1.46e-01 |
Pos logit contributions | |
ose | +1.616 |
ahn | +1.577 |
Bs | +1.575 |
swick | +1.492 |
Adams | +1.475 |
Neg logit contributions | |
conservancy | -2.169 |
equality | -2.108 |
realistically | -2.095 |
guaranteeing | -2.093 |
illusion | -2.068 |
Token | about |
Token ID | 546 |
Feature activation | +6.74e-01 |
Pos logit contributions | |
conservancy | +1.598 |
realistically | +1.588 |
equality | +1.585 |
guaranteeing | +1.583 |
illusion | +1.533 |
Neg logit contributions | |
ose | -1.753 |
Bs | -1.736 |
ahn | -1.713 |
swick | -1.670 |
ッド | -1.642 |
Token | the |
Token ID | 262 |
Feature activation | +4.27e-01 |
Pos logit contributions | |
guaranteeing | +7.678 |
realistically | +7.582 |
equality | +7.440 |
2022 | +7.156 |
belie | +7.060 |
Neg logit contributions | |
ahn | -7.316 |
Bs | -7.307 |
ose | -7.259 |
swick | -7.155 |
ッド | -7.108 |
Token | devastating |
Token ID | 14101 |
Feature activation | +4.49e-01 |
Pos logit contributions | |
realistically | +5.352 |
guaranteeing | +5.333 |
equality | +5.317 |
conservancy | +5.183 |
2022 | +5.164 |
Neg logit contributions | |
ose | -4.291 |
Bs | -4.278 |
ahn | -4.257 |
ッド | -4.066 |
swick | -4.020 |
Token | sanctions |
Token ID | 9388 |
Feature activation | -5.41e-01 |
Pos logit contributions | |
realistically | +4.898 |
equality | +4.882 |
guaranteeing | +4.819 |
2022 | +4.695 |
illusion | +4.694 |
Neg logit contributions | |
Bs | -5.237 |
ahn | -5.182 |
ose | -5.158 |
swick | -5.015 |
ッド | -4.967 |
Token | he |
Token ID | 339 |
Feature activation | +5.54e-01 |
Pos logit contributions | |
realistically | +2.997 |
conservancy | +2.994 |
guaranteeing | +2.991 |
equality | +2.977 |
illusion | +2.861 |
Neg logit contributions | |
ose | -3.327 |
Bs | -3.289 |
ahn | -3.272 |
swick | -3.166 |
ッド | -3.145 |
Token | found |
Token ID | 1043 |
Feature activation | +1.65e-01 |
Pos logit contributions | |
realistically | +5.639 |
guaranteeing | +5.420 |
equality | +5.225 |
conservancy | +5.197 |
completely | +5.101 |
Neg logit contributions | |
Bs | -6.853 |
swick | -6.766 |
ose | -6.739 |
ッド | -6.716 |
ahn | -6.666 |
Token | it |
Token ID | 340 |
Feature activation | +5.80e-01 |
Pos logit contributions | |
conservancy | +1.961 |
equality | +1.952 |
guaranteeing | +1.940 |
realistically | +1.933 |
illusion | +1.891 |
Neg logit contributions | |
ose | -1.831 |
Bs | -1.811 |
ahn | -1.793 |
swick | -1.733 |
ッド | -1.697 |
Token | hard |
Token ID | 1327 |
Feature activation | -3.06e-01 |
Pos logit contributions | |
realistically | +6.020 |
guaranteeing | +5.936 |
equality | +5.582 |
completely | +5.571 |
illusion | +5.473 |
Neg logit contributions | |
Flavoring | -6.984 |
Bs | -6.973 |
ahn | -6.925 |
ose | -6.904 |
swick | -6.827 |
Token | to |
Token ID | 284 |
Feature activation | +6.30e-01 |
Pos logit contributions | |
ose | +3.490 |
Bs | +3.372 |
ahn | +3.371 |
swick | +3.199 |
Irish | +3.170 |
Neg logit contributions | |
conservancy | -3.644 |
equality | -3.537 |
illusion | -3.493 |
guaranteeing | -3.447 |
artifacts | -3.442 |
Token | believe |
Token ID | 1975 |
Feature activation | +3.18e-01 |
Pos logit contributions | |
realistically | +7.335 |
guaranteeing | +6.951 |
belie | +6.692 |
equality | +6.609 |
conservancy | +6.554 |
Neg logit contributions | |
Bs | -7.159 |
ッド | -7.153 |
swick | -6.988 |
ose | -6.933 |
ahn | -6.929 |
Token | that |
Token ID | 326 |
Feature activation | +4.37e-01 |
Pos logit contributions | |
realistically | +3.539 |
guaranteeing | +3.524 |
equality | +3.510 |
conservancy | +3.497 |
illusion | +3.384 |
Neg logit contributions | |
ose | -3.728 |
Bs | -3.703 |
ahn | -3.677 |
swick | -3.556 |
ッド | -3.519 |
Token | Jack |
Token ID | 3619 |
Feature activation | -5.76e-02 |
Pos logit contributions | |
guaranteeing | +4.893 |
realistically | +4.845 |
equality | +4.746 |
conservancy | +4.738 |
illusion | +4.582 |
Neg logit contributions | |
ose | -5.061 |
Bs | -5.054 |
ahn | -4.975 |
swick | -4.871 |
ッド | -4.831 |
Token | now |
Token ID | 2197 |
Feature activation | -8.22e-02 |
Pos logit contributions | |
ose | +0.590 |
Bs | +0.580 |
ahn | +0.576 |
swick | +0.549 |
Adams | +0.540 |
Neg logit contributions | |
conservancy | -0.750 |
equality | -0.738 |
guaranteeing | -0.731 |
realistically | -0.730 |
illusion | -0.723 |
Token | and |
Token ID | 290 |
Feature activation | +1.69e-01 |
Pos logit contributions | |
ose | +1.019 |
Bs | +1.003 |
ahn | +0.999 |
swick | +0.958 |
Adams | +0.947 |
Neg logit contributions | |
conservancy | -0.894 |
equality | -0.877 |
guaranteeing | -0.865 |
realistically | -0.864 |
illusion | -0.854 |
Token | his |
Token ID | 465 |
Feature activation | +2.07e-03 |
Pos logit contributions | |
conservancy | +2.048 |
equality | +2.011 |
guaranteeing | +2.000 |
realistically | +1.998 |
illusion | +1.949 |
Neg logit contributions | |
ose | -1.853 |
Bs | -1.839 |
ahn | -1.824 |
swick | -1.747 |
ッド | -1.731 |
Token | saying |
Token ID | 2282 |
Feature activation | +1.14e-01 |
Pos logit contributions | |
ose | +2.670 |
Bs | +2.620 |
ahn | +2.616 |
swick | +2.521 |
Adams | +2.497 |
Neg logit contributions | |
conservancy | -1.980 |
equality | -1.892 |
illusion | -1.846 |
realistically | -1.840 |
guaranteeing | -1.829 |
Token | that |
Token ID | 326 |
Feature activation | +1.02e-01 |
Pos logit contributions | |
conservancy | +1.340 |
equality | +1.338 |
realistically | +1.325 |
guaranteeing | +1.323 |
illusion | +1.299 |
Neg logit contributions | |
ose | -1.286 |
Bs | -1.274 |
ahn | -1.268 |
swick | -1.214 |
Adams | -1.192 |
Token | Bill |
Token ID | 3941 |
Feature activation | +4.98e-01 |
Pos logit contributions | |
conservancy | +1.204 |
equality | +1.198 |
guaranteeing | +1.191 |
realistically | +1.189 |
illusion | +1.165 |
Neg logit contributions | |
ose | -1.146 |
Bs | -1.133 |
ahn | -1.130 |
swick | -1.084 |
Adams | -1.062 |
Token | Clinton |
Token ID | 2605 |
Feature activation | +4.53e-02 |
Pos logit contributions | |
realistically | +6.447 |
guaranteeing | +6.424 |
equality | +6.399 |
2022 | +6.210 |
illusion | +6.182 |
Neg logit contributions | |
ahn | -4.707 |
ose | -4.554 |
Bs | -4.553 |
Flavoring | -4.451 |
swick | -4.416 |
Token | raped |
Token ID | 16110 |
Feature activation | +1.37e-01 |
Pos logit contributions | |
conservancy | +0.488 |
equality | +0.481 |
realistically | +0.479 |
guaranteeing | +0.478 |
illusion | +0.469 |
Neg logit contributions | |
ose | -0.560 |
Bs | -0.554 |
ahn | -0.552 |
swick | -0.531 |
Adams | -0.523 |
Token | a |
Token ID | 257 |
Feature activation | -7.05e-02 |
Pos logit contributions | |
conservancy | +1.507 |
equality | +1.504 |
realistically | +1.488 |
guaranteeing | +1.486 |
illusion | +1.452 |
Neg logit contributions | |
ose | -1.634 |
Bs | -1.633 |
ahn | -1.625 |
swick | -1.573 |
Js | -1.528 |
Token | 13 |
Token ID | 1511 |
Feature activation | -3.37e-01 |
Pos logit contributions | |
ose | +0.764 |
Bs | +0.749 |
ahn | +0.745 |
swick | +0.710 |
Adams | +0.702 |
Neg logit contributions | |
conservancy | -0.878 |
equality | -0.862 |
guaranteeing | -0.853 |
realistically | -0.851 |
illusion | -0.844 |
Token | year |
Token ID | 614 |
Feature activation | -9.65e-02 |
Pos logit contributions | |
ose | +3.186 |
Bs | +3.079 |
ahn | +3.029 |
Irish | +2.853 |
ッド | +2.848 |
Neg logit contributions | |
conservancy | -4.686 |
equality | -4.625 |
illusion | -4.535 |
realistically | -4.515 |
guaranteeing | -4.507 |
Token | old |
Token ID | 1468 |
Feature activation | +6.84e-01 |
Pos logit contributions | |
ose | +0.769 |
Bs | +0.746 |
ahn | +0.737 |
swick | +0.692 |
Adams | +0.680 |
Neg logit contributions | |
conservancy | -1.479 |
equality | -1.461 |
guaranteeing | -1.445 |
realistically | -1.445 |
illusion | -1.434 |
Token | girl |
Token ID | 2576 |
Feature activation | +9.67e-01 |
Pos logit contributions | |
realistically | +7.100 |
guaranteeing | +7.084 |
equality | +6.918 |
2022 | +6.741 |
belie | +6.685 |
Neg logit contributions | |
ahn | -7.932 |
Bs | -7.930 |
Flavoring | -7.725 |
swick | -7.694 |
ose | -7.660 |
Token | . |
Token ID | 13 |
Feature activation | -5.10e-01 |
Pos logit contributions | |
guaranteeing | +9.572 |
realistically | +9.564 |
equality | +9.017 |
2022 | +8.975 |
belie | +8.799 |
Neg logit contributions | |
Bs | -11.031 |
ahn | -10.681 |
Flavoring | -10.678 |
ose | -10.601 |
Lear | -10.599 |
Token | w |
Token ID | 266 |
Feature activation | +2.18e-01 |
Pos logit contributions | |
conservancy | +0.349 |
equality | +0.346 |
guaranteeing | +0.342 |
realistically | +0.342 |
illusion | +0.337 |
Neg logit contributions | |
ose | -0.342 |
Bs | -0.337 |
ahn | -0.336 |
swick | -0.322 |
Adams | -0.317 |
Token | ickets |
Token ID | 15970 |
Feature activation | -3.48e-01 |
Pos logit contributions | |
conservancy | +2.532 |
equality | +2.530 |
illusion | +2.493 |
guaranteeing | +2.481 |
realistically | +2.470 |
Neg logit contributions | |
ose | -2.411 |
Bs | -2.375 |
ahn | -2.357 |
swick | -2.316 |
ッド | -2.282 |
Token | that |
Token ID | 326 |
Feature activation | -4.87e-01 |
Pos logit contributions | |
ose | +4.070 |
ahn | +4.008 |
Bs | +3.978 |
swick | +3.785 |
Irish | +3.784 |
Neg logit contributions | |
conservancy | -4.024 |
equality | -3.842 |
illusion | -3.778 |
1080 | -3.720 |
realistically | -3.710 |
Token | we |
Token ID | 356 |
Feature activation | -6.25e-01 |
Pos logit contributions | |
ose | +5.658 |
ahn | +5.526 |
Bs | +5.476 |
Irish | +5.302 |
swick | +5.214 |
Neg logit contributions | |
conservancy | -5.548 |
equality | -5.282 |
guaranteeing | -5.192 |
illusion | -5.180 |
1080 | -5.164 |
Token | hope |
Token ID | 2911 |
Feature activation | -3.54e-01 |
Pos logit contributions | |
ose | +6.130 |
ahn | +5.864 |
Bs | +5.723 |
Irish | +5.588 |
swick | +5.588 |
Neg logit contributions | |
conservancy | -8.205 |
equality | -7.893 |
guaranteeing | -7.687 |
artifacts | -7.676 |
illusion | -7.633 |
Token | for |
Token ID | 329 |
Feature activation | -3.59e-01 |
Pos logit contributions | |
ose | +4.138 |
Bs | +4.041 |
ahn | +4.037 |
Irish | +3.851 |
swick | +3.823 |
Neg logit contributions | |
conservancy | -4.111 |
equality | -3.940 |
illusion | -3.855 |
guaranteeing | -3.819 |
realistically | -3.800 |
Token | does |
Token ID | 857 |
Feature activation | -8.57e-01 |
Pos logit contributions | |
ose | +4.043 |
ahn | +3.978 |
Bs | +3.939 |
swick | +3.751 |
Irish | +3.733 |
Neg logit contributions | |
conservancy | -4.314 |
equality | -4.129 |
illusion | -4.064 |
guaranteeing | -4.012 |
realistically | -4.003 |
Token | not |
Token ID | 407 |
Feature activation | +6.41e-01 |
Pos logit contributions | |
ose | +7.452 |
ahn | +7.233 |
Bs | +7.009 |
Irish | +6.668 |
swick | +6.526 |
Neg logit contributions | |
conservancy | -11.661 |
equality | -11.036 |
guaranteeing | -10.895 |
mbudsman | -10.812 |
illusion | -10.749 |
Token | material |
Token ID | 2587 |
Feature activation | -3.84e-01 |
Pos logit contributions | |
realistically | +8.373 |
belie | +8.054 |
guaranteeing | +7.994 |
equality | +7.724 |
completely | +7.720 |
Neg logit contributions | |
Bs | -6.215 |
Lear | -6.138 |
ッド | -6.047 |
Adams | -5.977 |
ose | -5.975 |
Token | ise |
Token ID | 786 |
Feature activation | +7.12e-02 |
Pos logit contributions | |
ose | +3.149 |
ahn | +2.951 |
Bs | +2.857 |
Irish | +2.693 |
ッド | +2.669 |
Neg logit contributions | |
conservancy | -5.865 |
equality | -5.744 |
guaranteeing | -5.675 |
illusion | -5.626 |
realistically | -5.610 |
Token | . |
Token ID | 13 |
Feature activation | -2.48e-01 |
Pos logit contributions | |
conservancy | +0.848 |
equality | +0.841 |
realistically | +0.840 |
guaranteeing | +0.838 |
illusion | +0.820 |
Neg logit contributions | |
ose | -0.793 |
Bs | -0.785 |
ahn | -0.779 |
swick | -0.750 |
Adams | -0.737 |
Token | { |
Token ID | 1391 |
Feature activation | -7.92e-01 |
Pos logit contributions | |
ose | +5.155 |
ahn | +4.693 |
Bs | +4.619 |
Js | +4.222 |
Flavoring | +4.198 |
Neg logit contributions | |
conservancy | -10.224 |
guaranteeing | -9.753 |
equality | -9.673 |
realistically | -9.656 |
artifacts | -9.655 |
Token | $ |
Token ID | 720 |
Feature activation | +2.94e-01 |
Pos logit contributions | |
ose | +8.104 |
Bs | +7.495 |
ahn | +7.357 |
Irish | +7.162 |
Scotch | +7.054 |
Neg logit contributions | |
conservancy | -9.928 |
illusion | -9.270 |
artifacts | -9.237 |
equality | -9.230 |
guaranteeing | -9.184 |
Token | font |
Token ID | 10331 |
Feature activation | -4.58e-01 |
Pos logit contributions | |
equality | +3.636 |
conservancy | +3.577 |
illusion | +3.568 |
realistically | +3.530 |
1080 | +3.507 |
Neg logit contributions | |
ose | -3.043 |
ahn | -3.030 |
Bs | -2.959 |
swick | -2.934 |
ッド | -2.889 |
Token | 2 |
Token ID | 17 |
Feature activation | -4.95e-01 |
Pos logit contributions | |
ose | +3.679 |
Bs | +3.620 |
ahn | +3.423 |
Js | +3.215 |
swick | +3.201 |
Neg logit contributions | |
conservancy | -6.942 |
guaranteeing | -6.770 |
equality | -6.719 |
realistically | -6.697 |
illusion | -6.555 |
Token | = |
Token ID | 796 |
Feature activation | -9.12e-01 |
Pos logit contributions | |
ose | +5.161 |
Bs | +5.093 |
ahn | +4.920 |
Js | +4.695 |
swick | +4.646 |
Neg logit contributions | |
conservancy | -6.587 |
guaranteeing | -6.174 |
equality | -6.045 |
illusion | -5.999 |
artifacts | -5.998 |
Token | New |
Token ID | 968 |
Feature activation | -9.39e-01 |
Pos logit contributions | |
ose | +6.100 |
Flavoring | +5.505 |
Irish | +5.424 |
ahn | +5.414 |
Bs | +5.289 |
Neg logit contributions | |
conservancy | -14.134 |
artifacts | -13.544 |
guaranteeing | -13.541 |
illusion | -13.476 |
realistically | -13.172 |
Token | - |
Token ID | 12 |
Feature activation | -1.20e+00 |
Pos logit contributions | |
ose | +6.662 |
Bs | +6.501 |
ahn | +6.252 |
Irish | +5.992 |
Js | +5.932 |
Neg logit contributions | |
conservancy | -15.349 |
guaranteeing | -13.944 |
artifacts | -13.681 |
belie | -13.519 |
illusion | -13.406 |
Token | Object |
Token ID | 10267 |
Feature activation | -2.03e-01 |
Pos logit contributions | |
Bs | +7.243 |
Js | +6.495 |
ose | +6.416 |
Lear | +6.352 |
Fle | +6.304 |
Neg logit contributions | |
conservancy | -20.946 |
guaranteeing | -19.114 |
belie | -18.751 |
devastated | -18.664 |
condol | -18.546 |
Token | System |
Token ID | 4482 |
Feature activation | -3.44e-01 |
Pos logit contributions | |
ose | +2.216 |
Bs | +2.150 |
ahn | +2.146 |
swick | +2.058 |
Irish | +2.025 |
Neg logit contributions | |
conservancy | -2.531 |
equality | -2.459 |
guaranteeing | -2.428 |
illusion | -2.423 |
realistically | -2.416 |
Token | . |
Token ID | 13 |
Feature activation | -7.36e-01 |
Pos logit contributions | |
ose | +3.299 |
Bs | +3.242 |
ahn | +3.206 |
Js | +3.022 |
Adams | +3.005 |
Neg logit contributions | |
conservancy | -4.876 |
guaranteeing | -4.619 |
illusion | -4.526 |
equality | -4.518 |
artifacts | -4.502 |
Token | Draw |
Token ID | 25302 |
Feature activation | -5.59e-01 |
Pos logit contributions | |
Bs | +2.979 |
ose | +2.842 |
Js | +2.538 |
Adams | +2.511 |
ahn | +2.443 |
Neg logit contributions | |
conservancy | -14.343 |
guaranteeing | -13.692 |
realistically | -13.126 |
condol | -13.117 |
belie | -13.104 |
Token | flag |
Token ID | 6056 |
Feature activation | +2.93e-01 |
Pos logit contributions | |
ose | +1.750 |
Bs | +1.708 |
ahn | +1.698 |
swick | +1.621 |
Irish | +1.606 |
Neg logit contributions | |
conservancy | -2.063 |
equality | -2.027 |
guaranteeing | -1.998 |
realistically | -1.987 |
illusion | -1.974 |
Token | draped |
Token ID | 38425 |
Feature activation | +7.35e-01 |
Pos logit contributions | |
realistically | +2.987 |
guaranteeing | +2.957 |
conservancy | +2.930 |
equality | +2.913 |
illusion | +2.817 |
Neg logit contributions | |
ahn | -3.728 |
ose | -3.721 |
Bs | -3.698 |
Flavoring | -3.585 |
swick | -3.558 |
Token | over |
Token ID | 625 |
Feature activation | -2.02e-01 |
Pos logit contributions | |
realistically | +7.145 |
guaranteeing | +6.891 |
equality | +6.678 |
conservancy | +6.426 |
belie | +6.331 |
Neg logit contributions | |
Flavoring | -9.501 |
ahn | -9.107 |
Bs | -9.087 |
ose | -9.054 |
swick | -8.995 |
Token | his |
Token ID | 465 |
Feature activation | -5.50e-01 |
Pos logit contributions | |
ose | +2.392 |
Bs | +2.350 |
ahn | +2.334 |
swick | +2.228 |
Adams | +2.217 |
Neg logit contributions | |
conservancy | -2.297 |
equality | -2.228 |
guaranteeing | -2.205 |
illusion | -2.204 |
realistically | -2.203 |
Token | shoulder |
Token ID | 8163 |
Feature activation | +5.67e-01 |
Pos logit contributions | |
ose | +4.627 |
Irish | +4.442 |
Bs | +4.382 |
ahn | +4.354 |
Adams | +4.159 |
Neg logit contributions | |
conservancy | -8.001 |
equality | -7.747 |
artifacts | -7.667 |
illusion | -7.664 |
minimum | -7.557 |
Token | had |
Token ID | 550 |
Feature activation | -6.55e-01 |
Pos logit contributions | |
realistically | +5.662 |
guaranteeing | +5.572 |
equality | +5.320 |
conservancy | +5.264 |
completely | +5.220 |
Neg logit contributions | |
Bs | -7.029 |
ose | -7.014 |
ahn | -7.009 |
Flavoring | -6.852 |
swick | -6.814 |
Token | ba |
Token ID | 26605 |
Feature activation | -6.25e-02 |
Pos logit contributions | |
ose | +8.485 |
ahn | +8.340 |
Irish | +8.327 |
Bs | +8.283 |
ッド | +7.936 |
Neg logit contributions | |
conservancy | -6.311 |
equality | -5.866 |
illusion | -5.837 |
artifacts | -5.736 |
mbudsman | -5.702 |
Token | ited |
Token ID | 863 |
Feature activation | -7.02e-01 |
Pos logit contributions | |
ose | +0.801 |
Bs | +0.790 |
ahn | +0.788 |
swick | +0.754 |
Adams | +0.745 |
Neg logit contributions | |
conservancy | -0.654 |
equality | -0.641 |
guaranteeing | -0.633 |
realistically | -0.632 |
illusion | -0.623 |
Token | the |
Token ID | 262 |
Feature activation | -5.33e-01 |
Pos logit contributions | |
ose | +7.170 |
Bs | +6.902 |
Irish | +6.883 |
ahn | +6.829 |
ッド | +6.612 |
Neg logit contributions | |
conservancy | -8.546 |
illusion | -8.293 |
artifacts | -8.126 |
equality | -8.048 |
guaranteeing | -8.024 |
Token | crowd |
Token ID | 4315 |
Feature activation | +3.12e-01 |
Pos logit contributions | |
ose | +6.418 |
Bs | +6.233 |
Irish | +6.220 |
ahn | +6.191 |
ッド | +5.888 |
Neg logit contributions | |
conservancy | -5.804 |
illusion | -5.473 |
guaranteeing | -5.421 |
equality | -5.409 |
artifacts | -5.362 |
Token | . |
Token ID | 13 |
Feature activation | +5.63e-02 |
Pos logit contributions | |
realistically | +3.501 |
guaranteeing | +3.496 |
conservancy | +3.467 |
equality | +3.462 |
illusion | +3.374 |
Neg logit contributions | |
ose | -3.573 |
Bs | -3.566 |
ahn | -3.551 |
swick | -3.423 |
Js | -3.358 |
Token | every |
Token ID | 790 |
Feature activation | -4.00e-01 |
Pos logit contributions | |
ose | +3.930 |
ahn | +3.786 |
Bs | +3.743 |
swick | +3.531 |
Irish | +3.518 |
Neg logit contributions | |
conservancy | -5.760 |
equality | -5.635 |
guaranteeing | -5.586 |
illusion | -5.577 |
realistically | -5.554 |
Token | stop |
Token ID | 2245 |
Feature activation | +5.90e-01 |
Pos logit contributions | |
ose | +4.340 |
ahn | +4.282 |
Bs | +4.160 |
Irish | +4.084 |
Adams | +4.000 |
Neg logit contributions | |
conservancy | -4.882 |
equality | -4.815 |
illusion | -4.663 |
realistically | -4.663 |
guaranteeing | -4.663 |
Token | . |
Token ID | 13 |
Feature activation | -4.15e-01 |
Pos logit contributions | |
guaranteeing | +6.368 |
conservancy | +6.354 |
realistically | +6.316 |
equality | +6.172 |
illusion | +6.152 |
Neg logit contributions | |
Bs | -6.653 |
ose | -6.622 |
ahn | -6.352 |
swick | -6.282 |
Flavoring | -6.275 |
Token | This |
Token ID | 770 |
Feature activation | +9.90e-02 |
Pos logit contributions | |
ose | +4.980 |
Bs | +4.924 |
ahn | +4.869 |
Irish | +4.795 |
swick | +4.753 |
Neg logit contributions | |
conservancy | -4.706 |
illusion | -4.473 |
equality | -4.447 |
guaranteeing | -4.424 |
realistically | -4.368 |
Token | necess |
Token ID | 2418 |
Feature activation | +1.03e-01 |
Pos logit contributions | |
conservancy | +1.180 |
equality | +1.157 |
realistically | +1.155 |
guaranteeing | +1.149 |
illusion | +1.135 |
Neg logit contributions | |
ose | -1.113 |
Bs | -1.102 |
ahn | -1.093 |
swick | -1.051 |
ッド | -1.034 |
Token | itated |
Token ID | 13939 |
Feature activation | -6.48e-01 |
Pos logit contributions | |
conservancy | +1.290 |
equality | +1.272 |
realistically | +1.257 |
guaranteeing | +1.256 |
illusion | +1.251 |
Neg logit contributions | |
ose | -1.077 |
Bs | -1.062 |
ahn | -1.055 |
swick | -1.015 |
Adams | -0.995 |
Token | a |
Token ID | 257 |
Feature activation | -3.64e-02 |
Pos logit contributions | |
ose | +7.339 |
Bs | +7.197 |
ahn | +7.192 |
Irish | +7.034 |
Adams | +6.849 |
Neg logit contributions | |
conservancy | -7.133 |
equality | -6.973 |
illusion | -6.970 |
mbudsman | -6.776 |
artifacts | -6.699 |
Token | � |
Token ID | 564 |
Feature activation | -1.67e-02 |
Pos logit contributions | |
ose | +0.390 |
Bs | +0.384 |
ahn | +0.382 |
swick | +0.364 |
Adams | +0.359 |
Neg logit contributions | |
conservancy | -0.457 |
equality | -0.450 |
guaranteeing | -0.445 |
realistically | -0.445 |
illusion | -0.440 |
Token | � |
Token ID | 250 |
Feature activation | -1.13e-01 |
Pos logit contributions | |
ose | +0.141 |
Bs | +0.138 |
ahn | +0.137 |
swick | +0.129 |
Adams | +0.126 |
Neg logit contributions | |
conservancy | -0.246 |
equality | -0.243 |
guaranteeing | -0.241 |
realistically | -0.241 |
illusion | -0.239 |
Token | design |
Token ID | 26124 |
Feature activation | -2.20e-01 |
Pos logit contributions | |
ose | +1.277 |
Bs | +1.252 |
ahn | +1.248 |
swick | +1.185 |
Adams | +1.178 |
Neg logit contributions | |
conservancy | -1.351 |
equality | -1.326 |
realistically | -1.320 |
guaranteeing | -1.319 |
illusion | -1.294 |
Token | ated |
Token ID | 515 |
Feature activation | -9.34e-01 |
Pos logit contributions | |
ose | +2.497 |
Bs | +2.450 |
ahn | +2.449 |
swick | +2.310 |
Irish | +2.291 |
Neg logit contributions | |
conservancy | -2.623 |
equality | -2.581 |
realistically | -2.529 |
guaranteeing | -2.524 |
illusion | -2.506 |
Token | . |
Token ID | 13 |
Feature activation | -3.95e-03 |
Pos logit contributions | |
ose | +0.977 |
Bs | +0.960 |
ahn | +0.956 |
swick | +0.912 |
Adams | +0.903 |
Neg logit contributions | |
conservancy | -1.028 |
equality | -1.006 |
guaranteeing | -0.994 |
realistically | -0.993 |
illusion | -0.986 |
Token |
|
Token ID | 198 |
Feature activation | -8.08e-01 |
Pos logit contributions | |
ose | +0.049 |
Bs | +0.048 |
ahn | +0.048 |
swick | +0.046 |
Adams | +0.045 |
Neg logit contributions | |
conservancy | -0.043 |
equality | -0.042 |
realistically | -0.042 |
guaranteeing | -0.042 |
illusion | -0.041 |
Token |
|
Token ID | 198 |
Feature activation | -1.69e-01 |
Pos logit contributions | |
ose | +6.991 |
Bs | +6.915 |
Adams | +6.612 |
ahn | +6.558 |
Irish | +6.496 |
Neg logit contributions | |
conservancy | -11.939 |
guaranteeing | -10.718 |
artifacts | -10.611 |
illusion | -10.527 |
condol | -10.463 |
Token | Okay |
Token ID | 16454 |
Feature activation | -4.58e-01 |
Pos logit contributions | |
ose | +1.759 |
Bs | +1.745 |
ahn | +1.714 |
Adams | +1.649 |
swick | +1.621 |
Neg logit contributions | |
conservancy | -2.187 |
guaranteeing | -2.125 |
equality | -2.118 |
realistically | -2.112 |
illusion | -2.075 |
Token | , |
Token ID | 11 |
Feature activation | -3.54e-01 |
Pos logit contributions | |
ose | +4.711 |
ahn | +4.568 |
Bs | +4.557 |
Irish | +4.278 |
Adams | +4.231 |
Neg logit contributions | |
conservancy | -5.898 |
equality | -5.689 |
illusion | -5.658 |
artifacts | -5.587 |
guaranteeing | -5.550 |
Token | it |
Token ID | 340 |
Feature activation | -5.12e-01 |
Pos logit contributions | |
ose | +4.260 |
Bs | +4.186 |
ahn | +4.134 |
Irish | +4.007 |
Adams | +3.935 |
Neg logit contributions | |
conservancy | -3.979 |
equality | -3.807 |
illusion | -3.746 |
artifacts | -3.713 |
1080 | -3.674 |
Token | � |
Token ID | 447 |
Feature activation | +2.29e-01 |
Pos logit contributions | |
ose | +5.387 |
ahn | +5.344 |
Bs | +5.249 |
Irish | +5.058 |
swick | +4.938 |
Neg logit contributions | |
conservancy | -6.508 |
equality | -6.172 |
illusion | -6.048 |
mbudsman | -5.996 |
artifacts | -5.956 |
Token | � |
Token ID | 247 |
Feature activation | -1.12e-01 |
Pos logit contributions | |
conservancy | +3.301 |
equality | +3.289 |
guaranteeing | +3.280 |
realistically | +3.272 |
illusion | +3.211 |
Neg logit contributions | |
ose | -1.926 |
ahn | -1.899 |
Bs | -1.890 |
swick | -1.823 |
ッド | -1.751 |
Token | s |
Token ID | 82 |
Feature activation | -2.44e-01 |
Pos logit contributions | |
ose | +0.696 |
Bs | +0.678 |
ahn | +0.669 |
swick | +0.613 |
Adams | +0.594 |
Neg logit contributions | |
conservancy | -1.911 |
equality | -1.884 |
guaranteeing | -1.866 |
realistically | -1.861 |
illusion | -1.852 |
Token | a |
Token ID | 257 |
Feature activation | -3.87e-01 |
Pos logit contributions | |
ose | +2.881 |
ahn | +2.813 |
Bs | +2.808 |
Irish | +2.668 |
swick | +2.659 |
Neg logit contributions | |
conservancy | -2.833 |
equality | -2.738 |
illusion | -2.662 |
artifacts | -2.651 |
guaranteeing | -2.650 |
Token | legal |
Token ID | 2742 |
Feature activation | -4.13e-01 |
Pos logit contributions | |
ose | +4.453 |
ahn | +4.357 |
Bs | +4.323 |
Irish | +4.131 |
Adams | +4.077 |
Neg logit contributions | |
conservancy | -4.564 |
equality | -4.419 |
artifacts | -4.316 |
illusion | -4.258 |
guaranteeing | -4.229 |
Token | generous |
Token ID | 14431 |
Feature activation | +1.65e-01 |
Pos logit contributions | |
guaranteeing | +10.660 |
realistically | +10.357 |
equality | +10.314 |
minimum | +9.708 |
2022 | +9.589 |
Neg logit contributions | |
Bs | -9.863 |
ose | -9.722 |
ッド | -9.674 |
swick | -9.673 |
ahn | -9.672 |
Token | social |
Token ID | 1919 |
Feature activation | +6.72e-01 |
Pos logit contributions | |
guaranteeing | +1.724 |
equality | +1.722 |
conservancy | +1.709 |
realistically | +1.708 |
illusion | +1.650 |
Neg logit contributions | |
ose | -2.055 |
Bs | -2.039 |
ahn | -2.034 |
swick | -1.974 |
ッド | -1.950 |
Token | spending |
Token ID | 4581 |
Feature activation | +1.04e+00 |
Pos logit contributions | |
guaranteeing | +5.949 |
equality | +5.818 |
realistically | +5.772 |
minimum | +5.384 |
guarantees | +5.312 |
Neg logit contributions | |
Bs | -9.042 |
ahn | -8.992 |
swick | -8.989 |
ose | -8.830 |
ッド | -8.815 |
Token | was |
Token ID | 373 |
Feature activation | +2.04e-02 |
Pos logit contributions | |
realistically | +10.451 |
guaranteeing | +10.444 |
devastated | +10.193 |
belie | +9.929 |
equality | +9.888 |
Neg logit contributions | |
ose | -11.295 |
Lear | -11.290 |
ahn | -11.252 |
ッド | -11.181 |
Bs | -10.922 |
Token | soon |
Token ID | 2582 |
Feature activation | -1.08e-02 |
Pos logit contributions | |
conservancy | +0.232 |
equality | +0.229 |
guaranteeing | +0.227 |
realistically | +0.227 |
illusion | +0.223 |
Neg logit contributions | |
ose | -0.240 |
Bs | -0.237 |
ahn | -0.236 |
swick | -0.226 |
Adams | -0.223 |
Token | followed |
Token ID | 3940 |
Feature activation | -9.31e-01 |
Pos logit contributions | |
ose | +0.114 |
Bs | +0.112 |
ahn | +0.111 |
swick | +0.106 |
Adams | +0.105 |
Neg logit contributions | |
conservancy | -0.136 |
equality | -0.134 |
guaranteeing | -0.133 |
realistically | -0.133 |
illusion | -0.131 |
Token | by |
Token ID | 416 |
Feature activation | +1.02e+00 |
Pos logit contributions | |
ose | +6.922 |
ahn | +6.459 |
Bs | +6.431 |
Irish | +6.319 |
swick | +6.078 |
Neg logit contributions | |
conservancy | -13.243 |
illusion | -12.874 |
artifacts | -12.668 |
minimum | -12.577 |
equality | -12.506 |
Token | a |
Token ID | 257 |
Feature activation | +5.62e-01 |
Pos logit contributions | |
guaranteeing | +10.429 |
equality | +10.381 |
realistically | +10.218 |
devastated | +9.943 |
2022 | +9.684 |
Neg logit contributions | |
Bs | -11.009 |
ahn | -10.784 |
swick | -10.726 |
ose | -10.705 |
Lear | -10.441 |
Token | lasting |
Token ID | 15727 |
Feature activation | +2.58e-01 |
Pos logit contributions | |
guaranteeing | +6.539 |
realistically | +6.535 |
equality | +6.487 |
conservancy | +6.275 |
illusion | +6.266 |
Neg logit contributions | |
Bs | -6.071 |
ose | -5.994 |
ahn | -5.868 |
swick | -5.740 |
Adams | -5.645 |
Token | economic |
Token ID | 3034 |
Feature activation | +6.70e-01 |
Pos logit contributions | |
equality | +2.869 |
guaranteeing | +2.843 |
realistically | +2.835 |
conservancy | +2.830 |
illusion | +2.801 |
Neg logit contributions | |
Bs | -3.072 |
ose | -3.057 |
ahn | -3.019 |
swick | -2.931 |
Adams | -2.859 |
Token | recovery |
Token ID | 7628 |
Feature activation | -4.70e-01 |
Pos logit contributions | |
realistically | +6.985 |
guaranteeing | +6.982 |
equality | +6.964 |
illusion | +6.777 |
devastated | +6.711 |
Neg logit contributions | |
Bs | -7.925 |
ose | -7.674 |
ahn | -7.670 |
swick | -7.504 |
Js | -7.385 |
Token | / |
Token ID | 14 |
Feature activation | -3.17e+00 |
Pos logit contributions | |
Bs | +4.054 |
ose | +4.053 |
ahn | +3.959 |
Js | +3.733 |
Irish | +3.733 |
Neg logit contributions | |
conservancy | -6.125 |
guaranteeing | -5.653 |
illusion | -5.647 |
artifacts | -5.624 |
equality | -5.578 |
Token | CS |
Token ID | 7902 |
Feature activation | -2.36e+00 |
Pos logit contributions | |
Bs | +25.991 |
Js | +24.734 |
ose | +24.471 |
zb | +24.432 |
sb | +24.193 |
Neg logit contributions | |
conservancy | -18.217 |
guaranteeing | -17.285 |
realistically | -16.243 |
disqualified | -15.921 |
condol | -15.710 |
Token | 3 |
Token ID | 18 |
Feature activation | -2.84e+00 |
Pos logit contributions | |
Bs | +22.629 |
ose | +21.591 |
Js | +21.559 |
zb | +21.302 |
ahn | +21.215 |
Neg logit contributions | |
conservancy | -16.770 |
guaranteeing | -15.718 |
realistically | -15.099 |
devastated | -14.572 |
condol | -14.473 |
Token | IV |
Token ID | 3824 |
Feature activation | -2.85e+00 |
Pos logit contributions | |
Bs | +24.745 |
Js | +23.624 |
ose | +23.324 |
zb | +23.155 |
ahn | +23.001 |
Neg logit contributions | |
conservancy | -17.913 |
guaranteeing | -16.901 |
realistically | -16.020 |
condol | -15.559 |
belie | -15.462 |
Token | IP |
Token ID | 4061 |
Feature activation | -2.86e+00 |
Pos logit contributions | |
Bs | +24.732 |
Js | +23.607 |
ose | +23.570 |
zb | +23.440 |
ahn | +23.236 |
Neg logit contributions | |
conservancy | -17.774 |
guaranteeing | -16.871 |
realistically | -16.149 |
condol | -15.535 |
disqualified | -15.533 |
Token | 9 |
Token ID | 24 |
Feature activation | -3.05e+00 |
Pos logit contributions | |
Bs | +24.769 |
ose | +23.646 |
Js | +23.634 |
zb | +23.206 |
ahn | +22.994 |
Neg logit contributions | |
conservancy | -18.024 |
guaranteeing | -16.895 |
realistically | -15.978 |
condol | -15.704 |
disqualified | -15.546 |
Token | ib |
Token ID | 571 |
Feature activation | -1.34e+00 |
Pos logit contributions | |
Bs | +25.498 |
Js | +24.102 |
zb | +23.920 |
ose | +23.746 |
sb | +23.590 |
Neg logit contributions | |
conservancy | -18.686 |
guaranteeing | -17.364 |
realistically | -16.356 |
condol | -16.289 |
disqualified | -16.104 |
Token | - |
Token ID | 532 |
Feature activation | +3.97e-01 |
Pos logit contributions | |
ose | +10.929 |
Bs | +10.820 |
ahn | +10.424 |
Irish | +10.231 |
Js | +9.721 |
Neg logit contributions | |
conservancy | -17.582 |
artifacts | -15.861 |
illusion | -15.453 |
equality | -15.414 |
guaranteeing | -15.346 |
Token | Kirk |
Token ID | 13855 |
Feature activation | +2.21e-02 |
Pos logit contributions | |
equality | +4.594 |
realistically | +4.587 |
conservancy | +4.559 |
guaranteeing | +4.552 |
illusion | +4.460 |
Neg logit contributions | |
ose | -4.320 |
Bs | -4.260 |
ahn | -4.201 |
ッド | -4.091 |
swick | -4.083 |
Token | Cousins |
Token ID | 32644 |
Feature activation | +5.97e-01 |
Pos logit contributions | |
conservancy | +0.285 |
equality | +0.282 |
realistically | +0.279 |
guaranteeing | +0.279 |
illusion | +0.276 |
Neg logit contributions | |
ose | -0.226 |
Bs | -0.223 |
ahn | -0.221 |
swick | -0.211 |
ッド | -0.207 |
Token | (@ |
Token ID | 4275 |
Feature activation | -6.20e-01 |
Pos logit contributions | |
guaranteeing | +4.488 |
realistically | +4.479 |
equality | +4.434 |
????? | +4.290 |
conservancy | +4.287 |
Neg logit contributions | |
Bs | -8.560 |
ahn | -8.472 |
ose | -8.439 |
swick | -8.351 |
ッド | -8.197 |
Token | com |
Token ID | 785 |
Feature activation | -3.58e-01 |
Pos logit contributions | |
equality | +1.188 |
conservancy | +1.168 |
realistically | +1.162 |
guaranteeing | +1.146 |
1080 | +1.141 |
Neg logit contributions | |
ose | -1.586 |
Bs | -1.561 |
ahn | -1.559 |
swick | -1.509 |
ッド | -1.499 |
Token | / |
Token ID | 14 |
Feature activation | -3.21e+00 |
Pos logit contributions | |
ose | +3.530 |
Bs | +3.523 |
ahn | +3.433 |
Adams | +3.262 |
Irish | +3.256 |
Neg logit contributions | |
conservancy | -4.988 |
guaranteeing | -4.643 |
illusion | -4.632 |
artifacts | -4.603 |
equality | -4.593 |
Token | F |
Token ID | 37 |
Feature activation | -2.34e+00 |
Pos logit contributions | |
Bs | +26.130 |
Js | +24.954 |
zb | +24.603 |
ose | +24.586 |
sb | +24.397 |
Neg logit contributions | |
conservancy | -18.117 |
guaranteeing | -17.262 |
realistically | -16.244 |
disqualified | -15.876 |
condol | -15.751 |
Token | c |
Token ID | 66 |
Feature activation | -1.95e+00 |
Pos logit contributions | |
Bs | +22.065 |
Js | +21.734 |
ose | +21.383 |
ahn | +21.264 |
zb | +21.223 |
Neg logit contributions | |
conservancy | -16.484 |
guaranteeing | -15.921 |
realistically | -15.228 |
condol | -14.553 |
2022 | -14.537 |
Token | ce |
Token ID | 344 |
Feature activation | -2.70e+00 |
Pos logit contributions | |
Bs | +19.700 |
ose | +18.800 |
Js | +18.796 |
ahn | +18.630 |
zb | +18.343 |
Neg logit contributions | |
conservancy | -15.901 |
guaranteeing | -15.336 |
realistically | -14.743 |
condol | -14.292 |
disqualified | -14.057 |
Token | W |
Token ID | 54 |
Feature activation | -2.44e+00 |
Pos logit contributions | |
Bs | +24.358 |
Js | +23.155 |
ose | +22.950 |
zb | +22.672 |
ahn | +22.439 |
Neg logit contributions | |
conservancy | -17.358 |
guaranteeing | -16.448 |
realistically | -15.823 |
condol | -15.101 |
disqualified | -15.074 |
Token | x |
Token ID | 87 |
Feature activation | -2.81e+00 |
Pos logit contributions | |
Bs | +22.736 |
Js | +22.053 |
ose | +21.865 |
zb | +21.768 |
ahn | +21.549 |
Neg logit contributions | |
conservancy | -16.781 |
guaranteeing | -16.176 |
realistically | -15.344 |
condol | -14.918 |
belie | -14.770 |
Token | z |
Token ID | 89 |
Feature activation | -2.75e+00 |
Pos logit contributions | |
Bs | +24.677 |
Js | +23.640 |
ose | +23.308 |
zb | +23.295 |
ahn | +22.998 |
Neg logit contributions | |
conservancy | -17.521 |
guaranteeing | -16.995 |
realistically | -15.847 |
condol | -15.729 |
disqualified | -15.402 |
Token | p |
Token ID | 79 |
Feature activation | -2.27e+00 |
Pos logit contributions | |
Bs | +24.455 |
Js | +23.172 |
ose | +23.044 |
zb | +22.929 |
ahn | +22.733 |
Neg logit contributions | |
conservancy | -17.465 |
guaranteeing | -16.813 |
realistically | -15.767 |
condol | -15.595 |
disqualified | -15.391 |
Token | X |
Token ID | 55 |
Feature activation | -2.29e+00 |
Pos logit contributions | |
Bs | +21.733 |
Js | +20.710 |
ose | +20.537 |
zb | +20.509 |
ahn | +20.213 |
Neg logit contributions | |
conservancy | -17.049 |
guaranteeing | -16.117 |
realistically | -15.375 |
condol | -15.235 |
WARN | -14.994 |
Token | e |
Token ID | 68 |
Feature activation | -3.39e-01 |
Pos logit contributions | |
Bs | +20.457 |
Js | +19.449 |
ose | +19.143 |
zb | +18.779 |
ahn | +18.610 |
Neg logit contributions | |
conservancy | -19.917 |
guaranteeing | -16.715 |
artifacts | -16.665 |
illusion | -16.322 |
equality | -15.947 |
Token | lucky |
Token ID | 9670 |
Feature activation | -6.73e-01 |
Pos logit contributions | |
ose | +2.700 |
ahn | +2.634 |
Bs | +2.633 |
swick | +2.480 |
Irish | +2.474 |
Neg logit contributions | |
conservancy | -3.041 |
equality | -2.960 |
illusion | -2.888 |
realistically | -2.868 |
guaranteeing | -2.845 |
Token | if |
Token ID | 611 |
Feature activation | -1.07e+00 |
Pos logit contributions | |
ose | +7.348 |
Bs | +7.256 |
ahn | +7.175 |
Irish | +6.989 |
Js | +6.852 |
Neg logit contributions | |
conservancy | -7.740 |
equality | -7.437 |
artifacts | -7.347 |
illusion | -7.286 |
guaranteeing | -7.090 |
Token | you |
Token ID | 345 |
Feature activation | -9.69e-01 |
Pos logit contributions | |
ose | +11.753 |
Bs | +11.635 |
ahn | +11.534 |
Irish | +11.330 |
Adams | +10.986 |
Neg logit contributions | |
conservancy | -10.778 |
equality | -10.230 |
illusion | -9.862 |
artifacts | -9.650 |
minimum | -9.536 |
Token | get |
Token ID | 651 |
Feature activation | -9.16e-01 |
Pos logit contributions | |
ose | +10.047 |
ahn | +9.497 |
Bs | +9.357 |
Irish | +9.029 |
sb | +8.804 |
Neg logit contributions | |
conservancy | -11.160 |
equality | -10.931 |
illusion | -10.284 |
artifacts | -10.174 |
guaranteeing | -10.166 |
Token | away |
Token ID | 1497 |
Feature activation | -3.50e-01 |
Pos logit contributions | |
ose | +10.034 |
ahn | +9.673 |
Bs | +9.571 |
Irish | +9.538 |
ッド | +9.199 |
Neg logit contributions | |
conservancy | -9.674 |
equality | -9.364 |
illusion | -9.294 |
realistically | -8.861 |
artifacts | -8.832 |
Token | with |
Token ID | 351 |
Feature activation | -2.05e+00 |
Pos logit contributions | |
ose | +3.740 |
Bs | +3.606 |
ahn | +3.587 |
Irish | +3.390 |
swick | +3.379 |
Neg logit contributions | |
conservancy | -4.326 |
equality | -4.285 |
illusion | -4.209 |
guaranteeing | -4.150 |
1080 | -4.145 |
Token | $ |
Token ID | 720 |
Feature activation | +4.25e-01 |
Pos logit contributions | |
ose | +18.016 |
Irish | +17.025 |
Bs | +16.982 |
beginning | +16.622 |
ahn | +16.370 |
Neg logit contributions | |
conservancy | -17.760 |
illusion | -16.500 |
equality | -16.379 |
mbudsman | -15.745 |
artifacts | -15.267 |
Token | 600 |
Token ID | 8054 |
Feature activation | -2.22e-01 |
Pos logit contributions | |
1080 | +4.438 |
equality | +4.357 |
minimum | +4.270 |
realistically | +4.258 |
conservancy | +4.250 |
Neg logit contributions | |
ose | -5.180 |
ahn | -5.147 |
Bs | -5.050 |
behavi | -5.031 |
Flavoring | -4.930 |
Token | . |
Token ID | 13 |
Feature activation | +6.03e-01 |
Pos logit contributions | |
ose | +2.634 |
Bs | +2.599 |
ahn | +2.577 |
swick | +2.446 |
Adams | +2.424 |
Neg logit contributions | |
conservancy | -2.505 |
equality | -2.473 |
illusion | -2.413 |
realistically | -2.405 |
guaranteeing | -2.405 |
Token | But |
Token ID | 887 |
Feature activation | -3.56e-01 |
Pos logit contributions | |
1080 | +6.520 |
realistically | +6.484 |
equality | +6.461 |
guaranteeing | +6.434 |
minimum | +6.282 |
Neg logit contributions | |
ose | -6.744 |
ahn | -6.666 |
Bs | -6.537 |
swick | -6.296 |
ッド | -6.237 |
Token | you |
Token ID | 345 |
Feature activation | -2.56e-01 |
Pos logit contributions | |
ose | +4.156 |
Bs | +4.103 |
ahn | +4.067 |
Irish | +3.887 |
swick | +3.817 |
Neg logit contributions | |
conservancy | -4.098 |
equality | -3.950 |
illusion | -3.889 |
guaranteeing | -3.783 |
artifacts | -3.783 |
Token | twitter |
Token ID | 6956 |
Feature activation | -6.78e-01 |
Pos logit contributions | |
ose | +2.066 |
Bs | +2.043 |
ahn | +2.030 |
swick | +1.949 |
Adams | +1.933 |
Neg logit contributions | |
conservancy | -1.638 |
guaranteeing | -1.576 |
equality | -1.561 |
realistically | -1.555 |
illusion | -1.537 |
Token | . |
Token ID | 13 |
Feature activation | +3.91e-02 |
Pos logit contributions | |
ose | +5.425 |
Bs | +5.350 |
ahn | +5.307 |
Adams | +4.915 |
Irish | +4.906 |
Neg logit contributions | |
conservancy | -10.590 |
guaranteeing | -9.742 |
artifacts | -9.666 |
illusion | -9.608 |
mbuds | -9.434 |
Token | com |
Token ID | 785 |
Feature activation | -5.15e-01 |
Pos logit contributions | |
conservancy | +0.398 |
equality | +0.397 |
realistically | +0.390 |
guaranteeing | +0.388 |
illusion | +0.384 |
Neg logit contributions | |
ose | -0.504 |
Bs | -0.497 |
ahn | -0.496 |
swick | -0.478 |
ッド | -0.472 |
Token | / |
Token ID | 14 |
Feature activation | -3.32e+00 |
Pos logit contributions | |
Bs | +4.734 |
ose | +4.705 |
ahn | +4.573 |
Js | +4.376 |
Irish | +4.349 |
Neg logit contributions | |
conservancy | -7.551 |
guaranteeing | -6.924 |
illusion | -6.910 |
artifacts | -6.902 |
equality | -6.782 |
Token | j |
Token ID | 73 |
Feature activation | -2.75e+00 |
Pos logit contributions | |
Bs | +26.452 |
Js | +25.240 |
zb | +24.897 |
ose | +24.884 |
sb | +24.641 |
Neg logit contributions | |
conservancy | -18.281 |
guaranteeing | -17.464 |
realistically | -16.402 |
disqualified | -15.991 |
condol | -15.921 |
Token | Ax |
Token ID | 31554 |
Feature activation | -3.05e+00 |
Pos logit contributions | |
Bs | +24.379 |
ose | +23.021 |
Js | +22.857 |
zb | +22.782 |
ahn | +22.704 |
Neg logit contributions | |
conservancy | -17.773 |
guaranteeing | -17.015 |
realistically | -16.019 |
condol | -15.811 |
disqualified | -15.568 |
Token | 6 |
Token ID | 21 |
Feature activation | -2.90e+00 |
Pos logit contributions | |
Bs | +25.535 |
Js | +24.434 |
zb | +23.966 |
ose | +23.862 |
ahn | +23.841 |
Neg logit contributions | |
conservancy | -18.503 |
guaranteeing | -17.365 |
realistically | -16.171 |
condol | -16.001 |
disqualified | -15.809 |
Token | c |
Token ID | 66 |
Feature activation | -1.99e+00 |
Pos logit contributions | |
Bs | +25.039 |
Js | +23.796 |
zb | +23.488 |
ose | +23.482 |
ahn | +23.322 |
Neg logit contributions | |
conservancy | -17.969 |
guaranteeing | -17.022 |
realistically | -16.067 |
condol | -15.784 |
disqualified | -15.602 |
Token | U |
Token ID | 52 |
Feature activation | -1.83e+00 |
Pos logit contributions | |
Bs | +19.955 |
ose | +19.081 |
Js | +18.972 |
ahn | +18.809 |
zb | +18.777 |
Neg logit contributions | |
conservancy | -16.000 |
guaranteeing | -15.445 |
realistically | -14.735 |
condol | -14.385 |
disqualified | -14.138 |
Token | xi |
Token ID | 29992 |
Feature activation | -2.29e+00 |
Pos logit contributions | |
Bs | +18.608 |
ose | +18.410 |
Js | +18.017 |
zb | +17.834 |
ahn | +17.742 |
Neg logit contributions | |
conservancy | -15.637 |
guaranteeing | -14.834 |
realistically | -14.384 |
belie | -13.786 |
condol | -13.782 |
Token | UG |
Token ID | 7340 |
Feature activation | -1.17e+00 |
Pos logit contributions | |
Bs | +21.846 |
Js | +20.684 |
ose | +20.530 |
ahn | +20.263 |
zb | +19.814 |
Neg logit contributions | |
conservancy | -17.742 |
guaranteeing | -16.721 |
realistically | -15.674 |
belie | -15.366 |
condol | -15.289 |
Token | . |
Token ID | 13 |
Feature activation | +3.97e-02 |
Pos logit contributions | |
ose | +5.150 |
Bs | +5.110 |
ahn | +5.095 |
Adams | +4.667 |
Irish | +4.637 |
Neg logit contributions | |
conservancy | -9.334 |
guaranteeing | -8.657 |
artifacts | -8.617 |
illusion | -8.614 |
equality | -8.474 |
Token | com |
Token ID | 785 |
Feature activation | -4.95e-01 |
Pos logit contributions | |
conservancy | +0.404 |
equality | +0.401 |
realistically | +0.396 |
guaranteeing | +0.394 |
illusion | +0.389 |
Neg logit contributions | |
ose | -0.513 |
Bs | -0.505 |
ahn | -0.503 |
swick | -0.485 |
Adams | -0.479 |
Token | / |
Token ID | 14 |
Feature activation | -3.31e+00 |
Pos logit contributions | |
Bs | +4.617 |
ose | +4.606 |
ahn | +4.491 |
Js | +4.243 |
Adams | +4.232 |
Neg logit contributions | |
conservancy | -7.169 |
guaranteeing | -6.628 |
artifacts | -6.612 |
illusion | -6.601 |
equality | -6.548 |
Token | xx |
Token ID | 5324 |
Feature activation | -3.00e+00 |
Pos logit contributions | |
Bs | +26.494 |
Js | +25.195 |
zb | +25.006 |
ose | +24.923 |
sb | +24.656 |
Neg logit contributions | |
conservancy | -18.209 |
guaranteeing | -17.333 |
realistically | -16.302 |
disqualified | -15.946 |
condol | -15.821 |
Token | A |
Token ID | 32 |
Feature activation | -1.20e+00 |
Pos logit contributions | |
Bs | +25.245 |
Js | +24.112 |
ose | +24.056 |
ahn | +23.810 |
zb | +23.794 |
Neg logit contributions | |
conservancy | -17.902 |
guaranteeing | -17.192 |
realistically | -16.024 |
condol | -15.678 |
disqualified | -15.465 |
Token | II |
Token ID | 3978 |
Feature activation | -2.09e+00 |
Pos logit contributions | |
ose | +12.988 |
Bs | +12.952 |
ahn | +12.586 |
Js | +12.569 |
zb | +12.332 |
Neg logit contributions | |
conservancy | -12.514 |
guaranteeing | -11.721 |
realistically | -11.523 |
illusion | -11.251 |
equality | -11.175 |
Token | 1 |
Token ID | 16 |
Feature activation | -2.86e+00 |
Pos logit contributions | |
Bs | +20.720 |
ose | +20.182 |
Js | +19.869 |
zb | +19.680 |
ahn | +19.640 |
Neg logit contributions | |
conservancy | -16.125 |
guaranteeing | -15.200 |
realistically | -14.662 |
condol | -14.113 |
belie | -13.951 |
Token | k |
Token ID | 74 |
Feature activation | -2.43e+00 |
Pos logit contributions | |
Bs | +24.728 |
Js | +23.636 |
zb | +23.299 |
ose | +23.242 |
ahn | +23.072 |
Neg logit contributions | |
conservancy | -17.925 |
guaranteeing | -16.847 |
realistically | -15.916 |
condol | -15.645 |
disqualified | -15.378 |
Token | H |
Token ID | 39 |
Feature activation | -2.34e+00 |
Pos logit contributions | |
Bs | +22.793 |
Js | +21.739 |
ose | +21.703 |
zb | +21.594 |
ahn | +21.357 |
Neg logit contributions | |
conservancy | -16.663 |
guaranteeing | -16.227 |
realistically | -15.251 |
condol | -15.137 |
disqualified | -14.980 |
Token | ZI |
Token ID | 48926 |
Feature activation | -1.37e+00 |
Pos logit contributions | |
Bs | +21.797 |
ose | +21.362 |
Js | +21.160 |
zb | +20.945 |
ahn | +20.876 |
Neg logit contributions | |
conservancy | -17.221 |
guaranteeing | -16.224 |
realistically | -15.239 |
condol | -15.052 |
artifacts | -14.687 |
Token | — |
Token ID | 851 |
Feature activation | +8.27e-01 |
Pos logit contributions | |
ose | +12.072 |
Bs | +12.041 |
ahn | +11.362 |
Js | +11.154 |
zb | +11.136 |
Neg logit contributions | |
conservancy | -17.161 |
illusion | -15.191 |
artifacts | -15.085 |
equality | -14.862 |
guaranteeing | -14.679 |