Token | has |
Token ID | 468 |
Feature activation | +1.50e-01 |
Pos logit contributions | |
offending | +0.186 |
bothering | +0.186 |
stranger | +0.184 |
involved | +0.183 |
occurring | +0.177 |
Neg logit contributions | |
izons | -0.219 |
Opportun | -0.214 |
leaps | -0.202 |
onential | -0.202 |
economies | -0.199 |
Token | been |
Token ID | 587 |
Feature activation | -1.08e+00 |
Pos logit contributions | |
izons | +0.907 |
Opportun | +0.892 |
onential | +0.858 |
leaps | +0.849 |
economies | +0.839 |
Neg logit contributions | |
offending | -0.590 |
involved | -0.590 |
stranger | -0.589 |
bothering | -0.587 |
occurring | -0.553 |
Token | afforded |
Token ID | 28109 |
Feature activation | +9.59e-01 |
Pos logit contributions | |
bothering | +5.105 |
offending | +5.099 |
stranger | +5.095 |
involved | +4.954 |
occurring | +4.902 |
Neg logit contributions | |
izons | -6.041 |
Opportun | -5.754 |
onential | -5.599 |
�� | -5.512 |
leaps | -5.381 |
Token | them |
Token ID | 606 |
Feature activation | +1.66e-01 |
Pos logit contributions | |
izons | +4.812 |
Opportun | +4.773 |
leaps | +4.500 |
economies | +4.460 |
onential | +4.385 |
Neg logit contributions | |
involved | -4.704 |
bothering | -4.628 |
offending | -4.612 |
stranger | -4.500 |
pictured | -4.336 |
Token | . |
Token ID | 13 |
Feature activation | +1.97e+00 |
Pos logit contributions | |
izons | +0.888 |
Opportun | +0.861 |
onential | +0.828 |
leaps | +0.812 |
economies | +0.796 |
Neg logit contributions | |
bothering | -0.794 |
offending | -0.790 |
stranger | -0.789 |
involved | -0.778 |
occurring | -0.752 |
Token | We |
Token ID | 775 |
Feature activation | +3.88e+00 |
Pos logit contributions | |
Opportun | +8.215 |
Strategies | +7.183 |
izons | +6.953 |
Measures | +6.865 |
Areas | +6.798 |
Neg logit contributions | |
offending | -11.414 |
bothering | -11.320 |
stranger | -11.026 |
involved | -10.854 |
occurring | -10.572 |
Token | admire |
Token ID | 21099 |
Feature activation | +1.78e+00 |
Pos logit contributions | |
strive | +15.869 |
Opportun | +15.685 |
stride | +15.682 |
economies | +15.634 |
ights | +15.487 |
Neg logit contributions | |
involved | -14.475 |
offending | -14.460 |
bothering | -14.397 |
REDACTED | -14.103 |
SpaceEngineers | -13.909 |
Token | you |
Token ID | 345 |
Feature activation | +1.07e+00 |
Pos logit contributions | |
Opportun | +9.855 |
izons | +9.755 |
economies | +9.402 |
leaps | +9.346 |
strides | +9.245 |
Neg logit contributions | |
involved | -6.835 |
bothering | -6.832 |
offending | -6.605 |
stranger | -6.424 |
occurring | -6.342 |
Token | because |
Token ID | 780 |
Feature activation | +1.91e-01 |
Pos logit contributions | |
izons | +5.624 |
Opportun | +5.496 |
onential | +5.270 |
leaps | +5.161 |
economies | +5.126 |
Neg logit contributions | |
offending | -4.812 |
involved | -4.804 |
bothering | -4.792 |
stranger | -4.743 |
occurring | -4.570 |
Token | you |
Token ID | 345 |
Feature activation | +1.77e+00 |
Pos logit contributions | |
izons | +1.359 |
Opportun | +1.323 |
onential | +1.281 |
leaps | +1.262 |
�� | +1.260 |
Neg logit contributions | |
bothering | -0.589 |
offending | -0.585 |
stranger | -0.578 |
involved | -0.567 |
occurring | -0.544 |
Token | have |
Token ID | 423 |
Feature activation | +1.64e+00 |
Pos logit contributions | |
izons | +8.181 |
Opportun | +7.985 |
leaps | +7.752 |
onential | +7.631 |
stride | +7.621 |
Neg logit contributions | |
offending | -8.397 |
bothering | -8.335 |
involved | -8.208 |
stranger | -8.113 |
occurring | -7.844 |
Token | with |
Token ID | 351 |
Feature activation | +6.52e-01 |
Pos logit contributions | |
izons | +5.817 |
Opportun | +5.689 |
leaps | +5.489 |
onential | +5.329 |
Strategies | +5.321 |
Neg logit contributions | |
bothering | -5.895 |
involved | -5.884 |
offending | -5.840 |
stranger | -5.731 |
occurring | -5.496 |
Token | the |
Token ID | 262 |
Feature activation | +7.35e-01 |
Pos logit contributions | |
izons | +3.458 |
Opportun | +3.376 |
onential | +3.175 |
leaps | +3.172 |
economies | +3.163 |
Neg logit contributions | |
bothering | -3.004 |
stranger | -2.957 |
involved | -2.939 |
offending | -2.937 |
pictured | -2.878 |
Token | Transport |
Token ID | 19940 |
Feature activation | -8.62e-01 |
Pos logit contributions | |
izons | +4.008 |
Opportun | +3.952 |
economies | +3.694 |
leaps | +3.686 |
onential | +3.669 |
Neg logit contributions | |
bothering | -3.245 |
stranger | -3.212 |
involved | -3.174 |
offending | -3.173 |
pictured | -3.114 |
Token | Department |
Token ID | 2732 |
Feature activation | +5.25e-02 |
Pos logit contributions | |
offending | +4.494 |
bothering | +4.478 |
stranger | +4.466 |
involved | +4.370 |
occurring | +4.350 |
Neg logit contributions | |
izons | -4.335 |
Opportun | -4.026 |
onential | -3.992 |
�� | -3.952 |
leaps | -3.779 |
Token | . |
Token ID | 13 |
Feature activation | +1.49e+00 |
Pos logit contributions | |
izons | +0.256 |
Opportun | +0.245 |
onential | +0.235 |
�� | +0.229 |
leaps | +0.226 |
Neg logit contributions | |
stranger | -0.282 |
bothering | -0.281 |
offending | -0.281 |
involved | -0.274 |
occurring | -0.270 |
Token | Stre |
Token ID | 9737 |
Feature activation | +3.75e+00 |
Pos logit contributions | |
Opportun | +5.905 |
izons | +5.518 |
Strategies | +5.298 |
Areas | +5.224 |
Measures | +5.193 |
Neg logit contributions | |
bothering | -8.873 |
offending | -8.806 |
stranger | -8.760 |
involved | -8.507 |
occurring | -8.394 |
Token | ng |
Token ID | 782 |
Feature activation | +2.51e+00 |
Pos logit contributions | |
izons | +9.811 |
onential | +8.987 |
ights | +8.589 |
Opportun | +8.351 |
akening | +8.275 |
Neg logit contributions | |
bothering | -21.083 |
offending | -20.984 |
stranger | -20.872 |
occurring | -20.590 |
REDACTED | -20.250 |
Token | thening |
Token ID | 20563 |
Feature activation | +2.74e+00 |
Pos logit contributions | |
izons | +7.199 |
Opportun | +6.337 |
onential | +6.281 |
Strategies | +5.844 |
economies | +5.784 |
Neg logit contributions | |
stranger | -15.843 |
offending | -15.818 |
bothering | -15.748 |
involved | -15.513 |
occurring | -15.435 |
Token | rural |
Token ID | 10016 |
Feature activation | +1.39e+00 |
Pos logit contributions | |
Opportun | +13.672 |
izons | +13.055 |
economies | +12.642 |
Measures | +12.505 |
leaps | +12.435 |
Neg logit contributions | |
bothering | -10.374 |
involved | -10.087 |
stranger | -9.930 |
REDACTED | -9.929 |
offending | -9.883 |
Token | transport |
Token ID | 4839 |
Feature activation | +1.15e+00 |
Pos logit contributions | |
Opportun | +6.537 |
izons | +6.489 |
economies | +6.209 |
leaps | +6.052 |
Areas | +5.965 |
Neg logit contributions | |
bothering | -6.881 |
involved | -6.827 |
stranger | -6.699 |
offending | -6.631 |
occurring | -6.536 |
Token | , |
Token ID | 11 |
Feature activation | +1.02e+00 |
Pos logit contributions | |
Opportun | +5.762 |
izons | +5.708 |
leaps | +5.326 |
economies | +5.273 |
Strategies | +5.255 |
Neg logit contributions | |
bothering | -5.551 |
stranger | -5.533 |
involved | -5.510 |
offending | -5.493 |
occurring | -5.187 |
Token | Android |
Token ID | 5565 |
Feature activation | -2.18e-01 |
Pos logit contributions | |
offending | +1.022 |
bothering | +1.021 |
stranger | +1.021 |
involved | +0.999 |
occurring | +0.984 |
Neg logit contributions | |
izons | -1.039 |
Opportun | -0.991 |
onential | -0.950 |
�� | -0.928 |
leaps | -0.926 |
Token | . |
Token ID | 13 |
Feature activation | +7.81e-01 |
Pos logit contributions | |
bothering | +1.149 |
offending | +1.149 |
stranger | +1.148 |
involved | +1.122 |
occurring | +1.106 |
Neg logit contributions | |
izons | -1.095 |
Opportun | -1.040 |
onential | -1.005 |
�� | -0.984 |
leaps | -0.966 |
Token | The |
Token ID | 383 |
Feature activation | +3.32e-01 |
Pos logit contributions | |
Opportun | +3.409 |
izons | +3.317 |
Strategies | +3.098 |
onential | +3.068 |
Areas | +2.972 |
Neg logit contributions | |
offending | -4.491 |
stranger | -4.419 |
bothering | -4.405 |
involved | -4.261 |
occurring | -4.244 |
Token | company |
Token ID | 1664 |
Feature activation | +1.27e+00 |
Pos logit contributions | |
izons | +1.879 |
Opportun | +1.838 |
leaps | +1.736 |
onential | +1.727 |
strides | +1.689 |
Neg logit contributions | |
bothering | -1.478 |
stranger | -1.461 |
involved | -1.460 |
offending | -1.453 |
occurring | -1.422 |
Token | has |
Token ID | 468 |
Feature activation | +2.16e+00 |
Pos logit contributions | |
izons | +6.477 |
Opportun | +6.293 |
leaps | +6.184 |
strides | +5.999 |
economies | +5.959 |
Neg logit contributions | |
offending | -5.812 |
involved | -5.740 |
bothering | -5.731 |
stranger | -5.728 |
occurring | -5.528 |
Token | stri |
Token ID | 2338 |
Feature activation | +3.59e+00 |
Pos logit contributions | |
izons | +12.256 |
Opportun | +12.109 |
leaps | +11.989 |
economies | +11.635 |
strides | +11.592 |
Neg logit contributions | |
involved | -7.321 |
offending | -7.044 |
bothering | -6.780 |
stranger | -6.710 |
REDACTED | -6.569 |
Token | ved |
Token ID | 1079 |
Feature activation | +2.87e+00 |
Pos logit contributions | |
izons | +12.600 |
Opportun | +11.032 |
onential | +10.870 |
ights | +10.802 |
ighed | +10.136 |
Neg logit contributions | |
offending | -17.971 |
stranger | -17.403 |
bothering | -17.378 |
occurring | -17.106 |
involved | -17.027 |
Token | to |
Token ID | 284 |
Feature activation | +1.43e+00 |
Pos logit contributions | |
izons | +12.898 |
leaps | +12.429 |
Opportun | +12.427 |
onential | +12.373 |
strides | +11.745 |
Neg logit contributions | |
involved | -12.337 |
offending | -11.956 |
stranger | -11.637 |
bothering | -11.408 |
occurring | -11.351 |
Token | make |
Token ID | 787 |
Feature activation | +1.18e+00 |
Pos logit contributions | |
izons | +6.251 |
Opportun | +6.164 |
leaps | +6.026 |
onential | +5.823 |
strides | +5.760 |
Neg logit contributions | |
involved | -7.592 |
bothering | -7.346 |
offending | -7.217 |
stranger | -7.151 |
occurring | -7.072 |
Token | the |
Token ID | 262 |
Feature activation | +3.01e-01 |
Pos logit contributions | |
izons | +6.419 |
Opportun | +6.364 |
leaps | +6.281 |
strides | +6.122 |
economies | +5.934 |
Neg logit contributions | |
involved | -4.889 |
bothering | -4.862 |
offending | -4.855 |
stranger | -4.844 |
occurring | -4.645 |
Token | phone |
Token ID | 3072 |
Feature activation | -1.69e+00 |
Pos logit contributions | |
izons | +1.770 |
Opportun | +1.725 |
leaps | +1.658 |
onential | +1.619 |
strides | +1.604 |
Neg logit contributions | |
bothering | -1.274 |
involved | -1.262 |
stranger | -1.259 |
offending | -1.245 |
occurring | -1.222 |
Token | ash |
Token ID | 12530 |
Feature activation | -1.33e+00 |
Pos logit contributions | |
izons | +0.863 |
Opportun | +0.827 |
onential | +0.788 |
leaps | +0.769 |
�� | +0.766 |
Neg logit contributions | |
bothering | -0.801 |
stranger | -0.796 |
offending | -0.786 |
involved | -0.780 |
occurring | -0.760 |
Token | . |
Token ID | 13 |
Feature activation | +8.45e-01 |
Pos logit contributions | |
offending | +9.950 |
bothering | +9.892 |
stranger | +9.827 |
occurring | +9.774 |
involved | +9.688 |
Neg logit contributions | |
izons | -3.507 |
Opportun | -3.181 |
�� | -3.119 |
onential | -3.060 |
leaps | -2.703 |
Token |
|
Token ID | 198 |
Feature activation | -6.22e-01 |
Pos logit contributions | |
izons | +3.641 |
Opportun | +3.516 |
onential | +3.284 |
Strategies | +3.211 |
�� | +3.158 |
Neg logit contributions | |
bothering | -4.994 |
stranger | -4.985 |
offending | -4.956 |
involved | -4.806 |
occurring | -4.753 |
Token |
|
Token ID | 198 |
Feature activation | +7.58e-01 |
Pos logit contributions | |
stranger | +3.392 |
bothering | +3.386 |
offending | +3.368 |
involved | +3.321 |
occurring | +3.291 |
Neg logit contributions | |
izons | -2.995 |
Opportun | -2.810 |
�� | -2.753 |
onential | -2.746 |
leaps | -2.600 |
Token | Ac |
Token ID | 12832 |
Feature activation | +1.95e+00 |
Pos logit contributions | |
izons | +3.828 |
Opportun | +3.707 |
onential | +3.524 |
Strategies | +3.456 |
leaps | +3.362 |
Neg logit contributions | |
offending | -3.877 |
stranger | -3.845 |
bothering | -3.780 |
pictured | -3.644 |
occurring | -3.596 |
Token | celer |
Token ID | 7015 |
Feature activation | +3.58e+00 |
Pos logit contributions | |
izons | +7.547 |
Opportun | +7.077 |
onential | +6.832 |
Strategies | +6.423 |
ights | +6.338 |
Neg logit contributions | |
stranger | -10.998 |
offending | -10.998 |
bothering | -10.991 |
uttered | -10.419 |
occurring | -10.371 |
Token | ating |
Token ID | 803 |
Feature activation | +2.80e+00 |
Pos logit contributions | |
izons | +11.986 |
onential | +11.411 |
Opportun | +11.350 |
ights | +10.578 |
leaps | +10.550 |
Neg logit contributions | |
stranger | -17.432 |
involved | -17.344 |
offending | -17.202 |
uttered | -16.986 |
bothering | -16.796 |
Token | water |
Token ID | 1660 |
Feature activation | +6.00e-01 |
Pos logit contributions | |
Opportun | +13.798 |
izons | +13.459 |
strides | +12.855 |
Strategies | +12.851 |
economies | +12.843 |
Neg logit contributions | |
bothering | -10.898 |
stranger | -10.741 |
offending | -10.678 |
involved | -10.619 |
REDACTED | -10.289 |
Token | removal |
Token ID | 9934 |
Feature activation | +7.78e-01 |
Pos logit contributions | |
izons | +2.962 |
Opportun | +2.863 |
leaps | +2.669 |
onential | +2.664 |
Strategies | +2.647 |
Neg logit contributions | |
bothering | -3.115 |
stranger | -3.094 |
offending | -3.077 |
involved | -3.043 |
occurring | -2.925 |
Token | from |
Token ID | 422 |
Feature activation | +1.46e-01 |
Pos logit contributions | |
izons | +4.014 |
Opportun | +3.971 |
leaps | +3.717 |
Strategies | +3.693 |
onential | +3.669 |
Neg logit contributions | |
stranger | -3.780 |
bothering | -3.735 |
offending | -3.735 |
involved | -3.662 |
pictured | -3.471 |
Token | ash |
Token ID | 12530 |
Feature activation | -9.94e-01 |
Pos logit contributions | |
izons | +0.869 |
Opportun | +0.834 |
onential | +0.803 |
�� | +0.786 |
leaps | +0.782 |
Neg logit contributions | |
bothering | -0.629 |
stranger | -0.626 |
offending | -0.621 |
involved | -0.609 |
occurring | -0.594 |
Token | Through |
Token ID | 15046 |
Feature activation | +1.67e+00 |
Pos logit contributions | |
izons | +5.198 |
Opportun | +5.114 |
onential | +4.781 |
Strategies | +4.747 |
leaps | +4.570 |
Neg logit contributions | |
offending | -5.180 |
bothering | -5.175 |
stranger | -5.093 |
occurring | -4.913 |
pictured | -4.897 |
Token | collective |
Token ID | 10098 |
Feature activation | +1.77e+00 |
Pos logit contributions | |
izons | +8.953 |
Opportun | +8.881 |
leaps | +8.501 |
Strategies | +8.356 |
onential | +8.326 |
Neg logit contributions | |
bothering | -6.854 |
stranger | -6.672 |
involved | -6.662 |
offending | -6.557 |
pictured | -6.431 |
Token | community |
Token ID | 2055 |
Feature activation | +1.49e+00 |
Pos logit contributions | |
izons | +8.603 |
Opportun | +8.536 |
leaps | +8.461 |
economies | +8.135 |
Strategies | +8.115 |
Neg logit contributions | |
bothering | -7.936 |
involved | -7.890 |
stranger | -7.589 |
pictured | -7.439 |
offending | -7.427 |
Token | power |
Token ID | 1176 |
Feature activation | +1.88e+00 |
Pos logit contributions | |
izons | +7.263 |
Opportun | +7.090 |
leaps | +7.045 |
economies | +7.002 |
strides | +6.801 |
Neg logit contributions | |
bothering | -6.818 |
stranger | -6.625 |
involved | -6.587 |
offending | -6.442 |
pictured | -6.395 |
Token | , |
Token ID | 11 |
Feature activation | +1.46e+00 |
Pos logit contributions | |
izons | +9.496 |
Opportun | +9.320 |
leaps | +9.004 |
economies | +8.950 |
onential | +8.865 |
Neg logit contributions | |
bothering | -8.329 |
stranger | -8.074 |
offending | -8.065 |
pictured | -7.639 |
involved | -7.590 |
Token | we |
Token ID | 356 |
Feature activation | +3.52e+00 |
Pos logit contributions | |
izons | +8.257 |
Opportun | +8.172 |
economies | +7.701 |
leaps | +7.683 |
Strategies | +7.668 |
Neg logit contributions | |
bothering | -5.749 |
involved | -5.562 |
stranger | -5.505 |
offending | -5.466 |
pictured | -5.211 |
Token | commit |
Token ID | 4589 |
Feature activation | +2.67e+00 |
Pos logit contributions | |
izons | +14.974 |
strive | +14.833 |
leaps | +14.458 |
Opportun | +14.382 |
stride | +14.342 |
Neg logit contributions | |
bothering | -14.060 |
involved | -14.048 |
offending | -13.555 |
SpaceEngineers | -13.085 |
REDACTED | -13.014 |
Token | to |
Token ID | 284 |
Feature activation | +2.31e+00 |
Pos logit contributions | |
izons | +12.401 |
Opportun | +11.847 |
leaps | +11.775 |
onential | +11.372 |
economies | +11.352 |
Neg logit contributions | |
bothering | -11.600 |
involved | -11.409 |
stranger | -10.875 |
offending | -10.771 |
occurring | -10.554 |
Token | a |
Token ID | 257 |
Feature activation | +1.67e+00 |
Pos logit contributions | |
Opportun | +12.520 |
izons | +12.380 |
leaps | +12.075 |
economies | +11.969 |
onential | +11.770 |
Neg logit contributions | |
involved | -8.021 |
stranger | -7.745 |
bothering | -7.697 |
offending | -7.383 |
pictured | -7.290 |
Token | conscious |
Token ID | 6921 |
Feature activation | +1.88e+00 |
Pos logit contributions | |
izons | +8.534 |
Opportun | +8.349 |
onential | +8.014 |
Strategies | +7.972 |
leaps | +7.946 |
Neg logit contributions | |
bothering | -7.228 |
involved | -7.126 |
offending | -7.115 |
stranger | -6.814 |
occurring | -6.622 |
Token | effort |
Token ID | 3626 |
Feature activation | +2.15e+00 |
Pos logit contributions | |
izons | +8.562 |
Opportun | +8.542 |
leaps | +8.318 |
Strategies | +8.207 |
economies | +8.171 |
Neg logit contributions | |
involved | -8.668 |
bothering | -8.659 |
stranger | -8.338 |
offending | -8.322 |
occurring | -8.052 |
Token | than |
Token ID | 621 |
Feature activation | -9.14e-01 |
Pos logit contributions | |
izons | +8.327 |
Opportun | +8.062 |
leaps | +7.924 |
onential | +7.862 |
economies | +7.724 |
Neg logit contributions | |
involved | -7.452 |
stranger | -7.335 |
bothering | -7.300 |
offending | -7.270 |
pictured | -6.870 |
Token | the |
Token ID | 262 |
Feature activation | -8.30e-01 |
Pos logit contributions | |
offending | +4.201 |
bothering | +4.201 |
stranger | +4.195 |
involved | +4.071 |
occurring | +4.030 |
Neg logit contributions | |
izons | -5.195 |
Opportun | -4.938 |
onential | -4.818 |
�� | -4.758 |
leaps | -4.627 |
Token | reference |
Token ID | 4941 |
Feature activation | -7.72e-01 |
Pos logit contributions | |
offending | +3.882 |
stranger | +3.863 |
bothering | +3.854 |
involved | +3.730 |
occurring | +3.700 |
Neg logit contributions | |
izons | -4.660 |
Opportun | -4.402 |
onential | -4.313 |
�� | -4.272 |
leaps | -4.125 |
Token | design |
Token ID | 1486 |
Feature activation | -9.19e-01 |
Pos logit contributions | |
offending | +3.576 |
bothering | +3.570 |
stranger | +3.566 |
involved | +3.459 |
occurring | +3.430 |
Neg logit contributions | |
izons | -4.360 |
Opportun | -4.147 |
onential | -4.047 |
�� | -3.992 |
leaps | -3.880 |
Token | . |
Token ID | 13 |
Feature activation | +9.66e-01 |
Pos logit contributions | |
bothering | +4.480 |
offending | +4.477 |
stranger | +4.471 |
occurring | +4.336 |
involved | +4.334 |
Neg logit contributions | |
izons | -4.947 |
Opportun | -4.674 |
onential | -4.567 |
�� | -4.530 |
leaps | -4.353 |
Token | Equ |
Token ID | 7889 |
Feature activation | +3.51e+00 |
Pos logit contributions | |
Opportun | +4.067 |
izons | +3.896 |
Strategies | +3.682 |
onential | +3.644 |
Measures | +3.514 |
Neg logit contributions | |
offending | -5.733 |
bothering | -5.611 |
stranger | -5.606 |
involved | -5.407 |
occurring | -5.350 |
Token | ipped |
Token ID | 3949 |
Feature activation | +8.31e-01 |
Pos logit contributions | |
izons | +11.699 |
Opportun | +10.644 |
onential | +10.570 |
itud | +9.482 |
atches | +9.269 |
Neg logit contributions | |
bothering | -19.433 |
offending | -19.201 |
uttered | -18.401 |
stranger | -18.188 |
REDACTED | -18.049 |
Token | with |
Token ID | 351 |
Feature activation | +7.89e-01 |
Pos logit contributions | |
izons | +4.714 |
Opportun | +4.539 |
onential | +4.374 |
�� | +4.293 |
leaps | +4.269 |
Neg logit contributions | |
bothering | -3.712 |
offending | -3.697 |
involved | -3.652 |
stranger | -3.651 |
occurring | -3.496 |
Token | Un |
Token ID | 791 |
Feature activation | -9.30e-01 |
Pos logit contributions | |
izons | +4.124 |
Opportun | +4.105 |
onential | +3.868 |
leaps | +3.829 |
Strategies | +3.784 |
Neg logit contributions | |
bothering | -3.709 |
offending | -3.700 |
involved | -3.689 |
stranger | -3.669 |
occurring | -3.543 |
Token | locked |
Token ID | 24162 |
Feature activation | -4.07e-01 |
Pos logit contributions | |
stranger | +3.533 |
bothering | +3.521 |
offending | +3.519 |
involved | +3.439 |
occurring | +3.351 |
Neg logit contributions | |
izons | -5.981 |
Opportun | -5.773 |
onential | -5.607 |
�� | -5.588 |
leaps | -5.441 |
Token | Digital |
Token ID | 10231 |
Feature activation | -1.07e+00 |
Pos logit contributions | |
bothering | +2.486 |
offending | +2.484 |
stranger | +2.483 |
involved | +2.429 |
occurring | +2.411 |
Neg logit contributions | |
izons | -1.693 |
Opportun | -1.579 |
onential | -1.522 |
�� | -1.492 |
leaps | -1.448 |
Token | s |
Token ID | 82 |
Feature activation | -4.47e-01 |
Pos logit contributions | |
offending | +4.033 |
bothering | +4.026 |
stranger | +3.989 |
involved | +3.903 |
occurring | +3.849 |
Neg logit contributions | |
izons | -7.787 |
Opportun | -7.493 |
�� | -7.308 |
onential | -7.297 |
Strategies | -7.015 |
Token | been |
Token ID | 587 |
Feature activation | -4.44e-01 |
Pos logit contributions | |
offending | +1.936 |
stranger | +1.932 |
bothering | +1.918 |
involved | +1.895 |
occurring | +1.841 |
Neg logit contributions | |
izons | -2.650 |
Opportun | -2.548 |
onential | -2.469 |
�� | -2.410 |
leaps | -2.406 |
Token | really |
Token ID | 1107 |
Feature activation | +1.84e-01 |
Pos logit contributions | |
offending | +1.891 |
stranger | +1.881 |
bothering | +1.864 |
involved | +1.849 |
occurring | +1.791 |
Neg logit contributions | |
izons | -2.665 |
Opportun | -2.565 |
onential | -2.484 |
�� | -2.437 |
leaps | -2.428 |
Token | tough |
Token ID | 5802 |
Feature activation | +1.45e+00 |
Pos logit contributions | |
izons | +1.025 |
Opportun | +0.998 |
leaps | +0.960 |
onential | +0.951 |
strides | +0.929 |
Neg logit contributions | |
offending | -0.825 |
involved | -0.825 |
stranger | -0.820 |
bothering | -0.795 |
occurring | -0.780 |
Token | . |
Token ID | 13 |
Feature activation | +1.00e+00 |
Pos logit contributions | |
izons | +7.630 |
Opportun | +7.504 |
leaps | +7.276 |
strides | +7.162 |
onential | +7.024 |
Neg logit contributions | |
involved | -6.206 |
stranger | -6.113 |
offending | -5.957 |
bothering | -5.797 |
pictured | -5.704 |
Token | We |
Token ID | 775 |
Feature activation | +3.50e+00 |
Pos logit contributions | |
Opportun | +4.484 |
izons | +4.164 |
Strategies | +4.018 |
Areas | +3.867 |
Measures | +3.794 |
Neg logit contributions | |
offending | -5.833 |
stranger | -5.688 |
bothering | -5.654 |
involved | -5.576 |
occurring | -5.414 |
Token | can |
Token ID | 460 |
Feature activation | +1.96e+00 |
Pos logit contributions | |
izons | +16.033 |
Opportun | +15.280 |
strides | +14.706 |
stride | +14.683 |
ighed | +14.584 |
Neg logit contributions | |
offending | -12.286 |
stranger | -12.153 |
involved | -11.879 |
bothering | -11.857 |
REDACTED | -11.772 |
Token | � |
Token ID | 447 |
Feature activation | +1.52e+00 |
Pos logit contributions | |
izons | +8.651 |
Opportun | +8.318 |
strides | +7.915 |
leaps | +7.881 |
onential | +7.794 |
Neg logit contributions | |
involved | -9.729 |
bothering | -9.573 |
offending | -9.560 |
stranger | -9.370 |
occurring | -9.157 |
Token | � |
Token ID | 247 |
Feature activation | -1.15e+00 |
Pos logit contributions | |
izons | +3.225 |
�� | +2.915 |
onential | +2.587 |
Opportun | +2.521 |
� | +2.463 |
Neg logit contributions | |
offending | -11.652 |
bothering | -11.595 |
involved | -11.515 |
stranger | -11.497 |
occurring | -11.303 |
Token | t |
Token ID | 83 |
Feature activation | +7.19e-01 |
Pos logit contributions | |
stranger | +4.400 |
bothering | +4.393 |
offending | +4.379 |
involved | +4.262 |
occurring | +4.201 |
Neg logit contributions | |
izons | -7.240 |
Opportun | -6.982 |
onential | -6.779 |
�� | -6.739 |
leaps | -6.560 |
Token | wait |
Token ID | 4043 |
Feature activation | +1.69e+00 |
Pos logit contributions | |
izons | +3.297 |
Opportun | +3.153 |
onential | +3.057 |
leaps | +3.035 |
strides | +2.966 |
Neg logit contributions | |
involved | -3.902 |
offending | -3.819 |
bothering | -3.793 |
stranger | -3.771 |
occurring | -3.655 |
Token | of |
Token ID | 286 |
Feature activation | +1.72e+00 |
Pos logit contributions | |
izons | +8.633 |
Opportun | +8.147 |
leaps | +7.914 |
onential | +7.886 |
strides | +7.807 |
Neg logit contributions | |
offending | -6.442 |
bothering | -6.229 |
involved | -6.194 |
stranger | -6.078 |
pictured | -5.917 |
Token | new |
Token ID | 649 |
Feature activation | +8.48e-01 |
Pos logit contributions | |
izons | +8.447 |
Opportun | +8.293 |
strides | +8.204 |
leaps | +8.189 |
economies | +7.673 |
Neg logit contributions | |
involved | -8.021 |
offending | -7.759 |
stranger | -7.625 |
bothering | -7.614 |
REDACTED | -7.513 |
Token | guys |
Token ID | 3730 |
Feature activation | +4.19e-01 |
Pos logit contributions | |
izons | +4.238 |
Opportun | +4.061 |
leaps | +3.870 |
strides | +3.811 |
onential | +3.780 |
Neg logit contributions | |
bothering | -4.334 |
stranger | -4.313 |
offending | -4.287 |
involved | -4.268 |
occurring | -4.147 |
Token | , |
Token ID | 11 |
Feature activation | +3.32e-01 |
Pos logit contributions | |
izons | +2.506 |
Opportun | +2.409 |
onential | +2.346 |
leaps | +2.319 |
Strategies | +2.266 |
Neg logit contributions | |
offending | -1.725 |
stranger | -1.724 |
bothering | -1.691 |
involved | -1.651 |
occurring | -1.605 |
Token | but |
Token ID | 475 |
Feature activation | +9.40e-02 |
Pos logit contributions | |
izons | +1.908 |
Opportun | +1.838 |
onential | +1.777 |
leaps | +1.733 |
�� | +1.725 |
Neg logit contributions | |
offending | -1.484 |
bothering | -1.478 |
stranger | -1.469 |
involved | -1.447 |
occurring | -1.412 |
Token | we |
Token ID | 356 |
Feature activation | +3.46e+00 |
Pos logit contributions | |
izons | +0.624 |
Opportun | +0.606 |
onential | +0.585 |
leaps | +0.572 |
Strategies | +0.567 |
Neg logit contributions | |
offending | -0.338 |
bothering | -0.336 |
stranger | -0.335 |
involved | -0.332 |
occurring | -0.317 |
Token | � |
Token ID | 447 |
Feature activation | +1.58e+00 |
Pos logit contributions | |
izons | +15.296 |
Opportun | +14.430 |
stride | +14.200 |
strides | +14.082 |
ights | +13.990 |
Neg logit contributions | |
offending | -13.096 |
stranger | -12.915 |
involved | -12.597 |
bothering | -12.438 |
REDACTED | -12.377 |
Token | � |
Token ID | 247 |
Feature activation | -8.44e-01 |
Pos logit contributions | |
izons | +3.207 |
�� | +3.014 |
onential | +2.688 |
Opportun | +2.686 |
� | +2.536 |
Neg logit contributions | |
bothering | -12.103 |
offending | -12.082 |
involved | -11.935 |
stranger | -11.873 |
occurring | -11.667 |
Token | ve |
Token ID | 303 |
Feature activation | +1.93e+00 |
Pos logit contributions | |
bothering | +4.846 |
offending | +4.839 |
stranger | +4.837 |
involved | +4.745 |
occurring | +4.697 |
Neg logit contributions | |
izons | -3.791 |
Opportun | -3.587 |
onential | -3.455 |
�� | -3.400 |
leaps | -3.296 |
Token | got |
Token ID | 1392 |
Feature activation | +1.35e+00 |
Pos logit contributions | |
izons | +9.691 |
Opportun | +9.333 |
onential | +8.838 |
strides | +8.828 |
leaps | +8.817 |
Neg logit contributions | |
involved | -8.328 |
offending | -8.260 |
stranger | -8.217 |
bothering | -7.957 |
REDACTED | -7.849 |
Token | a |
Token ID | 257 |
Feature activation | +1.37e+00 |
Pos logit contributions | |
izons | +7.150 |
Opportun | +6.924 |
leaps | +6.626 |
strides | +6.579 |
onential | +6.559 |
Neg logit contributions | |
involved | -6.102 |
offending | -6.076 |
stranger | -6.010 |
bothering | -5.994 |
occurring | -5.833 |
Token | is |
Token ID | 271 |
Feature activation | +1.11e+00 |
Pos logit contributions | |
izons | +2.043 |
Opportun | +1.909 |
onential | +1.849 |
�� | +1.800 |
leaps | +1.784 |
Neg logit contributions | |
bothering | -2.598 |
stranger | -2.582 |
offending | -2.575 |
involved | -2.517 |
occurring | -2.485 |
Token | said |
Token ID | 531 |
Feature activation | +4.70e-01 |
Pos logit contributions | |
izons | +6.371 |
Opportun | +6.133 |
onential | +5.962 |
leaps | +5.894 |
Strategies | +5.828 |
Neg logit contributions | |
bothering | -4.603 |
offending | -4.598 |
stranger | -4.555 |
involved | -4.422 |
occurring | -4.317 |
Token | . |
Token ID | 13 |
Feature activation | +1.20e+00 |
Pos logit contributions | |
izons | +2.454 |
Opportun | +2.331 |
onential | +2.252 |
leaps | +2.208 |
�� | +2.192 |
Neg logit contributions | |
bothering | -2.308 |
offending | -2.302 |
stranger | -2.301 |
involved | -2.284 |
occurring | -2.183 |
Token | � |
Token ID | 564 |
Feature activation | +1.59e+00 |
Pos logit contributions | |
Opportun | +6.497 |
izons | +6.328 |
Strategies | +6.103 |
onential | +5.912 |
Areas | +5.840 |
Neg logit contributions | |
offending | -5.449 |
stranger | -5.444 |
bothering | -5.376 |
involved | -5.287 |
occurring | -5.074 |
Token | � |
Token ID | 250 |
Feature activation | -4.37e-01 |
Pos logit contributions | |
izons | +4.450 |
�� | +4.205 |
Opportun | +4.115 |
onential | +3.937 |
Strategies | +3.664 |
Neg logit contributions | |
stranger | -11.072 |
bothering | -11.049 |
offending | -11.038 |
involved | -10.995 |
occurring | -10.717 |
Token | We |
Token ID | 1135 |
Feature activation | +3.40e+00 |
Pos logit contributions | |
offending | +2.156 |
bothering | +2.145 |
stranger | +2.145 |
involved | +2.083 |
occurring | +2.063 |
Neg logit contributions | |
izons | -2.357 |
Opportun | -2.265 |
onential | -2.185 |
�� | -2.124 |
leaps | -2.098 |
Token | � |
Token ID | 447 |
Feature activation | +1.45e+00 |
Pos logit contributions | |
izons | +15.529 |
Opportun | +14.771 |
stride | +14.543 |
ights | +14.403 |
strides | +14.398 |
Neg logit contributions | |
offending | -12.512 |
stranger | -12.477 |
involved | -12.007 |
bothering | -11.878 |
REDACTED | -11.865 |
Token | � |
Token ID | 247 |
Feature activation | -2.82e-01 |
Pos logit contributions | |
izons | +3.395 |
�� | +3.157 |
Opportun | +2.965 |
onential | +2.894 |
economies | +2.685 |
Neg logit contributions | |
bothering | -10.707 |
offending | -10.641 |
involved | -10.564 |
stranger | -10.488 |
occurring | -10.319 |
Token | ve |
Token ID | 303 |
Feature activation | +1.62e+00 |
Pos logit contributions | |
offending | +1.605 |
bothering | +1.605 |
stranger | +1.603 |
involved | +1.566 |
occurring | +1.551 |
Neg logit contributions | |
izons | -1.295 |
Opportun | -1.221 |
onential | -1.181 |
�� | -1.156 |
leaps | -1.125 |
Token | got |
Token ID | 1392 |
Feature activation | +1.54e+00 |
Pos logit contributions | |
izons | +9.637 |
Opportun | +9.268 |
onential | +8.996 |
leaps | +8.867 |
strides | +8.795 |
Neg logit contributions | |
stranger | -5.973 |
offending | -5.934 |
involved | -5.912 |
bothering | -5.758 |
occurring | -5.514 |
Token | a |
Token ID | 257 |
Feature activation | +1.55e+00 |
Pos logit contributions | |
izons | +7.643 |
Opportun | +7.445 |
leaps | +7.283 |
strides | +7.193 |
onential | +7.075 |
Neg logit contributions | |
involved | -7.164 |
offending | -7.136 |
stranger | -7.056 |
bothering | -7.043 |
occurring | -6.823 |
Token | of |
Token ID | 286 |
Feature activation | -1.01e-02 |
Pos logit contributions | |
offending | +1.015 |
stranger | +1.011 |
bothering | +1.006 |
occurring | +0.974 |
involved | +0.972 |
Neg logit contributions | |
izons | -1.885 |
Opportun | -1.810 |
onential | -1.774 |
�� | -1.749 |
leaps | -1.708 |
Token | good |
Token ID | 922 |
Feature activation | -2.81e-01 |
Pos logit contributions | |
bothering | +0.048 |
stranger | +0.048 |
offending | +0.048 |
involved | +0.048 |
occurring | +0.046 |
Neg logit contributions | |
izons | -0.055 |
Opportun | -0.053 |
onential | -0.051 |
leaps | -0.050 |
Strategies | -0.049 |
Token | and |
Token ID | 290 |
Feature activation | -1.69e+00 |
Pos logit contributions | |
bothering | +1.491 |
offending | +1.486 |
stranger | +1.485 |
involved | +1.453 |
occurring | +1.430 |
Neg logit contributions | |
izons | -1.406 |
Opportun | -1.332 |
onential | -1.289 |
�� | -1.258 |
leaps | -1.239 |
Token | bad |
Token ID | 2089 |
Feature activation | -2.12e-01 |
Pos logit contributions | |
offending | +5.832 |
stranger | +5.725 |
bothering | +5.566 |
wrong | +5.552 |
occurring | +5.423 |
Neg logit contributions | |
izons | -11.285 |
Opportun | -10.924 |
�� | -10.857 |
onential | -10.785 |
Flavoring | -10.185 |
Token | , |
Token ID | 11 |
Feature activation | +2.88e-01 |
Pos logit contributions | |
bothering | +1.121 |
offending | +1.116 |
stranger | +1.115 |
involved | +1.092 |
occurring | +1.071 |
Neg logit contributions | |
izons | -1.059 |
Opportun | -1.001 |
onential | -0.967 |
�� | -0.941 |
leaps | -0.935 |
Token | equ |
Token ID | 1602 |
Feature activation | +3.38e+00 |
Pos logit contributions | |
izons | +1.604 |
Opportun | +1.559 |
onential | +1.488 |
leaps | +1.474 |
economies | +1.456 |
Neg logit contributions | |
bothering | -1.304 |
stranger | -1.288 |
involved | -1.288 |
offending | -1.285 |
occurring | -1.217 |
Token | ating |
Token ID | 803 |
Feature activation | +1.04e+00 |
Pos logit contributions | |
izons | +14.083 |
onential | +12.768 |
itud | +12.316 |
ights | +11.856 |
Opportun | +11.827 |
Neg logit contributions | |
bothering | -15.968 |
offending | -15.769 |
stranger | -15.458 |
occurring | -14.952 |
REDACTED | -14.833 |
Token | the |
Token ID | 262 |
Feature activation | +7.34e-01 |
Pos logit contributions | |
izons | +5.690 |
Opportun | +5.561 |
economies | +5.318 |
leaps | +5.309 |
onential | +5.233 |
Neg logit contributions | |
involved | -4.509 |
bothering | -4.453 |
stranger | -4.332 |
offending | -4.328 |
occurring | -4.142 |
Token | self |
Token ID | 2116 |
Feature activation | +2.80e-01 |
Pos logit contributions | |
izons | +4.030 |
Opportun | +3.896 |
onential | +3.694 |
leaps | +3.685 |
economies | +3.628 |
Neg logit contributions | |
bothering | -3.308 |
involved | -3.275 |
offending | -3.229 |
stranger | -3.204 |
occurring | -3.105 |
Token | - |
Token ID | 12 |
Feature activation | -2.42e-01 |
Pos logit contributions | |
izons | +1.589 |
Opportun | +1.515 |
onential | +1.475 |
�� | +1.443 |
leaps | +1.421 |
Neg logit contributions | |
bothering | -1.282 |
offending | -1.277 |
stranger | -1.274 |
involved | -1.240 |
occurring | -1.220 |
Token | support |
Token ID | 11284 |
Feature activation | +5.29e-01 |
Pos logit contributions | |
bothering | +1.202 |
offending | +1.195 |
stranger | +1.194 |
involved | +1.148 |
occurring | +1.146 |
Neg logit contributions | |
izons | -1.287 |
Opportun | -1.226 |
onential | -1.189 |
�� | -1.155 |
leaps | -1.143 |
Token | to |
Token ID | 284 |
Feature activation | +9.35e-01 |
Pos logit contributions | |
izons | +8.687 |
Opportun | +8.283 |
leaps | +7.966 |
onential | +7.954 |
economies | +7.918 |
Neg logit contributions | |
involved | -8.799 |
offending | -8.753 |
bothering | -8.608 |
stranger | -8.559 |
occurring | -8.355 |
Token | deal |
Token ID | 1730 |
Feature activation | +3.99e-01 |
Pos logit contributions | |
izons | +4.316 |
Opportun | +4.209 |
leaps | +4.022 |
economies | +4.004 |
onential | +3.932 |
Neg logit contributions | |
involved | -4.923 |
offending | -4.797 |
bothering | -4.792 |
stranger | -4.733 |
occurring | -4.608 |
Token | with |
Token ID | 351 |
Feature activation | +4.10e-01 |
Pos logit contributions | |
izons | +2.441 |
Opportun | +2.338 |
onential | +2.280 |
�� | +2.245 |
leaps | +2.203 |
Neg logit contributions | |
bothering | -1.654 |
offending | -1.653 |
stranger | -1.650 |
involved | -1.604 |
occurring | -1.578 |
Token | hardship |
Token ID | 30699 |
Feature activation | +7.89e-01 |
Pos logit contributions | |
izons | +2.365 |
Opportun | +2.327 |
onential | +2.223 |
leaps | +2.211 |
economies | +2.210 |
Neg logit contributions | |
involved | -1.736 |
bothering | -1.721 |
offending | -1.713 |
stranger | -1.677 |
pictured | -1.647 |
Token | . |
Token ID | 13 |
Feature activation | +1.25e+00 |
Pos logit contributions | |
izons | +4.166 |
Opportun | +4.118 |
economies | +3.967 |
leaps | +3.953 |
onential | +3.886 |
Neg logit contributions | |
offending | -3.492 |
involved | -3.492 |
bothering | -3.450 |
stranger | -3.417 |
pictured | -3.288 |
Token | We |
Token ID | 775 |
Feature activation | +3.37e+00 |
Pos logit contributions | |
Opportun | +5.203 |
izons | +4.631 |
Strategies | +4.566 |
Areas | +4.350 |
Measures | +4.345 |
Neg logit contributions | |
offending | -7.593 |
bothering | -7.521 |
stranger | -7.401 |
involved | -7.322 |
occurring | -7.103 |
Token | believe |
Token ID | 1975 |
Feature activation | +1.19e+00 |
Pos logit contributions | |
izons | +14.395 |
Opportun | +14.246 |
stride | +14.237 |
economies | +14.155 |
ights | +14.139 |
Neg logit contributions | |
involved | -12.802 |
bothering | -12.676 |
REDACTED | -12.548 |
offending | -12.430 |
stranger | -12.070 |
Token | in |
Token ID | 287 |
Feature activation | +1.28e+00 |
Pos logit contributions | |
izons | +7.122 |
Opportun | +7.106 |
economies | +6.939 |
leaps | +6.876 |
strides | +6.750 |
Neg logit contributions | |
involved | -4.213 |
bothering | -4.156 |
offending | -4.105 |
stranger | -3.957 |
pictured | -3.862 |
Token | equal |
Token ID | 4961 |
Feature activation | +1.34e+00 |
Pos logit contributions | |
izons | +7.246 |
Opportun | +7.211 |
leaps | +6.887 |
economies | +6.833 |
strides | +6.681 |
Neg logit contributions | |
involved | -5.237 |
bothering | -5.071 |
offending | -5.003 |
REDACTED | -4.930 |
stranger | -4.911 |
Token | opportunity |
Token ID | 3663 |
Feature activation | +1.59e+00 |
Pos logit contributions | |
izons | +6.835 |
Opportun | +6.830 |
economies | +6.342 |
leaps | +6.334 |
strides | +6.293 |
Neg logit contributions | |
involved | -6.382 |
bothering | -6.200 |
REDACTED | -6.099 |
offending | -6.035 |
pictured | -5.945 |
Token | that |
Token ID | 326 |
Feature activation | +5.55e-01 |
Pos logit contributions | |
izons | +8.225 |
Opportun | +8.066 |
economies | +7.704 |
leaps | +7.590 |
onential | +7.562 |
Neg logit contributions | |
involved | -7.056 |
bothering | -6.960 |
offending | -6.918 |
stranger | -6.889 |
REDACTED | -6.538 |
Token | of |
Token ID | 286 |
Feature activation | +7.91e-01 |
Pos logit contributions | |
izons | +7.705 |
Opportun | +7.569 |
economies | +7.389 |
leaps | +7.200 |
Strategies | +7.112 |
Neg logit contributions | |
involved | -6.466 |
bothering | -6.298 |
offending | -6.227 |
stranger | -6.158 |
occurring | -6.033 |
Token | increasing |
Token ID | 3649 |
Feature activation | +1.76e+00 |
Pos logit contributions | |
izons | +4.119 |
Opportun | +3.983 |
economies | +3.860 |
leaps | +3.847 |
onential | +3.800 |
Neg logit contributions | |
involved | -3.723 |
bothering | -3.693 |
offending | -3.633 |
stranger | -3.625 |
pictured | -3.551 |
Token | military |
Token ID | 2422 |
Feature activation | +1.58e+00 |
Pos logit contributions | |
izons | +8.866 |
Opportun | +8.666 |
economies | +8.625 |
leaps | +8.557 |
strides | +8.232 |
Neg logit contributions | |
involved | -7.710 |
bothering | -7.650 |
stranger | -7.431 |
offending | -7.373 |
occurring | -7.311 |
Token | bases |
Token ID | 12536 |
Feature activation | +1.42e+00 |
Pos logit contributions | |
izons | +7.307 |
leaps | +7.208 |
economies | +7.086 |
Opportun | +6.957 |
strides | +6.768 |
Neg logit contributions | |
involved | -7.815 |
bothering | -7.650 |
stranger | -7.586 |
occurring | -7.428 |
offending | -7.335 |
Token | … |
Token ID | 1399 |
Feature activation | +9.85e-01 |
Pos logit contributions | |
izons | +7.399 |
Opportun | +7.053 |
leaps | +6.908 |
onential | +6.895 |
economies | +6.847 |
Neg logit contributions | |
stranger | -6.150 |
offending | -6.059 |
bothering | -6.006 |
involved | -5.981 |
pictured | -5.631 |
Token | we |
Token ID | 732 |
Feature activation | +3.34e+00 |
Pos logit contributions | |
izons | +5.076 |
Opportun | +4.888 |
onential | +4.643 |
Strategies | +4.513 |
leaps | +4.440 |
Neg logit contributions | |
bothering | -4.868 |
stranger | -4.775 |
offending | -4.770 |
pictured | -4.587 |
occurring | -4.565 |
Token | must |
Token ID | 1276 |
Feature activation | +2.76e+00 |
Pos logit contributions | |
izons | +11.817 |
akening | +11.242 |
economies | +11.225 |
ights | +10.975 |
stride | +10.862 |
Neg logit contributions | |
bothering | -16.164 |
involved | -16.090 |
offending | -15.823 |
stranger | -15.605 |
REDACTED | -15.422 |
Token | develop |
Token ID | 1205 |
Feature activation | +2.94e+00 |
Pos logit contributions | |
izons | +11.975 |
Opportun | +11.418 |
leaps | +11.255 |
stride | +11.243 |
economies | +11.214 |
Neg logit contributions | |
involved | -12.594 |
bothering | -11.986 |
offending | -11.711 |
stranger | -11.609 |
occurring | -11.268 |
Token | a |
Token ID | 257 |
Feature activation | +1.80e+00 |
Pos logit contributions | |
economies | +14.079 |
izons | +13.790 |
Opportun | +13.582 |
leaps | +13.470 |
Strategies | +12.918 |
Neg logit contributions | |
involved | -12.138 |
bothering | -11.643 |
offending | -11.305 |
stranger | -11.090 |
Owner | -11.049 |
Token | powerful |
Token ID | 3665 |
Feature activation | +2.03e+00 |
Pos logit contributions | |
izons | +8.730 |
Opportun | +8.639 |
economies | +8.355 |
leaps | +8.273 |
Strategies | +8.230 |
Neg logit contributions | |
involved | -8.141 |
bothering | -7.927 |
offending | -7.924 |
stranger | -7.637 |
occurring | -7.458 |
Token | anti |
Token ID | 3098 |
Feature activation | +6.98e-01 |
Pos logit contributions | |
Opportun | +9.576 |
economies | +9.422 |
izons | +9.359 |
leaps | +9.221 |
Strategies | +8.973 |
Neg logit contributions | |
involved | -9.360 |
bothering | -9.037 |
offending | -8.770 |
stranger | -8.675 |
occurring | -8.391 |
Token | � |
Token ID | 447 |
Feature activation | +1.40e+00 |
Pos logit contributions | |
izons | +3.908 |
Opportun | +3.907 |
economies | +3.624 |
onential | +3.614 |
Strategies | +3.595 |
Neg logit contributions | |
stranger | -3.846 |
offending | -3.842 |
bothering | -3.835 |
involved | -3.772 |
pictured | -3.639 |
Token | � |
Token ID | 251 |
Feature activation | -9.15e-01 |
Pos logit contributions | |
izons | +3.706 |
�� | +3.387 |
Opportun | +3.337 |
onential | +3.222 |
Strategies | +2.913 |
Neg logit contributions | |
stranger | -10.111 |
offending | -10.097 |
bothering | -10.094 |
involved | -10.045 |
occurring | -9.781 |
Token | and |
Token ID | 290 |
Feature activation | +9.49e-01 |
Pos logit contributions | |
offending | +4.566 |
bothering | +4.564 |
stranger | +4.553 |
involved | +4.445 |
occurring | +4.419 |
Neg logit contributions | |
izons | -4.834 |
Opportun | -4.546 |
onential | -4.444 |
�� | -4.402 |
leaps | -4.252 |
Token | � |
Token ID | 564 |
Feature activation | +1.64e+00 |
Pos logit contributions | |
izons | +4.670 |
Opportun | +4.631 |
onential | +4.382 |
leaps | +4.341 |
economies | +4.316 |
Neg logit contributions | |
bothering | -4.627 |
involved | -4.605 |
offending | -4.564 |
stranger | -4.563 |
occurring | -4.432 |
Token | � |
Token ID | 250 |
Feature activation | -1.13e-01 |
Pos logit contributions | |
izons | +4.025 |
�� | +3.718 |
Opportun | +3.664 |
onential | +3.525 |
Strategies | +3.236 |
Neg logit contributions | |
stranger | -11.657 |
bothering | -11.596 |
offending | -11.589 |
involved | -11.421 |
pictured | -11.207 |
Token | stre |
Token ID | 22853 |
Feature activation | +3.29e+00 |
Pos logit contributions | |
bothering | +0.569 |
stranger | +0.566 |
offending | +0.566 |
occurring | +0.543 |
pictured | +0.539 |
Neg logit contributions | |
izons | -0.593 |
Opportun | -0.566 |
onential | -0.548 |
�� | -0.529 |
leaps | -0.526 |
Token | ng |
Token ID | 782 |
Feature activation | +1.81e+00 |
Pos logit contributions | |
izons | +8.426 |
onential | +7.467 |
Opportun | +6.890 |
ights | +6.730 |
akening | +6.681 |
Neg logit contributions | |
bothering | -20.196 |
offending | -20.044 |
stranger | -19.976 |
occurring | -19.508 |
REDACTED | -19.478 |
Token | then |
Token ID | 8524 |
Feature activation | +2.76e+00 |
Pos logit contributions | |
izons | +6.823 |
Opportun | +6.131 |
onential | +6.130 |
economies | +5.708 |
Strategies | +5.664 |
Neg logit contributions | |
stranger | -10.831 |
offending | -10.810 |
bothering | -10.613 |
occurring | -10.531 |
involved | -10.493 |
Token | faculty |
Token ID | 12829 |
Feature activation | +1.01e+00 |
Pos logit contributions | |
izons | +13.624 |
Opportun | +13.597 |
economies | +12.913 |
onential | +12.418 |
Strategies | +12.180 |
Neg logit contributions | |
bothering | -10.874 |
stranger | -10.450 |
REDACTED | -10.438 |
involved | -10.407 |
occurring | -10.277 |
Token | tenure |
Token ID | 17081 |
Feature activation | +8.38e-01 |
Pos logit contributions | |
izons | +4.820 |
Opportun | +4.701 |
onential | +4.328 |
economies | +4.319 |
leaps | +4.314 |
Neg logit contributions | |
bothering | -5.382 |
stranger | -5.322 |
offending | -5.267 |
involved | -5.196 |
occurring | -5.117 |
Token | systems |
Token ID | 3341 |
Feature activation | +1.73e+00 |
Pos logit contributions | |
izons | +4.090 |
Opportun | +3.951 |
onential | +3.672 |
leaps | +3.669 |
economies | +3.609 |
Neg logit contributions | |
stranger | -4.377 |
offending | -4.373 |
bothering | -4.360 |
involved | -4.291 |
pictured | -4.141 |
Token | ace |
Token ID | 31506 |
Feature activation | -2.15e-01 |
Pos logit contributions | |
bothering | +2.127 |
offending | +2.122 |
stranger | +2.118 |
involved | +2.080 |
occurring | +2.042 |
Neg logit contributions | |
izons | -2.240 |
Opportun | -2.147 |
onential | -2.062 |
�� | -2.019 |
leaps | -2.000 |
Token | , |
Token ID | 11 |
Feature activation | +1.46e+00 |
Pos logit contributions | |
bothering | +1.003 |
offending | +1.002 |
stranger | +0.996 |
involved | +0.985 |
occurring | +0.966 |
Neg logit contributions | |
izons | -1.191 |
Opportun | -1.153 |
onential | -1.102 |
�� | -1.070 |
Strategies | -1.070 |
Token | generates |
Token ID | 18616 |
Feature activation | +1.90e+00 |
Pos logit contributions | |
izons | +7.055 |
Opportun | +6.942 |
leaps | +6.740 |
economies | +6.680 |
strides | +6.546 |
Neg logit contributions | |
offending | -6.921 |
stranger | -6.901 |
bothering | -6.852 |
involved | -6.797 |
REDACTED | -6.516 |
Token | returns |
Token ID | 5860 |
Feature activation | +1.40e+00 |
Pos logit contributions | |
Opportun | +9.055 |
izons | +8.907 |
leaps | +8.793 |
economies | +8.772 |
strides | +8.483 |
Neg logit contributions | |
involved | -8.703 |
bothering | -8.414 |
offending | -8.325 |
stranger | -8.230 |
REDACTED | -8.053 |
Token | of |
Token ID | 286 |
Feature activation | +1.81e+00 |
Pos logit contributions | |
Opportun | +7.512 |
izons | +7.494 |
leaps | +7.334 |
economies | +7.099 |
onential | +7.077 |
Neg logit contributions | |
offending | -5.879 |
involved | -5.788 |
bothering | -5.737 |
stranger | -5.617 |
REDACTED | -5.473 |
Token | 34 |
Token ID | 4974 |
Feature activation | +3.29e+00 |
Pos logit contributions | |
Opportun | +7.697 |
leaps | +7.612 |
izons | +7.466 |
economies | +7.200 |
strides | +7.020 |
Neg logit contributions | |
involved | -9.327 |
offending | -9.091 |
bothering | -9.069 |
stranger | -8.934 |
REDACTED | -8.821 |
Token | %. |
Token ID | 7225 |
Feature activation | +7.87e-01 |
Pos logit contributions | |
izons | +11.475 |
Opportun | +11.425 |
leaps | +10.621 |
Strategies | +10.617 |
economies | +10.613 |
Neg logit contributions | |
stranger | -16.574 |
offending | -16.355 |
bothering | -16.355 |
involved | -16.249 |
pictured | -15.480 |
Token | Let |
Token ID | 3914 |
Feature activation | +6.25e-01 |
Pos logit contributions | |
Opportun | +3.408 |
izons | +3.270 |
Strategies | +3.099 |
Areas | +2.947 |
onential | +2.921 |
Neg logit contributions | |
offending | -4.656 |
bothering | -4.577 |
stranger | -4.521 |
involved | -4.487 |
occurring | -4.370 |
Token | � |
Token ID | 447 |
Feature activation | +9.14e-01 |
Pos logit contributions | |
izons | +3.739 |
Opportun | +3.610 |
onential | +3.480 |
�� | +3.400 |
leaps | +3.382 |
Neg logit contributions | |
bothering | -2.609 |
offending | -2.607 |
stranger | -2.601 |
involved | -2.566 |
occurring | -2.483 |
Token | � |
Token ID | 247 |
Feature activation | -1.97e+00 |
Pos logit contributions | |
izons | +1.980 |
Opportun | +1.683 |
onential | +1.666 |
�� | +1.626 |
Strategies | +1.432 |
Neg logit contributions | |
offending | -7.212 |
bothering | -7.197 |
involved | -7.135 |
stranger | -7.121 |
occurring | -6.951 |
Token | s |
Token ID | 82 |
Feature activation | +4.17e-01 |
Pos logit contributions | |
stranger | +5.818 |
bothering | +5.761 |
offending | +5.609 |
occurring | +5.558 |
pictured | +5.536 |
Neg logit contributions | |
izons | -13.573 |
�� | -13.133 |
Opportun | -13.018 |
onential | -12.837 |
Flavoring | -12.243 |
Token | � |
Token ID | 564 |
Feature activation | +1.70e-01 |
Pos logit contributions | |
izons | +0.541 |
Opportun | +0.519 |
onential | +0.498 |
leaps | +0.484 |
Strategies | +0.482 |
Neg logit contributions | |
bothering | -0.443 |
offending | -0.443 |
stranger | -0.443 |
involved | -0.436 |
occurring | -0.423 |
Token | � |
Token ID | 250 |
Feature activation | -1.81e+00 |
Pos logit contributions | |
izons | +0.570 |
Opportun | +0.527 |
onential | +0.501 |
�� | +0.499 |
leaps | +0.464 |
Neg logit contributions | |
stranger | -1.172 |
offending | -1.171 |
bothering | -1.171 |
involved | -1.154 |
occurring | -1.137 |
Token | Israel |
Token ID | 14040 |
Feature activation | +1.27e+00 |
Pos logit contributions | |
involved | +8.810 |
stranger | +8.759 |
offending | +8.750 |
bothering | +8.718 |
occurring | +8.495 |
Neg logit contributions | |
izons | -9.126 |
�� | -8.797 |
Opportun | -8.720 |
onential | -8.580 |
leaps | -8.211 |
Token | has |
Token ID | 468 |
Feature activation | +1.68e+00 |
Pos logit contributions | |
izons | +6.504 |
Opportun | +6.238 |
onential | +5.859 |
leaps | +5.854 |
Strategies | +5.825 |
Neg logit contributions | |
stranger | -6.063 |
bothering | -6.050 |
offending | -6.017 |
involved | -5.826 |
occurring | -5.715 |
Token | always |
Token ID | 1464 |
Feature activation | +8.99e-01 |
Pos logit contributions | |
izons | +9.914 |
Opportun | +9.718 |
leaps | +9.217 |
economies | +9.212 |
onential | +9.127 |
Neg logit contributions | |
involved | -6.077 |
offending | -6.038 |
bothering | -5.972 |
stranger | -5.921 |
occurring | -5.578 |
Token | stri |
Token ID | 2338 |
Feature activation | +3.29e+00 |
Pos logit contributions | |
izons | +5.289 |
Opportun | +5.122 |
leaps | +4.922 |
economies | +4.884 |
strides | +4.881 |
Neg logit contributions | |
stranger | -3.532 |
offending | -3.521 |
bothering | -3.491 |
involved | -3.479 |
occurring | -3.270 |
Token | ved |
Token ID | 1079 |
Feature activation | +1.92e+00 |
Pos logit contributions | |
izons | +10.298 |
ights | +9.045 |
Opportun | +9.015 |
ighed | +8.617 |
onential | +8.483 |
Neg logit contributions | |
offending | -18.382 |
bothering | -17.857 |
stranger | -17.790 |
involved | -17.601 |
occurring | -17.478 |
Token | for |
Token ID | 329 |
Feature activation | +1.03e+00 |
Pos logit contributions | |
izons | +9.252 |
Opportun | +8.873 |
onential | +8.514 |
leaps | +8.343 |
Strategies | +8.228 |
Neg logit contributions | |
offending | -9.212 |
involved | -9.147 |
bothering | -9.016 |
stranger | -9.004 |
occurring | -8.723 |
Token | stable |
Token ID | 8245 |
Feature activation | +1.01e+00 |
Pos logit contributions | |
izons | +5.461 |
Opportun | +5.234 |
leaps | +4.994 |
onential | +4.953 |
economies | +4.923 |
Neg logit contributions | |
involved | -4.801 |
offending | -4.799 |
bothering | -4.795 |
stranger | -4.713 |
pictured | -4.524 |
Token | relations |
Token ID | 2316 |
Feature activation | +7.46e-01 |
Pos logit contributions | |
izons | +4.106 |
Opportun | +3.837 |
economies | +3.613 |
onential | +3.524 |
leaps | +3.519 |
Neg logit contributions | |
bothering | -6.047 |
involved | -6.010 |
offending | -5.921 |
stranger | -5.911 |
occurring | -5.788 |
Token | with |
Token ID | 351 |
Feature activation | -1.01e-01 |
Pos logit contributions | |
izons | +4.117 |
Opportun | +3.980 |
onential | +3.822 |
Strategies | +3.701 |
leaps | +3.698 |
Neg logit contributions | |
offending | -3.418 |
stranger | -3.413 |
bothering | -3.400 |
involved | -3.332 |
pictured | -3.204 |
Token | � |
Token ID | 447 |
Feature activation | +1.17e+00 |
Pos logit contributions | |
izons | +2.531 |
Opportun | +2.460 |
onential | +2.368 |
economies | +2.334 |
leaps | +2.317 |
Neg logit contributions | |
stranger | -1.864 |
bothering | -1.848 |
involved | -1.838 |
offending | -1.821 |
occurring | -1.755 |
Token | � |
Token ID | 247 |
Feature activation | -1.21e+00 |
Pos logit contributions | |
izons | +2.891 |
�� | +2.715 |
Opportun | +2.607 |
onential | +2.477 |
Strategies | +2.248 |
Neg logit contributions | |
offending | -8.672 |
bothering | -8.669 |
stranger | -8.656 |
involved | -8.633 |
pictured | -8.428 |
Token | s |
Token ID | 82 |
Feature activation | -3.30e-01 |
Pos logit contributions | |
offending | +6.257 |
bothering | +6.221 |
stranger | +6.209 |
involved | +6.060 |
occurring | +6.043 |
Neg logit contributions | |
izons | -6.119 |
Opportun | -5.734 |
onential | -5.604 |
�� | -5.567 |
leaps | -5.335 |
Token | rights |
Token ID | 2489 |
Feature activation | +1.27e+00 |
Pos logit contributions | |
bothering | +1.542 |
stranger | +1.538 |
offending | +1.534 |
involved | +1.502 |
occurring | +1.474 |
Neg logit contributions | |
izons | -1.851 |
Opportun | -1.772 |
onential | -1.716 |
�� | -1.680 |
leaps | -1.659 |
Token | . |
Token ID | 13 |
Feature activation | +1.07e+00 |
Pos logit contributions | |
izons | +5.969 |
Opportun | +5.880 |
onential | +5.564 |
economies | +5.543 |
leaps | +5.514 |
Neg logit contributions | |
stranger | -6.300 |
bothering | -6.244 |
offending | -6.218 |
involved | -6.137 |
pictured | -5.980 |
Token | We |
Token ID | 775 |
Feature activation | +3.28e+00 |
Pos logit contributions | |
Opportun | +4.690 |
izons | +4.502 |
Strategies | +4.272 |
onential | +4.165 |
Measures | +4.064 |
Neg logit contributions | |
offending | -5.974 |
stranger | -5.951 |
bothering | -5.928 |
involved | -5.775 |
pictured | -5.646 |
Token | � |
Token ID | 447 |
Feature activation | +1.73e+00 |
Pos logit contributions | |
izons | +15.781 |
ights | +14.654 |
Opportun | +14.576 |
economies | +14.552 |
akening | +14.323 |
Neg logit contributions | |
offending | -11.607 |
bothering | -11.603 |
stranger | -11.494 |
involved | -10.991 |
REDACTED | -10.869 |
Token | � |
Token ID | 247 |
Feature activation | -4.26e-01 |
Pos logit contributions | |
izons | +3.036 |
�� | +2.919 |
� | +2.642 |
Opportun | +2.490 |
onential | +2.405 |
Neg logit contributions | |
offending | -13.413 |
bothering | -13.394 |
involved | -13.315 |
stranger | -13.103 |
occurring | -12.952 |
Token | re |
Token ID | 260 |
Feature activation | +3.50e-01 |
Pos logit contributions | |
bothering | +2.765 |
offending | +2.764 |
stranger | +2.762 |
involved | +2.705 |
occurring | +2.681 |
Neg logit contributions | |
izons | -1.617 |
Opportun | -1.501 |
onential | -1.441 |
�� | -1.404 |
leaps | -1.357 |
Token | now |
Token ID | 783 |
Feature activation | +5.71e-01 |
Pos logit contributions | |
izons | +2.389 |
Opportun | +2.302 |
onential | +2.237 |
leaps | +2.224 |
�� | +2.181 |
Neg logit contributions | |
stranger | -1.140 |
offending | -1.120 |
bothering | -1.119 |
involved | -1.092 |
occurring | -1.042 |
Token | seeing |
Token ID | 4379 |
Feature activation | +1.23e+00 |
Pos logit contributions | |
izons | +3.870 |
Opportun | +3.737 |
leaps | +3.681 |
onential | +3.643 |
economies | +3.566 |
Neg logit contributions | |
stranger | -1.793 |
bothering | -1.756 |
offending | -1.740 |
involved | -1.726 |
occurring | -1.627 |
Token | talent |
Token ID | 7401 |
Feature activation | +9.06e-01 |
Pos logit contributions | |
izons | +8.027 |
Opportun | +7.616 |
leaps | +7.332 |
strides | +7.294 |
economies | +6.978 |
Neg logit contributions | |
involved | -8.672 |
offending | -8.504 |
stranger | -8.307 |
bothering | -8.299 |
pictured | -8.122 |
Token | on |
Token ID | 319 |
Feature activation | +6.60e-01 |
Pos logit contributions | |
izons | +4.953 |
Opportun | +4.759 |
leaps | +4.619 |
onential | +4.520 |
strides | +4.494 |
Neg logit contributions | |
offending | -4.022 |
stranger | -3.946 |
bothering | -3.883 |
involved | -3.864 |
pictured | -3.677 |
Token | this |
Token ID | 428 |
Feature activation | -2.51e-01 |
Pos logit contributions | |
izons | +3.630 |
Opportun | +3.501 |
onential | +3.354 |
leaps | +3.311 |
Strategies | +3.233 |
Neg logit contributions | |
offending | -3.073 |
bothering | -3.065 |
stranger | -3.027 |
involved | -3.010 |
occurring | -2.890 |
Token | team |
Token ID | 1074 |
Feature activation | +1.17e-01 |
Pos logit contributions | |
bothering | +1.125 |
offending | +1.115 |
stranger | +1.113 |
involved | +1.107 |
occurring | +1.070 |
Neg logit contributions | |
izons | -1.443 |
Opportun | -1.384 |
onential | -1.339 |
leaps | -1.309 |
�� | -1.289 |
Token | . |
Token ID | 13 |
Feature activation | +9.04e-01 |
Pos logit contributions | |
izons | +0.596 |
Opportun | +0.570 |
onential | +0.549 |
leaps | +0.535 |
�� | +0.529 |
Neg logit contributions | |
offending | -0.595 |
stranger | -0.593 |
bothering | -0.593 |
involved | -0.579 |
occurring | -0.567 |
Token | We |
Token ID | 775 |
Feature activation | +3.28e+00 |
Pos logit contributions | |
Opportun | +4.016 |
izons | +3.770 |
Strategies | +3.590 |
Areas | +3.438 |
onential | +3.413 |
Neg logit contributions | |
offending | -5.337 |
bothering | -5.199 |
stranger | -5.140 |
involved | -5.082 |
occurring | -4.963 |
Token | have |
Token ID | 423 |
Feature activation | +1.82e+00 |
Pos logit contributions | |
izons | +14.794 |
Opportun | +14.066 |
stride | +13.524 |
strides | +13.468 |
ights | +13.332 |
Neg logit contributions | |
offending | -12.932 |
bothering | -12.501 |
stranger | -12.419 |
involved | -12.403 |
REDACTED | -12.265 |
Token | what |
Token ID | 644 |
Feature activation | +6.62e-01 |
Pos logit contributions | |
izons | +10.138 |
Opportun | +10.042 |
leaps | +9.326 |
strides | +9.212 |
onential | +9.203 |
Neg logit contributions | |
involved | -7.168 |
offending | -7.108 |
bothering | -7.075 |
stranger | -6.955 |
occurring | -6.835 |
Token | it |
Token ID | 340 |
Feature activation | +1.02e+00 |
Pos logit contributions | |
izons | +4.254 |
Opportun | +4.189 |
leaps | +4.002 |
onential | +3.995 |
�� | +3.915 |
Neg logit contributions | |
offending | -2.439 |
bothering | -2.380 |
stranger | -2.357 |
involved | -2.299 |
occurring | -2.228 |
Token | takes |
Token ID | 2753 |
Feature activation | +1.01e+00 |
Pos logit contributions | |
izons | +3.854 |
leaps | +3.667 |
Opportun | +3.647 |
economies | +3.455 |
strides | +3.453 |
Neg logit contributions | |
offending | -6.240 |
bothering | -6.160 |
involved | -6.130 |
stranger | -6.092 |
occurring | -5.890 |
Token | to |
Token ID | 284 |
Feature activation | +1.09e+00 |
Pos logit contributions | |
izons | +5.020 |
Opportun | +4.837 |
onential | +4.586 |
leaps | +4.542 |
Strategies | +4.459 |
Neg logit contributions | |
offending | -5.087 |
bothering | -5.024 |
stranger | -5.020 |
involved | -5.011 |
occurring | -4.810 |
Token | Martin |
Token ID | 5780 |
Feature activation | -4.23e-01 |
Pos logit contributions | |
stranger | +4.049 |
offending | +4.015 |
bothering | +4.012 |
involved | +3.904 |
occurring | +3.882 |
Neg logit contributions | |
izons | -3.922 |
Opportun | -3.677 |
onential | -3.598 |
�� | -3.572 |
leaps | -3.438 |
Token | , |
Token ID | 11 |
Feature activation | +2.79e-01 |
Pos logit contributions | |
bothering | +2.123 |
stranger | +2.121 |
offending | +2.118 |
involved | +2.049 |
occurring | +2.047 |
Neg logit contributions | |
izons | -2.223 |
Opportun | -2.098 |
onential | -2.067 |
�� | -2.030 |
leaps | -1.964 |
Token | � |
Token ID | 564 |
Feature activation | +5.35e-01 |
Pos logit contributions | |
izons | +1.703 |
Opportun | +1.670 |
onential | +1.607 |
leaps | +1.581 |
Strategies | +1.572 |
Neg logit contributions | |
bothering | -1.093 |
stranger | -1.092 |
offending | -1.084 |
involved | -1.078 |
occurring | -1.032 |
Token | � |
Token ID | 250 |
Feature activation | -1.84e+00 |
Pos logit contributions | |
izons | +1.627 |
Opportun | +1.510 |
�� | +1.468 |
onential | +1.457 |
Strategies | +1.329 |
Neg logit contributions | |
bothering | -3.758 |
stranger | -3.755 |
offending | -3.753 |
involved | -3.715 |
occurring | -3.640 |
Token | and |
Token ID | 392 |
Feature activation | -1.18e-01 |
Pos logit contributions | |
stranger | +10.004 |
involved | +9.977 |
bothering | +9.937 |
offending | +9.919 |
occurring | +9.638 |
Neg logit contributions | |
izons | -8.402 |
�� | -7.828 |
Opportun | -7.778 |
onential | -7.599 |
leaps | -7.243 |
Token | we |
Token ID | 356 |
Feature activation | +3.27e+00 |
Pos logit contributions | |
bothering | +0.490 |
offending | +0.490 |
stranger | +0.487 |
involved | +0.477 |
occurring | +0.467 |
Neg logit contributions | |
izons | -0.715 |
Opportun | -0.695 |
onential | -0.669 |
�� | -0.651 |
leaps | -0.649 |
Token | get |
Token ID | 651 |
Feature activation | +6.37e-01 |
Pos logit contributions | |
izons | +14.718 |
Opportun | +14.284 |
economies | +13.684 |
ights | +13.523 |
onential | +13.421 |
Neg logit contributions | |
offending | -12.353 |
bothering | -12.334 |
stranger | -12.134 |
involved | -11.770 |
occurring | -11.601 |
Token | a |
Token ID | 257 |
Feature activation | +1.01e+00 |
Pos logit contributions | |
izons | +3.598 |
Opportun | +3.506 |
onential | +3.340 |
economies | +3.293 |
leaps | +3.293 |
Neg logit contributions | |
bothering | -2.789 |
offending | -2.781 |
stranger | -2.778 |
occurring | -2.701 |
involved | -2.669 |
Token | little |
Token ID | 1310 |
Feature activation | +3.04e-02 |
Pos logit contributions | |
Opportun | +4.810 |
izons | +4.810 |
leaps | +4.486 |
onential | +4.471 |
economies | +4.451 |
Neg logit contributions | |
offending | -5.061 |
bothering | -4.997 |
involved | -4.896 |
stranger | -4.824 |
occurring | -4.776 |
Token | frustrated |
Token ID | 14718 |
Feature activation | +1.49e+00 |
Pos logit contributions | |
izons | +0.162 |
Opportun | +0.156 |
onential | +0.148 |
leaps | +0.147 |
economies | +0.145 |
Neg logit contributions | |
bothering | -0.147 |
offending | -0.147 |
stranger | -0.145 |
involved | -0.143 |
occurring | -0.143 |
Token | when |
Token ID | 618 |
Feature activation | -8.44e-01 |
Pos logit contributions | |
Opportun | +8.681 |
izons | +8.585 |
leaps | +8.453 |
onential | +8.393 |
economies | +8.335 |
Neg logit contributions | |
involved | -5.090 |
stranger | -5.090 |
bothering | -4.987 |
offending | -4.955 |
pictured | -4.753 |
Token | it |
Token ID | 340 |
Feature activation | +1.02e+00 |
Pos logit contributions | |
izons | +4.254 |
Opportun | +4.189 |
leaps | +4.002 |
onential | +3.995 |
�� | +3.915 |
Neg logit contributions | |
offending | -2.439 |
bothering | -2.380 |
stranger | -2.357 |
involved | -2.299 |
occurring | -2.228 |
Token | takes |
Token ID | 2753 |
Feature activation | +1.01e+00 |
Pos logit contributions | |
izons | +3.854 |
leaps | +3.667 |
Opportun | +3.647 |
economies | +3.455 |
strides | +3.453 |
Neg logit contributions | |
offending | -6.240 |
bothering | -6.160 |
involved | -6.130 |
stranger | -6.092 |
occurring | -5.890 |
Token | to |
Token ID | 284 |
Feature activation | +1.09e+00 |
Pos logit contributions | |
izons | +5.020 |
Opportun | +4.837 |
onential | +4.586 |
leaps | +4.542 |
Strategies | +4.459 |
Neg logit contributions | |
offending | -5.087 |
bothering | -5.024 |
stranger | -5.020 |
involved | -5.011 |
occurring | -4.810 |
Token | win |
Token ID | 1592 |
Feature activation | +1.06e+00 |
Pos logit contributions | |
izons | +4.892 |
Opportun | +4.709 |
leaps | +4.522 |
strides | +4.451 |
onential | +4.427 |
Neg logit contributions | |
involved | -5.888 |
offending | -5.711 |
bothering | -5.702 |
stranger | -5.633 |
occurring | -5.480 |
Token | . |
Token ID | 13 |
Feature activation | +9.45e-01 |
Pos logit contributions | |
izons | +5.418 |
Opportun | +5.292 |
onential | +4.998 |
leaps | +4.992 |
strides | +4.804 |
Neg logit contributions | |
offending | -5.123 |
bothering | -5.075 |
involved | -5.067 |
stranger | -4.981 |
occurring | -4.833 |
Token | We |
Token ID | 775 |
Feature activation | +3.27e+00 |
Pos logit contributions | |
Opportun | +3.952 |
izons | +3.671 |
Strategies | +3.506 |
Areas | +3.336 |
onential | +3.326 |
Neg logit contributions | |
offending | -5.815 |
bothering | -5.696 |
stranger | -5.622 |
involved | -5.553 |
occurring | -5.444 |
Token | have |
Token ID | 423 |
Feature activation | +1.61e+00 |
Pos logit contributions | |
izons | +14.048 |
Opportun | +13.252 |
stride | +12.841 |
strides | +12.784 |
ights | +12.554 |
Neg logit contributions | |
offending | -13.898 |
bothering | -13.558 |
involved | -13.335 |
stranger | -13.316 |
REDACTED | -13.073 |
Token | great |
Token ID | 1049 |
Feature activation | +1.56e+00 |
Pos logit contributions | |
izons | +9.174 |
Opportun | +9.093 |
leaps | +8.480 |
onential | +8.422 |
strides | +8.372 |
Neg logit contributions | |
involved | -6.338 |
offending | -6.297 |
bothering | -6.292 |
stranger | -6.196 |
occurring | -6.050 |
Token | coaches |
Token ID | 11070 |
Feature activation | +5.68e-01 |
Pos logit contributions | |
izons | +7.196 |
Opportun | +7.048 |
leaps | +6.734 |
strides | +6.667 |
economies | +6.491 |
Neg logit contributions | |
involved | -7.967 |
bothering | -7.930 |
offending | -7.792 |
stranger | -7.672 |
occurring | -7.577 |
Token | , |
Token ID | 11 |
Feature activation | +2.14e-01 |
Pos logit contributions | |
izons | +2.876 |
Opportun | +2.784 |
onential | +2.665 |
leaps | +2.620 |
Strategies | +2.568 |
Neg logit contributions | |
offending | -2.829 |
stranger | -2.817 |
bothering | -2.801 |
involved | -2.735 |
occurring | -2.690 |
Token | and |
Token ID | 290 |
Feature activation | -1.30e-01 |
Pos logit contributions | |
izons | +1.075 |
Opportun | +1.032 |
onential | +0.983 |
leaps | +0.969 |
�� | +0.948 |
Neg logit contributions | |
offending | -1.114 |
bothering | -1.112 |
stranger | -1.101 |
involved | -1.089 |
occurring | -1.065 |
Token | . |
Token ID | 13 |
Feature activation | +1.57e+00 |
Pos logit contributions | |
izons | +6.860 |
Opportun | +6.715 |
economies | +6.516 |
onential | +6.358 |
leaps | +6.263 |
Neg logit contributions | |
stranger | -6.459 |
bothering | -6.338 |
offending | -6.296 |
involved | -6.250 |
REDACTED | -5.939 |
Token | Most |
Token ID | 4042 |
Feature activation | +8.07e-01 |
Pos logit contributions | |
Opportun | +5.989 |
izons | +5.530 |
Strategies | +5.421 |
Areas | +5.112 |
Measures | +4.993 |
Neg logit contributions | |
offending | -9.653 |
bothering | -9.640 |
stranger | -9.562 |
involved | -9.333 |
occurring | -9.076 |
Token | importantly |
Token ID | 11003 |
Feature activation | +8.12e-01 |
Pos logit contributions | |
izons | +3.833 |
Opportun | +3.813 |
economies | +3.562 |
leaps | +3.532 |
onential | +3.510 |
Neg logit contributions | |
bothering | -4.136 |
stranger | -4.115 |
offending | -4.109 |
involved | -4.087 |
occurring | -3.918 |
Token | however |
Token ID | 2158 |
Feature activation | -1.93e-01 |
Pos logit contributions | |
izons | +4.157 |
Opportun | +3.968 |
onential | +3.805 |
leaps | +3.691 |
Strategies | +3.679 |
Neg logit contributions | |
bothering | -4.047 |
offending | -4.029 |
stranger | -4.025 |
involved | -3.959 |
occurring | -3.838 |
Token | , |
Token ID | 11 |
Feature activation | +9.21e-01 |
Pos logit contributions | |
stranger | +0.932 |
offending | +0.929 |
bothering | +0.928 |
occurring | +0.903 |
involved | +0.899 |
Neg logit contributions | |
izons | -1.060 |
Opportun | -1.007 |
onential | -0.985 |
�� | -0.976 |
leaps | -0.939 |
Token | we |
Token ID | 356 |
Feature activation | +3.26e+00 |
Pos logit contributions | |
izons | +5.608 |
Opportun | +5.557 |
economies | +5.293 |
leaps | +5.289 |
onential | +5.260 |
Neg logit contributions | |
involved | -3.428 |
bothering | -3.387 |
stranger | -3.328 |
offending | -3.323 |
pictured | -3.129 |
Token | can |
Token ID | 460 |
Feature activation | +2.27e+00 |
Pos logit contributions | |
Opportun | +12.313 |
izons | +12.294 |
economies | +11.964 |
stride | +11.675 |
strive | +11.617 |
Neg logit contributions | |
involved | -15.089 |
bothering | -14.763 |
offending | -14.686 |
REDACTED | -14.618 |
stranger | -14.399 |
Token | restore |
Token ID | 11169 |
Feature activation | +1.42e+00 |
Pos logit contributions | |
izons | +9.951 |
Opportun | +9.780 |
leaps | +9.490 |
economies | +9.489 |
stride | +9.410 |
Neg logit contributions | |
involved | -10.896 |
bothering | -10.565 |
offending | -10.352 |
stranger | -10.273 |
REDACTED | -9.973 |
Token | safe |
Token ID | 3338 |
Feature activation | +7.33e-01 |
Pos logit contributions | |
izons | +7.214 |
Opportun | +7.156 |
economies | +6.980 |
leaps | +6.682 |
onential | +6.586 |
Neg logit contributions | |
bothering | -6.512 |
involved | -6.484 |
stranger | -6.318 |
offending | -6.171 |
REDACTED | -6.061 |
Token | and |
Token ID | 290 |
Feature activation | -4.60e-01 |
Pos logit contributions | |
izons | +3.472 |
Opportun | +3.280 |
onential | +3.131 |
economies | +3.073 |
leaps | +3.052 |
Neg logit contributions | |
bothering | -3.972 |
involved | -3.949 |
stranger | -3.929 |
offending | -3.922 |
occurring | -3.772 |
Token | healthy |
Token ID | 5448 |
Feature activation | +8.10e-01 |
Pos logit contributions | |
bothering | +2.417 |
offending | +2.414 |
stranger | +2.411 |
involved | +2.363 |
occurring | +2.323 |
Neg logit contributions | |
izons | -2.314 |
Opportun | -2.199 |
onential | -2.128 |
�� | -2.076 |
leaps | -2.044 |
Token | insisted |
Token ID | 11189 |
Feature activation | -7.33e-01 |
Pos logit contributions | |
izons | +4.792 |
Opportun | +4.567 |
onential | +4.336 |
Strategies | +4.309 |
leaps | +4.301 |
Neg logit contributions | |
bothering | -4.312 |
offending | -4.243 |
stranger | -4.210 |
involved | -4.073 |
occurring | -4.070 |
Token | that |
Token ID | 326 |
Feature activation | -1.94e+00 |
Pos logit contributions | |
bothering | +2.897 |
offending | +2.896 |
stranger | +2.890 |
involved | +2.804 |
occurring | +2.757 |
Neg logit contributions | |
izons | -4.644 |
Opportun | -4.456 |
onential | -4.344 |
�� | -4.283 |
leaps | -4.206 |
Token | his |
Token ID | 465 |
Feature activation | -1.10e+00 |
Pos logit contributions | |
stranger | +9.407 |
bothering | +9.351 |
offending | +9.299 |
occurring | +9.039 |
pictured | +8.870 |
Neg logit contributions | |
izons | -10.174 |
�� | -9.531 |
onential | -9.502 |
Opportun | -9.476 |
Flavoring | -8.766 |
Token | friend |
Token ID | 1545 |
Feature activation | -2.54e+00 |
Pos logit contributions | |
stranger | +4.597 |
offending | +4.589 |
bothering | +4.564 |
involved | +4.391 |
occurring | +4.357 |
Neg logit contributions | |
izons | -6.729 |
Opportun | -6.429 |
onential | -6.310 |
�� | -6.257 |
leaps | -6.015 |
Token | was |
Token ID | 373 |
Feature activation | -3.04e+00 |
Pos logit contributions | |
stranger | +10.672 |
offending | +10.532 |
bothering | +10.506 |
pictured | +10.461 |
wrong | +10.322 |
Neg logit contributions | |
izons | -14.096 |
onential | -13.492 |
�� | -13.296 |
Opportun | -13.100 |
Flavoring | -12.352 |
Token | not |
Token ID | 407 |
Feature activation | -4.28e+00 |
Pos logit contributions | |
stranger | +13.053 |
bothering | +13.042 |
wrong | +12.828 |
offending | +12.795 |
someone | +12.627 |
Neg logit contributions | |
izons | -16.503 |
�� | -15.651 |
onential | -15.624 |
Opportun | -15.239 |
Flavoring | -14.333 |
Token | involved |
Token ID | 2950 |
Feature activation | -1.57e+00 |
Pos logit contributions | |
bothering | +18.050 |
stranger | +17.577 |
someone | +17.457 |
wrong | +17.412 |
offending | +17.267 |
Neg logit contributions | |
izons | -20.458 |
�� | -20.159 |
onential | -19.574 |
Opportun | -18.677 |
Flavoring | -18.043 |
Token | in |
Token ID | 287 |
Feature activation | -1.38e+00 |
Pos logit contributions | |
stranger | +5.844 |
bothering | +5.831 |
offending | +5.827 |
involved | +5.640 |
occurring | +5.585 |
Neg logit contributions | |
izons | -9.942 |
Opportun | -9.452 |
onential | -9.312 |
�� | -9.201 |
leaps | -8.871 |
Token | the |
Token ID | 262 |
Feature activation | -1.52e+00 |
Pos logit contributions | |
offending | +5.910 |
bothering | +5.897 |
stranger | +5.844 |
involved | +5.649 |
occurring | +5.617 |
Neg logit contributions | |
izons | -8.296 |
Opportun | -7.746 |
�� | -7.715 |
onential | -7.704 |
leaps | -7.281 |
Token | bombings |
Token ID | 26579 |
Feature activation | -2.10e+00 |
Pos logit contributions | |
offending | +7.602 |
stranger | +7.582 |
bothering | +7.485 |
involved | +7.245 |
occurring | +7.234 |
Neg logit contributions | |
izons | -8.115 |
�� | -7.537 |
Opportun | -7.474 |
onential | -7.462 |
leaps | -6.937 |
Token | and |
Token ID | 290 |
Feature activation | -1.29e+00 |
Pos logit contributions | |
offending | +9.472 |
bothering | +9.428 |
stranger | +9.369 |
occurring | +9.345 |
involved | +9.053 |
Neg logit contributions | |
izons | -11.754 |
Opportun | -10.805 |
�� | -10.759 |
onential | -10.704 |
Strategies | -9.948 |
Token | which |
Token ID | 543 |
Feature activation | -1.27e+00 |
Pos logit contributions | |
bothering | +1.114 |
stranger | +1.112 |
offending | +1.111 |
involved | +1.090 |
occurring | +1.062 |
Neg logit contributions | |
izons | -1.210 |
Opportun | -1.165 |
onential | -1.110 |
�� | -1.090 |
leaps | -1.084 |
Token | proves |
Token ID | 17021 |
Feature activation | -1.16e+00 |
Pos logit contributions | |
offending | +6.109 |
stranger | +6.072 |
bothering | +6.060 |
involved | +5.935 |
occurring | +5.850 |
Neg logit contributions | |
izons | -6.906 |
Opportun | -6.498 |
onential | -6.441 |
�� | -6.320 |
leaps | -6.020 |
Token | that |
Token ID | 326 |
Feature activation | -1.54e+00 |
Pos logit contributions | |
bothering | +4.889 |
offending | +4.889 |
stranger | +4.881 |
involved | +4.700 |
occurring | +4.676 |
Neg logit contributions | |
izons | -7.058 |
Opportun | -6.659 |
onential | -6.568 |
�� | -6.491 |
leaps | -6.263 |
Token | McCarthy |
Token ID | 18751 |
Feature activation | -1.29e+00 |
Pos logit contributions | |
offending | +7.566 |
bothering | +7.531 |
stranger | +7.527 |
occurring | +7.261 |
involved | +7.253 |
Neg logit contributions | |
izons | -8.201 |
Opportun | -7.600 |
onential | -7.569 |
�� | -7.504 |
leaps | -7.089 |
Token | was |
Token ID | 373 |
Feature activation | -2.73e+00 |
Pos logit contributions | |
offending | +5.719 |
bothering | +5.704 |
stranger | +5.695 |
involved | +5.499 |
occurring | +5.498 |
Neg logit contributions | |
izons | -7.419 |
Opportun | -7.003 |
onential | -6.915 |
�� | -6.817 |
leaps | -6.529 |
Token | not |
Token ID | 407 |
Feature activation | -4.19e+00 |
Pos logit contributions | |
bothering | +11.873 |
offending | +11.793 |
stranger | +11.604 |
wrong | +11.495 |
pictured | +11.423 |
Neg logit contributions | |
izons | -15.136 |
onential | -14.106 |
�� | -14.099 |
Opportun | -14.089 |
Flavoring | -13.142 |
Token | responsible |
Token ID | 4497 |
Feature activation | -1.04e+00 |
Pos logit contributions | |
bothering | +18.291 |
offending | +17.620 |
pictured | +17.335 |
wrong | +17.280 |
stranger | +17.246 |
Neg logit contributions | |
izons | -19.665 |
�� | -18.763 |
onential | -18.535 |
Opportun | -18.107 |
Flavoring | -17.187 |
Token | for |
Token ID | 329 |
Feature activation | -1.52e+00 |
Pos logit contributions | |
bothering | +4.367 |
offending | +4.364 |
stranger | +4.353 |
involved | +4.196 |
occurring | +4.178 |
Neg logit contributions | |
izons | -6.318 |
Opportun | -6.030 |
onential | -5.909 |
�� | -5.856 |
leaps | -5.653 |
Token | the |
Token ID | 262 |
Feature activation | -1.44e+00 |
Pos logit contributions | |
offending | +7.308 |
bothering | +7.266 |
stranger | +7.183 |
occurring | +6.993 |
involved | +6.919 |
Neg logit contributions | |
izons | -8.412 |
Opportun | -7.777 |
onential | -7.664 |
�� | -7.607 |
leaps | -7.213 |
Token | bone |
Token ID | 9970 |
Feature activation | -1.61e+00 |
Pos logit contributions | |
offending | +6.366 |
stranger | +6.327 |
bothering | +6.262 |
involved | +6.065 |
occurring | +6.063 |
Neg logit contributions | |
izons | -8.474 |
Opportun | -7.950 |
onential | -7.813 |
�� | -7.770 |
Strategies | -7.380 |
Token | fractures |
Token ID | 39381 |
Feature activation | -1.71e+00 |
Pos logit contributions | |
bothering | +11.477 |
offending | +11.468 |
stranger | +11.364 |
occurring | +11.230 |
involved | +11.146 |
Neg logit contributions | |
izons | -5.011 |
Opportun | -4.478 |
�� | -4.342 |
onential | -4.212 |
Strategies | -3.756 |
Token | way |
Token ID | 835 |
Feature activation | -5.87e-01 |
Pos logit contributions | |
izons | +1.597 |
Opportun | +1.581 |
onential | +1.517 |
Strategies | +1.505 |
leaps | +1.502 |
Neg logit contributions | |
bothering | -1.058 |
involved | -1.049 |
offending | -1.046 |
stranger | -1.032 |
occurring | -1.006 |
Token | to |
Token ID | 284 |
Feature activation | -5.04e-01 |
Pos logit contributions | |
bothering | +2.819 |
offending | +2.818 |
stranger | +2.816 |
involved | +2.740 |
occurring | +2.704 |
Neg logit contributions | |
izons | -3.222 |
Opportun | -3.067 |
onential | -2.981 |
�� | -2.909 |
leaps | -2.871 |
Token | diagnose |
Token ID | 37489 |
Feature activation | -1.11e+00 |
Pos logit contributions | |
bothering | +2.155 |
offending | +2.152 |
stranger | +2.150 |
involved | +2.101 |
occurring | +2.061 |
Neg logit contributions | |
izons | -3.013 |
Opportun | -2.890 |
onential | -2.812 |
�� | -2.751 |
leaps | -2.725 |
Token | what |
Token ID | 644 |
Feature activation | -1.39e+00 |
Pos logit contributions | |
offending | +4.676 |
bothering | +4.666 |
stranger | +4.665 |
occurring | +4.465 |
involved | +4.464 |
Neg logit contributions | |
izons | -6.740 |
Opportun | -6.387 |
�� | -6.323 |
onential | -6.265 |
leaps | -6.002 |
Token | is |
Token ID | 318 |
Feature activation | -2.70e+00 |
Pos logit contributions | |
bothering | +5.833 |
offending | +5.819 |
stranger | +5.802 |
involved | +5.602 |
occurring | +5.584 |
Neg logit contributions | |
izons | -8.429 |
Opportun | -7.986 |
�� | -7.865 |
onential | -7.827 |
leaps | -7.468 |
Token | actually |
Token ID | 1682 |
Feature activation | -4.07e+00 |
Pos logit contributions | |
bothering | +10.606 |
occurring | +10.363 |
wrong | +10.272 |
happening | +10.259 |
offending | +10.199 |
Neg logit contributions | |
izons | -16.616 |
�� | -15.773 |
Opportun | -15.648 |
onential | -15.230 |
Flavoring | -14.660 |
Token | keeping |
Token ID | 5291 |
Feature activation | -1.42e+00 |
Pos logit contributions | |
happening | +17.905 |
bothering | +17.857 |
wrong | +17.756 |
occurring | +17.699 |
causing | +17.466 |
Neg logit contributions | |
izons | -20.118 |
�� | -19.533 |
onential | -18.270 |
Opportun | -18.230 |
Flavoring | -17.598 |
Token | your |
Token ID | 534 |
Feature activation | -1.56e+00 |
Pos logit contributions | |
bothering | +5.915 |
offending | +5.871 |
stranger | +5.857 |
occurring | +5.648 |
involved | +5.568 |
Neg logit contributions | |
izons | -8.648 |
�� | -8.199 |
Opportun | -8.189 |
onential | -8.081 |
Strategies | -7.680 |
Token | c |
Token ID | 269 |
Feature activation | -1.32e+00 |
Pos logit contributions | |
offending | +8.396 |
stranger | +8.328 |
bothering | +8.260 |
occurring | +7.925 |
pictured | +7.913 |
Neg logit contributions | |
izons | -7.538 |
�� | -7.057 |
Opportun | -6.994 |
onential | -6.958 |
Strategies | -6.473 |
Token | 6 |
Token ID | 21 |
Feature activation | -1.85e+00 |
Pos logit contributions | |
offending | +7.287 |
bothering | +7.185 |
stranger | +7.100 |
involved | +7.071 |
occurring | +6.976 |
Neg logit contributions | |
izons | -5.874 |
Opportun | -5.778 |
�� | -5.625 |
onential | -5.439 |
leaps | -5.328 |
Token | from |
Token ID | 422 |
Feature activation | -1.07e+00 |
Pos logit contributions | |
bothering | +8.039 |
offending | +7.942 |
stranger | +7.888 |
occurring | +7.583 |
involved | +7.561 |
Neg logit contributions | |
izons | -10.825 |
Opportun | -10.252 |
�� | -10.190 |
onential | -9.973 |
Flavoring | -9.617 |
Token | executive |
Token ID | 4640 |
Feature activation | -1.52e+00 |
Pos logit contributions | |
offending | +3.738 |
bothering | +3.731 |
stranger | +3.730 |
involved | +3.630 |
occurring | +3.597 |
Neg logit contributions | |
izons | -3.792 |
Opportun | -3.595 |
onential | -3.491 |
�� | -3.439 |
leaps | -3.340 |
Token | order |
Token ID | 1502 |
Feature activation | -2.07e+00 |
Pos logit contributions | |
offending | +6.340 |
stranger | +6.256 |
bothering | +6.110 |
occurring | +6.013 |
involved | +5.846 |
Neg logit contributions | |
izons | -9.206 |
Opportun | -8.793 |
�� | -8.639 |
onential | -8.539 |
Flavoring | -8.254 |
Token | was |
Token ID | 373 |
Feature activation | -2.95e+00 |
Pos logit contributions | |
offending | +9.571 |
bothering | +9.381 |
stranger | +9.358 |
occurring | +9.182 |
wrong | +9.002 |
Neg logit contributions | |
izons | -11.376 |
�� | -10.724 |
Opportun | -10.682 |
onential | -10.493 |
Flavoring | -9.950 |
Token | not |
Token ID | 407 |
Feature activation | -4.00e+00 |
Pos logit contributions | |
bothering | +11.875 |
occurring | +11.859 |
stranger | +11.833 |
wrong | +11.797 |
offending | +11.735 |
Neg logit contributions | |
izons | -16.755 |
�� | -16.215 |
Opportun | -15.735 |
onential | -15.509 |
Flavoring | -15.024 |
Token | in |
Token ID | 287 |
Feature activation | -1.32e+00 |
Pos logit contributions | |
occurring | +15.890 |
bothering | +15.745 |
offending | +15.722 |
happening | +15.577 |
wrong | +15.525 |
Neg logit contributions | |
izons | -20.508 |
�� | -20.285 |
Opportun | -19.259 |
onential | -19.185 |
Flavoring | -18.367 |
Token | fact |
Token ID | 1109 |
Feature activation | -4.07e+00 |
Pos logit contributions | |
offending | +5.187 |
stranger | +5.103 |
bothering | +5.094 |
involved | +4.926 |
occurring | +4.908 |
Neg logit contributions | |
izons | -8.357 |
Opportun | -8.033 |
�� | -7.885 |
onential | -7.787 |
Strategies | -7.440 |
Token | a |
Token ID | 257 |
Feature activation | -2.52e+00 |
Pos logit contributions | |
occurring | +16.056 |
offending | +15.822 |
happening | +15.710 |
wrong | +15.705 |
stranger | +15.594 |
Neg logit contributions | |
izons | -20.333 |
�� | -19.648 |
Opportun | -19.224 |
onential | -19.012 |
Flavoring | -18.247 |
Token | ban |
Token ID | 3958 |
Feature activation | -2.54e+00 |
Pos logit contributions | |
stranger | +11.378 |
offending | +11.211 |
bothering | +11.020 |
occurring | +10.806 |
wrong | +10.723 |
Neg logit contributions | |
izons | -13.923 |
�� | -12.878 |
Opportun | -12.817 |
onential | -12.638 |
Flavoring | -12.035 |
Token | . |
Token ID | 13 |
Feature activation | -4.03e-03 |
Pos logit contributions | |
occurring | +10.327 |
offending | +10.300 |
bothering | +10.247 |
stranger | +10.215 |
happening | +9.910 |
Neg logit contributions | |
izons | -14.795 |
Opportun | -13.769 |
�� | -13.751 |
onential | -13.231 |
Flavoring | -12.942 |
Token | The |
Token ID | 383 |
Feature activation | -4.17e-02 |
Pos logit contributions | |
offending | +0.024 |
bothering | +0.024 |
stranger | +0.024 |
involved | +0.023 |
occurring | +0.023 |
Neg logit contributions | |
izons | -0.017 |
Opportun | -0.017 |
onential | -0.016 |
Strategies | -0.015 |
�� | -0.015 |
Token | administration |
Token ID | 3662 |
Feature activation | +9.73e-02 |
Pos logit contributions | |
bothering | +0.189 |
stranger | +0.187 |
involved | +0.185 |
offending | +0.183 |
occurring | +0.179 |
Neg logit contributions | |
izons | -0.237 |
Opportun | -0.230 |
onential | -0.220 |
leaps | -0.216 |
Strategies | -0.213 |
Token | misdemeanor |
Token ID | 22403 |
Feature activation | -2.32e+00 |
Pos logit contributions | |
stranger | +3.763 |
bothering | +3.756 |
offending | +3.756 |
involved | +3.626 |
occurring | +3.585 |
Neg logit contributions | |
izons | -5.666 |
Opportun | -5.409 |
onential | -5.286 |
�� | -5.212 |
leaps | -5.101 |
Token | and |
Token ID | 290 |
Feature activation | -1.80e+00 |
Pos logit contributions | |
offending | +10.726 |
bothering | +10.694 |
stranger | +10.662 |
occurring | +10.461 |
wrong | +10.268 |
Neg logit contributions | |
izons | -12.147 |
�� | -11.318 |
onential | -11.175 |
Opportun | -11.061 |
Flavoring | -10.580 |
Token | the |
Token ID | 262 |
Feature activation | -1.49e+00 |
Pos logit contributions | |
stranger | +8.474 |
offending | +8.442 |
bothering | +8.417 |
involved | +8.128 |
occurring | +8.104 |
Neg logit contributions | |
izons | -9.892 |
Opportun | -9.293 |
onential | -9.197 |
�� | -9.137 |
Flavoring | -8.690 |
Token | victim |
Token ID | 3117 |
Feature activation | -2.23e+00 |
Pos logit contributions | |
stranger | +6.916 |
offending | +6.900 |
bothering | +6.819 |
involved | +6.597 |
occurring | +6.553 |
Neg logit contributions | |
izons | -8.419 |
Opportun | -7.898 |
onential | -7.834 |
�� | -7.791 |
leaps | -7.395 |
Token | was |
Token ID | 373 |
Feature activation | -2.88e+00 |
Pos logit contributions | |
stranger | +9.857 |
offending | +9.705 |
bothering | +9.656 |
pictured | +9.414 |
occurring | +9.310 |
Neg logit contributions | |
izons | -12.429 |
onential | -11.771 |
�� | -11.766 |
Opportun | -11.748 |
Flavoring | -11.155 |
Token | not |
Token ID | 407 |
Feature activation | -4.03e+00 |
Pos logit contributions | |
bothering | +11.730 |
stranger | +11.483 |
offending | +11.359 |
pictured | +11.280 |
wrong | +11.271 |
Neg logit contributions | |
izons | -16.336 |
�� | -15.523 |
onential | -15.506 |
Opportun | -15.385 |
Flavoring | -14.594 |
Token | treated |
Token ID | 5716 |
Feature activation | -1.93e+00 |
Pos logit contributions | |
bothering | +16.140 |
involved | +15.453 |
pictured | +15.447 |
offending | +15.353 |
wrong | +15.280 |
Neg logit contributions | |
izons | -20.357 |
�� | -20.088 |
onential | -19.654 |
Opportun | -19.387 |
Flavoring | -18.306 |
Token | . |
Token ID | 13 |
Feature activation | +7.40e-03 |
Pos logit contributions | |
bothering | +7.952 |
stranger | +7.847 |
offending | +7.758 |
occurring | +7.487 |
involved | +7.485 |
Neg logit contributions | |
izons | -11.618 |
Opportun | -10.913 |
�� | -10.770 |
onential | -10.755 |
Flavoring | -10.301 |
Token | https |
Token ID | 3740 |
Feature activation | -9.12e-01 |
Pos logit contributions | |
izons | +0.038 |
Opportun | +0.037 |
onential | +0.035 |
Strategies | +0.034 |
leaps | +0.034 |
Neg logit contributions | |
bothering | -0.038 |
offending | -0.038 |
stranger | -0.037 |
involved | -0.036 |
occurring | -0.036 |
Token | :// |
Token ID | 1378 |
Feature activation | -6.17e-01 |
Pos logit contributions | |
bothering | +4.535 |
stranger | +4.523 |
offending | +4.494 |
involved | +4.448 |
occurring | +4.425 |
Neg logit contributions | |
izons | -4.785 |
Opportun | -4.540 |
�� | -4.450 |
onential | -4.425 |
leaps | -4.217 |
Token | t |
Token ID | 83 |
Feature activation | -2.47e-01 |
Pos logit contributions | |
offending | +2.629 |
bothering | +2.626 |
stranger | +2.619 |
involved | +2.532 |
occurring | +2.500 |
Neg logit contributions | |
izons | -3.733 |
Opportun | -3.562 |
onential | -3.482 |
�� | -3.410 |
leaps | -3.361 |
Token | Trump |
Token ID | 1301 |
Feature activation | -3.09e-01 |
Pos logit contributions | |
bothering | +3.726 |
offending | +3.725 |
stranger | +3.720 |
involved | +3.604 |
occurring | +3.555 |
Neg logit contributions | |
izons | -5.615 |
Opportun | -5.376 |
onential | -5.245 |
�� | -5.170 |
leaps | -5.063 |
Token | 's |
Token ID | 338 |
Feature activation | -7.31e-01 |
Pos logit contributions | |
bothering | +1.338 |
stranger | +1.335 |
offending | +1.330 |
involved | +1.305 |
occurring | +1.269 |
Neg logit contributions | |
izons | -1.833 |
Opportun | -1.760 |
onential | -1.708 |
�� | -1.673 |
leaps | -1.663 |
Token | executive |
Token ID | 4640 |
Feature activation | -1.52e+00 |
Pos logit contributions | |
offending | +3.738 |
bothering | +3.731 |
stranger | +3.730 |
involved | +3.630 |
occurring | +3.597 |
Neg logit contributions | |
izons | -3.792 |
Opportun | -3.595 |
onential | -3.491 |
�� | -3.439 |
leaps | -3.340 |
Token | order |
Token ID | 1502 |
Feature activation | -2.07e+00 |
Pos logit contributions | |
offending | +6.340 |
stranger | +6.256 |
bothering | +6.110 |
occurring | +6.013 |
involved | +5.846 |
Neg logit contributions | |
izons | -9.206 |
Opportun | -8.793 |
�� | -8.639 |
onential | -8.539 |
Flavoring | -8.254 |
Token | was |
Token ID | 373 |
Feature activation | -2.95e+00 |
Pos logit contributions | |
offending | +9.571 |
bothering | +9.381 |
stranger | +9.358 |
occurring | +9.182 |
wrong | +9.002 |
Neg logit contributions | |
izons | -11.376 |
�� | -10.724 |
Opportun | -10.682 |
onential | -10.493 |
Flavoring | -9.950 |
Token | not |
Token ID | 407 |
Feature activation | -4.00e+00 |
Pos logit contributions | |
bothering | +11.875 |
occurring | +11.859 |
stranger | +11.833 |
wrong | +11.797 |
offending | +11.735 |
Neg logit contributions | |
izons | -16.755 |
�� | -16.215 |
Opportun | -15.735 |
onential | -15.509 |
Flavoring | -15.024 |
Token | in |
Token ID | 287 |
Feature activation | -1.32e+00 |
Pos logit contributions | |
occurring | +15.890 |
bothering | +15.745 |
offending | +15.722 |
happening | +15.577 |
wrong | +15.525 |
Neg logit contributions | |
izons | -20.508 |
�� | -20.285 |
Opportun | -19.259 |
onential | -19.185 |
Flavoring | -18.367 |
Token | fact |
Token ID | 1109 |
Feature activation | -4.07e+00 |
Pos logit contributions | |
offending | +5.187 |
stranger | +5.103 |
bothering | +5.094 |
involved | +4.926 |
occurring | +4.908 |
Neg logit contributions | |
izons | -8.357 |
Opportun | -8.033 |
�� | -7.885 |
onential | -7.787 |
Strategies | -7.440 |
Token | a |
Token ID | 257 |
Feature activation | -2.52e+00 |
Pos logit contributions | |
occurring | +16.056 |
offending | +15.822 |
happening | +15.710 |
wrong | +15.705 |
stranger | +15.594 |
Neg logit contributions | |
izons | -20.333 |
�� | -19.648 |
Opportun | -19.224 |
onential | -19.012 |
Flavoring | -18.247 |
Token | ban |
Token ID | 3958 |
Feature activation | -2.54e+00 |
Pos logit contributions | |
stranger | +11.378 |
offending | +11.211 |
bothering | +11.020 |
occurring | +10.806 |
wrong | +10.723 |
Neg logit contributions | |
izons | -13.923 |
�� | -12.878 |
Opportun | -12.817 |
onential | -12.638 |
Flavoring | -12.035 |
Token | . |
Token ID | 13 |
Feature activation | -4.03e-03 |
Pos logit contributions | |
occurring | +10.327 |
offending | +10.300 |
bothering | +10.247 |
stranger | +10.215 |
happening | +9.910 |
Neg logit contributions | |
izons | -14.795 |
Opportun | -13.769 |
�� | -13.751 |
onential | -13.231 |
Flavoring | -12.942 |
Token | She |
Token ID | 3347 |
Feature activation | -2.84e-01 |
Pos logit contributions | |
bothering | +2.875 |
offending | +2.865 |
stranger | +2.865 |
involved | +2.780 |
occurring | +2.733 |
Neg logit contributions | |
izons | -4.711 |
Opportun | -4.517 |
onential | -4.415 |
�� | -4.385 |
leaps | -4.272 |
Token | questioned |
Token ID | 11434 |
Feature activation | -9.44e-01 |
Pos logit contributions | |
stranger | +0.868 |
bothering | +0.867 |
offending | +0.865 |
involved | +0.832 |
occurring | +0.813 |
Neg logit contributions | |
izons | -2.052 |
Opportun | -1.965 |
onential | -1.925 |
�� | -1.900 |
leaps | -1.889 |
Token | if |
Token ID | 611 |
Feature activation | -1.95e+00 |
Pos logit contributions | |
offending | +2.724 |
bothering | +2.722 |
stranger | +2.715 |
involved | +2.574 |
occurring | +2.555 |
Neg logit contributions | |
izons | -6.995 |
Opportun | -6.744 |
onential | -6.602 |
�� | -6.554 |
leaps | -6.399 |
Token | it |
Token ID | 340 |
Feature activation | -7.36e-01 |
Pos logit contributions | |
stranger | +5.216 |
bothering | +5.196 |
offending | +5.174 |
occurring | +5.066 |
happening | +4.844 |
Neg logit contributions | |
izons | -14.594 |
�� | -14.005 |
Opportun | -13.969 |
onential | -13.672 |
Strategies | -13.239 |
Token | was |
Token ID | 373 |
Feature activation | -2.72e+00 |
Pos logit contributions | |
offending | +2.459 |
bothering | +2.457 |
stranger | +2.456 |
involved | +2.360 |
occurring | +2.326 |
Neg logit contributions | |
izons | -5.109 |
Opportun | -4.913 |
onential | -4.806 |
�� | -4.756 |
leaps | -4.650 |
Token | absolutely |
Token ID | 5543 |
Feature activation | -3.99e+00 |
Pos logit contributions | |
wrong | +8.166 |
offending | +7.947 |
happening | +7.886 |
bothering | +7.806 |
stranger | +7.753 |
Neg logit contributions | |
izons | -18.438 |
�� | -17.338 |
Opportun | -17.203 |
onential | -16.511 |
Flavoring | -16.356 |
Token | necessary |
Token ID | 3306 |
Feature activation | -1.55e+00 |
Pos logit contributions | |
wrong | +7.104 |
occurring | +6.432 |
happening | +6.316 |
bothering | +6.197 |
stranger | +6.071 |
Neg logit contributions | |
izons | -30.125 |
�� | -29.208 |
Opportun | -28.861 |
onential | -28.604 |
Flavoring | -27.731 |
Token | but |
Token ID | 475 |
Feature activation | -2.56e+00 |
Pos logit contributions | |
offending | +4.805 |
bothering | +4.772 |
stranger | +4.749 |
occurring | +4.556 |
wrong | +4.535 |
Neg logit contributions | |
izons | -10.993 |
�� | -10.610 |
Opportun | -10.524 |
onential | -10.308 |
Strategies | -9.851 |
Token | doctors |
Token ID | 7519 |
Feature activation | -6.64e-01 |
Pos logit contributions | |
pictured | +8.158 |
bothering | +8.102 |
offending | +8.044 |
stranger | +8.015 |
someone | +7.875 |
Neg logit contributions | |
�� | -16.766 |
izons | -16.569 |
Opportun | -16.320 |
onential | -16.011 |
Flavoring | -15.136 |
Token | said |
Token ID | 531 |
Feature activation | -1.78e+00 |
Pos logit contributions | |
offending | +2.681 |
bothering | +2.674 |
stranger | +2.658 |
involved | +2.591 |
occurring | +2.539 |
Neg logit contributions | |
izons | -4.150 |
Opportun | -3.974 |
onential | -3.885 |
�� | -3.783 |
leaps | -3.774 |
Token | she |
Token ID | 673 |
Feature activation | -1.50e-01 |
Pos logit contributions | |
stranger | +2.303 |
offending | +2.112 |
bothering | +2.085 |
occurring | +1.934 |
wrong | +1.917 |
Neg logit contributions | |
izons | -15.766 |
�� | -15.599 |
Opportun | -15.280 |
onential | -15.229 |
Flavoring | -14.554 |
Token | official |
Token ID | 1743 |
Feature activation | -6.16e-01 |
Pos logit contributions | |
izons | +0.330 |
Opportun | +0.318 |
onential | +0.306 |
leaps | +0.296 |
Strategies | +0.296 |
Neg logit contributions | |
bothering | -0.279 |
stranger | -0.276 |
offending | -0.275 |
involved | -0.269 |
occurring | -0.265 |
Token | , |
Token ID | 11 |
Feature activation | -7.37e-01 |
Pos logit contributions | |
bothering | +2.613 |
offending | +2.613 |
stranger | +2.609 |
involved | +2.532 |
occurring | +2.498 |
Neg logit contributions | |
izons | -3.725 |
Opportun | -3.564 |
onential | -3.475 |
�� | -3.426 |
leaps | -3.352 |
Token | who |
Token ID | 508 |
Feature activation | +1.47e-01 |
Pos logit contributions | |
bothering | +2.735 |
offending | +2.732 |
stranger | +2.727 |
involved | +2.639 |
occurring | +2.582 |
Neg logit contributions | |
izons | -4.839 |
Opportun | -4.657 |
onential | -4.540 |
�� | -4.461 |
leaps | -4.412 |
Token | wasn |
Token ID | 2492 |
Feature activation | -1.62e+00 |
Pos logit contributions | |
izons | +0.879 |
Opportun | +0.837 |
onential | +0.815 |
leaps | +0.814 |
strides | +0.798 |
Neg logit contributions | |
bothering | -0.622 |
offending | -0.607 |
stranger | -0.604 |
involved | -0.600 |
occurring | -0.573 |
Token | � |
Token ID | 447 |
Feature activation | -2.83e-02 |
Pos logit contributions | |
involved | +8.019 |
stranger | +7.968 |
bothering | +7.945 |
occurring | +7.807 |
pictured | +7.769 |
Neg logit contributions | |
izons | -8.289 |
�� | -7.930 |
Opportun | -7.672 |
onential | -7.637 |
leaps | -7.181 |
Token | � |
Token ID | 247 |
Feature activation | -3.98e+00 |
Pos logit contributions | |
bothering | +0.199 |
offending | +0.198 |
stranger | +0.198 |
involved | +0.195 |
occurring | +0.193 |
Neg logit contributions | |
izons | -0.092 |
Opportun | -0.084 |
onential | -0.081 |
�� | -0.080 |
leaps | -0.075 |
Token | t |
Token ID | 83 |
Feature activation | -3.54e+00 |
Pos logit contributions | |
involved | +15.737 |
pictured | +15.485 |
stranger | +14.983 |
referring | +14.795 |
offending | +14.686 |
Neg logit contributions | |
izons | -19.326 |
�� | -18.985 |
Opportun | -18.937 |
onential | -18.383 |
leaps | -17.981 |
Token | authorized |
Token ID | 10435 |
Feature activation | -2.18e+00 |
Pos logit contributions | |
pictured | +11.783 |
stranger | +11.387 |
bothering | +11.359 |
involved | +11.256 |
referring | +11.179 |
Neg logit contributions | |
izons | -21.250 |
Opportun | -20.226 |
�� | -20.083 |
onential | -19.990 |
Strategies | -18.950 |
Token | to |
Token ID | 284 |
Feature activation | -1.64e+00 |
Pos logit contributions | |
stranger | +7.103 |
bothering | +7.093 |
offending | +6.912 |
referring | +6.876 |
pictured | +6.853 |
Neg logit contributions | |
izons | -13.316 |
Opportun | -13.084 |
onential | -12.399 |
leaps | -12.136 |
Flavoring | -12.075 |
Token | speak |
Token ID | 2740 |
Feature activation | -7.47e-01 |
Pos logit contributions | |
stranger | +7.309 |
offending | +7.235 |
bothering | +7.185 |
pictured | +7.079 |
occurring | +7.071 |
Neg logit contributions | |
izons | -9.249 |
Opportun | -9.032 |
�� | -8.531 |
onential | -8.492 |
Strategies | -8.167 |
Token | to |
Token ID | 284 |
Feature activation | -7.34e-01 |
Pos logit contributions | |
stranger | +2.572 |
offending | +2.566 |
bothering | +2.561 |
involved | +2.443 |
occurring | +2.433 |
Neg logit contributions | |
izons | -5.098 |
Opportun | -4.911 |
onential | -4.802 |
�� | -4.767 |
leaps | -4.642 |
Token | the |
Token ID | 262 |
Feature activation | -5.39e-01 |
Pos logit contributions | |
stranger | +4.382 |
offending | +4.376 |
bothering | +4.368 |
involved | +4.209 |
occurring | +4.187 |
Neg logit contributions | |
izons | -5.592 |
Opportun | -5.287 |
onential | -5.202 |
�� | -5.146 |
leaps | -4.967 |
Token | spot |
Token ID | 4136 |
Feature activation | -2.14e+00 |
Pos logit contributions | |
bothering | +2.596 |
stranger | +2.575 |
offending | +2.571 |
involved | +2.528 |
occurring | +2.490 |
Neg logit contributions | |
izons | -2.950 |
Opportun | -2.816 |
onential | -2.722 |
�� | -2.675 |
leaps | -2.641 |
Token | where |
Token ID | 810 |
Feature activation | -2.66e+00 |
Pos logit contributions | |
offending | +8.107 |
stranger | +8.080 |
bothering | +8.029 |
pictured | +7.881 |
occurring | +7.748 |
Neg logit contributions | |
izons | -13.358 |
onential | -12.679 |
Opportun | -12.596 |
�� | -12.448 |
Flavoring | -12.069 |
Token | the |
Token ID | 262 |
Feature activation | -1.69e+00 |
Pos logit contributions | |
stranger | +12.760 |
offending | +12.610 |
bothering | +12.448 |
pictured | +12.323 |
someone | +12.216 |
Neg logit contributions | |
izons | -12.999 |
�� | -12.045 |
onential | -11.955 |
Opportun | -11.764 |
Flavoring | -11.312 |
Token | photo |
Token ID | 4590 |
Feature activation | -3.61e+00 |
Pos logit contributions | |
offending | +8.015 |
stranger | +7.965 |
bothering | +7.783 |
pictured | +7.568 |
occurring | +7.496 |
Neg logit contributions | |
izons | -9.276 |
Opportun | -8.679 |
onential | -8.676 |
�� | -8.568 |
Flavoring | -8.105 |
Token | was |
Token ID | 373 |
Feature activation | -3.94e+00 |
Pos logit contributions | |
stranger | +15.164 |
offending | +15.103 |
pictured | +14.938 |
wrong | +14.551 |
occurring | +14.542 |
Neg logit contributions | |
izons | -17.476 |
onential | -16.774 |
�� | -16.143 |
Flavoring | -16.091 |
Opportun | -15.661 |
Token | taken |
Token ID | 2077 |
Feature activation | -1.35e+00 |
Pos logit contributions | |
pictured | +13.115 |
occurring | +12.960 |
offending | +12.803 |
wrong | +12.738 |
happening | +12.653 |
Neg logit contributions | |
izons | -22.081 |
onential | -21.888 |
Opportun | -21.344 |
�� | -21.216 |
Flavoring | -21.015 |
Token | , |
Token ID | 11 |
Feature activation | -4.01e-01 |
Pos logit contributions | |
stranger | +6.454 |
offending | +6.446 |
bothering | +6.440 |
occurring | +6.259 |
pictured | +6.175 |
Neg logit contributions | |
izons | -7.310 |
Opportun | -6.935 |
onential | -6.845 |
�� | -6.772 |
Flavoring | -6.412 |
Token | and |
Token ID | 290 |
Feature activation | -1.14e+00 |
Pos logit contributions | |
offending | +1.655 |
stranger | +1.653 |
bothering | +1.650 |
involved | +1.635 |
occurring | +1.578 |
Neg logit contributions | |
izons | -2.436 |
Opportun | -2.351 |
onential | -2.268 |
leaps | -2.236 |
�� | -2.228 |
Token | he |
Token ID | 339 |
Feature activation | +8.93e-01 |
Pos logit contributions | |
bothering | +5.265 |
stranger | +5.263 |
offending | +5.251 |
involved | +5.084 |
occurring | +5.047 |
Neg logit contributions | |
izons | -6.452 |
Opportun | -6.129 |
onential | -5.998 |
�� | -5.913 |
leaps | -5.724 |
Token | smiled |
Token ID | 13541 |
Feature activation | +1.21e-01 |
Pos logit contributions | |
izons | +5.326 |
Opportun | +5.078 |
leaps | +5.052 |
strides | +4.937 |
onential | +4.853 |
Neg logit contributions | |
offending | -3.467 |
bothering | -3.453 |
involved | -3.427 |
stranger | -3.374 |
occurring | -3.226 |
Token | character |
Token ID | 2095 |
Feature activation | -2.55e+00 |
Pos logit contributions | |
offending | +5.205 |
stranger | +5.187 |
bothering | +5.144 |
involved | +4.961 |
occurring | +4.949 |
Neg logit contributions | |
izons | -6.979 |
Opportun | -6.632 |
onential | -6.527 |
�� | -6.493 |
Strategies | -6.196 |
Token | we |
Token ID | 356 |
Feature activation | -3.51e-01 |
Pos logit contributions | |
stranger | +11.177 |
bothering | +11.156 |
offending | +11.126 |
pictured | +10.937 |
occurring | +10.895 |
Neg logit contributions | |
izons | -13.297 |
Opportun | -12.694 |
onential | -12.620 |
�� | -12.590 |
Strategies | -11.669 |
Token | know |
Token ID | 760 |
Feature activation | -9.75e-01 |
Pos logit contributions | |
bothering | +1.799 |
offending | +1.798 |
stranger | +1.796 |
involved | +1.754 |
occurring | +1.733 |
Neg logit contributions | |
izons | -1.811 |
Opportun | -1.720 |
onential | -1.670 |
�� | -1.641 |
leaps | -1.598 |
Token | as |
Token ID | 355 |
Feature activation | -1.97e+00 |
Pos logit contributions | |
stranger | +5.070 |
bothering | +5.042 |
offending | +5.037 |
occurring | +4.895 |
involved | +4.882 |
Neg logit contributions | |
izons | -4.947 |
Opportun | -4.675 |
onential | -4.560 |
�� | -4.514 |
Strategies | -4.274 |
Token | � |
Token ID | 564 |
Feature activation | -5.78e-01 |
Pos logit contributions | |
stranger | +8.695 |
bothering | +8.562 |
offending | +8.515 |
occurring | +8.256 |
pictured | +8.250 |
Neg logit contributions | |
izons | -10.939 |
�� | -10.251 |
Opportun | -10.200 |
onential | -10.173 |
Strategies | -9.552 |
Token | � |
Token ID | 250 |
Feature activation | -3.91e+00 |
Pos logit contributions | |
bothering | +3.894 |
offending | +3.893 |
stranger | +3.891 |
involved | +3.817 |
occurring | +3.785 |
Neg logit contributions | |
izons | -2.044 |
Opportun | -1.893 |
onential | -1.809 |
�� | -1.757 |
leaps | -1.696 |
Token | Batman |
Token ID | 37039 |
Feature activation | -1.15e+00 |
Pos logit contributions | |
involved | +16.980 |
stranger | +16.929 |
REDACTED | +16.234 |
offending | +16.110 |
bothering | +16.094 |
Neg logit contributions | |
�� | -17.016 |
izons | -16.688 |
Opportun | -16.152 |
onential | -15.497 |
Flavoring | -15.346 |
Token | . |
Token ID | 13 |
Feature activation | -3.04e-01 |
Pos logit contributions | |
bothering | +5.715 |
stranger | +5.692 |
offending | +5.676 |
occurring | +5.495 |
involved | +5.487 |
Neg logit contributions | |
izons | -6.022 |
Opportun | -5.712 |
onential | -5.596 |
�� | -5.540 |
leaps | -5.265 |
Token | � |
Token ID | 447 |
Feature activation | +9.49e-01 |
Pos logit contributions | |
offending | +1.826 |
bothering | +1.812 |
stranger | +1.808 |
involved | +1.777 |
occurring | +1.747 |
Neg logit contributions | |
izons | -1.303 |
Opportun | -1.275 |
onential | -1.185 |
Strategies | -1.148 |
�� | -1.136 |
Token | � |
Token ID | 251 |
Feature activation | -1.77e+00 |
Pos logit contributions | |
izons | +2.260 |
�� | +2.080 |
Opportun | +1.992 |
onential | +1.936 |
leaps | +1.712 |
Neg logit contributions | |
offending | -7.211 |
bothering | -7.162 |
involved | -7.140 |
stranger | -7.127 |
occurring | -6.940 |
Token | Back |
Token ID | 5157 |
Feature activation | +2.02e-01 |
Pos logit contributions | |
stranger | +10.087 |
bothering | +10.075 |
offending | +9.963 |
involved | +9.846 |
occurring | +9.787 |
Neg logit contributions | |
izons | -7.696 |
�� | -7.016 |
onential | -6.956 |
Opportun | -6.914 |
leaps | -6.557 |
Token | with |
Token ID | 351 |
Feature activation | -6.21e-01 |
Pos logit contributions | |
bothering | +1.361 |
offending | +1.361 |
stranger | +1.359 |
involved | +1.335 |
occurring | +1.290 |
Neg logit contributions | |
izons | -1.556 |
Opportun | -1.501 |
onential | -1.437 |
leaps | -1.402 |
�� | -1.395 |
Token | those |
Token ID | 883 |
Feature activation | -1.40e+00 |
Pos logit contributions | |
bothering | +2.838 |
offending | +2.834 |
stranger | +2.830 |
involved | +2.757 |
occurring | +2.720 |
Neg logit contributions | |
izons | -3.548 |
Opportun | -3.387 |
onential | -3.295 |
�� | -3.240 |
leaps | -3.174 |
Token | who |
Token ID | 508 |
Feature activation | -3.49e-01 |
Pos logit contributions | |
stranger | +5.181 |
offending | +5.179 |
bothering | +5.175 |
involved | +4.961 |
occurring | +4.921 |
Neg logit contributions | |
izons | -9.187 |
Opportun | -8.746 |
onential | -8.616 |
�� | -8.565 |
leaps | -8.267 |
Token | are |
Token ID | 389 |
Feature activation | -1.76e+00 |
Pos logit contributions | |
offending | +1.905 |
bothering | +1.904 |
stranger | +1.898 |
involved | +1.864 |
occurring | +1.834 |
Neg logit contributions | |
izons | -1.686 |
Opportun | -1.597 |
onential | -1.541 |
�� | -1.491 |
leaps | -1.481 |
Token | not |
Token ID | 407 |
Feature activation | -2.87e+00 |
Pos logit contributions | |
stranger | +7.446 |
bothering | +7.424 |
offending | +7.391 |
occurring | +7.088 |
involved | +7.069 |
Neg logit contributions | |
izons | -10.503 |
Opportun | -9.894 |
onential | -9.818 |
�� | -9.782 |
leaps | -9.250 |
Token | actually |
Token ID | 1682 |
Feature activation | -3.89e+00 |
Pos logit contributions | |
bothering | +12.912 |
offending | +12.768 |
stranger | +12.580 |
occurring | +12.447 |
pictured | +12.297 |
Neg logit contributions | |
izons | -14.732 |
�� | -14.166 |
onential | -13.994 |
Opportun | -13.627 |
leaps | -12.696 |
Token | members |
Token ID | 1866 |
Feature activation | -1.45e+00 |
Pos logit contributions | |
bothering | +17.551 |
offending | +17.187 |
occurring | +16.632 |
stranger | +16.600 |
pictured | +16.515 |
Neg logit contributions | |
izons | -17.693 |
�� | -17.379 |
onential | -16.801 |
Opportun | -15.936 |
Flavoring | -15.087 |
Token | of |
Token ID | 286 |
Feature activation | -1.39e+00 |
Pos logit contributions | |
bothering | +6.414 |
offending | +6.387 |
stranger | +6.335 |
occurring | +6.237 |
involved | +6.195 |
Neg logit contributions | |
izons | -8.212 |
Opportun | -7.755 |
�� | -7.716 |
onential | -7.677 |
leaps | -7.279 |
Token | their |
Token ID | 511 |
Feature activation | -1.16e+00 |
Pos logit contributions | |
offending | +5.483 |
stranger | +5.476 |
bothering | +5.431 |
involved | +5.238 |
occurring | +5.217 |
Neg logit contributions | |
izons | -8.668 |
Opportun | -8.119 |
�� | -8.083 |
onential | -8.081 |
leaps | -7.734 |
Token | immediate |
Token ID | 7103 |
Feature activation | -1.92e+00 |
Pos logit contributions | |
offending | +6.020 |
stranger | +5.996 |
bothering | +5.961 |
involved | +5.786 |
occurring | +5.766 |
Neg logit contributions | |
izons | -5.884 |
Opportun | -5.525 |
onential | -5.416 |
�� | -5.382 |
leaps | -5.156 |
Token | family |
Token ID | 1641 |
Feature activation | -5.10e-01 |
Pos logit contributions | |
offending | +7.750 |
stranger | +7.699 |
bothering | +7.388 |
occurring | +7.216 |
involved | +7.192 |
Neg logit contributions | |
izons | -11.531 |
�� | -11.063 |
onential | -10.882 |
Opportun | -10.807 |
leaps | -10.410 |
Token | part |
Token ID | 636 |
Feature activation | -1.48e+00 |
Pos logit contributions | |
bothering | +2.352 |
stranger | +2.332 |
offending | +2.323 |
involved | +2.294 |
occurring | +2.238 |
Neg logit contributions | |
izons | -3.178 |
Opportun | -3.035 |
onential | -2.951 |
�� | -2.884 |
leaps | -2.874 |
Token | of |
Token ID | 286 |
Feature activation | -1.50e+00 |
Pos logit contributions | |
offending | +5.872 |
bothering | +5.811 |
stranger | +5.789 |
occurring | +5.695 |
involved | +5.686 |
Neg logit contributions | |
izons | -9.094 |
Opportun | -8.759 |
onential | -8.588 |
�� | -8.404 |
leaps | -8.220 |
Token | the |
Token ID | 262 |
Feature activation | -1.33e+00 |
Pos logit contributions | |
offending | +5.334 |
stranger | +5.309 |
bothering | +5.250 |
occurring | +5.082 |
involved | +4.960 |
Neg logit contributions | |
izons | -9.924 |
Opportun | -9.531 |
onential | -9.312 |
�� | -9.309 |
Flavoring | -8.887 |
Token | patient |
Token ID | 5827 |
Feature activation | -3.21e+00 |
Pos logit contributions | |
offending | +4.652 |
stranger | +4.632 |
bothering | +4.569 |
occurring | +4.359 |
involved | +4.348 |
Neg logit contributions | |
izons | -9.042 |
Opportun | -8.708 |
onential | -8.526 |
�� | -8.437 |
leaps | -8.151 |
Token | � |
Token ID | 447 |
Feature activation | -6.83e-02 |
Pos logit contributions | |
bothering | +13.849 |
offending | +13.711 |
stranger | +13.606 |
pictured | +13.500 |
occurring | +13.457 |
Neg logit contributions | |
izons | -16.651 |
Opportun | -15.619 |
�� | -15.195 |
onential | -15.060 |
Flavoring | -14.948 |
Token | � |
Token ID | 247 |
Feature activation | -3.84e+00 |
Pos logit contributions | |
offending | +0.503 |
bothering | +0.503 |
stranger | +0.501 |
involved | +0.494 |
occurring | +0.488 |
Neg logit contributions | |
izons | -0.200 |
Opportun | -0.181 |
onential | -0.172 |
�� | -0.171 |
leaps | -0.158 |
Token | s |
Token ID | 82 |
Feature activation | -2.12e+00 |
Pos logit contributions | |
involved | +17.439 |
offending | +17.031 |
bothering | +17.006 |
occurring | +16.950 |
stranger | +16.804 |
Neg logit contributions | |
izons | -17.125 |
Opportun | -16.256 |
Flavoring | -15.595 |
�� | -15.525 |
onential | -15.438 |
Token | brain |
Token ID | 3632 |
Feature activation | -1.39e+00 |
Pos logit contributions | |
offending | +8.420 |
stranger | +8.215 |
bothering | +8.104 |
occurring | +7.803 |
pictured | +7.691 |
Neg logit contributions | |
izons | -13.100 |
Opportun | -12.542 |
onential | -12.351 |
�� | -12.270 |
Flavoring | -11.773 |
Token | causing |
Token ID | 6666 |
Feature activation | -1.52e+00 |
Pos logit contributions | |
bothering | +5.274 |
offending | +5.246 |
stranger | +5.207 |
involved | +5.103 |
occurring | +5.039 |
Neg logit contributions | |
izons | -8.984 |
Opportun | -8.571 |
onential | -8.406 |
�� | -8.268 |
leaps | -8.040 |
Token | them |
Token ID | 606 |
Feature activation | -3.45e-01 |
Pos logit contributions | |
offending | +7.390 |
bothering | +7.271 |
stranger | +7.253 |
occurring | +7.032 |
involved | +6.943 |
Neg logit contributions | |
izons | -8.257 |
Opportun | -7.737 |
�� | -7.654 |
onential | -7.517 |
Strategies | -7.132 |
Token | , |
Token ID | 11 |
Feature activation | -3.41e-01 |
Pos logit contributions | |
bothering | +1.746 |
offending | +1.745 |
stranger | +1.744 |
involved | +1.701 |
occurring | +1.679 |
Neg logit contributions | |
izons | -1.802 |
Opportun | -1.714 |
onential | -1.664 |
�� | -1.633 |
leaps | -1.598 |
Token | , |
Token ID | 11 |
Feature activation | -1.70e-01 |
Pos logit contributions | |
offending | +5.451 |
bothering | +5.428 |
stranger | +5.409 |
occurring | +5.265 |
involved | +5.259 |
Neg logit contributions | |
izons | -5.967 |
Opportun | -5.591 |
onential | -5.478 |
�� | -5.470 |
leaps | -5.217 |
Token | neither |
Token ID | 6159 |
Feature activation | -2.22e+00 |
Pos logit contributions | |
bothering | +0.745 |
stranger | +0.743 |
offending | +0.741 |
involved | +0.734 |
occurring | +0.707 |
Neg logit contributions | |
izons | -0.982 |
Opportun | -0.953 |
onential | -0.921 |
leaps | -0.899 |
�� | -0.892 |
Token | of |
Token ID | 286 |
Feature activation | -5.54e-01 |
Pos logit contributions | |
offending | +8.821 |
stranger | +8.814 |
bothering | +8.809 |
occurring | +8.528 |
pictured | +8.428 |
Neg logit contributions | |
izons | -13.567 |
Opportun | -12.840 |
onential | -12.668 |
�� | -12.550 |
Flavoring | -11.988 |
Token | whom |
Token ID | 4150 |
Feature activation | -1.62e+00 |
Pos logit contributions | |
bothering | +2.112 |
stranger | +2.111 |
offending | +2.108 |
involved | +2.043 |
occurring | +2.006 |
Neg logit contributions | |
izons | -3.584 |
Opportun | -3.441 |
onential | -3.359 |
�� | -3.311 |
leaps | -3.256 |
Token | are |
Token ID | 389 |
Feature activation | -2.48e+00 |
Pos logit contributions | |
offending | +7.414 |
bothering | +7.398 |
stranger | +7.386 |
occurring | +7.148 |
pictured | +7.135 |
Neg logit contributions | |
izons | -9.108 |
Opportun | -8.605 |
onential | -8.460 |
�� | -8.399 |
Flavoring | -7.954 |
Token | actually |
Token ID | 1682 |
Feature activation | -3.84e+00 |
Pos logit contributions | |
offending | +11.385 |
bothering | +11.325 |
stranger | +11.293 |
pictured | +11.118 |
occurring | +10.977 |
Neg logit contributions | |
izons | -13.309 |
Opportun | -12.417 |
onential | -12.387 |
�� | -12.307 |
Flavoring | -11.547 |
Token | employed |
Token ID | 9322 |
Feature activation | -9.47e-01 |
Pos logit contributions | |
offending | +17.733 |
bothering | +17.505 |
pictured | +17.494 |
stranger | +17.299 |
occurring | +17.255 |
Neg logit contributions | |
izons | -17.081 |
�� | -16.125 |
onential | -15.870 |
Opportun | -15.541 |
Flavoring | -14.926 |
Token | by |
Token ID | 416 |
Feature activation | -1.42e+00 |
Pos logit contributions | |
bothering | +3.811 |
offending | +3.809 |
stranger | +3.804 |
involved | +3.689 |
occurring | +3.659 |
Neg logit contributions | |
izons | -5.895 |
Opportun | -5.603 |
onential | -5.508 |
�� | -5.450 |
leaps | -5.286 |
Token | Apple |
Token ID | 4196 |
Feature activation | -6.47e-01 |
Pos logit contributions | |
offending | +5.934 |
stranger | +5.882 |
bothering | +5.847 |
occurring | +5.653 |
involved | +5.625 |
Neg logit contributions | |
izons | -8.494 |
Opportun | -8.018 |
onential | -7.922 |
�� | -7.903 |
leaps | -7.520 |
Token | . |
Token ID | 13 |
Feature activation | +2.20e-01 |
Pos logit contributions | |
bothering | +3.209 |
offending | +3.207 |
stranger | +3.203 |
involved | +3.123 |
occurring | +3.087 |
Neg logit contributions | |
izons | -3.442 |
Opportun | -3.270 |
onential | -3.179 |
�� | -3.127 |
leaps | -3.050 |
Token | He |
Token ID | 679 |
Feature activation | +1.25e+00 |
Pos logit contributions | |
Opportun | +0.959 |
izons | +0.949 |
onential | +0.875 |
Strategies | +0.867 |
leaps | +0.839 |
Neg logit contributions | |
bothering | -1.280 |
offending | -1.279 |
stranger | -1.271 |
involved | -1.250 |
occurring | -1.225 |
Token | -- |
Token ID | 1377 |
Feature activation | -1.39e+00 |
Pos logit contributions | |
bothering | +3.553 |
offending | +3.552 |
stranger | +3.549 |
involved | +3.466 |
occurring | +3.416 |
Neg logit contributions | |
izons | -3.597 |
Opportun | -3.422 |
onential | -3.316 |
�� | -3.250 |
leaps | -3.189 |
Token | even |
Token ID | 772 |
Feature activation | -1.73e+00 |
Pos logit contributions | |
offending | +6.392 |
stranger | +6.381 |
bothering | +6.369 |
occurring | +6.143 |
involved | +6.132 |
Neg logit contributions | |
izons | -7.820 |
Opportun | -7.349 |
�� | -7.294 |
onential | -7.217 |
leaps | -6.846 |
Token | if |
Token ID | 611 |
Feature activation | -1.86e+00 |
Pos logit contributions | |
stranger | +6.112 |
bothering | +6.074 |
offending | +6.065 |
occurring | +5.750 |
involved | +5.608 |
Neg logit contributions | |
izons | -11.684 |
Opportun | -11.072 |
�� | -11.069 |
onential | -10.996 |
Flavoring | -10.472 |
Token | charges |
Token ID | 4530 |
Feature activation | -1.36e+00 |
Pos logit contributions | |
stranger | +6.821 |
offending | +6.816 |
bothering | +6.741 |
occurring | +6.474 |
involved | +6.427 |
Neg logit contributions | |
izons | -12.169 |
Opportun | -11.585 |
�� | -11.455 |
onential | -11.447 |
Flavoring | -10.994 |
Token | are |
Token ID | 389 |
Feature activation | -2.80e+00 |
Pos logit contributions | |
bothering | +7.567 |
offending | +7.551 |
stranger | +7.540 |
occurring | +7.406 |
involved | +7.309 |
Neg logit contributions | |
izons | -6.392 |
Opportun | -5.922 |
�� | -5.853 |
onential | -5.740 |
Flavoring | -5.509 |
Token | never |
Token ID | 1239 |
Feature activation | -3.83e+00 |
Pos logit contributions | |
stranger | +11.265 |
bothering | +11.165 |
occurring | +10.869 |
wrong | +10.791 |
offending | +10.746 |
Neg logit contributions | |
izons | -16.073 |
�� | -15.700 |
onential | -15.460 |
Opportun | -15.214 |
Flavoring | -14.746 |
Token | filed |
Token ID | 5717 |
Feature activation | -1.29e+00 |
Pos logit contributions | |
bothering | +11.854 |
occurring | +11.575 |
stranger | +11.363 |
happening | +11.211 |
wrong | +11.170 |
Neg logit contributions | |
izons | -23.531 |
�� | -23.342 |
Flavoring | -23.175 |
onential | -23.120 |
Opportun | -22.566 |
Token | . |
Token ID | 13 |
Feature activation | +5.03e-01 |
Pos logit contributions | |
bothering | +6.230 |
offending | +6.192 |
stranger | +6.188 |
occurring | +6.037 |
involved | +5.976 |
Neg logit contributions | |
izons | -6.928 |
Opportun | -6.590 |
onential | -6.456 |
�� | -6.444 |
leaps | -6.093 |
Token |
|
Token ID | 198 |
Feature activation | -1.88e+00 |
Pos logit contributions | |
izons | +2.272 |
Opportun | +2.214 |
onential | +2.062 |
Strategies | +2.023 |
�� | +1.989 |
Neg logit contributions | |
offending | -2.868 |
bothering | -2.856 |
stranger | -2.853 |
involved | -2.764 |
occurring | -2.745 |
Token |
|
Token ID | 198 |
Feature activation | -3.25e-01 |
Pos logit contributions | |
stranger | +8.256 |
bothering | +8.097 |
occurring | +8.078 |
someone | +8.054 |
involved | +8.033 |
Neg logit contributions | |
�� | -10.440 |
izons | -10.265 |
onential | -9.634 |
Opportun | -9.466 |
tremend | -9.462 |
Token | An |
Token ID | 2025 |
Feature activation | +2.75e-01 |
Pos logit contributions | |
offending | +1.484 |
bothering | +1.468 |
stranger | +1.461 |
occurring | +1.405 |
involved | +1.395 |
Neg logit contributions | |
izons | -1.889 |
Opportun | -1.798 |
onential | -1.748 |
�� | -1.686 |
Strategies | -1.677 |
Token |
|
Token ID | 198 |
Feature activation | -1.73e+00 |
Pos logit contributions | |
Opportun | +1.599 |
izons | +1.569 |
Strategies | +1.437 |
onential | +1.425 |
leaps | +1.377 |
Neg logit contributions | |
offending | -2.399 |
bothering | -2.365 |
stranger | -2.352 |
involved | -2.318 |
occurring | -2.273 |
Token |
|
Token ID | 198 |
Feature activation | -1.13e-01 |
Pos logit contributions | |
stranger | +7.848 |
bothering | +7.768 |
occurring | +7.696 |
involved | +7.669 |
someone | +7.621 |
Neg logit contributions | |
�� | -9.377 |
izons | -9.368 |
onential | -8.726 |
Opportun | -8.595 |
tremend | -8.362 |
Token | That |
Token ID | 2504 |
Feature activation | -9.22e-02 |
Pos logit contributions | |
offending | +0.567 |
bothering | +0.557 |
stranger | +0.553 |
occurring | +0.536 |
involved | +0.533 |
Neg logit contributions | |
izons | -0.602 |
Opportun | -0.575 |
onential | -0.554 |
leaps | -0.536 |
Strategies | -0.532 |
Token | wasn |
Token ID | 2492 |
Feature activation | -6.98e-01 |
Pos logit contributions | |
offending | +0.375 |
bothering | +0.373 |
stranger | +0.372 |
involved | +0.367 |
occurring | +0.354 |
Neg logit contributions | |
izons | -0.563 |
Opportun | -0.549 |
onential | -0.529 |
leaps | -0.521 |
�� | -0.514 |
Token | � |
Token ID | 447 |
Feature activation | -4.31e-01 |
Pos logit contributions | |
bothering | +3.851 |
stranger | +3.846 |
offending | +3.835 |
involved | +3.759 |
occurring | +3.727 |
Neg logit contributions | |
izons | -3.321 |
Opportun | -3.129 |
onential | -3.042 |
�� | -3.005 |
leaps | -2.889 |
Token | � |
Token ID | 247 |
Feature activation | -3.82e+00 |
Pos logit contributions | |
offending | +3.030 |
bothering | +3.030 |
stranger | +3.030 |
involved | +2.971 |
occurring | +2.952 |
Neg logit contributions | |
izons | -1.402 |
Opportun | -1.293 |
onential | -1.226 |
�� | -1.179 |
leaps | -1.140 |
Token | t |
Token ID | 83 |
Feature activation | -3.45e+00 |
Pos logit contributions | |
pictured | +13.864 |
stranger | +13.786 |
happening | +13.697 |
occurring | +13.616 |
involved | +13.580 |
Neg logit contributions | |
izons | -19.297 |
�� | -19.199 |
Opportun | -18.586 |
onential | -18.060 |
Flavoring | -17.282 |
Token | all |
Token ID | 477 |
Feature activation | -1.86e+00 |
Pos logit contributions | |
bothering | +15.078 |
stranger | +15.039 |
happening | +14.917 |
occurring | +14.582 |
wrong | +14.498 |
Neg logit contributions | |
izons | -16.744 |
�� | -16.008 |
onential | -15.661 |
Opportun | -15.304 |
Flavoring | -14.691 |
Token | for |
Token ID | 329 |
Feature activation | -7.72e-01 |
Pos logit contributions | |
stranger | +8.815 |
bothering | +8.720 |
offending | +8.616 |
occurring | +8.404 |
happening | +8.315 |
Neg logit contributions | |
izons | -10.103 |
onential | -9.411 |
�� | -9.397 |
Opportun | -9.360 |
Flavoring | -8.698 |
Token | Batt |
Token ID | 12350 |
Feature activation | -1.40e+00 |
Pos logit contributions | |
bothering | +3.601 |
stranger | +3.597 |
offending | +3.585 |
involved | +3.472 |
occurring | +3.452 |
Neg logit contributions | |
izons | -4.335 |
Opportun | -4.115 |
onential | -4.032 |
�� | -3.981 |
leaps | -3.859 |
Token | leground |
Token ID | 28272 |
Feature activation | -1.46e+00 |
Pos logit contributions | |
stranger | +6.304 |
involved | +6.107 |
bothering | +6.080 |
offending | +6.032 |
occurring | +5.926 |
Neg logit contributions | |
izons | -7.582 |
�� | -7.540 |
Opportun | -7.426 |
onential | -7.181 |
leaps | -6.954 |
Token | to |
Token ID | 284 |
Feature activation | +1.03e-01 |
Pos logit contributions | |
izons | +0.043 |
Opportun | +0.041 |
onential | +0.040 |
�� | +0.039 |
leaps | +0.039 |
Neg logit contributions | |
stranger | -0.038 |
bothering | -0.038 |
offending | -0.038 |
involved | -0.037 |
occurring | -0.036 |
Token | a |
Token ID | 257 |
Feature activation | +3.14e-02 |
Pos logit contributions | |
izons | +0.610 |
Opportun | +0.593 |
onential | +0.566 |
leaps | +0.557 |
Strategies | +0.552 |
Neg logit contributions | |
bothering | -0.439 |
offending | -0.434 |
stranger | -0.434 |
involved | -0.427 |
occurring | -0.415 |
Token | pdf |
Token ID | 37124 |
Feature activation | -1.46e+00 |
Pos logit contributions | |
izons | +0.172 |
Opportun | +0.166 |
onential | +0.159 |
leaps | +0.155 |
Strategies | +0.155 |
Neg logit contributions | |
bothering | -0.147 |
offending | -0.146 |
stranger | -0.145 |
involved | -0.144 |
occurring | -0.140 |
Token | that |
Token ID | 326 |
Feature activation | -5.64e-01 |
Pos logit contributions | |
offending | +6.315 |
stranger | +6.304 |
bothering | +6.285 |
involved | +6.074 |
occurring | +6.069 |
Neg logit contributions | |
izons | -8.602 |
Opportun | -8.137 |
onential | -7.955 |
�� | -7.874 |
leaps | -7.620 |
Token | is |
Token ID | 318 |
Feature activation | -1.48e+00 |
Pos logit contributions | |
bothering | +2.773 |
stranger | +2.763 |
offending | +2.762 |
involved | +2.686 |
occurring | +2.663 |
Neg logit contributions | |
izons | -3.032 |
Opportun | -2.901 |
onential | -2.788 |
�� | -2.734 |
leaps | -2.719 |
Token | NOT |
Token ID | 5626 |
Feature activation | -3.82e+00 |
Pos logit contributions | |
bothering | +6.744 |
offending | +6.717 |
stranger | +6.711 |
occurring | +6.465 |
pictured | +6.404 |
Neg logit contributions | |
izons | -8.468 |
Opportun | -8.001 |
onential | -7.887 |
�� | -7.821 |
Strategies | -7.429 |
Token | at |
Token ID | 379 |
Feature activation | -8.03e-01 |
Pos logit contributions | |
bothering | +17.585 |
offending | +17.459 |
pictured | +17.328 |
stranger | +17.230 |
occurring | +17.226 |
Neg logit contributions | |
izons | -17.222 |
�� | -16.097 |
Opportun | -15.975 |
onential | -15.962 |
Flavoring | -14.777 |
Token | Wikileaks |
Token ID | 35828 |
Feature activation | -1.44e+00 |
Pos logit contributions | |
offending | +3.872 |
stranger | +3.860 |
bothering | +3.858 |
involved | +3.751 |
occurring | +3.699 |
Neg logit contributions | |
izons | -4.376 |
Opportun | -4.160 |
onential | -4.036 |
�� | -3.972 |
leaps | -3.866 |
Token | . |
Token ID | 13 |
Feature activation | +2.65e-01 |
Pos logit contributions | |
offending | +6.949 |
bothering | +6.903 |
stranger | +6.857 |
involved | +6.697 |
occurring | +6.673 |
Neg logit contributions | |
izons | -7.600 |
Opportun | -7.150 |
onential | -7.071 |
�� | -7.048 |
leaps | -6.652 |
Token | It |
Token ID | 632 |
Feature activation | +7.39e-01 |
Pos logit contributions | |
izons | +1.290 |
Opportun | +1.235 |
onential | +1.169 |
�� | +1.139 |
Strategies | +1.132 |
Neg logit contributions | |
bothering | -1.420 |
offending | -1.418 |
stranger | -1.411 |
occurring | -1.361 |
involved | -1.359 |
Token | is |
Token ID | 318 |
Feature activation | -9.28e-01 |
Pos logit contributions | |
izons | +3.906 |
Opportun | +3.687 |
leaps | +3.627 |
onential | +3.506 |
strides | +3.456 |
Neg logit contributions | |
stranger | -3.515 |
bothering | -3.490 |
offending | -3.418 |
involved | -3.340 |
occurring | -3.305 |
Token | she |
Token ID | 673 |
Feature activation | -7.09e-01 |
Pos logit contributions | |
offending | +7.651 |
bothering | +7.634 |
stranger | +7.630 |
occurring | +7.332 |
involved | +7.248 |
Neg logit contributions | |
izons | -10.135 |
Opportun | -9.551 |
onential | -9.440 |
�� | -9.305 |
Flavoring | -8.939 |
Token | had |
Token ID | 550 |
Feature activation | -9.67e-01 |
Pos logit contributions | |
bothering | +3.027 |
offending | +3.018 |
stranger | +3.014 |
involved | +2.944 |
occurring | +2.883 |
Neg logit contributions | |
izons | -4.254 |
Opportun | -4.074 |
onential | -3.953 |
�� | -3.883 |
leaps | -3.865 |
Token | reported |
Token ID | 2098 |
Feature activation | -1.67e+00 |
Pos logit contributions | |
bothering | +3.914 |
offending | +3.911 |
stranger | +3.906 |
involved | +3.786 |
occurring | +3.730 |
Neg logit contributions | |
izons | -6.029 |
Opportun | -5.776 |
onential | -5.635 |
�� | -5.554 |
leaps | -5.445 |
Token | it |
Token ID | 340 |
Feature activation | -1.91e+00 |
Pos logit contributions | |
offending | +6.948 |
stranger | +6.920 |
bothering | +6.918 |
occurring | +6.661 |
involved | +6.479 |
Neg logit contributions | |
izons | -10.233 |
Opportun | -9.649 |
onential | -9.581 |
�� | -9.558 |
Flavoring | -9.103 |
Token | wasn |
Token ID | 2492 |
Feature activation | -2.33e+00 |
Pos logit contributions | |
bothering | +8.267 |
offending | +8.207 |
stranger | +8.190 |
occurring | +8.063 |
happening | +7.837 |
Neg logit contributions | |
izons | -11.108 |
Opportun | -10.506 |
onential | -10.487 |
�� | -10.394 |
Flavoring | -10.002 |
Token | 't |
Token ID | 470 |
Feature activation | -3.82e+00 |
Pos logit contributions | |
someone | +9.368 |
occurring | +9.335 |
wrong | +9.301 |
bothering | +9.265 |
stranger | +9.253 |
Neg logit contributions | |
�� | -13.514 |
izons | -13.233 |
tremend | -12.796 |
onential | -12.306 |
Opportun | -12.244 |
Token | . |
Token ID | 13 |
Feature activation | -4.28e-01 |
Pos logit contributions | |
bothering | +15.598 |
occurring | +15.354 |
happening | +15.152 |
offending | +14.863 |
stranger | +14.835 |
Neg logit contributions | |
izons | -20.373 |
onential | -18.893 |
�� | -18.874 |
Opportun | -18.321 |
Flavoring | -18.133 |
Token | He |
Token ID | 679 |
Feature activation | +1.26e+00 |
Pos logit contributions | |
bothering | +2.409 |
offending | +2.407 |
stranger | +2.405 |
involved | +2.352 |
occurring | +2.327 |
Neg logit contributions | |
izons | -1.998 |
Opportun | -1.886 |
onential | -1.824 |
�� | -1.788 |
leaps | -1.739 |
Token | met |
Token ID | 1138 |
Feature activation | -1.01e+00 |
Pos logit contributions | |
izons | +6.592 |
leaps | +6.257 |
Opportun | +6.214 |
strides | +6.072 |
economies | +6.010 |
Neg logit contributions | |
bothering | -5.611 |
involved | -5.549 |
stranger | -5.485 |
offending | -5.424 |
occurring | -5.222 |
Token | the |
Token ID | 262 |
Feature activation | -1.46e+00 |
Pos logit contributions | |
stranger | +4.312 |
offending | +4.288 |
bothering | +4.281 |
involved | +4.106 |
occurring | +4.100 |
Neg logit contributions | |
izons | -6.107 |
Opportun | -5.826 |
onential | -5.692 |
�� | -5.651 |
leaps | -5.485 |
Token | woman |
Token ID | 2415 |
Feature activation | -2.53e+00 |
Pos logit contributions | |
stranger | +6.159 |
offending | +6.155 |
bothering | +6.046 |
occurring | +5.809 |
involved | +5.804 |
Neg logit contributions | |
izons | -8.821 |
Opportun | -8.405 |
onential | -8.283 |
�� | -8.228 |
leaps | -7.902 |
Token | it |
Token ID | 340 |
Feature activation | -3.66e-01 |
Pos logit contributions | |
bothering | +6.895 |
stranger | +6.875 |
offending | +6.861 |
occurring | +6.555 |
involved | +6.465 |
Neg logit contributions | |
izons | -10.318 |
Opportun | -9.755 |
�� | -9.744 |
onential | -9.657 |
Flavoring | -9.173 |
Token | � |
Token ID | 447 |
Feature activation | +8.78e-01 |
Pos logit contributions | |
stranger | +1.874 |
offending | +1.874 |
bothering | +1.873 |
involved | +1.832 |
occurring | +1.803 |
Neg logit contributions | |
izons | -1.890 |
Opportun | -1.796 |
onential | -1.727 |
�� | -1.684 |
leaps | -1.682 |
Token | � |
Token ID | 247 |
Feature activation | -2.67e+00 |
Pos logit contributions | |
izons | +1.896 |
Opportun | +1.602 |
�� | +1.583 |
onential | +1.547 |
leaps | +1.413 |
Neg logit contributions | |
offending | -6.961 |
bothering | -6.905 |
stranger | -6.864 |
involved | -6.856 |
occurring | -6.729 |
Token | s |
Token ID | 82 |
Feature activation | -2.32e+00 |
Pos logit contributions | |
involved | +8.217 |
offending | +8.161 |
pictured | +8.114 |
occurring | +8.077 |
stranger | +8.010 |
Neg logit contributions | |
�� | -16.696 |
izons | -16.404 |
Opportun | -16.257 |
onential | -15.881 |
Flavoring | -15.615 |
Token | � |
Token ID | 564 |
Feature activation | +2.74e-01 |
Pos logit contributions | |
bothering | +9.856 |
offending | +9.522 |
stranger | +9.479 |
occurring | +9.371 |
happening | +9.223 |
Neg logit contributions | |
izons | -13.494 |
�� | -13.052 |
Opportun | -12.750 |
onential | -12.703 |
Flavoring | -12.074 |
Token | � |
Token ID | 250 |
Feature activation | -3.80e+00 |
Pos logit contributions | |
izons | +0.867 |
Opportun | +0.785 |
�� | +0.771 |
onential | +0.759 |
leaps | +0.701 |
Neg logit contributions | |
offending | -1.938 |
stranger | -1.932 |
bothering | -1.929 |
involved | -1.906 |
occurring | -1.875 |
Token | on |
Token ID | 261 |
Feature activation | -5.84e-01 |
Pos logit contributions | |
involved | +17.953 |
bothering | +17.374 |
occurring | +17.090 |
happening | +16.971 |
stranger | +16.932 |
Neg logit contributions | |
�� | -16.152 |
izons | -16.013 |
Opportun | -15.277 |
Flavoring | -15.063 |
onential | -14.839 |
Token | . |
Token ID | 13 |
Feature activation | +9.93e-02 |
Pos logit contributions | |
bothering | +2.891 |
offending | +2.888 |
stranger | +2.884 |
involved | +2.811 |
occurring | +2.781 |
Neg logit contributions | |
izons | -3.110 |
Opportun | -2.956 |
onential | -2.873 |
�� | -2.829 |
leaps | -2.756 |
Token | � |
Token ID | 447 |
Feature activation | +1.59e+00 |
Pos logit contributions | |
izons | +0.429 |
Opportun | +0.416 |
onential | +0.389 |
Strategies | +0.376 |
�� | +0.374 |
Neg logit contributions | |
bothering | -0.587 |
offending | -0.586 |
stranger | -0.583 |
involved | -0.573 |
occurring | -0.565 |
Token | � |
Token ID | 251 |
Feature activation | -1.29e+00 |
Pos logit contributions | |
izons | +4.048 |
�� | +3.863 |
Opportun | +3.789 |
onential | +3.446 |
Strategies | +3.253 |
Neg logit contributions | |
offending | -11.262 |
stranger | -11.214 |
bothering | -11.184 |
involved | -11.183 |
occurring | -10.876 |
Token | If |
Token ID | 1002 |
Feature activation | -1.60e+00 |
Pos logit contributions | |
stranger | +7.336 |
bothering | +7.331 |
offending | +7.310 |
occurring | +7.101 |
involved | +7.100 |
Neg logit contributions | |
izons | -5.852 |
Opportun | -5.332 |
�� | -5.331 |
onential | -5.318 |
leaps | -5.019 |
Token | saw |
Token ID | 2497 |
Feature activation | -3.76e-01 |
Pos logit contributions | |
izons | +3.665 |
Opportun | +3.572 |
onential | +3.409 |
leaps | +3.389 |
economies | +3.366 |
Neg logit contributions | |
stranger | -2.587 |
offending | -2.584 |
bothering | -2.552 |
involved | -2.494 |
occurring | -2.414 |
Token | she |
Token ID | 673 |
Feature activation | -9.31e-01 |
Pos logit contributions | |
bothering | +1.428 |
offending | +1.424 |
stranger | +1.421 |
involved | +1.384 |
occurring | +1.351 |
Neg logit contributions | |
izons | -2.424 |
Opportun | -2.361 |
onential | -2.266 |
�� | -2.224 |
leaps | -2.213 |
Token | wasn |
Token ID | 2492 |
Feature activation | -1.78e+00 |
Pos logit contributions | |
bothering | +3.696 |
offending | +3.694 |
stranger | +3.689 |
involved | +3.574 |
occurring | +3.519 |
Neg logit contributions | |
izons | -5.872 |
Opportun | -5.630 |
onential | -5.493 |
�� | -5.415 |
leaps | -5.313 |
Token | � |
Token ID | 447 |
Feature activation | +9.44e-02 |
Pos logit contributions | |
bothering | +8.361 |
stranger | +8.256 |
occurring | +8.209 |
involved | +8.193 |
offending | +8.051 |
Neg logit contributions | |
izons | -9.398 |
�� | -9.104 |
Opportun | -8.784 |
onential | -8.744 |
leaps | -8.209 |
Token | � |
Token ID | 247 |
Feature activation | -3.00e+00 |
Pos logit contributions | |
izons | +0.290 |
Opportun | +0.264 |
onential | +0.253 |
�� | +0.252 |
leaps | +0.234 |
Neg logit contributions | |
bothering | -0.678 |
offending | -0.677 |
stranger | -0.676 |
involved | -0.668 |
occurring | -0.658 |
Token | t |
Token ID | 83 |
Feature activation | -3.79e+00 |
Pos logit contributions | |
bothering | +11.516 |
offending | +11.468 |
involved | +11.258 |
stranger | +11.227 |
occurring | +11.122 |
Neg logit contributions | |
izons | -16.643 |
Opportun | -16.008 |
�� | -15.710 |
onential | -15.708 |
leaps | -15.191 |
Token | going |
Token ID | 1016 |
Feature activation | -7.74e-01 |
Pos logit contributions | |
bothering | +17.232 |
offending | +16.771 |
occurring | +15.900 |
wrong | +15.879 |
pictured | +15.860 |
Neg logit contributions | |
izons | -18.102 |
onential | -17.779 |
Opportun | -17.065 |
�� | -16.576 |
Flavoring | -16.056 |
Token | to |
Token ID | 284 |
Feature activation | -1.05e+00 |
Pos logit contributions | |
bothering | +3.606 |
offending | +3.605 |
stranger | +3.596 |
involved | +3.486 |
occurring | +3.453 |
Neg logit contributions | |
izons | -4.350 |
Opportun | -4.147 |
onential | -4.055 |
�� | -3.983 |
leaps | -3.890 |
Token | go |
Token ID | 467 |
Feature activation | -6.63e-01 |
Pos logit contributions | |
bothering | +5.516 |
offending | +5.508 |
stranger | +5.489 |
involved | +5.339 |
occurring | +5.302 |
Neg logit contributions | |
izons | -5.320 |
Opportun | -5.021 |
onential | -4.909 |
�� | -4.815 |
leaps | -4.669 |
Token | , |
Token ID | 11 |
Feature activation | -1.65e-01 |
Pos logit contributions | |
bothering | +3.093 |
offending | +3.091 |
stranger | +3.089 |
involved | +3.014 |
occurring | +2.971 |
Neg logit contributions | |
izons | -3.718 |
Opportun | -3.548 |
onential | -3.442 |
�� | -3.385 |
leaps | -3.320 |
Token | they |
Token ID | 484 |
Feature activation | +1.13e+00 |
Pos logit contributions | |
offending | +0.582 |
bothering | +0.581 |
stranger | +0.578 |
involved | +0.562 |
occurring | +0.551 |
Neg logit contributions | |
izons | -1.103 |
Opportun | -1.069 |
onential | -1.033 |
�� | -1.014 |
leaps | -1.008 |
Token | email |
Token ID | 3053 |
Feature activation | -1.64e+00 |
Pos logit contributions | |
offending | +4.070 |
bothering | +4.066 |
stranger | +4.056 |
involved | +3.929 |
occurring | +3.886 |
Neg logit contributions | |
izons | -5.343 |
Opportun | -5.081 |
onential | -4.966 |
�� | -4.910 |
leaps | -4.764 |
Token | server |
Token ID | 4382 |
Feature activation | -1.28e+00 |
Pos logit contributions | |
offending | +7.151 |
bothering | +7.137 |
stranger | +7.093 |
occurring | +6.798 |
involved | +6.738 |
Neg logit contributions | |
izons | -9.630 |
Opportun | -9.076 |
�� | -8.962 |
onential | -8.935 |
leaps | -8.460 |
Token | after |
Token ID | 706 |
Feature activation | -1.27e+00 |
Pos logit contributions | |
bothering | +6.260 |
offending | +6.247 |
stranger | +6.217 |
involved | +6.048 |
occurring | +6.021 |
Neg logit contributions | |
izons | -6.867 |
Opportun | -6.493 |
onential | -6.344 |
�� | -6.268 |
leaps | -6.025 |
Token | it |
Token ID | 340 |
Feature activation | +1.34e-01 |
Pos logit contributions | |
bothering | +5.434 |
offending | +5.420 |
stranger | +5.373 |
involved | +5.221 |
occurring | +5.196 |
Neg logit contributions | |
izons | -7.583 |
Opportun | -7.190 |
onential | -7.072 |
�� | -6.972 |
leaps | -6.702 |
Token | was |
Token ID | 373 |
Feature activation | -2.20e+00 |
Pos logit contributions | |
izons | +0.825 |
Opportun | +0.803 |
leaps | +0.768 |
onential | +0.768 |
�� | +0.755 |
Neg logit contributions | |
bothering | -0.523 |
stranger | -0.523 |
offending | -0.513 |
involved | -0.492 |
occurring | -0.485 |
Token | supposedly |
Token ID | 13519 |
Feature activation | -3.79e+00 |
Pos logit contributions | |
bothering | +7.539 |
stranger | +7.524 |
offending | +7.510 |
involved | +7.282 |
occurring | +7.251 |
Neg logit contributions | |
izons | -14.488 |
Opportun | -13.789 |
onential | -13.707 |
�� | -13.668 |
Flavoring | -13.143 |
Token | hacked |
Token ID | 19957 |
Feature activation | -5.35e-02 |
Pos logit contributions | |
bothering | +13.903 |
wrong | +13.803 |
offending | +13.749 |
stranger | +13.552 |
occurring | +13.412 |
Neg logit contributions | |
izons | -20.929 |
�� | -20.580 |
onential | -20.049 |
Opportun | -19.239 |
Flavoring | -19.185 |
Token | . |
Token ID | 13 |
Feature activation | +2.74e-01 |
Pos logit contributions | |
stranger | +0.265 |
offending | +0.264 |
bothering | +0.263 |
involved | +0.261 |
occurring | +0.252 |
Neg logit contributions | |
izons | -0.280 |
Opportun | -0.269 |
onential | -0.259 |
leaps | -0.253 |
�� | -0.251 |
Token | Instead |
Token ID | 5455 |
Feature activation | -1.53e-01 |
Pos logit contributions | |
Opportun | +1.160 |
izons | +1.151 |
Strategies | +1.052 |
onential | +1.039 |
�� | +1.003 |
Neg logit contributions | |
offending | -1.629 |
stranger | -1.614 |
bothering | -1.611 |
involved | -1.577 |
occurring | -1.547 |
Token | they |
Token ID | 484 |
Feature activation | +3.93e-01 |
Pos logit contributions | |
bothering | +0.744 |
offending | +0.744 |
stranger | +0.743 |
involved | +0.724 |
occurring | +0.714 |
Neg logit contributions | |
izons | -0.823 |
Opportun | -0.784 |
onential | -0.760 |
�� | -0.747 |
leaps | -0.731 |
Token | just |
Token ID | 655 |
Feature activation | -5.46e-01 |
Pos logit contributions | |
izons | +2.411 |
Opportun | +2.320 |
onential | +2.249 |
leaps | +2.215 |
�� | +2.183 |
Neg logit contributions | |
offending | -1.555 |
stranger | -1.547 |
bothering | -1.537 |
involved | -1.494 |
occurring | -1.457 |
Token | Outside |
Token ID | 30815 |
Feature activation | +3.80e-02 |
Pos logit contributions | |
izons | +0.170 |
Opportun | +0.166 |
onential | +0.156 |
Strategies | +0.152 |
leaps | +0.149 |
Neg logit contributions | |
offending | -0.164 |
bothering | -0.161 |
stranger | -0.160 |
occurring | -0.155 |
involved | -0.155 |
Token | the |
Token ID | 262 |
Feature activation | -4.01e-03 |
Pos logit contributions | |
izons | +0.220 |
Opportun | +0.212 |
onential | +0.204 |
leaps | +0.199 |
economies | +0.198 |
Neg logit contributions | |
bothering | -0.170 |
offending | -0.169 |
stranger | -0.168 |
involved | -0.165 |
occurring | -0.162 |
Token | European |
Token ID | 3427 |
Feature activation | +9.40e-02 |
Pos logit contributions | |
bothering | +0.025 |
offending | +0.025 |
stranger | +0.025 |
involved | +0.025 |
occurring | +0.024 |
Neg logit contributions | |
izons | -0.016 |
Opportun | -0.015 |
onential | -0.014 |
�� | -0.014 |
leaps | -0.014 |
Token | Union |
Token ID | 4479 |
Feature activation | +3.71e-01 |
Pos logit contributions | |
izons | +0.370 |
Opportun | +0.354 |
onential | +0.333 |
leaps | +0.320 |
�� | +0.317 |
Neg logit contributions | |
bothering | -0.593 |
offending | -0.593 |
stranger | -0.588 |
involved | -0.583 |
occurring | -0.574 |
Token | , |
Token ID | 11 |
Feature activation | +1.02e-01 |
Pos logit contributions | |
izons | +2.164 |
Opportun | +2.091 |
economies | +1.999 |
onential | +1.983 |
leaps | +1.971 |
Neg logit contributions | |
bothering | -1.599 |
offending | -1.592 |
stranger | -1.577 |
involved | -1.565 |
pictured | -1.501 |
Token | there |
Token ID | 612 |
Feature activation | +2.00e+00 |
Pos logit contributions | |
izons | +0.648 |
Opportun | +0.629 |
onential | +0.600 |
economies | +0.600 |
leaps | +0.595 |
Neg logit contributions | |
bothering | -0.394 |
offending | -0.390 |
involved | -0.390 |
stranger | -0.389 |
occurring | -0.369 |
Token | are |
Token ID | 389 |
Feature activation | +6.79e-01 |
Pos logit contributions | |
izons | +9.182 |
Opportun | +8.685 |
leaps | +8.557 |
economies | +8.391 |
onential | +8.209 |
Neg logit contributions | |
offending | -9.573 |
involved | -9.558 |
bothering | -9.557 |
stranger | -9.468 |
REDACTED | -8.895 |
Token | nearly |
Token ID | 3016 |
Feature activation | -3.94e-03 |
Pos logit contributions | |
izons | +3.559 |
Opportun | +3.441 |
economies | +3.312 |
leaps | +3.291 |
onential | +3.244 |
Neg logit contributions | |
involved | -3.291 |
bothering | -3.252 |
offending | -3.230 |
stranger | -3.214 |
pictured | -3.082 |
Token | 50 |
Token ID | 2026 |
Feature activation | +1.59e+00 |
Pos logit contributions | |
bothering | +0.023 |
offending | +0.023 |
involved | +0.023 |
stranger | +0.023 |
occurring | +0.022 |
Neg logit contributions | |
izons | -0.017 |
Opportun | -0.016 |
onential | -0.016 |
leaps | -0.015 |
�� | -0.015 |
Token | trade |
Token ID | 3292 |
Feature activation | +4.81e-01 |
Pos logit contributions | |
izons | +7.060 |
Opportun | +6.807 |
economies | +6.654 |
Strategies | +6.275 |
onential | +6.183 |
Neg logit contributions | |
bothering | -8.298 |
involved | -8.289 |
offending | -8.199 |
stranger | -8.157 |
pictured | -7.923 |
Token | agreements |
Token ID | 11704 |
Feature activation | +2.14e-01 |
Pos logit contributions | |
izons | +1.875 |
Opportun | +1.821 |
onential | +1.664 |
economies | +1.660 |
leaps | +1.645 |
Neg logit contributions | |
bothering | -3.009 |
stranger | -2.998 |
offending | -2.975 |
involved | -2.971 |
occurring | -2.887 |
Token | the |
Token ID | 262 |
Feature activation | +1.00e+00 |
Pos logit contributions | |
Opportun | +10.182 |
izons | +10.091 |
leaps | +9.729 |
onential | +9.683 |
economies | +9.470 |
Neg logit contributions | |
bothering | -8.086 |
stranger | -8.000 |
offending | -7.871 |
occurring | -7.557 |
involved | -7.557 |
Token | past |
Token ID | 1613 |
Feature activation | +1.14e+00 |
Pos logit contributions | |
izons | +5.025 |
Opportun | +4.904 |
leaps | +4.696 |
onential | +4.584 |
summers | +4.566 |
Neg logit contributions | |
involved | -4.914 |
bothering | -4.864 |
stranger | -4.724 |
offending | -4.716 |
occurring | -4.660 |
Token | decade |
Token ID | 5707 |
Feature activation | +8.17e-01 |
Pos logit contributions | |
izons | +4.944 |
Opportun | +4.793 |
leaps | +4.628 |
summers | +4.489 |
strides | +4.470 |
Neg logit contributions | |
bothering | -6.202 |
involved | -6.163 |
offending | -6.050 |
stranger | -5.926 |
occurring | -5.853 |
Token | , |
Token ID | 11 |
Feature activation | +5.69e-01 |
Pos logit contributions | |
izons | +4.135 |
Opportun | +3.982 |
onential | +3.801 |
leaps | +3.731 |
�� | +3.719 |
Neg logit contributions | |
bothering | -4.089 |
offending | -4.051 |
stranger | -4.030 |
involved | -3.981 |
occurring | -3.864 |
Token | Canada |
Token ID | 3340 |
Feature activation | +1.56e+00 |
Pos logit contributions | |
izons | +3.302 |
Opportun | +3.217 |
onential | +3.079 |
leaps | +3.053 |
economies | +3.023 |
Neg logit contributions | |
bothering | -2.429 |
involved | -2.398 |
offending | -2.371 |
stranger | -2.363 |
occurring | -2.256 |
Token | has |
Token ID | 468 |
Feature activation | +2.31e+00 |
Pos logit contributions | |
izons | +7.465 |
Opportun | +7.207 |
leaps | +6.813 |
onential | +6.759 |
Strategies | +6.752 |
Neg logit contributions | |
bothering | -7.805 |
involved | -7.718 |
offending | -7.697 |
stranger | -7.567 |
occurring | -7.234 |
Token | sh |
Token ID | 427 |
Feature activation | +2.09e+00 |
Pos logit contributions | |
izons | +13.459 |
leaps | +13.224 |
Opportun | +13.164 |
strides | +12.955 |
economies | +12.934 |
Neg logit contributions | |
bothering | -7.161 |
involved | -7.041 |
offending | -6.776 |
stranger | -6.678 |
REDACTED | -6.458 |
Token | ir |
Token ID | 343 |
Feature activation | +9.27e-01 |
Pos logit contributions | |
izons | +7.896 |
Opportun | +7.005 |
onential | +6.893 |
ights | +6.517 |
Strategies | +6.430 |
Neg logit contributions | |
bothering | -12.244 |
offending | -11.844 |
stranger | -11.488 |
occurring | -11.376 |
REDACTED | -11.223 |
Token | ked |
Token ID | 9091 |
Feature activation | +8.80e-01 |
Pos logit contributions | |
izons | +3.021 |
Opportun | +2.624 |
onential | +2.497 |
leaps | +2.433 |
economies | +2.284 |
Neg logit contributions | |
bothering | -6.458 |
stranger | -6.429 |
offending | -6.382 |
occurring | -6.348 |
involved | -6.196 |
Token | several |
Token ID | 1811 |
Feature activation | +9.47e-01 |
Pos logit contributions | |
izons | +4.896 |
Opportun | +4.785 |
leaps | +4.595 |
onential | +4.511 |
economies | +4.493 |
Neg logit contributions | |
involved | -3.776 |
stranger | -3.726 |
bothering | -3.698 |
offending | -3.617 |
occurring | -3.563 |
Token | of |
Token ID | 286 |
Feature activation | +1.13e+00 |
Pos logit contributions | |
izons | +4.829 |
Opportun | +4.763 |
leaps | +4.542 |
economies | +4.479 |
strides | +4.459 |
Neg logit contributions | |
involved | -4.511 |
bothering | -4.464 |
stranger | -4.379 |
offending | -4.325 |
occurring | -4.234 |
Token | ben |
Token ID | 11722 |
Feature activation | +1.81e-01 |
Pos logit contributions | |
bothering | +3.593 |
stranger | +3.593 |
offending | +3.588 |
involved | +3.527 |
occurring | +3.494 |
Neg logit contributions | |
izons | -1.965 |
Opportun | -1.830 |
onential | -1.745 |
�� | -1.714 |
leaps | -1.639 |
Token | . |
Token ID | 13 |
Feature activation | +6.13e-01 |
Pos logit contributions | |
izons | +0.940 |
Opportun | +0.904 |
onential | +0.871 |
leaps | +0.849 |
�� | +0.843 |
Neg logit contributions | |
stranger | -0.904 |
offending | -0.904 |
bothering | -0.899 |
involved | -0.889 |
occurring | -0.867 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | -2.64e-01 |
Pos logit contributions | |
Opportun | +2.569 |
izons | +2.484 |
Strategies | +2.288 |
onential | +2.230 |
Areas | +2.174 |
Neg logit contributions | |
offending | -3.720 |
bothering | -3.664 |
stranger | -3.643 |
involved | -3.626 |
occurring | -3.544 |
Token | � |
Token ID | 447 |
Feature activation | +1.53e+00 |
Pos logit contributions | |
offending | +1.280 |
bothering | +1.268 |
stranger | +1.262 |
occurring | +1.215 |
involved | +1.214 |
Neg logit contributions | |
izons | -1.454 |
Opportun | -1.380 |
onential | -1.343 |
�� | -1.305 |
leaps | -1.290 |
Token | � |
Token ID | 250 |
Feature activation | -1.03e+00 |
Pos logit contributions | |
Opportun | +3.877 |
izons | +3.872 |
�� | +3.768 |
onential | +3.541 |
Strategies | +3.221 |
Neg logit contributions | |
bothering | -10.792 |
offending | -10.756 |
involved | -10.687 |
stranger | -10.662 |
pictured | -10.318 |
Token | We |
Token ID | 1135 |
Feature activation | +2.81e+00 |
Pos logit contributions | |
stranger | +5.850 |
bothering | +5.840 |
offending | +5.832 |
involved | +5.698 |
occurring | +5.661 |
Neg logit contributions | |
izons | -4.692 |
Opportun | -4.352 |
onential | -4.247 |
�� | -4.188 |
leaps | -4.063 |
Token | � |
Token ID | 447 |
Feature activation | +3.95e-01 |
Pos logit contributions | |
izons | +13.843 |
Opportun | +13.654 |
ights | +13.247 |
onential | +13.071 |
ighed | +12.855 |
Neg logit contributions | |
offending | -10.725 |
stranger | -10.515 |
bothering | -10.473 |
involved | -10.250 |
occurring | -9.900 |
Token | � |
Token ID | 247 |
Feature activation | -4.99e-01 |
Pos logit contributions | |
izons | +1.096 |
Opportun | +1.004 |
�� | +0.978 |
onential | +0.960 |
leaps | +0.864 |
Neg logit contributions | |
offending | -2.939 |
bothering | -2.938 |
stranger | -2.934 |
involved | -2.885 |
occurring | -2.848 |
Token | re |
Token ID | 260 |
Feature activation | +1.12e+00 |
Pos logit contributions | |
bothering | +2.838 |
offending | +2.837 |
stranger | +2.835 |
involved | +2.775 |
occurring | +2.745 |
Neg logit contributions | |
izons | -2.292 |
Opportun | -2.159 |
onential | -2.088 |
�� | -2.047 |
leaps | -1.991 |
Token | not |
Token ID | 407 |
Feature activation | -5.35e-01 |
Pos logit contributions | |
izons | +6.771 |
Opportun | +6.640 |
onential | +6.334 |
leaps | +6.311 |
Strategies | +6.238 |
Neg logit contributions | |
offending | -4.062 |
stranger | -4.038 |
bothering | -3.978 |
involved | -3.890 |
occurring | -3.801 |
Token | always |
Token ID | 1464 |
Feature activation | -1.08e+00 |
Pos logit contributions | |
offending | +2.075 |
stranger | +2.068 |
bothering | +2.055 |
involved | +2.015 |
occurring | +1.979 |
Neg logit contributions | |
izons | -3.399 |
Opportun | -3.267 |
onential | -3.180 |
�� | -3.121 |
leaps | -3.103 |
Token | off |
Token ID | 2364 |
Feature activation | +1.08e+00 |
Pos logit contributions | |
izons | +3.741 |
Opportun | +3.510 |
onential | +3.371 |
leaps | +3.099 |
Strategies | +3.083 |
Neg logit contributions | |
stranger | -8.471 |
bothering | -8.444 |
offending | -8.364 |
occurring | -8.027 |
pictured | -7.990 |
Token | , |
Token ID | 11 |
Feature activation | +7.22e-01 |
Pos logit contributions | |
izons | +5.993 |
Opportun | +5.790 |
economies | +5.678 |
onential | +5.678 |
leaps | +5.619 |
Neg logit contributions | |
stranger | -4.498 |
offending | -4.419 |
involved | -4.404 |
bothering | -4.404 |
REDACTED | -4.080 |
Token | and |
Token ID | 290 |
Feature activation | +5.62e-01 |
Pos logit contributions | |
izons | +4.149 |
Opportun | +4.147 |
onential | +3.976 |
economies | +3.931 |
leaps | +3.903 |
Neg logit contributions | |
stranger | -2.945 |
involved | -2.922 |
bothering | -2.908 |
offending | -2.874 |
pictured | -2.723 |
Token | may |
Token ID | 743 |
Feature activation | +5.00e-01 |
Pos logit contributions | |
izons | +3.229 |
Opportun | +3.213 |
onential | +3.060 |
economies | +3.027 |
leaps | +3.007 |
Neg logit contributions | |
bothering | -2.343 |
stranger | -2.338 |
involved | -2.334 |
offending | -2.327 |
occurring | -2.206 |
Token | allow |
Token ID | 1249 |
Feature activation | +1.33e+00 |
Pos logit contributions | |
izons | +2.644 |
Opportun | +2.567 |
onential | +2.495 |
leaps | +2.436 |
economies | +2.406 |
Neg logit contributions | |
involved | -2.368 |
bothering | -2.367 |
stranger | -2.349 |
offending | -2.333 |
occurring | -2.237 |
Token | us |
Token ID | 514 |
Feature activation | +2.06e+00 |
Pos logit contributions | |
izons | +7.212 |
Opportun | +7.006 |
economies | +6.724 |
onential | +6.665 |
leaps | +6.612 |
Neg logit contributions | |
bothering | -5.824 |
involved | -5.753 |
stranger | -5.746 |
offending | -5.717 |
pictured | -5.540 |
Token | to |
Token ID | 284 |
Feature activation | +1.86e+00 |
Pos logit contributions | |
izons | +9.893 |
Opportun | +9.145 |
onential | +8.886 |
economies | +8.600 |
leaps | +8.570 |
Neg logit contributions | |
bothering | -10.216 |
offending | -10.185 |
involved | -10.140 |
stranger | -10.084 |
pictured | -9.745 |
Token | achieve |
Token ID | 4620 |
Feature activation | +1.53e+00 |
Pos logit contributions | |
izons | +8.797 |
Opportun | +8.280 |
leaps | +8.211 |
economies | +8.180 |
onential | +7.995 |
Neg logit contributions | |
involved | -9.165 |
bothering | -8.809 |
offending | -8.716 |
stranger | -8.534 |
REDACTED | -8.322 |
Token | very |
Token ID | 845 |
Feature activation | +1.07e+00 |
Pos logit contributions | |
izons | +8.054 |
Opportun | +7.864 |
economies | +7.703 |
leaps | +7.618 |
onential | +7.565 |
Neg logit contributions | |
bothering | -6.868 |
involved | -6.806 |
offending | -6.751 |
stranger | -6.599 |
occurring | -6.363 |
Token | high |
Token ID | 1029 |
Feature activation | +1.89e+00 |
Pos logit contributions | |
izons | +5.168 |
Opportun | +4.921 |
leaps | +4.837 |
economies | +4.775 |
onential | +4.768 |
Neg logit contributions | |
involved | -5.399 |
bothering | -5.381 |
offending | -5.367 |
stranger | -5.273 |
occurring | -5.058 |
Token | capacity |
Token ID | 5339 |
Feature activation | +2.84e-01 |
Pos logit contributions | |
izons | +9.513 |
Opportun | +9.304 |
economies | +9.231 |
leaps | +9.081 |
strides | +8.821 |
Neg logit contributions | |
bothering | -8.341 |
involved | -8.340 |
offending | -8.007 |
stranger | -7.997 |
pictured | -7.618 |
Token | fellow |
Token ID | 5891 |
Feature activation | +2.29e-02 |
Pos logit contributions | |
izons | +3.556 |
Opportun | +3.478 |
leaps | +3.245 |
economies | +3.227 |
strides | +3.167 |
Neg logit contributions | |
bothering | -3.216 |
involved | -3.173 |
offending | -3.109 |
stranger | -3.009 |
occurring | -2.990 |
Token | men |
Token ID | 1450 |
Feature activation | +9.80e-01 |
Pos logit contributions | |
izons | +0.128 |
Opportun | +0.122 |
onential | +0.117 |
leaps | +0.115 |
�� | +0.114 |
Neg logit contributions | |
offending | -0.108 |
bothering | -0.108 |
involved | -0.106 |
stranger | -0.105 |
occurring | -0.103 |
Token | . |
Token ID | 13 |
Feature activation | +1.64e+00 |
Pos logit contributions | |
izons | +4.615 |
Opportun | +4.309 |
onential | +4.167 |
leaps | +4.045 |
�� | +3.957 |
Neg logit contributions | |
bothering | -5.223 |
offending | -5.192 |
stranger | -5.118 |
involved | -5.060 |
occurring | -4.960 |
Token | I |
Token ID | 314 |
Feature activation | +2.94e+00 |
Pos logit contributions | |
izons | +9.302 |
Opportun | +9.280 |
Strategies | +8.457 |
onential | +8.384 |
Flavoring | +8.241 |
Neg logit contributions | |
bothering | -6.927 |
offending | -6.805 |
stranger | -6.611 |
occurring | -6.468 |
involved | -6.454 |
Token | leave |
Token ID | 2666 |
Feature activation | +2.15e+00 |
Pos logit contributions | |
izons | +11.264 |
stride | +9.994 |
Opportun | +9.733 |
onential | +9.615 |
leaps | +9.558 |
Neg logit contributions | |
involved | -14.749 |
bothering | -14.735 |
offending | -14.460 |
occurring | -14.391 |
stranger | -14.116 |
Token | you |
Token ID | 345 |
Feature activation | +2.40e+00 |
Pos logit contributions | |
izons | +14.339 |
Opportun | +13.665 |
leaps | +13.067 |
onential | +13.064 |
�� | +13.001 |
Neg logit contributions | |
bothering | -6.041 |
involved | -5.889 |
stranger | -5.843 |
offending | -5.761 |
occurring | -5.597 |
Token | a |
Token ID | 257 |
Feature activation | +1.85e+00 |
Pos logit contributions | |
Opportun | +12.988 |
izons | +12.958 |
leaps | +12.326 |
economies | +12.139 |
strides | +11.936 |
Neg logit contributions | |
involved | -8.388 |
bothering | -8.293 |
offending | -8.095 |
stranger | -7.867 |
occurring | -7.772 |
Token | responsibility |
Token ID | 5798 |
Feature activation | +1.89e+00 |
Pos logit contributions | |
izons | +8.891 |
Opportun | +8.629 |
leaps | +8.351 |
stride | +8.139 |
economies | +8.054 |
Neg logit contributions | |
involved | -8.217 |
offending | -8.121 |
bothering | -8.046 |
occurring | -7.696 |
stranger | -7.538 |
Token | to |
Token ID | 284 |
Feature activation | +1.66e+00 |
Pos logit contributions | |
izons | +9.283 |
Opportun | +8.782 |
onential | +8.568 |
leaps | +8.501 |
economies | +8.317 |
Neg logit contributions | |
offending | -8.222 |
bothering | -8.220 |
involved | -8.097 |
stranger | -8.035 |
occurring | -7.772 |
Token | our |
Token ID | 674 |
Feature activation | +1.64e+00 |
Pos logit contributions | |
Opportun | +7.854 |
izons | +7.714 |
leaps | +7.394 |
economies | +7.350 |
stride | +7.290 |
Neg logit contributions | |
involved | -7.819 |
bothering | -7.650 |
offending | -7.459 |
stranger | -7.371 |
occurring | -7.281 |
Token | young |
Token ID | 1862 |
Feature activation | +6.12e-01 |
Pos logit contributions | |
izons | +8.404 |
Opportun | +8.255 |
economies | +7.944 |
leaps | +7.694 |
strides | +7.548 |
Neg logit contributions | |
involved | -7.272 |
bothering | -7.251 |
offending | -6.859 |
pictured | -6.739 |
occurring | -6.722 |
Token | � |
Token ID | 247 |
Feature activation | -2.00e+00 |
Pos logit contributions | |
izons | +2.232 |
�� | +1.927 |
Opportun | +1.912 |
onential | +1.861 |
leaps | +1.651 |
Neg logit contributions | |
bothering | -8.517 |
offending | -8.492 |
involved | -8.450 |
stranger | -8.365 |
occurring | -8.237 |
Token | s |
Token ID | 82 |
Feature activation | -7.99e-01 |
Pos logit contributions | |
bothering | +8.560 |
stranger | +8.522 |
offending | +8.451 |
pictured | +8.118 |
occurring | +8.102 |
Neg logit contributions | |
izons | -11.323 |
onential | -10.497 |
Opportun | -10.454 |
�� | -10.422 |
leaps | -9.830 |
Token | solo |
Token ID | 12199 |
Feature activation | -6.87e-01 |
Pos logit contributions | |
stranger | +4.356 |
offending | +4.352 |
bothering | +4.334 |
involved | +4.189 |
occurring | +4.170 |
Neg logit contributions | |
izons | -3.867 |
Opportun | -3.589 |
onential | -3.542 |
�� | -3.480 |
leaps | -3.328 |
Token | record |
Token ID | 1700 |
Feature activation | -7.42e-02 |
Pos logit contributions | |
offending | +3.093 |
bothering | +3.091 |
stranger | +3.090 |
involved | +2.977 |
occurring | +2.959 |
Neg logit contributions | |
izons | -3.972 |
Opportun | -3.771 |
onential | -3.699 |
�� | -3.638 |
leaps | -3.537 |
Token | . |
Token ID | 13 |
Feature activation | +9.90e-01 |
Pos logit contributions | |
offending | +0.390 |
bothering | +0.389 |
stranger | +0.389 |
involved | +0.384 |
occurring | +0.375 |
Neg logit contributions | |
izons | -0.370 |
Opportun | -0.356 |
onential | -0.341 |
�� | -0.334 |
leaps | -0.329 |
Token | � |
Token ID | 564 |
Feature activation | +1.63e+00 |
Pos logit contributions | |
izons | +4.981 |
Opportun | +4.886 |
onential | +4.572 |
Strategies | +4.487 |
�� | +4.471 |
Neg logit contributions | |
offending | -4.909 |
stranger | -4.867 |
bothering | -4.857 |
involved | -4.824 |
occurring | -4.683 |
Token | � |
Token ID | 250 |
Feature activation | -1.08e+00 |
Pos logit contributions | |
izons | +3.504 |
Opportun | +3.218 |
�� | +3.184 |
onential | +3.098 |
Strategies | +2.766 |
Neg logit contributions | |
offending | -12.194 |
involved | -12.040 |
stranger | -11.940 |
bothering | -11.883 |
occurring | -11.585 |
Token | He |
Token ID | 1544 |
Feature activation | +1.59e+00 |
Pos logit contributions | |
bothering | +4.873 |
stranger | +4.866 |
offending | +4.847 |
involved | +4.731 |
occurring | +4.659 |
Neg logit contributions | |
izons | -6.207 |
Opportun | -5.920 |
onential | -5.775 |
�� | -5.709 |
leaps | -5.554 |
Token | has |
Token ID | 468 |
Feature activation | +8.22e-01 |
Pos logit contributions | |
izons | +8.717 |
leaps | +8.259 |
Opportun | +8.153 |
strides | +7.971 |
onential | +7.759 |
Neg logit contributions | |
offending | -6.539 |
stranger | -6.517 |
involved | -6.447 |
bothering | -6.443 |
occurring | -6.265 |
Token | the |
Token ID | 262 |
Feature activation | +4.54e-01 |
Pos logit contributions | |
izons | +4.983 |
Opportun | +4.846 |
onential | +4.605 |
leaps | +4.594 |
�� | +4.495 |
Neg logit contributions | |
involved | -3.255 |
offending | -3.226 |
bothering | -3.209 |
stranger | -3.156 |
occurring | -3.087 |
Token | greatest |
Token ID | 6000 |
Feature activation | +3.79e-01 |
Pos logit contributions | |
izons | +2.543 |
Opportun | +2.479 |
leaps | +2.328 |
onential | +2.315 |
Strategies | +2.274 |
Neg logit contributions | |
involved | -2.050 |
bothering | -2.048 |
offending | -2.003 |
stranger | -1.993 |
occurring | -1.981 |
Token | gray |
Token ID | 12768 |
Feature activation | +3.61e-01 |
Pos logit contributions | |
izons | +1.927 |
Opportun | +1.849 |
onential | +1.797 |
leaps | +1.749 |
Strategies | +1.707 |
Neg logit contributions | |
bothering | -1.568 |
involved | -1.564 |
offending | -1.564 |
stranger | -1.551 |
occurring | -1.481 |
Token | = |
Token ID | 796 |
Feature activation | -7.17e-01 |
Pos logit contributions | |
izons | +1.788 |
Opportun | +1.656 |
onential | +1.648 |
�� | +1.558 |
leaps | +1.552 |
Neg logit contributions | |
bothering | -1.930 |
offending | -1.921 |
stranger | -1.908 |
involved | -1.874 |
occurring | -1.847 |
Token | c |
Token ID | 269 |
Feature activation | -7.80e-01 |
Pos logit contributions | |
stranger | +3.333 |
offending | +3.324 |
bothering | +3.308 |
occurring | +3.184 |
pictured | +3.150 |
Neg logit contributions | |
izons | -4.031 |
Opportun | -3.825 |
onential | -3.752 |
�� | -3.737 |
leaps | -3.559 |
Token | v |
Token ID | 85 |
Feature activation | +1.06e-01 |
Pos logit contributions | |
stranger | +3.456 |
bothering | +3.444 |
offending | +3.433 |
involved | +3.392 |
occurring | +3.325 |
Neg logit contributions | |
izons | -4.498 |
Opportun | -4.348 |
�� | -4.217 |
onential | -4.190 |
leaps | -4.045 |
Token | 2 |
Token ID | 17 |
Feature activation | -7.64e-01 |
Pos logit contributions | |
izons | +0.371 |
Opportun | +0.341 |
onential | +0.336 |
leaps | +0.312 |
Strategies | +0.311 |
Neg logit contributions | |
offending | -0.720 |
bothering | -0.710 |
stranger | -0.703 |
involved | -0.688 |
occurring | -0.679 |
Token | . |
Token ID | 13 |
Feature activation | +7.85e-01 |
Pos logit contributions | |
stranger | +3.906 |
bothering | +3.905 |
offending | +3.887 |
involved | +3.816 |
occurring | +3.778 |
Neg logit contributions | |
izons | -3.926 |
Opportun | -3.730 |
onential | -3.611 |
�� | -3.609 |
leaps | -3.458 |
Token | Ga |
Token ID | 35389 |
Feature activation | -2.13e-01 |
Pos logit contributions | |
izons | +3.664 |
onential | +3.400 |
Opportun | +3.390 |
�� | +3.231 |
leaps | +3.174 |
Neg logit contributions | |
bothering | -4.345 |
offending | -4.336 |
stranger | -4.250 |
occurring | -4.182 |
involved | -4.154 |
Token | ussian |
Token ID | 31562 |
Feature activation | +8.09e-01 |
Pos logit contributions | |
offending | +1.496 |
bothering | +1.490 |
stranger | +1.478 |
involved | +1.454 |
occurring | +1.448 |
Neg logit contributions | |
izons | -0.714 |
Opportun | -0.644 |
onential | -0.636 |
�� | -0.582 |
leaps | -0.577 |
Token | Bl |
Token ID | 3629 |
Feature activation | +1.53e+00 |
Pos logit contributions | |
izons | +4.570 |
onential | +4.359 |
Opportun | +4.315 |
leaps | +4.104 |
�� | +4.079 |
Neg logit contributions | |
offending | -3.648 |
bothering | -3.593 |
involved | -3.525 |
stranger | -3.502 |
occurring | -3.396 |
Token | ur |
Token ID | 333 |
Feature activation | +8.89e-01 |
Pos logit contributions | |
izons | +5.293 |
onential | +4.957 |
Opportun | +4.427 |
ights | +4.402 |
strides | +4.198 |
Neg logit contributions | |
offending | -10.093 |
bothering | -10.026 |
stranger | -9.844 |
uttered | -9.687 |
involved | -9.592 |
Token | ( |
Token ID | 7 |
Feature activation | +4.26e-02 |
Pos logit contributions | |
izons | +5.691 |
onential | +5.362 |
Opportun | +5.359 |
�� | +5.135 |
leaps | +5.090 |
Neg logit contributions | |
offending | -3.427 |
bothering | -3.367 |
stranger | -3.318 |
involved | -3.304 |
occurring | -3.178 |
Token | some |
Token ID | 617 |
Feature activation | +3.11e-01 |
Pos logit contributions | |
offending | +1.705 |
stranger | +1.698 |
bothering | +1.696 |
involved | +1.648 |
occurring | +1.630 |
Neg logit contributions | |
izons | -2.239 |
Opportun | -2.152 |
onential | -2.087 |
leaps | -2.029 |
�� | -2.027 |
Token | work |
Token ID | 670 |
Feature activation | +4.49e-01 |
Pos logit contributions | |
izons | +1.452 |
Opportun | +1.414 |
leaps | +1.344 |
onential | +1.323 |
strides | +1.320 |
Neg logit contributions | |
offending | -1.671 |
stranger | -1.664 |
involved | -1.660 |
bothering | -1.654 |
occurring | -1.621 |
Token | there |
Token ID | 612 |
Feature activation | +3.08e-01 |
Pos logit contributions | |
izons | +2.555 |
Opportun | +2.486 |
onential | +2.382 |
leaps | +2.370 |
strides | +2.307 |
Neg logit contributions | |
offending | -1.960 |
stranger | -1.948 |
bothering | -1.892 |
involved | -1.889 |
pictured | -1.798 |
Token | , |
Token ID | 11 |
Feature activation | +9.11e-02 |
Pos logit contributions | |
izons | +1.570 |
Opportun | +1.498 |
onential | +1.438 |
leaps | +1.410 |
�� | +1.394 |
Neg logit contributions | |
stranger | -1.575 |
offending | -1.570 |
bothering | -1.562 |
involved | -1.537 |
occurring | -1.488 |
Token | but |
Token ID | 475 |
Feature activation | -1.99e-01 |
Pos logit contributions | |
izons | +0.519 |
Opportun | +0.506 |
onential | +0.487 |
leaps | +0.477 |
�� | +0.471 |
Neg logit contributions | |
stranger | -0.404 |
offending | -0.403 |
bothering | -0.400 |
involved | -0.399 |
occurring | -0.380 |
Token | on |
Token ID | 319 |
Feature activation | +7.27e-01 |
Pos logit contributions | |
bothering | +0.811 |
offending | +0.807 |
stranger | +0.800 |
involved | +0.793 |
occurring | +0.766 |
Neg logit contributions | |
izons | -1.229 |
Opportun | -1.194 |
onential | -1.147 |
leaps | -1.128 |
Strategies | -1.118 |
Token | Wednesday |
Token ID | 3583 |
Feature activation | +3.47e-02 |
Pos logit contributions | |
izons | +4.197 |
Opportun | +4.115 |
onential | +3.979 |
leaps | +3.943 |
Strategies | +3.846 |
Neg logit contributions | |
bothering | -3.058 |
offending | -3.011 |
involved | -2.971 |
stranger | -2.964 |
occurring | -2.846 |
Token | , |
Token ID | 11 |
Feature activation | -2.92e-01 |
Pos logit contributions | |
izons | +0.211 |
Opportun | +0.203 |
onential | +0.196 |
�� | +0.192 |
leaps | +0.191 |
Neg logit contributions | |
bothering | -0.145 |
offending | -0.144 |
stranger | -0.144 |
involved | -0.140 |
occurring | -0.137 |
Token | he |
Token ID | 339 |
Feature activation | +1.61e+00 |
Pos logit contributions | |
bothering | +1.370 |
offending | +1.366 |
stranger | +1.362 |
involved | +1.340 |
occurring | +1.308 |
Neg logit contributions | |
izons | -1.622 |
Opportun | -1.561 |
onential | -1.502 |
�� | -1.469 |
leaps | -1.461 |
Token | worked |
Token ID | 3111 |
Feature activation | +9.59e-01 |
Pos logit contributions | |
izons | +9.184 |
leaps | +8.971 |
Opportun | +8.924 |
strides | +8.746 |
economies | +8.576 |
Neg logit contributions | |
bothering | -5.772 |
involved | -5.706 |
stranger | -5.607 |
offending | -5.525 |
occurring | -5.274 |
Token | with |
Token ID | 351 |
Feature activation | +3.71e-01 |
Pos logit contributions | |
izons | +5.262 |
Opportun | +5.158 |
leaps | +4.864 |
onential | +4.807 |
Strategies | +4.753 |
Neg logit contributions | |
involved | -4.297 |
offending | -4.284 |
bothering | -4.194 |
stranger | -4.164 |
occurring | -4.058 |
Token | . |
Token ID | 13 |
Feature activation | +5.52e-01 |
Pos logit contributions | |
izons | +0.054 |
Opportun | +0.051 |
onential | +0.049 |
leaps | +0.047 |
Strategies | +0.047 |
Neg logit contributions | |
offending | -0.067 |
bothering | -0.066 |
stranger | -0.066 |
involved | -0.065 |
occurring | -0.063 |
Token | x |
Token ID | 87 |
Feature activation | +4.40e-01 |
Pos logit contributions | |
izons | +1.306 |
Opportun | +1.129 |
onential | +1.107 |
leaps | +1.024 |
xff | +0.959 |
Neg logit contributions | |
offending | -4.381 |
bothering | -4.323 |
stranger | -4.280 |
occurring | -4.132 |
involved | -4.129 |
Token | . |
Token ID | 13 |
Feature activation | +1.01e+00 |
Pos logit contributions | |
izons | +1.826 |
Opportun | +1.731 |
onential | +1.657 |
leaps | +1.617 |
Strategies | +1.579 |
Neg logit contributions | |
offending | -2.704 |
bothering | -2.641 |
stranger | -2.608 |
involved | -2.550 |
occurring | -2.494 |
Token | x |
Token ID | 87 |
Feature activation | +5.24e-01 |
Pos logit contributions | |
izons | +2.046 |
Opportun | +1.694 |
onential | +1.666 |
xff | +1.575 |
leaps | +1.528 |
Neg logit contributions | |
offending | -8.256 |
bothering | -8.148 |
stranger | -7.988 |
occurring | -7.736 |
uttered | -7.671 |
Token | . |
Token ID | 13 |
Feature activation | +7.58e-01 |
Pos logit contributions | |
izons | +2.150 |
Opportun | +2.032 |
onential | +1.951 |
leaps | +1.883 |
Strategies | +1.831 |
Neg logit contributions | |
offending | -3.220 |
bothering | -3.146 |
stranger | -3.135 |
involved | -3.038 |
occurring | -2.975 |
Token | 95 |
Token ID | 3865 |
Feature activation | +8.39e-01 |
Pos logit contributions | |
izons | +1.740 |
Opportun | +1.565 |
onential | +1.529 |
leaps | +1.368 |
�� | +1.324 |
Neg logit contributions | |
stranger | -5.881 |
offending | -5.788 |
bothering | -5.727 |
occurring | -5.575 |
pictured | -5.549 |
Token | 103 |
Token ID | 15349 |
Feature activation | +1.23e+00 |
Pos logit contributions | |
Opportun | +2.781 |
izons | +2.737 |
Strategies | +2.431 |
onential | +2.422 |
�� | +2.339 |
Neg logit contributions | |
offending | -5.647 |
bothering | -5.584 |
stranger | -5.546 |
involved | -5.513 |
occurring | -5.336 |
Token | 16 |
Token ID | 1433 |
Feature activation | +9.94e-01 |
Pos logit contributions | |
izons | +3.330 |
Opportun | +3.325 |
Strategies | +2.885 |
onential | +2.791 |
Areas | +2.764 |
Neg logit contributions | |
offending | -8.927 |
stranger | -8.669 |
bothering | -8.500 |
involved | -8.416 |
pictured | -8.229 |
Token | 53 |
Token ID | 4310 |
Feature activation | +4.14e-01 |
Pos logit contributions | |
izons | +2.639 |
Opportun | +2.509 |
leaps | +2.223 |
Strategies | +2.219 |
onential | +2.211 |
Neg logit contributions | |
offending | -7.281 |
stranger | -7.114 |
bothering | -7.096 |
involved | -6.994 |
occurring | -6.805 |
Token | x |
Token ID | 2124 |
Feature activation | +2.64e-01 |
Pos logit contributions | |
izons | +1.591 |
Opportun | +1.570 |
onential | +1.449 |
leaps | +1.436 |
Strategies | +1.404 |
Neg logit contributions | |
involved | -2.580 |
offending | -2.579 |
bothering | -2.576 |
stranger | -2.563 |
occurring | -2.475 |
Token | . |
Token ID | 13 |
Feature activation | +8.47e-01 |
Pos logit contributions | |
izons | +1.151 |
Opportun | +1.097 |
onential | +1.050 |
leaps | +1.019 |
Strategies | +1.003 |
Neg logit contributions | |
offending | -1.564 |
bothering | -1.534 |
stranger | -1.526 |
involved | -1.495 |
occurring | -1.458 |
Token | and |
Token ID | 290 |
Feature activation | -5.02e-01 |
Pos logit contributions | |
izons | +2.206 |
Opportun | +2.104 |
onential | +2.037 |
leaps | +1.999 |
�� | +1.985 |
Neg logit contributions | |
involved | -2.010 |
offending | -2.003 |
stranger | -2.002 |
bothering | -2.000 |
occurring | -1.946 |
Token | American |
Token ID | 1605 |
Feature activation | -6.36e-03 |
Pos logit contributions | |
bothering | +2.146 |
offending | +2.137 |
stranger | +2.131 |
involved | +2.084 |
occurring | +2.049 |
Neg logit contributions | |
izons | -3.020 |
Opportun | -2.898 |
onential | -2.812 |
�� | -2.755 |
leaps | -2.734 |
Token | publishers |
Token ID | 17604 |
Feature activation | -9.61e-02 |
Pos logit contributions | |
bothering | +0.030 |
offending | +0.030 |
stranger | +0.030 |
involved | +0.030 |
occurring | +0.029 |
Neg logit contributions | |
izons | -0.035 |
Opportun | -0.034 |
onential | -0.032 |
leaps | -0.031 |
Strategies | -0.031 |
Token | ignored |
Token ID | 9514 |
Feature activation | +8.99e-02 |
Pos logit contributions | |
offending | +0.419 |
stranger | +0.418 |
bothering | +0.417 |
involved | +0.411 |
occurring | +0.401 |
Neg logit contributions | |
izons | -0.561 |
Opportun | -0.541 |
onential | -0.522 |
leaps | -0.512 |
�� | -0.508 |
Token | him |
Token ID | 683 |
Feature activation | +4.39e-01 |
Pos logit contributions | |
izons | +0.559 |
Opportun | +0.540 |
onential | +0.522 |
leaps | +0.511 |
�� | +0.510 |
Neg logit contributions | |
bothering | -0.359 |
stranger | -0.358 |
offending | -0.356 |
involved | -0.351 |
occurring | -0.344 |
Token | for |
Token ID | 329 |
Feature activation | +5.53e-01 |
Pos logit contributions | |
izons | +2.237 |
Opportun | +2.124 |
onential | +2.051 |
leaps | +2.031 |
economies | +1.967 |
Neg logit contributions | |
stranger | -2.191 |
involved | -2.182 |
offending | -2.169 |
bothering | -2.161 |
occurring | -2.105 |
Token | nearly |
Token ID | 3016 |
Feature activation | -1.61e-01 |
Pos logit contributions | |
izons | +2.859 |
Opportun | +2.759 |
leaps | +2.667 |
onential | +2.613 |
economies | +2.561 |
Neg logit contributions | |
involved | -2.704 |
stranger | -2.655 |
bothering | -2.642 |
offending | -2.641 |
occurring | -2.590 |
Token | two |
Token ID | 734 |
Feature activation | +3.84e-01 |
Pos logit contributions | |
involved | +0.790 |
bothering | +0.789 |
offending | +0.788 |
stranger | +0.782 |
occurring | +0.763 |
Neg logit contributions | |
izons | -0.845 |
Opportun | -0.818 |
onential | -0.784 |
leaps | -0.783 |
�� | -0.757 |
Token | decades |
Token ID | 4647 |
Feature activation | +1.05e+00 |
Pos logit contributions | |
izons | +1.746 |
Opportun | +1.660 |
onential | +1.588 |
leaps | +1.561 |
�� | +1.512 |
Neg logit contributions | |
bothering | -2.153 |
involved | -2.152 |
stranger | -2.149 |
offending | -2.142 |
occurring | -2.079 |
Token | . |
Token ID | 13 |
Feature activation | +3.89e-01 |
Pos logit contributions | |
izons | +5.070 |
Opportun | +4.747 |
leaps | +4.635 |
onential | +4.593 |
economies | +4.475 |
Neg logit contributions | |
involved | -5.245 |
stranger | -5.211 |
offending | -5.193 |
bothering | -5.161 |
occurring | -5.005 |
Token | Then |
Token ID | 3244 |
Feature activation | -1.11e+00 |
Pos logit contributions | |
Opportun | +1.701 |
izons | +1.669 |
Strategies | +1.536 |
onential | +1.516 |
Flavoring | +1.474 |
Neg logit contributions | |
offending | -2.286 |
bothering | -2.266 |
stranger | -2.248 |
involved | -2.231 |
occurring | -2.181 |
Token | vers |
Token ID | 690 |
Feature activation | +8.96e-01 |
Pos logit contributions | |
offending | +6.793 |
stranger | +6.747 |
bothering | +6.702 |
involved | +6.673 |
occurring | +6.558 |
Neg logit contributions | |
izons | -6.896 |
Opportun | -6.620 |
onential | -6.507 |
�� | -6.463 |
leaps | -6.131 |
Token | } |
Token ID | 92 |
Feature activation | +8.98e-01 |
Pos logit contributions | |
izons | +3.850 |
Opportun | +3.575 |
onential | +3.497 |
leaps | +3.347 |
�� | +3.321 |
Neg logit contributions | |
bothering | -5.160 |
stranger | -5.135 |
offending | -5.102 |
involved | -5.053 |
occurring | -4.910 |
Token | or |
Token ID | 393 |
Feature activation | -1.27e+00 |
Pos logit contributions | |
izons | +4.692 |
Opportun | +4.673 |
onential | +4.391 |
leaps | +4.386 |
Strategies | +4.311 |
Neg logit contributions | |
bothering | -4.170 |
stranger | -4.125 |
involved | -4.090 |
offending | -4.084 |
occurring | -3.832 |
Token | later |
Token ID | 1568 |
Feature activation | +2.67e-01 |
Pos logit contributions | |
stranger | +5.978 |
offending | +5.969 |
bothering | +5.911 |
occurring | +5.729 |
pictured | +5.665 |
Neg logit contributions | |
izons | -6.971 |
Opportun | -6.603 |
onential | -6.517 |
�� | -6.432 |
Strategies | -6.108 |
Token | vim |
Token ID | 43907 |
Feature activation | +5.20e-01 |
Pos logit contributions | |
izons | +1.447 |
Opportun | +1.405 |
onential | +1.336 |
leaps | +1.323 |
�� | +1.304 |
Neg logit contributions | |
bothering | -1.268 |
stranger | -1.263 |
offending | -1.260 |
involved | -1.245 |
occurring | -1.197 |
Token | < |
Token ID | 27 |
Feature activation | -3.82e-01 |
Pos logit contributions | |
izons | +2.605 |
Opportun | +2.458 |
onential | +2.373 |
�� | +2.317 |
leaps | +2.307 |
Neg logit contributions | |
bothering | -2.694 |
stranger | -2.687 |
offending | -2.658 |
involved | -2.605 |
occurring | -2.545 |
Token | { |
Token ID | 90 |
Feature activation | -9.32e-01 |
Pos logit contributions | |
bothering | +1.759 |
offending | +1.754 |
stranger | +1.750 |
involved | +1.700 |
occurring | +1.679 |
Neg logit contributions | |
izons | -2.182 |
Opportun | -2.080 |
onential | -2.021 |
�� | -1.989 |
leaps | -1.952 |
Token | vers |
Token ID | 690 |
Feature activation | +6.78e-01 |
Pos logit contributions | |
offending | +4.569 |
stranger | +4.559 |
bothering | +4.542 |
involved | +4.499 |
occurring | +4.411 |
Neg logit contributions | |
izons | -4.837 |
Opportun | -4.668 |
onential | -4.555 |
�� | -4.488 |
leaps | -4.315 |
Token | }: |
Token ID | 38362 |
Feature activation | -8.97e-01 |
Pos logit contributions | |
izons | +2.792 |
Opportun | +2.565 |
onential | +2.501 |
�� | +2.384 |
leaps | +2.381 |
Neg logit contributions | |
bothering | -4.098 |
stranger | -4.084 |
offending | -4.058 |
involved | -4.018 |
occurring | -3.940 |
Token | version |
Token ID | 2196 |
Feature activation | +3.65e-01 |
Pos logit contributions | |
offending | +3.860 |
stranger | +3.851 |
bothering | +3.841 |
involved | +3.716 |
occurring | +3.700 |
Neg logit contributions | |
izons | -5.350 |
Opportun | -5.095 |
onential | -4.986 |
�� | -4.920 |
leaps | -4.770 |
Token | before |
Token ID | 878 |
Feature activation | -1.14e+00 |
Pos logit contributions | |
izons | +1.834 |
Opportun | +1.778 |
onential | +1.708 |
leaps | +1.655 |
�� | +1.649 |
Neg logit contributions | |
bothering | -1.861 |
offending | -1.849 |
stranger | -1.847 |
involved | -1.832 |
occurring | -1.761 |
Token | pretty |
Token ID | 2495 |
Feature activation | -1.18e+00 |
Pos logit contributions | |
bothering | +12.535 |
offending | +12.476 |
stranger | +12.416 |
pictured | +12.280 |
occurring | +12.096 |
Neg logit contributions | |
izons | -12.935 |
�� | -12.192 |
onential | -12.110 |
Opportun | -11.848 |
Flavoring | -11.014 |
Token | useful |
Token ID | 4465 |
Feature activation | +3.10e-03 |
Pos logit contributions | |
bothering | +5.443 |
offending | +5.431 |
stranger | +5.426 |
occurring | +5.186 |
involved | +5.182 |
Neg logit contributions | |
izons | -6.747 |
Opportun | -6.380 |
onential | -6.268 |
�� | -6.226 |
Strategies | -5.907 |
Token | . |
Token ID | 13 |
Feature activation | +7.30e-02 |
Pos logit contributions | |
izons | +0.016 |
Opportun | +0.015 |
onential | +0.015 |
leaps | +0.014 |
�� | +0.014 |
Neg logit contributions | |
offending | -0.016 |
bothering | -0.016 |
stranger | -0.016 |
involved | -0.016 |
occurring | -0.015 |
Token | Show |
Token ID | 5438 |
Feature activation | +7.36e-01 |
Pos logit contributions | |
Opportun | +0.302 |
izons | +0.300 |
Strategies | +0.274 |
onential | +0.269 |
leaps | +0.260 |
Neg logit contributions | |
offending | -0.446 |
bothering | -0.440 |
stranger | -0.440 |
involved | -0.435 |
occurring | -0.424 |
Token | them |
Token ID | 606 |
Feature activation | -7.68e-01 |
Pos logit contributions | |
izons | +4.013 |
Opportun | +3.954 |
onential | +3.744 |
Strategies | +3.668 |
economies | +3.664 |
Neg logit contributions | |
bothering | -3.269 |
involved | -3.213 |
offending | -3.212 |
stranger | -3.182 |
occurring | -3.139 |
Token | how |
Token ID | 703 |
Feature activation | -4.12e-01 |
Pos logit contributions | |
bothering | +3.044 |
offending | +3.041 |
stranger | +3.037 |
involved | +2.946 |
occurring | +2.898 |
Neg logit contributions | |
izons | -4.857 |
Opportun | -4.660 |
onential | -4.543 |
�� | -4.478 |
leaps | -4.398 |
Token | you |
Token ID | 345 |
Feature activation | +5.60e-01 |
Pos logit contributions | |
bothering | +1.928 |
offending | +1.927 |
stranger | +1.925 |
involved | +1.873 |
occurring | +1.850 |
Neg logit contributions | |
izons | -2.310 |
Opportun | -2.202 |
onential | -2.143 |
�� | -2.108 |
leaps | -2.061 |
Token | can |
Token ID | 460 |
Feature activation | +2.07e-01 |
Pos logit contributions | |
izons | +2.817 |
Opportun | +2.698 |
leaps | +2.557 |
economies | +2.551 |
onential | +2.549 |
Neg logit contributions | |
offending | -2.775 |
bothering | -2.773 |
stranger | -2.747 |
involved | -2.733 |
occurring | -2.656 |
Token | take |
Token ID | 1011 |
Feature activation | +2.87e-02 |
Pos logit contributions | |
izons | +0.977 |
Opportun | +0.928 |
onential | +0.882 |
leaps | +0.880 |
economies | +0.859 |
Neg logit contributions | |
bothering | -1.119 |
offending | -1.118 |
stranger | -1.111 |
involved | -1.111 |
occurring | -1.080 |
Token | the |
Token ID | 262 |
Feature activation | -5.79e-01 |
Pos logit contributions | |
izons | +0.162 |
Opportun | +0.157 |
leaps | +0.150 |
onential | +0.149 |
strides | +0.147 |
Neg logit contributions | |
bothering | -0.128 |
involved | -0.128 |
stranger | -0.127 |
offending | -0.127 |
occurring | -0.124 |
Token | string |
Token ID | 4731 |
Feature activation | -7.97e-01 |
Pos logit contributions | |
bothering | +2.950 |
stranger | +2.941 |
offending | +2.940 |
involved | +2.884 |
occurring | +2.844 |
Neg logit contributions | |
izons | -2.998 |
Opportun | -2.852 |
onential | -2.755 |
�� | -2.707 |
leaps | -2.659 |
Token | the |
Token ID | 262 |
Feature activation | -7.35e-01 |
Pos logit contributions | |
bothering | +5.077 |
offending | +5.057 |
stranger | +5.042 |
occurring | +4.883 |
involved | +4.828 |
Neg logit contributions | |
izons | -6.269 |
Opportun | -5.920 |
onential | -5.803 |
�� | -5.764 |
Strategies | -5.524 |
Token | rooftop |
Token ID | 33880 |
Feature activation | -1.39e+00 |
Pos logit contributions | |
offending | +3.701 |
bothering | +3.699 |
stranger | +3.698 |
involved | +3.601 |
occurring | +3.561 |
Neg logit contributions | |
izons | -3.859 |
Opportun | -3.662 |
onential | -3.559 |
�� | -3.500 |
leaps | -3.409 |
Token | . |
Token ID | 13 |
Feature activation | +2.90e-01 |
Pos logit contributions | |
stranger | +6.698 |
offending | +6.693 |
bothering | +6.621 |
occurring | +6.409 |
involved | +6.347 |
Neg logit contributions | |
izons | -7.526 |
Opportun | -7.078 |
�� | -6.983 |
onential | -6.917 |
Strategies | -6.559 |
Token | As |
Token ID | 1081 |
Feature activation | -1.05e+00 |
Pos logit contributions | |
Opportun | +1.223 |
izons | +1.209 |
onential | +1.108 |
Strategies | +1.105 |
leaps | +1.076 |
Neg logit contributions | |
offending | -1.726 |
bothering | -1.715 |
stranger | -1.710 |
involved | -1.686 |
occurring | -1.650 |
Token | the |
Token ID | 262 |
Feature activation | -2.00e-01 |
Pos logit contributions | |
offending | +5.086 |
stranger | +5.077 |
bothering | +5.068 |
involved | +4.917 |
occurring | +4.877 |
Neg logit contributions | |
izons | -5.720 |
Opportun | -5.380 |
onential | -5.267 |
�� | -5.223 |
leaps | -5.014 |
Token | Sh |
Token ID | 911 |
Feature activation | -7.94e-01 |
Pos logit contributions | |
bothering | +1.017 |
offending | +1.007 |
stranger | +1.006 |
involved | +0.995 |
occurring | +0.977 |
Neg logit contributions | |
izons | -1.034 |
Opportun | -0.997 |
onential | -0.960 |
�� | -0.936 |
leaps | -0.933 |
Token | red |
Token ID | 445 |
Feature activation | -2.54e-01 |
Pos logit contributions | |
stranger | +4.257 |
offending | +4.246 |
bothering | +4.237 |
involved | +4.168 |
occurring | +4.103 |
Neg logit contributions | |
izons | -3.882 |
Opportun | -3.688 |
onential | -3.557 |
�� | -3.548 |
leaps | -3.413 |
Token | der |
Token ID | 1082 |
Feature activation | +3.34e-02 |
Pos logit contributions | |
bothering | +1.060 |
offending | +1.053 |
stranger | +1.026 |
involved | +1.011 |
occurring | +1.003 |
Neg logit contributions | |
izons | -1.572 |
Opportun | -1.499 |
onential | -1.466 |
leaps | -1.424 |
�� | -1.399 |
Token | falls |
Token ID | 8953 |
Feature activation | +3.36e-01 |
Pos logit contributions | |
izons | +0.151 |
Opportun | +0.143 |
leaps | +0.138 |
onential | +0.137 |
strides | +0.132 |
Neg logit contributions | |
bothering | -0.191 |
offending | -0.190 |
stranger | -0.190 |
involved | -0.188 |
occurring | -0.184 |
Token | , |
Token ID | 11 |
Feature activation | -7.11e-01 |
Pos logit contributions | |
izons | +1.854 |
Opportun | +1.784 |
onential | +1.729 |
leaps | +1.725 |
strides | +1.665 |
Neg logit contributions | |
bothering | -1.525 |
involved | -1.524 |
offending | -1.512 |
stranger | -1.498 |
occurring | -1.472 |
Token | the |
Token ID | 262 |
Feature activation | -5.22e-01 |
Pos logit contributions | |
offending | +3.605 |
bothering | +3.604 |
stranger | +3.603 |
involved | +3.505 |
occurring | +3.473 |
Neg logit contributions | |
izons | -3.712 |
Opportun | -3.509 |
onential | -3.414 |
�� | -3.359 |
leaps | -3.259 |
Token | | |
Token ID | 930 |
Feature activation | -2.10e+00 |
Pos logit contributions | |
izons | +2.513 |
Opportun | +2.453 |
onential | +2.312 |
leaps | +2.278 |
Strategies | +2.252 |
Neg logit contributions | |
offending | -2.749 |
bothering | -2.741 |
stranger | -2.725 |
involved | -2.690 |
occurring | -2.623 |
Token | C |
Token ID | 327 |
Feature activation | -7.01e-01 |
Pos logit contributions | |
stranger | +10.696 |
pictured | +10.691 |
bothering | +10.519 |
offending | +10.499 |
occurring | +10.259 |
Neg logit contributions | |
izons | -9.655 |
onential | -9.004 |
Opportun | -8.627 |
�� | -8.545 |
leaps | -8.268 |
Token | aj |
Token ID | 1228 |
Feature activation | +4.94e-01 |
Pos logit contributions | |
bothering | +3.279 |
offending | +3.278 |
stranger | +3.277 |
involved | +3.198 |
occurring | +3.148 |
Neg logit contributions | |
izons | -3.909 |
Opportun | -3.727 |
onential | -3.622 |
�� | -3.594 |
leaps | -3.490 |
Token | un |
Token ID | 403 |
Feature activation | -1.31e+00 |
Pos logit contributions | |
izons | +2.315 |
Opportun | +2.179 |
onential | +2.139 |
leaps | +2.004 |
Strategies | +1.948 |
Neg logit contributions | |
bothering | -2.756 |
offending | -2.755 |
stranger | -2.684 |
occurring | -2.628 |
involved | -2.591 |
Token | Kitchen |
Token ID | 22541 |
Feature activation | -3.44e-02 |
Pos logit contributions | |
stranger | +5.795 |
bothering | +5.721 |
offending | +5.628 |
pictured | +5.614 |
occurring | +5.478 |
Neg logit contributions | |
izons | -7.170 |
onential | -6.667 |
Opportun | -6.622 |
�� | -6.511 |
leaps | -6.489 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | -2.08e-01 |
Pos logit contributions | |
bothering | +0.199 |
offending | +0.199 |
stranger | +0.198 |
involved | +0.194 |
occurring | +0.192 |
Neg logit contributions | |
izons | -0.155 |
Opportun | -0.147 |
onential | -0.141 |
�� | -0.138 |
leaps | -0.135 |
Token | Recently |
Token ID | 24661 |
Feature activation | -3.20e-01 |
Pos logit contributions | |
offending | +1.007 |
bothering | +0.998 |
stranger | +0.993 |
occurring | +0.956 |
involved | +0.955 |
Neg logit contributions | |
izons | -1.148 |
Opportun | -1.089 |
onential | -1.060 |
�� | -1.030 |
leaps | -1.019 |
Token | , |
Token ID | 11 |
Feature activation | -2.86e-01 |
Pos logit contributions | |
offending | +1.466 |
bothering | +1.464 |
stranger | +1.455 |
involved | +1.422 |
occurring | +1.395 |
Neg logit contributions | |
izons | -1.825 |
Opportun | -1.748 |
onential | -1.693 |
�� | -1.653 |
leaps | -1.641 |
Token | there |
Token ID | 612 |
Feature activation | +1.38e+00 |
Pos logit contributions | |
bothering | +1.249 |
offending | +1.248 |
stranger | +1.236 |
involved | +1.218 |
occurring | +1.188 |
Neg logit contributions | |
izons | -1.690 |
Opportun | -1.627 |
onential | -1.575 |
�� | -1.536 |
leaps | -1.528 |
Token | � |
Token ID | 447 |
Feature activation | +4.37e-01 |
Pos logit contributions | |
izons | +7.343 |
Opportun | +7.043 |
leaps | +6.799 |
onential | +6.781 |
strides | +6.626 |
Neg logit contributions | |
offending | -6.188 |
bothering | -6.053 |
stranger | -6.004 |
involved | -5.941 |
occurring | -5.661 |
Token | � |
Token ID | 247 |
Feature activation | -2.44e+00 |
Pos logit contributions | |
izons | +1.321 |
Opportun | +1.216 |
�� | +1.186 |
onential | +1.161 |
leaps | +1.068 |
Neg logit contributions | |
bothering | -3.130 |
offending | -3.122 |
stranger | -3.113 |
involved | -3.101 |
occurring | -3.024 |
Token | to |
Token ID | 284 |
Feature activation | -2.19e-02 |
Pos logit contributions | |
izons | +0.794 |
Opportun | +0.759 |
onential | +0.739 |
�� | +0.726 |
leaps | +0.710 |
Neg logit contributions | |
bothering | -0.672 |
offending | -0.670 |
stranger | -0.670 |
involved | -0.649 |
occurring | -0.646 |
Token | tell |
Token ID | 1560 |
Feature activation | +1.24e-01 |
Pos logit contributions | |
offending | +0.120 |
bothering | +0.120 |
stranger | +0.119 |
involved | +0.118 |
occurring | +0.115 |
Neg logit contributions | |
izons | -0.105 |
Opportun | -0.099 |
onential | -0.096 |
leaps | -0.093 |
�� | -0.093 |
Token | if |
Token ID | 611 |
Feature activation | -1.07e+00 |
Pos logit contributions | |
izons | +0.806 |
Opportun | +0.778 |
onential | +0.751 |
leaps | +0.737 |
Strategies | +0.735 |
Neg logit contributions | |
bothering | -0.463 |
offending | -0.461 |
stranger | -0.457 |
involved | -0.453 |
occurring | -0.435 |
Token | these |
Token ID | 777 |
Feature activation | -1.38e-01 |
Pos logit contributions | |
stranger | +4.781 |
bothering | +4.773 |
offending | +4.770 |
involved | +4.596 |
occurring | +4.587 |
Neg logit contributions | |
izons | -6.148 |
Opportun | -5.793 |
�� | -5.740 |
onential | -5.709 |
leaps | -5.449 |
Token | initiatives |
Token ID | 15446 |
Feature activation | -4.19e-01 |
Pos logit contributions | |
bothering | +0.796 |
offending | +0.793 |
stranger | +0.792 |
involved | +0.781 |
occurring | +0.768 |
Neg logit contributions | |
izons | -0.627 |
Opportun | -0.596 |
onential | -0.569 |
�� | -0.550 |
leaps | -0.549 |
Token | are |
Token ID | 389 |
Feature activation | -1.10e+00 |
Pos logit contributions | |
bothering | +2.521 |
offending | +2.520 |
stranger | +2.517 |
involved | +2.464 |
occurring | +2.442 |
Neg logit contributions | |
izons | -1.787 |
Opportun | -1.675 |
onential | -1.615 |
�� | -1.584 |
leaps | -1.529 |
Token | working |
Token ID | 1762 |
Feature activation | +9.34e-01 |
Pos logit contributions | |
bothering | +4.778 |
offending | +4.754 |
stranger | +4.743 |
occurring | +4.588 |
involved | +4.558 |
Neg logit contributions | |
izons | -6.568 |
Opportun | -6.261 |
onential | -6.106 |
�� | -6.101 |
Strategies | -5.823 |
Token | , |
Token ID | 11 |
Feature activation | -3.00e-01 |
Pos logit contributions | |
izons | +4.679 |
Opportun | +4.481 |
leaps | +4.305 |
onential | +4.269 |
economies | +4.244 |
Neg logit contributions | |
involved | -4.620 |
offending | -4.592 |
stranger | -4.554 |
bothering | -4.527 |
pictured | -4.360 |
Token | but |
Token ID | 475 |
Feature activation | -1.87e-01 |
Pos logit contributions | |
bothering | +1.253 |
offending | +1.251 |
stranger | +1.246 |
involved | +1.223 |
occurring | +1.192 |
Neg logit contributions | |
izons | -1.825 |
Opportun | -1.754 |
onential | -1.706 |
�� | -1.664 |
leaps | -1.654 |
Token | it |
Token ID | 340 |
Feature activation | +2.13e+00 |
Pos logit contributions | |
bothering | +0.735 |
offending | +0.733 |
stranger | +0.728 |
involved | +0.716 |
occurring | +0.695 |
Neg logit contributions | |
izons | -1.185 |
Opportun | -1.148 |
onential | -1.107 |
leaps | -1.081 |
�� | -1.078 |
Token | � |
Token ID | 447 |
Feature activation | +9.02e-01 |
Pos logit contributions | |
izons | +9.948 |
leaps | +9.693 |
Opportun | +9.640 |
strides | +9.387 |
onential | +9.166 |
Neg logit contributions | |
bothering | -9.498 |
stranger | -9.375 |
offending | -9.316 |
involved | -9.064 |
occurring | -8.716 |
Token | student |
Token ID | 3710 |
Feature activation | -1.55e+00 |
Pos logit contributions | |
stranger | +7.577 |
offending | +7.562 |
bothering | +7.476 |
occurring | +7.204 |
involved | +7.196 |
Neg logit contributions | |
izons | -8.301 |
Opportun | -7.813 |
onential | -7.692 |
�� | -7.639 |
leaps | -7.310 |
Token | who |
Token ID | 508 |
Feature activation | -3.79e-01 |
Pos logit contributions | |
offending | +6.201 |
bothering | +6.193 |
stranger | +6.153 |
involved | +5.944 |
occurring | +5.915 |
Neg logit contributions | |
izons | -9.765 |
Opportun | -9.269 |
onential | -9.144 |
�� | -9.010 |
leaps | -8.711 |
Token | argues |
Token ID | 11673 |
Feature activation | -9.96e-01 |
Pos logit contributions | |
bothering | +1.531 |
stranger | +1.524 |
offending | +1.513 |
involved | +1.501 |
occurring | +1.458 |
Neg logit contributions | |
izons | -2.340 |
Opportun | -2.241 |
leaps | -2.176 |
onential | -2.161 |
�� | -2.128 |
Token | he |
Token ID | 339 |
Feature activation | -8.52e-02 |
Pos logit contributions | |
bothering | +3.353 |
offending | +3.347 |
stranger | +3.341 |
involved | +3.233 |
occurring | +3.167 |
Neg logit contributions | |
izons | -6.891 |
Opportun | -6.641 |
onential | -6.489 |
�� | -6.408 |
leaps | -6.309 |
Token | was |
Token ID | 373 |
Feature activation | -2.37e+00 |
Pos logit contributions | |
bothering | +0.355 |
stranger | +0.353 |
involved | +0.352 |
offending | +0.348 |
occurring | +0.337 |
Neg logit contributions | |
izons | -0.510 |
Opportun | -0.488 |
leaps | -0.481 |
onential | -0.467 |
strides | -0.466 |
Token | completely |
Token ID | 3190 |
Feature activation | -2.53e+00 |
Pos logit contributions | |
offending | +9.671 |
bothering | +9.608 |
stranger | +9.484 |
wrong | +9.176 |
pictured | +9.136 |
Neg logit contributions | |
izons | -14.326 |
Opportun | -13.479 |
onential | -13.452 |
�� | -13.157 |
Strategies | -12.792 |
Token | denied |
Token ID | 6699 |
Feature activation | -9.59e-01 |
Pos logit contributions | |
stranger | +10.722 |
offending | +10.707 |
bothering | +10.491 |
wrong | +10.327 |
pictured | +10.016 |
Neg logit contributions | |
izons | -14.723 |
onential | -13.868 |
�� | -13.862 |
Opportun | -13.849 |
Strategies | -13.316 |
Token | his |
Token ID | 465 |
Feature activation | -2.44e-01 |
Pos logit contributions | |
bothering | +4.394 |
offending | +4.392 |
stranger | +4.386 |
involved | +4.262 |
occurring | +4.212 |
Neg logit contributions | |
izons | -5.464 |
Opportun | -5.208 |
onential | -5.075 |
�� | -4.994 |
leaps | -4.882 |
Token | right |
Token ID | 826 |
Feature activation | -1.85e-01 |
Pos logit contributions | |
bothering | +1.052 |
stranger | +1.043 |
involved | +1.041 |
offending | +1.039 |
occurring | +1.004 |
Neg logit contributions | |
izons | -1.446 |
Opportun | -1.400 |
onential | -1.336 |
leaps | -1.310 |
�� | -1.307 |
Token | to |
Token ID | 284 |
Feature activation | -9.22e-01 |
Pos logit contributions | |
bothering | +0.929 |
offending | +0.928 |
stranger | +0.927 |
involved | +0.908 |
occurring | +0.891 |
Neg logit contributions | |
izons | -0.965 |
Opportun | -0.920 |
onential | -0.892 |
�� | -0.871 |
leaps | -0.859 |
Token | confront |
Token ID | 7239 |
Feature activation | -1.04e+00 |
Pos logit contributions | |
bothering | +4.653 |
offending | +4.647 |
stranger | +4.646 |
involved | +4.509 |
occurring | +4.473 |
Neg logit contributions | |
izons | -4.824 |
Opportun | -4.575 |
onential | -4.455 |
�� | -4.389 |
leaps | -4.257 |
Token | He |
Token ID | 679 |
Feature activation | +1.55e+00 |
Pos logit contributions | |
offending | +0.279 |
bothering | +0.277 |
stranger | +0.276 |
involved | +0.271 |
occurring | +0.266 |
Neg logit contributions | |
izons | -0.203 |
Opportun | -0.199 |
onential | -0.183 |
Strategies | -0.180 |
�� | -0.175 |
Token | calls |
Token ID | 3848 |
Feature activation | +2.06e-01 |
Pos logit contributions | |
izons | +7.871 |
leaps | +7.568 |
strides | +7.412 |
Opportun | +7.377 |
economies | +7.144 |
Neg logit contributions | |
bothering | -6.939 |
stranger | -6.881 |
involved | -6.871 |
offending | -6.728 |
occurring | -6.538 |
Token | her |
Token ID | 607 |
Feature activation | -2.21e-01 |
Pos logit contributions | |
izons | +1.285 |
Opportun | +1.244 |
onential | +1.198 |
leaps | +1.172 |
Strategies | +1.157 |
Neg logit contributions | |
bothering | -0.818 |
offending | -0.815 |
stranger | -0.811 |
involved | -0.797 |
occurring | -0.775 |
Token | a |
Token ID | 257 |
Feature activation | +2.05e-01 |
Pos logit contributions | |
bothering | +0.890 |
offending | +0.885 |
stranger | +0.882 |
involved | +0.861 |
occurring | +0.847 |
Neg logit contributions | |
izons | -1.381 |
Opportun | -1.335 |
onential | -1.287 |
�� | -1.256 |
leaps | -1.256 |
Token | � |
Token ID | 564 |
Feature activation | -9.32e-02 |
Pos logit contributions | |
izons | +1.260 |
Opportun | +1.236 |
onential | +1.178 |
leaps | +1.163 |
Strategies | +1.151 |
Neg logit contributions | |
offending | -0.823 |
bothering | -0.820 |
involved | -0.797 |
stranger | -0.794 |
occurring | -0.783 |
Token | � |
Token ID | 250 |
Feature activation | -3.00e+00 |
Pos logit contributions | |
bothering | +0.655 |
offending | +0.655 |
stranger | +0.653 |
involved | +0.643 |
occurring | +0.636 |
Neg logit contributions | |
izons | -0.303 |
Opportun | -0.281 |
onential | -0.267 |
�� | -0.263 |
leaps | -0.246 |
Token | class |
Token ID | 4871 |
Feature activation | -6.22e-01 |
Pos logit contributions | |
involved | +15.571 |
stranger | +15.212 |
bothering | +14.792 |
offending | +14.699 |
object | +14.584 |
Neg logit contributions | |
izons | -12.260 |
�� | -12.245 |
Opportun | -11.518 |
onential | -11.031 |
leaps | -10.972 |
Token | ical |
Token ID | 605 |
Feature activation | -4.83e-01 |
Pos logit contributions | |
offending | +2.911 |
stranger | +2.911 |
bothering | +2.908 |
involved | +2.827 |
occurring | +2.791 |
Neg logit contributions | |
izons | -3.483 |
Opportun | -3.316 |
onential | -3.229 |
�� | -3.185 |
leaps | -3.104 |
Token | social |
Token ID | 1919 |
Feature activation | +5.31e-01 |
Pos logit contributions | |
bothering | +2.497 |
offending | +2.496 |
stranger | +2.494 |
involved | +2.434 |
occurring | +2.406 |
Neg logit contributions | |
izons | -2.472 |
Opportun | -2.345 |
onential | -2.275 |
�� | -2.234 |
leaps | -2.180 |
Token | democrat |
Token ID | 43268 |
Feature activation | +2.68e-01 |
Pos logit contributions | |
izons | +2.163 |
Opportun | +2.096 |
onential | +1.979 |
leaps | +1.963 |
economies | +1.913 |
Neg logit contributions | |
bothering | -3.155 |
involved | -3.129 |
offending | -3.118 |
stranger | -3.073 |
pictured | -3.026 |
Token | � |
Token ID | 447 |
Feature activation | +8.51e-01 |
Pos logit contributions | |
izons | +1.417 |
Opportun | +1.371 |
onential | +1.326 |
leaps | +1.286 |
Strategies | +1.275 |
Neg logit contributions | |
bothering | -1.288 |
offending | -1.286 |
stranger | -1.282 |
involved | -1.259 |
occurring | -1.223 |
Token | local |
Token ID | 12001 |
Feature activation | -2.22e+00 |
Pos logit contributions | |
involved | +7.904 |
offending | +7.860 |
stranger | +7.797 |
bothering | +7.721 |
occurring | +7.592 |
Neg logit contributions | |
izons | -9.811 |
Opportun | -9.337 |
�� | -9.149 |
onential | -9.085 |
Flavoring | -8.714 |
Token | option |
Token ID | 3038 |
Feature activation | -1.17e+00 |
Pos logit contributions | |
offending | +9.583 |
bothering | +9.493 |
stranger | +9.486 |
occurring | +9.345 |
wrong | +9.257 |
Neg logit contributions | |
izons | -12.218 |
�� | -11.456 |
Opportun | -11.435 |
onential | -11.158 |
Flavoring | -10.705 |
Token | after |
Token ID | 706 |
Feature activation | -1.63e+00 |
Pos logit contributions | |
bothering | +5.448 |
offending | +5.444 |
stranger | +5.441 |
occurring | +5.270 |
involved | +5.253 |
Neg logit contributions | |
izons | -6.562 |
Opportun | -6.232 |
onential | -6.067 |
�� | -6.014 |
leaps | -5.818 |
Token | opening |
Token ID | 4756 |
Feature activation | -8.41e-01 |
Pos logit contributions | |
offending | +6.335 |
bothering | +6.314 |
stranger | +6.169 |
occurring | +6.063 |
involved | +5.985 |
Neg logit contributions | |
izons | -10.354 |
Opportun | -9.736 |
onential | -9.656 |
�� | -9.586 |
Strategies | -9.147 |
Token | the |
Token ID | 262 |
Feature activation | -1.37e+00 |
Pos logit contributions | |
offending | +3.187 |
stranger | +3.177 |
bothering | +3.174 |
involved | +3.056 |
occurring | +3.030 |
Neg logit contributions | |
izons | -5.451 |
Opportun | -5.214 |
onential | -5.115 |
�� | -5.060 |
leaps | -4.929 |
Token | file |
Token ID | 2393 |
Feature activation | -2.46e+00 |
Pos logit contributions | |
offending | +5.478 |
stranger | +5.393 |
bothering | +5.351 |
occurring | +5.160 |
involved | +5.145 |
Neg logit contributions | |
izons | -8.589 |
Opportun | -8.172 |
onential | -8.090 |
�� | -8.003 |
leaps | -7.705 |
Token | , |
Token ID | 11 |
Feature activation | -8.07e-01 |
Pos logit contributions | |
offending | +10.500 |
bothering | +10.481 |
stranger | +10.448 |
occurring | +10.326 |
wrong | +10.219 |
Neg logit contributions | |
izons | -13.383 |
Opportun | -12.572 |
onential | -12.524 |
�� | -12.440 |
Flavoring | -11.795 |
Token | it |
Token ID | 340 |
Feature activation | -7.95e-02 |
Pos logit contributions | |
bothering | +3.149 |
offending | +3.146 |
stranger | +3.143 |
involved | +3.044 |
occurring | +2.995 |
Neg logit contributions | |
izons | -5.145 |
Opportun | -4.937 |
onential | -4.817 |
�� | -4.747 |
leaps | -4.661 |
Token | won |
Token ID | 1839 |
Feature activation | +2.70e-02 |
Pos logit contributions | |
bothering | +0.455 |
stranger | +0.454 |
offending | +0.450 |
involved | +0.446 |
occurring | +0.433 |
Neg logit contributions | |
izons | -0.358 |
Opportun | -0.336 |
onential | -0.323 |
leaps | -0.322 |
economies | -0.307 |
Token | 't |
Token ID | 470 |
Feature activation | -1.35e+00 |
Pos logit contributions | |
izons | +0.115 |
Opportun | +0.109 |
onential | +0.104 |
leaps | +0.101 |
Strategies | +0.099 |
Neg logit contributions | |
offending | -0.163 |
bothering | -0.161 |
stranger | -0.161 |
involved | -0.158 |
occurring | -0.154 |
Token | be |
Token ID | 307 |
Feature activation | -2.41e+00 |
Pos logit contributions | |
offending | +6.045 |
bothering | +6.012 |
stranger | +5.971 |
occurring | +5.859 |
involved | +5.718 |
Neg logit contributions | |
izons | -7.738 |
Opportun | -7.376 |
onential | -7.220 |
�� | -7.175 |
Strategies | -6.862 |
Token | :] |
Token ID | 47715 |
Feature activation | -1.24e+00 |
Pos logit contributions | |
izons | +2.134 |
Opportun | +2.038 |
onential | +1.971 |
�� | +1.925 |
leaps | +1.897 |
Neg logit contributions | |
bothering | -1.982 |
stranger | -1.980 |
offending | -1.974 |
involved | -1.941 |
occurring | -1.901 |
Token | � |
Token ID | 564 |
Feature activation | +4.94e-01 |
Pos logit contributions | |
bothering | +6.590 |
stranger | +6.582 |
offending | +6.582 |
involved | +6.408 |
occurring | +6.372 |
Neg logit contributions | |
izons | -6.067 |
Opportun | -5.595 |
�� | -5.582 |
onential | -5.578 |
leaps | -5.260 |
Token | � |
Token ID | 250 |
Feature activation | -2.15e+00 |
Pos logit contributions | |
izons | +1.560 |
Opportun | +1.434 |
�� | +1.387 |
onential | +1.378 |
Strategies | +1.257 |
Neg logit contributions | |
offending | -3.474 |
bothering | -3.473 |
stranger | -3.470 |
involved | -3.444 |
occurring | -3.375 |
Token | What |
Token ID | 2061 |
Feature activation | -3.02e-01 |
Pos logit contributions | |
bothering | +9.754 |
stranger | +9.708 |
offending | +9.662 |
involved | +9.650 |
occurring | +9.304 |
Neg logit contributions | |
izons | -11.368 |
�� | -10.864 |
Opportun | -10.697 |
onential | -10.554 |
leaps | -10.085 |
Token | � |
Token ID | 447 |
Feature activation | +5.74e-01 |
Pos logit contributions | |
offending | +1.284 |
bothering | +1.281 |
stranger | +1.277 |
involved | +1.248 |
occurring | +1.225 |
Neg logit contributions | |
izons | -1.805 |
Opportun | -1.752 |
onential | -1.685 |
leaps | -1.654 |
Strategies | -1.628 |
Token | � |
Token ID | 247 |
Feature activation | -2.76e+00 |
Pos logit contributions | |
izons | +1.574 |
Opportun | +1.438 |
�� | +1.411 |
onential | +1.356 |
Strategies | +1.266 |
Neg logit contributions | |
offending | -4.201 |
bothering | -4.201 |
stranger | -4.164 |
involved | -4.157 |
occurring | -4.064 |
Token | s |
Token ID | 82 |
Feature activation | -2.00e+00 |
Pos logit contributions | |
involved | +13.632 |
bothering | +13.536 |
stranger | +13.521 |
offending | +13.492 |
wrong | +13.177 |
Neg logit contributions | |
izons | -12.815 |
�� | -12.113 |
Opportun | -11.848 |
onential | -11.693 |
Flavoring | -11.031 |
Token | been |
Token ID | 587 |
Feature activation | -1.60e+00 |
Pos logit contributions | |
bothering | +8.769 |
offending | +8.593 |
stranger | +8.567 |
occurring | +8.358 |
happening | +8.259 |
Neg logit contributions | |
izons | -11.707 |
�� | -10.971 |
Opportun | -10.924 |
onential | -10.771 |
Flavoring | -10.251 |
Token | interesting |
Token ID | 3499 |
Feature activation | +3.98e-01 |
Pos logit contributions | |
bothering | +6.864 |
offending | +6.732 |
stranger | +6.729 |
occurring | +6.526 |
involved | +6.431 |
Neg logit contributions | |
izons | -9.628 |
Opportun | -9.109 |
onential | -8.960 |
�� | -8.924 |
Strategies | -8.480 |
Token | , |
Token ID | 11 |
Feature activation | -2.01e-01 |
Pos logit contributions | |
izons | +2.263 |
Opportun | +2.190 |
onential | +2.120 |
leaps | +2.082 |
�� | +2.047 |
Neg logit contributions | |
offending | -1.741 |
stranger | -1.731 |
bothering | -1.722 |
involved | -1.698 |
pictured | -1.639 |
Token | Andrea |
Token ID | 23174 |
Feature activation | -1.48e-01 |
Pos logit contributions | |
offending | +0.975 |
bothering | +0.974 |
stranger | +0.971 |
involved | +0.957 |
occurring | +0.932 |
Neg logit contributions | |
izons | -1.081 |
Opportun | -1.038 |
onential | -1.007 |
�� | -0.972 |
leaps | -0.972 |
Token | ? |
Token ID | 30 |
Feature activation | +2.63e-01 |
Pos logit contributions | |
stranger | +0.426 |
offending | +0.426 |
bothering | +0.426 |
involved | +0.421 |
occurring | +0.401 |
Neg logit contributions | |
izons | -0.596 |
Opportun | -0.573 |
onential | -0.550 |
leaps | -0.545 |
Strategies | -0.538 |
Token |
|
Token ID | 198 |
Feature activation | -1.42e+00 |
Pos logit contributions | |
izons | +1.126 |
Opportun | +1.115 |
onential | +1.021 |
Strategies | +1.013 |
leaps | +0.972 |
Neg logit contributions | |
offending | -1.565 |
bothering | -1.556 |
stranger | -1.550 |
involved | -1.512 |
occurring | -1.493 |
Token |
|
Token ID | 198 |
Feature activation | -5.47e-02 |
Pos logit contributions | |
stranger | +7.041 |
bothering | +7.030 |
involved | +6.985 |
occurring | +6.872 |
offending | +6.836 |
Neg logit contributions | |
izons | -7.271 |
�� | -7.019 |
Opportun | -6.732 |
onential | -6.725 |
leaps | -6.373 |
Token | That |
Token ID | 2504 |
Feature activation | -3.50e-01 |
Pos logit contributions | |
offending | +0.269 |
bothering | +0.265 |
stranger | +0.264 |
occurring | +0.254 |
involved | +0.251 |
Neg logit contributions | |
izons | -0.300 |
Opportun | -0.285 |
onential | -0.275 |
Strategies | -0.265 |
leaps | -0.264 |
Token | � |
Token ID | 447 |
Feature activation | +1.26e-01 |
Pos logit contributions | |
offending | +1.602 |
bothering | +1.602 |
stranger | +1.593 |
involved | +1.567 |
occurring | +1.527 |
Neg logit contributions | |
izons | -1.988 |
Opportun | -1.911 |
onential | -1.848 |
leaps | -1.802 |
�� | -1.782 |
Token | � |
Token ID | 247 |
Feature activation | -3.12e+00 |
Pos logit contributions | |
izons | +0.367 |
Opportun | +0.336 |
�� | +0.318 |
onential | +0.318 |
leaps | +0.294 |
Neg logit contributions | |
bothering | -0.923 |
offending | -0.919 |
stranger | -0.916 |
involved | -0.908 |
occurring | -0.894 |
Token | s |
Token ID | 82 |
Feature activation | -1.53e+00 |
Pos logit contributions | |
stranger | +13.455 |
offending | +13.434 |
occurring | +13.331 |
bothering | +13.193 |
happening | +13.115 |
Neg logit contributions | |
izons | -15.771 |
�� | -14.967 |
Opportun | -14.545 |
onential | -14.357 |
leaps | -13.702 |
Token | the |
Token ID | 262 |
Feature activation | -3.26e-01 |
Pos logit contributions | |
bothering | +6.421 |
stranger | +6.408 |
offending | +6.351 |
occurring | +6.212 |
happening | +6.044 |
Neg logit contributions | |
izons | -9.253 |
Opportun | -8.739 |
�� | -8.718 |
onential | -8.624 |
Flavoring | -8.158 |
Token | chance |
Token ID | 2863 |
Feature activation | +9.54e-02 |
Pos logit contributions | |
bothering | +1.654 |
offending | +1.649 |
stranger | +1.646 |
involved | +1.623 |
occurring | +1.592 |
Neg logit contributions | |
izons | -1.685 |
Opportun | -1.611 |
onential | -1.550 |
�� | -1.509 |
leaps | -1.504 |
Token | America |
Token ID | 2253 |
Feature activation | +1.85e-01 |
Pos logit contributions | |
izons | +0.604 |
Opportun | +0.575 |
onential | +0.558 |
leaps | +0.546 |
Strategies | +0.544 |
Neg logit contributions | |
bothering | -0.376 |
offending | -0.374 |
stranger | -0.370 |
involved | -0.367 |
occurring | -0.351 |
Token | will |
Token ID | 481 |
Feature activation | +1.15e+00 |
Pos logit contributions | |
izons | +0.942 |
Opportun | +0.893 |
onential | +0.852 |
leaps | +0.840 |
Strategies | +0.824 |
Neg logit contributions | |
bothering | -0.963 |
offending | -0.961 |
stranger | -0.958 |
involved | -0.948 |
occurring | -0.924 |