Token | careless |
Token ID | 36138 |
Feature activation | +3.30e-03 |
Pos logit contributions | |
ivated | +0.000 |
ternally | +0.000 |
unatt | +0.000 |
My | +0.000 |
en | +0.000 |
Neg logit contributions | |
ages | -0.000 |
zag | -0.000 |
ained | -0.000 |
tr | -0.000 |
ometer | -0.000 |
Token | . |
Token ID | 13 |
Feature activation | +8.84e-03 |
Pos logit contributions | |
ages | +0.005 |
zag | +0.005 |
tr | +0.005 |
ained | +0.005 |
ometer | +0.005 |
Neg logit contributions | |
ivated | -0.004 |
unatt | -0.004 |
ternally | -0.004 |
en | -0.004 |
My | -0.004 |
Token | They |
Token ID | 1119 |
Feature activation | +9.71e-03 |
Pos logit contributions | |
zag | +0.014 |
ages | +0.014 |
tr | +0.012 |
ained | +0.012 |
ometer | +0.012 |
Neg logit contributions | |
My | -0.012 |
en | -0.012 |
ivated | -0.012 |
I | -0.012 |
ternally | -0.012 |
Token | learned |
Token ID | 4499 |
Feature activation | +4.77e-02 |
Pos logit contributions | |
zag | +0.016 |
ages | +0.016 |
ained | +0.015 |
tr | +0.015 |
ometer | +0.015 |
Neg logit contributions | |
en | -0.012 |
ivated | -0.012 |
unatt | -0.012 |
ternally | -0.012 |
My | -0.012 |
Token | a |
Token ID | 257 |
Feature activation | +7.09e-02 |
Pos logit contributions | |
zag | +0.099 |
ops | +0.098 |
ages | +0.094 |
ained | +0.092 |
ahs | +0.092 |
Neg logit contributions | |
back | -0.047 |
I | -0.043 |
My | -0.042 |
en | -0.042 |
me | -0.042 |
Token | hard |
Token ID | 1327 |
Feature activation | +1.07e-01 |
Pos logit contributions | |
packs | +0.113 |
ained | +0.111 |
aked | +0.105 |
ates | +0.104 |
ops | +0.101 |
Neg logit contributions | |
back | -0.102 |
en | -0.097 |
me | -0.095 |
else | -0.094 |
calm | -0.093 |
Token | lesson |
Token ID | 11483 |
Feature activation | +5.30e-02 |
Pos logit contributions | |
aj | +0.180 |
packs | +0.159 |
ccoli | +0.157 |
ends | +0.151 |
akers | +0.151 |
Neg logit contributions | |
back | -0.133 |
tune | -0.132 |
's | -0.126 |
voice | -0.124 |
corner | -0.124 |
Token | . |
Token ID | 13 |
Feature activation | +2.38e-02 |
Pos logit contributions | |
ages | +0.127 |
ccoli | +0.127 |
zag | +0.125 |
aj | +0.124 |
tr | +0.123 |
Neg logit contributions | |
alone | -0.034 |
I | -0.031 |
back | -0.027 |
else | -0.024 |
unatt | -0.024 |
Token | The |
Token ID | 383 |
Feature activation | +2.37e-02 |
Pos logit contributions | |
zag | +0.040 |
ages | +0.039 |
tr | +0.035 |
ained | +0.035 |
tails | +0.034 |
Neg logit contributions | |
My | -0.033 |
I | -0.033 |
en | -0.031 |
alone | -0.029 |
Int | -0.029 |
Token | end |
Token ID | 886 |
Feature activation | +4.14e-02 |
Pos logit contributions | |
ages | +0.041 |
zag | +0.040 |
ccoli | +0.036 |
tails | +0.036 |
ained | +0.035 |
Neg logit contributions | |
en | -0.031 |
me | -0.030 |
unatt | -0.030 |
alone | -0.030 |
I | -0.029 |
Token | . |
Token ID | 13 |
Feature activation | +1.92e-02 |
Pos logit contributions | |
ages | +0.097 |
ained | +0.092 |
zag | +0.091 |
tails | +0.088 |
live | +0.088 |
Neg logit contributions | |
I | -0.028 |
alone | -0.026 |
else | -0.025 |
me | -0.023 |
My | -0.022 |
Token | dog |
Token ID | 3290 |
Feature activation | -1.28e-02 |
Pos logit contributions | |
ages | +0.033 |
zag | +0.033 |
ained | +0.030 |
ccoli | +0.030 |
tr | +0.029 |
Neg logit contributions | |
unatt | -0.017 |
en | -0.017 |
alone | -0.017 |
I | -0.016 |
me | -0.016 |
Token | . |
Token ID | 13 |
Feature activation | +1.29e-02 |
Pos logit contributions | |
ternally | +0.018 |
ivated | +0.018 |
en | +0.017 |
My | +0.017 |
unatt | +0.017 |
Neg logit contributions | |
ages | -0.020 |
zag | -0.020 |
tr | -0.018 |
ained | -0.018 |
ometer | -0.018 |
Token | They |
Token ID | 1119 |
Feature activation | +1.14e-02 |
Pos logit contributions | |
zag | +0.019 |
ages | +0.018 |
tr | +0.016 |
ained | +0.016 |
tails | +0.016 |
Neg logit contributions | |
My | -0.020 |
en | -0.020 |
I | -0.020 |
ivated | -0.019 |
alone | -0.019 |
Token | learned |
Token ID | 4499 |
Feature activation | +4.17e-02 |
Pos logit contributions | |
zag | +0.022 |
ages | +0.022 |
ained | +0.021 |
tr | +0.020 |
ometer | +0.020 |
Neg logit contributions | |
en | -0.012 |
unatt | -0.011 |
ivated | -0.011 |
ternally | -0.011 |
alone | -0.011 |
Token | a |
Token ID | 257 |
Feature activation | +7.35e-02 |
Pos logit contributions | |
zag | +0.088 |
ops | +0.084 |
ages | +0.081 |
ained | +0.081 |
ahs | +0.080 |
Neg logit contributions | |
back | -0.041 |
I | -0.039 |
en | -0.038 |
My | -0.038 |
alone | -0.037 |
Token | hard |
Token ID | 1327 |
Feature activation | +1.05e-01 |
Pos logit contributions | |
packs | +0.113 |
ained | +0.108 |
aj | +0.102 |
aked | +0.101 |
ates | +0.101 |
Neg logit contributions | |
back | -0.107 |
en | -0.105 |
else | -0.103 |
I | -0.101 |
calm | -0.100 |
Token | lesson |
Token ID | 11483 |
Feature activation | +4.85e-02 |
Pos logit contributions | |
aj | +0.184 |
packs | +0.166 |
ccoli | +0.161 |
ends | +0.159 |
akers | +0.156 |
Neg logit contributions | |
tune | -0.127 |
back | -0.123 |
corner | -0.119 |
voice | -0.116 |
alone | -0.113 |
Token | . |
Token ID | 13 |
Feature activation | +2.11e-02 |
Pos logit contributions | |
ages | +0.114 |
zag | +0.113 |
ccoli | +0.113 |
tr | +0.111 |
aj | +0.111 |
Neg logit contributions | |
alone | -0.032 |
I | -0.030 |
back | -0.025 |
unatt | -0.025 |
else | -0.024 |
Token | Not |
Token ID | 1892 |
Feature activation | +4.85e-02 |
Pos logit contributions | |
zag | +0.034 |
ages | +0.032 |
tr | +0.029 |
tails | +0.029 |
ained | +0.029 |
Neg logit contributions | |
I | -0.031 |
My | -0.031 |
en | -0.030 |
alone | -0.028 |
Int | -0.028 |
Token | all |
Token ID | 477 |
Feature activation | +1.69e-02 |
Pos logit contributions | |
zag | +0.096 |
ages | +0.095 |
aj | +0.092 |
ine | +0.091 |
tails | +0.090 |
Neg logit contributions | |
I | -0.056 |
alone | -0.053 |
en | -0.053 |
me | -0.050 |
unatt | -0.049 |
Token | dogs |
Token ID | 6844 |
Feature activation | -8.47e-03 |
Pos logit contributions | |
zag | +0.029 |
ages | +0.029 |
tr | +0.028 |
ained | +0.027 |
ometer | +0.026 |
Neg logit contributions | |
I | -0.021 |
alone | -0.021 |
en | -0.021 |
My | -0.020 |
ternally | -0.019 |
Token | mom |
Token ID | 1995 |
Feature activation | +5.89e-03 |
Pos logit contributions | |
ages | +0.055 |
zag | +0.054 |
ained | +0.052 |
tr | +0.049 |
ine | +0.048 |
Neg logit contributions | |
en | -0.029 |
me | -0.028 |
alone | -0.028 |
unatt | -0.028 |
cheer | -0.027 |
Token | . |
Token ID | 13 |
Feature activation | +2.07e-02 |
Pos logit contributions | |
zag | +0.010 |
ages | +0.010 |
ained | +0.009 |
tr | +0.009 |
ometer | +0.009 |
Neg logit contributions | |
unatt | -0.007 |
ivated | -0.007 |
ternally | -0.007 |
en | -0.007 |
My | -0.007 |
Token | They |
Token ID | 1119 |
Feature activation | +1.32e-02 |
Pos logit contributions | |
zag | +0.035 |
ages | +0.034 |
ained | +0.030 |
tr | +0.030 |
ahs | +0.029 |
Neg logit contributions | |
I | -0.029 |
My | -0.029 |
en | -0.028 |
Int | -0.027 |
alone | -0.026 |
Token | learned |
Token ID | 4499 |
Feature activation | +4.64e-02 |
Pos logit contributions | |
zag | +0.026 |
ages | +0.025 |
ained | +0.024 |
tr | +0.023 |
ometer | +0.023 |
Neg logit contributions | |
en | -0.014 |
unatt | -0.013 |
ivated | -0.013 |
ternally | -0.013 |
alone | -0.013 |
Token | a |
Token ID | 257 |
Feature activation | +6.71e-02 |
Pos logit contributions | |
zag | +0.100 |
ages | +0.093 |
ops | +0.093 |
live | +0.090 |
ained | +0.089 |
Neg logit contributions | |
back | -0.045 |
me | -0.041 |
I | -0.041 |
en | -0.041 |
alone | -0.039 |
Token | hard |
Token ID | 1327 |
Feature activation | +1.05e-01 |
Pos logit contributions | |
ained | +0.097 |
packs | +0.096 |
ops | +0.095 |
aked | +0.094 |
ages | +0.094 |
Neg logit contributions | |
back | -0.099 |
calm | -0.097 |
en | -0.094 |
I | -0.093 |
cheer | -0.092 |
Token | lesson |
Token ID | 11483 |
Feature activation | +4.52e-02 |
Pos logit contributions | |
aj | +0.182 |
ends | +0.165 |
packs | +0.162 |
girls | +0.161 |
live | +0.160 |
Neg logit contributions | |
voice | -0.126 |
tune | -0.121 |
fright | -0.120 |
corner | -0.120 |
back | -0.117 |
Token | . |
Token ID | 13 |
Feature activation | +2.64e-02 |
Pos logit contributions | |
zag | +0.104 |
ages | +0.102 |
ccoli | +0.101 |
tr | +0.099 |
aj | +0.098 |
Neg logit contributions | |
alone | -0.033 |
I | -0.033 |
unatt | -0.028 |
back | -0.027 |
me | -0.025 |
Token | They |
Token ID | 1119 |
Feature activation | +2.83e-02 |
Pos logit contributions | |
zag | +0.044 |
ages | +0.041 |
ahs | +0.037 |
tr | +0.037 |
tails | +0.036 |
Neg logit contributions | |
I | -0.040 |
My | -0.040 |
en | -0.038 |
alone | -0.036 |
Int | -0.036 |
Token | did |
Token ID | 750 |
Feature activation | +6.07e-02 |
Pos logit contributions | |
zag | +0.056 |
ages | +0.053 |
ained | +0.051 |
ccoli | +0.050 |
ometer | +0.049 |
Neg logit contributions | |
en | -0.030 |
alone | -0.029 |
unatt | -0.028 |
I | -0.028 |
ternally | -0.028 |
Token | not |
Token ID | 407 |
Feature activation | +2.55e-02 |
Pos logit contributions | |
aked | +0.132 |
uns | +0.131 |
inea | +0.131 |
mons | +0.129 |
ages | +0.129 |
Neg logit contributions | |
back | -0.058 |
alone | -0.053 |
I | -0.052 |
calm | -0.050 |
me | -0.050 |
Token | eating |
Token ID | 6600 |
Feature activation | +1.08e-02 |
Pos logit contributions | |
zag | +0.051 |
ages | +0.050 |
tr | +0.049 |
ometer | +0.046 |
ained | +0.045 |
Neg logit contributions | |
I | -0.037 |
en | -0.035 |
me | -0.035 |
alone | -0.035 |
unatt | -0.035 |
Token | . |
Token ID | 13 |
Feature activation | +2.07e-02 |
Pos logit contributions | |
zag | +0.022 |
ages | +0.022 |
tr | +0.020 |
ained | +0.020 |
ometer | +0.020 |
Neg logit contributions | |
en | -0.010 |
unatt | -0.010 |
My | -0.010 |
ivated | -0.010 |
ternally | -0.010 |
Token | They |
Token ID | 1119 |
Feature activation | +1.40e-02 |
Pos logit contributions | |
zag | +0.034 |
ages | +0.033 |
tr | +0.030 |
ained | +0.030 |
tails | +0.029 |
Neg logit contributions | |
My | -0.029 |
I | -0.029 |
en | -0.028 |
Int | -0.027 |
ivated | -0.026 |
Token | learned |
Token ID | 4499 |
Feature activation | +4.67e-02 |
Pos logit contributions | |
zag | +0.027 |
ages | +0.026 |
ained | +0.025 |
tr | +0.024 |
ometer | +0.024 |
Neg logit contributions | |
en | -0.015 |
unatt | -0.015 |
ivated | -0.014 |
alone | -0.014 |
ternally | -0.014 |
Token | a |
Token ID | 257 |
Feature activation | +6.40e-02 |
Pos logit contributions | |
zag | +0.107 |
ops | +0.100 |
ages | +0.098 |
ained | +0.098 |
tails | +0.096 |
Neg logit contributions | |
back | -0.038 |
en | -0.036 |
I | -0.035 |
so | -0.034 |
My | -0.034 |
Token | hard |
Token ID | 1327 |
Feature activation | +1.05e-01 |
Pos logit contributions | |
packs | +0.091 |
ained | +0.090 |
aked | +0.086 |
ages | +0.085 |
aj | +0.083 |
Neg logit contributions | |
en | -0.102 |
else | -0.097 |
I | -0.095 |
My | -0.095 |
back | -0.094 |
Token | lesson |
Token ID | 11483 |
Feature activation | +5.45e-02 |
Pos logit contributions | |
aj | +0.176 |
packs | +0.165 |
tails | +0.158 |
ends | +0.154 |
uth | +0.148 |
Neg logit contributions | |
back | -0.122 |
's | -0.122 |
corner | -0.120 |
me | -0.119 |
tune | -0.117 |
Token | . |
Token ID | 13 |
Feature activation | +2.70e-02 |
Pos logit contributions | |
ages | +0.126 |
aj | +0.124 |
zag | +0.124 |
ccoli | +0.123 |
tr | +0.122 |
Neg logit contributions | |
I | -0.036 |
alone | -0.036 |
else | -0.029 |
back | -0.029 |
unatt | -0.028 |
Token | They |
Token ID | 1119 |
Feature activation | +2.79e-02 |
Pos logit contributions | |
zag | +0.047 |
ages | +0.045 |
tr | +0.040 |
tails | +0.040 |
ahs | +0.040 |
Neg logit contributions | |
I | -0.037 |
My | -0.037 |
en | -0.035 |
Int | -0.033 |
alone | -0.032 |
Token | said |
Token ID | 531 |
Feature activation | +1.67e-02 |
Pos logit contributions | |
zag | +0.054 |
ages | +0.051 |
ained | +0.050 |
ometer | +0.048 |
tr | +0.047 |
Neg logit contributions | |
en | -0.031 |
alone | -0.029 |
unatt | -0.029 |
I | -0.029 |
ternally | -0.028 |
Token | , |
Token ID | 11 |
Feature activation | +1.29e-02 |
Pos logit contributions | |
zag | +0.030 |
ages | +0.029 |
tr | +0.028 |
ained | +0.028 |
ine | +0.026 |
Neg logit contributions | |
en | -0.020 |
My | -0.020 |
I | -0.019 |
ivated | -0.019 |
ternally | -0.019 |
Token | mystery |
Token ID | 10715 |
Feature activation | -1.93e-02 |
Pos logit contributions | |
ages | +0.015 |
zag | +0.015 |
ained | +0.014 |
tr | +0.014 |
ine | +0.013 |
Neg logit contributions | |
unatt | -0.011 |
en | -0.011 |
ivated | -0.011 |
My | -0.011 |
ternally | -0.011 |
Token | . |
Token ID | 13 |
Feature activation | +1.55e-02 |
Pos logit contributions | |
ivated | +0.030 |
ternally | +0.029 |
My | +0.028 |
Int | +0.027 |
en | +0.027 |
Neg logit contributions | |
zag | -0.029 |
ages | -0.028 |
tr | -0.026 |
ometer | -0.025 |
packs | -0.025 |
Token | They |
Token ID | 1119 |
Feature activation | +9.78e-03 |
Pos logit contributions | |
zag | +0.022 |
ages | +0.021 |
tr | +0.019 |
ained | +0.018 |
tails | +0.018 |
Neg logit contributions | |
My | -0.025 |
I | -0.025 |
en | -0.025 |
ivated | -0.024 |
alone | -0.024 |
Token | learned |
Token ID | 4499 |
Feature activation | +4.32e-02 |
Pos logit contributions | |
zag | +0.019 |
ages | +0.019 |
ained | +0.018 |
tr | +0.017 |
ometer | +0.017 |
Neg logit contributions | |
en | -0.010 |
unatt | -0.010 |
ivated | -0.010 |
ternally | -0.010 |
alone | -0.009 |
Token | a |
Token ID | 257 |
Feature activation | +7.13e-02 |
Pos logit contributions | |
zag | +0.094 |
ops | +0.091 |
ages | +0.089 |
ahs | +0.087 |
ccoli | +0.086 |
Neg logit contributions | |
back | -0.040 |
en | -0.036 |
I | -0.036 |
alone | -0.035 |
My | -0.034 |
Token | hard |
Token ID | 1327 |
Feature activation | +1.05e-01 |
Pos logit contributions | |
packs | +0.112 |
aj | +0.105 |
ained | +0.103 |
ages | +0.103 |
aked | +0.101 |
Neg logit contributions | |
en | -0.105 |
back | -0.100 |
else | -0.099 |
I | -0.098 |
calm | -0.096 |
Token | lesson |
Token ID | 11483 |
Feature activation | +5.24e-02 |
Pos logit contributions | |
aj | +0.180 |
packs | +0.162 |
tails | +0.158 |
ccoli | +0.154 |
girls | +0.153 |
Neg logit contributions | |
corner | -0.126 |
tune | -0.125 |
back | -0.122 |
alone | -0.121 |
fright | -0.119 |
Token | . |
Token ID | 13 |
Feature activation | +2.32e-02 |
Pos logit contributions | |
ccoli | +0.127 |
ages | +0.125 |
aj | +0.124 |
zag | +0.124 |
uffy | +0.121 |
Neg logit contributions | |
alone | -0.033 |
I | -0.030 |
back | -0.025 |
unatt | -0.024 |
else | -0.023 |
Token | They |
Token ID | 1119 |
Feature activation | +2.63e-02 |
Pos logit contributions | |
zag | +0.036 |
ages | +0.034 |
tr | +0.030 |
ahs | +0.030 |
tails | +0.030 |
Neg logit contributions | |
I | -0.036 |
My | -0.036 |
en | -0.035 |
alone | -0.033 |
Int | -0.033 |
Token | learned |
Token ID | 4499 |
Feature activation | +4.70e-02 |
Pos logit contributions | |
zag | +0.051 |
ages | +0.049 |
ained | +0.046 |
ccoli | +0.046 |
ometer | +0.045 |
Neg logit contributions | |
en | -0.028 |
alone | -0.028 |
unatt | -0.027 |
I | -0.027 |
ternally | -0.026 |
Token | that |
Token ID | 326 |
Feature activation | +1.80e-02 |
Pos logit contributions | |
zag | +0.111 |
ops | +0.109 |
ages | +0.107 |
ahs | +0.105 |
ccoli | +0.105 |
Neg logit contributions | |
back | -0.033 |
I | -0.029 |
en | -0.028 |
alone | -0.027 |
me | -0.027 |
Token | alert |
Token ID | 7995 |
Feature activation | -8.42e-03 |
Pos logit contributions | |
zag | +0.025 |
ages | +0.024 |
tr | +0.024 |
ained | +0.023 |
ometer | +0.023 |
Neg logit contributions | |
en | -0.017 |
alone | -0.016 |
unatt | -0.016 |
ivated | -0.015 |
ternally | -0.015 |
Token | . |
Token ID | 13 |
Feature activation | +2.37e-02 |
Pos logit contributions | |
ivated | +0.012 |
ternally | +0.012 |
My | +0.012 |
en | +0.011 |
unatt | +0.011 |
Neg logit contributions | |
ages | -0.013 |
zag | -0.013 |
ained | -0.012 |
tr | -0.012 |
ometer | -0.011 |
Token | They |
Token ID | 1119 |
Feature activation | +1.61e-02 |
Pos logit contributions | |
zag | +0.040 |
ages | +0.038 |
tails | +0.034 |
ahs | +0.034 |
ogo | +0.034 |
Neg logit contributions | |
I | -0.034 |
My | -0.033 |
en | -0.032 |
Int | -0.031 |
alone | -0.030 |
Token | learned |
Token ID | 4499 |
Feature activation | +4.46e-02 |
Pos logit contributions | |
zag | +0.033 |
ages | +0.032 |
ained | +0.031 |
tr | +0.030 |
ometer | +0.030 |
Neg logit contributions | |
en | -0.015 |
unatt | -0.015 |
alone | -0.015 |
ivated | -0.014 |
ternally | -0.014 |
Token | a |
Token ID | 257 |
Feature activation | +6.86e-02 |
Pos logit contributions | |
zag | +0.099 |
ops | +0.095 |
ages | +0.095 |
ccoli | +0.092 |
ahs | +0.091 |
Neg logit contributions | |
back | -0.038 |
en | -0.035 |
I | -0.034 |
alone | -0.033 |
My | -0.033 |
Token | hard |
Token ID | 1327 |
Feature activation | +1.04e-01 |
Pos logit contributions | |
packs | +0.106 |
ained | +0.100 |
ages | +0.099 |
aked | +0.098 |
aj | +0.097 |
Neg logit contributions | |
en | -0.104 |
else | -0.097 |
back | -0.096 |
I | -0.094 |
calm | -0.093 |
Token | lesson |
Token ID | 11483 |
Feature activation | +5.16e-02 |
Pos logit contributions | |
aj | +0.181 |
ccoli | +0.165 |
tails | +0.162 |
packs | +0.158 |
girls | +0.157 |
Neg logit contributions | |
tune | -0.115 |
corner | -0.113 |
back | -0.112 |
alone | -0.111 |
's | -0.109 |
Token | . |
Token ID | 13 |
Feature activation | +2.97e-02 |
Pos logit contributions | |
ccoli | +0.123 |
ages | +0.122 |
zag | +0.122 |
aj | +0.120 |
tr | +0.119 |
Neg logit contributions | |
alone | -0.033 |
I | -0.031 |
unatt | -0.025 |
back | -0.025 |
else | -0.023 |
Token | They |
Token ID | 1119 |
Feature activation | +3.06e-02 |
Pos logit contributions | |
zag | +0.049 |
ages | +0.046 |
ahs | +0.042 |
tails | +0.042 |
ies | +0.041 |
Neg logit contributions | |
I | -0.045 |
My | -0.044 |
en | -0.042 |
Int | -0.039 |
alone | -0.039 |
Token | never |
Token ID | 1239 |
Feature activation | +1.62e-02 |
Pos logit contributions | |
zag | +0.061 |
ages | +0.058 |
ained | +0.055 |
ccoli | +0.055 |
ometer | +0.054 |
Neg logit contributions | |
en | -0.032 |
alone | -0.031 |
unatt | -0.030 |
I | -0.029 |
ternally | -0.028 |
Token | stole |
Token ID | 18240 |
Feature activation | +7.90e-03 |
Pos logit contributions | |
zag | +0.031 |
ages | +0.030 |
ained | +0.028 |
ometer | +0.028 |
tr | +0.028 |
Neg logit contributions | |
en | -0.018 |
unatt | -0.017 |
alone | -0.017 |
I | -0.017 |
My | -0.016 |
Token | treasure |
Token ID | 14068 |
Feature activation | +1.52e-02 |
Pos logit contributions | |
ages | +0.012 |
zag | +0.012 |
ained | +0.011 |
tr | +0.011 |
ometer | +0.010 |
Neg logit contributions | |
unatt | -0.011 |
en | -0.011 |
ternally | -0.011 |
ivated | -0.011 |
My | -0.011 |
Token | . |
Token ID | 13 |
Feature activation | +1.29e-02 |
Pos logit contributions | |
ages | +0.026 |
zag | +0.026 |
ained | +0.025 |
tr | +0.024 |
ometer | +0.023 |
Neg logit contributions | |
unatt | -0.018 |
alone | -0.018 |
ternally | -0.017 |
en | -0.017 |
ivated | -0.017 |
Token | They |
Token ID | 1119 |
Feature activation | +2.00e-03 |
Pos logit contributions | |
zag | +0.017 |
ages | +0.016 |
tr | +0.014 |
ained | +0.014 |
tails | +0.014 |
Neg logit contributions | |
My | -0.022 |
en | -0.022 |
I | -0.022 |
ivated | -0.022 |
alone | -0.021 |
Token | learned |
Token ID | 4499 |
Feature activation | +4.44e-02 |
Pos logit contributions | |
ages | +0.003 |
zag | +0.003 |
tr | +0.003 |
ained | +0.003 |
ometer | +0.003 |
Neg logit contributions | |
ivated | -0.002 |
ternally | -0.002 |
unatt | -0.002 |
My | -0.002 |
en | -0.002 |
Token | a |
Token ID | 257 |
Feature activation | +6.93e-02 |
Pos logit contributions | |
zag | +0.096 |
ops | +0.092 |
ages | +0.090 |
ahs | +0.089 |
ained | +0.088 |
Neg logit contributions | |
back | -0.041 |
I | -0.040 |
en | -0.038 |
My | -0.038 |
me | -0.038 |
Token | hard |
Token ID | 1327 |
Feature activation | +1.04e-01 |
Pos logit contributions | |
packs | +0.099 |
ained | +0.094 |
aj | +0.093 |
ates | +0.090 |
ages | +0.090 |
Neg logit contributions | |
en | -0.107 |
back | -0.106 |
else | -0.103 |
I | -0.102 |
calm | -0.100 |
Token | lesson |
Token ID | 11483 |
Feature activation | +5.28e-02 |
Pos logit contributions | |
aj | +0.172 |
ccoli | +0.151 |
omo | +0.149 |
packs | +0.148 |
akers | +0.144 |
Neg logit contributions | |
back | -0.139 |
alone | -0.133 |
corner | -0.125 |
's | -0.125 |
to | -0.124 |
Token | . |
Token ID | 13 |
Feature activation | +2.57e-02 |
Pos logit contributions | |
ccoli | +0.130 |
ages | +0.128 |
aj | +0.127 |
zag | +0.126 |
tr | +0.124 |
Neg logit contributions | |
alone | -0.033 |
I | -0.028 |
back | -0.025 |
me | -0.022 |
nearby | -0.022 |
Token | They |
Token ID | 1119 |
Feature activation | +2.51e-02 |
Pos logit contributions | |
zag | +0.041 |
ages | +0.037 |
tr | +0.035 |
ahs | +0.035 |
tails | +0.034 |
Neg logit contributions | |
I | -0.041 |
My | -0.040 |
en | -0.040 |
alone | -0.038 |
Int | -0.037 |
Token | should |
Token ID | 815 |
Feature activation | +3.63e-02 |
Pos logit contributions | |
zag | +0.047 |
ages | +0.046 |
ained | +0.043 |
ometer | +0.042 |
ccoli | +0.042 |
Neg logit contributions | |
en | -0.028 |
alone | -0.027 |
unatt | -0.027 |
ternally | -0.026 |
I | -0.026 |
Token | have |
Token ID | 423 |
Feature activation | -6.40e-03 |
Pos logit contributions | |
ages | +0.063 |
zag | +0.060 |
ometer | +0.058 |
ained | +0.058 |
tr | +0.057 |
Neg logit contributions | |
I | -0.049 |
me | -0.047 |
cheer | -0.046 |
alone | -0.045 |
en | -0.045 |
Token | mom |
Token ID | 1995 |
Feature activation | +4.27e-03 |
Pos logit contributions | |
ages | +0.085 |
ained | +0.084 |
zag | +0.082 |
ine | +0.077 |
live | +0.077 |
Neg logit contributions | |
me | -0.036 |
en | -0.034 |
alone | -0.034 |
cheer | -0.033 |
I | -0.032 |
Token | . |
Token ID | 13 |
Feature activation | +1.38e-02 |
Pos logit contributions | |
ages | +0.007 |
zag | +0.007 |
tr | +0.007 |
ained | +0.007 |
ometer | +0.007 |
Neg logit contributions | |
ivated | -0.005 |
unatt | -0.005 |
ternally | -0.005 |
en | -0.005 |
My | -0.005 |
Token | They |
Token ID | 1119 |
Feature activation | +1.16e-02 |
Pos logit contributions | |
zag | +0.022 |
ages | +0.022 |
ained | +0.020 |
tr | +0.019 |
ccoli | +0.019 |
Neg logit contributions | |
My | -0.019 |
I | -0.019 |
en | -0.019 |
ivated | -0.018 |
alone | -0.018 |
Token | learned |
Token ID | 4499 |
Feature activation | +4.71e-02 |
Pos logit contributions | |
zag | +0.022 |
ages | +0.021 |
ained | +0.020 |
tr | +0.019 |
ometer | +0.019 |
Neg logit contributions | |
en | -0.013 |
unatt | -0.013 |
ivated | -0.013 |
ternally | -0.013 |
alone | -0.012 |
Token | a |
Token ID | 257 |
Feature activation | +6.44e-02 |
Pos logit contributions | |
zag | +0.101 |
ages | +0.094 |
ops | +0.094 |
ained | +0.092 |
live | +0.091 |
Neg logit contributions | |
back | -0.047 |
I | -0.044 |
me | -0.043 |
en | -0.042 |
My | -0.041 |
Token | hard |
Token ID | 1327 |
Feature activation | +1.03e-01 |
Pos logit contributions | |
ained | +0.091 |
packs | +0.089 |
ages | +0.088 |
aked | +0.087 |
tails | +0.083 |
Neg logit contributions | |
back | -0.098 |
en | -0.096 |
I | -0.096 |
calm | -0.094 |
else | -0.094 |
Token | lesson |
Token ID | 11483 |
Feature activation | +4.00e-02 |
Pos logit contributions | |
aj | +0.167 |
ends | +0.164 |
tails | +0.163 |
igi | +0.159 |
packs | +0.158 |
Neg logit contributions | |
tune | -0.111 |
corner | -0.109 |
back | -0.108 |
voice | -0.107 |
show | -0.106 |
Token | . |
Token ID | 13 |
Feature activation | +2.33e-02 |
Pos logit contributions | |
ages | +0.090 |
zag | +0.090 |
ccoli | +0.086 |
tr | +0.086 |
tails | +0.085 |
Neg logit contributions | |
I | -0.030 |
alone | -0.030 |
unatt | -0.027 |
back | -0.025 |
My | -0.024 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | +9.24e-03 |
Pos logit contributions | |
zag | +0.037 |
ages | +0.035 |
tr | +0.031 |
tails | +0.031 |
ahs | +0.031 |
Neg logit contributions | |
I | -0.036 |
My | -0.036 |
en | -0.034 |
alone | -0.032 |
Int | -0.032 |
Token | It |
Token ID | 1026 |
Feature activation | +2.08e-02 |
Pos logit contributions | |
ages | +0.011 |
zag | +0.011 |
tr | +0.010 |
ained | +0.009 |
ometer | +0.009 |
Neg logit contributions | |
My | -0.016 |
unatt | -0.016 |
ivated | -0.016 |
en | -0.016 |
ternally | -0.016 |
Token | was |
Token ID | 373 |
Feature activation | +4.61e-02 |
Pos logit contributions | |
ages | +0.039 |
zag | +0.037 |
ained | +0.035 |
tr | +0.035 |
ometer | +0.034 |
Neg logit contributions | |
en | -0.022 |
My | -0.022 |
I | -0.022 |
alone | -0.022 |
me | -0.021 |
Token | bulb |
Token ID | 28287 |
Feature activation | +3.09e-03 |
Pos logit contributions | |
ivated | +0.001 |
ternally | +0.001 |
unatt | +0.001 |
My | +0.001 |
en | +0.001 |
Neg logit contributions | |
zag | -0.001 |
ages | -0.001 |
tr | -0.001 |
ained | -0.001 |
ometer | -0.001 |
Token | . |
Token ID | 13 |
Feature activation | +1.62e-02 |
Pos logit contributions | |
ages | +0.005 |
zag | +0.005 |
tr | +0.005 |
ained | +0.005 |
ometer | +0.005 |
Neg logit contributions | |
ivated | -0.004 |
unatt | -0.004 |
ternally | -0.004 |
en | -0.004 |
My | -0.004 |
Token | They |
Token ID | 1119 |
Feature activation | +6.98e-03 |
Pos logit contributions | |
zag | +0.024 |
ages | +0.023 |
tr | +0.021 |
ained | +0.020 |
ccoli | +0.020 |
Neg logit contributions | |
en | -0.025 |
My | -0.025 |
I | -0.025 |
Int | -0.024 |
ivated | -0.024 |
Token | learned |
Token ID | 4499 |
Feature activation | +4.18e-02 |
Pos logit contributions | |
zag | +0.013 |
ages | +0.013 |
ained | +0.012 |
tr | +0.012 |
ometer | +0.012 |
Neg logit contributions | |
en | -0.007 |
ivated | -0.007 |
unatt | -0.007 |
ternally | -0.007 |
My | -0.007 |
Token | a |
Token ID | 257 |
Feature activation | +7.28e-02 |
Pos logit contributions | |
zag | +0.091 |
ops | +0.086 |
ages | +0.085 |
ahs | +0.083 |
ccoli | +0.082 |
Neg logit contributions | |
back | -0.037 |
en | -0.035 |
I | -0.035 |
My | -0.034 |
alone | -0.034 |
Token | hard |
Token ID | 1327 |
Feature activation | +1.02e-01 |
Pos logit contributions | |
packs | +0.107 |
aj | +0.100 |
ates | +0.097 |
ained | +0.097 |
ops | +0.094 |
Neg logit contributions | |
en | -0.110 |
I | -0.107 |
back | -0.107 |
My | -0.103 |
else | -0.103 |
Token | lesson |
Token ID | 11483 |
Feature activation | +4.70e-02 |
Pos logit contributions | |
aj | +0.167 |
packs | +0.158 |
uth | +0.143 |
omo | +0.139 |
tails | +0.137 |
Neg logit contributions | |
back | -0.133 |
alone | -0.133 |
's | -0.128 |
me | -0.126 |
fright | -0.125 |
Token | . |
Token ID | 13 |
Feature activation | +2.54e-02 |
Pos logit contributions | |
ages | +0.110 |
zag | +0.109 |
ccoli | +0.109 |
aj | +0.108 |
tr | +0.105 |
Neg logit contributions | |
alone | -0.033 |
I | -0.030 |
back | -0.026 |
unatt | -0.026 |
else | -0.024 |
Token | The |
Token ID | 383 |
Feature activation | +3.16e-02 |
Pos logit contributions | |
zag | +0.042 |
ages | +0.040 |
ahs | +0.036 |
tr | +0.036 |
tails | +0.035 |
Neg logit contributions | |
I | -0.037 |
My | -0.037 |
en | -0.036 |
alone | -0.033 |
Int | -0.033 |
Token | fox |
Token ID | 21831 |
Feature activation | -8.02e-03 |
Pos logit contributions | |
ages | +0.068 |
zag | +0.065 |
packs | +0.062 |
ained | +0.060 |
ccoli | +0.059 |
Neg logit contributions | |
me | -0.034 |
en | -0.034 |
alone | -0.033 |
I | -0.029 |
nearby | -0.029 |
Token | was |
Token ID | 373 |
Feature activation | -2.28e-03 |
Pos logit contributions | |
ivated | +0.012 |
My | +0.011 |
ternally | +0.011 |
unatt | +0.011 |
en | +0.011 |
Neg logit contributions | |
ages | -0.012 |
zag | -0.012 |
tr | -0.011 |
ained | -0.011 |
ccoli | -0.010 |
Token | day |
Token ID | 1110 |
Feature activation | -3.03e-03 |
Pos logit contributions | |
zag | +0.022 |
ages | +0.022 |
ained | +0.021 |
tr | +0.020 |
tails | +0.020 |
Neg logit contributions | |
unatt | -0.026 |
en | -0.026 |
alone | -0.025 |
I | -0.025 |
My | -0.025 |
Token | . |
Token ID | 13 |
Feature activation | +2.02e-02 |
Pos logit contributions | |
ternally | +0.004 |
ivated | +0.004 |
My | +0.004 |
en | +0.004 |
unatt | +0.004 |
Neg logit contributions | |
ages | -0.005 |
zag | -0.005 |
tr | -0.005 |
ained | -0.005 |
ometer | -0.004 |
Token | They |
Token ID | 1119 |
Feature activation | +1.56e-02 |
Pos logit contributions | |
zag | +0.037 |
ages | +0.037 |
tails | +0.033 |
ained | +0.033 |
tr | +0.032 |
Neg logit contributions | |
I | -0.025 |
My | -0.024 |
en | -0.024 |
Int | -0.022 |
alone | -0.022 |
Token | learned |
Token ID | 4499 |
Feature activation | +4.00e-02 |
Pos logit contributions | |
zag | +0.030 |
ages | +0.030 |
ained | +0.028 |
tr | +0.027 |
ometer | +0.027 |
Neg logit contributions | |
en | -0.016 |
unatt | -0.016 |
alone | -0.016 |
ivated | -0.016 |
ternally | -0.015 |
Token | a |
Token ID | 257 |
Feature activation | +6.23e-02 |
Pos logit contributions | |
zag | +0.087 |
ages | +0.083 |
ops | +0.081 |
ained | +0.079 |
tails | +0.079 |
Neg logit contributions | |
back | -0.035 |
I | -0.034 |
en | -0.033 |
My | -0.033 |
me | -0.032 |
Token | hard |
Token ID | 1327 |
Feature activation | +1.02e-01 |
Pos logit contributions | |
packs | +0.090 |
ages | +0.087 |
aked | +0.087 |
ained | +0.085 |
ment | +0.080 |
Neg logit contributions | |
en | -0.096 |
back | -0.094 |
I | -0.094 |
else | -0.091 |
calm | -0.091 |
Token | lesson |
Token ID | 11483 |
Feature activation | +4.99e-02 |
Pos logit contributions | |
aj | +0.161 |
packs | +0.157 |
igi | +0.157 |
tails | +0.153 |
ends | +0.151 |
Neg logit contributions | |
tune | -0.121 |
corner | -0.116 |
's | -0.112 |
me | -0.111 |
back | -0.110 |
Token | . |
Token ID | 13 |
Feature activation | +2.82e-02 |
Pos logit contributions | |
ages | +0.114 |
zag | +0.113 |
ccoli | +0.113 |
tr | +0.109 |
uffy | +0.109 |
Neg logit contributions | |
I | -0.036 |
alone | -0.035 |
unatt | -0.030 |
back | -0.029 |
me | -0.027 |
Token | Fire |
Token ID | 3764 |
Feature activation | +3.91e-02 |
Pos logit contributions | |
zag | +0.049 |
ages | +0.046 |
tails | +0.042 |
ahs | +0.042 |
ies | +0.041 |
Neg logit contributions | |
I | -0.040 |
My | -0.039 |
en | -0.037 |
Int | -0.035 |
My | -0.034 |
Token | is |
Token ID | 318 |
Feature activation | +4.24e-02 |
Pos logit contributions | |
ages | +0.063 |
zag | +0.061 |
tails | +0.059 |
ine | +0.057 |
ained | +0.057 |
Neg logit contributions | |
I | -0.048 |
alone | -0.047 |
unatt | -0.046 |
ternally | -0.046 |
me | -0.045 |
Token | not |
Token ID | 407 |
Feature activation | +3.53e-02 |
Pos logit contributions | |
zag | +0.079 |
ages | +0.073 |
ops | +0.071 |
tails | +0.070 |
ometer | +0.068 |
Neg logit contributions | |
en | -0.057 |
alone | -0.056 |
I | -0.055 |
back | -0.054 |
me | -0.054 |
Token | bs |
Token ID | 1443 |
Feature activation | -1.14e-02 |
Pos logit contributions | |
zag | +0.022 |
ages | +0.021 |
ained | +0.021 |
tr | +0.020 |
tails | +0.020 |
Neg logit contributions | |
unatt | -0.017 |
ternally | -0.017 |
en | -0.017 |
ivated | -0.017 |
My | -0.017 |
Token | . |
Token ID | 13 |
Feature activation | +2.07e-02 |
Pos logit contributions | |
ivated | +0.016 |
ternally | +0.016 |
en | +0.015 |
My | +0.015 |
Int | +0.015 |
Neg logit contributions | |
zag | -0.018 |
ages | -0.018 |
tr | -0.016 |
ained | -0.016 |
ometer | -0.016 |
Token | They |
Token ID | 1119 |
Feature activation | +1.17e-02 |
Pos logit contributions | |
zag | +0.034 |
ages | +0.032 |
tails | +0.029 |
tr | +0.029 |
ained | +0.029 |
Neg logit contributions | |
My | -0.029 |
I | -0.029 |
en | -0.029 |
Int | -0.027 |
alone | -0.027 |
Token | learned |
Token ID | 4499 |
Feature activation | +4.24e-02 |
Pos logit contributions | |
zag | +0.023 |
ages | +0.022 |
ained | +0.021 |
tr | +0.021 |
ometer | +0.020 |
Neg logit contributions | |
en | -0.012 |
unatt | -0.012 |
ivated | -0.012 |
alone | -0.012 |
ternally | -0.012 |
Token | a |
Token ID | 257 |
Feature activation | +6.87e-02 |
Pos logit contributions | |
zag | +0.089 |
ops | +0.086 |
ages | +0.085 |
ccoli | +0.084 |
ahs | +0.081 |
Neg logit contributions | |
back | -0.040 |
en | -0.038 |
I | -0.037 |
My | -0.036 |
alone | -0.035 |
Token | hard |
Token ID | 1327 |
Feature activation | +1.01e-01 |
Pos logit contributions | |
packs | +0.105 |
ained | +0.096 |
ages | +0.095 |
aj | +0.095 |
aked | +0.095 |
Neg logit contributions | |
en | -0.104 |
back | -0.099 |
else | -0.098 |
I | -0.095 |
calm | -0.093 |
Token | lesson |
Token ID | 11483 |
Feature activation | +5.20e-02 |
Pos logit contributions | |
aj | +0.170 |
ccoli | +0.156 |
packs | +0.155 |
tails | +0.149 |
akers | +0.144 |
Neg logit contributions | |
back | -0.121 |
alone | -0.119 |
tune | -0.119 |
corner | -0.116 |
en | -0.116 |
Token | . |
Token ID | 13 |
Feature activation | +2.66e-02 |
Pos logit contributions | |
ccoli | +0.124 |
ages | +0.123 |
zag | +0.121 |
aj | +0.121 |
tr | +0.119 |
Neg logit contributions | |
alone | -0.034 |
I | -0.032 |
back | -0.027 |
unatt | -0.026 |
else | -0.025 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | +7.33e-03 |
Pos logit contributions | |
zag | +0.045 |
ages | +0.043 |
tr | +0.039 |
ahs | +0.039 |
tails | +0.039 |
Neg logit contributions | |
I | -0.037 |
My | -0.037 |
en | -0.035 |
alone | -0.033 |
Int | -0.033 |
Token | L |
Token ID | 43 |
Feature activation | +3.38e-03 |
Pos logit contributions | |
ages | +0.008 |
zag | +0.008 |
tr | +0.007 |
ained | +0.007 |
ometer | +0.007 |
Neg logit contributions | |
ivated | -0.013 |
My | -0.013 |
unatt | -0.013 |
en | -0.013 |
ternally | -0.013 |
Token | ily |
Token ID | 813 |
Feature activation | -1.05e-03 |
Pos logit contributions | |
ages | +0.006 |
zag | +0.006 |
tr | +0.006 |
ained | +0.006 |
ometer | +0.005 |
Neg logit contributions | |
ivated | -0.004 |
ternally | -0.004 |
unatt | -0.004 |
My | -0.004 |
en | -0.004 |
Token | honest |
Token ID | 5508 |
Feature activation | +5.98e-03 |
Pos logit contributions | |
ivated | +0.034 |
ternally | +0.033 |
Int | +0.033 |
My | +0.033 |
I | +0.030 |
Neg logit contributions | |
ages | -0.038 |
ained | -0.033 |
zag | -0.033 |
ends | -0.032 |
ies | -0.031 |
Token | . |
Token ID | 13 |
Feature activation | +2.36e-02 |
Pos logit contributions | |
zag | +0.010 |
ages | +0.010 |
tr | +0.010 |
ained | +0.010 |
ometer | +0.009 |
Neg logit contributions | |
unatt | -0.007 |
ivated | -0.007 |
en | -0.007 |
ternally | -0.007 |
My | -0.007 |
Token | They |
Token ID | 1119 |
Feature activation | +1.97e-02 |
Pos logit contributions | |
zag | +0.038 |
ages | +0.036 |
tails | +0.032 |
tr | +0.032 |
ained | +0.032 |
Neg logit contributions | |
My | -0.036 |
I | -0.036 |
en | -0.035 |
Int | -0.032 |
alone | -0.032 |
Token | learned |
Token ID | 4499 |
Feature activation | +5.04e-02 |
Pos logit contributions | |
zag | +0.039 |
ages | +0.039 |
ained | +0.036 |
ometer | +0.036 |
ccoli | +0.035 |
Neg logit contributions | |
en | -0.020 |
unatt | -0.019 |
alone | -0.019 |
ivated | -0.019 |
ternally | -0.019 |
Token | a |
Token ID | 257 |
Feature activation | +6.80e-02 |
Pos logit contributions | |
zag | +0.114 |
ops | +0.112 |
ages | +0.112 |
live | +0.108 |
ccoli | +0.108 |
Neg logit contributions | |
back | -0.040 |
en | -0.035 |
I | -0.035 |
so | -0.035 |
My | -0.034 |
Token | hard |
Token ID | 1327 |
Feature activation | +1.01e-01 |
Pos logit contributions | |
packs | +0.103 |
aked | +0.097 |
ained | +0.097 |
ages | +0.095 |
tails | +0.091 |
Neg logit contributions | |
en | -0.105 |
else | -0.100 |
I | -0.098 |
back | -0.097 |
My | -0.095 |
Token | lesson |
Token ID | 11483 |
Feature activation | +5.58e-02 |
Pos logit contributions | |
aj | +0.158 |
tails | +0.155 |
ccoli | +0.155 |
packs | +0.148 |
igi | +0.146 |
Neg logit contributions | |
tune | -0.122 |
back | -0.117 |
corner | -0.117 |
alone | -0.116 |
show | -0.113 |
Token | . |
Token ID | 13 |
Feature activation | +3.02e-02 |
Pos logit contributions | |
ccoli | +0.133 |
ages | +0.132 |
aj | +0.130 |
zag | +0.130 |
tr | +0.128 |
Neg logit contributions | |
alone | -0.036 |
I | -0.034 |
back | -0.027 |
unatt | -0.026 |
me | -0.024 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | +5.49e-03 |
Pos logit contributions | |
zag | +0.051 |
ages | +0.049 |
tails | +0.044 |
ahs | +0.044 |
tr | +0.044 |
Neg logit contributions | |
I | -0.045 |
My | -0.044 |
en | -0.042 |
alone | -0.039 |
Int | -0.038 |
Token | L |
Token ID | 43 |
Feature activation | +4.41e-03 |
Pos logit contributions | |
ages | +0.006 |
zag | +0.006 |
tr | +0.005 |
ained | +0.005 |
ometer | +0.005 |
Neg logit contributions | |
ivated | -0.010 |
unatt | -0.010 |
My | -0.010 |
ternally | -0.010 |
en | -0.010 |
Token | ily |
Token ID | 813 |
Feature activation | +3.79e-03 |
Pos logit contributions | |
ages | +0.007 |
zag | +0.007 |
tr | +0.007 |
ained | +0.007 |
ometer | +0.007 |
Neg logit contributions | |
ivated | -0.005 |
ternally | -0.005 |
unatt | -0.005 |
My | -0.005 |
en | -0.005 |
Token | mistake |
Token ID | 7457 |
Feature activation | -1.14e-03 |
Pos logit contributions | |
ages | +0.027 |
zag | +0.026 |
ained | +0.026 |
tr | +0.024 |
ccoli | +0.023 |
Neg logit contributions | |
en | -0.025 |
unatt | -0.025 |
ternally | -0.025 |
alone | -0.025 |
cheer | -0.025 |
Token | . |
Token ID | 13 |
Feature activation | -4.95e-04 |
Pos logit contributions | |
ivated | +0.001 |
ternally | +0.001 |
unatt | +0.001 |
My | +0.001 |
en | +0.001 |
Neg logit contributions | |
ages | -0.002 |
zag | -0.002 |
tr | -0.002 |
ained | -0.002 |
ometer | -0.002 |
Token | They |
Token ID | 1119 |
Feature activation | +1.22e-02 |
Pos logit contributions | |
ivated | +0.001 |
unatt | +0.001 |
ternally | +0.001 |
en | +0.001 |
My | +0.001 |
Neg logit contributions | |
ages | -0.001 |
zag | -0.001 |
tr | -0.001 |
ained | -0.001 |
ometer | -0.001 |
Token | learned |
Token ID | 4499 |
Feature activation | +4.66e-02 |
Pos logit contributions | |
zag | +0.022 |
ages | +0.022 |
ained | +0.021 |
tr | +0.020 |
ometer | +0.020 |
Neg logit contributions | |
en | -0.014 |
ternally | -0.014 |
unatt | -0.014 |
ivated | -0.014 |
alone | -0.013 |
Token | a |
Token ID | 257 |
Feature activation | +7.26e-02 |
Pos logit contributions | |
zag | +0.102 |
ops | +0.098 |
ages | +0.095 |
ained | +0.094 |
ccoli | +0.092 |
Neg logit contributions | |
back | -0.041 |
I | -0.038 |
so | -0.037 |
en | -0.037 |
My | -0.037 |
Token | hard |
Token ID | 1327 |
Feature activation | +1.01e-01 |
Pos logit contributions | |
packs | +0.106 |
ained | +0.099 |
ates | +0.098 |
aj | +0.097 |
ops | +0.094 |
Neg logit contributions | |
back | -0.112 |
en | -0.108 |
me | -0.107 |
I | -0.106 |
fright | -0.102 |
Token | lesson |
Token ID | 11483 |
Feature activation | +4.54e-02 |
Pos logit contributions | |
aj | +0.168 |
packs | +0.152 |
ccoli | +0.144 |
ends | +0.140 |
akers | +0.137 |
Neg logit contributions | |
back | -0.131 |
alone | -0.124 |
fright | -0.122 |
corner | -0.122 |
tune | -0.121 |
Token | . |
Token ID | 13 |
Feature activation | +1.45e-02 |
Pos logit contributions | |
ages | +0.102 |
ccoli | +0.101 |
zag | +0.100 |
tr | +0.100 |
aj | +0.098 |
Neg logit contributions | |
alone | -0.035 |
I | -0.033 |
unatt | -0.029 |
back | -0.029 |
nowhere | -0.027 |
Token | They |
Token ID | 1119 |
Feature activation | +2.42e-02 |
Pos logit contributions | |
zag | +0.023 |
ages | +0.023 |
tr | +0.021 |
ained | +0.020 |
tails | +0.020 |
Neg logit contributions | |
My | -0.020 |
I | -0.020 |
en | -0.020 |
ivated | -0.019 |
Int | -0.019 |
Token | should |
Token ID | 815 |
Feature activation | +3.89e-02 |
Pos logit contributions | |
zag | +0.045 |
ages | +0.043 |
ained | +0.042 |
ometer | +0.040 |
ccoli | +0.040 |
Neg logit contributions | |
en | -0.027 |
alone | -0.026 |
ternally | -0.026 |
unatt | -0.026 |
I | -0.026 |
Token | never |
Token ID | 1239 |
Feature activation | +5.06e-03 |
Pos logit contributions | |
ages | +0.067 |
zag | +0.065 |
ained | +0.063 |
ometer | +0.063 |
tr | +0.062 |
Neg logit contributions | |
I | -0.052 |
me | -0.050 |
cheer | -0.047 |
alone | -0.046 |
en | -0.046 |
Token | animals |
Token ID | 4695 |
Feature activation | -1.54e-02 |
Pos logit contributions | |
ages | +0.027 |
zag | +0.026 |
ained | +0.025 |
ccoli | +0.024 |
tr | +0.024 |
Neg logit contributions | |
en | -0.025 |
unatt | -0.024 |
My | -0.023 |
me | -0.023 |
alone | -0.023 |
Token | . |
Token ID | 13 |
Feature activation | +1.35e-02 |
Pos logit contributions | |
ternally | +0.022 |
ivated | +0.021 |
My | +0.021 |
en | +0.021 |
Int | +0.021 |
Neg logit contributions | |
ages | -0.024 |
zag | -0.023 |
tr | -0.021 |
ained | -0.021 |
ometer | -0.020 |
Token | She |
Token ID | 1375 |
Feature activation | +5.29e-03 |
Pos logit contributions | |
zag | +0.028 |
ages | +0.028 |
tr | +0.025 |
ained | +0.025 |
tails | +0.025 |
Neg logit contributions | |
My | -0.012 |
I | -0.012 |
en | -0.012 |
ivated | -0.011 |
alone | -0.011 |
Token | learned |
Token ID | 4499 |
Feature activation | +3.00e-02 |
Pos logit contributions | |
zag | +0.010 |
ages | +0.010 |
ained | +0.009 |
tr | +0.009 |
ometer | +0.009 |
Neg logit contributions | |
en | -0.006 |
unatt | -0.005 |
ivated | -0.005 |
ternally | -0.005 |
My | -0.005 |
Token | a |
Token ID | 257 |
Feature activation | +6.69e-02 |
Pos logit contributions | |
zag | +0.063 |
ages | +0.061 |
ccoli | +0.058 |
ained | +0.057 |
ometer | +0.056 |
Neg logit contributions | |
en | -0.029 |
My | -0.028 |
I | -0.028 |
me | -0.028 |
alone | -0.027 |
Token | hard |
Token ID | 1327 |
Feature activation | +1.00e-01 |
Pos logit contributions | |
packs | +0.100 |
aj | +0.095 |
ages | +0.094 |
ained | +0.093 |
aked | +0.091 |
Neg logit contributions | |
en | -0.105 |
I | -0.098 |
My | -0.095 |
back | -0.095 |
else | -0.094 |
Token | lesson |
Token ID | 11483 |
Feature activation | +4.61e-02 |
Pos logit contributions | |
aj | +0.176 |
packs | +0.163 |
omo | +0.151 |
girls | +0.150 |
ccoli | +0.150 |
Neg logit contributions | |
tune | -0.119 |
alone | -0.116 |
back | -0.116 |
corner | -0.114 |
voice | -0.113 |
Token | . |
Token ID | 13 |
Feature activation | +2.20e-02 |
Pos logit contributions | |
ages | +0.106 |
ccoli | +0.105 |
zag | +0.105 |
aj | +0.101 |
tr | +0.100 |
Neg logit contributions | |
alone | -0.037 |
I | -0.036 |
unatt | -0.031 |
me | -0.031 |
back | -0.029 |
Token | She |
Token ID | 1375 |
Feature activation | +2.22e-02 |
Pos logit contributions | |
zag | +0.048 |
ages | +0.047 |
tr | +0.043 |
tails | +0.042 |
ahs | +0.042 |
Neg logit contributions | |
I | -0.020 |
My | -0.020 |
en | -0.019 |
alone | -0.018 |
Int | -0.017 |
Token | was |
Token ID | 373 |
Feature activation | -5.20e-03 |
Pos logit contributions | |
zag | +0.043 |
ages | +0.041 |
ained | +0.039 |
ometer | +0.038 |
tr | +0.038 |
Neg logit contributions | |
en | -0.024 |
I | -0.023 |
alone | -0.022 |
unatt | -0.022 |
My | -0.022 |
Token | not |
Token ID | 407 |
Feature activation | +1.96e-02 |
Pos logit contributions | |
ternally | +0.007 |
ivated | +0.007 |
unatt | +0.007 |
My | +0.007 |
Int | +0.007 |
Neg logit contributions | |
ages | -0.008 |
zag | -0.008 |
ained | -0.007 |
tr | -0.007 |
ccoli | -0.007 |
Token | Tom |
Token ID | 4186 |
Feature activation | +2.12e-03 |
Pos logit contributions | |
zag | +0.031 |
ages | +0.030 |
tr | +0.027 |
tails | +0.027 |
ained | +0.026 |
Neg logit contributions | |
My | -0.026 |
en | -0.026 |
I | -0.026 |
ivated | -0.024 |
Int | -0.024 |
Token | and |
Token ID | 290 |
Feature activation | +9.42e-03 |
Pos logit contributions | |
ages | +0.003 |
zag | +0.003 |
tr | +0.003 |
ained | +0.003 |
ometer | +0.003 |
Neg logit contributions | |
ivated | -0.003 |
unatt | -0.003 |
ternally | -0.003 |
en | -0.003 |
My | -0.003 |
Token | Lily |
Token ID | 20037 |
Feature activation | +7.96e-04 |
Pos logit contributions | |
ages | +0.020 |
zag | +0.020 |
ained | +0.019 |
tr | +0.019 |
ometer | +0.018 |
Neg logit contributions | |
en | -0.008 |
My | -0.008 |
unatt | -0.008 |
ivated | -0.008 |
alone | -0.008 |
Token | learned |
Token ID | 4499 |
Feature activation | +4.98e-02 |
Pos logit contributions | |
ages | +0.001 |
zag | +0.001 |
tr | +0.001 |
ained | +0.001 |
ometer | +0.001 |
Neg logit contributions | |
ivated | -0.001 |
unatt | -0.001 |
My | -0.001 |
ternally | -0.001 |
en | -0.001 |
Token | a |
Token ID | 257 |
Feature activation | +7.04e-02 |
Pos logit contributions | |
zag | +0.112 |
ops | +0.105 |
ccoli | +0.102 |
ages | +0.102 |
live | +0.102 |
Neg logit contributions | |
back | -0.044 |
en | -0.043 |
alone | -0.039 |
so | -0.039 |
me | -0.036 |
Token | hard |
Token ID | 1327 |
Feature activation | +1.00e-01 |
Pos logit contributions | |
packs | +0.105 |
ained | +0.099 |
aj | +0.098 |
aked | +0.096 |
live | +0.094 |
Neg logit contributions | |
en | -0.107 |
back | -0.099 |
else | -0.098 |
I | -0.097 |
My | -0.096 |
Token | lesson |
Token ID | 11483 |
Feature activation | +5.35e-02 |
Pos logit contributions | |
aj | +0.169 |
tails | +0.154 |
packs | +0.148 |
ends | +0.145 |
girls | +0.143 |
Neg logit contributions | |
back | -0.116 |
corner | -0.113 |
en | -0.112 |
tune | -0.112 |
alone | -0.111 |
Token | . |
Token ID | 13 |
Feature activation | +3.45e-02 |
Pos logit contributions | |
ccoli | +0.125 |
zag | +0.123 |
tr | +0.123 |
ages | +0.122 |
tails | +0.120 |
Neg logit contributions | |
alone | -0.037 |
I | -0.036 |
unatt | -0.030 |
back | -0.029 |
me | -0.027 |
Token | They |
Token ID | 1119 |
Feature activation | +3.30e-02 |
Pos logit contributions | |
zag | +0.057 |
ages | +0.054 |
w | +0.050 |
tails | +0.049 |
tr | +0.049 |
Neg logit contributions | |
I | -0.050 |
en | -0.049 |
My | -0.049 |
alone | -0.046 |
Int | -0.045 |
Token | never |
Token ID | 1239 |
Feature activation | +2.03e-02 |
Pos logit contributions | |
zag | +0.065 |
ages | +0.062 |
ained | +0.060 |
ometer | +0.058 |
ccoli | +0.058 |
Neg logit contributions | |
en | -0.035 |
alone | -0.033 |
I | -0.032 |
ternally | -0.032 |
cheer | -0.032 |
Token | climbed |
Token ID | 19952 |
Feature activation | +2.60e-02 |
Pos logit contributions | |
zag | +0.039 |
ages | +0.038 |
ained | +0.036 |
ometer | +0.035 |
tr | +0.035 |
Neg logit contributions | |
en | -0.021 |
alone | -0.021 |
unatt | -0.021 |
I | -0.021 |
My | -0.020 |
Token | okay |
Token ID | 8788 |
Feature activation | -7.51e-04 |
Pos logit contributions | |
ivated | +0.015 |
My | +0.015 |
ternally | +0.015 |
Int | +0.015 |
unatt | +0.014 |
Neg logit contributions | |
ages | -0.020 |
zag | -0.019 |
ained | -0.019 |
tr | -0.018 |
ends | -0.018 |
Token | . |
Token ID | 13 |
Feature activation | +3.00e-02 |
Pos logit contributions | |
ivated | +0.001 |
ternally | +0.001 |
unatt | +0.001 |
en | +0.001 |
My | +0.001 |
Neg logit contributions | |
ages | -0.001 |
zag | -0.001 |
tr | -0.001 |
ained | -0.001 |
ometer | -0.001 |
Token | They |
Token ID | 1119 |
Feature activation | +1.69e-02 |
Pos logit contributions | |
zag | +0.059 |
ages | +0.056 |
ogo | +0.051 |
ahs | +0.051 |
tails | +0.051 |
Neg logit contributions | |
I | -0.036 |
My | -0.035 |
en | -0.033 |
Int | -0.032 |
My | -0.031 |
Token | learned |
Token ID | 4499 |
Feature activation | +4.09e-02 |
Pos logit contributions | |
zag | +0.032 |
ages | +0.031 |
ained | +0.029 |
tr | +0.029 |
ometer | +0.028 |
Neg logit contributions | |
en | -0.018 |
unatt | -0.018 |
alone | -0.018 |
ternally | -0.018 |
ivated | -0.018 |
Token | a |
Token ID | 257 |
Feature activation | +6.49e-02 |
Pos logit contributions | |
zag | +0.087 |
ages | +0.081 |
ops | +0.081 |
ained | +0.080 |
live | +0.078 |
Neg logit contributions | |
back | -0.041 |
en | -0.037 |
I | -0.037 |
My | -0.036 |
me | -0.036 |
Token | hard |
Token ID | 1327 |
Feature activation | +9.99e-02 |
Pos logit contributions | |
packs | +0.094 |
ained | +0.092 |
aked | +0.090 |
aj | +0.087 |
ages | +0.087 |
Neg logit contributions | |
en | -0.100 |
back | -0.098 |
I | -0.096 |
else | -0.096 |
My | -0.093 |
Token | lesson |
Token ID | 11483 |
Feature activation | +4.36e-02 |
Pos logit contributions | |
aj | +0.171 |
packs | +0.160 |
igi | +0.154 |
akers | +0.148 |
uth | +0.147 |
Neg logit contributions | |
tune | -0.121 |
's | -0.115 |
cheer | -0.113 |
voice | -0.113 |
back | -0.112 |
Token | . |
Token ID | 13 |
Feature activation | +3.09e-02 |
Pos logit contributions | |
ages | +0.100 |
zag | +0.100 |
ccoli | +0.096 |
tr | +0.095 |
tails | +0.095 |
Neg logit contributions | |
alone | -0.031 |
I | -0.030 |
unatt | -0.028 |
back | -0.027 |
else | -0.025 |
Token | They |
Token ID | 1119 |
Feature activation | +2.74e-02 |
Pos logit contributions | |
zag | +0.055 |
ages | +0.052 |
ahs | +0.047 |
tails | +0.047 |
ogo | +0.046 |
Neg logit contributions | |
I | -0.043 |
My | -0.042 |
en | -0.040 |
Int | -0.037 |
My | -0.037 |
Token | were |
Token ID | 547 |
Feature activation | +4.96e-03 |
Pos logit contributions | |
zag | +0.052 |
ages | +0.050 |
ained | +0.048 |
ometer | +0.047 |
ccoli | +0.046 |
Neg logit contributions | |
en | -0.029 |
alone | -0.028 |
unatt | -0.028 |
ternally | -0.028 |
I | -0.027 |
Token | poor |
Token ID | 3595 |
Feature activation | +1.42e-02 |
Pos logit contributions | |
zag | +0.008 |
ages | +0.008 |
tr | +0.007 |
ained | +0.007 |
ometer | +0.007 |
Neg logit contributions | |
en | -0.007 |
unatt | -0.007 |
ivated | -0.007 |
ternally | -0.007 |
My | -0.007 |
Token | cookies |
Token ID | 14746 |
Feature activation | +7.15e-03 |
Pos logit contributions | |
ivated | +0.004 |
ternally | +0.004 |
My | +0.004 |
unatt | +0.004 |
en | +0.004 |
Neg logit contributions | |
ages | -0.004 |
zag | -0.004 |
tr | -0.003 |
ained | -0.003 |
ometer | -0.003 |
Token | . |
Token ID | 13 |
Feature activation | +2.19e-02 |
Pos logit contributions | |
ages | +0.012 |
zag | +0.012 |
ained | +0.011 |
tr | +0.011 |
ometer | +0.011 |
Neg logit contributions | |
unatt | -0.009 |
ivated | -0.008 |
en | -0.008 |
ternally | -0.008 |
My | -0.008 |
Token | They |
Token ID | 1119 |
Feature activation | +1.01e-02 |
Pos logit contributions | |
zag | +0.037 |
ages | +0.035 |
tails | +0.032 |
ogo | +0.031 |
ahs | +0.031 |
Neg logit contributions | |
I | -0.032 |
My | -0.032 |
en | -0.031 |
Int | -0.029 |
My | -0.028 |
Token | learned |
Token ID | 4499 |
Feature activation | +4.39e-02 |
Pos logit contributions | |
zag | +0.019 |
ages | +0.019 |
ained | +0.017 |
tr | +0.017 |
ometer | +0.017 |
Neg logit contributions | |
en | -0.011 |
unatt | -0.011 |
ivated | -0.011 |
ternally | -0.011 |
My | -0.010 |
Token | a |
Token ID | 257 |
Feature activation | +6.95e-02 |
Pos logit contributions | |
zag | +0.094 |
ages | +0.088 |
ops | +0.088 |
tails | +0.086 |
ained | +0.085 |
Neg logit contributions | |
back | -0.042 |
I | -0.039 |
My | -0.038 |
en | -0.038 |
me | -0.038 |
Token | hard |
Token ID | 1327 |
Feature activation | +9.97e-02 |
Pos logit contributions | |
packs | +0.101 |
ained | +0.099 |
aked | +0.098 |
ages | +0.097 |
ates | +0.093 |
Neg logit contributions | |
en | -0.103 |
else | -0.103 |
I | -0.100 |
calm | -0.100 |
back | -0.099 |
Token | lesson |
Token ID | 11483 |
Feature activation | +4.58e-02 |
Pos logit contributions | |
aj | +0.157 |
tails | +0.153 |
packs | +0.149 |
ccoli | +0.148 |
uth | +0.145 |
Neg logit contributions | |
corner | -0.116 |
back | -0.115 |
tune | -0.115 |
alone | -0.114 |
's | -0.110 |
Token | . |
Token ID | 13 |
Feature activation | +2.72e-02 |
Pos logit contributions | |
zag | +0.110 |
ages | +0.109 |
ccoli | +0.108 |
tr | +0.104 |
uffy | +0.104 |
Neg logit contributions | |
alone | -0.028 |
I | -0.028 |
unatt | -0.024 |
back | -0.023 |
else | -0.022 |
Token | They |
Token ID | 1119 |
Feature activation | +2.48e-02 |
Pos logit contributions | |
zag | +0.046 |
ages | +0.042 |
tails | +0.039 |
ahs | +0.039 |
ogo | +0.038 |
Neg logit contributions | |
I | -0.041 |
My | -0.040 |
en | -0.038 |
Int | -0.035 |
My | -0.035 |
Token | had |
Token ID | 550 |
Feature activation | +8.52e-03 |
Pos logit contributions | |
zag | +0.047 |
ages | +0.045 |
ained | +0.043 |
ccoli | +0.042 |
ometer | +0.042 |
Neg logit contributions | |
en | -0.027 |
unatt | -0.026 |
alone | -0.026 |
I | -0.026 |
ternally | -0.025 |
Token | a |
Token ID | 257 |
Feature activation | +3.34e-02 |
Pos logit contributions | |
zag | +0.016 |
ages | +0.015 |
tr | +0.014 |
ained | +0.014 |
ometer | +0.014 |
Neg logit contributions | |
en | -0.009 |
unatt | -0.009 |
My | -0.009 |
ternally | -0.009 |
ivated | -0.009 |
Token | toys |
Token ID | 14958 |
Feature activation | -1.64e-02 |
Pos logit contributions | |
ivated | +0.002 |
My | +0.002 |
ternally | +0.002 |
unatt | +0.002 |
en | +0.002 |
Neg logit contributions | |
ages | -0.002 |
zag | -0.002 |
tr | -0.001 |
ained | -0.001 |
ometer | -0.001 |
Token | . |
Token ID | 13 |
Feature activation | +2.42e-02 |
Pos logit contributions | |
My | +0.021 |
ivated | +0.021 |
ternally | +0.021 |
Int | +0.020 |
en | +0.020 |
Neg logit contributions | |
zag | -0.027 |
ages | -0.027 |
tr | -0.025 |
ained | -0.025 |
ometer | -0.024 |
Token | They |
Token ID | 1119 |
Feature activation | +1.29e-02 |
Pos logit contributions | |
zag | +0.039 |
ages | +0.038 |
ogo | +0.033 |
tr | +0.032 |
ahs | +0.032 |
Neg logit contributions | |
I | -0.036 |
en | -0.036 |
My | -0.036 |
Int | -0.034 |
alone | -0.033 |
Token | learned |
Token ID | 4499 |
Feature activation | +4.52e-02 |
Pos logit contributions | |
zag | +0.025 |
ages | +0.024 |
ained | +0.023 |
tr | +0.022 |
ometer | +0.022 |
Neg logit contributions | |
en | -0.014 |
unatt | -0.013 |
ivated | -0.013 |
ternally | -0.013 |
alone | -0.013 |
Token | a |
Token ID | 257 |
Feature activation | +6.63e-02 |
Pos logit contributions | |
zag | +0.099 |
ages | +0.094 |
ops | +0.094 |
ccoli | +0.090 |
ained | +0.090 |
Neg logit contributions | |
back | -0.040 |
en | -0.038 |
I | -0.037 |
me | -0.037 |
so | -0.036 |
Token | hard |
Token ID | 1327 |
Feature activation | +9.96e-02 |
Pos logit contributions | |
packs | +0.095 |
ained | +0.090 |
ages | +0.088 |
aked | +0.088 |
aj | +0.086 |
Neg logit contributions | |
en | -0.103 |
back | -0.100 |
I | -0.098 |
cheer | -0.097 |
My | -0.096 |
Token | lesson |
Token ID | 11483 |
Feature activation | +5.15e-02 |
Pos logit contributions | |
aj | +0.166 |
igi | +0.154 |
packs | +0.153 |
uth | +0.150 |
akers | +0.147 |
Neg logit contributions | |
cheer | -0.122 |
tune | -0.121 |
corner | -0.121 |
back | -0.119 |
voice | -0.116 |
Token | : |
Token ID | 25 |
Feature activation | +4.06e-02 |
Pos logit contributions | |
ages | +0.121 |
ccoli | +0.119 |
zag | +0.119 |
aj | +0.117 |
tr | +0.116 |
Neg logit contributions | |
alone | -0.035 |
I | -0.033 |
back | -0.029 |
unatt | -0.028 |
me | -0.025 |
Token | lying |
Token ID | 9105 |
Feature activation | +4.53e-03 |
Pos logit contributions | |
zag | +0.079 |
ages | +0.073 |
ogo | +0.071 |
tr | +0.071 |
live | +0.070 |
Neg logit contributions | |
I | -0.051 |
en | -0.048 |
My | -0.047 |
alone | -0.042 |
me | -0.041 |
Token | is |
Token ID | 318 |
Feature activation | +5.20e-02 |
Pos logit contributions | |
ages | +0.007 |
zag | +0.007 |
tr | +0.006 |
ained | +0.006 |
ometer | +0.006 |
Neg logit contributions | |
unatt | -0.006 |
ivated | -0.006 |
ternally | -0.006 |
en | -0.006 |
My | -0.006 |
Token | bad |
Token ID | 2089 |
Feature activation | +2.48e-02 |
Pos logit contributions | |
zag | +0.102 |
ages | +0.090 |
ops | +0.090 |
tails | +0.088 |
ira | +0.087 |
Neg logit contributions | |
en | -0.066 |
back | -0.065 |
alone | -0.065 |
me | -0.063 |
I | -0.062 |
Token | day |
Token ID | 1110 |
Feature activation | +5.16e-03 |
Pos logit contributions | |
ained | +0.062 |
ages | +0.059 |
ends | +0.057 |
zag | +0.057 |
tails | +0.057 |
Neg logit contributions | |
alone | -0.066 |
back | -0.065 |
en | -0.064 |
I | -0.063 |
unatt | -0.062 |
Token | . |
Token ID | 13 |
Feature activation | +7.89e-03 |
Pos logit contributions | |
zag | +0.009 |
ages | +0.009 |
tr | +0.008 |
ained | +0.008 |
ometer | +0.008 |
Neg logit contributions | |
unatt | -0.006 |
ivated | -0.006 |
en | -0.006 |
My | -0.006 |
ternally | -0.006 |
Token | They |
Token ID | 1119 |
Feature activation | -7.27e-03 |
Pos logit contributions | |
zag | +0.012 |
ages | +0.012 |
ained | +0.010 |
tr | +0.010 |
tails | +0.010 |
Neg logit contributions | |
My | -0.012 |
en | -0.012 |
ivated | -0.011 |
I | -0.011 |
unatt | -0.011 |
Token | learned |
Token ID | 4499 |
Feature activation | +3.84e-02 |
Pos logit contributions | |
ivated | +0.010 |
ternally | +0.010 |
My | +0.010 |
unatt | +0.010 |
en | +0.010 |
Neg logit contributions | |
ages | -0.011 |
zag | -0.011 |
tr | -0.010 |
ained | -0.010 |
tails | -0.009 |
Token | a |
Token ID | 257 |
Feature activation | +6.85e-02 |
Pos logit contributions | |
zag | +0.079 |
ages | +0.076 |
ops | +0.075 |
ained | +0.074 |
ine | +0.073 |
Neg logit contributions | |
back | -0.037 |
I | -0.036 |
en | -0.035 |
My | -0.034 |
alone | -0.034 |
Token | hard |
Token ID | 1327 |
Feature activation | +9.93e-02 |
Pos logit contributions | |
packs | +0.098 |
ained | +0.092 |
ates | +0.089 |
ages | +0.089 |
aj | +0.089 |
Neg logit contributions | |
back | -0.110 |
en | -0.105 |
I | -0.102 |
else | -0.101 |
me | -0.099 |
Token | lesson |
Token ID | 11483 |
Feature activation | +3.77e-02 |
Pos logit contributions | |
aj | +0.166 |
packs | +0.145 |
akers | +0.139 |
ends | +0.138 |
igi | +0.138 |
Neg logit contributions | |
tune | -0.131 |
back | -0.126 |
fright | -0.121 |
's | -0.121 |
corner | -0.118 |
Token | . |
Token ID | 13 |
Feature activation | +1.78e-02 |
Pos logit contributions | |
zag | +0.084 |
ages | +0.084 |
ccoli | +0.082 |
tr | +0.080 |
aj | +0.079 |
Neg logit contributions | |
alone | -0.029 |
I | -0.027 |
unatt | -0.026 |
back | -0.024 |
My | -0.023 |
Token | They |
Token ID | 1119 |
Feature activation | +1.48e-02 |
Pos logit contributions | |
zag | +0.028 |
ages | +0.027 |
tr | +0.024 |
tails | +0.024 |
ained | +0.024 |
Neg logit contributions | |
I | -0.027 |
My | -0.027 |
en | -0.026 |
alone | -0.024 |
Int | -0.024 |
Token | should |
Token ID | 815 |
Feature activation | +3.46e-02 |
Pos logit contributions | |
zag | +0.026 |
ages | +0.025 |
ained | +0.023 |
tr | +0.023 |
ometer | +0.023 |
Neg logit contributions | |
en | -0.018 |
unatt | -0.018 |
ternally | -0.018 |
ivated | -0.018 |
alone | -0.018 |
Token | not |
Token ID | 407 |
Feature activation | +1.34e-02 |
Pos logit contributions | |
ages | +0.057 |
zag | +0.055 |
ometer | +0.054 |
ained | +0.053 |
tr | +0.052 |
Neg logit contributions | |
I | -0.048 |
me | -0.046 |
cheer | -0.045 |
en | -0.044 |
alone | -0.044 |
Token | jumped |
Token ID | 11687 |
Feature activation | +1.41e-02 |
Pos logit contributions | |
zag | +0.047 |
ages | +0.046 |
ained | +0.043 |
ometer | +0.043 |
uffy | +0.042 |
Neg logit contributions | |
en | -0.024 |
alone | -0.024 |
I | -0.023 |
unatt | -0.022 |
me | -0.022 |
Token | . |
Token ID | 13 |
Feature activation | +2.09e-02 |
Pos logit contributions | |
ages | +0.027 |
zag | +0.027 |
tr | +0.025 |
ained | +0.024 |
ometer | +0.024 |
Neg logit contributions | |
en | -0.015 |
unatt | -0.015 |
ivated | -0.015 |
alone | -0.015 |
ternally | -0.014 |
Token | They |
Token ID | 1119 |
Feature activation | +1.76e-02 |
Pos logit contributions | |
zag | +0.031 |
ages | +0.029 |
tails | +0.026 |
w | +0.026 |
tr | +0.026 |
Neg logit contributions | |
en | -0.034 |
My | -0.033 |
I | -0.033 |
Int | -0.032 |
ivated | -0.031 |
Token | learned |
Token ID | 4499 |
Feature activation | +4.66e-02 |
Pos logit contributions | |
zag | +0.033 |
ages | +0.033 |
ained | +0.031 |
ometer | +0.030 |
tr | +0.030 |
Neg logit contributions | |
en | -0.020 |
unatt | -0.019 |
alone | -0.019 |
ivated | -0.019 |
ternally | -0.018 |
Token | a |
Token ID | 257 |
Feature activation | +6.66e-02 |
Pos logit contributions | |
zag | +0.101 |
ops | +0.095 |
ages | +0.094 |
uffy | +0.092 |
ained | +0.092 |
Neg logit contributions | |
back | -0.042 |
en | -0.042 |
so | -0.038 |
My | -0.038 |
me | -0.038 |
Token | hard |
Token ID | 1327 |
Feature activation | +9.92e-02 |
Pos logit contributions | |
packs | +0.093 |
aj | +0.085 |
ained | +0.084 |
aked | +0.082 |
ages | +0.082 |
Neg logit contributions | |
en | -0.111 |
I | -0.103 |
else | -0.102 |
back | -0.102 |
My | -0.101 |
Token | lesson |
Token ID | 11483 |
Feature activation | +5.10e-02 |
Pos logit contributions | |
aj | +0.164 |
packs | +0.149 |
ccoli | +0.138 |
tails | +0.135 |
ends | +0.131 |
Neg logit contributions | |
en | -0.128 |
tune | -0.124 |
back | -0.124 |
alone | -0.124 |
fright | -0.121 |
Token | that |
Token ID | 326 |
Feature activation | +8.16e-03 |
Pos logit contributions | |
ages | +0.118 |
ccoli | +0.117 |
aj | +0.116 |
zag | +0.116 |
tr | +0.114 |
Neg logit contributions | |
alone | -0.036 |
I | -0.035 |
back | -0.029 |
unatt | -0.028 |
me | -0.028 |
Token | day |
Token ID | 1110 |
Feature activation | +3.20e-02 |
Pos logit contributions | |
zag | +0.010 |
ages | +0.010 |
tr | +0.009 |
ained | +0.009 |
ometer | +0.008 |
Neg logit contributions | |
en | -0.014 |
unatt | -0.014 |
My | -0.014 |
ivated | -0.014 |
I | -0.014 |
Token | . |
Token ID | 13 |
Feature activation | +2.77e-02 |
Pos logit contributions | |
zag | +0.067 |
ages | +0.065 |
tr | +0.063 |
ained | +0.062 |
ccoli | +0.061 |
Neg logit contributions | |
I | -0.028 |
alone | -0.027 |
unatt | -0.026 |
My | -0.025 |
en | -0.024 |
Token | They |
Token ID | 1119 |
Feature activation | +2.82e-02 |
Pos logit contributions | |
zag | +0.045 |
ages | +0.042 |
tails | +0.038 |
tr | +0.038 |
ahs | +0.038 |
Neg logit contributions | |
I | -0.042 |
My | -0.042 |
en | -0.040 |
Int | -0.037 |
alone | -0.037 |
Token | never |
Token ID | 1239 |
Feature activation | -3.00e-02 |
Pos logit contributions | |
ivated | +0.025 |
unatt | +0.025 |
My | +0.025 |
ternally | +0.024 |
en | +0.024 |
Neg logit contributions | |
zag | -0.027 |
ages | -0.026 |
tr | -0.024 |
ine | -0.023 |
ained | -0.023 |
Token | wanted |
Token ID | 2227 |
Feature activation | -4.60e-02 |
Pos logit contributions | |
ternally | +0.048 |
My | +0.046 |
ivated | +0.046 |
unatt | +0.044 |
en | +0.042 |
Neg logit contributions | |
zag | -0.041 |
ages | -0.038 |
tr | -0.036 |
ies | -0.035 |
ained | -0.035 |
Token | to |
Token ID | 284 |
Feature activation | -2.79e-02 |
Pos logit contributions | |
ivated | +0.085 |
ternally | +0.084 |
unatt | +0.078 |
Int | +0.075 |
My | +0.073 |
Neg logit contributions | |
zag | -0.049 |
ages | -0.049 |
lesson | -0.047 |
ment | -0.046 |
common | -0.045 |
Token | leave |
Token ID | 2666 |
Feature activation | -3.94e-02 |
Pos logit contributions | |
ternally | +0.038 |
My | +0.037 |
ivated | +0.037 |
unatt | +0.037 |
Int | +0.036 |
Neg logit contributions | |
zag | -0.046 |
ages | -0.040 |
tails | -0.040 |
tr | -0.039 |
ine | -0.038 |
Token | the |
Token ID | 262 |
Feature activation | -2.80e-02 |
Pos logit contributions | |
ivated | +0.061 |
ternally | +0.059 |
Int | +0.059 |
My | +0.058 |
en | +0.054 |
Neg logit contributions | |
zag | -0.052 |
ages | -0.050 |
ment | -0.049 |
common | -0.048 |
ained | -0.048 |
Token | group |
Token ID | 1448 |
Feature activation | -9.67e-02 |
Pos logit contributions | |
My | +0.035 |
ternally | +0.035 |
ivated | +0.034 |
Int | +0.032 |
unatt | +0.031 |
Neg logit contributions | |
zag | -0.049 |
ages | -0.046 |
tr | -0.045 |
ained | -0.044 |
tails | -0.042 |
Token | again |
Token ID | 757 |
Feature activation | -1.01e-02 |
Pos logit contributions | |
ternally | +0.163 |
Int | +0.161 |
ivated | +0.152 |
My | +0.151 |
Gu | +0.145 |
Neg logit contributions | |
zag | -0.113 |
ages | -0.096 |
packs | -0.089 |
ward | -0.087 |
ies | -0.086 |
Token | . |
Token ID | 13 |
Feature activation | -2.41e-04 |
Pos logit contributions | |
ternally | +0.014 |
ivated | +0.014 |
My | +0.014 |
en | +0.014 |
unatt | +0.014 |
Neg logit contributions | |
zag | -0.016 |
ages | -0.015 |
ained | -0.014 |
tr | -0.014 |
ine | -0.014 |
Token | They |
Token ID | 1119 |
Feature activation | +3.33e-03 |
Pos logit contributions | |
ivated | +0.000 |
unatt | +0.000 |
ternally | +0.000 |
en | +0.000 |
My | +0.000 |
Neg logit contributions | |
ages | -0.000 |
zag | -0.000 |
tr | -0.000 |
ained | -0.000 |
ometer | -0.000 |
Token | were |
Token ID | 547 |
Feature activation | -2.78e-03 |
Pos logit contributions | |
ages | +0.005 |
zag | +0.005 |
tr | +0.005 |
ained | +0.005 |
ometer | +0.005 |
Neg logit contributions | |
ivated | -0.004 |
ternally | -0.004 |
unatt | -0.004 |
en | -0.004 |
My | -0.004 |
Token | so |
Token ID | 523 |
Feature activation | -1.10e-02 |
Pos logit contributions | |
ivated | +0.003 |
unatt | +0.003 |
My | +0.003 |
ternally | +0.003 |
en | +0.003 |
Neg logit contributions | |
ages | -0.005 |
zag | -0.005 |
ained | -0.005 |
tr | -0.005 |
tails | -0.005 |
Token | and |
Token ID | 290 |
Feature activation | +2.54e-02 |
Pos logit contributions | |
ternally | +0.027 |
unatt | +0.027 |
ivated | +0.026 |
My | +0.026 |
Int | +0.025 |
Neg logit contributions | |
zag | -0.025 |
ages | -0.024 |
ained | -0.022 |
tr | -0.021 |
olving | -0.021 |
Token | the |
Token ID | 262 |
Feature activation | -4.06e-04 |
Pos logit contributions | |
zag | +0.044 |
ages | +0.044 |
tr | +0.042 |
aj | +0.042 |
ained | +0.041 |
Neg logit contributions | |
en | -0.032 |
alone | -0.030 |
I | -0.030 |
My | -0.029 |
cheer | -0.029 |
Token | deer |
Token ID | 20096 |
Feature activation | -2.81e-03 |
Pos logit contributions | |
ivated | +0.000 |
unatt | +0.000 |
My | +0.000 |
ternally | +0.000 |
en | +0.000 |
Neg logit contributions | |
zag | -0.001 |
ages | -0.001 |
tr | -0.001 |
ained | -0.001 |
ometer | -0.001 |
Token | was |
Token ID | 373 |
Feature activation | -6.54e-03 |
Pos logit contributions | |
ivated | +0.004 |
ternally | +0.004 |
unatt | +0.004 |
My | +0.004 |
en | +0.004 |
Neg logit contributions | |
zag | -0.004 |
ages | -0.004 |
tr | -0.004 |
ained | -0.004 |
ometer | -0.004 |
Token | never |
Token ID | 1239 |
Feature activation | -2.91e-02 |
Pos logit contributions | |
ternally | +0.009 |
ivated | +0.009 |
unatt | +0.009 |
My | +0.008 |
Int | +0.008 |
Neg logit contributions | |
zag | -0.011 |
ages | -0.011 |
ained | -0.010 |
tr | -0.010 |
ine | -0.009 |
Token | bored |
Token ID | 17533 |
Feature activation | -9.38e-02 |
Pos logit contributions | |
ternally | +0.034 |
My | +0.033 |
ivated | +0.032 |
Int | +0.029 |
Give | +0.029 |
Neg logit contributions | |
ages | -0.050 |
ained | -0.050 |
zag | -0.049 |
olving | -0.045 |
ine | -0.044 |
Token | again |
Token ID | 757 |
Feature activation | +2.06e-04 |
Pos logit contributions | |
ternally | +0.174 |
Int | +0.151 |
ivated | +0.143 |
My | +0.139 |
chie | +0.138 |
Neg logit contributions | |
olving | -0.109 |
ending | -0.106 |
lesson | -0.100 |
ained | -0.099 |
zag | -0.097 |
Token | . |
Token ID | 13 |
Feature activation | +7.73e-03 |
Pos logit contributions | |
ages | +0.000 |
zag | +0.000 |
ained | +0.000 |
tr | +0.000 |
ometer | +0.000 |
Neg logit contributions | |
ivated | -0.000 |
ternally | -0.000 |
My | -0.000 |
unatt | -0.000 |
en | -0.000 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | -5.33e-03 |
Pos logit contributions | |
zag | +0.013 |
ages | +0.013 |
tr | +0.012 |
ained | +0.012 |
ometer | +0.012 |
Neg logit contributions | |
My | -0.009 |
en | -0.009 |
ivated | -0.009 |
unatt | -0.009 |
ternally | -0.009 |
Token | One |
Token ID | 3198 |
Feature activation | -1.01e-03 |
Pos logit contributions | |
ivated | +0.007 |
ternally | +0.007 |
unatt | +0.007 |
en | +0.007 |
My | +0.007 |
Neg logit contributions | |
zag | -0.008 |
ages | -0.008 |
ained | -0.008 |
tr | -0.008 |
ometer | -0.007 |
Token | day |
Token ID | 1110 |
Feature activation | +2.02e-02 |
Pos logit contributions | |
ivated | +0.002 |
ternally | +0.002 |
unatt | +0.002 |
My | +0.002 |
en | +0.002 |
Neg logit contributions | |
zag | -0.001 |
ages | -0.001 |
ained | -0.001 |
tr | -0.001 |
ometer | -0.001 |
Token | extra |
Token ID | 3131 |
Feature activation | +1.04e-04 |
Pos logit contributions | |
ivated | +0.010 |
My | +0.010 |
ternally | +0.010 |
unatt | +0.009 |
Int | +0.009 |
Neg logit contributions | |
ages | -0.014 |
zag | -0.013 |
ained | -0.013 |
tr | -0.012 |
tails | -0.012 |
Token | careful |
Token ID | 8161 |
Feature activation | -1.10e-02 |
Pos logit contributions | |
ages | +0.000 |
zag | +0.000 |
tr | +0.000 |
ained | +0.000 |
ometer | +0.000 |
Neg logit contributions | |
ivated | -0.000 |
unatt | -0.000 |
My | -0.000 |
ternally | -0.000 |
en | -0.000 |
Token | and |
Token ID | 290 |
Feature activation | +2.38e-02 |
Pos logit contributions | |
ivated | +0.015 |
ternally | +0.015 |
My | +0.015 |
unatt | +0.015 |
en | +0.015 |
Neg logit contributions | |
ages | -0.018 |
zag | -0.017 |
ained | -0.016 |
tr | -0.016 |
ometer | -0.015 |
Token | never |
Token ID | 1239 |
Feature activation | -1.00e-02 |
Pos logit contributions | |
zag | +0.041 |
ages | +0.041 |
ained | +0.039 |
tr | +0.038 |
ometer | +0.037 |
Neg logit contributions | |
I | -0.029 |
en | -0.029 |
alone | -0.028 |
unatt | -0.028 |
My | -0.028 |
Token | leave |
Token ID | 2666 |
Feature activation | -2.90e-02 |
Pos logit contributions | |
ternally | +0.012 |
ivated | +0.011 |
My | +0.011 |
unatt | +0.011 |
en | +0.011 |
Neg logit contributions | |
zag | -0.018 |
ages | -0.018 |
tr | -0.017 |
ained | -0.016 |
tails | -0.016 |
Token | anyone |
Token ID | 2687 |
Feature activation | -8.31e-02 |
Pos logit contributions | |
ivated | +0.038 |
ternally | +0.037 |
Int | +0.036 |
My | +0.035 |
en | +0.034 |
Neg logit contributions | |
ages | -0.048 |
zag | -0.046 |
ained | -0.044 |
tr | -0.043 |
ine | -0.043 |
Token | behind |
Token ID | 2157 |
Feature activation | -5.52e-02 |
Pos logit contributions | |
ternally | +0.105 |
Int | +0.102 |
My | +0.092 |
ivated | +0.091 |
Hall | +0.087 |
Neg logit contributions | |
packs | -0.129 |
zag | -0.128 |
ages | -0.126 |
ies | -0.123 |
ends | -0.112 |
Token | . |
Token ID | 13 |
Feature activation | +4.85e-03 |
Pos logit contributions | |
ivated | +0.091 |
ternally | +0.090 |
Int | +0.085 |
en | +0.082 |
My | +0.082 |
Neg logit contributions | |
ained | -0.068 |
zag | -0.068 |
packs | -0.066 |
ages | -0.062 |
tr | -0.059 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | +3.74e-04 |
Pos logit contributions | |
ages | +0.009 |
zag | +0.009 |
tr | +0.008 |
ained | +0.008 |
ometer | +0.008 |
Neg logit contributions | |
My | -0.005 |
ivated | -0.005 |
unatt | -0.005 |
en | -0.005 |
ternally | -0.005 |
Token | Once |
Token ID | 7454 |
Feature activation | +9.34e-03 |
Pos logit contributions | |
zag | +0.001 |
ages | +0.001 |
tr | +0.001 |
ained | +0.001 |
ometer | +0.001 |
Neg logit contributions | |
ivated | -0.000 |
ternally | -0.000 |
unatt | -0.000 |
en | -0.000 |
My | -0.000 |
Token | upon |
Token ID | 2402 |
Feature activation | +2.95e-03 |
Pos logit contributions | |
ages | +0.009 |
zag | +0.009 |
tr | +0.008 |
ained | +0.008 |
ometer | +0.007 |
Neg logit contributions | |
ivated | -0.018 |
en | -0.018 |
My | -0.018 |
unatt | -0.018 |
ternally | -0.018 |
Token | she |
Token ID | 673 |
Feature activation | -7.72e-04 |
Pos logit contributions | |
ages | +0.020 |
zag | +0.020 |
tr | +0.018 |
ained | +0.018 |
ometer | +0.018 |
Neg logit contributions | |
My | -0.010 |
en | -0.010 |
ivated | -0.010 |
unatt | -0.010 |
I | -0.010 |
Token | had |
Token ID | 550 |
Feature activation | -9.02e-03 |
Pos logit contributions | |
ivated | +0.001 |
unatt | +0.001 |
ternally | +0.001 |
My | +0.001 |
en | +0.001 |
Neg logit contributions | |
zag | -0.001 |
ages | -0.001 |
tr | -0.001 |
ained | -0.001 |
ometer | -0.001 |
Token | never |
Token ID | 1239 |
Feature activation | +1.35e-03 |
Pos logit contributions | |
ivated | +0.012 |
ternally | +0.012 |
unatt | +0.012 |
My | +0.012 |
en | +0.012 |
Neg logit contributions | |
ages | -0.014 |
zag | -0.014 |
ained | -0.013 |
tr | -0.013 |
tails | -0.012 |
Token | sung |
Token ID | 23568 |
Feature activation | -1.99e-02 |
Pos logit contributions | |
ages | +0.002 |
zag | +0.002 |
tr | +0.002 |
ained | +0.002 |
ometer | +0.002 |
Neg logit contributions | |
ivated | -0.002 |
ternally | -0.002 |
unatt | -0.002 |
My | -0.002 |
en | -0.002 |
Token | in |
Token ID | 287 |
Feature activation | -1.10e-02 |
Pos logit contributions | |
ivated | +0.027 |
ternally | +0.027 |
unatt | +0.027 |
en | +0.026 |
My | +0.026 |
Neg logit contributions | |
zag | -0.031 |
ages | -0.030 |
ained | -0.028 |
tr | -0.028 |
ine | -0.026 |
Token | front |
Token ID | 2166 |
Feature activation | -8.23e-02 |
Pos logit contributions | |
ivated | +0.013 |
ternally | +0.013 |
unatt | +0.012 |
My | +0.012 |
en | +0.012 |
Neg logit contributions | |
zag | -0.019 |
ages | -0.019 |
ained | -0.018 |
tr | -0.018 |
ine | -0.017 |
Token | of |
Token ID | 286 |
Feature activation | -1.64e-02 |
Pos logit contributions | |
ternally | +0.150 |
ivated | +0.143 |
Int | +0.132 |
Bruce | +0.127 |
Hall | +0.126 |
Neg logit contributions | |
tr | -0.084 |
ment | -0.080 |
zag | -0.080 |
ained | -0.077 |
packs | -0.073 |
Token | anyone |
Token ID | 2687 |
Feature activation | -5.66e-02 |
Pos logit contributions | |
ivated | +0.019 |
ternally | +0.019 |
unatt | +0.019 |
en | +0.018 |
My | +0.018 |
Neg logit contributions | |
ages | -0.028 |
zag | -0.028 |
tr | -0.026 |
ained | -0.026 |
tails | -0.026 |
Token | before |
Token ID | 878 |
Feature activation | +1.35e-03 |
Pos logit contributions | |
ternally | +0.100 |
ivated | +0.091 |
Int | +0.089 |
Leave | +0.089 |
unatt | +0.083 |
Neg logit contributions | |
zag | -0.070 |
ages | -0.064 |
ine | -0.062 |
tr | -0.060 |
ccoli | -0.059 |
Token | . |
Token ID | 13 |
Feature activation | +9.49e-03 |
Pos logit contributions | |
ages | +0.002 |
zag | +0.002 |
tr | +0.002 |
ained | +0.002 |
ometer | +0.002 |
Neg logit contributions | |
ivated | -0.002 |
ternally | -0.002 |
unatt | -0.002 |
en | -0.002 |
My | -0.002 |
Token | But |
Token ID | 887 |
Feature activation | +2.15e-02 |
Pos logit contributions | |
zag | +0.019 |
ages | +0.019 |
ained | +0.017 |
tr | +0.017 |
tails | +0.017 |
Neg logit contributions | |
My | -0.009 |
en | -0.009 |
ivated | -0.009 |
I | -0.009 |
unatt | -0.009 |
Token | sail |
Token ID | 14936 |
Feature activation | -1.75e-03 |
Pos logit contributions | |
unatt | +0.006 |
ivated | +0.006 |
ternally | +0.006 |
My | +0.006 |
en | +0.006 |
Neg logit contributions | |
zag | -0.009 |
ages | -0.009 |
tr | -0.008 |
ained | -0.008 |
tails | -0.008 |
Token | his |
Token ID | 465 |
Feature activation | -3.16e-02 |
Pos logit contributions | |
ivated | +0.003 |
ternally | +0.003 |
My | +0.003 |
unatt | +0.003 |
en | +0.003 |
Neg logit contributions | |
ages | -0.002 |
zag | -0.002 |
ained | -0.002 |
tr | -0.002 |
ometer | -0.002 |
Token | boat |
Token ID | 8848 |
Feature activation | -8.61e-03 |
Pos logit contributions | |
ivated | +0.048 |
My | +0.045 |
Int | +0.043 |
unatt | +0.042 |
ternally | +0.041 |
Neg logit contributions | |
zag | -0.050 |
ages | -0.049 |
ometer | -0.044 |
ies | -0.043 |
packs | -0.042 |
Token | in |
Token ID | 287 |
Feature activation | -2.78e-02 |
Pos logit contributions | |
ivated | +0.011 |
My | +0.011 |
ternally | +0.011 |
en | +0.011 |
unatt | +0.011 |
Neg logit contributions | |
zag | -0.014 |
ages | -0.014 |
tr | -0.013 |
ained | -0.013 |
ometer | -0.012 |
Token | the |
Token ID | 262 |
Feature activation | -4.28e-02 |
Pos logit contributions | |
ivated | +0.041 |
ternally | +0.040 |
My | +0.038 |
unatt | +0.038 |
Int | +0.035 |
Neg logit contributions | |
common | -0.039 |
packs | -0.039 |
zag | -0.038 |
ine | -0.037 |
ained | -0.037 |
Token | big |
Token ID | 1263 |
Feature activation | -8.21e-02 |
Pos logit contributions | |
ivated | +0.070 |
My | +0.066 |
ternally | +0.066 |
cheer | +0.060 |
unatt | +0.060 |
Neg logit contributions | |
ained | -0.056 |
zag | -0.054 |
tr | -0.052 |
ages | -0.051 |
w | -0.047 |
Token | blue |
Token ID | 4171 |
Feature activation | -3.07e-02 |
Pos logit contributions | |
ivated | +0.164 |
My | +0.150 |
ternally | +0.146 |
ivating | +0.142 |
My | +0.141 |
Neg logit contributions | |
packs | -0.093 |
ossible | -0.082 |
ies | -0.078 |
tent | -0.075 |
lot | -0.075 |
Token | sea |
Token ID | 5417 |
Feature activation | -4.54e-03 |
Pos logit contributions | |
ivated | +0.044 |
ternally | +0.040 |
en | +0.039 |
unatt | +0.039 |
My | +0.039 |
Neg logit contributions | |
zag | -0.050 |
ages | -0.048 |
packs | -0.047 |
ained | -0.047 |
tr | -0.045 |
Token | . |
Token ID | 13 |
Feature activation | -7.98e-03 |
Pos logit contributions | |
ivated | +0.006 |
ternally | +0.006 |
My | +0.006 |
unatt | +0.006 |
en | +0.006 |
Neg logit contributions | |
ages | -0.007 |
zag | -0.007 |
tr | -0.007 |
ained | -0.007 |
ometer | -0.006 |
Token | One |
Token ID | 1881 |
Feature activation | +3.54e-03 |
Pos logit contributions | |
ivated | +0.011 |
ternally | +0.011 |
unatt | +0.011 |
en | +0.011 |
My | +0.010 |
Neg logit contributions | |
zag | -0.012 |
ages | -0.012 |
tr | -0.012 |
ained | -0.012 |
ometer | -0.011 |
Token | day |
Token ID | 1110 |
Feature activation | +3.46e-02 |
Pos logit contributions | |
ages | +0.004 |
zag | +0.004 |
tr | +0.003 |
ained | +0.003 |
ometer | +0.003 |
Neg logit contributions | |
ivated | -0.006 |
unatt | -0.006 |
en | -0.006 |
ternally | -0.006 |
My | -0.006 |
Token | the |
Token ID | 262 |
Feature activation | +2.16e-03 |
Pos logit contributions | |
zag | +0.024 |
ages | +0.024 |
ained | +0.023 |
tr | +0.022 |
ometer | +0.022 |
Neg logit contributions | |
en | -0.012 |
unatt | -0.011 |
My | -0.011 |
ivated | -0.011 |
alone | -0.011 |
Token | old |
Token ID | 1468 |
Feature activation | +5.98e-03 |
Pos logit contributions | |
ages | +0.004 |
zag | +0.004 |
tr | +0.004 |
ained | +0.004 |
ometer | +0.003 |
Neg logit contributions | |
ivated | -0.002 |
ternally | -0.002 |
unatt | -0.002 |
My | -0.002 |
en | -0.002 |
Token | cat |
Token ID | 3797 |
Feature activation | -7.48e-03 |
Pos logit contributions | |
ages | +0.011 |
zag | +0.011 |
ained | +0.010 |
tr | +0.010 |
ometer | +0.009 |
Neg logit contributions | |
en | -0.007 |
My | -0.007 |
ivated | -0.007 |
unatt | -0.007 |
ternally | -0.007 |
Token | was |
Token ID | 373 |
Feature activation | -1.11e-02 |
Pos logit contributions | |
ivated | +0.011 |
My | +0.010 |
ternally | +0.010 |
unatt | +0.010 |
en | +0.010 |
Neg logit contributions | |
ages | -0.012 |
zag | -0.011 |
tr | -0.011 |
ained | -0.010 |
tails | -0.010 |
Token | never |
Token ID | 1239 |
Feature activation | -3.13e-02 |
Pos logit contributions | |
ternally | +0.014 |
ivated | +0.014 |
My | +0.014 |
unatt | +0.013 |
Int | +0.013 |
Neg logit contributions | |
zag | -0.019 |
ages | -0.019 |
ained | -0.018 |
tr | -0.017 |
tails | -0.017 |
Token | seen |
Token ID | 1775 |
Feature activation | -8.15e-02 |
Pos logit contributions | |
ivated | +0.043 |
ternally | +0.042 |
My | +0.041 |
folk | +0.038 |
Int | +0.038 |
Neg logit contributions | |
ages | -0.049 |
zag | -0.048 |
ained | -0.045 |
ies | -0.044 |
tails | -0.043 |
Token | again |
Token ID | 757 |
Feature activation | -1.95e-03 |
Pos logit contributions | |
ivated | +0.141 |
ternally | +0.139 |
folk | +0.135 |
chie | +0.127 |
Int | +0.125 |
Neg logit contributions | |
ained | -0.080 |
for | -0.079 |
tr | -0.078 |
lesson | -0.075 |
olving | -0.074 |
Token | . |
Token ID | 13 |
Feature activation | +2.30e-03 |
Pos logit contributions | |
ternally | +0.003 |
ivated | +0.003 |
My | +0.002 |
unatt | +0.002 |
en | +0.002 |
Neg logit contributions | |
ages | -0.003 |
zag | -0.003 |
ained | -0.003 |
tr | -0.003 |
ometer | -0.003 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | -1.69e-03 |
Pos logit contributions | |
ages | +0.004 |
zag | +0.004 |
tr | +0.004 |
ained | +0.004 |
ometer | +0.004 |
Neg logit contributions | |
ivated | -0.003 |
unatt | -0.003 |
ternally | -0.003 |
en | -0.003 |
My | -0.003 |
Token | Once |
Token ID | 7454 |
Feature activation | -2.21e-03 |
Pos logit contributions | |
ivated | +0.002 |
ternally | +0.002 |
unatt | +0.002 |
en | +0.002 |
My | +0.002 |
Neg logit contributions | |
zag | -0.003 |
ages | -0.003 |
ained | -0.002 |
tr | -0.002 |
ometer | -0.002 |
Token | upon |
Token ID | 2402 |
Feature activation | +4.96e-03 |
Pos logit contributions | |
ivated | +0.004 |
ternally | +0.004 |
unatt | +0.004 |
My | +0.004 |
en | +0.004 |
Neg logit contributions | |
zag | -0.002 |
ages | -0.002 |
ained | -0.002 |
tr | -0.002 |
ometer | -0.002 |
Token | L |
Token ID | 43 |
Feature activation | -2.57e-03 |
Pos logit contributions | |
ages | +0.007 |
zag | +0.007 |
tr | +0.007 |
ained | +0.006 |
ometer | +0.006 |
Neg logit contributions | |
ivated | -0.012 |
My | -0.012 |
unatt | -0.012 |
en | -0.012 |
ternally | -0.012 |
Token | ily |
Token ID | 813 |
Feature activation | -1.29e-02 |
Pos logit contributions | |
ivated | +0.004 |
unatt | +0.004 |
ternally | +0.004 |
en | +0.004 |
My | +0.004 |
Neg logit contributions | |
zag | -0.004 |
ages | -0.004 |
ained | -0.003 |
tr | -0.003 |
ometer | -0.003 |
Token | and |
Token ID | 290 |
Feature activation | +4.51e-03 |
Pos logit contributions | |
ivated | +0.018 |
ternally | +0.018 |
unatt | +0.017 |
My | +0.017 |
en | +0.017 |
Neg logit contributions | |
ages | -0.020 |
zag | -0.020 |
tr | -0.019 |
ained | -0.018 |
ccoli | -0.018 |
Token | Ben |
Token ID | 3932 |
Feature activation | -1.28e-02 |
Pos logit contributions | |
zag | +0.008 |
ages | +0.008 |
tr | +0.007 |
ained | +0.007 |
ometer | +0.007 |
Neg logit contributions | |
ivated | -0.005 |
en | -0.005 |
My | -0.005 |
ternally | -0.005 |
unatt | -0.005 |
Token | were |
Token ID | 547 |
Feature activation | +6.49e-03 |
Pos logit contributions | |
ivated | +0.018 |
ternally | +0.018 |
My | +0.017 |
unatt | +0.017 |
en | +0.017 |
Neg logit contributions | |
ages | -0.020 |
zag | -0.019 |
tr | -0.018 |
ained | -0.018 |
ccoli | -0.017 |
Token | twins |
Token ID | 20345 |
Feature activation | -8.14e-02 |
Pos logit contributions | |
zag | +0.010 |
ages | +0.010 |
tr | +0.009 |
ained | +0.009 |
ometer | +0.009 |
Neg logit contributions | |
en | -0.009 |
ivated | -0.009 |
ternally | -0.009 |
unatt | -0.009 |
My | -0.009 |
Token | who |
Token ID | 508 |
Feature activation | +2.56e-02 |
Pos logit contributions | |
ternally | +0.169 |
Int | +0.169 |
vious | +0.165 |
ivated | +0.161 |
Leave | +0.156 |
Neg logit contributions | |
ages | -0.067 |
ine | -0.058 |
zag | -0.056 |
ward | -0.054 |
tails | -0.051 |
Token | liked |
Token ID | 8288 |
Feature activation | -6.70e-03 |
Pos logit contributions | |
zag | +0.049 |
ages | +0.048 |
tr | +0.045 |
ometer | +0.045 |
ained | +0.044 |
Neg logit contributions | |
en | -0.028 |
alone | -0.027 |
I | -0.026 |
ivated | -0.025 |
My | -0.025 |
Token | to |
Token ID | 284 |
Feature activation | +4.90e-03 |
Pos logit contributions | |
ivated | +0.008 |
ternally | +0.008 |
unatt | +0.008 |
My | +0.008 |
en | +0.007 |
Neg logit contributions | |
ages | -0.012 |
zag | -0.012 |
tr | -0.011 |
ained | -0.011 |
ometer | -0.011 |
Token | play |
Token ID | 711 |
Feature activation | -6.21e-03 |
Pos logit contributions | |
ages | +0.010 |
zag | +0.010 |
ained | +0.009 |
tr | +0.009 |
ometer | +0.009 |
Neg logit contributions | |
ivated | -0.004 |
en | -0.004 |
My | -0.004 |
ternally | -0.004 |
unatt | -0.004 |
Token | outside |
Token ID | 2354 |
Feature activation | -1.34e-02 |
Pos logit contributions | |
ternally | +0.007 |
ivated | +0.007 |
unatt | +0.007 |
My | +0.007 |
en | +0.007 |
Neg logit contributions | |
zag | -0.011 |
ages | -0.011 |
tr | -0.010 |
ained | -0.010 |
ometer | -0.010 |
Token | , |
Token ID | 11 |
Feature activation | +9.78e-03 |
Pos logit contributions | |
unatt | +0.012 |
ivated | +0.012 |
ternally | +0.012 |
en | +0.011 |
My | +0.011 |
Neg logit contributions | |
ages | -0.010 |
zag | -0.010 |
tr | -0.009 |
ometer | -0.009 |
tails | -0.009 |
Token | " |
Token ID | 366 |
Feature activation | -3.21e-02 |
Pos logit contributions | |
ages | +0.019 |
zag | +0.019 |
ained | +0.017 |
tr | +0.017 |
ometer | +0.017 |
Neg logit contributions | |
My | -0.010 |
en | -0.010 |
ivated | -0.010 |
ternally | -0.010 |
I | -0.010 |
Token | Hi |
Token ID | 17250 |
Feature activation | -1.66e-02 |
Pos logit contributions | |
unatt | +0.040 |
alone | +0.038 |
en | +0.037 |
ivated | +0.036 |
ternally | +0.036 |
Neg logit contributions | |
zag | -0.054 |
ages | -0.054 |
tails | -0.051 |
w | -0.051 |
ained | -0.051 |
Token | Pedro |
Token ID | 28855 |
Feature activation | +1.74e-02 |
Pos logit contributions | |
unatt | +0.027 |
ivated | +0.027 |
ternally | +0.026 |
en | +0.025 |
alone | +0.024 |
Neg logit contributions | |
zag | -0.023 |
ages | -0.023 |
tails | -0.021 |
ometer | -0.020 |
ine | -0.020 |
Token | , |
Token ID | 11 |
Feature activation | -2.97e-02 |
Pos logit contributions | |
ages | +0.030 |
zag | +0.030 |
ained | +0.030 |
tr | +0.029 |
ometer | +0.028 |
Neg logit contributions | |
My | -0.020 |
ternally | -0.019 |
en | -0.019 |
I | -0.019 |
me | -0.019 |
Token | can |
Token ID | 460 |
Feature activation | -8.12e-02 |
Pos logit contributions | |
ivated | +0.038 |
unatt | +0.038 |
ternally | +0.036 |
en | +0.033 |
alone | +0.032 |
Neg logit contributions | |
zag | -0.049 |
tr | -0.046 |
ages | -0.045 |
ometer | -0.044 |
ccoli | -0.044 |
Token | I |
Token ID | 314 |
Feature activation | -4.57e-02 |
Pos logit contributions | |
unatt | +0.112 |
ivated | +0.098 |
vious | +0.095 |
lier | +0.092 |
outsider | +0.090 |
Neg logit contributions | |
zag | -0.123 |
tails | -0.120 |
ages | -0.117 |
ves | -0.113 |
w | -0.113 |
Token | ride |
Token ID | 6594 |
Feature activation | -2.26e-02 |
Pos logit contributions | |
ivated | +0.066 |
unatt | +0.065 |
ternally | +0.062 |
en | +0.061 |
Int | +0.059 |
Neg logit contributions | |
zag | -0.076 |
tr | -0.063 |
ages | -0.061 |
tails | -0.059 |
ine | -0.058 |
Token | you |
Token ID | 345 |
Feature activation | -1.76e-02 |
Pos logit contributions | |
ivated | +0.025 |
en | +0.024 |
ternally | +0.024 |
unatt | +0.023 |
Int | +0.023 |
Neg logit contributions | |
zag | -0.042 |
ages | -0.040 |
ained | -0.038 |
tr | -0.038 |
tails | -0.037 |
Token | ?" |
Token ID | 1701 |
Feature activation | +7.69e-03 |
Pos logit contributions | |
ivated | +0.024 |
ternally | +0.024 |
en | +0.024 |
My | +0.023 |
unatt | +0.023 |
Neg logit contributions | |
zag | -0.028 |
ages | -0.026 |
tr | -0.025 |
ained | -0.025 |
ometer | -0.023 |
Token | Pedro |
Token ID | 28855 |
Feature activation | -2.03e-02 |
Pos logit contributions | |
zag | +0.013 |
ages | +0.013 |
tr | +0.012 |
ained | +0.012 |
ometer | +0.012 |
Neg logit contributions | |
ivated | -0.009 |
en | -0.009 |
My | -0.009 |
ternally | -0.009 |
unatt | -0.009 |
Token | said |
Token ID | 531 |
Feature activation | -3.20e-02 |
Pos logit contributions | |
unatt | +0.009 |
ivated | +0.008 |
ternally | +0.008 |
My | +0.008 |
en | +0.008 |
Neg logit contributions | |
zag | -0.012 |
ages | -0.012 |
tr | -0.011 |
ine | -0.011 |
ained | -0.011 |
Token | , |
Token ID | 11 |
Feature activation | -1.43e-02 |
Pos logit contributions | |
unatt | +0.060 |
ivated | +0.060 |
ternally | +0.059 |
Int | +0.055 |
alone | +0.054 |
Neg logit contributions | |
ages | -0.034 |
zag | -0.031 |
ometer | -0.029 |
tr | -0.028 |
ccoli | -0.028 |
Token | " |
Token ID | 366 |
Feature activation | -3.73e-02 |
Pos logit contributions | |
ternally | +0.020 |
unatt | +0.020 |
ivated | +0.019 |
en | +0.018 |
alone | +0.018 |
Neg logit contributions | |
ages | -0.022 |
zag | -0.022 |
tr | -0.021 |
ometer | -0.020 |
ccoli | -0.020 |
Token | Hi |
Token ID | 17250 |
Feature activation | -1.09e-02 |
Pos logit contributions | |
en | +0.048 |
ivated | +0.046 |
alone | +0.046 |
ternally | +0.044 |
unatt | +0.044 |
Neg logit contributions | |
tr | -0.062 |
After | -0.060 |
ages | -0.059 |
w | -0.057 |
Before | -0.057 |
Token | , |
Token ID | 11 |
Feature activation | -2.10e-02 |
Pos logit contributions | |
ivated | +0.018 |
ternally | +0.017 |
unatt | +0.017 |
en | +0.017 |
My | +0.016 |
Neg logit contributions | |
zag | -0.014 |
ages | -0.014 |
tr | -0.013 |
ained | -0.013 |
ometer | -0.012 |
Token | can |
Token ID | 460 |
Feature activation | -8.12e-02 |
Pos logit contributions | |
ivated | +0.022 |
unatt | +0.022 |
ternally | +0.022 |
en | +0.020 |
alone | +0.020 |
Neg logit contributions | |
zag | -0.037 |
ages | -0.036 |
tr | -0.036 |
ometer | -0.035 |
ccoli | -0.035 |
Token | I |
Token ID | 314 |
Feature activation | -3.64e-02 |
Pos logit contributions | |
unatt | +0.105 |
vious | +0.105 |
ivated | +0.103 |
outsider | +0.100 |
lier | +0.098 |
Neg logit contributions | |
ages | -0.112 |
w | -0.111 |
ves | -0.111 |
zag | -0.109 |
uffy | -0.109 |
Token | play |
Token ID | 711 |
Feature activation | -4.10e-02 |
Pos logit contributions | |
ivated | +0.057 |
unatt | +0.057 |
ternally | +0.056 |
folk | +0.054 |
Int | +0.053 |
Neg logit contributions | |
zag | -0.053 |
tr | -0.047 |
ine | -0.046 |
ages | -0.044 |
tails | -0.043 |
Token | with |
Token ID | 351 |
Feature activation | -4.20e-02 |
Pos logit contributions | |
folk | +0.065 |
ternally | +0.065 |
ivated | +0.064 |
Int | +0.061 |
unatt | +0.061 |
Neg logit contributions | |
zag | -0.055 |
tr | -0.049 |
ages | -0.049 |
ends | -0.047 |
ained | -0.046 |
Token | you |
Token ID | 345 |
Feature activation | -8.99e-03 |
Pos logit contributions | |
ivated | +0.053 |
Int | +0.047 |
ternally | +0.046 |
unatt | +0.046 |
en | +0.046 |
Neg logit contributions | |
w | -0.067 |
ages | -0.067 |
zag | -0.063 |
tails | -0.062 |
ward | -0.062 |
Token | ?" |
Token ID | 1701 |
Feature activation | -1.90e-02 |
Pos logit contributions | |
ivated | +0.013 |
ternally | +0.013 |
en | +0.013 |
unatt | +0.013 |
My | +0.013 |
Neg logit contributions | |
zag | -0.013 |
ages | -0.013 |
tr | -0.012 |
ained | -0.012 |
ccoli | -0.012 |
Token | away |
Token ID | 1497 |
Feature activation | +1.00e-02 |
Pos logit contributions | |
ternally | +0.009 |
ivated | +0.009 |
unatt | +0.009 |
My | +0.009 |
en | +0.009 |
Neg logit contributions | |
zag | -0.015 |
ages | -0.015 |
tr | -0.014 |
ained | -0.014 |
ometer | -0.013 |
Token | , |
Token ID | 11 |
Feature activation | -2.23e-02 |
Pos logit contributions | |
ages | +0.018 |
zag | +0.018 |
ained | +0.017 |
tr | +0.017 |
ometer | +0.016 |
Neg logit contributions | |
en | -0.012 |
ivated | -0.012 |
unatt | -0.012 |
alone | -0.011 |
My | -0.011 |
Token | never |
Token ID | 1239 |
Feature activation | -2.41e-02 |
Pos logit contributions | |
ternally | +0.029 |
ivated | +0.028 |
My | +0.027 |
unatt | +0.027 |
Int | +0.026 |
Neg logit contributions | |
zag | -0.036 |
ages | -0.036 |
tr | -0.034 |
ained | -0.032 |
ine | -0.032 |
Token | to |
Token ID | 284 |
Feature activation | -2.55e-02 |
Pos logit contributions | |
ternally | +0.034 |
ivated | +0.032 |
My | +0.030 |
Int | +0.029 |
unatt | +0.028 |
Neg logit contributions | |
zag | -0.038 |
ages | -0.037 |
tr | -0.035 |
ained | -0.034 |
tails | -0.033 |
Token | be |
Token ID | 307 |
Feature activation | -3.30e-02 |
Pos logit contributions | |
ternally | +0.034 |
ivated | +0.033 |
Int | +0.033 |
My | +0.032 |
unatt | +0.030 |
Neg logit contributions | |
zag | -0.040 |
ages | -0.038 |
ine | -0.037 |
tr | -0.037 |
tails | -0.037 |
Token | seen |
Token ID | 1775 |
Feature activation | -8.10e-02 |
Pos logit contributions | |
ternally | +0.043 |
ivated | +0.042 |
My | +0.041 |
Int | +0.040 |
folk | +0.037 |
Neg logit contributions | |
ages | -0.054 |
ained | -0.049 |
zag | -0.049 |
ine | -0.049 |
ends | -0.046 |
Token | again |
Token ID | 757 |
Feature activation | -3.50e-03 |
Pos logit contributions | |
ivated | +0.145 |
ternally | +0.139 |
Int | +0.136 |
chie | +0.131 |
folk | +0.127 |
Neg logit contributions | |
for | -0.076 |
olving | -0.075 |
tr | -0.071 |
ment | -0.071 |
ained | -0.071 |
Token | . |
Token ID | 13 |
Feature activation | -1.13e-02 |
Pos logit contributions | |
ternally | +0.005 |
ivated | +0.005 |
My | +0.004 |
unatt | +0.004 |
en | +0.004 |
Neg logit contributions | |
ages | -0.006 |
zag | -0.006 |
ained | -0.005 |
tr | -0.005 |
ometer | -0.005 |
Token |
|
Token ID | 198 |
Feature activation | +1.70e-02 |
Pos logit contributions | |
ivated | +0.017 |
unatt | +0.017 |
ternally | +0.017 |
en | +0.016 |
My | +0.015 |
Neg logit contributions | |
ages | -0.017 |
zag | -0.016 |
tr | -0.015 |
ained | -0.015 |
ometer | -0.015 |
Token |
|
Token ID | 198 |
Feature activation | +7.75e-03 |
Pos logit contributions | |
ages | +0.025 |
zag | +0.025 |
tr | +0.023 |
ained | +0.023 |
ometer | +0.022 |
Neg logit contributions | |
en | -0.024 |
ivated | -0.024 |
My | -0.024 |
alone | -0.024 |
unatt | -0.024 |
Token | The |
Token ID | 464 |
Feature activation | +3.57e-03 |
Pos logit contributions | |
ages | +0.017 |
zag | +0.017 |
tr | +0.016 |
ained | +0.016 |
ometer | +0.016 |
Neg logit contributions | |
ivated | -0.006 |
ternally | -0.006 |
en | -0.006 |
unatt | -0.006 |
My | -0.006 |
Token | . |
Token ID | 13 |
Feature activation | -2.20e-02 |
Pos logit contributions | |
ternally | +0.032 |
ivated | +0.031 |
unatt | +0.030 |
My | +0.029 |
en | +0.028 |
Neg logit contributions | |
zag | -0.031 |
ages | -0.030 |
tr | -0.028 |
ometer | -0.028 |
ine | -0.027 |
Token | " |
Token ID | 366 |
Feature activation | -2.83e-02 |
Pos logit contributions | |
unatt | +0.034 |
ternally | +0.034 |
ivated | +0.033 |
Leave | +0.031 |
en | +0.030 |
Neg logit contributions | |
ages | -0.032 |
zag | -0.032 |
ometer | -0.029 |
ccoli | -0.028 |
tr | -0.028 |
Token | M |
Token ID | 44 |
Feature activation | +1.53e-02 |
Pos logit contributions | |
ternally | +0.034 |
unatt | +0.032 |
ivated | +0.032 |
en | +0.032 |
alone | +0.031 |
Neg logit contributions | |
tr | -0.046 |
ages | -0.045 |
ained | -0.045 |
zag | -0.045 |
After | -0.043 |
Token | ummy |
Token ID | 13513 |
Feature activation | +4.80e-03 |
Pos logit contributions | |
ages | +0.021 |
zag | +0.020 |
tr | +0.019 |
ained | +0.018 |
ccoli | +0.018 |
Neg logit contributions | |
My | -0.025 |
I | -0.024 |
en | -0.024 |
unatt | -0.024 |
ivated | -0.024 |
Token | , |
Token ID | 11 |
Feature activation | -2.90e-02 |
Pos logit contributions | |
ages | +0.007 |
zag | +0.007 |
tr | +0.007 |
ained | +0.007 |
ometer | +0.007 |
Neg logit contributions | |
ivated | -0.007 |
My | -0.007 |
unatt | -0.007 |
ternally | -0.007 |
en | -0.007 |
Token | can |
Token ID | 460 |
Feature activation | -8.10e-02 |
Pos logit contributions | |
ivated | +0.039 |
ternally | +0.038 |
unatt | +0.037 |
en | +0.036 |
Int | +0.035 |
Neg logit contributions | |
zag | -0.042 |
tr | -0.042 |
ages | -0.041 |
ccoli | -0.041 |
ometer | -0.039 |
Token | I |
Token ID | 314 |
Feature activation | -3.68e-02 |
Pos logit contributions | |
lier | +0.103 |
unatt | +0.100 |
ivated | +0.095 |
vious | +0.089 |
and | +0.088 |
Neg logit contributions | |
zag | -0.120 |
ages | -0.116 |
w | -0.116 |
ves | -0.112 |
tails | -0.109 |
Token | ride |
Token ID | 6594 |
Feature activation | -1.99e-02 |
Pos logit contributions | |
unatt | +0.055 |
ivated | +0.053 |
ternally | +0.051 |
en | +0.051 |
Int | +0.049 |
Neg logit contributions | |
zag | -0.059 |
tr | -0.051 |
ages | -0.048 |
ine | -0.047 |
tails | -0.047 |
Token | in |
Token ID | 287 |
Feature activation | -4.63e-02 |
Pos logit contributions | |
ivated | +0.022 |
ternally | +0.021 |
en | +0.021 |
unatt | +0.021 |
Int | +0.020 |
Neg logit contributions | |
zag | -0.036 |
ages | -0.035 |
tr | -0.033 |
ained | -0.033 |
tails | -0.032 |
Token | it |
Token ID | 340 |
Feature activation | -1.31e-02 |
Pos logit contributions | |
ivated | +0.066 |
ternally | +0.065 |
Int | +0.061 |
unatt | +0.061 |
en | +0.060 |
Neg logit contributions | |
zag | -0.065 |
packs | -0.060 |
ine | -0.058 |
ained | -0.058 |
After | -0.056 |
Token | ?" |
Token ID | 1701 |
Feature activation | -4.86e-02 |
Pos logit contributions | |
ternally | +0.019 |
ivated | +0.019 |
en | +0.019 |
My | +0.019 |
unatt | +0.018 |
Neg logit contributions | |
zag | -0.020 |
ages | -0.018 |
tr | -0.018 |
ained | -0.018 |
ometer | -0.017 |
Token | said |
Token ID | 531 |
Feature activation | -1.51e-02 |
Pos logit contributions | |
ternally | +0.044 |
unatt | +0.044 |
ivated | +0.043 |
My | +0.040 |
Leave | +0.040 |
Neg logit contributions | |
ages | -0.033 |
zag | -0.032 |
tails | -0.029 |
ine | -0.029 |
ccoli | -0.029 |
Token | , |
Token ID | 11 |
Feature activation | -6.30e-03 |
Pos logit contributions | |
unatt | +0.026 |
ivated | +0.026 |
ternally | +0.026 |
en | +0.024 |
alone | +0.024 |
Neg logit contributions | |
ages | -0.019 |
zag | -0.019 |
ometer | -0.017 |
tr | -0.016 |
ccoli | -0.016 |
Token | " |
Token ID | 366 |
Feature activation | -3.57e-02 |
Pos logit contributions | |
ternally | +0.008 |
unatt | +0.008 |
ivated | +0.008 |
en | +0.007 |
My | +0.007 |
Neg logit contributions | |
zag | -0.011 |
ages | -0.011 |
tr | -0.010 |
ometer | -0.010 |
ained | -0.010 |
Token | Hi |
Token ID | 17250 |
Feature activation | -2.60e-02 |
Pos logit contributions | |
en | +0.048 |
ivated | +0.047 |
alone | +0.046 |
ternally | +0.045 |
unatt | +0.044 |
Neg logit contributions | |
tr | -0.058 |
After | -0.054 |
w | -0.054 |
ages | -0.053 |
Before | -0.051 |
Token | , |
Token ID | 11 |
Feature activation | -3.49e-02 |
Pos logit contributions | |
ivated | +0.049 |
ternally | +0.048 |
unatt | +0.047 |
Leave | +0.046 |
en | +0.045 |
Neg logit contributions | |
zag | -0.027 |
uffy | -0.026 |
ages | -0.025 |
tr | -0.024 |
ccoli | -0.024 |
Token | can |
Token ID | 460 |
Feature activation | -8.07e-02 |
Pos logit contributions | |
ivated | +0.047 |
ternally | +0.046 |
unatt | +0.045 |
folk | +0.043 |
ard | +0.041 |
Neg logit contributions | |
tr | -0.052 |
zag | -0.051 |
ccoli | -0.050 |
ometer | -0.048 |
uffy | -0.046 |
Token | I |
Token ID | 314 |
Feature activation | -3.70e-02 |
Pos logit contributions | |
unatt | +0.103 |
ivated | +0.102 |
vious | +0.100 |
lier | +0.099 |
outsider | +0.099 |
Neg logit contributions | |
w | -0.115 |
zag | -0.111 |
ves | -0.110 |
ages | -0.110 |
uffy | -0.110 |
Token | play |
Token ID | 711 |
Feature activation | -4.59e-02 |
Pos logit contributions | |
ivated | +0.059 |
unatt | +0.059 |
ternally | +0.058 |
folk | +0.057 |
Int | +0.055 |
Neg logit contributions | |
zag | -0.054 |
tr | -0.046 |
ine | -0.046 |
tails | -0.042 |
common | -0.042 |
Token | with |
Token ID | 351 |
Feature activation | -5.35e-02 |
Pos logit contributions | |
folk | +0.075 |
ternally | +0.074 |
ivated | +0.073 |
outsider | +0.071 |
Int | +0.071 |
Neg logit contributions | |
zag | -0.059 |
tr | -0.052 |
ages | -0.051 |
ends | -0.051 |
ained | -0.050 |
Token | you |
Token ID | 345 |
Feature activation | -1.07e-02 |
Pos logit contributions | |
ivated | +0.081 |
outsider | +0.077 |
Int | +0.074 |
ternally | +0.072 |
unatt | +0.071 |
Neg logit contributions | |
w | -0.076 |
oof | -0.066 |
After | -0.064 |
ages | -0.064 |
ward | -0.063 |
Token | ?" |
Token ID | 1701 |
Feature activation | -2.52e-02 |
Pos logit contributions | |
ivated | +0.016 |
ternally | +0.016 |
en | +0.016 |
unatt | +0.015 |
My | +0.015 |
Neg logit contributions | |
zag | -0.016 |
ages | -0.014 |
tr | -0.014 |
ained | -0.014 |
ccoli | -0.013 |
Token | on |
Token ID | 319 |
Feature activation | +2.06e-02 |
Pos logit contributions | |
ages | +0.051 |
zag | +0.049 |
ccoli | +0.048 |
tr | +0.046 |
ometer | +0.045 |
Neg logit contributions | |
I | -0.052 |
alone | -0.051 |
My | -0.051 |
en | -0.050 |
me | -0.049 |
Token | , |
Token ID | 11 |
Feature activation | +1.95e-02 |
Pos logit contributions | |
ages | +0.036 |
zag | +0.035 |
tr | +0.033 |
ained | +0.033 |
ometer | +0.032 |
Neg logit contributions | |
I | -0.025 |
My | -0.025 |
alone | -0.024 |
en | -0.024 |
ternally | -0.024 |
Token | Jane |
Token ID | 12091 |
Feature activation | +2.43e-03 |
Pos logit contributions | |
ages | +0.038 |
zag | +0.038 |
ained | +0.036 |
tr | +0.036 |
ometer | +0.035 |
Neg logit contributions | |
My | -0.019 |
I | -0.019 |
en | -0.019 |
unatt | -0.018 |
ternally | -0.018 |
Token | was |
Token ID | 373 |
Feature activation | -5.30e-03 |
Pos logit contributions | |
ages | +0.004 |
zag | +0.004 |
tr | +0.004 |
ained | +0.003 |
ometer | +0.003 |
Neg logit contributions | |
ivated | -0.003 |
unatt | -0.003 |
ternally | -0.003 |
en | -0.003 |
My | -0.003 |
Token | never |
Token ID | 1239 |
Feature activation | -2.51e-02 |
Pos logit contributions | |
ivated | +0.006 |
ternally | +0.006 |
My | +0.006 |
unatt | +0.006 |
en | +0.006 |
Neg logit contributions | |
zag | -0.009 |
ages | -0.009 |
ained | -0.008 |
tr | -0.008 |
ine | -0.008 |
Token | bored |
Token ID | 17533 |
Feature activation | -8.07e-02 |
Pos logit contributions | |
ternally | +0.028 |
My | +0.027 |
ivated | +0.027 |
Int | +0.025 |
unatt | +0.024 |
Neg logit contributions | |
ages | -0.044 |
zag | -0.044 |
ained | -0.042 |
tr | -0.040 |
olving | -0.040 |
Token | . |
Token ID | 13 |
Feature activation | -2.06e-02 |
Pos logit contributions | |
ternally | +0.133 |
Int | +0.114 |
ivated | +0.111 |
My | +0.106 |
chie | +0.104 |
Neg logit contributions | |
olving | -0.104 |
zag | -0.102 |
ained | -0.097 |
tr | -0.094 |
lesson | -0.091 |
Token | She |
Token ID | 1375 |
Feature activation | -5.54e-03 |
Pos logit contributions | |
ivated | +0.028 |
unatt | +0.028 |
ternally | +0.028 |
en | +0.026 |
alone | +0.025 |
Neg logit contributions | |
zag | -0.033 |
ages | -0.032 |
tr | -0.031 |
ained | -0.030 |
ometer | -0.030 |
Token | had |
Token ID | 550 |
Feature activation | -1.55e-02 |
Pos logit contributions | |
ivated | +0.008 |
unatt | +0.008 |
ternally | +0.008 |
My | +0.008 |
en | +0.008 |
Neg logit contributions | |
zag | -0.008 |
ages | -0.008 |
tr | -0.008 |
ained | -0.008 |
ine | -0.007 |
Token | many |
Token ID | 867 |
Feature activation | -2.62e-02 |
Pos logit contributions | |
ivated | +0.022 |
ternally | +0.022 |
unatt | +0.021 |
My | +0.021 |
en | +0.020 |
Neg logit contributions | |
ages | -0.024 |
zag | -0.023 |
ained | -0.022 |
tr | -0.021 |
ine | -0.021 |
Token | smooth |
Token ID | 7209 |
Feature activation | +6.77e-03 |
Pos logit contributions | |
ivated | +0.045 |
My | +0.045 |
unatt | +0.043 |
ternally | +0.042 |
I | +0.041 |
Neg logit contributions | |
ages | -0.032 |
zag | -0.030 |
packs | -0.028 |
ained | -0.028 |
ira | -0.028 |
Token | " |
Token ID | 366 |
Feature activation | -3.31e-02 |
Pos logit contributions | |
unatt | +0.013 |
ternally | +0.013 |
ivated | +0.013 |
en | +0.012 |
My | +0.012 |
Neg logit contributions | |
ages | -0.016 |
zag | -0.016 |
tr | -0.015 |
ometer | -0.015 |
ained | -0.015 |
Token | Hello |
Token ID | 15496 |
Feature activation | -1.70e-02 |
Pos logit contributions | |
unatt | +0.038 |
ivated | +0.036 |
en | +0.034 |
ternally | +0.034 |
alone | +0.034 |
Neg logit contributions | |
zag | -0.056 |
w | -0.056 |
tr | -0.056 |
After | -0.055 |
ages | -0.055 |
Token | little |
Token ID | 1310 |
Feature activation | -3.02e-02 |
Pos logit contributions | |
ivated | +0.028 |
ternally | +0.027 |
unatt | +0.027 |
en | +0.026 |
Leave | +0.025 |
Neg logit contributions | |
zag | -0.022 |
ages | -0.022 |
tr | -0.020 |
ometer | -0.020 |
tails | -0.019 |
Token | bird |
Token ID | 6512 |
Feature activation | -2.57e-03 |
Pos logit contributions | |
ivated | +0.036 |
unatt | +0.032 |
ternally | +0.031 |
My | +0.030 |
en | +0.029 |
Neg logit contributions | |
zag | -0.053 |
ages | -0.052 |
tails | -0.051 |
tr | -0.051 |
ies | -0.051 |
Token | , |
Token ID | 11 |
Feature activation | -3.28e-02 |
Pos logit contributions | |
ivated | +0.004 |
ternally | +0.003 |
unatt | +0.003 |
en | +0.003 |
My | +0.003 |
Neg logit contributions | |
zag | -0.004 |
ages | -0.004 |
tr | -0.004 |
ained | -0.004 |
ometer | -0.004 |
Token | can |
Token ID | 460 |
Feature activation | -8.04e-02 |
Pos logit contributions | |
ivated | +0.044 |
ternally | +0.043 |
unatt | +0.043 |
Int | +0.040 |
folk | +0.039 |
Neg logit contributions | |
tr | -0.048 |
zag | -0.048 |
ccoli | -0.044 |
olving | -0.044 |
ometer | -0.043 |
Token | you |
Token ID | 345 |
Feature activation | -5.30e-02 |
Pos logit contributions | |
unatt | +0.111 |
lier | +0.104 |
ivated | +0.103 |
liest | +0.100 |
ement | +0.097 |
Neg logit contributions | |
w | -0.108 |
zag | -0.105 |
ages | -0.100 |
tails | -0.097 |
ves | -0.097 |
Token | pass |
Token ID | 1208 |
Feature activation | -3.16e-02 |
Pos logit contributions | |
folk | +0.076 |
unatt | +0.074 |
ternally | +0.073 |
Int | +0.072 |
ople | +0.072 |
Neg logit contributions | |
zag | -0.084 |
tr | -0.066 |
tails | -0.065 |
ine | -0.065 |
ast | -0.063 |
Token | me |
Token ID | 502 |
Feature activation | +7.95e-04 |
Pos logit contributions | |
ivated | +0.031 |
en | +0.027 |
ternally | +0.027 |
unatt | +0.026 |
Int | +0.026 |
Neg logit contributions | |
zag | -0.061 |
ained | -0.060 |
ages | -0.059 |
w | -0.058 |
tr | -0.057 |
Token | a |
Token ID | 257 |
Feature activation | -9.67e-03 |
Pos logit contributions | |
zag | +0.001 |
ages | +0.001 |
tr | +0.001 |
ained | +0.001 |
ometer | +0.001 |
Neg logit contributions | |
ivated | -0.001 |
ternally | -0.001 |
unatt | -0.001 |
en | -0.001 |
My | -0.001 |
Token | tw |
Token ID | 665 |
Feature activation | -1.82e-02 |
Pos logit contributions | |
ivated | +0.013 |
ternally | +0.012 |
unatt | +0.012 |
My | +0.012 |
en | +0.012 |
Neg logit contributions | |
zag | -0.016 |
ages | -0.016 |
tr | -0.015 |
ometer | -0.014 |
ained | -0.014 |
Token | the |
Token ID | 262 |
Feature activation | -2.24e-02 |
Pos logit contributions | |
zag | +0.011 |
ages | +0.011 |
ained | +0.010 |
tr | +0.010 |
ometer | +0.010 |
Neg logit contributions | |
unatt | -0.006 |
en | -0.006 |
ivated | -0.006 |
My | -0.006 |
ternally | -0.006 |
Token | small |
Token ID | 1402 |
Feature activation | -1.32e-02 |
Pos logit contributions | |
ivated | +0.031 |
ternally | +0.029 |
My | +0.029 |
unatt | +0.028 |
Int | +0.028 |
Neg logit contributions | |
zag | -0.035 |
ages | -0.034 |
tr | -0.033 |
lesson | -0.031 |
ira | -0.030 |
Token | gorilla |
Token ID | 45314 |
Feature activation | -1.58e-02 |
Pos logit contributions | |
ivated | +0.020 |
ternally | +0.019 |
unatt | +0.019 |
My | +0.018 |
Int | +0.018 |
Neg logit contributions | |
zag | -0.020 |
ages | -0.019 |
tr | -0.018 |
ometer | -0.017 |
ira | -0.017 |
Token | was |
Token ID | 373 |
Feature activation | -2.01e-02 |
Pos logit contributions | |
ivated | +0.023 |
My | +0.023 |
ternally | +0.023 |
unatt | +0.022 |
en | +0.022 |
Neg logit contributions | |
ages | -0.024 |
zag | -0.024 |
tr | -0.022 |
tails | -0.021 |
ained | -0.020 |
Token | never |
Token ID | 1239 |
Feature activation | -4.04e-02 |
Pos logit contributions | |
ternally | +0.025 |
My | +0.025 |
ivated | +0.025 |
Int | +0.024 |
I | +0.023 |
Neg logit contributions | |
zag | -0.034 |
ages | -0.034 |
ained | -0.032 |
tr | -0.031 |
tails | -0.030 |
Token | seen |
Token ID | 1775 |
Feature activation | -8.00e-02 |
Pos logit contributions | |
ternally | +0.059 |
My | +0.057 |
ivated | +0.057 |
Give | +0.052 |
ican | +0.052 |
Neg logit contributions | |
ages | -0.058 |
zag | -0.056 |
ained | -0.054 |
ies | -0.051 |
ends | -0.051 |
Token | again |
Token ID | 757 |
Feature activation | -4.45e-03 |
Pos logit contributions | |
ivated | +0.142 |
ternally | +0.141 |
folk | +0.130 |
chie | +0.124 |
Int | +0.124 |
Neg logit contributions | |
ained | -0.074 |
for | -0.073 |
ward | -0.072 |
olving | -0.072 |
fault | -0.072 |
Token | . |
Token ID | 13 |
Feature activation | +3.09e-03 |
Pos logit contributions | |
ternally | +0.006 |
ivated | +0.006 |
My | +0.006 |
unatt | +0.006 |
en | +0.006 |
Neg logit contributions | |
zag | -0.007 |
ages | -0.007 |
ained | -0.007 |
tr | -0.006 |
ine | -0.006 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | +3.01e-04 |
Pos logit contributions | |
ages | +0.007 |
zag | +0.007 |
tr | +0.006 |
ained | +0.006 |
ometer | +0.006 |
Neg logit contributions | |
ivated | -0.002 |
unatt | -0.002 |
ternally | -0.002 |
My | -0.002 |
en | -0.002 |
Token | Once |
Token ID | 7454 |
Feature activation | -5.50e-04 |
Pos logit contributions | |
zag | +0.000 |
ages | +0.000 |
tr | +0.000 |
ained | +0.000 |
ometer | +0.000 |
Neg logit contributions | |
ivated | -0.000 |
ternally | -0.000 |
unatt | -0.000 |
en | -0.000 |
My | -0.000 |
Token | upon |
Token ID | 2402 |
Feature activation | +5.74e-03 |
Pos logit contributions | |
ivated | +0.001 |
ternally | +0.001 |
unatt | +0.001 |
My | +0.001 |
en | +0.001 |
Neg logit contributions | |
zag | -0.001 |
ages | -0.000 |
ained | -0.000 |
tr | -0.000 |
ometer | -0.000 |
Token | The |
Token ID | 383 |
Feature activation | +1.91e-03 |
Pos logit contributions | |
ages | +0.007 |
zag | +0.007 |
tr | +0.007 |
ained | +0.006 |
ometer | +0.006 |
Neg logit contributions | |
ivated | -0.005 |
My | -0.005 |
ternally | -0.005 |
en | -0.005 |
unatt | -0.005 |
Token | original |
Token ID | 2656 |
Feature activation | +3.47e-03 |
Pos logit contributions | |
ages | +0.003 |
zag | +0.003 |
tr | +0.003 |
ained | +0.003 |
ometer | +0.003 |
Neg logit contributions | |
ivated | -0.002 |
ternally | -0.002 |
unatt | -0.002 |
My | -0.002 |
en | -0.002 |
Token | city |
Token ID | 1748 |
Feature activation | -1.30e-02 |
Pos logit contributions | |
ages | +0.006 |
zag | +0.006 |
tr | +0.006 |
ained | +0.006 |
ometer | +0.005 |
Neg logit contributions | |
ivated | -0.004 |
unatt | -0.004 |
ternally | -0.004 |
en | -0.004 |
My | -0.004 |
Token | was |
Token ID | 373 |
Feature activation | -5.81e-03 |
Pos logit contributions | |
ivated | +0.018 |
My | +0.018 |
ternally | +0.018 |
unatt | +0.018 |
en | +0.018 |
Neg logit contributions | |
zag | -0.020 |
ages | -0.020 |
tr | -0.018 |
ained | -0.018 |
ometer | -0.017 |
Token | never |
Token ID | 1239 |
Feature activation | -2.44e-02 |
Pos logit contributions | |
ivated | +0.007 |
ternally | +0.007 |
My | +0.007 |
unatt | +0.007 |
en | +0.007 |
Neg logit contributions | |
zag | -0.010 |
ages | -0.010 |
ained | -0.009 |
tr | -0.009 |
ometer | -0.009 |
Token | seen |
Token ID | 1775 |
Feature activation | -7.99e-02 |
Pos logit contributions | |
My | +0.029 |
ternally | +0.029 |
ivated | +0.029 |
Int | +0.027 |
I | +0.026 |
Neg logit contributions | |
zag | -0.041 |
ages | -0.041 |
ained | -0.040 |
tr | -0.038 |
olving | -0.037 |
Token | again |
Token ID | 757 |
Feature activation | +1.21e-03 |
Pos logit contributions | |
ivated | +0.130 |
ternally | +0.127 |
Int | +0.122 |
Give | +0.115 |
Leave | +0.113 |
Neg logit contributions | |
olving | -0.100 |
zag | -0.091 |
lesson | -0.086 |
ained | -0.085 |
ending | -0.084 |
Token | . |
Token ID | 13 |
Feature activation | -6.90e-04 |
Pos logit contributions | |
ages | +0.002 |
zag | +0.002 |
tr | +0.002 |
ained | +0.002 |
ometer | +0.002 |
Neg logit contributions | |
ivated | -0.001 |
ternally | -0.001 |
unatt | -0.001 |
My | -0.001 |
en | -0.001 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | +1.12e-02 |
Pos logit contributions | |
ivated | +0.001 |
unatt | +0.001 |
ternally | +0.001 |
en | +0.001 |
My | +0.001 |
Neg logit contributions | |
ages | -0.001 |
zag | -0.001 |
tr | -0.001 |
ained | -0.001 |
ometer | -0.001 |
Token | Once |
Token ID | 7454 |
Feature activation | +1.49e-03 |
Pos logit contributions | |
ages | +0.015 |
zag | +0.014 |
tr | +0.013 |
ained | +0.013 |
ometer | +0.013 |
Neg logit contributions | |
My | -0.018 |
ivated | -0.018 |
unatt | -0.018 |
en | -0.018 |
ternally | -0.018 |
Token | upon |
Token ID | 2402 |
Feature activation | +1.60e-02 |
Pos logit contributions | |
zag | +0.001 |
ages | +0.001 |
tr | +0.001 |
ained | +0.001 |
ometer | +0.001 |
Neg logit contributions | |
ivated | -0.003 |
unatt | -0.003 |
ternally | -0.003 |
My | -0.003 |
en | -0.003 |
Token | They |
Token ID | 1119 |
Feature activation | -1.90e-03 |
Pos logit contributions | |
unatt | +0.033 |
ternally | +0.032 |
ivated | +0.032 |
cheer | +0.029 |
en | +0.028 |
Neg logit contributions | |
ages | -0.033 |
zag | -0.031 |
tr | -0.031 |
ometer | -0.031 |
ained | -0.030 |
Token | thought |
Token ID | 1807 |
Feature activation | -8.59e-03 |
Pos logit contributions | |
ternally | +0.003 |
My | +0.002 |
ivated | +0.002 |
unatt | +0.002 |
en | +0.002 |
Neg logit contributions | |
ages | -0.003 |
zag | -0.003 |
tr | -0.003 |
ained | -0.003 |
ometer | -0.003 |
Token | , |
Token ID | 11 |
Feature activation | -2.69e-03 |
Pos logit contributions | |
ternally | +0.013 |
ivated | +0.013 |
unatt | +0.013 |
My | +0.012 |
en | +0.012 |
Neg logit contributions | |
zag | -0.012 |
ages | -0.012 |
ained | -0.011 |
tr | -0.011 |
ometer | -0.011 |
Token | " |
Token ID | 366 |
Feature activation | -2.39e-02 |
Pos logit contributions | |
unatt | +0.003 |
ternally | +0.003 |
ivated | +0.003 |
My | +0.003 |
en | +0.003 |
Neg logit contributions | |
ages | -0.005 |
zag | -0.005 |
tr | -0.005 |
ained | -0.005 |
ometer | -0.004 |
Token | How |
Token ID | 2437 |
Feature activation | +1.81e-02 |
Pos logit contributions | |
unatt | +0.024 |
ivated | +0.024 |
ternally | +0.024 |
My | +0.024 |
en | +0.024 |
Neg logit contributions | |
ages | -0.045 |
zag | -0.044 |
tr | -0.043 |
ained | -0.043 |
After | -0.040 |
Token | can |
Token ID | 460 |
Feature activation | -7.99e-02 |
Pos logit contributions | |
ages | +0.031 |
zag | +0.030 |
tails | +0.028 |
ained | +0.028 |
tr | +0.027 |
Neg logit contributions | |
I | -0.023 |
en | -0.023 |
My | -0.022 |
unatt | -0.022 |
alone | -0.021 |
Token | I |
Token ID | 314 |
Feature activation | -3.15e-02 |
Pos logit contributions | |
unatt | +0.138 |
ivated | +0.130 |
ternally | +0.120 |
lier | +0.119 |
vious | +0.115 |
Neg logit contributions | |
zag | -0.103 |
ves | -0.099 |
ages | -0.092 |
lesson | -0.082 |
ies | -0.081 |
Token | make |
Token ID | 787 |
Feature activation | -3.45e-02 |
Pos logit contributions | |
ternally | +0.043 |
ivated | +0.043 |
Int | +0.042 |
unatt | +0.041 |
My | +0.041 |
Neg logit contributions | |
zag | -0.052 |
ages | -0.046 |
tr | -0.046 |
ine | -0.042 |
olving | -0.042 |
Token | the |
Token ID | 262 |
Feature activation | -4.06e-02 |
Pos logit contributions | |
unatt | +0.050 |
ivated | +0.049 |
ternally | +0.048 |
Leave | +0.046 |
Int | +0.046 |
Neg logit contributions | |
zag | -0.049 |
ages | -0.048 |
ained | -0.046 |
ward | -0.043 |
ine | -0.043 |
Token | boats |
Token ID | 16659 |
Feature activation | -2.55e-02 |
Pos logit contributions | |
ternally | +0.068 |
My | +0.066 |
Int | +0.066 |
ivated | +0.064 |
unatt | +0.062 |
Neg logit contributions | |
zag | -0.053 |
ained | -0.051 |
common | -0.049 |
lesson | -0.049 |
ages | -0.048 |
Token | go |
Token ID | 467 |
Feature activation | -1.65e-02 |
Pos logit contributions | |
My | +0.030 |
ternally | +0.030 |
Int | +0.029 |
ivated | +0.029 |
unatt | +0.028 |
Neg logit contributions | |
zag | -0.045 |
ages | -0.043 |
ained | -0.041 |
tr | -0.040 |
olving | -0.038 |
Token | . |
Token ID | 13 |
Feature activation | -6.93e-03 |
Pos logit contributions | |
ivated | +0.014 |
ternally | +0.013 |
My | +0.013 |
unatt | +0.013 |
en | +0.013 |
Neg logit contributions | |
zag | -0.019 |
ages | -0.019 |
ained | -0.018 |
ometer | -0.017 |
tr | -0.017 |
Token |
|
Token ID | 198 |
Feature activation | +9.45e-03 |
Pos logit contributions | |
ternally | +0.011 |
unatt | +0.011 |
ivated | +0.010 |
en | +0.010 |
My | +0.010 |
Neg logit contributions | |
ages | -0.010 |
zag | -0.010 |
ained | -0.009 |
tr | -0.009 |
ometer | -0.009 |
Token |
|
Token ID | 198 |
Feature activation | +1.21e-02 |
Pos logit contributions | |
ages | +0.017 |
zag | +0.017 |
tr | +0.016 |
ained | +0.016 |
ometer | +0.015 |
Neg logit contributions | |
My | -0.011 |
en | -0.011 |
ivated | -0.011 |
unatt | -0.010 |
I | -0.010 |
Token | " |
Token ID | 1 |
Feature activation | -1.84e-02 |
Pos logit contributions | |
zag | +0.019 |
ages | +0.019 |
tr | +0.018 |
ained | +0.018 |
ometer | +0.017 |
Neg logit contributions | |
ivated | -0.016 |
My | -0.016 |
en | -0.016 |
I | -0.016 |
unatt | -0.016 |
Token | How |
Token ID | 2437 |
Feature activation | +2.96e-02 |
Pos logit contributions | |
unatt | +0.026 |
ivated | +0.026 |
ternally | +0.026 |
alone | +0.025 |
en | +0.025 |
Neg logit contributions | |
tr | -0.026 |
zag | -0.026 |
ages | -0.026 |
ained | -0.025 |
After | -0.024 |
Token | can |
Token ID | 460 |
Feature activation | -7.98e-02 |
Pos logit contributions | |
ages | +0.051 |
zag | +0.048 |
tails | +0.048 |
igi | +0.044 |
ained | +0.044 |
Neg logit contributions | |
I | -0.040 |
en | -0.037 |
My | -0.037 |
alone | -0.034 |
unatt | -0.034 |
Token | we |
Token ID | 356 |
Feature activation | -3.07e-02 |
Pos logit contributions | |
unatt | +0.152 |
ivated | +0.144 |
lier | +0.136 |
vious | +0.136 |
ternally | +0.136 |
Neg logit contributions | |
ves | -0.084 |
zag | -0.080 |
ira | -0.071 |
common | -0.069 |
ages | -0.068 |
Token | get |
Token ID | 651 |
Feature activation | -2.55e-02 |
Pos logit contributions | |
ternally | +0.042 |
My | +0.042 |
unatt | +0.041 |
Int | +0.041 |
en | +0.040 |
Neg logit contributions | |
zag | -0.048 |
tr | -0.043 |
ages | -0.042 |
tails | -0.041 |
olving | -0.040 |
Token | the |
Token ID | 262 |
Feature activation | -1.54e-02 |
Pos logit contributions | |
ternally | +0.035 |
Int | +0.033 |
My | +0.033 |
ivated | +0.033 |
unatt | +0.032 |
Neg logit contributions | |
ages | -0.037 |
ained | -0.037 |
zag | -0.037 |
w | -0.034 |
ometer | -0.034 |
Token | jet |
Token ID | 12644 |
Feature activation | -1.11e-02 |
Pos logit contributions | |
ivated | +0.018 |
My | +0.018 |
ternally | +0.018 |
unatt | +0.018 |
Int | +0.017 |
Neg logit contributions | |
zag | -0.027 |
ages | -0.026 |
tr | -0.026 |
ained | -0.025 |
ometer | -0.025 |
Token | back |
Token ID | 736 |
Feature activation | +7.80e-03 |
Pos logit contributions | |
ivated | +0.013 |
ternally | +0.013 |
My | +0.013 |
en | +0.013 |
unatt | +0.013 |
Neg logit contributions | |
zag | -0.019 |
ages | -0.019 |
tr | -0.018 |
ained | -0.018 |
ometer | -0.017 |
Token | ." |
Token ID | 526 |
Feature activation | -1.14e-02 |
Pos logit contributions | |
zag | +0.002 |
ages | +0.002 |
tr | +0.002 |
ained | +0.002 |
ometer | +0.002 |
Neg logit contributions | |
ivated | -0.001 |
ternally | -0.001 |
unatt | -0.001 |
en | -0.001 |
My | -0.001 |
Token |
|
Token ID | 198 |
Feature activation | -1.20e-02 |
Pos logit contributions | |
unatt | +0.018 |
ternally | +0.017 |
ivated | +0.017 |
en | +0.016 |
My | +0.016 |
Neg logit contributions | |
zag | -0.016 |
ages | -0.016 |
ained | -0.015 |
tr | -0.015 |
tails | -0.015 |
Token |
|
Token ID | 198 |
Feature activation | -6.63e-03 |
Pos logit contributions | |
unatt | +0.019 |
ivated | +0.019 |
ternally | +0.018 |
en | +0.018 |
My | +0.017 |
Neg logit contributions | |
zag | -0.018 |
ages | -0.017 |
ained | -0.015 |
tr | -0.015 |
ometer | -0.015 |
Token | " |
Token ID | 1 |
Feature activation | -3.49e-02 |
Pos logit contributions | |
unatt | +0.010 |
ivated | +0.010 |
ternally | +0.010 |
en | +0.010 |
My | +0.010 |
Neg logit contributions | |
zag | -0.009 |
ages | -0.009 |
tr | -0.008 |
tails | -0.008 |
ained | -0.008 |
Token | How |
Token ID | 2437 |
Feature activation | +2.40e-02 |
Pos logit contributions | |
unatt | +0.051 |
ternally | +0.047 |
alone | +0.047 |
ivated | +0.043 |
ople | +0.042 |
Neg logit contributions | |
tr | -0.053 |
After | -0.052 |
zag | -0.052 |
ages | -0.051 |
Before | -0.049 |
Token | can |
Token ID | 460 |
Feature activation | -7.91e-02 |
Pos logit contributions | |
ages | +0.041 |
zag | +0.040 |
ained | +0.037 |
tails | +0.036 |
tr | +0.036 |
Neg logit contributions | |
I | -0.032 |
My | -0.031 |
en | -0.030 |
ternally | -0.028 |
me | -0.028 |
Token | you |
Token ID | 345 |
Feature activation | -1.99e-02 |
Pos logit contributions | |
unatt | +0.110 |
ivated | +0.097 |
ternally | +0.091 |
lier | +0.083 |
sidelines | +0.080 |
Neg logit contributions | |
ves | -0.126 |
ages | -0.124 |
ira | -0.123 |
zag | -0.122 |
tails | -0.121 |
Token | talk |
Token ID | 1561 |
Feature activation | -1.47e-02 |
Pos logit contributions | |
unatt | +0.027 |
ternally | +0.026 |
My | +0.025 |
ivated | +0.025 |
en | +0.025 |
Neg logit contributions | |
zag | -0.033 |
ages | -0.030 |
tr | -0.029 |
tails | -0.029 |
ine | -0.029 |
Token | to |
Token ID | 284 |
Feature activation | -4.72e-02 |
Pos logit contributions | |
ternally | +0.021 |
ivated | +0.021 |
unatt | +0.021 |
en | +0.020 |
My | +0.020 |
Neg logit contributions | |
zag | -0.022 |
ages | -0.021 |
tr | -0.021 |
ained | -0.020 |
ometer | -0.019 |
Token | a |
Token ID | 257 |
Feature activation | -2.17e-02 |
Pos logit contributions | |
ivated | +0.062 |
ternally | +0.058 |
unatt | +0.056 |
Int | +0.055 |
Leave | +0.053 |
Neg logit contributions | |
zag | -0.073 |
tails | -0.069 |
tr | -0.068 |
ages | -0.068 |
ine | -0.066 |
Token | bird |
Token ID | 6512 |
Feature activation | -1.96e-03 |
Pos logit contributions | |
ivated | +0.021 |
ternally | +0.020 |
unatt | +0.020 |
Int | +0.017 |
My | +0.017 |
Neg logit contributions | |
zag | -0.045 |
tr | -0.042 |
ages | -0.042 |
ometer | -0.041 |
ira | -0.039 |
Token | to |
Token ID | 284 |
Feature activation | -1.91e-02 |
Pos logit contributions | |
ternally | +0.023 |
ivated | +0.023 |
unatt | +0.022 |
My | +0.022 |
I | +0.020 |
Neg logit contributions | |
zag | -0.027 |
ages | -0.026 |
ained | -0.025 |
tr | -0.023 |
ine | -0.023 |
Token | think |
Token ID | 892 |
Feature activation | -1.91e-02 |
Pos logit contributions | |
My | +0.024 |
ternally | +0.024 |
unatt | +0.024 |
ivated | +0.023 |
folk | +0.021 |
Neg logit contributions | |
zag | -0.033 |
tr | -0.029 |
ained | -0.029 |
ages | -0.029 |
tails | -0.028 |
Token | . |
Token ID | 13 |
Feature activation | -1.07e-02 |
Pos logit contributions | |
ivated | +0.030 |
ternally | +0.029 |
unatt | +0.028 |
My | +0.026 |
en | +0.025 |
Neg logit contributions | |
zag | -0.028 |
ages | -0.027 |
tr | -0.026 |
ained | -0.026 |
ine | -0.026 |
Token | " |
Token ID | 366 |
Feature activation | -1.93e-02 |
Pos logit contributions | |
unatt | +0.015 |
ternally | +0.014 |
ivated | +0.014 |
en | +0.013 |
alone | +0.013 |
Neg logit contributions | |
ages | -0.018 |
zag | -0.017 |
ometer | -0.016 |
tr | -0.016 |
ained | -0.016 |
Token | How |
Token ID | 2437 |
Feature activation | +7.35e-03 |
Pos logit contributions | |
unatt | +0.020 |
ivated | +0.020 |
ternally | +0.020 |
en | +0.019 |
My | +0.019 |
Neg logit contributions | |
tr | -0.036 |
ages | -0.036 |
zag | -0.035 |
ained | -0.034 |
After | -0.032 |
Token | can |
Token ID | 460 |
Feature activation | -7.88e-02 |
Pos logit contributions | |
ages | +0.012 |
zag | +0.012 |
ained | +0.011 |
tr | +0.011 |
ometer | +0.010 |
Neg logit contributions | |
My | -0.010 |
en | -0.010 |
ternally | -0.010 |
unatt | -0.010 |
ivated | -0.010 |
Token | I |
Token ID | 314 |
Feature activation | -4.30e-02 |
Pos logit contributions | |
unatt | +0.106 |
ivated | +0.099 |
lier | +0.089 |
ternally | +0.088 |
joy | +0.083 |
Neg logit contributions | |
zag | -0.130 |
ages | -0.126 |
ves | -0.125 |
tails | -0.120 |
balls | -0.113 |
Token | roll |
Token ID | 4836 |
Feature activation | -2.28e-02 |
Pos logit contributions | |
unatt | +0.068 |
ternally | +0.068 |
ivated | +0.066 |
folk | +0.064 |
Int | +0.064 |
Neg logit contributions | |
zag | -0.065 |
tr | -0.050 |
ages | -0.050 |
tails | -0.048 |
ained | -0.048 |
Token | like |
Token ID | 588 |
Feature activation | -5.15e-02 |
Pos logit contributions | |
ivated | +0.028 |
ternally | +0.027 |
unatt | +0.027 |
Int | +0.026 |
en | +0.026 |
Neg logit contributions | |
zag | -0.038 |
ained | -0.037 |
ages | -0.036 |
tr | -0.035 |
w | -0.034 |
Token | the |
Token ID | 262 |
Feature activation | -3.09e-02 |
Pos logit contributions | |
ivated | +0.078 |
ternally | +0.076 |
outsider | +0.074 |
unatt | +0.074 |
Int | +0.070 |
Neg logit contributions | |
zag | -0.067 |
ward | -0.064 |
tr | -0.064 |
After | -0.062 |
ained | -0.061 |
Token | ball |
Token ID | 2613 |
Feature activation | -3.07e-02 |
Pos logit contributions | |
ternally | +0.052 |
unatt | +0.052 |
ivated | +0.051 |
Int | +0.048 |
My | +0.046 |
Neg logit contributions | |
zag | -0.042 |
ained | -0.039 |
ages | -0.035 |
tr | -0.035 |
lesson | -0.034 |
Token | Some |
Token ID | 2773 |
Feature activation | +2.25e-02 |
Pos logit contributions | |
unatt | +0.007 |
ivated | +0.007 |
ternally | +0.007 |
en | +0.006 |
alone | +0.006 |
Neg logit contributions | |
ages | -0.014 |
zag | -0.013 |
tr | -0.013 |
ained | -0.013 |
ometer | -0.012 |
Token | people |
Token ID | 661 |
Feature activation | +2.50e-02 |
Pos logit contributions | |
zag | +0.043 |
ages | +0.041 |
tr | +0.041 |
ained | +0.041 |
ccoli | +0.039 |
Neg logit contributions | |
I | -0.024 |
en | -0.023 |
My | -0.023 |
alone | -0.023 |
ternally | -0.022 |
Token | say |
Token ID | 910 |
Feature activation | +9.44e-03 |
Pos logit contributions | |
ages | +0.043 |
zag | +0.043 |
tr | +0.041 |
ained | +0.041 |
ometer | +0.039 |
Neg logit contributions | |
ivated | -0.030 |
en | -0.030 |
I | -0.029 |
alone | -0.029 |
My | -0.029 |
Token | that |
Token ID | 326 |
Feature activation | +2.02e-02 |
Pos logit contributions | |
zag | +0.017 |
ages | +0.017 |
ained | +0.016 |
tr | +0.016 |
ometer | +0.015 |
Neg logit contributions | |
My | -0.010 |
en | -0.010 |
ivated | -0.010 |
ternally | -0.010 |
unatt | -0.010 |
Token | re |
Token ID | 302 |
Feature activation | +3.69e-02 |
Pos logit contributions | |
zag | +0.036 |
ages | +0.035 |
ained | +0.033 |
tr | +0.033 |
olving | +0.031 |
Neg logit contributions | |
I | -0.024 |
My | -0.024 |
en | -0.024 |
unatt | -0.023 |
alone | -0.023 |
Token | ind |
Token ID | 521 |
Feature activation | +6.44e-02 |
Pos logit contributions | |
tr | +0.066 |
zag | +0.063 |
ained | +0.063 |
ages | +0.063 |
olving | +0.060 |
Neg logit contributions | |
ternally | -0.040 |
me | -0.037 |
unatt | -0.036 |
alone | -0.036 |
My | -0.035 |
Token | eer |
Token ID | 28153 |
Feature activation | +4.26e-02 |
Pos logit contributions | |
zag | +0.125 |
tr | +0.121 |
ained | +0.120 |
After | +0.119 |
live | +0.119 |
Neg logit contributions | |
me | -0.056 |
alone | -0.055 |
I | -0.054 |
My | -0.051 |
ard | -0.050 |
Token | can |
Token ID | 460 |
Feature activation | +7.41e-03 |
Pos logit contributions | |
zag | +0.070 |
ained | +0.068 |
tr | +0.067 |
ages | +0.067 |
ometer | +0.062 |
Neg logit contributions | |
I | -0.054 |
alone | -0.052 |
My | -0.052 |
en | -0.052 |
me | -0.051 |
Token | fly |
Token ID | 6129 |
Feature activation | +1.57e-02 |
Pos logit contributions | |
ages | +0.014 |
zag | +0.014 |
ained | +0.013 |
tr | +0.013 |
ometer | +0.012 |
Neg logit contributions | |
ivated | -0.008 |
My | -0.008 |
en | -0.008 |
unatt | -0.008 |
ternally | -0.008 |
Token | and |
Token ID | 290 |
Feature activation | +2.84e-02 |
Pos logit contributions | |
ages | +0.029 |
zag | +0.029 |
ained | +0.027 |
tr | +0.027 |
ometer | +0.026 |
Neg logit contributions | |
unatt | -0.017 |
alone | -0.017 |
en | -0.016 |
My | -0.016 |
ternally | -0.016 |
Token | help |
Token ID | 1037 |
Feature activation | -1.91e-02 |
Pos logit contributions | |
zag | +0.055 |
ages | +0.054 |
ained | +0.053 |
live | +0.050 |
ometer | +0.050 |
Neg logit contributions | |
I | -0.030 |
My | -0.029 |
en | -0.028 |
unatt | -0.027 |
alone | -0.027 |
Token | a |
Token ID | 257 |
Feature activation | +2.63e-03 |
Pos logit contributions | |
zag | +0.036 |
ages | +0.035 |
tr | +0.032 |
ained | +0.032 |
ometer | +0.032 |
Neg logit contributions | |
en | -0.023 |
I | -0.022 |
My | -0.022 |
alone | -0.021 |
me | -0.021 |
Token | toy |
Token ID | 13373 |
Feature activation | -1.06e-02 |
Pos logit contributions | |
ages | +0.004 |
zag | +0.004 |
tr | +0.004 |
ained | +0.004 |
ometer | +0.004 |
Neg logit contributions | |
ivated | -0.003 |
unatt | -0.003 |
ternally | -0.003 |
en | -0.003 |
My | -0.003 |
Token | . |
Token ID | 13 |
Feature activation | -3.40e-04 |
Pos logit contributions | |
ivated | +0.015 |
ternally | +0.015 |
unatt | +0.014 |
en | +0.014 |
My | +0.014 |
Neg logit contributions | |
zag | -0.017 |
ages | -0.016 |
tr | -0.016 |
ometer | -0.015 |
ained | -0.015 |
Token |
|
Token ID | 198 |
Feature activation | +1.85e-02 |
Pos logit contributions | |
ivated | +0.000 |
unatt | +0.000 |
ternally | +0.000 |
en | +0.000 |
My | +0.000 |
Neg logit contributions | |
ages | -0.001 |
zag | -0.001 |
tr | -0.001 |
ained | -0.001 |
ometer | -0.001 |
Token |
|
Token ID | 198 |
Feature activation | +3.05e-02 |
Pos logit contributions | |
ages | +0.036 |
zag | +0.036 |
tr | +0.034 |
ained | +0.034 |
tails | +0.033 |
Neg logit contributions | |
My | -0.018 |
en | -0.018 |
I | -0.018 |
alone | -0.018 |
me | -0.017 |
Token | M |
Token ID | 44 |
Feature activation | +5.96e-02 |
Pos logit contributions | |
ages | +0.054 |
zag | +0.053 |
ained | +0.052 |
ometers | +0.050 |
ometer | +0.050 |
Neg logit contributions | |
I | -0.038 |
My | -0.038 |
me | -0.037 |
My | -0.036 |
else | -0.035 |
Token | ia |
Token ID | 544 |
Feature activation | -1.67e-02 |
Pos logit contributions | |
ages | +0.087 |
ained | +0.082 |
tails | +0.080 |
ccoli | +0.077 |
ward | +0.077 |
Neg logit contributions | |
me | -0.089 |
My | -0.075 |
I | -0.075 |
ternally | -0.072 |
ement | -0.071 |
Token | sees |
Token ID | 7224 |
Feature activation | +7.09e-03 |
Pos logit contributions | |
unatt | +0.026 |
ivated | +0.026 |
ternally | +0.025 |
My | +0.025 |
en | +0.024 |
Neg logit contributions | |
zag | -0.024 |
ages | -0.023 |
tr | -0.022 |
ira | -0.021 |
ine | -0.021 |
Token | Fl |
Token ID | 1610 |
Feature activation | +2.06e-02 |
Pos logit contributions | |
ages | +0.013 |
zag | +0.013 |
tr | +0.012 |
ained | +0.012 |
ometer | +0.012 |
Neg logit contributions | |
en | -0.008 |
My | -0.008 |
ivated | -0.008 |
unatt | -0.007 |
ternally | -0.007 |
Token | uffy |
Token ID | 15352 |
Feature activation | +7.38e-03 |
Pos logit contributions | |
ages | +0.011 |
zag | +0.009 |
tr | +0.008 |
ained | +0.008 |
tails | +0.007 |
Neg logit contributions | |
ternally | -0.049 |
My | -0.048 |
ivated | -0.048 |
unatt | -0.047 |
I | -0.047 |
Token | . |
Token ID | 13 |
Feature activation | +4.76e-03 |
Pos logit contributions | |
ages | +0.013 |
zag | +0.013 |
ained | +0.012 |
tr | +0.012 |
ometer | +0.012 |
Neg logit contributions | |
ivated | -0.009 |
en | -0.009 |
unatt | -0.008 |
My | -0.008 |
ternally | -0.008 |
Token | " |
Token ID | 1 |
Feature activation | +2.06e-02 |
Pos logit contributions | |
ages | +0.003 |
zag | +0.003 |
tr | +0.003 |
ained | +0.003 |
ometer | +0.003 |
Neg logit contributions | |
ivated | -0.003 |
unatt | -0.003 |
ternally | -0.003 |
en | -0.003 |
My | -0.003 |
Token | I |
Token ID | 40 |
Feature activation | +3.90e-02 |
Pos logit contributions | |
zag | +0.045 |
ages | +0.044 |
ained | +0.042 |
olving | +0.041 |
tails | +0.041 |
Neg logit contributions | |
My | -0.017 |
I | -0.017 |
en | -0.017 |
ivated | -0.016 |
My | -0.015 |
Token | 'm |
Token ID | 1101 |
Feature activation | +1.01e-02 |
Pos logit contributions | |
zag | +0.071 |
ained | +0.068 |
ages | +0.067 |
tr | +0.067 |
ogo | +0.065 |
Neg logit contributions | |
en | -0.047 |
I | -0.047 |
ivated | -0.043 |
My | -0.043 |
especially | -0.042 |
Token | cooking |
Token ID | 10801 |
Feature activation | +3.51e-03 |
Pos logit contributions | |
zag | +0.016 |
ages | +0.016 |
tr | +0.015 |
ained | +0.015 |
ometer | +0.014 |
Neg logit contributions | |
en | -0.014 |
ivated | -0.014 |
My | -0.013 |
unatt | -0.013 |
I | -0.013 |
Token | some |
Token ID | 617 |
Feature activation | +2.08e-02 |
Pos logit contributions | |
zag | +0.005 |
ages | +0.005 |
tr | +0.005 |
ained | +0.005 |
ometer | +0.005 |
Neg logit contributions | |
ivated | -0.005 |
unatt | -0.005 |
ternally | -0.005 |
en | -0.005 |
My | -0.005 |
Token | soup |
Token ID | 17141 |
Feature activation | +5.91e-02 |
Pos logit contributions | |
zag | +0.034 |
ages | +0.032 |
tr | +0.031 |
ained | +0.030 |
ometer | +0.029 |
Neg logit contributions | |
en | -0.029 |
My | -0.028 |
I | -0.028 |
unatt | -0.027 |
me | -0.027 |
Token | for |
Token ID | 329 |
Feature activation | +5.23e-03 |
Pos logit contributions | |
zag | +0.109 |
ained | +0.108 |
tr | +0.102 |
olving | +0.100 |
ages | +0.100 |
Neg logit contributions | |
I | -0.066 |
me | -0.061 |
alone | -0.059 |
unatt | -0.058 |
My | -0.055 |
Token | lunch |
Token ID | 9965 |
Feature activation | +2.65e-02 |
Pos logit contributions | |
zag | +0.010 |
ages | +0.010 |
tr | +0.009 |
ained | +0.009 |
ometer | +0.009 |
Neg logit contributions | |
ivated | -0.006 |
ternally | -0.006 |
My | -0.006 |
unatt | -0.006 |
en | -0.006 |
Token | . |
Token ID | 13 |
Feature activation | +1.66e-02 |
Pos logit contributions | |
zag | +0.047 |
ages | +0.046 |
ained | +0.045 |
tr | +0.044 |
tails | +0.043 |
Neg logit contributions | |
I | -0.030 |
unatt | -0.029 |
alone | -0.028 |
My | -0.028 |
en | -0.028 |
Token | Do |
Token ID | 2141 |
Feature activation | +2.60e-02 |
Pos logit contributions | |
zag | +0.035 |
ages | +0.034 |
ained | +0.032 |
tr | +0.031 |
tails | +0.031 |
Neg logit contributions | |
My | -0.016 |
I | -0.015 |
en | -0.014 |
ivated | -0.014 |
ternally | -0.013 |
Token | you |
Token ID | 345 |
Feature activation | +8.21e-03 |
Pos logit contributions | |
zag | +0.076 |
ages | +0.075 |
tr | +0.073 |
ained | +0.073 |
ometer | +0.072 |
Neg logit contributions | |
I | -0.006 |
My | -0.005 |
me | -0.002 |
en | -0.001 |
you | -0.001 |
Token | to |
Token ID | 284 |
Feature activation | -2.05e-02 |
Pos logit contributions | |
zag | +0.036 |
ages | +0.036 |
tr | +0.034 |
ained | +0.034 |
tails | +0.033 |
Neg logit contributions | |
unatt | -0.015 |
alone | -0.014 |
en | -0.014 |
My | -0.014 |
I | -0.014 |
Token | Dad |
Token ID | 17415 |
Feature activation | +1.57e-02 |
Pos logit contributions | |
ivated | +0.027 |
unatt | +0.027 |
ternally | +0.027 |
en | +0.026 |
My | +0.026 |
Neg logit contributions | |
zag | -0.031 |
ages | -0.030 |
tails | -0.029 |
tr | -0.029 |
ine | -0.029 |
Token | . |
Token ID | 13 |
Feature activation | +6.85e-03 |
Pos logit contributions | |
zag | +0.030 |
ages | +0.029 |
ained | +0.028 |
tr | +0.028 |
ometer | +0.027 |
Neg logit contributions | |
unatt | -0.016 |
en | -0.016 |
alone | -0.016 |
ivated | -0.016 |
ternally | -0.016 |
Token | She |
Token ID | 1375 |
Feature activation | +1.00e-02 |
Pos logit contributions | |
zag | +0.013 |
ages | +0.013 |
ained | +0.012 |
tr | +0.012 |
ometer | +0.012 |
Neg logit contributions | |
My | -0.007 |
ivated | -0.007 |
en | -0.007 |
I | -0.007 |
unatt | -0.007 |
Token | learned |
Token ID | 4499 |
Feature activation | +3.55e-02 |
Pos logit contributions | |
zag | +0.018 |
ages | +0.018 |
ained | +0.017 |
tr | +0.016 |
ometer | +0.016 |
Neg logit contributions | |
en | -0.012 |
unatt | -0.011 |
ternally | -0.011 |
My | -0.011 |
ivated | -0.011 |
Token | a |
Token ID | 257 |
Feature activation | +6.31e-02 |
Pos logit contributions | |
zag | +0.078 |
ages | +0.076 |
ained | +0.071 |
ccoli | +0.071 |
ometer | +0.070 |
Neg logit contributions | |
me | -0.030 |
I | -0.030 |
en | -0.029 |
My | -0.029 |
back | -0.028 |
Token | lesson |
Token ID | 11483 |
Feature activation | +4.39e-02 |
Pos logit contributions | |
ained | +0.085 |
packs | +0.083 |
ages | +0.080 |
ops | +0.078 |
aked | +0.078 |
Neg logit contributions | |
en | -0.101 |
I | -0.098 |
back | -0.096 |
cheer | -0.095 |
calm | -0.094 |
Token | . |
Token ID | 13 |
Feature activation | +2.14e-02 |
Pos logit contributions | |
ccoli | +0.099 |
ages | +0.099 |
zag | +0.098 |
aj | +0.097 |
tr | +0.095 |
Neg logit contributions | |
alone | -0.035 |
I | -0.034 |
unatt | -0.028 |
back | -0.028 |
me | -0.027 |
Token | She |
Token ID | 1375 |
Feature activation | +3.19e-02 |
Pos logit contributions | |
zag | +0.047 |
ages | +0.046 |
tr | +0.042 |
ained | +0.042 |
tails | +0.041 |
Neg logit contributions | |
I | -0.019 |
My | -0.019 |
en | -0.017 |
Int | -0.017 |
ivated | -0.016 |
Token | was |
Token ID | 373 |
Feature activation | +1.24e-03 |
Pos logit contributions | |
zag | +0.063 |
ages | +0.060 |
ained | +0.059 |
ometer | +0.057 |
ometers | +0.057 |
Neg logit contributions | |
en | -0.033 |
me | -0.032 |
I | -0.031 |
alone | -0.030 |
ternally | -0.029 |
Token | obedient |
Token ID | 41652 |
Feature activation | -8.38e-03 |
Pos logit contributions | |
ages | +0.002 |
zag | +0.002 |
tr | +0.002 |
ained | +0.002 |
ometer | +0.002 |
Neg logit contributions | |
ivated | -0.002 |
ternally | -0.002 |
unatt | -0.002 |
My | -0.002 |
en | -0.002 |
Token | feeling |
Token ID | 4203 |
Feature activation | -3.39e-02 |
Pos logit contributions | |
ages | +0.023 |
zag | +0.023 |
tr | +0.022 |
ained | +0.021 |
ometer | +0.021 |
Neg logit contributions | |
ivated | -0.010 |
unatt | -0.010 |
en | -0.010 |
ternally | -0.010 |
My | -0.010 |
Token | very |
Token ID | 845 |
Feature activation | -1.20e-02 |
Pos logit contributions | |
ternally | +0.048 |
My | +0.048 |
ican | +0.047 |
ivated | +0.046 |
folk | +0.045 |
Neg logit contributions | |
ages | -0.049 |
ained | -0.047 |
zag | -0.042 |
ends | -0.042 |
ment | -0.041 |
Token | refreshed |
Token ID | 47193 |
Feature activation | +1.18e-02 |
Pos logit contributions | |
ternally | +0.014 |
My | +0.014 |
ivated | +0.014 |
unatt | +0.013 |
en | +0.013 |
Neg logit contributions | |
ages | -0.021 |
zag | -0.020 |
ained | -0.019 |
tr | -0.018 |
ometer | -0.018 |
Token | . |
Token ID | 13 |
Feature activation | +2.82e-02 |
Pos logit contributions | |
zag | +0.021 |
ages | +0.021 |
tr | +0.020 |
ained | +0.019 |
ometer | +0.019 |
Neg logit contributions | |
unatt | -0.013 |
ivated | -0.013 |
My | -0.013 |
en | -0.013 |
I | -0.013 |
Token | From |
Token ID | 3574 |
Feature activation | +2.82e-02 |
Pos logit contributions | |
zag | +0.056 |
ages | +0.054 |
tr | +0.052 |
ained | +0.051 |
olving | +0.051 |
Neg logit contributions | |
My | -0.029 |
I | -0.028 |
Int | -0.028 |
en | -0.027 |
me | -0.026 |
Token | then |
Token ID | 788 |
Feature activation | +7.14e-02 |
Pos logit contributions | |
ages | +0.060 |
zag | +0.058 |
tr | +0.055 |
ometer | +0.054 |
olving | +0.054 |
Neg logit contributions | |
me | -0.027 |
alone | -0.027 |
unatt | -0.026 |
I | -0.026 |
en | -0.025 |
Token | on |
Token ID | 319 |
Feature activation | +3.08e-02 |
Pos logit contributions | |
ccoli | +0.136 |
ometers | +0.133 |
ages | +0.133 |
common | +0.130 |
oni | +0.128 |
Neg logit contributions | |
alone | -0.080 |
I | -0.078 |
back | -0.076 |
My | -0.072 |
me | -0.070 |
Token | , |
Token ID | 11 |
Feature activation | +2.03e-02 |
Pos logit contributions | |
tr | +0.065 |
ages | +0.065 |
ained | +0.064 |
zag | +0.062 |
ometers | +0.060 |
Neg logit contributions | |
I | -0.029 |
me | -0.029 |
My | -0.028 |
alone | -0.028 |
back | -0.025 |
Token | every |
Token ID | 790 |
Feature activation | +1.83e-02 |
Pos logit contributions | |
ages | +0.038 |
zag | +0.038 |
tr | +0.037 |
ained | +0.035 |
ometer | +0.034 |
Neg logit contributions | |
en | -0.022 |
My | -0.022 |
I | -0.022 |
alone | -0.021 |
ivated | -0.021 |
Token | time |
Token ID | 640 |
Feature activation | +2.06e-02 |
Pos logit contributions | |
ages | +0.025 |
zag | +0.024 |
ometers | +0.022 |
ometer | +0.022 |
tr | +0.022 |
Neg logit contributions | |
unatt | -0.029 |
alone | -0.029 |
I | -0.029 |
en | -0.029 |
My | -0.029 |
Token | he |
Token ID | 339 |
Feature activation | +1.93e-02 |
Pos logit contributions | |
ages | +0.041 |
zag | +0.041 |
tr | +0.039 |
ained | +0.038 |
ometer | +0.037 |
Neg logit contributions | |
My | -0.020 |
I | -0.020 |
en | -0.019 |
alone | -0.019 |
ivated | -0.018 |
Token | he |
Token ID | 339 |
Feature activation | +9.20e-03 |
Pos logit contributions | |
ages | +0.097 |
zag | +0.097 |
ained | +0.093 |
tr | +0.092 |
ccoli | +0.090 |
Neg logit contributions | |
I | -0.037 |
en | -0.036 |
alone | -0.035 |
My | -0.034 |
anybody | -0.033 |
Token | heard |
Token ID | 2982 |
Feature activation | +1.23e-02 |
Pos logit contributions | |
ages | +0.017 |
zag | +0.017 |
ained | +0.016 |
tr | +0.016 |
ometer | +0.016 |
Neg logit contributions | |
unatt | -0.009 |
en | -0.009 |
ivated | -0.009 |
ternally | -0.009 |
My | -0.009 |
Token | a |
Token ID | 257 |
Feature activation | -6.51e-03 |
Pos logit contributions | |
zag | +0.020 |
ages | +0.020 |
ained | +0.018 |
tr | +0.018 |
ometer | +0.018 |
Neg logit contributions | |
en | -0.016 |
My | -0.016 |
ivated | -0.016 |
ternally | -0.016 |
unatt | -0.016 |
Token | loud |
Token ID | 7812 |
Feature activation | -6.38e-03 |
Pos logit contributions | |
unatt | +0.008 |
ivated | +0.008 |
ternally | +0.008 |
My | +0.008 |
en | +0.007 |
Neg logit contributions | |
zag | -0.011 |
ages | -0.011 |
tr | -0.010 |
ometer | -0.010 |
tails | -0.010 |
Token | cl |
Token ID | 537 |
Feature activation | -1.34e-02 |
Pos logit contributions | |
unatt | +0.008 |
ternally | +0.008 |
ivated | +0.008 |
My | +0.008 |
en | +0.007 |
Neg logit contributions | |
zag | -0.011 |
ages | -0.011 |
tr | -0.010 |
ained | -0.010 |
ometer | -0.010 |
Token | ap |
Token ID | 499 |
Feature activation | +5.29e-03 |
Pos logit contributions | |
unatt | +0.022 |
ternally | +0.022 |
My | +0.021 |
en | +0.021 |
ivated | +0.021 |
Neg logit contributions | |
zag | -0.018 |
ages | -0.018 |
tr | -0.016 |
ained | -0.016 |
ine | -0.016 |
Token | of |
Token ID | 286 |
Feature activation | -7.67e-03 |
Pos logit contributions | |
ages | +0.009 |
zag | +0.009 |
tr | +0.009 |
ained | +0.009 |
ometer | +0.009 |
Neg logit contributions | |
ivated | -0.006 |
unatt | -0.006 |
en | -0.006 |
ternally | -0.006 |
My | -0.006 |
Token | thunder |
Token ID | 18355 |
Feature activation | -2.29e-03 |
Pos logit contributions | |
My | +0.010 |
ivated | +0.010 |
ternally | +0.010 |
unatt | +0.010 |
I | +0.010 |
Neg logit contributions | |
ages | -0.012 |
zag | -0.012 |
tr | -0.011 |
ained | -0.011 |
olving | -0.010 |
Token | , |
Token ID | 11 |
Feature activation | +4.70e-03 |
Pos logit contributions | |
ternally | +0.003 |
ivated | +0.003 |
unatt | +0.003 |
My | +0.003 |
en | +0.003 |
Neg logit contributions | |
zag | -0.004 |
ages | -0.004 |
tr | -0.004 |
ained | -0.004 |
ometer | -0.003 |
Token | and |
Token ID | 290 |
Feature activation | +6.54e-03 |
Pos logit contributions | |
ages | +0.008 |
zag | +0.008 |
tr | +0.008 |
ained | +0.007 |
ometer | +0.007 |
Neg logit contributions | |
ivated | -0.006 |
en | -0.006 |
unatt | -0.006 |
ternally | -0.006 |
My | -0.006 |
Token | then |
Token ID | 788 |
Feature activation | +2.44e-02 |
Pos logit contributions | |
ages | +0.012 |
zag | +0.012 |
tr | +0.011 |
ained | +0.011 |
ometer | +0.011 |
Neg logit contributions | |
en | -0.007 |
ivated | -0.007 |
unatt | -0.007 |
ternally | -0.007 |
My | -0.007 |
Token | each |
Token ID | 1123 |
Feature activation | -9.07e-03 |
Pos logit contributions | |
ivated | +0.019 |
unatt | +0.018 |
ternally | +0.018 |
My | +0.018 |
en | +0.017 |
Neg logit contributions | |
ages | -0.017 |
zag | -0.016 |
tr | -0.016 |
tails | -0.015 |
ine | -0.015 |
Token | other |
Token ID | 584 |
Feature activation | +2.27e-02 |
Pos logit contributions | |
ivated | +0.011 |
unatt | +0.011 |
ternally | +0.010 |
en | +0.010 |
My | +0.010 |
Neg logit contributions | |
zag | -0.016 |
ages | -0.016 |
tr | -0.015 |
ained | -0.015 |
ine | -0.014 |
Token | . |
Token ID | 13 |
Feature activation | +3.83e-03 |
Pos logit contributions | |
zag | +0.043 |
ages | +0.043 |
ained | +0.042 |
tr | +0.040 |
live | +0.039 |
Neg logit contributions | |
alone | -0.023 |
unatt | -0.023 |
en | -0.022 |
My | -0.021 |
ivated | -0.021 |
Token | They |
Token ID | 1119 |
Feature activation | +2.35e-02 |
Pos logit contributions | |
zag | +0.005 |
ages | +0.005 |
tr | +0.004 |
ained | +0.004 |
ometer | +0.004 |
Neg logit contributions | |
ivated | -0.006 |
My | -0.006 |
en | -0.006 |
unatt | -0.006 |
ternally | -0.006 |
Token | make |
Token ID | 787 |
Feature activation | +6.61e-03 |
Pos logit contributions | |
ages | +0.046 |
zag | +0.045 |
ained | +0.044 |
ometer | +0.041 |
tr | +0.041 |
Neg logit contributions | |
en | -0.025 |
cheer | -0.024 |
ivated | -0.024 |
alone | -0.024 |
I | -0.023 |
Token | a |
Token ID | 257 |
Feature activation | +4.44e-02 |
Pos logit contributions | |
zag | +0.011 |
ages | +0.011 |
tr | +0.010 |
ained | +0.010 |
ometer | +0.010 |
Neg logit contributions | |
en | -0.008 |
ivated | -0.008 |
ternally | -0.008 |
unatt | -0.008 |
My | -0.008 |
Token | plan |
Token ID | 1410 |
Feature activation | +1.30e-02 |
Pos logit contributions | |
ages | +0.089 |
ained | +0.088 |
zag | +0.083 |
live | +0.081 |
ometers | +0.079 |
Neg logit contributions | |
cheer | -0.049 |
me | -0.048 |
back | -0.046 |
en | -0.046 |
alone | -0.043 |
Token | . |
Token ID | 13 |
Feature activation | +1.17e-02 |
Pos logit contributions | |
ages | +0.024 |
zag | +0.024 |
ained | +0.022 |
tr | +0.022 |
ometer | +0.022 |
Neg logit contributions | |
unatt | -0.014 |
alone | -0.014 |
en | -0.014 |
ivated | -0.013 |
My | -0.013 |
Token |
|
Token ID | 198 |
Feature activation | +1.80e-02 |
Pos logit contributions | |
zag | +0.017 |
ages | +0.017 |
ained | +0.015 |
tr | +0.015 |
tails | +0.015 |
Neg logit contributions | |
My | -0.018 |
I | -0.017 |
en | -0.017 |
ivated | -0.017 |
Int | -0.016 |
Token |
|
Token ID | 198 |
Feature activation | +2.67e-02 |
Pos logit contributions | |
zag | +0.035 |
ages | +0.035 |
tr | +0.033 |
ained | +0.033 |
ccoli | +0.032 |
Neg logit contributions | |
My | -0.017 |
I | -0.017 |
en | -0.017 |
ivated | -0.017 |
Int | -0.016 |
Token | They |
Token ID | 2990 |
Feature activation | +2.22e-02 |
Pos logit contributions | |
zag | +0.044 |
tr | +0.040 |
ages | +0.040 |
ometer | +0.040 |
ained | +0.040 |
Neg logit contributions | |
I | -0.037 |
Leave | -0.037 |
My | -0.037 |
ivated | -0.036 |
I | -0.036 |
Token | by |
Token ID | 416 |
Feature activation | +2.57e-02 |
Pos logit contributions | |
ages | +0.015 |
zag | +0.015 |
tr | +0.014 |
ained | +0.013 |
ccoli | +0.013 |
Neg logit contributions | |
unatt | -0.007 |
ivated | -0.007 |
en | -0.007 |
ternally | -0.007 |
My | -0.007 |
Token | her |
Token ID | 607 |
Feature activation | +1.86e-02 |
Pos logit contributions | |
ages | +0.053 |
zag | +0.050 |
ccoli | +0.049 |
tr | +0.047 |
ometer | +0.047 |
Neg logit contributions | |
alone | -0.026 |
unatt | -0.024 |
en | -0.024 |
I | -0.023 |
me | -0.023 |
Token | house |
Token ID | 2156 |
Feature activation | +2.61e-03 |
Pos logit contributions | |
ages | +0.037 |
zag | +0.036 |
ained | +0.035 |
ccoli | +0.034 |
tr | +0.033 |
Neg logit contributions | |
unatt | -0.017 |
My | -0.017 |
alone | -0.017 |
en | -0.017 |
I | -0.017 |
Token | . |
Token ID | 13 |
Feature activation | -2.94e-02 |
Pos logit contributions | |
ages | +0.004 |
zag | +0.004 |
tr | +0.004 |
ained | +0.004 |
ometer | +0.004 |
Neg logit contributions | |
ivated | -0.003 |
unatt | -0.003 |
ternally | -0.003 |
en | -0.003 |
My | -0.003 |
Token | It |
Token ID | 632 |
Feature activation | -3.80e-03 |
Pos logit contributions | |
ternally | +0.047 |
unatt | +0.047 |
ivated | +0.044 |
chie | +0.040 |
cheer | +0.040 |
Neg logit contributions | |
ages | -0.041 |
tr | -0.040 |
ometer | -0.039 |
zag | -0.038 |
After | -0.037 |
Token | was |
Token ID | 373 |
Feature activation | +2.19e-02 |
Pos logit contributions | |
ivated | +0.006 |
unatt | +0.005 |
ternally | +0.005 |
My | +0.005 |
en | +0.005 |
Neg logit contributions | |
zag | -0.006 |
ages | -0.006 |
tr | -0.005 |
ained | -0.005 |
ometer | -0.005 |
Token | carrying |
Token ID | 6872 |
Feature activation | +1.80e-02 |
Pos logit contributions | |
ages | +0.045 |
zag | +0.044 |
ccoli | +0.041 |
ometer | +0.040 |
uffy | +0.040 |
Neg logit contributions | |
en | -0.023 |
alone | -0.021 |
My | -0.021 |
I | -0.020 |
me | -0.020 |
Token | lots |
Token ID | 6041 |
Feature activation | -1.08e-02 |
Pos logit contributions | |
zag | +0.030 |
ages | +0.028 |
ained | +0.027 |
tr | +0.026 |
ccoli | +0.026 |
Neg logit contributions | |
en | -0.024 |
ternally | -0.023 |
My | -0.023 |
unatt | -0.023 |
alone | -0.023 |
Token | of |
Token ID | 286 |
Feature activation | +1.97e-02 |
Pos logit contributions | |
ivated | +0.012 |
ternally | +0.012 |
unatt | +0.011 |
My | +0.011 |
en | +0.011 |
Neg logit contributions | |
ages | -0.020 |
zag | -0.020 |
tr | -0.019 |
ained | -0.018 |
ometer | -0.018 |
Token | boxes |
Token ID | 10559 |
Feature activation | +1.63e-03 |
Pos logit contributions | |
zag | +0.031 |
ages | +0.029 |
tr | +0.028 |
ccoli | +0.027 |
ained | +0.027 |
Neg logit contributions | |
en | -0.029 |
alone | -0.027 |
ternally | -0.027 |
My | -0.027 |
me | -0.027 |
Token | . |
Token ID | 13 |
Feature activation | -1.94e-02 |
Pos logit contributions | |
ages | +0.003 |
zag | +0.003 |
tr | +0.002 |
ained | +0.002 |
ometer | +0.002 |
Neg logit contributions | |
ivated | -0.002 |
ternally | -0.002 |
unatt | -0.002 |
en | -0.002 |
My | -0.002 |
Token | and |
Token ID | 290 |
Feature activation | +3.93e-03 |
Pos logit contributions | |
ivated | +0.003 |
ternally | +0.003 |
My | +0.003 |
unatt | +0.003 |
en | +0.003 |
Neg logit contributions | |
ages | -0.003 |
zag | -0.003 |
ained | -0.003 |
tr | -0.003 |
ometer | -0.003 |
Token | cold |
Token ID | 4692 |
Feature activation | +1.34e-02 |
Pos logit contributions | |
ages | +0.007 |
zag | +0.007 |
tr | +0.007 |
ained | +0.007 |
ometer | +0.007 |
Neg logit contributions | |
ivated | -0.004 |
unatt | -0.004 |
en | -0.004 |
ternally | -0.004 |
My | -0.004 |
Token | and |
Token ID | 290 |
Feature activation | +7.94e-03 |
Pos logit contributions | |
zag | +0.024 |
ages | +0.024 |
tr | +0.023 |
ained | +0.022 |
ometer | +0.022 |
Neg logit contributions | |
unatt | -0.015 |
alone | -0.015 |
en | -0.014 |
I | -0.014 |
ivated | -0.014 |
Token | scared |
Token ID | 12008 |
Feature activation | -1.90e-02 |
Pos logit contributions | |
zag | +0.015 |
ages | +0.015 |
tr | +0.014 |
ained | +0.013 |
ometer | +0.013 |
Neg logit contributions | |
en | -0.009 |
unatt | -0.009 |
ivated | -0.008 |
My | -0.008 |
ternally | -0.008 |
Token | . |
Token ID | 13 |
Feature activation | +1.93e-03 |
Pos logit contributions | |
ivated | +0.030 |
ternally | +0.030 |
unatt | +0.028 |
My | +0.028 |
en | +0.027 |
Neg logit contributions | |
ages | -0.026 |
zag | -0.025 |
ained | -0.023 |
tr | -0.023 |
ment | -0.022 |
Token | He |
Token ID | 679 |
Feature activation | +1.37e-02 |
Pos logit contributions | |
ages | +0.003 |
zag | +0.003 |
tr | +0.003 |
ained | +0.003 |
ometer | +0.003 |
Neg logit contributions | |
ivated | -0.002 |
unatt | -0.002 |
ternally | -0.002 |
en | -0.002 |
My | -0.002 |
Token | me |
Token ID | 502 |
Feature activation | +4.24e-02 |
Pos logit contributions | |
zag | +0.022 |
ages | +0.022 |
ained | +0.020 |
tr | +0.020 |
ometer | +0.019 |
Neg logit contributions | |
en | -0.019 |
My | -0.018 |
I | -0.018 |
ternally | -0.018 |
ivated | -0.018 |
Token | ows |
Token ID | 1666 |
Feature activation | -6.92e-03 |
Pos logit contributions | |
ages | +0.044 |
oral | +0.041 |
zag | +0.038 |
ometer | +0.037 |
After | +0.037 |
Neg logit contributions | |
alone | -0.082 |
unatt | -0.076 |
cheer | -0.076 |
back | -0.075 |
en | -0.075 |
Token | loudly |
Token ID | 23112 |
Feature activation | +1.76e-02 |
Pos logit contributions | |
ivated | +0.009 |
unatt | +0.009 |
ternally | +0.009 |
My | +0.009 |
en | +0.009 |
Neg logit contributions | |
zag | -0.011 |
ages | -0.011 |
ained | -0.010 |
tr | -0.010 |
ometer | -0.009 |
Token | . |
Token ID | 13 |
Feature activation | +1.28e-03 |
Pos logit contributions | |
zag | +0.032 |
ages | +0.031 |
ained | +0.030 |
tr | +0.029 |
ometer | +0.029 |
Neg logit contributions | |
My | -0.020 |
en | -0.020 |
I | -0.019 |
alone | -0.019 |
unatt | -0.019 |
Token |
|
Token ID | 198 |
Feature activation | +1.88e-02 |
Pos logit contributions | |
ages | +0.002 |
zag | +0.002 |
tr | +0.002 |
ained | +0.002 |
ometer | +0.002 |
Neg logit contributions | |
ivated | -0.002 |
ternally | -0.002 |
unatt | -0.002 |
en | -0.002 |
My | -0.002 |
Token | to |
Token ID | 284 |
Feature activation | -5.22e-03 |
Pos logit contributions | |
ivated | +0.008 |
ternally | +0.008 |
unatt | +0.008 |
My | +0.008 |
en | +0.008 |
Neg logit contributions | |
ages | -0.012 |
zag | -0.012 |
tr | -0.011 |
ained | -0.011 |
ometer | -0.011 |
Token | play |
Token ID | 711 |
Feature activation | -2.34e-02 |
Pos logit contributions | |
unatt | +0.006 |
ternally | +0.006 |
ivated | +0.006 |
en | +0.005 |
My | +0.005 |
Neg logit contributions | |
zag | -0.010 |
ages | -0.010 |
tr | -0.009 |
ained | -0.009 |
tails | -0.009 |
Token | with |
Token ID | 351 |
Feature activation | -5.32e-03 |
Pos logit contributions | |
ternally | +0.029 |
ivated | +0.028 |
unatt | +0.028 |
en | +0.027 |
folk | +0.026 |
Neg logit contributions | |
zag | -0.040 |
ages | -0.039 |
tr | -0.037 |
ained | -0.035 |
tails | -0.033 |
Token | his |
Token ID | 465 |
Feature activation | +1.35e-02 |
Pos logit contributions | |
ivated | +0.007 |
unatt | +0.007 |
ternally | +0.007 |
My | +0.007 |
en | +0.007 |
Neg logit contributions | |
ages | -0.009 |
zag | -0.008 |
tr | -0.008 |
ained | -0.008 |
tails | -0.008 |
Token | toy |
Token ID | 13373 |
Feature activation | -8.79e-03 |
Pos logit contributions | |
ages | +0.021 |
zag | +0.021 |
ained | +0.019 |
tr | +0.019 |
ine | +0.018 |
Neg logit contributions | |
en | -0.019 |
ternally | -0.018 |
My | -0.018 |
unatt | -0.018 |
alone | -0.018 |
Token | planes |
Token ID | 13016 |
Feature activation | +6.00e-03 |
Pos logit contributions | |
ivated | +0.012 |
ternally | +0.012 |
en | +0.012 |
unatt | +0.012 |
My | +0.011 |
Neg logit contributions | |
zag | -0.014 |
ages | -0.014 |
tr | -0.013 |
ometer | -0.013 |
ained | -0.013 |
Token | , |
Token ID | 11 |
Feature activation | -2.45e-02 |
Pos logit contributions | |
ages | +0.010 |
zag | +0.010 |
tr | +0.010 |
ained | +0.010 |
ometer | +0.009 |
Neg logit contributions | |
unatt | -0.007 |
ivated | -0.007 |
en | -0.007 |
ternally | -0.007 |
My | -0.007 |
Token | especially |
Token ID | 2592 |
Feature activation | +1.82e-02 |
Pos logit contributions | |
ternally | +0.030 |
unatt | +0.029 |
ivated | +0.029 |
My | +0.026 |
vious | +0.026 |
Neg logit contributions | |
zag | -0.042 |
ages | -0.039 |
tr | -0.039 |
ahs | -0.038 |
ine | -0.038 |
Token | his |
Token ID | 465 |
Feature activation | -3.79e-03 |
Pos logit contributions | |
zag | +0.034 |
ages | +0.034 |
tr | +0.032 |
ained | +0.031 |
ometer | +0.031 |
Neg logit contributions | |
en | -0.019 |
My | -0.019 |
alone | -0.018 |
unatt | -0.018 |
I | -0.018 |
Token | jet |
Token ID | 12644 |
Feature activation | -2.46e-02 |
Pos logit contributions | |
ivated | +0.005 |
unatt | +0.005 |
ternally | +0.005 |
My | +0.005 |
en | +0.005 |
Neg logit contributions | |
zag | -0.006 |
ages | -0.006 |
tr | -0.006 |
ained | -0.006 |
ometer | -0.006 |
Token | . |
Token ID | 13 |
Feature activation | -1.72e-02 |
Pos logit contributions | |
ivated | +0.038 |
ternally | +0.037 |
My | +0.037 |
Int | +0.036 |
unatt | +0.036 |
Neg logit contributions | |
zag | -0.039 |
ages | -0.036 |
tr | -0.035 |
tails | -0.033 |
ained | -0.032 |
Token | Her |
Token ID | 2332 |
Feature activation | +1.49e-02 |
Pos logit contributions | |
ternally | +0.006 |
unatt | +0.006 |
ivated | +0.006 |
en | +0.005 |
My | +0.005 |
Neg logit contributions | |
ages | -0.011 |
zag | -0.011 |
tr | -0.010 |
ained | -0.010 |
ometer | -0.010 |
Token | mom |
Token ID | 1995 |
Feature activation | -1.53e-02 |
Pos logit contributions | |
zag | +0.028 |
ages | +0.028 |
ained | +0.026 |
tr | +0.025 |
ometer | +0.025 |
Neg logit contributions | |
en | -0.016 |
unatt | -0.015 |
My | -0.015 |
alone | -0.015 |
ternally | -0.015 |
Token | told |
Token ID | 1297 |
Feature activation | -1.21e-02 |
Pos logit contributions | |
unatt | +0.024 |
ivated | +0.023 |
My | +0.023 |
ternally | +0.022 |
en | +0.022 |
Neg logit contributions | |
ages | -0.022 |
zag | -0.021 |
tr | -0.020 |
ine | -0.019 |
tails | -0.019 |
Token | her |
Token ID | 607 |
Feature activation | +4.66e-02 |
Pos logit contributions | |
unatt | +0.016 |
ivated | +0.016 |
My | +0.015 |
ternally | +0.015 |
en | +0.015 |
Neg logit contributions | |
ages | -0.019 |
zag | -0.018 |
ained | -0.018 |
ine | -0.017 |
tr | -0.017 |
Token | that |
Token ID | 326 |
Feature activation | +1.05e-03 |
Pos logit contributions | |
zag | +0.100 |
ained | +0.097 |
tr | +0.095 |
ogo | +0.092 |
agons | +0.092 |
Neg logit contributions | |
I | -0.045 |
en | -0.041 |
My | -0.039 |
alone | -0.037 |
back | -0.036 |
Token | she |
Token ID | 673 |
Feature activation | +8.89e-04 |
Pos logit contributions | |
ages | +0.002 |
zag | +0.002 |
tr | +0.002 |
ained | +0.002 |
ometer | +0.002 |
Neg logit contributions | |
ivated | -0.001 |
ternally | -0.001 |
unatt | -0.001 |
My | -0.001 |
en | -0.001 |
Token | needed |
Token ID | 2622 |
Feature activation | -6.65e-03 |
Pos logit contributions | |
ages | +0.001 |
zag | +0.001 |
tr | +0.001 |
ained | +0.001 |
ometer | +0.001 |
Neg logit contributions | |
ivated | -0.001 |
unatt | -0.001 |
ternally | -0.001 |
My | -0.001 |
en | -0.001 |
Token | to |
Token ID | 284 |
Feature activation | +8.05e-03 |
Pos logit contributions | |
ivated | +0.008 |
ternally | +0.008 |
unatt | +0.008 |
en | +0.008 |
My | +0.008 |
Neg logit contributions | |
ages | -0.011 |
zag | -0.011 |
tr | -0.010 |
ained | -0.010 |
ometer | -0.010 |
Token | answer |
Token ID | 3280 |
Feature activation | +4.98e-03 |
Pos logit contributions | |
ages | +0.016 |
zag | +0.016 |
ained | +0.015 |
tr | +0.015 |
ometer | +0.015 |
Neg logit contributions | |
ivated | -0.007 |
en | -0.007 |
ternally | -0.007 |
unatt | -0.007 |
My | -0.007 |
Token | for |
Token ID | 329 |
Feature activation | -5.60e-03 |
Pos logit contributions | |
zag | +0.009 |
ages | +0.009 |
tr | +0.008 |
ained | +0.008 |
ometer | +0.008 |
Neg logit contributions | |
ivated | -0.006 |
unatt | -0.006 |
ternally | -0.005 |
en | -0.005 |
My | -0.005 |
Token | what |
Token ID | 644 |
Feature activation | -2.47e-02 |
Pos logit contributions | |
ivated | +0.007 |
ternally | +0.006 |
My | +0.006 |
en | +0.006 |
unatt | +0.006 |
Neg logit contributions | |
ages | -0.010 |
zag | -0.010 |
tr | -0.009 |
ained | -0.009 |
tails | -0.009 |
Token | troubled |
Token ID | 17840 |
Feature activation | +2.65e-02 |
Pos logit contributions | |
ages | +0.025 |
zag | +0.025 |
ained | +0.023 |
ccoli | +0.022 |
tr | +0.022 |
Neg logit contributions | |
en | -0.026 |
unatt | -0.025 |
alone | -0.025 |
me | -0.024 |
My | -0.024 |
Token | pine |
Token ID | 20161 |
Feature activation | +4.07e-03 |
Pos logit contributions | |
ages | +0.040 |
zag | +0.038 |
ccoli | +0.036 |
ained | +0.036 |
tr | +0.035 |
Neg logit contributions | |
en | -0.039 |
me | -0.039 |
alone | -0.038 |
My | -0.038 |
I | -0.038 |
Token | tree |
Token ID | 5509 |
Feature activation | +1.17e-02 |
Pos logit contributions | |
ages | +0.009 |
zag | +0.009 |
tr | +0.008 |
ained | +0.008 |
ometer | +0.008 |
Neg logit contributions | |
ivated | -0.003 |
unatt | -0.003 |
en | -0.003 |
ternally | -0.003 |
My | -0.003 |
Token | and |
Token ID | 290 |
Feature activation | +2.56e-03 |
Pos logit contributions | |
ages | +0.021 |
zag | +0.021 |
ained | +0.019 |
tr | +0.019 |
ometer | +0.019 |
Neg logit contributions | |
en | -0.014 |
unatt | -0.013 |
alone | -0.013 |
ivated | -0.013 |
My | -0.013 |
Token | wanted |
Token ID | 2227 |
Feature activation | -2.44e-02 |
Pos logit contributions | |
ages | +0.005 |
zag | +0.005 |
tr | +0.005 |
ained | +0.004 |
ometer | +0.004 |
Neg logit contributions | |
ivated | -0.003 |
unatt | -0.003 |
ternally | -0.003 |
en | -0.003 |
My | -0.003 |
Token | to |
Token ID | 284 |
Feature activation | -4.05e-02 |
Pos logit contributions | |
ivated | +0.042 |
ternally | +0.040 |
unatt | +0.037 |
en | +0.037 |
folk | +0.036 |
Neg logit contributions | |
ages | -0.033 |
zag | -0.031 |
tr | -0.029 |
lesson | -0.028 |
ometer | -0.028 |
Token | help |
Token ID | 1037 |
Feature activation | -3.10e-02 |
Pos logit contributions | |
ivated | +0.060 |
unatt | +0.057 |
ternally | +0.056 |
folk | +0.056 |
ard | +0.054 |
Neg logit contributions | |
zag | -0.060 |
ages | -0.058 |
tr | -0.055 |
tails | -0.054 |
lesson | -0.052 |
Token | . |
Token ID | 13 |
Feature activation | -1.51e-02 |
Pos logit contributions | |
ternally | +0.049 |
ivated | +0.048 |
unatt | +0.046 |
Int | +0.044 |
Leave | +0.043 |
Neg logit contributions | |
ages | -0.042 |
zag | -0.041 |
tails | -0.040 |
ies | -0.038 |
ine | -0.038 |
Token | The |
Token ID | 383 |
Feature activation | -9.04e-04 |
Pos logit contributions | |
unatt | +0.017 |
ivated | +0.016 |
ternally | +0.016 |
en | +0.014 |
Leave | +0.013 |
Neg logit contributions | |
ages | -0.029 |
zag | -0.028 |
ometer | -0.027 |
tr | -0.027 |
ccoli | -0.026 |
Token | bird |
Token ID | 6512 |
Feature activation | +7.21e-03 |
Pos logit contributions | |
ivated | +0.001 |
ternally | +0.001 |
unatt | +0.001 |
My | +0.001 |
en | +0.001 |
Neg logit contributions | |
zag | -0.002 |
ages | -0.002 |
tr | -0.002 |
ained | -0.002 |
ometer | -0.002 |
Token | told |
Token ID | 1297 |
Feature activation | -1.41e-03 |
Pos logit contributions | |
ages | +0.013 |
zag | +0.013 |
ained | +0.012 |
tr | +0.012 |
ometer | +0.012 |
Neg logit contributions | |
en | -0.008 |
ternally | -0.008 |
ivated | -0.008 |
unatt | -0.008 |
My | -0.008 |
Token | she |
Token ID | 673 |
Feature activation | -1.18e-02 |
Pos logit contributions | |
ages | +0.005 |
zag | +0.005 |
tr | +0.005 |
ained | +0.005 |
ometer | +0.005 |
Neg logit contributions | |
ivated | -0.003 |
unatt | -0.003 |
ternally | -0.003 |
en | -0.003 |
My | -0.003 |
Token | had |
Token ID | 550 |
Feature activation | -2.26e-02 |
Pos logit contributions | |
ivated | +0.020 |
ternally | +0.019 |
unatt | +0.019 |
en | +0.019 |
My | +0.019 |
Neg logit contributions | |
ages | -0.015 |
zag | -0.015 |
tr | -0.014 |
ained | -0.013 |
ccoli | -0.013 |
Token | been |
Token ID | 587 |
Feature activation | -3.62e-02 |
Pos logit contributions | |
ivated | +0.039 |
ternally | +0.038 |
My | +0.036 |
Int | +0.036 |
unatt | +0.036 |
Neg logit contributions | |
ages | -0.029 |
zag | -0.027 |
tr | -0.026 |
ained | -0.025 |
ccoli | -0.024 |
Token | a |
Token ID | 257 |
Feature activation | -3.05e-02 |
Pos logit contributions | |
Int | +0.055 |
ard | +0.053 |
ivated | +0.053 |
ternally | +0.053 |
My | +0.050 |
Neg logit contributions | |
ages | -0.053 |
ained | -0.045 |
ends | -0.045 |
zag | -0.044 |
tails | -0.042 |
Token | better |
Token ID | 1365 |
Feature activation | +2.78e-02 |
Pos logit contributions | |
ivated | +0.061 |
Int | +0.058 |
ternally | +0.056 |
My | +0.056 |
folk | +0.055 |
Neg logit contributions | |
zag | -0.031 |
ages | -0.029 |
tr | -0.028 |
lesson | -0.027 |
ccoli | -0.026 |
Token | friend |
Token ID | 1545 |
Feature activation | -1.83e-02 |
Pos logit contributions | |
ages | +0.050 |
zag | +0.048 |
ained | +0.048 |
tr | +0.046 |
ometers | +0.046 |
Neg logit contributions | |
en | -0.032 |
alone | -0.032 |
I | -0.031 |
unatt | -0.030 |
back | -0.029 |
Token | . |
Token ID | 13 |
Feature activation | +7.41e-03 |
Pos logit contributions | |
ivated | +0.027 |
ternally | +0.026 |
en | +0.025 |
Int | +0.025 |
My | +0.024 |
Neg logit contributions | |
ages | -0.028 |
zag | -0.028 |
tr | -0.026 |
ained | -0.025 |
ine | -0.024 |
Token | From |
Token ID | 3574 |
Feature activation | +1.06e-02 |
Pos logit contributions | |
zag | +0.013 |
ages | +0.013 |
ained | +0.012 |
tr | +0.012 |
ometer | +0.011 |
Neg logit contributions | |
My | -0.009 |
en | -0.009 |
ivated | -0.009 |
unatt | -0.009 |
ternally | -0.009 |
Token | that |
Token ID | 326 |
Feature activation | +7.06e-03 |
Pos logit contributions | |
ages | +0.020 |
zag | +0.020 |
tr | +0.018 |
ained | +0.018 |
ometer | +0.018 |
Neg logit contributions | |
unatt | -0.012 |
ivated | -0.012 |
en | -0.012 |
alone | -0.011 |
My | -0.011 |
Token | day |
Token ID | 1110 |
Feature activation | +3.14e-02 |
Pos logit contributions | |
ages | +0.008 |
zag | +0.008 |
tr | +0.007 |
ained | +0.007 |
ometer | +0.007 |
Neg logit contributions | |
en | -0.012 |
unatt | -0.012 |
My | -0.012 |
ivated | -0.012 |
ternally | -0.012 |
Token | on |
Token ID | 319 |
Feature activation | +1.02e-02 |
Pos logit contributions | |
zag | +0.045 |
ages | +0.045 |
ccoli | +0.044 |
tr | +0.042 |
ometer | +0.040 |
Neg logit contributions | |
I | -0.047 |
alone | -0.047 |
My | -0.047 |
ivated | -0.046 |
unatt | -0.046 |
Token | checking |
Token ID | 10627 |
Feature activation | +1.31e-02 |
Pos logit contributions | |
ternally | +0.015 |
My | +0.015 |
unatt | +0.015 |
ivated | +0.015 |
en | +0.014 |
Neg logit contributions | |
zag | -0.021 |
ages | -0.021 |
ained | -0.019 |
tr | -0.019 |
ine | -0.018 |
Token | out |
Token ID | 503 |
Feature activation | +1.39e-02 |
Pos logit contributions | |
ages | +0.024 |
zag | +0.024 |
tr | +0.022 |
ained | +0.022 |
ccoli | +0.021 |
Neg logit contributions | |
unatt | -0.015 |
My | -0.015 |
alone | -0.015 |
ternally | -0.015 |
en | -0.015 |
Token | the |
Token ID | 262 |
Feature activation | +7.07e-03 |
Pos logit contributions | |
ages | +0.026 |
zag | +0.026 |
tr | +0.024 |
ained | +0.024 |
ometer | +0.024 |
Neg logit contributions | |
en | -0.015 |
ivated | -0.014 |
unatt | -0.014 |
My | -0.014 |
ternally | -0.014 |
Token | winter |
Token ID | 7374 |
Feature activation | +1.31e-02 |
Pos logit contributions | |
ages | +0.013 |
zag | +0.013 |
ained | +0.012 |
tr | +0.012 |
ccoli | +0.011 |
Neg logit contributions | |
unatt | -0.008 |
en | -0.008 |
ternally | -0.008 |
ivated | -0.008 |
My | -0.008 |
Token | scenery |
Token ID | 31068 |
Feature activation | -1.34e-02 |
Pos logit contributions | |
ages | +0.023 |
zag | +0.023 |
ained | +0.021 |
tr | +0.021 |
ccoli | +0.021 |
Neg logit contributions | |
unatt | -0.015 |
My | -0.015 |
I | -0.014 |
en | -0.014 |
ivated | -0.014 |
Token | . |
Token ID | 13 |
Feature activation | -3.23e-03 |
Pos logit contributions | |
ternally | +0.019 |
My | +0.019 |
ivated | +0.018 |
en | +0.018 |
unatt | +0.018 |
Neg logit contributions | |
zag | -0.021 |
ages | -0.021 |
tr | -0.019 |
ained | -0.019 |
ometer | -0.018 |
Token | Suddenly |
Token ID | 24975 |
Feature activation | +3.18e-02 |
Pos logit contributions | |
unatt | +0.004 |
ternally | +0.004 |
ivated | +0.004 |
en | +0.004 |
My | +0.004 |
Neg logit contributions | |
ages | -0.005 |
zag | -0.005 |
tr | -0.005 |
ained | -0.005 |
ometer | -0.005 |
Token | , |
Token ID | 11 |
Feature activation | +3.34e-02 |
Pos logit contributions | |
ages | +0.062 |
zag | +0.061 |
ained | +0.059 |
tr | +0.058 |
ccoli | +0.057 |
Neg logit contributions | |
I | -0.034 |
My | -0.033 |
me | -0.033 |
alone | -0.032 |
en | -0.032 |
Token | she |
Token ID | 673 |
Feature activation | +9.51e-03 |
Pos logit contributions | |
ages | +0.077 |
zag | +0.076 |
ccoli | +0.071 |
tr | +0.071 |
ained | +0.070 |
Neg logit contributions | |
I | -0.027 |
en | -0.026 |
alone | -0.025 |
ivated | -0.025 |
My | -0.025 |
Token | slipped |
Token ID | 18859 |
Feature activation | +2.33e-02 |
Pos logit contributions | |
ages | +0.019 |
zag | +0.019 |
ained | +0.017 |
tr | +0.017 |
ometer | +0.017 |
Neg logit contributions | |
unatt | -0.009 |
ivated | -0.009 |
en | -0.009 |
ternally | -0.009 |
My | -0.009 |
Token | and |
Token ID | 290 |
Feature activation | -8.58e-04 |
Pos logit contributions | |
ages | +0.045 |
zag | +0.042 |
ccoli | +0.040 |
ometer | +0.040 |
ometers | +0.040 |
Neg logit contributions | |
alone | -0.025 |
unatt | -0.025 |
me | -0.025 |
en | -0.025 |
My | -0.024 |
Token | the |
Token ID | 262 |
Feature activation | +2.82e-03 |
Pos logit contributions | |
unatt | +0.010 |
ternally | +0.010 |
ivated | +0.010 |
My | +0.010 |
en | +0.010 |
Neg logit contributions | |
ages | -0.014 |
zag | -0.014 |
ained | -0.013 |
tr | -0.013 |
ccoli | -0.013 |
Token | doctor |
Token ID | 6253 |
Feature activation | -1.24e-05 |
Pos logit contributions | |
ages | +0.005 |
zag | +0.005 |
tr | +0.005 |
ained | +0.005 |
ometer | +0.005 |
Neg logit contributions | |
ivated | -0.003 |
unatt | -0.003 |
ternally | -0.003 |
en | -0.003 |
My | -0.003 |
Token | was |
Token ID | 373 |
Feature activation | -1.43e-02 |
Pos logit contributions | |
ivated | +0.000 |
unatt | +0.000 |
ternally | +0.000 |
My | +0.000 |
en | +0.000 |
Neg logit contributions | |
ages | -0.000 |
zag | -0.000 |
tr | -0.000 |
ained | -0.000 |
ometer | -0.000 |
Token | very |
Token ID | 845 |
Feature activation | -2.92e-03 |
Pos logit contributions | |
ternally | +0.017 |
My | +0.017 |
unatt | +0.017 |
ivated | +0.017 |
I | +0.016 |
Neg logit contributions | |
zag | -0.025 |
ages | -0.024 |
ained | -0.022 |
tr | -0.022 |
ine | -0.022 |
Token | reassuring |
Token ID | 36450 |
Feature activation | -5.63e-05 |
Pos logit contributions | |
unatt | +0.003 |
ivated | +0.003 |
ternally | +0.003 |
My | +0.003 |
en | +0.003 |
Neg logit contributions | |
ages | -0.006 |
zag | -0.006 |
tr | -0.005 |
ained | -0.005 |
ometer | -0.005 |
Token | and |
Token ID | 290 |
Feature activation | +4.28e-03 |
Pos logit contributions | |
ivated | +0.000 |
unatt | +0.000 |
ternally | +0.000 |
en | +0.000 |
My | +0.000 |
Neg logit contributions | |
ages | -0.000 |
zag | -0.000 |
tr | -0.000 |
ained | -0.000 |
ometer | -0.000 |
Token | patient |
Token ID | 5827 |
Feature activation | +8.31e-03 |
Pos logit contributions | |
ages | +0.007 |
zag | +0.007 |
tr | +0.007 |
ained | +0.007 |
ometer | +0.007 |
Neg logit contributions | |
ivated | -0.005 |
ternally | -0.005 |
unatt | -0.005 |
en | -0.005 |
My | -0.005 |
Token | . |
Token ID | 13 |
Feature activation | -2.03e-02 |
Pos logit contributions | |
zag | +0.014 |
ages | +0.014 |
tr | +0.013 |
ained | +0.013 |
ometer | +0.013 |
Neg logit contributions | |
unatt | -0.010 |
ivated | -0.010 |
en | -0.010 |
ternally | -0.010 |
My | -0.010 |
Token | |
Token ID | 220 |
Feature activation | +2.61e-02 |
Pos logit contributions | |
unatt | +0.030 |
ternally | +0.029 |
ivated | +0.028 |
en | +0.026 |
alone | +0.026 |
Neg logit contributions | |
ages | -0.030 |
zag | -0.029 |
ained | -0.028 |
ccoli | -0.027 |
ometer | -0.027 |
Token |
|
Token ID | 198 |
Feature activation | -1.68e-03 |
Pos logit contributions | |
ages | +0.049 |
zag | +0.049 |
tr | +0.046 |
tails | +0.045 |
ained | +0.045 |
Neg logit contributions | |
My | -0.027 |
I | -0.026 |
en | -0.026 |
ivated | -0.025 |
Int | -0.025 |
Token |
|
Token ID | 198 |
Feature activation | -1.91e-03 |
Pos logit contributions | |
unatt | +0.002 |
ivated | +0.002 |
ternally | +0.002 |
en | +0.002 |
My | +0.002 |
Neg logit contributions | |
ages | -0.003 |
zag | -0.003 |
ained | -0.002 |
tr | -0.002 |
ometer | -0.002 |
Token | , |
Token ID | 11 |
Feature activation | -1.61e-02 |
Pos logit contributions | |
ages | +0.015 |
zag | +0.015 |
ained | +0.014 |
tr | +0.014 |
ometer | +0.013 |
Neg logit contributions | |
My | -0.012 |
unatt | -0.012 |
ternally | -0.012 |
en | -0.012 |
ivated | -0.012 |
Token | I |
Token ID | 314 |
Feature activation | +2.36e-02 |
Pos logit contributions | |
ivated | +0.018 |
ternally | +0.018 |
unatt | +0.017 |
en | +0.017 |
Int | +0.016 |
Neg logit contributions | |
tr | -0.029 |
ages | -0.029 |
zag | -0.028 |
ccoli | -0.027 |
ometer | -0.027 |
Token | am |
Token ID | 716 |
Feature activation | -2.11e-02 |
Pos logit contributions | |
zag | +0.047 |
ages | +0.047 |
ained | +0.043 |
tr | +0.043 |
ogo | +0.042 |
Neg logit contributions | |
en | -0.023 |
I | -0.023 |
alone | -0.022 |
unatt | -0.022 |
ivated | -0.022 |
Token | tired |
Token ID | 10032 |
Feature activation | -1.21e-02 |
Pos logit contributions | |
ternally | +0.029 |
My | +0.026 |
Leave | +0.026 |
Int | +0.026 |
unatt | +0.026 |
Neg logit contributions | |
zag | -0.033 |
ages | -0.032 |
ained | -0.031 |
ends | -0.030 |
tr | -0.029 |
Token | . |
Token ID | 13 |
Feature activation | -3.12e-02 |
Pos logit contributions | |
ternally | +0.018 |
ivated | +0.018 |
en | +0.017 |
My | +0.017 |
Int | +0.016 |
Neg logit contributions | |
zag | -0.018 |
ages | -0.018 |
tr | -0.017 |
ained | -0.017 |
ometer | -0.016 |
Token | Can |
Token ID | 1680 |
Feature activation | -5.43e-02 |
Pos logit contributions | |
ivated | +0.022 |
ternally | +0.022 |
unatt | +0.021 |
en | +0.018 |
ized | +0.016 |
Neg logit contributions | |
tr | -0.071 |
ages | -0.069 |
ometer | -0.069 |
ccoli | -0.068 |
zag | -0.065 |
Token | you |
Token ID | 345 |
Feature activation | -2.44e-02 |
Pos logit contributions | |
lier | +0.063 |
en | +0.062 |
liest | +0.062 |
ivated | +0.062 |
vious | +0.061 |
Neg logit contributions | |
zag | -0.087 |
uffy | -0.086 |
tr | -0.085 |
ometer | -0.081 |
ages | -0.081 |
Token | help |
Token ID | 1037 |
Feature activation | -4.11e-02 |
Pos logit contributions | |
ternally | +0.031 |
ivated | +0.031 |
en | +0.031 |
unatt | +0.030 |
folk | +0.030 |
Neg logit contributions | |
zag | -0.040 |
tr | -0.036 |
ages | -0.036 |
ine | -0.035 |
ast | -0.034 |
Token | me |
Token ID | 502 |
Feature activation | -1.77e-02 |
Pos logit contributions | |
Int | +0.041 |
ivated | +0.040 |
ternally | +0.039 |
folk | +0.039 |
Leave | +0.039 |
Neg logit contributions | |
ies | -0.076 |
zag | -0.075 |
tails | -0.074 |
ine | -0.074 |
ages | -0.074 |
Token | find |
Token ID | 1064 |
Feature activation | -3.73e-02 |
Pos logit contributions | |
ternally | +0.023 |
ivated | +0.022 |
en | +0.022 |
My | +0.021 |
unatt | +0.021 |
Neg logit contributions | |
zag | -0.030 |
ages | -0.028 |
tr | -0.027 |
ained | -0.026 |
ine | -0.026 |
Token | my |
Token ID | 616 |
Feature activation | -6.20e-03 |
Pos logit contributions | |
ivated | +0.058 |
en | +0.054 |
ternally | +0.053 |
Leave | +0.052 |
unatt | +0.051 |
Neg logit contributions | |
ages | -0.051 |
zag | -0.049 |
tr | -0.049 |
ccoli | -0.047 |
packs | -0.047 |
Token | 't |
Token ID | 470 |
Feature activation | +1.66e-02 |
Pos logit contributions | |
unatt | +0.016 |
ternally | +0.016 |
en | +0.016 |
My | +0.015 |
ivated | +0.015 |
Neg logit contributions | |
zag | -0.022 |
ages | -0.021 |
ained | -0.020 |
tr | -0.019 |
tails | -0.019 |
Token | have |
Token ID | 423 |
Feature activation | -1.56e-02 |
Pos logit contributions | |
ages | +0.031 |
zag | +0.030 |
tr | +0.028 |
ometer | +0.028 |
ccoli | +0.028 |
Neg logit contributions | |
en | -0.019 |
I | -0.019 |
alone | -0.019 |
ivated | -0.018 |
unatt | -0.018 |
Token | to |
Token ID | 284 |
Feature activation | +3.60e-03 |
Pos logit contributions | |
ivated | +0.020 |
ternally | +0.020 |
unatt | +0.019 |
My | +0.019 |
en | +0.018 |
Neg logit contributions | |
ages | -0.025 |
zag | -0.025 |
ained | -0.024 |
tr | -0.023 |
tails | -0.023 |
Token | be |
Token ID | 307 |
Feature activation | -2.35e-02 |
Pos logit contributions | |
ages | +0.007 |
zag | +0.007 |
tr | +0.006 |
ained | +0.006 |
ometer | +0.006 |
Neg logit contributions | |
ivated | -0.004 |
unatt | -0.004 |
ternally | -0.004 |
en | -0.004 |
My | -0.004 |
Token | like |
Token ID | 588 |
Feature activation | -3.64e-02 |
Pos logit contributions | |
ternally | +0.026 |
My | +0.026 |
ivated | +0.025 |
unatt | +0.025 |
Int | +0.024 |
Neg logit contributions | |
ages | -0.041 |
zag | -0.041 |
ained | -0.040 |
ine | -0.038 |
ends | -0.038 |
Token | anyone |
Token ID | 2687 |
Feature activation | -5.15e-02 |
Pos logit contributions | |
ivated | +0.042 |
ternally | +0.041 |
Int | +0.040 |
unatt | +0.038 |
en | +0.037 |
Neg logit contributions | |
zag | -0.060 |
ages | -0.059 |
ained | -0.057 |
olving | -0.057 |
tr | -0.055 |
Token | else |
Token ID | 2073 |
Feature activation | -2.41e-02 |
Pos logit contributions | |
ternally | +0.059 |
Int | +0.056 |
ivated | +0.055 |
My | +0.051 |
unatt | +0.050 |
Neg logit contributions | |
ages | -0.094 |
zag | -0.091 |
ies | -0.087 |
ends | -0.084 |
packs | -0.084 |
Token | . |
Token ID | 13 |
Feature activation | -2.72e-02 |
Pos logit contributions | |
ternally | +0.034 |
ivated | +0.033 |
Int | +0.033 |
unatt | +0.032 |
My | +0.031 |
Neg logit contributions | |
ages | -0.039 |
zag | -0.037 |
ained | -0.034 |
ends | -0.033 |
ometer | -0.032 |
Token | Just |
Token ID | 2329 |
Feature activation | +1.13e-02 |
Pos logit contributions | |
unatt | +0.035 |
ivated | +0.034 |
ternally | +0.032 |
alone | +0.030 |
en | +0.030 |
Neg logit contributions | |
ages | -0.045 |
tr | -0.045 |
zag | -0.044 |
ained | -0.044 |
ometer | -0.043 |
Token | be |
Token ID | 307 |
Feature activation | -1.64e-02 |
Pos logit contributions | |
ages | +0.020 |
zag | +0.020 |
tr | +0.018 |
ained | +0.018 |
ometer | +0.018 |
Neg logit contributions | |
en | -0.014 |
unatt | -0.013 |
My | -0.013 |
ivated | -0.013 |
I | -0.013 |
Token | yourself |
Token ID | 3511 |
Feature activation | -1.55e-02 |
Pos logit contributions | |
ivated | +0.024 |
ternally | +0.024 |
unatt | +0.024 |
My | +0.024 |
Int | +0.023 |
Neg logit contributions | |
ages | -0.023 |
zag | -0.022 |
ained | -0.022 |
ends | -0.021 |
ine | -0.020 |
Token | heard |
Token ID | 2982 |
Feature activation | -3.18e-03 |
Pos logit contributions | |
unatt | +0.012 |
My | +0.012 |
ivated | +0.012 |
en | +0.012 |
ternally | +0.012 |
Neg logit contributions | |
zag | -0.013 |
ages | -0.012 |
tr | -0.012 |
ine | -0.011 |
tails | -0.011 |
Token | the |
Token ID | 262 |
Feature activation | +1.83e-02 |
Pos logit contributions | |
unatt | +0.003 |
ivated | +0.003 |
ternally | +0.003 |
My | +0.003 |
en | +0.003 |
Neg logit contributions | |
ages | -0.006 |
zag | -0.006 |
tr | -0.006 |
ained | -0.006 |
ine | -0.006 |
Token | noise |
Token ID | 7838 |
Feature activation | -3.91e-03 |
Pos logit contributions | |
zag | +0.035 |
ages | +0.035 |
ained | +0.033 |
ccoli | +0.032 |
tr | +0.032 |
Neg logit contributions | |
en | -0.019 |
ivated | -0.018 |
unatt | -0.018 |
alone | -0.018 |
ternally | -0.018 |
Token | and |
Token ID | 290 |
Feature activation | -2.40e-02 |
Pos logit contributions | |
ternally | +0.005 |
My | +0.005 |
unatt | +0.005 |
ivated | +0.005 |
en | +0.005 |
Neg logit contributions | |
ages | -0.006 |
zag | -0.006 |
tr | -0.006 |
ained | -0.006 |
ometer | -0.005 |
Token | came |
Token ID | 1625 |
Feature activation | -9.49e-03 |
Pos logit contributions | |
unatt | +0.036 |
My | +0.034 |
ternally | +0.034 |
ivated | +0.033 |
en | +0.031 |
Neg logit contributions | |
ages | -0.033 |
zag | -0.032 |
tails | -0.032 |
ira | -0.031 |
ies | -0.031 |
Token | to |
Token ID | 284 |
Feature activation | -5.03e-02 |
Pos logit contributions | |
ivated | +0.011 |
unatt | +0.011 |
My | +0.011 |
ternally | +0.011 |
en | +0.011 |
Neg logit contributions | |
zag | -0.016 |
ages | -0.016 |
ained | -0.015 |
tails | -0.015 |
tr | -0.015 |
Token | see |
Token ID | 766 |
Feature activation | +1.77e-03 |
Pos logit contributions | |
liest | +0.075 |
ternally | +0.071 |
ivated | +0.071 |
unatt | +0.071 |
My | +0.070 |
Neg logit contributions | |
common | -0.072 |
oys | -0.066 |
packs | -0.064 |
ira | -0.063 |
tails | -0.060 |
Token | what |
Token ID | 644 |
Feature activation | -4.69e-03 |
Pos logit contributions | |
ages | +0.002 |
zag | +0.002 |
tr | +0.002 |
ained | +0.002 |
ometer | +0.002 |
Neg logit contributions | |
ivated | -0.003 |
ternally | -0.003 |
unatt | -0.003 |
en | -0.003 |
My | -0.003 |
Token | happened |
Token ID | 3022 |
Feature activation | +2.56e-02 |
Pos logit contributions | |
ivated | +0.008 |
ternally | +0.008 |
unatt | +0.007 |
My | +0.007 |
en | +0.007 |
Neg logit contributions | |
zag | -0.006 |
ages | -0.006 |
tr | -0.006 |
ained | -0.006 |
ometer | -0.005 |
Token | . |
Token ID | 13 |
Feature activation | +1.06e-02 |
Pos logit contributions | |
zag | +0.052 |
ages | +0.051 |
tr | +0.049 |
ained | +0.048 |
ccoli | +0.048 |
Neg logit contributions | |
alone | -0.024 |
unatt | -0.023 |
en | -0.023 |
I | -0.023 |
ivated | -0.022 |
Token | She |
Token ID | 1375 |
Feature activation | -1.66e-02 |
Pos logit contributions | |
zag | +0.021 |
ages | +0.021 |
tr | +0.020 |
ained | +0.020 |
ccoli | +0.019 |
Neg logit contributions | |
en | -0.010 |
My | -0.010 |
ivated | -0.010 |
I | -0.010 |
unatt | -0.009 |
Token | play |
Token ID | 711 |
Feature activation | -2.19e-02 |
Pos logit contributions | |
ages | +0.030 |
zag | +0.030 |
ained | +0.028 |
ometer | +0.028 |
tr | +0.027 |
Neg logit contributions | |
I | -0.014 |
My | -0.014 |
unatt | -0.014 |
en | -0.014 |
ternally | -0.013 |
Token | together |
Token ID | 1978 |
Feature activation | -1.80e-02 |
Pos logit contributions | |
ivated | +0.037 |
ternally | +0.036 |
Int | +0.036 |
My | +0.035 |
en | +0.035 |
Neg logit contributions | |
ages | -0.027 |
zag | -0.026 |
tr | -0.024 |
ained | -0.023 |
ccoli | -0.023 |
Token | . |
Token ID | 13 |
Feature activation | -6.44e-03 |
Pos logit contributions | |
ivated | +0.025 |
ternally | +0.025 |
My | +0.025 |
Int | +0.025 |
en | +0.024 |
Neg logit contributions | |
zag | -0.027 |
ages | -0.027 |
ained | -0.025 |
tr | -0.025 |
ccoli | -0.024 |
Token | We |
Token ID | 775 |
Feature activation | +2.70e-02 |
Pos logit contributions | |
ivated | +0.007 |
unatt | +0.007 |
ternally | +0.007 |
en | +0.006 |
alone | +0.006 |
Neg logit contributions | |
ages | -0.012 |
zag | -0.012 |
tr | -0.012 |
ained | -0.011 |
ometer | -0.011 |
Token | are |
Token ID | 389 |
Feature activation | -1.12e-02 |
Pos logit contributions | |
zag | +0.049 |
ages | +0.048 |
ment | +0.046 |
ained | +0.044 |
ometer | +0.044 |
Neg logit contributions | |
en | -0.032 |
cheer | -0.031 |
alone | -0.031 |
ivated | -0.030 |
I | -0.030 |
Token | friends |
Token ID | 2460 |
Feature activation | -6.05e-02 |
Pos logit contributions | |
ternally | +0.018 |
ivated | +0.018 |
unatt | +0.018 |
Int | +0.018 |
My | +0.018 |
Neg logit contributions | |
ages | -0.014 |
zag | -0.014 |
ained | -0.013 |
tr | -0.013 |
ends | -0.013 |
Token | ." |
Token ID | 526 |
Feature activation | -6.75e-03 |
Pos logit contributions | |
Int | +0.116 |
ivated | +0.116 |
ternally | +0.110 |
sidelines | +0.107 |
Hall | +0.105 |
Neg logit contributions | |
ages | -0.061 |
packs | -0.051 |
ends | -0.049 |
ies | -0.048 |
ccoli | -0.048 |
Token | They |
Token ID | 1119 |
Feature activation | -1.47e-02 |
Pos logit contributions | |
unatt | +0.009 |
ternally | +0.009 |
ivated | +0.008 |
en | +0.008 |
My | +0.008 |
Neg logit contributions | |
ages | -0.011 |
zag | -0.011 |
ained | -0.010 |
tr | -0.010 |
ometer | -0.010 |
Token | say |
Token ID | 910 |
Feature activation | -1.32e-02 |
Pos logit contributions | |
unatt | +0.021 |
ternally | +0.021 |
My | +0.021 |
ivated | +0.020 |
en | +0.019 |
Neg logit contributions | |
zag | -0.021 |
ages | -0.021 |
tr | -0.020 |
tails | -0.019 |
ained | -0.019 |
Token | . |
Token ID | 13 |
Feature activation | +3.85e-03 |
Pos logit contributions | |
ivated | +0.019 |
unatt | +0.019 |
ternally | +0.019 |
My | +0.018 |
en | +0.018 |
Neg logit contributions | |
ages | -0.020 |
zag | -0.019 |
ometer | -0.017 |
tr | -0.017 |
ained | -0.017 |
Token |
|
Token ID | 198 |
Feature activation | +4.63e-04 |
Pos logit contributions | |
zag | +0.006 |
ages | +0.006 |
tr | +0.006 |
ained | +0.006 |
ometer | +0.006 |
Neg logit contributions | |
ivated | -0.005 |
My | -0.005 |
en | -0.005 |
ternally | -0.005 |
unatt | -0.005 |
Token | so |
Token ID | 523 |
Feature activation | +4.73e-02 |
Pos logit contributions | |
ternally | +0.006 |
ivated | +0.005 |
unatt | +0.005 |
My | +0.005 |
en | +0.005 |
Neg logit contributions | |
ages | -0.016 |
zag | -0.016 |
ained | -0.015 |
tr | -0.015 |
ometer | -0.015 |
Token | they |
Token ID | 484 |
Feature activation | -1.96e-03 |
Pos logit contributions | |
zag | +0.117 |
ccoli | +0.114 |
ages | +0.113 |
olving | +0.112 |
ometer | +0.110 |
Neg logit contributions | |
I | -0.032 |
en | -0.028 |
me | -0.027 |
alone | -0.027 |
My | -0.026 |
Token | sat |
Token ID | 3332 |
Feature activation | +2.85e-02 |
Pos logit contributions | |
ternally | +0.002 |
ivated | +0.002 |
My | +0.002 |
unatt | +0.002 |
en | +0.002 |
Neg logit contributions | |
ages | -0.003 |
zag | -0.003 |
tr | -0.003 |
ained | -0.003 |
ometer | -0.003 |
Token | down |
Token ID | 866 |
Feature activation | +1.41e-02 |
Pos logit contributions | |
zag | +0.056 |
ages | +0.055 |
tr | +0.053 |
ccoli | +0.053 |
ine | +0.051 |
Neg logit contributions | |
alone | -0.033 |
unatt | -0.031 |
en | -0.029 |
back | -0.029 |
ivated | -0.028 |
Token | on |
Token ID | 319 |
Feature activation | +8.82e-03 |
Pos logit contributions | |
ages | +0.024 |
zag | +0.023 |
tr | +0.022 |
ccoli | +0.022 |
tails | +0.021 |
Neg logit contributions | |
en | -0.018 |
ivated | -0.018 |
unatt | -0.018 |
alone | -0.018 |
My | -0.017 |
Token | a |
Token ID | 257 |
Feature activation | -5.41e-02 |
Pos logit contributions | |
ages | +0.017 |
zag | +0.017 |
tr | +0.016 |
ained | +0.015 |
ometer | +0.015 |
Neg logit contributions | |
ivated | -0.009 |
en | -0.009 |
unatt | -0.009 |
ternally | -0.009 |
My | -0.009 |
Token | quiet |
Token ID | 5897 |
Feature activation | -8.93e-03 |
Pos logit contributions | |
ternally | +0.095 |
ivated | +0.094 |
ople | +0.092 |
My | +0.091 |
folk | +0.090 |
Neg logit contributions | |
zag | -0.075 |
ves | -0.060 |
ages | -0.058 |
tr | -0.058 |
lesson | -0.057 |
Token | bench |
Token ID | 7624 |
Feature activation | +1.70e-02 |
Pos logit contributions | |
ivated | +0.012 |
ternally | +0.012 |
unatt | +0.012 |
My | +0.012 |
en | +0.011 |
Neg logit contributions | |
zag | -0.015 |
ages | -0.014 |
tr | -0.013 |
ine | -0.013 |
ained | -0.013 |
Token | . |
Token ID | 13 |
Feature activation | +1.30e-02 |
Pos logit contributions | |
ages | +0.031 |
zag | +0.031 |
ained | +0.029 |
ccoli | +0.029 |
tr | +0.028 |
Neg logit contributions | |
unatt | -0.018 |
alone | -0.018 |
en | -0.018 |
ivated | -0.018 |
My | -0.017 |
Token | It |
Token ID | 632 |
Feature activation | -1.39e-02 |
Pos logit contributions | |
zag | +0.023 |
ages | +0.023 |
tails | +0.021 |
tr | +0.021 |
ained | +0.021 |
Neg logit contributions | |
My | -0.016 |
en | -0.016 |
I | -0.015 |
Int | -0.015 |
ivated | -0.015 |
Token | was |
Token ID | 373 |
Feature activation | +3.59e-02 |
Pos logit contributions | |
ivated | +0.021 |
ternally | +0.021 |
unatt | +0.021 |
My | +0.021 |
I | +0.020 |
Neg logit contributions | |
zag | -0.020 |
ages | -0.020 |
tr | -0.019 |
ained | -0.018 |
ine | -0.017 |