Token | they |
Token ID | 484 |
Feature activation | -4.17e-03 |
Pos logit contributions | |
ge | +0.002 |
unnecessary | +0.002 |
single | +0.002 |
purchases | +0.002 |
toward | +0.002 |
Neg logit contributions | |
ily | -0.001 |
They | -0.001 |
ia | -0.001 |
Ben | -0.001 |
Ben | -0.001 |
Token | paid |
Token ID | 3432 |
Feature activation | +1.93e-02 |
Pos logit contributions | |
ily | +0.006 |
They | +0.005 |
ia | +0.005 |
Ben | +0.005 |
era | +0.005 |
Neg logit contributions | |
ge | -0.008 |
single | -0.007 |
purchases | -0.007 |
unnecessary | -0.007 |
use | -0.007 |
Token | him |
Token ID | 683 |
Feature activation | +2.81e-02 |
Pos logit contributions | |
single | +0.029 |
ge | +0.028 |
purchases | +0.028 |
picking | +0.027 |
insignificant | +0.027 |
Neg logit contributions | |
ily | -0.034 |
Ben | -0.031 |
They | -0.031 |
ia | -0.031 |
ena | -0.030 |
Token | for |
Token ID | 329 |
Feature activation | -1.84e-03 |
Pos logit contributions | |
single | +0.067 |
picking | +0.065 |
purchases | +0.065 |
ging | +0.064 |
ge | +0.064 |
Neg logit contributions | |
ily | -0.022 |
They | -0.021 |
Ben | -0.020 |
ia | -0.019 |
Tom | -0.016 |
Token | his |
Token ID | 465 |
Feature activation | +9.19e-03 |
Pos logit contributions | |
ily | +0.002 |
They | +0.002 |
Ben | +0.002 |
ia | +0.002 |
Ben | +0.002 |
Neg logit contributions | |
ge | -0.004 |
single | -0.004 |
unnecessary | -0.003 |
purchases | -0.003 |
toward | -0.003 |
Token | av |
Token ID | 1196 |
Feature activation | +1.02e-01 |
Pos logit contributions | |
ge | +0.014 |
ging | +0.014 |
aback | +0.014 |
picking | +0.014 |
single | +0.014 |
Neg logit contributions | |
ily | -0.013 |
Ben | -0.013 |
ia | -0.013 |
They | -0.012 |
twins | -0.012 |
Token | oc |
Token ID | 420 |
Feature activation | +3.48e-02 |
Pos logit contributions | |
toward | +0.129 |
shopping | +0.126 |
ging | +0.121 |
matt | +0.119 |
insignificant | +0.119 |
Neg logit contributions | |
They | -0.177 |
Ben | -0.171 |
aw | -0.167 |
Tom | -0.166 |
Sam | -0.156 |
Token | ados |
Token ID | 22484 |
Feature activation | +2.22e-02 |
Pos logit contributions | |
single | +0.053 |
ge | +0.052 |
unnecessary | +0.051 |
toward | +0.050 |
fy | +0.049 |
Neg logit contributions | |
ily | -0.054 |
ia | -0.053 |
era | -0.050 |
der | -0.050 |
aw | -0.049 |
Token | . |
Token ID | 13 |
Feature activation | -1.48e-03 |
Pos logit contributions | |
unnecessary | +0.051 |
single | +0.051 |
purchases | +0.050 |
ge | +0.050 |
ging | +0.049 |
Neg logit contributions | |
ily | -0.019 |
They | -0.017 |
ia | -0.016 |
Ben | -0.016 |
Ben | -0.015 |
Token |
|
Token ID | 198 |
Feature activation | +6.68e-03 |
Pos logit contributions | |
ily | +0.002 |
ia | +0.002 |
era | +0.002 |
ena | +0.002 |
They | +0.002 |
Neg logit contributions | |
single | -0.003 |
ge | -0.003 |
purchases | -0.003 |
shopping | -0.003 |
unnecessary | -0.003 |
Token |
|
Token ID | 198 |
Feature activation | +2.24e-03 |
Pos logit contributions | |
ge | +0.013 |
single | +0.013 |
purchases | +0.013 |
unnecessary | +0.013 |
shopping | +0.013 |
Neg logit contributions | |
ily | -0.007 |
era | -0.007 |
ia | -0.006 |
ena | -0.006 |
iv | -0.006 |
Token | recommendation |
Token ID | 15602 |
Feature activation | +1.30e-02 |
Pos logit contributions | |
ge | +0.011 |
toward | +0.010 |
ging | +0.010 |
single | +0.010 |
use | +0.010 |
Neg logit contributions | |
ily | -0.010 |
ia | -0.009 |
They | -0.009 |
ena | -0.008 |
Ben | -0.008 |
Token | and |
Token ID | 290 |
Feature activation | +1.09e-02 |
Pos logit contributions | |
ge | +0.027 |
unnecessary | +0.027 |
single | +0.027 |
purchases | +0.027 |
toward | +0.026 |
Neg logit contributions | |
ily | -0.014 |
They | -0.012 |
ia | -0.012 |
Ben | -0.012 |
ena | -0.012 |
Token | picked |
Token ID | 6497 |
Feature activation | +1.81e-02 |
Pos logit contributions | |
purchases | +0.018 |
ge | +0.018 |
single | +0.018 |
unnecessary | +0.018 |
down | +0.018 |
Neg logit contributions | |
ily | -0.016 |
ia | -0.015 |
era | -0.015 |
Ben | -0.015 |
ena | -0.014 |
Token | the |
Token ID | 262 |
Feature activation | +2.58e-02 |
Pos logit contributions | |
ge | +0.031 |
single | +0.028 |
purchases | +0.028 |
use | +0.028 |
fy | +0.028 |
Neg logit contributions | |
ily | -0.029 |
Ben | -0.026 |
They | -0.026 |
ia | -0.024 |
Ben | -0.023 |
Token | right |
Token ID | 826 |
Feature activation | +5.54e-02 |
Pos logit contributions | |
ge | +0.048 |
toward | +0.045 |
fy | +0.045 |
ging | +0.044 |
z | +0.044 |
Neg logit contributions | |
ily | -0.037 |
twins | -0.032 |
They | -0.030 |
not | -0.029 |
Ben | -0.029 |
Token | av |
Token ID | 1196 |
Feature activation | +9.92e-02 |
Pos logit contributions | |
ge | +0.088 |
unnecessary | +0.088 |
fy | +0.086 |
ging | +0.085 |
rolling | +0.083 |
Neg logit contributions | |
ily | -0.090 |
ia | -0.078 |
face | -0.073 |
truth | -0.072 |
twins | -0.070 |
Token | oc |
Token ID | 420 |
Feature activation | +3.65e-02 |
Pos logit contributions | |
heels | +0.143 |
unnecessary | +0.139 |
shopping | +0.139 |
toward | +0.137 |
purchases | +0.136 |
Neg logit contributions | |
They | -0.160 |
Ben | -0.152 |
aw | -0.147 |
una | -0.144 |
Sam | -0.140 |
Token | ados |
Token ID | 22484 |
Feature activation | +3.13e-02 |
Pos logit contributions | |
unnecessary | +0.058 |
ge | +0.057 |
single | +0.055 |
fy | +0.054 |
toward | +0.054 |
Neg logit contributions | |
ily | -0.056 |
ia | -0.052 |
aw | -0.050 |
der | -0.050 |
era | -0.049 |
Token | . |
Token ID | 13 |
Feature activation | -4.58e-03 |
Pos logit contributions | |
unnecessary | +0.068 |
single | +0.066 |
purchases | +0.065 |
ge | +0.064 |
heels | +0.063 |
Neg logit contributions | |
ily | -0.033 |
They | -0.030 |
Ben | -0.028 |
ia | -0.027 |
Ben | -0.025 |
Token |
|
Token ID | 198 |
Feature activation | +9.81e-03 |
Pos logit contributions | |
ily | +0.007 |
ia | +0.007 |
ena | +0.006 |
era | +0.006 |
Ben | +0.006 |
Neg logit contributions | |
single | -0.007 |
purchases | -0.007 |
aback | -0.007 |
shopping | -0.007 |
down | -0.007 |
Token |
|
Token ID | 198 |
Feature activation | +1.92e-03 |
Pos logit contributions | |
single | +0.019 |
ge | +0.019 |
purchases | +0.018 |
shopping | +0.017 |
unnecessary | +0.017 |
Neg logit contributions | |
ily | -0.011 |
era | -0.010 |
ena | -0.010 |
ia | -0.010 |
Ben | -0.009 |
Token | to |
Token ID | 284 |
Feature activation | +6.71e-03 |
Pos logit contributions | |
unnecessary | +0.037 |
purchases | +0.037 |
ge | +0.037 |
single | +0.036 |
picking | +0.036 |
Neg logit contributions | |
ily | -0.021 |
They | -0.020 |
Ben | -0.018 |
ena | -0.017 |
room | -0.017 |
Token | be |
Token ID | 307 |
Feature activation | -9.89e-03 |
Pos logit contributions | |
single | +0.011 |
ge | +0.011 |
unnecessary | +0.010 |
purchases | +0.010 |
ging | +0.010 |
Neg logit contributions | |
ily | -0.011 |
They | -0.010 |
ia | -0.010 |
Ben | -0.009 |
Ben | -0.009 |
Token | restored |
Token ID | 15032 |
Feature activation | -2.91e-03 |
Pos logit contributions | |
They | +0.013 |
ily | +0.013 |
not | +0.012 |
who | +0.011 |
ena | +0.011 |
Neg logit contributions | |
single | -0.018 |
picking | -0.018 |
toward | -0.017 |
reins | -0.017 |
ge | -0.017 |
Token | to |
Token ID | 284 |
Feature activation | +4.30e-03 |
Pos logit contributions | |
ily | +0.003 |
They | +0.003 |
Ben | +0.002 |
ia | +0.002 |
ena | +0.002 |
Neg logit contributions | |
purchases | -0.007 |
unnecessary | -0.006 |
picking | -0.006 |
ge | -0.006 |
single | -0.006 |
Token | its |
Token ID | 663 |
Feature activation | -9.82e-03 |
Pos logit contributions | |
single | +0.008 |
ge | +0.008 |
purchases | +0.008 |
unnecessary | +0.008 |
ging | +0.007 |
Neg logit contributions | |
ily | -0.006 |
They | -0.005 |
ia | -0.005 |
Ben | -0.005 |
ena | -0.005 |
Token | former |
Token ID | 1966 |
Feature activation | +9.75e-02 |
Pos logit contributions | |
ily | +0.016 |
ia | +0.016 |
They | +0.015 |
Ben | +0.015 |
era | +0.014 |
Neg logit contributions | |
ge | -0.015 |
ging | -0.015 |
picking | -0.014 |
unnecessary | -0.013 |
use | -0.013 |
Token | glory |
Token ID | 13476 |
Feature activation | -7.79e-03 |
Pos logit contributions | |
ging | +0.190 |
picking | +0.186 |
reins | +0.176 |
ge | +0.176 |
ogly | +0.171 |
Neg logit contributions | |
ia | -0.128 |
face | -0.103 |
der | -0.099 |
ily | -0.097 |
not | -0.094 |
Token | ." |
Token ID | 526 |
Feature activation | -2.21e-02 |
Pos logit contributions | |
ily | +0.008 |
They | +0.008 |
ia | +0.007 |
ena | +0.007 |
Ben | +0.006 |
Neg logit contributions | |
unnecessary | -0.017 |
purchases | -0.017 |
ge | -0.016 |
single | -0.016 |
picking | -0.016 |
Token | |
Token ID | 220 |
Feature activation | -6.26e-03 |
Pos logit contributions | |
ily | +0.033 |
ena | +0.033 |
They | +0.031 |
Ben | +0.031 |
ia | +0.031 |
Neg logit contributions | |
single | -0.035 |
toward | -0.033 |
purchases | -0.032 |
down | -0.032 |
aback | -0.032 |
Token |
|
Token ID | 198 |
Feature activation | +6.27e-03 |
Pos logit contributions | |
ily | +0.007 |
era | +0.006 |
ena | +0.006 |
Ben | +0.006 |
ia | +0.006 |
Neg logit contributions | |
ge | -0.012 |
single | -0.012 |
purchases | -0.012 |
shopping | -0.012 |
down | -0.012 |
Token |
|
Token ID | 198 |
Feature activation | +5.85e-03 |
Pos logit contributions | |
single | +0.012 |
ge | +0.011 |
toward | +0.011 |
shopping | +0.010 |
purchases | +0.010 |
Neg logit contributions | |
ily | -0.007 |
ena | -0.007 |
era | -0.007 |
ia | -0.007 |
der | -0.006 |
Token | weather |
Token ID | 6193 |
Feature activation | +9.51e-03 |
Pos logit contributions | |
ge | +0.028 |
ging | +0.026 |
unnecessary | +0.025 |
purchases | +0.025 |
toward | +0.025 |
Neg logit contributions | |
ily | -0.021 |
Ben | -0.018 |
ia | -0.018 |
Ben | -0.018 |
They | -0.017 |
Token | before |
Token ID | 878 |
Feature activation | -3.54e-02 |
Pos logit contributions | |
unnecessary | +0.019 |
purchases | +0.018 |
ge | +0.018 |
single | +0.017 |
use | +0.017 |
Neg logit contributions | |
ily | -0.012 |
They | -0.011 |
Ben | -0.011 |
ia | -0.011 |
room | -0.010 |
Token | he |
Token ID | 339 |
Feature activation | +5.09e-03 |
Pos logit contributions | |
ily | +0.057 |
They | +0.054 |
ia | +0.053 |
Ben | +0.051 |
Ben | +0.051 |
Neg logit contributions | |
ge | -0.054 |
single | -0.054 |
unnecessary | -0.054 |
purchases | -0.053 |
toward | -0.051 |
Token | picked |
Token ID | 6497 |
Feature activation | +5.26e-03 |
Pos logit contributions | |
ge | +0.010 |
purchases | +0.010 |
unnecessary | +0.010 |
ging | +0.010 |
single | +0.010 |
Neg logit contributions | |
ily | -0.007 |
ia | -0.006 |
Ben | -0.005 |
Ben | -0.005 |
They | -0.005 |
Token | his |
Token ID | 465 |
Feature activation | +4.81e-03 |
Pos logit contributions | |
ge | +0.010 |
use | +0.009 |
purchases | +0.009 |
single | +0.009 |
fy | +0.009 |
Neg logit contributions | |
ily | -0.007 |
Ben | -0.007 |
They | -0.006 |
Tom | -0.006 |
aw | -0.006 |
Token | av |
Token ID | 1196 |
Feature activation | +9.60e-02 |
Pos logit contributions | |
ge | +0.010 |
ging | +0.010 |
fy | +0.009 |
unnecessary | +0.009 |
suit | +0.009 |
Neg logit contributions | |
not | -0.006 |
face | -0.006 |
ily | -0.005 |
ia | -0.005 |
Ben | -0.005 |
Token | oc |
Token ID | 420 |
Feature activation | +3.21e-02 |
Pos logit contributions | |
unnecessary | +0.131 |
shopping | +0.127 |
ging | +0.126 |
ge | +0.125 |
toward | +0.123 |
Neg logit contributions | |
They | -0.159 |
Ben | -0.150 |
ia | -0.149 |
Sam | -0.146 |
aw | -0.145 |
Token | ados |
Token ID | 22484 |
Feature activation | +3.47e-02 |
Pos logit contributions | |
ge | +0.052 |
unnecessary | +0.051 |
single | +0.050 |
fy | +0.048 |
toward | +0.048 |
Neg logit contributions | |
ily | -0.050 |
ia | -0.048 |
era | -0.045 |
room | -0.044 |
iv | -0.043 |
Token | . |
Token ID | 13 |
Feature activation | -7.94e-03 |
Pos logit contributions | |
unnecessary | +0.089 |
single | +0.087 |
reins | +0.085 |
purchases | +0.085 |
picking | +0.084 |
Neg logit contributions | |
ily | -0.021 |
They | -0.020 |
Ben | -0.018 |
not | -0.015 |
ia | -0.014 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | -8.31e-03 |
Pos logit contributions | |
ily | +0.009 |
They | +0.008 |
ia | +0.008 |
Ben | +0.008 |
ena | +0.007 |
Neg logit contributions | |
ge | -0.016 |
single | -0.016 |
unnecessary | -0.016 |
purchases | -0.016 |
shopping | -0.015 |
Token | Once |
Token ID | 7454 |
Feature activation | +7.96e-03 |
Pos logit contributions | |
ily | +0.010 |
ia | +0.010 |
era | +0.010 |
ena | +0.009 |
They | +0.009 |
Neg logit contributions | |
single | -0.015 |
down | -0.014 |
ge | -0.014 |
shopping | -0.014 |
toward | -0.014 |
Token | |
Token ID | 220 |
Feature activation | +3.61e-03 |
Pos logit contributions | |
ily | +0.001 |
ia | +0.001 |
They | +0.001 |
Ben | +0.001 |
ena | +0.001 |
Neg logit contributions | |
single | -0.002 |
purchases | -0.002 |
unnecessary | -0.002 |
ge | -0.002 |
shopping | -0.001 |
Token | She |
Token ID | 1375 |
Feature activation | -1.02e-02 |
Pos logit contributions | |
ge | +0.008 |
single | +0.008 |
unnecessary | +0.008 |
purchases | +0.008 |
shopping | +0.007 |
Neg logit contributions | |
ily | -0.003 |
ia | -0.003 |
They | -0.003 |
Ben | -0.003 |
era | -0.003 |
Token | could |
Token ID | 714 |
Feature activation | -2.39e-02 |
Pos logit contributions | |
ily | +0.014 |
They | +0.013 |
ia | +0.012 |
Ben | +0.012 |
era | +0.012 |
Neg logit contributions | |
ge | -0.018 |
single | -0.018 |
purchases | -0.018 |
unnecessary | -0.018 |
toward | -0.017 |
Token | feel |
Token ID | 1254 |
Feature activation | -4.48e-03 |
Pos logit contributions | |
ily | +0.036 |
ia | +0.033 |
They | +0.032 |
Ben | +0.032 |
ena | +0.031 |
Neg logit contributions | |
ge | -0.040 |
single | -0.039 |
purchases | -0.039 |
unnecessary | -0.037 |
toward | -0.037 |
Token | the |
Token ID | 262 |
Feature activation | +1.19e-02 |
Pos logit contributions | |
ily | +0.005 |
Ben | +0.005 |
They | +0.005 |
ia | +0.004 |
Ben | +0.004 |
Neg logit contributions | |
ge | -0.009 |
purchases | -0.009 |
use | -0.009 |
picking | -0.009 |
loading | -0.009 |
Token | masse |
Token ID | 41869 |
Feature activation | +9.51e-02 |
Pos logit contributions | |
ge | +0.020 |
purchases | +0.019 |
unnecessary | +0.019 |
single | +0.019 |
picking | +0.018 |
Neg logit contributions | |
ily | -0.018 |
They | -0.017 |
ia | -0.016 |
Ben | -0.016 |
Ben | -0.016 |
Token | use |
Token ID | 1904 |
Feature activation | +1.32e-02 |
Pos logit contributions | |
heels | +0.099 |
unnecessary | +0.099 |
hind | +0.094 |
weights | +0.092 |
purchases | +0.090 |
Neg logit contributions | |
ily | -0.209 |
forgive | -0.177 |
ena | -0.169 |
iv | -0.169 |
Ben | -0.169 |
Token | 's |
Token ID | 338 |
Feature activation | -5.56e-03 |
Pos logit contributions | |
ge | +0.030 |
single | +0.029 |
unnecessary | +0.029 |
purchases | +0.029 |
picking | +0.028 |
Neg logit contributions | |
ily | -0.013 |
Ben | -0.012 |
They | -0.011 |
ena | -0.011 |
ia | -0.011 |
Token | warm |
Token ID | 5814 |
Feature activation | +1.55e-02 |
Pos logit contributions | |
ily | +0.006 |
ia | +0.005 |
Ben | +0.005 |
They | +0.005 |
ena | +0.005 |
Neg logit contributions | |
ge | -0.012 |
unnecessary | -0.012 |
single | -0.011 |
use | -0.011 |
purchases | -0.011 |
Token | hands |
Token ID | 2832 |
Feature activation | +1.32e-02 |
Pos logit contributions | |
ge | +0.028 |
unnecessary | +0.026 |
picking | +0.025 |
purchases | +0.025 |
single | +0.025 |
Neg logit contributions | |
ily | -0.022 |
Ben | -0.020 |
ia | -0.020 |
They | -0.020 |
Ben | -0.019 |
Token | pressing |
Token ID | 12273 |
Feature activation | +1.84e-02 |
Pos logit contributions | |
ge | +0.027 |
unnecessary | +0.026 |
single | +0.026 |
purchases | +0.026 |
toward | +0.025 |
Neg logit contributions | |
ily | -0.015 |
They | -0.013 |
ia | -0.013 |
Ben | -0.013 |
ena | -0.012 |
Token | and |
Token ID | 290 |
Feature activation | +3.80e-02 |
Pos logit contributions | |
ge | +0.044 |
single | +0.044 |
toward | +0.043 |
unnecessary | +0.042 |
down | +0.042 |
Neg logit contributions | |
ily | -0.030 |
ia | -0.027 |
Ben | -0.026 |
ena | -0.025 |
era | -0.025 |
Token | try |
Token ID | 1949 |
Feature activation | +1.94e-02 |
Pos logit contributions | |
ge | +0.063 |
single | +0.061 |
unnecessary | +0.061 |
purchases | +0.060 |
picking | +0.058 |
Neg logit contributions | |
Ben | -0.057 |
ily | -0.057 |
They | -0.057 |
Ben | -0.055 |
ia | -0.055 |
Token | to |
Token ID | 284 |
Feature activation | -7.35e-03 |
Pos logit contributions | |
fy | +0.039 |
use | +0.038 |
purchases | +0.038 |
ited | +0.038 |
aback | +0.036 |
Neg logit contributions | |
They | -0.025 |
Ben | -0.025 |
not | -0.023 |
ia | -0.023 |
ily | -0.022 |
Token | grab |
Token ID | 5552 |
Feature activation | +6.20e-03 |
Pos logit contributions | |
ily | +0.013 |
They | +0.012 |
ia | +0.012 |
Ben | +0.012 |
Ben | +0.012 |
Neg logit contributions | |
unnecessary | -0.010 |
ge | -0.010 |
single | -0.010 |
purchases | -0.010 |
shopping | -0.010 |
Token | the |
Token ID | 262 |
Feature activation | +1.04e-02 |
Pos logit contributions | |
ge | +0.015 |
purchases | +0.014 |
use | +0.014 |
single | +0.014 |
picking | +0.014 |
Neg logit contributions | |
Ben | -0.006 |
ily | -0.006 |
They | -0.005 |
Tom | -0.005 |
Ben | -0.005 |
Token | av |
Token ID | 1196 |
Feature activation | +9.51e-02 |
Pos logit contributions | |
ge | +0.018 |
unnecessary | +0.017 |
shadow | +0.017 |
fy | +0.017 |
purchases | +0.017 |
Neg logit contributions | |
ily | -0.016 |
Ben | -0.014 |
ia | -0.013 |
Ben | -0.013 |
They | -0.013 |
Token | oc |
Token ID | 420 |
Feature activation | +2.70e-02 |
Pos logit contributions | |
unnecessary | +0.118 |
purchases | +0.116 |
shopping | +0.115 |
ge | +0.109 |
insisting | +0.108 |
Neg logit contributions | |
They | -0.181 |
Ben | -0.176 |
una | -0.168 |
ia | -0.168 |
Sam | -0.161 |
Token | ados |
Token ID | 22484 |
Feature activation | +1.56e-02 |
Pos logit contributions | |
ge | +0.042 |
unnecessary | +0.042 |
single | +0.041 |
fy | +0.038 |
toward | +0.038 |
Neg logit contributions | |
ily | -0.042 |
ia | -0.041 |
room | -0.038 |
aw | -0.038 |
era | -0.037 |
Token | . |
Token ID | 13 |
Feature activation | +9.81e-03 |
Pos logit contributions | |
unnecessary | +0.034 |
single | +0.033 |
purchases | +0.033 |
ge | +0.033 |
use | +0.032 |
Neg logit contributions | |
ily | -0.016 |
They | -0.015 |
ia | -0.014 |
Ben | -0.014 |
Ben | -0.013 |
Token | But |
Token ID | 887 |
Feature activation | -2.77e-05 |
Pos logit contributions | |
single | +0.020 |
unnecessary | +0.020 |
purchases | +0.020 |
ge | +0.020 |
toward | +0.019 |
Neg logit contributions | |
ily | -0.011 |
ia | -0.009 |
ena | -0.009 |
era | -0.009 |
They | -0.009 |
Token | the |
Token ID | 262 |
Feature activation | -9.56e-03 |
Pos logit contributions | |
ily | +0.000 |
Ben | +0.000 |
They | +0.000 |
ia | +0.000 |
Ben | +0.000 |
Neg logit contributions | |
ge | -0.000 |
unnecessary | -0.000 |
purchases | -0.000 |
single | -0.000 |
use | -0.000 |
Token | Everyday |
Token ID | 48154 |
Feature activation | +9.34e-03 |
Pos logit contributions | |
ge | +0.002 |
single | +0.002 |
purchases | +0.001 |
toward | +0.001 |
down | +0.001 |
Neg logit contributions | |
ily | -0.001 |
ia | -0.001 |
They | -0.001 |
Ben | -0.001 |
era | -0.001 |
Token | he |
Token ID | 339 |
Feature activation | -2.20e-03 |
Pos logit contributions | |
single | +0.018 |
ge | +0.018 |
unnecessary | +0.017 |
purchases | +0.017 |
shopping | +0.017 |
Neg logit contributions | |
ily | -0.012 |
They | -0.011 |
ia | -0.011 |
Ben | -0.010 |
ena | -0.010 |
Token | would |
Token ID | 561 |
Feature activation | +4.08e-03 |
Pos logit contributions | |
ily | +0.003 |
ia | +0.003 |
They | +0.003 |
Ben | +0.003 |
Ben | +0.002 |
Neg logit contributions | |
unnecessary | -0.004 |
ge | -0.004 |
purchases | -0.004 |
single | -0.004 |
ging | -0.004 |
Token | take |
Token ID | 1011 |
Feature activation | +2.49e-02 |
Pos logit contributions | |
ge | +0.007 |
unnecessary | +0.007 |
single | +0.007 |
purchases | +0.007 |
shopping | +0.006 |
Neg logit contributions | |
ily | -0.006 |
They | -0.005 |
ia | -0.005 |
Ben | -0.005 |
ena | -0.005 |
Token | the |
Token ID | 262 |
Feature activation | +1.59e-02 |
Pos logit contributions | |
ge | +0.056 |
purchases | +0.052 |
single | +0.052 |
use | +0.051 |
picking | +0.051 |
Neg logit contributions | |
ily | -0.026 |
Ben | -0.025 |
They | -0.023 |
ia | -0.021 |
Tom | -0.021 |
Token | av |
Token ID | 1196 |
Feature activation | +9.39e-02 |
Pos logit contributions | |
ge | +0.026 |
ging | +0.024 |
unnecessary | +0.024 |
purchases | +0.024 |
single | +0.023 |
Neg logit contributions | |
ily | -0.024 |
Ben | -0.022 |
ia | -0.022 |
Ben | -0.021 |
era | -0.021 |
Token | oc |
Token ID | 420 |
Feature activation | +2.29e-02 |
Pos logit contributions | |
unnecessary | +0.122 |
purchases | +0.115 |
ge | +0.113 |
heels | +0.111 |
matt | +0.110 |
Neg logit contributions | |
They | -0.167 |
aw | -0.161 |
Ben | -0.152 |
ily | -0.147 |
room | -0.145 |
Token | ados |
Token ID | 22484 |
Feature activation | +2.56e-02 |
Pos logit contributions | |
unnecessary | +0.035 |
ge | +0.034 |
single | +0.033 |
fy | +0.032 |
suit | +0.031 |
Neg logit contributions | |
ily | -0.034 |
ia | -0.033 |
era | -0.033 |
aw | -0.033 |
der | -0.032 |
Token | and |
Token ID | 290 |
Feature activation | +1.54e-02 |
Pos logit contributions | |
unnecessary | +0.044 |
purchases | +0.042 |
ge | +0.042 |
single | +0.042 |
ging | +0.040 |
Neg logit contributions | |
ily | -0.038 |
They | -0.037 |
Ben | -0.034 |
ia | -0.033 |
Ben | -0.033 |
Token | scatter |
Token ID | 41058 |
Feature activation | +6.59e-03 |
Pos logit contributions | |
ge | +0.028 |
unnecessary | +0.028 |
single | +0.026 |
ging | +0.026 |
toward | +0.025 |
Neg logit contributions | |
They | -0.021 |
Ben | -0.020 |
ily | -0.020 |
not | -0.018 |
Ben | -0.017 |
Token | them |
Token ID | 606 |
Feature activation | +1.74e-02 |
Pos logit contributions | |
ge | +0.015 |
unnecessary | +0.014 |
shadow | +0.014 |
purchases | +0.014 |
reins | +0.014 |
Neg logit contributions | |
ily | -0.007 |
They | -0.006 |
Ben | -0.006 |
Tom | -0.005 |
Ben | -0.005 |
Token | the |
Token ID | 262 |
Feature activation | +5.78e-03 |
Pos logit contributions | |
ge | +0.011 |
purchases | +0.011 |
picking | +0.010 |
single | +0.010 |
reins | +0.010 |
Neg logit contributions | |
ily | -0.005 |
They | -0.004 |
Ben | -0.004 |
ia | -0.004 |
Ben | -0.004 |
Token | village |
Token ID | 7404 |
Feature activation | +1.28e-02 |
Pos logit contributions | |
ge | +0.010 |
purchases | +0.009 |
toward | +0.009 |
unnecessary | +0.009 |
single | +0.009 |
Neg logit contributions | |
ily | -0.009 |
Ben | -0.008 |
ia | -0.008 |
They | -0.008 |
Ben | -0.008 |
Token | back |
Token ID | 736 |
Feature activation | +1.50e-02 |
Pos logit contributions | |
unnecessary | +0.025 |
ge | +0.025 |
purchases | +0.024 |
ging | +0.024 |
single | +0.024 |
Neg logit contributions | |
ily | -0.014 |
They | -0.014 |
ia | -0.013 |
Ben | -0.012 |
era | -0.012 |
Token | to |
Token ID | 284 |
Feature activation | +5.13e-04 |
Pos logit contributions | |
ge | +0.025 |
ging | +0.025 |
purchases | +0.025 |
unnecessary | +0.025 |
single | +0.024 |
Neg logit contributions | |
ily | -0.021 |
They | -0.021 |
ia | -0.019 |
Ben | -0.019 |
ena | -0.018 |
Token | its |
Token ID | 663 |
Feature activation | -8.45e-03 |
Pos logit contributions | |
ge | +0.001 |
single | +0.001 |
unnecessary | +0.001 |
purchases | +0.001 |
shopping | +0.001 |
Neg logit contributions | |
ily | -0.001 |
ia | -0.001 |
They | -0.001 |
Ben | -0.001 |
ena | -0.001 |
Token | former |
Token ID | 1966 |
Feature activation | +9.38e-02 |
Pos logit contributions | |
ily | +0.013 |
ia | +0.013 |
They | +0.013 |
Ben | +0.013 |
era | +0.012 |
Neg logit contributions | |
ge | -0.013 |
ging | -0.013 |
picking | -0.012 |
use | -0.011 |
unnecessary | -0.011 |
Token | glory |
Token ID | 13476 |
Feature activation | -8.77e-03 |
Pos logit contributions | |
ging | +0.179 |
picking | +0.173 |
reins | +0.168 |
ge | +0.166 |
ogly | +0.162 |
Neg logit contributions | |
ia | -0.128 |
face | -0.101 |
der | -0.099 |
ily | -0.098 |
They | -0.094 |
Token | !" |
Token ID | 2474 |
Feature activation | -3.52e-02 |
Pos logit contributions | |
ily | +0.010 |
They | +0.010 |
ia | +0.009 |
ena | +0.009 |
Ben | +0.009 |
Neg logit contributions | |
unnecessary | -0.018 |
ge | -0.017 |
purchases | -0.017 |
single | -0.017 |
picking | -0.016 |
Token | |
Token ID | 220 |
Feature activation | -9.21e-03 |
Pos logit contributions | |
ily | +0.057 |
ia | +0.056 |
ena | +0.053 |
Ben | +0.052 |
una | +0.051 |
Neg logit contributions | |
down | -0.051 |
single | -0.051 |
aback | -0.050 |
shopping | -0.050 |
toward | -0.050 |
Token |
|
Token ID | 198 |
Feature activation | -8.80e-03 |
Pos logit contributions | |
ily | +0.010 |
ia | +0.009 |
ena | +0.009 |
Ben | +0.009 |
era | +0.009 |
Neg logit contributions | |
ge | -0.018 |
purchases | -0.018 |
single | -0.017 |
unnecessary | -0.017 |
shopping | -0.017 |
Token |
|
Token ID | 198 |
Feature activation | -9.10e-03 |
Pos logit contributions | |
ily | +0.010 |
ia | +0.009 |
ena | +0.009 |
era | +0.009 |
Ben | +0.008 |
Neg logit contributions | |
single | -0.017 |
ge | -0.017 |
purchases | -0.016 |
unnecessary | -0.016 |
shopping | -0.016 |
Token | back |
Token ID | 736 |
Feature activation | +2.04e-02 |
Pos logit contributions | |
ge | +0.014 |
single | +0.013 |
unnecessary | +0.013 |
use | +0.013 |
purchases | +0.013 |
Neg logit contributions | |
ily | -0.009 |
They | -0.008 |
Ben | -0.008 |
ia | -0.008 |
Ben | -0.007 |
Token | the |
Token ID | 262 |
Feature activation | +1.97e-02 |
Pos logit contributions | |
ge | +0.044 |
ging | +0.042 |
asy | +0.041 |
use | +0.041 |
unnecessary | +0.041 |
Neg logit contributions | |
ily | -0.022 |
They | -0.020 |
Ben | -0.019 |
ia | -0.017 |
ena | -0.017 |
Token | village |
Token ID | 7404 |
Feature activation | +8.10e-03 |
Pos logit contributions | |
ge | +0.034 |
toward | +0.031 |
unnecessary | +0.030 |
ging | +0.030 |
ening | +0.030 |
Neg logit contributions | |
ily | -0.031 |
Ben | -0.028 |
ia | -0.027 |
Ben | -0.026 |
They | -0.026 |
Token | to |
Token ID | 284 |
Feature activation | +2.43e-03 |
Pos logit contributions | |
unnecessary | +0.015 |
ge | +0.015 |
use | +0.015 |
ging | +0.014 |
single | +0.014 |
Neg logit contributions | |
ily | -0.010 |
They | -0.010 |
ia | -0.009 |
Ben | -0.009 |
era | -0.008 |
Token | its |
Token ID | 663 |
Feature activation | -7.08e-03 |
Pos logit contributions | |
ge | +0.004 |
single | +0.004 |
unnecessary | +0.004 |
purchases | +0.004 |
shopping | +0.004 |
Neg logit contributions | |
ily | -0.003 |
ia | -0.003 |
They | -0.003 |
Ben | -0.003 |
era | -0.003 |
Token | former |
Token ID | 1966 |
Feature activation | +9.31e-02 |
Pos logit contributions | |
ia | +0.011 |
ily | +0.011 |
Ben | +0.011 |
They | +0.010 |
not | +0.010 |
Neg logit contributions | |
ge | -0.011 |
ging | -0.011 |
use | -0.010 |
aback | -0.010 |
picking | -0.010 |
Token | state |
Token ID | 1181 |
Feature activation | -1.42e-02 |
Pos logit contributions | |
ging | +0.184 |
ge | +0.176 |
picking | +0.176 |
reins | +0.170 |
unnecessary | +0.167 |
Neg logit contributions | |
ia | -0.123 |
face | -0.097 |
ily | -0.093 |
der | -0.088 |
They | -0.087 |
Token | . |
Token ID | 13 |
Feature activation | -1.52e-02 |
Pos logit contributions | |
ily | +0.014 |
They | +0.013 |
ia | +0.013 |
ena | +0.012 |
Ben | +0.012 |
Neg logit contributions | |
unnecessary | -0.031 |
ge | -0.031 |
single | -0.030 |
purchases | -0.030 |
use | -0.029 |
Token | With |
Token ID | 2080 |
Feature activation | +1.01e-02 |
Pos logit contributions | |
ily | +0.020 |
ia | +0.018 |
Ben | +0.017 |
ena | +0.017 |
era | +0.017 |
Neg logit contributions | |
single | -0.029 |
purchases | -0.027 |
ge | -0.027 |
shopping | -0.026 |
unnecessary | -0.026 |
Token | the |
Token ID | 262 |
Feature activation | +2.11e-02 |
Pos logit contributions | |
ge | +0.023 |
unnecessary | +0.022 |
purchases | +0.021 |
use | +0.021 |
toward | +0.021 |
Neg logit contributions | |
ily | -0.010 |
They | -0.009 |
Ben | -0.009 |
ia | -0.008 |
Ben | -0.008 |
Token | help |
Token ID | 1037 |
Feature activation | +3.45e-02 |
Pos logit contributions | |
ge | +0.038 |
unnecessary | +0.034 |
toward | +0.034 |
purchases | +0.033 |
down | +0.032 |
Neg logit contributions | |
ily | -0.033 |
ia | -0.029 |
Ben | -0.028 |
Ben | -0.028 |
They | -0.027 |
Token | the |
Token ID | 262 |
Feature activation | +1.52e-02 |
Pos logit contributions | |
ge | +0.006 |
picking | +0.006 |
single | +0.006 |
purchases | +0.006 |
use | +0.006 |
Neg logit contributions | |
ily | -0.003 |
Ben | -0.003 |
ia | -0.003 |
They | -0.003 |
ena | -0.003 |
Token | heavy |
Token ID | 4334 |
Feature activation | +2.08e-02 |
Pos logit contributions | |
ge | +0.027 |
toward | +0.024 |
single | +0.024 |
unnecessary | +0.023 |
ging | +0.023 |
Neg logit contributions | |
ily | -0.024 |
Ben | -0.021 |
Ben | -0.020 |
ia | -0.020 |
twins | -0.020 |
Token | bag |
Token ID | 6131 |
Feature activation | +1.60e-02 |
Pos logit contributions | |
unnecessary | +0.035 |
ge | +0.033 |
picking | +0.032 |
shadow | +0.032 |
insignificant | +0.031 |
Neg logit contributions | |
ily | -0.031 |
Ben | -0.029 |
Ben | -0.028 |
ia | -0.028 |
room | -0.027 |
Token | had |
Token ID | 550 |
Feature activation | +9.71e-03 |
Pos logit contributions | |
unnecessary | +0.030 |
ge | +0.030 |
purchases | +0.030 |
single | +0.029 |
use | +0.028 |
Neg logit contributions | |
ily | -0.021 |
They | -0.019 |
ia | -0.018 |
Ben | -0.017 |
Ben | -0.017 |
Token | been |
Token ID | 587 |
Feature activation | -8.37e-03 |
Pos logit contributions | |
ge | +0.017 |
single | +0.017 |
unnecessary | +0.017 |
purchases | +0.016 |
shopping | +0.016 |
Neg logit contributions | |
ily | -0.014 |
They | -0.013 |
ia | -0.013 |
Ben | -0.012 |
ena | -0.012 |
Token | fore |
Token ID | 1674 |
Feature activation | +9.13e-02 |
Pos logit contributions | |
ily | +0.012 |
Ben | +0.010 |
They | +0.010 |
ena | +0.010 |
not | +0.010 |
Neg logit contributions | |
purchases | -0.015 |
reins | -0.015 |
unnecessary | -0.015 |
weights | -0.014 |
picking | -0.014 |
Token | shadow |
Token ID | 19106 |
Feature activation | -1.06e-02 |
Pos logit contributions | |
single | +0.073 |
purchases | +0.071 |
heels | +0.066 |
weights | +0.066 |
picking | +0.062 |
Neg logit contributions | |
They | -0.213 |
ily | -0.201 |
ena | -0.190 |
Luckily | -0.186 |
Ben | -0.186 |
Token | ed |
Token ID | 276 |
Feature activation | +3.96e-03 |
Pos logit contributions | |
ily | +0.018 |
They | +0.017 |
ia | +0.017 |
Ben | +0.017 |
era | +0.016 |
Neg logit contributions | |
ge | -0.015 |
single | -0.015 |
unnecessary | -0.014 |
purchases | -0.014 |
toward | -0.014 |
Token | , |
Token ID | 11 |
Feature activation | -1.65e-02 |
Pos logit contributions | |
purchases | +0.007 |
ge | +0.007 |
picking | +0.007 |
single | +0.007 |
reins | +0.007 |
Neg logit contributions | |
ily | -0.005 |
Ben | -0.005 |
They | -0.005 |
Tom | -0.005 |
ia | -0.004 |
Token | and |
Token ID | 290 |
Feature activation | -1.12e-02 |
Pos logit contributions | |
ily | +0.021 |
ia | +0.019 |
They | +0.018 |
Ben | +0.018 |
era | +0.018 |
Neg logit contributions | |
ge | -0.032 |
single | -0.032 |
unnecessary | -0.031 |
purchases | -0.031 |
toward | -0.030 |
Token | that |
Token ID | 326 |
Feature activation | +2.41e-03 |
Pos logit contributions | |
ily | +0.013 |
They | +0.012 |
ia | +0.012 |
Ben | +0.012 |
ena | +0.011 |
Neg logit contributions | |
ge | -0.022 |
unnecessary | -0.022 |
single | -0.022 |
purchases | -0.021 |
shopping | -0.021 |
Token | eyes |
Token ID | 2951 |
Feature activation | +3.82e-02 |
Pos logit contributions | |
ge | +0.005 |
single | +0.005 |
unnecessary | +0.005 |
purchases | +0.005 |
use | +0.005 |
Neg logit contributions | |
ily | -0.004 |
ia | -0.004 |
They | -0.003 |
era | -0.003 |
Ben | -0.003 |
Token | . |
Token ID | 13 |
Feature activation | -1.71e-03 |
Pos logit contributions | |
unnecessary | +0.079 |
purchases | +0.078 |
ge | +0.078 |
single | +0.077 |
use | +0.074 |
Neg logit contributions | |
ily | -0.042 |
They | -0.038 |
ia | -0.037 |
Ben | -0.035 |
era | -0.035 |
Token | She |
Token ID | 1375 |
Feature activation | -1.47e-02 |
Pos logit contributions | |
ily | +0.002 |
una | +0.002 |
ia | +0.002 |
ena | +0.002 |
They | +0.002 |
Neg logit contributions | |
single | -0.003 |
toward | -0.003 |
aback | -0.003 |
shopping | -0.003 |
down | -0.003 |
Token | had |
Token ID | 550 |
Feature activation | +1.11e-02 |
Pos logit contributions | |
ily | +0.020 |
ia | +0.018 |
They | +0.018 |
Ben | +0.017 |
era | +0.017 |
Neg logit contributions | |
ge | -0.026 |
single | -0.026 |
unnecessary | -0.026 |
purchases | -0.026 |
toward | -0.025 |
Token | been |
Token ID | 587 |
Feature activation | -5.73e-03 |
Pos logit contributions | |
single | +0.020 |
ge | +0.020 |
unnecessary | +0.019 |
purchases | +0.019 |
toward | +0.019 |
Neg logit contributions | |
ily | -0.015 |
ia | -0.014 |
They | -0.014 |
Ben | -0.013 |
era | -0.013 |
Token | fore |
Token ID | 1674 |
Feature activation | +9.12e-02 |
Pos logit contributions | |
ily | +0.008 |
ena | +0.008 |
Ben | +0.008 |
They | +0.007 |
era | +0.007 |
Neg logit contributions | |
purchases | -0.010 |
use | -0.010 |
ge | -0.010 |
reins | -0.010 |
allowance | -0.009 |
Token | shadow |
Token ID | 19106 |
Feature activation | -1.86e-02 |
Pos logit contributions | |
single | +0.074 |
purchases | +0.072 |
weights | +0.066 |
unnecessary | +0.064 |
heels | +0.064 |
Neg logit contributions | |
They | -0.210 |
ily | -0.195 |
ena | -0.192 |
era | -0.186 |
Ben | -0.184 |
Token | ed |
Token ID | 276 |
Feature activation | +3.58e-03 |
Pos logit contributions | |
ily | +0.035 |
They | +0.034 |
Ben | +0.033 |
ia | +0.033 |
Ben | +0.032 |
Neg logit contributions | |
ge | -0.022 |
single | -0.021 |
unnecessary | -0.021 |
toward | -0.020 |
ging | -0.020 |
Token | about |
Token ID | 546 |
Feature activation | -2.22e-02 |
Pos logit contributions | |
ge | +0.007 |
purchases | +0.007 |
unnecessary | +0.006 |
single | +0.006 |
picking | +0.006 |
Neg logit contributions | |
ily | -0.005 |
Ben | -0.004 |
They | -0.004 |
ia | -0.004 |
ena | -0.004 |
Token | the |
Token ID | 262 |
Feature activation | +5.41e-03 |
Pos logit contributions | |
ily | +0.027 |
They | +0.024 |
ia | +0.024 |
Ben | +0.023 |
ena | +0.023 |
Neg logit contributions | |
ge | -0.043 |
single | -0.043 |
unnecessary | -0.043 |
purchases | -0.042 |
toward | -0.041 |
Token | power |
Token ID | 1176 |
Feature activation | +8.85e-04 |
Pos logit contributions | |
ge | +0.009 |
unnecessary | +0.009 |
purchases | +0.008 |
toward | +0.008 |
single | +0.008 |
Neg logit contributions | |
ily | -0.008 |
ia | -0.008 |
Ben | -0.008 |
Ben | -0.008 |
ena | -0.007 |
Token | if |
Token ID | 611 |
Feature activation | +8.97e-03 |
Pos logit contributions | |
unnecessary | +0.056 |
single | +0.052 |
heels | +0.051 |
saddle | +0.051 |
ge | +0.051 |
Neg logit contributions | |
ily | -0.035 |
Ben | -0.032 |
They | -0.031 |
ia | -0.029 |
Tom | -0.028 |
Token | she |
Token ID | 673 |
Feature activation | +4.82e-02 |
Pos logit contributions | |
shopping | +0.017 |
purchases | +0.017 |
single | +0.017 |
unnecessary | +0.017 |
picking | +0.017 |
Neg logit contributions | |
ily | -0.010 |
ia | -0.010 |
They | -0.010 |
era | -0.010 |
ena | -0.009 |
Token | could |
Token ID | 714 |
Feature activation | +1.14e-02 |
Pos logit contributions | |
ge | +0.096 |
single | +0.095 |
unnecessary | +0.095 |
purchases | +0.094 |
shopping | +0.091 |
Neg logit contributions | |
ily | -0.057 |
They | -0.051 |
ia | -0.051 |
Ben | -0.048 |
ena | -0.048 |
Token | request |
Token ID | 2581 |
Feature activation | -9.87e-04 |
Pos logit contributions | |
unnecessary | +0.019 |
ge | +0.018 |
single | +0.018 |
purchases | +0.017 |
ging | +0.017 |
Neg logit contributions | |
ily | -0.018 |
They | -0.017 |
ia | -0.016 |
Ben | -0.016 |
Ben | -0.016 |
Token | some |
Token ID | 617 |
Feature activation | +2.34e-02 |
Pos logit contributions | |
ily | +0.001 |
They | +0.001 |
Ben | +0.001 |
Tom | +0.001 |
ia | +0.001 |
Neg logit contributions | |
ge | -0.002 |
unnecessary | -0.002 |
single | -0.002 |
purchases | -0.002 |
ging | -0.002 |
Token | av |
Token ID | 1196 |
Feature activation | +8.98e-02 |
Pos logit contributions | |
ge | +0.045 |
unnecessary | +0.044 |
ging | +0.043 |
z | +0.042 |
toward | +0.042 |
Neg logit contributions | |
ily | -0.033 |
They | -0.027 |
ia | -0.026 |
Ben | -0.025 |
Everyone | -0.024 |
Token | oc |
Token ID | 420 |
Feature activation | +2.54e-02 |
Pos logit contributions | |
unnecessary | +0.116 |
toward | +0.109 |
heels | +0.109 |
shopping | +0.103 |
ge | +0.102 |
Neg logit contributions | |
They | -0.168 |
ily | -0.155 |
Ben | -0.153 |
aw | -0.148 |
una | -0.148 |
Token | ados |
Token ID | 22484 |
Feature activation | +2.38e-02 |
Pos logit contributions | |
single | +0.040 |
unnecessary | +0.040 |
ge | +0.040 |
toward | +0.038 |
fy | +0.037 |
Neg logit contributions | |
ily | -0.041 |
ia | -0.037 |
They | -0.035 |
room | -0.035 |
era | -0.035 |
Token | from |
Token ID | 422 |
Feature activation | +5.38e-03 |
Pos logit contributions | |
unnecessary | +0.048 |
single | +0.046 |
ge | +0.045 |
purchases | +0.045 |
ging | +0.044 |
Neg logit contributions | |
ily | -0.031 |
They | -0.027 |
Ben | -0.027 |
ia | -0.026 |
era | -0.024 |
Token | the |
Token ID | 262 |
Feature activation | -1.56e-03 |
Pos logit contributions | |
ge | +0.012 |
unnecessary | +0.012 |
single | +0.012 |
fy | +0.011 |
purchases | +0.011 |
Neg logit contributions | |
ily | -0.006 |
Ben | -0.005 |
They | -0.005 |
ena | -0.005 |
Ben | -0.005 |
Token | store |
Token ID | 3650 |
Feature activation | +2.28e-02 |
Pos logit contributions | |
ily | +0.002 |
Ben | +0.002 |
twins | +0.002 |
Ben | +0.002 |
Tom | +0.001 |
Neg logit contributions | |
ge | -0.003 |
ening | -0.003 |
run | -0.003 |
unnecessary | -0.003 |
ging | -0.003 |
Token |
|
Token ID | 198 |
Feature activation | +6.25e-03 |
Pos logit contributions | |
single | +0.024 |
ge | +0.024 |
purchases | +0.023 |
unnecessary | +0.023 |
toward | +0.023 |
Neg logit contributions | |
ily | -0.011 |
ia | -0.010 |
era | -0.009 |
ena | -0.009 |
They | -0.009 |
Token | The |
Token ID | 464 |
Feature activation | +1.47e-02 |
Pos logit contributions | |
single | +0.014 |
ge | +0.014 |
purchases | +0.013 |
unnecessary | +0.013 |
toward | +0.013 |
Neg logit contributions | |
ily | -0.006 |
ia | -0.005 |
era | -0.005 |
ena | -0.005 |
Ben | -0.005 |
Token | girl |
Token ID | 2576 |
Feature activation | -1.28e-02 |
Pos logit contributions | |
ge | +0.028 |
unnecessary | +0.028 |
purchases | +0.027 |
single | +0.027 |
use | +0.027 |
Neg logit contributions | |
ily | -0.020 |
ia | -0.018 |
Ben | -0.017 |
Ben | -0.017 |
ena | -0.017 |
Token | had |
Token ID | 550 |
Feature activation | +1.61e-02 |
Pos logit contributions | |
ily | +0.016 |
They | +0.014 |
ia | +0.014 |
Ben | +0.014 |
era | +0.013 |
Neg logit contributions | |
ge | -0.025 |
single | -0.025 |
purchases | -0.024 |
unnecessary | -0.024 |
toward | -0.023 |
Token | been |
Token ID | 587 |
Feature activation | +2.28e-04 |
Pos logit contributions | |
ge | +0.029 |
single | +0.029 |
unnecessary | +0.029 |
purchases | +0.029 |
shopping | +0.028 |
Neg logit contributions | |
ily | -0.022 |
They | -0.020 |
ia | -0.020 |
Ben | -0.019 |
ena | -0.019 |
Token | fore |
Token ID | 1674 |
Feature activation | +8.89e-02 |
Pos logit contributions | |
ge | +0.000 |
weights | +0.000 |
purchases | +0.000 |
use | +0.000 |
unnecessary | +0.000 |
Neg logit contributions | |
ily | -0.000 |
ena | -0.000 |
Ben | -0.000 |
They | -0.000 |
not | -0.000 |
Token | shadow |
Token ID | 19106 |
Feature activation | -1.34e-02 |
Pos logit contributions | |
single | +0.071 |
weights | +0.067 |
purchases | +0.066 |
heels | +0.066 |
unnecessary | +0.065 |
Neg logit contributions | |
They | -0.207 |
ily | -0.196 |
ena | -0.189 |
era | -0.181 |
Luckily | -0.181 |
Token | ed |
Token ID | 276 |
Feature activation | +6.68e-03 |
Pos logit contributions | |
ily | +0.026 |
They | +0.025 |
ia | +0.025 |
Ben | +0.025 |
ena | +0.024 |
Neg logit contributions | |
ge | -0.015 |
single | -0.015 |
unnecessary | -0.015 |
toward | -0.014 |
ging | -0.014 |
Token | . |
Token ID | 13 |
Feature activation | +6.14e-03 |
Pos logit contributions | |
ge | +0.013 |
purchases | +0.012 |
unnecessary | +0.012 |
picking | +0.012 |
single | +0.012 |
Neg logit contributions | |
ily | -0.009 |
Ben | -0.008 |
ia | -0.008 |
They | -0.008 |
ena | -0.007 |
Token | Even |
Token ID | 3412 |
Feature activation | +1.18e-02 |
Pos logit contributions | |
single | +0.012 |
ge | +0.012 |
purchases | +0.012 |
unnecessary | +0.012 |
toward | +0.012 |
Neg logit contributions | |
ily | -0.007 |
They | -0.006 |
ia | -0.006 |
Ben | -0.006 |
era | -0.006 |
Token | though |
Token ID | 996 |
Feature activation | -1.75e-02 |
Pos logit contributions | |
purchases | +0.022 |
use | +0.021 |
unnecessary | +0.021 |
picking | +0.021 |
ge | +0.020 |
Neg logit contributions | |
ily | -0.016 |
Ben | -0.016 |
They | -0.014 |
who | -0.014 |
ena | -0.014 |
Token | know |
Token ID | 760 |
Feature activation | +6.87e-03 |
Pos logit contributions | |
ily | +0.006 |
They | +0.006 |
ia | +0.006 |
ena | +0.005 |
era | +0.005 |
Neg logit contributions | |
unnecessary | -0.008 |
ge | -0.008 |
single | -0.008 |
purchases | -0.008 |
ging | -0.008 |
Token | , |
Token ID | 11 |
Feature activation | -1.10e-02 |
Pos logit contributions | |
ge | +0.013 |
single | +0.013 |
unnecessary | +0.013 |
purchases | +0.013 |
shopping | +0.013 |
Neg logit contributions | |
ily | -0.008 |
ia | -0.007 |
They | -0.007 |
Ben | -0.007 |
era | -0.007 |
Token | her |
Token ID | 607 |
Feature activation | -1.03e-03 |
Pos logit contributions | |
ily | +0.013 |
They | +0.012 |
ia | +0.012 |
Ben | +0.011 |
ena | +0.011 |
Neg logit contributions | |
ge | -0.022 |
unnecessary | -0.022 |
single | -0.021 |
purchases | -0.021 |
use | -0.021 |
Token | mum |
Token ID | 25682 |
Feature activation | +2.28e-02 |
Pos logit contributions | |
ia | +0.001 |
ily | +0.001 |
era | +0.001 |
Ben | +0.001 |
ena | +0.001 |
Neg logit contributions | |
ge | -0.002 |
use | -0.002 |
unnecessary | -0.002 |
ging | -0.002 |
picking | -0.002 |
Token | was |
Token ID | 373 |
Feature activation | -8.50e-03 |
Pos logit contributions | |
unnecessary | +0.048 |
ge | +0.047 |
single | +0.047 |
purchases | +0.046 |
toward | +0.044 |
Neg logit contributions | |
ily | -0.025 |
They | -0.023 |
ia | -0.022 |
ena | -0.022 |
Ben | -0.021 |
Token | fore |
Token ID | 1674 |
Feature activation | +8.89e-02 |
Pos logit contributions | |
ily | +0.013 |
ena | +0.012 |
They | +0.012 |
Ben | +0.011 |
ia | +0.011 |
Neg logit contributions | |
unnecessary | -0.015 |
use | -0.015 |
ge | -0.015 |
purchases | -0.014 |
picking | -0.014 |
Token | shadow |
Token ID | 19106 |
Feature activation | -2.41e-02 |
Pos logit contributions | |
single | +0.074 |
unnecessary | +0.073 |
heels | +0.073 |
weights | +0.071 |
purchases | +0.071 |
Neg logit contributions | |
They | -0.203 |
ily | -0.187 |
ena | -0.183 |
era | -0.177 |
Luckily | -0.176 |
Token | ing |
Token ID | 278 |
Feature activation | +4.33e-03 |
Pos logit contributions | |
ily | +0.044 |
They | +0.042 |
ia | +0.042 |
Ben | +0.042 |
Ben | +0.040 |
Neg logit contributions | |
ge | -0.030 |
single | -0.029 |
unnecessary | -0.029 |
ging | -0.028 |
toward | -0.028 |
Token | ... |
Token ID | 986 |
Feature activation | -2.22e-02 |
Pos logit contributions | |
ge | +0.008 |
unnecessary | +0.008 |
purchases | +0.008 |
single | +0.008 |
use | +0.008 |
Neg logit contributions | |
ily | -0.006 |
They | -0.005 |
ia | -0.005 |
Ben | -0.005 |
ena | -0.005 |
Token | the |
Token ID | 1169 |
Feature activation | +1.53e-04 |
Pos logit contributions | |
ily | +0.032 |
They | +0.030 |
ena | +0.027 |
Ben | +0.027 |
ia | +0.027 |
Neg logit contributions | |
unnecessary | -0.040 |
ge | -0.039 |
purchases | -0.039 |
single | -0.038 |
use | -0.037 |
Token | next |
Token ID | 1306 |
Feature activation | +3.29e-02 |
Pos logit contributions | |
ge | +0.000 |
unnecessary | +0.000 |
purchases | +0.000 |
single | +0.000 |
use | +0.000 |
Neg logit contributions | |
ily | -0.000 |
ia | -0.000 |
They | -0.000 |
Ben | -0.000 |
era | -0.000 |
Token | yard |
Token ID | 12699 |
Feature activation | -2.43e-02 |
Pos logit contributions | |
ily | +0.023 |
ia | +0.022 |
They | +0.021 |
Ben | +0.021 |
Ben | +0.021 |
Neg logit contributions | |
unnecessary | -0.023 |
ge | -0.022 |
purchases | -0.022 |
single | -0.021 |
ging | -0.021 |
Token | . |
Token ID | 13 |
Feature activation | -4.18e-03 |
Pos logit contributions | |
ily | +0.030 |
They | +0.027 |
ia | +0.027 |
Ben | +0.026 |
ena | +0.025 |
Neg logit contributions | |
ge | -0.047 |
single | -0.047 |
purchases | -0.046 |
unnecessary | -0.046 |
shopping | -0.045 |
Token | He |
Token ID | 679 |
Feature activation | -1.59e-02 |
Pos logit contributions | |
ily | +0.005 |
They | +0.005 |
ia | +0.005 |
Ben | +0.005 |
ena | +0.005 |
Neg logit contributions | |
ge | -0.008 |
unnecessary | -0.008 |
single | -0.008 |
purchases | -0.008 |
use | -0.007 |
Token | had |
Token ID | 550 |
Feature activation | +1.17e-02 |
Pos logit contributions | |
ily | +0.020 |
They | +0.018 |
ia | +0.018 |
Ben | +0.017 |
ena | +0.017 |
Neg logit contributions | |
ge | -0.030 |
single | -0.030 |
unnecessary | -0.030 |
purchases | -0.030 |
shopping | -0.029 |
Token | been |
Token ID | 587 |
Feature activation | -3.95e-03 |
Pos logit contributions | |
ge | +0.023 |
single | +0.023 |
purchases | +0.023 |
unnecessary | +0.023 |
ging | +0.022 |
Neg logit contributions | |
ily | -0.014 |
They | -0.013 |
ia | -0.012 |
Ben | -0.012 |
Ben | -0.012 |
Token | fore |
Token ID | 1674 |
Feature activation | +8.82e-02 |
Pos logit contributions | |
ily | +0.006 |
not | +0.005 |
Ben | +0.005 |
ena | +0.005 |
twins | +0.005 |
Neg logit contributions | |
purchases | -0.007 |
ge | -0.007 |
use | -0.007 |
weights | -0.007 |
toward | -0.007 |
Token | shadow |
Token ID | 19106 |
Feature activation | -1.81e-02 |
Pos logit contributions | |
single | +0.069 |
purchases | +0.065 |
weights | +0.060 |
heels | +0.060 |
unnecessary | +0.057 |
Neg logit contributions | |
They | -0.208 |
ily | -0.197 |
ena | -0.189 |
Ben | -0.184 |
era | -0.182 |
Token | ed |
Token ID | 276 |
Feature activation | +3.14e-03 |
Pos logit contributions | |
ily | +0.036 |
They | +0.035 |
ia | +0.034 |
Ben | +0.034 |
era | +0.033 |
Neg logit contributions | |
ge | -0.020 |
single | -0.019 |
unnecessary | -0.019 |
ging | -0.018 |
toward | -0.018 |
Token | about |
Token ID | 546 |
Feature activation | -1.80e-02 |
Pos logit contributions | |
purchases | +0.006 |
ge | +0.006 |
picking | +0.006 |
single | +0.006 |
unnecessary | +0.006 |
Neg logit contributions | |
ily | -0.004 |
Ben | -0.004 |
Tom | -0.003 |
They | -0.003 |
ia | -0.003 |
Token | this |
Token ID | 428 |
Feature activation | +7.14e-04 |
Pos logit contributions | |
ily | +0.019 |
Ben | +0.017 |
They | +0.017 |
ia | +0.016 |
ena | +0.016 |
Neg logit contributions | |
single | -0.037 |
ge | -0.037 |
unnecessary | -0.037 |
use | -0.037 |
purchases | -0.036 |
Token | bad |
Token ID | 2089 |
Feature activation | +1.46e-02 |
Pos logit contributions | |
unnecessary | +0.001 |
ge | +0.001 |
purchases | +0.001 |
picking | +0.001 |
ging | +0.001 |
Neg logit contributions | |
ily | -0.001 |
ia | -0.001 |
Ben | -0.001 |
ena | -0.001 |
Ben | -0.001 |
Token | . |
Token ID | 13 |
Feature activation | +8.18e-03 |
Pos logit contributions | |
ily | +0.010 |
They | +0.009 |
ia | +0.009 |
Ben | +0.008 |
Ben | +0.008 |
Neg logit contributions | |
unnecessary | -0.019 |
ge | -0.019 |
purchases | -0.018 |
single | -0.018 |
use | -0.018 |
Token | Mom |
Token ID | 11254 |
Feature activation | +8.26e-03 |
Pos logit contributions | |
single | +0.017 |
purchases | +0.016 |
ge | +0.016 |
unnecessary | +0.016 |
toward | +0.016 |
Neg logit contributions | |
ily | -0.009 |
They | -0.008 |
ia | -0.008 |
ena | -0.008 |
Ben | -0.008 |
Token | says |
Token ID | 1139 |
Feature activation | +1.37e-02 |
Pos logit contributions | |
unnecessary | +0.016 |
ge | +0.016 |
single | +0.016 |
purchases | +0.015 |
ging | +0.015 |
Neg logit contributions | |
ily | -0.011 |
ia | -0.010 |
They | -0.010 |
Ben | -0.009 |
ena | -0.009 |
Token | they |
Token ID | 484 |
Feature activation | +3.34e-02 |
Pos logit contributions | |
ge | +0.026 |
unnecessary | +0.025 |
single | +0.025 |
purchases | +0.025 |
toward | +0.024 |
Neg logit contributions | |
ily | -0.018 |
ia | -0.017 |
They | -0.017 |
Ben | -0.016 |
Ben | -0.016 |
Token | are |
Token ID | 389 |
Feature activation | -1.10e-02 |
Pos logit contributions | |
ge | +0.065 |
unnecessary | +0.063 |
single | +0.062 |
purchases | +0.061 |
ging | +0.061 |
Neg logit contributions | |
ily | -0.044 |
They | -0.040 |
Ben | -0.038 |
Ben | -0.037 |
ia | -0.036 |
Token | av |
Token ID | 1196 |
Feature activation | +8.82e-02 |
Pos logit contributions | |
ily | +0.017 |
Ben | +0.016 |
not | +0.015 |
They | +0.015 |
ia | +0.014 |
Neg logit contributions | |
ge | -0.022 |
unnecessary | -0.021 |
use | -0.020 |
single | -0.020 |
aback | -0.019 |
Token | oc |
Token ID | 420 |
Feature activation | +1.04e-02 |
Pos logit contributions | |
unnecessary | +0.110 |
toward | +0.108 |
ge | +0.102 |
insignificant | +0.101 |
shopping | +0.100 |
Neg logit contributions | |
They | -0.167 |
Ben | -0.158 |
ily | -0.153 |
Tom | -0.150 |
ena | -0.150 |
Token | ados |
Token ID | 22484 |
Feature activation | +1.40e-02 |
Pos logit contributions | |
single | +0.012 |
ge | +0.012 |
unnecessary | +0.012 |
purchases | +0.011 |
toward | +0.011 |
Neg logit contributions | |
ily | -0.021 |
ia | -0.020 |
They | -0.019 |
era | -0.019 |
ena | -0.019 |
Token | . |
Token ID | 13 |
Feature activation | +7.00e-03 |
Pos logit contributions | |
ge | +0.029 |
unnecessary | +0.029 |
single | +0.029 |
purchases | +0.029 |
toward | +0.028 |
Neg logit contributions | |
ily | -0.015 |
They | -0.013 |
ia | -0.013 |
Ben | -0.012 |
ena | -0.012 |
Token | Sara |
Token ID | 24799 |
Feature activation | -6.15e-03 |
Pos logit contributions | |
single | +0.014 |
ge | +0.014 |
purchases | +0.014 |
unnecessary | +0.014 |
shopping | +0.013 |
Neg logit contributions | |
ily | -0.008 |
ia | -0.007 |
They | -0.007 |
ena | -0.007 |
Ben | -0.007 |
Token | wants |
Token ID | 3382 |
Feature activation | -3.79e-03 |
Pos logit contributions | |
ily | +0.010 |
They | +0.010 |
ia | +0.010 |
Ben | +0.009 |
ena | +0.009 |
Neg logit contributions | |
ge | -0.009 |
single | -0.009 |
purchases | -0.009 |
unnecessary | -0.009 |
shopping | -0.008 |
Token | still |
Token ID | 991 |
Feature activation | -1.10e-02 |
Pos logit contributions | |
ge | +0.030 |
unnecessary | +0.029 |
purchases | +0.029 |
single | +0.029 |
use | +0.028 |
Neg logit contributions | |
ily | -0.014 |
Ben | -0.012 |
ia | -0.012 |
They | -0.011 |
Ben | -0.011 |
Token | couldn |
Token ID | 3521 |
Feature activation | -1.47e-02 |
Pos logit contributions | |
ily | +0.014 |
ia | +0.012 |
Ben | +0.012 |
They | +0.012 |
ena | +0.012 |
Neg logit contributions | |
purchases | -0.021 |
ge | -0.021 |
unnecessary | -0.021 |
single | -0.021 |
picking | -0.020 |
Token | 't |
Token ID | 470 |
Feature activation | -1.41e-02 |
Pos logit contributions | |
ily | +0.018 |
ia | +0.016 |
They | +0.016 |
ena | +0.016 |
Ben | +0.016 |
Neg logit contributions | |
single | -0.029 |
ge | -0.028 |
unnecessary | -0.028 |
purchases | -0.027 |
shopping | -0.027 |
Token | find |
Token ID | 1064 |
Feature activation | +4.57e-03 |
Pos logit contributions | |
ily | +0.027 |
They | +0.025 |
ia | +0.025 |
Ben | +0.024 |
ena | +0.024 |
Neg logit contributions | |
ge | -0.018 |
single | -0.018 |
unnecessary | -0.017 |
purchases | -0.017 |
toward | -0.017 |
Token | any |
Token ID | 597 |
Feature activation | +1.79e-02 |
Pos logit contributions | |
ge | +0.010 |
purchases | +0.010 |
use | +0.009 |
single | +0.009 |
ging | +0.009 |
Neg logit contributions | |
ily | -0.005 |
Ben | -0.005 |
ia | -0.004 |
Tom | -0.004 |
ena | -0.004 |
Token | av |
Token ID | 1196 |
Feature activation | +8.77e-02 |
Pos logit contributions | |
picking | +0.034 |
ging | +0.033 |
use | +0.033 |
purchases | +0.033 |
z | +0.032 |
Neg logit contributions | |
ily | -0.023 |
ia | -0.021 |
Ben | -0.020 |
They | -0.018 |
who | -0.018 |
Token | oc |
Token ID | 420 |
Feature activation | +2.41e-02 |
Pos logit contributions | |
purchases | +0.117 |
shopping | +0.114 |
matt | +0.111 |
toward | +0.105 |
unnecessary | +0.105 |
Neg logit contributions | |
aw | -0.151 |
They | -0.146 |
Ben | -0.134 |
ily | -0.132 |
Tom | -0.131 |
Token | ados |
Token ID | 22484 |
Feature activation | +1.77e-02 |
Pos logit contributions | |
single | +0.039 |
ge | +0.038 |
suit | +0.038 |
unnecessary | +0.037 |
toward | +0.037 |
Neg logit contributions | |
ily | -0.035 |
aw | -0.033 |
der | -0.032 |
ia | -0.031 |
room | -0.030 |
Token | . |
Token ID | 13 |
Feature activation | -2.42e-03 |
Pos logit contributions | |
unnecessary | +0.040 |
single | +0.039 |
purchases | +0.039 |
ge | +0.037 |
insignificant | +0.037 |
Neg logit contributions | |
ily | -0.018 |
They | -0.015 |
Ben | -0.014 |
ia | -0.014 |
ena | -0.013 |
Token | Finally |
Token ID | 9461 |
Feature activation | +1.61e-02 |
Pos logit contributions | |
ily | +0.003 |
ia | +0.003 |
They | +0.003 |
ena | +0.003 |
Ben | +0.003 |
Neg logit contributions | |
single | -0.004 |
ge | -0.004 |
unnecessary | -0.004 |
purchases | -0.004 |
shopping | -0.004 |
Token | , |
Token ID | 11 |
Feature activation | -1.72e-03 |
Pos logit contributions | |
ge | +0.028 |
single | +0.028 |
unnecessary | +0.027 |
purchases | +0.027 |
shopping | +0.026 |
Neg logit contributions | |
ily | -0.023 |
They | -0.021 |
ia | -0.021 |
Ben | -0.020 |
ena | -0.020 |
Token | they |
Token ID | 484 |
Feature activation | +4.40e-02 |
Pos logit contributions | |
ily | +0.013 |
Ben | +0.012 |
era | +0.012 |
ia | +0.012 |
They | +0.012 |
Neg logit contributions | |
ge | -0.025 |
single | -0.024 |
shopping | -0.024 |
purchases | -0.024 |
down | -0.024 |
Token | had |
Token ID | 550 |
Feature activation | +2.69e-02 |
Pos logit contributions | |
ge | +0.071 |
single | +0.069 |
unnecessary | +0.068 |
purchases | +0.067 |
anywhere | +0.066 |
Neg logit contributions | |
ily | -0.065 |
ia | -0.065 |
era | -0.061 |
Ben | -0.060 |
una | -0.060 |
Token | never |
Token ID | 1239 |
Feature activation | +3.78e-02 |
Pos logit contributions | |
ge | +0.055 |
unnecessary | +0.054 |
single | +0.054 |
purchases | +0.054 |
shopping | +0.052 |
Neg logit contributions | |
ily | -0.030 |
ia | -0.027 |
They | -0.027 |
Ben | -0.026 |
era | -0.025 |
Token | seen |
Token ID | 1775 |
Feature activation | -2.28e-02 |
Pos logit contributions | |
single | +0.069 |
unnecessary | +0.069 |
purchases | +0.068 |
ge | +0.068 |
ging | +0.066 |
Neg logit contributions | |
ily | -0.052 |
They | -0.045 |
ia | -0.045 |
Ben | -0.044 |
ena | -0.044 |
Token | the |
Token ID | 262 |
Feature activation | +2.09e-03 |
Pos logit contributions | |
ily | +0.028 |
Ben | +0.025 |
They | +0.025 |
ia | +0.024 |
Ben | +0.023 |
Neg logit contributions | |
ge | -0.045 |
single | -0.044 |
unnecessary | -0.044 |
purchases | -0.044 |
toward | -0.043 |
Token | av |
Token ID | 1196 |
Feature activation | +8.72e-02 |
Pos logit contributions | |
ge | +0.004 |
unnecessary | +0.004 |
purchases | +0.003 |
single | +0.003 |
toward | +0.003 |
Neg logit contributions | |
ily | -0.003 |
Ben | -0.003 |
ia | -0.003 |
They | -0.003 |
Ben | -0.003 |
Token | oc |
Token ID | 420 |
Feature activation | +2.48e-02 |
Pos logit contributions | |
toward | +0.125 |
unnecessary | +0.124 |
purchases | +0.123 |
ried | +0.123 |
heels | +0.121 |
Neg logit contributions | |
They | -0.134 |
Ben | -0.126 |
aw | -0.123 |
Tom | -0.123 |
Sam | -0.117 |
Token | ados |
Token ID | 22484 |
Feature activation | +1.83e-02 |
Pos logit contributions | |
unnecessary | +0.040 |
ge | +0.039 |
fy | +0.038 |
single | +0.037 |
proper | +0.036 |
Neg logit contributions | |
ily | -0.037 |
ia | -0.035 |
der | -0.035 |
era | -0.034 |
room | -0.034 |
Token | . |
Token ID | 13 |
Feature activation | +5.99e-03 |
Pos logit contributions | |
unnecessary | +0.038 |
purchases | +0.037 |
single | +0.036 |
ge | +0.036 |
picking | +0.035 |
Neg logit contributions | |
ily | -0.022 |
Ben | -0.019 |
They | -0.019 |
ia | -0.018 |
Ben | -0.017 |
Token | They |
Token ID | 1119 |
Feature activation | -2.56e-02 |
Pos logit contributions | |
unnecessary | +0.011 |
single | +0.010 |
budget | +0.010 |
purchases | +0.010 |
allowance | +0.010 |
Neg logit contributions | |
ily | -0.008 |
ia | -0.007 |
ickets | -0.007 |
una | -0.007 |
iv | -0.007 |
Token | wished |
Token ID | 16555 |
Feature activation | -1.39e-02 |
Pos logit contributions | |
ickets | +0.048 |
ia | +0.047 |
room | +0.047 |
ily | +0.046 |
una | +0.045 |
Neg logit contributions | |
bank | -0.032 |
heels | -0.030 |
ge | -0.030 |
pouch | -0.029 |
unnecessary | -0.028 |
Token | them |
Token ID | 606 |
Feature activation | +2.04e-03 |
Pos logit contributions | |
ily | +0.006 |
They | +0.005 |
Ben | +0.005 |
ia | +0.005 |
ena | +0.005 |
Neg logit contributions | |
ge | -0.010 |
single | -0.010 |
unnecessary | -0.010 |
use | -0.010 |
picking | -0.010 |
Token | . |
Token ID | 13 |
Feature activation | -2.94e-03 |
Pos logit contributions | |
unnecessary | +0.005 |
single | +0.005 |
ge | +0.005 |
purchases | +0.004 |
ging | +0.004 |
Neg logit contributions | |
ily | -0.002 |
They | -0.002 |
Ben | -0.001 |
ia | -0.001 |
ena | -0.001 |
Token | It |
Token ID | 632 |
Feature activation | -5.41e-03 |
Pos logit contributions | |
ily | +0.004 |
ia | +0.003 |
era | +0.003 |
They | +0.003 |
ena | +0.003 |
Neg logit contributions | |
single | -0.005 |
purchases | -0.005 |
ge | -0.005 |
shopping | -0.005 |
toward | -0.005 |
Token | had |
Token ID | 550 |
Feature activation | +8.95e-03 |
Pos logit contributions | |
ily | +0.006 |
They | +0.005 |
ia | +0.005 |
Ben | +0.005 |
ena | +0.005 |
Neg logit contributions | |
ge | -0.011 |
unnecessary | -0.011 |
single | -0.011 |
purchases | -0.011 |
shopping | -0.011 |
Token | a |
Token ID | 257 |
Feature activation | +1.23e-02 |
Pos logit contributions | |
single | +0.017 |
ge | +0.017 |
unnecessary | +0.017 |
purchases | +0.017 |
shopping | +0.016 |
Neg logit contributions | |
ily | -0.011 |
ia | -0.010 |
They | -0.010 |
Ben | -0.010 |
era | -0.010 |
Token | convey |
Token ID | 13878 |
Feature activation | +8.68e-02 |
Pos logit contributions | |
ge | +0.022 |
unnecessary | +0.021 |
picking | +0.021 |
ried | +0.020 |
ging | +0.020 |
Neg logit contributions | |
They | -0.016 |
ily | -0.016 |
ia | -0.016 |
Ben | -0.016 |
Ben | -0.016 |
Token | or |
Token ID | 273 |
Feature activation | +1.59e-02 |
Pos logit contributions | |
unnecessary | +0.137 |
purchases | +0.125 |
single | +0.123 |
heels | +0.122 |
insignificant | +0.120 |
Neg logit contributions | |
ily | -0.157 |
They | -0.138 |
ena | -0.135 |
room | -0.130 |
Tom | -0.128 |
Token | belt |
Token ID | 10999 |
Feature activation | +2.32e-02 |
Pos logit contributions | |
unnecessary | +0.024 |
single | +0.024 |
picking | +0.024 |
purchases | +0.023 |
use | +0.023 |
Neg logit contributions | |
ily | -0.026 |
ena | -0.025 |
They | -0.023 |
ia | -0.023 |
room | -0.022 |
Token | , |
Token ID | 11 |
Feature activation | +4.20e-03 |
Pos logit contributions | |
unnecessary | +0.048 |
ge | +0.047 |
purchases | +0.047 |
single | +0.046 |
use | +0.045 |
Neg logit contributions | |
ily | -0.027 |
They | -0.024 |
ia | -0.023 |
ena | -0.022 |
Ben | -0.022 |
Token | a |
Token ID | 257 |
Feature activation | -5.30e-03 |
Pos logit contributions | |
ge | +0.008 |
single | +0.008 |
unnecessary | +0.008 |
purchases | +0.008 |
toward | +0.008 |
Neg logit contributions | |
ily | -0.005 |
They | -0.004 |
ia | -0.004 |
Ben | -0.004 |
ena | -0.004 |
Token | crane |
Token ID | 41175 |
Feature activation | -6.05e-03 |
Pos logit contributions | |
They | +0.007 |
ily | +0.007 |
Ben | +0.007 |
Ben | +0.007 |
ia | +0.007 |
Neg logit contributions | |
ge | -0.009 |
picking | -0.009 |
unnecessary | -0.009 |
ried | -0.009 |
purchases | -0.009 |
Token | see |
Token ID | 766 |
Feature activation | -1.25e-02 |
Pos logit contributions | |
ily | +0.005 |
They | +0.005 |
ia | +0.005 |
Ben | +0.005 |
Ben | +0.005 |
Neg logit contributions | |
ge | -0.006 |
unnecessary | -0.006 |
single | -0.006 |
ging | -0.006 |
purchases | -0.006 |
Token | the |
Token ID | 262 |
Feature activation | +3.42e-03 |
Pos logit contributions | |
ily | +0.014 |
They | +0.013 |
ia | +0.013 |
Ben | +0.012 |
Ben | +0.012 |
Neg logit contributions | |
ge | -0.026 |
single | -0.025 |
unnecessary | -0.025 |
purchases | -0.025 |
use | -0.024 |
Token | bird |
Token ID | 6512 |
Feature activation | -3.58e-03 |
Pos logit contributions | |
single | +0.006 |
unnecessary | +0.006 |
ge | +0.006 |
purchases | +0.005 |
shopping | +0.005 |
Neg logit contributions | |
ily | -0.005 |
ia | -0.005 |
They | -0.005 |
ena | -0.005 |
Ben | -0.005 |
Token | and |
Token ID | 290 |
Feature activation | +1.65e-03 |
Pos logit contributions | |
ily | +0.004 |
They | +0.004 |
ia | +0.004 |
Ben | +0.003 |
era | +0.003 |
Neg logit contributions | |
unnecessary | -0.007 |
purchases | -0.007 |
ge | -0.007 |
single | -0.007 |
weights | -0.007 |
Token | have |
Token ID | 423 |
Feature activation | +8.23e-03 |
Pos logit contributions | |
ge | +0.003 |
unnecessary | +0.003 |
single | +0.003 |
purchases | +0.003 |
ging | +0.003 |
Neg logit contributions | |
ily | -0.002 |
They | -0.002 |
ia | -0.002 |
Ben | -0.002 |
Ben | -0.002 |
Token | pic |
Token ID | 8301 |
Feature activation | +8.66e-02 |
Pos logit contributions | |
unnecessary | +0.014 |
single | +0.014 |
purchases | +0.014 |
ge | +0.014 |
shopping | +0.014 |
Neg logit contributions | |
ily | -0.012 |
ia | -0.011 |
They | -0.010 |
era | -0.010 |
Ben | -0.010 |
Token | n |
Token ID | 77 |
Feature activation | -5.97e-03 |
Pos logit contributions | |
ge | +0.138 |
single | +0.137 |
unnecessary | +0.137 |
purchases | +0.135 |
toward | +0.129 |
Neg logit contributions | |
ily | -0.136 |
They | -0.126 |
ia | -0.125 |
Ben | -0.120 |
ena | -0.119 |
Token | ics |
Token ID | 873 |
Feature activation | +1.74e-02 |
Pos logit contributions | |
ily | +0.010 |
Ben | +0.010 |
Ben | +0.010 |
era | +0.010 |
ena | +0.009 |
Neg logit contributions | |
ge | -0.009 |
z | -0.008 |
ging | -0.008 |
unst | -0.007 |
down | -0.007 |
Token | . |
Token ID | 13 |
Feature activation | -5.73e-03 |
Pos logit contributions | |
ge | +0.032 |
unnecessary | +0.031 |
single | +0.031 |
purchases | +0.031 |
shopping | +0.029 |
Neg logit contributions | |
ily | -0.024 |
They | -0.022 |
ia | -0.021 |
Ben | -0.021 |
era | -0.020 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | -4.98e-03 |
Pos logit contributions | |
ily | +0.007 |
ia | +0.006 |
era | +0.006 |
ena | +0.006 |
Ben | +0.006 |
Neg logit contributions | |
single | -0.011 |
purchases | -0.011 |
shopping | -0.011 |
unnecessary | -0.011 |
ge | -0.011 |
Token | Once |
Token ID | 7454 |
Feature activation | +1.27e-02 |
Pos logit contributions | |
ily | +0.006 |
ia | +0.006 |
era | +0.006 |
ena | +0.006 |
They | +0.005 |
Neg logit contributions | |
single | -0.009 |
down | -0.009 |
shopping | -0.009 |
ge | -0.009 |
toward | -0.009 |
Token | Tri |
Token ID | 7563 |
Feature activation | +2.00e-02 |
Pos logit contributions | |
ia | +0.006 |
ily | +0.006 |
era | +0.006 |
They | +0.006 |
Ben | +0.006 |
Neg logit contributions | |
single | -0.007 |
use | -0.007 |
unnecessary | -0.007 |
aback | -0.007 |
shopping | -0.007 |
Token | angles |
Token ID | 27787 |
Feature activation | -2.29e-02 |
Pos logit contributions | |
ge | +0.026 |
use | +0.024 |
purchases | +0.024 |
single | +0.024 |
unnecessary | +0.024 |
Neg logit contributions | |
ily | -0.037 |
ia | -0.035 |
They | -0.035 |
Ben | -0.034 |
Ben | -0.034 |
Token | are |
Token ID | 389 |
Feature activation | -2.64e-02 |
Pos logit contributions | |
ily | +0.028 |
Ben | +0.025 |
ena | +0.025 |
era | +0.025 |
They | +0.025 |
Neg logit contributions | |
ge | -0.041 |
down | -0.040 |
single | -0.040 |
purchases | -0.040 |
use | -0.040 |
Token | wonderful |
Token ID | 7932 |
Feature activation | -2.67e-02 |
Pos logit contributions | |
ily | +0.042 |
They | +0.039 |
ia | +0.039 |
Ben | +0.037 |
ena | +0.037 |
Neg logit contributions | |
ge | -0.045 |
unnecessary | -0.043 |
single | -0.042 |
purchases | -0.042 |
use | -0.042 |
Token | , |
Token ID | 11 |
Feature activation | -3.09e-02 |
Pos logit contributions | |
ily | +0.032 |
They | +0.029 |
ia | +0.028 |
Ben | +0.027 |
ena | +0.027 |
Neg logit contributions | |
ge | -0.053 |
single | -0.052 |
unnecessary | -0.052 |
purchases | -0.051 |
shopping | -0.050 |
Token | aren |
Token ID | 3588 |
Feature activation | -7.89e-02 |
Pos logit contributions | |
ily | +0.043 |
ia | +0.040 |
They | +0.039 |
Ben | +0.038 |
era | +0.038 |
Neg logit contributions | |
single | -0.053 |
ge | -0.051 |
unnecessary | -0.051 |
purchases | -0.051 |
shopping | -0.050 |
Token | 't |
Token ID | 470 |
Feature activation | -2.88e-02 |
Pos logit contributions | |
ily | +0.108 |
ia | +0.106 |
era | +0.105 |
Ben | +0.104 |
ena | +0.103 |
Neg logit contributions | |
single | -0.126 |
ge | -0.126 |
shopping | -0.124 |
purchases | -0.121 |
picking | -0.121 |
Token | they |
Token ID | 484 |
Feature activation | -1.46e-02 |
Pos logit contributions | |
ily | +0.043 |
ia | +0.040 |
They | +0.040 |
Ben | +0.039 |
era | +0.039 |
Neg logit contributions | |
single | -0.048 |
unnecessary | -0.048 |
ge | -0.048 |
purchases | -0.046 |
shopping | -0.045 |
Token | ?" |
Token ID | 1701 |
Feature activation | -2.02e-02 |
Pos logit contributions | |
ily | +0.019 |
They | +0.017 |
ia | +0.017 |
Ben | +0.016 |
ena | +0.016 |
Neg logit contributions | |
ge | -0.028 |
single | -0.028 |
unnecessary | -0.027 |
purchases | -0.027 |
toward | -0.026 |
Token | Tom |
Token ID | 4186 |
Feature activation | -1.55e-02 |
Pos logit contributions | |
ily | +0.020 |
ia | +0.019 |
era | +0.017 |
They | +0.017 |
una | +0.017 |
Neg logit contributions | |
single | -0.041 |
shopping | -0.041 |
ge | -0.040 |
purchases | -0.040 |
unnecessary | -0.040 |
Token | and |
Token ID | 290 |
Feature activation | -2.35e-03 |
Pos logit contributions | |
ily | +0.020 |
They | +0.017 |
ia | +0.017 |
Ben | +0.017 |
era | +0.016 |
Neg logit contributions | |
purchases | -0.028 |
ge | -0.028 |
down | -0.028 |
weights | -0.027 |
heels | -0.027 |
Token | your |
Token ID | 534 |
Feature activation | -6.82e-04 |
Pos logit contributions | |
ily | +0.008 |
They | +0.007 |
Ben | +0.007 |
ia | +0.007 |
Ben | +0.007 |
Neg logit contributions | |
ge | -0.014 |
single | -0.014 |
unnecessary | -0.014 |
purchases | -0.014 |
picking | -0.014 |
Token | toys |
Token ID | 14958 |
Feature activation | -7.64e-04 |
Pos logit contributions | |
ily | +0.001 |
twins | +0.001 |
ia | +0.001 |
Ben | +0.001 |
They | +0.001 |
Neg logit contributions | |
ge | -0.001 |
single | -0.001 |
toward | -0.001 |
unnecessary | -0.001 |
ging | -0.001 |
Token | . |
Token ID | 13 |
Feature activation | -1.26e-03 |
Pos logit contributions | |
ily | +0.001 |
They | +0.001 |
ia | +0.001 |
Ben | +0.001 |
ena | +0.001 |
Neg logit contributions | |
ge | -0.002 |
single | -0.002 |
unnecessary | -0.002 |
purchases | -0.002 |
shopping | -0.001 |
Token | I |
Token ID | 314 |
Feature activation | -1.88e-02 |
Pos logit contributions | |
ia | +0.002 |
era | +0.002 |
ily | +0.002 |
ena | +0.001 |
Ben | +0.001 |
Neg logit contributions | |
use | -0.002 |
single | -0.002 |
unnecessary | -0.002 |
aback | -0.002 |
shopping | -0.002 |
Token | am |
Token ID | 716 |
Feature activation | -3.32e-02 |
Pos logit contributions | |
ily | +0.028 |
They | +0.026 |
ia | +0.025 |
Ben | +0.024 |
ena | +0.024 |
Neg logit contributions | |
ge | -0.032 |
unnecessary | -0.032 |
purchases | -0.031 |
single | -0.031 |
toward | -0.030 |
Token | proud |
Token ID | 6613 |
Feature activation | -7.46e-02 |
Pos logit contributions | |
ily | +0.047 |
They | +0.042 |
ia | +0.042 |
Ben | +0.040 |
ena | +0.040 |
Neg logit contributions | |
ge | -0.060 |
single | -0.059 |
unnecessary | -0.059 |
purchases | -0.058 |
toward | -0.056 |
Token | of |
Token ID | 286 |
Feature activation | -1.22e-02 |
Pos logit contributions | |
era | +0.094 |
ia | +0.091 |
ena | +0.090 |
ily | +0.090 |
Ben | +0.085 |
Neg logit contributions | |
single | -0.138 |
down | -0.129 |
toward | -0.126 |
shopping | -0.125 |
ge | -0.124 |
Token | you |
Token ID | 345 |
Feature activation | -7.98e-03 |
Pos logit contributions | |
ily | +0.010 |
era | +0.009 |
ia | +0.009 |
They | +0.009 |
ena | +0.009 |
Neg logit contributions | |
unnecessary | -0.027 |
ge | -0.027 |
single | -0.026 |
shopping | -0.026 |
purchases | -0.026 |
Token | ." |
Token ID | 526 |
Feature activation | -2.27e-02 |
Pos logit contributions | |
ily | +0.011 |
ia | +0.011 |
Ben | +0.011 |
era | +0.010 |
They | +0.010 |
Neg logit contributions | |
ge | -0.013 |
single | -0.013 |
unnecessary | -0.013 |
shopping | -0.013 |
purchases | -0.013 |
Token |
|
Token ID | 198 |
Feature activation | -8.10e-03 |
Pos logit contributions | |
era | +0.032 |
ena | +0.032 |
room | +0.031 |
ia | +0.031 |
iv | +0.031 |
Neg logit contributions | |
down | -0.036 |
single | -0.036 |
shopping | -0.035 |
toward | -0.034 |
use | -0.032 |
Token |
|
Token ID | 198 |
Feature activation | -7.34e-03 |
Pos logit contributions | |
era | +0.011 |
ena | +0.010 |
ily | +0.010 |
room | +0.010 |
iv | +0.010 |
Neg logit contributions | |
single | -0.013 |
ge | -0.013 |
shopping | -0.013 |
down | -0.012 |
ging | -0.012 |
Token | your |
Token ID | 534 |
Feature activation | -1.88e-03 |
Pos logit contributions | |
ily | +0.022 |
They | +0.020 |
ia | +0.020 |
Ben | +0.019 |
Ben | +0.019 |
Neg logit contributions | |
ge | -0.033 |
single | -0.032 |
unnecessary | -0.032 |
purchases | -0.032 |
toward | -0.031 |
Token | toys |
Token ID | 14958 |
Feature activation | +2.16e-03 |
Pos logit contributions | |
ily | +0.002 |
ia | +0.002 |
Ben | +0.002 |
twins | +0.002 |
They | +0.002 |
Neg logit contributions | |
ge | -0.004 |
single | -0.004 |
unnecessary | -0.003 |
toward | -0.003 |
ging | -0.003 |
Token | . |
Token ID | 13 |
Feature activation | -1.15e-03 |
Pos logit contributions | |
unnecessary | +0.005 |
ge | +0.005 |
single | +0.005 |
purchases | +0.005 |
ging | +0.004 |
Neg logit contributions | |
ily | -0.002 |
They | -0.002 |
ia | -0.002 |
Ben | -0.002 |
Ben | -0.002 |
Token | I |
Token ID | 314 |
Feature activation | -1.80e-02 |
Pos logit contributions | |
ia | +0.002 |
ily | +0.002 |
era | +0.002 |
una | +0.002 |
Ben | +0.002 |
Neg logit contributions | |
aback | -0.002 |
unnecessary | -0.002 |
use | -0.002 |
single | -0.002 |
shopping | -0.002 |
Token | am |
Token ID | 716 |
Feature activation | -3.49e-02 |
Pos logit contributions | |
ily | +0.025 |
They | +0.023 |
Ben | +0.021 |
ia | +0.021 |
ena | +0.021 |
Neg logit contributions | |
unnecessary | -0.034 |
ge | -0.033 |
purchases | -0.032 |
heels | -0.032 |
ging | -0.032 |
Token | proud |
Token ID | 6613 |
Feature activation | -7.39e-02 |
Pos logit contributions | |
ily | +0.054 |
They | +0.049 |
Ben | +0.048 |
ena | +0.047 |
ia | +0.047 |
Neg logit contributions | |
ge | -0.061 |
unnecessary | -0.058 |
single | -0.058 |
purchases | -0.057 |
use | -0.056 |
Token | of |
Token ID | 286 |
Feature activation | -1.23e-02 |
Pos logit contributions | |
ily | +0.086 |
ia | +0.084 |
era | +0.084 |
Ben | +0.081 |
ena | +0.080 |
Neg logit contributions | |
single | -0.142 |
shopping | -0.133 |
down | -0.133 |
ge | -0.132 |
toward | -0.131 |
Token | you |
Token ID | 345 |
Feature activation | -6.94e-03 |
Pos logit contributions | |
ily | +0.011 |
ia | +0.009 |
They | +0.009 |
era | +0.009 |
ena | +0.009 |
Neg logit contributions | |
ge | -0.027 |
unnecessary | -0.027 |
shopping | -0.026 |
single | -0.026 |
purchases | -0.026 |
Token | ." |
Token ID | 526 |
Feature activation | -2.41e-02 |
Pos logit contributions | |
ily | +0.011 |
ia | +0.010 |
Ben | +0.010 |
era | +0.010 |
They | +0.010 |
Neg logit contributions | |
single | -0.010 |
ge | -0.010 |
unnecessary | -0.010 |
shopping | -0.010 |
down | -0.010 |
Token |
|
Token ID | 198 |
Feature activation | -7.25e-03 |
Pos logit contributions | |
era | +0.032 |
ily | +0.031 |
room | +0.031 |
iv | +0.031 |
ena | +0.030 |
Neg logit contributions | |
down | -0.039 |
single | -0.039 |
shopping | -0.039 |
purchases | -0.037 |
allowance | -0.036 |
Token |
|
Token ID | 198 |
Feature activation | -6.58e-03 |
Pos logit contributions | |
era | +0.009 |
ily | +0.009 |
ena | +0.009 |
iv | +0.008 |
room | +0.008 |
Neg logit contributions | |
single | -0.012 |
ge | -0.012 |
down | -0.012 |
shopping | -0.011 |
purchases | -0.011 |
Token | They |
Token ID | 1119 |
Feature activation | -1.33e-02 |
Pos logit contributions | |
single | +0.002 |
Gor | +0.002 |
unnecessary | +0.002 |
use | +0.002 |
aback | +0.002 |
Neg logit contributions | |
ily | -0.002 |
ia | -0.002 |
era | -0.002 |
ena | -0.001 |
Ben | -0.001 |
Token | are |
Token ID | 389 |
Feature activation | -3.03e-02 |
Pos logit contributions | |
ily | +0.013 |
ia | +0.012 |
They | +0.011 |
ena | +0.011 |
Ben | +0.011 |
Neg logit contributions | |
ge | -0.029 |
single | -0.029 |
unnecessary | -0.028 |
purchases | -0.028 |
use | -0.027 |
Token | very |
Token ID | 845 |
Feature activation | +2.17e-02 |
Pos logit contributions | |
ily | +0.046 |
ia | +0.042 |
They | +0.042 |
Ben | +0.042 |
era | +0.041 |
Neg logit contributions | |
single | -0.049 |
unnecessary | -0.048 |
ge | -0.048 |
purchases | -0.048 |
shopping | -0.046 |
Token | nice |
Token ID | 3621 |
Feature activation | -3.26e-02 |
Pos logit contributions | |
single | +0.029 |
unnecessary | +0.028 |
purchases | +0.028 |
ge | +0.028 |
shopping | +0.027 |
Neg logit contributions | |
ily | -0.039 |
era | -0.036 |
They | -0.036 |
ena | -0.036 |
ia | -0.036 |
Token | , |
Token ID | 11 |
Feature activation | -2.10e-02 |
Pos logit contributions | |
ily | +0.038 |
They | +0.034 |
ia | +0.033 |
Ben | +0.032 |
ena | +0.031 |
Neg logit contributions | |
ge | -0.066 |
single | -0.065 |
unnecessary | -0.065 |
purchases | -0.064 |
shopping | -0.062 |
Token | aren |
Token ID | 3588 |
Feature activation | -7.16e-02 |
Pos logit contributions | |
ily | +0.031 |
ia | +0.028 |
era | +0.028 |
They | +0.027 |
Ben | +0.027 |
Neg logit contributions | |
single | -0.034 |
unnecessary | -0.033 |
purchases | -0.033 |
down | -0.032 |
ge | -0.032 |
Token | 't |
Token ID | 470 |
Feature activation | -2.37e-02 |
Pos logit contributions | |
ily | +0.091 |
era | +0.090 |
ena | +0.088 |
ia | +0.088 |
Ben | +0.086 |
Neg logit contributions | |
single | -0.124 |
ge | -0.121 |
picking | -0.120 |
shopping | -0.119 |
purchases | -0.118 |
Token | they |
Token ID | 484 |
Feature activation | -9.29e-03 |
Pos logit contributions | |
ily | +0.034 |
ia | +0.032 |
They | +0.031 |
era | +0.031 |
Ben | +0.031 |
Neg logit contributions | |
unnecessary | -0.041 |
single | -0.041 |
ge | -0.040 |
purchases | -0.039 |
shopping | -0.038 |
Token | ?" |
Token ID | 1701 |
Feature activation | -2.39e-02 |
Pos logit contributions | |
ily | +0.011 |
They | +0.010 |
ia | +0.010 |
Ben | +0.009 |
Ben | +0.009 |
Neg logit contributions | |
ge | -0.019 |
single | -0.018 |
unnecessary | -0.018 |
purchases | -0.018 |
toward | -0.018 |
Token |
|
Token ID | 198 |
Feature activation | -9.01e-03 |
Pos logit contributions | |
ily | +0.027 |
era | +0.023 |
ia | +0.023 |
ena | +0.023 |
room | +0.022 |
Neg logit contributions | |
single | -0.047 |
unnecessary | -0.046 |
ge | -0.045 |
purchases | -0.045 |
down | -0.045 |
Token |
|
Token ID | 198 |
Feature activation | -3.09e-03 |
Pos logit contributions | |
ily | +0.011 |
era | +0.011 |
ena | +0.010 |
iv | +0.010 |
room | +0.010 |
Neg logit contributions | |
single | -0.015 |
down | -0.015 |
unnecessary | -0.014 |
ge | -0.014 |
z | -0.014 |
Token | need |
Token ID | 761 |
Feature activation | -1.46e-03 |
Pos logit contributions | |
ily | +0.036 |
ia | +0.033 |
They | +0.033 |
Ben | +0.031 |
ena | +0.031 |
Neg logit contributions | |
ge | -0.042 |
single | -0.041 |
unnecessary | -0.041 |
purchases | -0.041 |
use | -0.039 |
Token | it |
Token ID | 340 |
Feature activation | +6.32e-03 |
Pos logit contributions | |
ily | +0.002 |
They | +0.002 |
Ben | +0.002 |
ia | +0.002 |
Ben | +0.001 |
Neg logit contributions | |
ge | -0.003 |
purchases | -0.003 |
unnecessary | -0.003 |
single | -0.003 |
ging | -0.003 |
Token | . |
Token ID | 13 |
Feature activation | -2.08e-03 |
Pos logit contributions | |
single | +0.015 |
ge | +0.014 |
purchases | +0.014 |
unnecessary | +0.014 |
picking | +0.014 |
Neg logit contributions | |
ily | -0.006 |
They | -0.005 |
ia | -0.005 |
Ben | -0.005 |
Ben | -0.004 |
Token | I |
Token ID | 314 |
Feature activation | -1.85e-02 |
Pos logit contributions | |
ia | +0.003 |
ily | +0.003 |
era | +0.003 |
They | +0.003 |
Ben | +0.002 |
Neg logit contributions | |
unnecessary | -0.003 |
single | -0.003 |
use | -0.003 |
shopping | -0.003 |
weights | -0.003 |
Token | am |
Token ID | 716 |
Feature activation | -2.65e-02 |
Pos logit contributions | |
ily | +0.027 |
They | +0.025 |
Ben | +0.024 |
ia | +0.023 |
Ben | +0.023 |
Neg logit contributions | |
ge | -0.032 |
unnecessary | -0.032 |
purchases | -0.032 |
single | -0.031 |
ging | -0.031 |
Token | proud |
Token ID | 6613 |
Feature activation | -7.03e-02 |
Pos logit contributions | |
ily | +0.034 |
Ben | +0.030 |
They | +0.030 |
ena | +0.029 |
ia | +0.027 |
Neg logit contributions | |
ge | -0.053 |
purchases | -0.051 |
unnecessary | -0.050 |
single | -0.050 |
use | -0.050 |
Token | of |
Token ID | 286 |
Feature activation | -1.07e-02 |
Pos logit contributions | |
ily | +0.077 |
ia | +0.073 |
era | +0.072 |
They | +0.070 |
ena | +0.069 |
Neg logit contributions | |
single | -0.142 |
ge | -0.136 |
unnecessary | -0.135 |
shopping | -0.133 |
toward | -0.133 |
Token | you |
Token ID | 345 |
Feature activation | -6.99e-03 |
Pos logit contributions | |
ily | +0.008 |
They | +0.008 |
ia | +0.007 |
era | +0.007 |
Ben | +0.007 |
Neg logit contributions | |
unnecessary | -0.024 |
ge | -0.024 |
single | -0.024 |
shopping | -0.024 |
purchases | -0.024 |
Token | , |
Token ID | 11 |
Feature activation | -3.58e-02 |
Pos logit contributions | |
ily | +0.009 |
ia | +0.008 |
They | +0.008 |
Ben | +0.008 |
era | +0.008 |
Neg logit contributions | |
ge | -0.013 |
single | -0.013 |
unnecessary | -0.013 |
purchases | -0.013 |
shopping | -0.012 |
Token | Ben |
Token ID | 3932 |
Feature activation | +2.67e-03 |
Pos logit contributions | |
ily | +0.033 |
They | +0.031 |
room | +0.030 |
era | +0.029 |
ickets | +0.028 |
Neg logit contributions | |
single | -0.073 |
unnecessary | -0.073 |
purchases | -0.073 |
heels | -0.072 |
ge | -0.072 |
Token | . |
Token ID | 13 |
Feature activation | -3.92e-03 |
Pos logit contributions | |
ge | +0.005 |
single | +0.005 |
unnecessary | +0.005 |
purchases | +0.005 |
use | +0.005 |
Neg logit contributions | |
ily | -0.003 |
ia | -0.003 |
They | -0.003 |
era | -0.003 |
Ben | -0.003 |
Token | They |
Token ID | 1119 |
Feature activation | -1.96e-02 |
Pos logit contributions | |
single | +0.015 |
down | +0.014 |
shopping | +0.014 |
proper | +0.014 |
purchases | +0.014 |
Neg logit contributions | |
era | -0.012 |
ily | -0.012 |
ia | -0.011 |
ickets | -0.011 |
ena | -0.011 |
Token | tell |
Token ID | 1560 |
Feature activation | -8.69e-03 |
Pos logit contributions | |
ily | +0.032 |
room | +0.031 |
ia | +0.030 |
era | +0.030 |
ena | +0.029 |
Neg logit contributions | |
purchases | -0.028 |
use | -0.027 |
saddle | -0.026 |
shopping | -0.026 |
heels | -0.026 |
Token | her |
Token ID | 607 |
Feature activation | +2.79e-02 |
Pos logit contributions | |
era | +0.015 |
Ben | +0.014 |
They | +0.014 |
ia | +0.014 |
room | +0.014 |
Neg logit contributions | |
purchases | -0.011 |
unnecessary | -0.011 |
shopping | -0.011 |
single | -0.011 |
down | -0.011 |
Token | they |
Token ID | 484 |
Feature activation | +2.40e-02 |
Pos logit contributions | |
purchases | +0.047 |
down | +0.046 |
single | +0.046 |
shopping | +0.045 |
unnecessary | +0.045 |
Neg logit contributions | |
Ben | -0.036 |
room | -0.035 |
era | -0.035 |
ily | -0.035 |
ickets | -0.033 |
Token | are |
Token ID | 389 |
Feature activation | -9.26e-03 |
Pos logit contributions | |
unnecessary | +0.038 |
use | +0.038 |
single | +0.037 |
purchases | +0.037 |
ge | +0.037 |
Neg logit contributions | |
ia | -0.035 |
ily | -0.034 |
era | -0.033 |
Ben | -0.032 |
ena | -0.032 |
Token | proud |
Token ID | 6613 |
Feature activation | -7.01e-02 |
Pos logit contributions | |
ily | +0.013 |
ia | +0.013 |
era | +0.013 |
Ben | +0.013 |
They | +0.012 |
Neg logit contributions | |
unnecessary | -0.015 |
shopping | -0.015 |
down | -0.015 |
purchases | -0.015 |
single | -0.015 |
Token | of |
Token ID | 286 |
Feature activation | -1.08e-02 |
Pos logit contributions | |
era | +0.109 |
room | +0.104 |
OM | +0.103 |
ena | +0.102 |
Ben | +0.101 |
Neg logit contributions | |
single | -0.106 |
down | -0.097 |
towards | -0.094 |
shopping | -0.093 |
toward | -0.092 |
Token | her |
Token ID | 607 |
Feature activation | -3.43e-03 |
Pos logit contributions | |
ily | +0.016 |
era | +0.015 |
They | +0.015 |
ia | +0.015 |
Ben | +0.015 |
Neg logit contributions | |
ge | -0.016 |
shopping | -0.016 |
unnecessary | -0.016 |
single | -0.016 |
purchases | -0.016 |
Token | . |
Token ID | 13 |
Feature activation | +5.68e-03 |
Pos logit contributions | |
ily | +0.004 |
They | +0.004 |
Ben | +0.003 |
ia | +0.003 |
era | +0.003 |
Neg logit contributions | |
ge | -0.007 |
single | -0.007 |
unnecessary | -0.007 |
purchases | -0.007 |
shopping | -0.007 |
Token | They |
Token ID | 1119 |
Feature activation | -2.25e-02 |
Pos logit contributions | |
single | +0.009 |
down | +0.009 |
shopping | +0.009 |
toward | +0.008 |
proper | +0.008 |
Neg logit contributions | |
era | -0.007 |
ily | -0.007 |
ia | -0.007 |
ickets | -0.007 |
room | -0.007 |
Token | tell |
Token ID | 1560 |
Feature activation | -9.69e-03 |
Pos logit contributions | |
room | +0.037 |
ily | +0.036 |
era | +0.035 |
ia | +0.035 |
ena | +0.035 |
Neg logit contributions | |
purchases | -0.030 |
use | -0.029 |
saddle | -0.028 |
shopping | -0.028 |
proper | -0.028 |
Token | make |
Token ID | 787 |
Feature activation | -2.77e-02 |
Pos logit contributions | |
ia | +0.012 |
ily | +0.012 |
era | +0.011 |
Ben | +0.011 |
ena | +0.011 |
Neg logit contributions | |
use | -0.015 |
unnecessary | -0.014 |
ge | -0.014 |
single | -0.014 |
purchases | -0.014 |
Token | peace |
Token ID | 4167 |
Feature activation | -1.92e-02 |
Pos logit contributions | |
ily | +0.034 |
ia | +0.031 |
They | +0.031 |
Ben | +0.029 |
era | +0.029 |
Neg logit contributions | |
ge | -0.054 |
unnecessary | -0.053 |
single | -0.053 |
purchases | -0.053 |
shopping | -0.051 |
Token | . |
Token ID | 13 |
Feature activation | -1.86e-03 |
Pos logit contributions | |
ily | +0.021 |
They | +0.019 |
ia | +0.019 |
Ben | +0.018 |
ena | +0.018 |
Neg logit contributions | |
ge | -0.040 |
unnecessary | -0.039 |
single | -0.039 |
purchases | -0.039 |
shopping | -0.038 |
Token | I |
Token ID | 314 |
Feature activation | -1.32e-02 |
Pos logit contributions | |
ia | +0.003 |
ily | +0.002 |
era | +0.002 |
They | +0.002 |
Ben | +0.002 |
Neg logit contributions | |
single | -0.003 |
use | -0.003 |
unnecessary | -0.003 |
aback | -0.003 |
shopping | -0.003 |
Token | am |
Token ID | 716 |
Feature activation | -2.83e-02 |
Pos logit contributions | |
ily | +0.020 |
They | +0.018 |
ia | +0.018 |
Ben | +0.018 |
ena | +0.017 |
Neg logit contributions | |
ge | -0.022 |
unnecessary | -0.022 |
single | -0.022 |
purchases | -0.022 |
shopping | -0.021 |
Token | proud |
Token ID | 6613 |
Feature activation | -7.00e-02 |
Pos logit contributions | |
ily | +0.042 |
They | +0.039 |
Ben | +0.037 |
ia | +0.037 |
ena | +0.037 |
Neg logit contributions | |
ge | -0.050 |
single | -0.048 |
purchases | -0.048 |
unnecessary | -0.047 |
use | -0.046 |
Token | of |
Token ID | 286 |
Feature activation | -6.41e-03 |
Pos logit contributions | |
era | +0.081 |
ia | +0.080 |
ily | +0.080 |
ena | +0.078 |
Ben | +0.077 |
Neg logit contributions | |
single | -0.133 |
down | -0.124 |
shopping | -0.124 |
toward | -0.122 |
unnecessary | -0.121 |
Token | you |
Token ID | 345 |
Feature activation | -6.10e-03 |
Pos logit contributions | |
ily | +0.005 |
ia | +0.005 |
They | +0.005 |
era | +0.004 |
ena | +0.004 |
Neg logit contributions | |
unnecessary | -0.015 |
single | -0.014 |
ge | -0.014 |
shopping | -0.014 |
purchases | -0.014 |
Token | ." |
Token ID | 526 |
Feature activation | -2.18e-02 |
Pos logit contributions | |
ily | +0.008 |
ia | +0.008 |
They | +0.008 |
Ben | +0.007 |
era | +0.007 |
Neg logit contributions | |
ge | -0.011 |
single | -0.011 |
unnecessary | -0.011 |
purchases | -0.010 |
shopping | -0.010 |
Token |
|
Token ID | 198 |
Feature activation | -1.74e-03 |
Pos logit contributions | |
era | +0.028 |
ena | +0.028 |
ily | +0.027 |
ia | +0.027 |
iv | +0.027 |
Neg logit contributions | |
single | -0.037 |
down | -0.036 |
shopping | -0.036 |
purchases | -0.034 |
toward | -0.034 |
Token |
|
Token ID | 198 |
Feature activation | -1.38e-03 |
Pos logit contributions | |
era | +0.002 |
ily | +0.002 |
ena | +0.002 |
ia | +0.002 |
iv | +0.002 |
Neg logit contributions | |
single | -0.003 |
ge | -0.003 |
purchases | -0.003 |
down | -0.003 |
shopping | -0.003 |
Token | the |
Token ID | 262 |
Feature activation | -6.08e-03 |
Pos logit contributions | |
ily | +0.021 |
ia | +0.021 |
era | +0.020 |
They | +0.020 |
Ben | +0.019 |
Neg logit contributions | |
unnecessary | -0.030 |
single | -0.030 |
ge | -0.030 |
shopping | -0.030 |
purchases | -0.029 |
Token | picture |
Token ID | 4286 |
Feature activation | -3.63e-02 |
Pos logit contributions | |
ily | +0.011 |
ia | +0.010 |
They | +0.010 |
era | +0.009 |
ena | +0.009 |
Neg logit contributions | |
single | -0.009 |
purchases | -0.009 |
shopping | -0.009 |
unnecessary | -0.008 |
weights | -0.008 |
Token | . |
Token ID | 13 |
Feature activation | +1.39e-02 |
Pos logit contributions | |
ily | +0.053 |
Ben | +0.051 |
era | +0.050 |
They | +0.049 |
ia | +0.048 |
Neg logit contributions | |
single | -0.059 |
down | -0.057 |
ge | -0.056 |
shopping | -0.054 |
toward | -0.054 |
Token | They |
Token ID | 1119 |
Feature activation | -1.58e-02 |
Pos logit contributions | |
single | +0.024 |
down | +0.023 |
shopping | +0.023 |
weights | +0.022 |
toward | +0.022 |
Neg logit contributions | |
ena | -0.017 |
ia | -0.017 |
era | -0.017 |
una | -0.017 |
ily | -0.017 |
Token | are |
Token ID | 389 |
Feature activation | -2.97e-02 |
Pos logit contributions | |
ily | +0.028 |
ia | +0.026 |
una | +0.025 |
They | +0.024 |
era | +0.024 |
Neg logit contributions | |
down | -0.022 |
ge | -0.022 |
use | -0.021 |
toward | -0.021 |
heels | -0.021 |
Token | proud |
Token ID | 6613 |
Feature activation | -6.99e-02 |
Pos logit contributions | |
ily | +0.050 |
ia | +0.047 |
Ben | +0.047 |
They | +0.046 |
era | +0.046 |
Neg logit contributions | |
single | -0.041 |
ge | -0.041 |
shopping | -0.041 |
purchases | -0.041 |
unnecessary | -0.040 |
Token | . |
Token ID | 13 |
Feature activation | +7.63e-03 |
Pos logit contributions | |
era | +0.101 |
ily | +0.098 |
room | +0.095 |
ia | +0.094 |
ena | +0.094 |
Neg logit contributions | |
single | -0.109 |
down | -0.102 |
towards | -0.101 |
toward | -0.101 |
ge | -0.099 |
Token | They |
Token ID | 1119 |
Feature activation | -1.83e-02 |
Pos logit contributions | |
single | +0.013 |
down | +0.013 |
shopping | +0.012 |
toward | +0.012 |
purchases | +0.012 |
Neg logit contributions | |
ena | -0.010 |
ia | -0.010 |
era | -0.010 |
una | -0.009 |
ily | -0.009 |
Token | are |
Token ID | 389 |
Feature activation | -3.54e-02 |
Pos logit contributions | |
ily | +0.031 |
ia | +0.030 |
era | +0.029 |
una | +0.029 |
ena | +0.028 |
Neg logit contributions | |
use | -0.025 |
down | -0.024 |
shopping | -0.024 |
purchases | -0.023 |
heels | -0.023 |
Token | also |
Token ID | 635 |
Feature activation | +3.98e-03 |
Pos logit contributions | |
ily | +0.055 |
Ben | +0.054 |
ia | +0.054 |
era | +0.052 |
They | +0.052 |
Neg logit contributions | |
shopping | -0.052 |
down | -0.051 |
single | -0.051 |
purchases | -0.051 |
unnecessary | -0.050 |
Token | sorry |
Token ID | 7926 |
Feature activation | -3.02e-02 |
Pos logit contributions | |
ge | +0.007 |
unnecessary | +0.006 |
purchases | +0.006 |
single | +0.006 |
use | +0.006 |
Neg logit contributions | |
ily | -0.007 |
They | -0.006 |
ena | -0.006 |
Ben | -0.006 |
ia | -0.006 |
Token | , |
Token ID | 11 |
Feature activation | -3.02e-02 |
Pos logit contributions | |
ily | +0.013 |
They | +0.011 |
ia | +0.011 |
Ben | +0.010 |
ena | +0.010 |
Neg logit contributions | |
ge | -0.027 |
single | -0.027 |
unnecessary | -0.026 |
purchases | -0.026 |
toward | -0.025 |
Token | Mia |
Token ID | 32189 |
Feature activation | +4.61e-03 |
Pos logit contributions | |
ily | +0.039 |
They | +0.036 |
ia | +0.035 |
Ben | +0.035 |
era | +0.035 |
Neg logit contributions | |
unnecessary | -0.054 |
ge | -0.053 |
purchases | -0.052 |
single | -0.052 |
shopping | -0.051 |
Token | . |
Token ID | 13 |
Feature activation | +8.69e-04 |
Pos logit contributions | |
ge | +0.010 |
unnecessary | +0.010 |
single | +0.010 |
purchases | +0.010 |
toward | +0.010 |
Neg logit contributions | |
ily | -0.004 |
They | -0.004 |
ia | -0.004 |
Ben | -0.003 |
ena | -0.003 |
Token | I |
Token ID | 314 |
Feature activation | -2.16e-02 |
Pos logit contributions | |
unnecessary | +0.001 |
single | +0.001 |
purchases | +0.001 |
use | +0.001 |
toward | +0.001 |
Neg logit contributions | |
ily | -0.001 |
ia | -0.001 |
They | -0.001 |
era | -0.001 |
ena | -0.001 |
Token | am |
Token ID | 716 |
Feature activation | -3.45e-02 |
Pos logit contributions | |
ily | +0.030 |
They | +0.028 |
Ben | +0.027 |
ia | +0.026 |
ena | +0.025 |
Neg logit contributions | |
unnecessary | -0.040 |
ge | -0.040 |
heels | -0.039 |
ging | -0.039 |
ening | -0.039 |
Token | proud |
Token ID | 6613 |
Feature activation | -6.98e-02 |
Pos logit contributions | |
ily | +0.054 |
They | +0.050 |
ia | +0.048 |
Ben | +0.048 |
ena | +0.048 |
Neg logit contributions | |
ge | -0.057 |
single | -0.056 |
unnecessary | -0.055 |
purchases | -0.055 |
use | -0.053 |
Token | of |
Token ID | 286 |
Feature activation | -8.48e-03 |
Pos logit contributions | |
era | +0.081 |
ily | +0.081 |
ena | +0.077 |
ia | +0.077 |
Ben | +0.076 |
Neg logit contributions | |
single | -0.133 |
toward | -0.125 |
shopping | -0.124 |
ge | -0.124 |
down | -0.124 |
Token | you |
Token ID | 345 |
Feature activation | -6.06e-03 |
Pos logit contributions | |
ily | +0.007 |
They | +0.007 |
era | +0.007 |
ia | +0.006 |
ena | +0.006 |
Neg logit contributions | |
unnecessary | -0.018 |
ge | -0.018 |
shopping | -0.018 |
single | -0.018 |
purchases | -0.017 |
Token | ." |
Token ID | 526 |
Feature activation | -1.89e-02 |
Pos logit contributions | |
ily | +0.009 |
ia | +0.008 |
Ben | +0.008 |
They | +0.008 |
era | +0.008 |
Neg logit contributions | |
ge | -0.010 |
unnecessary | -0.010 |
single | -0.010 |
purchases | -0.009 |
shopping | -0.009 |
Token |
|
Token ID | 198 |
Feature activation | +4.68e-04 |
Pos logit contributions | |
ily | +0.022 |
ena | +0.022 |
era | +0.022 |
iv | +0.021 |
room | +0.021 |
Neg logit contributions | |
single | -0.033 |
down | -0.033 |
toward | -0.032 |
shopping | -0.032 |
aback | -0.032 |
Token |
|
Token ID | 198 |
Feature activation | +6.09e-04 |
Pos logit contributions | |
single | +0.001 |
ge | +0.001 |
down | +0.001 |
toward | +0.001 |
purchases | +0.001 |
Neg logit contributions | |
era | -0.001 |
ena | -0.001 |
ily | -0.001 |
der | -0.001 |
iv | -0.001 |
Token | . |
Token ID | 13 |
Feature activation | +3.64e-03 |
Pos logit contributions | |
ily | +0.009 |
They | +0.009 |
ia | +0.008 |
Ben | +0.008 |
ena | +0.008 |
Neg logit contributions | |
unnecessary | -0.023 |
single | -0.023 |
ge | -0.023 |
purchases | -0.023 |
use | -0.022 |
Token | They |
Token ID | 1119 |
Feature activation | -2.37e-02 |
Pos logit contributions | |
single | +0.007 |
shopping | +0.007 |
purchases | +0.007 |
weights | +0.007 |
down | +0.006 |
Neg logit contributions | |
era | -0.004 |
ia | -0.004 |
ily | -0.004 |
ena | -0.004 |
una | -0.004 |
Token | said |
Token ID | 531 |
Feature activation | +1.61e-02 |
Pos logit contributions | |
ily | +0.035 |
ia | +0.034 |
era | +0.032 |
They | +0.031 |
una | +0.031 |
Neg logit contributions | |
ge | -0.037 |
purchases | -0.037 |
single | -0.035 |
saddle | -0.035 |
use | -0.035 |
Token | they |
Token ID | 484 |
Feature activation | +2.95e-02 |
Pos logit contributions | |
ge | +0.035 |
single | +0.034 |
unnecessary | +0.034 |
purchases | +0.034 |
shopping | +0.033 |
Neg logit contributions | |
ily | -0.017 |
They | -0.015 |
ia | -0.014 |
Ben | -0.014 |
ena | -0.013 |
Token | were |
Token ID | 547 |
Feature activation | -2.20e-03 |
Pos logit contributions | |
ge | +0.056 |
single | +0.055 |
purchases | +0.055 |
use | +0.053 |
unnecessary | +0.053 |
Neg logit contributions | |
ily | -0.035 |
ia | -0.034 |
era | -0.031 |
Ben | -0.031 |
una | -0.031 |
Token | proud |
Token ID | 6613 |
Feature activation | -6.97e-02 |
Pos logit contributions | |
ily | +0.004 |
They | +0.004 |
ia | +0.004 |
Ben | +0.003 |
ena | +0.003 |
Neg logit contributions | |
ge | -0.003 |
single | -0.003 |
unnecessary | -0.003 |
purchases | -0.003 |
use | -0.003 |
Token | of |
Token ID | 286 |
Feature activation | -1.30e-02 |
Pos logit contributions | |
ily | +0.094 |
era | +0.093 |
Ben | +0.089 |
room | +0.089 |
ena | +0.088 |
Neg logit contributions | |
single | -0.118 |
toward | -0.108 |
down | -0.108 |
towards | -0.107 |
shopping | -0.106 |
Token | them |
Token ID | 606 |
Feature activation | -1.09e-02 |
Pos logit contributions | |
ily | +0.020 |
era | +0.019 |
ickets | +0.019 |
ia | +0.019 |
ena | +0.019 |
Neg logit contributions | |
shopping | -0.018 |
ge | -0.018 |
purchases | -0.018 |
single | -0.018 |
unnecessary | -0.018 |
Token | . |
Token ID | 13 |
Feature activation | -2.08e-03 |
Pos logit contributions | |
ily | +0.013 |
Ben | +0.012 |
ia | +0.012 |
They | +0.012 |
era | +0.011 |
Neg logit contributions | |
ge | -0.021 |
single | -0.020 |
purchases | -0.020 |
shopping | -0.020 |
unnecessary | -0.020 |
Token |
|
Token ID | 198 |
Feature activation | -7.65e-04 |
Pos logit contributions | |
era | +0.003 |
ena | +0.003 |
ia | +0.003 |
una | +0.002 |
ily | +0.002 |
Neg logit contributions | |
single | -0.003 |
shopping | -0.003 |
down | -0.003 |
toward | -0.003 |
purchases | -0.003 |
Token |
|
Token ID | 198 |
Feature activation | +7.30e-04 |
Pos logit contributions | |
ily | +0.001 |
era | +0.001 |
ena | +0.001 |
ia | +0.001 |
iv | +0.001 |
Neg logit contributions | |
single | -0.001 |
ge | -0.001 |
purchases | -0.001 |
shopping | -0.001 |
toward | -0.001 |
Token | and |
Token ID | 290 |
Feature activation | +4.15e-02 |
Pos logit contributions | |
unnecessary | +0.007 |
ge | +0.007 |
purchases | +0.007 |
single | +0.007 |
use | +0.007 |
Neg logit contributions | |
ily | -0.003 |
ia | -0.003 |
ena | -0.003 |
They | -0.003 |
Ben | -0.003 |
Token | strong |
Token ID | 1913 |
Feature activation | +7.60e-03 |
Pos logit contributions | |
ge | +0.071 |
purchases | +0.068 |
ging | +0.066 |
unnecessary | +0.064 |
reins | +0.064 |
Neg logit contributions | |
ily | -0.068 |
They | -0.066 |
Ben | -0.062 |
not | -0.061 |
ia | -0.060 |
Token | . |
Token ID | 13 |
Feature activation | +1.55e-03 |
Pos logit contributions | |
purchases | +0.017 |
ge | +0.017 |
unnecessary | +0.017 |
single | +0.017 |
use | +0.016 |
Neg logit contributions | |
ily | -0.007 |
They | -0.006 |
ia | -0.006 |
Ben | -0.006 |
ena | -0.006 |
Token | I |
Token ID | 314 |
Feature activation | -1.85e-02 |
Pos logit contributions | |
single | +0.002 |
unnecessary | +0.002 |
aback | +0.002 |
use | +0.002 |
down | +0.002 |
Neg logit contributions | |
ia | -0.002 |
ily | -0.002 |
era | -0.002 |
una | -0.002 |
Ben | -0.002 |
Token | 'm |
Token ID | 1101 |
Feature activation | -2.95e-02 |
Pos logit contributions | |
ily | +0.027 |
They | +0.025 |
ia | +0.025 |
Ben | +0.024 |
ena | +0.024 |
Neg logit contributions | |
ge | -0.032 |
unnecessary | -0.031 |
single | -0.031 |
purchases | -0.031 |
shopping | -0.030 |
Token | proud |
Token ID | 6613 |
Feature activation | -6.97e-02 |
Pos logit contributions | |
ily | +0.042 |
ia | +0.039 |
They | +0.039 |
Ben | +0.038 |
era | +0.037 |
Neg logit contributions | |
ge | -0.051 |
single | -0.051 |
unnecessary | -0.050 |
purchases | -0.050 |
shopping | -0.049 |
Token | of |
Token ID | 286 |
Feature activation | -8.76e-03 |
Pos logit contributions | |
ily | +0.086 |
era | +0.085 |
ia | +0.084 |
ena | +0.083 |
Ben | +0.083 |
Neg logit contributions | |
single | -0.128 |
down | -0.121 |
toward | -0.120 |
shopping | -0.118 |
ge | -0.116 |
Token | you |
Token ID | 345 |
Feature activation | -7.21e-03 |
Pos logit contributions | |
ily | +0.007 |
ia | +0.007 |
They | +0.007 |
era | +0.006 |
ena | +0.006 |
Neg logit contributions | |
ge | -0.019 |
unnecessary | -0.019 |
single | -0.019 |
shopping | -0.019 |
purchases | -0.019 |
Token | for |
Token ID | 329 |
Feature activation | -4.97e-03 |
Pos logit contributions | |
ily | +0.011 |
Ben | +0.011 |
ia | +0.010 |
They | +0.010 |
era | +0.010 |
Neg logit contributions | |
single | -0.011 |
down | -0.011 |
ge | -0.011 |
shopping | -0.011 |
unnecessary | -0.011 |
Token | trying |
Token ID | 2111 |
Feature activation | +1.59e-02 |
Pos logit contributions | |
ily | +0.008 |
They | +0.007 |
ia | +0.007 |
Ben | +0.007 |
ena | +0.007 |
Neg logit contributions | |
ge | -0.008 |
single | -0.008 |
unnecessary | -0.008 |
purchases | -0.008 |
toward | -0.008 |
Token | something |
Token ID | 1223 |
Feature activation | +1.53e-02 |
Pos logit contributions | |
ge | +0.028 |
purchases | +0.028 |
single | +0.027 |
use | +0.027 |
picking | +0.027 |
Neg logit contributions | |
They | -0.022 |
ily | -0.022 |
Ben | -0.020 |
ia | -0.020 |
Ben | -0.019 |
Token | They |
Token ID | 1119 |
Feature activation | -2.28e-02 |
Pos logit contributions | |
single | +0.010 |
shopping | +0.009 |
down | +0.009 |
toward | +0.009 |
purchases | +0.009 |
Neg logit contributions | |
ily | -0.008 |
ia | -0.007 |
era | -0.007 |
una | -0.007 |
ickets | -0.007 |
Token | tell |
Token ID | 1560 |
Feature activation | -1.25e-02 |
Pos logit contributions | |
ily | +0.034 |
ia | +0.032 |
room | +0.031 |
era | +0.031 |
They | +0.030 |
Neg logit contributions | |
purchases | -0.035 |
use | -0.035 |
shopping | -0.033 |
allowance | -0.032 |
weights | -0.032 |
Token | them |
Token ID | 606 |
Feature activation | +2.20e-02 |
Pos logit contributions | |
ily | +0.020 |
Ben | +0.020 |
They | +0.020 |
era | +0.020 |
ia | +0.020 |
Neg logit contributions | |
purchases | -0.017 |
shopping | -0.016 |
single | -0.016 |
unnecessary | -0.016 |
ge | -0.016 |
Token | they |
Token ID | 484 |
Feature activation | +1.95e-02 |
Pos logit contributions | |
purchases | +0.038 |
shopping | +0.037 |
down | +0.037 |
ge | +0.037 |
unnecessary | +0.037 |
Neg logit contributions | |
ily | -0.029 |
Ben | -0.028 |
ia | -0.027 |
ena | -0.026 |
era | -0.026 |
Token | are |
Token ID | 389 |
Feature activation | -1.08e-02 |
Pos logit contributions | |
use | +0.034 |
ge | +0.034 |
single | +0.034 |
purchases | +0.033 |
unnecessary | +0.033 |
Neg logit contributions | |
ia | -0.025 |
ily | -0.024 |
They | -0.023 |
Ben | -0.023 |
era | -0.023 |
Token | proud |
Token ID | 6613 |
Feature activation | -6.95e-02 |
Pos logit contributions | |
ily | +0.018 |
ia | +0.016 |
They | +0.016 |
Ben | +0.016 |
era | +0.016 |
Neg logit contributions | |
ge | -0.016 |
single | -0.016 |
unnecessary | -0.016 |
purchases | -0.016 |
shopping | -0.015 |
Token | of |
Token ID | 286 |
Feature activation | -1.08e-02 |
Pos logit contributions | |
Ben | +0.096 |
era | +0.096 |
ily | +0.094 |
ena | +0.092 |
room | +0.091 |
Neg logit contributions | |
single | -0.113 |
down | -0.104 |
shopping | -0.104 |
toward | -0.102 |
towards | -0.101 |
Token | them |
Token ID | 606 |
Feature activation | -1.17e-02 |
Pos logit contributions | |
ily | +0.016 |
Ben | +0.015 |
era | +0.015 |
ia | +0.015 |
They | +0.015 |
Neg logit contributions | |
ge | -0.016 |
single | -0.016 |
shopping | -0.016 |
purchases | -0.016 |
unnecessary | -0.016 |
Token | . |
Token ID | 13 |
Feature activation | +2.26e-03 |
Pos logit contributions | |
ily | +0.014 |
Ben | +0.013 |
ia | +0.013 |
They | +0.013 |
era | +0.012 |
Neg logit contributions | |
ge | -0.022 |
single | -0.022 |
shopping | -0.022 |
purchases | -0.022 |
unnecessary | -0.021 |
Token |
|
Token ID | 198 |
Feature activation | -5.35e-03 |
Pos logit contributions | |
single | +0.004 |
shopping | +0.004 |
down | +0.003 |
toward | +0.003 |
purchases | +0.003 |
Neg logit contributions | |
ily | -0.003 |
ia | -0.003 |
era | -0.003 |
una | -0.003 |
room | -0.003 |
Token |
|
Token ID | 198 |
Feature activation | -5.04e-03 |
Pos logit contributions | |
era | +0.006 |
ily | +0.006 |
ena | +0.006 |
ia | +0.006 |
iv | +0.006 |
Neg logit contributions | |
single | -0.010 |
shopping | -0.009 |
ge | -0.009 |
purchases | -0.009 |
toward | -0.009 |
Token | ons |
Token ID | 684 |
Feature activation | -6.00e-03 |
Pos logit contributions | |
ily | +0.024 |
They | +0.022 |
ia | +0.022 |
Ben | +0.021 |
era | +0.021 |
Neg logit contributions | |
single | -0.035 |
ge | -0.035 |
unnecessary | -0.035 |
purchases | -0.034 |
ging | -0.033 |
Token | . |
Token ID | 13 |
Feature activation | +5.10e-04 |
Pos logit contributions | |
ily | +0.006 |
They | +0.005 |
ia | +0.005 |
Ben | +0.005 |
ena | +0.005 |
Neg logit contributions | |
ge | -0.013 |
single | -0.013 |
unnecessary | -0.013 |
purchases | -0.013 |
shopping | -0.012 |
Token | They |
Token ID | 1119 |
Feature activation | -2.31e-02 |
Pos logit contributions | |
single | +0.001 |
down | +0.001 |
toward | +0.001 |
weights | +0.001 |
purchases | +0.001 |
Neg logit contributions | |
iv | -0.001 |
era | -0.001 |
ily | -0.001 |
ena | -0.001 |
una | -0.001 |
Token | are |
Token ID | 389 |
Feature activation | -3.24e-02 |
Pos logit contributions | |
ily | +0.035 |
ia | +0.032 |
They | +0.031 |
una | +0.030 |
Ben | +0.030 |
Neg logit contributions | |
single | -0.037 |
ge | -0.037 |
purchases | -0.036 |
use | -0.035 |
down | -0.035 |
Token | very |
Token ID | 845 |
Feature activation | +1.27e-02 |
Pos logit contributions | |
ily | +0.055 |
They | +0.052 |
ia | +0.051 |
ena | +0.049 |
Ben | +0.049 |
Neg logit contributions | |
ge | -0.049 |
unnecessary | -0.048 |
single | -0.047 |
purchases | -0.047 |
use | -0.046 |
Token | proud |
Token ID | 6613 |
Feature activation | -6.93e-02 |
Pos logit contributions | |
ge | +0.018 |
unnecessary | +0.018 |
single | +0.018 |
purchases | +0.018 |
toward | +0.017 |
Neg logit contributions | |
ily | -0.022 |
They | -0.021 |
ia | -0.021 |
Ben | -0.020 |
ena | -0.020 |
Token | of |
Token ID | 286 |
Feature activation | -1.41e-02 |
Pos logit contributions | |
ily | +0.099 |
era | +0.096 |
Ben | +0.095 |
ena | +0.092 |
ia | +0.092 |
Neg logit contributions | |
single | -0.109 |
down | -0.104 |
toward | -0.102 |
towards | -0.101 |
ge | -0.101 |
Token | their |
Token ID | 511 |
Feature activation | -2.90e-03 |
Pos logit contributions | |
ickets | +0.020 |
ily | +0.020 |
aud | +0.020 |
era | +0.020 |
Ben | +0.020 |
Neg logit contributions | |
ge | -0.020 |
single | -0.020 |
down | -0.020 |
purchases | -0.020 |
toward | -0.019 |
Token | symbols |
Token ID | 14354 |
Feature activation | -1.55e-02 |
Pos logit contributions | |
ily | +0.005 |
They | +0.004 |
ia | +0.004 |
Ben | +0.004 |
era | +0.004 |
Neg logit contributions | |
single | -0.005 |
ge | -0.005 |
purchases | -0.005 |
unnecessary | -0.005 |
shopping | -0.004 |
Token | . |
Token ID | 13 |
Feature activation | -3.39e-03 |
Pos logit contributions | |
ily | +0.019 |
Ben | +0.017 |
They | +0.017 |
ia | +0.016 |
era | +0.016 |
Neg logit contributions | |
ge | -0.030 |
single | -0.030 |
purchases | -0.029 |
down | -0.028 |
unnecessary | -0.028 |
Token | They |
Token ID | 1119 |
Feature activation | -2.50e-02 |
Pos logit contributions | |
iv | +0.005 |
era | +0.005 |
ily | +0.005 |
ena | +0.004 |
una | +0.004 |
Neg logit contributions | |
single | -0.005 |
down | -0.004 |
toward | -0.004 |
towards | -0.004 |
sideways | -0.004 |
Token | , |
Token ID | 11 |
Feature activation | -2.11e-02 |
Pos logit contributions | |
ily | +0.012 |
They | +0.011 |
ia | +0.010 |
Ben | +0.010 |
ena | +0.010 |
Neg logit contributions | |
ge | -0.015 |
unnecessary | -0.015 |
single | -0.015 |
purchases | -0.015 |
toward | -0.014 |
Token | Anna |
Token ID | 11735 |
Feature activation | +3.12e-03 |
Pos logit contributions | |
ily | +0.018 |
They | +0.016 |
ia | +0.015 |
era | +0.014 |
room | +0.014 |
Neg logit contributions | |
unnecessary | -0.047 |
single | -0.047 |
purchases | -0.046 |
shopping | -0.046 |
ge | -0.046 |
Token | . |
Token ID | 13 |
Feature activation | -1.42e-03 |
Pos logit contributions | |
ge | +0.006 |
single | +0.006 |
unnecessary | +0.006 |
purchases | +0.006 |
shopping | +0.006 |
Neg logit contributions | |
ily | -0.003 |
ia | -0.003 |
They | -0.003 |
Ben | -0.003 |
era | -0.003 |
Token | I |
Token ID | 314 |
Feature activation | -1.86e-02 |
Pos logit contributions | |
ia | +0.002 |
ily | +0.002 |
They | +0.002 |
era | +0.002 |
Ben | +0.002 |
Neg logit contributions | |
single | -0.002 |
use | -0.002 |
shopping | -0.002 |
toward | -0.002 |
purchases | -0.002 |
Token | 'm |
Token ID | 1101 |
Feature activation | -2.96e-02 |
Pos logit contributions | |
ily | +0.030 |
They | +0.028 |
ia | +0.028 |
Ben | +0.027 |
ena | +0.026 |
Neg logit contributions | |
ge | -0.029 |
unnecessary | -0.029 |
single | -0.029 |
purchases | -0.029 |
shopping | -0.027 |
Token | proud |
Token ID | 6613 |
Feature activation | -6.92e-02 |
Pos logit contributions | |
ily | +0.045 |
They | +0.041 |
ia | +0.041 |
Ben | +0.040 |
era | +0.039 |
Neg logit contributions | |
ge | -0.049 |
single | -0.048 |
unnecessary | -0.048 |
purchases | -0.048 |
shopping | -0.046 |
Token | of |
Token ID | 286 |
Feature activation | -9.16e-03 |
Pos logit contributions | |
ily | +0.080 |
era | +0.080 |
ena | +0.078 |
ia | +0.078 |
Ben | +0.076 |
Neg logit contributions | |
single | -0.130 |
down | -0.122 |
shopping | -0.121 |
toward | -0.120 |
unnecessary | -0.115 |
Token | you |
Token ID | 345 |
Feature activation | -5.10e-03 |
Pos logit contributions | |
ily | +0.007 |
They | +0.007 |
ia | +0.006 |
era | +0.006 |
Ben | +0.006 |
Neg logit contributions | |
single | -0.021 |
ge | -0.021 |
unnecessary | -0.021 |
shopping | -0.020 |
purchases | -0.020 |
Token | . |
Token ID | 13 |
Feature activation | -2.97e-03 |
Pos logit contributions | |
ily | +0.007 |
ia | +0.006 |
They | +0.006 |
Ben | +0.006 |
era | +0.006 |
Neg logit contributions | |
ge | -0.009 |
single | -0.009 |
unnecessary | -0.009 |
purchases | -0.009 |
shopping | -0.009 |
Token | Do |
Token ID | 2141 |
Feature activation | -4.18e-02 |
Pos logit contributions | |
ily | +0.005 |
ia | +0.005 |
They | +0.005 |
era | +0.005 |
Ben | +0.005 |
Neg logit contributions | |
single | -0.004 |
use | -0.004 |
shopping | -0.004 |
purchases | -0.004 |
unnecessary | -0.004 |
Token | you |
Token ID | 345 |
Feature activation | -2.43e-02 |
Pos logit contributions | |
room | +0.045 |
ily | +0.045 |
ena | +0.042 |
era | +0.041 |
oon | +0.041 |
Neg logit contributions | |
unnecessary | -0.077 |
shopping | -0.076 |
use | -0.075 |
picking | -0.073 |
anywhere | -0.072 |
Token | Ben |
Token ID | 3932 |
Feature activation | +3.54e-03 |
Pos logit contributions | |
ily | +0.004 |
They | +0.004 |
ia | +0.003 |
era | +0.003 |
ena | +0.003 |
Neg logit contributions | |
shopping | -0.026 |
purchases | -0.026 |
unnecessary | -0.026 |
heels | -0.026 |
ge | -0.026 |
Token | hug |
Token ID | 16225 |
Feature activation | -2.35e-02 |
Pos logit contributions | |
use | +0.006 |
ge | +0.006 |
down | +0.006 |
purchases | +0.006 |
shopping | +0.006 |
Neg logit contributions | |
ily | -0.005 |
ia | -0.005 |
They | -0.004 |
ena | -0.004 |
una | -0.004 |
Token | . |
Token ID | 13 |
Feature activation | +3.58e-03 |
Pos logit contributions | |
ily | +0.029 |
ia | +0.026 |
ena | +0.025 |
They | +0.024 |
era | +0.024 |
Neg logit contributions | |
ge | -0.044 |
unnecessary | -0.044 |
down | -0.044 |
single | -0.043 |
toward | -0.043 |
Token | They |
Token ID | 1119 |
Feature activation | -2.07e-02 |
Pos logit contributions | |
single | +0.007 |
toward | +0.007 |
down | +0.007 |
purchases | +0.007 |
unnecessary | +0.006 |
Neg logit contributions | |
ily | -0.004 |
ia | -0.004 |
ena | -0.004 |
era | -0.004 |
Ben | -0.003 |
Token | are |
Token ID | 389 |
Feature activation | -3.04e-02 |
Pos logit contributions | |
ily | +0.030 |
ia | +0.028 |
ena | +0.027 |
They | +0.026 |
una | +0.026 |
Neg logit contributions | |
use | -0.034 |
purchases | -0.033 |
down | -0.032 |
ge | -0.032 |
heels | -0.032 |
Token | proud |
Token ID | 6613 |
Feature activation | -6.91e-02 |
Pos logit contributions | |
ily | +0.052 |
ia | +0.048 |
They | +0.048 |
Ben | +0.048 |
era | +0.047 |
Neg logit contributions | |
single | -0.043 |
ge | -0.042 |
unnecessary | -0.042 |
purchases | -0.042 |
shopping | -0.041 |
Token | of |
Token ID | 286 |
Feature activation | -1.56e-02 |
Pos logit contributions | |
room | +0.095 |
era | +0.095 |
ena | +0.092 |
ily | +0.092 |
OM | +0.088 |
Neg logit contributions | |
single | -0.116 |
down | -0.105 |
toward | -0.104 |
ge | -0.103 |
shopping | -0.103 |
Token | their |
Token ID | 511 |
Feature activation | -7.58e-03 |
Pos logit contributions | |
ily | +0.020 |
ickets | +0.018 |
era | +0.018 |
room | +0.018 |
Ben | +0.018 |
Neg logit contributions | |
ge | -0.027 |
shopping | -0.027 |
single | -0.027 |
unnecessary | -0.027 |
purchases | -0.026 |
Token | tower |
Token ID | 10580 |
Feature activation | +6.39e-03 |
Pos logit contributions | |
ily | +0.011 |
ia | +0.010 |
They | +0.010 |
Ben | +0.009 |
era | +0.009 |
Neg logit contributions | |
ge | -0.013 |
unnecessary | -0.013 |
single | -0.013 |
purchases | -0.013 |
use | -0.012 |
Token | . |
Token ID | 13 |
Feature activation | -5.50e-04 |
Pos logit contributions | |
ge | +0.013 |
single | +0.013 |
purchases | +0.012 |
shopping | +0.012 |
unnecessary | +0.012 |
Neg logit contributions | |
ily | -0.007 |
Ben | -0.007 |
They | -0.006 |
ia | -0.006 |
era | -0.006 |
Token | They |
Token ID | 1119 |
Feature activation | -2.52e-02 |
Pos logit contributions | |
ily | +0.001 |
ia | +0.001 |
ena | +0.001 |
era | +0.001 |
una | +0.001 |
Neg logit contributions | |
single | -0.001 |
toward | -0.001 |
down | -0.001 |
aback | -0.001 |
unnecessary | -0.001 |
Token | and |
Token ID | 290 |
Feature activation | +3.79e-02 |
Pos logit contributions | |
ily | +0.020 |
They | +0.018 |
ia | +0.018 |
Ben | +0.017 |
ena | +0.017 |
Neg logit contributions | |
ge | -0.039 |
single | -0.038 |
unnecessary | -0.038 |
purchases | -0.038 |
shopping | -0.037 |
Token | brave |
Token ID | 14802 |
Feature activation | +9.56e-03 |
Pos logit contributions | |
ge | +0.064 |
single | +0.063 |
unnecessary | +0.062 |
purchases | +0.062 |
toward | +0.059 |
Neg logit contributions | |
ily | -0.057 |
They | -0.052 |
ia | -0.052 |
Ben | -0.050 |
ena | -0.049 |
Token | . |
Token ID | 13 |
Feature activation | -8.61e-03 |
Pos logit contributions | |
ge | +0.021 |
purchases | +0.021 |
single | +0.020 |
unnecessary | +0.020 |
picking | +0.020 |
Neg logit contributions | |
ily | -0.010 |
They | -0.009 |
Ben | -0.008 |
ia | -0.008 |
Ben | -0.008 |
Token | I |
Token ID | 314 |
Feature activation | -1.94e-02 |
Pos logit contributions | |
ia | +0.011 |
ily | +0.011 |
era | +0.010 |
They | +0.010 |
Ben | +0.010 |
Neg logit contributions | |
unnecessary | -0.014 |
single | -0.014 |
use | -0.014 |
weights | -0.014 |
aback | -0.013 |
Token | am |
Token ID | 716 |
Feature activation | -3.46e-02 |
Pos logit contributions | |
ily | +0.028 |
They | +0.026 |
Ben | +0.025 |
ia | +0.025 |
era | +0.024 |
Neg logit contributions | |
ge | -0.033 |
unnecessary | -0.033 |
purchases | -0.033 |
single | -0.033 |
shopping | -0.032 |
Token | proud |
Token ID | 6613 |
Feature activation | -6.91e-02 |
Pos logit contributions | |
ily | +0.043 |
They | +0.039 |
Ben | +0.038 |
ia | +0.037 |
ena | +0.037 |
Neg logit contributions | |
ge | -0.069 |
purchases | -0.067 |
single | -0.067 |
unnecessary | -0.066 |
use | -0.065 |
Token | of |
Token ID | 286 |
Feature activation | -1.07e-02 |
Pos logit contributions | |
ily | +0.078 |
era | +0.078 |
ia | +0.076 |
ena | +0.076 |
Ben | +0.074 |
Neg logit contributions | |
single | -0.134 |
ge | -0.126 |
down | -0.125 |
unnecessary | -0.125 |
toward | -0.125 |
Token | you |
Token ID | 345 |
Feature activation | -5.58e-03 |
Pos logit contributions | |
ily | +0.009 |
They | +0.008 |
era | +0.008 |
ia | +0.008 |
ena | +0.008 |
Neg logit contributions | |
unnecessary | -0.024 |
ge | -0.023 |
single | -0.023 |
shopping | -0.023 |
purchases | -0.022 |
Token | ." |
Token ID | 526 |
Feature activation | -2.48e-02 |
Pos logit contributions | |
ily | +0.008 |
ia | +0.007 |
Ben | +0.007 |
They | +0.007 |
era | +0.007 |
Neg logit contributions | |
ge | -0.009 |
unnecessary | -0.009 |
single | -0.009 |
purchases | -0.009 |
shopping | -0.009 |
Token |
|
Token ID | 198 |
Feature activation | -1.48e-02 |
Pos logit contributions | |
ily | +0.032 |
era | +0.031 |
iv | +0.031 |
ia | +0.030 |
room | +0.029 |
Neg logit contributions | |
single | -0.046 |
down | -0.042 |
toward | -0.042 |
shopping | -0.040 |
ge | -0.040 |
Token |
|
Token ID | 198 |
Feature activation | -1.27e-02 |
Pos logit contributions | |
era | +0.021 |
ily | +0.019 |
iv | +0.019 |
ena | +0.019 |
der | +0.019 |
Neg logit contributions | |
single | -0.023 |
ge | -0.022 |
down | -0.022 |
run | -0.021 |
toward | -0.021 |
Token | to |
Token ID | 284 |
Feature activation | +9.48e-03 |
Pos logit contributions | |
ily | +0.022 |
ia | +0.021 |
Ben | +0.021 |
era | +0.020 |
ena | +0.020 |
Neg logit contributions | |
single | -0.028 |
down | -0.027 |
ge | -0.027 |
unnecessary | -0.026 |
shopping | -0.026 |
Token | Luna |
Token ID | 23694 |
Feature activation | -7.13e-03 |
Pos logit contributions | |
unnecessary | +0.014 |
single | +0.014 |
ge | +0.014 |
purchases | +0.014 |
use | +0.014 |
Neg logit contributions | |
ily | -0.015 |
ia | -0.014 |
They | -0.014 |
era | -0.014 |
ena | -0.014 |
Token | . |
Token ID | 13 |
Feature activation | +1.63e-03 |
Pos logit contributions | |
ily | +0.008 |
ia | +0.007 |
They | +0.007 |
Ben | +0.007 |
era | +0.007 |
Neg logit contributions | |
ge | -0.014 |
single | -0.014 |
unnecessary | -0.014 |
purchases | -0.014 |
shopping | -0.014 |
Token | I |
Token ID | 314 |
Feature activation | -1.36e-02 |
Pos logit contributions | |
single | +0.003 |
unnecessary | +0.003 |
use | +0.003 |
down | +0.003 |
toward | +0.003 |
Neg logit contributions | |
ia | -0.002 |
era | -0.002 |
ily | -0.002 |
They | -0.002 |
ena | -0.002 |
Token | am |
Token ID | 716 |
Feature activation | -3.11e-02 |
Pos logit contributions | |
ily | +0.021 |
They | +0.019 |
ia | +0.019 |
Ben | +0.019 |
ena | +0.018 |
Neg logit contributions | |
ge | -0.023 |
unnecessary | -0.022 |
single | -0.022 |
purchases | -0.022 |
toward | -0.021 |
Token | proud |
Token ID | 6613 |
Feature activation | -6.90e-02 |
Pos logit contributions | |
ily | +0.052 |
ia | +0.050 |
They | +0.049 |
Ben | +0.048 |
era | +0.047 |
Neg logit contributions | |
single | -0.045 |
ge | -0.045 |
unnecessary | -0.044 |
purchases | -0.044 |
shopping | -0.043 |
Token | of |
Token ID | 286 |
Feature activation | -5.52e-03 |
Pos logit contributions | |
era | +0.089 |
ena | +0.086 |
ia | +0.084 |
Ben | +0.082 |
ily | +0.082 |
Neg logit contributions | |
single | -0.126 |
down | -0.118 |
toward | -0.114 |
shopping | -0.114 |
unnecessary | -0.112 |
Token | you |
Token ID | 345 |
Feature activation | -2.59e-03 |
Pos logit contributions | |
ily | +0.005 |
era | +0.004 |
ena | +0.004 |
ia | +0.004 |
They | +0.004 |
Neg logit contributions | |
unnecessary | -0.012 |
single | -0.012 |
ge | -0.012 |
shopping | -0.012 |
use | -0.011 |
Token | ." |
Token ID | 526 |
Feature activation | -2.02e-02 |
Pos logit contributions | |
ily | +0.003 |
ia | +0.003 |
They | +0.003 |
Ben | +0.003 |
era | +0.003 |
Neg logit contributions | |
ge | -0.005 |
single | -0.005 |
unnecessary | -0.005 |
purchases | -0.005 |
shopping | -0.005 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | -2.11e-02 |
Pos logit contributions | |
ily | +0.024 |
era | +0.024 |
ena | +0.023 |
room | +0.022 |
iv | +0.022 |
Neg logit contributions | |
single | -0.036 |
down | -0.035 |
shopping | -0.034 |
toward | -0.034 |
unnecessary | -0.034 |
Token | Dan |
Token ID | 21174 |
Feature activation | +5.34e-03 |
Pos logit contributions | |
ily | +0.038 |
ia | +0.036 |
ena | +0.036 |
They | +0.036 |
era | +0.036 |
Neg logit contributions | |
single | -0.027 |
shopping | -0.025 |
unnecessary | -0.025 |
ge | -0.025 |
down | -0.025 |
Token | and |
Token ID | 290 |
Feature activation | +3.84e-02 |
Pos logit contributions | |
ily | +0.010 |
They | +0.009 |
ia | +0.009 |
Ben | +0.008 |
Ben | +0.008 |
Neg logit contributions | |
ge | -0.020 |
unnecessary | -0.019 |
single | -0.019 |
purchases | -0.019 |
use | -0.018 |
Token | healthy |
Token ID | 5448 |
Feature activation | -6.84e-03 |
Pos logit contributions | |
ge | +0.061 |
purchases | +0.058 |
unnecessary | +0.058 |
single | +0.058 |
toward | +0.056 |
Neg logit contributions | |
ily | -0.062 |
They | -0.060 |
Ben | -0.057 |
Ben | -0.056 |
ia | -0.056 |
Token | . |
Token ID | 13 |
Feature activation | -2.90e-03 |
Pos logit contributions | |
ily | +0.006 |
They | +0.006 |
Ben | +0.005 |
ia | +0.005 |
Ben | +0.005 |
Neg logit contributions | |
ge | -0.016 |
purchases | -0.016 |
unnecessary | -0.016 |
single | -0.015 |
picking | -0.015 |
Token | I |
Token ID | 314 |
Feature activation | -1.82e-02 |
Pos logit contributions | |
ia | +0.004 |
ily | +0.004 |
era | +0.004 |
They | +0.004 |
ena | +0.004 |
Neg logit contributions | |
unnecessary | -0.005 |
single | -0.004 |
aback | -0.004 |
use | -0.004 |
down | -0.004 |
Token | am |
Token ID | 716 |
Feature activation | -3.30e-02 |
Pos logit contributions | |
ily | +0.026 |
They | +0.025 |
Ben | +0.023 |
ia | +0.023 |
Ben | +0.022 |
Neg logit contributions | |
ge | -0.032 |
unnecessary | -0.031 |
purchases | -0.031 |
ging | -0.030 |
single | -0.030 |
Token | proud |
Token ID | 6613 |
Feature activation | -6.89e-02 |
Pos logit contributions | |
ily | +0.055 |
They | +0.051 |
ia | +0.051 |
Ben | +0.050 |
ena | +0.049 |
Neg logit contributions | |
ge | -0.050 |
single | -0.049 |
unnecessary | -0.049 |
purchases | -0.048 |
toward | -0.046 |
Token | of |
Token ID | 286 |
Feature activation | -7.62e-03 |
Pos logit contributions | |
era | +0.082 |
ily | +0.082 |
ia | +0.082 |
ena | +0.081 |
Ben | +0.078 |
Neg logit contributions | |
single | -0.131 |
down | -0.120 |
toward | -0.119 |
shopping | -0.119 |
unnecessary | -0.116 |
Token | you |
Token ID | 345 |
Feature activation | -5.22e-03 |
Pos logit contributions | |
ily | +0.007 |
era | +0.007 |
ena | +0.007 |
They | +0.007 |
ia | +0.007 |
Neg logit contributions | |
unnecessary | -0.015 |
shopping | -0.015 |
single | -0.015 |
ge | -0.015 |
ging | -0.015 |
Token | ." |
Token ID | 526 |
Feature activation | -2.29e-02 |
Pos logit contributions | |
ily | +0.008 |
ia | +0.007 |
Ben | +0.007 |
They | +0.007 |
era | +0.007 |
Neg logit contributions | |
single | -0.008 |
ge | -0.008 |
unnecessary | -0.008 |
shopping | -0.008 |
purchases | -0.008 |
Token |
|
Token ID | 198 |
Feature activation | -6.83e-03 |
Pos logit contributions | |
room | +0.034 |
era | +0.034 |
ily | +0.033 |
ena | +0.033 |
iv | +0.033 |
Neg logit contributions | |
single | -0.033 |
down | -0.032 |
shopping | -0.031 |
toward | -0.030 |
aback | -0.029 |
Token |
|
Token ID | 198 |
Feature activation | -6.61e-03 |
Pos logit contributions | |
era | +0.009 |
ily | +0.009 |
ena | +0.008 |
iv | +0.008 |
room | +0.008 |
Neg logit contributions | |
single | -0.011 |
shopping | -0.010 |
purchases | -0.010 |
ge | -0.010 |
down | -0.010 |
Token | each |
Token ID | 1123 |
Feature activation | -1.15e-02 |
Pos logit contributions | |
ily | +0.042 |
They | +0.040 |
ickets | +0.039 |
ia | +0.038 |
oon | +0.037 |
Neg logit contributions | |
down | -0.058 |
heels | -0.056 |
weights | -0.056 |
unnecessary | -0.056 |
toward | -0.055 |
Token | other |
Token ID | 584 |
Feature activation | +9.46e-04 |
Pos logit contributions | |
ily | +0.017 |
ia | +0.016 |
They | +0.016 |
era | +0.016 |
Ben | +0.015 |
Neg logit contributions | |
single | -0.019 |
ge | -0.019 |
purchases | -0.018 |
unnecessary | -0.018 |
shopping | -0.018 |
Token | . |
Token ID | 13 |
Feature activation | +3.18e-03 |
Pos logit contributions | |
ge | +0.002 |
single | +0.002 |
purchases | +0.002 |
unnecessary | +0.002 |
shopping | +0.002 |
Neg logit contributions | |
ily | -0.001 |
ia | -0.001 |
They | -0.001 |
Ben | -0.001 |
era | -0.001 |
Token | They |
Token ID | 1119 |
Feature activation | -2.59e-02 |
Pos logit contributions | |
single | +0.006 |
toward | +0.006 |
down | +0.006 |
shopping | +0.006 |
weights | +0.006 |
Neg logit contributions | |
ily | -0.004 |
ia | -0.003 |
era | -0.003 |
una | -0.003 |
ena | -0.003 |
Token | are |
Token ID | 389 |
Feature activation | -3.28e-02 |
Pos logit contributions | |
ily | +0.042 |
ia | +0.041 |
una | +0.041 |
They | +0.039 |
ena | +0.039 |
Neg logit contributions | |
use | -0.036 |
purchases | -0.035 |
down | -0.035 |
heels | -0.034 |
shopping | -0.034 |
Token | proud |
Token ID | 6613 |
Feature activation | -6.87e-02 |
Pos logit contributions | |
ily | +0.059 |
They | +0.056 |
ia | +0.055 |
Ben | +0.055 |
era | +0.054 |
Neg logit contributions | |
single | -0.043 |
ge | -0.043 |
purchases | -0.042 |
unnecessary | -0.042 |
shopping | -0.041 |
Token | . |
Token ID | 13 |
Feature activation | +1.31e-03 |
Pos logit contributions | |
room | +0.098 |
era | +0.097 |
ily | +0.094 |
OM | +0.093 |
ena | +0.091 |
Neg logit contributions | |
single | -0.112 |
towards | -0.100 |
toward | -0.099 |
down | -0.099 |
shopping | -0.097 |
Token | They |
Token ID | 1119 |
Feature activation | -2.38e-02 |
Pos logit contributions | |
single | +0.002 |
toward | +0.002 |
down | +0.002 |
shopping | +0.002 |
purchases | +0.002 |
Neg logit contributions | |
ily | -0.002 |
ia | -0.001 |
era | -0.001 |
ena | -0.001 |
una | -0.001 |
Token | hug |
Token ID | 16225 |
Feature activation | -1.87e-02 |
Pos logit contributions | |
ily | +0.039 |
ia | +0.038 |
una | +0.038 |
ena | +0.037 |
They | +0.037 |
Neg logit contributions | |
use | -0.032 |
purchases | -0.031 |
fy | -0.030 |
single | -0.030 |
shopping | -0.030 |
Token | . |
Token ID | 13 |
Feature activation | +9.93e-03 |
Pos logit contributions | |
ily | +0.027 |
ia | +0.025 |
ickets | +0.023 |
Ben | +0.023 |
era | +0.023 |
Neg logit contributions | |
down | -0.033 |
toward | -0.031 |
heels | -0.030 |
weights | -0.030 |
shopping | -0.029 |
Token |
|
Token ID | 198 |
Feature activation | -1.98e-03 |
Pos logit contributions | |
single | +0.019 |
toward | +0.018 |
down | +0.018 |
shopping | +0.017 |
purchases | +0.017 |
Neg logit contributions | |
ily | -0.011 |
ia | -0.011 |
era | -0.010 |
ena | -0.010 |
una | -0.010 |
Token | found |
Token ID | 1043 |
Feature activation | -1.52e-02 |
Pos logit contributions | |
ge | +0.034 |
purchases | +0.034 |
unnecessary | +0.034 |
single | +0.034 |
toward | +0.032 |
Neg logit contributions | |
ily | -0.019 |
They | -0.017 |
ia | -0.017 |
Ben | -0.016 |
Ben | -0.016 |
Token | . |
Token ID | 13 |
Feature activation | -2.15e-03 |
Pos logit contributions | |
ily | +0.014 |
They | +0.011 |
ia | +0.011 |
Ben | +0.010 |
Ben | +0.010 |
Neg logit contributions | |
unnecessary | -0.035 |
ge | -0.035 |
purchases | -0.035 |
single | -0.035 |
insignificant | -0.033 |
Token | They |
Token ID | 1119 |
Feature activation | -3.01e-02 |
Pos logit contributions | |
ily | +0.003 |
era | +0.003 |
ia | +0.003 |
iv | +0.003 |
ena | +0.003 |
Neg logit contributions | |
single | -0.004 |
unnecessary | -0.003 |
proper | -0.003 |
down | -0.003 |
allowance | -0.003 |
Token | are |
Token ID | 389 |
Feature activation | -3.51e-02 |
Pos logit contributions | |
ily | +0.042 |
ia | +0.039 |
They | +0.039 |
ena | +0.038 |
era | +0.037 |
Neg logit contributions | |
ge | -0.051 |
single | -0.051 |
purchases | -0.051 |
unnecessary | -0.050 |
use | -0.049 |
Token | very |
Token ID | 845 |
Feature activation | +1.13e-02 |
Pos logit contributions | |
ily | +0.060 |
They | +0.055 |
ia | +0.055 |
Ben | +0.054 |
era | +0.053 |
Neg logit contributions | |
ge | -0.052 |
single | -0.051 |
unnecessary | -0.051 |
purchases | -0.050 |
shopping | -0.048 |
Token | proud |
Token ID | 6613 |
Feature activation | -6.86e-02 |
Pos logit contributions | |
ge | +0.016 |
unnecessary | +0.016 |
single | +0.016 |
purchases | +0.016 |
toward | +0.015 |
Neg logit contributions | |
ily | -0.020 |
They | -0.019 |
ia | -0.019 |
Ben | -0.018 |
Ben | -0.018 |
Token | . |
Token ID | 13 |
Feature activation | -3.78e-03 |
Pos logit contributions | |
era | +0.094 |
ily | +0.093 |
room | +0.093 |
OM | +0.090 |
ena | +0.090 |
Neg logit contributions | |
single | -0.115 |
toward | -0.106 |
towards | -0.106 |
down | -0.104 |
shopping | -0.101 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | -2.09e-02 |
Pos logit contributions | |
ily | +0.005 |
ia | +0.004 |
era | +0.004 |
ena | +0.004 |
room | +0.004 |
Neg logit contributions | |
single | -0.007 |
unnecessary | -0.006 |
toward | -0.006 |
down | -0.006 |
pouch | -0.006 |
Token | L |
Token ID | 43 |
Feature activation | -2.96e-02 |
Pos logit contributions | |
ily | +0.038 |
ia | +0.037 |
ena | +0.037 |
era | +0.036 |
They | +0.036 |
Neg logit contributions | |
single | -0.027 |
shopping | -0.025 |
down | -0.024 |
ge | -0.024 |
use | -0.024 |
Token | ily |
Token ID | 813 |
Feature activation | -2.22e-03 |
Pos logit contributions | |
They | +0.027 |
der | +0.026 |
ickets | +0.026 |
era | +0.024 |
All | +0.024 |
Neg logit contributions | |
shopping | -0.060 |
ge | -0.060 |
purchases | -0.058 |
single | -0.057 |
home | -0.057 |
Token | was |
Token ID | 373 |
Feature activation | -1.20e-02 |
Pos logit contributions | |
ily | +0.003 |
They | +0.003 |
ia | +0.003 |
Ben | +0.003 |
era | +0.003 |
Neg logit contributions | |
ge | -0.004 |
single | -0.004 |
unnecessary | -0.004 |
purchases | -0.004 |
shopping | -0.004 |
Token | park |
Token ID | 3952 |
Feature activation | -1.41e-02 |
Pos logit contributions | |
ily | +0.052 |
They | +0.049 |
ia | +0.048 |
Ben | +0.048 |
Ben | +0.047 |
Neg logit contributions | |
ge | -0.066 |
purchases | -0.061 |
unnecessary | -0.061 |
ging | -0.059 |
single | -0.059 |
Token | . |
Token ID | 13 |
Feature activation | -4.48e-03 |
Pos logit contributions | |
ily | +0.015 |
They | +0.014 |
ia | +0.013 |
Ben | +0.013 |
ena | +0.012 |
Neg logit contributions | |
ge | -0.030 |
unnecessary | -0.029 |
single | -0.029 |
purchases | -0.029 |
use | -0.028 |
Token | But |
Token ID | 887 |
Feature activation | -7.77e-03 |
Pos logit contributions | |
ily | +0.006 |
They | +0.005 |
ia | +0.005 |
Ben | +0.005 |
ena | +0.005 |
Neg logit contributions | |
ge | -0.008 |
single | -0.008 |
unnecessary | -0.008 |
purchases | -0.008 |
shopping | -0.008 |
Token | when |
Token ID | 618 |
Feature activation | +1.37e-03 |
Pos logit contributions | |
ily | +0.009 |
Ben | +0.008 |
ia | +0.008 |
They | +0.008 |
ena | +0.008 |
Neg logit contributions | |
unnecessary | -0.016 |
ge | -0.016 |
purchases | -0.016 |
use | -0.015 |
single | -0.015 |
Token | she |
Token ID | 673 |
Feature activation | +8.15e-03 |
Pos logit contributions | |
ge | +0.003 |
single | +0.003 |
unnecessary | +0.003 |
purchases | +0.003 |
toward | +0.003 |
Neg logit contributions | |
ily | -0.001 |
They | -0.001 |
ia | -0.001 |
Ben | -0.001 |
Ben | -0.001 |
Token | looked |
Token ID | 3114 |
Feature activation | +5.92e-02 |
Pos logit contributions | |
unnecessary | +0.016 |
purchases | +0.016 |
ge | +0.015 |
weights | +0.015 |
single | +0.015 |
Neg logit contributions | |
ily | -0.010 |
ia | -0.009 |
Ben | -0.009 |
Ben | -0.009 |
They | -0.009 |
Token | back |
Token ID | 736 |
Feature activation | +3.31e-02 |
Pos logit contributions | |
ge | +0.094 |
purchases | +0.091 |
allowance | +0.083 |
unnecessary | +0.083 |
picking | +0.082 |
Neg logit contributions | |
ily | -0.101 |
Ben | -0.097 |
They | -0.088 |
Ben | -0.088 |
ia | -0.087 |
Token | , |
Token ID | 11 |
Feature activation | -6.94e-03 |
Pos logit contributions | |
ge | +0.064 |
unnecessary | +0.062 |
purchases | +0.061 |
picking | +0.060 |
single | +0.060 |
Neg logit contributions | |
Ben | -0.041 |
ily | -0.040 |
ia | -0.039 |
They | -0.036 |
Tom | -0.036 |
Token | she |
Token ID | 673 |
Feature activation | -3.88e-04 |
Pos logit contributions | |
ily | +0.008 |
ia | +0.007 |
They | +0.007 |
Ben | +0.007 |
ena | +0.007 |
Neg logit contributions | |
ge | -0.014 |
single | -0.014 |
unnecessary | -0.014 |
purchases | -0.014 |
shopping | -0.013 |
Token | saw |
Token ID | 2497 |
Feature activation | -2.57e-02 |
Pos logit contributions | |
ily | +0.001 |
They | +0.001 |
ia | +0.001 |
Ben | +0.001 |
ena | +0.001 |
Neg logit contributions | |
ge | -0.001 |
single | -0.001 |
unnecessary | -0.001 |
purchases | -0.001 |
shopping | -0.001 |
Token | that |
Token ID | 326 |
Feature activation | -2.81e-02 |
Pos logit contributions | |
ily | +0.026 |
ia | +0.025 |
They | +0.024 |
Ben | +0.023 |
era | +0.023 |
Neg logit contributions | |
single | -0.053 |
ge | -0.053 |
unnecessary | -0.052 |
purchases | -0.052 |
shopping | -0.051 |
Token | sadly |
Token ID | 21098 |
Feature activation | +1.50e-02 |
Pos logit contributions | |
ge | +0.004 |
unnecessary | +0.004 |
single | +0.004 |
purchases | +0.004 |
shopping | +0.004 |
Neg logit contributions | |
ily | -0.002 |
They | -0.002 |
ia | -0.002 |
Ben | -0.002 |
ena | -0.002 |
Token | decided |
Token ID | 3066 |
Feature activation | +3.64e-02 |
Pos logit contributions | |
ge | +0.026 |
single | +0.026 |
unnecessary | +0.026 |
purchases | +0.026 |
toward | +0.025 |
Neg logit contributions | |
ily | -0.021 |
ia | -0.019 |
They | -0.019 |
era | -0.018 |
ena | -0.018 |
Token | to |
Token ID | 284 |
Feature activation | +2.46e-03 |
Pos logit contributions | |
ge | +0.057 |
single | +0.057 |
unnecessary | +0.056 |
purchases | +0.056 |
use | +0.053 |
Neg logit contributions | |
ily | -0.058 |
They | -0.055 |
Ben | -0.053 |
ia | -0.052 |
Ben | -0.052 |
Token | put |
Token ID | 1234 |
Feature activation | +6.41e-03 |
Pos logit contributions | |
ge | +0.003 |
purchases | +0.003 |
heels | +0.003 |
down | +0.003 |
single | +0.003 |
Neg logit contributions | |
ily | -0.004 |
ia | -0.004 |
ena | -0.004 |
Ben | -0.004 |
They | -0.004 |
Token | his |
Token ID | 465 |
Feature activation | +1.03e-02 |
Pos logit contributions | |
ge | +0.014 |
purchases | +0.013 |
unnecessary | +0.013 |
use | +0.013 |
picking | +0.013 |
Neg logit contributions | |
ily | -0.008 |
Ben | -0.007 |
They | -0.007 |
ia | -0.006 |
Ben | -0.006 |
Token | bike |
Token ID | 7161 |
Feature activation | +5.71e-02 |
Pos logit contributions | |
ge | +0.019 |
unnecessary | +0.017 |
picking | +0.017 |
single | +0.017 |
purchases | +0.017 |
Neg logit contributions | |
ily | -0.015 |
ia | -0.015 |
Ben | -0.014 |
ena | -0.014 |
Ben | -0.013 |
Token | back |
Token ID | 736 |
Feature activation | +2.31e-02 |
Pos logit contributions | |
unnecessary | +0.077 |
purchases | +0.072 |
single | +0.069 |
use | +0.069 |
ening | +0.069 |
Neg logit contributions | |
ily | -0.105 |
They | -0.100 |
not | -0.094 |
room | -0.092 |
Ben | -0.091 |
Token | in |
Token ID | 287 |
Feature activation | +8.27e-03 |
Pos logit contributions | |
purchases | +0.044 |
unnecessary | +0.044 |
picking | +0.043 |
single | +0.042 |
insignificant | +0.042 |
Neg logit contributions | |
ily | -0.029 |
They | -0.026 |
Ben | -0.025 |
ia | -0.025 |
ena | -0.024 |
Token | the |
Token ID | 262 |
Feature activation | -2.73e-02 |
Pos logit contributions | |
unnecessary | +0.017 |
picking | +0.017 |
ge | +0.016 |
purchases | +0.016 |
use | +0.016 |
Neg logit contributions | |
ily | -0.009 |
Ben | -0.009 |
They | -0.008 |
Ben | -0.008 |
ia | -0.008 |
Token | garage |
Token ID | 15591 |
Feature activation | +6.97e-03 |
Pos logit contributions | |
ily | +0.037 |
Ben | +0.034 |
ia | +0.034 |
Ben | +0.034 |
They | +0.033 |
Neg logit contributions | |
ge | -0.053 |
purchases | -0.048 |
ging | -0.048 |
unnecessary | -0.047 |
picking | -0.046 |
Token | and |
Token ID | 290 |
Feature activation | +7.59e-03 |
Pos logit contributions | |
ge | +0.013 |
single | +0.013 |
unnecessary | +0.013 |
purchases | +0.013 |
shopping | +0.013 |
Neg logit contributions | |
ily | -0.009 |
They | -0.008 |
ia | -0.008 |
Ben | -0.008 |
ena | -0.007 |
Token | packs |
Token ID | 32377 |
Feature activation | +1.36e-02 |
Pos logit contributions | |
unnecessary | +0.022 |
ge | +0.022 |
ging | +0.021 |
insignificant | +0.021 |
picking | +0.021 |
Neg logit contributions | |
ily | -0.024 |
ia | -0.023 |
They | -0.023 |
ena | -0.022 |
Ben | -0.022 |
Token | and |
Token ID | 290 |
Feature activation | +9.85e-03 |
Pos logit contributions | |
unnecessary | +0.025 |
ge | +0.025 |
single | +0.025 |
purchases | +0.025 |
use | +0.024 |
Neg logit contributions | |
ily | -0.018 |
They | -0.016 |
ia | -0.016 |
Ben | -0.015 |
ena | -0.015 |
Token | grabbed |
Token ID | 13176 |
Feature activation | +9.11e-03 |
Pos logit contributions | |
ge | +0.018 |
single | +0.018 |
unnecessary | +0.017 |
purchases | +0.017 |
ging | +0.016 |
Neg logit contributions | |
ily | -0.014 |
They | -0.013 |
ia | -0.012 |
Ben | -0.012 |
Ben | -0.012 |
Token | their |
Token ID | 511 |
Feature activation | +6.28e-03 |
Pos logit contributions | |
ge | +0.022 |
single | +0.021 |
unnecessary | +0.021 |
purchases | +0.020 |
use | +0.020 |
Neg logit contributions | |
ily | -0.008 |
Ben | -0.007 |
They | -0.007 |
ia | -0.007 |
ena | -0.006 |
Token | bin |
Token ID | 9874 |
Feature activation | +2.03e-02 |
Pos logit contributions | |
ge | +0.012 |
single | +0.011 |
aback | +0.011 |
ging | +0.011 |
unnecessary | +0.011 |
Neg logit contributions | |
ily | -0.008 |
ia | -0.008 |
Ben | -0.008 |
They | -0.008 |
Ben | -0.008 |
Token | ocular |
Token ID | 37320 |
Feature activation | +5.80e-02 |
Pos logit contributions | |
unnecessary | +0.027 |
single | +0.027 |
ge | +0.027 |
purchases | +0.026 |
use | +0.025 |
Neg logit contributions | |
ily | -0.037 |
They | -0.035 |
ia | -0.035 |
ena | -0.033 |
era | -0.033 |
Token | s |
Token ID | 82 |
Feature activation | +2.72e-03 |
Pos logit contributions | |
purchases | +0.084 |
unnecessary | +0.083 |
single | +0.081 |
ge | +0.079 |
picking | +0.078 |
Neg logit contributions | |
They | -0.097 |
ily | -0.096 |
ia | -0.092 |
Ben | -0.091 |
Ben | -0.088 |
Token | . |
Token ID | 13 |
Feature activation | +1.25e-02 |
Pos logit contributions | |
unnecessary | +0.006 |
single | +0.006 |
ge | +0.006 |
purchases | +0.006 |
insignificant | +0.006 |
Neg logit contributions | |
ily | -0.003 |
They | -0.002 |
ia | -0.002 |
Ben | -0.002 |
ena | -0.002 |
Token | They |
Token ID | 1119 |
Feature activation | -2.28e-02 |
Pos logit contributions | |
purchases | +0.027 |
single | +0.027 |
ge | +0.027 |
unnecessary | +0.027 |
shopping | +0.026 |
Neg logit contributions | |
ily | -0.012 |
ia | -0.011 |
They | -0.010 |
era | -0.010 |
ena | -0.010 |
Token | said |
Token ID | 531 |
Feature activation | +2.73e-02 |
Pos logit contributions | |
ily | +0.034 |
ia | +0.032 |
They | +0.032 |
Ben | +0.031 |
era | +0.030 |
Neg logit contributions | |
ge | -0.038 |
purchases | -0.037 |
single | -0.036 |
unnecessary | -0.035 |
shopping | -0.035 |
Token | goodbye |
Token ID | 24829 |
Feature activation | -1.30e-02 |
Pos logit contributions | |
ge | +0.046 |
single | +0.046 |
unnecessary | +0.046 |
purchases | +0.046 |
shopping | +0.044 |
Neg logit contributions | |
ily | -0.039 |
They | -0.036 |
ia | -0.036 |
Ben | -0.035 |
ena | -0.035 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | -1.34e-02 |
Pos logit contributions | |
ily | +0.034 |
ia | +0.031 |
era | +0.031 |
ena | +0.030 |
Ben | +0.030 |
Neg logit contributions | |
down | -0.044 |
shopping | -0.043 |
toward | -0.042 |
purchases | -0.042 |
ge | -0.041 |
Token | John |
Token ID | 7554 |
Feature activation | +1.39e-02 |
Pos logit contributions | |
ily | +0.022 |
ia | +0.021 |
ena | +0.021 |
era | +0.021 |
They | +0.020 |
Neg logit contributions | |
single | -0.019 |
down | -0.018 |
ge | -0.017 |
shopping | -0.017 |
use | -0.017 |
Token | and |
Token ID | 290 |
Feature activation | -5.19e-03 |
Pos logit contributions | |
ge | +0.028 |
unnecessary | +0.028 |
single | +0.028 |
purchases | +0.027 |
shopping | +0.026 |
Neg logit contributions | |
ily | -0.016 |
They | -0.014 |
ia | -0.014 |
Ben | -0.014 |
ena | -0.013 |
Token | his |
Token ID | 465 |
Feature activation | -8.70e-03 |
Pos logit contributions | |
ily | +0.008 |
ia | +0.008 |
era | +0.007 |
una | +0.007 |
They | +0.007 |
Neg logit contributions | |
down | -0.008 |
single | -0.008 |
toward | -0.008 |
ge | -0.008 |
purchases | -0.008 |
Token | mom |
Token ID | 1995 |
Feature activation | +2.00e-02 |
Pos logit contributions | |
ily | +0.009 |
They | +0.008 |
ia | +0.008 |
Ben | +0.008 |
era | +0.008 |
Neg logit contributions | |
single | -0.018 |
purchases | -0.018 |
shopping | -0.017 |
unnecessary | -0.017 |
ge | -0.017 |
Token | went |
Token ID | 1816 |
Feature activation | +5.73e-02 |
Pos logit contributions | |
ge | +0.033 |
purchases | +0.033 |
single | +0.033 |
shopping | +0.032 |
unnecessary | +0.031 |
Neg logit contributions | |
ily | -0.030 |
ia | -0.028 |
They | -0.027 |
Ben | -0.026 |
era | -0.026 |
Token | to |
Token ID | 284 |
Feature activation | +1.18e-02 |
Pos logit contributions | |
unnecessary | +0.099 |
insignificant | +0.095 |
ge | +0.094 |
single | +0.090 |
purchases | +0.090 |
Neg logit contributions | |
ily | -0.087 |
They | -0.085 |
Ben | -0.083 |
ia | -0.076 |
ena | -0.075 |
Token | the |
Token ID | 262 |
Feature activation | -3.39e-02 |
Pos logit contributions | |
ge | +0.023 |
unnecessary | +0.023 |
single | +0.023 |
purchases | +0.022 |
ging | +0.022 |
Neg logit contributions | |
ily | -0.014 |
They | -0.013 |
ia | -0.013 |
Ben | -0.013 |
Ben | -0.013 |
Token | cafe |
Token ID | 26725 |
Feature activation | -2.26e-02 |
Pos logit contributions | |
ily | +0.049 |
They | +0.045 |
ia | +0.045 |
Ben | +0.043 |
ena | +0.043 |
Neg logit contributions | |
ge | -0.058 |
single | -0.058 |
unnecessary | -0.057 |
purchases | -0.057 |
shopping | -0.055 |
Token | . |
Token ID | 13 |
Feature activation | +1.32e-02 |
Pos logit contributions | |
ily | +0.030 |
ia | +0.028 |
Ben | +0.028 |
era | +0.026 |
una | +0.026 |
Neg logit contributions | |
shopping | -0.042 |
ge | -0.041 |
purchases | -0.041 |
single | -0.040 |
toward | -0.040 |
Token | John |
Token ID | 1757 |
Feature activation | +1.45e-02 |
Pos logit contributions | |
ge | +0.030 |
unnecessary | +0.030 |
single | +0.029 |
purchases | +0.029 |
ging | +0.028 |
Neg logit contributions | |
ily | -0.012 |
They | -0.011 |
ia | -0.010 |
Ben | -0.010 |
Ben | -0.010 |
Token | the |
Token ID | 262 |
Feature activation | +2.17e-02 |
Pos logit contributions | |
ily | +0.013 |
Ben | +0.012 |
They | +0.011 |
ia | +0.011 |
ena | +0.010 |
Neg logit contributions | |
unnecessary | -0.031 |
ge | -0.031 |
single | -0.031 |
use | -0.031 |
purchases | -0.030 |
Token | clothes |
Token ID | 8242 |
Feature activation | +5.21e-03 |
Pos logit contributions | |
ge | +0.042 |
unnecessary | +0.041 |
single | +0.041 |
purchases | +0.041 |
use | +0.040 |
Neg logit contributions | |
ily | -0.028 |
They | -0.025 |
ia | -0.024 |
Ben | -0.024 |
Ben | -0.023 |
Token | and |
Token ID | 290 |
Feature activation | +1.31e-02 |
Pos logit contributions | |
unnecessary | +0.011 |
ge | +0.011 |
single | +0.011 |
purchases | +0.011 |
use | +0.010 |
Neg logit contributions | |
ily | -0.006 |
They | -0.005 |
ia | -0.005 |
Ben | -0.005 |
Ben | -0.005 |
Token | folded |
Token ID | 24650 |
Feature activation | +1.71e-02 |
Pos logit contributions | |
ge | +0.023 |
single | +0.023 |
unnecessary | +0.023 |
purchases | +0.023 |
use | +0.022 |
Neg logit contributions | |
ily | -0.018 |
They | -0.017 |
ia | -0.016 |
Ben | -0.016 |
ena | -0.016 |
Token | them |
Token ID | 606 |
Feature activation | +1.65e-02 |
Pos logit contributions | |
unnecessary | +0.042 |
use | +0.042 |
picking | +0.041 |
insignificant | +0.039 |
single | +0.039 |
Neg logit contributions | |
Ben | -0.015 |
ily | -0.015 |
They | -0.014 |
Tom | -0.011 |
ia | -0.011 |
Token | neatly |
Token ID | 29776 |
Feature activation | +7.13e-02 |
Pos logit contributions | |
unnecessary | +0.030 |
use | +0.029 |
picking | +0.028 |
single | +0.028 |
reins | +0.027 |
Neg logit contributions | |
They | -0.024 |
ily | -0.023 |
Ben | -0.021 |
ia | -0.021 |
era | -0.020 |
Token | before |
Token ID | 878 |
Feature activation | -3.47e-02 |
Pos logit contributions | |
unnecessary | +0.161 |
picking | +0.150 |
Gor | +0.145 |
purchases | +0.144 |
single | +0.144 |
Neg logit contributions | |
ily | -0.079 |
They | -0.071 |
Ben | -0.061 |
ia | -0.060 |
iv | -0.060 |
Token | putting |
Token ID | 5137 |
Feature activation | +1.50e-02 |
Pos logit contributions | |
ily | +0.049 |
They | +0.047 |
ia | +0.045 |
Ben | +0.044 |
Ben | +0.044 |
Neg logit contributions | |
ge | -0.059 |
unnecessary | -0.059 |
single | -0.059 |
purchases | -0.058 |
use | -0.056 |
Token | them |
Token ID | 606 |
Feature activation | +3.20e-02 |
Pos logit contributions | |
picking | +0.036 |
unnecessary | +0.036 |
ge | +0.036 |
insignificant | +0.035 |
use | +0.035 |
Neg logit contributions | |
Ben | -0.012 |
ily | -0.012 |
They | -0.010 |
Tom | -0.009 |
a | -0.008 |
Token | back |
Token ID | 736 |
Feature activation | +3.34e-02 |
Pos logit contributions | |
single | +0.053 |
unnecessary | +0.053 |
picking | +0.051 |
purchases | +0.050 |
use | +0.050 |
Neg logit contributions | |
ily | -0.050 |
They | -0.045 |
Ben | -0.043 |
ia | -0.040 |
not | -0.038 |
Token | into |
Token ID | 656 |
Feature activation | +4.17e-04 |
Pos logit contributions | |
unnecessary | +0.070 |
single | +0.068 |
insignificant | +0.067 |
picking | +0.067 |
purchases | +0.066 |
Neg logit contributions | |
ily | -0.037 |
They | -0.033 |
Ben | -0.031 |
who | -0.029 |
ena | -0.029 |
Token | singing |
Token ID | 13777 |
Feature activation | -1.24e-02 |
Pos logit contributions | |
ily | +0.031 |
They | +0.030 |
ia | +0.029 |
Ben | +0.029 |
ena | +0.029 |
Neg logit contributions | |
unnecessary | -0.030 |
ge | -0.030 |
use | -0.029 |
purchases | -0.029 |
single | -0.028 |
Token | . |
Token ID | 13 |
Feature activation | +4.63e-03 |
Pos logit contributions | |
ily | +0.017 |
ia | +0.015 |
Ben | +0.015 |
ena | +0.015 |
They | +0.015 |
Neg logit contributions | |
single | -0.023 |
down | -0.022 |
ge | -0.022 |
toward | -0.022 |
unnecessary | -0.021 |
Token | |
Token ID | 220 |
Feature activation | -3.72e-03 |
Pos logit contributions | |
single | +0.008 |
shopping | +0.008 |
purchases | +0.008 |
down | +0.008 |
picking | +0.008 |
Neg logit contributions | |
ena | -0.006 |
ia | -0.006 |
ily | -0.006 |
era | -0.005 |
They | -0.005 |
Token |
|
Token ID | 198 |
Feature activation | +1.54e-02 |
Pos logit contributions | |
ily | +0.004 |
era | +0.004 |
ia | +0.004 |
ena | +0.004 |
Ben | +0.004 |
Neg logit contributions | |
single | -0.007 |
ge | -0.007 |
down | -0.007 |
shopping | -0.007 |
unnecessary | -0.007 |
Token |
|
Token ID | 198 |
Feature activation | +1.28e-02 |
Pos logit contributions | |
single | +0.029 |
ge | +0.029 |
toward | +0.027 |
shopping | +0.027 |
purchases | +0.027 |
Neg logit contributions | |
ily | -0.018 |
ena | -0.017 |
era | -0.016 |
ia | -0.016 |
Ben | -0.015 |
Token | The |
Token ID | 464 |
Feature activation | +1.20e-02 |
Pos logit contributions | |
single | +0.021 |
ge | +0.020 |
picking | +0.019 |
unnecessary | +0.019 |
insignificant | +0.019 |
Neg logit contributions | |
Ben | -0.017 |
ena | -0.017 |
ia | -0.017 |
era | -0.017 |
ily | -0.017 |
Token | k |
Token ID | 479 |
Feature activation | +1.48e-03 |
Pos logit contributions | |
ge | +0.024 |
use | +0.023 |
toward | +0.023 |
unnecessary | +0.023 |
aback | +0.022 |
Neg logit contributions | |
ily | -0.016 |
They | -0.015 |
Ben | -0.014 |
twins | -0.014 |
ia | -0.014 |
Token | ang |
Token ID | 648 |
Feature activation | -6.78e-03 |
Pos logit contributions | |
purchases | +0.002 |
unnecessary | +0.002 |
ge | +0.002 |
shopping | +0.002 |
use | +0.002 |
Neg logit contributions | |
ily | -0.003 |
They | -0.003 |
Ben | -0.003 |
era | -0.003 |
ia | -0.003 |
Token | aroo |
Token ID | 38049 |
Feature activation | -3.80e-03 |
Pos logit contributions | |
ily | +0.011 |
ia | +0.010 |
They | +0.010 |
Ben | +0.010 |
ena | +0.010 |
Neg logit contributions | |
unnecessary | -0.011 |
ge | -0.011 |
purchases | -0.011 |
single | -0.010 |
shopping | -0.010 |
Token | enjoyed |
Token ID | 8359 |
Feature activation | -2.37e-02 |
Pos logit contributions | |
ily | +0.005 |
They | +0.004 |
ia | +0.004 |
Ben | +0.004 |
ena | +0.004 |
Neg logit contributions | |
ge | -0.007 |
single | -0.007 |
purchases | -0.007 |
unnecessary | -0.007 |
toward | -0.007 |
Token | the |
Token ID | 262 |
Feature activation | +2.96e-04 |
Pos logit contributions | |
ily | +0.031 |
They | +0.029 |
ia | +0.028 |
Ben | +0.028 |
Ben | +0.027 |
Neg logit contributions | |
ge | -0.045 |
unnecessary | -0.045 |
purchases | -0.044 |
single | -0.044 |
use | -0.043 |
Token | heard |
Token ID | 2982 |
Feature activation | -4.01e-03 |
Pos logit contributions | |
unnecessary | +0.004 |
ge | +0.004 |
single | +0.004 |
purchases | +0.004 |
shopping | +0.004 |
Neg logit contributions | |
ily | -0.003 |
ia | -0.003 |
They | -0.003 |
era | -0.003 |
Ben | -0.003 |
Token | the |
Token ID | 262 |
Feature activation | -1.04e-02 |
Pos logit contributions | |
ily | +0.006 |
Ben | +0.005 |
ia | +0.005 |
era | +0.005 |
ena | +0.005 |
Neg logit contributions | |
unnecessary | -0.007 |
single | -0.007 |
down | -0.007 |
shopping | -0.007 |
purchases | -0.006 |
Token | birds |
Token ID | 10087 |
Feature activation | -2.06e-02 |
Pos logit contributions | |
ily | +0.015 |
ia | +0.014 |
They | +0.013 |
Ben | +0.013 |
ena | +0.013 |
Neg logit contributions | |
single | -0.018 |
unnecessary | -0.018 |
purchases | -0.017 |
ge | -0.017 |
shopping | -0.017 |
Token | sing |
Token ID | 1702 |
Feature activation | -1.13e-03 |
Pos logit contributions | |
ily | +0.032 |
ia | +0.029 |
They | +0.029 |
Ben | +0.028 |
ena | +0.028 |
Neg logit contributions | |
ge | -0.034 |
single | -0.033 |
unnecessary | -0.033 |
purchases | -0.032 |
shopping | -0.031 |
Token | a |
Token ID | 257 |
Feature activation | +1.43e-02 |
Pos logit contributions | |
ily | +0.001 |
ia | +0.001 |
Ben | +0.001 |
era | +0.001 |
They | +0.001 |
Neg logit contributions | |
single | -0.002 |
ge | -0.002 |
unnecessary | -0.002 |
toward | -0.002 |
down | -0.002 |
Token | loud |
Token ID | 7812 |
Feature activation | +1.61e-02 |
Pos logit contributions | |
single | +0.023 |
ge | +0.022 |
unnecessary | +0.022 |
purchases | +0.021 |
toward | +0.021 |
Neg logit contributions | |
ily | -0.024 |
ena | -0.021 |
ia | -0.021 |
Ben | -0.020 |
era | -0.020 |
Token | song |
Token ID | 3496 |
Feature activation | -2.03e-02 |
Pos logit contributions | |
ge | +0.033 |
unnecessary | +0.033 |
single | +0.033 |
purchases | +0.033 |
shopping | +0.031 |
Neg logit contributions | |
ily | -0.018 |
They | -0.016 |
ia | -0.016 |
Ben | -0.015 |
ena | -0.015 |
Token | . |
Token ID | 13 |
Feature activation | +6.88e-03 |
Pos logit contributions | |
ily | +0.027 |
Ben | +0.026 |
ia | +0.025 |
era | +0.024 |
ena | +0.023 |
Neg logit contributions | |
single | -0.037 |
down | -0.036 |
toward | -0.036 |
ge | -0.035 |
shopping | -0.034 |
Token |
|
Token ID | 198 |
Feature activation | +1.77e-02 |
Pos logit contributions | |
down | +0.012 |
single | +0.012 |
shopping | +0.012 |
purchases | +0.012 |
toward | +0.012 |
Neg logit contributions | |
ily | -0.009 |
ia | -0.008 |
ena | -0.008 |
era | -0.008 |
They | -0.008 |
Token |
|
Token ID | 198 |
Feature activation | +1.64e-02 |
Pos logit contributions | |
single | +0.034 |
ge | +0.033 |
toward | +0.031 |
shopping | +0.031 |
purchases | +0.031 |
Neg logit contributions | |
ily | -0.021 |
era | -0.020 |
ena | -0.019 |
ia | -0.019 |
iv | -0.018 |
Token | While |
Token ID | 3633 |
Feature activation | -6.44e-03 |
Pos logit contributions | |
ge | +0.026 |
single | +0.026 |
fy | +0.025 |
toward | +0.024 |
ging | +0.024 |
Neg logit contributions | |
ia | -0.022 |
era | -0.022 |
ily | -0.022 |
Ben | -0.022 |
ena | -0.021 |
Token | boss |
Token ID | 6478 |
Feature activation | -1.60e-02 |
Pos logit contributions | |
ge | +0.019 |
unnecessary | +0.019 |
single | +0.018 |
purchases | +0.018 |
use | +0.018 |
Neg logit contributions | |
ily | -0.025 |
They | -0.023 |
ia | -0.023 |
Ben | -0.022 |
Ben | -0.022 |
Token | y |
Token ID | 88 |
Feature activation | -2.51e-02 |
Pos logit contributions | |
ily | +0.033 |
ia | +0.031 |
They | +0.031 |
Ben | +0.030 |
era | +0.029 |
Neg logit contributions | |
ge | -0.019 |
single | -0.018 |
unnecessary | -0.017 |
ging | -0.017 |
purchases | -0.017 |
Token | and |
Token ID | 290 |
Feature activation | +9.38e-03 |
Pos logit contributions | |
ily | +0.027 |
They | +0.024 |
ia | +0.024 |
Ben | +0.023 |
ena | +0.022 |
Neg logit contributions | |
ge | -0.052 |
single | -0.052 |
unnecessary | -0.052 |
purchases | -0.051 |
shopping | -0.049 |
Token | they |
Token ID | 484 |
Feature activation | -6.38e-03 |
Pos logit contributions | |
ge | +0.017 |
single | +0.017 |
purchases | +0.017 |
unnecessary | +0.017 |
shopping | +0.017 |
Neg logit contributions | |
ily | -0.012 |
ia | -0.011 |
Ben | -0.011 |
era | -0.010 |
They | -0.010 |
Token | said |
Token ID | 531 |
Feature activation | +3.10e-02 |
Pos logit contributions | |
ia | +0.009 |
ily | +0.009 |
Ben | +0.008 |
una | +0.008 |
era | +0.008 |
Neg logit contributions | |
ge | -0.011 |
single | -0.011 |
down | -0.011 |
purchases | -0.011 |
toward | -0.011 |
Token | , |
Token ID | 11 |
Feature activation | +2.40e-02 |
Pos logit contributions | |
single | +0.053 |
ge | +0.053 |
unnecessary | +0.053 |
purchases | +0.053 |
shopping | +0.051 |
Neg logit contributions | |
ily | -0.043 |
ia | -0.040 |
They | -0.040 |
Ben | -0.039 |
ena | -0.039 |
Token | " |
Token ID | 366 |
Feature activation | +6.85e-03 |
Pos logit contributions | |
ge | +0.042 |
single | +0.042 |
unnecessary | +0.041 |
purchases | +0.041 |
shopping | +0.040 |
Neg logit contributions | |
ily | -0.034 |
ia | -0.031 |
They | -0.031 |
Ben | -0.030 |
era | -0.029 |
Token | Jake |
Token ID | 43930 |
Feature activation | +1.89e-02 |
Pos logit contributions | |
ge | +0.012 |
single | +0.012 |
ging | +0.011 |
toward | +0.011 |
unnecessary | +0.011 |
Neg logit contributions | |
ia | -0.010 |
ily | -0.010 |
una | -0.008 |
Ben | -0.008 |
iv | -0.008 |
Token | , |
Token ID | 11 |
Feature activation | -7.56e-03 |
Pos logit contributions | |
ge | +0.034 |
single | +0.033 |
unnecessary | +0.033 |
purchases | +0.033 |
shopping | +0.031 |
Neg logit contributions | |
ily | -0.026 |
They | -0.024 |
ia | -0.024 |
Ben | -0.023 |
ena | -0.023 |
Token | did |
Token ID | 750 |
Feature activation | -2.40e-02 |
Pos logit contributions | |
ily | +0.010 |
ia | +0.010 |
They | +0.009 |
Ben | +0.009 |
era | +0.009 |
Neg logit contributions | |
unnecessary | -0.013 |
ge | -0.013 |
shopping | -0.012 |
down | -0.012 |
single | -0.012 |
Token | you |
Token ID | 345 |
Feature activation | -1.45e-02 |
Pos logit contributions | |
ily | +0.028 |
ia | +0.024 |
ickets | +0.024 |
room | +0.024 |
Ben | +0.023 |
Neg logit contributions | |
unnecessary | -0.051 |
anywhere | -0.046 |
shopping | -0.046 |
ge | -0.045 |
allowance | -0.045 |
Token |
|
Token ID | 198 |
Feature activation | +1.75e-02 |
Pos logit contributions | |
ge | +0.004 |
single | +0.004 |
down | +0.004 |
purchases | +0.004 |
shopping | +0.004 |
Neg logit contributions | |
ily | -0.002 |
era | -0.002 |
ia | -0.002 |
Ben | -0.002 |
ena | -0.002 |
Token |
|
Token ID | 198 |
Feature activation | +1.29e-02 |
Pos logit contributions | |
ge | +0.037 |
single | +0.037 |
purchases | +0.035 |
toward | +0.035 |
unnecessary | +0.035 |
Neg logit contributions | |
ily | -0.018 |
ia | -0.016 |
era | -0.015 |
ena | -0.015 |
They | -0.014 |
Token | The |
Token ID | 464 |
Feature activation | +6.73e-03 |
Pos logit contributions | |
ge | +0.027 |
single | +0.027 |
unnecessary | +0.027 |
purchases | +0.026 |
toward | +0.026 |
Neg logit contributions | |
ily | -0.013 |
ia | -0.012 |
era | -0.011 |
ena | -0.011 |
Ben | -0.011 |
Token | girl |
Token ID | 2576 |
Feature activation | -1.04e-02 |
Pos logit contributions | |
ge | +0.014 |
single | +0.014 |
unnecessary | +0.014 |
purchases | +0.014 |
toward | +0.013 |
Neg logit contributions | |
ily | -0.007 |
They | -0.006 |
ia | -0.006 |
Ben | -0.006 |
ena | -0.006 |
Token | asked |
Token ID | 1965 |
Feature activation | +2.69e-02 |
Pos logit contributions | |
ily | +0.013 |
They | +0.012 |
ia | +0.012 |
Ben | +0.011 |
ena | +0.011 |
Neg logit contributions | |
ge | -0.020 |
unnecessary | -0.020 |
single | -0.020 |
purchases | -0.020 |
toward | -0.019 |
Token | the |
Token ID | 262 |
Feature activation | +1.32e-02 |
Pos logit contributions | |
ge | +0.060 |
unnecessary | +0.059 |
single | +0.059 |
fy | +0.058 |
purchases | +0.057 |
Neg logit contributions | |
ily | -0.032 |
Ben | -0.027 |
They | -0.025 |
ia | -0.025 |
Ben | -0.024 |
Token | children |
Token ID | 1751 |
Feature activation | -1.64e-03 |
Pos logit contributions | |
ge | +0.028 |
unnecessary | +0.027 |
single | +0.027 |
purchases | +0.027 |
toward | +0.026 |
Neg logit contributions | |
ily | -0.015 |
They | -0.013 |
ia | -0.013 |
Ben | -0.013 |
ena | -0.012 |
Token | what |
Token ID | 644 |
Feature activation | +5.05e-03 |
Pos logit contributions | |
ily | +0.002 |
They | +0.002 |
Ben | +0.002 |
ia | +0.002 |
Ben | +0.002 |
Neg logit contributions | |
unnecessary | -0.003 |
ge | -0.003 |
single | -0.003 |
purchases | -0.003 |
ging | -0.003 |
Token | game |
Token ID | 983 |
Feature activation | -7.34e-03 |
Pos logit contributions | |
single | +0.010 |
shopping | +0.009 |
purchases | +0.009 |
ge | +0.009 |
unnecessary | +0.009 |
Neg logit contributions | |
ily | -0.006 |
ia | -0.006 |
They | -0.005 |
era | -0.005 |
ena | -0.005 |
Token | they |
Token ID | 484 |
Feature activation | +1.38e-02 |
Pos logit contributions | |
ily | +0.008 |
ia | +0.007 |
They | +0.007 |
Ben | +0.007 |
ena | +0.007 |
Neg logit contributions | |
single | -0.014 |
ge | -0.014 |
purchases | -0.014 |
unnecessary | -0.014 |
shopping | -0.014 |
Token | were |
Token ID | 547 |
Feature activation | -2.47e-02 |
Pos logit contributions | |
ge | +0.030 |
single | +0.030 |
unnecessary | +0.029 |
purchases | +0.029 |
shopping | +0.028 |
Neg logit contributions | |
ily | -0.014 |
They | -0.012 |
ia | -0.012 |
Ben | -0.011 |
ena | -0.011 |
Token | he |
Token ID | 339 |
Feature activation | -6.70e-03 |
Pos logit contributions | |
ily | +0.018 |
ia | +0.017 |
They | +0.016 |
Ben | +0.015 |
era | +0.015 |
Neg logit contributions | |
ge | -0.024 |
single | -0.024 |
unnecessary | -0.024 |
purchases | -0.024 |
shopping | -0.023 |
Token | saw |
Token ID | 2497 |
Feature activation | -2.43e-02 |
Pos logit contributions | |
ily | +0.009 |
They | +0.009 |
ia | +0.009 |
Ben | +0.008 |
ena | +0.008 |
Neg logit contributions | |
ge | -0.012 |
single | -0.012 |
unnecessary | -0.012 |
purchases | -0.012 |
shopping | -0.011 |
Token | a |
Token ID | 257 |
Feature activation | +3.99e-03 |
Pos logit contributions | |
ia | +0.027 |
ily | +0.026 |
era | +0.025 |
They | +0.025 |
Ben | +0.024 |
Neg logit contributions | |
single | -0.048 |
unnecessary | -0.047 |
down | -0.046 |
purchases | -0.046 |
ge | -0.046 |
Token | shiny |
Token ID | 22441 |
Feature activation | +1.13e-02 |
Pos logit contributions | |
ge | +0.008 |
purchases | +0.007 |
unnecessary | +0.007 |
toward | +0.007 |
use | +0.007 |
Neg logit contributions | |
Ben | -0.005 |
They | -0.005 |
ily | -0.005 |
ia | -0.004 |
Ben | -0.004 |
Token | apple |
Token ID | 17180 |
Feature activation | -6.66e-03 |
Pos logit contributions | |
unnecessary | +0.018 |
ge | +0.018 |
purchases | +0.017 |
use | +0.017 |
single | +0.016 |
Neg logit contributions | |
ily | -0.018 |
ia | -0.017 |
Ben | -0.016 |
Ben | -0.016 |
ena | -0.016 |
Token | on |
Token ID | 319 |
Feature activation | +1.31e-02 |
Pos logit contributions | |
ily | +0.012 |
They | +0.011 |
ia | +0.011 |
Ben | +0.011 |
ena | +0.010 |
Neg logit contributions | |
ge | -0.009 |
single | -0.009 |
unnecessary | -0.009 |
purchases | -0.009 |
shopping | -0.009 |
Token | the |
Token ID | 262 |
Feature activation | -2.00e-02 |
Pos logit contributions | |
single | +0.023 |
ge | +0.023 |
weights | +0.022 |
purchases | +0.022 |
shopping | +0.021 |
Neg logit contributions | |
ily | -0.019 |
ia | -0.018 |
They | -0.017 |
era | -0.017 |
Ben | -0.016 |
Token | ground |
Token ID | 2323 |
Feature activation | -3.42e-02 |
Pos logit contributions | |
ily | +0.023 |
ia | +0.020 |
They | +0.020 |
Ben | +0.020 |
Ben | +0.020 |
Neg logit contributions | |
ge | -0.041 |
unnecessary | -0.040 |
purchases | -0.040 |
single | -0.039 |
use | -0.039 |
Token | . |
Token ID | 13 |
Feature activation | +7.35e-03 |
Pos logit contributions | |
ily | +0.042 |
ia | +0.038 |
Ben | +0.038 |
They | +0.037 |
era | +0.036 |
Neg logit contributions | |
ge | -0.067 |
single | -0.066 |
toward | -0.064 |
down | -0.063 |
shopping | -0.062 |
Token | He |
Token ID | 679 |
Feature activation | -1.14e-02 |
Pos logit contributions | |
single | +0.013 |
purchases | +0.013 |
ge | +0.013 |
unnecessary | +0.013 |
shopping | +0.013 |
Neg logit contributions | |
ily | -0.010 |
They | -0.009 |
ia | -0.009 |
Ben | -0.009 |
ena | -0.008 |
Token | picked |
Token ID | 6497 |
Feature activation | +1.11e-02 |
Pos logit contributions | |
ily | +0.015 |
They | +0.013 |
ia | +0.013 |
Ben | +0.013 |
ena | +0.013 |
Neg logit contributions | |
ge | -0.022 |
unnecessary | -0.022 |
single | -0.021 |
purchases | -0.021 |
shopping | -0.021 |
Token | named |
Token ID | 3706 |
Feature activation | +1.22e-02 |
Pos logit contributions | |
era | +0.034 |
ia | +0.034 |
ily | +0.033 |
Ben | +0.033 |
They | +0.031 |
Neg logit contributions | |
single | -0.032 |
down | -0.031 |
shopping | -0.031 |
towards | -0.029 |
ge | -0.028 |
Token | Tom |
Token ID | 4186 |
Feature activation | +1.19e-03 |
Pos logit contributions | |
single | +0.024 |
unnecessary | +0.024 |
shopping | +0.024 |
ge | +0.024 |
purchases | +0.024 |
Neg logit contributions | |
ily | -0.013 |
They | -0.012 |
ia | -0.012 |
era | -0.011 |
Ben | -0.011 |
Token | . |
Token ID | 13 |
Feature activation | +1.75e-02 |
Pos logit contributions | |
ge | +0.002 |
down | +0.002 |
single | +0.002 |
shopping | +0.002 |
use | +0.002 |
Neg logit contributions | |
era | -0.002 |
ily | -0.002 |
ia | -0.002 |
ickets | -0.002 |
Ben | -0.002 |
Token | Tom |
Token ID | 4186 |
Feature activation | +5.70e-03 |
Pos logit contributions | |
single | +0.044 |
purchases | +0.043 |
ge | +0.043 |
shopping | +0.042 |
unnecessary | +0.042 |
Neg logit contributions | |
ily | -0.011 |
They | -0.008 |
ia | -0.008 |
era | -0.008 |
Ben | -0.007 |
Token | loved |
Token ID | 6151 |
Feature activation | +4.46e-03 |
Pos logit contributions | |
single | +0.010 |
ge | +0.010 |
purchases | +0.009 |
down | +0.009 |
shopping | +0.009 |
Neg logit contributions | |
ily | -0.007 |
era | -0.007 |
ia | -0.007 |
They | -0.007 |
ena | -0.006 |
Token | to |
Token ID | 284 |
Feature activation | +9.99e-03 |
Pos logit contributions | |
ge | +0.007 |
unnecessary | +0.007 |
single | +0.007 |
ging | +0.007 |
purchases | +0.007 |
Neg logit contributions | |
ily | -0.007 |
Ben | -0.007 |
They | -0.007 |
Ben | -0.007 |
ia | -0.007 |
Token | eat |
Token ID | 4483 |
Feature activation | +6.56e-03 |
Pos logit contributions | |
purchases | +0.012 |
single | +0.012 |
shopping | +0.012 |
saddle | +0.012 |
heels | +0.011 |
Neg logit contributions | |
ily | -0.019 |
era | -0.018 |
They | -0.018 |
ia | -0.017 |
ena | -0.017 |
Token | ice |
Token ID | 4771 |
Feature activation | +1.45e-02 |
Pos logit contributions | |
ge | +0.011 |
single | +0.011 |
unnecessary | +0.010 |
purchases | +0.010 |
use | +0.010 |
Neg logit contributions | |
ily | -0.010 |
They | -0.010 |
Ben | -0.010 |
ia | -0.010 |
Ben | -0.009 |
Token | - |
Token ID | 12 |
Feature activation | +3.12e-02 |
Pos logit contributions | |
unnecessary | +0.027 |
single | +0.026 |
ge | +0.026 |
use | +0.025 |
insignificant | +0.025 |
Neg logit contributions | |
They | -0.020 |
ily | -0.020 |
ia | -0.019 |
iv | -0.017 |
era | -0.017 |
Token | cream |
Token ID | 36277 |
Feature activation | +1.88e-03 |
Pos logit contributions | |
unnecessary | +0.046 |
ge | +0.045 |
purchases | +0.044 |
single | +0.043 |
ening | +0.043 |
Neg logit contributions | |
ily | -0.053 |
They | -0.052 |
Tom | -0.048 |
ia | -0.047 |
Ben | -0.047 |
Token | . |
Token ID | 13 |
Feature activation | +1.45e-02 |
Pos logit contributions | |
ge | +0.004 |
unnecessary | +0.004 |
single | +0.004 |
purchases | +0.004 |
use | +0.003 |
Neg logit contributions | |
ily | -0.002 |
They | -0.002 |
ia | -0.002 |
Ben | -0.002 |
Ben | -0.002 |
Token | was |
Token ID | 373 |
Feature activation | -1.55e-02 |
Pos logit contributions | |
era | +0.015 |
ia | +0.014 |
They | +0.014 |
ily | +0.014 |
ickets | +0.014 |
Neg logit contributions | |
single | -0.015 |
shopping | -0.013 |
down | -0.013 |
picking | -0.013 |
proper | -0.013 |
Token | very |
Token ID | 845 |
Feature activation | +2.04e-02 |
Pos logit contributions | |
ily | +0.022 |
Ben | +0.019 |
They | +0.019 |
ena | +0.019 |
ia | +0.019 |
Neg logit contributions | |
ge | -0.029 |
unnecessary | -0.028 |
purchases | -0.028 |
use | -0.028 |
single | -0.027 |
Token | brave |
Token ID | 14802 |
Feature activation | +7.97e-03 |
Pos logit contributions | |
ge | +0.030 |
unnecessary | +0.029 |
purchases | +0.028 |
single | +0.028 |
use | +0.027 |
Neg logit contributions | |
ily | -0.037 |
ia | -0.034 |
They | -0.033 |
Ben | -0.033 |
Ben | -0.033 |
Token | and |
Token ID | 290 |
Feature activation | +1.29e-02 |
Pos logit contributions | |
ge | +0.016 |
single | +0.016 |
unnecessary | +0.016 |
purchases | +0.016 |
shopping | +0.015 |
Neg logit contributions | |
ily | -0.009 |
They | -0.008 |
ia | -0.008 |
Ben | -0.008 |
era | -0.008 |
Token | loved |
Token ID | 6151 |
Feature activation | +1.39e-03 |
Pos logit contributions | |
ge | +0.027 |
unnecessary | +0.026 |
purchases | +0.025 |
single | +0.025 |
use | +0.025 |
Neg logit contributions | |
ily | -0.016 |
They | -0.014 |
ia | -0.013 |
Ben | -0.013 |
ena | -0.013 |
Token | to |
Token ID | 284 |
Feature activation | +9.64e-03 |
Pos logit contributions | |
ge | +0.003 |
use | +0.003 |
ging | +0.002 |
asy | +0.002 |
fy | +0.002 |
Neg logit contributions | |
Ben | -0.002 |
ily | -0.002 |
They | -0.002 |
Tom | -0.002 |
ena | -0.002 |
Token | explore |
Token ID | 7301 |
Feature activation | +2.01e-02 |
Pos logit contributions | |
ge | +0.014 |
single | +0.014 |
unnecessary | +0.014 |
purchases | +0.014 |
shopping | +0.013 |
Neg logit contributions | |
ily | -0.016 |
They | -0.015 |
ia | -0.015 |
Ben | -0.014 |
ena | -0.014 |
Token | . |
Token ID | 13 |
Feature activation | +4.72e-03 |
Pos logit contributions | |
ge | +0.042 |
unnecessary | +0.041 |
purchases | +0.041 |
single | +0.041 |
use | +0.040 |
Neg logit contributions | |
ily | -0.023 |
They | -0.021 |
ia | -0.020 |
Ben | -0.019 |
Ben | -0.019 |
Token | One |
Token ID | 1881 |
Feature activation | +1.26e-02 |
Pos logit contributions | |
ge | +0.010 |
unnecessary | +0.010 |
purchases | +0.009 |
single | +0.009 |
ging | +0.009 |
Neg logit contributions | |
ily | -0.006 |
They | -0.005 |
Ben | -0.005 |
ia | -0.005 |
Ben | -0.005 |
Token | day |
Token ID | 1110 |
Feature activation | -7.49e-04 |
Pos logit contributions | |
single | +0.014 |
ge | +0.013 |
unnecessary | +0.013 |
purchases | +0.013 |
shopping | +0.012 |
Neg logit contributions | |
ily | -0.026 |
They | -0.025 |
ia | -0.025 |
Ben | -0.024 |
era | -0.024 |
Token | she |
Token ID | 673 |
Feature activation | -1.51e-02 |
Pos logit contributions | |
ily | +0.001 |
They | +0.001 |
ia | +0.001 |
Ben | +0.001 |
ena | +0.001 |
Neg logit contributions | |
ge | -0.001 |
single | -0.001 |
unnecessary | -0.001 |
purchases | -0.001 |
shopping | -0.001 |
Token | they |
Token ID | 484 |
Feature activation | +1.85e-02 |
Pos logit contributions | |
ily | +0.087 |
Ben | +0.081 |
ia | +0.077 |
ena | +0.077 |
era | +0.074 |
Neg logit contributions | |
single | -0.070 |
ge | -0.069 |
down | -0.069 |
toward | -0.067 |
purchases | -0.067 |
Token | could |
Token ID | 714 |
Feature activation | -1.31e-02 |
Pos logit contributions | |
ge | +0.033 |
single | +0.033 |
purchases | +0.032 |
unnecessary | +0.032 |
toward | +0.031 |
Neg logit contributions | |
ily | -0.025 |
ia | -0.024 |
They | -0.023 |
Ben | -0.022 |
era | -0.022 |
Token | prevent |
Token ID | 2948 |
Feature activation | -2.72e-02 |
Pos logit contributions | |
ily | +0.022 |
Ben | +0.020 |
ena | +0.020 |
ia | +0.020 |
era | +0.019 |
Neg logit contributions | |
single | -0.019 |
purchases | -0.019 |
ge | -0.018 |
shopping | -0.017 |
toward | -0.017 |
Token | having |
Token ID | 1719 |
Feature activation | -2.00e-02 |
Pos logit contributions | |
Ben | +0.033 |
ily | +0.033 |
They | +0.031 |
Tom | +0.029 |
ia | +0.029 |
Neg logit contributions | |
single | -0.052 |
use | -0.052 |
purchases | -0.051 |
ge | -0.050 |
saddle | -0.050 |
Token | to |
Token ID | 284 |
Feature activation | +1.27e-02 |
Pos logit contributions | |
ily | +0.028 |
They | +0.026 |
ia | +0.026 |
Ben | +0.025 |
ena | +0.024 |
Neg logit contributions | |
ge | -0.035 |
single | -0.035 |
unnecessary | -0.035 |
purchases | -0.034 |
toward | -0.033 |
Token | worry |
Token ID | 5490 |
Feature activation | -1.49e-03 |
Pos logit contributions | |
ge | +0.020 |
unnecessary | +0.020 |
single | +0.020 |
purchases | +0.020 |
toward | +0.019 |
Neg logit contributions | |
ily | -0.020 |
They | -0.019 |
ia | -0.018 |
Ben | -0.018 |
Ben | -0.018 |
Token | about |
Token ID | 546 |
Feature activation | -1.38e-02 |
Pos logit contributions | |
ily | +0.001 |
They | +0.001 |
ia | +0.001 |
Ben | +0.001 |
ena | +0.001 |
Neg logit contributions | |
ge | -0.003 |
single | -0.003 |
unnecessary | -0.003 |
purchases | -0.003 |
shopping | -0.003 |
Token | not |
Token ID | 407 |
Feature activation | +7.15e-03 |
Pos logit contributions | |
ily | +0.015 |
They | +0.014 |
Ben | +0.014 |
ia | +0.013 |
ena | +0.012 |
Neg logit contributions | |
single | -0.028 |
ge | -0.028 |
unnecessary | -0.028 |
purchases | -0.027 |
use | -0.027 |
Token | being |
Token ID | 852 |
Feature activation | -6.47e-03 |
Pos logit contributions | |
unnecessary | +0.011 |
single | +0.011 |
ge | +0.011 |
purchases | +0.011 |
use | +0.010 |
Neg logit contributions | |
ily | -0.011 |
They | -0.011 |
ia | -0.010 |
Ben | -0.010 |
Ben | -0.010 |
Token | able |
Token ID | 1498 |
Feature activation | +1.74e-02 |
Pos logit contributions | |
ily | +0.010 |
Ben | +0.009 |
They | +0.009 |
ena | +0.009 |
ia | +0.009 |
Neg logit contributions | |
ge | -0.011 |
purchases | -0.011 |
single | -0.010 |
use | -0.010 |
saddle | -0.010 |
Token | to |
Token ID | 284 |
Feature activation | +1.74e-02 |
Pos logit contributions | |
purchases | +0.028 |
ge | +0.028 |
use | +0.027 |
saddle | +0.027 |
unnecessary | +0.027 |
Neg logit contributions | |
ily | -0.026 |
They | -0.024 |
Ben | -0.024 |
ia | -0.023 |
not | -0.022 |
Token | came |
Token ID | 1625 |
Feature activation | -2.43e-03 |
Pos logit contributions | |
ily | +0.032 |
ia | +0.030 |
They | +0.030 |
era | +0.028 |
Ben | +0.028 |
Neg logit contributions | |
single | -0.053 |
ge | -0.053 |
purchases | -0.052 |
unnecessary | -0.051 |
use | -0.051 |
Token | by |
Token ID | 416 |
Feature activation | -1.96e-02 |
Pos logit contributions | |
ily | +0.004 |
They | +0.003 |
Ben | +0.003 |
ena | +0.003 |
ia | +0.003 |
Neg logit contributions | |
purchases | -0.004 |
unnecessary | -0.004 |
fy | -0.004 |
ge | -0.004 |
budget | -0.004 |
Token | and |
Token ID | 290 |
Feature activation | +1.22e-03 |
Pos logit contributions | |
ily | +0.024 |
They | +0.022 |
ia | +0.022 |
Ben | +0.021 |
era | +0.020 |
Neg logit contributions | |
ge | -0.039 |
single | -0.037 |
purchases | -0.036 |
unnecessary | -0.036 |
shopping | -0.036 |
Token | picked |
Token ID | 6497 |
Feature activation | +6.56e-03 |
Pos logit contributions | |
ge | +0.002 |
single | +0.002 |
unnecessary | +0.002 |
purchases | +0.002 |
shopping | +0.002 |
Neg logit contributions | |
ily | -0.002 |
ia | -0.002 |
They | -0.002 |
Ben | -0.002 |
era | -0.002 |
Token | an |
Token ID | 281 |
Feature activation | +4.26e-02 |
Pos logit contributions | |
ge | +0.014 |
unnecessary | +0.013 |
purchases | +0.013 |
use | +0.013 |
single | +0.013 |
Neg logit contributions | |
ily | -0.008 |
Ben | -0.007 |
ia | -0.006 |
They | -0.006 |
Ben | -0.006 |
Token | apple |
Token ID | 17180 |
Feature activation | +6.97e-03 |
Pos logit contributions | |
ge | +0.084 |
unnecessary | +0.084 |
use | +0.081 |
ging | +0.080 |
purchases | +0.077 |
Neg logit contributions | |
ily | -0.062 |
ia | -0.052 |
Ben | -0.046 |
ena | -0.045 |
They | -0.045 |
Token | . |
Token ID | 13 |
Feature activation | +1.74e-02 |
Pos logit contributions | |
unnecessary | +0.016 |
ge | +0.015 |
purchases | +0.015 |
single | +0.015 |
use | +0.015 |
Neg logit contributions | |
ily | -0.007 |
They | -0.006 |
ia | -0.006 |
Ben | -0.005 |
Ben | -0.005 |
Token | She |
Token ID | 1375 |
Feature activation | -1.36e-02 |
Pos logit contributions | |
ge | +0.036 |
single | +0.036 |
unnecessary | +0.035 |
purchases | +0.035 |
shopping | +0.034 |
Neg logit contributions | |
ily | -0.019 |
They | -0.017 |
ia | -0.017 |
Ben | -0.016 |
ena | -0.016 |
Token | took |
Token ID | 1718 |
Feature activation | +1.81e-02 |
Pos logit contributions | |
ily | +0.018 |
They | +0.017 |
ia | +0.017 |
era | +0.016 |
Ben | +0.016 |
Neg logit contributions | |
ge | -0.024 |
single | -0.024 |
purchases | -0.024 |
unnecessary | -0.023 |
shopping | -0.023 |
Token | a |
Token ID | 257 |
Feature activation | +4.07e-02 |
Pos logit contributions | |
ge | +0.046 |
unnecessary | +0.042 |
use | +0.042 |
purchases | +0.042 |
insignificant | +0.041 |
Neg logit contributions | |
ily | -0.017 |
Ben | -0.014 |
Tom | -0.012 |
They | -0.012 |
ia | -0.011 |
Token | bite |
Token ID | 13197 |
Feature activation | -7.57e-03 |
Pos logit contributions | |
unnecessary | +0.068 |
purchases | +0.065 |
ge | +0.065 |
heels | +0.064 |
ging | +0.064 |
Neg logit contributions | |
ily | -0.061 |
Ben | -0.059 |
ia | -0.058 |
Ben | -0.055 |
Tom | -0.054 |
Token | Max |
Token ID | 5436 |
Feature activation | +7.44e-03 |
Pos logit contributions | |
single | +0.017 |
ge | +0.017 |
down | +0.017 |
unnecessary | +0.017 |
purchases | +0.017 |
Neg logit contributions | |
ily | -0.009 |
ia | -0.008 |
era | -0.008 |
ena | -0.008 |
They | -0.008 |
Token | needed |
Token ID | 2622 |
Feature activation | +1.21e-02 |
Pos logit contributions | |
ge | +0.012 |
single | +0.012 |
unnecessary | +0.012 |
purchases | +0.012 |
shopping | +0.012 |
Neg logit contributions | |
ily | -0.011 |
ia | -0.010 |
They | -0.010 |
era | -0.010 |
ena | -0.010 |
Token | to |
Token ID | 284 |
Feature activation | +7.98e-03 |
Pos logit contributions | |
ge | +0.017 |
single | +0.017 |
unnecessary | +0.017 |
purchases | +0.016 |
shopping | +0.016 |
Neg logit contributions | |
ily | -0.021 |
ia | -0.020 |
They | -0.020 |
era | -0.019 |
Ben | -0.019 |
Token | go |
Token ID | 467 |
Feature activation | +5.25e-02 |
Pos logit contributions | |
purchases | +0.011 |
ge | +0.011 |
shopping | +0.011 |
unnecessary | +0.010 |
unst | +0.010 |
Neg logit contributions | |
ily | -0.014 |
ia | -0.014 |
They | -0.013 |
era | -0.013 |
ena | -0.013 |
Token | pot |
Token ID | 1787 |
Feature activation | -8.92e-03 |
Pos logit contributions | |
ening | +0.100 |
insignificant | +0.098 |
ge | +0.098 |
unnecessary | +0.096 |
vered | +0.096 |
Neg logit contributions | |
ily | -0.069 |
They | -0.064 |
Ben | -0.062 |
not | -0.059 |
a | -0.056 |
Token | ty |
Token ID | 774 |
Feature activation | +5.22e-03 |
Pos logit contributions | |
ily | +0.016 |
ia | +0.015 |
era | +0.015 |
Ben | +0.015 |
ena | +0.015 |
Neg logit contributions | |
ge | -0.012 |
shopping | -0.012 |
down | -0.012 |
purchases | -0.012 |
single | -0.011 |
Token | . |
Token ID | 13 |
Feature activation | +1.81e-02 |
Pos logit contributions | |
single | +0.010 |
unnecessary | +0.010 |
ge | +0.010 |
purchases | +0.010 |
use | +0.009 |
Neg logit contributions | |
ily | -0.006 |
They | -0.006 |
Ben | -0.006 |
ia | -0.006 |
ena | -0.005 |
Token | He |
Token ID | 679 |
Feature activation | -9.67e-03 |
Pos logit contributions | |
ge | +0.037 |
unnecessary | +0.037 |
single | +0.036 |
purchases | +0.036 |
ging | +0.035 |
Neg logit contributions | |
ily | -0.020 |
They | -0.019 |
Ben | -0.019 |
ia | -0.018 |
Ben | -0.018 |
Token | ran |
Token ID | 4966 |
Feature activation | +4.41e-02 |
Pos logit contributions | |
ily | +0.012 |
They | +0.011 |
ia | +0.011 |
Ben | +0.011 |
ena | +0.010 |
Neg logit contributions | |
unnecessary | -0.019 |
ge | -0.019 |
single | -0.018 |
purchases | -0.018 |
ging | -0.018 |
Token | inside |
Token ID | 2641 |
Feature activation | +1.24e-03 |
Pos logit contributions | |
unnecessary | +0.080 |
purchases | +0.077 |
ge | +0.076 |
use | +0.076 |
insignificant | +0.076 |
Neg logit contributions | |
ily | -0.064 |
They | -0.061 |
Ben | -0.058 |
Ben | -0.054 |
Tom | -0.054 |
Token | to |
Token ID | 284 |
Feature activation | -8.10e-03 |
Pos logit contributions | |
ge | +0.002 |
single | +0.002 |
purchases | +0.002 |
unnecessary | +0.002 |
shopping | +0.002 |
Neg logit contributions | |
ily | -0.002 |
They | -0.001 |
ia | -0.001 |
Ben | -0.001 |
era | -0.001 |
Token | and |
Token ID | 290 |
Feature activation | +5.47e-04 |
Pos logit contributions | |
ge | +0.008 |
single | +0.007 |
purchases | +0.007 |
unnecessary | +0.007 |
use | +0.007 |
Neg logit contributions | |
ily | -0.006 |
ia | -0.006 |
They | -0.006 |
era | -0.006 |
ena | -0.005 |
Token | Bow |
Token ID | 9740 |
Feature activation | +1.48e-02 |
Pos logit contributions | |
ge | +0.001 |
single | +0.001 |
purchases | +0.001 |
unnecessary | +0.001 |
shopping | +0.001 |
Neg logit contributions | |
ily | -0.001 |
They | -0.001 |
ia | -0.001 |
Ben | -0.001 |
Ben | -0.001 |
Token | were |
Token ID | 547 |
Feature activation | -1.48e-02 |
Pos logit contributions | |
ge | +0.025 |
single | +0.025 |
unnecessary | +0.025 |
purchases | +0.024 |
toward | +0.024 |
Neg logit contributions | |
ily | -0.021 |
ia | -0.020 |
They | -0.019 |
era | -0.019 |
ena | -0.019 |
Token | walking |
Token ID | 6155 |
Feature activation | +2.78e-02 |
Pos logit contributions | |
ily | +0.024 |
They | +0.022 |
Ben | +0.022 |
ia | +0.021 |
ena | +0.021 |
Neg logit contributions | |
ge | -0.024 |
single | -0.023 |
purchases | -0.023 |
unnecessary | -0.023 |
use | -0.023 |
Token | in |
Token ID | 287 |
Feature activation | +1.63e-03 |
Pos logit contributions | |
ge | +0.045 |
unnecessary | +0.044 |
purchases | +0.044 |
single | +0.043 |
use | +0.042 |
Neg logit contributions | |
ily | -0.044 |
They | -0.042 |
Ben | -0.040 |
ia | -0.039 |
Ben | -0.039 |
Token | the |
Token ID | 262 |
Feature activation | -4.10e-02 |
Pos logit contributions | |
ge | +0.003 |
purchases | +0.003 |
unnecessary | +0.003 |
single | +0.003 |
use | +0.003 |
Neg logit contributions | |
ily | -0.002 |
They | -0.002 |
Ben | -0.002 |
Ben | -0.001 |
ia | -0.001 |
Token | park |
Token ID | 3952 |
Feature activation | -2.50e-02 |
Pos logit contributions | |
ily | +0.049 |
They | +0.044 |
ia | +0.044 |
Ben | +0.042 |
ena | +0.042 |
Neg logit contributions | |
ge | -0.081 |
single | -0.080 |
unnecessary | -0.080 |
purchases | -0.079 |
shopping | -0.076 |
Token | . |
Token ID | 13 |
Feature activation | +1.36e-02 |
Pos logit contributions | |
ily | +0.035 |
ia | +0.033 |
Ben | +0.033 |
una | +0.033 |
ena | +0.031 |
Neg logit contributions | |
single | -0.043 |
down | -0.043 |
toward | -0.043 |
shopping | -0.043 |
ge | -0.043 |
Token | Sam |
Token ID | 3409 |
Feature activation | +5.03e-03 |
Pos logit contributions | |
ge | +0.031 |
single | +0.031 |
unnecessary | +0.031 |
purchases | +0.031 |
shopping | +0.030 |
Neg logit contributions | |
ily | -0.012 |
They | -0.011 |
ia | -0.010 |
Ben | -0.010 |
ena | -0.010 |
Token | was |
Token ID | 373 |
Feature activation | -1.83e-02 |
Pos logit contributions | |
unnecessary | +0.010 |
ge | +0.010 |
single | +0.010 |
purchases | +0.010 |
ging | +0.009 |
Neg logit contributions | |
ily | -0.006 |
They | -0.006 |
ia | -0.006 |
Ben | -0.006 |
Ben | -0.005 |
Token | running |
Token ID | 2491 |
Feature activation | +2.41e-02 |
Pos logit contributions | |
ily | +0.027 |
Ben | +0.025 |
They | +0.025 |
ena | +0.025 |
Tom | +0.023 |
Neg logit contributions | |
use | -0.033 |
ge | -0.033 |
purchases | -0.032 |
unnecessary | -0.031 |
single | -0.031 |
Token | went |
Token ID | 1816 |
Feature activation | +4.69e-02 |
Pos logit contributions | |
ge | +0.023 |
single | +0.023 |
purchases | +0.022 |
unnecessary | +0.022 |
down | +0.022 |
Neg logit contributions | |
ily | -0.021 |
ia | -0.020 |
They | -0.020 |
era | -0.019 |
ena | -0.019 |
Token | for |
Token ID | 329 |
Feature activation | -9.74e-03 |
Pos logit contributions | |
ge | +0.074 |
unnecessary | +0.073 |
single | +0.070 |
purchases | +0.070 |
insignificant | +0.069 |
Neg logit contributions | |
ily | -0.078 |
They | -0.072 |
Ben | -0.070 |
ia | -0.068 |
Ben | -0.066 |
Token | a |
Token ID | 257 |
Feature activation | +3.58e-02 |
Pos logit contributions | |
ily | +0.010 |
ia | +0.010 |
era | +0.009 |
They | +0.009 |
Ben | +0.008 |
Neg logit contributions | |
unnecessary | -0.021 |
shopping | -0.020 |
ge | -0.020 |
single | -0.020 |
purchases | -0.020 |
Token | walk |
Token ID | 2513 |
Feature activation | +1.93e-02 |
Pos logit contributions | |
ge | +0.048 |
unnecessary | +0.047 |
purchases | +0.046 |
single | +0.045 |
ging | +0.045 |
Neg logit contributions | |
ily | -0.064 |
They | -0.061 |
Ben | -0.061 |
ia | -0.060 |
Ben | -0.059 |
Token | in |
Token ID | 287 |
Feature activation | +7.75e-03 |
Pos logit contributions | |
ge | +0.036 |
single | +0.036 |
unnecessary | +0.035 |
purchases | +0.035 |
toward | +0.034 |
Neg logit contributions | |
ily | -0.025 |
ia | -0.023 |
They | -0.023 |
Ben | -0.022 |
ena | -0.022 |
Token | the |
Token ID | 262 |
Feature activation | -4.39e-02 |
Pos logit contributions | |
unnecessary | +0.017 |
ge | +0.016 |
purchases | +0.016 |
single | +0.016 |
use | +0.016 |
Neg logit contributions | |
ily | -0.008 |
They | -0.008 |
Ben | -0.007 |
ia | -0.007 |
Ben | -0.007 |
Token | deep |
Token ID | 2769 |
Feature activation | +1.47e-02 |
Pos logit contributions | |
ily | +0.064 |
ia | +0.058 |
ena | +0.057 |
era | +0.057 |
They | +0.057 |
Neg logit contributions | |
single | -0.076 |
unnecessary | -0.074 |
shopping | -0.073 |
purchases | -0.072 |
down | -0.071 |
Token | woods |
Token ID | 16479 |
Feature activation | -2.33e-02 |
Pos logit contributions | |
unnecessary | +0.026 |
ge | +0.026 |
purchases | +0.026 |
ging | +0.025 |
picking | +0.024 |
Neg logit contributions | |
ily | -0.020 |
ia | -0.020 |
They | -0.019 |
Ben | -0.019 |
Ben | -0.019 |
Token | . |
Token ID | 13 |
Feature activation | +9.26e-03 |
Pos logit contributions | |
ily | +0.029 |
ia | +0.027 |
Ben | +0.027 |
They | +0.026 |
era | +0.026 |
Neg logit contributions | |
ge | -0.046 |
single | -0.045 |
toward | -0.044 |
shopping | -0.043 |
down | -0.043 |
Token | He |
Token ID | 679 |
Feature activation | -1.97e-02 |
Pos logit contributions | |
ge | +0.017 |
single | +0.017 |
purchases | +0.016 |
unnecessary | +0.016 |
toward | +0.016 |
Neg logit contributions | |
ily | -0.013 |
They | -0.011 |
ia | -0.011 |
Ben | -0.011 |
ena | -0.011 |
Token | saw |
Token ID | 2497 |
Feature activation | -2.12e-02 |
Pos logit contributions | |
ily | +0.030 |
They | +0.028 |
ia | +0.028 |
Ben | +0.027 |
era | +0.027 |
Neg logit contributions | |
ge | -0.032 |
single | -0.032 |
unnecessary | -0.032 |
purchases | -0.031 |
shopping | -0.030 |
Token | fall |
Token ID | 2121 |
Feature activation | +2.24e-02 |
Pos logit contributions | |
ge | +0.009 |
single | +0.009 |
down | +0.009 |
unnecessary | +0.009 |
toward | +0.009 |
Neg logit contributions | |
ily | -0.008 |
ia | -0.007 |
Ben | -0.007 |
ena | -0.007 |
They | -0.007 |
Token | out |
Token ID | 503 |
Feature activation | +4.23e-04 |
Pos logit contributions | |
purchases | +0.033 |
unnecessary | +0.032 |
ge | +0.031 |
single | +0.030 |
use | +0.030 |
Neg logit contributions | |
ily | -0.039 |
They | -0.038 |
Ben | -0.035 |
ia | -0.035 |
Ben | -0.034 |
Token | . |
Token ID | 13 |
Feature activation | +2.79e-03 |
Pos logit contributions | |
ge | +0.001 |
single | +0.001 |
unnecessary | +0.001 |
purchases | +0.001 |
toward | +0.001 |
Neg logit contributions | |
ily | -0.000 |
ia | -0.000 |
Ben | -0.000 |
They | -0.000 |
era | -0.000 |
Token | Ben |
Token ID | 3932 |
Feature activation | +3.77e-03 |
Pos logit contributions | |
single | +0.005 |
down | +0.005 |
run | +0.005 |
weights | +0.004 |
shopping | +0.004 |
Neg logit contributions | |
era | -0.003 |
iv | -0.003 |
ily | -0.003 |
ena | -0.003 |
ia | -0.003 |
Token | is |
Token ID | 318 |
Feature activation | -2.26e-02 |
Pos logit contributions | |
down | +0.005 |
ge | +0.005 |
unnecessary | +0.005 |
single | +0.005 |
heels | +0.005 |
Neg logit contributions | |
ily | -0.006 |
ia | -0.006 |
They | -0.006 |
era | -0.005 |
ena | -0.005 |
Token | shocked |
Token ID | 11472 |
Feature activation | -4.73e-02 |
Pos logit contributions | |
ily | +0.039 |
They | +0.035 |
ia | +0.035 |
ena | +0.034 |
Ben | +0.034 |
Neg logit contributions | |
ge | -0.035 |
purchases | -0.034 |
single | -0.034 |
use | -0.034 |
unnecessary | -0.033 |
Token | and |
Token ID | 290 |
Feature activation | +2.32e-02 |
Pos logit contributions | |
ily | +0.079 |
era | +0.074 |
ena | +0.073 |
room | +0.071 |
Ben | +0.071 |
Neg logit contributions | |
down | -0.073 |
ge | -0.070 |
toward | -0.069 |
towards | -0.069 |
single | -0.062 |
Token | sad |
Token ID | 6507 |
Feature activation | -3.36e-02 |
Pos logit contributions | |
ge | +0.034 |
purchases | +0.034 |
single | +0.034 |
unnecessary | +0.033 |
use | +0.032 |
Neg logit contributions | |
ily | -0.040 |
They | -0.037 |
Ben | -0.037 |
ia | -0.037 |
Ben | -0.035 |
Token | . |
Token ID | 13 |
Feature activation | +6.11e-03 |
Pos logit contributions | |
ily | +0.051 |
Ben | +0.049 |
ena | +0.048 |
era | +0.047 |
ia | +0.046 |
Neg logit contributions | |
down | -0.055 |
ge | -0.053 |
single | -0.051 |
toward | -0.050 |
weights | -0.049 |
Token | He |
Token ID | 679 |
Feature activation | -7.14e-03 |
Pos logit contributions | |
single | +0.008 |
down | +0.008 |
weights | +0.008 |
toward | +0.008 |
shopping | +0.007 |
Neg logit contributions | |
ily | -0.010 |
era | -0.010 |
ena | -0.009 |
iv | -0.009 |
ia | -0.009 |
Token | tries |
Token ID | 8404 |
Feature activation | -8.38e-05 |
Pos logit contributions | |
ily | +0.012 |
They | +0.011 |
room | +0.011 |
era | +0.011 |
ia | +0.011 |
Neg logit contributions | |
unnecessary | -0.010 |
single | -0.010 |
heels | -0.010 |
ge | -0.010 |
down | -0.010 |
Token |
|
Token ID | 198 |
Feature activation | +1.70e-02 |
Pos logit contributions | |
single | +0.001 |
weights | +0.001 |
shopping | +0.001 |
down | +0.001 |
purchases | +0.001 |
Neg logit contributions | |
ily | -0.001 |
ia | -0.000 |
ena | -0.000 |
era | -0.000 |
una | -0.000 |
Token |
|
Token ID | 198 |
Feature activation | +1.53e-02 |
Pos logit contributions | |
single | +0.031 |
ge | +0.030 |
shopping | +0.028 |
toward | +0.028 |
purchases | +0.028 |
Neg logit contributions | |
ily | -0.021 |
era | -0.020 |
ia | -0.019 |
ena | -0.019 |
der | -0.019 |
Token | He |
Token ID | 1544 |
Feature activation | -1.06e-02 |
Pos logit contributions | |
single | +0.026 |
ge | +0.025 |
weights | +0.024 |
toward | +0.024 |
unnecessary | +0.023 |
Neg logit contributions | |
ily | -0.023 |
ia | -0.022 |
Ben | -0.020 |
era | -0.020 |
ena | -0.020 |
Token | was |
Token ID | 373 |
Feature activation | -1.63e-02 |
Pos logit contributions | |
ily | +0.015 |
ia | +0.014 |
They | +0.014 |
era | +0.013 |
Ben | +0.013 |
Neg logit contributions | |
ge | -0.018 |
single | -0.018 |
purchases | -0.018 |
unnecessary | -0.017 |
shopping | -0.017 |
Token | so |
Token ID | 523 |
Feature activation | +2.22e-02 |
Pos logit contributions | |
ily | +0.024 |
ena | +0.022 |
Ben | +0.021 |
not | +0.021 |
They | +0.020 |
Neg logit contributions | |
use | -0.032 |
ge | -0.032 |
unnecessary | -0.031 |
purchases | -0.029 |
single | -0.029 |
Token | happy |
Token ID | 3772 |
Feature activation | -4.74e-02 |
Pos logit contributions | |
unnecessary | +0.034 |
ge | +0.034 |
purchases | +0.033 |
single | +0.033 |
use | +0.032 |
Neg logit contributions | |
ily | -0.038 |
They | -0.035 |
ia | -0.035 |
Ben | -0.034 |
Ben | -0.033 |
Token | and |
Token ID | 290 |
Feature activation | +2.11e-02 |
Pos logit contributions | |
ily | +0.080 |
Ben | +0.074 |
ia | +0.073 |
ena | +0.069 |
era | +0.069 |
Neg logit contributions | |
down | -0.073 |
ge | -0.072 |
toward | -0.072 |
single | -0.070 |
shopping | -0.069 |
Token | thanked |
Token ID | 26280 |
Feature activation | +9.92e-03 |
Pos logit contributions | |
ge | +0.037 |
unnecessary | +0.037 |
single | +0.036 |
purchases | +0.036 |
use | +0.035 |
Neg logit contributions | |
ily | -0.031 |
They | -0.028 |
ia | -0.028 |
Ben | -0.027 |
Ben | -0.027 |
Token | his |
Token ID | 465 |
Feature activation | +1.07e-02 |
Pos logit contributions | |
single | +0.014 |
ge | +0.014 |
purchases | +0.014 |
shopping | +0.014 |
unnecessary | +0.014 |
Neg logit contributions | |
ily | -0.016 |
They | -0.015 |
ia | -0.015 |
Ben | -0.015 |
era | -0.015 |
Token | friends |
Token ID | 2460 |
Feature activation | +7.90e-03 |
Pos logit contributions | |
ge | +0.026 |
unnecessary | +0.025 |
single | +0.025 |
use | +0.025 |
ging | +0.024 |
Neg logit contributions | |
ily | -0.010 |
ia | -0.009 |
Ben | -0.007 |
They | -0.007 |
Ben | -0.007 |
Token | . |
Token ID | 13 |
Feature activation | +1.61e-02 |
Pos logit contributions | |
ge | +0.014 |
single | +0.014 |
unnecessary | +0.014 |
purchases | +0.014 |
shopping | +0.013 |
Neg logit contributions | |
ily | -0.011 |
They | -0.010 |
ia | -0.010 |
Ben | -0.010 |
ena | -0.009 |
Token | 's |
Token ID | 338 |
Feature activation | -1.52e-02 |
Pos logit contributions | |
ily | +0.006 |
They | +0.005 |
Ben | +0.005 |
ia | +0.005 |
Ben | +0.005 |
Neg logit contributions | |
unnecessary | -0.010 |
single | -0.010 |
ge | -0.010 |
purchases | -0.010 |
ging | -0.010 |
Token | house |
Token ID | 2156 |
Feature activation | -5.13e-03 |
Pos logit contributions | |
ily | +0.020 |
They | +0.018 |
ia | +0.018 |
Ben | +0.017 |
ena | +0.017 |
Neg logit contributions | |
ge | -0.029 |
unnecessary | -0.029 |
single | -0.028 |
ging | -0.027 |
use | -0.027 |
Token | . |
Token ID | 13 |
Feature activation | +1.62e-02 |
Pos logit contributions | |
ily | +0.008 |
They | +0.008 |
ia | +0.007 |
ena | +0.007 |
Ben | +0.007 |
Neg logit contributions | |
unnecessary | -0.008 |
ge | -0.008 |
single | -0.008 |
purchases | -0.008 |
ging | -0.007 |
Token | Her |
Token ID | 2332 |
Feature activation | -4.53e-03 |
Pos logit contributions | |
ge | +0.036 |
unnecessary | +0.036 |
single | +0.035 |
purchases | +0.035 |
ging | +0.034 |
Neg logit contributions | |
ily | -0.016 |
They | -0.014 |
ia | -0.014 |
Ben | -0.013 |
Ben | -0.013 |
Token | grandma |
Token ID | 49890 |
Feature activation | +1.17e-02 |
Pos logit contributions | |
ily | +0.004 |
They | +0.004 |
ia | +0.004 |
Ben | +0.004 |
era | +0.004 |
Neg logit contributions | |
ge | -0.010 |
single | -0.010 |
unnecessary | -0.010 |
purchases | -0.010 |
shopping | -0.010 |
Token | made |
Token ID | 925 |
Feature activation | -3.89e-02 |
Pos logit contributions | |
unnecessary | +0.026 |
ge | +0.026 |
single | +0.025 |
purchases | +0.025 |
toward | +0.024 |
Neg logit contributions | |
ily | -0.012 |
They | -0.011 |
ia | -0.010 |
Ben | -0.010 |
ena | -0.010 |
Token | her |
Token ID | 607 |
Feature activation | -1.11e-02 |
Pos logit contributions | |
ia | +0.049 |
ily | +0.049 |
era | +0.048 |
Ben | +0.047 |
They | +0.047 |
Neg logit contributions | |
shopping | -0.069 |
purchases | -0.068 |
single | -0.068 |
unnecessary | -0.067 |
ge | -0.066 |
Token | a |
Token ID | 257 |
Feature activation | +5.51e-03 |
Pos logit contributions | |
ily | +0.009 |
Ben | +0.008 |
ena | +0.008 |
They | +0.007 |
ia | +0.007 |
Neg logit contributions | |
ge | -0.027 |
ging | -0.026 |
use | -0.026 |
unnecessary | -0.025 |
single | -0.025 |
Token | y |
Token ID | 331 |
Feature activation | +1.19e-02 |
Pos logit contributions | |
ge | +0.009 |
unnecessary | +0.009 |
single | +0.008 |
purchases | +0.008 |
ging | +0.008 |
Neg logit contributions | |
ily | -0.008 |
They | -0.008 |
ia | -0.008 |
Ben | -0.008 |
Ben | -0.008 |
Token | ummy |
Token ID | 13513 |
Feature activation | +8.89e-04 |
Pos logit contributions | |
single | +0.020 |
ge | +0.020 |
purchases | +0.019 |
shopping | +0.019 |
unnecessary | +0.019 |
Neg logit contributions | |
ily | -0.018 |
They | -0.017 |
ena | -0.016 |
Ben | -0.016 |
ia | -0.016 |
Token | cake |
Token ID | 12187 |
Feature activation | -6.10e-03 |
Pos logit contributions | |
ge | +0.002 |
unnecessary | +0.002 |
single | +0.002 |
toward | +0.002 |
purchases | +0.002 |
Neg logit contributions | |
ily | -0.001 |
ia | -0.001 |
They | -0.001 |
Ben | -0.001 |
Ben | -0.001 |