Token | egg |
Token ID | 5935 |
Feature activation | +3.37e-01 |
Pos logit contributions | |
cheer | +0.001 |
pretend | +0.001 |
ends | +0.001 |
e | +0.001 |
wonder | +0.001 |
Neg logit contributions | |
ge | -0.001 |
Twe | -0.001 |
Wil | -0.001 |
obby | -0.001 |
OO | -0.001 |
Token | ! |
Token ID | 0 |
Feature activation | -3.10e-01 |
Pos logit contributions | |
pretend | +0.005 |
cheer | +0.005 |
ends | +0.005 |
sill | +0.004 |
e | +0.004 |
Neg logit contributions | |
Twe | -0.009 |
ge | -0.009 |
obby | -0.009 |
Wil | -0.009 |
Fe | -0.009 |
Token | He |
Token ID | 679 |
Feature activation | -5.56e-01 |
Pos logit contributions | |
ge | +0.009 |
OO | +0.008 |
arse | +0.007 |
badly | +0.007 |
rated | +0.007 |
Neg logit contributions | |
cheer | -0.005 |
excited | -0.005 |
ends | -0.005 |
pretend | -0.005 |
see | -0.004 |
Token | watched |
Token ID | 7342 |
Feature activation | +1.32e-01 |
Pos logit contributions | |
ge | +0.015 |
Twe | +0.015 |
Wil | +0.014 |
obby | +0.013 |
Johnson | +0.013 |
Neg logit contributions | |
pretend | -0.009 |
cheer | -0.009 |
ends | -0.009 |
wonder | -0.008 |
excited | -0.008 |
Token | in |
Token ID | 287 |
Feature activation | -5.78e-01 |
Pos logit contributions | |
cheer | +0.002 |
pretend | +0.002 |
ends | +0.002 |
excited | +0.002 |
e | +0.002 |
Neg logit contributions | |
ge | -0.004 |
Twe | -0.004 |
obby | -0.003 |
Wil | -0.003 |
OO | -0.003 |
Token | amaz |
Token ID | 40642 |
Feature activation | +3.69e+00 |
Pos logit contributions | |
ge | +0.016 |
Twe | +0.015 |
Wil | +0.015 |
obby | +0.015 |
OO | +0.014 |
Neg logit contributions | |
ends | -0.008 |
cheer | -0.008 |
pretend | -0.007 |
wonder | -0.006 |
e | -0.006 |
Token | ement |
Token ID | 972 |
Feature activation | -1.59e-01 |
Pos logit contributions | |
ends | +0.098 |
cheer | +0.092 |
pretend | +0.090 |
reporters | +0.083 |
wonder | +0.082 |
Neg logit contributions | |
ge | -0.059 |
obby | -0.053 |
badly | -0.053 |
Twe | -0.051 |
iously | -0.050 |
Token | as |
Token ID | 355 |
Feature activation | +4.76e-01 |
Pos logit contributions | |
Twe | +0.004 |
ge | +0.004 |
Wil | +0.004 |
obby | +0.004 |
OO | +0.004 |
Neg logit contributions | |
ends | -0.002 |
cheer | -0.002 |
pretend | -0.002 |
excited | -0.002 |
sill | -0.002 |
Token | the |
Token ID | 262 |
Feature activation | -1.11e-01 |
Pos logit contributions | |
cheer | +0.008 |
pretend | +0.007 |
ends | +0.007 |
excited | +0.007 |
wonder | +0.007 |
Neg logit contributions | |
ge | -0.013 |
Twe | -0.012 |
OO | -0.012 |
Wil | -0.012 |
obby | -0.012 |
Token | egg |
Token ID | 5935 |
Feature activation | +1.86e-01 |
Pos logit contributions | |
ge | +0.003 |
Twe | +0.003 |
obby | +0.003 |
OO | +0.003 |
shadow | +0.003 |
Neg logit contributions | |
cheer | -0.002 |
pretend | -0.002 |
wonder | -0.002 |
sill | -0.002 |
excited | -0.002 |
Token | began |
Token ID | 2540 |
Feature activation | +6.32e-02 |
Pos logit contributions | |
cheer | +0.003 |
ends | +0.003 |
pretend | +0.003 |
sill | +0.003 |
e | +0.003 |
Neg logit contributions | |
ge | -0.005 |
Twe | -0.005 |
obby | -0.004 |
Wil | -0.004 |
OO | -0.004 |
Token | air |
Token ID | 1633 |
Feature activation | +1.97e-02 |
Pos logit contributions | |
cheer | +0.010 |
ends | +0.010 |
pretend | +0.009 |
wonder | +0.008 |
excited | +0.008 |
Neg logit contributions | |
ge | -0.019 |
Twe | -0.018 |
obby | -0.017 |
OO | -0.017 |
Wil | -0.017 |
Token | . |
Token ID | 13 |
Feature activation | -1.96e-01 |
Pos logit contributions | |
cheer | +0.000 |
pretend | +0.000 |
ends | +0.000 |
around | +0.000 |
excited | +0.000 |
Neg logit contributions | |
Twe | -0.001 |
ge | -0.001 |
obby | -0.001 |
Wil | -0.001 |
OO | -0.001 |
Token | He |
Token ID | 679 |
Feature activation | -7.20e-01 |
Pos logit contributions | |
ge | +0.006 |
OO | +0.005 |
iously | +0.005 |
Twe | +0.005 |
Wil | +0.005 |
Neg logit contributions | |
ends | -0.003 |
pretend | -0.003 |
cheer | -0.003 |
excited | -0.003 |
wonder | -0.003 |
Token | watched |
Token ID | 7342 |
Feature activation | +1.29e-01 |
Pos logit contributions | |
ge | +0.020 |
Twe | +0.019 |
Wil | +0.018 |
obby | +0.018 |
OO | +0.017 |
Neg logit contributions | |
pretend | -0.012 |
cheer | -0.011 |
ends | -0.011 |
wonder | -0.011 |
excited | -0.010 |
Token | with |
Token ID | 351 |
Feature activation | -5.18e-01 |
Pos logit contributions | |
cheer | +0.002 |
pretend | +0.002 |
ends | +0.002 |
excited | +0.001 |
sill | +0.001 |
Neg logit contributions | |
ge | -0.004 |
Twe | -0.004 |
Wil | -0.003 |
obby | -0.003 |
OO | -0.003 |
Token | amaz |
Token ID | 40642 |
Feature activation | +3.63e+00 |
Pos logit contributions | |
ge | +0.010 |
badly | +0.010 |
Bobby | +0.009 |
Timothy | +0.009 |
Frankie | +0.009 |
Neg logit contributions | |
ends | -0.011 |
reporters | -0.010 |
ibr | -0.009 |
cheer | -0.009 |
pler | -0.009 |
Token | ement |
Token ID | 972 |
Feature activation | -1.68e-01 |
Pos logit contributions | |
ends | +0.087 |
cheer | +0.084 |
pretend | +0.081 |
wonder | +0.075 |
e | +0.074 |
Neg logit contributions | |
ge | -0.067 |
Twe | -0.061 |
obby | -0.059 |
badly | -0.058 |
Wil | -0.058 |
Token | as |
Token ID | 355 |
Feature activation | +4.52e-01 |
Pos logit contributions | |
Twe | +0.005 |
ge | +0.005 |
Wil | +0.004 |
obby | +0.004 |
Fe | +0.004 |
Neg logit contributions | |
ends | -0.002 |
cheer | -0.002 |
pretend | -0.002 |
excited | -0.002 |
sill | -0.002 |
Token | the |
Token ID | 262 |
Feature activation | -2.83e-02 |
Pos logit contributions | |
cheer | +0.008 |
ends | +0.008 |
pretend | +0.008 |
wonder | +0.007 |
excited | +0.007 |
Neg logit contributions | |
ge | -0.011 |
Twe | -0.011 |
OO | -0.010 |
Wil | -0.010 |
obby | -0.010 |
Token | z |
Token ID | 1976 |
Feature activation | +3.28e-01 |
Pos logit contributions | |
ge | +0.001 |
Twe | +0.001 |
obby | +0.001 |
OO | +0.001 |
Wil | +0.001 |
Neg logit contributions | |
cheer | -0.000 |
pretend | -0.000 |
wonder | -0.000 |
sill | -0.000 |
excited | -0.000 |
Token | ig |
Token ID | 328 |
Feature activation | -3.29e-02 |
Pos logit contributions | |
ends | +0.006 |
cheer | +0.006 |
pretend | +0.005 |
e | +0.005 |
sill | +0.005 |
Neg logit contributions | |
ge | -0.008 |
Twe | -0.008 |
Wil | -0.007 |
obby | -0.007 |
Frankie | -0.007 |
Token |
|
Token ID | 198 |
Feature activation | +2.94e-02 |
Pos logit contributions | |
ge | +0.005 |
iously | +0.005 |
OO | +0.005 |
Twe | +0.005 |
OUT | +0.005 |
Neg logit contributions | |
cheer | -0.002 |
ends | -0.002 |
pretend | -0.002 |
excited | -0.002 |
see | -0.002 |
Token | T |
Token ID | 51 |
Feature activation | +3.85e-01 |
Pos logit contributions | |
cheer | +0.001 |
ends | +0.001 |
pretend | +0.001 |
e | +0.000 |
excited | +0.000 |
Neg logit contributions | |
ge | -0.001 |
OO | -0.001 |
gie | -0.001 |
iously | -0.001 |
Johnson | -0.001 |
Token | ina |
Token ID | 1437 |
Feature activation | -2.87e-01 |
Pos logit contributions | |
ends | +0.005 |
cheer | +0.004 |
sill | +0.004 |
pretend | +0.004 |
ass | +0.004 |
Neg logit contributions | |
Twe | -0.011 |
ge | -0.011 |
badly | -0.010 |
Wil | -0.010 |
Frankie | -0.010 |
Token | waited |
Token ID | 13488 |
Feature activation | +3.08e-01 |
Pos logit contributions | |
ge | +0.008 |
Twe | +0.007 |
Wil | +0.007 |
obby | +0.007 |
OO | +0.007 |
Neg logit contributions | |
cheer | -0.005 |
ends | -0.005 |
pretend | -0.004 |
excited | -0.004 |
e | -0.004 |
Token | with |
Token ID | 351 |
Feature activation | +1.64e-01 |
Pos logit contributions | |
ends | +0.005 |
cheer | +0.004 |
pretend | +0.004 |
wonder | +0.004 |
excited | +0.004 |
Neg logit contributions | |
ge | -0.008 |
Twe | -0.008 |
Wil | -0.008 |
obby | -0.008 |
OO | -0.007 |
Token | amaz |
Token ID | 40642 |
Feature activation | +3.51e+00 |
Pos logit contributions | |
ends | +0.004 |
pler | +0.003 |
amer | +0.003 |
reporters | +0.003 |
ibr | +0.003 |
Neg logit contributions | |
Bobby | -0.003 |
Frankie | -0.003 |
ge | -0.003 |
Em | -0.003 |
Billy | -0.003 |
Token | ement |
Token ID | 972 |
Feature activation | -8.08e-02 |
Pos logit contributions | |
ends | +0.078 |
cheer | +0.076 |
pretend | +0.073 |
wonder | +0.067 |
e | +0.066 |
Neg logit contributions | |
ge | -0.070 |
Twe | -0.066 |
Wil | -0.062 |
obby | -0.062 |
badly | -0.060 |
Token | as |
Token ID | 355 |
Feature activation | +1.05e+00 |
Pos logit contributions | |
Twe | +0.002 |
ge | +0.002 |
Wil | +0.002 |
obby | +0.002 |
OO | +0.002 |
Neg logit contributions | |
ends | -0.001 |
cheer | -0.001 |
pretend | -0.001 |
excited | -0.001 |
sill | -0.001 |
Token | the |
Token ID | 262 |
Feature activation | +1.76e-01 |
Pos logit contributions | |
cheer | +0.015 |
ends | +0.015 |
pretend | +0.014 |
excited | +0.012 |
wonder | +0.012 |
Neg logit contributions | |
ge | -0.029 |
Twe | -0.028 |
OO | -0.027 |
obby | -0.027 |
Wil | -0.027 |
Token | old |
Token ID | 1468 |
Feature activation | +4.96e-01 |
Pos logit contributions | |
cheer | +0.003 |
pretend | +0.003 |
ends | +0.003 |
wonder | +0.003 |
sill | +0.003 |
Neg logit contributions | |
ge | -0.005 |
Twe | -0.004 |
obby | -0.004 |
OO | -0.004 |
Wil | -0.004 |
Token | man |
Token ID | 582 |
Feature activation | -1.81e-01 |
Pos logit contributions | |
cheer | +0.006 |
pretend | +0.006 |
ends | +0.006 |
wonder | +0.005 |
excited | +0.005 |
Neg logit contributions | |
ge | -0.015 |
Twe | -0.014 |
obby | -0.014 |
OO | -0.014 |
Wil | -0.014 |
Token | . |
Token ID | 13 |
Feature activation | -1.87e-01 |
Pos logit contributions | |
ends | +0.000 |
pretend | +0.000 |
cheer | +0.000 |
excited | +0.000 |
e | +0.000 |
Neg logit contributions | |
Twe | -0.001 |
ge | -0.001 |
obby | -0.001 |
Wil | -0.001 |
OO | -0.001 |
Token | Tim |
Token ID | 5045 |
Feature activation | -6.62e-01 |
Pos logit contributions | |
ge | +0.006 |
OO | +0.005 |
iously | +0.005 |
poisonous | +0.004 |
Twe | +0.004 |
Neg logit contributions | |
ends | -0.003 |
pretend | -0.003 |
cheer | -0.003 |
excited | -0.003 |
see | -0.002 |
Token | my |
Token ID | 1820 |
Feature activation | -3.29e-01 |
Pos logit contributions | |
Twe | +0.018 |
ge | +0.018 |
Wil | +0.018 |
cheaply | +0.017 |
shadow | +0.016 |
Neg logit contributions | |
cheer | -0.010 |
pretend | -0.010 |
e | -0.010 |
excited | -0.010 |
ends | -0.010 |
Token | watched |
Token ID | 7342 |
Feature activation | -1.77e-01 |
Pos logit contributions | |
ge | +0.010 |
Twe | +0.010 |
Wil | +0.010 |
obby | +0.009 |
Johnson | +0.009 |
Neg logit contributions | |
pretend | -0.005 |
ends | -0.004 |
cheer | -0.004 |
excited | -0.004 |
e | -0.004 |
Token | with |
Token ID | 351 |
Feature activation | -7.39e-01 |
Pos logit contributions | |
ge | +0.005 |
Twe | +0.005 |
obby | +0.005 |
Wil | +0.005 |
OO | +0.005 |
Neg logit contributions | |
cheer | -0.003 |
pretend | -0.002 |
ends | -0.002 |
excited | -0.002 |
sill | -0.002 |
Token | amaz |
Token ID | 40642 |
Feature activation | +3.48e+00 |
Pos logit contributions | |
ge | +0.014 |
Bobby | +0.013 |
badly | +0.013 |
Frankie | +0.012 |
Timothy | +0.012 |
Neg logit contributions | |
ends | -0.017 |
reporters | -0.015 |
ibr | -0.015 |
pler | -0.014 |
ibrarian | -0.014 |
Token | ement |
Token ID | 972 |
Feature activation | -3.82e-01 |
Pos logit contributions | |
ends | +0.090 |
cheer | +0.087 |
pretend | +0.085 |
wonder | +0.079 |
e | +0.077 |
Neg logit contributions | |
ge | -0.057 |
Twe | -0.052 |
badly | -0.051 |
obby | -0.050 |
Wil | -0.048 |
Token | as |
Token ID | 355 |
Feature activation | +2.27e-01 |
Pos logit contributions | |
Twe | +0.011 |
ge | +0.010 |
Wil | +0.010 |
obby | +0.010 |
Fe | +0.009 |
Neg logit contributions | |
ends | -0.006 |
cheer | -0.005 |
pretend | -0.005 |
excited | -0.005 |
sill | -0.004 |
Token | he |
Token ID | 339 |
Feature activation | -3.38e-02 |
Pos logit contributions | |
cheer | +0.004 |
pretend | +0.004 |
ends | +0.004 |
excited | +0.003 |
wonder | +0.003 |
Neg logit contributions | |
ge | -0.006 |
Twe | -0.006 |
OO | -0.006 |
obby | -0.006 |
Wil | -0.005 |
Token | saw |
Token ID | 2497 |
Feature activation | +5.72e-01 |
Pos logit contributions | |
ge | +0.001 |
Twe | +0.001 |
Wil | +0.001 |
obby | +0.001 |
gie | +0.001 |
Neg logit contributions | |
ends | -0.001 |
cheer | -0.001 |
pretend | -0.001 |
sill | -0.001 |
reporters | -0.001 |
Token | stars |
Token ID | 5788 |
Feature activation | +5.17e-01 |
Pos logit contributions | |
cheer | +0.009 |
ends | +0.009 |
pretend | +0.009 |
sill | +0.008 |
wonder | +0.008 |
Neg logit contributions | |
ge | -0.015 |
Twe | -0.014 |
obby | -0.014 |
Wil | -0.014 |
OO | -0.014 |
Token | at |
Token ID | 379 |
Feature activation | +1.56e-01 |
Pos logit contributions | |
ends | +0.006 |
cheer | +0.006 |
e | +0.006 |
pretend | +0.005 |
reporters | +0.005 |
Neg logit contributions | |
ge | -0.012 |
badly | -0.012 |
Twe | -0.011 |
obby | -0.011 |
Bobby | -0.011 |
Token | the |
Token ID | 262 |
Feature activation | -3.71e-02 |
Pos logit contributions | |
cheer | +0.002 |
pretend | +0.002 |
wonder | +0.002 |
ends | +0.002 |
sill | +0.002 |
Neg logit contributions | |
ge | -0.004 |
OO | -0.004 |
Twe | -0.004 |
obby | -0.004 |
Wil | -0.004 |
Token | bright |
Token ID | 6016 |
Feature activation | +1.31e+00 |
Pos logit contributions | |
ge | +0.001 |
Twe | +0.001 |
obby | +0.001 |
Wil | +0.001 |
OO | +0.001 |
Neg logit contributions | |
cheer | -0.001 |
pretend | -0.001 |
ends | -0.001 |
sill | -0.001 |
wonder | -0.001 |
Token | colours |
Token ID | 18915 |
Feature activation | +5.06e-01 |
Pos logit contributions | |
cheer | +0.021 |
pretend | +0.018 |
wonder | +0.017 |
sill | +0.017 |
circles | +0.016 |
Neg logit contributions | |
ge | -0.037 |
Twe | -0.037 |
Wil | -0.035 |
obby | -0.035 |
shadow | -0.034 |
Token | with |
Token ID | 351 |
Feature activation | -4.16e-02 |
Pos logit contributions | |
ends | +0.009 |
cheer | +0.008 |
pretend | +0.008 |
reporters | +0.007 |
e | +0.007 |
Neg logit contributions | |
ge | -0.013 |
Twe | -0.011 |
badly | -0.011 |
Wil | -0.011 |
obby | -0.011 |
Token | amaz |
Token ID | 40642 |
Feature activation | +3.42e+00 |
Pos logit contributions | |
ge | +0.001 |
Twe | +0.001 |
Wil | +0.001 |
badly | +0.001 |
obby | +0.001 |
Neg logit contributions | |
ends | -0.001 |
cheer | -0.001 |
pretend | -0.001 |
reporters | -0.001 |
e | -0.001 |
Token | ement |
Token ID | 972 |
Feature activation | +6.22e-01 |
Pos logit contributions | |
ends | +0.082 |
cheer | +0.078 |
pretend | +0.076 |
wonder | +0.070 |
e | +0.069 |
Neg logit contributions | |
ge | -0.063 |
Twe | -0.058 |
obby | -0.055 |
Wil | -0.055 |
badly | -0.055 |
Token | and |
Token ID | 290 |
Feature activation | +3.82e-02 |
Pos logit contributions | |
ends | +0.009 |
cheer | +0.009 |
pretend | +0.009 |
excited | +0.007 |
sill | +0.007 |
Neg logit contributions | |
Twe | -0.017 |
ge | -0.016 |
Wil | -0.016 |
obby | -0.016 |
OO | -0.015 |
Token | awe |
Token ID | 25030 |
Feature activation | +9.83e-01 |
Pos logit contributions | |
ends | +0.001 |
e | +0.001 |
cheer | +0.001 |
pretend | +0.001 |
reporters | +0.001 |
Neg logit contributions | |
ge | -0.001 |
Twe | -0.001 |
Bobby | -0.001 |
badly | -0.001 |
Wil | -0.001 |
Token | . |
Token ID | 13 |
Feature activation | -1.39e-01 |
Pos logit contributions | |
cheer | +0.015 |
ends | +0.015 |
pretend | +0.014 |
excited | +0.013 |
sill | +0.013 |
Neg logit contributions | |
Twe | -0.026 |
ge | -0.025 |
Wil | -0.024 |
obby | -0.024 |
OO | -0.024 |
Token | He |
Token ID | 679 |
Feature activation | -8.33e-01 |
Pos logit contributions | |
ge | +0.005 |
OO | +0.004 |
badly | +0.004 |
rated | +0.004 |
Twe | +0.004 |
Neg logit contributions | |
ends | -0.002 |
excited | -0.002 |
cheer | -0.002 |
sill | -0.001 |
see | -0.001 |
Token | her |
Token ID | 607 |
Feature activation | -4.12e-01 |
Pos logit contributions | |
ends | +0.018 |
cheer | +0.017 |
pretend | +0.016 |
e | +0.015 |
wonder | +0.015 |
Neg logit contributions | |
ge | -0.024 |
Twe | -0.022 |
obby | -0.021 |
badly | -0.021 |
Wil | -0.021 |
Token | mom |
Token ID | 1995 |
Feature activation | -2.38e-01 |
Pos logit contributions | |
ge | +0.012 |
Twe | +0.011 |
Wil | +0.011 |
obby | +0.011 |
OO | +0.010 |
Neg logit contributions | |
pretend | -0.006 |
cheer | -0.006 |
ends | -0.006 |
excited | -0.005 |
sill | -0.005 |
Token | my |
Token ID | 1820 |
Feature activation | -3.26e-01 |
Pos logit contributions | |
Twe | +0.007 |
ge | +0.007 |
Wil | +0.007 |
Fe | +0.006 |
obby | +0.006 |
Neg logit contributions | |
cheer | -0.003 |
pretend | -0.003 |
ends | -0.003 |
see | -0.003 |
excited | -0.003 |
Token | watched |
Token ID | 7342 |
Feature activation | +9.51e-02 |
Pos logit contributions | |
Twe | +0.010 |
ge | +0.009 |
Wil | +0.009 |
Fe | +0.009 |
OO | +0.009 |
Neg logit contributions | |
cheer | -0.005 |
pretend | -0.004 |
ends | -0.004 |
excited | -0.004 |
see | -0.004 |
Token | in |
Token ID | 287 |
Feature activation | -6.89e-01 |
Pos logit contributions | |
cheer | +0.001 |
pretend | +0.001 |
excited | +0.001 |
ends | +0.001 |
like | +0.001 |
Neg logit contributions | |
Twe | -0.003 |
ge | -0.003 |
Wil | -0.003 |
OO | -0.003 |
obby | -0.003 |
Token | amaz |
Token ID | 40642 |
Feature activation | +3.40e+00 |
Pos logit contributions | |
Twe | +0.019 |
ge | +0.018 |
Wil | +0.017 |
obby | +0.017 |
OO | +0.017 |
Neg logit contributions | |
cheer | -0.011 |
pretend | -0.010 |
ends | -0.010 |
wonder | -0.009 |
sill | -0.009 |
Token | ement |
Token ID | 972 |
Feature activation | -4.08e-01 |
Pos logit contributions | |
ends | +0.076 |
cheer | +0.074 |
pretend | +0.071 |
wonder | +0.065 |
e | +0.065 |
Neg logit contributions | |
ge | -0.067 |
Twe | -0.063 |
obby | -0.060 |
Wil | -0.059 |
badly | -0.057 |
Token | as |
Token ID | 355 |
Feature activation | +1.95e-01 |
Pos logit contributions | |
Twe | +0.012 |
Wil | +0.011 |
ge | +0.011 |
Fe | +0.011 |
OO | +0.011 |
Neg logit contributions | |
ends | -0.005 |
cheer | -0.005 |
pretend | -0.005 |
excited | -0.005 |
like | -0.004 |
Token | the |
Token ID | 262 |
Feature activation | -3.75e-01 |
Pos logit contributions | |
cheer | +0.003 |
pretend | +0.003 |
ends | +0.003 |
excited | +0.002 |
wonder | +0.002 |
Neg logit contributions | |
ge | -0.006 |
Twe | -0.005 |
OO | -0.005 |
Wil | -0.005 |
obby | -0.005 |
Token | machine |
Token ID | 4572 |
Feature activation | -3.22e-01 |
Pos logit contributions | |
ge | +0.011 |
OO | +0.010 |
Twe | +0.010 |
obby | +0.010 |
Wil | +0.010 |
Neg logit contributions | |
cheer | -0.006 |
pretend | -0.006 |
wonder | -0.005 |
sill | -0.005 |
excited | -0.005 |
Token | flew |
Token ID | 13112 |
Feature activation | +1.22e-01 |
Pos logit contributions | |
Twe | +0.008 |
ge | +0.008 |
Wil | +0.008 |
obby | +0.008 |
OO | +0.008 |
Neg logit contributions | |
cheer | -0.005 |
pretend | -0.005 |
ends | -0.005 |
excited | -0.004 |
sill | -0.004 |
Token | ered |
Token ID | 1068 |
Feature activation | -5.80e-02 |
Pos logit contributions | |
ends | +0.010 |
cheer | +0.009 |
pretend | +0.009 |
e | +0.008 |
wonder | +0.008 |
Neg logit contributions | |
ge | -0.009 |
Twe | -0.008 |
obby | -0.008 |
Wil | -0.008 |
OO | -0.008 |
Token | in |
Token ID | 287 |
Feature activation | -2.28e-01 |
Pos logit contributions | |
ge | +0.002 |
Twe | +0.002 |
Wil | +0.002 |
obby | +0.002 |
OO | +0.002 |
Neg logit contributions | |
ends | -0.001 |
cheer | -0.001 |
pretend | -0.001 |
wonder | -0.001 |
excited | -0.001 |
Token | , |
Token ID | 11 |
Feature activation | +1.61e-01 |
Pos logit contributions | |
ge | +0.006 |
Twe | +0.005 |
Wil | +0.005 |
obby | +0.005 |
OO | +0.005 |
Neg logit contributions | |
ends | -0.004 |
cheer | -0.004 |
pretend | -0.004 |
wonder | -0.003 |
sill | -0.003 |
Token | blinking |
Token ID | 43196 |
Feature activation | +6.65e-01 |
Pos logit contributions | |
ends | +0.002 |
cheer | +0.002 |
excited | +0.002 |
pretend | +0.002 |
see | +0.002 |
Neg logit contributions | |
ge | -0.004 |
Twe | -0.004 |
obby | -0.004 |
OO | -0.004 |
Wil | -0.004 |
Token | in |
Token ID | 287 |
Feature activation | -1.97e-01 |
Pos logit contributions | |
ends | +0.010 |
cheer | +0.010 |
pretend | +0.010 |
wonder | +0.009 |
sill | +0.008 |
Neg logit contributions | |
ge | -0.017 |
Twe | -0.017 |
Wil | -0.016 |
obby | -0.016 |
OO | -0.015 |
Token | amaz |
Token ID | 40642 |
Feature activation | +3.39e+00 |
Pos logit contributions | |
ge | +0.005 |
Twe | +0.005 |
Wil | +0.004 |
obby | +0.004 |
OO | +0.004 |
Neg logit contributions | |
ends | -0.003 |
cheer | -0.003 |
pretend | -0.003 |
wonder | -0.003 |
e | -0.003 |
Token | ement |
Token ID | 972 |
Feature activation | +1.57e-01 |
Pos logit contributions | |
ends | +0.081 |
cheer | +0.078 |
pretend | +0.075 |
wonder | +0.068 |
reporters | +0.068 |
Neg logit contributions | |
ge | -0.062 |
Twe | -0.057 |
obby | -0.055 |
Wil | -0.055 |
badly | -0.054 |
Token | . |
Token ID | 13 |
Feature activation | -2.30e-01 |
Pos logit contributions | |
ends | +0.003 |
cheer | +0.003 |
pretend | +0.003 |
excited | +0.002 |
sill | +0.002 |
Neg logit contributions | |
Twe | -0.004 |
ge | -0.004 |
Wil | -0.004 |
obby | -0.004 |
OO | -0.003 |
Token |
|
Token ID | 198 |
Feature activation | -1.69e-01 |
Pos logit contributions | |
ge | +0.007 |
badly | +0.006 |
Twe | +0.006 |
rated | +0.006 |
OO | +0.006 |
Neg logit contributions | |
ends | -0.003 |
excited | -0.003 |
see | -0.003 |
wonder | -0.003 |
pretend | -0.003 |
Token |
|
Token ID | 198 |
Feature activation | -1.38e-01 |
Pos logit contributions | |
ge | +0.005 |
OO | +0.005 |
gie | +0.005 |
Twe | +0.004 |
iously | +0.004 |
Neg logit contributions | |
ends | -0.002 |
cheer | -0.002 |
excited | -0.002 |
pretend | -0.002 |
see | -0.002 |
Token | But |
Token ID | 1537 |
Feature activation | -1.67e-01 |
Pos logit contributions | |
ge | +0.003 |
Twe | +0.003 |
gie | +0.003 |
OO | +0.003 |
obby | +0.003 |
Neg logit contributions | |
ends | -0.003 |
cheer | -0.002 |
pretend | -0.002 |
e | -0.002 |
excited | -0.002 |
Token | go |
Token ID | 467 |
Feature activation | +6.03e-01 |
Pos logit contributions | |
cheer | +0.010 |
pretend | +0.010 |
ends | +0.009 |
see | +0.009 |
wonder | +0.008 |
Neg logit contributions | |
ge | -0.022 |
obby | -0.020 |
Twe | -0.020 |
OO | -0.020 |
Wil | -0.020 |
Token | ! |
Token ID | 0 |
Feature activation | -3.27e-01 |
Pos logit contributions | |
pretend | +0.007 |
see | +0.006 |
cheer | +0.006 |
ends | +0.006 |
around | +0.006 |
Neg logit contributions | |
OO | -0.019 |
Twe | -0.019 |
Fe | -0.019 |
obby | -0.019 |
ge | -0.019 |
Token | Everyone |
Token ID | 11075 |
Feature activation | -3.84e-01 |
Pos logit contributions | |
ge | +0.010 |
OO | +0.009 |
poisonous | +0.008 |
iously | +0.008 |
rated | +0.008 |
Neg logit contributions | |
ends | -0.005 |
cheer | -0.005 |
excited | -0.004 |
pretend | -0.004 |
see | -0.004 |
Token | watched |
Token ID | 7342 |
Feature activation | +8.54e-02 |
Pos logit contributions | |
Twe | +0.011 |
ge | +0.011 |
Wil | +0.010 |
OO | +0.010 |
obby | +0.010 |
Neg logit contributions | |
cheer | -0.006 |
pretend | -0.005 |
ends | -0.005 |
excited | -0.005 |
e | -0.005 |
Token | in |
Token ID | 287 |
Feature activation | -2.33e-01 |
Pos logit contributions | |
cheer | +0.001 |
pretend | +0.001 |
ends | +0.001 |
excited | +0.001 |
e | +0.001 |
Neg logit contributions | |
Twe | -0.003 |
ge | -0.003 |
Wil | -0.002 |
ety | -0.002 |
OO | -0.002 |
Token | amaz |
Token ID | 40642 |
Feature activation | +3.38e+00 |
Pos logit contributions | |
ge | +0.006 |
Twe | +0.006 |
Wil | +0.006 |
obby | +0.006 |
OO | +0.005 |
Neg logit contributions | |
ends | -0.004 |
cheer | -0.004 |
pretend | -0.003 |
wonder | -0.003 |
sill | -0.003 |
Token | ement |
Token ID | 972 |
Feature activation | -3.00e-01 |
Pos logit contributions | |
ends | +0.078 |
cheer | +0.075 |
pretend | +0.073 |
wonder | +0.067 |
e | +0.066 |
Neg logit contributions | |
ge | -0.064 |
Twe | -0.060 |
obby | -0.058 |
Wil | -0.056 |
badly | -0.056 |
Token | as |
Token ID | 355 |
Feature activation | +3.83e-01 |
Pos logit contributions | |
Twe | +0.009 |
ge | +0.008 |
Wil | +0.008 |
obby | +0.008 |
OO | +0.008 |
Neg logit contributions | |
ends | -0.004 |
cheer | -0.004 |
pretend | -0.004 |
excited | -0.004 |
sill | -0.003 |
Token | it |
Token ID | 340 |
Feature activation | +1.91e-01 |
Pos logit contributions | |
cheer | +0.006 |
pretend | +0.005 |
ends | +0.005 |
excited | +0.005 |
wonder | +0.005 |
Neg logit contributions | |
ge | -0.011 |
Twe | -0.011 |
OO | -0.010 |
Wil | -0.010 |
gie | -0.010 |
Token | took |
Token ID | 1718 |
Feature activation | -4.79e-01 |
Pos logit contributions | |
ends | +0.004 |
cheer | +0.004 |
pretend | +0.003 |
wonder | +0.003 |
sill | +0.003 |
Neg logit contributions | |
ge | -0.004 |
Twe | -0.004 |
Wil | -0.004 |
obby | -0.004 |
OO | -0.004 |
Token | off |
Token ID | 572 |
Feature activation | +4.27e-01 |
Pos logit contributions | |
ge | +0.013 |
Twe | +0.012 |
Wil | +0.012 |
obby | +0.012 |
OO | +0.012 |
Neg logit contributions | |
ends | -0.007 |
cheer | -0.007 |
pretend | -0.007 |
wonder | -0.006 |
e | -0.006 |
Token | shaking |
Token ID | 17275 |
Feature activation | +2.12e-01 |
Pos logit contributions | |
ge | +0.001 |
Twe | +0.001 |
obby | +0.001 |
Wil | +0.001 |
OO | +0.001 |
Neg logit contributions | |
ends | -0.001 |
cheer | -0.001 |
pretend | -0.001 |
excited | -0.001 |
sill | -0.001 |
Token | . |
Token ID | 13 |
Feature activation | -2.50e-01 |
Pos logit contributions | |
cheer | +0.003 |
pretend | +0.003 |
ends | +0.003 |
excited | +0.002 |
wonder | +0.002 |
Neg logit contributions | |
Twe | -0.006 |
ge | -0.006 |
Wil | -0.006 |
obby | -0.006 |
OO | -0.006 |
Token | Mom |
Token ID | 11254 |
Feature activation | +4.55e-01 |
Pos logit contributions | |
ge | +0.009 |
gie | +0.007 |
Twe | +0.007 |
OO | +0.007 |
pless | +0.007 |
Neg logit contributions | |
excited | -0.003 |
ends | -0.003 |
cheer | -0.003 |
sill | -0.003 |
see | -0.003 |
Token | watched |
Token ID | 7342 |
Feature activation | +2.34e-01 |
Pos logit contributions | |
cheer | +0.008 |
ends | +0.008 |
pretend | +0.007 |
excited | +0.006 |
e | +0.006 |
Neg logit contributions | |
ge | -0.011 |
Twe | -0.011 |
Wil | -0.010 |
OO | -0.010 |
obby | -0.010 |
Token | in |
Token ID | 287 |
Feature activation | -6.40e-01 |
Pos logit contributions | |
cheer | +0.003 |
pretend | +0.003 |
ends | +0.003 |
excited | +0.003 |
e | +0.002 |
Neg logit contributions | |
ge | -0.007 |
Twe | -0.007 |
obby | -0.007 |
Wil | -0.007 |
OO | -0.007 |
Token | amaz |
Token ID | 40642 |
Feature activation | +3.31e+00 |
Pos logit contributions | |
ge | +0.019 |
Twe | +0.017 |
obby | +0.017 |
Wil | +0.017 |
badly | +0.017 |
Neg logit contributions | |
ends | -0.008 |
cheer | -0.007 |
pretend | -0.007 |
reporters | -0.006 |
e | -0.006 |
Token | ement |
Token ID | 972 |
Feature activation | -3.94e-02 |
Pos logit contributions | |
ends | +0.086 |
cheer | +0.081 |
pretend | +0.079 |
reporters | +0.073 |
sill | +0.073 |
Neg logit contributions | |
ge | -0.054 |
badly | -0.049 |
Twe | -0.049 |
obby | -0.048 |
Wil | -0.047 |
Token | from |
Token ID | 422 |
Feature activation | +2.58e-02 |
Pos logit contributions | |
Twe | +0.001 |
ge | +0.001 |
Wil | +0.001 |
obby | +0.001 |
OO | +0.001 |
Neg logit contributions | |
ends | -0.000 |
cheer | -0.000 |
pretend | -0.000 |
excited | -0.000 |
wonder | -0.000 |
Token | a |
Token ID | 257 |
Feature activation | +1.97e-02 |
Pos logit contributions | |
cheer | +0.000 |
pretend | +0.000 |
ends | +0.000 |
wonder | +0.000 |
sill | +0.000 |
Neg logit contributions | |
ge | -0.001 |
Twe | -0.001 |
obby | -0.001 |
Wil | -0.001 |
OO | -0.001 |
Token | distance |
Token ID | 5253 |
Feature activation | +7.68e-02 |
Pos logit contributions | |
cheer | +0.000 |
ends | +0.000 |
pretend | +0.000 |
wonder | +0.000 |
sill | +0.000 |
Neg logit contributions | |
ge | -0.000 |
Twe | -0.000 |
obby | -0.000 |
Wil | -0.000 |
OO | -0.000 |
Token | , |
Token ID | 11 |
Feature activation | +3.17e-01 |
Pos logit contributions | |
cheer | +0.001 |
pretend | +0.001 |
ends | +0.001 |
wonder | +0.001 |
sill | +0.001 |
Neg logit contributions | |
Twe | -0.002 |
ge | -0.002 |
Wil | -0.002 |
obby | -0.002 |
OO | -0.002 |
Token | , |
Token ID | 11 |
Feature activation | +2.93e-01 |
Pos logit contributions | |
cheer | +0.012 |
pretend | +0.012 |
ends | +0.011 |
sill | +0.011 |
wonder | +0.010 |
Neg logit contributions | |
Twe | -0.025 |
ge | -0.023 |
obby | -0.023 |
Wil | -0.023 |
OO | -0.023 |
Token | observing |
Token ID | 21769 |
Feature activation | +4.25e-01 |
Pos logit contributions | |
ends | +0.005 |
cheer | +0.005 |
pretend | +0.004 |
e | +0.004 |
wonder | +0.004 |
Neg logit contributions | |
ge | -0.007 |
Twe | -0.007 |
Wil | -0.007 |
obby | -0.007 |
OO | -0.007 |
Token | the |
Token ID | 262 |
Feature activation | +3.57e-01 |
Pos logit contributions | |
cheer | +0.007 |
pretend | +0.006 |
ends | +0.006 |
wonder | +0.006 |
sill | +0.006 |
Neg logit contributions | |
ge | -0.011 |
Twe | -0.011 |
obby | -0.010 |
Wil | -0.010 |
OO | -0.010 |
Token | landscape |
Token ID | 10747 |
Feature activation | +1.85e-01 |
Pos logit contributions | |
cheer | +0.006 |
wonder | +0.006 |
pretend | +0.006 |
excited | +0.005 |
ends | +0.005 |
Neg logit contributions | |
ge | -0.010 |
Twe | -0.009 |
obby | -0.009 |
OO | -0.009 |
Wil | -0.009 |
Token | with |
Token ID | 351 |
Feature activation | +4.10e-02 |
Pos logit contributions | |
cheer | +0.003 |
ends | +0.003 |
pretend | +0.003 |
sill | +0.002 |
wonder | +0.002 |
Neg logit contributions | |
ge | -0.005 |
Twe | -0.005 |
obby | -0.004 |
Wil | -0.004 |
OO | -0.004 |
Token | amaz |
Token ID | 40642 |
Feature activation | +3.30e+00 |
Pos logit contributions | |
ends | +0.001 |
cheer | +0.001 |
pretend | +0.001 |
reporters | +0.001 |
e | +0.001 |
Neg logit contributions | |
ge | -0.001 |
Twe | -0.001 |
Wil | -0.001 |
badly | -0.001 |
obby | -0.001 |
Token | ement |
Token ID | 972 |
Feature activation | +5.16e-01 |
Pos logit contributions | |
ends | +0.069 |
cheer | +0.068 |
pretend | +0.065 |
wonder | +0.060 |
sill | +0.059 |
Neg logit contributions | |
ge | -0.069 |
Twe | -0.065 |
Wil | -0.062 |
obby | -0.062 |
OO | -0.060 |
Token | . |
Token ID | 13 |
Feature activation | -7.95e-02 |
Pos logit contributions | |
ends | +0.008 |
pretend | +0.007 |
cheer | +0.007 |
excited | +0.007 |
sill | +0.007 |
Neg logit contributions | |
Twe | -0.014 |
Wil | -0.013 |
ge | -0.013 |
Fe | -0.013 |
obby | -0.012 |
Token | As |
Token ID | 1081 |
Feature activation | -8.97e-02 |
Pos logit contributions | |
ge | +0.002 |
Twe | +0.002 |
poisonous | +0.002 |
badly | +0.002 |
thorn | +0.002 |
Neg logit contributions | |
ends | -0.001 |
sill | -0.001 |
excited | -0.001 |
pretend | -0.001 |
see | -0.001 |
Token | he |
Token ID | 339 |
Feature activation | -6.67e-01 |
Pos logit contributions | |
ge | +0.002 |
Twe | +0.002 |
OO | +0.002 |
obby | +0.002 |
Wil | +0.002 |
Neg logit contributions | |
ends | -0.002 |
cheer | -0.002 |
pretend | -0.001 |
wonder | -0.001 |
excited | -0.001 |
Token | padd |
Token ID | 14098 |
Feature activation | -6.93e-01 |
Pos logit contributions | |
ge | +0.014 |
Twe | +0.014 |
Wil | +0.013 |
obby | +0.013 |
OO | +0.012 |
Neg logit contributions | |
ends | -0.014 |
cheer | -0.013 |
pretend | -0.013 |
wonder | -0.012 |
e | -0.012 |
Token | . |
Token ID | 13 |
Feature activation | +2.19e-01 |
Pos logit contributions | |
pretend | +0.001 |
cheer | +0.001 |
ends | +0.001 |
like | +0.001 |
excited | +0.001 |
Neg logit contributions | |
Twe | -0.002 |
Wil | -0.002 |
ge | -0.002 |
Fe | -0.002 |
OO | -0.002 |
Token | He |
Token ID | 679 |
Feature activation | -3.25e-01 |
Pos logit contributions | |
ends | +0.003 |
excited | +0.003 |
pretend | +0.003 |
cheer | +0.003 |
Leo | +0.003 |
Neg logit contributions | |
ge | -0.007 |
OO | -0.006 |
badly | -0.006 |
rated | -0.006 |
poisonous | -0.005 |
Token | watched |
Token ID | 7342 |
Feature activation | +1.02e-02 |
Pos logit contributions | |
ge | +0.008 |
Twe | +0.008 |
Wil | +0.008 |
obby | +0.008 |
OO | +0.007 |
Neg logit contributions | |
ends | -0.005 |
cheer | -0.005 |
pretend | -0.005 |
excited | -0.005 |
wonder | -0.005 |
Token | them |
Token ID | 606 |
Feature activation | -9.73e-02 |
Pos logit contributions | |
cheer | +0.000 |
ends | +0.000 |
pretend | +0.000 |
excited | +0.000 |
e | +0.000 |
Neg logit contributions | |
ge | -0.000 |
Twe | -0.000 |
Wil | -0.000 |
obby | -0.000 |
OO | -0.000 |
Token | in |
Token ID | 287 |
Feature activation | -3.97e-01 |
Pos logit contributions | |
ge | +0.003 |
Twe | +0.002 |
obby | +0.002 |
Wil | +0.002 |
badly | +0.002 |
Neg logit contributions | |
ends | -0.002 |
cheer | -0.001 |
pretend | -0.001 |
reporters | -0.001 |
sill | -0.001 |
Token | amaz |
Token ID | 40642 |
Feature activation | +3.28e+00 |
Pos logit contributions | |
ge | +0.011 |
Twe | +0.010 |
Wil | +0.010 |
obby | +0.010 |
badly | +0.010 |
Neg logit contributions | |
ends | -0.006 |
cheer | -0.005 |
pretend | -0.005 |
reporters | -0.004 |
ibr | -0.004 |
Token | ement |
Token ID | 972 |
Feature activation | -2.85e-01 |
Pos logit contributions | |
ends | +0.085 |
cheer | +0.082 |
pretend | +0.078 |
wonder | +0.072 |
reporters | +0.072 |
Neg logit contributions | |
ge | -0.054 |
Twe | -0.049 |
badly | -0.048 |
obby | -0.048 |
Wil | -0.046 |
Token | and |
Token ID | 290 |
Feature activation | -1.43e-02 |
Pos logit contributions | |
ge | +0.007 |
Twe | +0.007 |
Wil | +0.007 |
obby | +0.007 |
OO | +0.007 |
Neg logit contributions | |
ends | -0.004 |
cheer | -0.004 |
pretend | -0.004 |
sill | -0.004 |
excited | -0.004 |
Token | even |
Token ID | 772 |
Feature activation | +8.28e-01 |
Pos logit contributions | |
ge | +0.000 |
Twe | +0.000 |
gie | +0.000 |
obby | +0.000 |
Wil | +0.000 |
Neg logit contributions | |
ends | -0.000 |
cheer | -0.000 |
pretend | -0.000 |
e | -0.000 |
reporters | -0.000 |
Token | laughed |
Token ID | 13818 |
Feature activation | +5.07e-02 |
Pos logit contributions | |
ends | +0.017 |
cheer | +0.015 |
pretend | +0.015 |
reporters | +0.014 |
e | +0.014 |
Neg logit contributions | |
ge | -0.018 |
Twe | -0.015 |
gie | -0.015 |
shadow | -0.015 |
obby | -0.014 |
Token | a |
Token ID | 257 |
Feature activation | -1.67e-01 |
Pos logit contributions | |
ends | +0.001 |
cheer | +0.001 |
pretend | +0.001 |
wonder | +0.001 |
e | +0.001 |
Neg logit contributions | |
ge | -0.001 |
Twe | -0.001 |
obby | -0.001 |
Wil | -0.001 |
badly | -0.001 |
Token | . |
Token ID | 13 |
Feature activation | -2.33e-01 |
Pos logit contributions | |
cheer | +0.008 |
pretend | +0.007 |
wonder | +0.007 |
ends | +0.007 |
like | +0.006 |
Neg logit contributions | |
ge | -0.014 |
Twe | -0.014 |
shadow | -0.013 |
obby | -0.013 |
Wil | -0.013 |
Token | Amy |
Token ID | 14235 |
Feature activation | +1.89e-01 |
Pos logit contributions | |
ge | +0.007 |
Twe | +0.006 |
Wil | +0.006 |
OO | +0.006 |
gie | +0.006 |
Neg logit contributions | |
ends | -0.004 |
cheer | -0.004 |
pretend | -0.003 |
excited | -0.003 |
sill | -0.003 |
Token | watched |
Token ID | 7342 |
Feature activation | -8.50e-03 |
Pos logit contributions | |
ends | +0.003 |
cheer | +0.003 |
pretend | +0.003 |
e | +0.003 |
wonder | +0.003 |
Neg logit contributions | |
ge | -0.005 |
Twe | -0.004 |
Wil | -0.004 |
obby | -0.004 |
OO | -0.004 |
Token | them |
Token ID | 606 |
Feature activation | -3.39e-01 |
Pos logit contributions | |
ge | +0.000 |
Twe | +0.000 |
obby | +0.000 |
Wil | +0.000 |
OO | +0.000 |
Neg logit contributions | |
cheer | -0.000 |
ends | -0.000 |
pretend | -0.000 |
excited | -0.000 |
e | -0.000 |
Token | with |
Token ID | 351 |
Feature activation | -4.22e-01 |
Pos logit contributions | |
ge | +0.009 |
Twe | +0.009 |
Wil | +0.008 |
obby | +0.008 |
OO | +0.008 |
Neg logit contributions | |
ends | -0.005 |
cheer | -0.005 |
pretend | -0.005 |
wonder | -0.004 |
sill | -0.004 |
Token | amaz |
Token ID | 40642 |
Feature activation | +3.27e+00 |
Pos logit contributions | |
Frankie | +0.008 |
Bobby | +0.008 |
ge | +0.008 |
badly | +0.008 |
Timothy | +0.007 |
Neg logit contributions | |
ends | -0.009 |
ibr | -0.008 |
ibrarian | -0.007 |
reporters | -0.007 |
iece | -0.007 |
Token | ement |
Token ID | 972 |
Feature activation | -2.42e-01 |
Pos logit contributions | |
ends | +0.073 |
cheer | +0.070 |
pretend | +0.068 |
wonder | +0.062 |
e | +0.062 |
Neg logit contributions | |
ge | -0.065 |
Twe | -0.061 |
obby | -0.058 |
Wil | -0.057 |
badly | -0.056 |
Token | . |
Token ID | 13 |
Feature activation | +1.01e-01 |
Pos logit contributions | |
Twe | +0.007 |
Wil | +0.007 |
ge | +0.006 |
Fe | +0.006 |
obby | +0.006 |
Neg logit contributions | |
ends | -0.004 |
cheer | -0.003 |
pretend | -0.003 |
excited | -0.003 |
sill | -0.003 |
Token | She |
Token ID | 1375 |
Feature activation | -8.48e-01 |
Pos logit contributions | |
ends | +0.002 |
cheer | +0.001 |
excited | +0.001 |
pretend | +0.001 |
wonder | +0.001 |
Neg logit contributions | |
ge | -0.003 |
Twe | -0.003 |
badly | -0.003 |
Wil | -0.003 |
OO | -0.003 |
Token | smiled |
Token ID | 13541 |
Feature activation | +5.39e-01 |
Pos logit contributions | |
ge | +0.019 |
Twe | +0.018 |
Wil | +0.017 |
obby | +0.017 |
OO | +0.017 |
Neg logit contributions | |
ends | -0.016 |
cheer | -0.016 |
pretend | -0.015 |
sill | -0.013 |
wonder | -0.013 |
Token | , |
Token ID | 11 |
Feature activation | -3.60e-01 |
Pos logit contributions | |
ends | +0.010 |
cheer | +0.010 |
e | +0.009 |
pretend | +0.009 |
ibrarian | +0.009 |
Neg logit contributions | |
ge | -0.012 |
badly | -0.011 |
obby | -0.011 |
Twe | -0.011 |
Twe | -0.010 |
Token | . |
Token ID | 13 |
Feature activation | -1.23e-01 |
Pos logit contributions | |
pretend | +0.003 |
cheer | +0.003 |
e | +0.003 |
like | +0.003 |
sill | +0.003 |
Neg logit contributions | |
Twe | -0.007 |
obby | -0.006 |
oun | -0.006 |
etsy | -0.006 |
Wil | -0.006 |
Token | He |
Token ID | 679 |
Feature activation | -3.99e-01 |
Pos logit contributions | |
ge | +0.004 |
arse | +0.004 |
rated | +0.004 |
matter | +0.003 |
Twe | +0.003 |
Neg logit contributions | |
excited | -0.002 |
Leo | -0.001 |
cheer | -0.001 |
see | -0.001 |
sill | -0.001 |
Token | watched |
Token ID | 7342 |
Feature activation | +1.09e-01 |
Pos logit contributions | |
ge | +0.011 |
obby | +0.010 |
Twe | +0.010 |
Wil | +0.010 |
Johnson | +0.010 |
Neg logit contributions | |
cheer | -0.006 |
pretend | -0.006 |
ends | -0.006 |
excited | -0.006 |
wonder | -0.005 |
Token | it |
Token ID | 340 |
Feature activation | -1.29e-01 |
Pos logit contributions | |
cheer | +0.002 |
pretend | +0.001 |
ends | +0.001 |
excited | +0.001 |
e | +0.001 |
Neg logit contributions | |
ge | -0.003 |
Twe | -0.003 |
obby | -0.003 |
Wil | -0.003 |
OO | -0.003 |
Token | in |
Token ID | 287 |
Feature activation | -3.07e-01 |
Pos logit contributions | |
ge | +0.003 |
Twe | +0.003 |
Wil | +0.003 |
badly | +0.003 |
OO | +0.003 |
Neg logit contributions | |
ends | -0.002 |
cheer | -0.002 |
pretend | -0.002 |
reporters | -0.002 |
e | -0.002 |
Token | amaz |
Token ID | 40642 |
Feature activation | +3.26e+00 |
Pos logit contributions | |
ge | +0.008 |
Twe | +0.007 |
Wil | +0.007 |
obby | +0.007 |
badly | +0.007 |
Neg logit contributions | |
ends | -0.005 |
cheer | -0.005 |
pretend | -0.005 |
reporters | -0.004 |
e | -0.004 |
Token | ement |
Token ID | 972 |
Feature activation | -9.20e-02 |
Pos logit contributions | |
ends | +0.080 |
cheer | +0.077 |
pretend | +0.075 |
wonder | +0.069 |
reporters | +0.068 |
Neg logit contributions | |
ge | -0.058 |
Twe | -0.053 |
obby | -0.052 |
badly | -0.051 |
Wil | -0.050 |
Token | as |
Token ID | 355 |
Feature activation | +7.26e-01 |
Pos logit contributions | |
Twe | +0.002 |
ge | +0.002 |
Wil | +0.002 |
obby | +0.002 |
OO | +0.002 |
Neg logit contributions | |
ends | -0.001 |
cheer | -0.001 |
pretend | -0.001 |
excited | -0.001 |
sill | -0.001 |
Token | it |
Token ID | 340 |
Feature activation | +5.99e-01 |
Pos logit contributions | |
cheer | +0.009 |
pretend | +0.009 |
excited | +0.008 |
ends | +0.008 |
wonder | +0.008 |
Neg logit contributions | |
ge | -0.022 |
Twe | -0.021 |
OO | -0.021 |
obby | -0.021 |
Wil | -0.020 |
Token | went |
Token ID | 1816 |
Feature activation | +2.64e-01 |
Pos logit contributions | |
ends | +0.014 |
cheer | +0.013 |
pretend | +0.013 |
reporters | +0.012 |
sill | +0.012 |
Neg logit contributions | |
ge | -0.011 |
Twe | -0.010 |
Wil | -0.010 |
gie | -0.009 |
badly | -0.009 |
Token | higher |
Token ID | 2440 |
Feature activation | +7.21e-01 |
Pos logit contributions | |
cheer | +0.004 |
ends | +0.004 |
pretend | +0.003 |
sill | +0.003 |
excited | +0.003 |
Neg logit contributions | |
ge | -0.008 |
obby | -0.007 |
Twe | -0.007 |
Wil | -0.007 |
OO | -0.007 |
Token | trick |
Token ID | 6908 |
Feature activation | +6.98e-01 |
Pos logit contributions | |
cheer | +0.013 |
pretend | +0.012 |
ends | +0.011 |
wonder | +0.010 |
e | +0.010 |
Neg logit contributions | |
ge | -0.019 |
Twe | -0.019 |
Wil | -0.018 |
obby | -0.018 |
OO | -0.017 |
Token | . |
Token ID | 13 |
Feature activation | -9.69e-02 |
Pos logit contributions | |
cheer | +0.012 |
ends | +0.012 |
pretend | +0.012 |
e | +0.011 |
sill | +0.010 |
Neg logit contributions | |
ge | -0.017 |
Twe | -0.017 |
Wil | -0.016 |
obby | -0.015 |
OO | -0.015 |
Token | Everyone |
Token ID | 11075 |
Feature activation | +1.44e-01 |
Pos logit contributions | |
ge | +0.003 |
OO | +0.002 |
Twe | +0.002 |
badly | +0.002 |
iously | +0.002 |
Neg logit contributions | |
ends | -0.002 |
cheer | -0.001 |
pretend | -0.001 |
excited | -0.001 |
see | -0.001 |
Token | watched |
Token ID | 7342 |
Feature activation | +2.34e-01 |
Pos logit contributions | |
ends | +0.003 |
cheer | +0.002 |
pretend | +0.002 |
sill | +0.002 |
reporters | +0.002 |
Neg logit contributions | |
ge | -0.003 |
Twe | -0.003 |
obby | -0.003 |
Wil | -0.003 |
badly | -0.003 |
Token | in |
Token ID | 287 |
Feature activation | -4.92e-01 |
Pos logit contributions | |
cheer | +0.003 |
pretend | +0.003 |
ends | +0.003 |
excited | +0.002 |
e | +0.002 |
Neg logit contributions | |
ge | -0.007 |
Twe | -0.007 |
OO | -0.006 |
obby | -0.006 |
Wil | -0.006 |
Token | amaz |
Token ID | 40642 |
Feature activation | +3.25e+00 |
Pos logit contributions | |
ge | +0.014 |
Twe | +0.013 |
obby | +0.013 |
badly | +0.013 |
Wil | +0.013 |
Neg logit contributions | |
ends | -0.007 |
cheer | -0.005 |
pretend | -0.005 |
reporters | -0.005 |
ibr | -0.005 |
Token | ement |
Token ID | 972 |
Feature activation | -1.07e-02 |
Pos logit contributions | |
ends | +0.080 |
cheer | +0.076 |
pretend | +0.073 |
sill | +0.067 |
wonder | +0.067 |
Neg logit contributions | |
ge | -0.058 |
Twe | -0.053 |
obby | -0.052 |
badly | -0.051 |
Wil | -0.051 |
Token | and |
Token ID | 290 |
Feature activation | +4.55e-02 |
Pos logit contributions | |
Twe | +0.000 |
Wil | +0.000 |
ge | +0.000 |
obby | +0.000 |
Fe | +0.000 |
Neg logit contributions | |
ends | -0.000 |
cheer | -0.000 |
pretend | -0.000 |
excited | -0.000 |
e | -0.000 |
Token | applauded |
Token ID | 38911 |
Feature activation | +5.22e-01 |
Pos logit contributions | |
ends | +0.001 |
pretend | +0.001 |
cheer | +0.001 |
ibrarian | +0.001 |
reporters | +0.001 |
Neg logit contributions | |
ge | -0.001 |
Bobby | -0.001 |
Frankie | -0.001 |
Twe | -0.001 |
gie | -0.001 |
Token | the |
Token ID | 262 |
Feature activation | +3.89e-02 |
Pos logit contributions | |
cheer | +0.008 |
pretend | +0.008 |
ends | +0.008 |
excited | +0.007 |
wonder | +0.007 |
Neg logit contributions | |
ge | -0.014 |
Twe | -0.013 |
OO | -0.013 |
Wil | -0.013 |
obby | -0.013 |
Token | young |
Token ID | 1862 |
Feature activation | +4.75e-01 |
Pos logit contributions | |
cheer | +0.001 |
pretend | +0.001 |
wonder | +0.001 |
excited | +0.001 |
ends | +0.001 |
Neg logit contributions | |
ge | -0.001 |
Twe | -0.001 |
obby | -0.001 |
Wil | -0.001 |
OO | -0.001 |
Token | all |
Token ID | 477 |
Feature activation | +3.19e-01 |
Pos logit contributions | |
cheer | +0.003 |
ends | +0.003 |
pretend | +0.003 |
wonder | +0.003 |
excited | +0.003 |
Neg logit contributions | |
ge | -0.006 |
Twe | -0.005 |
obby | -0.005 |
OO | -0.005 |
Wil | -0.005 |
Token | . |
Token ID | 13 |
Feature activation | +1.46e-01 |
Pos logit contributions | |
cheer | +0.005 |
pretend | +0.004 |
ends | +0.004 |
around | +0.004 |
wonder | +0.004 |
Neg logit contributions | |
Twe | -0.009 |
ge | -0.009 |
OO | -0.009 |
Wil | -0.008 |
obby | -0.008 |
Token | She |
Token ID | 1375 |
Feature activation | -4.64e-01 |
Pos logit contributions | |
ends | +0.002 |
pretend | +0.002 |
cheer | +0.002 |
wonder | +0.002 |
sill | +0.002 |
Neg logit contributions | |
ge | -0.004 |
Twe | -0.004 |
badly | -0.004 |
shadow | -0.004 |
OO | -0.004 |
Token | blinked |
Token ID | 40360 |
Feature activation | +5.93e-02 |
Pos logit contributions | |
ge | +0.010 |
Twe | +0.010 |
Wil | +0.009 |
obby | +0.009 |
OO | +0.009 |
Neg logit contributions | |
ends | -0.009 |
cheer | -0.009 |
pretend | -0.008 |
reporters | -0.008 |
sill | -0.008 |
Token | in |
Token ID | 287 |
Feature activation | -3.61e-01 |
Pos logit contributions | |
ends | +0.001 |
cheer | +0.001 |
pretend | +0.001 |
e | +0.001 |
reporters | +0.001 |
Neg logit contributions | |
ge | -0.001 |
Twe | -0.001 |
obby | -0.001 |
badly | -0.001 |
Wil | -0.001 |
Token | amaz |
Token ID | 40642 |
Feature activation | +3.23e+00 |
Pos logit contributions | |
ge | +0.010 |
Twe | +0.010 |
badly | +0.009 |
Wil | +0.009 |
obby | +0.009 |
Neg logit contributions | |
ends | -0.005 |
cheer | -0.004 |
reporters | -0.004 |
pretend | -0.003 |
ibr | -0.003 |
Token | ement |
Token ID | 972 |
Feature activation | +4.75e-02 |
Pos logit contributions | |
ends | +0.090 |
cheer | +0.084 |
pretend | +0.082 |
reporters | +0.076 |
e | +0.075 |
Neg logit contributions | |
ge | -0.047 |
badly | -0.043 |
Twe | -0.043 |
obby | -0.041 |
iously | -0.041 |
Token | as |
Token ID | 355 |
Feature activation | +1.00e+00 |
Pos logit contributions | |
ends | +0.001 |
cheer | +0.001 |
pretend | +0.001 |
excited | +0.000 |
wonder | +0.000 |
Neg logit contributions | |
Twe | -0.001 |
ge | -0.001 |
Wil | -0.001 |
obby | -0.001 |
OO | -0.001 |
Token | the |
Token ID | 262 |
Feature activation | +9.03e-03 |
Pos logit contributions | |
cheer | +0.014 |
ends | +0.014 |
pretend | +0.013 |
wonder | +0.012 |
excited | +0.012 |
Neg logit contributions | |
ge | -0.028 |
Twe | -0.027 |
obby | -0.026 |
Wil | -0.026 |
OO | -0.026 |
Token | pet |
Token ID | 4273 |
Feature activation | +5.27e-01 |
Pos logit contributions | |
cheer | +0.000 |
ends | +0.000 |
pretend | +0.000 |
wonder | +0.000 |
sill | +0.000 |
Neg logit contributions | |
ge | -0.000 |
Twe | -0.000 |
obby | -0.000 |
Wil | -0.000 |
OO | -0.000 |
Token | als |
Token ID | 874 |
Feature activation | +6.99e-01 |
Pos logit contributions | |
ends | +0.007 |
cheer | +0.007 |
pretend | +0.006 |
wonder | +0.005 |
e | +0.005 |
Neg logit contributions | |
ge | -0.016 |
Twe | -0.015 |
obby | -0.014 |
Wil | -0.014 |
OO | -0.014 |
Token | and |
Token ID | 290 |
Feature activation | -3.82e-02 |
Pos logit contributions | |
cheer | +0.002 |
ends | +0.002 |
pretend | +0.002 |
excited | +0.002 |
e | +0.002 |
Neg logit contributions | |
ge | -0.004 |
Twe | -0.004 |
Wil | -0.003 |
OO | -0.003 |
obby | -0.003 |
Token | M |
Token ID | 337 |
Feature activation | +4.06e-01 |
Pos logit contributions | |
ge | +0.001 |
Twe | +0.001 |
OO | +0.001 |
badly | +0.001 |
Wil | +0.001 |
Neg logit contributions | |
cheer | -0.001 |
pretend | -0.001 |
excited | -0.001 |
wonder | -0.001 |
sill | -0.001 |
Token | ummy |
Token ID | 13513 |
Feature activation | +8.27e-01 |
Pos logit contributions | |
sill | +0.008 |
cheer | +0.008 |
e | +0.008 |
ends | +0.008 |
ibrarian | +0.008 |
Neg logit contributions | |
ge | -0.009 |
shadow | -0.008 |
Twe | -0.008 |
Fe | -0.007 |
cheaply | -0.007 |
Token | gasped |
Token ID | 45236 |
Feature activation | +7.31e-01 |
Pos logit contributions | |
cheer | +0.014 |
ends | +0.014 |
pretend | +0.014 |
wonder | +0.013 |
e | +0.012 |
Neg logit contributions | |
ge | -0.020 |
Twe | -0.020 |
Wil | -0.019 |
obby | -0.018 |
OO | -0.018 |
Token | with |
Token ID | 351 |
Feature activation | -7.04e-03 |
Pos logit contributions | |
cheer | +0.010 |
ends | +0.009 |
pretend | +0.009 |
excited | +0.008 |
wonder | +0.008 |
Neg logit contributions | |
ge | -0.021 |
Twe | -0.021 |
Wil | -0.020 |
obby | -0.019 |
OO | -0.019 |
Token | amaz |
Token ID | 40642 |
Feature activation | +3.22e+00 |
Pos logit contributions | |
Frankie | +0.000 |
Bobby | +0.000 |
Timothy | +0.000 |
Joey | +0.000 |
Wil | +0.000 |
Neg logit contributions | |
ends | -0.000 |
iece | -0.000 |
ibr | -0.000 |
ibrarian | -0.000 |
pler | -0.000 |
Token | ement |
Token ID | 972 |
Feature activation | -2.10e-02 |
Pos logit contributions | |
ends | +0.073 |
cheer | +0.070 |
pretend | +0.068 |
wonder | +0.062 |
e | +0.062 |
Neg logit contributions | |
ge | -0.063 |
Twe | -0.059 |
obby | -0.056 |
Wil | -0.056 |
badly | -0.054 |
Token | . |
Token ID | 13 |
Feature activation | -8.03e-02 |
Pos logit contributions | |
Twe | +0.001 |
Wil | +0.001 |
Fe | +0.001 |
ge | +0.001 |
shadow | +0.001 |
Neg logit contributions | |
ends | -0.000 |
pretend | -0.000 |
cheer | -0.000 |
excited | -0.000 |
like | -0.000 |
Token |
|
Token ID | 198 |
Feature activation | -3.22e-01 |
Pos logit contributions | |
ge | +0.002 |
Twe | +0.002 |
badly | +0.002 |
OO | +0.002 |
Wil | +0.002 |
Neg logit contributions | |
ends | -0.001 |
cheer | -0.001 |
excited | -0.001 |
pretend | -0.001 |
wonder | -0.001 |
Token |
|
Token ID | 198 |
Feature activation | -2.36e-01 |
Pos logit contributions | |
ge | +0.009 |
Twe | +0.008 |
OO | +0.008 |
gie | +0.008 |
Wil | +0.008 |
Neg logit contributions | |
ends | -0.005 |
cheer | -0.005 |
pretend | -0.004 |
excited | -0.004 |
wonder | -0.004 |
Token | " |
Token ID | 1 |
Feature activation | +2.58e-01 |
Pos logit contributions | |
ge | +0.006 |
Twe | +0.005 |
Wil | +0.005 |
obby | +0.005 |
OO | +0.005 |
Neg logit contributions | |
ends | -0.004 |
cheer | -0.004 |
pretend | -0.004 |
wonder | -0.004 |
e | -0.004 |
Token | at |
Token ID | 379 |
Feature activation | +2.77e-01 |
Pos logit contributions | |
ends | +0.006 |
cheer | +0.006 |
pretend | +0.006 |
e | +0.005 |
reporters | +0.005 |
Neg logit contributions | |
ge | -0.014 |
Twe | -0.013 |
obby | -0.012 |
badly | -0.012 |
Wil | -0.012 |
Token | it |
Token ID | 340 |
Feature activation | +2.84e-01 |
Pos logit contributions | |
cheer | +0.004 |
pretend | +0.004 |
wonder | +0.003 |
ends | +0.003 |
sill | +0.003 |
Neg logit contributions | |
ge | -0.008 |
Twe | -0.008 |
OO | -0.008 |
Wil | -0.007 |
obby | -0.007 |
Token | with |
Token ID | 351 |
Feature activation | -2.58e-01 |
Pos logit contributions | |
ends | +0.004 |
cheer | +0.004 |
pretend | +0.004 |
excited | +0.003 |
wonder | +0.003 |
Neg logit contributions | |
ge | -0.008 |
Twe | -0.008 |
Wil | -0.008 |
obby | -0.007 |
OO | -0.007 |
Token | wonder |
Token ID | 4240 |
Feature activation | +1.63e-01 |
Pos logit contributions | |
ge | +0.006 |
Frankie | +0.006 |
Bobby | +0.006 |
badly | +0.005 |
Timothy | +0.005 |
Neg logit contributions | |
ends | -0.005 |
ibr | -0.004 |
reporters | -0.004 |
ibrarian | -0.004 |
pler | -0.004 |
Token | and |
Token ID | 290 |
Feature activation | +2.12e-01 |
Pos logit contributions | |
ends | +0.002 |
cheer | +0.002 |
pretend | +0.002 |
like | +0.002 |
wonder | +0.002 |
Neg logit contributions | |
Twe | -0.004 |
ge | -0.004 |
Wil | -0.004 |
obby | -0.004 |
OO | -0.004 |
Token | amaz |
Token ID | 40642 |
Feature activation | +3.21e+00 |
Pos logit contributions | |
ends | +0.004 |
cheer | +0.003 |
pretend | +0.003 |
reporters | +0.003 |
e | +0.003 |
Neg logit contributions | |
ge | -0.005 |
Twe | -0.004 |
badly | -0.004 |
obby | -0.004 |
Frankie | -0.004 |
Token | ement |
Token ID | 972 |
Feature activation | +3.43e-02 |
Pos logit contributions | |
ends | +0.078 |
cheer | +0.074 |
pretend | +0.072 |
wonder | +0.066 |
reporters | +0.066 |
Neg logit contributions | |
ge | -0.059 |
Twe | -0.055 |
badly | -0.052 |
Wil | -0.052 |
obby | -0.051 |
Token | . |
Token ID | 13 |
Feature activation | +3.14e-02 |
Pos logit contributions | |
ends | +0.001 |
cheer | +0.001 |
pretend | +0.001 |
excited | +0.000 |
sill | +0.000 |
Neg logit contributions | |
Twe | -0.001 |
ge | -0.001 |
Wil | -0.001 |
obby | -0.001 |
OO | -0.001 |
Token | He |
Token ID | 679 |
Feature activation | -6.48e-01 |
Pos logit contributions | |
ends | +0.000 |
excited | +0.000 |
cheer | +0.000 |
sill | +0.000 |
wonder | +0.000 |
Neg logit contributions | |
ge | -0.001 |
Twe | -0.001 |
badly | -0.001 |
OO | -0.001 |
rated | -0.001 |
Token | had |
Token ID | 550 |
Feature activation | +4.79e-01 |
Pos logit contributions | |
ge | +0.014 |
Twe | +0.013 |
Wil | +0.013 |
obby | +0.013 |
badly | +0.012 |
Neg logit contributions | |
ends | -0.013 |
cheer | -0.013 |
pretend | -0.012 |
sill | -0.011 |
reporters | -0.011 |
Token | wished |
Token ID | 16555 |
Feature activation | +3.26e-01 |
Pos logit contributions | |
ends | +0.007 |
cheer | +0.007 |
pretend | +0.007 |
wonder | +0.006 |
sill | +0.006 |
Neg logit contributions | |
ge | -0.013 |
Twe | -0.013 |
obby | -0.012 |
Wil | -0.012 |
OO | -0.012 |
Token | . |
Token ID | 13 |
Feature activation | -1.98e-01 |
Pos logit contributions | |
ge | +0.006 |
Twe | +0.006 |
Wil | +0.006 |
obby | +0.006 |
OO | +0.006 |
Neg logit contributions | |
ends | -0.005 |
cheer | -0.005 |
pretend | -0.005 |
e | -0.004 |
sill | -0.004 |
Token | The |
Token ID | 383 |
Feature activation | +1.05e-01 |
Pos logit contributions | |
ge | +0.006 |
OO | +0.005 |
badly | +0.005 |
Twe | +0.005 |
gie | +0.005 |
Neg logit contributions | |
ends | -0.003 |
cheer | -0.003 |
wonder | -0.003 |
excited | -0.002 |
sill | -0.002 |
Token | sheep |
Token ID | 15900 |
Feature activation | -6.41e-01 |
Pos logit contributions | |
ends | +0.002 |
cheer | +0.002 |
pretend | +0.002 |
e | +0.002 |
sill | +0.002 |
Neg logit contributions | |
ge | -0.002 |
Twe | -0.002 |
Wil | -0.002 |
obby | -0.002 |
gie | -0.002 |
Token | watched |
Token ID | 7342 |
Feature activation | +7.87e-02 |
Pos logit contributions | |
ge | +0.017 |
Twe | +0.016 |
OO | +0.015 |
obby | +0.015 |
Wil | +0.015 |
Neg logit contributions | |
cheer | -0.010 |
pretend | -0.010 |
ends | -0.010 |
excited | -0.009 |
wonder | -0.009 |
Token | in |
Token ID | 287 |
Feature activation | -6.97e-01 |
Pos logit contributions | |
cheer | +0.001 |
pretend | +0.001 |
ends | +0.001 |
excited | +0.001 |
e | +0.001 |
Neg logit contributions | |
ge | -0.002 |
Twe | -0.002 |
obby | -0.002 |
Wil | -0.002 |
OO | -0.002 |
Token | amaz |
Token ID | 40642 |
Feature activation | +3.20e+00 |
Pos logit contributions | |
ge | +0.021 |
Twe | +0.019 |
Wil | +0.019 |
obby | +0.019 |
badly | +0.019 |
Neg logit contributions | |
ends | -0.009 |
pretend | -0.006 |
cheer | -0.006 |
reporters | -0.006 |
ibr | -0.006 |
Token | ement |
Token ID | 972 |
Feature activation | -1.91e-02 |
Pos logit contributions | |
ends | +0.081 |
cheer | +0.076 |
pretend | +0.074 |
reporters | +0.068 |
e | +0.067 |
Neg logit contributions | |
ge | -0.055 |
Twe | -0.051 |
badly | -0.049 |
Wil | -0.049 |
obby | -0.049 |
Token | and |
Token ID | 290 |
Feature activation | -2.90e-01 |
Pos logit contributions | |
Twe | +0.001 |
ge | +0.001 |
Wil | +0.001 |
obby | +0.001 |
OO | +0.001 |
Neg logit contributions | |
ends | -0.000 |
cheer | -0.000 |
pretend | -0.000 |
excited | -0.000 |
sill | -0.000 |
Token | admired |
Token ID | 29382 |
Feature activation | +5.85e-01 |
Pos logit contributions | |
ge | +0.007 |
Twe | +0.006 |
obby | +0.006 |
Wil | +0.006 |
gie | +0.006 |
Neg logit contributions | |
ends | -0.005 |
cheer | -0.005 |
pretend | -0.005 |
reporters | -0.005 |
e | -0.004 |
Token | the |
Token ID | 262 |
Feature activation | -5.15e-02 |
Pos logit contributions | |
cheer | +0.009 |
ends | +0.008 |
pretend | +0.008 |
wonder | +0.007 |
excited | +0.007 |
Neg logit contributions | |
ge | -0.016 |
Twe | -0.016 |
obby | -0.015 |
Wil | -0.015 |
OO | -0.015 |
Token | flower |
Token ID | 15061 |
Feature activation | +2.82e-01 |
Pos logit contributions | |
ge | +0.001 |
Twe | +0.001 |
Wil | +0.001 |
obby | +0.001 |
OO | +0.001 |
Neg logit contributions | |
ends | -0.001 |
cheer | -0.001 |
pretend | -0.001 |
wonder | -0.001 |
e | -0.001 |
Token | friends |
Token ID | 2460 |
Feature activation | -2.93e-01 |
Pos logit contributions | |
ends | +0.008 |
cheer | +0.008 |
pretend | +0.008 |
wonder | +0.007 |
sill | +0.007 |
Neg logit contributions | |
ge | -0.015 |
Twe | -0.014 |
Wil | -0.013 |
obby | -0.013 |
OO | -0.013 |
Token | stopped |
Token ID | 5025 |
Feature activation | +4.41e-01 |
Pos logit contributions | |
ge | +0.008 |
Twe | +0.008 |
Wil | +0.008 |
OO | +0.007 |
obby | +0.007 |
Neg logit contributions | |
cheer | -0.005 |
pretend | -0.004 |
ends | -0.004 |
wonder | -0.004 |
see | -0.004 |
Token | and |
Token ID | 290 |
Feature activation | -1.07e-01 |
Pos logit contributions | |
cheer | +0.007 |
pretend | +0.006 |
ends | +0.006 |
wonder | +0.006 |
excited | +0.006 |
Neg logit contributions | |
Twe | -0.012 |
ge | -0.012 |
Wil | -0.011 |
obby | -0.011 |
OO | -0.011 |
Token | watched |
Token ID | 7342 |
Feature activation | -4.02e-02 |
Pos logit contributions | |
ge | +0.003 |
Twe | +0.003 |
Wil | +0.002 |
obby | +0.002 |
OO | +0.002 |
Neg logit contributions | |
ends | -0.002 |
cheer | -0.002 |
pretend | -0.002 |
wonder | -0.002 |
e | -0.002 |
Token | in |
Token ID | 287 |
Feature activation | -5.90e-01 |
Pos logit contributions | |
Twe | +0.001 |
ge | +0.001 |
Wil | +0.001 |
OO | +0.001 |
obby | +0.001 |
Neg logit contributions | |
cheer | -0.001 |
pretend | -0.001 |
ends | -0.001 |
excited | -0.000 |
e | -0.000 |
Token | amaz |
Token ID | 40642 |
Feature activation | +3.19e+00 |
Pos logit contributions | |
ge | +0.016 |
obby | +0.014 |
Twe | +0.014 |
badly | +0.014 |
Wil | +0.014 |
Neg logit contributions | |
ends | -0.010 |
cheer | -0.008 |
pretend | -0.008 |
e | -0.007 |
reporters | -0.007 |
Token | ement |
Token ID | 972 |
Feature activation | -4.22e-01 |
Pos logit contributions | |
ends | +0.083 |
cheer | +0.080 |
pretend | +0.077 |
e | +0.071 |
wonder | +0.071 |
Neg logit contributions | |
ge | -0.052 |
obby | -0.046 |
badly | -0.046 |
Twe | -0.046 |
Wil | -0.043 |
Token | . |
Token ID | 13 |
Feature activation | -2.67e-01 |
Pos logit contributions | |
Twe | +0.011 |
ge | +0.011 |
Wil | +0.011 |
obby | +0.010 |
OO | +0.010 |
Neg logit contributions | |
ends | -0.007 |
cheer | -0.006 |
pretend | -0.006 |
excited | -0.006 |
sill | -0.005 |
Token | The |
Token ID | 383 |
Feature activation | -3.16e-01 |
Pos logit contributions | |
ge | +0.007 |
Twe | +0.007 |
Wil | +0.006 |
badly | +0.006 |
OO | +0.006 |
Neg logit contributions | |
ends | -0.005 |
pretend | -0.004 |
cheer | -0.004 |
excited | -0.004 |
wonder | -0.004 |
Token | dog |
Token ID | 3290 |
Feature activation | +1.62e-01 |
Pos logit contributions | |
ge | +0.009 |
Twe | +0.008 |
Wil | +0.008 |
obby | +0.008 |
OO | +0.008 |
Neg logit contributions | |
cheer | -0.005 |
pretend | -0.005 |
ends | -0.005 |
wonder | -0.004 |
sill | -0.004 |
Token | too |
Token ID | 1165 |
Feature activation | +3.00e-01 |
Pos logit contributions | |
cheer | +0.003 |
ends | +0.003 |
pretend | +0.003 |
excited | +0.002 |
wonder | +0.002 |
Neg logit contributions | |
ge | -0.004 |
Twe | -0.004 |
Wil | -0.004 |
obby | -0.004 |
OO | -0.004 |
Token | as |
Token ID | 355 |
Feature activation | +1.11e+00 |
Pos logit contributions | |
ge | +0.000 |
Twe | +0.000 |
Wil | +0.000 |
obby | +0.000 |
OO | +0.000 |
Neg logit contributions | |
ends | -0.000 |
cheer | -0.000 |
pretend | -0.000 |
wonder | -0.000 |
sill | -0.000 |
Token | the |
Token ID | 262 |
Feature activation | +5.05e-01 |
Pos logit contributions | |
cheer | +0.016 |
ends | +0.016 |
pretend | +0.015 |
excited | +0.014 |
wonder | +0.013 |
Neg logit contributions | |
ge | -0.030 |
Twe | -0.030 |
OO | -0.030 |
obby | -0.029 |
Wil | -0.028 |
Token | woman |
Token ID | 2415 |
Feature activation | -3.95e-01 |
Pos logit contributions | |
cheer | +0.009 |
pretend | +0.008 |
sill | +0.007 |
wonder | +0.007 |
excited | +0.007 |
Neg logit contributions | |
ge | -0.014 |
Twe | -0.013 |
OO | -0.013 |
obby | -0.013 |
Wil | -0.013 |
Token | watched |
Token ID | 7342 |
Feature activation | -3.40e-02 |
Pos logit contributions | |
ge | +0.010 |
Twe | +0.009 |
Wil | +0.009 |
obby | +0.009 |
OO | +0.009 |
Neg logit contributions | |
cheer | -0.007 |
ends | -0.007 |
pretend | -0.007 |
excited | -0.006 |
e | -0.006 |
Token | in |
Token ID | 287 |
Feature activation | -6.35e-01 |
Pos logit contributions | |
ge | +0.001 |
Twe | +0.001 |
obby | +0.001 |
OO | +0.001 |
Wil | +0.001 |
Neg logit contributions | |
cheer | -0.000 |
pretend | -0.000 |
ends | -0.000 |
excited | -0.000 |
e | -0.000 |
Token | amaz |
Token ID | 40642 |
Feature activation | +3.18e+00 |
Pos logit contributions | |
ge | +0.018 |
Twe | +0.017 |
Wil | +0.016 |
obby | +0.016 |
badly | +0.016 |
Neg logit contributions | |
ends | -0.009 |
cheer | -0.008 |
pretend | -0.008 |
reporters | -0.007 |
e | -0.007 |
Token | ement |
Token ID | 972 |
Feature activation | -3.83e-03 |
Pos logit contributions | |
ends | +0.073 |
cheer | +0.070 |
pretend | +0.068 |
wonder | +0.062 |
e | +0.062 |
Neg logit contributions | |
ge | -0.062 |
Twe | -0.058 |
obby | -0.055 |
Wil | -0.054 |
badly | -0.053 |
Token | . |
Token ID | 13 |
Feature activation | +7.01e-03 |
Pos logit contributions | |
Twe | +0.000 |
Wil | +0.000 |
ge | +0.000 |
obby | +0.000 |
OO | +0.000 |
Neg logit contributions | |
ends | -0.000 |
cheer | -0.000 |
pretend | -0.000 |
excited | -0.000 |
sill | -0.000 |
Token | She |
Token ID | 1375 |
Feature activation | -3.55e-01 |
Pos logit contributions | |
ends | +0.000 |
cheer | +0.000 |
sill | +0.000 |
excited | +0.000 |
wonder | +0.000 |
Neg logit contributions | |
ge | -0.000 |
Twe | -0.000 |
badly | -0.000 |
OO | -0.000 |
poisonous | -0.000 |
Token | saw |
Token ID | 2497 |
Feature activation | +9.04e-01 |
Pos logit contributions | |
ge | +0.008 |
Twe | +0.008 |
Wil | +0.007 |
obby | +0.007 |
OO | +0.007 |
Neg logit contributions | |
ends | -0.007 |
cheer | -0.007 |
pretend | -0.006 |
wonder | -0.006 |
e | -0.006 |
Token | the |
Token ID | 262 |
Feature activation | +3.77e-01 |
Pos logit contributions | |
ends | +0.016 |
cheer | +0.015 |
pretend | +0.014 |
e | +0.013 |
wonder | +0.013 |
Neg logit contributions | |
ge | -0.022 |
Twe | -0.021 |
Wil | -0.020 |
obby | -0.020 |
badly | -0.019 |
Token | , |
Token ID | 11 |
Feature activation | -1.36e-01 |
Pos logit contributions | |
ge | +0.011 |
Twe | +0.011 |
Wil | +0.010 |
obby | +0.010 |
badly | +0.010 |
Neg logit contributions | |
ends | -0.007 |
cheer | -0.007 |
pretend | -0.007 |
wonder | -0.006 |
e | -0.006 |
Token | next |
Token ID | 1306 |
Feature activation | +1.10e-01 |
Pos logit contributions | |
ge | +0.003 |
Twe | +0.003 |
Wil | +0.003 |
badly | +0.003 |
gie | +0.003 |
Neg logit contributions | |
ends | -0.002 |
cheer | -0.002 |
pretend | -0.002 |
reporters | -0.002 |
e | -0.002 |
Token | to |
Token ID | 284 |
Feature activation | +2.62e-01 |
Pos logit contributions | |
ends | +0.002 |
cheer | +0.002 |
pretend | +0.002 |
wonder | +0.002 |
e | +0.002 |
Neg logit contributions | |
ge | -0.002 |
Twe | -0.002 |
Wil | -0.002 |
obby | -0.002 |
OO | -0.002 |
Token | her |
Token ID | 607 |
Feature activation | +1.28e-02 |
Pos logit contributions | |
ends | +0.004 |
cheer | +0.004 |
pretend | +0.003 |
wonder | +0.003 |
e | +0.003 |
Neg logit contributions | |
ge | -0.007 |
Twe | -0.007 |
Wil | -0.007 |
obby | -0.007 |
OO | -0.007 |
Token | magn |
Token ID | 7842 |
Feature activation | -6.13e-01 |
Pos logit contributions | |
ends | +0.000 |
cheer | +0.000 |
reporters | +0.000 |
pretend | +0.000 |
e | +0.000 |
Neg logit contributions | |
ge | -0.000 |
badly | -0.000 |
obby | -0.000 |
Twe | -0.000 |
Johnson | -0.000 |
Token | ifying |
Token ID | 4035 |
Feature activation | -2.98e+00 |
Pos logit contributions | |
Twe | +0.008 |
ge | +0.008 |
Wil | +0.007 |
gie | +0.006 |
Timothy | +0.006 |
Neg logit contributions | |
ends | -0.018 |
cheer | -0.017 |
pretend | -0.017 |
e | -0.016 |
sill | -0.016 |
Token | glass |
Token ID | 5405 |
Feature activation | +2.10e-01 |
Pos logit contributions | |
ge | +0.066 |
Twe | +0.060 |
obby | +0.056 |
Wil | +0.056 |
gie | +0.054 |
Neg logit contributions | |
cheer | -0.062 |
ends | -0.060 |
pretend | -0.060 |
sill | -0.058 |
wonder | -0.055 |
Token | . |
Token ID | 13 |
Feature activation | +3.77e-01 |
Pos logit contributions | |
ends | +0.004 |
cheer | +0.004 |
pretend | +0.004 |
wonder | +0.003 |
e | +0.003 |
Neg logit contributions | |
ge | -0.005 |
Twe | -0.005 |
Wil | -0.005 |
obby | -0.005 |
OO | -0.004 |
Token | She |
Token ID | 1375 |
Feature activation | -4.71e-01 |
Pos logit contributions | |
ends | +0.008 |
cheer | +0.008 |
pretend | +0.007 |
e | +0.007 |
reporters | +0.007 |
Neg logit contributions | |
ge | -0.008 |
Twe | -0.007 |
obby | -0.007 |
Wil | -0.007 |
OO | -0.007 |
Token | looked |
Token ID | 3114 |
Feature activation | +3.33e-01 |
Pos logit contributions | |
ge | +0.009 |
Twe | +0.008 |
Fe | +0.008 |
obby | +0.008 |
Wil | +0.008 |
Neg logit contributions | |
cheer | -0.010 |
ends | -0.010 |
reporters | -0.009 |
sill | -0.009 |
iece | -0.009 |
Token | at |
Token ID | 379 |
Feature activation | +2.58e-01 |
Pos logit contributions | |
cheer | +0.005 |
ends | +0.005 |
e | +0.004 |
sill | +0.004 |
reporters | +0.004 |
Neg logit contributions | |
ge | -0.008 |
badly | -0.008 |
Twe | -0.008 |
obby | -0.008 |
Bobby | -0.007 |
Token | dolls |
Token ID | 36062 |
Feature activation | -9.40e-01 |
Pos logit contributions | |
ge | +0.002 |
Twe | +0.002 |
Wil | +0.002 |
obby | +0.002 |
OO | +0.002 |
Neg logit contributions | |
ends | -0.002 |
cheer | -0.002 |
pretend | -0.002 |
sill | -0.001 |
wonder | -0.001 |
Token | around |
Token ID | 1088 |
Feature activation | -4.23e-01 |
Pos logit contributions | |
ge | +0.021 |
badly | +0.018 |
way | +0.017 |
run | +0.015 |
OO | +0.015 |
Neg logit contributions | |
ends | -0.018 |
wonder | -0.018 |
cheer | -0.018 |
amaz | -0.017 |
reporters | -0.017 |
Token | in |
Token ID | 287 |
Feature activation | -6.25e-01 |
Pos logit contributions | |
ge | +0.010 |
Twe | +0.009 |
badly | +0.009 |
Wil | +0.009 |
obby | +0.009 |
Neg logit contributions | |
ends | -0.008 |
cheer | -0.007 |
pretend | -0.006 |
sill | -0.006 |
reporters | -0.006 |
Token | a |
Token ID | 257 |
Feature activation | -3.94e-01 |
Pos logit contributions | |
ge | +0.016 |
Twe | +0.014 |
badly | +0.014 |
Wil | +0.014 |
obby | +0.013 |
Neg logit contributions | |
ends | -0.011 |
cheer | -0.010 |
pretend | -0.009 |
reporters | -0.009 |
ibrarian | -0.009 |
Token | wheel |
Token ID | 7825 |
Feature activation | -9.56e-01 |
Pos logit contributions | |
Twe | +0.008 |
ge | +0.008 |
badly | +0.008 |
OO | +0.008 |
Wil | +0.008 |
Neg logit contributions | |
ends | -0.008 |
cheer | -0.007 |
sill | -0.006 |
pretend | -0.006 |
reporters | -0.006 |
Token | bar |
Token ID | 5657 |
Feature activation | -2.91e+00 |
Pos logit contributions | |
ge | +0.023 |
Twe | +0.023 |
obby | +0.021 |
Wil | +0.021 |
OO | +0.021 |
Neg logit contributions | |
ends | -0.017 |
cheer | -0.016 |
pretend | -0.016 |
sill | -0.014 |
e | -0.014 |
Token | row |
Token ID | 808 |
Feature activation | -1.15e+00 |
Pos logit contributions | |
ge | +0.056 |
Twe | +0.055 |
obby | +0.051 |
Wil | +0.051 |
badly | +0.048 |
Neg logit contributions | |
ends | -0.063 |
cheer | -0.062 |
pretend | -0.060 |
e | -0.057 |
sill | -0.057 |
Token | she |
Token ID | 673 |
Feature activation | -4.54e-01 |
Pos logit contributions | |
ge | +0.028 |
Twe | +0.027 |
Wil | +0.026 |
obby | +0.025 |
OO | +0.025 |
Neg logit contributions | |
ends | -0.020 |
cheer | -0.020 |
pretend | -0.019 |
sill | -0.017 |
wonder | -0.017 |
Token | had |
Token ID | 550 |
Feature activation | -7.49e-01 |
Pos logit contributions | |
ge | +0.011 |
Twe | +0.010 |
Wil | +0.010 |
obby | +0.010 |
OO | +0.010 |
Neg logit contributions | |
ends | -0.008 |
cheer | -0.008 |
pretend | -0.008 |
wonder | -0.007 |
e | -0.007 |
Token | found |
Token ID | 1043 |
Feature activation | -6.41e-02 |
Pos logit contributions | |
obby | +0.021 |
Twe | +0.020 |
ge | +0.020 |
Frankie | +0.020 |
gie | +0.019 |
Neg logit contributions | |
ends | -0.012 |
pretend | -0.011 |
drawn | -0.010 |
like | -0.010 |
e | -0.010 |
Token | in |
Token ID | 287 |
Feature activation | -4.72e-01 |
Pos logit contributions | |
ge | +0.002 |
Twe | +0.002 |
obby | +0.002 |
Wil | +0.002 |
gie | +0.002 |
Neg logit contributions | |
ends | -0.001 |
e | -0.001 |
cheer | -0.001 |
pretend | -0.001 |
like | -0.001 |
Token | hat |
Token ID | 6877 |
Feature activation | -4.96e-01 |
Pos logit contributions | |
ge | +0.022 |
Twe | +0.021 |
obby | +0.020 |
Wil | +0.020 |
OO | +0.019 |
Neg logit contributions | |
pretend | -0.014 |
cheer | -0.013 |
ends | -0.013 |
excited | -0.012 |
e | -0.011 |
Token | and |
Token ID | 290 |
Feature activation | +2.97e-01 |
Pos logit contributions | |
ge | +0.010 |
badly | +0.009 |
Twe | +0.009 |
Johnson | +0.009 |
gie | +0.009 |
Neg logit contributions | |
ends | -0.010 |
cheer | -0.010 |
wonder | -0.009 |
pretend | -0.009 |
reporters | -0.009 |
Token | grabbed |
Token ID | 13176 |
Feature activation | -3.80e-01 |
Pos logit contributions | |
ends | +0.006 |
cheer | +0.005 |
pretend | +0.005 |
sill | +0.005 |
reporters | +0.005 |
Neg logit contributions | |
ge | -0.007 |
Twe | -0.006 |
Wil | -0.006 |
gie | -0.006 |
obby | -0.006 |
Token | her |
Token ID | 607 |
Feature activation | -1.46e-01 |
Pos logit contributions | |
ge | +0.009 |
Twe | +0.009 |
badly | +0.008 |
Wil | +0.008 |
obby | +0.008 |
Neg logit contributions | |
ends | -0.006 |
cheer | -0.006 |
pretend | -0.006 |
reporters | -0.005 |
wonder | -0.005 |
Token | magn |
Token ID | 7842 |
Feature activation | -6.11e-01 |
Pos logit contributions | |
ge | +0.004 |
Twe | +0.004 |
Wil | +0.003 |
obby | +0.003 |
OO | +0.003 |
Neg logit contributions | |
pretend | -0.002 |
ends | -0.002 |
cheer | -0.002 |
e | -0.002 |
excited | -0.002 |
Token | ifying |
Token ID | 4035 |
Feature activation | -2.88e+00 |
Pos logit contributions | |
ge | +0.007 |
Twe | +0.006 |
Wil | +0.006 |
obby | +0.005 |
OO | +0.005 |
Neg logit contributions | |
ends | -0.019 |
cheer | -0.018 |
pretend | -0.018 |
e | -0.017 |
sill | -0.017 |
Token | glass |
Token ID | 5405 |
Feature activation | +5.21e-02 |
Pos logit contributions | |
ge | +0.060 |
Twe | +0.055 |
Wil | +0.052 |
obby | +0.052 |
gie | +0.049 |
Neg logit contributions | |
cheer | -0.062 |
ends | -0.062 |
pretend | -0.060 |
sill | -0.056 |
wonder | -0.056 |
Token | . |
Token ID | 13 |
Feature activation | +4.20e-01 |
Pos logit contributions | |
ends | +0.001 |
cheer | +0.001 |
pretend | +0.001 |
wonder | +0.001 |
reporters | +0.001 |
Neg logit contributions | |
ge | -0.001 |
Twe | -0.001 |
obby | -0.001 |
Wil | -0.001 |
badly | -0.001 |
Token | She |
Token ID | 1375 |
Feature activation | -5.56e-01 |
Pos logit contributions | |
ends | +0.008 |
cheer | +0.008 |
pretend | +0.008 |
e | +0.007 |
wonder | +0.007 |
Neg logit contributions | |
ge | -0.009 |
Twe | -0.009 |
Wil | -0.008 |
obby | -0.008 |
OO | -0.008 |
Token | was |
Token ID | 373 |
Feature activation | +3.55e-01 |
Pos logit contributions | |
ge | +0.011 |
Twe | +0.011 |
badly | +0.010 |
Wil | +0.010 |
Fe | +0.009 |
Neg logit contributions | |
ends | -0.012 |
cheer | -0.011 |
reporters | -0.010 |
pretend | -0.010 |
forward | -0.010 |
Token | ready |
Token ID | 3492 |
Feature activation | -7.88e-01 |
Pos logit contributions | |
cheer | +0.005 |
pretend | +0.005 |
reporters | +0.005 |
wonder | +0.005 |
communicate | +0.005 |
Neg logit contributions | |
badly | -0.008 |
shadow | -0.008 |
ge | -0.008 |
Twe | -0.007 |
Wil | -0.007 |
Token | She |
Token ID | 1375 |
Feature activation | -1.79e-01 |
Pos logit contributions | |
ends | +0.014 |
cheer | +0.014 |
pretend | +0.013 |
excited | +0.012 |
wonder | +0.012 |
Neg logit contributions | |
ge | -0.019 |
Twe | -0.018 |
Wil | -0.017 |
obby | -0.017 |
OO | -0.017 |
Token | takes |
Token ID | 2753 |
Feature activation | -3.59e-01 |
Pos logit contributions | |
ge | +0.003 |
Twe | +0.003 |
obby | +0.003 |
OO | +0.003 |
Wil | +0.003 |
Neg logit contributions | |
ends | -0.004 |
cheer | -0.004 |
pretend | -0.003 |
e | -0.003 |
sill | -0.003 |
Token | out |
Token ID | 503 |
Feature activation | -8.16e-02 |
Pos logit contributions | |
ge | +0.008 |
Wil | +0.008 |
Twe | +0.008 |
obby | +0.008 |
Wil | +0.008 |
Neg logit contributions | |
ends | -0.006 |
cheer | -0.006 |
pretend | -0.005 |
wonder | -0.005 |
reporters | -0.005 |
Token | a |
Token ID | 257 |
Feature activation | -3.39e-01 |
Pos logit contributions | |
Twe | +0.002 |
ge | +0.002 |
obby | +0.002 |
OO | +0.002 |
Wil | +0.002 |
Neg logit contributions | |
cheer | -0.001 |
pretend | -0.001 |
ends | -0.001 |
excited | -0.001 |
wonder | -0.001 |
Token | magn |
Token ID | 7842 |
Feature activation | +1.28e-01 |
Pos logit contributions | |
ge | +0.008 |
Twe | +0.008 |
Wil | +0.007 |
obby | +0.007 |
gie | +0.007 |
Neg logit contributions | |
cheer | -0.006 |
ends | -0.006 |
pretend | -0.006 |
wonder | -0.005 |
excited | -0.005 |
Token | ifying |
Token ID | 4035 |
Feature activation | -2.86e+00 |
Pos logit contributions | |
ends | +0.004 |
cheer | +0.004 |
pretend | +0.004 |
wonder | +0.004 |
e | +0.004 |
Neg logit contributions | |
ge | -0.001 |
Twe | -0.001 |
Wil | -0.001 |
obby | -0.001 |
OO | -0.001 |
Token | glass |
Token ID | 5405 |
Feature activation | +4.72e-01 |
Pos logit contributions | |
ge | +0.064 |
Twe | +0.059 |
obby | +0.055 |
Wil | +0.054 |
gie | +0.053 |
Neg logit contributions | |
cheer | -0.058 |
pretend | -0.057 |
ends | -0.057 |
sill | -0.054 |
wonder | -0.052 |
Token | . |
Token ID | 13 |
Feature activation | +5.82e-01 |
Pos logit contributions | |
ends | +0.010 |
cheer | +0.009 |
pretend | +0.009 |
reporters | +0.008 |
e | +0.008 |
Neg logit contributions | |
ge | -0.010 |
badly | -0.009 |
Twe | -0.009 |
Wil | -0.009 |
obby | -0.009 |
Token | She |
Token ID | 1375 |
Feature activation | -2.18e-01 |
Pos logit contributions | |
ends | +0.010 |
cheer | +0.009 |
pretend | +0.009 |
wonder | +0.008 |
excited | +0.008 |
Neg logit contributions | |
ge | -0.015 |
Twe | -0.014 |
badly | -0.013 |
OO | -0.013 |
Wil | -0.013 |
Token | looks |
Token ID | 3073 |
Feature activation | +7.60e-01 |
Pos logit contributions | |
ge | +0.004 |
Fe | +0.004 |
OO | +0.004 |
obby | +0.003 |
Twe | +0.003 |
Neg logit contributions | |
ends | -0.005 |
cheer | -0.005 |
sill | -0.004 |
reporters | -0.004 |
e | -0.004 |
Token | at |
Token ID | 379 |
Feature activation | +1.63e-01 |
Pos logit contributions | |
ends | +0.011 |
cheer | +0.010 |
e | +0.010 |
sill | +0.009 |
amer | +0.009 |
Neg logit contributions | |
ge | -0.019 |
badly | -0.018 |
cheaply | -0.017 |
Twe | -0.017 |
obby | -0.017 |
Token | flashlight |
Token ID | 38371 |
Feature activation | +2.51e-01 |
Pos logit contributions | |
ge | +0.016 |
Twe | +0.015 |
obby | +0.014 |
Wil | +0.014 |
iously | +0.014 |
Neg logit contributions | |
cheer | -0.011 |
pretend | -0.011 |
ends | -0.010 |
wonder | -0.009 |
excited | -0.009 |
Token | , |
Token ID | 11 |
Feature activation | -1.88e-01 |
Pos logit contributions | |
pretend | +0.004 |
cheer | +0.004 |
ends | +0.003 |
sill | +0.003 |
excited | +0.003 |
Neg logit contributions | |
Twe | -0.007 |
ge | -0.007 |
obby | -0.007 |
Wil | -0.007 |
OO | -0.006 |
Token | and |
Token ID | 290 |
Feature activation | +6.45e-02 |
Pos logit contributions | |
ge | +0.005 |
Twe | +0.005 |
obby | +0.005 |
OO | +0.005 |
Wil | +0.005 |
Neg logit contributions | |
ends | -0.003 |
pretend | -0.003 |
cheer | -0.002 |
wonder | -0.002 |
sill | -0.002 |
Token | a |
Token ID | 257 |
Feature activation | -7.66e-01 |
Pos logit contributions | |
ends | +0.001 |
cheer | +0.001 |
pretend | +0.001 |
wonder | +0.001 |
sill | +0.001 |
Neg logit contributions | |
ge | -0.002 |
Twe | -0.002 |
obby | -0.002 |
Wil | -0.002 |
OO | -0.002 |
Token | magn |
Token ID | 7842 |
Feature activation | +4.13e-01 |
Pos logit contributions | |
ge | +0.019 |
Twe | +0.018 |
obby | +0.017 |
Wil | +0.017 |
OO | +0.017 |
Neg logit contributions | |
cheer | -0.014 |
pretend | -0.014 |
ends | -0.013 |
wonder | -0.012 |
excited | -0.011 |
Token | ifying |
Token ID | 4035 |
Feature activation | -2.81e+00 |
Pos logit contributions | |
ends | +0.011 |
cheer | +0.010 |
pretend | +0.010 |
e | +0.010 |
sill | +0.010 |
Neg logit contributions | |
Twe | -0.006 |
ge | -0.006 |
Wil | -0.006 |
Fe | -0.005 |
gie | -0.005 |
Token | glass |
Token ID | 5405 |
Feature activation | +7.84e-01 |
Pos logit contributions | |
ge | +0.064 |
Twe | +0.059 |
obby | +0.056 |
Wil | +0.055 |
gie | +0.053 |
Neg logit contributions | |
cheer | -0.056 |
pretend | -0.055 |
ends | -0.054 |
sill | -0.051 |
wonder | -0.050 |
Token | . |
Token ID | 13 |
Feature activation | +1.56e-01 |
Pos logit contributions | |
cheer | +0.014 |
ends | +0.013 |
pretend | +0.013 |
sill | +0.012 |
e | +0.011 |
Neg logit contributions | |
ge | -0.019 |
Twe | -0.019 |
Wil | -0.018 |
obby | -0.018 |
OO | -0.017 |
Token |
|
Token ID | 198 |
Feature activation | +1.29e-01 |
Pos logit contributions | |
ends | +0.003 |
pretend | +0.002 |
cheer | +0.002 |
sill | +0.002 |
excited | +0.002 |
Neg logit contributions | |
ge | -0.005 |
OO | -0.004 |
badly | -0.004 |
Twe | -0.004 |
gie | -0.004 |
Token |
|
Token ID | 198 |
Feature activation | +3.22e-02 |
Pos logit contributions | |
ends | +0.002 |
cheer | +0.002 |
pretend | +0.002 |
wonder | +0.002 |
excited | +0.002 |
Neg logit contributions | |
ge | -0.004 |
Twe | -0.003 |
OO | -0.003 |
gie | -0.003 |
iously | -0.003 |
Token | As |
Token ID | 1722 |
Feature activation | -4.95e-02 |
Pos logit contributions | |
ends | +0.001 |
cheer | +0.001 |
pretend | +0.001 |
e | +0.000 |
wonder | +0.000 |
Neg logit contributions | |
ge | -0.001 |
Twe | -0.001 |
obby | -0.001 |
Wil | -0.001 |
gie | -0.001 |
Token | borrowed |
Token ID | 22546 |
Feature activation | -5.03e-01 |
Pos logit contributions | |
ge | +0.023 |
Twe | +0.021 |
obby | +0.020 |
Wil | +0.020 |
OO | +0.020 |
Neg logit contributions | |
cheer | -0.014 |
ends | -0.014 |
pretend | -0.014 |
excited | -0.013 |
wonder | -0.013 |
Token | her |
Token ID | 607 |
Feature activation | +5.80e-01 |
Pos logit contributions | |
ge | +0.010 |
Twe | +0.010 |
Wil | +0.010 |
Twe | +0.010 |
badly | +0.010 |
Neg logit contributions | |
ends | -0.010 |
cheer | -0.009 |
pretend | -0.009 |
reporters | -0.008 |
sill | -0.008 |
Token | mom |
Token ID | 1995 |
Feature activation | +5.87e-01 |
Pos logit contributions | |
ends | +0.011 |
cheer | +0.011 |
pretend | +0.011 |
wonder | +0.010 |
e | +0.009 |
Neg logit contributions | |
ge | -0.013 |
Twe | -0.013 |
Wil | -0.012 |
obby | -0.012 |
OO | -0.012 |
Token | 's |
Token ID | 338 |
Feature activation | -1.37e-01 |
Pos logit contributions | |
ends | +0.012 |
cheer | +0.012 |
pretend | +0.012 |
wonder | +0.011 |
e | +0.011 |
Neg logit contributions | |
ge | -0.012 |
Twe | -0.012 |
Wil | -0.011 |
obby | -0.011 |
OO | -0.010 |
Token | magn |
Token ID | 7842 |
Feature activation | -4.46e-01 |
Pos logit contributions | |
ge | +0.003 |
Twe | +0.003 |
Wil | +0.003 |
obby | +0.003 |
OO | +0.003 |
Neg logit contributions | |
ends | -0.003 |
cheer | -0.003 |
pretend | -0.003 |
wonder | -0.002 |
e | -0.002 |
Token | ifying |
Token ID | 4035 |
Feature activation | -2.77e+00 |
Pos logit contributions | |
Twe | +0.006 |
ge | +0.006 |
Wil | +0.005 |
obby | +0.004 |
gie | +0.004 |
Neg logit contributions | |
ends | -0.013 |
cheer | -0.012 |
pretend | -0.012 |
e | -0.012 |
sill | -0.012 |
Token | glass |
Token ID | 5405 |
Feature activation | +4.46e-01 |
Pos logit contributions | |
ge | +0.058 |
Twe | +0.053 |
obby | +0.050 |
Wil | +0.050 |
gie | +0.047 |
Neg logit contributions | |
cheer | -0.059 |
ends | -0.059 |
pretend | -0.057 |
sill | -0.054 |
wonder | -0.053 |
Token | to |
Token ID | 284 |
Feature activation | +3.75e-01 |
Pos logit contributions | |
ends | +0.007 |
cheer | +0.007 |
pretend | +0.007 |
sill | +0.006 |
wonder | +0.006 |
Neg logit contributions | |
ge | -0.012 |
Twe | -0.011 |
Wil | -0.011 |
obby | -0.011 |
OO | -0.010 |
Token | take |
Token ID | 1011 |
Feature activation | -2.26e-01 |
Pos logit contributions | |
ends | +0.006 |
cheer | +0.006 |
pretend | +0.005 |
sill | +0.005 |
wonder | +0.005 |
Neg logit contributions | |
ge | -0.010 |
Twe | -0.009 |
Wil | -0.009 |
obby | -0.009 |
OO | -0.009 |
Token | a |
Token ID | 257 |
Feature activation | +5.79e-01 |
Pos logit contributions | |
ge | +0.006 |
Twe | +0.006 |
Wil | +0.005 |
Twe | +0.005 |
obby | +0.005 |
Neg logit contributions | |
ends | -0.004 |
cheer | -0.003 |
pretend | -0.003 |
sill | -0.003 |
reporters | -0.003 |
Token | closer |
Token ID | 5699 |
Feature activation | +6.66e-01 |
Pos logit contributions | |
ends | +0.010 |
cheer | +0.010 |
pretend | +0.010 |
wonder | +0.009 |
e | +0.009 |
Neg logit contributions | |
ge | -0.014 |
Twe | -0.013 |
Wil | -0.013 |
obby | -0.012 |
OO | -0.012 |
Token | When |
Token ID | 1649 |
Feature activation | -2.35e-01 |
Pos logit contributions | |
ge | +0.004 |
OO | +0.003 |
Twe | +0.003 |
badly | +0.003 |
iously | +0.003 |
Neg logit contributions | |
ends | -0.002 |
cheer | -0.002 |
pretend | -0.002 |
excited | -0.002 |
see | -0.002 |
Token | Daisy |
Token ID | 40355 |
Feature activation | -5.91e-01 |
Pos logit contributions | |
ge | +0.007 |
Twe | +0.006 |
obby | +0.006 |
OO | +0.006 |
Wil | +0.006 |
Neg logit contributions | |
pretend | -0.004 |
cheer | -0.004 |
ends | -0.003 |
see | -0.003 |
excited | -0.003 |
Token | put |
Token ID | 1234 |
Feature activation | -4.43e-01 |
Pos logit contributions | |
ge | +0.015 |
Twe | +0.015 |
Wil | +0.014 |
obby | +0.014 |
badly | +0.013 |
Neg logit contributions | |
cheer | -0.010 |
pretend | -0.010 |
ends | -0.010 |
e | -0.009 |
wonder | -0.009 |
Token | the |
Token ID | 262 |
Feature activation | -2.76e-01 |
Pos logit contributions | |
ge | +0.011 |
Twe | +0.011 |
Wil | +0.010 |
obby | +0.010 |
badly | +0.010 |
Neg logit contributions | |
ends | -0.007 |
cheer | -0.007 |
pretend | -0.006 |
wonder | -0.006 |
reporters | -0.006 |
Token | magn |
Token ID | 7842 |
Feature activation | +6.46e-01 |
Pos logit contributions | |
ge | +0.008 |
Twe | +0.007 |
obby | +0.007 |
Wil | +0.007 |
shadow | +0.007 |
Neg logit contributions | |
pretend | -0.005 |
cheer | -0.005 |
wonder | -0.004 |
ends | -0.004 |
sill | -0.004 |
Token | ifying |
Token ID | 4035 |
Feature activation | -2.72e+00 |
Pos logit contributions | |
ends | +0.019 |
cheer | +0.018 |
pretend | +0.018 |
e | +0.017 |
sill | +0.017 |
Neg logit contributions | |
ge | -0.009 |
Twe | -0.008 |
Wil | -0.007 |
obby | -0.007 |
OO | -0.007 |
Token | glass |
Token ID | 5405 |
Feature activation | +8.34e-01 |
Pos logit contributions | |
ge | +0.062 |
Twe | +0.057 |
obby | +0.054 |
Wil | +0.053 |
gie | +0.051 |
Neg logit contributions | |
cheer | -0.054 |
ends | -0.053 |
pretend | -0.053 |
sill | -0.049 |
wonder | -0.048 |
Token | down |
Token ID | 866 |
Feature activation | +1.02e+00 |
Pos logit contributions | |
ends | +0.013 |
cheer | +0.012 |
pretend | +0.011 |
wonder | +0.010 |
e | +0.010 |
Neg logit contributions | |
ge | -0.022 |
Twe | -0.021 |
obby | -0.020 |
Wil | -0.020 |
badly | -0.020 |
Token | , |
Token ID | 11 |
Feature activation | +6.74e-01 |
Pos logit contributions | |
cheer | +0.012 |
ends | +0.011 |
pretend | +0.011 |
sill | +0.009 |
wonder | +0.009 |
Neg logit contributions | |
ge | -0.032 |
Twe | -0.030 |
obby | -0.029 |
OO | -0.029 |
Wil | -0.029 |
Token | the |
Token ID | 262 |
Feature activation | -9.10e-02 |
Pos logit contributions | |
cheer | +0.009 |
ends | +0.008 |
pretend | +0.008 |
wonder | +0.007 |
excited | +0.007 |
Neg logit contributions | |
ge | -0.020 |
Twe | -0.019 |
obby | -0.019 |
OO | -0.018 |
Wil | -0.018 |
Token | cater |
Token ID | 20825 |
Feature activation | -4.00e-01 |
Pos logit contributions | |
ge | +0.002 |
Twe | +0.002 |
Wil | +0.002 |
obby | +0.002 |
shadow | +0.002 |
Neg logit contributions | |
cheer | -0.002 |
pretend | -0.002 |
ends | -0.001 |
wonder | -0.001 |
sill | -0.001 |
Token | a |
Token ID | 257 |
Feature activation | +3.94e-02 |
Pos logit contributions | |
cheer | +0.007 |
ends | +0.006 |
pretend | +0.006 |
sill | +0.005 |
e | +0.005 |
Neg logit contributions | |
ge | -0.008 |
Twe | -0.008 |
Wil | -0.008 |
obby | -0.007 |
badly | -0.007 |
Token | flashlight |
Token ID | 38371 |
Feature activation | -1.60e-01 |
Pos logit contributions | |
ends | +0.001 |
cheer | +0.001 |
pretend | +0.001 |
sill | +0.001 |
e | +0.001 |
Neg logit contributions | |
ge | -0.001 |
Twe | -0.001 |
Wil | -0.001 |
obby | -0.001 |
OO | -0.001 |
Token | and |
Token ID | 290 |
Feature activation | -4.01e-02 |
Pos logit contributions | |
ge | +0.004 |
Twe | +0.004 |
Wil | +0.004 |
obby | +0.004 |
badly | +0.004 |
Neg logit contributions | |
ends | -0.003 |
cheer | -0.002 |
pretend | -0.002 |
wonder | -0.002 |
e | -0.002 |
Token | a |
Token ID | 257 |
Feature activation | -3.29e-01 |
Pos logit contributions | |
ge | +0.001 |
Twe | +0.001 |
Wil | +0.001 |
obby | +0.001 |
badly | +0.001 |
Neg logit contributions | |
ends | -0.001 |
cheer | -0.001 |
pretend | -0.001 |
sill | -0.001 |
e | -0.001 |
Token | magn |
Token ID | 7842 |
Feature activation | +2.51e-01 |
Pos logit contributions | |
ge | +0.008 |
Twe | +0.007 |
Wil | +0.007 |
obby | +0.007 |
OO | +0.007 |
Neg logit contributions | |
ends | -0.006 |
cheer | -0.006 |
pretend | -0.006 |
wonder | -0.005 |
sill | -0.005 |
Token | ifying |
Token ID | 4035 |
Feature activation | -2.68e+00 |
Pos logit contributions | |
ends | +0.008 |
cheer | +0.007 |
pretend | +0.007 |
e | +0.007 |
wonder | +0.007 |
Neg logit contributions | |
ge | -0.003 |
Twe | -0.003 |
Wil | -0.002 |
obby | -0.002 |
OO | -0.002 |
Token | glass |
Token ID | 5405 |
Feature activation | +6.98e-01 |
Pos logit contributions | |
Twe | +0.043 |
ge | +0.043 |
Wil | +0.040 |
OO | +0.040 |
badly | +0.039 |
Neg logit contributions | |
ends | -0.068 |
cheer | -0.064 |
pretend | -0.060 |
reporters | -0.058 |
e | -0.056 |
Token | to |
Token ID | 284 |
Feature activation | +4.12e-01 |
Pos logit contributions | |
ends | +0.013 |
cheer | +0.013 |
pretend | +0.012 |
wonder | +0.011 |
e | +0.011 |
Neg logit contributions | |
ge | -0.016 |
Twe | -0.015 |
Wil | -0.014 |
obby | -0.014 |
OO | -0.014 |
Token | examine |
Token ID | 10716 |
Feature activation | +1.66e-01 |
Pos logit contributions | |
ends | +0.008 |
cheer | +0.006 |
reporters | +0.006 |
ibr | +0.006 |
partners | +0.006 |
Neg logit contributions | |
ge | -0.009 |
Twe | -0.009 |
Wil | -0.009 |
Wil | -0.008 |
Twe | -0.008 |
Token | their |
Token ID | 511 |
Feature activation | +5.95e-01 |
Pos logit contributions | |
cheer | +0.003 |
ends | +0.003 |
pretend | +0.003 |
wonder | +0.002 |
e | +0.002 |
Neg logit contributions | |
ge | -0.004 |
Twe | -0.004 |
Wil | -0.004 |
OO | -0.004 |
obby | -0.004 |
Token | finds |
Token ID | 7228 |
Feature activation | +1.90e-01 |
Pos logit contributions | |
ends | +0.012 |
cheer | +0.011 |
e | +0.010 |
sill | +0.010 |
pretend | +0.010 |
Neg logit contributions | |
ge | -0.013 |
obby | -0.012 |
Twe | -0.012 |
OO | -0.011 |
badly | -0.011 |
Token | , |
Token ID | 11 |
Feature activation | -1.90e+00 |
Pos logit contributions | |
ends | +0.007 |
cheer | +0.007 |
pretend | +0.007 |
e | +0.005 |
wonder | +0.005 |
Neg logit contributions | |
ge | -0.019 |
Twe | -0.018 |
Wil | -0.017 |
obby | -0.017 |
gie | -0.017 |
Token | that |
Token ID | 326 |
Feature activation | -9.65e-01 |
Pos logit contributions | |
ge | +0.039 |
Twe | +0.037 |
Wil | +0.035 |
obby | +0.035 |
OO | +0.034 |
Neg logit contributions | |
ends | -0.040 |
cheer | -0.039 |
pretend | -0.038 |
wonder | -0.035 |
e | -0.034 |
Token | 's |
Token ID | 338 |
Feature activation | -3.22e-01 |
Pos logit contributions | |
ge | +0.024 |
Twe | +0.023 |
Wil | +0.022 |
obby | +0.022 |
OO | +0.022 |
Neg logit contributions | |
ends | -0.016 |
cheer | -0.015 |
pretend | -0.015 |
wonder | -0.013 |
e | -0.013 |
Token | so |
Token ID | 523 |
Feature activation | -7.66e-01 |
Pos logit contributions | |
Twe | +0.007 |
ge | +0.007 |
Wil | +0.007 |
badly | +0.007 |
Twe | +0.007 |
Neg logit contributions | |
ends | -0.006 |
cheer | -0.006 |
pretend | -0.005 |
sill | -0.005 |
pler | -0.005 |
Token | creative |
Token ID | 7325 |
Feature activation | -8.64e-02 |
Pos logit contributions | |
Twe | +0.017 |
ge | +0.016 |
Wil | +0.016 |
Twe | +0.015 |
badly | +0.015 |
Neg logit contributions | |
ends | -0.014 |
cheer | -0.014 |
pretend | -0.013 |
e | -0.012 |
sill | -0.012 |
Token | , |
Token ID | 11 |
Feature activation | -2.54e+00 |
Pos logit contributions | |
ge | +0.002 |
Twe | +0.002 |
Wil | +0.002 |
obby | +0.002 |
OO | +0.002 |
Neg logit contributions | |
ends | -0.002 |
cheer | -0.002 |
pretend | -0.002 |
e | -0.002 |
wonder | -0.001 |
Token | Lily |
Token ID | 20037 |
Feature activation | -3.71e-01 |
Pos logit contributions | |
ge | +0.058 |
Twe | +0.054 |
obby | +0.052 |
Wil | +0.052 |
OO | +0.051 |
Neg logit contributions | |
cheer | -0.053 |
pretend | -0.052 |
ends | -0.051 |
wonder | -0.044 |
excited | -0.044 |
Token | ! |
Token ID | 0 |
Feature activation | -6.31e-01 |
Pos logit contributions | |
Twe | +0.010 |
ge | +0.010 |
Wil | +0.010 |
Fe | +0.009 |
obby | +0.009 |
Neg logit contributions | |
ends | -0.006 |
cheer | -0.006 |
pretend | -0.006 |
e | -0.005 |
excited | -0.005 |
Token | I |
Token ID | 314 |
Feature activation | +2.12e-01 |
Pos logit contributions | |
ge | +0.013 |
Twe | +0.011 |
Wil | +0.011 |
badly | +0.010 |
obby | +0.010 |
Neg logit contributions | |
ends | -0.015 |
cheer | -0.014 |
pretend | -0.014 |
sill | -0.013 |
wonder | -0.013 |
Token | wonder |
Token ID | 4240 |
Feature activation | -4.71e-01 |
Pos logit contributions | |
ends | +0.004 |
iece | +0.004 |
cheer | +0.004 |
reporters | +0.004 |
sill | +0.003 |
Neg logit contributions | |
ge | -0.004 |
Twe | -0.004 |
obby | -0.004 |
badly | -0.004 |
Em | -0.004 |
Token | what |
Token ID | 644 |
Feature activation | -6.75e-01 |
Pos logit contributions | |
Twe | +0.014 |
ge | +0.014 |
gie | +0.013 |
Wil | +0.013 |
obby | +0.013 |
Neg logit contributions | |
pretend | -0.007 |
ends | -0.007 |
cheer | -0.006 |
like | -0.006 |
see | -0.006 |
Token | her |
Token ID | 607 |
Feature activation | +3.61e-01 |
Pos logit contributions | |
ge | +0.006 |
Twe | +0.006 |
Twe | +0.006 |
Wil | +0.006 |
badly | +0.006 |
Neg logit contributions | |
ends | -0.006 |
cheer | -0.006 |
believe | -0.006 |
reporters | -0.005 |
ibr | -0.005 |
Token | ball |
Token ID | 2613 |
Feature activation | -6.06e-01 |
Pos logit contributions | |
ends | +0.008 |
cheer | +0.008 |
sill | +0.007 |
e | +0.007 |
pretend | +0.007 |
Neg logit contributions | |
ge | -0.007 |
Twe | -0.006 |
badly | -0.006 |
Wil | -0.006 |
obby | -0.006 |
Token | outside |
Token ID | 2354 |
Feature activation | -4.56e-01 |
Pos logit contributions | |
ge | +0.017 |
Twe | +0.016 |
Wil | +0.016 |
obby | +0.015 |
badly | +0.015 |
Neg logit contributions | |
ends | -0.008 |
cheer | -0.008 |
pretend | -0.007 |
wonder | -0.007 |
sill | -0.006 |
Token | when |
Token ID | 618 |
Feature activation | -2.71e-01 |
Pos logit contributions | |
ge | +0.012 |
Twe | +0.011 |
badly | +0.011 |
Wil | +0.011 |
obby | +0.011 |
Neg logit contributions | |
ends | -0.007 |
cheer | -0.007 |
pretend | -0.006 |
wonder | -0.005 |
reporters | -0.005 |
Token | she |
Token ID | 673 |
Feature activation | -6.37e-01 |
Pos logit contributions | |
ge | +0.009 |
Twe | +0.009 |
obby | +0.008 |
Wil | +0.008 |
OO | +0.008 |
Neg logit contributions | |
pretend | -0.003 |
ends | -0.003 |
cheer | -0.002 |
wonder | -0.002 |
see | -0.002 |
Token | accidentally |
Token ID | 14716 |
Feature activation | -2.54e+00 |
Pos logit contributions | |
ge | +0.014 |
Twe | +0.013 |
obby | +0.012 |
Wil | +0.012 |
OO | +0.012 |
Neg logit contributions | |
cheer | -0.013 |
ends | -0.013 |
pretend | -0.011 |
sill | -0.011 |
wonder | -0.011 |
Token | threw |
Token ID | 9617 |
Feature activation | -1.07e+00 |
Pos logit contributions | |
ge | +0.061 |
obby | +0.060 |
Twe | +0.060 |
Wil | +0.056 |
Frankie | +0.052 |
Neg logit contributions | |
pretend | -0.049 |
ends | -0.046 |
cheer | -0.045 |
e | -0.043 |
see | -0.042 |
Token | it |
Token ID | 340 |
Feature activation | +4.26e-02 |
Pos logit contributions | |
ge | +0.025 |
Twe | +0.024 |
Wil | +0.023 |
badly | +0.023 |
obby | +0.022 |
Neg logit contributions | |
ends | -0.020 |
cheer | -0.018 |
pretend | -0.017 |
reporters | -0.016 |
e | -0.016 |
Token | too |
Token ID | 1165 |
Feature activation | +5.28e-01 |
Pos logit contributions | |
ends | +0.001 |
cheer | +0.001 |
pretend | +0.000 |
excited | +0.000 |
sill | +0.000 |
Neg logit contributions | |
Twe | -0.001 |
ge | -0.001 |
obby | -0.001 |
Wil | -0.001 |
OO | -0.001 |
Token | hard |
Token ID | 1327 |
Feature activation | +2.61e-01 |
Pos logit contributions | |
ends | +0.009 |
cheer | +0.008 |
pretend | +0.008 |
wonder | +0.007 |
excited | +0.007 |
Neg logit contributions | |
ge | -0.014 |
Twe | -0.013 |
obby | -0.013 |
Wil | -0.012 |
gie | -0.012 |
Token | and |
Token ID | 290 |
Feature activation | -2.25e-01 |
Pos logit contributions | |
ends | +0.003 |
pretend | +0.003 |
like | +0.003 |
cheer | +0.003 |
around | +0.003 |
Neg logit contributions | |
Twe | -0.008 |
Wil | -0.008 |
ge | -0.007 |
obby | -0.007 |
Fe | -0.007 |
Token | . |
Token ID | 13 |
Feature activation | -3.79e-01 |
Pos logit contributions | |
cheer | +0.010 |
ends | +0.010 |
pretend | +0.010 |
sill | +0.010 |
excited | +0.009 |
Neg logit contributions | |
Twe | -0.017 |
ge | -0.017 |
Wil | -0.016 |
obby | -0.016 |
OO | -0.016 |
Token | It |
Token ID | 632 |
Feature activation | +7.73e-01 |
Pos logit contributions | |
ge | +0.010 |
Twe | +0.009 |
gie | +0.009 |
obby | +0.009 |
OO | +0.009 |
Neg logit contributions | |
ends | -0.007 |
cheer | -0.006 |
pretend | -0.006 |
excited | -0.006 |
wonder | -0.006 |
Token | said |
Token ID | 531 |
Feature activation | -1.46e-01 |
Pos logit contributions | |
ends | +0.016 |
cheer | +0.015 |
pretend | +0.015 |
wonder | +0.014 |
e | +0.013 |
Neg logit contributions | |
ge | -0.017 |
Twe | -0.016 |
Wil | -0.015 |
obby | -0.015 |
OO | -0.014 |
Token | " |
Token ID | 366 |
Feature activation | -2.07e-01 |
Pos logit contributions | |
ge | +0.004 |
Twe | +0.004 |
Wil | +0.004 |
obby | +0.004 |
OO | +0.003 |
Neg logit contributions | |
ends | -0.002 |
cheer | -0.002 |
pretend | -0.002 |
wonder | -0.002 |
e | -0.002 |
Token | DO |
Token ID | 18227 |
Feature activation | -2.49e-01 |
Pos logit contributions | |
ge | +0.004 |
obby | +0.004 |
badly | +0.004 |
OO | +0.004 |
Twe | +0.004 |
Neg logit contributions | |
ends | -0.004 |
cheer | -0.004 |
pretend | -0.004 |
e | -0.004 |
sill | -0.004 |
Token | NOT |
Token ID | 5626 |
Feature activation | -2.53e+00 |
Pos logit contributions | |
ge | +0.002 |
Twe | +0.002 |
obby | +0.002 |
Wil | +0.002 |
gie | +0.001 |
Neg logit contributions | |
ends | -0.008 |
cheer | -0.008 |
pretend | -0.008 |
e | -0.008 |
reporters | -0.007 |
Token | T |
Token ID | 309 |
Feature activation | +1.52e-02 |
Pos logit contributions | |
ge | +0.052 |
Twe | +0.048 |
Wil | +0.045 |
obby | +0.045 |
badly | +0.043 |
Neg logit contributions | |
ends | -0.055 |
cheer | -0.054 |
pretend | -0.054 |
e | -0.049 |
wonder | -0.049 |
Token | OU |
Token ID | 2606 |
Feature activation | -1.27e+00 |
Pos logit contributions | |
ends | +0.000 |
e | +0.000 |
cheer | +0.000 |
ibr | +0.000 |
ibrarian | +0.000 |
Neg logit contributions | |
ge | -0.000 |
Twe | -0.000 |
Wil | -0.000 |
gie | -0.000 |
Frankie | -0.000 |
Token | CH |
Token ID | 3398 |
Feature activation | -2.59e-01 |
Pos logit contributions | |
ge | +0.022 |
Twe | +0.019 |
obby | +0.018 |
Wil | +0.018 |
Frankie | +0.017 |
Neg logit contributions | |
ends | -0.032 |
cheer | -0.031 |
pretend | -0.031 |
e | -0.029 |
see | -0.028 |
Token | ". |
Token ID | 1911 |
Feature activation | -3.24e-01 |
Pos logit contributions | |
ge | +0.005 |
Twe | +0.005 |
Wil | +0.005 |
gie | +0.004 |
Frankie | +0.004 |
Neg logit contributions | |
ends | -0.006 |
pretend | -0.006 |
cheer | -0.005 |
e | -0.005 |
excited | -0.005 |
Token | But |
Token ID | 887 |
Feature activation | -9.15e-01 |
Pos logit contributions | |
ge | +0.008 |
Twe | +0.007 |
obby | +0.007 |
gie | +0.007 |
Wil | +0.007 |
Neg logit contributions | |
ends | -0.006 |
cheer | -0.006 |
pretend | -0.006 |
sill | -0.005 |
wonder | -0.005 |
Token | her |
Token ID | 607 |
Feature activation | +3.10e-01 |
Pos logit contributions | |
ge | +0.008 |
Twe | +0.008 |
Wil | +0.007 |
badly | +0.007 |
Twe | +0.007 |
Neg logit contributions | |
ends | -0.005 |
cheer | -0.005 |
pretend | -0.004 |
wonder | -0.004 |
e | -0.004 |
Token | toy |
Token ID | 13373 |
Feature activation | +9.16e-02 |
Pos logit contributions | |
ends | +0.006 |
cheer | +0.006 |
pretend | +0.006 |
wonder | +0.005 |
e | +0.005 |
Neg logit contributions | |
ge | -0.007 |
Twe | -0.007 |
Wil | -0.006 |
obby | -0.006 |
OO | -0.006 |
Token | car |
Token ID | 1097 |
Feature activation | -7.05e-01 |
Pos logit contributions | |
pretend | +0.002 |
ends | +0.001 |
cheer | +0.001 |
reporters | +0.001 |
e | +0.001 |
Neg logit contributions | |
ge | -0.002 |
Twe | -0.002 |
obby | -0.002 |
shadow | -0.002 |
Wil | -0.002 |
Token | and |
Token ID | 290 |
Feature activation | +1.05e-01 |
Pos logit contributions | |
ge | +0.022 |
Twe | +0.022 |
obby | +0.021 |
Wil | +0.021 |
OO | +0.020 |
Neg logit contributions | |
ends | -0.008 |
pretend | -0.007 |
cheer | -0.007 |
e | -0.006 |
excited | -0.006 |
Token | she |
Token ID | 673 |
Feature activation | -3.41e-01 |
Pos logit contributions | |
ends | +0.002 |
cheer | +0.002 |
pretend | +0.002 |
e | +0.002 |
sill | +0.002 |
Neg logit contributions | |
ge | -0.002 |
Twe | -0.002 |
Wil | -0.002 |
obby | -0.002 |
badly | -0.002 |
Token | accidentally |
Token ID | 14716 |
Feature activation | -2.52e+00 |
Pos logit contributions | |
ge | +0.007 |
Twe | +0.006 |
Wil | +0.006 |
obby | +0.006 |
OO | +0.006 |
Neg logit contributions | |
ends | -0.008 |
cheer | -0.008 |
pretend | -0.007 |
wonder | -0.007 |
sill | -0.007 |
Token | twisted |
Token ID | 19074 |
Feature activation | -1.07e+00 |
Pos logit contributions | |
Twe | +0.066 |
obby | +0.066 |
ge | +0.064 |
Wil | +0.061 |
Frankie | +0.057 |
Neg logit contributions | |
pretend | -0.045 |
ends | -0.041 |
e | -0.039 |
cheer | -0.039 |
see | -0.038 |
Token | it |
Token ID | 340 |
Feature activation | -1.57e-01 |
Pos logit contributions | |
Twe | +0.032 |
ge | +0.031 |
obby | +0.031 |
Wil | +0.030 |
OO | +0.029 |
Neg logit contributions | |
ends | -0.013 |
pretend | -0.013 |
cheer | -0.013 |
sill | -0.011 |
around | -0.011 |
Token | too |
Token ID | 1165 |
Feature activation | +4.75e-01 |
Pos logit contributions | |
ge | +0.004 |
Twe | +0.004 |
obby | +0.004 |
Wil | +0.004 |
OO | +0.004 |
Neg logit contributions | |
ends | -0.002 |
cheer | -0.002 |
pretend | -0.002 |
sill | -0.002 |
excited | -0.002 |
Token | hard |
Token ID | 1327 |
Feature activation | +2.10e-01 |
Pos logit contributions | |
ends | +0.007 |
cheer | +0.007 |
pretend | +0.006 |
excited | +0.006 |
wonder | +0.006 |
Neg logit contributions | |
Twe | -0.013 |
ge | -0.013 |
obby | -0.012 |
Wil | -0.012 |
gie | -0.012 |
Token | . |
Token ID | 13 |
Feature activation | +6.93e-02 |
Pos logit contributions | |
ends | +0.003 |
pretend | +0.003 |
cheer | +0.003 |
like | +0.003 |
excited | +0.002 |
Neg logit contributions | |
Twe | -0.006 |
Wil | -0.006 |
ge | -0.006 |
obby | -0.005 |
OO | -0.005 |
Token | the |
Token ID | 262 |
Feature activation | -4.64e-01 |
Pos logit contributions | |
ge | +0.001 |
Twe | +0.001 |
obby | +0.001 |
OO | +0.001 |
Wil | +0.001 |
Neg logit contributions | |
cheer | -0.000 |
ends | -0.000 |
pretend | -0.000 |
wonder | -0.000 |
excited | -0.000 |
Token | necklace |
Token ID | 38987 |
Feature activation | +4.85e-01 |
Pos logit contributions | |
ge | +0.013 |
obby | +0.012 |
Twe | +0.011 |
OO | +0.011 |
Wil | +0.011 |
Neg logit contributions | |
pretend | -0.008 |
cheer | -0.008 |
ends | -0.007 |
wonder | -0.007 |
sill | -0.006 |
Token | and |
Token ID | 290 |
Feature activation | +7.68e-02 |
Pos logit contributions | |
ends | +0.011 |
cheer | +0.010 |
wonder | +0.010 |
pretend | +0.010 |
reporters | +0.009 |
Neg logit contributions | |
ge | -0.010 |
badly | -0.009 |
Johnson | -0.008 |
Twe | -0.008 |
way | -0.008 |
Token | the |
Token ID | 262 |
Feature activation | -3.16e-01 |
Pos logit contributions | |
ends | +0.002 |
cheer | +0.002 |
pretend | +0.001 |
sill | +0.001 |
e | +0.001 |
Neg logit contributions | |
ge | -0.002 |
Twe | -0.002 |
Wil | -0.001 |
obby | -0.001 |
badly | -0.001 |
Token | magn |
Token ID | 7842 |
Feature activation | -1.11e-02 |
Pos logit contributions | |
ge | +0.009 |
obby | +0.009 |
Twe | +0.008 |
OO | +0.008 |
Wil | +0.008 |
Neg logit contributions | |
cheer | -0.005 |
pretend | -0.005 |
ends | -0.004 |
wonder | -0.004 |
sill | -0.004 |
Token | ifying |
Token ID | 4035 |
Feature activation | -2.52e+00 |
Pos logit contributions | |
ge | +0.000 |
Twe | +0.000 |
Wil | +0.000 |
obby | +0.000 |
gie | +0.000 |
Neg logit contributions | |
ends | -0.000 |
cheer | -0.000 |
pretend | -0.000 |
e | -0.000 |
sill | -0.000 |
Token | glass |
Token ID | 5405 |
Feature activation | +4.87e-01 |
Pos logit contributions | |
ge | +0.052 |
Twe | +0.048 |
obby | +0.046 |
Wil | +0.045 |
gie | +0.043 |
Neg logit contributions | |
cheer | -0.054 |
ends | -0.054 |
pretend | -0.053 |
sill | -0.049 |
wonder | -0.049 |
Token | and |
Token ID | 290 |
Feature activation | +2.21e-01 |
Pos logit contributions | |
ends | +0.010 |
cheer | +0.009 |
pretend | +0.009 |
wonder | +0.008 |
e | +0.008 |
Neg logit contributions | |
ge | -0.011 |
Twe | -0.010 |
Wil | -0.010 |
obby | -0.009 |
OO | -0.009 |
Token | started |
Token ID | 2067 |
Feature activation | +4.17e-02 |
Pos logit contributions | |
ends | +0.004 |
cheer | +0.004 |
pretend | +0.004 |
reporters | +0.004 |
e | +0.004 |
Neg logit contributions | |
ge | -0.005 |
Twe | -0.004 |
Wil | -0.004 |
Wil | -0.004 |
badly | -0.004 |
Token | to |
Token ID | 284 |
Feature activation | +4.25e-01 |
Pos logit contributions | |
ends | +0.001 |
cheer | +0.001 |
pretend | +0.001 |
wonder | +0.001 |
sill | +0.001 |
Neg logit contributions | |
ge | -0.001 |
Twe | -0.001 |
obby | -0.001 |
Wil | -0.001 |
OO | -0.001 |
Token | examine |
Token ID | 10716 |
Feature activation | -3.00e-01 |
Pos logit contributions | |
ends | +0.007 |
cheer | +0.006 |
pretend | +0.006 |
reporters | +0.006 |
sill | +0.005 |
Neg logit contributions | |
ge | -0.011 |
Twe | -0.010 |
Wil | -0.010 |
obby | -0.010 |
gie | -0.010 |
Token | with |
Token ID | 351 |
Feature activation | -5.04e-01 |
Pos logit contributions | |
badly | +0.012 |
Twe | +0.012 |
Fe | +0.011 |
ge | +0.011 |
OO | +0.011 |
Neg logit contributions | |
ends | -0.010 |
amaz | -0.010 |
cheer | -0.010 |
wonder | -0.009 |
sill | -0.009 |
Token | his |
Token ID | 465 |
Feature activation | +3.93e-02 |
Pos logit contributions | |
ge | +0.010 |
Twe | +0.010 |
Wil | +0.009 |
badly | +0.009 |
obby | +0.009 |
Neg logit contributions | |
ends | -0.011 |
cheer | -0.011 |
pretend | -0.010 |
wonder | -0.009 |
reporters | -0.009 |
Token | friends |
Token ID | 2460 |
Feature activation | +3.45e-02 |
Pos logit contributions | |
cheer | +0.001 |
ends | +0.001 |
pretend | +0.001 |
reporters | +0.001 |
e | +0.001 |
Neg logit contributions | |
ge | -0.001 |
Twe | -0.001 |
Wil | -0.001 |
obby | -0.001 |
badly | -0.001 |
Token | when |
Token ID | 618 |
Feature activation | -3.14e-01 |
Pos logit contributions | |
ends | +0.000 |
cheer | +0.000 |
pretend | +0.000 |
wonder | +0.000 |
sill | +0.000 |
Neg logit contributions | |
ge | -0.001 |
Twe | -0.001 |
obby | -0.001 |
Wil | -0.001 |
OO | -0.001 |
Token | he |
Token ID | 339 |
Feature activation | -5.99e-01 |
Pos logit contributions | |
ge | +0.009 |
Twe | +0.008 |
obby | +0.008 |
Wil | +0.008 |
OO | +0.008 |
Neg logit contributions | |
pretend | -0.005 |
ends | -0.005 |
cheer | -0.004 |
excited | -0.004 |
see | -0.004 |
Token | accidentally |
Token ID | 14716 |
Feature activation | -2.50e+00 |
Pos logit contributions | |
ge | +0.017 |
Twe | +0.016 |
Wil | +0.015 |
obby | +0.015 |
gie | +0.015 |
Neg logit contributions | |
ends | -0.009 |
pretend | -0.008 |
cheer | -0.008 |
e | -0.007 |
excited | -0.007 |
Token | fell |
Token ID | 3214 |
Feature activation | -1.26e+00 |
Pos logit contributions | |
ge | +0.062 |
Twe | +0.060 |
obby | +0.059 |
Wil | +0.056 |
shadow | +0.053 |
Neg logit contributions | |
pretend | -0.048 |
ends | -0.043 |
e | -0.042 |
cheer | -0.041 |
see | -0.040 |
Token | and |
Token ID | 290 |
Feature activation | -7.46e-01 |
Pos logit contributions | |
Twe | +0.043 |
obby | +0.043 |
ge | +0.042 |
ience | +0.040 |
etsy | +0.040 |
Neg logit contributions | |
around | -0.010 |
like | -0.010 |
sill | -0.009 |
pretend | -0.009 |
cheer | -0.008 |
Token | hurt |
Token ID | 5938 |
Feature activation | -7.10e-01 |
Pos logit contributions | |
obby | +0.022 |
ge | +0.021 |
Twe | +0.020 |
Wil | +0.019 |
aman | +0.019 |
Neg logit contributions | |
pretend | -0.010 |
excited | -0.010 |
cheer | -0.009 |
e | -0.009 |
ends | -0.009 |
Token | his |
Token ID | 465 |
Feature activation | -2.69e-01 |
Pos logit contributions | |
ge | +0.023 |
Twe | +0.022 |
shadow | +0.022 |
ience | +0.022 |
obby | +0.022 |
Neg logit contributions | |
e | -0.006 |
sill | -0.006 |
cheer | -0.006 |
like | -0.006 |
excited | -0.006 |
Token | paw |
Token ID | 41827 |
Feature activation | -1.17e+00 |
Pos logit contributions | |
Twe | +0.007 |
ge | +0.007 |
Wil | +0.007 |
obby | +0.007 |
shadow | +0.007 |
Neg logit contributions | |
pretend | -0.004 |
cheer | -0.004 |
ends | -0.004 |
sill | -0.004 |
excited | -0.004 |
Token | his |
Token ID | 465 |
Feature activation | +1.61e-01 |
Pos logit contributions | |
ge | +0.010 |
Twe | +0.009 |
Wil | +0.009 |
badly | +0.009 |
Timothy | +0.009 |
Neg logit contributions | |
ends | -0.013 |
cheer | -0.013 |
pretend | -0.011 |
wonder | -0.011 |
e | -0.011 |
Token | favorite |
Token ID | 4004 |
Feature activation | -5.63e-01 |
Pos logit contributions | |
ends | +0.003 |
cheer | +0.003 |
pretend | +0.003 |
sill | +0.003 |
e | +0.003 |
Neg logit contributions | |
ge | -0.003 |
Twe | -0.003 |
obby | -0.003 |
Wil | -0.003 |
badly | -0.003 |
Token | baseball |
Token ID | 9283 |
Feature activation | -6.88e-01 |
Pos logit contributions | |
ge | +0.015 |
Twe | +0.014 |
Wil | +0.014 |
obby | +0.013 |
Wil | +0.013 |
Neg logit contributions | |
pretend | -0.010 |
cheer | -0.009 |
ends | -0.009 |
wonder | -0.008 |
excited | -0.008 |
Token | when |
Token ID | 618 |
Feature activation | -4.15e-01 |
Pos logit contributions | |
ge | +0.017 |
Twe | +0.016 |
Wil | +0.016 |
obby | +0.015 |
OO | +0.015 |
Neg logit contributions | |
ends | -0.012 |
cheer | -0.012 |
pretend | -0.011 |
e | -0.010 |
wonder | -0.010 |
Token | he |
Token ID | 339 |
Feature activation | -5.99e-01 |
Pos logit contributions | |
ge | +0.012 |
Twe | +0.012 |
obby | +0.011 |
Wil | +0.011 |
shadow | +0.011 |
Neg logit contributions | |
pretend | -0.006 |
ends | -0.005 |
excited | -0.005 |
cheer | -0.005 |
see | -0.005 |
Token | accidentally |
Token ID | 14716 |
Feature activation | -2.48e+00 |
Pos logit contributions | |
ge | +0.012 |
Twe | +0.011 |
Wil | +0.010 |
obby | +0.010 |
OO | +0.010 |
Neg logit contributions | |
ends | -0.013 |
cheer | -0.013 |
pretend | -0.013 |
sill | -0.012 |
wonder | -0.012 |
Token | dropped |
Token ID | 5710 |
Feature activation | -1.15e+00 |
Pos logit contributions | |
Twe | +0.064 |
obby | +0.062 |
ge | +0.062 |
Wil | +0.060 |
Johnson | +0.055 |
Neg logit contributions | |
pretend | -0.045 |
ends | -0.041 |
cheer | -0.040 |
see | -0.039 |
e | -0.038 |
Token | it |
Token ID | 340 |
Feature activation | -1.01e-03 |
Pos logit contributions | |
ge | +0.034 |
Twe | +0.034 |
obby | +0.033 |
Wil | +0.032 |
OO | +0.031 |
Neg logit contributions | |
cheer | -0.015 |
pretend | -0.014 |
ends | -0.013 |
wonder | -0.012 |
excited | -0.012 |
Token | and |
Token ID | 290 |
Feature activation | -3.47e-01 |
Pos logit contributions | |
Twe | +0.000 |
obby | +0.000 |
Wil | +0.000 |
ge | +0.000 |
etsy | +0.000 |
Neg logit contributions | |
pretend | -0.000 |
around | -0.000 |
like | -0.000 |
cheer | -0.000 |
excited | -0.000 |
Token | it |
Token ID | 340 |
Feature activation | -6.70e-02 |
Pos logit contributions | |
obby | +0.010 |
ge | +0.010 |
Twe | +0.010 |
Wil | +0.009 |
OO | +0.009 |
Neg logit contributions | |
pretend | -0.005 |
cheer | -0.005 |
ends | -0.004 |
see | -0.004 |
excited | -0.004 |
Token | broke |
Token ID | 6265 |
Feature activation | -1.13e+00 |
Pos logit contributions | |
ge | +0.002 |
obby | +0.002 |
Twe | +0.002 |
Wil | +0.001 |
Frankie | +0.001 |
Neg logit contributions | |
pretend | -0.001 |
ends | -0.001 |
cheer | -0.001 |
excited | -0.001 |
e | -0.001 |
Token | Lily |
Token ID | 20037 |
Feature activation | -2.34e-01 |
Pos logit contributions | |
ge | +0.046 |
Twe | +0.042 |
obby | +0.040 |
Wil | +0.040 |
OO | +0.039 |
Neg logit contributions | |
cheer | -0.039 |
pretend | -0.039 |
ends | -0.038 |
excited | -0.033 |
wonder | -0.032 |
Token | . |
Token ID | 13 |
Feature activation | -5.46e-01 |
Pos logit contributions | |
ge | +0.006 |
Twe | +0.006 |
Wil | +0.006 |
obby | +0.006 |
OO | +0.006 |
Neg logit contributions | |
cheer | -0.003 |
ends | -0.003 |
pretend | -0.003 |
excited | -0.003 |
e | -0.003 |
Token | We |
Token ID | 775 |
Feature activation | +7.74e-01 |
Pos logit contributions | |
ge | +0.013 |
Twe | +0.010 |
Wil | +0.010 |
OO | +0.010 |
badly | +0.010 |
Neg logit contributions | |
ends | -0.012 |
cheer | -0.012 |
pretend | -0.011 |
sill | -0.011 |
excited | -0.010 |
Token | have |
Token ID | 423 |
Feature activation | -1.42e-01 |
Pos logit contributions | |
ends | +0.015 |
cheer | +0.014 |
pretend | +0.014 |
sill | +0.012 |
e | +0.012 |
Neg logit contributions | |
ge | -0.018 |
Twe | -0.017 |
obby | -0.016 |
Wil | -0.016 |
badly | -0.015 |
Token | our |
Token ID | 674 |
Feature activation | -3.30e-01 |
Pos logit contributions | |
ge | +0.005 |
Twe | +0.005 |
obby | +0.004 |
gie | +0.004 |
Wil | +0.004 |
Neg logit contributions | |
pretend | -0.001 |
excited | -0.001 |
ends | -0.001 |
see | -0.001 |
wonder | -0.001 |
Token | umb |
Token ID | 20810 |
Feature activation | -2.48e+00 |
Pos logit contributions | |
ge | +0.008 |
Twe | +0.008 |
Wil | +0.008 |
obby | +0.008 |
OO | +0.007 |
Neg logit contributions | |
pretend | -0.006 |
cheer | -0.006 |
ends | -0.006 |
wonder | -0.005 |
excited | -0.005 |
Token | rell |
Token ID | 11252 |
Feature activation | -6.70e-01 |
Pos logit contributions | |
ge | +0.034 |
Twe | +0.030 |
obby | +0.028 |
Wil | +0.028 |
OO | +0.027 |
Neg logit contributions | |
ends | -0.070 |
cheer | -0.069 |
pretend | -0.067 |
sill | -0.063 |
e | -0.063 |
Token | as |
Token ID | 292 |
Feature activation | -1.78e+00 |
Pos logit contributions | |
ge | +0.018 |
Twe | +0.017 |
obby | +0.017 |
Wil | +0.017 |
badly | +0.016 |
Neg logit contributions | |
ends | -0.010 |
cheer | -0.010 |
pretend | -0.009 |
wonder | -0.008 |
e | -0.008 |
Token | to |
Token ID | 284 |
Feature activation | -4.06e-01 |
Pos logit contributions | |
Twe | +0.049 |
ge | +0.048 |
Wil | +0.047 |
OO | +0.046 |
obby | +0.046 |
Neg logit contributions | |
ends | -0.027 |
pretend | -0.026 |
cheer | -0.025 |
like | -0.022 |
excited | -0.022 |
Token | keep |
Token ID | 1394 |
Feature activation | -1.01e+00 |
Pos logit contributions | |
ge | +0.012 |
obby | +0.012 |
OO | +0.012 |
iously | +0.011 |
gie | +0.011 |
Neg logit contributions | |
cheer | -0.006 |
pretend | -0.005 |
see | -0.005 |
ends | -0.005 |
wonder | -0.004 |
Token | us |
Token ID | 514 |
Feature activation | -8.78e-01 |
Pos logit contributions | |
ge | +0.029 |
obby | +0.027 |
Twe | +0.027 |
OO | +0.027 |
Wil | +0.026 |
Neg logit contributions | |
pretend | -0.014 |
cheer | -0.014 |
ends | -0.014 |
excited | -0.012 |
e | -0.012 |
Token | eating |
Token ID | 6600 |
Feature activation | -4.90e-01 |
Pos logit contributions | |
ge | +0.004 |
Twe | +0.004 |
Wil | +0.004 |
obby | +0.004 |
OO | +0.004 |
Neg logit contributions | |
ends | -0.003 |
cheer | -0.003 |
pretend | -0.003 |
wonder | -0.003 |
e | -0.003 |
Token | her |
Token ID | 607 |
Feature activation | -6.64e-01 |
Pos logit contributions | |
ge | +0.013 |
Twe | +0.012 |
obby | +0.012 |
Wil | +0.012 |
OO | +0.011 |
Neg logit contributions | |
ends | -0.008 |
cheer | -0.008 |
pretend | -0.007 |
wonder | -0.007 |
excited | -0.006 |
Token | dessert |
Token ID | 23084 |
Feature activation | -3.02e-01 |
Pos logit contributions | |
ge | +0.016 |
Twe | +0.016 |
Wil | +0.015 |
gie | +0.014 |
obby | +0.014 |
Neg logit contributions | |
pretend | -0.012 |
ends | -0.012 |
cheer | -0.011 |
excited | -0.011 |
wonder | -0.011 |
Token | when |
Token ID | 618 |
Feature activation | -1.20e-01 |
Pos logit contributions | |
ge | +0.007 |
badly | +0.006 |
obby | +0.006 |
Twe | +0.006 |
Wil | +0.006 |
Neg logit contributions | |
cheer | -0.006 |
ends | -0.006 |
pretend | -0.005 |
wonder | -0.005 |
e | -0.005 |
Token | she |
Token ID | 673 |
Feature activation | -8.75e-01 |
Pos logit contributions | |
ge | +0.005 |
Twe | +0.004 |
obby | +0.004 |
shadow | +0.004 |
OO | +0.004 |
Neg logit contributions | |
pretend | -0.001 |
excited | -0.001 |
see | -0.001 |
cheer | -0.001 |
ends | -0.001 |
Token | accidentally |
Token ID | 14716 |
Feature activation | -2.48e+00 |
Pos logit contributions | |
ge | +0.027 |
Twe | +0.026 |
Wil | +0.025 |
obby | +0.024 |
Timothy | +0.024 |
Neg logit contributions | |
pretend | -0.010 |
ends | -0.010 |
e | -0.010 |
cheer | -0.010 |
see | -0.009 |
Token | dropped |
Token ID | 5710 |
Feature activation | -1.05e+00 |
Pos logit contributions | |
Twe | +0.067 |
ge | +0.065 |
obby | +0.064 |
Wil | +0.061 |
way | +0.058 |
Neg logit contributions | |
pretend | -0.043 |
e | -0.039 |
see | -0.037 |
ends | -0.036 |
drawn | -0.036 |
Token | her |
Token ID | 607 |
Feature activation | -2.89e-01 |
Pos logit contributions | |
ge | +0.035 |
Twe | +0.034 |
obby | +0.034 |
OO | +0.033 |
Wil | +0.032 |
Neg logit contributions | |
cheer | -0.009 |
pretend | -0.009 |
sill | -0.008 |
ends | -0.008 |
wonder | -0.007 |
Token | spoon |
Token ID | 24556 |
Feature activation | -6.09e-01 |
Pos logit contributions | |
ge | +0.008 |
Twe | +0.008 |
Wil | +0.007 |
gie | +0.007 |
shadow | +0.007 |
Neg logit contributions | |
pretend | -0.005 |
ends | -0.004 |
cheer | -0.004 |
excited | -0.004 |
sill | -0.004 |
Token | . |
Token ID | 13 |
Feature activation | +9.97e-02 |
Pos logit contributions | |
Twe | +0.018 |
ge | +0.017 |
obby | +0.017 |
Wil | +0.016 |
OO | +0.016 |
Neg logit contributions | |
ends | -0.007 |
cheer | -0.007 |
sill | -0.007 |
pretend | -0.007 |
around | -0.007 |
Token | She |
Token ID | 1375 |
Feature activation | -9.59e-01 |
Pos logit contributions | |
pretend | +0.002 |
cheer | +0.002 |
excited | +0.001 |
ends | +0.001 |
sill | +0.001 |
Neg logit contributions | |
ge | -0.003 |
Twe | -0.002 |
obby | -0.002 |
Wil | -0.002 |
shadow | -0.002 |
Token | his |
Token ID | 465 |
Feature activation | -1.58e-01 |
Pos logit contributions | |
ge | +0.008 |
Twe | +0.007 |
Wil | +0.007 |
badly | +0.007 |
Twe | +0.006 |
Neg logit contributions | |
ends | -0.010 |
cheer | -0.009 |
wonder | -0.008 |
pretend | -0.008 |
believe | -0.008 |
Token | toy |
Token ID | 13373 |
Feature activation | -7.81e-02 |
Pos logit contributions | |
ge | +0.003 |
Twe | +0.003 |
Wil | +0.003 |
obby | +0.003 |
gie | +0.003 |
Neg logit contributions | |
cheer | -0.003 |
ends | -0.003 |
e | -0.003 |
wonder | -0.003 |
sill | -0.003 |
Token | truck |
Token ID | 7779 |
Feature activation | -4.99e-01 |
Pos logit contributions | |
ge | +0.002 |
Twe | +0.001 |
Wil | +0.001 |
obby | +0.001 |
OO | +0.001 |
Neg logit contributions | |
ends | -0.002 |
cheer | -0.002 |
pretend | -0.002 |
wonder | -0.001 |
sill | -0.001 |
Token | when |
Token ID | 618 |
Feature activation | -3.19e-01 |
Pos logit contributions | |
ge | +0.014 |
Twe | +0.013 |
Wil | +0.013 |
badly | +0.013 |
obby | +0.013 |
Neg logit contributions | |
ends | -0.007 |
cheer | -0.007 |
pretend | -0.006 |
wonder | -0.006 |
sill | -0.005 |
Token | he |
Token ID | 339 |
Feature activation | -6.01e-01 |
Pos logit contributions | |
ge | +0.009 |
Twe | +0.009 |
obby | +0.009 |
OO | +0.009 |
shadow | +0.009 |
Neg logit contributions | |
pretend | -0.005 |
ends | -0.004 |
excited | -0.004 |
see | -0.004 |
cheer | -0.004 |
Token | accidentally |
Token ID | 14716 |
Feature activation | -2.47e+00 |
Pos logit contributions | |
ge | +0.012 |
Twe | +0.011 |
Wil | +0.011 |
obby | +0.011 |
OO | +0.010 |
Neg logit contributions | |
ends | -0.013 |
cheer | -0.013 |
pretend | -0.012 |
sill | -0.011 |
wonder | -0.011 |
Token | twisted |
Token ID | 19074 |
Feature activation | -9.12e-01 |
Pos logit contributions | |
Twe | +0.064 |
obby | +0.064 |
ge | +0.063 |
Wil | +0.060 |
etsy | +0.057 |
Neg logit contributions | |
pretend | -0.045 |
ends | -0.039 |
cheer | -0.038 |
e | -0.038 |
see | -0.037 |
Token | the |
Token ID | 262 |
Feature activation | -2.28e-01 |
Pos logit contributions | |
Twe | +0.027 |
ge | +0.026 |
obby | +0.026 |
Wil | +0.025 |
etsy | +0.025 |
Neg logit contributions | |
pretend | -0.011 |
ends | -0.011 |
cheer | -0.011 |
around | -0.010 |
like | -0.009 |
Token | wheels |
Token ID | 13666 |
Feature activation | -3.77e-01 |
Pos logit contributions | |
ge | +0.006 |
Twe | +0.006 |
obby | +0.006 |
shadow | +0.006 |
etsy | +0.006 |
Neg logit contributions | |
pretend | -0.004 |
cheer | -0.004 |
sill | -0.003 |
ends | -0.003 |
like | -0.003 |
Token | too |
Token ID | 1165 |
Feature activation | +8.32e-01 |
Pos logit contributions | |
Twe | +0.010 |
ge | +0.010 |
obby | +0.010 |
Wil | +0.010 |
OO | +0.009 |
Neg logit contributions | |
ends | -0.005 |
cheer | -0.005 |
pretend | -0.005 |
sill | -0.005 |
e | -0.004 |
Token | hard |
Token ID | 1327 |
Feature activation | +3.79e-01 |
Pos logit contributions | |
ends | +0.013 |
cheer | +0.013 |
pretend | +0.012 |
excited | +0.011 |
wonder | +0.011 |
Neg logit contributions | |
ge | -0.021 |
Twe | -0.021 |
obby | -0.020 |
Wil | -0.020 |
gie | -0.019 |
Token | on |
Token ID | 319 |
Feature activation | -4.52e-01 |
Pos logit contributions | |
ge | +0.034 |
badly | +0.030 |
Twe | +0.030 |
Wil | +0.029 |
OO | +0.028 |
Neg logit contributions | |
ends | -0.023 |
cheer | -0.023 |
wonder | -0.021 |
amaz | -0.020 |
pretend | -0.020 |
Token | the |
Token ID | 262 |
Feature activation | +4.08e-01 |
Pos logit contributions | |
ge | +0.012 |
Twe | +0.012 |
obby | +0.011 |
OO | +0.011 |
Wil | +0.011 |
Neg logit contributions | |
pretend | -0.007 |
cheer | -0.007 |
ends | -0.006 |
e | -0.006 |
wonder | -0.006 |
Token | table |
Token ID | 3084 |
Feature activation | +2.59e-02 |
Pos logit contributions | |
ends | +0.009 |
cheer | +0.009 |
pretend | +0.009 |
wonder | +0.008 |
sill | +0.008 |
Neg logit contributions | |
ge | -0.008 |
Twe | -0.007 |
obby | -0.007 |
Wil | -0.007 |
OO | -0.006 |
Token | when |
Token ID | 618 |
Feature activation | -3.39e-01 |
Pos logit contributions | |
ends | +0.001 |
cheer | +0.001 |
pretend | +0.000 |
wonder | +0.000 |
reporters | +0.000 |
Neg logit contributions | |
ge | -0.001 |
badly | -0.000 |
Twe | -0.000 |
obby | -0.000 |
OO | -0.000 |
Token | he |
Token ID | 339 |
Feature activation | -6.05e-01 |
Pos logit contributions | |
ge | +0.010 |
OO | +0.010 |
obby | +0.010 |
Twe | +0.010 |
shadow | +0.010 |
Neg logit contributions | |
pretend | -0.004 |
see | -0.003 |
excited | -0.003 |
cheer | -0.003 |
ends | -0.003 |
Token | accidentally |
Token ID | 14716 |
Feature activation | -2.46e+00 |
Pos logit contributions | |
ge | +0.011 |
Twe | +0.011 |
Wil | +0.010 |
obby | +0.010 |
OO | +0.010 |
Neg logit contributions | |
ends | -0.014 |
cheer | -0.014 |
pretend | -0.013 |
e | -0.012 |
wonder | -0.012 |
Token | knocked |
Token ID | 13642 |
Feature activation | -9.41e-01 |
Pos logit contributions | |
Twe | +0.066 |
obby | +0.066 |
ge | +0.064 |
Wil | +0.061 |
etsy | +0.059 |
Neg logit contributions | |
pretend | -0.043 |
ends | -0.037 |
e | -0.037 |
see | -0.036 |
cheer | -0.036 |
Token | over |
Token ID | 625 |
Feature activation | -3.25e-01 |
Pos logit contributions | |
Twe | +0.036 |
ge | +0.034 |
obby | +0.033 |
OO | +0.033 |
etsy | +0.033 |
Neg logit contributions | |
around | -0.005 |
cheer | -0.004 |
forward | -0.003 |
pretend | -0.003 |
like | -0.003 |
Token | a |
Token ID | 257 |
Feature activation | -2.75e-01 |
Pos logit contributions | |
OO | +0.011 |
ge | +0.011 |
Twe | +0.011 |
shadow | +0.011 |
obby | +0.011 |
Neg logit contributions | |
pretend | -0.003 |
cheer | -0.003 |
excited | -0.003 |
wonder | -0.002 |
like | -0.002 |
Token | pin |
Token ID | 6757 |
Feature activation | +1.08e-01 |
Pos logit contributions | |
ge | +0.008 |
Twe | +0.007 |
iously | +0.007 |
Wil | +0.007 |
obby | +0.007 |
Neg logit contributions | |
cheer | -0.005 |
pretend | -0.004 |
ends | -0.004 |
wonder | -0.003 |
see | -0.003 |
Token | . |
Token ID | 13 |
Feature activation | -1.80e-02 |
Pos logit contributions | |
cheer | +0.001 |
ends | +0.001 |
pretend | +0.001 |
e | +0.001 |
sill | +0.001 |
Neg logit contributions | |
ge | -0.003 |
obby | -0.003 |
Twe | -0.003 |
Wil | -0.003 |
Frankie | -0.003 |
Token | with |
Token ID | 351 |
Feature activation | -8.22e-02 |
Pos logit contributions | |
badly | +0.003 |
Bun | +0.003 |
Twe | +0.003 |
Fe | +0.003 |
ge | +0.003 |
Neg logit contributions | |
amaz | -0.003 |
ends | -0.003 |
sill | -0.003 |
cheer | -0.003 |
pter | -0.003 |
Token | her |
Token ID | 607 |
Feature activation | +4.97e-01 |
Pos logit contributions | |
Bun | +0.001 |
Twe | +0.001 |
Twe | +0.001 |
Bobby | +0.001 |
ge | +0.001 |
Neg logit contributions | |
ends | -0.002 |
cheer | -0.002 |
believe | -0.002 |
ibrarian | -0.002 |
forward | -0.002 |
Token | toys |
Token ID | 14958 |
Feature activation | -7.07e-01 |
Pos logit contributions | |
cheer | +0.010 |
ends | +0.010 |
pretend | +0.009 |
sill | +0.009 |
wonder | +0.009 |
Neg logit contributions | |
ge | -0.010 |
Twe | -0.010 |
Wil | -0.009 |
badly | -0.009 |
obby | -0.009 |
Token | when |
Token ID | 618 |
Feature activation | -3.68e-01 |
Pos logit contributions | |
ge | +0.016 |
badly | +0.015 |
Twe | +0.014 |
way | +0.014 |
OO | +0.014 |
Neg logit contributions | |
cheer | -0.013 |
ends | -0.013 |
amaz | -0.012 |
wonder | -0.011 |
sill | -0.011 |
Token | she |
Token ID | 673 |
Feature activation | -5.39e-01 |
Pos logit contributions | |
ge | +0.012 |
Twe | +0.011 |
obby | +0.011 |
Wil | +0.011 |
OO | +0.011 |
Neg logit contributions | |
pretend | -0.004 |
ends | -0.004 |
cheer | -0.003 |
excited | -0.003 |
see | -0.003 |
Token | accidentally |
Token ID | 14716 |
Feature activation | -2.46e+00 |
Pos logit contributions | |
ge | +0.013 |
Twe | +0.012 |
Wil | +0.011 |
obby | +0.011 |
OO | +0.011 |
Neg logit contributions | |
ends | -0.010 |
cheer | -0.010 |
pretend | -0.010 |
e | -0.009 |
wonder | -0.009 |
Token | got |
Token ID | 1392 |
Feature activation | -7.41e-01 |
Pos logit contributions | |
ge | +0.061 |
obby | +0.061 |
Twe | +0.059 |
Wil | +0.057 |
way | +0.053 |
Neg logit contributions | |
pretend | -0.047 |
ends | -0.042 |
e | -0.041 |
cheer | -0.041 |
see | -0.040 |
Token | her |
Token ID | 607 |
Feature activation | -8.57e-03 |
Pos logit contributions | |
ge | +0.021 |
Twe | +0.021 |
obby | +0.020 |
OO | +0.020 |
Wil | +0.020 |
Neg logit contributions | |
ends | -0.010 |
cheer | -0.010 |
pretend | -0.010 |
excited | -0.009 |
e | -0.008 |
Token | finger |
Token ID | 7660 |
Feature activation | -9.98e-01 |
Pos logit contributions | |
ge | +0.000 |
Twe | +0.000 |
Wil | +0.000 |
obby | +0.000 |
shadow | +0.000 |
Neg logit contributions | |
pretend | -0.000 |
ends | -0.000 |
cheer | -0.000 |
sill | -0.000 |
excited | -0.000 |
Token | stuck |
Token ID | 7819 |
Feature activation | -9.15e-01 |
Pos logit contributions | |
obby | +0.031 |
Twe | +0.030 |
Wil | +0.029 |
ge | +0.028 |
Timothy | +0.028 |
Neg logit contributions | |
pretend | -0.011 |
cheer | -0.011 |
around | -0.011 |
like | -0.011 |
ends | -0.011 |
Token | in |
Token ID | 287 |
Feature activation | -4.52e-01 |
Pos logit contributions | |
Twe | +0.033 |
obby | +0.031 |
Wil | +0.031 |
Dogs | +0.030 |
Fe | +0.030 |
Neg logit contributions | |
around | -0.007 |
like | -0.006 |
ends | -0.005 |
pretend | -0.005 |
e | -0.005 |
Token | Tom |
Token ID | 4186 |
Feature activation | +8.89e-02 |
Pos logit contributions | |
ge | +0.001 |
Twe | +0.001 |
Wil | +0.001 |
obby | +0.001 |
badly | +0.001 |
Neg logit contributions | |
ends | -0.001 |
cheer | -0.001 |
pretend | -0.001 |
wonder | -0.001 |
sill | -0.001 |
Token | . |
Token ID | 13 |
Feature activation | +3.09e-01 |
Pos logit contributions | |
ends | +0.001 |
cheer | +0.001 |
pretend | +0.001 |
e | +0.001 |
excited | +0.001 |
Neg logit contributions | |
Twe | -0.003 |
ge | -0.002 |
Wil | -0.002 |
obby | -0.002 |
OO | -0.002 |
Token |
|
Token ID | 198 |
Feature activation | -1.33e-01 |
Pos logit contributions | |
ends | +0.005 |
cheer | +0.004 |
pretend | +0.004 |
excited | +0.004 |
see | +0.004 |
Neg logit contributions | |
ge | -0.009 |
Twe | -0.008 |
badly | -0.008 |
Wil | -0.008 |
obby | -0.008 |
Token |
|
Token ID | 198 |
Feature activation | -1.99e-01 |
Pos logit contributions | |
ge | +0.003 |
Twe | +0.003 |
Wil | +0.003 |
obby | +0.003 |
OO | +0.003 |
Neg logit contributions | |
ends | -0.002 |
cheer | -0.002 |
pretend | -0.002 |
wonder | -0.002 |
e | -0.002 |
Token | One |
Token ID | 3198 |
Feature activation | +6.40e-01 |
Pos logit contributions | |
ge | +0.004 |
Twe | +0.004 |
Wil | +0.003 |
Fe | +0.003 |
OO | +0.003 |
Neg logit contributions | |
cheer | -0.004 |
ends | -0.004 |
pretend | -0.004 |
wonder | -0.004 |
sill | -0.004 |
Token | toy |
Token ID | 13373 |
Feature activation | +2.03e+00 |
Pos logit contributions | |
ends | +0.010 |
cheer | +0.010 |
pretend | +0.009 |
wonder | +0.008 |
sill | +0.008 |
Neg logit contributions | |
ge | -0.017 |
Twe | -0.016 |
obby | -0.016 |
Wil | -0.015 |
OO | -0.015 |
Token | , |
Token ID | 11 |
Feature activation | +8.32e-01 |
Pos logit contributions | |
ends | +0.037 |
cheer | +0.036 |
pretend | +0.034 |
wonder | +0.031 |
sill | +0.031 |
Neg logit contributions | |
ge | -0.048 |
Twe | -0.046 |
Wil | -0.043 |
obby | -0.043 |
OO | -0.042 |
Token | a |
Token ID | 257 |
Feature activation | -3.09e-01 |
Pos logit contributions | |
ends | +0.012 |
pretend | +0.011 |
cheer | +0.011 |
excited | +0.009 |
wonder | +0.009 |
Neg logit contributions | |
ge | -0.024 |
Twe | -0.023 |
obby | -0.022 |
Wil | -0.022 |
OO | -0.022 |
Token | t |
Token ID | 256 |
Feature activation | +9.45e-01 |
Pos logit contributions | |
ge | +0.007 |
Twe | +0.007 |
obby | +0.006 |
Wil | +0.006 |
OO | +0.006 |
Neg logit contributions | |
cheer | -0.006 |
ends | -0.006 |
pretend | -0.006 |
wonder | -0.005 |
excited | -0.005 |
Token | eddy |
Token ID | 21874 |
Feature activation | +1.52e-01 |
Pos logit contributions | |
cheer | +0.027 |
ends | +0.027 |
pretend | +0.026 |
wonder | +0.024 |
excited | +0.024 |
Neg logit contributions | |
ge | -0.013 |
Twe | -0.012 |
obby | -0.012 |
Wil | -0.011 |
badly | -0.011 |
Token | bear |
Token ID | 6842 |
Feature activation | +5.61e-02 |
Pos logit contributions | |
pretend | +0.003 |
cheer | +0.003 |
e | +0.002 |
ends | +0.002 |
excited | +0.002 |
Neg logit contributions | |
ge | -0.004 |
Twe | -0.004 |
obby | -0.004 |
Wil | -0.004 |
Fe | -0.004 |
Token | worked |
Token ID | 3111 |
Feature activation | +4.48e-01 |
Pos logit contributions | |
cheer | +0.005 |
ends | +0.005 |
pretend | +0.005 |
wonder | +0.005 |
e | +0.005 |
Neg logit contributions | |
ge | -0.007 |
Twe | -0.007 |
obby | -0.006 |
Wil | -0.006 |
OO | -0.006 |
Token | . |
Token ID | 13 |
Feature activation | -7.76e-02 |
Pos logit contributions | |
ends | +0.006 |
pretend | +0.006 |
cheer | +0.006 |
like | +0.006 |
excited | +0.006 |
Neg logit contributions | |
Twe | -0.012 |
Wil | -0.012 |
ge | -0.011 |
OO | -0.011 |
gie | -0.011 |
Token | He |
Token ID | 679 |
Feature activation | -3.85e-01 |
Pos logit contributions | |
ge | +0.002 |
Twe | +0.002 |
OO | +0.002 |
ety | +0.002 |
badly | +0.002 |
Neg logit contributions | |
ends | -0.001 |
excited | -0.001 |
cheer | -0.001 |
sill | -0.001 |
wonder | -0.001 |
Token | smiled |
Token ID | 13541 |
Feature activation | +6.95e-01 |
Pos logit contributions | |
ge | +0.009 |
Twe | +0.009 |
Wil | +0.008 |
obby | +0.008 |
OO | +0.008 |
Neg logit contributions | |
ends | -0.007 |
cheer | -0.007 |
pretend | -0.007 |
wonder | -0.006 |
e | -0.006 |
Token | in |
Token ID | 287 |
Feature activation | +5.10e-02 |
Pos logit contributions | |
ends | +0.011 |
cheer | +0.011 |
pretend | +0.010 |
e | +0.010 |
reporters | +0.009 |
Neg logit contributions | |
ge | -0.017 |
Twe | -0.016 |
obby | -0.016 |
badly | -0.016 |
Wil | -0.016 |
Token | amaz |
Token ID | 40642 |
Feature activation | +3.14e+00 |
Pos logit contributions | |
ends | +0.001 |
cheer | +0.000 |
pretend | +0.000 |
reporters | +0.000 |
e | +0.000 |
Neg logit contributions | |
ge | -0.002 |
Twe | -0.001 |
Wil | -0.001 |
obby | -0.001 |
badly | -0.001 |
Token | ement |
Token ID | 972 |
Feature activation | +1.84e-01 |
Pos logit contributions | |
ends | +0.079 |
cheer | +0.075 |
pretend | +0.073 |
reporters | +0.067 |
wonder | +0.066 |
Neg logit contributions | |
ge | -0.055 |
Twe | -0.050 |
obby | -0.048 |
badly | -0.048 |
Wil | -0.047 |
Token | and |
Token ID | 290 |
Feature activation | +5.88e-01 |
Pos logit contributions | |
ends | +0.002 |
cheer | +0.002 |
pretend | +0.002 |
excited | +0.002 |
sill | +0.002 |
Neg logit contributions | |
Twe | -0.006 |
Wil | -0.005 |
ge | -0.005 |
OO | -0.005 |
Fe | -0.005 |
Token | felt |
Token ID | 2936 |
Feature activation | +3.09e-01 |
Pos logit contributions | |
ends | +0.011 |
cheer | +0.010 |
pretend | +0.010 |
e | +0.009 |
wonder | +0.009 |
Neg logit contributions | |
ge | -0.014 |
Twe | -0.013 |
Wil | -0.013 |
obby | -0.012 |
badly | -0.012 |
Token | better |
Token ID | 1365 |
Feature activation | +8.03e-01 |
Pos logit contributions | |
ends | +0.005 |
reporters | +0.004 |
pretend | +0.004 |
ibrarian | +0.004 |
cheer | +0.004 |
Neg logit contributions | |
badly | -0.008 |
ge | -0.007 |
Twe | -0.007 |
Bobby | -0.007 |
Twe | -0.007 |
Token | . |
Token ID | 13 |
Feature activation | -2.73e-01 |
Pos logit contributions | |
ends | +0.016 |
cheer | +0.016 |
pretend | +0.015 |
e | +0.014 |
reporters | +0.014 |
Neg logit contributions | |
ge | -0.018 |
badly | -0.016 |
Twe | -0.015 |
obby | -0.015 |
Wil | -0.015 |
Token | He |
Token ID | 679 |
Feature activation | -6.98e-01 |
Pos logit contributions | |
ge | +0.007 |
Twe | +0.006 |
badly | +0.006 |
OO | +0.006 |
poisonous | +0.006 |
Neg logit contributions | |
ends | -0.003 |
sill | -0.003 |
excited | -0.003 |
cheer | -0.003 |
see | -0.003 |
Token | was |
Token ID | 373 |
Feature activation | +2.13e-01 |
Pos logit contributions | |
ge | +0.018 |
Twe | +0.018 |
obby | +0.017 |
Wil | +0.017 |
OO | +0.017 |
Neg logit contributions | |
pretend | -0.011 |
cheer | -0.011 |
ends | -0.011 |
excited | -0.010 |
wonder | -0.010 |
Token | amazed |
Token ID | 24562 |
Feature activation | +1.17e+00 |
Pos logit contributions | |
ends | +0.003 |
cheer | +0.003 |
pretend | +0.003 |
reporters | +0.003 |
wonder | +0.003 |
Neg logit contributions | |
ge | -0.005 |
Twe | -0.005 |
Wil | -0.005 |
badly | -0.005 |
obby | -0.005 |
Token | at |
Token ID | 379 |
Feature activation | -2.95e-01 |
Pos logit contributions | |
ends | +0.018 |
cheer | +0.017 |
pretend | +0.017 |
wonder | +0.015 |
sill | +0.014 |
Neg logit contributions | |
ge | -0.031 |
Twe | -0.030 |
Wil | -0.029 |
obby | -0.028 |
OO | -0.028 |
Token | how |
Token ID | 703 |
Feature activation | -6.87e-01 |
Pos logit contributions | |
Twe | +0.010 |
OO | +0.009 |
EEP | +0.009 |
Wil | +0.009 |
obby | +0.009 |
Neg logit contributions | |
cheer | -0.003 |
pretend | -0.003 |
around | -0.003 |
ends | -0.003 |
wonder | -0.003 |
Token | shiny |
Token ID | 22441 |
Feature activation | +2.06e+00 |
Pos logit contributions | |
ge | +0.022 |
Twe | +0.022 |
OO | +0.022 |
gie | +0.021 |
Johnson | +0.021 |
Neg logit contributions | |
excited | -0.009 |
cheer | -0.008 |
happy | -0.007 |
proud | -0.007 |
wonder | -0.007 |
Token | it |
Token ID | 340 |
Feature activation | +2.32e-01 |
Pos logit contributions | |
pretend | +0.026 |
cheer | +0.024 |
like | +0.022 |
ends | +0.021 |
around | +0.021 |
Neg logit contributions | |
ge | -0.061 |
obby | -0.060 |
Twe | -0.059 |
OO | -0.058 |
Wil | -0.057 |
Token | was |
Token ID | 373 |
Feature activation | -8.82e-02 |
Pos logit contributions | |
ends | +0.004 |
cheer | +0.004 |
pretend | +0.004 |
wonder | +0.004 |
sill | +0.004 |
Neg logit contributions | |
ge | -0.005 |
Twe | -0.005 |
obby | -0.005 |
Wil | -0.005 |
OO | -0.005 |
Token | ! |
Token ID | 0 |
Feature activation | -4.60e-02 |
Pos logit contributions | |
Twe | +0.003 |
obby | +0.003 |
Wil | +0.002 |
ge | +0.002 |
OO | +0.002 |
Neg logit contributions | |
like | -0.001 |
pretend | -0.001 |
cheer | -0.001 |
ends | -0.001 |
excited | -0.001 |
Token |
|
Token ID | 198 |
Feature activation | -1.72e-01 |
Pos logit contributions | |
ge | +0.002 |
OO | +0.001 |
badly | +0.001 |
poisonous | +0.001 |
rated | +0.001 |
Neg logit contributions | |
ends | -0.000 |
excited | -0.000 |
sill | -0.000 |
cheer | -0.000 |
see | -0.000 |
Token |
|
Token ID | 198 |
Feature activation | -1.41e-01 |
Pos logit contributions | |
ge | +0.005 |
Twe | +0.005 |
OO | +0.005 |
gie | +0.005 |
iously | +0.004 |
Neg logit contributions | |
ends | -0.002 |
cheer | -0.002 |
pretend | -0.002 |
excited | -0.002 |
see | -0.002 |
Token | at |
Token ID | 379 |
Feature activation | +1.56e-01 |
Pos logit contributions | |
ends | +0.006 |
cheer | +0.006 |
e | +0.006 |
pretend | +0.005 |
reporters | +0.005 |
Neg logit contributions | |
ge | -0.012 |
badly | -0.012 |
Twe | -0.011 |
obby | -0.011 |
Bobby | -0.011 |
Token | the |
Token ID | 262 |
Feature activation | -3.71e-02 |
Pos logit contributions | |
cheer | +0.002 |
pretend | +0.002 |
wonder | +0.002 |
ends | +0.002 |
sill | +0.002 |
Neg logit contributions | |
ge | -0.004 |
OO | -0.004 |
Twe | -0.004 |
obby | -0.004 |
Wil | -0.004 |
Token | bright |
Token ID | 6016 |
Feature activation | +1.31e+00 |
Pos logit contributions | |
ge | +0.001 |
Twe | +0.001 |
obby | +0.001 |
Wil | +0.001 |
OO | +0.001 |
Neg logit contributions | |
cheer | -0.001 |
pretend | -0.001 |
ends | -0.001 |
sill | -0.001 |
wonder | -0.001 |
Token | colours |
Token ID | 18915 |
Feature activation | +5.06e-01 |
Pos logit contributions | |
cheer | +0.021 |
pretend | +0.018 |
wonder | +0.017 |
sill | +0.017 |
circles | +0.016 |
Neg logit contributions | |
ge | -0.037 |
Twe | -0.037 |
Wil | -0.035 |
obby | -0.035 |
shadow | -0.034 |
Token | with |
Token ID | 351 |
Feature activation | -4.16e-02 |
Pos logit contributions | |
ends | +0.009 |
cheer | +0.008 |
pretend | +0.008 |
reporters | +0.007 |
e | +0.007 |
Neg logit contributions | |
ge | -0.013 |
Twe | -0.011 |
badly | -0.011 |
Wil | -0.011 |
obby | -0.011 |
Token | amaz |
Token ID | 40642 |
Feature activation | +3.42e+00 |
Pos logit contributions | |
ge | +0.001 |
Twe | +0.001 |
Wil | +0.001 |
badly | +0.001 |
obby | +0.001 |
Neg logit contributions | |
ends | -0.001 |
cheer | -0.001 |
pretend | -0.001 |
reporters | -0.001 |
e | -0.001 |
Token | ement |
Token ID | 972 |
Feature activation | +6.22e-01 |
Pos logit contributions | |
ends | +0.082 |
cheer | +0.078 |
pretend | +0.076 |
wonder | +0.070 |
e | +0.069 |
Neg logit contributions | |
ge | -0.063 |
Twe | -0.058 |
obby | -0.055 |
Wil | -0.055 |
badly | -0.055 |
Token | and |
Token ID | 290 |
Feature activation | +3.82e-02 |
Pos logit contributions | |
ends | +0.009 |
cheer | +0.009 |
pretend | +0.009 |
excited | +0.007 |
sill | +0.007 |
Neg logit contributions | |
Twe | -0.017 |
ge | -0.016 |
Wil | -0.016 |
obby | -0.016 |
OO | -0.015 |
Token | awe |
Token ID | 25030 |
Feature activation | +9.83e-01 |
Pos logit contributions | |
ends | +0.001 |
e | +0.001 |
cheer | +0.001 |
pretend | +0.001 |
reporters | +0.001 |
Neg logit contributions | |
ge | -0.001 |
Twe | -0.001 |
Bobby | -0.001 |
badly | -0.001 |
Wil | -0.001 |
Token | . |
Token ID | 13 |
Feature activation | -1.39e-01 |
Pos logit contributions | |
cheer | +0.015 |
ends | +0.015 |
pretend | +0.014 |
excited | +0.013 |
sill | +0.013 |
Neg logit contributions | |
Twe | -0.026 |
ge | -0.025 |
Wil | -0.024 |
obby | -0.024 |
OO | -0.024 |
Token | He |
Token ID | 679 |
Feature activation | -8.33e-01 |
Pos logit contributions | |
ge | +0.005 |
OO | +0.004 |
badly | +0.004 |
rated | +0.004 |
Twe | +0.004 |
Neg logit contributions | |
ends | -0.002 |
excited | -0.002 |
cheer | -0.002 |
sill | -0.001 |
see | -0.001 |
Token | they |
Token ID | 484 |
Feature activation | +7.62e-02 |
Pos logit contributions | |
ends | +0.005 |
cheer | +0.005 |
pretend | +0.005 |
sill | +0.004 |
wonder | +0.004 |
Neg logit contributions | |
ge | -0.009 |
Twe | -0.008 |
Wil | -0.008 |
obby | -0.008 |
badly | -0.008 |
Token | enjoyed |
Token ID | 8359 |
Feature activation | +3.22e-01 |
Pos logit contributions | |
ends | +0.002 |
cheer | +0.002 |
sill | +0.001 |
pretend | +0.001 |
e | +0.001 |
Neg logit contributions | |
ge | -0.002 |
Twe | -0.001 |
Wil | -0.001 |
obby | -0.001 |
Wil | -0.001 |
Token | the |
Token ID | 262 |
Feature activation | -4.12e-02 |
Pos logit contributions | |
ends | +0.006 |
cheer | +0.006 |
sill | +0.005 |
pretend | +0.005 |
ibrarian | +0.005 |
Neg logit contributions | |
ge | -0.007 |
Twe | -0.006 |
Wil | -0.006 |
badly | -0.006 |
Wil | -0.006 |
Token | sights |
Token ID | 21343 |
Feature activation | +6.87e-01 |
Pos logit contributions | |
ge | +0.001 |
Twe | +0.001 |
Frankie | +0.001 |
Wil | +0.001 |
way | +0.001 |
Neg logit contributions | |
ends | -0.001 |
cheer | -0.001 |
e | -0.001 |
pretend | -0.001 |
pler | -0.001 |
Token | and |
Token ID | 290 |
Feature activation | -1.42e-01 |
Pos logit contributions | |
ends | +0.013 |
cheer | +0.011 |
pretend | +0.011 |
e | +0.010 |
wonder | +0.010 |
Neg logit contributions | |
ge | -0.016 |
Twe | -0.015 |
badly | -0.014 |
Wil | -0.014 |
obby | -0.014 |
Token | sounds |
Token ID | 5238 |
Feature activation | +2.04e+00 |
Pos logit contributions | |
ge | +0.003 |
Twe | +0.003 |
Twe | +0.003 |
Bobby | +0.003 |
Wil | +0.003 |
Neg logit contributions | |
ends | -0.003 |
cheer | -0.002 |
ibrarian | -0.002 |
igan | -0.002 |
e | -0.002 |
Token | of |
Token ID | 286 |
Feature activation | +8.07e-01 |
Pos logit contributions | |
ends | +0.043 |
cheer | +0.040 |
pretend | +0.038 |
e | +0.038 |
sill | +0.037 |
Neg logit contributions | |
ge | -0.041 |
Twe | -0.038 |
badly | -0.036 |
Wil | -0.036 |
obby | -0.036 |
Token | their |
Token ID | 511 |
Feature activation | +4.58e-01 |
Pos logit contributions | |
ends | +0.013 |
cheer | +0.013 |
pretend | +0.012 |
wonder | +0.011 |
e | +0.011 |
Neg logit contributions | |
ge | -0.021 |
Twe | -0.020 |
Wil | -0.019 |
obby | -0.019 |
OO | -0.018 |
Token | new |
Token ID | 649 |
Feature activation | +3.29e-01 |
Pos logit contributions | |
ends | +0.009 |
cheer | +0.008 |
pretend | +0.008 |
sill | +0.007 |
e | +0.007 |
Neg logit contributions | |
ge | -0.010 |
Twe | -0.010 |
obby | -0.009 |
Wil | -0.009 |
badly | -0.009 |
Token | home |
Token ID | 1363 |
Feature activation | -6.15e-02 |
Pos logit contributions | |
ends | +0.006 |
cheer | +0.005 |
pretend | +0.005 |
e | +0.005 |
sill | +0.005 |
Neg logit contributions | |
ge | -0.008 |
Twe | -0.008 |
obby | -0.007 |
Wil | -0.007 |
badly | -0.007 |
Token | . |
Token ID | 13 |
Feature activation | -5.66e-01 |
Pos logit contributions | |
ge | +0.001 |
Twe | +0.001 |
Wil | +0.001 |
obby | +0.001 |
OO | +0.001 |
Neg logit contributions | |
ends | -0.001 |
cheer | -0.001 |
pretend | -0.001 |
wonder | -0.001 |
e | -0.001 |
Token | but |
Token ID | 475 |
Feature activation | +3.51e-01 |
Pos logit contributions | |
cheer | +0.001 |
ends | +0.001 |
pretend | +0.001 |
excited | +0.001 |
like | +0.001 |
Neg logit contributions | |
Twe | -0.004 |
ge | -0.004 |
Wil | -0.004 |
OO | -0.003 |
obby | -0.003 |
Token | ob |
Token ID | 909 |
Feature activation | -6.76e-01 |
Pos logit contributions | |
ends | +0.007 |
cheer | +0.006 |
pretend | +0.006 |
wonder | +0.006 |
excited | +0.005 |
Neg logit contributions | |
ge | -0.008 |
Twe | -0.008 |
Wil | -0.008 |
obby | -0.008 |
OO | -0.007 |
Token | eyed |
Token ID | 18834 |
Feature activation | -5.24e-01 |
Pos logit contributions | |
ge | +0.021 |
OO | +0.020 |
Twe | +0.020 |
Wil | +0.020 |
obby | +0.020 |
Neg logit contributions | |
cheer | -0.007 |
ends | -0.007 |
pretend | -0.006 |
e | -0.005 |
excited | -0.005 |
Token | his |
Token ID | 465 |
Feature activation | -1.31e-01 |
Pos logit contributions | |
Twe | +0.017 |
ge | +0.016 |
Wil | +0.016 |
OO | +0.016 |
obby | +0.015 |
Neg logit contributions | |
cheer | -0.006 |
pretend | -0.006 |
excited | -0.005 |
see | -0.005 |
sill | -0.004 |
Token | mom |
Token ID | 1995 |
Feature activation | +1.45e-01 |
Pos logit contributions | |
Twe | +0.004 |
ge | +0.004 |
Wil | +0.003 |
gie | +0.003 |
OO | +0.003 |
Neg logit contributions | |
cheer | -0.002 |
ends | -0.002 |
pretend | -0.002 |
wonder | -0.002 |
excited | -0.002 |
Token | and |
Token ID | 290 |
Feature activation | +9.36e-01 |
Pos logit contributions | |
cheer | +0.003 |
ends | +0.002 |
pretend | +0.002 |
sill | +0.002 |
e | +0.002 |
Neg logit contributions | |
Twe | -0.004 |
ge | -0.003 |
Wil | -0.003 |
OO | -0.003 |
obby | -0.003 |
Token | put |
Token ID | 1234 |
Feature activation | -5.51e-01 |
Pos logit contributions | |
ends | +0.018 |
cheer | +0.017 |
pretend | +0.017 |
e | +0.015 |
wonder | +0.015 |
Neg logit contributions | |
ge | -0.021 |
Twe | -0.020 |
Wil | -0.019 |
obby | -0.019 |
badly | -0.018 |
Token | away |
Token ID | 1497 |
Feature activation | +3.50e-01 |
Pos logit contributions | |
ge | +0.015 |
Twe | +0.015 |
obby | +0.014 |
Wil | +0.014 |
OO | +0.014 |
Neg logit contributions | |
ends | -0.008 |
cheer | -0.008 |
pretend | -0.008 |
e | -0.007 |
wonder | -0.006 |
Token | his |
Token ID | 465 |
Feature activation | -9.37e-01 |
Pos logit contributions | |
cheer | +0.005 |
pretend | +0.005 |
ends | +0.005 |
sill | +0.004 |
excited | +0.004 |
Neg logit contributions | |
Twe | -0.010 |
ge | -0.010 |
OO | -0.009 |
obby | -0.009 |
Wil | -0.009 |
Token | tiny |
Token ID | 7009 |
Feature activation | -7.85e-01 |
Pos logit contributions | |
ge | +0.023 |
Twe | +0.022 |
Wil | +0.022 |
obby | +0.022 |
OO | +0.021 |
Neg logit contributions | |
pretend | -0.017 |
cheer | -0.017 |
ends | -0.016 |
excited | -0.015 |
sill | -0.015 |
Token | pants |
Token ID | 12581 |
Feature activation | -9.59e-01 |
Pos logit contributions | |
ge | +0.019 |
Twe | +0.019 |
Wil | +0.019 |
OO | +0.018 |
obby | +0.018 |
Neg logit contributions | |
cheer | -0.014 |
pretend | -0.014 |
ends | -0.012 |
sill | -0.012 |
excited | -0.012 |
Token | lamp |
Token ID | 20450 |
Feature activation | -9.70e-02 |
Pos logit contributions | |
pretend | +0.005 |
cheer | +0.005 |
ends | +0.004 |
wonder | +0.004 |
sill | +0.004 |
Neg logit contributions | |
ge | -0.006 |
Twe | -0.006 |
obby | -0.006 |
Wil | -0.006 |
shadow | -0.006 |
Token | . |
Token ID | 13 |
Feature activation | +1.92e-01 |
Pos logit contributions | |
ge | +0.002 |
Twe | +0.002 |
badly | +0.002 |
obby | +0.002 |
Wil | +0.002 |
Neg logit contributions | |
ends | -0.002 |
cheer | -0.002 |
pretend | -0.002 |
wonder | -0.002 |
reporters | -0.002 |
Token | He |
Token ID | 679 |
Feature activation | -5.38e-01 |
Pos logit contributions | |
ends | +0.004 |
cheer | +0.004 |
pretend | +0.004 |
sill | +0.004 |
wonder | +0.004 |
Neg logit contributions | |
ge | -0.004 |
Twe | -0.004 |
OO | -0.004 |
Wil | -0.004 |
obby | -0.004 |
Token | was |
Token ID | 373 |
Feature activation | +3.79e-01 |
Pos logit contributions | |
ge | +0.012 |
Twe | +0.012 |
Wil | +0.011 |
obby | +0.011 |
gie | +0.011 |
Neg logit contributions | |
ends | -0.010 |
cheer | -0.010 |
pretend | -0.009 |
sill | -0.008 |
reporters | -0.008 |
Token | the |
Token ID | 262 |
Feature activation | -3.35e-01 |
Pos logit contributions | |
cheer | +0.007 |
ends | +0.006 |
pretend | +0.006 |
wonder | +0.006 |
amaz | +0.006 |
Neg logit contributions | |
ge | -0.008 |
badly | -0.008 |
Twe | -0.007 |
Wil | -0.007 |
Wil | -0.007 |
Token | one |
Token ID | 530 |
Feature activation | +7.15e-01 |
Pos logit contributions | |
ge | +0.008 |
Twe | +0.008 |
Wil | +0.008 |
badly | +0.008 |
gie | +0.008 |
Neg logit contributions | |
ends | -0.005 |
cheer | -0.005 |
pretend | -0.005 |
e | -0.004 |
wonder | -0.004 |
Token | who |
Token ID | 508 |
Feature activation | +1.14e-01 |
Pos logit contributions | |
ends | +0.013 |
cheer | +0.013 |
pretend | +0.011 |
wonder | +0.011 |
reporters | +0.011 |
Neg logit contributions | |
ge | -0.016 |
badly | -0.015 |
Twe | -0.014 |
obby | -0.014 |
Wil | -0.014 |
Token | made |
Token ID | 925 |
Feature activation | +1.95e-01 |
Pos logit contributions | |
ends | +0.002 |
cheer | +0.002 |
pretend | +0.002 |
e | +0.002 |
wonder | +0.002 |
Neg logit contributions | |
ge | -0.003 |
Twe | -0.003 |
Wil | -0.003 |
obby | -0.003 |
OO | -0.003 |
Token | the |
Token ID | 262 |
Feature activation | -2.37e-01 |
Pos logit contributions | |
ends | +0.003 |
cheer | +0.003 |
pretend | +0.003 |
wonder | +0.003 |
e | +0.003 |
Neg logit contributions | |
ge | -0.005 |
Twe | -0.005 |
Wil | -0.005 |
obby | -0.005 |
OO | -0.004 |
Token | noise |
Token ID | 7838 |
Feature activation | +3.42e-01 |
Pos logit contributions | |
ge | +0.006 |
Twe | +0.006 |
obby | +0.005 |
Wil | +0.005 |
OO | +0.005 |
Neg logit contributions | |
cheer | -0.004 |
pretend | -0.004 |
ends | -0.004 |
sill | -0.004 |
wonder | -0.004 |
Token | and |
Token ID | 290 |
Feature activation | -2.05e-01 |
Pos logit contributions | |
ends | +0.006 |
cheer | +0.006 |
pretend | +0.006 |
wonder | +0.005 |
sill | +0.005 |
Neg logit contributions | |
ge | -0.008 |
Twe | -0.008 |
Wil | -0.007 |
obby | -0.007 |
OO | -0.007 |
Token | . |
Token ID | 13 |
Feature activation | -2.18e-02 |
Pos logit contributions | |
cheer | +0.001 |
ends | +0.001 |
pretend | +0.001 |
sill | +0.001 |
wonder | +0.001 |
Neg logit contributions | |
Twe | -0.001 |
ge | -0.001 |
Wil | -0.001 |
obby | -0.001 |
OO | -0.001 |
Token | They |
Token ID | 1119 |
Feature activation | +3.55e-01 |
Pos logit contributions | |
ge | +0.001 |
Twe | +0.001 |
badly | +0.000 |
Wil | +0.000 |
OO | +0.000 |
Neg logit contributions | |
ends | -0.000 |
cheer | -0.000 |
pretend | -0.000 |
excited | -0.000 |
wonder | -0.000 |
Token | all |
Token ID | 477 |
Feature activation | +2.91e-01 |
Pos logit contributions | |
ends | +0.007 |
cheer | +0.007 |
pretend | +0.006 |
sill | +0.006 |
wonder | +0.006 |
Neg logit contributions | |
ge | -0.008 |
Twe | -0.008 |
Wil | -0.007 |
obby | -0.007 |
OO | -0.007 |
Token | had |
Token ID | 550 |
Feature activation | +3.47e-01 |
Pos logit contributions | |
cheer | +0.005 |
pretend | +0.005 |
ends | +0.005 |
wonder | +0.004 |
e | +0.004 |
Neg logit contributions | |
Twe | -0.007 |
ge | -0.007 |
Wil | -0.007 |
obby | -0.007 |
OO | -0.007 |
Token | so |
Token ID | 523 |
Feature activation | +2.52e-01 |
Pos logit contributions | |
ends | +0.005 |
cheer | +0.005 |
pretend | +0.005 |
wonder | +0.005 |
excited | +0.004 |
Neg logit contributions | |
ge | -0.009 |
Twe | -0.009 |
obby | -0.008 |
Wil | -0.008 |
OO | -0.008 |
Token | much |
Token ID | 881 |
Feature activation | +4.77e-01 |
Pos logit contributions | |
cheer | +0.005 |
ends | +0.005 |
pretend | +0.005 |
sill | +0.005 |
reporters | +0.005 |
Neg logit contributions | |
ge | -0.005 |
Twe | -0.005 |
badly | -0.005 |
obby | -0.005 |
Wil | -0.005 |
Token | fun |
Token ID | 1257 |
Feature activation | +1.01e+00 |
Pos logit contributions | |
ends | +0.007 |
cheer | +0.006 |
pretend | +0.006 |
e | +0.006 |
sill | +0.005 |
Neg logit contributions | |
ge | -0.013 |
Twe | -0.012 |
Wil | -0.012 |
obby | -0.012 |
badly | -0.012 |
Token | ! |
Token ID | 0 |
Feature activation | -2.09e-01 |
Pos logit contributions | |
ends | +0.018 |
cheer | +0.018 |
pretend | +0.017 |
wonder | +0.015 |
e | +0.015 |
Neg logit contributions | |
ge | -0.024 |
Twe | -0.023 |
Wil | -0.022 |
obby | -0.022 |
OO | -0.021 |
Token | She |
Token ID | 1375 |
Feature activation | -4.72e-01 |
Pos logit contributions | |
ge | +0.006 |
Twe | +0.005 |
Wil | +0.005 |
shadow | +0.005 |
badly | +0.005 |
Neg logit contributions | |
ends | -0.003 |
cheer | -0.003 |
pretend | -0.003 |
excited | -0.003 |
wonder | -0.003 |
Token | thought |
Token ID | 1807 |
Feature activation | -2.35e-01 |
Pos logit contributions | |
ge | +0.011 |
Twe | +0.010 |
Wil | +0.010 |
obby | +0.010 |
OO | +0.010 |
Neg logit contributions | |
ends | -0.009 |
cheer | -0.009 |
pretend | -0.008 |
wonder | -0.007 |
e | -0.007 |
Token | other |
Token ID | 584 |
Feature activation | +7.97e-01 |
Pos logit contributions | |
Twe | +0.007 |
ge | +0.007 |
Wil | +0.007 |
obby | +0.006 |
OO | +0.006 |
Neg logit contributions | |
ends | -0.003 |
cheer | -0.003 |
pretend | -0.003 |
excited | -0.003 |
like | -0.002 |
Token | stirred |
Token ID | 33091 |
Feature activation | -4.57e-02 |
Pos logit contributions | |
ends | +0.002 |
cheer | +0.002 |
pretend | +0.002 |
wonder | +0.002 |
sill | +0.002 |
Neg logit contributions | |
ge | -0.003 |
Twe | -0.003 |
Wil | -0.003 |
obby | -0.003 |
OO | -0.003 |
Token | it |
Token ID | 340 |
Feature activation | -1.68e-01 |
Pos logit contributions | |
ge | +0.001 |
Twe | +0.001 |
Wil | +0.001 |
obby | +0.001 |
OO | +0.001 |
Neg logit contributions | |
ends | -0.001 |
cheer | -0.001 |
pretend | -0.001 |
wonder | -0.001 |
reporters | -0.001 |
Token | around |
Token ID | 1088 |
Feature activation | +2.56e-01 |
Pos logit contributions | |
ge | +0.005 |
Twe | +0.004 |
Wil | +0.004 |
obby | +0.004 |
OO | +0.004 |
Neg logit contributions | |
ends | -0.002 |
cheer | -0.002 |
pretend | -0.002 |
sill | -0.002 |
wonder | -0.002 |
Token | . |
Token ID | 13 |
Feature activation | -1.59e-01 |
Pos logit contributions | |
cheer | +0.004 |
pretend | +0.004 |
ends | +0.004 |
wonder | +0.004 |
e | +0.003 |
Neg logit contributions | |
ge | -0.007 |
Twe | -0.006 |
Wil | -0.006 |
obby | -0.006 |
OO | -0.006 |
Token | When |
Token ID | 1649 |
Feature activation | -5.45e-01 |
Pos logit contributions | |
ge | +0.005 |
Twe | +0.004 |
gie | +0.004 |
badly | +0.004 |
OO | +0.004 |
Neg logit contributions | |
ends | -0.002 |
excited | -0.002 |
sill | -0.002 |
wonder | -0.002 |
cheer | -0.002 |
Token | it |
Token ID | 340 |
Feature activation | +3.64e-01 |
Pos logit contributions | |
ge | +0.017 |
Twe | +0.016 |
obby | +0.015 |
gie | +0.015 |
OO | +0.015 |
Neg logit contributions | |
cheer | -0.007 |
ends | -0.007 |
pretend | -0.006 |
wonder | -0.006 |
excited | -0.006 |
Token | was |
Token ID | 373 |
Feature activation | -5.17e-01 |
Pos logit contributions | |
ends | +0.009 |
cheer | +0.008 |
pretend | +0.008 |
reporters | +0.008 |
wonder | +0.008 |
Neg logit contributions | |
ge | -0.006 |
Twe | -0.006 |
obby | -0.006 |
Wil | -0.006 |
OO | -0.005 |
Token | nice |
Token ID | 3621 |
Feature activation | +7.15e-01 |
Pos logit contributions | |
ge | +0.014 |
Twe | +0.014 |
obby | +0.013 |
Wil | +0.013 |
gie | +0.013 |
Neg logit contributions | |
ends | -0.007 |
cheer | -0.007 |
pretend | -0.007 |
excited | -0.006 |
wonder | -0.006 |
Token | and |
Token ID | 290 |
Feature activation | -4.54e-01 |
Pos logit contributions | |
ends | +0.009 |
cheer | +0.009 |
pretend | +0.009 |
wonder | +0.007 |
sill | +0.007 |
Neg logit contributions | |
ge | -0.021 |
Twe | -0.020 |
Wil | -0.019 |
obby | -0.019 |
OO | -0.019 |
Token | cooked |
Token ID | 15847 |
Feature activation | +3.12e-01 |
Pos logit contributions | |
ge | +0.009 |
Twe | +0.009 |
Wil | +0.008 |
badly | +0.008 |
Bobby | +0.008 |
Neg logit contributions | |
ends | -0.010 |
cheer | -0.009 |
pretend | -0.009 |
e | -0.008 |
reporters | -0.008 |
Token | , |
Token ID | 11 |
Feature activation | +4.93e-02 |
Pos logit contributions | |
cheer | +0.004 |
ends | +0.004 |
pretend | +0.004 |
wonder | +0.003 |
e | +0.003 |
Neg logit contributions | |
ge | -0.009 |
Twe | -0.009 |
Wil | -0.008 |
obby | -0.008 |
OO | -0.008 |
Token | time |
Token ID | 640 |
Feature activation | -4.61e-01 |
Pos logit contributions | |
ge | +0.000 |
Twe | +0.000 |
Wil | +0.000 |
obby | +0.000 |
OO | +0.000 |
Neg logit contributions | |
ends | -0.000 |
cheer | -0.000 |
pretend | -0.000 |
wonder | -0.000 |
e | -0.000 |
Token | there |
Token ID | 612 |
Feature activation | +7.10e-01 |
Pos logit contributions | |
ge | +0.012 |
Twe | +0.012 |
obby | +0.012 |
Wil | +0.012 |
badly | +0.011 |
Neg logit contributions | |
cheer | -0.007 |
ends | -0.007 |
pretend | -0.006 |
sill | -0.006 |
wonder | -0.005 |
Token | was |
Token ID | 373 |
Feature activation | -5.19e-01 |
Pos logit contributions | |
ends | +0.016 |
cheer | +0.016 |
pretend | +0.015 |
reporters | +0.014 |
sill | +0.014 |
Neg logit contributions | |
ge | -0.013 |
Twe | -0.013 |
obby | -0.012 |
Wil | -0.012 |
badly | -0.012 |
Token | a |
Token ID | 257 |
Feature activation | -7.68e-01 |
Pos logit contributions | |
Twe | +0.012 |
ge | +0.012 |
Wil | +0.011 |
obby | +0.011 |
badly | +0.011 |
Neg logit contributions | |
cheer | -0.010 |
ends | -0.010 |
pretend | -0.008 |
sill | -0.008 |
reporters | -0.008 |
Token | little |
Token ID | 1310 |
Feature activation | -1.05e+00 |
Pos logit contributions | |
ge | +0.014 |
Twe | +0.012 |
obby | +0.011 |
Wil | +0.011 |
OO | +0.011 |
Neg logit contributions | |
ends | -0.019 |
cheer | -0.019 |
pretend | -0.019 |
wonder | -0.017 |
excited | -0.017 |
Token | boy |
Token ID | 2933 |
Feature activation | +3.87e-01 |
Pos logit contributions | |
ge | +0.023 |
Twe | +0.021 |
Wil | +0.020 |
obby | +0.020 |
OO | +0.019 |
Neg logit contributions | |
cheer | -0.021 |
ends | -0.021 |
pretend | -0.021 |
wonder | -0.019 |
excited | -0.018 |
Token | who |
Token ID | 508 |
Feature activation | -3.40e-01 |
Pos logit contributions | |
ends | +0.008 |
cheer | +0.007 |
pretend | +0.007 |
wonder | +0.006 |
sill | +0.006 |
Neg logit contributions | |
ge | -0.009 |
Twe | -0.008 |
Wil | -0.008 |
obby | -0.008 |
badly | -0.007 |
Token | loved |
Token ID | 6151 |
Feature activation | -9.37e-02 |
Pos logit contributions | |
ge | +0.009 |
Twe | +0.009 |
Wil | +0.008 |
obby | +0.008 |
OO | +0.008 |
Neg logit contributions | |
ends | -0.005 |
cheer | -0.005 |
pretend | -0.005 |
wonder | -0.005 |
e | -0.004 |
Token | to |
Token ID | 284 |
Feature activation | -2.86e-01 |
Pos logit contributions | |
ge | +0.003 |
Twe | +0.003 |
obby | +0.002 |
OO | +0.002 |
shadow | +0.002 |
Neg logit contributions | |
pretend | -0.001 |
cheer | -0.001 |
excited | -0.001 |
ends | -0.001 |
wonder | -0.001 |
Token | wander |
Token ID | 27776 |
Feature activation | -4.32e-01 |
Pos logit contributions | |
ge | +0.009 |
Twe | +0.008 |
obby | +0.008 |
Wil | +0.008 |
OO | +0.008 |
Neg logit contributions | |
pretend | -0.004 |
cheer | -0.004 |
ends | -0.003 |
wonder | -0.003 |
see | -0.003 |
Token | . |
Token ID | 13 |
Feature activation | +5.55e-02 |
Pos logit contributions | |
ge | +0.014 |
OO | +0.014 |
Twe | +0.013 |
obby | +0.013 |
Wil | +0.013 |
Neg logit contributions | |
pretend | -0.005 |
cheer | -0.005 |
around | -0.004 |
ends | -0.004 |
excited | -0.004 |
Token | have |
Token ID | 423 |
Feature activation | +6.44e-01 |
Pos logit contributions | |
iece | +0.013 |
ends | +0.012 |
igan | +0.012 |
cheer | +0.011 |
reporters | +0.011 |
Neg logit contributions | |
ge | -0.015 |
Twe | -0.014 |
obby | -0.013 |
Wil | -0.013 |
Twe | -0.013 |
Token | a |
Token ID | 257 |
Feature activation | -2.45e-01 |
Pos logit contributions | |
ends | +0.009 |
pretend | +0.009 |
cheer | +0.009 |
e | +0.008 |
wonder | +0.008 |
Neg logit contributions | |
ge | -0.019 |
Twe | -0.017 |
obby | -0.017 |
OO | -0.016 |
Wil | -0.016 |
Token | package |
Token ID | 5301 |
Feature activation | +5.43e-01 |
Pos logit contributions | |
ge | +0.007 |
Twe | +0.006 |
Wil | +0.006 |
obby | +0.006 |
OO | +0.006 |
Neg logit contributions | |
cheer | -0.004 |
ends | -0.004 |
pretend | -0.004 |
wonder | -0.003 |
excited | -0.003 |
Token | for |
Token ID | 329 |
Feature activation | +6.88e-01 |
Pos logit contributions | |
ends | +0.009 |
cheer | +0.009 |
pretend | +0.008 |
wonder | +0.008 |
e | +0.007 |
Neg logit contributions | |
ge | -0.014 |
Twe | -0.013 |
obby | -0.012 |
Wil | -0.012 |
badly | -0.012 |
Token | your |
Token ID | 534 |
Feature activation | +4.36e-01 |
Pos logit contributions | |
cheer | +0.011 |
pretend | +0.011 |
ends | +0.010 |
e | +0.010 |
see | +0.010 |
Neg logit contributions | |
ge | -0.018 |
Twe | -0.017 |
obby | -0.017 |
OO | -0.017 |
gie | -0.016 |
Token | mom |
Token ID | 1995 |
Feature activation | -5.25e-02 |
Pos logit contributions | |
pretend | +0.007 |
cheer | +0.007 |
ends | +0.006 |
wonder | +0.006 |
excited | +0.005 |
Neg logit contributions | |
ge | -0.012 |
Twe | -0.011 |
Wil | -0.011 |
obby | -0.011 |
OO | -0.011 |
Token | . |
Token ID | 13 |
Feature activation | +1.51e-01 |
Pos logit contributions | |
ge | +0.001 |
Twe | +0.001 |
Wil | +0.001 |
obby | +0.001 |
OO | +0.001 |
Neg logit contributions | |
ends | -0.001 |
cheer | -0.001 |
pretend | -0.001 |
e | -0.001 |
sill | -0.001 |
Token | Can |
Token ID | 1680 |
Feature activation | +4.04e-01 |
Pos logit contributions | |
ends | +0.003 |
cheer | +0.003 |
pretend | +0.003 |
wonder | +0.003 |
sill | +0.003 |
Neg logit contributions | |
ge | -0.003 |
Twe | -0.003 |
Wil | -0.003 |
obby | -0.003 |
OO | -0.003 |
Token | you |
Token ID | 345 |
Feature activation | +1.90e-01 |
Pos logit contributions | |
cheer | +0.007 |
pretend | +0.007 |
ends | +0.007 |
see | +0.007 |
e | +0.006 |
Neg logit contributions | |
ge | -0.010 |
OO | -0.010 |
obby | -0.009 |
rated | -0.009 |
OUT | -0.009 |
Token | take |
Token ID | 1011 |
Feature activation | -2.30e-02 |
Pos logit contributions | |
ends | +0.003 |
iece | +0.003 |
ibr | +0.003 |
cheer | +0.003 |
sill | +0.003 |
Neg logit contributions | |
ge | -0.004 |
Twe | -0.004 |
Wil | -0.004 |
badly | -0.004 |
obby | -0.004 |
Token | it |
Token ID | 340 |
Feature activation | +1.26e-01 |
Pos logit contributions | |
ge | +0.000 |
Twe | +0.000 |
Wil | +0.000 |
Twe | +0.000 |
obby | +0.000 |
Neg logit contributions | |
ends | -0.000 |
cheer | -0.000 |
pretend | -0.000 |
sill | -0.000 |
wonder | -0.000 |
Token | pushing |
Token ID | 7796 |
Feature activation | +4.71e-02 |
Pos logit contributions | |
ge | +0.013 |
Twe | +0.013 |
OO | +0.012 |
obby | +0.012 |
Wil | +0.012 |
Neg logit contributions | |
cheer | -0.005 |
pretend | -0.005 |
ends | -0.005 |
excited | -0.005 |
wonder | -0.004 |
Token | them |
Token ID | 606 |
Feature activation | -4.07e-02 |
Pos logit contributions | |
cheer | +0.001 |
pretend | +0.001 |
ends | +0.001 |
e | +0.001 |
sill | +0.001 |
Neg logit contributions | |
Twe | -0.001 |
ge | -0.001 |
Wil | -0.001 |
obby | -0.001 |
OO | -0.001 |
Token | around |
Token ID | 1088 |
Feature activation | +3.22e-01 |
Pos logit contributions | |
Twe | +0.001 |
ge | +0.001 |
Wil | +0.001 |
obby | +0.001 |
OO | +0.001 |
Neg logit contributions | |
ends | -0.000 |
cheer | -0.000 |
pretend | -0.000 |
excited | -0.000 |
sill | -0.000 |
Token | . |
Token ID | 13 |
Feature activation | -9.09e-02 |
Pos logit contributions | |
cheer | +0.005 |
pretend | +0.005 |
ends | +0.005 |
wonder | +0.004 |
e | +0.004 |
Neg logit contributions | |
Twe | -0.009 |
ge | -0.008 |
Wil | -0.008 |
obby | -0.008 |
OO | -0.008 |
Token |
|
Token ID | 198 |
Feature activation | +3.33e-01 |
Pos logit contributions | |
ge | +0.003 |
poisonous | +0.002 |
OO | +0.002 |
rated | +0.002 |
badly | +0.002 |
Neg logit contributions | |
ends | -0.001 |
excited | -0.001 |
pretend | -0.001 |
cheer | -0.001 |
sill | -0.001 |
Token | Bob |
Token ID | 18861 |
Feature activation | +7.53e-03 |
Pos logit contributions | |
ends | +0.004 |
cheer | +0.004 |
excited | +0.003 |
pretend | +0.003 |
wonder | +0.003 |
Neg logit contributions | |
ge | -0.010 |
iously | -0.010 |
OO | -0.009 |
gie | -0.009 |
Twe | -0.009 |
Token | laughed |
Token ID | 13818 |
Feature activation | -9.36e-02 |
Pos logit contributions | |
ends | +0.000 |
cheer | +0.000 |
pretend | +0.000 |
e | +0.000 |
excited | +0.000 |
Neg logit contributions | |
Twe | -0.000 |
ge | -0.000 |
Wil | -0.000 |
badly | -0.000 |
OO | -0.000 |
Token | and |
Token ID | 290 |
Feature activation | +4.07e-01 |
Pos logit contributions | |
ge | +0.002 |
Twe | +0.002 |
Wil | +0.002 |
obby | +0.002 |
OO | +0.002 |
Neg logit contributions | |
ends | -0.001 |
cheer | -0.001 |
pretend | -0.001 |
wonder | -0.001 |
sill | -0.001 |
Token | smiled |
Token ID | 13541 |
Feature activation | +3.51e-01 |
Pos logit contributions | |
ends | +0.007 |
cheer | +0.007 |
pretend | +0.006 |
excited | +0.006 |
wonder | +0.005 |
Neg logit contributions | |
ge | -0.011 |
Twe | -0.010 |
obby | -0.010 |
Wil | -0.010 |
OO | -0.010 |
Token | as |
Token ID | 355 |
Feature activation | +6.31e-01 |
Pos logit contributions | |
ends | +0.007 |
cheer | +0.007 |
pretend | +0.007 |
e | +0.006 |
wonder | +0.006 |
Neg logit contributions | |
ge | -0.007 |
Twe | -0.007 |
obby | -0.007 |
badly | -0.007 |
Wil | -0.006 |
Token | his |
Token ID | 465 |
Feature activation | -3.08e-02 |
Pos logit contributions | |
cheer | +0.011 |
ends | +0.011 |
pretend | +0.010 |
excited | +0.010 |
wonder | +0.009 |
Neg logit contributions | |
ge | -0.016 |
Twe | -0.016 |
OO | -0.015 |
obby | -0.015 |
Wil | -0.015 |
Token | he |
Token ID | 339 |
Feature activation | +4.74e-01 |
Pos logit contributions | |
ends | +0.008 |
cheer | +0.008 |
like | +0.008 |
pretend | +0.008 |
excited | +0.007 |
Neg logit contributions | |
Twe | -0.027 |
OO | -0.025 |
Wil | -0.025 |
ge | -0.025 |
OUT | -0.025 |
Token | was |
Token ID | 373 |
Feature activation | -1.53e-01 |
Pos logit contributions | |
ends | +0.008 |
pretend | +0.008 |
cheer | +0.008 |
excited | +0.007 |
wonder | +0.007 |
Neg logit contributions | |
ge | -0.012 |
Twe | -0.012 |
Wil | -0.012 |
obby | -0.011 |
OO | -0.011 |
Token | sorry |
Token ID | 7926 |
Feature activation | -4.13e-01 |
Pos logit contributions | |
ge | +0.004 |
Twe | +0.004 |
obby | +0.004 |
OO | +0.004 |
Wil | +0.004 |
Neg logit contributions | |
ends | -0.002 |
cheer | -0.002 |
excited | -0.002 |
pretend | -0.002 |
proud | -0.002 |
Token | and |
Token ID | 290 |
Feature activation | +7.65e-02 |
Pos logit contributions | |
Twe | +0.013 |
ge | +0.012 |
Wil | +0.012 |
OO | +0.012 |
OUT | +0.012 |
Neg logit contributions | |
like | -0.005 |
cheer | -0.005 |
pretend | -0.005 |
ends | -0.005 |
excited | -0.004 |
Token | he |
Token ID | 339 |
Feature activation | -6.45e-02 |
Pos logit contributions | |
cheer | +0.001 |
excited | +0.001 |
pretend | +0.001 |
see | +0.001 |
ends | +0.001 |
Neg logit contributions | |
ge | -0.002 |
OO | -0.002 |
obby | -0.002 |
Twe | -0.002 |
Wil | -0.002 |
Token | would |
Token ID | 561 |
Feature activation | +8.47e-02 |
Pos logit contributions | |
ge | +0.002 |
Twe | +0.002 |
Wil | +0.002 |
obby | +0.002 |
OO | +0.002 |
Neg logit contributions | |
cheer | -0.001 |
ends | -0.001 |
pretend | -0.001 |
excited | -0.001 |
wonder | -0.001 |
Token | never |
Token ID | 1239 |
Feature activation | -1.88e-01 |
Pos logit contributions | |
cheer | +0.001 |
pretend | +0.001 |
ends | +0.001 |
wonder | +0.001 |
see | +0.001 |
Neg logit contributions | |
ge | -0.002 |
obby | -0.002 |
Twe | -0.002 |
OO | -0.002 |
Wil | -0.002 |
Token | do |
Token ID | 466 |
Feature activation | +1.23e+00 |
Pos logit contributions | |
ge | +0.005 |
Twe | +0.005 |
Wil | +0.005 |
obby | +0.005 |
OO | +0.005 |
Neg logit contributions | |
cheer | -0.003 |
pretend | -0.003 |
see | -0.002 |
wonder | -0.002 |
believe | -0.002 |
Token | this |
Token ID | 428 |
Feature activation | +5.56e-02 |
Pos logit contributions | |
ends | +0.017 |
pretend | +0.016 |
cheer | +0.016 |
excited | +0.014 |
see | +0.014 |
Neg logit contributions | |
Twe | -0.036 |
Wil | -0.035 |
obby | -0.035 |
ge | -0.035 |
gie | -0.034 |
Token | again |
Token ID | 757 |
Feature activation | -3.81e-01 |
Pos logit contributions | |
pretend | +0.001 |
cheer | +0.001 |
ends | +0.001 |
sill | +0.001 |
excited | +0.001 |
Neg logit contributions | |
Twe | -0.001 |
Wil | -0.001 |
ge | -0.001 |
obby | -0.001 |
gie | -0.001 |
Token | . |
Token ID | 13 |
Feature activation | -2.17e-01 |
Pos logit contributions | |
Twe | +0.010 |
Wil | +0.010 |
obby | +0.010 |
ge | +0.010 |
OO | +0.009 |
Neg logit contributions | |
pretend | -0.006 |
cheer | -0.006 |
ends | -0.006 |
excited | -0.005 |
e | -0.005 |
Token | playing |
Token ID | 2712 |
Feature activation | +7.74e-01 |
Pos logit contributions | |
pretend | +0.011 |
cheer | +0.011 |
ends | +0.011 |
wonder | +0.009 |
excited | +0.009 |
Neg logit contributions | |
ge | -0.020 |
Twe | -0.019 |
obby | -0.019 |
Wil | -0.019 |
OO | -0.018 |
Token | with |
Token ID | 351 |
Feature activation | +3.07e-01 |
Pos logit contributions | |
ends | +0.012 |
cheer | +0.012 |
pretend | +0.010 |
wonder | +0.010 |
sill | +0.010 |
Neg logit contributions | |
ge | -0.020 |
Twe | -0.019 |
badly | -0.018 |
Wil | -0.018 |
obby | -0.018 |
Token | Tim |
Token ID | 5045 |
Feature activation | -2.81e-01 |
Pos logit contributions | |
pretend | +0.004 |
cheer | +0.004 |
e | +0.003 |
ends | +0.003 |
excited | +0.003 |
Neg logit contributions | |
ge | -0.009 |
Wil | -0.009 |
shadow | -0.009 |
OO | -0.009 |
obby | -0.009 |
Token | every |
Token ID | 790 |
Feature activation | -2.54e-01 |
Pos logit contributions | |
Twe | +0.008 |
Wil | +0.008 |
OO | +0.008 |
Fe | +0.008 |
ge | +0.008 |
Neg logit contributions | |
e | -0.003 |
pretend | -0.003 |
cheer | -0.003 |
ends | -0.003 |
like | -0.003 |
Token | day |
Token ID | 1110 |
Feature activation | +1.39e-02 |
Pos logit contributions | |
ge | +0.008 |
Twe | +0.008 |
Wil | +0.008 |
obby | +0.008 |
Wil | +0.008 |
Neg logit contributions | |
pretend | -0.003 |
cheer | -0.003 |
ends | -0.002 |
excited | -0.002 |
wonder | -0.002 |
Token | . |
Token ID | 13 |
Feature activation | +7.56e-03 |
Pos logit contributions | |
ends | +0.000 |
cheer | +0.000 |
pretend | +0.000 |
excited | +0.000 |
e | +0.000 |
Neg logit contributions | |
ge | -0.000 |
Twe | -0.000 |
Wil | -0.000 |
obby | -0.000 |
OO | -0.000 |
Token | The |
Token ID | 383 |
Feature activation | -8.20e-01 |
Pos logit contributions | |
ends | +0.000 |
cheer | +0.000 |
pretend | +0.000 |
excited | +0.000 |
see | +0.000 |
Neg logit contributions | |
ge | -0.000 |
OO | -0.000 |
iously | -0.000 |
badly | -0.000 |
shadow | -0.000 |
Token | sad |
Token ID | 6507 |
Feature activation | -3.50e-01 |
Pos logit contributions | |
ge | +0.025 |
obby | +0.023 |
Twe | +0.023 |
Wil | +0.022 |
OO | +0.022 |
Neg logit contributions | |
cheer | -0.011 |
ends | -0.011 |
pretend | -0.010 |
wonder | -0.008 |
excited | -0.008 |
Token | little |
Token ID | 1310 |
Feature activation | -3.13e-01 |
Pos logit contributions | |
ge | +0.010 |
Wil | +0.010 |
obby | +0.010 |
Twe | +0.010 |
Fe | +0.009 |
Neg logit contributions | |
cheer | -0.005 |
pretend | -0.005 |
excited | -0.004 |
ends | -0.004 |
happy | -0.004 |
Token | golf |
Token ID | 13126 |
Feature activation | +2.02e-01 |
Pos logit contributions | |
ge | +0.008 |
Twe | +0.007 |
obby | +0.007 |
Wil | +0.007 |
OO | +0.007 |
Neg logit contributions | |
cheer | -0.006 |
pretend | -0.006 |
ends | -0.005 |
wonder | -0.005 |
excited | -0.005 |
Token | ball |
Token ID | 2613 |
Feature activation | -1.91e-02 |
Pos logit contributions | |
ends | +0.004 |
cheer | +0.004 |
pretend | +0.004 |
wonder | +0.003 |
sill | +0.003 |
Neg logit contributions | |
ge | -0.004 |
Twe | -0.004 |
Wil | -0.004 |
obby | -0.004 |
badly | -0.004 |
Token | much |
Token ID | 881 |
Feature activation | +3.54e-01 |
Pos logit contributions | |
ends | +0.006 |
excited | +0.006 |
pretend | +0.006 |
cheer | +0.006 |
wonder | +0.005 |
Neg logit contributions | |
gie | -0.013 |
ge | -0.012 |
Wil | -0.012 |
Twe | -0.012 |
OO | -0.012 |
Token | . |
Token ID | 13 |
Feature activation | -7.93e-01 |
Pos logit contributions | |
ends | +0.006 |
cheer | +0.006 |
pretend | +0.006 |
excited | +0.005 |
like | +0.005 |
Neg logit contributions | |
Twe | -0.009 |
ge | -0.009 |
Wil | -0.009 |
gie | -0.008 |
obby | -0.008 |
Token | That |
Token ID | 1320 |
Feature activation | +7.60e-01 |
Pos logit contributions | |
ge | +0.018 |
Twe | +0.015 |
OO | +0.015 |
badly | +0.014 |
Wil | +0.014 |
Neg logit contributions | |
ends | -0.017 |
cheer | -0.017 |
pretend | -0.016 |
sill | -0.015 |
wonder | -0.015 |
Token | 's |
Token ID | 338 |
Feature activation | -3.18e-02 |
Pos logit contributions | |
cheer | +0.012 |
pretend | +0.011 |
ends | +0.011 |
wonder | +0.010 |
excited | +0.010 |
Neg logit contributions | |
ge | -0.021 |
Twe | -0.020 |
Wil | -0.019 |
obby | -0.019 |
OO | -0.018 |
Token | why |
Token ID | 1521 |
Feature activation | -1.17e+00 |
Pos logit contributions | |
ge | +0.001 |
Twe | +0.001 |
obby | +0.001 |
OO | +0.001 |
gie | +0.001 |
Neg logit contributions | |
ends | -0.000 |
cheer | -0.000 |
pretend | -0.000 |
like | -0.000 |
wonder | -0.000 |
Token | she |
Token ID | 673 |
Feature activation | -2.40e-01 |
Pos logit contributions | |
ge | +0.036 |
Twe | +0.036 |
OO | +0.034 |
obby | +0.033 |
Wil | +0.033 |
Neg logit contributions | |
pretend | -0.013 |
cheer | -0.013 |
excited | -0.013 |
see | -0.012 |
happy | -0.012 |
Token | had |
Token ID | 550 |
Feature activation | +1.87e-01 |
Pos logit contributions | |
ge | +0.006 |
Twe | +0.005 |
Wil | +0.005 |
obby | +0.005 |
OO | +0.005 |
Neg logit contributions | |
ends | -0.005 |
cheer | -0.004 |
pretend | -0.004 |
e | -0.004 |
wonder | -0.004 |
Token | to |
Token ID | 284 |
Feature activation | +3.78e-01 |
Pos logit contributions | |
pretend | +0.003 |
cheer | +0.002 |
ends | +0.002 |
excited | +0.002 |
like | +0.002 |
Neg logit contributions | |
ge | -0.005 |
Twe | -0.005 |
obby | -0.005 |
Wil | -0.005 |
OO | -0.005 |
Token | go |
Token ID | 467 |
Feature activation | +5.58e-01 |
Pos logit contributions | |
cheer | +0.006 |
pretend | +0.006 |
wonder | +0.005 |
ends | +0.005 |
see | +0.005 |
Neg logit contributions | |
ge | -0.010 |
obby | -0.010 |
Twe | -0.010 |
OO | -0.010 |
Wil | -0.010 |
Token | away |
Token ID | 1497 |
Feature activation | +4.20e-01 |
Pos logit contributions | |
pretend | +0.007 |
cheer | +0.006 |
ends | +0.006 |
see | +0.006 |
like | +0.006 |
Neg logit contributions | |
OO | -0.017 |
obby | -0.017 |
ge | -0.017 |
Twe | -0.017 |
Wil | -0.017 |
Token | - |
Token ID | 12 |
Feature activation | -3.54e-02 |
Pos logit contributions | |
ends | +0.005 |
pretend | +0.005 |
cheer | +0.005 |
like | +0.005 |
excited | +0.005 |
Neg logit contributions | |
Twe | -0.012 |
ge | -0.012 |
Wil | -0.012 |
gie | -0.011 |
obby | -0.011 |
Token | us |
Token ID | 514 |
Feature activation | -2.57e-01 |
Pos logit contributions | |
cheer | +0.002 |
pretend | +0.002 |
e | +0.002 |
ends | +0.001 |
see | +0.001 |
Neg logit contributions | |
ge | -0.003 |
obby | -0.002 |
Twe | -0.002 |
Wil | -0.002 |
gie | -0.002 |
Token | ! |
Token ID | 0 |
Feature activation | -6.12e-01 |
Pos logit contributions | |
Twe | +0.006 |
ge | +0.006 |
Wil | +0.006 |
gie | +0.006 |
obby | +0.006 |
Neg logit contributions | |
ends | -0.005 |
cheer | -0.005 |
pretend | -0.005 |
like | -0.004 |
e | -0.004 |
Token | Let |
Token ID | 3914 |
Feature activation | -8.96e-04 |
Pos logit contributions | |
ge | +0.015 |
Twe | +0.012 |
badly | +0.012 |
Wil | +0.012 |
OO | +0.012 |
Neg logit contributions | |
cheer | -0.012 |
ends | -0.012 |
pretend | -0.011 |
sill | -0.011 |
e | -0.011 |
Token | â |
Token ID | 22940 |
Feature activation | -1.15e+00 |
Pos logit contributions | |
ge | +0.000 |
Twe | +0.000 |
obby | +0.000 |
Wil | +0.000 |
gie | +0.000 |
Neg logit contributions | |
cheer | -0.000 |
pretend | -0.000 |
ends | -0.000 |
e | -0.000 |
see | -0.000 |
Token | € |
Token ID | 26391 |
Feature activation | -1.53e-02 |
Pos logit contributions | |
ge | +0.028 |
Twe | +0.026 |
obby | +0.025 |
Wil | +0.025 |
OO | +0.024 |
Neg logit contributions | |
cheer | -0.020 |
ends | -0.020 |
pretend | -0.019 |
e | -0.018 |
wonder | -0.017 |
Token | ™ |
Token ID | 8151 |
Feature activation | -1.65e+00 |
Pos logit contributions | |
ge | +0.000 |
Twe | +0.000 |
obby | +0.000 |
Wil | +0.000 |
OO | +0.000 |
Neg logit contributions | |
cheer | -0.000 |
ends | -0.000 |
pretend | -0.000 |
sill | -0.000 |
wonder | -0.000 |
Token | s |
Token ID | 82 |
Feature activation | -3.70e-01 |
Pos logit contributions | |
ge | +0.045 |
Twe | +0.042 |
gie | +0.042 |
iously | +0.041 |
Wil | +0.041 |
Neg logit contributions | |
ends | -0.024 |
e | -0.021 |
cheer | -0.020 |
excited | -0.019 |
forward | -0.018 |
Token | share |
Token ID | 2648 |
Feature activation | +2.87e-01 |
Pos logit contributions | |
ge | +0.010 |
Twe | +0.010 |
Wil | +0.009 |
obby | +0.009 |
OO | +0.009 |
Neg logit contributions | |
cheer | -0.005 |
ends | -0.005 |
pretend | -0.005 |
wonder | -0.004 |
e | -0.004 |
Token | it |
Token ID | 340 |
Feature activation | -4.23e-01 |
Pos logit contributions | |
ends | +0.005 |
cheer | +0.005 |
pretend | +0.005 |
wonder | +0.004 |
e | +0.004 |
Neg logit contributions | |
ge | -0.007 |
Twe | -0.007 |
Wil | -0.007 |
obby | -0.007 |
OO | -0.006 |
Token | ." |
Token ID | 526 |
Feature activation | +1.27e-01 |
Pos logit contributions | |
ge | +0.011 |
Twe | +0.011 |
obby | +0.010 |
Wil | +0.010 |
badly | +0.010 |
Neg logit contributions | |
ends | -0.007 |
cheer | -0.006 |
pretend | -0.006 |
wonder | -0.005 |
sill | -0.005 |
Token | Pat |
Token ID | 3208 |
Feature activation | -1.86e-02 |
Pos logit contributions | |
ends | +0.002 |
cheer | +0.002 |
pretend | +0.002 |
wonder | +0.002 |
sill | +0.002 |
Neg logit contributions | |
ge | -0.003 |
Twe | -0.003 |
Wil | -0.003 |
obby | -0.003 |
OO | -0.003 |
Token | " |
Token ID | 366 |
Feature activation | +7.17e-02 |
Pos logit contributions | |
cheer | +0.003 |
ends | +0.003 |
excited | +0.003 |
pretend | +0.003 |
happy | +0.002 |
Neg logit contributions | |
ge | -0.006 |
Twe | -0.005 |
obby | -0.005 |
Wil | -0.005 |
ety | -0.005 |
Token | Thank |
Token ID | 10449 |
Feature activation | +1.59e-01 |
Pos logit contributions | |
cheer | +0.001 |
ends | +0.001 |
pretend | +0.001 |
sill | +0.001 |
wonder | +0.001 |
Neg logit contributions | |
ge | -0.002 |
Twe | -0.001 |
obby | -0.001 |
Wil | -0.001 |
badly | -0.001 |
Token | you |
Token ID | 345 |
Feature activation | -2.90e-01 |
Pos logit contributions | |
ends | +0.004 |
cheer | +0.004 |
pretend | +0.004 |
wonder | +0.004 |
sill | +0.004 |
Neg logit contributions | |
Twe | -0.003 |
ge | -0.002 |
Wil | -0.002 |
obby | -0.002 |
Twe | -0.002 |
Token | , |
Token ID | 11 |
Feature activation | -1.27e+00 |
Pos logit contributions | |
ge | +0.008 |
Twe | +0.008 |
Wil | +0.008 |
obby | +0.008 |
OO | +0.008 |
Neg logit contributions | |
ends | -0.004 |
cheer | -0.004 |
pretend | -0.003 |
e | -0.003 |
wonder | -0.003 |
Token | Lily |
Token ID | 20037 |
Feature activation | +2.21e-01 |
Pos logit contributions | |
ge | +0.038 |
Twe | +0.034 |
obby | +0.034 |
Wil | +0.034 |
OO | +0.033 |
Neg logit contributions | |
cheer | -0.020 |
pretend | -0.017 |
ends | -0.017 |
wonder | -0.015 |
excited | -0.015 |
Token | , |
Token ID | 11 |
Feature activation | -1.61e+00 |
Pos logit contributions | |
cheer | +0.004 |
ends | +0.004 |
pretend | +0.004 |
e | +0.004 |
excited | +0.004 |
Neg logit contributions | |
ge | -0.005 |
Twe | -0.005 |
Wil | -0.005 |
obby | -0.005 |
Fe | -0.004 |
Token | for |
Token ID | 329 |
Feature activation | -7.68e-01 |
Pos logit contributions | |
ge | +0.042 |
Twe | +0.040 |
obby | +0.038 |
Wil | +0.038 |
OO | +0.037 |
Neg logit contributions | |
ends | -0.026 |
cheer | -0.025 |
pretend | -0.024 |
wonder | -0.021 |
e | -0.021 |
Token | being |
Token ID | 852 |
Feature activation | -3.01e-01 |
Pos logit contributions | |
ge | +0.015 |
Twe | +0.015 |
Wil | +0.014 |
badly | +0.014 |
OO | +0.014 |
Neg logit contributions | |
ends | -0.016 |
cheer | -0.014 |
pretend | -0.014 |
wonder | -0.014 |
sill | -0.013 |
Token | so |
Token ID | 523 |
Feature activation | -1.12e-01 |
Pos logit contributions | |
badly | +0.005 |
Twe | +0.005 |
Twe | +0.004 |
shadow | +0.004 |
Bobby | +0.004 |
Neg logit contributions | |
ends | -0.007 |
cheer | -0.006 |
iece | -0.006 |
pretend | -0.006 |
decide | -0.006 |
Token | thoughtful |
Token ID | 22677 |
Feature activation | -4.74e-01 |
Pos logit contributions | |
ge | +0.003 |
Twe | +0.003 |
Wil | +0.003 |
badly | +0.003 |
obby | +0.003 |
Neg logit contributions | |
ends | -0.002 |
cheer | -0.002 |
pretend | -0.002 |
e | -0.001 |
sill | -0.001 |
Token | and |
Token ID | 290 |
Feature activation | -2.90e-01 |
Pos logit contributions | |
ge | +0.012 |
Twe | +0.012 |
Wil | +0.011 |
obby | +0.010 |
gie | +0.010 |
Neg logit contributions | |
cheer | -0.008 |
ends | -0.008 |
pretend | -0.008 |
excited | -0.007 |
e | -0.007 |
Token | are |
Token ID | 389 |
Feature activation | +9.81e-01 |
Pos logit contributions | |
ends | +0.004 |
cheer | +0.004 |
pretend | +0.003 |
sill | +0.003 |
wonder | +0.003 |
Neg logit contributions | |
ge | -0.007 |
Twe | -0.006 |
obby | -0.006 |
Wil | -0.006 |
OO | -0.006 |
Token | you |
Token ID | 345 |
Feature activation | -5.95e-01 |
Pos logit contributions | |
pretend | +0.012 |
ends | +0.011 |
e | +0.010 |
cheer | +0.009 |
excited | +0.009 |
Neg logit contributions | |
ge | -0.031 |
OO | -0.029 |
obby | -0.028 |
rated | -0.028 |
Johnson | -0.027 |
Token | doing |
Token ID | 1804 |
Feature activation | +3.69e-02 |
Pos logit contributions | |
ge | +0.012 |
Twe | +0.011 |
Wil | +0.011 |
OO | +0.011 |
obby | +0.011 |
Neg logit contributions | |
cheer | -0.012 |
ends | -0.012 |
pretend | -0.011 |
sill | -0.011 |
wonder | -0.011 |
Token | here |
Token ID | 994 |
Feature activation | -2.95e-01 |
Pos logit contributions | |
ends | +0.001 |
cheer | +0.001 |
pretend | +0.001 |
sill | +0.001 |
wonder | +0.001 |
Neg logit contributions | |
ge | -0.001 |
Twe | -0.001 |
badly | -0.000 |
Wil | -0.000 |
obby | -0.000 |
Token | ? |
Token ID | 30 |
Feature activation | -8.86e-01 |
Pos logit contributions | |
ge | +0.006 |
Twe | +0.006 |
obby | +0.005 |
Wil | +0.005 |
Fe | +0.005 |
Neg logit contributions | |
pretend | -0.006 |
cheer | -0.006 |
ends | -0.006 |
like | -0.006 |
see | -0.006 |
Token | â |
Token ID | 22940 |
Feature activation | -1.54e+00 |
Pos logit contributions | |
ge | +0.028 |
iously | +0.026 |
OO | +0.026 |
rated | +0.025 |
ently | +0.024 |
Neg logit contributions | |
ends | -0.012 |
cheer | -0.012 |
pretend | -0.011 |
e | -0.011 |
excited | -0.010 |
Token | € |
Token ID | 26391 |
Feature activation | -2.83e-01 |
Pos logit contributions | |
ge | +0.032 |
Twe | +0.029 |
obby | +0.028 |
Wil | +0.028 |
badly | +0.027 |
Neg logit contributions | |
cheer | -0.033 |
ends | -0.033 |
pretend | -0.031 |
e | -0.030 |
sill | -0.029 |
Token | The |
Token ID | 383 |
Feature activation | -5.27e-01 |
Pos logit contributions | |
ge | +0.005 |
Twe | +0.005 |
Wil | +0.005 |
obby | +0.005 |
OO | +0.005 |
Neg logit contributions | |
cheer | -0.006 |
ends | -0.006 |
pretend | -0.006 |
sill | -0.006 |
wonder | -0.005 |
Token | bird |
Token ID | 6512 |
Feature activation | -4.89e-01 |
Pos logit contributions | |
ge | +0.015 |
obby | +0.014 |
Twe | +0.013 |
Wil | +0.013 |
OO | +0.012 |
Neg logit contributions | |
cheer | -0.009 |
pretend | -0.008 |
ends | -0.008 |
wonder | -0.008 |
sill | -0.007 |
Token | said |
Token ID | 531 |
Feature activation | -4.85e-01 |
Pos logit contributions | |
ge | +0.014 |
obby | +0.013 |
Twe | +0.013 |
Wil | +0.013 |
OO | +0.012 |
Neg logit contributions | |
pretend | -0.007 |
cheer | -0.007 |
ends | -0.007 |
excited | -0.007 |
e | -0.007 |
Token | , |
Token ID | 11 |
Feature activation | -5.63e-01 |
Pos logit contributions | |
ge | +0.017 |
Twe | +0.015 |
obby | +0.015 |
Wil | +0.015 |
ety | +0.015 |
Neg logit contributions | |
cheer | -0.005 |
pretend | -0.005 |
ends | -0.004 |
excited | -0.004 |
see | -0.003 |
Token | perform |
Token ID | 1620 |
Feature activation | -3.68e-02 |
Pos logit contributions | |
ends | +0.002 |
cheer | +0.001 |
pretend | +0.001 |
sill | +0.001 |
reporters | +0.001 |
Neg logit contributions | |
ge | -0.003 |
Twe | -0.003 |
Wil | -0.003 |
badly | -0.003 |
obby | -0.003 |
Token | a |
Token ID | 257 |
Feature activation | +3.55e-01 |
Pos logit contributions | |
ge | +0.001 |
badly | +0.001 |
Twe | +0.001 |
OO | +0.001 |
obby | +0.001 |
Neg logit contributions | |
ends | -0.001 |
cheer | -0.001 |
sill | -0.001 |
wonder | -0.001 |
pretend | -0.001 |
Token | trick |
Token ID | 6908 |
Feature activation | +7.31e-01 |
Pos logit contributions | |
ends | +0.006 |
cheer | +0.006 |
pretend | +0.005 |
wonder | +0.005 |
e | +0.005 |
Neg logit contributions | |
ge | -0.009 |
Twe | -0.009 |
Wil | -0.008 |
obby | -0.008 |
OO | -0.008 |
Token | for |
Token ID | 329 |
Feature activation | +5.33e-02 |
Pos logit contributions | |
ends | +0.018 |
cheer | +0.017 |
pretend | +0.016 |
wonder | +0.016 |
sill | +0.015 |
Neg logit contributions | |
ge | -0.013 |
Twe | -0.012 |
badly | -0.011 |
obby | -0.011 |
Wil | -0.011 |
Token | me |
Token ID | 502 |
Feature activation | -7.31e-01 |
Pos logit contributions | |
cheer | +0.001 |
pretend | +0.001 |
ends | +0.001 |
e | +0.001 |
see | +0.001 |
Neg logit contributions | |
ge | -0.001 |
Twe | -0.001 |
obby | -0.001 |
OO | -0.001 |
Wil | -0.001 |
Token | , |
Token ID | 11 |
Feature activation | -1.60e+00 |
Pos logit contributions | |
ge | +0.013 |
Twe | +0.012 |
Wil | +0.011 |
obby | +0.011 |
OO | +0.011 |
Neg logit contributions | |
ends | -0.018 |
cheer | -0.018 |
pretend | -0.017 |
e | -0.016 |
wonder | -0.016 |
Token | Bob |
Token ID | 5811 |
Feature activation | -2.84e-01 |
Pos logit contributions | |
ge | +0.037 |
Twe | +0.031 |
OO | +0.031 |
obby | +0.031 |
Fe | +0.029 |
Neg logit contributions | |
cheer | -0.034 |
pretend | -0.033 |
ends | -0.031 |
wonder | -0.030 |
like | -0.030 |
Token | ?" |
Token ID | 1701 |
Feature activation | +6.13e-01 |
Pos logit contributions | |
Twe | +0.007 |
ge | +0.007 |
Wil | +0.007 |
Fe | +0.007 |
iously | +0.007 |
Neg logit contributions | |
cheer | -0.005 |
pretend | -0.005 |
ends | -0.005 |
see | -0.004 |
like | -0.004 |
Token | Tim |
Token ID | 5045 |
Feature activation | -3.69e-02 |
Pos logit contributions | |
cheer | +0.010 |
pretend | +0.010 |
excited | +0.010 |
see | +0.010 |
ends | +0.009 |
Neg logit contributions | |
ge | -0.016 |
OO | -0.015 |
obby | -0.014 |
badly | -0.014 |
urbed | -0.014 |
Token | my |
Token ID | 1820 |
Feature activation | +1.71e-01 |
Pos logit contributions | |
ge | +0.001 |
Twe | +0.001 |
Wil | +0.001 |
OO | +0.001 |
OUT | +0.001 |
Neg logit contributions | |
cheer | -0.001 |
ends | -0.001 |
excited | -0.001 |
pretend | -0.001 |
e | -0.001 |
Token | asked |
Token ID | 1965 |
Feature activation | +4.02e-02 |
Pos logit contributions | |
ends | +0.003 |
cheer | +0.003 |
pretend | +0.003 |
wonder | +0.003 |
e | +0.003 |
Neg logit contributions | |
ge | -0.004 |
Twe | -0.004 |
Wil | -0.003 |
obby | -0.003 |
OO | -0.003 |
Token | Ok |
Token ID | 18690 |
Feature activation | -1.30e+00 |
Pos logit contributions | |
ge | +0.005 |
OO | +0.005 |
arse | +0.005 |
Twe | +0.005 |
iously | +0.005 |
Neg logit contributions | |
ends | -0.004 |
like | -0.003 |
excited | -0.003 |
wonder | -0.003 |
pretend | -0.003 |
Token | , |
Token ID | 11 |
Feature activation | -1.25e+00 |
Pos logit contributions | |
ge | +0.041 |
Twe | +0.039 |
OO | +0.037 |
Fe | +0.037 |
ety | +0.036 |
Neg logit contributions | |
cheer | -0.016 |
ends | -0.014 |
excited | -0.013 |
pretend | -0.013 |
like | -0.013 |
Token | I |
Token ID | 314 |
Feature activation | +2.22e-03 |
Pos logit contributions | |
ge | +0.033 |
OO | +0.029 |
Twe | +0.028 |
obby | +0.028 |
pless | +0.027 |
Neg logit contributions | |
cheer | -0.023 |
ends | -0.021 |
pretend | -0.021 |
see | -0.021 |
excited | -0.020 |
Token | â |
Token ID | 22940 |
Feature activation | -1.39e+00 |
Pos logit contributions | |
ends | +0.000 |
cheer | +0.000 |
pretend | +0.000 |
sill | +0.000 |
wonder | +0.000 |
Neg logit contributions | |
ge | -0.000 |
Twe | -0.000 |
obby | -0.000 |
Wil | -0.000 |
OO | -0.000 |
Token | € |
Token ID | 26391 |
Feature activation | +4.21e-02 |
Pos logit contributions | |
ge | +0.031 |
Twe | +0.028 |
obby | +0.028 |
badly | +0.027 |
Wil | +0.027 |
Neg logit contributions | |
cheer | -0.029 |
ends | -0.027 |
e | -0.026 |
pretend | -0.026 |
sill | -0.024 |
Token | ™ |
Token ID | 8151 |
Feature activation | -1.54e+00 |
Pos logit contributions | |
ends | +0.001 |
cheer | +0.001 |
pretend | +0.001 |
wonder | +0.001 |
sill | +0.001 |
Neg logit contributions | |
ge | -0.001 |
Twe | -0.001 |
Wil | -0.001 |
obby | -0.001 |
OO | -0.001 |
Token | ll |
Token ID | 297 |
Feature activation | -9.84e-01 |
Pos logit contributions | |
ge | +0.037 |
Twe | +0.035 |
matter | +0.035 |
gie | +0.034 |
Frankie | +0.034 |
Neg logit contributions | |
ends | -0.027 |
e | -0.023 |
forward | -0.022 |
cheer | -0.022 |
excited | -0.021 |
Token | do |
Token ID | 466 |
Feature activation | +7.86e-02 |
Pos logit contributions | |
ge | +0.027 |
Twe | +0.027 |
obby | +0.026 |
OO | +0.026 |
Wil | +0.026 |
Neg logit contributions | |
cheer | -0.015 |
pretend | -0.014 |
ends | -0.013 |
e | -0.012 |
see | -0.012 |
Token | it |
Token ID | 340 |
Feature activation | -7.17e-01 |
Pos logit contributions | |
ends | +0.001 |
pretend | +0.001 |
cheer | +0.001 |
e | +0.001 |
excited | +0.001 |
Neg logit contributions | |
Twe | -0.002 |
ge | -0.002 |
ety | -0.002 |
gie | -0.002 |
Wil | -0.002 |
Token | . |
Token ID | 13 |
Feature activation | -6.92e-01 |
Pos logit contributions | |
Twe | +0.017 |
ge | +0.017 |
Wil | +0.016 |
Fe | +0.015 |
gie | +0.015 |
Neg logit contributions | |
ends | -0.013 |
pretend | -0.013 |
cheer | -0.013 |
like | -0.012 |
excited | -0.012 |
Token | â |
Token ID | 22940 |
Feature activation | -1.27e+00 |
Pos logit contributions | |
ge | +0.018 |
OO | +0.016 |
badly | +0.016 |
poisonous | +0.015 |
iously | +0.015 |
Neg logit contributions | |
cheer | -0.013 |
ends | -0.013 |
pretend | -0.011 |
sill | -0.011 |
see | -0.011 |