Token | lion |
Token ID | 18744 |
Feature activation | +1.90e+00 |
Pos logit contributions | |
parties | +0.007 |
Princess | +0.007 |
succeeded | +0.007 |
chef | +0.007 |
matching | +0.007 |
Neg logit contributions | |
cigarettes | -0.006 |
ho | -0.006 |
ric | -0.006 |
Un | -0.006 |
ostr | -0.005 |
Token | came |
Token ID | 1625 |
Feature activation | +3.16e+00 |
Pos logit contributions | |
ric | +0.055 |
cigarettes | +0.053 |
ipt | +0.052 |
umbling | +0.052 |
oes | +0.050 |
Neg logit contributions | |
Princess | -0.027 |
chef | -0.025 |
feast | -0.023 |
party | -0.023 |
, | -0.022 |
Token | and |
Token ID | 290 |
Feature activation | +7.82e-01 |
Pos logit contributions | |
cigarettes | +0.096 |
ric | +0.092 |
spe | +0.090 |
grand | +0.089 |
ho | +0.084 |
Neg logit contributions | |
, | -0.039 |
patiently | -0.035 |
matching | -0.034 |
Princess | -0.033 |
better | -0.033 |
Token | saw |
Token ID | 2497 |
Feature activation | +1.16e+00 |
Pos logit contributions | |
ric | +0.019 |
umbling | +0.018 |
ipt | +0.018 |
cigarettes | +0.018 |
urious | +0.016 |
Neg logit contributions | |
Princess | -0.015 |
chef | -0.014 |
matching | -0.013 |
succeeded | -0.013 |
patiently | -0.012 |
Token | Stri |
Token ID | 26137 |
Feature activation | +1.38e+00 |
Pos logit contributions | |
umbling | +0.028 |
spe | +0.028 |
cigarettes | +0.027 |
ric | +0.026 |
urious | +0.025 |
Neg logit contributions | |
Princess | -0.022 |
chef | -0.020 |
matching | -0.019 |
, | -0.019 |
is | -0.019 |
Token | pe |
Token ID | 431 |
Feature activation | +5.94e+00 |
Pos logit contributions | |
ric | +0.018 |
cigarettes | +0.016 |
ipt | +0.016 |
ho | +0.015 |
umbling | +0.015 |
Neg logit contributions | |
Princess | -0.041 |
chef | -0.039 |
matching | -0.037 |
feast | -0.037 |
designer | -0.037 |
Token | y |
Token ID | 88 |
Feature activation | +2.50e+00 |
Pos logit contributions | |
Warning | +0.114 |
cigarettes | +0.106 |
Uh | +0.101 |
Danger | +0.101 |
Un | +0.100 |
Neg logit contributions | |
, | -0.131 |
is | -0.128 |
patiently | -0.113 |
proudly | -0.106 |
happily | -0.106 |
Token | chewing |
Token ID | 36615 |
Feature activation | +2.13e-01 |
Pos logit contributions | |
ipt | +0.071 |
cigarettes | +0.070 |
ric | +0.069 |
oes | +0.068 |
Warning | +0.065 |
Neg logit contributions | |
Princess | -0.034 |
, | -0.032 |
chef | -0.032 |
matching | -0.031 |
feast | -0.029 |
Token | the |
Token ID | 262 |
Feature activation | +9.24e-01 |
Pos logit contributions | |
ric | +0.006 |
ipt | +0.006 |
cigarettes | +0.006 |
umbling | +0.006 |
spe | +0.006 |
Neg logit contributions | |
Princess | -0.003 |
chef | -0.003 |
matching | -0.003 |
is | -0.002 |
succeeded | -0.002 |
Token | apple |
Token ID | 17180 |
Feature activation | +2.48e+00 |
Pos logit contributions | |
ric | +0.024 |
ipt | +0.022 |
cigarettes | +0.022 |
os | +0.022 |
spe | +0.022 |
Neg logit contributions | |
Princess | -0.016 |
chef | -0.016 |
matching | -0.015 |
feast | -0.015 |
party | -0.014 |
Token | . |
Token ID | 13 |
Feature activation | -9.25e-01 |
Pos logit contributions | |
ric | +0.079 |
ipt | +0.073 |
spe | +0.071 |
cigarettes | +0.070 |
urious | +0.070 |
Neg logit contributions | |
, | -0.033 |
better | -0.029 |
matching | -0.027 |
chef | -0.027 |
Princess | -0.027 |
Token | the |
Token ID | 262 |
Feature activation | +8.90e-01 |
Pos logit contributions | |
ric | +0.050 |
cigarettes | +0.048 |
ipt | +0.047 |
umbling | +0.047 |
ho | +0.046 |
Neg logit contributions | |
Princess | -0.032 |
chef | -0.030 |
matching | -0.028 |
feast | -0.027 |
succeeded | -0.027 |
Token | ground |
Token ID | 2323 |
Feature activation | +3.54e+00 |
Pos logit contributions | |
ric | +0.020 |
ipt | +0.019 |
os | +0.019 |
cigarettes | +0.018 |
beh | +0.018 |
Neg logit contributions | |
Princess | -0.018 |
chef | -0.018 |
matching | -0.017 |
feast | -0.017 |
succeeded | -0.016 |
Token | with |
Token ID | 351 |
Feature activation | +1.07e+00 |
Pos logit contributions | |
ric | +0.111 |
cigarettes | +0.104 |
grand | +0.103 |
ipt | +0.102 |
beh | +0.102 |
Neg logit contributions | |
, | -0.043 |
patiently | -0.038 |
matching | -0.037 |
again | -0.036 |
Princess | -0.036 |
Token | a |
Token ID | 257 |
Feature activation | +1.38e+00 |
Pos logit contributions | |
ric | +0.031 |
umbling | +0.030 |
ipt | +0.029 |
ho | +0.029 |
spe | +0.029 |
Neg logit contributions | |
Princess | -0.015 |
matching | -0.014 |
chef | -0.014 |
good | -0.014 |
tea | -0.013 |
Token | th |
Token ID | 294 |
Feature activation | +2.48e+00 |
Pos logit contributions | |
ric | +0.033 |
ipt | +0.032 |
cigarettes | +0.031 |
beh | +0.030 |
umbling | +0.030 |
Neg logit contributions | |
Princess | -0.025 |
chef | -0.024 |
matching | -0.024 |
good | -0.023 |
succeeded | -0.023 |
Token | ud |
Token ID | 463 |
Feature activation | +5.45e+00 |
Pos logit contributions | |
ric | +0.046 |
cigarettes | +0.046 |
spe | +0.045 |
ho | +0.043 |
oes | +0.040 |
Neg logit contributions | |
is | -0.052 |
Princess | -0.052 |
chef | -0.051 |
succeeded | -0.049 |
tea | -0.048 |
Token | . |
Token ID | 13 |
Feature activation | +1.01e+00 |
Pos logit contributions | |
ric | +0.159 |
urious | +0.156 |
ipt | +0.148 |
cigarettes | +0.146 |
beh | +0.144 |
Neg logit contributions | |
, | -0.074 |
chef | -0.068 |
is | -0.067 |
patiently | -0.064 |
matching | -0.063 |
Token |
|
Token ID | 198 |
Feature activation | +2.52e+00 |
Pos logit contributions | |
ric | +0.022 |
cigarettes | +0.020 |
ho | +0.020 |
ipt | +0.020 |
umbling | +0.019 |
Neg logit contributions | |
Princess | -0.022 |
chef | -0.021 |
matching | -0.019 |
succeeded | -0.019 |
is | -0.018 |
Token |
|
Token ID | 198 |
Feature activation | +2.01e+00 |
Pos logit contributions | |
ric | +0.061 |
cigarettes | +0.058 |
ho | +0.056 |
ipt | +0.055 |
umbling | +0.055 |
Neg logit contributions | |
Princess | -0.046 |
chef | -0.043 |
matching | -0.041 |
is | -0.040 |
succeeded | -0.040 |
Token | " |
Token ID | 1 |
Feature activation | +2.11e+00 |
Pos logit contributions | |
ric | +0.050 |
cigarettes | +0.046 |
ho | +0.045 |
ipt | +0.045 |
umbling | +0.044 |
Neg logit contributions | |
Princess | -0.037 |
chef | -0.035 |
matching | -0.033 |
is | -0.032 |
succeeded | -0.032 |
Token | Tom |
Token ID | 13787 |
Feature activation | +4.91e-02 |
Pos logit contributions | |
ric | +0.049 |
cigarettes | +0.041 |
oes | +0.041 |
os | +0.040 |
ho | +0.040 |
Neg logit contributions | |
Princess | -0.046 |
chef | -0.043 |
succeeded | -0.042 |
judges | -0.041 |
matching | -0.040 |
Token | dod |
Token ID | 20764 |
Feature activation | +3.00e+00 |
Pos logit contributions | |
ric | +0.040 |
cigarettes | +0.037 |
ho | +0.036 |
ipt | +0.036 |
umbling | +0.035 |
Neg logit contributions | |
Princess | -0.046 |
chef | -0.044 |
feast | -0.041 |
matching | -0.041 |
designer | -0.041 |
Token | ges |
Token ID | 3212 |
Feature activation | +2.57e+00 |
Pos logit contributions | |
cigarettes | +0.053 |
grand | +0.052 |
ric | +0.052 |
spe | +0.049 |
urious | +0.047 |
Neg logit contributions | |
matching | -0.066 |
, | -0.065 |
chef | -0.063 |
patiently | -0.063 |
Princess | -0.062 |
Token | . |
Token ID | 13 |
Feature activation | +5.36e-01 |
Pos logit contributions | |
ric | +0.083 |
cigarettes | +0.073 |
grand | +0.072 |
umbling | +0.072 |
spe | +0.072 |
Neg logit contributions | |
, | -0.032 |
Princess | -0.031 |
better | -0.031 |
good | -0.030 |
chef | -0.029 |
Token | The |
Token ID | 383 |
Feature activation | +1.44e+00 |
Pos logit contributions | |
ric | +0.008 |
cigarettes | +0.008 |
ipt | +0.008 |
ho | +0.007 |
umbling | +0.007 |
Neg logit contributions | |
Princess | -0.014 |
chef | -0.014 |
matching | -0.013 |
feast | -0.013 |
is | -0.013 |
Token | sharp |
Token ID | 7786 |
Feature activation | +2.82e+00 |
Pos logit contributions | |
ric | +0.037 |
ipt | +0.036 |
cigarettes | +0.035 |
beh | +0.034 |
umbling | +0.034 |
Neg logit contributions | |
Princess | -0.024 |
matching | -0.024 |
chef | -0.023 |
good | -0.021 |
judges | -0.021 |
Token | point |
Token ID | 966 |
Feature activation | +5.40e+00 |
Pos logit contributions | |
ric | +0.058 |
ipt | +0.052 |
cigarettes | +0.052 |
umbling | +0.051 |
beh | +0.051 |
Neg logit contributions | |
Princess | -0.060 |
chef | -0.059 |
matching | -0.058 |
is | -0.055 |
succeeded | -0.055 |
Token | hits |
Token ID | 7127 |
Feature activation | +2.88e+00 |
Pos logit contributions | |
ric | +0.135 |
ipt | +0.131 |
cigarettes | +0.131 |
keeping | +0.126 |
Un | +0.126 |
Neg logit contributions | |
, | -0.089 |
matching | -0.083 |
chef | -0.081 |
is | -0.079 |
patiently | -0.077 |
Token | Lily |
Token ID | 20037 |
Feature activation | +3.13e+00 |
Pos logit contributions | |
ric | +0.076 |
cigarettes | +0.070 |
umbling | +0.069 |
spe | +0.069 |
ipt | +0.069 |
Neg logit contributions | |
Princess | -0.047 |
chef | -0.043 |
matching | -0.043 |
chess | -0.040 |
better | -0.040 |
Token | in |
Token ID | 287 |
Feature activation | +2.58e+00 |
Pos logit contributions | |
ipt | +0.101 |
ric | +0.099 |
spe | +0.095 |
cigarettes | +0.095 |
urious | +0.094 |
Neg logit contributions | |
, | -0.036 |
better | -0.029 |
matching | -0.028 |
Princess | -0.027 |
again | -0.026 |
Token | the |
Token ID | 262 |
Feature activation | +1.27e-01 |
Pos logit contributions | |
ric | +0.078 |
beh | +0.072 |
oes | +0.071 |
ho | +0.070 |
umbling | +0.069 |
Neg logit contributions | |
Princess | -0.037 |
matching | -0.036 |
good | -0.035 |
, | -0.035 |
success | -0.033 |
Token | eye |
Token ID | 4151 |
Feature activation | +1.19e+00 |
Pos logit contributions | |
ric | +0.003 |
ipt | +0.003 |
cigarettes | +0.003 |
ho | +0.003 |
umbling | +0.003 |
Neg logit contributions | |
Princess | -0.002 |
chef | -0.002 |
matching | -0.002 |
party | -0.002 |
succeeded | -0.002 |
Token | look |
Token ID | 804 |
Feature activation | -1.34e+00 |
Pos logit contributions | |
ipt | +0.046 |
ric | +0.045 |
cigarettes | +0.044 |
umbling | +0.042 |
ho | +0.041 |
Neg logit contributions | |
Princess | -0.038 |
matching | -0.037 |
chef | -0.037 |
feast | -0.036 |
is | -0.036 |
Token | tasty |
Token ID | 25103 |
Feature activation | -2.45e+00 |
Pos logit contributions | |
Princess | +0.024 |
chef | +0.023 |
succeeded | +0.022 |
designer | +0.022 |
feast | +0.021 |
Neg logit contributions | |
cigarettes | -0.031 |
ric | -0.031 |
ho | -0.028 |
umbling | -0.027 |
spe | -0.027 |
Token | ," |
Token ID | 553 |
Feature activation | -1.38e+00 |
Pos logit contributions | |
Princess | +0.045 |
chef | +0.042 |
designer | +0.040 |
succeeded | +0.039 |
matching | +0.039 |
Neg logit contributions | |
cigarettes | -0.057 |
ho | -0.054 |
ric | -0.054 |
ipt | -0.053 |
umbling | -0.052 |
Token | said |
Token ID | 531 |
Feature activation | -1.77e+00 |
Pos logit contributions | |
Princess | +0.018 |
chef | +0.016 |
, | +0.016 |
better | +0.014 |
Benjamin | +0.014 |
Neg logit contributions | |
ric | -0.042 |
umbling | -0.041 |
spe | -0.040 |
cigarettes | -0.038 |
angs | -0.038 |
Token | Stri |
Token ID | 26137 |
Feature activation | +1.19e+00 |
Pos logit contributions | |
Princess | +0.042 |
chef | +0.040 |
matching | +0.037 |
feast | +0.036 |
designer | +0.036 |
Neg logit contributions | |
ric | -0.033 |
cigarettes | -0.031 |
umbling | -0.031 |
ipt | -0.031 |
ho | -0.030 |
Token | pe |
Token ID | 431 |
Feature activation | +5.35e+00 |
Pos logit contributions | |
ric | +0.015 |
cigarettes | +0.013 |
ipt | +0.013 |
ho | +0.013 |
umbling | +0.012 |
Neg logit contributions | |
Princess | -0.036 |
chef | -0.035 |
matching | -0.033 |
feast | -0.033 |
designer | -0.033 |
Token | y |
Token ID | 88 |
Feature activation | +2.08e+00 |
Pos logit contributions | |
Warning | +0.100 |
Uh | +0.096 |
ipt | +0.093 |
Danger | +0.091 |
Oh | +0.090 |
Neg logit contributions | |
, | -0.122 |
is | -0.113 |
patiently | -0.111 |
proudly | -0.110 |
happily | -0.103 |
Token | . |
Token ID | 13 |
Feature activation | -5.38e-01 |
Pos logit contributions | |
ric | +0.043 |
cigarettes | +0.042 |
ipt | +0.041 |
ho | +0.040 |
umbling | +0.040 |
Neg logit contributions | |
Princess | -0.043 |
chef | -0.041 |
matching | -0.039 |
feast | -0.039 |
succeeded | -0.039 |
Token |
|
Token ID | 198 |
Feature activation | +8.89e-01 |
Pos logit contributions | |
Princess | +0.010 |
chef | +0.009 |
matching | +0.009 |
is | +0.008 |
patiently | +0.008 |
Neg logit contributions | |
ric | -0.013 |
cigarettes | -0.012 |
umbling | -0.012 |
ho | -0.012 |
ipt | -0.012 |
Token |
|
Token ID | 198 |
Feature activation | -1.61e-01 |
Pos logit contributions | |
ric | +0.022 |
cigarettes | +0.021 |
ho | +0.020 |
umbling | +0.020 |
ipt | +0.020 |
Neg logit contributions | |
Princess | -0.016 |
chef | -0.014 |
is | -0.014 |
matching | -0.014 |
patiently | -0.013 |
Token | The |
Token ID | 464 |
Feature activation | +8.26e-01 |
Pos logit contributions | |
Princess | +0.003 |
is | +0.003 |
chef | +0.003 |
matching | +0.003 |
patiently | +0.003 |
Neg logit contributions | |
ric | -0.004 |
cigarettes | -0.004 |
umbling | -0.003 |
spe | -0.003 |
ho | -0.003 |
Token | Hello |
Token ID | 15496 |
Feature activation | -2.29e+00 |
Pos logit contributions | |
ric | +0.028 |
cigarettes | +0.027 |
run | +0.025 |
packs | +0.025 |
umbling | +0.025 |
Neg logit contributions | |
Princess | -0.027 |
chef | -0.024 |
judges | -0.023 |
is | -0.022 |
designer | -0.022 |
Token | !" |
Token ID | 2474 |
Feature activation | -1.21e+00 |
Pos logit contributions | |
succeeded | +0.035 |
matching | +0.035 |
is | +0.035 |
patiently | +0.035 |
chef | +0.034 |
Neg logit contributions | |
ric | -0.051 |
cigarettes | -0.051 |
ipt | -0.049 |
ho | -0.049 |
ostr | -0.048 |
Token | said |
Token ID | 531 |
Feature activation | -1.24e+00 |
Pos logit contributions | |
Princess | +0.023 |
chef | +0.020 |
matching | +0.019 |
succeeded | +0.019 |
designer | +0.018 |
Neg logit contributions | |
ric | -0.031 |
ipt | -0.028 |
umbling | -0.028 |
cigarettes | -0.028 |
ho | -0.027 |
Token | the |
Token ID | 262 |
Feature activation | +8.99e-01 |
Pos logit contributions | |
chef | +0.024 |
is | +0.023 |
Princess | +0.023 |
matching | +0.023 |
feast | +0.022 |
Neg logit contributions | |
cigarettes | -0.025 |
ric | -0.025 |
ho | -0.025 |
ipt | -0.024 |
spe | -0.023 |
Token | wood |
Token ID | 4898 |
Feature activation | +3.40e+00 |
Pos logit contributions | |
cigarettes | +0.017 |
ric | +0.017 |
ostr | +0.017 |
ho | +0.017 |
ipt | +0.016 |
Neg logit contributions | |
parties | -0.016 |
patiently | -0.016 |
orting | -0.016 |
feast | -0.016 |
matching | -0.015 |
Token | pe |
Token ID | 431 |
Feature activation | +5.33e+00 |
Pos logit contributions | |
ric | +0.062 |
ipt | +0.058 |
cigarettes | +0.056 |
umbling | +0.054 |
urious | +0.054 |
Neg logit contributions | |
chef | -0.081 |
Princess | -0.081 |
is | -0.076 |
patiently | -0.076 |
matching | -0.075 |
Token | cker |
Token ID | 15280 |
Feature activation | +2.98e+00 |
Pos logit contributions | |
Warning | +0.078 |
ipt | +0.075 |
cigarettes | +0.070 |
Uh | +0.068 |
Oh | +0.066 |
Neg logit contributions | |
, | -0.142 |
patiently | -0.138 |
is | -0.135 |
proudly | -0.130 |
happily | -0.124 |
Token | . |
Token ID | 13 |
Feature activation | +2.49e-02 |
Pos logit contributions | |
ric | +0.077 |
ipt | +0.074 |
cigarettes | +0.074 |
umbling | +0.073 |
Warning | +0.070 |
Neg logit contributions | |
Princess | -0.049 |
chef | -0.045 |
matching | -0.042 |
patiently | -0.041 |
is | -0.040 |
Token |
|
Token ID | 198 |
Feature activation | +1.44e+00 |
Pos logit contributions | |
ric | +0.000 |
cigarettes | +0.000 |
ipt | +0.000 |
ho | +0.000 |
umbling | +0.000 |
Neg logit contributions | |
Princess | -0.001 |
chef | -0.001 |
matching | -0.000 |
feast | -0.000 |
is | -0.000 |
Token |
|
Token ID | 198 |
Feature activation | +7.32e-01 |
Pos logit contributions | |
ric | +0.026 |
ipt | +0.026 |
cigarettes | +0.025 |
ho | +0.025 |
umbling | +0.024 |
Neg logit contributions | |
Princess | -0.033 |
chef | -0.032 |
feast | -0.030 |
matching | -0.030 |
party | -0.030 |
Token | " |
Token ID | 1 |
Feature activation | +1.61e+00 |
Pos logit contributions | |
ric | +0.015 |
cigarettes | +0.014 |
ipt | +0.014 |
ho | +0.014 |
umbling | +0.014 |
Neg logit contributions | |
Princess | -0.015 |
chef | -0.015 |
matching | -0.014 |
feast | -0.014 |
succeeded | -0.014 |
Token | , |
Token ID | 11 |
Feature activation | +2.78e-01 |
Pos logit contributions | |
Majesty | +0.037 |
succeeded | +0.035 |
majesty | +0.034 |
is | +0.034 |
ouring | +0.032 |
Neg logit contributions | |
ho | -0.036 |
trem | -0.035 |
cigarettes | -0.035 |
alarm | -0.034 |
rats | -0.034 |
Token | a |
Token ID | 257 |
Feature activation | -5.85e-01 |
Pos logit contributions | |
ric | +0.006 |
cigarettes | +0.006 |
ipt | +0.006 |
ho | +0.005 |
umbling | +0.005 |
Neg logit contributions | |
Princess | -0.006 |
chef | -0.005 |
matching | -0.005 |
feast | -0.005 |
is | -0.005 |
Token | tiny |
Token ID | 7009 |
Feature activation | +1.67e+00 |
Pos logit contributions | |
parties | +0.012 |
succeeded | +0.012 |
Princess | +0.012 |
matching | +0.011 |
patiently | +0.011 |
Neg logit contributions | |
ho | -0.011 |
cigarettes | -0.011 |
ric | -0.011 |
Un | -0.010 |
ostr | -0.010 |
Token | mouse |
Token ID | 10211 |
Feature activation | +1.61e+00 |
Pos logit contributions | |
cigarettes | +0.030 |
ric | +0.030 |
ho | +0.029 |
ipt | +0.028 |
umbling | +0.027 |
Neg logit contributions | |
Princess | -0.037 |
succeeded | -0.036 |
matching | -0.035 |
chef | -0.035 |
parties | -0.035 |
Token | t |
Token ID | 256 |
Feature activation | +3.47e+00 |
Pos logit contributions | |
ric | +0.043 |
ipt | +0.040 |
cigarettes | +0.040 |
umbling | +0.039 |
spe | +0.038 |
Neg logit contributions | |
Princess | -0.027 |
chef | -0.025 |
feast | -0.023 |
matching | -0.023 |
succeeded | -0.023 |
Token | ipt |
Token ID | 10257 |
Feature activation | +5.32e+00 |
Pos logit contributions | |
ric | +0.073 |
cigarettes | +0.070 |
Warning | +0.064 |
spe | +0.061 |
Danger | +0.059 |
Neg logit contributions | |
chef | -0.064 |
is | -0.063 |
ingu | -0.062 |
designer | -0.060 |
proudly | -0.060 |
Token | o |
Token ID | 78 |
Feature activation | +9.74e-01 |
Pos logit contributions | |
cigarettes | +0.088 |
ric | +0.085 |
gag | +0.082 |
rats | +0.078 |
Warning | +0.075 |
Neg logit contributions | |
is | -0.118 |
patiently | -0.116 |
Princess | -0.114 |
proudly | -0.113 |
ored | -0.112 |
Token | ed |
Token ID | 276 |
Feature activation | +9.91e-01 |
Pos logit contributions | |
ric | +0.021 |
cigarettes | +0.021 |
ipt | +0.021 |
ho | +0.020 |
umbling | +0.020 |
Neg logit contributions | |
Princess | -0.019 |
chef | -0.018 |
matching | -0.017 |
feast | -0.017 |
is | -0.017 |
Token | by |
Token ID | 416 |
Feature activation | +2.68e+00 |
Pos logit contributions | |
cigarettes | +0.022 |
ric | +0.022 |
ho | +0.021 |
ipt | +0.021 |
umbling | +0.020 |
Neg logit contributions | |
Princess | -0.019 |
chef | -0.018 |
is | -0.017 |
designer | -0.017 |
matching | -0.017 |
Token | and |
Token ID | 290 |
Feature activation | +2.01e-01 |
Pos logit contributions | |
ric | +0.071 |
cigarettes | +0.068 |
ipt | +0.066 |
umbling | +0.066 |
ho | +0.065 |
Neg logit contributions | |
Princess | -0.044 |
chef | -0.040 |
matching | -0.039 |
patiently | -0.038 |
feast | -0.037 |
Token | saw |
Token ID | 2497 |
Feature activation | +1.15e+00 |
Pos logit contributions | |
ho | +0.004 |
cigarettes | +0.004 |
ric | +0.004 |
ipt | +0.004 |
os | +0.003 |
Neg logit contributions | |
Princess | -0.004 |
is | -0.004 |
chef | -0.004 |
parties | -0.004 |
designer | -0.004 |
Token | walking |
Token ID | 6155 |
Feature activation | +2.04e+00 |
Pos logit contributions | |
ric | +0.062 |
ipt | +0.062 |
cigarettes | +0.060 |
umbling | +0.059 |
ho | +0.057 |
Neg logit contributions | |
Princess | -0.042 |
chef | -0.039 |
matching | -0.039 |
party | -0.038 |
feast | -0.037 |
Token | on |
Token ID | 319 |
Feature activation | +1.72e+00 |
Pos logit contributions | |
ric | +0.068 |
cigarettes | +0.060 |
grand | +0.058 |
ometers | +0.058 |
gag | +0.058 |
Neg logit contributions | |
, | -0.023 |
Princess | -0.023 |
proudly | -0.019 |
happily | -0.019 |
chess | -0.019 |
Token | a |
Token ID | 257 |
Feature activation | -3.49e-01 |
Pos logit contributions | |
ric | +0.050 |
umbling | +0.048 |
ipt | +0.047 |
cigarettes | +0.047 |
ho | +0.046 |
Neg logit contributions | |
Princess | -0.026 |
chef | -0.023 |
matching | -0.022 |
tea | -0.020 |
party | -0.020 |
Token | tight |
Token ID | 5381 |
Feature activation | +1.60e+00 |
Pos logit contributions | |
Princess | +0.008 |
chef | +0.008 |
matching | +0.008 |
party | +0.008 |
is | +0.008 |
Neg logit contributions | |
ric | -0.006 |
ipt | -0.006 |
cigarettes | -0.006 |
umbling | -0.006 |
ho | -0.006 |
Token | ro |
Token ID | 305 |
Feature activation | +1.80e+00 |
Pos logit contributions | |
ric | +0.030 |
cigarettes | +0.029 |
ipt | +0.028 |
umbling | +0.027 |
beh | +0.027 |
Neg logit contributions | |
Princess | -0.038 |
chef | -0.036 |
matching | -0.035 |
party | -0.034 |
feast | -0.034 |
Token | pe |
Token ID | 431 |
Feature activation | +5.31e+00 |
Pos logit contributions | |
ric | +0.030 |
cigarettes | +0.029 |
ipt | +0.029 |
umbling | +0.027 |
oes | +0.026 |
Neg logit contributions | |
Princess | -0.045 |
chef | -0.043 |
matching | -0.042 |
is | -0.041 |
feast | -0.040 |
Token | . |
Token ID | 13 |
Feature activation | -2.25e-01 |
Pos logit contributions | |
ipt | +0.149 |
Warning | +0.137 |
spe | +0.136 |
cigarettes | +0.135 |
run | +0.133 |
Neg logit contributions | |
, | -0.080 |
is | -0.072 |
matching | -0.061 |
patiently | -0.060 |
party | -0.056 |
Token | He |
Token ID | 679 |
Feature activation | -1.98e-02 |
Pos logit contributions | |
Princess | +0.006 |
chef | +0.006 |
matching | +0.005 |
feast | +0.005 |
succeeded | +0.005 |
Neg logit contributions | |
ric | -0.004 |
cigarettes | -0.003 |
ipt | -0.003 |
umbling | -0.003 |
ho | -0.003 |
Token | had |
Token ID | 550 |
Feature activation | +1.36e+00 |
Pos logit contributions | |
Princess | +0.000 |
chef | +0.000 |
succeeded | +0.000 |
matching | +0.000 |
is | +0.000 |
Neg logit contributions | |
ric | -0.000 |
cigarettes | -0.000 |
umbling | -0.000 |
ipt | -0.000 |
oes | -0.000 |
Token | to |
Token ID | 284 |
Feature activation | +2.30e+00 |
Pos logit contributions | |
ric | +0.034 |
ipt | +0.032 |
cigarettes | +0.032 |
ho | +0.032 |
umbling | +0.032 |
Neg logit contributions | |
Princess | -0.024 |
chef | -0.022 |
matching | -0.022 |
succeeded | -0.021 |
party | -0.020 |
Token | balance |
Token ID | 5236 |
Feature activation | +2.10e-01 |
Pos logit contributions | |
ric | +0.053 |
ipt | +0.050 |
cigarettes | +0.049 |
umbling | +0.049 |
ho | +0.049 |
Neg logit contributions | |
Princess | -0.047 |
chef | -0.043 |
party | -0.042 |
matching | -0.042 |
feast | -0.041 |
Token | the |
Token ID | 262 |
Feature activation | +2.69e-01 |
Pos logit contributions | |
ric | +0.051 |
cigarettes | +0.048 |
ipt | +0.048 |
umbling | +0.047 |
ho | +0.047 |
Neg logit contributions | |
Princess | -0.033 |
chef | -0.031 |
matching | -0.029 |
is | -0.027 |
feast | -0.026 |
Token | ground |
Token ID | 2323 |
Feature activation | +2.82e+00 |
Pos logit contributions | |
ric | +0.005 |
cigarettes | +0.005 |
ipt | +0.005 |
ho | +0.005 |
umbling | +0.005 |
Neg logit contributions | |
Princess | -0.006 |
chef | -0.006 |
matching | -0.006 |
is | -0.005 |
feast | -0.005 |
Token | with |
Token ID | 351 |
Feature activation | +1.06e+00 |
Pos logit contributions | |
ric | +0.077 |
cigarettes | +0.074 |
ipt | +0.072 |
ho | +0.070 |
umbling | +0.069 |
Neg logit contributions | |
Princess | -0.043 |
chef | -0.040 |
matching | -0.038 |
is | -0.036 |
patiently | -0.036 |
Token | a |
Token ID | 257 |
Feature activation | +1.02e+00 |
Pos logit contributions | |
ric | +0.027 |
umbling | +0.025 |
ipt | +0.025 |
ho | +0.025 |
urious | +0.024 |
Neg logit contributions | |
Princess | -0.019 |
chef | -0.018 |
matching | -0.018 |
tea | -0.017 |
good | -0.016 |
Token | th |
Token ID | 294 |
Feature activation | +2.05e+00 |
Pos logit contributions | |
ric | +0.024 |
ipt | +0.023 |
cigarettes | +0.023 |
umbling | +0.022 |
ho | +0.022 |
Neg logit contributions | |
Princess | -0.019 |
chef | -0.018 |
matching | -0.017 |
feast | -0.017 |
succeeded | -0.016 |
Token | ud |
Token ID | 463 |
Feature activation | +5.31e+00 |
Pos logit contributions | |
ric | +0.036 |
cigarettes | +0.035 |
spe | +0.033 |
ho | +0.033 |
ipt | +0.031 |
Neg logit contributions | |
Princess | -0.047 |
is | -0.046 |
chef | -0.046 |
succeeded | -0.044 |
matching | -0.043 |
Token | . |
Token ID | 13 |
Feature activation | +2.96e-01 |
Pos logit contributions | |
urious | +0.156 |
ric | +0.155 |
ipt | +0.147 |
beh | +0.146 |
cigarettes | +0.146 |
Neg logit contributions | |
, | -0.070 |
is | -0.064 |
chef | -0.062 |
matching | -0.058 |
patiently | -0.058 |
Token |
|
Token ID | 198 |
Feature activation | +1.65e+00 |
Pos logit contributions | |
ric | +0.006 |
cigarettes | +0.005 |
ho | +0.005 |
ipt | +0.005 |
umbling | +0.005 |
Neg logit contributions | |
Princess | -0.007 |
chef | -0.007 |
matching | -0.006 |
succeeded | -0.006 |
is | -0.006 |
Token |
|
Token ID | 198 |
Feature activation | +1.41e+00 |
Pos logit contributions | |
ric | +0.044 |
cigarettes | +0.043 |
ho | +0.041 |
umbling | +0.041 |
spe | +0.040 |
Neg logit contributions | |
Princess | -0.026 |
chef | -0.023 |
matching | -0.023 |
is | -0.022 |
succeeded | -0.022 |
Token | Tom |
Token ID | 13787 |
Feature activation | +9.76e-01 |
Pos logit contributions | |
ric | +0.038 |
cigarettes | +0.035 |
beh | +0.032 |
ho | +0.032 |
ipt | +0.031 |
Neg logit contributions | |
Princess | -0.023 |
chef | -0.023 |
is | -0.023 |
succeeded | -0.022 |
matching | -0.021 |
Token | and |
Token ID | 290 |
Feature activation | +2.50e-01 |
Pos logit contributions | |
ric | +0.020 |
cigarettes | +0.019 |
ipt | +0.019 |
ho | +0.018 |
umbling | +0.018 |
Neg logit contributions | |
Princess | -0.021 |
chef | -0.020 |
matching | -0.019 |
feast | -0.019 |
is | -0.019 |
Token | , |
Token ID | 11 |
Feature activation | +4.40e-01 |
Pos logit contributions | |
ric | +0.044 |
cigarettes | +0.043 |
ipt | +0.042 |
umbling | +0.041 |
beh | +0.041 |
Neg logit contributions | |
Princess | -0.009 |
chef | -0.008 |
matching | -0.007 |
party | -0.006 |
feast | -0.006 |
Token | she |
Token ID | 673 |
Feature activation | +2.59e-02 |
Pos logit contributions | |
ric | +0.009 |
cigarettes | +0.009 |
ipt | +0.008 |
ho | +0.008 |
umbling | +0.008 |
Neg logit contributions | |
Princess | -0.009 |
chef | -0.009 |
matching | -0.009 |
is | -0.008 |
feast | -0.008 |
Token | saw |
Token ID | 2497 |
Feature activation | +8.93e-01 |
Pos logit contributions | |
ric | +0.001 |
cigarettes | +0.001 |
ipt | +0.001 |
umbling | +0.001 |
ho | +0.001 |
Neg logit contributions | |
Princess | -0.000 |
chef | -0.000 |
matching | -0.000 |
succeeded | -0.000 |
feast | -0.000 |
Token | a |
Token ID | 257 |
Feature activation | -2.74e-01 |
Pos logit contributions | |
ric | +0.025 |
cigarettes | +0.024 |
ipt | +0.023 |
umbling | +0.023 |
ho | +0.023 |
Neg logit contributions | |
Princess | -0.014 |
chef | -0.012 |
matching | -0.011 |
succeeded | -0.011 |
party | -0.011 |
Token | wood |
Token ID | 4898 |
Feature activation | +2.75e+00 |
Pos logit contributions | |
Princess | +0.005 |
chef | +0.005 |
succeeded | +0.005 |
parties | +0.005 |
matching | +0.005 |
Neg logit contributions | |
ric | -0.006 |
cigarettes | -0.006 |
ho | -0.006 |
ipt | -0.005 |
umbling | -0.005 |
Token | pe |
Token ID | 431 |
Feature activation | +5.29e+00 |
Pos logit contributions | |
ric | +0.071 |
ipt | +0.071 |
cigarettes | +0.070 |
urious | +0.069 |
beh | +0.068 |
Neg logit contributions | |
chef | -0.041 |
matching | -0.039 |
Princess | -0.038 |
is | -0.037 |
party | -0.036 |
Token | cker |
Token ID | 15280 |
Feature activation | +2.41e+00 |
Pos logit contributions | |
ipt | +0.104 |
cigarettes | +0.099 |
Warning | +0.097 |
run | +0.096 |
beh | +0.092 |
Neg logit contributions | |
is | -0.112 |
, | -0.110 |
matching | -0.103 |
patiently | -0.102 |
finished | -0.093 |
Token | . |
Token ID | 13 |
Feature activation | +1.12e-01 |
Pos logit contributions | |
ipt | +0.065 |
cigarettes | +0.061 |
umbling | +0.060 |
ric | +0.060 |
oes | +0.059 |
Neg logit contributions | |
, | -0.035 |
matching | -0.035 |
Princess | -0.035 |
chef | -0.033 |
party | -0.032 |
Token | It |
Token ID | 632 |
Feature activation | +6.35e-01 |
Pos logit contributions | |
ric | +0.002 |
cigarettes | +0.002 |
umbling | +0.002 |
ipt | +0.002 |
ho | +0.002 |
Neg logit contributions | |
Princess | -0.003 |
chef | -0.003 |
matching | -0.002 |
designer | -0.002 |
feast | -0.002 |
Token | was |
Token ID | 373 |
Feature activation | +2.53e+00 |
Pos logit contributions | |
ric | +0.017 |
ipt | +0.017 |
umbling | +0.016 |
cigarettes | +0.016 |
packs | +0.016 |
Neg logit contributions | |
succeeded | -0.010 |
Princess | -0.009 |
matching | -0.009 |
, | -0.009 |
is | -0.009 |
Token | enjoying |
Token ID | 13226 |
Feature activation | +3.78e-01 |
Pos logit contributions | |
ric | +0.068 |
ipt | +0.068 |
umbling | +0.065 |
urious | +0.065 |
cigarettes | +0.064 |
Neg logit contributions | |
Princess | -0.039 |
matching | -0.037 |
chef | -0.037 |
patiently | -0.033 |
good | -0.032 |
Token | . |
Token ID | 13 |
Feature activation | +7.14e-01 |
Pos logit contributions | |
ric | +0.024 |
ipt | +0.023 |
cigarettes | +0.022 |
ho | +0.022 |
umbling | +0.021 |
Neg logit contributions | |
Princess | -0.017 |
chef | -0.016 |
matching | -0.015 |
is | -0.014 |
party | -0.014 |
Token | It |
Token ID | 632 |
Feature activation | +2.06e+00 |
Pos logit contributions | |
ric | +0.017 |
spe | +0.016 |
ho | +0.016 |
beh | +0.016 |
cker | +0.015 |
Neg logit contributions | |
Princess | -0.015 |
chef | -0.012 |
Marie | -0.012 |
matching | -0.011 |
Penny | -0.011 |
Token | makes |
Token ID | 1838 |
Feature activation | +1.57e+00 |
Pos logit contributions | |
ipt | +0.052 |
umbling | +0.049 |
ric | +0.049 |
spe | +0.046 |
cigarettes | +0.046 |
Neg logit contributions | |
Princess | -0.038 |
chef | -0.037 |
tea | -0.034 |
matching | -0.034 |
better | -0.034 |
Token | her |
Token ID | 607 |
Feature activation | +2.19e+00 |
Pos logit contributions | |
ric | +0.043 |
ho | +0.042 |
spe | +0.042 |
ipt | +0.042 |
umbling | +0.041 |
Neg logit contributions | |
Princess | -0.025 |
tea | -0.024 |
chef | -0.022 |
matching | -0.022 |
patiently | -0.021 |
Token | face |
Token ID | 1986 |
Feature activation | +2.36e+00 |
Pos logit contributions | |
ipt | +0.064 |
Warning | +0.061 |
umbling | +0.058 |
Un | +0.057 |
beh | +0.057 |
Neg logit contributions | |
tea | -0.034 |
better | -0.032 |
best | -0.032 |
good | -0.032 |
Princess | -0.032 |
Token | scr |
Token ID | 6040 |
Feature activation | +5.28e+00 |
Pos logit contributions | |
ipt | +0.061 |
ric | +0.057 |
Warning | +0.054 |
cigarettes | +0.054 |
umbling | +0.054 |
Neg logit contributions | |
Princess | -0.040 |
better | -0.040 |
chef | -0.039 |
good | -0.039 |
tea | -0.038 |
Token | unch |
Token ID | 3316 |
Feature activation | +2.03e+00 |
Pos logit contributions | |
cigarettes | +0.129 |
spe | +0.125 |
ric | +0.124 |
ho | +0.118 |
packs | +0.116 |
Neg logit contributions | |
is | -0.088 |
patiently | -0.077 |
chef | -0.074 |
matching | -0.071 |
ingu | -0.070 |
Token | up |
Token ID | 510 |
Feature activation | +6.08e-01 |
Pos logit contributions | |
ric | +0.053 |
cigarettes | +0.050 |
ipt | +0.050 |
spe | +0.047 |
umbling | +0.047 |
Neg logit contributions | |
Princess | -0.033 |
chef | -0.031 |
is | -0.029 |
matching | -0.028 |
patiently | -0.028 |
Token | . |
Token ID | 13 |
Feature activation | +4.42e-01 |
Pos logit contributions | |
ho | +0.012 |
cigarettes | +0.012 |
ric | +0.012 |
ipt | +0.011 |
umbling | +0.011 |
Neg logit contributions | |
Princess | -0.013 |
chef | -0.012 |
designer | -0.011 |
succeeded | -0.011 |
is | -0.011 |
Token | She |
Token ID | 1375 |
Feature activation | +6.71e-01 |
Pos logit contributions | |
ric | +0.010 |
cigarettes | +0.010 |
spe | +0.009 |
ho | +0.009 |
beh | +0.009 |
Neg logit contributions | |
Princess | -0.009 |
chef | -0.008 |
matching | -0.008 |
succeeded | -0.007 |
patiently | -0.007 |
Token | sp |
Token ID | 599 |
Feature activation | +2.16e+00 |
Pos logit contributions | |
ric | +0.014 |
cigarettes | +0.013 |
ipt | +0.013 |
umbling | +0.013 |
ho | +0.012 |
Neg logit contributions | |
Princess | -0.015 |
chef | -0.014 |
matching | -0.013 |
succeeded | -0.013 |
patiently | -0.013 |
Token | got |
Token ID | 1392 |
Feature activation | +3.35e+00 |
Pos logit contributions | |
ric | +0.097 |
cigarettes | +0.085 |
ipt | +0.083 |
umbling | +0.082 |
spe | +0.081 |
Neg logit contributions | |
succeeded | -0.051 |
Princess | -0.051 |
chef | -0.049 |
, | -0.047 |
matching | -0.047 |
Token | stuck |
Token ID | 7819 |
Feature activation | +2.51e+00 |
Pos logit contributions | |
ric | +0.076 |
cigarettes | +0.070 |
ipt | +0.069 |
umbling | +0.068 |
ho | +0.067 |
Neg logit contributions | |
Princess | -0.063 |
matching | -0.062 |
chef | -0.061 |
succeeded | -0.060 |
patiently | -0.059 |
Token | in |
Token ID | 287 |
Feature activation | +3.54e+00 |
Pos logit contributions | |
ric | +0.061 |
cigarettes | +0.058 |
ipt | +0.057 |
ho | +0.056 |
umbling | +0.055 |
Neg logit contributions | |
Princess | -0.044 |
chef | -0.042 |
matching | -0.040 |
succeeded | -0.039 |
patiently | -0.039 |
Token | some |
Token ID | 617 |
Feature activation | +2.07e+00 |
Pos logit contributions | |
ric | +0.112 |
ho | +0.107 |
oes | +0.106 |
umbling | +0.106 |
beh | +0.105 |
Neg logit contributions | |
matching | -0.040 |
, | -0.036 |
Princess | -0.036 |
good | -0.036 |
success | -0.035 |
Token | dull |
Token ID | 19222 |
Feature activation | +1.48e+00 |
Pos logit contributions | |
ric | +0.056 |
ipt | +0.049 |
ho | +0.048 |
spe | +0.048 |
umbling | +0.047 |
Neg logit contributions | |
matching | -0.034 |
Princess | -0.033 |
chef | -0.032 |
good | -0.032 |
tea | -0.032 |
Token | branches |
Token ID | 13737 |
Feature activation | +5.28e+00 |
Pos logit contributions | |
ric | +0.033 |
cigarettes | +0.031 |
ho | +0.030 |
ipt | +0.030 |
umbling | +0.030 |
Neg logit contributions | |
Princess | -0.030 |
chef | -0.028 |
matching | -0.027 |
succeeded | -0.026 |
feast | -0.026 |
Token | . |
Token ID | 13 |
Feature activation | -8.51e-02 |
Pos logit contributions | |
ric | +0.187 |
ipt | +0.181 |
grand | +0.178 |
urious | +0.177 |
beh | +0.176 |
Neg logit contributions | |
, | -0.057 |
again | -0.034 |
better | -0.033 |
ier | -0.029 |
patiently | -0.028 |
Token |
|
Token ID | 198 |
Feature activation | +1.43e+00 |
Pos logit contributions | |
Princess | +0.002 |
chef | +0.002 |
matching | +0.001 |
succeeded | +0.001 |
feast | +0.001 |
Neg logit contributions | |
ric | -0.002 |
cigarettes | -0.002 |
ho | -0.002 |
umbling | -0.002 |
ipt | -0.002 |
Token |
|
Token ID | 198 |
Feature activation | +8.33e-01 |
Pos logit contributions | |
ric | +0.039 |
cigarettes | +0.037 |
ho | +0.036 |
umbling | +0.036 |
ipt | +0.035 |
Neg logit contributions | |
Princess | -0.022 |
is | -0.019 |
matching | -0.019 |
chef | -0.019 |
, | -0.018 |
Token | Suddenly |
Token ID | 38582 |
Feature activation | -2.67e-01 |
Pos logit contributions | |
ric | +0.020 |
cigarettes | +0.019 |
ho | +0.018 |
ipt | +0.017 |
oes | +0.017 |
Neg logit contributions | |
Princess | -0.016 |
is | -0.015 |
chef | -0.015 |
matching | -0.015 |
succeeded | -0.014 |
Token | , |
Token ID | 11 |
Feature activation | +2.17e-01 |
Pos logit contributions | |
is | +0.005 |
designer | +0.005 |
chef | +0.005 |
Princess | +0.005 |
ingu | +0.005 |
Neg logit contributions | |
cigarettes | -0.005 |
ho | -0.005 |
el | -0.005 |
rats | -0.005 |
ostr | -0.005 |
Token | only |
Token ID | 691 |
Feature activation | +8.92e-01 |
Pos logit contributions | |
ipt | +0.025 |
ric | +0.025 |
umbling | +0.024 |
oes | +0.024 |
cigarettes | +0.023 |
Neg logit contributions | |
succeeded | -0.016 |
Princess | -0.016 |
chef | -0.015 |
matching | -0.015 |
finished | -0.014 |
Token | heard |
Token ID | 2982 |
Feature activation | +2.02e+00 |
Pos logit contributions | |
ric | +0.022 |
cigarettes | +0.020 |
ipt | +0.020 |
umbling | +0.020 |
ho | +0.020 |
Neg logit contributions | |
Princess | -0.016 |
chef | -0.015 |
matching | -0.015 |
succeeded | -0.014 |
feast | -0.014 |
Token | the |
Token ID | 262 |
Feature activation | +1.48e+00 |
Pos logit contributions | |
ric | +0.056 |
ipt | +0.055 |
umbling | +0.055 |
ho | +0.054 |
beh | +0.054 |
Neg logit contributions | |
Princess | -0.031 |
chef | -0.028 |
tea | -0.027 |
matching | -0.025 |
party | -0.024 |
Token | loud |
Token ID | 7812 |
Feature activation | +2.95e+00 |
Pos logit contributions | |
ric | +0.033 |
ipt | +0.033 |
beh | +0.031 |
os | +0.031 |
umbling | +0.030 |
Neg logit contributions | |
chef | -0.029 |
Princess | -0.028 |
matching | -0.027 |
tea | -0.027 |
party | -0.026 |
Token | th |
Token ID | 294 |
Feature activation | +1.98e+00 |
Pos logit contributions | |
ipt | +0.065 |
ric | +0.062 |
urious | +0.060 |
beh | +0.060 |
umbling | +0.059 |
Neg logit contributions | |
Princess | -0.061 |
chef | -0.058 |
tea | -0.056 |
, | -0.056 |
matching | -0.056 |
Token | ud |
Token ID | 463 |
Feature activation | +5.28e+00 |
Pos logit contributions | |
ric | +0.032 |
cigarettes | +0.031 |
ho | +0.031 |
ipt | +0.029 |
spe | +0.029 |
Neg logit contributions | |
Princess | -0.049 |
chef | -0.048 |
is | -0.047 |
matching | -0.045 |
succeeded | -0.045 |
Token | when |
Token ID | 618 |
Feature activation | +2.05e+00 |
Pos logit contributions | |
urious | +0.150 |
ric | +0.145 |
beh | +0.143 |
ipt | +0.138 |
cigarettes | +0.137 |
Neg logit contributions | |
, | -0.074 |
is | -0.072 |
chef | -0.072 |
patiently | -0.066 |
Princess | -0.064 |
Token | the |
Token ID | 262 |
Feature activation | +1.58e+00 |
Pos logit contributions | |
ric | +0.053 |
ipt | +0.050 |
umbling | +0.050 |
cigarettes | +0.049 |
ho | +0.048 |
Neg logit contributions | |
Princess | -0.035 |
chef | -0.033 |
matching | -0.031 |
tea | -0.029 |
feast | -0.029 |
Token | glass |
Token ID | 5405 |
Feature activation | +2.34e+00 |
Pos logit contributions | |
ric | +0.037 |
ipt | +0.036 |
umbling | +0.034 |
beh | +0.034 |
ho | +0.034 |
Neg logit contributions | |
chef | -0.029 |
Princess | -0.029 |
matching | -0.027 |
party | -0.027 |
tea | -0.027 |
Token | ball |
Token ID | 2613 |
Feature activation | +2.63e+00 |
Pos logit contributions | |
ric | +0.058 |
ipt | +0.053 |
cigarettes | +0.053 |
umbling | +0.051 |
beh | +0.051 |
Neg logit contributions | |
chef | -0.042 |
Princess | -0.041 |
party | -0.039 |
matching | -0.038 |
feast | -0.038 |
Token | broke |
Token ID | 6265 |
Feature activation | +1.91e+00 |
Pos logit contributions | |
ric | +0.076 |
ipt | +0.070 |
beh | +0.069 |
urious | +0.069 |
umbling | +0.068 |
Neg logit contributions | |
, | -0.037 |
party | -0.037 |
Princess | -0.036 |
finished | -0.035 |
succeeded | -0.035 |
Token | stop |
Token ID | 2245 |
Feature activation | +1.76e+00 |
Pos logit contributions | |
ric | +0.057 |
cigarettes | +0.055 |
ipt | +0.054 |
ho | +0.052 |
umbling | +0.051 |
Neg logit contributions | |
Princess | -0.048 |
chef | -0.046 |
matching | -0.044 |
succeeded | -0.043 |
patiently | -0.042 |
Token | . |
Token ID | 13 |
Feature activation | +3.03e-01 |
Pos logit contributions | |
ric | +0.044 |
cigarettes | +0.043 |
ipt | +0.043 |
spe | +0.042 |
ho | +0.042 |
Neg logit contributions | |
Princess | -0.028 |
matching | -0.027 |
chef | -0.026 |
, | -0.025 |
patiently | -0.025 |
Token | She |
Token ID | 1375 |
Feature activation | +1.14e+00 |
Pos logit contributions | |
ric | +0.005 |
cigarettes | +0.005 |
ho | +0.005 |
ipt | +0.005 |
umbling | +0.005 |
Neg logit contributions | |
Princess | -0.008 |
chef | -0.007 |
matching | -0.007 |
succeeded | -0.007 |
parties | -0.007 |
Token | hits |
Token ID | 7127 |
Feature activation | +1.68e+00 |
Pos logit contributions | |
ric | +0.025 |
cigarettes | +0.025 |
ipt | +0.025 |
umbling | +0.024 |
spe | +0.024 |
Neg logit contributions | |
Princess | -0.022 |
matching | -0.020 |
chef | -0.020 |
succeeded | -0.020 |
patiently | -0.019 |
Token | the |
Token ID | 262 |
Feature activation | +1.87e+00 |
Pos logit contributions | |
ric | +0.045 |
cigarettes | +0.042 |
ho | +0.042 |
spe | +0.042 |
ipt | +0.042 |
Neg logit contributions | |
Princess | -0.027 |
chef | -0.024 |
matching | -0.023 |
is | -0.022 |
tea | -0.022 |
Token | bump |
Token ID | 13852 |
Feature activation | +5.28e+00 |
Pos logit contributions | |
ric | +0.046 |
cigarettes | +0.045 |
beh | +0.044 |
ipt | +0.044 |
os | +0.044 |
Neg logit contributions | |
matching | -0.030 |
chef | -0.029 |
Princess | -0.029 |
good | -0.028 |
judges | -0.028 |
Token | and |
Token ID | 290 |
Feature activation | +2.37e+00 |
Pos logit contributions | |
ric | +0.172 |
cigarettes | +0.162 |
grand | +0.159 |
urious | +0.159 |
beh | +0.157 |
Neg logit contributions | |
, | -0.057 |
is | -0.051 |
again | -0.047 |
patiently | -0.046 |
matching | -0.046 |
Token | flies |
Token ID | 17607 |
Feature activation | +2.65e+00 |
Pos logit contributions | |
ric | +0.056 |
ipt | +0.055 |
umbling | +0.054 |
cigarettes | +0.054 |
spe | +0.054 |
Neg logit contributions | |
Princess | -0.042 |
matching | -0.041 |
chef | -0.039 |
succeeded | -0.038 |
better | -0.038 |
Token | off |
Token ID | 572 |
Feature activation | +2.54e+00 |
Pos logit contributions | |
ric | +0.081 |
grand | +0.075 |
ipt | +0.074 |
urious | +0.074 |
cigarettes | +0.073 |
Neg logit contributions | |
Princess | -0.032 |
, | -0.031 |
better | -0.030 |
good | -0.029 |
matching | -0.028 |
Token | the |
Token ID | 262 |
Feature activation | +5.68e-01 |
Pos logit contributions | |
ric | +0.075 |
cigarettes | +0.068 |
ipt | +0.068 |
umbling | +0.068 |
urious | +0.067 |
Neg logit contributions | |
Princess | -0.038 |
chef | -0.033 |
matching | -0.031 |
tea | -0.030 |
, | -0.029 |
Token | slide |
Token ID | 10649 |
Feature activation | +1.93e+00 |
Pos logit contributions | |
ric | +0.014 |
cigarettes | +0.013 |
ipt | +0.013 |
os | +0.013 |
beh | +0.012 |
Neg logit contributions | |
Princess | -0.010 |
matching | -0.010 |
chef | -0.009 |
is | -0.009 |
party | -0.009 |
Token | big |
Token ID | 1263 |
Feature activation | -5.85e-01 |
Pos logit contributions | |
Princess | +0.009 |
succeeded | +0.009 |
parties | +0.008 |
chef | +0.008 |
is | +0.008 |
Neg logit contributions | |
ho | -0.009 |
cigarettes | -0.009 |
ric | -0.009 |
umbling | -0.008 |
spe | -0.008 |
Token | earthquake |
Token ID | 16295 |
Feature activation | +3.65e+00 |
Pos logit contributions | |
succeeded | +0.013 |
Princess | +0.013 |
is | +0.012 |
matching | +0.012 |
chef | +0.011 |
Neg logit contributions | |
cigarettes | -0.011 |
ho | -0.011 |
ric | -0.010 |
ostr | -0.010 |
alarm | -0.010 |
Token | that |
Token ID | 326 |
Feature activation | +1.41e+00 |
Pos logit contributions | |
ric | +0.107 |
ipt | +0.104 |
urious | +0.099 |
cigarettes | +0.098 |
Warning | +0.097 |
Neg logit contributions | |
, | -0.049 |
matching | -0.042 |
chef | -0.041 |
is | -0.040 |
Princess | -0.040 |
Token | shook |
Token ID | 14682 |
Feature activation | +2.46e+00 |
Pos logit contributions | |
ric | +0.034 |
ipt | +0.032 |
umbling | +0.031 |
cigarettes | +0.030 |
ho | +0.030 |
Neg logit contributions | |
Princess | -0.026 |
succeeded | -0.025 |
chef | -0.025 |
matching | -0.024 |
party | -0.023 |
Token | the |
Token ID | 262 |
Feature activation | +1.93e+00 |
Pos logit contributions | |
ric | +0.071 |
ipt | +0.070 |
ho | +0.069 |
grand | +0.066 |
spe | +0.066 |
Neg logit contributions | |
, | -0.034 |
good | -0.030 |
better | -0.029 |
Princess | -0.028 |
patiently | -0.028 |
Token | ground |
Token ID | 2323 |
Feature activation | +5.27e+00 |
Pos logit contributions | |
ric | +0.044 |
cigarettes | +0.041 |
run | +0.041 |
ipt | +0.041 |
os | +0.039 |
Neg logit contributions | |
Princess | -0.039 |
chef | -0.038 |
party | -0.034 |
matching | -0.034 |
designer | -0.034 |
Token | and |
Token ID | 290 |
Feature activation | +2.08e+00 |
Pos logit contributions | |
grand | +0.174 |
ipt | +0.171 |
ric | +0.166 |
keeping | +0.163 |
beh | +0.161 |
Neg logit contributions | |
, | -0.070 |
better | -0.051 |
again | -0.045 |
patiently | -0.044 |
finished | -0.043 |
Token | made |
Token ID | 925 |
Feature activation | +1.61e+00 |
Pos logit contributions | |
ric | +0.059 |
ipt | +0.059 |
umbling | +0.058 |
urious | +0.057 |
beh | +0.054 |
Neg logit contributions | |
Princess | -0.028 |
, | -0.026 |
succeeded | -0.025 |
chef | -0.024 |
matching | -0.024 |
Token | everything |
Token ID | 2279 |
Feature activation | +3.08e+00 |
Pos logit contributions | |
ric | +0.048 |
ho | +0.048 |
oes | +0.044 |
ipt | +0.044 |
cigarettes | +0.044 |
Neg logit contributions | |
Princess | -0.022 |
good | -0.020 |
success | -0.020 |
chef | -0.019 |
matching | -0.019 |
Token | wob |
Token ID | 46867 |
Feature activation | +2.97e+00 |
Pos logit contributions | |
ric | +0.087 |
ipt | +0.085 |
Warning | +0.078 |
ho | +0.077 |
cigarettes | +0.075 |
Neg logit contributions | |
better | -0.050 |
good | -0.046 |
happier | -0.046 |
nicer | -0.042 |
matching | -0.042 |
Token | ble |
Token ID | 903 |
Feature activation | +2.48e+00 |
Pos logit contributions | |
ric | +0.066 |
cigarettes | +0.063 |
ipt | +0.061 |
ho | +0.060 |
umbling | +0.060 |
Neg logit contributions | |
Princess | -0.059 |
chef | -0.057 |
matching | -0.053 |
feast | -0.052 |
is | -0.052 |
Token | hard |
Token ID | 1327 |
Feature activation | +1.86e+00 |
Pos logit contributions | |
ipt | +0.091 |
ric | +0.086 |
umbling | +0.086 |
Warning | +0.079 |
packs | +0.079 |
Neg logit contributions | |
good | -0.045 |
Princess | -0.045 |
better | -0.042 |
tea | -0.042 |
matching | -0.040 |
Token | and |
Token ID | 290 |
Feature activation | +2.77e+00 |
Pos logit contributions | |
ric | +0.050 |
ipt | +0.049 |
cigarettes | +0.046 |
umbling | +0.046 |
ho | +0.044 |
Neg logit contributions | |
Princess | -0.028 |
matching | -0.028 |
chef | -0.027 |
succeeded | -0.026 |
party | -0.026 |
Token | made |
Token ID | 925 |
Feature activation | +1.46e+00 |
Pos logit contributions | |
ric | +0.079 |
ipt | +0.077 |
umbling | +0.074 |
Warning | +0.068 |
urious | +0.068 |
Neg logit contributions | |
Princess | -0.045 |
good | -0.041 |
chef | -0.039 |
matching | -0.039 |
succeeded | -0.039 |
Token | her |
Token ID | 607 |
Feature activation | +2.09e+00 |
Pos logit contributions | |
ric | +0.040 |
ho | +0.038 |
ipt | +0.038 |
umbling | +0.037 |
spe | +0.037 |
Neg logit contributions | |
Princess | -0.024 |
chef | -0.021 |
tea | -0.021 |
matching | -0.020 |
good | -0.020 |
Token | face |
Token ID | 1986 |
Feature activation | +2.99e+00 |
Pos logit contributions | |
ipt | +0.059 |
Warning | +0.056 |
umbling | +0.056 |
ric | +0.053 |
spe | +0.053 |
Neg logit contributions | |
good | -0.033 |
best | -0.032 |
better | -0.032 |
Princess | -0.032 |
tea | -0.031 |
Token | scr |
Token ID | 6040 |
Feature activation | +5.27e+00 |
Pos logit contributions | |
ipt | +0.080 |
ric | +0.072 |
Warning | +0.071 |
umbling | +0.070 |
cigarettes | +0.067 |
Neg logit contributions | |
better | -0.051 |
good | -0.050 |
tea | -0.049 |
Princess | -0.049 |
chef | -0.048 |
Token | unch |
Token ID | 3316 |
Feature activation | +2.31e+00 |
Pos logit contributions | |
cigarettes | +0.132 |
ric | +0.127 |
spe | +0.126 |
packs | +0.118 |
Warning | +0.117 |
Neg logit contributions | |
is | -0.085 |
patiently | -0.075 |
chef | -0.072 |
feast | -0.070 |
succeeded | -0.069 |
Token | up |
Token ID | 510 |
Feature activation | +1.03e+00 |
Pos logit contributions | |
ric | +0.062 |
ipt | +0.058 |
cigarettes | +0.058 |
spe | +0.055 |
umbling | +0.055 |
Neg logit contributions | |
Princess | -0.036 |
chef | -0.034 |
is | -0.032 |
matching | -0.031 |
patiently | -0.031 |
Token | . |
Token ID | 13 |
Feature activation | -5.00e-01 |
Pos logit contributions | |
ric | +0.020 |
cigarettes | +0.019 |
ho | +0.019 |
ipt | +0.018 |
umbling | +0.018 |
Neg logit contributions | |
Princess | -0.023 |
chef | -0.022 |
designer | -0.020 |
is | -0.020 |
matching | -0.020 |
Token | She |
Token ID | 1375 |
Feature activation | -4.84e-01 |
Pos logit contributions | |
chef | +0.013 |
Princess | +0.013 |
feast | +0.013 |
matching | +0.012 |
is | +0.012 |
Neg logit contributions | |
ric | -0.007 |
cigarettes | -0.007 |
ipt | -0.007 |
ho | -0.007 |
umbling | -0.007 |
Token | spat |
Token ID | 15246 |
Feature activation | -1.94e+00 |
Pos logit contributions | |
Princess | +0.010 |
chef | +0.009 |
succeeded | +0.009 |
matching | +0.009 |
patiently | +0.008 |
Neg logit contributions | |
ric | -0.011 |
cigarettes | -0.010 |
ipt | -0.010 |
umbling | -0.010 |
ho | -0.010 |
Token | the |
Token ID | 262 |
Feature activation | -2.45e-02 |
Pos logit contributions | |
ric | +0.042 |
cigarettes | +0.041 |
ipt | +0.040 |
ho | +0.039 |
umbling | +0.039 |
Neg logit contributions | |
Princess | -0.031 |
chef | -0.029 |
matching | -0.027 |
is | -0.026 |
feast | -0.026 |
Token | floor |
Token ID | 4314 |
Feature activation | +3.11e+00 |
Pos logit contributions | |
Princess | +0.001 |
chef | +0.001 |
matching | +0.001 |
feast | +0.000 |
succeeded | +0.000 |
Neg logit contributions | |
ric | -0.000 |
cigarettes | -0.000 |
ipt | -0.000 |
ho | -0.000 |
umbling | -0.000 |
Token | with |
Token ID | 351 |
Feature activation | +7.55e-01 |
Pos logit contributions | |
ric | +0.081 |
cigarettes | +0.078 |
ipt | +0.076 |
ho | +0.075 |
umbling | +0.074 |
Neg logit contributions | |
Princess | -0.049 |
chef | -0.046 |
matching | -0.044 |
is | -0.043 |
patiently | -0.042 |
Token | a |
Token ID | 257 |
Feature activation | +1.10e+00 |
Pos logit contributions | |
ric | +0.018 |
umbling | +0.017 |
ipt | +0.017 |
ho | +0.017 |
cigarettes | +0.017 |
Neg logit contributions | |
Princess | -0.014 |
chef | -0.013 |
matching | -0.013 |
tea | -0.012 |
parties | -0.012 |
Token | th |
Token ID | 294 |
Feature activation | +1.90e+00 |
Pos logit contributions | |
ric | +0.026 |
ipt | +0.026 |
cigarettes | +0.025 |
umbling | +0.024 |
beh | +0.024 |
Neg logit contributions | |
Princess | -0.020 |
chef | -0.020 |
matching | -0.019 |
is | -0.017 |
superhero | -0.017 |
Token | ud |
Token ID | 463 |
Feature activation | +5.26e+00 |
Pos logit contributions | |
ric | +0.029 |
cigarettes | +0.028 |
ho | +0.027 |
ipt | +0.026 |
spe | +0.025 |
Neg logit contributions | |
Princess | -0.049 |
chef | -0.048 |
is | -0.046 |
matching | -0.046 |
succeeded | -0.045 |
Token | . |
Token ID | 13 |
Feature activation | +8.56e-02 |
Pos logit contributions | |
ric | +0.149 |
urious | +0.147 |
ipt | +0.138 |
beh | +0.137 |
spe | +0.136 |
Neg logit contributions | |
, | -0.073 |
is | -0.067 |
chef | -0.065 |
patiently | -0.063 |
matching | -0.063 |
Token | She |
Token ID | 1375 |
Feature activation | +1.28e+00 |
Pos logit contributions | |
ric | +0.001 |
cigarettes | +0.001 |
ipt | +0.001 |
ho | +0.001 |
umbling | +0.001 |
Neg logit contributions | |
Princess | -0.002 |
chef | -0.002 |
feast | -0.002 |
matching | -0.002 |
is | -0.002 |
Token | starts |
Token ID | 4940 |
Feature activation | +2.96e+00 |
Pos logit contributions | |
ipt | +0.028 |
spe | +0.027 |
umbling | +0.026 |
ric | +0.026 |
ho | +0.026 |
Neg logit contributions | |
Princess | -0.026 |
matching | -0.025 |
chef | -0.024 |
succeeded | -0.024 |
tea | -0.024 |
Token | to |
Token ID | 284 |
Feature activation | +2.56e+00 |
Pos logit contributions | |
ric | +0.088 |
ipt | +0.082 |
cker | +0.081 |
umbling | +0.080 |
ho | +0.080 |
Neg logit contributions | |
matching | -0.038 |
, | -0.036 |
Princess | -0.035 |
patiently | -0.034 |
chef | -0.033 |
Token | cry |
Token ID | 3960 |
Feature activation | +1.28e+00 |
Pos logit contributions | |
ipt | +0.037 |
ric | +0.036 |
cigarettes | +0.035 |
ho | +0.034 |
umbling | +0.033 |
Neg logit contributions | |
Princess | -0.071 |
chef | -0.067 |
matching | -0.066 |
succeeded | -0.064 |
party | -0.064 |
Token | ground |
Token ID | 2323 |
Feature activation | +2.61e+00 |
Pos logit contributions | |
ric | +0.009 |
cigarettes | +0.009 |
ipt | +0.009 |
ho | +0.008 |
umbling | +0.008 |
Neg logit contributions | |
Princess | -0.012 |
chef | -0.011 |
matching | -0.011 |
feast | -0.010 |
is | -0.010 |
Token | with |
Token ID | 351 |
Feature activation | +1.18e+00 |
Pos logit contributions | |
ric | +0.073 |
ipt | +0.070 |
cigarettes | +0.069 |
beh | +0.067 |
urious | +0.066 |
Neg logit contributions | |
, | -0.035 |
Princess | -0.035 |
matching | -0.034 |
chef | -0.033 |
patiently | -0.032 |
Token | a |
Token ID | 257 |
Feature activation | +9.35e-01 |
Pos logit contributions | |
ric | +0.031 |
umbling | +0.030 |
ipt | +0.029 |
spe | +0.029 |
oes | +0.029 |
Neg logit contributions | |
Princess | -0.020 |
chef | -0.018 |
matching | -0.018 |
good | -0.017 |
tea | -0.017 |
Token | loud |
Token ID | 7812 |
Feature activation | +2.16e+00 |
Pos logit contributions | |
ric | +0.021 |
ipt | +0.021 |
cigarettes | +0.020 |
umbling | +0.020 |
ho | +0.019 |
Neg logit contributions | |
Princess | -0.018 |
chef | -0.017 |
matching | -0.017 |
feast | -0.016 |
succeeded | -0.016 |
Token | th |
Token ID | 294 |
Feature activation | +2.24e+00 |
Pos logit contributions | |
ric | +0.045 |
ipt | +0.045 |
cigarettes | +0.043 |
umbling | +0.042 |
spe | +0.041 |
Neg logit contributions | |
Princess | -0.046 |
chef | -0.043 |
matching | -0.041 |
feast | -0.040 |
is | -0.040 |
Token | ud |
Token ID | 463 |
Feature activation | +5.25e+00 |
Pos logit contributions | |
cigarettes | +0.042 |
ric | +0.041 |
spe | +0.041 |
ho | +0.039 |
oes | +0.037 |
Neg logit contributions | |
is | -0.047 |
Princess | -0.045 |
chef | -0.045 |
succeeded | -0.043 |
feast | -0.042 |
Token | . |
Token ID | 13 |
Feature activation | +4.79e-01 |
Pos logit contributions | |
urious | +0.161 |
ric | +0.153 |
beh | +0.150 |
ipt | +0.148 |
spe | +0.146 |
Neg logit contributions | |
, | -0.068 |
is | -0.060 |
chef | -0.055 |
ier | -0.054 |
patiently | -0.052 |
Token |
|
Token ID | 198 |
Feature activation | +2.11e+00 |
Pos logit contributions | |
ric | +0.010 |
cigarettes | +0.010 |
ho | +0.009 |
ipt | +0.009 |
umbling | +0.009 |
Neg logit contributions | |
Princess | -0.010 |
chef | -0.010 |
matching | -0.009 |
succeeded | -0.009 |
is | -0.009 |
Token |
|
Token ID | 198 |
Feature activation | +1.64e+00 |
Pos logit contributions | |
ric | +0.057 |
cigarettes | +0.055 |
ho | +0.054 |
umbling | +0.053 |
beh | +0.053 |
Neg logit contributions | |
Princess | -0.033 |
is | -0.029 |
chef | -0.028 |
matching | -0.028 |
succeeded | -0.027 |
Token | Tom |
Token ID | 13787 |
Feature activation | +1.42e+00 |
Pos logit contributions | |
ric | +0.046 |
cigarettes | +0.042 |
ho | +0.041 |
ipt | +0.040 |
umbling | +0.040 |
Neg logit contributions | |
Princess | -0.026 |
chef | -0.024 |
is | -0.024 |
matching | -0.022 |
succeeded | -0.021 |
Token | cries |
Token ID | 24691 |
Feature activation | +2.03e+00 |
Pos logit contributions | |
ric | +0.030 |
cigarettes | +0.030 |
ipt | +0.029 |
ho | +0.028 |
umbling | +0.028 |
Neg logit contributions | |
Princess | -0.029 |
chef | -0.028 |
matching | -0.026 |
succeeded | -0.026 |
is | -0.025 |
Token | naughty |
Token ID | 45128 |
Feature activation | +1.04e+00 |
Pos logit contributions | |
Princess | +0.020 |
chef | +0.018 |
is | +0.018 |
succeeded | +0.018 |
parties | +0.017 |
Neg logit contributions | |
cigarettes | -0.019 |
ric | -0.019 |
ho | -0.018 |
umbling | -0.017 |
ipt | -0.016 |
Token | idea |
Token ID | 2126 |
Feature activation | -6.44e-01 |
Pos logit contributions | |
ric | +0.022 |
cigarettes | +0.022 |
ho | +0.020 |
ipt | +0.020 |
umbling | +0.020 |
Neg logit contributions | |
Princess | -0.021 |
chef | -0.020 |
matching | -0.019 |
succeeded | -0.019 |
is | -0.019 |
Token | . |
Token ID | 13 |
Feature activation | +1.67e-01 |
Pos logit contributions | |
Princess | +0.009 |
, | +0.009 |
matching | +0.009 |
chef | +0.009 |
party | +0.009 |
Neg logit contributions | |
ric | -0.018 |
ipt | -0.017 |
umbling | -0.017 |
ho | -0.016 |
urious | -0.016 |
Token | She |
Token ID | 1375 |
Feature activation | -5.95e-01 |
Pos logit contributions | |
ric | +0.003 |
cigarettes | +0.003 |
ipt | +0.003 |
ho | +0.003 |
umbling | +0.003 |
Neg logit contributions | |
Princess | -0.004 |
chef | -0.004 |
matching | -0.004 |
succeeded | -0.004 |
designer | -0.003 |
Token | t |
Token ID | 256 |
Feature activation | +2.57e+00 |
Pos logit contributions | |
Princess | +0.013 |
chef | +0.013 |
matching | +0.012 |
feast | +0.012 |
is | +0.012 |
Neg logit contributions | |
ric | -0.012 |
cigarettes | -0.011 |
ipt | -0.011 |
ho | -0.011 |
umbling | -0.010 |
Token | ipt |
Token ID | 10257 |
Feature activation | +5.24e+00 |
Pos logit contributions | |
ric | +0.052 |
cigarettes | +0.048 |
Warning | +0.046 |
ostr | +0.045 |
ho | +0.045 |
Neg logit contributions | |
is | -0.050 |
ingu | -0.047 |
designer | -0.047 |
chef | -0.047 |
patiently | -0.046 |
Token | o |
Token ID | 78 |
Feature activation | +1.70e+00 |
Pos logit contributions | |
ric | +0.085 |
cigarettes | +0.082 |
gag | +0.076 |
Warning | +0.072 |
rats | +0.068 |
Neg logit contributions | |
Princess | -0.115 |
patiently | -0.114 |
chef | -0.112 |
, | -0.112 |
is | -0.112 |
Token | ed |
Token ID | 276 |
Feature activation | +1.02e+00 |
Pos logit contributions | |
ric | +0.037 |
cigarettes | +0.037 |
ipt | +0.036 |
ho | +0.036 |
umbling | +0.035 |
Neg logit contributions | |
Princess | -0.034 |
chef | -0.032 |
matching | -0.030 |
feast | -0.029 |
is | -0.029 |
Token | to |
Token ID | 284 |
Feature activation | +1.64e+00 |
Pos logit contributions | |
cigarettes | +0.019 |
ho | +0.018 |
ipt | +0.018 |
ric | +0.017 |
os | +0.016 |
Neg logit contributions | |
Princess | -0.023 |
chef | -0.023 |
designer | -0.022 |
is | -0.022 |
feast | -0.022 |
Token | his |
Token ID | 465 |
Feature activation | +9.68e-01 |
Pos logit contributions | |
ric | +0.040 |
cigarettes | +0.038 |
umbling | +0.038 |
ipt | +0.037 |
ho | +0.037 |
Neg logit contributions | |
Princess | -0.031 |
chef | -0.029 |
matching | -0.028 |
party | -0.026 |
feast | -0.026 |
Token | bed |
Token ID | 3996 |
Feature activation | -1.06e-01 |
Pos logit contributions | |
ric | +0.024 |
cigarettes | +0.022 |
ipt | +0.022 |
umbling | +0.022 |
ho | +0.022 |
Neg logit contributions | |
Princess | -0.018 |
chef | -0.017 |
matching | -0.016 |
feast | -0.015 |
party | -0.015 |
Token | ila |
Token ID | 10102 |
Feature activation | +5.49e-01 |
Pos logit contributions | |
ric | +0.002 |
cigarettes | +0.002 |
ipt | +0.002 |
ho | +0.002 |
umbling | +0.002 |
Neg logit contributions | |
Princess | -0.003 |
chef | -0.003 |
is | -0.003 |
matching | -0.003 |
succeeded | -0.003 |
Token | 's |
Token ID | 338 |
Feature activation | +5.56e-01 |
Pos logit contributions | |
ric | +0.011 |
cigarettes | +0.011 |
ipt | +0.011 |
ho | +0.011 |
umbling | +0.011 |
Neg logit contributions | |
Princess | -0.011 |
chef | -0.011 |
matching | -0.010 |
feast | -0.010 |
designer | -0.010 |
Token | car |
Token ID | 1097 |
Feature activation | +2.54e+00 |
Pos logit contributions | |
ric | +0.011 |
cigarettes | +0.011 |
ipt | +0.010 |
ho | +0.010 |
umbling | +0.010 |
Neg logit contributions | |
Princess | -0.012 |
chef | -0.011 |
feast | -0.011 |
is | -0.011 |
succeeded | -0.011 |
Token | hit |
Token ID | 2277 |
Feature activation | +2.24e+00 |
Pos logit contributions | |
ric | +0.064 |
cigarettes | +0.059 |
ipt | +0.058 |
umbling | +0.056 |
ho | +0.055 |
Neg logit contributions | |
Princess | -0.044 |
chef | -0.042 |
succeeded | -0.042 |
matching | -0.041 |
superhero | -0.040 |
Token | a |
Token ID | 257 |
Feature activation | +2.29e+00 |
Pos logit contributions | |
ric | +0.064 |
spe | +0.064 |
grand | +0.064 |
ho | +0.062 |
beh | +0.062 |
Neg logit contributions | |
Princess | -0.029 |
matching | -0.026 |
, | -0.025 |
better | -0.024 |
tea | -0.024 |
Token | bump |
Token ID | 13852 |
Feature activation | +5.22e+00 |
Pos logit contributions | |
ric | +0.058 |
ipt | +0.057 |
beh | +0.056 |
umbling | +0.054 |
os | +0.054 |
Neg logit contributions | |
Princess | -0.040 |
chef | -0.040 |
matching | -0.038 |
superhero | -0.036 |
party | -0.036 |
Token | and |
Token ID | 290 |
Feature activation | +2.34e+00 |
Pos logit contributions | |
ric | +0.161 |
cigarettes | +0.154 |
ipt | +0.152 |
urious | +0.151 |
beh | +0.148 |
Neg logit contributions | |
, | -0.060 |
is | -0.055 |
matching | -0.051 |
chef | -0.051 |
ier | -0.049 |
Token | flipped |
Token ID | 26157 |
Feature activation | +3.16e+00 |
Pos logit contributions | |
ric | +0.060 |
umbling | +0.057 |
ipt | +0.057 |
urious | +0.055 |
cigarettes | +0.055 |
Neg logit contributions | |
Princess | -0.041 |
chef | -0.038 |
matching | -0.037 |
succeeded | -0.037 |
chess | -0.035 |
Token | over |
Token ID | 625 |
Feature activation | +2.72e+00 |
Pos logit contributions | |
ric | +0.090 |
ho | +0.082 |
urious | +0.082 |
cigarettes | +0.082 |
grand | +0.082 |
Neg logit contributions | |
Princess | -0.042 |
matching | -0.040 |
chef | -0.037 |
better | -0.037 |
, | -0.037 |
Token | . |
Token ID | 13 |
Feature activation | -6.20e-03 |
Pos logit contributions | |
ric | +0.075 |
umbling | +0.070 |
cigarettes | +0.069 |
ipt | +0.069 |
ho | +0.069 |
Neg logit contributions | |
Princess | -0.041 |
matching | -0.039 |
chef | -0.037 |
tea | -0.035 |
is | -0.035 |
Token | It |
Token ID | 632 |
Feature activation | +1.25e+00 |
Pos logit contributions | |
Princess | +0.000 |
chef | +0.000 |
matching | +0.000 |
feast | +0.000 |
is | +0.000 |
Neg logit contributions | |
ric | -0.000 |
cigarettes | -0.000 |
ipt | -0.000 |
ho | -0.000 |
umbling | -0.000 |
Token | kitchen |
Token ID | 9592 |
Feature activation | -4.61e-01 |
Pos logit contributions | |
Princess | +0.010 |
chef | +0.010 |
matching | +0.009 |
feast | +0.009 |
succeeded | +0.009 |
Neg logit contributions | |
ric | -0.014 |
cigarettes | -0.013 |
ipt | -0.013 |
ho | -0.013 |
umbling | -0.013 |
Token | and |
Token ID | 290 |
Feature activation | +1.67e+00 |
Pos logit contributions | |
Princess | +0.008 |
chef | +0.008 |
matching | +0.007 |
feast | +0.007 |
is | +0.007 |
Neg logit contributions | |
ric | -0.011 |
cigarettes | -0.011 |
ipt | -0.011 |
ho | -0.010 |
umbling | -0.010 |
Token | then |
Token ID | 788 |
Feature activation | +6.82e-01 |
Pos logit contributions | |
ric | +0.040 |
cigarettes | +0.038 |
ipt | +0.037 |
umbling | +0.037 |
ho | +0.036 |
Neg logit contributions | |
Princess | -0.031 |
chef | -0.030 |
matching | -0.028 |
succeeded | -0.027 |
feast | -0.027 |
Token | they |
Token ID | 484 |
Feature activation | -1.11e-01 |
Pos logit contributions | |
ric | +0.014 |
cigarettes | +0.014 |
ho | +0.013 |
ipt | +0.013 |
umbling | +0.013 |
Neg logit contributions | |
Princess | -0.014 |
chef | -0.014 |
is | -0.013 |
matching | -0.013 |
feast | -0.013 |
Token | t |
Token ID | 256 |
Feature activation | +2.13e+00 |
Pos logit contributions | |
Princess | +0.002 |
chef | +0.002 |
matching | +0.002 |
succeeded | +0.002 |
feast | +0.002 |
Neg logit contributions | |
ric | -0.003 |
cigarettes | -0.002 |
ipt | -0.002 |
umbling | -0.002 |
ho | -0.002 |
Token | ipt |
Token ID | 10257 |
Feature activation | +5.22e+00 |
Pos logit contributions | |
ric | +0.025 |
cigarettes | +0.021 |
Warning | +0.018 |
ho | +0.018 |
ostr | +0.017 |
Neg logit contributions | |
is | -0.062 |
chef | -0.060 |
patiently | -0.060 |
designer | -0.059 |
ingu | -0.058 |
Token | o |
Token ID | 78 |
Feature activation | +1.42e+00 |
Pos logit contributions | |
ric | +0.102 |
cigarettes | +0.096 |
gag | +0.090 |
Warning | +0.086 |
rats | +0.086 |
Neg logit contributions | |
patiently | -0.103 |
is | -0.099 |
together | -0.096 |
chef | -0.096 |
again | -0.095 |
Token | ed |
Token ID | 276 |
Feature activation | +1.53e+00 |
Pos logit contributions | |
ric | +0.033 |
cigarettes | +0.032 |
ipt | +0.031 |
ho | +0.031 |
umbling | +0.030 |
Neg logit contributions | |
Princess | -0.027 |
chef | -0.025 |
matching | -0.024 |
feast | -0.023 |
is | -0.023 |
Token | to |
Token ID | 284 |
Feature activation | +1.50e+00 |
Pos logit contributions | |
ric | +0.034 |
cigarettes | +0.033 |
ipt | +0.032 |
ho | +0.032 |
umbling | +0.032 |
Neg logit contributions | |
Princess | -0.030 |
chef | -0.028 |
matching | -0.026 |
feast | -0.026 |
is | -0.026 |
Token | the |
Token ID | 262 |
Feature activation | +1.46e-01 |
Pos logit contributions | |
ric | +0.042 |
umbling | +0.041 |
ho | +0.040 |
cigarettes | +0.040 |
ipt | +0.040 |
Neg logit contributions | |
Princess | -0.023 |
chef | -0.021 |
matching | -0.020 |
party | -0.019 |
patiently | -0.019 |
Token | tree |
Token ID | 5509 |
Feature activation | +1.61e+00 |
Pos logit contributions | |
ric | +0.003 |
cigarettes | +0.003 |
ipt | +0.003 |
ho | +0.003 |
umbling | +0.003 |
Neg logit contributions | |
Princess | -0.003 |
chef | -0.003 |
matching | -0.003 |
feast | -0.003 |
is | -0.003 |
Token | with |
Token ID | 351 |
Feature activation | -1.72e+00 |
Pos logit contributions | |
chef | +0.126 |
succeeded | +0.125 |
Princess | +0.124 |
is | +0.119 |
designer | +0.119 |
Neg logit contributions | |
cigarettes | -0.106 |
ipt | -0.105 |
ric | -0.103 |
run | -0.098 |
ho | -0.097 |
Token | me |
Token ID | 502 |
Feature activation | -2.22e+00 |
Pos logit contributions | |
Princess | +0.034 |
chef | +0.032 |
succeeded | +0.030 |
feast | +0.030 |
designer | +0.030 |
Neg logit contributions | |
ric | -0.038 |
cigarettes | -0.037 |
ipt | -0.035 |
ho | -0.035 |
umbling | -0.034 |
Token | . |
Token ID | 13 |
Feature activation | -5.60e-01 |
Pos logit contributions | |
Princess | +0.038 |
chef | +0.036 |
matching | +0.033 |
succeeded | +0.033 |
designer | +0.032 |
Neg logit contributions | |
ric | -0.055 |
cigarettes | -0.053 |
ipt | -0.051 |
ho | -0.051 |
umbling | -0.050 |
Token | Can |
Token ID | 1680 |
Feature activation | -2.78e-01 |
Pos logit contributions | |
Princess | +0.013 |
chef | +0.013 |
matching | +0.012 |
designer | +0.012 |
feast | +0.012 |
Neg logit contributions | |
ric | -0.010 |
cigarettes | -0.010 |
ipt | -0.010 |
umbling | -0.010 |
ho | -0.010 |
Token | we |
Token ID | 356 |
Feature activation | -2.68e+00 |
Pos logit contributions | |
Princess | +0.006 |
chef | +0.005 |
matching | +0.005 |
is | +0.005 |
designer | +0.005 |
Neg logit contributions | |
ric | -0.006 |
cigarettes | -0.006 |
ipt | -0.006 |
umbling | -0.006 |
ho | -0.005 |
Token | play |
Token ID | 711 |
Feature activation | -7.47e+00 |
Pos logit contributions | |
Princess | +0.067 |
chef | +0.065 |
is | +0.061 |
succeeded | +0.059 |
matching | +0.058 |
Neg logit contributions | |
ric | -0.044 |
ho | -0.040 |
oes | -0.038 |
os | -0.034 |
cigarettes | -0.034 |
Token | again |
Token ID | 757 |
Feature activation | -3.22e+00 |
Pos logit contributions | |
chef | +0.145 |
Princess | +0.144 |
Benjamin | +0.143 |
is | +0.142 |
majesty | +0.140 |
Neg logit contributions | |
ipt | -0.121 |
run | -0.116 |
os | -0.115 |
cigarettes | -0.114 |
o | -0.111 |
Token | tomorrow |
Token ID | 9439 |
Feature activation | -3.21e+00 |
Pos logit contributions | |
Princess | +0.073 |
chef | +0.071 |
designer | +0.068 |
succeeded | +0.068 |
is | +0.066 |
Neg logit contributions | |
ric | -0.051 |
ipt | -0.049 |
Un | -0.049 |
packs | -0.049 |
cigarettes | -0.048 |
Token | ?" |
Token ID | 1701 |
Feature activation | -1.30e+00 |
Pos logit contributions | |
Princess | +0.070 |
chef | +0.068 |
succeeded | +0.066 |
designer | +0.065 |
is | +0.064 |
Neg logit contributions | |
ric | -0.054 |
ipt | -0.053 |
oes | -0.053 |
umbling | -0.050 |
ho | -0.049 |
Token | The |
Token ID | 383 |
Feature activation | +2.71e-01 |
Pos logit contributions | |
Princess | +0.025 |
chef | +0.024 |
matching | +0.022 |
is | +0.022 |
feast | +0.022 |
Neg logit contributions | |
ric | -0.030 |
cigarettes | -0.028 |
ipt | -0.028 |
ho | -0.027 |
umbling | -0.027 |
Token | little |
Token ID | 1310 |
Feature activation | -1.26e-01 |
Pos logit contributions | |
ric | +0.005 |
cigarettes | +0.005 |
ho | +0.005 |
ipt | +0.005 |
umbling | +0.005 |
Neg logit contributions | |
Princess | -0.006 |
chef | -0.006 |
matching | -0.006 |
succeeded | -0.005 |
parties | -0.005 |
Token | Mom |
Token ID | 11254 |
Feature activation | -8.74e-01 |
Pos logit contributions | |
Princess | +0.030 |
chef | +0.029 |
matching | +0.027 |
succeeded | +0.027 |
feast | +0.027 |
Neg logit contributions | |
ric | -0.036 |
cigarettes | -0.035 |
ipt | -0.033 |
ho | -0.033 |
umbling | -0.032 |
Token | my |
Token ID | 1820 |
Feature activation | -1.42e+00 |
Pos logit contributions | |
Princess | +0.021 |
chef | +0.020 |
succeeded | +0.019 |
matching | +0.019 |
feast | +0.019 |
Neg logit contributions | |
ric | -0.016 |
cigarettes | -0.015 |
ipt | -0.014 |
ho | -0.014 |
umbling | -0.014 |
Token | . |
Token ID | 13 |
Feature activation | -3.78e-01 |
Pos logit contributions | |
Princess | +0.032 |
chef | +0.030 |
matching | +0.029 |
ival | +0.029 |
succeeded | +0.029 |
Neg logit contributions | |
cigarettes | -0.026 |
ric | -0.025 |
ho | -0.024 |
ipt | -0.023 |
umbling | -0.023 |
Token | Can |
Token ID | 1680 |
Feature activation | +6.94e-02 |
Pos logit contributions | |
Princess | +0.009 |
chef | +0.009 |
is | +0.009 |
succeeded | +0.009 |
matching | +0.009 |
Neg logit contributions | |
ric | -0.006 |
cigarettes | -0.006 |
ipt | -0.006 |
ho | -0.006 |
umbling | -0.005 |
Token | we |
Token ID | 356 |
Feature activation | -2.49e+00 |
Pos logit contributions | |
ric | +0.001 |
cigarettes | +0.001 |
ipt | +0.001 |
ho | +0.001 |
umbling | +0.001 |
Neg logit contributions | |
Princess | -0.002 |
chef | -0.002 |
matching | -0.001 |
feast | -0.001 |
succeeded | -0.001 |
Token | play |
Token ID | 711 |
Feature activation | -7.24e+00 |
Pos logit contributions | |
Princess | +0.058 |
chef | +0.055 |
succeeded | +0.054 |
is | +0.053 |
ival | +0.053 |
Neg logit contributions | |
ric | -0.045 |
ho | -0.040 |
cigarettes | -0.038 |
oes | -0.037 |
el | -0.037 |
Token | together |
Token ID | 1978 |
Feature activation | -4.45e+00 |
Pos logit contributions | |
Benjamin | +0.164 |
succeeded | +0.154 |
is | +0.153 |
majesty | +0.153 |
ier | +0.152 |
Neg logit contributions | |
cigarettes | -0.096 |
Why | -0.086 |
alarm | -0.083 |
os | -0.081 |
ipt | -0.081 |
Token | ?" |
Token ID | 1701 |
Feature activation | -9.15e-01 |
Pos logit contributions | |
Princess | +0.102 |
succeeded | +0.095 |
chef | +0.089 |
designer | +0.088 |
feast | +0.087 |
Neg logit contributions | |
ric | -0.074 |
oes | -0.069 |
os | -0.067 |
ipt | -0.064 |
ho | -0.061 |
Token |
|
Token ID | 198 |
Feature activation | +2.76e-01 |
Pos logit contributions | |
chef | +0.025 |
Princess | +0.025 |
feast | +0.025 |
succeeded | +0.025 |
is | +0.024 |
Neg logit contributions | |
ipt | -0.012 |
ric | -0.011 |
cigarettes | -0.011 |
ho | -0.010 |
oes | -0.010 |
Token |
|
Token ID | 198 |
Feature activation | -1.99e-01 |
Pos logit contributions | |
ric | +0.005 |
ipt | +0.005 |
cigarettes | +0.005 |
ho | +0.005 |
umbling | +0.005 |
Neg logit contributions | |
Princess | -0.006 |
chef | -0.006 |
matching | -0.006 |
feast | -0.006 |
succeeded | -0.006 |
Token | " |
Token ID | 1 |
Feature activation | +3.90e-01 |
Pos logit contributions | |
Princess | +0.005 |
chef | +0.005 |
matching | +0.005 |
is | +0.004 |
feast | +0.004 |
Neg logit contributions | |
ric | -0.003 |
cigarettes | -0.003 |
ipt | -0.003 |
ho | -0.003 |
umbling | -0.003 |
Token | . |
Token ID | 13 |
Feature activation | -3.99e-01 |
Pos logit contributions | |
Princess | +0.037 |
chef | +0.035 |
matching | +0.033 |
succeeded | +0.032 |
is | +0.032 |
Neg logit contributions | |
ric | -0.049 |
cigarettes | -0.046 |
ipt | -0.045 |
ho | -0.044 |
umbling | -0.044 |
Token | Can |
Token ID | 1680 |
Feature activation | +5.05e-02 |
Pos logit contributions | |
Princess | +0.009 |
chef | +0.009 |
matching | +0.009 |
feast | +0.008 |
designer | +0.008 |
Neg logit contributions | |
ric | -0.007 |
cigarettes | -0.007 |
ipt | -0.007 |
ho | -0.007 |
umbling | -0.007 |
Token | you |
Token ID | 345 |
Feature activation | -2.32e+00 |
Pos logit contributions | |
ric | +0.001 |
cigarettes | +0.001 |
umbling | +0.001 |
ipt | +0.001 |
ho | +0.001 |
Neg logit contributions | |
Princess | -0.001 |
chef | -0.001 |
matching | -0.001 |
is | -0.001 |
designer | -0.001 |
Token | come |
Token ID | 1282 |
Feature activation | -3.30e+00 |
Pos logit contributions | |
chef | +0.056 |
Princess | +0.054 |
ival | +0.051 |
succeeded | +0.051 |
is | +0.050 |
Neg logit contributions | |
ric | -0.035 |
trem | -0.032 |
oes | -0.031 |
venture | -0.031 |
demand | -0.031 |
Token | to |
Token ID | 284 |
Feature activation | -2.19e+00 |
Pos logit contributions | |
succeeded | +0.067 |
chef | +0.065 |
is | +0.063 |
Princess | +0.063 |
judges | +0.062 |
Neg logit contributions | |
ric | -0.070 |
oes | -0.062 |
cigarettes | -0.061 |
ipt | -0.060 |
os | -0.059 |
Token | play |
Token ID | 711 |
Feature activation | -7.03e+00 |
Pos logit contributions | |
chef | +0.045 |
Princess | +0.045 |
succeeded | +0.044 |
is | +0.043 |
designer | +0.042 |
Neg logit contributions | |
ric | -0.043 |
cigarettes | -0.040 |
ipt | -0.037 |
oes | -0.037 |
ho | -0.036 |
Token | again |
Token ID | 757 |
Feature activation | -2.42e+00 |
Pos logit contributions | |
succeeded | +0.175 |
majesty | +0.167 |
Majesty | +0.165 |
is | +0.155 |
ier | +0.152 |
Neg logit contributions | |
cigarettes | -0.086 |
alarm | -0.083 |
run | -0.080 |
Why | -0.080 |
os | -0.076 |
Token | tomorrow |
Token ID | 9439 |
Feature activation | -2.30e+00 |
Pos logit contributions | |
Princess | +0.052 |
succeeded | +0.051 |
chef | +0.051 |
is | +0.049 |
designer | +0.049 |
Neg logit contributions | |
ric | -0.040 |
cigarettes | -0.038 |
ipt | -0.038 |
Un | -0.038 |
oes | -0.037 |
Token | ?" |
Token ID | 1701 |
Feature activation | -1.05e+00 |
Pos logit contributions | |
Princess | +0.048 |
chef | +0.045 |
succeeded | +0.045 |
matching | +0.044 |
is | +0.044 |
Neg logit contributions | |
ric | -0.042 |
umbling | -0.040 |
oes | -0.040 |
ipt | -0.040 |
cigarettes | -0.038 |
Token |
|
Token ID | 198 |
Feature activation | +5.55e-02 |
Pos logit contributions | |
Princess | +0.024 |
chef | +0.023 |
matching | +0.022 |
is | +0.021 |
feast | +0.021 |
Neg logit contributions | |
ric | -0.020 |
cigarettes | -0.019 |
ipt | -0.019 |
ho | -0.018 |
umbling | -0.018 |
Token |
|
Token ID | 198 |
Feature activation | -3.88e-01 |
Pos logit contributions | |
ric | +0.001 |
ipt | +0.001 |
cigarettes | +0.001 |
ho | +0.001 |
umbling | +0.001 |
Neg logit contributions | |
Princess | -0.001 |
chef | -0.001 |
feast | -0.001 |
party | -0.001 |
matching | -0.001 |
Token | , |
Token ID | 11 |
Feature activation | -1.58e+00 |
Pos logit contributions | |
Princess | +0.030 |
chef | +0.029 |
matching | +0.027 |
feast | +0.026 |
succeeded | +0.026 |
Neg logit contributions | |
ric | -0.037 |
cigarettes | -0.035 |
ipt | -0.034 |
ho | -0.034 |
umbling | -0.033 |
Token | too |
Token ID | 1165 |
Feature activation | -8.54e-01 |
Pos logit contributions | |
Princess | +0.029 |
chef | +0.027 |
matching | +0.024 |
party | +0.024 |
feast | +0.023 |
Neg logit contributions | |
ric | -0.040 |
ipt | -0.038 |
cigarettes | -0.038 |
umbling | -0.038 |
ho | -0.036 |
Token | . |
Token ID | 13 |
Feature activation | -5.71e-01 |
Pos logit contributions | |
Princess | +0.016 |
chef | +0.015 |
matching | +0.014 |
feast | +0.014 |
is | +0.014 |
Neg logit contributions | |
ric | -0.020 |
cigarettes | -0.019 |
ipt | -0.019 |
ho | -0.018 |
umbling | -0.018 |
Token | Can |
Token ID | 1680 |
Feature activation | -2.73e-01 |
Pos logit contributions | |
Princess | +0.014 |
chef | +0.013 |
matching | +0.013 |
feast | +0.013 |
is | +0.013 |
Neg logit contributions | |
ric | -0.010 |
cigarettes | -0.009 |
ipt | -0.009 |
ho | -0.009 |
umbling | -0.009 |
Token | we |
Token ID | 356 |
Feature activation | -2.52e+00 |
Pos logit contributions | |
Princess | +0.006 |
chef | +0.006 |
matching | +0.006 |
feast | +0.005 |
is | +0.005 |
Neg logit contributions | |
ric | -0.005 |
cigarettes | -0.005 |
ipt | -0.005 |
umbling | -0.005 |
ho | -0.005 |
Token | play |
Token ID | 711 |
Feature activation | -7.01e+00 |
Pos logit contributions | |
Princess | +0.057 |
chef | +0.056 |
succeeded | +0.052 |
is | +0.052 |
matching | +0.051 |
Neg logit contributions | |
ric | -0.049 |
ho | -0.044 |
oes | -0.042 |
cigarettes | -0.041 |
ipt | -0.039 |
Token | balance |
Token ID | 5236 |
Feature activation | -3.85e+00 |
Pos logit contributions | |
succeeded | +0.148 |
is | +0.142 |
chef | +0.141 |
Benjamin | +0.141 |
Princess | +0.140 |
Neg logit contributions | |
cigarettes | -0.103 |
ipt | -0.100 |
os | -0.097 |
ric | -0.093 |
alarm | -0.093 |
Token | together |
Token ID | 1978 |
Feature activation | -3.23e+00 |
Pos logit contributions | |
Princess | +0.079 |
succeeded | +0.077 |
chef | +0.077 |
feast | +0.074 |
designer | +0.071 |
Neg logit contributions | |
cigarettes | -0.077 |
ric | -0.075 |
ipt | -0.069 |
el | -0.066 |
os | -0.066 |
Token | ?" |
Token ID | 1701 |
Feature activation | +1.50e-03 |
Pos logit contributions | |
Princess | +0.062 |
chef | +0.058 |
succeeded | +0.056 |
feast | +0.054 |
designer | +0.052 |
Neg logit contributions | |
ric | -0.074 |
ipt | -0.068 |
cigarettes | -0.068 |
oes | -0.067 |
ho | -0.067 |
Token |
|
Token ID | 198 |
Feature activation | +1.36e+00 |
Pos logit contributions | |
ric | +0.000 |
cigarettes | +0.000 |
umbling | +0.000 |
ipt | +0.000 |
ho | +0.000 |
Neg logit contributions | |
Princess | -0.000 |
chef | -0.000 |
matching | -0.000 |
patiently | -0.000 |
parties | -0.000 |
Token |
|
Token ID | 198 |
Feature activation | +1.10e+00 |
Pos logit contributions | |
ric | +0.035 |
cigarettes | +0.034 |
umbling | +0.033 |
ho | +0.032 |
spe | +0.032 |
Neg logit contributions | |
Princess | -0.023 |
chef | -0.020 |
matching | -0.020 |
is | -0.020 |
patiently | -0.019 |
Token | Me |
Token ID | 5308 |
Feature activation | +4.11e-01 |
Pos logit contributions | |
cigarettes | +0.008 |
ric | +0.008 |
ipt | +0.007 |
ho | +0.007 |
packs | +0.007 |
Neg logit contributions | |
Princess | -0.007 |
chef | -0.006 |
is | -0.006 |
matching | -0.006 |
succeeded | -0.006 |
Token | too |
Token ID | 1165 |
Feature activation | -1.02e+00 |
Pos logit contributions | |
ric | +0.008 |
cigarettes | +0.007 |
ho | +0.007 |
ipt | +0.007 |
umbling | +0.007 |
Neg logit contributions | |
Princess | -0.010 |
chef | -0.009 |
is | -0.009 |
designer | -0.009 |
matching | -0.009 |
Token | ! |
Token ID | 0 |
Feature activation | -1.00e-01 |
Pos logit contributions | |
Princess | +0.018 |
chef | +0.018 |
designer | +0.016 |
succeeded | +0.016 |
is | +0.016 |
Neg logit contributions | |
ric | -0.023 |
cigarettes | -0.022 |
ho | -0.022 |
umbling | -0.022 |
spe | -0.022 |
Token | Want |
Token ID | 16168 |
Feature activation | -1.01e+00 |
Pos logit contributions | |
Princess | +0.002 |
chef | +0.002 |
matching | +0.002 |
feast | +0.002 |
succeeded | +0.002 |
Neg logit contributions | |
ric | -0.002 |
cigarettes | -0.002 |
ipt | -0.002 |
ho | -0.002 |
umbling | -0.002 |
Token | to |
Token ID | 284 |
Feature activation | -2.38e+00 |
Pos logit contributions | |
Princess | +0.020 |
chef | +0.019 |
matching | +0.018 |
feast | +0.018 |
succeeded | +0.018 |
Neg logit contributions | |
ric | -0.022 |
cigarettes | -0.021 |
ipt | -0.021 |
ho | -0.020 |
umbling | -0.020 |
Token | play |
Token ID | 711 |
Feature activation | -6.98e+00 |
Pos logit contributions | |
Princess | +0.054 |
chef | +0.053 |
is | +0.049 |
succeeded | +0.049 |
patiently | +0.048 |
Neg logit contributions | |
ric | -0.044 |
cigarettes | -0.040 |
ipt | -0.039 |
oes | -0.039 |
ho | -0.039 |
Token | hide |
Token ID | 7808 |
Feature activation | -3.33e+00 |
Pos logit contributions | |
succeeded | +0.176 |
majesty | +0.157 |
ier | +0.155 |
is | +0.151 |
Majesty | +0.151 |
Neg logit contributions | |
alarm | -0.086 |
Why | -0.079 |
cigarettes | -0.079 |
ipt | -0.077 |
run | -0.077 |
Token | and |
Token ID | 290 |
Feature activation | +3.86e-01 |
Pos logit contributions | |
succeeded | +0.070 |
Princess | +0.068 |
feast | +0.065 |
chef | +0.063 |
matching | +0.061 |
Neg logit contributions | |
cigarettes | -0.064 |
ipt | -0.062 |
ric | -0.059 |
ho | -0.058 |
umbling | -0.057 |
Token | seek |
Token ID | 5380 |
Feature activation | -4.37e+00 |
Pos logit contributions | |
ric | +0.010 |
cigarettes | +0.010 |
ipt | +0.009 |
ho | +0.009 |
umbling | +0.009 |
Neg logit contributions | |
Princess | -0.006 |
chef | -0.006 |
matching | -0.006 |
feast | -0.005 |
succeeded | -0.005 |
Token | ?" |
Token ID | 1701 |
Feature activation | -5.66e-01 |
Pos logit contributions | |
succeeded | +0.096 |
Princess | +0.095 |
feast | +0.091 |
chef | +0.091 |
is | +0.084 |
Neg logit contributions | |
ric | -0.073 |
ipt | -0.072 |
cigarettes | -0.070 |
run | -0.069 |
umbling | -0.066 |
Token | |
Token ID | 220 |
Feature activation | +1.50e+00 |
Pos logit contributions | |
chef | +0.012 |
Princess | +0.012 |
designer | +0.011 |
is | +0.011 |
matching | +0.011 |
Neg logit contributions | |
ric | -0.012 |
ipt | -0.011 |
cigarettes | -0.011 |
oes | -0.010 |
os | -0.010 |
Token | Can |
Token ID | 6090 |
Feature activation | +1.19e+00 |
Pos logit contributions | |
ric | +0.027 |
cigarettes | +0.024 |
angs | +0.022 |
packs | +0.022 |
os | +0.022 |
Neg logit contributions | |
Princess | -0.025 |
is | -0.024 |
chef | -0.023 |
designer | -0.022 |
judges | -0.021 |
Token | we |
Token ID | 356 |
Feature activation | -1.39e+00 |
Pos logit contributions | |
ric | +0.029 |
umbling | +0.028 |
ipt | +0.027 |
cigarettes | +0.027 |
ho | +0.026 |
Neg logit contributions | |
Princess | -0.023 |
chef | -0.020 |
matching | -0.020 |
party | -0.019 |
designer | -0.019 |
Token | be |
Token ID | 307 |
Feature activation | -2.67e+00 |
Pos logit contributions | |
Princess | +0.034 |
chef | +0.033 |
is | +0.031 |
succeeded | +0.030 |
matching | +0.030 |
Neg logit contributions | |
ric | -0.023 |
ho | -0.022 |
cigarettes | -0.021 |
oes | -0.021 |
ipt | -0.020 |
Token | friends |
Token ID | 2460 |
Feature activation | -5.12e+00 |
Pos logit contributions | |
is | +0.060 |
ival | +0.058 |
feast | +0.057 |
Princess | +0.055 |
chef | +0.052 |
Neg logit contributions | |
packs | -0.047 |
beh | -0.045 |
cigarettes | -0.042 |
o | -0.041 |
Kim | -0.040 |
Token | and |
Token ID | 290 |
Feature activation | -3.66e-01 |
Pos logit contributions | |
Majesty | +0.117 |
succeeded | +0.116 |
Princess | +0.115 |
majesty | +0.114 |
is | +0.111 |
Neg logit contributions | |
cigarettes | -0.068 |
os | -0.066 |
Why | -0.066 |
alarm | -0.064 |
oes | -0.064 |
Token | play |
Token ID | 711 |
Feature activation | -6.97e+00 |
Pos logit contributions | |
Princess | +0.007 |
chef | +0.006 |
matching | +0.006 |
feast | +0.006 |
succeeded | +0.006 |
Neg logit contributions | |
ric | -0.009 |
cigarettes | -0.008 |
ipt | -0.008 |
umbling | -0.008 |
ho | -0.008 |
Token | together |
Token ID | 1978 |
Feature activation | -3.81e+00 |
Pos logit contributions | |
majesty | +0.143 |
is | +0.140 |
succeeded | +0.136 |
Majesty | +0.136 |
oured | +0.129 |
Neg logit contributions | |
Why | -0.101 |
cigarettes | -0.093 |
ipt | -0.093 |
os | -0.089 |
Un | -0.089 |
Token | again |
Token ID | 757 |
Feature activation | -2.51e+00 |
Pos logit contributions | |
Princess | +0.082 |
succeeded | +0.076 |
chef | +0.074 |
is | +0.073 |
feast | +0.072 |
Neg logit contributions | |
ric | -0.070 |
oes | -0.064 |
os | -0.062 |
ipt | -0.061 |
ho | -0.059 |
Token | next |
Token ID | 1306 |
Feature activation | -1.02e+00 |
Pos logit contributions | |
Princess | +0.056 |
chef | +0.053 |
is | +0.053 |
succeeded | +0.052 |
designer | +0.052 |
Neg logit contributions | |
Un | -0.038 |
ric | -0.038 |
ipt | -0.038 |
urious | -0.038 |
packs | -0.037 |
Token | year |
Token ID | 614 |
Feature activation | -2.73e-01 |
Pos logit contributions | |
Princess | +0.020 |
chef | +0.019 |
matching | +0.017 |
succeeded | +0.017 |
designer | +0.017 |
Neg logit contributions | |
ric | -0.022 |
cigarettes | -0.021 |
ipt | -0.021 |
umbling | -0.021 |
ho | -0.020 |
Token | ?" |
Token ID | 1701 |
Feature activation | -8.66e-01 |
Pos logit contributions | |
Princess | +0.005 |
chef | +0.005 |
succeeded | +0.004 |
matching | +0.004 |
feast | +0.004 |
Neg logit contributions | |
ric | -0.006 |
cigarettes | -0.006 |
ipt | -0.006 |
ho | -0.006 |
umbling | -0.006 |
Token | " |
Token ID | 366 |
Feature activation | +5.05e-01 |
Pos logit contributions | |
Princess | +0.000 |
chef | +0.000 |
succeeded | +0.000 |
feast | +0.000 |
parties | +0.000 |
Neg logit contributions | |
ric | -0.000 |
ho | -0.000 |
ipt | -0.000 |
cigarettes | -0.000 |
umbling | -0.000 |
Token | Do |
Token ID | 5211 |
Feature activation | -7.33e-01 |
Pos logit contributions | |
ric | +0.010 |
cigarettes | +0.010 |
umbling | +0.009 |
ipt | +0.009 |
ho | +0.008 |
Neg logit contributions | |
Princess | -0.012 |
chef | -0.011 |
matching | -0.011 |
is | -0.011 |
designer | -0.010 |
Token | you |
Token ID | 345 |
Feature activation | -7.99e-01 |
Pos logit contributions | |
Princess | +0.014 |
chef | +0.012 |
matching | +0.012 |
is | +0.012 |
party | +0.012 |
Neg logit contributions | |
ric | -0.017 |
cigarettes | -0.017 |
umbling | -0.017 |
ipt | -0.017 |
ho | -0.016 |
Token | want |
Token ID | 765 |
Feature activation | -2.93e+00 |
Pos logit contributions | |
chef | +0.024 |
Princess | +0.024 |
is | +0.023 |
matching | +0.023 |
designer | +0.022 |
Neg logit contributions | |
ric | -0.009 |
cigarettes | -0.007 |
oes | -0.007 |
ho | -0.007 |
os | -0.006 |
Token | to |
Token ID | 284 |
Feature activation | -2.04e+00 |
Pos logit contributions | |
Princess | +0.066 |
chef | +0.065 |
succeeded | +0.063 |
designer | +0.061 |
patiently | +0.060 |
Neg logit contributions | |
cigarettes | -0.053 |
ric | -0.051 |
ho | -0.049 |
ipt | -0.049 |
oes | -0.048 |
Token | play |
Token ID | 711 |
Feature activation | -6.95e+00 |
Pos logit contributions | |
Princess | +0.044 |
chef | +0.044 |
succeeded | +0.041 |
patiently | +0.040 |
is | +0.040 |
Neg logit contributions | |
ric | -0.041 |
oes | -0.036 |
ho | -0.035 |
cigarettes | -0.035 |
ipt | -0.034 |
Token | ?" |
Token ID | 1701 |
Feature activation | -2.00e-01 |
Pos logit contributions | |
succeeded | +0.171 |
Majesty | +0.148 |
is | +0.148 |
chef | +0.147 |
feast | +0.146 |
Neg logit contributions | |
cigarettes | -0.094 |
ipt | -0.088 |
run | -0.087 |
os | -0.087 |
ric | -0.085 |
Token | Lily |
Token ID | 20037 |
Feature activation | -8.62e-01 |
Pos logit contributions | |
Princess | +0.004 |
chef | +0.004 |
matching | +0.004 |
feast | +0.004 |
is | +0.004 |
Neg logit contributions | |
ric | -0.004 |
cigarettes | -0.004 |
ipt | -0.004 |
ho | -0.004 |
umbling | -0.004 |
Token | said |
Token ID | 531 |
Feature activation | -1.27e+00 |
Pos logit contributions | |
chef | +0.017 |
feast | +0.017 |
Princess | +0.017 |
matching | +0.016 |
designer | +0.016 |
Neg logit contributions | |
ipt | -0.016 |
ho | -0.016 |
ric | -0.016 |
os | -0.016 |
urious | -0.015 |
Token | , |
Token ID | 11 |
Feature activation | -2.46e-01 |
Pos logit contributions | |
succeeded | +0.025 |
feast | +0.024 |
auldron | +0.024 |
parties | +0.023 |
chef | +0.022 |
Neg logit contributions | |
ho | -0.029 |
ful | -0.024 |
run | -0.023 |
os | -0.023 |
ow | -0.023 |
Token | " |
Token ID | 366 |
Feature activation | +3.10e-01 |
Pos logit contributions | |
succeeded | +0.005 |
feast | +0.005 |
Princess | +0.005 |
parties | +0.005 |
chef | +0.005 |
Neg logit contributions | |
ho | -0.004 |
ric | -0.004 |
ipt | -0.004 |
Un | -0.004 |
cigarettes | -0.004 |
Token | new |
Token ID | 649 |
Feature activation | -2.13e+00 |
Pos logit contributions | |
succeeded | +0.030 |
Princess | +0.029 |
feast | +0.029 |
chef | +0.028 |
is | +0.027 |
Neg logit contributions | |
cigarettes | -0.049 |
ric | -0.047 |
ho | -0.047 |
spe | -0.045 |
ipt | -0.044 |
Token | friend |
Token ID | 1545 |
Feature activation | -3.00e+00 |
Pos logit contributions | |
Princess | +0.041 |
succeeded | +0.039 |
feast | +0.038 |
is | +0.037 |
chef | +0.037 |
Neg logit contributions | |
ipt | -0.042 |
cigarettes | -0.040 |
ric | -0.040 |
packs | -0.039 |
spe | -0.039 |
Token | . |
Token ID | 13 |
Feature activation | -4.03e-01 |
Pos logit contributions | |
Princess | +0.063 |
chef | +0.063 |
matching | +0.060 |
succeeded | +0.060 |
is | +0.059 |
Neg logit contributions | |
cigarettes | -0.057 |
ric | -0.055 |
ho | -0.052 |
ipt | -0.052 |
os | -0.052 |
Token | Can |
Token ID | 1680 |
Feature activation | -1.26e-01 |
Pos logit contributions | |
chef | +0.010 |
Princess | +0.010 |
is | +0.010 |
succeeded | +0.010 |
feast | +0.010 |
Neg logit contributions | |
ric | -0.006 |
cigarettes | -0.006 |
ipt | -0.006 |
ho | -0.006 |
oes | -0.006 |
Token | we |
Token ID | 356 |
Feature activation | -2.55e+00 |
Pos logit contributions | |
Princess | +0.003 |
chef | +0.003 |
matching | +0.002 |
is | +0.002 |
designer | +0.002 |
Neg logit contributions | |
ric | -0.003 |
cigarettes | -0.002 |
ipt | -0.002 |
umbling | -0.002 |
ho | -0.002 |
Token | play |
Token ID | 711 |
Feature activation | -6.94e+00 |
Pos logit contributions | |
Princess | +0.058 |
chef | +0.056 |
is | +0.053 |
succeeded | +0.053 |
matching | +0.051 |
Neg logit contributions | |
ric | -0.047 |
ho | -0.043 |
oes | -0.042 |
cigarettes | -0.039 |
ipt | -0.038 |
Token | guitar |
Token ID | 10047 |
Feature activation | -4.74e+00 |
Pos logit contributions | |
succeeded | +0.135 |
majesty | +0.134 |
is | +0.131 |
Majesty | +0.130 |
feast | +0.130 |
Neg logit contributions | |
cigarettes | -0.102 |
ipt | -0.101 |
Why | -0.097 |
os | -0.096 |
alarm | -0.096 |
Token | and |
Token ID | 290 |
Feature activation | -1.18e+00 |
Pos logit contributions | |
Princess | +0.084 |
succeeded | +0.084 |
is | +0.080 |
chef | +0.079 |
Benjamin | +0.075 |
Neg logit contributions | |
ric | -0.094 |
cigarettes | -0.091 |
os | -0.091 |
oes | -0.089 |
ho | -0.089 |
Token | read |
Token ID | 1100 |
Feature activation | -4.65e+00 |
Pos logit contributions | |
Princess | +0.022 |
chef | +0.021 |
is | +0.019 |
feast | +0.019 |
succeeded | +0.019 |
Neg logit contributions | |
ric | -0.027 |
cigarettes | -0.026 |
ho | -0.024 |
ipt | -0.024 |
umbling | -0.024 |
Token | books |
Token ID | 3835 |
Feature activation | -3.30e+00 |
Pos logit contributions | |
chef | +0.086 |
succeeded | +0.086 |
Princess | +0.085 |
feast | +0.082 |
designer | +0.079 |
Neg logit contributions | |
cigarettes | -0.100 |
ric | -0.096 |
ipt | -0.090 |
os | -0.089 |
ho | -0.088 |
Token | together |
Token ID | 1978 |
Feature activation | -3.43e+00 |
Pos logit contributions | |
Princess | +0.063 |
chef | +0.060 |
succeeded | +0.058 |
feast | +0.057 |
matching | +0.057 |
Neg logit contributions | |
ric | -0.070 |
cigarettes | -0.067 |
oes | -0.067 |
ho | -0.066 |
ipt | -0.066 |
Token | Can |
Token ID | 6090 |
Feature activation | +1.73e-01 |
Pos logit contributions | |
ric | +0.002 |
cigarettes | +0.002 |
ho | +0.002 |
umbling | +0.001 |
ipt | +0.001 |
Neg logit contributions | |
Princess | -0.002 |
chef | -0.002 |
is | -0.002 |
matching | -0.002 |
designer | -0.002 |
Token | I |
Token ID | 314 |
Feature activation | -2.13e+00 |
Pos logit contributions | |
ric | +0.004 |
umbling | +0.004 |
ipt | +0.004 |
cigarettes | +0.004 |
ho | +0.003 |
Neg logit contributions | |
Princess | -0.004 |
chef | -0.003 |
matching | -0.003 |
party | -0.003 |
designer | -0.003 |
Token | go |
Token ID | 467 |
Feature activation | -3.34e+00 |
Pos logit contributions | |
Princess | +0.046 |
chef | +0.044 |
is | +0.041 |
succeeded | +0.041 |
matching | +0.040 |
Neg logit contributions | |
ric | -0.043 |
cigarettes | -0.041 |
ho | -0.038 |
ipt | -0.038 |
umbling | -0.037 |
Token | out |
Token ID | 503 |
Feature activation | -1.54e+00 |
Pos logit contributions | |
Princess | +0.073 |
succeeded | +0.071 |
chef | +0.068 |
judges | +0.066 |
Marie | +0.063 |
Neg logit contributions | |
ric | -0.067 |
oes | -0.060 |
cigarettes | -0.060 |
run | -0.058 |
os | -0.056 |
Token | and |
Token ID | 290 |
Feature activation | -4.79e-01 |
Pos logit contributions | |
Princess | +0.031 |
chef | +0.028 |
succeeded | +0.027 |
is | +0.026 |
designer | +0.026 |
Neg logit contributions | |
ric | -0.032 |
cigarettes | -0.032 |
umbling | -0.030 |
ipt | -0.030 |
os | -0.030 |
Token | play |
Token ID | 711 |
Feature activation | -6.94e+00 |
Pos logit contributions | |
Princess | +0.009 |
chef | +0.008 |
matching | +0.008 |
feast | +0.008 |
succeeded | +0.008 |
Neg logit contributions | |
ric | -0.011 |
cigarettes | -0.011 |
ipt | -0.011 |
umbling | -0.010 |
ho | -0.010 |
Token | with |
Token ID | 351 |
Feature activation | -2.03e+00 |
Pos logit contributions | |
succeeded | +0.160 |
is | +0.148 |
majesty | +0.145 |
Princess | +0.143 |
ingu | +0.140 |
Neg logit contributions | |
cigarettes | -0.094 |
alarm | -0.093 |
ipt | -0.085 |
Why | -0.084 |
run | -0.081 |
Token | the |
Token ID | 262 |
Feature activation | -6.07e-01 |
Pos logit contributions | |
Princess | +0.041 |
chef | +0.039 |
succeeded | +0.036 |
matching | +0.036 |
feast | +0.036 |
Neg logit contributions | |
ric | -0.044 |
cigarettes | -0.043 |
ipt | -0.042 |
ho | -0.040 |
umbling | -0.040 |
Token | gray |
Token ID | 12768 |
Feature activation | +3.10e-01 |
Pos logit contributions | |
Princess | +0.012 |
chef | +0.011 |
succeeded | +0.010 |
feast | +0.010 |
matching | +0.010 |
Neg logit contributions | |
ric | -0.014 |
cigarettes | -0.013 |
ipt | -0.013 |
umbling | -0.012 |
ho | -0.012 |
Token | cloud |
Token ID | 6279 |
Feature activation | -7.99e-01 |
Pos logit contributions | |
ric | +0.007 |
cigarettes | +0.007 |
ipt | +0.007 |
ho | +0.007 |
umbling | +0.007 |
Neg logit contributions | |
Princess | -0.006 |
chef | -0.005 |
matching | -0.005 |
feast | -0.005 |
is | -0.005 |
Token | now |
Token ID | 783 |
Feature activation | -2.62e+00 |
Pos logit contributions | |
Princess | +0.013 |
chef | +0.012 |
matching | +0.011 |
feast | +0.011 |
is | +0.011 |
Neg logit contributions | |
ric | -0.021 |
cigarettes | -0.020 |
ipt | -0.020 |
ho | -0.019 |
umbling | -0.019 |
Token | Can |
Token ID | 6090 |
Feature activation | -4.50e-02 |
Pos logit contributions | |
ric | +0.006 |
ipt | +0.006 |
cigarettes | +0.005 |
ho | +0.005 |
umbling | +0.005 |
Neg logit contributions | |
Princess | -0.011 |
chef | -0.010 |
feast | -0.010 |
matching | -0.010 |
patiently | -0.010 |
Token | we |
Token ID | 356 |
Feature activation | -1.67e+00 |
Pos logit contributions | |
Princess | +0.001 |
chef | +0.001 |
matching | +0.001 |
feast | +0.001 |
is | +0.001 |
Neg logit contributions | |
ric | -0.001 |
cigarettes | -0.001 |
ipt | -0.001 |
umbling | -0.001 |
ho | -0.001 |
Token | go |
Token ID | 467 |
Feature activation | -3.24e+00 |
Pos logit contributions | |
Princess | +0.038 |
chef | +0.036 |
is | +0.034 |
succeeded | +0.034 |
designer | +0.034 |
Neg logit contributions | |
ric | -0.031 |
cigarettes | -0.029 |
ho | -0.029 |
oes | -0.027 |
ipt | -0.026 |
Token | out |
Token ID | 503 |
Feature activation | -1.09e+00 |
Pos logit contributions | |
Princess | +0.070 |
succeeded | +0.067 |
chef | +0.065 |
Benjamin | +0.063 |
judges | +0.062 |
Neg logit contributions | |
ric | -0.061 |
oes | -0.061 |
cigarettes | -0.059 |
os | -0.057 |
ho | -0.056 |
Token | and |
Token ID | 290 |
Feature activation | -5.10e-01 |
Pos logit contributions | |
Princess | +0.021 |
chef | +0.019 |
succeeded | +0.019 |
matching | +0.018 |
designer | +0.018 |
Neg logit contributions | |
cigarettes | -0.023 |
ric | -0.023 |
ipt | -0.022 |
os | -0.022 |
umbling | -0.022 |
Token | play |
Token ID | 711 |
Feature activation | -6.93e+00 |
Pos logit contributions | |
Princess | +0.010 |
chef | +0.009 |
matching | +0.008 |
feast | +0.008 |
party | +0.008 |
Neg logit contributions | |
ric | -0.012 |
cigarettes | -0.011 |
ipt | -0.011 |
umbling | -0.011 |
ho | -0.011 |
Token | again |
Token ID | 757 |
Feature activation | -2.51e+00 |
Pos logit contributions | |
succeeded | +0.160 |
is | +0.151 |
Princess | +0.147 |
Majesty | +0.146 |
majesty | +0.144 |
Neg logit contributions | |
cigarettes | -0.102 |
alarm | -0.091 |
el | -0.080 |
Why | -0.080 |
ow | -0.076 |
Token | , |
Token ID | 11 |
Feature activation | -2.26e+00 |
Pos logit contributions | |
Princess | +0.054 |
chef | +0.052 |
succeeded | +0.051 |
designer | +0.051 |
ival | +0.050 |
Neg logit contributions | |
cigarettes | -0.045 |
ric | -0.043 |
packs | -0.042 |
ipt | -0.042 |
Un | -0.041 |
Token | Mom |
Token ID | 11254 |
Feature activation | -9.48e-01 |
Pos logit contributions | |
Princess | +0.054 |
succeeded | +0.053 |
chef | +0.052 |
is | +0.051 |
designer | +0.050 |
Neg logit contributions | |
ric | -0.037 |
cigarettes | -0.036 |
ipt | -0.033 |
ho | -0.033 |
oes | -0.031 |
Token | ?" |
Token ID | 1701 |
Feature activation | -6.88e-01 |
Pos logit contributions | |
Princess | +0.018 |
chef | +0.016 |
succeeded | +0.016 |
matching | +0.016 |
feast | +0.015 |
Neg logit contributions | |
ric | -0.022 |
cigarettes | -0.021 |
oes | -0.020 |
ipt | -0.020 |
umbling | -0.020 |
Token | Lily |
Token ID | 20037 |
Feature activation | +9.52e-01 |
Pos logit contributions | |
Princess | +0.011 |
chef | +0.011 |
matching | +0.010 |
feast | +0.010 |
succeeded | +0.010 |
Neg logit contributions | |
ric | -0.017 |
cigarettes | -0.017 |
ipt | -0.016 |
ho | -0.016 |
umbling | -0.016 |
Token | " |
Token ID | 366 |
Feature activation | +1.96e-03 |
Pos logit contributions | |
Princess | +0.004 |
chef | +0.003 |
matching | +0.003 |
feast | +0.003 |
succeeded | +0.003 |
Neg logit contributions | |
ric | -0.004 |
cigarettes | -0.004 |
ipt | -0.004 |
ho | -0.004 |
umbling | -0.004 |
Token | Can |
Token ID | 6090 |
Feature activation | +1.79e-01 |
Pos logit contributions | |
ric | +0.000 |
cigarettes | +0.000 |
ho | +0.000 |
umbling | +0.000 |
ipt | +0.000 |
Neg logit contributions | |
Princess | -0.000 |
chef | -0.000 |
is | -0.000 |
matching | -0.000 |
designer | -0.000 |
Token | I |
Token ID | 314 |
Feature activation | -2.02e+00 |
Pos logit contributions | |
ric | +0.003 |
cigarettes | +0.003 |
ipt | +0.003 |
ho | +0.003 |
umbling | +0.003 |
Neg logit contributions | |
Princess | -0.004 |
chef | -0.004 |
matching | -0.004 |
feast | -0.004 |
is | -0.004 |
Token | go |
Token ID | 467 |
Feature activation | -3.06e+00 |
Pos logit contributions | |
Princess | +0.042 |
chef | +0.041 |
matching | +0.038 |
succeeded | +0.037 |
is | +0.037 |
Neg logit contributions | |
ric | -0.042 |
cigarettes | -0.039 |
ho | -0.038 |
ipt | -0.037 |
oes | -0.037 |
Token | and |
Token ID | 290 |
Feature activation | -4.18e-01 |
Pos logit contributions | |
Princess | +0.064 |
succeeded | +0.063 |
chef | +0.061 |
judges | +0.056 |
Benjamin | +0.055 |
Neg logit contributions | |
ric | -0.061 |
cigarettes | -0.055 |
oes | -0.054 |
run | -0.052 |
os | -0.051 |
Token | play |
Token ID | 711 |
Feature activation | -6.92e+00 |
Pos logit contributions | |
Princess | +0.008 |
chef | +0.007 |
matching | +0.007 |
feast | +0.007 |
succeeded | +0.007 |
Neg logit contributions | |
ric | -0.010 |
cigarettes | -0.009 |
ipt | -0.009 |
umbling | -0.009 |
ho | -0.009 |
Token | now |
Token ID | 783 |
Feature activation | -3.25e+00 |
Pos logit contributions | |
succeeded | +0.155 |
is | +0.138 |
feast | +0.133 |
chef | +0.133 |
majesty | +0.132 |
Neg logit contributions | |
cigarettes | -0.105 |
alarm | -0.095 |
run | -0.094 |
ipt | -0.091 |
el | -0.089 |
Token | ?" |
Token ID | 1701 |
Feature activation | -1.16e+00 |
Pos logit contributions | |
succeeded | +0.072 |
Princess | +0.067 |
is | +0.067 |
feast | +0.066 |
chef | +0.066 |
Neg logit contributions | |
ric | -0.048 |
alarm | -0.045 |
el | -0.045 |
ho | -0.044 |
umbling | -0.044 |
Token | |
Token ID | 220 |
Feature activation | +1.22e+00 |
Pos logit contributions | |
chef | +0.027 |
is | +0.027 |
Princess | +0.026 |
matching | +0.026 |
succeeded | +0.026 |
Neg logit contributions | |
ipt | -0.019 |
ric | -0.019 |
cigarettes | -0.018 |
ho | -0.017 |
urious | -0.017 |
Token |
|
Token ID | 198 |
Feature activation | -1.34e-01 |
Pos logit contributions | |
ric | +0.022 |
ipt | +0.021 |
cigarettes | +0.021 |
umbling | +0.020 |
ho | +0.019 |
Neg logit contributions | |
Princess | -0.028 |
chef | -0.028 |
feast | -0.027 |
matching | -0.026 |
is | -0.026 |
Token |
|
Token ID | 198 |
Feature activation | -3.86e-01 |
Pos logit contributions | |
chef | +0.004 |
Princess | +0.004 |
feast | +0.004 |
designer | +0.003 |
superhero | +0.003 |
Neg logit contributions | |
ipt | -0.002 |
ric | -0.002 |
Un | -0.001 |
cigarettes | -0.001 |
umbling | -0.001 |
Token | asked |
Token ID | 1965 |
Feature activation | -5.95e-01 |
Pos logit contributions | |
Princess | +0.001 |
chef | +0.001 |
matching | +0.001 |
succeeded | +0.001 |
feast | +0.001 |
Neg logit contributions | |
ric | -0.001 |
cigarettes | -0.001 |
ipt | -0.001 |
umbling | -0.001 |
ho | -0.001 |
Token | , |
Token ID | 11 |
Feature activation | -4.70e-01 |
Pos logit contributions | |
Princess | +0.013 |
chef | +0.013 |
succeeded | +0.013 |
is | +0.013 |
matching | +0.012 |
Neg logit contributions | |
cigarettes | -0.011 |
ric | -0.010 |
ho | -0.010 |
ipt | -0.010 |
os | -0.010 |
Token | " |
Token ID | 366 |
Feature activation | +5.97e-02 |
Pos logit contributions | |
Princess | +0.010 |
chef | +0.010 |
feast | +0.010 |
succeeded | +0.010 |
parties | +0.010 |
Neg logit contributions | |
ric | -0.009 |
ho | -0.008 |
cigarettes | -0.008 |
ipt | -0.008 |
umbling | -0.008 |
Token | Can |
Token ID | 6090 |
Feature activation | +6.50e-02 |
Pos logit contributions | |
ric | +0.001 |
cigarettes | +0.001 |
ipt | +0.001 |
umbling | +0.001 |
ho | +0.001 |
Neg logit contributions | |
Princess | -0.002 |
chef | -0.002 |
matching | -0.002 |
feast | -0.002 |
is | -0.002 |
Token | we |
Token ID | 356 |
Feature activation | -1.84e+00 |
Pos logit contributions | |
ric | +0.001 |
cigarettes | +0.001 |
ipt | +0.001 |
ho | +0.001 |
umbling | +0.001 |
Neg logit contributions | |
Princess | -0.002 |
chef | -0.001 |
matching | -0.001 |
feast | -0.001 |
succeeded | -0.001 |
Token | play |
Token ID | 711 |
Feature activation | -6.89e+00 |
Pos logit contributions | |
Princess | +0.040 |
chef | +0.038 |
is | +0.036 |
succeeded | +0.036 |
matching | +0.035 |
Neg logit contributions | |
ric | -0.036 |
cigarettes | -0.034 |
ho | -0.034 |
oes | -0.032 |
ipt | -0.032 |
Token | together |
Token ID | 1978 |
Feature activation | -3.77e+00 |
Pos logit contributions | |
succeeded | +0.147 |
is | +0.147 |
ier | +0.142 |
Benjamin | +0.141 |
Majesty | +0.139 |
Neg logit contributions | |
cigarettes | -0.106 |
os | -0.099 |
alarm | -0.095 |
run | -0.092 |
ipt | -0.091 |
Token | ?" |
Token ID | 1701 |
Feature activation | -8.67e-01 |
Pos logit contributions | |
Princess | +0.072 |
succeeded | +0.067 |
chef | +0.065 |
is | +0.062 |
designer | +0.061 |
Neg logit contributions | |
ric | -0.081 |
oes | -0.075 |
os | -0.073 |
ho | -0.071 |
cigarettes | -0.071 |
Token |
|
Token ID | 198 |
Feature activation | +2.34e-01 |
Pos logit contributions | |
Princess | +0.018 |
chef | +0.017 |
matching | +0.016 |
feast | +0.015 |
is | +0.015 |
Neg logit contributions | |
ric | -0.019 |
cigarettes | -0.018 |
ipt | -0.018 |
umbling | -0.017 |
ho | -0.017 |
Token |
|
Token ID | 198 |
Feature activation | -5.19e-01 |
Pos logit contributions | |
ric | +0.005 |
cigarettes | +0.005 |
ipt | +0.005 |
ho | +0.004 |
umbling | +0.004 |
Neg logit contributions | |
Princess | -0.005 |
chef | -0.005 |
matching | -0.005 |
feast | -0.004 |
succeeded | -0.004 |
Token | The |
Token ID | 464 |
Feature activation | +6.14e-01 |
Pos logit contributions | |
Princess | +0.011 |
chef | +0.011 |
matching | +0.010 |
is | +0.010 |
succeeded | +0.010 |
Neg logit contributions | |
ric | -0.011 |
cigarettes | -0.010 |
ipt | -0.010 |
umbling | -0.010 |
ho | -0.010 |
Token | , |
Token ID | 11 |
Feature activation | -1.94e+00 |
Pos logit contributions | |
chef | +0.111 |
Princess | +0.110 |
succeeded | +0.107 |
matching | +0.107 |
feast | +0.106 |
Neg logit contributions | |
cigarettes | -0.052 |
umbling | -0.051 |
os | -0.049 |
ipt | -0.047 |
ric | -0.046 |
Token | too |
Token ID | 1165 |
Feature activation | -9.00e-01 |
Pos logit contributions | |
Princess | +0.037 |
chef | +0.035 |
matching | +0.033 |
is | +0.032 |
feast | +0.032 |
Neg logit contributions | |
ric | -0.046 |
cigarettes | -0.044 |
ipt | -0.043 |
umbling | -0.042 |
ho | -0.042 |
Token | . |
Token ID | 13 |
Feature activation | -3.24e-01 |
Pos logit contributions | |
Princess | +0.018 |
chef | +0.018 |
matching | +0.017 |
feast | +0.016 |
succeeded | +0.016 |
Neg logit contributions | |
ric | -0.019 |
cigarettes | -0.019 |
ipt | -0.018 |
umbling | -0.018 |
ho | -0.018 |
Token | Can |
Token ID | 1680 |
Feature activation | -1.05e-01 |
Pos logit contributions | |
chef | +0.008 |
Princess | +0.008 |
feast | +0.008 |
is | +0.008 |
succeeded | +0.008 |
Neg logit contributions | |
ric | -0.005 |
cigarettes | -0.005 |
ipt | -0.005 |
ho | -0.004 |
umbling | -0.004 |
Token | we |
Token ID | 356 |
Feature activation | -2.04e+00 |
Pos logit contributions | |
Princess | +0.002 |
chef | +0.002 |
matching | +0.002 |
feast | +0.002 |
is | +0.002 |
Neg logit contributions | |
ric | -0.002 |
cigarettes | -0.002 |
ipt | -0.002 |
ho | -0.002 |
umbling | -0.002 |
Token | play |
Token ID | 711 |
Feature activation | -6.88e+00 |
Pos logit contributions | |
Princess | +0.050 |
chef | +0.047 |
is | +0.045 |
succeeded | +0.043 |
matching | +0.042 |
Neg logit contributions | |
ric | -0.035 |
oes | -0.031 |
ho | -0.031 |
cigarettes | -0.028 |
os | -0.027 |
Token | soccer |
Token ID | 11783 |
Feature activation | -4.69e+00 |
Pos logit contributions | |
majesty | +0.133 |
Princess | +0.132 |
Majesty | +0.131 |
is | +0.127 |
chef | +0.125 |
Neg logit contributions | |
ipt | -0.105 |
Why | -0.101 |
cigarettes | -0.097 |
os | -0.097 |
run | -0.096 |
Token | again |
Token ID | 757 |
Feature activation | -2.86e+00 |
Pos logit contributions | |
Princess | +0.088 |
chef | +0.078 |
succeeded | +0.075 |
is | +0.075 |
Benjamin | +0.073 |
Neg logit contributions | |
ipt | -0.098 |
ric | -0.096 |
os | -0.094 |
cigarettes | -0.094 |
ho | -0.094 |
Token | ? |
Token ID | 30 |
Feature activation | -8.53e-01 |
Pos logit contributions | |
Princess | +0.063 |
chef | +0.060 |
succeeded | +0.057 |
designer | +0.056 |
feast | +0.055 |
Neg logit contributions | |
ric | -0.052 |
cigarettes | -0.051 |
ipt | -0.050 |
Un | -0.048 |
oes | -0.048 |
Token | I |
Token ID | 314 |
Feature activation | -1.73e-01 |
Pos logit contributions | |
Princess | +0.022 |
chef | +0.022 |
is | +0.021 |
matching | +0.021 |
feast | +0.021 |
Neg logit contributions | |
ric | -0.013 |
cigarettes | -0.013 |
ipt | -0.012 |
ho | -0.012 |
umbling | -0.011 |
Token | will |
Token ID | 481 |
Feature activation | -8.10e-01 |
Pos logit contributions | |
Princess | +0.004 |
chef | +0.004 |
is | +0.004 |
matching | +0.004 |
feast | +0.004 |
Neg logit contributions | |
ric | -0.003 |
cigarettes | -0.003 |
ho | -0.003 |
ipt | -0.003 |
umbling | -0.003 |
Token | Would |
Token ID | 10928 |
Feature activation | -2.05e+00 |
Pos logit contributions | |
Princess | +0.008 |
chef | +0.008 |
matching | +0.007 |
feast | +0.007 |
is | +0.007 |
Neg logit contributions | |
ric | -0.006 |
cigarettes | -0.005 |
ipt | -0.005 |
ho | -0.005 |
umbling | -0.005 |
Token | you |
Token ID | 345 |
Feature activation | -2.96e+00 |
Pos logit contributions | |
Princess | +0.043 |
chef | +0.041 |
matching | +0.039 |
feast | +0.038 |
is | +0.038 |
Neg logit contributions | |
ric | -0.044 |
cigarettes | -0.042 |
ipt | -0.041 |
ho | -0.040 |
umbling | -0.040 |
Token | like |
Token ID | 588 |
Feature activation | -2.71e+00 |
Pos logit contributions | |
Princess | +0.067 |
chef | +0.065 |
matching | +0.060 |
succeeded | +0.060 |
designer | +0.060 |
Neg logit contributions | |
ric | -0.055 |
oes | -0.050 |
os | -0.046 |
ho | -0.046 |
cigarettes | -0.046 |
Token | to |
Token ID | 284 |
Feature activation | -1.60e+00 |
Pos logit contributions | |
Princess | +0.058 |
succeeded | +0.057 |
chef | +0.056 |
feast | +0.054 |
designer | +0.052 |
Neg logit contributions | |
ric | -0.053 |
cigarettes | -0.050 |
ipt | -0.048 |
ho | -0.047 |
oes | -0.046 |
Token | come |
Token ID | 1282 |
Feature activation | -3.18e+00 |
Pos logit contributions | |
Princess | +0.034 |
chef | +0.033 |
succeeded | +0.031 |
matching | +0.030 |
designer | +0.029 |
Neg logit contributions | |
ric | -0.034 |
ho | -0.029 |
cigarettes | -0.029 |
oes | -0.029 |
ipt | -0.028 |
Token | play |
Token ID | 711 |
Feature activation | -6.86e+00 |
Pos logit contributions | |
succeeded | +0.063 |
chef | +0.061 |
Princess | +0.061 |
feast | +0.057 |
is | +0.055 |
Neg logit contributions | |
ric | -0.071 |
oes | -0.063 |
cigarettes | -0.061 |
ipt | -0.061 |
ho | -0.060 |
Token | at |
Token ID | 379 |
Feature activation | -1.76e+00 |
Pos logit contributions | |
succeeded | +0.173 |
feast | +0.151 |
majesty | +0.151 |
Majesty | +0.147 |
ingu | +0.139 |
Neg logit contributions | |
alarm | -0.085 |
cigarettes | -0.084 |
Why | -0.080 |
run | -0.079 |
os | -0.078 |
Token | my |
Token ID | 616 |
Feature activation | -1.57e+00 |
Pos logit contributions | |
Princess | +0.033 |
chef | +0.032 |
succeeded | +0.031 |
is | +0.029 |
matching | +0.029 |
Neg logit contributions | |
ric | -0.040 |
cigarettes | -0.037 |
ipt | -0.037 |
ho | -0.036 |
umbling | -0.035 |
Token | daughter |
Token ID | 4957 |
Feature activation | -2.38e+00 |
Pos logit contributions | |
Princess | +0.030 |
chef | +0.029 |
succeeded | +0.027 |
feast | +0.026 |
is | +0.026 |
Neg logit contributions | |
ric | -0.035 |
cigarettes | -0.033 |
ipt | -0.033 |
ho | -0.032 |
umbling | -0.031 |
Token | 's |
Token ID | 338 |
Feature activation | -1.05e+00 |
Pos logit contributions | |
Princess | +0.042 |
chef | +0.040 |
succeeded | +0.040 |
is | +0.039 |
matching | +0.038 |
Neg logit contributions | |
ric | -0.055 |
cigarettes | -0.052 |
ho | -0.049 |
oes | -0.049 |
os | -0.048 |
Token | birthday |
Token ID | 10955 |
Feature activation | -6.00e+00 |
Pos logit contributions | |
Princess | +0.020 |
chef | +0.019 |
succeeded | +0.017 |
matching | +0.017 |
feast | +0.017 |
Neg logit contributions | |
ric | -0.024 |
cigarettes | -0.023 |
ipt | -0.023 |
ho | -0.022 |
umbling | -0.022 |
Token | dolls |
Token ID | 36062 |
Feature activation | -1.79e+00 |
Pos logit contributions | |
Princess | +0.024 |
chef | +0.023 |
matching | +0.021 |
feast | +0.021 |
succeeded | +0.021 |
Neg logit contributions | |
ric | -0.025 |
cigarettes | -0.024 |
ipt | -0.023 |
ho | -0.023 |
umbling | -0.022 |
Token | too |
Token ID | 1165 |
Feature activation | -6.08e-01 |
Pos logit contributions | |
Princess | +0.036 |
chef | +0.034 |
succeeded | +0.032 |
feast | +0.031 |
Benjamin | +0.031 |
Neg logit contributions | |
ric | -0.038 |
cigarettes | -0.036 |
ho | -0.033 |
os | -0.033 |
oes | -0.033 |
Token | . |
Token ID | 13 |
Feature activation | -4.49e-01 |
Pos logit contributions | |
Princess | +0.013 |
chef | +0.013 |
feast | +0.012 |
designer | +0.012 |
succeeded | +0.012 |
Neg logit contributions | |
ric | -0.012 |
cigarettes | -0.012 |
umbling | -0.011 |
ho | -0.011 |
ipt | -0.011 |
Token | Can |
Token ID | 1680 |
Feature activation | +4.90e-02 |
Pos logit contributions | |
Princess | +0.011 |
chef | +0.011 |
matching | +0.010 |
feast | +0.010 |
succeeded | +0.010 |
Neg logit contributions | |
ric | -0.008 |
cigarettes | -0.007 |
ipt | -0.007 |
ho | -0.007 |
umbling | -0.007 |
Token | we |
Token ID | 356 |
Feature activation | -2.14e+00 |
Pos logit contributions | |
ric | +0.001 |
cigarettes | +0.001 |
ipt | +0.001 |
umbling | +0.001 |
ho | +0.001 |
Neg logit contributions | |
Princess | -0.001 |
chef | -0.001 |
matching | -0.001 |
is | -0.001 |
designer | -0.001 |
Token | play |
Token ID | 711 |
Feature activation | -6.83e+00 |
Pos logit contributions | |
Princess | +0.045 |
chef | +0.044 |
succeeded | +0.041 |
matching | +0.040 |
is | +0.040 |
Neg logit contributions | |
ric | -0.044 |
cigarettes | -0.041 |
ho | -0.040 |
ipt | -0.039 |
oes | -0.038 |
Token | with |
Token ID | 351 |
Feature activation | -2.87e+00 |
Pos logit contributions | |
succeeded | +0.141 |
chef | +0.134 |
Benjamin | +0.133 |
majesty | +0.132 |
feast | +0.131 |
Neg logit contributions | |
cigarettes | -0.113 |
ipt | -0.099 |
alarm | -0.098 |
ric | -0.097 |
os | -0.096 |
Token | them |
Token ID | 606 |
Feature activation | -5.05e+00 |
Pos logit contributions | |
Princess | +0.062 |
chef | +0.062 |
succeeded | +0.061 |
feast | +0.060 |
designer | +0.057 |
Neg logit contributions | |
cigarettes | -0.056 |
ric | -0.052 |
ipt | -0.048 |
ho | -0.047 |
os | -0.046 |
Token | together |
Token ID | 1978 |
Feature activation | -4.05e+00 |
Pos logit contributions | |
Princess | +0.105 |
succeeded | +0.099 |
chef | +0.097 |
feast | +0.094 |
is | +0.093 |
Neg logit contributions | |
ric | -0.092 |
os | -0.089 |
cigarettes | -0.088 |
oes | -0.087 |
ipt | -0.085 |
Token | ?" |
Token ID | 1701 |
Feature activation | -1.09e+00 |
Pos logit contributions | |
Princess | +0.087 |
succeeded | +0.080 |
chef | +0.080 |
feast | +0.077 |
designer | +0.075 |
Neg logit contributions | |
ric | -0.075 |
cigarettes | -0.070 |
oes | -0.070 |
os | -0.069 |
ipt | -0.067 |
Token |
|
Token ID | 198 |
Feature activation | +1.67e-01 |
Pos logit contributions | |
chef | +0.025 |
Princess | +0.024 |
feast | +0.024 |
succeeded | +0.024 |
matching | +0.024 |
Neg logit contributions | |
ric | -0.019 |
cigarettes | -0.018 |
ipt | -0.018 |
ho | -0.017 |
os | -0.016 |
Token | said |
Token ID | 531 |
Feature activation | -1.01e+00 |
Pos logit contributions | |
Princess | +0.032 |
chef | +0.030 |
matching | +0.029 |
designer | +0.029 |
parties | +0.029 |
Neg logit contributions | |
ho | -0.021 |
ipt | -0.021 |
ric | -0.020 |
os | -0.019 |
urious | -0.019 |
Token | , |
Token ID | 11 |
Feature activation | -5.39e-01 |
Pos logit contributions | |
is | +0.020 |
Princess | +0.018 |
icians | +0.018 |
succeeded | +0.018 |
feast | +0.017 |
Neg logit contributions | |
ho | -0.023 |
ful | -0.020 |
badly | -0.017 |
cigarettes | -0.017 |
run | -0.017 |
Token | " |
Token ID | 366 |
Feature activation | +3.76e-02 |
Pos logit contributions | |
Princess | +0.012 |
chef | +0.011 |
feast | +0.011 |
succeeded | +0.011 |
parties | +0.011 |
Neg logit contributions | |
ho | -0.010 |
ric | -0.010 |
ipt | -0.009 |
cigarettes | -0.009 |
umbling | -0.009 |
Token | Can |
Token ID | 6090 |
Feature activation | +4.74e-01 |
Pos logit contributions | |
ric | +0.001 |
cigarettes | +0.001 |
ipt | +0.001 |
ho | +0.001 |
umbling | +0.001 |
Neg logit contributions | |
Princess | -0.001 |
chef | -0.001 |
is | -0.001 |
matching | -0.001 |
designer | -0.001 |
Token | we |
Token ID | 356 |
Feature activation | -1.40e+00 |
Pos logit contributions | |
ric | +0.011 |
cigarettes | +0.010 |
umbling | +0.010 |
ipt | +0.010 |
ho | +0.010 |
Neg logit contributions | |
Princess | -0.009 |
chef | -0.009 |
matching | -0.008 |
party | -0.008 |
designer | -0.008 |
Token | play |
Token ID | 711 |
Feature activation | -6.83e+00 |
Pos logit contributions | |
Princess | +0.033 |
chef | +0.031 |
is | +0.030 |
succeeded | +0.029 |
designer | +0.029 |
Neg logit contributions | |
ric | -0.024 |
cigarettes | -0.023 |
ho | -0.023 |
oes | -0.022 |
os | -0.020 |
Token | some |
Token ID | 617 |
Feature activation | -3.52e+00 |
Pos logit contributions | |
majesty | +0.145 |
Princess | +0.145 |
is | +0.143 |
ier | +0.143 |
succeeded | +0.142 |
Neg logit contributions | |
cigarettes | -0.092 |
o | -0.085 |
run | -0.082 |
Why | -0.081 |
os | -0.077 |
Token | more |
Token ID | 517 |
Feature activation | -3.32e+00 |
Pos logit contributions | |
Princess | +0.061 |
succeeded | +0.054 |
Benjamin | +0.052 |
Majesty | +0.051 |
Marie | +0.048 |
Neg logit contributions | |
cigarettes | -0.079 |
ipt | -0.076 |
urious | -0.073 |
oes | -0.071 |
run | -0.070 |
Token | after |
Token ID | 706 |
Feature activation | +5.87e-01 |
Pos logit contributions | |
Princess | +0.071 |
succeeded | +0.066 |
Benjamin | +0.064 |
chef | +0.062 |
designer | +0.060 |
Neg logit contributions | |
cigarettes | -0.061 |
oes | -0.058 |
ipt | -0.058 |
packs | -0.055 |
urious | -0.054 |
Token | dinner |
Token ID | 8073 |
Feature activation | -4.60e+00 |
Pos logit contributions | |
ric | +0.015 |
cigarettes | +0.014 |
ipt | +0.014 |
ho | +0.014 |
umbling | +0.014 |
Neg logit contributions | |
Princess | -0.010 |
chef | -0.009 |
matching | -0.008 |
is | -0.008 |
succeeded | -0.008 |
Token | ?" |
Token ID | 1701 |
Feature activation | -7.87e-01 |
Pos logit contributions | |
Princess | +0.104 |
succeeded | +0.093 |
designer | +0.093 |
Majesty | +0.092 |
is | +0.092 |
Neg logit contributions | |
oes | -0.067 |
os | -0.065 |
cigarettes | -0.064 |
ipt | -0.064 |
Un | -0.063 |
Token | you |
Token ID | 345 |
Feature activation | -2.66e+00 |
Pos logit contributions | |
Princess | +0.036 |
chef | +0.033 |
matching | +0.032 |
is | +0.031 |
feast | +0.031 |
Neg logit contributions | |
ric | -0.040 |
cigarettes | -0.038 |
ipt | -0.037 |
umbling | -0.037 |
ho | -0.036 |
Token | like |
Token ID | 588 |
Feature activation | -2.65e+00 |
Pos logit contributions | |
Princess | +0.063 |
chef | +0.060 |
is | +0.057 |
matching | +0.056 |
judges | +0.055 |
Neg logit contributions | |
ric | -0.046 |
oes | -0.043 |
cigarettes | -0.042 |
ho | -0.040 |
os | -0.039 |
Token | to |
Token ID | 284 |
Feature activation | -1.44e+00 |
Pos logit contributions | |
Princess | +0.060 |
succeeded | +0.059 |
chef | +0.057 |
feast | +0.056 |
designer | +0.054 |
Neg logit contributions | |
ric | -0.047 |
cigarettes | -0.046 |
ipt | -0.043 |
ho | -0.041 |
oes | -0.041 |
Token | come |
Token ID | 1282 |
Feature activation | -2.93e+00 |
Pos logit contributions | |
Princess | +0.031 |
chef | +0.030 |
succeeded | +0.028 |
matching | +0.027 |
is | +0.027 |
Neg logit contributions | |
ric | -0.029 |
ho | -0.026 |
oes | -0.026 |
cigarettes | -0.026 |
ipt | -0.024 |
Token | and |
Token ID | 290 |
Feature activation | -6.70e-01 |
Pos logit contributions | |
succeeded | +0.063 |
Princess | +0.059 |
chef | +0.059 |
is | +0.056 |
feast | +0.055 |
Neg logit contributions | |
ric | -0.060 |
oes | -0.054 |
ipt | -0.051 |
cigarettes | -0.051 |
os | -0.051 |
Token | play |
Token ID | 711 |
Feature activation | -6.83e+00 |
Pos logit contributions | |
Princess | +0.013 |
chef | +0.013 |
is | +0.012 |
succeeded | +0.012 |
matching | +0.012 |
Neg logit contributions | |
ric | -0.014 |
cigarettes | -0.013 |
ho | -0.013 |
ipt | -0.013 |
oes | -0.013 |
Token | with |
Token ID | 351 |
Feature activation | -1.86e+00 |
Pos logit contributions | |
succeeded | +0.172 |
majesty | +0.153 |
Majesty | +0.150 |
feast | +0.149 |
is | +0.144 |
Neg logit contributions | |
cigarettes | -0.080 |
Why | -0.078 |
alarm | -0.076 |
run | -0.075 |
ipt | -0.074 |
Token | me |
Token ID | 502 |
Feature activation | -2.51e+00 |
Pos logit contributions | |
Princess | +0.036 |
chef | +0.035 |
succeeded | +0.034 |
feast | +0.033 |
matching | +0.033 |
Neg logit contributions | |
cigarettes | -0.040 |
ric | -0.040 |
ipt | -0.037 |
ho | -0.036 |
umbling | -0.036 |
Token | at |
Token ID | 379 |
Feature activation | -9.25e-01 |
Pos logit contributions | |
Princess | +0.050 |
succeeded | +0.047 |
chef | +0.046 |
feast | +0.044 |
matching | +0.044 |
Neg logit contributions | |
ric | -0.054 |
cigarettes | -0.049 |
oes | -0.048 |
ipt | -0.048 |
os | -0.048 |
Token | the |
Token ID | 262 |
Feature activation | -1.53e+00 |
Pos logit contributions | |
Princess | +0.017 |
chef | +0.017 |
is | +0.016 |
succeeded | +0.016 |
matching | +0.015 |
Neg logit contributions | |
ric | -0.021 |
cigarettes | -0.020 |
ipt | -0.020 |
ho | -0.019 |
umbling | -0.018 |
Token | park |
Token ID | 3952 |
Feature activation | -2.23e+00 |
Pos logit contributions | |
Princess | +0.029 |
succeeded | +0.028 |
chef | +0.028 |
feast | +0.027 |
patiently | +0.026 |
Neg logit contributions | |
ric | -0.032 |
cigarettes | -0.029 |
ho | -0.029 |
ipt | -0.029 |
umbling | -0.029 |
Token | ! |
Token ID | 0 |
Feature activation | -6.18e-01 |
Pos logit contributions | |
succeeded | +0.049 |
Princess | +0.042 |
chef | +0.040 |
matching | +0.038 |
improved | +0.038 |
Neg logit contributions | |
ric | -0.090 |
cigarettes | -0.089 |
ho | -0.089 |
ipt | -0.084 |
oes | -0.083 |
Token | Do |
Token ID | 2141 |
Feature activation | -9.41e-01 |
Pos logit contributions | |
Princess | +0.015 |
chef | +0.014 |
matching | +0.013 |
designer | +0.013 |
feast | +0.012 |
Neg logit contributions | |
ric | -0.012 |
cigarettes | -0.011 |
ipt | -0.011 |
umbling | -0.011 |
ho | -0.011 |
Token | you |
Token ID | 345 |
Feature activation | -1.33e+00 |
Pos logit contributions | |
Princess | +0.018 |
chef | +0.017 |
matching | +0.017 |
is | +0.016 |
succeeded | +0.016 |
Neg logit contributions | |
ric | -0.021 |
cigarettes | -0.021 |
ipt | -0.020 |
umbling | -0.020 |
ho | -0.020 |
Token | want |
Token ID | 765 |
Feature activation | -3.54e+00 |
Pos logit contributions | |
Princess | +0.044 |
chef | +0.044 |
is | +0.042 |
matching | +0.041 |
designer | +0.041 |
Neg logit contributions | |
ric | -0.011 |
oes | -0.008 |
cigarettes | -0.008 |
os | -0.007 |
ho | -0.006 |
Token | to |
Token ID | 284 |
Feature activation | -1.99e+00 |
Pos logit contributions | |
Princess | +0.093 |
chef | +0.089 |
succeeded | +0.088 |
designer | +0.086 |
feast | +0.083 |
Neg logit contributions | |
cigarettes | -0.052 |
ric | -0.049 |
oes | -0.045 |
ipt | -0.044 |
os | -0.041 |
Token | play |
Token ID | 711 |
Feature activation | -6.81e+00 |
Pos logit contributions | |
Princess | +0.046 |
chef | +0.046 |
succeeded | +0.043 |
is | +0.042 |
patiently | +0.042 |
Neg logit contributions | |
ric | -0.037 |
oes | -0.032 |
cigarettes | -0.031 |
ho | -0.030 |
ipt | -0.030 |
Token | with |
Token ID | 351 |
Feature activation | -2.86e+00 |
Pos logit contributions | |
succeeded | +0.176 |
feast | +0.163 |
ier | +0.161 |
majesty | +0.159 |
Majesty | +0.159 |
Neg logit contributions | |
cigarettes | -0.084 |
alarm | -0.078 |
run | -0.073 |
Why | -0.072 |
ipt | -0.072 |
Token | me |
Token ID | 502 |
Feature activation | -3.37e+00 |
Pos logit contributions | |
Princess | +0.063 |
chef | +0.061 |
succeeded | +0.060 |
feast | +0.060 |
designer | +0.056 |
Neg logit contributions | |
cigarettes | -0.056 |
ric | -0.054 |
ipt | -0.051 |
ho | -0.048 |
el | -0.047 |
Token | ?" |
Token ID | 1701 |
Feature activation | -7.27e-01 |
Pos logit contributions | |
Princess | +0.077 |
succeeded | +0.074 |
chef | +0.071 |
feast | +0.069 |
judges | +0.067 |
Neg logit contributions | |
ric | -0.062 |
os | -0.057 |
cigarettes | -0.057 |
oes | -0.057 |
ipt | -0.054 |
Token | Sue |
Token ID | 26089 |
Feature activation | -4.56e-01 |
Pos logit contributions | |
Princess | +0.014 |
chef | +0.013 |
matching | +0.012 |
feast | +0.012 |
is | +0.012 |
Neg logit contributions | |
ric | -0.017 |
cigarettes | -0.016 |
umbling | -0.016 |
ho | -0.016 |
ipt | -0.016 |
Token | nodded |
Token ID | 14464 |
Feature activation | -4.04e-01 |
Pos logit contributions | |
Princess | +0.009 |
chef | +0.009 |
feast | +0.008 |
matching | +0.008 |
success | +0.008 |
Neg logit contributions | |
ho | -0.009 |
ric | -0.009 |
os | -0.009 |
ipt | -0.009 |
urious | -0.009 |
Token | Can |
Token ID | 6090 |
Feature activation | +4.07e-01 |
Pos logit contributions | |
ric | +0.016 |
cigarettes | +0.015 |
ipt | +0.015 |
ho | +0.015 |
umbling | +0.014 |
Neg logit contributions | |
Princess | -0.023 |
chef | -0.022 |
matching | -0.021 |
is | -0.020 |
succeeded | -0.020 |
Token | we |
Token ID | 356 |
Feature activation | -1.64e+00 |
Pos logit contributions | |
ric | +0.009 |
umbling | +0.009 |
ipt | +0.009 |
cigarettes | +0.009 |
ho | +0.008 |
Neg logit contributions | |
Princess | -0.008 |
chef | -0.007 |
matching | -0.007 |
party | -0.007 |
feast | -0.007 |
Token | go |
Token ID | 467 |
Feature activation | -2.82e+00 |
Pos logit contributions | |
Princess | +0.034 |
chef | +0.032 |
matching | +0.030 |
feast | +0.030 |
succeeded | +0.030 |
Neg logit contributions | |
ric | -0.035 |
cigarettes | -0.034 |
ipt | -0.033 |
umbling | -0.032 |
ho | -0.032 |
Token | outside |
Token ID | 2354 |
Feature activation | -3.28e+00 |
Pos logit contributions | |
Princess | +0.056 |
chef | +0.052 |
succeeded | +0.051 |
designer | +0.048 |
judges | +0.048 |
Neg logit contributions | |
ric | -0.063 |
cigarettes | -0.059 |
oes | -0.059 |
ho | -0.058 |
ipt | -0.057 |
Token | and |
Token ID | 290 |
Feature activation | +2.09e-01 |
Pos logit contributions | |
Princess | +0.057 |
succeeded | +0.053 |
chef | +0.051 |
is | +0.050 |
matching | +0.049 |
Neg logit contributions | |
ric | -0.076 |
cigarettes | -0.073 |
ho | -0.072 |
ipt | -0.070 |
os | -0.070 |
Token | play |
Token ID | 711 |
Feature activation | -6.81e+00 |
Pos logit contributions | |
ipt | +0.006 |
umbling | +0.005 |
ric | +0.005 |
cigarettes | +0.005 |
ho | +0.005 |
Neg logit contributions | |
Princess | -0.003 |
chef | -0.003 |
matching | -0.003 |
party | -0.003 |
feast | -0.003 |
Token | in |
Token ID | 287 |
Feature activation | -9.22e-01 |
Pos logit contributions | |
succeeded | +0.155 |
is | +0.151 |
Princess | +0.151 |
designer | +0.147 |
Benjamin | +0.145 |
Neg logit contributions | |
cigarettes | -0.107 |
alarm | -0.094 |
el | -0.087 |
ipt | -0.084 |
os | -0.084 |
Token | the |
Token ID | 262 |
Feature activation | -1.16e+00 |
Pos logit contributions | |
Princess | +0.017 |
chef | +0.016 |
matching | +0.015 |
succeeded | +0.015 |
is | +0.015 |
Neg logit contributions | |
ric | -0.022 |
cigarettes | -0.021 |
ipt | -0.021 |
ho | -0.020 |
umbling | -0.020 |
Token | snow |
Token ID | 6729 |
Feature activation | -2.20e+00 |
Pos logit contributions | |
Princess | +0.028 |
chef | +0.027 |
matching | +0.025 |
succeeded | +0.025 |
is | +0.025 |
Neg logit contributions | |
ric | -0.021 |
cigarettes | -0.020 |
ipt | -0.019 |
ho | -0.019 |
umbling | -0.019 |
Token | ?" |
Token ID | 1701 |
Feature activation | -3.87e-01 |
Pos logit contributions | |
Princess | +0.036 |
chef | +0.033 |
matching | +0.031 |
succeeded | +0.030 |
is | +0.030 |
Neg logit contributions | |
ric | -0.057 |
cigarettes | -0.055 |
ipt | -0.054 |
ho | -0.053 |
umbling | -0.053 |
Token | Anna |
Token ID | 11735 |
Feature activation | +8.40e-01 |
Pos logit contributions | |
Princess | +0.007 |
chef | +0.006 |
matching | +0.006 |
designer | +0.005 |
party | +0.005 |
Neg logit contributions | |
ric | -0.010 |
cigarettes | -0.009 |
umbling | -0.009 |
ho | -0.009 |
ipt | -0.009 |
Token | . |
Token ID | 13 |
Feature activation | -5.45e-02 |
Pos logit contributions | |
succeeded | +0.044 |
Princess | +0.038 |
chef | +0.037 |
matching | +0.036 |
judges | +0.035 |
Neg logit contributions | |
cigarettes | -0.078 |
ric | -0.076 |
ho | -0.075 |
ipt | -0.070 |
os | -0.070 |
Token | Do |
Token ID | 2141 |
Feature activation | -5.23e-01 |
Pos logit contributions | |
chef | +0.001 |
Princess | +0.001 |
feast | +0.001 |
succeeded | +0.001 |
is | +0.001 |
Neg logit contributions | |
ric | -0.001 |
ipt | -0.001 |
cigarettes | -0.001 |
ho | -0.001 |
umbling | -0.001 |
Token | you |
Token ID | 345 |
Feature activation | -1.03e+00 |
Pos logit contributions | |
Princess | +0.010 |
chef | +0.010 |
matching | +0.009 |
is | +0.009 |
succeeded | +0.009 |
Neg logit contributions | |
ric | -0.012 |
cigarettes | -0.012 |
ipt | -0.011 |
umbling | -0.011 |
ho | -0.011 |
Token | want |
Token ID | 765 |
Feature activation | -3.15e+00 |
Pos logit contributions | |
Princess | +0.033 |
chef | +0.033 |
is | +0.032 |
matching | +0.032 |
judges | +0.031 |
Neg logit contributions | |
ric | -0.009 |
cigarettes | -0.008 |
oes | -0.007 |
ho | -0.006 |
urious | -0.006 |
Token | to |
Token ID | 284 |
Feature activation | -1.65e+00 |
Pos logit contributions | |
Princess | +0.076 |
chef | +0.076 |
succeeded | +0.075 |
feast | +0.072 |
designer | +0.072 |
Neg logit contributions | |
cigarettes | -0.049 |
ric | -0.044 |
oes | -0.042 |
ipt | -0.042 |
ho | -0.040 |
Token | play |
Token ID | 711 |
Feature activation | -6.81e+00 |
Pos logit contributions | |
Princess | +0.035 |
chef | +0.035 |
succeeded | +0.033 |
is | +0.032 |
patiently | +0.031 |
Neg logit contributions | |
ric | -0.033 |
cigarettes | -0.030 |
ho | -0.030 |
oes | -0.029 |
ipt | -0.028 |
Token | with |
Token ID | 351 |
Feature activation | -2.36e+00 |
Pos logit contributions | |
succeeded | +0.182 |
feast | +0.167 |
Majesty | +0.167 |
majesty | +0.166 |
ier | +0.160 |
Neg logit contributions | |
cigarettes | -0.079 |
alarm | -0.072 |
run | -0.072 |
Why | -0.070 |
ipt | -0.063 |
Token | me |
Token ID | 502 |
Feature activation | -3.08e+00 |
Pos logit contributions | |
Princess | +0.049 |
chef | +0.049 |
succeeded | +0.048 |
feast | +0.048 |
designer | +0.045 |
Neg logit contributions | |
cigarettes | -0.048 |
ric | -0.046 |
ipt | -0.042 |
ho | -0.042 |
el | -0.041 |
Token | ? |
Token ID | 30 |
Feature activation | -4.15e-01 |
Pos logit contributions | |
Princess | +0.068 |
succeeded | +0.067 |
chef | +0.064 |
judges | +0.062 |
feast | +0.062 |
Neg logit contributions | |
ric | -0.056 |
oes | -0.052 |
cigarettes | -0.052 |
os | -0.051 |
run | -0.049 |
Token | We |
Token ID | 775 |
Feature activation | -6.06e-01 |
Pos logit contributions | |
Princess | +0.010 |
chef | +0.009 |
matching | +0.008 |
designer | +0.008 |
is | +0.008 |
Neg logit contributions | |
ric | -0.008 |
cigarettes | -0.008 |
ipt | -0.008 |
umbling | -0.008 |
ho | -0.008 |
Token | can |
Token ID | 460 |
Feature activation | -6.60e-01 |
Pos logit contributions | |
Princess | +0.013 |
chef | +0.013 |
is | +0.013 |
matching | +0.012 |
patiently | +0.012 |
Neg logit contributions | |
ric | -0.012 |
ho | -0.011 |
cigarettes | -0.011 |
os | -0.010 |
oes | -0.010 |
Token | sad |
Token ID | 6507 |
Feature activation | +8.01e-01 |
Pos logit contributions | |
ric | +0.004 |
cigarettes | +0.004 |
ipt | +0.004 |
ho | +0.004 |
umbling | +0.004 |
Neg logit contributions | |
Princess | -0.004 |
chef | -0.004 |
matching | -0.003 |
feast | -0.003 |
is | -0.003 |
Token | that |
Token ID | 326 |
Feature activation | +1.36e+00 |
Pos logit contributions | |
ric | +0.015 |
cigarettes | +0.015 |
ho | +0.015 |
umbling | +0.015 |
ipt | +0.014 |
Neg logit contributions | |
Princess | -0.017 |
chef | -0.017 |
feast | -0.016 |
succeeded | -0.016 |
matching | -0.016 |
Token | her |
Token ID | 607 |
Feature activation | +1.03e+00 |
Pos logit contributions | |
ric | +0.031 |
ipt | +0.029 |
cigarettes | +0.029 |
umbling | +0.028 |
ho | +0.028 |
Neg logit contributions | |
Princess | -0.029 |
chef | -0.026 |
matching | -0.025 |
party | -0.024 |
succeeded | -0.024 |
Token | friend |
Token ID | 1545 |
Feature activation | +1.18e-01 |
Pos logit contributions | |
cigarettes | +0.025 |
ric | +0.025 |
ho | +0.024 |
ipt | +0.023 |
umbling | +0.023 |
Neg logit contributions | |
Princess | -0.018 |
chef | -0.017 |
is | -0.015 |
matching | -0.015 |
designer | -0.015 |
Token | didn |
Token ID | 1422 |
Feature activation | +1.97e+00 |
Pos logit contributions | |
ric | +0.002 |
cigarettes | +0.002 |
ipt | +0.002 |
umbling | +0.002 |
ho | +0.002 |
Neg logit contributions | |
Princess | -0.003 |
chef | -0.003 |
matching | -0.002 |
succeeded | -0.002 |
feast | -0.002 |
Token | 't |
Token ID | 470 |
Feature activation | +3.16e+00 |
Pos logit contributions | |
cigarettes | +0.049 |
ric | +0.048 |
ipt | +0.046 |
ho | +0.044 |
packs | +0.043 |
Neg logit contributions | |
Princess | -0.035 |
is | -0.034 |
matching | -0.034 |
better | -0.032 |
chef | -0.032 |
Token | like |
Token ID | 588 |
Feature activation | +2.15e-01 |
Pos logit contributions | |
ric | +0.058 |
cigarettes | +0.056 |
ho | +0.053 |
umbling | +0.052 |
oes | +0.051 |
Neg logit contributions | |
Princess | -0.072 |
chef | -0.070 |
matching | -0.066 |
feast | -0.065 |
succeeded | -0.065 |
Token | something |
Token ID | 1223 |
Feature activation | -7.87e-02 |
Pos logit contributions | |
ric | +0.005 |
cigarettes | +0.005 |
ipt | +0.005 |
umbling | +0.005 |
ho | +0.005 |
Neg logit contributions | |
Princess | -0.004 |
chef | -0.004 |
matching | -0.003 |
feast | -0.003 |
is | -0.003 |
Token | she |
Token ID | 673 |
Feature activation | -2.50e-01 |
Pos logit contributions | |
Princess | +0.001 |
chef | +0.001 |
feast | +0.001 |
designer | +0.001 |
matching | +0.001 |
Neg logit contributions | |
cigarettes | -0.002 |
ric | -0.002 |
ho | -0.002 |
umbling | -0.002 |
ipt | -0.002 |
Token | loved |
Token ID | 6151 |
Feature activation | +2.15e-01 |
Pos logit contributions | |
Princess | +0.006 |
chef | +0.006 |
matching | +0.006 |
succeeded | +0.006 |
feast | +0.006 |
Neg logit contributions | |
ric | -0.004 |
cigarettes | -0.004 |
ipt | -0.004 |
umbling | -0.004 |
ho | -0.004 |
Token | so |
Token ID | 523 |
Feature activation | +2.19e+00 |
Pos logit contributions | |
ric | +0.006 |
ipt | +0.005 |
umbling | +0.005 |
cigarettes | +0.005 |
ho | +0.005 |
Neg logit contributions | |
Princess | -0.004 |
chef | -0.003 |
matching | -0.003 |
party | -0.003 |
feast | -0.003 |
Token | , |
Token ID | 11 |
Feature activation | +5.08e-01 |
Pos logit contributions | |
ho | +0.040 |
ric | +0.040 |
cigarettes | +0.039 |
ipt | +0.038 |
spe | +0.037 |
Neg logit contributions | |
Princess | -0.012 |
chef | -0.011 |
succeeded | -0.010 |
feast | -0.010 |
is | -0.009 |
Token | � |
Token ID | 6184 |
Feature activation | +3.08e-01 |
Pos logit contributions | |
ric | +0.011 |
cigarettes | +0.011 |
ho | +0.010 |
ipt | +0.010 |
umbling | +0.010 |
Neg logit contributions | |
Princess | -0.010 |
chef | -0.010 |
feast | -0.009 |
matching | -0.009 |
succeeded | -0.009 |
Token | � |
Token ID | 95 |
Feature activation | +5.98e-01 |
Pos logit contributions | |
ric | +0.006 |
ho | +0.006 |
cigarettes | +0.005 |
Danger | +0.005 |
umbling | +0.005 |
Neg logit contributions | |
chef | -0.007 |
Princess | -0.007 |
parties | -0.006 |
matching | -0.006 |
superhero | -0.006 |
Token | € |
Token ID | 26391 |
Feature activation | -5.94e-01 |
Pos logit contributions | |
ric | +0.011 |
cigarettes | +0.010 |
umbling | +0.010 |
ipt | +0.010 |
ho | +0.010 |
Neg logit contributions | |
chef | -0.014 |
Princess | -0.014 |
matching | -0.013 |
patiently | -0.013 |
designer | -0.013 |
Token | � |
Token ID | 129 |
Feature activation | +1.48e+00 |
Pos logit contributions | |
Princess | +0.011 |
chef | +0.011 |
matching | +0.010 |
is | +0.010 |
designer | +0.010 |
Neg logit contributions | |
ric | -0.014 |
cigarettes | -0.013 |
ipt | -0.013 |
ho | -0.013 |
umbling | -0.013 |
Token | � |
Token ID | 241 |
Feature activation | +2.97e+00 |
Pos logit contributions | |
ric | +0.031 |
cigarettes | +0.030 |
ipt | +0.029 |
ho | +0.028 |
umbling | +0.028 |
Neg logit contributions | |
Princess | -0.031 |
chef | -0.029 |
matching | -0.028 |
is | -0.028 |
succeeded | -0.027 |
Token | I |
Token ID | 40 |
Feature activation | +9.59e-01 |
Pos logit contributions | |
ric | +0.077 |
cigarettes | +0.075 |
ipt | +0.069 |
el | +0.068 |
ho | +0.067 |
Neg logit contributions | |
Princess | -0.047 |
matching | -0.046 |
good | -0.044 |
is | -0.043 |
succeeded | -0.043 |
Token | â |
Token ID | 22940 |
Feature activation | +1.58e+00 |
Pos logit contributions | |
ric | +0.018 |
cigarettes | +0.017 |
ipt | +0.017 |
umbling | +0.017 |
ho | +0.016 |
Neg logit contributions | |
Princess | -0.022 |
chef | -0.021 |
succeeded | -0.021 |
matching | -0.021 |
feast | -0.021 |
Token | € |
Token ID | 26391 |
Feature activation | -1.02e+00 |
Pos logit contributions | |
ric | +0.030 |
cigarettes | +0.029 |
ipt | +0.028 |
ho | +0.028 |
umbling | +0.028 |
Neg logit contributions | |
Princess | -0.036 |
chef | -0.035 |
matching | -0.033 |
feast | -0.032 |
succeeded | -0.032 |
Token | â„¢ |
Token ID | 8151 |
Feature activation | -7.30e-01 |
Pos logit contributions | |
Princess | +0.022 |
chef | +0.021 |
matching | +0.020 |
feast | +0.019 |
is | +0.019 |
Neg logit contributions | |
ric | -0.021 |
cigarettes | -0.020 |
ipt | -0.020 |
ho | -0.019 |
umbling | -0.019 |
Token | m |
Token ID | 76 |
Feature activation | +1.24e+00 |
Pos logit contributions | |
Princess | +0.013 |
chef | +0.012 |
is | +0.012 |
matching | +0.012 |
, | +0.012 |
Neg logit contributions | |
ric | -0.017 |
cigarettes | -0.016 |
ipt | -0.016 |
urious | -0.015 |
umbling | -0.015 |
Token | loved |
Token ID | 6151 |
Feature activation | -4.87e-01 |
Pos logit contributions | |
ric | +0.003 |
cigarettes | +0.002 |
ipt | +0.002 |
ho | +0.002 |
umbling | +0.002 |
Neg logit contributions | |
Princess | -0.002 |
chef | -0.002 |
matching | -0.002 |
feast | -0.002 |
is | -0.002 |
Token | playing |
Token ID | 2712 |
Feature activation | -4.14e+00 |
Pos logit contributions | |
Princess | +0.008 |
chef | +0.008 |
matching | +0.008 |
parties | +0.008 |
party | +0.007 |
Neg logit contributions | |
ric | -0.012 |
umbling | -0.012 |
ipt | -0.012 |
cigarettes | -0.012 |
ho | -0.012 |
Token | outside |
Token ID | 2354 |
Feature activation | -5.27e-01 |
Pos logit contributions | |
Princess | +0.084 |
succeeded | +0.081 |
Majesty | +0.080 |
oured | +0.079 |
feast | +0.079 |
Neg logit contributions | |
cigarettes | -0.074 |
ipt | -0.067 |
alley | -0.063 |
os | -0.062 |
ric | -0.061 |
Token | , |
Token ID | 11 |
Feature activation | -4.87e-01 |
Pos logit contributions | |
Princess | +0.008 |
chef | +0.007 |
matching | +0.007 |
party | +0.006 |
feast | +0.006 |
Neg logit contributions | |
ric | -0.015 |
cigarettes | -0.014 |
ipt | -0.014 |
ho | -0.014 |
umbling | -0.014 |
Token | especially |
Token ID | 2592 |
Feature activation | +1.57e+00 |
Pos logit contributions | |
Princess | +0.008 |
chef | +0.007 |
matching | +0.007 |
patiently | +0.007 |
party | +0.007 |
Neg logit contributions | |
ric | -0.013 |
cigarettes | -0.012 |
ipt | -0.012 |
ho | -0.012 |
umbling | -0.012 |
Token | in |
Token ID | 287 |
Feature activation | +2.74e+00 |
Pos logit contributions | |
ric | +0.040 |
cigarettes | +0.038 |
ho | +0.037 |
ipt | +0.037 |
umbling | +0.036 |
Neg logit contributions | |
Princess | -0.027 |
chef | -0.025 |
matching | -0.025 |
is | -0.022 |
patiently | -0.022 |
Token | the |
Token ID | 262 |
Feature activation | -4.70e-01 |
Pos logit contributions | |
ho | +0.088 |
umbling | +0.087 |
beh | +0.086 |
ric | +0.085 |
ipt | +0.085 |
Neg logit contributions | |
matching | -0.031 |
chess | -0.031 |
Princess | -0.030 |
tea | -0.028 |
success | -0.028 |
Token | mud |
Token ID | 17492 |
Feature activation | +1.04e+00 |
Pos logit contributions | |
Princess | +0.010 |
chef | +0.009 |
succeeded | +0.009 |
matching | +0.009 |
designer | +0.009 |
Neg logit contributions | |
ric | -0.010 |
cigarettes | -0.009 |
umbling | -0.009 |
ipt | -0.009 |
ho | -0.009 |
Token | . |
Token ID | 13 |
Feature activation | +1.86e-01 |
Pos logit contributions | |
ric | +0.027 |
cigarettes | +0.026 |
ipt | +0.026 |
beh | +0.025 |
spe | +0.025 |
Neg logit contributions | |
chef | -0.015 |
party | -0.015 |
matching | -0.015 |
Princess | -0.015 |
, | -0.014 |
Token | He |
Token ID | 679 |
Feature activation | -1.93e-01 |
Pos logit contributions | |
ho | +0.005 |
beh | +0.004 |
umbling | +0.004 |
cigarettes | +0.004 |
spe | +0.004 |
Neg logit contributions | |
Princess | -0.003 |
chef | -0.003 |
chess | -0.003 |
matching | -0.003 |
Penny | -0.003 |
Token | would |
Token ID | 561 |
Feature activation | +6.54e-01 |
Pos logit contributions | |
Princess | +0.004 |
chef | +0.004 |
matching | +0.003 |
succeeded | +0.003 |
patiently | +0.003 |
Neg logit contributions | |
ric | -0.004 |
cigarettes | -0.004 |
ipt | -0.004 |
umbling | -0.004 |
ho | -0.004 |
Token | was |
Token ID | 373 |
Feature activation | +8.67e-01 |
Pos logit contributions | |
Princess | +0.009 |
chef | +0.009 |
matching | +0.009 |
succeeded | +0.008 |
feast | +0.008 |
Neg logit contributions | |
ric | -0.007 |
cigarettes | -0.006 |
ipt | -0.006 |
umbling | -0.006 |
ho | -0.006 |
Token | worried |
Token ID | 7960 |
Feature activation | +1.17e+00 |
Pos logit contributions | |
ric | +0.015 |
cigarettes | +0.014 |
umbling | +0.014 |
beh | +0.013 |
run | +0.013 |
Neg logit contributions | |
Princess | -0.020 |
feast | -0.019 |
chef | -0.019 |
is | -0.019 |
designer | -0.018 |
Token | that |
Token ID | 326 |
Feature activation | +1.49e+00 |
Pos logit contributions | |
ric | +0.034 |
ipt | +0.034 |
ho | +0.033 |
cigarettes | +0.032 |
beh | +0.032 |
Neg logit contributions | |
, | -0.015 |
Princess | -0.015 |
matching | -0.014 |
patiently | -0.014 |
chef | -0.014 |
Token | he |
Token ID | 339 |
Feature activation | +4.28e-01 |
Pos logit contributions | |
ric | +0.037 |
ipt | +0.035 |
umbling | +0.035 |
cigarettes | +0.034 |
ho | +0.033 |
Neg logit contributions | |
Princess | -0.028 |
chef | -0.026 |
matching | -0.024 |
superhero | -0.023 |
succeeded | -0.023 |
Token | wouldn |
Token ID | 3636 |
Feature activation | +1.72e+00 |
Pos logit contributions | |
ric | +0.009 |
ipt | +0.009 |
cigarettes | +0.008 |
umbling | +0.008 |
ho | +0.008 |
Neg logit contributions | |
Princess | -0.009 |
succeeded | -0.009 |
chef | -0.009 |
matching | -0.008 |
patiently | -0.008 |
Token | 't |
Token ID | 470 |
Feature activation | +3.42e+00 |
Pos logit contributions | |
cigarettes | +0.043 |
ric | +0.040 |
ipt | +0.039 |
ho | +0.038 |
urious | +0.037 |
Neg logit contributions | |
Princess | -0.032 |
succeeded | -0.028 |
, | -0.028 |
matching | -0.028 |
chef | -0.028 |
Token | be |
Token ID | 307 |
Feature activation | +4.90e-01 |
Pos logit contributions | |
ric | +0.070 |
cigarettes | +0.066 |
ipt | +0.064 |
ho | +0.063 |
umbling | +0.063 |
Neg logit contributions | |
Princess | -0.075 |
chef | -0.071 |
matching | -0.067 |
feast | -0.066 |
is | -0.065 |
Token | able |
Token ID | 1498 |
Feature activation | +1.71e+00 |
Pos logit contributions | |
ric | +0.010 |
cigarettes | +0.010 |
umbling | +0.010 |
run | +0.009 |
beh | +0.009 |
Neg logit contributions | |
Princess | -0.010 |
chef | -0.010 |
feast | -0.009 |
is | -0.009 |
designer | -0.009 |
Token | to |
Token ID | 284 |
Feature activation | +1.74e+00 |
Pos logit contributions | |
ric | +0.042 |
ipt | +0.041 |
cigarettes | +0.040 |
ho | +0.039 |
umbling | +0.039 |
Neg logit contributions | |
Princess | -0.030 |
chef | -0.028 |
matching | -0.028 |
succeeded | -0.026 |
feast | -0.026 |
Token | beat |
Token ID | 4405 |
Feature activation | +1.48e+00 |
Pos logit contributions | |
ric | +0.040 |
cigarettes | +0.039 |
ipt | +0.038 |
ho | +0.037 |
umbling | +0.037 |
Neg logit contributions | |
Princess | -0.033 |
chef | -0.031 |
matching | -0.029 |
feast | -0.029 |
is | -0.029 |
Token | the |
Token ID | 262 |
Feature activation | +1.08e+00 |
Pos logit contributions | |
ric | +0.040 |
ipt | +0.039 |
cigarettes | +0.038 |
ho | +0.037 |
urious | +0.037 |
Neg logit contributions | |
Princess | -0.025 |
better | -0.021 |
, | -0.021 |
matching | -0.020 |
good | -0.020 |
Token | girl |
Token ID | 2576 |
Feature activation | -8.54e-01 |
Pos logit contributions | |
ric | +0.012 |
cigarettes | +0.011 |
ipt | +0.011 |
ho | +0.011 |
umbling | +0.011 |
Neg logit contributions | |
Princess | -0.011 |
chef | -0.010 |
matching | -0.010 |
succeeded | -0.010 |
feast | -0.010 |
Token | was |
Token ID | 373 |
Feature activation | -6.25e-01 |
Pos logit contributions | |
Princess | +0.018 |
chef | +0.017 |
matching | +0.017 |
feast | +0.016 |
succeeded | +0.016 |
Neg logit contributions | |
ric | -0.018 |
cigarettes | -0.017 |
ipt | -0.016 |
ho | -0.016 |
umbling | -0.016 |
Token | scared |
Token ID | 12008 |
Feature activation | +9.79e-01 |
Pos logit contributions | |
Princess | +0.012 |
chef | +0.012 |
is | +0.011 |
feast | +0.011 |
designer | +0.011 |
Neg logit contributions | |
ric | -0.014 |
cigarettes | -0.013 |
umbling | -0.013 |
ipt | -0.012 |
ho | -0.012 |
Token | and |
Token ID | 290 |
Feature activation | +8.87e-01 |
Pos logit contributions | |
ric | +0.027 |
cigarettes | +0.026 |
ipt | +0.026 |
ho | +0.026 |
umbling | +0.025 |
Neg logit contributions | |
Princess | -0.014 |
chef | -0.013 |
matching | -0.012 |
feast | -0.012 |
is | -0.011 |
Token | didn |
Token ID | 1422 |
Feature activation | +2.28e+00 |
Pos logit contributions | |
ric | +0.023 |
ipt | +0.022 |
umbling | +0.021 |
oes | +0.021 |
cigarettes | +0.020 |
Neg logit contributions | |
Princess | -0.017 |
succeeded | -0.015 |
patiently | -0.015 |
, | -0.014 |
chef | -0.014 |
Token | 't |
Token ID | 470 |
Feature activation | +3.26e+00 |
Pos logit contributions | |
ric | +0.061 |
cigarettes | +0.060 |
ipt | +0.059 |
urious | +0.057 |
ho | +0.057 |
Neg logit contributions | |
Princess | -0.036 |
better | -0.034 |
, | -0.033 |
good | -0.032 |
matching | -0.032 |
Token | know |
Token ID | 760 |
Feature activation | +6.23e-01 |
Pos logit contributions | |
ric | +0.061 |
ipt | +0.061 |
cigarettes | +0.055 |
ho | +0.055 |
umbling | +0.054 |
Neg logit contributions | |
Princess | -0.081 |
chef | -0.075 |
succeeded | -0.073 |
feast | -0.072 |
matching | -0.072 |
Token | what |
Token ID | 644 |
Feature activation | -6.26e-01 |
Pos logit contributions | |
ric | +0.014 |
ipt | +0.014 |
umbling | +0.014 |
cigarettes | +0.013 |
ho | +0.013 |
Neg logit contributions | |
Princess | -0.012 |
chef | -0.011 |
matching | -0.011 |
patiently | -0.010 |
is | -0.010 |
Token | to |
Token ID | 284 |
Feature activation | +3.35e-01 |
Pos logit contributions | |
Princess | +0.011 |
chef | +0.011 |
matching | +0.010 |
succeeded | +0.010 |
feast | +0.009 |
Neg logit contributions | |
ric | -0.016 |
ipt | -0.015 |
umbling | -0.015 |
cigarettes | -0.014 |
ho | -0.014 |
Token | do |
Token ID | 466 |
Feature activation | -9.55e-01 |
Pos logit contributions | |
ric | +0.009 |
ipt | +0.009 |
spe | +0.009 |
umbling | +0.009 |
ho | +0.009 |
Neg logit contributions | |
party | -0.005 |
, | -0.005 |
feast | -0.005 |
matching | -0.005 |
Princess | -0.005 |
Token | . |
Token ID | 13 |
Feature activation | +1.77e-01 |
Pos logit contributions | |
Princess | +0.017 |
chef | +0.016 |
matching | +0.016 |
succeeded | +0.015 |
is | +0.015 |
Neg logit contributions | |
ric | -0.024 |
ipt | -0.022 |
cigarettes | -0.022 |
ho | -0.022 |
umbling | -0.021 |
Token | box |
Token ID | 3091 |
Feature activation | +4.57e-01 |
Pos logit contributions | |
Princess | +0.003 |
parties | +0.003 |
chef | +0.003 |
succeeded | +0.003 |
feast | +0.003 |
Neg logit contributions | |
cigarettes | -0.003 |
ric | -0.002 |
ho | -0.002 |
umbling | -0.002 |
urious | -0.002 |
Token | and |
Token ID | 290 |
Feature activation | +4.55e-01 |
Pos logit contributions | |
ric | +0.012 |
cigarettes | +0.011 |
ipt | +0.011 |
ho | +0.011 |
umbling | +0.011 |
Neg logit contributions | |
Princess | -0.007 |
chef | -0.007 |
matching | -0.007 |
feast | -0.006 |
is | -0.006 |
Token | pull |
Token ID | 2834 |
Feature activation | +1.10e+00 |
Pos logit contributions | |
ric | +0.011 |
cigarettes | +0.010 |
ipt | +0.010 |
ho | +0.010 |
umbling | +0.010 |
Neg logit contributions | |
Princess | -0.009 |
chef | -0.008 |
matching | -0.008 |
feast | -0.007 |
is | -0.007 |
Token | it |
Token ID | 340 |
Feature activation | +7.23e-01 |
Pos logit contributions | |
ric | +0.025 |
cigarettes | +0.024 |
ipt | +0.024 |
ho | +0.023 |
umbling | +0.023 |
Neg logit contributions | |
Princess | -0.022 |
chef | -0.020 |
feast | -0.019 |
matching | -0.019 |
designer | -0.018 |
Token | tight |
Token ID | 5381 |
Feature activation | +1.45e+00 |
Pos logit contributions | |
ric | +0.020 |
cigarettes | +0.019 |
ipt | +0.018 |
ho | +0.018 |
umbling | +0.018 |
Neg logit contributions | |
Princess | -0.011 |
chef | -0.010 |
matching | -0.010 |
patiently | -0.010 |
succeeded | -0.009 |
Token | . |
Token ID | 13 |
Feature activation | -3.17e-01 |
Pos logit contributions | |
ric | +0.028 |
cigarettes | +0.028 |
ho | +0.027 |
ipt | +0.027 |
umbling | +0.027 |
Neg logit contributions | |
Princess | -0.032 |
chef | -0.031 |
feast | -0.028 |
is | -0.028 |
designer | -0.028 |
Token |
|
Token ID | 198 |
Feature activation | +1.01e+00 |
Pos logit contributions | |
Princess | +0.005 |
chef | +0.005 |
feast | +0.005 |
is | +0.005 |
designer | +0.005 |
Neg logit contributions | |
ric | -0.008 |
ipt | -0.008 |
cigarettes | -0.008 |
umbling | -0.007 |
ho | -0.007 |
Token |
|
Token ID | 198 |
Feature activation | +7.42e-01 |
Pos logit contributions | |
ric | +0.019 |
ipt | +0.018 |
cigarettes | +0.018 |
umbling | +0.017 |
ho | +0.017 |
Neg logit contributions | |
Princess | -0.023 |
chef | -0.023 |
feast | -0.021 |
matching | -0.021 |
designer | -0.021 |
Token | Jimmy |
Token ID | 40335 |
Feature activation | -1.04e-01 |
Pos logit contributions | |
ric | +0.020 |
ipt | +0.019 |
cigarettes | +0.019 |
umbling | +0.019 |
ho | +0.018 |
Neg logit contributions | |
Princess | -0.011 |
chef | -0.010 |
feast | -0.010 |
matching | -0.009 |
designer | -0.009 |
Token | wrapped |
Token ID | 12908 |
Feature activation | +5.52e-02 |
Pos logit contributions | |
Princess | +0.002 |
chef | +0.002 |
matching | +0.002 |
succeeded | +0.002 |
is | +0.002 |
Neg logit contributions | |
ric | -0.002 |
cigarettes | -0.002 |
ipt | -0.002 |
ho | -0.002 |
umbling | -0.002 |
Token | the |
Token ID | 262 |
Feature activation | +2.52e-01 |
Pos logit contributions | |
ric | +0.001 |
cigarettes | +0.001 |
ipt | +0.001 |
ho | +0.001 |
umbling | +0.001 |
Neg logit contributions | |
Princess | -0.001 |
chef | -0.001 |
feast | -0.001 |
party | -0.001 |
succeeded | -0.001 |
Token | she |
Token ID | 673 |
Feature activation | -1.57e+00 |
Pos logit contributions | |
ric | +0.043 |
umbling | +0.038 |
cigarettes | +0.037 |
ipt | +0.037 |
ho | +0.037 |
Neg logit contributions | |
Princess | -0.030 |
chef | -0.027 |
matching | -0.025 |
party | -0.025 |
parties | -0.024 |
Token | knew |
Token ID | 2993 |
Feature activation | +9.65e-01 |
Pos logit contributions | |
Princess | +0.037 |
chef | +0.036 |
is | +0.034 |
feast | +0.034 |
matching | +0.034 |
Neg logit contributions | |
ric | -0.027 |
ho | -0.026 |
cigarettes | -0.025 |
ipt | -0.024 |
spe | -0.023 |
Token | she |
Token ID | 673 |
Feature activation | -5.96e-01 |
Pos logit contributions | |
ric | +0.022 |
cigarettes | +0.021 |
ipt | +0.021 |
umbling | +0.020 |
ho | +0.020 |
Neg logit contributions | |
Princess | -0.019 |
chef | -0.018 |
matching | -0.017 |
feast | -0.016 |
is | -0.016 |
Token | was |
Token ID | 373 |
Feature activation | -4.27e-01 |
Pos logit contributions | |
Princess | +0.014 |
chef | +0.014 |
matching | +0.013 |
feast | +0.013 |
is | +0.013 |
Neg logit contributions | |
ric | -0.011 |
cigarettes | -0.010 |
ipt | -0.010 |
ho | -0.010 |
umbling | -0.009 |
Token | going |
Token ID | 1016 |
Feature activation | -1.67e+00 |
Pos logit contributions | |
is | +0.009 |
Princess | +0.009 |
feast | +0.009 |
chef | +0.009 |
ival | +0.009 |
Neg logit contributions | |
cigarettes | -0.007 |
ric | -0.007 |
beh | -0.006 |
ho | -0.006 |
umbling | -0.006 |
Token | to |
Token ID | 284 |
Feature activation | +4.00e-02 |
Pos logit contributions | |
Princess | +0.042 |
chef | +0.040 |
succeeded | +0.040 |
is | +0.038 |
designer | +0.037 |
Neg logit contributions | |
ric | -0.026 |
cigarettes | -0.025 |
oes | -0.024 |
ipt | -0.023 |
umbling | -0.023 |
Token | have |
Token ID | 423 |
Feature activation | -1.97e+00 |
Pos logit contributions | |
ric | +0.001 |
cigarettes | +0.001 |
ipt | +0.001 |
ho | +0.001 |
umbling | +0.001 |
Neg logit contributions | |
Princess | -0.001 |
chef | -0.001 |
matching | -0.001 |
is | -0.001 |
succeeded | -0.001 |
Token | a |
Token ID | 257 |
Feature activation | -3.63e+00 |
Pos logit contributions | |
Princess | +0.036 |
chef | +0.035 |
designer | +0.033 |
is | +0.032 |
patiently | +0.032 |
Neg logit contributions | |
cigarettes | -0.044 |
ric | -0.043 |
ipt | -0.040 |
ho | -0.040 |
umbling | -0.038 |
Token | new |
Token ID | 649 |
Feature activation | -1.73e+00 |
Pos logit contributions | |
succeeded | +0.048 |
ier | +0.045 |
Princess | +0.044 |
orting | +0.042 |
is | +0.042 |
Neg logit contributions | |
ho | -0.088 |
cigarettes | -0.087 |
ric | -0.084 |
ostr | -0.083 |
Un | -0.080 |
Token | brother |
Token ID | 3956 |
Feature activation | -1.05e+00 |
Pos logit contributions | |
Princess | +0.028 |
succeeded | +0.027 |
Benjamin | +0.026 |
is | +0.025 |
chef | +0.025 |
Neg logit contributions | |
cigarettes | -0.040 |
ho | -0.039 |
ric | -0.038 |
ipt | -0.037 |
spe | -0.036 |
Token | or |
Token ID | 393 |
Feature activation | -2.36e-01 |
Pos logit contributions | |
Princess | +0.021 |
chef | +0.020 |
matching | +0.019 |
is | +0.019 |
feast | +0.019 |
Neg logit contributions | |
ric | -0.022 |
cigarettes | -0.022 |
ipt | -0.021 |
ho | -0.021 |
umbling | -0.020 |
Token | asked |
Token ID | 1965 |
Feature activation | +6.39e-01 |
Pos logit contributions | |
Princess | +0.004 |
chef | +0.004 |
matching | +0.004 |
feast | +0.004 |
is | +0.004 |
Neg logit contributions | |
ric | -0.005 |
cigarettes | -0.004 |
ipt | -0.004 |
ho | -0.004 |
umbling | -0.004 |
Token | what |
Token ID | 644 |
Feature activation | -1.88e-01 |
Pos logit contributions | |
cigarettes | +0.015 |
ric | +0.015 |
ho | +0.014 |
ipt | +0.014 |
spe | +0.014 |
Neg logit contributions | |
Princess | -0.011 |
chef | -0.011 |
is | -0.010 |
succeeded | -0.010 |
designer | -0.010 |
Token | they |
Token ID | 484 |
Feature activation | -4.88e-01 |
Pos logit contributions | |
Princess | +0.004 |
chef | +0.004 |
matching | +0.004 |
succeeded | +0.003 |
feast | +0.003 |
Neg logit contributions | |
ric | -0.004 |
ipt | -0.004 |
umbling | -0.004 |
cigarettes | -0.004 |
ho | -0.004 |
Token | were |
Token ID | 547 |
Feature activation | -1.27e-01 |
Pos logit contributions | |
Princess | +0.010 |
succeeded | +0.010 |
chef | +0.010 |
matching | +0.010 |
feast | +0.010 |
Neg logit contributions | |
ric | -0.010 |
ipt | -0.010 |
cigarettes | -0.010 |
umbling | -0.009 |
ho | -0.009 |
Token | doing |
Token ID | 1804 |
Feature activation | -2.61e+00 |
Pos logit contributions | |
Princess | +0.003 |
chef | +0.003 |
superhero | +0.003 |
designer | +0.002 |
Benjamin | +0.002 |
Neg logit contributions | |
ric | -0.003 |
cigarettes | -0.003 |
ho | -0.002 |
umbling | -0.002 |
ipt | -0.002 |
Token | . |
Token ID | 13 |
Feature activation | -3.23e-01 |
Pos logit contributions | |
Princess | +0.054 |
chef | +0.051 |
matching | +0.047 |
feast | +0.047 |
is | +0.047 |
Neg logit contributions | |
ric | -0.056 |
cigarettes | -0.054 |
ipt | -0.052 |
ho | -0.051 |
umbling | -0.051 |
Token | One |
Token ID | 1881 |
Feature activation | +2.32e+00 |
Pos logit contributions | |
Princess | +0.007 |
chef | +0.007 |
matching | +0.006 |
patiently | +0.006 |
succeeded | +0.006 |
Neg logit contributions | |
ric | -0.007 |
cigarettes | -0.006 |
ipt | -0.006 |
umbling | -0.006 |
ho | -0.006 |
Token | of |
Token ID | 286 |
Feature activation | +1.36e+00 |
Pos logit contributions | |
ric | +0.059 |
ipt | +0.054 |
cigarettes | +0.052 |
umbling | +0.051 |
ho | +0.051 |
Neg logit contributions | |
chef | -0.042 |
Princess | -0.041 |
, | -0.039 |
feast | -0.037 |
matching | -0.037 |
Token | the |
Token ID | 262 |
Feature activation | +9.67e-01 |
Pos logit contributions | |
ric | +0.041 |
umbling | +0.041 |
ho | +0.041 |
cigarettes | +0.040 |
ipt | +0.039 |
Neg logit contributions | |
Princess | -0.016 |
chef | -0.014 |
matching | -0.014 |
is | -0.013 |
patiently | -0.012 |
Token | men |
Token ID | 1450 |
Feature activation | +2.10e+00 |
Pos logit contributions | |
ric | +0.020 |
cigarettes | +0.018 |
ipt | +0.018 |
ho | +0.018 |
umbling | +0.017 |
Neg logit contributions | |
Princess | -0.022 |
chef | -0.021 |
matching | -0.020 |
is | -0.019 |
feast | -0.019 |
Token | said |
Token ID | 531 |
Feature activation | -1.17e-01 |
Pos logit contributions | |
ric | +0.052 |
cigarettes | +0.049 |
ipt | +0.049 |
umbling | +0.047 |
ho | +0.046 |
Neg logit contributions | |
Princess | -0.036 |
chef | -0.035 |
matching | -0.033 |
succeeded | -0.032 |
feast | -0.032 |
Token | saved |
Token ID | 7448 |
Feature activation | +3.42e-01 |
Pos logit contributions | |
ric | +0.017 |
ipt | +0.016 |
umbling | +0.016 |
cigarettes | +0.016 |
ho | +0.015 |
Neg logit contributions | |
Princess | -0.012 |
chef | -0.011 |
matching | -0.010 |
succeeded | -0.010 |
patiently | -0.010 |
Token | him |
Token ID | 683 |
Feature activation | +4.65e-01 |
Pos logit contributions | |
ric | +0.008 |
ipt | +0.008 |
umbling | +0.007 |
ho | +0.007 |
cigarettes | +0.007 |
Neg logit contributions | |
Princess | -0.007 |
chef | -0.006 |
matching | -0.006 |
success | -0.006 |
parties | -0.005 |
Token | . |
Token ID | 13 |
Feature activation | -2.20e-01 |
Pos logit contributions | |
ipt | +0.013 |
ric | +0.013 |
ho | +0.012 |
cigarettes | +0.012 |
umbling | +0.011 |
Neg logit contributions | |
Princess | -0.007 |
, | -0.007 |
better | -0.007 |
matching | -0.006 |
party | -0.006 |
Token | |
Token ID | 220 |
Feature activation | +1.54e+00 |
Pos logit contributions | |
Princess | +0.005 |
chef | +0.004 |
Penny | +0.004 |
Benjamin | +0.004 |
chess | +0.004 |
Neg logit contributions | |
ric | -0.005 |
cigarettes | -0.005 |
ho | -0.005 |
umbling | -0.005 |
ipt | -0.005 |
Token |
|
Token ID | 198 |
Feature activation | +5.84e-01 |
Pos logit contributions | |
ho | +0.038 |
cigarettes | +0.038 |
ric | +0.038 |
umbling | +0.036 |
ipt | +0.036 |
Neg logit contributions | |
Princess | -0.025 |
chef | -0.022 |
matching | -0.021 |
succeeded | -0.021 |
patiently | -0.021 |
Token |
|
Token ID | 198 |
Feature activation | +3.71e-01 |
Pos logit contributions | |
ric | +0.014 |
cigarettes | +0.014 |
ho | +0.013 |
ipt | +0.013 |
umbling | +0.013 |
Neg logit contributions | |
Princess | -0.011 |
chef | -0.010 |
is | -0.010 |
matching | -0.010 |
succeeded | -0.009 |
Token | Unfortunately |
Token ID | 13898 |
Feature activation | -5.09e-01 |
Pos logit contributions | |
ric | +0.010 |
cigarettes | +0.009 |
ho | +0.009 |
oes | +0.009 |
angs | +0.008 |
Neg logit contributions | |
Princess | -0.007 |
is | -0.006 |
chef | -0.006 |
matching | -0.006 |
succeeded | -0.006 |
Token | , |
Token ID | 11 |
Feature activation | -3.52e-01 |
Pos logit contributions | |
Princess | +0.006 |
chef | +0.006 |
succeeded | +0.005 |
is | +0.005 |
judges | +0.005 |
Neg logit contributions | |
cigarettes | -0.015 |
ho | -0.014 |
ric | -0.014 |
spe | -0.014 |
ipt | -0.013 |
Token | no |
Token ID | 645 |
Feature activation | +1.01e+00 |
Pos logit contributions | |
Princess | +0.006 |
chef | +0.006 |
matching | +0.006 |
feast | +0.006 |
succeeded | +0.005 |
Neg logit contributions | |
ric | -0.008 |
cigarettes | -0.008 |
ipt | -0.008 |
ho | -0.008 |
umbling | -0.008 |
Token | one |
Token ID | 530 |
Feature activation | +1.83e+00 |
Pos logit contributions | |
cigarettes | +0.020 |
ric | +0.019 |
ipt | +0.018 |
ho | +0.018 |
umbling | +0.018 |
Neg logit contributions | |
Princess | -0.022 |
chef | -0.021 |
succeeded | -0.020 |
matching | -0.020 |
feast | -0.020 |
Token | ever |
Token ID | 1683 |
Feature activation | +6.56e-01 |
Pos logit contributions | |
ric | +0.045 |
ipt | +0.041 |
cigarettes | +0.039 |
umbling | +0.039 |
ho | +0.037 |
Neg logit contributions | |
Princess | -0.035 |
chef | -0.035 |
succeeded | -0.033 |
feast | -0.032 |
party | -0.032 |
Token | feel |
Token ID | 1254 |
Feature activation | -1.78e+00 |
Pos logit contributions | |
ipt | +0.013 |
cigarettes | +0.013 |
ric | +0.013 |
umbling | +0.012 |
ho | +0.012 |
Neg logit contributions | |
Princess | -0.007 |
best | -0.006 |
success | -0.006 |
matching | -0.006 |
party | -0.005 |
Token | like |
Token ID | 588 |
Feature activation | -3.90e-01 |
Pos logit contributions | |
chef | +0.035 |
Princess | +0.033 |
feast | +0.033 |
is | +0.032 |
matching | +0.032 |
Neg logit contributions | |
ric | -0.035 |
cigarettes | -0.035 |
umbling | -0.034 |
spe | -0.032 |
ho | -0.032 |
Token | she |
Token ID | 673 |
Feature activation | -9.79e-01 |
Pos logit contributions | |
Princess | +0.008 |
chef | +0.007 |
matching | +0.007 |
feast | +0.007 |
is | +0.007 |
Neg logit contributions | |
ric | -0.008 |
cigarettes | -0.008 |
ho | -0.008 |
ipt | -0.008 |
umbling | -0.008 |
Token | belonged |
Token ID | 19611 |
Feature activation | -2.62e+00 |
Pos logit contributions | |
Princess | +0.021 |
chef | +0.019 |
succeeded | +0.019 |
matching | +0.019 |
feast | +0.018 |
Neg logit contributions | |
ric | -0.021 |
cigarettes | -0.021 |
ipt | -0.020 |
umbling | -0.019 |
ho | -0.019 |
Token | . |
Token ID | 13 |
Feature activation | -2.03e-01 |
Pos logit contributions | |
Princess | +0.052 |
chef | +0.050 |
feast | +0.045 |
is | +0.045 |
succeeded | +0.045 |
Neg logit contributions | |
ric | -0.056 |
cigarettes | -0.055 |
ipt | -0.054 |
ho | -0.054 |
umbling | -0.053 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | -5.91e-01 |
Pos logit contributions | |
Princess | +0.004 |
chef | +0.004 |
matching | +0.003 |
succeeded | +0.003 |
parties | +0.003 |
Neg logit contributions | |
ric | -0.005 |
cigarettes | -0.005 |
ho | -0.005 |
umbling | -0.004 |
ipt | -0.004 |
Token | Once |
Token ID | 7454 |
Feature activation | -4.68e-01 |
Pos logit contributions | |
Princess | +0.012 |
chef | +0.012 |
matching | +0.011 |
feast | +0.011 |
is | +0.011 |
Neg logit contributions | |
ric | -0.012 |
cigarettes | -0.012 |
ipt | -0.012 |
ho | -0.011 |
umbling | -0.011 |
Token | upon |
Token ID | 2402 |
Feature activation | +6.12e-01 |
Pos logit contributions | |
Princess | +0.011 |
chef | +0.010 |
, | +0.010 |
matching | +0.010 |
patiently | +0.010 |
Neg logit contributions | |
ric | -0.009 |
cigarettes | -0.009 |
umbling | -0.008 |
ipt | -0.008 |
beh | -0.008 |
Token | a |
Token ID | 257 |
Feature activation | -2.47e+00 |
Pos logit contributions | |
ric | +0.020 |
ho | +0.020 |
ipt | +0.019 |
cigarettes | +0.019 |
umbling | +0.019 |
Neg logit contributions | |
Princess | -0.007 |
matching | -0.006 |
chef | -0.005 |
good | -0.005 |
better | -0.005 |
Token | time |
Token ID | 640 |
Feature activation | -1.84e+00 |
Pos logit contributions | |
Princess | +0.040 |
chef | +0.038 |
designer | +0.034 |
matching | +0.034 |
succeeded | +0.033 |
Neg logit contributions | |
ric | -0.063 |
cigarettes | -0.061 |
ipt | -0.059 |
ho | -0.059 |
umbling | -0.059 |
Token | , |
Token ID | 11 |
Feature activation | -6.63e-01 |
Pos logit contributions | |
Princess | +0.012 |
chef | +0.010 |
matching | +0.008 |
feast | +0.007 |
is | +0.007 |
Neg logit contributions | |
ric | -0.066 |
cigarettes | -0.064 |
ho | -0.063 |
ipt | -0.063 |
umbling | -0.062 |
Token | fl |
Token ID | 781 |
Feature activation | +1.44e+00 |
Pos logit contributions | |
Princess | +0.028 |
chef | +0.027 |
feast | +0.026 |
succeeded | +0.026 |
parties | +0.025 |
Neg logit contributions | |
ric | -0.022 |
cigarettes | -0.021 |
umbling | -0.021 |
ho | -0.021 |
ipt | -0.020 |
Token | ute |
Token ID | 1133 |
Feature activation | -1.52e+00 |
Pos logit contributions | |
cigarettes | +0.034 |
ric | +0.033 |
ipt | +0.031 |
ho | +0.031 |
Warning | +0.030 |
Neg logit contributions | |
Princess | -0.025 |
chef | -0.025 |
is | -0.024 |
designer | -0.024 |
matching | -0.023 |
Token | together |
Token ID | 1978 |
Feature activation | -1.88e+00 |
Pos logit contributions | |
Princess | +0.023 |
chef | +0.022 |
succeeded | +0.020 |
is | +0.019 |
matching | +0.019 |
Neg logit contributions | |
ric | -0.039 |
cigarettes | -0.038 |
ho | -0.038 |
umbling | -0.037 |
ipt | -0.037 |
Token | , |
Token ID | 11 |
Feature activation | -7.39e-01 |
Pos logit contributions | |
Princess | +0.034 |
chef | +0.032 |
succeeded | +0.030 |
feast | +0.029 |
designer | +0.029 |
Neg logit contributions | |
ric | -0.043 |
ho | -0.042 |
cigarettes | -0.041 |
umbling | -0.039 |
ipt | -0.039 |
Token | making |
Token ID | 1642 |
Feature activation | -1.44e+00 |
Pos logit contributions | |
Princess | +0.014 |
chef | +0.013 |
feast | +0.012 |
succeeded | +0.012 |
is | +0.012 |
Neg logit contributions | |
ric | -0.017 |
ho | -0.016 |
cigarettes | -0.016 |
spe | -0.015 |
ipt | -0.015 |
Token | a |
Token ID | 257 |
Feature activation | -7.70e-01 |
Pos logit contributions | |
Princess | +0.025 |
chef | +0.024 |
feast | +0.022 |
designer | +0.022 |
matching | +0.021 |
Neg logit contributions | |
ric | -0.035 |
cigarettes | -0.033 |
ipt | -0.033 |
umbling | -0.032 |
ho | -0.032 |
Token | sweet |
Token ID | 6029 |
Feature activation | -1.09e+00 |
Pos logit contributions | |
Princess | +0.013 |
chef | +0.013 |
succeeded | +0.012 |
parties | +0.012 |
designer | +0.011 |
Neg logit contributions | |
ric | -0.019 |
cigarettes | -0.017 |
ho | -0.017 |
spe | -0.017 |
umbling | -0.016 |
Token | and |
Token ID | 290 |
Feature activation | +1.35e-01 |
Pos logit contributions | |
Princess | +0.026 |
chef | +0.025 |
succeeded | +0.025 |
designer | +0.023 |
matching | +0.023 |
Neg logit contributions | |
ric | -0.019 |
cigarettes | -0.018 |
ho | -0.017 |
ipt | -0.016 |
umbling | -0.016 |
Token | joyful |
Token ID | 48977 |
Feature activation | -7.51e-01 |
Pos logit contributions | |
ric | +0.004 |
cigarettes | +0.003 |
ipt | +0.003 |
ho | +0.003 |
umbling | +0.003 |
Neg logit contributions | |
Princess | -0.002 |
chef | -0.002 |
feast | -0.002 |
is | -0.002 |
matching | -0.002 |
Token | music |
Token ID | 2647 |
Feature activation | -8.35e-01 |
Pos logit contributions | |
Princess | +0.017 |
chef | +0.017 |
succeeded | +0.016 |
matching | +0.015 |
designer | +0.015 |
Neg logit contributions | |
ric | -0.014 |
cigarettes | -0.013 |
ho | -0.012 |
ipt | -0.012 |
umbling | -0.012 |
Token | . |
Token ID | 13 |
Feature activation | -1.84e-01 |
Pos logit contributions | |
Princess | +0.017 |
chef | +0.016 |
matching | +0.015 |
is | +0.015 |
feast | +0.015 |
Neg logit contributions | |
ric | -0.018 |
cigarettes | -0.017 |
ipt | -0.017 |
ho | -0.017 |
umbling | -0.017 |
Token | aw |
Token ID | 707 |
Feature activation | -8.26e-01 |
Pos logit contributions | |
Princess | +0.000 |
chef | +0.000 |
designer | +0.000 |
matching | +0.000 |
judges | +0.000 |
Neg logit contributions | |
ipt | -0.000 |
ric | -0.000 |
umbling | -0.000 |
urious | -0.000 |
beh | -0.000 |
Token | ked |
Token ID | 9091 |
Feature activation | +7.13e-01 |
Pos logit contributions | |
Princess | +0.023 |
chef | +0.022 |
designer | +0.021 |
feast | +0.021 |
parties | +0.021 |
Neg logit contributions | |
ho | -0.013 |
ric | -0.011 |
umbling | -0.011 |
cigarettes | -0.010 |
os | -0.010 |
Token | along |
Token ID | 1863 |
Feature activation | +4.01e-01 |
Pos logit contributions | |
ric | +0.015 |
cigarettes | +0.015 |
ipt | +0.014 |
ho | +0.014 |
umbling | +0.014 |
Neg logit contributions | |
Princess | -0.015 |
chef | -0.014 |
matching | -0.013 |
feast | -0.013 |
is | -0.013 |
Token | . |
Token ID | 13 |
Feature activation | -1.25e+00 |
Pos logit contributions | |
ric | +0.008 |
cigarettes | +0.008 |
ho | +0.008 |
ipt | +0.008 |
spe | +0.007 |
Neg logit contributions | |
Princess | -0.008 |
chef | -0.008 |
feast | -0.008 |
succeeded | -0.007 |
is | -0.007 |
Token | The |
Token ID | 383 |
Feature activation | -7.59e-02 |
Pos logit contributions | |
Princess | +0.028 |
chef | +0.028 |
matching | +0.026 |
is | +0.026 |
designer | +0.026 |
Neg logit contributions | |
ric | -0.024 |
cigarettes | -0.022 |
ipt | -0.022 |
umbling | -0.021 |
ho | -0.021 |
Token | pirate |
Token ID | 25868 |
Feature activation | -1.56e+00 |
Pos logit contributions | |
Princess | +0.001 |
patiently | +0.001 |
chef | +0.001 |
succeeded | +0.001 |
parties | +0.001 |
Neg logit contributions | |
ric | -0.002 |
cigarettes | -0.002 |
ho | -0.002 |
ipt | -0.002 |
spe | -0.002 |
Token | realized |
Token ID | 6939 |
Feature activation | +6.76e-01 |
Pos logit contributions | |
Princess | +0.037 |
chef | +0.036 |
parties | +0.034 |
feast | +0.033 |
matching | +0.033 |
Neg logit contributions | |
ric | -0.026 |
spe | -0.025 |
ho | -0.025 |
ipt | -0.025 |
cigarettes | -0.025 |
Token | that |
Token ID | 326 |
Feature activation | +1.02e+00 |
Pos logit contributions | |
ric | +0.016 |
cigarettes | +0.016 |
ipt | +0.015 |
umbling | +0.015 |
ho | +0.015 |
Neg logit contributions | |
Princess | -0.012 |
chef | -0.012 |
matching | -0.011 |
is | -0.010 |
feast | -0.010 |
Token | even |
Token ID | 772 |
Feature activation | +1.61e+00 |
Pos logit contributions | |
ric | +0.026 |
ipt | +0.025 |
umbling | +0.024 |
cigarettes | +0.024 |
ho | +0.024 |
Neg logit contributions | |
Princess | -0.019 |
chef | -0.017 |
matching | -0.017 |
success | -0.016 |
chess | -0.016 |
Token | though |
Token ID | 996 |
Feature activation | +1.68e+00 |
Pos logit contributions | |
ric | +0.034 |
cigarettes | +0.034 |
ipt | +0.032 |
umbling | +0.031 |
ho | +0.031 |
Neg logit contributions | |
Princess | -0.032 |
chef | -0.031 |
feast | -0.029 |
matching | -0.029 |
succeeded | -0.029 |
Token | things |
Token ID | 1243 |
Feature activation | +8.72e-01 |
Pos logit contributions | |
ric | +0.040 |
ipt | +0.038 |
cigarettes | +0.038 |
umbling | +0.038 |
ho | +0.037 |
Neg logit contributions | |
Princess | -0.032 |
chef | -0.029 |
matching | -0.028 |
feast | -0.027 |
success | -0.027 |
Token | were |
Token ID | 547 |
Feature activation | +1.10e-02 |
Pos logit contributions | |
Princess | +0.031 |
chef | +0.030 |
matching | +0.028 |
feast | +0.028 |
is | +0.028 |
Neg logit contributions | |
ric | -0.023 |
cigarettes | -0.022 |
ipt | -0.021 |
ho | -0.021 |
umbling | -0.020 |
Token | going |
Token ID | 1016 |
Feature activation | -1.68e+00 |
Pos logit contributions | |
ric | +0.000 |
cigarettes | +0.000 |
ipt | +0.000 |
umbling | +0.000 |
ho | +0.000 |
Neg logit contributions | |
Princess | -0.000 |
chef | -0.000 |
feast | -0.000 |
is | -0.000 |
designer | -0.000 |
Token | to |
Token ID | 284 |
Feature activation | +5.88e-01 |
Pos logit contributions | |
Princess | +0.041 |
chef | +0.038 |
is | +0.036 |
designer | +0.035 |
succeeded | +0.035 |
Neg logit contributions | |
ric | -0.032 |
oes | -0.029 |
ipt | -0.028 |
cigarettes | -0.028 |
ho | -0.027 |
Token | play |
Token ID | 711 |
Feature activation | -4.51e+00 |
Pos logit contributions | |
cigarettes | +0.016 |
umbling | +0.016 |
ipt | +0.016 |
ho | +0.015 |
ric | +0.015 |
Neg logit contributions | |
Princess | -0.010 |
matching | -0.009 |
chef | -0.009 |
party | -0.008 |
parties | -0.008 |
Token | a |
Token ID | 257 |
Feature activation | -2.23e+00 |
Pos logit contributions | |
Princess | +0.089 |
chef | +0.086 |
designer | +0.084 |
is | +0.083 |
Majesty | +0.078 |
Neg logit contributions | |
cigarettes | -0.088 |
ipt | -0.084 |
os | -0.082 |
ric | -0.082 |
ho | -0.079 |
Token | game |
Token ID | 983 |
Feature activation | -1.06e+00 |
Pos logit contributions | |
Princess | +0.044 |
chef | +0.042 |
succeeded | +0.039 |
feast | +0.038 |
matching | +0.038 |
Neg logit contributions | |
ric | -0.048 |
cigarettes | -0.046 |
ho | -0.045 |
ipt | -0.044 |
umbling | -0.043 |
Token | of |
Token ID | 286 |
Feature activation | -1.37e+00 |
Pos logit contributions | |
Princess | +0.021 |
chef | +0.020 |
matching | +0.019 |
feast | +0.018 |
is | +0.018 |
Neg logit contributions | |
ric | -0.024 |
cigarettes | -0.023 |
ipt | -0.022 |
ho | -0.022 |
umbling | -0.022 |
Token | tag |
Token ID | 7621 |
Feature activation | -3.14e-01 |
Pos logit contributions | |
Princess | +0.023 |
chef | +0.022 |
designer | +0.021 |
is | +0.020 |
feast | +0.020 |
Neg logit contributions | |
ric | -0.033 |
cigarettes | -0.032 |
ipt | -0.031 |
ho | -0.030 |
umbling | -0.030 |
Token | . |
Token ID | 13 |
Feature activation | -6.72e-01 |
Pos logit contributions | |
Princess | +0.006 |
chef | +0.005 |
matching | +0.005 |
party | +0.005 |
feast | +0.005 |
Neg logit contributions | |
ric | -0.008 |
cigarettes | -0.007 |
ipt | -0.007 |
umbling | -0.007 |
ho | -0.007 |
Token | One |
Token ID | 1881 |
Feature activation | +1.37e+00 |
Pos logit contributions | |
Princess | +0.014 |
chef | +0.013 |
matching | +0.012 |
succeeded | +0.012 |
patiently | +0.012 |
Neg logit contributions | |
ric | -0.015 |
cigarettes | -0.014 |
ho | -0.014 |
umbling | -0.014 |
ipt | -0.014 |
Token | of |
Token ID | 286 |
Feature activation | +7.42e-01 |
Pos logit contributions | |
ric | +0.041 |
cigarettes | +0.037 |
keeping | +0.036 |
ipt | +0.036 |
beh | +0.036 |
Neg logit contributions | |
, | -0.022 |
Princess | -0.021 |
party | -0.020 |
chef | -0.020 |
good | -0.020 |
Token | not |
Token ID | 407 |
Feature activation | +1.69e+00 |
Pos logit contributions | |
cigarettes | +0.007 |
ric | +0.007 |
beh | +0.006 |
run | +0.006 |
oes | +0.006 |
Neg logit contributions | |
feast | -0.006 |
chef | -0.006 |
Princess | -0.006 |
is | -0.006 |
designer | -0.006 |
Token | boring |
Token ID | 14262 |
Feature activation | -9.85e-01 |
Pos logit contributions | |
ric | +0.040 |
cigarettes | +0.040 |
umbling | +0.037 |
ipt | +0.037 |
ho | +0.036 |
Neg logit contributions | |
Princess | -0.029 |
chef | -0.028 |
feast | -0.027 |
is | -0.025 |
matching | -0.025 |
Token | , |
Token ID | 11 |
Feature activation | +6.76e-01 |
Pos logit contributions | |
Princess | +0.018 |
chef | +0.017 |
feast | +0.016 |
matching | +0.016 |
succeeded | +0.016 |
Neg logit contributions | |
cigarettes | -0.022 |
ric | -0.022 |
umbling | -0.021 |
ho | -0.021 |
ipt | -0.021 |
Token | it |
Token ID | 340 |
Feature activation | +1.62e+00 |
Pos logit contributions | |
ric | +0.019 |
ipt | +0.018 |
umbling | +0.018 |
urious | +0.017 |
cigarettes | +0.017 |
Neg logit contributions | |
Princess | -0.011 |
chef | -0.009 |
matching | -0.009 |
is | -0.008 |
party | -0.008 |
Token | was |
Token ID | 373 |
Feature activation | +1.77e+00 |
Pos logit contributions | |
ric | +0.041 |
ipt | +0.040 |
umbling | +0.037 |
ho | +0.036 |
cigarettes | +0.036 |
Neg logit contributions | |
Princess | -0.029 |
succeeded | -0.027 |
matching | -0.027 |
better | -0.026 |
chef | -0.026 |
Token | fun |
Token ID | 1257 |
Feature activation | -8.00e-01 |
Pos logit contributions | |
ric | +0.043 |
cigarettes | +0.042 |
ipt | +0.040 |
ho | +0.040 |
umbling | +0.040 |
Neg logit contributions | |
Princess | -0.031 |
chef | -0.030 |
matching | -0.028 |
feast | -0.027 |
is | -0.027 |
Token | and |
Token ID | 290 |
Feature activation | +2.35e+00 |
Pos logit contributions | |
Princess | +0.015 |
chef | +0.014 |
matching | +0.013 |
feast | +0.012 |
is | +0.012 |
Neg logit contributions | |
ric | -0.019 |
cigarettes | -0.019 |
ipt | -0.018 |
ho | -0.018 |
umbling | -0.018 |
Token | good |
Token ID | 922 |
Feature activation | +4.70e-01 |
Pos logit contributions | |
ric | +0.063 |
ipt | +0.062 |
umbling | +0.059 |
cigarettes | +0.058 |
ho | +0.058 |
Neg logit contributions | |
Princess | -0.038 |
matching | -0.035 |
chef | -0.033 |
succeeded | -0.033 |
success | -0.032 |
Token | for |
Token ID | 329 |
Feature activation | -1.45e-01 |
Pos logit contributions | |
ric | +0.010 |
cigarettes | +0.010 |
ipt | +0.009 |
ho | +0.009 |
umbling | +0.009 |
Neg logit contributions | |
Princess | -0.010 |
chef | -0.009 |
matching | -0.009 |
feast | -0.009 |
is | -0.009 |
Token | her |
Token ID | 607 |
Feature activation | +2.66e-01 |
Pos logit contributions | |
Princess | +0.003 |
chef | +0.003 |
matching | +0.003 |
feast | +0.003 |
succeeded | +0.003 |
Neg logit contributions | |
ric | -0.003 |
cigarettes | -0.003 |
ipt | -0.003 |
ho | -0.003 |
umbling | -0.003 |
Token | . |
Token ID | 13 |
Feature activation | -5.77e-01 |
Pos logit contributions | |
ric | +0.007 |
ipt | +0.006 |
umbling | +0.006 |
cigarettes | +0.006 |
ho | +0.006 |
Neg logit contributions | |
Princess | -0.005 |
chef | -0.004 |
matching | -0.004 |
party | -0.004 |
parties | -0.004 |
Token | it |
Token ID | 340 |
Feature activation | +4.91e-01 |
Pos logit contributions | |
Princess | +0.015 |
chef | +0.015 |
matching | +0.014 |
feast | +0.013 |
succeeded | +0.013 |
Neg logit contributions | |
ric | -0.016 |
cigarettes | -0.015 |
ipt | -0.015 |
ho | -0.015 |
umbling | -0.014 |
Token | to |
Token ID | 284 |
Feature activation | +4.42e-01 |
Pos logit contributions | |
ric | +0.013 |
umbling | +0.013 |
ipt | +0.012 |
cigarettes | +0.012 |
ho | +0.012 |
Neg logit contributions | |
Princess | -0.008 |
matching | -0.007 |
chef | -0.007 |
patiently | -0.007 |
tea | -0.007 |
Token | her |
Token ID | 607 |
Feature activation | +6.77e-02 |
Pos logit contributions | |
umbling | +0.012 |
ipt | +0.011 |
ric | +0.011 |
spe | +0.011 |
cigarettes | +0.011 |
Neg logit contributions | |
Princess | -0.008 |
chef | -0.007 |
matching | -0.007 |
Penny | -0.007 |
parties | -0.006 |
Token | mom |
Token ID | 1995 |
Feature activation | +9.58e-01 |
Pos logit contributions | |
ric | +0.002 |
cigarettes | +0.002 |
ipt | +0.001 |
ho | +0.001 |
umbling | +0.001 |
Neg logit contributions | |
Princess | -0.001 |
chef | -0.001 |
matching | -0.001 |
is | -0.001 |
feast | -0.001 |
Token | my |
Token ID | 1820 |
Feature activation | -7.45e-02 |
Pos logit contributions | |
ipt | +0.019 |
ric | +0.019 |
umbling | +0.019 |
cigarettes | +0.019 |
ho | +0.019 |
Neg logit contributions | |
Princess | -0.021 |
chef | -0.020 |
matching | -0.019 |
is | -0.018 |
patiently | -0.018 |
Token | and |
Token ID | 290 |
Feature activation | -9.71e-01 |
Pos logit contributions | |
Princess | +0.001 |
chef | +0.001 |
matching | +0.001 |
feast | +0.001 |
is | +0.001 |
Neg logit contributions | |
ric | -0.002 |
cigarettes | -0.002 |
ipt | -0.002 |
ho | -0.002 |
umbling | -0.002 |
Token | said |
Token ID | 531 |
Feature activation | -3.72e-01 |
Pos logit contributions | |
Princess | +0.019 |
chef | +0.018 |
matching | +0.017 |
is | +0.017 |
feast | +0.017 |
Neg logit contributions | |
ric | -0.022 |
cigarettes | -0.021 |
ipt | -0.020 |
ho | -0.020 |
umbling | -0.019 |
Token | , |
Token ID | 11 |
Feature activation | +1.42e-01 |
Pos logit contributions | |
is | +0.005 |
feast | +0.005 |
chef | +0.005 |
succeeded | +0.005 |
Princess | +0.005 |
Neg logit contributions | |
ho | -0.010 |
cigarettes | -0.009 |
ric | -0.009 |
spe | -0.008 |
run | -0.008 |
Token | " |
Token ID | 366 |
Feature activation | +9.34e-01 |
Pos logit contributions | |
ric | +0.003 |
cigarettes | +0.003 |
ipt | +0.003 |
umbling | +0.003 |
ho | +0.003 |
Neg logit contributions | |
Princess | -0.003 |
chef | -0.002 |
matching | -0.002 |
patiently | -0.002 |
is | -0.002 |
Token | Look |
Token ID | 8567 |
Feature activation | -3.01e+00 |
Pos logit contributions | |
ric | +0.024 |
cigarettes | +0.022 |
run | +0.021 |
angs | +0.021 |
os | +0.020 |
Neg logit contributions | |
Princess | -0.017 |
is | -0.015 |
chef | -0.015 |
designer | -0.015 |
matching | -0.014 |
Token | , |
Token ID | 11 |
Feature activation | -5.30e-01 |
Pos logit contributions | |
chef | +0.044 |
Princess | +0.043 |
succeeded | +0.042 |
judges | +0.040 |
feast | +0.039 |
Neg logit contributions | |
cigarettes | -0.078 |
el | -0.077 |
ric | -0.076 |
ho | -0.072 |
os | -0.072 |
Token | . |
Token ID | 13 |
Feature activation | -5.31e-01 |
Pos logit contributions | |
Princess | +0.065 |
chef | +0.063 |
matching | +0.059 |
succeeded | +0.058 |
designer | +0.057 |
Neg logit contributions | |
ric | -0.061 |
cigarettes | -0.058 |
ipt | -0.057 |
ho | -0.056 |
umbling | -0.056 |
Token | Today |
Token ID | 6288 |
Feature activation | -6.69e-01 |
Pos logit contributions | |
Princess | +0.011 |
chef | +0.010 |
matching | +0.010 |
parties | +0.010 |
party | +0.010 |
Neg logit contributions | |
ric | -0.012 |
cigarettes | -0.012 |
umbling | -0.011 |
ho | -0.011 |
ipt | -0.011 |
Token | , |
Token ID | 11 |
Feature activation | -1.48e+00 |
Pos logit contributions | |
Princess | +0.012 |
chef | +0.011 |
matching | +0.011 |
party | +0.010 |
feast | +0.010 |
Neg logit contributions | |
ric | -0.017 |
cigarettes | -0.016 |
ipt | -0.015 |
umbling | -0.015 |
ho | -0.015 |
Token | they |
Token ID | 484 |
Feature activation | -5.41e-01 |
Pos logit contributions | |
Princess | +0.029 |
chef | +0.029 |
is | +0.027 |
succeeded | +0.027 |
designer | +0.027 |
Neg logit contributions | |
cigarettes | -0.030 |
ric | -0.030 |
ipt | -0.029 |
ho | -0.029 |
umbling | -0.028 |
Token | were |
Token ID | 547 |
Feature activation | -7.41e-01 |
Pos logit contributions | |
Princess | +0.013 |
chef | +0.013 |
matching | +0.012 |
feast | +0.012 |
is | +0.012 |
Neg logit contributions | |
ric | -0.009 |
cigarettes | -0.009 |
ipt | -0.009 |
ho | -0.009 |
umbling | -0.008 |
Token | playing |
Token ID | 2712 |
Feature activation | -4.35e+00 |
Pos logit contributions | |
Princess | +0.017 |
chef | +0.016 |
is | +0.016 |
feast | +0.015 |
succeeded | +0.015 |
Neg logit contributions | |
ric | -0.013 |
cigarettes | -0.013 |
ipt | -0.012 |
ho | -0.012 |
umbling | -0.012 |
Token | at |
Token ID | 379 |
Feature activation | +4.53e-01 |
Pos logit contributions | |
Princess | +0.086 |
is | +0.084 |
handsome | +0.079 |
chef | +0.078 |
ingu | +0.076 |
Neg logit contributions | |
cigarettes | -0.084 |
ipt | -0.080 |
ric | -0.078 |
os | -0.073 |
ho | -0.073 |
Token | Sally |
Token ID | 25737 |
Feature activation | +5.21e-01 |
Pos logit contributions | |
ric | +0.011 |
cigarettes | +0.011 |
ipt | +0.011 |
ho | +0.011 |
umbling | +0.011 |
Neg logit contributions | |
Princess | -0.008 |
chef | -0.007 |
matching | -0.007 |
feast | -0.007 |
succeeded | -0.006 |
Token | â |
Token ID | 22940 |
Feature activation | +1.67e+00 |
Pos logit contributions | |
ric | +0.013 |
cigarettes | +0.013 |
ipt | +0.013 |
umbling | +0.012 |
ho | +0.012 |
Neg logit contributions | |
Princess | -0.009 |
chef | -0.008 |
matching | -0.008 |
party | -0.008 |
patiently | -0.008 |
Token | € |
Token ID | 26391 |
Feature activation | -1.36e+00 |
Pos logit contributions | |
ric | +0.036 |
cigarettes | +0.035 |
ipt | +0.034 |
ho | +0.033 |
umbling | +0.033 |
Neg logit contributions | |
Princess | -0.034 |
chef | -0.032 |
matching | -0.031 |
is | -0.031 |
feast | -0.030 |
Token | â„¢ |
Token ID | 8151 |
Feature activation | -1.17e+00 |
Pos logit contributions | |
Princess | +0.026 |
chef | +0.024 |
matching | +0.024 |
party | +0.023 |
feast | +0.023 |
Neg logit contributions | |
ric | -0.031 |
cigarettes | -0.030 |
ipt | -0.029 |
ho | -0.029 |
umbling | -0.028 |
Token | Ben |
Token ID | 11696 |
Feature activation | -1.02e+00 |
Pos logit contributions | |
Princess | +0.007 |
chef | +0.007 |
patiently | +0.006 |
is | +0.006 |
matching | +0.006 |
Neg logit contributions | |
ric | -0.005 |
ipt | -0.005 |
cigarettes | -0.005 |
ho | -0.005 |
umbling | -0.005 |
Token | and |
Token ID | 290 |
Feature activation | +3.28e-01 |
Pos logit contributions | |
Princess | +0.022 |
chef | +0.021 |
is | +0.020 |
matching | +0.020 |
feast | +0.019 |
Neg logit contributions | |
ric | -0.020 |
cigarettes | -0.020 |
ho | -0.019 |
ipt | -0.019 |
os | -0.018 |
Token | Lily |
Token ID | 20037 |
Feature activation | +1.70e-01 |
Pos logit contributions | |
ric | +0.009 |
umbling | +0.009 |
ipt | +0.008 |
cigarettes | +0.008 |
urious | +0.008 |
Neg logit contributions | |
Princess | -0.006 |
chef | -0.005 |
matching | -0.005 |
Penny | -0.005 |
party | -0.005 |
Token | like |
Token ID | 588 |
Feature activation | -6.09e-01 |
Pos logit contributions | |
ric | +0.003 |
cigarettes | +0.003 |
ho | +0.003 |
ipt | +0.003 |
umbling | +0.002 |
Neg logit contributions | |
Princess | -0.004 |
chef | -0.004 |
is | -0.004 |
matching | -0.004 |
designer | -0.004 |
Token | to |
Token ID | 284 |
Feature activation | -4.32e-01 |
Pos logit contributions | |
Princess | +0.010 |
chef | +0.010 |
matching | +0.010 |
parties | +0.009 |
party | +0.009 |
Neg logit contributions | |
ric | -0.016 |
umbling | -0.015 |
ipt | -0.015 |
cigarettes | -0.015 |
ho | -0.014 |
Token | play |
Token ID | 711 |
Feature activation | -5.37e+00 |
Pos logit contributions | |
Princess | +0.008 |
chef | +0.007 |
matching | +0.007 |
feast | +0.007 |
party | +0.007 |
Neg logit contributions | |
ric | -0.010 |
cigarettes | -0.010 |
ipt | -0.010 |
umbling | -0.010 |
ho | -0.010 |
Token | with |
Token ID | 351 |
Feature activation | -4.51e-01 |
Pos logit contributions | |
succeeded | +0.131 |
Princess | +0.129 |
Majesty | +0.128 |
majesty | +0.124 |
is | +0.120 |
Neg logit contributions | |
cigarettes | -0.071 |
ipt | -0.068 |
el | -0.065 |
packs | -0.061 |
urious | -0.057 |
Token | their |
Token ID | 511 |
Feature activation | +2.01e-01 |
Pos logit contributions | |
Princess | +0.009 |
chef | +0.009 |
matching | +0.009 |
is | +0.008 |
feast | +0.008 |
Neg logit contributions | |
ric | -0.010 |
cigarettes | -0.009 |
ipt | -0.009 |
umbling | -0.009 |
ho | -0.009 |
Token | ball |
Token ID | 2613 |
Feature activation | +7.12e-02 |
Pos logit contributions | |
ric | +0.005 |
umbling | +0.005 |
cigarettes | +0.005 |
ipt | +0.005 |
ho | +0.005 |
Neg logit contributions | |
Princess | -0.003 |
matching | -0.003 |
chef | -0.003 |
party | -0.003 |
tea | -0.003 |
Token | in |
Token ID | 287 |
Feature activation | +1.04e+00 |
Pos logit contributions | |
ric | +0.002 |
cigarettes | +0.002 |
ipt | +0.002 |
umbling | +0.002 |
ho | +0.002 |
Neg logit contributions | |
Princess | -0.001 |
chef | -0.001 |
matching | -0.001 |
feast | -0.001 |
is | -0.001 |
Token | the |
Token ID | 262 |
Feature activation | -3.93e-01 |
Pos logit contributions | |
ric | +0.031 |
ho | +0.031 |
umbling | +0.030 |
cigarettes | +0.029 |
beh | +0.029 |
Neg logit contributions | |
matching | -0.014 |
Princess | -0.014 |
chef | -0.012 |
chess | -0.012 |
tea | -0.011 |
Token | but |
Token ID | 475 |
Feature activation | +1.35e+00 |
Pos logit contributions | |
ric | +0.025 |
ipt | +0.024 |
cigarettes | +0.024 |
umbling | +0.023 |
ho | +0.023 |
Neg logit contributions | |
Princess | -0.016 |
chef | -0.014 |
matching | -0.013 |
is | -0.013 |
patiently | -0.013 |
Token | he |
Token ID | 339 |
Feature activation | -1.10e+00 |
Pos logit contributions | |
ric | +0.033 |
cigarettes | +0.031 |
ipt | +0.030 |
umbling | +0.030 |
ho | +0.030 |
Neg logit contributions | |
Princess | -0.025 |
chef | -0.024 |
matching | -0.022 |
succeeded | -0.022 |
feast | -0.021 |
Token | still |
Token ID | 991 |
Feature activation | -3.72e-02 |
Pos logit contributions | |
Princess | +0.026 |
chef | +0.024 |
matching | +0.023 |
feast | +0.023 |
parties | +0.023 |
Neg logit contributions | |
ho | -0.019 |
ric | -0.019 |
ipt | -0.019 |
cigarettes | -0.018 |
spe | -0.017 |
Token | wanted |
Token ID | 2227 |
Feature activation | -5.61e-01 |
Pos logit contributions | |
Princess | +0.001 |
chef | +0.001 |
matching | +0.001 |
feast | +0.001 |
succeeded | +0.001 |
Neg logit contributions | |
ric | -0.001 |
cigarettes | -0.001 |
ipt | -0.001 |
ho | -0.001 |
umbling | -0.001 |
Token | to |
Token ID | 284 |
Feature activation | +3.91e-01 |
Pos logit contributions | |
Princess | +0.013 |
chef | +0.012 |
feast | +0.012 |
succeeded | +0.012 |
designer | +0.012 |
Neg logit contributions | |
cigarettes | -0.010 |
ric | -0.010 |
ho | -0.009 |
ipt | -0.009 |
oes | -0.009 |
Token | play |
Token ID | 711 |
Feature activation | -4.18e+00 |
Pos logit contributions | |
ric | +0.008 |
cigarettes | +0.008 |
ho | +0.007 |
ipt | +0.007 |
oes | +0.007 |
Neg logit contributions | |
Princess | -0.008 |
chef | -0.008 |
is | -0.007 |
matching | -0.007 |
feast | -0.007 |
Token | with |
Token ID | 351 |
Feature activation | -6.26e-01 |
Pos logit contributions | |
chef | +0.084 |
Princess | +0.082 |
succeeded | +0.081 |
feast | +0.080 |
is | +0.080 |
Neg logit contributions | |
cigarettes | -0.078 |
run | -0.068 |
ric | -0.068 |
ho | -0.064 |
os | -0.064 |
Token | her |
Token ID | 607 |
Feature activation | -1.02e+00 |
Pos logit contributions | |
Princess | +0.011 |
chef | +0.010 |
matching | +0.010 |
feast | +0.010 |
succeeded | +0.010 |
Neg logit contributions | |
ric | -0.015 |
cigarettes | -0.015 |
ipt | -0.014 |
ho | -0.014 |
umbling | -0.014 |
Token | . |
Token ID | 13 |
Feature activation | -1.32e-01 |
Pos logit contributions | |
Princess | +0.018 |
chef | +0.017 |
matching | +0.016 |
feast | +0.016 |
is | +0.016 |
Neg logit contributions | |
ric | -0.025 |
cigarettes | -0.024 |
ipt | -0.023 |
ho | -0.023 |
umbling | -0.023 |
Token |
|
Token ID | 198 |
Feature activation | +1.33e+00 |
Pos logit contributions | |
Princess | +0.003 |
chef | +0.002 |
matching | +0.002 |
succeeded | +0.002 |
is | +0.002 |
Neg logit contributions | |
ric | -0.003 |
cigarettes | -0.003 |
ho | -0.003 |
ipt | -0.003 |
umbling | -0.003 |
Token |
|
Token ID | 198 |
Feature activation | +7.89e-01 |
Pos logit contributions | |
ric | +0.033 |
cigarettes | +0.032 |
ho | +0.031 |
ipt | +0.030 |
umbling | +0.030 |
Neg logit contributions | |
Princess | -0.024 |
is | -0.022 |
chef | -0.021 |
matching | -0.021 |
succeeded | -0.020 |
Token | , |
Token ID | 11 |
Feature activation | -5.83e-01 |
Pos logit contributions | |
chef | +0.052 |
succeeded | +0.052 |
Princess | +0.050 |
feast | +0.049 |
is | +0.048 |
Neg logit contributions | |
ho | -0.056 |
spe | -0.054 |
os | -0.053 |
ric | -0.049 |
umbling | -0.049 |
Token | ate |
Token ID | 15063 |
Feature activation | -1.74e+00 |
Pos logit contributions | |
chef | +0.011 |
Princess | +0.011 |
succeeded | +0.011 |
designer | +0.010 |
feast | +0.010 |
Neg logit contributions | |
ric | -0.012 |
ho | -0.012 |
cigarettes | -0.011 |
os | -0.011 |
ipt | -0.011 |
Token | cake |
Token ID | 12187 |
Feature activation | -2.32e+00 |
Pos logit contributions | |
Princess | +0.032 |
succeeded | +0.031 |
chef | +0.031 |
designer | +0.030 |
feast | +0.029 |
Neg logit contributions | |
ric | -0.037 |
ho | -0.037 |
cigarettes | -0.036 |
ipt | -0.034 |
os | -0.032 |
Token | , |
Token ID | 11 |
Feature activation | -7.53e-01 |
Pos logit contributions | |
Princess | +0.035 |
succeeded | +0.033 |
chef | +0.032 |
designer | +0.029 |
matching | +0.029 |
Neg logit contributions | |
ric | -0.059 |
cigarettes | -0.058 |
ho | -0.058 |
os | -0.056 |
oes | -0.054 |
Token | and |
Token ID | 290 |
Feature activation | -2.39e-01 |
Pos logit contributions | |
Princess | +0.014 |
chef | +0.014 |
succeeded | +0.013 |
designer | +0.013 |
matching | +0.013 |
Neg logit contributions | |
ric | -0.017 |
cigarettes | -0.015 |
ho | -0.015 |
ipt | -0.014 |
os | -0.014 |
Token | played |
Token ID | 2826 |
Feature activation | -4.15e+00 |
Pos logit contributions | |
Princess | +0.004 |
chef | +0.004 |
is | +0.004 |
matching | +0.004 |
designer | +0.004 |
Neg logit contributions | |
ric | -0.006 |
cigarettes | -0.006 |
ho | -0.005 |
ipt | -0.005 |
spe | -0.005 |
Token | games |
Token ID | 1830 |
Feature activation | -2.64e+00 |
Pos logit contributions | |
succeeded | +0.083 |
oured | +0.082 |
chef | +0.077 |
cartridge | +0.076 |
designer | +0.076 |
Neg logit contributions | |
cigarettes | -0.060 |
ric | -0.058 |
ho | -0.056 |
spe | -0.055 |
os | -0.054 |
Token | . |
Token ID | 13 |
Feature activation | -1.87e-01 |
Pos logit contributions | |
Princess | +0.067 |
chef | +0.066 |
succeeded | +0.064 |
Benjamin | +0.062 |
Majesty | +0.060 |
Neg logit contributions | |
ho | -0.035 |
cigarettes | -0.033 |
os | -0.032 |
spe | -0.031 |
ric | -0.031 |
Token | Sara |
Token ID | 24799 |
Feature activation | -1.73e+00 |
Pos logit contributions | |
Princess | +0.004 |
chef | +0.004 |
tea | +0.003 |
matching | +0.003 |
parties | +0.003 |
Neg logit contributions | |
cigarettes | -0.004 |
umbling | -0.004 |
ipt | -0.004 |
ric | -0.004 |
ho | -0.004 |
Token | had |
Token ID | 550 |
Feature activation | -1.94e+00 |
Pos logit contributions | |
chef | +0.043 |
Princess | +0.042 |
feast | +0.042 |
healthier | +0.040 |
competition | +0.039 |
Neg logit contributions | |
os | -0.026 |
ric | -0.025 |
ho | -0.024 |
spe | -0.022 |
umbling | -0.021 |
Token | a |
Token ID | 257 |
Feature activation | -3.48e+00 |
Pos logit contributions | |
chef | +0.045 |
designer | +0.043 |
is | +0.043 |
Princess | +0.042 |
feast | +0.042 |
Neg logit contributions | |
ric | -0.030 |
cigarettes | -0.028 |
Un | -0.026 |
umbling | -0.025 |
ipt | -0.024 |
Token | They |
Token ID | 1119 |
Feature activation | -2.23e+00 |
Pos logit contributions | |
Princess | +0.015 |
chef | +0.014 |
matching | +0.013 |
feast | +0.013 |
succeeded | +0.013 |
Neg logit contributions | |
ric | -0.017 |
cigarettes | -0.016 |
ho | -0.016 |
ipt | -0.016 |
umbling | -0.015 |
Token | had |
Token ID | 550 |
Feature activation | -1.58e+00 |
Pos logit contributions | |
Princess | +0.047 |
chef | +0.045 |
matching | +0.042 |
is | +0.041 |
designer | +0.041 |
Neg logit contributions | |
ric | -0.047 |
cigarettes | -0.045 |
ipt | -0.044 |
ho | -0.043 |
umbling | -0.042 |
Token | a |
Token ID | 257 |
Feature activation | -2.99e+00 |
Pos logit contributions | |
Princess | +0.033 |
chef | +0.032 |
designer | +0.031 |
is | +0.030 |
judges | +0.028 |
Neg logit contributions | |
ric | -0.031 |
cigarettes | -0.030 |
ipt | -0.029 |
spe | -0.028 |
umbling | -0.028 |
Token | great |
Token ID | 1049 |
Feature activation | -2.26e+00 |
Pos logit contributions | |
Princess | +0.043 |
chef | +0.042 |
is | +0.038 |
designer | +0.038 |
ier | +0.037 |
Neg logit contributions | |
ric | -0.076 |
ho | -0.073 |
cigarettes | -0.072 |
spe | -0.072 |
Un | -0.068 |
Token | time |
Token ID | 640 |
Feature activation | -1.76e+00 |
Pos logit contributions | |
Princess | +0.045 |
chef | +0.043 |
designer | +0.039 |
is | +0.039 |
judges | +0.038 |
Neg logit contributions | |
ric | -0.049 |
cigarettes | -0.046 |
ipt | -0.045 |
ho | -0.045 |
urious | -0.044 |
Token | playing |
Token ID | 2712 |
Feature activation | -4.80e+00 |
Pos logit contributions | |
Princess | +0.036 |
chef | +0.033 |
designer | +0.030 |
Benjamin | +0.030 |
succeeded | +0.029 |
Neg logit contributions | |
ric | -0.038 |
cigarettes | -0.036 |
os | -0.035 |
run | -0.035 |
packs | -0.034 |
Token | together |
Token ID | 1978 |
Feature activation | -1.89e+00 |
Pos logit contributions | |
is | +0.089 |
Princess | +0.085 |
chef | +0.082 |
designer | +0.079 |
ingu | +0.077 |
Neg logit contributions | |
cigarettes | -0.099 |
packs | -0.094 |
ric | -0.090 |
os | -0.090 |
ipt | -0.089 |
Token | at |
Token ID | 379 |
Feature activation | -3.14e-01 |
Pos logit contributions | |
Princess | +0.033 |
chef | +0.031 |
designer | +0.028 |
is | +0.028 |
succeeded | +0.028 |
Neg logit contributions | |
ric | -0.043 |
ho | -0.043 |
cigarettes | -0.041 |
spe | -0.040 |
umbling | -0.040 |
Token | the |
Token ID | 262 |
Feature activation | -1.57e+00 |
Pos logit contributions | |
Princess | +0.006 |
chef | +0.006 |
is | +0.006 |
designer | +0.006 |
succeeded | +0.006 |
Neg logit contributions | |
ric | -0.007 |
ipt | -0.006 |
cigarettes | -0.006 |
ho | -0.006 |
umbling | -0.006 |
Token | old |
Token ID | 1468 |
Feature activation | -1.45e+00 |
Pos logit contributions | |
Princess | +0.028 |
chef | +0.026 |
succeeded | +0.025 |
designer | +0.024 |
is | +0.024 |
Neg logit contributions | |
ric | -0.037 |
cigarettes | -0.035 |
ho | -0.035 |
ipt | -0.034 |
umbling | -0.033 |
Token | gym |
Token ID | 11550 |
Feature activation | -1.17e+00 |
Pos logit contributions | |
Princess | +0.029 |
chef | +0.028 |
succeeded | +0.026 |
is | +0.026 |
matching | +0.025 |
Neg logit contributions | |
ric | -0.030 |
cigarettes | -0.029 |
ho | -0.029 |
ipt | -0.028 |
umbling | -0.027 |