Token | bouncing |
Token ID | 30713 |
Feature activation | +1.47e+00 |
Pos logit contributions | |
uch | +0.087 |
fault | +0.084 |
gh | +0.081 |
tresp | +0.081 |
paste | +0.079 |
Neg logit contributions | |
agree | -0.050 |
ies | -0.050 |
agreement | -0.048 |
ie | -0.044 |
happy | -0.044 |
Token | the |
Token ID | 262 |
Feature activation | +1.93e+00 |
Pos logit contributions | |
ies | +0.046 |
ie | +0.043 |
agree | +0.043 |
agreement | +0.043 |
happy | +0.040 |
Neg logit contributions | |
uch | -0.086 |
fault | -0.086 |
tresp | -0.083 |
gency | -0.080 |
OP | -0.079 |
Token | ball |
Token ID | 2613 |
Feature activation | -1.61e+00 |
Pos logit contributions | |
ies | +0.074 |
agreement | +0.072 |
agree | +0.071 |
ie | +0.067 |
happy | +0.067 |
Neg logit contributions | |
fault | -0.093 |
uch | -0.092 |
gh | -0.091 |
OP | -0.088 |
tresp | -0.088 |
Token | back |
Token ID | 736 |
Feature activation | +9.26e-01 |
Pos logit contributions | |
fault | +0.084 |
uch | +0.083 |
tresp | +0.083 |
gency | +0.081 |
gh | +0.081 |
Neg logit contributions | |
agree | -0.055 |
ies | -0.054 |
ie | -0.052 |
agreement | -0.050 |
happy | -0.049 |
Token | and |
Token ID | 290 |
Feature activation | -2.98e-01 |
Pos logit contributions | |
agree | +0.035 |
ies | +0.034 |
agreement | +0.034 |
ie | +0.030 |
happy | +0.030 |
Neg logit contributions | |
uch | -0.046 |
fault | -0.046 |
gh | -0.044 |
tresp | -0.043 |
paste | -0.042 |
Token | forth |
Token ID | 6071 |
Feature activation | +1.62e+01 |
Pos logit contributions | |
fault | +0.018 |
uch | +0.017 |
tresp | +0.017 |
gh | +0.016 |
nothing | +0.016 |
Neg logit contributions | |
agreement | -0.008 |
agree | -0.008 |
ies | -0.007 |
reci | -0.007 |
ashing | -0.007 |
Token | . |
Token ID | 13 |
Feature activation | +6.00e-01 |
Pos logit contributions | |
agree | +0.546 |
ies | +0.528 |
agreement | +0.520 |
happy | +0.483 |
ie | +0.463 |
Neg logit contributions | |
uch | -0.881 |
fault | -0.865 |
tresp | -0.841 |
paste | -0.814 |
gh | -0.805 |
Token | They |
Token ID | 1119 |
Feature activation | +4.28e+00 |
Pos logit contributions | |
ies | +0.022 |
agree | +0.021 |
agreement | +0.021 |
ie | +0.019 |
reci | +0.018 |
Neg logit contributions | |
fault | -0.031 |
uch | -0.029 |
gh | -0.029 |
OP | -0.028 |
tresp | -0.027 |
Token | laughed |
Token ID | 13818 |
Feature activation | -3.17e+00 |
Pos logit contributions | |
agreement | +0.153 |
ies | +0.153 |
agree | +0.150 |
ie | +0.135 |
ashing | +0.135 |
Neg logit contributions | |
fault | -0.212 |
uch | -0.211 |
gh | -0.203 |
tresp | -0.202 |
tricked | -0.194 |
Token | and |
Token ID | 290 |
Feature activation | -5.13e-01 |
Pos logit contributions | |
uch | +0.186 |
fault | +0.183 |
gh | +0.177 |
tresp | +0.176 |
paste | +0.170 |
Neg logit contributions | |
agree | -0.097 |
agreement | -0.093 |
ies | -0.092 |
happy | -0.082 |
ie | -0.079 |
Token | played |
Token ID | 2826 |
Feature activation | +4.31e+00 |
Pos logit contributions | |
uch | +0.026 |
fault | +0.026 |
gh | +0.025 |
tresp | +0.024 |
paste | +0.023 |
Neg logit contributions | |
agree | -0.019 |
agreement | -0.019 |
ies | -0.018 |
ie | -0.016 |
reci | -0.016 |
Token | moved |
Token ID | 3888 |
Feature activation | -1.52e+00 |
Pos logit contributions | |
agree | +0.052 |
agreement | +0.049 |
ies | +0.049 |
happy | +0.044 |
ie | +0.044 |
Neg logit contributions | |
fault | -0.066 |
uch | -0.066 |
gh | -0.063 |
tresp | -0.061 |
paste | -0.060 |
Token | his |
Token ID | 465 |
Feature activation | +8.30e-01 |
Pos logit contributions | |
uch | +0.091 |
fault | +0.086 |
tresp | +0.082 |
ses | +0.081 |
paste | +0.079 |
Neg logit contributions | |
agreement | -0.050 |
happy | -0.049 |
agree | -0.048 |
ies | -0.047 |
smile | -0.042 |
Token | feet |
Token ID | 3625 |
Feature activation | +3.86e+00 |
Pos logit contributions | |
ies | +0.032 |
agreement | +0.032 |
agree | +0.032 |
happy | +0.029 |
family | +0.028 |
Neg logit contributions | |
uch | -0.040 |
fault | -0.038 |
gh | -0.038 |
tresp | -0.037 |
paste | -0.036 |
Token | back |
Token ID | 736 |
Feature activation | +8.69e-01 |
Pos logit contributions | |
agree | +0.137 |
ies | +0.133 |
agreement | +0.132 |
happy | +0.122 |
ie | +0.119 |
Neg logit contributions | |
uch | -0.199 |
fault | -0.198 |
tresp | -0.191 |
gh | -0.190 |
gency | -0.183 |
Token | and |
Token ID | 290 |
Feature activation | -3.62e-01 |
Pos logit contributions | |
agree | +0.034 |
ies | +0.033 |
agreement | +0.033 |
ie | +0.030 |
happy | +0.030 |
Neg logit contributions | |
uch | -0.042 |
fault | -0.041 |
gh | -0.041 |
tresp | -0.040 |
paste | -0.038 |
Token | forth |
Token ID | 6071 |
Feature activation | +1.62e+01 |
Pos logit contributions | |
uch | +0.019 |
fault | +0.019 |
gh | +0.019 |
tresp | +0.019 |
tricked | +0.019 |
Neg logit contributions | |
agreement | -0.011 |
agree | -0.011 |
ies | -0.010 |
reci | -0.010 |
ashing | -0.009 |
Token | to |
Token ID | 284 |
Feature activation | -5.20e+00 |
Pos logit contributions | |
agree | +0.483 |
ies | +0.462 |
agreement | +0.451 |
happy | +0.427 |
ie | +0.392 |
Neg logit contributions | |
uch | -0.954 |
fault | -0.930 |
tresp | -0.896 |
paste | -0.868 |
gency | -0.865 |
Token | practice |
Token ID | 3357 |
Feature activation | +4.62e+00 |
Pos logit contributions | |
fault | +0.264 |
uch | +0.262 |
gh | +0.254 |
tresp | +0.249 |
paste | +0.241 |
Neg logit contributions | |
agree | -0.188 |
agreement | -0.183 |
ies | -0.183 |
ie | -0.163 |
ashing | -0.159 |
Token | standing |
Token ID | 5055 |
Feature activation | +2.68e+00 |
Pos logit contributions | |
agree | +0.160 |
agreement | +0.153 |
ies | +0.148 |
happy | +0.138 |
ie | +0.129 |
Neg logit contributions | |
uch | -0.258 |
fault | -0.245 |
tresp | -0.231 |
paste | -0.230 |
gh | -0.229 |
Token | on |
Token ID | 319 |
Feature activation | -1.74e-01 |
Pos logit contributions | |
agree | +0.096 |
agreement | +0.095 |
ies | +0.089 |
happy | +0.089 |
forth | +0.083 |
Neg logit contributions | |
uch | -0.146 |
fault | -0.143 |
OP | -0.134 |
gh | -0.133 |
tresp | -0.132 |
Token | one |
Token ID | 530 |
Feature activation | -1.20e+00 |
Pos logit contributions | |
uch | +0.011 |
paste | +0.010 |
fault | +0.010 |
sed | +0.009 |
ses | +0.009 |
Neg logit contributions | |
agreement | -0.005 |
agree | -0.005 |
happy | -0.005 |
play | -0.004 |
ies | -0.004 |
Token | kicking |
Token ID | 17997 |
Feature activation | +2.80e+00 |
Pos logit contributions | |
agree | +0.023 |
agreement | +0.023 |
ies | +0.023 |
reci | +0.021 |
ie | +0.020 |
Neg logit contributions | |
fault | -0.032 |
gh | -0.030 |
tresp | -0.030 |
uch | -0.030 |
paste | -0.029 |
Token | the |
Token ID | 262 |
Feature activation | +1.81e+00 |
Pos logit contributions | |
agreement | +0.092 |
ies | +0.091 |
agree | +0.090 |
ie | +0.084 |
happy | +0.084 |
Neg logit contributions | |
uch | -0.167 |
fault | -0.160 |
gh | -0.148 |
OP | -0.148 |
tresp | -0.148 |
Token | ball |
Token ID | 2613 |
Feature activation | -1.84e+00 |
Pos logit contributions | |
ies | +0.070 |
agreement | +0.069 |
agree | +0.067 |
happy | +0.065 |
girl | +0.063 |
Neg logit contributions | |
uch | -0.087 |
fault | -0.086 |
gh | -0.085 |
OP | -0.081 |
tresp | -0.080 |
Token | back |
Token ID | 736 |
Feature activation | +7.68e-01 |
Pos logit contributions | |
uch | +0.101 |
fault | +0.100 |
gh | +0.097 |
tresp | +0.097 |
gency | +0.095 |
Neg logit contributions | |
agree | -0.060 |
ies | -0.058 |
agreement | -0.057 |
ie | -0.053 |
happy | -0.052 |
Token | and |
Token ID | 290 |
Feature activation | -4.28e-01 |
Pos logit contributions | |
agree | +0.026 |
ies | +0.025 |
agreement | +0.025 |
ie | +0.022 |
happy | +0.022 |
Neg logit contributions | |
uch | -0.041 |
fault | -0.041 |
gh | -0.040 |
tresp | -0.039 |
paste | -0.038 |
Token | forth |
Token ID | 6071 |
Feature activation | +1.61e+01 |
Pos logit contributions | |
fault | +0.029 |
uch | +0.028 |
tresp | +0.028 |
gh | +0.028 |
ses | +0.027 |
Neg logit contributions | |
agreement | -0.007 |
agree | -0.007 |
ies | -0.006 |
reci | -0.006 |
ashing | -0.006 |
Token | . |
Token ID | 13 |
Feature activation | +4.85e-01 |
Pos logit contributions | |
agree | +0.512 |
ies | +0.499 |
agreement | +0.488 |
happy | +0.459 |
ie | +0.434 |
Neg logit contributions | |
uch | -0.903 |
fault | -0.883 |
tresp | -0.860 |
paste | -0.841 |
gency | -0.829 |
Token | From |
Token ID | 3574 |
Feature activation | +5.03e+00 |
Pos logit contributions | |
ies | +0.018 |
agree | +0.017 |
agreement | +0.017 |
ie | +0.016 |
reci | +0.015 |
Neg logit contributions | |
fault | -0.025 |
uch | -0.024 |
gh | -0.023 |
OP | -0.023 |
tresp | -0.023 |
Token | that |
Token ID | 326 |
Feature activation | -1.14e+00 |
Pos logit contributions | |
agree | +0.222 |
ies | +0.216 |
agreement | +0.215 |
ie | +0.203 |
happy | +0.199 |
Neg logit contributions | |
fault | -0.227 |
uch | -0.218 |
gh | -0.215 |
paste | -0.207 |
tresp | -0.207 |
Token | day |
Token ID | 1110 |
Feature activation | +4.84e+00 |
Pos logit contributions | |
uch | +0.076 |
gh | +0.074 |
fault | +0.074 |
tresp | +0.071 |
nothing | +0.068 |
Neg logit contributions | |
agree | -0.025 |
ies | -0.024 |
agreement | -0.023 |
ashing | -0.019 |
reci | -0.018 |
Token | on |
Token ID | 319 |
Feature activation | -2.54e-01 |
Pos logit contributions | |
agree | +0.244 |
agreement | +0.239 |
ies | +0.237 |
ie | +0.218 |
ashing | +0.214 |
Neg logit contributions | |
uch | -0.180 |
fault | -0.178 |
gh | -0.172 |
tresp | -0.166 |
paste | -0.159 |
Token | began |
Token ID | 2540 |
Feature activation | +3.53e+00 |
Pos logit contributions | |
agreement | +0.231 |
ies | +0.229 |
agree | +0.228 |
ie | +0.204 |
ashing | +0.202 |
Neg logit contributions | |
fault | -0.296 |
uch | -0.296 |
gh | -0.282 |
tresp | -0.281 |
tricked | -0.271 |
Token | pushing |
Token ID | 7796 |
Feature activation | +1.76e+00 |
Pos logit contributions | |
agree | +0.180 |
agreement | +0.175 |
ies | +0.174 |
happy | +0.164 |
ie | +0.158 |
Neg logit contributions | |
uch | -0.136 |
fault | -0.129 |
gh | -0.123 |
tresp | -0.120 |
paste | -0.118 |
Token | themselves |
Token ID | 2405 |
Feature activation | +1.94e+00 |
Pos logit contributions | |
agree | +0.058 |
agreement | +0.055 |
ies | +0.054 |
happy | +0.047 |
ie | +0.046 |
Neg logit contributions | |
uch | -0.103 |
fault | -0.099 |
gh | -0.096 |
tresp | -0.096 |
paste | -0.093 |
Token | back |
Token ID | 736 |
Feature activation | +7.68e-01 |
Pos logit contributions | |
agree | +0.079 |
agreement | +0.078 |
ies | +0.076 |
ie | +0.070 |
reci | +0.069 |
Neg logit contributions | |
fault | -0.090 |
uch | -0.087 |
gh | -0.085 |
tresp | -0.082 |
paste | -0.080 |
Token | and |
Token ID | 290 |
Feature activation | -4.28e-01 |
Pos logit contributions | |
agree | +0.030 |
agreement | +0.029 |
ies | +0.029 |
ie | +0.026 |
happy | +0.026 |
Neg logit contributions | |
uch | -0.037 |
fault | -0.037 |
gh | -0.035 |
tresp | -0.034 |
paste | -0.033 |
Token | forth |
Token ID | 6071 |
Feature activation | +1.61e+01 |
Pos logit contributions | |
fault | +0.026 |
nothing | +0.026 |
tricked | +0.025 |
gh | +0.025 |
tresp | +0.025 |
Neg logit contributions | |
agreement | -0.009 |
reci | -0.008 |
agree | -0.008 |
ies | -0.008 |
ogged | -0.007 |
Token | on |
Token ID | 319 |
Feature activation | -1.44e-01 |
Pos logit contributions | |
agree | +0.492 |
ies | +0.464 |
agreement | +0.458 |
happy | +0.413 |
ie | +0.389 |
Neg logit contributions | |
uch | -0.944 |
fault | -0.915 |
tresp | -0.881 |
gh | -0.876 |
paste | -0.864 |
Token | their |
Token ID | 511 |
Feature activation | +2.30e+00 |
Pos logit contributions | |
uch | +0.009 |
paste | +0.009 |
fault | +0.008 |
gh | +0.008 |
tresp | +0.008 |
Neg logit contributions | |
agree | -0.004 |
agreement | -0.004 |
happy | -0.003 |
ies | -0.003 |
family | -0.003 |
Token | swings |
Token ID | 26728 |
Feature activation | -1.97e+00 |
Pos logit contributions | |
agree | +0.086 |
agreement | +0.084 |
ies | +0.083 |
ie | +0.076 |
reci | +0.074 |
Neg logit contributions | |
fault | -0.115 |
uch | -0.112 |
gh | -0.110 |
tresp | -0.107 |
paste | -0.102 |
Token | . |
Token ID | 13 |
Feature activation | +4.04e-01 |
Pos logit contributions | |
uch | +0.109 |
fault | +0.106 |
gh | +0.103 |
tresp | +0.103 |
paste | +0.101 |
Neg logit contributions | |
agree | -0.066 |
agreement | -0.063 |
ies | -0.062 |
happy | -0.055 |
ie | -0.055 |
Token |
|
Token ID | 198 |
Feature activation | +8.03e-01 |
Pos logit contributions | |
agree | +0.012 |
ies | +0.012 |
agreement | +0.011 |
ie | +0.010 |
reci | +0.009 |
Neg logit contributions | |
fault | -0.023 |
uch | -0.023 |
gh | -0.023 |
tresp | -0.022 |
paste | -0.022 |
Token | and |
Token ID | 290 |
Feature activation | -2.41e-01 |
Pos logit contributions | |
agree | +0.041 |
agreement | +0.041 |
ies | +0.038 |
reci | +0.035 |
ie | +0.034 |
Neg logit contributions | |
fault | -0.049 |
uch | -0.048 |
gh | -0.046 |
tresp | -0.045 |
paste | -0.043 |
Token | forth |
Token ID | 6071 |
Feature activation | +1.63e+01 |
Pos logit contributions | |
fault | +0.013 |
uch | +0.012 |
tresp | +0.012 |
nothing | +0.012 |
gh | +0.012 |
Neg logit contributions | |
agreement | -0.008 |
ies | -0.007 |
agree | -0.007 |
ashing | -0.007 |
reci | -0.007 |
Token | , |
Token ID | 11 |
Feature activation | +4.19e-01 |
Pos logit contributions | |
agree | +0.565 |
ies | +0.549 |
agreement | +0.545 |
happy | +0.487 |
ie | +0.486 |
Neg logit contributions | |
uch | -0.857 |
fault | -0.854 |
tresp | -0.812 |
gh | -0.806 |
paste | -0.788 |
Token | back |
Token ID | 736 |
Feature activation | +7.68e-01 |
Pos logit contributions | |
agree | +0.016 |
agreement | +0.016 |
ies | +0.016 |
ie | +0.014 |
reci | +0.014 |
Neg logit contributions | |
fault | -0.020 |
uch | -0.019 |
gh | -0.019 |
tresp | -0.019 |
nothing | -0.018 |
Token | and |
Token ID | 290 |
Feature activation | -4.28e-01 |
Pos logit contributions | |
agreement | +0.031 |
agree | +0.030 |
ies | +0.028 |
reci | +0.027 |
ie | +0.026 |
Neg logit contributions | |
fault | -0.037 |
uch | -0.034 |
tresp | -0.033 |
gh | -0.033 |
paste | -0.032 |
Token | forth |
Token ID | 6071 |
Feature activation | +1.61e+01 |
Pos logit contributions | |
fault | +0.024 |
uch | +0.024 |
gh | +0.023 |
tresp | +0.023 |
nothing | +0.023 |
Neg logit contributions | |
agreement | -0.012 |
agree | -0.012 |
ies | -0.011 |
reci | -0.011 |
ie | -0.010 |
Token | . |
Token ID | 13 |
Feature activation | +4.85e-01 |
Pos logit contributions | |
agree | +0.561 |
ies | +0.546 |
agreement | +0.541 |
happy | +0.485 |
ie | +0.483 |
Neg logit contributions | |
uch | -0.845 |
fault | -0.840 |
tresp | -0.800 |
gh | -0.793 |
paste | -0.777 |
Token | She |
Token ID | 1375 |
Feature activation | +3.48e+00 |
Pos logit contributions | |
ies | +0.018 |
agree | +0.017 |
agreement | +0.017 |
ie | +0.016 |
reci | +0.015 |
Neg logit contributions | |
fault | -0.025 |
uch | -0.023 |
gh | -0.023 |
OP | -0.023 |
tresp | -0.022 |
Token | had |
Token ID | 550 |
Feature activation | +2.53e+00 |
Pos logit contributions | |
agree | +0.141 |
ies | +0.134 |
agreement | +0.132 |
ie | +0.122 |
happy | +0.122 |
Neg logit contributions | |
uch | -0.166 |
fault | -0.166 |
gh | -0.161 |
tresp | -0.156 |
paste | -0.153 |
Token | so |
Token ID | 523 |
Feature activation | -9.70e+00 |
Pos logit contributions | |
happy | +0.113 |
agree | +0.101 |
play | +0.098 |
agreement | +0.097 |
smile | +0.092 |
Neg logit contributions | |
uch | -0.128 |
osaurus | -0.123 |
ses | -0.114 |
paste | -0.114 |
gency | -0.111 |
Token | much |
Token ID | 881 |
Feature activation | -4.88e-01 |
Pos logit contributions | |
fault | +0.384 |
uch | +0.382 |
gh | +0.364 |
tresp | +0.350 |
paste | +0.332 |
Neg logit contributions | |
agreement | -0.467 |
agree | -0.454 |
ies | -0.453 |
ie | -0.422 |
reci | -0.406 |
Token | started |
Token ID | 2067 |
Feature activation | +2.92e+00 |
Pos logit contributions | |
agree | +0.058 |
ies | +0.055 |
agreement | +0.054 |
ie | +0.050 |
happy | +0.050 |
Neg logit contributions | |
fault | -0.080 |
uch | -0.079 |
gh | -0.076 |
tresp | -0.073 |
paste | -0.072 |
Token | to |
Token ID | 284 |
Feature activation | -5.13e+00 |
Pos logit contributions | |
agree | +0.144 |
agreement | +0.141 |
ies | +0.141 |
happy | +0.131 |
ie | +0.129 |
Neg logit contributions | |
uch | -0.114 |
fault | -0.109 |
gh | -0.104 |
tresp | -0.101 |
paste | -0.099 |
Token | swing |
Token ID | 9628 |
Feature activation | +3.85e+00 |
Pos logit contributions | |
fault | +0.256 |
uch | +0.249 |
gh | +0.245 |
tresp | +0.243 |
ze | +0.229 |
Neg logit contributions | |
ies | -0.181 |
agreement | -0.177 |
agree | -0.175 |
ashing | -0.163 |
reci | -0.160 |
Token | back |
Token ID | 736 |
Feature activation | +7.39e-01 |
Pos logit contributions | |
agree | +0.108 |
ies | +0.106 |
agreement | +0.103 |
happy | +0.102 |
ie | +0.096 |
Neg logit contributions | |
uch | -0.235 |
fault | -0.227 |
tresp | -0.222 |
gh | -0.214 |
gency | -0.214 |
Token | and |
Token ID | 290 |
Feature activation | -4.92e-01 |
Pos logit contributions | |
agree | +0.027 |
agreement | +0.026 |
ies | +0.025 |
ie | +0.023 |
reci | +0.022 |
Neg logit contributions | |
fault | -0.038 |
uch | -0.037 |
gh | -0.036 |
tresp | -0.035 |
paste | -0.034 |
Token | forth |
Token ID | 6071 |
Feature activation | +1.61e+01 |
Pos logit contributions | |
fault | +0.029 |
uch | +0.029 |
nothing | +0.029 |
anything | +0.028 |
gh | +0.028 |
Neg logit contributions | |
agreement | -0.012 |
agree | -0.011 |
ies | -0.011 |
reci | -0.010 |
ashing | -0.010 |
Token | , |
Token ID | 11 |
Feature activation | +2.38e-01 |
Pos logit contributions | |
agree | +0.481 |
ies | +0.462 |
agreement | +0.452 |
happy | +0.417 |
ie | +0.398 |
Neg logit contributions | |
uch | -0.937 |
fault | -0.920 |
tresp | -0.888 |
gh | -0.868 |
paste | -0.864 |
Token | higher |
Token ID | 2440 |
Feature activation | -1.59e+00 |
Pos logit contributions | |
agree | +0.009 |
agreement | +0.009 |
ies | +0.009 |
ie | +0.008 |
reci | +0.008 |
Neg logit contributions | |
fault | -0.012 |
uch | -0.011 |
gh | -0.011 |
tresp | -0.011 |
paste | -0.010 |
Token | and |
Token ID | 290 |
Feature activation | -5.94e-01 |
Pos logit contributions | |
uch | +0.090 |
fault | +0.088 |
gh | +0.086 |
tresp | +0.085 |
paste | +0.083 |
Neg logit contributions | |
agree | -0.051 |
ies | -0.050 |
agreement | -0.049 |
ie | -0.044 |
happy | -0.042 |
Token | higher |
Token ID | 2440 |
Feature activation | -1.64e+00 |
Pos logit contributions | |
fault | +0.026 |
uch | +0.026 |
gh | +0.025 |
tresp | +0.024 |
paste | +0.023 |
Neg logit contributions | |
agree | -0.026 |
agreement | -0.025 |
ies | -0.025 |
ie | -0.023 |
happy | -0.023 |
Token | . |
Token ID | 13 |
Feature activation | +3.71e-01 |
Pos logit contributions | |
uch | +0.090 |
fault | +0.089 |
gh | +0.086 |
tresp | +0.085 |
paste | +0.083 |
Neg logit contributions | |
agree | -0.055 |
ies | -0.053 |
agreement | -0.052 |
ie | -0.047 |
happy | -0.045 |
Token | swing |
Token ID | 9628 |
Feature activation | +3.95e+00 |
Pos logit contributions | |
agree | +0.032 |
agreement | +0.031 |
ies | +0.031 |
ie | +0.028 |
ashing | +0.027 |
Neg logit contributions | |
fault | -0.046 |
uch | -0.046 |
gh | -0.044 |
tresp | -0.044 |
paste | -0.042 |
Token | would |
Token ID | 561 |
Feature activation | -2.91e+00 |
Pos logit contributions | |
agree | +0.163 |
ies | +0.160 |
agreement | +0.155 |
ie | +0.148 |
happy | +0.145 |
Neg logit contributions | |
fault | -0.184 |
uch | -0.178 |
gh | -0.173 |
tresp | -0.171 |
gency | -0.165 |
Token | go |
Token ID | 467 |
Feature activation | +6.25e+00 |
Pos logit contributions | |
fault | +0.156 |
uch | +0.155 |
gh | +0.150 |
tresp | +0.147 |
paste | +0.143 |
Neg logit contributions | |
agree | -0.097 |
agreement | -0.095 |
ies | -0.093 |
ie | -0.083 |
happy | -0.080 |
Token | back |
Token ID | 736 |
Feature activation | +7.39e-01 |
Pos logit contributions | |
agree | +0.246 |
ies | +0.244 |
agreement | +0.240 |
ie | +0.226 |
happy | +0.223 |
Neg logit contributions | |
uch | -0.301 |
fault | -0.300 |
gh | -0.286 |
tresp | -0.282 |
paste | -0.277 |
Token | and |
Token ID | 290 |
Feature activation | -4.92e-01 |
Pos logit contributions | |
agree | +0.031 |
ies | +0.030 |
agreement | +0.030 |
ie | +0.027 |
happy | +0.027 |
Neg logit contributions | |
uch | -0.034 |
fault | -0.033 |
gh | -0.032 |
tresp | -0.032 |
paste | -0.031 |
Token | forth |
Token ID | 6071 |
Feature activation | +1.61e+01 |
Pos logit contributions | |
uch | +0.027 |
fault | +0.027 |
nothing | +0.025 |
gh | +0.025 |
tresp | +0.025 |
Neg logit contributions | |
agreement | -0.015 |
ies | -0.014 |
agree | -0.014 |
ashing | -0.013 |
reci | -0.013 |
Token | in |
Token ID | 287 |
Feature activation | +1.71e+00 |
Pos logit contributions | |
agree | +0.500 |
ies | +0.488 |
agreement | +0.476 |
happy | +0.436 |
ie | +0.427 |
Neg logit contributions | |
uch | -0.907 |
fault | -0.900 |
tresp | -0.862 |
gh | -0.847 |
paste | -0.830 |
Token | a |
Token ID | 257 |
Feature activation | -8.43e+00 |
Pos logit contributions | |
agree | +0.063 |
agreement | +0.063 |
ies | +0.060 |
ie | +0.054 |
happy | +0.054 |
Neg logit contributions | |
uch | -0.087 |
fault | -0.086 |
gh | -0.083 |
tresp | -0.081 |
paste | -0.080 |
Token | fun |
Token ID | 1257 |
Feature activation | -1.30e+00 |
Pos logit contributions | |
gh | +0.387 |
fault | +0.383 |
uch | +0.381 |
tresp | +0.370 |
tricked | +0.352 |
Neg logit contributions | |
ies | -0.337 |
agree | -0.328 |
agreement | -0.308 |
ie | -0.297 |
ashing | -0.295 |
Token | motion |
Token ID | 6268 |
Feature activation | -1.79e+00 |
Pos logit contributions | |
gh | +0.063 |
fault | +0.062 |
uch | +0.058 |
tresp | +0.056 |
anything | +0.054 |
Neg logit contributions | |
agree | -0.047 |
agreement | -0.047 |
reci | -0.046 |
ies | -0.045 |
ashing | -0.044 |
Token | . |
Token ID | 13 |
Feature activation | +3.71e-01 |
Pos logit contributions | |
uch | +0.091 |
fault | +0.091 |
gh | +0.088 |
tresp | +0.086 |
paste | +0.083 |
Neg logit contributions | |
agree | -0.064 |
agreement | -0.063 |
ies | -0.062 |
ie | -0.056 |
happy | -0.054 |
Token | He |
Token ID | 679 |
Feature activation | +1.49e+00 |
Pos logit contributions | |
fault | +0.195 |
uch | +0.191 |
gh | +0.187 |
tresp | +0.183 |
OP | +0.183 |
Neg logit contributions | |
ies | -0.146 |
agree | -0.140 |
agreement | -0.138 |
ie | -0.126 |
ashing | -0.123 |
Token | shook |
Token ID | 14682 |
Feature activation | -6.06e+00 |
Pos logit contributions | |
agree | +0.057 |
agreement | +0.055 |
ies | +0.055 |
ie | +0.050 |
happy | +0.049 |
Neg logit contributions | |
fault | -0.074 |
uch | -0.073 |
gh | -0.070 |
tresp | -0.068 |
paste | -0.067 |
Token | it |
Token ID | 340 |
Feature activation | -2.95e+00 |
Pos logit contributions | |
uch | +0.324 |
fault | +0.302 |
gh | +0.284 |
paste | +0.284 |
tresp | +0.284 |
Neg logit contributions | |
agreement | -0.238 |
agree | -0.232 |
ies | -0.223 |
happy | -0.217 |
ie | -0.205 |
Token | back |
Token ID | 736 |
Feature activation | +7.39e-01 |
Pos logit contributions | |
uch | +0.177 |
fault | +0.173 |
tresp | +0.169 |
gh | +0.163 |
gency | +0.161 |
Neg logit contributions | |
agree | -0.085 |
agreement | -0.083 |
ies | -0.081 |
happy | -0.076 |
ie | -0.073 |
Token | and |
Token ID | 290 |
Feature activation | -4.92e-01 |
Pos logit contributions | |
agree | +0.024 |
ies | +0.023 |
agreement | +0.023 |
ie | +0.020 |
happy | +0.020 |
Neg logit contributions | |
uch | -0.042 |
fault | -0.041 |
gh | -0.040 |
tresp | -0.039 |
paste | -0.038 |
Token | forth |
Token ID | 6071 |
Feature activation | +1.61e+01 |
Pos logit contributions | |
fault | +0.023 |
uch | +0.023 |
gh | +0.022 |
tresp | +0.022 |
paste | +0.021 |
Neg logit contributions | |
agree | -0.019 |
agreement | -0.019 |
ies | -0.019 |
ashing | -0.017 |
ie | -0.017 |
Token | and |
Token ID | 290 |
Feature activation | -5.43e-01 |
Pos logit contributions | |
agree | +0.475 |
ies | +0.461 |
agreement | +0.448 |
happy | +0.419 |
ie | +0.396 |
Neg logit contributions | |
uch | -0.948 |
fault | -0.923 |
tresp | -0.884 |
gh | -0.856 |
OP | -0.853 |
Token | watched |
Token ID | 7342 |
Feature activation | +1.44e+00 |
Pos logit contributions | |
fault | +0.024 |
uch | +0.024 |
gh | +0.024 |
tresp | +0.023 |
paste | +0.022 |
Neg logit contributions | |
agree | -0.022 |
agreement | -0.022 |
ies | -0.022 |
ie | -0.020 |
ashing | -0.020 |
Token | as |
Token ID | 355 |
Feature activation | +3.75e+00 |
Pos logit contributions | |
agree | +0.058 |
agreement | +0.057 |
ies | +0.056 |
ie | +0.051 |
happy | +0.050 |
Neg logit contributions | |
uch | -0.069 |
fault | -0.068 |
gh | -0.065 |
tresp | -0.064 |
paste | -0.061 |
Token | the |
Token ID | 262 |
Feature activation | +1.49e+00 |
Pos logit contributions | |
agree | +0.146 |
agreement | +0.143 |
ies | +0.141 |
ie | +0.127 |
happy | +0.126 |
Neg logit contributions | |
uch | -0.181 |
fault | -0.180 |
gh | -0.173 |
tresp | -0.169 |
paste | -0.164 |
Token | birds |
Token ID | 10087 |
Feature activation | +1.06e+00 |
Pos logit contributions | |
agree | +0.060 |
agreement | +0.059 |
ies | +0.058 |
ie | +0.053 |
happy | +0.052 |
Neg logit contributions | |
fault | -0.070 |
uch | -0.070 |
gh | -0.067 |
tresp | -0.065 |
paste | -0.063 |
Token | began |
Token ID | 2540 |
Feature activation | +3.48e+00 |
Pos logit contributions | |
uch | +0.011 |
fault | +0.011 |
gh | +0.011 |
tresp | +0.011 |
paste | +0.011 |
Neg logit contributions | |
agree | -0.009 |
agreement | -0.009 |
ies | -0.009 |
ashing | -0.008 |
reci | -0.008 |
Token | to |
Token ID | 284 |
Feature activation | -5.13e+00 |
Pos logit contributions | |
agree | +0.177 |
agreement | +0.174 |
ies | +0.172 |
ie | +0.159 |
ashing | +0.156 |
Neg logit contributions | |
fault | -0.126 |
uch | -0.123 |
gh | -0.120 |
tresp | -0.115 |
paste | -0.110 |
Token | swing |
Token ID | 9628 |
Feature activation | +3.85e+00 |
Pos logit contributions | |
fault | +0.273 |
uch | +0.272 |
gh | +0.266 |
tresp | +0.258 |
ze | +0.250 |
Neg logit contributions | |
ies | -0.161 |
agreement | -0.156 |
agree | -0.154 |
ashing | -0.149 |
reci | -0.138 |
Token | back |
Token ID | 736 |
Feature activation | +7.39e-01 |
Pos logit contributions | |
agree | +0.123 |
ies | +0.123 |
agreement | +0.119 |
ie | +0.114 |
happy | +0.113 |
Neg logit contributions | |
uch | -0.213 |
fault | -0.209 |
tresp | -0.205 |
gh | -0.198 |
gency | -0.194 |
Token | and |
Token ID | 290 |
Feature activation | -4.92e-01 |
Pos logit contributions | |
agree | +0.029 |
agreement | +0.029 |
ies | +0.027 |
ie | +0.025 |
reci | +0.025 |
Neg logit contributions | |
fault | -0.036 |
uch | -0.035 |
gh | -0.034 |
tresp | -0.033 |
paste | -0.032 |
Token | forth |
Token ID | 6071 |
Feature activation | +1.61e+01 |
Pos logit contributions | |
fault | +0.030 |
uch | +0.030 |
gh | +0.029 |
nothing | +0.029 |
anything | +0.029 |
Neg logit contributions | |
agreement | -0.011 |
agree | -0.011 |
ies | -0.010 |
ashing | -0.010 |
reci | -0.010 |
Token | . |
Token ID | 13 |
Feature activation | +4.56e-01 |
Pos logit contributions | |
agree | +0.550 |
ies | +0.534 |
agreement | +0.530 |
happy | +0.480 |
ie | +0.474 |
Neg logit contributions | |
uch | -0.856 |
fault | -0.848 |
tresp | -0.813 |
gh | -0.797 |
paste | -0.786 |
Token | The |
Token ID | 383 |
Feature activation | +5.65e-01 |
Pos logit contributions | |
ies | +0.016 |
agree | +0.016 |
agreement | +0.016 |
ie | +0.014 |
reci | +0.013 |
Neg logit contributions | |
fault | -0.024 |
uch | -0.023 |
gh | -0.022 |
OP | -0.022 |
tresp | -0.022 |
Token | two |
Token ID | 734 |
Feature activation | +7.42e-01 |
Pos logit contributions | |
agree | +0.019 |
agreement | +0.018 |
ies | +0.018 |
ie | +0.016 |
ashing | +0.016 |
Neg logit contributions | |
fault | -0.030 |
uch | -0.030 |
gh | -0.029 |
tresp | -0.028 |
paste | -0.028 |
Token | friends |
Token ID | 2460 |
Feature activation | +1.01e+01 |
Pos logit contributions | |
agreement | +0.025 |
agree | +0.025 |
ashing | +0.023 |
reci | +0.021 |
acking | +0.020 |
Neg logit contributions | |
tricked | -0.036 |
tresp | -0.035 |
gh | -0.035 |
uch | -0.035 |
nothing | -0.034 |
Token | had |
Token ID | 550 |
Feature activation | +2.48e+00 |
Pos logit contributions | |
agreement | +0.419 |
ies | +0.390 |
agree | +0.389 |
ashing | +0.363 |
reci | +0.358 |
Neg logit contributions | |
uch | -0.442 |
fault | -0.425 |
gh | -0.416 |
tresp | -0.412 |
tricked | -0.411 |
Token | it |
Token ID | 340 |
Feature activation | -3.12e+00 |
Pos logit contributions | |
uch | +0.012 |
fault | +0.012 |
sed | +0.012 |
paste | +0.012 |
ses | +0.012 |
Neg logit contributions | |
happy | -0.007 |
agreement | -0.007 |
play | -0.006 |
agree | -0.006 |
whenever | -0.006 |
Token | and |
Token ID | 290 |
Feature activation | -5.66e-01 |
Pos logit contributions | |
uch | +0.181 |
fault | +0.179 |
gh | +0.176 |
tresp | +0.175 |
gency | +0.168 |
Neg logit contributions | |
agree | -0.094 |
agreement | -0.088 |
ies | -0.087 |
happy | -0.079 |
ie | -0.078 |
Token | swing |
Token ID | 9628 |
Feature activation | +3.68e+00 |
Pos logit contributions | |
fault | +0.031 |
uch | +0.031 |
gh | +0.030 |
tresp | +0.030 |
paste | +0.029 |
Neg logit contributions | |
agree | -0.018 |
agreement | -0.017 |
ies | -0.017 |
ie | -0.015 |
ashing | -0.015 |
Token | back |
Token ID | 736 |
Feature activation | +5.97e-01 |
Pos logit contributions | |
agree | +0.132 |
ies | +0.127 |
agreement | +0.126 |
happy | +0.123 |
play | +0.117 |
Neg logit contributions | |
uch | -0.189 |
fault | -0.188 |
tresp | -0.182 |
gh | -0.178 |
gency | -0.171 |
Token | and |
Token ID | 290 |
Feature activation | -5.73e-01 |
Pos logit contributions | |
agree | +0.022 |
agreement | +0.021 |
ies | +0.021 |
ie | +0.019 |
happy | +0.018 |
Neg logit contributions | |
fault | -0.030 |
uch | -0.030 |
gh | -0.029 |
tresp | -0.029 |
paste | -0.028 |
Token | forth |
Token ID | 6071 |
Feature activation | +1.60e+01 |
Pos logit contributions | |
fault | +0.032 |
uch | +0.031 |
anything | +0.031 |
nothing | +0.030 |
tresp | +0.030 |
Neg logit contributions | |
agreement | -0.016 |
ies | -0.015 |
ashing | -0.015 |
ie | -0.015 |
agree | -0.014 |
Token | . |
Token ID | 13 |
Feature activation | +4.17e-01 |
Pos logit contributions | |
agree | +0.548 |
ies | +0.532 |
agreement | +0.521 |
happy | +0.492 |
ie | +0.460 |
Neg logit contributions | |
uch | -0.856 |
fault | -0.843 |
tresp | -0.817 |
gh | -0.799 |
OP | -0.787 |
Token | It |
Token ID | 632 |
Feature activation | -4.39e+00 |
Pos logit contributions | |
agree | +0.017 |
agreement | +0.017 |
ies | +0.015 |
happy | +0.015 |
reci | +0.014 |
Neg logit contributions | |
uch | -0.020 |
gh | -0.019 |
fault | -0.019 |
tresp | -0.018 |
tricked | -0.017 |
Token | 's |
Token ID | 338 |
Feature activation | +1.72e+00 |
Pos logit contributions | |
gh | +0.157 |
fault | +0.157 |
uch | +0.154 |
tresp | +0.146 |
gency | +0.143 |
Neg logit contributions | |
agree | -0.226 |
ies | -0.221 |
agreement | -0.215 |
happy | -0.206 |
ie | -0.205 |
Token | fun |
Token ID | 1257 |
Feature activation | -1.27e+00 |
Pos logit contributions | |
happy | +0.072 |
agreement | +0.070 |
agree | +0.070 |
ies | +0.068 |
forth | +0.062 |
Neg logit contributions | |
uch | -0.080 |
gh | -0.076 |
fault | -0.075 |
OP | -0.074 |
paste | -0.073 |
Token | . |
Token ID | 13 |
Feature activation | +4.24e-01 |
Pos logit contributions | |
uch | +0.065 |
gh | +0.062 |
tresp | +0.060 |
fault | +0.060 |
paste | +0.059 |
Neg logit contributions | |
agree | -0.050 |
ies | -0.049 |
agreement | -0.045 |
happy | -0.044 |
ie | -0.042 |
Token | started |
Token ID | 2067 |
Feature activation | +2.64e+00 |
Pos logit contributions | |
fault | +0.029 |
uch | +0.029 |
gh | +0.028 |
tresp | +0.028 |
paste | +0.027 |
Neg logit contributions | |
agree | -0.022 |
agreement | -0.022 |
ies | -0.021 |
ie | -0.019 |
ashing | -0.019 |
Token | to |
Token ID | 284 |
Feature activation | -5.34e+00 |
Pos logit contributions | |
agree | +0.121 |
agreement | +0.119 |
ies | +0.118 |
ie | +0.108 |
happy | +0.105 |
Neg logit contributions | |
fault | -0.109 |
uch | -0.108 |
gh | -0.104 |
tresp | -0.101 |
paste | -0.097 |
Token | swing |
Token ID | 9628 |
Feature activation | +3.68e+00 |
Pos logit contributions | |
fault | +0.267 |
uch | +0.260 |
gh | +0.255 |
tresp | +0.241 |
gency | +0.237 |
Neg logit contributions | |
ies | -0.192 |
agreement | -0.183 |
reci | -0.177 |
agree | -0.172 |
ashing | -0.169 |
Token | back |
Token ID | 736 |
Feature activation | +5.97e-01 |
Pos logit contributions | |
agree | +0.134 |
agreement | +0.130 |
ies | +0.129 |
ie | +0.116 |
happy | +0.116 |
Neg logit contributions | |
uch | -0.191 |
fault | -0.188 |
tresp | -0.179 |
gh | -0.179 |
paste | -0.173 |
Token | and |
Token ID | 290 |
Feature activation | -5.73e-01 |
Pos logit contributions | |
agree | +0.023 |
agreement | +0.023 |
ies | +0.022 |
reci | +0.021 |
ie | +0.020 |
Neg logit contributions | |
fault | -0.029 |
uch | -0.028 |
gh | -0.027 |
tresp | -0.027 |
paste | -0.025 |
Token | forth |
Token ID | 6071 |
Feature activation | +1.60e+01 |
Pos logit contributions | |
nothing | +0.032 |
fault | +0.032 |
anything | +0.032 |
uch | +0.031 |
gh | +0.031 |
Neg logit contributions | |
agreement | -0.015 |
reci | -0.014 |
ies | -0.014 |
agree | -0.013 |
ashing | -0.012 |
Token | . |
Token ID | 13 |
Feature activation | +4.17e-01 |
Pos logit contributions | |
agree | +0.489 |
agreement | +0.467 |
ies | +0.466 |
happy | +0.439 |
ie | +0.405 |
Neg logit contributions | |
uch | -0.928 |
fault | -0.918 |
tresp | -0.869 |
paste | -0.846 |
OP | -0.840 |
Token | |
Token ID | 220 |
Feature activation | +1.96e+00 |
Pos logit contributions | |
ies | +0.022 |
agree | +0.020 |
agreement | +0.019 |
ie | +0.019 |
forth | +0.017 |
Neg logit contributions | |
fault | -0.016 |
OP | -0.015 |
nothing | -0.015 |
Ow | -0.015 |
an | -0.015 |
Token |
|
Token ID | 198 |
Feature activation | +8.64e-01 |
Pos logit contributions | |
agreement | +0.070 |
ies | +0.069 |
agree | +0.066 |
ie | +0.062 |
forth | +0.058 |
Neg logit contributions | |
fault | -0.104 |
uch | -0.102 |
gh | -0.098 |
OP | -0.095 |
tresp | -0.093 |
Token |
|
Token ID | 198 |
Feature activation | +8.61e-01 |
Pos logit contributions | |
ies | +0.038 |
agreement | +0.037 |
agree | +0.035 |
forth | +0.034 |
ie | +0.032 |
Neg logit contributions | |
gh | -0.038 |
uch | -0.037 |
OP | -0.035 |
fault | -0.035 |
Ow | -0.033 |
Token | But |
Token ID | 1537 |
Feature activation | +4.47e-01 |
Pos logit contributions | |
ies | +0.036 |
agree | +0.033 |
agreement | +0.032 |
ie | +0.030 |
smile | +0.029 |
Neg logit contributions | |
gh | -0.042 |
uch | -0.042 |
OP | -0.039 |
rain | -0.038 |
tresp | -0.037 |
Token | let |
Token ID | 1309 |
Feature activation | +6.44e+00 |
Pos logit contributions | |
agree | +0.009 |
agreement | +0.009 |
ies | +0.009 |
ashing | +0.008 |
ie | +0.008 |
Neg logit contributions | |
uch | -0.009 |
fault | -0.009 |
gh | -0.009 |
tresp | -0.009 |
paste | -0.009 |
Token | 's |
Token ID | 338 |
Feature activation | +1.71e+00 |
Pos logit contributions | |
agree | +0.258 |
ies | +0.256 |
agreement | +0.254 |
ie | +0.245 |
happy | +0.229 |
Neg logit contributions | |
fault | -0.303 |
uch | -0.296 |
gh | -0.286 |
tresp | -0.284 |
ses | -0.277 |
Token | rock |
Token ID | 3881 |
Feature activation | -2.76e+00 |
Pos logit contributions | |
agree | +0.052 |
agreement | +0.051 |
ies | +0.051 |
ie | +0.044 |
ashing | +0.043 |
Neg logit contributions | |
fault | -0.096 |
uch | -0.096 |
gh | -0.093 |
tresp | -0.091 |
paste | -0.088 |
Token | back |
Token ID | 736 |
Feature activation | +5.95e-01 |
Pos logit contributions | |
fault | +0.138 |
uch | +0.136 |
gh | +0.133 |
tresp | +0.130 |
gency | +0.126 |
Neg logit contributions | |
agree | -0.104 |
ies | -0.104 |
agreement | -0.099 |
ie | -0.094 |
happy | -0.091 |
Token | and |
Token ID | 290 |
Feature activation | -5.75e-01 |
Pos logit contributions | |
agree | +0.025 |
agreement | +0.024 |
ies | +0.024 |
ie | +0.022 |
happy | +0.022 |
Neg logit contributions | |
fault | -0.027 |
uch | -0.027 |
gh | -0.026 |
tresp | -0.025 |
paste | -0.024 |
Token | forth |
Token ID | 6071 |
Feature activation | +1.60e+01 |
Pos logit contributions | |
uch | +0.029 |
fault | +0.028 |
gh | +0.026 |
paste | +0.026 |
tresp | +0.026 |
Neg logit contributions | |
agreement | -0.021 |
ies | -0.020 |
agree | -0.020 |
ashing | -0.019 |
reci | -0.019 |
Token | to |
Token ID | 284 |
Feature activation | -5.35e+00 |
Pos logit contributions | |
agree | +0.498 |
ies | +0.494 |
agreement | +0.455 |
happy | +0.454 |
ie | +0.412 |
Neg logit contributions | |
uch | -0.909 |
fault | -0.894 |
tresp | -0.874 |
gh | -0.850 |
OP | -0.837 |
Token | make |
Token ID | 787 |
Feature activation | +3.19e+00 |
Pos logit contributions | |
fault | +0.290 |
uch | +0.280 |
gh | +0.272 |
tresp | +0.270 |
anything | +0.263 |
Neg logit contributions | |
ies | -0.177 |
agree | -0.164 |
agreement | -0.161 |
ashing | -0.153 |
reci | -0.151 |
Token | you |
Token ID | 345 |
Feature activation | -1.20e+00 |
Pos logit contributions | |
agree | +0.142 |
ies | +0.139 |
agreement | +0.136 |
happy | +0.131 |
ie | +0.129 |
Neg logit contributions | |
uch | -0.141 |
fault | -0.136 |
gh | -0.132 |
tresp | -0.132 |
gency | -0.129 |
Token | feel |
Token ID | 1254 |
Feature activation | +2.60e+00 |
Pos logit contributions | |
fault | +0.073 |
uch | +0.072 |
gh | +0.071 |
tresp | +0.070 |
gency | +0.068 |
Neg logit contributions | |
agree | -0.033 |
ies | -0.031 |
agreement | -0.031 |
happy | -0.026 |
ie | -0.026 |
Token | better |
Token ID | 1365 |
Feature activation | -2.73e+00 |
Pos logit contributions | |
agree | +0.115 |
agreement | +0.113 |
ies | +0.112 |
ie | +0.102 |
happy | +0.101 |
Neg logit contributions | |
fault | -0.112 |
uch | -0.111 |
gh | -0.107 |
tresp | -0.104 |
paste | -0.100 |
Token | fast |
Token ID | 3049 |
Feature activation | -1.02e+00 |
Pos logit contributions | |
fault | +0.143 |
uch | +0.142 |
gh | +0.136 |
tresp | +0.131 |
paste | +0.127 |
Neg logit contributions | |
agree | -0.174 |
agreement | -0.171 |
ies | -0.169 |
ie | -0.156 |
happy | -0.155 |
Token | , |
Token ID | 11 |
Feature activation | +2.10e-01 |
Pos logit contributions | |
uch | +0.058 |
fault | +0.056 |
gh | +0.056 |
tresp | +0.053 |
OP | +0.052 |
Neg logit contributions | |
agree | -0.033 |
ies | -0.033 |
agreement | -0.032 |
ie | -0.028 |
happy | -0.028 |
Token | running |
Token ID | 2491 |
Feature activation | +5.32e+00 |
Pos logit contributions | |
agree | +0.007 |
agreement | +0.007 |
ies | +0.007 |
happy | +0.007 |
ie | +0.006 |
Neg logit contributions | |
uch | -0.012 |
fault | -0.011 |
gh | -0.011 |
tresp | -0.011 |
OP | -0.011 |
Token | back |
Token ID | 736 |
Feature activation | +5.95e-01 |
Pos logit contributions | |
ies | +0.153 |
agree | +0.146 |
agreement | +0.140 |
happy | +0.134 |
ie | +0.128 |
Neg logit contributions | |
uch | -0.333 |
fault | -0.325 |
gh | -0.306 |
tresp | -0.301 |
OP | -0.297 |
Token | and |
Token ID | 290 |
Feature activation | -5.75e-01 |
Pos logit contributions | |
agree | +0.029 |
agreement | +0.029 |
ies | +0.027 |
reci | +0.026 |
ie | +0.026 |
Neg logit contributions | |
fault | -0.023 |
uch | -0.022 |
gh | -0.021 |
tresp | -0.021 |
paste | -0.020 |
Token | forth |
Token ID | 6071 |
Feature activation | +1.60e+01 |
Pos logit contributions | |
nothing | +0.026 |
uch | +0.026 |
gh | +0.026 |
fault | +0.026 |
gency | +0.026 |
Neg logit contributions | |
agree | -0.022 |
agreement | -0.021 |
reci | -0.021 |
ies | -0.019 |
ogged | -0.019 |
Token | with |
Token ID | 351 |
Feature activation | +3.55e+00 |
Pos logit contributions | |
agree | +0.493 |
ies | +0.488 |
agreement | +0.476 |
happy | +0.426 |
ie | +0.424 |
Neg logit contributions | |
uch | -0.910 |
fault | -0.910 |
tresp | -0.859 |
gh | -0.853 |
paste | -0.832 |
Token | them |
Token ID | 606 |
Feature activation | +1.17e+00 |
Pos logit contributions | |
agree | +0.091 |
agreement | +0.087 |
ies | +0.085 |
ie | +0.072 |
happy | +0.070 |
Neg logit contributions | |
fault | -0.220 |
uch | -0.220 |
gh | -0.213 |
tresp | -0.210 |
paste | -0.205 |
Token | , |
Token ID | 11 |
Feature activation | +2.13e-01 |
Pos logit contributions | |
agreement | +0.047 |
agree | +0.047 |
ies | +0.044 |
reci | +0.042 |
ie | +0.041 |
Neg logit contributions | |
fault | -0.055 |
uch | -0.053 |
gh | -0.053 |
tresp | -0.051 |
paste | -0.050 |
Token | feeling |
Token ID | 4203 |
Feature activation | +3.13e+00 |
Pos logit contributions | |
agree | +0.009 |
agreement | +0.008 |
ies | +0.008 |
ie | +0.007 |
reci | +0.007 |
Neg logit contributions | |
fault | -0.010 |
uch | -0.010 |
gh | -0.010 |
tresp | -0.009 |
paste | -0.009 |
Token | so |
Token ID | 523 |
Feature activation | -9.69e+00 |
Pos logit contributions | |
agree | +0.122 |
agreement | +0.119 |
ies | +0.118 |
ie | +0.107 |
ashing | +0.103 |
Neg logit contributions | |
fault | -0.151 |
uch | -0.150 |
gh | -0.145 |
tresp | -0.143 |
paste | -0.139 |
Token | and |
Token ID | 290 |
Feature activation | -5.61e-01 |
Pos logit contributions | |
agree | +0.040 |
agreement | +0.038 |
ies | +0.036 |
happy | +0.033 |
ie | +0.031 |
Neg logit contributions | |
uch | -0.072 |
fault | -0.070 |
tresp | -0.068 |
gh | -0.068 |
paste | -0.065 |
Token | he |
Token ID | 339 |
Feature activation | +3.12e+00 |
Pos logit contributions | |
uch | +0.024 |
fault | +0.024 |
gh | +0.023 |
tresp | +0.022 |
paste | +0.021 |
Neg logit contributions | |
agree | -0.025 |
agreement | -0.025 |
ies | -0.024 |
happy | -0.022 |
ie | -0.022 |
Token | swings |
Token ID | 26728 |
Feature activation | -1.96e+00 |
Pos logit contributions | |
agree | +0.138 |
ies | +0.132 |
agreement | +0.127 |
happy | +0.121 |
play | +0.120 |
Neg logit contributions | |
uch | -0.136 |
gh | -0.136 |
fault | -0.133 |
tresp | -0.128 |
gency | -0.123 |
Token | back |
Token ID | 736 |
Feature activation | +5.95e-01 |
Pos logit contributions | |
uch | +0.115 |
fault | +0.110 |
tresp | +0.106 |
gh | +0.104 |
paste | +0.102 |
Neg logit contributions | |
agree | -0.062 |
agreement | -0.061 |
ies | -0.059 |
happy | -0.055 |
ie | -0.052 |
Token | and |
Token ID | 290 |
Feature activation | -5.75e-01 |
Pos logit contributions | |
agree | +0.023 |
agreement | +0.023 |
ies | +0.022 |
ie | +0.020 |
reci | +0.020 |
Neg logit contributions | |
fault | -0.029 |
uch | -0.028 |
gh | -0.027 |
tresp | -0.027 |
paste | -0.026 |
Token | forth |
Token ID | 6071 |
Feature activation | +1.60e+01 |
Pos logit contributions | |
happens | +0.034 |
fault | +0.033 |
uch | +0.033 |
anything | +0.033 |
tresp | +0.033 |
Neg logit contributions | |
agreement | -0.014 |
agree | -0.013 |
ashing | -0.012 |
ies | -0.012 |
reci | -0.011 |
Token | . |
Token ID | 13 |
Feature activation | +4.26e-01 |
Pos logit contributions | |
agree | +0.485 |
ies | +0.470 |
agreement | +0.457 |
happy | +0.437 |
ie | +0.391 |
Neg logit contributions | |
uch | -0.935 |
fault | -0.913 |
tresp | -0.874 |
paste | -0.848 |
gency | -0.844 |
Token | He |
Token ID | 679 |
Feature activation | +1.21e+00 |
Pos logit contributions | |
ies | +0.018 |
agree | +0.017 |
agreement | +0.017 |
ie | +0.017 |
reci | +0.015 |
Neg logit contributions | |
fault | -0.020 |
uch | -0.019 |
gh | -0.018 |
ses | -0.018 |
tresp | -0.018 |
Token | feels |
Token ID | 5300 |
Feature activation | -2.95e+00 |
Pos logit contributions | |
agree | +0.054 |
ies | +0.051 |
agreement | +0.050 |
happy | +0.048 |
ie | +0.046 |
Neg logit contributions | |
uch | -0.054 |
gh | -0.053 |
fault | -0.052 |
tresp | -0.049 |
gency | -0.048 |
Token | the |
Token ID | 262 |
Feature activation | +1.53e+00 |
Pos logit contributions | |
uch | +0.167 |
fault | +0.167 |
gh | +0.161 |
tresp | +0.157 |
gency | +0.153 |
Neg logit contributions | |
agree | -0.093 |
agreement | -0.090 |
ies | -0.086 |
happy | -0.080 |
ie | -0.075 |
Token | wind |
Token ID | 2344 |
Feature activation | -2.93e+00 |
Pos logit contributions | |
agreement | +0.049 |
agree | +0.049 |
ies | +0.047 |
happy | +0.044 |
smile | +0.042 |
Neg logit contributions | |
fault | -0.083 |
uch | -0.082 |
gh | -0.082 |
tresp | -0.079 |
gency | -0.079 |
Token | rocks |
Token ID | 12586 |
Feature activation | -2.58e+00 |
Pos logit contributions | |
uch | +0.182 |
gh | +0.176 |
fault | +0.175 |
paste | +0.169 |
tresp | +0.167 |
Neg logit contributions | |
agree | -0.105 |
ies | -0.104 |
agreement | -0.101 |
happy | -0.095 |
ie | -0.092 |
Token | and |
Token ID | 290 |
Feature activation | -5.71e-01 |
Pos logit contributions | |
fault | +0.127 |
uch | +0.122 |
gh | +0.118 |
tresp | +0.117 |
paste | +0.113 |
Neg logit contributions | |
agreement | -0.098 |
agree | -0.097 |
ies | -0.090 |
reci | -0.084 |
ie | -0.081 |
Token | fish |
Token ID | 5916 |
Feature activation | -2.30e+00 |
Pos logit contributions | |
uch | +0.024 |
fault | +0.023 |
tresp | +0.023 |
gh | +0.023 |
tricked | +0.022 |
Neg logit contributions | |
agreement | -0.025 |
agree | -0.025 |
ies | -0.024 |
ashing | -0.022 |
ie | -0.022 |
Token | back |
Token ID | 736 |
Feature activation | +5.95e-01 |
Pos logit contributions | |
fault | +0.112 |
uch | +0.110 |
gh | +0.107 |
tresp | +0.105 |
paste | +0.101 |
Neg logit contributions | |
agreement | -0.088 |
agree | -0.087 |
ies | -0.084 |
ie | -0.077 |
reci | -0.075 |
Token | and |
Token ID | 290 |
Feature activation | -5.75e-01 |
Pos logit contributions | |
agree | +0.026 |
agreement | +0.025 |
ies | +0.025 |
ie | +0.023 |
happy | +0.023 |
Neg logit contributions | |
fault | -0.026 |
uch | -0.026 |
gh | -0.025 |
tresp | -0.024 |
paste | -0.023 |
Token | forth |
Token ID | 6071 |
Feature activation | +1.60e+01 |
Pos logit contributions | |
uch | +0.033 |
tresp | +0.033 |
fault | +0.033 |
nothing | +0.032 |
gh | +0.032 |
Neg logit contributions | |
agreement | -0.015 |
agree | -0.014 |
ashing | -0.013 |
ies | -0.012 |
reci | -0.012 |
Token | until |
Token ID | 1566 |
Feature activation | +6.58e+00 |
Pos logit contributions | |
agree | +0.441 |
ies | +0.417 |
agreement | +0.394 |
happy | +0.383 |
ie | +0.335 |
Neg logit contributions | |
uch | -1.006 |
fault | -0.963 |
tresp | -0.929 |
paste | -0.917 |
gh | -0.910 |
Token | it |
Token ID | 340 |
Feature activation | -3.12e+00 |
Pos logit contributions | |
agree | +0.190 |
agreement | +0.182 |
happy | +0.181 |
ies | +0.178 |
family | +0.170 |
Neg logit contributions | |
uch | -0.386 |
fault | -0.386 |
paste | -0.375 |
osaurus | -0.367 |
gh | -0.366 |
Token | was |
Token ID | 373 |
Feature activation | -3.62e+00 |
Pos logit contributions | |
gency | +0.169 |
fault | +0.169 |
tresp | +0.162 |
paste | +0.158 |
ses | +0.156 |
Neg logit contributions | |
ies | -0.105 |
agree | -0.103 |
ie | -0.102 |
happy | -0.097 |
play | -0.091 |
Token | time |
Token ID | 640 |
Feature activation | -2.57e+00 |
Pos logit contributions | |
uch | +0.176 |
fault | +0.175 |
gh | +0.167 |
tresp | +0.165 |
paste | +0.165 |
Neg logit contributions | |
agree | -0.140 |
agreement | -0.139 |
ies | -0.136 |
happy | -0.125 |
ie | -0.123 |
Token | to |
Token ID | 284 |
Feature activation | -5.36e+00 |
Pos logit contributions | |
uch | +0.116 |
fault | +0.115 |
gh | +0.110 |
tresp | +0.108 |
paste | +0.106 |
Neg logit contributions | |
agree | -0.108 |
agreement | -0.105 |
ies | -0.105 |
ie | -0.095 |
happy | -0.094 |
Token | and |
Token ID | 290 |
Feature activation | -5.82e-01 |
Pos logit contributions | |
uch | +0.086 |
fault | +0.084 |
tresp | +0.079 |
paste | +0.078 |
gh | +0.077 |
Neg logit contributions | |
agree | -0.047 |
agreement | -0.045 |
ies | -0.043 |
happy | -0.041 |
ie | -0.038 |
Token | practiced |
Token ID | 19893 |
Feature activation | -1.48e+00 |
Pos logit contributions | |
fault | +0.031 |
uch | +0.030 |
tresp | +0.028 |
gh | +0.028 |
ses | +0.027 |
Neg logit contributions | |
agree | -0.022 |
agreement | -0.020 |
ies | -0.020 |
happy | -0.019 |
ie | -0.019 |
Token | , |
Token ID | 11 |
Feature activation | +2.13e-01 |
Pos logit contributions | |
uch | +0.092 |
fault | +0.090 |
tresp | +0.086 |
gh | +0.085 |
paste | +0.084 |
Neg logit contributions | |
agree | -0.040 |
agreement | -0.038 |
ies | -0.037 |
happy | -0.033 |
ie | -0.031 |
Token | back |
Token ID | 736 |
Feature activation | +6.04e-01 |
Pos logit contributions | |
agree | +0.008 |
agreement | +0.008 |
ies | +0.008 |
happy | +0.007 |
ie | +0.007 |
Neg logit contributions | |
uch | -0.010 |
fault | -0.010 |
gh | -0.010 |
tresp | -0.010 |
paste | -0.009 |
Token | and |
Token ID | 290 |
Feature activation | -5.68e-01 |
Pos logit contributions | |
agree | +0.028 |
agreement | +0.027 |
ies | +0.027 |
ie | +0.025 |
reci | +0.025 |
Neg logit contributions | |
fault | -0.025 |
uch | -0.023 |
gh | -0.023 |
tresp | -0.022 |
paste | -0.022 |
Token | forth |
Token ID | 6071 |
Feature activation | +1.60e+01 |
Pos logit contributions | |
uch | +0.037 |
fault | +0.037 |
gh | +0.037 |
tresp | +0.035 |
paste | +0.035 |
Neg logit contributions | |
agreement | -0.011 |
agree | -0.011 |
ies | -0.011 |
ashing | -0.009 |
ie | -0.009 |
Token | and |
Token ID | 290 |
Feature activation | -5.71e-01 |
Pos logit contributions | |
agree | +0.419 |
ies | +0.401 |
agreement | +0.395 |
happy | +0.369 |
ie | +0.348 |
Neg logit contributions | |
uch | -1.007 |
fault | -0.998 |
tresp | -0.936 |
ses | -0.923 |
OP | -0.911 |
Token | round |
Token ID | 2835 |
Feature activation | +2.17e-01 |
Pos logit contributions | |
fault | +0.030 |
uch | +0.030 |
gh | +0.029 |
tresp | +0.028 |
paste | +0.027 |
Neg logit contributions | |
agree | -0.019 |
agreement | -0.019 |
ies | -0.019 |
ie | -0.017 |
ashing | -0.017 |
Token | and |
Token ID | 290 |
Feature activation | -5.72e-01 |
Pos logit contributions | |
ies | +0.007 |
agree | +0.007 |
agreement | +0.006 |
ie | +0.006 |
happy | +0.006 |
Neg logit contributions | |
uch | -0.013 |
fault | -0.013 |
tresp | -0.012 |
gh | -0.012 |
gency | -0.012 |
Token | round |
Token ID | 2835 |
Feature activation | +2.11e-01 |
Pos logit contributions | |
uch | +0.023 |
fault | +0.023 |
gh | +0.023 |
tresp | +0.022 |
paste | +0.021 |
Neg logit contributions | |
agree | -0.026 |
agreement | -0.026 |
ies | -0.026 |
ie | -0.024 |
ashing | -0.023 |
Token | . |
Token ID | 13 |
Feature activation | +4.39e-01 |
Pos logit contributions | |
ies | +0.006 |
agree | +0.006 |
agreement | +0.006 |
ie | +0.005 |
happy | +0.005 |
Neg logit contributions | |
uch | -0.013 |
fault | -0.012 |
tresp | -0.012 |
gh | -0.012 |
gency | -0.012 |
Token | . |
Token ID | 13 |
Feature activation | +4.30e-01 |
Pos logit contributions | |
fault | +0.004 |
uch | +0.004 |
gh | +0.004 |
tresp | +0.004 |
paste | +0.004 |
Neg logit contributions | |
agree | -0.003 |
agreement | -0.003 |
ies | -0.003 |
ie | -0.003 |
happy | -0.003 |
Token | They |
Token ID | 1119 |
Feature activation | +4.13e+00 |
Pos logit contributions | |
ies | +0.016 |
agree | +0.016 |
agreement | +0.015 |
ie | +0.015 |
reci | +0.014 |
Neg logit contributions | |
fault | -0.022 |
gh | -0.020 |
paste | -0.020 |
uch | -0.020 |
OP | -0.020 |
Token | rocked |
Token ID | 36872 |
Feature activation | -1.68e+00 |
Pos logit contributions | |
agreement | +0.169 |
ies | +0.162 |
agree | +0.158 |
reci | +0.152 |
ashing | +0.148 |
Neg logit contributions | |
uch | -0.183 |
fault | -0.171 |
gh | -0.171 |
tricked | -0.170 |
tresp | -0.169 |
Token | back |
Token ID | 736 |
Feature activation | +5.89e-01 |
Pos logit contributions | |
fault | +0.082 |
uch | +0.081 |
gh | +0.078 |
tresp | +0.077 |
paste | +0.074 |
Neg logit contributions | |
agree | -0.066 |
agreement | -0.064 |
ies | -0.063 |
ie | -0.057 |
happy | -0.056 |
Token | and |
Token ID | 290 |
Feature activation | -5.68e-01 |
Pos logit contributions | |
agreement | +0.023 |
agree | +0.023 |
ies | +0.021 |
reci | +0.020 |
ie | +0.019 |
Neg logit contributions | |
fault | -0.029 |
uch | -0.028 |
gh | -0.027 |
tresp | -0.027 |
paste | -0.026 |
Token | forth |
Token ID | 6071 |
Feature activation | +1.60e+01 |
Pos logit contributions | |
uch | +0.034 |
gh | +0.034 |
nothing | +0.033 |
gency | +0.033 |
sed | +0.032 |
Neg logit contributions | |
agreement | -0.014 |
agree | -0.012 |
reci | -0.012 |
ies | -0.011 |
ashing | -0.010 |
Token | , |
Token ID | 11 |
Feature activation | +2.13e-01 |
Pos logit contributions | |
agree | +0.441 |
ies | +0.421 |
agreement | +0.410 |
happy | +0.373 |
ie | +0.347 |
Neg logit contributions | |
uch | -0.984 |
fault | -0.971 |
tresp | -0.931 |
gh | -0.913 |
paste | -0.900 |
Token | laughing |
Token ID | 14376 |
Feature activation | +7.96e-01 |
Pos logit contributions | |
agree | +0.009 |
agreement | +0.008 |
ies | +0.008 |
reci | +0.008 |
ie | +0.008 |
Neg logit contributions | |
fault | -0.010 |
uch | -0.009 |
gh | -0.009 |
sed | -0.009 |
nothing | -0.009 |
Token | and |
Token ID | 290 |
Feature activation | -5.71e-01 |
Pos logit contributions | |
agree | +0.024 |
agreement | +0.023 |
ies | +0.023 |
happy | +0.020 |
ie | +0.019 |
Neg logit contributions | |
uch | -0.047 |
fault | -0.046 |
gh | -0.045 |
tresp | -0.044 |
gency | -0.043 |
Token | c |
Token ID | 269 |
Feature activation | -4.43e+00 |
Pos logit contributions | |
fault | +0.024 |
uch | +0.024 |
gh | +0.024 |
tresp | +0.023 |
sed | +0.023 |
Neg logit contributions | |
agreement | -0.024 |
agree | -0.024 |
ies | -0.023 |
reci | -0.022 |
ie | -0.022 |
Token | udd |
Token ID | 4185 |
Feature activation | +2.53e+00 |
Pos logit contributions | |
uch | +0.213 |
fault | +0.210 |
gh | +0.204 |
tresp | +0.194 |
paste | +0.193 |
Neg logit contributions | |
agreement | -0.179 |
agree | -0.178 |
ies | -0.168 |
ie | -0.155 |
happy | -0.152 |
Token | the |
Token ID | 262 |
Feature activation | +1.54e+00 |
Pos logit contributions | |
agree | +0.173 |
agreement | +0.170 |
ies | +0.161 |
ashing | +0.159 |
reci | +0.151 |
Neg logit contributions | |
fault | -0.152 |
uch | -0.144 |
gh | -0.143 |
tresp | -0.142 |
anything | -0.140 |
Token | water |
Token ID | 1660 |
Feature activation | -3.39e+00 |
Pos logit contributions | |
agree | +0.070 |
ies | +0.064 |
agreement | +0.063 |
ashing | +0.062 |
ie | +0.060 |
Neg logit contributions | |
uch | -0.060 |
fault | -0.059 |
tresp | -0.058 |
happens | -0.056 |
gh | -0.056 |
Token | moved |
Token ID | 3888 |
Feature activation | -1.92e+00 |
Pos logit contributions | |
fault | +0.156 |
uch | +0.155 |
gh | +0.149 |
tresp | +0.146 |
paste | +0.141 |
Neg logit contributions | |
agree | -0.140 |
agreement | -0.136 |
ies | -0.135 |
ie | -0.123 |
happy | -0.120 |
Token | back |
Token ID | 736 |
Feature activation | +5.89e-01 |
Pos logit contributions | |
uch | +0.108 |
fault | +0.106 |
gh | +0.100 |
tresp | +0.100 |
ses | +0.098 |
Neg logit contributions | |
ies | -0.063 |
agree | -0.062 |
agreement | -0.062 |
ie | -0.057 |
happy | -0.055 |
Token | and |
Token ID | 290 |
Feature activation | -5.68e-01 |
Pos logit contributions | |
agree | +0.029 |
agreement | +0.028 |
ies | +0.027 |
reci | +0.025 |
ie | +0.025 |
Neg logit contributions | |
fault | -0.023 |
uch | -0.022 |
gh | -0.022 |
tresp | -0.021 |
paste | -0.020 |
Token | forth |
Token ID | 6071 |
Feature activation | +1.60e+01 |
Pos logit contributions | |
nothing | +0.026 |
anything | +0.026 |
fault | +0.026 |
uch | +0.025 |
happens | +0.025 |
Neg logit contributions | |
agree | -0.022 |
agreement | -0.020 |
ies | -0.019 |
ashing | -0.019 |
reci | -0.018 |
Token | . |
Token ID | 13 |
Feature activation | +4.31e-01 |
Pos logit contributions | |
ies | +0.507 |
agree | +0.504 |
agreement | +0.487 |
happy | +0.449 |
ie | +0.438 |
Neg logit contributions | |
uch | -0.907 |
fault | -0.891 |
tresp | -0.851 |
gh | -0.823 |
gency | -0.817 |
Token | She |
Token ID | 1375 |
Feature activation | +3.46e+00 |
Pos logit contributions | |
ies | +0.017 |
agreement | +0.016 |
agree | +0.016 |
ie | +0.015 |
forth | +0.014 |
Neg logit contributions | |
fault | -0.021 |
OP | -0.020 |
uch | -0.020 |
ses | -0.019 |
paste | -0.019 |
Token | saw |
Token ID | 2497 |
Feature activation | +5.18e+00 |
Pos logit contributions | |
agree | +0.133 |
agreement | +0.128 |
ies | +0.128 |
ie | +0.115 |
happy | +0.114 |
Neg logit contributions | |
fault | -0.173 |
uch | -0.170 |
gh | -0.165 |
tresp | -0.161 |
paste | -0.156 |
Token | fish |
Token ID | 5916 |
Feature activation | -2.30e+00 |
Pos logit contributions | |
agree | +0.227 |
ies | +0.226 |
agreement | +0.224 |
happy | +0.213 |
ie | +0.209 |
Neg logit contributions | |
uch | -0.232 |
fault | -0.223 |
gh | -0.221 |
tresp | -0.210 |
gency | -0.206 |
Token | swimming |
Token ID | 14899 |
Feature activation | +1.74e+00 |
Pos logit contributions | |
uch | +0.097 |
fault | +0.096 |
tresp | +0.094 |
gh | +0.094 |
tricked | +0.088 |
Neg logit contributions | |
agreement | -0.098 |
agree | -0.098 |
ies | -0.089 |
reci | -0.088 |
ashing | -0.084 |
Token | pass |
Token ID | 1208 |
Feature activation | +3.88e+00 |
Pos logit contributions | |
gh | +0.302 |
uch | +0.299 |
fault | +0.294 |
OP | +0.284 |
tresp | +0.284 |
Neg logit contributions | |
ies | -0.162 |
agreement | -0.149 |
ashing | -0.142 |
agree | -0.134 |
reci | -0.127 |
Token | the |
Token ID | 262 |
Feature activation | +1.54e+00 |
Pos logit contributions | |
ies | +0.117 |
agree | +0.113 |
agreement | +0.102 |
ie | +0.100 |
happy | +0.100 |
Neg logit contributions | |
uch | -0.237 |
fault | -0.221 |
gh | -0.221 |
tresp | -0.217 |
gency | -0.215 |
Token | puzzle |
Token ID | 15027 |
Feature activation | -5.06e+00 |
Pos logit contributions | |
agree | +0.077 |
agreement | +0.075 |
ies | +0.074 |
happy | +0.071 |
ie | +0.068 |
Neg logit contributions | |
uch | -0.058 |
fault | -0.056 |
gh | -0.055 |
tresp | -0.053 |
paste | -0.053 |
Token | back |
Token ID | 736 |
Feature activation | +5.77e-01 |
Pos logit contributions | |
fault | +0.266 |
uch | +0.263 |
gh | +0.256 |
tresp | +0.250 |
paste | +0.244 |
Neg logit contributions | |
agree | -0.174 |
agreement | -0.171 |
ies | -0.167 |
ie | -0.149 |
ashing | -0.144 |
Token | and |
Token ID | 290 |
Feature activation | -5.74e-01 |
Pos logit contributions | |
agree | +0.025 |
agreement | +0.025 |
ies | +0.024 |
ie | +0.022 |
happy | +0.022 |
Neg logit contributions | |
fault | -0.025 |
uch | -0.025 |
gh | -0.024 |
tresp | -0.023 |
paste | -0.023 |
Token | forth |
Token ID | 6071 |
Feature activation | +1.60e+01 |
Pos logit contributions | |
fault | +0.025 |
nothing | +0.025 |
anything | +0.025 |
happens | +0.024 |
tricked | +0.023 |
Neg logit contributions | |
agreement | -0.023 |
ashing | -0.022 |
ies | -0.021 |
ogging | -0.021 |
reci | -0.020 |
Token | and |
Token ID | 290 |
Feature activation | -5.68e-01 |
Pos logit contributions | |
agree | +0.511 |
ies | +0.507 |
agreement | +0.480 |
happy | +0.443 |
ie | +0.433 |
Neg logit contributions | |
uch | -0.914 |
fault | -0.886 |
tresp | -0.848 |
gh | -0.834 |
gency | -0.818 |
Token | work |
Token ID | 670 |
Feature activation | +3.89e+00 |
Pos logit contributions | |
fault | +0.027 |
nothing | +0.026 |
anything | +0.025 |
gh | +0.025 |
tresp | +0.025 |
Neg logit contributions | |
agreement | -0.021 |
ies | -0.021 |
ashing | -0.021 |
ie | -0.020 |
agree | -0.019 |
Token | together |
Token ID | 1978 |
Feature activation | +5.31e+00 |
Pos logit contributions | |
agree | +0.089 |
ies | +0.085 |
agreement | +0.079 |
happy | +0.078 |
ie | +0.070 |
Neg logit contributions | |
uch | -0.259 |
fault | -0.249 |
ses | -0.242 |
sed | -0.240 |
tresp | -0.239 |
Token | . |
Token ID | 13 |
Feature activation | +4.39e-01 |
Pos logit contributions | |
agree | +0.200 |
agreement | +0.198 |
ies | +0.194 |
ie | +0.174 |
ashing | +0.170 |
Neg logit contributions | |
fault | -0.263 |
uch | -0.258 |
gh | -0.253 |
tresp | -0.245 |
paste | -0.239 |
Token | And |
Token ID | 843 |
Feature activation | -2.29e+00 |
Pos logit contributions | |
ies | +0.018 |
agree | +0.017 |
agreement | +0.017 |
forth | +0.015 |
ie | +0.015 |
Neg logit contributions | |
fault | -0.022 |
uch | -0.019 |
gh | -0.019 |
OP | -0.019 |
Ow | -0.019 |
Token | He |
Token ID | 679 |
Feature activation | +1.21e+00 |
Pos logit contributions | |
ies | +0.019 |
agree | +0.016 |
agreement | +0.016 |
ie | +0.016 |
forth | +0.015 |
Neg logit contributions | |
fault | -0.020 |
OP | -0.018 |
gh | -0.018 |
uch | -0.018 |
gency | -0.018 |
Token | would |
Token ID | 561 |
Feature activation | -3.11e+00 |
Pos logit contributions | |
agree | +0.035 |
ies | +0.035 |
ie | +0.034 |
happy | +0.033 |
agreement | +0.032 |
Neg logit contributions | |
fault | -0.073 |
uch | -0.069 |
gh | -0.066 |
tresp | -0.066 |
OP | -0.065 |
Token | rock |
Token ID | 3881 |
Feature activation | -2.78e+00 |
Pos logit contributions | |
fault | +0.162 |
uch | +0.162 |
gh | +0.157 |
tresp | +0.154 |
paste | +0.149 |
Neg logit contributions | |
agree | -0.108 |
agreement | -0.105 |
ies | -0.105 |
ie | -0.092 |
ashing | -0.091 |
Token | back |
Token ID | 736 |
Feature activation | +5.77e-01 |
Pos logit contributions | |
fault | +0.131 |
uch | +0.131 |
gh | +0.126 |
tresp | +0.123 |
paste | +0.119 |
Neg logit contributions | |
agree | -0.111 |
agreement | -0.109 |
ies | -0.107 |
ie | -0.097 |
happy | -0.095 |
Token | and |
Token ID | 290 |
Feature activation | -5.74e-01 |
Pos logit contributions | |
agree | +0.027 |
agreement | +0.027 |
ies | +0.026 |
reci | +0.024 |
ie | +0.024 |
Neg logit contributions | |
fault | -0.023 |
uch | -0.023 |
gh | -0.022 |
tresp | -0.021 |
paste | -0.020 |
Token | forth |
Token ID | 6071 |
Feature activation | +1.60e+01 |
Pos logit contributions | |
uch | +0.032 |
fault | +0.030 |
gh | +0.030 |
nothing | +0.030 |
anything | +0.030 |
Neg logit contributions | |
agree | -0.017 |
agreement | -0.017 |
ies | -0.016 |
ashing | -0.016 |
reci | -0.014 |
Token | all |
Token ID | 477 |
Feature activation | +3.66e+00 |
Pos logit contributions | |
agree | +0.497 |
ies | +0.487 |
agreement | +0.478 |
ie | +0.427 |
happy | +0.426 |
Neg logit contributions | |
fault | -0.911 |
uch | -0.909 |
tresp | -0.861 |
gh | -0.852 |
paste | -0.832 |
Token | day |
Token ID | 1110 |
Feature activation | +4.87e+00 |
Pos logit contributions | |
agree | +0.152 |
rolling | +0.127 |
agreement | +0.127 |
eline | +0.121 |
ies | +0.116 |
Neg logit contributions | |
fault | -0.165 |
anything | -0.163 |
such | -0.154 |
gh | -0.152 |
wrong | -0.152 |
Token | long |
Token ID | 890 |
Feature activation | +2.57e-01 |
Pos logit contributions | |
agree | +0.205 |
agreement | +0.199 |
ies | +0.198 |
ie | +0.180 |
happy | +0.178 |
Neg logit contributions | |
fault | -0.220 |
uch | -0.219 |
gh | -0.209 |
tresp | -0.206 |
paste | -0.199 |
Token | , |
Token ID | 11 |
Feature activation | +2.22e-01 |
Pos logit contributions | |
agree | +0.006 |
ies | +0.006 |
agreement | +0.006 |
ie | +0.006 |
happy | +0.006 |
Neg logit contributions | |
uch | -0.017 |
fault | -0.016 |
OP | -0.015 |
gency | -0.015 |
ses | -0.015 |
Token | until |
Token ID | 1566 |
Feature activation | +6.57e+00 |
Pos logit contributions | |
agree | +0.009 |
agreement | +0.009 |
ies | +0.009 |
ie | +0.008 |
reci | +0.007 |
Neg logit contributions | |
fault | -0.011 |
uch | -0.010 |
gh | -0.010 |
tresp | -0.010 |
paste | -0.010 |
Token | and |
Token ID | 290 |
Feature activation | -7.24e-01 |
Pos logit contributions | |
agree | +0.002 |
agreement | +0.002 |
ies | +0.002 |
happy | +0.002 |
ie | +0.002 |
Neg logit contributions | |
uch | -0.003 |
fault | -0.003 |
gh | -0.002 |
tresp | -0.002 |
gency | -0.002 |
Token | it |
Token ID | 340 |
Feature activation | -3.26e+00 |
Pos logit contributions | |
fault | +0.029 |
uch | +0.028 |
gh | +0.027 |
tresp | +0.026 |
paste | +0.025 |
Neg logit contributions | |
agree | -0.035 |
ies | -0.034 |
agreement | -0.034 |
ie | -0.032 |
happy | -0.032 |
Token | 's |
Token ID | 338 |
Feature activation | +1.55e+00 |
Pos logit contributions | |
fault | +0.138 |
gh | +0.137 |
gency | +0.126 |
uch | +0.124 |
OP | +0.122 |
Neg logit contributions | |
ies | -0.149 |
ie | -0.143 |
agree | -0.142 |
happy | -0.136 |
reci | -0.135 |
Token | all |
Token ID | 477 |
Feature activation | +3.51e+00 |
Pos logit contributions | |
happy | +0.066 |
ies | +0.063 |
agreement | +0.061 |
family | +0.060 |
ie | +0.059 |
Neg logit contributions | |
gh | -0.068 |
uch | -0.066 |
fault | -0.066 |
lor | -0.065 |
gency | -0.065 |
Token | my |
Token ID | 616 |
Feature activation | -1.54e+00 |
Pos logit contributions | |
agree | +0.151 |
ies | +0.151 |
agreement | +0.147 |
happy | +0.145 |
ie | +0.145 |
Neg logit contributions | |
gh | -0.151 |
fault | -0.150 |
uch | -0.146 |
paste | -0.139 |
gency | -0.139 |
Token | fault |
Token ID | 8046 |
Feature activation | -2.09e+01 |
Pos logit contributions | |
uch | +0.091 |
gh | +0.089 |
fault | +0.089 |
tresp | +0.085 |
paste | +0.085 |
Neg logit contributions | |
agree | -0.043 |
ies | -0.042 |
agreement | -0.041 |
happy | -0.036 |
ie | -0.036 |
Token | ," |
Token ID | 553 |
Feature activation | -2.82e+00 |
Pos logit contributions | |
uch | +0.711 |
fault | +0.707 |
gh | +0.678 |
tresp | +0.652 |
paste | +0.629 |
Neg logit contributions | |
agree | -1.109 |
ies | -1.097 |
agreement | -1.082 |
ie | -1.017 |
happy | -0.994 |
Token | he |
Token ID | 339 |
Feature activation | +2.97e+00 |
Pos logit contributions | |
fault | +0.143 |
uch | +0.143 |
gh | +0.138 |
tresp | +0.135 |
paste | +0.131 |
Neg logit contributions | |
agree | -0.102 |
agreement | -0.100 |
ies | -0.098 |
ie | -0.088 |
happy | -0.086 |
Token | said |
Token ID | 531 |
Feature activation | -6.31e-01 |
Pos logit contributions | |
agree | +0.078 |
ies | +0.074 |
agreement | +0.072 |
happy | +0.068 |
ie | +0.066 |
Neg logit contributions | |
fault | -0.198 |
uch | -0.185 |
gh | -0.176 |
tresp | -0.175 |
gency | -0.171 |
Token | , |
Token ID | 11 |
Feature activation | +6.16e-02 |
Pos logit contributions | |
uch | +0.034 |
fault | +0.033 |
gh | +0.032 |
tresp | +0.032 |
paste | +0.030 |
Neg logit contributions | |
agree | -0.022 |
agreement | -0.021 |
ies | -0.021 |
happy | -0.019 |
ie | -0.019 |
Token | weeping |
Token ID | 45705 |
Feature activation | +1.95e+00 |
Pos logit contributions | |
agree | +0.002 |
agreement | +0.002 |
ies | +0.002 |
ie | +0.002 |
ashing | +0.002 |
Neg logit contributions | |
fault | -0.003 |
uch | -0.003 |
gh | -0.003 |
tresp | -0.003 |
paste | -0.003 |
Token | No |
Token ID | 2949 |
Feature activation | -6.11e+00 |
Pos logit contributions | |
agree | +0.281 |
agreement | +0.276 |
ies | +0.265 |
happy | +0.249 |
ie | +0.240 |
Neg logit contributions | |
fault | -0.269 |
uch | -0.258 |
gh | -0.253 |
tresp | -0.245 |
gency | -0.236 |
Token | , |
Token ID | 11 |
Feature activation | +7.44e-02 |
Pos logit contributions | |
uch | +0.429 |
paste | +0.408 |
fault | +0.401 |
lor | +0.399 |
OP | +0.397 |
Neg logit contributions | |
agree | -0.130 |
ies | -0.130 |
agreement | -0.118 |
ie | -0.115 |
family | -0.105 |
Token | it |
Token ID | 340 |
Feature activation | -3.26e+00 |
Pos logit contributions | |
agreement | +0.003 |
agree | +0.003 |
happy | +0.003 |
ies | +0.003 |
family | +0.003 |
Neg logit contributions | |
uch | -0.003 |
fault | -0.003 |
gh | -0.003 |
OP | -0.003 |
osaurus | -0.003 |
Token | 's |
Token ID | 338 |
Feature activation | +1.57e+00 |
Pos logit contributions | |
gency | +0.168 |
fault | +0.163 |
OP | +0.157 |
ses | +0.153 |
paste | +0.152 |
Neg logit contributions | |
ies | -0.120 |
ie | -0.118 |
happy | -0.106 |
girl | -0.101 |
agree | -0.100 |
Token | your |
Token ID | 534 |
Feature activation | -5.72e-01 |
Pos logit contributions | |
happy | +0.068 |
ies | +0.061 |
forth | +0.059 |
ie | +0.054 |
friends | +0.054 |
Neg logit contributions | |
lor | -0.086 |
gency | -0.076 |
OP | -0.073 |
paste | -0.072 |
osaurus | -0.070 |
Token | fault |
Token ID | 8046 |
Feature activation | -2.09e+01 |
Pos logit contributions | |
uch | +0.014 |
paste | +0.013 |
gency | +0.012 |
gh | +0.012 |
lor | +0.012 |
Neg logit contributions | |
ies | -0.036 |
agree | -0.036 |
agreement | -0.035 |
family | -0.035 |
happy | -0.034 |
Token | , |
Token ID | 11 |
Feature activation | +5.77e-02 |
Pos logit contributions | |
uch | +0.923 |
fault | +0.911 |
gh | +0.880 |
tresp | +0.862 |
paste | +0.832 |
Neg logit contributions | |
agree | -0.909 |
ies | -0.896 |
agreement | -0.878 |
ie | -0.814 |
happy | -0.791 |
Token | you |
Token ID | 345 |
Feature activation | -1.35e+00 |
Pos logit contributions | |
agreement | +0.002 |
ies | +0.002 |
agree | +0.002 |
happy | +0.002 |
ie | +0.002 |
Neg logit contributions | |
uch | -0.003 |
gh | -0.002 |
fault | -0.002 |
OP | -0.002 |
gency | -0.002 |
Token | were |
Token ID | 547 |
Feature activation | +4.42e-02 |
Pos logit contributions | |
gh | +0.057 |
fault | +0.055 |
paste | +0.052 |
uch | +0.052 |
tresp | +0.051 |
Neg logit contributions | |
agree | -0.063 |
ies | -0.061 |
happy | -0.056 |
agreement | -0.055 |
ie | -0.054 |
Token | too |
Token ID | 1165 |
Feature activation | -6.93e+00 |
Pos logit contributions | |
happy | +0.002 |
friends | +0.002 |
enthusiastic | +0.002 |
ies | +0.002 |
forth | +0.002 |
Neg logit contributions | |
lor | -0.002 |
OP | -0.002 |
gency | -0.002 |
ses | -0.002 |
fault | -0.002 |
Token | close |
Token ID | 1969 |
Feature activation | +4.52e+00 |
Pos logit contributions | |
fault | +0.308 |
uch | +0.307 |
gh | +0.294 |
tresp | +0.288 |
paste | +0.276 |
Neg logit contributions | |
agree | -0.300 |
agreement | -0.295 |
ies | -0.289 |
ie | -0.265 |
ashing | -0.257 |
Token |
|
Token ID | 198 |
Feature activation | +7.08e-01 |
Pos logit contributions | |
ies | +0.030 |
agreement | +0.029 |
forth | +0.028 |
ie | +0.027 |
agree | +0.026 |
Neg logit contributions | |
gh | -0.030 |
uch | -0.029 |
fault | -0.029 |
OP | -0.029 |
tresp | -0.027 |
Token | " |
Token ID | 1 |
Feature activation | +6.18e+00 |
Pos logit contributions | |
ies | +0.032 |
ie | +0.031 |
agreement | +0.030 |
smile | +0.028 |
agree | +0.028 |
Neg logit contributions | |
uch | -0.030 |
gh | -0.029 |
OP | -0.027 |
w | -0.027 |
tresp | -0.027 |
Token | It |
Token ID | 1026 |
Feature activation | -2.47e+00 |
Pos logit contributions | |
agree | +0.280 |
agreement | +0.274 |
ies | +0.255 |
happy | +0.244 |
reci | +0.243 |
Neg logit contributions | |
fault | -0.282 |
uch | -0.263 |
gh | -0.259 |
tresp | -0.251 |
gency | -0.246 |
Token | 's |
Token ID | 338 |
Feature activation | +1.57e+00 |
Pos logit contributions | |
gency | +0.134 |
paste | +0.131 |
fault | +0.130 |
gh | +0.130 |
lor | +0.127 |
Neg logit contributions | |
ie | -0.084 |
ies | -0.083 |
happy | -0.075 |
play | -0.073 |
agree | -0.071 |
Token | your |
Token ID | 534 |
Feature activation | -5.56e-01 |
Pos logit contributions | |
happy | +0.070 |
ies | +0.060 |
friends | +0.058 |
forth | +0.057 |
play | +0.056 |
Neg logit contributions | |
lor | -0.088 |
paste | -0.075 |
gency | -0.074 |
flake | -0.070 |
anger | -0.069 |
Token | fault |
Token ID | 8046 |
Feature activation | -2.09e+01 |
Pos logit contributions | |
uch | +0.025 |
paste | +0.024 |
gh | +0.024 |
gency | +0.023 |
lor | +0.023 |
Neg logit contributions | |
agree | -0.024 |
ies | -0.023 |
agreement | -0.022 |
happy | -0.022 |
family | -0.021 |
Token | , |
Token ID | 11 |
Feature activation | +6.92e-02 |
Pos logit contributions | |
uch | +0.839 |
fault | +0.833 |
gh | +0.802 |
tresp | +0.779 |
paste | +0.753 |
Neg logit contributions | |
agree | -0.989 |
ies | -0.972 |
agreement | -0.961 |
ie | -0.889 |
happy | -0.868 |
Token | Ben |
Token ID | 3932 |
Feature activation | -3.19e+00 |
Pos logit contributions | |
agree | +0.003 |
agreement | +0.003 |
ies | +0.003 |
happy | +0.003 |
ie | +0.003 |
Neg logit contributions | |
uch | -0.003 |
fault | -0.003 |
gh | -0.003 |
gency | -0.003 |
OP | -0.003 |
Token | !" |
Token ID | 2474 |
Feature activation | -7.17e+00 |
Pos logit contributions | |
fault | +0.144 |
uch | +0.143 |
gh | +0.138 |
tresp | +0.133 |
paste | +0.131 |
Neg logit contributions | |
agree | -0.136 |
ies | -0.134 |
agreement | -0.129 |
ie | -0.121 |
happy | -0.117 |
Token | Lily |
Token ID | 20037 |
Feature activation | +2.10e+00 |
Pos logit contributions | |
fault | +0.389 |
uch | +0.376 |
gh | +0.373 |
tresp | +0.369 |
paste | +0.366 |
Neg logit contributions | |
agree | -0.227 |
ies | -0.222 |
agreement | -0.220 |
ie | -0.201 |
ashing | -0.194 |
Token | said |
Token ID | 531 |
Feature activation | -6.18e-01 |
Pos logit contributions | |
agree | +0.058 |
ies | +0.052 |
happy | +0.046 |
agreement | +0.045 |
ie | +0.042 |
Neg logit contributions | |
fault | -0.140 |
uch | -0.136 |
gh | -0.134 |
tresp | -0.128 |
gency | -0.126 |
Token | . |
Token ID | 13 |
Feature activation | +2.85e-01 |
Pos logit contributions | |
fault | +0.144 |
uch | +0.140 |
gh | +0.135 |
tresp | +0.133 |
paste | +0.127 |
Neg logit contributions | |
agree | -0.119 |
agreement | -0.119 |
ies | -0.115 |
reci | -0.104 |
ashing | -0.103 |
Token | It |
Token ID | 632 |
Feature activation | -4.52e+00 |
Pos logit contributions | |
agree | +0.014 |
agreement | +0.013 |
ies | +0.013 |
ie | +0.012 |
happy | +0.012 |
Neg logit contributions | |
uch | -0.011 |
fault | -0.011 |
gh | -0.011 |
tresp | -0.011 |
paste | -0.010 |
Token | 's |
Token ID | 338 |
Feature activation | +1.55e+00 |
Pos logit contributions | |
gh | +0.224 |
gency | +0.222 |
fault | +0.205 |
OP | +0.205 |
uch | +0.197 |
Neg logit contributions | |
happy | -0.177 |
ies | -0.173 |
ie | -0.168 |
play | -0.167 |
agree | -0.167 |
Token | not |
Token ID | 407 |
Feature activation | -7.42e+00 |
Pos logit contributions | |
happy | +0.070 |
ies | +0.066 |
agreement | +0.065 |
agree | +0.064 |
family | +0.061 |
Neg logit contributions | |
gency | -0.069 |
uch | -0.068 |
gh | -0.068 |
OP | -0.066 |
lor | -0.064 |
Token | your |
Token ID | 534 |
Feature activation | -5.66e-01 |
Pos logit contributions | |
uch | +0.372 |
gh | +0.364 |
fault | +0.355 |
gency | +0.353 |
lor | +0.342 |
Neg logit contributions | |
ies | -0.286 |
agree | -0.279 |
happy | -0.276 |
agreement | -0.262 |
ie | -0.252 |
Token | fault |
Token ID | 8046 |
Feature activation | -2.09e+01 |
Pos logit contributions | |
uch | +0.025 |
gh | +0.023 |
paste | +0.023 |
gency | +0.022 |
tresp | +0.022 |
Neg logit contributions | |
agree | -0.026 |
ies | -0.025 |
agreement | -0.025 |
happy | -0.023 |
family | -0.023 |
Token | . |
Token ID | 13 |
Feature activation | +2.68e-01 |
Pos logit contributions | |
fault | +1.008 |
uch | +0.987 |
gh | +0.951 |
tresp | +0.929 |
paste | +0.902 |
Neg logit contributions | |
agree | -0.824 |
agreement | -0.814 |
ies | -0.779 |
reci | -0.709 |
ie | -0.706 |
Token | Fl |
Token ID | 1610 |
Feature activation | -1.05e+01 |
Pos logit contributions | |
agree | +0.012 |
agreement | +0.012 |
ies | +0.011 |
reci | +0.010 |
happy | +0.010 |
Neg logit contributions | |
uch | -0.012 |
gh | -0.011 |
fault | -0.011 |
tresp | -0.011 |
tricked | -0.010 |
Token | uffy |
Token ID | 15352 |
Feature activation | -2.70e+00 |
Pos logit contributions | |
fault | +0.544 |
uch | +0.541 |
gh | +0.531 |
tresp | +0.518 |
paste | +0.506 |
Neg logit contributions | |
agree | -0.370 |
ies | -0.359 |
agreement | -0.352 |
ie | -0.315 |
ashing | -0.311 |
Token | was |
Token ID | 373 |
Feature activation | -3.79e+00 |
Pos logit contributions | |
gh | +0.102 |
fault | +0.100 |
gency | +0.097 |
OP | +0.096 |
uch | +0.094 |
Neg logit contributions | |
ies | -0.135 |
agree | -0.132 |
happy | -0.127 |
ie | -0.125 |
play | -0.122 |
Token | old |
Token ID | 1468 |
Feature activation | +4.73e+00 |
Pos logit contributions | |
uch | +0.182 |
OP | +0.182 |
gency | +0.181 |
gh | +0.170 |
lor | +0.170 |
Neg logit contributions | |
happy | -0.177 |
ies | -0.144 |
agreement | -0.142 |
family | -0.141 |
enthusiastic | -0.139 |
Token | that |
Token ID | 326 |
Feature activation | -1.28e+00 |
Pos logit contributions | |
agree | +0.003 |
agreement | +0.003 |
ies | +0.003 |
ie | +0.002 |
happy | +0.002 |
Neg logit contributions | |
fault | -0.004 |
uch | -0.004 |
gh | -0.004 |
tresp | -0.003 |
paste | -0.003 |
Token | it |
Token ID | 340 |
Feature activation | -3.26e+00 |
Pos logit contributions | |
fault | +0.061 |
uch | +0.060 |
gh | +0.058 |
tresp | +0.057 |
paste | +0.055 |
Neg logit contributions | |
agree | -0.051 |
agreement | -0.050 |
ies | -0.049 |
ie | -0.044 |
happy | -0.043 |
Token | was |
Token ID | 373 |
Feature activation | -3.78e+00 |
Pos logit contributions | |
fault | +0.164 |
gh | +0.159 |
uch | +0.158 |
gency | +0.154 |
OP | +0.152 |
Neg logit contributions | |
ies | -0.120 |
ie | -0.118 |
agree | -0.114 |
happy | -0.111 |
agreement | -0.109 |
Token | his |
Token ID | 465 |
Feature activation | +2.41e-01 |
Pos logit contributions | |
uch | +0.160 |
fault | +0.147 |
gh | +0.143 |
gency | +0.143 |
paste | +0.143 |
Neg logit contributions | |
agreement | -0.178 |
happy | -0.176 |
ies | -0.170 |
agree | -0.167 |
ie | -0.161 |
Token | own |
Token ID | 898 |
Feature activation | +5.62e+00 |
Pos logit contributions | |
agree | +0.008 |
ies | +0.008 |
agreement | +0.008 |
happy | +0.007 |
family | +0.007 |
Neg logit contributions | |
uch | -0.013 |
gh | -0.013 |
paste | -0.013 |
gency | -0.012 |
tresp | -0.012 |
Token | fault |
Token ID | 8046 |
Feature activation | -2.09e+01 |
Pos logit contributions | |
agree | +0.234 |
agreement | +0.230 |
ies | +0.225 |
happy | +0.216 |
family | +0.209 |
Neg logit contributions | |
uch | -0.263 |
paste | -0.246 |
gh | -0.244 |
fault | -0.244 |
tresp | -0.242 |
Token | - |
Token ID | 532 |
Feature activation | -6.21e+00 |
Pos logit contributions | |
fault | +1.033 |
uch | +1.012 |
gh | +0.984 |
tresp | +0.964 |
paste | +0.934 |
Neg logit contributions | |
agree | -0.794 |
agreement | -0.778 |
ies | -0.754 |
ie | -0.676 |
reci | -0.673 |
Token | if |
Token ID | 611 |
Feature activation | +2.93e+00 |
Pos logit contributions | |
uch | +0.272 |
fault | +0.266 |
gh | +0.257 |
tresp | +0.246 |
paste | +0.241 |
Neg logit contributions | |
agree | -0.270 |
agreement | -0.266 |
ies | -0.261 |
happy | -0.248 |
ie | -0.240 |
Token | he |
Token ID | 339 |
Feature activation | +2.97e+00 |
Pos logit contributions | |
ies | +0.086 |
agree | +0.085 |
agreement | +0.081 |
happy | +0.080 |
family | +0.078 |
Neg logit contributions | |
uch | -0.167 |
fault | -0.167 |
gh | -0.157 |
ses | -0.157 |
paste | -0.155 |
Token | had |
Token ID | 550 |
Feature activation | +2.37e+00 |
Pos logit contributions | |
agree | +0.116 |
ies | +0.109 |
ie | +0.106 |
happy | +0.101 |
play | +0.099 |
Neg logit contributions | |
fault | -0.152 |
gh | -0.144 |
gency | -0.138 |
uch | -0.137 |
tresp | -0.133 |
Token | written |
Token ID | 3194 |
Feature activation | -9.48e+00 |
Pos logit contributions | |
agree | +0.107 |
happy | +0.106 |
ies | +0.105 |
agreement | +0.100 |
family | +0.100 |
Neg logit contributions | |
uch | -0.100 |
osaurus | -0.095 |
fault | -0.094 |
paste | -0.093 |
ses | -0.091 |
Token | awn |
Token ID | 3832 |
Feature activation | -6.17e+00 |
Pos logit contributions | |
fault | +0.488 |
uch | +0.484 |
gh | +0.465 |
tresp | +0.453 |
OP | +0.435 |
Neg logit contributions | |
agree | -0.328 |
agreement | -0.326 |
ies | -0.315 |
happy | -0.278 |
ie | -0.277 |
Token | â |
Token ID | 22940 |
Feature activation | -8.44e+00 |
Pos logit contributions | |
uch | +0.273 |
fault | +0.273 |
gh | +0.261 |
tresp | +0.258 |
paste | +0.249 |
Neg logit contributions | |
agree | -0.263 |
agreement | -0.260 |
ies | -0.248 |
ie | -0.226 |
reci | -0.224 |
Token | € |
Token ID | 26391 |
Feature activation | -1.30e+01 |
Pos logit contributions | |
uch | +0.316 |
fault | +0.308 |
gh | +0.297 |
tresp | +0.285 |
paste | +0.270 |
Neg logit contributions | |
agree | -0.423 |
ies | -0.418 |
agreement | -0.416 |
ie | -0.380 |
happy | -0.378 |
Token | ™ |
Token ID | 8151 |
Feature activation | -1.17e+01 |
Pos logit contributions | |
uch | +0.585 |
fault | +0.582 |
gh | +0.562 |
tresp | +0.545 |
paste | +0.524 |
Neg logit contributions | |
agree | -0.546 |
agreement | -0.546 |
ies | -0.529 |
ie | -0.487 |
reci | -0.472 |
Token | s |
Token ID | 82 |
Feature activation | +3.72e+00 |
Pos logit contributions | |
uch | +0.360 |
fault | +0.352 |
gh | +0.336 |
tresp | +0.313 |
paste | +0.294 |
Neg logit contributions | |
agree | -0.668 |
agreement | -0.666 |
ies | -0.651 |
reci | -0.602 |
ie | -0.601 |
Token | fault |
Token ID | 8046 |
Feature activation | -2.09e+01 |
Pos logit contributions | |
agree | +0.163 |
agreement | +0.160 |
ies | +0.159 |
happy | +0.145 |
ie | +0.144 |
Neg logit contributions | |
uch | -0.161 |
fault | -0.158 |
gh | -0.154 |
tresp | -0.148 |
paste | -0.144 |
Token | . |
Token ID | 13 |
Feature activation | +2.89e-01 |
Pos logit contributions | |
fault | +1.008 |
uch | +0.982 |
gh | +0.955 |
tresp | +0.938 |
paste | +0.905 |
Neg logit contributions | |
agree | -0.818 |
agreement | -0.800 |
ies | -0.774 |
reci | -0.701 |
ie | -0.700 |
Token | |
Token ID | 220 |
Feature activation | +1.81e+00 |
Pos logit contributions | |
ies | +0.011 |
agree | +0.011 |
agreement | +0.010 |
ie | +0.010 |
ashing | +0.009 |
Neg logit contributions | |
fault | -0.014 |
uch | -0.014 |
tresp | -0.014 |
gh | -0.013 |
paste | -0.013 |
Token |
|
Token ID | 198 |
Feature activation | +7.00e-01 |
Pos logit contributions | |
agreement | +0.054 |
agree | +0.054 |
ies | +0.053 |
ie | +0.046 |
ashing | +0.043 |
Neg logit contributions | |
fault | -0.104 |
uch | -0.102 |
gh | -0.099 |
tresp | -0.097 |
paste | -0.095 |
Token |
|
Token ID | 198 |
Feature activation | +7.03e-01 |
Pos logit contributions | |
ies | +0.026 |
agree | +0.024 |
agreement | +0.024 |
ie | +0.022 |
forth | +0.022 |
Neg logit contributions | |
uch | -0.035 |
gh | -0.035 |
fault | -0.034 |
tresp | -0.034 |
OP | -0.033 |
Token | The |
Token ID | 464 |
Feature activation | +3.18e+00 |
Pos logit contributions | |
ies | +0.031 |
agree | +0.030 |
agreement | +0.029 |
ie | +0.028 |
ashing | +0.026 |
Neg logit contributions | |
uch | -0.031 |
gh | -0.030 |
tresp | -0.029 |
fault | -0.029 |
paste | -0.028 |
Token | . |
Token ID | 13 |
Feature activation | +3.12e-01 |
Pos logit contributions | |
ies | +0.084 |
agree | +0.083 |
agreement | +0.079 |
ie | +0.075 |
happy | +0.070 |
Neg logit contributions | |
uch | -0.137 |
fault | -0.133 |
tresp | -0.128 |
gh | -0.128 |
gency | -0.125 |
Token | It |
Token ID | 632 |
Feature activation | -4.52e+00 |
Pos logit contributions | |
agree | +0.014 |
agreement | +0.014 |
ies | +0.014 |
ie | +0.013 |
happy | +0.012 |
Neg logit contributions | |
uch | -0.013 |
fault | -0.013 |
gh | -0.012 |
tresp | -0.012 |
paste | -0.011 |
Token | 's |
Token ID | 338 |
Feature activation | +1.57e+00 |
Pos logit contributions | |
gh | +0.242 |
gency | +0.236 |
fault | +0.232 |
uch | +0.224 |
tresp | +0.223 |
Neg logit contributions | |
ies | -0.157 |
agree | -0.150 |
ie | -0.148 |
happy | -0.144 |
play | -0.134 |
Token | not |
Token ID | 407 |
Feature activation | -7.40e+00 |
Pos logit contributions | |
happy | +0.072 |
ies | +0.067 |
agreement | +0.065 |
agree | +0.064 |
ie | +0.062 |
Neg logit contributions | |
lor | -0.069 |
uch | -0.069 |
gency | -0.068 |
gh | -0.067 |
OP | -0.066 |
Token | your |
Token ID | 534 |
Feature activation | -5.56e-01 |
Pos logit contributions | |
uch | +0.348 |
gh | +0.339 |
fault | +0.333 |
gency | +0.322 |
paste | +0.320 |
Neg logit contributions | |
agree | -0.308 |
ies | -0.307 |
happy | -0.290 |
agreement | -0.288 |
ie | -0.273 |
Token | fault |
Token ID | 8046 |
Feature activation | -2.09e+01 |
Pos logit contributions | |
uch | +0.024 |
paste | +0.022 |
gh | +0.022 |
tresp | +0.021 |
gency | +0.021 |
Neg logit contributions | |
agree | -0.027 |
agreement | -0.025 |
ies | -0.025 |
happy | -0.024 |
family | -0.024 |
Token | . |
Token ID | 13 |
Feature activation | +2.69e-01 |
Pos logit contributions | |
fault | +0.973 |
uch | +0.969 |
gh | +0.932 |
tresp | +0.911 |
paste | +0.883 |
Neg logit contributions | |
agree | -0.850 |
agreement | -0.831 |
ies | -0.820 |
ie | -0.744 |
happy | -0.727 |
Token | The |
Token ID | 383 |
Feature activation | +4.17e-01 |
Pos logit contributions | |
agree | +0.012 |
agreement | +0.012 |
ies | +0.011 |
happy | +0.011 |
reci | +0.011 |
Neg logit contributions | |
uch | -0.011 |
gh | -0.011 |
fault | -0.011 |
tresp | -0.010 |
tricked | -0.010 |
Token | rope |
Token ID | 17182 |
Feature activation | -6.61e+00 |
Pos logit contributions | |
agreement | +0.017 |
agree | +0.016 |
ies | +0.015 |
family | +0.015 |
happy | +0.015 |
Neg logit contributions | |
uch | -0.020 |
gh | -0.020 |
fault | -0.019 |
OP | -0.019 |
paste | -0.018 |
Token | was |
Token ID | 373 |
Feature activation | -3.77e+00 |
Pos logit contributions | |
fault | +0.237 |
gh | +0.236 |
uch | +0.231 |
gency | +0.228 |
tresp | +0.220 |
Neg logit contributions | |
ies | -0.333 |
agree | -0.330 |
ie | -0.314 |
agreement | -0.307 |
happy | -0.304 |
Token | old |
Token ID | 1468 |
Feature activation | +4.73e+00 |
Pos logit contributions | |
uch | +0.179 |
lor | +0.172 |
gh | +0.166 |
gency | +0.165 |
OP | +0.162 |
Neg logit contributions | |
happy | -0.167 |
ies | -0.149 |
agreement | -0.144 |
sunny | -0.137 |
family | -0.136 |
Token | No |
Token ID | 2949 |
Feature activation | -6.13e+00 |
Pos logit contributions | |
agree | +0.258 |
agreement | +0.248 |
ies | +0.229 |
happy | +0.226 |
reci | +0.211 |
Neg logit contributions | |
fault | -0.295 |
uch | -0.281 |
gh | -0.277 |
tresp | -0.276 |
gency | -0.264 |
Token | , |
Token ID | 11 |
Feature activation | +6.98e-02 |
Pos logit contributions | |
uch | +0.422 |
paste | +0.413 |
lor | +0.405 |
angs | +0.402 |
fault | +0.400 |
Neg logit contributions | |
agree | -0.136 |
ies | -0.131 |
agreement | -0.118 |
ie | -0.112 |
smile | -0.105 |
Token | it |
Token ID | 340 |
Feature activation | -3.26e+00 |
Pos logit contributions | |
agreement | +0.003 |
agree | +0.003 |
happy | +0.003 |
ies | +0.003 |
family | +0.003 |
Neg logit contributions | |
uch | -0.003 |
fault | -0.003 |
gh | -0.003 |
OP | -0.003 |
osaurus | -0.003 |
Token | is |
Token ID | 318 |
Feature activation | -6.22e+00 |
Pos logit contributions | |
gency | +0.172 |
fault | +0.166 |
OP | +0.155 |
gh | +0.155 |
lor | +0.154 |
Neg logit contributions | |
ies | -0.118 |
ie | -0.115 |
happy | -0.101 |
play | -0.100 |
girl | -0.098 |
Token | your |
Token ID | 534 |
Feature activation | -5.54e-01 |
Pos logit contributions | |
lor | +0.339 |
gency | +0.283 |
uch | +0.276 |
OP | +0.275 |
paste | +0.274 |
Neg logit contributions | |
happy | -0.276 |
ies | -0.228 |
forth | -0.227 |
smile | -0.224 |
play | -0.219 |
Token | fault |
Token ID | 8046 |
Feature activation | -2.09e+01 |
Pos logit contributions | |
uch | +0.025 |
gency | +0.024 |
OP | +0.024 |
gh | +0.024 |
paste | +0.024 |
Neg logit contributions | |
ies | -0.024 |
happy | -0.023 |
agree | -0.023 |
family | -0.023 |
agreement | -0.022 |
Token | !" |
Token ID | 2474 |
Feature activation | -7.17e+00 |
Pos logit contributions | |
uch | +0.735 |
fault | +0.727 |
gh | +0.694 |
tresp | +0.672 |
gency | +0.644 |
Neg logit contributions | |
agree | -1.099 |
ies | -1.094 |
agreement | -1.070 |
ie | -1.011 |
happy | -0.984 |
Token | Lily |
Token ID | 20037 |
Feature activation | +2.10e+00 |
Pos logit contributions | |
fault | +0.421 |
uch | +0.419 |
gh | +0.407 |
tresp | +0.400 |
paste | +0.390 |
Neg logit contributions | |
agree | -0.207 |
agreement | -0.200 |
ies | -0.197 |
ie | -0.170 |
happy | -0.164 |
Token | said |
Token ID | 531 |
Feature activation | -6.18e-01 |
Pos logit contributions | |
agree | +0.057 |
ies | +0.049 |
agreement | +0.044 |
happy | +0.043 |
ie | +0.040 |
Neg logit contributions | |
fault | -0.142 |
uch | -0.137 |
gh | -0.136 |
tresp | -0.132 |
gency | -0.128 |
Token | . |
Token ID | 13 |
Feature activation | +2.87e-01 |
Pos logit contributions | |
fault | +0.030 |
uch | +0.030 |
gh | +0.029 |
tresp | +0.028 |
paste | +0.028 |
Neg logit contributions | |
agree | -0.024 |
agreement | -0.023 |
ies | -0.023 |
ie | -0.021 |
reci | -0.020 |
Token | " |
Token ID | 366 |
Feature activation | +1.68e+00 |
Pos logit contributions | |
ies | +0.012 |
agree | +0.012 |
agreement | +0.011 |
ie | +0.011 |
reci | +0.010 |
Neg logit contributions | |
fault | -0.013 |
uch | -0.012 |
gh | -0.012 |
tresp | -0.012 |
ses | -0.012 |
Token |
|
Token ID | 198 |
Feature activation | +7.65e-01 |
Pos logit contributions | |
ies | +0.029 |
agreement | +0.029 |
agree | +0.028 |
forth | +0.027 |
ashing | +0.026 |
Neg logit contributions | |
fault | -0.036 |
gh | -0.035 |
uch | -0.034 |
OP | -0.032 |
tresp | -0.032 |
Token | " |
Token ID | 1 |
Feature activation | +6.21e+00 |
Pos logit contributions | |
ies | +0.031 |
agreement | +0.030 |
ie | +0.028 |
agree | +0.028 |
ashing | +0.027 |
Neg logit contributions | |
gh | -0.036 |
uch | -0.035 |
RR | -0.032 |
OP | -0.032 |
tresp | -0.032 |
Token | It |
Token ID | 1026 |
Feature activation | -2.44e+00 |
Pos logit contributions | |
agree | +0.255 |
agreement | +0.247 |
ies | +0.233 |
happy | +0.220 |
ashing | +0.211 |
Neg logit contributions | |
fault | -0.299 |
uch | -0.293 |
gh | -0.284 |
tresp | -0.281 |
gency | -0.267 |
Token | is |
Token ID | 318 |
Feature activation | -6.21e+00 |
Pos logit contributions | |
gency | +0.135 |
gh | +0.133 |
tresp | +0.129 |
fault | +0.127 |
uch | +0.127 |
Neg logit contributions | |
ies | -0.082 |
ie | -0.079 |
happy | -0.074 |
agree | -0.073 |
play | -0.069 |
Token | your |
Token ID | 534 |
Feature activation | -5.50e-01 |
Pos logit contributions | |
uch | +0.297 |
lor | +0.295 |
gency | +0.289 |
OP | +0.272 |
paste | +0.270 |
Neg logit contributions | |
happy | -0.291 |
agreement | -0.246 |
ies | -0.245 |
family | -0.242 |
agree | -0.241 |
Token | fault |
Token ID | 8046 |
Feature activation | -2.09e+01 |
Pos logit contributions | |
uch | +0.030 |
gh | +0.028 |
gency | +0.028 |
paste | +0.028 |
OP | +0.028 |
Neg logit contributions | |
agree | -0.019 |
ies | -0.019 |
agreement | -0.018 |
happy | -0.018 |
family | -0.017 |
Token | !" |
Token ID | 2474 |
Feature activation | -7.16e+00 |
Pos logit contributions | |
fault | +0.859 |
uch | +0.854 |
gh | +0.817 |
tresp | +0.797 |
paste | +0.768 |
Neg logit contributions | |
agree | -0.962 |
agreement | -0.945 |
ies | -0.930 |
ie | -0.855 |
happy | -0.838 |
Token | Ben |
Token ID | 3932 |
Feature activation | -3.19e+00 |
Pos logit contributions | |
fault | +0.327 |
uch | +0.326 |
gh | +0.313 |
tresp | +0.306 |
paste | +0.297 |
Neg logit contributions | |
agree | -0.299 |
agreement | -0.293 |
ies | -0.289 |
ie | -0.263 |
happy | -0.256 |
Token | said |
Token ID | 531 |
Feature activation | -6.44e-01 |
Pos logit contributions | |
fault | +0.198 |
uch | +0.195 |
gh | +0.191 |
tresp | +0.185 |
gency | +0.180 |
Neg logit contributions | |
agree | -0.096 |
ies | -0.086 |
agreement | -0.085 |
happy | -0.076 |
ie | -0.073 |
Token | . |
Token ID | 13 |
Feature activation | +2.72e-01 |
Pos logit contributions | |
fault | +0.029 |
gh | +0.028 |
uch | +0.027 |
paste | +0.027 |
tresp | +0.026 |
Neg logit contributions | |
agreement | -0.026 |
agree | -0.026 |
ies | -0.026 |
reci | -0.024 |
ie | -0.024 |
Token | " |
Token ID | 366 |
Feature activation | +1.66e+00 |
Pos logit contributions | |
ies | +0.012 |
agree | +0.012 |
agreement | +0.011 |
ie | +0.011 |
reci | +0.010 |
Neg logit contributions | |
fault | -0.012 |
gh | -0.011 |
paste | -0.011 |
tresp | -0.011 |
uch | -0.011 |
Token | Lily |
Token ID | 20037 |
Feature activation | +2.15e+00 |
Pos logit contributions | |
agree | +0.004 |
agreement | +0.004 |
ies | +0.004 |
ie | +0.004 |
happy | +0.003 |
Neg logit contributions | |
fault | -0.006 |
uch | -0.006 |
gh | -0.006 |
tresp | -0.006 |
paste | -0.005 |
Token | . |
Token ID | 13 |
Feature activation | +3.13e-01 |
Pos logit contributions | |
agree | +0.085 |
agreement | +0.083 |
ies | +0.082 |
ie | +0.074 |
happy | +0.072 |
Neg logit contributions | |
fault | -0.103 |
uch | -0.103 |
gh | -0.099 |
tresp | -0.097 |
paste | -0.094 |
Token | It |
Token ID | 632 |
Feature activation | -4.50e+00 |
Pos logit contributions | |
agree | +0.014 |
agreement | +0.014 |
ies | +0.014 |
ie | +0.012 |
happy | +0.012 |
Neg logit contributions | |
uch | -0.013 |
fault | -0.013 |
gh | -0.013 |
tresp | -0.012 |
paste | -0.012 |
Token | was |
Token ID | 373 |
Feature activation | -3.76e+00 |
Pos logit contributions | |
gency | +0.259 |
fault | +0.254 |
gh | +0.248 |
lor | +0.239 |
OP | +0.235 |
Neg logit contributions | |
happy | -0.136 |
ie | -0.136 |
ies | -0.131 |
play | -0.128 |
girl | -0.128 |
Token | my |
Token ID | 616 |
Feature activation | -1.52e+00 |
Pos logit contributions | |
lor | +0.201 |
uch | +0.183 |
gency | +0.182 |
OP | +0.172 |
gh | +0.168 |
Neg logit contributions | |
happy | -0.164 |
family | -0.135 |
sunny | -0.131 |
tidy | -0.130 |
play | -0.129 |
Token | fault |
Token ID | 8046 |
Feature activation | -2.09e+01 |
Pos logit contributions | |
uch | +0.080 |
gh | +0.077 |
paste | +0.074 |
tresp | +0.074 |
gency | +0.073 |
Neg logit contributions | |
agree | -0.056 |
ies | -0.054 |
agreement | -0.054 |
happy | -0.050 |
family | -0.048 |
Token | . |
Token ID | 13 |
Feature activation | +2.85e-01 |
Pos logit contributions | |
fault | +1.001 |
uch | +0.979 |
gh | +0.946 |
tresp | +0.925 |
paste | +0.898 |
Neg logit contributions | |
agree | -0.826 |
agreement | -0.812 |
ies | -0.773 |
reci | -0.716 |
ashing | -0.701 |
Token | I |
Token ID | 314 |
Feature activation | -1.52e+00 |
Pos logit contributions | |
agree | +0.012 |
agreement | +0.012 |
ies | +0.011 |
happy | +0.011 |
reci | +0.011 |
Neg logit contributions | |
uch | -0.012 |
fault | -0.012 |
gh | -0.012 |
tresp | -0.012 |
tricked | -0.011 |
Token | was |
Token ID | 373 |
Feature activation | -3.78e+00 |
Pos logit contributions | |
fault | +0.069 |
gh | +0.069 |
uch | +0.067 |
gency | +0.066 |
tresp | +0.065 |
Neg logit contributions | |
agree | -0.067 |
ies | -0.062 |
happy | -0.057 |
agreement | -0.057 |
ie | -0.057 |
Token | not |
Token ID | 407 |
Feature activation | -7.42e+00 |
Pos logit contributions | |
lor | +0.184 |
OP | +0.180 |
gency | +0.176 |
uch | +0.173 |
gh | +0.168 |
Neg logit contributions | |
happy | -0.174 |
friends | -0.141 |
enthusiastic | -0.139 |
ies | -0.139 |
agree | -0.138 |
Token | careful |
Token ID | 8161 |
Feature activation | +4.23e+00 |
Pos logit contributions | |
uch | +0.385 |
fault | +0.373 |
gh | +0.371 |
paste | +0.354 |
tresp | +0.351 |
Neg logit contributions | |
agree | -0.271 |
ies | -0.264 |
agreement | -0.259 |
happy | -0.247 |
ie | -0.230 |
Token | said |
Token ID | 531 |
Feature activation | -6.18e-01 |
Pos logit contributions | |
fault | +0.034 |
tresp | +0.033 |
uch | +0.033 |
tricked | +0.032 |
gh | +0.032 |
Neg logit contributions | |
agreement | -0.027 |
agree | -0.026 |
ies | -0.026 |
ashing | -0.025 |
ie | -0.024 |
Token | it |
Token ID | 340 |
Feature activation | -3.26e+00 |
Pos logit contributions | |
uch | +0.038 |
fault | +0.037 |
gh | +0.035 |
gency | +0.035 |
OP | +0.035 |
Neg logit contributions | |
agree | -0.017 |
ies | -0.017 |
agreement | -0.016 |
happy | -0.015 |
ie | -0.014 |
Token | was |
Token ID | 373 |
Feature activation | -3.76e+00 |
Pos logit contributions | |
gency | +0.204 |
uch | +0.199 |
fault | +0.198 |
tresp | +0.194 |
lor | +0.192 |
Neg logit contributions | |
happy | -0.076 |
play | -0.073 |
ies | -0.073 |
agree | -0.069 |
ie | -0.062 |
Token | not |
Token ID | 407 |
Feature activation | -7.39e+00 |
Pos logit contributions | |
uch | +0.184 |
lor | +0.175 |
gency | +0.172 |
fault | +0.171 |
OP | +0.168 |
Neg logit contributions | |
happy | -0.152 |
agreement | -0.148 |
ies | -0.147 |
agree | -0.147 |
family | -0.136 |
Token | their |
Token ID | 511 |
Feature activation | +2.14e+00 |
Pos logit contributions | |
uch | +0.349 |
fault | +0.330 |
gh | +0.327 |
paste | +0.310 |
gency | +0.309 |
Neg logit contributions | |
agree | -0.313 |
ies | -0.306 |
agreement | -0.299 |
happy | -0.287 |
ie | -0.271 |
Token | fault |
Token ID | 8046 |
Feature activation | -2.09e+01 |
Pos logit contributions | |
agree | +0.165 |
agreement | +0.162 |
ies | +0.162 |
happy | +0.155 |
family | +0.153 |
Neg logit contributions | |
uch | -0.029 |
gh | -0.020 |
paste | -0.020 |
gency | -0.015 |
OP | -0.015 |
Token | . |
Token ID | 13 |
Feature activation | +2.98e-01 |
Pos logit contributions | |
fault | +0.970 |
gh | +0.882 |
uch | +0.881 |
tresp | +0.856 |
paste | +0.836 |
Neg logit contributions | |
agreement | -0.876 |
agree | -0.860 |
reci | -0.805 |
ies | -0.800 |
ashing | -0.739 |
Token | They |
Token ID | 1119 |
Feature activation | +4.00e+00 |
Pos logit contributions | |
agree | +0.007 |
agreement | +0.006 |
ies | +0.006 |
ie | +0.005 |
happy | +0.005 |
Neg logit contributions | |
fault | -0.019 |
uch | -0.019 |
gh | -0.019 |
tresp | -0.018 |
paste | -0.018 |
Token | said |
Token ID | 531 |
Feature activation | -6.21e-01 |
Pos logit contributions | |
agreement | +0.150 |
ies | +0.148 |
agree | +0.148 |
ashing | +0.135 |
ie | +0.132 |
Neg logit contributions | |
fault | -0.191 |
uch | -0.191 |
tresp | -0.185 |
gh | -0.180 |
tricked | -0.179 |
Token | the |
Token ID | 262 |
Feature activation | +1.41e+00 |
Pos logit contributions | |
uch | +0.040 |
fault | +0.038 |
gh | +0.037 |
tresp | +0.036 |
gency | +0.036 |
Neg logit contributions | |
agree | -0.016 |
ies | -0.015 |
agreement | -0.015 |
happy | -0.014 |
ie | -0.012 |
Token | man |
Token ID | 582 |
Feature activation | -2.23e+00 |
Pos logit contributions | |
agreement | +0.055 |
agree | +0.053 |
ies | +0.050 |
happy | +0.049 |
family | +0.049 |
Neg logit contributions | |
uch | -0.072 |
gh | -0.066 |
paste | -0.065 |
OP | -0.063 |
gency | -0.062 |
Token | Tom |
Token ID | 4186 |
Feature activation | -3.02e+00 |
Pos logit contributions | |
agree | +0.003 |
agreement | +0.003 |
ies | +0.003 |
happy | +0.003 |
ie | +0.003 |
Neg logit contributions | |
uch | -0.004 |
fault | -0.004 |
gh | -0.004 |
tresp | -0.003 |
paste | -0.003 |
Token | . |
Token ID | 13 |
Feature activation | +2.89e-01 |
Pos logit contributions | |
uch | +0.144 |
fault | +0.144 |
gh | +0.140 |
tresp | +0.135 |
paste | +0.133 |
Neg logit contributions | |
agree | -0.121 |
ies | -0.120 |
agreement | -0.116 |
ie | -0.109 |
happy | -0.105 |
Token | It |
Token ID | 632 |
Feature activation | -4.51e+00 |
Pos logit contributions | |
agree | +0.011 |
agreement | +0.011 |
ies | +0.011 |
happy | +0.010 |
family | +0.010 |
Neg logit contributions | |
gh | -0.014 |
uch | -0.014 |
fault | -0.013 |
tresp | -0.013 |
paste | -0.013 |
Token | was |
Token ID | 373 |
Feature activation | -3.75e+00 |
Pos logit contributions | |
gency | +0.245 |
fault | +0.245 |
gh | +0.243 |
OP | +0.224 |
paste | +0.222 |
Neg logit contributions | |
happy | -0.153 |
ies | -0.147 |
ie | -0.143 |
girl | -0.142 |
agree | -0.140 |
Token | my |
Token ID | 616 |
Feature activation | -1.52e+00 |
Pos logit contributions | |
lor | +0.192 |
gency | +0.188 |
uch | +0.183 |
OP | +0.181 |
paste | +0.176 |
Neg logit contributions | |
happy | -0.179 |
sunny | -0.135 |
friends | -0.134 |
cheerful | -0.133 |
family | -0.132 |
Token | fault |
Token ID | 8046 |
Feature activation | -2.09e+01 |
Pos logit contributions | |
uch | +0.087 |
gh | +0.086 |
paste | +0.082 |
tresp | +0.081 |
fault | +0.081 |
Neg logit contributions | |
agree | -0.048 |
agreement | -0.046 |
ies | -0.046 |
happy | -0.041 |
family | -0.039 |
Token | . |
Token ID | 13 |
Feature activation | +2.98e-01 |
Pos logit contributions | |
fault | +0.990 |
uch | +0.970 |
gh | +0.935 |
tresp | +0.919 |
paste | +0.889 |
Neg logit contributions | |
agree | -0.834 |
agreement | -0.820 |
ies | -0.777 |
reci | -0.725 |
ashing | -0.709 |
Token | I |
Token ID | 314 |
Feature activation | -1.52e+00 |
Pos logit contributions | |
agree | +0.012 |
agreement | +0.012 |
happy | +0.010 |
reci | +0.010 |
ies | +0.010 |
Neg logit contributions | |
gh | -0.014 |
uch | -0.014 |
fault | -0.014 |
tresp | -0.013 |
tricked | -0.013 |
Token | pulled |
Token ID | 5954 |
Feature activation | -4.18e-01 |
Pos logit contributions | |
gh | +0.070 |
fault | +0.069 |
gency | +0.068 |
uch | +0.066 |
osaurus | +0.066 |
Neg logit contributions | |
agree | -0.067 |
ies | -0.061 |
happy | -0.058 |
play | -0.057 |
ie | -0.056 |
Token | the |
Token ID | 262 |
Feature activation | +1.41e+00 |
Pos logit contributions | |
uch | +0.022 |
lor | +0.022 |
gh | +0.022 |
paste | +0.021 |
sed | +0.021 |
Neg logit contributions | |
happy | -0.014 |
forth | -0.014 |
ies | -0.014 |
agreement | -0.014 |
agree | -0.013 |
Token | string |
Token ID | 4731 |
Feature activation | -3.97e+00 |
Pos logit contributions | |
agreement | +0.049 |
agree | +0.046 |
ies | +0.045 |
happy | +0.044 |
family | +0.043 |
Neg logit contributions | |
gh | -0.076 |
uch | -0.075 |
paste | -0.071 |
fault | -0.070 |
osaurus | -0.070 |
Token | says |
Token ID | 1139 |
Feature activation | -1.18e-01 |
Pos logit contributions | |
fault | +0.071 |
uch | +0.071 |
gh | +0.068 |
tresp | +0.067 |
paste | +0.064 |
Neg logit contributions | |
agree | -0.062 |
agreement | -0.061 |
ies | -0.060 |
ie | -0.054 |
ashing | -0.053 |
Token | it |
Token ID | 340 |
Feature activation | -3.25e+00 |
Pos logit contributions | |
uch | +0.008 |
fault | +0.008 |
gh | +0.007 |
OP | +0.007 |
osaurus | +0.007 |
Neg logit contributions | |
agree | -0.003 |
happy | -0.002 |
ies | -0.002 |
agreement | -0.002 |
smile | -0.002 |
Token | is |
Token ID | 318 |
Feature activation | -6.20e+00 |
Pos logit contributions | |
gency | +0.178 |
uch | +0.172 |
lor | +0.169 |
gh | +0.168 |
tresp | +0.164 |
Neg logit contributions | |
ies | -0.098 |
play | -0.098 |
my | -0.093 |
ie | -0.091 |
happy | -0.091 |
Token | not |
Token ID | 407 |
Feature activation | -7.40e+00 |
Pos logit contributions | |
uch | +0.269 |
fault | +0.259 |
gh | +0.245 |
gency | +0.245 |
lor | +0.241 |
Neg logit contributions | |
agreement | -0.277 |
agree | -0.275 |
ies | -0.273 |
happy | -0.266 |
ie | -0.244 |
Token | his |
Token ID | 465 |
Feature activation | +2.67e-01 |
Pos logit contributions | |
uch | +0.369 |
fault | +0.356 |
gh | +0.348 |
tresp | +0.334 |
paste | +0.333 |
Neg logit contributions | |
agree | -0.288 |
ies | -0.285 |
agreement | -0.277 |
happy | -0.262 |
ie | -0.250 |
Token | fault |
Token ID | 8046 |
Feature activation | -2.09e+01 |
Pos logit contributions | |
agree | +0.014 |
ies | +0.014 |
happy | +0.013 |
agreement | +0.013 |
family | +0.013 |
Neg logit contributions | |
uch | -0.011 |
paste | -0.010 |
lor | -0.009 |
gh | -0.009 |
gency | -0.009 |
Token | . |
Token ID | 13 |
Feature activation | +2.95e-01 |
Pos logit contributions | |
fault | +1.025 |
uch | +0.972 |
gh | +0.953 |
tresp | +0.934 |
paste | +0.910 |
Neg logit contributions | |
agreement | -0.809 |
agree | -0.806 |
ies | -0.757 |
reci | -0.719 |
ashing | -0.687 |
Token | She |
Token ID | 1375 |
Feature activation | +3.32e+00 |
Pos logit contributions | |
agree | +0.005 |
agreement | +0.005 |
ies | +0.004 |
happy | +0.004 |
ashing | +0.003 |
Neg logit contributions | |
uch | -0.021 |
fault | -0.020 |
gh | -0.020 |
tresp | -0.020 |
paste | -0.019 |
Token | says |
Token ID | 1139 |
Feature activation | -1.06e-01 |
Pos logit contributions | |
agree | +0.101 |
ies | +0.093 |
agreement | +0.092 |
ie | +0.082 |
happy | +0.081 |
Neg logit contributions | |
uch | -0.197 |
gh | -0.197 |
fault | -0.190 |
gency | -0.182 |
tresp | -0.179 |
Token | the |
Token ID | 262 |
Feature activation | +1.43e+00 |
Pos logit contributions | |
uch | +0.007 |
fault | +0.007 |
gh | +0.007 |
gency | +0.006 |
tresp | +0.006 |
Neg logit contributions | |
agree | -0.002 |
ies | -0.002 |
happy | -0.002 |
agreement | -0.002 |
ie | -0.002 |
Token | cube |
Token ID | 23441 |
Feature activation | -8.50e+00 |
Pos logit contributions | |
agreement | +0.056 |
agree | +0.055 |
ies | +0.052 |
happy | +0.052 |
family | +0.051 |
Neg logit contributions | |
uch | -0.071 |
paste | -0.068 |
gh | -0.067 |
fault | -0.064 |
OP | -0.064 |
Token | bird |
Token ID | 6512 |
Feature activation | -3.78e+00 |
Pos logit contributions | |
agree | +0.170 |
agreement | +0.164 |
ies | +0.163 |
ie | +0.135 |
happy | +0.131 |
Neg logit contributions | |
uch | -0.477 |
fault | -0.463 |
gh | -0.461 |
tresp | -0.440 |
paste | -0.437 |
Token | . |
Token ID | 13 |
Feature activation | +2.98e-01 |
Pos logit contributions | |
uch | +0.204 |
fault | +0.204 |
gh | +0.196 |
tresp | +0.189 |
gency | +0.189 |
Neg logit contributions | |
agree | -0.127 |
ies | -0.126 |
agreement | -0.120 |
ie | -0.115 |
happy | -0.109 |
Token | You |
Token ID | 921 |
Feature activation | -3.64e+00 |
Pos logit contributions | |
agree | +0.014 |
agreement | +0.014 |
ies | +0.014 |
ie | +0.013 |
happy | +0.013 |
Neg logit contributions | |
fault | -0.012 |
uch | -0.012 |
gh | -0.011 |
tresp | -0.011 |
paste | -0.010 |
Token | admit |
Token ID | 9159 |
Feature activation | -3.51e+00 |
Pos logit contributions | |
fault | +0.186 |
gh | +0.186 |
uch | +0.173 |
gency | +0.168 |
paste | +0.168 |
Neg logit contributions | |
agree | -0.147 |
ies | -0.128 |
agreement | -0.126 |
happy | -0.124 |
play | -0.122 |
Token | your |
Token ID | 534 |
Feature activation | -5.43e-01 |
Pos logit contributions | |
uch | +0.172 |
fault | +0.169 |
gh | +0.163 |
tresp | +0.155 |
paste | +0.154 |
Neg logit contributions | |
agree | -0.140 |
ies | -0.137 |
agreement | -0.132 |
happy | -0.124 |
ie | -0.123 |
Token | fault |
Token ID | 8046 |
Feature activation | -2.09e+01 |
Pos logit contributions | |
uch | +0.032 |
gh | +0.031 |
paste | +0.030 |
gency | +0.030 |
tricked | +0.030 |
Neg logit contributions | |
agree | -0.016 |
ies | -0.016 |
agreement | -0.016 |
family | -0.015 |
happy | -0.015 |
Token | and |
Token ID | 290 |
Feature activation | -7.03e-01 |
Pos logit contributions | |
uch | +1.083 |
fault | +1.070 |
gh | +1.043 |
tresp | +1.017 |
paste | +0.989 |
Neg logit contributions | |
agree | -0.749 |
ies | -0.728 |
agreement | -0.724 |
ie | -0.650 |
happy | -0.631 |
Token | ask |
Token ID | 1265 |
Feature activation | +1.07e+01 |
Pos logit contributions | |
uch | +0.031 |
fault | +0.030 |
gh | +0.030 |
paste | +0.027 |
tresp | +0.027 |
Neg logit contributions | |
agree | -0.031 |
ies | -0.030 |
agreement | -0.030 |
happy | -0.029 |
ie | -0.027 |
Token | for |
Token ID | 329 |
Feature activation | +7.25e+00 |
Pos logit contributions | |
agree | +0.412 |
agreement | +0.404 |
ies | +0.398 |
ie | +0.363 |
happy | +0.354 |
Neg logit contributions | |
uch | -0.524 |
fault | -0.523 |
gh | -0.499 |
tresp | -0.486 |
paste | -0.479 |
Token | mercy |
Token ID | 17703 |
Feature activation | -8.35e+00 |
Pos logit contributions | |
agree | +0.262 |
agreement | +0.261 |
happy | +0.239 |
family | +0.232 |
ies | +0.231 |
Neg logit contributions | |
fault | -0.390 |
uch | -0.382 |
osaurus | -0.357 |
gh | -0.354 |
paste | -0.348 |
Token | . |
Token ID | 13 |
Feature activation | +3.11e-01 |
Pos logit contributions | |
fault | +0.408 |
uch | +0.401 |
gh | +0.390 |
tresp | +0.383 |
paste | +0.368 |
Neg logit contributions | |
agree | -0.314 |
agreement | -0.309 |
ies | -0.304 |
ie | -0.275 |
ashing | -0.269 |
Token | felt |
Token ID | 2936 |
Feature activation | -1.84e+00 |
Pos logit contributions | |
agree | +0.127 |
ies | +0.119 |
agreement | +0.119 |
ie | +0.109 |
happy | +0.107 |
Neg logit contributions | |
fault | -0.181 |
uch | -0.178 |
gh | -0.173 |
tresp | -0.166 |
paste | -0.162 |
Token | like |
Token ID | 588 |
Feature activation | -5.24e+00 |
Pos logit contributions | |
uch | +0.092 |
fault | +0.091 |
gh | +0.089 |
tresp | +0.086 |
gency | +0.084 |
Neg logit contributions | |
agree | -0.070 |
agreement | -0.069 |
ies | -0.066 |
happy | -0.062 |
ie | -0.060 |
Token | it |
Token ID | 340 |
Feature activation | -3.21e+00 |
Pos logit contributions | |
uch | +0.289 |
fault | +0.276 |
gh | +0.271 |
tresp | +0.263 |
OP | +0.262 |
Neg logit contributions | |
agree | -0.174 |
ies | -0.170 |
agreement | -0.169 |
happy | -0.165 |
ie | -0.150 |
Token | was |
Token ID | 373 |
Feature activation | -3.71e+00 |
Pos logit contributions | |
uch | +0.145 |
fault | +0.145 |
gh | +0.142 |
tresp | +0.139 |
gency | +0.136 |
Neg logit contributions | |
agree | -0.132 |
ies | -0.131 |
agreement | -0.127 |
ie | -0.126 |
happy | -0.126 |
Token | her |
Token ID | 607 |
Feature activation | +2.89e+00 |
Pos logit contributions | |
uch | +0.175 |
fault | +0.163 |
gency | +0.161 |
OP | +0.160 |
gh | +0.160 |
Neg logit contributions | |
agreement | -0.152 |
ies | -0.150 |
agree | -0.150 |
happy | -0.148 |
ie | -0.138 |
Token | fault |
Token ID | 8046 |
Feature activation | -2.09e+01 |
Pos logit contributions | |
agree | +0.099 |
agreement | +0.097 |
ies | +0.096 |
happy | +0.089 |
family | +0.085 |
Neg logit contributions | |
uch | -0.156 |
gh | -0.151 |
fault | -0.146 |
tresp | -0.146 |
OP | -0.145 |
Token | , |
Token ID | 11 |
Feature activation | +7.47e-02 |
Pos logit contributions | |
fault | +1.020 |
uch | +0.983 |
gh | +0.957 |
tresp | +0.935 |
paste | +0.912 |
Neg logit contributions | |
agree | -0.815 |
agreement | -0.795 |
ies | -0.758 |
reci | -0.708 |
ashing | -0.679 |
Token | like |
Token ID | 588 |
Feature activation | -5.32e+00 |
Pos logit contributions | |
agree | +0.003 |
agreement | +0.003 |
ies | +0.003 |
happy | +0.002 |
ie | +0.002 |
Neg logit contributions | |
uch | -0.004 |
fault | -0.004 |
gh | -0.004 |
tresp | -0.003 |
paste | -0.003 |
Token | she |
Token ID | 673 |
Feature activation | +5.25e+00 |
Pos logit contributions | |
uch | +0.267 |
gh | +0.244 |
fault | +0.243 |
OP | +0.238 |
gency | +0.236 |
Neg logit contributions | |
agree | -0.199 |
ies | -0.199 |
happy | -0.197 |
agreement | -0.197 |
family | -0.185 |
Token | had |
Token ID | 550 |
Feature activation | +2.39e+00 |
Pos logit contributions | |
agree | +0.229 |
ies | +0.223 |
agreement | +0.215 |
ie | +0.209 |
happy | +0.205 |
Neg logit contributions | |
fault | -0.231 |
uch | -0.224 |
gh | -0.224 |
tresp | -0.216 |
OP | -0.209 |
Token | done |
Token ID | 1760 |
Feature activation | -3.18e+00 |
Pos logit contributions | |
ies | +0.110 |
happy | +0.109 |
agree | +0.108 |
play | +0.102 |
family | +0.101 |
Neg logit contributions | |
uch | -0.094 |
gency | -0.091 |
osaurus | -0.091 |
ses | -0.088 |
fault | -0.088 |
Token | . |
Token ID | 13 |
Feature activation | +3.09e-01 |
Pos logit contributions | |
agree | +0.078 |
ies | +0.077 |
agreement | +0.076 |
ie | +0.067 |
happy | +0.064 |
Neg logit contributions | |
uch | -0.139 |
fault | -0.138 |
gh | -0.134 |
tresp | -0.131 |
paste | -0.128 |
Token | It |
Token ID | 632 |
Feature activation | -4.50e+00 |
Pos logit contributions | |
agree | +0.012 |
agreement | +0.012 |
ies | +0.012 |
ie | +0.011 |
happy | +0.011 |
Neg logit contributions | |
uch | -0.014 |
fault | -0.014 |
gh | -0.014 |
tresp | -0.014 |
paste | -0.013 |
Token | was |
Token ID | 373 |
Feature activation | -3.74e+00 |
Pos logit contributions | |
gency | +0.249 |
gh | +0.246 |
paste | +0.235 |
fault | +0.233 |
OP | +0.229 |
Neg logit contributions | |
ies | -0.146 |
agree | -0.142 |
happy | -0.140 |
ie | -0.136 |
play | -0.129 |
Token | not |
Token ID | 407 |
Feature activation | -7.39e+00 |
Pos logit contributions | |
lor | +0.172 |
uch | +0.167 |
gency | +0.167 |
OP | +0.161 |
paste | +0.157 |
Neg logit contributions | |
happy | -0.183 |
family | -0.156 |
ies | -0.155 |
friends | -0.150 |
agreement | -0.150 |
Token | your |
Token ID | 534 |
Feature activation | -5.32e-01 |
Pos logit contributions | |
uch | +0.334 |
fault | +0.317 |
gh | +0.316 |
paste | +0.303 |
gency | +0.301 |
Neg logit contributions | |
agree | -0.325 |
ies | -0.320 |
agreement | -0.311 |
happy | -0.306 |
ie | -0.286 |
Token | fault |
Token ID | 8046 |
Feature activation | -2.09e+01 |
Pos logit contributions | |
uch | +0.031 |
gh | +0.029 |
paste | +0.028 |
fault | +0.028 |
tresp | +0.028 |
Neg logit contributions | |
agree | -0.017 |
agreement | -0.016 |
ies | -0.016 |
happy | -0.014 |
family | -0.014 |
Token | . |
Token ID | 13 |
Feature activation | +3.09e-01 |
Pos logit contributions | |
fault | +0.987 |
uch | +0.945 |
gh | +0.922 |
tresp | +0.905 |
paste | +0.873 |
Neg logit contributions | |
agree | -0.835 |
agreement | -0.831 |
ies | -0.785 |
reci | -0.745 |
ashing | -0.717 |
Token | He |
Token ID | 679 |
Feature activation | +1.11e+00 |
Pos logit contributions | |
agree | +0.012 |
agreement | +0.012 |
ies | +0.011 |
reci | +0.011 |
happy | +0.010 |
Neg logit contributions | |
gh | -0.014 |
uch | -0.014 |
tresp | -0.014 |
fault | -0.014 |
gency | -0.013 |
Token | was |
Token ID | 373 |
Feature activation | -3.75e+00 |
Pos logit contributions | |
agree | +0.045 |
ies | +0.043 |
happy | +0.041 |
play | +0.041 |
ie | +0.040 |
Neg logit contributions | |
gh | -0.053 |
fault | -0.052 |
gency | -0.051 |
paste | -0.050 |
OP | -0.048 |
Token | a |
Token ID | 257 |
Feature activation | -8.53e+00 |
Pos logit contributions | |
uch | +0.162 |
OP | +0.157 |
gh | +0.153 |
gency | +0.153 |
paste | +0.146 |
Neg logit contributions | |
happy | -0.185 |
agree | -0.165 |
agreement | -0.163 |
ies | -0.163 |
family | -0.158 |
Token | mean |
Token ID | 1612 |
Feature activation | -5.78e+00 |
Pos logit contributions | |
uch | +0.424 |
fault | +0.404 |
gh | +0.396 |
paste | +0.388 |
gency | +0.381 |
Neg logit contributions | |
agree | -0.336 |
agreement | -0.328 |
happy | -0.315 |
ies | -0.313 |
family | -0.297 |
Token |
|
Token ID | 198 |
Feature activation | +8.33e-01 |
Pos logit contributions | |
ies | +0.032 |
agreement | +0.032 |
agree | +0.031 |
ashing | +0.030 |
forth | +0.030 |
Neg logit contributions | |
gh | -0.039 |
fault | -0.038 |
OP | -0.037 |
uch | -0.037 |
tresp | -0.036 |
Token | " |
Token ID | 1 |
Feature activation | +6.27e+00 |
Pos logit contributions | |
ies | +0.032 |
agreement | +0.031 |
agree | +0.031 |
ashing | +0.030 |
reci | +0.029 |
Neg logit contributions | |
gh | -0.038 |
uch | -0.036 |
tresp | -0.035 |
OP | -0.035 |
paste | -0.034 |
Token | It |
Token ID | 1026 |
Feature activation | -2.38e+00 |
Pos logit contributions | |
agree | +0.256 |
agreement | +0.245 |
ies | +0.234 |
happy | +0.220 |
ashing | +0.212 |
Neg logit contributions | |
fault | -0.302 |
uch | -0.293 |
gh | -0.286 |
tresp | -0.278 |
gency | -0.269 |
Token | 's |
Token ID | 338 |
Feature activation | +1.61e+00 |
Pos logit contributions | |
gh | +0.143 |
gency | +0.141 |
fault | +0.141 |
rain | +0.133 |
uch | +0.133 |
Neg logit contributions | |
ie | -0.062 |
ies | -0.062 |
my | -0.055 |
happy | -0.054 |
ask | -0.053 |
Token | your |
Token ID | 534 |
Feature activation | -5.04e-01 |
Pos logit contributions | |
happy | +0.072 |
family | +0.064 |
sunny | +0.061 |
ies | +0.060 |
agreement | +0.059 |
Neg logit contributions | |
lor | -0.078 |
uch | -0.073 |
gency | -0.072 |
osaurus | -0.071 |
paste | -0.069 |
Token | fault |
Token ID | 8046 |
Feature activation | -2.09e+01 |
Pos logit contributions | |
uch | +0.024 |
gh | +0.023 |
paste | +0.022 |
gency | +0.022 |
lor | +0.022 |
Neg logit contributions | |
ies | -0.020 |
agree | -0.020 |
family | -0.019 |
agreement | -0.019 |
happy | -0.019 |
Token | !" |
Token ID | 2474 |
Feature activation | -7.13e+00 |
Pos logit contributions | |
uch | +0.830 |
fault | +0.827 |
gh | +0.793 |
tresp | +0.773 |
paste | +0.742 |
Neg logit contributions | |
agree | -0.991 |
ies | -0.971 |
agreement | -0.966 |
ie | -0.890 |
happy | -0.870 |
Token | Mia |
Token ID | 32189 |
Feature activation | -2.20e+00 |
Pos logit contributions | |
fault | +0.346 |
uch | +0.342 |
gh | +0.332 |
tresp | +0.326 |
paste | +0.317 |
Neg logit contributions | |
agree | -0.273 |
agreement | -0.267 |
ies | -0.266 |
ie | -0.240 |
ashing | -0.234 |
Token | blamed |
Token ID | 13772 |
Feature activation | +1.97e-01 |
Pos logit contributions | |
uch | +0.122 |
fault | +0.120 |
gh | +0.118 |
tresp | +0.114 |
paste | +0.111 |
Neg logit contributions | |
agree | -0.075 |
agreement | -0.071 |
ies | -0.069 |
ie | -0.061 |
happy | -0.061 |
Token | Sam |
Token ID | 3409 |
Feature activation | -3.03e+00 |
Pos logit contributions | |
agree | +0.008 |
agreement | +0.007 |
ies | +0.007 |
reci | +0.007 |
ashing | +0.007 |
Neg logit contributions | |
fault | -0.009 |
uch | -0.009 |
gh | -0.009 |
tresp | -0.009 |
paste | -0.009 |
Token | . |
Token ID | 13 |
Feature activation | +2.85e-01 |
Pos logit contributions | |
fault | +0.122 |
gh | +0.112 |
tresp | +0.110 |
paste | +0.109 |
ps | +0.108 |
Neg logit contributions | |
agreement | -0.132 |
agree | -0.130 |
ies | -0.127 |
reci | -0.127 |
ashing | -0.122 |
Token | " |
Token ID | 1 |
Feature activation | +6.21e+00 |
Pos logit contributions | |
agreement | +0.031 |
ies | +0.031 |
ie | +0.031 |
agree | +0.029 |
reci | +0.029 |
Neg logit contributions | |
uch | -0.032 |
gh | -0.031 |
w | -0.028 |
paste | -0.028 |
tresp | -0.028 |
Token | It |
Token ID | 1026 |
Feature activation | -2.42e+00 |
Pos logit contributions | |
agree | +0.229 |
ies | +0.214 |
agreement | +0.212 |
happy | +0.205 |
At | +0.197 |
Neg logit contributions | |
fault | -0.325 |
gh | -0.296 |
gency | -0.295 |
tresp | -0.291 |
uch | -0.285 |
Token | was |
Token ID | 373 |
Feature activation | -3.72e+00 |
Pos logit contributions | |
gency | +0.144 |
gh | +0.144 |
fault | +0.140 |
OP | +0.137 |
rain | +0.137 |
Neg logit contributions | |
ies | -0.071 |
ie | -0.070 |
my | -0.058 |
play | -0.056 |
ask | -0.054 |
Token | not |
Token ID | 407 |
Feature activation | -7.37e+00 |
Pos logit contributions | |
lor | +0.203 |
uch | +0.190 |
OP | +0.180 |
gency | +0.178 |
ASH | +0.170 |
Neg logit contributions | |
happy | -0.161 |
family | -0.135 |
ies | -0.133 |
sunny | -0.126 |
friends | -0.124 |
Token | our |
Token ID | 674 |
Feature activation | -1.35e+00 |
Pos logit contributions | |
uch | +0.332 |
gh | +0.306 |
fault | +0.301 |
gency | +0.297 |
lor | +0.294 |
Neg logit contributions | |
ies | -0.337 |
happy | -0.329 |
agree | -0.325 |
agreement | -0.310 |
family | -0.296 |
Token | fault |
Token ID | 8046 |
Feature activation | -2.09e+01 |
Pos logit contributions | |
uch | +0.057 |
paste | +0.053 |
gh | +0.052 |
OP | +0.052 |
gency | +0.049 |
Neg logit contributions | |
ies | -0.066 |
agree | -0.064 |
family | -0.061 |
agreement | -0.061 |
happy | -0.059 |
Token | , |
Token ID | 11 |
Feature activation | +1.13e-01 |
Pos logit contributions | |
fault | +0.913 |
uch | +0.897 |
gh | +0.861 |
tresp | +0.843 |
paste | +0.815 |
Neg logit contributions | |
agree | -0.903 |
agreement | -0.900 |
ies | -0.856 |
reci | -0.795 |
ie | -0.788 |
Token | mom |
Token ID | 1995 |
Feature activation | +7.45e-01 |
Pos logit contributions | |
agreement | +0.005 |
ies | +0.005 |
happy | +0.004 |
agree | +0.004 |
family | +0.004 |
Neg logit contributions | |
uch | -0.006 |
gh | -0.005 |
OP | -0.005 |
fault | -0.005 |
gency | -0.005 |
Token | . |
Token ID | 13 |
Feature activation | +3.32e-01 |
Pos logit contributions | |
agree | +0.033 |
agreement | +0.032 |
ies | +0.032 |
ie | +0.029 |
happy | +0.028 |
Neg logit contributions | |
fault | -0.032 |
uch | -0.032 |
gh | -0.031 |
tresp | -0.030 |
paste | -0.029 |
Token | The |
Token ID | 383 |
Feature activation | +4.87e-01 |
Pos logit contributions | |
agree | +0.012 |
ies | +0.012 |
agreement | +0.012 |
happy | +0.011 |
family | +0.011 |
Neg logit contributions | |
uch | -0.016 |
gh | -0.016 |
fault | -0.016 |
tresp | -0.015 |
tricked | -0.015 |
Token | phone |
Token ID | 3072 |
Feature activation | -8.95e+00 |
Pos logit contributions | |
agreement | +0.022 |
family | +0.021 |
happy | +0.021 |
ies | +0.021 |
girl | +0.021 |
Neg logit contributions | |
uch | -0.022 |
OP | -0.021 |
paste | -0.020 |
fault | -0.020 |
gh | -0.019 |
Token | No |
Token ID | 2949 |
Feature activation | -6.09e+00 |
Pos logit contributions | |
agree | +0.253 |
agreement | +0.243 |
ies | +0.223 |
happy | +0.223 |
girl | +0.205 |
Neg logit contributions | |
fault | -0.310 |
uch | -0.292 |
gh | -0.286 |
tresp | -0.286 |
gency | -0.276 |
Token | , |
Token ID | 11 |
Feature activation | +8.86e-02 |
Pos logit contributions | |
uch | +0.427 |
paste | +0.424 |
lor | +0.407 |
fault | +0.401 |
angs | +0.400 |
Neg logit contributions | |
agree | -0.132 |
ies | -0.123 |
agreement | -0.112 |
ie | -0.098 |
smile | -0.095 |
Token | it |
Token ID | 340 |
Feature activation | -3.24e+00 |
Pos logit contributions | |
agreement | +0.003 |
agree | +0.003 |
happy | +0.003 |
ies | +0.003 |
family | +0.002 |
Neg logit contributions | |
uch | -0.005 |
fault | -0.005 |
gh | -0.005 |
OP | -0.005 |
osaurus | -0.005 |
Token | 's |
Token ID | 338 |
Feature activation | +1.59e+00 |
Pos logit contributions | |
fault | +0.174 |
gency | +0.173 |
paste | +0.163 |
gh | +0.161 |
OP | +0.160 |
Neg logit contributions | |
ies | -0.111 |
ie | -0.107 |
happy | -0.098 |
agree | -0.096 |
play | -0.094 |
Token | your |
Token ID | 534 |
Feature activation | -5.32e-01 |
Pos logit contributions | |
happy | +0.072 |
ies | +0.064 |
forth | +0.063 |
friends | +0.059 |
play | +0.059 |
Neg logit contributions | |
lor | -0.084 |
paste | -0.073 |
gency | -0.070 |
flake | -0.069 |
growth | -0.068 |
Token | fault |
Token ID | 8046 |
Feature activation | -2.09e+01 |
Pos logit contributions | |
uch | +0.020 |
paste | +0.019 |
gency | +0.018 |
gh | +0.018 |
lor | +0.018 |
Neg logit contributions | |
agree | -0.027 |
ies | -0.027 |
agreement | -0.026 |
happy | -0.026 |
family | -0.025 |
Token | , |
Token ID | 11 |
Feature activation | +8.46e-02 |
Pos logit contributions | |
uch | +0.708 |
fault | +0.687 |
gh | +0.666 |
tresp | +0.645 |
gency | +0.623 |
Neg logit contributions | |
ies | -1.137 |
agree | -1.132 |
agreement | -1.088 |
ie | -1.045 |
happy | -1.015 |
Token | Lily |
Token ID | 20037 |
Feature activation | +2.13e+00 |
Pos logit contributions | |
agreement | +0.004 |
happy | +0.004 |
agree | +0.004 |
ies | +0.003 |
ie | +0.003 |
Neg logit contributions | |
uch | -0.004 |
fault | -0.004 |
gh | -0.004 |
OP | -0.004 |
gency | -0.004 |
Token | !" |
Token ID | 2474 |
Feature activation | -7.11e+00 |
Pos logit contributions | |
agree | +0.092 |
ies | +0.090 |
agreement | +0.089 |
ie | +0.082 |
happy | +0.080 |
Neg logit contributions | |
fault | -0.094 |
uch | -0.094 |
gh | -0.090 |
tresp | -0.088 |
paste | -0.085 |
Token | Ben |
Token ID | 3932 |
Feature activation | -3.15e+00 |
Pos logit contributions | |
fault | +0.309 |
uch | +0.299 |
gh | +0.289 |
tresp | +0.285 |
paste | +0.277 |
Neg logit contributions | |
agree | -0.307 |
ies | -0.306 |
agreement | -0.305 |
ie | -0.281 |
ashing | -0.274 |
Token | said |
Token ID | 531 |
Feature activation | -5.87e-01 |
Pos logit contributions | |
fault | +0.210 |
uch | +0.201 |
gh | +0.196 |
tresp | +0.190 |
gency | +0.186 |
Neg logit contributions | |
agree | -0.090 |
ies | -0.078 |
agreement | -0.076 |
happy | -0.071 |
ie | -0.065 |
Token | saying |
Token ID | 2282 |
Feature activation | -4.87e-01 |
Pos logit contributions | |
agree | +0.003 |
ies | +0.003 |
agreement | +0.003 |
ie | +0.003 |
reci | +0.003 |
Neg logit contributions | |
fault | -0.004 |
uch | -0.004 |
gh | -0.004 |
tresp | -0.004 |
paste | -0.004 |
Token | that |
Token ID | 326 |
Feature activation | -1.26e+00 |
Pos logit contributions | |
uch | +0.031 |
fault | +0.030 |
ses | +0.028 |
OP | +0.027 |
gency | +0.027 |
Neg logit contributions | |
agree | -0.013 |
happy | -0.012 |
agreement | -0.012 |
ies | -0.012 |
smile | -0.010 |
Token | it |
Token ID | 340 |
Feature activation | -3.23e+00 |
Pos logit contributions | |
uch | +0.068 |
fault | +0.067 |
gh | +0.065 |
tresp | +0.063 |
paste | +0.062 |
Neg logit contributions | |
agree | -0.043 |
agreement | -0.040 |
ies | -0.040 |
happy | -0.036 |
ie | -0.036 |
Token | was |
Token ID | 373 |
Feature activation | -3.72e+00 |
Pos logit contributions | |
fault | +0.164 |
gency | +0.159 |
uch | +0.159 |
ses | +0.156 |
gh | +0.154 |
Neg logit contributions | |
ies | -0.110 |
ie | -0.109 |
agree | -0.104 |
happy | -0.103 |
play | -0.098 |
Token | his |
Token ID | 465 |
Feature activation | +2.95e-01 |
Pos logit contributions | |
uch | +0.162 |
lor | +0.154 |
paste | +0.147 |
fault | +0.143 |
gency | +0.143 |
Neg logit contributions | |
happy | -0.172 |
agreement | -0.168 |
ies | -0.161 |
agree | -0.160 |
family | -0.155 |
Token | fault |
Token ID | 8046 |
Feature activation | -2.09e+01 |
Pos logit contributions | |
agree | +0.011 |
ies | +0.011 |
agreement | +0.010 |
happy | +0.010 |
family | +0.010 |
Neg logit contributions | |
uch | -0.015 |
gh | -0.014 |
paste | -0.014 |
osaurus | -0.014 |
gency | -0.014 |
Token | that |
Token ID | 326 |
Feature activation | -1.25e+00 |
Pos logit contributions | |
fault | +1.081 |
uch | +1.081 |
gh | +1.042 |
tresp | +1.021 |
paste | +0.992 |
Neg logit contributions | |
agree | -0.736 |
agreement | -0.716 |
ies | -0.708 |
ie | -0.632 |
happy | -0.615 |
Token | he |
Token ID | 339 |
Feature activation | +3.01e+00 |
Pos logit contributions | |
uch | +0.063 |
fault | +0.063 |
gh | +0.061 |
paste | +0.059 |
gency | +0.059 |
Neg logit contributions | |
agree | -0.044 |
ies | -0.043 |
agreement | -0.041 |
happy | -0.040 |
ie | -0.039 |
Token | forgot |
Token ID | 16453 |
Feature activation | +4.86e+00 |
Pos logit contributions | |
agree | +0.113 |
ies | +0.109 |
ie | +0.102 |
happy | +0.099 |
agreement | +0.097 |
Neg logit contributions | |
fault | -0.153 |
gh | -0.146 |
uch | -0.145 |
paste | -0.142 |
gency | -0.141 |
Token | the |
Token ID | 262 |
Feature activation | +1.44e+00 |
Pos logit contributions | |
agree | +0.213 |
ies | +0.212 |
agreement | +0.204 |
happy | +0.196 |
ie | +0.194 |
Neg logit contributions | |
uch | -0.214 |
fault | -0.201 |
gh | -0.196 |
ses | -0.191 |
paste | -0.189 |
Token | folder |
Token ID | 9483 |
Feature activation | -2.92e+00 |
Pos logit contributions | |
agreement | +0.059 |
agree | +0.059 |
ies | +0.057 |
happy | +0.054 |
family | +0.054 |
Neg logit contributions | |
uch | -0.069 |
osaurus | -0.065 |
gh | -0.065 |
paste | -0.063 |
OP | -0.063 |
Token | time |
Token ID | 640 |
Feature activation | -2.58e+00 |
Pos logit contributions | |
uch | +0.328 |
gh | +0.324 |
fault | +0.319 |
tresp | +0.294 |
OP | +0.268 |
Neg logit contributions | |
ies | -0.404 |
agree | -0.400 |
agreement | -0.399 |
reci | -0.367 |
forth | -0.357 |
Token | , |
Token ID | 11 |
Feature activation | +1.58e-01 |
Pos logit contributions | |
uch | +0.134 |
fault | +0.133 |
gh | +0.130 |
tresp | +0.126 |
paste | +0.120 |
Neg logit contributions | |
agree | -0.092 |
agreement | -0.091 |
ies | -0.087 |
reci | -0.078 |
ie | -0.077 |
Token | there |
Token ID | 612 |
Feature activation | +6.81e+00 |
Pos logit contributions | |
agree | +0.007 |
agreement | +0.007 |
ies | +0.007 |
reci | +0.006 |
ie | +0.006 |
Neg logit contributions | |
uch | -0.007 |
fault | -0.006 |
gh | -0.006 |
tresp | -0.006 |
nothing | -0.006 |
Token | was |
Token ID | 373 |
Feature activation | -3.72e+00 |
Pos logit contributions | |
agree | +0.293 |
ies | +0.285 |
agreement | +0.282 |
ie | +0.269 |
happy | +0.266 |
Neg logit contributions | |
fault | -0.301 |
uch | -0.295 |
tresp | -0.283 |
paste | -0.282 |
gh | -0.282 |
Token | a |
Token ID | 257 |
Feature activation | -8.51e+00 |
Pos logit contributions | |
fault | +0.117 |
uch | +0.115 |
gency | +0.113 |
gh | +0.109 |
tresp | +0.107 |
Neg logit contributions | |
happy | -0.208 |
agreement | -0.205 |
agree | -0.203 |
ies | -0.197 |
family | -0.193 |
Token | generous |
Token ID | 14431 |
Feature activation | +7.50e+00 |
Pos logit contributions | |
uch | +0.417 |
tresp | +0.411 |
gh | +0.400 |
fault | +0.389 |
sed | +0.382 |
Neg logit contributions | |
agree | -0.291 |
agreement | -0.287 |
ies | -0.276 |
reci | -0.276 |
ogged | -0.268 |
Token | snake |
Token ID | 17522 |
Feature activation | -1.02e+01 |
Pos logit contributions | |
agree | +0.278 |
agreement | +0.270 |
reci | +0.258 |
ies | +0.255 |
forth | +0.237 |
Neg logit contributions | |
fault | -0.351 |
tresp | -0.346 |
uch | -0.341 |
gh | -0.334 |
paste | -0.323 |
Token | who |
Token ID | 508 |
Feature activation | +6.73e+00 |
Pos logit contributions | |
fault | +0.710 |
uch | +0.697 |
gh | +0.671 |
paste | +0.650 |
OP | +0.650 |
Neg logit contributions | |
ies | -0.197 |
agree | -0.192 |
ie | -0.182 |
agreement | -0.170 |
family | -0.156 |
Token | lived |
Token ID | 5615 |
Feature activation | +3.05e+00 |
Pos logit contributions | |
agree | +0.187 |
ies | +0.179 |
agreement | +0.177 |
ie | +0.157 |
happy | +0.152 |
Neg logit contributions | |
fault | -0.406 |
uch | -0.398 |
gh | -0.388 |
tresp | -0.378 |
paste | -0.372 |
Token | in |
Token ID | 287 |
Feature activation | +1.54e+00 |
Pos logit contributions | |
agree | +0.129 |
agreement | +0.125 |
ies | +0.123 |
ie | +0.112 |
reci | +0.110 |
Neg logit contributions | |
uch | -0.138 |
fault | -0.137 |
gh | -0.133 |
tresp | -0.130 |
paste | -0.123 |
Token | a |
Token ID | 257 |
Feature activation | -8.57e+00 |
Pos logit contributions | |
agreement | +0.068 |
happy | +0.065 |
agree | +0.063 |
play | +0.062 |
family | +0.062 |
Neg logit contributions | |
fault | -0.070 |
uch | -0.070 |
gh | -0.067 |
paste | -0.063 |
gency | -0.060 |
Token | her |
Token ID | 607 |
Feature activation | +2.99e+00 |
Pos logit contributions | |
gh | +0.025 |
an | +0.024 |
nothing | +0.024 |
uch | +0.023 |
fault | +0.023 |
Neg logit contributions | |
ashing | -0.025 |
agree | -0.025 |
agreement | -0.023 |
acking | -0.023 |
reci | -0.023 |
Token | friend |
Token ID | 1545 |
Feature activation | +6.07e+00 |
Pos logit contributions | |
agree | +0.078 |
agreement | +0.075 |
reci | +0.066 |
ies | +0.062 |
ashing | +0.058 |
Neg logit contributions | |
fault | -0.187 |
uch | -0.181 |
gh | -0.176 |
tresp | -0.175 |
sed | -0.174 |
Token | Emily |
Token ID | 17608 |
Feature activation | +1.04e+00 |
Pos logit contributions | |
agree | +0.224 |
agreement | +0.221 |
ies | +0.214 |
reci | +0.190 |
ashing | +0.188 |
Neg logit contributions | |
uch | -0.303 |
fault | -0.301 |
gh | -0.294 |
tresp | -0.287 |
paste | -0.274 |
Token | decided |
Token ID | 3066 |
Feature activation | -9.78e-01 |
Pos logit contributions | |
agreement | +0.045 |
agree | +0.042 |
ies | +0.041 |
reci | +0.039 |
acking | +0.039 |
Neg logit contributions | |
tresp | -0.043 |
gh | -0.042 |
uch | -0.042 |
fault | -0.042 |
tricked | -0.041 |
Token | to |
Token ID | 284 |
Feature activation | -5.34e+00 |
Pos logit contributions | |
uch | +0.047 |
fault | +0.044 |
paste | +0.043 |
osaurus | +0.043 |
ses | +0.042 |
Neg logit contributions | |
agree | -0.043 |
ies | -0.041 |
happy | -0.040 |
play | -0.038 |
agreement | -0.038 |
Token | have |
Token ID | 423 |
Feature activation | +8.46e+00 |
Pos logit contributions | |
uch | +0.350 |
fault | +0.348 |
gh | +0.342 |
tresp | +0.334 |
paste | +0.324 |
Neg logit contributions | |
agree | -0.111 |
agreement | -0.109 |
ies | -0.109 |
ashing | -0.088 |
ie | -0.086 |
Token | a |
Token ID | 257 |
Feature activation | -8.43e+00 |
Pos logit contributions | |
play | +0.383 |
happy | +0.375 |
family | +0.366 |
agree | +0.358 |
sunny | +0.346 |
Neg logit contributions | |
uch | -0.379 |
ses | -0.371 |
fault | -0.365 |
sed | -0.362 |
osaurus | -0.351 |
Token | picnic |
Token ID | 35715 |
Feature activation | +3.60e+00 |
Pos logit contributions | |
uch | +0.499 |
fault | +0.493 |
paste | +0.482 |
gh | +0.475 |
ps | +0.475 |
Neg logit contributions | |
agree | -0.239 |
agreement | -0.219 |
happy | -0.213 |
play | -0.211 |
ies | -0.210 |
Token | . |
Token ID | 13 |
Feature activation | +4.15e-01 |
Pos logit contributions | |
agree | +0.132 |
ies | +0.128 |
agreement | +0.126 |
ie | +0.115 |
happy | +0.111 |
Neg logit contributions | |
fault | -0.186 |
uch | -0.184 |
gh | -0.176 |
tresp | -0.174 |
paste | -0.169 |
Token | They |
Token ID | 1119 |
Feature activation | +4.12e+00 |
Pos logit contributions | |
ies | +0.016 |
agreement | +0.015 |
agree | +0.015 |
ie | +0.015 |
reci | +0.014 |
Neg logit contributions | |
fault | -0.019 |
happens | -0.017 |
an | -0.017 |
gh | -0.017 |
Ow | -0.017 |
Token | wanted |
Token ID | 2227 |
Feature activation | +2.59e+00 |
Pos logit contributions | |
agreement | +0.163 |
agree | +0.160 |
ies | +0.151 |
reci | +0.145 |
ashing | +0.142 |
Neg logit contributions | |
uch | -0.184 |
tresp | -0.181 |
tricked | -0.175 |
gh | -0.173 |
fault | -0.172 |
Token | went |
Token ID | 1816 |
Feature activation | +3.50e+00 |
Pos logit contributions | |
agree | +0.053 |
agreement | +0.053 |
ies | +0.051 |
ie | +0.046 |
happy | +0.045 |
Neg logit contributions | |
uch | -0.071 |
fault | -0.071 |
gh | -0.068 |
tresp | -0.067 |
paste | -0.065 |
Token | to |
Token ID | 284 |
Feature activation | -5.39e+00 |
Pos logit contributions | |
agree | +0.213 |
agreement | +0.212 |
ies | +0.201 |
reci | +0.193 |
ie | +0.188 |
Neg logit contributions | |
fault | -0.092 |
uch | -0.090 |
gh | -0.087 |
tresp | -0.086 |
paste | -0.077 |
Token | the |
Token ID | 262 |
Feature activation | +1.48e+00 |
Pos logit contributions | |
fault | +0.247 |
uch | +0.237 |
tresp | +0.233 |
gh | +0.229 |
an | +0.225 |
Neg logit contributions | |
ies | -0.218 |
agree | -0.210 |
agreement | -0.209 |
ashing | -0.200 |
reci | -0.190 |
Token | store |
Token ID | 3650 |
Feature activation | +5.06e+00 |
Pos logit contributions | |
agree | +0.026 |
agreement | +0.025 |
play | +0.024 |
family | +0.022 |
ies | +0.022 |
Neg logit contributions | |
fault | -0.102 |
paste | -0.101 |
uch | -0.100 |
gh | -0.099 |
tresp | -0.097 |
Token | to |
Token ID | 284 |
Feature activation | -5.39e+00 |
Pos logit contributions | |
agreement | +0.280 |
reci | +0.279 |
agree | +0.260 |
ies | +0.251 |
ie | +0.241 |
Neg logit contributions | |
an | -0.154 |
fault | -0.154 |
uch | -0.147 |
gh | -0.142 |
tresp | -0.137 |
Token | buy |
Token ID | 2822 |
Feature activation | +9.83e+00 |
Pos logit contributions | |
an | +0.292 |
uch | +0.286 |
anything | +0.284 |
nothing | +0.280 |
tricked | +0.279 |
Neg logit contributions | |
ies | -0.175 |
ashing | -0.152 |
agree | -0.146 |
agreement | -0.142 |
ie | -0.138 |
Token | some |
Token ID | 617 |
Feature activation | -1.55e+00 |
Pos logit contributions | |
ies | +0.443 |
agree | +0.435 |
happy | +0.427 |
agreement | +0.423 |
ie | +0.416 |
Neg logit contributions | |
uch | -0.438 |
fault | -0.415 |
gh | -0.400 |
tresp | -0.394 |
sed | -0.392 |
Token | paint |
Token ID | 7521 |
Feature activation | -2.50e+00 |
Pos logit contributions | |
fault | +0.070 |
uch | +0.069 |
gh | +0.066 |
tresp | +0.065 |
paste | +0.063 |
Neg logit contributions | |
agree | -0.065 |
agreement | -0.064 |
ies | -0.062 |
ie | -0.057 |
ashing | -0.056 |
Token | . |
Token ID | 13 |
Feature activation | +3.94e-01 |
Pos logit contributions | |
uch | +0.144 |
fault | +0.144 |
tresp | +0.140 |
gh | +0.138 |
gency | +0.135 |
Neg logit contributions | |
ies | -0.077 |
agree | -0.076 |
agreement | -0.070 |
ie | -0.068 |
happy | -0.064 |
Token | She |
Token ID | 1375 |
Feature activation | +3.40e+00 |
Pos logit contributions | |
ies | +0.015 |
agree | +0.014 |
agreement | +0.013 |
ie | +0.013 |
reci | +0.012 |
Neg logit contributions | |
fault | -0.019 |
nothing | -0.018 |
OP | -0.018 |
gency | -0.018 |
paste | -0.018 |
Token | pointed |
Token ID | 6235 |
Feature activation | +6.96e-02 |
Pos logit contributions | |
agreement | +0.130 |
agree | +0.128 |
ies | +0.122 |
reci | +0.111 |
ie | +0.107 |
Neg logit contributions | |
uch | -0.167 |
fault | -0.162 |
gh | -0.159 |
tresp | -0.154 |
tricked | -0.151 |
Token | special |
Token ID | 2041 |
Feature activation | +3.57e+00 |
Pos logit contributions | |
agree | +0.079 |
agreement | +0.077 |
ies | +0.075 |
ie | +0.065 |
ashing | +0.064 |
Neg logit contributions | |
fault | -0.175 |
uch | -0.173 |
gh | -0.167 |
tresp | -0.165 |
paste | -0.161 |
Token | . |
Token ID | 13 |
Feature activation | +3.41e-01 |
Pos logit contributions | |
agree | +0.120 |
agreement | +0.115 |
ies | +0.113 |
happy | +0.101 |
ie | +0.099 |
Neg logit contributions | |
uch | -0.200 |
gh | -0.191 |
fault | -0.191 |
tresp | -0.187 |
paste | -0.183 |
Token | They |
Token ID | 1119 |
Feature activation | +4.05e+00 |
Pos logit contributions | |
ies | +0.014 |
agree | +0.013 |
agreement | +0.013 |
forth | +0.012 |
ie | +0.012 |
Neg logit contributions | |
fault | -0.016 |
OP | -0.015 |
paste | -0.014 |
ses | -0.014 |
Ow | -0.014 |
Token | became |
Token ID | 2627 |
Feature activation | +9.01e-01 |
Pos logit contributions | |
agreement | +0.153 |
ies | +0.149 |
ashing | +0.149 |
agree | +0.146 |
ogging | +0.140 |
Neg logit contributions | |
uch | -0.181 |
tresp | -0.176 |
tricked | -0.175 |
fault | -0.170 |
nothing | -0.166 |
Token | good |
Token ID | 922 |
Feature activation | -1.77e+00 |
Pos logit contributions | |
agree | +0.034 |
agreement | +0.032 |
ies | +0.032 |
ie | +0.030 |
ashing | +0.030 |
Neg logit contributions | |
fault | -0.044 |
uch | -0.043 |
tresp | -0.042 |
gh | -0.041 |
tricked | -0.040 |
Token | friends |
Token ID | 2460 |
Feature activation | +1.01e+01 |
Pos logit contributions | |
fault | +0.117 |
tresp | +0.112 |
uch | +0.112 |
paste | +0.107 |
gh | +0.107 |
Neg logit contributions | |
agreement | -0.038 |
agree | -0.036 |
ies | -0.030 |
reci | -0.029 |
particular | -0.029 |
Token | and |
Token ID | 290 |
Feature activation | -6.49e-01 |
Pos logit contributions | |
agree | +0.297 |
agreement | +0.282 |
ies | +0.276 |
happy | +0.252 |
ie | +0.241 |
Neg logit contributions | |
uch | -0.599 |
fault | -0.588 |
gh | -0.572 |
tresp | -0.556 |
paste | -0.549 |
Token | played |
Token ID | 2826 |
Feature activation | +4.19e+00 |
Pos logit contributions | |
fault | +0.034 |
uch | +0.034 |
gh | +0.033 |
tresp | +0.033 |
paste | +0.031 |
Neg logit contributions | |
agreement | -0.022 |
agree | -0.022 |
ies | -0.021 |
ashing | -0.019 |
ie | -0.019 |
Token | together |
Token ID | 1978 |
Feature activation | +5.23e+00 |
Pos logit contributions | |
ies | +0.146 |
agreement | +0.144 |
agree | +0.143 |
ie | +0.133 |
reci | +0.132 |
Neg logit contributions | |
fault | -0.206 |
gh | -0.205 |
uch | -0.204 |
tresp | -0.195 |
paste | -0.190 |
Token | every |
Token ID | 790 |
Feature activation | +5.98e+00 |
Pos logit contributions | |
agree | +0.165 |
agreement | +0.161 |
ies | +0.161 |
ie | +0.139 |
reci | +0.139 |
Neg logit contributions | |
fault | -0.287 |
uch | -0.283 |
gh | -0.281 |
tresp | -0.271 |
paste | -0.261 |
Token | day |
Token ID | 1110 |
Feature activation | +4.78e+00 |
Pos logit contributions | |
agree | +0.124 |
ies | +0.118 |
agreement | +0.107 |
reci | +0.092 |
ashing | +0.092 |
Neg logit contributions | |
uch | -0.397 |
fault | -0.387 |
gh | -0.387 |
tresp | -0.376 |
tricked | -0.362 |
Token | show |
Token ID | 905 |
Feature activation | +3.67e+00 |
Pos logit contributions | |
uch | +0.315 |
fault | +0.314 |
gh | +0.305 |
tresp | +0.300 |
paste | +0.288 |
Neg logit contributions | |
agree | -0.194 |
agreement | -0.191 |
ies | -0.191 |
ie | -0.167 |
ashing | -0.167 |
Token | you |
Token ID | 345 |
Feature activation | -1.34e+00 |
Pos logit contributions | |
agree | +0.151 |
agreement | +0.148 |
ies | +0.145 |
ie | +0.139 |
happy | +0.135 |
Neg logit contributions | |
fault | -0.171 |
uch | -0.170 |
gh | -0.160 |
tresp | -0.155 |
paste | -0.152 |
Token | . |
Token ID | 13 |
Feature activation | +2.69e-01 |
Pos logit contributions | |
uch | +0.057 |
fault | +0.057 |
gh | +0.055 |
tresp | +0.053 |
paste | +0.051 |
Neg logit contributions | |
agree | -0.060 |
agreement | -0.059 |
ies | -0.058 |
ie | -0.053 |
happy | -0.052 |
Token | I |
Token ID | 314 |
Feature activation | -1.54e+00 |
Pos logit contributions | |
agree | +0.014 |
agreement | +0.013 |
ies | +0.013 |
ie | +0.012 |
happy | +0.012 |
Neg logit contributions | |
fault | -0.010 |
uch | -0.010 |
gh | -0.009 |
tresp | -0.009 |
paste | -0.009 |
Token | 'll |
Token ID | 1183 |
Feature activation | -5.91e+00 |
Pos logit contributions | |
fault | +0.063 |
uch | +0.063 |
gh | +0.062 |
tresp | +0.059 |
paste | +0.057 |
Neg logit contributions | |
agree | -0.072 |
agreement | -0.069 |
ies | -0.068 |
happy | -0.062 |
ie | -0.062 |
Token | take |
Token ID | 1011 |
Feature activation | +9.36e+00 |
Pos logit contributions | |
fault | +0.320 |
uch | +0.319 |
gh | +0.309 |
tresp | +0.303 |
paste | +0.294 |
Neg logit contributions | |
agree | -0.194 |
agreement | -0.190 |
ies | -0.188 |
ie | -0.166 |
ashing | -0.161 |
Token | your |
Token ID | 534 |
Feature activation | -5.73e-01 |
Pos logit contributions | |
agreement | +0.355 |
agree | +0.346 |
ies | +0.346 |
happy | +0.337 |
ie | +0.327 |
Neg logit contributions | |
uch | -0.470 |
fault | -0.458 |
ses | -0.434 |
gh | -0.431 |
sed | -0.431 |
Token | barrel |
Token ID | 9036 |
Feature activation | -3.91e+00 |
Pos logit contributions | |
uch | +0.030 |
fault | +0.029 |
gh | +0.029 |
tresp | +0.028 |
paste | +0.027 |
Neg logit contributions | |
agree | -0.020 |
agreement | -0.020 |
ies | -0.020 |
ie | -0.017 |
happy | -0.017 |
Token | and |
Token ID | 290 |
Feature activation | -7.22e-01 |
Pos logit contributions | |
uch | +0.181 |
fault | +0.180 |
gh | +0.176 |
tresp | +0.171 |
paste | +0.165 |
Neg logit contributions | |
agree | -0.160 |
ies | -0.155 |
agreement | -0.155 |
ie | -0.141 |
happy | -0.137 |
Token | smash |
Token ID | 24273 |
Feature activation | -2.60e+00 |
Pos logit contributions | |
fault | +0.036 |
uch | +0.035 |
gh | +0.034 |
tresp | +0.034 |
paste | +0.033 |
Neg logit contributions | |
agree | -0.027 |
agreement | -0.027 |
ies | -0.026 |
ie | -0.024 |
ashing | -0.023 |
Token | it |
Token ID | 340 |
Feature activation | -3.28e+00 |
Pos logit contributions | |
uch | +0.120 |
fault | +0.117 |
tresp | +0.111 |
sed | +0.111 |
gh | +0.110 |
Neg logit contributions | |
ies | -0.114 |
ie | -0.107 |
agree | -0.105 |
agreement | -0.104 |
happy | -0.100 |
Token | grand |
Token ID | 4490 |
Feature activation | -1.85e+00 |
Pos logit contributions | |
tresp | +0.027 |
gh | +0.027 |
fault | +0.026 |
uch | +0.026 |
nothing | +0.026 |
Neg logit contributions | |
agreement | -0.022 |
ies | -0.021 |
agree | -0.021 |
ashing | -0.021 |
acking | -0.020 |
Token | pa |
Token ID | 8957 |
Feature activation | -1.66e-01 |
Pos logit contributions | |
fault | +0.100 |
gh | +0.097 |
uch | +0.096 |
tresp | +0.094 |
gency | +0.089 |
Neg logit contributions | |
agree | -0.059 |
agreement | -0.057 |
ies | -0.057 |
ashing | -0.053 |
reci | -0.050 |
Token | on |
Token ID | 319 |
Feature activation | -1.99e-01 |
Pos logit contributions | |
fault | +0.006 |
gh | +0.006 |
tresp | +0.006 |
uch | +0.006 |
tricked | +0.006 |
Neg logit contributions | |
agreement | -0.008 |
ies | -0.007 |
agree | -0.007 |
reci | -0.007 |
ashing | -0.007 |
Token | the |
Token ID | 262 |
Feature activation | +1.54e+00 |
Pos logit contributions | |
uch | +0.009 |
fault | +0.008 |
paste | +0.008 |
gh | +0.008 |
sed | +0.008 |
Neg logit contributions | |
agreement | -0.009 |
agree | -0.009 |
happy | -0.009 |
family | -0.008 |
ies | -0.008 |
Token | farm |
Token ID | 5318 |
Feature activation | +3.78e+00 |
Pos logit contributions | |
agree | +0.046 |
agreement | +0.046 |
ies | +0.044 |
ie | +0.039 |
happy | +0.039 |
Neg logit contributions | |
uch | -0.087 |
fault | -0.087 |
gh | -0.084 |
tresp | -0.082 |
paste | -0.082 |
Token | . |
Token ID | 13 |
Feature activation | +4.21e-01 |
Pos logit contributions | |
agreement | +0.176 |
reci | +0.168 |
ies | +0.167 |
agree | +0.165 |
ashing | +0.156 |
Neg logit contributions | |
fault | -0.143 |
gh | -0.139 |
tresp | -0.135 |
uch | -0.134 |
an | -0.125 |
Token | They |
Token ID | 1119 |
Feature activation | +4.14e+00 |
Pos logit contributions | |
ies | +0.019 |
ie | +0.019 |
forth | +0.018 |
agree | +0.017 |
agreement | +0.016 |
Neg logit contributions | |
fault | -0.017 |
an | -0.017 |
nothing | -0.016 |
Ow | -0.015 |
How | -0.015 |
Token | packed |
Token ID | 11856 |
Feature activation | +4.84e+00 |
Pos logit contributions | |
agreement | +0.182 |
ies | +0.175 |
agree | +0.166 |
ashing | +0.165 |
reci | +0.164 |
Neg logit contributions | |
tresp | -0.169 |
tricked | -0.165 |
uch | -0.164 |
gh | -0.152 |
gency | -0.151 |
Token | their |
Token ID | 511 |
Feature activation | +2.27e+00 |
Pos logit contributions | |
ies | +0.209 |
agreement | +0.204 |
agree | +0.203 |
happy | +0.194 |
ie | +0.187 |
Neg logit contributions | |
uch | -0.229 |
fault | -0.221 |
tresp | -0.207 |
gh | -0.207 |
gency | -0.201 |
Token | bags |
Token ID | 11668 |
Feature activation | -1.68e+00 |
Pos logit contributions | |
ies | +0.091 |
agreement | +0.090 |
agree | +0.088 |
happy | +0.082 |
family | +0.082 |
Neg logit contributions | |
uch | -0.109 |
fault | -0.107 |
gh | -0.105 |
paste | -0.098 |
tresp | -0.098 |
Token | and |
Token ID | 290 |
Feature activation | -5.71e-01 |
Pos logit contributions | |
uch | +0.069 |
fault | +0.068 |
gh | +0.066 |
an | +0.063 |
paste | +0.062 |
Neg logit contributions | |
agreement | -0.073 |
agree | -0.072 |
reci | -0.068 |
ies | -0.066 |
ashing | -0.063 |
Token | they |
Token ID | 484 |
Feature activation | +5.31e+00 |
Pos logit contributions | |
fault | +0.036 |
uch | +0.036 |
tresp | +0.035 |
gh | +0.035 |
paste | +0.033 |
Neg logit contributions | |
agree | -0.024 |
agreement | -0.023 |
ies | -0.023 |
ashing | -0.020 |
reci | -0.020 |
Token | played |
Token ID | 2826 |
Feature activation | +4.16e+00 |
Pos logit contributions | |
agreement | +0.191 |
agree | +0.184 |
ies | +0.181 |
ogging | +0.164 |
ashing | +0.162 |
Neg logit contributions | |
uch | -0.260 |
tresp | -0.255 |
fault | -0.252 |
gh | -0.248 |
tricked | -0.245 |
Token | together |
Token ID | 1978 |
Feature activation | +5.20e+00 |
Pos logit contributions | |
agree | +0.134 |
agreement | +0.131 |
ies | +0.131 |
ie | +0.116 |
reci | +0.111 |
Neg logit contributions | |
fault | -0.225 |
uch | -0.224 |
gh | -0.219 |
tresp | -0.214 |
paste | -0.207 |
Token | every |
Token ID | 790 |
Feature activation | +5.95e+00 |
Pos logit contributions | |
agree | +0.159 |
agreement | +0.155 |
ies | +0.153 |
ie | +0.132 |
reci | +0.130 |
Neg logit contributions | |
fault | -0.292 |
uch | -0.291 |
gh | -0.284 |
tresp | -0.277 |
paste | -0.267 |
Token | day |
Token ID | 1110 |
Feature activation | +4.75e+00 |
Pos logit contributions | |
agree | +0.125 |
ies | +0.120 |
agreement | +0.109 |
reci | +0.093 |
ashing | +0.086 |
Neg logit contributions | |
uch | -0.392 |
fault | -0.384 |
gh | -0.381 |
tresp | -0.373 |
paste | -0.356 |
Token | . |
Token ID | 13 |
Feature activation | +3.13e-01 |
Pos logit contributions | |
agreement | +0.197 |
agree | +0.194 |
ies | +0.186 |
reci | +0.171 |
ie | +0.170 |
Neg logit contributions | |
fault | -0.216 |
uch | -0.214 |
gh | -0.211 |
tresp | -0.203 |
paste | -0.195 |
Token | They |
Token ID | 1119 |
Feature activation | +4.02e+00 |
Pos logit contributions | |
ies | +0.015 |
agreement | +0.013 |
forth | +0.013 |
agree | +0.013 |
ie | +0.013 |
Neg logit contributions | |
fault | -0.014 |
ses | -0.013 |
sed | -0.012 |
Ow | -0.012 |
uch | -0.012 |
Token | liked |
Token ID | 8288 |
Feature activation | +2.21e+00 |
Pos logit contributions | |
agreement | +0.153 |
agree | +0.149 |
ies | +0.146 |
reci | +0.133 |
ashing | +0.131 |
Neg logit contributions | |
uch | -0.189 |
fault | -0.186 |
tresp | -0.185 |
gh | -0.184 |
tricked | -0.176 |
Token | to |
Token ID | 284 |
Feature activation | -5.47e+00 |
Pos logit contributions | |
agree | +0.095 |
happy | +0.093 |
ies | +0.092 |
play | +0.090 |
agreement | +0.088 |
Neg logit contributions | |
uch | -0.105 |
fault | -0.094 |
paste | -0.093 |
gh | -0.092 |
OP | -0.089 |
Token | run |
Token ID | 1057 |
Feature activation | +5.92e+00 |
Pos logit contributions | |
gh | +0.339 |
fault | +0.339 |
uch | +0.339 |
tresp | +0.334 |
tricked | +0.316 |
Neg logit contributions | |
ies | -0.125 |
agreement | -0.112 |
agree | -0.108 |
ashing | -0.107 |
reci | -0.103 |
Token | , |
Token ID | 11 |
Feature activation | +1.12e-01 |
Pos logit contributions | |
agree | +0.162 |
ies | +0.152 |
agreement | +0.147 |
happy | +0.139 |
ashing | +0.131 |
Neg logit contributions | |
fault | -0.366 |
uch | -0.365 |
gh | -0.346 |
paste | -0.338 |
tresp | -0.337 |
Token | house |
Token ID | 2156 |
Feature activation | +8.44e+00 |
Pos logit contributions | |
agree | +0.048 |
agreement | +0.046 |
ies | +0.045 |
ie | +0.040 |
reci | +0.039 |
Neg logit contributions | |
fault | -0.082 |
uch | -0.079 |
tresp | -0.078 |
gh | -0.076 |
paste | -0.073 |
Token | . |
Token ID | 13 |
Feature activation | +3.50e-01 |
Pos logit contributions | |
agree | +0.344 |
agreement | +0.341 |
ies | +0.339 |
reci | +0.313 |
ie | +0.312 |
Neg logit contributions | |
fault | -0.380 |
uch | -0.363 |
gh | -0.358 |
tresp | -0.356 |
paste | -0.340 |
Token | They |
Token ID | 1119 |
Feature activation | +4.07e+00 |
Pos logit contributions | |
agree | +0.008 |
agreement | +0.008 |
ies | +0.008 |
ie | +0.007 |
ashing | +0.006 |
Neg logit contributions | |
fault | -0.022 |
uch | -0.022 |
gh | -0.021 |
tresp | -0.021 |
paste | -0.021 |
Token | saw |
Token ID | 2497 |
Feature activation | +5.12e+00 |
Pos logit contributions | |
agreement | +0.155 |
agree | +0.155 |
ies | +0.152 |
ashing | +0.147 |
reci | +0.145 |
Neg logit contributions | |
tresp | -0.190 |
tricked | -0.185 |
uch | -0.178 |
gency | -0.173 |
fault | -0.168 |
Token | Lily |
Token ID | 20037 |
Feature activation | +2.17e+00 |
Pos logit contributions | |
agree | +0.188 |
agreement | +0.184 |
ies | +0.180 |
ie | +0.162 |
ashing | +0.157 |
Neg logit contributions | |
fault | -0.260 |
uch | -0.257 |
gh | -0.248 |
tresp | -0.245 |
paste | -0.237 |
Token | and |
Token ID | 290 |
Feature activation | -6.42e-01 |
Pos logit contributions | |
agree | +0.079 |
agreement | +0.078 |
ies | +0.075 |
ie | +0.067 |
reci | +0.066 |
Neg logit contributions | |
fault | -0.110 |
uch | -0.110 |
gh | -0.106 |
tresp | -0.105 |
paste | -0.101 |
Token | Ben |
Token ID | 3932 |
Feature activation | -3.12e+00 |
Pos logit contributions | |
fault | +0.028 |
uch | +0.027 |
tresp | +0.026 |
gh | +0.026 |
paste | +0.025 |
Neg logit contributions | |
agree | -0.028 |
agreement | -0.027 |
ies | -0.027 |
ie | -0.025 |
reci | -0.025 |
Token | . |
Token ID | 13 |
Feature activation | +3.51e-01 |
Pos logit contributions | |
fault | +0.122 |
uch | +0.119 |
tresp | +0.119 |
gh | +0.114 |
ps | +0.110 |
Neg logit contributions | |
agreement | -0.142 |
agree | -0.138 |
ies | -0.137 |
particular | -0.134 |
ie | -0.133 |
Token | They |
Token ID | 1119 |
Feature activation | +4.07e+00 |
Pos logit contributions | |
agree | +0.008 |
agreement | +0.007 |
ies | +0.007 |
ie | +0.006 |
ashing | +0.006 |
Neg logit contributions | |
fault | -0.023 |
uch | -0.023 |
gh | -0.022 |
tresp | -0.022 |
paste | -0.022 |
Token | saw |
Token ID | 2497 |
Feature activation | +5.12e+00 |
Pos logit contributions | |
agreement | +0.162 |
agree | +0.161 |
ashing | +0.156 |
ies | +0.156 |
reci | +0.152 |
Neg logit contributions | |
tresp | -0.183 |
tricked | -0.173 |
uch | -0.167 |
ps | -0.163 |
fault | -0.162 |
Token | the |
Token ID | 262 |
Feature activation | +1.49e+00 |
Pos logit contributions | |
agree | +0.201 |
agreement | +0.197 |
ies | +0.191 |
ie | +0.174 |
ashing | +0.173 |
Neg logit contributions | |
fault | -0.248 |
uch | -0.236 |
tresp | -0.232 |
gh | -0.227 |
paste | -0.222 |
Token | bag |
Token ID | 6131 |
Feature activation | -2.37e+00 |
Pos logit contributions | |
agree | +0.066 |
agreement | +0.065 |
ies | +0.064 |
ie | +0.058 |
happy | +0.058 |
Neg logit contributions | |
uch | -0.083 |
fault | -0.082 |
gh | -0.080 |
tresp | -0.077 |
paste | -0.075 |
Token | of |
Token ID | 286 |
Feature activation | +1.01e+01 |
Pos logit contributions | |
fault | +0.113 |
uch | +0.111 |
gh | +0.108 |
tresp | +0.106 |
gency | +0.103 |
Neg logit contributions | |
agree | -0.093 |
ies | -0.092 |
agreement | -0.088 |
ie | -0.084 |
happy | -0.081 |
Token | apples |
Token ID | 22514 |
Feature activation | -2.16e+00 |
Pos logit contributions | |
agree | +0.444 |
agreement | +0.438 |
ies | +0.423 |
happy | +0.397 |
ie | +0.387 |
Neg logit contributions | |
uch | -0.448 |
fault | -0.437 |
gh | -0.416 |
tresp | -0.397 |
paste | -0.392 |
Token | . |
Token ID | 13 |
Feature activation | +4.56e-01 |
Pos logit contributions | |
fault | +0.116 |
uch | +0.114 |
gh | +0.112 |
tresp | +0.110 |
gency | +0.106 |
Neg logit contributions | |
agree | -0.073 |
ies | -0.071 |
agreement | -0.067 |
ie | -0.064 |
happy | -0.062 |
Token |
|
Token ID | 198 |
Feature activation | +8.48e-01 |
Pos logit contributions | |
ies | +0.017 |
agree | +0.016 |
agreement | +0.016 |
ie | +0.015 |
reci | +0.014 |
Neg logit contributions | |
fault | -0.023 |
uch | -0.022 |
gh | -0.022 |
OP | -0.021 |
paste | -0.021 |
Token |
|
Token ID | 198 |
Feature activation | +8.33e-01 |
Pos logit contributions | |
ies | +0.034 |
agreement | +0.030 |
forth | +0.030 |
agree | +0.030 |
ashing | +0.029 |
Neg logit contributions | |
gh | -0.040 |
fault | -0.039 |
uch | -0.038 |
OP | -0.038 |
tresp | -0.036 |
Token | She |
Token ID | 3347 |
Feature activation | +5.80e+00 |
Pos logit contributions | |
ies | +0.037 |
ie | +0.032 |
agree | +0.031 |
agreement | +0.031 |
forth | +0.030 |
Neg logit contributions | |
gh | -0.038 |
uch | -0.037 |
OP | -0.036 |
tresp | -0.034 |
ggie | -0.034 |
Token | asked |
Token ID | 1965 |
Feature activation | +7.05e+00 |
Pos logit contributions | |
agree | +0.215 |
agreement | +0.201 |
ies | +0.198 |
ie | +0.180 |
happy | +0.180 |
Neg logit contributions | |
fault | -0.307 |
uch | -0.302 |
gh | -0.289 |
tresp | -0.280 |
paste | -0.274 |
Token | the |
Token ID | 262 |
Feature activation | +1.44e+00 |
Pos logit contributions | |
agree | +0.198 |
agreement | +0.191 |
ies | +0.187 |
ie | +0.163 |
happy | +0.157 |
Neg logit contributions | |
uch | -0.424 |
fault | -0.422 |
gh | -0.408 |
tresp | -0.400 |
paste | -0.392 |
Token | seller |
Token ID | 18583 |
Feature activation | -2.00e+00 |
Pos logit contributions | |
agree | +0.047 |
agreement | +0.045 |
ies | +0.045 |
ie | +0.040 |
happy | +0.038 |
Neg logit contributions | |
fault | -0.079 |
uch | -0.079 |
gh | -0.077 |
tresp | -0.075 |
paste | -0.073 |
Token | if |
Token ID | 611 |
Feature activation | +2.96e+00 |
Pos logit contributions | |
fault | +0.092 |
uch | +0.086 |
gh | +0.086 |
tresp | +0.084 |
paste | +0.082 |
Neg logit contributions | |
agree | -0.079 |
agreement | -0.079 |
ies | -0.076 |
reci | -0.071 |
ashing | -0.071 |
Token | , |
Token ID | 11 |
Feature activation | +6.92e-02 |
Pos logit contributions | |
uch | +0.044 |
fault | +0.043 |
osaurus | +0.040 |
gency | +0.039 |
tresp | +0.039 |
Neg logit contributions | |
agree | -0.013 |
ies | -0.013 |
happy | -0.013 |
agreement | -0.012 |
ie | -0.011 |
Token | " |
Token ID | 366 |
Feature activation | +1.68e+00 |
Pos logit contributions | |
agree | +0.002 |
agreement | +0.002 |
ies | +0.002 |
ie | +0.002 |
happy | +0.002 |
Neg logit contributions | |
fault | -0.004 |
uch | -0.004 |
gh | -0.003 |
tresp | -0.003 |
paste | -0.003 |
Token | Don |
Token ID | 3987 |
Feature activation | -3.13e+00 |
Pos logit contributions | |
agree | +0.080 |
ies | +0.079 |
agreement | +0.075 |
ashing | +0.073 |
happy | +0.070 |
Neg logit contributions | |
uch | -0.065 |
gh | -0.060 |
gency | -0.058 |
OP | -0.058 |
paste | -0.057 |
Token | 't |
Token ID | 470 |
Feature activation | -1.10e+01 |
Pos logit contributions | |
fault | +0.109 |
ses | +0.090 |
sed | +0.087 |
uch | +0.085 |
tresp | +0.081 |
Neg logit contributions | |
agreement | -0.173 |
agree | -0.171 |
ie | -0.165 |
ies | -0.163 |
happy | -0.160 |
Token | worry |
Token ID | 5490 |
Feature activation | +2.47e+00 |
Pos logit contributions | |
uch | +0.529 |
fault | +0.507 |
gh | +0.507 |
tresp | +0.501 |
tricked | +0.473 |
Neg logit contributions | |
agreement | -0.409 |
agree | -0.408 |
ies | -0.395 |
ashing | -0.373 |
reci | -0.369 |
Token | Sally |
Token ID | 25737 |
Feature activation | +3.59e-01 |
Pos logit contributions | |
ies | +0.075 |
agree | +0.073 |
agreement | +0.071 |
ie | +0.067 |
happy | +0.062 |
Neg logit contributions | |
uch | -0.146 |
fault | -0.141 |
gh | -0.137 |
tresp | -0.133 |
gency | -0.132 |
Token | , |
Token ID | 11 |
Feature activation | +7.17e-02 |
Pos logit contributions | |
ies | +0.011 |
agree | +0.011 |
agreement | +0.010 |
ie | +0.010 |
happy | +0.009 |
Neg logit contributions | |
fault | -0.020 |
uch | -0.020 |
gh | -0.020 |
tresp | -0.019 |
paste | -0.019 |
Token | I |
Token ID | 314 |
Feature activation | -1.51e+00 |
Pos logit contributions | |
agree | +0.004 |
agreement | +0.004 |
ies | +0.003 |
ie | +0.003 |
happy | +0.003 |
Neg logit contributions | |
fault | -0.003 |
uch | -0.003 |
gh | -0.003 |
tresp | -0.002 |
paste | -0.002 |
Token | have |
Token ID | 423 |
Feature activation | +8.34e+00 |
Pos logit contributions | |
fault | +0.054 |
uch | +0.054 |
gh | +0.052 |
tresp | +0.049 |
paste | +0.048 |
Neg logit contributions | |
agree | -0.079 |
agreement | -0.076 |
ies | -0.076 |
ie | -0.070 |
happy | -0.070 |
Token | something |
Token ID | 1223 |
Feature activation | -7.42e+00 |
Pos logit contributions | |
happy | +0.369 |
family | +0.361 |
friends | +0.349 |
play | +0.346 |
ies | +0.345 |
Neg logit contributions | |
osaurus | -0.356 |
ses | -0.351 |
uch | -0.335 |
fault | -0.327 |
tr | -0.321 |
Token | which |
Token ID | 543 |
Feature activation | +3.24e+00 |
Pos logit contributions | |
uch | +0.400 |
fault | +0.380 |
gh | +0.374 |
osaurus | +0.371 |
gency | +0.370 |
Neg logit contributions | |
agree | -0.251 |
ies | -0.250 |
happy | -0.245 |
agreement | -0.227 |
ie | -0.224 |
Token | while |
Token ID | 981 |
Feature activation | +3.57e+00 |
Pos logit contributions | |
gh | +0.414 |
uch | +0.411 |
fault | +0.392 |
tresp | +0.383 |
OP | +0.366 |
Neg logit contributions | |
agreement | -0.312 |
ies | -0.311 |
agree | -0.292 |
reci | -0.289 |
ie | -0.265 |
Token | , |
Token ID | 11 |
Feature activation | +1.61e-01 |
Pos logit contributions | |
agree | +0.141 |
agreement | +0.139 |
ies | +0.134 |
reci | +0.129 |
ashing | +0.123 |
Neg logit contributions | |
fault | -0.164 |
gh | -0.164 |
tresp | -0.160 |
uch | -0.160 |
paste | -0.150 |
Token | it |
Token ID | 340 |
Feature activation | -3.17e+00 |
Pos logit contributions | |
agree | +0.004 |
agreement | +0.004 |
ies | +0.004 |
happy | +0.004 |
ie | +0.004 |
Neg logit contributions | |
fault | -0.010 |
uch | -0.010 |
gh | -0.009 |
tresp | -0.009 |
paste | -0.009 |
Token | was |
Token ID | 373 |
Feature activation | -3.67e+00 |
Pos logit contributions | |
fault | +0.176 |
tresp | +0.163 |
gency | +0.163 |
uch | +0.159 |
ses | +0.157 |
Neg logit contributions | |
ies | -0.111 |
agree | -0.108 |
ie | -0.106 |
agreement | -0.104 |
happy | -0.102 |
Token | time |
Token ID | 640 |
Feature activation | -2.61e+00 |
Pos logit contributions | |
uch | +0.181 |
fault | +0.179 |
gh | +0.172 |
tresp | +0.170 |
paste | +0.169 |
Neg logit contributions | |
agreement | -0.140 |
agree | -0.139 |
ies | -0.136 |
happy | -0.127 |
ie | -0.124 |
Token | to |
Token ID | 284 |
Feature activation | -5.38e+00 |
Pos logit contributions | |
uch | +0.106 |
fault | +0.103 |
gh | +0.100 |
tresp | +0.098 |
paste | +0.096 |
Neg logit contributions | |
agree | -0.122 |
ies | -0.119 |
agreement | -0.118 |
happy | -0.108 |
ie | -0.108 |
Token | go |
Token ID | 467 |
Feature activation | +6.03e+00 |
Pos logit contributions | |
uch | +0.252 |
gh | +0.244 |
fault | +0.239 |
tresp | +0.226 |
anything | +0.221 |
Neg logit contributions | |
ies | -0.203 |
ashing | -0.201 |
agree | -0.183 |
reci | -0.182 |
agreement | -0.175 |
Token | home |
Token ID | 1363 |
Feature activation | +3.37e+00 |
Pos logit contributions | |
agreement | +0.261 |
agree | +0.250 |
reci | +0.246 |
ies | +0.228 |
ashing | +0.210 |
Neg logit contributions | |
fault | -0.274 |
uch | -0.248 |
gh | -0.233 |
tresp | -0.231 |
anything | -0.227 |
Token | . |
Token ID | 13 |
Feature activation | +3.94e-01 |
Pos logit contributions | |
agreement | +0.139 |
agree | +0.135 |
ies | +0.131 |
reci | +0.128 |
ie | +0.123 |
Neg logit contributions | |
fault | -0.156 |
uch | -0.148 |
gh | -0.145 |
tresp | -0.141 |
paste | -0.135 |
Token | Tim |
Token ID | 5045 |
Feature activation | -5.20e-01 |
Pos logit contributions | |
agree | +0.015 |
ies | +0.015 |
agreement | +0.015 |
ie | +0.014 |
reci | +0.013 |
Neg logit contributions | |
fault | -0.019 |
uch | -0.019 |
gh | -0.018 |
tresp | -0.018 |
OP | -0.017 |
Token | my |
Token ID | 1820 |
Feature activation | +7.84e+00 |
Pos logit contributions | |
fault | +0.034 |
uch | +0.034 |
gh | +0.032 |
tresp | +0.031 |
osaurus | +0.031 |
Neg logit contributions | |
agree | -0.011 |
agreement | -0.011 |
ies | -0.009 |
reci | -0.009 |
ashing | -0.009 |
Token | band |
Token ID | 4097 |
Feature activation | -6.69e+00 |
Pos logit contributions | |
fault | +0.333 |
uch | +0.331 |
gh | +0.317 |
tresp | +0.308 |
paste | +0.296 |
Neg logit contributions | |
agree | -0.410 |
agreement | -0.402 |
ies | -0.398 |
ie | -0.367 |
happy | -0.359 |
Token | - |
Token ID | 12 |
Feature activation | -2.94e+00 |
Pos logit contributions | |
uch | +0.230 |
fault | +0.222 |
gh | +0.220 |
paste | +0.207 |
tresp | +0.207 |
Neg logit contributions | |
agree | -0.350 |
agreement | -0.349 |
ies | -0.340 |
reci | -0.318 |
ie | -0.316 |
Token | aid |
Token ID | 1698 |
Feature activation | -5.82e+00 |
Pos logit contributions | |
fault | +0.139 |
uch | +0.131 |
tresp | +0.129 |
gh | +0.129 |
ses | +0.123 |
Neg logit contributions | |
agree | -0.121 |
ies | -0.118 |
agreement | -0.117 |
ie | -0.109 |
happy | -0.106 |
Token | on |
Token ID | 319 |
Feature activation | -2.93e-01 |
Pos logit contributions | |
uch | +0.228 |
fault | +0.227 |
gh | +0.216 |
paste | +0.209 |
tresp | +0.209 |
Neg logit contributions | |
agree | -0.277 |
agreement | -0.276 |
ies | -0.264 |
ie | -0.244 |
ashing | -0.243 |
Token | his |
Token ID | 465 |
Feature activation | +3.16e-01 |
Pos logit contributions | |
uch | +0.020 |
fault | +0.019 |
tresp | +0.019 |
lor | +0.018 |
paste | +0.018 |
Neg logit contributions | |
agreement | -0.007 |
happy | -0.006 |
agree | -0.006 |
play | -0.006 |
ie | -0.006 |
Token | knee |
Token ID | 10329 |
Feature activation | -6.58e+00 |
Pos logit contributions | |
agree | +0.012 |
agreement | +0.012 |
ies | +0.012 |
happy | +0.011 |
smile | +0.010 |
Neg logit contributions | |
uch | -0.016 |
fault | -0.015 |
tresp | -0.015 |
gh | -0.015 |
paste | -0.014 |
Token | . |
Token ID | 13 |
Feature activation | +3.50e-01 |
Pos logit contributions | |
uch | +0.388 |
fault | +0.380 |
tresp | +0.377 |
gh | +0.372 |
gency | +0.357 |
Neg logit contributions | |
agree | -0.195 |
ies | -0.181 |
agreement | -0.175 |
happy | -0.161 |
ie | -0.157 |
Token | Tim |
Token ID | 5045 |
Feature activation | -5.52e-01 |
Pos logit contributions | |
agreement | +0.010 |
agree | +0.010 |
ies | +0.010 |
ie | +0.008 |
reci | +0.008 |
Neg logit contributions | |
fault | -0.021 |
uch | -0.020 |
gh | -0.020 |
tresp | -0.019 |
paste | -0.019 |
Token | felt |
Token ID | 2936 |
Feature activation | -1.87e+00 |
Pos logit contributions | |
fault | +0.028 |
uch | +0.027 |
gh | +0.027 |
tresp | +0.026 |
paste | +0.025 |
Neg logit contributions | |
agree | -0.021 |
ies | -0.020 |
agreement | -0.020 |
ie | -0.018 |
happy | -0.018 |
Token | better |
Token ID | 1365 |
Feature activation | -2.80e+00 |
Pos logit contributions | |
fault | +0.088 |
uch | +0.087 |
gh | +0.084 |
tresp | +0.082 |
gency | +0.080 |
Neg logit contributions | |
agree | -0.077 |
agreement | -0.076 |
ies | -0.072 |
happy | -0.071 |
ie | -0.066 |
Token | . |
Token ID | 13 |
Feature activation | +3.45e-01 |
Pos logit contributions | |
fault | +0.139 |
uch | +0.138 |
gh | +0.133 |
tresp | +0.130 |
paste | +0.127 |
Neg logit contributions | |
agreement | -0.103 |
agree | -0.103 |
ies | -0.100 |
ie | -0.092 |
reci | -0.090 |
Token | but |
Token ID | 475 |
Feature activation | +6.44e-01 |
Pos logit contributions | |
agree | +0.007 |
agreement | +0.006 |
ies | +0.006 |
happy | +0.006 |
ie | +0.005 |
Neg logit contributions | |
uch | -0.010 |
fault | -0.010 |
gh | -0.010 |
tresp | -0.010 |
paste | -0.009 |
Token | Jack |
Token ID | 3619 |
Feature activation | -8.82e-01 |
Pos logit contributions | |
agree | +0.023 |
agreement | +0.022 |
ies | +0.022 |
ie | +0.020 |
happy | +0.019 |
Neg logit contributions | |
fault | -0.034 |
uch | -0.034 |
gh | -0.032 |
tresp | -0.032 |
paste | -0.031 |
Token | 's |
Token ID | 338 |
Feature activation | +1.69e+00 |
Pos logit contributions | |
fault | +0.045 |
uch | +0.042 |
tresp | +0.041 |
gh | +0.041 |
ses | +0.040 |
Neg logit contributions | |
agree | -0.034 |
ies | -0.034 |
ie | -0.032 |
agreement | -0.031 |
happy | -0.030 |
Token | umbrella |
Token ID | 25510 |
Feature activation | -2.70e+00 |
Pos logit contributions | |
agree | +0.052 |
agreement | +0.051 |
ies | +0.050 |
ie | +0.044 |
happy | +0.042 |
Neg logit contributions | |
fault | -0.095 |
uch | -0.095 |
gh | -0.092 |
tresp | -0.090 |
paste | -0.088 |
Token | shielded |
Token ID | 45153 |
Feature activation | +3.53e-01 |
Pos logit contributions | |
fault | +0.137 |
tresp | +0.131 |
gency | +0.126 |
uch | +0.125 |
gh | +0.122 |
Neg logit contributions | |
ies | -0.101 |
agree | -0.094 |
ie | -0.094 |
happy | -0.092 |
play | -0.090 |
Token | him |
Token ID | 683 |
Feature activation | -2.64e+00 |
Pos logit contributions | |
agreement | +0.012 |
agree | +0.012 |
ies | +0.012 |
ie | +0.011 |
happy | +0.010 |
Neg logit contributions | |
uch | -0.020 |
fault | -0.020 |
ses | -0.019 |
sed | -0.019 |
tresp | -0.019 |
Token | from |
Token ID | 422 |
Feature activation | -2.66e+00 |
Pos logit contributions | |
fault | +0.126 |
uch | +0.125 |
gh | +0.121 |
tresp | +0.117 |
paste | +0.114 |
Neg logit contributions | |
agree | -0.104 |
agreement | -0.102 |
ies | -0.100 |
ie | -0.091 |
ashing | -0.089 |
Token | the |
Token ID | 262 |
Feature activation | +1.53e+00 |
Pos logit contributions | |
uch | +0.133 |
fault | +0.129 |
gh | +0.124 |
paste | +0.123 |
tresp | +0.121 |
Neg logit contributions | |
agreement | -0.103 |
agree | -0.103 |
ies | -0.099 |
ie | -0.093 |
happy | -0.093 |
Token | rain |
Token ID | 6290 |
Feature activation | -3.26e+00 |
Pos logit contributions | |
agreement | +0.052 |
agree | +0.052 |
ies | +0.051 |
happy | +0.047 |
ie | +0.045 |
Neg logit contributions | |
uch | -0.080 |
fault | -0.079 |
gh | -0.079 |
osaurus | -0.076 |
tresp | -0.075 |
Token | drops |
Token ID | 49253 |
Feature activation | -8.01e+00 |
Pos logit contributions | |
uch | +0.176 |
fault | +0.174 |
tresp | +0.169 |
gh | +0.168 |
gency | +0.166 |
Neg logit contributions | |
agree | -0.108 |
ies | -0.108 |
agreement | -0.102 |
ie | -0.095 |
happy | -0.094 |
Token | . |
Token ID | 13 |
Feature activation | +4.23e-01 |
Pos logit contributions | |
fault | +0.367 |
uch | +0.367 |
gh | +0.351 |
tresp | +0.345 |
paste | +0.333 |
Neg logit contributions | |
agree | -0.329 |
agreement | -0.321 |
ies | -0.318 |
ie | -0.289 |
happy | -0.285 |
Token | the |
Token ID | 262 |
Feature activation | +1.53e+00 |
Pos logit contributions | |
uch | +0.036 |
fault | +0.035 |
tresp | +0.034 |
gh | +0.034 |
gency | +0.033 |
Neg logit contributions | |
ies | -0.028 |
agree | -0.028 |
agreement | -0.027 |
ie | -0.026 |
happy | -0.026 |
Token | little |
Token ID | 1310 |
Feature activation | +7.34e+00 |
Pos logit contributions | |
agree | +0.067 |
agreement | +0.066 |
ies | +0.065 |
ie | +0.060 |
happy | +0.059 |
Neg logit contributions | |
fault | -0.067 |
uch | -0.067 |
gh | -0.065 |
tresp | -0.063 |
paste | -0.060 |
Token | girl |
Token ID | 2576 |
Feature activation | +7.56e+00 |
Pos logit contributions | |
agree | +0.113 |
ies | +0.111 |
agreement | +0.110 |
ie | +0.083 |
happy | +0.076 |
Neg logit contributions | |
uch | -0.528 |
fault | -0.524 |
gh | -0.518 |
tresp | -0.506 |
OP | -0.491 |
Token | some |
Token ID | 617 |
Feature activation | -1.52e+00 |
Pos logit contributions | |
agree | +0.329 |
ies | +0.326 |
agreement | +0.318 |
happy | +0.304 |
ie | +0.299 |
Neg logit contributions | |
fault | -0.333 |
uch | -0.325 |
gh | -0.312 |
tresp | -0.310 |
gency | -0.299 |
Token | tea |
Token ID | 8887 |
Feature activation | -6.84e+00 |
Pos logit contributions | |
fault | +0.067 |
uch | +0.066 |
gh | +0.064 |
tresp | +0.062 |
gency | +0.061 |
Neg logit contributions | |
ies | -0.067 |
agree | -0.066 |
agreement | -0.066 |
ie | -0.062 |
happy | -0.060 |
Token | too |
Token ID | 1165 |
Feature activation | -6.79e+00 |
Pos logit contributions | |
fault | +0.380 |
uch | +0.376 |
tresp | +0.366 |
gency | +0.356 |
gh | +0.356 |
Neg logit contributions | |
ies | -0.227 |
agree | -0.221 |
agreement | -0.208 |
ie | -0.197 |
happy | -0.190 |
Token | . |
Token ID | 13 |
Feature activation | +4.28e-01 |
Pos logit contributions | |
fault | +0.391 |
uch | +0.389 |
gh | +0.374 |
tresp | +0.372 |
gency | +0.368 |
Neg logit contributions | |
agree | -0.214 |
ies | -0.199 |
agreement | -0.187 |
happy | -0.186 |
ashing | -0.166 |
Token | It |
Token ID | 632 |
Feature activation | -4.39e+00 |
Pos logit contributions | |
ies | +0.014 |
agree | +0.013 |
agreement | +0.013 |
ie | +0.012 |
reci | +0.011 |
Neg logit contributions | |
fault | -0.024 |
uch | -0.023 |
gh | -0.022 |
paste | -0.022 |
tresp | -0.022 |
Token | was |
Token ID | 373 |
Feature activation | -3.63e+00 |
Pos logit contributions | |
fault | +0.213 |
uch | +0.201 |
gh | +0.197 |
gency | +0.197 |
tresp | +0.196 |
Neg logit contributions | |
ies | -0.181 |
agree | -0.178 |
ie | -0.169 |
happy | -0.169 |
agreement | -0.165 |
Token | a |
Token ID | 257 |
Feature activation | -8.42e+00 |
Pos logit contributions | |
uch | +0.175 |
fault | +0.163 |
gency | +0.162 |
lor | +0.161 |
OP | +0.155 |
Neg logit contributions | |
happy | -0.157 |
agreement | -0.150 |
ies | -0.148 |
agree | -0.142 |
ie | -0.137 |
Token | bit |
Token ID | 1643 |
Feature activation | -1.02e+01 |
Pos logit contributions | |
fault | +0.414 |
uch | +0.413 |
gh | +0.393 |
tresp | +0.386 |
paste | +0.375 |
Neg logit contributions | |
agree | -0.322 |
agreement | -0.317 |
ies | -0.310 |
happy | -0.286 |
ie | -0.278 |
Token | ever |
Token ID | 1683 |
Feature activation | -1.17e+01 |
Pos logit contributions | |
fault | +0.285 |
uch | +0.283 |
gh | +0.271 |
tresp | +0.265 |
paste | +0.255 |
Neg logit contributions | |
agree | -0.325 |
agreement | -0.324 |
ies | -0.317 |
ie | -0.290 |
ashing | -0.285 |
Token | find |
Token ID | 1064 |
Feature activation | +1.08e+01 |
Pos logit contributions | |
uch | +0.538 |
fault | +0.531 |
tresp | +0.504 |
gh | +0.504 |
tricked | +0.497 |
Neg logit contributions | |
agreement | -0.473 |
agree | -0.457 |
ies | -0.437 |
ashing | -0.409 |
reci | -0.393 |
Token | out |
Token ID | 503 |
Feature activation | -8.51e-01 |
Pos logit contributions | |
ie | +0.414 |
agreement | +0.398 |
agree | +0.396 |
ies | +0.386 |
happy | +0.383 |
Neg logit contributions | |
uch | -0.532 |
ses | -0.525 |
sed | -0.517 |
fault | -0.513 |
gh | -0.494 |
Token | ? |
Token ID | 30 |
Feature activation | -5.92e+00 |
Pos logit contributions | |
uch | +0.038 |
fault | +0.038 |
gh | +0.036 |
tresp | +0.035 |
paste | +0.034 |
Neg logit contributions | |
agree | -0.036 |
agreement | -0.036 |
ies | -0.035 |
ie | -0.032 |
happy | -0.032 |
Token | <|endoftext|> |
Token ID | 50256 |
Feature activation | +3.83e+00 |
Pos logit contributions | |
fault | +0.306 |
uch | +0.303 |
gh | +0.292 |
tresp | +0.286 |
paste | +0.283 |
Neg logit contributions | |
ies | -0.207 |
agree | -0.203 |
agreement | -0.200 |
ie | -0.183 |
ashing | -0.175 |
Token | Dave |
Token ID | 27984 |
Feature activation | -5.02e+00 |
Pos logit contributions | |
agree | +0.127 |
agreement | +0.123 |
ies | +0.121 |
ie | +0.109 |
happy | +0.106 |
Neg logit contributions | |
fault | -0.207 |
uch | -0.206 |
gh | -0.198 |
tresp | -0.195 |
paste | -0.191 |
Token | was |
Token ID | 373 |
Feature activation | -3.69e+00 |
Pos logit contributions | |
fault | +0.242 |
uch | +0.240 |
gh | +0.231 |
tresp | +0.227 |
paste | +0.218 |
Neg logit contributions | |
agreement | -0.191 |
agree | -0.191 |
ies | -0.180 |
ashing | -0.162 |
ie | -0.161 |
Token | an |
Token ID | 281 |
Feature activation | -1.37e+01 |
Pos logit contributions | |
fault | +0.158 |
uch | +0.157 |
gh | +0.151 |
tresp | +0.147 |
paste | +0.140 |
Neg logit contributions | |
agree | -0.165 |
ies | -0.160 |
agreement | -0.160 |
ie | -0.146 |
reci | -0.142 |
Token | eager |
Token ID | 11069 |
Feature activation | +7.99e+00 |
Pos logit contributions | |
fault | +0.673 |
tresp | +0.650 |
uch | +0.628 |
an | +0.605 |
gh | +0.595 |
Neg logit contributions | |
ies | -0.486 |
agree | -0.460 |
ashing | -0.407 |
agreement | -0.403 |
reci | -0.403 |
Token | dancer |
Token ID | 38619 |
Feature activation | -8.03e-02 |
Pos logit contributions | |
agree | +0.288 |
ies | +0.283 |
agreement | +0.281 |
reci | +0.260 |
ie | +0.252 |
Neg logit contributions | |
fault | -0.400 |
tresp | -0.370 |
uch | -0.370 |
gh | -0.365 |
ses | -0.352 |
Token | . |
Token ID | 13 |
Feature activation | +3.60e-01 |
Pos logit contributions | |
fault | +0.004 |
uch | +0.004 |
gh | +0.004 |
tresp | +0.004 |
paste | +0.004 |
Neg logit contributions | |
agree | -0.003 |
agreement | -0.003 |
ies | -0.003 |
ie | -0.003 |
happy | -0.002 |
Token | and |
Token ID | 290 |
Feature activation | -5.76e-01 |
Pos logit contributions | |
uch | +0.428 |
fault | +0.426 |
gh | +0.410 |
tresp | +0.406 |
paste | +0.389 |
Neg logit contributions | |
agree | -0.240 |
ies | -0.230 |
agreement | -0.229 |
ie | -0.201 |
happy | -0.197 |
Token | saw |
Token ID | 2497 |
Feature activation | +5.18e+00 |
Pos logit contributions | |
uch | +0.029 |
fault | +0.028 |
gh | +0.028 |
tresp | +0.027 |
tricked | +0.027 |
Neg logit contributions | |
agreement | -0.020 |
agree | -0.020 |
ies | -0.020 |
ashing | -0.018 |
reci | -0.018 |
Token | smoke |
Token ID | 7523 |
Feature activation | -1.08e+01 |
Pos logit contributions | |
ies | +0.183 |
agree | +0.183 |
happy | +0.175 |
ie | +0.174 |
agreement | +0.166 |
Neg logit contributions | |
uch | -0.287 |
gh | -0.282 |
osaurus | -0.265 |
fault | -0.264 |
gency | -0.261 |
Token | . |
Token ID | 13 |
Feature activation | +4.31e-01 |
Pos logit contributions | |
uch | +0.592 |
fault | +0.591 |
gh | +0.556 |
tresp | +0.547 |
gency | +0.542 |
Neg logit contributions | |
ies | -0.367 |
agree | -0.357 |
agreement | -0.342 |
ie | -0.322 |
happy | -0.308 |
Token | " |
Token ID | 366 |
Feature activation | +1.81e+00 |
Pos logit contributions | |
agree | +0.014 |
agreement | +0.013 |
ies | +0.013 |
happy | +0.012 |
ashing | +0.011 |
Neg logit contributions | |
uch | -0.024 |
fault | -0.024 |
gh | -0.023 |
tresp | -0.023 |
paste | -0.022 |
Token | Oh |
Token ID | 5812 |
Feature activation | -1.17e+01 |
Pos logit contributions | |
agree | +0.076 |
agreement | +0.072 |
ies | +0.068 |
happy | +0.066 |
reci | +0.064 |
Neg logit contributions | |
fault | -0.090 |
gh | -0.082 |
uch | -0.080 |
tresp | -0.079 |
sed | -0.078 |
Token | no |
Token ID | 645 |
Feature activation | -1.05e+01 |
Pos logit contributions | |
fault | +0.804 |
osaurus | +0.780 |
rain | +0.775 |
paste | +0.775 |
uch | +0.767 |
Neg logit contributions | |
happy | -0.240 |
ies | -0.230 |
family | -0.215 |
ie | -0.208 |
agree | -0.185 |
Token | , |
Token ID | 11 |
Feature activation | +1.98e-01 |
Pos logit contributions | |
uch | +0.502 |
fault | +0.502 |
gh | +0.482 |
tresp | +0.469 |
paste | +0.465 |
Neg logit contributions | |
agree | -0.429 |
ies | -0.427 |
agreement | -0.402 |
ie | -0.384 |
happy | -0.367 |
Token | the |
Token ID | 262 |
Feature activation | +1.52e+00 |
Pos logit contributions | |
agree | +0.008 |
agreement | +0.008 |
happy | +0.007 |
ies | +0.007 |
family | +0.007 |
Neg logit contributions | |
gh | -0.010 |
fault | -0.010 |
uch | -0.010 |
osaurus | -0.010 |
paste | -0.009 |
Token | cookies |
Token ID | 14746 |
Feature activation | -5.62e+00 |
Pos logit contributions | |
family | +0.060 |
agreement | +0.059 |
happy | +0.059 |
ies | +0.058 |
girl | +0.058 |
Neg logit contributions | |
gh | -0.071 |
uch | -0.069 |
gency | -0.068 |
osaurus | -0.067 |
OP | -0.066 |
Token | are |
Token ID | 389 |
Feature activation | -1.60e+00 |
Pos logit contributions | |
fault | +0.265 |
gh | +0.253 |
uch | +0.251 |
tresp | +0.242 |
gency | +0.240 |
Neg logit contributions | |
agree | -0.231 |
ies | -0.224 |
agreement | -0.215 |
happy | -0.201 |
ie | -0.201 |
Token | three |
Token ID | 1115 |
Feature activation | -2.95e+00 |
Pos logit contributions | |
uch | +0.034 |
fault | +0.033 |
gh | +0.030 |
OP | +0.029 |
ses | +0.029 |
Neg logit contributions | |
agree | -0.030 |
ies | -0.028 |
agreement | -0.028 |
happy | -0.027 |
play | -0.026 |
Token | and |
Token ID | 290 |
Feature activation | -7.10e-01 |
Pos logit contributions | |
fault | +0.146 |
uch | +0.145 |
gh | +0.140 |
tresp | +0.137 |
paste | +0.133 |
Neg logit contributions | |
agree | -0.111 |
agreement | -0.108 |
ies | -0.107 |
ie | -0.096 |
happy | -0.093 |
Token | eight |
Token ID | 3624 |
Feature activation | -6.29e+00 |
Pos logit contributions | |
uch | +0.031 |
fault | +0.031 |
gh | +0.030 |
tresp | +0.028 |
paste | +0.027 |
Neg logit contributions | |
agree | -0.032 |
agreement | -0.031 |
ies | -0.030 |
happy | -0.028 |
ie | -0.027 |
Token | , |
Token ID | 11 |
Feature activation | +8.29e-02 |
Pos logit contributions | |
uch | +0.338 |
fault | +0.335 |
gh | +0.320 |
tresp | +0.312 |
paste | +0.306 |
Neg logit contributions | |
agree | -0.219 |
ies | -0.210 |
agreement | -0.209 |
happy | -0.185 |
ie | -0.183 |
Token | or |
Token ID | 393 |
Feature activation | -8.66e+00 |
Pos logit contributions | |
agree | +0.004 |
agreement | +0.004 |
ies | +0.004 |
ie | +0.003 |
happy | +0.003 |
Neg logit contributions | |
fault | -0.003 |
uch | -0.003 |
gh | -0.003 |
tresp | -0.003 |
paste | -0.003 |
Token | zero |
Token ID | 6632 |
Feature activation | -1.19e+01 |
Pos logit contributions | |
uch | +0.336 |
fault | +0.334 |
gh | +0.320 |
tresp | +0.305 |
paste | +0.292 |
Neg logit contributions | |
agree | -0.426 |
agreement | -0.415 |
ies | -0.410 |
happy | -0.382 |
ie | -0.377 |
Token | and |
Token ID | 290 |
Feature activation | -7.01e-01 |
Pos logit contributions | |
uch | +0.568 |
fault | +0.567 |
gh | +0.545 |
tresp | +0.527 |
gency | +0.523 |
Neg logit contributions | |
ies | -0.481 |
agree | -0.477 |
agreement | -0.432 |
ie | -0.430 |
happy | -0.407 |
Token | zero |
Token ID | 6632 |
Feature activation | -1.19e+01 |
Pos logit contributions | |
uch | +0.028 |
fault | +0.028 |
gh | +0.027 |
tresp | +0.025 |
gency | +0.024 |
Neg logit contributions | |
agree | -0.034 |
agreement | -0.032 |
ies | -0.032 |
happy | -0.032 |
play | -0.030 |
Token | . |
Token ID | 13 |
Feature activation | +2.98e-01 |
Pos logit contributions | |
uch | +0.566 |
fault | +0.561 |
gh | +0.542 |
tresp | +0.525 |
gency | +0.523 |
Neg logit contributions | |
ies | -0.487 |
agree | -0.480 |
ie | -0.433 |
agreement | -0.430 |
happy | -0.410 |
Token | Do |
Token ID | 2141 |
Feature activation | -3.09e+00 |
Pos logit contributions | |
agree | +0.013 |
agreement | +0.013 |
ies | +0.012 |
ie | +0.011 |
happy | +0.011 |
Neg logit contributions | |
fault | -0.013 |
uch | -0.013 |
gh | -0.012 |
tresp | -0.012 |
paste | -0.012 |
Token | you |
Token ID | 345 |
Feature activation | -1.34e+00 |
Pos logit contributions | |
fault | +0.120 |
uch | +0.117 |
gh | +0.111 |
tresp | +0.109 |
paste | +0.106 |
Neg logit contributions | |
agree | -0.153 |
ies | -0.148 |
agreement | -0.147 |
ie | -0.137 |
happy | -0.135 |
Token | . |
Token ID | 13 |
Feature activation | +4.07e-01 |
Pos logit contributions | |
uch | +0.185 |
sed | +0.174 |
ses | +0.173 |
fault | +0.173 |
gency | +0.172 |
Neg logit contributions | |
ies | -0.072 |
ie | -0.065 |
agree | -0.062 |
agreement | -0.060 |
happy | -0.057 |
Token | She |
Token ID | 1375 |
Feature activation | +3.42e+00 |
Pos logit contributions | |
agree | +0.012 |
agreement | +0.012 |
ies | +0.012 |
ie | +0.010 |
reci | +0.010 |
Neg logit contributions | |
fault | -0.023 |
uch | -0.023 |
gh | -0.023 |
tresp | -0.022 |
paste | -0.022 |
Token | said |
Token ID | 531 |
Feature activation | -5.00e-01 |
Pos logit contributions | |
agree | +0.118 |
agreement | +0.114 |
ies | +0.113 |
ie | +0.101 |
happy | +0.098 |
Neg logit contributions | |
fault | -0.182 |
uch | -0.181 |
gh | -0.175 |
tresp | -0.171 |
paste | -0.166 |
Token | , |
Token ID | 11 |
Feature activation | +1.79e-01 |
Pos logit contributions | |
uch | +0.033 |
fault | +0.033 |
osaurus | +0.032 |
tresp | +0.032 |
ses | +0.031 |
Neg logit contributions | |
ies | -0.011 |
agree | -0.010 |
happy | -0.010 |
agreement | -0.009 |
ie | -0.009 |
Token | " |
Token ID | 366 |
Feature activation | +1.80e+00 |
Pos logit contributions | |
agree | +0.007 |
agreement | +0.006 |
ies | +0.006 |
ie | +0.006 |
happy | +0.005 |
Neg logit contributions | |
fault | -0.009 |
uch | -0.009 |
gh | -0.009 |
tresp | -0.009 |
paste | -0.008 |
Token | Oh |
Token ID | 5812 |
Feature activation | -1.18e+01 |
Pos logit contributions | |
agree | +0.070 |
agreement | +0.069 |
ies | +0.067 |
ie | +0.062 |
happy | +0.060 |
Neg logit contributions | |
fault | -0.088 |
uch | -0.087 |
gh | -0.084 |
tresp | -0.083 |
paste | -0.080 |
Token | no |
Token ID | 645 |
Feature activation | -1.06e+01 |
Pos logit contributions | |
fault | +0.371 |
ps | +0.354 |
osaurus | +0.353 |
uch | +0.348 |
paste | +0.343 |
Neg logit contributions | |
ies | -0.637 |
happy | -0.635 |
ie | -0.635 |
agreement | -0.624 |
agree | -0.617 |
Token | , |
Token ID | 11 |
Feature activation | +1.98e-01 |
Pos logit contributions | |
uch | +0.543 |
fault | +0.542 |
gh | +0.523 |
tresp | +0.511 |
paste | +0.503 |
Neg logit contributions | |
agree | -0.386 |
agreement | -0.373 |
ies | -0.372 |
ie | -0.333 |
happy | -0.324 |
Token | we |
Token ID | 356 |
Feature activation | +2.34e+00 |
Pos logit contributions | |
agreement | +0.008 |
agree | +0.008 |
ies | +0.008 |
happy | +0.008 |
ie | +0.007 |
Neg logit contributions | |
uch | -0.009 |
fault | -0.009 |
gh | -0.009 |
osaurus | -0.008 |
OP | -0.008 |
Token | 'll |
Token ID | 1183 |
Feature activation | -5.77e+00 |
Pos logit contributions | |
agree | +0.089 |
ies | +0.084 |
agreement | +0.084 |
ie | +0.076 |
happy | +0.076 |
Neg logit contributions | |
fault | -0.117 |
gh | -0.116 |
uch | -0.115 |
tresp | -0.109 |
paste | -0.109 |
Token | have |
Token ID | 423 |
Feature activation | +8.45e+00 |
Pos logit contributions | |
fault | +0.327 |
uch | +0.327 |
gh | +0.317 |
tresp | +0.309 |
paste | +0.306 |
Neg logit contributions | |
agree | -0.181 |
agreement | -0.172 |
ies | -0.165 |
happy | -0.147 |
ie | -0.146 |
Token | Mom |
Token ID | 29252 |
Feature activation | +9.80e-01 |
Pos logit contributions | |
agree | +0.014 |
agreement | +0.014 |
ies | +0.014 |
ie | +0.013 |
ashing | +0.013 |
Neg logit contributions | |
uch | -0.011 |
fault | -0.011 |
gh | -0.010 |
tresp | -0.010 |
paste | -0.010 |
Token | said |
Token ID | 531 |
Feature activation | -9.61e-02 |
Pos logit contributions | |
agree | +0.028 |
agreement | +0.028 |
ies | +0.027 |
ie | +0.023 |
ashing | +0.023 |
Neg logit contributions | |
fault | -0.057 |
uch | -0.056 |
gh | -0.055 |
tresp | -0.054 |
paste | -0.052 |
Token | , |
Token ID | 11 |
Feature activation | +6.43e-01 |
Pos logit contributions | |
uch | +0.006 |
fault | +0.005 |
tresp | +0.005 |
gh | +0.005 |
osaurus | +0.005 |
Neg logit contributions | |
agree | -0.003 |
ies | -0.003 |
happy | -0.003 |
agreement | -0.003 |
ie | -0.003 |
Token | " |
Token ID | 366 |
Feature activation | +2.14e+00 |
Pos logit contributions | |
agree | +0.022 |
agreement | +0.021 |
ies | +0.021 |
happy | +0.019 |
ie | +0.018 |
Neg logit contributions | |
uch | -0.035 |
fault | -0.034 |
gh | -0.033 |
tresp | -0.032 |
paste | -0.031 |
Token | O |
Token ID | 46 |
Feature activation | -1.53e+01 |
Pos logit contributions | |
agree | +0.090 |
agreement | +0.088 |
ies | +0.087 |
ie | +0.079 |
happy | +0.077 |
Neg logit contributions | |
fault | -0.098 |
uch | -0.097 |
gh | -0.094 |
tresp | -0.091 |
paste | -0.088 |
Token | w |
Token ID | 86 |
Feature activation | -1.40e+01 |
Pos logit contributions | |
fault | +0.260 |
uch | +0.214 |
tresp | +0.213 |
gh | +0.209 |
paste | +0.184 |
Neg logit contributions | |
agree | -1.096 |
ies | -1.080 |
agreement | -1.051 |
ie | -1.027 |
ashing | -1.002 |
Token | ! |
Token ID | 0 |
Feature activation | -4.09e+00 |
Pos logit contributions | |
fault | +0.869 |
tresp | +0.821 |
OP | +0.801 |
uch | +0.795 |
lor | +0.791 |
Neg logit contributions | |
ies | -0.423 |
ie | -0.373 |
agree | -0.346 |
happy | -0.304 |
agreement | -0.303 |
Token | That |
Token ID | 1320 |
Feature activation | -2.73e+00 |
Pos logit contributions | |
gh | +0.197 |
fault | +0.191 |
tresp | +0.187 |
uch | +0.183 |
tricked | +0.179 |
Neg logit contributions | |
agree | -0.140 |
ies | -0.138 |
agreement | -0.136 |
family | -0.135 |
happy | -0.131 |
Token | hurt |
Token ID | 5938 |
Feature activation | -8.46e+00 |
Pos logit contributions | |
gency | +0.154 |
lor | +0.147 |
pel | +0.143 |
ps | +0.143 |
sed | +0.140 |
Neg logit contributions | |
girl | -0.088 |
family | -0.083 |
ies | -0.082 |
ie | -0.080 |
play | -0.079 |
Token | . |
Token ID | 13 |
Feature activation | +4.85e-01 |
Pos logit contributions | |
fault | +0.395 |
uch | +0.368 |
sed | +0.367 |
ses | +0.366 |
gh | +0.366 |
Neg logit contributions | |
ies | -0.378 |
ie | -0.374 |
agree | -0.334 |
agreement | -0.323 |
happy | -0.321 |
Token | Why |
Token ID | 4162 |
Feature activation | -9.37e+00 |
Pos logit contributions | |
agreement | +0.018 |
agree | +0.018 |
ies | +0.017 |
family | +0.016 |
reci | +0.016 |
Neg logit contributions | |
gh | -0.023 |
fault | -0.023 |
uch | -0.022 |
tresp | -0.021 |
tricked | -0.020 |
Token | head |
Token ID | 1182 |
Feature activation | +2.34e+00 |
Pos logit contributions | |
uch | +0.136 |
fault | +0.133 |
gh | +0.129 |
tresp | +0.128 |
paste | +0.128 |
Neg logit contributions | |
agreement | -0.068 |
ies | -0.065 |
happy | -0.060 |
agree | -0.058 |
family | -0.057 |
Token | slowly |
Token ID | 6364 |
Feature activation | +2.38e+00 |
Pos logit contributions | |
ies | +0.063 |
agree | +0.062 |
happy | +0.060 |
agreement | +0.059 |
ie | +0.054 |
Neg logit contributions | |
fault | -0.138 |
uch | -0.138 |
tresp | -0.134 |
gency | -0.128 |
ses | -0.128 |
Token | . |
Token ID | 13 |
Feature activation | +3.97e-01 |
Pos logit contributions | |
agree | +0.087 |
agreement | +0.085 |
ies | +0.084 |
ie | +0.075 |
happy | +0.073 |
Neg logit contributions | |
fault | -0.121 |
uch | -0.120 |
gh | -0.116 |
tresp | -0.114 |
paste | -0.110 |
Token | � |
Token ID | 6184 |
Feature activation | -5.20e+00 |
Pos logit contributions | |
agree | +0.013 |
agreement | +0.012 |
happy | +0.011 |
play | +0.010 |
ies | +0.010 |
Neg logit contributions | |
uch | -0.023 |
fault | -0.022 |
gh | -0.022 |
tresp | -0.021 |
paste | -0.020 |
Token | � |
Token ID | 95 |
Feature activation | -8.02e+00 |
Pos logit contributions | |
fault | +0.209 |
uch | +0.177 |
tresp | +0.175 |
gh | +0.175 |
paste | +0.173 |
Neg logit contributions | |
agree | -0.263 |
agreement | -0.244 |
ashing | -0.230 |
ies | -0.227 |
play | -0.227 |
Token | € |
Token ID | 26391 |
Feature activation | -1.29e+01 |
Pos logit contributions | |
fault | +0.361 |
paste | +0.318 |
uch | +0.305 |
tresp | +0.303 |
happens | +0.299 |
Neg logit contributions | |
agree | -0.354 |
agreement | -0.346 |
play | -0.311 |
acking | -0.309 |
embrace | -0.298 |
Token | � |
Token ID | 129 |
Feature activation | -7.79e+00 |
Pos logit contributions | |
fault | +0.540 |
uch | +0.504 |
gh | +0.496 |
tresp | +0.492 |
paste | +0.481 |
Neg logit contributions | |
agree | -0.591 |
ies | -0.565 |
agreement | -0.547 |
happy | -0.524 |
ashing | -0.510 |
Token | � |
Token ID | 241 |
Feature activation | +1.01e+00 |
Pos logit contributions | |
fault | +0.356 |
uch | +0.354 |
paste | +0.345 |
tresp | +0.329 |
gh | +0.323 |
Neg logit contributions | |
agree | -0.326 |
agreement | -0.318 |
happy | -0.290 |
ies | -0.286 |
ashing | -0.280 |
Token | That |
Token ID | 2504 |
Feature activation | -4.00e-02 |
Pos logit contributions | |
agree | +0.039 |
agreement | +0.034 |
happy | +0.034 |
ashing | +0.032 |
ies | +0.032 |
Neg logit contributions | |
uch | -0.051 |
fault | -0.050 |
tresp | -0.049 |
gh | -0.048 |
paste | -0.047 |
Token | 's |
Token ID | 338 |
Feature activation | +1.69e+00 |
Pos logit contributions | |
ps | +0.002 |
gh | +0.002 |
ses | +0.002 |
OP | +0.002 |
fault | +0.002 |
Neg logit contributions | |
ie | -0.001 |
ies | -0.001 |
family | -0.001 |
girl | -0.001 |
happy | -0.001 |
Token | my |
Token ID | 616 |
Feature activation | -1.42e+00 |
Pos logit contributions | |
happy | +0.076 |
ies | +0.072 |
agree | +0.072 |
agreement | +0.071 |
family | +0.069 |
Neg logit contributions | |
gh | -0.068 |
uch | -0.068 |
fault | -0.065 |
OP | -0.065 |
gency | -0.064 |